[3597] | 1 | /* $Id: mmath.h,v 1.2 2000-05-23 20:34:53 jeroen Exp $ */
|
---|
[2938] | 2 |
|
---|
| 3 | /*
|
---|
| 4 | * Mesa 3-D graphics library
|
---|
[3597] | 5 | * Version: 3.3
|
---|
[2938] | 6 | *
|
---|
| 7 | * Copyright (C) 1999 Brian Paul All Rights Reserved.
|
---|
| 8 | *
|
---|
| 9 | * Permission is hereby granted, free of charge, to any person obtaining a
|
---|
| 10 | * copy of this software and associated documentation files (the "Software"),
|
---|
| 11 | * to deal in the Software without restriction, including without limitation
|
---|
| 12 | * the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
---|
| 13 | * and/or sell copies of the Software, and to permit persons to whom the
|
---|
| 14 | * Software is furnished to do so, subject to the following conditions:
|
---|
| 15 | *
|
---|
| 16 | * The above copyright notice and this permission notice shall be included
|
---|
| 17 | * in all copies or substantial portions of the Software.
|
---|
| 18 | *
|
---|
| 19 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
---|
| 20 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
---|
| 21 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
---|
| 22 | * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
|
---|
| 23 | * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
---|
| 24 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
---|
| 25 | */
|
---|
| 26 |
|
---|
| 27 |
|
---|
| 28 | /*
|
---|
| 29 | * Faster arithmetic functions. If the FAST_MATH preprocessor symbol is
|
---|
| 30 | * defined on the command line (-DFAST_MATH) then we'll use some (hopefully)
|
---|
| 31 | * faster functions for sqrt(), etc.
|
---|
| 32 | */
|
---|
| 33 |
|
---|
| 34 |
|
---|
| 35 | #ifndef MMATH_H
|
---|
| 36 | #define MMATH_H
|
---|
| 37 |
|
---|
| 38 |
|
---|
[3597] | 39 | #include "glheader.h"
|
---|
[2938] | 40 |
|
---|
[3597] | 41 |
|
---|
[2938] | 42 | /*
|
---|
| 43 | * Set the x86 FPU control word to guarantee only 32 bits of precision
|
---|
| 44 | * are stored in registers. Allowing the FPU to store more introduces
|
---|
| 45 | * differences between situations where numbers are pulled out of memory
|
---|
| 46 | * vs. situations where the compiler is able to optimize register usage.
|
---|
| 47 | *
|
---|
| 48 | * In the worst case, we force the compiler to use a memory access to
|
---|
| 49 | * truncate the float, by specifying the 'volatile' keyword.
|
---|
| 50 | */
|
---|
| 51 | #if defined(__linux__) && defined(__i386__)
|
---|
| 52 | #include <fpu_control.h>
|
---|
| 53 |
|
---|
/*
 * Fallbacks for a <fpu_control.h> that lacks the _FPU_SETCW / _FPU_GETCW
 * accessors: map them onto the __setfpucw / __fpu_control names instead
 * (presumably the older libc interface -- confirm against the target libc).
 */
#if !defined(_FPU_SETCW)
#define _FPU_SETCW __setfpucw
typedef unsigned short fpu_control_t;
#endif

#if !defined(_FPU_GETCW)
/* Deliberately no trailing semicolon: the caller writes it, so the macro
 * expands to exactly one expression statement and remains usable as the
 * body of an unbraced if/else.
 */
#define _FPU_GETCW(a) ((a) = __fpu_control)
#endif
|
---|
| 62 |
|
---|
| 63 | /* Set it up how we want it.
|
---|
| 64 | */
|
---|
/*
 * START_FAST_MATH(x): save the current FPU control word into x, then load
 * the control word Mesa wants.  Unless NO_FAST_MATH is defined that is
 * _FPU_SINGLE combined with the six _FPU_MASK_* bits; with NO_FAST_MATH it
 * is _FPU_DEFAULT.
 *
 * END_FAST_MATH(x): reload the control word previously saved in x.
 *
 * x must be a modifiable lvalue able to hold an fpu_control_t.  Both macros
 * expand to a single do { } while (0) statement, so they need a trailing
 * semicolon and are safe as the body of an unbraced if/else.
 */
#if !defined(NO_FAST_MATH)
#define START_FAST_MATH(x)                                      \
   do {                                                         \
      static fpu_control_t mask = _FPU_SINGLE | _FPU_MASK_IM    \
            | _FPU_MASK_DM | _FPU_MASK_ZM | _FPU_MASK_OM        \
            | _FPU_MASK_UM | _FPU_MASK_PM;                      \
      _FPU_GETCW( x );                                          \
      _FPU_SETCW( mask );                                       \
   } while (0)
#else
#define START_FAST_MATH(x)                                      \
   do {                                                         \
      static fpu_control_t mask = _FPU_DEFAULT;                 \
      _FPU_GETCW( x );                                          \
      _FPU_SETCW( mask );                                       \
   } while (0)
#endif

/* Put it back how the application had it.
 */
#define END_FAST_MATH(x)                                        \
   do {                                                         \
      _FPU_SETCW( x );                                          \
   } while (0)
|
---|
| 89 |
|
---|
| 90 | #define HAVE_FAST_MATH
|
---|
| 91 |
|
---|
[3597] | 92 | #elif defined(__WATCOMC__) && !defined(NO_FAST_MATH)
|
---|
| 93 |
|
---|
| 94 | /* This is the watcom specific inline assembly version of setcw and getcw */
|
---|
| 95 |
|
---|
/*
 * Watcom inline-assembly save/restore of the x87 control word.
 *
 * START_FAST_MATH2 stores the current control word in *x and then loads a
 * copy of it with the low six bits (0x3f) set.  The OR is done on a scratch
 * copy in ax, not on *x itself, so *x keeps the value that was in effect on
 * entry and END_FAST_MATH2 reloads exactly that value.  The scratch copy is
 * bounced through the CPU stack because fldcw takes a memory operand.
 *
 * NOTE(review): not compiled here (needs Watcom); verify with wcc386.
 */
void START_FAST_MATH2(unsigned short *x);
#pragma aux START_FAST_MATH2 =                                  \
   "fstcw   word ptr [esi]"                                     \
   "mov     ax, word ptr [esi]"                                 \
   "or      ax, 0x3f"                                           \
   "push    eax"                                                \
   "fldcw   word ptr [esp]"                                     \
   "pop     eax"                                                \
   parm [esi]                                                   \
   modify exact [eax];

/* Reload the control word stored in *x. */
void END_FAST_MATH2(unsigned short *x);
#pragma aux END_FAST_MATH2 =                                    \
   "fldcw   word ptr [esi]"                                     \
   parm [esi]                                                   \
   modify exact [];

/* x must be an unsigned short lvalue; its address is what gets passed. */
#define START_FAST_MATH(x)  START_FAST_MATH2(&(x))
#define END_FAST_MATH(x)    END_FAST_MATH2(&(x))
|
---|
| 112 |
|
---|
| 113 | /*
|
---|
| 114 | __inline START_FAST_MATH(unsigned short x)
|
---|
| 115 | {
|
---|
| 116 | _asm {
|
---|
| 117 | fstcw ax
|
---|
| 118 | mov x , ax
|
---|
| 119 | or ax, 0x3f
|
---|
| 120 | fldcw ax
|
---|
| 121 | }
|
---|
| 122 | }
|
---|
| 123 |
|
---|
| 124 | __inline END_FAST_MATH(unsigned short x)
|
---|
| 125 | {
|
---|
| 126 | _asm {
|
---|
| 127 | fldcw x
|
---|
| 128 | }
|
---|
| 129 | }
|
---|
| 130 | */
|
---|
| 131 | #define HAVE_FAST_MATH
|
---|
| 132 |
|
---|
[2938] | 133 | #else
|
---|
/* No control-word handling in this configuration: both macros just
 * evaluate their argument and discard the result.
 */
#define START_FAST_MATH(x)  ((void) (x))
#define END_FAST_MATH(x)    ((void) (x))

/* On the Macintosh a float really is a float, with the same precision as
 * a single-precision 387 float, so HAVE_FAST_MATH is defined there too.
 */
#ifdef macintosh
#define HAVE_FAST_MATH
#endif
|
---|
| 143 |
|
---|
| 144 | #endif
|
---|
| 145 |
|
---|
| 146 |
|
---|
| 147 |
|
---|
| 148 | /*
|
---|
| 149 | * Float -> Int conversion
|
---|
| 150 | */
|
---|
| 151 |
|
---|
/*
 * FloatToInt(f): convert a float to an integer.
 *
 * When USE_X86_ASM is defined and the compiler is one of the recognised
 * ones, the conversion is an x87 fistp store; in every other configuration
 * it is a plain C cast.
 *
 * NOTE(review): the cast truncates toward zero, while fistp presumably
 * follows the FPU's current rounding mode, so the two paths may disagree
 * for non-integral inputs -- confirm that callers tolerate either.
 */
#if defined(USE_X86_ASM) && defined(__GNUC__) && defined(__i386__)

static __inline__ int FloatToInt(float f)
{
   int result;
   /* f is passed in st(0) ("t"); "st" is listed as clobbered. */
   __asm__ ("fistpl %0" : "=m" (result) : "t" (f) : "st");
   return result;
}

#elif defined(USE_X86_ASM) && defined(__MSC__) && defined(__WIN32__)

static __inline int FloatToInt(float f)
{
   int result;
   _asm {
      fld f
      fistp result
   }
   return result;
}

#elif defined(USE_X86_ASM) && defined(__WATCOMC__)

/* Argument arrives on the 8087 stack; the integer is stored to a stack
 * slot made by pushing eax and is returned by popping it back into eax.
 */
long FloatToInt(float f);
#pragma aux FloatToInt =                \
   "push eax"                           \
   "fistp dword ptr [esp]"              \
   "pop eax"                            \
   parm [8087]                          \
   value [eax]                          \
   modify exact [eax];

/* Watcom only: a single fsqrt on the 8087 stack argument. */
float asm_sqrt (float x);
#pragma aux asm_sqrt =                  \
   "fsqrt"                              \
   parm [8087]                          \
   value [8087]                         \
   modify exact [];

#else

/* Portable fallback: ordinary C conversion. */
#define FloatToInt(F) ((int) (F))

#endif
|
---|
| 191 |
|
---|
| 192 |
|
---|
| 193 | /*
|
---|
| 194 | * Square root
|
---|
| 195 | */
|
---|
| 196 |
|
---|
/* Mesa's own square root routine; defined outside this header. */
extern float gl_sqrt(float x);

/*
 * GL_SQRT(X) selects the square root implementation:
 *   - FAST_MATH not defined                  -> sqrt()
 *   - FAST_MATH + Watcom with USE_X86_ASM    -> asm_sqrt()
 *   - FAST_MATH, any other configuration     -> gl_sqrt()
 */
#if !defined(FAST_MATH)
#  define GL_SQRT(X)  sqrt(X)
#elif defined(__WATCOMC__) && defined(USE_X86_ASM)
#  define GL_SQRT(X)  asm_sqrt(X)
#else
#  define GL_SQRT(X)  gl_sqrt(X)
#endif
|
---|
| 208 |
|
---|
| 209 |
|
---|
| 210 | /*
|
---|
| 211 | * Normalize a 3-element vector to unit length.
|
---|
| 212 | */
|
---|
/*
 * NORMALIZE_3FV(V): scale the 3-element vector V to unit length, in place.
 * A vector whose squared length is not greater than 1e-50 is left as is.
 *
 * V may be any pointer/array expression (it is parenthesised at every use)
 * but it is evaluated several times, so it must have no side effects.
 */
#define NORMALIZE_3FV( V )                                      \
do {                                                            \
   GLdouble len = LEN_SQUARED_3FV(V);                           \
   if (len > 1e-50) {                                           \
      len = 1.0 / GL_SQRT(len);                                 \
      (V)[0] = (GLfloat) ((V)[0] * len);                        \
      (V)[1] = (GLfloat) ((V)[1] * len);                        \
      (V)[2] = (GLfloat) ((V)[2] * len);                        \
   }                                                            \
} while(0)

/* Length of the 3-element vector V, via GL_SQRT. */
#define LEN_3FV( V ) \
   (GL_SQRT((V)[0]*(V)[0] + (V)[1]*(V)[1] + (V)[2]*(V)[2]))

/* Squared length of the 3-element vector V (no square root taken). */
#define LEN_SQUARED_3FV( V ) \
   ((V)[0]*(V)[0] + (V)[1]*(V)[1] + (V)[2]*(V)[2])
|
---|
| 227 |
|
---|
| 228 | /*
|
---|
| 229 | * Optimization for:
|
---|
| 230 | * GLfloat f;
|
---|
| 231 | * GLubyte b = FloatToInt(CLAMP(f, 0, 1) * 255)
|
---|
| 232 | */
|
---|
| 233 |
|
---|
| 234 | #if defined(__i386__) || defined(__sparc__)
|
---|
| 235 | #define USE_IEEE
|
---|
| 236 | #endif
|
---|
| 237 |
|
---|
| 238 | #if defined(USE_IEEE) && !defined(DEBUG)
|
---|
| 239 |
|
---|
/*
 * Upper threshold used by the bit-pattern comparisons below.
 * 0x3f7f0000 is the IEEE-754 single-precision encoding of 255/256
 * (0.99609375), i.e. slightly below 1.0.
 */
#define IEEE_ONE 0x3f7f0000

/*
 * CLAMP_FLOAT_COLOR(f): clamp the GLfloat lvalue f in place.
 *
 * The float's bits are compared as an unsigned integer: anything at or
 * above IEEE_ONE is either a non-negative value >= 255/256 or a value with
 * the sign bit set.  Sign bit set -> 0, otherwise -> 1.  Values below the
 * threshold are left untouched.
 *
 * NOTE(review): because the threshold is 255/256, inputs in [255/256, 1)
 * are raised to 1 -- confirm this is intended for the clamp macros.
 *
 * The bits are read through a union rather than a pointer cast.
 */
#define CLAMP_FLOAT_COLOR(f)                                    \
   do {                                                         \
      union { GLfloat r; GLuint i; GLint s; } clamp_bits_;      \
      clamp_bits_.r = (f);                                      \
      if (clamp_bits_.i >= IEEE_ONE)                            \
         (f) = (clamp_bits_.s < 0) ? 0 : 1;                     \
   } while(0)

/*
 * CLAMP_FLOAT_COLOR_VALUE(f): expression form of the above; yields 0, 1 or
 * f itself without modifying f.  f must be an lvalue because its address
 * is taken to inspect the bit pattern.
 */
#define CLAMP_FLOAT_COLOR_VALUE(f)                              \
   ( (*(GLuint *)&(f) >= IEEE_ONE)                              \
     ? ((*(GLint *)&(f) < 0) ? 0 : 1)                           \
     : (f) )
|
---|
| 252 |
|
---|
| 253 | /*
|
---|
| 254 | * This function/macro is sensitive to precision. Test carefully
|
---|
| 255 | * if you change it.
|
---|
| 256 | */
|
---|
/*
 * FLOAT_COLOR_TO_UBYTE_COLOR(b, f): store into b the GLubyte color for the
 * float color f, clamping in the process.
 *
 *   - bit pattern >= IEEE_ONE with the sign bit set  -> 0
 *   - bit pattern >= IEEE_ONE otherwise              -> 255
 *   - anything else: f*(255/256) + 32768 is stored back as a float and the
 *     low 8 bits of its bit pattern are taken.  Near 32768 a single
 *     precision float has a step of 1/256, so those bits hold f*255
 *     rounded by the FPU.
 *
 * The scratch union has a macro-private name so that an argument called
 * "tmp" in the caller is not captured.  f is evaluated once, b once.
 */
#define FLOAT_COLOR_TO_UBYTE_COLOR(b, f)                                \
   do {                                                                 \
      union { GLfloat r; GLuint i; } f2ub_tmp_;                         \
      f2ub_tmp_.r = (f);                                                \
      (b) = ((f2ub_tmp_.i >= IEEE_ONE)                                  \
             ? ((GLint)f2ub_tmp_.i < 0) ? (GLubyte)0 : (GLubyte)255     \
             : (f2ub_tmp_.r = f2ub_tmp_.r*(255.0F/256.0F) + 32768.0F,   \
                (GLubyte)f2ub_tmp_.i));                                 \
   } while (0)


/* Already-clamped input takes the same path in this configuration. */
#define CLAMPED_FLOAT_COLOR_TO_UBYTE_COLOR(b,f)                         \
   FLOAT_COLOR_TO_UBYTE_COLOR(b, f)
|
---|
| 270 |
|
---|
| 271 | #else
|
---|
| 272 |
|
---|
/*
 * Portable versions of the color clamp/convert macros, built on CLAMP,
 * CLAMP_SELF and FloatToInt.  Arguments are parenthesised so compound
 * expressions such as "a + b" or "*p" convert correctly.
 */

/* Clamp the lvalue f to [0,1] in place; the result is discarded. */
#define CLAMP_FLOAT_COLOR(f) \
   ((void) CLAMP_SELF(f,0,1))

/* Value of f clamped to [0,1]; f itself is not modified. */
#define CLAMP_FLOAT_COLOR_VALUE(f) \
   CLAMP(f,0,1)

/* b = f clamped to [0,1], scaled by 255 and converted with FloatToInt. */
#define FLOAT_COLOR_TO_UBYTE_COLOR(b, f) \
   (b) = ((GLubyte) FloatToInt(CLAMP((f), 0.0F, 1.0F) * 255.0F))

/* As above, for an f the caller has already clamped to [0,1]. */
#define CLAMPED_FLOAT_COLOR_TO_UBYTE_COLOR(b,f) \
   (b) = ((GLubyte) FloatToInt((f) * 255.0F))
|
---|
| 284 |
|
---|
| 285 | #endif
|
---|
| 286 |
|
---|
| 287 |
|
---|
/*
 * 256-entry lookup tables indexed by a GLubyte color component.  They are
 * only declared here; their definition and contents live elsewhere.
 */
extern float gl_ubyte_to_float_color_tab[256];
extern float gl_ubyte_to_float_255_color_tab[256];

/* Table lookups: c is the index into the corresponding table. */
#define UBYTE_COLOR_TO_FLOAT_COLOR(c) \
   gl_ubyte_to_float_color_tab[c]

#define UBYTE_COLOR_TO_FLOAT_255_COLOR(c) \
   gl_ubyte_to_float_255_color_tab[c]

/*
 * Copy entry c of the "255" table into the lvalue f as a raw int, i.e.
 * without a floating-point load/store.  Relies on int and float having the
 * same size.
 */
#define UBYTE_COLOR_TO_FLOAT_255_COLOR2(f,c) \
   (*(int *)&(f)) = ((int *)gl_ubyte_to_float_255_color_tab)[c]
|
---|
| 296 |
|
---|
| 297 |
|
---|
/*
 * Convert four color components from the GLubyte array b to the float
 * array f using UBYTE_COLOR_TO_FLOAT_COLOR on each one.
 * f and b may be arbitrary pointer expressions (e.g. "dst + 4"); each is
 * evaluated four times, so they must have no side effects.
 */
#define UBYTE_RGBA_TO_FLOAT_RGBA(f,b)                           \
do {                                                            \
   (f)[0] = UBYTE_COLOR_TO_FLOAT_COLOR((b)[0]);                 \
   (f)[1] = UBYTE_COLOR_TO_FLOAT_COLOR((b)[1]);                 \
   (f)[2] = UBYTE_COLOR_TO_FLOAT_COLOR((b)[2]);                 \
   (f)[3] = UBYTE_COLOR_TO_FLOAT_COLOR((b)[3]);                 \
} while(0)


/* Same as above, but through UBYTE_COLOR_TO_FLOAT_255_COLOR. */
#define UBYTE_RGBA_TO_FLOAT_255_RGBA(f,b)                       \
do {                                                            \
   (f)[0] = UBYTE_COLOR_TO_FLOAT_255_COLOR((b)[0]);             \
   (f)[1] = UBYTE_COLOR_TO_FLOAT_255_COLOR((b)[1]);             \
   (f)[2] = UBYTE_COLOR_TO_FLOAT_255_COLOR((b)[2]);             \
   (f)[3] = UBYTE_COLOR_TO_FLOAT_255_COLOR((b)[3]);             \
} while(0)
|
---|
| 314 |
|
---|
/*
 * Convert four (RGBA) or three (RGB) float color components from f into
 * the GLubyte array b, one FLOAT_COLOR_TO_UBYTE_COLOR per component.
 * f and b may be arbitrary pointer expressions; each is evaluated once per
 * component, so they must have no side effects.
 */
#define FLOAT_RGBA_TO_UBYTE_RGBA(b,f)                           \
do {                                                            \
   FLOAT_COLOR_TO_UBYTE_COLOR(((b)[0]), ((f)[0]));              \
   FLOAT_COLOR_TO_UBYTE_COLOR(((b)[1]), ((f)[1]));              \
   FLOAT_COLOR_TO_UBYTE_COLOR(((b)[2]), ((f)[2]));              \
   FLOAT_COLOR_TO_UBYTE_COLOR(((b)[3]), ((f)[3]));              \
} while(0)

#define FLOAT_RGB_TO_UBYTE_RGB(b,f)                             \
do {                                                            \
   FLOAT_COLOR_TO_UBYTE_COLOR(((b)[0]), ((f)[0]));              \
   FLOAT_COLOR_TO_UBYTE_COLOR(((b)[1]), ((f)[1]));              \
   FLOAT_COLOR_TO_UBYTE_COLOR(((b)[2]), ((f)[2]));              \
} while(0)
|
---|
| 329 |
|
---|
| 330 |
|
---|
[3597] | 331 | extern void _mesa_init_math(void);
|
---|
[2938] | 332 |
|
---|
| 333 |
|
---|
| 334 | #endif
|
---|