| 1 | /* $Id: mmath.h,v 1.2 2000-05-23 20:34:53 jeroen Exp $ */ | 
|---|
| 2 |  | 
|---|
| 3 | /* | 
|---|
| 4 | * Mesa 3-D graphics library | 
|---|
| 5 | * Version:  3.3 | 
|---|
| 6 | * | 
|---|
| 7 | * Copyright (C) 1999  Brian Paul   All Rights Reserved. | 
|---|
| 8 | * | 
|---|
| 9 | * Permission is hereby granted, free of charge, to any person obtaining a | 
|---|
| 10 | * copy of this software and associated documentation files (the "Software"), | 
|---|
| 11 | * to deal in the Software without restriction, including without limitation | 
|---|
| 12 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, | 
|---|
| 13 | * and/or sell copies of the Software, and to permit persons to whom the | 
|---|
| 14 | * Software is furnished to do so, subject to the following conditions: | 
|---|
| 15 | * | 
|---|
| 16 | * The above copyright notice and this permission notice shall be included | 
|---|
| 17 | * in all copies or substantial portions of the Software. | 
|---|
| 18 | * | 
|---|
| 19 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS | 
|---|
| 20 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, | 
|---|
| 21 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL | 
|---|
| 22 | * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN | 
|---|
| 23 | * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN | 
|---|
| 24 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. | 
|---|
| 25 | */ | 
|---|
| 26 |  | 
|---|
| 27 |  | 
|---|
| 28 | /* | 
|---|
| 29 | * Faster arithmetic functions.  If the FAST_MATH preprocessor symbol is | 
|---|
| 30 | * defined on the command line (-DFAST_MATH) then we'll use some (hopefully) | 
|---|
| 31 | * faster functions for sqrt(), etc. | 
|---|
| 32 | */ | 
|---|
| 33 |  | 
|---|
| 34 |  | 
|---|
| 35 | #ifndef MMATH_H | 
|---|
| 36 | #define MMATH_H | 
|---|
| 37 |  | 
|---|
| 38 |  | 
|---|
| 39 | #include "glheader.h" | 
|---|
| 40 |  | 
|---|
| 41 |  | 
|---|
| 42 | /* | 
|---|
| 43 | * Set the x86 FPU control word to guarantee only 32 bits of precision | 
|---|
| 44 | * are stored in registers.  Allowing the FPU to store more introduces | 
|---|
| 45 | * differences between situations where numbers are pulled out of memory | 
|---|
| 46 | * vs. situations where the compiler is able to optimize register usage. | 
|---|
| 47 | * | 
|---|
| 48 | * In the worst case, we force the compiler to use a memory access to | 
|---|
| 49 | * truncate the float, by specifying the 'volatile' keyword. | 
|---|
| 50 | */ | 
|---|
| 51 | #if defined(__linux__) && defined(__i386__) | 
|---|
| 52 | #include <fpu_control.h> | 
|---|
| 53 |  | 
|---|
/*
 * Fallbacks for C libraries whose <fpu_control.h> does not supply the
 * _FPU_SETCW / _FPU_GETCW macros: the control word is written through
 * __setfpucw and read from the __fpu_control variable instead.
 */
#if !defined(_FPU_SETCW)
#define _FPU_SETCW __setfpucw
typedef unsigned short fpu_control_t;
#endif

#if !defined(_FPU_GETCW)
/* No trailing semicolon here: the caller supplies it, so the expansion is
 * a single expression statement and is safe as an unbraced if/else body. */
#define _FPU_GETCW(a) ((a) = __fpu_control)
#endif
|---|
| 62 |  | 
|---|
/*
 * START_FAST_MATH(x): save the current FPU control word in x, then load
 * the control word this library wants.
 *
 * Unless NO_FAST_MATH is defined, the loaded word is _FPU_SINGLE combined
 * with the IM, DM, ZM, OM, UM and PM mask bits; with NO_FAST_MATH it is
 * _FPU_DEFAULT.
 *
 * The body is wrapped in do { } while (0) so that "START_FAST_MATH(x);"
 * is exactly one statement and can be used as an unbraced if/else body.
 */
#if !defined(NO_FAST_MATH)
#define START_FAST_MATH(x)                                       \
do {                                                             \
   static fpu_control_t mask = _FPU_SINGLE | _FPU_MASK_IM        \
         | _FPU_MASK_DM | _FPU_MASK_ZM | _FPU_MASK_OM            \
         | _FPU_MASK_UM | _FPU_MASK_PM;                          \
   _FPU_GETCW( x );                                              \
   _FPU_SETCW( mask );                                           \
} while (0)
#else
#define START_FAST_MATH(x)                                       \
do {                                                             \
   static fpu_control_t mask = _FPU_DEFAULT;                     \
   _FPU_GETCW( x );                                              \
   _FPU_SETCW( mask );                                           \
} while (0)
#endif
|---|
| 82 |  | 
|---|
/*
 * END_FAST_MATH(x): load the control word held in x, i.e. put the FPU
 * back how the application had it when x was saved by START_FAST_MATH.
 *
 * do { } while (0) keeps the expansion a single statement so that
 * "END_FAST_MATH(x);" is safe as an unbraced if/else body.
 */
#define END_FAST_MATH(x)                        \
do {                                            \
   _FPU_SETCW( x );                             \
} while (0)

#define HAVE_FAST_MATH
|---|
| 91 |  | 
|---|
| 92 | #elif defined(__WATCOMC__) && !defined(NO_FAST_MATH) | 
|---|
| 93 |  | 
|---|
/*
 * Watcom-specific versions of the control-word save/restore, written as
 * "#pragma aux" inline assembly.  Both helpers receive a pointer to an
 * unsigned short in ESI (parm [esi]) and declare that they modify no
 * registers (modify exact []).
 *
 * START_FAST_MATH2(x): fstcw stores the FPU control word to *x, the low
 * six bits of *x are then set (or ..., 0x3f) and the result is loaded
 * back with fldcw.
 * END_FAST_MATH2(x): fldcw reloads the control word from *x.
 *
 * NOTE(review): the OR is applied to *x itself, so the word that
 * END_FAST_MATH2 later reloads already has those six bits set rather
 * than the application's original value -- confirm this is intended.
 * NOTE(review): unlike the Linux branch, no precision-control bits are
 * changed here; 0x3f presumably corresponds to the six exception-mask
 * bits -- TODO confirm.
 */

void START_FAST_MATH2(unsigned short *x);
#pragma aux START_FAST_MATH2 =          \
   "fstcw   word ptr [esi]"            \
   "or      word ptr [esi], 0x3f"      \
   "fldcw   word ptr [esi]"            \
   parm [esi]                          \
   modify exact [];

void END_FAST_MATH2(unsigned short *x);
#pragma aux END_FAST_MATH2 =            \
   "fldcw   word ptr [esi]"            \
   parm [esi]                          \
   modify exact [];

/* The public macros take the variable itself and pass its address on. */
#define START_FAST_MATH(x)  START_FAST_MATH2(& x)
#define END_FAST_MATH(x)  END_FAST_MATH2(& x)

/*
 * Disabled alternative written with _asm blocks (never compiled):
 *
__inline START_FAST_MATH(unsigned short x)
{
   _asm {
      fstcw   ax
      mov     x , ax
      or      ax, 0x3f
      fldcw   ax
   }
}

__inline END_FAST_MATH(unsigned short x)
{
   _asm {
      fldcw   x
   }
}
*/
#define HAVE_FAST_MATH
|---|
| 132 |  | 
|---|
| 133 | #else | 
|---|
/*
 * No FPU control-word handling in this configuration: both macros just
 * evaluate their argument and discard the result.
 */
#define START_FAST_MATH(x)   ((void)(x))
#define END_FAST_MATH(x)     ((void)(x))

/*
 * On the Mac a float really is a float, with the same precision as a
 * single-precision 387 float, so HAVE_FAST_MATH is still advertised.
 */
#ifdef macintosh
#  define HAVE_FAST_MATH
#endif
|---|
| 143 |  | 
|---|
| 144 | #endif | 
|---|
| 145 |  | 
|---|
| 146 |  | 
|---|
| 147 |  | 
|---|
/*
 * Float -> Int conversion
 *
 * FloatToInt(f) converts a float to an integer.  When USE_X86_ASM is
 * defined, compiler-specific inline assembly based on the x87 fistp
 * instruction is used for GCC/i386, for __MSC__/__WIN32__ and for Watcom;
 * every other configuration falls back to a plain (int) cast.
 *
 * NOTE(review): fistp rounds according to the current FPU rounding mode
 * while the (int) cast truncates toward zero, so the asm and fallback
 * paths can differ for non-integral inputs -- confirm callers tolerate it.
 * NOTE(review): __MSC__ is tested here; Microsoft compilers normally
 * identify themselves with _MSC_VER -- verify this branch is reachable.
 */

#if defined(USE_X86_ASM)
#if defined(__GNUC__) && defined(__i386__)
static __inline__ int FloatToInt(float f)
{
   int r;
   /* "t" puts f on top of the x87 stack; fistpl pops it while storing to
    * r, which is why "st" is listed as clobbered. */
   __asm__ ("fistpl %0" : "=m" (r) : "t" (f) : "st");
   return r;
}
#elif  defined(__MSC__) && defined(__WIN32__)
static __inline int FloatToInt(float f)
{
   int r;
   /* Load f onto the FPU stack, then store it as an integer and pop. */
   _asm {
      fld f
      fistp r
   }
   return r;
}
#elif defined(__WATCOMC__)
/* Watcom: argument arrives on the FPU stack (parm [8087]); a stack slot is
 * reserved with push, filled by fistp, and popped into EAX as the result.
 * Note the declared return type is long, not int. */
long FloatToInt(float f);
#pragma aux FloatToInt =                \
   "push   eax"                        \
   "fistp  dword ptr [esp]"            \
   "pop    eax"                        \
   parm [8087]                         \
   value [eax]                         \
   modify exact [eax];
/* Inline fsqrt; argument and result both live on the FPU stack.
 * Used by GL_SQRT when FAST_MATH is enabled on Watcom. */
float asm_sqrt (float x);
#pragma aux asm_sqrt =                  \
   "fsqrt"                             \
   parm [8087]                         \
   value [8087]                        \
   modify exact [];
#else
/* USE_X86_ASM requested but no known inline-asm dialect: plain cast. */
#define FloatToInt(F) ((int) (F))
#endif
#else
/* Portable version: plain cast. */
#define FloatToInt(F) ((int) (F))
#endif
|---|
| 191 |  | 
|---|
| 192 |  | 
|---|
/*
 * Square root
 *
 * GL_SQRT(X) expands to:
 *   - sqrt(X)      when FAST_MATH is not defined,
 *   - asm_sqrt(X)  with FAST_MATH on Watcom when USE_X86_ASM is defined,
 *   - gl_sqrt(X)   with FAST_MATH everywhere else.
 */

/* Defined elsewhere in the library. */
extern float gl_sqrt(float x);

#if !defined(FAST_MATH)
#  define GL_SQRT(X)  sqrt(X)
#elif defined (__WATCOMC__) && defined(USE_X86_ASM)
#  define GL_SQRT(X)  asm_sqrt(X)
#else
#  define GL_SQRT(X)  gl_sqrt(X)
#endif
|---|
| 208 |  | 
|---|
| 209 |  | 
|---|
/*
 * Normalize a 3-element vector to unit length, in place.
 *
 * V may be any expression yielding a pointer to (or array of) at least
 * three GLfloats; it is parenthesized on every use, so pointer arithmetic
 * such as NORMALIZE_3FV(base + 3) works.  V is evaluated several times, so
 * it must not have side effects.
 *
 * Vectors whose squared length is not greater than 1e-50 are left
 * untouched, which avoids dividing by zero.
 */
#define NORMALIZE_3FV( V )                                       \
do {                                                             \
   /* oddly named local so it cannot capture a caller's name */  \
   GLdouble mmath_len_ = LEN_SQUARED_3FV(V);                     \
   if (mmath_len_ > 1e-50) {                                     \
      mmath_len_ = 1.0 / GL_SQRT(mmath_len_);                    \
      (V)[0] = (GLfloat) ((V)[0] * mmath_len_);                  \
      (V)[1] = (GLfloat) ((V)[1] * mmath_len_);                  \
      (V)[2] = (GLfloat) ((V)[2] * mmath_len_);                  \
   }                                                             \
} while(0)

/* Length of the 3-element vector V (square root taken with GL_SQRT). */
#define LEN_3FV( V ) \
   (GL_SQRT((V)[0]*(V)[0] + (V)[1]*(V)[1] + (V)[2]*(V)[2]))

/* Squared length of the 3-element vector V. */
#define LEN_SQUARED_3FV( V ) \
   ((V)[0]*(V)[0] + (V)[1]*(V)[1] + (V)[2]*(V)[2])
|---|
| 227 |  | 
|---|
| 228 | /* | 
|---|
| 229 | * Optimization for: | 
|---|
| 230 | * GLfloat f; | 
|---|
| 231 | * GLubyte b = FloatToInt(CLAMP(f, 0, 1) * 255) | 
|---|
| 232 | */ | 
|---|
| 233 |  | 
|---|
/* Enable the IEEE bit-pattern shortcuts on i386 and sparc targets. */
#if defined(__sparc__) || defined(__i386__)
#  define USE_IEEE
#endif
|---|
| 237 |  | 
|---|
| 238 | #if defined(USE_IEEE) && !defined(DEBUG) | 
|---|
| 239 |  | 
|---|
/*
 * Bit pattern of 255/256 (0.99609375F).  Despite its name this is NOT
 * 1.0F; it is the threshold FLOAT_COLOR_TO_UBYTE_COLOR compares against,
 * so its value must not change.
 */
#define IEEE_ONE 0x3f7f0000

/* Bit pattern of exactly 1.0F: the upper limit used by the clamp macros. */
#define IEEE_FLOAT_ONE 0x3f800000

/*
 * CLAMP_FLOAT_COLOR(f): clamp the GLfloat lvalue f to [0, 1] in place.
 *
 * The float's bits are read as an unsigned integer.  Every value >= 1.0F
 * and every value with the sign bit set compares >= IEEE_FLOAT_ONE, so a
 * single integer test finds all out-of-range inputs; the sign then picks
 * 0 or 1.  Values inside [0, 1) are left untouched (the previous
 * threshold, IEEE_ONE, wrongly forced everything from 255/256 up to 1).
 *
 * f must be an addressable GLfloat; it is evaluated more than once.
 */
#define CLAMP_FLOAT_COLOR(f)                       \
do {                                               \
   if (*(GLuint *)&(f) >= IEEE_FLOAT_ONE)          \
      (f) = (*(GLint *)&(f) < 0) ? 0 : 1;          \
} while(0)

/*
 * CLAMP_FLOAT_COLOR_VALUE(f): expression form of the above; yields f
 * clamped to [0, 1] without modifying f.
 */
#define CLAMP_FLOAT_COLOR_VALUE(f)                 \
   ( (*(GLuint *)&(f) >= IEEE_FLOAT_ONE)           \
     ? ((*(GLint *)&(f) < 0) ? 0 : 1)              \
     : (f) )
|---|
| 252 |  | 
|---|
/*
 * FLOAT_COLOR_TO_UBYTE_COLOR(b, f): store in b the GLubyte color for the
 * float color f, clamping in the same step.  Statement macro; f and b
 * are each evaluated once.
 *
 * f is copied into a union and its bit pattern examined as a GLuint:
 *   - pattern >= IEEE_ONE and negative when viewed as GLint:  b = 0
 *   - pattern >= IEEE_ONE otherwise:                          b = 255
 *   - pattern <  IEEE_ONE: the value is scaled by 255/256 and 32768.0F
 *     is added; b is the low byte of the resulting bit pattern.  Between
 *     32768 and 65536 one float step is 1/256, so that low byte is f*255
 *     rounded to an integer.
 *
 * This function/macro is sensitive to precision (the rounding step
 * depends on how the FPU rounds the sum to single precision).  Test
 * carefully if you change it.
 *
 * Assumes GLfloat is a 32-bit IEEE single and GLuint is 32 bits wide --
 * this path is only compiled when USE_IEEE is defined.
 * NOTE(review): the local is named "tmp"; passing an argument that is
 * itself called tmp would be captured by it.
 */
#define FLOAT_COLOR_TO_UBYTE_COLOR(b, f)                        \
        do {                                                    \
           union { GLfloat r; GLuint i; } tmp;                  \
           tmp.r = f;                                           \
           b = ((tmp.i >= IEEE_ONE)                             \
               ? ((GLint)tmp.i < 0) ? (GLubyte)0 : (GLubyte)255 \
               : (tmp.r = tmp.r*(255.0F/256.0F) + 32768.0F,     \
                  (GLubyte)tmp.i));                             \
        } while (0)


/* On this path the conversion already clamps, so the "already clamped"
 * variant is simply the same macro. */
#define CLAMPED_FLOAT_COLOR_TO_UBYTE_COLOR(b,f) \
        FLOAT_COLOR_TO_UBYTE_COLOR(b, f)
|---|
| 270 |  | 
|---|
| 271 | #else | 
|---|
| 272 |  | 
|---|
/*
 * Portable versions, built on the CLAMP / CLAMP_SELF macros and on
 * FloatToInt.  Arguments are parenthesized and each expansion is wrapped
 * in parentheses, so expressions such as "x + y" can be passed for f.
 */

/* Clamp the lvalue f to [0, 1] in place; the result is discarded. */
#define CLAMP_FLOAT_COLOR(f) \
   ((void) CLAMP_SELF(f, 0, 1))

/* Yield f clamped to [0, 1]. */
#define CLAMP_FLOAT_COLOR_VALUE(f) \
   (CLAMP((f), 0, 1))

/* b = f clamped to [0, 1], scaled by 255 and converted with FloatToInt. */
#define FLOAT_COLOR_TO_UBYTE_COLOR(b, f)                        \
   ((b) = ((GLubyte) FloatToInt(CLAMP((f), 0.0F, 1.0F) * 255.0F)))

/* Same conversion for an f the caller guarantees is already in [0, 1]. */
#define CLAMPED_FLOAT_COLOR_TO_UBYTE_COLOR(b,f) \
   ((b) = ((GLubyte) FloatToInt((f) * 255.0F)))
|---|
| 284 |  | 
|---|
| 285 | #endif | 
|---|
| 286 |  | 
|---|
| 287 |  | 
|---|
/* 256-entry float lookup tables indexed by a ubyte color; defined
 * elsewhere in the library. */
extern float gl_ubyte_to_float_color_tab[256];
extern float gl_ubyte_to_float_255_color_tab[256];

/* Table lookups: c selects the entry. */
#define UBYTE_COLOR_TO_FLOAT_COLOR(c) \
   gl_ubyte_to_float_color_tab[(c)]

#define UBYTE_COLOR_TO_FLOAT_255_COLOR(c) \
   gl_ubyte_to_float_255_color_tab[(c)]

/* Copies entry c of the 255 table into the lvalue f by moving its bits
 * through an int rather than performing a float assignment. */
#define UBYTE_COLOR_TO_FLOAT_255_COLOR2(f,c) \
   (*(int *)&(f)) = ((int *)gl_ubyte_to_float_255_color_tab)[(c)]
|---|
| 296 |  | 
|---|
| 297 |  | 
|---|
/*
 * Convert four ubyte color components b[0..3] to floats f[0..3] using
 * UBYTE_COLOR_TO_FLOAT_COLOR.  f and b are parenthesized so pointer
 * expressions (e.g. base + 4) may be passed; both are evaluated four
 * times and must be free of side effects.
 */
#define UBYTE_RGBA_TO_FLOAT_RGBA(f,b)                    \
do {                                                     \
   (f)[0] = UBYTE_COLOR_TO_FLOAT_COLOR((b)[0]);          \
   (f)[1] = UBYTE_COLOR_TO_FLOAT_COLOR((b)[1]);          \
   (f)[2] = UBYTE_COLOR_TO_FLOAT_COLOR((b)[2]);          \
   (f)[3] = UBYTE_COLOR_TO_FLOAT_COLOR((b)[3]);          \
} while(0)
|---|
| 305 |  | 
|---|
| 306 |  | 
|---|
/*
 * Convert four ubyte color components b[0..3] to floats f[0..3] using
 * UBYTE_COLOR_TO_FLOAT_255_COLOR.  f and b are parenthesized so pointer
 * expressions may be passed; both are evaluated four times and must be
 * free of side effects.
 */
#define UBYTE_RGBA_TO_FLOAT_255_RGBA(f,b)                \
do {                                                     \
   (f)[0] = UBYTE_COLOR_TO_FLOAT_255_COLOR((b)[0]);      \
   (f)[1] = UBYTE_COLOR_TO_FLOAT_255_COLOR((b)[1]);      \
   (f)[2] = UBYTE_COLOR_TO_FLOAT_255_COLOR((b)[2]);      \
   (f)[3] = UBYTE_COLOR_TO_FLOAT_255_COLOR((b)[3]);      \
} while(0)
|---|
| 314 |  | 
|---|
/*
 * Convert four float color components f[0..3] to ubytes b[0..3] using
 * FLOAT_COLOR_TO_UBYTE_COLOR.  b and f are parenthesized before
 * indexing so pointer expressions may be passed; both are evaluated four
 * times and must be free of side effects.
 */
#define FLOAT_RGBA_TO_UBYTE_RGBA(b,f)                    \
do {                                                     \
   FLOAT_COLOR_TO_UBYTE_COLOR((b)[0], (f)[0]);           \
   FLOAT_COLOR_TO_UBYTE_COLOR((b)[1], (f)[1]);           \
   FLOAT_COLOR_TO_UBYTE_COLOR((b)[2], (f)[2]);           \
   FLOAT_COLOR_TO_UBYTE_COLOR((b)[3], (f)[3]);           \
} while(0)
|---|
| 322 |  | 
|---|
/*
 * Convert three float color components f[0..2] to ubytes b[0..2] using
 * FLOAT_COLOR_TO_UBYTE_COLOR.  b and f are parenthesized before
 * indexing so pointer expressions may be passed; both are evaluated
 * three times and must be free of side effects.
 */
#define FLOAT_RGB_TO_UBYTE_RGB(b,f)                      \
do {                                                     \
   FLOAT_COLOR_TO_UBYTE_COLOR((b)[0], (f)[0]);           \
   FLOAT_COLOR_TO_UBYTE_COLOR((b)[1], (f)[1]);           \
   FLOAT_COLOR_TO_UBYTE_COLOR((b)[2], (f)[2]);           \
} while(0)
|---|
| 329 |  | 
|---|
| 330 |  | 
|---|
/* Initialization entry point for this math module; takes no arguments,
 * returns nothing, and is defined elsewhere.
 * NOTE(review): presumably fills the lookup tables declared in this
 * header -- confirm against the definition. */
extern void _mesa_init_math(void);
|---|
| 332 |  | 
|---|
| 333 |  | 
|---|
| 334 | #endif | 
|---|