1 | /* $Id: mmath.h,v 1.2 2000-05-23 20:34:53 jeroen Exp $ */
|
---|
2 |
|
---|
3 | /*
|
---|
4 | * Mesa 3-D graphics library
|
---|
5 | * Version: 3.3
|
---|
6 | *
|
---|
7 | * Copyright (C) 1999 Brian Paul All Rights Reserved.
|
---|
8 | *
|
---|
9 | * Permission is hereby granted, free of charge, to any person obtaining a
|
---|
10 | * copy of this software and associated documentation files (the "Software"),
|
---|
11 | * to deal in the Software without restriction, including without limitation
|
---|
12 | * the rights to use, copy, modify, merge, publish, distribute, sublicense,
|
---|
13 | * and/or sell copies of the Software, and to permit persons to whom the
|
---|
14 | * Software is furnished to do so, subject to the following conditions:
|
---|
15 | *
|
---|
16 | * The above copyright notice and this permission notice shall be included
|
---|
17 | * in all copies or substantial portions of the Software.
|
---|
18 | *
|
---|
19 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
|
---|
20 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
---|
21 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
---|
22 | * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
|
---|
23 | * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
|
---|
24 | * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
---|
25 | */
|
---|
26 |
|
---|
27 |
|
---|
28 | /*
|
---|
29 | * Faster arithmetic functions. If the FAST_MATH preprocessor symbol is
|
---|
30 | * defined on the command line (-DFAST_MATH) then we'll use some (hopefully)
|
---|
31 | * faster functions for sqrt(), etc.
|
---|
32 | */
|
---|
33 |
|
---|
34 |
|
---|
35 | #ifndef MMATH_H
|
---|
36 | #define MMATH_H
|
---|
37 |
|
---|
38 |
|
---|
39 | #include "glheader.h"
|
---|
40 |
|
---|
41 |
|
---|
42 | /*
|
---|
43 | * Set the x86 FPU control word to guarantee only 32 bits of precision
|
---|
44 | * are stored in registers. Allowing the FPU to store more introduces
|
---|
45 | * differences between situations where numbers are pulled out of memory
|
---|
46 | * vs. situations where the compiler is able to optimize register usage.
|
---|
47 | *
|
---|
48 | * In the worst case, we force the compiler to use a memory access to
|
---|
49 | * truncate the float, by specifying the 'volatile' keyword.
|
---|
50 | */
|
---|
/*
 * START_FAST_MATH(x) / END_FAST_MATH(x)
 *
 * START_FAST_MATH saves the current FPU control word into the lvalue `x`
 * and installs Mesa's preferred setting; END_FAST_MATH(x) reloads the
 * control word from `x`.  Pass the same lvalue to both.  On platforms
 * without a specific implementation both macros merely evaluate `x`.
 *
 * Both macros expand to a single statement (or expression) and must be
 * followed by a semicolon, so they are safe inside unbraced if/else arms.
 *
 * HAVE_FAST_MATH is defined on the Linux/i386 and Watcom paths and on
 * the macintosh target.
 */
#if defined(__linux__) && defined(__i386__)
#include <fpu_control.h>

/* Shims for a <fpu_control.h> that does not provide _FPU_SETCW /
 * _FPU_GETCW: fall back to __setfpucw and the __fpu_control variable.
 */
#if !defined(_FPU_SETCW)
#define _FPU_SETCW __setfpucw
typedef unsigned short fpu_control_t;
#endif

#if !defined(_FPU_GETCW)
/* No trailing semicolon: the user of the macro supplies it. */
#define _FPU_GETCW(a) ((a) = __fpu_control)
#endif

/* Set it up how we want it.
 *
 * Without NO_FAST_MATH the new control word is _FPU_SINGLE with the
 * IM/DM/ZM/OM/UM/PM mask bits set; with NO_FAST_MATH it is _FPU_DEFAULT.
 */
#if !defined(NO_FAST_MATH)
#define START_FAST_MATH(x)                                        \
do {                                                              \
   static fpu_control_t mask = _FPU_SINGLE | _FPU_MASK_IM         \
            | _FPU_MASK_DM | _FPU_MASK_ZM | _FPU_MASK_OM          \
            | _FPU_MASK_UM | _FPU_MASK_PM;                        \
   _FPU_GETCW( x );                                               \
   _FPU_SETCW( mask );                                            \
} while (0)
#else
#define START_FAST_MATH(x)                                        \
do {                                                              \
   static fpu_control_t mask = _FPU_DEFAULT;                      \
   _FPU_GETCW( x );                                               \
   _FPU_SETCW( mask );                                            \
} while (0)
#endif

/* Put it back how the application had it.
 */
#define END_FAST_MATH(x)                                          \
do {                                                              \
   _FPU_SETCW( x );                                               \
} while (0)

#define HAVE_FAST_MATH

#elif defined(__WATCOMC__) && !defined(NO_FAST_MATH)

/* This is the watcom specific inline assembly version of setcw and getcw */

/* NOTE(review): the `or` below is applied to the word stored at [esi],
 * i.e. to the caller's saved copy, before it is reloaded.  The value later
 * handed to END_FAST_MATH therefore already has its low six bits set.
 * Confirm that this is intended.
 */
void START_FAST_MATH2(unsigned short *x);
#pragma aux START_FAST_MATH2 =          \
   "fstcw word ptr [esi]"               \
   "or word ptr [esi], 0x3f"            \
   "fldcw word ptr [esi]"               \
   parm [esi]                           \
   modify exact [];

void END_FAST_MATH2(unsigned short *x);
#pragma aux END_FAST_MATH2 =            \
   "fldcw word ptr [esi]"               \
   parm [esi]                           \
   modify exact [];

#define START_FAST_MATH(x) START_FAST_MATH2(&(x))
#define END_FAST_MATH(x) END_FAST_MATH2(&(x))

/* Alternative __inline/_asm formulation, left disabled:

__inline START_FAST_MATH(unsigned short x)
{
   _asm {
      fstcw ax
      mov x , ax
      or ax, 0x3f
      fldcw ax
   }
}

__inline END_FAST_MATH(unsigned short x)
{
   _asm {
      fldcw x
   }
}
*/
#define HAVE_FAST_MATH

#else
/* No control-word handling here: just evaluate the argument. */
#define START_FAST_MATH(x) (void)(x)
#define END_FAST_MATH(x) (void)(x)

/* The mac float really is a float, with the same precision as a
 * single precision 387 float.
 */
#if defined(macintosh)
#define HAVE_FAST_MATH
#endif

#endif
|
---|
145 |
|
---|
146 |
|
---|
147 |
|
---|
148 | /*
|
---|
149 | * Float -> Int conversion
|
---|
150 | */
|
---|
151 |
|
---|
/*
 * FloatToInt(f): convert a float to an integer.
 *
 * When USE_X86_ASM is defined and a supported compiler is detected, the
 * conversion is done with an x87 fistp store; in every other configuration
 * it is a plain C cast to int.
 *
 * NOTE(review): a C cast truncates toward zero, whereas fistp is expected
 * to honour the FPU's current rounding mode, so the variants may disagree
 * on non-integral inputs -- confirm callers do not depend on either.
 * NOTE(review): the Watcom variant is declared to return long, not int.
 *
 * The Watcom branch also provides asm_sqrt(), a wrapper around the fsqrt
 * instruction.
 */
#if defined(USE_X86_ASM) && defined(__GNUC__) && defined(__i386__)

static __inline__ int FloatToInt(float f)
{
   int result;
   /* "t" places f in st(0); fistpl pops it, hence the "st" clobber. */
   __asm__ ("fistpl %0" : "=m" (result) : "t" (f) : "st");
   return result;
}

#elif defined(USE_X86_ASM) && defined(__MSC__) && defined(__WIN32__)

static __inline int FloatToInt(float f)
{
   int result;
   _asm {
      fld f
      fistp result
   }
   return result;
}

#elif defined(USE_X86_ASM) && defined(__WATCOMC__)

long FloatToInt(float f);
#pragma aux FloatToInt =                \
   "push eax"                           \
   "fistp dword ptr [esp]"              \
   "pop eax"                            \
   parm [8087]                          \
   value [eax]                          \
   modify exact [eax];

float asm_sqrt (float x);
#pragma aux asm_sqrt =                  \
   "fsqrt"                              \
   parm [8087]                          \
   value [8087]                         \
   modify exact [];

#else

/* Portable fallback: ordinary C conversion. */
#define FloatToInt(F) ((int) (F))

#endif
|
---|
191 |
|
---|
192 |
|
---|
193 | /*
|
---|
194 | * Square root
|
---|
195 | */
|
---|
196 |
|
---|
/* Single-precision square root; declared here, defined outside this header. */
extern float gl_sqrt(float x);

/*
 * GL_SQRT(X) selects the square-root implementation:
 *   - FAST_MATH not defined:                    sqrt()
 *   - FAST_MATH with Watcom and USE_X86_ASM:    asm_sqrt()
 *   - FAST_MATH otherwise:                      gl_sqrt()
 */
#if !defined(FAST_MATH)
#  define GL_SQRT(X)  sqrt(X)
#elif defined (__WATCOMC__) && defined(USE_X86_ASM)
#  define GL_SQRT(X)  asm_sqrt(X)
#else
#  define GL_SQRT(X)  gl_sqrt(X)
#endif
|
---|
208 |
|
---|
209 |
|
---|
210 | /*
|
---|
211 | * Normalize a 3-element vector to unit length.
|
---|
212 | */
|
---|
/*
 * NORMALIZE_3FV(V): scale the 3-element vector V in place to unit length.
 * The vector is left untouched when its squared length is not greater
 * than 1e-50.  Expands to a single statement; V is evaluated several
 * times, so it must not have side effects.
 */
#define NORMALIZE_3FV( V )                            \
do {                                                  \
   GLdouble len = LEN_SQUARED_3FV(V);                 \
   if (len > 1e-50) {                                 \
      len = 1.0 / GL_SQRT(len);                       \
      (V)[0] = (GLfloat) ((V)[0] * len);              \
      (V)[1] = (GLfloat) ((V)[1] * len);              \
      (V)[2] = (GLfloat) ((V)[2] * len);              \
   }                                                  \
} while(0)

/* Length of the 3-element vector V (expression; V evaluated repeatedly). */
#define LEN_3FV( V ) \
   (GL_SQRT((V)[0]*(V)[0] + (V)[1]*(V)[1] + (V)[2]*(V)[2]))

/* Squared length of the 3-element vector V.  V is parenthesized so that
 * pointer expressions such as `base + 3` index correctly.
 */
#define LEN_SQUARED_3FV( V ) \
   ((V)[0]*(V)[0] + (V)[1]*(V)[1] + (V)[2]*(V)[2])
|
---|
227 |
|
---|
228 | /*
|
---|
229 | * Optimization for:
|
---|
230 | * GLfloat f;
|
---|
231 | * GLubyte b = FloatToInt(CLAMP(f, 0, 1) * 255)
|
---|
232 | */
|
---|
233 |
|
---|
/* Targets on which the bit-level float tricks below are enabled. */
#if defined(__i386__) || defined(__sparc__)
#define USE_IEEE
#endif

#if defined(USE_IEEE) && !defined(DEBUG)

/* Bit pattern of the single-precision value 255/256 (0.99609375).  Any
 * float whose bits, read as an unsigned integer, are >= this value is
 * either >= 255/256 or has its sign bit set.
 */
#define IEEE_ONE 0x3f7f0000

/*
 * CLAMP_FLOAT_COLOR(f): clamp the GLfloat lvalue f in place.  Values with
 * the sign bit set become 0, values >= 255/256 become 1, everything else
 * is left alone.  The bits are inspected through a union rather than a
 * pointer cast.  Expands to one statement.
 */
#define CLAMP_FLOAT_COLOR(f)                                            \
do {                                                                    \
   union { GLfloat r; GLuint i; } fc_bits_;                             \
   fc_bits_.r = (f);                                                    \
   if (fc_bits_.i >= IEEE_ONE)                                          \
      (f) = ((GLint) fc_bits_.i < 0) ? 0 : 1;                           \
} while(0)

/*
 * CLAMP_FLOAT_COLOR_VALUE(f): expression form of the clamp above; f must
 * be a GLfloat lvalue because its address is taken.
 * NOTE(review): this reads the float's bits through a pointer cast, which
 * C's aliasing rules do not sanction; an expression macro cannot declare
 * a union temporary here.
 */
#define CLAMP_FLOAT_COLOR_VALUE(f)                                      \
   ( (*(GLuint *)&(f) >= IEEE_ONE)                                      \
     ? ((*(GLint *)&(f) < 0) ? 0 : 1)                                   \
     : (f) )

/*
 * This function/macro is sensitive to precision.  Test carefully
 * if you change it.
 *
 * FLOAT_COLOR_TO_UBYTE_COLOR(b, f): store into b the GLubyte for float
 * colour f.  Out-of-range inputs are handled as in CLAMP_FLOAT_COLOR
 * (0 or 255).  Otherwise f*(255/256) + 32768 is computed in the union's
 * float member and the low byte of the resulting bit pattern is taken.
 * The temporary has a distinctive name so that a caller variable called
 * `tmp` can be passed as an argument.
 */
#define FLOAT_COLOR_TO_UBYTE_COLOR(b, f)                                \
do {                                                                    \
   union { GLfloat r; GLuint i; } fc_bits_;                             \
   fc_bits_.r = (f);                                                    \
   (b) = ((fc_bits_.i >= IEEE_ONE)                                      \
          ? (((GLint) fc_bits_.i < 0) ? (GLubyte) 0 : (GLubyte) 255)    \
          : (fc_bits_.r = fc_bits_.r * (255.0F/256.0F) + 32768.0F,      \
             (GLubyte) fc_bits_.i));                                    \
} while (0)


/* In this configuration the "already clamped" variant is the same code. */
#define CLAMPED_FLOAT_COLOR_TO_UBYTE_COLOR(b,f) \
   FLOAT_COLOR_TO_UBYTE_COLOR(b, f)

#else

/* Generic versions built on CLAMP / CLAMP_SELF / FloatToInt. */

#define CLAMP_FLOAT_COLOR(f) \
   (void) CLAMP_SELF(f,0,1)

#define CLAMP_FLOAT_COLOR_VALUE(f) \
   CLAMP(f,0,1)

/* f is parenthesized so that expression arguments scale as a whole. */
#define FLOAT_COLOR_TO_UBYTE_COLOR(b, f) \
   (b) = ((GLubyte) FloatToInt(CLAMP((f), 0.0F, 1.0F) * 255.0F))

#define CLAMPED_FLOAT_COLOR_TO_UBYTE_COLOR(b,f) \
   (b) = ((GLubyte) FloatToInt((f) * 255.0F))

#endif
|
---|
286 |
|
---|
287 |
|
---|
/*
 * GLubyte -> float colour conversion via 256-entry lookup tables.  The
 * tables are declared here and defined outside this header.
 * NOTE(review): judging by the names, the "255" table holds values on a
 * 0..255 float scale rather than 0..1 -- confirm against its initializer.
 */
extern float gl_ubyte_to_float_color_tab[256];
extern float gl_ubyte_to_float_255_color_tab[256];

/* Table lookup for component value c. */
#define UBYTE_COLOR_TO_FLOAT_COLOR(c) \
   (gl_ubyte_to_float_color_tab[c])

#define UBYTE_COLOR_TO_FLOAT_255_COLOR(c) \
   (gl_ubyte_to_float_255_color_tab[c])

/* Copies entry c of the "255" table into the float lvalue f by moving the
 * raw bits as an int instead of performing a float assignment.
 */
#define UBYTE_COLOR_TO_FLOAT_255_COLOR2(f,c) \
   ((*(int *)&(f)) = ((int *)gl_ubyte_to_float_255_color_tab)[c])
|
---|
296 |
|
---|
297 |
|
---|
/*
 * UBYTE_RGBA_TO_FLOAT_RGBA(f, b): convert the four components b[0..3] to
 * floats with UBYTE_COLOR_TO_FLOAT_COLOR and store them in f[0..3].
 * Both arguments are parenthesized before indexing, so pointer
 * expressions such as `dst + 4` are accepted.  Expands to one statement.
 */
#define UBYTE_RGBA_TO_FLOAT_RGBA(f,b)                       \
do {                                                        \
   (f)[0] = UBYTE_COLOR_TO_FLOAT_COLOR((b)[0]);             \
   (f)[1] = UBYTE_COLOR_TO_FLOAT_COLOR((b)[1]);             \
   (f)[2] = UBYTE_COLOR_TO_FLOAT_COLOR((b)[2]);             \
   (f)[3] = UBYTE_COLOR_TO_FLOAT_COLOR((b)[3]);             \
} while(0)


/* Same as above, but using UBYTE_COLOR_TO_FLOAT_255_COLOR per component. */
#define UBYTE_RGBA_TO_FLOAT_255_RGBA(f,b)                   \
do {                                                        \
   (f)[0] = UBYTE_COLOR_TO_FLOAT_255_COLOR((b)[0]);         \
   (f)[1] = UBYTE_COLOR_TO_FLOAT_255_COLOR((b)[1]);         \
   (f)[2] = UBYTE_COLOR_TO_FLOAT_255_COLOR((b)[2]);         \
   (f)[3] = UBYTE_COLOR_TO_FLOAT_255_COLOR((b)[3]);         \
} while(0)
|
---|
314 |
|
---|
/*
 * FLOAT_RGBA_TO_UBYTE_RGBA(b, f): convert f[0..3] to bytes b[0..3] using
 * FLOAT_COLOR_TO_UBYTE_COLOR per component.  The array arguments are
 * parenthesized before indexing, so pointer expressions such as
 * `dst + 4` are accepted.  Expands to one statement.
 */
#define FLOAT_RGBA_TO_UBYTE_RGBA(b,f)                       \
do {                                                        \
   FLOAT_COLOR_TO_UBYTE_COLOR(((b)[0]), ((f)[0]));          \
   FLOAT_COLOR_TO_UBYTE_COLOR(((b)[1]), ((f)[1]));          \
   FLOAT_COLOR_TO_UBYTE_COLOR(((b)[2]), ((f)[2]));          \
   FLOAT_COLOR_TO_UBYTE_COLOR(((b)[3]), ((f)[3]));          \
} while(0)

/* Three-component (RGB) variant of the macro above. */
#define FLOAT_RGB_TO_UBYTE_RGB(b,f)                         \
do {                                                        \
   FLOAT_COLOR_TO_UBYTE_COLOR(((b)[0]), ((f)[0]));          \
   FLOAT_COLOR_TO_UBYTE_COLOR(((b)[1]), ((f)[1]));          \
   FLOAT_COLOR_TO_UBYTE_COLOR(((b)[2]), ((f)[2]));          \
} while(0)
|
---|
329 |
|
---|
330 |
|
---|
/* Initialization entry point for the math module; takes no arguments and
 * returns nothing.  Defined outside this header.
 * NOTE(review): presumably sets up the lookup tables declared in this
 * header -- confirm against the definition.
 */
void _mesa_init_math(void);
|
---|
332 |
|
---|
333 |
|
---|
334 | #endif
|
---|