source: trunk/src/opengl/mesa/mmath.h@ 4803

Last change on this file since 4803 was 3597, checked in by jeroen, 25 years ago

* empty log message *

File size: 9.4 KB
Line 
1/* $Id: mmath.h,v 1.2 2000-05-23 20:34:53 jeroen Exp $ */
2
3/*
4 * Mesa 3-D graphics library
5 * Version: 3.3
6 *
7 * Copyright (C) 1999 Brian Paul All Rights Reserved.
8 *
9 * Permission is hereby granted, free of charge, to any person obtaining a
10 * copy of this software and associated documentation files (the "Software"),
11 * to deal in the Software without restriction, including without limitation
12 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
13 * and/or sell copies of the Software, and to permit persons to whom the
14 * Software is furnished to do so, subject to the following conditions:
15 *
16 * The above copyright notice and this permission notice shall be included
17 * in all copies or substantial portions of the Software.
18 *
19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
20 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
22 * BRIAN PAUL BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
23 * AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
24 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25 */
26
27
28/*
29 * Faster arithmetic functions. If the FAST_MATH preprocessor symbol is
30 * defined on the command line (-DFAST_MATH) then we'll use some (hopefully)
31 * faster functions for sqrt(), etc.
32 */
33
34
35#ifndef MMATH_H
36#define MMATH_H
37
38
39#include "glheader.h"
40
41
42/*
43 * Set the x86 FPU control word to guarantee only 32 bits of precision
44 * are stored in registers. Allowing the FPU to store more introduces
45 * differences between situations where numbers are pulled out of memory
46 * vs. situations where the compiler is able to optimize register usage.
47 *
48 * In the worst case, we force the compiler to use a memory access to
49 * truncate the float, by specifying the 'volatile' keyword.
50 */
51#if defined(__linux__) && defined(__i386__)
52#include <fpu_control.h>
53
/*
 * Fallbacks for C libraries whose <fpu_control.h> does not provide the
 * _FPU_SETCW / _FPU_GETCW accessors: the control word is then written
 * through __setfpucw and read from the __fpu_control variable.
 */
#if !defined(_FPU_SETCW)
#define _FPU_SETCW __setfpucw
typedef unsigned short fpu_control_t;
#endif

/*
 * The expansion deliberately carries no trailing semicolon, so that
 * "_FPU_GETCW(cw);" is exactly one statement and remains valid as the
 * unbraced body of an if/else.
 */
#if !defined(_FPU_GETCW)
#define _FPU_GETCW(a) ((a) = __fpu_control)
#endif
62
/*
 * START_FAST_MATH(x): save the current FPU control word in x, then load
 * the control word Mesa wants:
 *   - normally single precision with all six exception masks set;
 *   - _FPU_DEFAULT when NO_FAST_MATH is defined.
 * x must be a modifiable fpu_control_t lvalue.
 *
 * Both macros expand to a single do { } while (0) statement so they can
 * be used as the unbraced body of an if/else.  The internal variable has
 * a deliberately unusual name so it cannot capture the caller's x.
 */
#if !defined(NO_FAST_MATH)
#define START_FAST_MATH(x)                                              \
do {                                                                    \
   static fpu_control_t fast_math_cw_ = _FPU_SINGLE | _FPU_MASK_IM      \
            | _FPU_MASK_DM | _FPU_MASK_ZM | _FPU_MASK_OM                \
            | _FPU_MASK_UM | _FPU_MASK_PM;                              \
   _FPU_GETCW( x );                                                     \
   _FPU_SETCW( fast_math_cw_ );                                         \
} while (0)
#else
#define START_FAST_MATH(x)                                              \
do {                                                                    \
   static fpu_control_t fast_math_cw_ = _FPU_DEFAULT;                   \
   _FPU_GETCW( x );                                                     \
   _FPU_SETCW( fast_math_cw_ );                                         \
} while (0)
#endif

/*
 * END_FAST_MATH(x): put the control word back how the application had
 * it, i.e. reload the value START_FAST_MATH saved in x.
 */
#define END_FAST_MATH(x)                                                \
do {                                                                    \
   _FPU_SETCW( x );                                                     \
} while (0)
89
90#define HAVE_FAST_MATH
91
92#elif defined(__WATCOMC__) && !defined(NO_FAST_MATH)
93
/*
 * Watcom-specific inline assembly versions of the control word helpers.
 *
 * START_FAST_MATH2 stores the current x87 control word in *x and then
 * loads a copy of it with the low six bits (0x3f) set.  The modified
 * copy is built in AX and loaded from a scratch stack slot, so *x keeps
 * the ORIGINAL control word.  OR-ing 0x3f directly into *x would make
 * END_FAST_MATH2 reload the modified value and never restore the
 * application's setting.  EAX is saved and restored around the sequence,
 * so no register is reported as modified.
 *
 * END_FAST_MATH2 reloads the control word saved in *x.
 */

void START_FAST_MATH2(unsigned short *x);
#pragma aux START_FAST_MATH2 =          \
   "push eax"                           \
   "fstcw word ptr [esi]"               \
   "mov ax, word ptr [esi]"             \
   "or ax, 0x3f"                        \
   "push eax"                           \
   "fldcw word ptr [esp]"               \
   "pop eax"                            \
   "pop eax"                            \
   parm [esi]                           \
   modify exact [];

void END_FAST_MATH2(unsigned short *x);
#pragma aux END_FAST_MATH2 =            \
   "fldcw word ptr [esi]"               \
   parm [esi]                           \
   modify exact [];

/* x must be an unsigned short lvalue; its address is handed to the asm. */
#define START_FAST_MATH(x) START_FAST_MATH2(&(x))
#define END_FAST_MATH(x) END_FAST_MATH2(&(x))

131#define HAVE_FAST_MATH
132
133#else
/*
 * No FPU control word handling on this platform: both macros merely
 * evaluate their argument and discard the result.
 */
#define START_FAST_MATH(x)   ((void) (x))
#define END_FAST_MATH(x)     ((void) (x))

/*
 * On the Macintosh a float really is a float, with the same precision
 * as a single precision 387 float, so fast math is available there
 * without touching any control word.
 */
#if defined(macintosh)
#  define HAVE_FAST_MATH
#endif
143
144#endif
145
146
147
/*
 * Float -> Int conversion
 *
 * FloatToInt(f) converts a float to an integer.  Without USE_X86_ASM,
 * or on a compiler not listed below, it is a plain C cast.  The
 * assembly variants use the x87 fistp instruction instead.
 *
 * NOTE(review): fistp rounds according to the FPU's current rounding
 * mode whereas the C cast truncates toward zero, so the variants can
 * differ for non-integral inputs.
 */

#if !defined(USE_X86_ASM)

#define FloatToInt(F) ((int) (F))

#elif defined(__GNUC__) && defined(__i386__)

static __inline__ int FloatToInt(float f)
{
   int result;
   /* f is taken from the top of the x87 stack ("t") and popped by fistpl. */
   __asm__ ("fistpl %0" : "=m" (result) : "t" (f) : "st");
   return result;
}

#elif defined(__MSC__) && defined(__WIN32__)

static __inline int FloatToInt(float f)
{
   int result;
   _asm {
      fld f
      fistp result
   }
   return result;
}

#elif defined(__WATCOMC__)

/* The argument arrives on the x87 stack; the result is returned in EAX. */
long FloatToInt(float f);
#pragma aux FloatToInt =                \
   "push eax"                           \
   "fistp dword ptr [esp]"              \
   "pop eax"                            \
   parm [8087]                          \
   value [eax]                          \
   modify exact [eax];

/* Inline fsqrt; GL_SQRT selects it when FAST_MATH is enabled. */
float asm_sqrt (float x);
#pragma aux asm_sqrt =                  \
   "fsqrt"                              \
   parm [8087]                          \
   value [8087]                         \
   modify exact [];

#else

#define FloatToInt(F) ((int) (F))

#endif
191
192
/*
 * Square root
 *
 * GL_SQRT(X) expands to:
 *   - sqrt(X)      when FAST_MATH is not defined;
 *   - asm_sqrt(X)  with FAST_MATH on Watcom when USE_X86_ASM is defined;
 *   - gl_sqrt(X)   with FAST_MATH everywhere else.
 */

extern float gl_sqrt(float x);

#if !defined(FAST_MATH)
#  define GL_SQRT(X) sqrt(X)
#elif defined (__WATCOMC__) && defined(USE_X86_ASM)
#  define GL_SQRT(X) asm_sqrt(X)
#else
#  define GL_SQRT(X) gl_sqrt(X)
#endif
208
209
/*
 * 3-element vector helpers.
 *
 * V may be any expression that can be subscripted with [0], [1] and [2]
 * (an array name, or a pointer expression such as "normals + 3*i").  It
 * is parenthesised on every use, but it is still evaluated several
 * times, so it must not have side effects.
 */

/* Squared length of V. */
#define LEN_SQUARED_3FV( V ) \
   ((V)[0]*(V)[0] + (V)[1]*(V)[1] + (V)[2]*(V)[2])

/* Length of V. */
#define LEN_3FV( V ) (GL_SQRT(LEN_SQUARED_3FV(V)))

/*
 * Normalize a 3-element vector to unit length.
 * V is left untouched when its squared length is not greater than 1e-50,
 * which avoids dividing by zero for a null vector.
 */
#define NORMALIZE_3FV( V )                                      \
do {                                                            \
   GLdouble norm3fv_len_ = LEN_SQUARED_3FV(V);                  \
   if (norm3fv_len_ > 1e-50) {                                  \
      norm3fv_len_ = 1.0 / GL_SQRT(norm3fv_len_);               \
      (V)[0] = (GLfloat) ((V)[0] * norm3fv_len_);               \
      (V)[1] = (GLfloat) ((V)[1] * norm3fv_len_);               \
      (V)[2] = (GLfloat) ((V)[2] * norm3fv_len_);               \
   }                                                            \
} while(0)
227
/*
 * Optimization for:
 *   GLfloat f;
 *   GLubyte b = FloatToInt(CLAMP(f, 0, 1) * 255)
 *
 * USE_IEEE selects the color macros that inspect the bit pattern of a
 * float directly.  It is switched on automatically for i386 and sparc.
 * The !defined() guard lets a build pass -DUSE_IEEE on the command line
 * without triggering a macro redefinition diagnostic here.
 */

#if !defined(USE_IEEE) && (defined(__i386__) || defined(__sparc__))
#define USE_IEEE
#endif
237
238#if defined(USE_IEEE) && !defined(DEBUG)
239
/*
 * Bit patterns of IEEE-754 single precision values.
 *
 * IEEE_ONE is 0x3f7f0000, i.e. 255/256 (0.99609375) rather than 1.0; it
 * is the saturation threshold used by FLOAT_COLOR_TO_UBYTE_COLOR and
 * keeps its historical name and value.
 *
 * IEEE_CLAMP_ONE is the bit pattern of exactly 1.0F.  The clamp macros
 * compare against it so that values in [255/256, 1.0) are left alone,
 * as CLAMP(f, 0, 1) would leave them.
 */
#define IEEE_ONE        0x3f7f0000
#define IEEE_CLAMP_ONE  0x3f800000

/*
 * Clamp the float lvalue f to [0, 1] in place.
 *
 * Read as an unsigned integer, every float >= 1.0 and every float with
 * the sign bit set compares >= IEEE_CLAMP_ONE, so one unsigned compare
 * finds all values that need clamping; the sign bit then selects 0 or 1.
 * The bits are read through a union, like FLOAT_COLOR_TO_UBYTE_COLOR
 * does.
 */
#define CLAMP_FLOAT_COLOR(f)                                            \
   do {                                                                 \
      union { GLfloat r; GLuint i; } clamp_fc_;                         \
      clamp_fc_.r = (f);                                                \
      if (clamp_fc_.i >= IEEE_CLAMP_ONE)                                \
         (f) = (clamp_fc_.i & 0x80000000u) ? 0.0F : 1.0F;               \
   } while(0)

/*
 * Expression form: yields f clamped to [0, 1] without modifying f.
 * f must be a GLfloat lvalue because its address is taken.
 * NOTE(review): this form still reads the bits through pointer casts,
 * which strict-aliasing optimisations may not honour; an expression
 * macro cannot declare a union in C89.
 */
#define CLAMP_FLOAT_COLOR_VALUE(f)                                      \
   ( (*(GLuint *)&(f) >= IEEE_CLAMP_ONE)                                \
     ? ((*(GLint *)&(f) < 0) ? 0 : 1)                                   \
     : (f) )
252
/*
 * This function/macro is sensitive to precision.  Test carefully
 * if you change it.
 *
 * Store in b the GLubyte color for the float color f:
 *   - f with the sign bit set        -> 0
 *   - f at or above IEEE_ONE's value -> 255
 *   - otherwise f is scaled by 255/256 and offset by 32768.0F, after
 *     which the low byte of the float's bit pattern is the result.
 * The first two cases share one unsigned compare, because a set sign
 * bit also makes the bit pattern compare >= IEEE_ONE.
 *
 * The temporary has a deliberately unusual name so that a caller may
 * pass a variable called "tmp"; b and f are parenthesised on use.
 */
#define FLOAT_COLOR_TO_UBYTE_COLOR(b, f)                                \
   do {                                                                 \
      union { GLfloat r; GLuint i; } fc2ub_;                            \
      fc2ub_.r = (f);                                                   \
      (b) = ((fc2ub_.i >= IEEE_ONE)                                     \
             ? (((GLint)fc2ub_.i < 0) ? (GLubyte)0 : (GLubyte)255)      \
             : (fc2ub_.r = fc2ub_.r*(255.0F/256.0F) + 32768.0F,         \
                (GLubyte)fc2ub_.i));                                    \
   } while (0)


/* The IEEE conversion clamps by itself, so the pre-clamped variant is the same. */
#define CLAMPED_FLOAT_COLOR_TO_UBYTE_COLOR(b,f)                         \
   FLOAT_COLOR_TO_UBYTE_COLOR(b, f)
270
271#else
272
/*
 * Portable versions, built on CLAMP, CLAMP_SELF and FloatToInt.
 * Every argument is parenthesised on use, so an expression such as
 * "a + b" may be passed for f; each expansion is itself one
 * parenthesised expression.
 */

/* Clamp the float lvalue f to [0, 1] in place. */
#define CLAMP_FLOAT_COLOR(f) \
   ((void) CLAMP_SELF((f), 0, 1))

/* Yield f clamped to [0, 1] without modifying f. */
#define CLAMP_FLOAT_COLOR_VALUE(f) \
   CLAMP((f), 0, 1)

/* b = GLubyte color for f, clamping f to [0, 1] first. */
#define FLOAT_COLOR_TO_UBYTE_COLOR(b, f) \
   ((b) = ((GLubyte) FloatToInt(CLAMP((f), 0.0F, 1.0F) * 255.0F)))

/* Same conversion for an f the caller has already clamped to [0, 1]. */
#define CLAMPED_FLOAT_COLOR_TO_UBYTE_COLOR(b,f) \
   ((b) = ((GLubyte) FloatToInt((f) * 255.0F)))
284
285#endif
286
287
/*
 * GLubyte -> float color conversion by table lookup.  Both 256-entry
 * tables are defined elsewhere and are indexed directly by c.
 * NOTE(review): judging by the names, the first table presumably holds
 * colors scaled to [0, 1] and the second colors in [0, 255]; confirm
 * against the code that fills them.
 */
extern float gl_ubyte_to_float_color_tab[256];
extern float gl_ubyte_to_float_255_color_tab[256];

/* Table entry for c, as a float expression. */
#define UBYTE_COLOR_TO_FLOAT_COLOR(c) \
   gl_ubyte_to_float_color_tab[c]

#define UBYTE_COLOR_TO_FLOAT_255_COLOR(c) \
   gl_ubyte_to_float_255_color_tab[c]

/*
 * Copy entry c of the 255 table into the float lvalue f by moving its
 * bit pattern as an int, i.e. without a floating point load/store.
 */
#define UBYTE_COLOR_TO_FLOAT_255_COLOR2(f,c) \
   (*(int *)&(f)) = ((int *)gl_ubyte_to_float_255_color_tab)[c]
296
297
/*
 * Convert four GLubyte components b[0..3] to floats f[0..3] through
 * UBYTE_COLOR_TO_FLOAT_COLOR.  f and b are parenthesised before being
 * subscripted, so pointer expressions such as "dst + 4" are accepted.
 */
#define UBYTE_RGBA_TO_FLOAT_RGBA(f,b)                           \
do {                                                            \
   (f)[0] = UBYTE_COLOR_TO_FLOAT_COLOR((b)[0]);                 \
   (f)[1] = UBYTE_COLOR_TO_FLOAT_COLOR((b)[1]);                 \
   (f)[2] = UBYTE_COLOR_TO_FLOAT_COLOR((b)[2]);                 \
   (f)[3] = UBYTE_COLOR_TO_FLOAT_COLOR((b)[3]);                 \
} while(0)


/* Same as above, but through UBYTE_COLOR_TO_FLOAT_255_COLOR. */
#define UBYTE_RGBA_TO_FLOAT_255_RGBA(f,b)                       \
do {                                                            \
   (f)[0] = UBYTE_COLOR_TO_FLOAT_255_COLOR((b)[0]);             \
   (f)[1] = UBYTE_COLOR_TO_FLOAT_255_COLOR((b)[1]);             \
   (f)[2] = UBYTE_COLOR_TO_FLOAT_255_COLOR((b)[2]);             \
   (f)[3] = UBYTE_COLOR_TO_FLOAT_255_COLOR((b)[3]);             \
} while(0)
314
/*
 * Convert four float components f[0..3] to GLubytes b[0..3] through
 * FLOAT_COLOR_TO_UBYTE_COLOR.  b and f are parenthesised BEFORE being
 * subscripted, so pointer expressions such as "dst + 4" are accepted.
 */
#define FLOAT_RGBA_TO_UBYTE_RGBA(b,f)                           \
do {                                                            \
   FLOAT_COLOR_TO_UBYTE_COLOR((b)[0], (f)[0]);                  \
   FLOAT_COLOR_TO_UBYTE_COLOR((b)[1], (f)[1]);                  \
   FLOAT_COLOR_TO_UBYTE_COLOR((b)[2], (f)[2]);                  \
   FLOAT_COLOR_TO_UBYTE_COLOR((b)[3], (f)[3]);                  \
} while(0)

/* Three-component variant of the conversion above. */
#define FLOAT_RGB_TO_UBYTE_RGB(b,f)                             \
do {                                                            \
   FLOAT_COLOR_TO_UBYTE_COLOR((b)[0], (f)[0]);                  \
   FLOAT_COLOR_TO_UBYTE_COLOR((b)[1], (f)[1]);                  \
   FLOAT_COLOR_TO_UBYTE_COLOR((b)[2], (f)[2]);                  \
} while(0)
329
330
331extern void _mesa_init_math(void);
332
333
334#endif
Note: See TracBrowser for help on using the repository browser.