source: trunk/src/opengl/glide/cvg/texus/nccnnet.c

Last change on this file was 6653, checked in by bird, 24 years ago

Added $Id:$ keyword.

File size: 15.8 KB
Line 
1/* $Id: nccnnet.c,v 1.2 2001-09-05 14:30:45 bird Exp $ */
2
3/*
4** THIS SOFTWARE IS SUBJECT TO COPYRIGHT PROTECTION AND IS OFFERED ONLY
5** PURSUANT TO THE 3DFX GLIDE GENERAL PUBLIC LICENSE. THERE IS NO RIGHT
6** TO USE THE GLIDE TRADEMARK WITHOUT PRIOR WRITTEN PERMISSION OF 3DFX
7** INTERACTIVE, INC. A COPY OF THIS LICENSE MAY BE OBTAINED FROM THE
8** DISTRIBUTOR OR BY CONTACTING 3DFX INTERACTIVE INC(info@3dfx.com).
9** THIS PROGRAM IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER
10** EXPRESSED OR IMPLIED. SEE THE 3DFX GLIDE GENERAL PUBLIC LICENSE FOR A
11** FULL TEXT OF THE NON-WARRANTY PROVISIONS.
12**
13** USE, DUPLICATION OR DISCLOSURE BY THE GOVERNMENT IS SUBJECT TO
14** RESTRICTIONS AS SET FORTH IN SUBDIVISION (C)(1)(II) OF THE RIGHTS IN
15** TECHNICAL DATA AND COMPUTER SOFTWARE CLAUSE AT DFARS 252.227-7013,
16** AND/OR IN SIMILAR OR SUCCESSOR CLAUSES IN THE FAR, DOD OR NASA FAR
17** SUPPLEMENT. UNPUBLISHED RIGHTS RESERVED UNDER THE COPYRIGHT LAWS OF
18** THE UNITED STATES.
19**
20** COPYRIGHT 3DFX INTERACTIVE, INC. 1999, ALL RIGHTS RESERVED
21**
22** $Revision: 1.2 $
23** $Date: 2001-09-05 14:30:45 $
24*/
25#include <stdio.h>
26#include <stdlib.h>
27#include <string.h>
28#include <math.h>
29
30#include "texusint.h"
31
32/*
33 * This file implements the neural net quantizer, which takes an image in
34 * ARGB8888 format and produces an optimal YAB table, and an 8 bit image
35 * in YAB format that best represents the original image. A very detailed
36 * explanation of the algorithm is available in
37 * /tdfx/engr/devel/sst1/docs/yab.doc. The summary follows.
38 *
39 * Neural net algorithms first determine a "representative sample" of the
40 * input image. This representative sample is repeatedly run through the net
41 * during the network learning stage, and the neural net "learns" which colors
42 * are important and which ones are not. It's quite possible to feed every
43 * pixel in the original image repeatedly into the neural net
44 * to make it learn; however, this can be extremely time consuming.
45 *
46 * So, we prefer to make a representative sample of colors for the input image.
47 * The original yab.doc suggests we try to derive sample colors by reducing the
48 * ARGB8888 colors to RGB555. I've found that simply color quantizing to 256
49 * colors (just like for the 8-bit palettized case) works quite well, and
50 * so we first quantize to 8 bit palette, and use the palette as the sample
51 * colors to feed the neural network. This also makes using 256-palette
52 * textures very easy.
53 *
54 * After the "representative colors" are determined, we train the neural net,
55 * and obtain the optimal YAB table. Each sample color in the palette,
56 * which was originally in ARGB8888 format is now replaced with IRGB8888,
57 * where the RGB is the same as before, but the alpha channel is replaced
58 * with an 8 bit number I which is the YAB index corresponding to this
59 * representative color.
60 *
61 * So now it's possible to translate the original image into an 8 bit image
62 * by first looking up the original pixel in the representative colors table,
63 * extracting the alpha channel, and using it as the index into the YAB table
64 * delivered by the algorithm.
65 *
66 * In the process of converting the original image to the YAB format, we could
67 * optionally dither the image. Ordered dithering doesn't quite work, so we
68 * use error-diffusion dithering.
69 *
70 * I've found that there are three speed bottlenecks to overcome. The first
71 * time consuming operation is the computation of representative image colors.
72 * 256 color quantization is used for this part. The second bottleneck is the
73 * training of the neural net algorithm itself, and I've optimized this as
74 * much as possible. The third bottleneck is the translation of the original
75 * image into the 8 bit YAB indexed image; this still needs work, especially
76 * when error diffusion dithering is enabled.
77 *
78 * So, now, onto business.
79 */
80
81/******************************************************************************
82 *
83 * The hardcore neural net stuff begins here.
84 *
85 */
86
/* Training-loop limits and network size. */
#define MAX_ITERATIONS 4000     // max total samples presented to the net
#define MAX_DRYSPELLS 2000      // stop after this many samples without improvement
#define MAX_NEURONS 256         // one neuron per possible 8-bit YAB index

/*
 * One network weight in fixed point (SUBPIXEL fraction bits), with cached
 * integer copies.  Used for the 16 Y (intensity) entries -- of which only
 * the r/ir components are ever used -- and the 4 A + 4 B chroma entries.
 */
typedef struct _weight {
    long r, g, b; // fixed point, SUBPIXEL precision bits
    int ir, ig, ib; // pure integers, maybe -256 to 255.
} Weight;

/*
 * One neuron per 8-bit YAB index: pointers to the Y/A/B weights that
 * index selects, plus the cached RGB color those weights currently produce.
 */
typedef struct _vector {
    Weight *py, *pa, *pb;
    long r, g, b; // pure integers, 0 to 255.
} Neuron;

static Weight Y[16], A[4], B[4];        // the YAB table being learned
static Neuron N[MAX_NEURONS];           // all 256 Y/A/B index combinations
static long errR, errG, errB, errMax;   // worst-sample errors of current epoch
static long totR, totG, totB;           // accumulated errors of current epoch


#define SUBPIXEL 22                 // fixed-point fraction bits in Weight.r/g/b
#define ABS(x) (((x) < 0) ? (-(x)) : (x))
#define INT(x) ((x) >> SUBPIXEL)    // fixed point -> integer
/* Clamp helpers: CLAMP_255 is for integer colors (0..255); CLAMP_PLUS and
   CLAMP_BOTH are for fixed-point weights, unsigned (Y) and signed (A/B)
   respectively.
   NOTE(review): (-256 << SUBPIXEL) left-shifts a negative constant, which
   is undefined behavior in ISO C; kept byte-identical for fidelity. */
#define CLAMP_255(x) if (x < 0) x = 0; else if (x > 255) x = 255
#define CLAMP_PLUS(x) if (x < 0) x = 0; else if (x > ((256 << SUBPIXEL)-1)) \
                        x = ((256 << SUBPIXEL) -1)
#define CLAMP_BOTH(x) if (x < (-256 << SUBPIXEL)) x = (-256 << SUBPIXEL); \
                        else if (x > ((256 << SUBPIXEL) - 1)) \
                        x = ((256 << SUBPIXEL) -1)
116
/*
 * Present one sample color (ir, ig, ib), each component 0..255, to the net.
 *
 * Finds the two neurons whose current colors are closest to the sample,
 * accumulates error statistics into the file statics (totR/G/B, plus
 * errR/G/B and errMax for the worst-matched sample of the epoch), then
 * moves the winners' Y/A/B weights toward the sample -- the runner-up at
 * 1/4 the learning rate of the winner.
 *
 * Returns the index (0..255) of the best-fit neuron.
 */
static int
_nn_modifyNeurons(long ir, long ig, long ib)
{
    int i;
    int d0, d1; // closest & next closest distance to input
    int p0, p1; // index into the 256 color table.
    long d, dr, dg, db;
    Weight *py, *pa, *pb;
    Neuron *n;

    d0 = d1 = 0x7fffffff;
    p0 = p1 = 0;

    /* Find 2 neurons with smallest distances to r, g, b */
    for (i=0, n=N; i<256; i++, n++) {
        py = n->py;
        pa = n->pa;
        pb = n->pb;

        // Refresh the neuron's color from its weights.  The Y weight is an
        // intensity: its single .ir component feeds all three channels,
        // while A and B add signed per-channel offsets.
        n->r = py->ir + pa->ir + pb->ir; CLAMP_255(n->r);
        n->g = py->ir + pa->ig + pb->ig; CLAMP_255(n->g);
        n->b = py->ir + pa->ib + pb->ib; CLAMP_255(n->b);

        d = DISTANCE(n->r, n->g, n->b, ir, ig, ib);
        if (d < d0) {
            d1 = d0; d0 = d;
            p1 = p0; p0 = i;

        } else if (d < d1) {
            d1 = d;
            p1 = i;
        }
    }

    /* track errors */
    dr = ABS(N[p0].r - ir);
    dg = ABS(N[p0].g - ig);
    db = ABS(N[p0].b - ib);

    totR += dr;
    totG += dg;
    totB += db;

    // Remember the per-channel errors of the worst-matched sample so far.
    if (errMax < d0) {
        errMax = d0;
        errR = dr;
        errG = dg;
        errB = db;
    }


    // Modify weights for d0 and d1 at positions p0 and p1.

    // The 16 comes from 8.16 format, 4 comes from alpha, the learning rate,
    // set at 1/16 for now.
    // NOTE(review): the comment above appears to predate SUBPIXEL (now 22);
    // with the >>1 / >>2 mixing shifts below, the effective step is 1/8 of
    // the error per chroma channel and a 1/4,1/2,1/4-weighted mix for Y.

    // py->r += (ar * 0.25f + ag * 0.5f + ab * 0.25f) * alpha;
    // pa->r += ar * 0.25f * alpha;
    // pb->r += ar * 0.25f * alpha;


    // For the closest neuron, apply maximal learning rate.
    // NOTE(review): (ir - N[p0].r) may be negative, and left-shifting a
    // negative value is undefined behavior in ISO C; this relies on the
    // target compilers implementing it as arithmetic scaling.
    dr = (ir - N[p0].r) << (SUBPIXEL - 1);
    dg = (ig - N[p0].g) << (SUBPIXEL - 1);
    db = (ib - N[p0].b) << (SUBPIXEL - 1);
    py = N[p0].py;
    pa = N[p0].pa;
    pb = N[p0].pb;
    // Y learns a luma-style 1/4,1/2,1/4 mix of the channel errors;
    // A and B learn each channel independently.
    py->r += (dr >> 2) + (dg >> 1) + (db >> 2); CLAMP_PLUS(py->r);
    pa->r += (dr >> 2)                        ; CLAMP_BOTH(pa->r);
    pa->g += (dg >> 2)                        ; CLAMP_BOTH(pa->g);
    pa->b += (db >> 2)                        ; CLAMP_BOTH(pa->b);
    pb->r += (dr >> 2)                        ; CLAMP_BOTH(pb->r);
    pb->g += (dg >> 2)                        ; CLAMP_BOTH(pb->g);
    pb->b += (db >> 2)                        ; CLAMP_BOTH(pb->b);

    // Keep the cached integer copies in sync with the fixed-point weights.
    py->ir = INT(py->r);
    pa->ir = INT(pa->r); pa->ig = INT(pa->g); pa->ib = INT(pa->b);
    pb->ir = INT(pb->r); pb->ig = INT(pb->g); pb->ib = INT(pb->b);

    // For the next closest neuron, apply 1/4 the learning rate.
    dr = (ir - N[p1].r) << (SUBPIXEL - 2);
    dg = (ig - N[p1].g) << (SUBPIXEL - 2);
    db = (ib - N[p1].b) << (SUBPIXEL - 2);
    py = N[p1].py;
    pa = N[p1].pa;
    pb = N[p1].pb;
    py->r += (dr >> 2) + (dg >> 1) + (db >> 2); CLAMP_PLUS(py->r);
    pa->r += (dr >> 2)                        ; CLAMP_BOTH(pa->r);
    pa->g += (dg >> 2)                        ; CLAMP_BOTH(pa->g);
    pa->b += (db >> 2)                        ; CLAMP_BOTH(pa->b);
    pb->r += (dr >> 2)                        ; CLAMP_BOTH(pb->r);
    pb->g += (dg >> 2)                        ; CLAMP_BOTH(pb->g);
    pb->b += (db >> 2)                        ; CLAMP_BOTH(pb->b);

    py->ir = INT(py->r);
    pa->ir = INT(pa->r); pa->ig = INT(pa->g); pa->ib = INT(pa->b);
    pb->ir = INT(pb->r); pb->ig = INT(pb->g); pb->ib = INT(pb->b);

    return p0; // best fit neuron is returned.
}
218
219static void
220_nn_initTables()
221{
222 int i;
223
224 // Intensity ramp
225 for (i=0; i<16; i++) {
226 Y[i].r = ((long) ((255.0f * i)/15.0f)) << SUBPIXEL;
227 Y[i].ir = INT(Y[i].r);
228 }
229
230 for (i=0; i< 4; i++) {
231
232 A[i].r = A[i].g = A[i].b = 0;
233 A[i].ir = INT(A[i].r);
234 A[i].ig = INT(A[i].g);
235 A[i].ib = INT(A[i].b);
236
237 B[i].r = B[i].g = B[i].b = 0;
238 B[i].ir = INT(B[i].r);
239 B[i].ig = INT(B[i].g);
240 B[i].ib = INT(B[i].b);
241 }
242
243 for (i=0; i<MAX_NEURONS; i++) {
244 int iy, ia, ib;
245
246 iy = (i >> 4) & 0x0f;
247 ia = (i >> 2) & 0x03;
248 ib = (i >> 0) & 0x03;
249
250 N[i].py = &Y[iy];
251 N[i].pa = &A[ia];
252 N[i].pb = &B[ib];
253
254 N[i].r = INT(Y[iy].r)+INT(A[ia].r)+INT(B[ib].r);CLAMP_255(N[i].r);
255 N[i].g = INT(Y[iy].r)+INT(A[ia].g)+INT(B[ib].g);CLAMP_255(N[i].g);
256 N[i].b = INT(Y[iy].r)+INT(A[ia].b)+INT(B[ib].b);CLAMP_255(N[i].b);
257 }
258}
259
static int order[256];

/*
 * qsort() comparator used to shuffle order[]: ignores both arguments and
 * answers with a random result in {-1, 0, +1}.
 *
 * NOTE(review): a randomized comparator does not impose the consistent
 * ordering the C standard requires of qsort(); it happens to work as a
 * crude shuffle on typical implementations, but a Fisher-Yates shuffle
 * in the caller would be the robust replacement.
 */
static int
_nn_randomOrder(const void *a, const void *b)
{
    (void) a;   /* silence unused-parameter warnings */
    (void) b;
    return (rand() % 3) - 1;
}
267
/*
 * Train the neural net on the nsamples palette colors in samples[]
 * (xRGB8888; only the low 24 bits are read here) and produce:
 *   - YAB[0..39]: the learned NCC table (16 Y, then 4x3 A, then 4x3 B),
 *   - map[0..nsamples-1]: for each sample, the best 8-bit YAB index.
 *
 * Training runs in epochs (one full shuffled pass over the samples each)
 * until MAX_ITERATIONS samples have been seen, MAX_DRYSPELLS samples pass
 * without a new best worst-case error, or an exact solution is found.
 * The last epoch's error stats remain in the file statics errR/G/B,
 * errMax, totR/G/B (read by txMipNccNNet for its verbose report).
 */
static void
txMapPal256toYAB(FxU32 *YAB, FxU8 *map, int nsamples, FxU32 *samples)
{
    int i;
    long bstR, bstG, bstB, bstMax;  // best-epoch snapshot of the error stats
    int iterations; // track how many inputs have been fed to NN
    int drySpells; // how many inputs since last best case.
    long yab2pal[256];  // YAB table expanded back to 256 RGB colors

    _nn_initTables();
    /*
     * Shuffle the presentation order of the samples.
     * NOTE(review): this relies on a randomized qsort() comparator
     * (_nn_randomOrder), which is technically undefined behavior; see
     * the note on that function.
     */
    for (i=0; i<nsamples; i++) order[i] = i;
    qsort(order, nsamples, sizeof(int), _nn_randomOrder);

    iterations = drySpells = 0;
    bstMax = 0x7fffffff;

    while ((iterations < MAX_ITERATIONS) && (drySpells < MAX_DRYSPELLS)) {

        // An epoch:
        // A whole cycle of inputs is presented to the network in an epoch.

        errR = errG = errB = errMax = 0;
        totR = totG = totB = 0;

        for (i = 0; i< nsamples; i++) {
            FxU32 *pRGB;

            // We present the samples randomly to the network.
            // _nn_modifyNeurons() makes the neurons learn.
            // errR, errG, errB, errMax are computed in _nn_modifyNeurons(), as
            // are totR, totG, totB (accumulative errors).

            pRGB = (FxU32 *) (&samples[order[i]]);
            (void) _nn_modifyNeurons(((*pRGB >> 16) & 0xFF),
                ((*pRGB >> 8) & 0xFF), ((*pRGB) & 0xFF));
        }

        iterations += nsamples;

        if (errMax < bstMax) {
            /*
             * A lower worst-case error than before; take a snapshot of the
             * current weights.
             *
             * YAB[] has 16 entries for Y, 12 entries each for A & B.
             * The range checks guard against clamping bugs in training.
             */
            for (i=0; i<16; i++) {
                YAB[i] = Y[i].ir;
                if ((Y[i].ir < 0) || (Y[i].ir > 255)) {
                    txPanic("Bad Y!\n");
                }
            }

            for (i=0; i<4; i++) {
                YAB[16 + 3*i + 0] = A[i].ir;
                YAB[16 + 3*i + 1] = A[i].ig;
                YAB[16 + 3*i + 2] = A[i].ib;

                if ((A[i].ir < -256) || (A[i].ir > 255) ||
                    (A[i].ig < -256) || (A[i].ig > 255) ||
                    (A[i].ib < -256) || (A[i].ib > 255)) txPanic("Bad A!\n");
            }
            for (i=0; i<4; i++) {
                YAB[28 + 3*i + 0] = B[i].ir;
                YAB[28 + 3*i + 1] = B[i].ig;
                YAB[28 + 3*i + 2] = B[i].ib;

                if ((B[i].ir < -256) || (B[i].ir > 255) ||
                    (B[i].ig < -256) || (B[i].ig > 255) ||
                    (B[i].ib < -256) || (B[i].ib > 255)) txPanic("Bad B!\n");
            }

            // NOTE(review): bstR/bstG/bstB are recorded but never read;
            // only bstMax participates in the acceptance test above.
            bstMax = errMax;
            bstR = errR;
            bstG = errG;
            bstB = errB;
#if 0
            printf("%8d%, dry=%8d, eMax=%8x eMax=%3d%3d%3d eAvg=%3d%3d%3d\n",
                iterations, drySpells, errMax,
                errG, errR, errB,
                totG/nsamples, totR/nsamples, totB/nsamples
                );
#endif
            drySpells = 0;
        }
        else {
            drySpells += nsamples;
        }

        if (errMax == 0) {
            // Every sample is represented exactly; no point training on.
            // printf("******Exact Solution in %d iterations\n", iterations);
            // _nn_Dump();
            break;
        }
    }

    /*
     * At this point YAB has the YAB table, samples has input palette.
     * Expand the YAB table back to a 256-entry palette and, for each
     * sample color, record the index of its nearest YAB-palette entry.
     */

    txYABtoPal256((long*)yab2pal, (long*)YAB);

    for (i=0; i<nsamples; i++) {
        int ir, ig, ib;

        // The input color components.
        ir = (samples[i] >> 16) & 0xFF;
        ig = (samples[i] >> 8) & 0xFF;
        ib = (samples[i] ) & 0xFF;

        // Find closest color in the yab2pal tables
        map[i] = (FxU8) txNearestColor(ir, ig, ib, (const FxU32 *)yab2pal, 256);
    }
}
385
/*
 * Compress a mipmap into an NCC (YAB-indexed) format using the neural-net
 * quantizer.
 *
 * pxMip:  source mip; converted in place to the 8/16-bit indexed image.
 *         On return pxMip->format is set to the requested format and
 *         pxMip->pal[0..39] holds the YAB table.
 * txMip:  destination mip descriptor (dimensions/depth; filled by the
 *         txMipPal256 / txDiffuseIndex helpers).
 * format: GR_TEXFMT_YIQ_422 (no alpha, 1 byte/texel) or the alpha-carrying
 *         variant (2 bytes/texel: alpha high byte, index low byte).
 * dither: TX_DITHER_* flags; error-diffusion dithering when enabled.
 * comp:   unused in this function.
 */
void
txMipNccNNet(TxMip *pxMip, TxMip *txMip, int format, FxU32 dither, FxU32 comp)
{
    int i, w, h;
    int ncolors;
    // NOTE(review): pixsize keys off pxMip->format at entry while the
    // P_8/AP_88 choice below keys off the format argument -- presumably
    // these always agree; confirm against the callers.
    int pixsize = (pxMip->format == GR_TEXFMT_YIQ_422) ? 1 : 2;
    long yabTable[16+12+12];
    FxU8 map[256];  // 256-color palette index -> YAB index translation


    /*
     * Get a 256 color palette, to be used as samples.
     * Incidentally, convert src 32 bit image to dst 8 bit indexed image,
     * with indices referring to the 256 color palette.
     * Also incidentally, pack the alpha channel if necessary.
     */
    if( txVerbose )
    {
        printf("NCC Neural nets..."); fflush(stdout);
    }
    // Temporarily switch to the palettized format txMipPal256 expects;
    // the caller's requested NCC format is restored at the end.
    pxMip->format = (format == GR_TEXFMT_YIQ_422) ? GR_TEXFMT_P_8 :
                    GR_TEXFMT_AP_88;
    ncolors = txMipPal256(pxMip, txMip, pxMip->format, 0, 0);
    if( txVerbose )
    {
        printf("%d samples...", ncolors); fflush(stdout);
    }
    txMapPal256toYAB((FxU32 *)yabTable, (FxU8 *)map, ncolors, (FxU32 *)pxMip->pal);
    if( txVerbose )
    {
        // errR/G/B and totR/G/B are the file statics left behind by the
        // training pass above.
        printf("eMax=(%3d%3d%3d)...eAvg=(%3d%3d%3d)\n",
            errG, errR, errB,
            totG/ncolors, totR/ncolors, totB/ncolors
            );
    }


    if ((dither & TX_DITHER_MASK) != TX_DITHER_NONE) {
        /*
         * At this point, we can lose the 256 color palette, and replace it with
         * the 256 color palette generated from the YAB table. This will be
         * useful for error diffusion dithering.
         */
        txYABtoPal256((long *)pxMip->pal, (long *)yabTable);
        txDiffuseIndex(pxMip, txMip, pixsize, pxMip->pal, 256);
    }
    else {

        /* Translate image to YAB format: remap every texel's palette
           index through map[], one mip level at a time. */
        w = txMip->width;
        h = txMip->height;
        for (i=0; i< txMip->depth; i++) {
            int npixels;
            FxU8 *src8;
            FxU16 *src16;

            npixels = w * h;

            if (pixsize == 2) {
                // 16-bit texels: keep alpha (high byte), remap the index.
                src16 = (FxU16 *) pxMip->data[i];
                while (npixels--) {
                    *src16 = (*src16 & 0xFF00) | (map[*src16 & 0x00FF]);
                    src16++;
                }
            } else {
                src8 = (FxU8 *) pxMip->data[i];
                while (npixels--) {
                    *src8 = map[*src8];
                    src8++;
                }
            }
            // Step down to the next (smaller) mip level.
            if (w > 1) w >>= 1;
            if (h > 1) h >>= 1;
        }
    }

    /* Copy the YAB table to pal. */
    pxMip->format = format;
    for (i=0; i< (16+12+12); i++)
        pxMip->pal[i] = yabTable[i];
}
Note: See TracBrowser for help on using the repository browser.