source: trunk/src/opengl/glide/cvg/texus/nccnnet.c

Last change on this file was 6653, checked in by bird, 24 years ago

Added $Id:$ keyword.

File size: 15.8 KB
Line 
1/* $Id: nccnnet.c,v 1.2 2001-09-05 14:30:45 bird Exp $ */
2
3/*
4** THIS SOFTWARE IS SUBJECT TO COPYRIGHT PROTECTION AND IS OFFERED ONLY
5** PURSUANT TO THE 3DFX GLIDE GENERAL PUBLIC LICENSE. THERE IS NO RIGHT
6** TO USE THE GLIDE TRADEMARK WITHOUT PRIOR WRITTEN PERMISSION OF 3DFX
7** INTERACTIVE, INC. A COPY OF THIS LICENSE MAY BE OBTAINED FROM THE
8** DISTRIBUTOR OR BY CONTACTING 3DFX INTERACTIVE INC(info@3dfx.com).
9** THIS PROGRAM IS PROVIDED "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER
10** EXPRESSED OR IMPLIED. SEE THE 3DFX GLIDE GENERAL PUBLIC LICENSE FOR A
11** FULL TEXT OF THE NON-WARRANTY PROVISIONS.
12**
13** USE, DUPLICATION OR DISCLOSURE BY THE GOVERNMENT IS SUBJECT TO
14** RESTRICTIONS AS SET FORTH IN SUBDIVISION (C)(1)(II) OF THE RIGHTS IN
15** TECHNICAL DATA AND COMPUTER SOFTWARE CLAUSE AT DFARS 252.227-7013,
16** AND/OR IN SIMILAR OR SUCCESSOR CLAUSES IN THE FAR, DOD OR NASA FAR
17** SUPPLEMENT. UNPUBLISHED RIGHTS RESERVED UNDER THE COPYRIGHT LAWS OF
18** THE UNITED STATES.
19**
20** COPYRIGHT 3DFX INTERACTIVE, INC. 1999, ALL RIGHTS RESERVED
21**
22** $Revision: 1.2 $
23** $Date: 2001-09-05 14:30:45 $
24*/
25#include <stdio.h>
26#include <stdlib.h>
27#include <string.h>
28#include <math.h>
29
30#include "texusint.h"
31
32/*
33 * This file implements the neural net quantizer, which takes an image in
34 * ARGB8888 format and produces an optimal YAB table, and an 8 bit image
35 * in YAB format that best represents the original image. A very detailed
36 * explanation of the algorithm is available in
37 * /tdfx/engr/devel/sst1/docs/yab.doc. The summary follows.
38 *
39 * Neural net algorithms first determine a "representative sample" of the
40 * input image. This representative sample is repeatedly run through the net
41 * during the network learning stage, and the neural net "learns" which colors
42 * are important and which ones are not. It's quite possible to feed every
43 * pixel in the original image repeatedly into the neural net
44 * to make it learn; however, this can be extremely time consuming.
45 *
46 * So, we prefer to make a representative sample of colors for the input image.
47 * The original yab.doc suggests we try to derive sample colors by reducing the
48 * ARGB8888 colors to RGB555. I've found that simply color quantizing to 256
49 * colors (just like for the 8-bit palettized case) works quite well, and
50 * so we first quantize to 8 bit palette, and use the palette as the sample
51 * colors to feed the neural network. This also makes using 256-palette
52 * textures very easy.
53 *
54 * After the "representative colors" are determined, we train the neural net,
55 * and obtain the optimal YAB table. Each sample color in the palette,
56 * which was originally in ARGB8888 format is now replaced with IRGB8888,
57 * where the RGB is the same as before, but the alpha channel is replaced
58 * with an 8 bit number I which is the YAB index corresponding to this
59 * representative color.
60 *
61 * So now it's possible to translate the original image into an 8 bit image
62 * by first looking up the original pixel in the representative colors table,
63 * extracting the alpha channel, and using it as the index into the YAB table
64 * delivered by the algorithm.
65 *
66 * In the process of converting the original image to the YAB format, we could
67 * optionally dither the image. Ordered dithering doesn't quite work, so we
68 * use error-diffusion dithering.
69 *
70 * I've found that there are three speed bottlenecks to overcome. The first
71 * time consuming operation is the computation of representative image colors.
72 * 256 color quantization is used for this part. The second bottleneck is the
73 * training of the neural net algorithm itself, and I've optimized this as
74 * much as possible. The third bottleneck is the translation of the original
75 * image into the 8 bit YAB indexed image; this still needs work, especially
76 * when error diffusion dithering is enabled.
77 *
78 * So, now, onto business.
79 */
80
81/******************************************************************************
82 *
83 * The hardcore neural net stuff begins here.
84 *
85 */
86
/* Training-loop limits and network size. */
#define MAX_ITERATIONS 4000     // max total samples presented to the net
#define MAX_DRYSPELLS 2000      // stop after this many samples without improvement
#define MAX_NEURONS 256         // one neuron per possible 8-bit YAB index

/*
 * One network weight in fixed point (SUBPIXEL fraction bits), with cached
 * integer copies.  Used for the 16 Y (intensity) entries -- of which only
 * the r/ir components are ever used -- and the 4 A + 4 B chroma entries.
 */
typedef struct _weight {
    long r, g, b; // fixed point, SUBPIXEL precision bits
    int ir, ig, ib; // pure integers, maybe -256 to 255.
} Weight;

/*
 * One neuron per 8-bit YAB index: pointers to the Y/A/B weights that
 * index selects, plus the cached RGB color those weights currently produce.
 */
typedef struct _vector {
    Weight *py, *pa, *pb;
    long r, g, b; // pure integers, 0 to 255.
} Neuron;

static Weight Y[16], A[4], B[4];        // the YAB table being learned
static Neuron N[MAX_NEURONS];           // all 256 Y/A/B index combinations
static long errR, errG, errB, errMax;   // worst-sample errors of current epoch
static long totR, totG, totB;           // accumulated errors of current epoch


#define SUBPIXEL 22                 // fixed-point fraction bits in Weight.r/g/b
#define ABS(x) (((x) < 0) ? (-(x)) : (x))
#define INT(x) ((x) >> SUBPIXEL)    // fixed point -> integer
/* Clamp helpers: CLAMP_255 is for integer colors (0..255); CLAMP_PLUS and
   CLAMP_BOTH are for fixed-point weights, unsigned (Y) and signed (A/B)
   respectively.
   NOTE(review): (-256 << SUBPIXEL) left-shifts a negative constant, which
   is undefined behavior in ISO C; kept byte-identical for fidelity. */
#define CLAMP_255(x) if (x < 0) x = 0; else if (x > 255) x = 255
#define CLAMP_PLUS(x) if (x < 0) x = 0; else if (x > ((256 << SUBPIXEL)-1)) \
                        x = ((256 << SUBPIXEL) -1)
#define CLAMP_BOTH(x) if (x < (-256 << SUBPIXEL)) x = (-256 << SUBPIXEL); \
                        else if (x > ((256 << SUBPIXEL) - 1)) \
                        x = ((256 << SUBPIXEL) -1)
116
/*
 * Present one sample color (ir, ig, ib), each component 0..255, to the net.
 *
 * Finds the two neurons whose current colors are closest to the sample,
 * accumulates error statistics into the file statics (totR/G/B, plus
 * errR/G/B and errMax for the worst-matched sample of the epoch), then
 * moves the winners' Y/A/B weights toward the sample -- the runner-up at
 * 1/4 the learning rate of the winner.
 *
 * Returns the index (0..255) of the best-fit neuron.
 */
static int
_nn_modifyNeurons(long ir, long ig, long ib)
{
    int i;
    int d0, d1; // closest & next closest distance to input
    int p0, p1; // index into the 256 color table.
    long d, dr, dg, db;
    Weight *py, *pa, *pb;
    Neuron *n;

    d0 = d1 = 0x7fffffff;
    p0 = p1 = 0;

    /* Find 2 neurons with smallest distances to r, g, b */
    for (i=0, n=N; i<256; i++, n++) {
        py = n->py;
        pa = n->pa;
        pb = n->pb;

        // Refresh the neuron's color from its weights.  The Y weight is an
        // intensity: its single .ir component feeds all three channels,
        // while A and B add signed per-channel offsets.
        n->r = py->ir + pa->ir + pb->ir; CLAMP_255(n->r);
        n->g = py->ir + pa->ig + pb->ig; CLAMP_255(n->g);
        n->b = py->ir + pa->ib + pb->ib; CLAMP_255(n->b);

        d = DISTANCE(n->r, n->g, n->b, ir, ig, ib);
        if (d < d0) {
            d1 = d0; d0 = d;
            p1 = p0; p0 = i;

        } else if (d < d1) {
            d1 = d;
            p1 = i;
        }
    }

    /* track errors */
    dr = ABS(N[p0].r - ir);
    dg = ABS(N[p0].g - ig);
    db = ABS(N[p0].b - ib);

    totR += dr;
    totG += dg;
    totB += db;

    // Remember the per-channel errors of the worst-matched sample so far.
    if (errMax < d0) {
        errMax = d0;
        errR = dr;
        errG = dg;
        errB = db;
    }


    // Modify weights for d0 and d1 at positions p0 and p1.

    // The 16 comes from 8.16 format, 4 comes from alpha, the learning rate,
    // set at 1/16 for now.
    // NOTE(review): the comment above appears to predate SUBPIXEL (now 22);
    // with the >>1 / >>2 mixing shifts below, the effective step is 1/8 of
    // the error per chroma channel and a 1/4,1/2,1/4-weighted mix for Y.

    // py->r += (ar * 0.25f + ag * 0.5f + ab * 0.25f) * alpha;
    // pa->r += ar * 0.25f * alpha;
    // pb->r += ar * 0.25f * alpha;


    // For the closest neuron, apply maximal learning rate.
    // NOTE(review): (ir - N[p0].r) may be negative, and left-shifting a
    // negative value is undefined behavior in ISO C; this relies on the
    // target compilers implementing it as arithmetic scaling.
    dr = (ir - N[p0].r) << (SUBPIXEL - 1);
    dg = (ig - N[p0].g) << (SUBPIXEL - 1);
    db = (ib - N[p0].b) << (SUBPIXEL - 1);
    py = N[p0].py;
    pa = N[p0].pa;
    pb = N[p0].pb;
    // Y learns a luma-style 1/4,1/2,1/4 mix of the channel errors;
    // A and B learn each channel independently.
    py->r += (dr >> 2) + (dg >> 1) + (db >> 2); CLAMP_PLUS(py->r);
    pa->r += (dr >> 2)                        ; CLAMP_BOTH(pa->r);
    pa->g += (dg >> 2)                        ; CLAMP_BOTH(pa->g);
    pa->b += (db >> 2)                        ; CLAMP_BOTH(pa->b);
    pb->r += (dr >> 2)                        ; CLAMP_BOTH(pb->r);
    pb->g += (dg >> 2)                        ; CLAMP_BOTH(pb->g);
    pb->b += (db >> 2)                        ; CLAMP_BOTH(pb->b);

    // Keep the cached integer copies in sync with the fixed-point weights.
    py->ir = INT(py->r);
    pa->ir = INT(pa->r); pa->ig = INT(pa->g); pa->ib = INT(pa->b);
    pb->ir = INT(pb->r); pb->ig = INT(pb->g); pb->ib = INT(pb->b);

    // For the next closest neuron, apply 1/4 the learning rate.
    dr = (ir - N[p1].r) << (SUBPIXEL - 2);
    dg = (ig - N[p1].g) << (SUBPIXEL - 2);
    db = (ib - N[p1].b) << (SUBPIXEL - 2);
    py = N[p1].py;
    pa = N[p1].pa;
    pb = N[p1].pb;
    py->r += (dr >> 2) + (dg >> 1) + (db >> 2); CLAMP_PLUS(py->r);
    pa->r += (dr >> 2)                        ; CLAMP_BOTH(pa->r);
    pa->g += (dg >> 2)                        ; CLAMP_BOTH(pa->g);
    pa->b += (db >> 2)                        ; CLAMP_BOTH(pa->b);
    pb->r += (dr >> 2)                        ; CLAMP_BOTH(pb->r);
    pb->g += (dg >> 2)                        ; CLAMP_BOTH(pb->g);
    pb->b += (db >> 2)                        ; CLAMP_BOTH(pb->b);

    py->ir = INT(py->r);
    pa->ir = INT(pa->r); pa->ig = INT(pa->g); pa->ib = INT(pa->b);
    pb->ir = INT(pb->r); pb->ig = INT(pb->g); pb->ib = INT(pb->b);

    return p0; // best fit neuron is returned.
}
218
219static void
220_nn_initTables()
221{
222 int i;
223
224 // Intensity ramp
225 for (i=0; i<16; i++) {
226 Y[i].r = ((long) ((255.0f * i)/15.0f)) << SUBPIXEL;
227 Y[i].ir = INT(Y[i].r);
228 }
229
230 for (i=0; i< 4; i++) {
231
232 A[i].r = A[i].g = A[i].b = 0;
233 A[i].ir = INT(A[i].r);
234 A[i].ig = INT(A[i].g);
235 A[i].ib = INT(A[i].b);
236
237 B[i].r = B[i].g = B[i].b = 0;
238 B[i].ir = INT(B[i].r);
239 B[i].ig = INT(B[i].g);
240 B[i].ib = INT(B[i].b);
241 }
242
243 for (i=0; i<MAX_NEURONS; i++) {
244 int iy, ia, ib;
245
246 iy = (i >> 4) & 0x0f;
247 ia = (i >> 2) & 0x03;
248 ib = (i >> 0) & 0x03;
249
250 N[i].py = &Y[iy];
251 N[i].pa = &A[ia];
252 N[i].pb = &B[ib];
253
254 N[i].r = INT(Y[iy].r)+INT(A[ia].r)+INT(B[ib].r);CLAMP_255(N[i].r);
255 N[i].g = INT(Y[iy].r)+INT(A[ia].g)+INT(B[ib].g);CLAMP_255(N[i].g);
256 N[i].b = INT(Y[iy].r)+INT(A[ia].b)+INT(B[ib].b);CLAMP_255(N[i].b);
257 }
258}
259
static int order[256];

/*
 * qsort() comparator used to shuffle order[]: ignores both arguments and
 * answers with a random result in {-1, 0, +1}.
 *
 * NOTE(review): a randomized comparator does not impose the consistent
 * ordering the C standard requires of qsort(); it happens to work as a
 * crude shuffle on typical implementations, but a Fisher-Yates shuffle
 * in the caller would be the robust replacement.
 */
static int
_nn_randomOrder(const void *a, const void *b)
{
    (void) a;   /* silence unused-parameter warnings */
    (void) b;
    return (rand() % 3) - 1;
}
267
/*
 * Train the neural net on the nsamples palette colors in samples[]
 * (xRGB8888; only the low 24 bits are read here) and produce:
 *   - YAB[0..39]: the learned NCC table (16 Y, then 4x3 A, then 4x3 B),
 *   - map[0..nsamples-1]: for each sample, the best 8-bit YAB index.
 *
 * Training runs in epochs (one full shuffled pass over the samples each)
 * until MAX_ITERATIONS samples have been seen, MAX_DRYSPELLS samples pass
 * without a new best worst-case error, or an exact solution is found.
 * The last epoch's error stats remain in the file statics errR/G/B,
 * errMax, totR/G/B (read by txMipNccNNet for its verbose report).
 */
static void
txMapPal256toYAB(FxU32 *YAB, FxU8 *map, int nsamples, FxU32 *samples)
{
    int i;
    long bstR, bstG, bstB, bstMax;  // best-epoch snapshot of the error stats
    int iterations; // track how many inputs have been fed to NN
    int drySpells; // how many inputs since last best case.
    long yab2pal[256];  // YAB table expanded back to 256 RGB colors

    _nn_initTables();
    /*
     * Shuffle the presentation order of the samples.
     * NOTE(review): this relies on a randomized qsort() comparator
     * (_nn_randomOrder), which is technically undefined behavior; see
     * the note on that function.
     */
    for (i=0; i<nsamples; i++) order[i] = i;
    qsort(order, nsamples, sizeof(int), _nn_randomOrder);

    iterations = drySpells = 0;
    bstMax = 0x7fffffff;

    while ((iterations < MAX_ITERATIONS) && (drySpells < MAX_DRYSPELLS)) {

        // An epoch:
        // A whole cycle of inputs is presented to the network in an epoch.

        errR = errG = errB = errMax = 0;
        totR = totG = totB = 0;

        for (i = 0; i< nsamples; i++) {
            FxU32 *pRGB;

            // We present the samples randomly to the network.
            // _nn_modifyNeurons() makes the neurons learn.
            // errR, errG, errB, errMax are computed in _nn_modifyNeurons(), as
            // are totR, totG, totB (accumulative errors).

            pRGB = (FxU32 *) (&samples[order[i]]);
            (void) _nn_modifyNeurons(((*pRGB >> 16) & 0xFF),
                ((*pRGB >> 8) & 0xFF), ((*pRGB) & 0xFF));
        }

        iterations += nsamples;

        if (errMax < bstMax) {
            /*
             * A lower worst-case error than before; take a snapshot of the
             * current weights.
             *
             * YAB[] has 16 entries for Y, 12 entries each for A & B.
             * The range checks guard against clamping bugs in training.
             */
            for (i=0; i<16; i++) {
                YAB[i] = Y[i].ir;
                if ((Y[i].ir < 0) || (Y[i].ir > 255)) {
                    txPanic("Bad Y!\n");
                }
            }

            for (i=0; i<4; i++) {
                YAB[16 + 3*i + 0] = A[i].ir;
                YAB[16 + 3*i + 1] = A[i].ig;
                YAB[16 + 3*i + 2] = A[i].ib;

                if ((A[i].ir < -256) || (A[i].ir > 255) ||
                    (A[i].ig < -256) || (A[i].ig > 255) ||
                    (A[i].ib < -256) || (A[i].ib > 255)) txPanic("Bad A!\n");
            }
            for (i=0; i<4; i++) {
                YAB[28 + 3*i + 0] = B[i].ir;
                YAB[28 + 3*i + 1] = B[i].ig;
                YAB[28 + 3*i + 2] = B[i].ib;

                if ((B[i].ir < -256) || (B[i].ir > 255) ||
                    (B[i].ig < -256) || (B[i].ig > 255) ||
                    (B[i].ib < -256) || (B[i].ib > 255)) txPanic("Bad B!\n");
            }

            // NOTE(review): bstR/bstG/bstB are recorded but never read;
            // only bstMax participates in the acceptance test above.
            bstMax = errMax;
            bstR = errR;
            bstG = errG;
            bstB = errB;
#if 0
            printf("%8d%, dry=%8d, eMax=%8x eMax=%3d%3d%3d eAvg=%3d%3d%3d\n",
                iterations, drySpells, errMax,
                errG, errR, errB,
                totG/nsamples, totR/nsamples, totB/nsamples
                );
#endif
            drySpells = 0;
        }
        else {
            drySpells += nsamples;
        }

        if (errMax == 0) {
            // Every sample is represented exactly; no point training on.
            // printf("******Exact Solution in %d iterations\n", iterations);
            // _nn_Dump();
            break;
        }
    }

    /*
     * At this point YAB has the YAB table, samples has input palette.
     * Expand the YAB table back to a 256-entry palette and, for each
     * sample color, record the index of its nearest YAB-palette entry.
     */

    txYABtoPal256((long*)yab2pal, (long*)YAB);

    for (i=0; i<nsamples; i++) {
        int ir, ig, ib;

        // The input color components.
        ir = (samples[i] >> 16) & 0xFF;
        ig = (samples[i] >> 8) & 0xFF;
        ib = (samples[i] ) & 0xFF;

        // Find closest color in the yab2pal tables
        map[i] = (FxU8) txNearestColor(ir, ig, ib, (const FxU32 *)yab2pal, 256);
    }
}
385
/*
 * Compress a mipmap into an NCC (YAB-indexed) format using the neural-net
 * quantizer.
 *
 * pxMip:  source mip; converted in place to the 8/16-bit indexed image.
 *         On return pxMip->format is set to the requested format and
 *         pxMip->pal[0..39] holds the YAB table.
 * txMip:  destination mip descriptor (dimensions/depth; filled by the
 *         txMipPal256 / txDiffuseIndex helpers).
 * format: GR_TEXFMT_YIQ_422 (no alpha, 1 byte/texel) or the alpha-carrying
 *         variant (2 bytes/texel: alpha high byte, index low byte).
 * dither: TX_DITHER_* flags; error-diffusion dithering when enabled.
 * comp:   unused in this function.
 */
void
txMipNccNNet(TxMip *pxMip, TxMip *txMip, int format, FxU32 dither, FxU32 comp)
{
    int i, w, h;
    int ncolors;
    // NOTE(review): pixsize keys off pxMip->format at entry while the
    // P_8/AP_88 choice below keys off the format argument -- presumably
    // these always agree; confirm against the callers.
    int pixsize = (pxMip->format == GR_TEXFMT_YIQ_422) ? 1 : 2;
    long yabTable[16+12+12];
    FxU8 map[256];  // 256-color palette index -> YAB index translation


    /*
     * Get a 256 color palette, to be used as samples.
     * Incidentally, convert src 32 bit image to dst 8 bit indexed image,
     * with indices referring to the 256 color palette.
     * Also incidentally, pack the alpha channel if necessary.
     */
    if( txVerbose )
    {
        printf("NCC Neural nets..."); fflush(stdout);
    }
    // Temporarily switch to the palettized format txMipPal256 expects;
    // the caller's requested NCC format is restored at the end.
    pxMip->format = (format == GR_TEXFMT_YIQ_422) ? GR_TEXFMT_P_8 :
                    GR_TEXFMT_AP_88;
    ncolors = txMipPal256(pxMip, txMip, pxMip->format, 0, 0);
    if( txVerbose )
    {
        printf("%d samples...", ncolors); fflush(stdout);
    }
    txMapPal256toYAB((FxU32 *)yabTable, (FxU8 *)map, ncolors, (FxU32 *)pxMip->pal);
    if( txVerbose )
    {
        // errR/G/B and totR/G/B are the file statics left behind by the
        // training pass above.
        printf("eMax=(%3d%3d%3d)...eAvg=(%3d%3d%3d)\n",
            errG, errR, errB,
            totG/ncolors, totR/ncolors, totB/ncolors
            );
    }


    if ((dither & TX_DITHER_MASK) != TX_DITHER_NONE) {
        /*
         * At this point, we can lose the 256 color palette, and replace it with
         * the 256 color palette generated from the YAB table. This will be
         * useful for error diffusion dithering.
         */
        txYABtoPal256((long *)pxMip->pal, (long *)yabTable);
        txDiffuseIndex(pxMip, txMip, pixsize, pxMip->pal, 256);
    }
    else {

        /* Translate image to YAB format: remap every texel's palette
           index through map[], one mip level at a time. */
        w = txMip->width;
        h = txMip->height;
        for (i=0; i< txMip->depth; i++) {
            int npixels;
            FxU8 *src8;
            FxU16 *src16;

            npixels = w * h;

            if (pixsize == 2) {
                // 16-bit texels: keep alpha (high byte), remap the index.
                src16 = (FxU16 *) pxMip->data[i];
                while (npixels--) {
                    *src16 = (*src16 & 0xFF00) | (map[*src16 & 0x00FF]);
                    src16++;
                }
            } else {
                src8 = (FxU8 *) pxMip->data[i];
                while (npixels--) {
                    *src8 = map[*src8];
                    src8++;
                }
            }
            // Step down to the next (smaller) mip level.
            if (w > 1) w >>= 1;
            if (h > 1) h >>= 1;
        }
    }

    /* Copy the YAB table to pal. */
    pxMip->format = format;
    for (i=0; i< (16+12+12); i++)
        pxMip->pal[i] = yabTable[i];
}
Note: See TracBrowser for help on using the repository browser.