source: trunk/gcc/zlib/trees.c@ 3471

Last change on this file since 3471 was 1477, checked in by bird, 21 years ago

This commit was generated by cvs2svn to compensate for changes in r1476,
which included commits to RCS files with non-trunk default branches.

  • Property cvs2svn:cvs-rev set to 1.1.1.3
  • Property svn:eol-style set to native
  • Property svn:executable set to *
File size: 42.7 KB
Line 
1/* trees.c -- output deflated data using Huffman coding
2 * Copyright (C) 1995-2002 Jean-loup Gailly
3 * For conditions of distribution and use, see copyright notice in zlib.h
4 */
5
6/*
7 * ALGORITHM
8 *
9 * The "deflation" process uses several Huffman trees. The more
10 * common source values are represented by shorter bit sequences.
11 *
12 * Each code tree is stored in a compressed form which is itself
13 * a Huffman encoding of the lengths of all the code strings (in
14 * ascending order by source values). The actual code strings are
15 * reconstructed from the lengths in the inflate process, as described
16 * in the deflate specification.
17 *
18 * REFERENCES
19 *
20 * Deutsch, L.P.,"'Deflate' Compressed Data Format Specification".
21 * Available in ftp.uu.net:/pub/archiving/zip/doc/deflate-1.1.doc
22 *
23 * Storer, James A.
24 * Data Compression: Methods and Theory, pp. 49-50.
25 * Computer Science Press, 1988. ISBN 0-7167-8156-5.
26 *
27 * Sedgewick, R.
28 * Algorithms, p290.
29 * Addison-Wesley, 1983. ISBN 0-201-06672-6.
30 */
31
32/* @(#) $Id: trees.c,v 1.1.1.2 2002/03/11 21:53:27 tromey Exp $ */
33
34/* #define GEN_TREES_H */
35
36#include "deflate.h"
37
38#ifdef DEBUG
39# include <ctype.h>
40#endif
41
42/* ===========================================================================
43 * Constants
44 */
45
46#define MAX_BL_BITS 7
47/* Bit length codes must not exceed MAX_BL_BITS bits */
48
49#define END_BLOCK 256
50/* end of block literal code */
51
52#define REP_3_6 16
53/* repeat previous bit length 3-6 times (2 bits of repeat count) */
54
55#define REPZ_3_10 17
56/* repeat a zero length 3-10 times (3 bits of repeat count) */
57
58#define REPZ_11_138 18
59/* repeat a zero length 11-138 times (7 bits of repeat count) */
60
61local const int extra_lbits[LENGTH_CODES] /* extra bits for each length code */
62 = {0,0,0,0,0,0,0,0,1,1,1,1,2,2,2,2,3,3,3,3,4,4,4,4,5,5,5,5,0};
63
64local const int extra_dbits[D_CODES] /* extra bits for each distance code */
65 = {0,0,0,0,1,1,2,2,3,3,4,4,5,5,6,6,7,7,8,8,9,9,10,10,11,11,12,12,13,13};
66
67local const int extra_blbits[BL_CODES]/* extra bits for each bit length code */
68 = {0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,2,3,7};
69
70local const uch bl_order[BL_CODES]
71 = {16,17,18,0,8,7,9,6,10,5,11,4,12,3,13,2,14,1,15};
72/* The lengths of the bit length codes are sent in order of decreasing
73 * probability, to avoid transmitting the lengths for unused bit length codes.
74 */
75
76#define Buf_size (8 * 2*sizeof(char))
77/* Number of bits used within bi_buf. (bi_buf might be implemented on
78 * more than 16 bits on some systems.)
79 */
80
81/* ===========================================================================
82 * Local data. These are initialized only once.
83 */
84
85#define DIST_CODE_LEN 512 /* see definition of array dist_code below */
86
87#if defined(GEN_TREES_H) || !defined(STDC)
88/* non ANSI compilers may not accept trees.h */
89
90local ct_data static_ltree[L_CODES+2];
91/* The static literal tree. Since the bit lengths are imposed, there is no
92 * need for the L_CODES extra codes used during heap construction. However
93 * The codes 286 and 287 are needed to build a canonical tree (see _tr_init
94 * below).
95 */
96
97local ct_data static_dtree[D_CODES];
98/* The static distance tree. (Actually a trivial tree since all codes use
99 * 5 bits.)
100 */
101
102uch _dist_code[DIST_CODE_LEN];
103/* Distance codes. The first 256 values correspond to the distances
104 * 3 .. 258, the last 256 values correspond to the top 8 bits of
105 * the 15 bit distances.
106 */
107
108uch _length_code[MAX_MATCH-MIN_MATCH+1];
109/* length code for each normalized match length (0 == MIN_MATCH) */
110
111local int base_length[LENGTH_CODES];
112/* First normalized length for each code (0 = MIN_MATCH) */
113
114local int base_dist[D_CODES];
115/* First normalized distance for each code (0 = distance of 1) */
116
117#else
118# include "trees.h"
119#endif /* GEN_TREES_H */
120
121struct static_tree_desc_s {
122 const ct_data *static_tree; /* static tree or NULL */
123 const intf *extra_bits; /* extra bits for each code or NULL */
124 int extra_base; /* base index for extra_bits */
125 int elems; /* max number of elements in the tree */
126 int max_length; /* max bit length for the codes */
127};
128
129local static_tree_desc static_l_desc =
130{static_ltree, extra_lbits, LITERALS+1, L_CODES, MAX_BITS};
131
132local static_tree_desc static_d_desc =
133{static_dtree, extra_dbits, 0, D_CODES, MAX_BITS};
134
135local static_tree_desc static_bl_desc =
136{(const ct_data *)0, extra_blbits, 0, BL_CODES, MAX_BL_BITS};
137
138/* ===========================================================================
139 * Local (static) routines in this file.
140 */
141
142local void tr_static_init OF((void));
143local void init_block OF((deflate_state *s));
144local void pqdownheap OF((deflate_state *s, ct_data *tree, int k));
145local void gen_bitlen OF((deflate_state *s, tree_desc *desc));
146local void gen_codes OF((ct_data *tree, int max_code, ushf *bl_count));
147local void build_tree OF((deflate_state *s, tree_desc *desc));
148local void scan_tree OF((deflate_state *s, ct_data *tree, int max_code));
149local void send_tree OF((deflate_state *s, ct_data *tree, int max_code));
150local int build_bl_tree OF((deflate_state *s));
151local void send_all_trees OF((deflate_state *s, int lcodes, int dcodes,
152 int blcodes));
153local void compress_block OF((deflate_state *s, ct_data *ltree,
154 ct_data *dtree));
155local void set_data_type OF((deflate_state *s));
156local unsigned bi_reverse OF((unsigned value, int length));
157local void bi_windup OF((deflate_state *s));
158local void bi_flush OF((deflate_state *s));
159local void copy_block OF((deflate_state *s, charf *buf, unsigned len,
160 int header));
161
162#ifdef GEN_TREES_H
163local void gen_trees_header OF((void));
164#endif
165
166#ifndef DEBUG
167# define send_code(s, c, tree) send_bits(s, tree[c].Code, tree[c].Len)
168 /* Send a code of the given tree. c and tree must not have side effects */
169
170#else /* DEBUG */
171# define send_code(s, c, tree) \
172 { if (z_verbose>2) fprintf(stderr,"\ncd %3d ",(c)); \
173 send_bits(s, tree[c].Code, tree[c].Len); }
174#endif
175
176/* ===========================================================================
177 * Output a short LSB first on the stream.
178 * IN assertion: there is enough room in pendingBuf.
179 */
180#define put_short(s, w) { \
181 put_byte(s, (uch)((w) & 0xff)); \
182 put_byte(s, (uch)((ush)(w) >> 8)); \
183}
184
185/* ===========================================================================
186 * Send a value on a given number of bits.
187 * IN assertion: length <= 16 and value fits in length bits.
188 */
189#ifdef DEBUG
190local void send_bits OF((deflate_state *s, int value, int length));
191
192local void send_bits(s, value, length)
193 deflate_state *s;
194 int value; /* value to send */
195 int length; /* number of bits */
196{
197 Tracevv((stderr," l %2d v %4x ", length, value));
198 Assert(length > 0 && length <= 15, "invalid length");
199 s->bits_sent += (ulg)length;
200
201 /* If not enough room in bi_buf, use (valid) bits from bi_buf and
202 * (16 - bi_valid) bits from value, leaving (width - (16-bi_valid))
203 * unused bits in value.
204 */
205 if (s->bi_valid > (int)Buf_size - length) {
206 s->bi_buf |= (value << s->bi_valid);
207 put_short(s, s->bi_buf);
208 s->bi_buf = (ush)value >> (Buf_size - s->bi_valid);
209 s->bi_valid += length - Buf_size;
210 } else {
211 s->bi_buf |= value << s->bi_valid;
212 s->bi_valid += length;
213 }
214}
215#else /* !DEBUG */
216
217#define send_bits(s, value, length) \
218{ int len = length;\
219 if (s->bi_valid > (int)Buf_size - len) {\
220 int val = value;\
221 s->bi_buf |= (val << s->bi_valid);\
222 put_short(s, s->bi_buf);\
223 s->bi_buf = (ush)val >> (Buf_size - s->bi_valid);\
224 s->bi_valid += len - Buf_size;\
225 } else {\
226 s->bi_buf |= (value) << s->bi_valid;\
227 s->bi_valid += len;\
228 }\
229}
230#endif /* DEBUG */
231
232
233#define MAX(a,b) (a >= b ? a : b)
234/* the arguments must not have side effects */
235
236/* ===========================================================================
237 * Initialize the various 'constant' tables.
238 */
239local void tr_static_init()
240{
241#if defined(GEN_TREES_H) || !defined(STDC)
242 static int static_init_done = 0;
243 int n; /* iterates over tree elements */
244 int bits; /* bit counter */
245 int length; /* length value */
246 int code; /* code value */
247 int dist; /* distance index */
248 ush bl_count[MAX_BITS+1];
249 /* number of codes at each bit length for an optimal tree */
250
251 if (static_init_done) return;
252
253 /* For some embedded targets, global variables are not initialized: */
254 static_l_desc.static_tree = static_ltree;
255 static_l_desc.extra_bits = extra_lbits;
256 static_d_desc.static_tree = static_dtree;
257 static_d_desc.extra_bits = extra_dbits;
258 static_bl_desc.extra_bits = extra_blbits;
259
260 /* Initialize the mapping length (0..255) -> length code (0..28) */
261 length = 0;
262 for (code = 0; code < LENGTH_CODES-1; code++) {
263 base_length[code] = length;
264 for (n = 0; n < (1<<extra_lbits[code]); n++) {
265 _length_code[length++] = (uch)code;
266 }
267 }
268 Assert (length == 256, "tr_static_init: length != 256");
269 /* Note that the length 255 (match length 258) can be represented
270 * in two different ways: code 284 + 5 bits or code 285, so we
271 * overwrite length_code[255] to use the best encoding:
272 */
273 _length_code[length-1] = (uch)code;
274
275 /* Initialize the mapping dist (0..32K) -> dist code (0..29) */
276 dist = 0;
277 for (code = 0 ; code < 16; code++) {
278 base_dist[code] = dist;
279 for (n = 0; n < (1<<extra_dbits[code]); n++) {
280 _dist_code[dist++] = (uch)code;
281 }
282 }
283 Assert (dist == 256, "tr_static_init: dist != 256");
284 dist >>= 7; /* from now on, all distances are divided by 128 */
285 for ( ; code < D_CODES; code++) {
286 base_dist[code] = dist << 7;
287 for (n = 0; n < (1<<(extra_dbits[code]-7)); n++) {
288 _dist_code[256 + dist++] = (uch)code;
289 }
290 }
291 Assert (dist == 256, "tr_static_init: 256+dist != 512");
292
293 /* Construct the codes of the static literal tree */
294 for (bits = 0; bits <= MAX_BITS; bits++) bl_count[bits] = 0;
295 n = 0;
296 while (n <= 143) static_ltree[n++].Len = 8, bl_count[8]++;
297 while (n <= 255) static_ltree[n++].Len = 9, bl_count[9]++;
298 while (n <= 279) static_ltree[n++].Len = 7, bl_count[7]++;
299 while (n <= 287) static_ltree[n++].Len = 8, bl_count[8]++;
300 /* Codes 286 and 287 do not exist, but we must include them in the
301 * tree construction to get a canonical Huffman tree (longest code
302 * all ones)
303 */
304 gen_codes((ct_data *)static_ltree, L_CODES+1, bl_count);
305
306 /* The static distance tree is trivial: */
307 for (n = 0; n < D_CODES; n++) {
308 static_dtree[n].Len = 5;
309 static_dtree[n].Code = bi_reverse((unsigned)n, 5);
310 }
311 static_init_done = 1;
312
313# ifdef GEN_TREES_H
314 gen_trees_header();
315# endif
316#endif /* defined(GEN_TREES_H) || !defined(STDC) */
317}
318
319/* ===========================================================================
320 * Genererate the file trees.h describing the static trees.
321 */
322#ifdef GEN_TREES_H
323# ifndef DEBUG
324# include <stdio.h>
325# endif
326
327# define SEPARATOR(i, last, width) \
328 ((i) == (last)? "\n};\n\n" : \
329 ((i) % (width) == (width)-1 ? ",\n" : ", "))
330
331void gen_trees_header()
332{
333 FILE *header = fopen("trees.h", "w");
334 int i;
335
336 Assert (header != NULL, "Can't open trees.h");
337 fprintf(header,
338 "/* header created automatically with -DGEN_TREES_H */\n\n");
339
340 fprintf(header, "local const ct_data static_ltree[L_CODES+2] = {\n");
341 for (i = 0; i < L_CODES+2; i++) {
342 fprintf(header, "{{%3u},{%3u}}%s", static_ltree[i].Code,
343 static_ltree[i].Len, SEPARATOR(i, L_CODES+1, 5));
344 }
345
346 fprintf(header, "local const ct_data static_dtree[D_CODES] = {\n");
347 for (i = 0; i < D_CODES; i++) {
348 fprintf(header, "{{%2u},{%2u}}%s", static_dtree[i].Code,
349 static_dtree[i].Len, SEPARATOR(i, D_CODES-1, 5));
350 }
351
352 fprintf(header, "const uch _dist_code[DIST_CODE_LEN] = {\n");
353 for (i = 0; i < DIST_CODE_LEN; i++) {
354 fprintf(header, "%2u%s", _dist_code[i],
355 SEPARATOR(i, DIST_CODE_LEN-1, 20));
356 }
357
358 fprintf(header, "const uch _length_code[MAX_MATCH-MIN_MATCH+1]= {\n");
359 for (i = 0; i < MAX_MATCH-MIN_MATCH+1; i++) {
360 fprintf(header, "%2u%s", _length_code[i],
361 SEPARATOR(i, MAX_MATCH-MIN_MATCH, 20));
362 }
363
364 fprintf(header, "local const int base_length[LENGTH_CODES] = {\n");
365 for (i = 0; i < LENGTH_CODES; i++) {
366 fprintf(header, "%1u%s", base_length[i],
367 SEPARATOR(i, LENGTH_CODES-1, 20));
368 }
369
370 fprintf(header, "local const int base_dist[D_CODES] = {\n");
371 for (i = 0; i < D_CODES; i++) {
372 fprintf(header, "%5u%s", base_dist[i],
373 SEPARATOR(i, D_CODES-1, 10));
374 }
375
376 fclose(header);
377}
378#endif /* GEN_TREES_H */
379
380/* ===========================================================================
381 * Initialize the tree data structures for a new zlib stream.
382 */
383void _tr_init(s)
384 deflate_state *s;
385{
386 tr_static_init();
387
388 s->l_desc.dyn_tree = s->dyn_ltree;
389 s->l_desc.stat_desc = &static_l_desc;
390
391 s->d_desc.dyn_tree = s->dyn_dtree;
392 s->d_desc.stat_desc = &static_d_desc;
393
394 s->bl_desc.dyn_tree = s->bl_tree;
395 s->bl_desc.stat_desc = &static_bl_desc;
396
397 s->bi_buf = 0;
398 s->bi_valid = 0;
399 s->last_eob_len = 8; /* enough lookahead for inflate */
400#ifdef DEBUG
401 s->compressed_len = 0L;
402 s->bits_sent = 0L;
403#endif
404
405 /* Initialize the first block of the first file: */
406 init_block(s);
407}
408
409/* ===========================================================================
410 * Initialize a new block.
411 */
412local void init_block(s)
413 deflate_state *s;
414{
415 int n; /* iterates over tree elements */
416
417 /* Initialize the trees. */
418 for (n = 0; n < L_CODES; n++) s->dyn_ltree[n].Freq = 0;
419 for (n = 0; n < D_CODES; n++) s->dyn_dtree[n].Freq = 0;
420 for (n = 0; n < BL_CODES; n++) s->bl_tree[n].Freq = 0;
421
422 s->dyn_ltree[END_BLOCK].Freq = 1;
423 s->opt_len = s->static_len = 0L;
424 s->last_lit = s->matches = 0;
425}
426
427#define SMALLEST 1
428/* Index within the heap array of least frequent node in the Huffman tree */
429
430
431/* ===========================================================================
432 * Remove the smallest element from the heap and recreate the heap with
433 * one less element. Updates heap and heap_len.
434 */
435#define pqremove(s, tree, top) \
436{\
437 top = s->heap[SMALLEST]; \
438 s->heap[SMALLEST] = s->heap[s->heap_len--]; \
439 pqdownheap(s, tree, SMALLEST); \
440}
441
442/* ===========================================================================
443 * Compares to subtrees, using the tree depth as tie breaker when
444 * the subtrees have equal frequency. This minimizes the worst case length.
445 */
446#define smaller(tree, n, m, depth) \
447 (tree[n].Freq < tree[m].Freq || \
448 (tree[n].Freq == tree[m].Freq && depth[n] <= depth[m]))
449
450/* ===========================================================================
451 * Restore the heap property by moving down the tree starting at node k,
452 * exchanging a node with the smallest of its two sons if necessary, stopping
453 * when the heap property is re-established (each father smaller than its
454 * two sons).
455 */
456local void pqdownheap(s, tree, k)
457 deflate_state *s;
458 ct_data *tree; /* the tree to restore */
459 int k; /* node to move down */
460{
461 int v = s->heap[k];
462 int j = k << 1; /* left son of k */
463 while (j <= s->heap_len) {
464 /* Set j to the smallest of the two sons: */
465 if (j < s->heap_len &&
466 smaller(tree, s->heap[j+1], s->heap[j], s->depth)) {
467 j++;
468 }
469 /* Exit if v is smaller than both sons */
470 if (smaller(tree, v, s->heap[j], s->depth)) break;
471
472 /* Exchange v with the smallest son */
473 s->heap[k] = s->heap[j]; k = j;
474
475 /* And continue down the tree, setting j to the left son of k */
476 j <<= 1;
477 }
478 s->heap[k] = v;
479}
480
481/* ===========================================================================
482 * Compute the optimal bit lengths for a tree and update the total bit length
483 * for the current block.
484 * IN assertion: the fields freq and dad are set, heap[heap_max] and
485 * above are the tree nodes sorted by increasing frequency.
486 * OUT assertions: the field len is set to the optimal bit length, the
487 * array bl_count contains the frequencies for each bit length.
488 * The length opt_len is updated; static_len is also updated if stree is
489 * not null.
490 */
491local void gen_bitlen(s, desc)
492 deflate_state *s;
493 tree_desc *desc; /* the tree descriptor */
494{
495 ct_data *tree = desc->dyn_tree;
496 int max_code = desc->max_code;
497 const ct_data *stree = desc->stat_desc->static_tree;
498 const intf *extra = desc->stat_desc->extra_bits;
499 int base = desc->stat_desc->extra_base;
500 int max_length = desc->stat_desc->max_length;
501 int h; /* heap index */
502 int n, m; /* iterate over the tree elements */
503 int bits; /* bit length */
504 int xbits; /* extra bits */
505 ush f; /* frequency */
506 int overflow = 0; /* number of elements with bit length too large */
507
508 for (bits = 0; bits <= MAX_BITS; bits++) s->bl_count[bits] = 0;
509
510 /* In a first pass, compute the optimal bit lengths (which may
511 * overflow in the case of the bit length tree).
512 */
513 tree[s->heap[s->heap_max]].Len = 0; /* root of the heap */
514
515 for (h = s->heap_max+1; h < HEAP_SIZE; h++) {
516 n = s->heap[h];
517 bits = tree[tree[n].Dad].Len + 1;
518 if (bits > max_length) bits = max_length, overflow++;
519 tree[n].Len = (ush)bits;
520 /* We overwrite tree[n].Dad which is no longer needed */
521
522 if (n > max_code) continue; /* not a leaf node */
523
524 s->bl_count[bits]++;
525 xbits = 0;
526 if (n >= base) xbits = extra[n-base];
527 f = tree[n].Freq;
528 s->opt_len += (ulg)f * (bits + xbits);
529 if (stree) s->static_len += (ulg)f * (stree[n].Len + xbits);
530 }
531 if (overflow == 0) return;
532
533 Trace((stderr,"\nbit length overflow\n"));
534 /* This happens for example on obj2 and pic of the Calgary corpus */
535
536 /* Find the first bit length which could increase: */
537 do {
538 bits = max_length-1;
539 while (s->bl_count[bits] == 0) bits--;
540 s->bl_count[bits]--; /* move one leaf down the tree */
541 s->bl_count[bits+1] += 2; /* move one overflow item as its brother */
542 s->bl_count[max_length]--;
543 /* The brother of the overflow item also moves one step up,
544 * but this does not affect bl_count[max_length]
545 */
546 overflow -= 2;
547 } while (overflow > 0);
548
549 /* Now recompute all bit lengths, scanning in increasing frequency.
550 * h is still equal to HEAP_SIZE. (It is simpler to reconstruct all
551 * lengths instead of fixing only the wrong ones. This idea is taken
552 * from 'ar' written by Haruhiko Okumura.)
553 */
554 for (bits = max_length; bits != 0; bits--) {
555 n = s->bl_count[bits];
556 while (n != 0) {
557 m = s->heap[--h];
558 if (m > max_code) continue;
559 if (tree[m].Len != (unsigned) bits) {
560 Trace((stderr,"code %d bits %d->%d\n", m, tree[m].Len, bits));
561 s->opt_len += ((long)bits - (long)tree[m].Len)
562 *(long)tree[m].Freq;
563 tree[m].Len = (ush)bits;
564 }
565 n--;
566 }
567 }
568}
569
570/* ===========================================================================
571 * Generate the codes for a given tree and bit counts (which need not be
572 * optimal).
573 * IN assertion: the array bl_count contains the bit length statistics for
574 * the given tree and the field len is set for all tree elements.
575 * OUT assertion: the field code is set for all tree elements of non
576 * zero code length.
577 */
578local void gen_codes (tree, max_code, bl_count)
579 ct_data *tree; /* the tree to decorate */
580 int max_code; /* largest code with non zero frequency */
581 ushf *bl_count; /* number of codes at each bit length */
582{
583 ush next_code[MAX_BITS+1]; /* next code value for each bit length */
584 ush code = 0; /* running code value */
585 int bits; /* bit index */
586 int n; /* code index */
587
588 /* The distribution counts are first used to generate the code values
589 * without bit reversal.
590 */
591 for (bits = 1; bits <= MAX_BITS; bits++) {
592 next_code[bits] = code = (code + bl_count[bits-1]) << 1;
593 }
594 /* Check that the bit counts in bl_count are consistent. The last code
595 * must be all ones.
596 */
597 Assert (code + bl_count[MAX_BITS]-1 == (1<<MAX_BITS)-1,
598 "inconsistent bit counts");
599 Tracev((stderr,"\ngen_codes: max_code %d ", max_code));
600
601 for (n = 0; n <= max_code; n++) {
602 int len = tree[n].Len;
603 if (len == 0) continue;
604 /* Now reverse the bits */
605 tree[n].Code = bi_reverse(next_code[len]++, len);
606
607 Tracecv(tree != static_ltree, (stderr,"\nn %3d %c l %2d c %4x (%x) ",
608 n, (isgraph(n) ? n : ' '), len, tree[n].Code, next_code[len]-1));
609 }
610}
611
612/* ===========================================================================
613 * Construct one Huffman tree and assigns the code bit strings and lengths.
614 * Update the total bit length for the current block.
615 * IN assertion: the field freq is set for all tree elements.
616 * OUT assertions: the fields len and code are set to the optimal bit length
617 * and corresponding code. The length opt_len is updated; static_len is
618 * also updated if stree is not null. The field max_code is set.
619 */
620local void build_tree(s, desc)
621 deflate_state *s;
622 tree_desc *desc; /* the tree descriptor */
623{
624 ct_data *tree = desc->dyn_tree;
625 const ct_data *stree = desc->stat_desc->static_tree;
626 int elems = desc->stat_desc->elems;
627 int n, m; /* iterate over heap elements */
628 int max_code = -1; /* largest code with non zero frequency */
629 int node; /* new node being created */
630
631 /* Construct the initial heap, with least frequent element in
632 * heap[SMALLEST]. The sons of heap[n] are heap[2*n] and heap[2*n+1].
633 * heap[0] is not used.
634 */
635 s->heap_len = 0, s->heap_max = HEAP_SIZE;
636
637 for (n = 0; n < elems; n++) {
638 if (tree[n].Freq != 0) {
639 s->heap[++(s->heap_len)] = max_code = n;
640 s->depth[n] = 0;
641 } else {
642 tree[n].Len = 0;
643 }
644 }
645
646 /* The pkzip format requires that at least one distance code exists,
647 * and that at least one bit should be sent even if there is only one
648 * possible code. So to avoid special checks later on we force at least
649 * two codes of non zero frequency.
650 */
651 while (s->heap_len < 2) {
652 node = s->heap[++(s->heap_len)] = (max_code < 2 ? ++max_code : 0);
653 tree[node].Freq = 1;
654 s->depth[node] = 0;
655 s->opt_len--; if (stree) s->static_len -= stree[node].Len;
656 /* node is 0 or 1 so it does not have extra bits */
657 }
658 desc->max_code = max_code;
659
660 /* The elements heap[heap_len/2+1 .. heap_len] are leaves of the tree,
661 * establish sub-heaps of increasing lengths:
662 */
663 for (n = s->heap_len/2; n >= 1; n--) pqdownheap(s, tree, n);
664
665 /* Construct the Huffman tree by repeatedly combining the least two
666 * frequent nodes.
667 */
668 node = elems; /* next internal node of the tree */
669 do {
670 pqremove(s, tree, n); /* n = node of least frequency */
671 m = s->heap[SMALLEST]; /* m = node of next least frequency */
672
673 s->heap[--(s->heap_max)] = n; /* keep the nodes sorted by frequency */
674 s->heap[--(s->heap_max)] = m;
675
676 /* Create a new node father of n and m */
677 tree[node].Freq = tree[n].Freq + tree[m].Freq;
678 s->depth[node] = (uch) (MAX(s->depth[n], s->depth[m]) + 1);
679 tree[n].Dad = tree[m].Dad = (ush)node;
680#ifdef DUMP_BL_TREE
681 if (tree == s->bl_tree) {
682 fprintf(stderr,"\nnode %d(%d), sons %d(%d) %d(%d)",
683 node, tree[node].Freq, n, tree[n].Freq, m, tree[m].Freq);
684 }
685#endif
686 /* and insert the new node in the heap */
687 s->heap[SMALLEST] = node++;
688 pqdownheap(s, tree, SMALLEST);
689
690 } while (s->heap_len >= 2);
691
692 s->heap[--(s->heap_max)] = s->heap[SMALLEST];
693
694 /* At this point, the fields freq and dad are set. We can now
695 * generate the bit lengths.
696 */
697 gen_bitlen(s, (tree_desc *)desc);
698
699 /* The field len is now set, we can generate the bit codes */
700 gen_codes ((ct_data *)tree, max_code, s->bl_count);
701}
702
703/* ===========================================================================
704 * Scan a literal or distance tree to determine the frequencies of the codes
705 * in the bit length tree.
706 */
707local void scan_tree (s, tree, max_code)
708 deflate_state *s;
709 ct_data *tree; /* the tree to be scanned */
710 int max_code; /* and its largest code of non zero frequency */
711{
712 int n; /* iterates over all tree elements */
713 int prevlen = -1; /* last emitted length */
714 int curlen; /* length of current code */
715 int nextlen = tree[0].Len; /* length of next code */
716 int count = 0; /* repeat count of the current code */
717 int max_count = 7; /* max repeat count */
718 int min_count = 4; /* min repeat count */
719
720 if (nextlen == 0) max_count = 138, min_count = 3;
721 tree[max_code+1].Len = (ush)0xffff; /* guard */
722
723 for (n = 0; n <= max_code; n++) {
724 curlen = nextlen; nextlen = tree[n+1].Len;
725 if (++count < max_count && curlen == nextlen) {
726 continue;
727 } else if (count < min_count) {
728 s->bl_tree[curlen].Freq += count;
729 } else if (curlen != 0) {
730 if (curlen != prevlen) s->bl_tree[curlen].Freq++;
731 s->bl_tree[REP_3_6].Freq++;
732 } else if (count <= 10) {
733 s->bl_tree[REPZ_3_10].Freq++;
734 } else {
735 s->bl_tree[REPZ_11_138].Freq++;
736 }
737 count = 0; prevlen = curlen;
738 if (nextlen == 0) {
739 max_count = 138, min_count = 3;
740 } else if (curlen == nextlen) {
741 max_count = 6, min_count = 3;
742 } else {
743 max_count = 7, min_count = 4;
744 }
745 }
746}
747
748/* ===========================================================================
749 * Send a literal or distance tree in compressed form, using the codes in
750 * bl_tree.
751 */
752local void send_tree (s, tree, max_code)
753 deflate_state *s;
754 ct_data *tree; /* the tree to be scanned */
755 int max_code; /* and its largest code of non zero frequency */
756{
757 int n; /* iterates over all tree elements */
758 int prevlen = -1; /* last emitted length */
759 int curlen; /* length of current code */
760 int nextlen = tree[0].Len; /* length of next code */
761 int count = 0; /* repeat count of the current code */
762 int max_count = 7; /* max repeat count */
763 int min_count = 4; /* min repeat count */
764
765 /* tree[max_code+1].Len = -1; */ /* guard already set */
766 if (nextlen == 0) max_count = 138, min_count = 3;
767
768 for (n = 0; n <= max_code; n++) {
769 curlen = nextlen; nextlen = tree[n+1].Len;
770 if (++count < max_count && curlen == nextlen) {
771 continue;
772 } else if (count < min_count) {
773 do { send_code(s, curlen, s->bl_tree); } while (--count != 0);
774
775 } else if (curlen != 0) {
776 if (curlen != prevlen) {
777 send_code(s, curlen, s->bl_tree); count--;
778 }
779 Assert(count >= 3 && count <= 6, " 3_6?");
780 send_code(s, REP_3_6, s->bl_tree); send_bits(s, count-3, 2);
781
782 } else if (count <= 10) {
783 send_code(s, REPZ_3_10, s->bl_tree); send_bits(s, count-3, 3);
784
785 } else {
786 send_code(s, REPZ_11_138, s->bl_tree); send_bits(s, count-11, 7);
787 }
788 count = 0; prevlen = curlen;
789 if (nextlen == 0) {
790 max_count = 138, min_count = 3;
791 } else if (curlen == nextlen) {
792 max_count = 6, min_count = 3;
793 } else {
794 max_count = 7, min_count = 4;
795 }
796 }
797}
798
799/* ===========================================================================
800 * Construct the Huffman tree for the bit lengths and return the index in
801 * bl_order of the last bit length code to send.
802 */
803local int build_bl_tree(s)
804 deflate_state *s;
805{
806 int max_blindex; /* index of last bit length code of non zero freq */
807
808 /* Determine the bit length frequencies for literal and distance trees */
809 scan_tree(s, (ct_data *)s->dyn_ltree, s->l_desc.max_code);
810 scan_tree(s, (ct_data *)s->dyn_dtree, s->d_desc.max_code);
811
812 /* Build the bit length tree: */
813 build_tree(s, (tree_desc *)(&(s->bl_desc)));
814 /* opt_len now includes the length of the tree representations, except
815 * the lengths of the bit lengths codes and the 5+5+4 bits for the counts.
816 */
817
818 /* Determine the number of bit length codes to send. The pkzip format
819 * requires that at least 4 bit length codes be sent. (appnote.txt says
820 * 3 but the actual value used is 4.)
821 */
822 for (max_blindex = BL_CODES-1; max_blindex >= 3; max_blindex--) {
823 if (s->bl_tree[bl_order[max_blindex]].Len != 0) break;
824 }
825 /* Update opt_len to include the bit length tree and counts */
826 s->opt_len += 3*(max_blindex+1) + 5+5+4;
827 Tracev((stderr, "\ndyn trees: dyn %ld, stat %ld",
828 s->opt_len, s->static_len));
829
830 return max_blindex;
831}
832
833/* ===========================================================================
834 * Send the header for a block using dynamic Huffman trees: the counts, the
835 * lengths of the bit length codes, the literal tree and the distance tree.
836 * IN assertion: lcodes >= 257, dcodes >= 1, blcodes >= 4.
837 */
838local void send_all_trees(s, lcodes, dcodes, blcodes)
839 deflate_state *s;
840 int lcodes, dcodes, blcodes; /* number of codes for each tree */
841{
842 int rank; /* index in bl_order */
843
844 Assert (lcodes >= 257 && dcodes >= 1 && blcodes >= 4, "not enough codes");
845 Assert (lcodes <= L_CODES && dcodes <= D_CODES && blcodes <= BL_CODES,
846 "too many codes");
847 Tracev((stderr, "\nbl counts: "));
848 send_bits(s, lcodes-257, 5); /* not +255 as stated in appnote.txt */
849 send_bits(s, dcodes-1, 5);
850 send_bits(s, blcodes-4, 4); /* not -3 as stated in appnote.txt */
851 for (rank = 0; rank < blcodes; rank++) {
852 Tracev((stderr, "\nbl code %2d ", bl_order[rank]));
853 send_bits(s, s->bl_tree[bl_order[rank]].Len, 3);
854 }
855 Tracev((stderr, "\nbl tree: sent %ld", s->bits_sent));
856
857 send_tree(s, (ct_data *)s->dyn_ltree, lcodes-1); /* literal tree */
858 Tracev((stderr, "\nlit tree: sent %ld", s->bits_sent));
859
860 send_tree(s, (ct_data *)s->dyn_dtree, dcodes-1); /* distance tree */
861 Tracev((stderr, "\ndist tree: sent %ld", s->bits_sent));
862}
863
864/* ===========================================================================
865 * Send a stored block
866 */
867void _tr_stored_block(s, buf, stored_len, eof)
868 deflate_state *s;
869 charf *buf; /* input block */
870 ulg stored_len; /* length of input block */
871 int eof; /* true if this is the last block for a file */
872{
873 send_bits(s, (STORED_BLOCK<<1)+eof, 3); /* send block type */
874#ifdef DEBUG
875 s->compressed_len = (s->compressed_len + 3 + 7) & (ulg)~7L;
876 s->compressed_len += (stored_len + 4) << 3;
877#endif
878 copy_block(s, buf, (unsigned)stored_len, 1); /* with header */
879}
880
881/* ===========================================================================
882 * Send one empty static block to give enough lookahead for inflate.
883 * This takes 10 bits, of which 7 may remain in the bit buffer.
884 * The current inflate code requires 9 bits of lookahead. If the
885 * last two codes for the previous block (real code plus EOB) were coded
886 * on 5 bits or less, inflate may have only 5+3 bits of lookahead to decode
887 * the last real code. In this case we send two empty static blocks instead
888 * of one. (There are no problems if the previous block is stored or fixed.)
889 * To simplify the code, we assume the worst case of last real code encoded
890 * on one bit only.
891 */
892void _tr_align(s)
893 deflate_state *s;
894{
895 send_bits(s, STATIC_TREES<<1, 3);
896 send_code(s, END_BLOCK, static_ltree);
897#ifdef DEBUG
898 s->compressed_len += 10L; /* 3 for block type, 7 for EOB */
899#endif
900 bi_flush(s);
901 /* Of the 10 bits for the empty block, we have already sent
902 * (10 - bi_valid) bits. The lookahead for the last real code (before
903 * the EOB of the previous block) was thus at least one plus the length
904 * of the EOB plus what we have just sent of the empty static block.
905 */
906 if (1 + s->last_eob_len + 10 - s->bi_valid < 9) {
907 send_bits(s, STATIC_TREES<<1, 3);
908 send_code(s, END_BLOCK, static_ltree);
909#ifdef DEBUG
910 s->compressed_len += 10L;
911#endif
912 bi_flush(s);
913 }
914 s->last_eob_len = 7;
915}
916
917/* ===========================================================================
918 * Determine the best encoding for the current block: dynamic trees, static
919 * trees or store, and output the encoded block to the zip file.
920 */
921void _tr_flush_block(s, buf, stored_len, eof)
922 deflate_state *s;
923 charf *buf; /* input block, or NULL if too old */
924 ulg stored_len; /* length of input block */
925 int eof; /* true if this is the last block for a file */
926{
927 ulg opt_lenb, static_lenb; /* opt_len and static_len in bytes */
928 int max_blindex = 0; /* index of last bit length code of non zero freq */
929
930 /* Build the Huffman trees unless a stored block is forced */
931 if (s->level > 0) {
932
933 /* Check if the file is ascii or binary */
934 if (s->data_type == Z_UNKNOWN) set_data_type(s);
935
936 /* Construct the literal and distance trees */
937 build_tree(s, (tree_desc *)(&(s->l_desc)));
938 Tracev((stderr, "\nlit data: dyn %ld, stat %ld", s->opt_len,
939 s->static_len));
940
941 build_tree(s, (tree_desc *)(&(s->d_desc)));
942 Tracev((stderr, "\ndist data: dyn %ld, stat %ld", s->opt_len,
943 s->static_len));
944 /* At this point, opt_len and static_len are the total bit lengths of
945 * the compressed block data, excluding the tree representations.
946 */
947
948 /* Build the bit length tree for the above two trees, and get the index
949 * in bl_order of the last bit length code to send.
950 */
951 max_blindex = build_bl_tree(s);
952
953 /* Determine the best encoding. Compute first the block length in bytes*/
954 opt_lenb = (s->opt_len+3+7)>>3;
955 static_lenb = (s->static_len+3+7)>>3;
956
957 Tracev((stderr, "\nopt %lu(%lu) stat %lu(%lu) stored %lu lit %u ",
958 opt_lenb, s->opt_len, static_lenb, s->static_len, stored_len,
959 s->last_lit));
960
961 if (static_lenb <= opt_lenb) opt_lenb = static_lenb;
962
963 } else {
964 Assert(buf != (char*)0, "lost buf");
965 opt_lenb = static_lenb = stored_len + 5; /* force a stored block */
966 }
967
968#ifdef FORCE_STORED
969 if (buf != (char*)0) { /* force stored block */
970#else
971 if (stored_len+4 <= opt_lenb && buf != (char*)0) {
972 /* 4: two words for the lengths */
973#endif
974 /* The test buf != NULL is only necessary if LIT_BUFSIZE > WSIZE.
975 * Otherwise we can't have processed more than WSIZE input bytes since
976 * the last block flush, because compression would have been
977 * successful. If LIT_BUFSIZE <= WSIZE, it is never too late to
978 * transform a block into a stored block.
979 */
980 _tr_stored_block(s, buf, stored_len, eof);
981
982#ifdef FORCE_STATIC
983 } else if (static_lenb >= 0) { /* force static trees */
984#else
985 } else if (static_lenb == opt_lenb) {
986#endif
987 send_bits(s, (STATIC_TREES<<1)+eof, 3);
988 compress_block(s, (ct_data *)static_ltree, (ct_data *)static_dtree);
989#ifdef DEBUG
990 s->compressed_len += 3 + s->static_len;
991#endif
992 } else {
993 send_bits(s, (DYN_TREES<<1)+eof, 3);
994 send_all_trees(s, s->l_desc.max_code+1, s->d_desc.max_code+1,
995 max_blindex+1);
996 compress_block(s, (ct_data *)s->dyn_ltree, (ct_data *)s->dyn_dtree);
997#ifdef DEBUG
998 s->compressed_len += 3 + s->opt_len;
999#endif
1000 }
1001 Assert (s->compressed_len == s->bits_sent, "bad compressed size");
1002 /* The above check is made mod 2^32, for files larger than 512 MB
1003 * and uLong implemented on 32 bits.
1004 */
1005 init_block(s);
1006
1007 if (eof) {
1008 bi_windup(s);
1009#ifdef DEBUG
1010 s->compressed_len += 7; /* align on byte boundary */
1011#endif
1012 }
1013 Tracev((stderr,"\ncomprlen %lu(%lu) ", s->compressed_len>>3,
1014 s->compressed_len-7*eof));
1015}
1016
1017/* ===========================================================================
1018 * Save the match info and tally the frequency counts. Return true if
1019 * the current block must be flushed.
1020 */
1021int _tr_tally (s, dist, lc)
1022 deflate_state *s;
1023 unsigned dist; /* distance of matched string */
1024 unsigned lc; /* match length-MIN_MATCH or unmatched char (if dist==0) */
1025{
1026 s->d_buf[s->last_lit] = (ush)dist;
1027 s->l_buf[s->last_lit++] = (uch)lc;
1028 if (dist == 0) {
1029 /* lc is the unmatched char */
1030 s->dyn_ltree[lc].Freq++;
1031 } else {
1032 s->matches++;
1033 /* Here, lc is the match length - MIN_MATCH */
1034 dist--; /* dist = match distance - 1 */
1035 Assert((ush)dist < (ush)MAX_DIST(s) &&
1036 (ush)lc <= (ush)(MAX_MATCH-MIN_MATCH) &&
1037 (ush)d_code(dist) < (ush)D_CODES, "_tr_tally: bad match");
1038
1039 s->dyn_ltree[_length_code[lc]+LITERALS+1].Freq++;
1040 s->dyn_dtree[d_code(dist)].Freq++;
1041 }
1042
1043#ifdef TRUNCATE_BLOCK
1044 /* Try to guess if it is profitable to stop the current block here */
1045 if ((s->last_lit & 0x1fff) == 0 && s->level > 2) {
1046 /* Compute an upper bound for the compressed length */
1047 ulg out_length = (ulg)s->last_lit*8L;
1048 ulg in_length = (ulg)((long)s->strstart - s->block_start);
1049 int dcode;
1050 for (dcode = 0; dcode < D_CODES; dcode++) {
1051 out_length += (ulg)s->dyn_dtree[dcode].Freq *
1052 (5L+extra_dbits[dcode]);
1053 }
1054 out_length >>= 3;
1055 Tracev((stderr,"\nlast_lit %u, in %ld, out ~%ld(%ld%%) ",
1056 s->last_lit, in_length, out_length,
1057 100L - out_length*100L/in_length));
1058 if (s->matches < s->last_lit/2 && out_length < in_length/2) return 1;
1059 }
1060#endif
1061 return (s->last_lit == s->lit_bufsize-1);
1062 /* We avoid equality with lit_bufsize because of wraparound at 64K
1063 * on 16 bit machines and because stored blocks are restricted to
1064 * 64K-1 bytes.
1065 */
1066}
1067
1068/* ===========================================================================
1069 * Send the block data compressed using the given Huffman trees
1070 */
1071local void compress_block(s, ltree, dtree)
1072 deflate_state *s;
1073 ct_data *ltree; /* literal tree */
1074 ct_data *dtree; /* distance tree */
1075{
1076 unsigned dist; /* distance of matched string */
1077 int lc; /* match length or unmatched char (if dist == 0) */
1078 unsigned lx = 0; /* running index in l_buf */
1079 unsigned code; /* the code to send */
1080 int extra; /* number of extra bits to send */
1081
1082 if (s->last_lit != 0) do {
1083 dist = s->d_buf[lx];
1084 lc = s->l_buf[lx++];
1085 if (dist == 0) {
1086 send_code(s, lc, ltree); /* send a literal byte */
1087 Tracecv(isgraph(lc), (stderr," '%c' ", lc));
1088 } else {
1089 /* Here, lc is the match length - MIN_MATCH */
1090 code = _length_code[lc];
1091 send_code(s, code+LITERALS+1, ltree); /* send the length code */
1092 extra = extra_lbits[code];
1093 if (extra != 0) {
1094 lc -= base_length[code];
1095 send_bits(s, lc, extra); /* send the extra length bits */
1096 }
1097 dist--; /* dist is now the match distance - 1 */
1098 code = d_code(dist);
1099 Assert (code < D_CODES, "bad d_code");
1100
1101 send_code(s, code, dtree); /* send the distance code */
1102 extra = extra_dbits[code];
1103 if (extra != 0) {
1104 dist -= base_dist[code];
1105 send_bits(s, dist, extra); /* send the extra distance bits */
1106 }
1107 } /* literal or match pair ? */
1108
1109 /* Check that the overlay between pending_buf and d_buf+l_buf is ok: */
1110 Assert(s->pending < s->lit_bufsize + 2*lx, "pendingBuf overflow");
1111
1112 } while (lx < s->last_lit);
1113
1114 send_code(s, END_BLOCK, ltree);
1115 s->last_eob_len = ltree[END_BLOCK].Len;
1116}
1117
1118/* ===========================================================================
1119 * Set the data type to ASCII or BINARY, using a crude approximation:
1120 * binary if more than 20% of the bytes are <= 6 or >= 128, ascii otherwise.
1121 * IN assertion: the fields freq of dyn_ltree are set and the total of all
1122 * frequencies does not exceed 64K (to fit in an int on 16 bit machines).
1123 */
1124local void set_data_type(s)
1125 deflate_state *s;
1126{
1127 int n = 0;
1128 unsigned ascii_freq = 0;
1129 unsigned bin_freq = 0;
1130 while (n < 7) bin_freq += s->dyn_ltree[n++].Freq;
1131 while (n < 128) ascii_freq += s->dyn_ltree[n++].Freq;
1132 while (n < LITERALS) bin_freq += s->dyn_ltree[n++].Freq;
1133 s->data_type = (Byte)(bin_freq > (ascii_freq >> 2) ? Z_BINARY : Z_ASCII);
1134}
1135
1136/* ===========================================================================
1137 * Reverse the first len bits of a code, using straightforward code (a faster
1138 * method would use a table)
1139 * IN assertion: 1 <= len <= 15
1140 */
1141local unsigned bi_reverse(code, len)
1142 unsigned code; /* the value to invert */
1143 int len; /* its bit length */
1144{
1145 register unsigned res = 0;
1146 do {
1147 res |= code & 1;
1148 code >>= 1, res <<= 1;
1149 } while (--len > 0);
1150 return res >> 1;
1151}
1152
1153/* ===========================================================================
1154 * Flush the bit buffer, keeping at most 7 bits in it.
1155 */
1156local void bi_flush(s)
1157 deflate_state *s;
1158{
1159 if (s->bi_valid == 16) {
1160 put_short(s, s->bi_buf);
1161 s->bi_buf = 0;
1162 s->bi_valid = 0;
1163 } else if (s->bi_valid >= 8) {
1164 put_byte(s, (Byte)s->bi_buf);
1165 s->bi_buf >>= 8;
1166 s->bi_valid -= 8;
1167 }
1168}
1169
1170/* ===========================================================================
1171 * Flush the bit buffer and align the output on a byte boundary
1172 */
1173local void bi_windup(s)
1174 deflate_state *s;
1175{
1176 if (s->bi_valid > 8) {
1177 put_short(s, s->bi_buf);
1178 } else if (s->bi_valid > 0) {
1179 put_byte(s, (Byte)s->bi_buf);
1180 }
1181 s->bi_buf = 0;
1182 s->bi_valid = 0;
1183#ifdef DEBUG
1184 s->bits_sent = (s->bits_sent+7) & ~7;
1185#endif
1186}
1187
1188/* ===========================================================================
1189 * Copy a stored block, storing first the length and its
1190 * one's complement if requested.
1191 */
1192local void copy_block(s, buf, len, header)
1193 deflate_state *s;
1194 charf *buf; /* the input data */
1195 unsigned len; /* its length */
1196 int header; /* true if block header must be written */
1197{
1198 bi_windup(s); /* align on byte boundary */
1199 s->last_eob_len = 8; /* enough lookahead for inflate */
1200
1201 if (header) {
1202 put_short(s, (ush)len);
1203 put_short(s, (ush)~len);
1204#ifdef DEBUG
1205 s->bits_sent += 2*16;
1206#endif
1207 }
1208#ifdef DEBUG
1209 s->bits_sent += (ulg)len<<3;
1210#endif
1211 while (len--) {
1212 put_byte(s, *buf++);
1213 }
1214}
Note: See TracBrowser for help on using the repository browser.