#!/usr/bin/perl
#
# Generate code page .c files from ftp.unicode.org descriptions
#
# Copyright 2000 Alexandre Julliard
#

# base directory for ftp.unicode.org files, and the mappings directory
# located below it
$BASEDIR   = "ftp.unicode.org/Public/";
$MAPPREFIX = "${BASEDIR}MAPPINGS/";

# path of the UnicodeData file
$UNICODEDATA = "${BASEDIR}UNIDATA/UnicodeData.txt";

# path of the defaults mapping file
$DEFAULTS = "./defaults";

# char code emitted for undefined mappings
$DEF_CHAR = ord('?');

# Code pages to generate.  Each entry is
#   [ code page number, mapping file, description ]
# and is passed as-is to HANDLE_FILE by the main routine.
# NOTE(review): the file paths look relative to $MAPPREFIX -- confirm
# against HANDLE_FILE.
@allfiles =
(
    [    37, "VENDORS/MICSFT/EBCDIC/CP037.TXT",   "IBM EBCDIC US Canada" ],
    [    42, "VENDORS/ADOBE/symbol.txt",          "Symbol" ],
    [   424, "VENDORS/MISC/CP424.TXT",            "IBM EBCDIC Hebrew" ],
    [   437, "VENDORS/MICSFT/PC/CP437.TXT",       "OEM United States" ],
    [   500, "VENDORS/MICSFT/EBCDIC/CP500.TXT",   "IBM EBCDIC International" ],
    [   737, "VENDORS/MICSFT/PC/CP737.TXT",       "OEM Greek 437G" ],
    [   775, "VENDORS/MICSFT/PC/CP775.TXT",       "OEM Baltic" ],
    [   850, "VENDORS/MICSFT/PC/CP850.TXT",       "OEM Multilingual Latin 1" ],
    [   852, "VENDORS/MICSFT/PC/CP852.TXT",       "OEM Slovak Latin 2" ],
    [   855, "VENDORS/MICSFT/PC/CP855.TXT",       "OEM Cyrillic" ],
    [   856, "VENDORS/MISC/CP856.TXT",            "Hebrew PC" ],
    [   857, "VENDORS/MICSFT/PC/CP857.TXT",       "OEM Turkish" ],
    [   860, "VENDORS/MICSFT/PC/CP860.TXT",       "OEM Portuguese" ],
    [   861, "VENDORS/MICSFT/PC/CP861.TXT",       "OEM Icelandic" ],
    [   862, "VENDORS/MICSFT/PC/CP862.TXT",       "OEM Hebrew" ],
    [   863, "VENDORS/MICSFT/PC/CP863.TXT",       "OEM Canadian French" ],
    [   864, "VENDORS/MICSFT/PC/CP864.TXT",       "OEM Arabic" ],
    [   865, "VENDORS/MICSFT/PC/CP865.TXT",       "OEM Nordic" ],
    [   866, "VENDORS/MICSFT/PC/CP866.TXT",       "OEM Russian" ],
    [   869, "VENDORS/MICSFT/PC/CP869.TXT",       "OEM Greek" ],
    [   874, "VENDORS/MICSFT/PC/CP874.TXT",       "ANSI/OEM Thai" ],
    [   875, "VENDORS/MICSFT/EBCDIC/CP875.TXT",   "IBM EBCDIC Greek" ],
    [   878, "VENDORS/MISC/KOI8-R.TXT",           "Russian KOI8" ],
    [   932, "VENDORS/MICSFT/WINDOWS/CP932.TXT",  "ANSI/OEM Japanese Shift-JIS" ],
    [   936, "VENDORS/MICSFT/WINDOWS/CP936.TXT",  "ANSI/OEM Simplified Chinese GBK" ],
    [   949, "VENDORS/MICSFT/WINDOWS/CP949.TXT",  "ANSI/OEM Korean Unified Hangul" ],
    [   950, "VENDORS/MICSFT/WINDOWS/CP950.TXT",  "ANSI/OEM Traditional Chinese Big5" ],
    [  1006, "VENDORS/MISC/CP1006.TXT",           "IBM Arabic" ],
    [  1026, "VENDORS/MICSFT/EBCDIC/CP1026.TXT",  "IBM EBCDIC Latin 5 Turkish" ],
    [  1250, "VENDORS/MICSFT/WINDOWS/CP1250.TXT", "ANSI Eastern Europe" ],
    [  1251, "VENDORS/MICSFT/WINDOWS/CP1251.TXT", "ANSI Cyrillic" ],
    [  1252, "VENDORS/MICSFT/WINDOWS/CP1252.TXT", "ANSI Latin 1" ],
    [  1253, "VENDORS/MICSFT/WINDOWS/CP1253.TXT", "ANSI Greek" ],
    [  1254, "VENDORS/MICSFT/WINDOWS/CP1254.TXT", "ANSI Turkish" ],
    [  1255, "VENDORS/MICSFT/WINDOWS/CP1255.TXT", "ANSI Hebrew" ],
    [  1256, "VENDORS/MICSFT/WINDOWS/CP1256.TXT", "ANSI Arabic" ],
    [  1257, "VENDORS/MICSFT/WINDOWS/CP1257.TXT", "ANSI Baltic" ],
    [  1258, "VENDORS/MICSFT/WINDOWS/CP1258.TXT", "ANSI/OEM Viet Nam" ],
    [ 10000, "VENDORS/MICSFT/MAC/ROMAN.TXT",      "Mac Roman" ],
    [ 10006, "VENDORS/MICSFT/MAC/GREEK.TXT",      "Mac Greek" ],
    [ 10007, "VENDORS/MICSFT/MAC/CYRILLIC.TXT",   "Mac Cyrillic" ],
    [ 10029, "VENDORS/MICSFT/MAC/LATIN2.TXT",     "Mac Latin 2" ],
    [ 10079, "VENDORS/MICSFT/MAC/ICELAND.TXT",    "Mac Icelandic" ],
    [ 10081, "VENDORS/MICSFT/MAC/TURKISH.TXT",    "Mac Turkish" ],
    [ 20866, "VENDORS/MISC/KOI8-R.TXT",           "Russian KOI8" ],
    [ 28591, "ISO8859/8859-1.TXT",                "ISO 8859-1 Latin 1" ],
    [ 28592, "ISO8859/8859-2.TXT",                "ISO 8859-2 Latin 2 (East European)" ],
    [ 28593, "ISO8859/8859-3.TXT",                "ISO 8859-3 Latin 3 (South European)" ],
    [ 28594, "ISO8859/8859-4.TXT",                "ISO 8859-4 Latin 4 (Baltic old)" ],
    [ 28595, "ISO8859/8859-5.TXT",                "ISO 8859-5 Cyrillic" ],
    [ 28596, "ISO8859/8859-6.TXT",                "ISO 8859-6 Arabic" ],
    [ 28597, "ISO8859/8859-7.TXT",                "ISO 8859-7 Greek" ],
    [ 28598, "ISO8859/8859-8.TXT",                "ISO 8859-8 Hebrew" ],
    [ 28599, "ISO8859/8859-9.TXT",                "ISO 8859-9 Latin 5 (Turkish)" ],
    [ 28600, "ISO8859/8859-10.TXT",               "ISO 8859-10 Latin 6 (Nordic)" ],
    [ 28603, "ISO8859/8859-13.TXT",               "ISO 8859-13 Latin 7 (Baltic)" ],
    [ 28604, "ISO8859/8859-14.TXT",               "ISO 8859-14 Latin 8 (Celtic)" ],
    [ 28605, "ISO8859/8859-15.TXT",               "ISO 8859-15 Latin 9 (Euro)" ]
);


# bit flags describing the type of a character
%ctype =
(
    upper  => 0x0001,
    lower  => 0x0002,
    digit  => 0x0004,
    space  => 0x0008,
    punct  => 0x0010,
    cntrl  => 0x0020,
    blank  => 0x0040,
    xdigit => 0x0080,
    alpha  => 0x0100
);

# ctype flags assigned to each Unicode general category
%categories =
(
    Lu => $ctype{alpha} | $ctype{upper},  # Letter, Uppercase
    Ll => $ctype{alpha} | $ctype{lower},  # Letter, Lowercase
    Lt => $ctype{alpha},                  # Letter, Titlecase
    Mn => $ctype{punct},                  # Mark, Non-Spacing
    Mc => $ctype{punct},                  # Mark, Spacing Combining
    Me => $ctype{punct},                  # Mark, Enclosing
    Nd => $ctype{digit},                  # Number, Decimal Digit
    Nl => $ctype{punct},                  # Number, Letter
    No => $ctype{punct},                  # Number, Other
    Zs => $ctype{space},                  # Separator, Space
    Zl => 0,                              # Separator, Line
    Zp => 0,                              # Separator, Paragraph
    Cc => $ctype{cntrl},                  # Other, Control
    Cf => 0,                              # Other, Format
    Cs => 0,                              # Other, Surrogate
    Co => 0,                              # Other, Private Use
    Cn => 0,                              # Other, Not Assigned
    Lm => $ctype{punct},                  # Letter, Modifier
    Lo => $ctype{alpha},                  # Letter, Other
    Pc => $ctype{punct},                  # Punctuation, Connector
    Pd => $ctype{punct},                  # Punctuation, Dash
    Ps => $ctype{punct},                  # Punctuation, Open
    Pe => $ctype{punct},                  # Punctuation, Close
    Pi => $ctype{punct},                  # Punctuation, Initial quote
    Pf => $ctype{punct},                  # Punctuation, Final quote
    Po => $ctype{punct},                  # Punctuation, Other
    Sm => $ctype{punct},                  # Symbol, Math
    Sc => $ctype{punct},                  # Symbol, Currency
    Sk => $ctype{punct},                  # Symbol, Modifier
    So => $ctype{punct}                   # Symbol, Other
);

# a few characters need additional categories that cannot be determined
# automatically; each key is a %ctype flag name, each value the list of
# characters that get this flag on top of their regular category
%special_categories =
(
    xdigit => [ 0x30 .. 0x39,       # '0' .. '9'
                0x41 .. 0x46,       # 'A' .. 'F'
                0x61 .. 0x66,       # 'a' .. 'f'
                0xff10 .. 0xff19,
                0xff21 .. 0xff26,
                0xff41 .. 0xff46 ],
    space  => [ 0x09 .. 0x0d, 0xfeff ],
    blank  => [ 0x09, 0x20, 0xa0, 0xfeff ]
);

# numeric value stored for each bidirectional class of the Unicode database
%directions =
(
    L   => 1,    # Left-to-Right
    LRE => 11,   # Left-to-Right Embedding
    LRO => 11,   # Left-to-Right Override
    R   => 2,    # Right-to-Left
    AL  => 2,    # Right-to-Left Arabic
    RLE => 11,   # Right-to-Left Embedding
    RLO => 11,   # Right-to-Left Override
    PDF => 11,   # Pop Directional Format
    EN  => 3,    # European Number
    ES  => 4,    # European Number Separator
    ET  => 5,    # European Number Terminator
    AN  => 6,    # Arabic Number
    CS  => 7,    # Common Number Separator
    NSM => 0,    # Non-Spacing Mark
    BN  => 0,    # Boundary Neutral
    B   => 8,    # Paragraph Separator
    S   => 9,    # Segment Separator
    WS  => 10,   # Whitespace
    ON  => 11    # Other Neutrals
);


################################################################
# main routine

# load the defaults file and the Unicode database, then dump the tables
# built from them
READ_DEFAULTS();
DUMP_CASE_MAPPINGS();
DUMP_COMPOSE_TABLES();
DUMP_CTYPE_TABLES();

# hand every [ codepage, file, description ] entry over to HANDLE_FILE
foreach $file (@allfiles)
{
    HANDLE_FILE( @$file );
}

OUTPUT_CPTABLE();

exit(0);


################################################################
# read in the defaults file
#
# Loads the default mappings from $DEFAULTS and the Unicode database from
# $UNICODEDATA, and rebuilds these global tables:
#   @unicode_defaults   char -> default replacement char
#   @unicode_aliases    lists of chars declared together on one line of
#                       the defaults file
#   @tolower_table      char -> lower case char
#   @toupper_table      char -> upper case char
#   @category_table     char -> ctype flags
#   @direction_table    char -> direction value
#   @decomp_table       char -> [ first, second ] (two-char decompositions)
#   @compose_table      list of [ first, second, composed char ]
#   @uniname            char -> name
# Dies if a file cannot be opened, a line of the defaults file is not
# recognized, a default is defined twice, a category or directionality
# is unknown, or a default mapping would loop.
sub READ_DEFAULTS
{
    @unicode_defaults = ();
    @unicode_aliases = ();
    @tolower_table = ();
    @toupper_table = ();
    @category_table = ();
    @direction_table = ();
    @decomp_table = ();
    @compose_table = ();

    # first setup a few default mappings

    # explicit read mode: the file name is never parsed for mode characters
    open DEFAULTS, "<", $DEFAULTS or die "Cannot open $DEFAULTS";
    print "Loading $DEFAULTS\n";
    while (<DEFAULTS>)
    {
        next if /^\#/;  # skip comments
        next if /^$/;  # skip empty lines

        # expected: "src[,src...] dst [# comment]", where dst is a hex
        # value, a quoted char or the word "none"
        if (/^(([0-9a-fA-F]+)(,[0-9a-fA-F]+)*)\s+([0-9a-fA-F]+|'.'|none)\s+(\#.*)?/)
        {
            my @src = map hex, split /,/,$1;
            my $dst = $4;

            # several sources on one line form an alias set
            if ($#src > 0) { push @unicode_aliases, \@src; }
            next if ($dst eq "none");
            $dst = ($dst =~ /\'.\'/) ? ord substr($dst,1,1) : hex $dst;
            foreach my $src (@src)
            {
                die "Duplicate value" if defined($unicode_defaults[$src]);
                $unicode_defaults[$src] = $dst;
            }
            next;
        }
        die "Unrecognized line $_\n";
    }
    close DEFAULTS;

    # now build mappings from the decomposition field of the Unicode database

    open UNICODEDATA, "<", $UNICODEDATA or die "Cannot open $UNICODEDATA";
    print "Loading $UNICODEDATA\n";

    # decomposition tags for which a single-char decomposition is used
    my %known_tag = map { $_ => 1 }
        qw(font noBreak circle super sub wide narrow compat small);

    # first char of the First/Last range being read
    my $start;

    while (<UNICODEDATA>)
    {
        # Decode the fields ...
        my ($code, $name, $cat, $comb, $bidi,
            $decomp, $dec, $dig, $num, $mirror,
            $oldname, $comment, $upper, $lower, $title) = split /;/;

        my $src = hex $code;

        die "unknown category $cat" unless defined $categories{$cat};
        die "unknown directionality $bidi" unless defined $directions{$bidi};

        $uniname[$src] = $name;
        $category_table[$src] = $categories{$cat};
        $direction_table[$src] = $directions{$bidi};

        # a char with a lower case mapping is flagged as upper case,
        # and the other way around
        if ($lower ne "")
        {
            $tolower_table[$src] = hex $lower;
            $category_table[$src] |= $ctype{"upper"}|$ctype{"alpha"};
        }
        if ($upper ne "")
        {
            $toupper_table[$src] = hex $upper;
            $category_table[$src] |= $ctype{"lower"}|$ctype{"alpha"};
        }
        if ($dec ne "")
        {
            $category_table[$src] |= $ctype{"digit"};
        }

        # copy the category and direction for everything between First/Last pairs
        if ($name =~ /, First>/) { $start = $src; }
        if ($name =~ /, Last>/)
        {
            while ($start < $src)
            {
                $category_table[$start] = $category_table[$src];
                $direction_table[$start] = $direction_table[$src];
                $start++;
            }
        }

        next if $decomp eq "";  # no decomposition, skip it

        my $dst;
        if ($decomp =~ /^<([a-zA-Z]+)>\s+([0-9a-fA-F]+)$/)
        {
            # decomposition of the form "<foo> 1234" -> use char if type is known
            next unless $known_tag{$1};
            $dst = hex $2;
        }
        elsif ($decomp =~ /^<compat>\s+0020\s+([0-9a-fA-F]+)/)
        {
            # decomposition "<compat> 0020 1234" -> combining accent
            $dst = hex $1;
        }
        elsif ($decomp =~ /^([0-9a-fA-F]+)/)
        {
            # decomposition contains only char values without prefix -> use first char
            $dst = hex $1;
            $category_table[$src] |= $category_table[$dst];
            # store decomposition if it contains two chars
            if ($decomp =~ /^([0-9a-fA-F]+)\s+([0-9a-fA-F]+)$/)
            {
                $decomp_table[$src] = [ hex $1, hex $2 ];
                push @compose_table, [ hex $1, hex $2, $src ];
            }
        }
        else
        {
            next;
        }

        next if defined($unicode_defaults[$src]);  # may have been set in the defaults file

        # check for loops: follow the chain of defaults starting at $dst
        # and make sure it never comes back to $src
        for (my $i = $dst; ; $i = $unicode_defaults[$i])
        {
            die sprintf("loop detected for %04x -> %04x",$src,$dst) if $i == $src;
            last unless defined($unicode_defaults[$i]);
        }
        $unicode_defaults[$src] = $dst;
    }
    close UNICODEDATA;

    # patch the category of some special characters

    foreach my $cat (keys %special_categories)
    {
        my $flag = $ctype{$cat};
        foreach my $i (@{$special_categories{$cat}}) { $category_table[$i] |= $flag; }
    }
}


################################################################
# parse the input file
#
# Reads the mapping file given as first argument and rebuilds the global
# tables @cp2uni (code page char -> unicode char), @uni2cp (unicode char
# -> code page char) and @lead_bytes (list of DBCS lead bytes).  When a
# char is mapped several times the first mapping wins.
# Dies if the file cannot be opened or contains an unrecognized line.
sub READ_FILE
{
    my $name = shift;

    # explicit read mode: the file name is never parsed for mode characters
    open INPUT, "<", $name or die "Cannot open $name";
    @cp2uni = ();
    @lead_bytes = ();
    @uni2cp = ();

    while (<INPUT>)
    {
        next if /^\#/;  # skip comments
        next if /^$/;  # skip empty lines
        next if /\x1a/;  # skip ^Z
        next if (/^0x([0-9a-fA-F]+)\s+\#UNDEFINED/);  # undefined char

        if (/^0x([0-9a-fA-F]+)\s+\#DBCS LEAD BYTE/)
        {
            # a lead byte has no unicode value of its own
            my $cp = hex $1;
            push @lead_bytes,$cp;
            $cp2uni[$cp] = 0;
            next;
        }
        if (/^0x([0-9a-fA-F]+)\s+0x([0-9a-fA-F]+)\s+(\#.*)?/)
        {
            my $cp = hex $1;
            my $uni = hex $2;
            $cp2uni[$cp] = $uni unless defined($cp2uni[$cp]);
            $uni2cp[$uni] = $cp unless defined($uni2cp[$uni]);
            next;
        }
        die "$name: Unrecognized line $_\n";
    }
    close INPUT;
}


################################################################
# parse the symbol.txt file, since its syntax is different from the other ones
#
# Each mapping line starts with the unicode value followed by the code
# page value, both in hex without prefix.  Rebuilds the global tables
# @cp2uni and @uni2cp (the first mapping of a char wins) and empties
# @lead_bytes.
# Dies if the file cannot be opened or contains an unrecognized line.
sub READ_SYMBOL_FILE
{
    my $name = shift;

    # explicit read mode: the file name is never parsed for mode characters
    open INPUT, "<", $name or die "Cannot open $name";
    @cp2uni = ();
    @lead_bytes = ();
    @uni2cp = ();

    while (<INPUT>)
    {
        next if /^\#/;  # skip comments
        next if /^$/;  # skip empty lines
        next if /\x1a/;  # skip ^Z
        if (/^([0-9a-fA-F]+)\s+([0-9a-fA-F]+)\s+(\#.*)?/)
        {
            my $uni = hex $1;
            my $cp = hex $2;
            $cp2uni[$cp] = $uni unless defined($cp2uni[$cp]);
            $uni2cp[$uni] = $cp unless defined($uni2cp[$uni]);
            next;
        }
        die "$name: Unrecognized line $_\n";
    }
    close INPUT;
}


################################################################
# add default mappings once the file had been read
#
# Completes the global @uni2cp table from @unicode_aliases and
# @unicode_defaults, then gives an identity mapping to every char below
# 256 that has no entry in either @cp2uni or @uni2cp.
sub ADD_DEFAULT_MAPPINGS
{
    # Apply aliases

    foreach my $group (@unicode_aliases)
    {
        # code page value of the first char of the set that has one
        my ($known) = grep { defined } map { $uni2cp[$_] } @$group;
        next unless defined($known);

        # At least one char of the alias set is defined, set the others to the same value
        foreach my $member (@$group)
        {
            $uni2cp[$member] = $known unless defined($uni2cp[$member]);
        }
    }

    # For every src -> target mapping in the defaults table,
    # make uni2cp[src] = uni2cp[target] if uni2cp[target] is defined

    for ($src = 0; $src < 65536; $src++)
    {
        # only chars without a definition but with a default are handled
        next unless !defined($uni2cp[$src]) && defined($unicode_defaults[$src]);

        # follow the chain of defaults until a target char is defined
        # or the chain ends
        my $target = $unicode_defaults[$src];
        until (defined($uni2cp[$target]) || !defined($unicode_defaults[$target]))
        {
            $target = $unicode_defaults[$target];
        }

        $uni2cp[$src] = $uni2cp[$target] if defined($uni2cp[$target]);
    }

    # Add an identity mapping for all undefined chars

    for ($i = 0; $i < 256; $i++)
    {
        unless (defined($cp2uni[$i]) || defined($uni2cp[$i]))
        {
            $cp2uni[$i] = $uni2cp[$i] = $i;
        }
    }
}

################################################################
# dump an array of integers
#
# Arguments: a sprintf format for one element, the value used for
# undefined elements, and the elements themselves.
# Returns the formatted elements as one string: elements are separated
# by ", ", with a line break after every 8th one, and there is no
# separator after the last element.
sub DUMP_ARRAY
{
    my ($format,$default,@array) = @_;

    # the original "my $i, $ret" only declared $i and left $ret as a
    # package global; declare it properly
    my $ret = " ";

    # an empty list has always been dumped as a single default element
    @array = (undef) unless @array;

    foreach my $i (0 .. $#array)
    {
        $ret .= sprintf($format, defined $array[$i] ? $array[$i] : $default);
        next if $i == $#array;
        $ret .= (($i % 8) != 7) ? ", " : ",\n ";
    }
    return $ret;
}

################################################################
# dump an SBCS mapping table
#
# Arguments: the code page number and its description.
# Writes to the OUTPUT handle the cp2uni table, the uni2cp_low subtables
# (one per 256-char unicode range that contains a mapping, followed by a
# defaults subtable), the uni2cp_high table of subtable offsets and the
# sbcs_table descriptor, all built from the global @cp2uni and @uni2cp.
sub DUMP_SBCS_TABLE
{
    my ($codepage, $name) = @_;

    # output the ascii->unicode table

    printf OUTPUT "static const WCHAR cp2uni[256] =\n";
    printf OUTPUT "{\n%s\n};\n\n", DUMP_ARRAY( "0x%04x", $DEF_CHAR, @cp2uni[0 .. 255] );

    # count the number of unicode->ascii subtables that contain something
    # (the count starts at 1 for the defaults subtable)

    my @filled = ();
    my $subtables = 1;
    PAGE: foreach my $page (0 .. 255)
    {
        foreach my $low (0 .. 255)
        {
            next unless defined $uni2cp[($page << 8) + $low];
            $filled[$page] = 1;
            $subtables++;
            next PAGE;
        }
    }

    # output all the subtables into a single array

    printf OUTPUT "static const unsigned char uni2cp_low[%d] =\n{\n", $subtables*256;
    foreach my $page (grep { $filled[$_] } 0 .. 255)
    {
        my $first = $page << 8;
        printf OUTPUT " /* 0x%02x00 .. 0x%02xff */\n", $page, $page;
        printf OUTPUT "%s,\n", DUMP_ARRAY( "0x%02x", $DEF_CHAR, @uni2cp[$first .. $first+255] );
    }
    printf OUTPUT " /* defaults */\n";
    printf OUTPUT "%s\n};\n\n", DUMP_ARRAY( "0x%02x", 0, ($DEF_CHAR) x 256 );

    # output a table of the offsets of the subtables in the previous array;
    # ranges without a subtable point to the defaults subtable at the end

    my $defaults_pos = ($subtables-1) * 256;
    my $pos = 0;
    my @offsets = ();
    foreach my $page (0 .. 255)
    {
        if ($filled[$page])
        {
            push @offsets, $pos;
            $pos += 256;
        }
        else
        {
            push @offsets, $defaults_pos;
        }
    }
    printf OUTPUT "static const unsigned short uni2cp_high[256] =\n";
    printf OUTPUT "{\n%s\n};\n\n", DUMP_ARRAY( "0x%04x", 0, @offsets );

    # output the code page descriptor

    printf OUTPUT "const struct sbcs_table cptable_%03d =\n{\n", $codepage;
    printf OUTPUT " { %d, 1, 0x%04x, 0x%04x, \"%s\" },\n",
                  $codepage, $DEF_CHAR, $DEF_CHAR, $name;
    printf OUTPUT " cp2uni,\n";
    printf OUTPUT " uni2cp_low,\n";
    printf OUTPUT " uni2cp_high\n};\n";
}


################################################################
# dump a DBCS mapping table
#
# Arguments: the code page number and its description.
# Writes to the OUTPUT handle, from the global @cp2uni, @uni2cp and
# @lead_bytes:
#   - cp2uni: the single byte table, an optional defaults table shared by
#     the lead bytes that have no mapping, then one table per used lead byte
#   - cp2uni_leadbytes: for each byte, the index of its table in cp2uni
#     (0 for bytes that are not lead bytes)
#   - uni2cp_low / uni2cp_high: the unicode->code page subtables and
#     their offsets
#   - the dbcs_table descriptor, including the lead byte ranges
sub DUMP_DBCS_TABLE
{
    my ($codepage, $name) = @_;

    # build a list of lead bytes that are actually used

    my @lblist = ();
    foreach my $lead (@lead_bytes)
    {
        my $base = $lead << 8;
        push @lblist, $lead if grep { defined $cp2uni[$base + $_] } 0 .. 255;
    }

    # 1 if some lead bytes have no mapping and need the shared defaults table
    my $unused = ($#lead_bytes > $#lblist) ? 1 : 0;

    # output the ascii->unicode table for the single byte chars
    # (every following table is preceded by its separator, so that the
    # initializer is closed correctly even if no lead byte is used)

    printf OUTPUT "static const WCHAR cp2uni[%d] =\n", 256 * ($#lblist + 2 + $unused);
    printf OUTPUT "{\n%s", DUMP_ARRAY( "0x%04x", $DEF_CHAR, @cp2uni[0 .. 255] );

    # output the default table for unused lead bytes

    if ($unused)
    {
        printf OUTPUT ",\n /* unused lead bytes */\n";
        printf OUTPUT "%s", DUMP_ARRAY( "0x%04x", 0, ($DEF_CHAR) x 256 );
    }

    # output the ascii->unicode table for each DBCS lead byte

    foreach my $lead (@lblist)
    {
        my $base = $lead << 8;
        printf OUTPUT ",\n /* lead byte %02x */\n", $lead;
        printf OUTPUT "%s", DUMP_ARRAY( "0x%04x", $DEF_CHAR, @cp2uni[$base .. $base+255] );
    }
    printf OUTPUT "\n};\n\n";

    # output the lead byte subtables offsets

    my @lb_offsets = (0) x 256;
    foreach my $idx (0 .. $#lblist) { $lb_offsets[$lblist[$idx]] = $idx + 1; }
    if ($unused)
    {
        # increment all lead bytes offset to take into account the unused table
        foreach my $lead (@lead_bytes) { $lb_offsets[$lead]++; }
    }
    printf OUTPUT "static const unsigned char cp2uni_leadbytes[256] =\n";
    printf OUTPUT "{\n%s\n};\n\n", DUMP_ARRAY( "0x%02x", 0, @lb_offsets );

    # count the number of unicode->ascii subtables that contain something
    # (the count starts at 1 for the defaults subtable)

    my @filled = ();
    my $subtables = 1;
    PAGE: foreach my $page (0 .. 255)
    {
        foreach my $low (0 .. 255)
        {
            next unless defined $uni2cp[($page << 8) + $low];
            $filled[$page] = 1;
            $subtables++;
            next PAGE;
        }
    }

    # output all the subtables into a single array

    printf OUTPUT "static const unsigned short uni2cp_low[%d] =\n{\n", $subtables*256;
    foreach my $page (0 .. 255)
    {
        next unless $filled[$page];
        printf OUTPUT " /* 0x%02x00 .. 0x%02xff */\n", $page, $page;
        printf OUTPUT "%s,\n", DUMP_ARRAY( "0x%04x", $DEF_CHAR, @uni2cp[($page<<8) .. ($page<<8)+255] );
    }
    printf OUTPUT " /* defaults */\n";
    printf OUTPUT "%s\n};\n\n", DUMP_ARRAY( "0x%04x", 0, ($DEF_CHAR) x 256 );

    # output a table of the offsets of the subtables in the previous array;
    # ranges without a subtable point to the defaults subtable at the end

    my $pos = 0;
    my @offsets = ();
    foreach my $page (0 .. 255)
    {
        if ($filled[$page]) { push @offsets, $pos; $pos += 256; }
        else { push @offsets, ($subtables-1) * 256; }
    }
    printf OUTPUT "static const unsigned short uni2cp_high[256] =\n";
    printf OUTPUT "{\n%s\n};\n\n", DUMP_ARRAY( "0x%04x", 0, @offsets );

    # output the code page descriptor

    printf OUTPUT "const struct dbcs_table cptable_%03d =\n{\n", $codepage;
    printf OUTPUT " { %d, 2, 0x%04x, 0x%04x, \"%s\" },\n",
                  $codepage, $DEF_CHAR, $DEF_CHAR, $name;
    printf OUTPUT " cp2uni,\n";
    printf OUTPUT " cp2uni_leadbytes,\n";
    printf OUTPUT " uni2cp_low,\n";
    printf OUTPUT " uni2cp_high,\n";
    DUMP_LB_RANGES();
    printf OUTPUT "};\n";
}


################################################################
# dump the list of defined lead byte ranges
#
# Writes to the OUTPUT handle a brace-enclosed list holding the first and
# last byte of every run of consecutive values found in the global
# @lead_bytes, terminated by a 0x00, 0x00 pair.
sub DUMP_LB_RANGES
{
    # flag every byte value that is a lead byte
    my @is_lead = ();
    $is_lead[$_] = 1 foreach @lead_bytes;

    # collect the bounds of the ranges: a bound is stored each time the
    # lead byte state changes from one byte to the next
    my @bounds = ();
    my $inside = 0;
    foreach my $byte (0 .. 255)
    {
        my $lead = $is_lead[$byte] ? 1 : 0;
        next if $lead == $inside;
        push @bounds, ($lead ? $byte : $byte - 1);
        $inside = $lead;
    }
    # a range still open at the end stops at the last byte value
    push @bounds, 0xff if $inside;

    printf OUTPUT " { ";
    printf OUTPUT "0x%02x, ", $_ foreach @bounds;
    printf OUTPUT "0x00, 0x00 }\n";
}


################################################################
# dump the case mapping tables
#
# Creates casemap.c in the current directory and writes into it the
# casemap_lower and casemap_upper tables, built from the global
# @tolower_table and @toupper_table.
# Dies if the file cannot be created.
sub DUMP_CASE_MAPPINGS
{
    open OUTPUT, ">", "casemap.c" or die "Cannot create casemap.c";
    printf "Building casemap.c\n";

    # file header
    foreach my $line ("/* Unicode case mappings */\n",
                      "/* Automatically generated; DO NOT EDIT!! */\n\n",
                      "#include \"wine/unicode.h\"\n\n")
    {
        printf OUTPUT $line;
    }

    DUMP_CASE_TABLE( "casemap_lower", @tolower_table );
    DUMP_CASE_TABLE( "casemap_upper", @toupper_table );
    close OUTPUT;
}


################################################################
# dump a case mapping table
#
# Arguments: the name of the C array, followed by the mapping table
# (char -> mapped char, undefined for chars without mapping).
# Writes to the OUTPUT handle a WCHAR array made of:
#   - a 256-entry index giving, for each 256-char range, the position of
#     its subtable in the array (256, the defaults subtable, for ranges
#     without any mapping)
#   - the defaults subtable (256 zeros)
#   - one 256-entry subtable per range that contains a mapping; each
#     entry is the difference between the mapped char and the char,
#     modulo 0x10000
sub DUMP_CASE_TABLE
{
    my ($name,@table) = @_;

    # count the number of sub tables that contain something
    # (the first sub table is located after the index and the defaults)

    my @filled = ();
    my $pos = 512;
    PAGE: foreach my $page (0 .. 255)
    {
        foreach my $low (0 .. 255)
        {
            next unless defined $table[($page << 8) + $low];
            $filled[$page] = $pos;
            $pos += 256;
            next PAGE;
        }
    }

    # replace each mapping by its offset from the source char

    foreach my $char (0 .. 65535)
    {
        next unless defined $table[$char];
        $table[$char] = ($table[$char] - $char) & 0xffff;
    }

    # dump the table

    printf OUTPUT "const WCHAR %s[%d] =\n", $name, $pos;
    printf OUTPUT "{\n /* index */\n";
    # always dump the 256 index entries: @filled stops at the last range
    # that contains a mapping, and a shorter index would shift every
    # position stored in it
    printf OUTPUT "%s,\n", DUMP_ARRAY( "0x%04x", 256, @filled[0 .. 255] );
    printf OUTPUT " /* defaults */\n";
    printf OUTPUT "%s", DUMP_ARRAY( "0x%04x", 0, (0) x 256 );
    foreach my $page (0 .. 255)
    {
        next unless $filled[$page];
        printf OUTPUT ",\n /* 0x%02x00 .. 0x%02xff */\n", $page, $page;
        printf OUTPUT "%s", DUMP_ARRAY( "0x%04x", 0, @table[($page<<8) .. ($page<<8)+255] );
    }
    printf OUTPUT "\n};\n";
}


################################################################
# dump the ctype tables
#
# Creates wctype.c in the current directory and writes into it the
# wctype_table array: 256 offsets (one per 256-char range) followed by
# the rows of values they point to; identical rows are stored only once.
# The direction of each char is merged into the high 4 bits of its entry
# in the global @category_table, which is modified in place.
# Dies if the file cannot be created.
sub DUMP_CTYPE_TABLES
{
    open OUTPUT, ">", "wctype.c" or die "Cannot create wctype.c";
    printf "Building wctype.c\n";
    printf OUTPUT "/* Unicode ctype tables */\n";
    printf OUTPUT "/* Automatically generated; DO NOT EDIT!! */\n\n";
    printf OUTPUT "#include \"wine/unicode.h\"\n\n";

    # the first 256 entries are the row offsets, the rows are appended to them
    my @array = (0) x 256;

    # text of each row already stored -> offset of the row in @array
    my %sequences = ();

    # add the direction in the high 4 bits of the category
    foreach my $i (0 .. 65535)
    {
        $category_table[$i] |= $direction_table[$i] << 12;
    }

    # try to merge table rows
    foreach my $row (0 .. 255)
    {
        my @values = @category_table[($row<<8)..($row<<8)+255];
        my $rowtxt = sprintf "%04x" x 256, @values;
        if (defined($sequences{$rowtxt}))
        {
            # reuse an existing row
            $array[$row] = $sequences{$rowtxt};
        }
        else
        {
            # create a new row
            $sequences{$rowtxt} = $array[$row] = $#array + 1;
            push @array, @values;
        }
    }

    printf OUTPUT "const unsigned short wctype_table[%d] =\n{\n", $#array+1;
    printf OUTPUT " /* offsets */\n%s,\n", DUMP_ARRAY( "0x%04x", 0, @array[0..255] );
    printf OUTPUT " /* values */\n%s\n};\n", DUMP_ARRAY( "0x%04x", 0, @array[256..$#array] );

    close OUTPUT;
}


750 | ################################################################
|
---|
751 | # dump the char composition tables
|
---|
sub DUMP_COMPOSE_TABLES
{
    # Writes compose.c, containing two tables built from the file-level
    # @compose_table and @decomp_table arrays:
    #
    #   unicode_compose_table   - an index of (second char, offset) pairs,
    #                             closed by a (0, end offset) terminator,
    #                             followed for each second char by its
    #                             (first char, value) pairs sorted on the
    #                             first char.  Offsets count pairs, not WCHARs.
    #   unicode_decompose_table - a 256-entry main index, 16-entry sub-indexes
    #                             and 16-char subsets holding two values per
    #                             char.
    #
    # All working state is lexical, so nothing leaks into package variables
    # and the sub can safely be called more than once.
    # The package-level OUTPUT handle is kept, as in the other dump routines.

    open(OUTPUT, ">", "compose.c") or die "Cannot create compose.c: $!";
    printf "Building compose.c\n";
    printf OUTPUT "/* Unicode char composition */\n";
    printf OUTPUT "/* Automatically generated; DO NOT EDIT!! */\n\n";
    printf OUTPUT "#include \"wine/unicode.h\"\n\n";

    ######### composition table

    # group the entries by their second char: each @compose_table entry is
    # [ first char, second char, value ] (value presumably the composed char)
    my @by_second = ();
    foreach my $entry (@compose_table)
    {
        my ($first, $second, $value) = @$entry;
        push @{$by_second[$second]}, [ $first, $value ];
    }

    # list (and so count) the different second chars we have
    my @seconds = grep { defined $by_second[$_] } 0 .. 65535;
    my $count = scalar @seconds;

    # build the table of second chars and offsets; the first mapping starts
    # right after the index and its terminator, i.e. at pair number $count + 1
    my @index = ();
    my $pos = $count + 1;
    foreach my $second (@seconds)
    {
        push @index, $second, $pos;
        $pos += scalar @{$by_second[$second]};
    }
    # terminator with last position
    push @index, 0, $pos;
    printf OUTPUT "const WCHAR unicode_compose_table[0x%x] =\n{\n", 2*$pos;
    printf OUTPUT " /* second chars + offsets */\n%s", DUMP_ARRAY( "0x%04x", 0, @index );

    # build the table of first chars and mappings
    foreach my $second (@seconds)
    {
        my @pairs = map  { ( $_->[0], $_->[1] ) }
                    sort { $a->[0] <=> $b->[0] } @{$by_second[$second]};
        printf OUTPUT ",\n /* 0x%04x */\n%s", $second, DUMP_ARRAY( "0x%04x", 0, @pairs );
    }
    printf OUTPUT "\n};\n\nconst unsigned int unicode_compose_table_size = %d;\n\n", $count;

    ######### decomposition table

    # first determine all the 16-char subsets that contain something;
    # positions are relative to the start of the subset area for now
    my @subset_pos = (0) x 4096;
    my $data_pos = 16*2;  # for the null subset
    for (my $char = 0; $char < 65536; $char++)
    {
        next unless defined $decomp_table[$char];
        $subset_pos[$char >> 4] = $data_pos;
        $data_pos += 16*2;
        $char |= 15;  # skip to the end of this subset
    }
    my $total = $data_pos;

    # now count the 256-char subsets that contain something; 256 is the
    # position of the null sub-index, which directly follows the main index
    my @filled_idx = (256) x 256;
    my $index_pos = 256 + 16;
    for (my $subset = 0; $subset < 4096; $subset++)
    {
        next unless $subset_pos[$subset];
        $filled_idx[$subset >> 4] = $index_pos;
        $index_pos += 16;
        $subset |= 15;  # skip to the end of this 256-char range
    }
    my $null_offset = $index_pos;  # null mapping
    $total += $index_pos;

    # add the index offsets to the subsets positions
    foreach my $subset_start (@subset_pos)
    {
        $subset_start += $null_offset if $subset_start;
    }

    # dump the main index

    printf OUTPUT "const WCHAR unicode_decompose_table[%d] =\n", $total;
    printf OUTPUT "{\n /* index */\n";
    printf OUTPUT "%s", DUMP_ARRAY( "0x%04x", 0, @filled_idx );
    printf OUTPUT ",\n /* null sub-index */\n%s", DUMP_ARRAY( "0x%04x", 0, ($null_offset) x 16 );

    # dump the second-level indexes; empty slots point at the null mapping

    foreach my $idx (0 .. 255)
    {
        next unless ($filled_idx[$idx] > 256);
        my @sub_index = map { $_ || $null_offset } @subset_pos[($idx<<4)..($idx<<4)+15];
        printf OUTPUT ",\n /* sub-index %02x */\n", $idx;
        printf OUTPUT "%s", DUMP_ARRAY( "0x%04x", 0, @sub_index );
    }

    # dump the 16-char subsets

    printf OUTPUT ",\n /* null mapping */\n";
    printf OUTPUT "%s", DUMP_ARRAY( "0x%04x", 0, (0) x 32 );

    foreach my $subset (0 .. 4095)
    {
        next unless $subset_pos[$subset];
        my @values = (0) x 32;
        foreach my $j (0 .. 15)
        {
            my $decomp = $decomp_table[($subset << 4) + $j];
            next unless defined $decomp;
            $values[2 * $j] = $decomp->[0];
            $values[2 * $j + 1] = $decomp->[1];
        }
        printf OUTPUT ",\n /* 0x%03x0 .. 0x%03xf */\n", $subset, $subset;
        printf OUTPUT "%s", DUMP_ARRAY( "0x%04x", 0, @values );
    }

    printf OUTPUT "\n};\n";
    # buffered write errors only show up at close time
    close(OUTPUT) or die "Cannot write compose.c: $!";
}
|
---|
885 |
|
---|
886 |
|
---|
887 | ################################################################
|
---|
888 | # read an input file and generate the corresponding .c file
|
---|
sub HANDLE_FILE
{
    # Arguments: codepage number, mapping file name relative to $MAPPREFIX,
    # and a descriptive comment that is copied into the generated file.
    my ($codepage, $filename, $comment) = @_;
    my $mapfile = $MAPPREFIX . $filename;

    # every codepage goes through READ_FILE except the symbol one (42),
    # whose file is special and has its own reader
    if ($codepage != 42) { READ_FILE($mapfile); }
    else { READ_SYMBOL_FILE($mapfile); }

    ADD_DEFAULT_MAPPINGS();

    # the generated file is named after the zero-padded codepage number
    my $output = sprintf "c_%03d.c", $codepage;
    open(OUTPUT, ">", $output) or die "Cannot create $output";

    printf "Building %s from %s (%s)\n", $output, $filename, $comment;

    # file header

    printf OUTPUT "/* code page %03d (%s) */\n", $codepage, $comment;
    printf OUTPUT "/* generated from %s */\n", $mapfile;
    printf OUTPUT "/* DO NOT EDIT!! */\n\n";
    printf OUTPUT "#include \"wine/unicode.h\"\n\n";

    # dump all tables: double-byte form as soon as any lead byte was
    # recorded, single-byte form otherwise

    if (@lead_bytes) { DUMP_DBCS_TABLE( $codepage, $comment ); }
    else { DUMP_SBCS_TABLE( $codepage, $comment ); }
    close OUTPUT;
}
|
---|
915 |
|
---|
916 |
|
---|
917 | ################################################################
|
---|
918 | # output the list of codepage tables into the cptable.c file
|
---|
sub OUTPUT_CPTABLE
{
    # the codepage number is the first field of every @allfiles entry
    my @codepages = map { $_->[0] } @allfiles;

    # Three parts, in order: one extern declaration per codepage, the
    # opening of the cptables[] array sized to the number of codepages,
    # and one initializer per codepage, closed by "};".
    @tables_decl =
    (
        ( map { sprintf("extern union cptable cptable_%03d;\n", $_) } @codepages ),
        sprintf("\nstatic const union cptable * const cptables[%d] =\n{\n", scalar @codepages),
        ( map { sprintf(" &cptable_%03d,\n", $_) } @codepages ),
        "};",
    );

    # splice the generated text between the cpmap marks of cptable.c
    REPLACE_IN_FILE( "cptable.c", @tables_decl );
}
|
---|
938 |
|
---|
939 | ################################################################
|
---|
940 | # replace the contents of a file between ### cpmap ### marks
|
---|
941 |
|
---|
sub REPLACE_IN_FILE
{
    # Arguments: the file name, followed by the list of strings to place
    # between the "### cpmap begin ###" and "### cpmap end ###" lines.
    #
    # Both marker lines are kept; everything that was between them is
    # dropped, and a blank line is inserted before the end marker.
    # Dies if the file cannot be read or written, or if either marker is
    # missing; in the latter case the file is left untouched.
    my $name = shift;
    my @data = @_;
    my @lines = ();
    my $seen_begin = 0;
    my $seen_end = 0;

    open(my $in, "<", $name) or die "Can't open $name: $!";

    # copy everything up to and including the begin mark
    while (my $line = <$in>)
    {
        push @lines, $line;
        if ($line =~ /\#\#\# cpmap begin \#\#\#/) { $seen_begin = 1; last; }
    }
    push @lines, @data;

    # skip the old contents up to the end mark
    while (my $line = <$in>)
    {
        next unless $line =~ /\#\#\# cpmap end \#\#\#/;
        push @lines, "\n", $line;
        $seen_end = 1;
        last;
    }

    # keep the rest of the file as is
    push @lines, <$in>;
    close($in);

    # check the marks before truncating the file, so that a file without
    # them is never rewritten with missing or misplaced contents
    die "Can't find cpmap begin mark in $name" unless $seen_begin;
    die "Can't find cpmap end mark in $name" unless $seen_end;

    open(my $out, ">", $name) or die "Can't modify $name: $!";
    print {$out} @lines or die "Can't write to $name: $!";
    close($out) or die "Can't write to $name: $!";
}
|
---|