Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

unicode-decomp.pl@ 2960

Visit:

Last change on this file since 2960 was 2, checked in by bird, 23 years ago
Initial revision
Property cvs2svn:cvs-rev set to `1.1` Property svn:eol-style set to `native` Property svn:executable set to ``*
File size: 3.9 KB

Line
1	#!/usr/bin/perl -w
2	# unicode-decomp.pl - script to generate database for java.text.Collator
3	# Copyright (C) 1998, 1999, 2002 Free Software Foundation, Inc.
4	#
5	# This file is part of libjava.
6	#
7	# This software is copyrighted work licensed under the terms of the
8	# Libjava License. Please consult the file "LIBJAVA_LICENSE" for
9	# details.
10
11	# Code for reading UnicodeData.txt and generating the character
12	# decomposition tables in include/java-chardecomp.h. For now, the relevant
13	# Unicode definition files are found in libjava/gnu/gcj/convert/.
14	#
15	# Usage: ./unicode-decomp.pl [-n] <UnicodeData.txt> <decomp.h>
16	# where <UnicodeData.txt> is obtained from www.unicode.org (named
17	# UnicodeData-3.0.0.txt for Unicode version 3.0.0), and <decomp.h>
18	# is the final location of include/java-chardecomp.h.
19	# As of JDK 1.4, use Unicode version 3.0.0 for best results.
20	#
21	# If this exits with nonzero status, then you must investigate the
22	# cause of the problem.
23	# Diagnostics and other information to stderr.
24	# With -n, the files are not created, but all processing still occurs.
25
use strict;
use warnings;

# These map a character (its numeric code point) to its decomposition.
# Each value is the decomposition packed as a string of 16-bit
# big-endian integers (pack "n*"), one per decomposed character.
my %canonical_decomposition = ();
my %full_decomposition = ();

# Handle `-n': the generated header is discarded by sending it to
# /dev/null.  The output name is only filled in when an input file was
# actually given, so that a bare `-n' still yields the usage message
# instead of an attempt to open an undefined file name.
if (@ARGV && $ARGV[0] eq '-n')
{
    shift @ARGV;
    $ARGV[1] = '/dev/null' if @ARGV;
}
die "Usage: $0 [-n] <UnicodeData.txt> <java-chardecomp.h>\n"
    unless @ARGV == 2;
my ($unicode_file, $output_file) = @ARGV;

# Three-argument open keeps the file name from being parsed as a mode.
open(my $unicode_fh, '<', $unicode_file)
    or die "Can't open Unicode attribute file: $!\n";

# Process the Unicode file.
$| = 1;
my $count = 0;
print STDERR "Parsing attributes file";
while (my $line = <$unicode_fh>)
{
    # Progress indicator: one dot per 1000 input lines.
    print STDERR "." unless $count++ % 1000;
    chomp $line;
    $line =~ s/\r//g;

    # Field 0 is the character, field 5 its decomposition.
    my ($ch, $decomp) = (split /;/, $line)[0, 5];

    # Blank or truncated lines have no decomposition field; skip them
    # (and characters without a decomposition) without triggering
    # uninitialized-value warnings.
    next unless defined $decomp && $decomp ne '';
    $ch = hex($ch);

    my $is_full = 0;
    my @decomp = ();
    foreach my $field (split ' ', $decomp)
    {
        # A <tag> token carries no character; its presence routes the
        # mapping to the "full" table instead of the canonical one.
        if ($field =~ /^<.*>$/)
        {
            $is_full = 1;
            next;
        }
        push @decomp, hex($field);
    }

    my $packed = pack "n*", @decomp;
    if ($is_full)
    {
        $full_decomposition{$ch} = $packed;
    }
    else
    {
        $canonical_decomposition{$ch} = $packed;
    }
}
close($unicode_fh);

# Now generate decomposition tables.  DECOMP stays a bareword handle
# because the table-writing subroutines print to it by that name.
open(DECOMP, '>', $output_file) or die "Can't open output file: $!\n";
print STDERR "\nGenerating tables\n";
print DECOMP <<EOF;
// java-chardecomp.h - Decomposition character tables -- c++ --

#ifndef __JAVA_CHARDECOMP_H__
#define __JAVA_CHARDECOMP_H__


// These tables are automatically generated by the $0
// script. DO NOT EDIT the tables. Instead, fix the script
// and run it again.

// This file should only be included by natCollator.cc

struct decomp_entry
{
jchar key;
const char *value;
};

EOF

# Called with `&' because the sub is prototyped and defined further
# down; a plain call here would warn "called too early to check
# prototype".
&write_decompositions();

print DECOMP "#endif /* __JAVA_CHARDECOMP_H__ */\n";

# Buffered write errors (e.g. a full disk) only surface at close.
close(DECOMP) or die "Can't close output file: $!\n";
print STDERR "Done\n";
exit;
107
108
# Write a single decomposition table to the DECOMP filehandle.
#
# Arguments:
#   $name     - prefix of the generated C array, "<name>_decomposition"
#   $is_canon - accepted for the caller's benefit; not used here
#   %table    - maps a character code to its decomposition, packed as
#               16-bit big-endian integers
#
# Only keys in the range 0 .. 0xffff are written, in ascending order.
sub write_single_decomposition($$%)
{
    my ($name, $is_canon, %table) = @_;
    my @rows = ();

    for my $code (0 .. 0xffff)
    {
        my $packed = $table{$code};
        next unless defined $packed;

        # The expansion is written as a C string of \x escapes, two
        # bytes (high, then low) per 16-bit unit.  This is ugly, but
        # relatively space-efficient: most expansions are short, while
        # a few are very long (e.g. \uFDFA), so a fixed-width
        # representation would waste a lot of space.
        # NOTE(review): no explicit terminator is emitted here; only
        # the C literal's implicit trailing NUL follows the data.
        # Confirm this matches what the consumer of the table expects.
        my $escaped = join '',
            map { sprintf "\\x%02x\\x%02x", $_ >> 8, $_ & 0xff }
            unpack "n*", $packed;

        push @rows, sprintf(" { 0x%04x, \"%s\" }", $code, $escaped);
    }

    print DECOMP "static const decomp_entry ${name}_decomposition[] =\n{\n";
    print DECOMP join(",\n", @rows);
    print DECOMP "\n};\n\n";
}
141
# Write both decomposition tables to DECOMP: the canonical one first,
# then the full one.
sub write_decompositions()
{
    my @tables = (
        [ 'canonical', 1, \%canonical_decomposition ],
        [ 'full',      0, \%full_decomposition ],
    );

    for my $spec (@tables)
    {
        my ($label, $canon_flag, $map) = @$spec;
        &write_single_decomposition ($label, $canon_flag, %$map);
    }
}

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: trunk/gcc/libjava/scripts/unicode-decomp.pl@ 2960

Download in other formats: