source: trunk/gcc/libjava/scripts/unicode-decomp.pl@ 3689

Last change on this file since 3689 was 2, checked in by bird, 23 years ago

Initial revision

  • Property cvs2svn:cvs-rev set to 1.1
  • Property svn:eol-style set to native
  • Property svn:executable set to *
File size: 3.9 KB
Line 
1#!/usr/bin/perl -w
2# unicode-decomp.pl - script to generate database for java.text.Collator
3# Copyright (C) 1998, 1999, 2002 Free Software Foundation, Inc.
4#
5# This file is part of libjava.
6#
7# This software is copyrighted work licensed under the terms of the
8# Libjava License. Please consult the file "LIBJAVA_LICENSE" for
9# details.
10
11# Code for reading UnicodeData.txt and generating the character
12# decomposition tables. For now, the relevant Unicode definition files
13# are found in libjava/gnu/gcj/convert/.
14#
15# Usage: ./unicode-decomp.pl [-n] <UnicodeData.txt> <decomp.h>
16# where <UnicodeData.txt> is obtained from www.unicode.org (named
17# UnicodeData-3.0.0.txt for Unicode version 3.0.0), and <decomp.h>
18# is the final location of include/java-chardecomp.h.
19# As of JDK 1.4, use Unicode version 3.0.0 for best results.
20#
21# If this exits with nonzero status, then you must investigate the
22# cause of the problem.
23# Diagnostics and other information to stderr.
24# With -n, the files are not created, but all processing still occurs.
25
# These map characters to their decompositions.  Keys are numeric code
# points; values are the expansion packed as big-endian 16-bit units.
my %canonical_decomposition = ();
my %full_decomposition = ();

# Handle `-n' and open the input file.  With `-n' the output file name
# may be omitted; whether or not it is given, it is replaced by
# /dev/null so that all processing still runs but nothing is kept.
my $usage = "Usage: $0 [-n] <UnicodeData.txt> <java-chardecomp.h>\n";
my $dry_run = 0;
if (@ARGV && $ARGV[0] eq '-n')
{
    shift @ARGV;
    $dry_run = 1;
}

# Check the argument count before forcing the output name.  Otherwise a
# bare `-n' would leave an undefined input name in a two-element @ARGV
# and get past the usage check.
die $usage unless @ARGV == 2 || ($dry_run && @ARGV == 1);
$ARGV[1] = '/dev/null' if $dry_run;

# Three-argument open: the file name is never parsed for mode characters
# or surrounding whitespace.
open (UNICODE, '<', $ARGV[0])
    || die "Can't open Unicode attribute file: $!\n";
38
# Process the Unicode file.  Each line is a list of `;'-separated
# fields; the first is the code point in hex and the sixth is the
# decomposition mapping (empty when the character has none).
$| = 1;
my $count = 0;
print STDERR "Parsing attributes file";
while (<UNICODE>)
{
    # Progress indicator: one dot per 1000 input lines.
    print STDERR "." unless $count++ % 1000;
    chomp;
    s/\r//g;
    my ($ch, undef, undef, undef, undef, $decomp) = split ';';

    # Blank or truncated lines leave $decomp undefined; skip them along
    # with characters that have no decomposition, so that nothing below
    # operates on an undefined value.
    next unless defined $decomp && $decomp ne '';
    $ch = hex($ch);

    my $is_full = 0;
    my @units = ();
    foreach my $field (split (' ', $decomp))
    {
        # A leading <tag> marks a compatibility mapping; it goes into
        # the full table rather than the canonical one.
        if ($field =~ /^\<.*\>$/)
        {
            $is_full = 1;
            next;
        }

        my $code = hex ($field);
        if ($code > 0xffff)
        {
            # pack "n" keeps only the low 16 bits, which would silently
            # turn a supplementary code point into an unrelated
            # character.  Store it as a UTF-16 surrogate pair instead.
            $code -= 0x10000;
            push (@units, 0xd800 + ($code >> 10), 0xdc00 + ($code & 0x3ff));
        }
        else
        {
            push (@units, $code);
        }
    }

    my $s = pack "n*", @units;
    if ($is_full)
    {
        $full_decomposition{$ch} = $s;
    }
    else
    {
        $canonical_decomposition{$ch} = $s;
    }
}
close (UNICODE);
75
# Now generate decomposition tables.  The output file gets a fixed
# header, the two tables, and the closing include guard.
open (DECOMP, '>', $ARGV[1]) or die "Can't open output file: $!\n";
print STDERR "\nGenerating tables\n";
print DECOMP <<EOF;
// java-chardecomp.h - Decomposition character tables -*- c++ -*-

#ifndef __JAVA_CHARDECOMP_H__
#define __JAVA_CHARDECOMP_H__


// These tables are automatically generated by the $0
// script. DO NOT EDIT the tables. Instead, fix the script
// and run it again.

// This file should only be included by natCollator.cc

struct decomp_entry
{
 jchar key;
 const char *value;
};

EOF

# Called with `&' because the sub is defined, with a prototype, further
# down the file.
&write_decompositions;

print DECOMP "#endif /* __JAVA_CHARDECOMP_H__ */\n";

# Output is buffered, so a failed write may only show up at close time.
# Treat that as fatal so the script exits with nonzero status.
close (DECOMP) or die "Can't close output file: $!\n";
print STDERR "Done\n";
exit;
107
108
# Write a single decomposition table to the DECOMP handle as a C array
# of decomp_entry initializers.
#   $name     - array name prefix; the array is "${name}_decomposition"
#   $is_canon - accepted from the caller but not consulted here
#   %table    - code point => expansion packed as big-endian 16-bit units
sub write_single_decomposition($$%)
{
    my ($name, $is_canon, %table) = @_;

    # Each expansion becomes a C string literal holding two bytes (high,
    # then low) per 16-bit unit.  A variable-length string is used since
    # most expansions are short while a few (e.g. \uFDFA) are very long,
    # so fixed-size slots would waste a lot of space.
    # NOTE(review): the literal ends only with the terminator the C
    # compiler appends; confirm that this matches what the reader in
    # natCollator.cc expects.
    my @entries = ();
    foreach my $code (grep { defined $table{$_} } 0 .. 0xffff)
    {
        my $bytes = join ('',
                          map { sprintf ("\\x%02x\\x%02x", $_ >> 8, $_ & 0xff) }
                          unpack ("n*", $table{$code}));
        push (@entries, sprintf (" { 0x%04x, \"%s\" }", $code, $bytes));
    }

    # Entries are separated by ",\n", with no comma after the last one.
    print DECOMP "static const decomp_entry ${name}_decomposition[] =\n{\n",
                 join (",\n", @entries),
                 "\n};\n\n";
}
141
# Emit both decomposition tables: the canonical one first, then the
# full one.
sub write_decompositions()
{
    my @tables = (
        [ 'canonical', 1, \%canonical_decomposition ],
        [ 'full',      0, \%full_decomposition ],
    );

    foreach my $spec (@tables)
    {
        my ($name, $is_canon, $table) = @$spec;
        &write_single_decomposition ($name, $is_canon, %$table);
    }
}
Note: See TracBrowser for help on using the repository browser.