1 | /**********************************************************************
|
---|
2 | ** Copyright (C) 2000-2002 Trolltech AS. All rights reserved.
|
---|
3 | **
|
---|
4 | ** This file is part of Qt Linguist.
|
---|
5 | **
|
---|
6 | ** This file may be distributed and/or modified under the terms of the
|
---|
7 | ** GNU General Public License version 2 as published by the Free Software
|
---|
8 | ** Foundation and appearing in the file LICENSE.GPL included in the
|
---|
9 | ** packaging of this file.
|
---|
10 | **
|
---|
11 | ** Licensees holding valid Qt Enterprise Edition or Qt Professional Edition
|
---|
12 | ** licenses may use this file in accordance with the Qt Commercial License
|
---|
13 | ** Agreement provided with the Software.
|
---|
14 | **
|
---|
15 | ** This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
|
---|
16 | ** WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
|
---|
17 | **
|
---|
18 | ** See http://www.trolltech.com/gpl/ for GPL licensing information.
|
---|
19 | ** See http://www.trolltech.com/pricing.html or email sales@trolltech.com for
|
---|
20 | ** information about Qt Commercial License Agreements.
|
---|
21 | **
|
---|
22 | ** Contact info@trolltech.com if any conditions of this licensing are
|
---|
23 | ** not clear to you.
|
---|
24 | **
|
---|
25 | **********************************************************************/
|
---|
26 |
|
---|
27 | #include <metatranslator.h>
|
---|
28 |
|
---|
29 | #include <qmemarray.h>
|
---|
30 | #include <qcstring.h>
|
---|
31 | #include <qmap.h>
|
---|
32 | #include <qstringlist.h>
|
---|
33 |
|
---|
34 | #include <ctype.h>
|
---|
35 |
|
---|
// TMM: messages indexed by a QCString lookup key. applyNumberHeuristic()
// uses it both for translated messages (keyed by the zeroKey() of their
// source text) and for untranslated ones (keyed by context, source text
// and comment joined with "\n").
36 | typedef QMap<QCString, MetaTranslatorMessage> TMM;
|
---|
// TML: plain list of messages, the type used to hold the result of
// MetaTranslator::messages().
37 | typedef QValueList<MetaTranslatorMessage> TML;
|
---|
38 |
|
---|
/*
  Tells whether 'c' may appear between the digits of a number: any
  punctuation or whitespace character qualifies. The value is narrowed
  to unsigned char before being handed to the <ctype.h> classifiers.
*/
static bool isDigitFriendly( int c )
{
    const unsigned char ch = static_cast<unsigned char>( c );

    if ( ispunct(ch) )
        return true;
    return isspace( ch ) != 0;
}
|
---|
43 |
|
---|
44 | static int numberLength( const char *s )
|
---|
45 | {
|
---|
46 | int i = 0;
|
---|
47 |
|
---|
48 | if ( isdigit((uchar)s[0]) ) {
|
---|
49 | do {
|
---|
50 | i++;
|
---|
51 | } while (isdigit((uchar)s[i]) ||
|
---|
52 | (isDigitFriendly(s[i]) &&
|
---|
53 | (isdigit((uchar)s[i + 1]) ||
|
---|
54 | (isDigitFriendly(s[i + 1]) && isdigit((uchar)s[i + 2])))));
|
---|
55 | }
|
---|
56 | return i;
|
---|
57 | }
|
---|
58 |
|
---|
59 | /*
|
---|
60 | Returns a version of 'key' where all numbers have been replaced by zeroes. If
|
---|
61 | there were none, returns "".
|
---|
62 | */
|
---|
63 | static QCString zeroKey( const char *key )
|
---|
64 | {
|
---|
65 | QCString zeroed( strlen(key) + 1 );
|
---|
66 | char *z = zeroed.data();
|
---|
67 | int i = 0, j = 0;
|
---|
68 | int len;
|
---|
69 | bool metSomething = FALSE;
|
---|
70 |
|
---|
71 | while ( key[i] != '\0' ) {
|
---|
72 | len = numberLength( key + i );
|
---|
73 | if ( len > 0 ) {
|
---|
74 | i += len;
|
---|
75 | z[j++] = '0';
|
---|
76 | metSomething = TRUE;
|
---|
77 | } else {
|
---|
78 | z[j++] = key[i++];
|
---|
79 | }
|
---|
80 | }
|
---|
81 | z[j] = '\0';
|
---|
82 |
|
---|
83 | if ( metSomething )
|
---|
84 | return zeroed;
|
---|
85 | else
|
---|
86 | return "";
|
---|
87 | }
|
---|
88 |
|
---|
89 | static QString translationAttempt( const QString& oldTranslation,
|
---|
90 | const char *oldSource,
|
---|
91 | const char *newSource )
|
---|
92 | {
|
---|
93 | int p = zeroKey( oldSource ).contains( '0' );
|
---|
94 | int oldSourceLen = qstrlen( oldSource );
|
---|
95 | QString attempt;
|
---|
96 | QStringList oldNumbers;
|
---|
97 | QStringList newNumbers;
|
---|
98 | QMemArray<bool> met( p );
|
---|
99 | QMemArray<int> matchedYet( p );
|
---|
100 | int i, j;
|
---|
101 | int k = 0, ell, best;
|
---|
102 | int m, n;
|
---|
103 | int pass;
|
---|
104 |
|
---|
105 | /*
|
---|
106 | This algorithm is hard to follow, so we'll consider an example
|
---|
107 | all along: oldTranslation is "XeT 3.0", oldSource is "TeX 3.0"
|
---|
108 | and newSource is "XeT 3.1".
|
---|
109 |
|
---|
110 | First, we set up two tables: oldNumbers and newNumbers. In our
|
---|
111 | example, oldNumber[0] is "3.0" and newNumber[0] is "3.1".
|
---|
112 | */
|
---|
113 | for ( i = 0, j = 0; i < oldSourceLen; i++, j++ ) {
|
---|
114 | m = numberLength( oldSource + i );
|
---|
115 | n = numberLength( newSource + j );
|
---|
116 | if ( m > 0 ) {
|
---|
117 | oldNumbers.append( QCString(oldSource + i, m + 1) );
|
---|
118 | newNumbers.append( QCString(newSource + j, n + 1) );
|
---|
119 | i += m;
|
---|
120 | j += n;
|
---|
121 | met[k] = FALSE;
|
---|
122 | matchedYet[k] = 0;
|
---|
123 | k++;
|
---|
124 | }
|
---|
125 | }
|
---|
126 |
|
---|
127 | /*
|
---|
128 | We now go over the old translation, "XeT 3.0", one letter at a
|
---|
129 | time, looking for numbers found in oldNumbers. Whenever such a
|
---|
130 | number is met, it is replaced with its newNumber equivalent. In
|
---|
131 | our example, the "3.0" of "XeT 3.0" becomes "3.1".
|
---|
132 | */
|
---|
133 | for ( i = 0; i < (int) oldTranslation.length(); i++ ) {
|
---|
134 | attempt += oldTranslation[i];
|
---|
135 | for ( k = 0; k < p; k++ ) {
|
---|
136 | if ( oldTranslation[i] == oldNumbers[k][matchedYet[k]] )
|
---|
137 | matchedYet[k]++;
|
---|
138 | else
|
---|
139 | matchedYet[k] = 0;
|
---|
140 | }
|
---|
141 |
|
---|
142 | /*
|
---|
143 | Let's find out if the last character ended a match. We make
|
---|
144 | two passes over the data. In the first pass, we try to
|
---|
145 | match only numbers that weren't matched yet; if that fails,
|
---|
146 | the second pass does the trick. This is useful in some
|
---|
147 | suspicious cases, flagged below.
|
---|
148 | */
|
---|
149 | for ( pass = 0; pass < 2; pass++ ) {
|
---|
150 | best = p; // an impossible value
|
---|
151 | for ( k = 0; k < p; k++ ) {
|
---|
152 | if ( (!met[k] || pass > 0) &&
|
---|
153 | matchedYet[k] == (int) oldNumbers[k].length() &&
|
---|
154 | numberLength(oldTranslation.latin1() + (i + 1) -
|
---|
155 | matchedYet[k]) == matchedYet[k] ) {
|
---|
156 | // the longer the better
|
---|
157 | if ( best == p || matchedYet[k] > matchedYet[best] )
|
---|
158 | best = k;
|
---|
159 | }
|
---|
160 | }
|
---|
161 | if ( best != p ) {
|
---|
162 | attempt.truncate( attempt.length() - matchedYet[best] );
|
---|
163 | attempt += newNumbers[best];
|
---|
164 | met[best] = TRUE;
|
---|
165 | for ( k = 0; k < p; k++ )
|
---|
166 | matchedYet[k] = 0;
|
---|
167 | break;
|
---|
168 | }
|
---|
169 | }
|
---|
170 | }
|
---|
171 |
|
---|
172 | /*
|
---|
173 | We flag two kinds of suspicious cases. They are identified as
|
---|
174 | such with comments such as "{2000?}" at the end.
|
---|
175 |
|
---|
176 | Example of the first kind: old source text "TeX 3.0" translated
|
---|
177 | as "XeT 2.0" is flagged "TeX 2.0 {3.0?}", no matter what the
|
---|
178 | new text is.
|
---|
179 | */
|
---|
180 | for ( k = 0; k < p; k++ ) {
|
---|
181 | if ( !met[k] )
|
---|
182 | attempt += QString( " {" ) + newNumbers[k] + QString( "?}" );
|
---|
183 | }
|
---|
184 |
|
---|
185 | /*
|
---|
186 | Example of the second kind: "1 of 1" translated as "1 af 1",
|
---|
187 | with new source text "1 of 2", generates "1 af 2 {1 or 2?}"
|
---|
188 | because it's not clear which of "1 af 2" and "2 af 1" is right.
|
---|
189 | */
|
---|
190 | for ( k = 0; k < p; k++ ) {
|
---|
191 | for ( ell = 0; ell < p; ell++ ) {
|
---|
192 | if ( k != ell && oldNumbers[k] == oldNumbers[ell] &&
|
---|
193 | newNumbers[k] < newNumbers[ell] )
|
---|
194 | attempt += QString( " {" ) + newNumbers[k] + QString( " or " ) +
|
---|
195 | newNumbers[ell] + QString( "?}" );
|
---|
196 | }
|
---|
197 | }
|
---|
198 | return attempt;
|
---|
199 | }
|
---|
200 |
|
---|
201 | /*
|
---|
202 | Augments a MetaTranslator with translations easily derived from
|
---|
203 | similar existing (probably obsolete) translations.
|
---|
204 |
|
---|
205 | For example, if "TeX 3.0" is translated as "XeT 3.0" and "TeX 3.1"
|
---|
206 | has no translation, "XeT 3.1" is added to the translator and is
|
---|
207 | marked Unfinished.
|
---|
208 | */
|
---|
209 | void applyNumberHeuristic( MetaTranslator *tor, bool verbose )
|
---|
210 | {
|
---|
211 | TMM translated, untranslated;
|
---|
212 | TMM::Iterator t, u;
|
---|
213 | TML all = tor->messages();
|
---|
214 | TML::Iterator it;
|
---|
215 | int inserted = 0;
|
---|
216 |
|
---|
217 | for ( it = all.begin(); it != all.end(); ++it ) {
|
---|
218 | if ( (*it).type() == MetaTranslatorMessage::Unfinished ) {
|
---|
219 | if ( (*it).translation().isEmpty() )
|
---|
220 | untranslated.insert(QCString((*it).context()) + "\n" + (*it).sourceText() + "\n"
|
---|
221 | + (*it).comment(), *it);
|
---|
222 | } else if ( !(*it).translation().isEmpty() ) {
|
---|
223 | translated.insert( zeroKey((*it).sourceText()), *it );
|
---|
224 | }
|
---|
225 | }
|
---|
226 |
|
---|
227 | for ( u = untranslated.begin(); u != untranslated.end(); ++u ) {
|
---|
228 | t = translated.find( zeroKey((*u).sourceText()) );
|
---|
229 | if ( t != translated.end() && !t.key().isEmpty() &&
|
---|
230 | qstrcmp((*t).sourceText(), (*u).sourceText()) != 0 ) {
|
---|
231 | MetaTranslatorMessage m( *u );
|
---|
232 | m.setTranslation(translationAttempt((*t).translation(), (*t).sourceText(),
|
---|
233 | (*u).sourceText()));
|
---|
234 | tor->insert( m );
|
---|
235 | inserted++;
|
---|
236 | }
|
---|
237 | }
|
---|
238 | if ( verbose && inserted != 0 )
|
---|
239 | fprintf( stderr, " number heuristic provided %d translation%s\n",
|
---|
240 | inserted, inserted == 1 ? "" : "s" );
|
---|
241 | }
|
---|