| 1 | /**********************************************************************
|
|---|
| 2 | ** Copyright (C) 2000-2002 Trolltech AS. All rights reserved.
|
|---|
| 3 | **
|
|---|
| 4 | ** This file is part of Qt Linguist.
|
|---|
| 5 | **
|
|---|
| 6 | ** This file may be distributed and/or modified under the terms of the
|
|---|
| 7 | ** GNU General Public License version 2 as published by the Free Software
|
|---|
| 8 | ** Foundation and appearing in the file LICENSE.GPL included in the
|
|---|
| 9 | ** packaging of this file.
|
|---|
| 10 | **
|
|---|
| 11 | ** Licensees holding valid Qt Enterprise Edition or Qt Professional Edition
|
|---|
| 12 | ** licenses may use this file in accordance with the Qt Commercial License
|
|---|
| 13 | ** Agreement provided with the Software.
|
|---|
| 14 | **
|
|---|
| 15 | ** This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
|
|---|
| 16 | ** WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
|
|---|
| 17 | **
|
|---|
| 18 | ** See http://www.trolltech.com/gpl/ for GPL licensing information.
|
|---|
| 19 | ** See http://www.trolltech.com/pricing.html or email sales@trolltech.com for
|
|---|
| 20 | ** information about Qt Commercial License Agreements.
|
|---|
| 21 | **
|
|---|
| 22 | ** Contact info@trolltech.com if any conditions of this licensing are
|
|---|
| 23 | ** not clear to you.
|
|---|
| 24 | **
|
|---|
| 25 | **********************************************************************/
|
|---|
| 26 |
|
|---|
| 27 | #include <metatranslator.h>
|
|---|
| 28 |
|
|---|
| 29 | #include <qmemarray.h>
|
|---|
| 30 | #include <qcstring.h>
|
|---|
| 31 | #include <qmap.h>
|
|---|
| 32 | #include <qstringlist.h>
|
|---|
| 33 |
|
|---|
| 34 | #include <ctype.h>
|
|---|
| 35 |
|
|---|
| 36 | typedef QMap<QCString, MetaTranslatorMessage> TMM;
|
|---|
| 37 | typedef QValueList<MetaTranslatorMessage> TML;
|
|---|
| 38 |
|
|---|
/*
  Tells whether 'c' is a punctuation or whitespace character, i.e. one of
  the characters numberLength() accepts between the digits of a number
  (as in "3.0" or "1,000").
*/
static bool isDigitFriendly( int c )
{
    // Narrow to unsigned char first: the <ctype.h> classifiers must not be
    // handed negative values (other than EOF), which a plain 'char' holding
    // an 8-bit character could produce.
    const unsigned char ch = static_cast<unsigned char>( c );

    if ( isspace(ch) )
        return true;
    return ispunct(ch) != 0;
}
|
|---|
| 43 |
|
|---|
| 44 | static int numberLength( const char *s )
|
|---|
| 45 | {
|
|---|
| 46 | int i = 0;
|
|---|
| 47 |
|
|---|
| 48 | if ( isdigit((uchar)s[0]) ) {
|
|---|
| 49 | do {
|
|---|
| 50 | i++;
|
|---|
| 51 | } while (isdigit((uchar)s[i]) ||
|
|---|
| 52 | (isDigitFriendly(s[i]) &&
|
|---|
| 53 | (isdigit((uchar)s[i + 1]) ||
|
|---|
| 54 | (isDigitFriendly(s[i + 1]) && isdigit((uchar)s[i + 2])))));
|
|---|
| 55 | }
|
|---|
| 56 | return i;
|
|---|
| 57 | }
|
|---|
| 58 |
|
|---|
| 59 | /*
|
|---|
| 60 | Returns a version of 'key' where all numbers have been replaced by zeroes. If
|
|---|
| 61 | there were none, returns "".
|
|---|
| 62 | */
|
|---|
| 63 | static QCString zeroKey( const char *key )
|
|---|
| 64 | {
|
|---|
| 65 | QCString zeroed( strlen(key) + 1 );
|
|---|
| 66 | char *z = zeroed.data();
|
|---|
| 67 | int i = 0, j = 0;
|
|---|
| 68 | int len;
|
|---|
| 69 | bool metSomething = FALSE;
|
|---|
| 70 |
|
|---|
| 71 | while ( key[i] != '\0' ) {
|
|---|
| 72 | len = numberLength( key + i );
|
|---|
| 73 | if ( len > 0 ) {
|
|---|
| 74 | i += len;
|
|---|
| 75 | z[j++] = '0';
|
|---|
| 76 | metSomething = TRUE;
|
|---|
| 77 | } else {
|
|---|
| 78 | z[j++] = key[i++];
|
|---|
| 79 | }
|
|---|
| 80 | }
|
|---|
| 81 | z[j] = '\0';
|
|---|
| 82 |
|
|---|
| 83 | if ( metSomething )
|
|---|
| 84 | return zeroed;
|
|---|
| 85 | else
|
|---|
| 86 | return "";
|
|---|
| 87 | }
|
|---|
| 88 |
|
|---|
| 89 | static QString translationAttempt( const QString& oldTranslation,
|
|---|
| 90 | const char *oldSource,
|
|---|
| 91 | const char *newSource )
|
|---|
| 92 | {
|
|---|
| 93 | int p = zeroKey( oldSource ).contains( '0' );
|
|---|
| 94 | int oldSourceLen = qstrlen( oldSource );
|
|---|
| 95 | QString attempt;
|
|---|
| 96 | QStringList oldNumbers;
|
|---|
| 97 | QStringList newNumbers;
|
|---|
| 98 | QMemArray<bool> met( p );
|
|---|
| 99 | QMemArray<int> matchedYet( p );
|
|---|
| 100 | int i, j;
|
|---|
| 101 | int k = 0, ell, best;
|
|---|
| 102 | int m, n;
|
|---|
| 103 | int pass;
|
|---|
| 104 |
|
|---|
| 105 | /*
|
|---|
| 106 | This algorithm is hard to follow, so we'll consider an example
|
|---|
| 107 | all along: oldTranslation is "XeT 3.0", oldSource is "TeX 3.0"
|
|---|
| 108 | and newSource is "XeT 3.1".
|
|---|
| 109 |
|
|---|
| 110 | First, we set up two tables: oldNumbers and newNumbers. In our
|
|---|
| 111 | example, oldNumber[0] is "3.0" and newNumber[0] is "3.1".
|
|---|
| 112 | */
|
|---|
| 113 | for ( i = 0, j = 0; i < oldSourceLen; i++, j++ ) {
|
|---|
| 114 | m = numberLength( oldSource + i );
|
|---|
| 115 | n = numberLength( newSource + j );
|
|---|
| 116 | if ( m > 0 ) {
|
|---|
| 117 | oldNumbers.append( QCString(oldSource + i, m + 1) );
|
|---|
| 118 | newNumbers.append( QCString(newSource + j, n + 1) );
|
|---|
| 119 | i += m;
|
|---|
| 120 | j += n;
|
|---|
| 121 | met[k] = FALSE;
|
|---|
| 122 | matchedYet[k] = 0;
|
|---|
| 123 | k++;
|
|---|
| 124 | }
|
|---|
| 125 | }
|
|---|
| 126 |
|
|---|
| 127 | /*
|
|---|
| 128 | We now go over the old translation, "XeT 3.0", one letter at a
|
|---|
| 129 | time, looking for numbers found in oldNumbers. Whenever such a
|
|---|
| 130 | number is met, it is replaced with its newNumber equivalent. In
|
|---|
| 131 | our example, the "3.0" of "XeT 3.0" becomes "3.1".
|
|---|
| 132 | */
|
|---|
| 133 | for ( i = 0; i < (int) oldTranslation.length(); i++ ) {
|
|---|
| 134 | attempt += oldTranslation[i];
|
|---|
| 135 | for ( k = 0; k < p; k++ ) {
|
|---|
| 136 | if ( oldTranslation[i] == oldNumbers[k][matchedYet[k]] )
|
|---|
| 137 | matchedYet[k]++;
|
|---|
| 138 | else
|
|---|
| 139 | matchedYet[k] = 0;
|
|---|
| 140 | }
|
|---|
| 141 |
|
|---|
| 142 | /*
|
|---|
| 143 | Let's find out if the last character ended a match. We make
|
|---|
| 144 | two passes over the data. In the first pass, we try to
|
|---|
| 145 | match only numbers that weren't matched yet; if that fails,
|
|---|
| 146 | the second pass does the trick. This is useful in some
|
|---|
| 147 | suspicious cases, flagged below.
|
|---|
| 148 | */
|
|---|
| 149 | for ( pass = 0; pass < 2; pass++ ) {
|
|---|
| 150 | best = p; // an impossible value
|
|---|
| 151 | for ( k = 0; k < p; k++ ) {
|
|---|
| 152 | if ( (!met[k] || pass > 0) &&
|
|---|
| 153 | matchedYet[k] == (int) oldNumbers[k].length() &&
|
|---|
| 154 | numberLength(oldTranslation.latin1() + (i + 1) -
|
|---|
| 155 | matchedYet[k]) == matchedYet[k] ) {
|
|---|
| 156 | // the longer the better
|
|---|
| 157 | if ( best == p || matchedYet[k] > matchedYet[best] )
|
|---|
| 158 | best = k;
|
|---|
| 159 | }
|
|---|
| 160 | }
|
|---|
| 161 | if ( best != p ) {
|
|---|
| 162 | attempt.truncate( attempt.length() - matchedYet[best] );
|
|---|
| 163 | attempt += newNumbers[best];
|
|---|
| 164 | met[best] = TRUE;
|
|---|
| 165 | for ( k = 0; k < p; k++ )
|
|---|
| 166 | matchedYet[k] = 0;
|
|---|
| 167 | break;
|
|---|
| 168 | }
|
|---|
| 169 | }
|
|---|
| 170 | }
|
|---|
| 171 |
|
|---|
| 172 | /*
|
|---|
| 173 | We flag two kinds of suspicious cases. They are identified as
|
|---|
| 174 | such with comments such as "{2000?}" at the end.
|
|---|
| 175 |
|
|---|
| 176 | Example of the first kind: old source text "TeX 3.0" translated
|
|---|
| 177 | as "XeT 2.0" is flagged "TeX 2.0 {3.0?}", no matter what the
|
|---|
| 178 | new text is.
|
|---|
| 179 | */
|
|---|
| 180 | for ( k = 0; k < p; k++ ) {
|
|---|
| 181 | if ( !met[k] )
|
|---|
| 182 | attempt += QString( " {" ) + newNumbers[k] + QString( "?}" );
|
|---|
| 183 | }
|
|---|
| 184 |
|
|---|
| 185 | /*
|
|---|
| 186 | Example of the second kind: "1 of 1" translated as "1 af 1",
|
|---|
| 187 | with new source text "1 of 2", generates "1 af 2 {1 or 2?}"
|
|---|
| 188 | because it's not clear which of "1 af 2" and "2 af 1" is right.
|
|---|
| 189 | */
|
|---|
| 190 | for ( k = 0; k < p; k++ ) {
|
|---|
| 191 | for ( ell = 0; ell < p; ell++ ) {
|
|---|
| 192 | if ( k != ell && oldNumbers[k] == oldNumbers[ell] &&
|
|---|
| 193 | newNumbers[k] < newNumbers[ell] )
|
|---|
| 194 | attempt += QString( " {" ) + newNumbers[k] + QString( " or " ) +
|
|---|
| 195 | newNumbers[ell] + QString( "?}" );
|
|---|
| 196 | }
|
|---|
| 197 | }
|
|---|
| 198 | return attempt;
|
|---|
| 199 | }
|
|---|
| 200 |
|
|---|
| 201 | /*
|
|---|
| 202 | Augments a MetaTranslator with translations easily derived from
|
|---|
| 203 | similar existing (probably obsolete) translations.
|
|---|
| 204 |
|
|---|
| 205 | For example, if "TeX 3.0" is translated as "XeT 3.0" and "TeX 3.1"
|
|---|
| 206 | has no translation, "XeT 3.1" is added to the translator and is
|
|---|
| 207 | marked Unfinished.
|
|---|
| 208 | */
|
|---|
| 209 | void applyNumberHeuristic( MetaTranslator *tor, bool verbose )
|
|---|
| 210 | {
|
|---|
| 211 | TMM translated, untranslated;
|
|---|
| 212 | TMM::Iterator t, u;
|
|---|
| 213 | TML all = tor->messages();
|
|---|
| 214 | TML::Iterator it;
|
|---|
| 215 | int inserted = 0;
|
|---|
| 216 |
|
|---|
| 217 | for ( it = all.begin(); it != all.end(); ++it ) {
|
|---|
| 218 | if ( (*it).type() == MetaTranslatorMessage::Unfinished ) {
|
|---|
| 219 | if ( (*it).translation().isEmpty() )
|
|---|
| 220 | untranslated.insert(QCString((*it).context()) + "\n" + (*it).sourceText() + "\n"
|
|---|
| 221 | + (*it).comment(), *it);
|
|---|
| 222 | } else if ( !(*it).translation().isEmpty() ) {
|
|---|
| 223 | translated.insert( zeroKey((*it).sourceText()), *it );
|
|---|
| 224 | }
|
|---|
| 225 | }
|
|---|
| 226 |
|
|---|
| 227 | for ( u = untranslated.begin(); u != untranslated.end(); ++u ) {
|
|---|
| 228 | t = translated.find( zeroKey((*u).sourceText()) );
|
|---|
| 229 | if ( t != translated.end() && !t.key().isEmpty() &&
|
|---|
| 230 | qstrcmp((*t).sourceText(), (*u).sourceText()) != 0 ) {
|
|---|
| 231 | MetaTranslatorMessage m( *u );
|
|---|
| 232 | m.setTranslation(translationAttempt((*t).translation(), (*t).sourceText(),
|
|---|
| 233 | (*u).sourceText()));
|
|---|
| 234 | tor->insert( m );
|
|---|
| 235 | inserted++;
|
|---|
| 236 | }
|
|---|
| 237 | }
|
|---|
| 238 | if ( verbose && inserted != 0 )
|
|---|
| 239 | fprintf( stderr, " number heuristic provided %d translation%s\n",
|
|---|
| 240 | inserted, inserted == 1 ? "" : "s" );
|
|---|
| 241 | }
|
|---|