1 | /****************************************************************************
|
---|
2 | ** $Id: qtextcodec.cpp 2 2005-11-16 15:49:26Z dmik $
|
---|
3 | **
|
---|
4 | ** Implementation of QTextCodec class
|
---|
5 | **
|
---|
6 | ** Created : 981015
|
---|
7 | **
|
---|
8 | ** Copyright (C) 1998-2002 Trolltech AS. All rights reserved.
|
---|
9 | **
|
---|
10 | ** This file is part of the tools module of the Qt GUI Toolkit.
|
---|
11 | **
|
---|
12 | ** This file may be distributed under the terms of the Q Public License
|
---|
13 | ** as defined by Trolltech AS of Norway and appearing in the file
|
---|
14 | ** LICENSE.QPL included in the packaging of this file.
|
---|
15 | **
|
---|
16 | ** This file may be distributed and/or modified under the terms of the
|
---|
17 | ** GNU General Public License version 2 as published by the Free Software
|
---|
18 | ** Foundation and appearing in the file LICENSE.GPL included in the
|
---|
19 | ** packaging of this file.
|
---|
20 | **
|
---|
21 | ** Licensees holding valid Qt Enterprise Edition or Qt Professional Edition
|
---|
22 | ** licenses may use this file in accordance with the Qt Commercial License
|
---|
23 | ** Agreement provided with the Software.
|
---|
24 | **
|
---|
25 | ** This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
|
---|
26 | ** WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
|
---|
27 | **
|
---|
28 | ** See http://www.trolltech.com/pricing.html or email sales@trolltech.com for
|
---|
29 | ** information about Qt Commercial License Agreements.
|
---|
30 | ** See http://www.trolltech.com/qpl/ for QPL licensing information.
|
---|
31 | ** See http://www.trolltech.com/gpl/ for GPL licensing information.
|
---|
32 | **
|
---|
33 | ** Contact info@trolltech.com if any conditions of this licensing are
|
---|
34 | ** not clear to you.
|
---|
35 | **
|
---|
36 | **********************************************************************/
|
---|
37 |
|
---|
38 | #include "qplatformdefs.h"
|
---|
39 |
|
---|
40 | // UNIX Large File Support redefines open -> open64
|
---|
41 | #if defined(open)
|
---|
42 | # undef open
|
---|
43 | #endif
|
---|
44 |
|
---|
45 | #include "qtextcodec.h"
|
---|
46 | #ifndef QT_NO_TEXTCODEC
|
---|
47 |
|
---|
48 | #include "qvaluelist.h"
|
---|
49 | #include "qtextcodecfactory.h"
|
---|
50 | #include "qutfcodec.h"
|
---|
51 | #include "qnamespace.h"
|
---|
52 | #ifndef QT_NO_CODECS
|
---|
53 | #include "qrtlcodec.h"
|
---|
54 | #include "qtsciicodec.h"
|
---|
55 | #include "qisciicodec_p.h"
|
---|
56 | #endif // QT_NO_CODECS
|
---|
57 | #ifndef QT_NO_BIG_CODECS
|
---|
58 | #include "qbig5codec.h"
|
---|
59 | #include "qeucjpcodec.h"
|
---|
60 | #include "qeuckrcodec.h"
|
---|
61 | #include "qgb18030codec.h"
|
---|
62 | #include "qjiscodec.h"
|
---|
63 | #include "qjpunicode.h"
|
---|
64 | #include "qsjiscodec.h"
|
---|
65 | #endif // QT_NO_BIG_CODECS
|
---|
66 | #include "qfile.h"
|
---|
67 | #include "qstrlist.h"
|
---|
68 | #include "qstring.h"
|
---|
69 | #include "../tools/qlocale_p.h"
|
---|
70 |
|
---|
71 | #if !defined(QT_NO_CODECS) && !defined(QT_NO_BIG_CODECS) && defined(Q_WS_X11)
|
---|
72 | # include "qfontcodecs_p.h"
|
---|
73 | #endif
|
---|
74 |
|
---|
75 | #ifdef QT_THREAD_SUPPORT
|
---|
76 | # include <private/qmutexpool_p.h>
|
---|
77 | #endif // QT_THREAD_SUPPORT
|
---|
78 |
|
---|
79 | #include <stdlib.h>
|
---|
80 | #include <ctype.h>
|
---|
81 | #ifndef Q_OS_TEMP
|
---|
82 | #include <locale.h>
|
---|
83 | #endif
|
---|
84 | #if defined(_XOPEN_UNIX) && !defined(Q_OS_QNX6)
|
---|
85 | #include <langinfo.h>
|
---|
86 | #endif
|
---|
87 |
|
---|
88 | static QValueList<QTextCodec*> *all = 0;
|
---|
89 | static bool destroying_is_ok; // starts out as 0
|
---|
90 | static QTextCodec * localeMapper = 0;
|
---|
91 |
|
---|
92 | class QTextCodecCleanup {
|
---|
93 | public:
|
---|
94 | ~QTextCodecCleanup() {
|
---|
95 | QTextCodec::deleteAllCodecs();
|
---|
96 | }
|
---|
97 | };
|
---|
98 | static QTextCodecCleanup qtextcodec_cleanup;
|
---|
99 |
|
---|
100 | /*!
|
---|
101 | Deletes all the created codecs.
|
---|
102 |
|
---|
103 | \warning Do not call this function.
|
---|
104 |
|
---|
105 | QApplication calls this function just before exiting to delete
|
---|
106 | any QTextCodec objects that may be lying around. Since various
|
---|
107 | other classes hold pointers to QTextCodec objects, it is not safe
|
---|
108 | to call this function earlier.
|
---|
109 |
|
---|
110 | If you are using the utility classes (like QString) but not using
|
---|
111 | QApplication, calling this function at the very end of your
|
---|
112 | application may be helpful for chasing down memory leaks by
|
---|
113 | eliminating any QTextCodec objects.
|
---|
114 | */
|
---|
115 |
|
---|
116 | void QTextCodec::deleteAllCodecs()
|
---|
117 | {
|
---|
118 | if ( !all )
|
---|
119 | return;
|
---|
120 |
|
---|
121 | #ifdef QT_THREAD_SUPPORT
|
---|
122 | QMutexLocker locker( qt_global_mutexpool ?
|
---|
123 | qt_global_mutexpool->get( &all ) : 0 );
|
---|
124 | if ( !all )
|
---|
125 | return;
|
---|
126 | #endif // QT_THREAD_SUPPORT
|
---|
127 |
|
---|
128 | destroying_is_ok = TRUE;
|
---|
129 |
|
---|
130 | QValueList<QTextCodec*> *ball = all;
|
---|
131 | all = 0;
|
---|
132 | QValueList<QTextCodec*>::Iterator it;
|
---|
133 | for ( it = ball->begin(); it != ball->end(); ++it ) {
|
---|
134 | delete *it;
|
---|
135 | *it = 0;
|
---|
136 | }
|
---|
137 | ball->clear();
|
---|
138 | delete ball;
|
---|
139 |
|
---|
140 | destroying_is_ok = FALSE;
|
---|
141 | }
|
---|
142 |
|
---|
143 |
|
---|
144 | static void realSetup();
|
---|
145 |
|
---|
146 |
|
---|
147 | static inline void setup()
|
---|
148 | {
|
---|
149 | if ( all ) return;
|
---|
150 |
|
---|
151 | #ifdef QT_THREAD_SUPPORT
|
---|
152 | QMutexLocker locker( qt_global_mutexpool ?
|
---|
153 | qt_global_mutexpool->get( &all ) : 0 );
|
---|
154 | if ( all ) return;
|
---|
155 | #endif // QT_THREAD_SUPPORT
|
---|
156 |
|
---|
157 | realSetup();
|
---|
158 | }
|
---|
159 |
|
---|
160 |
|
---|
161 | class QTextStatelessEncoder: public QTextEncoder {
|
---|
162 | const QTextCodec* codec;
|
---|
163 | public:
|
---|
164 | QTextStatelessEncoder(const QTextCodec*);
|
---|
165 | QCString fromUnicode(const QString& uc, int& lenInOut);
|
---|
166 | };
|
---|
167 |
|
---|
168 |
|
---|
169 | class QTextStatelessDecoder : public QTextDecoder {
|
---|
170 | const QTextCodec* codec;
|
---|
171 | public:
|
---|
172 | QTextStatelessDecoder(const QTextCodec*);
|
---|
173 | QString toUnicode(const char* chars, int len);
|
---|
174 | };
|
---|
175 |
|
---|
176 | QTextStatelessEncoder::QTextStatelessEncoder(const QTextCodec* c) :
|
---|
177 | codec(c)
|
---|
178 | {
|
---|
179 | }
|
---|
180 |
|
---|
181 |
|
---|
182 | QCString QTextStatelessEncoder::fromUnicode(const QString& uc, int& lenInOut)
|
---|
183 | {
|
---|
184 | return codec->fromUnicode(uc,lenInOut);
|
---|
185 | }
|
---|
186 |
|
---|
187 |
|
---|
188 | QTextStatelessDecoder::QTextStatelessDecoder(const QTextCodec* c) :
|
---|
189 | codec(c)
|
---|
190 | {
|
---|
191 | }
|
---|
192 |
|
---|
193 |
|
---|
194 | QString QTextStatelessDecoder::toUnicode(const char* chars, int len)
|
---|
195 | {
|
---|
196 | return codec->toUnicode(chars,len);
|
---|
197 | }
|
---|
198 |
|
---|
199 |
|
---|
200 |
|
---|
201 | /*!
|
---|
202 | \class QTextCodec qtextcodec.h
|
---|
203 | \brief The QTextCodec class provides conversion between text encodings.
|
---|
204 | \reentrant
|
---|
205 | \ingroup i18n
|
---|
206 |
|
---|
207 | Qt uses Unicode to store, draw and manipulate strings. In many
|
---|
208 | situations you may wish to deal with data that uses a different
|
---|
209 | encoding. For example, most Japanese documents are still stored in
|
---|
210 | Shift-JIS or ISO2022, while Russian users often have their
|
---|
211 | documents in KOI8-R or CP1251.
|
---|
212 |
|
---|
213 | Qt provides a set of QTextCodec classes to help with converting
|
---|
214 | non-Unicode formats to and from Unicode. You can also create your
|
---|
215 | own codec classes (\link #subclassing see later\endlink).
|
---|
216 |
|
---|
217 | The supported encodings are:
|
---|
218 | \list
|
---|
219 | \i Latin1
|
---|
220 | \i Big5 -- Chinese
|
---|
221 | \i Big5-HKSCS -- Chinese
|
---|
222 | \i eucJP -- Japanese
|
---|
223 | \i eucKR -- Korean
|
---|
224 | \i GB2312 -- Chinese
|
---|
225 | \i GBK -- Chinese
|
---|
226 | \i GB18030 -- Chinese
|
---|
227 | \i JIS7 -- Japanese
|
---|
228 | \i Shift-JIS -- Japanese
|
---|
229 | \i TSCII -- Tamil
|
---|
230 | \i utf8 -- Unicode, 8-bit
|
---|
231 | \i utf16 -- Unicode
|
---|
232 | \i KOI8-R -- Russian
|
---|
233 | \i KOI8-U -- Ukrainian
|
---|
234 | \i ISO8859-1 -- Western
|
---|
235 | \i ISO8859-2 -- Central European
|
---|
236 | \i ISO8859-3 -- Central European
|
---|
237 | \i ISO8859-4 -- Baltic
|
---|
238 | \i ISO8859-5 -- Cyrillic
|
---|
239 | \i ISO8859-6 -- Arabic
|
---|
240 | \i ISO8859-7 -- Greek
|
---|
241 | \i ISO8859-8 -- Hebrew, visually ordered
|
---|
242 | \i ISO8859-8-i -- Hebrew, logically ordered
|
---|
243 | \i ISO8859-9 -- Turkish
|
---|
244 | \i ISO8859-10
|
---|
245 | \i ISO8859-13
|
---|
246 | \i ISO8859-14
|
---|
247 | \i ISO8859-15 -- Western
|
---|
248 | \i IBM 850
|
---|
249 | \i IBM 866
|
---|
250 | \i CP874
|
---|
251 | \i CP1250 -- Central European
|
---|
252 | \i CP1251 -- Cyrillic
|
---|
253 | \i CP1252 -- Western
|
---|
254 | \i CP1253 -- Greek
|
---|
255 | \i CP1254 -- Turkish
|
---|
256 | \i CP1255 -- Hebrew
|
---|
257 | \i CP1256 -- Arabic
|
---|
258 | \i CP1257 -- Baltic
|
---|
259 | \i CP1258
|
---|
260 | \i Apple Roman
|
---|
261 | \i TIS-620 -- Thai
|
---|
262 | \endlist
|
---|
263 |
|
---|
264 | QTextCodecs can be used as follows to convert some locally encoded
|
---|
265 | string to Unicode. Suppose you have some string encoded in Russian
|
---|
266 | KOI8-R encoding, and want to convert it to Unicode. The simple way
|
---|
267 | to do this is:
|
---|
268 |
|
---|
269 | \code
|
---|
270 | QCString locallyEncoded = "..."; // text to convert
|
---|
271 | QTextCodec *codec = QTextCodec::codecForName("KOI8-R"); // get the codec for KOI8-R
|
---|
272 | QString unicodeString = codec->toUnicode( locallyEncoded );
|
---|
273 | \endcode
|
---|
274 |
|
---|
275 | After this, \c{unicodeString} holds the text converted to Unicode.
|
---|
276 | Converting a string from Unicode to the local encoding is just as
|
---|
277 | easy:
|
---|
278 |
|
---|
279 | \code
|
---|
280 | QString unicodeString = "..."; // any Unicode text
|
---|
281 | QTextCodec *codec = QTextCodec::codecForName("KOI8-R"); // get the codec for KOI8-R
|
---|
282 | QCString locallyEncoded = codec->fromUnicode( unicodeString );
|
---|
283 | \endcode
|
---|
284 |
|
---|
285 | Some care must be taken when trying to convert the data in chunks,
|
---|
286 | for example, when receiving it over a network. In such cases it is
|
---|
287 | possible that a multi-byte character will be split over two
|
---|
288 | chunks. At best this might result in the loss of a character and
|
---|
289 | at worst cause the entire conversion to fail.
|
---|
290 |
|
---|
291 | The approach to use in these situations is to create a QTextDecoder
|
---|
292 | object for the codec and use this QTextDecoder for the whole
|
---|
293 | decoding process, as shown below:
|
---|
294 |
|
---|
295 | \code
|
---|
296 | QTextCodec *codec = QTextCodec::codecForName( "Shift-JIS" );
|
---|
297 | QTextDecoder *decoder = codec->makeDecoder();
|
---|
298 |
|
---|
299 | QString unicodeString;
|
---|
300 | while( receiving_data ) {
|
---|
301 | QByteArray chunk = new_data;
|
---|
302 | unicodeString += decoder->toUnicode( chunk.data(), chunk.length() );
|
---|
303 | }
|
---|
304 | \endcode
|
---|
305 |
|
---|
306 | The QTextDecoder object maintains state between chunks and therefore
|
---|
307 | works correctly even if a multi-byte character is split between
|
---|
308 | chunks.
|
---|
309 |
|
---|
310 | \target subclassing
|
---|
311 | \section1 Creating your own Codec class
|
---|
312 |
|
---|
313 | Support for new text encodings can be added to Qt by creating
|
---|
314 | QTextCodec subclasses.
|
---|
315 |
|
---|
316 | Built-in codecs can be overridden by custom codecs since more
|
---|
317 | recently created QTextCodec objects take precedence over earlier
|
---|
318 | ones.
|
---|
319 |
|
---|
320 | You may find it more convenient to make your codec class available
|
---|
321 | as a plugin; see the \link plugins-howto.html plugin
|
---|
322 | documentation\endlink for more details.
|
---|
323 |
|
---|
324 | The abstract virtual functions describe the encoder to the
|
---|
325 | system and the coder is used as required in the different
|
---|
326 | text file formats supported by QTextStream, and under X11, for the
|
---|
327 | locale-specific character input and output.
|
---|
328 |
|
---|
329 | To add support for another 8-bit encoding to Qt, make a subclass
|
---|
330 | of QTextCodec and implement at least the following methods:
|
---|
331 |
|
---|
332 | \code
|
---|
333 | const char* name() const
|
---|
334 | \endcode
|
---|
335 | Return the official name for the encoding.
|
---|
336 |
|
---|
337 | \code
|
---|
338 | int mibEnum() const
|
---|
339 | \endcode
|
---|
340 | Return the MIB enum for the encoding if it is listed in the
|
---|
341 | \link http://www.iana.org/assignments/character-sets
|
---|
342 | IANA character-sets encoding file\endlink.
|
---|
343 |
|
---|
344 | If the encoding is multi-byte then it will have "state"; that is,
|
---|
345 | the interpretation of some bytes will be dependent on some preceding
|
---|
346 | bytes. For such encodings, you must implement:
|
---|
347 |
|
---|
348 | \code
|
---|
349 | QTextDecoder* makeDecoder() const
|
---|
350 | \endcode
|
---|
351 | Return a QTextDecoder that remembers incomplete multi-byte sequence
|
---|
352 | prefixes or other required state.
|
---|
353 |
|
---|
354 | If the encoding does \e not require state, you should implement:
|
---|
355 |
|
---|
356 | \code
|
---|
357 | QString toUnicode(const char* chars, int len) const
|
---|
358 | \endcode
|
---|
359 | Converts \e len characters from \e chars to Unicode.
|
---|
360 |
|
---|
361 | The base QTextCodec class has default implementations of the above
|
---|
362 | two functions, \e{but they are mutually recursive}, so you must
|
---|
363 | re-implement at least one of them, or both for improved efficiency.
|
---|
364 |
|
---|
365 | For conversion from Unicode to 8-bit encodings, it is rarely necessary
|
---|
366 | to maintain state. However, two functions similar to the two above
|
---|
367 | are used for encoding:
|
---|
368 |
|
---|
369 | \code
|
---|
370 | QTextEncoder* makeEncoder() const
|
---|
371 | \endcode
|
---|
372 | Return a QTextEncoder.
|
---|
373 |
|
---|
374 | \code
|
---|
375 | QCString fromUnicode(const QString& uc, int& lenInOut ) const
|
---|
376 | \endcode
|
---|
377 | Converts \e lenInOut characters (of type QChar) from the start of
|
---|
378 | the string \e uc, returning a QCString result, and also returning
|
---|
379 | the \link QCString::length() length\endlink of the result in
|
---|
380 | \e lenInOut.
|
---|
381 |
|
---|
382 | Again, these are mutually recursive so only one needs to be implemented,
|
---|
383 | or both if greater efficiency is possible.
|
---|
384 |
|
---|
385 | Finally, you must implement:
|
---|
386 |
|
---|
387 | \code
|
---|
388 | int heuristicContentMatch(const char* chars, int len) const
|
---|
389 | \endcode
|
---|
390 | Gives a value indicating how likely it is that \e len characters
|
---|
391 | from \e chars are in the encoding.
|
---|
392 |
|
---|
393 | A good model for this function is the
|
---|
394 | QWindowsLocalCodec::heuristicContentMatch function found in the Qt
|
---|
395 | sources.
|
---|
396 |
|
---|
397 | A QTextCodec subclass might have improved performance if you also
|
---|
398 | re-implement:
|
---|
399 |
|
---|
400 | \code
|
---|
401 | bool canEncode( QChar ) const
|
---|
402 | \endcode
|
---|
403 | Test if a Unicode character can be encoded.
|
---|
404 |
|
---|
405 | \code
|
---|
406 | bool canEncode( const QString& ) const
|
---|
407 | \endcode
|
---|
408 | Test if a string of Unicode characters can be encoded.
|
---|
409 |
|
---|
410 | \code
|
---|
411 | int heuristicNameMatch(const char* hint) const
|
---|
412 | \endcode
|
---|
413 | Test if a possibly non-standard name is referring to the codec.
|
---|
414 |
|
---|
415 | Codecs can also be created as \link plugins-howto.html plugins\endlink.
|
---|
416 | */
|
---|
417 |
|
---|
418 |
|
---|
419 | /*!
|
---|
420 | \nonreentrant
|
---|
421 |
|
---|
422 | Constructs a QTextCodec, and gives it the highest precedence. The
|
---|
423 | QTextCodec should always be constructed on the heap (i.e. with \c
|
---|
424 | new). Qt takes ownership and will delete it when the application
|
---|
425 | terminates.
|
---|
426 | */
|
---|
427 | QTextCodec::QTextCodec()
|
---|
428 | {
|
---|
429 | setup();
|
---|
430 | all->insert( all->begin(), this );
|
---|
431 | }
|
---|
432 |
|
---|
433 |
|
---|
434 | /*!
|
---|
435 | \nonreentrant
|
---|
436 |
|
---|
437 | Destroys the QTextCodec. Note that you should not delete codecs
|
---|
438 | yourself: once created they become Qt's responsibility.
|
---|
439 | */
|
---|
440 | QTextCodec::~QTextCodec()
|
---|
441 | {
|
---|
442 | if ( !destroying_is_ok )
|
---|
443 | qWarning("QTextCodec::~QTextCodec() called by application");
|
---|
444 | if ( all )
|
---|
445 | all->remove( this );
|
---|
446 | }
|
---|
447 |
|
---|
448 |
|
---|
449 | /*!
|
---|
450 | Returns a value indicating how likely it is that this decoder is
|
---|
451 | appropriate for decoding some format that has the given name. The
|
---|
452 | name is compared with the \a hint.
|
---|
453 |
|
---|
454 | A good match returns a positive number around the length of the
|
---|
455 | string. A bad match is negative.
|
---|
456 |
|
---|
457 | The default implementation calls simpleHeuristicNameMatch() with
|
---|
458 | the name of the codec.
|
---|
459 | */
|
---|
460 | int QTextCodec::heuristicNameMatch(const char* hint) const
|
---|
461 | {
|
---|
462 | return simpleHeuristicNameMatch(name(),hint);
|
---|
463 | }
|
---|
464 |
|
---|
465 |
|
---|
466 | // returns a string containing the letters and numbers from input,
|
---|
467 | // with a space separating run of a character class. e.g. "iso8859-1"
|
---|
468 | // becomes "iso 8859 1"
|
---|
469 | static QString lettersAndNumbers( const char * input )
|
---|
470 | {
|
---|
471 | QString result;
|
---|
472 | QChar c;
|
---|
473 |
|
---|
474 | while( input && *input ) {
|
---|
475 | c = *input;
|
---|
476 | if ( c.isLetter() || c.isNumber() )
|
---|
477 | result += c.lower();
|
---|
478 | if ( input[1] ) {
|
---|
479 | // add space at character class transition, except
|
---|
480 | // transition from upper-case to lower-case letter
|
---|
481 | QChar n( input[1] );
|
---|
482 | if ( c.isLetter() && n.isLetter() ) {
|
---|
483 | if ( c == c.lower() && n == n.upper() )
|
---|
484 | result += ' ';
|
---|
485 | } else if ( c.category() != n.category() ) {
|
---|
486 | result += ' ';
|
---|
487 | }
|
---|
488 | }
|
---|
489 | input++;
|
---|
490 | }
|
---|
491 | return result.simplifyWhiteSpace();
|
---|
492 | }
|
---|
493 |
|
---|
494 | /*!
|
---|
495 | A simple utility function for heuristicNameMatch(): it does some
|
---|
496 | very minor character-skipping so that almost-exact matches score
|
---|
497 | high. \a name is the text we're matching and \a hint is used for
|
---|
498 | the comparison.
|
---|
499 | */
|
---|
500 | int QTextCodec::simpleHeuristicNameMatch(const char* name, const char* hint)
|
---|
501 | {
|
---|
502 | // if they're the same, return a perfect score.
|
---|
503 | if ( name && hint && *name && *hint && qstricmp( name, hint ) == 0 )
|
---|
504 | return qstrlen( hint );
|
---|
505 |
|
---|
506 | // if the letters and numbers are the same, we have an "almost"
|
---|
507 | // perfect match.
|
---|
508 | QString h( lettersAndNumbers( hint ) );
|
---|
509 | QString n( lettersAndNumbers( name ) );
|
---|
510 | if ( h == n )
|
---|
511 | return qstrlen( hint )-1;
|
---|
512 |
|
---|
513 | if ( h.stripWhiteSpace() == n.stripWhiteSpace() )
|
---|
514 | return qstrlen( hint )-2;
|
---|
515 |
|
---|
516 | // could do some more here, but I don't think it's worth it
|
---|
517 |
|
---|
518 | return 0;
|
---|
519 | }
|
---|
520 |
|
---|
521 |
|
---|
522 | /*!
|
---|
523 | Returns the QTextCodec \a i positions from the most recently
|
---|
524 | inserted codec, or 0 if there is no such QTextCodec. Thus,
|
---|
525 | codecForIndex(0) returns the most recently created QTextCodec.
|
---|
526 | */
|
---|
527 | QTextCodec* QTextCodec::codecForIndex(int i)
|
---|
528 | {
|
---|
529 | setup();
|
---|
530 | return (uint)i >= all->count() ? 0 : *all->at(i);
|
---|
531 | }
|
---|
532 |
|
---|
533 |
|
---|
534 | /*!
|
---|
535 | Returns the QTextCodec which matches the \link
|
---|
536 | QTextCodec::mibEnum() MIBenum\endlink \a mib.
|
---|
537 | */
|
---|
538 | QTextCodec* QTextCodec::codecForMib(int mib)
|
---|
539 | {
|
---|
540 | setup();
|
---|
541 | QValueList<QTextCodec*>::ConstIterator i;
|
---|
542 | QTextCodec* result=0;
|
---|
543 | for ( i = all->begin(); i != all->end(); ++i ) {
|
---|
544 | result = *i;
|
---|
545 | if ( result->mibEnum()==mib )
|
---|
546 | return result;
|
---|
547 | }
|
---|
548 |
|
---|
549 | #if !defined(QT_NO_COMPONENT) && !defined(QT_LITE_COMPONENT)
|
---|
550 | if ( !result || (result && result->mibEnum() != mib) ) {
|
---|
551 | QTextCodec *codec = QTextCodecFactory::createForMib(mib);
|
---|
552 | if (codec)
|
---|
553 | result = codec;
|
---|
554 | }
|
---|
555 | #endif // !QT_NO_COMPONENT !QT_LITE_COMPONENT
|
---|
556 |
|
---|
557 | return result;
|
---|
558 | }
|
---|
559 |
|
---|
560 |
|
---|
561 |
|
---|
562 |
|
---|
563 |
|
---|
564 | #ifdef Q_OS_WIN32
|
---|
565 | class QWindowsLocalCodec: public QTextCodec
|
---|
566 | {
|
---|
567 | public:
|
---|
568 | QWindowsLocalCodec();
|
---|
569 | ~QWindowsLocalCodec();
|
---|
570 |
|
---|
571 | QString toUnicode(const char* chars, int len) const;
|
---|
572 | QCString fromUnicode(const QString& uc, int& lenInOut ) const;
|
---|
573 |
|
---|
574 | const char* name() const;
|
---|
575 | int mibEnum() const;
|
---|
576 |
|
---|
577 | int heuristicContentMatch(const char* chars, int len) const;
|
---|
578 | };
|
---|
579 |
|
---|
580 | QWindowsLocalCodec::QWindowsLocalCodec()
|
---|
581 | {
|
---|
582 | }
|
---|
583 |
|
---|
584 | QWindowsLocalCodec::~QWindowsLocalCodec()
|
---|
585 | {
|
---|
586 | }
|
---|
587 |
|
---|
588 |
|
---|
589 | QString QWindowsLocalCodec::toUnicode(const char* chars, int len) const
|
---|
590 | {
|
---|
591 | if ( len == 1 && chars ) { // Optimization; avoids allocation
|
---|
592 | char c[2];
|
---|
593 | c[0] = *chars;
|
---|
594 | c[1] = 0;
|
---|
595 | return qt_winMB2QString( c, 2 );
|
---|
596 | }
|
---|
597 | if ( len < 0 )
|
---|
598 | return qt_winMB2QString( chars );
|
---|
599 | QCString s(chars,len+1);
|
---|
600 | return qt_winMB2QString(s);
|
---|
601 | }
|
---|
602 |
|
---|
603 | QCString QWindowsLocalCodec::fromUnicode(const QString& uc, int& lenInOut ) const
|
---|
604 | {
|
---|
605 | QCString r = qt_winQString2MB( uc, lenInOut );
|
---|
606 | lenInOut = r.length();
|
---|
607 | return r;
|
---|
608 | }
|
---|
609 |
|
---|
610 |
|
---|
611 | const char* QWindowsLocalCodec::name() const
|
---|
612 | {
|
---|
613 | return "System";
|
---|
614 | }
|
---|
615 |
|
---|
616 | int QWindowsLocalCodec::mibEnum() const
|
---|
617 | {
|
---|
618 | return 0;
|
---|
619 | }
|
---|
620 |
|
---|
621 |
|
---|
622 | int QWindowsLocalCodec::heuristicContentMatch(const char* chars, int len) const
|
---|
623 | {
|
---|
624 | // ### Not a bad default implementation?
|
---|
625 | QString t = toUnicode(chars,len);
|
---|
626 | int l = t.length();
|
---|
627 | QCString mb = fromUnicode(t,l);
|
---|
628 | int i=0;
|
---|
629 | while ( i < len ) {
|
---|
630 | if ( chars[i] == mb[i] )
|
---|
631 | i++;
|
---|
632 | else
|
---|
633 | break;
|
---|
634 | }
|
---|
635 | return i;
|
---|
636 | }
|
---|
637 |
|
---|
638 | #else
|
---|
639 |
|
---|
/*
  Locale-name tables, one per target encoding (the encoding is part of
  each array's name). Every table is terminated by a 0 entry.
  Locale names mostly copied from XFree86.
*/
static const char * const iso8859_2locales[] = {
    "croatian",
    "cs", "cs_CS", "cs_CZ",
    "cz", "cz_CZ", "czech",
    "hr", "hr_HR",
    "hu", "hu_HU", "hungarian",
    "pl", "pl_PL", "polish",
    "ro", "ro_RO", "rumanian",
    "serbocroatian",
    "sh", "sh_SP", "sh_YU",
    "sk", "sk_SK",
    "sl", "sl_CS", "sl_SI",
    "slovak", "slovene",
    "sr_SP",
    0
};

static const char * const iso8859_3locales[] = {
    "eo",
    0
};

static const char * const iso8859_4locales[] = {
    "ee", "ee_EE",
    0
};

static const char * const iso8859_5locales[] = {
    "mk", "mk_MK",
    "sp", "sp_YU",
    0
};

static const char * const cp_1251locales[] = {
    "be", "be_BY",
    "bg", "bg_BG", "bulgarian",
    0
};

static const char * const pt_154locales[] = {
    "ba_RU",
    "ky", "ky_KG",
    "kk", "kk_KZ",
    0
};

static const char * const iso8859_6locales[] = {
    "ar_AA", "ar_SA", "arabic",
    0
};

static const char * const iso8859_7locales[] = {
    "el", "el_GR", "greek",
    0
};

static const char * const iso8859_8locales[] = {
    "hebrew",
    "he", "he_IL",
    "iw", "iw_IL",
    0
};

static const char * const iso8859_9locales[] = {
    "tr", "tr_TR", "turkish",
    0
};

static const char * const iso8859_13locales[] = {
    "lt", "lt_LT",
    "lv", "lv_LV",
    0
};

static const char * const iso8859_15locales[] = {
    "et", "et_EE",
    // Euro countries
    "br_FR", "ca_ES",
    "de", "de_AT", "de_BE", "de_DE", "de_LU",
    "en_IE",
    "es", "es_ES", "eu_ES",
    "fi", "fi_FI", "finnish",
    "fr", "fr_FR", "fr_BE", "fr_LU", "french",
    "ga_IE", "gl_ES",
    "it", "it_IT",
    "oc_FR",
    "nl", "nl_BE", "nl_NL",
    "pt", "pt_PT",
    "sv_FI", "wa_BE",
    0
};

static const char * const koi8_ulocales[] = {
    "uk", "uk_UA", "ru_UA", "ukrainian",
    0
};

static const char * const tis_620locales[] = {
    "th", "th_TH", "thai",
    0
};

static const char * const tcvnlocales[] = {
    "vi", "vi_VN",
    0
};
|
---|
694 |
|
---|
// Returns true if \a lang is exactly equal (case-sensitive) to one of the
// entries of \a locale, a 0-terminated array of C strings.
// A null \a lang or a null \a locale never matches.
// NOTE(review): an empty-string entry stops the scan and is reported as a
// match, exactly as before; none of the tables visible in this file
// contain one.
static bool try_locale_list( const char * const locale[], const char * lang )
{
    // strcmp() must not be given a null pointer.
    if ( !locale || !lang )
        return false;

    int i = 0;
    while ( locale[i] && *locale[i] && strcmp( locale[i], lang ) != 0 )
        ++i;
    return locale[i] != 0;
}
|
---|
702 |
|
---|
// The locales in probably_koi8_rlocales have to be probed at run time.
// The standard says they use ISO 8859-5, but almost all Russian users use
// KOI8-R and incorrectly set $LANG to ru_RU. ru_RU_hack() therefore asks
// tolower() what it thinks the locale means.
//
// If you read the history, it seems that many Russians blame ISO and
// Perestroika for the confusion.
//
// The real bug is that some programs break if the user specifies
// ru_RU.KOI8-R.

static const char * const probably_koi8_rlocales[] = {
    "ru", "ru_SU", "ru_RU", "russian",
    0
};
|
---|
716 |
|
---|
717 | static QTextCodec * ru_RU_hack( const char * i ) {
|
---|
718 | QTextCodec * ru_RU_codec = 0;
|
---|
719 |
|
---|
720 | QCString origlocale = setlocale( LC_CTYPE, i );
|
---|
721 | // unicode koi8r latin5 name
|
---|
722 | // 0x044E 0xC0 0xEE CYRILLIC SMALL LETTER YU
|
---|
723 | // 0x042E 0xE0 0xCE CYRILLIC CAPITAL LETTER YU
|
---|
724 | int latin5 = tolower( 0xCE );
|
---|
725 | int koi8r = tolower( 0xE0 );
|
---|
726 | if ( koi8r == 0xC0 && latin5 != 0xEE ) {
|
---|
727 | ru_RU_codec = QTextCodec::codecForName( "KOI8-R" );
|
---|
728 | } else if ( koi8r != 0xC0 && latin5 == 0xEE ) {
|
---|
729 | ru_RU_codec = QTextCodec::codecForName( "ISO 8859-5" );
|
---|
730 | } else {
|
---|
731 | // something else again... let's assume... *throws dice*
|
---|
732 | ru_RU_codec = QTextCodec::codecForName( "KOI8-R" );
|
---|
733 | qWarning( "QTextCodec: using KOI8-R, probe failed (%02x %02x %s)",
|
---|
734 | koi8r, latin5, i );
|
---|
735 | }
|
---|
736 | setlocale( LC_CTYPE, origlocale.data() );
|
---|
737 |
|
---|
738 | return ru_RU_codec;
|
---|
739 | }
|
---|
740 |
|
---|
741 | #endif
|
---|
742 |
|
---|
743 | /*!
|
---|
744 | Set the codec to \a c; this will be returned by codecForLocale().
|
---|
745 | This might be needed for some applications that want to use their
|
---|
746 | own mechanism for setting the locale.
|
---|
747 |
|
---|
748 | \sa codecForLocale()
|
---|
749 | */
|
---|
750 | void QTextCodec::setCodecForLocale(QTextCodec *c) {
|
---|
751 | localeMapper = c;
|
---|
752 | }
|
---|
753 |
|
---|
754 | /*! Returns a pointer to the codec most suitable for this locale. */
|
---|
755 |
|
---|
756 | QTextCodec* QTextCodec::codecForLocale()
|
---|
757 | {
|
---|
758 | if ( localeMapper )
|
---|
759 | return localeMapper;
|
---|
760 |
|
---|
761 | setup();
|
---|
762 |
|
---|
763 | return localeMapper;
|
---|
764 | }
|
---|
765 |
|
---|
766 |
|
---|
767 | /*!
|
---|
768 | Searches all installed QTextCodec objects and returns the one
|
---|
769 | which best matches \a name; the match is case-insensitive. Returns
|
---|
770 | 0 if no codec's heuristicNameMatch() reports a match better than
|
---|
771 | \a accuracy, or if \a name is a null string.
|
---|
772 |
|
---|
773 | \sa heuristicNameMatch()
|
---|
774 | */
|
---|
775 |
|
---|
776 | QTextCodec* QTextCodec::codecForName( const char* name, int accuracy )
|
---|
777 | {
|
---|
778 | if ( !name || !*name )
|
---|
779 | return 0;
|
---|
780 |
|
---|
781 | setup();
|
---|
782 | QValueList<QTextCodec*>::ConstIterator i;
|
---|
783 | QTextCodec* result = 0;
|
---|
784 | int best = accuracy;
|
---|
785 | QTextCodec* cursor;
|
---|
786 | for ( i = all->begin(); i != all->end(); ++i ) {
|
---|
787 | cursor = *i;
|
---|
788 | int s = cursor->heuristicNameMatch( name );
|
---|
789 | if ( s > best ) {
|
---|
790 | best = s;
|
---|
791 | result = cursor;
|
---|
792 | }
|
---|
793 | }
|
---|
794 |
|
---|
795 | #if !defined(QT_NO_COMPONENT) && !defined(QT_LITE_COMPONENT)
|
---|
796 | if ( !result )
|
---|
797 | result = QTextCodecFactory::createForName(name);
|
---|
798 | #endif // !QT_NO_COMPONENT !QT_LITE_COMPONENT
|
---|
799 |
|
---|
800 | return result;
|
---|
801 | }
|
---|
802 |
|
---|
803 |
|
---|
804 | /*!
|
---|
805 | Searches all installed QTextCodec objects, returning the one which
|
---|
806 | most recognizes the given content. May return 0.
|
---|
807 |
|
---|
808 | Note that this is often a poor choice, since character encodings
|
---|
809 | often use most of the available character sequences, and so only
|
---|
810 | by linguistic analysis could a true match be made.
|
---|
811 |
|
---|
812 | \a chars contains the string to check, and \a len contains the
|
---|
813 | number of characters in the string to use.
|
---|
814 |
|
---|
815 | \sa heuristicContentMatch()
|
---|
816 | */
|
---|
817 | QTextCodec* QTextCodec::codecForContent(const char* chars, int len)
|
---|
818 | {
|
---|
819 | setup();
|
---|
820 | QValueList<QTextCodec*>::ConstIterator i;
|
---|
821 | QTextCodec* result = 0;
|
---|
822 | int best=0;
|
---|
823 | QTextCodec* cursor;
|
---|
824 | for ( i = all->begin(); i != all->end(); ++i ) {
|
---|
825 | cursor = *i;
|
---|
826 | int s = cursor->heuristicContentMatch(chars,len);
|
---|
827 | if ( s > best ) {
|
---|
828 | best = s;
|
---|
829 | result = cursor;
|
---|
830 | }
|
---|
831 | }
|
---|
832 | return result;
|
---|
833 | }
|
---|
834 |
|
---|
835 |
|
---|
836 | /*!
|
---|
837 | \fn const char* QTextCodec::name() const
|
---|
838 |
|
---|
839 | QTextCodec subclasses must reimplement this function. It returns
|
---|
840 | the name of the encoding supported by the subclass. When choosing
|
---|
841 | a name for an encoding, consider these points:
|
---|
842 | \list
|
---|
843 | \i On X11, heuristicNameMatch( const char * hint )
|
---|
844 | is used to test if the QTextCodec
|
---|
845 | can convert between Unicode and the encoding of a font
|
---|
846 | with encoding \e hint, such as "iso8859-1" for Latin-1 fonts,
|
---|
847 | "koi8-r" for Russian KOI8 fonts.
|
---|
848 | The default algorithm of heuristicNameMatch() uses name().
|
---|
849 | \i Some applications may use this function to present
|
---|
850 | encodings to the end user.
|
---|
851 | \endlist
|
---|
852 | */
|
---|
853 |
|
---|
854 | /*!
|
---|
855 | \fn int QTextCodec::mibEnum() const
|
---|
856 |
|
---|
857 | Subclasses of QTextCodec must reimplement this function. It
|
---|
858 | returns the MIBenum (see \link
|
---|
859 | http://www.iana.org/assignments/character-sets the
|
---|
860 | IANA character-sets encoding file\endlink for more information).
|
---|
861 | It is important that each QTextCodec subclass returns the correct
|
---|
862 | unique value for this function.
|
---|
863 | */
|
---|
864 |
|
---|
865 |
|
---|
866 | /*!
|
---|
867 | Returns the preferred mime name of the encoding as defined in the
|
---|
868 | \link http://www.iana.org/assignments/character-sets
|
---|
869 | IANA character-sets encoding file\endlink.
|
---|
870 | */
|
---|
871 | const char* QTextCodec::mimeName() const
|
---|
872 | {
|
---|
873 | return name();
|
---|
874 | }
|
---|
875 |
|
---|
876 |
|
---|
877 | /*!
|
---|
878 | \fn int QTextCodec::heuristicContentMatch(const char* chars, int len) const
|
---|
879 |
|
---|
880 | QTextCodec subclasses must reimplement this function. It examines
|
---|
881 | the first \a len bytes of \a chars and returns a value indicating
|
---|
882 | how likely it is that the string is a prefix of text encoded in
|
---|
883 | the encoding of the subclass. A negative return value indicates
|
---|
884 | that the text is detectably not in the encoding (e.g. it contains
|
---|
885 | characters undefined in the encoding). A return value of 0
|
---|
886 | indicates that the text should be decoded with this codec rather
|
---|
887 | than as ASCII, but there is no particular evidence. The value
|
---|
888 | should range up to \a len. Thus, most decoders will return -1, 0,
|
---|
889 | or -\a len.
|
---|
890 |
|
---|
891 | The characters are not null terminated.
|
---|
892 |
|
---|
893 | \sa codecForContent().
|
---|
894 | */
|
---|
895 |
|
---|
896 |
|
---|
897 | /*!
|
---|
898 | Creates a QTextDecoder which stores enough state to decode chunks
|
---|
899 | of char* data to create chunks of Unicode data. The default
|
---|
900 | implementation creates a stateless decoder, which is only
|
---|
901 | sufficient for the simplest encodings where each byte corresponds
|
---|
902 | to exactly one Unicode character.
|
---|
903 |
|
---|
904 | The caller is responsible for deleting the returned object.
|
---|
905 | */
|
---|
906 | QTextDecoder* QTextCodec::makeDecoder() const
|
---|
907 | {
|
---|
908 | return new QTextStatelessDecoder(this);
|
---|
909 | }
|
---|
910 |
|
---|
911 |
|
---|
912 | /*!
|
---|
913 | Creates a QTextEncoder which stores enough state to encode chunks
|
---|
914 | of Unicode data as char* data. The default implementation creates
|
---|
915 | a stateless encoder, which is only sufficient for the simplest
|
---|
916 | encodings where each Unicode character corresponds to exactly one
|
---|
917 | character.
|
---|
918 |
|
---|
919 | The caller is responsible for deleting the returned object.
|
---|
920 | */
|
---|
921 | QTextEncoder* QTextCodec::makeEncoder() const
|
---|
922 | {
|
---|
923 | return new QTextStatelessEncoder(this);
|
---|
924 | }
|
---|
925 |
|
---|
926 |
|
---|
927 | /*!
|
---|
928 | QTextCodec subclasses must reimplement this function or
|
---|
929 | makeDecoder(). It converts the first \a len characters of \a chars
|
---|
930 | to Unicode.
|
---|
931 |
|
---|
932 | The default implementation makes a decoder with makeDecoder() and
|
---|
933 | converts the input with that. Note that the default makeDecoder()
|
---|
934 | implementation makes a decoder that simply calls
|
---|
935 | this function, hence subclasses \e must reimplement one function or
|
---|
936 | the other to avoid infinite recursion.
|
---|
937 | */
|
---|
938 | QString QTextCodec::toUnicode(const char* chars, int len) const
|
---|
939 | {
|
---|
940 | if ( chars == 0 )
|
---|
941 | return QString::null;
|
---|
942 | QTextDecoder* i = makeDecoder();
|
---|
943 | QString result = i->toUnicode(chars,len);
|
---|
944 | delete i;
|
---|
945 | return result;
|
---|
946 | }
|
---|
947 |
|
---|
948 |
|
---|
949 | /*!
|
---|
950 | QTextCodec subclasses must reimplement either this function or
|
---|
951 | makeEncoder(). It converts the first \a lenInOut characters of \a
|
---|
952 | uc from Unicode to the encoding of the subclass. If \a lenInOut is
|
---|
953 | negative or too large, the length of \a uc is used instead.
|
---|
954 |
|
---|
955 | Converts \a lenInOut characters (not bytes) from \a uc, producing
|
---|
956 | a QCString. \a lenInOut will be set to the \link
|
---|
957 | QCString::length() length\endlink of the result (in bytes).
|
---|
958 |
|
---|
959 | The default implementation makes an encoder with makeEncoder() and
|
---|
960 | converts the input with that. Note that the default makeEncoder()
|
---|
961 | implementation makes an encoder that simply calls this function,
|
---|
962 | hence subclasses \e must reimplement one function or the other to
|
---|
963 | avoid infinite recursion.
|
---|
964 | */
|
---|
965 |
|
---|
966 | QCString QTextCodec::fromUnicode(const QString& uc, int& lenInOut) const
|
---|
967 | {
|
---|
968 | QTextEncoder* i = makeEncoder();
|
---|
969 | QCString result = i->fromUnicode(uc, lenInOut);
|
---|
970 | delete i;
|
---|
971 | return result;
|
---|
972 | }
|
---|
973 |
|
---|
974 | /*!
|
---|
975 | \overload
|
---|
976 | \internal
|
---|
977 | */
|
---|
978 | QByteArray QTextCodec::fromUnicode( const QString &str, int pos, int len ) const
|
---|
979 | {
|
---|
980 | QByteArray a;
|
---|
981 | if( len < 0 )
|
---|
982 | len = str.length() - pos;
|
---|
983 | a = fromUnicode( str.mid(pos, len) );
|
---|
984 | if( a.size() > 0 && a[(int)a.size() - 1] == '\0' )
|
---|
985 | a.resize( a.size() - 1 );
|
---|
986 | return a;
|
---|
987 | }
|
---|
988 |
|
---|
989 | /*!
|
---|
990 | \overload
|
---|
991 |
|
---|
992 | \a uc is the unicode source string.
|
---|
993 | */
|
---|
994 | QCString QTextCodec::fromUnicode(const QString& uc) const
|
---|
995 | {
|
---|
996 | int l = uc.length();
|
---|
997 | return fromUnicode(uc,l);
|
---|
998 | }
|
---|
999 |
|
---|
1000 | /*!
|
---|
1001 | \overload
|
---|
1002 |
|
---|
1003 | \a a contains the source characters; \a len contains the number of
|
---|
1004 | characters in \a a to use.
|
---|
1005 | */
|
---|
1006 | QString QTextCodec::toUnicode(const QByteArray& a, int len) const
|
---|
1007 | {
|
---|
1008 | int l = a.size();
|
---|
1009 | l = QMIN( l, len );
|
---|
1010 | return toUnicode( a.data(), l );
|
---|
1011 | }
|
---|
1012 |
|
---|
1013 | /*!
|
---|
1014 | \overload
|
---|
1015 |
|
---|
1016 | \a a contains the source characters.
|
---|
1017 | */
|
---|
1018 | QString QTextCodec::toUnicode(const QByteArray& a) const
|
---|
1019 | {
|
---|
1020 | int l = a.size();
|
---|
1021 | return toUnicode( a.data(), l );
|
---|
1022 | }
|
---|
1023 |
|
---|
1024 | /*!
|
---|
1025 | \overload
|
---|
1026 |
|
---|
1027 | \a a contains the source characters; \a len contains the number of
|
---|
1028 | characters in \a a to use.
|
---|
1029 | */
|
---|
1030 | QString QTextCodec::toUnicode(const QCString& a, int len) const
|
---|
1031 | {
|
---|
1032 | int l = a.length();
|
---|
1033 | l = QMIN( l, len );
|
---|
1034 | return toUnicode( a.data(), l );
|
---|
1035 | }
|
---|
1036 |
|
---|
1037 | /*!
|
---|
1038 | \overload
|
---|
1039 |
|
---|
1040 | \a a contains the source characters.
|
---|
1041 | */
|
---|
1042 | QString QTextCodec::toUnicode(const QCString& a) const
|
---|
1043 | {
|
---|
1044 | int l = a.length();
|
---|
1045 | return toUnicode( a.data(), l );
|
---|
1046 | }
|
---|
1047 |
|
---|
1048 | /*!
|
---|
1049 | \overload
|
---|
1050 |
|
---|
1051 | \a chars contains the source characters.
|
---|
1052 | */
|
---|
1053 | QString QTextCodec::toUnicode(const char* chars) const
|
---|
1054 | {
|
---|
1055 | return toUnicode(chars,qstrlen(chars));
|
---|
1056 | }
|
---|
1057 |
|
---|
1058 | /*!
|
---|
1059 | \internal
|
---|
1060 | */
|
---|
1061 | unsigned short QTextCodec::characterFromUnicode(const QString &str, int pos) const
|
---|
1062 | {
|
---|
1063 | QCString result = QTextCodec::fromUnicode(QString(str[pos]));
|
---|
1064 | uchar *ch = (uchar *) result.data();
|
---|
1065 | ushort retval = 0;
|
---|
1066 | if (result.size() > 2) {
|
---|
1067 | retval = (ushort) *ch << 8;
|
---|
1068 | ch++;
|
---|
1069 | }
|
---|
1070 | return retval + *ch;
|
---|
1071 | }
|
---|
1072 |
|
---|
1073 | /*!
|
---|
1074 | Returns TRUE if the Unicode character \a ch can be fully encoded
|
---|
1075 | with this codec; otherwise returns FALSE. The default
|
---|
1076 | implementation tests if the result of toUnicode(fromUnicode(ch))
|
---|
1077 | is the original \a ch. Subclasses may be able to improve the
|
---|
1078 | efficiency.
|
---|
1079 | */
|
---|
1080 | bool QTextCodec::canEncode( QChar ch ) const
|
---|
1081 | {
|
---|
1082 | return toUnicode(fromUnicode(ch)) == ch;
|
---|
1083 | }
|
---|
1084 |
|
---|
1085 | /*!
|
---|
1086 | \overload
|
---|
1087 |
|
---|
1088 | \a s contains the string being tested for encode-ability.
|
---|
1089 | */
|
---|
1090 | bool QTextCodec::canEncode( const QString& s ) const
|
---|
1091 | {
|
---|
1092 | if ( s.isEmpty() )
|
---|
1093 | return TRUE;
|
---|
1094 | return toUnicode(fromUnicode(s)) == s;
|
---|
1095 | }
|
---|
1096 |
|
---|
1097 |
|
---|
1098 |
|
---|
1099 | /*!
|
---|
1100 | \class QTextEncoder qtextcodec.h
|
---|
1101 | \brief The QTextEncoder class provides a state-based encoder.
|
---|
1102 | \reentrant
|
---|
1103 | \ingroup i18n
|
---|
1104 |
|
---|
1105 | The encoder converts Unicode into another format, remembering any
|
---|
1106 | state that is required between calls.
|
---|
1107 |
|
---|
1108 | \sa QTextCodec::makeEncoder()
|
---|
1109 | */
|
---|
1110 |
|
---|
1111 | /*!
|
---|
1112 | Destroys the encoder.
|
---|
1113 | */
|
---|
1114 | QTextEncoder::~QTextEncoder()
|
---|
1115 | {
|
---|
1116 | }
|
---|
1117 |
|
---|
1118 | /*!
|
---|
1119 | \fn QCString QTextEncoder::fromUnicode(const QString& uc, int& lenInOut)
|
---|
1120 |
|
---|
1121 | Converts \a lenInOut characters (not bytes) from \a uc, producing
|
---|
1122 | a QCString. \a lenInOut will be set to the \link
|
---|
1123 | QCString::length() length\endlink of the result (in bytes).
|
---|
1124 |
|
---|
1125 | The encoder is free to record state to use when subsequent calls
|
---|
1126 | are made to this function (for example, it might change modes with
|
---|
1127 | escape sequences if needed during the encoding of one string, then
|
---|
1128 | assume that mode applies when a subsequent call begins).
|
---|
1129 | */
|
---|
1130 |
|
---|
1131 | /*!
|
---|
1132 | \class QTextDecoder qtextcodec.h
|
---|
1133 | \brief The QTextDecoder class provides a state-based decoder.
|
---|
1134 | \reentrant
|
---|
1135 | \ingroup i18n
|
---|
1136 |
|
---|
1137 | The decoder converts a text format into Unicode, remembering any
|
---|
1138 | state that is required between calls.
|
---|
1139 |
|
---|
1140 | \sa QTextCodec::makeDecoder()
|
---|
1141 | */
|
---|
1142 |
|
---|
1143 |
|
---|
1144 | /*!
|
---|
1145 | Destroys the decoder.
|
---|
1146 | */
|
---|
1147 | QTextDecoder::~QTextDecoder()
|
---|
1148 | {
|
---|
1149 | }
|
---|
1150 |
|
---|
1151 | /*!
|
---|
1152 | \fn QString QTextDecoder::toUnicode(const char* chars, int len)
|
---|
1153 |
|
---|
1154 | Converts the first \a len bytes in \a chars to Unicode, returning
|
---|
1155 | the result.
|
---|
1156 |
|
---|
1157 | If not all characters are used (e.g. if only part of a multi-byte
|
---|
1158 | encoding is at the end of the characters), the decoder remembers
|
---|
1159 | enough state to continue with the next call to this function.
|
---|
1160 | */
|
---|
1161 |
|
---|
1162 | #define CHAINED 0xffff
|
---|
1163 |
|
---|
// One entry of a byte-indexed decoding table. When multiByte is set the
// entry is not a final character: the next input byte indexes into the
// multiByte array instead and unicode is ignored.
struct QMultiByteUnicodeTable {
    // New entries decode to U+FFFD and have no follow-up table.
    QMultiByteUnicodeTable()
        : unicode(0xfffd), multiByte(0)
    {
    }

    // Releases the follow-up table (and, through it, any nested tables).
    ~QMultiByteUnicodeTable()
    {
        delete [] multiByte;
    }

    ushort unicode;
    QMultiByteUnicodeTable* multiByte;
};
|
---|
1178 |
|
---|
/*
  Parses one escaped byte value from a charmap line and advances
  cursor past it.

  On entry cursor is expected to point at the escape character. The
  character after it selects the radix: 'x' for hexadecimal, 'd' for
  decimal; anything else is taken as the first digit of an octal value.

  Returns the low eight bits of the parsed number. Returns 0 without
  moving cursor if it already points at the end of the string, and 0
  after stepping over a lone trailing escape character.
*/
static int getByte(char* &cursor)
{
    if ( !*cursor )
        return 0;

    // A lone escape character at the end of the line carries no value;
    // step over it instead of reading beyond the terminator.
    if ( !cursor[1] ) {
        cursor++;
        return 0;
    }

    long value;
    if ( cursor[1] == 'x' ) {
        value = strtol(cursor+2,&cursor,16);
    } else if ( cursor[1] == 'd' ) {
        value = strtol(cursor+2,&cursor,10);
    } else {
        // Octal constants have no radix letter: the digits start directly
        // after the escape character, so none of them may be skipped.
        value = strtol(cursor+1,&cursor,8);
    }
    return (int)(value & 0xff);
}
|
---|
1192 |
|
---|
1193 | class QTextCodecFromIOD;
|
---|
1194 |
|
---|
1195 | class QTextCodecFromIODDecoder : public QTextDecoder {
|
---|
1196 | const QTextCodecFromIOD* codec;
|
---|
1197 | QMultiByteUnicodeTable* mb;
|
---|
1198 | public:
|
---|
1199 | QTextCodecFromIODDecoder(const QTextCodecFromIOD* c);
|
---|
1200 | QString toUnicode(const char* chars, int len);
|
---|
1201 | };
|
---|
1202 |
|
---|
1203 | class QTextCodecFromIOD : public QTextCodec {
|
---|
1204 | friend class QTextCodecFromIODDecoder;
|
---|
1205 |
|
---|
1206 | QCString n;
|
---|
1207 |
|
---|
1208 | // If from_unicode_page[row()][cell()] is 0 and from_unicode_page_multiByte,
|
---|
1209 | // use from_unicode_page_multiByte[row()][cell()] as string.
|
---|
1210 | char** from_unicode_page;
|
---|
1211 | char*** from_unicode_page_multiByte;
|
---|
1212 | char unkn;
|
---|
1213 |
|
---|
1214 | // Only one of these is used
|
---|
1215 | ushort* to_unicode;
|
---|
1216 | QMultiByteUnicodeTable* to_unicode_multiByte;
|
---|
1217 | int max_bytes_per_char;
|
---|
1218 | QStrList aliases;
|
---|
1219 |
|
---|
1220 | bool stateless() const { return !to_unicode_multiByte; }
|
---|
1221 |
|
---|
1222 | public:
|
---|
1223 | QTextCodecFromIOD(QIODevice* iod)
|
---|
1224 | {
|
---|
1225 | from_unicode_page = 0;
|
---|
1226 | to_unicode_multiByte = 0;
|
---|
1227 | to_unicode = 0;
|
---|
1228 | from_unicode_page_multiByte = 0;
|
---|
1229 | max_bytes_per_char = 1;
|
---|
1230 |
|
---|
1231 | const int maxlen=100;
|
---|
1232 | char line[maxlen];
|
---|
1233 | char esc='\\';
|
---|
1234 | char comm='%';
|
---|
1235 | bool incmap = FALSE;
|
---|
1236 | while (iod->readLine(line,maxlen) > 0) {
|
---|
1237 | if (0==qstrnicmp(line,"<code_set_name>",15))
|
---|
1238 | n = line+15;
|
---|
1239 | else if (0==qstrnicmp(line,"<escape_char> ",14))
|
---|
1240 | esc = line[14];
|
---|
1241 | else if (0==qstrnicmp(line,"<comment_char> ",15))
|
---|
1242 | comm = line[15];
|
---|
1243 | else if (line[0]==comm && 0==qstrnicmp(line+1," alias ",7)) {
|
---|
1244 | aliases.append(line+8);
|
---|
1245 | } else if (0==qstrnicmp(line,"CHARMAP",7)) {
|
---|
1246 | if (!from_unicode_page) {
|
---|
1247 | from_unicode_page = new char*[256];
|
---|
1248 | for (int i=0; i<256; i++)
|
---|
1249 | from_unicode_page[i]=0;
|
---|
1250 | }
|
---|
1251 | if (!to_unicode) {
|
---|
1252 | to_unicode = new ushort[256];
|
---|
1253 | }
|
---|
1254 | incmap = TRUE;
|
---|
1255 | } else if (0==qstrnicmp(line,"END CHARMAP",11))
|
---|
1256 | break;
|
---|
1257 | else if (incmap) {
|
---|
1258 | char* cursor = line;
|
---|
1259 | int byte=-1,unicode=-1;
|
---|
1260 | ushort* mb_unicode=0;
|
---|
1261 | const int maxmb=8; // more -> we'll need to improve datastructures
|
---|
1262 | char mb[maxmb+1];
|
---|
1263 | int nmb=0;
|
---|
1264 |
|
---|
1265 | while (*cursor) {
|
---|
1266 | if (cursor[0]=='<' && cursor[1]=='U' &&
|
---|
1267 | cursor[2]>='0' && cursor[2]<='9' &&
|
---|
1268 | cursor[3]>='0' && cursor[3]<='9') {
|
---|
1269 |
|
---|
1270 | unicode = strtol(cursor+2,&cursor,16);
|
---|
1271 |
|
---|
1272 | } else if (*cursor==esc) {
|
---|
1273 |
|
---|
1274 | byte = getByte(cursor);
|
---|
1275 |
|
---|
1276 | if ( *cursor == esc ) {
|
---|
1277 | if ( !to_unicode_multiByte ) {
|
---|
1278 | to_unicode_multiByte =
|
---|
1279 | new QMultiByteUnicodeTable[256];
|
---|
1280 | for (int i=0; i<256; i++) {
|
---|
1281 | to_unicode_multiByte[i].unicode =
|
---|
1282 | to_unicode[i];
|
---|
1283 | to_unicode_multiByte[i].multiByte = 0;
|
---|
1284 | }
|
---|
1285 | delete [] to_unicode;
|
---|
1286 | to_unicode = 0;
|
---|
1287 | }
|
---|
1288 | QMultiByteUnicodeTable* mbut =
|
---|
1289 | to_unicode_multiByte+byte;
|
---|
1290 | mb[nmb++] = byte;
|
---|
1291 | while ( nmb < maxmb && *cursor == esc ) {
|
---|
1292 | // Always at least once
|
---|
1293 |
|
---|
1294 | mbut->unicode = CHAINED;
|
---|
1295 | byte = getByte(cursor);
|
---|
1296 | mb[nmb++] = byte;
|
---|
1297 | if (!mbut->multiByte) {
|
---|
1298 | mbut->multiByte =
|
---|
1299 | new QMultiByteUnicodeTable[256];
|
---|
1300 | }
|
---|
1301 | mbut = mbut->multiByte+byte;
|
---|
1302 | mb_unicode = & mbut->unicode;
|
---|
1303 | }
|
---|
1304 |
|
---|
1305 | if ( nmb > max_bytes_per_char )
|
---|
1306 | max_bytes_per_char = nmb;
|
---|
1307 | }
|
---|
1308 | } else {
|
---|
1309 | cursor++;
|
---|
1310 | }
|
---|
1311 | }
|
---|
1312 |
|
---|
1313 | if (unicode >= 0 && unicode <= 0xffff)
|
---|
1314 | {
|
---|
1315 | QChar ch((ushort)unicode);
|
---|
1316 | if (!from_unicode_page[ch.row()]) {
|
---|
1317 | from_unicode_page[ch.row()] = new char[256];
|
---|
1318 | for (int i=0; i<256; i++)
|
---|
1319 | from_unicode_page[ch.row()][i]=0;
|
---|
1320 | }
|
---|
1321 | if ( mb_unicode ) {
|
---|
1322 | from_unicode_page[ch.row()][ch.cell()] = 0;
|
---|
1323 | if (!from_unicode_page_multiByte) {
|
---|
1324 | from_unicode_page_multiByte = new char**[256];
|
---|
1325 | for (int i=0; i<256; i++)
|
---|
1326 | from_unicode_page_multiByte[i]=0;
|
---|
1327 | }
|
---|
1328 | if (!from_unicode_page_multiByte[ch.row()]) {
|
---|
1329 | from_unicode_page_multiByte[ch.row()] = new char*[256];
|
---|
1330 | for (int i=0; i<256; i++)
|
---|
1331 | from_unicode_page_multiByte[ch.row()][i] = 0;
|
---|
1332 | }
|
---|
1333 | mb[nmb++] = 0;
|
---|
1334 | from_unicode_page_multiByte[ch.row()][ch.cell()]
|
---|
1335 | = qstrdup(mb);
|
---|
1336 | *mb_unicode = unicode;
|
---|
1337 | } else {
|
---|
1338 | from_unicode_page[ch.row()][ch.cell()] = (char)byte;
|
---|
1339 | if ( to_unicode )
|
---|
1340 | to_unicode[byte] = unicode;
|
---|
1341 | else
|
---|
1342 | to_unicode_multiByte[byte].unicode = unicode;
|
---|
1343 | }
|
---|
1344 | } else {
|
---|
1345 | }
|
---|
1346 | }
|
---|
1347 | }
|
---|
1348 | n = n.stripWhiteSpace();
|
---|
1349 |
|
---|
1350 | unkn = '?'; // ##### Might be a bad choice.
|
---|
1351 | }
|
---|
1352 |
|
---|
1353 | ~QTextCodecFromIOD()
|
---|
1354 | {
|
---|
1355 | if ( from_unicode_page ) {
|
---|
1356 | for (int i=0; i<256; i++)
|
---|
1357 | if (from_unicode_page[i])
|
---|
1358 | delete [] from_unicode_page[i];
|
---|
1359 | }
|
---|
1360 | if ( from_unicode_page_multiByte ) {
|
---|
1361 | for (int i=0; i<256; i++)
|
---|
1362 | if (from_unicode_page_multiByte[i])
|
---|
1363 | for (int j=0; j<256; j++)
|
---|
1364 | if (from_unicode_page_multiByte[i][j])
|
---|
1365 | delete [] from_unicode_page_multiByte[i][j];
|
---|
1366 | }
|
---|
1367 | if ( to_unicode )
|
---|
1368 | delete [] to_unicode;
|
---|
1369 | if ( to_unicode_multiByte )
|
---|
1370 | delete [] to_unicode_multiByte;
|
---|
1371 | }
|
---|
1372 |
|
---|
1373 | bool ok() const
|
---|
1374 | {
|
---|
1375 | return !!from_unicode_page;
|
---|
1376 | }
|
---|
1377 |
|
---|
1378 | QTextDecoder* makeDecoder() const
|
---|
1379 | {
|
---|
1380 | if ( stateless() )
|
---|
1381 | return QTextCodec::makeDecoder();
|
---|
1382 | else
|
---|
1383 | return new QTextCodecFromIODDecoder(this);
|
---|
1384 | }
|
---|
1385 |
|
---|
1386 | const char* name() const
|
---|
1387 | {
|
---|
1388 | return n;
|
---|
1389 | }
|
---|
1390 |
|
---|
1391 | int mibEnum() const
|
---|
1392 | {
|
---|
1393 | return 0; // #### Unknown.
|
---|
1394 | }
|
---|
1395 |
|
---|
1396 | int heuristicContentMatch(const char*, int) const
|
---|
1397 | {
|
---|
1398 | return 0;
|
---|
1399 | }
|
---|
1400 |
|
---|
1401 | int heuristicNameMatch(const char* hint) const
|
---|
1402 | {
|
---|
1403 | int bestr = QTextCodec::heuristicNameMatch(hint);
|
---|
1404 | QStrListIterator it(aliases);
|
---|
1405 | char* a;
|
---|
1406 | while ((a=it.current())) {
|
---|
1407 | ++it;
|
---|
1408 | int r = simpleHeuristicNameMatch(a,hint);
|
---|
1409 | if (r > bestr)
|
---|
1410 | bestr = r;
|
---|
1411 | }
|
---|
1412 | return bestr;
|
---|
1413 | }
|
---|
1414 |
|
---|
1415 | QString toUnicode(const char* chars, int len) const
|
---|
1416 | {
|
---|
1417 | const uchar* uchars = (const uchar*)chars;
|
---|
1418 | QString result;
|
---|
1419 | QMultiByteUnicodeTable* multiByte=to_unicode_multiByte;
|
---|
1420 | if ( multiByte ) {
|
---|
1421 | while (len--) {
|
---|
1422 | QMultiByteUnicodeTable& mb = multiByte[*uchars];
|
---|
1423 | if ( mb.multiByte ) {
|
---|
1424 | // Chained multi-byte
|
---|
1425 | multiByte = mb.multiByte;
|
---|
1426 | } else {
|
---|
1427 | result += QChar(mb.unicode);
|
---|
1428 | multiByte=to_unicode_multiByte;
|
---|
1429 | }
|
---|
1430 | uchars++;
|
---|
1431 | }
|
---|
1432 | } else {
|
---|
1433 | while (len--)
|
---|
1434 | result += QChar(to_unicode[*uchars++]);
|
---|
1435 | }
|
---|
1436 | return result;
|
---|
1437 | }
|
---|
1438 |
|
---|
1439 | #if !defined(Q_NO_USING_KEYWORD)
|
---|
1440 | using QTextCodec::fromUnicode;
|
---|
1441 | #endif
|
---|
1442 | QCString fromUnicode(const QString& uc, int& lenInOut) const
|
---|
1443 | {
|
---|
1444 | if (lenInOut > (int)uc.length())
|
---|
1445 | lenInOut = uc.length();
|
---|
1446 | int rlen = lenInOut*max_bytes_per_char;
|
---|
1447 | QCString rstr(rlen);
|
---|
1448 | char* cursor = rstr.data();
|
---|
1449 | char* s=0;
|
---|
1450 | int l = lenInOut;
|
---|
1451 | int lout = 0;
|
---|
1452 | for (int i=0; i<l; i++) {
|
---|
1453 | QChar ch = uc[i];
|
---|
1454 | if ( ch == QChar::null ) {
|
---|
1455 | // special
|
---|
1456 | *cursor++ = 0;
|
---|
1457 | } else if ( from_unicode_page[ch.row()] &&
|
---|
1458 | from_unicode_page[ch.row()][ch.cell()] )
|
---|
1459 | {
|
---|
1460 | *cursor++ = from_unicode_page[ch.row()][ch.cell()];
|
---|
1461 | lout++;
|
---|
1462 | } else if ( from_unicode_page_multiByte &&
|
---|
1463 | from_unicode_page_multiByte[ch.row()] &&
|
---|
1464 | (s=from_unicode_page_multiByte[ch.row()][ch.cell()]) )
|
---|
1465 | {
|
---|
1466 | while (*s) {
|
---|
1467 | *cursor++ = *s++;
|
---|
1468 | lout++;
|
---|
1469 | }
|
---|
1470 | } else {
|
---|
1471 | *cursor++ = unkn;
|
---|
1472 | lout++;
|
---|
1473 | }
|
---|
1474 | }
|
---|
1475 | *cursor = 0;
|
---|
1476 | lenInOut = lout;
|
---|
1477 | return rstr;
|
---|
1478 | }
|
---|
1479 | };
|
---|
1480 |
|
---|
1481 | QTextCodecFromIODDecoder::QTextCodecFromIODDecoder(const QTextCodecFromIOD* c) :
|
---|
1482 | codec(c)
|
---|
1483 | {
|
---|
1484 | mb = codec->to_unicode_multiByte;
|
---|
1485 | }
|
---|
1486 |
|
---|
1487 | QString QTextCodecFromIODDecoder::toUnicode(const char* chars, int len)
|
---|
1488 | {
|
---|
1489 | const uchar* uchars = (const uchar*)chars;
|
---|
1490 | QString result;
|
---|
1491 | while (len--) {
|
---|
1492 | QMultiByteUnicodeTable& t = mb[*uchars];
|
---|
1493 | if ( t.multiByte ) {
|
---|
1494 | // Chained multi-byte
|
---|
1495 | mb = t.multiByte;
|
---|
1496 | } else {
|
---|
1497 | if ( t.unicode )
|
---|
1498 | result += QChar(t.unicode);
|
---|
1499 | mb=codec->to_unicode_multiByte;
|
---|
1500 | }
|
---|
1501 | uchars++;
|
---|
1502 | }
|
---|
1503 | return result;
|
---|
1504 | }
|
---|
1505 |
|
---|
1506 | #ifndef QT_NO_CODECS
|
---|
1507 | // Cannot use <pre> or \code
|
---|
1508 | /*!
|
---|
1509 | Reads a POSIX2 charmap definition from \a iod.
|
---|
1510 | The parser recognizes the following lines:
|
---|
1511 |
|
---|
1512 | <font name="sans">
|
---|
1513 | <code_set_name> <i>name</i></br>
|
---|
1514 | <escape_char> <i>character</i></br>
|
---|
1515 | % alias <i>alias</i></br>
|
---|
1516 | CHARMAP</br>
|
---|
1517 | <<i>token</i>> /x<i>hexbyte</i> <U<i>unicode</i>> ...</br>
|
---|
1518 | <<i>token</i>> /d<i>decbyte</i> <U<i>unicode</i>> ...</br>
|
---|
1519 | <<i>token</i>> /<i>octbyte</i> <U<i>unicode</i>> ...</br>
|
---|
1520 | <<i>token</i>> /<i>any</i>/<i>any</i>... <U<i>unicode</i>> ...</br>
|
---|
1521 | END CHARMAP</br>
|
---|
1522 | </font>
|
---|
1523 |
|
---|
1524 | The resulting QTextCodec is returned (and also added to the global
|
---|
1525 | list of codecs). The name() of the result is taken from the
|
---|
1526 | code_set_name.
|
---|
1527 |
|
---|
1528 | Note that a codec constructed in this way uses much more memory
|
---|
1529 | and is slower than a hand-written QTextCodec subclass, since
|
---|
1530 | tables in code are kept in memory shared by all Qt applications.
|
---|
1531 |
|
---|
1532 | \sa loadCharmapFile()
|
---|
1533 | */
|
---|
1534 | QTextCodec* QTextCodec::loadCharmap(QIODevice* iod)
|
---|
1535 | {
|
---|
1536 | QTextCodecFromIOD* r = new QTextCodecFromIOD(iod);
|
---|
1537 | if ( !r->ok() ) {
|
---|
1538 | delete r;
|
---|
1539 | r = 0;
|
---|
1540 | }
|
---|
1541 | return r;
|
---|
1542 | }
|
---|
1543 |
|
---|
1544 | /*!
|
---|
1545 | A convenience function for loadCharmap() that loads the charmap
|
---|
1546 | definition from the file \a filename.
|
---|
1547 | */
|
---|
1548 | QTextCodec* QTextCodec::loadCharmapFile(QString filename)
|
---|
1549 | {
|
---|
1550 | QFile f(filename);
|
---|
1551 | if (f.open(IO_ReadOnly)) {
|
---|
1552 | QTextCodecFromIOD* r = new QTextCodecFromIOD(&f);
|
---|
1553 | if ( !r->ok() )
|
---|
1554 | delete r;
|
---|
1555 | else
|
---|
1556 | return r;
|
---|
1557 | }
|
---|
1558 | return 0;
|
---|
1559 | }
|
---|
1560 |
|
---|
1561 | #endif //QT_NO_CODECS
|
---|
1562 |
|
---|
1563 | /*!
|
---|
1564 | Returns a string representing the current language and
|
---|
1565 | sublanguage, e.g. "pt" for Portuguese, or "pt_br" for Portuguese/Brazil.
|
---|
1566 | */
|
---|
1567 |
|
---|
1568 | const char* QTextCodec::locale()
|
---|
1569 | {
|
---|
1570 | return QLocalePrivate::systemLocaleName();
|
---|
1571 | }
|
---|
1572 |
|
---|
1573 | #ifndef QT_NO_CODECS
|
---|
1574 |
|
---|
1575 | class QSimpleTextCodec: public QTextCodec
|
---|
1576 | {
|
---|
1577 | public:
|
---|
1578 | QSimpleTextCodec( int );
|
---|
1579 | ~QSimpleTextCodec();
|
---|
1580 |
|
---|
1581 | QString toUnicode(const char* chars, int len) const;
|
---|
1582 | #if !defined(Q_NO_USING_KEYWORD)
|
---|
1583 | using QTextCodec::fromUnicode;
|
---|
1584 | #endif
|
---|
1585 | QCString fromUnicode(const QString& uc, int& lenInOut ) const;
|
---|
1586 | unsigned short characterFromUnicode(const QString &str, int pos) const;
|
---|
1587 |
|
---|
1588 | const char* name() const;
|
---|
1589 | const char* mimeName() const;
|
---|
1590 | int mibEnum() const;
|
---|
1591 |
|
---|
1592 | int heuristicContentMatch(const char* chars, int len) const;
|
---|
1593 |
|
---|
1594 | int heuristicNameMatch(const char* hint) const;
|
---|
1595 | #if !defined(Q_NO_USING_KEYWORD)
|
---|
1596 | using QTextCodec::canEncode;
|
---|
1597 | #endif
|
---|
1598 | bool canEncode( QChar ch ) const;
|
---|
1599 |
|
---|
1600 | void fromUnicode( const QChar *in, unsigned short *out, int length ) const;
|
---|
1601 |
|
---|
1602 | private:
|
---|
1603 | void buildReverseMap();
|
---|
1604 |
|
---|
1605 | int forwardIndex;
|
---|
1606 | #ifndef Q_WS_QWS
|
---|
1607 | QMemArray<unsigned char> *reverseMap;
|
---|
1608 | #endif
|
---|
1609 | };
|
---|
1610 |
|
---|
1611 | #ifdef Q_WS_QWS
|
---|
1612 | static const QSimpleTextCodec * reverseOwner = 0;
|
---|
1613 | static QMemArray<unsigned char> * reverseMap = 0;
|
---|
1614 | #endif
|
---|
1615 |
|
---|
1616 | #define LAST_MIB 2004
|
---|
1617 |
|
---|
1618 | static const struct {
|
---|
1619 | const char *mime;
|
---|
1620 | const char * cs;
|
---|
1621 | int mib;
|
---|
1622 | Q_UINT16 values[128];
|
---|
1623 | } unicodevalues[] = {
|
---|
1624 | // from RFC 1489, ftp://ftp.isi.edu/in-notes/rfc1489.txt
|
---|
1625 | { "KOI8-R", "KOI8-R", 2084,
|
---|
1626 | { 0x2500, 0x2502, 0x250C, 0x2510, 0x2514, 0x2518, 0x251C, 0x2524,
|
---|
1627 | 0x252C, 0x2534, 0x253C, 0x2580, 0x2584, 0x2588, 0x258C, 0x2590,
|
---|
1628 | 0x2591, 0x2592, 0x2593, 0x2320, 0x25A0, 0x2219/**/, 0x221A, 0x2248,
|
---|
1629 | 0x2264, 0x2265, 0x00A0, 0x2321, 0x00B0, 0x00B2, 0x00B7, 0x00F7,
|
---|
1630 | 0x2550, 0x2551, 0x2552, 0x0451, 0x2553, 0x2554, 0x2555, 0x2556,
|
---|
1631 | 0x2557, 0x2558, 0x2559, 0x255A, 0x255B, 0x255C, 0x255D, 0x255E,
|
---|
1632 | 0x255F, 0x2560, 0x2561, 0x0401, 0x2562, 0x2563, 0x2564, 0x2565,
|
---|
1633 | 0x2566, 0x2567, 0x2568, 0x2569, 0x256A, 0x256B, 0x256C, 0x00A9,
|
---|
1634 | 0x044E, 0x0430, 0x0431, 0x0446, 0x0434, 0x0435, 0x0444, 0x0433,
|
---|
1635 | 0x0445, 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E,
|
---|
1636 | 0x043F, 0x044F, 0x0440, 0x0441, 0x0442, 0x0443, 0x0436, 0x0432,
|
---|
1637 | 0x044C, 0x044B, 0x0437, 0x0448, 0x044D, 0x0449, 0x0447, 0x044A,
|
---|
1638 | 0x042E, 0x0410, 0x0411, 0x0426, 0x0414, 0x0415, 0x0424, 0x0413,
|
---|
1639 | 0x0425, 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E,
|
---|
1640 | 0x041F, 0x042F, 0x0420, 0x0421, 0x0422, 0x0423, 0x0416, 0x0412,
|
---|
1641 | 0x042C, 0x042B, 0x0417, 0x0428, 0x042D, 0x0429, 0x0427, 0x042A } },
|
---|
1642 | // /**/ - The BULLET OPERATOR is confused. Some people think
|
---|
1643 | // it should be 0x2022 (BULLET).
|
---|
1644 |
|
---|
1645 | // from RFC 2319, ftp://ftp.isi.edu/in-notes/rfc2319.txt
|
---|
1646 | { "KOI8-U", "KOI8-U", 2088,
|
---|
1647 | { 0x2500, 0x2502, 0x250C, 0x2510, 0x2514, 0x2518, 0x251C, 0x2524,
|
---|
1648 | 0x252C, 0x2534, 0x253C, 0x2580, 0x2584, 0x2588, 0x258C, 0x2590,
|
---|
1649 | 0x2591, 0x2592, 0x2593, 0x2320, 0x25A0, 0x2219, 0x221A, 0x2248,
|
---|
1650 | 0x2264, 0x2265, 0x00A0, 0x2321, 0x00B0, 0x00B2, 0x00B7, 0x00F7,
|
---|
1651 | 0x2550, 0x2551, 0x2552, 0x0451, 0x0454, 0x2554, 0x0456, 0x0457,
|
---|
1652 | 0x2557, 0x2558, 0x2559, 0x255A, 0x255B, 0x0491, 0x255D, 0x255E,
|
---|
1653 | 0x255F, 0x2560, 0x2561, 0x0401, 0x0404, 0x2563, 0x0406, 0x0407,
|
---|
1654 | 0x2566, 0x2567, 0x2568, 0x2569, 0x256A, 0x0490, 0x256C, 0x00A9,
|
---|
1655 | 0x044E, 0x0430, 0x0431, 0x0446, 0x0434, 0x0435, 0x0444, 0x0433,
|
---|
1656 | 0x0445, 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E,
|
---|
1657 | 0x043F, 0x044F, 0x0440, 0x0441, 0x0442, 0x0443, 0x0436, 0x0432,
|
---|
1658 | 0x044C, 0x044B, 0x0437, 0x0448, 0x044D, 0x0449, 0x0447, 0x044A,
|
---|
1659 | 0x042E, 0x0410, 0x0411, 0x0426, 0x0414, 0x0415, 0x0424, 0x0413,
|
---|
1660 | 0x0425, 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E,
|
---|
1661 | 0x041F, 0x042F, 0x0420, 0x0421, 0x0422, 0x0423, 0x0416, 0x0412,
|
---|
1662 | 0x042C, 0x042B, 0x0417, 0x0428, 0x042D, 0x0429, 0x0427, 0x042A } },
|
---|
1663 |
|
---|
1664 | // next bits generated from tables on the Unicode 2.0 CD. We can
|
---|
1665 | // use these tables since this is part of the transition to using
|
---|
1666 | // Unicode everywhere in Qt.
|
---|
1667 |
|
---|
1668 | // $ for A in 8 9 A B C D E F ; do for B in 0 1 2 3 4 5 6 7 8 9 A B C D E F ; do echo 0x${A}${B} 0xFFFD ; done ; done > /tmp/digits ; for a in 8859-* ; do ( awk '/^0x[89ABCDEF]/{ print $1, $2 }' < $a ; cat /tmp/digits ) | sort | uniq -w4 | cut -c6- | paste '-d ' - - - - - - - - | sed -e 's/ /, /g' -e 's/$/,/' -e '$ s/,$/} },/' -e '1 s/^/{ /' > ~/tmp/$a ; done
|
---|
1669 |
|
---|
1670 | // then I inserted the files manually.
|
---|
1671 | { "ISO-8859-2", "ISO 8859-2", 5,
|
---|
1672 | { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
|
---|
1673 | 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
|
---|
1674 | 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
|
---|
1675 | 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
|
---|
1676 | 0x00A0, 0x0104, 0x02D8, 0x0141, 0x00A4, 0x013D, 0x015A, 0x00A7,
|
---|
1677 | 0x00A8, 0x0160, 0x015E, 0x0164, 0x0179, 0x00AD, 0x017D, 0x017B,
|
---|
1678 | 0x00B0, 0x0105, 0x02DB, 0x0142, 0x00B4, 0x013E, 0x015B, 0x02C7,
|
---|
1679 | 0x00B8, 0x0161, 0x015F, 0x0165, 0x017A, 0x02DD, 0x017E, 0x017C,
|
---|
1680 | 0x0154, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x0139, 0x0106, 0x00C7,
|
---|
1681 | 0x010C, 0x00C9, 0x0118, 0x00CB, 0x011A, 0x00CD, 0x00CE, 0x010E,
|
---|
1682 | 0x0110, 0x0143, 0x0147, 0x00D3, 0x00D4, 0x0150, 0x00D6, 0x00D7,
|
---|
1683 | 0x0158, 0x016E, 0x00DA, 0x0170, 0x00DC, 0x00DD, 0x0162, 0x00DF,
|
---|
1684 | 0x0155, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x013A, 0x0107, 0x00E7,
|
---|
1685 | 0x010D, 0x00E9, 0x0119, 0x00EB, 0x011B, 0x00ED, 0x00EE, 0x010F,
|
---|
1686 | 0x0111, 0x0144, 0x0148, 0x00F3, 0x00F4, 0x0151, 0x00F6, 0x00F7,
|
---|
1687 | 0x0159, 0x016F, 0x00FA, 0x0171, 0x00FC, 0x00FD, 0x0163, 0x02D9} },
|
---|
1688 | { "ISO-8859-3", "ISO 8859-3", 6,
|
---|
1689 | { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
|
---|
1690 | 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
|
---|
1691 | 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
|
---|
1692 | 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
|
---|
1693 | 0x00A0, 0x0126, 0x02D8, 0x00A3, 0x00A4, 0xFFFD, 0x0124, 0x00A7,
|
---|
1694 | 0x00A8, 0x0130, 0x015E, 0x011E, 0x0134, 0x00AD, 0xFFFD, 0x017B,
|
---|
1695 | 0x00B0, 0x0127, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x0125, 0x00B7,
|
---|
1696 | 0x00B8, 0x0131, 0x015F, 0x011F, 0x0135, 0x00BD, 0xFFFD, 0x017C,
|
---|
1697 | 0x00C0, 0x00C1, 0x00C2, 0xFFFD, 0x00C4, 0x010A, 0x0108, 0x00C7,
|
---|
1698 | 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
|
---|
1699 | 0xFFFD, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x0120, 0x00D6, 0x00D7,
|
---|
1700 | 0x011C, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x016C, 0x015C, 0x00DF,
|
---|
1701 | 0x00E0, 0x00E1, 0x00E2, 0xFFFD, 0x00E4, 0x010B, 0x0109, 0x00E7,
|
---|
1702 | 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
|
---|
1703 | 0xFFFD, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x0121, 0x00F6, 0x00F7,
|
---|
1704 | 0x011D, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x016D, 0x015D, 0x02D9} },
|
---|
1705 | { "ISO-8859-4", "ISO 8859-4", 7,
|
---|
1706 | { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
|
---|
1707 | 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
|
---|
1708 | 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
|
---|
1709 | 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
|
---|
1710 | 0x00A0, 0x0104, 0x0138, 0x0156, 0x00A4, 0x0128, 0x013B, 0x00A7,
|
---|
1711 | 0x00A8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00AD, 0x017D, 0x00AF,
|
---|
1712 | 0x00B0, 0x0105, 0x02DB, 0x0157, 0x00B4, 0x0129, 0x013C, 0x02C7,
|
---|
1713 | 0x00B8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014A, 0x017E, 0x014B,
|
---|
1714 | 0x0100, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x012E,
|
---|
1715 | 0x010C, 0x00C9, 0x0118, 0x00CB, 0x0116, 0x00CD, 0x00CE, 0x012A,
|
---|
1716 | 0x0110, 0x0145, 0x014C, 0x0136, 0x00D4, 0x00D5, 0x00D6, 0x00D7,
|
---|
1717 | 0x00D8, 0x0172, 0x00DA, 0x00DB, 0x00DC, 0x0168, 0x016A, 0x00DF,
|
---|
1718 | 0x0101, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x012F,
|
---|
1719 | 0x010D, 0x00E9, 0x0119, 0x00EB, 0x0117, 0x00ED, 0x00EE, 0x012B,
|
---|
1720 | 0x0111, 0x0146, 0x014D, 0x0137, 0x00F4, 0x00F5, 0x00F6, 0x00F7,
|
---|
1721 | 0x00F8, 0x0173, 0x00FA, 0x00FB, 0x00FC, 0x0169, 0x016B, 0x02D9} },
|
---|
1722 | { "ISO-8859-5", "ISO 8859-5", 8,
|
---|
1723 | { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
|
---|
1724 | 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
|
---|
1725 | 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
|
---|
1726 | 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
|
---|
1727 | 0x00A0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407,
|
---|
1728 | 0x0408, 0x0409, 0x040A, 0x040B, 0x040C, 0x00AD, 0x040E, 0x040F,
|
---|
1729 | 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
|
---|
1730 | 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, 0x041F,
|
---|
1731 | 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
|
---|
1732 | 0x0428, 0x0429, 0x042A, 0x042B, 0x042C, 0x042D, 0x042E, 0x042F,
|
---|
1733 | 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
|
---|
1734 | 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, 0x043F,
|
---|
1735 | 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
|
---|
1736 | 0x0448, 0x0449, 0x044A, 0x044B, 0x044C, 0x044D, 0x044E, 0x044F,
|
---|
1737 | 0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457,
|
---|
1738 | 0x0458, 0x0459, 0x045A, 0x045B, 0x045C, 0x00A7, 0x045E, 0x045F} },
|
---|
1739 | { "ISO-8859-6", "ISO 8859-6", 82,
|
---|
1740 | { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
|
---|
1741 | 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
|
---|
1742 | 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
|
---|
1743 | 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
|
---|
1744 | 0x00A0, 0xFFFD, 0xFFFD, 0xFFFD, 0x00A4, 0xFFFD, 0xFFFD, 0xFFFD,
|
---|
1745 | 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x060C, 0x00AD, 0xFFFD, 0xFFFD,
|
---|
1746 | 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
|
---|
1747 | 0xFFFD, 0xFFFD, 0xFFFD, 0x061B, 0xFFFD, 0xFFFD, 0xFFFD, 0x061F,
|
---|
1748 | 0xFFFD, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
|
---|
1749 | 0x0628, 0x0629, 0x062A, 0x062B, 0x062C, 0x062D, 0x062E, 0x062F,
|
---|
1750 | 0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637,
|
---|
1751 | 0x0638, 0x0639, 0x063A, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
|
---|
1752 | 0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647,
|
---|
1753 | 0x0648, 0x0649, 0x064A, 0x064B, 0x064C, 0x064D, 0x064E, 0x064F,
|
---|
1754 | 0x0650, 0x0651, 0x0652, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
|
---|
1755 | 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD} },
|
---|
1756 | { "ISO-8859-7", "ISO 8859-7", 10,
|
---|
1757 | { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
|
---|
1758 | 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
|
---|
1759 | 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
|
---|
1760 | 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
|
---|
1761 | 0x00A0, 0x2018, 0x2019, 0x00A3, 0xFFFD, 0xFFFD, 0x00A6, 0x00A7,
|
---|
1762 | 0x00A8, 0x00A9, 0xFFFD, 0x00AB, 0x00AC, 0x00AD, 0xFFFD, 0x2015,
|
---|
1763 | 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x0384, 0x0385, 0x0386, 0x00B7,
|
---|
1764 | 0x0388, 0x0389, 0x038A, 0x00BB, 0x038C, 0x00BD, 0x038E, 0x038F,
|
---|
1765 | 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
|
---|
1766 | 0x0398, 0x0399, 0x039A, 0x039B, 0x039C, 0x039D, 0x039E, 0x039F,
|
---|
1767 | 0x03A0, 0x03A1, 0xFFFD, 0x03A3, 0x03A4, 0x03A5, 0x03A6, 0x03A7,
|
---|
1768 | 0x03A8, 0x03A9, 0x03AA, 0x03AB, 0x03AC, 0x03AD, 0x03AE, 0x03AF,
|
---|
1769 | 0x03B0, 0x03B1, 0x03B2, 0x03B3, 0x03B4, 0x03B5, 0x03B6, 0x03B7,
|
---|
1770 | 0x03B8, 0x03B9, 0x03BA, 0x03BB, 0x03BC, 0x03BD, 0x03BE, 0x03BF,
|
---|
1771 | 0x03C0, 0x03C1, 0x03C2, 0x03C3, 0x03C4, 0x03C5, 0x03C6, 0x03C7,
|
---|
1772 | 0x03C8, 0x03C9, 0x03CA, 0x03CB, 0x03CC, 0x03CD, 0x03CE, 0xFFFD} },
|
---|
1773 | { "ISO-8859-8-I", "ISO 8859-8-I", 85,
|
---|
1774 | { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
|
---|
1775 | 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
|
---|
1776 | 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
|
---|
1777 | 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
|
---|
1778 | 0x00A0, 0xFFFD, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,
|
---|
1779 | 0x00A8, 0x00A9, 0x00D7, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x203E,
|
---|
1780 | 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
|
---|
1781 | 0x00B8, 0x00B9, 0x00F7, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0xFFFD,
|
---|
1782 | 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
|
---|
1783 | 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
|
---|
1784 | 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
|
---|
1785 | 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x2017,
|
---|
1786 | 0x05D0, 0x05D1, 0x05D2, 0x05D3, 0x05D4, 0x05D5, 0x05D6, 0x05D7,
|
---|
1787 | 0x05D8, 0x05D9, 0x05DA, 0x05DB, 0x05DC, 0x05DD, 0x05DE, 0x05DF,
|
---|
1788 | 0x05E0, 0x05E1, 0x05E2, 0x05E3, 0x05E4, 0x05E5, 0x05E6, 0x05E7,
|
---|
1789 | 0x05E8, 0x05E9, 0x05EA, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD} },
|
---|
1790 | { "ISO-8859-9", "ISO 8859-9", 12,
|
---|
1791 | { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
|
---|
1792 | 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
|
---|
1793 | 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
|
---|
1794 | 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
|
---|
1795 | 0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,
|
---|
1796 | 0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
|
---|
1797 | 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
|
---|
1798 | 0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF,
|
---|
1799 | 0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7,
|
---|
1800 | 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
|
---|
1801 | 0x011E, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7,
|
---|
1802 | 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x0130, 0x015E, 0x00DF,
|
---|
1803 | 0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7,
|
---|
1804 | 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
|
---|
1805 | 0x011F, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7,
|
---|
1806 | 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x0131, 0x015F, 0x00FF} },
|
---|
1807 | { "ISO-8859-10", "ISO 8859-10", 13,
|
---|
1808 | { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
|
---|
1809 | 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
|
---|
1810 | 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
|
---|
1811 | 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
|
---|
1812 | 0x00A0, 0x0104, 0x0112, 0x0122, 0x012A, 0x0128, 0x0136, 0x00A7,
|
---|
1813 | 0x013B, 0x0110, 0x0160, 0x0166, 0x017D, 0x00AD, 0x016A, 0x014A,
|
---|
1814 | 0x00B0, 0x0105, 0x0113, 0x0123, 0x012B, 0x0129, 0x0137, 0x00B7,
|
---|
1815 | 0x013C, 0x0111, 0x0161, 0x0167, 0x017E, 0x2015, 0x016B, 0x014B,
|
---|
1816 | 0x0100, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x012E,
|
---|
1817 | 0x010C, 0x00C9, 0x0118, 0x00CB, 0x0116, 0x00CD, 0x00CE, 0x00CF,
|
---|
1818 | 0x00D0, 0x0145, 0x014C, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x0168,
|
---|
1819 | 0x00D8, 0x0172, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF,
|
---|
1820 | 0x0101, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x012F,
|
---|
1821 | 0x010D, 0x00E9, 0x0119, 0x00EB, 0x0117, 0x00ED, 0x00EE, 0x00EF,
|
---|
1822 | 0x00F0, 0x0146, 0x014D, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x0169,
|
---|
1823 | 0x00F8, 0x0173, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x0138} },
|
---|
1824 | { "ISO-8859-13", "ISO 8859-13", 109,
|
---|
1825 | { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
|
---|
1826 | 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
|
---|
1827 | 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
|
---|
1828 | 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
|
---|
1829 | 0x00A0, 0x201D, 0x00A2, 0x00A3, 0x00A4, 0x201E, 0x00A6, 0x00A7,
|
---|
1830 | 0x00D8, 0x00A9, 0x0156, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00C6,
|
---|
1831 | 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x201C, 0x00B5, 0x00B6, 0x00B7,
|
---|
1832 | 0x00F8, 0x00B9, 0x0157, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00E6,
|
---|
1833 | 0x0104, 0x012E, 0x0100, 0x0106, 0x00C4, 0x00C5, 0x0118, 0x0112,
|
---|
1834 | 0x010C, 0x00C9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012A, 0x013B,
|
---|
1835 | 0x0160, 0x0143, 0x0145, 0x00D3, 0x014C, 0x00D5, 0x00D6, 0x00D7,
|
---|
1836 | 0x0172, 0x0141, 0x015A, 0x016A, 0x00DC, 0x017B, 0x017D, 0x00DF,
|
---|
1837 | 0x0105, 0x012F, 0x0101, 0x0107, 0x00E4, 0x00E5, 0x0119, 0x0113,
|
---|
1838 | 0x010D, 0x00E9, 0x017A, 0x0117, 0x0123, 0x0137, 0x012B, 0x013C,
|
---|
1839 | 0x0161, 0x0144, 0x0146, 0x00F3, 0x014D, 0x00F5, 0x00F6, 0x00F7,
|
---|
1840 | 0x0173, 0x0142, 0x015B, 0x016B, 0x00FC, 0x017C, 0x017E, 0x2019} },
|
---|
1841 | { "ISO-8859-14", "ISO 8859-14", 110,
|
---|
1842 | { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
|
---|
1843 | 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
|
---|
1844 | 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
|
---|
1845 | 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
|
---|
1846 | 0x00A0, 0x1E02, 0x1E03, 0x00A3, 0x010A, 0x010B, 0x1E0A, 0x00A7,
|
---|
1847 | 0x1E80, 0x00A9, 0x1E82, 0x1E0B, 0x1EF2, 0x00AD, 0x00AE, 0x0178,
|
---|
1848 | 0x1E1E, 0x1E1F, 0x0120, 0x0121, 0x1E40, 0x1E41, 0x00B6, 0x1E56,
|
---|
1849 | 0x1E81, 0x1E57, 0x1E83, 0x1E60, 0x1EF3, 0x1E84, 0x1E85, 0x1E61,
|
---|
1850 | 0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7,
|
---|
1851 | 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
|
---|
1852 | 0x0174, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x1E6A,
|
---|
1853 | 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x0176, 0x00DF,
|
---|
1854 | 0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7,
|
---|
1855 | 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
|
---|
1856 | 0x0175, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x1E6B,
|
---|
1857 | 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x0177, 0x00FF} },
|
---|
1858 |
|
---|
1859 | // next bits generated again from tables on the Unicode 3.0 CD.
|
---|
1860 |
|
---|
1861 | // $ for a in CP* ; do ( awk '/^0x[89ABCDEF]/{ print $1, $2 }' < $a ) | sort | sed -e 's/#UNDEF.*$/0xFFFD/' | cut -c6- | paste '-d ' - - - - - - - - | sed -e 's/ /, /g' -e 's/$/,/' -e '$ s/,$/} },/' -e '1 s/^/{ /' > ~/tmp/$a ; done
|
---|
1862 |
|
---|
1863 | { "CP 850", "IBM 850", 2009,
|
---|
1864 | { 0x00C7, 0x00FC, 0x00E9, 0x00E2, 0x00E4, 0x00E0, 0x00E5, 0x00E7,
|
---|
1865 | 0x00EA, 0x00EB, 0x00E8, 0x00EF, 0x00EE, 0x00EC, 0x00C4, 0x00C5,
|
---|
1866 | 0x00C9, 0x00E6, 0x00C6, 0x00F4, 0x00F6, 0x00F2, 0x00FB, 0x00F9,
|
---|
1867 | 0x00FF, 0x00D6, 0x00DC, 0x00F8, 0x00A3, 0x00D8, 0x00D7, 0x0192,
|
---|
1868 | 0x00E1, 0x00ED, 0x00F3, 0x00FA, 0x00F1, 0x00D1, 0x00AA, 0x00BA,
|
---|
1869 | 0x00BF, 0x00AE, 0x00AC, 0x00BD, 0x00BC, 0x00A1, 0x00AB, 0x00BB,
|
---|
1870 | 0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x00C1, 0x00C2, 0x00C0,
|
---|
1871 | 0x00A9, 0x2563, 0x2551, 0x2557, 0x255D, 0x00A2, 0x00A5, 0x2510,
|
---|
1872 | 0x2514, 0x2534, 0x252C, 0x251C, 0x2500, 0x253C, 0x00E3, 0x00C3,
|
---|
1873 | 0x255A, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256C, 0x00A4,
|
---|
1874 | 0x00F0, 0x00D0, 0x00CA, 0x00CB, 0x00C8, 0x0131, 0x00CD, 0x00CE,
|
---|
1875 | 0x00CF, 0x2518, 0x250C, 0x2588, 0x2584, 0x00A6, 0x00CC, 0x2580,
|
---|
1876 | 0x00D3, 0x00DF, 0x00D4, 0x00D2, 0x00F5, 0x00D5, 0x00B5, 0x00FE,
|
---|
1877 | 0x00DE, 0x00DA, 0x00DB, 0x00D9, 0x00FD, 0x00DD, 0x00AF, 0x00B4,
|
---|
1878 | 0x00AD, 0x00B1, 0x2017, 0x00BE, 0x00B6, 0x00A7, 0x00F7, 0x00B8,
|
---|
1879 | 0x00B0, 0x00A8, 0x00B7, 0x00B9, 0x00B3, 0x00B2, 0x25A0, 0x00A0} },
|
---|
1880 | { "CP 874", "CP 874", 0, //### what is the mib?
|
---|
1881 | { 0x20AC, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x2026, 0xFFFD, 0xFFFD,
|
---|
1882 | 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
|
---|
1883 | 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
|
---|
1884 | 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
|
---|
1885 | 0x00A0, 0x0E01, 0x0E02, 0x0E03, 0x0E04, 0x0E05, 0x0E06, 0x0E07,
|
---|
1886 | 0x0E08, 0x0E09, 0x0E0A, 0x0E0B, 0x0E0C, 0x0E0D, 0x0E0E, 0x0E0F,
|
---|
1887 | 0x0E10, 0x0E11, 0x0E12, 0x0E13, 0x0E14, 0x0E15, 0x0E16, 0x0E17,
|
---|
1888 | 0x0E18, 0x0E19, 0x0E1A, 0x0E1B, 0x0E1C, 0x0E1D, 0x0E1E, 0x0E1F,
|
---|
1889 | 0x0E20, 0x0E21, 0x0E22, 0x0E23, 0x0E24, 0x0E25, 0x0E26, 0x0E27,
|
---|
1890 | 0x0E28, 0x0E29, 0x0E2A, 0x0E2B, 0x0E2C, 0x0E2D, 0x0E2E, 0x0E2F,
|
---|
1891 | 0x0E30, 0x0E31, 0x0E32, 0x0E33, 0x0E34, 0x0E35, 0x0E36, 0x0E37,
|
---|
1892 | 0x0E38, 0x0E39, 0x0E3A, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x0E3F,
|
---|
1893 | 0x0E40, 0x0E41, 0x0E42, 0x0E43, 0x0E44, 0x0E45, 0x0E46, 0x0E47,
|
---|
1894 | 0x0E48, 0x0E49, 0x0E4A, 0x0E4B, 0x0E4C, 0x0E4D, 0x0E4E, 0x0E4F,
|
---|
1895 | 0x0E50, 0x0E51, 0x0E52, 0x0E53, 0x0E54, 0x0E55, 0x0E56, 0x0E57,
|
---|
1896 | 0x0E58, 0x0E59, 0x0E5A, 0x0E5B, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD} },
|
---|
1897 | { "IBM 866", "IBM 866", 2086,
|
---|
1898 | { 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
|
---|
1899 | 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, 0x041F,
|
---|
1900 | 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
|
---|
1901 | 0x0428, 0x0429, 0x042A, 0x042B, 0x042C, 0x042D, 0x042E, 0x042F,
|
---|
1902 | 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
|
---|
1903 | 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, 0x043F,
|
---|
1904 | 0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x2561, 0x2562, 0x2556,
|
---|
1905 | 0x2555, 0x2563, 0x2551, 0x2557, 0x255D, 0x255C, 0x255B, 0x2510,
|
---|
1906 | 0x2514, 0x2534, 0x252C, 0x251C, 0x2500, 0x253C, 0x255E, 0x255F,
|
---|
1907 | 0x255A, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256C, 0x2567,
|
---|
1908 | 0x2568, 0x2564, 0x2565, 0x2559, 0x2558, 0x2552, 0x2553, 0x256B,
|
---|
1909 | 0x256A, 0x2518, 0x250C, 0x2588, 0x2584, 0x258C, 0x2590, 0x2580,
|
---|
1910 | 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
|
---|
1911 | 0x0448, 0x0449, 0x044A, 0x044B, 0x044C, 0x044D, 0x044E, 0x044F,
|
---|
1912 | 0x0401, 0x0451, 0x0404, 0x0454, 0x0407, 0x0457, 0x040E, 0x045E,
|
---|
1913 | 0x00B0, 0x2219, 0x00B7, 0x221A, 0x2116, 0x00A4, 0x25A0, 0x00A0} },
|
---|
1914 |
|
---|
1915 | { "windows-1250", "CP 1250", 2250,
|
---|
1916 | { 0x20AC, 0xFFFD, 0x201A, 0xFFFD, 0x201E, 0x2026, 0x2020, 0x2021,
|
---|
1917 | 0xFFFD, 0x2030, 0x0160, 0x2039, 0x015A, 0x0164, 0x017D, 0x0179,
|
---|
1918 | 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
|
---|
1919 | 0xFFFD, 0x2122, 0x0161, 0x203A, 0x015B, 0x0165, 0x017E, 0x017A,
|
---|
1920 | 0x00A0, 0x02C7, 0x02D8, 0x0141, 0x00A4, 0x0104, 0x00A6, 0x00A7,
|
---|
1921 | 0x00A8, 0x00A9, 0x015E, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x017B,
|
---|
1922 | 0x00B0, 0x00B1, 0x02DB, 0x0142, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
|
---|
1923 | 0x00B8, 0x0105, 0x015F, 0x00BB, 0x013D, 0x02DD, 0x013E, 0x017C,
|
---|
1924 | 0x0154, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x0139, 0x0106, 0x00C7,
|
---|
1925 | 0x010C, 0x00C9, 0x0118, 0x00CB, 0x011A, 0x00CD, 0x00CE, 0x010E,
|
---|
1926 | 0x0110, 0x0143, 0x0147, 0x00D3, 0x00D4, 0x0150, 0x00D6, 0x00D7,
|
---|
1927 | 0x0158, 0x016E, 0x00DA, 0x0170, 0x00DC, 0x00DD, 0x0162, 0x00DF,
|
---|
1928 | 0x0155, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x013A, 0x0107, 0x00E7,
|
---|
1929 | 0x010D, 0x00E9, 0x0119, 0x00EB, 0x011B, 0x00ED, 0x00EE, 0x010F,
|
---|
1930 | 0x0111, 0x0144, 0x0148, 0x00F3, 0x00F4, 0x0151, 0x00F6, 0x00F7,
|
---|
1931 | 0x0159, 0x016F, 0x00FA, 0x0171, 0x00FC, 0x00FD, 0x0163, 0x02D9} },
|
---|
1932 | { "windows-1251", "CP 1251", 2251,
|
---|
1933 | { 0x0402, 0x0403, 0x201A, 0x0453, 0x201E, 0x2026, 0x2020, 0x2021,
|
---|
1934 | 0x20AC, 0x2030, 0x0409, 0x2039, 0x040A, 0x040C, 0x040B, 0x040F,
|
---|
1935 | 0x0452, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
|
---|
1936 | 0xFFFD, 0x2122, 0x0459, 0x203A, 0x045A, 0x045C, 0x045B, 0x045F,
|
---|
1937 | 0x00A0, 0x040E, 0x045E, 0x0408, 0x00A4, 0x0490, 0x00A6, 0x00A7,
|
---|
1938 | 0x0401, 0x00A9, 0x0404, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x0407,
|
---|
1939 | 0x00B0, 0x00B1, 0x0406, 0x0456, 0x0491, 0x00B5, 0x00B6, 0x00B7,
|
---|
1940 | 0x0451, 0x2116, 0x0454, 0x00BB, 0x0458, 0x0405, 0x0455, 0x0457,
|
---|
1941 | 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
|
---|
1942 | 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, 0x041F,
|
---|
1943 | 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
|
---|
1944 | 0x0428, 0x0429, 0x042A, 0x042B, 0x042C, 0x042D, 0x042E, 0x042F,
|
---|
1945 | 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
|
---|
1946 | 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, 0x043F,
|
---|
1947 | 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
|
---|
1948 | 0x0448, 0x0449, 0x044A, 0x044B, 0x044C, 0x044D, 0x044E, 0x044F} },
|
---|
1949 | { "windows-1252", "CP 1252", 2252,
|
---|
1950 | { 0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
|
---|
1951 | 0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0xFFFD, 0x017D, 0xFFFD,
|
---|
1952 | 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
|
---|
1953 | 0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0xFFFD, 0x017E, 0x0178,
|
---|
1954 | 0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,
|
---|
1955 | 0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
|
---|
1956 | 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
|
---|
1957 | 0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF,
|
---|
1958 | 0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7,
|
---|
1959 | 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
|
---|
1960 | 0x00D0, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7,
|
---|
1961 | 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF,
|
---|
1962 | 0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7,
|
---|
1963 | 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
|
---|
1964 | 0x00F0, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7,
|
---|
1965 | 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x00FF} },
|
---|
1966 | { "windows-1253", "CP 1253", 2253,
|
---|
1967 | { 0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
|
---|
1968 | 0xFFFD, 0x2030, 0xFFFD, 0x2039, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
|
---|
1969 | 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
|
---|
1970 | 0xFFFD, 0x2122, 0xFFFD, 0x203A, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
|
---|
1971 | 0x00A0, 0x0385, 0x0386, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,
|
---|
1972 | 0x00A8, 0x00A9, 0xFFFD, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x2015,
|
---|
1973 | 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x0384, 0x00B5, 0x00B6, 0x00B7,
|
---|
1974 | 0x0388, 0x0389, 0x038A, 0x00BB, 0x038C, 0x00BD, 0x038E, 0x038F,
|
---|
1975 | 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
|
---|
1976 | 0x0398, 0x0399, 0x039A, 0x039B, 0x039C, 0x039D, 0x039E, 0x039F,
|
---|
1977 | 0x03A0, 0x03A1, 0xFFFD, 0x03A3, 0x03A4, 0x03A5, 0x03A6, 0x03A7,
|
---|
1978 | 0x03A8, 0x03A9, 0x03AA, 0x03AB, 0x03AC, 0x03AD, 0x03AE, 0x03AF,
|
---|
1979 | 0x03B0, 0x03B1, 0x03B2, 0x03B3, 0x03B4, 0x03B5, 0x03B6, 0x03B7,
|
---|
1980 | 0x03B8, 0x03B9, 0x03BA, 0x03BB, 0x03BC, 0x03BD, 0x03BE, 0x03BF,
|
---|
1981 | 0x03C0, 0x03C1, 0x03C2, 0x03C3, 0x03C4, 0x03C5, 0x03C6, 0x03C7,
|
---|
1982 | 0x03C8, 0x03C9, 0x03CA, 0x03CB, 0x03CC, 0x03CD, 0x03CE, 0xFFFD} },
|
---|
1983 | { "windows-1254", "CP 1254", 2254,
|
---|
1984 | { 0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
|
---|
1985 | 0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0xFFFD, 0xFFFD, 0xFFFD,
|
---|
1986 | 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
|
---|
1987 | 0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0xFFFD, 0xFFFD, 0x0178,
|
---|
1988 | 0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,
|
---|
1989 | 0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
|
---|
1990 | 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
|
---|
1991 | 0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF,
|
---|
1992 | 0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7,
|
---|
1993 | 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
|
---|
1994 | 0x011E, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7,
|
---|
1995 | 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x0130, 0x015E, 0x00DF,
|
---|
1996 | 0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7,
|
---|
1997 | 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
|
---|
1998 | 0x011F, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7,
|
---|
1999 | 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x0131, 0x015F, 0x00FF} },
|
---|
2000 | { "windows-1255", "CP 1255", 2255,
|
---|
2001 | { 0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
|
---|
2002 | 0x02C6, 0x2030, 0xFFFD, 0x2039, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
|
---|
2003 | 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
|
---|
2004 | 0x02DC, 0x2122, 0xFFFD, 0x203A, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
|
---|
2005 | 0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x20AA, 0x00A5, 0x00A6, 0x00A7,
|
---|
2006 | 0x00A8, 0x00A9, 0x00D7, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
|
---|
2007 | 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
|
---|
2008 | 0x00B8, 0x00B9, 0x00F7, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF,
|
---|
2009 | 0x05B0, 0x05B1, 0x05B2, 0x05B3, 0x05B4, 0x05B5, 0x05B6, 0x05B7,
|
---|
2010 | 0x05B8, 0x05B9, 0xFFFD, 0x05BB, 0x05BC, 0x05BD, 0x05BE, 0x05BF,
|
---|
2011 | 0x05C0, 0x05C1, 0x05C2, 0x05C3, 0x05F0, 0x05F1, 0x05F2, 0x05F3,
|
---|
2012 | 0x05F4, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
|
---|
2013 | 0x05D0, 0x05D1, 0x05D2, 0x05D3, 0x05D4, 0x05D5, 0x05D6, 0x05D7,
|
---|
2014 | 0x05D8, 0x05D9, 0x05DA, 0x05DB, 0x05DC, 0x05DD, 0x05DE, 0x05DF,
|
---|
2015 | 0x05E0, 0x05E1, 0x05E2, 0x05E3, 0x05E4, 0x05E5, 0x05E6, 0x05E7,
|
---|
2016 | 0x05E8, 0x05E9, 0x05EA, 0xFFFD, 0xFFFD, 0x200E, 0x200F, 0xFFFD} },
|
---|
2017 | { "windows-1256", "CP 1256", 2256,
|
---|
2018 | { 0x20AC, 0x067E, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
|
---|
2019 | 0x02C6, 0x2030, 0x0679, 0x2039, 0x0152, 0x0686, 0x0698, 0x0688,
|
---|
2020 | 0x06AF, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
|
---|
2021 | 0x06A9, 0x2122, 0x0691, 0x203A, 0x0153, 0x200C, 0x200D, 0x06BA,
|
---|
2022 | 0x00A0, 0x060C, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,
|
---|
2023 | 0x00A8, 0x00A9, 0x06BE, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
|
---|
2024 | 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
|
---|
2025 | 0x00B8, 0x00B9, 0x061B, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x061F,
|
---|
2026 | 0x06C1, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
|
---|
2027 | 0x0628, 0x0629, 0x062A, 0x062B, 0x062C, 0x062D, 0x062E, 0x062F,
|
---|
2028 | 0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x00D7,
|
---|
2029 | 0x0637, 0x0638, 0x0639, 0x063A, 0x0640, 0x0641, 0x0642, 0x0643,
|
---|
2030 | 0x00E0, 0x0644, 0x00E2, 0x0645, 0x0646, 0x0647, 0x0648, 0x00E7,
|
---|
2031 | 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x0649, 0x064A, 0x00EE, 0x00EF,
|
---|
2032 | 0x064B, 0x064C, 0x064D, 0x064E, 0x00F4, 0x064F, 0x0650, 0x00F7,
|
---|
2033 | 0x0651, 0x00F9, 0x0652, 0x00FB, 0x00FC, 0x200E, 0x200F, 0x06D2} },
|
---|
2034 | { "windows-1257", "CP 1257", 2257,
|
---|
2035 | { 0x20AC, 0xFFFD, 0x201A, 0xFFFD, 0x201E, 0x2026, 0x2020, 0x2021,
|
---|
2036 | 0xFFFD, 0x2030, 0xFFFD, 0x2039, 0xFFFD, 0x00A8, 0x02C7, 0x00B8,
|
---|
2037 | 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
|
---|
2038 | 0xFFFD, 0x2122, 0xFFFD, 0x203A, 0xFFFD, 0x00AF, 0x02DB, 0xFFFD,
|
---|
2039 | 0x00A0, 0xFFFD, 0x00A2, 0x00A3, 0x00A4, 0xFFFD, 0x00A6, 0x00A7,
|
---|
2040 | 0x00D8, 0x00A9, 0x0156, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00C6,
|
---|
2041 | 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
|
---|
2042 | 0x00F8, 0x00B9, 0x0157, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00E6,
|
---|
2043 | 0x0104, 0x012E, 0x0100, 0x0106, 0x00C4, 0x00C5, 0x0118, 0x0112,
|
---|
2044 | 0x010C, 0x00C9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012A, 0x013B,
|
---|
2045 | 0x0160, 0x0143, 0x0145, 0x00D3, 0x014C, 0x00D5, 0x00D6, 0x00D7,
|
---|
2046 | 0x0172, 0x0141, 0x015A, 0x016A, 0x00DC, 0x017B, 0x017D, 0x00DF,
|
---|
2047 | 0x0105, 0x012F, 0x0101, 0x0107, 0x00E4, 0x00E5, 0x0119, 0x0113,
|
---|
2048 | 0x010D, 0x00E9, 0x017A, 0x0117, 0x0123, 0x0137, 0x012B, 0x013C,
|
---|
2049 | 0x0161, 0x0144, 0x0146, 0x00F3, 0x014D, 0x00F5, 0x00F6, 0x00F7,
|
---|
2050 | 0x0173, 0x0142, 0x015B, 0x016B, 0x00FC, 0x017C, 0x017E, 0x02D9} },
|
---|
2051 | { "windows-1258", "CP 1258", 2258,
|
---|
2052 | { 0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
|
---|
2053 | 0x02C6, 0x2030, 0xFFFD, 0x2039, 0x0152, 0xFFFD, 0xFFFD, 0xFFFD,
|
---|
2054 | 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
|
---|
2055 | 0x02DC, 0x2122, 0xFFFD, 0x203A, 0x0153, 0xFFFD, 0xFFFD, 0x0178,
|
---|
2056 | 0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,
|
---|
2057 | 0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
|
---|
2058 | 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
|
---|
2059 | 0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF,
|
---|
2060 | 0x00C0, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x00C5, 0x00C6, 0x00C7,
|
---|
2061 | 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x0300, 0x00CD, 0x00CE, 0x00CF,
|
---|
2062 | 0x0110, 0x00D1, 0x0309, 0x00D3, 0x00D4, 0x01A0, 0x00D6, 0x00D7,
|
---|
2063 | 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x01AF, 0x0303, 0x00DF,
|
---|
2064 | 0x00E0, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x00E5, 0x00E6, 0x00E7,
|
---|
2065 | 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x0301, 0x00ED, 0x00EE, 0x00EF,
|
---|
2066 | 0x0111, 0x00F1, 0x0323, 0x00F3, 0x00F4, 0x01A1, 0x00F6, 0x00F7,
|
---|
2067 | 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x01B0, 0x20AB, 0x00FF} },
|
---|
2068 |
|
---|
2069 | { "Apple Roman", "Apple Roman", 0,
|
---|
2070 | { 0x00C4, 0x00C5, 0x00C7, 0x00C9, 0x00D1, 0x00D6, 0x00DC, 0x00E1,
|
---|
2071 | 0x00E0, 0x00E2, 0x00E4, 0x00E3, 0x00E5, 0x00E7, 0x00E9, 0x00E8,
|
---|
2072 | 0x00EA, 0x00EB, 0x00ED, 0x00EC, 0x00EE, 0x00EF, 0x00F1, 0x00F3,
|
---|
2073 | 0x00F2, 0x00F4, 0x00F6, 0x00F5, 0x00FA, 0x00F9, 0x00FB, 0x00FC,
|
---|
2074 | 0x2020, 0x00B0, 0x00A2, 0x00A3, 0x00A7, 0x2022, 0x00B6, 0x00DF,
|
---|
2075 | 0x00AE, 0x00A9, 0x2122, 0x00B4, 0x00A8, 0x2260, 0x00C6, 0x00D8,
|
---|
2076 | 0x221E, 0x00B1, 0x2264, 0x2265, 0x00A5, 0x00B5, 0x2202, 0x2211,
|
---|
2077 | 0x220F, 0x03C0, 0x222B, 0x00AA, 0x00BA, 0x03A9, 0x00E6, 0x00F8,
|
---|
2078 | 0x00BF, 0x00A1, 0x00AC, 0x221A, 0x0192, 0x2248, 0x2206, 0x00AB,
|
---|
2079 | 0x00BB, 0x2026, 0x00A0, 0x00C0, 0x00C3, 0x00D5, 0x0152, 0x0153,
|
---|
2080 | 0x2013, 0x2014, 0x201C, 0x201D, 0x2018, 0x2019, 0x00F7, 0x25CA,
|
---|
2081 | 0x00FF, 0x0178, 0x2044, 0x20AC, 0x2039, 0x203A, 0xFB01, 0xFB02,
|
---|
2082 | 0x2021, 0x00B7, 0x201A, 0x201E, 0x2030, 0x00C2, 0x00CA, 0x00C1,
|
---|
2083 | 0x00CB, 0x00C8, 0x00CD, 0x00CE, 0x00CF, 0x00CC, 0x00D3, 0x00D4,
|
---|
2084 | 0xF8FF, 0x00D2, 0x00DA, 0x00DB, 0x00D9, 0x0131, 0x02C6, 0x02DC,
|
---|
2085 | 0x00AF, 0x02D8, 0x02D9, 0x02DA, 0x00B8, 0x02DD, 0x02DB, 0x02C7} },
|
---|
2086 |
|
---|
2087 |
|
---|
2088 |
|
---|
2089 | // This one is based on the charmap file
|
---|
2090 | // /usr/share/i18n/charmaps/SAMI-WS2.gz, which is manually adapted
|
---|
2091 | // to this format by Børre Gaup <boerre@subdimension.com>
|
---|
2092 | { "WINSAMI2", "WS2", 0,
|
---|
2093 | { 0x20AC, 0xFFFD, 0x010C, 0x0192, 0x010D, 0x01B7, 0x0292, 0x01EE,
|
---|
2094 | 0x01EF, 0x0110, 0x0160, 0x2039, 0x0152, 0xFFFD, 0xFFFD, 0xFFFD,
|
---|
2095 | 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
|
---|
2096 | 0x0111, 0x01E6, 0x0161, 0x203A, 0x0153, 0xFFFD, 0xFFFD, 0x0178,
|
---|
2097 | 0x00A0, 0x01E7, 0x01E4, 0x00A3, 0x00A4, 0x01E5, 0x00A6, 0x00A7,
|
---|
2098 | 0x00A8, 0x00A9, 0x021E, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x021F,
|
---|
2099 | 0x00B0, 0x00B1, 0x01E8, 0x01E9, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
|
---|
2100 | 0x014A, 0x014B, 0x0166, 0x00BB, 0x0167, 0x00BD, 0x017D, 0x017E,
|
---|
2101 | 0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7,
|
---|
2102 | 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
|
---|
2103 | 0x00D0, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7,
|
---|
2104 | 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF,
|
---|
2105 | 0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7,
|
---|
2106 | 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
|
---|
2107 | 0x00F0, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7,
|
---|
2108 | 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x00FF} },
|
---|
2109 |
|
---|
2110 |
|
---|
2111 | // this one is generated from the charmap file located in /usr/share/i18n/charmaps
|
---|
2112 | // on most Linux distributions. The thai character set tis620 is byte by byte equivalent
|
---|
2113 | // to iso8859-11, so we name it 8859-11 here, but recognise the name tis620 too.
|
---|
2114 |
|
---|
2115 | // $ for A in 8 9 A B C D E F ; do for B in 0 1 2 3 4 5 6 7 8 9 A B C D E F ; do echo x${A}${B} 0xFFFD ; done ; done > /tmp/digits ; ( cut -c25- < TIS-620 ; cat /tmp/digits ) | awk '/^x[89ABCDEF]/{ print $1, $2 }' | sed -e 's/<U/0x/' -e 's/>//' | sort | uniq -w4 | cut -c5- | paste '-d ' - - - - - - - - | sed -e 's/ /, /g' -e 's/$/,/' -e '$ s/,$/} },/' -e '1 s/^/{ /' > ~/tmp/tis-620
|
---|
2116 | { "TIS-620", "ISO 8859-11", 2259, // Thai character set mib enum taken from tis620 (which is byte by byte equivalent)
|
---|
2117 | { 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
|
---|
2118 | 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
|
---|
2119 | 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
|
---|
2120 | 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
|
---|
2121 | 0xFFFD, 0x0E01, 0x0E02, 0x0E03, 0x0E04, 0x0E05, 0x0E06, 0x0E07,
|
---|
2122 | 0x0E08, 0x0E09, 0x0E0A, 0x0E0B, 0x0E0C, 0x0E0D, 0x0E0E, 0x0E0F,
|
---|
2123 | 0x0E10, 0x0E11, 0x0E12, 0x0E13, 0x0E14, 0x0E15, 0x0E16, 0x0E17,
|
---|
2124 | 0x0E18, 0x0E19, 0x0E1A, 0x0E1B, 0x0E1C, 0x0E1D, 0x0E1E, 0x0E1F,
|
---|
2125 | 0x0E20, 0x0E21, 0x0E22, 0x0E23, 0x0E24, 0x0E25, 0x0E26, 0x0E27,
|
---|
2126 | 0x0E28, 0x0E29, 0x0E2A, 0x0E2B, 0x0E2C, 0x0E2D, 0x0E2E, 0x0E2F,
|
---|
2127 | 0x0E30, 0x0E31, 0x0E32, 0x0E33, 0x0E34, 0x0E35, 0x0E36, 0x0E37,
|
---|
2128 | 0x0E38, 0x0E39, 0x0E3A, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x0E3F,
|
---|
2129 | 0x0E40, 0x0E41, 0x0E42, 0x0E43, 0x0E44, 0x0E45, 0x0E46, 0x0E47,
|
---|
2130 | 0x0E48, 0x0E49, 0x0E4A, 0x0E4B, 0x0E4C, 0x0E4D, 0x0E4E, 0x0E4F,
|
---|
2131 | 0x0E50, 0x0E51, 0x0E52, 0x0E53, 0x0E54, 0x0E55, 0x0E56, 0x0E57,
|
---|
2132 | 0x0E58, 0x0E59, 0x0E5A, 0x0E5B, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD } },
|
---|
2133 |
|
---|
2134 | /*
|
---|
2135 | Name: hp-roman8 [HP-PCL5,RFC1345,KXS2]
|
---|
2136 | MIBenum: 2004
|
---|
2137 | Source: LaserJet IIP Printer User's Manual,
|
---|
2138 | HP part no 33471-90901, Hewlet-Packard, June 1989.
|
---|
2139 | Alias: roman8
|
---|
2140 | Alias: r8
|
---|
2141 | Alias: csHPRoman8
|
---|
2142 | */
|
---|
2143 | { "Roman8", "HP-Roman8", 2004,
|
---|
2144 | { 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
|
---|
2145 | 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
|
---|
2146 | 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
|
---|
2147 | 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
|
---|
2148 | 0x00A0, 0x00C0, 0x00C2, 0x00C8, 0x00CA, 0x00CB, 0x00CE, 0x00CF,
|
---|
2149 | 0x00B4, 0x02CB, 0x02C6, 0x00A8, 0x02DC, 0x00D9, 0x00DB, 0x20A4,
|
---|
2150 | 0x00AF, 0x00DD, 0x00FD, 0x00B0, 0x00C7, 0x00E7, 0x00D1, 0x00F1,
|
---|
2151 | 0x00A1, 0x00BF, 0x00A4, 0x00A3, 0x00A5, 0x00A7, 0x0192, 0x00A2,
|
---|
2152 | 0x00E2, 0x00EA, 0x00F4, 0x00FB, 0x00E1, 0x00E9, 0x00F3, 0x00FA,
|
---|
2153 | 0x00E0, 0x00E8, 0x00F2, 0x00F9, 0x00E4, 0x00EB, 0x00F6, 0x00FC,
|
---|
2154 | 0x00C5, 0x00EE, 0x00D8, 0x00C6, 0x00E5, 0x00ED, 0x00F8, 0x00E6,
|
---|
2155 | 0x00C4, 0x00EC, 0x00D6, 0x00DC, 0x00C9, 0x00EF, 0x00DF, 0x00D4,
|
---|
2156 | 0x00C1, 0x00C3, 0x00E3, 0x00D0, 0x00F0, 0x00CD, 0x00CC, 0x00D3,
|
---|
2157 | 0x00D2, 0x00D5, 0x00F5, 0x0160, 0x0161, 0x00DA, 0x0178, 0x00FF,
|
---|
2158 | 0x00DE, 0x00FE, 0x00B7, 0x00B5, 0x00B6, 0x00BE, 0x2014, 0x00BC,
|
---|
2159 | 0x00BD, 0x00AA, 0x00BA, 0x00AB, 0x25A0, 0x00BB, 0x00B1, 0xFFFD } }
|
---|
2160 |
|
---|
2161 | // if you add more character sets at the end, change LAST_MIB above
|
---|
2162 | };
|
---|
2163 |
|
---|
2164 | QSimpleTextCodec::QSimpleTextCodec( int i )
|
---|
2165 | : QTextCodec(), forwardIndex( i )
|
---|
2166 | {
|
---|
2167 | #ifndef Q_WS_QWS
|
---|
2168 | reverseMap = 0;
|
---|
2169 | #endif
|
---|
2170 | }
|
---|
2171 |
|
---|
2172 |
|
---|
2173 | QSimpleTextCodec::~QSimpleTextCodec()
|
---|
2174 | {
|
---|
2175 | #ifndef Q_WS_QWS
|
---|
2176 | delete reverseMap;
|
---|
2177 | #else
|
---|
2178 | if ( reverseOwner == this ) {
|
---|
2179 | delete reverseMap;
|
---|
2180 | reverseMap = 0;
|
---|
2181 | reverseOwner = 0;
|
---|
2182 | }
|
---|
2183 | #endif
|
---|
2184 | }
|
---|
2185 |
|
---|
2186 | void QSimpleTextCodec::buildReverseMap()
|
---|
2187 | {
|
---|
2188 | #ifdef Q_WS_QWS
|
---|
2189 | if ( reverseOwner != this ) {
|
---|
2190 | int m = 0;
|
---|
2191 | int i = 0;
|
---|
2192 | while( i < 128 ) {
|
---|
2193 | if ( unicodevalues[forwardIndex].values[i] > m &&
|
---|
2194 | unicodevalues[forwardIndex].values[i] < 0xfffd )
|
---|
2195 | m = unicodevalues[forwardIndex].values[i];
|
---|
2196 | i++;
|
---|
2197 | }
|
---|
2198 | m++;
|
---|
2199 | if ( !reverseMap )
|
---|
2200 | reverseMap = new QMemArray<unsigned char>( m );
|
---|
2201 | if ( m > (int)(reverseMap->size()) )
|
---|
2202 | reverseMap->resize( m );
|
---|
2203 | for( i = 0; i < 128 && i < m; i++ )
|
---|
2204 | (*reverseMap)[i] = (char)i;
|
---|
2205 | for( ;i < m; i++ )
|
---|
2206 | (*reverseMap)[i] = 0;
|
---|
2207 | for( i=128; i<256; i++ ) {
|
---|
2208 | int u = unicodevalues[forwardIndex].values[i-128];
|
---|
2209 | if ( u < m )
|
---|
2210 | (*reverseMap)[u] = (char)(unsigned char)(i);
|
---|
2211 | }
|
---|
2212 | reverseOwner = this;
|
---|
2213 | }
|
---|
2214 | #else
|
---|
2215 | if ( !reverseMap ) {
|
---|
2216 | QMemArray<unsigned char> **map = &((QSimpleTextCodec *)this)->reverseMap;
|
---|
2217 | int m = 0;
|
---|
2218 | int i = 0;
|
---|
2219 | while( i < 128 ) {
|
---|
2220 | if ( unicodevalues[forwardIndex].values[i] > m &&
|
---|
2221 | unicodevalues[forwardIndex].values[i] < 0xfffd )
|
---|
2222 | m = unicodevalues[forwardIndex].values[i];
|
---|
2223 | i++;
|
---|
2224 | }
|
---|
2225 | m++;
|
---|
2226 | *map = new QMemArray<unsigned char>( m );
|
---|
2227 | for( i = 0; i < 128 && i < m; i++ )
|
---|
2228 | (**map)[i] = (char)i;
|
---|
2229 | for( ;i < m; i++ )
|
---|
2230 | (**map)[i] = 0;
|
---|
2231 | for( i=128; i<256; i++ ) {
|
---|
2232 | int u = unicodevalues[forwardIndex].values[i-128];
|
---|
2233 | if ( u < m )
|
---|
2234 | (**map)[u] = (char)(unsigned char)(i);
|
---|
2235 | }
|
---|
2236 | }
|
---|
2237 | #endif
|
---|
2238 | }
|
---|
2239 |
|
---|
2240 | QString QSimpleTextCodec::toUnicode(const char* chars, int len) const
|
---|
2241 | {
|
---|
2242 | if ( len <= 0 || chars == 0 )
|
---|
2243 | return QString::null;
|
---|
2244 |
|
---|
2245 | const unsigned char * c = (const unsigned char *)chars;
|
---|
2246 | int i;
|
---|
2247 |
|
---|
2248 | for ( i = 0; i < len; i++ )
|
---|
2249 | if ( c[i] == '\0' ) {
|
---|
2250 | len = i;
|
---|
2251 | break;
|
---|
2252 | }
|
---|
2253 |
|
---|
2254 | QString r;
|
---|
2255 | r.setUnicode(0, len);
|
---|
2256 | QChar* uc = (QChar*)r.unicode(); // const_cast
|
---|
2257 |
|
---|
2258 | for ( i = 0; i < len; i++ ) {
|
---|
2259 | if ( c[i] > 127 )
|
---|
2260 | uc[i] = unicodevalues[forwardIndex].values[c[i]-128];
|
---|
2261 | else
|
---|
2262 | uc[i] = c[i];
|
---|
2263 | }
|
---|
2264 | return r;
|
---|
2265 | }
|
---|
2266 |
|
---|
2267 |
|
---|
2268 | QCString QSimpleTextCodec::fromUnicode(const QString& uc, int& len ) const
|
---|
2269 | {
|
---|
2270 | #ifdef Q_WS_QWS
|
---|
2271 | if ( this != reverseOwner )
|
---|
2272 | #else
|
---|
2273 | if ( !reverseMap )
|
---|
2274 | #endif
|
---|
2275 | ((QSimpleTextCodec *)this)->buildReverseMap();
|
---|
2276 |
|
---|
2277 | if ( len <0 || len > (int)uc.length() )
|
---|
2278 | len = uc.length();
|
---|
2279 | QCString r( len+1 );
|
---|
2280 | int i = len;
|
---|
2281 | int u;
|
---|
2282 | const QChar* ucp = uc.unicode();
|
---|
2283 | unsigned char* rp = (unsigned char *)r.data();
|
---|
2284 | unsigned char* rmp = reverseMap->data();
|
---|
2285 | int rmsize = (int) reverseMap->size();
|
---|
2286 | while( i-- )
|
---|
2287 | {
|
---|
2288 | u = ucp->unicode();
|
---|
2289 | *rp = u < 128 ? u : (( u < rmsize ) ? (*(rmp+u)) : '?' );
|
---|
2290 | if ( *rp == 0 ) *rp = '?';
|
---|
2291 | rp++;
|
---|
2292 | ucp++;
|
---|
2293 | }
|
---|
2294 | r[len] = 0;
|
---|
2295 | return r;
|
---|
2296 | }
|
---|
2297 |
|
---|
2298 | void QSimpleTextCodec::fromUnicode( const QChar *in, unsigned short *out, int length ) const
|
---|
2299 | {
|
---|
2300 | #ifdef Q_WS_QWS
|
---|
2301 | if ( this != reverseOwner )
|
---|
2302 | #else
|
---|
2303 | if ( !reverseMap )
|
---|
2304 | #endif
|
---|
2305 | ((QSimpleTextCodec *)this)->buildReverseMap();
|
---|
2306 |
|
---|
2307 | unsigned char* rmp = reverseMap->data();
|
---|
2308 | int rmsize = (int) reverseMap->size();
|
---|
2309 | while ( length-- ) {
|
---|
2310 | unsigned short u = in->unicode();
|
---|
2311 | *out = u < 128 ? u : (( u < rmsize ) ? (*(rmp+u)) : 0 );
|
---|
2312 | ++in;
|
---|
2313 | ++out;
|
---|
2314 | }
|
---|
2315 | }
|
---|
2316 |
|
---|
2317 | unsigned short QSimpleTextCodec::characterFromUnicode(const QString &str, int pos) const
|
---|
2318 | {
|
---|
2319 | #ifdef Q_WS_QWS
|
---|
2320 | if ( this != reverseOwner )
|
---|
2321 | #else
|
---|
2322 | if ( !reverseMap )
|
---|
2323 | #endif
|
---|
2324 | ((QSimpleTextCodec *)this)->buildReverseMap();
|
---|
2325 |
|
---|
2326 | unsigned short u = str[pos].unicode();
|
---|
2327 | unsigned char* rmp = reverseMap->data();
|
---|
2328 | int rmsize = (int) reverseMap->size();
|
---|
2329 | return u < 128 ? u : (( u < rmsize ) ? (*(rmp+u)) : 0 );
|
---|
2330 | }
|
---|
2331 |
|
---|
2332 | bool QSimpleTextCodec::canEncode( QChar ch ) const
|
---|
2333 | {
|
---|
2334 | #ifdef Q_WS_QWS
|
---|
2335 | if ( this != reverseOwner )
|
---|
2336 | #else
|
---|
2337 | if ( !reverseMap )
|
---|
2338 | #endif
|
---|
2339 | ((QSimpleTextCodec *)this)->buildReverseMap();
|
---|
2340 |
|
---|
2341 | unsigned short u = ch.unicode();
|
---|
2342 | unsigned char* rmp = reverseMap->data();
|
---|
2343 | int rmsize = (int) reverseMap->size();
|
---|
2344 | return u < 128 ? TRUE : (( u < rmsize ) ? (*(rmp+u) != 0) : FALSE );
|
---|
2345 | }
|
---|
2346 |
|
---|
// Returns the `cs` field of this codec's entry in the unicodevalues table.
const char* QSimpleTextCodec::name() const
{
    return unicodevalues[forwardIndex].cs;
}
|
---|
2351 |
|
---|
// Returns the `mime` field of this codec's entry in the unicodevalues table.
const char* QSimpleTextCodec::mimeName() const
{
    return unicodevalues[forwardIndex].mime;
}
|
---|
2356 |
|
---|
2357 |
|
---|
// Returns the `mib` field of this codec's entry in the unicodevalues table.
int QSimpleTextCodec::mibEnum() const
{
    return unicodevalues[forwardIndex].mib;
}
|
---|
2362 |
|
---|
2363 | int QSimpleTextCodec::heuristicNameMatch(const char* hint) const
|
---|
2364 | {
|
---|
2365 | if ( qstricmp( hint, mimeName() ) == 0 )
|
---|
2366 | return 10000; // return a large value
|
---|
2367 | if ( hint[0]=='k' ) {
|
---|
2368 | QCString lhint = QCString(hint).lower();
|
---|
2369 | // Help people with messy fonts
|
---|
2370 | if ( lhint == "koi8-1" )
|
---|
2371 | return QTextCodec::heuristicNameMatch("koi8-r")-1;
|
---|
2372 | if ( lhint == "koi8-ru" )
|
---|
2373 | return QTextCodec::heuristicNameMatch("koi8-r")-1;
|
---|
2374 | } else if ( hint[0] == 't' && mibEnum() == 2259 /* iso8859-11 */ ) {
|
---|
2375 | // 8859-11 and tis620 are byte by byte equivalent
|
---|
2376 | int i = simpleHeuristicNameMatch("tis620-0", hint);
|
---|
2377 | if( !i )
|
---|
2378 | i = simpleHeuristicNameMatch("tis-620", hint);
|
---|
2379 | if( i ) return i;
|
---|
2380 | } else if ( mibEnum() == 82 /* ISO 8859-6 */ ) {
|
---|
2381 | int i = simpleHeuristicNameMatch("ISO 8859-6-I", hint);
|
---|
2382 | if ( i )
|
---|
2383 | return i;
|
---|
2384 | }
|
---|
2385 | return QTextCodec::heuristicNameMatch(hint);
|
---|
2386 | }
|
---|
2387 |
|
---|
2388 | int QSimpleTextCodec::heuristicContentMatch(const char* chars, int len) const
|
---|
2389 | {
|
---|
2390 | if ( len<1 || !chars )
|
---|
2391 | return -1;
|
---|
2392 | int i = 0;
|
---|
2393 | const uchar * c = (const unsigned char *)chars;
|
---|
2394 | int r = 0;
|
---|
2395 | while( i<len && c && *c ) {
|
---|
2396 | if ( *c >= 128 ) {
|
---|
2397 | if ( unicodevalues[forwardIndex].values[(*c)-128] == 0xfffd )
|
---|
2398 | return -1;
|
---|
2399 | }
|
---|
2400 | if ( (*c >= ' ' && *c < 127) ||
|
---|
2401 | *c == '\n' || *c == '\t' || *c == '\r' )
|
---|
2402 | r++;
|
---|
2403 | i++;
|
---|
2404 | c++;
|
---|
2405 | }
|
---|
2406 | if ( mibEnum()==4 )
|
---|
2407 | r+=1;
|
---|
2408 | return r;
|
---|
2409 | }
|
---|
2410 |
|
---|
2411 | #endif
|
---|
2412 |
|
---|
// Codec for ISO 8859-1 (Latin-1), MIB 4. Conversion is a direct
// byte <-> code point mapping; characters with a non-zero row have no
// encoding.
class QLatin1Codec : public QTextCodec
{
public:
#if !defined(Q_NO_USING_KEYWORD)
    // keep the base-class overloads visible next to the ones declared below
    using QTextCodec::fromUnicode;
    using QTextCodec::toUnicode;
#endif
    QString toUnicode(const char* chars, int len) const;
    QCString fromUnicode(const QString& uc, int& lenInOut ) const;
    void fromUnicode( const QChar *in, unsigned short *out, int length ) const;
    unsigned short characterFromUnicode(const QString &str, int pos) const;

    const char* name() const;
    const char* mimeName() const;
    int mibEnum() const;

    int heuristicContentMatch(const char* chars, int len) const;

private:
    // NOTE(review): not referenced by any QLatin1Codec method in this part
    // of the file and never initialised here -- confirm whether it is needed.
    int forwardIndex;
};
|
---|
2434 |
|
---|
2435 |
|
---|
2436 | QString QLatin1Codec::toUnicode(const char* chars, int len) const
|
---|
2437 | {
|
---|
2438 | if ( chars == 0 )
|
---|
2439 | return QString::null;
|
---|
2440 |
|
---|
2441 | return QString::fromLatin1(chars, len);
|
---|
2442 | }
|
---|
2443 |
|
---|
2444 |
|
---|
2445 | QCString QLatin1Codec::fromUnicode(const QString& uc, int& len ) const
|
---|
2446 | {
|
---|
2447 | if ( len <0 || len > (int)uc.length() )
|
---|
2448 | len = uc.length();
|
---|
2449 | QCString r( len+1 );
|
---|
2450 | char *d = r.data();
|
---|
2451 | int i = 0;
|
---|
2452 | const QChar *ch = uc.unicode();
|
---|
2453 | while ( i < len ) {
|
---|
2454 | d[i] = ch->row() ? '?' : ch->cell();
|
---|
2455 | i++;
|
---|
2456 | ch++;
|
---|
2457 | }
|
---|
2458 | r[len] = 0;
|
---|
2459 | return r;
|
---|
2460 | }
|
---|
2461 |
|
---|
2462 | void QLatin1Codec::fromUnicode( const QChar *in, unsigned short *out, int length ) const
|
---|
2463 | {
|
---|
2464 | while ( length-- ) {
|
---|
2465 | *out = in->row() ? 0 : in->cell();
|
---|
2466 | ++in;
|
---|
2467 | ++out;
|
---|
2468 | }
|
---|
2469 | }
|
---|
2470 |
|
---|
2471 | unsigned short QLatin1Codec::characterFromUnicode(const QString &str, int pos) const
|
---|
2472 | {
|
---|
2473 | const QChar *ch = str.unicode() + pos;
|
---|
2474 | if (ch->row())
|
---|
2475 | return 0;
|
---|
2476 | return (unsigned short) ch->cell();
|
---|
2477 | }
|
---|
2478 |
|
---|
2479 |
|
---|
// Returns the codec name, "ISO 8859-1".
const char* QLatin1Codec::name() const
{
    return "ISO 8859-1";
}
|
---|
2484 |
|
---|
// Returns the MIME name, "ISO-8859-1" (hyphenated, unlike name()).
const char* QLatin1Codec::mimeName() const
{
    return "ISO-8859-1";
}
|
---|
2489 |
|
---|
2490 |
|
---|
// Returns the MIB number used for this codec, 4.
int QLatin1Codec::mibEnum() const
{
    return 4;
}
|
---|
2495 |
|
---|
2496 | int QLatin1Codec::heuristicContentMatch(const char* chars, int len) const
|
---|
2497 | {
|
---|
2498 | if ( len<1 || !chars )
|
---|
2499 | return -1;
|
---|
2500 | int i = 0;
|
---|
2501 | const uchar * c = (const unsigned char *)chars;
|
---|
2502 | int r = 0;
|
---|
2503 | while( i<len && c && *c ) {
|
---|
2504 | if ( *c >= 0x80 && *c < 0xa0 )
|
---|
2505 | return -1;
|
---|
2506 | if ( (*c >= ' ' && *c < 127) ||
|
---|
2507 | *c == '\n' || *c == '\t' || *c == '\r' )
|
---|
2508 | r++;
|
---|
2509 | i++;
|
---|
2510 | c++;
|
---|
2511 | }
|
---|
2512 | if ( this == (const QTextCodec *)codecForLocale() )
|
---|
2513 | r += 5;
|
---|
2514 | return r;
|
---|
2515 | }
|
---|
2516 |
|
---|
// Codec for ISO 8859-15, MIB 111. Derived from QLatin1Codec; the
// conversion functions are replaced to handle the eight positions
// (0xA4, 0xA6, 0xA8, 0xB4, 0xB8, 0xBC, 0xBD, 0xBE) that differ from
// Latin-1.
class QLatin15Codec: public QLatin1Codec
{
public:
    QString toUnicode(const char* chars, int len) const;
#if !defined(Q_NO_USING_KEYWORD)
    // keep the base-class overloads visible next to the ones declared below
    using QTextCodec::fromUnicode;
#endif
    QCString fromUnicode(const QString& uc, int& lenInOut ) const;
    void fromUnicode( const QChar *in, unsigned short *out, int length ) const;
    unsigned short characterFromUnicode(const QString &str, int pos) const;

    const char* name() const;
    const char* mimeName() const;
    int mibEnum() const;

private:
    // NOTE(review): not referenced by any QLatin15Codec method in this part
    // of the file and never initialised here -- confirm whether it is needed.
    int forwardIndex;
};
|
---|
2535 |
|
---|
2536 |
|
---|
2537 | QString QLatin15Codec::toUnicode(const char* chars, int len) const
|
---|
2538 | {
|
---|
2539 | if ( chars == 0 )
|
---|
2540 | return QString::null;
|
---|
2541 |
|
---|
2542 | QString str = QString::fromLatin1(chars, len);
|
---|
2543 | QChar *uc = (QChar *)str.unicode();
|
---|
2544 | while( len-- ) {
|
---|
2545 | switch( uc->unicode() ) {
|
---|
2546 | case 0xa4:
|
---|
2547 | *uc = 0x20ac;
|
---|
2548 | break;
|
---|
2549 | case 0xa6:
|
---|
2550 | *uc = 0x0160;
|
---|
2551 | break;
|
---|
2552 | case 0xa8:
|
---|
2553 | *uc = 0x0161;
|
---|
2554 | break;
|
---|
2555 | case 0xb4:
|
---|
2556 | *uc = 0x017d;
|
---|
2557 | break;
|
---|
2558 | case 0xb8:
|
---|
2559 | *uc = 0x017e;
|
---|
2560 | break;
|
---|
2561 | case 0xbc:
|
---|
2562 | *uc = 0x0152;
|
---|
2563 | break;
|
---|
2564 | case 0xbd:
|
---|
2565 | *uc = 0x0153;
|
---|
2566 | break;
|
---|
2567 | case 0xbe:
|
---|
2568 | *uc = 0x0178;
|
---|
2569 | break;
|
---|
2570 | default:
|
---|
2571 | break;
|
---|
2572 | }
|
---|
2573 | uc++;
|
---|
2574 | }
|
---|
2575 | return str;
|
---|
2576 | }
|
---|
2577 |
|
---|
// Maps one Unicode value to its ISO 8859-15 byte.
// Values below 0x0100 map to themselves, except the eight Latin-1
// positions that carry a different character in 8859-15; those, and
// every other value without an encoding, yield '?' when replacement is
// set and 0 otherwise.
static inline unsigned char
latin15CharFromUnicode( unsigned short uc, bool replacement = true )
{
    const unsigned char unmapped = replacement ? '?' : 0;

    switch( uc ) {
    // Latin-1 characters whose byte position is reused in 8859-15
    case 0x00a4:
    case 0x00a6:
    case 0x00a8:
    case 0x00b4:
    case 0x00b8:
    case 0x00bc:
    case 0x00bd:
    case 0x00be:
        return unmapped;

    // the characters that took over those positions
    case 0x20ac:
        return 0xa4;
    case 0x0160:
        return 0xa6;
    case 0x0161:
        return 0xa8;
    case 0x017d:
        return 0xb4;
    case 0x017e:
        return 0xb8;
    case 0x0152:
        return 0xbc;
    case 0x0153:
        return 0xbd;
    case 0x0178:
        return 0xbe;

    default:
        break;
    }

    // everything else is shared with Latin-1, or not encodable at all
    return uc < 0x0100 ? (unsigned char) uc : unmapped;
}
|
---|
2637 |
|
---|
2638 |
|
---|
2639 | void QLatin15Codec::fromUnicode( const QChar *in, unsigned short *out, int length ) const
|
---|
2640 | {
|
---|
2641 | while ( length-- ) {
|
---|
2642 | *out = latin15CharFromUnicode( in->unicode(), FALSE );
|
---|
2643 | ++in;
|
---|
2644 | ++out;
|
---|
2645 | }
|
---|
2646 | }
|
---|
2647 |
|
---|
2648 |
|
---|
2649 | QCString QLatin15Codec::fromUnicode(const QString& uc, int& len ) const
|
---|
2650 | {
|
---|
2651 | if ( len <0 || len > (int)uc.length() )
|
---|
2652 | len = uc.length();
|
---|
2653 | QCString r( len+1 );
|
---|
2654 | char *d = r.data();
|
---|
2655 | int i = 0;
|
---|
2656 | const QChar *ch = uc.unicode();
|
---|
2657 | while ( i < len ) {
|
---|
2658 | d[i] = latin15CharFromUnicode( ch->unicode() );
|
---|
2659 | i++;
|
---|
2660 | ch++;
|
---|
2661 | }
|
---|
2662 | r[len] = 0;
|
---|
2663 | return r;
|
---|
2664 | }
|
---|
2665 |
|
---|
2666 | unsigned short QLatin15Codec::characterFromUnicode(const QString &str, int pos) const
|
---|
2667 | {
|
---|
2668 | return latin15CharFromUnicode( str.unicode()[pos].unicode(), FALSE );
|
---|
2669 | }
|
---|
2670 |
|
---|
2671 |
|
---|
// Returns the codec name, "ISO 8859-15".
const char* QLatin15Codec::name() const
{
    return "ISO 8859-15";
}
|
---|
2676 |
|
---|
// Returns the MIME name, "ISO-8859-15" (hyphenated, unlike name()).
const char* QLatin15Codec::mimeName() const
{
    return "ISO-8859-15";
}
|
---|
2681 |
|
---|
2682 |
|
---|
// Returns the MIB number used for this codec, 111.
int QLatin15Codec::mibEnum() const
{
    return 111;
}
|
---|
2687 |
|
---|
2688 |
|
---|
2689 | /* the next two functions are implicitly thread safe,
|
---|
2690 | as they are only called by setup() which uses a mutex.
|
---|
2691 | */
|
---|
// Chooses the codec for the current locale and stores it in localeMapper.
//
// On Q_OS_WIN32 the codec named "System" is used. Elsewhere the choice
// is made by a chain of fallbacks, each tried only while localeMapper
// is still unset (except the "@euro" step, see below):
//   nl_langinfo(CODESET) where available, then the ".CODESET" suffix of
//   the LC_CTYPE locale name, then the same suffix of the environment
//   locale, then the locale names themselves, then per-language guess
//   lists, and finally "ISO 8859-1".
// A result with MIB 11 is replaced by "ISO 8859-8-I".
static void setupLocaleMapper()
{
#ifdef Q_OS_WIN32
    localeMapper = QTextCodec::codecForName( "System" );
#else

#if defined (_XOPEN_UNIX) && !defined(Q_OS_QNX6) && !defined(Q_OS_OSF)
    // Ask the C library for the codeset name first.
    char *charset = nl_langinfo (CODESET);
    if ( charset )
        localeMapper = QTextCodec::codecForName( charset );
#endif

    if ( !localeMapper ) {
        // Very poorly defined and followed standards causes lots of code
        // to try to get all the cases...

        // Try to determine locale codeset from locale name assigned to
        // LC_CTYPE category.

        // First part is getting that locale name.  First try setlocale() which
        // definitely knows it, but since we cannot fully trust it, get ready
        // to fall back to environment variables.
        // (copied with qstrdup(); released with delete [] at the end)
        char * ctype = qstrdup( setlocale( LC_CTYPE, 0 ) );

        // Get the first nonempty value from $LC_ALL, $LC_CTYPE, and $LANG
        // environment variables.
        // A value of "C" is skipped for $LC_ALL and $LC_CTYPE; the value of
        // $LANG is taken as it is.
        char * lang = qstrdup( getenv("LC_ALL") );
        if ( !lang || lang[0] == 0 || strcmp( lang, "C" ) == 0 ) {
            if ( lang ) delete [] lang;
            lang = qstrdup( getenv("LC_CTYPE") );
        }
        if ( !lang || lang[0] == 0 || strcmp( lang, "C" ) == 0 ) {
            if ( lang ) delete [] lang;
            lang = qstrdup( getenv("LANG") );
        }

        // Now try these in order:
        // 1. CODESET from ctype if it contains a .CODESET part (e.g. en_US.ISO8859-15)
        // 2. CODESET from lang if it contains a .CODESET part
        // 3. ctype (maybe the locale is named "ISO-8859-1" or something)
        // 4. locale (ditto)
        // 5. check for "@euro"
        // 6. guess locale from ctype unless ctype is "C"
        // 7. guess locale from lang

        // 1. CODESET from ctype if it contains a .CODESET part (e.g. en_US.ISO8859-15)
        char * codeset = ctype ? strchr( ctype, '.' ) : 0;
        if ( codeset && *codeset == '.' )
            localeMapper = QTextCodec::codecForName( codeset + 1 );

        // 2. CODESET from lang if it contains a .CODESET part
        codeset = lang ? strchr( lang, '.' ) : 0;
        if ( !localeMapper && codeset && *codeset == '.' )
            localeMapper = QTextCodec::codecForName( codeset + 1 );

        // 3. ctype (maybe the locale is named "ISO-8859-1" or something)
        if ( !localeMapper && ctype && *ctype != 0 && strcmp (ctype, "C") != 0 )
            localeMapper = QTextCodec::codecForName( ctype );

        // 4. locale (ditto) -- here the environment value held in lang
        if ( !localeMapper && lang && *lang != 0 )
            localeMapper = QTextCodec::codecForName( lang );

        // 5. "@euro"
        // Unlike the other steps this one is not guarded by !localeMapper,
        // so it replaces a codec found by steps 1-4.
        if ( ctype && strstr( ctype, "@euro" ) || lang && strstr( lang, "@euro" ) )
            localeMapper = QTextCodec::codecForName( "ISO 8859-15" );

        // 6. guess locale from ctype unless ctype is "C"
        // 7. guess locale from lang
        // NOTE(review): try_by_name ends up as lang when ctype is a real
        // locale name and as ctype otherwise, which looks like the reverse of
        // what the two comments above describe; it is also only used as a
        // non-empty guard below, while the lists are always searched with
        // lang. Confirm the intent before changing.
        char * try_by_name = ctype;
        if ( ctype && *ctype != 0 && strcmp (ctype, "C") != 0 )
            try_by_name = lang;

        // Now do the guessing.
        if ( lang && *lang && !localeMapper && try_by_name && *try_by_name ) {
            if ( try_locale_list( iso8859_15locales, lang ) )
                localeMapper = QTextCodec::codecForName( "ISO 8859-15" );
            else if ( try_locale_list( iso8859_2locales, lang ) )
                localeMapper = QTextCodec::codecForName( "ISO 8859-2" );
            else if ( try_locale_list( iso8859_3locales, lang ) )
                localeMapper = QTextCodec::codecForName( "ISO 8859-3" );
            else if ( try_locale_list( iso8859_4locales, lang ) )
                localeMapper = QTextCodec::codecForName( "ISO 8859-4" );
            else if ( try_locale_list( iso8859_5locales, lang ) )
                localeMapper = QTextCodec::codecForName( "ISO 8859-5" );
            else if ( try_locale_list( iso8859_6locales, lang ) )
                localeMapper = QTextCodec::codecForName( "ISO 8859-6" );
            else if ( try_locale_list( iso8859_7locales, lang ) )
                localeMapper = QTextCodec::codecForName( "ISO 8859-7" );
            else if ( try_locale_list( iso8859_8locales, lang ) )
                localeMapper = QTextCodec::codecForName( "ISO 8859-8-I" );
            else if ( try_locale_list( iso8859_9locales, lang ) )
                localeMapper = QTextCodec::codecForName( "ISO 8859-9" );
            else if ( try_locale_list( iso8859_13locales, lang ) )
                localeMapper = QTextCodec::codecForName( "ISO 8859-13" );
            else if ( try_locale_list( tis_620locales, lang ) )
                localeMapper = QTextCodec::codecForName( "ISO 8859-11" );
            else if ( try_locale_list( koi8_ulocales, lang ) )
                localeMapper = QTextCodec::codecForName( "KOI8-U" );
            else if ( try_locale_list( cp_1251locales, lang ) )
                localeMapper = QTextCodec::codecForName( "CP 1251" );
            else if ( try_locale_list( pt_154locales, lang ) )
                localeMapper = QTextCodec::codecForName( "PT 154" );
            else if ( try_locale_list( probably_koi8_rlocales, lang ) )
                localeMapper = ru_RU_hack( lang );
        }

        // release the two copies made above
        delete [] ctype;
        delete [] lang;
    }
    // A codec with MIB 11 is swapped for the "ISO 8859-8-I" codec.
    if ( localeMapper && localeMapper->mibEnum() == 11 )
        localeMapper = QTextCodec::codecForName( "ISO 8859-8-I" );

    // If everything failed, we default to 8859-1
    // We could perhaps default to 8859-15.
    if ( !localeMapper )
        localeMapper = QTextCodec::codecForName( "ISO 8859-1" );
#endif
}
|
---|
2811 |
|
---|
2812 |
|
---|
2813 | static void realSetup()
|
---|
2814 | {
|
---|
2815 | #if defined(QT_CHECK_STATE)
|
---|
2816 | if ( destroying_is_ok )
|
---|
2817 | qWarning( "QTextCodec: creating new codec during codec cleanup!" );
|
---|
2818 | #endif
|
---|
2819 | all = new QValueList<QTextCodec*>;
|
---|
2820 |
|
---|
2821 | (void)new QLatin1Codec;
|
---|
2822 | (void)new QLatin15Codec;
|
---|
2823 | (void)new QUtf8Codec;
|
---|
2824 | (void)new QUtf16Codec;
|
---|
2825 |
|
---|
2826 | #ifndef QT_NO_CODECS
|
---|
2827 | int i = 0;
|
---|
2828 | do {
|
---|
2829 | (void)new QSimpleTextCodec( i );
|
---|
2830 | } while( unicodevalues[i++].mib != LAST_MIB );
|
---|
2831 |
|
---|
2832 | (void)new QTsciiCodec;
|
---|
2833 |
|
---|
2834 | for (i = 0; i < 9; ++i) {
|
---|
2835 | (void)new QIsciiCodec(i);
|
---|
2836 | }
|
---|
2837 | #endif // QT_NO_CODECS
|
---|
2838 | #ifndef QT_NO_CODEC_HEBREW
|
---|
2839 | (void)new QHebrewCodec;
|
---|
2840 | #endif
|
---|
2841 | #ifndef QT_NO_BIG_CODECS
|
---|
2842 | (void)new QBig5Codec;
|
---|
2843 | (void)new QBig5hkscsCodec;
|
---|
2844 | (void)new QEucJpCodec;
|
---|
2845 | (void)new QEucKrCodec;
|
---|
2846 | (void)new QGb2312Codec;
|
---|
2847 | (void)new QGbkCodec;
|
---|
2848 | (void)new QGb18030Codec;
|
---|
2849 | (void)new QJisCodec;
|
---|
2850 | (void)new QSjisCodec;
|
---|
2851 | #endif // QT_NO_BIG_CODECS
|
---|
2852 |
|
---|
2853 | #ifdef Q_OS_WIN32
|
---|
2854 | (void) new QWindowsLocalCodec;
|
---|
2855 | #endif // Q_OS_WIN32
|
---|
2856 |
|
---|
2857 | if ( !localeMapper )
|
---|
2858 | setupLocaleMapper();
|
---|
2859 | }
|
---|
2860 |
|
---|
// Encodes length characters from in into out, one output value per
// input character.
//
// The codec's MIB number selects the concrete class: `this` is cast to
// that class and its fromUnicode( const QChar *, unsigned short *, int )
// is called. Codecs whose MIB is not listed are handled one character
// at a time through characterFromUnicode().
void QTextCodec::fromUnicodeInternal( const QChar *in, unsigned short *out, int length )
{
    switch( mibEnum() ) {
#ifndef QT_NO_CODECS
    // MIB numbers handled as QSimpleTextCodec
    case 2084:
    case 2088:
    case 5:
    case 6:
    case 7:
    case 8:
    case 82:
    case 10:
    case 85:
    case 12:
    case 13:
    case 109:
    case 110:
    case 2004:
    case 2009:
    case 2086:
    case 2250:
    case 2251:
    case 2252:
    case 2253:
    case 2254:
    case 2255:
    case 2256:
    case 2257:
    case 2258:
    case 2259:
        ((QSimpleTextCodec *)this)->fromUnicode( in, out, length );
        break;

#if !defined(QT_NO_BIG_CODECS) && defined(Q_WS_X11)
        // the QFont*Codecs are only used on X11

    case 15:
        ((QFontJis0201Codec *) this)->fromUnicode( in, out, length );
        break;

    case 63:
        ((QFontJis0208Codec *) this)->fromUnicode( in, out, length );
        break;

    case 36:
        ((QFontKsc5601Codec *) this)->fromUnicode( in, out, length );
        break;

    case 57:
        ((QFontGb2312Codec *) this)->fromUnicode( in, out, length );
        break;

    case -113:
        ((QFontGbkCodec *) this)->fromUnicode( in, out, length );
        break;

    case -114:
        ((QFontGb18030_0Codec *) this)->fromUnicode( in, out, length );
        break;

    case -2026:
        ((QFontBig5Codec *) this)->fromUnicode( in, out, length );
        break;

    case -2101:
        ((QFontBig5hkscsCodec *) this)->fromUnicode( in, out, length );
        break;

    case -4242:
        ((QFontLaoCodec *) this)->fromUnicode( in, out, length );
        break;
#endif
#endif // QT_NO_CODECS

    // ISO 8859-1 (QLatin1Codec::mibEnum() returns 4)
    case 4:
        ((QLatin1Codec *) this)->fromUnicode( in, out, length );
        break;

    // ISO 8859-15 (QLatin15Codec::mibEnum() returns 111)
    case 111:
        ((QLatin15Codec *) this)->fromUnicode( in, out, length );
        break;

    default:
        {
            // generic path: wrap the input in a QString and convert each
            // character through characterFromUnicode()
            QConstString string( in, length );
            QString str = string.string();
            for ( int i = 0; i < length; i++ )
                out[i] = characterFromUnicode( str, i );
        }
    }
}
|
---|
2952 |
|
---|
2953 |
|
---|
2954 | /*!
|
---|
2955 | \fn QTextCodec* QTextCodec::codecForTr()
|
---|
2956 |
|
---|
2957 | Returns the codec used by QObject::tr() on its argument. If this
|
---|
2958 | function returns 0 (the default), tr() assumes Latin-1.
|
---|
2959 |
|
---|
2960 | \sa setCodecForTr()
|
---|
2961 | */
|
---|
2962 |
|
---|
2963 | /*!
|
---|
2964 | \fn void QTextCodec::setCodecForTr(QTextCodec *c)
|
---|
2965 | \nonreentrant
|
---|
2966 |
|
---|
2967 | Sets the codec used by QObject::tr() on its argument to \a c. If
|
---|
2968 | \a c is 0 (the default), tr() assumes Latin-1.
|
---|
2969 |
|
---|
2970 | If the literal quoted text in the program is not in the Latin-1
|
---|
2971 | encoding, this function can be used to set the appropriate
|
---|
2972 | encoding. For example, software developed by Korean programmers
|
---|
2973 | might use eucKR for all the text in the program, in which case the
|
---|
2974 | main() function might look like this:
|
---|
2975 |
|
---|
2976 | \code
|
---|
2977 | int main(int argc, char** argv)
|
---|
2978 | {
|
---|
2979 | QApplication app(argc, argv);
|
---|
2980 | ... install any additional codecs ...
|
---|
2981 | QTextCodec::setCodecForTr( QTextCodec::codecForName("eucKR") );
|
---|
2982 | ...
|
---|
2983 | }
|
---|
2984 | \endcode
|
---|
2985 |
|
---|
2986 | Note that this is not the way to select the encoding that the \e
|
---|
2987 | user has chosen. For example, to convert an application containing
|
---|
2988 | literal English strings to Korean, all that is needed is for the
|
---|
2989 | English strings to be passed through tr() and for translation
|
---|
2990 | files to be loaded. For details of internationalization, see the
|
---|
2991 | \link i18n.html Qt internationalization documentation\endlink.
|
---|
2992 |
|
---|
2993 | \sa codecForTr(), setCodecForCStrings()
|
---|
2994 | */
|
---|
2995 |
|
---|
2996 |
|
---|
2997 | /*!
|
---|
2998 | \fn QTextCodec* QTextCodec::codecForCStrings()
|
---|
2999 |
|
---|
3000 | Returns the codec used by QString to convert to and from const
|
---|
3001 | char* and QCStrings. If this function returns 0 (the default),
|
---|
3002 | QString assumes Latin-1.
|
---|
3003 |
|
---|
3004 | \sa setCodecForCStrings()
|
---|
3005 | */
|
---|
3006 |
|
---|
3007 | /*!
|
---|
3008 | \fn void QTextCodec::setCodecForCStrings(QTextCodec *c)
|
---|
3009 | \nonreentrant
|
---|
3010 |
|
---|
3011 | Sets the codec used by QString to convert to and from const char*
|
---|
3012 | and QCStrings. If \a c is 0 (the default), QString assumes Latin-1.
|
---|
3013 |
|
---|
3014 | \warning Some codecs do not preserve the characters in the ASCII
|
---|
3015 | range (0x00 to 0x7f). For example, the Japanese Shift-JIS
|
---|
3016 | encoding maps the backslash character (0x5c) to the Yen character.
|
---|
3017 | This leads to unexpected results when using the backslash
|
---|
3018 | character to escape characters in strings used in e.g. regular
|
---|
3019 | expressions. Use QString::fromLatin1() to preserve characters in
|
---|
3020 | the ASCII range when needed.
|
---|
3021 |
|
---|
3022 | \sa codecForCStrings(), setCodecForTr()
|
---|
3023 | */
|
---|
3024 |
|
---|
3025 |
|
---|
3026 | QTextCodec *QTextCodec::cftr = 0; // starts out null; presumably the codec reported by codecForTr() -- confirm in the class declaration
|
---|
3027 | QTextCodec *QTextCodec::cfcs = 0; // starts out null; presumably the codec reported by codecForCStrings() -- confirm in the class declaration
|
---|
3028 |
|
---|
3029 |
|
---|
3030 | #endif // QT_NO_TEXTCODEC
|
---|