source: vendor/trolltech/current/src/codecs/qtextcodec.cpp

Last change on this file was 2, checked in by dmik, 20 years ago

Imported xplatform parts of the official release 3.3.1 from Trolltech

  • Property svn:keywords set to Id
File size: 101.2 KB
Line 
1/****************************************************************************
2** $Id: qtextcodec.cpp 2 2005-11-16 15:49:26Z dmik $
3**
4** Implementation of QTextCodec class
5**
6** Created : 981015
7**
8** Copyright (C) 1998-2002 Trolltech AS. All rights reserved.
9**
10** This file is part of the tools module of the Qt GUI Toolkit.
11**
12** This file may be distributed under the terms of the Q Public License
13** as defined by Trolltech AS of Norway and appearing in the file
14** LICENSE.QPL included in the packaging of this file.
15**
16** This file may be distributed and/or modified under the terms of the
17** GNU General Public License version 2 as published by the Free Software
18** Foundation and appearing in the file LICENSE.GPL included in the
19** packaging of this file.
20**
21** Licensees holding valid Qt Enterprise Edition or Qt Professional Edition
22** licenses may use this file in accordance with the Qt Commercial License
23** Agreement provided with the Software.
24**
25** This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
26** WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
27**
28** See http://www.trolltech.com/pricing.html or email sales@trolltech.com for
29** information about Qt Commercial License Agreements.
30** See http://www.trolltech.com/qpl/ for QPL licensing information.
31** See http://www.trolltech.com/gpl/ for GPL licensing information.
32**
33** Contact info@trolltech.com if any conditions of this licensing are
34** not clear to you.
35**
36**********************************************************************/
37
38#include "qplatformdefs.h"
39
40// UNIX Large File Support redefines open -> open64
41#if defined(open)
42# undef open
43#endif
44
45#include "qtextcodec.h"
46#ifndef QT_NO_TEXTCODEC
47
48#include "qvaluelist.h"
49#include "qtextcodecfactory.h"
50#include "qutfcodec.h"
51#include "qnamespace.h"
52#ifndef QT_NO_CODECS
53#include "qrtlcodec.h"
54#include "qtsciicodec.h"
55#include "qisciicodec_p.h"
56#endif // QT_NO_CODECS
57#ifndef QT_NO_BIG_CODECS
58#include "qbig5codec.h"
59#include "qeucjpcodec.h"
60#include "qeuckrcodec.h"
61#include "qgb18030codec.h"
62#include "qjiscodec.h"
63#include "qjpunicode.h"
64#include "qsjiscodec.h"
65#endif // QT_NO_BIG_CODECS
66#include "qfile.h"
67#include "qstrlist.h"
68#include "qstring.h"
69#include "../tools/qlocale_p.h"
70
71#if !defined(QT_NO_CODECS) && !defined(QT_NO_BIG_CODECS) && defined(Q_WS_X11)
72# include "qfontcodecs_p.h"
73#endif
74
75#ifdef QT_THREAD_SUPPORT
76# include <private/qmutexpool_p.h>
77#endif // QT_THREAD_SUPPORT
78
79#include <stdlib.h>
80#include <ctype.h>
81#ifndef Q_OS_TEMP
82#include <locale.h>
83#endif
84#if defined(_XOPEN_UNIX) && !defined(Q_OS_QNX6)
85#include <langinfo.h>
86#endif
87
88static QValueList<QTextCodec*> *all = 0;
89static bool destroying_is_ok; // starts out as 0
90static QTextCodec * localeMapper = 0;
91
92class QTextCodecCleanup {
93public:
94 ~QTextCodecCleanup() {
95 QTextCodec::deleteAllCodecs();
96 }
97};
98static QTextCodecCleanup qtextcodec_cleanup;
99
100/*!
101 Deletes all the created codecs.
102
103 \warning Do not call this function.
104
105 QApplication calls this function just before exiting to delete
106 any QTextCodec objects that may be lying around. Since various
107 other classes hold pointers to QTextCodec objects, it is not safe
108 to call this function earlier.
109
110 If you are using the utility classes (like QString) but not using
111 QApplication, calling this function at the very end of your
112 application may be helpful for chasing down memory leaks by
113 eliminating any QTextCodec objects.
114*/
115
116void QTextCodec::deleteAllCodecs()
117{
118 if ( !all )
119 return;
120
121#ifdef QT_THREAD_SUPPORT
122 QMutexLocker locker( qt_global_mutexpool ?
123 qt_global_mutexpool->get( &all ) : 0 );
124 if ( !all )
125 return;
126#endif // QT_THREAD_SUPPORT
127
128 destroying_is_ok = TRUE;
129
130 QValueList<QTextCodec*> *ball = all;
131 all = 0;
132 QValueList<QTextCodec*>::Iterator it;
133 for ( it = ball->begin(); it != ball->end(); ++it ) {
134 delete *it;
135 *it = 0;
136 }
137 ball->clear();
138 delete ball;
139
140 destroying_is_ok = FALSE;
141}
142
143
144static void realSetup();
145
146
147static inline void setup()
148{
149 if ( all ) return;
150
151#ifdef QT_THREAD_SUPPORT
152 QMutexLocker locker( qt_global_mutexpool ?
153 qt_global_mutexpool->get( &all ) : 0 );
154 if ( all ) return;
155#endif // QT_THREAD_SUPPORT
156
157 realSetup();
158}
159
160
161class QTextStatelessEncoder: public QTextEncoder {
162 const QTextCodec* codec;
163public:
164 QTextStatelessEncoder(const QTextCodec*);
165 QCString fromUnicode(const QString& uc, int& lenInOut);
166};
167
168
169class QTextStatelessDecoder : public QTextDecoder {
170 const QTextCodec* codec;
171public:
172 QTextStatelessDecoder(const QTextCodec*);
173 QString toUnicode(const char* chars, int len);
174};
175
176QTextStatelessEncoder::QTextStatelessEncoder(const QTextCodec* c) :
177 codec(c)
178{
179}
180
181
182QCString QTextStatelessEncoder::fromUnicode(const QString& uc, int& lenInOut)
183{
184 return codec->fromUnicode(uc,lenInOut);
185}
186
187
188QTextStatelessDecoder::QTextStatelessDecoder(const QTextCodec* c) :
189 codec(c)
190{
191}
192
193
194QString QTextStatelessDecoder::toUnicode(const char* chars, int len)
195{
196 return codec->toUnicode(chars,len);
197}
198
199
200
201/*!
202 \class QTextCodec qtextcodec.h
203 \brief The QTextCodec class provides conversion between text encodings.
204 \reentrant
205 \ingroup i18n
206
207 Qt uses Unicode to store, draw and manipulate strings. In many
208 situations you may wish to deal with data that uses a different
209 encoding. For example, most Japanese documents are still stored in
210 Shift-JIS or ISO2022, while Russian users often have their
211 documents in KOI8-R or CP1251.
212
213 Qt provides a set of QTextCodec classes to help with converting
214 non-Unicode formats to and from Unicode. You can also create your
215 own codec classes (\link #subclassing see later\endlink).
216
217 The supported encodings are:
218 \list
219 \i Latin1
220 \i Big5 -- Chinese
221 \i Big5-HKSCS -- Chinese
222 \i eucJP -- Japanese
223 \i eucKR -- Korean
224 \i GB2312 -- Chinese
225 \i GBK -- Chinese
226 \i GB18030 -- Chinese
227 \i JIS7 -- Japanese
228 \i Shift-JIS -- Japanese
229 \i TSCII -- Tamil
230 \i utf8 -- Unicode, 8-bit
231 \i utf16 -- Unicode
232 \i KOI8-R -- Russian
233 \i KOI8-U -- Ukrainian
234 \i ISO8859-1 -- Western
235 \i ISO8859-2 -- Central European
236 \i ISO8859-3 -- Central European
237 \i ISO8859-4 -- Baltic
238 \i ISO8859-5 -- Cyrillic
239 \i ISO8859-6 -- Arabic
240 \i ISO8859-7 -- Greek
241 \i ISO8859-8 -- Hebrew, visually ordered
242 \i ISO8859-8-i -- Hebrew, logically ordered
243 \i ISO8859-9 -- Turkish
244 \i ISO8859-10
245 \i ISO8859-13
246 \i ISO8859-14
247 \i ISO8859-15 -- Western
248 \i IBM 850
249 \i IBM 866
250 \i CP874
251 \i CP1250 -- Central European
252 \i CP1251 -- Cyrillic
253 \i CP1252 -- Western
254 \i CP1253 -- Greek
255 \i CP1254 -- Turkish
256 \i CP1255 -- Hebrew
257 \i CP1256 -- Arabic
258 \i CP1257 -- Baltic
259 \i CP1258
260 \i Apple Roman
261 \i TIS-620 -- Thai
262 \endlist
263
264 QTextCodecs can be used as follows to convert some locally encoded
265 string to Unicode. Suppose you have some string encoded in Russian
266 KOI8-R encoding, and want to convert it to Unicode. The simple way
267 to do this is:
268
269 \code
270 QCString locallyEncoded = "..."; // text to convert
271 QTextCodec *codec = QTextCodec::codecForName("KOI8-R"); // get the codec for KOI8-R
272 QString unicodeString = codec->toUnicode( locallyEncoded );
273 \endcode
274
275 After this, \c{unicodeString} holds the text converted to Unicode.
276 Converting a string from Unicode to the local encoding is just as
277 easy:
278
279 \code
280 QString unicodeString = "..."; // any Unicode text
281 QTextCodec *codec = QTextCodec::codecForName("KOI8-R"); // get the codec for KOI8-R
282 QCString locallyEncoded = codec->fromUnicode( unicodeString );
283 \endcode
284
285 Some care must be taken when trying to convert the data in chunks,
286 for example, when receiving it over a network. In such cases it is
287 possible that a multi-byte character will be split over two
288 chunks. At best this might result in the loss of a character and
289 at worst cause the entire conversion to fail.
290
291 The approach to use in these situations is to create a QTextDecoder
292 object for the codec and use this QTextDecoder for the whole
293 decoding process, as shown below:
294
295 \code
296 QTextCodec *codec = QTextCodec::codecForName( "Shift-JIS" );
297 QTextDecoder *decoder = codec->makeDecoder();
298
299 QString unicodeString;
300 while( receiving_data ) {
301 QByteArray chunk = new_data;
302 unicodeString += decoder->toUnicode( chunk.data(), chunk.length() );
303 }
304 \endcode
305
306 The QTextDecoder object maintains state between chunks and therefore
307 works correctly even if a multi-byte character is split between
308 chunks.
309
310 \target subclassing
311 \section1 Creating your own Codec class
312
313 Support for new text encodings can be added to Qt by creating
314 QTextCodec subclasses.
315
316 Built-in codecs can be overridden by custom codecs since more
317 recently created QTextCodec objects take precedence over earlier
318 ones.
319
320 You may find it more convenient to make your codec class available
321 as a plugin; see the \link plugins-howto.html plugin
322 documentation\endlink for more details.
323
324 The abstract virtual functions describe the encoder to the
325 system and the coder is used as required in the different
326 text file formats supported by QTextStream, and under X11, for the
327 locale-specific character input and output.
328
329 To add support for another 8-bit encoding to Qt, make a subclass
330 of QTextCodec and implement at least the following methods:
331
332 \code
333 const char* name() const
334 \endcode
335 Return the official name for the encoding.
336
337 \code
338 int mibEnum() const
339 \endcode
340 Return the MIB enum for the encoding if it is listed in the
341 \link http://www.iana.org/assignments/character-sets
342 IANA character-sets encoding file\endlink.
343
344 If the encoding is multi-byte then it will have "state"; that is,
345 the interpretation of some bytes will be dependent on some preceding
346 bytes. For such encodings, you must implement:
347
348 \code
349 QTextDecoder* makeDecoder() const
350 \endcode
351 Return a QTextDecoder that remembers incomplete multi-byte sequence
352 prefixes or other required state.
353
354 If the encoding does \e not require state, you should implement:
355
356 \code
357 QString toUnicode(const char* chars, int len) const
358 \endcode
359 Converts \e len characters from \e chars to Unicode.
360
361 The base QTextCodec class has default implementations of the above
362 two functions, \e{but they are mutually recursive}, so you must
363 re-implement at least one of them, or both for improved efficiency.
364
365 For conversion from Unicode to 8-bit encodings, it is rarely necessary
366 to maintain state. However, two functions similar to the two above
367 are used for encoding:
368
369 \code
370 QTextEncoder* makeEncoder() const
371 \endcode
372 Return a QTextEncoder.
373
374 \code
375 QCString fromUnicode(const QString& uc, int& lenInOut ) const
376 \endcode
377 Converts \e lenInOut characters (of type QChar) from the start of
378 the string \e uc, returning a QCString result, and also returning
379 the \link QCString::length() length\endlink of the result in
380 \e lenInOut.
381
382 Again, these are mutually recursive so only one needs to be implemented,
383 or both if greater efficiency is possible.
384
385 Finally, you must implement:
386
387 \code
388 int heuristicContentMatch(const char* chars, int len) const
389 \endcode
390 Gives a value indicating how likely it is that \e len characters
391 from \e chars are in the encoding.
392
393 A good model for this function is the
394 QWindowsLocalCodec::heuristicContentMatch function found in the Qt
395 sources.
396
397 A QTextCodec subclass might have improved performance if you also
398 re-implement:
399
400 \code
401 bool canEncode( QChar ) const
402 \endcode
403 Test if a Unicode character can be encoded.
404
405 \code
406 bool canEncode( const QString& ) const
407 \endcode
408 Test if a string of Unicode characters can be encoded.
409
410 \code
411 int heuristicNameMatch(const char* hint) const
412 \endcode
413 Test if a possibly non-standard name is referring to the codec.
414
415 Codecs can also be created as \link plugins-howto.html plugins\endlink.
416*/
417
418
419/*!
420 \nonreentrant
421
422 Constructs a QTextCodec, and gives it the highest precedence. The
423 QTextCodec should always be constructed on the heap (i.e. with \c
424 new). Qt takes ownership and will delete it when the application
425 terminates.
426*/
427QTextCodec::QTextCodec()
428{
429 setup();
430 all->insert( all->begin(), this );
431}
432
433
434/*!
435 \nonreentrant
436
437 Destroys the QTextCodec. Note that you should not delete codecs
438 yourself: once created they become Qt's responsibility.
439*/
440QTextCodec::~QTextCodec()
441{
442 if ( !destroying_is_ok )
443 qWarning("QTextCodec::~QTextCodec() called by application");
444 if ( all )
445 all->remove( this );
446}
447
448
449/*!
450 Returns a value indicating how likely it is that this decoder is
451 appropriate for decoding some format that has the given name. The
452 name is compared with the \a hint.
453
454 A good match returns a positive number around the length of the
455 string. A bad match is negative.
456
457 The default implementation calls simpleHeuristicNameMatch() with
458 the name of the codec.
459*/
460int QTextCodec::heuristicNameMatch(const char* hint) const
461{
462 return simpleHeuristicNameMatch(name(),hint);
463}
464
465
466// returns a string containing the letters and numbers from input,
467// with a space separating run of a character class. e.g. "iso8859-1"
468// becomes "iso 8859 1"
469static QString lettersAndNumbers( const char * input )
470{
471 QString result;
472 QChar c;
473
474 while( input && *input ) {
475 c = *input;
476 if ( c.isLetter() || c.isNumber() )
477 result += c.lower();
478 if ( input[1] ) {
479 // add space at character class transition, except
480 // transition from upper-case to lower-case letter
481 QChar n( input[1] );
482 if ( c.isLetter() && n.isLetter() ) {
483 if ( c == c.lower() && n == n.upper() )
484 result += ' ';
485 } else if ( c.category() != n.category() ) {
486 result += ' ';
487 }
488 }
489 input++;
490 }
491 return result.simplifyWhiteSpace();
492}
493
494/*!
495 A simple utility function for heuristicNameMatch(): it does some
496 very minor character-skipping so that almost-exact matches score
497 high. \a name is the text we're matching and \a hint is used for
498 the comparison.
499*/
500int QTextCodec::simpleHeuristicNameMatch(const char* name, const char* hint)
501{
502 // if they're the same, return a perfect score.
503 if ( name && hint && *name && *hint && qstricmp( name, hint ) == 0 )
504 return qstrlen( hint );
505
506 // if the letters and numbers are the same, we have an "almost"
507 // perfect match.
508 QString h( lettersAndNumbers( hint ) );
509 QString n( lettersAndNumbers( name ) );
510 if ( h == n )
511 return qstrlen( hint )-1;
512
513 if ( h.stripWhiteSpace() == n.stripWhiteSpace() )
514 return qstrlen( hint )-2;
515
516 // could do some more here, but I don't think it's worth it
517
518 return 0;
519}
520
521
522/*!
523 Returns the QTextCodec \a i positions from the most recently
524 inserted codec, or 0 if there is no such QTextCodec. Thus,
525 codecForIndex(0) returns the most recently created QTextCodec.
526*/
527QTextCodec* QTextCodec::codecForIndex(int i)
528{
529 setup();
530 return (uint)i >= all->count() ? 0 : *all->at(i);
531}
532
533
534/*!
535 Returns the QTextCodec which matches the \link
536 QTextCodec::mibEnum() MIBenum\endlink \a mib.
537*/
538QTextCodec* QTextCodec::codecForMib(int mib)
539{
540 setup();
541 QValueList<QTextCodec*>::ConstIterator i;
542 QTextCodec* result=0;
543 for ( i = all->begin(); i != all->end(); ++i ) {
544 result = *i;
545 if ( result->mibEnum()==mib )
546 return result;
547 }
548
549#if !defined(QT_NO_COMPONENT) && !defined(QT_LITE_COMPONENT)
550 if ( !result || (result && result->mibEnum() != mib) ) {
551 QTextCodec *codec = QTextCodecFactory::createForMib(mib);
552 if (codec)
553 result = codec;
554 }
555#endif // !QT_NO_COMPONENT !QT_LITE_COMPONENT
556
557 return result;
558}
559
560
561
562
563
564#ifdef Q_OS_WIN32
565class QWindowsLocalCodec: public QTextCodec
566{
567public:
568 QWindowsLocalCodec();
569 ~QWindowsLocalCodec();
570
571 QString toUnicode(const char* chars, int len) const;
572 QCString fromUnicode(const QString& uc, int& lenInOut ) const;
573
574 const char* name() const;
575 int mibEnum() const;
576
577 int heuristicContentMatch(const char* chars, int len) const;
578};
579
580QWindowsLocalCodec::QWindowsLocalCodec()
581{
582}
583
584QWindowsLocalCodec::~QWindowsLocalCodec()
585{
586}
587
588
589QString QWindowsLocalCodec::toUnicode(const char* chars, int len) const
590{
591 if ( len == 1 && chars ) { // Optimization; avoids allocation
592 char c[2];
593 c[0] = *chars;
594 c[1] = 0;
595 return qt_winMB2QString( c, 2 );
596 }
597 if ( len < 0 )
598 return qt_winMB2QString( chars );
599 QCString s(chars,len+1);
600 return qt_winMB2QString(s);
601}
602
603QCString QWindowsLocalCodec::fromUnicode(const QString& uc, int& lenInOut ) const
604{
605 QCString r = qt_winQString2MB( uc, lenInOut );
606 lenInOut = r.length();
607 return r;
608}
609
610
611const char* QWindowsLocalCodec::name() const
612{
613 return "System";
614}
615
616int QWindowsLocalCodec::mibEnum() const
617{
618 return 0;
619}
620
621
622int QWindowsLocalCodec::heuristicContentMatch(const char* chars, int len) const
623{
624 // ### Not a bad default implementation?
625 QString t = toUnicode(chars,len);
626 int l = t.length();
627 QCString mb = fromUnicode(t,l);
628 int i=0;
629 while ( i < len ) {
630 if ( chars[i] == mb[i] )
631 i++;
632 else
633 break;
634 }
635 return i;
636}
637
638#else
639
/*
  Null-terminated tables of locale names, one table per encoding. The
  names were mostly copied from XFree86.
*/
static const char * const iso8859_2locales[] = {
    "croatian", "cs", "cs_CS", "cs_CZ",
    "cz", "cz_CZ", "czech",
    "hr", "hr_HR",
    "hu", "hu_HU", "hungarian",
    "pl", "pl_PL", "polish",
    "ro", "ro_RO", "rumanian",
    "serbocroatian", "sh", "sh_SP", "sh_YU",
    "sk", "sk_SK",
    "sl", "sl_CS", "sl_SI", "slovak", "slovene",
    "sr_SP",
    0
};

static const char * const iso8859_3locales[] = {
    "eo",
    0
};

static const char * const iso8859_4locales[] = {
    "ee", "ee_EE",
    0
};

static const char * const iso8859_5locales[] = {
    "mk", "mk_MK",
    "sp", "sp_YU",
    0
};

static const char * const cp_1251locales[] = {
    "be", "be_BY",
    "bg", "bg_BG", "bulgarian",
    0
};

static const char * const pt_154locales[] = {
    "ba_RU",
    "ky", "ky_KG",
    "kk", "kk_KZ",
    0
};

static const char * const iso8859_6locales[] = {
    "ar_AA", "ar_SA", "arabic",
    0
};

static const char * const iso8859_7locales[] = {
    "el", "el_GR", "greek",
    0
};

static const char * const iso8859_8locales[] = {
    "hebrew",
    "he", "he_IL",
    "iw", "iw_IL",
    0
};

static const char * const iso8859_9locales[] = {
    "tr", "tr_TR", "turkish",
    0
};

static const char * const iso8859_13locales[] = {
    "lt", "lt_LT",
    "lv", "lv_LV",
    0
};

static const char * const iso8859_15locales[] = {
    "et", "et_EE",
    // Euro countries
    "br_FR", "ca_ES",
    "de", "de_AT", "de_BE", "de_DE", "de_LU",
    "en_IE",
    "es", "es_ES", "eu_ES",
    "fi", "fi_FI", "finnish",
    "fr", "fr_FR", "fr_BE", "fr_LU", "french",
    "ga_IE", "gl_ES",
    "it", "it_IT",
    "oc_FR",
    "nl", "nl_BE", "nl_NL",
    "pt", "pt_PT",
    "sv_FI", "wa_BE",
    0
};

static const char * const koi8_ulocales[] = {
    "uk", "uk_UA", "ru_UA", "ukrainian",
    0
};

static const char * const tis_620locales[] = {
    "th", "th_TH", "thai",
    0
};

static const char * const tcvnlocales[] = {
    "vi", "vi_VN",
    0
};
694
// Returns true if lang is one of the names in the null-terminated table
// locale, using an exact, case-sensitive comparison.
//
// A null table or null lang yields false. An empty-string entry ends the
// scan just like the terminating null pointer does; it is never reported
// as a match.
static bool try_locale_list( const char * const locale[], const char * lang )
{
    if ( !locale || !lang )
        return false;

    for ( int i = 0; locale[i] && *locale[i]; i++ ) {
        if ( strcmp( locale[i], lang ) == 0 )
            return true;
    }
    return false;
}
702
703// For the probably_koi8_locales we have to look. the standard says
704// these are 8859-5, but almost all Russian users use KOI8-R and
705// incorrectly set $LANG to ru_RU. We'll check tolower() to see what
706// tolower() thinks ru_RU means.
707
708// If you read the history, it seems that many Russians blame ISO and
709// Perestroika for the confusion.
710//
711// The real bug is that some programs break if the user specifies
712// ru_RU.KOI8-R.
713
714static const char * const probably_koi8_rlocales[] = {
715 "ru", "ru_SU", "ru_RU", "russian", 0 };
716
717static QTextCodec * ru_RU_hack( const char * i ) {
718 QTextCodec * ru_RU_codec = 0;
719
720 QCString origlocale = setlocale( LC_CTYPE, i );
721 // unicode koi8r latin5 name
722 // 0x044E 0xC0 0xEE CYRILLIC SMALL LETTER YU
723 // 0x042E 0xE0 0xCE CYRILLIC CAPITAL LETTER YU
724 int latin5 = tolower( 0xCE );
725 int koi8r = tolower( 0xE0 );
726 if ( koi8r == 0xC0 && latin5 != 0xEE ) {
727 ru_RU_codec = QTextCodec::codecForName( "KOI8-R" );
728 } else if ( koi8r != 0xC0 && latin5 == 0xEE ) {
729 ru_RU_codec = QTextCodec::codecForName( "ISO 8859-5" );
730 } else {
731 // something else again... let's assume... *throws dice*
732 ru_RU_codec = QTextCodec::codecForName( "KOI8-R" );
733 qWarning( "QTextCodec: using KOI8-R, probe failed (%02x %02x %s)",
734 koi8r, latin5, i );
735 }
736 setlocale( LC_CTYPE, origlocale.data() );
737
738 return ru_RU_codec;
739}
740
741#endif
742
743/*!
744 Set the codec to \a c; this will be returned by codecForLocale().
745 This might be needed for some applications that want to use their
746 own mechanism for setting the locale.
747
748 \sa codecForLocale()
749*/
750void QTextCodec::setCodecForLocale(QTextCodec *c) {
751 localeMapper = c;
752}
753
754/*! Returns a pointer to the codec most suitable for this locale. */
755
756QTextCodec* QTextCodec::codecForLocale()
757{
758 if ( localeMapper )
759 return localeMapper;
760
761 setup();
762
763 return localeMapper;
764}
765
766
767/*!
768 Searches all installed QTextCodec objects and returns the one
769 which best matches \a name; the match is case-insensitive. Returns
770 0 if no codec's heuristicNameMatch() reports a match better than
771 \a accuracy, or if \a name is a null string.
772
773 \sa heuristicNameMatch()
774*/
775
776QTextCodec* QTextCodec::codecForName( const char* name, int accuracy )
777{
778 if ( !name || !*name )
779 return 0;
780
781 setup();
782 QValueList<QTextCodec*>::ConstIterator i;
783 QTextCodec* result = 0;
784 int best = accuracy;
785 QTextCodec* cursor;
786 for ( i = all->begin(); i != all->end(); ++i ) {
787 cursor = *i;
788 int s = cursor->heuristicNameMatch( name );
789 if ( s > best ) {
790 best = s;
791 result = cursor;
792 }
793 }
794
795#if !defined(QT_NO_COMPONENT) && !defined(QT_LITE_COMPONENT)
796 if ( !result )
797 result = QTextCodecFactory::createForName(name);
798#endif // !QT_NO_COMPONENT !QT_LITE_COMPONENT
799
800 return result;
801}
802
803
804/*!
805 Searches all installed QTextCodec objects, returning the one which
806 most recognizes the given content. May return 0.
807
808 Note that this is often a poor choice, since character encodings
809 often use most of the available character sequences, and so only
810 by linguistic analysis could a true match be made.
811
812 \a chars contains the string to check, and \a len contains the
813 number of characters in the string to use.
814
815 \sa heuristicContentMatch()
816*/
817QTextCodec* QTextCodec::codecForContent(const char* chars, int len)
818{
819 setup();
820 QValueList<QTextCodec*>::ConstIterator i;
821 QTextCodec* result = 0;
822 int best=0;
823 QTextCodec* cursor;
824 for ( i = all->begin(); i != all->end(); ++i ) {
825 cursor = *i;
826 int s = cursor->heuristicContentMatch(chars,len);
827 if ( s > best ) {
828 best = s;
829 result = cursor;
830 }
831 }
832 return result;
833}
834
835
836/*!
837 \fn const char* QTextCodec::name() const
838
839 QTextCodec subclasses must reimplement this function. It returns
840 the name of the encoding supported by the subclass. When choosing
841 a name for an encoding, consider these points:
842 \list
843 \i On X11, heuristicNameMatch( const char * hint )
844 is used to test whether the QTextCodec
845 can convert between Unicode and the encoding of a font
846 with encoding \e hint, such as "iso8859-1" for Latin-1 fonts,
847 "koi8-r" for Russian KOI8 fonts.
848 The default algorithm of heuristicNameMatch() uses name().
849 \i Some applications may use this function to present
850 encodings to the end user.
851 \endlist
852 */
853
854/*!
855 \fn int QTextCodec::mibEnum() const
856
857 Subclasses of QTextCodec must reimplement this function. It
858 returns the MIBenum (see \link
859 http://www.iana.org/assignments/character-sets the
860 IANA character-sets encoding file\endlink for more information).
861 It is important that each QTextCodec subclass returns the correct
862 unique value for this function.
863*/
864
865
866/*!
867 Returns the preferred mime name of the encoding as defined in the
868 \link http://www.iana.org/assignments/character-sets
869 IANA character-sets encoding file\endlink.
870*/
871const char* QTextCodec::mimeName() const
872{
873 return name();
874}
875
876
877/*!
878 \fn int QTextCodec::heuristicContentMatch(const char* chars, int len) const
879
880 QTextCodec subclasses must reimplement this function. It examines
881 the first \a len bytes of \a chars and returns a value indicating
882 how likely it is that the string is a prefix of text encoded in
883 the encoding of the subclass. A negative return value indicates
884 that the text is detectably not in the encoding (e.g. it contains
885 characters undefined in the encoding). A return value of 0
886 indicates that the text should be decoded with this codec rather
887 than as ASCII, but there is no particular evidence. The value
888 should range up to \a len. Thus, most decoders will return -1, 0,
889 or -\a len.
890
891 The characters are not null terminated.
892
893 \sa codecForContent().
894*/
895
896
897/*!
898 Creates a QTextDecoder which stores enough state to decode chunks
899 of char* data to create chunks of Unicode data. The default
900 implementation creates a stateless decoder, which is only
901 sufficient for the simplest encodings where each byte corresponds
902 to exactly one Unicode character.
903
904 The caller is responsible for deleting the returned object.
905*/
906QTextDecoder* QTextCodec::makeDecoder() const
907{
908 return new QTextStatelessDecoder(this);
909}
910
911
912/*!
913 Creates a QTextEncoder which stores enough state to encode chunks
914 of Unicode data as char* data. The default implementation creates
915 a stateless encoder, which is only sufficient for the simplest
916 encodings where each Unicode character corresponds to exactly one
917 character.
918
919 The caller is responsible for deleting the returned object.
920*/
921QTextEncoder* QTextCodec::makeEncoder() const
922{
923 return new QTextStatelessEncoder(this);
924}
925
926
927/*!
928 QTextCodec subclasses must reimplement this function or
929 makeDecoder(). It converts the first \a len characters of \a chars
930 to Unicode.
931
932 The default implementation makes a decoder with makeDecoder() and
933 converts the input with that. Note that the default makeDecoder()
934 implementation makes a decoder that simply calls
935 this function, hence subclasses \e must reimplement one function or
936 the other to avoid infinite recursion.
937*/
938QString QTextCodec::toUnicode(const char* chars, int len) const
939{
940 if ( chars == 0 )
941 return QString::null;
942 QTextDecoder* i = makeDecoder();
943 QString result = i->toUnicode(chars,len);
944 delete i;
945 return result;
946}
947
948
949/*!
950 QTextCodec subclasses must reimplement either this function or
951 makeEncoder(). It converts the first \a lenInOut characters of \a
952 uc from Unicode to the encoding of the subclass. If \a lenInOut is
953 negative or too large, the length of \a uc is used instead.
954
955 Converts \a lenInOut characters (not bytes) from \a uc, producing
956 a QCString. \a lenInOut will be set to the \link
957 QCString::length() length\endlink of the result (in bytes).
958
959 The default implementation makes an encoder with makeEncoder() and
960 converts the input with that. Note that the default makeEncoder()
961 implementation makes an encoder that simply calls this function,
962 hence subclasses \e must reimplement one function or the other to
963 avoid infinite recursion.
964*/
965
966QCString QTextCodec::fromUnicode(const QString& uc, int& lenInOut) const
967{
968 QTextEncoder* i = makeEncoder();
969 QCString result = i->fromUnicode(uc, lenInOut);
970 delete i;
971 return result;
972}
973
974/*!
975 \overload
976 \internal
977*/
978QByteArray QTextCodec::fromUnicode( const QString &str, int pos, int len ) const
979{
980 QByteArray a;
981 if( len < 0 )
982 len = str.length() - pos;
983 a = fromUnicode( str.mid(pos, len) );
984 if( a.size() > 0 && a[(int)a.size() - 1] == '\0' )
985 a.resize( a.size() - 1 );
986 return a;
987}
988
989/*!
990 \overload
991
992 \a uc is the unicode source string.
993*/
994QCString QTextCodec::fromUnicode(const QString& uc) const
995{
996 int l = uc.length();
997 return fromUnicode(uc,l);
998}
999
1000/*!
1001 \overload
1002
1003 \a a contains the source characters; \a len contains the number of
1004 characters in \a a to use.
1005*/
1006QString QTextCodec::toUnicode(const QByteArray& a, int len) const
1007{
1008 int l = a.size();
1009 l = QMIN( l, len );
1010 return toUnicode( a.data(), l );
1011}
1012
1013/*!
1014 \overload
1015
1016 \a a contains the source characters.
1017*/
1018QString QTextCodec::toUnicode(const QByteArray& a) const
1019{
1020 int l = a.size();
1021 return toUnicode( a.data(), l );
1022}
1023
1024/*!
1025 \overload
1026
1027 \a a contains the source characters; \a len contains the number of
1028 characters in \a a to use.
1029*/
1030QString QTextCodec::toUnicode(const QCString& a, int len) const
1031{
1032 int l = a.length();
1033 l = QMIN( l, len );
1034 return toUnicode( a.data(), l );
1035}
1036
1037/*!
1038 \overload
1039
1040 \a a contains the source characters.
1041*/
1042QString QTextCodec::toUnicode(const QCString& a) const
1043{
1044 int l = a.length();
1045 return toUnicode( a.data(), l );
1046}
1047
1048/*!
1049 \overload
1050
1051 \a chars contains the source characters.
1052*/
1053QString QTextCodec::toUnicode(const char* chars) const
1054{
1055 return toUnicode(chars,qstrlen(chars));
1056}
1057
1058/*!
1059 \internal
1060*/
1061unsigned short QTextCodec::characterFromUnicode(const QString &str, int pos) const
1062{
1063 QCString result = QTextCodec::fromUnicode(QString(str[pos]));
1064 uchar *ch = (uchar *) result.data();
1065 ushort retval = 0;
1066 if (result.size() > 2) {
1067 retval = (ushort) *ch << 8;
1068 ch++;
1069 }
1070 return retval + *ch;
1071}
1072
1073/*!
1074 Returns TRUE if the Unicode character \a ch can be fully encoded
1075 with this codec; otherwise returns FALSE. The default
1076 implementation tests if the result of toUnicode(fromUnicode(ch))
1077 is the original \a ch. Subclasses may be able to improve the
1078 efficiency.
1079*/
1080bool QTextCodec::canEncode( QChar ch ) const
1081{
1082 return toUnicode(fromUnicode(ch)) == ch;
1083}
1084
1085/*!
1086 \overload
1087
1088 \a s contains the string being tested for encode-ability.
1089*/
1090bool QTextCodec::canEncode( const QString& s ) const
1091{
1092 if ( s.isEmpty() )
1093 return TRUE;
1094 return toUnicode(fromUnicode(s)) == s;
1095}
1096
1097
1098
1099/*!
1100 \class QTextEncoder qtextcodec.h
1101 \brief The QTextEncoder class provides a state-based encoder.
1102 \reentrant
1103 \ingroup i18n
1104
1105 The encoder converts Unicode into another format, remembering any
1106 state that is required between calls.
1107
1108 \sa QTextCodec::makeEncoder()
1109*/
1110
1111/*!
1112 Destroys the encoder.
1113*/
1114QTextEncoder::~QTextEncoder()
1115{
1116}
1117
1118/*!
1119 \fn QCString QTextEncoder::fromUnicode(const QString& uc, int& lenInOut)
1120
1121 Converts \a lenInOut characters (not bytes) from \a uc, producing
1122 a QCString. \a lenInOut will be set to the \link
1123 QCString::length() length\endlink of the result (in bytes).
1124
1125 The encoder is free to record state to use when subsequent calls
1126 are made to this function (for example, it might change modes with
1127 escape sequences if needed during the encoding of one string, then
1128 assume that mode applies when a subsequent call begins).
1129*/
1130
1131/*!
1132 \class QTextDecoder qtextcodec.h
1133 \brief The QTextDecoder class provides a state-based decoder.
1134 \reentrant
1135 \ingroup i18n
1136
1137 The decoder converts a text format into Unicode, remembering any
1138 state that is required between calls.
1139
    \sa QTextCodec::makeDecoder()
1141*/
1142
1143
1144/*!
1145 Destroys the decoder.
1146*/
1147QTextDecoder::~QTextDecoder()
1148{
1149}
1150
1151/*!
1152 \fn QString QTextDecoder::toUnicode(const char* chars, int len)
1153
1154 Converts the first \a len bytes in \a chars to Unicode, returning
1155 the result.
1156
1157 If not all characters are used (e.g. if only part of a multi-byte
1158 encoding is at the end of the characters), the decoder remembers
1159 enough state to continue with the next call to this function.
1160*/
1161
1162#define CHAINED 0xffff
1163
// One entry of a byte-indexed decoding table.
//
// If multiByte is set, this entry is only a prefix: ignore unicode
// and index into multiByte with the next input byte. Otherwise
// unicode holds the decoded character (0xfffd until a mapping is
// stored).
struct QMultiByteUnicodeTable {
    ushort unicode;
    QMultiByteUnicodeTable* multiByte;   // owned array, or 0

    QMultiByteUnicodeTable() : unicode(0xfffd), multiByte(0) { }

    // Releases the chained sub-table (deleting a null pointer is a no-op).
    ~QMultiByteUnicodeTable() { delete [] multiByte; }
};
1178
// Parses one escaped byte constant of a POSIX charmap line.
//
// On entry \a cursor points at the escape character. The constant
// that follows is either "x" + hexadecimal digits, "d" + decimal
// digits, or plain octal digits directly after the escape character.
// On return \a cursor points just behind the digits that were
// consumed, and the low 8 bits of the parsed value are returned.
// If \a cursor points at the end of the string, 0 is returned and
// \a cursor is left untouched.
static int getByte(char* &cursor)
{
    if ( !*cursor )
        return 0;

    // Octal constants have no prefix letter, so their digits start
    // right after the escape character; hex and decimal constants
    // carry a one-letter prefix that must be skipped as well.
    int base = 8;
    char* digits = cursor + 1;
    if ( cursor[1] == 'x' ) {
        base = 16;
        digits = cursor + 2;
    } else if ( cursor[1] == 'd' ) {
        base = 10;
        digits = cursor + 2;
    }

    const long value = strtol(digits, &cursor, base);
    return (int)(value & 0xff);
}
1192
1193class QTextCodecFromIOD;
1194
1195class QTextCodecFromIODDecoder : public QTextDecoder {
1196 const QTextCodecFromIOD* codec;
1197 QMultiByteUnicodeTable* mb;
1198public:
1199 QTextCodecFromIODDecoder(const QTextCodecFromIOD* c);
1200 QString toUnicode(const char* chars, int len);
1201};
1202
1203class QTextCodecFromIOD : public QTextCodec {
1204 friend class QTextCodecFromIODDecoder;
1205
1206 QCString n;
1207
1208 // If from_unicode_page[row()][cell()] is 0 and from_unicode_page_multiByte,
1209 // use from_unicode_page_multiByte[row()][cell()] as string.
1210 char** from_unicode_page;
1211 char*** from_unicode_page_multiByte;
1212 char unkn;
1213
1214 // Only one of these is used
1215 ushort* to_unicode;
1216 QMultiByteUnicodeTable* to_unicode_multiByte;
1217 int max_bytes_per_char;
1218 QStrList aliases;
1219
1220 bool stateless() const { return !to_unicode_multiByte; }
1221
1222public:
1223 QTextCodecFromIOD(QIODevice* iod)
1224 {
1225 from_unicode_page = 0;
1226 to_unicode_multiByte = 0;
1227 to_unicode = 0;
1228 from_unicode_page_multiByte = 0;
1229 max_bytes_per_char = 1;
1230
1231 const int maxlen=100;
1232 char line[maxlen];
1233 char esc='\\';
1234 char comm='%';
1235 bool incmap = FALSE;
1236 while (iod->readLine(line,maxlen) > 0) {
1237 if (0==qstrnicmp(line,"<code_set_name>",15))
1238 n = line+15;
1239 else if (0==qstrnicmp(line,"<escape_char> ",14))
1240 esc = line[14];
1241 else if (0==qstrnicmp(line,"<comment_char> ",15))
1242 comm = line[15];
1243 else if (line[0]==comm && 0==qstrnicmp(line+1," alias ",7)) {
1244 aliases.append(line+8);
1245 } else if (0==qstrnicmp(line,"CHARMAP",7)) {
1246 if (!from_unicode_page) {
1247 from_unicode_page = new char*[256];
1248 for (int i=0; i<256; i++)
1249 from_unicode_page[i]=0;
1250 }
1251 if (!to_unicode) {
1252 to_unicode = new ushort[256];
1253 }
1254 incmap = TRUE;
1255 } else if (0==qstrnicmp(line,"END CHARMAP",11))
1256 break;
1257 else if (incmap) {
1258 char* cursor = line;
1259 int byte=-1,unicode=-1;
1260 ushort* mb_unicode=0;
1261 const int maxmb=8; // more -> we'll need to improve datastructures
1262 char mb[maxmb+1];
1263 int nmb=0;
1264
1265 while (*cursor) {
1266 if (cursor[0]=='<' && cursor[1]=='U' &&
1267 cursor[2]>='0' && cursor[2]<='9' &&
1268 cursor[3]>='0' && cursor[3]<='9') {
1269
1270 unicode = strtol(cursor+2,&cursor,16);
1271
1272 } else if (*cursor==esc) {
1273
1274 byte = getByte(cursor);
1275
1276 if ( *cursor == esc ) {
1277 if ( !to_unicode_multiByte ) {
1278 to_unicode_multiByte =
1279 new QMultiByteUnicodeTable[256];
1280 for (int i=0; i<256; i++) {
1281 to_unicode_multiByte[i].unicode =
1282 to_unicode[i];
1283 to_unicode_multiByte[i].multiByte = 0;
1284 }
1285 delete [] to_unicode;
1286 to_unicode = 0;
1287 }
1288 QMultiByteUnicodeTable* mbut =
1289 to_unicode_multiByte+byte;
1290 mb[nmb++] = byte;
1291 while ( nmb < maxmb && *cursor == esc ) {
1292 // Always at least once
1293
1294 mbut->unicode = CHAINED;
1295 byte = getByte(cursor);
1296 mb[nmb++] = byte;
1297 if (!mbut->multiByte) {
1298 mbut->multiByte =
1299 new QMultiByteUnicodeTable[256];
1300 }
1301 mbut = mbut->multiByte+byte;
1302 mb_unicode = & mbut->unicode;
1303 }
1304
1305 if ( nmb > max_bytes_per_char )
1306 max_bytes_per_char = nmb;
1307 }
1308 } else {
1309 cursor++;
1310 }
1311 }
1312
1313 if (unicode >= 0 && unicode <= 0xffff)
1314 {
1315 QChar ch((ushort)unicode);
1316 if (!from_unicode_page[ch.row()]) {
1317 from_unicode_page[ch.row()] = new char[256];
1318 for (int i=0; i<256; i++)
1319 from_unicode_page[ch.row()][i]=0;
1320 }
1321 if ( mb_unicode ) {
1322 from_unicode_page[ch.row()][ch.cell()] = 0;
1323 if (!from_unicode_page_multiByte) {
1324 from_unicode_page_multiByte = new char**[256];
1325 for (int i=0; i<256; i++)
1326 from_unicode_page_multiByte[i]=0;
1327 }
1328 if (!from_unicode_page_multiByte[ch.row()]) {
1329 from_unicode_page_multiByte[ch.row()] = new char*[256];
1330 for (int i=0; i<256; i++)
1331 from_unicode_page_multiByte[ch.row()][i] = 0;
1332 }
1333 mb[nmb++] = 0;
1334 from_unicode_page_multiByte[ch.row()][ch.cell()]
1335 = qstrdup(mb);
1336 *mb_unicode = unicode;
1337 } else {
1338 from_unicode_page[ch.row()][ch.cell()] = (char)byte;
1339 if ( to_unicode )
1340 to_unicode[byte] = unicode;
1341 else
1342 to_unicode_multiByte[byte].unicode = unicode;
1343 }
1344 } else {
1345 }
1346 }
1347 }
1348 n = n.stripWhiteSpace();
1349
1350 unkn = '?'; // ##### Might be a bad choice.
1351 }
1352
1353 ~QTextCodecFromIOD()
1354 {
1355 if ( from_unicode_page ) {
1356 for (int i=0; i<256; i++)
1357 if (from_unicode_page[i])
1358 delete [] from_unicode_page[i];
1359 }
1360 if ( from_unicode_page_multiByte ) {
1361 for (int i=0; i<256; i++)
1362 if (from_unicode_page_multiByte[i])
1363 for (int j=0; j<256; j++)
1364 if (from_unicode_page_multiByte[i][j])
1365 delete [] from_unicode_page_multiByte[i][j];
1366 }
1367 if ( to_unicode )
1368 delete [] to_unicode;
1369 if ( to_unicode_multiByte )
1370 delete [] to_unicode_multiByte;
1371 }
1372
1373 bool ok() const
1374 {
1375 return !!from_unicode_page;
1376 }
1377
1378 QTextDecoder* makeDecoder() const
1379 {
1380 if ( stateless() )
1381 return QTextCodec::makeDecoder();
1382 else
1383 return new QTextCodecFromIODDecoder(this);
1384 }
1385
1386 const char* name() const
1387 {
1388 return n;
1389 }
1390
1391 int mibEnum() const
1392 {
1393 return 0; // #### Unknown.
1394 }
1395
1396 int heuristicContentMatch(const char*, int) const
1397 {
1398 return 0;
1399 }
1400
1401 int heuristicNameMatch(const char* hint) const
1402 {
1403 int bestr = QTextCodec::heuristicNameMatch(hint);
1404 QStrListIterator it(aliases);
1405 char* a;
1406 while ((a=it.current())) {
1407 ++it;
1408 int r = simpleHeuristicNameMatch(a,hint);
1409 if (r > bestr)
1410 bestr = r;
1411 }
1412 return bestr;
1413 }
1414
1415 QString toUnicode(const char* chars, int len) const
1416 {
1417 const uchar* uchars = (const uchar*)chars;
1418 QString result;
1419 QMultiByteUnicodeTable* multiByte=to_unicode_multiByte;
1420 if ( multiByte ) {
1421 while (len--) {
1422 QMultiByteUnicodeTable& mb = multiByte[*uchars];
1423 if ( mb.multiByte ) {
1424 // Chained multi-byte
1425 multiByte = mb.multiByte;
1426 } else {
1427 result += QChar(mb.unicode);
1428 multiByte=to_unicode_multiByte;
1429 }
1430 uchars++;
1431 }
1432 } else {
1433 while (len--)
1434 result += QChar(to_unicode[*uchars++]);
1435 }
1436 return result;
1437 }
1438
1439#if !defined(Q_NO_USING_KEYWORD)
1440 using QTextCodec::fromUnicode;
1441#endif
1442 QCString fromUnicode(const QString& uc, int& lenInOut) const
1443 {
1444 if (lenInOut > (int)uc.length())
1445 lenInOut = uc.length();
1446 int rlen = lenInOut*max_bytes_per_char;
1447 QCString rstr(rlen);
1448 char* cursor = rstr.data();
1449 char* s=0;
1450 int l = lenInOut;
1451 int lout = 0;
1452 for (int i=0; i<l; i++) {
1453 QChar ch = uc[i];
1454 if ( ch == QChar::null ) {
1455 // special
1456 *cursor++ = 0;
1457 } else if ( from_unicode_page[ch.row()] &&
1458 from_unicode_page[ch.row()][ch.cell()] )
1459 {
1460 *cursor++ = from_unicode_page[ch.row()][ch.cell()];
1461 lout++;
1462 } else if ( from_unicode_page_multiByte &&
1463 from_unicode_page_multiByte[ch.row()] &&
1464 (s=from_unicode_page_multiByte[ch.row()][ch.cell()]) )
1465 {
1466 while (*s) {
1467 *cursor++ = *s++;
1468 lout++;
1469 }
1470 } else {
1471 *cursor++ = unkn;
1472 lout++;
1473 }
1474 }
1475 *cursor = 0;
1476 lenInOut = lout;
1477 return rstr;
1478 }
1479};
1480
1481QTextCodecFromIODDecoder::QTextCodecFromIODDecoder(const QTextCodecFromIOD* c) :
1482 codec(c)
1483{
1484 mb = codec->to_unicode_multiByte;
1485}
1486
1487QString QTextCodecFromIODDecoder::toUnicode(const char* chars, int len)
1488{
1489 const uchar* uchars = (const uchar*)chars;
1490 QString result;
1491 while (len--) {
1492 QMultiByteUnicodeTable& t = mb[*uchars];
1493 if ( t.multiByte ) {
1494 // Chained multi-byte
1495 mb = t.multiByte;
1496 } else {
1497 if ( t.unicode )
1498 result += QChar(t.unicode);
1499 mb=codec->to_unicode_multiByte;
1500 }
1501 uchars++;
1502 }
1503 return result;
1504}
1505
1506#ifndef QT_NO_CODECS
1507// Cannot use <pre> or \code
1508/*!
1509 Reads a POSIX2 charmap definition from \a iod.
1510 The parser recognizes the following lines:
1511
1512<font name="sans">
1513&nbsp;&nbsp;&lt;code_set_name&gt; <i>name</i></br>
&nbsp;&nbsp;&lt;escape_char&gt; <i>character</i></br>
&nbsp;&nbsp;&lt;comment_char&gt; <i>character</i></br>
1515&nbsp;&nbsp;% alias <i>alias</i></br>
1516&nbsp;&nbsp;CHARMAP</br>
1517&nbsp;&nbsp;&lt;<i>token</i>&gt; /x<i>hexbyte</i> &lt;U<i>unicode</i>&gt; ...</br>
1518&nbsp;&nbsp;&lt;<i>token</i>&gt; /d<i>decbyte</i> &lt;U<i>unicode</i>&gt; ...</br>
1519&nbsp;&nbsp;&lt;<i>token</i>&gt; /<i>octbyte</i> &lt;U<i>unicode</i>&gt; ...</br>
1520&nbsp;&nbsp;&lt;<i>token</i>&gt; /<i>any</i>/<i>any</i>... &lt;U<i>unicode</i>&gt; ...</br>
1521&nbsp;&nbsp;END CHARMAP</br>
1522</font>
1523
1524 The resulting QTextCodec is returned (and also added to the global
1525 list of codecs). The name() of the result is taken from the
1526 code_set_name.
1527
1528 Note that a codec constructed in this way uses much more memory
1529 and is slower than a hand-written QTextCodec subclass, since
1530 tables in code are kept in memory shared by all Qt applications.
1531
1532 \sa loadCharmapFile()
1533*/
1534QTextCodec* QTextCodec::loadCharmap(QIODevice* iod)
1535{
1536 QTextCodecFromIOD* r = new QTextCodecFromIOD(iod);
1537 if ( !r->ok() ) {
1538 delete r;
1539 r = 0;
1540 }
1541 return r;
1542}
1543
1544/*!
1545 A convenience function for loadCharmap() that loads the charmap
1546 definition from the file \a filename.
1547*/
1548QTextCodec* QTextCodec::loadCharmapFile(QString filename)
1549{
1550 QFile f(filename);
1551 if (f.open(IO_ReadOnly)) {
1552 QTextCodecFromIOD* r = new QTextCodecFromIOD(&f);
1553 if ( !r->ok() )
1554 delete r;
1555 else
1556 return r;
1557 }
1558 return 0;
1559}
1560
1561#endif //QT_NO_CODECS
1562
1563/*!
1564 Returns a string representing the current language and
1565 sublanguage, e.g. "pt" for Portuguese, or "pt_br" for Portuguese/Brazil.
1566*/
1567
1568const char* QTextCodec::locale()
1569{
1570 return QLocalePrivate::systemLocaleName();
1571}
1572
1573#ifndef QT_NO_CODECS
1574
1575class QSimpleTextCodec: public QTextCodec
1576{
1577public:
1578 QSimpleTextCodec( int );
1579 ~QSimpleTextCodec();
1580
1581 QString toUnicode(const char* chars, int len) const;
1582#if !defined(Q_NO_USING_KEYWORD)
1583 using QTextCodec::fromUnicode;
1584#endif
1585 QCString fromUnicode(const QString& uc, int& lenInOut ) const;
1586 unsigned short characterFromUnicode(const QString &str, int pos) const;
1587
1588 const char* name() const;
1589 const char* mimeName() const;
1590 int mibEnum() const;
1591
1592 int heuristicContentMatch(const char* chars, int len) const;
1593
1594 int heuristicNameMatch(const char* hint) const;
1595#if !defined(Q_NO_USING_KEYWORD)
1596 using QTextCodec::canEncode;
1597#endif
1598 bool canEncode( QChar ch ) const;
1599
1600 void fromUnicode( const QChar *in, unsigned short *out, int length ) const;
1601
1602private:
1603 void buildReverseMap();
1604
1605 int forwardIndex;
1606#ifndef Q_WS_QWS
1607 QMemArray<unsigned char> *reverseMap;
1608#endif
1609};
1610
#ifdef Q_WS_QWS
// Qt/Embedded only: file-level reverse map in place of the
// per-instance member of QSimpleTextCodec.
// NOTE(review): reverseOwner presumably records which codec the
// shared map was last built for - confirm against buildReverseMap().
static const QSimpleTextCodec* reverseOwner = 0;
static QMemArray<unsigned char>* reverseMap = 0;
#endif
1615
1616#define LAST_MIB 2004
1617
1618static const struct {
1619 const char *mime;
1620 const char * cs;
1621 int mib;
1622 Q_UINT16 values[128];
1623} unicodevalues[] = {
1624 // from RFC 1489, ftp://ftp.isi.edu/in-notes/rfc1489.txt
1625 { "KOI8-R", "KOI8-R", 2084,
1626 { 0x2500, 0x2502, 0x250C, 0x2510, 0x2514, 0x2518, 0x251C, 0x2524,
1627 0x252C, 0x2534, 0x253C, 0x2580, 0x2584, 0x2588, 0x258C, 0x2590,
1628 0x2591, 0x2592, 0x2593, 0x2320, 0x25A0, 0x2219/**/, 0x221A, 0x2248,
1629 0x2264, 0x2265, 0x00A0, 0x2321, 0x00B0, 0x00B2, 0x00B7, 0x00F7,
1630 0x2550, 0x2551, 0x2552, 0x0451, 0x2553, 0x2554, 0x2555, 0x2556,
1631 0x2557, 0x2558, 0x2559, 0x255A, 0x255B, 0x255C, 0x255D, 0x255E,
1632 0x255F, 0x2560, 0x2561, 0x0401, 0x2562, 0x2563, 0x2564, 0x2565,
1633 0x2566, 0x2567, 0x2568, 0x2569, 0x256A, 0x256B, 0x256C, 0x00A9,
1634 0x044E, 0x0430, 0x0431, 0x0446, 0x0434, 0x0435, 0x0444, 0x0433,
1635 0x0445, 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E,
1636 0x043F, 0x044F, 0x0440, 0x0441, 0x0442, 0x0443, 0x0436, 0x0432,
1637 0x044C, 0x044B, 0x0437, 0x0448, 0x044D, 0x0449, 0x0447, 0x044A,
1638 0x042E, 0x0410, 0x0411, 0x0426, 0x0414, 0x0415, 0x0424, 0x0413,
1639 0x0425, 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E,
1640 0x041F, 0x042F, 0x0420, 0x0421, 0x0422, 0x0423, 0x0416, 0x0412,
1641 0x042C, 0x042B, 0x0417, 0x0428, 0x042D, 0x0429, 0x0427, 0x042A } },
1642 // /**/ - The BULLET OPERATOR is confused. Some people think
1643 // it should be 0x2022 (BULLET).
1644
1645 // from RFC 2319, ftp://ftp.isi.edu/in-notes/rfc2319.txt
1646 { "KOI8-U", "KOI8-U", 2088,
1647 { 0x2500, 0x2502, 0x250C, 0x2510, 0x2514, 0x2518, 0x251C, 0x2524,
1648 0x252C, 0x2534, 0x253C, 0x2580, 0x2584, 0x2588, 0x258C, 0x2590,
1649 0x2591, 0x2592, 0x2593, 0x2320, 0x25A0, 0x2219, 0x221A, 0x2248,
1650 0x2264, 0x2265, 0x00A0, 0x2321, 0x00B0, 0x00B2, 0x00B7, 0x00F7,
1651 0x2550, 0x2551, 0x2552, 0x0451, 0x0454, 0x2554, 0x0456, 0x0457,
1652 0x2557, 0x2558, 0x2559, 0x255A, 0x255B, 0x0491, 0x255D, 0x255E,
1653 0x255F, 0x2560, 0x2561, 0x0401, 0x0404, 0x2563, 0x0406, 0x0407,
1654 0x2566, 0x2567, 0x2568, 0x2569, 0x256A, 0x0490, 0x256C, 0x00A9,
1655 0x044E, 0x0430, 0x0431, 0x0446, 0x0434, 0x0435, 0x0444, 0x0433,
1656 0x0445, 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E,
1657 0x043F, 0x044F, 0x0440, 0x0441, 0x0442, 0x0443, 0x0436, 0x0432,
1658 0x044C, 0x044B, 0x0437, 0x0448, 0x044D, 0x0449, 0x0447, 0x044A,
1659 0x042E, 0x0410, 0x0411, 0x0426, 0x0414, 0x0415, 0x0424, 0x0413,
1660 0x0425, 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E,
1661 0x041F, 0x042F, 0x0420, 0x0421, 0x0422, 0x0423, 0x0416, 0x0412,
1662 0x042C, 0x042B, 0x0417, 0x0428, 0x042D, 0x0429, 0x0427, 0x042A } },
1663
1664 // next bits generated from tables on the Unicode 2.0 CD. we can
1665 // use these tables since this is part of the transition to using
1666 // unicode everywhere in qt.
1667
1668 // $ for A in 8 9 A B C D E F ; do for B in 0 1 2 3 4 5 6 7 8 9 A B C D E F ; do echo 0x${A}${B} 0xFFFD ; done ; done > /tmp/digits ; for a in 8859-* ; do ( awk '/^0x[89ABCDEF]/{ print $1, $2 }' < $a ; cat /tmp/digits ) | sort | uniq -w4 | cut -c6- | paste '-d ' - - - - - - - - | sed -e 's/ /, /g' -e 's/$/,/' -e '$ s/,$/} },/' -e '1 s/^/{ /' > ~/tmp/$a ; done
1669
1670 // then I inserted the files manually.
1671 { "ISO-8859-2", "ISO 8859-2", 5,
1672 { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
1673 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
1674 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
1675 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
1676 0x00A0, 0x0104, 0x02D8, 0x0141, 0x00A4, 0x013D, 0x015A, 0x00A7,
1677 0x00A8, 0x0160, 0x015E, 0x0164, 0x0179, 0x00AD, 0x017D, 0x017B,
1678 0x00B0, 0x0105, 0x02DB, 0x0142, 0x00B4, 0x013E, 0x015B, 0x02C7,
1679 0x00B8, 0x0161, 0x015F, 0x0165, 0x017A, 0x02DD, 0x017E, 0x017C,
1680 0x0154, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x0139, 0x0106, 0x00C7,
1681 0x010C, 0x00C9, 0x0118, 0x00CB, 0x011A, 0x00CD, 0x00CE, 0x010E,
1682 0x0110, 0x0143, 0x0147, 0x00D3, 0x00D4, 0x0150, 0x00D6, 0x00D7,
1683 0x0158, 0x016E, 0x00DA, 0x0170, 0x00DC, 0x00DD, 0x0162, 0x00DF,
1684 0x0155, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x013A, 0x0107, 0x00E7,
1685 0x010D, 0x00E9, 0x0119, 0x00EB, 0x011B, 0x00ED, 0x00EE, 0x010F,
1686 0x0111, 0x0144, 0x0148, 0x00F3, 0x00F4, 0x0151, 0x00F6, 0x00F7,
1687 0x0159, 0x016F, 0x00FA, 0x0171, 0x00FC, 0x00FD, 0x0163, 0x02D9} },
1688 { "ISO-8859-3", "ISO 8859-3", 6,
1689 { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
1690 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
1691 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
1692 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
1693 0x00A0, 0x0126, 0x02D8, 0x00A3, 0x00A4, 0xFFFD, 0x0124, 0x00A7,
1694 0x00A8, 0x0130, 0x015E, 0x011E, 0x0134, 0x00AD, 0xFFFD, 0x017B,
1695 0x00B0, 0x0127, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x0125, 0x00B7,
1696 0x00B8, 0x0131, 0x015F, 0x011F, 0x0135, 0x00BD, 0xFFFD, 0x017C,
1697 0x00C0, 0x00C1, 0x00C2, 0xFFFD, 0x00C4, 0x010A, 0x0108, 0x00C7,
1698 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
1699 0xFFFD, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x0120, 0x00D6, 0x00D7,
1700 0x011C, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x016C, 0x015C, 0x00DF,
1701 0x00E0, 0x00E1, 0x00E2, 0xFFFD, 0x00E4, 0x010B, 0x0109, 0x00E7,
1702 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
1703 0xFFFD, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x0121, 0x00F6, 0x00F7,
1704 0x011D, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x016D, 0x015D, 0x02D9} },
1705 { "ISO-8859-4", "ISO 8859-4", 7,
1706 { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
1707 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
1708 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
1709 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
1710 0x00A0, 0x0104, 0x0138, 0x0156, 0x00A4, 0x0128, 0x013B, 0x00A7,
1711 0x00A8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00AD, 0x017D, 0x00AF,
1712 0x00B0, 0x0105, 0x02DB, 0x0157, 0x00B4, 0x0129, 0x013C, 0x02C7,
1713 0x00B8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014A, 0x017E, 0x014B,
1714 0x0100, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x012E,
1715 0x010C, 0x00C9, 0x0118, 0x00CB, 0x0116, 0x00CD, 0x00CE, 0x012A,
1716 0x0110, 0x0145, 0x014C, 0x0136, 0x00D4, 0x00D5, 0x00D6, 0x00D7,
1717 0x00D8, 0x0172, 0x00DA, 0x00DB, 0x00DC, 0x0168, 0x016A, 0x00DF,
1718 0x0101, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x012F,
1719 0x010D, 0x00E9, 0x0119, 0x00EB, 0x0117, 0x00ED, 0x00EE, 0x012B,
1720 0x0111, 0x0146, 0x014D, 0x0137, 0x00F4, 0x00F5, 0x00F6, 0x00F7,
1721 0x00F8, 0x0173, 0x00FA, 0x00FB, 0x00FC, 0x0169, 0x016B, 0x02D9} },
1722 { "ISO-8859-5", "ISO 8859-5", 8,
1723 { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
1724 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
1725 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
1726 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
1727 0x00A0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407,
1728 0x0408, 0x0409, 0x040A, 0x040B, 0x040C, 0x00AD, 0x040E, 0x040F,
1729 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
1730 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, 0x041F,
1731 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
1732 0x0428, 0x0429, 0x042A, 0x042B, 0x042C, 0x042D, 0x042E, 0x042F,
1733 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
1734 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, 0x043F,
1735 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
1736 0x0448, 0x0449, 0x044A, 0x044B, 0x044C, 0x044D, 0x044E, 0x044F,
1737 0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457,
1738 0x0458, 0x0459, 0x045A, 0x045B, 0x045C, 0x00A7, 0x045E, 0x045F} },
1739 { "ISO-8859-6", "ISO 8859-6", 82,
1740 { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
1741 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
1742 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
1743 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
1744 0x00A0, 0xFFFD, 0xFFFD, 0xFFFD, 0x00A4, 0xFFFD, 0xFFFD, 0xFFFD,
1745 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x060C, 0x00AD, 0xFFFD, 0xFFFD,
1746 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
1747 0xFFFD, 0xFFFD, 0xFFFD, 0x061B, 0xFFFD, 0xFFFD, 0xFFFD, 0x061F,
1748 0xFFFD, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
1749 0x0628, 0x0629, 0x062A, 0x062B, 0x062C, 0x062D, 0x062E, 0x062F,
1750 0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637,
1751 0x0638, 0x0639, 0x063A, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
1752 0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647,
1753 0x0648, 0x0649, 0x064A, 0x064B, 0x064C, 0x064D, 0x064E, 0x064F,
1754 0x0650, 0x0651, 0x0652, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
1755 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD} },
1756 { "ISO-8859-7", "ISO 8859-7", 10,
1757 { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
1758 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
1759 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
1760 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
1761 0x00A0, 0x2018, 0x2019, 0x00A3, 0xFFFD, 0xFFFD, 0x00A6, 0x00A7,
1762 0x00A8, 0x00A9, 0xFFFD, 0x00AB, 0x00AC, 0x00AD, 0xFFFD, 0x2015,
1763 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x0384, 0x0385, 0x0386, 0x00B7,
1764 0x0388, 0x0389, 0x038A, 0x00BB, 0x038C, 0x00BD, 0x038E, 0x038F,
1765 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
1766 0x0398, 0x0399, 0x039A, 0x039B, 0x039C, 0x039D, 0x039E, 0x039F,
1767 0x03A0, 0x03A1, 0xFFFD, 0x03A3, 0x03A4, 0x03A5, 0x03A6, 0x03A7,
1768 0x03A8, 0x03A9, 0x03AA, 0x03AB, 0x03AC, 0x03AD, 0x03AE, 0x03AF,
1769 0x03B0, 0x03B1, 0x03B2, 0x03B3, 0x03B4, 0x03B5, 0x03B6, 0x03B7,
1770 0x03B8, 0x03B9, 0x03BA, 0x03BB, 0x03BC, 0x03BD, 0x03BE, 0x03BF,
1771 0x03C0, 0x03C1, 0x03C2, 0x03C3, 0x03C4, 0x03C5, 0x03C6, 0x03C7,
1772 0x03C8, 0x03C9, 0x03CA, 0x03CB, 0x03CC, 0x03CD, 0x03CE, 0xFFFD} },
1773 { "ISO-8859-8-I", "ISO 8859-8-I", 85,
1774 { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
1775 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
1776 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
1777 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
1778 0x00A0, 0xFFFD, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,
1779 0x00A8, 0x00A9, 0x00D7, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x203E,
1780 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
1781 0x00B8, 0x00B9, 0x00F7, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0xFFFD,
1782 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
1783 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
1784 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
1785 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x2017,
1786 0x05D0, 0x05D1, 0x05D2, 0x05D3, 0x05D4, 0x05D5, 0x05D6, 0x05D7,
1787 0x05D8, 0x05D9, 0x05DA, 0x05DB, 0x05DC, 0x05DD, 0x05DE, 0x05DF,
1788 0x05E0, 0x05E1, 0x05E2, 0x05E3, 0x05E4, 0x05E5, 0x05E6, 0x05E7,
1789 0x05E8, 0x05E9, 0x05EA, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD} },
1790 { "ISO-8859-9", "ISO 8859-9", 12,
1791 { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
1792 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
1793 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
1794 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
1795 0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,
1796 0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
1797 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
1798 0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF,
1799 0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7,
1800 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
1801 0x011E, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7,
1802 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x0130, 0x015E, 0x00DF,
1803 0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7,
1804 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
1805 0x011F, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7,
1806 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x0131, 0x015F, 0x00FF} },
1807 { "ISO-8859-10", "ISO 8859-10", 13,
1808 { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
1809 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
1810 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
1811 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
1812 0x00A0, 0x0104, 0x0112, 0x0122, 0x012A, 0x0128, 0x0136, 0x00A7,
1813 0x013B, 0x0110, 0x0160, 0x0166, 0x017D, 0x00AD, 0x016A, 0x014A,
1814 0x00B0, 0x0105, 0x0113, 0x0123, 0x012B, 0x0129, 0x0137, 0x00B7,
1815 0x013C, 0x0111, 0x0161, 0x0167, 0x017E, 0x2015, 0x016B, 0x014B,
1816 0x0100, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x012E,
1817 0x010C, 0x00C9, 0x0118, 0x00CB, 0x0116, 0x00CD, 0x00CE, 0x00CF,
1818 0x00D0, 0x0145, 0x014C, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x0168,
1819 0x00D8, 0x0172, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF,
1820 0x0101, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x012F,
1821 0x010D, 0x00E9, 0x0119, 0x00EB, 0x0117, 0x00ED, 0x00EE, 0x00EF,
1822 0x00F0, 0x0146, 0x014D, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x0169,
1823 0x00F8, 0x0173, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x0138} },
1824 { "ISO-8859-13", "ISO 8859-13", 109,
1825 { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
1826 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
1827 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
1828 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
1829 0x00A0, 0x201D, 0x00A2, 0x00A3, 0x00A4, 0x201E, 0x00A6, 0x00A7,
1830 0x00D8, 0x00A9, 0x0156, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00C6,
1831 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x201C, 0x00B5, 0x00B6, 0x00B7,
1832 0x00F8, 0x00B9, 0x0157, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00E6,
1833 0x0104, 0x012E, 0x0100, 0x0106, 0x00C4, 0x00C5, 0x0118, 0x0112,
1834 0x010C, 0x00C9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012A, 0x013B,
1835 0x0160, 0x0143, 0x0145, 0x00D3, 0x014C, 0x00D5, 0x00D6, 0x00D7,
1836 0x0172, 0x0141, 0x015A, 0x016A, 0x00DC, 0x017B, 0x017D, 0x00DF,
1837 0x0105, 0x012F, 0x0101, 0x0107, 0x00E4, 0x00E5, 0x0119, 0x0113,
1838 0x010D, 0x00E9, 0x017A, 0x0117, 0x0123, 0x0137, 0x012B, 0x013C,
1839 0x0161, 0x0144, 0x0146, 0x00F3, 0x014D, 0x00F5, 0x00F6, 0x00F7,
1840 0x0173, 0x0142, 0x015B, 0x016B, 0x00FC, 0x017C, 0x017E, 0x2019} },
1841 { "ISO-8859-14", "ISO 8859-14", 110,
1842 { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
1843 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
1844 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
1845 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
1846 0x00A0, 0x1E02, 0x1E03, 0x00A3, 0x010A, 0x010B, 0x1E0A, 0x00A7,
1847 0x1E80, 0x00A9, 0x1E82, 0x1E0B, 0x1EF2, 0x00AD, 0x00AE, 0x0178,
1848 0x1E1E, 0x1E1F, 0x0120, 0x0121, 0x1E40, 0x1E41, 0x00B6, 0x1E56,
1849 0x1E81, 0x1E57, 0x1E83, 0x1E60, 0x1EF3, 0x1E84, 0x1E85, 0x1E61,
1850 0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7,
1851 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
1852 0x0174, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x1E6A,
1853 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x0176, 0x00DF,
1854 0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7,
1855 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
1856 0x0175, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x1E6B,
1857 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x0177, 0x00FF} },
1858
1859 // next bits generated again from tables on the Unicode 3.0 CD.
1860
1861 // $ for a in CP* ; do ( awk '/^0x[89ABCDEF]/{ print $1, $2 }' < $a ) | sort | sed -e 's/#UNDEF.*$/0xFFFD/' | cut -c6- | paste '-d ' - - - - - - - - | sed -e 's/ /, /g' -e 's/$/,/' -e '$ s/,$/} },/' -e '1 s/^/{ /' > ~/tmp/$a ; done
1862
1863 { "CP 850", "IBM 850", 2009,
1864 { 0x00C7, 0x00FC, 0x00E9, 0x00E2, 0x00E4, 0x00E0, 0x00E5, 0x00E7,
1865 0x00EA, 0x00EB, 0x00E8, 0x00EF, 0x00EE, 0x00EC, 0x00C4, 0x00C5,
1866 0x00C9, 0x00E6, 0x00C6, 0x00F4, 0x00F6, 0x00F2, 0x00FB, 0x00F9,
1867 0x00FF, 0x00D6, 0x00DC, 0x00F8, 0x00A3, 0x00D8, 0x00D7, 0x0192,
1868 0x00E1, 0x00ED, 0x00F3, 0x00FA, 0x00F1, 0x00D1, 0x00AA, 0x00BA,
1869 0x00BF, 0x00AE, 0x00AC, 0x00BD, 0x00BC, 0x00A1, 0x00AB, 0x00BB,
1870 0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x00C1, 0x00C2, 0x00C0,
1871 0x00A9, 0x2563, 0x2551, 0x2557, 0x255D, 0x00A2, 0x00A5, 0x2510,
1872 0x2514, 0x2534, 0x252C, 0x251C, 0x2500, 0x253C, 0x00E3, 0x00C3,
1873 0x255A, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256C, 0x00A4,
1874 0x00F0, 0x00D0, 0x00CA, 0x00CB, 0x00C8, 0x0131, 0x00CD, 0x00CE,
1875 0x00CF, 0x2518, 0x250C, 0x2588, 0x2584, 0x00A6, 0x00CC, 0x2580,
1876 0x00D3, 0x00DF, 0x00D4, 0x00D2, 0x00F5, 0x00D5, 0x00B5, 0x00FE,
1877 0x00DE, 0x00DA, 0x00DB, 0x00D9, 0x00FD, 0x00DD, 0x00AF, 0x00B4,
1878 0x00AD, 0x00B1, 0x2017, 0x00BE, 0x00B6, 0x00A7, 0x00F7, 0x00B8,
1879 0x00B0, 0x00A8, 0x00B7, 0x00B9, 0x00B3, 0x00B2, 0x25A0, 0x00A0} },
1880 { "CP 874", "CP 874", 0, //### what is the mib?
1881 { 0x20AC, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x2026, 0xFFFD, 0xFFFD,
1882 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
1883 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
1884 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
1885 0x00A0, 0x0E01, 0x0E02, 0x0E03, 0x0E04, 0x0E05, 0x0E06, 0x0E07,
1886 0x0E08, 0x0E09, 0x0E0A, 0x0E0B, 0x0E0C, 0x0E0D, 0x0E0E, 0x0E0F,
1887 0x0E10, 0x0E11, 0x0E12, 0x0E13, 0x0E14, 0x0E15, 0x0E16, 0x0E17,
1888 0x0E18, 0x0E19, 0x0E1A, 0x0E1B, 0x0E1C, 0x0E1D, 0x0E1E, 0x0E1F,
1889 0x0E20, 0x0E21, 0x0E22, 0x0E23, 0x0E24, 0x0E25, 0x0E26, 0x0E27,
1890 0x0E28, 0x0E29, 0x0E2A, 0x0E2B, 0x0E2C, 0x0E2D, 0x0E2E, 0x0E2F,
1891 0x0E30, 0x0E31, 0x0E32, 0x0E33, 0x0E34, 0x0E35, 0x0E36, 0x0E37,
1892 0x0E38, 0x0E39, 0x0E3A, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x0E3F,
1893 0x0E40, 0x0E41, 0x0E42, 0x0E43, 0x0E44, 0x0E45, 0x0E46, 0x0E47,
1894 0x0E48, 0x0E49, 0x0E4A, 0x0E4B, 0x0E4C, 0x0E4D, 0x0E4E, 0x0E4F,
1895 0x0E50, 0x0E51, 0x0E52, 0x0E53, 0x0E54, 0x0E55, 0x0E56, 0x0E57,
1896 0x0E58, 0x0E59, 0x0E5A, 0x0E5B, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD} },
1897 { "IBM 866", "IBM 866", 2086,
1898 { 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
1899 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, 0x041F,
1900 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
1901 0x0428, 0x0429, 0x042A, 0x042B, 0x042C, 0x042D, 0x042E, 0x042F,
1902 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
1903 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, 0x043F,
1904 0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x2561, 0x2562, 0x2556,
1905 0x2555, 0x2563, 0x2551, 0x2557, 0x255D, 0x255C, 0x255B, 0x2510,
1906 0x2514, 0x2534, 0x252C, 0x251C, 0x2500, 0x253C, 0x255E, 0x255F,
1907 0x255A, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256C, 0x2567,
1908 0x2568, 0x2564, 0x2565, 0x2559, 0x2558, 0x2552, 0x2553, 0x256B,
1909 0x256A, 0x2518, 0x250C, 0x2588, 0x2584, 0x258C, 0x2590, 0x2580,
1910 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
1911 0x0448, 0x0449, 0x044A, 0x044B, 0x044C, 0x044D, 0x044E, 0x044F,
1912 0x0401, 0x0451, 0x0404, 0x0454, 0x0407, 0x0457, 0x040E, 0x045E,
1913 0x00B0, 0x2219, 0x00B7, 0x221A, 0x2116, 0x00A4, 0x25A0, 0x00A0} },
1914
1915 { "windows-1250", "CP 1250", 2250,
1916 { 0x20AC, 0xFFFD, 0x201A, 0xFFFD, 0x201E, 0x2026, 0x2020, 0x2021,
1917 0xFFFD, 0x2030, 0x0160, 0x2039, 0x015A, 0x0164, 0x017D, 0x0179,
1918 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
1919 0xFFFD, 0x2122, 0x0161, 0x203A, 0x015B, 0x0165, 0x017E, 0x017A,
1920 0x00A0, 0x02C7, 0x02D8, 0x0141, 0x00A4, 0x0104, 0x00A6, 0x00A7,
1921 0x00A8, 0x00A9, 0x015E, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x017B,
1922 0x00B0, 0x00B1, 0x02DB, 0x0142, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
1923 0x00B8, 0x0105, 0x015F, 0x00BB, 0x013D, 0x02DD, 0x013E, 0x017C,
1924 0x0154, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x0139, 0x0106, 0x00C7,
1925 0x010C, 0x00C9, 0x0118, 0x00CB, 0x011A, 0x00CD, 0x00CE, 0x010E,
1926 0x0110, 0x0143, 0x0147, 0x00D3, 0x00D4, 0x0150, 0x00D6, 0x00D7,
1927 0x0158, 0x016E, 0x00DA, 0x0170, 0x00DC, 0x00DD, 0x0162, 0x00DF,
1928 0x0155, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x013A, 0x0107, 0x00E7,
1929 0x010D, 0x00E9, 0x0119, 0x00EB, 0x011B, 0x00ED, 0x00EE, 0x010F,
1930 0x0111, 0x0144, 0x0148, 0x00F3, 0x00F4, 0x0151, 0x00F6, 0x00F7,
1931 0x0159, 0x016F, 0x00FA, 0x0171, 0x00FC, 0x00FD, 0x0163, 0x02D9} },
1932 { "windows-1251", "CP 1251", 2251,
1933 { 0x0402, 0x0403, 0x201A, 0x0453, 0x201E, 0x2026, 0x2020, 0x2021,
1934 0x20AC, 0x2030, 0x0409, 0x2039, 0x040A, 0x040C, 0x040B, 0x040F,
1935 0x0452, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
1936 0xFFFD, 0x2122, 0x0459, 0x203A, 0x045A, 0x045C, 0x045B, 0x045F,
1937 0x00A0, 0x040E, 0x045E, 0x0408, 0x00A4, 0x0490, 0x00A6, 0x00A7,
1938 0x0401, 0x00A9, 0x0404, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x0407,
1939 0x00B0, 0x00B1, 0x0406, 0x0456, 0x0491, 0x00B5, 0x00B6, 0x00B7,
1940 0x0451, 0x2116, 0x0454, 0x00BB, 0x0458, 0x0405, 0x0455, 0x0457,
1941 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
1942 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, 0x041F,
1943 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
1944 0x0428, 0x0429, 0x042A, 0x042B, 0x042C, 0x042D, 0x042E, 0x042F,
1945 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
1946 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, 0x043F,
1947 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
1948 0x0448, 0x0449, 0x044A, 0x044B, 0x044C, 0x044D, 0x044E, 0x044F} },
1949 { "windows-1252", "CP 1252", 2252,
1950 { 0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
1951 0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0xFFFD, 0x017D, 0xFFFD,
1952 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
1953 0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0xFFFD, 0x017E, 0x0178,
1954 0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,
1955 0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
1956 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
1957 0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF,
1958 0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7,
1959 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
1960 0x00D0, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7,
1961 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF,
1962 0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7,
1963 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
1964 0x00F0, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7,
1965 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x00FF} },
1966 { "windows-1253", "CP 1253", 2253,
1967 { 0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
1968 0xFFFD, 0x2030, 0xFFFD, 0x2039, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
1969 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
1970 0xFFFD, 0x2122, 0xFFFD, 0x203A, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
1971 0x00A0, 0x0385, 0x0386, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,
1972 0x00A8, 0x00A9, 0xFFFD, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x2015,
1973 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x0384, 0x00B5, 0x00B6, 0x00B7,
1974 0x0388, 0x0389, 0x038A, 0x00BB, 0x038C, 0x00BD, 0x038E, 0x038F,
1975 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
1976 0x0398, 0x0399, 0x039A, 0x039B, 0x039C, 0x039D, 0x039E, 0x039F,
1977 0x03A0, 0x03A1, 0xFFFD, 0x03A3, 0x03A4, 0x03A5, 0x03A6, 0x03A7,
1978 0x03A8, 0x03A9, 0x03AA, 0x03AB, 0x03AC, 0x03AD, 0x03AE, 0x03AF,
1979 0x03B0, 0x03B1, 0x03B2, 0x03B3, 0x03B4, 0x03B5, 0x03B6, 0x03B7,
1980 0x03B8, 0x03B9, 0x03BA, 0x03BB, 0x03BC, 0x03BD, 0x03BE, 0x03BF,
1981 0x03C0, 0x03C1, 0x03C2, 0x03C3, 0x03C4, 0x03C5, 0x03C6, 0x03C7,
1982 0x03C8, 0x03C9, 0x03CA, 0x03CB, 0x03CC, 0x03CD, 0x03CE, 0xFFFD} },
1983 { "windows-1254", "CP 1254", 2254,
1984 { 0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
1985 0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0xFFFD, 0xFFFD, 0xFFFD,
1986 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
1987 0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0xFFFD, 0xFFFD, 0x0178,
1988 0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,
1989 0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
1990 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
1991 0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF,
1992 0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7,
1993 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
1994 0x011E, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7,
1995 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x0130, 0x015E, 0x00DF,
1996 0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7,
1997 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
1998 0x011F, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7,
1999 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x0131, 0x015F, 0x00FF} },
2000 { "windows-1255", "CP 1255", 2255,
2001 { 0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
2002 0x02C6, 0x2030, 0xFFFD, 0x2039, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
2003 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
2004 0x02DC, 0x2122, 0xFFFD, 0x203A, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
2005 0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x20AA, 0x00A5, 0x00A6, 0x00A7,
2006 0x00A8, 0x00A9, 0x00D7, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
2007 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
2008 0x00B8, 0x00B9, 0x00F7, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF,
2009 0x05B0, 0x05B1, 0x05B2, 0x05B3, 0x05B4, 0x05B5, 0x05B6, 0x05B7,
2010 0x05B8, 0x05B9, 0xFFFD, 0x05BB, 0x05BC, 0x05BD, 0x05BE, 0x05BF,
2011 0x05C0, 0x05C1, 0x05C2, 0x05C3, 0x05F0, 0x05F1, 0x05F2, 0x05F3,
2012 0x05F4, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
2013 0x05D0, 0x05D1, 0x05D2, 0x05D3, 0x05D4, 0x05D5, 0x05D6, 0x05D7,
2014 0x05D8, 0x05D9, 0x05DA, 0x05DB, 0x05DC, 0x05DD, 0x05DE, 0x05DF,
2015 0x05E0, 0x05E1, 0x05E2, 0x05E3, 0x05E4, 0x05E5, 0x05E6, 0x05E7,
2016 0x05E8, 0x05E9, 0x05EA, 0xFFFD, 0xFFFD, 0x200E, 0x200F, 0xFFFD} },
2017 { "windows-1256", "CP 1256", 2256,
2018 { 0x20AC, 0x067E, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
2019 0x02C6, 0x2030, 0x0679, 0x2039, 0x0152, 0x0686, 0x0698, 0x0688,
2020 0x06AF, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
2021 0x06A9, 0x2122, 0x0691, 0x203A, 0x0153, 0x200C, 0x200D, 0x06BA,
2022 0x00A0, 0x060C, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,
2023 0x00A8, 0x00A9, 0x06BE, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
2024 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
2025 0x00B8, 0x00B9, 0x061B, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x061F,
2026 0x06C1, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
2027 0x0628, 0x0629, 0x062A, 0x062B, 0x062C, 0x062D, 0x062E, 0x062F,
2028 0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x00D7,
2029 0x0637, 0x0638, 0x0639, 0x063A, 0x0640, 0x0641, 0x0642, 0x0643,
2030 0x00E0, 0x0644, 0x00E2, 0x0645, 0x0646, 0x0647, 0x0648, 0x00E7,
2031 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x0649, 0x064A, 0x00EE, 0x00EF,
2032 0x064B, 0x064C, 0x064D, 0x064E, 0x00F4, 0x064F, 0x0650, 0x00F7,
2033 0x0651, 0x00F9, 0x0652, 0x00FB, 0x00FC, 0x200E, 0x200F, 0x06D2} },
2034 { "windows-1257", "CP 1257", 2257,
2035 { 0x20AC, 0xFFFD, 0x201A, 0xFFFD, 0x201E, 0x2026, 0x2020, 0x2021,
2036 0xFFFD, 0x2030, 0xFFFD, 0x2039, 0xFFFD, 0x00A8, 0x02C7, 0x00B8,
2037 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
2038 0xFFFD, 0x2122, 0xFFFD, 0x203A, 0xFFFD, 0x00AF, 0x02DB, 0xFFFD,
2039 0x00A0, 0xFFFD, 0x00A2, 0x00A3, 0x00A4, 0xFFFD, 0x00A6, 0x00A7,
2040 0x00D8, 0x00A9, 0x0156, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00C6,
2041 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
2042 0x00F8, 0x00B9, 0x0157, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00E6,
2043 0x0104, 0x012E, 0x0100, 0x0106, 0x00C4, 0x00C5, 0x0118, 0x0112,
2044 0x010C, 0x00C9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012A, 0x013B,
2045 0x0160, 0x0143, 0x0145, 0x00D3, 0x014C, 0x00D5, 0x00D6, 0x00D7,
2046 0x0172, 0x0141, 0x015A, 0x016A, 0x00DC, 0x017B, 0x017D, 0x00DF,
2047 0x0105, 0x012F, 0x0101, 0x0107, 0x00E4, 0x00E5, 0x0119, 0x0113,
2048 0x010D, 0x00E9, 0x017A, 0x0117, 0x0123, 0x0137, 0x012B, 0x013C,
2049 0x0161, 0x0144, 0x0146, 0x00F3, 0x014D, 0x00F5, 0x00F6, 0x00F7,
2050 0x0173, 0x0142, 0x015B, 0x016B, 0x00FC, 0x017C, 0x017E, 0x02D9} },
2051 { "windows-1258", "CP 1258", 2258,
2052 { 0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
2053 0x02C6, 0x2030, 0xFFFD, 0x2039, 0x0152, 0xFFFD, 0xFFFD, 0xFFFD,
2054 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
2055 0x02DC, 0x2122, 0xFFFD, 0x203A, 0x0153, 0xFFFD, 0xFFFD, 0x0178,
2056 0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,
2057 0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
2058 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
2059 0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF,
2060 0x00C0, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x00C5, 0x00C6, 0x00C7,
2061 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x0300, 0x00CD, 0x00CE, 0x00CF,
2062 0x0110, 0x00D1, 0x0309, 0x00D3, 0x00D4, 0x01A0, 0x00D6, 0x00D7,
2063 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x01AF, 0x0303, 0x00DF,
2064 0x00E0, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x00E5, 0x00E6, 0x00E7,
2065 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x0301, 0x00ED, 0x00EE, 0x00EF,
2066 0x0111, 0x00F1, 0x0323, 0x00F3, 0x00F4, 0x01A1, 0x00F6, 0x00F7,
2067 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x01B0, 0x20AB, 0x00FF} },
2068
2069 { "Apple Roman", "Apple Roman", 0,
2070 { 0x00C4, 0x00C5, 0x00C7, 0x00C9, 0x00D1, 0x00D6, 0x00DC, 0x00E1,
2071 0x00E0, 0x00E2, 0x00E4, 0x00E3, 0x00E5, 0x00E7, 0x00E9, 0x00E8,
2072 0x00EA, 0x00EB, 0x00ED, 0x00EC, 0x00EE, 0x00EF, 0x00F1, 0x00F3,
2073 0x00F2, 0x00F4, 0x00F6, 0x00F5, 0x00FA, 0x00F9, 0x00FB, 0x00FC,
2074 0x2020, 0x00B0, 0x00A2, 0x00A3, 0x00A7, 0x2022, 0x00B6, 0x00DF,
2075 0x00AE, 0x00A9, 0x2122, 0x00B4, 0x00A8, 0x2260, 0x00C6, 0x00D8,
2076 0x221E, 0x00B1, 0x2264, 0x2265, 0x00A5, 0x00B5, 0x2202, 0x2211,
2077 0x220F, 0x03C0, 0x222B, 0x00AA, 0x00BA, 0x03A9, 0x00E6, 0x00F8,
2078 0x00BF, 0x00A1, 0x00AC, 0x221A, 0x0192, 0x2248, 0x2206, 0x00AB,
2079 0x00BB, 0x2026, 0x00A0, 0x00C0, 0x00C3, 0x00D5, 0x0152, 0x0153,
2080 0x2013, 0x2014, 0x201C, 0x201D, 0x2018, 0x2019, 0x00F7, 0x25CA,
2081 0x00FF, 0x0178, 0x2044, 0x20AC, 0x2039, 0x203A, 0xFB01, 0xFB02,
2082 0x2021, 0x00B7, 0x201A, 0x201E, 0x2030, 0x00C2, 0x00CA, 0x00C1,
2083 0x00CB, 0x00C8, 0x00CD, 0x00CE, 0x00CF, 0x00CC, 0x00D3, 0x00D4,
2084 0xF8FF, 0x00D2, 0x00DA, 0x00DB, 0x00D9, 0x0131, 0x02C6, 0x02DC,
2085 0x00AF, 0x02D8, 0x02D9, 0x02DA, 0x00B8, 0x02DD, 0x02DB, 0x02C7} },
2086
2087
2088
2089 // This one is based on the charmap file
2090 // /usr/share/i18n/charmaps/SAMI-WS2.gz, which is manually adapted
2091 // to this format by Børre Gaup <boerre@subdimension.com>
2092 { "WINSAMI2", "WS2", 0,
2093 { 0x20AC, 0xFFFD, 0x010C, 0x0192, 0x010D, 0x01B7, 0x0292, 0x01EE,
2094 0x01EF, 0x0110, 0x0160, 0x2039, 0x0152, 0xFFFD, 0xFFFD, 0xFFFD,
2095 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
2096 0x0111, 0x01E6, 0x0161, 0x203A, 0x0153, 0xFFFD, 0xFFFD, 0x0178,
2097 0x00A0, 0x01E7, 0x01E4, 0x00A3, 0x00A4, 0x01E5, 0x00A6, 0x00A7,
2098 0x00A8, 0x00A9, 0x021E, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x021F,
2099 0x00B0, 0x00B1, 0x01E8, 0x01E9, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
2100 0x014A, 0x014B, 0x0166, 0x00BB, 0x0167, 0x00BD, 0x017D, 0x017E,
2101 0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7,
2102 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
2103 0x00D0, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7,
2104 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF,
2105 0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7,
2106 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
2107 0x00F0, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7,
2108 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x00FF} },
2109
2110
2111 // this one is generated from the charmap file located in /usr/share/i18n/charmaps
2112 // on most Linux distributions. The thai character set tis620 is byte by byte equivalent
2113 // to iso8859-11, so we name it 8859-11 here, but recognise the name tis620 too.
2114
2115 // $ for A in 8 9 A B C D E F ; do for B in 0 1 2 3 4 5 6 7 8 9 A B C D E F ; do echo x${A}${B} 0xFFFD ; done ; done > /tmp/digits ; ( cut -c25- < TIS-620 ; cat /tmp/digits ) | awk '/^x[89ABCDEF]/{ print $1, $2 }' | sed -e 's/<U/0x/' -e 's/>//' | sort | uniq -w4 | cut -c5- | paste '-d ' - - - - - - - - | sed -e 's/ /, /g' -e 's/$/,/' -e '$ s/,$/} },/' -e '1 s/^/{ /' > ~/tmp/tis-620
2116 { "TIS-620", "ISO 8859-11", 2259, // Thai character set mib enum taken from tis620 (which is byte by byte equivalent)
2117 { 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
2118 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
2119 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
2120 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
2121 0xFFFD, 0x0E01, 0x0E02, 0x0E03, 0x0E04, 0x0E05, 0x0E06, 0x0E07,
2122 0x0E08, 0x0E09, 0x0E0A, 0x0E0B, 0x0E0C, 0x0E0D, 0x0E0E, 0x0E0F,
2123 0x0E10, 0x0E11, 0x0E12, 0x0E13, 0x0E14, 0x0E15, 0x0E16, 0x0E17,
2124 0x0E18, 0x0E19, 0x0E1A, 0x0E1B, 0x0E1C, 0x0E1D, 0x0E1E, 0x0E1F,
2125 0x0E20, 0x0E21, 0x0E22, 0x0E23, 0x0E24, 0x0E25, 0x0E26, 0x0E27,
2126 0x0E28, 0x0E29, 0x0E2A, 0x0E2B, 0x0E2C, 0x0E2D, 0x0E2E, 0x0E2F,
2127 0x0E30, 0x0E31, 0x0E32, 0x0E33, 0x0E34, 0x0E35, 0x0E36, 0x0E37,
2128 0x0E38, 0x0E39, 0x0E3A, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x0E3F,
2129 0x0E40, 0x0E41, 0x0E42, 0x0E43, 0x0E44, 0x0E45, 0x0E46, 0x0E47,
2130 0x0E48, 0x0E49, 0x0E4A, 0x0E4B, 0x0E4C, 0x0E4D, 0x0E4E, 0x0E4F,
2131 0x0E50, 0x0E51, 0x0E52, 0x0E53, 0x0E54, 0x0E55, 0x0E56, 0x0E57,
2132 0x0E58, 0x0E59, 0x0E5A, 0x0E5B, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD } },
2133
2134 /*
2135 Name: hp-roman8 [HP-PCL5,RFC1345,KXS2]
2136 MIBenum: 2004
2137 Source: LaserJet IIP Printer User's Manual,
2138 HP part no 33471-90901, Hewlett-Packard, June 1989.
2139 Alias: roman8
2140 Alias: r8
2141 Alias: csHPRoman8
2142 */
2143 { "Roman8", "HP-Roman8", 2004,
2144 { 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
2145 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
2146 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
2147 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
2148 0x00A0, 0x00C0, 0x00C2, 0x00C8, 0x00CA, 0x00CB, 0x00CE, 0x00CF,
2149 0x00B4, 0x02CB, 0x02C6, 0x00A8, 0x02DC, 0x00D9, 0x00DB, 0x20A4,
2150 0x00AF, 0x00DD, 0x00FD, 0x00B0, 0x00C7, 0x00E7, 0x00D1, 0x00F1,
2151 0x00A1, 0x00BF, 0x00A4, 0x00A3, 0x00A5, 0x00A7, 0x0192, 0x00A2,
2152 0x00E2, 0x00EA, 0x00F4, 0x00FB, 0x00E1, 0x00E9, 0x00F3, 0x00FA,
2153 0x00E0, 0x00E8, 0x00F2, 0x00F9, 0x00E4, 0x00EB, 0x00F6, 0x00FC,
2154 0x00C5, 0x00EE, 0x00D8, 0x00C6, 0x00E5, 0x00ED, 0x00F8, 0x00E6,
2155 0x00C4, 0x00EC, 0x00D6, 0x00DC, 0x00C9, 0x00EF, 0x00DF, 0x00D4,
2156 0x00C1, 0x00C3, 0x00E3, 0x00D0, 0x00F0, 0x00CD, 0x00CC, 0x00D3,
2157 0x00D2, 0x00D5, 0x00F5, 0x0160, 0x0161, 0x00DA, 0x0178, 0x00FF,
2158 0x00DE, 0x00FE, 0x00B7, 0x00B5, 0x00B6, 0x00BE, 0x2014, 0x00BC,
2159 0x00BD, 0x00AA, 0x00BA, 0x00AB, 0x25A0, 0x00BB, 0x00B1, 0xFFFD } }
2160
2161 // if you add more character sets at the end, change LAST_MIB above
2162};
2163
2164QSimpleTextCodec::QSimpleTextCodec( int i )
2165 : QTextCodec(), forwardIndex( i )
2166{
2167#ifndef Q_WS_QWS
2168 reverseMap = 0;
2169#endif
2170}
2171
2172
2173QSimpleTextCodec::~QSimpleTextCodec()
2174{
2175#ifndef Q_WS_QWS
2176 delete reverseMap;
2177#else
2178 if ( reverseOwner == this ) {
2179 delete reverseMap;
2180 reverseMap = 0;
2181 reverseOwner = 0;
2182 }
2183#endif
2184}
2185
2186void QSimpleTextCodec::buildReverseMap()
2187{
2188#ifdef Q_WS_QWS
2189 if ( reverseOwner != this ) {
2190 int m = 0;
2191 int i = 0;
2192 while( i < 128 ) {
2193 if ( unicodevalues[forwardIndex].values[i] > m &&
2194 unicodevalues[forwardIndex].values[i] < 0xfffd )
2195 m = unicodevalues[forwardIndex].values[i];
2196 i++;
2197 }
2198 m++;
2199 if ( !reverseMap )
2200 reverseMap = new QMemArray<unsigned char>( m );
2201 if ( m > (int)(reverseMap->size()) )
2202 reverseMap->resize( m );
2203 for( i = 0; i < 128 && i < m; i++ )
2204 (*reverseMap)[i] = (char)i;
2205 for( ;i < m; i++ )
2206 (*reverseMap)[i] = 0;
2207 for( i=128; i<256; i++ ) {
2208 int u = unicodevalues[forwardIndex].values[i-128];
2209 if ( u < m )
2210 (*reverseMap)[u] = (char)(unsigned char)(i);
2211 }
2212 reverseOwner = this;
2213 }
2214#else
2215 if ( !reverseMap ) {
2216 QMemArray<unsigned char> **map = &((QSimpleTextCodec *)this)->reverseMap;
2217 int m = 0;
2218 int i = 0;
2219 while( i < 128 ) {
2220 if ( unicodevalues[forwardIndex].values[i] > m &&
2221 unicodevalues[forwardIndex].values[i] < 0xfffd )
2222 m = unicodevalues[forwardIndex].values[i];
2223 i++;
2224 }
2225 m++;
2226 *map = new QMemArray<unsigned char>( m );
2227 for( i = 0; i < 128 && i < m; i++ )
2228 (**map)[i] = (char)i;
2229 for( ;i < m; i++ )
2230 (**map)[i] = 0;
2231 for( i=128; i<256; i++ ) {
2232 int u = unicodevalues[forwardIndex].values[i-128];
2233 if ( u < m )
2234 (**map)[u] = (char)(unsigned char)(i);
2235 }
2236 }
2237#endif
2238}
2239
2240QString QSimpleTextCodec::toUnicode(const char* chars, int len) const
2241{
2242 if ( len <= 0 || chars == 0 )
2243 return QString::null;
2244
2245 const unsigned char * c = (const unsigned char *)chars;
2246 int i;
2247
2248 for ( i = 0; i < len; i++ )
2249 if ( c[i] == '\0' ) {
2250 len = i;
2251 break;
2252 }
2253
2254 QString r;
2255 r.setUnicode(0, len);
2256 QChar* uc = (QChar*)r.unicode(); // const_cast
2257
2258 for ( i = 0; i < len; i++ ) {
2259 if ( c[i] > 127 )
2260 uc[i] = unicodevalues[forwardIndex].values[c[i]-128];
2261 else
2262 uc[i] = c[i];
2263 }
2264 return r;
2265}
2266
2267
2268QCString QSimpleTextCodec::fromUnicode(const QString& uc, int& len ) const
2269{
2270#ifdef Q_WS_QWS
2271 if ( this != reverseOwner )
2272#else
2273 if ( !reverseMap )
2274#endif
2275 ((QSimpleTextCodec *)this)->buildReverseMap();
2276
2277 if ( len <0 || len > (int)uc.length() )
2278 len = uc.length();
2279 QCString r( len+1 );
2280 int i = len;
2281 int u;
2282 const QChar* ucp = uc.unicode();
2283 unsigned char* rp = (unsigned char *)r.data();
2284 unsigned char* rmp = reverseMap->data();
2285 int rmsize = (int) reverseMap->size();
2286 while( i-- )
2287 {
2288 u = ucp->unicode();
2289 *rp = u < 128 ? u : (( u < rmsize ) ? (*(rmp+u)) : '?' );
2290 if ( *rp == 0 ) *rp = '?';
2291 rp++;
2292 ucp++;
2293 }
2294 r[len] = 0;
2295 return r;
2296}
2297
2298void QSimpleTextCodec::fromUnicode( const QChar *in, unsigned short *out, int length ) const
2299{
2300#ifdef Q_WS_QWS
2301 if ( this != reverseOwner )
2302#else
2303 if ( !reverseMap )
2304#endif
2305 ((QSimpleTextCodec *)this)->buildReverseMap();
2306
2307 unsigned char* rmp = reverseMap->data();
2308 int rmsize = (int) reverseMap->size();
2309 while ( length-- ) {
2310 unsigned short u = in->unicode();
2311 *out = u < 128 ? u : (( u < rmsize ) ? (*(rmp+u)) : 0 );
2312 ++in;
2313 ++out;
2314 }
2315}
2316
2317unsigned short QSimpleTextCodec::characterFromUnicode(const QString &str, int pos) const
2318{
2319#ifdef Q_WS_QWS
2320 if ( this != reverseOwner )
2321#else
2322 if ( !reverseMap )
2323#endif
2324 ((QSimpleTextCodec *)this)->buildReverseMap();
2325
2326 unsigned short u = str[pos].unicode();
2327 unsigned char* rmp = reverseMap->data();
2328 int rmsize = (int) reverseMap->size();
2329 return u < 128 ? u : (( u < rmsize ) ? (*(rmp+u)) : 0 );
2330}
2331
2332bool QSimpleTextCodec::canEncode( QChar ch ) const
2333{
2334#ifdef Q_WS_QWS
2335 if ( this != reverseOwner )
2336#else
2337 if ( !reverseMap )
2338#endif
2339 ((QSimpleTextCodec *)this)->buildReverseMap();
2340
2341 unsigned short u = ch.unicode();
2342 unsigned char* rmp = reverseMap->data();
2343 int rmsize = (int) reverseMap->size();
2344 return u < 128 ? TRUE : (( u < rmsize ) ? (*(rmp+u) != 0) : FALSE );
2345}
2346
2347const char* QSimpleTextCodec::name() const
2348{
2349 return unicodevalues[forwardIndex].cs;
2350}
2351
2352const char* QSimpleTextCodec::mimeName() const
2353{
2354 return unicodevalues[forwardIndex].mime;
2355}
2356
2357
2358int QSimpleTextCodec::mibEnum() const
2359{
2360 return unicodevalues[forwardIndex].mib;
2361}
2362
2363int QSimpleTextCodec::heuristicNameMatch(const char* hint) const
2364{
2365 if ( qstricmp( hint, mimeName() ) == 0 )
2366 return 10000; // return a large value
2367 if ( hint[0]=='k' ) {
2368 QCString lhint = QCString(hint).lower();
2369 // Help people with messy fonts
2370 if ( lhint == "koi8-1" )
2371 return QTextCodec::heuristicNameMatch("koi8-r")-1;
2372 if ( lhint == "koi8-ru" )
2373 return QTextCodec::heuristicNameMatch("koi8-r")-1;
2374 } else if ( hint[0] == 't' && mibEnum() == 2259 /* iso8859-11 */ ) {
2375 // 8859-11 and tis620 are byte by byte equivalent
2376 int i = simpleHeuristicNameMatch("tis620-0", hint);
2377 if( !i )
2378 i = simpleHeuristicNameMatch("tis-620", hint);
2379 if( i ) return i;
2380 } else if ( mibEnum() == 82 /* ISO 8859-6 */ ) {
2381 int i = simpleHeuristicNameMatch("ISO 8859-6-I", hint);
2382 if ( i )
2383 return i;
2384 }
2385 return QTextCodec::heuristicNameMatch(hint);
2386}
2387
2388int QSimpleTextCodec::heuristicContentMatch(const char* chars, int len) const
2389{
2390 if ( len<1 || !chars )
2391 return -1;
2392 int i = 0;
2393 const uchar * c = (const unsigned char *)chars;
2394 int r = 0;
2395 while( i<len && c && *c ) {
2396 if ( *c >= 128 ) {
2397 if ( unicodevalues[forwardIndex].values[(*c)-128] == 0xfffd )
2398 return -1;
2399 }
2400 if ( (*c >= ' ' && *c < 127) ||
2401 *c == '\n' || *c == '\t' || *c == '\r' )
2402 r++;
2403 i++;
2404 c++;
2405 }
2406 if ( mibEnum()==4 )
2407 r+=1;
2408 return r;
2409}
2410
2411#endif
2412
2413class QLatin1Codec : public QTextCodec
2414{
2415public:
2416#if !defined(Q_NO_USING_KEYWORD)
2417 using QTextCodec::fromUnicode;
2418 using QTextCodec::toUnicode;
2419#endif
2420 QString toUnicode(const char* chars, int len) const;
2421 QCString fromUnicode(const QString& uc, int& lenInOut ) const;
2422 void fromUnicode( const QChar *in, unsigned short *out, int length ) const;
2423 unsigned short characterFromUnicode(const QString &str, int pos) const;
2424
2425 const char* name() const;
2426 const char* mimeName() const;
2427 int mibEnum() const;
2428
2429 int heuristicContentMatch(const char* chars, int len) const;
2430
2431private:
2432 int forwardIndex;
2433};
2434
2435
2436QString QLatin1Codec::toUnicode(const char* chars, int len) const
2437{
2438 if ( chars == 0 )
2439 return QString::null;
2440
2441 return QString::fromLatin1(chars, len);
2442}
2443
2444
2445QCString QLatin1Codec::fromUnicode(const QString& uc, int& len ) const
2446{
2447 if ( len <0 || len > (int)uc.length() )
2448 len = uc.length();
2449 QCString r( len+1 );
2450 char *d = r.data();
2451 int i = 0;
2452 const QChar *ch = uc.unicode();
2453 while ( i < len ) {
2454 d[i] = ch->row() ? '?' : ch->cell();
2455 i++;
2456 ch++;
2457 }
2458 r[len] = 0;
2459 return r;
2460}
2461
2462void QLatin1Codec::fromUnicode( const QChar *in, unsigned short *out, int length ) const
2463{
2464 while ( length-- ) {
2465 *out = in->row() ? 0 : in->cell();
2466 ++in;
2467 ++out;
2468 }
2469}
2470
2471unsigned short QLatin1Codec::characterFromUnicode(const QString &str, int pos) const
2472{
2473 const QChar *ch = str.unicode() + pos;
2474 if (ch->row())
2475 return 0;
2476 return (unsigned short) ch->cell();
2477}
2478
2479
2480const char* QLatin1Codec::name() const
2481{
2482 return "ISO 8859-1";
2483}
2484
2485const char* QLatin1Codec::mimeName() const
2486{
2487 return "ISO-8859-1";
2488}
2489
2490
2491int QLatin1Codec::mibEnum() const
2492{
2493 return 4;
2494}
2495
2496int QLatin1Codec::heuristicContentMatch(const char* chars, int len) const
2497{
2498 if ( len<1 || !chars )
2499 return -1;
2500 int i = 0;
2501 const uchar * c = (const unsigned char *)chars;
2502 int r = 0;
2503 while( i<len && c && *c ) {
2504 if ( *c >= 0x80 && *c < 0xa0 )
2505 return -1;
2506 if ( (*c >= ' ' && *c < 127) ||
2507 *c == '\n' || *c == '\t' || *c == '\r' )
2508 r++;
2509 i++;
2510 c++;
2511 }
2512 if ( this == (const QTextCodec *)codecForLocale() )
2513 r += 5;
2514 return r;
2515}
2516
2517class QLatin15Codec: public QLatin1Codec
2518{
2519public:
2520 QString toUnicode(const char* chars, int len) const;
2521#if !defined(Q_NO_USING_KEYWORD)
2522 using QTextCodec::fromUnicode;
2523#endif
2524 QCString fromUnicode(const QString& uc, int& lenInOut ) const;
2525 void fromUnicode( const QChar *in, unsigned short *out, int length ) const;
2526 unsigned short characterFromUnicode(const QString &str, int pos) const;
2527
2528 const char* name() const;
2529 const char* mimeName() const;
2530 int mibEnum() const;
2531
2532private:
2533 int forwardIndex;
2534};
2535
2536
2537QString QLatin15Codec::toUnicode(const char* chars, int len) const
2538{
2539 if ( chars == 0 )
2540 return QString::null;
2541
2542 QString str = QString::fromLatin1(chars, len);
2543 QChar *uc = (QChar *)str.unicode();
2544 while( len-- ) {
2545 switch( uc->unicode() ) {
2546 case 0xa4:
2547 *uc = 0x20ac;
2548 break;
2549 case 0xa6:
2550 *uc = 0x0160;
2551 break;
2552 case 0xa8:
2553 *uc = 0x0161;
2554 break;
2555 case 0xb4:
2556 *uc = 0x017d;
2557 break;
2558 case 0xb8:
2559 *uc = 0x017e;
2560 break;
2561 case 0xbc:
2562 *uc = 0x0152;
2563 break;
2564 case 0xbd:
2565 *uc = 0x0153;
2566 break;
2567 case 0xbe:
2568 *uc = 0x0178;
2569 break;
2570 default:
2571 break;
2572 }
2573 uc++;
2574 }
2575 return str;
2576}
2577
/*
    Returns the 8-bit code for the Unicode value \a uc.

    The eight remapped characters are translated to their code
    positions (0x20ac -> 0xa4, 0x0160 -> 0xa6, 0x0161 -> 0xa8,
    0x017d -> 0xb4, 0x017e -> 0xb8, 0x0152 -> 0xbc, 0x0153 -> 0xbd,
    0x0178 -> 0xbe). The values those positions would otherwise hold
    (0xa4, 0xa6, 0xa8, 0xb4, 0xb8, 0xbc, 0xbd, 0xbe) cannot be encoded,
    nor can anything else from 0x0100 up. Every other value below 0x0100
    is returned unchanged.

    For a value that cannot be encoded the result is '?' if
    \a replacement is TRUE and 0 otherwise.
*/
static inline unsigned char
latin15CharFromUnicode( unsigned short uc, bool replacement = TRUE )
{
    const unsigned char unmappable = replacement ? '?' : 0;

    switch( uc ) {
    // characters that moved into the 8-bit range
    case 0x20ac: return 0xa4;
    case 0x0160: return 0xa6;
    case 0x0161: return 0xa8;
    case 0x017d: return 0xb4;
    case 0x017e: return 0xb8;
    case 0x0152: return 0xbc;
    case 0x0153: return 0xbd;
    case 0x0178: return 0xbe;

    // characters whose positions were taken over by the ones above
    case 0x00a4:
    case 0x00a6:
    case 0x00a8:
    case 0x00b4:
    case 0x00b8:
    case 0x00bc:
    case 0x00bd:
    case 0x00be:
        return unmappable;

    default:
        break;
    }

    if ( uc < 0x0100 )
        return (unsigned char) uc;
    return unmappable;
}
2637
2638
2639void QLatin15Codec::fromUnicode( const QChar *in, unsigned short *out, int length ) const
2640{
2641 while ( length-- ) {
2642 *out = latin15CharFromUnicode( in->unicode(), FALSE );
2643 ++in;
2644 ++out;
2645 }
2646}
2647
2648
2649QCString QLatin15Codec::fromUnicode(const QString& uc, int& len ) const
2650{
2651 if ( len <0 || len > (int)uc.length() )
2652 len = uc.length();
2653 QCString r( len+1 );
2654 char *d = r.data();
2655 int i = 0;
2656 const QChar *ch = uc.unicode();
2657 while ( i < len ) {
2658 d[i] = latin15CharFromUnicode( ch->unicode() );
2659 i++;
2660 ch++;
2661 }
2662 r[len] = 0;
2663 return r;
2664}
2665
2666unsigned short QLatin15Codec::characterFromUnicode(const QString &str, int pos) const
2667{
2668 return latin15CharFromUnicode( str.unicode()[pos].unicode(), FALSE );
2669}
2670
2671
2672const char* QLatin15Codec::name() const
2673{
2674 return "ISO 8859-15";
2675}
2676
2677const char* QLatin15Codec::mimeName() const
2678{
2679 return "ISO-8859-15";
2680}
2681
2682
2683int QLatin15Codec::mibEnum() const
2684{
2685 return 111;
2686}
2687
2688
2689/* the next two functions are implicitly thread safe,
2690 as they are only called by setup() which uses a mutex.
2691*/
2692static void setupLocaleMapper()
2693{
2694#ifdef Q_OS_WIN32
2695 localeMapper = QTextCodec::codecForName( "System" );
2696#else
2697
2698#if defined (_XOPEN_UNIX) && !defined(Q_OS_QNX6) && !defined(Q_OS_OSF)
2699 char *charset = nl_langinfo (CODESET);
2700 if ( charset )
2701 localeMapper = QTextCodec::codecForName( charset );
2702#endif
2703
2704 if ( !localeMapper ) {
2705 // Very poorly defined and followed standards causes lots of code
2706 // to try to get all the cases...
2707
2708 // Try to determine locale codeset from locale name assigned to
2709 // LC_CTYPE category.
2710
2711 // First part is getting that locale name. First try setlocale() which
2712 // definitely knows it, but since we cannot fully trust it, get ready
2713 // to fall back to environment variables.
2714 char * ctype = qstrdup( setlocale( LC_CTYPE, 0 ) );
2715
2716 // Get the first nonempty value from $LC_ALL, $LC_CTYPE, and $LANG
2717 // environment variables.
2718 char * lang = qstrdup( getenv("LC_ALL") );
2719 if ( !lang || lang[0] == 0 || strcmp( lang, "C" ) == 0 ) {
2720 if ( lang ) delete [] lang;
2721 lang = qstrdup( getenv("LC_CTYPE") );
2722 }
2723 if ( !lang || lang[0] == 0 || strcmp( lang, "C" ) == 0 ) {
2724 if ( lang ) delete [] lang;
2725 lang = qstrdup( getenv("LANG") );
2726 }
2727
2728 // Now try these in order:
2729 // 1. CODESET from ctype if it contains a .CODESET part (e.g. en_US.ISO8859-15)
2730 // 2. CODESET from lang if it contains a .CODESET part
2731 // 3. ctype (maybe the locale is named "ISO-8859-1" or something)
2732 // 4. locale (ditto)
2733 // 5. check for "@euro"
2734 // 6. guess locale from ctype unless ctype is "C"
2735 // 7. guess locale from lang
2736
2737 // 1. CODESET from ctype if it contains a .CODESET part (e.g. en_US.ISO8859-15)
2738 char * codeset = ctype ? strchr( ctype, '.' ) : 0;
2739 if ( codeset && *codeset == '.' )
2740 localeMapper = QTextCodec::codecForName( codeset + 1 );
2741
2742 // 2. CODESET from lang if it contains a .CODESET part
2743 codeset = lang ? strchr( lang, '.' ) : 0;
2744 if ( !localeMapper && codeset && *codeset == '.' )
2745 localeMapper = QTextCodec::codecForName( codeset + 1 );
2746
2747 // 3. ctype (maybe the locale is named "ISO-8859-1" or something)
2748 if ( !localeMapper && ctype && *ctype != 0 && strcmp (ctype, "C") != 0 )
2749 localeMapper = QTextCodec::codecForName( ctype );
2750
2751 // 4. locale (ditto)
2752 if ( !localeMapper && lang && *lang != 0 )
2753 localeMapper = QTextCodec::codecForName( lang );
2754
2755 // 5. "@euro"
2756 if ( ctype && strstr( ctype, "@euro" ) || lang && strstr( lang, "@euro" ) )
2757 localeMapper = QTextCodec::codecForName( "ISO 8859-15" );
2758
2759 // 6. guess locale from ctype unless ctype is "C"
2760 // 7. guess locale from lang
2761 char * try_by_name = ctype;
2762 if ( ctype && *ctype != 0 && strcmp (ctype, "C") != 0 )
2763 try_by_name = lang;
2764
2765 // Now do the guessing.
2766 if ( lang && *lang && !localeMapper && try_by_name && *try_by_name ) {
2767 if ( try_locale_list( iso8859_15locales, lang ) )
2768 localeMapper = QTextCodec::codecForName( "ISO 8859-15" );
2769 else if ( try_locale_list( iso8859_2locales, lang ) )
2770 localeMapper = QTextCodec::codecForName( "ISO 8859-2" );
2771 else if ( try_locale_list( iso8859_3locales, lang ) )
2772 localeMapper = QTextCodec::codecForName( "ISO 8859-3" );
2773 else if ( try_locale_list( iso8859_4locales, lang ) )
2774 localeMapper = QTextCodec::codecForName( "ISO 8859-4" );
2775 else if ( try_locale_list( iso8859_5locales, lang ) )
2776 localeMapper = QTextCodec::codecForName( "ISO 8859-5" );
2777 else if ( try_locale_list( iso8859_6locales, lang ) )
2778 localeMapper = QTextCodec::codecForName( "ISO 8859-6" );
2779 else if ( try_locale_list( iso8859_7locales, lang ) )
2780 localeMapper = QTextCodec::codecForName( "ISO 8859-7" );
2781 else if ( try_locale_list( iso8859_8locales, lang ) )
2782 localeMapper = QTextCodec::codecForName( "ISO 8859-8-I" );
2783 else if ( try_locale_list( iso8859_9locales, lang ) )
2784 localeMapper = QTextCodec::codecForName( "ISO 8859-9" );
2785 else if ( try_locale_list( iso8859_13locales, lang ) )
2786 localeMapper = QTextCodec::codecForName( "ISO 8859-13" );
2787 else if ( try_locale_list( tis_620locales, lang ) )
2788 localeMapper = QTextCodec::codecForName( "ISO 8859-11" );
2789 else if ( try_locale_list( koi8_ulocales, lang ) )
2790 localeMapper = QTextCodec::codecForName( "KOI8-U" );
2791 else if ( try_locale_list( cp_1251locales, lang ) )
2792 localeMapper = QTextCodec::codecForName( "CP 1251" );
2793 else if ( try_locale_list( pt_154locales, lang ) )
2794 localeMapper = QTextCodec::codecForName( "PT 154" );
2795 else if ( try_locale_list( probably_koi8_rlocales, lang ) )
2796 localeMapper = ru_RU_hack( lang );
2797 }
2798
2799 delete [] ctype;
2800 delete [] lang;
2801 }
2802 if ( localeMapper && localeMapper->mibEnum() == 11 )
2803 localeMapper = QTextCodec::codecForName( "ISO 8859-8-I" );
2804
2805 // If everything failed, we default to 8859-1
2806 // We could perhaps default to 8859-15.
2807 if ( !localeMapper )
2808 localeMapper = QTextCodec::codecForName( "ISO 8859-1" );
2809#endif
2810}
2811
2812
2813static void realSetup()
2814{
2815#if defined(QT_CHECK_STATE)
2816 if ( destroying_is_ok )
2817 qWarning( "QTextCodec: creating new codec during codec cleanup!" );
2818#endif
2819 all = new QValueList<QTextCodec*>;
2820
2821 (void)new QLatin1Codec;
2822 (void)new QLatin15Codec;
2823 (void)new QUtf8Codec;
2824 (void)new QUtf16Codec;
2825
2826#ifndef QT_NO_CODECS
2827 int i = 0;
2828 do {
2829 (void)new QSimpleTextCodec( i );
2830 } while( unicodevalues[i++].mib != LAST_MIB );
2831
2832 (void)new QTsciiCodec;
2833
2834 for (i = 0; i < 9; ++i) {
2835 (void)new QIsciiCodec(i);
2836 }
2837#endif // QT_NO_CODECS
2838#ifndef QT_NO_CODEC_HEBREW
2839 (void)new QHebrewCodec;
2840#endif
2841#ifndef QT_NO_BIG_CODECS
2842 (void)new QBig5Codec;
2843 (void)new QBig5hkscsCodec;
2844 (void)new QEucJpCodec;
2845 (void)new QEucKrCodec;
2846 (void)new QGb2312Codec;
2847 (void)new QGbkCodec;
2848 (void)new QGb18030Codec;
2849 (void)new QJisCodec;
2850 (void)new QSjisCodec;
2851#endif // QT_NO_BIG_CODECS
2852
2853#ifdef Q_OS_WIN32
2854 (void) new QWindowsLocalCodec;
2855#endif // Q_OS_WIN32
2856
2857 if ( !localeMapper )
2858 setupLocaleMapper();
2859}
2860
2861void QTextCodec::fromUnicodeInternal( const QChar *in, unsigned short *out, int length )
2862{
2863 switch( mibEnum() ) {
2864#ifndef QT_NO_CODECS
2865 case 2084:
2866 case 2088:
2867 case 5:
2868 case 6:
2869 case 7:
2870 case 8:
2871 case 82:
2872 case 10:
2873 case 85:
2874 case 12:
2875 case 13:
2876 case 109:
2877 case 110:
2878 case 2004:
2879 case 2009:
2880 case 2086:
2881 case 2250:
2882 case 2251:
2883 case 2252:
2884 case 2253:
2885 case 2254:
2886 case 2255:
2887 case 2256:
2888 case 2257:
2889 case 2258:
2890 case 2259:
2891 ((QSimpleTextCodec *)this)->fromUnicode( in, out, length );
2892 break;
2893
2894#if !defined(QT_NO_BIG_CODECS) && defined(Q_WS_X11)
2895 // the QFont*Codecs are only used on X11
2896
2897 case 15:
2898 ((QFontJis0201Codec *) this)->fromUnicode( in, out, length );
2899 break;
2900
2901 case 63:
2902 ((QFontJis0208Codec *) this)->fromUnicode( in, out, length );
2903 break;
2904
2905 case 36:
2906 ((QFontKsc5601Codec *) this)->fromUnicode( in, out, length );
2907 break;
2908
2909 case 57:
2910 ((QFontGb2312Codec *) this)->fromUnicode( in, out, length );
2911 break;
2912
2913 case -113:
2914 ((QFontGbkCodec *) this)->fromUnicode( in, out, length );
2915 break;
2916
2917 case -114:
2918 ((QFontGb18030_0Codec *) this)->fromUnicode( in, out, length );
2919 break;
2920
2921 case -2026:
2922 ((QFontBig5Codec *) this)->fromUnicode( in, out, length );
2923 break;
2924
2925 case -2101:
2926 ((QFontBig5hkscsCodec *) this)->fromUnicode( in, out, length );
2927 break;
2928
2929 case -4242:
2930 ((QFontLaoCodec *) this)->fromUnicode( in, out, length );
2931 break;
2932#endif
2933#endif // QT_NO_CODECS
2934
2935 case 4:
2936 ((QLatin1Codec *) this)->fromUnicode( in, out, length );
2937 break;
2938
2939 case 111:
2940 ((QLatin15Codec *) this)->fromUnicode( in, out, length );
2941 break;
2942
2943 default:
2944 {
2945 QConstString string( in, length );
2946 QString str = string.string();
2947 for ( int i = 0; i < length; i++ )
2948 out[i] = characterFromUnicode( str, i );
2949 }
2950 }
2951}
2952
2953
2954/*!
2955 \fn QTextCodec* QTextCodec::codecForTr()
2956
2957 Returns the codec used by QObject::tr() on its argument. If this
2958 function returns 0 (the default), tr() assumes Latin-1.
2959
2960 \sa setCodecForTr()
2961*/
2962
2963/*!
2964 \fn void QTextCodec::setCodecForTr(QTextCodec *c)
2965 \nonreentrant
2966
2967 Sets the codec used by QObject::tr() on its argument to \a c. If
2968 \a c is 0 (the default), tr() assumes Latin-1.
2969
2970 If the literal quoted text in the program is not in the Latin-1
2971 encoding, this function can be used to set the appropriate
2972 encoding. For example, software developed by Korean programmers
2973 might use eucKR for all the text in the program, in which case the
2974 main() function might look like this:
2975
2976 \code
2977 int main(int argc, char** argv)
2978 {
2979 QApplication app(argc, argv);
2980 ... install any additional codecs ...
2981 QTextCodec::setCodecForTr( QTextCodec::codecForName("eucKR") );
2982 ...
2983 }
2984 \endcode
2985
2986 Note that this is not the way to select the encoding that the \e
2987 user has chosen. For example, to convert an application containing
2988 literal English strings to Korean, all that is needed is for the
2989 English strings to be passed through tr() and for translation
2990 files to be loaded. For details of internationalization, see the
2991 \link i18n.html Qt internationalization documentation\endlink.
2992
    \sa codecForTr(), setCodecForCStrings()
2994*/
2995
2996
2997/*!
2998 \fn QTextCodec* QTextCodec::codecForCStrings()
2999
3000 Returns the codec used by QString to convert to and from const
3001 char* and QCStrings. If this function returns 0 (the default),
3002 QString assumes Latin-1.
3003
3004 \sa setCodecForCStrings()
3005*/
3006
3007/*!
3008 \fn void QTextCodec::setCodecForCStrings(QTextCodec *c)
3009 \nonreentrant
3010
3011 Sets the codec used by QString to convert to and from const char*
3012 and QCStrings. If \a c is 0 (the default), QString assumes Latin-1.
3013
    \warning Some codecs do not preserve the characters in the ASCII
    range (0x00 to 0x7f). For example, the Japanese Shift-JIS
    encoding maps the backslash character (0x5c) to the Yen character.
    This leads to unexpected results when using the backslash
    character to escape characters in strings used in e.g. regular
    expressions. Use QString::fromLatin1() to preserve characters in
    the ASCII range when needed.
3021
    \sa codecForCStrings(), setCodecForTr()
3023*/
3024
3025
// Static codec pointers, both initialised to 0.
// NOTE(review): presumably cftr backs codecForTr()/setCodecForTr() and
// cfcs backs codecForCStrings()/setCodecForCStrings() - confirm against
// the class declaration in the header.
QTextCodec *QTextCodec::cftr = 0;
QTextCodec *QTextCodec::cfcs = 0;
3028
3029
3030#endif // QT_NO_TEXTCODEC
Note: See TracBrowser for help on using the repository browser.