source: trunk/src/codecs/qtextcodec.cpp@ 8

Last change on this file since 8 was 8, checked in by dmik, 20 years ago

Transferred Qt for OS/2 version 3.3.1-rc5 sources from the CVS

  • Property svn:keywords set to Id
File size: 102.5 KB
Line 
1/****************************************************************************
2** $Id: qtextcodec.cpp 8 2005-11-16 19:36:46Z dmik $
3**
4** Implementation of QTextCodec class
5**
6** Created : 981015
7**
8** Copyright (C) 1998-2002 Trolltech AS. All rights reserved.
9**
10** This file is part of the tools module of the Qt GUI Toolkit.
11**
12** This file may be distributed under the terms of the Q Public License
13** as defined by Trolltech AS of Norway and appearing in the file
14** LICENSE.QPL included in the packaging of this file.
15**
16** This file may be distributed and/or modified under the terms of the
17** GNU General Public License version 2 as published by the Free Software
18** Foundation and appearing in the file LICENSE.GPL included in the
19** packaging of this file.
20**
21** Licensees holding valid Qt Enterprise Edition or Qt Professional Edition
22** licenses may use this file in accordance with the Qt Commercial License
23** Agreement provided with the Software.
24**
25** This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
26** WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
27**
28** See http://www.trolltech.com/pricing.html or email sales@trolltech.com for
29** information about Qt Commercial License Agreements.
30** See http://www.trolltech.com/qpl/ for QPL licensing information.
31** See http://www.trolltech.com/gpl/ for GPL licensing information.
32**
33** Contact info@trolltech.com if any conditions of this licensing are
34** not clear to you.
35**
36**********************************************************************/
37
38#include "qplatformdefs.h"
39
40// UNIX Large File Support redefines open -> open64
41#if defined(open)
42# undef open
43#endif
44
45#include "qtextcodec.h"
46#ifndef QT_NO_TEXTCODEC
47
48#include "qvaluelist.h"
49#include "qtextcodecfactory.h"
50#include "qutfcodec.h"
51#include "qnamespace.h"
52#ifndef QT_NO_CODECS
53#include "qrtlcodec.h"
54#include "qtsciicodec.h"
55#include "qisciicodec_p.h"
56#endif // QT_NO_CODECS
57#ifndef QT_NO_BIG_CODECS
58#include "qbig5codec.h"
59#include "qeucjpcodec.h"
60#include "qeuckrcodec.h"
61#include "qgb18030codec.h"
62#include "qjiscodec.h"
63#include "qjpunicode.h"
64#include "qsjiscodec.h"
65#endif // QT_NO_BIG_CODECS
66#include "qfile.h"
67#include "qstrlist.h"
68#include "qstring.h"
69#include "../tools/qlocale_p.h"
70
71#if !defined(QT_NO_CODECS) && !defined(QT_NO_BIG_CODECS) && defined(Q_WS_X11)
72# include "qfontcodecs_p.h"
73#endif
74
75#ifdef QT_THREAD_SUPPORT
76# include <private/qmutexpool_p.h>
77#endif // QT_THREAD_SUPPORT
78
79#include <stdlib.h>
80#include <ctype.h>
81#ifndef Q_OS_TEMP
82#include <locale.h>
83#endif
84#if defined(_XOPEN_UNIX) && !defined(Q_OS_QNX6)
85#include <langinfo.h>
86#endif
87
88static QValueList<QTextCodec*> *all = 0;
89static bool destroying_is_ok; // starts out as 0
90static QTextCodec * localeMapper = 0;
91
92class QTextCodecCleanup {
93public:
94 ~QTextCodecCleanup() {
95 QTextCodec::deleteAllCodecs();
96 }
97};
98static QTextCodecCleanup qtextcodec_cleanup;
99
100/*!
101 Deletes all the created codecs.
102
103 \warning Do not call this function.
104
105 QApplication calls this function just before exiting to delete
106 any QTextCodec objects that may be lying around. Since various
107 other classes hold pointers to QTextCodec objects, it is not safe
108 to call this function earlier.
109
110 If you are using the utility classes (like QString) but not using
111 QApplication, calling this function at the very end of your
112 application may be helpful for chasing down memory leaks by
113 eliminating any QTextCodec objects.
114*/
115
116void QTextCodec::deleteAllCodecs()
117{
118 if ( !all )
119 return;
120
121#ifdef QT_THREAD_SUPPORT
122 QMutexLocker locker( qt_global_mutexpool ?
123 qt_global_mutexpool->get( &all ) : 0 );
124 if ( !all )
125 return;
126#endif // QT_THREAD_SUPPORT
127
128 destroying_is_ok = TRUE;
129
130 QValueList<QTextCodec*> *ball = all;
131 all = 0;
132 QValueList<QTextCodec*>::Iterator it;
133 for ( it = ball->begin(); it != ball->end(); ++it ) {
134 delete *it;
135 *it = 0;
136 }
137 ball->clear();
138 delete ball;
139
140 destroying_is_ok = FALSE;
141}
142
143
144static void realSetup();
145
146
147static inline void setup()
148{
149 if ( all ) return;
150
151#ifdef QT_THREAD_SUPPORT
152 QMutexLocker locker( qt_global_mutexpool ?
153 qt_global_mutexpool->get( &all ) : 0 );
154 if ( all ) return;
155#endif // QT_THREAD_SUPPORT
156
157 realSetup();
158}
159
160
161class QTextStatelessEncoder: public QTextEncoder {
162 const QTextCodec* codec;
163public:
164 QTextStatelessEncoder(const QTextCodec*);
165 QCString fromUnicode(const QString& uc, int& lenInOut);
166};
167
168
169class QTextStatelessDecoder : public QTextDecoder {
170 const QTextCodec* codec;
171public:
172 QTextStatelessDecoder(const QTextCodec*);
173 QString toUnicode(const char* chars, int len);
174};
175
176QTextStatelessEncoder::QTextStatelessEncoder(const QTextCodec* c) :
177 codec(c)
178{
179}
180
181
182QCString QTextStatelessEncoder::fromUnicode(const QString& uc, int& lenInOut)
183{
184 return codec->fromUnicode(uc,lenInOut);
185}
186
187
188QTextStatelessDecoder::QTextStatelessDecoder(const QTextCodec* c) :
189 codec(c)
190{
191}
192
193
194QString QTextStatelessDecoder::toUnicode(const char* chars, int len)
195{
196 return codec->toUnicode(chars,len);
197}
198
199
200
201/*!
202 \class QTextCodec qtextcodec.h
203 \brief The QTextCodec class provides conversion between text encodings.
204 \reentrant
205 \ingroup i18n
206
207 Qt uses Unicode to store, draw and manipulate strings. In many
208 situations you may wish to deal with data that uses a different
209 encoding. For example, most Japanese documents are still stored in
210 Shift-JIS or ISO2022, while Russian users often have their
211 documents in KOI8-R or CP1251.
212
213 Qt provides a set of QTextCodec classes to help with converting
214 non-Unicode formats to and from Unicode. You can also create your
215 own codec classes (\link #subclassing see later\endlink).
216
217 The supported encodings are:
218 \list
219 \i Latin1
220 \i Big5 -- Chinese
221 \i Big5-HKSCS -- Chinese
222 \i eucJP -- Japanese
223 \i eucKR -- Korean
224 \i GB2312 -- Chinese
225 \i GBK -- Chinese
226 \i GB18030 -- Chinese
227 \i JIS7 -- Japanese
228 \i Shift-JIS -- Japanese
229 \i TSCII -- Tamil
230 \i utf8 -- Unicode, 8-bit
231 \i utf16 -- Unicode
232 \i KOI8-R -- Russian
233 \i KOI8-U -- Ukrainian
234 \i ISO8859-1 -- Western
235 \i ISO8859-2 -- Central European
236 \i ISO8859-3 -- Central European
237 \i ISO8859-4 -- Baltic
238 \i ISO8859-5 -- Cyrillic
239 \i ISO8859-6 -- Arabic
240 \i ISO8859-7 -- Greek
241 \i ISO8859-8 -- Hebrew, visually ordered
242 \i ISO8859-8-i -- Hebrew, logically ordered
243 \i ISO8859-9 -- Turkish
244 \i ISO8859-10
245 \i ISO8859-13
246 \i ISO8859-14
247 \i ISO8859-15 -- Western
248 \i IBM 850
249 \i IBM 866
250 \i CP874
251 \i CP1250 -- Central European
252 \i CP1251 -- Cyrillic
253 \i CP1252 -- Western
254 \i CP1253 -- Greek
255 \i CP1254 -- Turkish
256 \i CP1255 -- Hebrew
257 \i CP1256 -- Arabic
258 \i CP1257 -- Baltic
259 \i CP1258
260 \i Apple Roman
261 \i TIS-620 -- Thai
262 \endlist
263
264 QTextCodecs can be used as follows to convert some locally encoded
265 string to Unicode. Suppose you have some string encoded in Russian
266 KOI8-R encoding, and want to convert it to Unicode. The simple way
267 to do this is:
268
269 \code
270 QCString locallyEncoded = "..."; // text to convert
271 QTextCodec *codec = QTextCodec::codecForName("KOI8-R"); // get the codec for KOI8-R
272 QString unicodeString = codec->toUnicode( locallyEncoded );
273 \endcode
274
275 After this, \c{unicodeString} holds the text converted to Unicode.
276 Converting a string from Unicode to the local encoding is just as
277 easy:
278
279 \code
280 QString unicodeString = "..."; // any Unicode text
281 QTextCodec *codec = QTextCodec::codecForName("KOI8-R"); // get the codec for KOI8-R
282 QCString locallyEncoded = codec->fromUnicode( unicodeString );
283 \endcode
284
285 Some care must be taken when trying to convert the data in chunks,
286 for example, when receiving it over a network. In such cases it is
287 possible that a multi-byte character will be split over two
288 chunks. At best this might result in the loss of a character and
289 at worst cause the entire conversion to fail.
290
291 The approach to use in these situations is to create a QTextDecoder
292 object for the codec and use this QTextDecoder for the whole
293 decoding process, as shown below:
294
295 \code
296 QTextCodec *codec = QTextCodec::codecForName( "Shift-JIS" );
297 QTextDecoder *decoder = codec->makeDecoder();
298
299 QString unicodeString;
300 while( receiving_data ) {
301 QByteArray chunk = new_data;
302 unicodeString += decoder->toUnicode( chunk.data(), chunk.length() );
303 }
304 \endcode
305
306 The QTextDecoder object maintains state between chunks and therefore
307 works correctly even if a multi-byte character is split between
308 chunks.
309
310 \target subclassing
311 \section1 Creating your own Codec class
312
313 Support for new text encodings can be added to Qt by creating
314 QTextCodec subclasses.
315
316 Built-in codecs can be overridden by custom codecs since more
317 recently created QTextCodec objects take precedence over earlier
318 ones.
319
320 You may find it more convenient to make your codec class available
321 as a plugin; see the \link plugins-howto.html plugin
322 documentation\endlink for more details.
323
324 The abstract virtual functions describe the codec to the system;
325 the codec is then used as required by the different text file
326 formats supported by QTextStream and, under X11, for
327 locale-specific character input and output.
328
329 To add support for another 8-bit encoding to Qt, make a subclass
330 of QTextCodec and implement at least the following methods:
331
332 \code
333 const char* name() const
334 \endcode
335 Return the official name for the encoding.
336
337 \code
338 int mibEnum() const
339 \endcode
340 Return the MIB enum for the encoding if it is listed in the
341 \link http://www.iana.org/assignments/character-sets
342 IANA character-sets encoding file\endlink.
343
344 If the encoding is multi-byte then it will have "state"; that is,
345 the interpretation of some bytes will be dependent on some preceding
346 bytes. For such encodings, you must implement:
347
348 \code
349 QTextDecoder* makeDecoder() const
350 \endcode
351 Return a QTextDecoder that remembers incomplete multi-byte sequence
352 prefixes or other required state.
353
354 If the encoding does \e not require state, you should implement:
355
356 \code
357 QString toUnicode(const char* chars, int len) const
358 \endcode
359 Converts \e len characters from \e chars to Unicode.
360
361 The base QTextCodec class has default implementations of the above
362 two functions, \e{but they are mutually recursive}, so you must
363 re-implement at least one of them, or both for improved efficiency.
364
365 For conversion from Unicode to 8-bit encodings, it is rarely necessary
366 to maintain state. However, two functions similar to the two above
367 are used for encoding:
368
369 \code
370 QTextEncoder* makeEncoder() const
371 \endcode
372 Return a QTextEncoder.
373
374 \code
375 QCString fromUnicode(const QString& uc, int& lenInOut ) const
376 \endcode
377 Converts \e lenInOut characters (of type QChar) from the start of
378 the string \e uc, returning a QCString result, and also returning
379 the \link QCString::length() length\endlink of the result in
380 \e lenInOut.
381
382 Again, these are mutually recursive so only one needs to be implemented,
383 or both if greater efficiency is possible.
384
385 Finally, you must implement:
386
387 \code
388 int heuristicContentMatch(const char* chars, int len) const
389 \endcode
390 Gives a value indicating how likely it is that \e len characters
391 from \e chars are in the encoding.
392
393 A good model for this function is the
394 QWindowsLocalCodec::heuristicContentMatch function found in the Qt
395 sources.
396
397 A QTextCodec subclass might have improved performance if you also
398 re-implement:
399
400 \code
401 bool canEncode( QChar ) const
402 \endcode
403 Test if a Unicode character can be encoded.
404
405 \code
406 bool canEncode( const QString& ) const
407 \endcode
408 Test if a string of Unicode characters can be encoded.
409
410 \code
411 int heuristicNameMatch(const char* hint) const
412 \endcode
413 Test if a possibly non-standard name is referring to the codec.
414
415 Codecs can also be created as \link plugins-howto.html plugins\endlink.
416*/
417
418
419/*!
420 \nonreentrant
421
422 Constructs a QTextCodec, and gives it the highest precedence. The
423 QTextCodec should always be constructed on the heap (i.e. with \c
424 new). Qt takes ownership and will delete it when the application
425 terminates.
426*/
427QTextCodec::QTextCodec()
428{
429 setup();
430 all->insert( all->begin(), this );
431}
432
433
434/*!
435 \nonreentrant
436
437 Destroys the QTextCodec. Note that you should not delete codecs
438 yourself: once created they become Qt's responsibility.
439*/
440QTextCodec::~QTextCodec()
441{
442 if ( !destroying_is_ok )
443 qWarning("QTextCodec::~QTextCodec() called by application");
444 if ( all )
445 all->remove( this );
446}
447
448
449/*!
450 Returns a value indicating how likely it is that this decoder is
451 appropriate for decoding some format that has the given name. The
452 name is compared with the \a hint.
453
454 A good match returns a positive number around the length of the
455 string. A bad match is negative.
456
457 The default implementation calls simpleHeuristicNameMatch() with
458 the name of the codec.
459*/
460int QTextCodec::heuristicNameMatch(const char* hint) const
461{
462 return simpleHeuristicNameMatch(name(),hint);
463}
464
465
466// returns a string containing the letters and numbers from input,
467// with a space separating run of a character class. e.g. "iso8859-1"
468// becomes "iso 8859 1"
469static QString lettersAndNumbers( const char * input )
470{
471 QString result;
472 QChar c;
473
474 while( input && *input ) {
475 c = *input;
476 if ( c.isLetter() || c.isNumber() )
477 result += c.lower();
478 if ( input[1] ) {
479 // add space at character class transition, except
480 // transition from upper-case to lower-case letter
481 QChar n( input[1] );
482 if ( c.isLetter() && n.isLetter() ) {
483 if ( c == c.lower() && n == n.upper() )
484 result += ' ';
485 } else if ( c.category() != n.category() ) {
486 result += ' ';
487 }
488 }
489 input++;
490 }
491 return result.simplifyWhiteSpace();
492}
493
494/*!
495 A simple utility function for heuristicNameMatch(): it does some
496 very minor character-skipping so that almost-exact matches score
497 high. \a name is the text we're matching and \a hint is used for
498 the comparison.
499*/
500int QTextCodec::simpleHeuristicNameMatch(const char* name, const char* hint)
501{
502 // if they're the same, return a perfect score.
503 if ( name && hint && *name && *hint && qstricmp( name, hint ) == 0 )
504 return qstrlen( hint );
505
506 // if the letters and numbers are the same, we have an "almost"
507 // perfect match.
508 QString h( lettersAndNumbers( hint ) );
509 QString n( lettersAndNumbers( name ) );
510 if ( h == n )
511 return qstrlen( hint )-1;
512
513 if ( h.stripWhiteSpace() == n.stripWhiteSpace() )
514 return qstrlen( hint )-2;
515
516 // could do some more here, but I don't think it's worth it
517
518 return 0;
519}
520
521
522/*!
523 Returns the QTextCodec \a i positions from the most recently
524 inserted codec, or 0 if there is no such QTextCodec. Thus,
525 codecForIndex(0) returns the most recently created QTextCodec.
526*/
527QTextCodec* QTextCodec::codecForIndex(int i)
528{
529 setup();
530 return (uint)i >= all->count() ? 0 : *all->at(i);
531}
532
533
534/*!
535 Returns the QTextCodec which matches the \link
536 QTextCodec::mibEnum() MIBenum\endlink \a mib.
537*/
538QTextCodec* QTextCodec::codecForMib(int mib)
539{
540 setup();
541 QValueList<QTextCodec*>::ConstIterator i;
542 QTextCodec* result=0;
543 for ( i = all->begin(); i != all->end(); ++i ) {
544 result = *i;
545 if ( result->mibEnum()==mib )
546 return result;
547 }
548
549#if !defined(QT_NO_COMPONENT) && !defined(QT_LITE_COMPONENT)
550 if ( !result || (result && result->mibEnum() != mib) ) {
551 QTextCodec *codec = QTextCodecFactory::createForMib(mib);
552 if (codec)
553 result = codec;
554 }
555#endif // !QT_NO_COMPONENT !QT_LITE_COMPONENT
556
557 return result;
558}
559
560
561
562
563
564#if defined(Q_OS_WIN32)
565class QWindowsLocalCodec: public QTextCodec
566{
567public:
568 QWindowsLocalCodec();
569 ~QWindowsLocalCodec();
570
571 QString toUnicode(const char* chars, int len) const;
572 QCString fromUnicode(const QString& uc, int& lenInOut ) const;
573
574 const char* name() const;
575 int mibEnum() const;
576
577 int heuristicContentMatch(const char* chars, int len) const;
578};
579
580QWindowsLocalCodec::QWindowsLocalCodec()
581{
582}
583
584QWindowsLocalCodec::~QWindowsLocalCodec()
585{
586}
587
588
589QString QWindowsLocalCodec::toUnicode(const char* chars, int len) const
590{
591 if ( len == 1 && chars ) { // Optimization; avoids allocation
592 char c[2];
593 c[0] = *chars;
594 c[1] = 0;
595 return qt_winMB2QString( c, 2 );
596 }
597 if ( len < 0 )
598 return qt_winMB2QString( chars );
599 QCString s(chars,len+1);
600 return qt_winMB2QString(s);
601}
602
603QCString QWindowsLocalCodec::fromUnicode(const QString& uc, int& lenInOut ) const
604{
605 QCString r = qt_winQString2MB( uc, lenInOut );
606 lenInOut = r.length();
607 return r;
608}
609
610
611const char* QWindowsLocalCodec::name() const
612{
613 return "System";
614}
615
616int QWindowsLocalCodec::mibEnum() const
617{
618 return 0;
619}
620
621
622int QWindowsLocalCodec::heuristicContentMatch(const char* chars, int len) const
623{
624 // ### Not a bad default implementation?
625 QString t = toUnicode(chars,len);
626 int l = t.length();
627 QCString mb = fromUnicode(t,l);
628 int i=0;
629 while ( i < len ) {
630 if ( chars[i] == mb[i] )
631 i++;
632 else
633 break;
634 }
635 return i;
636}
637
638#elif defined(Q_OS_OS2)
639
640class QOS2LocalCodec: public QTextCodec
641{
642public:
643 QOS2LocalCodec();
644 ~QOS2LocalCodec();
645
646 QString toUnicode(const char* chars, int len) const;
647 QCString fromUnicode(const QString& uc, int& lenInOut ) const;
648
649 const char* name() const;
650 int mibEnum() const;
651
652 int heuristicContentMatch(const char* chars, int len) const;
653};
654
655QOS2LocalCodec::QOS2LocalCodec()
656{
657}
658
659QOS2LocalCodec::~QOS2LocalCodec()
660{
661}
662
663
664QString QOS2LocalCodec::toUnicode(const char* chars, int len) const
665{
666 return qt_os2MB2QString( chars, len );
667}
668
669QCString QOS2LocalCodec::fromUnicode(const QString& uc, int& lenInOut ) const
670{
671 QCString r = qt_os2QString2MB( uc, lenInOut );
672 lenInOut = r.length();
673 return r;
674}
675
676
677const char* QOS2LocalCodec::name() const
678{
679 return "System";
680}
681
682int QOS2LocalCodec::mibEnum() const
683{
684 return 0;
685}
686
687
688int QOS2LocalCodec::heuristicContentMatch(const char* chars, int len) const
689{
690 // ### Not a bad default implementation?
691 QString t = toUnicode(chars,len);
692 int l = t.length();
693 QCString mb = fromUnicode(t,l);
694 int i=0;
695 while ( i < len ) {
696 if ( chars[i] == mb[i] )
697 i++;
698 else
699 break;
700 }
701 return i;
702}
703
704#else
705
/* Locale names mostly copied from XFree86. Each table is a list of
   locale names grouped under the encoding its name refers to, and ends
   with a null pointer so it can be scanned by try_locale_list(). */
static const char * const iso8859_2locales[] = {
    "croatian", "cs", "cs_CS", "cs_CZ", "cz", "cz_CZ",
    "czech", "hr", "hr_HR", "hu", "hu_HU", "hungarian",
    "pl", "pl_PL", "polish", "ro", "ro_RO", "rumanian",
    "serbocroatian", "sh", "sh_SP", "sh_YU", "sk", "sk_SK",
    "sl", "sl_CS", "sl_SI", "slovak", "slovene", "sr_SP",
    0
};

static const char * const iso8859_3locales[] = { "eo", 0 };

static const char * const iso8859_4locales[] = { "ee", "ee_EE", 0 };

static const char * const iso8859_5locales[] = {
    "mk", "mk_MK", "sp", "sp_YU",
    0
};

static const char * const cp_1251locales[] = {
    "be", "be_BY", "bg", "bg_BG", "bulgarian",
    0
};

static const char * const pt_154locales[] = {
    "ba_RU", "ky", "ky_KG", "kk", "kk_KZ",
    0
};

static const char * const iso8859_6locales[] = { "ar_AA", "ar_SA", "arabic", 0 };

static const char * const iso8859_7locales[] = { "el", "el_GR", "greek", 0 };

static const char * const iso8859_8locales[] = {
    "hebrew", "he", "he_IL", "iw", "iw_IL",
    0
};

static const char * const iso8859_9locales[] = { "tr", "tr_TR", "turkish", 0 };

static const char * const iso8859_13locales[] = {
    "lt", "lt_LT", "lv", "lv_LV",
    0
};

static const char * const iso8859_15locales[] = {
    "et", "et_EE",
    // Euro countries
    "br_FR", "ca_ES", "de", "de_AT", "de_BE", "de_DE",
    "de_LU", "en_IE", "es", "es_ES", "eu_ES", "fi",
    "fi_FI", "finnish", "fr", "fr_FR", "fr_BE", "fr_LU",
    "french", "ga_IE", "gl_ES", "it", "it_IT", "oc_FR",
    "nl", "nl_BE", "nl_NL", "pt", "pt_PT", "sv_FI",
    "wa_BE",
    0
};

static const char * const koi8_ulocales[] = {
    "uk", "uk_UA", "ru_UA", "ukrainian",
    0
};

static const char * const tis_620locales[] = { "th", "th_TH", "thai", 0 };

static const char * const tcvnlocales[] = { "vi", "vi_VN", 0 };
760
// Scans the null-terminated table locale for lang. The scan stops at the
// terminating null pointer, at an empty entry, or at the first entry equal
// to lang; the result is TRUE unless it stopped on the null terminator.
static bool try_locale_list( const char * const locale[], const char * lang )
{
    const char * const *entry = locale;
    while ( *entry ) {
        if ( **entry == '\0' || strcmp( *entry, lang ) == 0 )
            break;
        ++entry;
    }
    return *entry != 0;
}
768
769// For the probably_koi8_locales we have to look. the standard says
770// these are 8859-5, but almost all Russian users use KOI8-R and
771// incorrectly set $LANG to ru_RU. We'll check tolower() to see what
772// tolower() thinks ru_RU means.
773
774// If you read the history, it seems that many Russians blame ISO and
775// Perestroika for the confusion.
776//
777// The real bug is that some programs break if the user specifies
778// ru_RU.KOI8-R.
779
780static const char * const probably_koi8_rlocales[] = {
781 "ru", "ru_SU", "ru_RU", "russian", 0 };
782
783static QTextCodec * ru_RU_hack( const char * i ) {
784 QTextCodec * ru_RU_codec = 0;
785
786 QCString origlocale = setlocale( LC_CTYPE, i );
787 // unicode koi8r latin5 name
788 // 0x044E 0xC0 0xEE CYRILLIC SMALL LETTER YU
789 // 0x042E 0xE0 0xCE CYRILLIC CAPITAL LETTER YU
790 int latin5 = tolower( 0xCE );
791 int koi8r = tolower( 0xE0 );
792 if ( koi8r == 0xC0 && latin5 != 0xEE ) {
793 ru_RU_codec = QTextCodec::codecForName( "KOI8-R" );
794 } else if ( koi8r != 0xC0 && latin5 == 0xEE ) {
795 ru_RU_codec = QTextCodec::codecForName( "ISO 8859-5" );
796 } else {
797 // something else again... let's assume... *throws dice*
798 ru_RU_codec = QTextCodec::codecForName( "KOI8-R" );
799 qWarning( "QTextCodec: using KOI8-R, probe failed (%02x %02x %s)",
800 koi8r, latin5, i );
801 }
802 setlocale( LC_CTYPE, origlocale.data() );
803
804 return ru_RU_codec;
805}
806
807#endif
808
809/*!
810 Set the codec to \a c; this will be returned by codecForLocale().
811 This might be needed for some applications that want to use their
812 own mechanism for setting the locale.
813
814 \sa codecForLocale()
815*/
816void QTextCodec::setCodecForLocale(QTextCodec *c) {
817 localeMapper = c;
818}
819
820/*! Returns a pointer to the codec most suitable for this locale. */
821
822QTextCodec* QTextCodec::codecForLocale()
823{
824 if ( localeMapper )
825 return localeMapper;
826
827 setup();
828
829 return localeMapper;
830}
831
832
833/*!
834 Searches all installed QTextCodec objects and returns the one
835 which best matches \a name; the match is case-insensitive. Returns
836 0 if no codec's heuristicNameMatch() reports a match better than
837 \a accuracy, or if \a name is a null string.
838
839 \sa heuristicNameMatch()
840*/
841
842QTextCodec* QTextCodec::codecForName( const char* name, int accuracy )
843{
844 if ( !name || !*name )
845 return 0;
846
847 setup();
848 QValueList<QTextCodec*>::ConstIterator i;
849 QTextCodec* result = 0;
850 int best = accuracy;
851 QTextCodec* cursor;
852 for ( i = all->begin(); i != all->end(); ++i ) {
853 cursor = *i;
854 int s = cursor->heuristicNameMatch( name );
855 if ( s > best ) {
856 best = s;
857 result = cursor;
858 }
859 }
860
861#if !defined(QT_NO_COMPONENT) && !defined(QT_LITE_COMPONENT)
862 if ( !result )
863 result = QTextCodecFactory::createForName(name);
864#endif // !QT_NO_COMPONENT !QT_LITE_COMPONENT
865
866 return result;
867}
868
869
870/*!
871 Searches all installed QTextCodec objects, returning the one which
872 most recognizes the given content. May return 0.
873
874 Note that this is often a poor choice, since character encodings
875 often use most of the available character sequences, and so only
876 by linguistic analysis could a true match be made.
877
878 \a chars contains the string to check, and \a len contains the
879 number of characters in the string to use.
880
881 \sa heuristicContentMatch()
882*/
883QTextCodec* QTextCodec::codecForContent(const char* chars, int len)
884{
885 setup();
886 QValueList<QTextCodec*>::ConstIterator i;
887 QTextCodec* result = 0;
888 int best=0;
889 QTextCodec* cursor;
890 for ( i = all->begin(); i != all->end(); ++i ) {
891 cursor = *i;
892 int s = cursor->heuristicContentMatch(chars,len);
893 if ( s > best ) {
894 best = s;
895 result = cursor;
896 }
897 }
898 return result;
899}
900
901
902/*!
903 \fn const char* QTextCodec::name() const
904
905 QTextCodec subclasses must reimplement this function. It returns
906 the name of the encoding supported by the subclass. When choosing
907 a name for an encoding, consider these points:
908 \list
909 \i On X11, heuristicNameMatch( const char * hint )
910 is used to test if the QTextCodec
911 can convert between Unicode and the encoding of a font
912 with encoding \e hint, such as "iso8859-1" for Latin-1 fonts,
913 "koi8-r" for Russian KOI8 fonts.
914 The default algorithm of heuristicNameMatch() uses name().
915 \i Some applications may use this function to present
916 encodings to the end user.
917 \endlist
918 */
919
920/*!
921 \fn int QTextCodec::mibEnum() const
922
923 Subclasses of QTextCodec must reimplement this function. It
924 returns the MIBenum (see \link
925 http://www.iana.org/assignments/character-sets the
926 IANA character-sets encoding file\endlink for more information).
927 It is important that each QTextCodec subclass returns the correct
928 unique value for this function.
929*/
930
931
932/*!
933 Returns the preferred mime name of the encoding as defined in the
934 \link http://www.iana.org/assignments/character-sets
935 IANA character-sets encoding file\endlink.
936*/
937const char* QTextCodec::mimeName() const
938{
939 return name();
940}
941
942
943/*!
944 \fn int QTextCodec::heuristicContentMatch(const char* chars, int len) const
945
946 QTextCodec subclasses must reimplement this function. It examines
947 the first \a len bytes of \a chars and returns a value indicating
948 how likely it is that the string is a prefix of text encoded in
949 the encoding of the subclass. A negative return value indicates
950 that the text is detectably not in the encoding (e.g. it contains
951 characters undefined in the encoding). A return value of 0
952 indicates that the text should be decoded with this codec rather
953 than as ASCII, but there is no particular evidence. The value
954 should range up to \a len. Thus, most decoders will return -1, 0,
955 or -\a len.
956
957 The characters are not null terminated.
958
959 \sa codecForContent().
960*/
961
962
963/*!
964 Creates a QTextDecoder which stores enough state to decode chunks
965 of char* data to create chunks of Unicode data. The default
966 implementation creates a stateless decoder, which is only
967 sufficient for the simplest encodings where each byte corresponds
968 to exactly one Unicode character.
969
970 The caller is responsible for deleting the returned object.
971*/
972QTextDecoder* QTextCodec::makeDecoder() const
973{
974 return new QTextStatelessDecoder(this);
975}
976
977
978/*!
979 Creates a QTextEncoder which stores enough state to encode chunks
980 of Unicode data as char* data. The default implementation creates
981 a stateless encoder, which is only sufficient for the simplest
982 encodings where each Unicode character corresponds to exactly one
983 character.
984
985 The caller is responsible for deleting the returned object.
986*/
987QTextEncoder* QTextCodec::makeEncoder() const
988{
989 return new QTextStatelessEncoder(this);
990}
991
992
993/*!
994 QTextCodec subclasses must reimplement this function or
995 makeDecoder(). It converts the first \a len characters of \a chars
996 to Unicode.
997
998 The default implementation makes a decoder with makeDecoder() and
999 converts the input with that. Note that the default makeDecoder()
1000 implementation makes a decoder that simply calls
1001 this function, hence subclasses \e must reimplement one function or
1002 the other to avoid infinite recursion.
1003*/
1004QString QTextCodec::toUnicode(const char* chars, int len) const
1005{
1006 if ( chars == 0 )
1007 return QString::null;
1008 QTextDecoder* i = makeDecoder();
1009 QString result = i->toUnicode(chars,len);
1010 delete i;
1011 return result;
1012}
1013
1014
1015/*!
1016 QTextCodec subclasses must reimplement either this function or
1017 makeEncoder(). It converts the first \a lenInOut characters of \a
1018 uc from Unicode to the encoding of the subclass. If \a lenInOut is
1019 negative or too large, the length of \a uc is used instead.
1020
1021 Converts \a lenInOut characters (not bytes) from \a uc, producing
1022 a QCString. \a lenInOut will be set to the \link
1023 QCString::length() length\endlink of the result (in bytes).
1024
1025 The default implementation makes an encoder with makeEncoder() and
1026 converts the input with that. Note that the default makeEncoder()
1027 implementation makes an encoder that simply calls this function,
1028 hence subclasses \e must reimplement one function or the other to
1029 avoid infinite recursion.
1030*/
1031
1032QCString QTextCodec::fromUnicode(const QString& uc, int& lenInOut) const
1033{
1034 QTextEncoder* i = makeEncoder();
1035 QCString result = i->fromUnicode(uc, lenInOut);
1036 delete i;
1037 return result;
1038}
1039
1040/*!
1041 \overload
1042 \internal
1043*/
1044QByteArray QTextCodec::fromUnicode( const QString &str, int pos, int len ) const
1045{
1046 QByteArray a;
1047 if( len < 0 )
1048 len = str.length() - pos;
1049 a = fromUnicode( str.mid(pos, len) );
1050 if( a.size() > 0 && a[(int)a.size() - 1] == '\0' )
1051 a.resize( a.size() - 1 );
1052 return a;
1053}
1054
1055/*!
1056 \overload
1057
1058 \a uc is the unicode source string.
1059*/
1060QCString QTextCodec::fromUnicode(const QString& uc) const
1061{
1062 int l = uc.length();
1063 return fromUnicode(uc,l);
1064}
1065
1066/*!
1067 \overload
1068
1069 \a a contains the source characters; \a len contains the number of
1070 characters in \a a to use.
1071*/
1072QString QTextCodec::toUnicode(const QByteArray& a, int len) const
1073{
1074 int l = a.size();
1075 l = QMIN( l, len );
1076 return toUnicode( a.data(), l );
1077}
1078
1079/*!
1080 \overload
1081
1082 \a a contains the source characters.
1083*/
1084QString QTextCodec::toUnicode(const QByteArray& a) const
1085{
1086 int l = a.size();
1087 return toUnicode( a.data(), l );
1088}
1089
1090/*!
1091 \overload
1092
1093 \a a contains the source characters; \a len contains the number of
1094 characters in \a a to use.
1095*/
1096QString QTextCodec::toUnicode(const QCString& a, int len) const
1097{
1098 int l = a.length();
1099 l = QMIN( l, len );
1100 return toUnicode( a.data(), l );
1101}
1102
1103/*!
1104 \overload
1105
1106 \a a contains the source characters.
1107*/
1108QString QTextCodec::toUnicode(const QCString& a) const
1109{
1110 int l = a.length();
1111 return toUnicode( a.data(), l );
1112}
1113
1114/*!
1115 \overload
1116
1117 \a chars contains the source characters.
1118*/
1119QString QTextCodec::toUnicode(const char* chars) const
1120{
1121 return toUnicode(chars,qstrlen(chars));
1122}
1123
1124/*!
1125 \internal
1126*/
1127unsigned short QTextCodec::characterFromUnicode(const QString &str, int pos) const
1128{
1129 QCString result = QTextCodec::fromUnicode(QString(str[pos]));
1130 uchar *ch = (uchar *) result.data();
1131 ushort retval = 0;
1132 if (result.size() > 2) {
1133 retval = (ushort) *ch << 8;
1134 ch++;
1135 }
1136 return retval + *ch;
1137}
1138
1139/*!
1140 Returns TRUE if the Unicode character \a ch can be fully encoded
1141 with this codec; otherwise returns FALSE. The default
1142 implementation tests if the result of toUnicode(fromUnicode(ch))
1143 is the original \a ch. Subclasses may be able to improve the
1144 efficiency.
1145*/
1146bool QTextCodec::canEncode( QChar ch ) const
1147{
1148 return toUnicode(fromUnicode(ch)) == ch;
1149}
1150
1151/*!
1152 \overload
1153
1154 \a s contains the string being tested for encode-ability.
1155*/
1156bool QTextCodec::canEncode( const QString& s ) const
1157{
1158 if ( s.isEmpty() )
1159 return TRUE;
1160 return toUnicode(fromUnicode(s)) == s;
1161}
1162
1163
1164
1165/*!
1166 \class QTextEncoder qtextcodec.h
1167 \brief The QTextEncoder class provides a state-based encoder.
1168 \reentrant
1169 \ingroup i18n
1170
1171 The encoder converts Unicode into another format, remembering any
1172 state that is required between calls.
1173
1174 \sa QTextCodec::makeEncoder()
1175*/
1176
/*!
    Destroys the encoder.
*/
QTextEncoder::~QTextEncoder()
{
    // The body is intentionally empty.
}
1183
1184/*!
1185 \fn QCString QTextEncoder::fromUnicode(const QString& uc, int& lenInOut)
1186
1187 Converts \a lenInOut characters (not bytes) from \a uc, producing
1188 a QCString. \a lenInOut will be set to the \link
1189 QCString::length() length\endlink of the result (in bytes).
1190
1191 The encoder is free to record state to use when subsequent calls
1192 are made to this function (for example, it might change modes with
1193 escape sequences if needed during the encoding of one string, then
1194 assume that mode applies when a subsequent call begins).
1195*/
1196
/*!
    \class QTextDecoder qtextcodec.h
    \brief The QTextDecoder class provides a state-based decoder.
    \reentrant
    \ingroup i18n

    The decoder converts a text format into Unicode, remembering any
    state that is required between calls.

    \sa QTextCodec::makeDecoder()
*/
1208
1209
/*!
    Destroys the decoder.
*/
QTextDecoder::~QTextDecoder()
{
    // The body is intentionally empty.
}
1216
1217/*!
1218 \fn QString QTextDecoder::toUnicode(const char* chars, int len)
1219
1220 Converts the first \a len bytes in \a chars to Unicode, returning
1221 the result.
1222
1223 If not all characters are used (e.g. if only part of a multi-byte
1224 encoding is at the end of the characters), the decoder remembers
1225 enough state to continue with the next call to this function.
1226*/
1227
// Marker stored in QMultiByteUnicodeTable::unicode for a byte that is
// followed by further bytes of a multi-byte sequence.
#define CHAINED 0xffff

// One entry of a 256-way, byte-indexed lookup table used to decode
// multi-byte sequences. An entry either carries the Unicode value for
// the byte sequence that leads to it, or owns a nested 256-entry table
// that is indexed with the next byte.
struct QMultiByteUnicodeTable {
    // If multiByte, ignore unicode and index into multiByte
    // with the next character.
    QMultiByteUnicodeTable() : unicode(0xfffd), multiByte(0) { }

    // Releases the nested table (and, recursively, everything below it).
    ~QMultiByteUnicodeTable()
    {
        delete [] multiByte;    // deleting a null pointer is a no-op
    }

    ushort unicode;                     // defaults to U+FFFD
    QMultiByteUnicodeTable* multiByte;  // owned nested table, or 0

private:
    // The entry owns multiByte, so a member-wise copy would make two
    // entries delete the same table. Copying is therefore disabled
    // (declared but not defined).
    QMultiByteUnicodeTable( const QMultiByteUnicodeTable& );
    QMultiByteUnicodeTable& operator=( const QMultiByteUnicodeTable& );
};
1244
// Parses one escaped byte constant of a charmap line.
//
// On entry \a cursor points at the escape character. The character after
// it selects the notation: 'x' for hexadecimal, 'd' for decimal, anything
// else starts an octal constant. On return \a cursor points just past the
// digits that were consumed. If \a cursor points at the end of the string
// nothing is consumed and 0 is returned.
//
// Returns the parsed value reduced to the range 0..255.
static int getByte(char* &cursor)
{
    int byte = 0;
    if ( *cursor ) {
        if ( cursor[1] == 'x' )
            byte = strtol(cursor+2,&cursor,16);
        else if ( cursor[1] == 'd' )
            byte = strtol(cursor+2,&cursor,10);
        else
            // Octal constants have no prefix letter: the digits start
            // right after the escape character. Starting at cursor+1
            // also keeps the parse inside the string when the escape
            // character is the last character of the line.
            byte = strtol(cursor+1,&cursor,8);
    }
    return byte&0xff;
}
1258
1259class QTextCodecFromIOD;
1260
1261class QTextCodecFromIODDecoder : public QTextDecoder {
1262 const QTextCodecFromIOD* codec;
1263 QMultiByteUnicodeTable* mb;
1264public:
1265 QTextCodecFromIODDecoder(const QTextCodecFromIOD* c);
1266 QString toUnicode(const char* chars, int len);
1267};
1268
1269class QTextCodecFromIOD : public QTextCodec {
1270 friend class QTextCodecFromIODDecoder;
1271
1272 QCString n;
1273
1274 // If from_unicode_page[row()][cell()] is 0 and from_unicode_page_multiByte,
1275 // use from_unicode_page_multiByte[row()][cell()] as string.
1276 char** from_unicode_page;
1277 char*** from_unicode_page_multiByte;
1278 char unkn;
1279
1280 // Only one of these is used
1281 ushort* to_unicode;
1282 QMultiByteUnicodeTable* to_unicode_multiByte;
1283 int max_bytes_per_char;
1284 QStrList aliases;
1285
1286 bool stateless() const { return !to_unicode_multiByte; }
1287
1288public:
1289 QTextCodecFromIOD(QIODevice* iod)
1290 {
1291 from_unicode_page = 0;
1292 to_unicode_multiByte = 0;
1293 to_unicode = 0;
1294 from_unicode_page_multiByte = 0;
1295 max_bytes_per_char = 1;
1296
1297 const int maxlen=100;
1298 char line[maxlen];
1299 char esc='\\';
1300 char comm='%';
1301 bool incmap = FALSE;
1302 while (iod->readLine(line,maxlen) > 0) {
1303 if (0==qstrnicmp(line,"<code_set_name>",15))
1304 n = line+15;
1305 else if (0==qstrnicmp(line,"<escape_char> ",14))
1306 esc = line[14];
1307 else if (0==qstrnicmp(line,"<comment_char> ",15))
1308 comm = line[15];
1309 else if (line[0]==comm && 0==qstrnicmp(line+1," alias ",7)) {
1310 aliases.append(line+8);
1311 } else if (0==qstrnicmp(line,"CHARMAP",7)) {
1312 if (!from_unicode_page) {
1313 from_unicode_page = new char*[256];
1314 for (int i=0; i<256; i++)
1315 from_unicode_page[i]=0;
1316 }
1317 if (!to_unicode) {
1318 to_unicode = new ushort[256];
1319 }
1320 incmap = TRUE;
1321 } else if (0==qstrnicmp(line,"END CHARMAP",11))
1322 break;
1323 else if (incmap) {
1324 char* cursor = line;
1325 int byte=-1,unicode=-1;
1326 ushort* mb_unicode=0;
1327 const int maxmb=8; // more -> we'll need to improve datastructures
1328 char mb[maxmb+1];
1329 int nmb=0;
1330
1331 while (*cursor) {
1332 if (cursor[0]=='<' && cursor[1]=='U' &&
1333 cursor[2]>='0' && cursor[2]<='9' &&
1334 cursor[3]>='0' && cursor[3]<='9') {
1335
1336 unicode = strtol(cursor+2,&cursor,16);
1337
1338 } else if (*cursor==esc) {
1339
1340 byte = getByte(cursor);
1341
1342 if ( *cursor == esc ) {
1343 if ( !to_unicode_multiByte ) {
1344 to_unicode_multiByte =
1345 new QMultiByteUnicodeTable[256];
1346 for (int i=0; i<256; i++) {
1347 to_unicode_multiByte[i].unicode =
1348 to_unicode[i];
1349 to_unicode_multiByte[i].multiByte = 0;
1350 }
1351 delete [] to_unicode;
1352 to_unicode = 0;
1353 }
1354 QMultiByteUnicodeTable* mbut =
1355 to_unicode_multiByte+byte;
1356 mb[nmb++] = byte;
1357 while ( nmb < maxmb && *cursor == esc ) {
1358 // Always at least once
1359
1360 mbut->unicode = CHAINED;
1361 byte = getByte(cursor);
1362 mb[nmb++] = byte;
1363 if (!mbut->multiByte) {
1364 mbut->multiByte =
1365 new QMultiByteUnicodeTable[256];
1366 }
1367 mbut = mbut->multiByte+byte;
1368 mb_unicode = & mbut->unicode;
1369 }
1370
1371 if ( nmb > max_bytes_per_char )
1372 max_bytes_per_char = nmb;
1373 }
1374 } else {
1375 cursor++;
1376 }
1377 }
1378
1379 if (unicode >= 0 && unicode <= 0xffff)
1380 {
1381 QChar ch((ushort)unicode);
1382 if (!from_unicode_page[ch.row()]) {
1383 from_unicode_page[ch.row()] = new char[256];
1384 for (int i=0; i<256; i++)
1385 from_unicode_page[ch.row()][i]=0;
1386 }
1387 if ( mb_unicode ) {
1388 from_unicode_page[ch.row()][ch.cell()] = 0;
1389 if (!from_unicode_page_multiByte) {
1390 from_unicode_page_multiByte = new char**[256];
1391 for (int i=0; i<256; i++)
1392 from_unicode_page_multiByte[i]=0;
1393 }
1394 if (!from_unicode_page_multiByte[ch.row()]) {
1395 from_unicode_page_multiByte[ch.row()] = new char*[256];
1396 for (int i=0; i<256; i++)
1397 from_unicode_page_multiByte[ch.row()][i] = 0;
1398 }
1399 mb[nmb++] = 0;
1400 from_unicode_page_multiByte[ch.row()][ch.cell()]
1401 = qstrdup(mb);
1402 *mb_unicode = unicode;
1403 } else {
1404 from_unicode_page[ch.row()][ch.cell()] = (char)byte;
1405 if ( to_unicode )
1406 to_unicode[byte] = unicode;
1407 else
1408 to_unicode_multiByte[byte].unicode = unicode;
1409 }
1410 } else {
1411 }
1412 }
1413 }
1414 n = n.stripWhiteSpace();
1415
1416 unkn = '?'; // ##### Might be a bad choice.
1417 }
1418
1419 ~QTextCodecFromIOD()
1420 {
1421 if ( from_unicode_page ) {
1422 for (int i=0; i<256; i++)
1423 if (from_unicode_page[i])
1424 delete [] from_unicode_page[i];
1425 }
1426 if ( from_unicode_page_multiByte ) {
1427 for (int i=0; i<256; i++)
1428 if (from_unicode_page_multiByte[i])
1429 for (int j=0; j<256; j++)
1430 if (from_unicode_page_multiByte[i][j])
1431 delete [] from_unicode_page_multiByte[i][j];
1432 }
1433 if ( to_unicode )
1434 delete [] to_unicode;
1435 if ( to_unicode_multiByte )
1436 delete [] to_unicode_multiByte;
1437 }
1438
1439 bool ok() const
1440 {
1441 return !!from_unicode_page;
1442 }
1443
1444 QTextDecoder* makeDecoder() const
1445 {
1446 if ( stateless() )
1447 return QTextCodec::makeDecoder();
1448 else
1449 return new QTextCodecFromIODDecoder(this);
1450 }
1451
1452 const char* name() const
1453 {
1454 return n;
1455 }
1456
1457 int mibEnum() const
1458 {
1459 return 0; // #### Unknown.
1460 }
1461
1462 int heuristicContentMatch(const char*, int) const
1463 {
1464 return 0;
1465 }
1466
1467 int heuristicNameMatch(const char* hint) const
1468 {
1469 int bestr = QTextCodec::heuristicNameMatch(hint);
1470 QStrListIterator it(aliases);
1471 char* a;
1472 while ((a=it.current())) {
1473 ++it;
1474 int r = simpleHeuristicNameMatch(a,hint);
1475 if (r > bestr)
1476 bestr = r;
1477 }
1478 return bestr;
1479 }
1480
1481 QString toUnicode(const char* chars, int len) const
1482 {
1483 const uchar* uchars = (const uchar*)chars;
1484 QString result;
1485 QMultiByteUnicodeTable* multiByte=to_unicode_multiByte;
1486 if ( multiByte ) {
1487 while (len--) {
1488 QMultiByteUnicodeTable& mb = multiByte[*uchars];
1489 if ( mb.multiByte ) {
1490 // Chained multi-byte
1491 multiByte = mb.multiByte;
1492 } else {
1493 result += QChar(mb.unicode);
1494 multiByte=to_unicode_multiByte;
1495 }
1496 uchars++;
1497 }
1498 } else {
1499 while (len--)
1500 result += QChar(to_unicode[*uchars++]);
1501 }
1502 return result;
1503 }
1504
1505#if !defined(Q_NO_USING_KEYWORD)
1506 using QTextCodec::fromUnicode;
1507#endif
1508 QCString fromUnicode(const QString& uc, int& lenInOut) const
1509 {
1510 if (lenInOut > (int)uc.length())
1511 lenInOut = uc.length();
1512 int rlen = lenInOut*max_bytes_per_char;
1513 QCString rstr(rlen);
1514 char* cursor = rstr.data();
1515 char* s=0;
1516 int l = lenInOut;
1517 int lout = 0;
1518 for (int i=0; i<l; i++) {
1519 QChar ch = uc[i];
1520 if ( ch == QChar::null ) {
1521 // special
1522 *cursor++ = 0;
1523 } else if ( from_unicode_page[ch.row()] &&
1524 from_unicode_page[ch.row()][ch.cell()] )
1525 {
1526 *cursor++ = from_unicode_page[ch.row()][ch.cell()];
1527 lout++;
1528 } else if ( from_unicode_page_multiByte &&
1529 from_unicode_page_multiByte[ch.row()] &&
1530 (s=from_unicode_page_multiByte[ch.row()][ch.cell()]) )
1531 {
1532 while (*s) {
1533 *cursor++ = *s++;
1534 lout++;
1535 }
1536 } else {
1537 *cursor++ = unkn;
1538 lout++;
1539 }
1540 }
1541 *cursor = 0;
1542 lenInOut = lout;
1543 return rstr;
1544 }
1545};
1546
1547QTextCodecFromIODDecoder::QTextCodecFromIODDecoder(const QTextCodecFromIOD* c) :
1548 codec(c)
1549{
1550 mb = codec->to_unicode_multiByte;
1551}
1552
1553QString QTextCodecFromIODDecoder::toUnicode(const char* chars, int len)
1554{
1555 const uchar* uchars = (const uchar*)chars;
1556 QString result;
1557 while (len--) {
1558 QMultiByteUnicodeTable& t = mb[*uchars];
1559 if ( t.multiByte ) {
1560 // Chained multi-byte
1561 mb = t.multiByte;
1562 } else {
1563 if ( t.unicode )
1564 result += QChar(t.unicode);
1565 mb=codec->to_unicode_multiByte;
1566 }
1567 uchars++;
1568 }
1569 return result;
1570}
1571
1572#ifndef QT_NO_CODECS
1573// Cannot use <pre> or \code
1574/*!
1575 Reads a POSIX2 charmap definition from \a iod.
1576 The parser recognizes the following lines:
1577
1578<font name="sans">
1579&nbsp;&nbsp;&lt;code_set_name&gt; <i>name</i></br>
1580&nbsp;&nbsp;&lt;escape_char&gt; <i>character</i></br>
1581&nbsp;&nbsp;% alias <i>alias</i></br>
1582&nbsp;&nbsp;CHARMAP</br>
1583&nbsp;&nbsp;&lt;<i>token</i>&gt; /x<i>hexbyte</i> &lt;U<i>unicode</i>&gt; ...</br>
1584&nbsp;&nbsp;&lt;<i>token</i>&gt; /d<i>decbyte</i> &lt;U<i>unicode</i>&gt; ...</br>
1585&nbsp;&nbsp;&lt;<i>token</i>&gt; /<i>octbyte</i> &lt;U<i>unicode</i>&gt; ...</br>
1586&nbsp;&nbsp;&lt;<i>token</i>&gt; /<i>any</i>/<i>any</i>... &lt;U<i>unicode</i>&gt; ...</br>
1587&nbsp;&nbsp;END CHARMAP</br>
1588</font>
1589
1590 The resulting QTextCodec is returned (and also added to the global
1591 list of codecs). The name() of the result is taken from the
1592 code_set_name.
1593
1594 Note that a codec constructed in this way uses much more memory
1595 and is slower than a hand-written QTextCodec subclass, since
1596 tables in code are kept in memory shared by all Qt applications.
1597
1598 \sa loadCharmapFile()
1599*/
1600QTextCodec* QTextCodec::loadCharmap(QIODevice* iod)
1601{
1602 QTextCodecFromIOD* r = new QTextCodecFromIOD(iod);
1603 if ( !r->ok() ) {
1604 delete r;
1605 r = 0;
1606 }
1607 return r;
1608}
1609
1610/*!
1611 A convenience function for loadCharmap() that loads the charmap
1612 definition from the file \a filename.
1613*/
1614QTextCodec* QTextCodec::loadCharmapFile(QString filename)
1615{
1616 QFile f(filename);
1617 if (f.open(IO_ReadOnly)) {
1618 QTextCodecFromIOD* r = new QTextCodecFromIOD(&f);
1619 if ( !r->ok() )
1620 delete r;
1621 else
1622 return r;
1623 }
1624 return 0;
1625}
1626
1627#endif //QT_NO_CODECS
1628
1629/*!
1630 Returns a string representing the current language and
1631 sublanguage, e.g. "pt" for Portuguese, or "pt_br" for Portuguese/Brazil.
1632*/
1633
1634const char* QTextCodec::locale()
1635{
1636 return QLocalePrivate::systemLocaleName();
1637}
1638
1639#ifndef QT_NO_CODECS
1640
1641class QSimpleTextCodec: public QTextCodec
1642{
1643public:
1644 QSimpleTextCodec( int );
1645 ~QSimpleTextCodec();
1646
1647 QString toUnicode(const char* chars, int len) const;
1648#if !defined(Q_NO_USING_KEYWORD)
1649 using QTextCodec::fromUnicode;
1650#endif
1651 QCString fromUnicode(const QString& uc, int& lenInOut ) const;
1652 unsigned short characterFromUnicode(const QString &str, int pos) const;
1653
1654 const char* name() const;
1655 const char* mimeName() const;
1656 int mibEnum() const;
1657
1658 int heuristicContentMatch(const char* chars, int len) const;
1659
1660 int heuristicNameMatch(const char* hint) const;
1661#if !defined(Q_NO_USING_KEYWORD)
1662 using QTextCodec::canEncode;
1663#endif
1664 bool canEncode( QChar ch ) const;
1665
1666 void fromUnicode( const QChar *in, unsigned short *out, int length ) const;
1667
1668private:
1669 void buildReverseMap();
1670
1671 int forwardIndex;
1672#ifndef Q_WS_QWS
1673 QMemArray<unsigned char> *reverseMap;
1674#endif
1675};
1676
#ifdef Q_WS_QWS
// Under Q_WS_QWS the reverseMap member of QSimpleTextCodec is compiled
// out; these file-level variables are declared instead.
// NOTE(review): presumably reverseOwner identifies the codec the shared
// reverseMap was last built for -- confirm in buildReverseMap().
static const QSimpleTextCodec * reverseOwner = 0;
static QMemArray<unsigned char> * reverseMap = 0;
#endif
1681
1682#define LAST_MIB 2004
1683
1684static const struct {
1685 const char *mime;
1686 const char * cs;
1687 int mib;
1688 Q_UINT16 values[128];
1689} unicodevalues[] = {
1690 // from RFC 1489, ftp://ftp.isi.edu/in-notes/rfc1489.txt
1691 { "KOI8-R", "KOI8-R", 2084,
1692 { 0x2500, 0x2502, 0x250C, 0x2510, 0x2514, 0x2518, 0x251C, 0x2524,
1693 0x252C, 0x2534, 0x253C, 0x2580, 0x2584, 0x2588, 0x258C, 0x2590,
1694 0x2591, 0x2592, 0x2593, 0x2320, 0x25A0, 0x2219/**/, 0x221A, 0x2248,
1695 0x2264, 0x2265, 0x00A0, 0x2321, 0x00B0, 0x00B2, 0x00B7, 0x00F7,
1696 0x2550, 0x2551, 0x2552, 0x0451, 0x2553, 0x2554, 0x2555, 0x2556,
1697 0x2557, 0x2558, 0x2559, 0x255A, 0x255B, 0x255C, 0x255D, 0x255E,
1698 0x255F, 0x2560, 0x2561, 0x0401, 0x2562, 0x2563, 0x2564, 0x2565,
1699 0x2566, 0x2567, 0x2568, 0x2569, 0x256A, 0x256B, 0x256C, 0x00A9,
1700 0x044E, 0x0430, 0x0431, 0x0446, 0x0434, 0x0435, 0x0444, 0x0433,
1701 0x0445, 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E,
1702 0x043F, 0x044F, 0x0440, 0x0441, 0x0442, 0x0443, 0x0436, 0x0432,
1703 0x044C, 0x044B, 0x0437, 0x0448, 0x044D, 0x0449, 0x0447, 0x044A,
1704 0x042E, 0x0410, 0x0411, 0x0426, 0x0414, 0x0415, 0x0424, 0x0413,
1705 0x0425, 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E,
1706 0x041F, 0x042F, 0x0420, 0x0421, 0x0422, 0x0423, 0x0416, 0x0412,
1707 0x042C, 0x042B, 0x0417, 0x0428, 0x042D, 0x0429, 0x0427, 0x042A } },
1708 // /**/ - The BULLET OPERATOR is confused. Some people think
1709 // it should be 0x2022 (BULLET).
1710
1711 // from RFC 2319, ftp://ftp.isi.edu/in-notes/rfc2319.txt
1712 { "KOI8-U", "KOI8-U", 2088,
1713 { 0x2500, 0x2502, 0x250C, 0x2510, 0x2514, 0x2518, 0x251C, 0x2524,
1714 0x252C, 0x2534, 0x253C, 0x2580, 0x2584, 0x2588, 0x258C, 0x2590,
1715 0x2591, 0x2592, 0x2593, 0x2320, 0x25A0, 0x2219, 0x221A, 0x2248,
1716 0x2264, 0x2265, 0x00A0, 0x2321, 0x00B0, 0x00B2, 0x00B7, 0x00F7,
1717 0x2550, 0x2551, 0x2552, 0x0451, 0x0454, 0x2554, 0x0456, 0x0457,
1718 0x2557, 0x2558, 0x2559, 0x255A, 0x255B, 0x0491, 0x255D, 0x255E,
1719 0x255F, 0x2560, 0x2561, 0x0401, 0x0404, 0x2563, 0x0406, 0x0407,
1720 0x2566, 0x2567, 0x2568, 0x2569, 0x256A, 0x0490, 0x256C, 0x00A9,
1721 0x044E, 0x0430, 0x0431, 0x0446, 0x0434, 0x0435, 0x0444, 0x0433,
1722 0x0445, 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E,
1723 0x043F, 0x044F, 0x0440, 0x0441, 0x0442, 0x0443, 0x0436, 0x0432,
1724 0x044C, 0x044B, 0x0437, 0x0448, 0x044D, 0x0449, 0x0447, 0x044A,
1725 0x042E, 0x0410, 0x0411, 0x0426, 0x0414, 0x0415, 0x0424, 0x0413,
1726 0x0425, 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E,
1727 0x041F, 0x042F, 0x0420, 0x0421, 0x0422, 0x0423, 0x0416, 0x0412,
1728 0x042C, 0x042B, 0x0417, 0x0428, 0x042D, 0x0429, 0x0427, 0x042A } },
1729
1730 // next bits generated from tables on the Unicode 2.0 CD. we can
1731 // use these tables since this is part of the transition to using
1732 // unicode everywhere in qt.
1733
1734 // $ for A in 8 9 A B C D E F ; do for B in 0 1 2 3 4 5 6 7 8 9 A B C D E F ; do echo 0x${A}${B} 0xFFFD ; done ; done > /tmp/digits ; for a in 8859-* ; do ( awk '/^0x[89ABCDEF]/{ print $1, $2 }' < $a ; cat /tmp/digits ) | sort | uniq -w4 | cut -c6- | paste '-d ' - - - - - - - - | sed -e 's/ /, /g' -e 's/$/,/' -e '$ s/,$/} },/' -e '1 s/^/{ /' > ~/tmp/$a ; done
1735
1736 // then I inserted the files manually.
1737 { "ISO-8859-2", "ISO 8859-2", 5,
1738 { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
1739 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
1740 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
1741 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
1742 0x00A0, 0x0104, 0x02D8, 0x0141, 0x00A4, 0x013D, 0x015A, 0x00A7,
1743 0x00A8, 0x0160, 0x015E, 0x0164, 0x0179, 0x00AD, 0x017D, 0x017B,
1744 0x00B0, 0x0105, 0x02DB, 0x0142, 0x00B4, 0x013E, 0x015B, 0x02C7,
1745 0x00B8, 0x0161, 0x015F, 0x0165, 0x017A, 0x02DD, 0x017E, 0x017C,
1746 0x0154, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x0139, 0x0106, 0x00C7,
1747 0x010C, 0x00C9, 0x0118, 0x00CB, 0x011A, 0x00CD, 0x00CE, 0x010E,
1748 0x0110, 0x0143, 0x0147, 0x00D3, 0x00D4, 0x0150, 0x00D6, 0x00D7,
1749 0x0158, 0x016E, 0x00DA, 0x0170, 0x00DC, 0x00DD, 0x0162, 0x00DF,
1750 0x0155, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x013A, 0x0107, 0x00E7,
1751 0x010D, 0x00E9, 0x0119, 0x00EB, 0x011B, 0x00ED, 0x00EE, 0x010F,
1752 0x0111, 0x0144, 0x0148, 0x00F3, 0x00F4, 0x0151, 0x00F6, 0x00F7,
1753 0x0159, 0x016F, 0x00FA, 0x0171, 0x00FC, 0x00FD, 0x0163, 0x02D9} },
1754 { "ISO-8859-3", "ISO 8859-3", 6,
1755 { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
1756 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
1757 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
1758 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
1759 0x00A0, 0x0126, 0x02D8, 0x00A3, 0x00A4, 0xFFFD, 0x0124, 0x00A7,
1760 0x00A8, 0x0130, 0x015E, 0x011E, 0x0134, 0x00AD, 0xFFFD, 0x017B,
1761 0x00B0, 0x0127, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x0125, 0x00B7,
1762 0x00B8, 0x0131, 0x015F, 0x011F, 0x0135, 0x00BD, 0xFFFD, 0x017C,
1763 0x00C0, 0x00C1, 0x00C2, 0xFFFD, 0x00C4, 0x010A, 0x0108, 0x00C7,
1764 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
1765 0xFFFD, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x0120, 0x00D6, 0x00D7,
1766 0x011C, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x016C, 0x015C, 0x00DF,
1767 0x00E0, 0x00E1, 0x00E2, 0xFFFD, 0x00E4, 0x010B, 0x0109, 0x00E7,
1768 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
1769 0xFFFD, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x0121, 0x00F6, 0x00F7,
1770 0x011D, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x016D, 0x015D, 0x02D9} },
1771 { "ISO-8859-4", "ISO 8859-4", 7,
1772 { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
1773 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
1774 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
1775 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
1776 0x00A0, 0x0104, 0x0138, 0x0156, 0x00A4, 0x0128, 0x013B, 0x00A7,
1777 0x00A8, 0x0160, 0x0112, 0x0122, 0x0166, 0x00AD, 0x017D, 0x00AF,
1778 0x00B0, 0x0105, 0x02DB, 0x0157, 0x00B4, 0x0129, 0x013C, 0x02C7,
1779 0x00B8, 0x0161, 0x0113, 0x0123, 0x0167, 0x014A, 0x017E, 0x014B,
1780 0x0100, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x012E,
1781 0x010C, 0x00C9, 0x0118, 0x00CB, 0x0116, 0x00CD, 0x00CE, 0x012A,
1782 0x0110, 0x0145, 0x014C, 0x0136, 0x00D4, 0x00D5, 0x00D6, 0x00D7,
1783 0x00D8, 0x0172, 0x00DA, 0x00DB, 0x00DC, 0x0168, 0x016A, 0x00DF,
1784 0x0101, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x012F,
1785 0x010D, 0x00E9, 0x0119, 0x00EB, 0x0117, 0x00ED, 0x00EE, 0x012B,
1786 0x0111, 0x0146, 0x014D, 0x0137, 0x00F4, 0x00F5, 0x00F6, 0x00F7,
1787 0x00F8, 0x0173, 0x00FA, 0x00FB, 0x00FC, 0x0169, 0x016B, 0x02D9} },
1788 { "ISO-8859-5", "ISO 8859-5", 8,
1789 { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
1790 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
1791 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
1792 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
1793 0x00A0, 0x0401, 0x0402, 0x0403, 0x0404, 0x0405, 0x0406, 0x0407,
1794 0x0408, 0x0409, 0x040A, 0x040B, 0x040C, 0x00AD, 0x040E, 0x040F,
1795 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
1796 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, 0x041F,
1797 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
1798 0x0428, 0x0429, 0x042A, 0x042B, 0x042C, 0x042D, 0x042E, 0x042F,
1799 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
1800 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, 0x043F,
1801 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
1802 0x0448, 0x0449, 0x044A, 0x044B, 0x044C, 0x044D, 0x044E, 0x044F,
1803 0x2116, 0x0451, 0x0452, 0x0453, 0x0454, 0x0455, 0x0456, 0x0457,
1804 0x0458, 0x0459, 0x045A, 0x045B, 0x045C, 0x00A7, 0x045E, 0x045F} },
1805 { "ISO-8859-6", "ISO 8859-6", 82,
1806 { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
1807 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
1808 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
1809 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
1810 0x00A0, 0xFFFD, 0xFFFD, 0xFFFD, 0x00A4, 0xFFFD, 0xFFFD, 0xFFFD,
1811 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x060C, 0x00AD, 0xFFFD, 0xFFFD,
1812 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
1813 0xFFFD, 0xFFFD, 0xFFFD, 0x061B, 0xFFFD, 0xFFFD, 0xFFFD, 0x061F,
1814 0xFFFD, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
1815 0x0628, 0x0629, 0x062A, 0x062B, 0x062C, 0x062D, 0x062E, 0x062F,
1816 0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x0637,
1817 0x0638, 0x0639, 0x063A, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
1818 0x0640, 0x0641, 0x0642, 0x0643, 0x0644, 0x0645, 0x0646, 0x0647,
1819 0x0648, 0x0649, 0x064A, 0x064B, 0x064C, 0x064D, 0x064E, 0x064F,
1820 0x0650, 0x0651, 0x0652, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
1821 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD} },
1822 { "ISO-8859-7", "ISO 8859-7", 10,
1823 { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
1824 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
1825 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
1826 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
1827 0x00A0, 0x2018, 0x2019, 0x00A3, 0xFFFD, 0xFFFD, 0x00A6, 0x00A7,
1828 0x00A8, 0x00A9, 0xFFFD, 0x00AB, 0x00AC, 0x00AD, 0xFFFD, 0x2015,
1829 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x0384, 0x0385, 0x0386, 0x00B7,
1830 0x0388, 0x0389, 0x038A, 0x00BB, 0x038C, 0x00BD, 0x038E, 0x038F,
1831 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
1832 0x0398, 0x0399, 0x039A, 0x039B, 0x039C, 0x039D, 0x039E, 0x039F,
1833 0x03A0, 0x03A1, 0xFFFD, 0x03A3, 0x03A4, 0x03A5, 0x03A6, 0x03A7,
1834 0x03A8, 0x03A9, 0x03AA, 0x03AB, 0x03AC, 0x03AD, 0x03AE, 0x03AF,
1835 0x03B0, 0x03B1, 0x03B2, 0x03B3, 0x03B4, 0x03B5, 0x03B6, 0x03B7,
1836 0x03B8, 0x03B9, 0x03BA, 0x03BB, 0x03BC, 0x03BD, 0x03BE, 0x03BF,
1837 0x03C0, 0x03C1, 0x03C2, 0x03C3, 0x03C4, 0x03C5, 0x03C6, 0x03C7,
1838 0x03C8, 0x03C9, 0x03CA, 0x03CB, 0x03CC, 0x03CD, 0x03CE, 0xFFFD} },
1839 { "ISO-8859-8-I", "ISO 8859-8-I", 85,
1840 { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
1841 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
1842 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
1843 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
1844 0x00A0, 0xFFFD, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,
1845 0x00A8, 0x00A9, 0x00D7, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x203E,
1846 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
1847 0x00B8, 0x00B9, 0x00F7, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0xFFFD,
1848 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
1849 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
1850 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
1851 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x2017,
1852 0x05D0, 0x05D1, 0x05D2, 0x05D3, 0x05D4, 0x05D5, 0x05D6, 0x05D7,
1853 0x05D8, 0x05D9, 0x05DA, 0x05DB, 0x05DC, 0x05DD, 0x05DE, 0x05DF,
1854 0x05E0, 0x05E1, 0x05E2, 0x05E3, 0x05E4, 0x05E5, 0x05E6, 0x05E7,
1855 0x05E8, 0x05E9, 0x05EA, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD} },
1856 { "ISO-8859-9", "ISO 8859-9", 12,
1857 { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
1858 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
1859 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
1860 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
1861 0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,
1862 0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
1863 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
1864 0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF,
1865 0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7,
1866 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
1867 0x011E, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7,
1868 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x0130, 0x015E, 0x00DF,
1869 0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7,
1870 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
1871 0x011F, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7,
1872 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x0131, 0x015F, 0x00FF} },
1873 { "ISO-8859-10", "ISO 8859-10", 13,
1874 { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
1875 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
1876 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
1877 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
1878 0x00A0, 0x0104, 0x0112, 0x0122, 0x012A, 0x0128, 0x0136, 0x00A7,
1879 0x013B, 0x0110, 0x0160, 0x0166, 0x017D, 0x00AD, 0x016A, 0x014A,
1880 0x00B0, 0x0105, 0x0113, 0x0123, 0x012B, 0x0129, 0x0137, 0x00B7,
1881 0x013C, 0x0111, 0x0161, 0x0167, 0x017E, 0x2015, 0x016B, 0x014B,
1882 0x0100, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x012E,
1883 0x010C, 0x00C9, 0x0118, 0x00CB, 0x0116, 0x00CD, 0x00CE, 0x00CF,
1884 0x00D0, 0x0145, 0x014C, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x0168,
1885 0x00D8, 0x0172, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF,
1886 0x0101, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x012F,
1887 0x010D, 0x00E9, 0x0119, 0x00EB, 0x0117, 0x00ED, 0x00EE, 0x00EF,
1888 0x00F0, 0x0146, 0x014D, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x0169,
1889 0x00F8, 0x0173, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x0138} },
1890 { "ISO-8859-13", "ISO 8859-13", 109,
1891 { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
1892 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
1893 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
1894 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
1895 0x00A0, 0x201D, 0x00A2, 0x00A3, 0x00A4, 0x201E, 0x00A6, 0x00A7,
1896 0x00D8, 0x00A9, 0x0156, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00C6,
1897 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x201C, 0x00B5, 0x00B6, 0x00B7,
1898 0x00F8, 0x00B9, 0x0157, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00E6,
1899 0x0104, 0x012E, 0x0100, 0x0106, 0x00C4, 0x00C5, 0x0118, 0x0112,
1900 0x010C, 0x00C9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012A, 0x013B,
1901 0x0160, 0x0143, 0x0145, 0x00D3, 0x014C, 0x00D5, 0x00D6, 0x00D7,
1902 0x0172, 0x0141, 0x015A, 0x016A, 0x00DC, 0x017B, 0x017D, 0x00DF,
1903 0x0105, 0x012F, 0x0101, 0x0107, 0x00E4, 0x00E5, 0x0119, 0x0113,
1904 0x010D, 0x00E9, 0x017A, 0x0117, 0x0123, 0x0137, 0x012B, 0x013C,
1905 0x0161, 0x0144, 0x0146, 0x00F3, 0x014D, 0x00F5, 0x00F6, 0x00F7,
1906 0x0173, 0x0142, 0x015B, 0x016B, 0x00FC, 0x017C, 0x017E, 0x2019} },
1907 { "ISO-8859-14", "ISO 8859-14", 110,
1908 { 0x0080, 0x0081, 0x0082, 0x0083, 0x0084, 0x0085, 0x0086, 0x0087,
1909 0x0088, 0x0089, 0x008A, 0x008B, 0x008C, 0x008D, 0x008E, 0x008F,
1910 0x0090, 0x0091, 0x0092, 0x0093, 0x0094, 0x0095, 0x0096, 0x0097,
1911 0x0098, 0x0099, 0x009A, 0x009B, 0x009C, 0x009D, 0x009E, 0x009F,
1912 0x00A0, 0x1E02, 0x1E03, 0x00A3, 0x010A, 0x010B, 0x1E0A, 0x00A7,
1913 0x1E80, 0x00A9, 0x1E82, 0x1E0B, 0x1EF2, 0x00AD, 0x00AE, 0x0178,
1914 0x1E1E, 0x1E1F, 0x0120, 0x0121, 0x1E40, 0x1E41, 0x00B6, 0x1E56,
1915 0x1E81, 0x1E57, 0x1E83, 0x1E60, 0x1EF3, 0x1E84, 0x1E85, 0x1E61,
1916 0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7,
1917 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
1918 0x0174, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x1E6A,
1919 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x0176, 0x00DF,
1920 0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7,
1921 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
1922 0x0175, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x1E6B,
1923 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x0177, 0x00FF} },
1924
1925 // next bits generated again from tables on the Unicode 3.0 CD.
1926
1927 // $ for a in CP* ; do ( awk '/^0x[89ABCDEF]/{ print $1, $2 }' < $a ) | sort | sed -e 's/#UNDEF.*$/0xFFFD/' | cut -c6- | paste '-d ' - - - - - - - - | sed -e 's/ /, /g' -e 's/$/,/' -e '$ s/,$/} },/' -e '1 s/^/{ /' > ~/tmp/$a ; done
1928
1929 { "CP 850", "IBM 850", 2009,
1930 { 0x00C7, 0x00FC, 0x00E9, 0x00E2, 0x00E4, 0x00E0, 0x00E5, 0x00E7,
1931 0x00EA, 0x00EB, 0x00E8, 0x00EF, 0x00EE, 0x00EC, 0x00C4, 0x00C5,
1932 0x00C9, 0x00E6, 0x00C6, 0x00F4, 0x00F6, 0x00F2, 0x00FB, 0x00F9,
1933 0x00FF, 0x00D6, 0x00DC, 0x00F8, 0x00A3, 0x00D8, 0x00D7, 0x0192,
1934 0x00E1, 0x00ED, 0x00F3, 0x00FA, 0x00F1, 0x00D1, 0x00AA, 0x00BA,
1935 0x00BF, 0x00AE, 0x00AC, 0x00BD, 0x00BC, 0x00A1, 0x00AB, 0x00BB,
1936 0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x00C1, 0x00C2, 0x00C0,
1937 0x00A9, 0x2563, 0x2551, 0x2557, 0x255D, 0x00A2, 0x00A5, 0x2510,
1938 0x2514, 0x2534, 0x252C, 0x251C, 0x2500, 0x253C, 0x00E3, 0x00C3,
1939 0x255A, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256C, 0x00A4,
1940 0x00F0, 0x00D0, 0x00CA, 0x00CB, 0x00C8, 0x0131, 0x00CD, 0x00CE,
1941 0x00CF, 0x2518, 0x250C, 0x2588, 0x2584, 0x00A6, 0x00CC, 0x2580,
1942 0x00D3, 0x00DF, 0x00D4, 0x00D2, 0x00F5, 0x00D5, 0x00B5, 0x00FE,
1943 0x00DE, 0x00DA, 0x00DB, 0x00D9, 0x00FD, 0x00DD, 0x00AF, 0x00B4,
1944 0x00AD, 0x00B1, 0x2017, 0x00BE, 0x00B6, 0x00A7, 0x00F7, 0x00B8,
1945 0x00B0, 0x00A8, 0x00B7, 0x00B9, 0x00B3, 0x00B2, 0x25A0, 0x00A0} },
1946 { "CP 874", "CP 874", 0, //### what is the mib?
1947 { 0x20AC, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x2026, 0xFFFD, 0xFFFD,
1948 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
1949 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
1950 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
1951 0x00A0, 0x0E01, 0x0E02, 0x0E03, 0x0E04, 0x0E05, 0x0E06, 0x0E07,
1952 0x0E08, 0x0E09, 0x0E0A, 0x0E0B, 0x0E0C, 0x0E0D, 0x0E0E, 0x0E0F,
1953 0x0E10, 0x0E11, 0x0E12, 0x0E13, 0x0E14, 0x0E15, 0x0E16, 0x0E17,
1954 0x0E18, 0x0E19, 0x0E1A, 0x0E1B, 0x0E1C, 0x0E1D, 0x0E1E, 0x0E1F,
1955 0x0E20, 0x0E21, 0x0E22, 0x0E23, 0x0E24, 0x0E25, 0x0E26, 0x0E27,
1956 0x0E28, 0x0E29, 0x0E2A, 0x0E2B, 0x0E2C, 0x0E2D, 0x0E2E, 0x0E2F,
1957 0x0E30, 0x0E31, 0x0E32, 0x0E33, 0x0E34, 0x0E35, 0x0E36, 0x0E37,
1958 0x0E38, 0x0E39, 0x0E3A, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x0E3F,
1959 0x0E40, 0x0E41, 0x0E42, 0x0E43, 0x0E44, 0x0E45, 0x0E46, 0x0E47,
1960 0x0E48, 0x0E49, 0x0E4A, 0x0E4B, 0x0E4C, 0x0E4D, 0x0E4E, 0x0E4F,
1961 0x0E50, 0x0E51, 0x0E52, 0x0E53, 0x0E54, 0x0E55, 0x0E56, 0x0E57,
1962 0x0E58, 0x0E59, 0x0E5A, 0x0E5B, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD} },
1963 { "IBM 866", "IBM 866", 2086,
1964 { 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
1965 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, 0x041F,
1966 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
1967 0x0428, 0x0429, 0x042A, 0x042B, 0x042C, 0x042D, 0x042E, 0x042F,
1968 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
1969 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, 0x043F,
1970 0x2591, 0x2592, 0x2593, 0x2502, 0x2524, 0x2561, 0x2562, 0x2556,
1971 0x2555, 0x2563, 0x2551, 0x2557, 0x255D, 0x255C, 0x255B, 0x2510,
1972 0x2514, 0x2534, 0x252C, 0x251C, 0x2500, 0x253C, 0x255E, 0x255F,
1973 0x255A, 0x2554, 0x2569, 0x2566, 0x2560, 0x2550, 0x256C, 0x2567,
1974 0x2568, 0x2564, 0x2565, 0x2559, 0x2558, 0x2552, 0x2553, 0x256B,
1975 0x256A, 0x2518, 0x250C, 0x2588, 0x2584, 0x258C, 0x2590, 0x2580,
1976 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
1977 0x0448, 0x0449, 0x044A, 0x044B, 0x044C, 0x044D, 0x044E, 0x044F,
1978 0x0401, 0x0451, 0x0404, 0x0454, 0x0407, 0x0457, 0x040E, 0x045E,
1979 0x00B0, 0x2219, 0x00B7, 0x221A, 0x2116, 0x00A4, 0x25A0, 0x00A0} },
1980
1981 { "windows-1250", "CP 1250", 2250,
1982 { 0x20AC, 0xFFFD, 0x201A, 0xFFFD, 0x201E, 0x2026, 0x2020, 0x2021,
1983 0xFFFD, 0x2030, 0x0160, 0x2039, 0x015A, 0x0164, 0x017D, 0x0179,
1984 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
1985 0xFFFD, 0x2122, 0x0161, 0x203A, 0x015B, 0x0165, 0x017E, 0x017A,
1986 0x00A0, 0x02C7, 0x02D8, 0x0141, 0x00A4, 0x0104, 0x00A6, 0x00A7,
1987 0x00A8, 0x00A9, 0x015E, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x017B,
1988 0x00B0, 0x00B1, 0x02DB, 0x0142, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
1989 0x00B8, 0x0105, 0x015F, 0x00BB, 0x013D, 0x02DD, 0x013E, 0x017C,
1990 0x0154, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x0139, 0x0106, 0x00C7,
1991 0x010C, 0x00C9, 0x0118, 0x00CB, 0x011A, 0x00CD, 0x00CE, 0x010E,
1992 0x0110, 0x0143, 0x0147, 0x00D3, 0x00D4, 0x0150, 0x00D6, 0x00D7,
1993 0x0158, 0x016E, 0x00DA, 0x0170, 0x00DC, 0x00DD, 0x0162, 0x00DF,
1994 0x0155, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x013A, 0x0107, 0x00E7,
1995 0x010D, 0x00E9, 0x0119, 0x00EB, 0x011B, 0x00ED, 0x00EE, 0x010F,
1996 0x0111, 0x0144, 0x0148, 0x00F3, 0x00F4, 0x0151, 0x00F6, 0x00F7,
1997 0x0159, 0x016F, 0x00FA, 0x0171, 0x00FC, 0x00FD, 0x0163, 0x02D9} },
1998 { "windows-1251", "CP 1251", 2251,
1999 { 0x0402, 0x0403, 0x201A, 0x0453, 0x201E, 0x2026, 0x2020, 0x2021,
2000 0x20AC, 0x2030, 0x0409, 0x2039, 0x040A, 0x040C, 0x040B, 0x040F,
2001 0x0452, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
2002 0xFFFD, 0x2122, 0x0459, 0x203A, 0x045A, 0x045C, 0x045B, 0x045F,
2003 0x00A0, 0x040E, 0x045E, 0x0408, 0x00A4, 0x0490, 0x00A6, 0x00A7,
2004 0x0401, 0x00A9, 0x0404, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x0407,
2005 0x00B0, 0x00B1, 0x0406, 0x0456, 0x0491, 0x00B5, 0x00B6, 0x00B7,
2006 0x0451, 0x2116, 0x0454, 0x00BB, 0x0458, 0x0405, 0x0455, 0x0457,
2007 0x0410, 0x0411, 0x0412, 0x0413, 0x0414, 0x0415, 0x0416, 0x0417,
2008 0x0418, 0x0419, 0x041A, 0x041B, 0x041C, 0x041D, 0x041E, 0x041F,
2009 0x0420, 0x0421, 0x0422, 0x0423, 0x0424, 0x0425, 0x0426, 0x0427,
2010 0x0428, 0x0429, 0x042A, 0x042B, 0x042C, 0x042D, 0x042E, 0x042F,
2011 0x0430, 0x0431, 0x0432, 0x0433, 0x0434, 0x0435, 0x0436, 0x0437,
2012 0x0438, 0x0439, 0x043A, 0x043B, 0x043C, 0x043D, 0x043E, 0x043F,
2013 0x0440, 0x0441, 0x0442, 0x0443, 0x0444, 0x0445, 0x0446, 0x0447,
2014 0x0448, 0x0449, 0x044A, 0x044B, 0x044C, 0x044D, 0x044E, 0x044F} },
2015 { "windows-1252", "CP 1252", 2252,
2016 { 0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
2017 0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0xFFFD, 0x017D, 0xFFFD,
2018 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
2019 0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0xFFFD, 0x017E, 0x0178,
2020 0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,
2021 0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
2022 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
2023 0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF,
2024 0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7,
2025 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
2026 0x00D0, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7,
2027 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF,
2028 0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7,
2029 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
2030 0x00F0, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7,
2031 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x00FF} },
2032 { "windows-1253", "CP 1253", 2253,
2033 { 0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
2034 0xFFFD, 0x2030, 0xFFFD, 0x2039, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
2035 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
2036 0xFFFD, 0x2122, 0xFFFD, 0x203A, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
2037 0x00A0, 0x0385, 0x0386, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,
2038 0x00A8, 0x00A9, 0xFFFD, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x2015,
2039 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x0384, 0x00B5, 0x00B6, 0x00B7,
2040 0x0388, 0x0389, 0x038A, 0x00BB, 0x038C, 0x00BD, 0x038E, 0x038F,
2041 0x0390, 0x0391, 0x0392, 0x0393, 0x0394, 0x0395, 0x0396, 0x0397,
2042 0x0398, 0x0399, 0x039A, 0x039B, 0x039C, 0x039D, 0x039E, 0x039F,
2043 0x03A0, 0x03A1, 0xFFFD, 0x03A3, 0x03A4, 0x03A5, 0x03A6, 0x03A7,
2044 0x03A8, 0x03A9, 0x03AA, 0x03AB, 0x03AC, 0x03AD, 0x03AE, 0x03AF,
2045 0x03B0, 0x03B1, 0x03B2, 0x03B3, 0x03B4, 0x03B5, 0x03B6, 0x03B7,
2046 0x03B8, 0x03B9, 0x03BA, 0x03BB, 0x03BC, 0x03BD, 0x03BE, 0x03BF,
2047 0x03C0, 0x03C1, 0x03C2, 0x03C3, 0x03C4, 0x03C5, 0x03C6, 0x03C7,
2048 0x03C8, 0x03C9, 0x03CA, 0x03CB, 0x03CC, 0x03CD, 0x03CE, 0xFFFD} },
2049 { "windows-1254", "CP 1254", 2254,
2050 { 0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
2051 0x02C6, 0x2030, 0x0160, 0x2039, 0x0152, 0xFFFD, 0xFFFD, 0xFFFD,
2052 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
2053 0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0xFFFD, 0xFFFD, 0x0178,
2054 0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,
2055 0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
2056 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
2057 0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF,
2058 0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7,
2059 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
2060 0x011E, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7,
2061 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x0130, 0x015E, 0x00DF,
2062 0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7,
2063 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
2064 0x011F, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7,
2065 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x0131, 0x015F, 0x00FF} },
2066 { "windows-1255", "CP 1255", 2255,
2067 { 0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
2068 0x02C6, 0x2030, 0xFFFD, 0x2039, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
2069 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
2070 0x02DC, 0x2122, 0xFFFD, 0x203A, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
2071 0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x20AA, 0x00A5, 0x00A6, 0x00A7,
2072 0x00A8, 0x00A9, 0x00D7, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
2073 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
2074 0x00B8, 0x00B9, 0x00F7, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF,
2075 0x05B0, 0x05B1, 0x05B2, 0x05B3, 0x05B4, 0x05B5, 0x05B6, 0x05B7,
2076 0x05B8, 0x05B9, 0xFFFD, 0x05BB, 0x05BC, 0x05BD, 0x05BE, 0x05BF,
2077 0x05C0, 0x05C1, 0x05C2, 0x05C3, 0x05F0, 0x05F1, 0x05F2, 0x05F3,
2078 0x05F4, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
2079 0x05D0, 0x05D1, 0x05D2, 0x05D3, 0x05D4, 0x05D5, 0x05D6, 0x05D7,
2080 0x05D8, 0x05D9, 0x05DA, 0x05DB, 0x05DC, 0x05DD, 0x05DE, 0x05DF,
2081 0x05E0, 0x05E1, 0x05E2, 0x05E3, 0x05E4, 0x05E5, 0x05E6, 0x05E7,
2082 0x05E8, 0x05E9, 0x05EA, 0xFFFD, 0xFFFD, 0x200E, 0x200F, 0xFFFD} },
2083 { "windows-1256", "CP 1256", 2256,
2084 { 0x20AC, 0x067E, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
2085 0x02C6, 0x2030, 0x0679, 0x2039, 0x0152, 0x0686, 0x0698, 0x0688,
2086 0x06AF, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
2087 0x06A9, 0x2122, 0x0691, 0x203A, 0x0153, 0x200C, 0x200D, 0x06BA,
2088 0x00A0, 0x060C, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,
2089 0x00A8, 0x00A9, 0x06BE, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
2090 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
2091 0x00B8, 0x00B9, 0x061B, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x061F,
2092 0x06C1, 0x0621, 0x0622, 0x0623, 0x0624, 0x0625, 0x0626, 0x0627,
2093 0x0628, 0x0629, 0x062A, 0x062B, 0x062C, 0x062D, 0x062E, 0x062F,
2094 0x0630, 0x0631, 0x0632, 0x0633, 0x0634, 0x0635, 0x0636, 0x00D7,
2095 0x0637, 0x0638, 0x0639, 0x063A, 0x0640, 0x0641, 0x0642, 0x0643,
2096 0x00E0, 0x0644, 0x00E2, 0x0645, 0x0646, 0x0647, 0x0648, 0x00E7,
2097 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x0649, 0x064A, 0x00EE, 0x00EF,
2098 0x064B, 0x064C, 0x064D, 0x064E, 0x00F4, 0x064F, 0x0650, 0x00F7,
2099 0x0651, 0x00F9, 0x0652, 0x00FB, 0x00FC, 0x200E, 0x200F, 0x06D2} },
2100 { "windows-1257", "CP 1257", 2257,
2101 { 0x20AC, 0xFFFD, 0x201A, 0xFFFD, 0x201E, 0x2026, 0x2020, 0x2021,
2102 0xFFFD, 0x2030, 0xFFFD, 0x2039, 0xFFFD, 0x00A8, 0x02C7, 0x00B8,
2103 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
2104 0xFFFD, 0x2122, 0xFFFD, 0x203A, 0xFFFD, 0x00AF, 0x02DB, 0xFFFD,
2105 0x00A0, 0xFFFD, 0x00A2, 0x00A3, 0x00A4, 0xFFFD, 0x00A6, 0x00A7,
2106 0x00D8, 0x00A9, 0x0156, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00C6,
2107 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
2108 0x00F8, 0x00B9, 0x0157, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00E6,
2109 0x0104, 0x012E, 0x0100, 0x0106, 0x00C4, 0x00C5, 0x0118, 0x0112,
2110 0x010C, 0x00C9, 0x0179, 0x0116, 0x0122, 0x0136, 0x012A, 0x013B,
2111 0x0160, 0x0143, 0x0145, 0x00D3, 0x014C, 0x00D5, 0x00D6, 0x00D7,
2112 0x0172, 0x0141, 0x015A, 0x016A, 0x00DC, 0x017B, 0x017D, 0x00DF,
2113 0x0105, 0x012F, 0x0101, 0x0107, 0x00E4, 0x00E5, 0x0119, 0x0113,
2114 0x010D, 0x00E9, 0x017A, 0x0117, 0x0123, 0x0137, 0x012B, 0x013C,
2115 0x0161, 0x0144, 0x0146, 0x00F3, 0x014D, 0x00F5, 0x00F6, 0x00F7,
2116 0x0173, 0x0142, 0x015B, 0x016B, 0x00FC, 0x017C, 0x017E, 0x02D9} },
2117 { "windows-1258", "CP 1258", 2258,
2118 { 0x20AC, 0xFFFD, 0x201A, 0x0192, 0x201E, 0x2026, 0x2020, 0x2021,
2119 0x02C6, 0x2030, 0xFFFD, 0x2039, 0x0152, 0xFFFD, 0xFFFD, 0xFFFD,
2120 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
2121 0x02DC, 0x2122, 0xFFFD, 0x203A, 0x0153, 0xFFFD, 0xFFFD, 0x0178,
2122 0x00A0, 0x00A1, 0x00A2, 0x00A3, 0x00A4, 0x00A5, 0x00A6, 0x00A7,
2123 0x00A8, 0x00A9, 0x00AA, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x00AF,
2124 0x00B0, 0x00B1, 0x00B2, 0x00B3, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
2125 0x00B8, 0x00B9, 0x00BA, 0x00BB, 0x00BC, 0x00BD, 0x00BE, 0x00BF,
2126 0x00C0, 0x00C1, 0x00C2, 0x0102, 0x00C4, 0x00C5, 0x00C6, 0x00C7,
2127 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x0300, 0x00CD, 0x00CE, 0x00CF,
2128 0x0110, 0x00D1, 0x0309, 0x00D3, 0x00D4, 0x01A0, 0x00D6, 0x00D7,
2129 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x01AF, 0x0303, 0x00DF,
2130 0x00E0, 0x00E1, 0x00E2, 0x0103, 0x00E4, 0x00E5, 0x00E6, 0x00E7,
2131 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x0301, 0x00ED, 0x00EE, 0x00EF,
2132 0x0111, 0x00F1, 0x0323, 0x00F3, 0x00F4, 0x01A1, 0x00F6, 0x00F7,
2133 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x01B0, 0x20AB, 0x00FF} },
2134
2135 { "Apple Roman", "Apple Roman", 0,
2136 { 0x00C4, 0x00C5, 0x00C7, 0x00C9, 0x00D1, 0x00D6, 0x00DC, 0x00E1,
2137 0x00E0, 0x00E2, 0x00E4, 0x00E3, 0x00E5, 0x00E7, 0x00E9, 0x00E8,
2138 0x00EA, 0x00EB, 0x00ED, 0x00EC, 0x00EE, 0x00EF, 0x00F1, 0x00F3,
2139 0x00F2, 0x00F4, 0x00F6, 0x00F5, 0x00FA, 0x00F9, 0x00FB, 0x00FC,
2140 0x2020, 0x00B0, 0x00A2, 0x00A3, 0x00A7, 0x2022, 0x00B6, 0x00DF,
2141 0x00AE, 0x00A9, 0x2122, 0x00B4, 0x00A8, 0x2260, 0x00C6, 0x00D8,
2142 0x221E, 0x00B1, 0x2264, 0x2265, 0x00A5, 0x00B5, 0x2202, 0x2211,
2143 0x220F, 0x03C0, 0x222B, 0x00AA, 0x00BA, 0x03A9, 0x00E6, 0x00F8,
2144 0x00BF, 0x00A1, 0x00AC, 0x221A, 0x0192, 0x2248, 0x2206, 0x00AB,
2145 0x00BB, 0x2026, 0x00A0, 0x00C0, 0x00C3, 0x00D5, 0x0152, 0x0153,
2146 0x2013, 0x2014, 0x201C, 0x201D, 0x2018, 0x2019, 0x00F7, 0x25CA,
2147 0x00FF, 0x0178, 0x2044, 0x20AC, 0x2039, 0x203A, 0xFB01, 0xFB02,
2148 0x2021, 0x00B7, 0x201A, 0x201E, 0x2030, 0x00C2, 0x00CA, 0x00C1,
2149 0x00CB, 0x00C8, 0x00CD, 0x00CE, 0x00CF, 0x00CC, 0x00D3, 0x00D4,
2150 0xF8FF, 0x00D2, 0x00DA, 0x00DB, 0x00D9, 0x0131, 0x02C6, 0x02DC,
2151 0x00AF, 0x02D8, 0x02D9, 0x02DA, 0x00B8, 0x02DD, 0x02DB, 0x02C7} },
2152
2153
2154
2155 // This one is based on the charmap file
2156 // /usr/share/i18n/charmaps/SAMI-WS2.gz, which is manually adapted
2157 // to this format by Børre Gaup <boerre@subdimension.com>
2158 { "WINSAMI2", "WS2", 0,
2159 { 0x20AC, 0xFFFD, 0x010C, 0x0192, 0x010D, 0x01B7, 0x0292, 0x01EE,
2160 0x01EF, 0x0110, 0x0160, 0x2039, 0x0152, 0xFFFD, 0xFFFD, 0xFFFD,
2161 0xFFFD, 0x2018, 0x2019, 0x201C, 0x201D, 0x2022, 0x2013, 0x2014,
2162 0x0111, 0x01E6, 0x0161, 0x203A, 0x0153, 0xFFFD, 0xFFFD, 0x0178,
2163 0x00A0, 0x01E7, 0x01E4, 0x00A3, 0x00A4, 0x01E5, 0x00A6, 0x00A7,
2164 0x00A8, 0x00A9, 0x021E, 0x00AB, 0x00AC, 0x00AD, 0x00AE, 0x021F,
2165 0x00B0, 0x00B1, 0x01E8, 0x01E9, 0x00B4, 0x00B5, 0x00B6, 0x00B7,
2166 0x014A, 0x014B, 0x0166, 0x00BB, 0x0167, 0x00BD, 0x017D, 0x017E,
2167 0x00C0, 0x00C1, 0x00C2, 0x00C3, 0x00C4, 0x00C5, 0x00C6, 0x00C7,
2168 0x00C8, 0x00C9, 0x00CA, 0x00CB, 0x00CC, 0x00CD, 0x00CE, 0x00CF,
2169 0x00D0, 0x00D1, 0x00D2, 0x00D3, 0x00D4, 0x00D5, 0x00D6, 0x00D7,
2170 0x00D8, 0x00D9, 0x00DA, 0x00DB, 0x00DC, 0x00DD, 0x00DE, 0x00DF,
2171 0x00E0, 0x00E1, 0x00E2, 0x00E3, 0x00E4, 0x00E5, 0x00E6, 0x00E7,
2172 0x00E8, 0x00E9, 0x00EA, 0x00EB, 0x00EC, 0x00ED, 0x00EE, 0x00EF,
2173 0x00F0, 0x00F1, 0x00F2, 0x00F3, 0x00F4, 0x00F5, 0x00F6, 0x00F7,
2174 0x00F8, 0x00F9, 0x00FA, 0x00FB, 0x00FC, 0x00FD, 0x00FE, 0x00FF} },
2175
2176
2177 // this one is generated from the charmap file located in /usr/share/i18n/charmaps
2178 // on most Linux distributions. The thai character set tis620 is byte by byte equivalent
2179 // to iso8859-11, so we name it 8859-11 here, but recognise the name tis620 too.
2180
2181 // $ for A in 8 9 A B C D E F ; do for B in 0 1 2 3 4 5 6 7 8 9 A B C D E F ; do echo x${A}${B} 0xFFFD ; done ; done > /tmp/digits ; ( cut -c25- < TIS-620 ; cat /tmp/digits ) | awk '/^x[89ABCDEF]/{ print $1, $2 }' | sed -e 's/<U/0x/' -e 's/>//' | sort | uniq -w4 | cut -c5- | paste '-d ' - - - - - - - - | sed -e 's/ /, /g' -e 's/$/,/' -e '$ s/,$/} },/' -e '1 s/^/{ /' > ~/tmp/tis-620
2182 { "TIS-620", "ISO 8859-11", 2259, // Thai character set mib enum taken from tis620 (which is byte by byte equivalent)
2183 { 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
2184 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
2185 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
2186 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
2187 0xFFFD, 0x0E01, 0x0E02, 0x0E03, 0x0E04, 0x0E05, 0x0E06, 0x0E07,
2188 0x0E08, 0x0E09, 0x0E0A, 0x0E0B, 0x0E0C, 0x0E0D, 0x0E0E, 0x0E0F,
2189 0x0E10, 0x0E11, 0x0E12, 0x0E13, 0x0E14, 0x0E15, 0x0E16, 0x0E17,
2190 0x0E18, 0x0E19, 0x0E1A, 0x0E1B, 0x0E1C, 0x0E1D, 0x0E1E, 0x0E1F,
2191 0x0E20, 0x0E21, 0x0E22, 0x0E23, 0x0E24, 0x0E25, 0x0E26, 0x0E27,
2192 0x0E28, 0x0E29, 0x0E2A, 0x0E2B, 0x0E2C, 0x0E2D, 0x0E2E, 0x0E2F,
2193 0x0E30, 0x0E31, 0x0E32, 0x0E33, 0x0E34, 0x0E35, 0x0E36, 0x0E37,
2194 0x0E38, 0x0E39, 0x0E3A, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0x0E3F,
2195 0x0E40, 0x0E41, 0x0E42, 0x0E43, 0x0E44, 0x0E45, 0x0E46, 0x0E47,
2196 0x0E48, 0x0E49, 0x0E4A, 0x0E4B, 0x0E4C, 0x0E4D, 0x0E4E, 0x0E4F,
2197 0x0E50, 0x0E51, 0x0E52, 0x0E53, 0x0E54, 0x0E55, 0x0E56, 0x0E57,
2198 0x0E58, 0x0E59, 0x0E5A, 0x0E5B, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD } },
2199
2200 /*
2201 Name: hp-roman8 [HP-PCL5,RFC1345,KXS2]
2202 MIBenum: 2004
2203 Source: LaserJet IIP Printer User's Manual,
2204 HP part no 33471-90901, Hewlett-Packard, June 1989.
2205 Alias: roman8
2206 Alias: r8
2207 Alias: csHPRoman8
2208 */
2209 { "Roman8", "HP-Roman8", 2004,
2210 { 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
2211 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
2212 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
2213 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD, 0xFFFD,
2214 0x00A0, 0x00C0, 0x00C2, 0x00C8, 0x00CA, 0x00CB, 0x00CE, 0x00CF,
2215 0x00B4, 0x02CB, 0x02C6, 0x00A8, 0x02DC, 0x00D9, 0x00DB, 0x20A4,
2216 0x00AF, 0x00DD, 0x00FD, 0x00B0, 0x00C7, 0x00E7, 0x00D1, 0x00F1,
2217 0x00A1, 0x00BF, 0x00A4, 0x00A3, 0x00A5, 0x00A7, 0x0192, 0x00A2,
2218 0x00E2, 0x00EA, 0x00F4, 0x00FB, 0x00E1, 0x00E9, 0x00F3, 0x00FA,
2219 0x00E0, 0x00E8, 0x00F2, 0x00F9, 0x00E4, 0x00EB, 0x00F6, 0x00FC,
2220 0x00C5, 0x00EE, 0x00D8, 0x00C6, 0x00E5, 0x00ED, 0x00F8, 0x00E6,
2221 0x00C4, 0x00EC, 0x00D6, 0x00DC, 0x00C9, 0x00EF, 0x00DF, 0x00D4,
2222 0x00C1, 0x00C3, 0x00E3, 0x00D0, 0x00F0, 0x00CD, 0x00CC, 0x00D3,
2223 0x00D2, 0x00D5, 0x00F5, 0x0160, 0x0161, 0x00DA, 0x0178, 0x00FF,
2224 0x00DE, 0x00FE, 0x00B7, 0x00B5, 0x00B6, 0x00BE, 0x2014, 0x00BC,
2225 0x00BD, 0x00AA, 0x00BA, 0x00AB, 0x25A0, 0x00BB, 0x00B1, 0xFFFD } }
2226
2227 // if you add more character sets at the end, change LAST_MIB above
2228};
2229
2230QSimpleTextCodec::QSimpleTextCodec( int i )
2231 : QTextCodec(), forwardIndex( i )
2232{
2233#ifndef Q_WS_QWS
2234 reverseMap = 0;
2235#endif
2236}
2237
2238
2239QSimpleTextCodec::~QSimpleTextCodec()
2240{
2241#ifndef Q_WS_QWS
2242 delete reverseMap;
2243#else
2244 if ( reverseOwner == this ) {
2245 delete reverseMap;
2246 reverseMap = 0;
2247 reverseOwner = 0;
2248 }
2249#endif
2250}
2251
2252void QSimpleTextCodec::buildReverseMap()
2253{
2254#ifdef Q_WS_QWS
2255 if ( reverseOwner != this ) {
2256 int m = 0;
2257 int i = 0;
2258 while( i < 128 ) {
2259 if ( unicodevalues[forwardIndex].values[i] > m &&
2260 unicodevalues[forwardIndex].values[i] < 0xfffd )
2261 m = unicodevalues[forwardIndex].values[i];
2262 i++;
2263 }
2264 m++;
2265 if ( !reverseMap )
2266 reverseMap = new QMemArray<unsigned char>( m );
2267 if ( m > (int)(reverseMap->size()) )
2268 reverseMap->resize( m );
2269 for( i = 0; i < 128 && i < m; i++ )
2270 (*reverseMap)[i] = (char)i;
2271 for( ;i < m; i++ )
2272 (*reverseMap)[i] = 0;
2273 for( i=128; i<256; i++ ) {
2274 int u = unicodevalues[forwardIndex].values[i-128];
2275 if ( u < m )
2276 (*reverseMap)[u] = (char)(unsigned char)(i);
2277 }
2278 reverseOwner = this;
2279 }
2280#else
2281 if ( !reverseMap ) {
2282 QMemArray<unsigned char> **map = &((QSimpleTextCodec *)this)->reverseMap;
2283 int m = 0;
2284 int i = 0;
2285 while( i < 128 ) {
2286 if ( unicodevalues[forwardIndex].values[i] > m &&
2287 unicodevalues[forwardIndex].values[i] < 0xfffd )
2288 m = unicodevalues[forwardIndex].values[i];
2289 i++;
2290 }
2291 m++;
2292 *map = new QMemArray<unsigned char>( m );
2293 for( i = 0; i < 128 && i < m; i++ )
2294 (**map)[i] = (char)i;
2295 for( ;i < m; i++ )
2296 (**map)[i] = 0;
2297 for( i=128; i<256; i++ ) {
2298 int u = unicodevalues[forwardIndex].values[i-128];
2299 if ( u < m )
2300 (**map)[u] = (char)(unsigned char)(i);
2301 }
2302 }
2303#endif
2304}
2305
2306QString QSimpleTextCodec::toUnicode(const char* chars, int len) const
2307{
2308 if ( len <= 0 || chars == 0 )
2309 return QString::null;
2310
2311 const unsigned char * c = (const unsigned char *)chars;
2312 int i;
2313
2314 for ( i = 0; i < len; i++ )
2315 if ( c[i] == '\0' ) {
2316 len = i;
2317 break;
2318 }
2319
2320 QString r;
2321 r.setUnicode(0, len);
2322 QChar* uc = (QChar*)r.unicode(); // const_cast
2323
2324 for ( i = 0; i < len; i++ ) {
2325 if ( c[i] > 127 )
2326 uc[i] = unicodevalues[forwardIndex].values[c[i]-128];
2327 else
2328 uc[i] = c[i];
2329 }
2330 return r;
2331}
2332
2333
2334QCString QSimpleTextCodec::fromUnicode(const QString& uc, int& len ) const
2335{
2336#ifdef Q_WS_QWS
2337 if ( this != reverseOwner )
2338#else
2339 if ( !reverseMap )
2340#endif
2341 ((QSimpleTextCodec *)this)->buildReverseMap();
2342
2343 if ( len <0 || len > (int)uc.length() )
2344 len = uc.length();
2345 QCString r( len+1 );
2346 int i = len;
2347 int u;
2348 const QChar* ucp = uc.unicode();
2349 unsigned char* rp = (unsigned char *)r.data();
2350 unsigned char* rmp = reverseMap->data();
2351 int rmsize = (int) reverseMap->size();
2352 while( i-- )
2353 {
2354 u = ucp->unicode();
2355 *rp = u < 128 ? u : (( u < rmsize ) ? (*(rmp+u)) : '?' );
2356 if ( *rp == 0 ) *rp = '?';
2357 rp++;
2358 ucp++;
2359 }
2360 r[len] = 0;
2361 return r;
2362}
2363
2364void QSimpleTextCodec::fromUnicode( const QChar *in, unsigned short *out, int length ) const
2365{
2366#ifdef Q_WS_QWS
2367 if ( this != reverseOwner )
2368#else
2369 if ( !reverseMap )
2370#endif
2371 ((QSimpleTextCodec *)this)->buildReverseMap();
2372
2373 unsigned char* rmp = reverseMap->data();
2374 int rmsize = (int) reverseMap->size();
2375 while ( length-- ) {
2376 unsigned short u = in->unicode();
2377 *out = u < 128 ? u : (( u < rmsize ) ? (*(rmp+u)) : 0 );
2378 ++in;
2379 ++out;
2380 }
2381}
2382
2383unsigned short QSimpleTextCodec::characterFromUnicode(const QString &str, int pos) const
2384{
2385#ifdef Q_WS_QWS
2386 if ( this != reverseOwner )
2387#else
2388 if ( !reverseMap )
2389#endif
2390 ((QSimpleTextCodec *)this)->buildReverseMap();
2391
2392 unsigned short u = str[pos].unicode();
2393 unsigned char* rmp = reverseMap->data();
2394 int rmsize = (int) reverseMap->size();
2395 return u < 128 ? u : (( u < rmsize ) ? (*(rmp+u)) : 0 );
2396}
2397
2398bool QSimpleTextCodec::canEncode( QChar ch ) const
2399{
2400#ifdef Q_WS_QWS
2401 if ( this != reverseOwner )
2402#else
2403 if ( !reverseMap )
2404#endif
2405 ((QSimpleTextCodec *)this)->buildReverseMap();
2406
2407 unsigned short u = ch.unicode();
2408 unsigned char* rmp = reverseMap->data();
2409 int rmsize = (int) reverseMap->size();
2410 return u < 128 ? TRUE : (( u < rmsize ) ? (*(rmp+u) != 0) : FALSE );
2411}
2412
2413const char* QSimpleTextCodec::name() const
2414{
2415 return unicodevalues[forwardIndex].cs;
2416}
2417
2418const char* QSimpleTextCodec::mimeName() const
2419{
2420 return unicodevalues[forwardIndex].mime;
2421}
2422
2423
2424int QSimpleTextCodec::mibEnum() const
2425{
2426 return unicodevalues[forwardIndex].mib;
2427}
2428
2429int QSimpleTextCodec::heuristicNameMatch(const char* hint) const
2430{
2431 if ( qstricmp( hint, mimeName() ) == 0 )
2432 return 10000; // return a large value
2433 if ( hint[0]=='k' ) {
2434 QCString lhint = QCString(hint).lower();
2435 // Help people with messy fonts
2436 if ( lhint == "koi8-1" )
2437 return QTextCodec::heuristicNameMatch("koi8-r")-1;
2438 if ( lhint == "koi8-ru" )
2439 return QTextCodec::heuristicNameMatch("koi8-r")-1;
2440 } else if ( hint[0] == 't' && mibEnum() == 2259 /* iso8859-11 */ ) {
2441 // 8859-11 and tis620 are byte by byte equivalent
2442 int i = simpleHeuristicNameMatch("tis620-0", hint);
2443 if( !i )
2444 i = simpleHeuristicNameMatch("tis-620", hint);
2445 if( i ) return i;
2446 } else if ( mibEnum() == 82 /* ISO 8859-6 */ ) {
2447 int i = simpleHeuristicNameMatch("ISO 8859-6-I", hint);
2448 if ( i )
2449 return i;
2450 }
2451 return QTextCodec::heuristicNameMatch(hint);
2452}
2453
2454int QSimpleTextCodec::heuristicContentMatch(const char* chars, int len) const
2455{
2456 if ( len<1 || !chars )
2457 return -1;
2458 int i = 0;
2459 const uchar * c = (const unsigned char *)chars;
2460 int r = 0;
2461 while( i<len && c && *c ) {
2462 if ( *c >= 128 ) {
2463 if ( unicodevalues[forwardIndex].values[(*c)-128] == 0xfffd )
2464 return -1;
2465 }
2466 if ( (*c >= ' ' && *c < 127) ||
2467 *c == '\n' || *c == '\t' || *c == '\r' )
2468 r++;
2469 i++;
2470 c++;
2471 }
2472 if ( mibEnum()==4 )
2473 r+=1;
2474 return r;
2475}
2476
2477#endif
2478
2479class QLatin1Codec : public QTextCodec
2480{
2481public:
2482#if !defined(Q_NO_USING_KEYWORD)
2483 using QTextCodec::fromUnicode;
2484 using QTextCodec::toUnicode;
2485#endif
2486 QString toUnicode(const char* chars, int len) const;
2487 QCString fromUnicode(const QString& uc, int& lenInOut ) const;
2488 void fromUnicode( const QChar *in, unsigned short *out, int length ) const;
2489 unsigned short characterFromUnicode(const QString &str, int pos) const;
2490
2491 const char* name() const;
2492 const char* mimeName() const;
2493 int mibEnum() const;
2494
2495 int heuristicContentMatch(const char* chars, int len) const;
2496
2497private:
2498 int forwardIndex;
2499};
2500
2501
2502QString QLatin1Codec::toUnicode(const char* chars, int len) const
2503{
2504 if ( chars == 0 )
2505 return QString::null;
2506
2507 return QString::fromLatin1(chars, len);
2508}
2509
2510
2511QCString QLatin1Codec::fromUnicode(const QString& uc, int& len ) const
2512{
2513 if ( len <0 || len > (int)uc.length() )
2514 len = uc.length();
2515 QCString r( len+1 );
2516 char *d = r.data();
2517 int i = 0;
2518 const QChar *ch = uc.unicode();
2519 while ( i < len ) {
2520 d[i] = ch->row() ? '?' : ch->cell();
2521 i++;
2522 ch++;
2523 }
2524 r[len] = 0;
2525 return r;
2526}
2527
2528void QLatin1Codec::fromUnicode( const QChar *in, unsigned short *out, int length ) const
2529{
2530 while ( length-- ) {
2531 *out = in->row() ? 0 : in->cell();
2532 ++in;
2533 ++out;
2534 }
2535}
2536
2537unsigned short QLatin1Codec::characterFromUnicode(const QString &str, int pos) const
2538{
2539 const QChar *ch = str.unicode() + pos;
2540 if (ch->row())
2541 return 0;
2542 return (unsigned short) ch->cell();
2543}
2544
2545
2546const char* QLatin1Codec::name() const
2547{
2548 return "ISO 8859-1";
2549}
2550
2551const char* QLatin1Codec::mimeName() const
2552{
2553 return "ISO-8859-1";
2554}
2555
2556
2557int QLatin1Codec::mibEnum() const
2558{
2559 return 4;
2560}
2561
2562int QLatin1Codec::heuristicContentMatch(const char* chars, int len) const
2563{
2564 if ( len<1 || !chars )
2565 return -1;
2566 int i = 0;
2567 const uchar * c = (const unsigned char *)chars;
2568 int r = 0;
2569 while( i<len && c && *c ) {
2570 if ( *c >= 0x80 && *c < 0xa0 )
2571 return -1;
2572 if ( (*c >= ' ' && *c < 127) ||
2573 *c == '\n' || *c == '\t' || *c == '\r' )
2574 r++;
2575 i++;
2576 c++;
2577 }
2578 if ( this == (const QTextCodec *)codecForLocale() )
2579 r += 5;
2580 return r;
2581}
2582
2583class QLatin15Codec: public QLatin1Codec
2584{
2585public:
2586 QString toUnicode(const char* chars, int len) const;
2587#if !defined(Q_NO_USING_KEYWORD)
2588 using QTextCodec::fromUnicode;
2589#endif
2590 QCString fromUnicode(const QString& uc, int& lenInOut ) const;
2591 void fromUnicode( const QChar *in, unsigned short *out, int length ) const;
2592 unsigned short characterFromUnicode(const QString &str, int pos) const;
2593
2594 const char* name() const;
2595 const char* mimeName() const;
2596 int mibEnum() const;
2597
2598private:
2599 int forwardIndex;
2600};
2601
2602
2603QString QLatin15Codec::toUnicode(const char* chars, int len) const
2604{
2605 if ( chars == 0 )
2606 return QString::null;
2607
2608 QString str = QString::fromLatin1(chars, len);
2609 QChar *uc = (QChar *)str.unicode();
2610 while( len-- ) {
2611 switch( uc->unicode() ) {
2612 case 0xa4:
2613 *uc = 0x20ac;
2614 break;
2615 case 0xa6:
2616 *uc = 0x0160;
2617 break;
2618 case 0xa8:
2619 *uc = 0x0161;
2620 break;
2621 case 0xb4:
2622 *uc = 0x017d;
2623 break;
2624 case 0xb8:
2625 *uc = 0x017e;
2626 break;
2627 case 0xbc:
2628 *uc = 0x0152;
2629 break;
2630 case 0xbd:
2631 *uc = 0x0153;
2632 break;
2633 case 0xbe:
2634 *uc = 0x0178;
2635 break;
2636 default:
2637 break;
2638 }
2639 uc++;
2640 }
2641 return str;
2642}
2643
/*
  Returns the ISO 8859-15 byte for the Unicode value uc.

  The eight code points that ISO 8859-15 adds map to the byte values
  they occupy; the eight Latin-1 code points they displaced cannot be
  encoded. Every other value below 0x100 maps to itself. Anything
  that cannot be encoded yields '?' when replacement is TRUE and 0
  otherwise.
*/
static inline unsigned char
latin15CharFromUnicode( unsigned short uc, bool replacement = TRUE )
{
    const unsigned char unmappable = replacement ? '?' : 0;

    switch( uc ) {
    // characters specific to ISO 8859-15
    case 0x20ac: return 0xa4;
    case 0x0160: return 0xa6;
    case 0x0161: return 0xa8;
    case 0x017d: return 0xb4;
    case 0x017e: return 0xb8;
    case 0x0152: return 0xbc;
    case 0x0153: return 0xbd;
    case 0x0178: return 0xbe;

    // Latin-1 characters whose byte values were reassigned
    case 0xa4:
    case 0xa6:
    case 0xa8:
    case 0xb4:
    case 0xb8:
    case 0xbc:
    case 0xbd:
    case 0xbe:
        return unmappable;

    default:
        break;
    }

    if ( uc < 0x0100 )
        return (unsigned char) uc;
    return unmappable;
}
2703
2704
2705void QLatin15Codec::fromUnicode( const QChar *in, unsigned short *out, int length ) const
2706{
2707 while ( length-- ) {
2708 *out = latin15CharFromUnicode( in->unicode(), FALSE );
2709 ++in;
2710 ++out;
2711 }
2712}
2713
2714
2715QCString QLatin15Codec::fromUnicode(const QString& uc, int& len ) const
2716{
2717 if ( len <0 || len > (int)uc.length() )
2718 len = uc.length();
2719 QCString r( len+1 );
2720 char *d = r.data();
2721 int i = 0;
2722 const QChar *ch = uc.unicode();
2723 while ( i < len ) {
2724 d[i] = latin15CharFromUnicode( ch->unicode() );
2725 i++;
2726 ch++;
2727 }
2728 r[len] = 0;
2729 return r;
2730}
2731
2732unsigned short QLatin15Codec::characterFromUnicode(const QString &str, int pos) const
2733{
2734 return latin15CharFromUnicode( str.unicode()[pos].unicode(), FALSE );
2735}
2736
2737
2738const char* QLatin15Codec::name() const
2739{
2740 return "ISO 8859-15";
2741}
2742
2743const char* QLatin15Codec::mimeName() const
2744{
2745 return "ISO-8859-15";
2746}
2747
2748
2749int QLatin15Codec::mibEnum() const
2750{
2751 return 111;
2752}
2753
2754
2755/* the next two functions are implicitly thread safe,
2756 as they are only called by setup() which uses a mutex.
2757*/
2758static void setupLocaleMapper()
2759{
2760#if defined(Q_OS_WIN32) || defined(Q_OS_OS2)
2761 localeMapper = QTextCodec::codecForName( "System" );
2762#else
2763
2764#if defined (_XOPEN_UNIX) && !defined(Q_OS_QNX6) && !defined(Q_OS_OSF)
2765 char *charset = nl_langinfo (CODESET);
2766 if ( charset )
2767 localeMapper = QTextCodec::codecForName( charset );
2768#endif
2769
2770 if ( !localeMapper ) {
2771 // Very poorly defined and followed standards causes lots of code
2772 // to try to get all the cases...
2773
2774 // Try to determine locale codeset from locale name assigned to
2775 // LC_CTYPE category.
2776
2777 // First part is getting that locale name. First try setlocale() which
2778 // definitely knows it, but since we cannot fully trust it, get ready
2779 // to fall back to environment variables.
2780 char * ctype = qstrdup( setlocale( LC_CTYPE, 0 ) );
2781
2782 // Get the first nonempty value from $LC_ALL, $LC_CTYPE, and $LANG
2783 // environment variables.
2784 char * lang = qstrdup( getenv("LC_ALL") );
2785 if ( !lang || lang[0] == 0 || strcmp( lang, "C" ) == 0 ) {
2786 if ( lang ) delete [] lang;
2787 lang = qstrdup( getenv("LC_CTYPE") );
2788 }
2789 if ( !lang || lang[0] == 0 || strcmp( lang, "C" ) == 0 ) {
2790 if ( lang ) delete [] lang;
2791 lang = qstrdup( getenv("LANG") );
2792 }
2793
2794 // Now try these in order:
2795 // 1. CODESET from ctype if it contains a .CODESET part (e.g. en_US.ISO8859-15)
2796 // 2. CODESET from lang if it contains a .CODESET part
2797 // 3. ctype (maybe the locale is named "ISO-8859-1" or something)
2798 // 4. locale (ditto)
2799 // 5. check for "@euro"
2800 // 6. guess locale from ctype unless ctype is "C"
2801 // 7. guess locale from lang
2802
2803 // 1. CODESET from ctype if it contains a .CODESET part (e.g. en_US.ISO8859-15)
2804 char * codeset = ctype ? strchr( ctype, '.' ) : 0;
2805 if ( codeset && *codeset == '.' )
2806 localeMapper = QTextCodec::codecForName( codeset + 1 );
2807
2808 // 2. CODESET from lang if it contains a .CODESET part
2809 codeset = lang ? strchr( lang, '.' ) : 0;
2810 if ( !localeMapper && codeset && *codeset == '.' )
2811 localeMapper = QTextCodec::codecForName( codeset + 1 );
2812
2813 // 3. ctype (maybe the locale is named "ISO-8859-1" or something)
2814 if ( !localeMapper && ctype && *ctype != 0 && strcmp (ctype, "C") != 0 )
2815 localeMapper = QTextCodec::codecForName( ctype );
2816
2817 // 4. locale (ditto)
2818 if ( !localeMapper && lang && *lang != 0 )
2819 localeMapper = QTextCodec::codecForName( lang );
2820
2821 // 5. "@euro"
2822 if ( ctype && strstr( ctype, "@euro" ) || lang && strstr( lang, "@euro" ) )
2823 localeMapper = QTextCodec::codecForName( "ISO 8859-15" );
2824
2825 // 6. guess locale from ctype unless ctype is "C"
2826 // 7. guess locale from lang
2827 char * try_by_name = ctype;
2828 if ( ctype && *ctype != 0 && strcmp (ctype, "C") != 0 )
2829 try_by_name = lang;
2830
2831 // Now do the guessing.
2832 if ( lang && *lang && !localeMapper && try_by_name && *try_by_name ) {
2833 if ( try_locale_list( iso8859_15locales, lang ) )
2834 localeMapper = QTextCodec::codecForName( "ISO 8859-15" );
2835 else if ( try_locale_list( iso8859_2locales, lang ) )
2836 localeMapper = QTextCodec::codecForName( "ISO 8859-2" );
2837 else if ( try_locale_list( iso8859_3locales, lang ) )
2838 localeMapper = QTextCodec::codecForName( "ISO 8859-3" );
2839 else if ( try_locale_list( iso8859_4locales, lang ) )
2840 localeMapper = QTextCodec::codecForName( "ISO 8859-4" );
2841 else if ( try_locale_list( iso8859_5locales, lang ) )
2842 localeMapper = QTextCodec::codecForName( "ISO 8859-5" );
2843 else if ( try_locale_list( iso8859_6locales, lang ) )
2844 localeMapper = QTextCodec::codecForName( "ISO 8859-6" );
2845 else if ( try_locale_list( iso8859_7locales, lang ) )
2846 localeMapper = QTextCodec::codecForName( "ISO 8859-7" );
2847 else if ( try_locale_list( iso8859_8locales, lang ) )
2848 localeMapper = QTextCodec::codecForName( "ISO 8859-8-I" );
2849 else if ( try_locale_list( iso8859_9locales, lang ) )
2850 localeMapper = QTextCodec::codecForName( "ISO 8859-9" );
2851 else if ( try_locale_list( iso8859_13locales, lang ) )
2852 localeMapper = QTextCodec::codecForName( "ISO 8859-13" );
2853 else if ( try_locale_list( tis_620locales, lang ) )
2854 localeMapper = QTextCodec::codecForName( "ISO 8859-11" );
2855 else if ( try_locale_list( koi8_ulocales, lang ) )
2856 localeMapper = QTextCodec::codecForName( "KOI8-U" );
2857 else if ( try_locale_list( cp_1251locales, lang ) )
2858 localeMapper = QTextCodec::codecForName( "CP 1251" );
2859 else if ( try_locale_list( pt_154locales, lang ) )
2860 localeMapper = QTextCodec::codecForName( "PT 154" );
2861 else if ( try_locale_list( probably_koi8_rlocales, lang ) )
2862 localeMapper = ru_RU_hack( lang );
2863 }
2864
2865 delete [] ctype;
2866 delete [] lang;
2867 }
2868 if ( localeMapper && localeMapper->mibEnum() == 11 )
2869 localeMapper = QTextCodec::codecForName( "ISO 8859-8-I" );
2870
2871 // If everything failed, we default to 8859-1
2872 // We could perhaps default to 8859-15.
2873 if ( !localeMapper )
2874 localeMapper = QTextCodec::codecForName( "ISO 8859-1" );
2875#endif
2876}
2877
2878
2879static void realSetup()
2880{
2881#if defined(QT_CHECK_STATE)
2882 if ( destroying_is_ok )
2883 qWarning( "QTextCodec: creating new codec during codec cleanup!" );
2884#endif
2885 all = new QValueList<QTextCodec*>;
2886
2887 (void)new QLatin1Codec;
2888 (void)new QLatin15Codec;
2889 (void)new QUtf8Codec;
2890 (void)new QUtf16Codec;
2891
2892#ifndef QT_NO_CODECS
2893 int i = 0;
2894 do {
2895 (void)new QSimpleTextCodec( i );
2896 } while( unicodevalues[i++].mib != LAST_MIB );
2897
2898 (void)new QTsciiCodec;
2899
2900 for (i = 0; i < 9; ++i) {
2901 (void)new QIsciiCodec(i);
2902 }
2903#endif // QT_NO_CODECS
2904#ifndef QT_NO_CODEC_HEBREW
2905 (void)new QHebrewCodec;
2906#endif
2907#ifndef QT_NO_BIG_CODECS
2908 (void)new QBig5Codec;
2909 (void)new QBig5hkscsCodec;
2910 (void)new QEucJpCodec;
2911 (void)new QEucKrCodec;
2912 (void)new QGb2312Codec;
2913 (void)new QGbkCodec;
2914 (void)new QGb18030Codec;
2915 (void)new QJisCodec;
2916 (void)new QSjisCodec;
2917#endif // QT_NO_BIG_CODECS
2918
2919#ifdef Q_OS_WIN32
2920 (void) new QWindowsLocalCodec;
2921#endif // Q_OS_WIN32
2922
2923#ifdef Q_OS_OS2
2924 (void) new QOS2LocalCodec;
2925#endif // Q_OS_OS2
2926
2927 if ( !localeMapper )
2928 setupLocaleMapper();
2929}
2930
2931void QTextCodec::fromUnicodeInternal( const QChar *in, unsigned short *out, int length )
2932{
2933 switch( mibEnum() ) {
2934#ifndef QT_NO_CODECS
2935 case 2084:
2936 case 2088:
2937 case 5:
2938 case 6:
2939 case 7:
2940 case 8:
2941 case 82:
2942 case 10:
2943 case 85:
2944 case 12:
2945 case 13:
2946 case 109:
2947 case 110:
2948 case 2004:
2949 case 2009:
2950 case 2086:
2951 case 2250:
2952 case 2251:
2953 case 2252:
2954 case 2253:
2955 case 2254:
2956 case 2255:
2957 case 2256:
2958 case 2257:
2959 case 2258:
2960 case 2259:
2961 ((QSimpleTextCodec *)this)->fromUnicode( in, out, length );
2962 break;
2963
2964#if !defined(QT_NO_BIG_CODECS) && defined(Q_WS_X11)
2965 // the QFont*Codecs are only used on X11
2966
2967 case 15:
2968 ((QFontJis0201Codec *) this)->fromUnicode( in, out, length );
2969 break;
2970
2971 case 63:
2972 ((QFontJis0208Codec *) this)->fromUnicode( in, out, length );
2973 break;
2974
2975 case 36:
2976 ((QFontKsc5601Codec *) this)->fromUnicode( in, out, length );
2977 break;
2978
2979 case 57:
2980 ((QFontGb2312Codec *) this)->fromUnicode( in, out, length );
2981 break;
2982
2983 case -113:
2984 ((QFontGbkCodec *) this)->fromUnicode( in, out, length );
2985 break;
2986
2987 case -114:
2988 ((QFontGb18030_0Codec *) this)->fromUnicode( in, out, length );
2989 break;
2990
2991 case -2026:
2992 ((QFontBig5Codec *) this)->fromUnicode( in, out, length );
2993 break;
2994
2995 case -2101:
2996 ((QFontBig5hkscsCodec *) this)->fromUnicode( in, out, length );
2997 break;
2998
2999 case -4242:
3000 ((QFontLaoCodec *) this)->fromUnicode( in, out, length );
3001 break;
3002#endif
3003#endif // QT_NO_CODECS
3004
3005 case 4:
3006 ((QLatin1Codec *) this)->fromUnicode( in, out, length );
3007 break;
3008
3009 case 111:
3010 ((QLatin15Codec *) this)->fromUnicode( in, out, length );
3011 break;
3012
3013 default:
3014 {
3015 QConstString string( in, length );
3016 QString str = string.string();
3017 for ( int i = 0; i < length; i++ )
3018 out[i] = characterFromUnicode( str, i );
3019 }
3020 }
3021}
3022
3023
3024/*!
3025 \fn QTextCodec* QTextCodec::codecForTr()
3026
3027 Returns the codec used by QObject::tr() on its argument. If this
3028 function returns 0 (the default), tr() assumes Latin-1.
3029
3030 \sa setCodecForTr()
3031*/
3032
3033/*!
3034 \fn void QTextCodec::setCodecForTr(QTextCodec *c)
3035 \nonreentrant
3036
3037 Sets the codec used by QObject::tr() on its argument to \a c. If
3038 \a c is 0 (the default), tr() assumes Latin-1.
3039
3040 If the literal quoted text in the program is not in the Latin-1
3041 encoding, this function can be used to set the appropriate
3042 encoding. For example, software developed by Korean programmers
3043 might use eucKR for all the text in the program, in which case the
3044 main() function might look like this:
3045
3046 \code
3047 int main(int argc, char** argv)
3048 {
3049 QApplication app(argc, argv);
3050 ... install any additional codecs ...
3051 QTextCodec::setCodecForTr( QTextCodec::codecForName("eucKR") );
3052 ...
3053 }
3054 \endcode
3055
3056 Note that this is not the way to select the encoding that the \e
3057 user has chosen. For example, to convert an application containing
3058 literal English strings to Korean, all that is needed is for the
3059 English strings to be passed through tr() and for translation
3060 files to be loaded. For details of internationalization, see the
3061 \link i18n.html Qt internationalization documentation\endlink.
3062
3063 \sa codecForTr(), setCodecForCStrings()
3064*/
3065
3066
3067/*!
3068 \fn QTextCodec* QTextCodec::codecForCStrings()
3069
3070 Returns the codec used by QString to convert to and from const
3071 char* and QCStrings. If this function returns 0 (the default),
3072 QString assumes Latin-1.
3073
3074 \sa setCodecForCStrings()
3075*/
3076
3077/*!
3078 \fn void QTextCodec::setCodecForCStrings(QTextCodec *c)
3079 \nonreentrant
3080
3081 Sets the codec used by QString to convert to and from const char*
3082 and QCStrings. If \a c is 0 (the default), QString assumes Latin-1.
3083
3084 \warning Some codecs do not preserve the characters in the ASCII
3085 range (0x00 to 0x7f). For example, the Japanese Shift-JIS
3086 encoding maps the backslash character (0x5c) to the Yen character.
3087 This leads to unexpected results when using the backslash
3088 character to escape characters in strings used in e.g. regular
3089 expressions. Use QString::fromLatin1() to preserve characters in
3090 the ASCII range when needed.
3091
3092 \sa codecForCStrings(), setCodecForTr()
3093*/
3094
3095
3096QTextCodec *QTextCodec::cftr = 0;
3097QTextCodec *QTextCodec::cfcs = 0;
3098
3099
3100#endif // QT_NO_TEXTCODEC
Note: See TracBrowser for help on using the repository browser.