source: trunk/src/codecs/qutfcodec.cpp@ 36

Last change on this file since 36 was 2, checked in by dmik, 20 years ago

Imported xplatform parts of the official release 3.3.1 from Trolltech

  • Property svn:keywords set to Id
File size: 7.7 KB
Line 
1/****************************************************************************
2** $Id: qutfcodec.cpp 2 2005-11-16 15:49:26Z dmik $
3**
4** Implementation of QUtf{8,16}Codec class
5**
6** Created : 981015
7**
8** Copyright (C) 1998-2002 Trolltech AS. All rights reserved.
9**
10** This file is part of the tools module of the Qt GUI Toolkit.
11**
12** This file may be distributed under the terms of the Q Public License
13** as defined by Trolltech AS of Norway and appearing in the file
14** LICENSE.QPL included in the packaging of this file.
15**
16** This file may be distributed and/or modified under the terms of the
17** GNU General Public License version 2 as published by the Free Software
18** Foundation and appearing in the file LICENSE.GPL included in the
19** packaging of this file.
20**
21** Licensees holding valid Qt Enterprise Edition or Qt Professional Edition
22** licenses may use this file in accordance with the Qt Commercial License
23** Agreement provided with the Software.
24**
25** This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
26** WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
27**
28** See http://www.trolltech.com/pricing.html or email sales@trolltech.com for
29** information about Qt Commercial License Agreements.
30** See http://www.trolltech.com/qpl/ for QPL licensing information.
31** See http://www.trolltech.com/gpl/ for GPL licensing information.
32**
33** Contact info@trolltech.com if any conditions of this licensing are
34** not clear to you.
35**
36**********************************************************************/
37
38#include "qutfcodec.h"
39
40#ifndef QT_NO_TEXTCODEC
41
42int QUtf8Codec::mibEnum() const
43{
44 return 106;
45}
46
47QCString QUtf8Codec::fromUnicode(const QString& uc, int& lenInOut) const
48{
49 int l = uc.length();
50 if (lenInOut > 0)
51 l = QMIN(l, lenInOut);
52 int rlen = l*3+1;
53 QCString rstr(rlen);
54 uchar* cursor = (uchar*)rstr.data();
55 const QChar *ch = uc.unicode();
56 for (int i=0; i < l; i++) {
57 uint u = ch->unicode();
58 if ( u < 0x80 ) {
59 *cursor++ = (uchar)u;
60 } else {
61 if ( u < 0x0800 ) {
62 *cursor++ = 0xc0 | ((uchar) (u >> 6));
63 } else {
64 if (u >= 0xd800 && u < 0xdc00 && i < l-1) {
65 unsigned short low = ch[1].unicode();
66 if (low >= 0xdc00 && low < 0xe000) {
67 ++ch;
68 ++i;
69 u = (u - 0xd800)*0x400 + (low - 0xdc00) + 0x10000;
70 }
71 }
72 if (u > 0xffff) {
73 // see QString::fromUtf8() and QString::utf8() for explanations
74 if (u > 0x10fe00 && u < 0x10ff00) {
75 *cursor++ = (u - 0x10fe00);
76 ++ch;
77 continue;
78 } else {
79 *cursor++ = 0xf0 | ((uchar) (u >> 18));
80 *cursor++ = 0x80 | ( ((uchar) (u >> 12)) & 0x3f);
81 }
82 } else {
83 *cursor++ = 0xe0 | ((uchar) (u >> 12));
84 }
85 *cursor++ = 0x80 | ( ((uchar) (u >> 6)) & 0x3f);
86 }
87 *cursor++ = 0x80 | ((uchar) (u&0x3f));
88 }
89 ++ch;
90 }
91 *cursor = 0;
92 lenInOut = cursor - (uchar*)rstr.data();
93 ((QByteArray&)rstr).resize(lenInOut+1);
94 return rstr;
95}
96
97QString QUtf8Codec::toUnicode(const char* chars, int len) const
98{
99 if (len > 3 && (uchar)chars[0] == 0xef && (uchar)chars[1] == 0xbb && (uchar)chars[2] == 0xbf) {
100 // starts with a byte order mark
101 chars += 3;
102 len -= 3;
103 }
104 return QString::fromUtf8( chars, len );
105}
106
107
108const char* QUtf8Codec::name() const
109{
110 return "UTF-8";
111}
112
113int QUtf8Codec::heuristicContentMatch(const char* chars, int len) const
114{
115 int score = 0;
116 for (int i=0; i<len; i++) {
117 uchar ch = chars[i];
118 // No nulls allowed.
119 if ( !ch )
120 return -1;
121 if ( ch < 128 ) {
122 // Inconclusive
123 score++;
124 } else if ( (ch&0xe0) == 0xc0 ) {
125 if ( i < len-1 ) {
126 uchar c2 = chars[++i];
127 if ( (c2&0xc0) != 0x80 )
128 return -1;
129 score+=3;
130 }
131 } else if ( (ch&0xf0) == 0xe0 ) {
132 if ( i < len-1 ) {
133 uchar c2 = chars[++i];
134 if ( (c2&0xc0) != 0x80 ) {
135 return -1;
136#if 0
137 if ( i < len-1 ) {
138 uchar c3 = chars[++i];
139 if ( (c3&0xc0) != 0x80 )
140 return -1;
141 score+=3;
142 }
143#endif
144 }
145 score+=2;
146 }
147 }
148 }
149 return score;
150}
151
152
153
154
155class QUtf8Decoder : public QTextDecoder {
156 uint uc;
157 int need;
158 bool headerDone;
159public:
160 QUtf8Decoder() : need(0), headerDone(FALSE)
161 {
162 }
163
164 QString toUnicode(const char* chars, int len)
165 {
166 QString result;
167 result.setLength( len ); // worst case
168 QChar *qch = (QChar *)result.unicode();
169 uchar ch;
170 for (int i=0; i<len; i++) {
171 ch = *chars++;
172 if (need) {
173 if ( (ch&0xc0) == 0x80 ) {
174 uc = (uc << 6) | (ch & 0x3f);
175 need--;
176 if ( !need ) {
177 if (uc > 0xffff) {
178 // surrogate pair
179 uc -= 0x10000;
180 unsigned short high = uc/0x400 + 0xd800;
181 unsigned short low = uc%0x400 + 0xdc00;
182 *qch++ = QChar(high);
183 *qch++ = QChar(low);
184 headerDone = TRUE;
185 } else {
186 if (headerDone || QChar(uc) != QChar::byteOrderMark)
187 *qch++ = uc;
188 headerDone = TRUE;
189 }
190 }
191 } else {
192 // error
193 *qch++ = QChar::replacement;
194 need = 0;
195 }
196 } else {
197 if ( ch < 128 ) {
198 *qch++ = ch;
199 headerDone = TRUE;
200 } else if ((ch & 0xe0) == 0xc0) {
201 uc = ch & 0x1f;
202 need = 1;
203 } else if ((ch & 0xf0) == 0xe0) {
204 uc = ch & 0x0f;
205 need = 2;
206 } else if ((ch&0xf8) == 0xf0) {
207 uc = ch & 0x07;
208 need = 3;
209 }
210 }
211 }
212 result.truncate( qch - result.unicode() );
213 return result;
214 }
215};
216
217QTextDecoder* QUtf8Codec::makeDecoder() const
218{
219 return new QUtf8Decoder;
220}
221
222
223
224
225
226
227int QUtf16Codec::mibEnum() const
228{
229 return 1000;
230}
231
232const char* QUtf16Codec::name() const
233{
234 return "ISO-10646-UCS-2";
235}
236
237int QUtf16Codec::heuristicContentMatch(const char* chars, int len) const
238{
239 uchar* uchars = (uchar*)chars;
240 if ( len >= 2 && (uchars[0] == 0xff && uchars[1] == 0xfe ||
241 uchars[1] == 0xff && uchars[0] == 0xfe) )
242 return len;
243 else
244 return 0;
245}
246
247
248
249
250class QUtf16Encoder : public QTextEncoder {
251 bool headerdone;
252public:
253 QUtf16Encoder() : headerdone(FALSE)
254 {
255 }
256
257 QCString fromUnicode(const QString& uc, int& lenInOut)
258 {
259 if ( headerdone ) {
260 lenInOut = uc.length()*sizeof(QChar);
261 QCString d(lenInOut);
262 memcpy(d.data(),uc.unicode(),lenInOut);
263 return d;
264 } else {
265 headerdone = TRUE;
266 lenInOut = (1+uc.length())*sizeof(QChar);
267 QCString d(lenInOut);
268 memcpy(d.data(),&QChar::byteOrderMark,sizeof(QChar));
269 memcpy(d.data()+sizeof(QChar),uc.unicode(),uc.length()*sizeof(QChar));
270 return d;
271 }
272 }
273};
274
275class QUtf16Decoder : public QTextDecoder {
276 uchar buf;
277 bool half;
278 bool swap;
279 bool headerdone;
280
281public:
282 QUtf16Decoder() : half(FALSE), swap(FALSE), headerdone(FALSE)
283 {
284 }
285
286 QString toUnicode(const char* chars, int len)
287 {
288 QString result;
289 result.setLength( len ); // worst case
290 QChar *qch = (QChar *)result.unicode();
291 QChar ch;
292 while ( len-- ) {
293 if ( half ) {
294 if ( swap ) {
295 ch.setRow( *chars++ );
296 ch.setCell( buf );
297 } else {
298 ch.setRow( buf );
299 ch.setCell( *chars++ );
300 }
301 if ( !headerdone ) {
302 if ( ch == QChar::byteOrderSwapped ) {
303 swap = !swap;
304 } else if ( ch == QChar::byteOrderMark ) {
305 // Ignore ZWNBSP
306 } else {
307 *qch++ = ch;
308 }
309 headerdone = TRUE;
310 } else
311 *qch++ = ch;
312 half = FALSE;
313 } else {
314 buf = *chars++;
315 half = TRUE;
316 }
317 }
318 result.truncate( qch - result.unicode() );
319 return result;
320 }
321};
322
323QTextDecoder* QUtf16Codec::makeDecoder() const
324{
325 return new QUtf16Decoder;
326}
327
328QTextEncoder* QUtf16Codec::makeEncoder() const
329{
330 return new QUtf16Encoder;
331}
332
333#endif //QT_NO_TEXTCODEC
Note: See TracBrowser for help on using the repository browser.