| 1 | /****************************************************************************
|
|---|
| 2 | ** $Id: qutfcodec.cpp 2 2005-11-16 15:49:26Z dmik $
|
|---|
| 3 | **
|
|---|
| 4 | ** Implementation of QUtf{8,16}Codec class
|
|---|
| 5 | **
|
|---|
| 6 | ** Created : 981015
|
|---|
| 7 | **
|
|---|
| 8 | ** Copyright (C) 1998-2002 Trolltech AS. All rights reserved.
|
|---|
| 9 | **
|
|---|
| 10 | ** This file is part of the tools module of the Qt GUI Toolkit.
|
|---|
| 11 | **
|
|---|
| 12 | ** This file may be distributed under the terms of the Q Public License
|
|---|
| 13 | ** as defined by Trolltech AS of Norway and appearing in the file
|
|---|
| 14 | ** LICENSE.QPL included in the packaging of this file.
|
|---|
| 15 | **
|
|---|
| 16 | ** This file may be distributed and/or modified under the terms of the
|
|---|
| 17 | ** GNU General Public License version 2 as published by the Free Software
|
|---|
| 18 | ** Foundation and appearing in the file LICENSE.GPL included in the
|
|---|
| 19 | ** packaging of this file.
|
|---|
| 20 | **
|
|---|
| 21 | ** Licensees holding valid Qt Enterprise Edition or Qt Professional Edition
|
|---|
| 22 | ** licenses may use this file in accordance with the Qt Commercial License
|
|---|
| 23 | ** Agreement provided with the Software.
|
|---|
| 24 | **
|
|---|
| 25 | ** This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
|
|---|
| 26 | ** WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
|
|---|
| 27 | **
|
|---|
| 28 | ** See http://www.trolltech.com/pricing.html or email sales@trolltech.com for
|
|---|
| 29 | ** information about Qt Commercial License Agreements.
|
|---|
| 30 | ** See http://www.trolltech.com/qpl/ for QPL licensing information.
|
|---|
| 31 | ** See http://www.trolltech.com/gpl/ for GPL licensing information.
|
|---|
| 32 | **
|
|---|
| 33 | ** Contact info@trolltech.com if any conditions of this licensing are
|
|---|
| 34 | ** not clear to you.
|
|---|
| 35 | **
|
|---|
| 36 | **********************************************************************/
|
|---|
| 37 |
|
|---|
| 38 | #include "qutfcodec.h"
|
|---|
| 39 |
|
|---|
| 40 | #ifndef QT_NO_TEXTCODEC
|
|---|
| 41 |
|
|---|
| 42 | int QUtf8Codec::mibEnum() const
|
|---|
| 43 | {
|
|---|
| 44 | return 106;
|
|---|
| 45 | }
|
|---|
| 46 |
|
|---|
| 47 | QCString QUtf8Codec::fromUnicode(const QString& uc, int& lenInOut) const
|
|---|
| 48 | {
|
|---|
| 49 | int l = uc.length();
|
|---|
| 50 | if (lenInOut > 0)
|
|---|
| 51 | l = QMIN(l, lenInOut);
|
|---|
| 52 | int rlen = l*3+1;
|
|---|
| 53 | QCString rstr(rlen);
|
|---|
| 54 | uchar* cursor = (uchar*)rstr.data();
|
|---|
| 55 | const QChar *ch = uc.unicode();
|
|---|
| 56 | for (int i=0; i < l; i++) {
|
|---|
| 57 | uint u = ch->unicode();
|
|---|
| 58 | if ( u < 0x80 ) {
|
|---|
| 59 | *cursor++ = (uchar)u;
|
|---|
| 60 | } else {
|
|---|
| 61 | if ( u < 0x0800 ) {
|
|---|
| 62 | *cursor++ = 0xc0 | ((uchar) (u >> 6));
|
|---|
| 63 | } else {
|
|---|
| 64 | if (u >= 0xd800 && u < 0xdc00 && i < l-1) {
|
|---|
| 65 | unsigned short low = ch[1].unicode();
|
|---|
| 66 | if (low >= 0xdc00 && low < 0xe000) {
|
|---|
| 67 | ++ch;
|
|---|
| 68 | ++i;
|
|---|
| 69 | u = (u - 0xd800)*0x400 + (low - 0xdc00) + 0x10000;
|
|---|
| 70 | }
|
|---|
| 71 | }
|
|---|
| 72 | if (u > 0xffff) {
|
|---|
| 73 | // see QString::fromUtf8() and QString::utf8() for explanations
|
|---|
| 74 | if (u > 0x10fe00 && u < 0x10ff00) {
|
|---|
| 75 | *cursor++ = (u - 0x10fe00);
|
|---|
| 76 | ++ch;
|
|---|
| 77 | continue;
|
|---|
| 78 | } else {
|
|---|
| 79 | *cursor++ = 0xf0 | ((uchar) (u >> 18));
|
|---|
| 80 | *cursor++ = 0x80 | ( ((uchar) (u >> 12)) & 0x3f);
|
|---|
| 81 | }
|
|---|
| 82 | } else {
|
|---|
| 83 | *cursor++ = 0xe0 | ((uchar) (u >> 12));
|
|---|
| 84 | }
|
|---|
| 85 | *cursor++ = 0x80 | ( ((uchar) (u >> 6)) & 0x3f);
|
|---|
| 86 | }
|
|---|
| 87 | *cursor++ = 0x80 | ((uchar) (u&0x3f));
|
|---|
| 88 | }
|
|---|
| 89 | ++ch;
|
|---|
| 90 | }
|
|---|
| 91 | *cursor = 0;
|
|---|
| 92 | lenInOut = cursor - (uchar*)rstr.data();
|
|---|
| 93 | ((QByteArray&)rstr).resize(lenInOut+1);
|
|---|
| 94 | return rstr;
|
|---|
| 95 | }
|
|---|
| 96 |
|
|---|
| 97 | QString QUtf8Codec::toUnicode(const char* chars, int len) const
|
|---|
| 98 | {
|
|---|
| 99 | if (len > 3 && (uchar)chars[0] == 0xef && (uchar)chars[1] == 0xbb && (uchar)chars[2] == 0xbf) {
|
|---|
| 100 | // starts with a byte order mark
|
|---|
| 101 | chars += 3;
|
|---|
| 102 | len -= 3;
|
|---|
| 103 | }
|
|---|
| 104 | return QString::fromUtf8( chars, len );
|
|---|
| 105 | }
|
|---|
| 106 |
|
|---|
| 107 |
|
|---|
| 108 | const char* QUtf8Codec::name() const
|
|---|
| 109 | {
|
|---|
| 110 | return "UTF-8";
|
|---|
| 111 | }
|
|---|
| 112 |
|
|---|
| 113 | int QUtf8Codec::heuristicContentMatch(const char* chars, int len) const
|
|---|
| 114 | {
|
|---|
| 115 | int score = 0;
|
|---|
| 116 | for (int i=0; i<len; i++) {
|
|---|
| 117 | uchar ch = chars[i];
|
|---|
| 118 | // No nulls allowed.
|
|---|
| 119 | if ( !ch )
|
|---|
| 120 | return -1;
|
|---|
| 121 | if ( ch < 128 ) {
|
|---|
| 122 | // Inconclusive
|
|---|
| 123 | score++;
|
|---|
| 124 | } else if ( (ch&0xe0) == 0xc0 ) {
|
|---|
| 125 | if ( i < len-1 ) {
|
|---|
| 126 | uchar c2 = chars[++i];
|
|---|
| 127 | if ( (c2&0xc0) != 0x80 )
|
|---|
| 128 | return -1;
|
|---|
| 129 | score+=3;
|
|---|
| 130 | }
|
|---|
| 131 | } else if ( (ch&0xf0) == 0xe0 ) {
|
|---|
| 132 | if ( i < len-1 ) {
|
|---|
| 133 | uchar c2 = chars[++i];
|
|---|
| 134 | if ( (c2&0xc0) != 0x80 ) {
|
|---|
| 135 | return -1;
|
|---|
| 136 | #if 0
|
|---|
| 137 | if ( i < len-1 ) {
|
|---|
| 138 | uchar c3 = chars[++i];
|
|---|
| 139 | if ( (c3&0xc0) != 0x80 )
|
|---|
| 140 | return -1;
|
|---|
| 141 | score+=3;
|
|---|
| 142 | }
|
|---|
| 143 | #endif
|
|---|
| 144 | }
|
|---|
| 145 | score+=2;
|
|---|
| 146 | }
|
|---|
| 147 | }
|
|---|
| 148 | }
|
|---|
| 149 | return score;
|
|---|
| 150 | }
|
|---|
| 151 |
|
|---|
| 152 |
|
|---|
| 153 |
|
|---|
| 154 |
|
|---|
| 155 | class QUtf8Decoder : public QTextDecoder {
|
|---|
| 156 | uint uc;
|
|---|
| 157 | int need;
|
|---|
| 158 | bool headerDone;
|
|---|
| 159 | public:
|
|---|
| 160 | QUtf8Decoder() : need(0), headerDone(FALSE)
|
|---|
| 161 | {
|
|---|
| 162 | }
|
|---|
| 163 |
|
|---|
| 164 | QString toUnicode(const char* chars, int len)
|
|---|
| 165 | {
|
|---|
| 166 | QString result;
|
|---|
| 167 | result.setLength( len ); // worst case
|
|---|
| 168 | QChar *qch = (QChar *)result.unicode();
|
|---|
| 169 | uchar ch;
|
|---|
| 170 | for (int i=0; i<len; i++) {
|
|---|
| 171 | ch = *chars++;
|
|---|
| 172 | if (need) {
|
|---|
| 173 | if ( (ch&0xc0) == 0x80 ) {
|
|---|
| 174 | uc = (uc << 6) | (ch & 0x3f);
|
|---|
| 175 | need--;
|
|---|
| 176 | if ( !need ) {
|
|---|
| 177 | if (uc > 0xffff) {
|
|---|
| 178 | // surrogate pair
|
|---|
| 179 | uc -= 0x10000;
|
|---|
| 180 | unsigned short high = uc/0x400 + 0xd800;
|
|---|
| 181 | unsigned short low = uc%0x400 + 0xdc00;
|
|---|
| 182 | *qch++ = QChar(high);
|
|---|
| 183 | *qch++ = QChar(low);
|
|---|
| 184 | headerDone = TRUE;
|
|---|
| 185 | } else {
|
|---|
| 186 | if (headerDone || QChar(uc) != QChar::byteOrderMark)
|
|---|
| 187 | *qch++ = uc;
|
|---|
| 188 | headerDone = TRUE;
|
|---|
| 189 | }
|
|---|
| 190 | }
|
|---|
| 191 | } else {
|
|---|
| 192 | // error
|
|---|
| 193 | *qch++ = QChar::replacement;
|
|---|
| 194 | need = 0;
|
|---|
| 195 | }
|
|---|
| 196 | } else {
|
|---|
| 197 | if ( ch < 128 ) {
|
|---|
| 198 | *qch++ = ch;
|
|---|
| 199 | headerDone = TRUE;
|
|---|
| 200 | } else if ((ch & 0xe0) == 0xc0) {
|
|---|
| 201 | uc = ch & 0x1f;
|
|---|
| 202 | need = 1;
|
|---|
| 203 | } else if ((ch & 0xf0) == 0xe0) {
|
|---|
| 204 | uc = ch & 0x0f;
|
|---|
| 205 | need = 2;
|
|---|
| 206 | } else if ((ch&0xf8) == 0xf0) {
|
|---|
| 207 | uc = ch & 0x07;
|
|---|
| 208 | need = 3;
|
|---|
| 209 | }
|
|---|
| 210 | }
|
|---|
| 211 | }
|
|---|
| 212 | result.truncate( qch - result.unicode() );
|
|---|
| 213 | return result;
|
|---|
| 214 | }
|
|---|
| 215 | };
|
|---|
| 216 |
|
|---|
| 217 | QTextDecoder* QUtf8Codec::makeDecoder() const
|
|---|
| 218 | {
|
|---|
| 219 | return new QUtf8Decoder;
|
|---|
| 220 | }
|
|---|
| 221 |
|
|---|
| 222 |
|
|---|
| 223 |
|
|---|
| 224 |
|
|---|
| 225 |
|
|---|
| 226 |
|
|---|
| 227 | int QUtf16Codec::mibEnum() const
|
|---|
| 228 | {
|
|---|
| 229 | return 1000;
|
|---|
| 230 | }
|
|---|
| 231 |
|
|---|
| 232 | const char* QUtf16Codec::name() const
|
|---|
| 233 | {
|
|---|
| 234 | return "ISO-10646-UCS-2";
|
|---|
| 235 | }
|
|---|
| 236 |
|
|---|
| 237 | int QUtf16Codec::heuristicContentMatch(const char* chars, int len) const
|
|---|
| 238 | {
|
|---|
| 239 | uchar* uchars = (uchar*)chars;
|
|---|
| 240 | if ( len >= 2 && (uchars[0] == 0xff && uchars[1] == 0xfe ||
|
|---|
| 241 | uchars[1] == 0xff && uchars[0] == 0xfe) )
|
|---|
| 242 | return len;
|
|---|
| 243 | else
|
|---|
| 244 | return 0;
|
|---|
| 245 | }
|
|---|
| 246 |
|
|---|
| 247 |
|
|---|
| 248 |
|
|---|
| 249 |
|
|---|
| 250 | class QUtf16Encoder : public QTextEncoder {
|
|---|
| 251 | bool headerdone;
|
|---|
| 252 | public:
|
|---|
| 253 | QUtf16Encoder() : headerdone(FALSE)
|
|---|
| 254 | {
|
|---|
| 255 | }
|
|---|
| 256 |
|
|---|
| 257 | QCString fromUnicode(const QString& uc, int& lenInOut)
|
|---|
| 258 | {
|
|---|
| 259 | if ( headerdone ) {
|
|---|
| 260 | lenInOut = uc.length()*sizeof(QChar);
|
|---|
| 261 | QCString d(lenInOut);
|
|---|
| 262 | memcpy(d.data(),uc.unicode(),lenInOut);
|
|---|
| 263 | return d;
|
|---|
| 264 | } else {
|
|---|
| 265 | headerdone = TRUE;
|
|---|
| 266 | lenInOut = (1+uc.length())*sizeof(QChar);
|
|---|
| 267 | QCString d(lenInOut);
|
|---|
| 268 | memcpy(d.data(),&QChar::byteOrderMark,sizeof(QChar));
|
|---|
| 269 | memcpy(d.data()+sizeof(QChar),uc.unicode(),uc.length()*sizeof(QChar));
|
|---|
| 270 | return d;
|
|---|
| 271 | }
|
|---|
| 272 | }
|
|---|
| 273 | };
|
|---|
| 274 |
|
|---|
| 275 | class QUtf16Decoder : public QTextDecoder {
|
|---|
| 276 | uchar buf;
|
|---|
| 277 | bool half;
|
|---|
| 278 | bool swap;
|
|---|
| 279 | bool headerdone;
|
|---|
| 280 |
|
|---|
| 281 | public:
|
|---|
| 282 | QUtf16Decoder() : half(FALSE), swap(FALSE), headerdone(FALSE)
|
|---|
| 283 | {
|
|---|
| 284 | }
|
|---|
| 285 |
|
|---|
| 286 | QString toUnicode(const char* chars, int len)
|
|---|
| 287 | {
|
|---|
| 288 | QString result;
|
|---|
| 289 | result.setLength( len ); // worst case
|
|---|
| 290 | QChar *qch = (QChar *)result.unicode();
|
|---|
| 291 | QChar ch;
|
|---|
| 292 | while ( len-- ) {
|
|---|
| 293 | if ( half ) {
|
|---|
| 294 | if ( swap ) {
|
|---|
| 295 | ch.setRow( *chars++ );
|
|---|
| 296 | ch.setCell( buf );
|
|---|
| 297 | } else {
|
|---|
| 298 | ch.setRow( buf );
|
|---|
| 299 | ch.setCell( *chars++ );
|
|---|
| 300 | }
|
|---|
| 301 | if ( !headerdone ) {
|
|---|
| 302 | if ( ch == QChar::byteOrderSwapped ) {
|
|---|
| 303 | swap = !swap;
|
|---|
| 304 | } else if ( ch == QChar::byteOrderMark ) {
|
|---|
| 305 | // Ignore ZWNBSP
|
|---|
| 306 | } else {
|
|---|
| 307 | *qch++ = ch;
|
|---|
| 308 | }
|
|---|
| 309 | headerdone = TRUE;
|
|---|
| 310 | } else
|
|---|
| 311 | *qch++ = ch;
|
|---|
| 312 | half = FALSE;
|
|---|
| 313 | } else {
|
|---|
| 314 | buf = *chars++;
|
|---|
| 315 | half = TRUE;
|
|---|
| 316 | }
|
|---|
| 317 | }
|
|---|
| 318 | result.truncate( qch - result.unicode() );
|
|---|
| 319 | return result;
|
|---|
| 320 | }
|
|---|
| 321 | };
|
|---|
| 322 |
|
|---|
| 323 | QTextDecoder* QUtf16Codec::makeDecoder() const
|
|---|
| 324 | {
|
|---|
| 325 | return new QUtf16Decoder;
|
|---|
| 326 | }
|
|---|
| 327 |
|
|---|
| 328 | QTextEncoder* QUtf16Codec::makeEncoder() const
|
|---|
| 329 | {
|
|---|
| 330 | return new QUtf16Encoder;
|
|---|
| 331 | }
|
|---|
| 332 |
|
|---|
| 333 | #endif //QT_NO_TEXTCODEC
|
|---|