| 1 | /**************************************************************************** | 
|---|
| 2 | ** $Id: qutfcodec.cpp 2 2005-11-16 15:49:26Z dmik $ | 
|---|
| 3 | ** | 
|---|
| 4 | ** Implementation of QUtf{8,16}Codec class | 
|---|
| 5 | ** | 
|---|
| 6 | ** Created : 981015 | 
|---|
| 7 | ** | 
|---|
| 8 | ** Copyright (C) 1998-2002 Trolltech AS.  All rights reserved. | 
|---|
| 9 | ** | 
|---|
| 10 | ** This file is part of the tools module of the Qt GUI Toolkit. | 
|---|
| 11 | ** | 
|---|
| 12 | ** This file may be distributed under the terms of the Q Public License | 
|---|
| 13 | ** as defined by Trolltech AS of Norway and appearing in the file | 
|---|
| 14 | ** LICENSE.QPL included in the packaging of this file. | 
|---|
| 15 | ** | 
|---|
| 16 | ** This file may be distributed and/or modified under the terms of the | 
|---|
| 17 | ** GNU General Public License version 2 as published by the Free Software | 
|---|
| 18 | ** Foundation and appearing in the file LICENSE.GPL included in the | 
|---|
| 19 | ** packaging of this file. | 
|---|
| 20 | ** | 
|---|
| 21 | ** Licensees holding valid Qt Enterprise Edition or Qt Professional Edition | 
|---|
| 22 | ** licenses may use this file in accordance with the Qt Commercial License | 
|---|
| 23 | ** Agreement provided with the Software. | 
|---|
| 24 | ** | 
|---|
| 25 | ** This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE | 
|---|
| 26 | ** WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. | 
|---|
| 27 | ** | 
|---|
| 28 | ** See http://www.trolltech.com/pricing.html or email sales@trolltech.com for | 
|---|
| 29 | **   information about Qt Commercial License Agreements. | 
|---|
| 30 | ** See http://www.trolltech.com/qpl/ for QPL licensing information. | 
|---|
| 31 | ** See http://www.trolltech.com/gpl/ for GPL licensing information. | 
|---|
| 32 | ** | 
|---|
| 33 | ** Contact info@trolltech.com if any conditions of this licensing are | 
|---|
| 34 | ** not clear to you. | 
|---|
| 35 | ** | 
|---|
| 36 | **********************************************************************/ | 
|---|
| 37 |  | 
|---|
| 38 | #include "qutfcodec.h" | 
|---|
| 39 |  | 
|---|
| 40 | #ifndef QT_NO_TEXTCODEC | 
|---|
| 41 |  | 
|---|
| 42 | int QUtf8Codec::mibEnum() const | 
|---|
| 43 | { | 
|---|
| 44 | return 106; | 
|---|
| 45 | } | 
|---|
| 46 |  | 
|---|
| 47 | QCString QUtf8Codec::fromUnicode(const QString& uc, int& lenInOut) const | 
|---|
| 48 | { | 
|---|
| 49 | int l = uc.length(); | 
|---|
| 50 | if (lenInOut > 0) | 
|---|
| 51 | l = QMIN(l, lenInOut); | 
|---|
| 52 | int rlen = l*3+1; | 
|---|
| 53 | QCString rstr(rlen); | 
|---|
| 54 | uchar* cursor = (uchar*)rstr.data(); | 
|---|
| 55 | const QChar *ch = uc.unicode(); | 
|---|
| 56 | for (int i=0; i < l; i++) { | 
|---|
| 57 | uint u = ch->unicode(); | 
|---|
| 58 | if ( u < 0x80 ) { | 
|---|
| 59 | *cursor++ = (uchar)u; | 
|---|
| 60 | } else { | 
|---|
| 61 | if ( u < 0x0800 ) { | 
|---|
| 62 | *cursor++ = 0xc0 | ((uchar) (u >> 6)); | 
|---|
| 63 | } else { | 
|---|
| 64 | if (u >= 0xd800 && u < 0xdc00 && i < l-1) { | 
|---|
| 65 | unsigned short low = ch[1].unicode(); | 
|---|
| 66 | if (low >= 0xdc00 && low < 0xe000) { | 
|---|
| 67 | ++ch; | 
|---|
| 68 | ++i; | 
|---|
| 69 | u = (u - 0xd800)*0x400 + (low - 0xdc00) + 0x10000; | 
|---|
| 70 | } | 
|---|
| 71 | } | 
|---|
| 72 | if (u > 0xffff) { | 
|---|
| 73 | // see QString::fromUtf8() and QString::utf8() for explanations | 
|---|
| 74 | if (u > 0x10fe00 && u < 0x10ff00) { | 
|---|
| 75 | *cursor++ = (u - 0x10fe00); | 
|---|
| 76 | ++ch; | 
|---|
| 77 | continue; | 
|---|
| 78 | } else { | 
|---|
| 79 | *cursor++ = 0xf0 | ((uchar) (u >> 18)); | 
|---|
| 80 | *cursor++ = 0x80 | ( ((uchar) (u >> 12)) & 0x3f); | 
|---|
| 81 | } | 
|---|
| 82 | } else { | 
|---|
| 83 | *cursor++ = 0xe0 | ((uchar) (u >> 12)); | 
|---|
| 84 | } | 
|---|
| 85 | *cursor++ = 0x80 | ( ((uchar) (u >> 6)) & 0x3f); | 
|---|
| 86 | } | 
|---|
| 87 | *cursor++ = 0x80 | ((uchar) (u&0x3f)); | 
|---|
| 88 | } | 
|---|
| 89 | ++ch; | 
|---|
| 90 | } | 
|---|
| 91 | *cursor = 0; | 
|---|
| 92 | lenInOut = cursor - (uchar*)rstr.data(); | 
|---|
| 93 | ((QByteArray&)rstr).resize(lenInOut+1); | 
|---|
| 94 | return rstr; | 
|---|
| 95 | } | 
|---|
| 96 |  | 
|---|
| 97 | QString QUtf8Codec::toUnicode(const char* chars, int len) const | 
|---|
| 98 | { | 
|---|
| 99 | if (len > 3 && (uchar)chars[0] == 0xef && (uchar)chars[1] == 0xbb && (uchar)chars[2] == 0xbf) { | 
|---|
| 100 | // starts with a byte order mark | 
|---|
| 101 | chars += 3; | 
|---|
| 102 | len -= 3; | 
|---|
| 103 | } | 
|---|
| 104 | return QString::fromUtf8( chars, len ); | 
|---|
| 105 | } | 
|---|
| 106 |  | 
|---|
| 107 |  | 
|---|
| 108 | const char* QUtf8Codec::name() const | 
|---|
| 109 | { | 
|---|
| 110 | return "UTF-8"; | 
|---|
| 111 | } | 
|---|
| 112 |  | 
|---|
| 113 | int QUtf8Codec::heuristicContentMatch(const char* chars, int len) const | 
|---|
| 114 | { | 
|---|
| 115 | int score = 0; | 
|---|
| 116 | for (int i=0; i<len; i++) { | 
|---|
| 117 | uchar ch = chars[i]; | 
|---|
| 118 | // No nulls allowed. | 
|---|
| 119 | if ( !ch ) | 
|---|
| 120 | return -1; | 
|---|
| 121 | if ( ch < 128 ) { | 
|---|
| 122 | // Inconclusive | 
|---|
| 123 | score++; | 
|---|
| 124 | } else if ( (ch&0xe0) == 0xc0 ) { | 
|---|
| 125 | if ( i < len-1 ) { | 
|---|
| 126 | uchar c2 = chars[++i]; | 
|---|
| 127 | if ( (c2&0xc0) != 0x80 ) | 
|---|
| 128 | return -1; | 
|---|
| 129 | score+=3; | 
|---|
| 130 | } | 
|---|
| 131 | } else if ( (ch&0xf0) == 0xe0 ) { | 
|---|
| 132 | if ( i < len-1 ) { | 
|---|
| 133 | uchar c2 = chars[++i]; | 
|---|
| 134 | if ( (c2&0xc0) != 0x80 ) { | 
|---|
| 135 | return -1; | 
|---|
| 136 | #if 0 | 
|---|
| 137 | if ( i < len-1 ) { | 
|---|
| 138 | uchar c3 = chars[++i]; | 
|---|
| 139 | if ( (c3&0xc0) != 0x80 ) | 
|---|
| 140 | return -1; | 
|---|
| 141 | score+=3; | 
|---|
| 142 | } | 
|---|
| 143 | #endif | 
|---|
| 144 | } | 
|---|
| 145 | score+=2; | 
|---|
| 146 | } | 
|---|
| 147 | } | 
|---|
| 148 | } | 
|---|
| 149 | return score; | 
|---|
| 150 | } | 
|---|
| 151 |  | 
|---|
| 152 |  | 
|---|
| 153 |  | 
|---|
| 154 |  | 
|---|
| 155 | class QUtf8Decoder : public QTextDecoder { | 
|---|
| 156 | uint uc; | 
|---|
| 157 | int need; | 
|---|
| 158 | bool headerDone; | 
|---|
| 159 | public: | 
|---|
| 160 | QUtf8Decoder() : need(0), headerDone(FALSE) | 
|---|
| 161 | { | 
|---|
| 162 | } | 
|---|
| 163 |  | 
|---|
| 164 | QString toUnicode(const char* chars, int len) | 
|---|
| 165 | { | 
|---|
| 166 | QString result; | 
|---|
| 167 | result.setLength( len ); // worst case | 
|---|
| 168 | QChar *qch = (QChar *)result.unicode(); | 
|---|
| 169 | uchar ch; | 
|---|
| 170 | for (int i=0; i<len; i++) { | 
|---|
| 171 | ch = *chars++; | 
|---|
| 172 | if (need) { | 
|---|
| 173 | if ( (ch&0xc0) == 0x80 ) { | 
|---|
| 174 | uc = (uc << 6) | (ch & 0x3f); | 
|---|
| 175 | need--; | 
|---|
| 176 | if ( !need ) { | 
|---|
| 177 | if (uc > 0xffff) { | 
|---|
| 178 | // surrogate pair | 
|---|
| 179 | uc -= 0x10000; | 
|---|
| 180 | unsigned short high = uc/0x400 + 0xd800; | 
|---|
| 181 | unsigned short low = uc%0x400 + 0xdc00; | 
|---|
| 182 | *qch++ = QChar(high); | 
|---|
| 183 | *qch++ = QChar(low); | 
|---|
| 184 | headerDone = TRUE; | 
|---|
| 185 | } else { | 
|---|
| 186 | if (headerDone || QChar(uc) != QChar::byteOrderMark) | 
|---|
| 187 | *qch++ = uc; | 
|---|
| 188 | headerDone = TRUE; | 
|---|
| 189 | } | 
|---|
| 190 | } | 
|---|
| 191 | } else { | 
|---|
| 192 | // error | 
|---|
| 193 | *qch++ = QChar::replacement; | 
|---|
| 194 | need = 0; | 
|---|
| 195 | } | 
|---|
| 196 | } else { | 
|---|
| 197 | if ( ch < 128 ) { | 
|---|
| 198 | *qch++ = ch; | 
|---|
| 199 | headerDone = TRUE; | 
|---|
| 200 | } else if ((ch & 0xe0) == 0xc0) { | 
|---|
| 201 | uc = ch & 0x1f; | 
|---|
| 202 | need = 1; | 
|---|
| 203 | } else if ((ch & 0xf0) == 0xe0) { | 
|---|
| 204 | uc = ch & 0x0f; | 
|---|
| 205 | need = 2; | 
|---|
| 206 | } else if ((ch&0xf8) == 0xf0) { | 
|---|
| 207 | uc = ch & 0x07; | 
|---|
| 208 | need = 3; | 
|---|
| 209 | } | 
|---|
| 210 | } | 
|---|
| 211 | } | 
|---|
| 212 | result.truncate( qch - result.unicode() ); | 
|---|
| 213 | return result; | 
|---|
| 214 | } | 
|---|
| 215 | }; | 
|---|
| 216 |  | 
|---|
| 217 | QTextDecoder* QUtf8Codec::makeDecoder() const | 
|---|
| 218 | { | 
|---|
| 219 | return new QUtf8Decoder; | 
|---|
| 220 | } | 
|---|
| 221 |  | 
|---|
| 222 |  | 
|---|
| 223 |  | 
|---|
| 224 |  | 
|---|
| 225 |  | 
|---|
| 226 |  | 
|---|
| 227 | int QUtf16Codec::mibEnum() const | 
|---|
| 228 | { | 
|---|
| 229 | return 1000; | 
|---|
| 230 | } | 
|---|
| 231 |  | 
|---|
| 232 | const char* QUtf16Codec::name() const | 
|---|
| 233 | { | 
|---|
| 234 | return "ISO-10646-UCS-2"; | 
|---|
| 235 | } | 
|---|
| 236 |  | 
|---|
| 237 | int QUtf16Codec::heuristicContentMatch(const char* chars, int len) const | 
|---|
| 238 | { | 
|---|
| 239 | uchar* uchars = (uchar*)chars; | 
|---|
| 240 | if ( len >= 2 && (uchars[0] == 0xff && uchars[1] == 0xfe || | 
|---|
| 241 | uchars[1] == 0xff && uchars[0] == 0xfe) ) | 
|---|
| 242 | return len; | 
|---|
| 243 | else | 
|---|
| 244 | return 0; | 
|---|
| 245 | } | 
|---|
| 246 |  | 
|---|
| 247 |  | 
|---|
| 248 |  | 
|---|
| 249 |  | 
|---|
| 250 | class QUtf16Encoder : public QTextEncoder { | 
|---|
| 251 | bool headerdone; | 
|---|
| 252 | public: | 
|---|
| 253 | QUtf16Encoder() : headerdone(FALSE) | 
|---|
| 254 | { | 
|---|
| 255 | } | 
|---|
| 256 |  | 
|---|
| 257 | QCString fromUnicode(const QString& uc, int& lenInOut) | 
|---|
| 258 | { | 
|---|
| 259 | if ( headerdone ) { | 
|---|
| 260 | lenInOut = uc.length()*sizeof(QChar); | 
|---|
| 261 | QCString d(lenInOut); | 
|---|
| 262 | memcpy(d.data(),uc.unicode(),lenInOut); | 
|---|
| 263 | return d; | 
|---|
| 264 | } else { | 
|---|
| 265 | headerdone = TRUE; | 
|---|
| 266 | lenInOut = (1+uc.length())*sizeof(QChar); | 
|---|
| 267 | QCString d(lenInOut); | 
|---|
| 268 | memcpy(d.data(),&QChar::byteOrderMark,sizeof(QChar)); | 
|---|
| 269 | memcpy(d.data()+sizeof(QChar),uc.unicode(),uc.length()*sizeof(QChar)); | 
|---|
| 270 | return d; | 
|---|
| 271 | } | 
|---|
| 272 | } | 
|---|
| 273 | }; | 
|---|
| 274 |  | 
|---|
| 275 | class QUtf16Decoder : public QTextDecoder { | 
|---|
| 276 | uchar buf; | 
|---|
| 277 | bool half; | 
|---|
| 278 | bool swap; | 
|---|
| 279 | bool headerdone; | 
|---|
| 280 |  | 
|---|
| 281 | public: | 
|---|
| 282 | QUtf16Decoder() : half(FALSE), swap(FALSE), headerdone(FALSE) | 
|---|
| 283 | { | 
|---|
| 284 | } | 
|---|
| 285 |  | 
|---|
| 286 | QString toUnicode(const char* chars, int len) | 
|---|
| 287 | { | 
|---|
| 288 | QString result; | 
|---|
| 289 | result.setLength( len ); // worst case | 
|---|
| 290 | QChar *qch = (QChar *)result.unicode(); | 
|---|
| 291 | QChar ch; | 
|---|
| 292 | while ( len-- ) { | 
|---|
| 293 | if ( half ) { | 
|---|
| 294 | if ( swap ) { | 
|---|
| 295 | ch.setRow( *chars++ ); | 
|---|
| 296 | ch.setCell( buf ); | 
|---|
| 297 | } else { | 
|---|
| 298 | ch.setRow( buf ); | 
|---|
| 299 | ch.setCell( *chars++ ); | 
|---|
| 300 | } | 
|---|
| 301 | if ( !headerdone ) { | 
|---|
| 302 | if ( ch == QChar::byteOrderSwapped ) { | 
|---|
| 303 | swap = !swap; | 
|---|
| 304 | } else if ( ch == QChar::byteOrderMark ) { | 
|---|
| 305 | // Ignore ZWNBSP | 
|---|
| 306 | } else { | 
|---|
| 307 | *qch++ = ch; | 
|---|
| 308 | } | 
|---|
| 309 | headerdone = TRUE; | 
|---|
| 310 | } else | 
|---|
| 311 | *qch++ = ch; | 
|---|
| 312 | half = FALSE; | 
|---|
| 313 | } else { | 
|---|
| 314 | buf = *chars++; | 
|---|
| 315 | half = TRUE; | 
|---|
| 316 | } | 
|---|
| 317 | } | 
|---|
| 318 | result.truncate( qch - result.unicode() ); | 
|---|
| 319 | return result; | 
|---|
| 320 | } | 
|---|
| 321 | }; | 
|---|
| 322 |  | 
|---|
| 323 | QTextDecoder* QUtf16Codec::makeDecoder() const | 
|---|
| 324 | { | 
|---|
| 325 | return new QUtf16Decoder; | 
|---|
| 326 | } | 
|---|
| 327 |  | 
|---|
| 328 | QTextEncoder* QUtf16Codec::makeEncoder() const | 
|---|
| 329 | { | 
|---|
| 330 | return new QUtf16Encoder; | 
|---|
| 331 | } | 
|---|
| 332 |  | 
|---|
| 333 | #endif //QT_NO_TEXTCODEC | 
|---|