source: trunk/src/codecs/qsjiscodec.cpp@ 94

Last change on this file since 94 was 2, checked in by dmik, 20 years ago

Imported xplatform parts of the official release 3.3.1 from Trolltech

  • Property svn:keywords set to Id
File size: 9.8 KB
Line 
1/****************************************************************************
2** $Id: qsjiscodec.cpp 2 2005-11-16 15:49:26Z dmik $
3**
4** Implementation of QSjisCodec class
5**
6** Created : 990225
7**
8** Copyright (C) 2000-2002 Trolltech AS. All rights reserved.
9**
10** This file is part of the tools module of the Qt GUI Toolkit.
11**
12** This file may be distributed under the terms of the Q Public License
13** as defined by Trolltech AS of Norway and appearing in the file
14** LICENSE.QPL included in the packaging of this file.
15**
16** This file may be distributed and/or modified under the terms of the
17** GNU General Public License version 2 as published by the Free Software
18** Foundation and appearing in the file LICENSE.GPL included in the
19** packaging of this file.
20**
21** Licensees holding valid Qt Enterprise Edition or Qt Professional Edition
22** licenses may use this file in accordance with the Qt Commercial License
23** Agreement provided with the Software.
24**
25** This file is provided AS IS with NO WARRANTY OF ANY KIND, INCLUDING THE
26** WARRANTY OF DESIGN, MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
27**
28** See http://www.trolltech.com/pricing.html or email sales@trolltech.com for
29** information about Qt Commercial License Agreements.
30** See http://www.trolltech.com/qpl/ for QPL licensing information.
31** See http://www.trolltech.com/gpl/ for GPL licensing information.
32**
33** Contact info@trolltech.com if any conditions of this licensing are
34** not clear to you.
35**
36**********************************************************************/
37
38// Most of the code here was originally written by Serika Kurusugawa
39// a.k.a. Junji Takagi, and is included in Qt with the author's permission,
40// and the grateful thanks of the Trolltech team.
41
42/*! \class QSjisCodec qsjiscodec.h
43 \reentrant
44 \ingroup i18n
45 \brief The QSjisCodec class provides conversion to and from Shift-JIS.
46
47 More precisely, the QSjisCodec class subclasses QTextCodec to
48 provide support for Shift-JIS, an encoding of JIS X 0201 Latin, JIS
49 X 0201 Kana or JIS X 0208.
50
51 The environment variable \c UNICODEMAP_JP can be used to fine-tune
52 QJisCodec, QSjisCodec and QEucJpCodec. The \l QJisCodec
53 documentation describes how to use this variable.
54
55 Most of the code here was written by Serika Kurusugawa,
56 a.k.a. Junji Takagi, and is included in Qt with the author's
57 permission and the grateful thanks of the Trolltech team.
58 Here is the copyright statement for the code as it was at the
59 point of contribution. Trolltech's subsequent modifications
60 are covered by the usual copyright for Qt.
61
62 \legalese
63
64 Copyright (C) 1999 Serika Kurusugawa. All rights reserved.
65
66 Redistribution and use in source and binary forms, with or without
67 modification, are permitted provided that the following conditions
68 are met:
69 \list 1
70 \i Redistributions of source code must retain the above copyright
71 notice, this list of conditions and the following disclaimer.
72 \i Redistributions in binary form must reproduce the above copyright
73 notice, this list of conditions and the following disclaimer in the
74 documentation and/or other materials provided with the distribution.
75 \endlist
76
77 THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS".
78 ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
79 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
80 ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
81 FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
82 DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
83 OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
84 HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
85 LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
86 OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
87 SUCH DAMAGE.
88*/
89
90#include "qsjiscodec.h"
91
92#ifndef QT_NO_BIG_CODECS
93
94static const uchar Esc = 0x1b;
95
// Byte classification helpers. Every macro may evaluate its argument more
// than once, so only pass expressions without side effects.

// Single byte in the JIS X 0201 Kana range.
#define IsKana(c)             ((c) >= 0xa1 && (c) <= 0xdf)

// First (lead) byte of a two-byte Shift-JIS character.
#define IsSjisChar1(c)        (((c) >= 0x81 && (c) <= 0x9f) || \
                               ((c) >= 0xe0 && (c) <= 0xfc))

// Second (trail) byte of a two-byte Shift-JIS character; 0x7f is excluded.
#define IsSjisChar2(c)        ((c) >= 0x40 && (c) != 0x7f && (c) <= 0xfc)

// Lead byte of the user-defined area; the decoders in this file map such
// characters to QChar::replacement.
#define IsUserDefinedChar1(c) ((c) >= 0xf0 && (c) <= 0xfc)

// Turns a conversion result into a QChar, mapping 0 to QChar::replacement.
#define QValidChar(u)         ((u) ? QChar((ushort)(u)) : QChar::replacement)
103
104/*!
105 Creates a Shift-JIS codec. Note that this is done automatically by
106 the QApplication, you do not need construct your own.
107*/
108QSjisCodec::QSjisCodec() : conv(QJpUnicodeConv::newConverter(QJpUnicodeConv::Default))
109{
110}
111
112
113/*!
114 Destroys the Shift-JIS codec.
115*/
116QSjisCodec::~QSjisCodec()
117{
118 delete (QJpUnicodeConv*)conv;
119 conv = 0;
120}
121
122
123/*!
124 \reimp
125*/
126int QSjisCodec::mibEnum() const
127{
128 /*
129 Name: Shift_JIS (preferred MIME name)
130 MIBenum: 17
131 Source: A Microsoft code that extends csHalfWidthKatakana to include
132 kanji by adding a second byte when the value of the first
133 byte is in the ranges 81-9F or E0-EF.
134 Alias: MS_Kanji
135 Alias: csShiftJIS
136 */
137 return 17;
138}
139
140/*!
141 \reimp
142*/
143QCString QSjisCodec::fromUnicode(const QString& uc, int& lenInOut) const
144{
145 int l = QMIN((int)uc.length(),lenInOut);
146 int rlen = l*2+1;
147 QCString rstr(rlen);
148 uchar* cursor = (uchar*)rstr.data();
149 for (int i=0; i<l; i++) {
150 QChar ch = uc[i];
151 uint j;
152 if ( ch.row() == 0x00 && ch.cell() < 0x80 ) {
153 // ASCII
154 *cursor++ = ch.cell();
155 } else if ((j = conv->unicodeToJisx0201(ch.row(), ch.cell())) != 0) {
156 // JIS X 0201 Latin or JIS X 0201 Kana
157 *cursor++ = j;
158 } else if ((j = conv->unicodeToSjis(ch.row(), ch.cell())) != 0) {
159 // JIS X 0208
160 *cursor++ = (j >> 8);
161 *cursor++ = (j & 0xff);
162 } else if ((j = conv->unicodeToJisx0212(ch.row(), ch.cell())) != 0) {
163 // JIS X 0212 (can't be encoded in ShiftJIS !)
164 *cursor++ = 0x81; // white square
165 *cursor++ = 0xa0; // white square
166 } else {
167 // Error
168 *cursor++ = '?'; // unknown char
169 }
170 }
171 lenInOut = cursor - (uchar*)rstr.data();
172 rstr.truncate(lenInOut);
173 return rstr;
174}
175
176/*!
177 \reimp
178*/
179QString QSjisCodec::toUnicode(const char* chars, int len) const
180{
181 QString result;
182 for (int i=0; i<len; i++) {
183 uchar ch = chars[i];
184 if ( ch < 0x80 || IsKana(ch) ) {
185 // JIS X 0201 Latin or JIS X 0201 Kana
186 uint u = conv->jisx0201ToUnicode(ch);
187 result += QValidChar(u);
188 } else if ( IsSjisChar1(ch) ) {
189 // JIS X 0208
190 if ( i < len-1 ) {
191 uchar c2 = chars[++i];
192 if ( IsSjisChar2(c2) ) {
193 if ( IsUserDefinedChar1(ch) ) {
194 result += QChar::replacement;
195 } else {
196 uint u = conv->sjisToUnicode(ch, c2);
197 result += QValidChar(u);
198 }
199 } else {
200 i--;
201 result += QChar::replacement;
202 }
203 } else {
204 result += QChar::replacement;
205 }
206 } else {
207 result += QChar::replacement;
208 }
209 }
210 return result;
211}
212
213/*!
214 \reimp
215*/
216const char* QSjisCodec::name() const
217{
218 return "SJIS";
219}
220
221/*!
222 Returns the codec's mime name.
223*/
224const char* QSjisCodec::mimeName() const
225{
226 return "Shift_JIS";
227}
228
229/*!
230 \reimp
231*/
232int QSjisCodec::heuristicNameMatch(const char* hint) const
233{
234 int score = 0;
235 bool ja = FALSE;
236 if (qstrnicmp(hint, "ja_JP", 5) == 0 || qstrnicmp(hint, "japan", 5) == 0) {
237 score += 3;
238 ja = TRUE;
239 } else if (qstrnicmp(hint, "ja", 2) == 0) {
240 score += 2;
241 ja = TRUE;
242 }
243 const char *p;
244 if (ja) {
245 p = strchr(hint, '.');
246 if (p == 0) {
247 return score - 1;
248 }
249 p++;
250 } else {
251 p = hint;
252 }
253 if (p) {
254 if ((qstricmp(p, "mscode") == 0) ||
255 (qstricmp(p, "PCK") == 0) ||
256 (qstricmp(p, "SJIS") == 0) ||
257 (simpleHeuristicNameMatch(p, "ShiftJIS") > 0) ||
258 (simpleHeuristicNameMatch(p, "x-sjis") > 0)) {
259 return score + 4;
260 }
261 }
262 return QTextCodec::heuristicNameMatch(hint);
263}
264
265/*!
266 \reimp
267*/
268int QSjisCodec::heuristicContentMatch(const char* chars, int len) const
269{
270 int score = 0;
271 for (int i=0; i<len; i++) {
272 uchar ch = chars[i];
273 // No nulls allowed.
274 if ( !ch || ch == Esc )
275 return -1;
276 if ( ch < 32 && ch != '\t' && ch != '\n' && ch != '\r' ) {
277 // Suspicious
278 if ( score )
279 score--;
280 } else if ( ch < 0x80 ) {
281 // Inconclusive
282 score++;
283 } else if ( IsKana(ch) ) {
284 // JIS X 0201 Kana
285 score++;
286 } else if ( IsSjisChar1(ch) ) {
287 // JIS X 0208-1990
288 if ( i < len-1 ) {
289 uchar c2 = chars[++i];
290 if ( !IsSjisChar2(c2) )
291 return -1;
292 score++;
293 }
294 score++;
295 } else {
296 // Invalid
297 return -1;
298 }
299 }
300 return score;
301}
302
303class QSjisDecoder : public QTextDecoder {
304 uchar buf[1];
305 int nbuf;
306 const QJpUnicodeConv * const conv;
307public:
308 QSjisDecoder(const QJpUnicodeConv *c) : nbuf(0), conv(c)
309 {
310 }
311
312 QString toUnicode(const char* chars, int len)
313 {
314 QString result;
315 for (int i=0; i<len; i++) {
316 uchar ch = chars[i];
317 switch (nbuf) {
318 case 0:
319 if ( ch < 0x80 || IsKana(ch) ) {
320 // JIS X 0201 Latin or JIS X 0201 Kana
321 uint u = conv->jisx0201ToUnicode(ch);
322 result += QValidChar(u);
323 } else if ( IsSjisChar1(ch) ) {
324 // JIS X 0208
325 buf[0] = ch;
326 nbuf = 1;
327 } else {
328 // Invalid
329 result += QChar::replacement;
330 }
331 break;
332 case 1:
333 // JIS X 0208
334 if ( IsSjisChar2(ch) ) {
335 if ( IsUserDefinedChar1(buf[0]) ) {
336 result += QChar::replacement;
337 } else {
338 uint u = conv->sjisToUnicode(buf[0], ch);
339 result += QValidChar(u);
340 }
341 } else {
342 // Invalid
343 result += QChar::replacement;
344 }
345 nbuf = 0;
346 break;
347 }
348 }
349 return result;
350 }
351};
352
353/*!
354 \reimp
355*/
356QTextDecoder* QSjisCodec::makeDecoder() const
357{
358 return new QSjisDecoder(conv);
359}
360
361#endif
Note: See TracBrowser for help on using the repository browser.