source: trunk/src/helpers/encodings.c@ 111

Last change on this file since 111 was 97, checked in by umoeller, 24 years ago

XML updates.

  • Property svn:eol-style set to CRLF
  • Property svn:keywords set to Author Date Id Revision
File size: 6.9 KB
Line 
1
2/*
3 *@@sourcefile encodings.c:
4 * character encoding translations.
5 *
6 *@@header "encodings\base.h"
7 *@@added V0.9.9 (2001-02-14) [umoeller]
8 */
9
10/*
11 * Copyright (C) 2001 Ulrich Möller.
12 * This file is part of the "XWorkplace helpers" source package.
13 * This is free software; you can redistribute it and/or modify
14 * it under the terms of the GNU General Public License as published
15 * by the Free Software Foundation, in version 2 as it comes in the
16 * "COPYING" file of the XWorkplace main distribution.
17 * This program is distributed in the hope that it will be useful,
18 * but WITHOUT ANY WARRANTY; without even the implied warranty of
19 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 * GNU General Public License for more details.
21 */
22
23#define OS2EMX_PLAIN_CHAR
24 // this is needed for "os2emx.h"; if this is defined,
25 // emx will define PSZ as _signed_ char, otherwise
26 // as unsigned char
27
28#include <stdlib.h>
29#include <string.h>
30
31#include "setup.h" // code generation and debugging options
32
33#include "encodings\base.h" // includes all other encodings
34
35#pragma hdrstop
36
37typedef struct _ENCODINGTABLE
38{
39 XWPENCODINGID EncodingID;
40
41 unsigned short cEntries;
42 unsigned short ausEntries[1]; // variable size
43} ENCODINGTABLE, *PENCODINGTABLE;
44
45
46/*
47 *@@ encRegisterEncoding:
48 * registers a new proprietary encoding with the engine.
49 *
50 * Before you can translate encodings with this engine,
51 * you have to register them. This makes sure that the
52 * big encoding tables will only be linked to the executable
53 * code if they are explicitly referenced. As a result, you
54 * have to #include "encodings\base.h" and pass a pointer to
55 * one of the global tables in the header files to this
56 * function.
57 *
58 * This returns an encoding handle that can then be used
59 * with the other encoding functions.
60 *
61 * Example:
62 *
63 + #include "encodings\base.h"
64 + #include "encodings\alltables.h" // or a specific table only
65 +
66 + int rc = encRegisterEncoding(&G_iso8859_1,
67 + sizeof(G_iso8859_1) / sizeof(G_iso8859_1[0]),
68 + enc_iso8859_1); // ID to register with
69 */
70
71long encRegisterEncoding(PXWPENCODINGMAP pEncodingMap,
72 unsigned long cArrayEntries, // count of array items
73 XWPENCODINGID EncodingID) // enum from encodings\base.h
74{
75 long lrc = 0;
76
77 unsigned short usHighest = 0;
78 unsigned long ul;
79
80 // step 1:
81 // run through the table and calculate the highest
82 // character entry used
83 for (ul = 0;
84 ul < cArrayEntries;
85 ul++)
86 {
87 unsigned short usFrom = pEncodingMap[ul].usFrom;
88 if (usFrom > usHighest)
89 usHighest = usFrom;
90 }
91
92 // step 2: allocate encoding table
93 if (usHighest)
94 {
95 // allocate memory as needed
96 unsigned long cb = sizeof(ENCODINGTABLE)
97 + ( (usHighest - 1)
98 * sizeof(unsigned short)
99 );
100
101 PENCODINGTABLE pTableNew = (PENCODINGTABLE)malloc(cb);
102 if (pTableNew)
103 {
104 memset(pTableNew, -1, cb);
105 pTableNew->cEntries = usHighest; // array size
106
107 // step 3: fill encoding table
108 // this only has the Unicode target USHORTs;
109 // the source is simply the offset. So to
110 // get Unicode for character 123 in the specific encoding,
111 // do pTableNew->ausEntries[123].
112 // If you get 0xFFFF, the encoding is undefined.
113
114 for (ul = 0;
115 ul < cArrayEntries;
116 ul++)
117 {
118 PXWPENCODINGMAP pEntry = &pEncodingMap[ul];
119 pTableNew->ausEntries[pEntry->usFrom] = pEntry->usUni;
120 }
121
122 lrc = (long)pTableNew;
123 }
124 }
125
126 return (lrc);
127}
128
/*
 *@@ encDecodeUTF8:
 *      decodes one UTF-8 character and returns
 *      the Unicode value or -1 (that is,
 *      (unsigned long)-1) if the character
 *      is invalid.
 *
 *      On input, *ppch is assumed to point to
 *      the first byte of the UTF-8 char to be
 *      read.
 *
 *      Unless **ppch is the null character,
 *      this function will advance *ppch by at
 *      least one byte (or more if the UTF-8
 *      char initially pointed to introduces
 *      a multi-byte sequence). Sequences of
 *      up to six bytes are accepted.
 *
 *      This returns -1 if *ppch points to an
 *      invalid encoding: a byte that cannot
 *      start a sequence (0x80 thru 0xc1, 0xfe,
 *      0xff) or a sequence in which one of the
 *      following bytes is not of the form
 *      10xxxxxx. In that case *ppch is advanced
 *      to the next byte that has bit 7 off
 *      (which may be the null terminator).
 *
 *      This returns 0 if **ppch points to a
 *      null character; *ppch is not advanced
 *      then.
 *
 *@@added V0.9.14 (2001-08-09) [umoeller]
 */

unsigned long encDecodeUTF8(const char **ppch)
{
    // go thru unsigned char: plain char may be signed, and
    // a sign-extended byte would break the range checks below
    unsigned long   ulChar = (unsigned char)**ppch;
    unsigned long   ulCount = 1;
    unsigned long   ul2;
    int             fIllegal = 0;

    if (!ulChar)
        return 0;

    if (ulChar < 0x80)
    {
        // simple, one byte only... use that
        (*ppch)++;
        return (ulChar);
    }

    // note: 0xc0 and 0xc1 are reserved and
    // cannot appear as the first UTF-8 byte

    if (    (ulChar >= 0xc2)
         && (ulChar < 0xe0)
       )
    {
        // that's two bytes
        ulCount = 2;
        ulChar &= 0x1f;
    }
    else if ((ulChar & 0xf0) == 0xe0)
    {
        // three bytes
        ulCount = 3;
        ulChar &= 0x0f;
    }
    else if ((ulChar & 0xf8) == 0xf0)
    {
        // four bytes
        ulCount = 4;
        ulChar &= 0x07;
    }
    else if ((ulChar & 0xfc) == 0xf8)
    {
        // five bytes
        ulCount = 5;
        ulChar &= 0x03;
    }
    else if ((ulChar & 0xfe) == 0xfc)
    {
        // six bytes
        ulCount = 6;
        ulChar &= 0x01;
    }
    else
        // stray continuation byte, 0xc0, 0xc1, 0xfe or 0xff
        fIllegal = 1;

    if (!fIllegal)
    {
        // go for the second and more bytes then
        for (ul2 = 1;
             ul2 < ulCount;
             ++ul2)
        {
            unsigned long ulChar2 = (unsigned char)(*ppch)[ul2];

            // every following byte must be 10xxxxxx; this
            // also catches the null terminator, so we never
            // read beyond the end of the string
            if ((ulChar2 & 0xc0) != 0x80)
            {
                fIllegal = 1;
                break;
            }

            ulChar <<= 6;
            ulChar |= ulChar2 & 0x3f;
        }
    }

    if (fIllegal)
    {
        // skip all the following characters
        // until we find something with bit 7 off
        // (the null terminator has bit 7 off too)
        unsigned long ulSkip;
        do
        {
            ulSkip = (unsigned char)*(++(*ppch));
        } while (ulSkip & 0x80);

        // report the error; the byte we stopped on has not
        // been consumed and will be decoded by the next call
        return ((unsigned long)-1);
    }

    *ppch += ulCount;

    return (ulChar);
}
247
Note: See TracBrowser for help on using the repository browser.