Context Navigation

source: trunk/src/helpers/encodings.c@ 133

Visit:

Last change on this file since 133 was 97, checked in by umoeller, 24 years ago
XML updates.
Property svn:eol-style set to `CRLF` Property svn:keywords set to `Author Date Id Revision`
File size: 6.9 KB

Line
1
2	/*
3	*@@sourcefile encodings.c:
4	* character encoding translations.
5	*
6	*@@header "encodings\base.h"
7	*@@added V0.9.9 (2001-02-14) [umoeller]
8	*/
9
10	/*
11	* Copyright (C) 2001 Ulrich Möller.
12	* This file is part of the "XWorkplace helpers" source package.
13	* This is free software; you can redistribute it and/or modify
14	* it under the terms of the GNU General Public License as published
15	* by the Free Software Foundation, in version 2 as it comes in the
16	* "COPYING" file of the XWorkplace main distribution.
17	* This program is distributed in the hope that it will be useful,
18	* but WITHOUT ANY WARRANTY; without even the implied warranty of
19	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20	* GNU General Public License for more details.
21	*/
22
23	#define OS2EMX_PLAIN_CHAR
24	// this is needed for "os2emx.h"; if this is defined,
25	// emx will define PSZ as _signed_ char, otherwise
26	// as unsigned char
27
28	#include <stdlib.h>
29	#include <string.h>
30
31	#include "setup.h" // code generation and debugging options
32
33	#include "encodings\base.h" // includes all other encodings
34
35	#pragma hdrstop
36
37	typedef struct _ENCODINGTABLE
38	{
39	XWPENCODINGID EncodingID;
40
41	unsigned short cEntries;
42	unsigned short ausEntries[1]; // variable size
43	} ENCODINGTABLE, *PENCODINGTABLE;
44
45
46	/*
47	*@@ encRegisterEncoding:
48	* registers a new proprietary encoding with the engine.
49	*
50	* Before you can translate encodings with this engine,
51	* you have to register them. This makes sure that the
52	* big encoding tables will only be linked to the executable
53	* code if they are explicitly referenced. As a result, you
54	* have to #include "encodings\base.h" and pass a pointer to
55	* one of the global tables in the header files to this
56	* function.
57	*
58	* This returns an encoding handle that can then be used
59	* with the other encoding functions.
60	*
61	* Example:
62	*
63	+ #include "encodings\base.h"
64	+ #include "encodings\alltables.h" // or a specific table only
65	+
66	+ int rc = encRegisterEncoding(&G_iso8859_1,
67	+ sizeof(G_iso8859_1) / sizeof(G_iso8859_1[0]),
68	+ enc_iso8859_1); // ID to register with
69	*/
70
71	long encRegisterEncoding(PXWPENCODINGMAP pEncodingMap,
72	unsigned long cArrayEntries, // count of array items
73	XWPENCODINGID EncodingID) // enum from encodings\base.h
74	{
75	long lrc = 0;
76
77	unsigned short usHighest = 0;
78	unsigned long ul;
79
80	// step 1:
81	// run through the table and calculate the highest
82	// character entry used
83	for (ul = 0;
84	ul < cArrayEntries;
85	ul++)
86	{
87	unsigned short usFrom = pEncodingMap[ul].usFrom;
88	if (usFrom > usHighest)
89	usHighest = usFrom;
90	}
91
92	// step 2: allocate encoding table
93	if (usHighest)
94	{
95	// allocate memory as needed
96	unsigned long cb = sizeof(ENCODINGTABLE)
97	+ ( (usHighest - 1)
98	* sizeof(unsigned short)
99	);
100
101	PENCODINGTABLE pTableNew = (PENCODINGTABLE)malloc(cb);
102	if (pTableNew)
103	{
104	memset(pTableNew, -1, cb);
105	pTableNew->cEntries = usHighest; // array size
106
107	// step 3: fill encoding table
108	// this only has the Unicode target USHORTs;
109	// the source is simply the offset. So to
110	// get Unicode for character 123 in the specific encoding,
111	// do pTableNew->ausEntries[123].
112	// If you get 0xFFFF, the encoding is undefined.
113
114	for (ul = 0;
115	ul < cArrayEntries;
116	ul++)
117	{
118	PXWPENCODINGMAP pEntry = &pEncodingMap[ul];
119	pTableNew->ausEntries[pEntry->usFrom] = pEntry->usUni;
120	}
121
122	lrc = (long)pTableNew;
123	}
124	}
125
126	return (lrc);
127	}
128
/*
 *@@ encDecodeUTF8:
 *      decodes one UTF-8 character and returns
 *      the Unicode value or -1 if the character
 *      is invalid.
 *
 *      On input, *ppch is assumed to point to
 *      the first byte of the UTF-8 char to be
 *      read.
 *
 *      Unless **ppch is a null character, this
 *      function will advance *ppch by at least
 *      one byte (or more if the UTF-8 char
 *      initially pointed to introduces a
 *      multi-byte sequence).
 *
 *      This returns -1 (that is, (unsigned long)-1)
 *      if *ppch points to an invalid encoding: a
 *      lead byte that cannot start a sequence, or a
 *      sequence whose following bytes are not of
 *      the form 10xxxxxx. In that case the pointer
 *      is advanced past the offending lead byte and
 *      all directly following bytes that have bit 7
 *      set, so that it ends up on the next byte
 *      below 0x80 (possibly the terminating null).
 *
 *      This returns 0 if **ppch points to a
 *      null character; *ppch is not advanced then.
 *
 *      Sequences of up to six bytes are accepted.
 *      Overlong forms other than the 0xc0 and 0xc1
 *      lead bytes are not rejected.
 *
 *@@added V0.9.14 (2001-08-09) [umoeller]
 */

unsigned long encDecodeUTF8(const char **ppch)
{
    // always go through unsigned char: plain char may be signed,
    // and a sign-extended lead byte would fail the range tests below
    unsigned long   ulChar = (unsigned char)**ppch;
    unsigned long   ulCount;        // sequence length; 0 means illegal
    unsigned long   ul2;

    if (!ulChar)
        return 0;

    if (ulChar < 0x80)
    {
        // simple, one byte only... use that
        (*ppch)++;
        return (ulChar);
    }

    // note: 0xc0 and 0xc1 are reserved and
    // cannot appear as the first UTF-8 byte

    if (    (ulChar >= 0xc2)
         && (ulChar < 0xe0)
       )
    {
        // that's two bytes
        ulCount = 2;
        ulChar &= 0x1f;
    }
    else if ((ulChar & 0xf0) == 0xe0)
    {
        // three bytes
        ulCount = 3;
        ulChar &= 0x0f;
    }
    else if ((ulChar & 0xf8) == 0xf0)
    {
        // four bytes
        ulCount = 4;
        ulChar &= 0x07;
    }
    else if ((ulChar & 0xfc) == 0xf8)
    {
        // five bytes
        ulCount = 5;
        ulChar &= 0x03;
    }
    else if ((ulChar & 0xfe) == 0xfc)
    {
        // six bytes
        ulCount = 6;
        ulChar &= 0x01;
    }
    else
        // stray continuation byte, 0xc0, 0xc1, 0xfe or 0xff
        ulCount = 0;

    // go for the second and more bytes then
    for (ul2 = 1;
         ul2 < ulCount;
         ++ul2)
    {
        unsigned long ulChar2 = (unsigned char)(*ppch)[ul2];

        // every following byte must be 10xxxxxx; this also
        // stops at a terminating null, so we never read past it
        if ((ulChar2 & 0xc0) != 0x80)
        {
            ulCount = 0;
            break;
        }

        ulChar <<= 6;
        ulChar |= ulChar2 & 0x3f;
    }

    if (ulCount)
    {
        *ppch += ulCount;
        return (ulChar);
    }

    // illegal: skip all the following characters
    // until we find something with bit 7 off
    // (a null byte has bit 7 off, so this stops there too)
    do
    {
        ulChar = (unsigned char)*(++(*ppch));
    } while (ulChar & 0x80);

    return ((unsigned long)-1);
}
247

Note: See TracBrowser for help on using the repository browser.

Download in other formats: