Context Navigation

← Previous Revision
Next Revision →
Blame
Revision Log

bs_string_conv.cpp

Visit:

Last change on this file was 449, checked in by pr, 5 years ago
Add BSUString copy constructor for codepaged C strings. Substitute "(C)" for copyright symbol on unsupported codepages.
Property svn:eol-style set to `CRLF` Property svn:keywords set to `Author Date Id Revision`
File size: 8.2 KB

Line
1
2	/*
3	*@@sourcefile bs_string_conv.cpp:
4	* implementation for BSUniCodec.
5	*
6	*@@header "cppbase\bs_string.h"
7	*@@added V0.9.18 (2002-03-08) [umoeller]
8	*/
9
10	/*
11	* This file Copyright (C) 2002-2008 Ulrich Möller.
12	* This program is free software; you can redistribute it and/or modify
13	* it under the terms of the GNU General Public License as published by
14	* the Free Software Foundation, in version 2 as it comes in the COPYING
15	* file of this distribution.
16	* This program is distributed in the hope that it will be useful,
17	* but WITHOUT ANY WARRANTY; without even the implied warranty of
18	* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19	* GNU General Public License for more details.
20	*/
21
22	#define OS2EMX_PLAIN_CHAR
23	// this is needed for "os2emx.h"; if this is defined,
24	// emx will define PSZ as _signed_ char, otherwise
25	// as unsigned char
26
27	#define INCL_DOSSEMAPHORES
28	#define INCL_DOSNLS
29	#include <os2.h>
30
31	#include <stdio.h>
32	#include <stdlib.h>
33	#include <string.h>
34	#include <stdarg.h>
35
36	#include "setup.h"
37
38	#include "helpers\stringh.h"
39	#include "helpers\xstring.h"
40
41	// base includes
42	#include "cppbase\bs_base.h"
43	#include "cppbase\bs_string.h"
44	#include "cppbase\bs_errors.h"
45
46	#include "encodings\base.h"
47
48	#pragma hdrstop
49
// IBM codepage number for UTF-8. A BSUniCodec created for this codepage
// gets no conversion object; both conversion methods below check for it
// and hand the input string through unconverted.
50	#define CP_UTF8 1208
// Unicode code point of the copyright sign (U+00A9). Uni2Codepage writes
// "(C)" instead when the target codepage has no mapping for it.
51	#define UNI_COPYRIGHT 0xA9
52
// NOTE(review): DEFINE_CLASS is a project macro (presumably from
// cppbase\bs_base.h) that appears to declare the class descriptor for
// BSUniCodec with BSRoot as its parent -- confirm against the header.
53	DEFINE_CLASS(BSUniCodec, BSRoot);
54
55	/*
56	*@@ FindEncodingID:
57	*
58	*@@added V0.9.18 (2002-03-08) [umoeller]
59	*/
60
61	static ENCID FindEncodingID(USHORT usCodepage,
62	BOOL *pfDouble) // out: TRUE if double-byte cp
63	{
64	ENCBYTECOUNT bc;
65	ENCID id = encFindIdForCodepage(usCodepage, NULL, &bc);
66	if ( id != UNSUPPORTED
67	&& ( (bc == SINGLE)
68	\|\| (bc == DOUBLE)
69	)
70	)
71	{
72	if (pfDouble)
73	*pfDouble = (bc == DOUBLE);
74
75	return (id);
76	}
77
78	throw BSUnsupportedCPExcpt(usCodepage);
79	}
80
81	/* ******************************************************************
82	*
83	* BSUniCodec implementation
84	*
85	********************************************************************/
86
87	/*
88	*@@ BSUniCodec:
89	* constructor. Creates the internal conversion
90	* object by calling encCreateCodec.
91	*
92	*@@changed WarpIN V1.0.18 (2008-09-24) [pr]: added codepage 1208 support @@fixes 1127
93	*/
94
95	BSUniCodec::BSUniCodec(unsigned short usCodepage) // in: codepage
96	: BSRoot(tBSUniCodec),
97	_usCodepage(usCodepage)
98	{
99	ENCID id;
100
101	// WarpIN V1.0.18
102	if (usCodepage == CP_UTF8)
103	{
104	_pCodec = NULL;
105	return;
106	}
107
108	id = FindEncodingID(usCodepage, &_fDouble);
109	if (_fDouble)
110	{
111	APIRET arc;
112	COUNTRYCODE cc = { 0, usCodepage };
113	if (arc = DosQueryDBCSEnv(sizeof(_achDBCS),
114	&cc,
115	_achDBCS))
116	{
117	CHAR sz[200];
118	sprintf(sz,
119	"DosQueryDBCSEnv returned error %d for codepage %d",
120	arc,
121	usCodepage);
122	throw BSExcptBase(sz);
123	}
124	}
125
126	_pCodec = encCreateCodec(id);
127	}
128
129	/*
130	*@@ ~BSUniCodec:
131	* destructor. Frees the internal conversion object
132	* by calling encFreeCodec.
133	*
134	*@@changed WarpIN V1.0.18 (2008-09-24) [pr]: added codepage 1208 support @@fixes 1127
135	*/
136
137	BSUniCodec::~BSUniCodec()
138	{
139	if (_pCodec)
140	{
141	encFreeCodec((PCONVERSION*)&_pCodec);
142	_pCodec = NULL;
143	}
144	}
145
146	/*
147	*@@ IsLeadByte:
148	* returns TRUE if c is a DBCS lead byte.
149	*
150	*@@added V0.9.19 (2002-04-02) [umoeller]
151	*/
152
153	static BOOL IsLeadByte(CHAR c, // in: character to test
154	PSZ pachDBCS) // in: DBCS array from DosQueryDBCSEnv
155	{
156	while (*pachDBCS)
157	{
158	if ( (c >= *pachDBCS++)
159	&& (c <= *pachDBCS++)
160	)
161	return TRUE;
162	}
163
164	return FALSE;
165	}
166
167	/*
168	*@@ Codepage2Uni:
169	* converts the given string from codepage-specific
170	* to UTF-8. Used by BSString::assignUtf8 and
171	* others.
172	*
173	*@@changed WarpIN V1.0.18 (2008-09-24) [pr]: added codepage 1208 support @@fixes 1127
174	*/
175
176	void BSUniCodec::Codepage2Uni(BSUString &ustr, // out: target
177	const char *pcszCP, // in: cp string
178	unsigned long ulLength) // in: length of cp string
179	{
180	PCONVERSION pTable = (PCONVERSION)_pCodec;
181
182	// WarpIN V1.0.18
183	if (QueryCodepage() == CP_UTF8)
184	{
185	ustr.assignUtf8(pcszCP);
186	return;
187	}
188
189	XSTRING xstrNew;
190	xstrInit(&xstrNew, ulLength + 1);
191
192	ULONG ul;
193	for (ul = 0;
194	ul < ulLength;
195	++ul)
196	{
197	unsigned short c = pcszCP[ul];
198
199	if (_fDouble)
200	{
201	// we're using a double-byte codepage:
202	// check if this is a DBCS char
203	if (IsLeadByte(c, _achDBCS))
204	c = (c << 8) \| pcszCP[++ul];
205	}
206
207	// convert this char to Unicode, using the current codepage
208	unsigned long ulUni = encChar2Uni(pTable, c);
209
210	if (ulUni == 0xFFFF)
211	// shouldn't happen
212	xstrcatc(&xstrNew, '?');
213	else if (ulUni < 0x80)
214	xstrcatc(&xstrNew, (CHAR)ulUni);
215	else if (ulUni < 0x800)
216	{
217	xstrcatc(&xstrNew, (CHAR)(0xC0 \| ulUni>>6));
218	xstrcatc(&xstrNew, (CHAR)(0x80 \| ulUni & 0x3F));
219	}
220	else if (ulUni < 0x10000)
221	{
222	xstrcatc(&xstrNew, (CHAR)(0xE0 \| ulUni>>12));
223	xstrcatc(&xstrNew, (CHAR)(0x80 \| ulUni>>6 & 0x3F));
224	xstrcatc(&xstrNew, (CHAR)(0x80 \| ulUni & 0x3F));
225	}
226	/* else if (ulUni < 0x200000)
227	{
228	xstrcatc(&xstrNew, (CHAR)(0xF0 \| ulUni>>18));
229	xstrcatc(&xstrNew, (CHAR)(0x80 \| ulUni>>12 & 0x3F));
230	xstrcatc(&xstrNew, (CHAR)(0x80 \| ulUni>>6 & 0x3F));
231	xstrcatc(&xstrNew, (CHAR)(0x80 \| ulUni & 0x3F));
232	} */
233	else
234	{
235	CHAR sz[100];
236	sprintf(sz,
237	"Unsupported Unicode character %u at offset %u in string",
238	ulUni,
239	ul);
240	throw BSExcptBase(sz);
241	}
242	}
243
244	// copy back
245	ustr._take_from(xstrNew);
246	}
247
248	/*
249	*@@ Uni2Codepage:
250	* converts the given string from UTF-8 to
251	* codepage-specific. Used by BSUString::assignCP
252	* and others.
253	*
254	* Characters that are not supported with the
255	* given codepage are replaced by '?'.
256	*
257	*@@changed WarpIN V1.0.18 (2008-09-24) [pr]: added codepage 1208 support @@fixes 1127
258	*@@changed WarpIN V1.0.25 (2020-09-05) [pr]: added copyright symbol substitution
259	*/
260
261	void BSUniCodec::Uni2Codepage(BSString &str,
262	const char *pcszUni,
263	unsigned long ulLength)
264	{
265	// WarpIN V1.0.18
266	if (QueryCodepage() == CP_UTF8)
267	{
268	str.assign(pcszUni);
269	return;
270	}
271
272	PCONVERSION pTable = (PCONVERSION)_pCodec;
273
274	XSTRING xstrNew;
275	xstrInit(&xstrNew, ulLength + 1);
276
277	const char *pFirst = pcszUni;
278
279	while (*pcszUni)
280	{
281	unsigned long ulUni = encDecodeUTF8(&pcszUni);
282	unsigned short us = encUni2Char(pTable, ulUni);
283	if (us > 0xFF)
284	{
285	if (us == 0xFFFF)
286	if (ulUni == UNI_COPYRIGHT)
287	{
288	xstrcat(&xstrNew, "(C)", 3);
289	continue;
290	}
291	else
292	us = '?';
293	else if (_fDouble)
294	{
295	// we're using a double-byte codepage:
296	// store lead byte first
297	xstrcatc(&xstrNew, (CHAR)(us >> 8));
298	}
299	else
300	{
301	// not double-byte codepage: then we can't handle > 0xFF
302	CHAR sz[100];
303	sprintf(sz,
304	"Unsupported Unicode character %u at offset %u in string",
305	ulUni,
306	pcszUni - pFirst);
307	throw BSExcptBase(sz);
308
309	}
310	}
311	xstrcatc(&xstrNew, (CHAR)us);
312	}
313
314	// copy back
315	str._take_from(xstrNew);
316	}
317

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: trunk/src/cppbase/bs_string_conv.cpp

Download in other formats: