Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

localcharset.c@ 3619

Last change on this file since 3619 was 3613, checked in by bird, 14 months ago
src/sed: Merged in changes between 4.1.5 and 4.9 from the vendor branch. (svn merge ^/vendor/sed/4.1.5 ^/vendor/sed/current .)
File size: 39.0 KB

Line
1	/* Determine a canonical name for the current locale's character encoding.
2
3	Copyright (C) 2000-2006, 2008-2022 Free Software Foundation, Inc.
4
5	This file is free software: you can redistribute it and/or modify
6	it under the terms of the GNU Lesser General Public License as
7	published by the Free Software Foundation; either version 2.1 of the
8	License, or (at your option) any later version.
9
10	This file is distributed in the hope that it will be useful,
11	but WITHOUT ANY WARRANTY; without even the implied warranty of
12	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13	GNU Lesser General Public License for more details.
14
15	You should have received a copy of the GNU Lesser General Public License
16	along with this program. If not, see <https://www.gnu.org/licenses/>. */
17
18	/* Written by Bruno Haible <bruno@clisp.org>. */
19
20	#include <config.h>
21
22	/* Specification. */
23	#include "localcharset.h"
24
25	#include <stddef.h>
26	#include <stdio.h>
27	#include <string.h>
28	#include <stdlib.h>
29
30	#if defined __APPLE__ && defined __MACH__ && HAVE_LANGINFO_CODESET
31	# define DARWIN7 /* Darwin 7 or newer, i.e. Mac OS X 10.3 or newer */
32	#endif
33
34	#if defined _WIN32 && !defined __CYGWIN__
35	# define WINDOWS_NATIVE
36	# include <locale.h>
37	#endif
38
39	#if defined __EMX__
40	/* Assume EMX program runs on OS/2, even if compiled under DOS. */
41	# ifndef OS2
42	# define OS2
43	# endif
44	#endif
45
46	#if !defined WINDOWS_NATIVE
47	# if HAVE_LANGINFO_CODESET
48	# include <langinfo.h>
49	# else
50	# if 0 /* see comment regarding use of setlocale(), below */
51	# include <locale.h>
52	# endif
53	# endif
54	# ifdef __CYGWIN__
55	# define WIN32_LEAN_AND_MEAN
56	# include <windows.h>
57	# endif
58	#elif defined WINDOWS_NATIVE
59	# define WIN32_LEAN_AND_MEAN
60	# include <windows.h>
61	/* For the use of setlocale() below, the Gnulib override in setlocale.c is
62	not needed; see the platform lists in setlocale_null.m4. */
63	# undef setlocale
64	#endif
65	#if defined OS2
66	# define INCL_DOS
67	# include <os2.h>
68	#endif
69
70	/* For MB_CUR_MAX_L */
71	#if defined DARWIN7
72	# include <xlocale.h>
73	#endif
74
75
76	#if HAVE_LANGINFO_CODESET \|\| defined WINDOWS_NATIVE \|\| defined OS2
77
78	/* On these platforms, we use a mapping from non-canonical encoding name
79	to GNU canonical encoding name. */
80
81	/* With glibc-2.1 or newer, we don't need any canonicalization,
82	because glibc has iconv and both glibc and libiconv support all
83	GNU canonical names directly. */
84	# if !((defined __GNU_LIBRARY__ && __GLIBC__ >= 2) \|\| defined __UCLIBC__)
85
/* One row of the alias table: a codeset name as the platform reports it
   and the GNU canonical encoding name it maps to.  Both fields are
   fixed-size arrays, so every name stored here must fit in 11 characters
   plus the terminating NUL.  */
struct table_entry
{
  const char alias[11+1];
  const char canonical[11+1];
};

/* Table of platform-dependent mappings, sorted in ascending order.
   The alias lookup performs a binary search using strcmp, so the rows of
   each platform section must stay sorted by their 'alias' field.
   The last row of a section carries no trailing comma; each section
   defines alias_table_defined so that the dummy row at the end is only
   emitted when no section matched.
   Rows inside comments are disabled and kept for reference only.  */
static const struct table_entry alias_table[] =
  {
#  if defined __FreeBSD__ /* FreeBSD */
  /*{ "ARMSCII-8", "ARMSCII-8" },*/
    { "Big5", "BIG5" },
    { "C", "ASCII" },
  /*{ "CP1131", "CP1131" },*/
  /*{ "CP1251", "CP1251" },*/
  /*{ "CP866", "CP866" },*/
  /*{ "GB18030", "GB18030" },*/
  /*{ "GB2312", "GB2312" },*/
  /*{ "GBK", "GBK" },*/
  /*{ "ISCII-DEV", "?" },*/
    { "ISO8859-1", "ISO-8859-1" },
    { "ISO8859-13", "ISO-8859-13" },
    { "ISO8859-15", "ISO-8859-15" },
    { "ISO8859-2", "ISO-8859-2" },
    { "ISO8859-5", "ISO-8859-5" },
    { "ISO8859-7", "ISO-8859-7" },
    { "ISO8859-9", "ISO-8859-9" },
  /*{ "KOI8-R", "KOI8-R" },*/
  /*{ "KOI8-U", "KOI8-U" },*/
    { "SJIS", "SHIFT_JIS" },
    { "US-ASCII", "ASCII" },
    { "eucCN", "GB2312" },
    { "eucJP", "EUC-JP" },
    { "eucKR", "EUC-KR" }
#   define alias_table_defined
#  endif
#  if defined __NetBSD__ /* NetBSD */
    { "646", "ASCII" },
  /*{ "ARMSCII-8", "ARMSCII-8" },*/
  /*{ "BIG5", "BIG5" },*/
    { "Big5-HKSCS", "BIG5-HKSCS" },
  /*{ "CP1251", "CP1251" },*/
  /*{ "CP866", "CP866" },*/
  /*{ "GB18030", "GB18030" },*/
  /*{ "GB2312", "GB2312" },*/
    { "ISO8859-1", "ISO-8859-1" },
    { "ISO8859-13", "ISO-8859-13" },
    { "ISO8859-15", "ISO-8859-15" },
    { "ISO8859-2", "ISO-8859-2" },
    { "ISO8859-4", "ISO-8859-4" },
    { "ISO8859-5", "ISO-8859-5" },
    { "ISO8859-7", "ISO-8859-7" },
  /*{ "KOI8-R", "KOI8-R" },*/
  /*{ "KOI8-U", "KOI8-U" },*/
  /*{ "PT154", "PT154" },*/
    { "SJIS", "SHIFT_JIS" },
    { "eucCN", "GB2312" },
    { "eucJP", "EUC-JP" },
    { "eucKR", "EUC-KR" },
    { "eucTW", "EUC-TW" }
#   define alias_table_defined
#  endif
#  if defined __OpenBSD__ /* OpenBSD */
    { "646", "ASCII" },
    { "ISO8859-1", "ISO-8859-1" },
    { "ISO8859-13", "ISO-8859-13" },
    { "ISO8859-15", "ISO-8859-15" },
    { "ISO8859-2", "ISO-8859-2" },
    { "ISO8859-4", "ISO-8859-4" },
    { "ISO8859-5", "ISO-8859-5" },
    { "ISO8859-7", "ISO-8859-7" },
    { "US-ASCII", "ASCII" }
#   define alias_table_defined
#  endif
#  if defined __APPLE__ && defined __MACH__ /* Mac OS X */
    /* Darwin 7.5 has nl_langinfo(CODESET), but sometimes its value is
       useless:
       - It returns the empty string when LANG is set to a locale of the
         form ll_CC, although ll_CC/LC_CTYPE is a symlink to an UTF-8
         LC_CTYPE file.
       - The environment variables LANG, LC_CTYPE, LC_ALL are not set by
         the system; nl_langinfo(CODESET) returns "US-ASCII" in this case.
       - The documentation says:
           "... all code that calls BSD system routines should ensure
            that the const *char parameters of these routines are in UTF-8
            encoding. All BSD system functions expect their string
            parameters to be in UTF-8 encoding and nothing else."
         It also says
           "An additional caveat is that string parameters for files,
            paths, and other file-system entities must be in canonical
            UTF-8. In a canonical UTF-8 Unicode string, all decomposable
            characters are decomposed ..."
         but this is not true: You can pass non-decomposed UTF-8 strings
         to file system functions, and it is the OS which will convert
         them to decomposed UTF-8 before accessing the file system.
       - The Apple Terminal application displays UTF-8 by default.
       - However, other applications are free to use different encodings:
         - xterm uses ISO-8859-1 by default.
         - TextEdit uses MacRoman by default.
       We prefer UTF-8 over decomposed UTF-8-MAC because one should
       minimize the use of decomposed Unicode. Unfortunately, through the
       Darwin file system, decomposed UTF-8 strings are leaked into user
       space nevertheless.
       Then there are also the locales with encodings other than US-ASCII
       and UTF-8. These locales can be occasionally useful to users (e.g.
       when grepping through ISO-8859-1 encoded text files), when all their
       file names are in US-ASCII.
     */
    { "ARMSCII-8", "ARMSCII-8" },
    { "Big5", "BIG5" },
    { "Big5HKSCS", "BIG5-HKSCS" },
    { "CP1131", "CP1131" },
    { "CP1251", "CP1251" },
    { "CP866", "CP866" },
    { "CP949", "CP949" },
    { "GB18030", "GB18030" },
    { "GB2312", "GB2312" },
    { "GBK", "GBK" },
  /*{ "ISCII-DEV", "?" },*/
    { "ISO8859-1", "ISO-8859-1" },
    { "ISO8859-13", "ISO-8859-13" },
    { "ISO8859-15", "ISO-8859-15" },
    { "ISO8859-2", "ISO-8859-2" },
    { "ISO8859-4", "ISO-8859-4" },
    { "ISO8859-5", "ISO-8859-5" },
    { "ISO8859-7", "ISO-8859-7" },
    { "ISO8859-9", "ISO-8859-9" },
    { "KOI8-R", "KOI8-R" },
    { "KOI8-U", "KOI8-U" },
    { "PT154", "PT154" },
    { "SJIS", "SHIFT_JIS" },
    { "eucCN", "GB2312" },
    { "eucJP", "EUC-JP" },
    { "eucKR", "EUC-KR" }
#   define alias_table_defined
#  endif
#  if defined _AIX /* AIX */
  /*{ "GBK", "GBK" },*/
    { "IBM-1046", "CP1046" },
    { "IBM-1124", "CP1124" },
    { "IBM-1129", "CP1129" },
    { "IBM-1252", "CP1252" },
    { "IBM-850", "CP850" },
    { "IBM-856", "CP856" },
    { "IBM-921", "ISO-8859-13" },
    { "IBM-922", "CP922" },
    { "IBM-932", "CP932" },
    { "IBM-943", "CP943" },
    { "IBM-eucCN", "GB2312" },
    { "IBM-eucJP", "EUC-JP" },
    { "IBM-eucKR", "EUC-KR" },
    { "IBM-eucTW", "EUC-TW" },
    { "ISO8859-1", "ISO-8859-1" },
    { "ISO8859-15", "ISO-8859-15" },
    { "ISO8859-2", "ISO-8859-2" },
    { "ISO8859-5", "ISO-8859-5" },
    { "ISO8859-6", "ISO-8859-6" },
    { "ISO8859-7", "ISO-8859-7" },
    { "ISO8859-8", "ISO-8859-8" },
    { "ISO8859-9", "ISO-8859-9" },
    { "TIS-620", "TIS-620" },
  /*{ "UTF-8", "UTF-8" },*/
    { "big5", "BIG5" }
#   define alias_table_defined
#  endif
#  if defined __hpux /* HP-UX */
    { "SJIS", "SHIFT_JIS" },
    { "arabic8", "HP-ARABIC8" },
    { "big5", "BIG5" },
    { "cp1251", "CP1251" },
    { "eucJP", "EUC-JP" },
    { "eucKR", "EUC-KR" },
    { "eucTW", "EUC-TW" },
    { "gb18030", "GB18030" },
    { "greek8", "HP-GREEK8" },
    { "hebrew8", "HP-HEBREW8" },
    { "hkbig5", "BIG5-HKSCS" },
    { "hp15CN", "GB2312" },
    { "iso88591", "ISO-8859-1" },
    { "iso885913", "ISO-8859-13" },
    { "iso885915", "ISO-8859-15" },
    { "iso88592", "ISO-8859-2" },
    { "iso88594", "ISO-8859-4" },
    { "iso88595", "ISO-8859-5" },
    { "iso88596", "ISO-8859-6" },
    { "iso88597", "ISO-8859-7" },
    { "iso88598", "ISO-8859-8" },
    { "iso88599", "ISO-8859-9" },
    { "kana8", "HP-KANA8" },
    { "koi8r", "KOI8-R" },
    { "roman8", "HP-ROMAN8" },
    { "tis620", "TIS-620" },
    { "turkish8", "HP-TURKISH8" },
    { "utf8", "UTF-8" }
#   define alias_table_defined
#  endif
#  if defined __sgi /* IRIX */
    { "ISO8859-1", "ISO-8859-1" },
    { "ISO8859-15", "ISO-8859-15" },
    { "ISO8859-2", "ISO-8859-2" },
    { "ISO8859-5", "ISO-8859-5" },
    { "ISO8859-7", "ISO-8859-7" },
    { "ISO8859-9", "ISO-8859-9" },
    { "eucCN", "GB2312" },
    { "eucJP", "EUC-JP" },
    { "eucKR", "EUC-KR" },
    { "eucTW", "EUC-TW" }
#   define alias_table_defined
#  endif
#  if defined __osf__ /* OSF/1 */
  /*{ "GBK", "GBK" },*/
    { "ISO8859-1", "ISO-8859-1" },
    { "ISO8859-15", "ISO-8859-15" },
    { "ISO8859-2", "ISO-8859-2" },
    { "ISO8859-4", "ISO-8859-4" },
    { "ISO8859-5", "ISO-8859-5" },
    { "ISO8859-7", "ISO-8859-7" },
    { "ISO8859-8", "ISO-8859-8" },
    { "ISO8859-9", "ISO-8859-9" },
    { "KSC5601", "CP949" },
    { "SJIS", "SHIFT_JIS" },
    { "TACTIS", "TIS-620" },
  /*{ "UTF-8", "UTF-8" },*/
    { "big5", "BIG5" },
    { "cp850", "CP850" },
    { "dechanyu", "DEC-HANYU" },
    { "dechanzi", "GB2312" },
    { "deckanji", "DEC-KANJI" },
    { "deckorean", "EUC-KR" },
    { "eucJP", "EUC-JP" },
    { "eucKR", "EUC-KR" },
    { "eucTW", "EUC-TW" },
    { "sdeckanji", "EUC-JP" }
#   define alias_table_defined
#  endif
#  if defined __sun /* Solaris */
    { "5601", "EUC-KR" },
    { "646", "ASCII" },
  /*{ "BIG5", "BIG5" },*/
    { "Big5-HKSCS", "BIG5-HKSCS" },
    { "GB18030", "GB18030" },
  /*{ "GBK", "GBK" },*/
    { "ISO8859-1", "ISO-8859-1" },
    { "ISO8859-11", "TIS-620" },
    { "ISO8859-13", "ISO-8859-13" },
    { "ISO8859-15", "ISO-8859-15" },
    { "ISO8859-2", "ISO-8859-2" },
    { "ISO8859-3", "ISO-8859-3" },
    { "ISO8859-4", "ISO-8859-4" },
    { "ISO8859-5", "ISO-8859-5" },
    { "ISO8859-6", "ISO-8859-6" },
    { "ISO8859-7", "ISO-8859-7" },
    { "ISO8859-8", "ISO-8859-8" },
    { "ISO8859-9", "ISO-8859-9" },
    { "PCK", "SHIFT_JIS" },
    { "TIS620.2533", "TIS-620" },
  /*{ "UTF-8", "UTF-8" },*/
    { "ansi-1251", "CP1251" },
    { "cns11643", "EUC-TW" },
    { "eucJP", "EUC-JP" },
    { "gb2312", "GB2312" },
    { "koi8-r", "KOI8-R" }
#   define alias_table_defined
#  endif
#  if defined __minix /* Minix */
    { "646", "ASCII" }
#   define alias_table_defined
#  endif
#  if defined WINDOWS_NATIVE || defined __CYGWIN__ /* Windows */
    { "CP1361", "JOHAB" },
    { "CP20127", "ASCII" },
    { "CP20866", "KOI8-R" },
    { "CP20936", "GB2312" },
    { "CP21866", "KOI8-RU" },
    { "CP28591", "ISO-8859-1" },
    { "CP28592", "ISO-8859-2" },
    { "CP28593", "ISO-8859-3" },
    { "CP28594", "ISO-8859-4" },
    { "CP28595", "ISO-8859-5" },
    { "CP28596", "ISO-8859-6" },
    { "CP28597", "ISO-8859-7" },
    { "CP28598", "ISO-8859-8" },
    { "CP28599", "ISO-8859-9" },
    { "CP28605", "ISO-8859-15" },
    { "CP38598", "ISO-8859-8" },
    { "CP51932", "EUC-JP" },
    { "CP51936", "GB2312" },
    { "CP51949", "EUC-KR" },
    { "CP51950", "EUC-TW" },
    { "CP54936", "GB18030" },
    { "CP65001", "UTF-8" },
    { "CP936", "GBK" }
#   define alias_table_defined
#  endif
#  if defined OS2 /* OS/2 */
    /* The list of encodings is taken from "List of OS/2 Codepages"
       by Alex Taylor:
       <http://altsan.org/os2/toolkits/uls/index.html#codepages>.
       See also "__convcp() of kLIBC":
       <https://github.com/bitwiseworks/libc/blob/master/src/emx/src/lib/locale/__convcp.c>.  */
    { "CP1004", "CP1252" },
  /*{ "CP1041", "CP943" },*/
  /*{ "CP1088", "CP949" },*/
    { "CP1089", "ISO-8859-6" },
  /*{ "CP1114", "CP950" },*/
  /*{ "CP1115", "GB2312" },*/
    { "CP1208", "UTF-8" },
  /*{ "CP1380", "GB2312" },*/
    { "CP1381", "GB2312" },
    { "CP1383", "GB2312" },
    { "CP1386", "GBK" },
  /*{ "CP301", "CP943" },*/
    { "CP3372", "EUC-JP" },
    { "CP4946", "CP850" },
  /*{ "CP5048", "JIS_X0208-1990" },*/
  /*{ "CP5049", "JIS_X0212-1990" },*/
  /*{ "CP5067", "KS_C_5601-1987" },*/
    { "CP813", "ISO-8859-7" },
    { "CP819", "ISO-8859-1" },
    { "CP878", "KOI8-R" },
  /*{ "CP897", "CP943" },*/
    { "CP912", "ISO-8859-2" },
    { "CP913", "ISO-8859-3" },
    { "CP914", "ISO-8859-4" },
    { "CP915", "ISO-8859-5" },
    { "CP916", "ISO-8859-8" },
    { "CP920", "ISO-8859-9" },
    { "CP921", "ISO-8859-13" },
    { "CP923", "ISO-8859-15" },
  /*{ "CP941", "CP943" },*/
  /*{ "CP947", "CP950" },*/
  /*{ "CP951", "CP949" },*/
  /*{ "CP952", "JIS_X0208-1990" },*/
  /*{ "CP953", "JIS_X0212-1990" },*/
    { "CP954", "EUC-JP" },
    { "CP964", "EUC-TW" },
    { "CP970", "EUC-KR" },
  /*{ "CP971", "KS_C_5601-1987" },*/
    { "IBM-1004", "CP1252" },
  /*{ "IBM-1006", "?" },*/
  /*{ "IBM-1008", "?" },*/
  /*{ "IBM-1041", "CP943" },*/
  /*{ "IBM-1051", "?" },*/
  /*{ "IBM-1088", "CP949" },*/
    { "IBM-1089", "ISO-8859-6" },
  /*{ "IBM-1098", "?" },*/
  /*{ "IBM-1114", "CP950" },*/
  /*{ "IBM-1115", "GB2312" },*/
  /*{ "IBM-1116", "?" },*/
  /*{ "IBM-1117", "?" },*/
  /*{ "IBM-1118", "?" },*/
  /*{ "IBM-1119", "?" },*/
    { "IBM-1124", "CP1124" },
    { "IBM-1125", "CP1125" },
    { "IBM-1131", "CP1131" },
    { "IBM-1208", "UTF-8" },
    { "IBM-1250", "CP1250" },
    { "IBM-1251", "CP1251" },
    { "IBM-1252", "CP1252" },
    { "IBM-1253", "CP1253" },
    { "IBM-1254", "CP1254" },
    { "IBM-1255", "CP1255" },
    { "IBM-1256", "CP1256" },
    { "IBM-1257", "CP1257" },
  /*{ "IBM-1275", "?" },*/
  /*{ "IBM-1276", "?" },*/
  /*{ "IBM-1277", "?" },*/
  /*{ "IBM-1280", "?" },*/
  /*{ "IBM-1281", "?" },*/
  /*{ "IBM-1282", "?" },*/
  /*{ "IBM-1283", "?" },*/
  /*{ "IBM-1380", "GB2312" },*/
    { "IBM-1381", "GB2312" },
    { "IBM-1383", "GB2312" },
    { "IBM-1386", "GBK" },
  /*{ "IBM-301", "CP943" },*/
    { "IBM-3372", "EUC-JP" },
    { "IBM-367", "ASCII" },
    { "IBM-437", "CP437" },
    { "IBM-4946", "CP850" },
  /*{ "IBM-5048", "JIS_X0208-1990" },*/
  /*{ "IBM-5049", "JIS_X0212-1990" },*/
  /*{ "IBM-5067", "KS_C_5601-1987" },*/
    { "IBM-813", "ISO-8859-7" },
    { "IBM-819", "ISO-8859-1" },
    { "IBM-850", "CP850" },
  /*{ "IBM-851", "?" },*/
    { "IBM-852", "CP852" },
    { "IBM-855", "CP855" },
    { "IBM-856", "CP856" },
    { "IBM-857", "CP857" },
  /*{ "IBM-859", "?" },*/
    { "IBM-860", "CP860" },
    { "IBM-861", "CP861" },
    { "IBM-862", "CP862" },
    { "IBM-863", "CP863" },
    { "IBM-864", "CP864" },
    { "IBM-865", "CP865" },
    { "IBM-866", "CP866" },
  /*{ "IBM-868", "?" },*/
    { "IBM-869", "CP869" },
    { "IBM-874", "CP874" },
    { "IBM-878", "KOI8-R" },
  /*{ "IBM-895", "?" },*/
  /*{ "IBM-897", "CP943" },*/
  /*{ "IBM-907", "?" },*/
  /*{ "IBM-909", "?" },*/
    { "IBM-912", "ISO-8859-2" },
    { "IBM-913", "ISO-8859-3" },
    { "IBM-914", "ISO-8859-4" },
    { "IBM-915", "ISO-8859-5" },
    { "IBM-916", "ISO-8859-8" },
    { "IBM-920", "ISO-8859-9" },
    { "IBM-921", "ISO-8859-13" },
    { "IBM-922", "CP922" },
    { "IBM-923", "ISO-8859-15" },
    { "IBM-932", "CP932" },
  /*{ "IBM-941", "CP943" },*/
  /*{ "IBM-942", "?" },*/
    { "IBM-943", "CP943" },
  /*{ "IBM-947", "CP950" },*/
    { "IBM-949", "CP949" },
    { "IBM-950", "CP950" },
  /*{ "IBM-951", "CP949" },*/
  /*{ "IBM-952", "JIS_X0208-1990" },*/
  /*{ "IBM-953", "JIS_X0212-1990" },*/
    { "IBM-954", "EUC-JP" },
  /*{ "IBM-955", "?" },*/
    { "IBM-964", "EUC-TW" },
    { "IBM-970", "EUC-KR" },
  /*{ "IBM-971", "KS_C_5601-1987" },*/
    { "IBM-eucCN", "GB2312" },
    { "IBM-eucJP", "EUC-JP" },
    { "IBM-eucKR", "EUC-KR" },
    { "IBM-eucTW", "EUC-TW" },
    { "IBM33722", "EUC-JP" },
    { "ISO8859-1", "ISO-8859-1" },
    { "ISO8859-2", "ISO-8859-2" },
    { "ISO8859-3", "ISO-8859-3" },
    { "ISO8859-4", "ISO-8859-4" },
    { "ISO8859-5", "ISO-8859-5" },
    { "ISO8859-6", "ISO-8859-6" },
    { "ISO8859-7", "ISO-8859-7" },
    { "ISO8859-8", "ISO-8859-8" },
    { "ISO8859-9", "ISO-8859-9" },
  /*{ "JISX0201-1976", "JISX0201-1976" },*/
  /*{ "JISX0208-1978", "?" },*/
  /*{ "JISX0208-1983", "JIS_X0208-1983" },*/
  /*{ "JISX0208-1990", "JIS_X0208-1990" },*/
  /*{ "JISX0212-1990", "JIS_X0212-1990" },*/
  /*{ "KSC5601-1987", "KS_C_5601-1987" },*/
    { "SJIS-1", "CP943" },
    { "SJIS-2", "CP943" },
    { "eucJP", "EUC-JP" },
    { "eucKR", "EUC-KR" },
    { "eucTW-1993", "EUC-TW" }
#   define alias_table_defined
#  endif
#  if defined VMS /* OpenVMS */
    /* The list of encodings is taken from the OpenVMS 7.3-1 documentation
       "Compaq C Run-Time Library Reference Manual for OpenVMS systems"
       section 10.7 "Handling Different Character Sets".  */
    { "DECHANYU", "DEC-HANYU" },
    { "DECHANZI", "GB2312" },
    { "DECKANJI", "DEC-KANJI" },
    { "DECKOREAN", "EUC-KR" },
    { "ISO8859-1", "ISO-8859-1" },
    { "ISO8859-2", "ISO-8859-2" },
    { "ISO8859-5", "ISO-8859-5" },
    { "ISO8859-7", "ISO-8859-7" },
    { "ISO8859-8", "ISO-8859-8" },
    { "ISO8859-9", "ISO-8859-9" },
    { "SDECKANJI", "EUC-JP" },
    { "SJIS", "SHIFT_JIS" },
    { "eucJP", "EUC-JP" },
    { "eucTW", "EUC-TW" }
#   define alias_table_defined
#  endif
#  ifndef alias_table_defined
    /* Just a dummy entry, to avoid a C syntax error.  */
    { "", "" }
#  endif
  };
569
570	# endif
571
572	#else
573
574	/* On these platforms, we use a mapping from locale name to GNU canonical
575	encoding name. */
576
/* One row of the locale table: a locale name and the GNU canonical
   encoding name associated with it.  Both fields are fixed-size arrays,
   so a locale name must fit in 17 characters and a canonical name in 11
   characters, each plus the terminating NUL.  */
struct table_entry
{
  const char locale[17+1];
  const char canonical[11+1];
};

/* Table of platform-dependent mappings, sorted in ascending order.
   The rows of each platform section are kept sorted by their 'locale'
   field.  The last row of a section carries no trailing comma; each
   section defines locale_table_defined so that the dummy row at the end
   is only emitted when no section matched.  */
static const struct table_entry locale_table[] =
  {
# if defined __FreeBSD__ /* FreeBSD 4.2 */
    { "cs_CZ.ISO_8859-2", "ISO-8859-2" },
    { "da_DK.DIS_8859-15", "ISO-8859-15" },
    { "da_DK.ISO_8859-1", "ISO-8859-1" },
    { "de_AT.DIS_8859-15", "ISO-8859-15" },
    { "de_AT.ISO_8859-1", "ISO-8859-1" },
    { "de_CH.DIS_8859-15", "ISO-8859-15" },
    { "de_CH.ISO_8859-1", "ISO-8859-1" },
    { "de_DE.DIS_8859-15", "ISO-8859-15" },
    { "de_DE.ISO_8859-1", "ISO-8859-1" },
    { "en_AU.DIS_8859-15", "ISO-8859-15" },
    { "en_AU.ISO_8859-1", "ISO-8859-1" },
    { "en_CA.DIS_8859-15", "ISO-8859-15" },
    { "en_CA.ISO_8859-1", "ISO-8859-1" },
    { "en_GB.DIS_8859-15", "ISO-8859-15" },
    { "en_GB.ISO_8859-1", "ISO-8859-1" },
    { "en_US.DIS_8859-15", "ISO-8859-15" },
    { "en_US.ISO_8859-1", "ISO-8859-1" },
    { "es_ES.DIS_8859-15", "ISO-8859-15" },
    { "es_ES.ISO_8859-1", "ISO-8859-1" },
    { "fi_FI.DIS_8859-15", "ISO-8859-15" },
    { "fi_FI.ISO_8859-1", "ISO-8859-1" },
    { "fr_BE.DIS_8859-15", "ISO-8859-15" },
    { "fr_BE.ISO_8859-1", "ISO-8859-1" },
    { "fr_CA.DIS_8859-15", "ISO-8859-15" },
    { "fr_CA.ISO_8859-1", "ISO-8859-1" },
    { "fr_CH.DIS_8859-15", "ISO-8859-15" },
    { "fr_CH.ISO_8859-1", "ISO-8859-1" },
    { "fr_FR.DIS_8859-15", "ISO-8859-15" },
    { "fr_FR.ISO_8859-1", "ISO-8859-1" },
    { "hr_HR.ISO_8859-2", "ISO-8859-2" },
    { "hu_HU.ISO_8859-2", "ISO-8859-2" },
    { "is_IS.DIS_8859-15", "ISO-8859-15" },
    { "is_IS.ISO_8859-1", "ISO-8859-1" },
    { "it_CH.DIS_8859-15", "ISO-8859-15" },
    { "it_CH.ISO_8859-1", "ISO-8859-1" },
    { "it_IT.DIS_8859-15", "ISO-8859-15" },
    { "it_IT.ISO_8859-1", "ISO-8859-1" },
    { "ja_JP.EUC", "EUC-JP" },
    { "ja_JP.SJIS", "SHIFT_JIS" },
    { "ja_JP.Shift_JIS", "SHIFT_JIS" },
    { "ko_KR.EUC", "EUC-KR" },
    { "la_LN.ASCII", "ASCII" },
    { "la_LN.DIS_8859-15", "ISO-8859-15" },
    { "la_LN.ISO_8859-1", "ISO-8859-1" },
    { "la_LN.ISO_8859-2", "ISO-8859-2" },
    { "la_LN.ISO_8859-4", "ISO-8859-4" },
    { "lt_LN.ASCII", "ASCII" },
    { "lt_LN.DIS_8859-15", "ISO-8859-15" },
    { "lt_LN.ISO_8859-1", "ISO-8859-1" },
    { "lt_LN.ISO_8859-2", "ISO-8859-2" },
    { "lt_LT.ISO_8859-4", "ISO-8859-4" },
    { "nl_BE.DIS_8859-15", "ISO-8859-15" },
    { "nl_BE.ISO_8859-1", "ISO-8859-1" },
    { "nl_NL.DIS_8859-15", "ISO-8859-15" },
    { "nl_NL.ISO_8859-1", "ISO-8859-1" },
    { "no_NO.DIS_8859-15", "ISO-8859-15" },
    { "no_NO.ISO_8859-1", "ISO-8859-1" },
    { "pl_PL.ISO_8859-2", "ISO-8859-2" },
    { "pt_PT.DIS_8859-15", "ISO-8859-15" },
    { "pt_PT.ISO_8859-1", "ISO-8859-1" },
    { "ru_RU.CP866", "CP866" },
    { "ru_RU.ISO_8859-5", "ISO-8859-5" },
    { "ru_RU.KOI8-R", "KOI8-R" },
    { "ru_SU.CP866", "CP866" },
    { "ru_SU.ISO_8859-5", "ISO-8859-5" },
    { "ru_SU.KOI8-R", "KOI8-R" },
    { "sl_SI.ISO_8859-2", "ISO-8859-2" },
    { "sv_SE.DIS_8859-15", "ISO-8859-15" },
    { "sv_SE.ISO_8859-1", "ISO-8859-1" },
    { "uk_UA.KOI8-U", "KOI8-U" },
    { "zh_CN.EUC", "GB2312" },
    { "zh_TW.BIG5", "BIG5" },
    { "zh_TW.Big5", "BIG5" }
#  define locale_table_defined
# endif
# if defined __DJGPP__ /* DOS / DJGPP 2.03 */
    /* The encodings given here may not all be correct.
       If you find that the encoding given for your language and
       country is not the one your DOS machine actually uses, just
       correct it in this file, and send a mail to
       Juan Manuel Guerrero <juan.guerrero@gmx.de>
       and <bug-gnulib@gnu.org>.  */
    { "C", "ASCII" },
    { "ar", "CP864" },
    { "ar_AE", "CP864" },
    { "ar_DZ", "CP864" },
    { "ar_EG", "CP864" },
    { "ar_IQ", "CP864" },
    { "ar_IR", "CP864" },
    { "ar_JO", "CP864" },
    { "ar_KW", "CP864" },
    { "ar_MA", "CP864" },
    { "ar_OM", "CP864" },
    { "ar_QA", "CP864" },
    { "ar_SA", "CP864" },
    { "ar_SY", "CP864" },
    { "be", "CP866" },
    { "be_BE", "CP866" },
    { "bg", "CP866" }, /* not CP855 ?? */
    { "bg_BG", "CP866" }, /* not CP855 ?? */
    { "ca", "CP850" },
    { "ca_ES", "CP850" },
    { "cs", "CP852" },
    { "cs_CZ", "CP852" },
    { "da", "CP865" }, /* not CP850 ?? */
    { "da_DK", "CP865" }, /* not CP850 ?? */
    { "de", "CP850" },
    { "de_AT", "CP850" },
    { "de_CH", "CP850" },
    { "de_DE", "CP850" },
    { "el", "CP869" },
    { "el_GR", "CP869" },
    { "en", "CP850" },
    { "en_AU", "CP850" }, /* not CP437 ?? */
    { "en_CA", "CP850" },
    { "en_GB", "CP850" },
    { "en_NZ", "CP437" },
    { "en_US", "CP437" },
    { "en_ZA", "CP850" }, /* not CP437 ?? */
    { "eo", "CP850" },
    { "eo_EO", "CP850" },
    { "es", "CP850" },
    { "es_AR", "CP850" },
    { "es_BO", "CP850" },
    { "es_CL", "CP850" },
    { "es_CO", "CP850" },
    { "es_CR", "CP850" },
    { "es_CU", "CP850" },
    { "es_DO", "CP850" },
    { "es_EC", "CP850" },
    { "es_ES", "CP850" },
    { "es_GT", "CP850" },
    { "es_HN", "CP850" },
    { "es_MX", "CP850" },
    { "es_NI", "CP850" },
    { "es_PA", "CP850" },
    { "es_PE", "CP850" },
    { "es_PY", "CP850" },
    { "es_SV", "CP850" },
    { "es_UY", "CP850" },
    { "es_VE", "CP850" },
    { "et", "CP850" },
    { "et_EE", "CP850" },
    { "eu", "CP850" },
    { "eu_ES", "CP850" },
    { "fi", "CP850" },
    { "fi_FI", "CP850" },
    { "fr", "CP850" },
    { "fr_BE", "CP850" },
    { "fr_CA", "CP850" },
    { "fr_CH", "CP850" },
    { "fr_FR", "CP850" },
    { "ga", "CP850" },
    { "ga_IE", "CP850" },
    { "gd", "CP850" },
    { "gd_GB", "CP850" },
    { "gl", "CP850" },
    { "gl_ES", "CP850" },
    { "he", "CP862" },
    { "he_IL", "CP862" },
    { "hr", "CP852" },
    { "hr_HR", "CP852" },
    { "hu", "CP852" },
    { "hu_HU", "CP852" },
    { "id", "CP850" }, /* not CP437 ?? */
    { "id_ID", "CP850" }, /* not CP437 ?? */
    { "is", "CP861" }, /* not CP850 ?? */
    { "is_IS", "CP861" }, /* not CP850 ?? */
    { "it", "CP850" },
    { "it_CH", "CP850" },
    { "it_IT", "CP850" },
    { "ja", "CP932" },
    { "ja_JP", "CP932" },
    { "kr", "CP949" }, /* not CP934 ?? */
    { "kr_KR", "CP949" }, /* not CP934 ?? */
    { "lt", "CP775" },
    { "lt_LT", "CP775" },
    { "lv", "CP775" },
    { "lv_LV", "CP775" },
    { "mk", "CP866" }, /* not CP855 ?? */
    { "mk_MK", "CP866" }, /* not CP855 ?? */
    { "mt", "CP850" },
    { "mt_MT", "CP850" },
    { "nb", "CP865" }, /* not CP850 ?? */
    { "nb_NO", "CP865" }, /* not CP850 ?? */
    { "nl", "CP850" },
    { "nl_BE", "CP850" },
    { "nl_NL", "CP850" },
    { "nn", "CP865" }, /* not CP850 ?? */
    { "nn_NO", "CP865" }, /* not CP850 ?? */
    { "no", "CP865" }, /* not CP850 ?? */
    { "no_NO", "CP865" }, /* not CP850 ?? */
    { "pl", "CP852" },
    { "pl_PL", "CP852" },
    { "pt", "CP850" },
    { "pt_BR", "CP850" },
    { "pt_PT", "CP850" },
    { "ro", "CP852" },
    { "ro_RO", "CP852" },
    { "ru", "CP866" },
    { "ru_RU", "CP866" },
    { "sk", "CP852" },
    { "sk_SK", "CP852" },
    { "sl", "CP852" },
    { "sl_SI", "CP852" },
    { "sq", "CP852" },
    { "sq_AL", "CP852" },
    { "sr", "CP852" }, /* CP852 or CP866 or CP855 ?? */
    { "sr_CS", "CP852" }, /* CP852 or CP866 or CP855 ?? */
    { "sr_YU", "CP852" }, /* CP852 or CP866 or CP855 ?? */
    { "sv", "CP850" },
    { "sv_SE", "CP850" },
    { "th", "CP874" },
    { "th_TH", "CP874" },
    { "tr", "CP857" },
    { "tr_TR", "CP857" },
    { "uk", "CP1125" },
    { "uk_UA", "CP1125" },
    { "zh_CN", "GBK" },
    { "zh_TW", "CP950" } /* not CP938 ?? */
#  define locale_table_defined
# endif
# ifndef locale_table_defined
    /* Just a dummy entry, to avoid a C syntax error.  */
    { "", "" }
# endif
  };
814
815	#endif
816
817
818	/* Determine the current locale's character encoding, and canonicalize it
819	into one of the canonical names listed below.
820	The result must not be freed; it is statically allocated. The result
821	becomes invalid when setlocale() is used to change the global locale, or
822	when the value of one of the environment variables LC_ALL, LC_CTYPE, LANG
823	is changed; threads in multithreaded programs should not do this.
824	If the canonical name cannot be determined, the result is a non-canonical
825	name. */
826
827	#ifdef STATIC
828	STATIC
829	#endif
830	const char *
831	locale_charset (void)
832	{
833	const char *codeset;
834
835	/* This function must be multithread-safe. To achieve this without using
836	thread-local storage, we use a simple strcpy or memcpy to fill this static
837	buffer. Filling it through, for example, strcpy + strcat would not be
838	guaranteed to leave the buffer's contents intact if another thread is
839	currently accessing it. If necessary, the contents is first assembled in
840	a stack-allocated buffer. */
841
842	#if HAVE_LANGINFO_CODESET \|\| defined WINDOWS_NATIVE \|\| defined OS2
843
844	# if HAVE_LANGINFO_CODESET
845
846	/* Most systems support nl_langinfo (CODESET) nowadays. */
847	codeset = nl_langinfo (CODESET);
848
849	# ifdef __CYGWIN__
850	/* Cygwin < 1.7 does not have locales. nl_langinfo (CODESET) always
851	returns "US-ASCII". Return the suffix of the locale name from the
852	environment variables (if present) or the codepage as a number. */
853	if (codeset != NULL && strcmp (codeset, "US-ASCII") == 0)
854	{
855	const char *locale;
856	static char resultbuf[2 + 10 + 1];
857
858	locale = getenv ("LC_ALL");
859	if (locale == NULL \|\| locale[0] == '\0')
860	{
861	locale = getenv ("LC_CTYPE");
862	if (locale == NULL \|\| locale[0] == '\0')
863	locale = getenv ("LANG");
864	}
865	if (locale != NULL && locale[0] != '\0')
866	{
867	/* If the locale name contains an encoding after the dot, return
868	it. */
869	const char *dot = strchr (locale, '.');
870
871	if (dot != NULL)
872	{
873	const char *modifier;
874
875	dot++;
876	/* Look for the possible @... trailer and remove it, if any. */
877	modifier = strchr (dot, '@');
878	if (modifier == NULL)
879	return dot;
880	if (modifier - dot < sizeof (resultbuf))
881	{
882	/* This way of filling resultbuf is multithread-safe. */
883	memcpy (resultbuf, dot, modifier - dot);
884	resultbuf [modifier - dot] = '\0';
885	return resultbuf;
886	}
887	}
888	}
889
890	/* The Windows API has a function returning the locale's codepage as a
891	number: GetACP(). This encoding is used by Cygwin, unless the user
892	has set the environment variable CYGWIN=codepage:oem (which very few
893	people do).
894	Output directed to console windows needs to be converted (to
895	GetOEMCP() if the console is using a raster font, or to
896	GetConsoleOutputCP() if it is using a TrueType font). Cygwin does
897	this conversion transparently (see winsup/cygwin/fhandler_console.cc),
898	converting to GetConsoleOutputCP(). This leads to correct results,
899	except when SetConsoleOutputCP has been called and a raster font is
900	in use. */
901	{
902	char buf[2 + 10 + 1];
903
904	sprintf (buf, "CP%u", GetACP ());
905	strcpy (resultbuf, buf);
906	codeset = resultbuf;
907	}
908	}
909	# endif
910
911	if (codeset == NULL)
912	/* The canonical name cannot be determined. */
913	codeset = "";
914
915	# elif defined WINDOWS_NATIVE
916
917	char buf[2 + 10 + 1];
918	static char resultbuf[2 + 10 + 1];
919
920	/* The Windows API has a function returning the locale's codepage as
921	a number, but the value doesn't change according to what the
922	'setlocale' call specified. So we use it as a last resort, in
923	case the string returned by 'setlocale' doesn't specify the
924	codepage. */
925	char *current_locale = setlocale (LC_CTYPE, NULL);
926	char *pdot = strrchr (current_locale, '.');
927
928	if (pdot && 2 + strlen (pdot + 1) + 1 <= sizeof (buf))
929	sprintf (buf, "CP%s", pdot + 1);
930	else
931	{
932	/* The Windows API has a function returning the locale's codepage as a
933	number: GetACP().
934	When the output goes to a console window, it needs to be provided in
935	GetOEMCP() encoding if the console is using a raster font, or in
936	GetConsoleOutputCP() encoding if it is using a TrueType font.
937	But in GUI programs and for output sent to files and pipes, GetACP()
938	encoding is the best bet. */
939	sprintf (buf, "CP%u", GetACP ());
940	}
941	/* For a locale name such as "French_France.65001", in Windows 10,
942	setlocale now returns "French_France.utf8" instead. */
943	if (strcmp (buf + 2, "65001") == 0 \|\| strcmp (buf + 2, "utf8") == 0)
944	codeset = "UTF-8";
945	else
946	{
947	strcpy (resultbuf, buf);
948	codeset = resultbuf;
949	}
950
951	# elif defined OS2
952
953	const char *locale;
954	static char resultbuf[2 + 10 + 1];
955	ULONG cp[3];
956	ULONG cplen;
957
958	codeset = NULL;
959
960	/* Allow user to override the codeset, as set in the operating system,
961	with standard language environment variables. */
962	locale = getenv ("LC_ALL");
963	if (locale == NULL \|\| locale[0] == '\0')
964	{
965	locale = getenv ("LC_CTYPE");
966	if (locale == NULL \|\| locale[0] == '\0')
967	locale = getenv ("LANG");
968	}
969	if (locale != NULL && locale[0] != '\0')
970	{
971	/* If the locale name contains an encoding after the dot, return it. */
972	const char *dot = strchr (locale, '.');
973
974	if (dot != NULL)
975	{
976	const char *modifier;
977
978	dot++;
979	/* Look for the possible @... trailer and remove it, if any. */
980	modifier = strchr (dot, '@');
981	if (modifier == NULL)
982	return dot;
983	if (modifier - dot < sizeof (resultbuf))
984	{
985	/* This way of filling resultbuf is multithread-safe. */
986	memcpy (resultbuf, dot, modifier - dot);
987	resultbuf [modifier - dot] = '\0';
988	return resultbuf;
989	}
990	}
991
992	/* For the POSIX locale, don't use the system's codepage. */
993	if (strcmp (locale, "C") == 0 \|\| strcmp (locale, "POSIX") == 0)
994	codeset = "";
995	}
996
997	if (codeset == NULL)
998	{
999	/* OS/2 has a function returning the locale's codepage as a number. */
1000	if (DosQueryCp (sizeof (cp), cp, &cplen))
1001	codeset = "";
1002	else
1003	{
1004	char buf[2 + 10 + 1];
1005
1006	sprintf (buf, "CP%u", cp[0]);
1007	strcpy (resultbuf, buf);
1008	codeset = resultbuf;
1009	}
1010	}
1011
1012	# else
1013
1014	# error "Add code for other platforms here."
1015
1016	# endif
1017
1018	/* Resolve alias. */
1019	{
1020	# ifdef alias_table_defined
1021	/* On some platforms, UTF-8 locales are the most frequently used ones.
1022	Speed up the common case and slow down the less common cases by
1023	testing for this case first. */
1024	# if defined __OpenBSD__ \|\| (defined __APPLE__ && defined __MACH__) \|\| defined __sun \|\| defined __CYGWIN__
1025	if (strcmp (codeset, "UTF-8") == 0)
1026	goto done_table_lookup;
1027	else
1028	# endif
1029	{
1030	const struct table_entry * const table = alias_table;
1031	size_t const table_size =
1032	sizeof (alias_table) / sizeof (struct table_entry);
1033	/* The table is sorted. Perform a binary search. */
1034	size_t hi = table_size;
1035	size_t lo = 0;
1036	while (lo < hi)
1037	{
1038	/* Invariant:
1039	for i < lo, strcmp (table[i].alias, codeset) < 0,
1040	for i >= hi, strcmp (table[i].alias, codeset) > 0. */
1041	size_t mid = (hi + lo) >> 1; /* >= lo, < hi */
1042	int cmp = strcmp (table[mid].alias, codeset);
1043	if (cmp < 0)
1044	lo = mid + 1;
1045	else if (cmp > 0)
1046	hi = mid;
1047	else
1048	{
1049	/* Found an i with
1050	strcmp (table[i].alias, codeset) == 0. */
1051	codeset = table[mid].canonical;
1052	goto done_table_lookup;
1053	}
1054	}
1055	}
1056	if (0)
1057	done_table_lookup: ;
1058	else
1059	# endif
1060	{
1061	/* Did not find it in the table. */
1062	/* On Mac OS X, all modern locales use the UTF-8 encoding.
1063	BeOS and Haiku have a single locale, and it has UTF-8 encoding. */
1064	# if (defined __APPLE__ && defined __MACH__) \|\| defined __BEOS__ \|\| defined __HAIKU__
1065	codeset = "UTF-8";
1066	# else
1067	/* Don't return an empty string. GNU libc and GNU libiconv interpret
1068	the empty string as denoting "the locale's character encoding",
1069	thus GNU libiconv would call this function a second time. */
1070	if (codeset[0] == '\0')
1071	codeset = "ASCII";
1072	# endif
1073	}
1074	}
1075
1076	#else
1077
1078	/* On old systems which lack it, use setlocale or getenv. */
1079	const char *locale = NULL;
1080
1081	/* But most old systems don't have a complete set of locales. Some
1082	(like DJGPP) have only the C locale. Therefore we don't use setlocale
1083	here; it would return "C" when it doesn't support the locale name the
1084	user has set. */
1085	# if 0
1086	locale = setlocale (LC_CTYPE, NULL);
1087	# endif
1088	if (locale == NULL \|\| locale[0] == '\0')
1089	{
1090	locale = getenv ("LC_ALL");
1091	if (locale == NULL \|\| locale[0] == '\0')
1092	{
1093	locale = getenv ("LC_CTYPE");
1094	if (locale == NULL \|\| locale[0] == '\0')
1095	locale = getenv ("LANG");
1096	if (locale == NULL)
1097	locale = "";
1098	}
1099	}
1100
1101	/* Map locale name to canonical encoding name. */
1102	{
1103	# ifdef locale_table_defined
1104	const struct table_entry * const table = locale_table;
1105	size_t const table_size =
1106	sizeof (locale_table) / sizeof (struct table_entry);
1107	/* The table is sorted. Perform a binary search. */
1108	size_t hi = table_size;
1109	size_t lo = 0;
1110	while (lo < hi)
1111	{
1112	/* Invariant:
1113	for i < lo, strcmp (table[i].locale, locale) < 0,
1114	for i >= hi, strcmp (table[i].locale, locale) > 0. */
1115	size_t mid = (hi + lo) >> 1; /* >= lo, < hi */
1116	int cmp = strcmp (table[mid].locale, locale);
1117	if (cmp < 0)
1118	lo = mid + 1;
1119	else if (cmp > 0)
1120	hi = mid;
1121	else
1122	{
1123	/* Found an i with
1124	strcmp (table[i].locale, locale) == 0. */
1125	codeset = table[mid].canonical;
1126	goto done_table_lookup;
1127	}
1128	}
1129	if (0)
1130	done_table_lookup: ;
1131	else
1132	# endif
1133	{
1134	/* Did not find it in the table. */
1135	/* On Mac OS X, all modern locales use the UTF-8 encoding.
1136	BeOS and Haiku have a single locale, and it has UTF-8 encoding. */
1137	# if (defined __APPLE__ && defined __MACH__) \|\| defined __BEOS__ \|\| defined __HAIKU__
1138	codeset = "UTF-8";
1139	# else
1140	/* The canonical name cannot be determined. */
1141	/* Don't return an empty string. GNU libc and GNU libiconv interpret
1142	the empty string as denoting "the locale's character encoding",
1143	thus GNU libiconv would call this function a second time. */
1144	codeset = "ASCII";
1145	# endif
1146	}
1147	}
1148
1149	#endif
1150
1151	#ifdef DARWIN7
1152	/* Mac OS X sets MB_CUR_MAX to 1 when LC_ALL=C, and "UTF-8"
1153	(the default codeset) does not work when MB_CUR_MAX is 1. */
1154	if (strcmp (codeset, "UTF-8") == 0 && MB_CUR_MAX_L (uselocale (NULL)) <= 1)
1155	codeset = "ASCII";
1156	#endif
1157
1158	return codeset;
1159	}

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: trunk/src/sed/lib/localcharset.c@ 3619

Download in other formats: