Context Navigation

← Previous Revision
Next Revision →
Blame
Revision Log

source: branches/libc-0.6/src/libctests/glibc/catgets/gencat.c

Visit:

Last change on this file was 2036, checked in by bird, 20 years ago
Initial revision
Property cvs2svn:cvs-rev set to `1.1` Property svn:eol-style set to `native` Property svn:executable set to ``* Property svn:keywords set to `Author Date Id Revision`
File size: 34.9 KB

Line
1	/* Copyright (C) 1996-2002, 2003, 2004, 2005 Free Software Foundation, Inc.
2	This file is part of the GNU C Library.
3	Contributed by Ulrich Drepper <drepper@redhat.com>, 1996.
4
5	The GNU C Library is free software; you can redistribute it and/or
6	modify it under the terms of the GNU Lesser General Public
7	License as published by the Free Software Foundation; either
8	version 2.1 of the License, or (at your option) any later version.
9
10	The GNU C Library is distributed in the hope that it will be useful,
11	but WITHOUT ANY WARRANTY; without even the implied warranty of
12	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13	Lesser General Public License for more details.
14
15	You should have received a copy of the GNU Lesser General Public
16	License along with the GNU C Library; if not, write to the Free
17	Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
18	02111-1307 USA. */
19
20	#ifdef HAVE_CONFIG_H
21	# include <config.h>
22	#endif
23
24	#include <argp.h>
25	#include <assert.h>
26	#include <ctype.h>
27	#include <endian.h>
28	#include <errno.h>
29	#include <error.h>
30	#include <fcntl.h>
31	#include <iconv.h>
32	#include <langinfo.h>
33	#include <locale.h>
34	#include <libintl.h>
35	#include <limits.h>
36	#include <nl_types.h>
37	#include <obstack.h>
38	#include <stdint.h>
39	#include <stdio.h>
40	#include <stdlib.h>
41	#include <string.h>
42	#include <unistd.h>
43	#include <wchar.h>
44
45	#include "version.h"
46
47	#include "catgetsinfo.h"
48
49
/* Reverse the byte order of the 32-bit unsigned value W.  The
   argument is evaluated several times, so it must not have side
   effects, and it must have an unsigned 32-bit type so that the
   shifts neither overflow nor sign-extend.  */
#define SWAPU32(w) \
  (((w) << 24) | (((w) & 0xff00) << 8) | (((w) >> 8) & 0xff00) | ((w) >> 24))
52
/* One message of a message set.  The messages of a set form a
   singly linked list that is kept sorted by NUMBER.  */
struct message_list
{
  int number;                   /* Message number inside the set.  */
  const char *message;          /* Text of the message.  */

  const char *fname;            /* Input file the message came from.  */
  size_t line;                  /* Line in FNAME where it starts.  */
  const char *symbol;           /* Symbolic name, NULL if none given.  */

  struct message_list *next;    /* Next message of the same set.  */
};
64
65
/* One message set of a catalog.  All sets of a catalog are chained
   through NEXT.  */
struct set_list
{
  int number;                   /* Set number as stored by find_set,
                                   i.e. the user-visible number + 1.  */
  int deleted;                  /* Non-zero once `$delset' named it.  */
  struct message_list *messages;/* Messages of this set.  */
  int last_message;             /* Highest message number used so far.  */

  const char *fname;            /* Input file that defined the set.  */
  size_t line;                  /* Line in FNAME of the definition.  */
  const char *symbol;           /* Symbolic name, NULL if none given.  */

  struct set_list *next;        /* Next set of the catalog.  */
};
79
80
/* The complete catalog that is built up while reading the input
   files.  */
struct catalog
{
  struct set_list *all_sets;    /* Every set seen so far.  */
  struct set_list *current_set; /* Set new messages are added to.  */
  size_t total_messages;        /* Count used to size the hash table.  */
  wint_t quote_char;            /* Current `$quote' character, 0 if none.  */
  int last_set;                 /* Highest set number used so far.  */

  struct obstack mem_pool;      /* Storage for input lines and messages.  */
};
91
92
93	/* If non-zero force creation of new file, not using existing one. */
94	static int force_new;
95
96	/* Name of output file. */
97	static const char *output_name;
98
99	/* Name of generated C header file. */
100	static const char *header_name;
101
102	/* Name and version of program. */
103	static void print_version (FILE stream, struct argp_state state);
104	void (argp_program_version_hook) (FILE , struct argp_state *) = print_version;
105
106	#define OPT_NEW 1
107
108	/* Definitions of arguments for argp functions. */
109	static const struct argp_option options[] =
110	{
111	{ "header", 'H', N_("NAME"), 0,
112	N_("Create C header file NAME containing symbol definitions") },
113	{ "new", OPT_NEW, NULL, 0,
114	N_("Do not use existing catalog, force new output file") },
115	{ "output", 'o', N_("NAME"), 0, N_("Write output to file NAME") },
116	{ NULL, 0, NULL, 0, NULL }
117	};
118
119	/* Short description of program. */
120	static const char doc[] = N_("Generate message catalog.\
121	\vIf INPUT-FILE is -, input is read from standard input. If OUTPUT-FILE\n\
122	is -, output is written to standard output.\n");
123
124	/* Strings for arguments in help texts. */
125	static const char args_doc[] = N_("\
126	-o OUTPUT-FILE [INPUT-FILE]...\n[OUTPUT-FILE [INPUT-FILE]...]");
127
128	/* Prototype for option handler. */
129	static error_t parse_opt (int key, char arg, struct argp_state state);
130
131	/* Function to print some extra text in the help message. */
132	static char more_help (int key, const char text, void *input);
133
134	/* Data structure to communicate with argp functions. */
135	static struct argp argp =
136	{
137	options, parse_opt, args_doc, doc, NULL, more_help
138	};
139
140
141	/* Wrapper functions with error checking for standard functions. */
142	extern void *xmalloc (size_t n);
143	extern void *xcalloc (size_t n, size_t s);
144	extern void xrealloc (void o, size_t n);
145	extern char xstrdup (const char );
146
147	/* Prototypes for local functions. */
148	static void error_print (void);
149	static struct catalog read_input_file (struct catalog current,
150	const char *fname);
151	static void write_out (struct catalog result, const char output_name,
152	const char *header_name);
153	static struct set_list find_set (struct catalog current, int number);
154	static void normalize_line (const char *fname, size_t line, iconv_t cd,
155	wchar_t *string, wchar_t quote_char,
156	wchar_t escape_char);
157	static void read_old (struct catalog catalog, const char file_name);
158	static int open_conversion (const char codesetp, iconv_t cd_towcp,
159	iconv_t cd_tombp, wchar_t escape_charp);
160
161
162	int
163	main (int argc, char *argv[])
164	{
165	struct catalog *result;
166	int remaining;
167
168	/* Set program name for messages. */
169	error_print_progname = error_print;
170
171	/* Set locale via LC_ALL. */
172	setlocale (LC_ALL, "");
173
174	/* Set the text message domain. */
175	textdomain (PACKAGE);
176
177	/* Initialize local variables. */
178	result = NULL;
179
180	/* Parse and process arguments. */
181	argp_parse (&argp, argc, argv, 0, &remaining, NULL);
182
183	/* Determine output file. */
184	if (output_name == NULL)
185	output_name = remaining < argc ? argv[remaining++] : "-";
186
187	/* Process all input files. */
188	setlocale (LC_CTYPE, "C");
189	if (remaining < argc)
190	do
191	result = read_input_file (result, argv[remaining]);
192	while (++remaining < argc);
193	else
194	result = read_input_file (NULL, "-");
195
196	/* Write out the result. */
197	if (result != NULL)
198	write_out (result, output_name, header_name);
199
200	return error_message_count != 0;
201	}
202
203
204	/* Handle program arguments. */
205	static error_t
206	parse_opt (int key, char arg, struct argp_state state)
207	{
208	switch (key)
209	{
210	case 'H':
211	header_name = arg;
212	break;
213	case OPT_NEW:
214	force_new = 1;
215	break;
216	case 'o':
217	output_name = arg;
218	break;
219	default:
220	return ARGP_ERR_UNKNOWN;
221	}
222	return 0;
223	}
224
225
/* argp help filter.  For ARGP_KEY_HELP_EXTRA a freshly allocated copy
   of the bug reporting note is returned; for every other KEY the
   original TEXT is handed back unchanged.  INPUT is unused. */
static char *
more_help (int key, const char *text, void *input)
{
  switch (key)
    {
    case ARGP_KEY_HELP_EXTRA:
      /* We print some extra information. */
      return strdup (gettext ("\
For bug reporting instructions, please see:\n\
<http://www.gnu.org/software/libc/bugs.html>.\n"));
    default:
      break;
    }
  return (char *) text;
}
241
242	/* Print the version information. */
243	static void
244	print_version (FILE stream, struct argp_state state)
245	{
246	fprintf (stream, "gencat (GNU %s) %s\n", PACKAGE, VERSION);
247	fprintf (stream, gettext ("\
248	Copyright (C) %s Free Software Foundation, Inc.\n\
249	This is free software; see the source for copying conditions. There is NO\n\
250	warranty; not even for MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.\n\
251	"), "2005");
252	fprintf (stream, gettext ("Written by %s.\n"), "Ulrich Drepper");
253	}
254
255
/* The address of this function will be assigned to the hook in the
   error functions.  It deliberately prints nothing. */
static void
error_print (void)
{
  /* We don't want the program name to be printed in messages. Emacs'
     compile.el does not like this. */
}
264
265
266	static struct catalog *
267	read_input_file (struct catalog current, const char fname)
268	{
269	FILE *fp;
270	char *buf;
271	size_t len;
272	size_t line_number;
273	wchar_t *wbuf;
274	size_t wbufsize;
275	iconv_t cd_towc = (iconv_t) -1;
276	iconv_t cd_tomb = (iconv_t) -1;
277	wchar_t escape_char = L'\\';
278	char *codeset = NULL;
279
280	if (strcmp (fname, "-") == 0 \|\| strcmp (fname, "/dev/stdin") == 0)
281	{
282	fp = stdin;
283	fname = gettext ("standard input");
284	}
285	else
286	fp = fopen (fname, "r");
287	if (fp == NULL)
288	{
289	error (0, errno, gettext ("cannot open input file `%s'"), fname);
290	return current;
291	}
292
293	/* If we haven't seen anything yet, allocate result structure. */
294	if (current == NULL)
295	{
296	current = (struct catalog ) xcalloc (1, sizeof (current));
297
298	#define obstack_chunk_alloc malloc
299	#define obstack_chunk_free free
300	obstack_init (&current->mem_pool);
301
302	current->current_set = find_set (current, NL_SETD);
303	}
304
305	buf = NULL;
306	len = 0;
307	line_number = 0;
308
309	wbufsize = 1024;
310	wbuf = (wchar_t *) xmalloc (wbufsize);
311
312	while (!feof (fp))
313	{
314	int continued;
315	int used;
316	size_t start_line = line_number + 1;
317	char *this_line;
318
319	do
320	{
321	int act_len;
322
323	act_len = getline (&buf, &len, fp);
324	if (act_len <= 0)
325	break;
326	++line_number;
327
328	/* It the line continued? */
329	continued = 0;
330	if (buf[act_len - 1] == '\n')
331	{
332	--act_len;
333
334	/* There might be more than one backslash at the end of
335	the line. Only if there is an odd number of them is
336	the line continued. */
337	if (act_len > 0 && buf[act_len - 1] == '\\')
338	{
339	int temp_act_len = act_len;
340
341	do
342	{
343	--temp_act_len;
344	continued = !continued;
345	}
346	while (temp_act_len > 0 && buf[temp_act_len - 1] == '\\');
347
348	if (continued)
349	--act_len;
350	}
351	}
352
353	/* Append to currently selected line. */
354	obstack_grow (&current->mem_pool, buf, act_len);
355	}
356	while (continued);
357
358	obstack_1grow (&current->mem_pool, '\0');
359	this_line = (char *) obstack_finish (&current->mem_pool);
360
361	used = 0;
362	if (this_line[0] == '$')
363	{
364	if (isblank (this_line[1]))
365	{
366	int cnt = 1;
367	while (isblank (this_line[cnt]))
368	++cnt;
369	if (strncmp (&this_line[cnt], "codeset=", 8) != 0)
370	/* This is a comment line. Do nothing. */;
371	else if (codeset != NULL)
372	/* Ignore multiple codeset. */;
373	else
374	{
375	int start = cnt + 8;
376	cnt = start;
377	while (this_line[cnt] != '\0' && !isspace (this_line[cnt]))
378	++cnt;
379	if (cnt != start)
380	{
381	int len = cnt - start;
382	codeset = xmalloc (len + 1);
383	((char ) mempcpy (codeset, &this_line[start], len))
384	= '\0';
385	}
386	}
387	}
388	else if (strncmp (&this_line[1], "set", 3) == 0)
389	{
390	int cnt = sizeof ("set");
391	int set_number;
392	const char *symbol = NULL;
393	while (isspace (this_line[cnt]))
394	++cnt;
395
396	if (isdigit (this_line[cnt]))
397	{
398	set_number = atol (&this_line[cnt]);
399
400	/* If the given number for the character set is
401	higher than any we used for symbolic set names
402	avoid clashing by using only higher numbers for
403	the following symbolic definitions. */
404	if (set_number > current->last_set)
405	current->last_set = set_number;
406	}
407	else
408	{
409	/* See whether it is a reasonable identifier. */
410	int start = cnt;
411	while (isalnum (this_line[cnt]) \|\| this_line[cnt] == '_')
412	++cnt;
413
414	if (cnt == start)
415	{
416	/* No correct character found. */
417	error_at_line (0, 0, fname, start_line,
418	gettext ("illegal set number"));
419	set_number = 0;
420	}
421	else
422	{
423	/* We have found seomthing that looks like a
424	correct identifier. */
425	struct set_list *runp;
426
427	this_line[cnt] = '\0';
428	used = 1;
429	symbol = &this_line[start];
430
431	/* Test whether the identifier was already used. */
432	runp = current->all_sets;
433	while (runp != 0)
434	if (runp->symbol != NULL
435	&& strcmp (runp->symbol, symbol) == 0)
436	break;
437	else
438	runp = runp->next;
439
440	if (runp != NULL)
441	{
442	/* We cannot allow duplicate identifiers for
443	message sets. */
444	error_at_line (0, 0, fname, start_line,
445	gettext ("duplicate set definition"));
446	error_at_line (0, 0, runp->fname, runp->line,
447	gettext ("\
448	this is the first definition"));
449	set_number = 0;
450	}
451	else
452	/* Allocate next free message set for identifier. */
453	set_number = ++current->last_set;
454	}
455	}
456
457	if (set_number != 0)
458	{
459	/* We found a legal set number. */
460	current->current_set = find_set (current, set_number);
461	if (symbol != NULL)
462	used = 1;
463	current->current_set->symbol = symbol;
464	current->current_set->fname = fname;
465	current->current_set->line = start_line;
466	}
467	}
468	else if (strncmp (&this_line[1], "delset", 6) == 0)
469	{
470	int cnt = sizeof ("delset");
471	size_t set_number;
472	while (isspace (this_line[cnt]))
473	++cnt;
474
475	if (isdigit (this_line[cnt]))
476	{
477	size_t set_number = atol (&this_line[cnt]);
478	struct set_list *set;
479
480	/* Mark the message set with the given number as
481	deleted. */
482	set = find_set (current, set_number);
483	set->deleted = 1;
484	}
485	else
486	{
487	/* See whether it is a reasonable identifier. */
488	int start = cnt;
489	while (isalnum (this_line[cnt]) \|\| this_line[cnt] == '_')
490	++cnt;
491
492	if (cnt == start)
493	{
494	error_at_line (0, 0, fname, start_line,
495	gettext ("illegal set number"));
496	set_number = 0;
497	}
498	else
499	{
500	const char *symbol;
501	struct set_list *runp;
502
503	this_line[cnt] = '\0';
504	used = 1;
505	symbol = &this_line[start];
506
507	/* We have a symbolic set name. This name must
508	appear somewhere else in the catalogs read so
509	far. */
510	set_number = 0;
511	for (runp = current->all_sets; runp != NULL;
512	runp = runp->next)
513	{
514	if (strcmp (runp->symbol, symbol) == 0)
515	{
516	runp->deleted = 1;
517	break;
518	}
519	}
520	if (runp == NULL)
521	/* Name does not exist before. */
522	error_at_line (0, 0, fname, start_line,
523	gettext ("unknown set `%s'"), symbol);
524	}
525	}
526	}
527	else if (strncmp (&this_line[1], "quote", 5) == 0)
528	{
529	char buf[2];
530	char *bufptr;
531	size_t buflen;
532	char *wbufptr;
533	size_t wbuflen;
534	int cnt;
535
536	cnt = sizeof ("quote");
537	while (isspace (this_line[cnt]))
538	++cnt;
539
540	/* We need the conversion. */
541	if (cd_towc == (iconv_t) -1
542	&& open_conversion (codeset, &cd_towc, &cd_tomb,
543	&escape_char) != 0)
544	/* Something is wrong. */
545	goto out;
546
547	/* Yes, the quote char can be '\0'; this means no quote
548	char. The function using the information works on
549	wide characters so we have to convert it here. */
550	buf[0] = this_line[cnt];
551	buf[1] = '\0';
552	bufptr = buf;
553	buflen = 2;
554
555	wbufptr = (char *) wbuf;
556	wbuflen = wbufsize;
557
558	/* Flush the state. */
559	iconv (cd_towc, NULL, NULL, NULL, NULL);
560
561	iconv (cd_towc, &bufptr, &buflen, &wbufptr, &wbuflen);
562	if (buflen != 0 \|\| (wchar_t *) wbufptr != &wbuf[2])
563	error_at_line (0, 0, fname, start_line,
564	gettext ("invalid quote character"));
565	else
566	/* Use the converted wide character. */
567	current->quote_char = wbuf[0];
568	}
569	else
570	{
571	int cnt;
572	cnt = 2;
573	while (this_line[cnt] != '\0' && !isspace (this_line[cnt]))
574	++cnt;
575	this_line[cnt] = '\0';
576	error_at_line (0, 0, fname, start_line,
577	gettext ("unknown directive `%s': line ignored"),
578	&this_line[1]);
579	}
580	}
581	else if (isalnum (this_line[0]) \|\| this_line[0] == '_')
582	{
583	const char *ident = this_line;
584	char *line = this_line;
585	int message_number;
586
587	do
588	++line;
589	while (line[0] != '\0' && !isspace (line[0]));
590	if (line[0] != '\0')
591	line++ = '\0'; / Terminate the identifier. */
592
593	/* Now we found the beginning of the message itself. */
594
595	if (isdigit (ident[0]))
596	{
597	struct message_list *runp;
598	struct message_list *lastp;
599
600	message_number = atoi (ident);
601
602	/* Find location to insert the new message. */
603	runp = current->current_set->messages;
604	lastp = NULL;
605	while (runp != NULL)
606	if (runp->number == message_number)
607	break;
608	else
609	{
610	lastp = runp;
611	runp = runp->next;
612	}
613	if (runp != NULL)
614	{
615	/* Oh, oh. There is already a message with this
616	number in the message set. */
617	if (runp->symbol == NULL)
618	{
619	/* The existing message had its number specified
620	by the user. Fatal collision type uh, oh. */
621	error_at_line (0, 0, fname, start_line,
622	gettext ("duplicated message number"));
623	error_at_line (0, 0, runp->fname, runp->line,
624	gettext ("this is the first definition"));
625	message_number = 0;
626	}
627	else
628	{
629	/* Collision was with number auto-assigned to a
630	symbolic. Change existing symbolic number
631	and move to end the list (if not already there). */
632	runp->number = ++current->current_set->last_message;
633
634	if (runp->next != NULL)
635	{
636	struct message_list *endp;
637
638	if (lastp == NULL)
639	current->current_set->messages=runp->next;
640	else
641	lastp->next=runp->next;
642
643	endp = runp->next;
644	while (endp->next != NULL)
645	endp = endp->next;
646
647	endp->next = runp;
648	runp->next = NULL;
649	}
650	}
651	}
652	ident = NULL; /* We don't have a symbol. */
653
654	if (message_number != 0
655	&& message_number > current->current_set->last_message)
656	current->current_set->last_message = message_number;
657	}
658	else if (ident[0] != '\0')
659	{
660	struct message_list *runp;
661	struct message_list *lastp;
662
663	/* Test whether the symbolic name was not used for
664	another message in this message set. */
665	runp = current->current_set->messages;
666	lastp = NULL;
667	while (runp != NULL)
668	if (runp->symbol != NULL && strcmp (ident, runp->symbol) == 0)
669	break;
670	else
671	runp = runp->next;
672	if (runp != NULL)
673	{
674	/* The name is already used. */
675	error_at_line (0, 0, fname, start_line, gettext ("\
676	duplicated message identifier"));
677	error_at_line (0, 0, runp->fname, runp->line,
678	gettext ("this is the first definition"));
679	message_number = 0;
680	}
681	else
682	/* Give the message the next unused number. */
683	message_number = ++current->current_set->last_message;
684	}
685	else
686	message_number = 0;
687
688	if (message_number != 0)
689	{
690	char *inbuf;
691	size_t inlen;
692	char *outbuf;
693	size_t outlen;
694	struct message_list *newp;
695	size_t line_len = strlen (line) + 1;
696	size_t ident_len = 0;
697
698	/* We need the conversion. */
699	if (cd_towc == (iconv_t) -1
700	&& open_conversion (codeset, &cd_towc, &cd_tomb,
701	&escape_char) != 0)
702	/* Something is wrong. */
703	goto out;
704
705	/* Convert to a wide character string. We have to
706	interpret escape sequences which will be impossible
707	without doing the conversion if the codeset of the
708	message is stateful. */
709	while (1)
710	{
711	inbuf = line;
712	inlen = line_len;
713	outbuf = (char *) wbuf;
714	outlen = wbufsize;
715
716	/* Flush the state. */
717	iconv (cd_towc, NULL, NULL, NULL, NULL);
718
719	iconv (cd_towc, &inbuf, &inlen, &outbuf, &outlen);
720	if (inlen == 0)
721	{
722	/* The string is converted. */
723	assert (outlen < wbufsize);
724	assert (wbuf[(wbufsize - outlen) / sizeof (wchar_t) - 1]
725	== L'\0');
726	break;
727	}
728
729	if (outlen != 0)
730	{
731	/* Something is wrong with this string, we ignore it. */
732	error_at_line (0, 0, fname, start_line, gettext ("\
733	invalid character: message ignored"));
734	goto ignore;
735	}
736
737	/* The output buffer is too small. */
738	wbufsize *= 2;
739	wbuf = (wchar_t *) xrealloc (wbuf, wbufsize);
740	}
741
742	/* Strip quote characters, change escape sequences into
743	correct characters etc. */
744	normalize_line (fname, start_line, cd_towc, wbuf,
745	current->quote_char, escape_char);
746
747	if (ident)
748	ident_len = line - this_line;
749
750	/* Now the string is free of escape sequences. Convert it
751	back into a multibyte character string. First free the
752	memory allocated for the original string. */
753	obstack_free (&current->mem_pool, this_line);
754
755	used = 1; /* Yes, we use the line. */
756
757	/* Now fill in the new string. It should never happen that
758	the replaced string is longer than the original. */
759	inbuf = (char *) wbuf;
760	inlen = (wcslen (wbuf) + 1) * sizeof (wchar_t);
761
762	outlen = obstack_room (&current->mem_pool);
763	obstack_blank (&current->mem_pool, outlen);
764	this_line = (char *) obstack_base (&current->mem_pool);
765	outbuf = this_line + ident_len;
766	outlen -= ident_len;
767
768	/* Flush the state. */
769	iconv (cd_tomb, NULL, NULL, NULL, NULL);
770
771	iconv (cd_tomb, &inbuf, &inlen, &outbuf, &outlen);
772	if (inlen != 0)
773	{
774	error_at_line (0, 0, fname, start_line,
775	gettext ("invalid line"));
776	goto ignore;
777	}
778	assert (outbuf[-1] == '\0');
779
780	/* Free the memory in the obstack we don't use. */
781	obstack_blank (&current->mem_pool, -(int) outlen);
782	line = obstack_finish (&current->mem_pool);
783
784	newp = (struct message_list ) xmalloc (sizeof (newp));
785	newp->number = message_number;
786	newp->message = line + ident_len;
787	/* Remember symbolic name; is NULL if no is given. */
788	newp->symbol = ident ? line : NULL;
789	/* Remember where we found the character. */
790	newp->fname = fname;
791	newp->line = start_line;
792
793	/* Find place to insert to message. We keep them in a
794	sorted single linked list. */
795	if (current->current_set->messages == NULL
796	\|\| current->current_set->messages->number > message_number)
797	{
798	newp->next = current->current_set->messages;
799	current->current_set->messages = newp;
800	}
801	else
802	{
803	struct message_list *runp;
804	runp = current->current_set->messages;
805	while (runp->next != NULL)
806	if (runp->next->number > message_number)
807	break;
808	else
809	runp = runp->next;
810	newp->next = runp->next;
811	runp->next = newp;
812	}
813	}
814	++current->total_messages;
815	}
816	else
817	{
818	size_t cnt;
819
820	cnt = 0;
821	/* See whether we have any non-white space character in this
822	line. */
823	while (this_line[cnt] != '\0' && isspace (this_line[cnt]))
824	++cnt;
825
826	if (this_line[cnt] != '\0')
827	/* Yes, some unknown characters found. */
828	error_at_line (0, 0, fname, start_line,
829	gettext ("malformed line ignored"));
830	}
831
832	ignore:
833	/* We can save the memory for the line if it was not used. */
834	if (!used)
835	obstack_free (&current->mem_pool, this_line);
836	}
837
838	/* Close the conversion modules. */
839	iconv_close (cd_towc);
840	iconv_close (cd_tomb);
841	free (codeset);
842
843	out:
844	free (wbuf);
845
846	if (fp != stdin)
847	fclose (fp);
848	return current;
849	}
850
851
852	static void
853	write_out (struct catalog catalog, const char output_name,
854	const char *header_name)
855	{
856	/* Computing the "optimal" size. */
857	struct set_list *set_run;
858	size_t best_total, best_size, best_depth;
859	size_t act_size, act_depth;
860	struct catalog_obj obj;
861	struct obstack string_pool;
862	const char *strings;
863	size_t strings_size;
864	uint32_t array1, array2;
865	size_t cnt;
866	int fd;
867
868	/* If not otherwise told try to read file with existing
869	translations. */
870	if (!force_new)
871	read_old (catalog, output_name);
872
873	/* Initialize best_size with a very high value. */
874	best_total = best_size = best_depth = UINT_MAX;
875
876	/* We need some start size for testing. Let's start with
877	TOTAL_MESSAGES / 5, which theoretically provides a mean depth of
878	5. */
879	act_size = 1 + catalog->total_messages / 5;
880
881	/* We determine the size of a hash table here. Because the message
882	numbers can be chosen arbitrary by the programmer we cannot use
883	the simple method of accessing the array using the message
884	number. The algorithm is based on the trivial hash function
885	NUMBER % TABLE_SIZE, where collisions are stored in a second
886	dimension up to TABLE_DEPTH. We here compute TABLE_SIZE so that
887	the needed space (= TABLE_SIZE * TABLE_DEPTH) is minimal. */
888	while (act_size <= best_total)
889	{
890	size_t deep[act_size];
891
892	act_depth = 1;
893	memset (deep, '\0', act_size * sizeof (size_t));
894	set_run = catalog->all_sets;
895	while (set_run != NULL)
896	{
897	struct message_list *message_run;
898
899	message_run = set_run->messages;
900	while (message_run != NULL)
901	{
902	size_t idx = (message_run->number * set_run->number) % act_size;
903
904	++deep[idx];
905	if (deep[idx] > act_depth)
906	{
907	act_depth = deep[idx];
908	if (act_depth * act_size > best_total)
909	break;
910	}
911	message_run = message_run->next;
912	}
913	set_run = set_run->next;
914	}
915
916	if (act_depth * act_size <= best_total)
917	{
918	/* We have found a better solution. */
919	best_total = act_depth * act_size;
920	best_size = act_size;
921	best_depth = act_depth;
922	}
923
924	++act_size;
925	}
926
927	/* let's be prepared for an empty message file. */
928	if (best_size == UINT_MAX)
929	{
930	best_size = 1;
931	best_depth = 1;
932	}
933
934	/* OK, now we have the size we will use. Fill in the header, build
935	the table and the second one with swapped byte order. */
936	obj.magic = CATGETS_MAGIC;
937	obj.plane_size = best_size;
938	obj.plane_depth = best_depth;
939
940	/* Allocate room for all needed arrays. */
941	array1 =
942	(uint32_t ) alloca (best_size best_depth * sizeof (uint32_t) * 3);
943	memset (array1, '\0', best_size * best_depth * sizeof (uint32_t) * 3);
944	array2
945	= (uint32_t ) alloca (best_size best_depth * sizeof (uint32_t) * 3);
946	obstack_init (&string_pool);
947
948	set_run = catalog->all_sets;
949	while (set_run != NULL)
950	{
951	struct message_list *message_run;
952
953	message_run = set_run->messages;
954	while (message_run != NULL)
955	{
956	size_t idx = (((message_run->number * set_run->number) % best_size)
957	* 3);
958	/* Determine collision depth. */
959	while (array1[idx] != 0)
960	idx += best_size * 3;
961
962	/* Store set number, message number and pointer into string
963	space, relative to the first string. */
964	array1[idx + 0] = set_run->number;
965	array1[idx + 1] = message_run->number;
966	array1[idx + 2] = obstack_object_size (&string_pool);
967
968	/* Add current string to the continuous space containing all
969	strings. */
970	obstack_grow0 (&string_pool, message_run->message,
971	strlen (message_run->message));
972
973	message_run = message_run->next;
974	}
975
976	set_run = set_run->next;
977	}
978	strings_size = obstack_object_size (&string_pool);
979	strings = obstack_finish (&string_pool);
980
981	/* Compute ARRAY2 by changing the byte order. */
982	for (cnt = 0; cnt < best_size * best_depth * 3; ++cnt)
983	array2[cnt] = SWAPU32 (array1[cnt]);
984
985	/* Now we can write out the whole data. */
986	if (strcmp (output_name, "-") == 0
987	\|\| strcmp (output_name, "/dev/stdout") == 0)
988	fd = STDOUT_FILENO;
989	else
990	{
991	fd = creat (output_name, 0666);
992	if (fd < 0)
993	error (EXIT_FAILURE, errno, gettext ("cannot open output file `%s'"),
994	output_name);
995	}
996
997	/* Write out header. */
998	write (fd, &obj, sizeof (obj));
999
1000	/* We always write out the little endian version of the index
1001	arrays. */
1002	#if __BYTE_ORDER == __LITTLE_ENDIAN
1003	write (fd, array1, best_size * best_depth * sizeof (uint32_t) * 3);
1004	write (fd, array2, best_size * best_depth * sizeof (uint32_t) * 3);
1005	#elif __BYTE_ORDER == __BIG_ENDIAN
1006	write (fd, array2, best_size * best_depth * sizeof (uint32_t) * 3);
1007	write (fd, array1, best_size * best_depth * sizeof (uint32_t) * 3);
1008	#else
1009	# error Cannot handle __BYTE_ORDER byte order
1010	#endif
1011
1012	/* Finally write the strings. */
1013	write (fd, strings, strings_size);
1014
1015	if (fd != STDOUT_FILENO)
1016	close (fd);
1017
1018	/* If requested now write out the header file. */
1019	if (header_name != NULL)
1020	{
1021	int first = 1;
1022	FILE *fp;
1023
1024	/* Open output file. "-" or "/dev/stdout" means write to
1025	standard output. */
1026	if (strcmp (header_name, "-") == 0
1027	\|\| strcmp (header_name, "/dev/stdout") == 0)
1028	fp = stdout;
1029	else
1030	{
1031	fp = fopen (header_name, "w");
1032	if (fp == NULL)
1033	error (EXIT_FAILURE, errno,
1034	gettext ("cannot open output file `%s'"), header_name);
1035	}
1036
1037	/* Iterate over all sets and all messages. */
1038	set_run = catalog->all_sets;
1039	while (set_run != NULL)
1040	{
1041	struct message_list *message_run;
1042
1043	/* If the current message set has a symbolic name write this
1044	out first. */
1045	if (set_run->symbol != NULL)
1046	fprintf (fp, "%s#define %sSet %#x\t/* %s:%Zu */\n",
1047	first ? "" : "\n", set_run->symbol, set_run->number - 1,
1048	set_run->fname, set_run->line);
1049	first = 0;
1050
1051	message_run = set_run->messages;
1052	while (message_run != NULL)
1053	{
1054	/* If the current message has a symbolic name write
1055	#define out. But we have to take care for the set
1056	not having a symbolic name. */
1057	if (message_run->symbol != NULL)
1058	{
1059	if (set_run->symbol == NULL)
1060	fprintf (fp, "#define AutomaticSet%d%s %#x\t/* %s:%Zu */\n",
1061	set_run->number, message_run->symbol,
1062	message_run->number, message_run->fname,
1063	message_run->line);
1064	else
1065	fprintf (fp, "#define %s%s %#x\t/* %s:%Zu */\n",
1066	set_run->symbol, message_run->symbol,
1067	message_run->number, message_run->fname,
1068	message_run->line);
1069	}
1070
1071	message_run = message_run->next;
1072	}
1073
1074	set_run = set_run->next;
1075	}
1076
1077	if (fp != stdout)
1078	fclose (fp);
1079	}
1080	}
1081
1082
1083	static struct set_list *
1084	find_set (struct catalog *current, int number)
1085	{
1086	struct set_list *result = current->all_sets;
1087
1088	/* We must avoid set number 0 because a set of this number signals
1089	in the tables that the entry is not occupied. */
1090	++number;
1091
1092	while (result != NULL)
1093	if (result->number == number)
1094	return result;
1095	else
1096	result = result->next;
1097
1098	/* Prepare new message set. */
1099	result = (struct set_list ) xcalloc (1, sizeof (result));
1100	result->number = number;
1101	result->next = current->all_sets;
1102	current->all_sets = result;
1103
1104	return result;
1105	}
1106
1107
/* Normalize the wide character string STRING in place by processing
   escape sequences and quote characters.

   FNAME and LINE are only used in error messages.  CD is a conversion
   descriptor from the input codeset to wchar_t; it is only used to
   convert the byte named by an octal escape.  QUOTE_CHAR is the
   current quote character (L'\0' for none) and ESCAPE_CHAR the escape
   character.  The result is never longer than the input. */
static void
normalize_line (const char *fname, size_t line, iconv_t cd, wchar_t *string,
                wchar_t quote_char, wchar_t escape_char)
{
  int is_quoted;
  wchar_t *rp = string;
  wchar_t *wp = string;

  if (quote_char != L'\0' && *rp == quote_char)
    {
      is_quoted = 1;
      ++rp;
    }
  else
    is_quoted = 0;

  while (*rp != L'\0')
    if (*rp == quote_char)
      /* We simply end the string when we find the first time an
         not-escaped quote character. */
      break;
    else if (*rp == escape_char)
      {
        ++rp;
        if (quote_char != L'\0' && *rp == quote_char)
          /* This is an extension to XPG. */
          *wp++ = *rp++;
        else
          /* Recognize escape sequences. */
          switch (*rp)
            {
            case L'n':
              *wp++ = L'\n';
              ++rp;
              break;
            case L't':
              *wp++ = L'\t';
              ++rp;
              break;
            case L'v':
              *wp++ = L'\v';
              ++rp;
              break;
            case L'b':
              *wp++ = L'\b';
              ++rp;
              break;
            case L'r':
              *wp++ = L'\r';
              ++rp;
              break;
            case L'f':
              *wp++ = L'\f';
              ++rp;
              break;
            case L'0' ... L'7':
              {
                int number;
                char cbuf[2];
                char *cbufptr;
                size_t cbufin;
                wchar_t wcbuf[2];
                char *wcbufptr;
                size_t wcbufin;

                /* Collect octal digits while the value still fits
                   into one byte. */
                number = *rp++ - L'0';
                while (number <= (255 / 8) && *rp >= L'0' && *rp <= L'7')
                  {
                    number *= 8;
                    number += *rp++ - L'0';
                  }

                /* The escape names a byte of the input codeset, so it
                   has to go through the conversion as well. */
                cbuf[0] = (char) number;
                cbuf[1] = '\0';
                cbufptr = cbuf;
                cbufin = 2;

                wcbufptr = (char *) wcbuf;
                wcbufin = sizeof (wcbuf);

                /* Flush the state. */
                iconv (cd, NULL, NULL, NULL, NULL);

                iconv (cd, &cbufptr, &cbufin, &wcbufptr, &wcbufin);
                if (cbufptr != &cbuf[2] || (wchar_t *) wcbufptr != &wcbuf[2])
                  error_at_line (0, 0, fname, line,
                                 gettext ("invalid escape sequence"));
                else
                  *wp++ = wcbuf[0];
              }
              break;
            default:
              if (*rp == escape_char)
                {
                  *wp++ = escape_char;
                  ++rp;
                }
              else
                /* Simply ignore the backslash character. */;
              break;
            }
      }
    else
      *wp++ = *rp++;

  /* If we saw a quote character at the beginning we expect another
     one at the end. */
  if (is_quoted && *rp != quote_char)
    error_at_line (0, 0, fname, line, gettext ("unterminated message"));

  /* Terminate string. */
  *wp = L'\0';
  return;
}
1224
1225
1226	static void
1227	read_old (struct catalog catalog, const char file_name)
1228	{
1229	struct catalog_info old_cat_obj;
1230	struct set_list *set = NULL;
1231	int last_set = -1;
1232	size_t cnt;
1233
1234	/* Try to open catalog, but don't look through the NLSPATH. */
1235	if (__open_catalog (file_name, NULL, NULL, &old_cat_obj) != 0)
1236	{
1237	if (errno == ENOENT)
1238	/* No problem, the catalog simply does not exist. */
1239	return;
1240	else
1241	error (EXIT_FAILURE, errno,
1242	gettext ("while opening old catalog file"));
1243	}
1244
1245	/* OK, we have the catalog loaded. Now read all messages and merge
1246	them. When set and message number clash for any message the new
1247	one is used. If the new one is empty it indicates that the
1248	message should be deleted. */
1249	for (cnt = 0; cnt < old_cat_obj.plane_size * old_cat_obj.plane_depth; ++cnt)
1250	{
1251	struct message_list message, last;
1252
1253	if (old_cat_obj.name_ptr[cnt * 3 + 0] == 0)
1254	/* No message in this slot. */
1255	continue;
1256
1257	if (old_cat_obj.name_ptr[cnt * 3 + 0] - 1 != (uint32_t) last_set)
1258	{
1259	last_set = old_cat_obj.name_ptr[cnt * 3 + 0] - 1;
1260	set = find_set (catalog, old_cat_obj.name_ptr[cnt * 3 + 0] - 1);
1261	}
1262
1263	last = NULL;
1264	message = set->messages;
1265	while (message != NULL)
1266	{
1267	if ((uint32_t) message->number >= old_cat_obj.name_ptr[cnt * 3 + 1])
1268	break;
1269	last = message;
1270	message = message->next;
1271	}
1272
1273	if (message == NULL
1274	\|\| (uint32_t) message->number > old_cat_obj.name_ptr[cnt * 3 + 1])
1275	{
1276	/* We have found a message which is not yet in the catalog.
1277	Insert it at the right position. */
1278	struct message_list *newp;
1279
1280	newp = (struct message_list ) xmalloc (sizeof(newp));
1281	newp->number = old_cat_obj.name_ptr[cnt * 3 + 1];
1282	newp->message =
1283	&old_cat_obj.strings[old_cat_obj.name_ptr[cnt * 3 + 2]];
1284	newp->fname = NULL;
1285	newp->line = 0;
1286	newp->symbol = NULL;
1287	newp->next = message;
1288
1289	if (last == NULL)
1290	set->messages = newp;
1291	else
1292	last->next = newp;
1293
1294	++catalog->total_messages;
1295	}
1296	else if (*message->message == '\0')
1297	{
1298	/* The new empty message has overridden the old one thus
1299	"deleting" it as required. Now remove the empty remains. */
1300	if (last == NULL)
1301	set->messages = message->next;
1302	else
1303	last->next = message->next;
1304	}
1305	}
1306	}
1307
1308
/* Open the conversion descriptors needed to process a message file
   written in CODESET and determine the escape character to recognize.

   CODESET may be NULL, in which case the codeset of the locale selected
   by the environment is used.  On success *CD_TOWCP converts from
   CODESET to "WCHAR_T", *CD_TOMBP converts from "WCHAR_T" to CODESET,
   *ESCAPE_CHARP holds the wide character the byte 0x5c maps to, and 0
   is returned.  If either descriptor cannot be opened a diagnostic is
   printed, any descriptor that was opened is closed again, both
   descriptors are set to (iconv_t) -1 so that no stale handle is left
   behind, and 1 is returned; *ESCAPE_CHARP is untouched then.  */
static int
open_conversion (const char *codeset, iconv_t *cd_towcp, iconv_t *cd_tombp,
		 wchar_t *escape_charp)
{
  char buf[2];
  char *bufptr;
  size_t bufsize;
  wchar_t wbuf[2];
  char *wbufptr;
  size_t wbufsize;

  /* If the input file does not specify the codeset use the locale's.  */
  if (codeset == NULL)
    {
      /* NOTE(review): POSIX allows the string returned by nl_langinfo
	 to be invalidated by a later setlocale call; the pointer is
	 nevertheless used after switching back to "C" -- confirm this
	 is acceptable on the targeted C library.  */
      setlocale (LC_ALL, "");
      codeset = nl_langinfo (CODESET);
      setlocale (LC_ALL, "C");
    }

  /* Get the conversion modules.  */
  *cd_towcp = iconv_open ("WCHAR_T", codeset);
  *cd_tombp = iconv_open (codeset, "WCHAR_T");
  if (*cd_towcp == (iconv_t) -1 || *cd_tombp == (iconv_t) -1)
    {
      error (0, 0, gettext ("conversion modules not available"));

      /* Release whichever descriptor could be opened and leave both
	 in the "not open" state.  */
      if (*cd_towcp != (iconv_t) -1)
	{
	  iconv_close (*cd_towcp);
	  *cd_towcp = (iconv_t) -1;
	}
      if (*cd_tombp != (iconv_t) -1)
	{
	  iconv_close (*cd_tombp);
	  *cd_tombp = (iconv_t) -1;
	}

      return 1;
    }

  /* One special case for historical reasons is the backslash
     character.  In some codesets the byte value 0x5c is not mapped to
     U005c in Unicode.  These charsets then don't have a backslash
     character at all.  Therefore we have to live with whatever the
     codeset provides and recognize, instead of the U005c, the character
     the byte value 0x5c is mapped to.  */
  buf[0] = '\\';
  buf[1] = '\0';
  bufptr = buf;
  bufsize = 2;

  wbufptr = (char *) wbuf;
  wbufsize = sizeof (wbuf);

  /* The byte is converted together with a terminating NUL; a complete
     conversion therefore uses up both buffers entirely.  */
  iconv (*cd_towcp, &bufptr, &bufsize, &wbufptr, &wbufsize);
  if (bufsize != 0 || wbufsize != 0)
    {
      /* Something went wrong, we couldn't convert the byte 0x5c.  Go
	 on with using U005c.  */
      error (0, 0, gettext ("cannot determine escape character"));
      *escape_charp = L'\\';
    }
  else
    *escape_charp = wbuf[0];

  return 0;
}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: