source: trunk/essentials/app-arch/cpio/lib/quotearg.c

Last change on this file was 3332, checked in by bird, 18 years ago

cpio 2.7

File size: 18.1 KB
Line 
1/* quotearg.c - quote arguments for output
2
3 Copyright (C) 1998, 1999, 2000, 2001, 2002, 2004, 2005, 2006 Free
4 Software Foundation, Inc.
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software Foundation,
18 Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. */
19
20/* Written by Paul Eggert <eggert@twinsun.com> */
21
22#include <config.h>
23
24#include "quotearg.h"
25
26#include "xalloc.h"
27
28#include <ctype.h>
29#include <errno.h>
30#include <limits.h>
31#include <stdbool.h>
32#include <stdlib.h>
33#include <string.h>
34
35#include "gettext.h"
36#define _(msgid) gettext (msgid)
37#define N_(msgid) msgid
38
39#if HAVE_WCHAR_H
40
41/* BSD/OS 4.1 wchar.h requires FILE and struct tm to be declared. */
42# include <stdio.h>
43# include <time.h>
44
45# include <wchar.h>
46#endif
47
48#if !HAVE_MBRTOWC
49/* Disable multibyte processing entirely. Since MB_CUR_MAX is 1, the
50 other macros are defined only for documentation and to satisfy C
51 syntax. */
52# undef MB_CUR_MAX
53# define MB_CUR_MAX 1
54# define mbrtowc(pwc, s, n, ps) ((*(pwc) = *(s)) != 0)
55# define iswprint(wc) isprint ((unsigned char) (wc))
56# undef HAVE_MBSINIT
57#endif
58
59#if !defined mbsinit && !HAVE_MBSINIT
60# define mbsinit(ps) 1
61#endif
62
63#ifndef iswprint
64# if HAVE_WCTYPE_H
65# include <wctype.h>
66# endif
67# if !defined iswprint && !HAVE_ISWPRINT
68# define iswprint(wc) 1
69# endif
70#endif
71
72#ifndef SIZE_MAX
73# define SIZE_MAX ((size_t) -1)
74#endif
75
76#define INT_BITS (sizeof (int) * CHAR_BIT)
77
78struct quoting_options
79{
80 /* Basic quoting style. */
81 enum quoting_style style;
82
83 /* Quote the characters indicated by this bit vector even if the
84 quoting style would not normally require them to be quoted. */
85 unsigned int quote_these_too[(UCHAR_MAX / INT_BITS) + 1];
86};
87
/* Printable names of the quoting styles, ended by a null pointer.
   Entry I is the name of the style stored in quoting_style_vals[I].  */
char const *const quoting_style_args[] =
{
  "literal",
  "shell",
  "shell-always",
  "c",
  "escape",
  "locale",
  "clocale",
  NULL
};
100
101/* Correspondences to quoting style names. */
102enum quoting_style const quoting_style_vals[] =
103{
104 literal_quoting_style,
105 shell_quoting_style,
106 shell_always_quoting_style,
107 c_quoting_style,
108 escape_quoting_style,
109 locale_quoting_style,
110 clocale_quoting_style
111};
112
113/* The default quoting options. */
114static struct quoting_options default_quoting_options;
115
116/* Allocate a new set of quoting options, with contents initially identical
117 to O if O is not null, or to the default if O is null.
118 It is the caller's responsibility to free the result. */
119struct quoting_options *
120clone_quoting_options (struct quoting_options *o)
121{
122 int e = errno;
123 struct quoting_options *p = xmalloc (sizeof *p);
124 *p = *(o ? o : &default_quoting_options);
125 errno = e;
126 return p;
127}
128
129/* Get the value of O's quoting style. If O is null, use the default. */
130enum quoting_style
131get_quoting_style (struct quoting_options *o)
132{
133 return (o ? o : &default_quoting_options)->style;
134}
135
136/* In O (or in the default if O is null),
137 set the value of the quoting style to S. */
138void
139set_quoting_style (struct quoting_options *o, enum quoting_style s)
140{
141 (o ? o : &default_quoting_options)->style = s;
142}
143
144/* In O (or in the default if O is null),
145 set the value of the quoting options for character C to I.
146 Return the old value. Currently, the only values defined for I are
147 0 (the default) and 1 (which means to quote the character even if
148 it would not otherwise be quoted). */
149int
150set_char_quoting (struct quoting_options *o, char c, int i)
151{
152 unsigned char uc = c;
153 unsigned int *p =
154 (o ? o : &default_quoting_options)->quote_these_too + uc / INT_BITS;
155 int shift = uc % INT_BITS;
156 int r = (*p >> shift) & 1;
157 *p ^= ((i & 1) ^ r) << shift;
158 return r;
159}
160
161/* MSGID approximates a quotation mark. Return its translation if it
162 has one; otherwise, return either it or "\"", depending on S. */
163static char const *
164gettext_quote (char const *msgid, enum quoting_style s)
165{
166 char const *translation = _(msgid);
167 if (translation == msgid && s == clocale_quoting_style)
168 translation = "\"";
169 return translation;
170}
171
172/* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
173 argument ARG (of size ARGSIZE), using QUOTING_STYLE and the
174 non-quoting-style part of O to control quoting.
175 Terminate the output with a null character, and return the written
176 size of the output, not counting the terminating null.
177 If BUFFERSIZE is too small to store the output string, return the
178 value that would have been returned had BUFFERSIZE been large enough.
179 If ARGSIZE is SIZE_MAX, use the string length of the argument for ARGSIZE.
180
181 This function acts like quotearg_buffer (BUFFER, BUFFERSIZE, ARG,
182 ARGSIZE, O), except it uses QUOTING_STYLE instead of the quoting
183 style specified by O, and O may not be null. */
184
185static size_t
186quotearg_buffer_restyled (char *buffer, size_t buffersize,
187 char const *arg, size_t argsize,
188 enum quoting_style quoting_style,
189 struct quoting_options const *o)
190{
191 size_t i;
192 size_t len = 0;
193 char const *quote_string = 0;
194 size_t quote_string_len = 0;
195 bool backslash_escapes = false;
196 bool unibyte_locale = MB_CUR_MAX == 1;
197
198#define STORE(c) \
199 do \
200 { \
201 if (len < buffersize) \
202 buffer[len] = (c); \
203 len++; \
204 } \
205 while (0)
206
207 switch (quoting_style)
208 {
209 case c_quoting_style:
210 STORE ('"');
211 backslash_escapes = true;
212 quote_string = "\"";
213 quote_string_len = 1;
214 break;
215
216 case escape_quoting_style:
217 backslash_escapes = true;
218 break;
219
220 case locale_quoting_style:
221 case clocale_quoting_style:
222 {
223 /* TRANSLATORS:
224 Get translations for open and closing quotation marks.
225
226 The message catalog should translate "`" to a left
227 quotation mark suitable for the locale, and similarly for
228 "'". If the catalog has no translation,
229 locale_quoting_style quotes `like this', and
230 clocale_quoting_style quotes "like this".
231
232 For example, an American English Unicode locale should
233 translate "`" to U+201C (LEFT DOUBLE QUOTATION MARK), and
234 should translate "'" to U+201D (RIGHT DOUBLE QUOTATION
235 MARK). A British English Unicode locale should instead
236 translate these to U+2018 (LEFT SINGLE QUOTATION MARK) and
237 U+2019 (RIGHT SINGLE QUOTATION MARK), respectively.
238
239 If you don't know what to put here, please see
240 <http://en.wikipedia.org/wiki/Quotation_mark#Glyphs>
241 and use glyphs suitable for your language. */
242
243 char const *left = gettext_quote (N_("`"), quoting_style);
244 char const *right = gettext_quote (N_("'"), quoting_style);
245 for (quote_string = left; *quote_string; quote_string++)
246 STORE (*quote_string);
247 backslash_escapes = true;
248 quote_string = right;
249 quote_string_len = strlen (quote_string);
250 }
251 break;
252
253 case shell_always_quoting_style:
254 STORE ('\'');
255 quote_string = "'";
256 quote_string_len = 1;
257 break;
258
259 default:
260 break;
261 }
262
263 for (i = 0; ! (argsize == SIZE_MAX ? arg[i] == '\0' : i == argsize); i++)
264 {
265 unsigned char c;
266 unsigned char esc;
267
268 if (backslash_escapes
269 && quote_string_len
270 && i + quote_string_len <= argsize
271 && memcmp (arg + i, quote_string, quote_string_len) == 0)
272 STORE ('\\');
273
274 c = arg[i];
275 switch (c)
276 {
277 case '\0':
278 if (backslash_escapes)
279 {
280 STORE ('\\');
281 STORE ('0');
282 STORE ('0');
283 c = '0';
284 }
285 break;
286
287 case '?':
288 switch (quoting_style)
289 {
290 case shell_quoting_style:
291 goto use_shell_always_quoting_style;
292
293 case c_quoting_style:
294 if (i + 2 < argsize && arg[i + 1] == '?')
295 switch (arg[i + 2])
296 {
297 case '!': case '\'':
298 case '(': case ')': case '-': case '/':
299 case '<': case '=': case '>':
300 /* Escape the second '?' in what would otherwise be
301 a trigraph. */
302 c = arg[i + 2];
303 i += 2;
304 STORE ('?');
305 STORE ('\\');
306 STORE ('?');
307 break;
308
309 default:
310 break;
311 }
312 break;
313
314 default:
315 break;
316 }
317 break;
318
319 case '\a': esc = 'a'; goto c_escape;
320 case '\b': esc = 'b'; goto c_escape;
321 case '\f': esc = 'f'; goto c_escape;
322 case '\n': esc = 'n'; goto c_and_shell_escape;
323 case '\r': esc = 'r'; goto c_and_shell_escape;
324 case '\t': esc = 't'; goto c_and_shell_escape;
325 case '\v': esc = 'v'; goto c_escape;
326 case '\\': esc = c; goto c_and_shell_escape;
327
328 c_and_shell_escape:
329 if (quoting_style == shell_quoting_style)
330 goto use_shell_always_quoting_style;
331 c_escape:
332 if (backslash_escapes)
333 {
334 c = esc;
335 goto store_escape;
336 }
337 break;
338
339 case '{': case '}': /* sometimes special if isolated */
340 if (! (argsize == SIZE_MAX ? arg[1] == '\0' : argsize == 1))
341 break;
342 /* Fall through. */
343 case '#': case '~':
344 if (i != 0)
345 break;
346 /* Fall through. */
347 case ' ':
348 case '!': /* special in bash */
349 case '"': case '$': case '&':
350 case '(': case ')': case '*': case ';':
351 case '<':
352 case '=': /* sometimes special in 0th or (with "set -k") later args */
353 case '>': case '[':
354 case '^': /* special in old /bin/sh, e.g. SunOS 4.1.4 */
355 case '`': case '|':
356 /* A shell special character. In theory, '$' and '`' could
357 be the first bytes of multibyte characters, which means
358 we should check them with mbrtowc, but in practice this
359 doesn't happen so it's not worth worrying about. */
360 if (quoting_style == shell_quoting_style)
361 goto use_shell_always_quoting_style;
362 break;
363
364 case '\'':
365 switch (quoting_style)
366 {
367 case shell_quoting_style:
368 goto use_shell_always_quoting_style;
369
370 case shell_always_quoting_style:
371 STORE ('\'');
372 STORE ('\\');
373 STORE ('\'');
374 break;
375
376 default:
377 break;
378 }
379 break;
380
381 case '%': case '+': case ',': case '-': case '.': case '/':
382 case '0': case '1': case '2': case '3': case '4': case '5':
383 case '6': case '7': case '8': case '9': case ':':
384 case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
385 case 'G': case 'H': case 'I': case 'J': case 'K': case 'L':
386 case 'M': case 'N': case 'O': case 'P': case 'Q': case 'R':
387 case 'S': case 'T': case 'U': case 'V': case 'W': case 'X':
388 case 'Y': case 'Z': case ']': case '_': case 'a': case 'b':
389 case 'c': case 'd': case 'e': case 'f': case 'g': case 'h':
390 case 'i': case 'j': case 'k': case 'l': case 'm': case 'n':
391 case 'o': case 'p': case 'q': case 'r': case 's': case 't':
392 case 'u': case 'v': case 'w': case 'x': case 'y': case 'z':
393 /* These characters don't cause problems, no matter what the
394 quoting style is. They cannot start multibyte sequences. */
395 break;
396
397 default:
398 /* If we have a multibyte sequence, copy it until we reach
399 its end, find an error, or come back to the initial shift
400 state. For C-like styles, if the sequence has
401 unprintable characters, escape the whole sequence, since
402 we can't easily escape single characters within it. */
403 {
404 /* Length of multibyte sequence found so far. */
405 size_t m;
406
407 bool printable;
408
409 if (unibyte_locale)
410 {
411 m = 1;
412 printable = isprint (c) != 0;
413 }
414 else
415 {
416 mbstate_t mbstate;
417 memset (&mbstate, 0, sizeof mbstate);
418
419 m = 0;
420 printable = true;
421 if (argsize == SIZE_MAX)
422 argsize = strlen (arg);
423
424 do
425 {
426 wchar_t w;
427 size_t bytes = mbrtowc (&w, &arg[i + m],
428 argsize - (i + m), &mbstate);
429 if (bytes == 0)
430 break;
431 else if (bytes == (size_t) -1)
432 {
433 printable = false;
434 break;
435 }
436 else if (bytes == (size_t) -2)
437 {
438 printable = false;
439 while (i + m < argsize && arg[i + m])
440 m++;
441 break;
442 }
443 else
444 {
445 /* Work around a bug with older shells that "see" a '\'
446 that is really the 2nd byte of a multibyte character.
447 In practice the problem is limited to ASCII
448 chars >= '@' that are shell special chars. */
449 if ('[' == 0x5b && quoting_style == shell_quoting_style)
450 {
451 size_t j;
452 for (j = 1; j < bytes; j++)
453 switch (arg[i + m + j])
454 {
455 case '[': case '\\': case '^':
456 case '`': case '|':
457 goto use_shell_always_quoting_style;
458
459 default:
460 break;
461 }
462 }
463
464 if (! iswprint (w))
465 printable = false;
466 m += bytes;
467 }
468 }
469 while (! mbsinit (&mbstate));
470 }
471
472 if (1 < m || (backslash_escapes && ! printable))
473 {
474 /* Output a multibyte sequence, or an escaped
475 unprintable unibyte character. */
476 size_t ilim = i + m;
477
478 for (;;)
479 {
480 if (backslash_escapes && ! printable)
481 {
482 STORE ('\\');
483 STORE ('0' + (c >> 6));
484 STORE ('0' + ((c >> 3) & 7));
485 c = '0' + (c & 7);
486 }
487 if (ilim <= i + 1)
488 break;
489 STORE (c);
490 c = arg[++i];
491 }
492
493 goto store_c;
494 }
495 }
496 }
497
498 if (! (backslash_escapes
499 && o->quote_these_too[c / INT_BITS] & (1 << (c % INT_BITS))))
500 goto store_c;
501
502 store_escape:
503 STORE ('\\');
504
505 store_c:
506 STORE (c);
507 }
508
509 if (i == 0 && quoting_style == shell_quoting_style)
510 goto use_shell_always_quoting_style;
511
512 if (quote_string)
513 for (; *quote_string; quote_string++)
514 STORE (*quote_string);
515
516 if (len < buffersize)
517 buffer[len] = '\0';
518 return len;
519
520 use_shell_always_quoting_style:
521 return quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
522 shell_always_quoting_style, o);
523}
524
525/* Place into buffer BUFFER (of size BUFFERSIZE) a quoted version of
526 argument ARG (of size ARGSIZE), using O to control quoting.
527 If O is null, use the default.
528 Terminate the output with a null character, and return the written
529 size of the output, not counting the terminating null.
530 If BUFFERSIZE is too small to store the output string, return the
531 value that would have been returned had BUFFERSIZE been large enough.
532 If ARGSIZE is SIZE_MAX, use the string length of the argument for
533 ARGSIZE. */
534size_t
535quotearg_buffer (char *buffer, size_t buffersize,
536 char const *arg, size_t argsize,
537 struct quoting_options const *o)
538{
539 struct quoting_options const *p = o ? o : &default_quoting_options;
540 int e = errno;
541 size_t r = quotearg_buffer_restyled (buffer, buffersize, arg, argsize,
542 p->style, p);
543 errno = e;
544 return r;
545}
546
/* Like quotearg_buffer (..., ARG, ARGSIZE, O), but return the quoted
   string in newly allocated storage.  errno is left as it was on
   entry.  */
char *
quotearg_alloc (char const *arg, size_t argsize,
                struct quoting_options const *o)
{
  int saved_errno = errno;

  /* A first pass with an empty buffer only measures the output;
     one more byte is needed for the terminating null.  */
  size_t needed = quotearg_buffer (NULL, 0, arg, argsize, o) + 1;
  char *result = xmalloc (needed);

  quotearg_buffer (result, needed, arg, argsize, o);

  errno = saved_errno;
  return result;
}
560
/* Return a quoted version of ARG, kept in storage slot N.
   ARG is ARGSIZE bytes long, or null-terminated when ARGSIZE is
   SIZE_MAX.  OPTIONS gives the quoting options.
   The result lives in static storage that the next call with the
   same N may reuse.
   N must be nonnegative; the program is aborted otherwise.  N has
   type "int" on purpose, leaving negative values available for
   future extensions.  errno is left as it was on entry.  */
static char *
quotearg_n_options (int n, char const *arg, size_t argsize,
                    struct quoting_options const *options)
{
  struct slotvec
  {
    size_t size;
    char *val;
  };

  /* Slot 0 is backed by a preallocated buffer, so that the caller can
     always quote one small component of a "memory exhausted" message
     in slot 0.  */
  static char slot0[256];
  static struct slotvec slotvec0 = {sizeof slot0, slot0};
  static struct slotvec *slotvec = &slotvec0;
  static unsigned int nslots = 1;

  int saved_errno = errno;
  unsigned int wanted = n;
  struct slotvec *slot;
  size_t needed;

  if (n < 0)
    abort ();

  if (nslots <= wanted)
    {
      /* FIXME: strictly speaking this count should be an `unsigned
         int', but that draws an unsuppressible warning from gcc-4.0.1
         and older.  Should gcc ever offer a way to silence it, go
         back to that type so that the xalloc_oversized test is once
         more evaluated at compile time only.  */
      size_t new_count = wanted + 1;

      if (xalloc_oversized (new_count, sizeof *slotvec))
        xalloc_die ();

      /* The initial one-element vector is a static object and cannot
         be handed to xrealloc; move it to the heap first.  */
      if (slotvec == &slotvec0)
        {
          slotvec = xmalloc (sizeof *slotvec);
          *slotvec = slotvec0;
        }
      slotvec = xrealloc (slotvec, new_count * sizeof *slotvec);

      /* New slots start out empty: size 0, null buffer.  */
      memset (slotvec + nslots, 0, (new_count - nslots) * sizeof *slotvec);
      nslots = new_count;
    }

  slot = &slotvec[n];
  needed = quotearg_buffer (slot->val, slot->size, arg, argsize, options);

  if (slot->size <= needed)
    {
      /* The slot's buffer is too small: replace it and quote again.  */
      slot->size = needed + 1;
      if (slot->val != slot0)
        free (slot->val);
      slot->val = xmalloc (slot->size);
      quotearg_buffer (slot->val, slot->size, arg, argsize, options);
    }

  errno = saved_errno;
  return slot->val;
}
631
632char *
633quotearg_n (int n, char const *arg)
634{
635 return quotearg_n_options (n, arg, SIZE_MAX, &default_quoting_options);
636}
637
/* Quote the null-terminated string ARG with the default quoting
   options, returning the result in storage slot 0.  */
char *
quotearg (char const *arg)
{
  char *quoted = quotearg_n (0, arg);

  return quoted;
}
643
644/* Return quoting options for STYLE, with no extra quoting. */
645static struct quoting_options
646quoting_options_from_style (enum quoting_style style)
647{
648 struct quoting_options o;
649 o.style = style;
650 memset (o.quote_these_too, 0, sizeof o.quote_these_too);
651 return o;
652}
653
654char *
655quotearg_n_style (int n, enum quoting_style s, char const *arg)
656{
657 struct quoting_options const o = quoting_options_from_style (s);
658 return quotearg_n_options (n, arg, SIZE_MAX, &o);
659}
660
661char *
662quotearg_n_style_mem (int n, enum quoting_style s,
663 char const *arg, size_t argsize)
664{
665 struct quoting_options const o = quoting_options_from_style (s);
666 return quotearg_n_options (n, arg, argsize, &o);
667}
668
669char *
670quotearg_style (enum quoting_style s, char const *arg)
671{
672 return quotearg_n_style (0, s, arg);
673}
674
675char *
676quotearg_char (char const *arg, char ch)
677{
678 struct quoting_options options;
679 options = default_quoting_options;
680 set_char_quoting (&options, ch, 1);
681 return quotearg_n_options (0, arg, SIZE_MAX, &options);
682}
683
/* Like quotearg_char, with ':' as the additionally quoted character.  */
char *
quotearg_colon (char const *arg)
{
  char const colon = ':';

  return quotearg_char (arg, colon);
}
Note: See TracBrowser for help on using the repository browser.