source: trunk/essentials/sys-apps/findutils/lib/qmark.c

Last change on this file was 3170, checked in by bird, 18 years ago

findutils 4.3.2

File size: 6.1 KB
Line 
1/* qmark.c -- replace unprintable characters with question marks.
2 Copyright (C) 1990, 1991, 1992, 1993, 1994, 2000,
3 2003, 2004 Free Software Foundation, Inc.
4
5 This program is free software; you can redistribute it and/or modify
6 it under the terms of the GNU General Public License as published by
7 the Free Software Foundation; either version 2, or (at your option)
8 any later version.
9
10 This program is distributed in the hope that it will be useful,
11 but WITHOUT ANY WARRANTY; without even the implied warranty of
12 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 GNU General Public License for more details.
14
15 You should have received a copy of the GNU General Public License
16 along with this program; if not, write to the Free Software
17 Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
18 USA.
19*/
20
21#ifdef HAVE_CONFIG_H
22#include <config.h>
23#endif
24
25# include <stddef.h>
26# include <stdlib.h>
27#include <ctype.h>
28
29#if HAVE_STRING_H || STDC_HEADERS
30#include <string.h>
31#else
32#include <strings.h>
33#endif
34
35
36/* Get mbstate_t, mbrtowc(), mbsinit(), wcwidth(). */
37#if HAVE_WCHAR_H
38# include <wchar.h>
39#endif
40
41#include "printquoted.h"
42
43
44/*
45 This comment, IN_CTYPE_DOMAIN and ISPRINT were borrowed from
46 coreutils at Sun Jun 5 21:17:40 2005 UTC.
47
48 Jim Meyering writes:
49
50 "... Some ctype macros are valid only for character codes that
51 isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when
52 using /bin/cc or gcc but without giving an ansi option). So, all
53 ctype uses should be through macros like ISPRINT... If
54 STDC_HEADERS is defined, then autoconf has verified that the ctype
55 macros don't need to be guarded with references to isascii. ...
56 Defining isascii to 1 should let any compiler worth its salt
57 eliminate the && through constant folding."
58
59 Bruno Haible adds:
60
61 "... Furthermore, isupper(c) etc. have an undefined result if c is
62 outside the range -1 <= c <= 255. One is tempted to write isupper(c)
63 with c being of type `char', but this is wrong if c is an 8-bit
64 character >= 128 which gets sign-extended to a negative value.
65 The macro ISUPPER protects against this as well." */
66
67
68
69
/* IN_CTYPE_DOMAIN (c) guards the <ctype.h> classification macros.  It
   only expands to a real isascii() test when STDC_HEADERS is not set
   and isascii is available (as a macro, or according to HAVE_ISASCII);
   in every other configuration it is the constant 1.  */
#if !STDC_HEADERS && (defined (isascii) || HAVE_ISASCII)
# define IN_CTYPE_DOMAIN(c) isascii(c)
#else
# define IN_CTYPE_DOMAIN(c) 1
#endif

/* ISPRINT is defined in <sys/euc.h> on at least Solaris2.6 systems,
   so drop any previous definition before supplying our own.  */
#undef ISPRINT
#define ISPRINT(c) (IN_CTYPE_DOMAIN (c) && isprint (c))
79
80
81
82
83
/* Return CH converted to unsigned char.  Going through a function
   whose parameter is a plain `char' is a bit safer than writing a cast
   at the call site, because passing an argument of some unrelated type
   gets a chance to be diagnosed, whereas a cast would silently accept
   it.

   Borrowed from the system.h header of coreutils as of
   Sun Jun 5 21:05:21 2005 UTC.  */
static inline unsigned char
to_uchar (char ch)
{
  unsigned char converted = ch;

  return converted;
}
95
96
97
/* Overwrite, in place, every byte among the first LEN bytes of BUF
   that ISPRINT rejects with a question mark.  Each byte is judged on
   its own, so the length never changes; LEN is returned as-is.  */
static size_t
unibyte_qmark_chars(char *buf, size_t len)
{
  size_t i;

  for (i = 0; i < len; i++)
    {
      /* Printable bytes are left untouched.  */
      if (ISPRINT (to_uchar (buf[i])))
        continue;

      buf[i] = '?';
    }

  return len;
}
112
113
114#if HAVE_MBRTOWC
/* Rewrite the first LEN bytes of BUF in place so that every
   unprintable character is replaced by a single question mark, and
   return the resulting length, which may be smaller than LEN.

   When MB_CUR_MAX is at most 1 the work is handed to
   unibyte_qmark_chars().  Otherwise the bytes are scanned left to
   right:

   - a byte found in the `plain' table below is copied through
     unchanged;
   - anything else is decoded with mbrtowc().  A character for which
     wcwidth() is non-negative is kept byte for byte; one with a
     negative width collapses to one '?'.  An invalid sequence costs
     one input byte and yields one '?'.  An incomplete sequence at the
     end of the buffer swallows the remaining input and yields one
     '?'.  */
static size_t
multibyte_qmark_chars(char *buf, size_t len)
{
  /* The characters that are copied without consulting mbrtowc().
     They are spelled as character constants so the test does not
     depend on their numeric values.  */
  static char const plain[] =
    " !\"#%&'()*+,-./"
    "0123456789"
    ":;<=>?"
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "[\\]^_"
    "abcdefghijklmnopqrstuvwxyz"
    "{|}~";

  char const *src;
  char const *end;
  char *dst;

  if (MB_CUR_MAX <= 1)
    return unibyte_qmark_chars(buf, len);

  src = buf;
  end = buf + len;
  dst = buf;

  while (src < end)
    {
      mbstate_t state;

      if (memchr (plain, *src, sizeof plain - 1) != NULL)
        {
          /* Fast path: a character from the table above.  */
          *dst++ = *src++;
          continue;
        }

      /* Decode starting from the initial conversion state, and keep
         going until the conversion state is initial again, emitting
         either the original bytes or one '?' per character.  */
      memset (&state, 0, sizeof state);
      do
        {
          wchar_t wide;
          size_t consumed = mbrtowc (&wide, src, end - src, &state);

          if (consumed == (size_t) -1)
            {
              /* Invalid sequence: drop one input byte, emit '?'.  */
              src++;
              *dst++ = '?';
              break;
            }

          if (consumed == (size_t) -2)
            {
              /* Incomplete character at the end of the buffer: the
                 whole remainder becomes one '?'.  */
              src = end;
              *dst++ = '?';
              break;
            }

          /* mbrtowc() reports a null wide character as 0 bytes; it
             still occupies one byte of input.  */
          if (consumed == 0)
            consumed = 1;

          if (wcwidth (wide) < 0)
            {
              /* Unprintable: replace the whole character.  */
              src += consumed;
              *dst++ = '?';
            }
          else
            {
              /* Printable: keep it byte for byte.  */
              while (consumed > 0)
                {
                  *dst++ = *src++;
                  consumed--;
                }
            }
        }
      while (! mbsinit (&state));
    }

  /* The buffer may have shrunk.  */
  return dst - buf;
}
220#endif
221
222
/* Replace any dangerous-looking characters among the first LEN bytes
 * of BUF with question marks.  The multibyte-aware implementation is
 * used when HAVE_MBRTOWC is set, the single-byte one otherwise.
 *
 * The buffer is modified in place and may end up shorter than it was;
 * the length of the result is returned in either case.
 *
 * This code is taken from the ls.c file in coreutils as at
 * Sun Jun 5 20:51:54 2005 UTC.
 */
size_t
qmark_chars(char *buf, size_t len)
{
  size_t new_len;

#if HAVE_MBRTOWC
  new_len = multibyte_qmark_chars(buf, len);
#else
  new_len = unibyte_qmark_chars(buf, len);
#endif

  return new_len;
}
239
Note: See TracBrowser for help on using the repository browser.