source: trunk/server/lib/util/charset/charset.h

Last change on this file was 745, checked in by Silvan Scherrer, 13 years ago

Samba Server: updated trunk to 3.6.0

File size: 11.1 KB
Line 
1/*
2 Unix SMB/CIFS implementation.
3 charset defines
4 Copyright (C) Andrew Tridgell 2001
5 Copyright (C) Jelmer Vernooij 2002
6
7 This program is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3 of the License, or
10 (at your option) any later version.
11
12 This program is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with this program. If not, see <http://www.gnu.org/licenses/>.
19*/
20
21/* This is a public header file that is installed as part of Samba.
22 * If you remove any functions or change their signature, update
23 * the so version number. */
24
25#ifndef __CHARSET_H__
26#define __CHARSET_H__
27
28#include <talloc.h>
29
/*
 * This defines the charset types used in samba.
 *
 * CH_UTF16 is an alias of CH_UTF16LE (both are 0); the remaining
 * enumerators take the consecutive values 1..6.
 */
typedef enum {
	CH_UTF16LE = 0,
	CH_UTF16 = 0,
	CH_UNIX,
	CH_DISPLAY,
	CH_DOS,
	CH_UTF8,
	CH_UTF16BE,
	CH_UTF16MUNGED
} charset_t;

/* Number of distinct charset_t values (0..6); keep in sync with the enum. */
#define NUM_CHARSETS 7
34
/*
 * SMB UCS2 (16-bit unicode) internal type.
 * smb_ucs2_t is *always* in little endian format.
 */
typedef uint16_t smb_ucs2_t;
41
/*
 * Bit shift applied when converting between a 7 bit character and its
 * smb_ucs2_t form: 8 when WORDS_BIGENDIAN is defined, 0 otherwise.
 */
#ifdef WORDS_BIGENDIAN
#define UCS2_SHIFT 8
#else
#define UCS2_SHIFT 0
#endif

/* turn a 7 bit character into a ucs2 character */
#define UCS2_CHAR(c) ((c) << UCS2_SHIFT)

/* return an ascii version of a ucs2 character (low 8 bits after unshifting) */
#define UCS2_TO_CHAR(c) (((c) >> UCS2_SHIFT) & 0xff)
53
/*
 * Copy one smb_ucs2_t (two bytes) from a possibly unaligned buffer, byte by
 * byte, so that neither pointer needs 16-bit alignment.
 *
 * The expression evaluates to (dest), i.e. the destination pointer, not the
 * copied value.  dest is evaluated three times and src twice, so neither
 * argument may have side effects.  src is only read, hence the const cast.
 */
#define COPY_UCS2_CHAR(dest,src) (((unsigned char *)(dest))[0] = ((const unsigned char *)(src))[0],\
				((unsigned char *)(dest))[1] = ((const unsigned char *)(src))[1], (dest))
57
58
59
/*
 * for each charset we have a function that pulls from that charset to
 * a ucs2 buffer, and a function that pushes from a ucs2 buffer to that
 * charset
 *
 * Both callbacks share the iconv-style signature: they advance *inbuf and
 * *outbuf and decrement *inbytesleft and *outbytesleft as data is converted.
 * prev/next allow instances to be linked into a doubly linked list.
 */
struct charset_functions {
	const char *name;
	size_t (*pull)(void *, const char **inbuf, size_t *inbytesleft,
		       char **outbuf, size_t *outbytesleft);
	size_t (*push)(void *, const char **inbuf, size_t *inbytesleft,
		       char **outbuf, size_t *outbytesleft);
	struct charset_functions *prev, *next;
};
73
/* this type is used for manipulating unicode codepoints */
typedef uint32_t codepoint_t;

/* Sentinel codepoint value: all bits set (0xFFFFFFFF). */
#define INVALID_CODEPOINT ((codepoint_t)-1)
78
/*
 * This is auxiliary struct used by source/script/gen-8bit-gap.sh script
 * during generation of an encoding table for charset module
 *
 * As consumed by SMB_GENERATE_CHARSET_MODULE_8_BIT_GAP in this header:
 * start and end bound an inclusive range of 16-bit character values, idx is
 * added to a character in that range to index the from_ucs2 table, and a
 * table of these entries ends with an entry whose start is 0xffff.
 */
struct charset_gap_table {
	uint16_t start;
	uint16_t end;
	int32_t idx;
};
89
90
/*
 * generic iconv conversion structure
 *
 * Holds three conversion callbacks with identical iconv-style signatures,
 * one opaque handle (cd_*) per callback, and the names of the source and
 * destination charsets.
 * NOTE(review): presumably "direct" converts in one step while pull/push go
 * through an intermediate encoding -- confirm against the implementation.
 *
 * Note that smb_iconv_t is a *pointer* to this structure.
 */
typedef struct smb_iconv_s {
	size_t (*direct)(void *cd, const char **inbuf, size_t *inbytesleft,
			 char **outbuf, size_t *outbytesleft);
	size_t (*pull)(void *cd, const char **inbuf, size_t *inbytesleft,
		       char **outbuf, size_t *outbytesleft);
	size_t (*push)(void *cd, const char **inbuf, size_t *inbytesleft,
		       char **outbuf, size_t *outbytesleft);
	void *cd_direct, *cd_pull, *cd_push;
	char *from_name, *to_name;
} *smb_iconv_t;
102
/*
 * string manipulation flags
 *
 * Each flag is a distinct single bit so that they can be OR-ed together
 * (e.g. in the "flags" argument of push_string()/pull_string()).
 */
#define STR_TERMINATE        0x001
#define STR_UPPER            0x002
#define STR_ASCII            0x004
#define STR_UNICODE          0x008
#define STR_NOALIGN          0x010
#define STR_NO_RANGE_CHECK   0x020
#define STR_LEN8BIT          0x040
#define STR_TERMINATE_ASCII  0x080 /* only terminate if ascii */
#define STR_LEN_NOTERM       0x100 /* the length field is the unterminated length */
113
114struct loadparm_context;
115struct smb_iconv_convenience;
116
117/* replace some string functions with multi-byte
118 versions */
119#define strlower(s) strlower_m(s)
120#define strupper(s) strupper_m(s)
121
122char *strchr_m(const char *s, char c);
123size_t strlen_m_ext(const char *s, charset_t src_charset, charset_t dst_charset);
124size_t strlen_m_ext_term(const char *s, charset_t src_charset,
125 charset_t dst_charset);
126size_t strlen_m_term(const char *s);
127size_t strlen_m_term_null(const char *s);
128size_t strlen_m(const char *s);
129char *alpha_strcpy(char *dest, const char *src, const char *other_safe_chars, size_t maxlength);
130void string_replace_m(char *s, char oldc, char newc);
131bool strcsequal_m(const char *s1,const char *s2);
132bool strequal_m(const char *s1, const char *s2);
133int strncasecmp_m(const char *s1, const char *s2, size_t n);
134bool next_token(const char **ptr,char *buff, const char *sep, size_t bufsize);
135int strcasecmp_m(const char *s1, const char *s2);
136size_t count_chars_m(const char *s, char c);
137void strupper_m(char *s);
138void strlower_m(char *s);
139char *strupper_talloc(TALLOC_CTX *ctx, const char *src);
140char *talloc_strdup_upper(TALLOC_CTX *ctx, const char *src);
141char *strupper_talloc_n(TALLOC_CTX *ctx, const char *src, size_t n);
142char *strlower_talloc(TALLOC_CTX *ctx, const char *src);
143bool strhasupper(const char *string);
144bool strhaslower(const char *string);
145char *strrchr_m(const char *s, char c);
146char *strchr_m(const char *s, char c);
147
148bool push_ascii_talloc(TALLOC_CTX *ctx, char **dest, const char *src, size_t *converted_size);
149bool push_ucs2_talloc(TALLOC_CTX *ctx, smb_ucs2_t **dest, const char *src, size_t *converted_size);
150bool push_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src, size_t *converted_size);
151bool pull_ascii_talloc(TALLOC_CTX *ctx, char **dest, const char *src, size_t *converted_size);
152bool pull_ucs2_talloc(TALLOC_CTX *ctx, char **dest, const smb_ucs2_t *src, size_t *converted_size);
153bool pull_utf8_talloc(TALLOC_CTX *ctx, char **dest, const char *src, size_t *converted_size);
154ssize_t push_string(void *dest, const char *src, size_t dest_len, int flags);
155ssize_t pull_string(char *dest, const void *src, size_t dest_len, size_t src_len, int flags);
156
157bool convert_string_talloc(TALLOC_CTX *ctx,
158 charset_t from, charset_t to,
159 void const *src, size_t srclen,
160 void *dest, size_t *converted_size,
161 bool allow_badcharcnv);
162
163size_t convert_string(charset_t from, charset_t to,
164 void const *src, size_t srclen,
165 void *dest, size_t destlen, bool allow_badcharcnv);
166
167ssize_t iconv_talloc(TALLOC_CTX *mem_ctx,
168 smb_iconv_t cd,
169 void const *src, size_t srclen,
170 void *dest);
171
172extern struct smb_iconv_convenience *global_iconv_convenience;
173struct smb_iconv_convenience *get_iconv_convenience(void);
174smb_iconv_t get_conv_handle(struct smb_iconv_convenience *ic,
175 charset_t from, charset_t to);
176const char *charset_name(struct smb_iconv_convenience *ic, charset_t ch);
177
178codepoint_t next_codepoint_ext(const char *str, charset_t src_charset,
179 size_t *size);
180codepoint_t next_codepoint(const char *str, size_t *size);
181ssize_t push_codepoint(char *str, codepoint_t c);
182
183/* codepoints */
184codepoint_t next_codepoint_convenience_ext(struct smb_iconv_convenience *ic,
185 const char *str, charset_t src_charset,
186 size_t *size);
187codepoint_t next_codepoint_convenience(struct smb_iconv_convenience *ic,
188 const char *str, size_t *size);
189ssize_t push_codepoint_convenience(struct smb_iconv_convenience *ic,
190 char *str, codepoint_t c);
191
192codepoint_t toupper_m(codepoint_t val);
193codepoint_t tolower_m(codepoint_t val);
194bool islower_m(codepoint_t val);
195bool isupper_m(codepoint_t val);
196int codepoint_cmpi(codepoint_t c1, codepoint_t c2);
197
198/* Iconv convenience functions */
199struct smb_iconv_convenience *smb_iconv_convenience_reinit(TALLOC_CTX *mem_ctx,
200 const char *dos_charset,
201 const char *unix_charset,
202 const char *display_charset,
203 bool native_iconv,
204 struct smb_iconv_convenience *old_ic);
205
206bool convert_string_convenience(struct smb_iconv_convenience *ic,
207 charset_t from, charset_t to,
208 void const *src, size_t srclen,
209 void *dest, size_t destlen, size_t *converted_size,
210 bool allow_badcharcnv);
211bool convert_string_talloc_convenience(TALLOC_CTX *ctx,
212 struct smb_iconv_convenience *ic,
213 charset_t from, charset_t to,
214 void const *src, size_t srclen,
215 void *dest, size_t *converted_size, bool allow_badcharcnv);
216/* iconv */
217smb_iconv_t smb_iconv_open(const char *tocode, const char *fromcode);
218int smb_iconv_close(smb_iconv_t cd);
219size_t smb_iconv(smb_iconv_t cd,
220 const char **inbuf, size_t *inbytesleft,
221 char **outbuf, size_t *outbytesleft);
222smb_iconv_t smb_iconv_open_ex(TALLOC_CTX *mem_ctx, const char *tocode,
223 const char *fromcode, bool native_iconv);
224
/* Case table loading. */
void load_case_tables(void);
void load_case_tables_library(void);

/*
 * Register a charset implementation; returns a bool status.  The generated
 * charset_<NAME>_init() functions in this header treat false as failure.
 */
struct charset_functions;
bool smb_register_charset(const struct charset_functions *funcs_in);
228
/*
 * Define stub for charset module which implements 8-bit encoding with gaps.
 * Encoding tables for such module should be produced from glibc's CHARMAPs
 * using script source/script/gen-8bit-gap.sh
 * CHARSETNAME is CAPITALIZED charset name
 *
 * The file that expands this macro must already provide:
 *   - from_idx[]  : struct charset_gap_table entries, terminated by an entry
 *                   whose start is 0xffff
 *   - from_ucs2[] : output bytes, indexed by from_idx[i].idx + character
 *   - to_ucs2[]   : 16-bit values, indexed by the input byte
 *   - SVAL()/SSVAL(), errno with EINVAL/E2BIG, NTSTATUS and the
 *     NT_STATUS_OK / NT_STATUS_INTERNAL_ERROR codes
 *
 * The expansion defines:
 *   - static CHARSETNAME_push(): consumes 2 input bytes per character and
 *     emits 1 output byte.  Fails with (size_t)-1 and errno EINVAL when a
 *     character is in no from_idx range or a single trailing input byte is
 *     left; fails with errno E2BIG when input remains but the output is
 *     full.  Returns 0 otherwise.
 *   - static CHARSETNAME_pull(): consumes 1 input byte and emits 2 output
 *     bytes via to_ucs2[].  Fails with (size_t)-1 and errno E2BIG when
 *     input remains; returns 0 otherwise.
 *   - struct charset_functions CHARSETNAME_functions, named #CHARSETNAME.
 *   - NTSTATUS charset_CHARSETNAME_init(void), which registers the above
 *     with smb_register_charset().
 *
 * Both converters advance the buffer pointers and byte counts only for
 * characters that were actually converted.
 * */
#define SMB_GENERATE_CHARSET_MODULE_8_BIT_GAP(CHARSETNAME) \
static size_t CHARSETNAME ## _push(void *cd, const char **inbuf, size_t *inbytesleft, \
				   char **outbuf, size_t *outbytesleft) \
{ \
	while (*inbytesleft >= 2 && *outbytesleft >= 1) { \
		int i; \
		int done = 0; \
		\
		uint16_t ch = SVAL(*inbuf,0); \
		\
		for (i=0; from_idx[i].start != 0xffff; i++) { \
			if ((from_idx[i].start <= ch) && (from_idx[i].end >= ch)) { \
				((unsigned char*)(*outbuf))[0] = from_ucs2[from_idx[i].idx+ch]; \
				(*inbytesleft) -= 2; \
				(*outbytesleft) -= 1; \
				(*inbuf) += 2; \
				(*outbuf) += 1; \
				done = 1; \
				break; \
			} \
		} \
		if (!done) { \
			errno = EINVAL; \
			return (size_t)-1; \
		} \
		\
	} \
	\
	if (*inbytesleft == 1) { \
		errno = EINVAL; \
		return (size_t)-1; \
	} \
	\
	if (*inbytesleft > 1) { \
		errno = E2BIG; \
		return (size_t)-1; \
	} \
	\
	return 0; \
} \
\
static size_t CHARSETNAME ## _pull(void *cd, const char **inbuf, size_t *inbytesleft, \
				   char **outbuf, size_t *outbytesleft) \
{ \
	while (*inbytesleft >= 1 && *outbytesleft >= 2) { \
		SSVAL(*outbuf, 0, to_ucs2[((unsigned char*)(*inbuf))[0]]); \
		(*inbytesleft) -= 1; \
		(*outbytesleft) -= 2; \
		(*inbuf) += 1; \
		(*outbuf) += 2; \
	} \
	\
	if (*inbytesleft > 0) { \
		errno = E2BIG; \
		return (size_t)-1; \
	} \
	\
	return 0; \
} \
\
struct charset_functions CHARSETNAME ## _functions = \
	{#CHARSETNAME, CHARSETNAME ## _pull, CHARSETNAME ## _push}; \
\
NTSTATUS charset_ ## CHARSETNAME ## _init(void); \
NTSTATUS charset_ ## CHARSETNAME ## _init(void) \
{ \
	if (!smb_register_charset(& CHARSETNAME ## _functions)) { \
		return NT_STATUS_INTERNAL_ERROR; \
	} \
	return NT_STATUS_OK; \
}
307
308
309#endif /* __CHARSET_H__ */
Note: See TracBrowser for help on using the repository browser.