source: vendor/3.6.23/source3/modules/charset_macosxfs.c

Last change on this file was 740, checked in by Silvan Scherrer, 13 years ago

Samba Server: update vendor to 3.6.0

File size: 16.6 KB
Line 
1/*
2 Unix SMB/CIFS implementation.
3 Samba charset module for Mac OS X/Darwin
4 Copyright (C) Benjamin Riefenstahl 2003
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 3 of the License, or
9 (at your option) any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program. If not, see <http://www.gnu.org/licenses/>.
18*/
19
20/*
21 * modules/charset_macosxfs.c
22 *
23 * A Samba charset module to use on Mac OS X/Darwin as the filesystem
24 * and display encoding.
25 *
26 * Actually two implementations are provided here. The default
27 * implementation is based on the official CFString API. The other is
28 * based on internal CFString APIs as defined in the OpenDarwin
29 * source.
30 */
31
32#include "includes.h"
33
34/*
35 * Include OS frameworks. These are only needed in this module.
36 */
37#include <CoreFoundation/CFString.h>
38
39/*
40 * See if autoconf has found us the internal headers in some form.
41 */
42#if HAVE_COREFOUNDATION_CFSTRINGENCODINGCONVERTER_H
43# include <CoreFoundation/CFStringEncodingConverter.h>
44# include <CoreFoundation/CFUnicodePrecomposition.h>
45# define USE_INTERNAL_API 1
46#elif HAVE_CFSTRINGENCODINGCONVERTER_H
47# include <CFStringEncodingConverter.h>
48# include <CFUnicodePrecomposition.h>
49# define USE_INTERNAL_API 1
50#endif
51
52/*
53 * Compile time configuration: Do we want debug output?
54 */
55/* #define DEBUG_STRINGS 1 */
56
/*
 * A simple, but efficient memory provider for our buffers.
 *
 * Grows `buffer` when `newsize` exceeds the capacity recorded in
 * `*size`; the buffer is never shrunk.  A growing request allocates
 * 128 bytes more than asked for, so that slightly larger follow-up
 * requests do not need another reallocation.
 *
 * Returns the (possibly moved) buffer.  If the reallocation fails,
 * NULL is returned and `*size` is reset to 0, so that the recorded
 * capacity never describes a buffer that does not exist and the next
 * call tries to allocate again.
 */
static inline void *resize_buffer (void *buffer, size_t *size, size_t newsize)
{
	if (newsize > *size) {
		size_t wanted = newsize + 128;
		void *grown = SMB_REALLOC(buffer, wanted);

		if (NULL == grown) {
			/*
			 * NOTE(review): this assumes SMB_REALLOC releases
			 * the old block on failure -- confirm against its
			 * definition.
			 */
			*size = 0;
			return NULL;
		}
		*size = wanted;
		buffer = grown;
	}
	return buffer;
}
68
69/*
70 * While there is a version of OpenDarwin for intel, the usual case is
71 * big-endian PPC. So we need byte swapping to handle the
72 * little-endian byte order of the network protocol. We also need an
73 * additional dynamic buffer to do this work for incoming data blocks,
74 * because we have to consider the original data as constant.
75 *
76 * We abstract the differences away by providing a simple facade with
77 * these functions/macros:
78 *
79 * le_to_native(dst,src,len)
80 * native_to_le(cp,len)
81 * set_ucbuffer_with_le(buffer,bufsize,data,size)
82 * set_ucbuffer_with_le_copy(buffer,bufsize,data,size,reserve)
83 */
84#ifdef WORDS_BIGENDIAN
85
/*
 * Copy `len` bytes from `src` to `dst`, exchanging the two bytes of
 * every complete 16-bit unit.  `src` and `dst` must not overlap.
 *
 * If `len` is odd, the trailing byte has no partner; it is copied
 * unchanged, so exactly `len` bytes are read and written (the same
 * amount the memcpy() based little-endian variant of le_to_native
 * transfers).
 */
static inline void swap_bytes (char * dst, const char * src, size_t len)
{
	size_t paired = len & ~(size_t)1;	/* bytes in complete units */
	size_t i;

	for (i = 0; i < paired; i += 2) {
		dst[i] = src[i + 1];
		dst[i + 1] = src[i];
	}
	if (paired != len) {
		dst[paired] = src[paired];
	}
}
/*
 * Exchange the two bytes of every complete 16-bit unit in the `len`
 * bytes starting at `cp`.
 *
 * If `len` is odd, the trailing byte has no partner and is left
 * untouched; nothing outside cp[0..len-1] is accessed.
 */
static inline void swap_bytes_inplace (char * cp, size_t len)
{
	size_t i;

	for (i = 0; i + 1 < len; i += 2) {
		char temp = cp[i];
		cp[i] = cp[i + 1];
		cp[i + 1] = temp;
	}
}
107
/*
 * Big-endian host: little-endian UTF-16 data has to be byte-swapped
 * in both directions, and incoming data is always copied.
 */
#define le_to_native(dst,src,len) swap_bytes((dst),(src),(len))
#define native_to_le(cp,len) swap_bytes_inplace((cp),(len))
#define set_ucbuffer_with_le(buffer,bufsize,data,size) \
	set_ucbuffer_with_le_copy((buffer),(bufsize),(data),(size),0)
112
113#else /* ! WORDS_BIGENDIAN */
114
/*
 * Host without WORDS_BIGENDIAN: copying is a plain memcpy(), the
 * in-place conversion expands to nothing, and incoming data is used
 * directly instead of being copied.
 */
#define le_to_native(dst,src,len) memcpy((dst),(src),(len))
#define native_to_le(cp,len) /* no conversion needed */
#define set_ucbuffer_with_le(buffer,bufsize,data,size) \
	((void)(bufsize), (UniChar *)(data))
119
120#endif
121
122static inline UniChar *set_ucbuffer_with_le_copy (
123 UniChar *buffer, size_t *bufsize,
124 const void *data, size_t size, size_t reserve)
125{
126 buffer = resize_buffer(buffer, bufsize, size+reserve);
127 le_to_native((char*)buffer,data,size);
128 return buffer;
129}
130
131
132/*
133 * A simple hexdump function for debugging error conditions.
134 */
/*
 * Write the string `s` to the debug log at level 0.
 *
 * The string is handed over as the argument of a fixed "%s" format
 * and never as the format itself, so '%' characters in `s` (labels,
 * dumped data) are printed literally.
 * NOTE(review): relies on DEBUG taking a parenthesised printf-style
 * argument list as its second argument -- confirm against debug.h.
 */
#define debug_out(s) DEBUG(0,("%s",(s)))
136
137#ifdef DEBUG_STRINGS
138
/*
 * Write `len` bytes starting at `s` to the debug log, 16 bytes per
 * line: the offset, two groups of eight hex bytes and the same bytes
 * as text ('.' replaces everything that is not printable ASCII).
 * The dump is framed by "<<<<<<<" / ">>>>>>>" lines and preceded by
 * `label`.
 *
 * `label` and the dumped text are always printed through a "%s"
 * format, so '%' characters in them cannot be interpreted as
 * conversion specifications.
 */
static void hexdump( const char * label, const char * s, size_t len )
{
	size_t restlen = len;

	debug_out("<<<<<<<\n");
	DEBUG(0,("%s", label));
	debug_out("\n");
	while (restlen > 0) {
		/*
		 * Longest possible line: 9 (offset, up to 8 hex digits
		 * plus blank) + 1 + 24 + 1 + 24 + 1 + 16 + 1 (newline)
		 * + 1 (NUL) = 78 bytes.
		 */
		char line[100];
		size_t i, shown;
		char * d = line;
		/*
		 * NOTE(review): the #undef suggests that a project header
		 * redefines sprintf -- confirm.
		 */
#undef sprintf
		d += sprintf(d, "%04X ", (unsigned)(len-restlen));
		for( i = 0; i<16; ++i ) {
			if( 0 == i%8 ) {
				/* extra blank in front of each group of 8 */
				*d++ = ' ';
			}
			if( i<restlen ) {
				d += sprintf(d, "%02X ", ((unsigned)s[i]) & 0xFF);
			} else {
				/* same width as one byte, keeps columns aligned */
				d += sprintf(d, "   ");
			}
		}
		*d++ = ' ';
		shown = (restlen < 16) ? restlen : 16;
		for( i = 0; i<shown; ++i ) {
			unsigned char c = (unsigned char)s[i];
			if(c < ' ' || c >= 0x7F || !isprint(c))
				*d++ = '.';
			else
				*d++ = s[i];
		}
		*d++ = '\n';
		*d = 0;
		restlen -= shown;
		s += shown;
		DEBUG(0,("%s", line));
	}
	debug_out(">>>>>>>\n");
}
180
181#else /* !DEBUG_STRINGS */
182
/* Without DEBUG_STRINGS a hexdump() call does nothing; its arguments are not evaluated. */
#define hexdump(label,s,len) ((void)0)
184
185#endif
186
187
188#if !USE_INTERNAL_API
189
/*
 * An implementation based on documented Mac OS X APIs.
 *
 * This does a certain amount of memory management, creating and
 * manipulating CFString objects. We try to minimize the impact by
 * keeping those objects around and re-using them. We also use
 * external backing store for the CFStrings where this is possible and
 * beneficial.
 *
 * The Unicode normalization forms available at this level are
 * generic, not specifically for the file system. So they may not be
 * perfect fits.
 */
203static size_t macosxfs_encoding_pull(
204 void *cd, /* Encoder handle */
205 char **inbuf, size_t *inbytesleft, /* Script string */
206 char **outbuf, size_t *outbytesleft) /* UTF-16-LE string */
207{
208 static const int script_code = kCFStringEncodingUTF8;
209 static CFMutableStringRef cfstring = NULL;
210 size_t outsize;
211 CFRange range;
212
213 (void) cd; /* UNUSED */
214
215 if (0 == *inbytesleft) {
216 return 0;
217 }
218
219 if (NULL == cfstring) {
220 /*
221 * A version with an external backing store as in the
222 * push function should have been more efficient, but
223 * testing shows, that it is actually slower (!).
224 * Maybe kCFAllocatorDefault gets shortcut evaluation
225 * internally, while kCFAllocatorNull doesn't.
226 */
227 cfstring = CFStringCreateMutable(kCFAllocatorDefault,0);
228 }
229
230 /*
231 * Three methods of appending to a CFString, choose the most
232 * efficient.
233 */
234 if (0 == (*inbuf)[*inbytesleft-1]) {
235 CFStringAppendCString(cfstring, *inbuf, script_code);
236 } else if (*inbytesleft <= 255) {
237 Str255 buffer;
238 buffer[0] = *inbytesleft;
239 memcpy(buffer+1, *inbuf, buffer[0]);
240 CFStringAppendPascalString(cfstring, buffer, script_code);
241 } else {
242 /*
243 * We would like to use a fixed buffer and a loop
244 * here, but than we can't garantee that the input is
245 * well-formed UTF-8, as we are supposed to do.
246 */
247 static char *buffer = NULL;
248 static size_t buflen = 0;
249 buffer = resize_buffer(buffer, &buflen, *inbytesleft+1);
250 memcpy(buffer, *inbuf, *inbytesleft);
251 buffer[*inbytesleft] = 0;
252 CFStringAppendCString(cfstring, *inbuf, script_code);
253 }
254
255 /*
256 * Compose characters, using the non-canonical composition
257 * form.
258 */
259 CFStringNormalize(cfstring, kCFStringNormalizationFormC);
260
261 outsize = CFStringGetLength(cfstring);
262 range = CFRangeMake(0,outsize);
263
264 if (outsize == 0) {
265 /*
266 * HACK: smbd/mangle_hash2.c:is_legal_name() expects
267 * errors here. That function will always pass 2
268 * characters. smbd/open.c:check_for_pipe() cuts a
269 * patchname to 10 characters blindly. Suppress the
270 * debug output in those cases.
271 */
272 if(2 != *inbytesleft && 10 != *inbytesleft) {
273 debug_out("String conversion: "
274 "An unknown error occurred\n");
275 hexdump("UTF8->UTF16LE (old) input",
276 *inbuf, *inbytesleft);
277 }
278 errno = EILSEQ; /* Not sure, but this is what we have
279 * actually seen. */
280 return -1;
281 }
282 if (outsize*2 > *outbytesleft) {
283 CFStringDelete(cfstring, range);
284 debug_out("String conversion: "
285 "Output buffer too small\n");
286 hexdump("UTF8->UTF16LE (old) input",
287 *inbuf, *inbytesleft);
288 errno = E2BIG;
289 return -1;
290 }
291
292 CFStringGetCharacters(cfstring, range, (UniChar*)*outbuf);
293 CFStringDelete(cfstring, range);
294
295 native_to_le(*outbuf, outsize*2);
296
297 /*
298 * Add a converted null byte, if the CFString conversions
299 * prevented that until now.
300 */
301 if (0 == (*inbuf)[*inbytesleft-1] &&
302 (0 != (*outbuf)[outsize*2-1] || 0 != (*outbuf)[outsize*2-2])) {
303
304 if ((outsize*2+2) > *outbytesleft) {
305 debug_out("String conversion: "
306 "Output buffer too small\n");
307 hexdump("UTF8->UTF16LE (old) input",
308 *inbuf, *inbytesleft);
309 errno = E2BIG;
310 return -1;
311 }
312
313 (*outbuf)[outsize*2] = (*outbuf)[outsize*2+1] = 0;
314 outsize += 2;
315 }
316
317 *inbuf += *inbytesleft;
318 *inbytesleft = 0;
319 *outbuf += outsize*2;
320 *outbytesleft -= outsize*2;
321
322 return 0;
323}
324
325static size_t macosxfs_encoding_push(
326 void *cd, /* Encoder handle */
327 char **inbuf, size_t *inbytesleft, /* UTF-16-LE string */
328 char **outbuf, size_t *outbytesleft) /* Script string */
329{
330 static const int script_code = kCFStringEncodingUTF8;
331 static CFMutableStringRef cfstring = NULL;
332 static UniChar *buffer = NULL;
333 static size_t buflen = 0;
334 CFIndex outsize, cfsize, charsconverted;
335
336 (void) cd; /* UNUSED */
337
338 if (0 == *inbytesleft) {
339 return 0;
340 }
341
342 /*
343 * We need a buffer that can hold 4 times the original data,
344 * because that is the theoretical maximum that decomposition
345 * can create currently (in Unicode 4.0).
346 */
347 buffer = set_ucbuffer_with_le_copy(
348 buffer, &buflen, *inbuf, *inbytesleft, 3 * *inbytesleft);
349
350 if (NULL == cfstring) {
351 cfstring = CFStringCreateMutableWithExternalCharactersNoCopy(
352 kCFAllocatorDefault,
353 buffer, *inbytesleft/2, buflen/2,
354 kCFAllocatorNull);
355 } else {
356 CFStringSetExternalCharactersNoCopy(
357 cfstring,
358 buffer, *inbytesleft/2, buflen/2);
359 }
360
361 /*
362 * Decompose characters, using the non-canonical decomposition
363 * form.
364 *
365 * NB: This isn't exactly what HFS+ wants (see note on
366 * kCFStringEncodingUseHFSPlusCanonical in
367 * CFStringEncodingConverter.h), but AFAIK it's the best that
368 * the official API can do.
369 */
370 CFStringNormalize(cfstring, kCFStringNormalizationFormD);
371
372 cfsize = CFStringGetLength(cfstring);
373 charsconverted = CFStringGetBytes(
374 cfstring, CFRangeMake(0,cfsize),
375 script_code, 0, False,
376 *outbuf, *outbytesleft, &outsize);
377
378 if (0 == charsconverted) {
379 debug_out("String conversion: "
380 "Buffer too small or not convertable\n");
381 hexdump("UTF16LE->UTF8 (old) input",
382 *inbuf, *inbytesleft);
383 errno = EILSEQ; /* Probably more likely. */
384 return -1;
385 }
386
387 /*
388 * Add a converted null byte, if the CFString conversions
389 * prevented that until now.
390 */
391 if (0 == (*inbuf)[*inbytesleft-1] && 0 == (*inbuf)[*inbytesleft-2] &&
392 (0 != (*outbuf)[outsize-1])) {
393
394 if (((size_t)outsize+1) > *outbytesleft) {
395 debug_out("String conversion: "
396 "Output buffer too small\n");
397 hexdump("UTF16LE->UTF8 (old) input",
398 *inbuf, *inbytesleft);
399 errno = E2BIG;
400 return -1;
401 }
402
403 (*outbuf)[outsize] = 0;
404 ++outsize;
405 }
406
407 *inbuf += *inbytesleft;
408 *inbytesleft = 0;
409 *outbuf += outsize;
410 *outbytesleft -= outsize;
411
412 return 0;
413}
414
415#else /* USE_INTERNAL_API */
416
/*
 * An implementation based on internal code as known from the
 * OpenDarwin CVS.
 *
 * This code doesn't need much memory management because it uses
 * functions that operate on the raw memory directly.
 *
 * The push routine here is faster and more compatible with HFS+ than
 * the other implementation above. The pull routine is only faster
 * for some strings, slightly slower for others. The pull routine
 * loses because it has to iterate over the data twice, once to
 * decode UTF-8 and then to do the character composition required by
 * Windows.
 */
431static size_t macosxfs_encoding_pull(
432 void *cd, /* Encoder handle */
433 char **inbuf, size_t *inbytesleft, /* Script string */
434 char **outbuf, size_t *outbytesleft) /* UTF-16-LE string */
435{
436 static const int script_code = kCFStringEncodingUTF8;
437 UInt32 srcCharsUsed = 0;
438 UInt32 dstCharsUsed = 0;
439 UInt32 result;
440 uint32_t dstDecomposedUsed = 0;
441 uint32_t dstPrecomposedUsed = 0;
442
443 (void) cd; /* UNUSED */
444
445 if (0 == *inbytesleft) {
446 return 0;
447 }
448
449 result = CFStringEncodingBytesToUnicode(
450 script_code, kCFStringEncodingComposeCombinings,
451 *inbuf, *inbytesleft, &srcCharsUsed,
452 (UniChar*)*outbuf, *outbytesleft, &dstCharsUsed);
453
454 switch(result) {
455 case kCFStringEncodingConversionSuccess:
456 if (*inbytesleft == srcCharsUsed)
457 break;
458 else
459 ; /*fall through*/
460 case kCFStringEncodingInsufficientOutputBufferLength:
461 debug_out("String conversion: "
462 "Output buffer too small\n");
463 hexdump("UTF8->UTF16LE (new) input",
464 *inbuf, *inbytesleft);
465 errno = E2BIG;
466 return -1;
467 case kCFStringEncodingInvalidInputStream:
468 /*
469 * HACK: smbd/mangle_hash2.c:is_legal_name() expects
470 * errors here. That function will always pass 2
471 * characters. smbd/open.c:check_for_pipe() cuts a
472 * patchname to 10 characters blindly. Suppress the
473 * debug output in those cases.
474 */
475 if(2 != *inbytesleft && 10 != *inbytesleft) {
476 debug_out("String conversion: "
477 "Invalid input sequence\n");
478 hexdump("UTF8->UTF16LE (new) input",
479 *inbuf, *inbytesleft);
480 }
481 errno = EILSEQ;
482 return -1;
483 case kCFStringEncodingConverterUnavailable:
484 debug_out("String conversion: "
485 "Unknown encoding\n");
486 hexdump("UTF8->UTF16LE (new) input",
487 *inbuf, *inbytesleft);
488 errno = EINVAL;
489 return -1;
490 }
491
492 /*
493 * It doesn't look like CFStringEncodingBytesToUnicode() can
494 * produce precomposed characters (flags=ComposeCombinings
495 * doesn't do it), so we need another pass over the data here.
496 * We can do this in-place, as the string can only get
497 * shorter.
498 *
499 * (Actually in theory there should be an internal
500 * decomposition and reordering before the actual composition
501 * step. But we should be able to rely on that we always get
502 * fully decomposed strings for input, so this can't create
503 * problems in reality.)
504 */
505 CFUniCharPrecompose(
506 (const UTF16Char *)*outbuf, dstCharsUsed, &dstDecomposedUsed,
507 (UTF16Char *)*outbuf, dstCharsUsed, &dstPrecomposedUsed);
508
509 native_to_le(*outbuf, dstPrecomposedUsed*2);
510
511 *inbuf += srcCharsUsed;
512 *inbytesleft -= srcCharsUsed;
513 *outbuf += dstPrecomposedUsed*2;
514 *outbytesleft -= dstPrecomposedUsed*2;
515
516 return 0;
517}
518
519static size_t macosxfs_encoding_push(
520 void *cd, /* Encoder handle */
521 char **inbuf, size_t *inbytesleft, /* UTF-16-LE string */
522 char **outbuf, size_t *outbytesleft) /* Script string */
523{
524 static const int script_code = kCFStringEncodingUTF8;
525 static UniChar *buffer = NULL;
526 static size_t buflen = 0;
527 UInt32 srcCharsUsed=0, dstCharsUsed=0, result;
528
529 (void) cd; /* UNUSED */
530
531 if (0 == *inbytesleft) {
532 return 0;
533 }
534
535 buffer = set_ucbuffer_with_le(
536 buffer, &buflen, *inbuf, *inbytesleft);
537
538 result = CFStringEncodingUnicodeToBytes(
539 script_code, kCFStringEncodingUseHFSPlusCanonical,
540 buffer, *inbytesleft/2, &srcCharsUsed,
541 *outbuf, *outbytesleft, &dstCharsUsed);
542
543 switch(result) {
544 case kCFStringEncodingConversionSuccess:
545 if (*inbytesleft/2 == srcCharsUsed)
546 break;
547 else
548 ; /*fall through*/
549 case kCFStringEncodingInsufficientOutputBufferLength:
550 debug_out("String conversion: "
551 "Output buffer too small\n");
552 hexdump("UTF16LE->UTF8 (new) input",
553 *inbuf, *inbytesleft);
554 errno = E2BIG;
555 return -1;
556 case kCFStringEncodingInvalidInputStream:
557 /*
558 * HACK: smbd/open.c:check_for_pipe():is_legal_name()
559 * cuts a pathname to 10 characters blindly. Suppress
560 * the debug output in those cases.
561 */
562 if(10 != *inbytesleft) {
563 debug_out("String conversion: "
564 "Invalid input sequence\n");
565 hexdump("UTF16LE->UTF8 (new) input",
566 *inbuf, *inbytesleft);
567 }
568 errno = EILSEQ;
569 return -1;
570 case kCFStringEncodingConverterUnavailable:
571 debug_out("String conversion: "
572 "Unknown encoding\n");
573 hexdump("UTF16LE->UTF8 (new) input",
574 *inbuf, *inbytesleft);
575 errno = EINVAL;
576 return -1;
577 }
578
579 *inbuf += srcCharsUsed*2;
580 *inbytesleft -= srcCharsUsed*2;
581 *outbuf += dstCharsUsed;
582 *outbytesleft -= dstCharsUsed;
583
584 return 0;
585}
586
587#endif /* USE_INTERNAL_API */
588
589/*
590 * For initialization, actually install the encoding as "macosxfs".
591 */
592static struct charset_functions macosxfs_encoding_functions = {
593 "MACOSXFS", macosxfs_encoding_pull, macosxfs_encoding_push
594};
595
596NTSTATUS charset_macosxfs_init(void)
597{
598 if (!smb_register_charset(&macosxfs_encoding_functions)) {
599 return NT_STATUS_INTERNAL_ERROR;
600 }
601 return NT_STATUS_OK;
602}
603
604/* eof */
Note: See TracBrowser for help on using the repository browser.