source: branches/samba-3.0/source/modules/charset_macosxfs.c

Last change on this file was 1, checked in by Paul Smedley, 18 years ago

Initial code import

File size: 16.0 KB
Line 
1/*
2 Unix SMB/CIFS implementation.
3 Samba charset module for Mac OS X/Darwin
4 Copyright (C) Benjamin Riefenstahl 2003
5
6 This program is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2 of the License, or
9 (at your option) any later version.
10
11 This program is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with this program; if not, write to the Free Software
18 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19*/
20
21/*
22 * modules/charset_macosxfs.c
23 *
24 * A Samba charset module to use on Mac OS X/Darwin as the filesystem
25 * and display encoding.
26 *
27 * Actually two implementations are provided here. The default
28 * implementation is based on the official CFString API. The other is
29 * based on internal CFString APIs as defined in the OpenDarwin
30 * source.
31 */
32
33#include "includes.h"
34
35/*
36 * Include OS frameworks. These are only needed in this module.
37 */
38#include <CoreFoundation/CFString.h>
39
40/*
41 * See if autoconf has found us the internal headers in some form.
42 */
43#if HAVE_COREFOUNDATION_CFSTRINGENCODINGCONVERTER_H
44# include <Corefoundation/CFStringEncodingConverter.h>
45# include <Corefoundation/CFUnicodePrecomposition.h>
46# define USE_INTERNAL_API 1
47#elif HAVE_CFSTRINGENCODINGCONVERTER_H
48# include <CFStringEncodingConverter.h>
49# include <CFUnicodePrecomposition.h>
50# define USE_INTERNAL_API 1
51#endif
52
53/*
54 * Compile time configuration: Do we want debug output?
55 */
56/* #define DEBUG_STRINGS 1 */
57
/*
 * A simple, but efficient memory provider for our buffers.
 *
 * Makes sure that `buffer` can hold at least `newsize` bytes.  The
 * buffer only ever grows; a request that already fits returns the
 * buffer untouched.  `*size` holds the current capacity in bytes.
 *
 * Returns the (possibly moved) buffer, or NULL if the reallocation
 * failed.  On failure `*size` is reset to 0, so that the recorded
 * capacity never describes memory we do not have.
 *
 * NOTE(review): whether SMB_REALLOC releases the old block on failure
 * is not visible from this file -- confirm against its definition.
 */
static inline void *resize_buffer (void *buffer, size_t *size, size_t newsize)
{
	/* Head room added on every growth to save later reallocations. */
	const size_t slack = 128;
	size_t wanted;

	if (newsize <= *size) {
		return buffer;
	}

	wanted = newsize + slack;
	if (wanted < newsize) {
		/* The head room would wrap around; do without it. */
		wanted = newsize;
	}

	buffer = SMB_REALLOC(buffer, wanted);
	*size = (NULL != buffer) ? wanted : 0;
	return buffer;
}
69
70/*
71 * While there is a version of OpenDarwin for intel, the usual case is
72 * big-endian PPC. So we need byte swapping to handle the
73 * little-endian byte order of the network protocol. We also need an
74 * additional dynamic buffer to do this work for incoming data blocks,
75 * because we have to consider the original data as constant.
76 *
77 * We abstract the differences away by providing a simple facade with
78 * these functions/macros:
79 *
80 * le_to_native(dst,src,len)
81 * native_to_le(cp,len)
82 * set_ucbuffer_with_le(buffer,bufsize,data,size)
83 * set_ucbuffer_with_le_copy(buffer,bufsize,data,size,reserve)
84 */
85#ifdef WORDS_BIGENDIAN
86
/*
 * Copy `len` bytes from `src` to `dst`, exchanging the two bytes of
 * every complete 16-bit unit on the way.  The two areas must not
 * overlap.
 *
 * If `len` is odd, the single trailing byte is copied unchanged, so
 * exactly `len` bytes are read and written and neither buffer is
 * accessed beyond its end.
 */
static inline void swap_bytes (char * dst, const char * src, size_t len)
{
	/* Number of bytes that belong to complete 2-byte units. */
	const size_t paired = len & ~(size_t)1;
	size_t i;

	for (i = 0; i < paired; i += 2) {
		dst[i]     = src[i + 1];
		dst[i + 1] = src[i];
	}
	if (paired != len) {
		dst[paired] = src[paired];
	}
}
/*
 * Exchange the two bytes of every complete 16-bit unit in the `len`
 * bytes starting at `cp`.
 *
 * If `len` is odd, the single trailing byte is left untouched, so
 * nothing beyond `cp + len` is read or written.
 */
static inline void swap_bytes_inplace (char * cp, size_t len)
{
	/* Stop after the last complete 2-byte unit. */
	char *const end = cp + (len & ~(size_t)1);

	for (; cp < end; cp += 2) {
		const char first = cp[0];
		cp[0] = cp[1];
		cp[1] = first;
	}
}
108
/*
 * Big-endian host: little-endian data has to be byte-swapped, and
 * incoming data always goes through a private copy (no reserve).
 */
#define le_to_native(dst, src, len) swap_bytes((dst), (src), (len))
#define native_to_le(cp, len) swap_bytes_inplace((cp), (len))
#define set_ucbuffer_with_le(buffer, bufsize, data, size) \
	set_ucbuffer_with_le_copy((buffer), (bufsize), (data), (size), 0)
113
114#else /* ! WORDS_BIGENDIAN */
115
/*
 * Little-endian host: the data already is in native order.  Copies
 * are plain memcpy()s, the in-place conversion is a no-op, and
 * incoming data is used directly instead of being copied (`bufsize`
 * is evaluated only to avoid unused-variable warnings).
 */
#define le_to_native(dst, src, len) memcpy((dst), (src), (len))
#define native_to_le(cp, len) /* nothing */
#define set_ucbuffer_with_le(buffer, bufsize, data, size) \
	((void)(bufsize), (UniChar *)(data))
120
121#endif
122
123static inline UniChar *set_ucbuffer_with_le_copy (
124 UniChar *buffer, size_t *bufsize,
125 const void *data, size_t size, size_t reserve)
126{
127 buffer = resize_buffer(buffer, bufsize, size+reserve);
128 le_to_native((char*)buffer,data,size);
129 return buffer;
130}
131
132
133/*
134 * A simple hexdump function for debugging error conditions.
135 */
/*
 * Write the string `s` to the debug log at level 0.
 *
 * The text is passed as the argument of a constant "%s" format and is
 * never used as the format string itself: hexdump() feeds lines built
 * from converted data through this macro, and those may contain '%'.
 */
#define debug_out(s) DEBUG(0,("%s",(s)))
137
138#ifdef DEBUG_STRINGS
139
/*
 * Write a hex/text dump of the `len` bytes at `s` to the debug log.
 *
 * The dump is framed by a "<<<<<<<" and a ">>>>>>>" line, with
 * `label` on a line of its own after the opening marker.  Every dump
 * line shows the offset, up to 16 bytes in hex (two groups of eight)
 * and the same bytes as text, where bytes outside the printable ASCII
 * range are shown as '.'.  Missing bytes on the last line are padded
 * to the width of a hex column so that the text column stays aligned.
 */
static void hexdump( const char * label, const char * s, size_t len )
{
	size_t restlen = len;

	debug_out("<<<<<<<\n");
	debug_out(label);
	debug_out("\n");
	while (restlen > 0) {
		/*
		 * Worst case: 9 (offset) + 2 (group separators) +
		 * 16 * 3 (hex columns) + 1 + 16 (text) + 2 ("\n\0")
		 * = 78 bytes.
		 */
		char line[100];
		char * d = line;
		const size_t count = (restlen < 16) ? restlen : 16;
		size_t i;

		d += snprintf(d, sizeof(line), "%04X ",
			      (unsigned)(len-restlen));

		for( i = 0; i<16; ++i ) {
			/* Extra blank in front of each group of eight. */
			if (0 == i % 8) {
				*d++ = ' ';
			}
			if (i < count) {
				d += snprintf(d,
					      sizeof(line) - (size_t)(d - line),
					      "%02X ",
					      (unsigned)(unsigned char)s[i]);
			} else {
				d += snprintf(d,
					      sizeof(line) - (size_t)(d - line),
					      "   ");
			}
		}

		*d++ = ' ';
		for( i = 0; i<count; ++i ) {
			/* isprint() needs an unsigned char value. */
			const unsigned char c = (unsigned char)s[i];
			if (c < ' ' || c >= 0x7F || !isprint(c))
				*d++ = '.';
			else
				*d++ = s[i];
		}
		*d++ = '\n';
		*d = 0;

		restlen -= count;
		s += count;
		debug_out(line);
	}
	debug_out(">>>>>>>\n");
}
181
182#else /* !DEBUG_STRINGS */
183
/* Without DEBUG_STRINGS the hex dumps compile away to nothing. */
#define hexdump(label, s, len) /* nothing */
185
186#endif
187
188
189#if !USE_INTERNAL_API
190
191/*
192 * An implementation based on documented Mac OS X APIs.
193 *
194 * This does a certain amount of memory management, creating and
195 * manipulating CFString objects. We try to minimize the impact by
196 * keeping those objects around and re-using them. We also use
197 * external backing store for the CFStrings where this is possible and
198 * benficial.
199 *
200 * The Unicode normalizations forms available at this level are
201 * generic, not specifically for the file system. So they may not be
202 * perfect fits.
203 */
204static size_t macosxfs_encoding_pull(
205 void *cd, /* Encoder handle */
206 char **inbuf, size_t *inbytesleft, /* Script string */
207 char **outbuf, size_t *outbytesleft) /* UTF-16-LE string */
208{
209 static const int script_code = kCFStringEncodingUTF8;
210 static CFMutableStringRef cfstring = NULL;
211 size_t outsize;
212 CFRange range;
213
214 (void) cd; /* UNUSED */
215
216 if (0 == *inbytesleft) {
217 return 0;
218 }
219
220 if (NULL == cfstring) {
221 /*
222 * A version with an external backing store as in the
223 * push function should have been more efficient, but
224 * testing shows, that it is actually slower (!).
225 * Maybe kCFAllocatorDefault gets shortcut evaluation
226 * internally, while kCFAllocatorNull doesn't.
227 */
228 cfstring = CFStringCreateMutable(kCFAllocatorDefault,0);
229 }
230
231 /*
232 * Three methods of appending to a CFString, choose the most
233 * efficient.
234 */
235 if (0 == (*inbuf)[*inbytesleft-1]) {
236 CFStringAppendCString(cfstring, *inbuf, script_code);
237 } else if (*inbytesleft <= 255) {
238 Str255 buffer;
239 buffer[0] = *inbytesleft;
240 memcpy(buffer+1, *inbuf, buffer[0]);
241 CFStringAppendPascalString(cfstring, buffer, script_code);
242 } else {
243 /*
244 * We would like to use a fixed buffer and a loop
245 * here, but than we can't garantee that the input is
246 * well-formed UTF-8, as we are supposed to do.
247 */
248 static char *buffer = NULL;
249 static size_t buflen = 0;
250 buffer = resize_buffer(buffer, &buflen, *inbytesleft+1);
251 memcpy(buffer, *inbuf, *inbytesleft);
252 buffer[*inbytesleft] = 0;
253 CFStringAppendCString(cfstring, *inbuf, script_code);
254 }
255
256 /*
257 * Compose characters, using the non-canonical composition
258 * form.
259 */
260 CFStringNormalize(cfstring, kCFStringNormalizationFormC);
261
262 outsize = CFStringGetLength(cfstring);
263 range = CFRangeMake(0,outsize);
264
265 if (outsize == 0) {
266 /*
267 * HACK: smbd/mangle_hash2.c:is_legal_name() expects
268 * errors here. That function will always pass 2
269 * characters. smbd/open.c:check_for_pipe() cuts a
270 * patchname to 10 characters blindly. Suppress the
271 * debug output in those cases.
272 */
273 if(2 != *inbytesleft && 10 != *inbytesleft) {
274 debug_out("String conversion: "
275 "An unknown error occurred\n");
276 hexdump("UTF8->UTF16LE (old) input",
277 *inbuf, *inbytesleft);
278 }
279 errno = EILSEQ; /* Not sure, but this is what we have
280 * actually seen. */
281 return -1;
282 }
283 if (outsize*2 > *outbytesleft) {
284 CFStringDelete(cfstring, range);
285 debug_out("String conversion: "
286 "Output buffer too small\n");
287 hexdump("UTF8->UTF16LE (old) input",
288 *inbuf, *inbytesleft);
289 errno = E2BIG;
290 return -1;
291 }
292
293 CFStringGetCharacters(cfstring, range, (UniChar*)*outbuf);
294 CFStringDelete(cfstring, range);
295
296 native_to_le(*outbuf, outsize*2);
297
298 /*
299 * Add a converted null byte, if the CFString conversions
300 * prevented that until now.
301 */
302 if (0 == (*inbuf)[*inbytesleft-1] &&
303 (0 != (*outbuf)[outsize*2-1] || 0 != (*outbuf)[outsize*2-2])) {
304
305 if ((outsize*2+2) > *outbytesleft) {
306 debug_out("String conversion: "
307 "Output buffer too small\n");
308 hexdump("UTF8->UTF16LE (old) input",
309 *inbuf, *inbytesleft);
310 errno = E2BIG;
311 return -1;
312 }
313
314 (*outbuf)[outsize*2] = (*outbuf)[outsize*2+1] = 0;
315 outsize += 2;
316 }
317
318 *inbuf += *inbytesleft;
319 *inbytesleft = 0;
320 *outbuf += outsize*2;
321 *outbytesleft -= outsize*2;
322
323 return 0;
324}
325
326static size_t macosxfs_encoding_push(
327 void *cd, /* Encoder handle */
328 char **inbuf, size_t *inbytesleft, /* UTF-16-LE string */
329 char **outbuf, size_t *outbytesleft) /* Script string */
330{
331 static const int script_code = kCFStringEncodingUTF8;
332 static CFMutableStringRef cfstring = NULL;
333 static UniChar *buffer = NULL;
334 static size_t buflen = 0;
335 CFIndex outsize, cfsize, charsconverted;
336
337 (void) cd; /* UNUSED */
338
339 if (0 == *inbytesleft) {
340 return 0;
341 }
342
343 /*
344 * We need a buffer that can hold 4 times the original data,
345 * because that is the theoretical maximum that decomposition
346 * can create currently (in Unicode 4.0).
347 */
348 buffer = set_ucbuffer_with_le_copy(
349 buffer, &buflen, *inbuf, *inbytesleft, 3 * *inbytesleft);
350
351 if (NULL == cfstring) {
352 cfstring = CFStringCreateMutableWithExternalCharactersNoCopy(
353 kCFAllocatorDefault,
354 buffer, *inbytesleft/2, buflen/2,
355 kCFAllocatorNull);
356 } else {
357 CFStringSetExternalCharactersNoCopy(
358 cfstring,
359 buffer, *inbytesleft/2, buflen/2);
360 }
361
362 /*
363 * Decompose characters, using the non-canonical decomposition
364 * form.
365 *
366 * NB: This isn't exactly what HFS+ wants (see note on
367 * kCFStringEncodingUseHFSPlusCanonical in
368 * CFStringEncodingConverter.h), but AFAIK it's the best that
369 * the official API can do.
370 */
371 CFStringNormalize(cfstring, kCFStringNormalizationFormD);
372
373 cfsize = CFStringGetLength(cfstring);
374 charsconverted = CFStringGetBytes(
375 cfstring, CFRangeMake(0,cfsize),
376 script_code, 0, False,
377 *outbuf, *outbytesleft, &outsize);
378
379 if (0 == charsconverted) {
380 debug_out("String conversion: "
381 "Buffer too small or not convertable\n");
382 hexdump("UTF16LE->UTF8 (old) input",
383 *inbuf, *inbytesleft);
384 errno = EILSEQ; /* Probably more likely. */
385 return -1;
386 }
387
388 /*
389 * Add a converted null byte, if the CFString conversions
390 * prevented that until now.
391 */
392 if (0 == (*inbuf)[*inbytesleft-1] && 0 == (*inbuf)[*inbytesleft-2] &&
393 (0 != (*outbuf)[outsize-1])) {
394
395 if (((size_t)outsize+1) > *outbytesleft) {
396 debug_out("String conversion: "
397 "Output buffer too small\n");
398 hexdump("UTF16LE->UTF8 (old) input",
399 *inbuf, *inbytesleft);
400 errno = E2BIG;
401 return -1;
402 }
403
404 (*outbuf)[outsize] = 0;
405 ++outsize;
406 }
407
408 *inbuf += *inbytesleft;
409 *inbytesleft = 0;
410 *outbuf += outsize;
411 *outbytesleft -= outsize;
412
413 return 0;
414}
415
416#else /* USE_INTERNAL_API */
417
418/*
419 * An implementation based on internal code as known from the
420 * OpenDarwin CVS.
421 *
422 * This code doesn't need much memory management because it uses
423 * functions that operate on the raw memory directly.
424 *
425 * The push routine here is faster and more compatible with HFS+ than
426 * the other implementation above. The pull routine is only faster
427 * for some strings, slightly slower for others. The pull routine
428 * looses because it has to iterate over the data twice, once to
429 * decode UTF-8 and than to do the character composition required by
430 * Windows.
431 */
432static size_t macosxfs_encoding_pull(
433 void *cd, /* Encoder handle */
434 char **inbuf, size_t *inbytesleft, /* Script string */
435 char **outbuf, size_t *outbytesleft) /* UTF-16-LE string */
436{
437 static const int script_code = kCFStringEncodingUTF8;
438 UInt32 srcCharsUsed = 0;
439 UInt32 dstCharsUsed = 0;
440 UInt32 result;
441 uint32_t dstDecomposedUsed = 0;
442 uint32_t dstPrecomposedUsed = 0;
443
444 (void) cd; /* UNUSED */
445
446 if (0 == *inbytesleft) {
447 return 0;
448 }
449
450 result = CFStringEncodingBytesToUnicode(
451 script_code, kCFStringEncodingComposeCombinings,
452 *inbuf, *inbytesleft, &srcCharsUsed,
453 (UniChar*)*outbuf, *outbytesleft, &dstCharsUsed);
454
455 switch(result) {
456 case kCFStringEncodingConversionSuccess:
457 if (*inbytesleft == srcCharsUsed)
458 break;
459 else
460 ; /*fall through*/
461 case kCFStringEncodingInsufficientOutputBufferLength:
462 debug_out("String conversion: "
463 "Output buffer too small\n");
464 hexdump("UTF8->UTF16LE (new) input",
465 *inbuf, *inbytesleft);
466 errno = E2BIG;
467 return -1;
468 case kCFStringEncodingInvalidInputStream:
469 /*
470 * HACK: smbd/mangle_hash2.c:is_legal_name() expects
471 * errors here. That function will always pass 2
472 * characters. smbd/open.c:check_for_pipe() cuts a
473 * patchname to 10 characters blindly. Suppress the
474 * debug output in those cases.
475 */
476 if(2 != *inbytesleft && 10 != *inbytesleft) {
477 debug_out("String conversion: "
478 "Invalid input sequence\n");
479 hexdump("UTF8->UTF16LE (new) input",
480 *inbuf, *inbytesleft);
481 }
482 errno = EILSEQ;
483 return -1;
484 case kCFStringEncodingConverterUnavailable:
485 debug_out("String conversion: "
486 "Unknown encoding\n");
487 hexdump("UTF8->UTF16LE (new) input",
488 *inbuf, *inbytesleft);
489 errno = EINVAL;
490 return -1;
491 }
492
493 /*
494 * It doesn't look like CFStringEncodingBytesToUnicode() can
495 * produce precomposed characters (flags=ComposeCombinings
496 * doesn't do it), so we need another pass over the data here.
497 * We can do this in-place, as the string can only get
498 * shorter.
499 *
500 * (Actually in theory there should be an internal
501 * decomposition and reordering before the actual composition
502 * step. But we should be able to rely on that we always get
503 * fully decomposed strings for input, so this can't create
504 * problems in reality.)
505 */
506 CFUniCharPrecompose(
507 (const UTF16Char *)*outbuf, dstCharsUsed, &dstDecomposedUsed,
508 (UTF16Char *)*outbuf, dstCharsUsed, &dstPrecomposedUsed);
509
510 native_to_le(*outbuf, dstPrecomposedUsed*2);
511
512 *inbuf += srcCharsUsed;
513 *inbytesleft -= srcCharsUsed;
514 *outbuf += dstPrecomposedUsed*2;
515 *outbytesleft -= dstPrecomposedUsed*2;
516
517 return 0;
518}
519
520static size_t macosxfs_encoding_push(
521 void *cd, /* Encoder handle */
522 char **inbuf, size_t *inbytesleft, /* UTF-16-LE string */
523 char **outbuf, size_t *outbytesleft) /* Script string */
524{
525 static const int script_code = kCFStringEncodingUTF8;
526 static UniChar *buffer = NULL;
527 static size_t buflen = 0;
528 UInt32 srcCharsUsed=0, dstCharsUsed=0, result;
529
530 (void) cd; /* UNUSED */
531
532 if (0 == *inbytesleft) {
533 return 0;
534 }
535
536 buffer = set_ucbuffer_with_le(
537 buffer, &buflen, *inbuf, *inbytesleft);
538
539 result = CFStringEncodingUnicodeToBytes(
540 script_code, kCFStringEncodingUseHFSPlusCanonical,
541 buffer, *inbytesleft/2, &srcCharsUsed,
542 *outbuf, *outbytesleft, &dstCharsUsed);
543
544 switch(result) {
545 case kCFStringEncodingConversionSuccess:
546 if (*inbytesleft/2 == srcCharsUsed)
547 break;
548 else
549 ; /*fall through*/
550 case kCFStringEncodingInsufficientOutputBufferLength:
551 debug_out("String conversion: "
552 "Output buffer too small\n");
553 hexdump("UTF16LE->UTF8 (new) input",
554 *inbuf, *inbytesleft);
555 errno = E2BIG;
556 return -1;
557 case kCFStringEncodingInvalidInputStream:
558 /*
559 * HACK: smbd/open.c:check_for_pipe():is_legal_name()
560 * cuts a pathname to 10 characters blindly. Suppress
561 * the debug output in those cases.
562 */
563 if(10 != *inbytesleft) {
564 debug_out("String conversion: "
565 "Invalid input sequence\n");
566 hexdump("UTF16LE->UTF8 (new) input",
567 *inbuf, *inbytesleft);
568 }
569 errno = EILSEQ;
570 return -1;
571 case kCFStringEncodingConverterUnavailable:
572 debug_out("String conversion: "
573 "Unknown encoding\n");
574 hexdump("UTF16LE->UTF8 (new) input",
575 *inbuf, *inbytesleft);
576 errno = EINVAL;
577 return -1;
578 }
579
580 *inbuf += srcCharsUsed*2;
581 *inbytesleft -= srcCharsUsed*2;
582 *outbuf += dstCharsUsed;
583 *outbytesleft -= dstCharsUsed;
584
585 return 0;
586}
587
588#endif /* USE_INTERNAL_API */
589
590/*
591 * For initialization, actually install the encoding as "macosxfs".
592 */
593static struct charset_functions macosxfs_encoding_functions = {
594 "MACOSXFS", macosxfs_encoding_pull, macosxfs_encoding_push
595};
596
597NTSTATUS charset_macosxfs_init(void)
598{
599 return smb_register_charset(&macosxfs_encoding_functions);
600}
601
602/* eof */
Note: See TracBrowser for help on using the repository browser.