Context Navigation

charset_macosxfs.c

Visit:

Last change on this file was 1, checked in by Paul Smedley, 18 years ago
Initial code import
File size: 16.0 KB

Line
1	/*
2	Unix SMB/CIFS implementation.
3	Samba charset module for Mac OS X/Darwin
4	Copyright (C) Benjamin Riefenstahl 2003
5
6	This program is free software; you can redistribute it and/or modify
7	it under the terms of the GNU General Public License as published by
8	the Free Software Foundation; either version 2 of the License, or
9	(at your option) any later version.
10
11	This program is distributed in the hope that it will be useful,
12	but WITHOUT ANY WARRANTY; without even the implied warranty of
13	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14	GNU General Public License for more details.
15
16	You should have received a copy of the GNU General Public License
17	along with this program; if not, write to the Free Software
18	Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
19	*/
20
21	/*
22	* modules/charset_macosxfs.c
23	*
24	* A Samba charset module to use on Mac OS X/Darwin as the filesystem
25	* and display encoding.
26	*
27	* Actually two implementations are provided here. The default
28	* implementation is based on the official CFString API. The other is
29	* based on internal CFString APIs as defined in the OpenDarwin
30	* source.
31	*/
32
33	#include "includes.h"
34
35	/*
36	* Include OS frameworks. These are only needed in this module.
37	*/
38	#include <CoreFoundation/CFString.h>
39
40	/*
41	* See if autoconf has found us the internal headers in some form.
42	*/
43	#if HAVE_COREFOUNDATION_CFSTRINGENCODINGCONVERTER_H
44	# include <Corefoundation/CFStringEncodingConverter.h>
45	# include <Corefoundation/CFUnicodePrecomposition.h>
46	# define USE_INTERNAL_API 1
47	#elif HAVE_CFSTRINGENCODINGCONVERTER_H
48	# include <CFStringEncodingConverter.h>
49	# include <CFUnicodePrecomposition.h>
50	# define USE_INTERNAL_API 1
51	#endif
52
53	/*
54	* Compile time configuration: Do we want debug output?
55	*/
56	/* #define DEBUG_STRINGS 1 */
57
58	/*
59	* A simple, but efficient memory provider for our buffers.
60	*/
/*
 * Make sure `buffer` can hold at least `newsize` bytes.
 *
 * buffer   Current allocation (may be NULL when *size is 0).
 * size     In/out: capacity of `buffer` in bytes.
 * newsize  Number of bytes the caller needs.
 *
 * When the current capacity is sufficient the buffer is returned
 * untouched.  Otherwise it is reallocated with 128 bytes of slack, so
 * that slowly growing requests do not reallocate on every call.
 *
 * Returns the (possibly moved) buffer.  If SMB_REALLOC() fails, NULL is
 * returned and *size is reset to 0, so the recorded capacity never
 * claims memory that is not there.
 */
static inline void *resize_buffer (void *buffer, size_t *size, size_t newsize)
{
	if (newsize > *size) {
		size_t grown = newsize + 128;

		buffer = SMB_REALLOC(buffer, grown);
		*size = (NULL != buffer) ? grown : 0;
	}
	return buffer;
}
69
70	/*
71	* While there is a version of OpenDarwin for intel, the usual case is
72	* big-endian PPC. So we need byte swapping to handle the
73	* little-endian byte order of the network protocol. We also need an
74	* additional dynamic buffer to do this work for incoming data blocks,
75	* because we have to consider the original data as constant.
76	*
77	* We abstract the differences away by providing a simple facade with
78	* these functions/macros:
79	*
80	* le_to_native(dst,src,len)
81	* native_to_le(cp,len)
82	* set_ucbuffer_with_le(buffer,bufsize,data,size)
83	* set_ucbuffer_with_le_copy(buffer,bufsize,data,size,reserve)
84	*/
85	#ifdef WORDS_BIGENDIAN
86
/*
 * Copy `len` bytes from `src` to `dst`, exchanging the two bytes of
 * every 16-bit unit on the way.
 *
 * dst  Destination, at least `len` bytes; must not overlap `src`.
 * src  Source bytes.
 * len  Number of bytes to process.
 *
 * Only complete byte pairs are swapped.  If `len` is odd, the trailing
 * byte is copied unchanged, so exactly `len` bytes are read and written
 * and neither buffer is accessed past its end.
 */
static inline void swap_bytes (char * dst, const char * src, size_t len)
{
	const char *pairs_end = src + (len & ~(size_t)1);

	while (src < pairs_end) {
		dst[0] = src[1];
		dst[1] = src[0];
		dst += 2;
		src += 2;
	}
	if (len & 1) {
		/* Unpaired last byte: nothing to swap it with. */
		*dst = *src;
	}
}
/*
 * Exchange the two bytes of every 16-bit unit in the `len` bytes
 * starting at `cp`, in place.
 *
 * Only complete byte pairs are swapped.  If `len` is odd, the trailing
 * byte is left as it is, so nothing past cp[len-1] is touched.
 */
static inline void swap_bytes_inplace (char * cp, size_t len)
{
	char *pairs_end = cp + (len & ~(size_t)1);

	for (; cp < pairs_end; cp += 2) {
		char first = cp[0];

		cp[0] = cp[1];
		cp[1] = first;
	}
}
108
/*
 * Big-endian host: converting between little-endian wire data and
 * native UniChars needs a real byte swap.
 *
 * le_to_native         copies src to dst while swapping byte pairs.
 * native_to_le         swaps byte pairs in place.
 * set_ucbuffer_with_le always goes through the copying variant (with
 *                      no extra reserve), because the input data must
 *                      be treated as constant.
 */
#define le_to_native(dst,src,len) swap_bytes(dst,src,len)
#define native_to_le(cp,len) swap_bytes_inplace(cp,len)
#define set_ucbuffer_with_le(buffer,bufsize,data,size) \
	set_ucbuffer_with_le_copy(buffer,bufsize,data,size,0)
113
114	#else /* ! WORDS_BIGENDIAN */
115
/*
 * Little-endian host: wire data already is in native byte order.
 *
 * le_to_native         is a plain memcpy.
 * native_to_le         expands to nothing.
 * set_ucbuffer_with_le does not copy at all: it evaluates to `data`
 *                      cast to UniChar*.  `bufsize` is only evaluated
 *                      and discarded; `buffer` and `size` are unused.
 */
#define le_to_native(dst,src,len) memcpy(dst,src,len)
#define native_to_le(cp,len) /* nothing */
#define set_ucbuffer_with_le(buffer,bufsize,data,size) \
	(((void)(bufsize)),(UniChar*)(data))
120
121	#endif
122
123	static inline UniChar *set_ucbuffer_with_le_copy (
124	UniChar buffer, size_t bufsize,
125	const void *data, size_t size, size_t reserve)
126	{
127	buffer = resize_buffer(buffer, bufsize, size+reserve);
128	le_to_native((char*)buffer,data,size);
129	return buffer;
130	}
131
132
133	/*
134	* A simple hexdump function for debugging error conditions.
135	*/
/*
 * Write the string `s` to the debug log at level 0.
 *
 * The text is passed as an argument to a fixed "%s" format, never as
 * the format itself: hexdump() lines may contain '%' characters taken
 * from the data being dumped.
 */
#define debug_out(s) DEBUG(0,("%s",(s)))
137
138	#ifdef DEBUG_STRINGS
139
/*
 * Write a hex/ASCII dump of `len` bytes at `s` to the debug log.
 *
 * label  Heading printed before the dump.
 * s      Data to dump.
 * len    Number of bytes to dump.
 *
 * The dump is framed by "<<<<<<<" and ">>>>>>>" lines.  Each line shows
 * the offset, up to 16 bytes in hex (two groups of 8) and the same
 * bytes as text, with '.' standing in for anything that is not
 * printable 7-bit ASCII.
 */
static void hexdump( const char * label, const char * s, size_t len )
{
	size_t restlen = len;
	debug_out("<<<<<<<\n");
	debug_out(label);
	debug_out("\n");
	while (restlen > 0) {
		/* Longest possible line is well below 100 bytes. */
		char line[100];
		size_t i, j;
		char * d = line;
#undef sprintf
		d += sprintf(d, "%04X ", (unsigned)(len-restlen));
		*d++ = ' ';
		/* First group of up to 8 bytes. */
		for( i = 0; i<restlen && i<8; ++i ) {
			d += sprintf(d, "%02X ", ((unsigned)s[i]) & 0xFF);
		}
		/* Pad missing bytes; one hex cell is 3 columns wide. */
		for( j = i; j<8; ++j ) {
			d += sprintf(d, "   ");
		}
		*d++ = ' ';
		/* Second group of up to 8 bytes. */
		for( i = 8; i<restlen && i<16; ++i ) {
			d += sprintf(d, "%02X ", ((unsigned)s[i]) & 0xFF);
		}
		for( j = i; j<16; ++j ) {
			d += sprintf(d, "   ");
		}
		*d++ = ' ';
		/* Text column; isprint() needs an unsigned char value. */
		for( i = 0; i<restlen && i<16; ++i ) {
			if(s[i] < ' ' || s[i] >= 0x7F ||
			   !isprint((unsigned char)s[i]))
				*d++ = '.';
			else
				*d++ = s[i];
		}
		*d++ = '\n';
		*d = 0;
		/* i is the number of bytes shown on this line. */
		restlen -= i;
		s += i;
		debug_out(line);
	}
	debug_out(">>>>>>>\n");
}
181
182	#else /* !DEBUG_STRINGS */
183
/* Without DEBUG_STRINGS, calls to hexdump() expand to nothing. */
#define hexdump(label,s,len) /* nothing */
185
186	#endif
187
188
189	#if !USE_INTERNAL_API
190
191	/*
192	* An implementation based on documented Mac OS X APIs.
193	*
194	* This does a certain amount of memory management, creating and
195	* manipulating CFString objects. We try to minimize the impact by
196	* keeping those objects around and re-using them. We also use
197	* external backing store for the CFStrings where this is possible and
198	* beneficial.
199	*
200	* The Unicode normalizations forms available at this level are
201	* generic, not specifically for the file system. So they may not be
202	* perfect fits.
203	*/
204	static size_t macosxfs_encoding_pull(
205	void cd, / Encoder handle */
206	char *inbuf, size_t inbytesleft, /* Script string */
207	char *outbuf, size_t outbytesleft) /* UTF-16-LE string */
208	{
209	static const int script_code = kCFStringEncodingUTF8;
210	static CFMutableStringRef cfstring = NULL;
211	size_t outsize;
212	CFRange range;
213
214	(void) cd; /* UNUSED */
215
216	if (0 == *inbytesleft) {
217	return 0;
218	}
219
220	if (NULL == cfstring) {
221	/*
222	* A version with an external backing store as in the
223	* push function should have been more efficient, but
224	* testing shows, that it is actually slower (!).
225	* Maybe kCFAllocatorDefault gets shortcut evaluation
226	* internally, while kCFAllocatorNull doesn't.
227	*/
228	cfstring = CFStringCreateMutable(kCFAllocatorDefault,0);
229	}
230
231	/*
232	* Three methods of appending to a CFString, choose the most
233	* efficient.
234	*/
235	if (0 == (inbuf)[inbytesleft-1]) {
236	CFStringAppendCString(cfstring, *inbuf, script_code);
237	} else if (*inbytesleft <= 255) {
238	Str255 buffer;
239	buffer[0] = *inbytesleft;
240	memcpy(buffer+1, *inbuf, buffer[0]);
241	CFStringAppendPascalString(cfstring, buffer, script_code);
242	} else {
243	/*
244	* We would like to use a fixed buffer and a loop
245	* here, but than we can't garantee that the input is
246	* well-formed UTF-8, as we are supposed to do.
247	*/
248	static char *buffer = NULL;
249	static size_t buflen = 0;
250	buffer = resize_buffer(buffer, &buflen, *inbytesleft+1);
251	memcpy(buffer, inbuf, inbytesleft);
252	buffer[*inbytesleft] = 0;
253	CFStringAppendCString(cfstring, *inbuf, script_code);
254	}
255
256	/*
257	* Compose characters, using the non-canonical composition
258	* form.
259	*/
260	CFStringNormalize(cfstring, kCFStringNormalizationFormC);
261
262	outsize = CFStringGetLength(cfstring);
263	range = CFRangeMake(0,outsize);
264
265	if (outsize == 0) {
266	/*
267	* HACK: smbd/mangle_hash2.c:is_legal_name() expects
268	* errors here. That function will always pass 2
269	* characters. smbd/open.c:check_for_pipe() cuts a
270	* patchname to 10 characters blindly. Suppress the
271	* debug output in those cases.
272	*/
273	if(2 != inbytesleft && 10 != inbytesleft) {
274	debug_out("String conversion: "
275	"An unknown error occurred\n");
276	hexdump("UTF8->UTF16LE (old) input",
277	inbuf, inbytesleft);
278	}
279	errno = EILSEQ; /* Not sure, but this is what we have
280	* actually seen. */
281	return -1;
282	}
283	if (outsize2 > outbytesleft) {
284	CFStringDelete(cfstring, range);
285	debug_out("String conversion: "
286	"Output buffer too small\n");
287	hexdump("UTF8->UTF16LE (old) input",
288	inbuf, inbytesleft);
289	errno = E2BIG;
290	return -1;
291	}
292
293	CFStringGetCharacters(cfstring, range, (UniChar)outbuf);
294	CFStringDelete(cfstring, range);
295
296	native_to_le(outbuf, outsize2);
297
298	/*
299	* Add a converted null byte, if the CFString conversions
300	* prevented that until now.
301	*/
302	if (0 == (inbuf)[inbytesleft-1] &&
303	(0 != (outbuf)[outsize2-1] \|\| 0 != (outbuf)[outsize2-2])) {
304
305	if ((outsize2+2) > outbytesleft) {
306	debug_out("String conversion: "
307	"Output buffer too small\n");
308	hexdump("UTF8->UTF16LE (old) input",
309	inbuf, inbytesleft);
310	errno = E2BIG;
311	return -1;
312	}
313
314	(outbuf)[outsize2] = (outbuf)[outsize2+1] = 0;
315	outsize += 2;
316	}
317
318	inbuf += inbytesleft;
319	*inbytesleft = 0;
320	outbuf += outsize2;
321	outbytesleft -= outsize2;
322
323	return 0;
324	}
325
326	static size_t macosxfs_encoding_push(
327	void cd, / Encoder handle */
328	char *inbuf, size_t inbytesleft, /* UTF-16-LE string */
329	char *outbuf, size_t outbytesleft) /* Script string */
330	{
331	static const int script_code = kCFStringEncodingUTF8;
332	static CFMutableStringRef cfstring = NULL;
333	static UniChar *buffer = NULL;
334	static size_t buflen = 0;
335	CFIndex outsize, cfsize, charsconverted;
336
337	(void) cd; /* UNUSED */
338
339	if (0 == *inbytesleft) {
340	return 0;
341	}
342
343	/*
344	* We need a buffer that can hold 4 times the original data,
345	* because that is the theoretical maximum that decomposition
346	* can create currently (in Unicode 4.0).
347	*/
348	buffer = set_ucbuffer_with_le_copy(
349	buffer, &buflen, inbuf, inbytesleft, 3 * *inbytesleft);
350
351	if (NULL == cfstring) {
352	cfstring = CFStringCreateMutableWithExternalCharactersNoCopy(
353	kCFAllocatorDefault,
354	buffer, *inbytesleft/2, buflen/2,
355	kCFAllocatorNull);
356	} else {
357	CFStringSetExternalCharactersNoCopy(
358	cfstring,
359	buffer, *inbytesleft/2, buflen/2);
360	}
361
362	/*
363	* Decompose characters, using the non-canonical decomposition
364	* form.
365	*
366	* NB: This isn't exactly what HFS+ wants (see note on
367	* kCFStringEncodingUseHFSPlusCanonical in
368	* CFStringEncodingConverter.h), but AFAIK it's the best that
369	* the official API can do.
370	*/
371	CFStringNormalize(cfstring, kCFStringNormalizationFormD);
372
373	cfsize = CFStringGetLength(cfstring);
374	charsconverted = CFStringGetBytes(
375	cfstring, CFRangeMake(0,cfsize),
376	script_code, 0, False,
377	outbuf, outbytesleft, &outsize);
378
379	if (0 == charsconverted) {
380	debug_out("String conversion: "
381	"Buffer too small or not convertable\n");
382	hexdump("UTF16LE->UTF8 (old) input",
383	inbuf, inbytesleft);
384	errno = EILSEQ; /* Probably more likely. */
385	return -1;
386	}
387
388	/*
389	* Add a converted null byte, if the CFString conversions
390	* prevented that until now.
391	*/
392	if (0 == (inbuf)[inbytesleft-1] && 0 == (inbuf)[inbytesleft-2] &&
393	(0 != (*outbuf)[outsize-1])) {
394
395	if (((size_t)outsize+1) > *outbytesleft) {
396	debug_out("String conversion: "
397	"Output buffer too small\n");
398	hexdump("UTF16LE->UTF8 (old) input",
399	inbuf, inbytesleft);
400	errno = E2BIG;
401	return -1;
402	}
403
404	(*outbuf)[outsize] = 0;
405	++outsize;
406	}
407
408	inbuf += inbytesleft;
409	*inbytesleft = 0;
410	*outbuf += outsize;
411	*outbytesleft -= outsize;
412
413	return 0;
414	}
415
416	#else /* USE_INTERNAL_API */
417
418	/*
419	* An implementation based on internal code as known from the
420	* OpenDarwin CVS.
421	*
422	* This code doesn't need much memory management because it uses
423	* functions that operate on the raw memory directly.
424	*
425	* The push routine here is faster and more compatible with HFS+ than
426	* the other implementation above. The pull routine is only faster
427	* for some strings, slightly slower for others. The pull routine
428	* loses because it has to iterate over the data twice, once to
429	* decode UTF-8 and then to do the character composition required by
430	* Windows.
431	*/
432	static size_t macosxfs_encoding_pull(
433	void cd, / Encoder handle */
434	char *inbuf, size_t inbytesleft, /* Script string */
435	char *outbuf, size_t outbytesleft) /* UTF-16-LE string */
436	{
437	static const int script_code = kCFStringEncodingUTF8;
438	UInt32 srcCharsUsed = 0;
439	UInt32 dstCharsUsed = 0;
440	UInt32 result;
441	uint32_t dstDecomposedUsed = 0;
442	uint32_t dstPrecomposedUsed = 0;
443
444	(void) cd; /* UNUSED */
445
446	if (0 == *inbytesleft) {
447	return 0;
448	}
449
450	result = CFStringEncodingBytesToUnicode(
451	script_code, kCFStringEncodingComposeCombinings,
452	inbuf, inbytesleft, &srcCharsUsed,
453	(UniChar)outbuf, *outbytesleft, &dstCharsUsed);
454
455	switch(result) {
456	case kCFStringEncodingConversionSuccess:
457	if (*inbytesleft == srcCharsUsed)
458	break;
459	else
460	; /fall through/
461	case kCFStringEncodingInsufficientOutputBufferLength:
462	debug_out("String conversion: "
463	"Output buffer too small\n");
464	hexdump("UTF8->UTF16LE (new) input",
465	inbuf, inbytesleft);
466	errno = E2BIG;
467	return -1;
468	case kCFStringEncodingInvalidInputStream:
469	/*
470	* HACK: smbd/mangle_hash2.c:is_legal_name() expects
471	* errors here. That function will always pass 2
472	* characters. smbd/open.c:check_for_pipe() cuts a
473	* patchname to 10 characters blindly. Suppress the
474	* debug output in those cases.
475	*/
476	if(2 != inbytesleft && 10 != inbytesleft) {
477	debug_out("String conversion: "
478	"Invalid input sequence\n");
479	hexdump("UTF8->UTF16LE (new) input",
480	inbuf, inbytesleft);
481	}
482	errno = EILSEQ;
483	return -1;
484	case kCFStringEncodingConverterUnavailable:
485	debug_out("String conversion: "
486	"Unknown encoding\n");
487	hexdump("UTF8->UTF16LE (new) input",
488	inbuf, inbytesleft);
489	errno = EINVAL;
490	return -1;
491	}
492
493	/*
494	* It doesn't look like CFStringEncodingBytesToUnicode() can
495	* produce precomposed characters (flags=ComposeCombinings
496	* doesn't do it), so we need another pass over the data here.
497	* We can do this in-place, as the string can only get
498	* shorter.
499	*
500	* (Actually in theory there should be an internal
501	* decomposition and reordering before the actual composition
502	* step. But we should be able to rely on that we always get
503	* fully decomposed strings for input, so this can't create
504	* problems in reality.)
505	*/
506	CFUniCharPrecompose(
507	(const UTF16Char )outbuf, dstCharsUsed, &dstDecomposedUsed,
508	(UTF16Char )outbuf, dstCharsUsed, &dstPrecomposedUsed);
509
510	native_to_le(outbuf, dstPrecomposedUsed2);
511
512	*inbuf += srcCharsUsed;
513	*inbytesleft -= srcCharsUsed;
514	outbuf += dstPrecomposedUsed2;
515	outbytesleft -= dstPrecomposedUsed2;
516
517	return 0;
518	}
519
520	static size_t macosxfs_encoding_push(
521	void cd, / Encoder handle */
522	char *inbuf, size_t inbytesleft, /* UTF-16-LE string */
523	char *outbuf, size_t outbytesleft) /* Script string */
524	{
525	static const int script_code = kCFStringEncodingUTF8;
526	static UniChar *buffer = NULL;
527	static size_t buflen = 0;
528	UInt32 srcCharsUsed=0, dstCharsUsed=0, result;
529
530	(void) cd; /* UNUSED */
531
532	if (0 == *inbytesleft) {
533	return 0;
534	}
535
536	buffer = set_ucbuffer_with_le(
537	buffer, &buflen, inbuf, inbytesleft);
538
539	result = CFStringEncodingUnicodeToBytes(
540	script_code, kCFStringEncodingUseHFSPlusCanonical,
541	buffer, *inbytesleft/2, &srcCharsUsed,
542	outbuf, outbytesleft, &dstCharsUsed);
543
544	switch(result) {
545	case kCFStringEncodingConversionSuccess:
546	if (*inbytesleft/2 == srcCharsUsed)
547	break;
548	else
549	; /fall through/
550	case kCFStringEncodingInsufficientOutputBufferLength:
551	debug_out("String conversion: "
552	"Output buffer too small\n");
553	hexdump("UTF16LE->UTF8 (new) input",
554	inbuf, inbytesleft);
555	errno = E2BIG;
556	return -1;
557	case kCFStringEncodingInvalidInputStream:
558	/*
559	* HACK: smbd/open.c:check_for_pipe():is_legal_name()
560	* cuts a pathname to 10 characters blindly. Suppress
561	* the debug output in those cases.
562	*/
563	if(10 != *inbytesleft) {
564	debug_out("String conversion: "
565	"Invalid input sequence\n");
566	hexdump("UTF16LE->UTF8 (new) input",
567	inbuf, inbytesleft);
568	}
569	errno = EILSEQ;
570	return -1;
571	case kCFStringEncodingConverterUnavailable:
572	debug_out("String conversion: "
573	"Unknown encoding\n");
574	hexdump("UTF16LE->UTF8 (new) input",
575	inbuf, inbytesleft);
576	errno = EINVAL;
577	return -1;
578	}
579
580	inbuf += srcCharsUsed2;
581	inbytesleft -= srcCharsUsed2;
582	*outbuf += dstCharsUsed;
583	*outbytesleft -= dstCharsUsed;
584
585	return 0;
586	}
587
588	#endif /* USE_INTERNAL_API */
589
590	/*
591	* For initialization, actually install the encoding as "macosxfs".
592	*/
/*
 * Registration record for this charset: the name it is registered
 * under, followed by the pull and push conversion routines of
 * whichever implementation was compiled in above.
 */
static struct charset_functions macosxfs_encoding_functions = {
	"MACOSXFS", macosxfs_encoding_pull, macosxfs_encoding_push
};
596
597	NTSTATUS charset_macosxfs_init(void)
598	{
599	return smb_register_charset(&macosxfs_encoding_functions);
600	}
601
602	/* eof */

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: branches/samba-3.0/source/modules/charset_macosxfs.c

Download in other formats: