Context Navigation

source: vendor/3.6.9/source3/modules/charset_macosxfs.c

Visit:

Last change on this file was 740, checked in by Silvan Scherrer, 13 years ago
Samba Server: update vendor to 3.6.0
File size: 16.6 KB

Line
1	/*
2	Unix SMB/CIFS implementation.
3	Samba charset module for Mac OS X/Darwin
4	Copyright (C) Benjamin Riefenstahl 2003
5
6	This program is free software; you can redistribute it and/or modify
7	it under the terms of the GNU General Public License as published by
8	the Free Software Foundation; either version 3 of the License, or
9	(at your option) any later version.
10
11	This program is distributed in the hope that it will be useful,
12	but WITHOUT ANY WARRANTY; without even the implied warranty of
13	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14	GNU General Public License for more details.
15
16	You should have received a copy of the GNU General Public License
17	along with this program. If not, see <http://www.gnu.org/licenses/>.
18	*/
19
20	/*
21	* modules/charset_macosxfs.c
22	*
23	* A Samba charset module to use on Mac OS X/Darwin as the filesystem
24	* and display encoding.
25	*
26	* Actually two implementations are provided here. The default
27	* implementation is based on the official CFString API. The other is
28	* based on internal CFString APIs as defined in the OpenDarwin
29	* source.
30	*/
31
32	#include "includes.h"
33
34	/*
35	* Include OS frameworks. These are only needed in this module.
36	*/
37	#include <CoreFoundation/CFString.h>
38
39	/*
40	* See if autoconf has found us the internal headers in some form.
41	*/
42	#if HAVE_COREFOUNDATION_CFSTRINGENCODINGCONVERTER_H
43	# include <CoreFoundation/CFStringEncodingConverter.h>
44	# include <CoreFoundation/CFUnicodePrecomposition.h>
45	# define USE_INTERNAL_API 1
46	#elif HAVE_CFSTRINGENCODINGCONVERTER_H
47	# include <CFStringEncodingConverter.h>
48	# include <CFUnicodePrecomposition.h>
49	# define USE_INTERNAL_API 1
50	#endif
51
52	/*
53	* Compile time configuration: Do we want debug output?
54	*/
55	/* #define DEBUG_STRINGS 1 */
56
57	/*
58	* A simple, but efficient memory provider for our buffers.
59	*/
/*
 * Make sure a heap buffer can hold at least `newsize` bytes.
 *
 * buffer   Current allocation, or NULL if there is none yet.
 * size     In/out: recorded capacity of `buffer` in bytes.
 * newsize  Number of bytes the caller needs.
 *
 * Returns the (possibly moved) buffer.  The buffer is never shrunk; when
 * it has to grow, 128 bytes of slack are added so that slightly larger
 * requests do not reallocate again.
 */
static inline void *resize_buffer (void *buffer, size_t *size, size_t newsize)
{
	if (newsize > *size) {
		size_t wanted = newsize + 128;

		buffer = SMB_REALLOC(buffer, wanted);
		/*
		 * Only record the new capacity if we really got it, so a
		 * failed allocation is retried on the next call instead of
		 * being mistaken for a valid buffer.
		 * NOTE(review): assumes SMB_REALLOC yields NULL on failure
		 * - confirm against its definition.
		 */
		*size = (NULL != buffer) ? wanted : 0;
	}
	return buffer;
}
68
69	/*
70	* While there is a version of OpenDarwin for intel, the usual case is
71	* big-endian PPC. So we need byte swapping to handle the
72	* little-endian byte order of the network protocol. We also need an
73	* additional dynamic buffer to do this work for incoming data blocks,
74	* because we have to consider the original data as constant.
75	*
76	* We abstract the differences away by providing a simple facade with
77	* these functions/macros:
78	*
79	* le_to_native(dst,src,len)
80	* native_to_le(cp,len)
81	* set_ucbuffer_with_le(buffer,bufsize,data,size)
82	* set_ucbuffer_with_le_copy(buffer,bufsize,data,size,reserve)
83	*/
84	#ifdef WORDS_BIGENDIAN
85
/*
 * Copy `len` bytes from `src` to `dst`, exchanging the two bytes of each
 * 16-bit unit on the way.  The two regions must not overlap.
 *
 * Only complete byte pairs are processed: if `len` is odd, the trailing
 * byte is neither read nor written, so neither buffer is accessed beyond
 * `len` bytes.
 */
static inline void swap_bytes (char * dst, const char * src, size_t len)
{
	size_t i;

	for (i = 0; i + 1 < len; i += 2) {
		dst[i]     = src[i + 1];
		dst[i + 1] = src[i];
	}
}
/*
 * Exchange the two bytes of each 16-bit unit in the `len` bytes at `cp`.
 *
 * Only complete byte pairs are swapped: if `len` is odd, the trailing byte
 * is left untouched and nothing beyond `len` bytes is accessed.
 */
static inline void swap_bytes_inplace (char * cp, size_t len)
{
	size_t i;

	for (i = 0; i + 1 < len; i += 2) {
		char first = cp[i];

		cp[i]     = cp[i + 1];
		cp[i + 1] = first;
	}
}
107
/*
 * Big-endian host: little-endian wire data has to be byte-swapped, and
 * incoming data is always copied into a private buffer first (reserve 0).
 */
#define le_to_native(dst,src,len) swap_bytes(dst,src,len)
#define native_to_le(cp,len) swap_bytes_inplace(cp,len)
#define set_ucbuffer_with_le(buffer,bufsize,data,size) \
	set_ucbuffer_with_le_copy(buffer,bufsize,data,size,0)
112
113	#else /* ! WORDS_BIGENDIAN */
114
/*
 * Little-endian host: le_to_native() is a plain copy, native_to_le() does
 * nothing (it expands to a no-op expression so it is still a well-formed
 * statement), and set_ucbuffer_with_le() evaluates and discards `bufsize`
 * and hands back `data` itself, cast to UniChar*, without copying.
 */
#define le_to_native(dst,src,len) memcpy((dst),(src),(len))
#define native_to_le(cp,len) ((void)0)
#define set_ucbuffer_with_le(buffer,bufsize,data,size) \
	(((void)(bufsize)),(UniChar*)(data))
119
120	#endif
121
122	static inline UniChar *set_ucbuffer_with_le_copy (
123	UniChar buffer, size_t bufsize,
124	const void *data, size_t size, size_t reserve)
125	{
126	buffer = resize_buffer(buffer, bufsize, size+reserve);
127	le_to_native((char*)buffer,data,size);
128	return buffer;
129	}
130
131
132	/*
133	* A simple hexdump function for debugging error conditions.
134	*/
/*
 * Emit a string at debug level 0.  The string is passed as an argument to
 * a fixed "%s" format, so a '%' in it is printed literally rather than
 * being interpreted as a conversion specification.
 */
#define debug_out(s) DEBUG(0,("%s",(s)))
136
137	#ifdef DEBUG_STRINGS
138
/*
 * Write a labelled hex dump of `len` bytes at `s` to the debug log.
 *
 * The dump is framed by "<<<<<<<" / ">>>>>>>" lines.  Each line shows the
 * offset, up to 16 bytes in hex as two groups of eight, and the same
 * bytes as text with non-printable ones replaced by '.'.
 *
 * label  Caption printed before the dump.
 * s      Data to dump.
 * len    Number of bytes at `s`.
 */
static void hexdump( const char * label, const char * s, size_t len )
{
	size_t restlen = len;

	debug_out("<<<<<<<\n");
	/* Label and rendered lines are data, never format strings. */
	DEBUG(0,("%s", label));
	debug_out("\n");
	while (restlen > 0) {
		char line[100];
		size_t i, j;
		char * d = line;
#undef sprintf
		d += sprintf(d, "%04X ", (unsigned)(len-restlen));
		*d++ = ' ';
		/* First group of eight bytes. */
		for( i = 0; i<restlen && i<8; ++i ) {
			d += sprintf(d, "%02X ", ((unsigned)s[i]) & 0xFF);
		}
		/* Pad missing bytes with the width of one "XX " cell. */
		for( j = i; j<8; ++j ) {
			d += sprintf(d, "   ");
		}
		*d++ = ' ';
		/* Second group of eight bytes. */
		for( i = 8; i<restlen && i<16; ++i ) {
			d += sprintf(d, "%02X ", ((unsigned)s[i]) & 0xFF);
		}
		for( j = i; j<16; ++j ) {
			d += sprintf(d, "   ");
		}
		*d++ = ' ';
		/* Text column; `i` ends up as the bytes consumed by this line. */
		for( i = 0; i<restlen && i<16; ++i ) {
			if(s[i] < ' ' || s[i] >= 0x7F ||
			   !isprint((unsigned char)s[i]))
				*d++ = '.';
			else
				*d++ = s[i];
		}
		*d++ = '\n';
		*d = 0;
		restlen -= i;
		s += i;
		DEBUG(0,("%s", line));
	}
	debug_out(">>>>>>>\n");
}
180
181	#else /* !DEBUG_STRINGS */
182
/* Without DEBUG_STRINGS, hexdump() is a no-op expression. */
#define hexdump(label,s,len) ((void)0)
184
185	#endif
186
187
188	#if !USE_INTERNAL_API
189
190	/*
191	* An implementation based on documented Mac OS X APIs.
192	*
193	* This does a certain amount of memory management, creating and
194	* manipulating CFString objects. We try to minimize the impact by
195	* keeping those objects around and re-using them. We also use
196	* external backing store for the CFStrings where this is possible and
197	* beneficial.
198	*
199	* The Unicode normalizations forms available at this level are
200	* generic, not specifically for the file system. So they may not be
201	* perfect fits.
202	*/
203	static size_t macosxfs_encoding_pull(
204	void cd, / Encoder handle */
205	char *inbuf, size_t inbytesleft, /* Script string */
206	char *outbuf, size_t outbytesleft) /* UTF-16-LE string */
207	{
208	static const int script_code = kCFStringEncodingUTF8;
209	static CFMutableStringRef cfstring = NULL;
210	size_t outsize;
211	CFRange range;
212
213	(void) cd; /* UNUSED */
214
215	if (0 == *inbytesleft) {
216	return 0;
217	}
218
219	if (NULL == cfstring) {
220	/*
221	* A version with an external backing store as in the
222	* push function should have been more efficient, but
223	* testing shows, that it is actually slower (!).
224	* Maybe kCFAllocatorDefault gets shortcut evaluation
225	* internally, while kCFAllocatorNull doesn't.
226	*/
227	cfstring = CFStringCreateMutable(kCFAllocatorDefault,0);
228	}
229
230	/*
231	* Three methods of appending to a CFString, choose the most
232	* efficient.
233	*/
234	if (0 == (inbuf)[inbytesleft-1]) {
235	CFStringAppendCString(cfstring, *inbuf, script_code);
236	} else if (*inbytesleft <= 255) {
237	Str255 buffer;
238	buffer[0] = *inbytesleft;
239	memcpy(buffer+1, *inbuf, buffer[0]);
240	CFStringAppendPascalString(cfstring, buffer, script_code);
241	} else {
242	/*
243	* We would like to use a fixed buffer and a loop
244	* here, but than we can't garantee that the input is
245	* well-formed UTF-8, as we are supposed to do.
246	*/
247	static char *buffer = NULL;
248	static size_t buflen = 0;
249	buffer = resize_buffer(buffer, &buflen, *inbytesleft+1);
250	memcpy(buffer, inbuf, inbytesleft);
251	buffer[*inbytesleft] = 0;
252	CFStringAppendCString(cfstring, *inbuf, script_code);
253	}
254
255	/*
256	* Compose characters, using the non-canonical composition
257	* form.
258	*/
259	CFStringNormalize(cfstring, kCFStringNormalizationFormC);
260
261	outsize = CFStringGetLength(cfstring);
262	range = CFRangeMake(0,outsize);
263
264	if (outsize == 0) {
265	/*
266	* HACK: smbd/mangle_hash2.c:is_legal_name() expects
267	* errors here. That function will always pass 2
268	* characters. smbd/open.c:check_for_pipe() cuts a
269	* patchname to 10 characters blindly. Suppress the
270	* debug output in those cases.
271	*/
272	if(2 != inbytesleft && 10 != inbytesleft) {
273	debug_out("String conversion: "
274	"An unknown error occurred\n");
275	hexdump("UTF8->UTF16LE (old) input",
276	inbuf, inbytesleft);
277	}
278	errno = EILSEQ; /* Not sure, but this is what we have
279	* actually seen. */
280	return -1;
281	}
282	if (outsize2 > outbytesleft) {
283	CFStringDelete(cfstring, range);
284	debug_out("String conversion: "
285	"Output buffer too small\n");
286	hexdump("UTF8->UTF16LE (old) input",
287	inbuf, inbytesleft);
288	errno = E2BIG;
289	return -1;
290	}
291
292	CFStringGetCharacters(cfstring, range, (UniChar)outbuf);
293	CFStringDelete(cfstring, range);
294
295	native_to_le(outbuf, outsize2);
296
297	/*
298	* Add a converted null byte, if the CFString conversions
299	* prevented that until now.
300	*/
301	if (0 == (inbuf)[inbytesleft-1] &&
302	(0 != (outbuf)[outsize2-1] \|\| 0 != (outbuf)[outsize2-2])) {
303
304	if ((outsize2+2) > outbytesleft) {
305	debug_out("String conversion: "
306	"Output buffer too small\n");
307	hexdump("UTF8->UTF16LE (old) input",
308	inbuf, inbytesleft);
309	errno = E2BIG;
310	return -1;
311	}
312
313	(outbuf)[outsize2] = (outbuf)[outsize2+1] = 0;
314	outsize += 2;
315	}
316
317	inbuf += inbytesleft;
318	*inbytesleft = 0;
319	outbuf += outsize2;
320	outbytesleft -= outsize2;
321
322	return 0;
323	}
324
325	static size_t macosxfs_encoding_push(
326	void cd, / Encoder handle */
327	char *inbuf, size_t inbytesleft, /* UTF-16-LE string */
328	char *outbuf, size_t outbytesleft) /* Script string */
329	{
330	static const int script_code = kCFStringEncodingUTF8;
331	static CFMutableStringRef cfstring = NULL;
332	static UniChar *buffer = NULL;
333	static size_t buflen = 0;
334	CFIndex outsize, cfsize, charsconverted;
335
336	(void) cd; /* UNUSED */
337
338	if (0 == *inbytesleft) {
339	return 0;
340	}
341
342	/*
343	* We need a buffer that can hold 4 times the original data,
344	* because that is the theoretical maximum that decomposition
345	* can create currently (in Unicode 4.0).
346	*/
347	buffer = set_ucbuffer_with_le_copy(
348	buffer, &buflen, inbuf, inbytesleft, 3 * *inbytesleft);
349
350	if (NULL == cfstring) {
351	cfstring = CFStringCreateMutableWithExternalCharactersNoCopy(
352	kCFAllocatorDefault,
353	buffer, *inbytesleft/2, buflen/2,
354	kCFAllocatorNull);
355	} else {
356	CFStringSetExternalCharactersNoCopy(
357	cfstring,
358	buffer, *inbytesleft/2, buflen/2);
359	}
360
361	/*
362	* Decompose characters, using the non-canonical decomposition
363	* form.
364	*
365	* NB: This isn't exactly what HFS+ wants (see note on
366	* kCFStringEncodingUseHFSPlusCanonical in
367	* CFStringEncodingConverter.h), but AFAIK it's the best that
368	* the official API can do.
369	*/
370	CFStringNormalize(cfstring, kCFStringNormalizationFormD);
371
372	cfsize = CFStringGetLength(cfstring);
373	charsconverted = CFStringGetBytes(
374	cfstring, CFRangeMake(0,cfsize),
375	script_code, 0, False,
376	outbuf, outbytesleft, &outsize);
377
378	if (0 == charsconverted) {
379	debug_out("String conversion: "
380	"Buffer too small or not convertable\n");
381	hexdump("UTF16LE->UTF8 (old) input",
382	inbuf, inbytesleft);
383	errno = EILSEQ; /* Probably more likely. */
384	return -1;
385	}
386
387	/*
388	* Add a converted null byte, if the CFString conversions
389	* prevented that until now.
390	*/
391	if (0 == (inbuf)[inbytesleft-1] && 0 == (inbuf)[inbytesleft-2] &&
392	(0 != (*outbuf)[outsize-1])) {
393
394	if (((size_t)outsize+1) > *outbytesleft) {
395	debug_out("String conversion: "
396	"Output buffer too small\n");
397	hexdump("UTF16LE->UTF8 (old) input",
398	inbuf, inbytesleft);
399	errno = E2BIG;
400	return -1;
401	}
402
403	(*outbuf)[outsize] = 0;
404	++outsize;
405	}
406
407	inbuf += inbytesleft;
408	*inbytesleft = 0;
409	*outbuf += outsize;
410	*outbytesleft -= outsize;
411
412	return 0;
413	}
414
415	#else /* USE_INTERNAL_API */
416
417	/*
418	* An implementation based on internal code as known from the
419	* OpenDarwin CVS.
420	*
421	* This code doesn't need much memory management because it uses
422	* functions that operate on the raw memory directly.
423	*
424	* The push routine here is faster and more compatible with HFS+ than
425	* the other implementation above. The pull routine is only faster
426	* for some strings, slightly slower for others. The pull routine
427	* loses because it has to iterate over the data twice, once to
428	* decode UTF-8 and then to do the character composition required by
429	* Windows.
430	*/
431	static size_t macosxfs_encoding_pull(
432	void cd, / Encoder handle */
433	char *inbuf, size_t inbytesleft, /* Script string */
434	char *outbuf, size_t outbytesleft) /* UTF-16-LE string */
435	{
436	static const int script_code = kCFStringEncodingUTF8;
437	UInt32 srcCharsUsed = 0;
438	UInt32 dstCharsUsed = 0;
439	UInt32 result;
440	uint32_t dstDecomposedUsed = 0;
441	uint32_t dstPrecomposedUsed = 0;
442
443	(void) cd; /* UNUSED */
444
445	if (0 == *inbytesleft) {
446	return 0;
447	}
448
449	result = CFStringEncodingBytesToUnicode(
450	script_code, kCFStringEncodingComposeCombinings,
451	inbuf, inbytesleft, &srcCharsUsed,
452	(UniChar)outbuf, *outbytesleft, &dstCharsUsed);
453
454	switch(result) {
455	case kCFStringEncodingConversionSuccess:
456	if (*inbytesleft == srcCharsUsed)
457	break;
458	else
459	; /fall through/
460	case kCFStringEncodingInsufficientOutputBufferLength:
461	debug_out("String conversion: "
462	"Output buffer too small\n");
463	hexdump("UTF8->UTF16LE (new) input",
464	inbuf, inbytesleft);
465	errno = E2BIG;
466	return -1;
467	case kCFStringEncodingInvalidInputStream:
468	/*
469	* HACK: smbd/mangle_hash2.c:is_legal_name() expects
470	* errors here. That function will always pass 2
471	* characters. smbd/open.c:check_for_pipe() cuts a
472	* patchname to 10 characters blindly. Suppress the
473	* debug output in those cases.
474	*/
475	if(2 != inbytesleft && 10 != inbytesleft) {
476	debug_out("String conversion: "
477	"Invalid input sequence\n");
478	hexdump("UTF8->UTF16LE (new) input",
479	inbuf, inbytesleft);
480	}
481	errno = EILSEQ;
482	return -1;
483	case kCFStringEncodingConverterUnavailable:
484	debug_out("String conversion: "
485	"Unknown encoding\n");
486	hexdump("UTF8->UTF16LE (new) input",
487	inbuf, inbytesleft);
488	errno = EINVAL;
489	return -1;
490	}
491
492	/*
493	* It doesn't look like CFStringEncodingBytesToUnicode() can
494	* produce precomposed characters (flags=ComposeCombinings
495	* doesn't do it), so we need another pass over the data here.
496	* We can do this in-place, as the string can only get
497	* shorter.
498	*
499	* (Actually in theory there should be an internal
500	* decomposition and reordering before the actual composition
501	* step. But we should be able to rely on that we always get
502	* fully decomposed strings for input, so this can't create
503	* problems in reality.)
504	*/
505	CFUniCharPrecompose(
506	(const UTF16Char )outbuf, dstCharsUsed, &dstDecomposedUsed,
507	(UTF16Char )outbuf, dstCharsUsed, &dstPrecomposedUsed);
508
509	native_to_le(outbuf, dstPrecomposedUsed2);
510
511	*inbuf += srcCharsUsed;
512	*inbytesleft -= srcCharsUsed;
513	outbuf += dstPrecomposedUsed2;
514	outbytesleft -= dstPrecomposedUsed2;
515
516	return 0;
517	}
518
519	static size_t macosxfs_encoding_push(
520	void cd, / Encoder handle */
521	char *inbuf, size_t inbytesleft, /* UTF-16-LE string */
522	char *outbuf, size_t outbytesleft) /* Script string */
523	{
524	static const int script_code = kCFStringEncodingUTF8;
525	static UniChar *buffer = NULL;
526	static size_t buflen = 0;
527	UInt32 srcCharsUsed=0, dstCharsUsed=0, result;
528
529	(void) cd; /* UNUSED */
530
531	if (0 == *inbytesleft) {
532	return 0;
533	}
534
535	buffer = set_ucbuffer_with_le(
536	buffer, &buflen, inbuf, inbytesleft);
537
538	result = CFStringEncodingUnicodeToBytes(
539	script_code, kCFStringEncodingUseHFSPlusCanonical,
540	buffer, *inbytesleft/2, &srcCharsUsed,
541	outbuf, outbytesleft, &dstCharsUsed);
542
543	switch(result) {
544	case kCFStringEncodingConversionSuccess:
545	if (*inbytesleft/2 == srcCharsUsed)
546	break;
547	else
548	; /fall through/
549	case kCFStringEncodingInsufficientOutputBufferLength:
550	debug_out("String conversion: "
551	"Output buffer too small\n");
552	hexdump("UTF16LE->UTF8 (new) input",
553	inbuf, inbytesleft);
554	errno = E2BIG;
555	return -1;
556	case kCFStringEncodingInvalidInputStream:
557	/*
558	* HACK: smbd/open.c:check_for_pipe():is_legal_name()
559	* cuts a pathname to 10 characters blindly. Suppress
560	* the debug output in those cases.
561	*/
562	if(10 != *inbytesleft) {
563	debug_out("String conversion: "
564	"Invalid input sequence\n");
565	hexdump("UTF16LE->UTF8 (new) input",
566	inbuf, inbytesleft);
567	}
568	errno = EILSEQ;
569	return -1;
570	case kCFStringEncodingConverterUnavailable:
571	debug_out("String conversion: "
572	"Unknown encoding\n");
573	hexdump("UTF16LE->UTF8 (new) input",
574	inbuf, inbytesleft);
575	errno = EINVAL;
576	return -1;
577	}
578
579	inbuf += srcCharsUsed2;
580	inbytesleft -= srcCharsUsed2;
581	*outbuf += dstCharsUsed;
582	*outbytesleft -= dstCharsUsed;
583
584	return 0;
585	}
586
587	#endif /* USE_INTERNAL_API */
588
589	/*
590	* For initialization, actually install the encoding as "macosxfs".
591	*/
592	static struct charset_functions macosxfs_encoding_functions = {
593	"MACOSXFS", macosxfs_encoding_pull, macosxfs_encoding_push
594	};
595
596	NTSTATUS charset_macosxfs_init(void)
597	{
598	if (!smb_register_charset(&macosxfs_encoding_functions)) {
599	return NT_STATUS_INTERNAL_ERROR;
600	}
601	return NT_STATUS_OK;
602	}
603
604	/* eof */

Note: See TracBrowser for help on using the repository browser.

Download in other formats: