| 1 | /* 
 | 
|---|
| 2 |    Unix SMB/CIFS implementation.
 | 
|---|
| 3 |    Samba charset module for Mac OS X/Darwin
 | 
|---|
| 4 |    Copyright (C) Benjamin Riefenstahl 2003
 | 
|---|
| 5 |    
 | 
|---|
| 6 |    This program is free software; you can redistribute it and/or modify
 | 
|---|
| 7 |    it under the terms of the GNU General Public License as published by
 | 
|---|
| 8 |    the Free Software Foundation; either version 3 of the License, or
 | 
|---|
| 9 |    (at your option) any later version.
 | 
|---|
| 10 |    
 | 
|---|
| 11 |    This program is distributed in the hope that it will be useful,
 | 
|---|
| 12 |    but WITHOUT ANY WARRANTY; without even the implied warranty of
 | 
|---|
| 13 |    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 | 
|---|
| 14 |    GNU General Public License for more details.
 | 
|---|
| 15 |    
 | 
|---|
| 16 |    You should have received a copy of the GNU General Public License
 | 
|---|
| 17 |    along with this program.  If not, see <http://www.gnu.org/licenses/>.
 | 
|---|
| 18 | */
 | 
|---|
| 19 | 
 | 
|---|
| 20 | /*
 | 
|---|
| 21 |  * modules/charset_macosxfs.c
 | 
|---|
| 22 |  *
 | 
|---|
| 23 |  * A Samba charset module to use on Mac OS X/Darwin as the filesystem
 | 
|---|
| 24 |  * and display encoding.
 | 
|---|
| 25 |  *
 | 
|---|
| 26 |  * Actually two implementations are provided here.  The default
 | 
|---|
| 27 |  * implementation is based on the official CFString API.  The other is
 | 
|---|
| 28 |  * based on internal CFString APIs as defined in the OpenDarwin
 | 
|---|
| 29 |  * source.
 | 
|---|
| 30 |  */
 | 
|---|
| 31 | 
 | 
|---|
| 32 | #include "includes.h"
 | 
|---|
| 33 | 
 | 
|---|
| 34 | /*
 | 
|---|
| 35 |  * Include OS frameworks.  These are only needed in this module.
 | 
|---|
| 36 |  */
 | 
|---|
| 37 | #include <CoreFoundation/CFString.h>
 | 
|---|
| 38 | 
 | 
|---|
| 39 | /*
 | 
|---|
| 40 |  * See if autoconf has found us the internal headers in some form.
 | 
|---|
| 41 |  */
 | 
|---|
| 42 | #if HAVE_COREFOUNDATION_CFSTRINGENCODINGCONVERTER_H
 | 
|---|
| 43 | #       include <CoreFoundation/CFStringEncodingConverter.h>
 | 
|---|
| 44 | #       include <CoreFoundation/CFUnicodePrecomposition.h>
 | 
|---|
| 45 | #       define USE_INTERNAL_API 1
 | 
|---|
| 46 | #elif HAVE_CFSTRINGENCODINGCONVERTER_H
 | 
|---|
| 47 | #       include <CFStringEncodingConverter.h>
 | 
|---|
| 48 | #       include <CFUnicodePrecomposition.h>
 | 
|---|
| 49 | #       define USE_INTERNAL_API 1
 | 
|---|
| 50 | #endif
 | 
|---|
| 51 | 
 | 
|---|
| 52 | /*
 | 
|---|
| 53 |  * Compile time configuration: Do we want debug output?
 | 
|---|
| 54 |  */
 | 
|---|
| 55 | /* #define DEBUG_STRINGS 1 */
 | 
|---|
| 56 | 
 | 
|---|
| 57 | /*
 | 
|---|
| 58 |  * A simple, but efficient memory provider for our buffers.
 | 
|---|
| 59 |  */
 | 
|---|
/*
 * Make sure a heap buffer can hold at least newsize bytes.
 *
 * buffer  - current allocation (may be NULL when *size is 0)
 * size    - in/out: capacity of buffer in bytes
 * newsize - number of bytes the caller needs
 *
 * When the recorded capacity already covers newsize the buffer is
 * returned untouched.  Otherwise it is reallocated with 128 bytes of
 * slack so that slowly growing requests do not reallocate every time.
 *
 * Returns the (possibly moved) buffer, or NULL if the reallocation
 * failed.  The capacity is only raised once the reallocation has
 * succeeded; on failure it is reset to 0 so that a later call does not
 * treat the lost buffer as usable space.
 *
 * NOTE(review): resetting to 0 assumes SMB_REALLOC releases the old
 * block when it fails -- confirm against its definition.
 */
static inline void *resize_buffer (void *buffer, size_t *size, size_t newsize)
{
        if (newsize > *size) {
                const size_t wanted = newsize + 128;
                void *grown = SMB_REALLOC(buffer, wanted);

                *size = (NULL != grown) ? wanted : 0;
                buffer = grown;
        }
        return buffer;
}
 | 
|---|
| 68 | 
 | 
|---|
| 69 | /*
 | 
|---|
| 70 |  * While there is a version of OpenDarwin for intel, the usual case is
 | 
|---|
| 71 |  * big-endian PPC.  So we need byte swapping to handle the
 | 
|---|
| 72 |  * little-endian byte order of the network protocol.  We also need an
 | 
|---|
| 73 |  * additional dynamic buffer to do this work for incoming data blocks,
 | 
|---|
| 74 |  * because we have to consider the original data as constant.
 | 
|---|
| 75 |  *
 | 
|---|
| 76 |  * We abstract the differences away by providing a simple facade with
 | 
|---|
| 77 |  * these functions/macros:
 | 
|---|
| 78 |  *
 | 
|---|
| 79 |  *      le_to_native(dst,src,len)
 | 
|---|
| 80 |  *      native_to_le(cp,len)
 | 
|---|
| 81 |  *      set_ucbuffer_with_le(buffer,bufsize,data,size)
 | 
|---|
| 82 |  *      set_ucbuffer_with_le_copy(buffer,bufsize,data,size,reserve)
 | 
|---|
| 83 |  */
 | 
|---|
| 84 | #ifdef WORDS_BIGENDIAN
 | 
|---|
| 85 | 
 | 
|---|
/*
 * Copy len bytes from src to dst, exchanging the two bytes of every
 * complete 16-bit unit on the way.
 *
 * dst - destination, at least len bytes
 * src - source, at least len bytes
 * len - number of bytes to process
 *
 * Only whole pairs are swapped.  If len is odd the last byte is copied
 * unchanged, so neither buffer is ever accessed beyond len bytes.
 * Both bytes of a pair are read before either is written, which keeps
 * the result correct even when dst and src are the same buffer.
 */
static inline void swap_bytes (char * dst, const char * src, size_t len)
{
        const size_t paired = len & ~(size_t)1;
        size_t i;

        for (i = 0; i < paired; i += 2) {
                const char first = src[i];
                const char second = src[i + 1];

                dst[i] = second;
                dst[i + 1] = first;
        }
        if (paired < len) {
                dst[paired] = src[paired];
        }
}
 | 
|---|
/*
 * Exchange the two bytes of every complete 16-bit unit in a buffer.
 *
 * cp  - buffer to convert in place
 * len - number of bytes in the buffer
 *
 * Only whole pairs are swapped; a trailing odd byte is left as it is,
 * so the buffer is never touched beyond len bytes.
 */
static inline void swap_bytes_inplace (char * cp, size_t len)
{
        char *const stop = cp + (len & ~(size_t)1);

        for (; cp < stop; cp += 2) {
                const char first = cp[0];

                cp[0] = cp[1];
                cp[1] = first;
        }
}
 | 
|---|
| 107 | 
 | 
|---|
| 108 | #define le_to_native(dst,src,len)       swap_bytes(dst,src,len)
 | 
|---|
| 109 | #define native_to_le(cp,len)            swap_bytes_inplace(cp,len)
 | 
|---|
| 110 | #define set_ucbuffer_with_le(buffer,bufsize,data,size) \
 | 
|---|
| 111 |         set_ucbuffer_with_le_copy(buffer,bufsize,data,size,0)
 | 
|---|
| 112 | 
 | 
|---|
| 113 | #else   /* ! WORDS_BIGENDIAN */
 | 
|---|
| 114 | 
 | 
|---|
| 115 | #define le_to_native(dst,src,len)       memcpy(dst,src,len)
 | 
|---|
| 116 | #define native_to_le(cp,len)            /* nothing */
 | 
|---|
| 117 | #define set_ucbuffer_with_le(buffer,bufsize,data,size) \
 | 
|---|
| 118 |         (((void)(bufsize)),(UniChar*)(data))
 | 
|---|
| 119 | 
 | 
|---|
| 120 | #endif
 | 
|---|
| 121 | 
 | 
|---|
| 122 | static inline UniChar *set_ucbuffer_with_le_copy (
 | 
|---|
| 123 |         UniChar *buffer, size_t *bufsize,
 | 
|---|
| 124 |         const void *data, size_t size, size_t reserve)
 | 
|---|
| 125 | {
 | 
|---|
| 126 |         buffer = resize_buffer(buffer, bufsize, size+reserve);
 | 
|---|
| 127 |         le_to_native((char*)buffer,data,size);
 | 
|---|
| 128 |         return buffer;
 | 
|---|
| 129 | }
 | 
|---|
| 130 | 
 | 
|---|
| 131 | 
 | 
|---|
| 132 | /*
 | 
|---|
| 133 |  * A simple hexdump function for debugging error conditions.
 | 
|---|
| 134 |  */
 | 
|---|
/*
 * Emit a string at debug level 0.  The string is handed over as the
 * argument of a "%s" format, never as the format itself, so a '%' in a
 * label or in dumped data is printed literally.
 */
#define debug_out(s)    DEBUG(0,("%s",(s)))
 | 
|---|
| 136 | 
 | 
|---|
| 137 | #ifdef DEBUG_STRINGS
 | 
|---|
| 138 | 
 | 
|---|
/*
 * Write a labelled hex dump of a byte range to the debug output.
 *
 * label - heading printed before the dump
 * s     - first byte to dump
 * len   - number of bytes to dump
 *
 * Every output line covers up to 16 bytes: the offset in hex, two
 * groups of eight hex cells (missing cells are padded with blanks) and
 * the same bytes as text, with a '.' for anything outside the
 * printable ASCII range.
 */
static void hexdump( const char * label, const char * s, size_t len )
{
        size_t offset = 0;

#undef sprintf
        debug_out("<<<<<<<\n");
        debug_out(label);
        debug_out("\n");

        while (offset < len) {
                char line[100];
                char *out = line;
                const char *row = s + offset;
                const size_t remaining = len - offset;
                const size_t count = (remaining < 16) ? remaining : 16;
                size_t col;

                out += sprintf(out, "%04X ", (unsigned)offset);

                /* Hex cells, with one extra blank in front of each group. */
                for (col = 0; col < 16; ++col) {
                        if (0 == col % 8) {
                                *out++ = ' ';
                        }
                        if (col < count) {
                                out += sprintf(out, "%02X ",
                                               ((unsigned)row[col]) & 0xFF);
                        } else {
                                out += sprintf(out, "   ");
                        }
                }
                *out++ = ' ';

                /* Text column. */
                for (col = 0; col < count; ++col) {
                        const char c = row[col];

                        if (c >= ' ' && c < 0x7F && isprint(c)) {
                                *out++ = c;
                        } else {
                                *out++ = '.';
                        }
                }
                *out++ = '\n';
                *out = 0;

                debug_out(line);
                offset += count;
        }

        debug_out(">>>>>>>\n");
}
 | 
|---|
| 180 | 
 | 
|---|
| 181 | #else   /* !DEBUG_STRINGS */
 | 
|---|
| 182 | 
 | 
|---|
| 183 | #define hexdump(label,s,len) /* nothing */
 | 
|---|
| 184 | 
 | 
|---|
| 185 | #endif
 | 
|---|
| 186 | 
 | 
|---|
| 187 | 
 | 
|---|
| 188 | #if !USE_INTERNAL_API
 | 
|---|
| 189 | 
 | 
|---|
| 190 | /*
 | 
|---|
| 191 |  * An implementation based on documented Mac OS X APIs.
 | 
|---|
| 192 |  *
 | 
|---|
| 193 |  * This does a certain amount of memory management, creating and
 | 
|---|
| 194 |  * manipulating CFString objects.  We try to minimize the impact by
 | 
|---|
| 195 |  * keeping those objects around and re-using them.  We also use
 | 
|---|
| 196 |  * external backing store for the CFStrings where this is possible and
 | 
|---|
| 197 |  * beneficial.
 | 
|---|
| 198 |  *
 | 
|---|
| 199 |  * The Unicode normalization forms available at this level are
 | 
|---|
| 200 |  * generic, not specifically for the file system.  So they may not be
 | 
|---|
| 201 |  * perfect fits.
 | 
|---|
| 202 |  */
 | 
|---|
| 203 | static size_t macosxfs_encoding_pull(
 | 
|---|
| 204 |         void *cd,                               /* Encoder handle */
 | 
|---|
| 205 |         char **inbuf, size_t *inbytesleft,      /* Script string */
 | 
|---|
| 206 |         char **outbuf, size_t *outbytesleft)    /* UTF-16-LE string */
 | 
|---|
| 207 | {
 | 
|---|
| 208 |         static const int script_code = kCFStringEncodingUTF8;
 | 
|---|
| 209 |         static CFMutableStringRef cfstring = NULL;
 | 
|---|
| 210 |         size_t outsize;
 | 
|---|
| 211 |         CFRange range;
 | 
|---|
| 212 | 
 | 
|---|
| 213 |         (void) cd; /* UNUSED */
 | 
|---|
| 214 | 
 | 
|---|
| 215 |         if (0 == *inbytesleft) {
 | 
|---|
| 216 |                 return 0;
 | 
|---|
| 217 |         }
 | 
|---|
| 218 | 
 | 
|---|
| 219 |         if (NULL == cfstring) {
 | 
|---|
| 220 |                 /*
 | 
|---|
| 221 |                  * A version with an external backing store as in the
 | 
|---|
| 222 |                  * push function should have been more efficient, but
 | 
|---|
| 223 |                  * testing shows, that it is actually slower (!).
 | 
|---|
| 224 |                  * Maybe kCFAllocatorDefault gets shortcut evaluation
 | 
|---|
| 225 |                  * internally, while kCFAllocatorNull doesn't.
 | 
|---|
| 226 |                  */
 | 
|---|
| 227 |                 cfstring = CFStringCreateMutable(kCFAllocatorDefault,0);
 | 
|---|
| 228 |         }
 | 
|---|
| 229 | 
 | 
|---|
| 230 |         /*
 | 
|---|
| 231 |          * Three methods of appending to a CFString, choose the most
 | 
|---|
| 232 |          * efficient.
 | 
|---|
| 233 |          */
 | 
|---|
| 234 |         if (0 == (*inbuf)[*inbytesleft-1]) {
 | 
|---|
| 235 |                 CFStringAppendCString(cfstring, *inbuf, script_code);
 | 
|---|
| 236 |         } else if (*inbytesleft <= 255) {
 | 
|---|
| 237 |                 Str255 buffer;
 | 
|---|
| 238 |                 buffer[0] = *inbytesleft;
 | 
|---|
| 239 |                 memcpy(buffer+1, *inbuf, buffer[0]);
 | 
|---|
| 240 |                 CFStringAppendPascalString(cfstring, buffer, script_code);
 | 
|---|
| 241 |         } else {
 | 
|---|
| 242 |                 /*
 | 
|---|
| 243 |                  * We would like to use a fixed buffer and a loop
 | 
|---|
| 244 |                  * here, but than we can't garantee that the input is
 | 
|---|
| 245 |                  * well-formed UTF-8, as we are supposed to do.
 | 
|---|
| 246 |                  */
 | 
|---|
| 247 |                 static char *buffer = NULL;
 | 
|---|
| 248 |                 static size_t buflen = 0;
 | 
|---|
| 249 |                 buffer = resize_buffer(buffer, &buflen, *inbytesleft+1);
 | 
|---|
| 250 |                 memcpy(buffer, *inbuf, *inbytesleft);
 | 
|---|
| 251 |                 buffer[*inbytesleft] = 0;
 | 
|---|
| 252 |                 CFStringAppendCString(cfstring, *inbuf, script_code);
 | 
|---|
| 253 |         }
 | 
|---|
| 254 | 
 | 
|---|
| 255 |         /*
 | 
|---|
| 256 |          * Compose characters, using the non-canonical composition
 | 
|---|
| 257 |          * form.
 | 
|---|
| 258 |          */
 | 
|---|
| 259 |         CFStringNormalize(cfstring, kCFStringNormalizationFormC);
 | 
|---|
| 260 | 
 | 
|---|
| 261 |         outsize = CFStringGetLength(cfstring);
 | 
|---|
| 262 |         range = CFRangeMake(0,outsize);
 | 
|---|
| 263 | 
 | 
|---|
| 264 |         if (outsize == 0) {
 | 
|---|
| 265 |                 /*
 | 
|---|
| 266 |                  * HACK: smbd/mangle_hash2.c:is_legal_name() expects
 | 
|---|
| 267 |                  * errors here.  That function will always pass 2
 | 
|---|
| 268 |                  * characters.  smbd/open.c:check_for_pipe() cuts a
 | 
|---|
| 269 |                  * patchname to 10 characters blindly.  Suppress the
 | 
|---|
| 270 |                  * debug output in those cases.
 | 
|---|
| 271 |                  */
 | 
|---|
| 272 |                 if(2 != *inbytesleft && 10 != *inbytesleft) {
 | 
|---|
| 273 |                         debug_out("String conversion: "
 | 
|---|
| 274 |                                   "An unknown error occurred\n");
 | 
|---|
| 275 |                         hexdump("UTF8->UTF16LE (old) input",
 | 
|---|
| 276 |                                 *inbuf, *inbytesleft);
 | 
|---|
| 277 |                 }
 | 
|---|
| 278 |                 errno = EILSEQ; /* Not sure, but this is what we have
 | 
|---|
| 279 |                                  * actually seen. */
 | 
|---|
| 280 |                 return -1;
 | 
|---|
| 281 |         }
 | 
|---|
| 282 |         if (outsize*2 > *outbytesleft) {
 | 
|---|
| 283 |                 CFStringDelete(cfstring, range);
 | 
|---|
| 284 |                 debug_out("String conversion: "
 | 
|---|
| 285 |                           "Output buffer too small\n");
 | 
|---|
| 286 |                 hexdump("UTF8->UTF16LE (old) input",
 | 
|---|
| 287 |                         *inbuf, *inbytesleft);
 | 
|---|
| 288 |                 errno = E2BIG;
 | 
|---|
| 289 |                 return -1;
 | 
|---|
| 290 |         }
 | 
|---|
| 291 | 
 | 
|---|
| 292 |         CFStringGetCharacters(cfstring, range, (UniChar*)*outbuf);
 | 
|---|
| 293 |         CFStringDelete(cfstring, range);
 | 
|---|
| 294 | 
 | 
|---|
| 295 |         native_to_le(*outbuf, outsize*2);
 | 
|---|
| 296 | 
 | 
|---|
| 297 |         /*
 | 
|---|
| 298 |          * Add a converted null byte, if the CFString conversions
 | 
|---|
| 299 |          * prevented that until now.
 | 
|---|
| 300 |          */
 | 
|---|
| 301 |         if (0 == (*inbuf)[*inbytesleft-1] && 
 | 
|---|
| 302 |             (0 != (*outbuf)[outsize*2-1] || 0 != (*outbuf)[outsize*2-2])) {
 | 
|---|
| 303 | 
 | 
|---|
| 304 |                 if ((outsize*2+2) > *outbytesleft) {
 | 
|---|
| 305 |                         debug_out("String conversion: "
 | 
|---|
| 306 |                                   "Output buffer too small\n");
 | 
|---|
| 307 |                         hexdump("UTF8->UTF16LE (old) input",
 | 
|---|
| 308 |                                 *inbuf, *inbytesleft);
 | 
|---|
| 309 |                         errno = E2BIG;
 | 
|---|
| 310 |                         return -1;
 | 
|---|
| 311 |                 }
 | 
|---|
| 312 | 
 | 
|---|
| 313 |                 (*outbuf)[outsize*2] = (*outbuf)[outsize*2+1] = 0;
 | 
|---|
| 314 |                 outsize += 2;
 | 
|---|
| 315 |         }
 | 
|---|
| 316 | 
 | 
|---|
| 317 |         *inbuf += *inbytesleft;
 | 
|---|
| 318 |         *inbytesleft = 0;
 | 
|---|
| 319 |         *outbuf += outsize*2;
 | 
|---|
| 320 |         *outbytesleft -= outsize*2;
 | 
|---|
| 321 | 
 | 
|---|
| 322 |         return 0;
 | 
|---|
| 323 | }
 | 
|---|
| 324 | 
 | 
|---|
| 325 | static size_t macosxfs_encoding_push(
 | 
|---|
| 326 |         void *cd,                               /* Encoder handle */
 | 
|---|
| 327 |         char **inbuf, size_t *inbytesleft,      /* UTF-16-LE string */
 | 
|---|
| 328 |         char **outbuf, size_t *outbytesleft)    /* Script string */
 | 
|---|
| 329 | {
 | 
|---|
| 330 |         static const int script_code = kCFStringEncodingUTF8;
 | 
|---|
| 331 |         static CFMutableStringRef cfstring = NULL;
 | 
|---|
| 332 |         static UniChar *buffer = NULL;
 | 
|---|
| 333 |         static size_t buflen = 0;
 | 
|---|
| 334 |         CFIndex outsize, cfsize, charsconverted;
 | 
|---|
| 335 | 
 | 
|---|
| 336 |         (void) cd; /* UNUSED */
 | 
|---|
| 337 | 
 | 
|---|
| 338 |         if (0 == *inbytesleft) {
 | 
|---|
| 339 |                 return 0;
 | 
|---|
| 340 |         }
 | 
|---|
| 341 | 
 | 
|---|
| 342 |         /*
 | 
|---|
| 343 |          * We need a buffer that can hold 4 times the original data,
 | 
|---|
| 344 |          * because that is the theoretical maximum that decomposition
 | 
|---|
| 345 |          * can create currently (in Unicode 4.0).
 | 
|---|
| 346 |          */
 | 
|---|
| 347 |         buffer = set_ucbuffer_with_le_copy(
 | 
|---|
| 348 |                 buffer, &buflen, *inbuf, *inbytesleft, 3 * *inbytesleft);
 | 
|---|
| 349 | 
 | 
|---|
| 350 |         if (NULL == cfstring) {
 | 
|---|
| 351 |                 cfstring = CFStringCreateMutableWithExternalCharactersNoCopy(
 | 
|---|
| 352 |                         kCFAllocatorDefault,
 | 
|---|
| 353 |                         buffer, *inbytesleft/2, buflen/2,
 | 
|---|
| 354 |                         kCFAllocatorNull);
 | 
|---|
| 355 |         } else {
 | 
|---|
| 356 |                 CFStringSetExternalCharactersNoCopy(
 | 
|---|
| 357 |                         cfstring,
 | 
|---|
| 358 |                         buffer, *inbytesleft/2, buflen/2);
 | 
|---|
| 359 |         }
 | 
|---|
| 360 | 
 | 
|---|
| 361 |         /*
 | 
|---|
| 362 |          * Decompose characters, using the non-canonical decomposition
 | 
|---|
| 363 |          * form.
 | 
|---|
| 364 |          *
 | 
|---|
| 365 |          * NB: This isn't exactly what HFS+ wants (see note on
 | 
|---|
| 366 |          * kCFStringEncodingUseHFSPlusCanonical in
 | 
|---|
| 367 |          * CFStringEncodingConverter.h), but AFAIK it's the best that
 | 
|---|
| 368 |          * the official API can do.
 | 
|---|
| 369 |          */
 | 
|---|
| 370 |         CFStringNormalize(cfstring, kCFStringNormalizationFormD);
 | 
|---|
| 371 | 
 | 
|---|
| 372 |         cfsize = CFStringGetLength(cfstring);
 | 
|---|
| 373 |         charsconverted = CFStringGetBytes(
 | 
|---|
| 374 |                 cfstring, CFRangeMake(0,cfsize),
 | 
|---|
| 375 |                 script_code, 0, False,
 | 
|---|
| 376 |                 *outbuf, *outbytesleft, &outsize);
 | 
|---|
| 377 | 
 | 
|---|
| 378 |         if (0 == charsconverted) {
 | 
|---|
| 379 |                 debug_out("String conversion: "
 | 
|---|
| 380 |                           "Buffer too small or not convertable\n");
 | 
|---|
| 381 |                 hexdump("UTF16LE->UTF8 (old) input",
 | 
|---|
| 382 |                         *inbuf, *inbytesleft);
 | 
|---|
| 383 |                 errno = EILSEQ; /* Probably more likely. */
 | 
|---|
| 384 |                 return -1;
 | 
|---|
| 385 |         }
 | 
|---|
| 386 | 
 | 
|---|
| 387 |         /*
 | 
|---|
| 388 |          * Add a converted null byte, if the CFString conversions
 | 
|---|
| 389 |          * prevented that until now.
 | 
|---|
| 390 |          */
 | 
|---|
| 391 |         if (0 == (*inbuf)[*inbytesleft-1] && 0 == (*inbuf)[*inbytesleft-2] &&
 | 
|---|
| 392 |             (0 != (*outbuf)[outsize-1])) {
 | 
|---|
| 393 | 
 | 
|---|
| 394 |                 if (((size_t)outsize+1) > *outbytesleft) {
 | 
|---|
| 395 |                         debug_out("String conversion: "
 | 
|---|
| 396 |                                   "Output buffer too small\n");
 | 
|---|
| 397 |                         hexdump("UTF16LE->UTF8 (old) input",
 | 
|---|
| 398 |                                 *inbuf, *inbytesleft);
 | 
|---|
| 399 |                         errno = E2BIG;
 | 
|---|
| 400 |                         return -1;
 | 
|---|
| 401 |                 }
 | 
|---|
| 402 | 
 | 
|---|
| 403 |                 (*outbuf)[outsize] = 0;
 | 
|---|
| 404 |                 ++outsize;
 | 
|---|
| 405 |         }
 | 
|---|
| 406 | 
 | 
|---|
| 407 |         *inbuf += *inbytesleft;
 | 
|---|
| 408 |         *inbytesleft = 0;
 | 
|---|
| 409 |         *outbuf += outsize;
 | 
|---|
| 410 |         *outbytesleft -= outsize;
 | 
|---|
| 411 | 
 | 
|---|
| 412 |         return 0;
 | 
|---|
| 413 | }
 | 
|---|
| 414 | 
 | 
|---|
| 415 | #else /* USE_INTERNAL_API */
 | 
|---|
| 416 | 
 | 
|---|
| 417 | /*
 | 
|---|
| 418 |  * An implementation based on internal code as known from the
 | 
|---|
| 419 |  * OpenDarwin CVS.
 | 
|---|
| 420 |  *
 | 
|---|
| 421 |  * This code doesn't need much memory management because it uses
 | 
|---|
| 422 |  * functions that operate on the raw memory directly.
 | 
|---|
| 423 |  *
 | 
|---|
| 424 |  * The push routine here is faster and more compatible with HFS+ than
 | 
|---|
| 425 |  * the other implementation above.  The pull routine is only faster
 | 
|---|
| 426 |  * for some strings, slightly slower for others.  The pull routine
 | 
|---|
| 427 |  * loses because it has to iterate over the data twice, once to
 | 
|---|
| 428 |  * decode UTF-8 and then to do the character composition required by
 | 
|---|
| 429 |  * Windows.
 | 
|---|
| 430 |  */
 | 
|---|
| 431 | static size_t macosxfs_encoding_pull(
 | 
|---|
| 432 |         void *cd,                               /* Encoder handle */
 | 
|---|
| 433 |         char **inbuf, size_t *inbytesleft,      /* Script string */
 | 
|---|
| 434 |         char **outbuf, size_t *outbytesleft)    /* UTF-16-LE string */
 | 
|---|
| 435 | {
 | 
|---|
| 436 |         static const int script_code = kCFStringEncodingUTF8;
 | 
|---|
| 437 |         UInt32 srcCharsUsed = 0;
 | 
|---|
| 438 |         UInt32 dstCharsUsed = 0;
 | 
|---|
| 439 |         UInt32 result;
 | 
|---|
| 440 |         uint32_t dstDecomposedUsed = 0;
 | 
|---|
| 441 |         uint32_t dstPrecomposedUsed = 0;
 | 
|---|
| 442 | 
 | 
|---|
| 443 |         (void) cd; /* UNUSED */
 | 
|---|
| 444 | 
 | 
|---|
| 445 |         if (0 == *inbytesleft) {
 | 
|---|
| 446 |                 return 0;
 | 
|---|
| 447 |         }
 | 
|---|
| 448 | 
 | 
|---|
| 449 |         result = CFStringEncodingBytesToUnicode(
 | 
|---|
| 450 |                 script_code, kCFStringEncodingComposeCombinings,
 | 
|---|
| 451 |                 *inbuf, *inbytesleft, &srcCharsUsed,
 | 
|---|
| 452 |                 (UniChar*)*outbuf, *outbytesleft, &dstCharsUsed);
 | 
|---|
| 453 | 
 | 
|---|
| 454 |         switch(result) {
 | 
|---|
| 455 |         case kCFStringEncodingConversionSuccess:
 | 
|---|
| 456 |                 if (*inbytesleft == srcCharsUsed)
 | 
|---|
| 457 |                         break;
 | 
|---|
| 458 |                 else
 | 
|---|
| 459 |                         ; /*fall through*/
 | 
|---|
| 460 |         case kCFStringEncodingInsufficientOutputBufferLength:
 | 
|---|
| 461 |                 debug_out("String conversion: "
 | 
|---|
| 462 |                           "Output buffer too small\n");
 | 
|---|
| 463 |                 hexdump("UTF8->UTF16LE (new) input",
 | 
|---|
| 464 |                         *inbuf, *inbytesleft);
 | 
|---|
| 465 |                 errno = E2BIG;
 | 
|---|
| 466 |                 return -1;
 | 
|---|
| 467 |         case kCFStringEncodingInvalidInputStream:
 | 
|---|
| 468 |                 /*
 | 
|---|
| 469 |                  * HACK: smbd/mangle_hash2.c:is_legal_name() expects
 | 
|---|
| 470 |                  * errors here.  That function will always pass 2
 | 
|---|
| 471 |                  * characters.  smbd/open.c:check_for_pipe() cuts a
 | 
|---|
| 472 |                  * patchname to 10 characters blindly.  Suppress the
 | 
|---|
| 473 |                  * debug output in those cases.
 | 
|---|
| 474 |                  */
 | 
|---|
| 475 |                 if(2 != *inbytesleft && 10 != *inbytesleft) {
 | 
|---|
| 476 |                         debug_out("String conversion: "
 | 
|---|
| 477 |                                   "Invalid input sequence\n");
 | 
|---|
| 478 |                         hexdump("UTF8->UTF16LE (new) input",
 | 
|---|
| 479 |                                 *inbuf, *inbytesleft);
 | 
|---|
| 480 |                 }
 | 
|---|
| 481 |                 errno = EILSEQ;
 | 
|---|
| 482 |                 return -1;
 | 
|---|
| 483 |         case kCFStringEncodingConverterUnavailable:
 | 
|---|
| 484 |                 debug_out("String conversion: "
 | 
|---|
| 485 |                           "Unknown encoding\n");
 | 
|---|
| 486 |                 hexdump("UTF8->UTF16LE (new) input",
 | 
|---|
| 487 |                         *inbuf, *inbytesleft);
 | 
|---|
| 488 |                 errno = EINVAL;
 | 
|---|
| 489 |                 return -1;
 | 
|---|
| 490 |         }
 | 
|---|
| 491 | 
 | 
|---|
| 492 |         /*
 | 
|---|
| 493 |          * It doesn't look like CFStringEncodingBytesToUnicode() can
 | 
|---|
| 494 |          * produce precomposed characters (flags=ComposeCombinings
 | 
|---|
| 495 |          * doesn't do it), so we need another pass over the data here.
 | 
|---|
| 496 |          * We can do this in-place, as the string can only get
 | 
|---|
| 497 |          * shorter.
 | 
|---|
| 498 |          *
 | 
|---|
| 499 |          * (Actually in theory there should be an internal
 | 
|---|
| 500 |          * decomposition and reordering before the actual composition
 | 
|---|
| 501 |          * step.  But we should be able to rely on that we always get
 | 
|---|
| 502 |          * fully decomposed strings for input, so this can't create
 | 
|---|
| 503 |          * problems in reality.)
 | 
|---|
| 504 |          */
 | 
|---|
| 505 |         CFUniCharPrecompose(
 | 
|---|
| 506 |                 (const UTF16Char *)*outbuf, dstCharsUsed, &dstDecomposedUsed,
 | 
|---|
| 507 |                 (UTF16Char *)*outbuf, dstCharsUsed, &dstPrecomposedUsed);
 | 
|---|
| 508 | 
 | 
|---|
| 509 |         native_to_le(*outbuf, dstPrecomposedUsed*2);
 | 
|---|
| 510 | 
 | 
|---|
| 511 |         *inbuf += srcCharsUsed;
 | 
|---|
| 512 |         *inbytesleft -= srcCharsUsed;
 | 
|---|
| 513 |         *outbuf += dstPrecomposedUsed*2;
 | 
|---|
| 514 |         *outbytesleft -= dstPrecomposedUsed*2;
 | 
|---|
| 515 | 
 | 
|---|
| 516 |         return 0;
 | 
|---|
| 517 | }
 | 
|---|
| 518 | 
 | 
|---|
| 519 | static size_t macosxfs_encoding_push(
 | 
|---|
| 520 |         void *cd,                               /* Encoder handle */
 | 
|---|
| 521 |         char **inbuf, size_t *inbytesleft,      /* UTF-16-LE string */
 | 
|---|
| 522 |         char **outbuf, size_t *outbytesleft)    /* Script string */
 | 
|---|
| 523 | {
 | 
|---|
| 524 |         static const int script_code = kCFStringEncodingUTF8;
 | 
|---|
| 525 |         static UniChar *buffer = NULL;
 | 
|---|
| 526 |         static size_t buflen = 0;
 | 
|---|
| 527 |         UInt32 srcCharsUsed=0, dstCharsUsed=0, result;
 | 
|---|
| 528 | 
 | 
|---|
| 529 |         (void) cd; /* UNUSED */
 | 
|---|
| 530 | 
 | 
|---|
| 531 |         if (0 == *inbytesleft) {
 | 
|---|
| 532 |                 return 0;
 | 
|---|
| 533 |         }
 | 
|---|
| 534 | 
 | 
|---|
| 535 |         buffer = set_ucbuffer_with_le(
 | 
|---|
| 536 |                 buffer, &buflen, *inbuf, *inbytesleft);
 | 
|---|
| 537 | 
 | 
|---|
| 538 |         result = CFStringEncodingUnicodeToBytes(
 | 
|---|
| 539 |                 script_code, kCFStringEncodingUseHFSPlusCanonical,
 | 
|---|
| 540 |                 buffer, *inbytesleft/2, &srcCharsUsed,
 | 
|---|
| 541 |                 *outbuf, *outbytesleft, &dstCharsUsed);
 | 
|---|
| 542 | 
 | 
|---|
| 543 |         switch(result) {
 | 
|---|
| 544 |         case kCFStringEncodingConversionSuccess:
 | 
|---|
| 545 |                 if (*inbytesleft/2 == srcCharsUsed)
 | 
|---|
| 546 |                         break;
 | 
|---|
| 547 |                 else
 | 
|---|
| 548 |                         ; /*fall through*/
 | 
|---|
| 549 |         case kCFStringEncodingInsufficientOutputBufferLength:
 | 
|---|
| 550 |                 debug_out("String conversion: "
 | 
|---|
| 551 |                           "Output buffer too small\n");
 | 
|---|
| 552 |                 hexdump("UTF16LE->UTF8 (new) input",
 | 
|---|
| 553 |                         *inbuf, *inbytesleft);
 | 
|---|
| 554 |                 errno = E2BIG;
 | 
|---|
| 555 |                 return -1;
 | 
|---|
| 556 |         case kCFStringEncodingInvalidInputStream:
 | 
|---|
| 557 |                 /*
 | 
|---|
| 558 |                  * HACK: smbd/open.c:check_for_pipe():is_legal_name()
 | 
|---|
| 559 |                  * cuts a pathname to 10 characters blindly.  Suppress
 | 
|---|
| 560 |                  * the debug output in those cases.
 | 
|---|
| 561 |                  */
 | 
|---|
| 562 |                 if(10 != *inbytesleft) {
 | 
|---|
| 563 |                         debug_out("String conversion: "
 | 
|---|
| 564 |                                   "Invalid input sequence\n");
 | 
|---|
| 565 |                         hexdump("UTF16LE->UTF8 (new) input",
 | 
|---|
| 566 |                                 *inbuf, *inbytesleft);
 | 
|---|
| 567 |                 }
 | 
|---|
| 568 |                 errno = EILSEQ;
 | 
|---|
| 569 |                 return -1;
 | 
|---|
| 570 |         case kCFStringEncodingConverterUnavailable:
 | 
|---|
| 571 |                 debug_out("String conversion: "
 | 
|---|
| 572 |                           "Unknown encoding\n");
 | 
|---|
| 573 |                 hexdump("UTF16LE->UTF8 (new) input",
 | 
|---|
| 574 |                         *inbuf, *inbytesleft);
 | 
|---|
| 575 |                 errno = EINVAL;
 | 
|---|
| 576 |                 return -1;
 | 
|---|
| 577 |         }
 | 
|---|
| 578 | 
 | 
|---|
| 579 |         *inbuf += srcCharsUsed*2;
 | 
|---|
| 580 |         *inbytesleft -= srcCharsUsed*2;
 | 
|---|
| 581 |         *outbuf += dstCharsUsed;
 | 
|---|
| 582 |         *outbytesleft -= dstCharsUsed;
 | 
|---|
| 583 | 
 | 
|---|
| 584 |         return 0;
 | 
|---|
| 585 | }
 | 
|---|
| 586 | 
 | 
|---|
| 587 | #endif /* USE_INTERNAL_API */
 | 
|---|
| 588 | 
 | 
|---|
| 589 | /*
 | 
|---|
| 590 |  * For initialization, actually install the encoding as "macosxfs".
 | 
|---|
| 591 |  */
 | 
|---|
| 592 | static struct charset_functions macosxfs_encoding_functions = {
 | 
|---|
| 593 |         "MACOSXFS", macosxfs_encoding_pull, macosxfs_encoding_push
 | 
|---|
| 594 | };
 | 
|---|
| 595 | 
 | 
|---|
| 596 | NTSTATUS charset_macosxfs_init(void)
 | 
|---|
| 597 | {
 | 
|---|
| 598 |         return smb_register_charset(&macosxfs_encoding_functions);
 | 
|---|
| 599 | }
 | 
|---|
| 600 | 
 | 
|---|
| 601 | /* eof */
 | 
|---|