Changeset 745 for trunk/server/source3/lib/charcnv.c
- Timestamp:
- Nov 27, 2012, 4:43:17 PM (13 years ago)
- Location:
- trunk/server
- Files:
-
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/server
- Property svn:mergeinfo changed
/vendor/current merged: 581,587,591,594,597,600,615,618,740
- Property svn:mergeinfo changed
-
trunk/server/source3/lib/charcnv.c
r599 r745 46 46 47 47 48 static smb_iconv_t conv_handles[NUM_CHARSETS][NUM_CHARSETS];49 48 static bool conv_silent; /* Should we do a debug if the conversion fails ? */ 50 49 static bool initialized; 51 50 52 /**53 * Return the name of a charset to give to iconv().54 **/55 static const char *charset_name(charset_t ch)56 {57 const char *ret;58 59 switch (ch) {60 case CH_UTF16LE:61 ret = "UTF-16LE";62 break;63 case CH_UTF16BE:64 ret = "UTF-16BE";65 break;66 case CH_UNIX:67 ret = lp_unix_charset();68 break;69 case CH_DOS:70 ret = lp_dos_charset();71 break;72 case CH_DISPLAY:73 ret = lp_display_charset();74 break;75 case CH_UTF8:76 ret = "UTF8";77 break;78 default:79 ret = NULL;80 }81 82 #if defined(HAVE_NL_LANGINFO) && defined(CODESET)83 if (ret && !strcmp(ret, "LOCALE")) {84 const char *ln = NULL;85 86 #ifdef HAVE_SETLOCALE87 setlocale(LC_ALL, "");88 #endif89 ln = nl_langinfo(CODESET);90 if (ln) {91 /* Check whether the charset name is supported92 by iconv */93 smb_iconv_t handle = smb_iconv_open(ln,"UCS-2LE");94 if (handle == (smb_iconv_t) -1) {95 DEBUG(5,("Locale charset '%s' unsupported, using ASCII instead\n", ln));96 ln = NULL;97 } else {98 DEBUG(5,("Substituting charset '%s' for LOCALE\n", ln));99 smb_iconv_close(handle);100 }101 }102 ret = ln;103 }104 #endif105 106 if (!ret || !*ret) ret = "ASCII";107 return ret;108 }109 110 51 void lazy_initialize_conv(void) 111 52 { 112 53 if (!initialized) { 113 load_case_tables ();54 load_case_tables_library(); 114 55 init_iconv(); 115 56 initialized = true; … … 122 63 void gfree_charcnv(void) 123 64 { 124 int c1, c2; 125 126 for (c1=0;c1<NUM_CHARSETS;c1++) { 127 for (c2=0;c2<NUM_CHARSETS;c2++) { 128 if ( conv_handles[c1][c2] ) { 129 smb_iconv_close( conv_handles[c1][c2] ); 130 conv_handles[c1][c2] = 0; 131 } 132 } 133 } 65 TALLOC_FREE(global_iconv_convenience); 134 66 initialized = false; 135 67 } … … 144 76 void init_iconv(void) 145 77 { 146 int c1, c2; 147 bool did_reload = False; 148 149 /* so that charset_name() works 
we need to get the UNIX<->UCS2 going 150 first */ 151 if (!conv_handles[CH_UNIX][CH_UTF16LE]) 152 conv_handles[CH_UNIX][CH_UTF16LE] = smb_iconv_open(charset_name(CH_UTF16LE), "ASCII"); 153 154 if (!conv_handles[CH_UTF16LE][CH_UNIX]) 155 conv_handles[CH_UTF16LE][CH_UNIX] = smb_iconv_open("ASCII", charset_name(CH_UTF16LE)); 156 157 for (c1=0;c1<NUM_CHARSETS;c1++) { 158 for (c2=0;c2<NUM_CHARSETS;c2++) { 159 const char *n1 = charset_name((charset_t)c1); 160 const char *n2 = charset_name((charset_t)c2); 161 if (conv_handles[c1][c2] && 162 strcmp(n1, conv_handles[c1][c2]->from_name) == 0 && 163 strcmp(n2, conv_handles[c1][c2]->to_name) == 0) 164 continue; 165 166 did_reload = True; 167 168 if (conv_handles[c1][c2]) 169 smb_iconv_close(conv_handles[c1][c2]); 170 171 conv_handles[c1][c2] = smb_iconv_open(n2,n1); 172 if (conv_handles[c1][c2] == (smb_iconv_t)-1) { 173 DEBUG(0,("init_iconv: Conversion from %s to %s not supported\n", 174 charset_name((charset_t)c1), charset_name((charset_t)c2))); 175 if (c1 != CH_UTF16LE && c1 != CH_UTF16BE) { 176 n1 = "ASCII"; 177 } 178 if (c2 != CH_UTF16LE && c2 != CH_UTF16BE) { 179 n2 = "ASCII"; 180 } 181 DEBUG(0,("init_iconv: Attempting to replace with conversion from %s to %s\n", 182 n1, n2 )); 183 conv_handles[c1][c2] = smb_iconv_open(n2,n1); 184 if (!conv_handles[c1][c2]) { 185 DEBUG(0,("init_iconv: Conversion from %s to %s failed", n1, n2)); 186 smb_panic("init_iconv: conv_handle initialization failed"); 187 } 188 } 189 } 190 } 191 192 if (did_reload) { 193 /* XXX: Does this really get called every time the dos 194 * codepage changes? */ 195 /* XXX: Is the did_reload test too strict? 
*/ 196 conv_silent = True; 197 init_valid_table(); 198 conv_silent = False; 199 } 78 global_iconv_convenience = smb_iconv_convenience_reinit(NULL, lp_dos_charset(), 79 lp_unix_charset(), lp_display_charset(), 80 true, global_iconv_convenience); 200 81 } 201 82 … … 224 105 char* outbuf = (char*)dest; 225 106 smb_iconv_t descriptor; 107 struct smb_iconv_convenience *ic; 226 108 227 109 lazy_initialize_conv(); 228 229 descriptor = conv_handles[from][to];110 ic = get_iconv_convenience(); 111 descriptor = get_conv_handle(ic, from, to); 230 112 231 113 if (srclen == (size_t)-1) { … … 265 147 if (from == CH_UNIX) { 266 148 DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u - '%s'\n", 267 charset_name(from), charset_name(to),149 charset_name(ic, from), charset_name(ic, to), 268 150 (unsigned int)srclen, (unsigned int)destlen, (const char *)src)); 269 151 } else { 270 152 DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u\n", 271 charset_name(from), charset_name(to),153 charset_name(ic, from), charset_name(ic, to), 272 154 (unsigned int)srclen, (unsigned int)destlen)); 273 155 } … … 562 444 smb_iconv_t descriptor; 563 445 void **dest = (void **)dst; 446 struct smb_iconv_convenience *ic; 564 447 565 448 *dest = NULL; … … 596 479 597 480 lazy_initialize_conv(); 598 599 descriptor = conv_handles[from][to];481 ic = get_iconv_convenience(); 482 descriptor = get_conv_handle(ic, from, to); 600 483 601 484 if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) { … … 1818 1701 } 1819 1702 1820 /*1821 Return the unicode codepoint for the next multi-byte CH_UNIX character1822 in the string. 
The unicode codepoint (codepoint_t) is an unsigned 32 bit value.1823 1824 Also return the number of bytes consumed (which tells the caller1825 how many bytes to skip to get to the next CH_UNIX character).1826 1827 Return INVALID_CODEPOINT if the next character cannot be converted.1828 */1829 codepoint_t next_codepoint(const char *str, size_t *size)1830 {1831 /* It cannot occupy more than 4 bytes in UTF16 format */1832 uint8_t buf[4];1833 smb_iconv_t descriptor;1834 size_t ilen_orig;1835 size_t ilen;1836 size_t olen;1837 char *outbuf;1838 1839 if ((str[0] & 0x80) == 0) {1840 *size = 1;1841 return (codepoint_t)str[0];1842 }1843 1844 /* We assume that no multi-byte character can take1845 more than 5 bytes. This is OK as we only1846 support codepoints up to 1M */1847 1848 ilen_orig = strnlen(str, 5);1849 ilen = ilen_orig;1850 1851 lazy_initialize_conv();1852 1853 descriptor = conv_handles[CH_UNIX][CH_UTF16LE];1854 if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {1855 *size = 1;1856 return INVALID_CODEPOINT;1857 }1858 1859 /* This looks a little strange, but it is needed to cope1860 with codepoints above 64k which are encoded as per RFC2781. */1861 olen = 2;1862 outbuf = (char *)buf;1863 smb_iconv(descriptor, &str, &ilen, &outbuf, &olen);1864 if (olen == 2) {1865 /* We failed to convert to a 2 byte character.1866 See if we can convert to a 4 UTF16-LE byte char encoding.1867 */1868 olen = 4;1869 outbuf = (char *)buf;1870 smb_iconv(descriptor, &str, &ilen, &outbuf, &olen);1871 if (olen == 4) {1872 /* We didn't convert any bytes */1873 *size = 1;1874 return INVALID_CODEPOINT;1875 }1876 olen = 4 - olen;1877 } else {1878 olen = 2 - olen;1879 }1880 1881 *size = ilen_orig - ilen;1882 1883 if (olen == 2) {1884 /* 2 byte, UTF16-LE encoded value. 
*/1885 return (codepoint_t)SVAL(buf, 0);1886 }1887 if (olen == 4) {1888 /* Decode a 4 byte UTF16-LE character manually.1889 See RFC2781 for the encoding mechanism.1890 */1891 codepoint_t w1 = SVAL(buf,0) & ~0xD800;1892 codepoint_t w2 = SVAL(buf,2) & ~0xDC00;1893 1894 return (codepoint_t)0x10000 +1895 (w1 << 10) + w2;1896 }1897 1898 /* no other length is valid */1899 return INVALID_CODEPOINT;1900 }1901 1902 /*1903 push a single codepoint into a CH_UNIX string the target string must1904 be able to hold the full character, which is guaranteed if it is at1905 least 5 bytes in size. The caller may pass less than 5 bytes if they1906 are sure the character will fit (for example, you can assume that1907 uppercase/lowercase of a character will not add more than 1 byte)1908 1909 return the number of bytes occupied by the CH_UNIX character, or1910 -1 on failure1911 */1912 _PUBLIC_ ssize_t push_codepoint(char *str, codepoint_t c)1913 {1914 smb_iconv_t descriptor;1915 uint8_t buf[4];1916 size_t ilen, olen;1917 const char *inbuf;1918 1919 if (c < 128) {1920 *str = c;1921 return 1;1922 }1923 1924 lazy_initialize_conv();1925 1926 descriptor = conv_handles[CH_UNIX][CH_UTF16LE];1927 if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {1928 return -1;1929 }1930 1931 if (c < 0x10000) {1932 ilen = 2;1933 olen = 5;1934 inbuf = (char *)buf;1935 SSVAL(buf, 0, c);1936 smb_iconv(descriptor, &inbuf, &ilen, &str, &olen);1937 if (ilen != 0) {1938 return -1;1939 }1940 return 5 - olen;1941 }1942 1943 c -= 0x10000;1944 1945 buf[0] = (c>>10) & 0xFF;1946 buf[1] = (c>>18) | 0xd8;1947 buf[2] = c & 0xFF;1948 buf[3] = ((c>>8) & 0x3) | 0xdc;1949 1950 ilen = 4;1951 olen = 5;1952 inbuf = (char *)buf;1953 1954 smb_iconv(descriptor, &inbuf, &ilen, &str, &olen);1955 if (ilen != 0) {1956 return -1;1957 }1958 return 5 - olen;1959 }1960 1961
Note:
See TracChangeset
for help on using the changeset viewer.