Changeset 740 for vendor/current/source3/lib/charcnv.c
- Timestamp:
- Nov 14, 2012, 12:59:34 PM (13 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
vendor/current/source3/lib/charcnv.c
r597 r740 46 46 47 47 48 static smb_iconv_t conv_handles[NUM_CHARSETS][NUM_CHARSETS];49 48 static bool conv_silent; /* Should we do a debug if the conversion fails ? */ 50 49 static bool initialized; 51 50 52 /**53 * Return the name of a charset to give to iconv().54 **/55 static const char *charset_name(charset_t ch)56 {57 const char *ret;58 59 switch (ch) {60 case CH_UTF16LE:61 ret = "UTF-16LE";62 break;63 case CH_UTF16BE:64 ret = "UTF-16BE";65 break;66 case CH_UNIX:67 ret = lp_unix_charset();68 break;69 case CH_DOS:70 ret = lp_dos_charset();71 break;72 case CH_DISPLAY:73 ret = lp_display_charset();74 break;75 case CH_UTF8:76 ret = "UTF8";77 break;78 default:79 ret = NULL;80 }81 82 #if defined(HAVE_NL_LANGINFO) && defined(CODESET)83 if (ret && !strcmp(ret, "LOCALE")) {84 const char *ln = NULL;85 86 #ifdef HAVE_SETLOCALE87 setlocale(LC_ALL, "");88 #endif89 ln = nl_langinfo(CODESET);90 if (ln) {91 /* Check whether the charset name is supported92 by iconv */93 smb_iconv_t handle = smb_iconv_open(ln,"UCS-2LE");94 if (handle == (smb_iconv_t) -1) {95 DEBUG(5,("Locale charset '%s' unsupported, using ASCII instead\n", ln));96 ln = NULL;97 } else {98 DEBUG(5,("Substituting charset '%s' for LOCALE\n", ln));99 smb_iconv_close(handle);100 }101 }102 ret = ln;103 }104 #endif105 106 if (!ret || !*ret) ret = "ASCII";107 return ret;108 }109 110 51 void lazy_initialize_conv(void) 111 52 { 112 53 if (!initialized) { 113 load_case_tables ();54 load_case_tables_library(); 114 55 init_iconv(); 115 56 initialized = true; … … 122 63 void gfree_charcnv(void) 123 64 { 124 int c1, c2; 125 126 for (c1=0;c1<NUM_CHARSETS;c1++) { 127 for (c2=0;c2<NUM_CHARSETS;c2++) { 128 if ( conv_handles[c1][c2] ) { 129 smb_iconv_close( conv_handles[c1][c2] ); 130 conv_handles[c1][c2] = 0; 131 } 132 } 133 } 65 TALLOC_FREE(global_iconv_convenience); 134 66 initialized = false; 135 67 } … … 144 76 void init_iconv(void) 145 77 { 146 int c1, c2; 147 bool did_reload = False; 148 149 /* so that charset_name() works we need to get the UNIX<->UCS2 going 150 first */ 151 if (!conv_handles[CH_UNIX][CH_UTF16LE]) 152 conv_handles[CH_UNIX][CH_UTF16LE] = smb_iconv_open(charset_name(CH_UTF16LE), "ASCII"); 153 154 if (!conv_handles[CH_UTF16LE][CH_UNIX]) 155 conv_handles[CH_UTF16LE][CH_UNIX] = smb_iconv_open("ASCII", charset_name(CH_UTF16LE)); 156 157 for (c1=0;c1<NUM_CHARSETS;c1++) { 158 for (c2=0;c2<NUM_CHARSETS;c2++) { 159 const char *n1 = charset_name((charset_t)c1); 160 const char *n2 = charset_name((charset_t)c2); 161 if (conv_handles[c1][c2] && 162 strcmp(n1, conv_handles[c1][c2]->from_name) == 0 && 163 strcmp(n2, conv_handles[c1][c2]->to_name) == 0) 164 continue; 165 166 did_reload = True; 167 168 if (conv_handles[c1][c2]) 169 smb_iconv_close(conv_handles[c1][c2]); 170 171 conv_handles[c1][c2] = smb_iconv_open(n2,n1); 172 if (conv_handles[c1][c2] == (smb_iconv_t)-1) { 173 DEBUG(0,("init_iconv: Conversion from %s to %s not supported\n", 174 charset_name((charset_t)c1), charset_name((charset_t)c2))); 175 if (c1 != CH_UTF16LE && c1 != CH_UTF16BE) { 176 n1 = "ASCII"; 177 } 178 if (c2 != CH_UTF16LE && c2 != CH_UTF16BE) { 179 n2 = "ASCII"; 180 } 181 DEBUG(0,("init_iconv: Attempting to replace with conversion from %s to %s\n", 182 n1, n2 )); 183 conv_handles[c1][c2] = smb_iconv_open(n2,n1); 184 if (!conv_handles[c1][c2]) { 185 DEBUG(0,("init_iconv: Conversion from %s to %s failed", n1, n2)); 186 smb_panic("init_iconv: conv_handle initialization failed"); 187 } 188 } 189 } 190 } 191 192 if (did_reload) { 193 /* XXX: Does this really get called every time the dos 194 * codepage changes? */ 195 /* XXX: Is the did_reload test too strict? */ 196 conv_silent = True; 197 init_valid_table(); 198 conv_silent = False; 199 } 78 global_iconv_convenience = smb_iconv_convenience_reinit(NULL, lp_dos_charset(), 79 lp_unix_charset(), lp_display_charset(), 80 true, global_iconv_convenience); 200 81 } 201 82 … … 224 105 char* outbuf = (char*)dest; 225 106 smb_iconv_t descriptor; 107 struct smb_iconv_convenience *ic; 226 108 227 109 lazy_initialize_conv(); 228 229 descriptor = conv_handles[from][to];110 ic = get_iconv_convenience(); 111 descriptor = get_conv_handle(ic, from, to); 230 112 231 113 if (srclen == (size_t)-1) { … … 265 147 if (from == CH_UNIX) { 266 148 DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u - '%s'\n", 267 charset_name(from), charset_name(to),149 charset_name(ic, from), charset_name(ic, to), 268 150 (unsigned int)srclen, (unsigned int)destlen, (const char *)src)); 269 151 } else { 270 152 DEBUG(3,("E2BIG: convert_string(%s,%s): srclen=%u destlen=%u\n", 271 charset_name(from), charset_name(to),153 charset_name(ic, from), charset_name(ic, to), 272 154 (unsigned int)srclen, (unsigned int)destlen)); 273 155 } … … 562 444 smb_iconv_t descriptor; 563 445 void **dest = (void **)dst; 446 struct smb_iconv_convenience *ic; 564 447 565 448 *dest = NULL; … … 596 479 597 480 lazy_initialize_conv(); 598 599 descriptor = conv_handles[from][to];481 ic = get_iconv_convenience(); 482 descriptor = get_conv_handle(ic, from, to); 600 483 601 484 if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) { … … 1818 1701 } 1819 1702 1820 /*1821 Return the unicode codepoint for the next multi-byte CH_UNIX character1822 in the string. The unicode codepoint (codepoint_t) is an unsinged 32 bit value.1823 1824 Also return the number of bytes consumed (which tells the caller1825 how many bytes to skip to get to the next CH_UNIX character).1826 1827 Return INVALID_CODEPOINT if the next character cannot be converted.1828 */1829 1830 codepoint_t next_codepoint(const char *str, size_t *size)1831 {1832 /* It cannot occupy more than 4 bytes in UTF16 format */1833 uint8_t buf[4];1834 smb_iconv_t descriptor;1835 size_t ilen_orig;1836 size_t ilen;1837 size_t olen;1838 char *outbuf;1839 1840 if ((str[0] & 0x80) == 0) {1841 *size = 1;1842 return (codepoint_t)str[0];1843 }1844 1845 /* We assume that no multi-byte character can take1846 more than 5 bytes. This is OK as we only1847 support codepoints up to 1M */1848 1849 ilen_orig = strnlen(str, 5);1850 ilen = ilen_orig;1851 1852 lazy_initialize_conv();1853 1854 descriptor = conv_handles[CH_UNIX][CH_UTF16LE];1855 if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {1856 *size = 1;1857 return INVALID_CODEPOINT;1858 }1859 1860 /* This looks a little strange, but it is needed to cope1861 with codepoints above 64k which are encoded as per RFC2781. */1862 olen = 2;1863 outbuf = (char *)buf;1864 smb_iconv(descriptor, &str, &ilen, &outbuf, &olen);1865 if (olen == 2) {1866 /* We failed to convert to a 2 byte character.1867 See if we can convert to a 4 UTF16-LE byte char encoding.1868 */1869 olen = 4;1870 outbuf = (char *)buf;1871 smb_iconv(descriptor, &str, &ilen, &outbuf, &olen);1872 if (olen == 4) {1873 /* We didn't convert any bytes */1874 *size = 1;1875 return INVALID_CODEPOINT;1876 }1877 olen = 4 - olen;1878 } else {1879 olen = 2 - olen;1880 }1881 1882 *size = ilen_orig - ilen;1883 1884 if (olen == 2) {1885 /* 2 byte, UTF16-LE encoded value. */1886 return (codepoint_t)SVAL(buf, 0);1887 }1888 if (olen == 4) {1889 /* Decode a 4 byte UTF16-LE character manually.1890 See RFC2871 for the encoding machanism.1891 */1892 codepoint_t w1 = SVAL(buf,0) & ~0xD800;1893 codepoint_t w2 = SVAL(buf,2) & ~0xDC00;1894 1895 return (codepoint_t)0x10000 +1896 (w1 << 10) + w2;1897 }1898 1899 /* no other length is valid */1900 return INVALID_CODEPOINT;1901 }1902 1903 /*1904 push a single codepoint into a CH_UNIX string the target string must1905 be able to hold the full character, which is guaranteed if it is at1906 least 5 bytes in size. The caller may pass less than 5 bytes if they1907 are sure the character will fit (for example, you can assume that1908 uppercase/lowercase of a character will not add more than 1 byte)1909 1910 return the number of bytes occupied by the CH_UNIX character, or1911 -1 on failure1912 */1913 _PUBLIC_ ssize_t push_codepoint(char *str, codepoint_t c)1914 {1915 smb_iconv_t descriptor;1916 uint8_t buf[4];1917 size_t ilen, olen;1918 const char *inbuf;1919 1920 if (c < 128) {1921 *str = c;1922 return 1;1923 }1924 1925 lazy_initialize_conv();1926 1927 descriptor = conv_handles[CH_UNIX][CH_UTF16LE];1928 if (descriptor == (smb_iconv_t)-1 || descriptor == (smb_iconv_t)0) {1929 return -1;1930 }1931 1932 if (c < 0x10000) {1933 ilen = 2;1934 olen = 5;1935 inbuf = (char *)buf;1936 SSVAL(buf, 0, c);1937 smb_iconv(descriptor, &inbuf, &ilen, &str, &olen);1938 if (ilen != 0) {1939 return -1;1940 }1941 return 5 - olen;1942 }1943 1944 c -= 0x10000;1945 1946 buf[0] = (c>>10) & 0xFF;1947 buf[1] = (c>>18) | 0xd8;1948 buf[2] = c & 0xFF;1949 buf[3] = ((c>>8) & 0x3) | 0xdc;1950 1951 ilen = 4;1952 olen = 5;1953 inbuf = (char *)buf;1954 1955 smb_iconv(descriptor, &inbuf, &ilen, &str, &olen);1956 if (ilen != 0) {1957 return -1;1958 }1959 return 5 - olen;1960 }1961 1962
Note:
See TracChangeset
for help on using the changeset viewer.