| 1 | /*
|
|---|
| 2 | * Copyright (c) 2004, 2006, 2007, 2008 Kungliga Tekniska Högskolan
|
|---|
| 3 | * (Royal Institute of Technology, Stockholm, Sweden).
|
|---|
| 4 | * All rights reserved.
|
|---|
| 5 | *
|
|---|
| 6 | * Redistribution and use in source and binary forms, with or without
|
|---|
| 7 | * modification, are permitted provided that the following conditions
|
|---|
| 8 | * are met:
|
|---|
| 9 | *
|
|---|
| 10 | * 1. Redistributions of source code must retain the above copyright
|
|---|
| 11 | * notice, this list of conditions and the following disclaimer.
|
|---|
| 12 | *
|
|---|
| 13 | * 2. Redistributions in binary form must reproduce the above copyright
|
|---|
| 14 | * notice, this list of conditions and the following disclaimer in the
|
|---|
| 15 | * documentation and/or other materials provided with the distribution.
|
|---|
| 16 | *
|
|---|
| 17 | * 3. Neither the name of the Institute nor the names of its contributors
|
|---|
| 18 | * may be used to endorse or promote products derived from this software
|
|---|
| 19 | * without specific prior written permission.
|
|---|
| 20 | *
|
|---|
| 21 | * THIS SOFTWARE IS PROVIDED BY THE INSTITUTE AND CONTRIBUTORS ``AS IS'' AND
|
|---|
| 22 | * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|---|
| 23 | * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|---|
| 24 | * ARE DISCLAIMED. IN NO EVENT SHALL THE INSTITUTE OR CONTRIBUTORS BE LIABLE
|
|---|
| 25 | * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
|
|---|
| 26 | * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
|
|---|
| 27 | * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
|
|---|
| 28 | * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
|
|---|
| 29 | * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
|
|---|
| 30 | * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
|
|---|
| 31 | * SUCH DAMAGE.
|
|---|
| 32 | */
|
|---|
| 33 |
|
|---|
| 34 | #include <config.h>
|
|---|
| 35 | #include "windlocl.h"
|
|---|
| 36 |
|
|---|
| 37 | static int
|
|---|
| 38 | utf8toutf32(const unsigned char **pp, uint32_t *out)
|
|---|
| 39 | {
|
|---|
| 40 | const unsigned char *p = *pp;
|
|---|
| 41 | unsigned c = *p;
|
|---|
| 42 |
|
|---|
| 43 | if (c & 0x80) {
|
|---|
| 44 | if ((c & 0xE0) == 0xC0) {
|
|---|
| 45 | const unsigned c2 = *++p;
|
|---|
| 46 | if ((c2 & 0xC0) == 0x80) {
|
|---|
| 47 | *out = ((c & 0x1F) << 6)
|
|---|
| 48 | | (c2 & 0x3F);
|
|---|
| 49 | } else {
|
|---|
| 50 | return WIND_ERR_INVALID_UTF8;
|
|---|
| 51 | }
|
|---|
| 52 | } else if ((c & 0xF0) == 0xE0) {
|
|---|
| 53 | const unsigned c2 = *++p;
|
|---|
| 54 | if ((c2 & 0xC0) == 0x80) {
|
|---|
| 55 | const unsigned c3 = *++p;
|
|---|
| 56 | if ((c3 & 0xC0) == 0x80) {
|
|---|
| 57 | *out = ((c & 0x0F) << 12)
|
|---|
| 58 | | ((c2 & 0x3F) << 6)
|
|---|
| 59 | | (c3 & 0x3F);
|
|---|
| 60 | } else {
|
|---|
| 61 | return WIND_ERR_INVALID_UTF8;
|
|---|
| 62 | }
|
|---|
| 63 | } else {
|
|---|
| 64 | return WIND_ERR_INVALID_UTF8;
|
|---|
| 65 | }
|
|---|
| 66 | } else if ((c & 0xF8) == 0xF0) {
|
|---|
| 67 | const unsigned c2 = *++p;
|
|---|
| 68 | if ((c2 & 0xC0) == 0x80) {
|
|---|
| 69 | const unsigned c3 = *++p;
|
|---|
| 70 | if ((c3 & 0xC0) == 0x80) {
|
|---|
| 71 | const unsigned c4 = *++p;
|
|---|
| 72 | if ((c4 & 0xC0) == 0x80) {
|
|---|
| 73 | *out = ((c & 0x07) << 18)
|
|---|
| 74 | | ((c2 & 0x3F) << 12)
|
|---|
| 75 | | ((c3 & 0x3F) << 6)
|
|---|
| 76 | | (c4 & 0x3F);
|
|---|
| 77 | } else {
|
|---|
| 78 | return WIND_ERR_INVALID_UTF8;
|
|---|
| 79 | }
|
|---|
| 80 | } else {
|
|---|
| 81 | return WIND_ERR_INVALID_UTF8;
|
|---|
| 82 | }
|
|---|
| 83 | } else {
|
|---|
| 84 | return WIND_ERR_INVALID_UTF8;
|
|---|
| 85 | }
|
|---|
| 86 | } else {
|
|---|
| 87 | return WIND_ERR_INVALID_UTF8;
|
|---|
| 88 | }
|
|---|
| 89 | } else {
|
|---|
| 90 | *out = c;
|
|---|
| 91 | }
|
|---|
| 92 |
|
|---|
| 93 | *pp = p;
|
|---|
| 94 |
|
|---|
| 95 | return 0;
|
|---|
| 96 | }
|
|---|
| 97 |
|
|---|
| 98 | /**
|
|---|
| 99 | * Convert an UTF-8 string to an UCS4 string.
|
|---|
| 100 | *
|
|---|
| 101 | * @param in an UTF-8 string to convert.
|
|---|
| 102 | * @param out the resulting UCS4 strint, must be at least
|
|---|
| 103 | * wind_utf8ucs4_length() long. If out is NULL, the function will
|
|---|
| 104 | * calculate the needed space for the out variable (just like
|
|---|
| 105 | * wind_utf8ucs4_length()).
|
|---|
| 106 | * @param out_len before processing out_len should be the length of
|
|---|
| 107 | * the out variable, after processing it will be the length of the out
|
|---|
| 108 | * string.
|
|---|
| 109 | *
|
|---|
| 110 | * @return returns 0 on success, an wind error code otherwise
|
|---|
| 111 | * @ingroup wind
|
|---|
| 112 | */
|
|---|
| 113 |
|
|---|
| 114 | int
|
|---|
| 115 | wind_utf8ucs4(const char *in, uint32_t *out, size_t *out_len)
|
|---|
| 116 | {
|
|---|
| 117 | const unsigned char *p;
|
|---|
| 118 | size_t o = 0;
|
|---|
| 119 | int ret;
|
|---|
| 120 |
|
|---|
| 121 | for (p = (const unsigned char *)in; *p != '\0'; ++p) {
|
|---|
| 122 | uint32_t u;
|
|---|
| 123 |
|
|---|
| 124 | ret = utf8toutf32(&p, &u);
|
|---|
| 125 | if (ret)
|
|---|
| 126 | return ret;
|
|---|
| 127 |
|
|---|
| 128 | if (out) {
|
|---|
| 129 | if (o >= *out_len)
|
|---|
| 130 | return WIND_ERR_OVERRUN;
|
|---|
| 131 | out[o] = u;
|
|---|
| 132 | }
|
|---|
| 133 | o++;
|
|---|
| 134 | }
|
|---|
| 135 | *out_len = o;
|
|---|
| 136 | return 0;
|
|---|
| 137 | }
|
|---|
| 138 |
|
|---|
/**
 * Calculate the length of the UCS4 string that results from
 * converting a UTF-8 string.
 *
 * @param in the UTF-8 string to measure.
 * @param out_len set to the number of UCS4 characters the conversion
 * would produce.
 *
 * @return returns 0 on success, an wind error code otherwise
 * @ingroup wind
 */

int
wind_utf8ucs4_length(const char *in, size_t *out_len)
{
    /* A NULL output buffer puts wind_utf8ucs4() in count-only mode. */
    uint32_t *const no_output = NULL;

    return wind_utf8ucs4(in, no_output, out_len);
}
|
|---|
| 155 |
|
|---|
/*
 * Marker bits OR-ed into the first byte of a UTF-8 sequence, indexed
 * by (sequence length - 1).  Stored as unsigned char because 0xC0,
 * 0xE0 and 0xF0 do not fit in a signed char.
 */
static const unsigned char first_char[4] =
    { 0x00, 0xC0, 0xE0, 0xF0 };
|
|---|
| 158 |
|
|---|
| 159 | /**
|
|---|
| 160 | * Convert an UCS4 string to a UTF-8 string.
|
|---|
| 161 | *
|
|---|
| 162 | * @param in an UCS4 string to convert.
|
|---|
| 163 | * @param in_len the length input array.
|
|---|
| 164 |
|
|---|
| 165 | * @param out the resulting UTF-8 strint, must be at least
|
|---|
| 166 | * wind_ucs4utf8_length() + 1 long (the extra char for the NUL). If
|
|---|
| 167 | * out is NULL, the function will calculate the needed space for the
|
|---|
| 168 | * out variable (just like wind_ucs4utf8_length()).
|
|---|
| 169 |
|
|---|
| 170 | * @param out_len before processing out_len should be the length of
|
|---|
| 171 | * the out variable, after processing it will be the length of the out
|
|---|
| 172 | * string.
|
|---|
| 173 | *
|
|---|
| 174 | * @return returns 0 on success, an wind error code otherwise
|
|---|
| 175 | * @ingroup wind
|
|---|
| 176 | */
|
|---|
| 177 |
|
|---|
| 178 | int
|
|---|
| 179 | wind_ucs4utf8(const uint32_t *in, size_t in_len, char *out, size_t *out_len)
|
|---|
| 180 | {
|
|---|
| 181 | uint32_t ch;
|
|---|
| 182 | size_t i, len, o;
|
|---|
| 183 |
|
|---|
| 184 | for (o = 0, i = 0; i < in_len; i++) {
|
|---|
| 185 | ch = in[i];
|
|---|
| 186 |
|
|---|
| 187 | if (ch < 0x80) {
|
|---|
| 188 | len = 1;
|
|---|
| 189 | } else if (ch < 0x800) {
|
|---|
| 190 | len = 2;
|
|---|
| 191 | } else if (ch < 0x10000) {
|
|---|
| 192 | len = 3;
|
|---|
| 193 | } else if (ch <= 0x10FFFF) {
|
|---|
| 194 | len = 4;
|
|---|
| 195 | } else
|
|---|
| 196 | return WIND_ERR_INVALID_UTF32;
|
|---|
| 197 |
|
|---|
| 198 | o += len;
|
|---|
| 199 |
|
|---|
| 200 | if (out) {
|
|---|
| 201 | if (o >= *out_len)
|
|---|
| 202 | return WIND_ERR_OVERRUN;
|
|---|
| 203 |
|
|---|
| 204 | switch(len) {
|
|---|
| 205 | case 4:
|
|---|
| 206 | out[3] = (ch | 0x80) & 0xbf;
|
|---|
| 207 | ch = ch >> 6;
|
|---|
| 208 | case 3:
|
|---|
| 209 | out[2] = (ch | 0x80) & 0xbf;
|
|---|
| 210 | ch = ch >> 6;
|
|---|
| 211 | case 2:
|
|---|
| 212 | out[1] = (ch | 0x80) & 0xbf;
|
|---|
| 213 | ch = ch >> 6;
|
|---|
| 214 | case 1:
|
|---|
| 215 | out[0] = ch | first_char[len - 1];
|
|---|
| 216 | }
|
|---|
| 217 | }
|
|---|
| 218 | out += len;
|
|---|
| 219 | }
|
|---|
| 220 | if (out) {
|
|---|
| 221 | if (o + 1 >= *out_len)
|
|---|
| 222 | return WIND_ERR_OVERRUN;
|
|---|
| 223 | *out = '\0';
|
|---|
| 224 | }
|
|---|
| 225 | *out_len = o;
|
|---|
| 226 | return 0;
|
|---|
| 227 | }
|
|---|
| 228 |
|
|---|
/**
 * Calculate the length of the UTF-8 string that results from
 * converting an UCS4 string.
 *
 * @param in an UCS4 string to measure.
 * @param in_len the number of elements in the UCS4 string.
 * @param out_len set to the length of the resulting UTF-8 string.
 *
 * @return returns 0 on success, an wind error code otherwise
 * @ingroup wind
 */

int
wind_ucs4utf8_length(const uint32_t *in, size_t in_len, size_t *out_len)
{
    /* A NULL output buffer asks wind_ucs4utf8() for the length only. */
    char *const no_output = NULL;

    return wind_ucs4utf8(in, in_len, no_output, out_len);
}
|
|---|
| 245 |
|
|---|
| 246 | /**
|
|---|
| 247 | * Read in an UCS2 from a buffer.
|
|---|
| 248 | *
|
|---|
| 249 | * @param ptr The input buffer to read from.
|
|---|
| 250 | * @param len the length of the input buffer.
|
|---|
| 251 | * @param flags Flags to control the behavior of the function.
|
|---|
| 252 | * @param out the output UCS2, the array must be at least out/2 long.
|
|---|
| 253 | * @param out_len the output length
|
|---|
| 254 | *
|
|---|
| 255 | * @return returns 0 on success, an wind error code otherwise.
|
|---|
| 256 | * @ingroup wind
|
|---|
| 257 | */
|
|---|
| 258 |
|
|---|
| 259 | int
|
|---|
| 260 | wind_ucs2read(const void *ptr, size_t len, unsigned int *flags,
|
|---|
| 261 | uint16_t *out, size_t *out_len)
|
|---|
| 262 | {
|
|---|
| 263 | const unsigned char *p = ptr;
|
|---|
| 264 | int little = ((*flags) & WIND_RW_LE);
|
|---|
| 265 | size_t olen = *out_len;
|
|---|
| 266 |
|
|---|
| 267 | /** if len is zero, flags are unchanged */
|
|---|
| 268 | if (len == 0) {
|
|---|
| 269 | *out_len = 0;
|
|---|
| 270 | return 0;
|
|---|
| 271 | }
|
|---|
| 272 |
|
|---|
| 273 | /** if len is odd, WIND_ERR_LENGTH_NOT_MOD2 is returned */
|
|---|
| 274 | if (len & 1)
|
|---|
| 275 | return WIND_ERR_LENGTH_NOT_MOD2;
|
|---|
| 276 |
|
|---|
| 277 | /**
|
|---|
| 278 | * If the flags WIND_RW_BOM is set, check for BOM. If not BOM is
|
|---|
| 279 | * found, check is LE/BE flag is already and use that otherwise
|
|---|
| 280 | * fail with WIND_ERR_NO_BOM. When done, clear WIND_RW_BOM and
|
|---|
| 281 | * the LE/BE flag and set the resulting LE/BE flag.
|
|---|
| 282 | */
|
|---|
| 283 | if ((*flags) & WIND_RW_BOM) {
|
|---|
| 284 | uint16_t bom = (p[0] << 8) + p[1];
|
|---|
| 285 | if (bom == 0xfffe || bom == 0xfeff) {
|
|---|
| 286 | little = (bom == 0xfffe);
|
|---|
| 287 | p += 2;
|
|---|
| 288 | len -= 2;
|
|---|
| 289 | } else if (((*flags) & (WIND_RW_LE|WIND_RW_BE)) != 0) {
|
|---|
| 290 | /* little already set */
|
|---|
| 291 | } else
|
|---|
| 292 | return WIND_ERR_NO_BOM;
|
|---|
| 293 | *flags = ((*flags) & ~(WIND_RW_BOM|WIND_RW_LE|WIND_RW_BE));
|
|---|
| 294 | *flags |= little ? WIND_RW_LE : WIND_RW_BE;
|
|---|
| 295 | }
|
|---|
| 296 |
|
|---|
| 297 | while (len) {
|
|---|
| 298 | if (olen < 1)
|
|---|
| 299 | return WIND_ERR_OVERRUN;
|
|---|
| 300 | if (little)
|
|---|
| 301 | *out = (p[1] << 8) + p[0];
|
|---|
| 302 | else
|
|---|
| 303 | *out = (p[0] << 8) + p[1];
|
|---|
| 304 | out++; p += 2; len -= 2; olen--;
|
|---|
| 305 | }
|
|---|
| 306 | *out_len -= olen;
|
|---|
| 307 | return 0;
|
|---|
| 308 | }
|
|---|
| 309 |
|
|---|
| 310 | /**
|
|---|
| 311 | * Write an UCS2 string to a buffer.
|
|---|
| 312 | *
|
|---|
| 313 | * @param in The input UCS2 string.
|
|---|
| 314 | * @param in_len the length of the input buffer.
|
|---|
| 315 | * @param flags Flags to control the behavior of the function.
|
|---|
| 316 | * @param ptr The input buffer to write to, the array must be at least
|
|---|
| 317 | * (in + 1) * 2 bytes long.
|
|---|
| 318 | * @param out_len the output length
|
|---|
| 319 | *
|
|---|
| 320 | * @return returns 0 on success, an wind error code otherwise.
|
|---|
| 321 | * @ingroup wind
|
|---|
| 322 | */
|
|---|
| 323 |
|
|---|
| 324 | int
|
|---|
| 325 | wind_ucs2write(const uint16_t *in, size_t in_len, unsigned int *flags,
|
|---|
| 326 | void *ptr, size_t *out_len)
|
|---|
| 327 | {
|
|---|
| 328 | unsigned char *p = ptr;
|
|---|
| 329 | size_t len = *out_len;
|
|---|
| 330 |
|
|---|
| 331 | /** If in buffer is not of length be mod 2, WIND_ERR_LENGTH_NOT_MOD2 is returned*/
|
|---|
| 332 | if (len & 1)
|
|---|
| 333 | return WIND_ERR_LENGTH_NOT_MOD2;
|
|---|
| 334 |
|
|---|
| 335 | /** On zero input length, flags are preserved */
|
|---|
| 336 | if (in_len == 0) {
|
|---|
| 337 | *out_len = 0;
|
|---|
| 338 | return 0;
|
|---|
| 339 | }
|
|---|
| 340 | /** If flags have WIND_RW_BOM set, the byte order mark is written
|
|---|
| 341 | * first to the output data */
|
|---|
| 342 | if ((*flags) & WIND_RW_BOM) {
|
|---|
| 343 | uint16_t bom = 0xfffe;
|
|---|
| 344 |
|
|---|
| 345 | if (len < 2)
|
|---|
| 346 | return WIND_ERR_OVERRUN;
|
|---|
| 347 |
|
|---|
| 348 | if ((*flags) & WIND_RW_LE) {
|
|---|
| 349 | p[0] = (bom ) & 0xff;
|
|---|
| 350 | p[1] = (bom >> 8) & 0xff;
|
|---|
| 351 | } else {
|
|---|
| 352 | p[1] = (bom ) & 0xff;
|
|---|
| 353 | p[0] = (bom >> 8) & 0xff;
|
|---|
| 354 | }
|
|---|
| 355 | len -= 2;
|
|---|
| 356 | }
|
|---|
| 357 |
|
|---|
| 358 | while (in_len) {
|
|---|
| 359 | /** If the output wont fit into out_len, WIND_ERR_OVERRUN is returned */
|
|---|
| 360 | if (len < 2)
|
|---|
| 361 | return WIND_ERR_OVERRUN;
|
|---|
| 362 | if ((*flags) & WIND_RW_LE) {
|
|---|
| 363 | p[0] = (in[0] ) & 0xff;
|
|---|
| 364 | p[1] = (in[0] >> 8) & 0xff;
|
|---|
| 365 | } else {
|
|---|
| 366 | p[1] = (in[0] ) & 0xff;
|
|---|
| 367 | p[0] = (in[0] >> 8) & 0xff;
|
|---|
| 368 | }
|
|---|
| 369 | len -= 2;
|
|---|
| 370 | in_len--;
|
|---|
| 371 | p += 2;
|
|---|
| 372 | in++;
|
|---|
| 373 | }
|
|---|
| 374 | *out_len -= len;
|
|---|
| 375 | return 0;
|
|---|
| 376 | }
|
|---|
| 377 |
|
|---|
| 378 |
|
|---|
| 379 | /**
|
|---|
| 380 | * Convert an UTF-8 string to an UCS2 string.
|
|---|
| 381 | *
|
|---|
| 382 | * @param in an UTF-8 string to convert.
|
|---|
| 383 | * @param out the resulting UCS2 strint, must be at least
|
|---|
| 384 | * wind_utf8ucs2_length() long. If out is NULL, the function will
|
|---|
| 385 | * calculate the needed space for the out variable (just like
|
|---|
| 386 | * wind_utf8ucs2_length()).
|
|---|
| 387 | * @param out_len before processing out_len should be the length of
|
|---|
| 388 | * the out variable, after processing it will be the length of the out
|
|---|
| 389 | * string.
|
|---|
| 390 | *
|
|---|
| 391 | * @return returns 0 on success, an wind error code otherwise
|
|---|
| 392 | * @ingroup wind
|
|---|
| 393 | */
|
|---|
| 394 |
|
|---|
| 395 | int
|
|---|
| 396 | wind_utf8ucs2(const char *in, uint16_t *out, size_t *out_len)
|
|---|
| 397 | {
|
|---|
| 398 | const unsigned char *p;
|
|---|
| 399 | size_t o = 0;
|
|---|
| 400 | int ret;
|
|---|
| 401 |
|
|---|
| 402 | for (p = (const unsigned char *)in; *p != '\0'; ++p) {
|
|---|
| 403 | uint32_t u;
|
|---|
| 404 |
|
|---|
| 405 | ret = utf8toutf32(&p, &u);
|
|---|
| 406 | if (ret)
|
|---|
| 407 | return ret;
|
|---|
| 408 |
|
|---|
| 409 | if (u & 0xffff0000)
|
|---|
| 410 | return WIND_ERR_NOT_UTF16;
|
|---|
| 411 |
|
|---|
| 412 | if (out) {
|
|---|
| 413 | if (o >= *out_len)
|
|---|
| 414 | return WIND_ERR_OVERRUN;
|
|---|
| 415 | out[o] = u;
|
|---|
| 416 | }
|
|---|
| 417 | o++;
|
|---|
| 418 | }
|
|---|
| 419 | *out_len = o;
|
|---|
| 420 | return 0;
|
|---|
| 421 | }
|
|---|
| 422 |
|
|---|
/**
 * Calculate the length of the UCS2 string that results from
 * converting a UTF-8 string.
 *
 * @param in the UTF-8 string to measure.
 * @param out_len set to the number of UCS2 characters the conversion
 * would produce.
 *
 * @return returns 0 on success, an wind error code otherwise
 * @ingroup wind
 */

int
wind_utf8ucs2_length(const char *in, size_t *out_len)
{
    /* A NULL output buffer puts wind_utf8ucs2() in count-only mode. */
    uint16_t *const no_output = NULL;

    return wind_utf8ucs2(in, no_output, out_len);
}
|
|---|
| 439 |
|
|---|
| 440 | /**
|
|---|
| 441 | * Convert an UCS2 string to a UTF-8 string.
|
|---|
| 442 | *
|
|---|
| 443 | * @param in an UCS2 string to convert.
|
|---|
| 444 | * @param in_len the length of the in UCS2 string.
|
|---|
| 445 | * @param out the resulting UTF-8 strint, must be at least
|
|---|
| 446 | * wind_ucs2utf8_length() long. If out is NULL, the function will
|
|---|
| 447 | * calculate the needed space for the out variable (just like
|
|---|
| 448 | * wind_ucs2utf8_length()).
|
|---|
| 449 | * @param out_len before processing out_len should be the length of
|
|---|
| 450 | * the out variable, after processing it will be the length of the out
|
|---|
| 451 | * string.
|
|---|
| 452 | *
|
|---|
| 453 | * @return returns 0 on success, an wind error code otherwise
|
|---|
| 454 | * @ingroup wind
|
|---|
| 455 | */
|
|---|
| 456 |
|
|---|
| 457 | int
|
|---|
| 458 | wind_ucs2utf8(const uint16_t *in, size_t in_len, char *out, size_t *out_len)
|
|---|
| 459 | {
|
|---|
| 460 | uint16_t ch;
|
|---|
| 461 | size_t i, len, o;
|
|---|
| 462 |
|
|---|
| 463 | for (o = 0, i = 0; i < in_len; i++) {
|
|---|
| 464 | ch = in[i];
|
|---|
| 465 |
|
|---|
| 466 | if (ch < 0x80) {
|
|---|
| 467 | len = 1;
|
|---|
| 468 | } else if (ch < 0x800) {
|
|---|
| 469 | len = 2;
|
|---|
| 470 | } else
|
|---|
| 471 | len = 3;
|
|---|
| 472 |
|
|---|
| 473 | o += len;
|
|---|
| 474 |
|
|---|
| 475 | if (out) {
|
|---|
| 476 | if (o >= *out_len)
|
|---|
| 477 | return WIND_ERR_OVERRUN;
|
|---|
| 478 |
|
|---|
| 479 | switch(len) {
|
|---|
| 480 | case 3:
|
|---|
| 481 | out[2] = (ch | 0x80) & 0xbf;
|
|---|
| 482 | ch = ch >> 6;
|
|---|
| 483 | case 2:
|
|---|
| 484 | out[1] = (ch | 0x80) & 0xbf;
|
|---|
| 485 | ch = ch >> 6;
|
|---|
| 486 | case 1:
|
|---|
| 487 | out[0] = ch | first_char[len - 1];
|
|---|
| 488 | }
|
|---|
| 489 | out += len;
|
|---|
| 490 | }
|
|---|
| 491 | }
|
|---|
| 492 | if (out) {
|
|---|
| 493 | if (o >= *out_len)
|
|---|
| 494 | return WIND_ERR_OVERRUN;
|
|---|
| 495 | *out = '\0';
|
|---|
| 496 | }
|
|---|
| 497 | *out_len = o;
|
|---|
| 498 | return 0;
|
|---|
| 499 | }
|
|---|
| 500 |
|
|---|
/**
 * Calculate the length of the UTF-8 string that results from
 * converting an UCS2 string.
 *
 * @param in an UCS2 string to measure.
 * @param in_len the number of elements in the UCS2 string.
 * @param out_len set to the length of the resulting UTF-8 string.
 *
 * @return returns 0 on success, an wind error code otherwise
 * @ingroup wind
 */

int
wind_ucs2utf8_length(const uint16_t *in, size_t in_len, size_t *out_len)
{
    /* A NULL output buffer asks wind_ucs2utf8() for the length only. */
    char *const no_output = NULL;

    return wind_ucs2utf8(in, in_len, no_output, out_len);
}
|
|---|