Changeset 596 for trunk/server/source3/lib/util_str.c
- Timestamp:
- Jul 2, 2011, 3:35:33 PM (14 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
trunk/server/source3/lib/util_str.c
r454 r596 1455 1455 1456 1456 /** 1457 * Calculate the number of units (8 or 16-bit, depending on the 1458 * destination charset), that would be needed to convert the input 1459 * string which is expected to be in in CH_UNIX encoding to the 1460 * destination charset (which should be a unicode charset). 1461 */ 1462 size_t strlen_m_ext(const char *s, const charset_t dst_charset) 1463 { 1464 size_t count = 0; 1465 1466 if (!s) { 1467 return 0; 1468 } 1469 1470 while (*s && !(((uint8_t)*s) & 0x80)) { 1471 s++; 1472 count++; 1473 } 1474 1475 if (!*s) { 1476 return count; 1477 } 1478 1479 while (*s) { 1480 size_t c_size; 1481 codepoint_t c = next_codepoint(s, &c_size); 1482 s += c_size; 1483 1484 switch(dst_charset) { 1485 case CH_UTF16LE: 1486 case CH_UTF16BE: 1487 case CH_UTF16MUNGED: 1488 if (c < 0x10000) { 1489 /* Unicode char fits into 16 bits. */ 1490 count += 1; 1491 } else { 1492 /* Double-width unicode char - 32 bits. */ 1493 count += 2; 1494 } 1495 break; 1496 case CH_UTF8: 1497 /* 1498 * this only checks ranges, and does not 1499 * check for invalid codepoints 1500 */ 1501 if (c < 0x80) { 1502 count += 1; 1503 } else if (c < 0x800) { 1504 count += 2; 1505 } else if (c < 0x1000) { 1506 count += 3; 1507 } else { 1508 count += 4; 1509 } 1510 break; 1511 default: 1512 /* 1513 * non-unicode encoding: 1514 * assume that each codepoint fits into 1515 * one unit in the destination encoding. 1516 */ 1517 count += 1; 1518 } 1519 } 1520 1521 return count; 1522 } 1523 1524 size_t strlen_m_ext_term(const char *s, const charset_t dst_charset) 1525 { 1526 if (!s) { 1527 return 0; 1528 } 1529 return strlen_m_ext(s, dst_charset) + 1; 1530 } 1531 1532 /** 1457 1533 Count the number of UCS2 characters in a string. Normally this will 1458 1534 be the same as the number of bytes in a string for single byte strings, … … 1462 1538 size_t strlen_m(const char *s) 1463 1539 { 1464 size_t count = 0; 1465 1466 if (!s) { 1467 return 0; 1468 } 1469 1470 while (*s && !(((uint8_t)*s) & 0x80)) { 1471 s++; 1472 count++; 1473 } 1474 1475 if (!*s) { 1476 return count; 1477 } 1478 1479 while (*s) { 1480 size_t c_size; 1481 codepoint_t c = next_codepoint(s, &c_size); 1482 if (c < 0x10000) { 1483 /* Unicode char fits into 16 bits. */ 1484 count += 1; 1485 } else { 1486 /* Double-width unicode char - 32 bits. */ 1487 count += 2; 1488 } 1489 s += c_size; 1490 } 1491 1492 return count; 1540 return strlen_m_ext(s, CH_UTF16LE); 1493 1541 } 1494 1542
Note:
See TracChangeset
for help on using the changeset viewer.