↓
|
idna_to_ascii_4i
|
37
|
96
|
199
|
idna.c
|
/*
 * ToASCII for a single label.
 *
 * in/inlen: the label as UCS-4 code points (inlen of them).
 * out:      caller-supplied buffer; at most 63 characters plus a
 *           terminating NUL are written here, so it must hold 64 chars.
 * flags:    IDNA_ALLOW_UNASSIGNED and IDNA_USE_STD3_ASCII_RULES are
 *           consulted.
 *
 * Returns IDNA_SUCCESS, or one of the IDNA_* error codes returned below.
 */
int
idna_to_ascii_4i (const uint32_t * in, size_t inlen, char *out, int flags)
{
  const size_t prefixlen = strlen (IDNA_ACE_PREFIX);
  uint32_t *src;		/* working copy, always zero terminated */
  size_t pos, n, outlen;
  int all_ascii;
  int rc;

  /*
   * 1. If all code points in the sequence are in the ASCII range
   *    (0..7F) then skip to step 3.
   */
  all_ascii = 1;
  for (pos = 0; pos < inlen; pos++)
    if (in[pos] > 0x7F)
      all_ascii = 0;

  if (all_ascii)
    {
      /* Plain copy with an added zero terminator. */
      src = malloc (sizeof (in[0]) * (inlen + 1));
      if (src == NULL)
	return IDNA_MALLOC_ERROR;
      memcpy (src, in, sizeof (in[0]) * inlen);
      src[inlen] = 0;
    }
  else
    {
      /*
       * 2. Perform the steps specified in [NAMEPREP] and fail if there
       *    is an error.  The AllowUnassigned flag is used in [NAMEPREP].
       */
      size_t bufsize;
      char *utf8 = stringprep_ucs4_to_utf8 (in, (ssize_t) inlen, NULL, NULL);

      if (utf8 == NULL)
	return IDNA_MALLOC_ERROR;

      bufsize = strlen (utf8);
      do
	{
	  char *grown;

	  /* Grow the buffer and retry until nameprep has enough room. */
	  bufsize = 2 * bufsize + 10;
	  grown = realloc (utf8, bufsize);
	  if (grown == NULL)
	    {
	      free (utf8);
	      return IDNA_MALLOC_ERROR;
	    }
	  utf8 = grown;

	  if (flags & IDNA_ALLOW_UNASSIGNED)
	    rc = stringprep_nameprep (utf8, bufsize);
	  else
	    rc = stringprep_nameprep_no_unassigned (utf8, bufsize);
	}
      while (rc == STRINGPREP_TOO_SMALL_BUFFER);

      if (rc != STRINGPREP_OK)
	{
	  free (utf8);
	  return IDNA_STRINGPREP_ERROR;
	}

      src = stringprep_utf8_to_ucs4 (utf8, -1, NULL);
      free (utf8);
      if (!src)
	return IDNA_MALLOC_ERROR;
    }

  /*
   * 3. If the UseSTD3ASCIIRules flag is set, then perform these checks:
   *
   *    (a) Verify the absence of non-LDH ASCII code points; that is,
   *        the absence of 0..2C, 2E..2F, 3A..40, 5B..60, and 7B..7F.
   *
   *    (b) Verify the absence of leading and trailing hyphen-minus;
   *        that is, the absence of U+002D at the beginning and end of
   *        the sequence.
   */
  if (flags & IDNA_USE_STD3_ASCII_RULES)
    {
      for (n = 0; src[n]; n++)
	{
	  const uint32_t c = src[n];

	  if (c <= 0x2C || c == 0x2E || c == 0x2F ||
	      (c >= 0x3A && c <= 0x40) ||
	      (c >= 0x5B && c <= 0x60) || (c >= 0x7B && c <= 0x7F))
	    {
	      free (src);
	      return IDNA_CONTAINS_NON_LDH;
	    }
	}

      if (src[0] == 0x002D || (n > 0 && src[n - 1] == 0x002D))
	{
	  free (src);
	  return IDNA_CONTAINS_MINUS;
	}
    }

  /*
   * 4. If all code points in the sequence are in the ASCII range
   *    (0..7F), then skip to step 8.
   *
   * The sequence is copied to OUT while scanning so that the ASCII
   * case needs no further work; at most 63 characters fit.
   */
  all_ascii = 1;
  for (n = 0; src[n]; n++)
    {
      if (src[n] > 0x7F)
	all_ascii = 0;
      if (n < 64)
	out[n] = src[n];
    }
  if (n >= 64)
    {
      free (src);
      return IDNA_INVALID_LENGTH;
    }
  out[n] = '\0';

  if (!all_ascii)
    {
      int has_prefix = 1;

      /*
       * 5. Verify that the sequence does NOT begin with the ACE prefix.
       */
      for (pos = 0; has_prefix && pos < prefixlen; pos++)
	if (((uint32_t) IDNA_ACE_PREFIX[pos] & 0xFF) != src[pos])
	  has_prefix = 0;
      if (has_prefix)
	{
	  free (src);
	  return IDNA_CONTAINS_ACE_PREFIX;
	}

      /*
       * 6. Encode the sequence using the encoding algorithm in
       *    [PUNYCODE] and fail if there is an error.
       *
       * n still holds the number of code points in src from step 4.
       */
      outlen = 63 - prefixlen;
      rc = punycode_encode (n, src, NULL, &outlen, &out[prefixlen]);
      if (rc != PUNYCODE_SUCCESS)
	{
	  free (src);
	  return IDNA_PUNYCODE_ERROR;
	}
      out[prefixlen + outlen] = '\0';

      /*
       * 7. Prepend the ACE prefix.
       */
      memcpy (out, IDNA_ACE_PREFIX, prefixlen);
    }

  /*
   * 8. Verify that the number of code points is in the range 1 to 63
   *    inclusive (0 is excluded).
   */
  free (src);
  if (strlen (out) < 1)
    return IDNA_INVALID_LENGTH;

  return IDNA_SUCCESS;
}
|
↓
|
stringprep_4i
|
30
|
77
|
149
|
stringprep.c
|
/*
 * Run every step of PROFILE over the UCS-4 string UCS4 in place.
 *
 * ucs4:       buffer holding the string; rewritten in place.
 * len:        in: number of code points in ucs4; out (on success only):
 *             number of code points after processing.
 * maxucs4len: capacity of ucs4 in code points.
 * flags:      Stringprep_profile_flags that enable/disable steps.
 * profile:    array of steps, terminated by an entry whose operation is 0.
 *
 * Returns STRINGPREP_OK or the STRINGPREP_* error of the first failing step.
 */
int
stringprep_4i (uint32_t * ucs4, size_t *len, size_t maxucs4len,
	       Stringprep_profile_flags flags,
	       const Stringprep_profile * profile)
{
  size_t step;
  size_t curlen = *len;
  int rc;

  for (step = 0; profile[step].operation; step++)
    {
      switch (profile[step].operation)
	{
	case STRINGPREP_NFKC:
	  {
	    uint32_t *normalized;

	    if (UNAPPLICAPLEFLAGS (flags, profile[step].flags))
	      break;

	    /* Profile requires NFKC, but caller asked for no NFKC. */
	    if ((flags & STRINGPREP_NO_NFKC) && !profile[step].flags)
	      return STRINGPREP_FLAG_ERROR;

	    normalized = stringprep_ucs4_nfkc_normalize (ucs4, curlen);
	    if (!normalized)
	      return STRINGPREP_NFKC_FAILED;

	    /* The normalized string is zero terminated; measure it. */
	    curlen = 0;
	    while (normalized[curlen])
	      curlen++;

	    if (curlen >= maxucs4len)
	      {
		free (normalized);
		return STRINGPREP_TOO_SMALL_BUFFER;
	      }

	    memcpy (ucs4, normalized, curlen * sizeof (ucs4[0]));
	    free (normalized);
	  }
	  break;

	case STRINGPREP_PROHIBIT_TABLE:
	  if (stringprep_find_string_in_table (ucs4, curlen, NULL,
					       profile[step].table,
					       profile[step].table_size) != -1)
	    return STRINGPREP_CONTAINS_PROHIBITED;
	  break;

	case STRINGPREP_UNASSIGNED_TABLE:
	  if (UNAPPLICAPLEFLAGS (flags, profile[step].flags))
	    break;
	  if ((flags & STRINGPREP_NO_UNASSIGNED)
	      && stringprep_find_string_in_table (ucs4, curlen, NULL,
						  profile[step].table,
						  profile[step].table_size)
	      != -1)
	    return STRINGPREP_CONTAINS_UNASSIGNED;
	  break;

	case STRINGPREP_MAP_TABLE:
	  if (UNAPPLICAPLEFLAGS (flags, profile[step].flags))
	    break;
	  rc = stringprep_apply_table_to_string (ucs4, &curlen, maxucs4len,
						 profile[step].table,
						 profile[step].table_size);
	  if (rc != STRINGPREP_OK)
	    return rc;
	  break;

	  /* The BIDI tables are only consulted from the STRINGPREP_BIDI
	     step below; on their own they do nothing. */
	case STRINGPREP_BIDI_PROHIBIT_TABLE:
	case STRINGPREP_BIDI_RAL_TABLE:
	case STRINGPREP_BIDI_L_TABLE:
	  break;

	case STRINGPREP_BIDI:
	  {
	    int seen_prohibit = 0, seen_ral = 0, seen_l = 0;
	    /* Profile index of the RAL / L table that matched the string,
	       or SIZE_MAX when no such table matched. */
	    size_t ral_at = SIZE_MAX;
	    size_t l_at = SIZE_MAX;
	    size_t scan;

	    /* Walk the whole profile again looking for the BIDI tables. */
	    for (scan = 0; profile[scan].operation; scan++)
	      {
		switch (profile[scan].operation)
		  {
		  case STRINGPREP_BIDI_PROHIBIT_TABLE:
		    seen_prohibit = 1;
		    if (stringprep_find_string_in_table
			(ucs4, curlen, NULL, profile[scan].table,
			 profile[scan].table_size) != -1)
		      return STRINGPREP_BIDI_CONTAINS_PROHIBITED;
		    break;

		  case STRINGPREP_BIDI_RAL_TABLE:
		    seen_ral = 1;
		    if (stringprep_find_string_in_table
			(ucs4, curlen, NULL, profile[scan].table,
			 profile[scan].table_size) != -1)
		      ral_at = scan;
		    break;

		  case STRINGPREP_BIDI_L_TABLE:
		    seen_l = 1;
		    if (stringprep_find_string_in_table
			(ucs4, curlen, NULL, profile[scan].table,
			 profile[scan].table_size) != -1)
		      l_at = scan;
		    break;

		  default:
		    break;
		  }
	      }

	    /* A BIDI step needs all three kinds of table in the profile. */
	    if (!(seen_prohibit && seen_ral && seen_l))
	      return STRINGPREP_PROFILE_ERROR;

	    if (ral_at != SIZE_MAX && l_at != SIZE_MAX)
	      return STRINGPREP_BIDI_BOTH_L_AND_RAL;

	    /* With RAL characters present, both the first and the last
	       character must themselves be in the RAL table. */
	    if (ral_at != SIZE_MAX
		&& (stringprep_find_character_in_table
		    (ucs4[0], profile[ral_at].table,
		     profile[ral_at].table_size) == -1
		    || stringprep_find_character_in_table
		    (ucs4[curlen - 1], profile[ral_at].table,
		     profile[ral_at].table_size) == -1))
	      return STRINGPREP_BIDI_LEADTRAIL_NOT_RAL;
	  }
	  break;

	default:
	  return STRINGPREP_PROFILE_ERROR;
	}
    }

  *len = curlen;

  return STRINGPREP_OK;
}
|
↓
|
_g_utf8_normalize_wc
|
30
|
76
|
135
|
nfkc.c
|
/*
 * Normalize the UTF-8 string STR according to MODE and return the result
 * as a newly allocated, zero-terminated array of gunichar.
 *
 * str:     UTF-8 input; scanning stops at the first NUL byte.
 * max_len: when >= 0, scanning also stops once the read position reaches
 *          str + max_len; when negative only the NUL byte ends the scan.
 * mode:    NFKC/NFKD select compatibility decomposition (do_compat);
 *          NFC/NFKC additionally run the composition pass (do_compose).
 *
 * Returns the buffer obtained from g_malloc, or NULL when g_malloc
 * returns NULL.
 */
static gunichar *
_g_utf8_normalize_wc (const gchar * str, gssize max_len, GNormalizeMode mode)
{
gsize n_wc;
gunichar *wc_buffer;
const char *p;
gsize last_start;
gboolean do_compat = (mode == G_NORMALIZE_NFKC || mode == G_NORMALIZE_NFKD);
gboolean do_compose = (mode == G_NORMALIZE_NFC || mode == G_NORMALIZE_NFKC);
/* Pass 1: count how many code points the decomposed string will need. */
n_wc = 0;
p = str;
while ((max_len < 0 || p < str + max_len) && *p)
{
const gchar *decomp;
gunichar wc = g_utf8_get_char (p);
if (wc >= SBase && wc < SBase + SCount)
{
/* Hangul syllable: with a NULL buffer only the length is requested. */
gsize result_len;
decompose_hangul (wc, NULL, &result_len);
n_wc += result_len;
}
else
{
decomp = find_decomposition (wc, do_compat);
if (decomp)
n_wc += g_utf8_strlen (decomp);
else
n_wc++;
}
p = g_utf8_next_char (p);
}
/* One extra slot for the terminating zero. */
wc_buffer = g_malloc (sizeof (gunichar) * (n_wc + 1));
if (!wc_buffer)
return NULL;
/* Pass 2: write the decomposition, canonically ordering each run that
   ends when a code point of combining class 0 is written. */
last_start = 0;
n_wc = 0;
p = str;
while ((max_len < 0 || p < str + max_len) && *p)
{
gunichar wc = g_utf8_get_char (p);
const gchar *decomp;
int cc;
/* Index where this input character's output begins. */
gsize old_n_wc = n_wc;
if (wc >= SBase && wc < SBase + SCount)
{
gsize result_len;
decompose_hangul (wc, wc_buffer + n_wc, &result_len);
n_wc += result_len;
}
else
{
decomp = find_decomposition (wc, do_compat);
if (decomp)
{
const char *pd;
for (pd = decomp; *pd != '\0'; pd = g_utf8_next_char (pd))
wc_buffer[n_wc++] = g_utf8_get_char (pd);
}
else
wc_buffer[n_wc++] = wc;
}
if (n_wc > 0)
{
/* Class of the first code point just written for this character. */
cc = COMBINING_CLASS (wc_buffer[old_n_wc]);
if (cc == 0)
{
/* Order everything from the previous run start up to the current
   end, then begin a new run at this character. */
g_unicode_canonical_ordering (wc_buffer + last_start,
n_wc - last_start);
last_start = old_n_wc;
}
}
p = g_utf8_next_char (p);
}
/* Order the final run, which no later class-0 character closed. */
if (n_wc > 0)
{
g_unicode_canonical_ordering (wc_buffer + last_start,
n_wc - last_start);
/* dead assignment: last_start = n_wc; */
}
wc_buffer[n_wc] = 0;
/* All decomposed and reordered */
if (do_compose && n_wc > 0)
{
/* Composition pass: try to merge wc_buffer[i] into the code point at
   last_start; on success the buffer is shifted left by one. */
gsize i, j;
int last_cc = 0;
last_start = 0;
for (i = 0; i < n_wc; i++)
{
int cc = COMBINING_CLASS (wc_buffer[i]);
/* Only attempt to combine when the previous class was 0 or differs
   from the current class. */
if (i > 0 &&
(last_cc == 0 || last_cc != cc) &&
combine (wc_buffer[last_start], wc_buffer[i],
&wc_buffer[last_start]))
{
/* Remove element i by shifting the tail down one slot. */
for (j = i + 1; j < n_wc; j++)
wc_buffer[j - 1] = wc_buffer[j];
n_wc--;
/* Step back so the loop's i++ revisits the slot that now holds the
   element following the removed one. */
i--;
if (i == last_start)
last_cc = 0;
else
last_cc = COMBINING_CLASS (wc_buffer[i - 1]);
continue;
}
if (cc == 0)
last_start = i;
last_cc = cc;
}
}
/* Re-terminate: n_wc may have shrunk during composition. */
wc_buffer[n_wc] = 0;
return wc_buffer;
}
|
↓
|
punycode_decode
|
26
|
66
|
106
|
punycode.c
|
/*
 * Decode a Punycode string into an array of code points.
 *
 * input_length:  number of characters in input.
 * input:         the characters to decode (need not be NUL terminated;
 *                only indices below input_length are read).
 * output_length: in: capacity of output in code points (values above
 *                maxint are clamped to maxint); out (on success only):
 *                number of code points written.
 * output:        receives the decoded code points.
 * case_flags:    may be NULL; otherwise receives one flag per output
 *                code point, computed with flagged().
 *
 * Returns punycode_success, punycode_bad_input, punycode_big_output or
 * punycode_overflow.
 */
int
punycode_decode (size_t input_length,
const char input[],
size_t *output_length,
punycode_uint output[], unsigned char case_flags[])
{
punycode_uint n, out, i, max_out, bias, oldi, w, k, digit, t;
size_t b, j, in;
/* Initialize the state: */
n = initial_n;
out = i = 0;
max_out = *output_length > maxint ? maxint
: (punycode_uint) * output_length;
bias = initial_bias;
/* Handle the basic code points: Let b be the number of input code */
/* points before the last delimiter, or 0 if there is none, then */
/* copy the first b code points to the output. */
for (b = j = 0; j < input_length; ++j)
if (delim (input[j]))
b = j;
if (b > max_out)
return punycode_big_output;
for (j = 0; j < b; ++j)
{
if (case_flags)
case_flags[out] = flagged (input[j]);
if (!basic (input[j]))
return punycode_bad_input;
output[out++] = input[j];
}
/* Everything after the last delimiter (or the whole input when b is 0) */
/* must also consist of basic code points. */
for (j = b + (b > 0); j < input_length; ++j)
if (!basic (input[j]))
return punycode_bad_input;
/* Main decoding loop: Start just after the last delimiter if any */
/* basic code points were copied; start at the beginning otherwise. */
for (in = b > 0 ? b + 1 : 0; in < input_length; ++out)
{
/* in is the index of the next ASCII code point to be consumed, */
/* and out is the number of code points in the output array. */
/* Decode a generalized variable-length integer into delta, */
/* which gets added to i. The overflow checking is easier */
/* if we increase i as we go, then subtract off its starting */
/* value at the end to obtain delta. */
for (oldi = i, w = 1, k = base;; k += base)
{
if (in >= input_length)
return punycode_bad_input;
digit = decode_digit (input[in++]);
if (digit >= base)
return punycode_bad_input;
/* Guard i += digit * w against exceeding maxint. */
if (digit > (maxint - i) / w)
return punycode_overflow;
i += digit * w;
t = k <= bias /* + tmin */ ? tmin : /* +tmin not needed */
k >= bias + tmax ? tmax : k - bias;
if (digit < t)
break;
/* Guard w *= (base - t) against exceeding maxint. */
if (w > maxint / (base - t))
return punycode_overflow;
w *= (base - t);
}
bias = adapt (i - oldi, out + 1, oldi == 0);
/* i was supposed to wrap around from out+1 to 0, */
/* incrementing n each time, so we'll fix that now: */
if (i / (out + 1) > maxint - n)
return punycode_overflow;
n += i / (out + 1);
/* Reject values above U+10FFFF and values in D800..DBFF. */
/* NOTE(review): DC00..DFFF is not rejected here - confirm whether the */
/* whole surrogate range D800..DFFF was intended. */
if (n > 0x10FFFF || (n >= 0xD800 && n <= 0xDBFF))
return punycode_bad_input;
i %= (out + 1);
/* Insert n at position i of the output: */
/* not needed for Punycode: */
/* if (basic(n)) return punycode_bad_input; */
if (out >= max_out)
return punycode_big_output;
if (case_flags)
{
memmove (case_flags + i + 1, case_flags + i, out - i);
/* Case of last ASCII code point determines case flag: */
case_flags[i] = flagged (input[in - 1]);
}
/* Open a gap at position i, then store n there and advance i. */
memmove (output + i + 1, output + i, (out - i) * sizeof *output);
output[i++] = n;
}
*output_length = (size_t) out;
/* cannot overflow because out <= old value of *output_length */
return punycode_success;
}
|
↓
|
punycode_encode
|
25
|
59
|
114
|
punycode.c
|
/*
 * Encode an array of code points as a Punycode string.
 *
 * input_length:  number of code points in input; values above maxint
 *                are rejected with punycode_overflow.
 * input:         the code points to encode.
 * case_flags:    may be NULL; otherwise case_flags[j] is passed to
 *                encode_basic()/encode_digit() for input[j].
 * output_length: in: capacity of output in characters; out (on success
 *                only): number of characters written.
 * output:        receives the encoded characters.  This function does
 *                not write a terminating NUL.
 *
 * Returns punycode_success, punycode_bad_input, punycode_big_output or
 * punycode_overflow.
 */
int
punycode_encode (size_t input_length,
const punycode_uint input[],
const unsigned char case_flags[],
size_t *output_length, char output[])
{
punycode_uint input_len, n, delta, h, b, bias, j, m, q, k, t;
size_t out, max_out;
/* The Punycode spec assumes that the input length is the same type */
/* of integer as a code point, so we need to convert the size_t to */
/* a punycode_uint, which could overflow. */
if (input_length > maxint)
return punycode_overflow;
input_len = (punycode_uint) input_length;
/* Initialize the state: */
n = initial_n;
delta = 0;
out = 0;
max_out = *output_length;
bias = initial_bias;
/* Handle the basic code points: */
for (j = 0; j < input_len; ++j)
{
if (basic (input[j]))
{
/* Require two free slots before writing one basic code point. */
if (max_out - out < 2)
return punycode_big_output;
output[out++] = case_flags ?
encode_basic (input[j], case_flags[j]) : (char) input[j];
}
/* Reject values above U+10FFFF and values in D800..DBFF. */
/* NOTE(review): DC00..DFFF is not rejected here - confirm whether the */
/* whole surrogate range D800..DFFF was intended. */
else if (input[j] > 0x10FFFF
|| (input[j] >= 0xD800 && input[j] <= 0xDBFF))
return punycode_bad_input;
/* else if (input[j] < n) return punycode_bad_input; */
/* (not needed for Punycode with unsigned code points) */
}
h = b = (punycode_uint) out;
/* cannot overflow because out <= input_len <= maxint */
/* h is the number of code points that have been handled, b is the */
/* number of basic code points, and out is the number of ASCII code */
/* points that have been output. */
if (b > 0)
output[out++] = delimiter;
/* Main encoding loop: */
while (h < input_len)
{
/* All non-basic code points < n have been */
/* handled already. Find the next larger one: */
for (m = maxint, j = 0; j < input_len; ++j)
{
/* if (basic(input[j])) continue; */
/* (not needed for Punycode) */
if (input[j] >= n && input[j] < m)
m = input[j];
}
/* Increase delta enough to advance the decoder's */
/* <n,i> state to <m,0>, but guard against overflow: */
if (m - n > (maxint - delta) / (h + 1))
return punycode_overflow;
delta += (m - n) * (h + 1);
n = m;
for (j = 0; j < input_len; ++j)
{
/* Punycode does not need to check whether input[j] is basic: */
if (input[j] < n /* || basic(input[j]) */ )
{
/* delta wrapping around to 0 is reported as overflow. */
if (++delta == 0)
return punycode_overflow;
}
if (input[j] == n)
{
/* Represent delta as a generalized variable-length integer: */
for (q = delta, k = base;; k += base)
{
/* Checked before every digit, including the final one written */
/* after the loop. */
if (out >= max_out)
return punycode_big_output;
t = k <= bias /* + tmin */ ? tmin : /* +tmin not needed */
k >= bias + tmax ? tmax : k - bias;
if (q < t)
break;
output[out++] = encode_digit (t + (q - t) % (base - t), 0);
q = (q - t) / (base - t);
}
output[out++] = encode_digit (q, case_flags && case_flags[j]);
bias = adapt (delta, h + 1, h == b);
delta = 0;
++h;
}
}
++delta, ++n;
}
*output_length = out;
return punycode_success;
}
|
↓
|
idna_to_ascii_4z
|
15
|
46
|
85
|
idna.c
|
/*
 * Convert a zero-terminated UCS-4 domain name to its ASCII form, one
 * label at a time, joining the converted labels with '.'.
 *
 * input:  zero-terminated UCS-4 domain name.
 * output: receives a newly allocated NUL-terminated string on success;
 *         the caller releases it.
 * flags:  passed through to idna_to_ascii_4i for every label.
 *
 * Returns IDNA_SUCCESS, IDNA_MALLOC_ERROR, or the error returned by
 * idna_to_ascii_4i for the first label that fails.
 */
int
idna_to_ascii_4z (const uint32_t * input, char **output, int flags)
{
  const uint32_t *label;
  const uint32_t *stop;
  char ace[64];
  char *result = NULL;
  int rc;

  /* 1) Whenever dots are used as label separators, the following
     characters MUST be recognized as dots: U+002E (full stop),
     U+3002 (ideographic full stop), U+FF0E (fullwidth full stop),
     U+FF61 (halfwidth ideographic full stop). */

  /* Implicit zero-length root label: empty input gives "". */
  if (input[0] == 0)
    {
      *output = malloc (1);
      if (!*output)
	return IDNA_MALLOC_ERROR;
      (*output)[0] = '\0';
      return IDNA_SUCCESS;
    }

  /* Explicit zero-length root label: a lone dot gives ".". */
  if (DOTP (input[0]) && input[1] == 0)
    {
      *output = malloc (2);
      if (!*output)
	return IDNA_MALLOC_ERROR;
      (*output)[0] = '.';
      (*output)[1] = '\0';
      return IDNA_SUCCESS;
    }

  *output = NULL;

  for (label = input;; label = stop + 1)
    {
      /* Find the end of the current label: a dot or the terminator. */
      stop = label;
      while (*stop && !DOTP (*stop))
	stop++;

      if (*stop == '\0' && label == stop)
	{
	  /* Trailing dot: the final label is the empty root label. */
	  ace[0] = '\0';
	}
      else
	{
	  rc = idna_to_ascii_4i (label, (size_t) (stop - label), ace, flags);
	  if (rc != IDNA_SUCCESS)
	    {
	      free (result);
	      return rc;
	    }
	}

      if (result == NULL)
	{
	  /* First label: start the result string. */
	  result = strdup (ace);
	  if (!result)
	    return IDNA_MALLOC_ERROR;
	}
      else
	{
	  /* Later labels: grow the result and append "." plus the label. */
	  const size_t have = strlen (result);
	  const size_t add = strlen (ace);
	  char *grown = realloc (result, have + 1 + add + 1);

	  if (!grown)
	    {
	      free (result);
	      return IDNA_MALLOC_ERROR;
	    }
	  result = grown;
	  result[have] = '.';
	  memcpy (result + have + 1, ace, add + 1);
	}

      if (!*stop)
	break;
    }

  *output = result;

  return IDNA_SUCCESS;
}
|
↓
|
tld_get_4
|
14
|
24
|
37
|
tld.c
|
/*
 * Extract the top-level domain from a UCS-4 domain name.
 *
 * The TLD is the run of ASCII latin letters (A-Z, a-z) at the very end
 * of IN, provided that run is non-empty and is immediately preceded by
 * a dot character as recognized by DOTP.
 *
 * in:    UCS-4 domain name (inlen code points; not required to be zero
 *        terminated).
 * inlen: number of code points in IN.
 * out:   set to NULL on entry; on TLD_SUCCESS receives a newly allocated,
 *        NUL-terminated, lower-cased ASCII copy of the TLD which the
 *        caller releases.
 *
 * Returns TLD_SUCCESS, TLD_NODATA (IN is NULL or empty), TLD_NO_TLD
 * (no dot-preceded letter run at the end) or TLD_MALLOC_ERROR.
 */
int
tld_get_4 (const uint32_t * in, size_t inlen, char **out)
{
  size_t start;			/* index of the first letter of the run */
  size_t olen;			/* number of letters in the run */
  size_t i;
  char *out_s;

  *out = NULL;
  if (!in || inlen == 0)
    return TLD_NODATA;

  /* Scan backwards over trailing (latin) letters.  Indices are used
     instead of a moving pointer so that nothing ever points before the
     start of IN when the whole input consists of letters. */
  start = inlen;
  while (start > 0 && ((in[start - 1] >= 0x41 && in[start - 1] <= 0x5A) ||
		       (in[start - 1] >= 0x61 && in[start - 1] <= 0x7A)))
    start--;
  olen = inlen - start;

  /* Need at least one letter, and a dot directly in front of the run. */
  if (olen == 0 || start == 0 || !DOTP (in[start - 1]))
    return TLD_NO_TLD;

  /* Found something that appears a TLD. */
  out_s = malloc (sizeof (char) * (olen + 1));
  if (!out_s)
    return TLD_MALLOC_ERROR;

  /* Transcribe to lowercase ascii string: values above 'Z' are already
     lower case, the rest are upper case and get 0x20 added. */
  for (i = 0; i < olen; i++)
    {
      const uint32_t c = in[start + i];

      out_s[i] = (char) (c > 0x5A ? c : c + 0x20);
    }
  out_s[olen] = 0;

  *out = out_s;
  return TLD_SUCCESS;
}
|
↓
|
idna_to_unicode_internal
|
12
|
48
|
119
|
idna.c
|
/*
 * ToUnicode for a single label.
 *
 * utf8in: heap-allocated, NUL-terminated UTF-8 label.  This function
 *         takes ownership: it may realloc the buffer and frees it on
 *         every return path.
 * out:    receives the decoded, zero-terminated UCS-4 label on success.
 * outlen: in: capacity of OUT in code points, including room for the
 *         terminating zero; out: number of code points decoded (not
 *         counting the terminator).
 * flags:  IDNA_ALLOW_UNASSIGNED is consulted here; the full value is
 *         passed on to idna_to_ascii_4i.
 *
 * Returns IDNA_SUCCESS or one of the IDNA_* error codes returned below.
 */
static int
idna_to_unicode_internal (char *utf8in,
			  uint32_t * out, size_t *outlen, int flags)
{
  const size_t prefixlen = strlen (IDNA_ACE_PREFIX);
  int rc;
  char tmpout[64];
  size_t utf8len = strlen (utf8in) + 1;
  size_t addlen = 0, addinc = utf8len / 10 + 1;

  /*
   * ToUnicode consists of the following steps:
   *
   * 1. If the sequence contains any code points outside the ASCII range
   * (0..7F) then proceed to step 2, otherwise skip to step 3.
   */

  {
    size_t i;
    int inasciirange;

    inasciirange = 1;
    for (i = 0; utf8in[i]; i++)
      if (utf8in[i] & ~0x7F)
	inasciirange = 0;
    if (inasciirange)
      goto step3;
  }

  /*
   * 2. Perform the steps specified in [NAMEPREP] and fail if there is an
   * error. (If step 3 of ToASCII is also performed here, it will not
   * affect the overall behavior of ToUnicode, but it is not
   * necessary.) The AllowUnassigned flag is used in [NAMEPREP].
   */
  do
    {
      /* Grow the buffer (by a doubling increment) until nameprep fits. */
      char *newp = realloc (utf8in, utf8len + addlen);
      if (newp == NULL)
	{
	  free (utf8in);
	  return IDNA_MALLOC_ERROR;
	}
      utf8in = newp;
      if (flags & IDNA_ALLOW_UNASSIGNED)
	rc = stringprep_nameprep (utf8in, utf8len + addlen);
      else
	rc = stringprep_nameprep_no_unassigned (utf8in, utf8len + addlen);
      addlen += addinc;
      addinc *= 2;
    }
  while (rc == STRINGPREP_TOO_SMALL_BUFFER);

  if (rc != STRINGPREP_OK)
    {
      free (utf8in);
      return IDNA_STRINGPREP_ERROR;
    }

  /* 3. Verify that the sequence begins with the ACE prefix, and save a
   * copy of the sequence.
   * ... The ToASCII and ToUnicode operations MUST recognize the ACE
   * prefix in a case-insensitive manner.
   */

step3:
  if (c_strncasecmp (utf8in, IDNA_ACE_PREFIX, prefixlen) != 0)
    {
      free (utf8in);
      return IDNA_NO_ACE_PREFIX;
    }

  /* 4. Remove the ACE prefix.
   */

  memmove (utf8in, &utf8in[prefixlen], strlen (utf8in) - prefixlen + 1);

  /* 5. Decode the sequence using the decoding algorithm in [PUNYCODE]
   * and fail if there is an error. Save a copy of the result of
   * this step.
   */

  /* A zero-capacity buffer cannot even hold the terminating zero.
     Decrementing 0 would wrap around to SIZE_MAX and let the decoder
     (and the terminator store below) write past OUT, so treat it the
     same way as a decode that does not fit. */
  if (*outlen == 0)
    {
      free (utf8in);
      return IDNA_PUNYCODE_ERROR;
    }

  (*outlen)--;			/* reserve one for the zero */

  rc = punycode_decode (strlen (utf8in), utf8in, outlen, out, NULL);
  if (rc != PUNYCODE_SUCCESS)
    {
      free (utf8in);
      return IDNA_PUNYCODE_ERROR;
    }

  out[*outlen] = 0;		/* add zero */

  /* 6. Apply ToASCII.
   */

  rc = idna_to_ascii_4i (out, *outlen, tmpout, flags);
  if (rc != IDNA_SUCCESS)
    {
      free (utf8in);
      return rc;
    }

  /* 7. Verify that the result of step 6 matches the saved copy from
   * step 3, using a case-insensitive ASCII comparison.
   *
   * When the decoded label is pure ASCII, ToASCII returns it without an
   * ACE prefix and possibly shorter than the prefix.  Check for the
   * prefix first so that the comparison never starts beyond the
   * terminator of tmpout, where the buffer is uninitialized.
   */

  if (c_strncasecmp (tmpout, IDNA_ACE_PREFIX, prefixlen) != 0
      || c_strcasecmp (utf8in, tmpout + prefixlen) != 0)
    {
      free (utf8in);
      return IDNA_ROUNDTRIP_VERIFY_ERROR;
    }

  /* 8. Return the saved copy from step 5.
   */

  free (utf8in);
  return IDNA_SUCCESS;
}
|
↓
|
combine
|
12
|
21
|
54
|
nfkc.c
|
/*
 * Try to compose the pair (a, b) into a single code point.
 *
 * On success the composed code point is stored in *result and TRUE is
 * returned; otherwise FALSE is returned.  Hangul is tried first via
 * combine_hangul(); after that the compose tables are consulted using
 * the COMPOSE_INDEX of each argument.
 */
static gboolean
combine (gunichar a, gunichar b, gunichar * result)
{
  gushort ia, ib;

  if (combine_hangul (a, b, result))
    return TRUE;

  /* A is a "first" character that composes with exactly one partner. */
  ia = COMPOSE_INDEX (a);
  if (ia >= COMPOSE_FIRST_SINGLE_START && ia < COMPOSE_SECOND_START)
    {
      const int row = ia - COMPOSE_FIRST_SINGLE_START;

      if (b != compose_first_single[row][0])
	return FALSE;
      *result = compose_first_single[row][1];
      return TRUE;
    }

  /* B is a "second" character that composes with exactly one partner. */
  ib = COMPOSE_INDEX (b);
  if (ib >= COMPOSE_SECOND_SINGLE_START)
    {
      const int row = ib - COMPOSE_SECOND_SINGLE_START;

      if (a != compose_second_single[row][0])
	return FALSE;
      *result = compose_second_single[row][1];
      return TRUE;
    }

  /* General case: both indices must fall in the two-dimensional table;
     a zero entry there means the pair does not compose. */
  if (ia >= COMPOSE_FIRST_START && ia < COMPOSE_FIRST_SINGLE_START
      && ib >= COMPOSE_SECOND_START && ib < COMPOSE_SECOND_SINGLE_START)
    {
      const gunichar composed =
	compose_array[ia - COMPOSE_FIRST_START][ib - COMPOSE_SECOND_START];

      if (composed)
	{
	  *result = composed;
	  return TRUE;
	}
    }

  return FALSE;
}
|
↓
|
g_utf8_to_ucs4_fast
|
11
|
38
|
75
|
nfkc.c
|
/*
 * Convert UTF-8 to a newly allocated, zero-terminated UCS-4 array
 * without validating the input.
 *
 * str:           UTF-8 input; must not be NULL.
 * len:           when negative, conversion stops at the first NUL byte;
 *                otherwise it also stops once the scan position reaches
 *                str + len.
 * items_written: may be NULL; otherwise receives the number of code
 *                points stored (not counting the terminating zero).
 *
 * Returns the array obtained from g_malloc, or NULL when g_malloc
 * returns NULL.
 */
static gunichar *
g_utf8_to_ucs4_fast (const gchar * str, glong len, glong * items_written)
{
  gunichar *ucs4;
  gsize count, idx;
  const gchar *cur;

  g_return_val_if_fail (str != NULL, NULL);

  /* Pass 1: count characters by hopping from lead byte to lead byte. */
  count = 0;
  for (cur = str; (len < 0 || cur < str + len) && *cur;
       cur = g_utf8_next_char (cur))
    ++count;

  ucs4 = g_malloc (sizeof (gunichar) * (count + 1));
  if (!ucs4)
    return NULL;

  /* Pass 2: decode exactly COUNT characters. */
  cur = str;
  for (idx = 0; idx < count; idx++)
    {
      gunichar wc = (guchar) * cur++;
      gunichar mask;

      if (wc < 0x80)
	{
	  /* Single-byte (ASCII) character. */
	  ucs4[idx] = wc;
	  continue;
	}

      if (G_UNLIKELY ((wc & 0x40) == 0))
	{
	  /* It's an out-of-sequence 10xxxxxxx byte.
	   * Rather than making an ugly hash of this and the next byte
	   * and overrunning the buffer, it's more useful to treat it
	   * with a replacement character */
	  ucs4[idx] = 0xfffd;
	  continue;
	}

      /* Multi-byte sequence: fold in six payload bits per continuation
         byte while the lead byte's length bits, shifted along with the
         value, are still set under MASK. */
      mask = 0x40;
      do
	{
	  wc <<= 6;
	  wc |= (guchar) (*cur++) & 0x3f;
	  mask <<= 5;
	}
      while ((wc & mask) != 0);

      /* Strip what remains of the lead byte's length marker. */
      wc &= mask - 1;
      ucs4[idx] = wc;
    }
  ucs4[idx] = 0;

  if (items_written)
    *items_written = idx;

  return ucs4;
}
|
↓
|
_tld_checkchar
|
11
|
15
|
28
|
tld.c
|
/*
 * Check whether code point CH is permitted by the TLD table TLD.
 *
 * A NULL table permits everything.  The characters [-a-z0-9] and the
 * dot characters recognized by DOTP are always permitted.  Anything
 * else must fall inside one of the [start, end] intervals of
 * tld->valid, which is binary-searched.
 *
 * Returns TLD_SUCCESS when permitted, TLD_INVALID otherwise.
 */
static int
_tld_checkchar (uint32_t ch, const Tld_table * tld)
{
  const Tld_table_element *lo, *hi;

  if (!tld)
    return TLD_SUCCESS;

  /* Check for [-a-z0-9.]. */
  if (ch == 0x2D || DOTP (ch) ||
      (ch >= 0x30 && ch <= 0x39) || (ch >= 0x61 && ch <= 0x7A))
    return TLD_SUCCESS;

  /* Binary search over the half-open range [lo, hi) of intervals. */
  lo = tld->valid;
  hi = lo + tld->nvalid;
  while (lo < hi)
    {
      const Tld_table_element *mid = lo + ((hi - lo) / 2);

      if (ch < mid->start)
	hi = mid;
      else if (ch > mid->end)
	lo = mid + 1;
      else
	return TLD_SUCCESS;
    }

  return TLD_INVALID;
}
|
|
find_decomposition
|
10
|
21
|
41
|
nfkc.c
|
|
combine_hangul
|
10
|
12
|
27
|
nfkc.c
|
|
g_ucs4_to_utf8
|
9
|
26
|
41
|
nfkc.c
|
|
stringprep_profile
|
9
|
25
|
39
|
stringprep.c
|
|
g_unichar_to_utf8
|
8
|
28
|
51
|
nfkc.c
|
|
idna_to_unicode_4z4z
|
7
|
38
|
64
|
idna.c
|
|
stringprep
|
7
|
32
|
55
|
stringprep.c
|
|
g_unicode_canonical_ordering
|
7
|
24
|
36
|
nfkc.c
|
|
pr29_4
|
7
|
15
|
27
|
pr29.c
|
|
tld_get_table
|
5
|
9
|
14
|
tld.c
|
|
stringprep_apply_table_to_string
|
5
|
17
|
33
|
stringprep.c
|
|
tld_check_4t
|
5
|
13
|
24
|
tld.c
|
|
tld_default_table
|
4
|
8
|
16
|
tld.c
|
|
decode_digit
|
4
|
4
|
6
|
punycode.c
|
|
tld_check_4
|
4
|
13
|
28
|
tld.c
|
|
idna_to_unicode_44i
|
4
|
12
|
24
|
idna.c
|
|
stringprep_find_string_in_table
|
4
|
10
|
22
|
stringprep.c
|
|
decompose_hangul
|
4
|
10
|
21
|
nfkc.c
|
|
first_column
|
4
|
10
|
12
|
pr29.c
|
|
adapt
|
3
|
9
|
16
|
punycode.c
|
|
stringprep_4zi_1
|
3
|
8
|
18
|
stringprep.c
|
|
combinationclass
|
3
|
7
|
11
|
pr29.c
|
|
in_last_column_row
|
3
|
7
|
11
|
pr29.c
|
|
stringprep_utf8_nfkc_normalize
|
3
|
7
|
15
|
nfkc.c
|
|
stringprep_utf8_to_ucs4
|
3
|
7
|
15
|
nfkc.c
|
|
tld_check_4z
|
3
|
6
|
14
|
tld.c
|
|
stringprep_4zi
|
3
|
6
|
12
|
stringprep.c
|
|
tld_check_4tz
|
3
|
6
|
13
|
tld.c
|
|
tld_get_4z
|
3
|
6
|
13
|
tld.c
|
|
_compare_table_element
|
3
|
5
|
10
|
stringprep.c
|
|
stringprep_check_version
|
3
|
3
|
8
|
version.c
|
|
tld_get_z
|
3
|
14
|
22
|
tld.c
|
|
tld_check_8z
|
3
|
11
|
21
|
tld.c
|
|
idna_to_unicode_8zlz
|
3
|
10
|
18
|
idna.c
|
|
idna_to_unicode_8z8z
|
3
|
10
|
18
|
idna.c
|
|
tld_check_lz
|
3
|
10
|
20
|
tld.c
|
|
tld_strerror
|
2
|
18
|
40
|
strerror-tld.c
|
|
punycode_strerror
|
2
|
14
|
32
|
strerror-punycode.c
|
|
pr29_strerror
|
2
|
12
|
28
|
strerror-pr29.c
|
|
idna_to_unicode_8z4z
|
2
|
9
|
16
|
idna.c
|
|
idna_to_ascii_8z
|
2
|
9
|
18
|
idna.c
|
|
idna_to_unicode_lzlz
|
2
|
8
|
15
|
idna.c
|
|
idna_to_ascii_lz
|
2
|
8
|
16
|
idna.c
|
|
g_utf8_get_char
|
2
|
8
|
14
|
nfkc.c
|
|
pr29_8z
|
2
|
8
|
16
|
pr29.c
|
|
stringprep_ucs4_nfkc_normalize
|
2
|
8
|
15
|
nfkc.c
|
|
g_utf8_normalize
|
2
|
6
|
13
|
nfkc.c
|
|
g_utf8_strlen
|
2
|
6
|
15
|
nfkc.c
|
|
pr29_4z
|
2
|
6
|
10
|
pr29.c
|
|
stringprep_find_character_in_table
|
2
|
3
|
37
|
stringprep.c
|
|
encode_basic
|
2
|
2
|
6
|
punycode.c
|
|
stringprep_strerror
|
2
|
32
|
68
|
strerror-stringprep.c
|
|
idna_strerror
|
2
|
30
|
64
|
strerror-idna.c
|
|
stringprep_unichar_to_utf8
|
1
|
1
|
5
|
nfkc.c
|
|
stringprep_utf8_to_unichar
|
1
|
1
|
5
|
nfkc.c
|
|
idn_free
|
1
|
1
|
5
|
idn-free.c
|
|
stringprep_utf8_to_locale
|
1
|
1
|
5
|
toutf8.c
|
|
stringprep_locale_to_utf8
|
1
|
1
|
5
|
toutf8.c
|
|
stringprep_convert
|
1
|
1
|
16
|
toutf8.c
|
|
encode_digit
|
1
|
1
|
7
|
punycode.c
|
|
stringprep_ucs4_to_utf8
|
1
|
1
|
7
|
nfkc.c
|