Cyclomatic Complexity report for GNU Libidn 1.38

/*
 * ToASCII for a single label.
 *
 * IN/INLEN give the label as UCS-4 code points; the input does not have
 * to be zero terminated.  OUT receives the zero-terminated ASCII result;
 * at most 64 bytes (terminator included) are written to it.  FLAGS is
 * tested for IDNA_ALLOW_UNASSIGNED and IDNA_USE_STD3_ASCII_RULES.
 *
 * Returns IDNA_SUCCESS, or one of the IDNA_* error codes used below.
 */
int
idna_to_ascii_4i (const uint32_t * in, size_t inlen, char *out, int flags)
{
  const size_t prefixlen = strlen (IDNA_ACE_PREFIX);
  uint32_t *src;
  size_t len, outlen;
  size_t i;
  int all_ascii;
  int rc;

  /* Step 1: a label made only of ASCII code points skips NAMEPREP. */
  all_ascii = 1;
  for (i = 0; i < inlen; i++)
    {
      if (in[i] > 0x7F)
        {
          all_ascii = 0;
          break;
        }
    }

  if (all_ascii)
    {
      /* Work on a private, zero-terminated copy of the input. */
      src = malloc (sizeof (in[0]) * (inlen + 1));
      if (src == NULL)
        return IDNA_MALLOC_ERROR;

      memcpy (src, in, sizeof (in[0]) * inlen);
      src[inlen] = 0;
    }
  else
    {
      /* Step 2: run NAMEPREP on a UTF-8 copy, growing the buffer until
         the prepared string fits, then convert back to UCS-4. */
      char *utf8 = stringprep_ucs4_to_utf8 (in, (ssize_t) inlen, NULL, NULL);

      if (utf8 == NULL)
        return IDNA_MALLOC_ERROR;

      len = strlen (utf8);
      do
        {
          char *grown;

          len = 2 * len + 10;
          grown = realloc (utf8, len);
          if (grown == NULL)
            {
              free (utf8);
              return IDNA_MALLOC_ERROR;
            }
          utf8 = grown;

          rc = (flags & IDNA_ALLOW_UNASSIGNED)
            ? stringprep_nameprep (utf8, len)
            : stringprep_nameprep_no_unassigned (utf8, len);
        }
      while (rc == STRINGPREP_TOO_SMALL_BUFFER);

      if (rc != STRINGPREP_OK)
        {
          free (utf8);
          return IDNA_STRINGPREP_ERROR;
        }

      src = stringprep_utf8_to_ucs4 (utf8, -1, NULL);
      free (utf8);

      if (!src)
        return IDNA_MALLOC_ERROR;
    }

  /* Step 3: optional STD3 checks -- only letters, digits and hyphen
     among the ASCII code points, and no hyphen first or last. */
  if (flags & IDNA_USE_STD3_ASCII_RULES)
    {
      for (i = 0; src[i]; i++)
        {
          uint32_t c = src[i];

          if (c <= 0x2C || c == 0x2E || c == 0x2F
              || (c >= 0x3A && c <= 0x40)
              || (c >= 0x5B && c <= 0x60)
              || (c >= 0x7B && c <= 0x7F))
            {
              free (src);
              return IDNA_CONTAINS_NON_LDH;
            }
        }

      if (src[0] == 0x002D || (i > 0 && src[i - 1] == 0x002D))
        {
          free (src);
          return IDNA_CONTAINS_MINUS;
        }
    }

  /* Step 4: if the (possibly prepared) label is pure ASCII we are done
     after copying it out.  The copy is made unconditionally, capped at
     the first 64 positions; longer labels are rejected. */
  all_ascii = 1;
  for (i = 0; src[i]; i++)
    {
      if (src[i] > 0x7F)
        all_ascii = 0;
      if (i < 64)
        out[i] = src[i];
    }
  if (i >= 64)
    {
      free (src);
      return IDNA_INVALID_LENGTH;
    }
  out[i] = '\0';
  len = i;			/* number of code points in src */

  if (all_ascii)
    goto step8;

  /* Step 5: the label must not already start with the ACE prefix.
     The zero terminator of src stops the comparison on short labels. */
  for (i = 0; i < prefixlen; i++)
    {
      if (((uint32_t) IDNA_ACE_PREFIX[i] & 0xFF) != src[i])
        break;
    }
  if (i == prefixlen)
    {
      free (src);
      return IDNA_CONTAINS_ACE_PREFIX;
    }

  /* Step 6: Punycode-encode behind the space reserved for the prefix. */
  outlen = 63 - prefixlen;
  rc = punycode_encode (len, src, NULL, &outlen, &out[prefixlen]);
  if (rc != PUNYCODE_SUCCESS)
    {
      free (src);
      return IDNA_PUNYCODE_ERROR;
    }
  out[prefixlen + outlen] = '\0';

  /* Step 7: prepend the ACE prefix. */
  memcpy (out, IDNA_ACE_PREFIX, prefixlen);

  /* Step 8: an empty result is not a valid label (the upper bound was
     already enforced in step 4 and by the outlen passed in step 6). */
step8:
  free (src);
  if (out[0] == '\0')
    return IDNA_INVALID_LENGTH;

  return IDNA_SUCCESS;
}

↓

stringprep_4i

149

/*
 * Apply the stringprep PROFILE to the UCS-4 string in UCS4, in place.
 *
 * *LEN is the current number of code points and is updated on success;
 * MAXUCS4LEN is the capacity of UCS4.  PROFILE is an array of steps
 * terminated by an entry whose operation is zero.
 *
 * Returns STRINGPREP_OK, or the STRINGPREP_* error code of the first
 * step that fails.
 */
int
stringprep_4i (uint32_t * ucs4, size_t *len, size_t maxucs4len,
	       Stringprep_profile_flags flags,
	       const Stringprep_profile * profile)
{
  size_t n = *len;
  size_t step;

  for (step = 0; profile[step].operation; step++)
    {
      const Stringprep_profile *op = &profile[step];

      switch (op->operation)
        {
        case STRINGPREP_NFKC:
          {
            uint32_t *norm;

            if (UNAPPLICAPLEFLAGS (flags, op->flags))
              break;

            /* Profile requires NFKC, but caller asked for no NFKC. */
            if ((flags & STRINGPREP_NO_NFKC) && !op->flags)
              return STRINGPREP_FLAG_ERROR;

            norm = stringprep_ucs4_nfkc_normalize (ucs4, n);
            if (!norm)
              return STRINGPREP_NFKC_FAILED;

            /* The normalized string is zero terminated; measure it. */
            n = 0;
            while (norm[n])
              n++;

            if (n >= maxucs4len)
              {
                free (norm);
                return STRINGPREP_TOO_SMALL_BUFFER;
              }

            memcpy (ucs4, norm, n * sizeof (ucs4[0]));
            free (norm);
          }
          break;

        case STRINGPREP_PROHIBIT_TABLE:
          if (stringprep_find_string_in_table (ucs4, n, NULL, op->table,
                                               op->table_size) != -1)
            return STRINGPREP_CONTAINS_PROHIBITED;
          break;

        case STRINGPREP_UNASSIGNED_TABLE:
          if (UNAPPLICAPLEFLAGS (flags, op->flags))
            break;
          if ((flags & STRINGPREP_NO_UNASSIGNED)
              && stringprep_find_string_in_table (ucs4, n, NULL, op->table,
                                                  op->table_size) != -1)
            return STRINGPREP_CONTAINS_UNASSIGNED;
          break;

        case STRINGPREP_MAP_TABLE:
          {
            int rc;

            if (UNAPPLICAPLEFLAGS (flags, op->flags))
              break;
            rc = stringprep_apply_table_to_string (ucs4, &n, maxucs4len,
                                                   op->table,
                                                   op->table_size);
            if (rc != STRINGPREP_OK)
              return rc;
          }
          break;

        /* The bidi tables are only consulted from the STRINGPREP_BIDI
           step below; on their own they do nothing. */
        case STRINGPREP_BIDI_PROHIBIT_TABLE:
        case STRINGPREP_BIDI_RAL_TABLE:
        case STRINGPREP_BIDI_L_TABLE:
          break;

        case STRINGPREP_BIDI:
          {
            int seen_prohibit = 0;
            int seen_ral = 0;
            int seen_l = 0;
            size_t ral_idx = SIZE_MAX;	/* profile index of a matching RAL table */
            size_t l_idx = SIZE_MAX;	/* profile index of a matching L table */
            size_t j;

            /* Walk the whole profile again looking for the bidi tables. */
            for (j = 0; profile[j].operation; j++)
              {
                const Stringprep_profile *t = &profile[j];
                int found;

                if (t->operation != STRINGPREP_BIDI_PROHIBIT_TABLE
                    && t->operation != STRINGPREP_BIDI_RAL_TABLE
                    && t->operation != STRINGPREP_BIDI_L_TABLE)
                  continue;

                found = stringprep_find_string_in_table (ucs4, n, NULL,
                                                         t->table,
                                                         t->table_size) != -1;

                if (t->operation == STRINGPREP_BIDI_PROHIBIT_TABLE)
                  {
                    seen_prohibit = 1;
                    if (found)
                      return STRINGPREP_BIDI_CONTAINS_PROHIBITED;
                  }
                else if (t->operation == STRINGPREP_BIDI_RAL_TABLE)
                  {
                    seen_ral = 1;
                    if (found)
                      ral_idx = j;
                  }
                else
                  {
                    seen_l = 1;
                    if (found)
                      l_idx = j;
                  }
              }

            /* A profile with a BIDI step must supply all three tables. */
            if (!seen_prohibit || !seen_ral || !seen_l)
              return STRINGPREP_PROFILE_ERROR;

            if (ral_idx != SIZE_MAX && l_idx != SIZE_MAX)
              return STRINGPREP_BIDI_BOTH_L_AND_RAL;

            if (ral_idx != SIZE_MAX)
              {
                const Stringprep_profile *ral = &profile[ral_idx];

                /* Both the first and the last code point must be RAL. */
                if (stringprep_find_character_in_table (ucs4[0], ral->table,
                                                        ral->table_size) == -1
                    || stringprep_find_character_in_table (ucs4[n - 1],
                                                           ral->table,
                                                           ral->table_size)
                    == -1)
                  return STRINGPREP_BIDI_LEADTRAIL_NOT_RAL;
              }
          }
          break;

        default:
          return STRINGPREP_PROFILE_ERROR;
        }
    }

  *len = n;

  return STRINGPREP_OK;
}

↓

_g_utf8_normalize_wc

135

/*
 * Normalize the UTF-8 string STR into a newly allocated, zero-terminated
 * array of gunichar.
 *
 * At most MAX_LEN bytes of STR are read; a negative MAX_LEN means "up to
 * the terminating NUL".  MODE selects the form: do_compat (NFKC/NFKD) is
 * forwarded to find_decomposition(), and do_compose (NFC/NFKC) enables
 * the final recomposition pass.
 *
 * Returns NULL if g_malloc() returns NULL.  The caller receives the
 * buffer -- presumably to be released with g_free(); confirm against
 * the callers.
 */
static gunichar *
_g_utf8_normalize_wc (const gchar * str, gssize max_len, GNormalizeMode mode)
{
  gsize n_wc;
  gunichar *wc_buffer;
  const char *p;
  gsize last_start;
  gboolean do_compat = (mode == G_NORMALIZE_NFKC || mode == G_NORMALIZE_NFKD);
  gboolean do_compose = (mode == G_NORMALIZE_NFC || mode == G_NORMALIZE_NFKC);

  /* Pass 1: count how many code points the decomposed string needs. */
  n_wc = 0;
  p = str;
  while ((max_len < 0 || p < str + max_len) && *p)
    {
      const gchar *decomp;
      gunichar wc = g_utf8_get_char (p);

      if (wc >= SBase && wc < SBase + SCount)
	{
	  /* Hangul syllable: with a NULL buffer only the length is
	     reported. */
	  gsize result_len;
	  decompose_hangul (wc, NULL, &result_len);
	  n_wc += result_len;
	}
      else
	{
	  decomp = find_decomposition (wc, do_compat);

	  if (decomp)
	    n_wc += g_utf8_strlen (decomp);
	  else
	    n_wc++;
	}

      p = g_utf8_next_char (p);
    }

  /* One extra slot for the zero terminator. */
  wc_buffer = g_malloc (sizeof (gunichar) * (n_wc + 1));
  if (!wc_buffer)
    return NULL;

  /* Pass 2: write the decomposition, canonically ordering each run.
     last_start is the index where the run currently being collected
     begins. */
  last_start = 0;
  n_wc = 0;
  p = str;
  while ((max_len < 0 || p < str + max_len) && *p)
    {
      gunichar wc = g_utf8_get_char (p);
      const gchar *decomp;
      int cc;
      gsize old_n_wc = n_wc;	/* where this character's output begins */

      if (wc >= SBase && wc < SBase + SCount)
	{
	  gsize result_len;
	  decompose_hangul (wc, wc_buffer + n_wc, &result_len);
	  n_wc += result_len;
	}
      else
	{
	  decomp = find_decomposition (wc, do_compat);

	  if (decomp)
	    {
	      /* The decomposition is itself a UTF-8 string. */
	      const char *pd;
	      for (pd = decomp; *pd != '\0'; pd = g_utf8_next_char (pd))
		wc_buffer[n_wc++] = g_utf8_get_char (pd);
	    }
	  else
	    wc_buffer[n_wc++] = wc;
	}

      if (n_wc > 0)
	{
	  cc = COMBINING_CLASS (wc_buffer[old_n_wc]);

	  /* When the first code point just written has combining class
	     0, order everything from last_start up to n_wc and start a
	     new run at this character. */
	  if (cc == 0)
	    {
	      g_unicode_canonical_ordering (wc_buffer + last_start,
					    n_wc - last_start);
	      last_start = old_n_wc;
	    }
	}

      p = g_utf8_next_char (p);
    }

  /* Order the final run, which the loop above leaves pending. */
  if (n_wc > 0)
    {
      g_unicode_canonical_ordering (wc_buffer + last_start,
				    n_wc - last_start);
      /* dead assignment: last_start = n_wc; */
    }

  wc_buffer[n_wc] = 0;

  /* All decomposed and reordered */

  /* Composition pass (NFC/NFKC only).  From here on last_start is the
     index of the most recent code point with combining class 0, and
     last_cc is the combining class of the previous code point. */
  if (do_compose && n_wc > 0)
    {
      gsize i, j;
      int last_cc = 0;
      last_start = 0;

      for (i = 0; i < n_wc; i++)
	{
	  int cc = COMBINING_CLASS (wc_buffer[i]);

	  /* combine() is only attempted when the previous code point had
	     class 0 or a class different from this one; on success it
	     stores the composed value into wc_buffer[last_start]. */
	  if (i > 0 &&
	      (last_cc == 0 || last_cc != cc) &&
	      combine (wc_buffer[last_start], wc_buffer[i],
		       &wc_buffer[last_start]))
	    {
	      /* Remove wc_buffer[i] by shifting the tail left by one. */
	      for (j = i + 1; j < n_wc; j++)
		wc_buffer[j - 1] = wc_buffer[j];
	      n_wc--;
	      /* Step back so that the loop's i++ revisits this index,
	         which now holds the next code point. */
	      i--;

	      if (i == last_start)
		last_cc = 0;
	      else
		last_cc = COMBINING_CLASS (wc_buffer[i - 1]);

	      continue;
	    }

	  if (cc == 0)
	    last_start = i;

	  last_cc = cc;
	}
    }

  /* Re-terminate: composition may have shortened the string. */
  wc_buffer[n_wc] = 0;

  return wc_buffer;
}

↓

punycode_decode

106

/*
 * Decode the INPUT_LENGTH bytes of Punycode in INPUT into code points
 * stored in OUTPUT.
 *
 * On entry *OUTPUT_LENGTH is the capacity of OUTPUT in code points
 * (values above maxint are clamped to maxint); on success it is set to
 * the number of code points written.  CASE_FLAGS may be NULL; otherwise
 * it is filled in parallel with OUTPUT, one flag per code point.
 *
 * Returns punycode_success, or punycode_bad_input, punycode_big_output
 * or punycode_overflow.  On error *OUTPUT_LENGTH is left unchanged, but
 * OUTPUT and CASE_FLAGS may have been partially written.
 */
int
punycode_decode (size_t input_length,
		 const char input[],
		 size_t *output_length,
		 punycode_uint output[], unsigned char case_flags[])
{
  punycode_uint n, out, i, max_out, bias, oldi, w, k, digit, t;
  size_t b, j, in;

  /* Initialize the state: */

  n = initial_n;
  out = i = 0;
  max_out = *output_length > maxint ? maxint
    : (punycode_uint) * output_length;
  bias = initial_bias;

  /* Handle the basic code points:  Let b be the number of input code */
  /* points before the last delimiter, or 0 if there is none, then    */
  /* copy the first b code points to the output.                      */

  for (b = j = 0; j < input_length; ++j)
    if (delim (input[j]))
      b = j;
  if (b > max_out)
    return punycode_big_output;

  for (j = 0; j < b; ++j)
    {
      if (case_flags)
	case_flags[out] = flagged (input[j]);
      if (!basic (input[j]))
	return punycode_bad_input;
      output[out++] = input[j];
    }
  /* Everything after the last delimiter (the delimiter itself is     */
  /* skipped when b > 0) must consist of basic code points as well.   */
  for (j = b + (b > 0); j < input_length; ++j)
    if (!basic (input[j]))
      return punycode_bad_input;

  /* Main decoding loop:  Start just after the last delimiter if any  */
  /* basic code points were copied; start at the beginning otherwise. */
  /* The loop header's ++out accounts for the code point inserted at  */
  /* the bottom of each iteration.                                    */

  for (in = b > 0 ? b + 1 : 0; in < input_length; ++out)
    {

      /* in is the index of the next ASCII code point to be consumed, */
      /* and out is the number of code points in the output array.    */

      /* Decode a generalized variable-length integer into delta,  */
      /* which gets added to i.  The overflow checking is easier   */
      /* if we increase i as we go, then subtract off its starting */
      /* value at the end to obtain delta.                         */

      for (oldi = i, w = 1, k = base;; k += base)
	{
	  if (in >= input_length)
	    return punycode_bad_input;
	  digit = decode_digit (input[in++]);
	  if (digit >= base)
	    return punycode_bad_input;
	  /* Reject before multiplying so that i += digit * w cannot wrap. */
	  if (digit > (maxint - i) / w)
	    return punycode_overflow;
	  i += digit * w;
	  t = k <= bias /* + tmin */ ? tmin :	/* +tmin not needed */
	    k >= bias + tmax ? tmax : k - bias;
	  /* A digit below the threshold t terminates this integer. */
	  if (digit < t)
	    break;
	  if (w > maxint / (base - t))
	    return punycode_overflow;
	  w *= (base - t);
	}

      bias = adapt (i - oldi, out + 1, oldi == 0);

      /* i was supposed to wrap around from out+1 to 0,   */
      /* incrementing n each time, so we'll fix that now: */

      if (i / (out + 1) > maxint - n)
	return punycode_overflow;
      n += i / (out + 1);
      /* Reject values above 0x10FFFF and the range 0xD800..0xDBFF.   */
      /* NOTE(review): 0xDC00..0xDFFF is not rejected here -- confirm */
      /* whether that is intended.                                    */
      if (n > 0x10FFFF || (n >= 0xD800 && n <= 0xDBFF))
	return punycode_bad_input;
      i %= (out + 1);

      /* Insert n at position i of the output: */

      /* not needed for Punycode: */
      /* if (basic(n)) return punycode_bad_input; */
      if (out >= max_out)
	return punycode_big_output;

      if (case_flags)
	{
	  /* Open a gap at index i; case_flags entries are one byte each. */
	  memmove (case_flags + i + 1, case_flags + i, out - i);
	  /* Case of last ASCII code point determines case flag: */
	  case_flags[i] = flagged (input[in - 1]);
	}

      /* Open a gap at index i in output, then store n and advance i. */
      memmove (output + i + 1, output + i, (out - i) * sizeof *output);
      output[i++] = n;
    }

  *output_length = (size_t) out;
  /* cannot overflow because out <= old value of *output_length */
  return punycode_success;
}

↓

punycode_encode

114

/*
 * Encode the INPUT_LENGTH code points in INPUT as Punycode into OUTPUT.
 *
 * On entry *OUTPUT_LENGTH is the capacity of OUTPUT in bytes; on
 * success it is set to the number of bytes written.  No terminating
 * NUL is stored.  CASE_FLAGS may be NULL; otherwise it is indexed in
 * parallel with INPUT and passed on to encode_basic()/encode_digit().
 *
 * Returns punycode_success, or punycode_overflow, punycode_big_output
 * or punycode_bad_input.  On error *OUTPUT_LENGTH is left unchanged,
 * but OUTPUT may have been partially written.
 */
int
punycode_encode (size_t input_length,
		 const punycode_uint input[],
		 const unsigned char case_flags[],
		 size_t *output_length, char output[])
{
  punycode_uint input_len, n, delta, h, b, bias, j, m, q, k, t;
  size_t out, max_out;

  /* The Punycode spec assumes that the input length is the same type */
  /* of integer as a code point, so we need to convert the size_t to  */
  /* a punycode_uint, which could overflow.                           */

  if (input_length > maxint)
    return punycode_overflow;
  input_len = (punycode_uint) input_length;

  /* Initialize the state: */

  n = initial_n;
  delta = 0;
  out = 0;
  max_out = *output_length;
  bias = initial_bias;

  /* Handle the basic code points: */

  for (j = 0; j < input_len; ++j)
    {
      if (basic (input[j]))
	{
	  /* Two free slots are required before writing one; presumably */
	  /* the spare one is for the delimiter appended after the loop. */
	  if (max_out - out < 2)
	    return punycode_big_output;
	  output[out++] = case_flags ?
	    encode_basic (input[j], case_flags[j]) : (char) input[j];
	}
      /* Non-basic input above 0x10FFFF or in 0xD800..0xDBFF is rejected. */
      else if (input[j] > 0x10FFFF
	       || (input[j] >= 0xD800 && input[j] <= 0xDBFF))
	return punycode_bad_input;
      /* else if (input[j] < n) return punycode_bad_input; */
      /* (not needed for Punycode with unsigned code points) */
    }

  h = b = (punycode_uint) out;
  /* cannot overflow because out <= input_len <= maxint */

  /* h is the number of code points that have been handled, b is the  */
  /* number of basic code points, and out is the number of ASCII code */
  /* points that have been output.                                    */

  if (b > 0)
    output[out++] = delimiter;

  /* Main encoding loop: */

  while (h < input_len)
    {
      /* All non-basic code points < n have been     */
      /* handled already.  Find the next larger one: */

      for (m = maxint, j = 0; j < input_len; ++j)
	{
	  /* if (basic(input[j])) continue; */
	  /* (not needed for Punycode) */
	  if (input[j] >= n && input[j] < m)
	    m = input[j];
	}

      /* Increase delta enough to advance the decoder's    */
      /* <n,i> state to <m,0>, but guard against overflow: */

      if (m - n > (maxint - delta) / (h + 1))
	return punycode_overflow;
      delta += (m - n) * (h + 1);
      n = m;

      for (j = 0; j < input_len; ++j)
	{
	  /* Punycode does not need to check whether input[j] is basic: */
	  if (input[j] < n /* || basic(input[j]) */ )
	    {
	      /* delta is unsigned, so wrapping to 0 signals overflow. */
	      if (++delta == 0)
		return punycode_overflow;
	    }

	  if (input[j] == n)
	    {
	      /* Represent delta as a generalized variable-length integer: */

	      for (q = delta, k = base;; k += base)
		{
		  /* Checked on every pass, including the one that breaks, */
		  /* so the final digit written below also has room.       */
		  if (out >= max_out)
		    return punycode_big_output;
		  t = k <= bias /* + tmin */ ? tmin :	/* +tmin not needed */
		    k >= bias + tmax ? tmax : k - bias;
		  if (q < t)
		    break;
		  output[out++] = encode_digit (t + (q - t) % (base - t), 0);
		  q = (q - t) / (base - t);
		}

	      output[out++] = encode_digit (q, case_flags && case_flags[j]);
	      bias = adapt (delta, h + 1, h == b);
	      delta = 0;
	      ++h;
	    }
	}

      ++delta, ++n;
    }

  *output_length = out;
  return punycode_success;
}

↓

idna_to_ascii_4z

/*
 * Convert the zero-terminated UCS-4 domain name INPUT to ASCII, label
 * by label, using idna_to_ascii_4i() with FLAGS.
 *
 * On success *OUTPUT points to a newly allocated, zero-terminated string
 * with the labels joined by '.', and IDNA_SUCCESS is returned.  On
 * failure the error code of the failing step is returned; in the main
 * loop *OUTPUT is left NULL.
 */
int
idna_to_ascii_4z (const uint32_t * input, char **output, int flags)
{
  const uint32_t *label = input;
  const uint32_t *stop;
  char ace[64];
  char *joined = NULL;
  int rc;

  /* 1) Whenever dots are used as label separators, the following
     characters MUST be recognized as dots: U+002E (full stop),
     U+3002 (ideographic full stop), U+FF0E (fullwidth full stop),
     U+FF61 (halfwidth ideographic full stop). */

  /* Implicit zero-length root label: the result is the empty string. */
  if (input[0] == 0)
    {
      *output = malloc (1);
      if (!*output)
        return IDNA_MALLOC_ERROR;
      (*output)[0] = '\0';
      return IDNA_SUCCESS;
    }

  /* Explicit zero-length root label: the result is a single dot. */
  if (DOTP (input[0]) && input[1] == 0)
    {
      *output = malloc (2);
      if (!*output)
        return IDNA_MALLOC_ERROR;
      (*output)[0] = '.';
      (*output)[1] = '\0';
      return IDNA_SUCCESS;
    }

  *output = NULL;
  for (;;)
    {
      /* Find the end of the current label. */
      stop = label;
      while (*stop && !DOTP (*stop))
        stop++;

      if (*stop == '\0' && label == stop)
        {
          /* Empty label at the very end: explicit root label. */
          ace[0] = '\0';
        }
      else
        {
          rc = idna_to_ascii_4i (label, (size_t) (stop - label), ace, flags);
          if (rc != IDNA_SUCCESS)
            {
              free (joined);
              return rc;
            }
        }

      if (joined == NULL)
        {
          joined = strdup (ace);
          if (!joined)
            return IDNA_MALLOC_ERROR;
        }
      else
        {
          /* Grow by one dot, the new label and the terminator. */
          size_t have = strlen (joined);
          size_t add = strlen (ace);
          char *grown = realloc (joined, have + 1 + add + 1);

          if (!grown)
            {
              free (joined);
              return IDNA_MALLOC_ERROR;
            }
          joined = grown;
          joined[have] = '.';
          memcpy (joined + have + 1, ace, add + 1);
        }

      if (!*stop)
        break;
      label = stop + 1;
    }

  *output = joined;

  return IDNA_SUCCESS;
}

↓

tld_get_4

/*
 * Isolate the top-level domain of the UCS-4 string IN (INLEN code
 * points) and return it in *OUT as a newly allocated, zero-terminated,
 * lower-case ASCII string.
 *
 * A TLD is recognized only when the input ends in one or more ASCII
 * letters that are immediately preceded by a dot (as defined by DOTP).
 *
 * Returns TLD_SUCCESS with *OUT set; otherwise *OUT is NULL and the
 * result is TLD_NODATA (IN is NULL or INLEN is 0), TLD_NO_TLD (no such
 * suffix) or TLD_MALLOC_ERROR.
 */
int
tld_get_4 (const uint32_t * in, size_t inlen, char **out)
{
  size_t start;			/* index of the first letter of the candidate */
  size_t olen;
  size_t i;
  char *out_s;

  *out = NULL;
  if (!in || inlen == 0)
    return TLD_NODATA;

  /* Scan backwards over the trailing (latin) letters.  Indices are used
     instead of a moving pointer so that no pointer before the start of
     IN is ever formed, even when the whole input consists of letters. */
  start = inlen;
  while (start > 0
	 && ((in[start - 1] >= 0x41 && in[start - 1] <= 0x5A)
	     || (in[start - 1] >= 0x61 && in[start - 1] <= 0x7A)))
    start--;
  olen = inlen - start;

  /* There must be at least one letter, and a dot right in front of it. */
  if (olen == 0 || start == 0 || !DOTP (in[start - 1]))
    return TLD_NO_TLD;

  /* Found something that appears a TLD. */
  out_s = malloc (olen + 1);
  if (!out_s)
    return TLD_MALLOC_ERROR;

  /* Transcribe to lowercase ascii string: only letters reach this
     point, so anything not above 'Z' is an upper-case letter. */
  for (i = 0; i < olen; i++)
    {
      uint32_t c = in[start + i];

      out_s[i] = (char) (c > 0x5A ? c : c + 0x20);
    }
  out_s[olen] = 0;

  *out = out_s;
  return TLD_SUCCESS;
}

↓

idna_to_unicode_internal

119

/*
 * ToUnicode for a single label.
 *
 * UTF8IN is a heap-allocated, zero-terminated UTF-8 label.  This
 * function takes ownership of it: it may realloc() it and always frees
 * it before returning.  OUT receives the decoded code points followed
 * by a zero; on entry *OUTLEN is the capacity of OUT in code points
 * (including room for that zero).  Once decoding has been attempted
 * *OUTLEN has been decremented by one, and after a successful decode it
 * holds the number of code points stored, not counting the zero.
 *
 * Returns IDNA_SUCCESS, or an IDNA_* error code.  A zero *OUTLEN on a
 * label that does carry the ACE prefix is reported as
 * IDNA_PUNYCODE_ERROR, the same code used when the decoder runs out of
 * output space.
 */
static int
idna_to_unicode_internal (char *utf8in,
			  uint32_t * out, size_t *outlen, int flags)
{
  const size_t prefixlen = strlen (IDNA_ACE_PREFIX);
  char tmpout[64];
  size_t utf8len = strlen (utf8in) + 1;
  size_t addlen = 0, addinc = utf8len / 10 + 1;
  int result;
  int rc;

  /*
   * ToUnicode consists of the following steps:
   *
   * 1. If the sequence contains any code points outside the ASCII range
   * (0..7F) then proceed to step 2, otherwise skip to step 3.
   */

  {
    size_t i;
    int inasciirange;

    inasciirange = 1;
    for (i = 0; utf8in[i]; i++)
      if (utf8in[i] & ~0x7F)
	inasciirange = 0;
    if (inasciirange)
      goto step3;
  }

  /*
   * 2. Perform the steps specified in [NAMEPREP] and fail if there is an
   * error. (If step 3 of ToASCII is also performed here, it will not
   * affect the overall behavior of ToUnicode, but it is not
   * necessary.) The AllowUnassigned flag is used in [NAMEPREP].
   *
   * The buffer is grown by a doubling increment until NAMEPREP no
   * longer reports that it is too small.
   */
  do
    {
      char *newp = realloc (utf8in, utf8len + addlen);
      if (newp == NULL)
	{
	  /* utf8in is still the old, valid block; it is freed below. */
	  result = IDNA_MALLOC_ERROR;
	  goto done;
	}
      utf8in = newp;
      if (flags & IDNA_ALLOW_UNASSIGNED)
	rc = stringprep_nameprep (utf8in, utf8len + addlen);
      else
	rc = stringprep_nameprep_no_unassigned (utf8in, utf8len + addlen);
      addlen += addinc;
      addinc *= 2;
    }
  while (rc == STRINGPREP_TOO_SMALL_BUFFER);

  if (rc != STRINGPREP_OK)
    {
      result = IDNA_STRINGPREP_ERROR;
      goto done;
    }

  /* 3. Verify that the sequence begins with the ACE prefix, and save a
   * copy of the sequence.
   * ... The ToASCII and ToUnicode operations MUST recognize the ACE
   * prefix in a case-insensitive manner.
   */

step3:
  if (c_strncasecmp (utf8in, IDNA_ACE_PREFIX, prefixlen) != 0)
    {
      result = IDNA_NO_ACE_PREFIX;
      goto done;
    }

  /* 4. Remove the ACE prefix (the terminator moves along with it).
   */

  memmove (utf8in, &utf8in[prefixlen], strlen (utf8in) - prefixlen + 1);

  /* 5. Decode the sequence using the decoding algorithm in [PUNYCODE]
   * and fail if there is an error. Save a copy of the result of
   * this step.
   */

  /* Without this guard a zero capacity would wrap around to SIZE_MAX
     in the decrement below, and the decoder and the terminator store
     would write outside OUT. */
  if (*outlen == 0)
    {
      result = IDNA_PUNYCODE_ERROR;
      goto done;
    }

  (*outlen)--;			/* reserve one for the zero */

  rc = punycode_decode (strlen (utf8in), utf8in, outlen, out, NULL);
  if (rc != PUNYCODE_SUCCESS)
    {
      result = IDNA_PUNYCODE_ERROR;
      goto done;
    }

  out[*outlen] = 0;		/* add zero */

  /* 6. Apply ToASCII.
   */

  rc = idna_to_ascii_4i (out, *outlen, tmpout, flags);
  if (rc != IDNA_SUCCESS)
    {
      result = rc;
      goto done;
    }

  /* 7. Verify that the result of step 6 matches the saved copy from
   * step 3, using a case-insensitive ASCII comparison.
   *
   * utf8in no longer carries the prefix, so the prefix of tmpout is
   * checked separately.  This also keeps the second comparison inside
   * the string when ToASCII returned a plain ASCII label shorter than
   * the prefix.
   */

  if (c_strncasecmp (tmpout, IDNA_ACE_PREFIX, prefixlen) != 0
      || c_strcasecmp (utf8in, tmpout + prefixlen) != 0)
    {
      result = IDNA_ROUNDTRIP_VERIFY_ERROR;
      goto done;
    }

  /* 8. Return the saved copy from step 5.
   */

  result = IDNA_SUCCESS;

done:
  free (utf8in);
  return result;
}

↓

combine

/*
 * Try to compose the pair (A, B) into a single code point.
 *
 * Returns TRUE and stores the composed value in *RESULT when a
 * composition exists; returns FALSE otherwise.  Hangul is handled by
 * combine_hangul(); everything else is looked up in the compose tables
 * through COMPOSE_INDEX.
 */
static gboolean
combine (gunichar a, gunichar b, gunichar * result)
{
  gushort ia, ib;
  gunichar composed;

  if (combine_hangul (a, b, result))
    return TRUE;

  /* A is a "first single": it composes with exactly one second. */
  ia = COMPOSE_INDEX (a);
  if (ia >= COMPOSE_FIRST_SINGLE_START && ia < COMPOSE_SECOND_START)
    {
      int slot = ia - COMPOSE_FIRST_SINGLE_START;

      if (b != compose_first_single[slot][0])
        return FALSE;

      *result = compose_first_single[slot][1];
      return TRUE;
    }

  /* B is a "second single": it composes with exactly one first. */
  ib = COMPOSE_INDEX (b);
  if (ib >= COMPOSE_SECOND_SINGLE_START)
    {
      int slot = ib - COMPOSE_SECOND_SINGLE_START;

      if (a != compose_second_single[slot][0])
        return FALSE;

      *result = compose_second_single[slot][1];
      return TRUE;
    }

  /* General case: both indices must fall in the two-dimensional table. */
  if (ia < COMPOSE_FIRST_START || ia >= COMPOSE_FIRST_SINGLE_START
      || ib < COMPOSE_SECOND_START || ib >= COMPOSE_SECOND_SINGLE_START)
    return FALSE;

  composed =
    compose_array[ia - COMPOSE_FIRST_START][ib - COMPOSE_SECOND_START];
  if (!composed)
    return FALSE;

  *result = composed;
  return TRUE;
}

↓

g_utf8_to_ucs4_fast

/*
 * Convert the UTF-8 string STR to a newly allocated, zero-terminated
 * gunichar array without validating the input.
 *
 * LEN is the maximum number of bytes to read, or negative to read up to
 * the terminating NUL.  If ITEMS_WRITTEN is not NULL it receives the
 * number of code points stored (the terminator is not counted).
 *
 * Returns NULL when STR is NULL or when g_malloc() returns NULL.
 */
static gunichar *
g_utf8_to_ucs4_fast (const gchar * str, glong len, glong * items_written)
{
  gunichar *result;
  gsize n_chars, i;
  const gchar *p;

  g_return_val_if_fail (str != NULL, NULL);

  /* Pass 1: count the characters so the buffer can be sized. */
  n_chars = 0;
  for (p = str; (len < 0 || p < str + len) && *p; p = g_utf8_next_char (p))
    n_chars++;

  result = g_malloc (sizeof (gunichar) * (n_chars + 1));
  if (!result)
    return NULL;

  /* Pass 2: decode exactly n_chars characters. */
  p = str;
  for (i = 0; i < n_chars; i++)
    {
      gunichar wc = (guchar) * p++;

      if (wc >= 0x80)
        {
          gunichar mask = 0x40;

          if (G_UNLIKELY ((wc & mask) == 0))
            {
              /* A stray 10xxxxxx continuation byte: substitute the
                 replacement character and consume only this byte. */
              wc = 0xfffd;
            }
          else
            {
              /* Fold in one continuation byte for every length bit
                 that is still set in the (shifted) lead byte. */
              do
                {
                  wc <<= 6;
                  wc |= (guchar) (*p++) & 0x3f;
                  mask <<= 5;
                }
              while ((wc & mask) != 0);

              /* Drop what is left of the lead byte's length marker. */
              wc &= mask - 1;
            }
        }

      result[i] = wc;
    }
  result[n_chars] = 0;

  if (items_written)
    *items_written = n_chars;

  return result;
}

↓

_tld_checkchar

/*
 * Check whether the code point CH is acceptable for the TLD table TLD.
 *
 * Returns TLD_SUCCESS when TLD is NULL, when CH is one of [-a-z0-9] or
 * a dot (DOTP), or when CH lies inside one of the table's valid
 * intervals; returns TLD_INVALID otherwise.
 */
static int
_tld_checkchar (uint32_t ch, const Tld_table * tld)
{
  const Tld_table_element *lo, *hi;

  if (!tld)
    return TLD_SUCCESS;

  /* Characters that are accepted regardless of the table. */
  if (ch == 0x2D || DOTP (ch)
      || (ch >= 0x30 && ch <= 0x39) || (ch >= 0x61 && ch <= 0x7A))
    return TLD_SUCCESS;

  /* Binary search over the half-open range [lo, hi) of intervals. */
  lo = tld->valid;
  hi = lo + tld->nvalid;
  while (lo < hi)
    {
      const Tld_table_element *mid = lo + (hi - lo) / 2;

      if (ch < mid->start)
        hi = mid;
      else if (ch > mid->end)
        lo = mid + 1;
      else
        return TLD_SUCCESS;
    }

  return TLD_INVALID;
}

find_decomposition

combine_hangul

g_ucs4_to_utf8

stringprep_profile

g_unichar_to_utf8

idna_to_unicode_4z4z

stringprep

g_unicode_canonical_ordering

pr29_4

tld_get_table

stringprep_apply_table_to_string

tld_check_4t

tld_default_table

decode_digit

tld_check_4

idna_to_unicode_44i

stringprep_find_string_in_table

decompose_hangul

first_column

adapt

stringprep_4zi_1

combinationclass

in_last_column_row

stringprep_utf8_nfkc_normalize

stringprep_utf8_to_ucs4

tld_check_4z

stringprep_4zi

tld_check_4tz

tld_get_4z

_compare_table_element

stringprep_check_version

version.c

tld_get_z

tld_check_8z

idna_to_unicode_8zlz

idna_to_unicode_8z8z

tld_check_lz

tld_strerror

strerror-tld.c

punycode_strerror

strerror-punycode.c

pr29_strerror

strerror-pr29.c

idna_to_unicode_8z4z

idna_to_ascii_8z

idna_to_unicode_lzlz

idna_to_ascii_lz

g_utf8_get_char

pr29_8z

stringprep_ucs4_nfkc_normalize

g_utf8_normalize

g_utf8_strlen

pr29_4z

stringprep_find_character_in_table

encode_basic

stringprep_strerror

strerror-stringprep.c

idna_strerror

strerror-idna.c

stringprep_unichar_to_utf8

stringprep_utf8_to_unichar

idn_free

idn-free.c

stringprep_utf8_to_locale

toutf8.c

stringprep_locale_to_utf8

toutf8.c

stringprep_convert

toutf8.c

encode_digit

stringprep_ucs4_to_utf8