Changeset 391 for python/trunk/Objects/stringlib

python/trunk

Property svn:mergeinfo set to
/python/vendor/Python-2.7.6 merged eligible
/python/vendor/current merged eligible

python/trunk/Objects/stringlib/README.txt

-              r2
+              r391
 STRINGLIB_EMPTY
+    a PyObject representing the empty string
+int STRINGLIB_CMP(STRINGLIB_CHAR*, STRINGLIB_CHAR*, Py_ssize_t)
+    compares two strings. returns 0 if they match, and non-zero if not.
+    a PyObject representing the empty string, only to be used if
+    STRINGLIB_MUTABLE is 0
 Py_ssize_t STRINGLIB_LEN(PyObject*)
 …
     returns the pointer to the character data for the given string
     object (which must be of the right type)
+int STRINGLIB_CHECK_EXACT(PyObject *)
+    returns true if the object is an instance of our type, not a subclass
+STRINGLIB_MUTABLE
+    must be 0 or 1 to tell the cpp macros in stringlib code if the object
+    being operated on is mutable or not

python/trunk/Objects/stringlib/count.h

-              r2
+              r391
 Py_LOCAL_INLINE(Py_ssize_t)
 stringlib_count(const STRINGLIB_CHAR* str, Py_ssize_t str_len,
+                const STRINGLIB_CHAR* sub, Py_ssize_t sub_len)
+                const STRINGLIB_CHAR* sub, Py_ssize_t sub_len,
+                Py_ssize_t maxcount)
+{
     Py_ssize_t count;
 …
         return 0; /* start > len(str) */
     if (sub_len == 0)
         return str_len + 1;
+        return (str_len < maxcount) ? str_len + 1 : maxcount;
     count = fastsearch(str, str_len, sub, sub_len, FAST_COUNT);
+    count = fastsearch(str, str_len, sub, sub_len, maxcount, FAST_COUNT);
     if (count < 0)
         count = 0; /* no match */
+        return 0; /* no match */
     return count;
 …
 #endif
-/*
-Local variables:
-c-basic-offset: 4
-indent-tabs-mode: nil
-End:
-*/

python/trunk/Objects/stringlib/ctype.h

r2	r391
108	108	return newobj;
109	109	}
110

python/trunk/Objects/stringlib/fastsearch.h

-              r2
+              r391
 /* fast search/count implementation, based on a mix between boyer-
    moore and horspool, with a few more bells and whistles on the top.
    for some more background, see: http://effbot.org/stringlib */
+   for some more background, see: http://effbot.org/zone/stringlib.htm */
 /* note: fastsearch may access s[n], which isn't a problem when using
 …
 #define FAST_COUNT 0
 #define FAST_SEARCH 1
+#define FAST_RSEARCH 2
+#if LONG_BIT >= 128
+#define STRINGLIB_BLOOM_WIDTH 128
+#elif LONG_BIT >= 64
+#define STRINGLIB_BLOOM_WIDTH 64
+#elif LONG_BIT >= 32
+#define STRINGLIB_BLOOM_WIDTH 32
+#else
+#error "LONG_BIT is smaller than 32"
+#endif
+#define STRINGLIB_BLOOM_ADD(mask, ch) \
+    ((mask |= (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1)))))
+#define STRINGLIB_BLOOM(mask, ch)     \
+    ((mask &  (1UL << ((ch) & (STRINGLIB_BLOOM_WIDTH -1)))))
 Py_LOCAL_INLINE(Py_ssize_t)
 fastsearch(const STRINGLIB_CHAR* s, Py_ssize_t n,
            const STRINGLIB_CHAR* p, Py_ssize_t m,
            int mode)
+           Py_ssize_t maxcount, int mode)
+{
     long mask;
+    unsigned long mask;
     Py_ssize_t skip, count = 0;
     Py_ssize_t i, j, mlast, w;
 …
     w = n - m;
     if (w < 0)
+    if (w < 0 || (mode == FAST_COUNT && maxcount == 0))
         return -1;
 …
         if (mode == FAST_COUNT) {
             for (i = 0; i < n; i++)
+                if (s[i] == p[0]) {
+                    count++;
+                    if (count == maxcount)
+                        return maxcount;
+                }
+            return count;
+        } else if (mode == FAST_SEARCH) {
+            for (i = 0; i < n; i++)
                 if (s[i] == p[0])
+                    count++;
+            return count;
+        } else {
+            for (i = 0; i < n; i++)
+                    return i;
+        } else {    /* FAST_RSEARCH */
+            for (i = n - 1; i > -1; i--)
                 if (s[i] == p[0])
                     return i;
 …
     mlast = m - 1;
+    skip = mlast - 1;
+    mask = 0;
+    /* create compressed boyer-moore delta 1 table */
+    skip = mlast - 1;
+    /* process pattern[:-1] */
+    for (mask = i = 0; i < mlast; i++) {
+        mask |= (1 << (p[i] & 0x1F));
+        if (p[i] == p[mlast])
+            skip = mlast - i - 1;
+    }
+    /* process pattern[-1] outside the loop */
+    mask |= (1 << (p[mlast] & 0x1F));
+    if (mode != FAST_RSEARCH) {
+    for (i = 0; i <= w; i++) {
+        /* note: using mlast in the skip path slows things down on x86 */
+        if (s[i+m-1] == p[m-1]) {
+            /* candidate match */
+            for (j = 0; j < mlast; j++)
+                if (s[i+j] != p[j])
+                    break;
+            if (j == mlast) {
+                /* got a match! */
+                if (mode != FAST_COUNT)
+        /* create compressed boyer-moore delta 1 table */
+        /* process pattern[:-1] */
+        for (i = 0; i < mlast; i++) {
+            STRINGLIB_BLOOM_ADD(mask, p[i]);
+            if (p[i] == p[mlast])
+                skip = mlast - i - 1;
+        }
+        /* process pattern[-1] outside the loop */
+        STRINGLIB_BLOOM_ADD(mask, p[mlast]);
+        for (i = 0; i <= w; i++) {
+            /* note: using mlast in the skip path slows things down on x86 */
+            if (s[i+m-1] == p[m-1]) {
+                /* candidate match */
+                for (j = 0; j < mlast; j++)
+                    if (s[i+j] != p[j])
+                        break;
+                if (j == mlast) {
+                    /* got a match! */
+                    if (mode != FAST_COUNT)
+                        return i;
+                    count++;
+                    if (count == maxcount)
+                        return maxcount;
+                    i = i + mlast;
+                    continue;
+                }
+                /* miss: check if next character is part of pattern */
+                if (!STRINGLIB_BLOOM(mask, s[i+m]))
+                    i = i + m;
+                else
+                    i = i + skip;
+            } else {
+                /* skip: check if next character is part of pattern */
+                if (!STRINGLIB_BLOOM(mask, s[i+m]))
+                    i = i + m;
+            }
+        }
+    } else {    /* FAST_RSEARCH */
+        /* create compressed boyer-moore delta 1 table */
+        /* process pattern[0] outside the loop */
+        STRINGLIB_BLOOM_ADD(mask, p[0]);
+        /* process pattern[:0:-1] */
+        for (i = mlast; i > 0; i--) {
+            STRINGLIB_BLOOM_ADD(mask, p[i]);
+            if (p[i] == p[0])
+                skip = i - 1;
+        }
+        for (i = w; i >= 0; i--) {
+            if (s[i] == p[0]) {
+                /* candidate match */
+                for (j = mlast; j > 0; j--)
+                    if (s[i+j] != p[j])
+                        break;
+                if (j == 0)
+                    /* got a match! */
                     return i;
+                count++;
+                i = i + mlast;
+                continue;
+                /* miss: check if previous character is part of pattern */
+                if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1]))
+                    i = i - m;
+                else
+                    i = i - skip;
+            } else {
+                /* skip: check if previous character is part of pattern */
+                if (i > 0 && !STRINGLIB_BLOOM(mask, s[i-1]))
+                    i = i - m;
+            }
-            /* miss: check if next character is part of pattern */
-            if (!(mask & (1 << (s[i+m] & 0x1F))))
-                i = i + m;
-            else
-                i = i + skip;
-        } else {
-            /* skip: check if next character is part of pattern */
-            if (!(mask & (1 << (s[i+m] & 0x1F))))
-                i = i + m;
+        }
+    }
 …
 #endif
-/*
-Local variables:
-c-basic-offset: 4
-indent-tabs-mode: nil
-End:
-*/

python/trunk/Objects/stringlib/find.h

-              r2
+              r391
         return offset;
     pos = fastsearch(str, str_len, sub, sub_len, FAST_SEARCH);
+    pos = fastsearch(str, str_len, sub, sub_len, -1, FAST_SEARCH);
     if (pos >= 0)
 …
                 Py_ssize_t offset)
+{
+    /* XXX - create reversefastsearch helper! */
+    if (sub_len == 0) {
+        if (str_len < 0)
+            return -1;
+        return str_len + offset;
+    } else {
+        Py_ssize_t j, pos = -1;
+        for (j = str_len - sub_len; j >= 0; --j)
+            if (STRINGLIB_CMP(str+j, sub, sub_len) == 0) {
+                pos = j + offset;
+                break;
+            }
+        return pos;
+    Py_ssize_t pos;
+    if (str_len < 0)
+        return -1;
+    if (sub_len == 0)
+        return str_len + offset;
+    pos = fastsearch(str, str_len, sub, sub_len, -1, FAST_RSEARCH);
+    if (pos >= 0)
+        pos += offset;
+    return pos;
+}
+/* helper macro to fixup start/end slice values */
+#define ADJUST_INDICES(start, end, len)         \
+    if (end > len)                              \
+        end = len;                              \
+    else if (end < 0) {                         \
+        end += len;                             \
+        if (end < 0)                            \
+            end = 0;                            \
+    }                                           \
+    if (start < 0) {                            \
+        start += len;                           \
+        if (start < 0)                          \
+            start = 0;                          \
+    }
+}
 Py_LOCAL_INLINE(Py_ssize_t)
 …
                      Py_ssize_t start, Py_ssize_t end)
+{
+    if (start < 0)
+        start += str_len;
+    if (start < 0)
+        start = 0;
+    if (end > str_len)
+        end = str_len;
+    if (end < 0)
+        end += str_len;
+    if (end < 0)
+        end = 0;
+    return stringlib_find(
+        str + start, end - start,
+        sub, sub_len, start
+        );
+    ADJUST_INDICES(start, end, str_len);
+    return stringlib_find(str + start, end - start, sub, sub_len, start);
+}
 …
                       Py_ssize_t start, Py_ssize_t end)
+{
+    if (start < 0)
+        start += str_len;
+    if (start < 0)
+        start = 0;
+    if (end > str_len)
+        end = str_len;
+    if (end < 0)
+        end += str_len;
+    if (end < 0)
+        end = 0;
+    ADJUST_INDICES(start, end, str_len);
     return stringlib_rfind(str + start, end - start, sub, sub_len, start);
+}
 #if defined(STRINGLIB_STR) && !defined(FROM_BYTEARRAY)
+#ifdef STRINGLIB_WANT_CONTAINS_OBJ
 Py_LOCAL_INLINE(int)
 …
+}
+#endif /* STRINGLIB_STR */
+#ifdef FROM_UNICODE
+#endif /* STRINGLIB_WANT_CONTAINS_OBJ */
 /*
 This function is a helper for the "find" family (find, rfind, index,
 rindex) of unicodeobject.c file, because they all have the same
 behaviour for the arguments.
+rindex) and for count, startswith and endswith, because they all have
+the same behaviour for the arguments.
 It does not touch the variables received until it knows everything
 is ok.
-Note that we receive a pointer to the pointer of the substring object,
-so when we create that object in this function we don't DECREF it,
-because it continues living in the caller functions (those functions,
-after finishing using the substring, must DECREF it).
 */
+#define FORMAT_BUFFER_SIZE 50
 Py_LOCAL_INLINE(int)
+_ParseTupleFinds (PyObject *args, PyObject **substring,
+                  Py_ssize_t *start, Py_ssize_t *end) {
+    PyObject *tmp_substring;
+stringlib_parse_args_finds(const char * function_name, PyObject *args,
+                           PyObject **subobj,
+                           Py_ssize_t *start, Py_ssize_t *end)
+{
+    PyObject *tmp_subobj;
     Py_ssize_t tmp_start = 0;
     Py_ssize_t tmp_end = PY_SSIZE_T_MAX;
     PyObject *obj_start=Py_None, *obj_end=Py_None;
+    char format[FORMAT_BUFFER_SIZE] = "O|OO:";
+    size_t len = strlen(format);
+    if (!PyArg_ParseTuple(args, "O|OO:find", &tmp_substring,
+         &obj_start, &obj_end))
+    strncpy(format + len, function_name, FORMAT_BUFFER_SIZE - len - 1);
+    format[FORMAT_BUFFER_SIZE - 1] = '\0';
+    if (!PyArg_ParseTuple(args, format, &tmp_subobj, &obj_start, &obj_end))
         return 0;
 …
             return 0;
-    tmp_substring = PyUnicode_FromObject(tmp_substring);
-    if (!tmp_substring)
-        return 0;
     *start = tmp_start;
     *end = tmp_end;
     *substring = tmp_substring;
+    *subobj = tmp_subobj;
     return 1;
+}
+#endif /* FROM_UNICODE */
+#undef FORMAT_BUFFER_SIZE
+#if STRINGLIB_IS_UNICODE
+/*
+Wraps stringlib_parse_args_finds() and additionally ensures that the
+first argument is a unicode object.
+Note that we receive a pointer to the pointer of the substring object,
+so when we create that object in this function we don't DECREF it,
+because it continues living in the caller functions (those functions,
+after finishing using the substring, must DECREF it).
+*/
+Py_LOCAL_INLINE(int)
+stringlib_parse_args_finds_unicode(const char * function_name, PyObject *args,
+                                   PyUnicodeObject **substring,
+                                   Py_ssize_t *start, Py_ssize_t *end)
+{
+    PyObject *tmp_substring;
+    if(stringlib_parse_args_finds(function_name, args, &tmp_substring,
+                                  start, end)) {
+        tmp_substring = PyUnicode_FromObject(tmp_substring);
+        if (!tmp_substring)
+            return 0;
+        *substring = (PyUnicodeObject *)tmp_substring;
+        return 1;
+    }
+    return 0;
+}
+#endif /* STRINGLIB_IS_UNICODE */
 #endif /* STRINGLIB_FIND_H */
-/*
-Local variables:
-c-basic-offset: 4
-indent-tabs-mode: nil
-End:
-*/

python/trunk/Objects/stringlib/formatter.h

-              r2
+              r391
 /* implements the string, long, and float formatters.  that is,
    string.__format__, etc. */
+#include <locale.h>
 /* Before including this, you must include either:
 …
    FORMAT_LONG
    FORMAT_FLOAT
+   FORMAT_COMPLEX
    to be whatever you want the public names of these functions to
    be.  These are the only non-static functions defined here.
 */
-#define ALLOW_PARENS_FOR_SIGN 0
 /* Raises an exception about an unknown presentation type for this
 …
+}
+static void
+invalid_comma_type(STRINGLIB_CHAR presentation_type)
+{
+#if STRINGLIB_IS_UNICODE
+    /* See comment in unknown_presentation_type */
+    if (presentation_type > 32 && presentation_type < 128)
+#endif
+        PyErr_Format(PyExc_ValueError,
+                     "Cannot specify ',' with '%c'.",
+                     (char)presentation_type);
+#if STRINGLIB_IS_UNICODE
+    else
+        PyErr_Format(PyExc_ValueError,
+                     "Cannot specify ',' with '\\x%x'.",
+                     (unsigned int)presentation_type);
+#endif
+}
 /*
     get_integer consumes 0 or more decimal digit characters from an
 …
                   Py_ssize_t *result)
+{
     Py_ssize_t accumulator, digitval, oldaccumulator;
+    Py_ssize_t accumulator, digitval;
     int numdigits;
     accumulator = numdigits = 0;
 …
             break;
         /*
            This trick was copied from old Unicode format code.  It's cute,
+           but would really suck on an old machine with a slow divide
            implementation.  Fortunately, in the normal case we do not
            expect too many digits.
+           Detect possible overflow before it happens:
+              accumulator * 10 + digitval > PY_SSIZE_T_MAX if and only if
+              accumulator > (PY_SSIZE_T_MAX - digitval) / 10.
         */
+        oldaccumulator = accumulator;
+        accumulator *= 10;
+        if ((accumulator+10)/10 != oldaccumulator+1) {
+        if (accumulator > (PY_SSIZE_T_MAX - digitval) / 10) {
             PyErr_Format(PyExc_ValueError,
                          "Too many decimal digits in format string");
             return -1;
+        }
         accumulator += digitval;
+        accumulator = accumulator * 10 + digitval;
+    }
     *result = accumulator;
 …
     switch (c) {
     case ' ': case '+': case '-':
-#if ALLOW_PARENS_FOR_SIGN
-    case '(':
-#endif
         return 1;
     default:
 …
     STRINGLIB_CHAR sign;
     Py_ssize_t width;
+    int thousands_separators;
     Py_ssize_t precision;
     STRINGLIB_CHAR type;
 } InternalFormatSpec;
+#if 0
+/* Occasionally useful for debugging. Should normally be commented out. */
+static void
+DEBUG_PRINT_FORMAT_SPEC(InternalFormatSpec *format)
+{
+    printf("internal format spec: fill_char %d\n", format->fill_char);
+    printf("internal format spec: align %d\n", format->align);
+    printf("internal format spec: alternate %d\n", format->alternate);
+    printf("internal format spec: sign %d\n", format->sign);
+    printf("internal format spec: width %zd\n", format->width);
+    printf("internal format spec: thousands_separators %d\n",
+           format->thousands_separators);
+    printf("internal format spec: precision %zd\n", format->precision);
+    printf("internal format spec: type %c\n", format->type);
+    printf("\n");
+}
+#endif
 /*
 …
 static int
 parse_internal_render_format_spec(STRINGLIB_CHAR *format_spec,
                                   Py_ssize_t format_spec_len,
+                                  Py_ssize_t format_spec_len,
                                   InternalFormatSpec *format,
+                                  char default_type)
+                                  char default_type,
+                                  char default_align)
+{
     STRINGLIB_CHAR *ptr = format_spec;
 …
        the input string */
+    Py_ssize_t specified_width;
+    Py_ssize_t consumed;
+    int align_specified = 0;
     format->fill_char = '\0';
     format->align = '\0';
+    format->align = default_align;
     format->alternate = 0;
     format->sign = '\0';
     format->width = -1;
+    format->thousands_separators = 0;
     format->precision = -1;
     format->type = default_type;
 …
         format->align = ptr[1];
         format->fill_char = ptr[0];
+        align_specified = 1;
         ptr += 2;
+    }
     else if (end-ptr >= 1 && is_alignment_token(ptr[0])) {
         format->align = ptr[0];
+        align_specified = 1;
         ++ptr;
+    }
 …
         format->sign = ptr[0];
         ++ptr;
-#if ALLOW_PARENS_FOR_SIGN
-        if (end-ptr >= 1 && ptr[0] == ')') {
-            ++ptr;
+        }
-#endif
+    }
 …
        applies to integers. */
     if (end-ptr >= 1 && ptr[0] == '#') {
         format->alternate = 1;
         ++ptr;
+        format->alternate = 1;
+        ++ptr;
+    }
 …
     if (format->fill_char == '\0' && end-ptr >= 1 && ptr[0] == '0') {
         format->fill_char = '0';
         if (format->align == '\0') {
+        if (!align_specified) {
             format->align = '=';
+        }
 …
+    }
+    /* XXX add error checking */
+    specified_width = get_integer(&ptr, end, &format->width);
+    /* if specified_width is 0, we didn't consume any characters for
+       the width. in that case, reset the width to -1, because
+       get_integer() will have set it to zero */
+    if (specified_width == 0) {
+    consumed = get_integer(&ptr, end, &format->width);
+    if (consumed == -1)
+        /* Overflow error. Exception already set. */
+        return 0;
+    /* If consumed is 0, we didn't consume any characters for the
+       width. In that case, reset the width to -1, because
+       get_integer() will have set it to zero. -1 is how we record
+       that the width wasn't specified. */
+    if (consumed == 0)
         format->width = -1;
+    /* Comma signifies add thousands separators */
+    if (end-ptr && ptr[0] == ',') {
+        format->thousands_separators = 1;
+        ++ptr;
+    }
 …
         ++ptr;
+        /* XXX add error checking */
+        specified_width = get_integer(&ptr, end, &format->precision);
+        /* not having a precision after a dot is an error */
+        if (specified_width == 0) {
+        consumed = get_integer(&ptr, end, &format->precision);
+        if (consumed == -1)
+            /* Overflow error. Exception already set. */
+            return 0;
+        /* Not having a precision after a dot is an error. */
+        if (consumed == 0) {
             PyErr_Format(PyExc_ValueError,
                          "Format specifier missing precision");
 …
+    }
     /* Finally, parse the type field */
+    /* Finally, parse the type field. */
     if (end-ptr > 1) {
         /* invalid conversion spec */
+        /* More than one char remain, invalid conversion spec. */
         PyErr_Format(PyExc_ValueError, "Invalid conversion specification");
         return 0;
 …
+    }
+    /* Do as much validating as we can, just by looking at the format
+       specifier.  Do not take into account what type of formatting
+       we're doing (int, float, string). */
+    if (format->thousands_separators) {
+        switch (format->type) {
+        case 'd':
+        case 'e':
+        case 'f':
+        case 'g':
+        case 'E':
+        case 'G':
+        case '%':
+        case 'F':
+        case '\0':
+            /* These are allowed. See PEP 378.*/
+            break;
+        default:
+            invalid_comma_type(format->type);
+            return 0;
+        }
+    }
     return 1;
+}
+#if defined FORMAT_FLOAT || defined FORMAT_LONG
+/* Calculate the padding needed. */
+static void
+calc_padding(Py_ssize_t nchars, Py_ssize_t width, STRINGLIB_CHAR align,
+             Py_ssize_t *n_lpadding, Py_ssize_t *n_rpadding,
+             Py_ssize_t *n_total)
+{
+    if (width >= 0) {
+        if (nchars > width)
+            *n_total = nchars;
+        else
+            *n_total = width;
+    }
+    else {
+        /* not specified, use all of the chars and no more */
+        *n_total = nchars;
+    }
+    /* Figure out how much leading space we need, based on the
+       aligning */
+    if (align == '>')
+        *n_lpadding = *n_total - nchars;
+    else if (align == '^')
+        *n_lpadding = (*n_total - nchars) / 2;
+    else if (align == '<' || align == '=')
+        *n_lpadding = 0;
+    else {
+        /* We should never have an unspecified alignment. */
+        *n_lpadding = 0;
+        assert(0);
+    }
+    *n_rpadding = *n_total - nchars - *n_lpadding;
+}
+/* Do the padding, and return a pointer to where the caller-supplied
+   content goes. */
+static STRINGLIB_CHAR *
+fill_padding(STRINGLIB_CHAR *p, Py_ssize_t nchars, STRINGLIB_CHAR fill_char,
+             Py_ssize_t n_lpadding, Py_ssize_t n_rpadding)
+{
+    /* Pad on left. */
+    if (n_lpadding)
+        STRINGLIB_FILL(p, fill_char, n_lpadding);
+    /* Pad on right. */
+    if (n_rpadding)
+        STRINGLIB_FILL(p + nchars + n_lpadding, fill_char, n_rpadding);
+    /* Pointer to the user content. */
+    return p + n_lpadding;
+}
+#if defined FORMAT_FLOAT || defined FORMAT_LONG || defined FORMAT_COMPLEX
 /************************************************************************/
 /*********** common routines for numeric formatting *********************/
 /************************************************************************/
+/* Locale type codes. */
+#define LT_CURRENT_LOCALE 0
+#define LT_DEFAULT_LOCALE 1
+#define LT_NO_LOCALE 2
+/* Locale info needed for formatting integers and the part of floats
+   before and including the decimal. Note that locales only support
+   8-bit chars, not unicode. */
+typedef struct {
+    char *decimal_point;
+    char *thousands_sep;
+    char *grouping;
+} LocaleInfo;
 /* describes the layout for an integer, see the comment in
 …
     Py_ssize_t n_spadding;
     Py_ssize_t n_rpadding;
+    char lsign;
+    Py_ssize_t n_lsign;
+    char rsign;
+    Py_ssize_t n_rsign;
+    Py_ssize_t n_total; /* just a convenience, it's derivable from the
+                           other fields */
+    char sign;
+    Py_ssize_t n_sign;      /* number of digits needed for sign (0/1) */
+    Py_ssize_t n_grouped_digits; /* Space taken up by the digits, including
+                                    any grouping chars. */
+    Py_ssize_t n_decimal;   /* 0 if only an integer */
+    Py_ssize_t n_remainder; /* Digits in decimal and/or exponent part,
+                               excluding the decimal itself, if
+                               present. */
+    /* These 2 are not the widths of fields, but are needed by
+       STRINGLIB_GROUPING. */
+    Py_ssize_t n_digits;    /* The number of digits before a decimal
+                               or exponent. */
+    Py_ssize_t n_min_width; /* The min_width we used when we computed
+                               the n_grouped_digits width. */
 } NumberFieldWidths;
+/* Given a number of the form:
+   digits[remainder]
+   where ptr points to the start and end points to the end, find where
+    the integer part ends. This could be a decimal, an exponent, both,
+    or neither.
+   If a decimal point is present, set *has_decimal and increment
+    remainder beyond it.
+   Results are undefined (but shouldn't crash) for improperly
+    formatted strings.
+*/
+static void
+parse_number(STRINGLIB_CHAR *ptr, Py_ssize_t len,
+             Py_ssize_t *n_remainder, int *has_decimal)
+{
+    STRINGLIB_CHAR *end = ptr + len;
+    STRINGLIB_CHAR *remainder;
+    while (ptr<end && isdigit(*ptr))
+        ++ptr;
+    remainder = ptr;
+    /* Does remainder start with a decimal point? */
+    *has_decimal = ptr<end && *remainder == '.';
+    /* Skip the decimal point. */
+    if (*has_decimal)
+        remainder++;
+    *n_remainder = end - remainder;
+}
 /* not all fields of format are used.  for example, precision is
 …
    about what it does?  or is passing a single format parameter easier
    and more efficient enough to justify a little obfuscation? */
+static void
+calc_number_widths(NumberFieldWidths *spec, STRINGLIB_CHAR actual_sign,
+                   Py_ssize_t n_prefix, Py_ssize_t n_digits,
+                   const InternalFormatSpec *format)
+{
+static Py_ssize_t
+calc_number_widths(NumberFieldWidths *spec, Py_ssize_t n_prefix,
+                   STRINGLIB_CHAR sign_char, STRINGLIB_CHAR *number,
+                   Py_ssize_t n_number, Py_ssize_t n_remainder,
+                   int has_decimal, const LocaleInfo *locale,
+                   const InternalFormatSpec *format)
+{
+    Py_ssize_t n_non_digit_non_padding;
+    Py_ssize_t n_padding;
+    spec->n_digits = n_number - n_remainder - (has_decimal?1:0);
     spec->n_lpadding = 0;
+    spec->n_prefix = 0;
+    spec->n_prefix = n_prefix;
+    spec->n_decimal = has_decimal ? strlen(locale->decimal_point) : 0;
+    spec->n_remainder = n_remainder;
     spec->n_spadding = 0;
     spec->n_rpadding = 0;
+    spec->lsign = '\0';
+    spec->n_lsign = 0;
+    spec->rsign = '\0';
+    spec->n_rsign = 0;
+    spec->sign = '\0';
+    spec->n_sign = 0;
     /* the output will look like:
        |                                                                    |
        | <lpadding> <lsign> <prefix> <spadding> <digits> <rsign> <rpadding> |
        |                                                                    |
        lsign and rsign are computed from format->sign and the actual
+       |                                                                                         |
+       | <lpadding> <sign> <prefix> <spadding> <grouped_digits> <decimal> <remainder> <rpadding> |
+       |                                                                                         |
+       sign is computed from format->sign and the actual
        sign of the number
 …
     /* compute the various parts we're going to write */
+    if (format->sign == '+') {
+    switch (format->sign) {
+    case '+':
         /* always put a + or - */
+        spec->n_lsign = 1;
+        spec->lsign = (actual_sign == '-' ? '-' : '+');
+    }
+#if ALLOW_PARENS_FOR_SIGN
+    else if (format->sign == '(') {
+        if (actual_sign == '-') {
+            spec->n_lsign = 1;
+            spec->lsign = '(';
+            spec->n_rsign = 1;
+            spec->rsign = ')';
+        spec->n_sign = 1;
+        spec->sign = (sign_char == '-' ? '-' : '+');
+        break;
+    case ' ':
+        spec->n_sign = 1;
+        spec->sign = (sign_char == '-' ? '-' : ' ');
+        break;
+    default:
+        /* Not specified, or the default (-) */
+        if (sign_char == '-') {
+            spec->n_sign = 1;
+            spec->sign = '-';
+        }
+    }
+#endif
+    else if (format->sign == ' ') {
+        spec->n_lsign = 1;
+        spec->lsign = (actual_sign == '-' ? '-' : ' ');
+    }
+    else {
+        /* not specified, or the default (-) */
+        if (actual_sign == '-') {
+            spec->n_lsign = 1;
+            spec->lsign = '-';
+    /* The number of chars used for non-digits and non-padding. */
+    n_non_digit_non_padding = spec->n_sign + spec->n_prefix + spec->n_decimal +
+        spec->n_remainder;
+    /* min_width can go negative, that's okay. format->width == -1 means
+       we don't care. */
+    if (format->fill_char == '0' && format->align == '=')
+        spec->n_min_width = format->width - n_non_digit_non_padding;
+    else
+        spec->n_min_width = 0;
+    if (spec->n_digits == 0)
+        /* This case only occurs when using 'c' formatting, we need
+           to special case it because the grouping code always wants
+           to have at least one character. */
+        spec->n_grouped_digits = 0;
+    else
+        spec->n_grouped_digits = STRINGLIB_GROUPING(NULL, 0, NULL,
+                                                    spec->n_digits,
+                                                    spec->n_min_width,
+                                                    locale->grouping,
+                                                    locale->thousands_sep);
+    /* Given the desired width and the total of digit and non-digit
+       space we consume, see if we need any padding. format->width can
+       be negative (meaning no padding), but this code still works in
+       that case. */
+    n_padding = format->width -
+                        (n_non_digit_non_padding + spec->n_grouped_digits);
+    if (n_padding > 0) {
+        /* Some padding is needed. Determine if it's left, space, or right. */
+        switch (format->align) {
+        case '<':
+            spec->n_rpadding = n_padding;
+            break;
+        case '^':
+            spec->n_lpadding = n_padding / 2;
+            spec->n_rpadding = n_padding - spec->n_lpadding;
+            break;
+        case '=':
+            spec->n_spadding = n_padding;
+            break;
+        case '>':
+            spec->n_lpadding = n_padding;
+            break;
+        default:
+            /* Shouldn't get here, but treat it as '>' */
+            spec->n_lpadding = n_padding;
+            assert(0);
+            break;
+        }
+    }
+    spec->n_prefix = n_prefix;
+    /* now the number of padding characters */
+    if (format->width == -1) {
+        /* no padding at all, nothing to do */
+    }
+    else {
+        /* see if any padding is needed */
+        if (spec->n_lsign + n_digits + spec->n_rsign +
+                spec->n_prefix >= format->width) {
+            /* no padding needed, we're already bigger than the
+               requested width */
+    return spec->n_lpadding + spec->n_sign + spec->n_prefix +
+        spec->n_spadding + spec->n_grouped_digits + spec->n_decimal +
+        spec->n_remainder + spec->n_rpadding;
+}
+/* Fill in the digit parts of a number's string representation,
+   as determined in calc_number_widths().
+   No error checking, since we know the buffer is the correct size. */
+static void
+fill_number(STRINGLIB_CHAR *buf, const NumberFieldWidths *spec,
+            STRINGLIB_CHAR *digits, Py_ssize_t n_digits,
+            STRINGLIB_CHAR *prefix, STRINGLIB_CHAR fill_char,
+            LocaleInfo *locale, int toupper)
+{
+    /* Used to keep track of digits, decimal, and remainder. */
+    STRINGLIB_CHAR *p = digits;
+#ifndef NDEBUG
+    Py_ssize_t r;
+#endif
+    if (spec->n_lpadding) {
+        STRINGLIB_FILL(buf, fill_char, spec->n_lpadding);
+        buf += spec->n_lpadding;
+    }
+    if (spec->n_sign == 1) {
+        *buf++ = spec->sign;
+    }
+    if (spec->n_prefix) {
+        memmove(buf,
+                prefix,
+                spec->n_prefix * sizeof(STRINGLIB_CHAR));
+        if (toupper) {
+            Py_ssize_t t;
+            for (t = 0; t < spec->n_prefix; ++t)
+                buf[t] = STRINGLIB_TOUPPER(buf[t]);
+        }
+        else {
+            /* determine which of left, space, or right padding is
+               needed */
+            Py_ssize_t padding = format->width -
+                                    (spec->n_lsign + spec->n_prefix +
+                                     n_digits + spec->n_rsign);
+            if (format->align == '<')
+                spec->n_rpadding = padding;
+            else if (format->align == '>')
+                spec->n_lpadding = padding;
+            else if (format->align == '^') {
+                spec->n_lpadding = padding / 2;
+                spec->n_rpadding = padding - spec->n_lpadding;
+            }
+            else if (format->align == '=')
+                spec->n_spadding = padding;
+            else
+                spec->n_lpadding = padding;
+        }
+    }
+    spec->n_total = spec->n_lpadding + spec->n_lsign + spec->n_prefix +
+            spec->n_spadding + n_digits + spec->n_rsign + spec->n_rpadding;
+}
+/* fill in the non-digit parts of a number's string representation,
+   as determined in calc_number_widths().  returns the pointer to
+   where the digits go. */
+static STRINGLIB_CHAR *
+fill_non_digits(STRINGLIB_CHAR *p_buf, const NumberFieldWidths *spec,
+                STRINGLIB_CHAR *prefix, Py_ssize_t n_digits,
+                STRINGLIB_CHAR fill_char)
+{
+    STRINGLIB_CHAR *p_digits;
+    if (spec->n_lpadding) {
+        STRINGLIB_FILL(p_buf, fill_char, spec->n_lpadding);
+        p_buf += spec->n_lpadding;
+    }
+    if (spec->n_lsign == 1) {
+        *p_buf++ = spec->lsign;
+    }
+    if (spec->n_prefix) {
+        memmove(p_buf,
+                prefix,
+                spec->n_prefix * sizeof(STRINGLIB_CHAR));
+        p_buf += spec->n_prefix;
+        buf += spec->n_prefix;
+    }
     if (spec->n_spadding) {
+        STRINGLIB_FILL(p_buf, fill_char, spec->n_spadding);
+        p_buf += spec->n_spadding;
+    }
+    p_digits = p_buf;
+    p_buf += n_digits;
+    if (spec->n_rsign == 1) {
+        *p_buf++ = spec->rsign;
+    }
+        STRINGLIB_FILL(buf, fill_char, spec->n_spadding);
+        buf += spec->n_spadding;
+    }
+    /* Only for type 'c' special case, it has no digits. */
+    if (spec->n_digits != 0) {
+        /* Fill the digits with InsertThousandsGrouping. */
+#ifndef NDEBUG
+        r =
+#endif
+            STRINGLIB_GROUPING(buf, spec->n_grouped_digits, digits,
+                               spec->n_digits, spec->n_min_width,
+                               locale->grouping, locale->thousands_sep);
+#ifndef NDEBUG
+        assert(r == spec->n_grouped_digits);
+#endif
+        p += spec->n_digits;
+    }
+    if (toupper) {
+        Py_ssize_t t;
+        for (t = 0; t < spec->n_grouped_digits; ++t)
+            buf[t] = STRINGLIB_TOUPPER(buf[t]);
+    }
+    buf += spec->n_grouped_digits;
+    if (spec->n_decimal) {
+        Py_ssize_t t;
+        for (t = 0; t < spec->n_decimal; ++t)
+            buf[t] = locale->decimal_point[t];
+        buf += spec->n_decimal;
+        p += 1;
+    }
+    if (spec->n_remainder) {
+        memcpy(buf, p, spec->n_remainder * sizeof(STRINGLIB_CHAR));
+        buf += spec->n_remainder;
+        p += spec->n_remainder;
+    }
     if (spec->n_rpadding) {
+        STRINGLIB_FILL(p_buf, fill_char, spec->n_rpadding);
+        p_buf += spec->n_rpadding;
+    }
+    return p_digits;
+}
+#endif /* FORMAT_FLOAT || FORMAT_LONG */
+        STRINGLIB_FILL(buf, fill_char, spec->n_rpadding);
+        buf += spec->n_rpadding;
+    }
+}
+static char no_grouping[1] = {CHAR_MAX};
+/* Find the decimal point character(s?), thousands_separator(s?), and
+   grouping description, either for the current locale if type is
+   LT_CURRENT_LOCALE, a hard-coded locale if LT_DEFAULT_LOCALE, or
+   none if LT_NO_LOCALE.
+
+   type:        one of LT_CURRENT_LOCALE, LT_DEFAULT_LOCALE or
+                LT_NO_LOCALE.  Any other value hits assert(0) and no
+                field of *locale_info is written.
+   locale_info: output only; its decimal_point, thousands_sep and
+                grouping members are overwritten.
+
+   For LT_CURRENT_LOCALE the three pointers are copied directly from the
+   struct returned by localeconv(); the strings themselves are not
+   duplicated here. */
+static void
+get_locale_info(int type, LocaleInfo *locale_info)
+{
+    switch (type) {
+    case LT_CURRENT_LOCALE: {
+        struct lconv *locale_data = localeconv();
+        locale_info->decimal_point = locale_data->decimal_point;
+        locale_info->thousands_sep = locale_data->thousands_sep;
+        locale_info->grouping = locale_data->grouping;
+        break;
+    }
+    case LT_DEFAULT_LOCALE:
+        locale_info->decimal_point = ".";
+        locale_info->thousands_sep = ",";
+        locale_info->grouping = "\3"; /* Group every 3 characters.  The
+                                         (implicit) trailing 0 means repeat
+                                         infinitely. */
+        break;
+    case LT_NO_LOCALE:
+        locale_info->decimal_point = ".";
+        locale_info->thousands_sep = ""; /* empty separator string */
+        locale_info->grouping = no_grouping; /* file-scope array holding
+                                                only CHAR_MAX, which in the
+                                                lconv grouping convention
+                                                means "no further
+                                                grouping" */
+        break;
+    default:
+        assert(0); /* unknown locale type: programming error */
+    }
+}
+#endif /* FORMAT_FLOAT || FORMAT_LONG || FORMAT_COMPLEX */
 /************************************************************************/
 …
 format_string_internal(PyObject *value, const InternalFormatSpec *format)
+{
-    Py_ssize_t width; /* total field width */
     Py_ssize_t lpad;
+    STRINGLIB_CHAR *dst;
+    STRINGLIB_CHAR *src = STRINGLIB_STR(value);
+    Py_ssize_t rpad;
+    Py_ssize_t total;
+    STRINGLIB_CHAR *p;
     Py_ssize_t len = STRINGLIB_LEN(value);
     PyObject *result = NULL;
 …
         PyErr_SetString(PyExc_ValueError,
                         "Alternate form (#) not allowed in string format "
                         "specifier");
+                        "specifier");
         goto done;
+    }
 …
+    }
+    if (format->width >= 0) {
+        width = format->width;
+        /* but use at least len characters */
+        if (len > width) {
+            width = len;
+        }
+    }
+    else {
+        /* not specified, use all of the chars and no more */
+        width = len;
+    }
+    calc_padding(len, format->width, format->align, &lpad, &rpad, &total);
     /* allocate the resulting string */
     result = STRINGLIB_NEW(NULL, width);
+    result = STRINGLIB_NEW(NULL, total);
     if (result == NULL)
         goto done;
+    /* now write into that space */
+    dst = STRINGLIB_STR(result);
+    /* figure out how much leading space we need, based on the
+       aligning */
+    if (format->align == '>')
+        lpad = width - len;
+    else if (format->align == '^')
+        lpad = (width - len) / 2;
+    else
+        lpad = 0;
+    /* if right aligning, increment the destination allow space on the
+       left */
+    memcpy(dst + lpad, src, len * sizeof(STRINGLIB_CHAR));
+    /* do any padding */
+    if (width > len) {
+        STRINGLIB_CHAR fill_char = format->fill_char;
+        if (fill_char == '\0') {
+            /* use the default, if not specified */
+            fill_char = ' ';
+        }
+        /* pad on left */
+        if (lpad)
+            STRINGLIB_FILL(dst, fill_char, lpad);
+        /* pad on right */
+        if (width - len - lpad)
+            STRINGLIB_FILL(dst + len + lpad, fill_char, width - len - lpad);
+    }
+    /* Write into that space. First the padding. */
+    p = fill_padding(STRINGLIB_STR(result), len,
+                     format->fill_char=='\0'?' ':format->fill_char,
+                     lpad, rpad);
+    /* Then the source string. */
+    memcpy(p, STRINGLIB_STR(value), len * sizeof(STRINGLIB_CHAR));
 done:
 …
 static PyObject *
 format_int_or_long_internal(PyObject *value, const InternalFormatSpec *format,
                             IntOrLongToString tostring)
+                            IntOrLongToString tostring)
+{
     PyObject *result = NULL;
 …
     STRINGLIB_CHAR *pnumeric_chars;
     STRINGLIB_CHAR numeric_char;
+    STRINGLIB_CHAR sign = '\0';
+    STRINGLIB_CHAR *p;
+    STRINGLIB_CHAR sign_char = '\0';
     Py_ssize_t n_digits;       /* count of digits need from the computed
                                   string */
+    Py_ssize_t n_leading_chars;
+    Py_ssize_t n_grouping_chars = 0; /* Count of additional chars to
+                                        allocate, used for 'n'
+                                        formatting. */
+    Py_ssize_t n_remainder = 0; /* Used only for 'c' formatting, which
+                                   produces non-digits */
     Py_ssize_t n_prefix = 0;   /* Count of prefix chars, (e.g., '0x') */
+    Py_ssize_t n_total;
     STRINGLIB_CHAR *prefix = NULL;
     NumberFieldWidths spec;
     long x;
+    /* Locale settings, either from the actual locale or
+       from a hard-coded pseudo-locale */
+    LocaleInfo locale;
     /* no precision allowed on integers */
 …
         goto done;
+    }
     /* special case for character formatting */
 …
             PyErr_SetString(PyExc_ValueError,
                             "Sign not allowed with integer"
+                            " format specifier 'c'");
+            goto done;
+        }
+        /* Error to specify a comma. */
+        if (format->thousands_separators) {
+            PyErr_SetString(PyExc_ValueError,
+                            "Thousands separators not allowed with integer"
                             " format specifier 'c'");
             goto done;
 …
+        }
 #endif
         numeric_char = (STRINGLIB_CHAR)x;
         pnumeric_chars = &numeric_char;
+        numeric_char = (STRINGLIB_CHAR)x;
+        pnumeric_chars = &numeric_char;
         n_digits = 1;
+        /* As a sort-of hack, we tell calc_number_widths that we only
+           have "remainder" characters. calc_number_widths thinks
+           these are characters that don't get formatted, only copied
+           into the output string. We do this for 'c' formatting,
+           because the characters are likely to be non-digits. */
+        n_remainder = 1;
+    }
     else {
         int base;
         int leading_chars_to_skip = 0;  /* Number of characters added by
                                            PyNumber_ToBase that we want to
                                            skip over. */
+        int leading_chars_to_skip = 0;  /* Number of characters added by
+                                           PyNumber_ToBase that we want to
+                                           skip over. */
         /* Compute the base and how many characters will be added by
 …
         case 'b':
             base = 2;
             leading_chars_to_skip = 2; /* 0b */
+            leading_chars_to_skip = 2; /* 0b */
             break;
         case 'o':
             base = 8;
             leading_chars_to_skip = 2; /* 0o */
+            leading_chars_to_skip = 2; /* 0o */
             break;
         case 'x':
         case 'X':
             base = 16;
             leading_chars_to_skip = 2; /* 0x */
+            leading_chars_to_skip = 2; /* 0x */
             break;
         default:  /* shouldn't be needed, but stops a compiler warning */
 …
+        }
         /* The number of prefix chars is the same as the leading
            chars to skip */
         if (format->alternate)
             n_prefix = leading_chars_to_skip;
+        /* The number of prefix chars is the same as the leading
+           chars to skip */
+        if (format->alternate)
+            n_prefix = leading_chars_to_skip;
         /* Do the hard part, converting to a string in a given base */
         tmp = tostring(value, base);
+        tmp = tostring(value, base);
         if (tmp == NULL)
             goto done;
         pnumeric_chars = STRINGLIB_STR(tmp);
+        pnumeric_chars = STRINGLIB_STR(tmp);
         n_digits = STRINGLIB_LEN(tmp);
         prefix = pnumeric_chars;
         /* Remember not to modify what pnumeric_chars points to.  it
            might be interned.  Only modify it after we copy it into a
            newly allocated output buffer. */
+        prefix = pnumeric_chars;
+        /* Remember not to modify what pnumeric_chars points to.  it
+           might be interned.  Only modify it after we copy it into a
+           newly allocated output buffer. */
         /* Is a sign character present in the output?  If so, remember it
            and skip it */
         sign = pnumeric_chars[0];
         if (sign == '-') {
             ++prefix;
             ++leading_chars_to_skip;
+        if (pnumeric_chars[0] == '-') {
+            sign_char = pnumeric_chars[0];
+            ++prefix;
+            ++leading_chars_to_skip;
+        }
+        /* Skip over the leading chars (0x, 0b, etc.) */
+        n_digits -= leading_chars_to_skip;
+        pnumeric_chars += leading_chars_to_skip;
+    }
+    if (format->type == 'n')
+            /* Compute how many additional chars we need to allocate
+               to hold the thousands grouping. */
+            STRINGLIB_GROUPING(NULL, n_digits, n_digits,
+, &n_grouping_chars, 0);
+    /* Calculate the widths of the various leading and trailing parts */
+    calc_number_widths(&spec, sign, n_prefix, n_digits + n_grouping_chars,
+                       format);
+    /* Allocate a new string to hold the result */
+    result = STRINGLIB_NEW(NULL, spec.n_total);
+        /* Skip over the leading chars (0x, 0b, etc.) */
+        n_digits -= leading_chars_to_skip;
+        pnumeric_chars += leading_chars_to_skip;
+    }
+    /* Determine the grouping, separator, and decimal point, if any. */
+    get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
+                    (format->thousands_separators ?
+                     LT_DEFAULT_LOCALE :
+                     LT_NO_LOCALE),
+                    &locale);
+    /* Calculate how much memory we'll need. */
+    n_total = calc_number_widths(&spec, n_prefix, sign_char, pnumeric_chars,
+                       n_digits, n_remainder, 0, &locale, format);
+    /* Allocate the memory. */
+    result = STRINGLIB_NEW(NULL, n_total);
     if (!result)
+        goto done;
+    p = STRINGLIB_STR(result);
+    /* XXX There is too much magic here regarding the internals of
+       spec and the location of the prefix and digits.  It would be
+       better if calc_number_widths returned a number of logical
+       offsets into the buffer, and those were used.  Maybe in a
+       future code cleanup. */
+    /* Fill in the digit parts */
+    n_leading_chars = spec.n_lpadding + spec.n_lsign +
+            spec.n_prefix + spec.n_spadding;
+    memmove(p + n_leading_chars,
+            pnumeric_chars,
+            n_digits * sizeof(STRINGLIB_CHAR));
+    /* If type is 'X', convert the filled in digits to uppercase */
+    if (format->type == 'X') {
+        Py_ssize_t t;
+        for (t = 0; t < n_digits; ++t)
+            p[t + n_leading_chars] = STRINGLIB_TOUPPER(p[t + n_leading_chars]);
+    }
+    /* Insert the grouping, if any, after the uppercasing of the digits, so
+       we can ensure that grouping chars won't be affected. */
+    if (n_grouping_chars) {
+            /* We know this can't fail, since we've already
+               reserved enough space. */
+            STRINGLIB_CHAR *pstart = p + n_leading_chars;
+#ifndef NDEBUG
+            int r =
+#endif
+                STRINGLIB_GROUPING(pstart, n_digits, n_digits,
+                           spec.n_total+n_grouping_chars-n_leading_chars,
+                           NULL, 0);
+            assert(r);
+    }
+    /* Fill in the non-digit parts (padding, sign, etc.) */
+    fill_non_digits(p, &spec, prefix, n_digits + n_grouping_chars,
+                    format->fill_char == '\0' ? ' ' : format->fill_char);
+    /* If type is 'X', uppercase the prefix.  This has to be done after the
+       prefix is filled in by fill_non_digits */
+    if (format->type == 'X') {
+        Py_ssize_t t;
+        for (t = 0; t < n_prefix; ++t)
+            p[t + spec.n_lpadding + spec.n_lsign] =
+                    STRINGLIB_TOUPPER(p[t + spec.n_lpadding + spec.n_lsign]);
+    }
+        goto done;
+    /* Populate the memory. */
+    fill_number(STRINGLIB_STR(result), &spec, pnumeric_chars, n_digits,
+                prefix, format->fill_char == '\0' ? ' ' : format->fill_char,
+                &locale, format->type == 'X');
 done:
 …
 #ifdef FORMAT_FLOAT
 #if STRINGLIB_IS_UNICODE
+/* taken from unicodeobject.c
+   Widen the NUL-terminated byte string charbuffer into buffer, storing
+   one Py_UNICODE per input char, and return the number of characters
+   copied (strlen(charbuffer)).  The terminating NUL is not copied, so
+   buffer needs room for strlen(charbuffer) elements. */
+static Py_ssize_t
+strtounicode(Py_UNICODE *buffer, const char *charbuffer)
+{
+    register Py_ssize_t i;
+    Py_ssize_t len = strlen(charbuffer);
+    for (i = len - 1; i >= 0; --i) /* copies from the last char to the first */
+        buffer[i] = (Py_UNICODE) charbuffer[i];
+    return len;
+}
+#endif
+/* see FORMATBUFLEN in unicodeobject.c */
+#define FLOAT_FORMATBUFLEN 120
+static void
+strtounicode(Py_UNICODE *buffer, const char *charbuffer, Py_ssize_t len)
+{   /* Widen the first len chars of charbuffer into buffer, one
+       Py_UNICODE per char.  Exactly len elements are written and no
+       terminator is appended; the caller supplies len and a buffer
+       large enough to hold it. */
+    Py_ssize_t i;
+    for (i = 0; i < len; ++i)
+        buffer[i] = (Py_UNICODE)charbuffer[i];
+}
+#endif
 /* much of this is taken from unicodeobject.c */
 static PyObject *
 format_float_internal(PyObject *value,
+                      const InternalFormatSpec *format)
+{
+    /* fmt = '%.' + `prec` + `type` + '%%'
+       worst case length = 2 + 10 (len of INT_MAX) + 1 + 2 = 15 (use 20)*/
+    char fmt[20];
+    /* taken from unicodeobject.c */
+    /* Worst case length calc to ensure no buffer overrun:
+       'g' formats:
+         fmt = %#.<prec>g
+         buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
+            for any double rep.)
+         len = 1 + prec + 1 + 2 + 5 = 9 + prec
+       'f' formats:
+         buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
+         len = 1 + 50 + 1 + prec = 52 + prec
+       If prec=0 the effective precision is 1 (the leading digit is
+       always given), therefore increase the length by one.
+    */
+    char charbuf[FLOAT_FORMATBUFLEN];
+                      const InternalFormatSpec *format)
+{
+    char *buf = NULL;       /* buffer returned from PyOS_double_to_string */
     Py_ssize_t n_digits;
+    double x;
+    Py_ssize_t precision = format->precision;
+    PyObject *result = NULL;
+    STRINGLIB_CHAR sign;
+    char* trailing = "";
+    Py_ssize_t n_remainder;
+    Py_ssize_t n_total;
+    int has_decimal;
+    double val;
+    Py_ssize_t precision;
+    Py_ssize_t default_precision = 6;
+    STRINGLIB_CHAR type = format->type;
+    int add_pct = 0;
     STRINGLIB_CHAR *p;
     NumberFieldWidths spec;
+    STRINGLIB_CHAR type = format->type;
+    int flags = 0;
+    PyObject *result = NULL;
+    STRINGLIB_CHAR sign_char = '\0';
+    int float_type; /* Used to see if we have a nan, inf, or regular float. */
 #if STRINGLIB_IS_UNICODE
+    Py_UNICODE unicodebuf[FLOAT_FORMATBUFLEN];
+#endif
+    /* alternate is not allowed on floats. */
+    Py_UNICODE *unicode_tmp = NULL;
+#endif
+    /* Locale settings, either from the actual locale or
+       from a hard-coded pseudo-locale */
+    LocaleInfo locale;
+    if (format->precision > INT_MAX) {
+        PyErr_SetString(PyExc_ValueError, "precision too big");
+        goto done;
+    }
+    precision = (int)format->precision;
+    /* Alternate is not allowed on floats. */
     if (format->alternate) {
         PyErr_SetString(PyExc_ValueError,
                         "Alternate form (#) not allowed in float format "
+                        "specifier");
+        goto done;
+    }
+    /* first, do the conversion as 8-bit chars, using the platform's
+       snprintf.  then, if needed, convert to unicode. */
+    /* 'F' is the same as 'f', per the PEP */
+    if (type == 'F')
+        type = 'f';
+    x = PyFloat_AsDouble(value);
+    if (x == -1.0 && PyErr_Occurred())
+                        "specifier");
+        goto done;
+    }
+    if (type == '\0') {
+        /* Omitted type specifier. This is like 'g' but with at least one
+           digit after the decimal point, and different default precision.*/
+        type = 'g';
+        default_precision = PyFloat_STR_PRECISION;
+        flags |= Py_DTSF_ADD_DOT_0;
+    }
+    if (type == 'n')
+        /* 'n' is the same as 'g', except for the locale used to
+           format the result. We take care of that later. */
+        type = 'g';
+    val = PyFloat_AsDouble(value);
+    if (val == -1.0 && PyErr_Occurred())
         goto done;
     if (type == '%') {
         type = 'f';
         x *= 100;
         trailing = "%";
+        val *= 100;
+        add_pct = 1;
+    }
     if (precision < 0)
+        precision = 6;
+    if (type == 'f' && fabs(x) >= 1e50)
+        type = 'g';
+    /* cast "type", because if we're in unicode we need to pass a
+       8-bit char.  this is safe, because we've restricted what "type"
+       can be */
+    PyOS_snprintf(fmt, sizeof(fmt), "%%.%" PY_FORMAT_SIZE_T "d%c", precision,
+                  (char)type);
+    /* do the actual formatting */
+    PyOS_ascii_formatd(charbuf, sizeof(charbuf), fmt, x);
+    /* adding trailing to fmt with PyOS_snprintf doesn't work, not
+       sure why.  we'll just concatenate it here, no harm done.  we
+       know we can't have a buffer overflow from the fmt size
+       analysis */
+    strcat(charbuf, trailing);
+    /* rather than duplicate the code for snprintf for both unicode
+       and 8 bit strings, we just use the 8 bit version and then
+       convert to unicode in a separate code path.  that's probably
+       the lesser of 2 evils. */
+        precision = default_precision;
+    /* Cast "type", because if we're in unicode we need to pass a
+       8-bit char. This is safe, because we've restricted what "type"
+       can be. */
+    buf = PyOS_double_to_string(val, (char)type, precision, flags,
+                                &float_type);
+    if (buf == NULL)
+        goto done;
+    n_digits = strlen(buf);
+    if (add_pct) {
+        /* We know that buf has a trailing zero (since we just called
+           strlen() on it), and we don't use that fact any more. So we
+           can just write over the trailing zero. */
+        buf[n_digits] = '%';
+        n_digits += 1;
+    }
+    /* Since there is no unicode version of PyOS_double_to_string,
+       just use the 8 bit version and then convert to unicode. */
 #if STRINGLIB_IS_UNICODE
+    n_digits = strtounicode(unicodebuf, charbuf);
+    p = unicodebuf;
+    unicode_tmp = (Py_UNICODE*)PyMem_Malloc((n_digits)*sizeof(Py_UNICODE));
+    if (unicode_tmp == NULL) {
+        PyErr_NoMemory();
+        goto done;
+    }
+    strtounicode(unicode_tmp, buf, n_digits);
+    p = unicode_tmp;
 #else
+    /* compute the length.  I believe this is done because the return
+       value from snprintf above is unreliable */
+    n_digits = strlen(charbuf);
+    p = charbuf;
+#endif
+    /* is a sign character present in the output?  if so, remember it
+    p = buf;
+#endif
+    /* Is a sign character present in the output?  If so, remember it
        and skip it */
     sign = p[0];
     if (sign == '-') {
+    if (*p == '-') {
+        sign_char = *p;
         ++p;
         --n_digits;
+    }
+    calc_number_widths(&spec, sign, 0, n_digits, format);
+    /* allocate a string with enough space */
+    result = STRINGLIB_NEW(NULL, spec.n_total);
+    /* Determine if we have any "remainder" (after the digits, might include
+       decimal or exponent or both (or neither)) */
+    parse_number(p, n_digits, &n_remainder, &has_decimal);
+    /* Determine the grouping, separator, and decimal point, if any. */
+    get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
+                    (format->thousands_separators ?
+                     LT_DEFAULT_LOCALE :
+                     LT_NO_LOCALE),
+                    &locale);
+    /* Calculate how much memory we'll need. */
+    n_total = calc_number_widths(&spec, 0, sign_char, p, n_digits,
+                                 n_remainder, has_decimal, &locale, format);
+    /* Allocate the memory. */
+    result = STRINGLIB_NEW(NULL, n_total);
     if (result == NULL)
         goto done;
+    /* Fill in the non-digit parts (padding, sign, etc.) */
+    fill_non_digits(STRINGLIB_STR(result), &spec, NULL, n_digits,
+                    format->fill_char == '\0' ? ' ' : format->fill_char);
+    /* fill in the digit parts */
+    memmove(STRINGLIB_STR(result) +
+               (spec.n_lpadding + spec.n_lsign + spec.n_spadding),
+            p,
+            n_digits * sizeof(STRINGLIB_CHAR));
+    /* Populate the memory. */
+    fill_number(STRINGLIB_STR(result), &spec, p, n_digits, NULL,
+                format->fill_char == '\0' ? ' ' : format->fill_char, &locale,
+);
 done:
+    PyMem_Free(buf);
+#if STRINGLIB_IS_UNICODE
+    PyMem_Free(unicode_tmp);
+#endif
     return result;
+}
 #endif /* FORMAT_FLOAT */
+/************************************************************************/
+/*********** complex formatting *****************************************/
+/************************************************************************/
+#ifdef FORMAT_COMPLEX
+static PyObject *
+format_complex_internal(PyObject *value,
+                        const InternalFormatSpec *format)
+{   /* Render the complex number `value` according to `format`.
+       The real and imaginary parts are each converted with
+       PyOS_double_to_string(), laid out without padding via
+       calc_number_widths()/fill_number(), followed by a 'j' (and
+       optionally wrapped in parentheses); the combined text is then
+       padded once to format->width.
+       Returns the new string object, or NULL if the spec is rejected
+       (precision > INT_MAX, '#', '0' fill or '=' alignment; a ValueError
+       is set) or if any conversion or allocation step fails. */
+    double re;
+    double im;
+    char *re_buf = NULL;       /* buffer returned from PyOS_double_to_string */
+    char *im_buf = NULL;       /* buffer returned from PyOS_double_to_string */
+    InternalFormatSpec tmp_format = *format; /* private copy; its fill,
+                                                align, width and sign are
+                                                overridden below */
+    Py_ssize_t n_re_digits;
+    Py_ssize_t n_im_digits;
+    Py_ssize_t n_re_remainder;
+    Py_ssize_t n_im_remainder;
+    Py_ssize_t n_re_total;
+    Py_ssize_t n_im_total;
+    int re_has_decimal;
+    int im_has_decimal;
+    Py_ssize_t precision;
+    Py_ssize_t default_precision = 6;
+    STRINGLIB_CHAR type = format->type;
+    STRINGLIB_CHAR *p_re;
+    STRINGLIB_CHAR *p_im;
+    NumberFieldWidths re_spec;
+    NumberFieldWidths im_spec;
+    int flags = 0; /* never modified in this function */
+    PyObject *result = NULL;
+    STRINGLIB_CHAR *p;
+    STRINGLIB_CHAR re_sign_char = '\0';
+    STRINGLIB_CHAR im_sign_char = '\0';
+    int re_float_type; /* Used to see if we have a nan, inf, or regular float. */
+    int im_float_type; /* both are filled in by PyOS_double_to_string and
+                          not read again in this function */
+    int add_parens = 0;
+    int skip_re = 0;
+    Py_ssize_t lpad;
+    Py_ssize_t rpad;
+    Py_ssize_t total;
+#if STRINGLIB_IS_UNICODE
+    Py_UNICODE *re_unicode_tmp = NULL;
+    Py_UNICODE *im_unicode_tmp = NULL;
+#endif
+    /* Locale settings, either from the actual locale or
+       from a hard-coded pseudo-locale */
+    LocaleInfo locale;
+    if (format->precision > INT_MAX) {
+        PyErr_SetString(PyExc_ValueError, "precision too big");
+        goto done;
+    }
+    precision = (int)format->precision;
+    /* Alternate is not allowed on complex. */
+    if (format->alternate) {
+        PyErr_SetString(PyExc_ValueError,
+                        "Alternate form (#) not allowed in complex format "
+                        "specifier");
+        goto done;
+    }
+    /* Neither is zero padding. */
+    if (format->fill_char == '0') {
+        PyErr_SetString(PyExc_ValueError,
+                        "Zero padding is not allowed in complex format "
+                        "specifier");
+        goto done;
+    }
+    /* Neither is '=' alignment. */
+    if (format->align == '=') {
+        PyErr_SetString(PyExc_ValueError,
+                        "'=' alignment flag is not allowed in complex format "
+                        "specifier");
+        goto done;
+    }
+    re = PyComplex_RealAsDouble(value);
+    if (re == -1.0 && PyErr_Occurred())
+        goto done;
+    im = PyComplex_ImagAsDouble(value);
+    if (im == -1.0 && PyErr_Occurred())
+        goto done;
+    if (type == '\0') {
+        /* Omitted type specifier. Should be like str(self). */
+        type = 'g';
+        default_precision = PyFloat_STR_PRECISION;
+        if (re == 0.0 && copysign(1.0, re) == 1.0) /* real part is +0.0, not -0.0 */
+            skip_re = 1;    /* omit the real part, no parentheses */
+        else
+            add_parens = 1; /* keep the real part and wrap in "(...)" */
+    }
+    if (type == 'n')
+        /* 'n' is the same as 'g', except for the locale used to
+           format the result. We take care of that later. */
+        type = 'g';
+    if (precision < 0)
+        precision = default_precision;
+    /* Cast "type", because if we're in unicode we need to pass a
+       8-bit char. This is safe, because we've restricted what "type"
+       can be. */
+    re_buf = PyOS_double_to_string(re, (char)type, precision, flags,
+                                   &re_float_type);
+    if (re_buf == NULL)
+        goto done;
+    im_buf = PyOS_double_to_string(im, (char)type, precision, flags,
+                                   &im_float_type);
+    if (im_buf == NULL)
+        goto done;
+    n_re_digits = strlen(re_buf);
+    n_im_digits = strlen(im_buf);
+    /* Since there is no unicode version of PyOS_double_to_string,
+       just use the 8 bit version and then convert to unicode. */
+#if STRINGLIB_IS_UNICODE
+    re_unicode_tmp = (Py_UNICODE*)PyMem_Malloc((n_re_digits)*sizeof(Py_UNICODE));
+    if (re_unicode_tmp == NULL) {
+        PyErr_NoMemory();
+        goto done;
+    }
+    strtounicode(re_unicode_tmp, re_buf, n_re_digits);
+    p_re = re_unicode_tmp;
+    im_unicode_tmp = (Py_UNICODE*)PyMem_Malloc((n_im_digits)*sizeof(Py_UNICODE));
+    if (im_unicode_tmp == NULL) {
+        PyErr_NoMemory();
+        goto done;
+    }
+    strtounicode(im_unicode_tmp, im_buf, n_im_digits);
+    p_im = im_unicode_tmp;
+#else
+    p_re = re_buf;
+    p_im = im_buf;
+#endif
+    /* Is a sign character present in the output?  If so, remember it
+       and skip it */
+    if (*p_re == '-') {
+        re_sign_char = *p_re;
+        ++p_re;
+        --n_re_digits;
+    }
+    if (*p_im == '-') {
+        im_sign_char = *p_im;
+        ++p_im;
+        --n_im_digits;
+    }
+    /* Determine if we have any "remainder" (after the digits, might include
+       decimal or exponent or both (or neither)) */
+    parse_number(p_re, n_re_digits, &n_re_remainder, &re_has_decimal);
+    parse_number(p_im, n_im_digits, &n_im_remainder, &im_has_decimal);
+    /* Determine the grouping, separator, and decimal point, if any.
+       Note this tests the caller's format->type, not the local `type`,
+       which has already been rewritten from 'n' to 'g' above. */
+    get_locale_info(format->type == 'n' ? LT_CURRENT_LOCALE :
+                    (format->thousands_separators ?
+                     LT_DEFAULT_LOCALE :
+                     LT_NO_LOCALE),
+                    &locale);
+    /* Turn off any padding. We'll do it later after we've composed
+       the numbers without padding. */
+    tmp_format.fill_char = '\0';
+    tmp_format.align = '<';
+    tmp_format.width = -1;
+    /* Calculate how much memory we'll need. */
+    n_re_total = calc_number_widths(&re_spec, 0, re_sign_char, p_re,
+                                    n_re_digits, n_re_remainder,
+                                    re_has_decimal, &locale, &tmp_format);
+    /* Same formatting, but always include a sign, unless the real part is
+     * going to be omitted, in which case we use whatever sign convention was
+     * requested by the original format. */
+    if (!skip_re)
+        tmp_format.sign = '+';
+    n_im_total = calc_number_widths(&im_spec, 0, im_sign_char, p_im,
+                                    n_im_digits, n_im_remainder,
+                                    im_has_decimal, &locale, &tmp_format);
+    if (skip_re)
+        n_re_total = 0; /* the real part contributes nothing to the length */
+    /* Add 1 for the 'j', and optionally 2 for parens. */
+    calc_padding(n_re_total + n_im_total + 1 + add_parens * 2,
+                 format->width, format->align, &lpad, &rpad, &total);
+    result = STRINGLIB_NEW(NULL, total);
+    if (result == NULL)
+        goto done;
+    /* Populate the memory. First, the padding.  The content length passed
+       here is the same expression given to calc_padding() above. */
+    p = fill_padding(STRINGLIB_STR(result),
+                     n_re_total + n_im_total + 1 + add_parens * 2,
+                     format->fill_char=='\0' ? ' ' : format->fill_char,
+                     lpad, rpad);
+    if (add_parens)
+        *p++ = '(';
+    if (!skip_re) {
+        /* NULL prefix, fill char 0 and no uppercasing; the spec was computed
+           from tmp_format, whose width was set to -1 above. */
+        fill_number(p, &re_spec, p_re, n_re_digits, NULL, 0, &locale, 0);
+        p += n_re_total;
+    }
+    fill_number(p, &im_spec, p_im, n_im_digits, NULL, 0, &locale, 0);
+    p += n_im_total;
+    *p++ = 'j';
+    if (add_parens)
+        *p++ = ')';
+done:
+    /* Reached on success and on every error path; any buffer whose
+       allocation step was never reached is still NULL here. */
+    PyMem_Free(re_buf);
+    PyMem_Free(im_buf);
+#if STRINGLIB_IS_UNICODE
+    PyMem_Free(re_unicode_tmp);
+    PyMem_Free(im_unicode_tmp);
+#endif
+    return result;
+}
+#endif /* FORMAT_COMPLEX */
 /************************************************************************/
 …
 PyObject *
 FORMAT_STRING(PyObject *obj,
               STRINGLIB_CHAR *format_spec,
               Py_ssize_t format_spec_len)
+              STRINGLIB_CHAR *format_spec,
+              Py_ssize_t format_spec_len)
+{
     InternalFormatSpec format;
 …
     /* parse the format_spec */
     if (!parse_internal_render_format_spec(format_spec, format_spec_len,
                                            &format, 's'))
+                                           &format, 's', '<'))
         goto done;
 …
 static PyObject*
 format_int_or_long(PyObject* obj,
                    STRINGLIB_CHAR *format_spec,
                    Py_ssize_t format_spec_len,
                    IntOrLongToString tostring)
+                   STRINGLIB_CHAR *format_spec,
+                   Py_ssize_t format_spec_len,
+                   IntOrLongToString tostring)
+{
     PyObject *result = NULL;
 …
     /* parse the format_spec */
     if (!parse_internal_render_format_spec(format_spec,
                                            format_spec_len,
                                            &format, 'd'))
+                                           format_spec_len,
+                                           &format, 'd', '>'))
         goto done;
 …
     case 'n':
         /* no type conversion needed, already an int (or long).  do
            the formatting */
             result = format_int_or_long_internal(obj, &format, tostring);
+           the formatting */
+            result = format_int_or_long_internal(obj, &format, tostring);
         break;
 …
 PyObject *
 FORMAT_LONG(PyObject *obj,
             STRINGLIB_CHAR *format_spec,
             Py_ssize_t format_spec_len)
+            STRINGLIB_CHAR *format_spec,
+            Py_ssize_t format_spec_len)
+{
     return format_int_or_long(obj, format_spec, format_spec_len,
                               long_format);
+                              long_format);
+}
 #endif /* FORMAT_LONG */
 …
 PyObject *
 FORMAT_INT(PyObject *obj,
            STRINGLIB_CHAR *format_spec,
            Py_ssize_t format_spec_len)
+           STRINGLIB_CHAR *format_spec,
+           Py_ssize_t format_spec_len)
+{
     return format_int_or_long(obj, format_spec, format_spec_len,
                               int_format);
+                              int_format);
+}
 #endif /* FORMAT_INT */
 …
 PyObject *
 FORMAT_FLOAT(PyObject *obj,
              STRINGLIB_CHAR *format_spec,
              Py_ssize_t format_spec_len)
+             STRINGLIB_CHAR *format_spec,
+             Py_ssize_t format_spec_len)
+{
     PyObject *result = NULL;
 …
     /* parse the format_spec */
     if (!parse_internal_render_format_spec(format_spec,
                                            format_spec_len,
                                            &format, '\0'))
+                                           format_spec_len,
+                                           &format, '\0', '>'))
         goto done;
     /* type conversion? */
     switch (format.type) {
+    case '\0':
+        /* 'Z' means like 'g', but with at least one decimal.  See
+           PyOS_ascii_formatd */
+        format.type = 'Z';
+        /* Deliberate fall through to the next case statement */
+    case '\0': /* No format code: like 'g', but with at least one decimal. */
     case 'e':
     case 'E':
 …
+}
 #endif /* FORMAT_FLOAT */
+#ifdef FORMAT_COMPLEX
+PyObject *
+FORMAT_COMPLEX(PyObject *obj,
+               STRINGLIB_CHAR *format_spec,
+               Py_ssize_t format_spec_len)
+{   /* Entry point for formatting a complex object with a format spec.
+       obj is the object to format; format_spec points at
+       format_spec_len characters of spec text.  Returns the formatted
+       object, or NULL when parsing fails or when the presentation type
+       is not one of the codes accepted by the switch below. */
+    PyObject *result = NULL;   /* stays NULL on every failure path */
+    InternalFormatSpec format;
+    /* check for the special case of zero length format spec, make
+       it equivalent to str(obj) */
+    if (format_spec_len == 0) {
+        result = STRINGLIB_TOSTR(obj);
+        goto done;
+    }
+    /* parse the format_spec; a false return means parsing failed and we
+       leave with result still NULL.  NOTE(review): the trailing '\0' and
+       '>' are presumably the default type code and default alignment --
+       confirm against parse_internal_render_format_spec */
+    if (!parse_internal_render_format_spec(format_spec,
+                                           format_spec_len,
+                                           &format, '\0', '>'))
+        goto done;
+    /* type conversion?  every accepted code falls through to the single
+       call of format_complex_internal below */
+    switch (format.type) {
+    case '\0': /* No format code: like 'g', but with at least one decimal. */
+    case 'e':
+    case 'E':
+    case 'f':
+    case 'F':
+    case 'g':
+    case 'G':
+    case 'n':
+        /* no conversion, already a complex.  do the formatting */
+        result = format_complex_internal(obj, &format);
+        break;
+    default:
+        /* unknown type code: report it together with the type name of
+           obj.  result is still NULL here, so the caller receives NULL.
+           NOTE(review): presumably unknown_presentation_type sets the
+           exception -- confirm */
+        unknown_presentation_type(format.type, obj->ob_type->tp_name);
+        goto done;
+    }
+done:
+    return result;
+}
+#endif /* FORMAT_COMPLEX */

python/trunk/Objects/stringlib/localeutil.h

-              r2
+              r391
 #include <locale.h>
+#define MAX(x, y) ((x) < (y) ? (y) : (x))   /* larger of x and y; an argument can be evaluated twice, so pass no side effects */
+#define MIN(x, y) ((x) < (y) ? (x) : (y))   /* smaller of x and y; an argument can be evaluated twice, so pass no side effects */
+typedef struct {   /* Iteration state used by _GroupGenerator_init and
+                      _GroupGenerator_next to walk a grouping string. */
+    const char *grouping;   /* the grouping string being walked */
+    char previous;          /* last group width returned; 0 until one is returned */
+    Py_ssize_t i; /* Where we're currently pointing in grouping. */
+} GroupGenerator;
+static void
+_GroupGenerator_init(GroupGenerator *self, const char *grouping)
+{   /* Reset self so that iteration starts at the first byte of grouping. */
+    self->grouping = grouping;
+    self->i = 0;          /* index of the next grouping byte to read */
+    self->previous = 0;   /* no group width has been returned yet */
+}
+/* Returns the next grouping, or 0 to signify end.
+   The result depends on the byte at index i of the grouping string:
+     - 0: the string is exhausted.  The last width handed out (previous)
+       is returned again and i is not advanced, so every later call
+       repeats it.  If nothing was handed out yet, previous is still the
+       0 stored by _GroupGenerator_init and the generator ends at once.
+     - CHAR_MAX: returns 0.  i is not advanced, so later calls also
+       return 0.
+     - anything else: the byte is remembered in previous, i moves on to
+       the next byte, and the byte value is returned as the group width. */
+static Py_ssize_t
+_GroupGenerator_next(GroupGenerator *self)
+{
+    /* Note that we don't really do much error checking here. If a
+       grouping string contains just CHAR_MAX, for example, then just
+       terminate the generator. That shouldn't happen, but at least we
+       fail gracefully. */
+    switch (self->grouping[self->i]) {
+    case 0:
+        return self->previous;
+    case CHAR_MAX:
+        /* Stop the generator. */
+        return 0;
+    default: {
+        char ch = self->grouping[self->i];
+        self->previous = ch;
+        self->i++;
+        return (Py_ssize_t)ch;
+    }
+    }
+}
+/* Fill in some digits, leading zeros, and thousands separator. All
+   are optional, depending on when we're called.
+   Output is written right to left, and both cursors are moved
+   backwards in place so that the next call continues where this one
+   stopped.
+     digits_end: in/out; points just past the source digits that have
+                 not been copied yet.  Moved back by n_chars.
+     buffer_end: in/out; points at the start of the output written so
+                 far.  Moved back past everything written by this call.
+     n_chars: number of digits to copy from the source.
+     n_zeros: number of '0' characters to write in front of the digits.
+     thousands_sep: separator written behind the digits (at the higher
+                 addresses), or NULL to write no separator.
+     thousands_sep_len: number of chars in thousands_sep; only used
+                 when thousands_sep is not NULL. */
+static void
+fill(STRINGLIB_CHAR **digits_end, STRINGLIB_CHAR **buffer_end,
+     Py_ssize_t n_chars, Py_ssize_t n_zeros, const char* thousands_sep,
+     Py_ssize_t thousands_sep_len)
+{
+#if STRINGLIB_IS_UNICODE
+    Py_ssize_t i;
+#endif
+    if (thousands_sep) {
+        *buffer_end -= thousands_sep_len;   /* make room directly in front of the previous output */
+        /* Copy the thousands_sep chars into the buffer. */
+#if STRINGLIB_IS_UNICODE
+        /* Convert from the char's of the thousands_sep from
+           the locale into unicode. */
+        for (i = 0; i < thousands_sep_len; ++i)
+            (*buffer_end)[i] = thousands_sep[i];
+#else
+        /* No conversion, just memcpy the thousands_sep. */
+        memcpy(*buffer_end, thousands_sep, thousands_sep_len);
+#endif
+    }
+    *buffer_end -= n_chars;   /* step both cursors back over this group of digits */
+    *digits_end -= n_chars;
+    memcpy(*buffer_end, *digits_end, n_chars * sizeof(STRINGLIB_CHAR));
+    *buffer_end -= n_zeros;   /* the zero padding goes in front of the digits */
+    STRINGLIB_FILL(*buffer_end, '0', n_zeros);   /* presumably stores n_zeros copies of '0' -- TODO confirm macro */
+}
 /**
  * _Py_InsertThousandsGrouping:
  * @buffer: A pointer to the start of a string.
+ * @n_buffer: The length of the string.
+ * @n_buffer: Number of characters in @buffer.
+ * @digits: A pointer to the digits we're reading from. If count
+ *          is non-NULL, this is unused.
  * @n_digits: The number of digits in the string, in which we want
  *            to put the grouping chars.
+ * @buf_size: The maximum size of the buffer pointed to by buffer.
+ * @count: If non-NULL, points to a variable that will receive the
+ *         number of characters we need to insert (and no formatting
+ *         will actually occur).
+ * @append_zero_char: If non-zero, put a trailing zero at the end
+ *         of the resulting string, if and only if we modified the
+ *         string.
+ *
+ * Inserts thousand grouping characters (as defined in the current
+ *  locale) into the string between buffer and buffer+n_digits.  If
+ *  count is non-NULL, don't do any formatting, just count the number
+ *  of characters to insert.  This is used by the caller to
+ *  appropriately resize the buffer, if needed.  If count is non-NULL,
+ *  buffer can be NULL (it is not dereferenced at all in that case).
+ * @min_width: The minimum width of the digits in the output string.
+ *             Output will be zero-padded on the left to fill.
+ * @grouping: see definition in localeconv().
+ * @thousands_sep: see definition in localeconv().
+ *
+ * There are 2 modes: counting and filling. If @buffer is NULL,
+ *  we are in counting mode, else filling mode.
+ * If counting, the required buffer size is returned.
+ * If filling, we know the buffer will be large enough, so we don't
+ *  need to pass in the buffer size.
+ * Inserts thousand grouping characters (as defined by grouping and
+ *  thousands_sep) into the string between buffer and buffer+n_digits.
+ *
  * Return value: 0 on error, else 1.  Note that no error can occur if
 …
  * This name won't be used, the includer of this file should define
  *  it to be the actual function name, based on unicode or string.
+ *
+ * As closely as possible, this code mimics the logic in decimal.py's
+    _insert_thousands_sep().
  **/
 int
+Py_ssize_t
 _Py_InsertThousandsGrouping(STRINGLIB_CHAR *buffer,
+                            Py_ssize_t n_buffer,
+                            Py_ssize_t n_digits,
+                            Py_ssize_t buf_size,
+                            Py_ssize_t *count,
+                            int append_zero_char)
+{
+        struct lconv *locale_data = localeconv();
+        const char *grouping = locale_data->grouping;
+        const char *thousands_sep = locale_data->thousands_sep;
+        Py_ssize_t thousands_sep_len = strlen(thousands_sep);
+        STRINGLIB_CHAR *pend = NULL; /* current end of buffer */
+        STRINGLIB_CHAR *pmax = NULL; /* max of buffer */
+        char current_grouping;
+        Py_ssize_t remaining = n_digits; /* Number of chars remaining to
+                                            be looked at */
+        /* Initialize the character count, if we're just counting. */
+        if (count)
+                *count = 0;
+        else {
+                /* We're not just counting, we're modifying buffer */
+                pend = buffer + n_buffer;
+                pmax = buffer + buf_size;
+        }
+        /* Starting at the end and working right-to-left, keep track of
+           what grouping needs to be added and insert that. */
+        current_grouping = *grouping++;
+        /* If the first character is 0, perform no grouping at all. */
+        if (current_grouping == 0)
+                return 1;
+        while (remaining > current_grouping) {
+                /* Always leave buffer and pend valid at the end of this
+                   loop, since we might leave with a return statement. */
+                remaining -= current_grouping;
+                if (count) {
+                        /* We're only counting, not touching the memory. */
+                        *count += thousands_sep_len;
+                }
+                else {
+                        /* Do the formatting. */
+                        STRINGLIB_CHAR *plast = buffer + remaining;
+                        /* Is there room to insert thousands_sep_len chars? */
+                        if (pmax - pend < thousands_sep_len)
+                                /* No room. */
+                                return 0;
+                        /* Move the rest of the string down. */
+                        memmove(plast + thousands_sep_len,
+                                plast,
+                                (pend - plast) * sizeof(STRINGLIB_CHAR));
+                        /* Copy the thousands_sep chars into the buffer. */
+#if STRINGLIB_IS_UNICODE
+                        /* Convert from the char's of the thousands_sep from
+                           the locale into unicode. */
+                        {
+                                Py_ssize_t i;
+                                for (i = 0; i < thousands_sep_len; ++i)
+                                        plast[i] = thousands_sep[i];
+                        }
+#else
+                        /* No conversion, just memcpy the thousands_sep. */
+                        memcpy(plast, thousands_sep, thousands_sep_len);
+#endif
+                }
+                /* Adjust end pointer. */
+                pend += thousands_sep_len;
+                /* Move to the next grouping character, unless we're
+                   repeating (which is designated by a grouping of 0). */
+                if (*grouping != 0) {
+                        current_grouping = *grouping++;
+                        if (current_grouping == CHAR_MAX)
+                                /* We're done. */
+                                break;
+                }
+        }
+        if (append_zero_char) {
+                /* Append a zero character to mark the end of the string,
+                   if there's room. */
+                if (pend - (buffer + remaining) < 1)
+                        /* No room, error. */
+                        return 0;
+                *pend = 0;
+        }
+        return 1;
+                            Py_ssize_t n_buffer,
+                            STRINGLIB_CHAR *digits,
+                            Py_ssize_t n_digits,
+                            Py_ssize_t min_width,
+                            const char *grouping,
+                            const char *thousands_sep)
+{
+    Py_ssize_t count = 0;
+    Py_ssize_t n_zeros;
+    int loop_broken = 0;
+    int use_separator = 0; /* First time through, don't append the
+                              separator. They only go between
+                              groups. */
+    STRINGLIB_CHAR *buffer_end = NULL;
+    STRINGLIB_CHAR *digits_end = NULL;
+    Py_ssize_t l;
+    Py_ssize_t n_chars;
+    Py_ssize_t thousands_sep_len = strlen(thousands_sep);
+    Py_ssize_t remaining = n_digits; /* Number of chars remaining to
+                                        be looked at */
+    /* A generator that returns all of the grouping widths, until it
+       returns 0. */
+    GroupGenerator groupgen;
+    _GroupGenerator_init(&groupgen, grouping);
+    if (buffer) {
+        buffer_end = buffer + n_buffer;
+        digits_end = digits + n_digits;
+    }
+    while ((l = _GroupGenerator_next(&groupgen)) > 0) {
+        l = MIN(l, MAX(MAX(remaining, min_width), 1));
+        n_zeros = MAX(0, l - remaining);
+        n_chars = MAX(0, MIN(remaining, l));
+        /* Use n_zero zero's and n_chars chars */
+        /* Count only, don't do anything. */
+        count += (use_separator ? thousands_sep_len : 0) + n_zeros + n_chars;
+        if (buffer) {
+            /* Copy into the output buffer. */
+            fill(&digits_end, &buffer_end, n_chars, n_zeros,
+                 use_separator ? thousands_sep : NULL, thousands_sep_len);
+        }
+        /* Use a separator next time. */
+        use_separator = 1;
+        remaining -= n_chars;
+        min_width -= l;
+        if (remaining <= 0 && min_width <= 0) {
+            loop_broken = 1;
+            break;
+        }
+        min_width -= thousands_sep_len;
+    }
+    if (!loop_broken) {
+        /* We left the loop without using a break statement. */
+        l = MAX(MAX(remaining, min_width), 1);
+        n_zeros = MAX(0, l - remaining);
+        n_chars = MAX(0, MIN(remaining, l));
+        /* Use n_zero zero's and n_chars chars */
+        count += (use_separator ? thousands_sep_len : 0) + n_zeros + n_chars;
+        if (buffer) {
+            /* Copy into the output buffer. */
+            fill(&digits_end, &buffer_end, n_chars, n_zeros,
+                 use_separator ? thousands_sep : NULL, thousands_sep_len);
+        }
+    }
+    return count;
+}
+/**
+ * _Py_InsertThousandsGroupingLocale:
+ * @buffer: A pointer to the start of a string.
+ * @n_buffer: Number of characters in @buffer.
+ * @digits: A pointer to the digits we are reading from.
+ * @n_digits: The number of digits in the string, in which we want
+ *            to put the grouping chars.
+ * @min_width: The minimum width of the digits in the output string.
+ *
+ * Reads the current locale and calls _Py_InsertThousandsGrouping().
+ * The grouping and thousands_sep fields of the localeconv() result are
+ * passed along together with the five arguments above, which are
+ * forwarded unchanged.
+ *
+ * Return value: whatever _Py_InsertThousandsGrouping() returns.
+ **/
+Py_ssize_t
+_Py_InsertThousandsGroupingLocale(STRINGLIB_CHAR *buffer,
+                                  Py_ssize_t n_buffer,
+                                  STRINGLIB_CHAR *digits,
+                                  Py_ssize_t n_digits,
+                                  Py_ssize_t min_width)
+{
+        struct lconv *locale_data = localeconv();   /* numeric settings of the current locale */
+        const char *grouping = locale_data->grouping;
+        const char *thousands_sep = locale_data->thousands_sep;
+        return _Py_InsertThousandsGrouping(buffer, n_buffer, digits, n_digits,
+                                           min_width, grouping, thousands_sep);
+}
 #endif /* STRINGLIB_LOCALEUTIL_H */

python/trunk/Objects/stringlib/partition.h

-              r2
+              r391
 Py_LOCAL_INLINE(PyObject*)
 stringlib_partition(
     PyObject* str_obj, const STRINGLIB_CHAR* str, Py_ssize_t str_len,
     PyObject* sep_obj, const STRINGLIB_CHAR* sep, Py_ssize_t sep_len
+    )
+stringlib_partition(PyObject* str_obj,
+                    const STRINGLIB_CHAR* str, Py_ssize_t str_len,
+                    PyObject* sep_obj,
+                    const STRINGLIB_CHAR* sep, Py_ssize_t sep_len)
+{
     PyObject* out;
 …
     if (sep_len == 0) {
         PyErr_SetString(PyExc_ValueError, "empty separator");
         return NULL;
+        return NULL;
+    }
     out = PyTuple_New(3);
     if (!out)
         return NULL;
+        return NULL;
     pos = fastsearch(str, str_len, sep, sep_len, FAST_SEARCH);
+    pos = fastsearch(str, str_len, sep, sep_len, -1, FAST_SEARCH);
     if (pos < 0) {
+        Py_INCREF(str_obj);
+        PyTuple_SET_ITEM(out, 0, (PyObject*) str_obj);
+        Py_INCREF(STRINGLIB_EMPTY);
+        PyTuple_SET_ITEM(out, 1, (PyObject*) STRINGLIB_EMPTY);
+        Py_INCREF(STRINGLIB_EMPTY);
+        PyTuple_SET_ITEM(out, 2, (PyObject*) STRINGLIB_EMPTY);
+        return out;
+#if STRINGLIB_MUTABLE
+        PyTuple_SET_ITEM(out, 0, STRINGLIB_NEW(str, str_len));
+        PyTuple_SET_ITEM(out, 1, STRINGLIB_NEW(NULL, 0));
+        PyTuple_SET_ITEM(out, 2, STRINGLIB_NEW(NULL, 0));
+#else
+        Py_INCREF(str_obj);
+        PyTuple_SET_ITEM(out, 0, (PyObject*) str_obj);
+        Py_INCREF(STRINGLIB_EMPTY);
+        PyTuple_SET_ITEM(out, 1, (PyObject*) STRINGLIB_EMPTY);
+        Py_INCREF(STRINGLIB_EMPTY);
+        PyTuple_SET_ITEM(out, 2, (PyObject*) STRINGLIB_EMPTY);
+#endif
+        return out;
+    }
 …
     if (PyErr_Occurred()) {
         Py_DECREF(out);
         return NULL;
+        Py_DECREF(out);
+        return NULL;
+    }
 …
 Py_LOCAL_INLINE(PyObject*)
 stringlib_rpartition(
     PyObject* str_obj, const STRINGLIB_CHAR* str, Py_ssize_t str_len,
     PyObject* sep_obj, const STRINGLIB_CHAR* sep, Py_ssize_t sep_len
+    )
+stringlib_rpartition(PyObject* str_obj,
+                     const STRINGLIB_CHAR* str, Py_ssize_t str_len,
+                     PyObject* sep_obj,
+                     const STRINGLIB_CHAR* sep, Py_ssize_t sep_len)
+{
     PyObject* out;
     Py_ssize_t pos, j;
+    Py_ssize_t pos;
     if (sep_len == 0) {
         PyErr_SetString(PyExc_ValueError, "empty separator");
         return NULL;
+        return NULL;
+    }
     out = PyTuple_New(3);
     if (!out)
         return NULL;
+        return NULL;
+    /* XXX - create reversefastsearch helper! */
+        pos = -1;
+        for (j = str_len - sep_len; j >= 0; --j)
+            if (STRINGLIB_CMP(str+j, sep, sep_len) == 0) {
+                pos = j;
+                break;
+            }
+    pos = fastsearch(str, str_len, sep, sep_len, -1, FAST_RSEARCH);
     if (pos < 0) {
+        Py_INCREF(STRINGLIB_EMPTY);
+        PyTuple_SET_ITEM(out, 0, (PyObject*) STRINGLIB_EMPTY);
+        Py_INCREF(STRINGLIB_EMPTY);
+        PyTuple_SET_ITEM(out, 1, (PyObject*) STRINGLIB_EMPTY);
+        Py_INCREF(str_obj);
+        PyTuple_SET_ITEM(out, 2, (PyObject*) str_obj);
+        return out;
+#if STRINGLIB_MUTABLE
+        PyTuple_SET_ITEM(out, 0, STRINGLIB_NEW(NULL, 0));
+        PyTuple_SET_ITEM(out, 1, STRINGLIB_NEW(NULL, 0));
+        PyTuple_SET_ITEM(out, 2, STRINGLIB_NEW(str, str_len));
+#else
+        Py_INCREF(STRINGLIB_EMPTY);
+        PyTuple_SET_ITEM(out, 0, (PyObject*) STRINGLIB_EMPTY);
+        Py_INCREF(STRINGLIB_EMPTY);
+        PyTuple_SET_ITEM(out, 1, (PyObject*) STRINGLIB_EMPTY);
+        Py_INCREF(str_obj);
+        PyTuple_SET_ITEM(out, 2, (PyObject*) str_obj);
+#endif
+        return out;
+    }
 …
     if (PyErr_Occurred()) {
         Py_DECREF(out);
         return NULL;
+        Py_DECREF(out);
+        return NULL;
+    }
 …
 #endif
-/*
-Local variables:
-c-basic-offset: 4
-indent-tabs-mode: nil
-End:
-*/

python/trunk/Objects/stringlib/string_format.h

-              r2
+              r391
 /* Defines for Python 2.6 compatability */
+/* Defines for Python 2.6 compatibility */
 #if PY_VERSION_HEX < 0x03000000
 #define PyLong_FromSsize_t _PyLong_FromSsize_t
 …
+typedef enum {
+    ANS_INIT,     /* state stored by AutoNumber_Init; kept until the first numerically indexed field is seen */
+    ANS_AUTO,     /* chosen when that first field has an empty name */
+    ANS_MANUAL    /* chosen when that first field carries an explicit number */
+} AutoNumberState;   /* Keep track if we're auto-numbering fields */
+/* Keeps track of our auto-numbering state, and which number field we're on */
+typedef struct {
+    AutoNumberState an_state;   /* numbering mode seen so far */
+    int an_field_number;        /* index given to the next field with an empty name; post-incremented on each use */
+} AutoNumber;
 /* forward declaration for recursion */
 static PyObject *
 build_string(SubString *input, PyObject *args, PyObject *kwargs,
              int recursion_depth);
+             int recursion_depth, AutoNumber *auto_number);
 …
 /**************************  Utility  functions  ************************/
 /************************************************************************/
+static void
+AutoNumber_Init(AutoNumber *auto_number)
+{   /* Put auto_number into its starting state: no numbering mode chosen
+       yet, and the first automatically numbered field gets index 0. */
+    auto_number->an_state = ANS_INIT;
+    auto_number->an_field_number = 0;
+}
 /* fill in a SubString from a pointer and length */
 …
     return STRINGLIB_NEW(str->ptr, str->end - str->ptr);
+}
+/* Return 1 if an error has been detected switching between automatic
+   field numbering and manual field specification, else return 0. Set
+   ValueError on error.
+   state is the numbering mode recorded so far.  field_name_is_empty is
+   non-zero when the current field has no name, which means it asks for
+   automatic numbering.  Every state other than ANS_MANUAL is handled by
+   the else branch and is therefore treated as automatic numbering. */
+static int
+autonumber_state_error(AutoNumberState state, int field_name_is_empty)
+{
+    if (state == ANS_MANUAL) {
+        if (field_name_is_empty) {   /* manual mode, but this field wants an automatic number */
+            PyErr_SetString(PyExc_ValueError, "cannot switch from "
+                            "manual field specification to "
+                            "automatic field numbering");
+            return 1;
+        }
+    }
+    else {
+        if (!field_name_is_empty) {   /* automatic mode, but this field names its index */
+            PyErr_SetString(PyExc_ValueError, "cannot switch from "
+                            "automatic field numbering to "
+                            "manual field specification");
+            return 1;
+        }
+    }
+    return 0;   /* the field is consistent with the recorded mode */
+}
 /************************************************************************/
 …
     Py_ssize_t accumulator = 0;
     Py_ssize_t digitval;
-    Py_ssize_t oldaccumulator;
     STRINGLIB_CHAR *p;
 …
             return -1;
         /*
            This trick was copied from old Unicode format code.  It's cute,
+           but would really suck on an old machine with a slow divide
            implementation.  Fortunately, in the normal case we do not
            expect too many digits.
+           Detect possible overflow before it happens:
+              accumulator * 10 + digitval > PY_SSIZE_T_MAX if and only if
+              accumulator > (PY_SSIZE_T_MAX - digitval) / 10.
         */
+        oldaccumulator = accumulator;
+        accumulator *= 10;
+        if ((accumulator+10)/10 != oldaccumulator+1) {
+        if (accumulator > (PY_SSIZE_T_MAX - digitval) / 10) {
             PyErr_Format(PyExc_ValueError,
                          "Too many decimal digits in format string");
             return -1;
+        }
         accumulator += digitval;
+        accumulator = accumulator * 10 + digitval;
+    }
     return accumulator;
 …
             return 0;
         *name_idx = get_integer(name);
+        if (*name_idx == -1 && PyErr_Occurred())
+            return 0;
         break;
     default:
 …
 static int
 field_name_split(STRINGLIB_CHAR *ptr, Py_ssize_t len, SubString *first,
+                 Py_ssize_t *first_idx, FieldNameIterator *rest)
+                 Py_ssize_t *first_idx, FieldNameIterator *rest,
+                 AutoNumber *auto_number)
+{
     STRINGLIB_CHAR c;
     STRINGLIB_CHAR *p = ptr;
     STRINGLIB_CHAR *end = ptr + len;
+    int field_name_is_empty;
+    int using_numeric_index;
     /* find the part up until the first '.' or '[' */
 …
     /* see if "first" is an integer, in which case it's used as an index */
     *first_idx = get_integer(first);
+    /* zero length string is an error */
+    if (first->ptr >= first->end) {
+        PyErr_SetString(PyExc_ValueError, "empty field name");
+        goto error;
+    if (*first_idx == -1 && PyErr_Occurred())
+        return 0;
+    field_name_is_empty = first->ptr >= first->end;
+    /* If the field name is omitted or if we have a numeric index
+       specified, then we're doing numeric indexing into args. */
+    using_numeric_index = field_name_is_empty || *first_idx != -1;
+    /* We always get here exactly one time for each field we're
+       processing. And we get here in field order (counting by left
+       braces). So this is the perfect place to handle automatic field
+       numbering if the field name is omitted. */
+    /* Check if we need to do the auto-numbering. It's not needed if
+       we're called from string.Format routines, because it's handled
+       in that class by itself. */
+    if (auto_number) {
+        /* Initialize our auto numbering state if this is the first
+           time we're either auto-numbering or manually numbering. */
+        if (auto_number->an_state == ANS_INIT && using_numeric_index)
+            auto_number->an_state = field_name_is_empty ?
+                ANS_AUTO : ANS_MANUAL;
+        /* Make sure our state is consistent with what we're doing
+           this time through. Only check if we're using a numeric
+           index. */
+        if (using_numeric_index)
+            if (autonumber_state_error(auto_number->an_state,
+                                       field_name_is_empty))
+                return 0;
+        /* Zero length field means we want to do auto-numbering of the
+           fields. */
+        if (field_name_is_empty)
+            *first_idx = (auto_number->an_field_number)++;
+    }
     return 1;
-error:
-    return 0;
+}
 …
 */
 static PyObject *
+get_field_object(SubString *input, PyObject *args, PyObject *kwargs)
+get_field_object(SubString *input, PyObject *args, PyObject *kwargs,
+                 AutoNumber *auto_number)
+{
     PyObject *obj = NULL;
 …
     if (!field_name_split(input->ptr, input->end - input->ptr, &first,
                           &index, &rest)) {
+                          &index, &rest, auto_number)) {
         goto error;
+    }
 …
     PyObject *(*formatter)(PyObject *, STRINGLIB_CHAR *, Py_ssize_t) = NULL;
     STRINGLIB_CHAR* format_spec_start = format_spec->ptr ?
             format_spec->ptr : NULL;
+            format_spec->ptr : NULL;
     Py_ssize_t format_spec_len = format_spec->ptr ?
             format_spec->end - format_spec->ptr : 0;
+            format_spec->end - format_spec->ptr : 0;
     /* If we know the type exactly, skip the lookup of __format__ and just
 …
 #if STRINGLIB_IS_UNICODE
     if (PyUnicode_CheckExact(fieldobj))
         formatter = _PyUnicode_FormatAdvanced;
+        formatter = _PyUnicode_FormatAdvanced;
     /* Unfortunately, there's a problem with checking for int, long,
        and float here.  If we're being included as unicode, their
 …
 #else
     if (PyString_CheckExact(fieldobj))
         formatter = _PyBytes_FormatAdvanced;
+        formatter = _PyBytes_FormatAdvanced;
     else if (PyInt_CheckExact(fieldobj))
         formatter =_PyInt_FormatAdvanced;
+        formatter =_PyInt_FormatAdvanced;
     else if (PyLong_CheckExact(fieldobj))
         formatter =_PyLong_FormatAdvanced;
+        formatter =_PyLong_FormatAdvanced;
     else if (PyFloat_CheckExact(fieldobj))
         formatter = _PyFloat_FormatAdvanced;
+        formatter = _PyFloat_FormatAdvanced;
 #endif
     if (formatter) {
         /* we know exactly which formatter will be called when __format__ is
            looked up, so call it directly, instead. */
         result = formatter(fieldobj, format_spec_start, format_spec_len);
+        /* we know exactly which formatter will be called when __format__ is
+           looked up, so call it directly, instead. */
+        result = formatter(fieldobj, format_spec_start, format_spec_len);
+    }
     else {
         /* We need to create an object out of the pointers we have, because
            __format__ takes a string/unicode object for format_spec. */
         format_spec_object = STRINGLIB_NEW(format_spec_start,
                                            format_spec_len);
         if (format_spec_object == NULL)
             goto done;
         result = PyObject_Format(fieldobj, format_spec_object);
+        /* We need to create an object out of the pointers we have, because
+           __format__ takes a string/unicode object for format_spec. */
+        format_spec_object = STRINGLIB_NEW(format_spec_start,
+                                           format_spec_len);
+        if (format_spec_object == NULL)
+            goto done;
+        result = PyObject_Format(fieldobj, format_spec_object);
+    }
     if (result == NULL)
 …
        be unicode */
+    {
         PyObject *tmp = STRINGLIB_TOSTR(result);
         if (tmp == NULL)
             goto done;
         Py_DECREF(result);
         result = tmp;
+        PyObject *tmp = STRINGLIB_TOSTR(result);
+        if (tmp == NULL)
+            goto done;
+        Py_DECREF(result);
+        result = tmp;
+    }
 #endif
 …
             STRINGLIB_CHAR *conversion)
+{
+    /* Note this function works if the field name is zero length,
+       which is good.  Zero length field names are handled later, in
+       field_name_split. */
     STRINGLIB_CHAR c = 0;
 …
     SubString_init(format_spec, NULL, 0);
     /* search for the field name.  it's terminated by the end of the
        string, or a ':' or '!' */
+    /* Search for the field name.  it's terminated by the end of
+       the string, or a ':' or '!' */
     field_name->ptr = str->ptr;
     while (str->ptr < str->end) {
 …
+            }
+        }
+        return 1;
+    }
+    else {
+    }
+    else
         /* end of string, there's no format_spec or conversion */
         field_name->end = str->ptr;
+        return 1;
+    }
+    return 1;
+}
 …
 static int
 MarkupIterator_next(MarkupIterator *self, SubString *literal,
                     SubString *field_name, SubString *format_spec,
                     STRINGLIB_CHAR *conversion,
+                    int *field_present, SubString *field_name,
+                    SubString *format_spec, STRINGLIB_CHAR *conversion,
                     int *format_spec_needs_expanding)
+{
 …
     *conversion = '\0';
     *format_spec_needs_expanding = 0;
+    *field_present = 0;
     /* No more input, end of iterator.  This is the normal exit
 …
        braces.  note that this prohibits escaped braces, so that
        format_specs cannot have braces in them. */
+    *field_present = 1;
     count = 1;
 …
                     return 0;
-                /* a zero length field_name is an error */
-                if (field_name->ptr == field_name->end) {
-                    PyErr_SetString(PyExc_ValueError, "zero length field name "
-                                    "in format");
-                    return 0;
+                }
                 /* success */
                 return 2;
 …
         return STRINGLIB_TOSTR(obj);
     default:
         if (conversion > 32 && conversion < 127) {
                 /* It's the ASCII subrange; casting to char is safe
                    (assuming the execution character set is an ASCII
                    superset). */
                 PyErr_Format(PyExc_ValueError,
+        if (conversion > 32 && conversion < 127) {
+                /* It's the ASCII subrange; casting to char is safe
+                   (assuming the execution character set is an ASCII
+                   superset). */
+                PyErr_Format(PyExc_ValueError,
                      "Unknown conversion specifier %c",
                      (char)conversion);
         } else
                 PyErr_Format(PyExc_ValueError,
                      "Unknown conversion specifier \\x%x",
                      (unsigned int)conversion);
+        } else
+                PyErr_Format(PyExc_ValueError,
+                     "Unknown conversion specifier \\x%x",
+                     (unsigned int)conversion);
         return NULL;
+    }
 …
    format_spec_needs_expanding is an optimization.  if it's false,
    just output the string directly, otherwise recursively expand the
+   format_spec string. */
+   format_spec string.
+   field_name is allowed to be zero length, in which case we
+   are doing auto field numbering.
+*/
 static int
 …
               int format_spec_needs_expanding, STRINGLIB_CHAR conversion,
               OutputString *output, PyObject *args, PyObject *kwargs,
               int recursion_depth)
+              int recursion_depth, AutoNumber *auto_number)
+{
     PyObject *tmp = NULL;
 …
     /* convert field_name to an object */
     fieldobj = get_field_object(field_name, args, kwargs);
+    fieldobj = get_field_object(field_name, args, kwargs, auto_number);
     if (fieldobj == NULL)
         goto done;
 …
     /* if needed, recursively compute the format_spec */
     if (format_spec_needs_expanding) {
+        tmp = build_string(format_spec, args, kwargs, recursion_depth-1);
+        tmp = build_string(format_spec, args, kwargs, recursion_depth-1,
+                           auto_number);
         if (tmp == NULL)
             goto done;
 …
 static int
 do_markup(SubString *input, PyObject *args, PyObject *kwargs,
           OutputString *output, int recursion_depth)
+          OutputString *output, int recursion_depth, AutoNumber *auto_number)
+{
     MarkupIterator iter;
     int format_spec_needs_expanding;
     int result;
+    int field_present;
     SubString literal;
     SubString field_name;
 …
     MarkupIterator_init(&iter, input->ptr, input->end - input->ptr);
+    while ((result = MarkupIterator_next(&iter, &literal, &field_name,
+                                         &format_spec, &conversion,
+    while ((result = MarkupIterator_next(&iter, &literal, &field_present,
+                                         &field_name, &format_spec,
+                                         &conversion,
                                          &format_spec_needs_expanding)) == 2) {
         if (!output_data(output, literal.ptr, literal.end - literal.ptr))
             return 0;
         if (field_name.ptr != field_name.end)
+        if (field_present)
             if (!output_markup(&field_name, &format_spec,
                                format_spec_needs_expanding, conversion, output,
                                args, kwargs, recursion_depth))
+                               args, kwargs, recursion_depth, auto_number))
                 return 0;
+    }
 …
 static PyObject *
 build_string(SubString *input, PyObject *args, PyObject *kwargs,
              int recursion_depth)
+             int recursion_depth, AutoNumber *auto_number)
+{
     OutputString output;
 …
         goto done;
+    if (!do_markup(input, args, kwargs, &output, recursion_depth)) {
+    if (!do_markup(input, args, kwargs, &output, recursion_depth,
+                   auto_number)) {
         goto done;
+    }
 …
     int recursion_depth = 2;
+    AutoNumber auto_number;
+    AutoNumber_Init(&auto_number);
     SubString_init(&input, STRINGLIB_STR(self), STRINGLIB_LEN(self));
     return build_string(&input, args, kwargs, recursion_depth);
+    return build_string(&input, args, kwargs, recursion_depth, &auto_number);
+}
 …
     STRINGLIB_CHAR conversion;
     int format_spec_needs_expanding;
+    int result = MarkupIterator_next(&it->it_markup, &literal, &field_name,
+                                     &format_spec, &conversion,
+    int field_present;
+    int result = MarkupIterator_next(&it->it_markup, &literal, &field_present,
+                                     &field_name, &format_spec, &conversion,
                                      &format_spec_needs_expanding);
 …
         PyObject *conversion_str = NULL;
         PyObject *tuple = NULL;
-        int has_field = field_name.ptr != field_name.end;
         literal_str = SubString_new_object(&literal);
 …
         /* if field_name is non-zero length, return a string for
            format_spec (even if zero length), else return None */
         format_spec_str = (has_field ?
+        format_spec_str = (field_present ?
                            SubString_new_object_or_empty :
                            SubString_new_object)(&format_spec);
 …
+        }
         else
             conversion_str = STRINGLIB_NEW(&conversion, 1);
+            conversion_str = STRINGLIB_NEW(&conversion, 1);
         if (conversion_str == NULL)
             goto done;
 …
 static PyMethodDef formatteriter_methods[] = {
     {NULL,              NULL}           /* sentinel */
+    {NULL,              NULL}           /* sentinel */
 };
 static PyTypeObject PyFormatterIter_Type = {
     PyVarObject_HEAD_INIT(&PyType_Type, 0)
     "formatteriterator",                /* tp_name */
     sizeof(formatteriterobject),        /* tp_basicsize */
     0,                                  /* tp_itemsize */
+    "formatteriterator",                /* tp_name */
+    sizeof(formatteriterobject),        /* tp_basicsize */
+    0,                                  /* tp_itemsize */
     /* methods */
     (destructor)formatteriter_dealloc,  /* tp_dealloc */
     0,                                  /* tp_print */
     0,                                  /* tp_getattr */
     0,                                  /* tp_setattr */
     0,                                  /* tp_compare */
     0,                                  /* tp_repr */
     0,                                  /* tp_as_number */
     0,                                  /* tp_as_sequence */
     0,                                  /* tp_as_mapping */
     0,                                  /* tp_hash */
     0,                                  /* tp_call */
     0,                                  /* tp_str */
     PyObject_GenericGetAttr,            /* tp_getattro */
     0,                                  /* tp_setattro */
     0,                                  /* tp_as_buffer */
     Py_TPFLAGS_DEFAULT,                 /* tp_flags */
     0,                                  /* tp_doc */
     0,                                  /* tp_traverse */
     0,                                  /* tp_clear */
     0,                                  /* tp_richcompare */
     0,                                  /* tp_weaklistoffset */
     PyObject_SelfIter,                  /* tp_iter */
     (iternextfunc)formatteriter_next,   /* tp_iternext */
     formatteriter_methods,              /* tp_methods */
+    (destructor)formatteriter_dealloc,  /* tp_dealloc */
+    0,                                  /* tp_print */
+    0,                                  /* tp_getattr */
+    0,                                  /* tp_setattr */
+    0,                                  /* tp_compare */
+    0,                                  /* tp_repr */
+    0,                                  /* tp_as_number */
+    0,                                  /* tp_as_sequence */
+    0,                                  /* tp_as_mapping */
+    0,                                  /* tp_hash */
+    0,                                  /* tp_call */
+    0,                                  /* tp_str */
+    PyObject_GenericGetAttr,            /* tp_getattro */
+    0,                                  /* tp_setattro */
+    0,                                  /* tp_as_buffer */
+    Py_TPFLAGS_DEFAULT,                 /* tp_flags */
+    0,                                  /* tp_doc */
+    0,                                  /* tp_traverse */
+    0,                                  /* tp_clear */
+    0,                                  /* tp_richcompare */
+    0,                                  /* tp_weaklistoffset */
+    PyObject_SelfIter,                  /* tp_iter */
+    (iternextfunc)formatteriter_next,   /* tp_iternext */
+    formatteriter_methods,              /* tp_methods */
     0,
 };
 …
 static PyMethodDef fieldnameiter_methods[] = {
     {NULL,              NULL}           /* sentinel */
+    {NULL,              NULL}           /* sentinel */
 };
 static PyTypeObject PyFieldNameIter_Type = {
     PyVarObject_HEAD_INIT(&PyType_Type, 0)
     "fieldnameiterator",                /* tp_name */
     sizeof(fieldnameiterobject),        /* tp_basicsize */
     0,                                  /* tp_itemsize */
+    "fieldnameiterator",                /* tp_name */
+    sizeof(fieldnameiterobject),        /* tp_basicsize */
+    0,                                  /* tp_itemsize */
     /* methods */
     (destructor)fieldnameiter_dealloc,  /* tp_dealloc */
     0,                                  /* tp_print */
     0,                                  /* tp_getattr */
     0,                                  /* tp_setattr */
     0,                                  /* tp_compare */
     0,                                  /* tp_repr */
     0,                                  /* tp_as_number */
     0,                                  /* tp_as_sequence */
     0,                                  /* tp_as_mapping */
     0,                                  /* tp_hash */
     0,                                  /* tp_call */
     0,                                  /* tp_str */
     PyObject_GenericGetAttr,            /* tp_getattro */
     0,                                  /* tp_setattro */
     0,                                  /* tp_as_buffer */
     Py_TPFLAGS_DEFAULT,                 /* tp_flags */
     0,                                  /* tp_doc */
     0,                                  /* tp_traverse */
     0,                                  /* tp_clear */
     0,                                  /* tp_richcompare */
     0,                                  /* tp_weaklistoffset */
     PyObject_SelfIter,                  /* tp_iter */
     (iternextfunc)fieldnameiter_next,   /* tp_iternext */
     fieldnameiter_methods,              /* tp_methods */
+    (destructor)fieldnameiter_dealloc,  /* tp_dealloc */
+    0,                                  /* tp_print */
+    0,                                  /* tp_getattr */
+    0,                                  /* tp_setattr */
+    0,                                  /* tp_compare */
+    0,                                  /* tp_repr */
+    0,                                  /* tp_as_number */
+    0,                                  /* tp_as_sequence */
+    0,                                  /* tp_as_mapping */
+    0,                                  /* tp_hash */
+    0,                                  /* tp_call */
+    0,                                  /* tp_str */
+    PyObject_GenericGetAttr,            /* tp_getattro */
+    0,                                  /* tp_setattro */
+    0,                                  /* tp_as_buffer */
+    Py_TPFLAGS_DEFAULT,                 /* tp_flags */
+    0,                                  /* tp_doc */
+    0,                                  /* tp_traverse */
+    0,                                  /* tp_clear */
+    0,                                  /* tp_richcompare */
+    0,                                  /* tp_weaklistoffset */
+    PyObject_SelfIter,                  /* tp_iter */
+    (iternextfunc)fieldnameiter_next,   /* tp_iternext */
+    fieldnameiter_methods,              /* tp_methods */
 };
 …
     it->str = self;
+    /* Pass in auto_number = NULL. We'll return an empty string for
+       first_obj in that case. */
     if (!field_name_split(STRINGLIB_STR(self),
                           STRINGLIB_LEN(self),
                           &first, &first_idx, &it->it_field))
+                          &first, &first_idx, &it->it_field, NULL))
         goto done;

python/trunk/Objects/stringlib/stringdefs.h

-              r2
+              r391
 #define STRINGLIB_PARSE_CODE     "S"
 #define STRINGLIB_EMPTY          nullstring
+#define STRINGLIB_ISSPACE        Py_ISSPACE
+#define STRINGLIB_ISLINEBREAK(x) ((x == '\n') || (x == '\r'))
 #define STRINGLIB_ISDECIMAL(x)   ((x >= '0') && (x <= '9'))
 #define STRINGLIB_TODECIMAL(x)   (STRINGLIB_ISDECIMAL(x) ? (x - '0') : -1)
 #define STRINGLIB_TOUPPER        toupper
 #define STRINGLIB_TOLOWER        tolower
+#define STRINGLIB_TOUPPER        Py_TOUPPER
+#define STRINGLIB_TOLOWER        Py_TOLOWER
 #define STRINGLIB_FILL           memset
 #define STRINGLIB_STR            PyString_AS_STRING
 …
 #define STRINGLIB_RESIZE         _PyString_Resize
 #define STRINGLIB_CHECK          PyString_Check
 #define STRINGLIB_CMP            memcmp
+#define STRINGLIB_CHECK_EXACT    PyString_CheckExact
 #define STRINGLIB_TOSTR          PyObject_Str
 #define STRINGLIB_GROUPING       _PyString_InsertThousandsGrouping
+#define STRINGLIB_GROUPING_LOCALE _PyString_InsertThousandsGroupingLocale
+#define STRINGLIB_WANT_CONTAINS_OBJ 1
 #endif /* !STRINGLIB_STRINGDEFS_H */

python/trunk/Objects/stringlib/transmogrify.h

-              r2
+              r391
 /* NOTE: this API is -ONLY- for use with single byte character strings. */
 /* Do not use it with Unicode. */
-#include "bytes_methods.h"
-#ifndef STRINGLIB_MUTABLE
-#warning "STRINGLIB_MUTABLE not defined before #include, assuming 0"
-#define STRINGLIB_MUTABLE 0
-#endif
 /* the more complicated methods.  parts of these should be pulled out into the
 …
     return (PyObject*) s;
+}
-#define _STRINGLIB_SPLIT_APPEND(data, left, right)              \
-        str = STRINGLIB_NEW((data) + (left),                    \
-                                         (right) - (left));     \
-        if (str == NULL)                                        \
-                goto onError;                                   \
-        if (PyList_Append(list, str)) {                         \
-                Py_DECREF(str);                                 \
-                goto onError;                                   \
-        }                                                       \
-        else                                                    \
-                Py_DECREF(str);
-PyDoc_STRVAR(splitlines__doc__,
-"B.splitlines([keepends]) -> list of lines\n\
-\n\
-Return a list of the lines in B, breaking at line boundaries.\n\
-Line breaks are not included in the resulting list unless keepends\n\
-is given and true.");
-static PyObject*
-stringlib_splitlines(PyObject *self, PyObject *args)
+{
-    register Py_ssize_t i;
-    register Py_ssize_t j;
-    Py_ssize_t len;
-    int keepends = 0;
-    PyObject *list;
-    PyObject *str;
-    char *data;
-    if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
-        return NULL;
-    data = STRINGLIB_STR(self);
-    len = STRINGLIB_LEN(self);
-    /* This does not use the preallocated list because splitlines is
-       usually run with hundreds of newlines.  The overhead of
-       switching between PyList_SET_ITEM and append causes about a
-       2-3% slowdown for that common case.  A smarter implementation
-       could move the if check out, so the SET_ITEMs are done first
-       and the appends only done when the prealloc buffer is full.
-       That's too much work for little gain.*/
-    list = PyList_New(0);
-    if (!list)
-        goto onError;
-    for (i = j = 0; i < len; ) {
-        Py_ssize_t eol;
-        /* Find a line and append it */
-        while (i < len && data[i] != '\n' && data[i] != '\r')
-            i++;
-        /* Skip the line break reading CRLF as one line break */
-        eol = i;
-        if (i < len) {
-            if (data[i] == '\r' && i + 1 < len &&
-                data[i+1] == '\n')
-                i += 2;
-            else
-                i++;
-            if (keepends)
-                eol = i;
+        }
-        _STRINGLIB_SPLIT_APPEND(data, j, eol);
-        j = i;
+    }
-    if (j < len) {
-        _STRINGLIB_SPLIT_APPEND(data, j, len);
+    }
-    return list;
- onError:
-    Py_XDECREF(list);
-    return NULL;
+}
-#undef _STRINGLIB_SPLIT_APPEND

python/trunk/Objects/stringlib/unicodedefs.h

-              r2
+              r391
 #define STRINGLIB_PARSE_CODE     "U"
 #define STRINGLIB_EMPTY          unicode_empty
+#define STRINGLIB_ISSPACE        Py_UNICODE_ISSPACE
+#define STRINGLIB_ISLINEBREAK    BLOOM_LINEBREAK
 #define STRINGLIB_ISDECIMAL      Py_UNICODE_ISDECIMAL
 #define STRINGLIB_TODECIMAL      Py_UNICODE_TODECIMAL
 …
 #define STRINGLIB_RESIZE         PyUnicode_Resize
 #define STRINGLIB_CHECK          PyUnicode_Check
+#define STRINGLIB_CHECK_EXACT    PyUnicode_CheckExact
 #define STRINGLIB_GROUPING       _PyUnicode_InsertThousandsGrouping
 …
 #define STRINGLIB_WANT_CONTAINS_OBJ 1
-/* STRINGLIB_CMP was defined as:
-Py_LOCAL_INLINE(int)
-STRINGLIB_CMP(const Py_UNICODE* str, const Py_UNICODE* other, Py_ssize_t len)
+{
-    if (str[0] != other[0])
-        return 1;
-    return memcmp((void*) str, (void*) other, len * sizeof(Py_UNICODE));
+}
-but unfortunately that gives an error if the function isn't used in a file that
-includes this file.  So, reluctantly convert it to a macro instead. */
-#define STRINGLIB_CMP(str, other, len) \
-    (((str)[0] != (other)[0]) ? \
-     1 : \
-     memcmp((void*) (str), (void*) (other), (len) * sizeof(Py_UNICODE)))
 #endif /* !STRINGLIB_UNICODEDEFS_H */

Context Navigation

Legend:

python/trunk

python/trunk/Objects/stringlib/README.txt

python/trunk/Objects/stringlib/count.h

python/trunk/Objects/stringlib/ctype.h

python/trunk/Objects/stringlib/fastsearch.h

python/trunk/Objects/stringlib/find.h

python/trunk/Objects/stringlib/formatter.h

python/trunk/Objects/stringlib/localeutil.h

python/trunk/Objects/stringlib/partition.h

python/trunk/Objects/stringlib/string_format.h

python/trunk/Objects/stringlib/stringdefs.h

python/trunk/Objects/stringlib/transmogrify.h

python/trunk/Objects/stringlib/unicodedefs.h

Download in other formats:

/python/vendor/Python-2.7.6	merged	eligible
/python/vendor/current	merged	eligible