1 | /*
|
---|
2 | string_format.h -- implementation of string.format().
|
---|
3 |
|
---|
4 | It uses the Objects/stringlib conventions, so that it can be
|
---|
5 | compiled for both unicode and string objects.
|
---|
6 | */
|
---|
7 |
|
---|
8 |
|
---|
/* Defines for Python 2.6 compatibility.  When building against a
   pre-3.0 Python, the name PyLong_FromSsize_t is redirected to
   _PyLong_FromSsize_t so the rest of this file can use one spelling
   (see getitem_idx). */
#if PY_VERSION_HEX < 0x03000000
#define PyLong_FromSsize_t _PyLong_FromSsize_t
#endif
|
---|
13 |
|
---|
/* Defines for more efficiently reallocating the string buffer.

   INITIAL_SIZE_INCREMENT is the slack (in characters) an OutputString
   starts with; each successful output_extend() multiplies that slack by
   SIZE_MULTIPLIER for as long as it is still below MAX_SIZE_INCREMENT. */
#define INITIAL_SIZE_INCREMENT 100
#define SIZE_MULTIPLIER 2
#define MAX_SIZE_INCREMENT 3200
|
---|
18 |
|
---|
19 |
|
---|
20 | /************************************************************************/
|
---|
21 | /*********** Global data structures and forward declarations *********/
|
---|
22 | /************************************************************************/
|
---|
23 |
|
---|
/*
   A SubString consists of the characters between two string or
   unicode pointers.  It is a half-open range: 'ptr' addresses the first
   character and 'end' is one past the last.  A SubString whose 'ptr' is
   NULL represents "no string at all" (see SubString_init and
   SubString_new_object).
*/
typedef struct {
    STRINGLIB_CHAR *ptr;    /* first character, or NULL */
    STRINGLIB_CHAR *end;    /* one past the last character, or NULL */
} SubString;
|
---|
32 |
|
---|
33 |
|
---|
/* Keep track if we're auto-numbering fields. */
typedef enum {
    ANS_INIT,       /* no numerically indexed field has been seen yet */
    ANS_AUTO,       /* first numeric field had an empty name ("{}") */
    ANS_MANUAL      /* first numeric field had an explicit index ("{0}") */
} AutoNumberState;
|
---|
39 |
|
---|
/* Keeps track of our auto-numbering state, and which number field we're on */
typedef struct {
    AutoNumberState an_state;   /* current numbering mode */
    int an_field_number;        /* index handed to the next empty "{}" field */
} AutoNumber;
|
---|
45 |
|
---|
46 |
|
---|
/* forward declaration for recursion: build_string is declared here so
   that code appearing before its definition can call it. */
static PyObject *
build_string(SubString *input, PyObject *args, PyObject *kwargs,
             int recursion_depth, AutoNumber *auto_number);
|
---|
51 |
|
---|
52 |
|
---|
53 |
|
---|
54 | /************************************************************************/
|
---|
55 | /************************** Utility functions ************************/
|
---|
56 | /************************************************************************/
|
---|
57 |
|
---|
58 | static void
|
---|
59 | AutoNumber_Init(AutoNumber *auto_number)
|
---|
60 | {
|
---|
61 | auto_number->an_state = ANS_INIT;
|
---|
62 | auto_number->an_field_number = 0;
|
---|
63 | }
|
---|
64 |
|
---|
65 | /* fill in a SubString from a pointer and length */
|
---|
66 | Py_LOCAL_INLINE(void)
|
---|
67 | SubString_init(SubString *str, STRINGLIB_CHAR *p, Py_ssize_t len)
|
---|
68 | {
|
---|
69 | str->ptr = p;
|
---|
70 | if (p == NULL)
|
---|
71 | str->end = NULL;
|
---|
72 | else
|
---|
73 | str->end = str->ptr + len;
|
---|
74 | }
|
---|
75 |
|
---|
76 | /* return a new string. if str->ptr is NULL, return None */
|
---|
77 | Py_LOCAL_INLINE(PyObject *)
|
---|
78 | SubString_new_object(SubString *str)
|
---|
79 | {
|
---|
80 | if (str->ptr == NULL) {
|
---|
81 | Py_INCREF(Py_None);
|
---|
82 | return Py_None;
|
---|
83 | }
|
---|
84 | return STRINGLIB_NEW(str->ptr, str->end - str->ptr);
|
---|
85 | }
|
---|
86 |
|
---|
87 | /* return a new string. if str->ptr is NULL, return None */
|
---|
88 | Py_LOCAL_INLINE(PyObject *)
|
---|
89 | SubString_new_object_or_empty(SubString *str)
|
---|
90 | {
|
---|
91 | if (str->ptr == NULL) {
|
---|
92 | return STRINGLIB_NEW(NULL, 0);
|
---|
93 | }
|
---|
94 | return STRINGLIB_NEW(str->ptr, str->end - str->ptr);
|
---|
95 | }
|
---|
96 |
|
---|
97 | /* Return 1 if an error has been detected switching between automatic
|
---|
98 | field numbering and manual field specification, else return 0. Set
|
---|
99 | ValueError on error. */
|
---|
100 | static int
|
---|
101 | autonumber_state_error(AutoNumberState state, int field_name_is_empty)
|
---|
102 | {
|
---|
103 | if (state == ANS_MANUAL) {
|
---|
104 | if (field_name_is_empty) {
|
---|
105 | PyErr_SetString(PyExc_ValueError, "cannot switch from "
|
---|
106 | "manual field specification to "
|
---|
107 | "automatic field numbering");
|
---|
108 | return 1;
|
---|
109 | }
|
---|
110 | }
|
---|
111 | else {
|
---|
112 | if (!field_name_is_empty) {
|
---|
113 | PyErr_SetString(PyExc_ValueError, "cannot switch from "
|
---|
114 | "automatic field numbering to "
|
---|
115 | "manual field specification");
|
---|
116 | return 1;
|
---|
117 | }
|
---|
118 | }
|
---|
119 | return 0;
|
---|
120 | }
|
---|
121 |
|
---|
122 |
|
---|
123 | /************************************************************************/
|
---|
124 | /*********** Output string management functions ****************/
|
---|
125 | /************************************************************************/
|
---|
126 |
|
---|
/* Growable output buffer backed by a string object. */
typedef struct {
    STRINGLIB_CHAR *ptr;        /* next position to write to */
    STRINGLIB_CHAR *end;        /* one past the last writable position */
    PyObject *obj;              /* string object whose storage is being filled */
    Py_ssize_t size_increment;  /* extra slack requested on the next resize */
} OutputString;
|
---|
133 |
|
---|
134 | /* initialize an OutputString object, reserving size characters */
|
---|
135 | static int
|
---|
136 | output_initialize(OutputString *output, Py_ssize_t size)
|
---|
137 | {
|
---|
138 | output->obj = STRINGLIB_NEW(NULL, size);
|
---|
139 | if (output->obj == NULL)
|
---|
140 | return 0;
|
---|
141 |
|
---|
142 | output->ptr = STRINGLIB_STR(output->obj);
|
---|
143 | output->end = STRINGLIB_LEN(output->obj) + output->ptr;
|
---|
144 | output->size_increment = INITIAL_SIZE_INCREMENT;
|
---|
145 |
|
---|
146 | return 1;
|
---|
147 | }
|
---|
148 |
|
---|
149 | /*
|
---|
150 | output_extend reallocates the output string buffer.
|
---|
151 | It returns a status: 0 for a failed reallocation,
|
---|
152 | 1 for success.
|
---|
153 | */
|
---|
154 |
|
---|
155 | static int
|
---|
156 | output_extend(OutputString *output, Py_ssize_t count)
|
---|
157 | {
|
---|
158 | STRINGLIB_CHAR *startptr = STRINGLIB_STR(output->obj);
|
---|
159 | Py_ssize_t curlen = output->ptr - startptr;
|
---|
160 | Py_ssize_t maxlen = curlen + count + output->size_increment;
|
---|
161 |
|
---|
162 | if (STRINGLIB_RESIZE(&output->obj, maxlen) < 0)
|
---|
163 | return 0;
|
---|
164 | startptr = STRINGLIB_STR(output->obj);
|
---|
165 | output->ptr = startptr + curlen;
|
---|
166 | output->end = startptr + maxlen;
|
---|
167 | if (output->size_increment < MAX_SIZE_INCREMENT)
|
---|
168 | output->size_increment *= SIZE_MULTIPLIER;
|
---|
169 | return 1;
|
---|
170 | }
|
---|
171 |
|
---|
172 | /*
|
---|
173 | output_data dumps characters into our output string
|
---|
174 | buffer.
|
---|
175 |
|
---|
176 | In some cases, it has to reallocate the string.
|
---|
177 |
|
---|
178 | It returns a status: 0 for a failed reallocation,
|
---|
179 | 1 for success.
|
---|
180 | */
|
---|
181 | static int
|
---|
182 | output_data(OutputString *output, const STRINGLIB_CHAR *s, Py_ssize_t count)
|
---|
183 | {
|
---|
184 | if ((count > output->end - output->ptr) && !output_extend(output, count))
|
---|
185 | return 0;
|
---|
186 | memcpy(output->ptr, s, count * sizeof(STRINGLIB_CHAR));
|
---|
187 | output->ptr += count;
|
---|
188 | return 1;
|
---|
189 | }
|
---|
190 |
|
---|
191 | /************************************************************************/
|
---|
192 | /*********** Format string parsing -- integers and identifiers *********/
|
---|
193 | /************************************************************************/
|
---|
194 |
|
---|
195 | static Py_ssize_t
|
---|
196 | get_integer(const SubString *str)
|
---|
197 | {
|
---|
198 | Py_ssize_t accumulator = 0;
|
---|
199 | Py_ssize_t digitval;
|
---|
200 | STRINGLIB_CHAR *p;
|
---|
201 |
|
---|
202 | /* empty string is an error */
|
---|
203 | if (str->ptr >= str->end)
|
---|
204 | return -1;
|
---|
205 |
|
---|
206 | for (p = str->ptr; p < str->end; p++) {
|
---|
207 | digitval = STRINGLIB_TODECIMAL(*p);
|
---|
208 | if (digitval < 0)
|
---|
209 | return -1;
|
---|
210 | /*
|
---|
211 | Detect possible overflow before it happens:
|
---|
212 |
|
---|
213 | accumulator * 10 + digitval > PY_SSIZE_T_MAX if and only if
|
---|
214 | accumulator > (PY_SSIZE_T_MAX - digitval) / 10.
|
---|
215 | */
|
---|
216 | if (accumulator > (PY_SSIZE_T_MAX - digitval) / 10) {
|
---|
217 | PyErr_Format(PyExc_ValueError,
|
---|
218 | "Too many decimal digits in format string");
|
---|
219 | return -1;
|
---|
220 | }
|
---|
221 | accumulator = accumulator * 10 + digitval;
|
---|
222 | }
|
---|
223 | return accumulator;
|
---|
224 | }
|
---|
225 |
|
---|
226 | /************************************************************************/
|
---|
227 | /******** Functions to get field objects and specification strings ******/
|
---|
228 | /************************************************************************/
|
---|
229 |
|
---|
230 | /* do the equivalent of obj.name */
|
---|
231 | static PyObject *
|
---|
232 | getattr(PyObject *obj, SubString *name)
|
---|
233 | {
|
---|
234 | PyObject *newobj;
|
---|
235 | PyObject *str = SubString_new_object(name);
|
---|
236 | if (str == NULL)
|
---|
237 | return NULL;
|
---|
238 | newobj = PyObject_GetAttr(obj, str);
|
---|
239 | Py_DECREF(str);
|
---|
240 | return newobj;
|
---|
241 | }
|
---|
242 |
|
---|
243 | /* do the equivalent of obj[idx], where obj is a sequence */
|
---|
244 | static PyObject *
|
---|
245 | getitem_sequence(PyObject *obj, Py_ssize_t idx)
|
---|
246 | {
|
---|
247 | return PySequence_GetItem(obj, idx);
|
---|
248 | }
|
---|
249 |
|
---|
250 | /* do the equivalent of obj[idx], where obj is not a sequence */
|
---|
251 | static PyObject *
|
---|
252 | getitem_idx(PyObject *obj, Py_ssize_t idx)
|
---|
253 | {
|
---|
254 | PyObject *newobj;
|
---|
255 | PyObject *idx_obj = PyLong_FromSsize_t(idx);
|
---|
256 | if (idx_obj == NULL)
|
---|
257 | return NULL;
|
---|
258 | newobj = PyObject_GetItem(obj, idx_obj);
|
---|
259 | Py_DECREF(idx_obj);
|
---|
260 | return newobj;
|
---|
261 | }
|
---|
262 |
|
---|
263 | /* do the equivalent of obj[name] */
|
---|
264 | static PyObject *
|
---|
265 | getitem_str(PyObject *obj, SubString *name)
|
---|
266 | {
|
---|
267 | PyObject *newobj;
|
---|
268 | PyObject *str = SubString_new_object(name);
|
---|
269 | if (str == NULL)
|
---|
270 | return NULL;
|
---|
271 | newobj = PyObject_GetItem(obj, str);
|
---|
272 | Py_DECREF(str);
|
---|
273 | return newobj;
|
---|
274 | }
|
---|
275 |
|
---|
/* Iteration state for walking the ".attr" and "[item]" parts of a
   field name, one part per call to FieldNameIterator_next. */
typedef struct {
    /* the entire string we're parsing.  we assume that someone else
       is managing its lifetime, and that it will exist for the
       lifetime of the iterator.  can be empty */
    SubString str;

    /* pointer to where we are inside field_name; advanced by each
       call to FieldNameIterator_next */
    STRINGLIB_CHAR *ptr;
} FieldNameIterator;
|
---|
285 |
|
---|
286 |
|
---|
287 | static int
|
---|
288 | FieldNameIterator_init(FieldNameIterator *self, STRINGLIB_CHAR *ptr,
|
---|
289 | Py_ssize_t len)
|
---|
290 | {
|
---|
291 | SubString_init(&self->str, ptr, len);
|
---|
292 | self->ptr = self->str.ptr;
|
---|
293 | return 1;
|
---|
294 | }
|
---|
295 |
|
---|
296 | static int
|
---|
297 | _FieldNameIterator_attr(FieldNameIterator *self, SubString *name)
|
---|
298 | {
|
---|
299 | STRINGLIB_CHAR c;
|
---|
300 |
|
---|
301 | name->ptr = self->ptr;
|
---|
302 |
|
---|
303 | /* return everything until '.' or '[' */
|
---|
304 | while (self->ptr < self->str.end) {
|
---|
305 | switch (c = *self->ptr++) {
|
---|
306 | case '[':
|
---|
307 | case '.':
|
---|
308 | /* backup so that we this character will be seen next time */
|
---|
309 | self->ptr--;
|
---|
310 | break;
|
---|
311 | default:
|
---|
312 | continue;
|
---|
313 | }
|
---|
314 | break;
|
---|
315 | }
|
---|
316 | /* end of string is okay */
|
---|
317 | name->end = self->ptr;
|
---|
318 | return 1;
|
---|
319 | }
|
---|
320 |
|
---|
321 | static int
|
---|
322 | _FieldNameIterator_item(FieldNameIterator *self, SubString *name)
|
---|
323 | {
|
---|
324 | int bracket_seen = 0;
|
---|
325 | STRINGLIB_CHAR c;
|
---|
326 |
|
---|
327 | name->ptr = self->ptr;
|
---|
328 |
|
---|
329 | /* return everything until ']' */
|
---|
330 | while (self->ptr < self->str.end) {
|
---|
331 | switch (c = *self->ptr++) {
|
---|
332 | case ']':
|
---|
333 | bracket_seen = 1;
|
---|
334 | break;
|
---|
335 | default:
|
---|
336 | continue;
|
---|
337 | }
|
---|
338 | break;
|
---|
339 | }
|
---|
340 | /* make sure we ended with a ']' */
|
---|
341 | if (!bracket_seen) {
|
---|
342 | PyErr_SetString(PyExc_ValueError, "Missing ']' in format string");
|
---|
343 | return 0;
|
---|
344 | }
|
---|
345 |
|
---|
346 | /* end of string is okay */
|
---|
347 | /* don't include the ']' */
|
---|
348 | name->end = self->ptr-1;
|
---|
349 | return 1;
|
---|
350 | }
|
---|
351 |
|
---|
352 | /* returns 0 on error, 1 on non-error termination, and 2 if it returns a value */
|
---|
353 | static int
|
---|
354 | FieldNameIterator_next(FieldNameIterator *self, int *is_attribute,
|
---|
355 | Py_ssize_t *name_idx, SubString *name)
|
---|
356 | {
|
---|
357 | /* check at end of input */
|
---|
358 | if (self->ptr >= self->str.end)
|
---|
359 | return 1;
|
---|
360 |
|
---|
361 | switch (*self->ptr++) {
|
---|
362 | case '.':
|
---|
363 | *is_attribute = 1;
|
---|
364 | if (_FieldNameIterator_attr(self, name) == 0)
|
---|
365 | return 0;
|
---|
366 | *name_idx = -1;
|
---|
367 | break;
|
---|
368 | case '[':
|
---|
369 | *is_attribute = 0;
|
---|
370 | if (_FieldNameIterator_item(self, name) == 0)
|
---|
371 | return 0;
|
---|
372 | *name_idx = get_integer(name);
|
---|
373 | if (*name_idx == -1 && PyErr_Occurred())
|
---|
374 | return 0;
|
---|
375 | break;
|
---|
376 | default:
|
---|
377 | /* Invalid character follows ']' */
|
---|
378 | PyErr_SetString(PyExc_ValueError, "Only '.' or '[' may "
|
---|
379 | "follow ']' in format field specifier");
|
---|
380 | return 0;
|
---|
381 | }
|
---|
382 |
|
---|
383 | /* empty string is an error */
|
---|
384 | if (name->ptr == name->end) {
|
---|
385 | PyErr_SetString(PyExc_ValueError, "Empty attribute in format string");
|
---|
386 | return 0;
|
---|
387 | }
|
---|
388 |
|
---|
389 | return 2;
|
---|
390 | }
|
---|
391 |
|
---|
392 |
|
---|
393 | /* input: field_name
|
---|
394 | output: 'first' points to the part before the first '[' or '.'
|
---|
395 | 'first_idx' is -1 if 'first' is not an integer, otherwise
|
---|
396 | it's the value of first converted to an integer
|
---|
397 | 'rest' is an iterator to return the rest
|
---|
398 | */
|
---|
399 | static int
|
---|
400 | field_name_split(STRINGLIB_CHAR *ptr, Py_ssize_t len, SubString *first,
|
---|
401 | Py_ssize_t *first_idx, FieldNameIterator *rest,
|
---|
402 | AutoNumber *auto_number)
|
---|
403 | {
|
---|
404 | STRINGLIB_CHAR c;
|
---|
405 | STRINGLIB_CHAR *p = ptr;
|
---|
406 | STRINGLIB_CHAR *end = ptr + len;
|
---|
407 | int field_name_is_empty;
|
---|
408 | int using_numeric_index;
|
---|
409 |
|
---|
410 | /* find the part up until the first '.' or '[' */
|
---|
411 | while (p < end) {
|
---|
412 | switch (c = *p++) {
|
---|
413 | case '[':
|
---|
414 | case '.':
|
---|
415 | /* backup so that we this character is available to the
|
---|
416 | "rest" iterator */
|
---|
417 | p--;
|
---|
418 | break;
|
---|
419 | default:
|
---|
420 | continue;
|
---|
421 | }
|
---|
422 | break;
|
---|
423 | }
|
---|
424 |
|
---|
425 | /* set up the return values */
|
---|
426 | SubString_init(first, ptr, p - ptr);
|
---|
427 | FieldNameIterator_init(rest, p, end - p);
|
---|
428 |
|
---|
429 | /* see if "first" is an integer, in which case it's used as an index */
|
---|
430 | *first_idx = get_integer(first);
|
---|
431 | if (*first_idx == -1 && PyErr_Occurred())
|
---|
432 | return 0;
|
---|
433 |
|
---|
434 | field_name_is_empty = first->ptr >= first->end;
|
---|
435 |
|
---|
436 | /* If the field name is omitted or if we have a numeric index
|
---|
437 | specified, then we're doing numeric indexing into args. */
|
---|
438 | using_numeric_index = field_name_is_empty || *first_idx != -1;
|
---|
439 |
|
---|
440 | /* We always get here exactly one time for each field we're
|
---|
441 | processing. And we get here in field order (counting by left
|
---|
442 | braces). So this is the perfect place to handle automatic field
|
---|
443 | numbering if the field name is omitted. */
|
---|
444 |
|
---|
445 | /* Check if we need to do the auto-numbering. It's not needed if
|
---|
446 | we're called from string.Format routines, because it's handled
|
---|
447 | in that class by itself. */
|
---|
448 | if (auto_number) {
|
---|
449 | /* Initialize our auto numbering state if this is the first
|
---|
450 | time we're either auto-numbering or manually numbering. */
|
---|
451 | if (auto_number->an_state == ANS_INIT && using_numeric_index)
|
---|
452 | auto_number->an_state = field_name_is_empty ?
|
---|
453 | ANS_AUTO : ANS_MANUAL;
|
---|
454 |
|
---|
455 | /* Make sure our state is consistent with what we're doing
|
---|
456 | this time through. Only check if we're using a numeric
|
---|
457 | index. */
|
---|
458 | if (using_numeric_index)
|
---|
459 | if (autonumber_state_error(auto_number->an_state,
|
---|
460 | field_name_is_empty))
|
---|
461 | return 0;
|
---|
462 | /* Zero length field means we want to do auto-numbering of the
|
---|
463 | fields. */
|
---|
464 | if (field_name_is_empty)
|
---|
465 | *first_idx = (auto_number->an_field_number)++;
|
---|
466 | }
|
---|
467 |
|
---|
468 | return 1;
|
---|
469 | }
|
---|
470 |
|
---|
471 |
|
---|
472 | /*
|
---|
473 | get_field_object returns the object inside {}, before the
|
---|
474 | format_spec. It handles getindex and getattr lookups and consumes
|
---|
475 | the entire input string.
|
---|
476 | */
|
---|
477 | static PyObject *
|
---|
478 | get_field_object(SubString *input, PyObject *args, PyObject *kwargs,
|
---|
479 | AutoNumber *auto_number)
|
---|
480 | {
|
---|
481 | PyObject *obj = NULL;
|
---|
482 | int ok;
|
---|
483 | int is_attribute;
|
---|
484 | SubString name;
|
---|
485 | SubString first;
|
---|
486 | Py_ssize_t index;
|
---|
487 | FieldNameIterator rest;
|
---|
488 |
|
---|
489 | if (!field_name_split(input->ptr, input->end - input->ptr, &first,
|
---|
490 | &index, &rest, auto_number)) {
|
---|
491 | goto error;
|
---|
492 | }
|
---|
493 |
|
---|
494 | if (index == -1) {
|
---|
495 | /* look up in kwargs */
|
---|
496 | PyObject *key = SubString_new_object(&first);
|
---|
497 | if (key == NULL)
|
---|
498 | goto error;
|
---|
499 | if ((kwargs == NULL) || (obj = PyDict_GetItem(kwargs, key)) == NULL) {
|
---|
500 | PyErr_SetObject(PyExc_KeyError, key);
|
---|
501 | Py_DECREF(key);
|
---|
502 | goto error;
|
---|
503 | }
|
---|
504 | Py_DECREF(key);
|
---|
505 | Py_INCREF(obj);
|
---|
506 | }
|
---|
507 | else {
|
---|
508 | /* look up in args */
|
---|
509 | obj = PySequence_GetItem(args, index);
|
---|
510 | if (obj == NULL)
|
---|
511 | goto error;
|
---|
512 | }
|
---|
513 |
|
---|
514 | /* iterate over the rest of the field_name */
|
---|
515 | while ((ok = FieldNameIterator_next(&rest, &is_attribute, &index,
|
---|
516 | &name)) == 2) {
|
---|
517 | PyObject *tmp;
|
---|
518 |
|
---|
519 | if (is_attribute)
|
---|
520 | /* getattr lookup "." */
|
---|
521 | tmp = getattr(obj, &name);
|
---|
522 | else
|
---|
523 | /* getitem lookup "[]" */
|
---|
524 | if (index == -1)
|
---|
525 | tmp = getitem_str(obj, &name);
|
---|
526 | else
|
---|
527 | if (PySequence_Check(obj))
|
---|
528 | tmp = getitem_sequence(obj, index);
|
---|
529 | else
|
---|
530 | /* not a sequence */
|
---|
531 | tmp = getitem_idx(obj, index);
|
---|
532 | if (tmp == NULL)
|
---|
533 | goto error;
|
---|
534 |
|
---|
535 | /* assign to obj */
|
---|
536 | Py_DECREF(obj);
|
---|
537 | obj = tmp;
|
---|
538 | }
|
---|
539 | /* end of iterator, this is the non-error case */
|
---|
540 | if (ok == 1)
|
---|
541 | return obj;
|
---|
542 | error:
|
---|
543 | Py_XDECREF(obj);
|
---|
544 | return NULL;
|
---|
545 | }
|
---|
546 |
|
---|
547 | /************************************************************************/
|
---|
548 | /***************** Field rendering functions **************************/
|
---|
549 | /************************************************************************/
|
---|
550 |
|
---|
551 | /*
|
---|
552 | render_field() is the main function in this section. It takes the
|
---|
553 | field object and field specification string generated by
|
---|
554 | get_field_and_spec, and renders the field into the output string.
|
---|
555 |
|
---|
556 | render_field calls fieldobj.__format__(format_spec) method, and
|
---|
557 | appends to the output.
|
---|
558 | */
|
---|
559 | static int
|
---|
560 | render_field(PyObject *fieldobj, SubString *format_spec, OutputString *output)
|
---|
561 | {
|
---|
562 | int ok = 0;
|
---|
563 | PyObject *result = NULL;
|
---|
564 | PyObject *format_spec_object = NULL;
|
---|
565 | PyObject *(*formatter)(PyObject *, STRINGLIB_CHAR *, Py_ssize_t) = NULL;
|
---|
566 | STRINGLIB_CHAR* format_spec_start = format_spec->ptr ?
|
---|
567 | format_spec->ptr : NULL;
|
---|
568 | Py_ssize_t format_spec_len = format_spec->ptr ?
|
---|
569 | format_spec->end - format_spec->ptr : 0;
|
---|
570 |
|
---|
571 | /* If we know the type exactly, skip the lookup of __format__ and just
|
---|
572 | call the formatter directly. */
|
---|
573 | #if STRINGLIB_IS_UNICODE
|
---|
574 | if (PyUnicode_CheckExact(fieldobj))
|
---|
575 | formatter = _PyUnicode_FormatAdvanced;
|
---|
576 | /* Unfortunately, there's a problem with checking for int, long,
|
---|
577 | and float here. If we're being included as unicode, their
|
---|
578 | formatters expect string format_spec args. For now, just skip
|
---|
579 | this optimization for unicode. This could be fixed, but it's a
|
---|
580 | hassle. */
|
---|
581 | #else
|
---|
582 | if (PyString_CheckExact(fieldobj))
|
---|
583 | formatter = _PyBytes_FormatAdvanced;
|
---|
584 | else if (PyInt_CheckExact(fieldobj))
|
---|
585 | formatter =_PyInt_FormatAdvanced;
|
---|
586 | else if (PyLong_CheckExact(fieldobj))
|
---|
587 | formatter =_PyLong_FormatAdvanced;
|
---|
588 | else if (PyFloat_CheckExact(fieldobj))
|
---|
589 | formatter = _PyFloat_FormatAdvanced;
|
---|
590 | #endif
|
---|
591 |
|
---|
592 | if (formatter) {
|
---|
593 | /* we know exactly which formatter will be called when __format__ is
|
---|
594 | looked up, so call it directly, instead. */
|
---|
595 | result = formatter(fieldobj, format_spec_start, format_spec_len);
|
---|
596 | }
|
---|
597 | else {
|
---|
598 | /* We need to create an object out of the pointers we have, because
|
---|
599 | __format__ takes a string/unicode object for format_spec. */
|
---|
600 | format_spec_object = STRINGLIB_NEW(format_spec_start,
|
---|
601 | format_spec_len);
|
---|
602 | if (format_spec_object == NULL)
|
---|
603 | goto done;
|
---|
604 |
|
---|
605 | result = PyObject_Format(fieldobj, format_spec_object);
|
---|
606 | }
|
---|
607 | if (result == NULL)
|
---|
608 | goto done;
|
---|
609 |
|
---|
610 | #if PY_VERSION_HEX >= 0x03000000
|
---|
611 | assert(PyUnicode_Check(result));
|
---|
612 | #else
|
---|
613 | assert(PyString_Check(result) || PyUnicode_Check(result));
|
---|
614 |
|
---|
615 | /* Convert result to our type. We could be str, and result could
|
---|
616 | be unicode */
|
---|
617 | {
|
---|
618 | PyObject *tmp = STRINGLIB_TOSTR(result);
|
---|
619 | if (tmp == NULL)
|
---|
620 | goto done;
|
---|
621 | Py_DECREF(result);
|
---|
622 | result = tmp;
|
---|
623 | }
|
---|
624 | #endif
|
---|
625 |
|
---|
626 | ok = output_data(output,
|
---|
627 | STRINGLIB_STR(result), STRINGLIB_LEN(result));
|
---|
628 | done:
|
---|
629 | Py_XDECREF(format_spec_object);
|
---|
630 | Py_XDECREF(result);
|
---|
631 | return ok;
|
---|
632 | }
|
---|
633 |
|
---|
634 | static int
|
---|
635 | parse_field(SubString *str, SubString *field_name, SubString *format_spec,
|
---|
636 | STRINGLIB_CHAR *conversion)
|
---|
637 | {
|
---|
638 | /* Note this function works if the field name is zero length,
|
---|
639 | which is good. Zero length field names are handled later, in
|
---|
640 | field_name_split. */
|
---|
641 |
|
---|
642 | STRINGLIB_CHAR c = 0;
|
---|
643 |
|
---|
644 | /* initialize these, as they may be empty */
|
---|
645 | *conversion = '\0';
|
---|
646 | SubString_init(format_spec, NULL, 0);
|
---|
647 |
|
---|
648 | /* Search for the field name. it's terminated by the end of
|
---|
649 | the string, or a ':' or '!' */
|
---|
650 | field_name->ptr = str->ptr;
|
---|
651 | while (str->ptr < str->end) {
|
---|
652 | switch (c = *(str->ptr++)) {
|
---|
653 | case ':':
|
---|
654 | case '!':
|
---|
655 | break;
|
---|
656 | default:
|
---|
657 | continue;
|
---|
658 | }
|
---|
659 | break;
|
---|
660 | }
|
---|
661 |
|
---|
662 | if (c == '!' || c == ':') {
|
---|
663 | /* we have a format specifier and/or a conversion */
|
---|
664 | /* don't include the last character */
|
---|
665 | field_name->end = str->ptr-1;
|
---|
666 |
|
---|
667 | /* the format specifier is the rest of the string */
|
---|
668 | format_spec->ptr = str->ptr;
|
---|
669 | format_spec->end = str->end;
|
---|
670 |
|
---|
671 | /* see if there's a conversion specifier */
|
---|
672 | if (c == '!') {
|
---|
673 | /* there must be another character present */
|
---|
674 | if (format_spec->ptr >= format_spec->end) {
|
---|
675 | PyErr_SetString(PyExc_ValueError,
|
---|
676 | "end of format while looking for conversion "
|
---|
677 | "specifier");
|
---|
678 | return 0;
|
---|
679 | }
|
---|
680 | *conversion = *(format_spec->ptr++);
|
---|
681 |
|
---|
682 | /* if there is another character, it must be a colon */
|
---|
683 | if (format_spec->ptr < format_spec->end) {
|
---|
684 | c = *(format_spec->ptr++);
|
---|
685 | if (c != ':') {
|
---|
686 | PyErr_SetString(PyExc_ValueError,
|
---|
687 | "expected ':' after format specifier");
|
---|
688 | return 0;
|
---|
689 | }
|
---|
690 | }
|
---|
691 | }
|
---|
692 | }
|
---|
693 | else
|
---|
694 | /* end of string, there's no format_spec or conversion */
|
---|
695 | field_name->end = str->ptr;
|
---|
696 |
|
---|
697 | return 1;
|
---|
698 | }
|
---|
699 |
|
---|
700 | /************************************************************************/
|
---|
701 | /******* Output string allocation and escape-to-markup processing ******/
|
---|
702 | /************************************************************************/
|
---|
703 |
|
---|
/* MarkupIterator breaks the string into pieces of either literal
   text, or things inside {} that need to be marked up.  it is
   designed to make it easy to wrap a Python iterator around it, for
   use with the Formatter class */

typedef struct {
    /* the unread remainder of the format string; str.ptr is advanced
       by each call to MarkupIterator_next */
    SubString str;
} MarkupIterator;
|
---|
712 |
|
---|
713 | static int
|
---|
714 | MarkupIterator_init(MarkupIterator *self, STRINGLIB_CHAR *ptr, Py_ssize_t len)
|
---|
715 | {
|
---|
716 | SubString_init(&self->str, ptr, len);
|
---|
717 | return 1;
|
---|
718 | }
|
---|
719 |
|
---|
720 | /* returns 0 on error, 1 on non-error termination, and 2 if it got a
|
---|
721 | string (or something to be expanded) */
|
---|
722 | static int
|
---|
723 | MarkupIterator_next(MarkupIterator *self, SubString *literal,
|
---|
724 | int *field_present, SubString *field_name,
|
---|
725 | SubString *format_spec, STRINGLIB_CHAR *conversion,
|
---|
726 | int *format_spec_needs_expanding)
|
---|
727 | {
|
---|
728 | int at_end;
|
---|
729 | STRINGLIB_CHAR c = 0;
|
---|
730 | STRINGLIB_CHAR *start;
|
---|
731 | int count;
|
---|
732 | Py_ssize_t len;
|
---|
733 | int markup_follows = 0;
|
---|
734 |
|
---|
735 | /* initialize all of the output variables */
|
---|
736 | SubString_init(literal, NULL, 0);
|
---|
737 | SubString_init(field_name, NULL, 0);
|
---|
738 | SubString_init(format_spec, NULL, 0);
|
---|
739 | *conversion = '\0';
|
---|
740 | *format_spec_needs_expanding = 0;
|
---|
741 | *field_present = 0;
|
---|
742 |
|
---|
743 | /* No more input, end of iterator. This is the normal exit
|
---|
744 | path. */
|
---|
745 | if (self->str.ptr >= self->str.end)
|
---|
746 | return 1;
|
---|
747 |
|
---|
748 | start = self->str.ptr;
|
---|
749 |
|
---|
750 | /* First read any literal text. Read until the end of string, an
|
---|
751 | escaped '{' or '}', or an unescaped '{'. In order to never
|
---|
752 | allocate memory and so I can just pass pointers around, if
|
---|
753 | there's an escaped '{' or '}' then we'll return the literal
|
---|
754 | including the brace, but no format object. The next time
|
---|
755 | through, we'll return the rest of the literal, skipping past
|
---|
756 | the second consecutive brace. */
|
---|
757 | while (self->str.ptr < self->str.end) {
|
---|
758 | switch (c = *(self->str.ptr++)) {
|
---|
759 | case '{':
|
---|
760 | case '}':
|
---|
761 | markup_follows = 1;
|
---|
762 | break;
|
---|
763 | default:
|
---|
764 | continue;
|
---|
765 | }
|
---|
766 | break;
|
---|
767 | }
|
---|
768 |
|
---|
769 | at_end = self->str.ptr >= self->str.end;
|
---|
770 | len = self->str.ptr - start;
|
---|
771 |
|
---|
772 | if ((c == '}') && (at_end || (c != *self->str.ptr))) {
|
---|
773 | PyErr_SetString(PyExc_ValueError, "Single '}' encountered "
|
---|
774 | "in format string");
|
---|
775 | return 0;
|
---|
776 | }
|
---|
777 | if (at_end && c == '{') {
|
---|
778 | PyErr_SetString(PyExc_ValueError, "Single '{' encountered "
|
---|
779 | "in format string");
|
---|
780 | return 0;
|
---|
781 | }
|
---|
782 | if (!at_end) {
|
---|
783 | if (c == *self->str.ptr) {
|
---|
784 | /* escaped } or {, skip it in the input. there is no
|
---|
785 | markup object following us, just this literal text */
|
---|
786 | self->str.ptr++;
|
---|
787 | markup_follows = 0;
|
---|
788 | }
|
---|
789 | else
|
---|
790 | len--;
|
---|
791 | }
|
---|
792 |
|
---|
793 | /* record the literal text */
|
---|
794 | literal->ptr = start;
|
---|
795 | literal->end = start + len;
|
---|
796 |
|
---|
797 | if (!markup_follows)
|
---|
798 | return 2;
|
---|
799 |
|
---|
800 | /* this is markup, find the end of the string by counting nested
|
---|
801 | braces. note that this prohibits escaped braces, so that
|
---|
802 | format_specs cannot have braces in them. */
|
---|
803 | *field_present = 1;
|
---|
804 | count = 1;
|
---|
805 |
|
---|
806 | start = self->str.ptr;
|
---|
807 |
|
---|
808 | /* we know we can't have a zero length string, so don't worry
|
---|
809 | about that case */
|
---|
810 | while (self->str.ptr < self->str.end) {
|
---|
811 | switch (c = *(self->str.ptr++)) {
|
---|
812 | case '{':
|
---|
813 | /* the format spec needs to be recursively expanded.
|
---|
814 | this is an optimization, and not strictly needed */
|
---|
815 | *format_spec_needs_expanding = 1;
|
---|
816 | count++;
|
---|
817 | break;
|
---|
818 | case '}':
|
---|
819 | count--;
|
---|
820 | if (count <= 0) {
|
---|
821 | /* we're done. parse and get out */
|
---|
822 | SubString s;
|
---|
823 |
|
---|
824 | SubString_init(&s, start, self->str.ptr - 1 - start);
|
---|
825 | if (parse_field(&s, field_name, format_spec, conversion) == 0)
|
---|
826 | return 0;
|
---|
827 |
|
---|
828 | /* success */
|
---|
829 | return 2;
|
---|
830 | }
|
---|
831 | break;
|
---|
832 | }
|
---|
833 | }
|
---|
834 |
|
---|
835 | /* end of string while searching for matching '}' */
|
---|
836 | PyErr_SetString(PyExc_ValueError, "unmatched '{' in format");
|
---|
837 | return 0;
|
---|
838 | }
|
---|
839 |
|
---|
840 |
|
---|
841 | /* do the !r or !s conversion on obj */
|
---|
842 | static PyObject *
|
---|
843 | do_conversion(PyObject *obj, STRINGLIB_CHAR conversion)
|
---|
844 | {
|
---|
845 | /* XXX in pre-3.0, do we need to convert this to unicode, since it
|
---|
846 | might have returned a string? */
|
---|
847 | switch (conversion) {
|
---|
848 | case 'r':
|
---|
849 | return PyObject_Repr(obj);
|
---|
850 | case 's':
|
---|
851 | return STRINGLIB_TOSTR(obj);
|
---|
852 | default:
|
---|
853 | if (conversion > 32 && conversion < 127) {
|
---|
854 | /* It's the ASCII subrange; casting to char is safe
|
---|
855 | (assuming the execution character set is an ASCII
|
---|
856 | superset). */
|
---|
857 | PyErr_Format(PyExc_ValueError,
|
---|
858 | "Unknown conversion specifier %c",
|
---|
859 | (char)conversion);
|
---|
860 | } else
|
---|
861 | PyErr_Format(PyExc_ValueError,
|
---|
862 | "Unknown conversion specifier \\x%x",
|
---|
863 | (unsigned int)conversion);
|
---|
864 | return NULL;
|
---|
865 | }
|
---|
866 | }
|
---|
867 |
|
---|
868 | /* given:
|
---|
869 |
|
---|
870 | {field_name!conversion:format_spec}
|
---|
871 |
|
---|
872 | compute the result and write it to output.
|
---|
873 | format_spec_needs_expanding is an optimization. if it's false,
|
---|
874 | just output the string directly, otherwise recursively expand the
|
---|
875 | format_spec string.
|
---|
876 |
|
---|
877 | field_name is allowed to be zero length, in which case we
|
---|
878 | are doing auto field numbering.
|
---|
879 | */
|
---|
880 |
|
---|
881 | static int
|
---|
882 | output_markup(SubString *field_name, SubString *format_spec,
|
---|
883 | int format_spec_needs_expanding, STRINGLIB_CHAR conversion,
|
---|
884 | OutputString *output, PyObject *args, PyObject *kwargs,
|
---|
885 | int recursion_depth, AutoNumber *auto_number)
|
---|
886 | {
|
---|
887 | PyObject *tmp = NULL;
|
---|
888 | PyObject *fieldobj = NULL;
|
---|
889 | SubString expanded_format_spec;
|
---|
890 | SubString *actual_format_spec;
|
---|
891 | int result = 0;
|
---|
892 |
|
---|
893 | /* convert field_name to an object */
|
---|
894 | fieldobj = get_field_object(field_name, args, kwargs, auto_number);
|
---|
895 | if (fieldobj == NULL)
|
---|
896 | goto done;
|
---|
897 |
|
---|
898 | if (conversion != '\0') {
|
---|
899 | tmp = do_conversion(fieldobj, conversion);
|
---|
900 | if (tmp == NULL)
|
---|
901 | goto done;
|
---|
902 |
|
---|
903 | /* do the assignment, transferring ownership: fieldobj = tmp */
|
---|
904 | Py_DECREF(fieldobj);
|
---|
905 | fieldobj = tmp;
|
---|
906 | tmp = NULL;
|
---|
907 | }
|
---|
908 |
|
---|
909 | /* if needed, recurively compute the format_spec */
|
---|
910 | if (format_spec_needs_expanding) {
|
---|
911 | tmp = build_string(format_spec, args, kwargs, recursion_depth-1,
|
---|
912 | auto_number);
|
---|
913 | if (tmp == NULL)
|
---|
914 | goto done;
|
---|
915 |
|
---|
916 | /* note that in the case we're expanding the format string,
|
---|
917 | tmp must be kept around until after the call to
|
---|
918 | render_field. */
|
---|
919 | SubString_init(&expanded_format_spec,
|
---|
920 | STRINGLIB_STR(tmp), STRINGLIB_LEN(tmp));
|
---|
921 | actual_format_spec = &expanded_format_spec;
|
---|
922 | }
|
---|
923 | else
|
---|
924 | actual_format_spec = format_spec;
|
---|
925 |
|
---|
926 | if (render_field(fieldobj, actual_format_spec, output) == 0)
|
---|
927 | goto done;
|
---|
928 |
|
---|
929 | result = 1;
|
---|
930 |
|
---|
931 | done:
|
---|
932 | Py_XDECREF(fieldobj);
|
---|
933 | Py_XDECREF(tmp);
|
---|
934 |
|
---|
935 | return result;
|
---|
936 | }
|
---|
937 |
|
---|
938 | /*
|
---|
939 | do_markup is the top-level loop for the format() method. It
|
---|
940 | searches through the format string for escapes to markup codes, and
|
---|
941 | calls other functions to move non-markup text to the output,
|
---|
942 | and to perform the markup to the output.
|
---|
943 | */
|
---|
944 | static int
|
---|
945 | do_markup(SubString *input, PyObject *args, PyObject *kwargs,
|
---|
946 | OutputString *output, int recursion_depth, AutoNumber *auto_number)
|
---|
947 | {
|
---|
948 | MarkupIterator iter;
|
---|
949 | int format_spec_needs_expanding;
|
---|
950 | int result;
|
---|
951 | int field_present;
|
---|
952 | SubString literal;
|
---|
953 | SubString field_name;
|
---|
954 | SubString format_spec;
|
---|
955 | STRINGLIB_CHAR conversion;
|
---|
956 |
|
---|
957 | MarkupIterator_init(&iter, input->ptr, input->end - input->ptr);
|
---|
958 | while ((result = MarkupIterator_next(&iter, &literal, &field_present,
|
---|
959 | &field_name, &format_spec,
|
---|
960 | &conversion,
|
---|
961 | &format_spec_needs_expanding)) == 2) {
|
---|
962 | if (!output_data(output, literal.ptr, literal.end - literal.ptr))
|
---|
963 | return 0;
|
---|
964 | if (field_present)
|
---|
965 | if (!output_markup(&field_name, &format_spec,
|
---|
966 | format_spec_needs_expanding, conversion, output,
|
---|
967 | args, kwargs, recursion_depth, auto_number))
|
---|
968 | return 0;
|
---|
969 | }
|
---|
970 | return result;
|
---|
971 | }
|
---|
972 |
|
---|
973 |
|
---|
974 | /*
|
---|
975 | build_string allocates the output string and then
|
---|
976 | calls do_markup to do the heavy lifting.
|
---|
977 | */
|
---|
978 | static PyObject *
|
---|
979 | build_string(SubString *input, PyObject *args, PyObject *kwargs,
|
---|
980 | int recursion_depth, AutoNumber *auto_number)
|
---|
981 | {
|
---|
982 | OutputString output;
|
---|
983 | PyObject *result = NULL;
|
---|
984 | Py_ssize_t count;
|
---|
985 |
|
---|
986 | output.obj = NULL; /* needed so cleanup code always works */
|
---|
987 |
|
---|
988 | /* check the recursion level */
|
---|
989 | if (recursion_depth <= 0) {
|
---|
990 | PyErr_SetString(PyExc_ValueError,
|
---|
991 | "Max string recursion exceeded");
|
---|
992 | goto done;
|
---|
993 | }
|
---|
994 |
|
---|
995 | /* initial size is the length of the format string, plus the size
|
---|
996 | increment. seems like a reasonable default */
|
---|
997 | if (!output_initialize(&output,
|
---|
998 | input->end - input->ptr +
|
---|
999 | INITIAL_SIZE_INCREMENT))
|
---|
1000 | goto done;
|
---|
1001 |
|
---|
1002 | if (!do_markup(input, args, kwargs, &output, recursion_depth,
|
---|
1003 | auto_number)) {
|
---|
1004 | goto done;
|
---|
1005 | }
|
---|
1006 |
|
---|
1007 | count = output.ptr - STRINGLIB_STR(output.obj);
|
---|
1008 | if (STRINGLIB_RESIZE(&output.obj, count) < 0) {
|
---|
1009 | goto done;
|
---|
1010 | }
|
---|
1011 |
|
---|
1012 | /* transfer ownership to result */
|
---|
1013 | result = output.obj;
|
---|
1014 | output.obj = NULL;
|
---|
1015 |
|
---|
1016 | done:
|
---|
1017 | Py_XDECREF(output.obj);
|
---|
1018 | return result;
|
---|
1019 | }
|
---|
1020 |
|
---|
1021 | /************************************************************************/
|
---|
1022 | /*********** main routine ***********************************************/
|
---|
1023 | /************************************************************************/
|
---|
1024 |
|
---|
1025 | /* this is the main entry point */
|
---|
1026 | static PyObject *
|
---|
1027 | do_string_format(PyObject *self, PyObject *args, PyObject *kwargs)
|
---|
1028 | {
|
---|
1029 | SubString input;
|
---|
1030 |
|
---|
1031 | /* PEP 3101 says only 2 levels, so that
|
---|
1032 | "{0:{1}}".format('abc', 's') # works
|
---|
1033 | "{0:{1:{2}}}".format('abc', 's', '') # fails
|
---|
1034 | */
|
---|
1035 | int recursion_depth = 2;
|
---|
1036 |
|
---|
1037 | AutoNumber auto_number;
|
---|
1038 |
|
---|
1039 | AutoNumber_Init(&auto_number);
|
---|
1040 | SubString_init(&input, STRINGLIB_STR(self), STRINGLIB_LEN(self));
|
---|
1041 | return build_string(&input, args, kwargs, recursion_depth, &auto_number);
|
---|
1042 | }
|
---|
1043 |
|
---|
1044 |
|
---|
1045 |
|
---|
1046 | /************************************************************************/
|
---|
1047 | /*********** formatteriterator ******************************************/
|
---|
1048 | /************************************************************************/
|
---|
1049 |
|
---|
1050 | /* This is used to implement string.Formatter.vparse(). It exists so
|
---|
1051 | Formatter can share code with the built in unicode.format() method.
|
---|
1052 | It's really just a wrapper around MarkupIterator that is callable
|
---|
1053 | from Python. */
|
---|
1054 |
|
---|
1055 | typedef struct {
|
---|
1056 | PyObject_HEAD
|
---|
1057 |
|
---|
1058 | STRINGLIB_OBJECT *str;
|
---|
1059 |
|
---|
1060 | MarkupIterator it_markup;
|
---|
1061 | } formatteriterobject;
|
---|
1062 |
|
---|
1063 | static void
|
---|
1064 | formatteriter_dealloc(formatteriterobject *it)
|
---|
1065 | {
|
---|
1066 | Py_XDECREF(it->str);
|
---|
1067 | PyObject_FREE(it);
|
---|
1068 | }
|
---|
1069 |
|
---|
1070 | /* returns a tuple:
|
---|
1071 | (literal, field_name, format_spec, conversion)
|
---|
1072 |
|
---|
1073 | literal is any literal text to output. might be zero length
|
---|
1074 | field_name is the string before the ':'. might be None
|
---|
1075 | format_spec is the string after the ':'. mibht be None
|
---|
1076 | conversion is either None, or the string after the '!'
|
---|
1077 | */
|
---|
1078 | static PyObject *
|
---|
1079 | formatteriter_next(formatteriterobject *it)
|
---|
1080 | {
|
---|
1081 | SubString literal;
|
---|
1082 | SubString field_name;
|
---|
1083 | SubString format_spec;
|
---|
1084 | STRINGLIB_CHAR conversion;
|
---|
1085 | int format_spec_needs_expanding;
|
---|
1086 | int field_present;
|
---|
1087 | int result = MarkupIterator_next(&it->it_markup, &literal, &field_present,
|
---|
1088 | &field_name, &format_spec, &conversion,
|
---|
1089 | &format_spec_needs_expanding);
|
---|
1090 |
|
---|
1091 | /* all of the SubString objects point into it->str, so no
|
---|
1092 | memory management needs to be done on them */
|
---|
1093 | assert(0 <= result && result <= 2);
|
---|
1094 | if (result == 0 || result == 1)
|
---|
1095 | /* if 0, error has already been set, if 1, iterator is empty */
|
---|
1096 | return NULL;
|
---|
1097 | else {
|
---|
1098 | PyObject *literal_str = NULL;
|
---|
1099 | PyObject *field_name_str = NULL;
|
---|
1100 | PyObject *format_spec_str = NULL;
|
---|
1101 | PyObject *conversion_str = NULL;
|
---|
1102 | PyObject *tuple = NULL;
|
---|
1103 |
|
---|
1104 | literal_str = SubString_new_object(&literal);
|
---|
1105 | if (literal_str == NULL)
|
---|
1106 | goto done;
|
---|
1107 |
|
---|
1108 | field_name_str = SubString_new_object(&field_name);
|
---|
1109 | if (field_name_str == NULL)
|
---|
1110 | goto done;
|
---|
1111 |
|
---|
1112 | /* if field_name is non-zero length, return a string for
|
---|
1113 | format_spec (even if zero length), else return None */
|
---|
1114 | format_spec_str = (field_present ?
|
---|
1115 | SubString_new_object_or_empty :
|
---|
1116 | SubString_new_object)(&format_spec);
|
---|
1117 | if (format_spec_str == NULL)
|
---|
1118 | goto done;
|
---|
1119 |
|
---|
1120 | /* if the conversion is not specified, return a None,
|
---|
1121 | otherwise create a one length string with the conversion
|
---|
1122 | character */
|
---|
1123 | if (conversion == '\0') {
|
---|
1124 | conversion_str = Py_None;
|
---|
1125 | Py_INCREF(conversion_str);
|
---|
1126 | }
|
---|
1127 | else
|
---|
1128 | conversion_str = STRINGLIB_NEW(&conversion, 1);
|
---|
1129 | if (conversion_str == NULL)
|
---|
1130 | goto done;
|
---|
1131 |
|
---|
1132 | tuple = PyTuple_Pack(4, literal_str, field_name_str, format_spec_str,
|
---|
1133 | conversion_str);
|
---|
1134 | done:
|
---|
1135 | Py_XDECREF(literal_str);
|
---|
1136 | Py_XDECREF(field_name_str);
|
---|
1137 | Py_XDECREF(format_spec_str);
|
---|
1138 | Py_XDECREF(conversion_str);
|
---|
1139 | return tuple;
|
---|
1140 | }
|
---|
1141 | }
|
---|
1142 |
|
---|
1143 | static PyMethodDef formatteriter_methods[] = {
|
---|
1144 | {NULL, NULL} /* sentinel */
|
---|
1145 | };
|
---|
1146 |
|
---|
1147 | static PyTypeObject PyFormatterIter_Type = {
|
---|
1148 | PyVarObject_HEAD_INIT(&PyType_Type, 0)
|
---|
1149 | "formatteriterator", /* tp_name */
|
---|
1150 | sizeof(formatteriterobject), /* tp_basicsize */
|
---|
1151 | 0, /* tp_itemsize */
|
---|
1152 | /* methods */
|
---|
1153 | (destructor)formatteriter_dealloc, /* tp_dealloc */
|
---|
1154 | 0, /* tp_print */
|
---|
1155 | 0, /* tp_getattr */
|
---|
1156 | 0, /* tp_setattr */
|
---|
1157 | 0, /* tp_compare */
|
---|
1158 | 0, /* tp_repr */
|
---|
1159 | 0, /* tp_as_number */
|
---|
1160 | 0, /* tp_as_sequence */
|
---|
1161 | 0, /* tp_as_mapping */
|
---|
1162 | 0, /* tp_hash */
|
---|
1163 | 0, /* tp_call */
|
---|
1164 | 0, /* tp_str */
|
---|
1165 | PyObject_GenericGetAttr, /* tp_getattro */
|
---|
1166 | 0, /* tp_setattro */
|
---|
1167 | 0, /* tp_as_buffer */
|
---|
1168 | Py_TPFLAGS_DEFAULT, /* tp_flags */
|
---|
1169 | 0, /* tp_doc */
|
---|
1170 | 0, /* tp_traverse */
|
---|
1171 | 0, /* tp_clear */
|
---|
1172 | 0, /* tp_richcompare */
|
---|
1173 | 0, /* tp_weaklistoffset */
|
---|
1174 | PyObject_SelfIter, /* tp_iter */
|
---|
1175 | (iternextfunc)formatteriter_next, /* tp_iternext */
|
---|
1176 | formatteriter_methods, /* tp_methods */
|
---|
1177 | 0,
|
---|
1178 | };
|
---|
1179 |
|
---|
1180 | /* unicode_formatter_parser is used to implement
|
---|
1181 | string.Formatter.vformat. it parses a string and returns tuples
|
---|
1182 | describing the parsed elements. It's a wrapper around
|
---|
1183 | stringlib/string_format.h's MarkupIterator */
|
---|
1184 | static PyObject *
|
---|
1185 | formatter_parser(STRINGLIB_OBJECT *self)
|
---|
1186 | {
|
---|
1187 | formatteriterobject *it;
|
---|
1188 |
|
---|
1189 | it = PyObject_New(formatteriterobject, &PyFormatterIter_Type);
|
---|
1190 | if (it == NULL)
|
---|
1191 | return NULL;
|
---|
1192 |
|
---|
1193 | /* take ownership, give the object to the iterator */
|
---|
1194 | Py_INCREF(self);
|
---|
1195 | it->str = self;
|
---|
1196 |
|
---|
1197 | /* initialize the contained MarkupIterator */
|
---|
1198 | MarkupIterator_init(&it->it_markup,
|
---|
1199 | STRINGLIB_STR(self),
|
---|
1200 | STRINGLIB_LEN(self));
|
---|
1201 |
|
---|
1202 | return (PyObject *)it;
|
---|
1203 | }
|
---|
1204 |
|
---|
1205 |
|
---|
1206 | /************************************************************************/
|
---|
1207 | /*********** fieldnameiterator ******************************************/
|
---|
1208 | /************************************************************************/
|
---|
1209 |
|
---|
1210 |
|
---|
1211 | /* This is used to implement string.Formatter.vparse(). It parses the
|
---|
1212 | field name into attribute and item values. It's a Python-callable
|
---|
1213 | wrapper around FieldNameIterator */
|
---|
1214 |
|
---|
1215 | typedef struct {
|
---|
1216 | PyObject_HEAD
|
---|
1217 |
|
---|
1218 | STRINGLIB_OBJECT *str;
|
---|
1219 |
|
---|
1220 | FieldNameIterator it_field;
|
---|
1221 | } fieldnameiterobject;
|
---|
1222 |
|
---|
1223 | static void
|
---|
1224 | fieldnameiter_dealloc(fieldnameiterobject *it)
|
---|
1225 | {
|
---|
1226 | Py_XDECREF(it->str);
|
---|
1227 | PyObject_FREE(it);
|
---|
1228 | }
|
---|
1229 |
|
---|
1230 | /* returns a tuple:
|
---|
1231 | (is_attr, value)
|
---|
1232 | is_attr is true if we used attribute syntax (e.g., '.foo')
|
---|
1233 | false if we used index syntax (e.g., '[foo]')
|
---|
1234 | value is an integer or string
|
---|
1235 | */
|
---|
1236 | static PyObject *
|
---|
1237 | fieldnameiter_next(fieldnameiterobject *it)
|
---|
1238 | {
|
---|
1239 | int result;
|
---|
1240 | int is_attr;
|
---|
1241 | Py_ssize_t idx;
|
---|
1242 | SubString name;
|
---|
1243 |
|
---|
1244 | result = FieldNameIterator_next(&it->it_field, &is_attr,
|
---|
1245 | &idx, &name);
|
---|
1246 | if (result == 0 || result == 1)
|
---|
1247 | /* if 0, error has already been set, if 1, iterator is empty */
|
---|
1248 | return NULL;
|
---|
1249 | else {
|
---|
1250 | PyObject* result = NULL;
|
---|
1251 | PyObject* is_attr_obj = NULL;
|
---|
1252 | PyObject* obj = NULL;
|
---|
1253 |
|
---|
1254 | is_attr_obj = PyBool_FromLong(is_attr);
|
---|
1255 | if (is_attr_obj == NULL)
|
---|
1256 | goto done;
|
---|
1257 |
|
---|
1258 | /* either an integer or a string */
|
---|
1259 | if (idx != -1)
|
---|
1260 | obj = PyLong_FromSsize_t(idx);
|
---|
1261 | else
|
---|
1262 | obj = SubString_new_object(&name);
|
---|
1263 | if (obj == NULL)
|
---|
1264 | goto done;
|
---|
1265 |
|
---|
1266 | /* return a tuple of values */
|
---|
1267 | result = PyTuple_Pack(2, is_attr_obj, obj);
|
---|
1268 |
|
---|
1269 | done:
|
---|
1270 | Py_XDECREF(is_attr_obj);
|
---|
1271 | Py_XDECREF(obj);
|
---|
1272 | return result;
|
---|
1273 | }
|
---|
1274 | }
|
---|
1275 |
|
---|
1276 | static PyMethodDef fieldnameiter_methods[] = {
|
---|
1277 | {NULL, NULL} /* sentinel */
|
---|
1278 | };
|
---|
1279 |
|
---|
1280 | static PyTypeObject PyFieldNameIter_Type = {
|
---|
1281 | PyVarObject_HEAD_INIT(&PyType_Type, 0)
|
---|
1282 | "fieldnameiterator", /* tp_name */
|
---|
1283 | sizeof(fieldnameiterobject), /* tp_basicsize */
|
---|
1284 | 0, /* tp_itemsize */
|
---|
1285 | /* methods */
|
---|
1286 | (destructor)fieldnameiter_dealloc, /* tp_dealloc */
|
---|
1287 | 0, /* tp_print */
|
---|
1288 | 0, /* tp_getattr */
|
---|
1289 | 0, /* tp_setattr */
|
---|
1290 | 0, /* tp_compare */
|
---|
1291 | 0, /* tp_repr */
|
---|
1292 | 0, /* tp_as_number */
|
---|
1293 | 0, /* tp_as_sequence */
|
---|
1294 | 0, /* tp_as_mapping */
|
---|
1295 | 0, /* tp_hash */
|
---|
1296 | 0, /* tp_call */
|
---|
1297 | 0, /* tp_str */
|
---|
1298 | PyObject_GenericGetAttr, /* tp_getattro */
|
---|
1299 | 0, /* tp_setattro */
|
---|
1300 | 0, /* tp_as_buffer */
|
---|
1301 | Py_TPFLAGS_DEFAULT, /* tp_flags */
|
---|
1302 | 0, /* tp_doc */
|
---|
1303 | 0, /* tp_traverse */
|
---|
1304 | 0, /* tp_clear */
|
---|
1305 | 0, /* tp_richcompare */
|
---|
1306 | 0, /* tp_weaklistoffset */
|
---|
1307 | PyObject_SelfIter, /* tp_iter */
|
---|
1308 | (iternextfunc)fieldnameiter_next, /* tp_iternext */
|
---|
1309 | fieldnameiter_methods, /* tp_methods */
|
---|
1310 | 0};
|
---|
1311 |
|
---|
1312 | /* unicode_formatter_field_name_split is used to implement
|
---|
1313 | string.Formatter.vformat. it takes an PEP 3101 "field name", and
|
---|
1314 | returns a tuple of (first, rest): "first", the part before the
|
---|
1315 | first '.' or '['; and "rest", an iterator for the rest of the field
|
---|
1316 | name. it's a wrapper around stringlib/string_format.h's
|
---|
1317 | field_name_split. The iterator it returns is a
|
---|
1318 | FieldNameIterator */
|
---|
1319 | static PyObject *
|
---|
1320 | formatter_field_name_split(STRINGLIB_OBJECT *self)
|
---|
1321 | {
|
---|
1322 | SubString first;
|
---|
1323 | Py_ssize_t first_idx;
|
---|
1324 | fieldnameiterobject *it;
|
---|
1325 |
|
---|
1326 | PyObject *first_obj = NULL;
|
---|
1327 | PyObject *result = NULL;
|
---|
1328 |
|
---|
1329 | it = PyObject_New(fieldnameiterobject, &PyFieldNameIter_Type);
|
---|
1330 | if (it == NULL)
|
---|
1331 | return NULL;
|
---|
1332 |
|
---|
1333 | /* take ownership, give the object to the iterator. this is
|
---|
1334 | just to keep the field_name alive */
|
---|
1335 | Py_INCREF(self);
|
---|
1336 | it->str = self;
|
---|
1337 |
|
---|
1338 | /* Pass in auto_number = NULL. We'll return an empty string for
|
---|
1339 | first_obj in that case. */
|
---|
1340 | if (!field_name_split(STRINGLIB_STR(self),
|
---|
1341 | STRINGLIB_LEN(self),
|
---|
1342 | &first, &first_idx, &it->it_field, NULL))
|
---|
1343 | goto done;
|
---|
1344 |
|
---|
1345 | /* first becomes an integer, if possible; else a string */
|
---|
1346 | if (first_idx != -1)
|
---|
1347 | first_obj = PyLong_FromSsize_t(first_idx);
|
---|
1348 | else
|
---|
1349 | /* convert "first" into a string object */
|
---|
1350 | first_obj = SubString_new_object(&first);
|
---|
1351 | if (first_obj == NULL)
|
---|
1352 | goto done;
|
---|
1353 |
|
---|
1354 | /* return a tuple of values */
|
---|
1355 | result = PyTuple_Pack(2, first_obj, it);
|
---|
1356 |
|
---|
1357 | done:
|
---|
1358 | Py_XDECREF(it);
|
---|
1359 | Py_XDECREF(first_obj);
|
---|
1360 | return result;
|
---|
1361 | }
|
---|