Context Navigation

← Previous Revision
Next Revision →
Blame
Revision Log

string_format.h

Last change on this file was 391, checked in by dmik, 11 years ago
python: Merge vendor 2.7.6 to trunk.
Property svn:eol-style set to `native`
File size: 41.4 KB

Line
1	/*
2	string_format.h -- implementation of string.format().
3
4	It uses the Objects/stringlib conventions, so that it can be
5	compiled for both unicode and string objects.
6	*/
7
8
/* Defines for Python 2.6 compatibility.
   For interpreter versions below 3.0 the name PyLong_FromSsize_t used in
   this file is mapped onto _PyLong_FromSsize_t. */
#if PY_VERSION_HEX < 0x03000000
#define PyLong_FromSsize_t _PyLong_FromSsize_t
#endif
13
/* Defines for more efficiently reallocating the string buffer.
   INITIAL_SIZE_INCREMENT is the slack a fresh OutputString starts with;
   output_extend multiplies the slack by SIZE_MULTIPLIER after each
   reallocation for as long as it is still below MAX_SIZE_INCREMENT. */
#define INITIAL_SIZE_INCREMENT 100
#define SIZE_MULTIPLIER 2
#define MAX_SIZE_INCREMENT 3200
18
19
20	/************************************************************************/
21	/********* Global data structures and forward declarations *******/
22	/************************************************************************/
23
24	/*
25	A SubString consists of the characters between two string or
26	unicode pointers.
27	*/
28	typedef struct {
29	STRINGLIB_CHAR *ptr;
30	STRINGLIB_CHAR *end;
31	} SubString;
32
33
/* Keep track if we're auto-numbering fields */
typedef enum {
    ANS_INIT,       /* no numeric field seen yet */
    ANS_AUTO,       /* fields are numbered automatically ("{}") */
    ANS_MANUAL      /* fields carry explicit numbers ("{0}") */
} AutoNumberState;

/* Keeps track of our auto-numbering state, and which number field we're on */
typedef struct {
    AutoNumberState an_state;
    int an_field_number;    /* index handed to the next auto-numbered field */
} AutoNumber;
45
46
47	/* forward declaration for recursion */
48	static PyObject *
49	build_string(SubString input, PyObject args, PyObject *kwargs,
50	int recursion_depth, AutoNumber *auto_number);
51
52
53
54	/************************************************************************/
55	/************************ Utility functions **********************/
56	/************************************************************************/
57
58	static void
59	AutoNumber_Init(AutoNumber *auto_number)
60	{
61	auto_number->an_state = ANS_INIT;
62	auto_number->an_field_number = 0;
63	}
64
65	/* fill in a SubString from a pointer and length */
66	Py_LOCAL_INLINE(void)
67	SubString_init(SubString str, STRINGLIB_CHAR p, Py_ssize_t len)
68	{
69	str->ptr = p;
70	if (p == NULL)
71	str->end = NULL;
72	else
73	str->end = str->ptr + len;
74	}
75
76	/* return a new string. if str->ptr is NULL, return None */
77	Py_LOCAL_INLINE(PyObject *)
78	SubString_new_object(SubString *str)
79	{
80	if (str->ptr == NULL) {
81	Py_INCREF(Py_None);
82	return Py_None;
83	}
84	return STRINGLIB_NEW(str->ptr, str->end - str->ptr);
85	}
86
87	/* return a new string. if str->ptr is NULL, return None */
88	Py_LOCAL_INLINE(PyObject *)
89	SubString_new_object_or_empty(SubString *str)
90	{
91	if (str->ptr == NULL) {
92	return STRINGLIB_NEW(NULL, 0);
93	}
94	return STRINGLIB_NEW(str->ptr, str->end - str->ptr);
95	}
96
97	/* Return 1 if an error has been detected switching between automatic
98	field numbering and manual field specification, else return 0. Set
99	ValueError on error. */
100	static int
101	autonumber_state_error(AutoNumberState state, int field_name_is_empty)
102	{
103	if (state == ANS_MANUAL) {
104	if (field_name_is_empty) {
105	PyErr_SetString(PyExc_ValueError, "cannot switch from "
106	"manual field specification to "
107	"automatic field numbering");
108	return 1;
109	}
110	}
111	else {
112	if (!field_name_is_empty) {
113	PyErr_SetString(PyExc_ValueError, "cannot switch from "
114	"automatic field numbering to "
115	"manual field specification");
116	return 1;
117	}
118	}
119	return 0;
120	}
121
122
123	/************************************************************************/
124	/********* Output string management functions **************/
125	/************************************************************************/
126
127	typedef struct {
128	STRINGLIB_CHAR *ptr;
129	STRINGLIB_CHAR *end;
130	PyObject *obj;
131	Py_ssize_t size_increment;
132	} OutputString;
133
134	/* initialize an OutputString object, reserving size characters */
135	static int
136	output_initialize(OutputString *output, Py_ssize_t size)
137	{
138	output->obj = STRINGLIB_NEW(NULL, size);
139	if (output->obj == NULL)
140	return 0;
141
142	output->ptr = STRINGLIB_STR(output->obj);
143	output->end = STRINGLIB_LEN(output->obj) + output->ptr;
144	output->size_increment = INITIAL_SIZE_INCREMENT;
145
146	return 1;
147	}
148
149	/*
150	output_extend reallocates the output string buffer.
151	It returns a status: 0 for a failed reallocation,
152	1 for success.
153	*/
154
155	static int
156	output_extend(OutputString *output, Py_ssize_t count)
157	{
158	STRINGLIB_CHAR *startptr = STRINGLIB_STR(output->obj);
159	Py_ssize_t curlen = output->ptr - startptr;
160	Py_ssize_t maxlen = curlen + count + output->size_increment;
161
162	if (STRINGLIB_RESIZE(&output->obj, maxlen) < 0)
163	return 0;
164	startptr = STRINGLIB_STR(output->obj);
165	output->ptr = startptr + curlen;
166	output->end = startptr + maxlen;
167	if (output->size_increment < MAX_SIZE_INCREMENT)
168	output->size_increment *= SIZE_MULTIPLIER;
169	return 1;
170	}
171
172	/*
173	output_data dumps characters into our output string
174	buffer.
175
176	In some cases, it has to reallocate the string.
177
178	It returns a status: 0 for a failed reallocation,
179	1 for success.
180	*/
181	static int
182	output_data(OutputString output, const STRINGLIB_CHAR s, Py_ssize_t count)
183	{
184	if ((count > output->end - output->ptr) && !output_extend(output, count))
185	return 0;
186	memcpy(output->ptr, s, count * sizeof(STRINGLIB_CHAR));
187	output->ptr += count;
188	return 1;
189	}
190
191	/************************************************************************/
192	/********* Format string parsing -- integers and identifiers *******/
193	/************************************************************************/
194
195	static Py_ssize_t
196	get_integer(const SubString *str)
197	{
198	Py_ssize_t accumulator = 0;
199	Py_ssize_t digitval;
200	STRINGLIB_CHAR *p;
201
202	/* empty string is an error */
203	if (str->ptr >= str->end)
204	return -1;
205
206	for (p = str->ptr; p < str->end; p++) {
207	digitval = STRINGLIB_TODECIMAL(*p);
208	if (digitval < 0)
209	return -1;
210	/*
211	Detect possible overflow before it happens:
212
213	accumulator * 10 + digitval > PY_SSIZE_T_MAX if and only if
214	accumulator > (PY_SSIZE_T_MAX - digitval) / 10.
215	*/
216	if (accumulator > (PY_SSIZE_T_MAX - digitval) / 10) {
217	PyErr_Format(PyExc_ValueError,
218	"Too many decimal digits in format string");
219	return -1;
220	}
221	accumulator = accumulator * 10 + digitval;
222	}
223	return accumulator;
224	}
225
226	/************************************************************************/
227	/****** Functions to get field objects and specification strings ****/
228	/************************************************************************/
229
230	/* do the equivalent of obj.name */
231	static PyObject *
232	getattr(PyObject obj, SubString name)
233	{
234	PyObject *newobj;
235	PyObject *str = SubString_new_object(name);
236	if (str == NULL)
237	return NULL;
238	newobj = PyObject_GetAttr(obj, str);
239	Py_DECREF(str);
240	return newobj;
241	}
242
243	/* do the equivalent of obj[idx], where obj is a sequence */
244	static PyObject *
245	getitem_sequence(PyObject *obj, Py_ssize_t idx)
246	{
247	return PySequence_GetItem(obj, idx);
248	}
249
250	/* do the equivalent of obj[idx], where obj is not a sequence */
251	static PyObject *
252	getitem_idx(PyObject *obj, Py_ssize_t idx)
253	{
254	PyObject *newobj;
255	PyObject *idx_obj = PyLong_FromSsize_t(idx);
256	if (idx_obj == NULL)
257	return NULL;
258	newobj = PyObject_GetItem(obj, idx_obj);
259	Py_DECREF(idx_obj);
260	return newobj;
261	}
262
263	/* do the equivalent of obj[name] */
264	static PyObject *
265	getitem_str(PyObject obj, SubString name)
266	{
267	PyObject *newobj;
268	PyObject *str = SubString_new_object(name);
269	if (str == NULL)
270	return NULL;
271	newobj = PyObject_GetItem(obj, str);
272	Py_DECREF(str);
273	return newobj;
274	}
275
276	typedef struct {
277	/* the entire string we're parsing. we assume that someone else
278	is managing its lifetime, and that it will exist for the
279	lifetime of the iterator. can be empty */
280	SubString str;
281
282	/* pointer to where we are inside field_name */
283	STRINGLIB_CHAR *ptr;
284	} FieldNameIterator;
285
286
287	static int
288	FieldNameIterator_init(FieldNameIterator self, STRINGLIB_CHAR ptr,
289	Py_ssize_t len)
290	{
291	SubString_init(&self->str, ptr, len);
292	self->ptr = self->str.ptr;
293	return 1;
294	}
295
296	static int
297	_FieldNameIterator_attr(FieldNameIterator self, SubString name)
298	{
299	STRINGLIB_CHAR c;
300
301	name->ptr = self->ptr;
302
303	/* return everything until '.' or '[' */
304	while (self->ptr < self->str.end) {
305	switch (c = *self->ptr++) {
306	case '[':
307	case '.':
308	/* backup so that we this character will be seen next time */
309	self->ptr--;
310	break;
311	default:
312	continue;
313	}
314	break;
315	}
316	/* end of string is okay */
317	name->end = self->ptr;
318	return 1;
319	}
320
321	static int
322	_FieldNameIterator_item(FieldNameIterator self, SubString name)
323	{
324	int bracket_seen = 0;
325	STRINGLIB_CHAR c;
326
327	name->ptr = self->ptr;
328
329	/* return everything until ']' */
330	while (self->ptr < self->str.end) {
331	switch (c = *self->ptr++) {
332	case ']':
333	bracket_seen = 1;
334	break;
335	default:
336	continue;
337	}
338	break;
339	}
340	/* make sure we ended with a ']' */
341	if (!bracket_seen) {
342	PyErr_SetString(PyExc_ValueError, "Missing ']' in format string");
343	return 0;
344	}
345
346	/* end of string is okay */
347	/* don't include the ']' */
348	name->end = self->ptr-1;
349	return 1;
350	}
351
352	/* returns 0 on error, 1 on non-error termination, and 2 if it returns a value */
353	static int
354	FieldNameIterator_next(FieldNameIterator self, int is_attribute,
355	Py_ssize_t name_idx, SubString name)
356	{
357	/* check at end of input */
358	if (self->ptr >= self->str.end)
359	return 1;
360
361	switch (*self->ptr++) {
362	case '.':
363	*is_attribute = 1;
364	if (_FieldNameIterator_attr(self, name) == 0)
365	return 0;
366	*name_idx = -1;
367	break;
368	case '[':
369	*is_attribute = 0;
370	if (_FieldNameIterator_item(self, name) == 0)
371	return 0;
372	*name_idx = get_integer(name);
373	if (*name_idx == -1 && PyErr_Occurred())
374	return 0;
375	break;
376	default:
377	/* Invalid character follows ']' */
378	PyErr_SetString(PyExc_ValueError, "Only '.' or '[' may "
379	"follow ']' in format field specifier");
380	return 0;
381	}
382
383	/* empty string is an error */
384	if (name->ptr == name->end) {
385	PyErr_SetString(PyExc_ValueError, "Empty attribute in format string");
386	return 0;
387	}
388
389	return 2;
390	}
391
392
393	/* input: field_name
394	output: 'first' points to the part before the first '[' or '.'
395	'first_idx' is -1 if 'first' is not an integer, otherwise
396	it's the value of first converted to an integer
397	'rest' is an iterator to return the rest
398	*/
399	static int
400	field_name_split(STRINGLIB_CHAR ptr, Py_ssize_t len, SubString first,
401	Py_ssize_t first_idx, FieldNameIterator rest,
402	AutoNumber *auto_number)
403	{
404	STRINGLIB_CHAR c;
405	STRINGLIB_CHAR *p = ptr;
406	STRINGLIB_CHAR *end = ptr + len;
407	int field_name_is_empty;
408	int using_numeric_index;
409
410	/* find the part up until the first '.' or '[' */
411	while (p < end) {
412	switch (c = *p++) {
413	case '[':
414	case '.':
415	/* backup so that we this character is available to the
416	"rest" iterator */
417	p--;
418	break;
419	default:
420	continue;
421	}
422	break;
423	}
424
425	/* set up the return values */
426	SubString_init(first, ptr, p - ptr);
427	FieldNameIterator_init(rest, p, end - p);
428
429	/* see if "first" is an integer, in which case it's used as an index */
430	*first_idx = get_integer(first);
431	if (*first_idx == -1 && PyErr_Occurred())
432	return 0;
433
434	field_name_is_empty = first->ptr >= first->end;
435
436	/* If the field name is omitted or if we have a numeric index
437	specified, then we're doing numeric indexing into args. */
438	using_numeric_index = field_name_is_empty \|\| *first_idx != -1;
439
440	/* We always get here exactly one time for each field we're
441	processing. And we get here in field order (counting by left
442	braces). So this is the perfect place to handle automatic field
443	numbering if the field name is omitted. */
444
445	/* Check if we need to do the auto-numbering. It's not needed if
446	we're called from string.Format routines, because it's handled
447	in that class by itself. */
448	if (auto_number) {
449	/* Initialize our auto numbering state if this is the first
450	time we're either auto-numbering or manually numbering. */
451	if (auto_number->an_state == ANS_INIT && using_numeric_index)
452	auto_number->an_state = field_name_is_empty ?
453	ANS_AUTO : ANS_MANUAL;
454
455	/* Make sure our state is consistent with what we're doing
456	this time through. Only check if we're using a numeric
457	index. */
458	if (using_numeric_index)
459	if (autonumber_state_error(auto_number->an_state,
460	field_name_is_empty))
461	return 0;
462	/* Zero length field means we want to do auto-numbering of the
463	fields. */
464	if (field_name_is_empty)
465	*first_idx = (auto_number->an_field_number)++;
466	}
467
468	return 1;
469	}
470
471
472	/*
473	get_field_object returns the object inside {}, before the
474	format_spec. It handles getindex and getattr lookups and consumes
475	the entire input string.
476	*/
477	static PyObject *
478	get_field_object(SubString input, PyObject args, PyObject *kwargs,
479	AutoNumber *auto_number)
480	{
481	PyObject *obj = NULL;
482	int ok;
483	int is_attribute;
484	SubString name;
485	SubString first;
486	Py_ssize_t index;
487	FieldNameIterator rest;
488
489	if (!field_name_split(input->ptr, input->end - input->ptr, &first,
490	&index, &rest, auto_number)) {
491	goto error;
492	}
493
494	if (index == -1) {
495	/* look up in kwargs */
496	PyObject *key = SubString_new_object(&first);
497	if (key == NULL)
498	goto error;
499	if ((kwargs == NULL) \|\| (obj = PyDict_GetItem(kwargs, key)) == NULL) {
500	PyErr_SetObject(PyExc_KeyError, key);
501	Py_DECREF(key);
502	goto error;
503	}
504	Py_DECREF(key);
505	Py_INCREF(obj);
506	}
507	else {
508	/* look up in args */
509	obj = PySequence_GetItem(args, index);
510	if (obj == NULL)
511	goto error;
512	}
513
514	/* iterate over the rest of the field_name */
515	while ((ok = FieldNameIterator_next(&rest, &is_attribute, &index,
516	&name)) == 2) {
517	PyObject *tmp;
518
519	if (is_attribute)
520	/* getattr lookup "." */
521	tmp = getattr(obj, &name);
522	else
523	/* getitem lookup "[]" */
524	if (index == -1)
525	tmp = getitem_str(obj, &name);
526	else
527	if (PySequence_Check(obj))
528	tmp = getitem_sequence(obj, index);
529	else
530	/* not a sequence */
531	tmp = getitem_idx(obj, index);
532	if (tmp == NULL)
533	goto error;
534
535	/* assign to obj */
536	Py_DECREF(obj);
537	obj = tmp;
538	}
539	/* end of iterator, this is the non-error case */
540	if (ok == 1)
541	return obj;
542	error:
543	Py_XDECREF(obj);
544	return NULL;
545	}
546
547	/************************************************************************/
548	/*************** Field rendering functions ************************/
549	/************************************************************************/
550
551	/*
552	render_field() is the main function in this section. It takes the
553	field object and field specification string generated by
554	get_field_and_spec, and renders the field into the output string.
555
556	render_field calls fieldobj.__format__(format_spec) method, and
557	appends to the output.
558	*/
559	static int
560	render_field(PyObject fieldobj, SubString format_spec, OutputString *output)
561	{
562	int ok = 0;
563	PyObject *result = NULL;
564	PyObject *format_spec_object = NULL;
565	PyObject (formatter)(PyObject , STRINGLIB_CHAR , Py_ssize_t) = NULL;
566	STRINGLIB_CHAR* format_spec_start = format_spec->ptr ?
567	format_spec->ptr : NULL;
568	Py_ssize_t format_spec_len = format_spec->ptr ?
569	format_spec->end - format_spec->ptr : 0;
570
571	/* If we know the type exactly, skip the lookup of __format__ and just
572	call the formatter directly. */
573	#if STRINGLIB_IS_UNICODE
574	if (PyUnicode_CheckExact(fieldobj))
575	formatter = _PyUnicode_FormatAdvanced;
576	/* Unfortunately, there's a problem with checking for int, long,
577	and float here. If we're being included as unicode, their
578	formatters expect string format_spec args. For now, just skip
579	this optimization for unicode. This could be fixed, but it's a
580	hassle. */
581	#else
582	if (PyString_CheckExact(fieldobj))
583	formatter = _PyBytes_FormatAdvanced;
584	else if (PyInt_CheckExact(fieldobj))
585	formatter =_PyInt_FormatAdvanced;
586	else if (PyLong_CheckExact(fieldobj))
587	formatter =_PyLong_FormatAdvanced;
588	else if (PyFloat_CheckExact(fieldobj))
589	formatter = _PyFloat_FormatAdvanced;
590	#endif
591
592	if (formatter) {
593	/* we know exactly which formatter will be called when __format__ is
594	looked up, so call it directly, instead. */
595	result = formatter(fieldobj, format_spec_start, format_spec_len);
596	}
597	else {
598	/* We need to create an object out of the pointers we have, because
599	__format__ takes a string/unicode object for format_spec. */
600	format_spec_object = STRINGLIB_NEW(format_spec_start,
601	format_spec_len);
602	if (format_spec_object == NULL)
603	goto done;
604
605	result = PyObject_Format(fieldobj, format_spec_object);
606	}
607	if (result == NULL)
608	goto done;
609
610	#if PY_VERSION_HEX >= 0x03000000
611	assert(PyUnicode_Check(result));
612	#else
613	assert(PyString_Check(result) \|\| PyUnicode_Check(result));
614
615	/* Convert result to our type. We could be str, and result could
616	be unicode */
617	{
618	PyObject *tmp = STRINGLIB_TOSTR(result);
619	if (tmp == NULL)
620	goto done;
621	Py_DECREF(result);
622	result = tmp;
623	}
624	#endif
625
626	ok = output_data(output,
627	STRINGLIB_STR(result), STRINGLIB_LEN(result));
628	done:
629	Py_XDECREF(format_spec_object);
630	Py_XDECREF(result);
631	return ok;
632	}
633
634	static int
635	parse_field(SubString str, SubString field_name, SubString *format_spec,
636	STRINGLIB_CHAR *conversion)
637	{
638	/* Note this function works if the field name is zero length,
639	which is good. Zero length field names are handled later, in
640	field_name_split. */
641
642	STRINGLIB_CHAR c = 0;
643
644	/* initialize these, as they may be empty */
645	*conversion = '\0';
646	SubString_init(format_spec, NULL, 0);
647
648	/* Search for the field name. it's terminated by the end of
649	the string, or a ':' or '!' */
650	field_name->ptr = str->ptr;
651	while (str->ptr < str->end) {
652	switch (c = *(str->ptr++)) {
653	case ':':
654	case '!':
655	break;
656	default:
657	continue;
658	}
659	break;
660	}
661
662	if (c == '!' \|\| c == ':') {
663	/* we have a format specifier and/or a conversion */
664	/* don't include the last character */
665	field_name->end = str->ptr-1;
666
667	/* the format specifier is the rest of the string */
668	format_spec->ptr = str->ptr;
669	format_spec->end = str->end;
670
671	/* see if there's a conversion specifier */
672	if (c == '!') {
673	/* there must be another character present */
674	if (format_spec->ptr >= format_spec->end) {
675	PyErr_SetString(PyExc_ValueError,
676	"end of format while looking for conversion "
677	"specifier");
678	return 0;
679	}
680	conversion = (format_spec->ptr++);
681
682	/* if there is another character, it must be a colon */
683	if (format_spec->ptr < format_spec->end) {
684	c = *(format_spec->ptr++);
685	if (c != ':') {
686	PyErr_SetString(PyExc_ValueError,
687	"expected ':' after format specifier");
688	return 0;
689	}
690	}
691	}
692	}
693	else
694	/* end of string, there's no format_spec or conversion */
695	field_name->end = str->ptr;
696
697	return 1;
698	}
699
700	/************************************************************************/
701	/***** Output string allocation and escape-to-markup processing ****/
702	/************************************************************************/
703
704	/* MarkupIterator breaks the string into pieces of either literal
705	text, or things inside {} that need to be marked up. it is
706	designed to make it easy to wrap a Python iterator around it, for
707	use with the Formatter class */
708
709	typedef struct {
710	SubString str;
711	} MarkupIterator;
712
713	static int
714	MarkupIterator_init(MarkupIterator self, STRINGLIB_CHAR ptr, Py_ssize_t len)
715	{
716	SubString_init(&self->str, ptr, len);
717	return 1;
718	}
719
720	/* returns 0 on error, 1 on non-error termination, and 2 if it got a
721	string (or something to be expanded) */
722	static int
723	MarkupIterator_next(MarkupIterator self, SubString literal,
724	int field_present, SubString field_name,
725	SubString format_spec, STRINGLIB_CHAR conversion,
726	int *format_spec_needs_expanding)
727	{
728	int at_end;
729	STRINGLIB_CHAR c = 0;
730	STRINGLIB_CHAR *start;
731	int count;
732	Py_ssize_t len;
733	int markup_follows = 0;
734
735	/* initialize all of the output variables */
736	SubString_init(literal, NULL, 0);
737	SubString_init(field_name, NULL, 0);
738	SubString_init(format_spec, NULL, 0);
739	*conversion = '\0';
740	*format_spec_needs_expanding = 0;
741	*field_present = 0;
742
743	/* No more input, end of iterator. This is the normal exit
744	path. */
745	if (self->str.ptr >= self->str.end)
746	return 1;
747
748	start = self->str.ptr;
749
750	/* First read any literal text. Read until the end of string, an
751	escaped '{' or '}', or an unescaped '{'. In order to never
752	allocate memory and so I can just pass pointers around, if
753	there's an escaped '{' or '}' then we'll return the literal
754	including the brace, but no format object. The next time
755	through, we'll return the rest of the literal, skipping past
756	the second consecutive brace. */
757	while (self->str.ptr < self->str.end) {
758	switch (c = *(self->str.ptr++)) {
759	case '{':
760	case '}':
761	markup_follows = 1;
762	break;
763	default:
764	continue;
765	}
766	break;
767	}
768
769	at_end = self->str.ptr >= self->str.end;
770	len = self->str.ptr - start;
771
772	if ((c == '}') && (at_end \|\| (c != *self->str.ptr))) {
773	PyErr_SetString(PyExc_ValueError, "Single '}' encountered "
774	"in format string");
775	return 0;
776	}
777	if (at_end && c == '{') {
778	PyErr_SetString(PyExc_ValueError, "Single '{' encountered "
779	"in format string");
780	return 0;
781	}
782	if (!at_end) {
783	if (c == *self->str.ptr) {
784	/* escaped } or {, skip it in the input. there is no
785	markup object following us, just this literal text */
786	self->str.ptr++;
787	markup_follows = 0;
788	}
789	else
790	len--;
791	}
792
793	/* record the literal text */
794	literal->ptr = start;
795	literal->end = start + len;
796
797	if (!markup_follows)
798	return 2;
799
800	/* this is markup, find the end of the string by counting nested
801	braces. note that this prohibits escaped braces, so that
802	format_specs cannot have braces in them. */
803	*field_present = 1;
804	count = 1;
805
806	start = self->str.ptr;
807
808	/* we know we can't have a zero length string, so don't worry
809	about that case */
810	while (self->str.ptr < self->str.end) {
811	switch (c = *(self->str.ptr++)) {
812	case '{':
813	/* the format spec needs to be recursively expanded.
814	this is an optimization, and not strictly needed */
815	*format_spec_needs_expanding = 1;
816	count++;
817	break;
818	case '}':
819	count--;
820	if (count <= 0) {
821	/* we're done. parse and get out */
822	SubString s;
823
824	SubString_init(&s, start, self->str.ptr - 1 - start);
825	if (parse_field(&s, field_name, format_spec, conversion) == 0)
826	return 0;
827
828	/* success */
829	return 2;
830	}
831	break;
832	}
833	}
834
835	/* end of string while searching for matching '}' */
836	PyErr_SetString(PyExc_ValueError, "unmatched '{' in format");
837	return 0;
838	}
839
840
841	/* do the !r or !s conversion on obj */
842	static PyObject *
843	do_conversion(PyObject *obj, STRINGLIB_CHAR conversion)
844	{
845	/* XXX in pre-3.0, do we need to convert this to unicode, since it
846	might have returned a string? */
847	switch (conversion) {
848	case 'r':
849	return PyObject_Repr(obj);
850	case 's':
851	return STRINGLIB_TOSTR(obj);
852	default:
853	if (conversion > 32 && conversion < 127) {
854	/* It's the ASCII subrange; casting to char is safe
855	(assuming the execution character set is an ASCII
856	superset). */
857	PyErr_Format(PyExc_ValueError,
858	"Unknown conversion specifier %c",
859	(char)conversion);
860	} else
861	PyErr_Format(PyExc_ValueError,
862	"Unknown conversion specifier \\x%x",
863	(unsigned int)conversion);
864	return NULL;
865	}
866	}
867
868	/* given:
869
870	{field_name!conversion:format_spec}
871
872	compute the result and write it to output.
873	format_spec_needs_expanding is an optimization. if it's false,
874	just output the string directly, otherwise recursively expand the
875	format_spec string.
876
877	field_name is allowed to be zero length, in which case we
878	are doing auto field numbering.
879	*/
880
881	static int
882	output_markup(SubString field_name, SubString format_spec,
883	int format_spec_needs_expanding, STRINGLIB_CHAR conversion,
884	OutputString output, PyObject args, PyObject *kwargs,
885	int recursion_depth, AutoNumber *auto_number)
886	{
887	PyObject *tmp = NULL;
888	PyObject *fieldobj = NULL;
889	SubString expanded_format_spec;
890	SubString *actual_format_spec;
891	int result = 0;
892
893	/* convert field_name to an object */
894	fieldobj = get_field_object(field_name, args, kwargs, auto_number);
895	if (fieldobj == NULL)
896	goto done;
897
898	if (conversion != '\0') {
899	tmp = do_conversion(fieldobj, conversion);
900	if (tmp == NULL)
901	goto done;
902
903	/* do the assignment, transferring ownership: fieldobj = tmp */
904	Py_DECREF(fieldobj);
905	fieldobj = tmp;
906	tmp = NULL;
907	}
908
909	/* if needed, recurively compute the format_spec */
910	if (format_spec_needs_expanding) {
911	tmp = build_string(format_spec, args, kwargs, recursion_depth-1,
912	auto_number);
913	if (tmp == NULL)
914	goto done;
915
916	/* note that in the case we're expanding the format string,
917	tmp must be kept around until after the call to
918	render_field. */
919	SubString_init(&expanded_format_spec,
920	STRINGLIB_STR(tmp), STRINGLIB_LEN(tmp));
921	actual_format_spec = &expanded_format_spec;
922	}
923	else
924	actual_format_spec = format_spec;
925
926	if (render_field(fieldobj, actual_format_spec, output) == 0)
927	goto done;
928
929	result = 1;
930
931	done:
932	Py_XDECREF(fieldobj);
933	Py_XDECREF(tmp);
934
935	return result;
936	}
937
938	/*
939	do_markup is the top-level loop for the format() method. It
940	searches through the format string for escapes to markup codes, and
941	calls other functions to move non-markup text to the output,
942	and to perform the markup to the output.
943	*/
944	static int
945	do_markup(SubString input, PyObject args, PyObject *kwargs,
946	OutputString output, int recursion_depth, AutoNumber auto_number)
947	{
948	MarkupIterator iter;
949	int format_spec_needs_expanding;
950	int result;
951	int field_present;
952	SubString literal;
953	SubString field_name;
954	SubString format_spec;
955	STRINGLIB_CHAR conversion;
956
957	MarkupIterator_init(&iter, input->ptr, input->end - input->ptr);
958	while ((result = MarkupIterator_next(&iter, &literal, &field_present,
959	&field_name, &format_spec,
960	&conversion,
961	&format_spec_needs_expanding)) == 2) {
962	if (!output_data(output, literal.ptr, literal.end - literal.ptr))
963	return 0;
964	if (field_present)
965	if (!output_markup(&field_name, &format_spec,
966	format_spec_needs_expanding, conversion, output,
967	args, kwargs, recursion_depth, auto_number))
968	return 0;
969	}
970	return result;
971	}
972
973
974	/*
975	build_string allocates the output string and then
976	calls do_markup to do the heavy lifting.
977	*/
978	static PyObject *
979	build_string(SubString input, PyObject args, PyObject *kwargs,
980	int recursion_depth, AutoNumber *auto_number)
981	{
982	OutputString output;
983	PyObject *result = NULL;
984	Py_ssize_t count;
985
986	output.obj = NULL; /* needed so cleanup code always works */
987
988	/* check the recursion level */
989	if (recursion_depth <= 0) {
990	PyErr_SetString(PyExc_ValueError,
991	"Max string recursion exceeded");
992	goto done;
993	}
994
995	/* initial size is the length of the format string, plus the size
996	increment. seems like a reasonable default */
997	if (!output_initialize(&output,
998	input->end - input->ptr +
999	INITIAL_SIZE_INCREMENT))
1000	goto done;
1001
1002	if (!do_markup(input, args, kwargs, &output, recursion_depth,
1003	auto_number)) {
1004	goto done;
1005	}
1006
1007	count = output.ptr - STRINGLIB_STR(output.obj);
1008	if (STRINGLIB_RESIZE(&output.obj, count) < 0) {
1009	goto done;
1010	}
1011
1012	/* transfer ownership to result */
1013	result = output.obj;
1014	output.obj = NULL;
1015
1016	done:
1017	Py_XDECREF(output.obj);
1018	return result;
1019	}
1020
1021	/************************************************************************/
1022	/********* main routine *********************************************/
1023	/************************************************************************/
1024
1025	/* this is the main entry point */
1026	static PyObject *
1027	do_string_format(PyObject self, PyObject args, PyObject *kwargs)
1028	{
1029	SubString input;
1030
1031	/* PEP 3101 says only 2 levels, so that
1032	"{0:{1}}".format('abc', 's') # works
1033	"{0:{1:{2}}}".format('abc', 's', '') # fails
1034	*/
1035	int recursion_depth = 2;
1036
1037	AutoNumber auto_number;
1038
1039	AutoNumber_Init(&auto_number);
1040	SubString_init(&input, STRINGLIB_STR(self), STRINGLIB_LEN(self));
1041	return build_string(&input, args, kwargs, recursion_depth, &auto_number);
1042	}
1043
1044
1045
1046	/************************************************************************/
1047	/********* formatteriterator ****************************************/
1048	/************************************************************************/
1049
1050	/* This is used to implement string.Formatter.vparse(). It exists so
1051	Formatter can share code with the built in unicode.format() method.
1052	It's really just a wrapper around MarkupIterator that is callable
1053	from Python. */
1054
1055	typedef struct {
1056	PyObject_HEAD
1057
1058	STRINGLIB_OBJECT *str;
1059
1060	MarkupIterator it_markup;
1061	} formatteriterobject;
1062
1063	static void
1064	formatteriter_dealloc(formatteriterobject *it)
1065	{
1066	Py_XDECREF(it->str);
1067	PyObject_FREE(it);
1068	}
1069
1070	/* returns a tuple:
1071	(literal, field_name, format_spec, conversion)
1072
1073	literal is any literal text to output. might be zero length
1074	field_name is the string before the ':'. might be None
1075	format_spec is the string after the ':'. mibht be None
1076	conversion is either None, or the string after the '!'
1077	*/
1078	static PyObject *
1079	formatteriter_next(formatteriterobject *it)
1080	{
1081	SubString literal;
1082	SubString field_name;
1083	SubString format_spec;
1084	STRINGLIB_CHAR conversion;
1085	int format_spec_needs_expanding;
1086	int field_present;
1087	int result = MarkupIterator_next(&it->it_markup, &literal, &field_present,
1088	&field_name, &format_spec, &conversion,
1089	&format_spec_needs_expanding);
1090
1091	/* all of the SubString objects point into it->str, so no
1092	memory management needs to be done on them */
1093	assert(0 <= result && result <= 2);
1094	if (result == 0 \|\| result == 1)
1095	/* if 0, error has already been set, if 1, iterator is empty */
1096	return NULL;
1097	else {
1098	PyObject *literal_str = NULL;
1099	PyObject *field_name_str = NULL;
1100	PyObject *format_spec_str = NULL;
1101	PyObject *conversion_str = NULL;
1102	PyObject *tuple = NULL;
1103
1104	literal_str = SubString_new_object(&literal);
1105	if (literal_str == NULL)
1106	goto done;
1107
1108	field_name_str = SubString_new_object(&field_name);
1109	if (field_name_str == NULL)
1110	goto done;
1111
1112	/* if field_name is non-zero length, return a string for
1113	format_spec (even if zero length), else return None */
1114	format_spec_str = (field_present ?
1115	SubString_new_object_or_empty :
1116	SubString_new_object)(&format_spec);
1117	if (format_spec_str == NULL)
1118	goto done;
1119
1120	/* if the conversion is not specified, return a None,
1121	otherwise create a one length string with the conversion
1122	character */
1123	if (conversion == '\0') {
1124	conversion_str = Py_None;
1125	Py_INCREF(conversion_str);
1126	}
1127	else
1128	conversion_str = STRINGLIB_NEW(&conversion, 1);
1129	if (conversion_str == NULL)
1130	goto done;
1131
1132	tuple = PyTuple_Pack(4, literal_str, field_name_str, format_spec_str,
1133	conversion_str);
1134	done:
1135	Py_XDECREF(literal_str);
1136	Py_XDECREF(field_name_str);
1137	Py_XDECREF(format_spec_str);
1138	Py_XDECREF(conversion_str);
1139	return tuple;
1140	}
1141	}
1142
1143	static PyMethodDef formatteriter_methods[] = {
1144	{NULL, NULL} /* sentinel */
1145	};
1146
1147	static PyTypeObject PyFormatterIter_Type = {
1148	PyVarObject_HEAD_INIT(&PyType_Type, 0)
1149	"formatteriterator", /* tp_name */
1150	sizeof(formatteriterobject), /* tp_basicsize */
1151	0, /* tp_itemsize */
1152	/* methods */
1153	(destructor)formatteriter_dealloc, /* tp_dealloc */
1154	0, /* tp_print */
1155	0, /* tp_getattr */
1156	0, /* tp_setattr */
1157	0, /* tp_compare */
1158	0, /* tp_repr */
1159	0, /* tp_as_number */
1160	0, /* tp_as_sequence */
1161	0, /* tp_as_mapping */
1162	0, /* tp_hash */
1163	0, /* tp_call */
1164	0, /* tp_str */
1165	PyObject_GenericGetAttr, /* tp_getattro */
1166	0, /* tp_setattro */
1167	0, /* tp_as_buffer */
1168	Py_TPFLAGS_DEFAULT, /* tp_flags */
1169	0, /* tp_doc */
1170	0, /* tp_traverse */
1171	0, /* tp_clear */
1172	0, /* tp_richcompare */
1173	0, /* tp_weaklistoffset */
1174	PyObject_SelfIter, /* tp_iter */
1175	(iternextfunc)formatteriter_next, /* tp_iternext */
1176	formatteriter_methods, /* tp_methods */
1177	0,
1178	};
1179
1180	/* unicode_formatter_parser is used to implement
1181	string.Formatter.vformat. it parses a string and returns tuples
1182	describing the parsed elements. It's a wrapper around
1183	stringlib/string_format.h's MarkupIterator */
1184	static PyObject *
1185	formatter_parser(STRINGLIB_OBJECT *self)
1186	{
1187	formatteriterobject *it;
1188
1189	it = PyObject_New(formatteriterobject, &PyFormatterIter_Type);
1190	if (it == NULL)
1191	return NULL;
1192
1193	/* take ownership, give the object to the iterator */
1194	Py_INCREF(self);
1195	it->str = self;
1196
1197	/* initialize the contained MarkupIterator */
1198	MarkupIterator_init(&it->it_markup,
1199	STRINGLIB_STR(self),
1200	STRINGLIB_LEN(self));
1201
1202	return (PyObject *)it;
1203	}
1204
1205
1206	/************************************************************************/
1207	/********* fieldnameiterator ****************************************/
1208	/************************************************************************/
1209
1210
1211	/* This is used to implement string.Formatter.vparse(). It parses the
1212	field name into attribute and item values. It's a Python-callable
1213	wrapper around FieldNameIterator */
1214
1215	typedef struct {
1216	PyObject_HEAD
1217
1218	STRINGLIB_OBJECT *str;
1219
1220	FieldNameIterator it_field;
1221	} fieldnameiterobject;
1222
1223	static void
1224	fieldnameiter_dealloc(fieldnameiterobject *it)
1225	{
1226	Py_XDECREF(it->str);
1227	PyObject_FREE(it);
1228	}
1229
1230	/* returns a tuple:
1231	(is_attr, value)
1232	is_attr is true if we used attribute syntax (e.g., '.foo')
1233	false if we used index syntax (e.g., '[foo]')
1234	value is an integer or string
1235	*/
1236	static PyObject *
1237	fieldnameiter_next(fieldnameiterobject *it)
1238	{
1239	int result;
1240	int is_attr;
1241	Py_ssize_t idx;
1242	SubString name;
1243
1244	result = FieldNameIterator_next(&it->it_field, &is_attr,
1245	&idx, &name);
1246	if (result == 0 \|\| result == 1)
1247	/* if 0, error has already been set, if 1, iterator is empty */
1248	return NULL;
1249	else {
1250	PyObject* result = NULL;
1251	PyObject* is_attr_obj = NULL;
1252	PyObject* obj = NULL;
1253
1254	is_attr_obj = PyBool_FromLong(is_attr);
1255	if (is_attr_obj == NULL)
1256	goto done;
1257
1258	/* either an integer or a string */
1259	if (idx != -1)
1260	obj = PyLong_FromSsize_t(idx);
1261	else
1262	obj = SubString_new_object(&name);
1263	if (obj == NULL)
1264	goto done;
1265
1266	/* return a tuple of values */
1267	result = PyTuple_Pack(2, is_attr_obj, obj);
1268
1269	done:
1270	Py_XDECREF(is_attr_obj);
1271	Py_XDECREF(obj);
1272	return result;
1273	}
1274	}
1275
1276	static PyMethodDef fieldnameiter_methods[] = {
1277	{NULL, NULL} /* sentinel */
1278	};
1279
1280	static PyTypeObject PyFieldNameIter_Type = {
1281	PyVarObject_HEAD_INIT(&PyType_Type, 0)
1282	"fieldnameiterator", /* tp_name */
1283	sizeof(fieldnameiterobject), /* tp_basicsize */
1284	0, /* tp_itemsize */
1285	/* methods */
1286	(destructor)fieldnameiter_dealloc, /* tp_dealloc */
1287	0, /* tp_print */
1288	0, /* tp_getattr */
1289	0, /* tp_setattr */
1290	0, /* tp_compare */
1291	0, /* tp_repr */
1292	0, /* tp_as_number */
1293	0, /* tp_as_sequence */
1294	0, /* tp_as_mapping */
1295	0, /* tp_hash */
1296	0, /* tp_call */
1297	0, /* tp_str */
1298	PyObject_GenericGetAttr, /* tp_getattro */
1299	0, /* tp_setattro */
1300	0, /* tp_as_buffer */
1301	Py_TPFLAGS_DEFAULT, /* tp_flags */
1302	0, /* tp_doc */
1303	0, /* tp_traverse */
1304	0, /* tp_clear */
1305	0, /* tp_richcompare */
1306	0, /* tp_weaklistoffset */
1307	PyObject_SelfIter, /* tp_iter */
1308	(iternextfunc)fieldnameiter_next, /* tp_iternext */
1309	fieldnameiter_methods, /* tp_methods */
1310	0};
1311
1312	/* unicode_formatter_field_name_split is used to implement
1313	string.Formatter.vformat. it takes an PEP 3101 "field name", and
1314	returns a tuple of (first, rest): "first", the part before the
1315	first '.' or '['; and "rest", an iterator for the rest of the field
1316	name. it's a wrapper around stringlib/string_format.h's
1317	field_name_split. The iterator it returns is a
1318	FieldNameIterator */
1319	static PyObject *
1320	formatter_field_name_split(STRINGLIB_OBJECT *self)
1321	{
1322	SubString first;
1323	Py_ssize_t first_idx;
1324	fieldnameiterobject *it;
1325
1326	PyObject *first_obj = NULL;
1327	PyObject *result = NULL;
1328
1329	it = PyObject_New(fieldnameiterobject, &PyFieldNameIter_Type);
1330	if (it == NULL)
1331	return NULL;
1332
1333	/* take ownership, give the object to the iterator. this is
1334	just to keep the field_name alive */
1335	Py_INCREF(self);
1336	it->str = self;
1337
1338	/* Pass in auto_number = NULL. We'll return an empty string for
1339	first_obj in that case. */
1340	if (!field_name_split(STRINGLIB_STR(self),
1341	STRINGLIB_LEN(self),
1342	&first, &first_idx, &it->it_field, NULL))
1343	goto done;
1344
1345	/* first becomes an integer, if possible; else a string */
1346	if (first_idx != -1)
1347	first_obj = PyLong_FromSsize_t(first_idx);
1348	else
1349	/* convert "first" into a string object */
1350	first_obj = SubString_new_object(&first);
1351	if (first_obj == NULL)
1352	goto done;
1353
1354	/* return a tuple of values */
1355	result = PyTuple_Pack(2, first_obj, it);
1356
1357	done:
1358	Py_XDECREF(it);
1359	Py_XDECREF(first_obj);
1360	return result;
1361	}

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: python/trunk/Objects/stringlib/string_format.h

Download in other formats: