Context Navigation

stringobject.c@ 391

Last change on this file since 391 was 391, checked in by dmik, 11 years ago
python: Merge vendor 2.7.6 to trunk.
Property svn:eol-style set to `native`
File size: 141.9 KB

Line
1	/* String (str/bytes) object implementation */
2
3	#define PY_SSIZE_T_CLEAN
4
5	#include "Python.h"
6	#include <ctype.h>
7	#include <stddef.h>
8
9	#ifdef COUNT_ALLOCS
10	Py_ssize_t null_strings, one_strings;
11	#endif
12
13	static PyStringObject *characters[UCHAR_MAX + 1];
14	static PyStringObject *nullstring;
15
16	/* This dictionary holds all interned strings. Note that references to
17	strings in this dictionary are not counted in the string's ob_refcnt.
18	When the interned string reaches a refcnt of 0 the string deallocation
19	function will delete the reference from this dictionary.
20
21	Another way to look at this is to say that the actual reference
22	count of a string is: s->ob_refcnt + (s->ob_sstate?2:0)
23	*/
24	static PyObject *interned;
25
26	/* PyStringObject_SIZE gives the basic size of a string; any memory allocation
27	for a string of length n should request PyStringObject_SIZE + n bytes.
28
29	Using PyStringObject_SIZE instead of sizeof(PyStringObject) saves
30	3 bytes per string allocation on a typical system.
31	*/
32	#define PyStringObject_SIZE (offsetof(PyStringObject, ob_sval) + 1)
33
34	/*
35	For PyString_FromString(), the parameter `str' points to a null-terminated
36	string containing exactly `size' bytes.
37
38	For PyString_FromStringAndSize(), the parameter `str' is
39	either NULL or else points to a string containing at least `size' bytes.
40	For PyString_FromStringAndSize(), the string in the `str' parameter does
41	not have to be null-terminated. (Therefore it is safe to construct a
42	substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
43	If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
44	bytes (setting the last byte to the null terminating character) and you can
45	fill in the data yourself. If `str' is non-NULL then the resulting
46	PyString object must be treated as immutable and you must not fill in nor
47	alter the data yourself, since the strings may be shared.
48
49	The PyObject member `op->ob_size', which denotes the number of "extra
50	items" in a variable-size object, will contain the number of bytes
51	allocated for string data, not counting the null terminating character.
52	It is therefore equal to the `size' parameter (for
53	PyString_FromStringAndSize()) or the length of the string in the `str'
54	parameter (for PyString_FromString()).
55	*/
56	PyObject *
57	PyString_FromStringAndSize(const char *str, Py_ssize_t size)
58	{
59	register PyStringObject *op;
60	if (size < 0) {
61	PyErr_SetString(PyExc_SystemError,
62	"Negative size passed to PyString_FromStringAndSize");
63	return NULL;
64	}
65	if (size == 0 && (op = nullstring) != NULL) {
66	#ifdef COUNT_ALLOCS
67	null_strings++;
68	#endif
69	Py_INCREF(op);
70	return (PyObject *)op;
71	}
72	if (size == 1 && str != NULL &&
73	(op = characters[*str & UCHAR_MAX]) != NULL)
74	{
75	#ifdef COUNT_ALLOCS
76	one_strings++;
77	#endif
78	Py_INCREF(op);
79	return (PyObject *)op;
80	}
81
82	if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
83	PyErr_SetString(PyExc_OverflowError, "string is too large");
84	return NULL;
85	}
86
87	/* Inline PyObject_NewVar */
88	op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
89	if (op == NULL)
90	return PyErr_NoMemory();
91	PyObject_INIT_VAR(op, &PyString_Type, size);
92	op->ob_shash = -1;
93	op->ob_sstate = SSTATE_NOT_INTERNED;
94	if (str != NULL)
95	Py_MEMCPY(op->ob_sval, str, size);
96	op->ob_sval[size] = '\0';
97	/* share short strings */
98	if (size == 0) {
99	PyObject t = (PyObject )op;
100	PyString_InternInPlace(&t);
101	op = (PyStringObject *)t;
102	nullstring = op;
103	Py_INCREF(op);
104	} else if (size == 1 && str != NULL) {
105	PyObject t = (PyObject )op;
106	PyString_InternInPlace(&t);
107	op = (PyStringObject *)t;
108	characters[*str & UCHAR_MAX] = op;
109	Py_INCREF(op);
110	}
111	return (PyObject *) op;
112	}
113
114	PyObject *
115	PyString_FromString(const char *str)
116	{
117	register size_t size;
118	register PyStringObject *op;
119
120	assert(str != NULL);
121	size = strlen(str);
122	if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
123	PyErr_SetString(PyExc_OverflowError,
124	"string is too long for a Python string");
125	return NULL;
126	}
127	if (size == 0 && (op = nullstring) != NULL) {
128	#ifdef COUNT_ALLOCS
129	null_strings++;
130	#endif
131	Py_INCREF(op);
132	return (PyObject *)op;
133	}
134	if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
135	#ifdef COUNT_ALLOCS
136	one_strings++;
137	#endif
138	Py_INCREF(op);
139	return (PyObject *)op;
140	}
141
142	/* Inline PyObject_NewVar */
143	op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
144	if (op == NULL)
145	return PyErr_NoMemory();
146	PyObject_INIT_VAR(op, &PyString_Type, size);
147	op->ob_shash = -1;
148	op->ob_sstate = SSTATE_NOT_INTERNED;
149	Py_MEMCPY(op->ob_sval, str, size+1);
150	/* share short strings */
151	if (size == 0) {
152	PyObject t = (PyObject )op;
153	PyString_InternInPlace(&t);
154	op = (PyStringObject *)t;
155	nullstring = op;
156	Py_INCREF(op);
157	} else if (size == 1) {
158	PyObject t = (PyObject )op;
159	PyString_InternInPlace(&t);
160	op = (PyStringObject *)t;
161	characters[*str & UCHAR_MAX] = op;
162	Py_INCREF(op);
163	}
164	return (PyObject *) op;
165	}
166
167	PyObject *
168	PyString_FromFormatV(const char *format, va_list vargs)
169	{
170	va_list count;
171	Py_ssize_t n = 0;
172	const char* f;
173	char *s;
174	PyObject* string;
175
176	#ifdef VA_LIST_IS_ARRAY
177	Py_MEMCPY(count, vargs, sizeof(va_list));
178	#else
179	#ifdef __va_copy
180	__va_copy(count, vargs);
181	#else
182	count = vargs;
183	#endif
184	#endif
185	/* step 1: figure out how large a buffer we need */
186	for (f = format; *f; f++) {
187	if (*f == '%') {
188	#ifdef HAVE_LONG_LONG
189	int longlongflag = 0;
190	#endif
191	const char* p = f;
192	while (++f && f != '%' && !isalpha(Py_CHARMASK(*f)))
193	;
194
195	/* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
196	* they don't affect the amount of space we reserve.
197	*/
198	if (*f == 'l') {
199	if (f[1] == 'd' \|\| f[1] == 'u') {
200	++f;
201	}
202	#ifdef HAVE_LONG_LONG
203	else if (f[1] == 'l' &&
204	(f[2] == 'd' \|\| f[2] == 'u')) {
205	longlongflag = 1;
206	f += 2;
207	}
208	#endif
209	}
210	else if (*f == 'z' && (f[1] == 'd' \|\| f[1] == 'u')) {
211	++f;
212	}
213
214	switch (*f) {
215	case 'c':
216	(void)va_arg(count, int);
217	/* fall through... */
218	case '%':
219	n++;
220	break;
221	case 'd': case 'u': case 'i': case 'x':
222	(void) va_arg(count, int);
223	#ifdef HAVE_LONG_LONG
224	/* Need at most
225	ceil(log10(256)*SIZEOF_LONG_LONG) digits,
226	plus 1 for the sign. 53/22 is an upper
227	bound for log10(256). */
228	if (longlongflag)
229	n += 2 + (SIZEOF_LONG_LONG*53-1) / 22;
230	else
231	#endif
232	/* 20 bytes is enough to hold a 64-bit
233	integer. Decimal takes the most
234	space. This isn't enough for
235	octal. */
236	n += 20;
237
238	break;
239	case 's':
240	s = va_arg(count, char*);
241	n += strlen(s);
242	break;
243	case 'p':
244	(void) va_arg(count, int);
245	/* maximum 64-bit pointer representation:
246	* 0xffffffffffffffff
247	* so 19 characters is enough.
248	* XXX I count 18 -- what's the extra for?
249	*/
250	n += 19;
251	break;
252	default:
253	/* if we stumble upon an unknown
254	formatting code, copy the rest of
255	the format string to the output
256	string. (we cannot just skip the
257	code, since there's no way to know
258	what's in the argument list) */
259	n += strlen(p);
260	goto expand;
261	}
262	} else
263	n++;
264	}
265	expand:
266	/* step 2: fill the buffer */
267	/* Since we've analyzed how much space we need for the worst case,
268	use sprintf directly instead of the slower PyOS_snprintf. */
269	string = PyString_FromStringAndSize(NULL, n);
270	if (!string)
271	return NULL;
272
273	s = PyString_AsString(string);
274
275	for (f = format; *f; f++) {
276	if (*f == '%') {
277	const char* p = f++;
278	Py_ssize_t i;
279	int longflag = 0;
280	#ifdef HAVE_LONG_LONG
281	int longlongflag = 0;
282	#endif
283	int size_tflag = 0;
284	/* parse the width.precision part (we're only
285	interested in the precision value, if any) */
286	n = 0;
287	while (isdigit(Py_CHARMASK(*f)))
288	n = (n10) + f++ - '0';
289	if (*f == '.') {
290	f++;
291	n = 0;
292	while (isdigit(Py_CHARMASK(*f)))
293	n = (n10) + f++ - '0';
294	}
295	while (f && f != '%' && !isalpha(Py_CHARMASK(*f)))
296	f++;
297	/* Handle %ld, %lu, %lld and %llu. */
298	if (*f == 'l') {
299	if (f[1] == 'd' \|\| f[1] == 'u') {
300	longflag = 1;
301	++f;
302	}
303	#ifdef HAVE_LONG_LONG
304	else if (f[1] == 'l' &&
305	(f[2] == 'd' \|\| f[2] == 'u')) {
306	longlongflag = 1;
307	f += 2;
308	}
309	#endif
310	}
311	/* handle the size_t flag. */
312	else if (*f == 'z' && (f[1] == 'd' \|\| f[1] == 'u')) {
313	size_tflag = 1;
314	++f;
315	}
316
317	switch (*f) {
318	case 'c':
319	*s++ = va_arg(vargs, int);
320	break;
321	case 'd':
322	if (longflag)
323	sprintf(s, "%ld", va_arg(vargs, long));
324	#ifdef HAVE_LONG_LONG
325	else if (longlongflag)
326	sprintf(s, "%" PY_FORMAT_LONG_LONG "d",
327	va_arg(vargs, PY_LONG_LONG));
328	#endif
329	else if (size_tflag)
330	sprintf(s, "%" PY_FORMAT_SIZE_T "d",
331	va_arg(vargs, Py_ssize_t));
332	else
333	sprintf(s, "%d", va_arg(vargs, int));
334	s += strlen(s);
335	break;
336	case 'u':
337	if (longflag)
338	sprintf(s, "%lu",
339	va_arg(vargs, unsigned long));
340	#ifdef HAVE_LONG_LONG
341	else if (longlongflag)
342	sprintf(s, "%" PY_FORMAT_LONG_LONG "u",
343	va_arg(vargs, PY_LONG_LONG));
344	#endif
345	else if (size_tflag)
346	sprintf(s, "%" PY_FORMAT_SIZE_T "u",
347	va_arg(vargs, size_t));
348	else
349	sprintf(s, "%u",
350	va_arg(vargs, unsigned int));
351	s += strlen(s);
352	break;
353	case 'i':
354	sprintf(s, "%i", va_arg(vargs, int));
355	s += strlen(s);
356	break;
357	case 'x':
358	sprintf(s, "%x", va_arg(vargs, int));
359	s += strlen(s);
360	break;
361	case 's':
362	p = va_arg(vargs, char*);
363	i = strlen(p);
364	if (n > 0 && i > n)
365	i = n;
366	Py_MEMCPY(s, p, i);
367	s += i;
368	break;
369	case 'p':
370	sprintf(s, "%p", va_arg(vargs, void*));
371	/* %p is ill-defined: ensure leading 0x. */
372	if (s[1] == 'X')
373	s[1] = 'x';
374	else if (s[1] != 'x') {
375	memmove(s+2, s, strlen(s)+1);
376	s[0] = '0';
377	s[1] = 'x';
378	}
379	s += strlen(s);
380	break;
381	case '%':
382	*s++ = '%';
383	break;
384	default:
385	strcpy(s, p);
386	s += strlen(s);
387	goto end;
388	}
389	} else
390	s++ = f;
391	}
392
393	end:
394	if (_PyString_Resize(&string, s - PyString_AS_STRING(string)))
395	return NULL;
396	return string;
397	}
398
399	PyObject *
400	PyString_FromFormat(const char *format, ...)
401	{
402	PyObject* ret;
403	va_list vargs;
404
405	#ifdef HAVE_STDARG_PROTOTYPES
406	va_start(vargs, format);
407	#else
408	va_start(vargs);
409	#endif
410	ret = PyString_FromFormatV(format, vargs);
411	va_end(vargs);
412	return ret;
413	}
414
415
416	PyObject PyString_Decode(const char s,
417	Py_ssize_t size,
418	const char *encoding,
419	const char *errors)
420	{
421	PyObject v, str;
422
423	str = PyString_FromStringAndSize(s, size);
424	if (str == NULL)
425	return NULL;
426	v = PyString_AsDecodedString(str, encoding, errors);
427	Py_DECREF(str);
428	return v;
429	}
430
431	PyObject PyString_AsDecodedObject(PyObject str,
432	const char *encoding,
433	const char *errors)
434	{
435	PyObject *v;
436
437	if (!PyString_Check(str)) {
438	PyErr_BadArgument();
439	goto onError;
440	}
441
442	if (encoding == NULL) {
443	#ifdef Py_USING_UNICODE
444	encoding = PyUnicode_GetDefaultEncoding();
445	#else
446	PyErr_SetString(PyExc_ValueError, "no encoding specified");
447	goto onError;
448	#endif
449	}
450
451	/* Decode via the codec registry */
452	v = PyCodec_Decode(str, encoding, errors);
453	if (v == NULL)
454	goto onError;
455
456	return v;
457
458	onError:
459	return NULL;
460	}
461
462	PyObject PyString_AsDecodedString(PyObject str,
463	const char *encoding,
464	const char *errors)
465	{
466	PyObject *v;
467
468	v = PyString_AsDecodedObject(str, encoding, errors);
469	if (v == NULL)
470	goto onError;
471
472	#ifdef Py_USING_UNICODE
473	/* Convert Unicode to a string using the default encoding */
474	if (PyUnicode_Check(v)) {
475	PyObject *temp = v;
476	v = PyUnicode_AsEncodedString(v, NULL, NULL);
477	Py_DECREF(temp);
478	if (v == NULL)
479	goto onError;
480	}
481	#endif
482	if (!PyString_Check(v)) {
483	PyErr_Format(PyExc_TypeError,
484	"decoder did not return a string object (type=%.400s)",
485	Py_TYPE(v)->tp_name);
486	Py_DECREF(v);
487	goto onError;
488	}
489
490	return v;
491
492	onError:
493	return NULL;
494	}
495
496	PyObject PyString_Encode(const char s,
497	Py_ssize_t size,
498	const char *encoding,
499	const char *errors)
500	{
501	PyObject v, str;
502
503	str = PyString_FromStringAndSize(s, size);
504	if (str == NULL)
505	return NULL;
506	v = PyString_AsEncodedString(str, encoding, errors);
507	Py_DECREF(str);
508	return v;
509	}
510
511	PyObject PyString_AsEncodedObject(PyObject str,
512	const char *encoding,
513	const char *errors)
514	{
515	PyObject *v;
516
517	if (!PyString_Check(str)) {
518	PyErr_BadArgument();
519	goto onError;
520	}
521
522	if (encoding == NULL) {
523	#ifdef Py_USING_UNICODE
524	encoding = PyUnicode_GetDefaultEncoding();
525	#else
526	PyErr_SetString(PyExc_ValueError, "no encoding specified");
527	goto onError;
528	#endif
529	}
530
531	/* Encode via the codec registry */
532	v = PyCodec_Encode(str, encoding, errors);
533	if (v == NULL)
534	goto onError;
535
536	return v;
537
538	onError:
539	return NULL;
540	}
541
542	PyObject PyString_AsEncodedString(PyObject str,
543	const char *encoding,
544	const char *errors)
545	{
546	PyObject *v;
547
548	v = PyString_AsEncodedObject(str, encoding, errors);
549	if (v == NULL)
550	goto onError;
551
552	#ifdef Py_USING_UNICODE
553	/* Convert Unicode to a string using the default encoding */
554	if (PyUnicode_Check(v)) {
555	PyObject *temp = v;
556	v = PyUnicode_AsEncodedString(v, NULL, NULL);
557	Py_DECREF(temp);
558	if (v == NULL)
559	goto onError;
560	}
561	#endif
562	if (!PyString_Check(v)) {
563	PyErr_Format(PyExc_TypeError,
564	"encoder did not return a string object (type=%.400s)",
565	Py_TYPE(v)->tp_name);
566	Py_DECREF(v);
567	goto onError;
568	}
569
570	return v;
571
572	onError:
573	return NULL;
574	}
575
576	static void
577	string_dealloc(PyObject *op)
578	{
579	switch (PyString_CHECK_INTERNED(op)) {
580	case SSTATE_NOT_INTERNED:
581	break;
582
583	case SSTATE_INTERNED_MORTAL:
584	/* revive dead object temporarily for DelItem */
585	Py_REFCNT(op) = 3;
586	if (PyDict_DelItem(interned, op) != 0)
587	Py_FatalError(
588	"deletion of interned string failed");
589	break;
590
591	case SSTATE_INTERNED_IMMORTAL:
592	Py_FatalError("Immortal interned string died.");
593
594	default:
595	Py_FatalError("Inconsistent interned string state.");
596	}
597	Py_TYPE(op)->tp_free(op);
598	}
599
600	/* Unescape a backslash-escaped string. If unicode is non-zero,
601	the string is a u-literal. If recode_encoding is non-zero,
602	the string is UTF-8 encoded and should be re-encoded in the
603	specified encoding. */
604
605	PyObject PyString_DecodeEscape(const char s,
606	Py_ssize_t len,
607	const char *errors,
608	Py_ssize_t unicode,
609	const char *recode_encoding)
610	{
611	int c;
612	char p, buf;
613	const char *end;
614	PyObject *v;
615	Py_ssize_t newlen = recode_encoding ? 4*len:len;
616	v = PyString_FromStringAndSize((char *)NULL, newlen);
617	if (v == NULL)
618	return NULL;
619	p = buf = PyString_AsString(v);
620	end = s + len;
621	while (s < end) {
622	if (*s != '\\') {
623	non_esc:
624	#ifdef Py_USING_UNICODE
625	if (recode_encoding && (*s & 0x80)) {
626	PyObject u, w;
627	char *r;
628	const char* t;
629	Py_ssize_t rn;
630	t = s;
631	/* Decode non-ASCII bytes as UTF-8. */
632	while (t < end && (*t & 0x80)) t++;
633	u = PyUnicode_DecodeUTF8(s, t - s, errors);
634	if(!u) goto failed;
635
636	/* Recode them in target encoding. */
637	w = PyUnicode_AsEncodedString(
638	u, recode_encoding, errors);
639	Py_DECREF(u);
640	if (!w) goto failed;
641
642	/* Append bytes to output buffer. */
643	assert(PyString_Check(w));
644	r = PyString_AS_STRING(w);
645	rn = PyString_GET_SIZE(w);
646	Py_MEMCPY(p, r, rn);
647	p += rn;
648	Py_DECREF(w);
649	s = t;
650	} else {
651	p++ = s++;
652	}
653	#else
654	p++ = s++;
655	#endif
656	continue;
657	}
658	s++;
659	if (s==end) {
660	PyErr_SetString(PyExc_ValueError,
661	"Trailing \\ in string");
662	goto failed;
663	}
664	switch (*s++) {
665	/* XXX This assumes ASCII! */
666	case '\n': break;
667	case '\\': *p++ = '\\'; break;
668	case '\'': *p++ = '\''; break;
669	case '\"': *p++ = '\"'; break;
670	case 'b': *p++ = '\b'; break;
671	case 'f': p++ = '\014'; break; / FF */
672	case 't': *p++ = '\t'; break;
673	case 'n': *p++ = '\n'; break;
674	case 'r': *p++ = '\r'; break;
675	case 'v': p++ = '\013'; break; / VT */
676	case 'a': p++ = '\007'; break; / BEL, not classic C */
677	case '0': case '1': case '2': case '3':
678	case '4': case '5': case '6': case '7':
679	c = s[-1] - '0';
680	if (s < end && '0' <= s && s <= '7') {
681	c = (c<<3) + *s++ - '0';
682	if (s < end && '0' <= s && s <= '7')
683	c = (c<<3) + *s++ - '0';
684	}
685	*p++ = c;
686	break;
687	case 'x':
688	if (s+1 < end &&
689	isxdigit(Py_CHARMASK(s[0])) &&
690	isxdigit(Py_CHARMASK(s[1])))
691	{
692	unsigned int x = 0;
693	c = Py_CHARMASK(*s);
694	s++;
695	if (isdigit(c))
696	x = c - '0';
697	else if (islower(c))
698	x = 10 + c - 'a';
699	else
700	x = 10 + c - 'A';
701	x = x << 4;
702	c = Py_CHARMASK(*s);
703	s++;
704	if (isdigit(c))
705	x += c - '0';
706	else if (islower(c))
707	x += 10 + c - 'a';
708	else
709	x += 10 + c - 'A';
710	*p++ = x;
711	break;
712	}
713	if (!errors \|\| strcmp(errors, "strict") == 0) {
714	PyErr_SetString(PyExc_ValueError,
715	"invalid \\x escape");
716	goto failed;
717	}
718	if (strcmp(errors, "replace") == 0) {
719	*p++ = '?';
720	} else if (strcmp(errors, "ignore") == 0)
721	/* do nothing */;
722	else {
723	PyErr_Format(PyExc_ValueError,
724	"decoding error; "
725	"unknown error handling code: %.400s",
726	errors);
727	goto failed;
728	}
729	/* skip \x */
730	if (s < end && isxdigit(Py_CHARMASK(s[0])))
731	s++; /* and a hexdigit */
732	break;
733	#ifndef Py_USING_UNICODE
734	case 'u':
735	case 'U':
736	case 'N':
737	if (unicode) {
738	PyErr_SetString(PyExc_ValueError,
739	"Unicode escapes not legal "
740	"when Unicode disabled");
741	goto failed;
742	}
743	#endif
744	default:
745	*p++ = '\\';
746	s--;
747	goto non_esc; /* an arbitrary number of unescaped
748	UTF-8 bytes may follow. */
749	}
750	}
751	if (p-buf < newlen && _PyString_Resize(&v, p - buf))
752	goto failed;
753	return v;
754	failed:
755	Py_DECREF(v);
756	return NULL;
757	}
758
759	/* -------------------------------------------------------------------- */
760	/* object api */
761
762	static Py_ssize_t
763	string_getsize(register PyObject *op)
764	{
765	char *s;
766	Py_ssize_t len;
767	if (PyString_AsStringAndSize(op, &s, &len))
768	return -1;
769	return len;
770	}
771
772	static /const/ char *
773	string_getbuffer(register PyObject *op)
774	{
775	char *s;
776	Py_ssize_t len;
777	if (PyString_AsStringAndSize(op, &s, &len))
778	return NULL;
779	return s;
780	}
781
782	Py_ssize_t
783	PyString_Size(register PyObject *op)
784	{
785	if (!PyString_Check(op))
786	return string_getsize(op);
787	return Py_SIZE(op);
788	}
789
790	/const/ char *
791	PyString_AsString(register PyObject *op)
792	{
793	if (!PyString_Check(op))
794	return string_getbuffer(op);
795	return ((PyStringObject *)op) -> ob_sval;
796	}
797
798	int
799	PyString_AsStringAndSize(register PyObject *obj,
800	register char **s,
801	register Py_ssize_t *len)
802	{
803	if (s == NULL) {
804	PyErr_BadInternalCall();
805	return -1;
806	}
807
808	if (!PyString_Check(obj)) {
809	#ifdef Py_USING_UNICODE
810	if (PyUnicode_Check(obj)) {
811	obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
812	if (obj == NULL)
813	return -1;
814	}
815	else
816	#endif
817	{
818	PyErr_Format(PyExc_TypeError,
819	"expected string or Unicode object, "
820	"%.200s found", Py_TYPE(obj)->tp_name);
821	return -1;
822	}
823	}
824
825	*s = PyString_AS_STRING(obj);
826	if (len != NULL)
827	*len = PyString_GET_SIZE(obj);
828	else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
829	PyErr_SetString(PyExc_TypeError,
830	"expected string without null bytes");
831	return -1;
832	}
833	return 0;
834	}
835
836	/* -------------------------------------------------------------------- */
837	/* Methods */
838
839	#include "stringlib/stringdefs.h"
840	#include "stringlib/fastsearch.h"
841
842	#include "stringlib/count.h"
843	#include "stringlib/find.h"
844	#include "stringlib/partition.h"
845	#include "stringlib/split.h"
846
847	#define _Py_InsertThousandsGrouping _PyString_InsertThousandsGrouping
848	#include "stringlib/localeutil.h"
849
850
851
852	static int
853	string_print(PyStringObject op, FILE fp, int flags)
854	{
855	Py_ssize_t i, str_len;
856	char c;
857	int quote;
858
859	/* XXX Ought to check for interrupts when writing long strings */
860	if (! PyString_CheckExact(op)) {
861	int ret;
862	/* A str subclass may have its own __str__ method. */
863	op = (PyStringObject ) PyObject_Str((PyObject )op);
864	if (op == NULL)
865	return -1;
866	ret = string_print(op, fp, flags);
867	Py_DECREF(op);
868	return ret;
869	}
870	if (flags & Py_PRINT_RAW) {
871	char *data = op->ob_sval;
872	Py_ssize_t size = Py_SIZE(op);
873	Py_BEGIN_ALLOW_THREADS
874	while (size > INT_MAX) {
875	/* Very long strings cannot be written atomically.
876	* But don't write exactly INT_MAX bytes at a time
877	* to avoid memory aligment issues.
878	*/
879	const int chunk_size = INT_MAX & ~0x3FFF;
880	fwrite(data, 1, chunk_size, fp);
881	data += chunk_size;
882	size -= chunk_size;
883	}
884	#ifdef __VMS
885	if (size) fwrite(data, (size_t)size, 1, fp);
886	#else
887	fwrite(data, 1, (size_t)size, fp);
888	#endif
889	Py_END_ALLOW_THREADS
890	return 0;
891	}
892
893	/* figure out which quote to use; single is preferred */
894	quote = '\'';
895	if (memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
896	!memchr(op->ob_sval, '"', Py_SIZE(op)))
897	quote = '"';
898
899	str_len = Py_SIZE(op);
900	Py_BEGIN_ALLOW_THREADS
901	fputc(quote, fp);
902	for (i = 0; i < str_len; i++) {
903	/* Since strings are immutable and the caller should have a
904	reference, accessing the interal buffer should not be an issue
905	with the GIL released. */
906	c = op->ob_sval[i];
907	if (c == quote \|\| c == '\\')
908	fprintf(fp, "\\%c", c);
909	else if (c == '\t')
910	fprintf(fp, "\\t");
911	else if (c == '\n')
912	fprintf(fp, "\\n");
913	else if (c == '\r')
914	fprintf(fp, "\\r");
915	else if (c < ' ' \|\| c >= 0x7f)
916	fprintf(fp, "\\x%02x", c & 0xff);
917	else
918	fputc(c, fp);
919	}
920	fputc(quote, fp);
921	Py_END_ALLOW_THREADS
922	return 0;
923	}
924
925	PyObject *
926	PyString_Repr(PyObject *obj, int smartquotes)
927	{
928	register PyStringObject* op = (PyStringObject*) obj;
929	size_t newsize = 2 + 4 * Py_SIZE(op);
930	PyObject *v;
931	if (newsize > PY_SSIZE_T_MAX \|\| newsize / 4 != Py_SIZE(op)) {
932	PyErr_SetString(PyExc_OverflowError,
933	"string is too large to make repr");
934	return NULL;
935	}
936	v = PyString_FromStringAndSize((char *)NULL, newsize);
937	if (v == NULL) {
938	return NULL;
939	}
940	else {
941	register Py_ssize_t i;
942	register char c;
943	register char *p;
944	int quote;
945
946	/* figure out which quote to use; single is preferred */
947	quote = '\'';
948	if (smartquotes &&
949	memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
950	!memchr(op->ob_sval, '"', Py_SIZE(op)))
951	quote = '"';
952
953	p = PyString_AS_STRING(v);
954	*p++ = quote;
955	for (i = 0; i < Py_SIZE(op); i++) {
956	/* There's at least enough room for a hex escape
957	and a closing quote. */
958	assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
959	c = op->ob_sval[i];
960	if (c == quote \|\| c == '\\')
961	p++ = '\\', p++ = c;
962	else if (c == '\t')
963	p++ = '\\', p++ = 't';
964	else if (c == '\n')
965	p++ = '\\', p++ = 'n';
966	else if (c == '\r')
967	p++ = '\\', p++ = 'r';
968	else if (c < ' ' \|\| c >= 0x7f) {
969	/* For performance, we don't want to call
970	PyOS_snprintf here (extra layers of
971	function call). */
972	sprintf(p, "\\x%02x", c & 0xff);
973	p += 4;
974	}
975	else
976	*p++ = c;
977	}
978	assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
979	*p++ = quote;
980	*p = '\0';
981	if (_PyString_Resize(&v, (p - PyString_AS_STRING(v))))
982	return NULL;
983	return v;
984	}
985	}
986
987	static PyObject *
988	string_repr(PyObject *op)
989	{
990	return PyString_Repr(op, 1);
991	}
992
993	static PyObject *
994	string_str(PyObject *s)
995	{
996	assert(PyString_Check(s));
997	if (PyString_CheckExact(s)) {
998	Py_INCREF(s);
999	return s;
1000	}
1001	else {
1002	/* Subtype -- return genuine string with the same value. */
1003	PyStringObject t = (PyStringObject ) s;
1004	return PyString_FromStringAndSize(t->ob_sval, Py_SIZE(t));
1005	}
1006	}
1007
1008	static Py_ssize_t
1009	string_length(PyStringObject *a)
1010	{
1011	return Py_SIZE(a);
1012	}
1013
1014	static PyObject *
1015	string_concat(register PyStringObject a, register PyObject bb)
1016	{
1017	register Py_ssize_t size;
1018	register PyStringObject *op;
1019	if (!PyString_Check(bb)) {
1020	#ifdef Py_USING_UNICODE
1021	if (PyUnicode_Check(bb))
1022	return PyUnicode_Concat((PyObject *)a, bb);
1023	#endif
1024	if (PyByteArray_Check(bb))
1025	return PyByteArray_Concat((PyObject *)a, bb);
1026	PyErr_Format(PyExc_TypeError,
1027	"cannot concatenate 'str' and '%.200s' objects",
1028	Py_TYPE(bb)->tp_name);
1029	return NULL;
1030	}
1031	#define b ((PyStringObject *)bb)
1032	/* Optimize cases with empty left or right operand */
1033	if ((Py_SIZE(a) == 0 \|\| Py_SIZE(b) == 0) &&
1034	PyString_CheckExact(a) && PyString_CheckExact(b)) {
1035	if (Py_SIZE(a) == 0) {
1036	Py_INCREF(bb);
1037	return bb;
1038	}
1039	Py_INCREF(a);
1040	return (PyObject *)a;
1041	}
1042	size = Py_SIZE(a) + Py_SIZE(b);
1043	/* Check that string sizes are not negative, to prevent an
1044	overflow in cases where we are passed incorrectly-created
1045	strings with negative lengths (due to a bug in other code).
1046	*/
1047	if (Py_SIZE(a) < 0 \|\| Py_SIZE(b) < 0 \|\|
1048	Py_SIZE(a) > PY_SSIZE_T_MAX - Py_SIZE(b)) {
1049	PyErr_SetString(PyExc_OverflowError,
1050	"strings are too large to concat");
1051	return NULL;
1052	}
1053
1054	/* Inline PyObject_NewVar */
1055	if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
1056	PyErr_SetString(PyExc_OverflowError,
1057	"strings are too large to concat");
1058	return NULL;
1059	}
1060	op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
1061	if (op == NULL)
1062	return PyErr_NoMemory();
1063	PyObject_INIT_VAR(op, &PyString_Type, size);
1064	op->ob_shash = -1;
1065	op->ob_sstate = SSTATE_NOT_INTERNED;
1066	Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1067	Py_MEMCPY(op->ob_sval + Py_SIZE(a), b->ob_sval, Py_SIZE(b));
1068	op->ob_sval[size] = '\0';
1069	return (PyObject *) op;
1070	#undef b
1071	}
1072
1073	static PyObject *
1074	string_repeat(register PyStringObject *a, register Py_ssize_t n)
1075	{
1076	register Py_ssize_t i;
1077	register Py_ssize_t j;
1078	register Py_ssize_t size;
1079	register PyStringObject *op;
1080	size_t nbytes;
1081	if (n < 0)
1082	n = 0;
1083	/* watch out for overflows: the size can overflow int,
1084	* and the # of bytes needed can overflow size_t
1085	*/
1086	size = Py_SIZE(a) * n;
1087	if (n && size / n != Py_SIZE(a)) {
1088	PyErr_SetString(PyExc_OverflowError,
1089	"repeated string is too long");
1090	return NULL;
1091	}
1092	if (size == Py_SIZE(a) && PyString_CheckExact(a)) {
1093	Py_INCREF(a);
1094	return (PyObject *)a;
1095	}
1096	nbytes = (size_t)size;
1097	if (nbytes + PyStringObject_SIZE <= nbytes) {
1098	PyErr_SetString(PyExc_OverflowError,
1099	"repeated string is too long");
1100	return NULL;
1101	}
1102	op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + nbytes);
1103	if (op == NULL)
1104	return PyErr_NoMemory();
1105	PyObject_INIT_VAR(op, &PyString_Type, size);
1106	op->ob_shash = -1;
1107	op->ob_sstate = SSTATE_NOT_INTERNED;
1108	op->ob_sval[size] = '\0';
1109	if (Py_SIZE(a) == 1 && n > 0) {
1110	memset(op->ob_sval, a->ob_sval[0] , n);
1111	return (PyObject *) op;
1112	}
1113	i = 0;
1114	if (i < size) {
1115	Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1116	i = Py_SIZE(a);
1117	}
1118	while (i < size) {
1119	j = (i <= size-i) ? i : size-i;
1120	Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
1121	i += j;
1122	}
1123	return (PyObject *) op;
1124	}
1125
1126	/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1127
1128	static PyObject *
1129	string_slice(register PyStringObject *a, register Py_ssize_t i,
1130	register Py_ssize_t j)
1131	/* j -- may be negative! */
1132	{
1133	if (i < 0)
1134	i = 0;
1135	if (j < 0)
1136	j = 0; /* Avoid signed/unsigned bug in next line */
1137	if (j > Py_SIZE(a))
1138	j = Py_SIZE(a);
1139	if (i == 0 && j == Py_SIZE(a) && PyString_CheckExact(a)) {
1140	/* It's the same as a */
1141	Py_INCREF(a);
1142	return (PyObject *)a;
1143	}
1144	if (j < i)
1145	j = i;
1146	return PyString_FromStringAndSize(a->ob_sval + i, j-i);
1147	}
1148
1149	static int
1150	string_contains(PyObject str_obj, PyObject sub_obj)
1151	{
1152	if (!PyString_CheckExact(sub_obj)) {
1153	#ifdef Py_USING_UNICODE
1154	if (PyUnicode_Check(sub_obj))
1155	return PyUnicode_Contains(str_obj, sub_obj);
1156	#endif
1157	if (!PyString_Check(sub_obj)) {
1158	PyErr_Format(PyExc_TypeError,
1159	"'in <string>' requires string as left operand, "
1160	"not %.200s", Py_TYPE(sub_obj)->tp_name);
1161	return -1;
1162	}
1163	}
1164
1165	return stringlib_contains_obj(str_obj, sub_obj);
1166	}
1167
1168	static PyObject *
1169	string_item(PyStringObject *a, register Py_ssize_t i)
1170	{
1171	char pchar;
1172	PyObject *v;
1173	if (i < 0 \|\| i >= Py_SIZE(a)) {
1174	PyErr_SetString(PyExc_IndexError, "string index out of range");
1175	return NULL;
1176	}
1177	pchar = a->ob_sval[i];
1178	v = (PyObject *)characters[pchar & UCHAR_MAX];
1179	if (v == NULL)
1180	v = PyString_FromStringAndSize(&pchar, 1);
1181	else {
1182	#ifdef COUNT_ALLOCS
1183	one_strings++;
1184	#endif
1185	Py_INCREF(v);
1186	}
1187	return v;
1188	}
1189
1190	static PyObject*
1191	string_richcompare(PyStringObject a, PyStringObject b, int op)
1192	{
1193	int c;
1194	Py_ssize_t len_a, len_b;
1195	Py_ssize_t min_len;
1196	PyObject *result;
1197
1198	/* Make sure both arguments are strings. */
1199	if (!(PyString_Check(a) && PyString_Check(b))) {
1200	result = Py_NotImplemented;
1201	goto out;
1202	}
1203	if (a == b) {
1204	switch (op) {
1205	case Py_EQ:case Py_LE:case Py_GE:
1206	result = Py_True;
1207	goto out;
1208	case Py_NE:case Py_LT:case Py_GT:
1209	result = Py_False;
1210	goto out;
1211	}
1212	}
1213	if (op == Py_EQ) {
1214	/* Supporting Py_NE here as well does not save
1215	much time, since Py_NE is rarely used. */
1216	if (Py_SIZE(a) == Py_SIZE(b)
1217	&& (a->ob_sval[0] == b->ob_sval[0]
1218	&& memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {
1219	result = Py_True;
1220	} else {
1221	result = Py_False;
1222	}
1223	goto out;
1224	}
1225	len_a = Py_SIZE(a); len_b = Py_SIZE(b);
1226	min_len = (len_a < len_b) ? len_a : len_b;
1227	if (min_len > 0) {
1228	c = Py_CHARMASK(a->ob_sval) - Py_CHARMASK(b->ob_sval);
1229	if (c==0)
1230	c = memcmp(a->ob_sval, b->ob_sval, min_len);
1231	} else
1232	c = 0;
1233	if (c == 0)
1234	c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1235	switch (op) {
1236	case Py_LT: c = c < 0; break;
1237	case Py_LE: c = c <= 0; break;
1238	case Py_EQ: assert(0); break; /* unreachable */
1239	case Py_NE: c = c != 0; break;
1240	case Py_GT: c = c > 0; break;
1241	case Py_GE: c = c >= 0; break;
1242	default:
1243	result = Py_NotImplemented;
1244	goto out;
1245	}
1246	result = c ? Py_True : Py_False;
1247	out:
1248	Py_INCREF(result);
1249	return result;
1250	}
1251
1252	int
1253	_PyString_Eq(PyObject o1, PyObject o2)
1254	{
1255	PyStringObject a = (PyStringObject) o1;
1256	PyStringObject b = (PyStringObject) o2;
1257	return Py_SIZE(a) == Py_SIZE(b)
1258	&& memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0;
1259	}
1260
1261	static long
1262	string_hash(PyStringObject *a)
1263	{
1264	register Py_ssize_t len;
1265	register unsigned char *p;
1266	register long x;
1267
1268	#ifdef Py_DEBUG
1269	assert(_Py_HashSecret_Initialized);
1270	#endif
1271	if (a->ob_shash != -1)
1272	return a->ob_shash;
1273	len = Py_SIZE(a);
1274	/*
1275	We make the hash of the empty string be 0, rather than using
1276	(prefix ^ suffix), since this slightly obfuscates the hash secret
1277	*/
1278	if (len == 0) {
1279	a->ob_shash = 0;
1280	return 0;
1281	}
1282	p = (unsigned char *) a->ob_sval;
1283	x = _Py_HashSecret.prefix;
1284	x ^= *p << 7;
1285	while (--len >= 0)
1286	x = (1000003x) ^ p++;
1287	x ^= Py_SIZE(a);
1288	x ^= _Py_HashSecret.suffix;
1289	if (x == -1)
1290	x = -2;
1291	a->ob_shash = x;
1292	return x;
1293	}
1294
1295	static PyObject*
1296	string_subscript(PyStringObject* self, PyObject* item)
1297	{
1298	if (PyIndex_Check(item)) {
1299	Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1300	if (i == -1 && PyErr_Occurred())
1301	return NULL;
1302	if (i < 0)
1303	i += PyString_GET_SIZE(self);
1304	return string_item(self, i);
1305	}
1306	else if (PySlice_Check(item)) {
1307	Py_ssize_t start, stop, step, slicelength, cur, i;
1308	char* source_buf;
1309	char* result_buf;
1310	PyObject* result;
1311
1312	if (PySlice_GetIndicesEx((PySliceObject*)item,
1313	PyString_GET_SIZE(self),
1314	&start, &stop, &step, &slicelength) < 0) {
1315	return NULL;
1316	}
1317
1318	if (slicelength <= 0) {
1319	return PyString_FromStringAndSize("", 0);
1320	}
1321	else if (start == 0 && step == 1 &&
1322	slicelength == PyString_GET_SIZE(self) &&
1323	PyString_CheckExact(self)) {
1324	Py_INCREF(self);
1325	return (PyObject *)self;
1326	}
1327	else if (step == 1) {
1328	return PyString_FromStringAndSize(
1329	PyString_AS_STRING(self) + start,
1330	slicelength);
1331	}
1332	else {
1333	source_buf = PyString_AsString((PyObject*)self);
1334	result_buf = (char *)PyMem_Malloc(slicelength);
1335	if (result_buf == NULL)
1336	return PyErr_NoMemory();
1337
1338	for (cur = start, i = 0; i < slicelength;
1339	cur += step, i++) {
1340	result_buf[i] = source_buf[cur];
1341	}
1342
1343	result = PyString_FromStringAndSize(result_buf,
1344	slicelength);
1345	PyMem_Free(result_buf);
1346	return result;
1347	}
1348	}
1349	else {
1350	PyErr_Format(PyExc_TypeError,
1351	"string indices must be integers, not %.200s",
1352	Py_TYPE(item)->tp_name);
1353	return NULL;
1354	}
1355	}
1356
1357	static Py_ssize_t
1358	string_buffer_getreadbuf(PyStringObject self, Py_ssize_t index, const void *ptr)
1359	{
1360	if ( index != 0 ) {
1361	PyErr_SetString(PyExc_SystemError,
1362	"accessing non-existent string segment");
1363	return -1;
1364	}
1365	ptr = (void )self->ob_sval;
1366	return Py_SIZE(self);
1367	}
1368
1369	static Py_ssize_t
1370	string_buffer_getwritebuf(PyStringObject self, Py_ssize_t index, const void *ptr)
1371	{
1372	PyErr_SetString(PyExc_TypeError,
1373	"Cannot use string as modifiable buffer");
1374	return -1;
1375	}
1376
1377	static Py_ssize_t
1378	string_buffer_getsegcount(PyStringObject self, Py_ssize_t lenp)
1379	{
1380	if ( lenp )
1381	*lenp = Py_SIZE(self);
1382	return 1;
1383	}
1384
1385	static Py_ssize_t
1386	string_buffer_getcharbuf(PyStringObject self, Py_ssize_t index, const char *ptr)
1387	{
1388	if ( index != 0 ) {
1389	PyErr_SetString(PyExc_SystemError,
1390	"accessing non-existent string segment");
1391	return -1;
1392	}
1393	*ptr = self->ob_sval;
1394	return Py_SIZE(self);
1395	}
1396
1397	static int
1398	string_buffer_getbuffer(PyStringObject self, Py_buffer view, int flags)
1399	{
1400	return PyBuffer_FillInfo(view, (PyObject*)self,
1401	(void *)self->ob_sval, Py_SIZE(self),
1402	1, flags);
1403	}
1404
1405	static PySequenceMethods string_as_sequence = {
1406	(lenfunc)string_length, /sq_length/
1407	(binaryfunc)string_concat, /sq_concat/
1408	(ssizeargfunc)string_repeat, /sq_repeat/
1409	(ssizeargfunc)string_item, /sq_item/
1410	(ssizessizeargfunc)string_slice, /sq_slice/
1411	0, /sq_ass_item/
1412	0, /sq_ass_slice/
1413	(objobjproc)string_contains /sq_contains/
1414	};
1415
1416	static PyMappingMethods string_as_mapping = {
1417	(lenfunc)string_length,
1418	(binaryfunc)string_subscript,
1419	0,
1420	};
1421
1422	static PyBufferProcs string_as_buffer = {
1423	(readbufferproc)string_buffer_getreadbuf,
1424	(writebufferproc)string_buffer_getwritebuf,
1425	(segcountproc)string_buffer_getsegcount,
1426	(charbufferproc)string_buffer_getcharbuf,
1427	(getbufferproc)string_buffer_getbuffer,
1428	0, /* XXX */
1429	};
1430
1431
1432
/* Strip direction selectors; also used as indices into stripformat[]. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Arrays indexed by above */
static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};

/* Method name for a strip type: skips the 3-character "|O:" prefix. */
#define STRIPNAME(i) (stripformat[i]+3)
1441
1442	PyDoc_STRVAR(split__doc__,
1443	"S.split([sep [,maxsplit]]) -> list of strings\n\
1444	\n\
1445	Return a list of the words in the string S, using sep as the\n\
1446	delimiter string. If maxsplit is given, at most maxsplit\n\
1447	splits are done. If sep is not specified or is None, any\n\
1448	whitespace string is a separator and empty strings are removed\n\
1449	from the result.");
1450
1451	static PyObject *
1452	string_split(PyStringObject self, PyObject args)
1453	{
1454	Py_ssize_t len = PyString_GET_SIZE(self), n;
1455	Py_ssize_t maxsplit = -1;
1456	const char s = PyString_AS_STRING(self), sub;
1457	PyObject *subobj = Py_None;
1458
1459	if (!PyArg_ParseTuple(args, "\|On:split", &subobj, &maxsplit))
1460	return NULL;
1461	if (maxsplit < 0)
1462	maxsplit = PY_SSIZE_T_MAX;
1463	if (subobj == Py_None)
1464	return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
1465	if (PyString_Check(subobj)) {
1466	sub = PyString_AS_STRING(subobj);
1467	n = PyString_GET_SIZE(subobj);
1468	}
1469	#ifdef Py_USING_UNICODE
1470	else if (PyUnicode_Check(subobj))
1471	return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
1472	#endif
1473	else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1474	return NULL;
1475
1476	return stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1477	}
1478
1479	PyDoc_STRVAR(partition__doc__,
1480	"S.partition(sep) -> (head, sep, tail)\n\
1481	\n\
1482	Search for the separator sep in S, and return the part before it,\n\
1483	the separator itself, and the part after it. If the separator is not\n\
1484	found, return S and two empty strings.");
1485
1486	static PyObject *
1487	string_partition(PyStringObject self, PyObject sep_obj)
1488	{
1489	const char *sep;
1490	Py_ssize_t sep_len;
1491
1492	if (PyString_Check(sep_obj)) {
1493	sep = PyString_AS_STRING(sep_obj);
1494	sep_len = PyString_GET_SIZE(sep_obj);
1495	}
1496	#ifdef Py_USING_UNICODE
1497	else if (PyUnicode_Check(sep_obj))
1498	return PyUnicode_Partition((PyObject *) self, sep_obj);
1499	#endif
1500	else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1501	return NULL;
1502
1503	return stringlib_partition(
1504	(PyObject*) self,
1505	PyString_AS_STRING(self), PyString_GET_SIZE(self),
1506	sep_obj, sep, sep_len
1507	);
1508	}
1509
1510	PyDoc_STRVAR(rpartition__doc__,
1511	"S.rpartition(sep) -> (head, sep, tail)\n\
1512	\n\
1513	Search for the separator sep in S, starting at the end of S, and return\n\
1514	the part before it, the separator itself, and the part after it. If the\n\
1515	separator is not found, return two empty strings and S.");
1516
1517	static PyObject *
1518	string_rpartition(PyStringObject self, PyObject sep_obj)
1519	{
1520	const char *sep;
1521	Py_ssize_t sep_len;
1522
1523	if (PyString_Check(sep_obj)) {
1524	sep = PyString_AS_STRING(sep_obj);
1525	sep_len = PyString_GET_SIZE(sep_obj);
1526	}
1527	#ifdef Py_USING_UNICODE
1528	else if (PyUnicode_Check(sep_obj))
1529	return PyUnicode_RPartition((PyObject *) self, sep_obj);
1530	#endif
1531	else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1532	return NULL;
1533
1534	return stringlib_rpartition(
1535	(PyObject*) self,
1536	PyString_AS_STRING(self), PyString_GET_SIZE(self),
1537	sep_obj, sep, sep_len
1538	);
1539	}
1540
1541	PyDoc_STRVAR(rsplit__doc__,
1542	"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1543	\n\
1544	Return a list of the words in the string S, using sep as the\n\
1545	delimiter string, starting at the end of the string and working\n\
1546	to the front. If maxsplit is given, at most maxsplit splits are\n\
1547	done. If sep is not specified or is None, any whitespace string\n\
1548	is a separator.");
1549
1550	static PyObject *
1551	string_rsplit(PyStringObject self, PyObject args)
1552	{
1553	Py_ssize_t len = PyString_GET_SIZE(self), n;
1554	Py_ssize_t maxsplit = -1;
1555	const char s = PyString_AS_STRING(self), sub;
1556	PyObject *subobj = Py_None;
1557
1558	if (!PyArg_ParseTuple(args, "\|On:rsplit", &subobj, &maxsplit))
1559	return NULL;
1560	if (maxsplit < 0)
1561	maxsplit = PY_SSIZE_T_MAX;
1562	if (subobj == Py_None)
1563	return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
1564	if (PyString_Check(subobj)) {
1565	sub = PyString_AS_STRING(subobj);
1566	n = PyString_GET_SIZE(subobj);
1567	}
1568	#ifdef Py_USING_UNICODE
1569	else if (PyUnicode_Check(subobj))
1570	return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
1571	#endif
1572	else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1573	return NULL;
1574
1575	return stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1576	}
1577
1578
1579	PyDoc_STRVAR(join__doc__,
1580	"S.join(iterable) -> string\n\
1581	\n\
1582	Return a string which is the concatenation of the strings in the\n\
1583	iterable. The separator between elements is S.");
1584
1585	static PyObject *
1586	string_join(PyStringObject self, PyObject orig)
1587	{
1588	char *sep = PyString_AS_STRING(self);
1589	const Py_ssize_t seplen = PyString_GET_SIZE(self);
1590	PyObject *res = NULL;
1591	char *p;
1592	Py_ssize_t seqlen = 0;
1593	size_t sz = 0;
1594	Py_ssize_t i;
1595	PyObject seq, item;
1596
1597	seq = PySequence_Fast(orig, "");
1598	if (seq == NULL) {
1599	return NULL;
1600	}
1601
1602	seqlen = PySequence_Size(seq);
1603	if (seqlen == 0) {
1604	Py_DECREF(seq);
1605	return PyString_FromString("");
1606	}
1607	if (seqlen == 1) {
1608	item = PySequence_Fast_GET_ITEM(seq, 0);
1609	if (PyString_CheckExact(item) \|\| PyUnicode_CheckExact(item)) {
1610	Py_INCREF(item);
1611	Py_DECREF(seq);
1612	return item;
1613	}
1614	}
1615
1616	/* There are at least two things to join, or else we have a subclass
1617	* of the builtin types in the sequence.
1618	* Do a pre-pass to figure out the total amount of space we'll
1619	* need (sz), see whether any argument is absurd, and defer to
1620	* the Unicode join if appropriate.
1621	*/
1622	for (i = 0; i < seqlen; i++) {
1623	const size_t old_sz = sz;
1624	item = PySequence_Fast_GET_ITEM(seq, i);
1625	if (!PyString_Check(item)){
1626	#ifdef Py_USING_UNICODE
1627	if (PyUnicode_Check(item)) {
1628	/* Defer to Unicode join.
1629	* CAUTION: There's no gurantee that the
1630	* original sequence can be iterated over
1631	* again, so we must pass seq here.
1632	*/
1633	PyObject *result;
1634	result = PyUnicode_Join((PyObject *)self, seq);
1635	Py_DECREF(seq);
1636	return result;
1637	}
1638	#endif
1639	PyErr_Format(PyExc_TypeError,
1640	"sequence item %zd: expected string,"
1641	" %.80s found",
1642	i, Py_TYPE(item)->tp_name);
1643	Py_DECREF(seq);
1644	return NULL;
1645	}
1646	sz += PyString_GET_SIZE(item);
1647	if (i != 0)
1648	sz += seplen;
1649	if (sz < old_sz \|\| sz > PY_SSIZE_T_MAX) {
1650	PyErr_SetString(PyExc_OverflowError,
1651	"join() result is too long for a Python string");
1652	Py_DECREF(seq);
1653	return NULL;
1654	}
1655	}
1656
1657	/* Allocate result space. */
1658	res = PyString_FromStringAndSize((char*)NULL, sz);
1659	if (res == NULL) {
1660	Py_DECREF(seq);
1661	return NULL;
1662	}
1663
1664	/* Catenate everything. */
1665	p = PyString_AS_STRING(res);
1666	for (i = 0; i < seqlen; ++i) {
1667	size_t n;
1668	item = PySequence_Fast_GET_ITEM(seq, i);
1669	n = PyString_GET_SIZE(item);
1670	Py_MEMCPY(p, PyString_AS_STRING(item), n);
1671	p += n;
1672	if (i < seqlen - 1) {
1673	Py_MEMCPY(p, sep, seplen);
1674	p += seplen;
1675	}
1676	}
1677
1678	Py_DECREF(seq);
1679	return res;
1680	}
1681
1682	PyObject *
1683	_PyString_Join(PyObject sep, PyObject x)
1684	{
1685	assert(sep != NULL && PyString_Check(sep));
1686	assert(x != NULL);
1687	return string_join((PyStringObject *)sep, x);
1688	}
1689
/* helper macro to fixup start/end slice values
 *
 * Clamps `end' to [0, len] and maps a negative `start' to start+len
 * (floored at 0), slice-style.  `start' and `end' must be modifiable
 * lvalues and every argument is evaluated more than once.  The expansion
 * is a bare if-chain, so use it only as a complete statement, never as
 * the unbraced body of another if/else.
 */
#define ADJUST_INDICES(start, end, len)         \
    if (end > len)                              \
        end = len;                              \
    else if (end < 0) {                         \
        end += len;                             \
        if (end < 0)                            \
            end = 0;                            \
    }                                           \
    if (start < 0) {                            \
        start += len;                           \
        if (start < 0)                          \
            start = 0;                          \
    }
1704
1705	Py_LOCAL_INLINE(Py_ssize_t)
1706	string_find_internal(PyStringObject self, PyObject args, int dir)
1707	{
1708	PyObject *subobj;
1709	const char *sub;
1710	Py_ssize_t sub_len;
1711	Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
1712
1713	if (!stringlib_parse_args_finds("find/rfind/index/rindex",
1714	args, &subobj, &start, &end))
1715	return -2;
1716
1717	if (PyString_Check(subobj)) {
1718	sub = PyString_AS_STRING(subobj);
1719	sub_len = PyString_GET_SIZE(subobj);
1720	}
1721	#ifdef Py_USING_UNICODE
1722	else if (PyUnicode_Check(subobj))
1723	return PyUnicode_Find(
1724	(PyObject *)self, subobj, start, end, dir);
1725	#endif
1726	else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1727	/* XXX - the "expected a character buffer object" is pretty
1728	confusing for a non-expert. remap to something else ? */
1729	return -2;
1730
1731	if (dir > 0)
1732	return stringlib_find_slice(
1733	PyString_AS_STRING(self), PyString_GET_SIZE(self),
1734	sub, sub_len, start, end);
1735	else
1736	return stringlib_rfind_slice(
1737	PyString_AS_STRING(self), PyString_GET_SIZE(self),
1738	sub, sub_len, start, end);
1739	}
1740
1741
1742	PyDoc_STRVAR(find__doc__,
1743	"S.find(sub [,start [,end]]) -> int\n\
1744	\n\
1745	Return the lowest index in S where substring sub is found,\n\
1746	such that sub is contained within S[start:end]. Optional\n\
1747	arguments start and end are interpreted as in slice notation.\n\
1748	\n\
1749	Return -1 on failure.");
1750
1751	static PyObject *
1752	string_find(PyStringObject self, PyObject args)
1753	{
1754	Py_ssize_t result = string_find_internal(self, args, +1);
1755	if (result == -2)
1756	return NULL;
1757	return PyInt_FromSsize_t(result);
1758	}
1759
1760
1761	PyDoc_STRVAR(index__doc__,
1762	"S.index(sub [,start [,end]]) -> int\n\
1763	\n\
1764	Like S.find() but raise ValueError when the substring is not found.");
1765
1766	static PyObject *
1767	string_index(PyStringObject self, PyObject args)
1768	{
1769	Py_ssize_t result = string_find_internal(self, args, +1);
1770	if (result == -2)
1771	return NULL;
1772	if (result == -1) {
1773	PyErr_SetString(PyExc_ValueError,
1774	"substring not found");
1775	return NULL;
1776	}
1777	return PyInt_FromSsize_t(result);
1778	}
1779
1780
1781	PyDoc_STRVAR(rfind__doc__,
1782	"S.rfind(sub [,start [,end]]) -> int\n\
1783	\n\
1784	Return the highest index in S where substring sub is found,\n\
1785	such that sub is contained within S[start:end]. Optional\n\
1786	arguments start and end are interpreted as in slice notation.\n\
1787	\n\
1788	Return -1 on failure.");
1789
1790	static PyObject *
1791	string_rfind(PyStringObject self, PyObject args)
1792	{
1793	Py_ssize_t result = string_find_internal(self, args, -1);
1794	if (result == -2)
1795	return NULL;
1796	return PyInt_FromSsize_t(result);
1797	}
1798
1799
1800	PyDoc_STRVAR(rindex__doc__,
1801	"S.rindex(sub [,start [,end]]) -> int\n\
1802	\n\
1803	Like S.rfind() but raise ValueError when the substring is not found.");
1804
1805	static PyObject *
1806	string_rindex(PyStringObject self, PyObject args)
1807	{
1808	Py_ssize_t result = string_find_internal(self, args, -1);
1809	if (result == -2)
1810	return NULL;
1811	if (result == -1) {
1812	PyErr_SetString(PyExc_ValueError,
1813	"substring not found");
1814	return NULL;
1815	}
1816	return PyInt_FromSsize_t(result);
1817	}
1818
1819
1820	Py_LOCAL_INLINE(PyObject *)
1821	do_xstrip(PyStringObject self, int striptype, PyObject sepobj)
1822	{
1823	char *s = PyString_AS_STRING(self);
1824	Py_ssize_t len = PyString_GET_SIZE(self);
1825	char *sep = PyString_AS_STRING(sepobj);
1826	Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
1827	Py_ssize_t i, j;
1828
1829	i = 0;
1830	if (striptype != RIGHTSTRIP) {
1831	while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1832	i++;
1833	}
1834	}
1835
1836	j = len;
1837	if (striptype != LEFTSTRIP) {
1838	do {
1839	j--;
1840	} while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1841	j++;
1842	}
1843
1844	if (i == 0 && j == len && PyString_CheckExact(self)) {
1845	Py_INCREF(self);
1846	return (PyObject*)self;
1847	}
1848	else
1849	return PyString_FromStringAndSize(s+i, j-i);
1850	}
1851
1852
1853	Py_LOCAL_INLINE(PyObject *)
1854	do_strip(PyStringObject *self, int striptype)
1855	{
1856	char *s = PyString_AS_STRING(self);
1857	Py_ssize_t len = PyString_GET_SIZE(self), i, j;
1858
1859	i = 0;
1860	if (striptype != RIGHTSTRIP) {
1861	while (i < len && isspace(Py_CHARMASK(s[i]))) {
1862	i++;
1863	}
1864	}
1865
1866	j = len;
1867	if (striptype != LEFTSTRIP) {
1868	do {
1869	j--;
1870	} while (j >= i && isspace(Py_CHARMASK(s[j])));
1871	j++;
1872	}
1873
1874	if (i == 0 && j == len && PyString_CheckExact(self)) {
1875	Py_INCREF(self);
1876	return (PyObject*)self;
1877	}
1878	else
1879	return PyString_FromStringAndSize(s+i, j-i);
1880	}
1881
1882
1883	Py_LOCAL_INLINE(PyObject *)
1884	do_argstrip(PyStringObject self, int striptype, PyObject args)
1885	{
1886	PyObject *sep = NULL;
1887
1888	if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
1889	return NULL;
1890
1891	if (sep != NULL && sep != Py_None) {
1892	if (PyString_Check(sep))
1893	return do_xstrip(self, striptype, sep);
1894	#ifdef Py_USING_UNICODE
1895	else if (PyUnicode_Check(sep)) {
1896	PyObject uniself = PyUnicode_FromObject((PyObject )self);
1897	PyObject *res;
1898	if (uniself==NULL)
1899	return NULL;
1900	res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
1901	striptype, sep);
1902	Py_DECREF(uniself);
1903	return res;
1904	}
1905	#endif
1906	PyErr_Format(PyExc_TypeError,
1907	#ifdef Py_USING_UNICODE
1908	"%s arg must be None, str or unicode",
1909	#else
1910	"%s arg must be None or str",
1911	#endif
1912	STRIPNAME(striptype));
1913	return NULL;
1914	}
1915
1916	return do_strip(self, striptype);
1917	}
1918
1919
1920	PyDoc_STRVAR(strip__doc__,
1921	"S.strip([chars]) -> string or unicode\n\
1922	\n\
1923	Return a copy of the string S with leading and trailing\n\
1924	whitespace removed.\n\
1925	If chars is given and not None, remove characters in chars instead.\n\
1926	If chars is unicode, S will be converted to unicode before stripping");
1927
1928	static PyObject *
1929	string_strip(PyStringObject self, PyObject args)
1930	{
1931	if (PyTuple_GET_SIZE(args) == 0)
1932	return do_strip(self, BOTHSTRIP); /* Common case */
1933	else
1934	return do_argstrip(self, BOTHSTRIP, args);
1935	}
1936
1937
1938	PyDoc_STRVAR(lstrip__doc__,
1939	"S.lstrip([chars]) -> string or unicode\n\
1940	\n\
1941	Return a copy of the string S with leading whitespace removed.\n\
1942	If chars is given and not None, remove characters in chars instead.\n\
1943	If chars is unicode, S will be converted to unicode before stripping");
1944
1945	static PyObject *
1946	string_lstrip(PyStringObject self, PyObject args)
1947	{
1948	if (PyTuple_GET_SIZE(args) == 0)
1949	return do_strip(self, LEFTSTRIP); /* Common case */
1950	else
1951	return do_argstrip(self, LEFTSTRIP, args);
1952	}
1953
1954
1955	PyDoc_STRVAR(rstrip__doc__,
1956	"S.rstrip([chars]) -> string or unicode\n\
1957	\n\
1958	Return a copy of the string S with trailing whitespace removed.\n\
1959	If chars is given and not None, remove characters in chars instead.\n\
1960	If chars is unicode, S will be converted to unicode before stripping");
1961
1962	static PyObject *
1963	string_rstrip(PyStringObject self, PyObject args)
1964	{
1965	if (PyTuple_GET_SIZE(args) == 0)
1966	return do_strip(self, RIGHTSTRIP); /* Common case */
1967	else
1968	return do_argstrip(self, RIGHTSTRIP, args);
1969	}
1970
1971
1972	PyDoc_STRVAR(lower__doc__,
1973	"S.lower() -> string\n\
1974	\n\
1975	Return a copy of the string S converted to lowercase.");
1976
1977	/* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
1978	#ifndef _tolower
1979	#define _tolower tolower
1980	#endif
1981
1982	static PyObject *
1983	string_lower(PyStringObject *self)
1984	{
1985	char *s;
1986	Py_ssize_t i, n = PyString_GET_SIZE(self);
1987	PyObject *newobj;
1988
1989	newobj = PyString_FromStringAndSize(NULL, n);
1990	if (!newobj)
1991	return NULL;
1992
1993	s = PyString_AS_STRING(newobj);
1994
1995	Py_MEMCPY(s, PyString_AS_STRING(self), n);
1996
1997	for (i = 0; i < n; i++) {
1998	int c = Py_CHARMASK(s[i]);
1999	if (isupper(c))
2000	s[i] = _tolower(c);
2001	}
2002
2003	return newobj;
2004	}
2005
2006	PyDoc_STRVAR(upper__doc__,
2007	"S.upper() -> string\n\
2008	\n\
2009	Return a copy of the string S converted to uppercase.");
2010
2011	#ifndef _toupper
2012	#define _toupper toupper
2013	#endif
2014
2015	static PyObject *
2016	string_upper(PyStringObject *self)
2017	{
2018	char *s;
2019	Py_ssize_t i, n = PyString_GET_SIZE(self);
2020	PyObject *newobj;
2021
2022	newobj = PyString_FromStringAndSize(NULL, n);
2023	if (!newobj)
2024	return NULL;
2025
2026	s = PyString_AS_STRING(newobj);
2027
2028	Py_MEMCPY(s, PyString_AS_STRING(self), n);
2029
2030	for (i = 0; i < n; i++) {
2031	int c = Py_CHARMASK(s[i]);
2032	if (islower(c))
2033	s[i] = _toupper(c);
2034	}
2035
2036	return newobj;
2037	}
2038
2039	PyDoc_STRVAR(title__doc__,
2040	"S.title() -> string\n\
2041	\n\
2042	Return a titlecased version of S, i.e. words start with uppercase\n\
2043	characters, all remaining cased characters have lowercase.");
2044
2045	static PyObject*
2046	string_title(PyStringObject *self)
2047	{
2048	char s = PyString_AS_STRING(self), s_new;
2049	Py_ssize_t i, n = PyString_GET_SIZE(self);
2050	int previous_is_cased = 0;
2051	PyObject *newobj;
2052
2053	newobj = PyString_FromStringAndSize(NULL, n);
2054	if (newobj == NULL)
2055	return NULL;
2056	s_new = PyString_AsString(newobj);
2057	for (i = 0; i < n; i++) {
2058	int c = Py_CHARMASK(*s++);
2059	if (islower(c)) {
2060	if (!previous_is_cased)
2061	c = toupper(c);
2062	previous_is_cased = 1;
2063	} else if (isupper(c)) {
2064	if (previous_is_cased)
2065	c = tolower(c);
2066	previous_is_cased = 1;
2067	} else
2068	previous_is_cased = 0;
2069	*s_new++ = c;
2070	}
2071	return newobj;
2072	}
2073
2074	PyDoc_STRVAR(capitalize__doc__,
2075	"S.capitalize() -> string\n\
2076	\n\
2077	Return a copy of the string S with only its first character\n\
2078	capitalized.");
2079
2080	static PyObject *
2081	string_capitalize(PyStringObject *self)
2082	{
2083	char s = PyString_AS_STRING(self), s_new;
2084	Py_ssize_t i, n = PyString_GET_SIZE(self);
2085	PyObject *newobj;
2086
2087	newobj = PyString_FromStringAndSize(NULL, n);
2088	if (newobj == NULL)
2089	return NULL;
2090	s_new = PyString_AsString(newobj);
2091	if (0 < n) {
2092	int c = Py_CHARMASK(*s++);
2093	if (islower(c))
2094	*s_new = toupper(c);
2095	else
2096	*s_new = c;
2097	s_new++;
2098	}
2099	for (i = 1; i < n; i++) {
2100	int c = Py_CHARMASK(*s++);
2101	if (isupper(c))
2102	*s_new = tolower(c);
2103	else
2104	*s_new = c;
2105	s_new++;
2106	}
2107	return newobj;
2108	}
2109
2110
2111	PyDoc_STRVAR(count__doc__,
2112	"S.count(sub[, start[, end]]) -> int\n\
2113	\n\
2114	Return the number of non-overlapping occurrences of substring sub in\n\
2115	string S[start:end]. Optional arguments start and end are interpreted\n\
2116	as in slice notation.");
2117
2118	static PyObject *
2119	string_count(PyStringObject self, PyObject args)
2120	{
2121	PyObject *sub_obj;
2122	const char str = PyString_AS_STRING(self), sub;
2123	Py_ssize_t sub_len;
2124	Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
2125
2126	if (!stringlib_parse_args_finds("count", args, &sub_obj, &start, &end))
2127	return NULL;
2128
2129	if (PyString_Check(sub_obj)) {
2130	sub = PyString_AS_STRING(sub_obj);
2131	sub_len = PyString_GET_SIZE(sub_obj);
2132	}
2133	#ifdef Py_USING_UNICODE
2134	else if (PyUnicode_Check(sub_obj)) {
2135	Py_ssize_t count;
2136	count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
2137	if (count == -1)
2138	return NULL;
2139	else
2140	return PyInt_FromSsize_t(count);
2141	}
2142	#endif
2143	else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
2144	return NULL;
2145
2146	ADJUST_INDICES(start, end, PyString_GET_SIZE(self));
2147
2148	return PyInt_FromSsize_t(
2149	stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
2150	);
2151	}
2152
2153	PyDoc_STRVAR(swapcase__doc__,
2154	"S.swapcase() -> string\n\
2155	\n\
2156	Return a copy of the string S with uppercase characters\n\
2157	converted to lowercase and vice versa.");
2158
2159	static PyObject *
2160	string_swapcase(PyStringObject *self)
2161	{
2162	char s = PyString_AS_STRING(self), s_new;
2163	Py_ssize_t i, n = PyString_GET_SIZE(self);
2164	PyObject *newobj;
2165
2166	newobj = PyString_FromStringAndSize(NULL, n);
2167	if (newobj == NULL)
2168	return NULL;
2169	s_new = PyString_AsString(newobj);
2170	for (i = 0; i < n; i++) {
2171	int c = Py_CHARMASK(*s++);
2172	if (islower(c)) {
2173	*s_new = toupper(c);
2174	}
2175	else if (isupper(c)) {
2176	*s_new = tolower(c);
2177	}
2178	else
2179	*s_new = c;
2180	s_new++;
2181	}
2182	return newobj;
2183	}
2184
2185
2186	PyDoc_STRVAR(translate__doc__,
2187	"S.translate(table [,deletechars]) -> string\n\
2188	\n\
2189	Return a copy of the string S, where all characters occurring\n\
2190	in the optional argument deletechars are removed, and the\n\
2191	remaining characters have been mapped through the given\n\
2192	translation table, which must be a string of length 256 or None.\n\
2193	If the table argument is None, no translation is applied and\n\
2194	the operation simply removes the characters in deletechars.");
2195
2196	static PyObject *
2197	string_translate(PyStringObject self, PyObject args)
2198	{
2199	register char input, output;
2200	const char *table;
2201	register Py_ssize_t i, c, changed = 0;
2202	PyObject input_obj = (PyObject)self;
2203	const char output_start, del_table=NULL;
2204	Py_ssize_t inlen, tablen, dellen = 0;
2205	PyObject *result;
2206	int trans_table[256];
2207	PyObject tableobj, delobj = NULL;
2208
2209	if (!PyArg_UnpackTuple(args, "translate", 1, 2,
2210	&tableobj, &delobj))
2211	return NULL;
2212
2213	if (PyString_Check(tableobj)) {
2214	table = PyString_AS_STRING(tableobj);
2215	tablen = PyString_GET_SIZE(tableobj);
2216	}
2217	else if (tableobj == Py_None) {
2218	table = NULL;
2219	tablen = 256;
2220	}
2221	#ifdef Py_USING_UNICODE
2222	else if (PyUnicode_Check(tableobj)) {
2223	/* Unicode .translate() does not support the deletechars
2224	parameter; instead a mapping to None will cause characters
2225	to be deleted. */
2226	if (delobj != NULL) {
2227	PyErr_SetString(PyExc_TypeError,
2228	"deletions are implemented differently for unicode");
2229	return NULL;
2230	}
2231	return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2232	}
2233	#endif
2234	else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
2235	return NULL;
2236
2237	if (tablen != 256) {
2238	PyErr_SetString(PyExc_ValueError,
2239	"translation table must be 256 characters long");
2240	return NULL;
2241	}
2242
2243	if (delobj != NULL) {
2244	if (PyString_Check(delobj)) {
2245	del_table = PyString_AS_STRING(delobj);
2246	dellen = PyString_GET_SIZE(delobj);
2247	}
2248	#ifdef Py_USING_UNICODE
2249	else if (PyUnicode_Check(delobj)) {
2250	PyErr_SetString(PyExc_TypeError,
2251	"deletions are implemented differently for unicode");
2252	return NULL;
2253	}
2254	#endif
2255	else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2256	return NULL;
2257	}
2258	else {
2259	del_table = NULL;
2260	dellen = 0;
2261	}
2262
2263	inlen = PyString_GET_SIZE(input_obj);
2264	result = PyString_FromStringAndSize((char *)NULL, inlen);
2265	if (result == NULL)
2266	return NULL;
2267	output_start = output = PyString_AsString(result);
2268	input = PyString_AS_STRING(input_obj);
2269
2270	if (dellen == 0 && table != NULL) {
2271	/* If no deletions are required, use faster code */
2272	for (i = inlen; --i >= 0; ) {
2273	c = Py_CHARMASK(*input++);
2274	if (Py_CHARMASK((*output++ = table[c])) != c)
2275	changed = 1;
2276	}
2277	if (changed \|\| !PyString_CheckExact(input_obj))
2278	return result;
2279	Py_DECREF(result);
2280	Py_INCREF(input_obj);
2281	return input_obj;
2282	}
2283
2284	if (table == NULL) {
2285	for (i = 0; i < 256; i++)
2286	trans_table[i] = Py_CHARMASK(i);
2287	} else {
2288	for (i = 0; i < 256; i++)
2289	trans_table[i] = Py_CHARMASK(table[i]);
2290	}
2291
2292	for (i = 0; i < dellen; i++)
2293	trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2294
2295	for (i = inlen; --i >= 0; ) {
2296	c = Py_CHARMASK(*input++);
2297	if (trans_table[c] != -1)
2298	if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2299	continue;
2300	changed = 1;
2301	}
2302	if (!changed && PyString_CheckExact(input_obj)) {
2303	Py_DECREF(result);
2304	Py_INCREF(input_obj);
2305	return input_obj;
2306	}
2307	/* Fix the size of the resulting string */
2308	if (inlen > 0 && _PyString_Resize(&result, output - output_start))
2309	return NULL;
2310	return result;
2311	}
2312
2313
2314	/* find and count characters and substrings */
2315
/* memchr() wrapper yielding a `char *': address of the first byte equal
 * to c within the first target_len bytes of target, or NULL if absent. */
#define findchar(target, target_len, c)                         \
  ((char *)memchr((const void *)(target), c, target_len))
2318
2319	/* String ops must return a string. */
2320	/* If the object is subclass of string, create a copy */
2321	Py_LOCAL(PyStringObject *)
2322	return_self(PyStringObject *self)
2323	{
2324	if (PyString_CheckExact(self)) {
2325	Py_INCREF(self);
2326	return self;
2327	}
2328	return (PyStringObject *)PyString_FromStringAndSize(
2329	PyString_AS_STRING(self),
2330	PyString_GET_SIZE(self));
2331	}
2332
2333	Py_LOCAL_INLINE(Py_ssize_t)
2334	countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount)
2335	{
2336	Py_ssize_t count=0;
2337	const char *start=target;
2338	const char *end=target+target_len;
2339
2340	while ( (start=findchar(start, end-start, c)) != NULL ) {
2341	count++;
2342	if (count >= maxcount)
2343	break;
2344	start += 1;
2345	}
2346	return count;
2347	}
2348
2349
2350	/* Algorithms for different cases of string replacement */
2351
2352	/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2353	Py_LOCAL(PyStringObject *)
2354	replace_interleave(PyStringObject *self,
2355	const char *to_s, Py_ssize_t to_len,
2356	Py_ssize_t maxcount)
2357	{
2358	char self_s, result_s;
2359	Py_ssize_t self_len, result_len;
2360	Py_ssize_t count, i, product;
2361	PyStringObject *result;
2362
2363	self_len = PyString_GET_SIZE(self);
2364
2365	/* 1 at the end plus 1 after every character */
2366	count = self_len+1;
2367	if (maxcount < count)
2368	count = maxcount;
2369
2370	/* Check for overflow */
2371	/* result_len = count * to_len + self_len; */
2372	product = count * to_len;
2373	if (product / to_len != count) {
2374	PyErr_SetString(PyExc_OverflowError,
2375	"replace string is too long");
2376	return NULL;
2377	}
2378	result_len = product + self_len;
2379	if (result_len < 0) {
2380	PyErr_SetString(PyExc_OverflowError,
2381	"replace string is too long");
2382	return NULL;
2383	}
2384
2385	if (! (result = (PyStringObject *)
2386	PyString_FromStringAndSize(NULL, result_len)) )
2387	return NULL;
2388
2389	self_s = PyString_AS_STRING(self);
2390	result_s = PyString_AS_STRING(result);
2391
2392	/* TODO: special case single character, which doesn't need memcpy */
2393
2394	/* Lay the first one down (guaranteed this will occur) */
2395	Py_MEMCPY(result_s, to_s, to_len);
2396	result_s += to_len;
2397	count -= 1;
2398
2399	for (i=0; i<count; i++) {
2400	result_s++ = self_s++;
2401	Py_MEMCPY(result_s, to_s, to_len);
2402	result_s += to_len;
2403	}
2404
2405	/* Copy the rest of the original string */
2406	Py_MEMCPY(result_s, self_s, self_len-i);
2407
2408	return result;
2409	}
2410
2411	/* Special case for deleting a single character */
2412	/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2413	Py_LOCAL(PyStringObject *)
2414	replace_delete_single_character(PyStringObject *self,
2415	char from_c, Py_ssize_t maxcount)
2416	{
2417	char self_s, result_s;
2418	char start, next, *end;
2419	Py_ssize_t self_len, result_len;
2420	Py_ssize_t count;
2421	PyStringObject *result;
2422
2423	self_len = PyString_GET_SIZE(self);
2424	self_s = PyString_AS_STRING(self);
2425
2426	count = countchar(self_s, self_len, from_c, maxcount);
2427	if (count == 0) {
2428	return return_self(self);
2429	}
2430
2431	result_len = self_len - count; /* from_len == 1 */
2432	assert(result_len>=0);
2433
2434	if ( (result = (PyStringObject *)
2435	PyString_FromStringAndSize(NULL, result_len)) == NULL)
2436	return NULL;
2437	result_s = PyString_AS_STRING(result);
2438
2439	start = self_s;
2440	end = self_s + self_len;
2441	while (count-- > 0) {
2442	next = findchar(start, end-start, from_c);
2443	if (next == NULL)
2444	break;
2445	Py_MEMCPY(result_s, start, next-start);
2446	result_s += (next-start);
2447	start = next+1;
2448	}
2449	Py_MEMCPY(result_s, start, end-start);
2450
2451	return result;
2452	}
2453
2454	/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2455
2456	Py_LOCAL(PyStringObject *)
2457	replace_delete_substring(PyStringObject *self,
2458	const char *from_s, Py_ssize_t from_len,
2459	Py_ssize_t maxcount) {
2460	char self_s, result_s;
2461	char start, next, *end;
2462	Py_ssize_t self_len, result_len;
2463	Py_ssize_t count, offset;
2464	PyStringObject *result;
2465
2466	self_len = PyString_GET_SIZE(self);
2467	self_s = PyString_AS_STRING(self);
2468
2469	count = stringlib_count(self_s, self_len,
2470	from_s, from_len,
2471	maxcount);
2472
2473	if (count == 0) {
2474	/* no matches */
2475	return return_self(self);
2476	}
2477
2478	result_len = self_len - (count * from_len);
2479	assert (result_len>=0);
2480
2481	if ( (result = (PyStringObject *)
2482	PyString_FromStringAndSize(NULL, result_len)) == NULL )
2483	return NULL;
2484
2485	result_s = PyString_AS_STRING(result);
2486
2487	start = self_s;
2488	end = self_s + self_len;
2489	while (count-- > 0) {
2490	offset = stringlib_find(start, end-start,
2491	from_s, from_len,
2492	0);
2493	if (offset == -1)
2494	break;
2495	next = start + offset;
2496
2497	Py_MEMCPY(result_s, start, next-start);
2498
2499	result_s += (next-start);
2500	start = next+from_len;
2501	}
2502	Py_MEMCPY(result_s, start, end-start);
2503	return result;
2504	}
2505
2506	/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2507	Py_LOCAL(PyStringObject *)
2508	replace_single_character_in_place(PyStringObject *self,
2509	char from_c, char to_c,
2510	Py_ssize_t maxcount)
2511	{
2512	char self_s, result_s, start, end, *next;
2513	Py_ssize_t self_len;
2514	PyStringObject *result;
2515
2516	/* The result string will be the same size */
2517	self_s = PyString_AS_STRING(self);
2518	self_len = PyString_GET_SIZE(self);
2519
2520	next = findchar(self_s, self_len, from_c);
2521
2522	if (next == NULL) {
2523	/* No matches; return the original string */
2524	return return_self(self);
2525	}
2526
2527	/* Need to make a new string */
2528	result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2529	if (result == NULL)
2530	return NULL;
2531	result_s = PyString_AS_STRING(result);
2532	Py_MEMCPY(result_s, self_s, self_len);
2533
2534	/* change everything in-place, starting with this one */
2535	start = result_s + (next-self_s);
2536	*start = to_c;
2537	start++;
2538	end = result_s + self_len;
2539
2540	while (--maxcount > 0) {
2541	next = findchar(start, end-start, from_c);
2542	if (next == NULL)
2543	break;
2544	*next = to_c;
2545	start = next+1;
2546	}
2547
2548	return result;
2549	}
2550
2551	/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2552	Py_LOCAL(PyStringObject *)
2553	replace_substring_in_place(PyStringObject *self,
2554	const char *from_s, Py_ssize_t from_len,
2555	const char *to_s, Py_ssize_t to_len,
2556	Py_ssize_t maxcount)
2557	{
2558	char result_s, start, *end;
2559	char *self_s;
2560	Py_ssize_t self_len, offset;
2561	PyStringObject *result;
2562
2563	/* The result string will be the same size */
2564
2565	self_s = PyString_AS_STRING(self);
2566	self_len = PyString_GET_SIZE(self);
2567
2568	offset = stringlib_find(self_s, self_len,
2569	from_s, from_len,
2570	0);
2571	if (offset == -1) {
2572	/* No matches; return the original string */
2573	return return_self(self);
2574	}
2575
2576	/* Need to make a new string */
2577	result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2578	if (result == NULL)
2579	return NULL;
2580	result_s = PyString_AS_STRING(result);
2581	Py_MEMCPY(result_s, self_s, self_len);
2582
2583	/* change everything in-place, starting with this one */
2584	start = result_s + offset;
2585	Py_MEMCPY(start, to_s, from_len);
2586	start += from_len;
2587	end = result_s + self_len;
2588
2589	while ( --maxcount > 0) {
2590	offset = stringlib_find(start, end-start,
2591	from_s, from_len,
2592	0);
2593	if (offset==-1)
2594	break;
2595	Py_MEMCPY(start+offset, to_s, from_len);
2596	start += offset+from_len;
2597	}
2598
2599	return result;
2600	}
2601
2602	/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2603	Py_LOCAL(PyStringObject *)
2604	replace_single_character(PyStringObject *self,
2605	char from_c,
2606	const char *to_s, Py_ssize_t to_len,
2607	Py_ssize_t maxcount)
2608	{
2609	char self_s, result_s;
2610	char start, next, *end;
2611	Py_ssize_t self_len, result_len;
2612	Py_ssize_t count, product;
2613	PyStringObject *result;
2614
2615	self_s = PyString_AS_STRING(self);
2616	self_len = PyString_GET_SIZE(self);
2617
2618	count = countchar(self_s, self_len, from_c, maxcount);
2619	if (count == 0) {
2620	/* no matches, return unchanged */
2621	return return_self(self);
2622	}
2623
2624	/* use the difference between current and new, hence the "-1" */
2625	/* result_len = self_len + count * (to_len-1) */
2626	product = count * (to_len-1);
2627	if (product / (to_len-1) != count) {
2628	PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2629	return NULL;
2630	}
2631	result_len = self_len + product;
2632	if (result_len < 0) {
2633	PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2634	return NULL;
2635	}
2636
2637	if ( (result = (PyStringObject *)
2638	PyString_FromStringAndSize(NULL, result_len)) == NULL)
2639	return NULL;
2640	result_s = PyString_AS_STRING(result);
2641
2642	start = self_s;
2643	end = self_s + self_len;
2644	while (count-- > 0) {
2645	next = findchar(start, end-start, from_c);
2646	if (next == NULL)
2647	break;
2648
2649	if (next == start) {
2650	/* replace with the 'to' */
2651	Py_MEMCPY(result_s, to_s, to_len);
2652	result_s += to_len;
2653	start += 1;
2654	} else {
2655	/* copy the unchanged old then the 'to' */
2656	Py_MEMCPY(result_s, start, next-start);
2657	result_s += (next-start);
2658	Py_MEMCPY(result_s, to_s, to_len);
2659	result_s += to_len;
2660	start = next+1;
2661	}
2662	}
2663	/* Copy the remainder of the remaining string */
2664	Py_MEMCPY(result_s, start, end-start);
2665
2666	return result;
2667	}
2668
2669	/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2670	Py_LOCAL(PyStringObject *)
2671	replace_substring(PyStringObject *self,
2672	const char *from_s, Py_ssize_t from_len,
2673	const char *to_s, Py_ssize_t to_len,
2674	Py_ssize_t maxcount) {
2675	char self_s, result_s;
2676	char start, next, *end;
2677	Py_ssize_t self_len, result_len;
2678	Py_ssize_t count, offset, product;
2679	PyStringObject *result;
2680
2681	self_s = PyString_AS_STRING(self);
2682	self_len = PyString_GET_SIZE(self);
2683
2684	count = stringlib_count(self_s, self_len,
2685	from_s, from_len,
2686	maxcount);
2687
2688	if (count == 0) {
2689	/* no matches, return unchanged */
2690	return return_self(self);
2691	}
2692
2693	/* Check for overflow */
2694	/* result_len = self_len + count * (to_len-from_len) */
2695	product = count * (to_len-from_len);
2696	if (product / (to_len-from_len) != count) {
2697	PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2698	return NULL;
2699	}
2700	result_len = self_len + product;
2701	if (result_len < 0) {
2702	PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2703	return NULL;
2704	}
2705
2706	if ( (result = (PyStringObject *)
2707	PyString_FromStringAndSize(NULL, result_len)) == NULL)
2708	return NULL;
2709	result_s = PyString_AS_STRING(result);
2710
2711	start = self_s;
2712	end = self_s + self_len;
2713	while (count-- > 0) {
2714	offset = stringlib_find(start, end-start,
2715	from_s, from_len,
2716	0);
2717	if (offset == -1)
2718	break;
2719	next = start+offset;
2720	if (next == start) {
2721	/* replace with the 'to' */
2722	Py_MEMCPY(result_s, to_s, to_len);
2723	result_s += to_len;
2724	start += from_len;
2725	} else {
2726	/* copy the unchanged old then the 'to' */
2727	Py_MEMCPY(result_s, start, next-start);
2728	result_s += (next-start);
2729	Py_MEMCPY(result_s, to_s, to_len);
2730	result_s += to_len;
2731	start = next+from_len;
2732	}
2733	}
2734	/* Copy the remainder of the remaining string */
2735	Py_MEMCPY(result_s, start, end-start);
2736
2737	return result;
2738	}
2739
2740
2741	Py_LOCAL(PyStringObject *)
2742	replace(PyStringObject *self,
2743	const char *from_s, Py_ssize_t from_len,
2744	const char *to_s, Py_ssize_t to_len,
2745	Py_ssize_t maxcount)
2746	{
2747	if (maxcount < 0) {
2748	maxcount = PY_SSIZE_T_MAX;
2749	} else if (maxcount == 0 \|\| PyString_GET_SIZE(self) == 0) {
2750	/* nothing to do; return the original string */
2751	return return_self(self);
2752	}
2753
2754	if (maxcount == 0 \|\|
2755	(from_len == 0 && to_len == 0)) {
2756	/* nothing to do; return the original string */
2757	return return_self(self);
2758	}
2759
2760	/* Handle zero-length special cases */
2761
2762	if (from_len == 0) {
2763	/* insert the 'to' string everywhere. */
2764	/* >>> "Python".replace("", ".") */
2765	/* '.P.y.t.h.o.n.' */
2766	return replace_interleave(self, to_s, to_len, maxcount);
2767	}
2768
2769	/* Except for "".replace("", "A") == "A" there is no way beyond this */
2770	/* point for an empty self string to generate a non-empty string */
2771	/* Special case so the remaining code always gets a non-empty string */
2772	if (PyString_GET_SIZE(self) == 0) {
2773	return return_self(self);
2774	}
2775
2776	if (to_len == 0) {
2777	/* delete all occurances of 'from' string */
2778	if (from_len == 1) {
2779	return replace_delete_single_character(
2780	self, from_s[0], maxcount);
2781	} else {
2782	return replace_delete_substring(self, from_s, from_len, maxcount);
2783	}
2784	}
2785
2786	/* Handle special case where both strings have the same length */
2787
2788	if (from_len == to_len) {
2789	if (from_len == 1) {
2790	return replace_single_character_in_place(
2791	self,
2792	from_s[0],
2793	to_s[0],
2794	maxcount);
2795	} else {
2796	return replace_substring_in_place(
2797	self, from_s, from_len, to_s, to_len, maxcount);
2798	}
2799	}
2800
2801	/* Otherwise use the more generic algorithms */
2802	if (from_len == 1) {
2803	return replace_single_character(self, from_s[0],
2804	to_s, to_len, maxcount);
2805	} else {
2806	/* len('from')>=2, len('to')>=1 */
2807	return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
2808	}
2809	}
2810
2811	PyDoc_STRVAR(replace__doc__,
2812	"S.replace(old, new[, count]) -> string\n\
2813	\n\
2814	Return a copy of string S with all occurrences of substring\n\
2815	old replaced by new. If the optional argument count is\n\
2816	given, only the first count occurrences are replaced.");
2817
2818	static PyObject *
2819	string_replace(PyStringObject self, PyObject args)
2820	{
2821	Py_ssize_t count = -1;
2822	PyObject from, to;
2823	const char from_s, to_s;
2824	Py_ssize_t from_len, to_len;
2825
2826	if (!PyArg_ParseTuple(args, "OO\|n:replace", &from, &to, &count))
2827	return NULL;
2828
2829	if (PyString_Check(from)) {
2830	from_s = PyString_AS_STRING(from);
2831	from_len = PyString_GET_SIZE(from);
2832	}
2833	#ifdef Py_USING_UNICODE
2834	if (PyUnicode_Check(from))
2835	return PyUnicode_Replace((PyObject *)self,
2836	from, to, count);
2837	#endif
2838	else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
2839	return NULL;
2840
2841	if (PyString_Check(to)) {
2842	to_s = PyString_AS_STRING(to);
2843	to_len = PyString_GET_SIZE(to);
2844	}
2845	#ifdef Py_USING_UNICODE
2846	else if (PyUnicode_Check(to))
2847	return PyUnicode_Replace((PyObject *)self,
2848	from, to, count);
2849	#endif
2850	else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
2851	return NULL;
2852
2853	return (PyObject )replace((PyStringObject ) self,
2854	from_s, from_len,
2855	to_s, to_len, count);
2856	}
2857
/** End DALKE **/
2859
2860	/* Matches the end (direction >= 0) or start (direction < 0) of self
2861	* against substr, using the start and end arguments. Returns
2862	* -1 on error, 0 if not found and 1 if found.
2863	*/
2864	Py_LOCAL(int)
2865	_string_tailmatch(PyStringObject self, PyObject substr, Py_ssize_t start,
2866	Py_ssize_t end, int direction)
2867	{
2868	Py_ssize_t len = PyString_GET_SIZE(self);
2869	Py_ssize_t slen;
2870	const char* sub;
2871	const char* str;
2872
2873	if (PyString_Check(substr)) {
2874	sub = PyString_AS_STRING(substr);
2875	slen = PyString_GET_SIZE(substr);
2876	}
2877	#ifdef Py_USING_UNICODE
2878	else if (PyUnicode_Check(substr))
2879	return PyUnicode_Tailmatch((PyObject *)self,
2880	substr, start, end, direction);
2881	#endif
2882	else if (PyObject_AsCharBuffer(substr, &sub, &slen))
2883	return -1;
2884	str = PyString_AS_STRING(self);
2885
2886	ADJUST_INDICES(start, end, len);
2887
2888	if (direction < 0) {
2889	/* startswith */
2890	if (start+slen > len)
2891	return 0;
2892	} else {
2893	/* endswith */
2894	if (end-start < slen \|\| start > len)
2895	return 0;
2896
2897	if (end-slen > start)
2898	start = end - slen;
2899	}
2900	if (end-start >= slen)
2901	return ! memcmp(str+start, sub, slen);
2902	return 0;
2903	}
2904
2905
2906	PyDoc_STRVAR(startswith__doc__,
2907	"S.startswith(prefix[, start[, end]]) -> bool\n\
2908	\n\
2909	Return True if S starts with the specified prefix, False otherwise.\n\
2910	With optional start, test S beginning at that position.\n\
2911	With optional end, stop comparing S at that position.\n\
2912	prefix can also be a tuple of strings to try.");
2913
2914	static PyObject *
2915	string_startswith(PyStringObject self, PyObject args)
2916	{
2917	Py_ssize_t start = 0;
2918	Py_ssize_t end = PY_SSIZE_T_MAX;
2919	PyObject *subobj;
2920	int result;
2921
2922	if (!stringlib_parse_args_finds("startswith", args, &subobj, &start, &end))
2923	return NULL;
2924	if (PyTuple_Check(subobj)) {
2925	Py_ssize_t i;
2926	for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2927	result = _string_tailmatch(self,
2928	PyTuple_GET_ITEM(subobj, i),
2929	start, end, -1);
2930	if (result == -1)
2931	return NULL;
2932	else if (result) {
2933	Py_RETURN_TRUE;
2934	}
2935	}
2936	Py_RETURN_FALSE;
2937	}
2938	result = _string_tailmatch(self, subobj, start, end, -1);
2939	if (result == -1) {
2940	if (PyErr_ExceptionMatches(PyExc_TypeError))
2941	PyErr_Format(PyExc_TypeError, "startswith first arg must be str, "
2942	"unicode, or tuple, not %s", Py_TYPE(subobj)->tp_name);
2943	return NULL;
2944	}
2945	else
2946	return PyBool_FromLong(result);
2947	}
2948
2949
2950	PyDoc_STRVAR(endswith__doc__,
2951	"S.endswith(suffix[, start[, end]]) -> bool\n\
2952	\n\
2953	Return True if S ends with the specified suffix, False otherwise.\n\
2954	With optional start, test S beginning at that position.\n\
2955	With optional end, stop comparing S at that position.\n\
2956	suffix can also be a tuple of strings to try.");
2957
2958	static PyObject *
2959	string_endswith(PyStringObject self, PyObject args)
2960	{
2961	Py_ssize_t start = 0;
2962	Py_ssize_t end = PY_SSIZE_T_MAX;
2963	PyObject *subobj;
2964	int result;
2965
2966	if (!stringlib_parse_args_finds("endswith", args, &subobj, &start, &end))
2967	return NULL;
2968	if (PyTuple_Check(subobj)) {
2969	Py_ssize_t i;
2970	for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2971	result = _string_tailmatch(self,
2972	PyTuple_GET_ITEM(subobj, i),
2973	start, end, +1);
2974	if (result == -1)
2975	return NULL;
2976	else if (result) {
2977	Py_RETURN_TRUE;
2978	}
2979	}
2980	Py_RETURN_FALSE;
2981	}
2982	result = _string_tailmatch(self, subobj, start, end, +1);
2983	if (result == -1) {
2984	if (PyErr_ExceptionMatches(PyExc_TypeError))
2985	PyErr_Format(PyExc_TypeError, "endswith first arg must be str, "
2986	"unicode, or tuple, not %s", Py_TYPE(subobj)->tp_name);
2987	return NULL;
2988	}
2989	else
2990	return PyBool_FromLong(result);
2991	}
2992
2993
2994	PyDoc_STRVAR(encode__doc__,
2995	"S.encode([encoding[,errors]]) -> object\n\
2996	\n\
2997	Encodes S using the codec registered for encoding. encoding defaults\n\
2998	to the default encoding. errors may be given to set a different error\n\
2999	handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3000	a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
3001	'xmlcharrefreplace' as well as any other name registered with\n\
3002	codecs.register_error that is able to handle UnicodeEncodeErrors.");
3003
3004	static PyObject *
3005	string_encode(PyStringObject self, PyObject args, PyObject *kwargs)
3006	{
3007	static char *kwlist[] = {"encoding", "errors", 0};
3008	char *encoding = NULL;
3009	char *errors = NULL;
3010	PyObject *v;
3011
3012	if (!PyArg_ParseTupleAndKeywords(args, kwargs, "\|ss:encode",
3013	kwlist, &encoding, &errors))
3014	return NULL;
3015	v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
3016	if (v == NULL)
3017	goto onError;
3018	if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3019	PyErr_Format(PyExc_TypeError,
3020	"encoder did not return a string/unicode object "
3021	"(type=%.400s)",
3022	Py_TYPE(v)->tp_name);
3023	Py_DECREF(v);
3024	return NULL;
3025	}
3026	return v;
3027
3028	onError:
3029	return NULL;
3030	}
3031
3032
3033	PyDoc_STRVAR(decode__doc__,
3034	"S.decode([encoding[,errors]]) -> object\n\
3035	\n\
3036	Decodes S using the codec registered for encoding. encoding defaults\n\
3037	to the default encoding. errors may be given to set a different error\n\
3038	handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3039	a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
3040	as well as any other name registered with codecs.register_error that is\n\
3041	able to handle UnicodeDecodeErrors.");
3042
3043	static PyObject *
3044	string_decode(PyStringObject self, PyObject args, PyObject *kwargs)
3045	{
3046	static char *kwlist[] = {"encoding", "errors", 0};
3047	char *encoding = NULL;
3048	char *errors = NULL;
3049	PyObject *v;
3050
3051	if (!PyArg_ParseTupleAndKeywords(args, kwargs, "\|ss:decode",
3052	kwlist, &encoding, &errors))
3053	return NULL;
3054	v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
3055	if (v == NULL)
3056	goto onError;
3057	if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3058	PyErr_Format(PyExc_TypeError,
3059	"decoder did not return a string/unicode object "
3060	"(type=%.400s)",
3061	Py_TYPE(v)->tp_name);
3062	Py_DECREF(v);
3063	return NULL;
3064	}
3065	return v;
3066
3067	onError:
3068	return NULL;
3069	}
3070
3071
3072	PyDoc_STRVAR(expandtabs__doc__,
3073	"S.expandtabs([tabsize]) -> string\n\
3074	\n\
3075	Return a copy of S where all tab characters are expanded using spaces.\n\
3076	If tabsize is not given, a tab size of 8 characters is assumed.");
3077
3078	static PyObject*
3079	string_expandtabs(PyStringObject self, PyObject args)
3080	{
3081	const char e, p, *qe;
3082	char *q;
3083	Py_ssize_t i, j, incr;
3084	PyObject *u;
3085	int tabsize = 8;
3086
3087	if (!PyArg_ParseTuple(args, "\|i:expandtabs", &tabsize))
3088	return NULL;
3089
3090	/* First pass: determine size of output string */
3091	i = 0; /* chars up to and including most recent \n or \r */
3092	j = 0; /* chars since most recent \n or \r (use in tab calculations) */
3093	e = PyString_AS_STRING(self) + PyString_GET_SIZE(self); /* end of input */
3094	for (p = PyString_AS_STRING(self); p < e; p++)
3095	if (*p == '\t') {
3096	if (tabsize > 0) {
3097	incr = tabsize - (j % tabsize);
3098	if (j > PY_SSIZE_T_MAX - incr)
3099	goto overflow1;
3100	j += incr;
3101	}
3102	}
3103	else {
3104	if (j > PY_SSIZE_T_MAX - 1)
3105	goto overflow1;
3106	j++;
3107	if (p == '\n' \|\| p == '\r') {
3108	if (i > PY_SSIZE_T_MAX - j)
3109	goto overflow1;
3110	i += j;
3111	j = 0;
3112	}
3113	}
3114
3115	if (i > PY_SSIZE_T_MAX - j)
3116	goto overflow1;
3117
3118	/* Second pass: create output string and fill it */
3119	u = PyString_FromStringAndSize(NULL, i + j);
3120	if (!u)
3121	return NULL;
3122
3123	j = 0; /* same as in first pass */
3124	q = PyString_AS_STRING(u); /* next output char */
3125	qe = PyString_AS_STRING(u) + PyString_GET_SIZE(u); /* end of output */
3126
3127	for (p = PyString_AS_STRING(self); p < e; p++)
3128	if (*p == '\t') {
3129	if (tabsize > 0) {
3130	i = tabsize - (j % tabsize);
3131	j += i;
3132	while (i--) {
3133	if (q >= qe)
3134	goto overflow2;
3135	*q++ = ' ';
3136	}
3137	}
3138	}
3139	else {
3140	if (q >= qe)
3141	goto overflow2;
3142	q++ = p;
3143	j++;
3144	if (p == '\n' \|\| p == '\r')
3145	j = 0;
3146	}
3147
3148	return u;
3149
3150	overflow2:
3151	Py_DECREF(u);
3152	overflow1:
3153	PyErr_SetString(PyExc_OverflowError, "new string is too long");
3154	return NULL;
3155	}
3156
3157	Py_LOCAL_INLINE(PyObject *)
3158	pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
3159	{
3160	PyObject *u;
3161
3162	if (left < 0)
3163	left = 0;
3164	if (right < 0)
3165	right = 0;
3166
3167	if (left == 0 && right == 0 && PyString_CheckExact(self)) {
3168	Py_INCREF(self);
3169	return (PyObject *)self;
3170	}
3171
3172	u = PyString_FromStringAndSize(NULL,
3173	left + PyString_GET_SIZE(self) + right);
3174	if (u) {
3175	if (left)
3176	memset(PyString_AS_STRING(u), fill, left);
3177	Py_MEMCPY(PyString_AS_STRING(u) + left,
3178	PyString_AS_STRING(self),
3179	PyString_GET_SIZE(self));
3180	if (right)
3181	memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
3182	fill, right);
3183	}
3184
3185	return u;
3186	}
3187
3188	PyDoc_STRVAR(ljust__doc__,
3189	"S.ljust(width[, fillchar]) -> string\n"
3190	"\n"
3191	"Return S left-justified in a string of length width. Padding is\n"
3192	"done using the specified fill character (default is a space).");
3193
3194	static PyObject *
3195	string_ljust(PyStringObject self, PyObject args)
3196	{
3197	Py_ssize_t width;
3198	char fillchar = ' ';
3199
3200	if (!PyArg_ParseTuple(args, "n\|c:ljust", &width, &fillchar))
3201	return NULL;
3202
3203	if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
3204	Py_INCREF(self);
3205	return (PyObject*) self;
3206	}
3207
3208	return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
3209	}
3210
3211
3212	PyDoc_STRVAR(rjust__doc__,
3213	"S.rjust(width[, fillchar]) -> string\n"
3214	"\n"
3215	"Return S right-justified in a string of length width. Padding is\n"
3216	"done using the specified fill character (default is a space)");
3217
3218	static PyObject *
3219	string_rjust(PyStringObject self, PyObject args)
3220	{
3221	Py_ssize_t width;
3222	char fillchar = ' ';
3223
3224	if (!PyArg_ParseTuple(args, "n\|c:rjust", &width, &fillchar))
3225	return NULL;
3226
3227	if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
3228	Py_INCREF(self);
3229	return (PyObject*) self;
3230	}
3231
3232	return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
3233	}
3234
3235
3236	PyDoc_STRVAR(center__doc__,
3237	"S.center(width[, fillchar]) -> string\n"
3238	"\n"
3239	"Return S centered in a string of length width. Padding is\n"
3240	"done using the specified fill character (default is a space)");
3241
3242	static PyObject *
3243	string_center(PyStringObject self, PyObject args)
3244	{
3245	Py_ssize_t marg, left;
3246	Py_ssize_t width;
3247	char fillchar = ' ';
3248
3249	if (!PyArg_ParseTuple(args, "n\|c:center", &width, &fillchar))
3250	return NULL;
3251
3252	if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
3253	Py_INCREF(self);
3254	return (PyObject*) self;
3255	}
3256
3257	marg = width - PyString_GET_SIZE(self);
3258	left = marg / 2 + (marg & width & 1);
3259
3260	return pad(self, left, marg - left, fillchar);
3261	}
3262
3263	PyDoc_STRVAR(zfill__doc__,
3264	"S.zfill(width) -> string\n"
3265	"\n"
3266	"Pad a numeric string S with zeros on the left, to fill a field\n"
3267	"of the specified width. The string S is never truncated.");
3268
3269	static PyObject *
3270	string_zfill(PyStringObject self, PyObject args)
3271	{
3272	Py_ssize_t fill;
3273	PyObject *s;
3274	char *p;
3275	Py_ssize_t width;
3276
3277	if (!PyArg_ParseTuple(args, "n:zfill", &width))
3278	return NULL;
3279
3280	if (PyString_GET_SIZE(self) >= width) {
3281	if (PyString_CheckExact(self)) {
3282	Py_INCREF(self);
3283	return (PyObject*) self;
3284	}
3285	else
3286	return PyString_FromStringAndSize(
3287	PyString_AS_STRING(self),
3288	PyString_GET_SIZE(self)
3289	);
3290	}
3291
3292	fill = width - PyString_GET_SIZE(self);
3293
3294	s = pad(self, fill, 0, '0');
3295
3296	if (s == NULL)
3297	return NULL;
3298
3299	p = PyString_AS_STRING(s);
3300	if (p[fill] == '+' \|\| p[fill] == '-') {
3301	/* move sign to beginning of string */
3302	p[0] = p[fill];
3303	p[fill] = '0';
3304	}
3305
3306	return (PyObject*) s;
3307	}
3308
3309	PyDoc_STRVAR(isspace__doc__,
3310	"S.isspace() -> bool\n\
3311	\n\
3312	Return True if all characters in S are whitespace\n\
3313	and there is at least one character in S, False otherwise.");
3314
3315	static PyObject*
3316	string_isspace(PyStringObject *self)
3317	{
3318	register const unsigned char *p
3319	= (unsigned char *) PyString_AS_STRING(self);
3320	register const unsigned char *e;
3321
3322	/* Shortcut for single character strings */
3323	if (PyString_GET_SIZE(self) == 1 &&
3324	isspace(*p))
3325	return PyBool_FromLong(1);
3326
3327	/* Special case for empty strings */
3328	if (PyString_GET_SIZE(self) == 0)
3329	return PyBool_FromLong(0);
3330
3331	e = p + PyString_GET_SIZE(self);
3332	for (; p < e; p++) {
3333	if (!isspace(*p))
3334	return PyBool_FromLong(0);
3335	}
3336	return PyBool_FromLong(1);
3337	}
3338
3339
3340	PyDoc_STRVAR(isalpha__doc__,
3341	"S.isalpha() -> bool\n\
3342	\n\
3343	Return True if all characters in S are alphabetic\n\
3344	and there is at least one character in S, False otherwise.");
3345
3346	static PyObject*
3347	string_isalpha(PyStringObject *self)
3348	{
3349	register const unsigned char *p
3350	= (unsigned char *) PyString_AS_STRING(self);
3351	register const unsigned char *e;
3352
3353	/* Shortcut for single character strings */
3354	if (PyString_GET_SIZE(self) == 1 &&
3355	isalpha(*p))
3356	return PyBool_FromLong(1);
3357
3358	/* Special case for empty strings */
3359	if (PyString_GET_SIZE(self) == 0)
3360	return PyBool_FromLong(0);
3361
3362	e = p + PyString_GET_SIZE(self);
3363	for (; p < e; p++) {
3364	if (!isalpha(*p))
3365	return PyBool_FromLong(0);
3366	}
3367	return PyBool_FromLong(1);
3368	}
3369
3370
3371	PyDoc_STRVAR(isalnum__doc__,
3372	"S.isalnum() -> bool\n\
3373	\n\
3374	Return True if all characters in S are alphanumeric\n\
3375	and there is at least one character in S, False otherwise.");
3376
3377	static PyObject*
3378	string_isalnum(PyStringObject *self)
3379	{
3380	register const unsigned char *p
3381	= (unsigned char *) PyString_AS_STRING(self);
3382	register const unsigned char *e;
3383
3384	/* Shortcut for single character strings */
3385	if (PyString_GET_SIZE(self) == 1 &&
3386	isalnum(*p))
3387	return PyBool_FromLong(1);
3388
3389	/* Special case for empty strings */
3390	if (PyString_GET_SIZE(self) == 0)
3391	return PyBool_FromLong(0);
3392
3393	e = p + PyString_GET_SIZE(self);
3394	for (; p < e; p++) {
3395	if (!isalnum(*p))
3396	return PyBool_FromLong(0);
3397	}
3398	return PyBool_FromLong(1);
3399	}
3400
3401
3402	PyDoc_STRVAR(isdigit__doc__,
3403	"S.isdigit() -> bool\n\
3404	\n\
3405	Return True if all characters in S are digits\n\
3406	and there is at least one character in S, False otherwise.");
3407
3408	static PyObject*
3409	string_isdigit(PyStringObject *self)
3410	{
3411	register const unsigned char *p
3412	= (unsigned char *) PyString_AS_STRING(self);
3413	register const unsigned char *e;
3414
3415	/* Shortcut for single character strings */
3416	if (PyString_GET_SIZE(self) == 1 &&
3417	isdigit(*p))
3418	return PyBool_FromLong(1);
3419
3420	/* Special case for empty strings */
3421	if (PyString_GET_SIZE(self) == 0)
3422	return PyBool_FromLong(0);
3423
3424	e = p + PyString_GET_SIZE(self);
3425	for (; p < e; p++) {
3426	if (!isdigit(*p))
3427	return PyBool_FromLong(0);
3428	}
3429	return PyBool_FromLong(1);
3430	}
3431
3432
3433	PyDoc_STRVAR(islower__doc__,
3434	"S.islower() -> bool\n\
3435	\n\
3436	Return True if all cased characters in S are lowercase and there is\n\
3437	at least one cased character in S, False otherwise.");
3438
3439	static PyObject*
3440	string_islower(PyStringObject *self)
3441	{
3442	register const unsigned char *p
3443	= (unsigned char *) PyString_AS_STRING(self);
3444	register const unsigned char *e;
3445	int cased;
3446
3447	/* Shortcut for single character strings */
3448	if (PyString_GET_SIZE(self) == 1)
3449	return PyBool_FromLong(islower(*p) != 0);
3450
3451	/* Special case for empty strings */
3452	if (PyString_GET_SIZE(self) == 0)
3453	return PyBool_FromLong(0);
3454
3455	e = p + PyString_GET_SIZE(self);
3456	cased = 0;
3457	for (; p < e; p++) {
3458	if (isupper(*p))
3459	return PyBool_FromLong(0);
3460	else if (!cased && islower(*p))
3461	cased = 1;
3462	}
3463	return PyBool_FromLong(cased);
3464	}
3465
3466
3467	PyDoc_STRVAR(isupper__doc__,
3468	"S.isupper() -> bool\n\
3469	\n\
3470	Return True if all cased characters in S are uppercase and there is\n\
3471	at least one cased character in S, False otherwise.");
3472
3473	static PyObject*
3474	string_isupper(PyStringObject *self)
3475	{
3476	register const unsigned char *p
3477	= (unsigned char *) PyString_AS_STRING(self);
3478	register const unsigned char *e;
3479	int cased;
3480
3481	/* Shortcut for single character strings */
3482	if (PyString_GET_SIZE(self) == 1)
3483	return PyBool_FromLong(isupper(*p) != 0);
3484
3485	/* Special case for empty strings */
3486	if (PyString_GET_SIZE(self) == 0)
3487	return PyBool_FromLong(0);
3488
3489	e = p + PyString_GET_SIZE(self);
3490	cased = 0;
3491	for (; p < e; p++) {
3492	if (islower(*p))
3493	return PyBool_FromLong(0);
3494	else if (!cased && isupper(*p))
3495	cased = 1;
3496	}
3497	return PyBool_FromLong(cased);
3498	}
3499
3500
3501	PyDoc_STRVAR(istitle__doc__,
3502	"S.istitle() -> bool\n\
3503	\n\
3504	Return True if S is a titlecased string and there is at least one\n\
3505	character in S, i.e. uppercase characters may only follow uncased\n\
3506	characters and lowercase characters only cased ones. Return False\n\
3507	otherwise.");
3508
3509	static PyObject*
3510	string_istitle(PyStringObject self, PyObject uncased)
3511	{
3512	register const unsigned char *p
3513	= (unsigned char *) PyString_AS_STRING(self);
3514	register const unsigned char *e;
3515	int cased, previous_is_cased;
3516
3517	/* Shortcut for single character strings */
3518	if (PyString_GET_SIZE(self) == 1)
3519	return PyBool_FromLong(isupper(*p) != 0);
3520
3521	/* Special case for empty strings */
3522	if (PyString_GET_SIZE(self) == 0)
3523	return PyBool_FromLong(0);
3524
3525	e = p + PyString_GET_SIZE(self);
3526	cased = 0;
3527	previous_is_cased = 0;
3528	for (; p < e; p++) {
3529	register const unsigned char ch = *p;
3530
3531	if (isupper(ch)) {
3532	if (previous_is_cased)
3533	return PyBool_FromLong(0);
3534	previous_is_cased = 1;
3535	cased = 1;
3536	}
3537	else if (islower(ch)) {
3538	if (!previous_is_cased)
3539	return PyBool_FromLong(0);
3540	previous_is_cased = 1;
3541	cased = 1;
3542	}
3543	else
3544	previous_is_cased = 0;
3545	}
3546	return PyBool_FromLong(cased);
3547	}
3548
3549
3550	PyDoc_STRVAR(splitlines__doc__,
3551	"S.splitlines(keepends=False) -> list of strings\n\
3552	\n\
3553	Return a list of the lines in S, breaking at line boundaries.\n\
3554	Line breaks are not included in the resulting list unless keepends\n\
3555	is given and true.");
3556
3557	static PyObject*
3558	string_splitlines(PyStringObject self, PyObject args)
3559	{
3560	int keepends = 0;
3561
3562	if (!PyArg_ParseTuple(args, "\|i:splitlines", &keepends))
3563	return NULL;
3564
3565	return stringlib_splitlines(
3566	(PyObject*) self, PyString_AS_STRING(self), PyString_GET_SIZE(self),
3567	keepends
3568	);
3569	}
3570
3571	PyDoc_STRVAR(sizeof__doc__,
3572	"S.__sizeof__() -> size of S in memory, in bytes");
3573
3574	static PyObject *
3575	string_sizeof(PyStringObject *v)
3576	{
3577	Py_ssize_t res;
3578	res = PyStringObject_SIZE + PyString_GET_SIZE(v) * Py_TYPE(v)->tp_itemsize;
3579	return PyInt_FromSsize_t(res);
3580	}
3581
3582	static PyObject *
3583	string_getnewargs(PyStringObject *v)
3584	{
3585	return Py_BuildValue("(s#)", v->ob_sval, Py_SIZE(v));
3586	}
3587
3588
3589	#include "stringlib/string_format.h"
3590
3591	PyDoc_STRVAR(format__doc__,
3592	"S.format(args, *kwargs) -> string\n\
3593	\n\
3594	Return a formatted version of S, using substitutions from args and kwargs.\n\
3595	The substitutions are identified by braces ('{' and '}').");
3596
3597	static PyObject *
3598	string__format__(PyObject* self, PyObject* args)
3599	{
3600	PyObject *format_spec;
3601	PyObject *result = NULL;
3602	PyObject *tmp = NULL;
3603
3604	/* If 2.x, convert format_spec to the same type as value */
3605	/* This is to allow things like u''.format('') */
3606	if (!PyArg_ParseTuple(args, "O:__format__", &format_spec))
3607	goto done;
3608	if (!(PyString_Check(format_spec) \|\| PyUnicode_Check(format_spec))) {
3609	PyErr_Format(PyExc_TypeError, "__format__ arg must be str "
3610	"or unicode, not %s", Py_TYPE(format_spec)->tp_name);
3611	goto done;
3612	}
3613	tmp = PyObject_Str(format_spec);
3614	if (tmp == NULL)
3615	goto done;
3616	format_spec = tmp;
3617
3618	result = _PyBytes_FormatAdvanced(self,
3619	PyString_AS_STRING(format_spec),
3620	PyString_GET_SIZE(format_spec));
3621	done:
3622	Py_XDECREF(tmp);
3623	return result;
3624	}
3625
3626	PyDoc_STRVAR(p_format__doc__,
3627	"S.__format__(format_spec) -> string\n\
3628	\n\
3629	Return a formatted version of S as described by format_spec.");
3630
3631
3632	static PyMethodDef
3633	string_methods[] = {
3634	/* Counterparts of the obsolete stropmodule functions; except
3635	string.maketrans(). */
3636	{"join", (PyCFunction)string_join, METH_O, join__doc__},
3637	{"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
3638	{"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
3639	{"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3640	{"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
3641	{"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3642	{"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3643	{"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3644	{"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3645	{"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3646	{"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3647	{"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
3648	{"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3649	capitalize__doc__},
3650	{"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3651	{"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3652	endswith__doc__},
3653	{"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
3654	{"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3655	{"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3656	{"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3657	{"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3658	{"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3659	{"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3660	{"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
3661	{"rpartition", (PyCFunction)string_rpartition, METH_O,
3662	rpartition__doc__},
3663	{"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3664	startswith__doc__},
3665	{"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3666	{"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3667	swapcase__doc__},
3668	{"translate", (PyCFunction)string_translate, METH_VARARGS,
3669	translate__doc__},
3670	{"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3671	{"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3672	{"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3673	{"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3674	{"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
3675	{"format", (PyCFunction) do_string_format, METH_VARARGS \| METH_KEYWORDS, format__doc__},
3676	{"__format__", (PyCFunction) string__format__, METH_VARARGS, p_format__doc__},
3677	{"_formatter_field_name_split", (PyCFunction) formatter_field_name_split, METH_NOARGS},
3678	{"_formatter_parser", (PyCFunction) formatter_parser, METH_NOARGS},
3679	{"encode", (PyCFunction)string_encode, METH_VARARGS \| METH_KEYWORDS, encode__doc__},
3680	{"decode", (PyCFunction)string_decode, METH_VARARGS \| METH_KEYWORDS, decode__doc__},
3681	{"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3682	expandtabs__doc__},
3683	{"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3684	splitlines__doc__},
3685	{"__sizeof__", (PyCFunction)string_sizeof, METH_NOARGS,
3686	sizeof__doc__},
3687	{"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
3688	{NULL, NULL} /* sentinel */
3689	};
3690
3691	static PyObject *
3692	str_subtype_new(PyTypeObject type, PyObject args, PyObject *kwds);
3693
3694	static PyObject *
3695	string_new(PyTypeObject type, PyObject args, PyObject *kwds)
3696	{
3697	PyObject *x = NULL;
3698	static char *kwlist[] = {"object", 0};
3699
3700	if (type != &PyString_Type)
3701	return str_subtype_new(type, args, kwds);
3702	if (!PyArg_ParseTupleAndKeywords(args, kwds, "\|O:str", kwlist, &x))
3703	return NULL;
3704	if (x == NULL)
3705	return PyString_FromString("");
3706	return PyObject_Str(x);
3707	}
3708
3709	static PyObject *
3710	str_subtype_new(PyTypeObject type, PyObject args, PyObject *kwds)
3711	{
3712	PyObject tmp, pnew;
3713	Py_ssize_t n;
3714
3715	assert(PyType_IsSubtype(type, &PyString_Type));
3716	tmp = string_new(&PyString_Type, args, kwds);
3717	if (tmp == NULL)
3718	return NULL;
3719	assert(PyString_CheckExact(tmp));
3720	n = PyString_GET_SIZE(tmp);
3721	pnew = type->tp_alloc(type, n);
3722	if (pnew != NULL) {
3723	Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
3724	((PyStringObject *)pnew)->ob_shash =
3725	((PyStringObject *)tmp)->ob_shash;
3726	((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
3727	}
3728	Py_DECREF(tmp);
3729	return pnew;
3730	}
3731
3732	static PyObject *
3733	basestring_new(PyTypeObject type, PyObject args, PyObject *kwds)
3734	{
3735	PyErr_SetString(PyExc_TypeError,
3736	"The basestring type cannot be instantiated");
3737	return NULL;
3738	}
3739
3740	static PyObject *
3741	string_mod(PyObject v, PyObject w)
3742	{
3743	if (!PyString_Check(v)) {
3744	Py_INCREF(Py_NotImplemented);
3745	return Py_NotImplemented;
3746	}
3747	return PyString_Format(v, w);
3748	}
3749
3750	PyDoc_STRVAR(basestring_doc,
3751	"Type basestring cannot be instantiated; it is the base for str and unicode.");
3752
3753	static PyNumberMethods string_as_number = {
3754	0, /nb_add/
3755	0, /nb_subtract/
3756	0, /nb_multiply/
3757	0, /nb_divide/
3758	string_mod, /nb_remainder/
3759	};
3760
3761
3762	PyTypeObject PyBaseString_Type = {
3763	PyVarObject_HEAD_INIT(&PyType_Type, 0)
3764	"basestring",
3765	0,
3766	0,
3767	0, /* tp_dealloc */
3768	0, /* tp_print */
3769	0, /* tp_getattr */
3770	0, /* tp_setattr */
3771	0, /* tp_compare */
3772	0, /* tp_repr */
3773	0, /* tp_as_number */
3774	0, /* tp_as_sequence */
3775	0, /* tp_as_mapping */
3776	0, /* tp_hash */
3777	0, /* tp_call */
3778	0, /* tp_str */
3779	0, /* tp_getattro */
3780	0, /* tp_setattro */
3781	0, /* tp_as_buffer */
3782	Py_TPFLAGS_DEFAULT \| Py_TPFLAGS_BASETYPE, /* tp_flags */
3783	basestring_doc, /* tp_doc */
3784	0, /* tp_traverse */
3785	0, /* tp_clear */
3786	0, /* tp_richcompare */
3787	0, /* tp_weaklistoffset */
3788	0, /* tp_iter */
3789	0, /* tp_iternext */
3790	0, /* tp_methods */
3791	0, /* tp_members */
3792	0, /* tp_getset */
3793	&PyBaseObject_Type, /* tp_base */
3794	0, /* tp_dict */
3795	0, /* tp_descr_get */
3796	0, /* tp_descr_set */
3797	0, /* tp_dictoffset */
3798	0, /* tp_init */
3799	0, /* tp_alloc */
3800	basestring_new, /* tp_new */
3801	0, /* tp_free */
3802	};
3803
3804	PyDoc_STRVAR(string_doc,
3805	"str(object='') -> string\n\
3806	\n\
3807	Return a nice string representation of the object.\n\
3808	If the argument is a string, the return value is the same object.");
3809
3810	PyTypeObject PyString_Type = {
3811	PyVarObject_HEAD_INIT(&PyType_Type, 0)
3812	"str",
3813	PyStringObject_SIZE,
3814	sizeof(char),
3815	string_dealloc, /* tp_dealloc */
3816	(printfunc)string_print, /* tp_print */
3817	0, /* tp_getattr */
3818	0, /* tp_setattr */
3819	0, /* tp_compare */
3820	string_repr, /* tp_repr */
3821	&string_as_number, /* tp_as_number */
3822	&string_as_sequence, /* tp_as_sequence */
3823	&string_as_mapping, /* tp_as_mapping */
3824	(hashfunc)string_hash, /* tp_hash */
3825	0, /* tp_call */
3826	string_str, /* tp_str */
3827	PyObject_GenericGetAttr, /* tp_getattro */
3828	0, /* tp_setattro */
3829	&string_as_buffer, /* tp_as_buffer */
3830	Py_TPFLAGS_DEFAULT \| Py_TPFLAGS_CHECKTYPES \|
3831	Py_TPFLAGS_BASETYPE \| Py_TPFLAGS_STRING_SUBCLASS \|
3832	Py_TPFLAGS_HAVE_NEWBUFFER, /* tp_flags */
3833	string_doc, /* tp_doc */
3834	0, /* tp_traverse */
3835	0, /* tp_clear */
3836	(richcmpfunc)string_richcompare, /* tp_richcompare */
3837	0, /* tp_weaklistoffset */
3838	0, /* tp_iter */
3839	0, /* tp_iternext */
3840	string_methods, /* tp_methods */
3841	0, /* tp_members */
3842	0, /* tp_getset */
3843	&PyBaseString_Type, /* tp_base */
3844	0, /* tp_dict */
3845	0, /* tp_descr_get */
3846	0, /* tp_descr_set */
3847	0, /* tp_dictoffset */
3848	0, /* tp_init */
3849	0, /* tp_alloc */
3850	string_new, /* tp_new */
3851	PyObject_Del, /* tp_free */
3852	};
3853
3854	void
3855	PyString_Concat(register PyObject *pv, register PyObject w)
3856	{
3857	register PyObject *v;
3858	if (*pv == NULL)
3859	return;
3860	if (w == NULL \|\| !PyString_Check(*pv)) {
3861	Py_CLEAR(*pv);
3862	return;
3863	}
3864	v = string_concat((PyStringObject ) pv, w);
3865	Py_DECREF(*pv);
3866	*pv = v;
3867	}
3868
3869	void
3870	PyString_ConcatAndDel(register PyObject *pv, register PyObject w)
3871	{
3872	PyString_Concat(pv, w);
3873	Py_XDECREF(w);
3874	}
3875
3876
3877	/* The following function breaks the notion that strings are immutable:
3878	it changes the size of a string. We get away with this only if there
3879	is only one module referencing the object. You can also think of it
3880	as creating a new string object and destroying the old one, only
3881	more efficiently. In any case, don't use this if the string may
3882	already be known to some other part of the code...
3883	Note that if there's not enough memory to resize the string, the original
3884	string object at pv is deallocated, pv is set to NULL, an "out of
3885	memory" exception is set, and -1 is returned. Else (on success) 0 is
3886	returned, and the value in *pv may or may not be the same as on input.
3887	As always, an extra byte is allocated for a trailing \0 byte (newsize
3888	does not include that), and a trailing \0 byte is stored.
3889	*/
3890
3891	int
3892	_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
3893	{
3894	register PyObject *v;
3895	register PyStringObject *sv;
3896	v = *pv;
3897	if (!PyString_Check(v) \|\| Py_REFCNT(v) != 1 \|\| newsize < 0 \|\|
3898	PyString_CHECK_INTERNED(v)) {
3899	*pv = 0;
3900	Py_DECREF(v);
3901	PyErr_BadInternalCall();
3902	return -1;
3903	}
3904	/* XXX UNREF/NEWREF interface should be more symmetrical */
3905	_Py_DEC_REFTOTAL;
3906	_Py_ForgetReference(v);
3907	pv = (PyObject )
3908	PyObject_REALLOC((char *)v, PyStringObject_SIZE + newsize);
3909	if (*pv == NULL) {
3910	PyObject_Del(v);
3911	PyErr_NoMemory();
3912	return -1;
3913	}
3914	_Py_NewReference(*pv);
3915	sv = (PyStringObject ) pv;
3916	Py_SIZE(sv) = newsize;
3917	sv->ob_sval[newsize] = '\0';
3918	sv->ob_shash = -1; /* invalidate cached hash value */
3919	return 0;
3920	}
3921
3922	/* Helpers for formatstring */
3923
3924	Py_LOCAL_INLINE(PyObject *)
3925	getnextarg(PyObject args, Py_ssize_t arglen, Py_ssize_t p_argidx)
3926	{
3927	Py_ssize_t argidx = *p_argidx;
3928	if (argidx < arglen) {
3929	(*p_argidx)++;
3930	if (arglen < 0)
3931	return args;
3932	else
3933	return PyTuple_GetItem(args, argidx);
3934	}
3935	PyErr_SetString(PyExc_TypeError,
3936	"not enough arguments for format string");
3937	return NULL;
3938	}
3939
/* Bit flags recording which flag characters were seen in a % conversion:
 *
 *   F_LJUST   '-'
 *   F_SIGN    '+'
 *   F_BLANK   ' '
 *   F_ALT     '#'
 *   F_ZERO    '0'
 */
#define F_LJUST 0x01
#define F_SIGN  0x02
#define F_BLANK 0x04
#define F_ALT   0x08
#define F_ZERO  0x10
3952
3953	/* Returns a new reference to a PyString object, or NULL on failure. */
3954
3955	static PyObject *
3956	formatfloat(PyObject *v, int flags, int prec, int type)
3957	{
3958	char *p;
3959	PyObject *result;
3960	double x;
3961
3962	x = PyFloat_AsDouble(v);
3963	if (x == -1.0 && PyErr_Occurred()) {
3964	PyErr_Format(PyExc_TypeError, "float argument required, "
3965	"not %.200s", Py_TYPE(v)->tp_name);
3966	return NULL;
3967	}
3968
3969	if (prec < 0)
3970	prec = 6;
3971
3972	p = PyOS_double_to_string(x, type, prec,
3973	(flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
3974
3975	if (p == NULL)
3976	return NULL;
3977	result = PyString_FromStringAndSize(p, strlen(p));
3978	PyMem_Free(p);
3979	return result;
3980	}
3981
3982	/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
3983	* the F_ALT flag, for Python's long (unbounded) ints. It's not used for
3984	* Python's regular ints.
3985	* Return value: a new PyString*, or NULL if error.
3986	* . *pbuf is set to point into it,
3987	* *plen set to the # of chars following that.
3988	* Caller must decref it when done using pbuf.
3989	* The string starting at *pbuf is of the form
3990	* "-"? ("0x" \| "0X")? digit+
3991	* "0x"/"0X" are present only for x and X conversions, with F_ALT
3992	* set in flags. The case of hex digits will be correct,
3993	* There will be at least prec digits, zero-filled on the left if
3994	* necessary to get that many.
3995	* val object to be converted
3996	* flags bitmask of format flags; only F_ALT is looked at
3997	* prec minimum number of digits; 0-fill on left if needed
3998	* type a character in [duoxX]; u acts the same as d
3999	*
4000	* CAUTION: o, x and X conversions on regular ints can never
4001	* produce a '-' sign, but can for Python's unbounded ints.
4002	*/
4003	PyObject*
4004	_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
4005	char *pbuf, int plen)
4006	{
4007	PyObject *result = NULL;
4008	char *buf;
4009	Py_ssize_t i;
4010	int sign; /* 1 if '-', else 0 */
4011	int len; /* number of characters */
4012	Py_ssize_t llen;
4013	int numdigits; /* len == numnondigits + numdigits */
4014	int numnondigits = 0;
4015
4016	switch (type) {
4017	case 'd':
4018	case 'u':
4019	result = Py_TYPE(val)->tp_str(val);
4020	break;
4021	case 'o':
4022	result = Py_TYPE(val)->tp_as_number->nb_oct(val);
4023	break;
4024	case 'x':
4025	case 'X':
4026	numnondigits = 2;
4027	result = Py_TYPE(val)->tp_as_number->nb_hex(val);
4028	break;
4029	default:
4030	assert(!"'type' not in [duoxX]");
4031	}
4032	if (!result)
4033	return NULL;
4034
4035	buf = PyString_AsString(result);
4036	if (!buf) {
4037	Py_DECREF(result);
4038	return NULL;
4039	}
4040
4041	/* To modify the string in-place, there can only be one reference. */
4042	if (Py_REFCNT(result) != 1) {
4043	PyErr_BadInternalCall();
4044	return NULL;
4045	}
4046	llen = PyString_Size(result);
4047	if (llen > INT_MAX) {
4048	PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
4049	return NULL;
4050	}
4051	len = (int)llen;
4052	if (buf[len-1] == 'L') {
4053	--len;
4054	buf[len] = '\0';
4055	}
4056	sign = buf[0] == '-';
4057	numnondigits += sign;
4058	numdigits = len - numnondigits;
4059	assert(numdigits > 0);
4060
4061	/* Get rid of base marker unless F_ALT */
4062	if ((flags & F_ALT) == 0) {
4063	/* Need to skip 0x, 0X or 0. */
4064	int skipped = 0;
4065	switch (type) {
4066	case 'o':
4067	assert(buf[sign] == '0');
4068	/* If 0 is only digit, leave it alone. */
4069	if (numdigits > 1) {
4070	skipped = 1;
4071	--numdigits;
4072	}
4073	break;
4074	case 'x':
4075	case 'X':
4076	assert(buf[sign] == '0');
4077	assert(buf[sign + 1] == 'x');
4078	skipped = 2;
4079	numnondigits -= 2;
4080	break;
4081	}
4082	if (skipped) {
4083	buf += skipped;
4084	len -= skipped;
4085	if (sign)
4086	buf[0] = '-';
4087	}
4088	assert(len == numnondigits + numdigits);
4089	assert(numdigits > 0);
4090	}
4091
4092	/* Fill with leading zeroes to meet minimum width. */
4093	if (prec > numdigits) {
4094	PyObject *r1 = PyString_FromStringAndSize(NULL,
4095	numnondigits + prec);
4096	char *b1;
4097	if (!r1) {
4098	Py_DECREF(result);
4099	return NULL;
4100	}
4101	b1 = PyString_AS_STRING(r1);
4102	for (i = 0; i < numnondigits; ++i)
4103	b1++ = buf++;
4104	for (i = 0; i < prec - numdigits; i++)
4105	*b1++ = '0';
4106	for (i = 0; i < numdigits; i++)
4107	b1++ = buf++;
4108	*b1 = '\0';
4109	Py_DECREF(result);
4110	result = r1;
4111	buf = PyString_AS_STRING(result);
4112	len = numnondigits + prec;
4113	}
4114
4115	/* Fix up case for hex conversions. */
4116	if (type == 'X') {
4117	/* Need to convert all lower case letters to upper case.
4118	and need to convert 0x to 0X (and -0x to -0X). */
4119	for (i = 0; i < len; i++)
4120	if (buf[i] >= 'a' && buf[i] <= 'x')
4121	buf[i] -= 'a'-'A';
4122	}
4123	*pbuf = buf;
4124	*plen = len;
4125	return result;
4126	}
4127
4128	Py_LOCAL_INLINE(int)
4129	formatint(char *buf, size_t buflen, int flags,
4130	int prec, int type, PyObject *v)
4131	{
4132	/* fmt = '%#.' + `prec` + 'l' + `type`
4133	worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4134	+ 1 + 1 = 24 */
4135	char fmt[64]; /* plenty big enough! */
4136	char *sign;
4137	long x;
4138
4139	x = PyInt_AsLong(v);
4140	if (x == -1 && PyErr_Occurred()) {
4141	PyErr_Format(PyExc_TypeError, "int argument required, not %.200s",
4142	Py_TYPE(v)->tp_name);
4143	return -1;
4144	}
4145	if (x < 0 && type == 'u') {
4146	type = 'd';
4147	}
4148	if (x < 0 && (type == 'x' \|\| type == 'X' \|\| type == 'o'))
4149	sign = "-";
4150	else
4151	sign = "";
4152	if (prec < 0)
4153	prec = 1;
4154
4155	if ((flags & F_ALT) &&
4156	(type == 'x' \|\| type == 'X')) {
4157	/* When converting under %#x or %#X, there are a number
4158	* of issues that cause pain:
4159	* - when 0 is being converted, the C standard leaves off
4160	* the '0x' or '0X', which is inconsistent with other
4161	* %#x/%#X conversions and inconsistent with Python's
4162	* hex() function
4163	* - there are platforms that violate the standard and
4164	* convert 0 with the '0x' or '0X'
4165	* (Metrowerks, Compaq Tru64)
4166	* - there are platforms that give '0x' when converting
4167	* under %#X, but convert 0 in accordance with the
4168	* standard (OS/2 EMX)
4169	*
4170	* We can achieve the desired consistency by inserting our
4171	* own '0x' or '0X' prefix, and substituting %x/%X in place
4172	* of %#x/%#X.
4173	*
4174	* Note that this is the same approach as used in
4175	* formatint() in unicodeobject.c
4176	*/
4177	PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4178	sign, type, prec, type);
4179	}
4180	else {
4181	PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4182	sign, (flags&F_ALT) ? "#" : "",
4183	prec, type);
4184	}
4185
4186	/* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4187	* worst case buf = '-0x' + [0-9]*prec, where prec >= 11
4188	*/
4189	if (buflen <= 14 \|\| buflen <= (size_t)3 + (size_t)prec) {
4190	PyErr_SetString(PyExc_OverflowError,
4191	"formatted integer is too long (precision too large?)");
4192	return -1;
4193	}
4194	if (sign[0])
4195	PyOS_snprintf(buf, buflen, fmt, -x);
4196	else
4197	PyOS_snprintf(buf, buflen, fmt, x);
4198	return (int)strlen(buf);
4199	}
4200
4201	Py_LOCAL_INLINE(int)
4202	formatchar(char buf, size_t buflen, PyObject v)
4203	{
4204	/* presume that the buffer is at least 2 characters long */
4205	if (PyString_Check(v)) {
4206	if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
4207	return -1;
4208	}
4209	else {
4210	if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
4211	return -1;
4212	}
4213	buf[1] = '\0';
4214	return 1;
4215	}
4216
4217	/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
4218
4219	FORMATBUFLEN is the length of the buffer in which the ints &
4220	chars are formatted. XXX This is a magic number. Each formatting
4221	routine does bounds checking to ensure no overflow, but a better
4222	solution may be to malloc a buffer of appropriate size for each
4223	format. For now, the current solution is sufficient.
4224	*/
4225	#define FORMATBUFLEN (size_t)120
4226
4227	PyObject *
4228	PyString_Format(PyObject format, PyObject args)
4229	{
4230	char fmt, res;
4231	Py_ssize_t arglen, argidx;
4232	Py_ssize_t reslen, rescnt, fmtcnt;
4233	int args_owned = 0;
4234	PyObject result, orig_args;
4235	#ifdef Py_USING_UNICODE
4236	PyObject v, w;
4237	#endif
4238	PyObject *dict = NULL;
4239	if (format == NULL \|\| !PyString_Check(format) \|\| args == NULL) {
4240	PyErr_BadInternalCall();
4241	return NULL;
4242	}
4243	orig_args = args;
4244	fmt = PyString_AS_STRING(format);
4245	fmtcnt = PyString_GET_SIZE(format);
4246	reslen = rescnt = fmtcnt + 100;
4247	result = PyString_FromStringAndSize((char *)NULL, reslen);
4248	if (result == NULL)
4249	return NULL;
4250	res = PyString_AsString(result);
4251	if (PyTuple_Check(args)) {
4252	arglen = PyTuple_GET_SIZE(args);
4253	argidx = 0;
4254	}
4255	else {
4256	arglen = -1;
4257	argidx = -2;
4258	}
4259	if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript &&
4260	!PyTuple_Check(args) && !PyObject_TypeCheck(args, &PyBaseString_Type))
4261	dict = args;
4262	while (--fmtcnt >= 0) {
4263	if (*fmt != '%') {
4264	if (--rescnt < 0) {
4265	rescnt = fmtcnt + 100;
4266	reslen += rescnt;
4267	if (_PyString_Resize(&result, reslen))
4268	return NULL;
4269	res = PyString_AS_STRING(result)
4270	+ reslen - rescnt;
4271	--rescnt;
4272	}
4273	res++ = fmt++;
4274	}
4275	else {
4276	/* Got a format specifier */
4277	int flags = 0;
4278	Py_ssize_t width = -1;
4279	int prec = -1;
4280	int c = '\0';
4281	int fill;
4282	int isnumok;
4283	PyObject *v = NULL;
4284	PyObject *temp = NULL;
4285	char *pbuf;
4286	int sign;
4287	Py_ssize_t len;
4288	char formatbuf[FORMATBUFLEN];
4289	/* For format{int,char}() */
4290	#ifdef Py_USING_UNICODE
4291	char *fmt_start = fmt;
4292	Py_ssize_t argidx_start = argidx;
4293	#endif
4294
4295	fmt++;
4296	if (*fmt == '(') {
4297	char *keystart;
4298	Py_ssize_t keylen;
4299	PyObject *key;
4300	int pcount = 1;
4301
4302	if (dict == NULL) {
4303	PyErr_SetString(PyExc_TypeError,
4304	"format requires a mapping");
4305	goto error;
4306	}
4307	++fmt;
4308	--fmtcnt;
4309	keystart = fmt;
4310	/* Skip over balanced parentheses */
4311	while (pcount > 0 && --fmtcnt >= 0) {
4312	if (*fmt == ')')
4313	--pcount;
4314	else if (*fmt == '(')
4315	++pcount;
4316	fmt++;
4317	}
4318	keylen = fmt - keystart - 1;
4319	if (fmtcnt < 0 \|\| pcount > 0) {
4320	PyErr_SetString(PyExc_ValueError,
4321	"incomplete format key");
4322	goto error;
4323	}
4324	key = PyString_FromStringAndSize(keystart,
4325	keylen);
4326	if (key == NULL)
4327	goto error;
4328	if (args_owned) {
4329	Py_DECREF(args);
4330	args_owned = 0;
4331	}
4332	args = PyObject_GetItem(dict, key);
4333	Py_DECREF(key);
4334	if (args == NULL) {
4335	goto error;
4336	}
4337	args_owned = 1;
4338	arglen = -1;
4339	argidx = -2;
4340	}
4341	while (--fmtcnt >= 0) {
4342	switch (c = *fmt++) {
4343	case '-': flags \|= F_LJUST; continue;
4344	case '+': flags \|= F_SIGN; continue;
4345	case ' ': flags \|= F_BLANK; continue;
4346	case '#': flags \|= F_ALT; continue;
4347	case '0': flags \|= F_ZERO; continue;
4348	}
4349	break;
4350	}
4351	if (c == '*') {
4352	v = getnextarg(args, arglen, &argidx);
4353	if (v == NULL)
4354	goto error;
4355	if (!PyInt_Check(v)) {
4356	PyErr_SetString(PyExc_TypeError,
4357	"* wants int");
4358	goto error;
4359	}
4360	width = PyInt_AsSsize_t(v);
4361	if (width == -1 && PyErr_Occurred())
4362	goto error;
4363	if (width < 0) {
4364	flags \|= F_LJUST;
4365	width = -width;
4366	}
4367	if (--fmtcnt >= 0)
4368	c = *fmt++;
4369	}
4370	else if (c >= 0 && isdigit(c)) {
4371	width = c - '0';
4372	while (--fmtcnt >= 0) {
4373	c = Py_CHARMASK(*fmt++);
4374	if (!isdigit(c))
4375	break;
4376	if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
4377	PyErr_SetString(
4378	PyExc_ValueError,
4379	"width too big");
4380	goto error;
4381	}
4382	width = width*10 + (c - '0');
4383	}
4384	}
4385	if (c == '.') {
4386	prec = 0;
4387	if (--fmtcnt >= 0)
4388	c = *fmt++;
4389	if (c == '*') {
4390	v = getnextarg(args, arglen, &argidx);
4391	if (v == NULL)
4392	goto error;
4393	if (!PyInt_Check(v)) {
4394	PyErr_SetString(
4395	PyExc_TypeError,
4396	"* wants int");
4397	goto error;
4398	}
4399	prec = _PyInt_AsInt(v);
4400	if (prec == -1 && PyErr_Occurred())
4401	goto error;
4402	if (prec < 0)
4403	prec = 0;
4404	if (--fmtcnt >= 0)
4405	c = *fmt++;
4406	}
4407	else if (c >= 0 && isdigit(c)) {
4408	prec = c - '0';
4409	while (--fmtcnt >= 0) {
4410	c = Py_CHARMASK(*fmt++);
4411	if (!isdigit(c))
4412	break;
4413	if (prec > (INT_MAX - ((int)c - '0')) / 10) {
4414	PyErr_SetString(
4415	PyExc_ValueError,
4416	"prec too big");
4417	goto error;
4418	}
4419	prec = prec*10 + (c - '0');
4420	}
4421	}
4422	} /* prec */
4423	if (fmtcnt >= 0) {
4424	if (c == 'h' \|\| c == 'l' \|\| c == 'L') {
4425	if (--fmtcnt >= 0)
4426	c = *fmt++;
4427	}
4428	}
4429	if (fmtcnt < 0) {
4430	PyErr_SetString(PyExc_ValueError,
4431	"incomplete format");
4432	goto error;
4433	}
4434	if (c != '%') {
4435	v = getnextarg(args, arglen, &argidx);
4436	if (v == NULL)
4437	goto error;
4438	}
4439	sign = 0;
4440	fill = ' ';
4441	switch (c) {
4442	case '%':
4443	pbuf = "%";
4444	len = 1;
4445	break;
4446	case 's':
4447	#ifdef Py_USING_UNICODE
4448	if (PyUnicode_Check(v)) {
4449	fmt = fmt_start;
4450	argidx = argidx_start;
4451	goto unicode;
4452	}
4453	#endif
4454	temp = _PyObject_Str(v);
4455	#ifdef Py_USING_UNICODE
4456	if (temp != NULL && PyUnicode_Check(temp)) {
4457	Py_DECREF(temp);
4458	fmt = fmt_start;
4459	argidx = argidx_start;
4460	goto unicode;
4461	}
4462	#endif
4463	/* Fall through */
4464	case 'r':
4465	if (c == 'r')
4466	temp = PyObject_Repr(v);
4467	if (temp == NULL)
4468	goto error;
4469	if (!PyString_Check(temp)) {
4470	PyErr_SetString(PyExc_TypeError,
4471	"%s argument has non-string str()");
4472	Py_DECREF(temp);
4473	goto error;
4474	}
4475	pbuf = PyString_AS_STRING(temp);
4476	len = PyString_GET_SIZE(temp);
4477	if (prec >= 0 && len > prec)
4478	len = prec;
4479	break;
4480	case 'i':
4481	case 'd':
4482	case 'u':
4483	case 'o':
4484	case 'x':
4485	case 'X':
4486	if (c == 'i')
4487	c = 'd';
4488	isnumok = 0;
4489	if (PyNumber_Check(v)) {
4490	PyObject *iobj=NULL;
4491
4492	if (PyInt_Check(v) \|\| (PyLong_Check(v))) {
4493	iobj = v;
4494	Py_INCREF(iobj);
4495	}
4496	else {
4497	iobj = PyNumber_Int(v);
4498	if (iobj==NULL) {
4499	PyErr_Clear();
4500	iobj = PyNumber_Long(v);
4501	}
4502	}
4503	if (iobj!=NULL) {
4504	if (PyInt_Check(iobj)) {
4505	isnumok = 1;
4506	pbuf = formatbuf;
4507	len = formatint(pbuf,
4508	sizeof(formatbuf),
4509	flags, prec, c, iobj);
4510	Py_DECREF(iobj);
4511	if (len < 0)
4512	goto error;
4513	sign = 1;
4514	}
4515	else if (PyLong_Check(iobj)) {
4516	int ilen;
4517
4518	isnumok = 1;
4519	temp = _PyString_FormatLong(iobj, flags,
4520	prec, c, &pbuf, &ilen);
4521	Py_DECREF(iobj);
4522	len = ilen;
4523	if (!temp)
4524	goto error;
4525	sign = 1;
4526	}
4527	else {
4528	Py_DECREF(iobj);
4529	}
4530	}
4531	}
4532	if (!isnumok) {
4533	PyErr_Format(PyExc_TypeError,
4534	"%%%c format: a number is required, "
4535	"not %.200s", c, Py_TYPE(v)->tp_name);
4536	goto error;
4537	}
4538	if (flags & F_ZERO)
4539	fill = '0';
4540	break;
4541	case 'e':
4542	case 'E':
4543	case 'f':
4544	case 'F':
4545	case 'g':
4546	case 'G':
4547	temp = formatfloat(v, flags, prec, c);
4548	if (temp == NULL)
4549	goto error;
4550	pbuf = PyString_AS_STRING(temp);
4551	len = PyString_GET_SIZE(temp);
4552	sign = 1;
4553	if (flags & F_ZERO)
4554	fill = '0';
4555	break;
4556	case 'c':
4557	#ifdef Py_USING_UNICODE
4558	if (PyUnicode_Check(v)) {
4559	fmt = fmt_start;
4560	argidx = argidx_start;
4561	goto unicode;
4562	}
4563	#endif
4564	pbuf = formatbuf;
4565	len = formatchar(pbuf, sizeof(formatbuf), v);
4566	if (len < 0)
4567	goto error;
4568	break;
4569	default:
4570	PyErr_Format(PyExc_ValueError,
4571	"unsupported format character '%c' (0x%x) "
4572	"at index %zd",
4573	c, c,
4574	(Py_ssize_t)(fmt - 1 -
4575	PyString_AsString(format)));
4576	goto error;
4577	}
4578	if (sign) {
4579	if (pbuf == '-' \|\| pbuf == '+') {
4580	sign = *pbuf++;
4581	len--;
4582	}
4583	else if (flags & F_SIGN)
4584	sign = '+';
4585	else if (flags & F_BLANK)
4586	sign = ' ';
4587	else
4588	sign = 0;
4589	}
4590	if (width < len)
4591	width = len;
4592	if (rescnt - (sign != 0) < width) {
4593	reslen -= rescnt;
4594	rescnt = width + fmtcnt + 100;
4595	reslen += rescnt;
4596	if (reslen < 0) {
4597	Py_DECREF(result);
4598	Py_XDECREF(temp);
4599	return PyErr_NoMemory();
4600	}
4601	if (_PyString_Resize(&result, reslen)) {
4602	Py_XDECREF(temp);
4603	return NULL;
4604	}
4605	res = PyString_AS_STRING(result)
4606	+ reslen - rescnt;
4607	}
4608	if (sign) {
4609	if (fill != ' ')
4610	*res++ = sign;
4611	rescnt--;
4612	if (width > len)
4613	width--;
4614	}
4615	if ((flags & F_ALT) && (c == 'x' \|\| c == 'X')) {
4616	assert(pbuf[0] == '0');
4617	assert(pbuf[1] == c);
4618	if (fill != ' ') {
4619	res++ = pbuf++;
4620	res++ = pbuf++;
4621	}
4622	rescnt -= 2;
4623	width -= 2;
4624	if (width < 0)
4625	width = 0;
4626	len -= 2;
4627	}
4628	if (width > len && !(flags & F_LJUST)) {
4629	do {
4630	--rescnt;
4631	*res++ = fill;
4632	} while (--width > len);
4633	}
4634	if (fill == ' ') {
4635	if (sign)
4636	*res++ = sign;
4637	if ((flags & F_ALT) &&
4638	(c == 'x' \|\| c == 'X')) {
4639	assert(pbuf[0] == '0');
4640	assert(pbuf[1] == c);
4641	res++ = pbuf++;
4642	res++ = pbuf++;
4643	}
4644	}
4645	Py_MEMCPY(res, pbuf, len);
4646	res += len;
4647	rescnt -= len;
4648	while (--width >= len) {
4649	--rescnt;
4650	*res++ = ' ';
4651	}
4652	if (dict && (argidx < arglen) && c != '%') {
4653	PyErr_SetString(PyExc_TypeError,
4654	"not all arguments converted during string formatting");
4655	Py_XDECREF(temp);
4656	goto error;
4657	}
4658	Py_XDECREF(temp);
4659	} /* '%' */
4660	} /* until end */
4661	if (argidx < arglen && !dict) {
4662	PyErr_SetString(PyExc_TypeError,
4663	"not all arguments converted during string formatting");
4664	goto error;
4665	}
4666	if (args_owned) {
4667	Py_DECREF(args);
4668	}
4669	if (_PyString_Resize(&result, reslen - rescnt))
4670	return NULL;
4671	return result;
4672
4673	#ifdef Py_USING_UNICODE
4674	unicode:
4675	if (args_owned) {
4676	Py_DECREF(args);
4677	args_owned = 0;
4678	}
4679	/* Fiddle args right (remove the first argidx arguments) */
4680	if (PyTuple_Check(orig_args) && argidx > 0) {
4681	PyObject *v;
4682	Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
4683	v = PyTuple_New(n);
4684	if (v == NULL)
4685	goto error;
4686	while (--n >= 0) {
4687	PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4688	Py_INCREF(w);
4689	PyTuple_SET_ITEM(v, n, w);
4690	}
4691	args = v;
4692	} else {
4693	Py_INCREF(orig_args);
4694	args = orig_args;
4695	}
4696	args_owned = 1;
4697	/* Take what we have of the result and let the Unicode formatting
4698	function format the rest of the input. */
4699	rescnt = res - PyString_AS_STRING(result);
4700	if (_PyString_Resize(&result, rescnt))
4701	goto error;
4702	fmtcnt = PyString_GET_SIZE(format) - \
4703	(fmt - PyString_AS_STRING(format));
4704	format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4705	if (format == NULL)
4706	goto error;
4707	v = PyUnicode_Format(format, args);
4708	Py_DECREF(format);
4709	if (v == NULL)
4710	goto error;
4711	/* Paste what we have (result) to what the Unicode formatting
4712	function returned (v) and return the result (or error) */
4713	w = PyUnicode_Concat(result, v);
4714	Py_DECREF(result);
4715	Py_DECREF(v);
4716	Py_DECREF(args);
4717	return w;
4718	#endif /* Py_USING_UNICODE */
4719
4720	error:
4721	Py_DECREF(result);
4722	if (args_owned) {
4723	Py_DECREF(args);
4724	}
4725	return NULL;
4726	}
4727
4728	void
4729	PyString_InternInPlace(PyObject **p)
4730	{
4731	register PyStringObject s = (PyStringObject )(*p);
4732	PyObject *t;
4733	if (s == NULL \|\| !PyString_Check(s))
4734	Py_FatalError("PyString_InternInPlace: strings only please!");
4735	/* If it's a string subclass, we don't really know what putting
4736	it in the interned dict might do. */
4737	if (!PyString_CheckExact(s))
4738	return;
4739	if (PyString_CHECK_INTERNED(s))
4740	return;
4741	if (interned == NULL) {
4742	interned = PyDict_New();
4743	if (interned == NULL) {
4744	PyErr_Clear(); /* Don't leave an exception */
4745	return;
4746	}
4747	}
4748	t = PyDict_GetItem(interned, (PyObject *)s);
4749	if (t) {
4750	Py_INCREF(t);
4751	Py_DECREF(*p);
4752	*p = t;
4753	return;
4754	}
4755
4756	if (PyDict_SetItem(interned, (PyObject )s, (PyObject )s) < 0) {
4757	PyErr_Clear();
4758	return;
4759	}
4760	/* The two references in interned are not counted by refcnt.
4761	The string deallocator will take care of this */
4762	Py_REFCNT(s) -= 2;
4763	PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
4764	}
4765
4766	void
4767	PyString_InternImmortal(PyObject **p)
4768	{
4769	PyString_InternInPlace(p);
4770	if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4771	PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4772	Py_INCREF(*p);
4773	}
4774	}
4775
4776
4777	PyObject *
4778	PyString_InternFromString(const char *cp)
4779	{
4780	PyObject *s = PyString_FromString(cp);
4781	if (s == NULL)
4782	return NULL;
4783	PyString_InternInPlace(&s);
4784	return s;
4785	}
4786
4787	void
4788	PyString_Fini(void)
4789	{
4790	int i;
4791	for (i = 0; i < UCHAR_MAX + 1; i++)
4792	Py_CLEAR(characters[i]);
4793	Py_CLEAR(nullstring);
4794	}
4795
4796	void _Py_ReleaseInternedStrings(void)
4797	{
4798	PyObject *keys;
4799	PyStringObject *s;
4800	Py_ssize_t i, n;
4801	Py_ssize_t immortal_size = 0, mortal_size = 0;
4802
4803	if (interned == NULL \|\| !PyDict_Check(interned))
4804	return;
4805	keys = PyDict_Keys(interned);
4806	if (keys == NULL \|\| !PyList_Check(keys)) {
4807	PyErr_Clear();
4808	return;
4809	}
4810
4811	/* Since _Py_ReleaseInternedStrings() is intended to help a leak
4812	detector, interned strings are not forcibly deallocated; rather, we
4813	give them their stolen references back, and then clear and DECREF
4814	the interned dict. */
4815
4816	n = PyList_GET_SIZE(keys);
4817	fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n",
4818	n);
4819	for (i = 0; i < n; i++) {
4820	s = (PyStringObject *) PyList_GET_ITEM(keys, i);
4821	switch (s->ob_sstate) {
4822	case SSTATE_NOT_INTERNED:
4823	/* XXX Shouldn't happen */
4824	break;
4825	case SSTATE_INTERNED_IMMORTAL:
4826	Py_REFCNT(s) += 1;
4827	immortal_size += Py_SIZE(s);
4828	break;
4829	case SSTATE_INTERNED_MORTAL:
4830	Py_REFCNT(s) += 2;
4831	mortal_size += Py_SIZE(s);
4832	break;
4833	default:
4834	Py_FatalError("Inconsistent interned string state.");
4835	}
4836	s->ob_sstate = SSTATE_NOT_INTERNED;
4837	}
4838	fprintf(stderr, "total size of all interned strings: "
4839	"%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "
4840	"mortal/immortal\n", mortal_size, immortal_size);
4841	Py_DECREF(keys);
4842	PyDict_Clear(interned);
4843	Py_CLEAR(interned);
4844	}

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: python/trunk/Objects/stringobject.c@ 391

Download in other formats: