Context Navigation

stringobject.c

Visit:

Last change on this file was 3225, checked in by bird, 18 years ago
Python 2.5
File size: 119.5 KB

Line
/* String object implementation */

#define PY_SSIZE_T_CLEAN

#include "Python.h"

#include <ctype.h>

#ifdef COUNT_ALLOCS
/* Counters bumped each time the shared empty string / a cached
   one-character string is handed out instead of allocating. */
int null_strings, one_strings;
#endif

/* Cache of one-character strings, indexed by the byte value. */
static PyStringObject *characters[UCHAR_MAX + 1];
/* The shared empty string; NULL until it has been created once. */
static PyStringObject *nullstring;

/* This dictionary holds all interned strings.  Note that references to
   strings in this dictionary are *not* counted in the string's ob_refcnt.
   When the interned string reaches a refcnt of 0 the string deallocation
   function will delete the reference from this dictionary.

   Another way to look at this is to say that the actual reference
   count of a string is:  s->ob_refcnt + (s->ob_sstate?2:0)
*/
static PyObject *interned;
25
26	/*
27	For both PyString_FromString() and PyString_FromStringAndSize(), the
28	parameter `size' denotes number of characters to allocate, not counting any
29	null terminating character.
30
31	For PyString_FromString(), the parameter `str' points to a null-terminated
32	string containing exactly `size' bytes.
33
34	For PyString_FromStringAndSize(), the parameter the parameter `str' is
35	either NULL or else points to a string containing at least `size' bytes.
36	For PyString_FromStringAndSize(), the string in the `str' parameter does
37	not have to be null-terminated. (Therefore it is safe to construct a
38	substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
39	If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
40	bytes (setting the last byte to the null terminating character) and you can
41	fill in the data yourself. If `str' is non-NULL then the resulting
42	PyString object must be treated as immutable and you must not fill in nor
43	alter the data yourself, since the strings may be shared.
44
45	The PyObject member `op->ob_size', which denotes the number of "extra
46	items" in a variable-size object, will contain the number of bytes
47	allocated for string data, not counting the null terminating character. It
48	is therefore equal to the equal to the `size' parameter (for
49	PyString_FromStringAndSize()) or the length of the string in the `str'
50	parameter (for PyString_FromString()).
51	*/
52	PyObject *
53	PyString_FromStringAndSize(const char *str, Py_ssize_t size)
54	{
55	register PyStringObject *op;
56	assert(size >= 0);
57	if (size == 0 && (op = nullstring) != NULL) {
58	#ifdef COUNT_ALLOCS
59	null_strings++;
60	#endif
61	Py_INCREF(op);
62	return (PyObject *)op;
63	}
64	if (size == 1 && str != NULL &&
65	(op = characters[*str & UCHAR_MAX]) != NULL)
66	{
67	#ifdef COUNT_ALLOCS
68	one_strings++;
69	#endif
70	Py_INCREF(op);
71	return (PyObject *)op;
72	}
73
74	/* Inline PyObject_NewVar */
75	op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
76	if (op == NULL)
77	return PyErr_NoMemory();
78	PyObject_INIT_VAR(op, &PyString_Type, size);
79	op->ob_shash = -1;
80	op->ob_sstate = SSTATE_NOT_INTERNED;
81	if (str != NULL)
82	Py_MEMCPY(op->ob_sval, str, size);
83	op->ob_sval[size] = '\0';
84	/* share short strings */
85	if (size == 0) {
86	PyObject t = (PyObject )op;
87	PyString_InternInPlace(&t);
88	op = (PyStringObject *)t;
89	nullstring = op;
90	Py_INCREF(op);
91	} else if (size == 1 && str != NULL) {
92	PyObject t = (PyObject )op;
93	PyString_InternInPlace(&t);
94	op = (PyStringObject *)t;
95	characters[*str & UCHAR_MAX] = op;
96	Py_INCREF(op);
97	}
98	return (PyObject *) op;
99	}
100
101	PyObject *
102	PyString_FromString(const char *str)
103	{
104	register size_t size;
105	register PyStringObject *op;
106
107	assert(str != NULL);
108	size = strlen(str);
109	if (size > PY_SSIZE_T_MAX) {
110	PyErr_SetString(PyExc_OverflowError,
111	"string is too long for a Python string");
112	return NULL;
113	}
114	if (size == 0 && (op = nullstring) != NULL) {
115	#ifdef COUNT_ALLOCS
116	null_strings++;
117	#endif
118	Py_INCREF(op);
119	return (PyObject *)op;
120	}
121	if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
122	#ifdef COUNT_ALLOCS
123	one_strings++;
124	#endif
125	Py_INCREF(op);
126	return (PyObject *)op;
127	}
128
129	/* Inline PyObject_NewVar */
130	op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
131	if (op == NULL)
132	return PyErr_NoMemory();
133	PyObject_INIT_VAR(op, &PyString_Type, size);
134	op->ob_shash = -1;
135	op->ob_sstate = SSTATE_NOT_INTERNED;
136	Py_MEMCPY(op->ob_sval, str, size+1);
137	/* share short strings */
138	if (size == 0) {
139	PyObject t = (PyObject )op;
140	PyString_InternInPlace(&t);
141	op = (PyStringObject *)t;
142	nullstring = op;
143	Py_INCREF(op);
144	} else if (size == 1) {
145	PyObject t = (PyObject )op;
146	PyString_InternInPlace(&t);
147	op = (PyStringObject *)t;
148	characters[*str & UCHAR_MAX] = op;
149	Py_INCREF(op);
150	}
151	return (PyObject *) op;
152	}
153
154	PyObject *
155	PyString_FromFormatV(const char *format, va_list vargs)
156	{
157	va_list count;
158	Py_ssize_t n = 0;
159	const char* f;
160	char *s;
161	PyObject* string;
162
163	#ifdef VA_LIST_IS_ARRAY
164	Py_MEMCPY(count, vargs, sizeof(va_list));
165	#else
166	#ifdef __va_copy
167	__va_copy(count, vargs);
168	#else
169	count = vargs;
170	#endif
171	#endif
172	/* step 1: figure out how large a buffer we need */
173	for (f = format; *f; f++) {
174	if (*f == '%') {
175	const char* p = f;
176	while (++f && f != '%' && !isalpha(Py_CHARMASK(*f)))
177	;
178
179	/* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
180	* they don't affect the amount of space we reserve.
181	*/
182	if ((f == 'l' \|\| f == 'z') &&
183	(f[1] == 'd' \|\| f[1] == 'u'))
184	++f;
185
186	switch (*f) {
187	case 'c':
188	(void)va_arg(count, int);
189	/* fall through... */
190	case '%':
191	n++;
192	break;
193	case 'd': case 'u': case 'i': case 'x':
194	(void) va_arg(count, int);
195	/* 20 bytes is enough to hold a 64-bit
196	integer. Decimal takes the most space.
197	This isn't enough for octal. */
198	n += 20;
199	break;
200	case 's':
201	s = va_arg(count, char*);
202	n += strlen(s);
203	break;
204	case 'p':
205	(void) va_arg(count, int);
206	/* maximum 64-bit pointer representation:
207	* 0xffffffffffffffff
208	* so 19 characters is enough.
209	* XXX I count 18 -- what's the extra for?
210	*/
211	n += 19;
212	break;
213	default:
214	/* if we stumble upon an unknown
215	formatting code, copy the rest of
216	the format string to the output
217	string. (we cannot just skip the
218	code, since there's no way to know
219	what's in the argument list) */
220	n += strlen(p);
221	goto expand;
222	}
223	} else
224	n++;
225	}
226	expand:
227	/* step 2: fill the buffer */
228	/* Since we've analyzed how much space we need for the worst case,
229	use sprintf directly instead of the slower PyOS_snprintf. */
230	string = PyString_FromStringAndSize(NULL, n);
231	if (!string)
232	return NULL;
233
234	s = PyString_AsString(string);
235
236	for (f = format; *f; f++) {
237	if (*f == '%') {
238	const char* p = f++;
239	Py_ssize_t i;
240	int longflag = 0;
241	int size_tflag = 0;
242	/* parse the width.precision part (we're only
243	interested in the precision value, if any) */
244	n = 0;
245	while (isdigit(Py_CHARMASK(*f)))
246	n = (n10) + f++ - '0';
247	if (*f == '.') {
248	f++;
249	n = 0;
250	while (isdigit(Py_CHARMASK(*f)))
251	n = (n10) + f++ - '0';
252	}
253	while (f && f != '%' && !isalpha(Py_CHARMASK(*f)))
254	f++;
255	/* handle the long flag, but only for %ld and %lu.
256	others can be added when necessary. */
257	if (*f == 'l' && (f[1] == 'd' \|\| f[1] == 'u')) {
258	longflag = 1;
259	++f;
260	}
261	/* handle the size_t flag. */
262	if (*f == 'z' && (f[1] == 'd' \|\| f[1] == 'u')) {
263	size_tflag = 1;
264	++f;
265	}
266
267	switch (*f) {
268	case 'c':
269	*s++ = va_arg(vargs, int);
270	break;
271	case 'd':
272	if (longflag)
273	sprintf(s, "%ld", va_arg(vargs, long));
274	else if (size_tflag)
275	sprintf(s, "%" PY_FORMAT_SIZE_T "d",
276	va_arg(vargs, Py_ssize_t));
277	else
278	sprintf(s, "%d", va_arg(vargs, int));
279	s += strlen(s);
280	break;
281	case 'u':
282	if (longflag)
283	sprintf(s, "%lu",
284	va_arg(vargs, unsigned long));
285	else if (size_tflag)
286	sprintf(s, "%" PY_FORMAT_SIZE_T "u",
287	va_arg(vargs, size_t));
288	else
289	sprintf(s, "%u",
290	va_arg(vargs, unsigned int));
291	s += strlen(s);
292	break;
293	case 'i':
294	sprintf(s, "%i", va_arg(vargs, int));
295	s += strlen(s);
296	break;
297	case 'x':
298	sprintf(s, "%x", va_arg(vargs, int));
299	s += strlen(s);
300	break;
301	case 's':
302	p = va_arg(vargs, char*);
303	i = strlen(p);
304	if (n > 0 && i > n)
305	i = n;
306	Py_MEMCPY(s, p, i);
307	s += i;
308	break;
309	case 'p':
310	sprintf(s, "%p", va_arg(vargs, void*));
311	/* %p is ill-defined: ensure leading 0x. */
312	if (s[1] == 'X')
313	s[1] = 'x';
314	else if (s[1] != 'x') {
315	memmove(s+2, s, strlen(s)+1);
316	s[0] = '0';
317	s[1] = 'x';
318	}
319	s += strlen(s);
320	break;
321	case '%':
322	*s++ = '%';
323	break;
324	default:
325	strcpy(s, p);
326	s += strlen(s);
327	goto end;
328	}
329	} else
330	s++ = f;
331	}
332
333	end:
334	_PyString_Resize(&string, s - PyString_AS_STRING(string));
335	return string;
336	}
337
338	PyObject *
339	PyString_FromFormat(const char *format, ...)
340	{
341	PyObject* ret;
342	va_list vargs;
343
344	#ifdef HAVE_STDARG_PROTOTYPES
345	va_start(vargs, format);
346	#else
347	va_start(vargs);
348	#endif
349	ret = PyString_FromFormatV(format, vargs);
350	va_end(vargs);
351	return ret;
352	}
353
354
355	PyObject PyString_Decode(const char s,
356	Py_ssize_t size,
357	const char *encoding,
358	const char *errors)
359	{
360	PyObject v, str;
361
362	str = PyString_FromStringAndSize(s, size);
363	if (str == NULL)
364	return NULL;
365	v = PyString_AsDecodedString(str, encoding, errors);
366	Py_DECREF(str);
367	return v;
368	}
369
370	PyObject PyString_AsDecodedObject(PyObject str,
371	const char *encoding,
372	const char *errors)
373	{
374	PyObject *v;
375
376	if (!PyString_Check(str)) {
377	PyErr_BadArgument();
378	goto onError;
379	}
380
381	if (encoding == NULL) {
382	#ifdef Py_USING_UNICODE
383	encoding = PyUnicode_GetDefaultEncoding();
384	#else
385	PyErr_SetString(PyExc_ValueError, "no encoding specified");
386	goto onError;
387	#endif
388	}
389
390	/* Decode via the codec registry */
391	v = PyCodec_Decode(str, encoding, errors);
392	if (v == NULL)
393	goto onError;
394
395	return v;
396
397	onError:
398	return NULL;
399	}
400
401	PyObject PyString_AsDecodedString(PyObject str,
402	const char *encoding,
403	const char *errors)
404	{
405	PyObject *v;
406
407	v = PyString_AsDecodedObject(str, encoding, errors);
408	if (v == NULL)
409	goto onError;
410
411	#ifdef Py_USING_UNICODE
412	/* Convert Unicode to a string using the default encoding */
413	if (PyUnicode_Check(v)) {
414	PyObject *temp = v;
415	v = PyUnicode_AsEncodedString(v, NULL, NULL);
416	Py_DECREF(temp);
417	if (v == NULL)
418	goto onError;
419	}
420	#endif
421	if (!PyString_Check(v)) {
422	PyErr_Format(PyExc_TypeError,
423	"decoder did not return a string object (type=%.400s)",
424	v->ob_type->tp_name);
425	Py_DECREF(v);
426	goto onError;
427	}
428
429	return v;
430
431	onError:
432	return NULL;
433	}
434
435	PyObject PyString_Encode(const char s,
436	Py_ssize_t size,
437	const char *encoding,
438	const char *errors)
439	{
440	PyObject v, str;
441
442	str = PyString_FromStringAndSize(s, size);
443	if (str == NULL)
444	return NULL;
445	v = PyString_AsEncodedString(str, encoding, errors);
446	Py_DECREF(str);
447	return v;
448	}
449
450	PyObject PyString_AsEncodedObject(PyObject str,
451	const char *encoding,
452	const char *errors)
453	{
454	PyObject *v;
455
456	if (!PyString_Check(str)) {
457	PyErr_BadArgument();
458	goto onError;
459	}
460
461	if (encoding == NULL) {
462	#ifdef Py_USING_UNICODE
463	encoding = PyUnicode_GetDefaultEncoding();
464	#else
465	PyErr_SetString(PyExc_ValueError, "no encoding specified");
466	goto onError;
467	#endif
468	}
469
470	/* Encode via the codec registry */
471	v = PyCodec_Encode(str, encoding, errors);
472	if (v == NULL)
473	goto onError;
474
475	return v;
476
477	onError:
478	return NULL;
479	}
480
481	PyObject PyString_AsEncodedString(PyObject str,
482	const char *encoding,
483	const char *errors)
484	{
485	PyObject *v;
486
487	v = PyString_AsEncodedObject(str, encoding, errors);
488	if (v == NULL)
489	goto onError;
490
491	#ifdef Py_USING_UNICODE
492	/* Convert Unicode to a string using the default encoding */
493	if (PyUnicode_Check(v)) {
494	PyObject *temp = v;
495	v = PyUnicode_AsEncodedString(v, NULL, NULL);
496	Py_DECREF(temp);
497	if (v == NULL)
498	goto onError;
499	}
500	#endif
501	if (!PyString_Check(v)) {
502	PyErr_Format(PyExc_TypeError,
503	"encoder did not return a string object (type=%.400s)",
504	v->ob_type->tp_name);
505	Py_DECREF(v);
506	goto onError;
507	}
508
509	return v;
510
511	onError:
512	return NULL;
513	}
514
515	static void
516	string_dealloc(PyObject *op)
517	{
518	switch (PyString_CHECK_INTERNED(op)) {
519	case SSTATE_NOT_INTERNED:
520	break;
521
522	case SSTATE_INTERNED_MORTAL:
523	/* revive dead object temporarily for DelItem */
524	op->ob_refcnt = 3;
525	if (PyDict_DelItem(interned, op) != 0)
526	Py_FatalError(
527	"deletion of interned string failed");
528	break;
529
530	case SSTATE_INTERNED_IMMORTAL:
531	Py_FatalError("Immortal interned string died.");
532
533	default:
534	Py_FatalError("Inconsistent interned string state.");
535	}
536	op->ob_type->tp_free(op);
537	}
538
539	/* Unescape a backslash-escaped string. If unicode is non-zero,
540	the string is a u-literal. If recode_encoding is non-zero,
541	the string is UTF-8 encoded and should be re-encoded in the
542	specified encoding. */
543
544	PyObject PyString_DecodeEscape(const char s,
545	Py_ssize_t len,
546	const char *errors,
547	Py_ssize_t unicode,
548	const char *recode_encoding)
549	{
550	int c;
551	char p, buf;
552	const char *end;
553	PyObject *v;
554	Py_ssize_t newlen = recode_encoding ? 4*len:len;
555	v = PyString_FromStringAndSize((char *)NULL, newlen);
556	if (v == NULL)
557	return NULL;
558	p = buf = PyString_AsString(v);
559	end = s + len;
560	while (s < end) {
561	if (*s != '\\') {
562	non_esc:
563	#ifdef Py_USING_UNICODE
564	if (recode_encoding && (*s & 0x80)) {
565	PyObject u, w;
566	char *r;
567	const char* t;
568	Py_ssize_t rn;
569	t = s;
570	/* Decode non-ASCII bytes as UTF-8. */
571	while (t < end && (*t & 0x80)) t++;
572	u = PyUnicode_DecodeUTF8(s, t - s, errors);
573	if(!u) goto failed;
574
575	/* Recode them in target encoding. */
576	w = PyUnicode_AsEncodedString(
577	u, recode_encoding, errors);
578	Py_DECREF(u);
579	if (!w) goto failed;
580
581	/* Append bytes to output buffer. */
582	assert(PyString_Check(w));
583	r = PyString_AS_STRING(w);
584	rn = PyString_GET_SIZE(w);
585	Py_MEMCPY(p, r, rn);
586	p += rn;
587	Py_DECREF(w);
588	s = t;
589	} else {
590	p++ = s++;
591	}
592	#else
593	p++ = s++;
594	#endif
595	continue;
596	}
597	s++;
598	if (s==end) {
599	PyErr_SetString(PyExc_ValueError,
600	"Trailing \\ in string");
601	goto failed;
602	}
603	switch (*s++) {
604	/* XXX This assumes ASCII! */
605	case '\n': break;
606	case '\\': *p++ = '\\'; break;
607	case '\'': *p++ = '\''; break;
608	case '\"': *p++ = '\"'; break;
609	case 'b': *p++ = '\b'; break;
610	case 'f': p++ = '\014'; break; / FF */
611	case 't': *p++ = '\t'; break;
612	case 'n': *p++ = '\n'; break;
613	case 'r': *p++ = '\r'; break;
614	case 'v': p++ = '\013'; break; / VT */
615	case 'a': p++ = '\007'; break; / BEL, not classic C */
616	case '0': case '1': case '2': case '3':
617	case '4': case '5': case '6': case '7':
618	c = s[-1] - '0';
619	if ('0' <= s && s <= '7') {
620	c = (c<<3) + *s++ - '0';
621	if ('0' <= s && s <= '7')
622	c = (c<<3) + *s++ - '0';
623	}
624	*p++ = c;
625	break;
626	case 'x':
627	if (isxdigit(Py_CHARMASK(s[0]))
628	&& isxdigit(Py_CHARMASK(s[1]))) {
629	unsigned int x = 0;
630	c = Py_CHARMASK(*s);
631	s++;
632	if (isdigit(c))
633	x = c - '0';
634	else if (islower(c))
635	x = 10 + c - 'a';
636	else
637	x = 10 + c - 'A';
638	x = x << 4;
639	c = Py_CHARMASK(*s);
640	s++;
641	if (isdigit(c))
642	x += c - '0';
643	else if (islower(c))
644	x += 10 + c - 'a';
645	else
646	x += 10 + c - 'A';
647	*p++ = x;
648	break;
649	}
650	if (!errors \|\| strcmp(errors, "strict") == 0) {
651	PyErr_SetString(PyExc_ValueError,
652	"invalid \\x escape");
653	goto failed;
654	}
655	if (strcmp(errors, "replace") == 0) {
656	*p++ = '?';
657	} else if (strcmp(errors, "ignore") == 0)
658	/* do nothing */;
659	else {
660	PyErr_Format(PyExc_ValueError,
661	"decoding error; "
662	"unknown error handling code: %.400s",
663	errors);
664	goto failed;
665	}
666	#ifndef Py_USING_UNICODE
667	case 'u':
668	case 'U':
669	case 'N':
670	if (unicode) {
671	PyErr_SetString(PyExc_ValueError,
672	"Unicode escapes not legal "
673	"when Unicode disabled");
674	goto failed;
675	}
676	#endif
677	default:
678	*p++ = '\\';
679	s--;
680	goto non_esc; /* an arbitry number of unescaped
681	UTF-8 bytes may follow. */
682	}
683	}
684	if (p-buf < newlen)
685	_PyString_Resize(&v, p - buf);
686	return v;
687	failed:
688	Py_DECREF(v);
689	return NULL;
690	}
691
692	/* -------------------------------------------------------------------- */
693	/* object api */
694
695	static Py_ssize_t
696	string_getsize(register PyObject *op)
697	{
698	char *s;
699	Py_ssize_t len;
700	if (PyString_AsStringAndSize(op, &s, &len))
701	return -1;
702	return len;
703	}
704
705	static /const/ char *
706	string_getbuffer(register PyObject *op)
707	{
708	char *s;
709	Py_ssize_t len;
710	if (PyString_AsStringAndSize(op, &s, &len))
711	return NULL;
712	return s;
713	}
714
715	Py_ssize_t
716	PyString_Size(register PyObject *op)
717	{
718	if (!PyString_Check(op))
719	return string_getsize(op);
720	return ((PyStringObject *)op) -> ob_size;
721	}
722
723	/const/ char *
724	PyString_AsString(register PyObject *op)
725	{
726	if (!PyString_Check(op))
727	return string_getbuffer(op);
728	return ((PyStringObject *)op) -> ob_sval;
729	}
730
731	int
732	PyString_AsStringAndSize(register PyObject *obj,
733	register char **s,
734	register Py_ssize_t *len)
735	{
736	if (s == NULL) {
737	PyErr_BadInternalCall();
738	return -1;
739	}
740
741	if (!PyString_Check(obj)) {
742	#ifdef Py_USING_UNICODE
743	if (PyUnicode_Check(obj)) {
744	obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
745	if (obj == NULL)
746	return -1;
747	}
748	else
749	#endif
750	{
751	PyErr_Format(PyExc_TypeError,
752	"expected string or Unicode object, "
753	"%.200s found", obj->ob_type->tp_name);
754	return -1;
755	}
756	}
757
758	*s = PyString_AS_STRING(obj);
759	if (len != NULL)
760	*len = PyString_GET_SIZE(obj);
761	else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
762	PyErr_SetString(PyExc_TypeError,
763	"expected string without null bytes");
764	return -1;
765	}
766	return 0;
767	}
768
769	/* -------------------------------------------------------------------- */
770	/* Methods */
771
/* Parameter macros set up ahead of the stringlib headers included below.
   NOTE(review): presumably those headers are templates that read these
   names; their contents are not visible from this file -- confirm. */

/* element type of the character data */
#define STRINGLIB_CHAR char

/* buffer comparison, length accessor, constructor and data accessor */
#define STRINGLIB_CMP memcmp
#define STRINGLIB_LEN PyString_GET_SIZE
#define STRINGLIB_NEW PyString_FromStringAndSize
#define STRINGLIB_STR PyString_AS_STRING

/* the shared empty string object defined near the top of this file */
#define STRINGLIB_EMPTY nullstring

#include "stringlib/fastsearch.h"

#include "stringlib/count.h"
#include "stringlib/find.h"
#include "stringlib/partition.h"
786
787
788	static int
789	string_print(PyStringObject op, FILE fp, int flags)
790	{
791	Py_ssize_t i;
792	char c;
793	int quote;
794
795	/* XXX Ought to check for interrupts when writing long strings */
796	if (! PyString_CheckExact(op)) {
797	int ret;
798	/* A str subclass may have its own __str__ method. */
799	op = (PyStringObject ) PyObject_Str((PyObject )op);
800	if (op == NULL)
801	return -1;
802	ret = string_print(op, fp, flags);
803	Py_DECREF(op);
804	return ret;
805	}
806	if (flags & Py_PRINT_RAW) {
807	#ifdef __VMS
808	if (op->ob_size) fwrite(op->ob_sval, (int) op->ob_size, 1, fp);
809	#else
810	fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
811	#endif
812	return 0;
813	}
814
815	/* figure out which quote to use; single is preferred */
816	quote = '\'';
817	if (memchr(op->ob_sval, '\'', op->ob_size) &&
818	!memchr(op->ob_sval, '"', op->ob_size))
819	quote = '"';
820
821	fputc(quote, fp);
822	for (i = 0; i < op->ob_size; i++) {
823	c = op->ob_sval[i];
824	if (c == quote \|\| c == '\\')
825	fprintf(fp, "\\%c", c);
826	else if (c == '\t')
827	fprintf(fp, "\\t");
828	else if (c == '\n')
829	fprintf(fp, "\\n");
830	else if (c == '\r')
831	fprintf(fp, "\\r");
832	else if (c < ' ' \|\| c >= 0x7f)
833	fprintf(fp, "\\x%02x", c & 0xff);
834	else
835	fputc(c, fp);
836	}
837	fputc(quote, fp);
838	return 0;
839	}
840
841	PyObject *
842	PyString_Repr(PyObject *obj, int smartquotes)
843	{
844	register PyStringObject* op = (PyStringObject*) obj;
845	size_t newsize = 2 + 4 * op->ob_size;
846	PyObject *v;
847	if (newsize > PY_SSIZE_T_MAX) {
848	PyErr_SetString(PyExc_OverflowError,
849	"string is too large to make repr");
850	}
851	v = PyString_FromStringAndSize((char *)NULL, newsize);
852	if (v == NULL) {
853	return NULL;
854	}
855	else {
856	register Py_ssize_t i;
857	register char c;
858	register char *p;
859	int quote;
860
861	/* figure out which quote to use; single is preferred */
862	quote = '\'';
863	if (smartquotes &&
864	memchr(op->ob_sval, '\'', op->ob_size) &&
865	!memchr(op->ob_sval, '"', op->ob_size))
866	quote = '"';
867
868	p = PyString_AS_STRING(v);
869	*p++ = quote;
870	for (i = 0; i < op->ob_size; i++) {
871	/* There's at least enough room for a hex escape
872	and a closing quote. */
873	assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
874	c = op->ob_sval[i];
875	if (c == quote \|\| c == '\\')
876	p++ = '\\', p++ = c;
877	else if (c == '\t')
878	p++ = '\\', p++ = 't';
879	else if (c == '\n')
880	p++ = '\\', p++ = 'n';
881	else if (c == '\r')
882	p++ = '\\', p++ = 'r';
883	else if (c < ' ' \|\| c >= 0x7f) {
884	/* For performance, we don't want to call
885	PyOS_snprintf here (extra layers of
886	function call). */
887	sprintf(p, "\\x%02x", c & 0xff);
888	p += 4;
889	}
890	else
891	*p++ = c;
892	}
893	assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
894	*p++ = quote;
895	*p = '\0';
896	_PyString_Resize(
897	&v, (p - PyString_AS_STRING(v)));
898	return v;
899	}
900	}
901
902	static PyObject *
903	string_repr(PyObject *op)
904	{
905	return PyString_Repr(op, 1);
906	}
907
908	static PyObject *
909	string_str(PyObject *s)
910	{
911	assert(PyString_Check(s));
912	if (PyString_CheckExact(s)) {
913	Py_INCREF(s);
914	return s;
915	}
916	else {
917	/* Subtype -- return genuine string with the same value. */
918	PyStringObject t = (PyStringObject ) s;
919	return PyString_FromStringAndSize(t->ob_sval, t->ob_size);
920	}
921	}
922
923	static Py_ssize_t
924	string_length(PyStringObject *a)
925	{
926	return a->ob_size;
927	}
928
929	static PyObject *
930	string_concat(register PyStringObject a, register PyObject bb)
931	{
932	register Py_ssize_t size;
933	register PyStringObject *op;
934	if (!PyString_Check(bb)) {
935	#ifdef Py_USING_UNICODE
936	if (PyUnicode_Check(bb))
937	return PyUnicode_Concat((PyObject *)a, bb);
938	#endif
939	PyErr_Format(PyExc_TypeError,
940	"cannot concatenate 'str' and '%.200s' objects",
941	bb->ob_type->tp_name);
942	return NULL;
943	}
944	#define b ((PyStringObject *)bb)
945	/* Optimize cases with empty left or right operand */
946	if ((a->ob_size == 0 \|\| b->ob_size == 0) &&
947	PyString_CheckExact(a) && PyString_CheckExact(b)) {
948	if (a->ob_size == 0) {
949	Py_INCREF(bb);
950	return bb;
951	}
952	Py_INCREF(a);
953	return (PyObject *)a;
954	}
955	size = a->ob_size + b->ob_size;
956	if (size < 0) {
957	PyErr_SetString(PyExc_OverflowError,
958	"strings are too large to concat");
959	return NULL;
960	}
961
962	/* Inline PyObject_NewVar */
963	op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
964	if (op == NULL)
965	return PyErr_NoMemory();
966	PyObject_INIT_VAR(op, &PyString_Type, size);
967	op->ob_shash = -1;
968	op->ob_sstate = SSTATE_NOT_INTERNED;
969	Py_MEMCPY(op->ob_sval, a->ob_sval, a->ob_size);
970	Py_MEMCPY(op->ob_sval + a->ob_size, b->ob_sval, b->ob_size);
971	op->ob_sval[size] = '\0';
972	return (PyObject *) op;
973	#undef b
974	}
975
976	static PyObject *
977	string_repeat(register PyStringObject *a, register Py_ssize_t n)
978	{
979	register Py_ssize_t i;
980	register Py_ssize_t j;
981	register Py_ssize_t size;
982	register PyStringObject *op;
983	size_t nbytes;
984	if (n < 0)
985	n = 0;
986	/* watch out for overflows: the size can overflow int,
987	* and the # of bytes needed can overflow size_t
988	*/
989	size = a->ob_size * n;
990	if (n && size / n != a->ob_size) {
991	PyErr_SetString(PyExc_OverflowError,
992	"repeated string is too long");
993	return NULL;
994	}
995	if (size == a->ob_size && PyString_CheckExact(a)) {
996	Py_INCREF(a);
997	return (PyObject *)a;
998	}
999	nbytes = (size_t)size;
1000	if (nbytes + sizeof(PyStringObject) <= nbytes) {
1001	PyErr_SetString(PyExc_OverflowError,
1002	"repeated string is too long");
1003	return NULL;
1004	}
1005	op = (PyStringObject *)
1006	PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
1007	if (op == NULL)
1008	return PyErr_NoMemory();
1009	PyObject_INIT_VAR(op, &PyString_Type, size);
1010	op->ob_shash = -1;
1011	op->ob_sstate = SSTATE_NOT_INTERNED;
1012	op->ob_sval[size] = '\0';
1013	if (a->ob_size == 1 && n > 0) {
1014	memset(op->ob_sval, a->ob_sval[0] , n);
1015	return (PyObject *) op;
1016	}
1017	i = 0;
1018	if (i < size) {
1019	Py_MEMCPY(op->ob_sval, a->ob_sval, a->ob_size);
1020	i = a->ob_size;
1021	}
1022	while (i < size) {
1023	j = (i <= size-i) ? i : size-i;
1024	Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
1025	i += j;
1026	}
1027	return (PyObject *) op;
1028	}
1029
1030	/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1031
1032	static PyObject *
1033	string_slice(register PyStringObject *a, register Py_ssize_t i,
1034	register Py_ssize_t j)
1035	/* j -- may be negative! */
1036	{
1037	if (i < 0)
1038	i = 0;
1039	if (j < 0)
1040	j = 0; /* Avoid signed/unsigned bug in next line */
1041	if (j > a->ob_size)
1042	j = a->ob_size;
1043	if (i == 0 && j == a->ob_size && PyString_CheckExact(a)) {
1044	/* It's the same as a */
1045	Py_INCREF(a);
1046	return (PyObject *)a;
1047	}
1048	if (j < i)
1049	j = i;
1050	return PyString_FromStringAndSize(a->ob_sval + i, j-i);
1051	}
1052
1053	static int
1054	string_contains(PyObject str_obj, PyObject sub_obj)
1055	{
1056	if (!PyString_CheckExact(sub_obj)) {
1057	#ifdef Py_USING_UNICODE
1058	if (PyUnicode_Check(sub_obj))
1059	return PyUnicode_Contains(str_obj, sub_obj);
1060	#endif
1061	if (!PyString_Check(sub_obj)) {
1062	PyErr_SetString(PyExc_TypeError,
1063	"'in <string>' requires string as left operand");
1064	return -1;
1065	}
1066	}
1067
1068	return stringlib_contains_obj(str_obj, sub_obj);
1069	}
1070
1071	static PyObject *
1072	string_item(PyStringObject *a, register Py_ssize_t i)
1073	{
1074	char pchar;
1075	PyObject *v;
1076	if (i < 0 \|\| i >= a->ob_size) {
1077	PyErr_SetString(PyExc_IndexError, "string index out of range");
1078	return NULL;
1079	}
1080	pchar = a->ob_sval[i];
1081	v = (PyObject *)characters[pchar & UCHAR_MAX];
1082	if (v == NULL)
1083	v = PyString_FromStringAndSize(&pchar, 1);
1084	else {
1085	#ifdef COUNT_ALLOCS
1086	one_strings++;
1087	#endif
1088	Py_INCREF(v);
1089	}
1090	return v;
1091	}
1092
1093	static PyObject*
1094	string_richcompare(PyStringObject a, PyStringObject b, int op)
1095	{
1096	int c;
1097	Py_ssize_t len_a, len_b;
1098	Py_ssize_t min_len;
1099	PyObject *result;
1100
1101	/* Make sure both arguments are strings. */
1102	if (!(PyString_Check(a) && PyString_Check(b))) {
1103	result = Py_NotImplemented;
1104	goto out;
1105	}
1106	if (a == b) {
1107	switch (op) {
1108	case Py_EQ:case Py_LE:case Py_GE:
1109	result = Py_True;
1110	goto out;
1111	case Py_NE:case Py_LT:case Py_GT:
1112	result = Py_False;
1113	goto out;
1114	}
1115	}
1116	if (op == Py_EQ) {
1117	/* Supporting Py_NE here as well does not save
1118	much time, since Py_NE is rarely used. */
1119	if (a->ob_size == b->ob_size
1120	&& (a->ob_sval[0] == b->ob_sval[0]
1121	&& memcmp(a->ob_sval, b->ob_sval,
1122	a->ob_size) == 0)) {
1123	result = Py_True;
1124	} else {
1125	result = Py_False;
1126	}
1127	goto out;
1128	}
1129	len_a = a->ob_size; len_b = b->ob_size;
1130	min_len = (len_a < len_b) ? len_a : len_b;
1131	if (min_len > 0) {
1132	c = Py_CHARMASK(a->ob_sval) - Py_CHARMASK(b->ob_sval);
1133	if (c==0)
1134	c = memcmp(a->ob_sval, b->ob_sval, min_len);
1135	}else
1136	c = 0;
1137	if (c == 0)
1138	c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1139	switch (op) {
1140	case Py_LT: c = c < 0; break;
1141	case Py_LE: c = c <= 0; break;
1142	case Py_EQ: assert(0); break; /* unreachable */
1143	case Py_NE: c = c != 0; break;
1144	case Py_GT: c = c > 0; break;
1145	case Py_GE: c = c >= 0; break;
1146	default:
1147	result = Py_NotImplemented;
1148	goto out;
1149	}
1150	result = c ? Py_True : Py_False;
1151	out:
1152	Py_INCREF(result);
1153	return result;
1154	}
1155
1156	int
1157	_PyString_Eq(PyObject o1, PyObject o2)
1158	{
1159	PyStringObject a = (PyStringObject) o1;
1160	PyStringObject b = (PyStringObject) o2;
1161	return a->ob_size == b->ob_size
1162	&& a->ob_sval == b->ob_sval
1163	&& memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
1164	}
1165
1166	static long
1167	string_hash(PyStringObject *a)
1168	{
1169	register Py_ssize_t len;
1170	register unsigned char *p;
1171	register long x;
1172
1173	if (a->ob_shash != -1)
1174	return a->ob_shash;
1175	len = a->ob_size;
1176	p = (unsigned char *) a->ob_sval;
1177	x = *p << 7;
1178	while (--len >= 0)
1179	x = (1000003x) ^ p++;
1180	x ^= a->ob_size;
1181	if (x == -1)
1182	x = -2;
1183	a->ob_shash = x;
1184	return x;
1185	}
1186
1187	static PyObject*
1188	string_subscript(PyStringObject* self, PyObject* item)
1189	{
1190	if (PyIndex_Check(item)) {
1191	Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1192	if (i == -1 && PyErr_Occurred())
1193	return NULL;
1194	if (i < 0)
1195	i += PyString_GET_SIZE(self);
1196	return string_item(self, i);
1197	}
1198	else if (PySlice_Check(item)) {
1199	Py_ssize_t start, stop, step, slicelength, cur, i;
1200	char* source_buf;
1201	char* result_buf;
1202	PyObject* result;
1203
1204	if (PySlice_GetIndicesEx((PySliceObject*)item,
1205	PyString_GET_SIZE(self),
1206	&start, &stop, &step, &slicelength) < 0) {
1207	return NULL;
1208	}
1209
1210	if (slicelength <= 0) {
1211	return PyString_FromStringAndSize("", 0);
1212	}
1213	else {
1214	source_buf = PyString_AsString((PyObject*)self);
1215	result_buf = (char *)PyMem_Malloc(slicelength);
1216	if (result_buf == NULL)
1217	return PyErr_NoMemory();
1218
1219	for (cur = start, i = 0; i < slicelength;
1220	cur += step, i++) {
1221	result_buf[i] = source_buf[cur];
1222	}
1223
1224	result = PyString_FromStringAndSize(result_buf,
1225	slicelength);
1226	PyMem_Free(result_buf);
1227	return result;
1228	}
1229	}
1230	else {
1231	PyErr_SetString(PyExc_TypeError,
1232	"string indices must be integers");
1233	return NULL;
1234	}
1235	}
1236
1237	static Py_ssize_t
1238	string_buffer_getreadbuf(PyStringObject self, Py_ssize_t index, const void *ptr)
1239	{
1240	if ( index != 0 ) {
1241	PyErr_SetString(PyExc_SystemError,
1242	"accessing non-existent string segment");
1243	return -1;
1244	}
1245	ptr = (void )self->ob_sval;
1246	return self->ob_size;
1247	}
1248
1249	static Py_ssize_t
1250	string_buffer_getwritebuf(PyStringObject self, Py_ssize_t index, const void *ptr)
1251	{
1252	PyErr_SetString(PyExc_TypeError,
1253	"Cannot use string as modifiable buffer");
1254	return -1;
1255	}
1256
1257	static Py_ssize_t
1258	string_buffer_getsegcount(PyStringObject self, Py_ssize_t lenp)
1259	{
1260	if ( lenp )
1261	*lenp = self->ob_size;
1262	return 1;
1263	}
1264
1265	static Py_ssize_t
1266	string_buffer_getcharbuf(PyStringObject self, Py_ssize_t index, const char *ptr)
1267	{
1268	if ( index != 0 ) {
1269	PyErr_SetString(PyExc_SystemError,
1270	"accessing non-existent string segment");
1271	return -1;
1272	}
1273	*ptr = self->ob_sval;
1274	return self->ob_size;
1275	}
1276
1277	static PySequenceMethods string_as_sequence = {
1278	(lenfunc)string_length, /sq_length/
1279	(binaryfunc)string_concat, /sq_concat/
1280	(ssizeargfunc)string_repeat, /sq_repeat/
1281	(ssizeargfunc)string_item, /sq_item/
1282	(ssizessizeargfunc)string_slice, /sq_slice/
1283	0, /sq_ass_item/
1284	0, /sq_ass_slice/
1285	(objobjproc)string_contains /sq_contains/
1286	};
1287
1288	static PyMappingMethods string_as_mapping = {
1289	(lenfunc)string_length,
1290	(binaryfunc)string_subscript,
1291	0,
1292	};
1293
1294	static PyBufferProcs string_as_buffer = {
1295	(readbufferproc)string_buffer_getreadbuf,
1296	(writebufferproc)string_buffer_getwritebuf,
1297	(segcountproc)string_buffer_getsegcount,
1298	(charbufferproc)string_buffer_getcharbuf,
1299	};
1300
1301
1302
1303
/* Strip modes; each value is also an index into stripformat[] below. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Argument format strings, indexed by the strip mode above */
static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};

/* Method name for a strip mode: the format string minus its 3-character
   "|O:" prefix. */
#define STRIPNAME(i) (stripformat[(i)] + 3)
1312
1313
/* True when the `length` bytes of `pattern` occur in `target` at `offset`.
   The first and last bytes are compared directly; memcmp() then checks the
   length-2 bytes in between.
   Don't call if length < 2 (the memcmp() size would go negative). */
#define Py_STRING_MATCH(target, offset, pattern, length)                \
    ((target)[(offset)] == (pattern)[0] &&                              \
     (target)[(offset) + (length) - 1] == (pattern)[(length) - 1] &&    \
     !memcmp((target) + (offset) + 1, (pattern) + 1, (length) - 2))
1319
1320
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes. Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16. Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields). For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things.*/

#define MAX_PREALLOC 12

/* 5 splits gives 6 elements; the result is capped at MAX_PREALLOC */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit) + 1)
1333
/* Append the piece data[left:right] to `list` as a new string.
   Relies on the caller's `str` and `list` variables and `onError` label:
   control jumps to onError if the string cannot be created or appended.
   The reference held in `str` is dropped on both paths. */
#define SPLIT_APPEND(data, left, right)                         \
    str = PyString_FromStringAndSize((data) + (left),           \
                                     (right) - (left));         \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (PyList_Append(list, str)) {                             \
        Py_DECREF(str);                                         \
        goto onError;                                           \
    }                                                           \
    Py_DECREF(str);
1345
/* Store the piece data[left:right] as item number `count` of `list`.
   Relies on the caller's `str`, `list` and `count` variables and `onError`
   label.  The first MAX_PREALLOC items go straight into the preallocated
   slots via PyList_SET_ITEM; later ones are appended, after which the
   reference held in `str` is dropped.  `count` is then incremented. */
#define SPLIT_ADD(data, left, right) {                          \
    str = PyString_FromStringAndSize((data) + (left),           \
                                     (right) - (left));         \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (count < MAX_PREALLOC) {                                 \
        PyList_SET_ITEM(list, count, str);                      \
    }                                                           \
    else if (PyList_Append(list, str)) {                        \
        Py_DECREF(str);                                         \
        goto onError;                                           \
    }                                                           \
    else {                                                      \
        Py_DECREF(str);                                         \
    }                                                           \
    count++; }
1362
/* Always force the list to the expected size: overwrite ob_size with the
   caller's `count` of items actually stored. */
#define FIX_PREALLOC_SIZE(list) ((PyListObject *)(list))->ob_size = count
1365
/* Index-advancing helpers.  Each moves index `i` through buffer `s` while
   the stated condition holds: forward up to `len` for the SKIP_* pair,
   backward down to -1 for the RSKIP_* pair. */
#define SKIP_SPACE(s, i, len) \
    { while ((i) < (len) && isspace(Py_CHARMASK((s)[(i)]))) (i)++; }
#define SKIP_NONSPACE(s, i, len) \
    { while ((i) < (len) && !isspace(Py_CHARMASK((s)[(i)]))) (i)++; }
#define RSKIP_SPACE(s, i) \
    { while ((i) >= 0 && isspace(Py_CHARMASK((s)[(i)]))) (i)--; }
#define RSKIP_NONSPACE(s, i) \
    { while ((i) >= 0 && !isspace(Py_CHARMASK((s)[(i)]))) (i)--; }
1370
1371	Py_LOCAL_INLINE(PyObject *)
1372	split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
1373	{
1374	Py_ssize_t i, j, count=0;
1375	PyObject *str;
1376	PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
1377
1378	if (list == NULL)
1379	return NULL;
1380
1381	i = j = 0;
1382
1383	while (maxsplit-- > 0) {
1384	SKIP_SPACE(s, i, len);
1385	if (i==len) break;
1386	j = i; i++;
1387	SKIP_NONSPACE(s, i, len);
1388	SPLIT_ADD(s, j, i);
1389	}
1390
1391	if (i < len) {
1392	/* Only occurs when maxsplit was reached */
1393	/* Skip any remaining whitespace and copy to end of string */
1394	SKIP_SPACE(s, i, len);
1395	if (i != len)
1396	SPLIT_ADD(s, i, len);
1397	}
1398	FIX_PREALLOC_SIZE(list);
1399	return list;
1400	onError:
1401	Py_DECREF(list);
1402	return NULL;
1403	}
1404
1405	Py_LOCAL_INLINE(PyObject *)
1406	split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
1407	{
1408	register Py_ssize_t i, j, count=0;
1409	PyObject *str;
1410	PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
1411
1412	if (list == NULL)
1413	return NULL;
1414
1415	i = j = 0;
1416	while ((j < len) && (maxcount-- > 0)) {
1417	for(; j<len; j++) {
1418	/* I found that using memchr makes no difference */
1419	if (s[j] == ch) {
1420	SPLIT_ADD(s, i, j);
1421	i = j = j + 1;
1422	break;
1423	}
1424	}
1425	}
1426	if (i <= len) {
1427	SPLIT_ADD(s, i, len);
1428	}
1429	FIX_PREALLOC_SIZE(list);
1430	return list;
1431
1432	onError:
1433	Py_DECREF(list);
1434	return NULL;
1435	}
1436
1437	PyDoc_STRVAR(split__doc__,
1438	"S.split([sep [,maxsplit]]) -> list of strings\n\
1439	\n\
1440	Return a list of the words in the string S, using sep as the\n\
1441	delimiter string. If maxsplit is given, at most maxsplit\n\
1442	splits are done. If sep is not specified or is None, any\n\
1443	whitespace string is a separator.");
1444
1445	static PyObject *
1446	string_split(PyStringObject self, PyObject args)
1447	{
1448	Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
1449	Py_ssize_t maxsplit = -1, count=0;
1450	const char s = PyString_AS_STRING(self), sub;
1451	PyObject list, str, *subobj = Py_None;
1452	#ifdef USE_FAST
1453	Py_ssize_t pos;
1454	#endif
1455
1456	if (!PyArg_ParseTuple(args, "\|On:split", &subobj, &maxsplit))
1457	return NULL;
1458	if (maxsplit < 0)
1459	maxsplit = PY_SSIZE_T_MAX;
1460	if (subobj == Py_None)
1461	return split_whitespace(s, len, maxsplit);
1462	if (PyString_Check(subobj)) {
1463	sub = PyString_AS_STRING(subobj);
1464	n = PyString_GET_SIZE(subobj);
1465	}
1466	#ifdef Py_USING_UNICODE
1467	else if (PyUnicode_Check(subobj))
1468	return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
1469	#endif
1470	else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1471	return NULL;
1472
1473	if (n == 0) {
1474	PyErr_SetString(PyExc_ValueError, "empty separator");
1475	return NULL;
1476	}
1477	else if (n == 1)
1478	return split_char(s, len, sub[0], maxsplit);
1479
1480	list = PyList_New(PREALLOC_SIZE(maxsplit));
1481	if (list == NULL)
1482	return NULL;
1483
1484	#ifdef USE_FAST
1485	i = j = 0;
1486	while (maxsplit-- > 0) {
1487	pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
1488	if (pos < 0)
1489	break;
1490	j = i+pos;
1491	SPLIT_ADD(s, i, j);
1492	i = j + n;
1493	}
1494	#else
1495	i = j = 0;
1496	while ((j+n <= len) && (maxsplit-- > 0)) {
1497	for (; j+n <= len; j++) {
1498	if (Py_STRING_MATCH(s, j, sub, n)) {
1499	SPLIT_ADD(s, i, j);
1500	i = j = j + n;
1501	break;
1502	}
1503	}
1504	}
1505	#endif
1506	SPLIT_ADD(s, i, len);
1507	FIX_PREALLOC_SIZE(list);
1508	return list;
1509
1510	onError:
1511	Py_DECREF(list);
1512	return NULL;
1513	}
1514
1515	PyDoc_STRVAR(partition__doc__,
1516	"S.partition(sep) -> (head, sep, tail)\n\
1517	\n\
1518	Searches for the separator sep in S, and returns the part before it,\n\
1519	the separator itself, and the part after it. If the separator is not\n\
1520	found, returns S and two empty strings.");
1521
1522	static PyObject *
1523	string_partition(PyStringObject self, PyObject sep_obj)
1524	{
1525	const char *sep;
1526	Py_ssize_t sep_len;
1527
1528	if (PyString_Check(sep_obj)) {
1529	sep = PyString_AS_STRING(sep_obj);
1530	sep_len = PyString_GET_SIZE(sep_obj);
1531	}
1532	#ifdef Py_USING_UNICODE
1533	else if (PyUnicode_Check(sep_obj))
1534	return PyUnicode_Partition((PyObject *) self, sep_obj);
1535	#endif
1536	else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1537	return NULL;
1538
1539	return stringlib_partition(
1540	(PyObject*) self,
1541	PyString_AS_STRING(self), PyString_GET_SIZE(self),
1542	sep_obj, sep, sep_len
1543	);
1544	}
1545
1546	PyDoc_STRVAR(rpartition__doc__,
1547	"S.rpartition(sep) -> (tail, sep, head)\n\
1548	\n\
1549	Searches for the separator sep in S, starting at the end of S, and returns\n\
1550	the part before it, the separator itself, and the part after it. If the\n\
1551	separator is not found, returns two empty strings and S.");
1552
1553	static PyObject *
1554	string_rpartition(PyStringObject self, PyObject sep_obj)
1555	{
1556	const char *sep;
1557	Py_ssize_t sep_len;
1558
1559	if (PyString_Check(sep_obj)) {
1560	sep = PyString_AS_STRING(sep_obj);
1561	sep_len = PyString_GET_SIZE(sep_obj);
1562	}
1563	#ifdef Py_USING_UNICODE
1564	else if (PyUnicode_Check(sep_obj))
1565	return PyUnicode_Partition((PyObject *) self, sep_obj);
1566	#endif
1567	else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1568	return NULL;
1569
1570	return stringlib_rpartition(
1571	(PyObject*) self,
1572	PyString_AS_STRING(self), PyString_GET_SIZE(self),
1573	sep_obj, sep, sep_len
1574	);
1575	}
1576
1577	Py_LOCAL_INLINE(PyObject *)
1578	rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
1579	{
1580	Py_ssize_t i, j, count=0;
1581	PyObject *str;
1582	PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
1583
1584	if (list == NULL)
1585	return NULL;
1586
1587	i = j = len-1;
1588
1589	while (maxsplit-- > 0) {
1590	RSKIP_SPACE(s, i);
1591	if (i<0) break;
1592	j = i; i--;
1593	RSKIP_NONSPACE(s, i);
1594	SPLIT_ADD(s, i + 1, j + 1);
1595	}
1596	if (i >= 0) {
1597	/* Only occurs when maxsplit was reached */
1598	/* Skip any remaining whitespace and copy to beginning of string */
1599	RSKIP_SPACE(s, i);
1600	if (i >= 0)
1601	SPLIT_ADD(s, 0, i + 1);
1602
1603	}
1604	FIX_PREALLOC_SIZE(list);
1605	if (PyList_Reverse(list) < 0)
1606	goto onError;
1607	return list;
1608	onError:
1609	Py_DECREF(list);
1610	return NULL;
1611	}
1612
1613	Py_LOCAL_INLINE(PyObject *)
1614	rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
1615	{
1616	register Py_ssize_t i, j, count=0;
1617	PyObject *str;
1618	PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
1619
1620	if (list == NULL)
1621	return NULL;
1622
1623	i = j = len - 1;
1624	while ((i >= 0) && (maxcount-- > 0)) {
1625	for (; i >= 0; i--) {
1626	if (s[i] == ch) {
1627	SPLIT_ADD(s, i + 1, j + 1);
1628	j = i = i - 1;
1629	break;
1630	}
1631	}
1632	}
1633	if (j >= -1) {
1634	SPLIT_ADD(s, 0, j + 1);
1635	}
1636	FIX_PREALLOC_SIZE(list);
1637	if (PyList_Reverse(list) < 0)
1638	goto onError;
1639	return list;
1640
1641	onError:
1642	Py_DECREF(list);
1643	return NULL;
1644	}
1645
1646	PyDoc_STRVAR(rsplit__doc__,
1647	"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1648	\n\
1649	Return a list of the words in the string S, using sep as the\n\
1650	delimiter string, starting at the end of the string and working\n\
1651	to the front. If maxsplit is given, at most maxsplit splits are\n\
1652	done. If sep is not specified or is None, any whitespace string\n\
1653	is a separator.");
1654
1655	static PyObject *
1656	string_rsplit(PyStringObject self, PyObject args)
1657	{
1658	Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
1659	Py_ssize_t maxsplit = -1, count=0;
1660	const char s = PyString_AS_STRING(self), sub;
1661	PyObject list, str, *subobj = Py_None;
1662
1663	if (!PyArg_ParseTuple(args, "\|On:rsplit", &subobj, &maxsplit))
1664	return NULL;
1665	if (maxsplit < 0)
1666	maxsplit = PY_SSIZE_T_MAX;
1667	if (subobj == Py_None)
1668	return rsplit_whitespace(s, len, maxsplit);
1669	if (PyString_Check(subobj)) {
1670	sub = PyString_AS_STRING(subobj);
1671	n = PyString_GET_SIZE(subobj);
1672	}
1673	#ifdef Py_USING_UNICODE
1674	else if (PyUnicode_Check(subobj))
1675	return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
1676	#endif
1677	else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1678	return NULL;
1679
1680	if (n == 0) {
1681	PyErr_SetString(PyExc_ValueError, "empty separator");
1682	return NULL;
1683	}
1684	else if (n == 1)
1685	return rsplit_char(s, len, sub[0], maxsplit);
1686
1687	list = PyList_New(PREALLOC_SIZE(maxsplit));
1688	if (list == NULL)
1689	return NULL;
1690
1691	j = len;
1692	i = j - n;
1693
1694	while ( (i >= 0) && (maxsplit-- > 0) ) {
1695	for (; i>=0; i--) {
1696	if (Py_STRING_MATCH(s, i, sub, n)) {
1697	SPLIT_ADD(s, i + n, j);
1698	j = i;
1699	i -= n;
1700	break;
1701	}
1702	}
1703	}
1704	SPLIT_ADD(s, 0, j);
1705	FIX_PREALLOC_SIZE(list);
1706	if (PyList_Reverse(list) < 0)
1707	goto onError;
1708	return list;
1709
1710	onError:
1711	Py_DECREF(list);
1712	return NULL;
1713	}
1714
1715
1716	PyDoc_STRVAR(join__doc__,
1717	"S.join(sequence) -> string\n\
1718	\n\
1719	Return a string which is the concatenation of the strings in the\n\
1720	sequence. The separator between elements is S.");
1721
1722	static PyObject *
1723	string_join(PyStringObject self, PyObject orig)
1724	{
1725	char *sep = PyString_AS_STRING(self);
1726	const Py_ssize_t seplen = PyString_GET_SIZE(self);
1727	PyObject *res = NULL;
1728	char *p;
1729	Py_ssize_t seqlen = 0;
1730	size_t sz = 0;
1731	Py_ssize_t i;
1732	PyObject seq, item;
1733
1734	seq = PySequence_Fast(orig, "");
1735	if (seq == NULL) {
1736	return NULL;
1737	}
1738
1739	seqlen = PySequence_Size(seq);
1740	if (seqlen == 0) {
1741	Py_DECREF(seq);
1742	return PyString_FromString("");
1743	}
1744	if (seqlen == 1) {
1745	item = PySequence_Fast_GET_ITEM(seq, 0);
1746	if (PyString_CheckExact(item) \|\| PyUnicode_CheckExact(item)) {
1747	Py_INCREF(item);
1748	Py_DECREF(seq);
1749	return item;
1750	}
1751	}
1752
1753	/* There are at least two things to join, or else we have a subclass
1754	* of the builtin types in the sequence.
1755	* Do a pre-pass to figure out the total amount of space we'll
1756	* need (sz), see whether any argument is absurd, and defer to
1757	* the Unicode join if appropriate.
1758	*/
1759	for (i = 0; i < seqlen; i++) {
1760	const size_t old_sz = sz;
1761	item = PySequence_Fast_GET_ITEM(seq, i);
1762	if (!PyString_Check(item)){
1763	#ifdef Py_USING_UNICODE
1764	if (PyUnicode_Check(item)) {
1765	/* Defer to Unicode join.
1766	* CAUTION: There's no gurantee that the
1767	* original sequence can be iterated over
1768	* again, so we must pass seq here.
1769	*/
1770	PyObject *result;
1771	result = PyUnicode_Join((PyObject *)self, seq);
1772	Py_DECREF(seq);
1773	return result;
1774	}
1775	#endif
1776	PyErr_Format(PyExc_TypeError,
1777	"sequence item %zd: expected string,"
1778	" %.80s found",
1779	i, item->ob_type->tp_name);
1780	Py_DECREF(seq);
1781	return NULL;
1782	}
1783	sz += PyString_GET_SIZE(item);
1784	if (i != 0)
1785	sz += seplen;
1786	if (sz < old_sz \|\| sz > PY_SSIZE_T_MAX) {
1787	PyErr_SetString(PyExc_OverflowError,
1788	"join() result is too long for a Python string");
1789	Py_DECREF(seq);
1790	return NULL;
1791	}
1792	}
1793
1794	/* Allocate result space. */
1795	res = PyString_FromStringAndSize((char*)NULL, sz);
1796	if (res == NULL) {
1797	Py_DECREF(seq);
1798	return NULL;
1799	}
1800
1801	/* Catenate everything. */
1802	p = PyString_AS_STRING(res);
1803	for (i = 0; i < seqlen; ++i) {
1804	size_t n;
1805	item = PySequence_Fast_GET_ITEM(seq, i);
1806	n = PyString_GET_SIZE(item);
1807	Py_MEMCPY(p, PyString_AS_STRING(item), n);
1808	p += n;
1809	if (i < seqlen - 1) {
1810	Py_MEMCPY(p, sep, seplen);
1811	p += seplen;
1812	}
1813	}
1814
1815	Py_DECREF(seq);
1816	return res;
1817	}
1818
1819	PyObject *
1820	_PyString_Join(PyObject sep, PyObject x)
1821	{
1822	assert(sep != NULL && PyString_Check(sep));
1823	assert(x != NULL);
1824	return string_join((PyStringObject *)sep, x);
1825	}
1826
1827	Py_LOCAL_INLINE(void)
1828	string_adjust_indices(Py_ssize_t start, Py_ssize_t end, Py_ssize_t len)
1829	{
1830	if (*end > len)
1831	*end = len;
1832	else if (*end < 0)
1833	*end += len;
1834	if (*end < 0)
1835	*end = 0;
1836	if (*start < 0)
1837	*start += len;
1838	if (*start < 0)
1839	*start = 0;
1840	}
1841
1842	Py_LOCAL_INLINE(Py_ssize_t)
1843	string_find_internal(PyStringObject self, PyObject args, int dir)
1844	{
1845	PyObject *subobj;
1846	const char *sub;
1847	Py_ssize_t sub_len;
1848	Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
1849
1850	if (!PyArg_ParseTuple(args, "O\|O&O&:find/rfind/index/rindex", &subobj,
1851	_PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1852	return -2;
1853	if (PyString_Check(subobj)) {
1854	sub = PyString_AS_STRING(subobj);
1855	sub_len = PyString_GET_SIZE(subobj);
1856	}
1857	#ifdef Py_USING_UNICODE
1858	else if (PyUnicode_Check(subobj))
1859	return PyUnicode_Find(
1860	(PyObject *)self, subobj, start, end, dir);
1861	#endif
1862	else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1863	/* XXX - the "expected a character buffer object" is pretty
1864	confusing for a non-expert. remap to something else ? */
1865	return -2;
1866
1867	if (dir > 0)
1868	return stringlib_find_slice(
1869	PyString_AS_STRING(self), PyString_GET_SIZE(self),
1870	sub, sub_len, start, end);
1871	else
1872	return stringlib_rfind_slice(
1873	PyString_AS_STRING(self), PyString_GET_SIZE(self),
1874	sub, sub_len, start, end);
1875	}
1876
1877
1878	PyDoc_STRVAR(find__doc__,
1879	"S.find(sub [,start [,end]]) -> int\n\
1880	\n\
1881	Return the lowest index in S where substring sub is found,\n\
1882	such that sub is contained within s[start,end]. Optional\n\
1883	arguments start and end are interpreted as in slice notation.\n\
1884	\n\
1885	Return -1 on failure.");
1886
1887	static PyObject *
1888	string_find(PyStringObject self, PyObject args)
1889	{
1890	Py_ssize_t result = string_find_internal(self, args, +1);
1891	if (result == -2)
1892	return NULL;
1893	return PyInt_FromSsize_t(result);
1894	}
1895
1896
1897	PyDoc_STRVAR(index__doc__,
1898	"S.index(sub [,start [,end]]) -> int\n\
1899	\n\
1900	Like S.find() but raise ValueError when the substring is not found.");
1901
1902	static PyObject *
1903	string_index(PyStringObject self, PyObject args)
1904	{
1905	Py_ssize_t result = string_find_internal(self, args, +1);
1906	if (result == -2)
1907	return NULL;
1908	if (result == -1) {
1909	PyErr_SetString(PyExc_ValueError,
1910	"substring not found");
1911	return NULL;
1912	}
1913	return PyInt_FromSsize_t(result);
1914	}
1915
1916
1917	PyDoc_STRVAR(rfind__doc__,
1918	"S.rfind(sub [,start [,end]]) -> int\n\
1919	\n\
1920	Return the highest index in S where substring sub is found,\n\
1921	such that sub is contained within s[start,end]. Optional\n\
1922	arguments start and end are interpreted as in slice notation.\n\
1923	\n\
1924	Return -1 on failure.");
1925
1926	static PyObject *
1927	string_rfind(PyStringObject self, PyObject args)
1928	{
1929	Py_ssize_t result = string_find_internal(self, args, -1);
1930	if (result == -2)
1931	return NULL;
1932	return PyInt_FromSsize_t(result);
1933	}
1934
1935
1936	PyDoc_STRVAR(rindex__doc__,
1937	"S.rindex(sub [,start [,end]]) -> int\n\
1938	\n\
1939	Like S.rfind() but raise ValueError when the substring is not found.");
1940
1941	static PyObject *
1942	string_rindex(PyStringObject self, PyObject args)
1943	{
1944	Py_ssize_t result = string_find_internal(self, args, -1);
1945	if (result == -2)
1946	return NULL;
1947	if (result == -1) {
1948	PyErr_SetString(PyExc_ValueError,
1949	"substring not found");
1950	return NULL;
1951	}
1952	return PyInt_FromSsize_t(result);
1953	}
1954
1955
1956	Py_LOCAL_INLINE(PyObject *)
1957	do_xstrip(PyStringObject self, int striptype, PyObject sepobj)
1958	{
1959	char *s = PyString_AS_STRING(self);
1960	Py_ssize_t len = PyString_GET_SIZE(self);
1961	char *sep = PyString_AS_STRING(sepobj);
1962	Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
1963	Py_ssize_t i, j;
1964
1965	i = 0;
1966	if (striptype != RIGHTSTRIP) {
1967	while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1968	i++;
1969	}
1970	}
1971
1972	j = len;
1973	if (striptype != LEFTSTRIP) {
1974	do {
1975	j--;
1976	} while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1977	j++;
1978	}
1979
1980	if (i == 0 && j == len && PyString_CheckExact(self)) {
1981	Py_INCREF(self);
1982	return (PyObject*)self;
1983	}
1984	else
1985	return PyString_FromStringAndSize(s+i, j-i);
1986	}
1987
1988
1989	Py_LOCAL_INLINE(PyObject *)
1990	do_strip(PyStringObject *self, int striptype)
1991	{
1992	char *s = PyString_AS_STRING(self);
1993	Py_ssize_t len = PyString_GET_SIZE(self), i, j;
1994
1995	i = 0;
1996	if (striptype != RIGHTSTRIP) {
1997	while (i < len && isspace(Py_CHARMASK(s[i]))) {
1998	i++;
1999	}
2000	}
2001
2002	j = len;
2003	if (striptype != LEFTSTRIP) {
2004	do {
2005	j--;
2006	} while (j >= i && isspace(Py_CHARMASK(s[j])));
2007	j++;
2008	}
2009
2010	if (i == 0 && j == len && PyString_CheckExact(self)) {
2011	Py_INCREF(self);
2012	return (PyObject*)self;
2013	}
2014	else
2015	return PyString_FromStringAndSize(s+i, j-i);
2016	}
2017
2018
2019	Py_LOCAL_INLINE(PyObject *)
2020	do_argstrip(PyStringObject self, int striptype, PyObject args)
2021	{
2022	PyObject *sep = NULL;
2023
2024	if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
2025	return NULL;
2026
2027	if (sep != NULL && sep != Py_None) {
2028	if (PyString_Check(sep))
2029	return do_xstrip(self, striptype, sep);
2030	#ifdef Py_USING_UNICODE
2031	else if (PyUnicode_Check(sep)) {
2032	PyObject uniself = PyUnicode_FromObject((PyObject )self);
2033	PyObject *res;
2034	if (uniself==NULL)
2035	return NULL;
2036	res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
2037	striptype, sep);
2038	Py_DECREF(uniself);
2039	return res;
2040	}
2041	#endif
2042	PyErr_Format(PyExc_TypeError,
2043	#ifdef Py_USING_UNICODE
2044	"%s arg must be None, str or unicode",
2045	#else
2046	"%s arg must be None or str",
2047	#endif
2048	STRIPNAME(striptype));
2049	return NULL;
2050	}
2051
2052	return do_strip(self, striptype);
2053	}
2054
2055
2056	PyDoc_STRVAR(strip__doc__,
2057	"S.strip([chars]) -> string or unicode\n\
2058	\n\
2059	Return a copy of the string S with leading and trailing\n\
2060	whitespace removed.\n\
2061	If chars is given and not None, remove characters in chars instead.\n\
2062	If chars is unicode, S will be converted to unicode before stripping");
2063
2064	static PyObject *
2065	string_strip(PyStringObject self, PyObject args)
2066	{
2067	if (PyTuple_GET_SIZE(args) == 0)
2068	return do_strip(self, BOTHSTRIP); /* Common case */
2069	else
2070	return do_argstrip(self, BOTHSTRIP, args);
2071	}
2072
2073
2074	PyDoc_STRVAR(lstrip__doc__,
2075	"S.lstrip([chars]) -> string or unicode\n\
2076	\n\
2077	Return a copy of the string S with leading whitespace removed.\n\
2078	If chars is given and not None, remove characters in chars instead.\n\
2079	If chars is unicode, S will be converted to unicode before stripping");
2080
2081	static PyObject *
2082	string_lstrip(PyStringObject self, PyObject args)
2083	{
2084	if (PyTuple_GET_SIZE(args) == 0)
2085	return do_strip(self, LEFTSTRIP); /* Common case */
2086	else
2087	return do_argstrip(self, LEFTSTRIP, args);
2088	}
2089
2090
2091	PyDoc_STRVAR(rstrip__doc__,
2092	"S.rstrip([chars]) -> string or unicode\n\
2093	\n\
2094	Return a copy of the string S with trailing whitespace removed.\n\
2095	If chars is given and not None, remove characters in chars instead.\n\
2096	If chars is unicode, S will be converted to unicode before stripping");
2097
2098	static PyObject *
2099	string_rstrip(PyStringObject self, PyObject args)
2100	{
2101	if (PyTuple_GET_SIZE(args) == 0)
2102	return do_strip(self, RIGHTSTRIP); /* Common case */
2103	else
2104	return do_argstrip(self, RIGHTSTRIP, args);
2105	}
2106
2107
2108	PyDoc_STRVAR(lower__doc__,
2109	"S.lower() -> string\n\
2110	\n\
2111	Return a copy of the string S converted to lowercase.");
2112
2113	/* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
2114	#ifndef _tolower
2115	#define _tolower tolower
2116	#endif
2117
2118	static PyObject *
2119	string_lower(PyStringObject *self)
2120	{
2121	char *s;
2122	Py_ssize_t i, n = PyString_GET_SIZE(self);
2123	PyObject *newobj;
2124
2125	newobj = PyString_FromStringAndSize(NULL, n);
2126	if (!newobj)
2127	return NULL;
2128
2129	s = PyString_AS_STRING(newobj);
2130
2131	Py_MEMCPY(s, PyString_AS_STRING(self), n);
2132
2133	for (i = 0; i < n; i++) {
2134	int c = Py_CHARMASK(s[i]);
2135	if (isupper(c))
2136	s[i] = _tolower(c);
2137	}
2138
2139	return newobj;
2140	}
2141
2142	PyDoc_STRVAR(upper__doc__,
2143	"S.upper() -> string\n\
2144	\n\
2145	Return a copy of the string S converted to uppercase.");
2146
2147	#ifndef _toupper
2148	#define _toupper toupper
2149	#endif
2150
2151	static PyObject *
2152	string_upper(PyStringObject *self)
2153	{
2154	char *s;
2155	Py_ssize_t i, n = PyString_GET_SIZE(self);
2156	PyObject *newobj;
2157
2158	newobj = PyString_FromStringAndSize(NULL, n);
2159	if (!newobj)
2160	return NULL;
2161
2162	s = PyString_AS_STRING(newobj);
2163
2164	Py_MEMCPY(s, PyString_AS_STRING(self), n);
2165
2166	for (i = 0; i < n; i++) {
2167	int c = Py_CHARMASK(s[i]);
2168	if (islower(c))
2169	s[i] = _toupper(c);
2170	}
2171
2172	return newobj;
2173	}
2174
2175	PyDoc_STRVAR(title__doc__,
2176	"S.title() -> string\n\
2177	\n\
2178	Return a titlecased version of S, i.e. words start with uppercase\n\
2179	characters, all remaining cased characters have lowercase.");
2180
2181	static PyObject*
2182	string_title(PyStringObject *self)
2183	{
2184	char s = PyString_AS_STRING(self), s_new;
2185	Py_ssize_t i, n = PyString_GET_SIZE(self);
2186	int previous_is_cased = 0;
2187	PyObject *newobj;
2188
2189	newobj = PyString_FromStringAndSize(NULL, n);
2190	if (newobj == NULL)
2191	return NULL;
2192	s_new = PyString_AsString(newobj);
2193	for (i = 0; i < n; i++) {
2194	int c = Py_CHARMASK(*s++);
2195	if (islower(c)) {
2196	if (!previous_is_cased)
2197	c = toupper(c);
2198	previous_is_cased = 1;
2199	} else if (isupper(c)) {
2200	if (previous_is_cased)
2201	c = tolower(c);
2202	previous_is_cased = 1;
2203	} else
2204	previous_is_cased = 0;
2205	*s_new++ = c;
2206	}
2207	return newobj;
2208	}
2209
2210	PyDoc_STRVAR(capitalize__doc__,
2211	"S.capitalize() -> string\n\
2212	\n\
2213	Return a copy of the string S with only its first character\n\
2214	capitalized.");
2215
2216	static PyObject *
2217	string_capitalize(PyStringObject *self)
2218	{
2219	char s = PyString_AS_STRING(self), s_new;
2220	Py_ssize_t i, n = PyString_GET_SIZE(self);
2221	PyObject *newobj;
2222
2223	newobj = PyString_FromStringAndSize(NULL, n);
2224	if (newobj == NULL)
2225	return NULL;
2226	s_new = PyString_AsString(newobj);
2227	if (0 < n) {
2228	int c = Py_CHARMASK(*s++);
2229	if (islower(c))
2230	*s_new = toupper(c);
2231	else
2232	*s_new = c;
2233	s_new++;
2234	}
2235	for (i = 1; i < n; i++) {
2236	int c = Py_CHARMASK(*s++);
2237	if (isupper(c))
2238	*s_new = tolower(c);
2239	else
2240	*s_new = c;
2241	s_new++;
2242	}
2243	return newobj;
2244	}
2245
2246
2247	PyDoc_STRVAR(count__doc__,
2248	"S.count(sub[, start[, end]]) -> int\n\
2249	\n\
2250	Return the number of non-overlapping occurrences of substring sub in\n\
2251	string S[start:end]. Optional arguments start and end are interpreted\n\
2252	as in slice notation.");
2253
2254	static PyObject *
2255	string_count(PyStringObject self, PyObject args)
2256	{
2257	PyObject *sub_obj;
2258	const char str = PyString_AS_STRING(self), sub;
2259	Py_ssize_t sub_len;
2260	Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
2261
2262	if (!PyArg_ParseTuple(args, "O\|O&O&:count", &sub_obj,
2263	_PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
2264	return NULL;
2265
2266	if (PyString_Check(sub_obj)) {
2267	sub = PyString_AS_STRING(sub_obj);
2268	sub_len = PyString_GET_SIZE(sub_obj);
2269	}
2270	#ifdef Py_USING_UNICODE
2271	else if (PyUnicode_Check(sub_obj)) {
2272	Py_ssize_t count;
2273	count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
2274	if (count == -1)
2275	return NULL;
2276	else
2277	return PyInt_FromSsize_t(count);
2278	}
2279	#endif
2280	else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
2281	return NULL;
2282
2283	string_adjust_indices(&start, &end, PyString_GET_SIZE(self));
2284
2285	return PyInt_FromSsize_t(
2286	stringlib_count(str + start, end - start, sub, sub_len)
2287	);
2288	}
2289
2290	PyDoc_STRVAR(swapcase__doc__,
2291	"S.swapcase() -> string\n\
2292	\n\
2293	Return a copy of the string S with uppercase characters\n\
2294	converted to lowercase and vice versa.");
2295
2296	static PyObject *
2297	string_swapcase(PyStringObject *self)
2298	{
2299	char s = PyString_AS_STRING(self), s_new;
2300	Py_ssize_t i, n = PyString_GET_SIZE(self);
2301	PyObject *newobj;
2302
2303	newobj = PyString_FromStringAndSize(NULL, n);
2304	if (newobj == NULL)
2305	return NULL;
2306	s_new = PyString_AsString(newobj);
2307	for (i = 0; i < n; i++) {
2308	int c = Py_CHARMASK(*s++);
2309	if (islower(c)) {
2310	*s_new = toupper(c);
2311	}
2312	else if (isupper(c)) {
2313	*s_new = tolower(c);
2314	}
2315	else
2316	*s_new = c;
2317	s_new++;
2318	}
2319	return newobj;
2320	}
2321
2322
2323	PyDoc_STRVAR(translate__doc__,
2324	"S.translate(table [,deletechars]) -> string\n\
2325	\n\
2326	Return a copy of the string S, where all characters occurring\n\
2327	in the optional argument deletechars are removed, and the\n\
2328	remaining characters have been mapped through the given\n\
2329	translation table, which must be a string of length 256.");
2330
2331	static PyObject *
2332	string_translate(PyStringObject self, PyObject args)
2333	{
2334	register char input, output;
2335	register const char *table;
2336	register Py_ssize_t i, c, changed = 0;
2337	PyObject input_obj = (PyObject)self;
2338	const char table1, output_start, *del_table=NULL;
2339	Py_ssize_t inlen, tablen, dellen = 0;
2340	PyObject *result;
2341	int trans_table[256];
2342	PyObject tableobj, delobj = NULL;
2343
2344	if (!PyArg_UnpackTuple(args, "translate", 1, 2,
2345	&tableobj, &delobj))
2346	return NULL;
2347
2348	if (PyString_Check(tableobj)) {
2349	table1 = PyString_AS_STRING(tableobj);
2350	tablen = PyString_GET_SIZE(tableobj);
2351	}
2352	#ifdef Py_USING_UNICODE
2353	else if (PyUnicode_Check(tableobj)) {
2354	/* Unicode .translate() does not support the deletechars
2355	parameter; instead a mapping to None will cause characters
2356	to be deleted. */
2357	if (delobj != NULL) {
2358	PyErr_SetString(PyExc_TypeError,
2359	"deletions are implemented differently for unicode");
2360	return NULL;
2361	}
2362	return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2363	}
2364	#endif
2365	else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
2366	return NULL;
2367
2368	if (tablen != 256) {
2369	PyErr_SetString(PyExc_ValueError,
2370	"translation table must be 256 characters long");
2371	return NULL;
2372	}
2373
2374	if (delobj != NULL) {
2375	if (PyString_Check(delobj)) {
2376	del_table = PyString_AS_STRING(delobj);
2377	dellen = PyString_GET_SIZE(delobj);
2378	}
2379	#ifdef Py_USING_UNICODE
2380	else if (PyUnicode_Check(delobj)) {
2381	PyErr_SetString(PyExc_TypeError,
2382	"deletions are implemented differently for unicode");
2383	return NULL;
2384	}
2385	#endif
2386	else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2387	return NULL;
2388	}
2389	else {
2390	del_table = NULL;
2391	dellen = 0;
2392	}
2393
2394	table = table1;
2395	inlen = PyString_GET_SIZE(input_obj);
2396	result = PyString_FromStringAndSize((char *)NULL, inlen);
2397	if (result == NULL)
2398	return NULL;
2399	output_start = output = PyString_AsString(result);
2400	input = PyString_AS_STRING(input_obj);
2401
2402	if (dellen == 0) {
2403	/* If no deletions are required, use faster code */
2404	for (i = inlen; --i >= 0; ) {
2405	c = Py_CHARMASK(*input++);
2406	if (Py_CHARMASK((*output++ = table[c])) != c)
2407	changed = 1;
2408	}
2409	if (changed \|\| !PyString_CheckExact(input_obj))
2410	return result;
2411	Py_DECREF(result);
2412	Py_INCREF(input_obj);
2413	return input_obj;
2414	}
2415
2416	for (i = 0; i < 256; i++)
2417	trans_table[i] = Py_CHARMASK(table[i]);
2418
2419	for (i = 0; i < dellen; i++)
2420	trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2421
2422	for (i = inlen; --i >= 0; ) {
2423	c = Py_CHARMASK(*input++);
2424	if (trans_table[c] != -1)
2425	if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2426	continue;
2427	changed = 1;
2428	}
2429	if (!changed && PyString_CheckExact(input_obj)) {
2430	Py_DECREF(result);
2431	Py_INCREF(input_obj);
2432	return input_obj;
2433	}
2434	/* Fix the size of the resulting string */
2435	if (inlen > 0)
2436	_PyString_Resize(&result, output - output_start);
2437	return result;
2438	}
2439
2440
/* Search directions understood by findstring() and countstring():
   a positive value scans from the start, a negative one from the end. */
#define FORWARD 1
#define REVERSE (-1)

/* find and count characters and substrings */

/* Locate the first byte equal to c within the first target_len bytes of
   target.  Evaluates to a char * pointing at that byte, or NULL when
   memchr() finds nothing. */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))
2448
2449	/* String ops must return a string. */
2450	/* If the object is subclass of string, create a copy */
2451	Py_LOCAL(PyStringObject *)
2452	return_self(PyStringObject *self)
2453	{
2454	if (PyString_CheckExact(self)) {
2455	Py_INCREF(self);
2456	return self;
2457	}
2458	return (PyStringObject *)PyString_FromStringAndSize(
2459	PyString_AS_STRING(self),
2460	PyString_GET_SIZE(self));
2461	}
2462
2463	Py_LOCAL_INLINE(Py_ssize_t)
2464	countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
2465	{
2466	Py_ssize_t count=0;
2467	const char *start=target;
2468	const char *end=target+target_len;
2469
2470	while ( (start=findchar(start, end-start, c)) != NULL ) {
2471	count++;
2472	if (count >= maxcount)
2473	break;
2474	start += 1;
2475	}
2476	return count;
2477	}
2478
2479	Py_LOCAL(Py_ssize_t)
2480	findstring(const char *target, Py_ssize_t target_len,
2481	const char *pattern, Py_ssize_t pattern_len,
2482	Py_ssize_t start,
2483	Py_ssize_t end,
2484	int direction)
2485	{
2486	if (start < 0) {
2487	start += target_len;
2488	if (start < 0)
2489	start = 0;
2490	}
2491	if (end > target_len) {
2492	end = target_len;
2493	} else if (end < 0) {
2494	end += target_len;
2495	if (end < 0)
2496	end = 0;
2497	}
2498
2499	/* zero-length substrings always match at the first attempt */
2500	if (pattern_len == 0)
2501	return (direction > 0) ? start : end;
2502
2503	end -= pattern_len;
2504
2505	if (direction < 0) {
2506	for (; end >= start; end--)
2507	if (Py_STRING_MATCH(target, end, pattern, pattern_len))
2508	return end;
2509	} else {
2510	for (; start <= end; start++)
2511	if (Py_STRING_MATCH(target, start, pattern, pattern_len))
2512	return start;
2513	}
2514	return -1;
2515	}
2516
2517	Py_LOCAL_INLINE(Py_ssize_t)
2518	countstring(const char *target, Py_ssize_t target_len,
2519	const char *pattern, Py_ssize_t pattern_len,
2520	Py_ssize_t start,
2521	Py_ssize_t end,
2522	int direction, Py_ssize_t maxcount)
2523	{
2524	Py_ssize_t count=0;
2525
2526	if (start < 0) {
2527	start += target_len;
2528	if (start < 0)
2529	start = 0;
2530	}
2531	if (end > target_len) {
2532	end = target_len;
2533	} else if (end < 0) {
2534	end += target_len;
2535	if (end < 0)
2536	end = 0;
2537	}
2538
2539	/* zero-length substrings match everywhere */
2540	if (pattern_len == 0 \|\| maxcount == 0) {
2541	if (target_len+1 < maxcount)
2542	return target_len+1;
2543	return maxcount;
2544	}
2545
2546	end -= pattern_len;
2547	if (direction < 0) {
2548	for (; (end >= start); end--)
2549	if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
2550	count++;
2551	if (--maxcount <= 0) break;
2552	end -= pattern_len-1;
2553	}
2554	} else {
2555	for (; (start <= end); start++)
2556	if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
2557	count++;
2558	if (--maxcount <= 0)
2559	break;
2560	start += pattern_len-1;
2561	}
2562	}
2563	return count;
2564	}
2565
2566
2567	/* Algorithms for different cases of string replacement */
2568
2569	/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2570	Py_LOCAL(PyStringObject *)
2571	replace_interleave(PyStringObject *self,
2572	const char *to_s, Py_ssize_t to_len,
2573	Py_ssize_t maxcount)
2574	{
2575	char self_s, result_s;
2576	Py_ssize_t self_len, result_len;
2577	Py_ssize_t count, i, product;
2578	PyStringObject *result;
2579
2580	self_len = PyString_GET_SIZE(self);
2581
2582	/* 1 at the end plus 1 after every character */
2583	count = self_len+1;
2584	if (maxcount < count)
2585	count = maxcount;
2586
2587	/* Check for overflow */
2588	/* result_len = count * to_len + self_len; */
2589	product = count * to_len;
2590	if (product / to_len != count) {
2591	PyErr_SetString(PyExc_OverflowError,
2592	"replace string is too long");
2593	return NULL;
2594	}
2595	result_len = product + self_len;
2596	if (result_len < 0) {
2597	PyErr_SetString(PyExc_OverflowError,
2598	"replace string is too long");
2599	return NULL;
2600	}
2601
2602	if (! (result = (PyStringObject *)
2603	PyString_FromStringAndSize(NULL, result_len)) )
2604	return NULL;
2605
2606	self_s = PyString_AS_STRING(self);
2607	result_s = PyString_AS_STRING(result);
2608
2609	/* TODO: special case single character, which doesn't need memcpy */
2610
2611	/* Lay the first one down (guaranteed this will occur) */
2612	Py_MEMCPY(result_s, to_s, to_len);
2613	result_s += to_len;
2614	count -= 1;
2615
2616	for (i=0; i<count; i++) {
2617	result_s++ = self_s++;
2618	Py_MEMCPY(result_s, to_s, to_len);
2619	result_s += to_len;
2620	}
2621
2622	/* Copy the rest of the original string */
2623	Py_MEMCPY(result_s, self_s, self_len-i);
2624
2625	return result;
2626	}
2627
2628	/* Special case for deleting a single character */
2629	/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2630	Py_LOCAL(PyStringObject *)
2631	replace_delete_single_character(PyStringObject *self,
2632	char from_c, Py_ssize_t maxcount)
2633	{
2634	char self_s, result_s;
2635	char start, next, *end;
2636	Py_ssize_t self_len, result_len;
2637	Py_ssize_t count;
2638	PyStringObject *result;
2639
2640	self_len = PyString_GET_SIZE(self);
2641	self_s = PyString_AS_STRING(self);
2642
2643	count = countchar(self_s, self_len, from_c, maxcount);
2644	if (count == 0) {
2645	return return_self(self);
2646	}
2647
2648	result_len = self_len - count; /* from_len == 1 */
2649	assert(result_len>=0);
2650
2651	if ( (result = (PyStringObject *)
2652	PyString_FromStringAndSize(NULL, result_len)) == NULL)
2653	return NULL;
2654	result_s = PyString_AS_STRING(result);
2655
2656	start = self_s;
2657	end = self_s + self_len;
2658	while (count-- > 0) {
2659	next = findchar(start, end-start, from_c);
2660	if (next == NULL)
2661	break;
2662	Py_MEMCPY(result_s, start, next-start);
2663	result_s += (next-start);
2664	start = next+1;
2665	}
2666	Py_MEMCPY(result_s, start, end-start);
2667
2668	return result;
2669	}
2670
2671	/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2672
2673	Py_LOCAL(PyStringObject *)
2674	replace_delete_substring(PyStringObject *self,
2675	const char *from_s, Py_ssize_t from_len,
2676	Py_ssize_t maxcount) {
2677	char self_s, result_s;
2678	char start, next, *end;
2679	Py_ssize_t self_len, result_len;
2680	Py_ssize_t count, offset;
2681	PyStringObject *result;
2682
2683	self_len = PyString_GET_SIZE(self);
2684	self_s = PyString_AS_STRING(self);
2685
2686	count = countstring(self_s, self_len,
2687	from_s, from_len,
2688	0, self_len, 1,
2689	maxcount);
2690
2691	if (count == 0) {
2692	/* no matches */
2693	return return_self(self);
2694	}
2695
2696	result_len = self_len - (count * from_len);
2697	assert (result_len>=0);
2698
2699	if ( (result = (PyStringObject *)
2700	PyString_FromStringAndSize(NULL, result_len)) == NULL )
2701	return NULL;
2702
2703	result_s = PyString_AS_STRING(result);
2704
2705	start = self_s;
2706	end = self_s + self_len;
2707	while (count-- > 0) {
2708	offset = findstring(start, end-start,
2709	from_s, from_len,
2710	0, end-start, FORWARD);
2711	if (offset == -1)
2712	break;
2713	next = start + offset;
2714
2715	Py_MEMCPY(result_s, start, next-start);
2716
2717	result_s += (next-start);
2718	start = next+from_len;
2719	}
2720	Py_MEMCPY(result_s, start, end-start);
2721	return result;
2722	}
2723
2724	/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2725	Py_LOCAL(PyStringObject *)
2726	replace_single_character_in_place(PyStringObject *self,
2727	char from_c, char to_c,
2728	Py_ssize_t maxcount)
2729	{
2730	char self_s, result_s, start, end, *next;
2731	Py_ssize_t self_len;
2732	PyStringObject *result;
2733
2734	/* The result string will be the same size */
2735	self_s = PyString_AS_STRING(self);
2736	self_len = PyString_GET_SIZE(self);
2737
2738	next = findchar(self_s, self_len, from_c);
2739
2740	if (next == NULL) {
2741	/* No matches; return the original string */
2742	return return_self(self);
2743	}
2744
2745	/* Need to make a new string */
2746	result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2747	if (result == NULL)
2748	return NULL;
2749	result_s = PyString_AS_STRING(result);
2750	Py_MEMCPY(result_s, self_s, self_len);
2751
2752	/* change everything in-place, starting with this one */
2753	start = result_s + (next-self_s);
2754	*start = to_c;
2755	start++;
2756	end = result_s + self_len;
2757
2758	while (--maxcount > 0) {
2759	next = findchar(start, end-start, from_c);
2760	if (next == NULL)
2761	break;
2762	*next = to_c;
2763	start = next+1;
2764	}
2765
2766	return result;
2767	}
2768
2769	/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2770	Py_LOCAL(PyStringObject *)
2771	replace_substring_in_place(PyStringObject *self,
2772	const char *from_s, Py_ssize_t from_len,
2773	const char *to_s, Py_ssize_t to_len,
2774	Py_ssize_t maxcount)
2775	{
2776	char result_s, start, *end;
2777	char *self_s;
2778	Py_ssize_t self_len, offset;
2779	PyStringObject *result;
2780
2781	/* The result string will be the same size */
2782
2783	self_s = PyString_AS_STRING(self);
2784	self_len = PyString_GET_SIZE(self);
2785
2786	offset = findstring(self_s, self_len,
2787	from_s, from_len,
2788	0, self_len, FORWARD);
2789	if (offset == -1) {
2790	/* No matches; return the original string */
2791	return return_self(self);
2792	}
2793
2794	/* Need to make a new string */
2795	result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2796	if (result == NULL)
2797	return NULL;
2798	result_s = PyString_AS_STRING(result);
2799	Py_MEMCPY(result_s, self_s, self_len);
2800
2801	/* change everything in-place, starting with this one */
2802	start = result_s + offset;
2803	Py_MEMCPY(start, to_s, from_len);
2804	start += from_len;
2805	end = result_s + self_len;
2806
2807	while ( --maxcount > 0) {
2808	offset = findstring(start, end-start,
2809	from_s, from_len,
2810	0, end-start, FORWARD);
2811	if (offset==-1)
2812	break;
2813	Py_MEMCPY(start+offset, to_s, from_len);
2814	start += offset+from_len;
2815	}
2816
2817	return result;
2818	}
2819
2820	/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2821	Py_LOCAL(PyStringObject *)
2822	replace_single_character(PyStringObject *self,
2823	char from_c,
2824	const char *to_s, Py_ssize_t to_len,
2825	Py_ssize_t maxcount)
2826	{
2827	char self_s, result_s;
2828	char start, next, *end;
2829	Py_ssize_t self_len, result_len;
2830	Py_ssize_t count, product;
2831	PyStringObject *result;
2832
2833	self_s = PyString_AS_STRING(self);
2834	self_len = PyString_GET_SIZE(self);
2835
2836	count = countchar(self_s, self_len, from_c, maxcount);
2837	if (count == 0) {
2838	/* no matches, return unchanged */
2839	return return_self(self);
2840	}
2841
2842	/* use the difference between current and new, hence the "-1" */
2843	/* result_len = self_len + count * (to_len-1) */
2844	product = count * (to_len-1);
2845	if (product / (to_len-1) != count) {
2846	PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2847	return NULL;
2848	}
2849	result_len = self_len + product;
2850	if (result_len < 0) {
2851	PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2852	return NULL;
2853	}
2854
2855	if ( (result = (PyStringObject *)
2856	PyString_FromStringAndSize(NULL, result_len)) == NULL)
2857	return NULL;
2858	result_s = PyString_AS_STRING(result);
2859
2860	start = self_s;
2861	end = self_s + self_len;
2862	while (count-- > 0) {
2863	next = findchar(start, end-start, from_c);
2864	if (next == NULL)
2865	break;
2866
2867	if (next == start) {
2868	/* replace with the 'to' */
2869	Py_MEMCPY(result_s, to_s, to_len);
2870	result_s += to_len;
2871	start += 1;
2872	} else {
2873	/* copy the unchanged old then the 'to' */
2874	Py_MEMCPY(result_s, start, next-start);
2875	result_s += (next-start);
2876	Py_MEMCPY(result_s, to_s, to_len);
2877	result_s += to_len;
2878	start = next+1;
2879	}
2880	}
2881	/* Copy the remainder of the remaining string */
2882	Py_MEMCPY(result_s, start, end-start);
2883
2884	return result;
2885	}
2886
2887	/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2888	Py_LOCAL(PyStringObject *)
2889	replace_substring(PyStringObject *self,
2890	const char *from_s, Py_ssize_t from_len,
2891	const char *to_s, Py_ssize_t to_len,
2892	Py_ssize_t maxcount) {
2893	char self_s, result_s;
2894	char start, next, *end;
2895	Py_ssize_t self_len, result_len;
2896	Py_ssize_t count, offset, product;
2897	PyStringObject *result;
2898
2899	self_s = PyString_AS_STRING(self);
2900	self_len = PyString_GET_SIZE(self);
2901
2902	count = countstring(self_s, self_len,
2903	from_s, from_len,
2904	0, self_len, FORWARD, maxcount);
2905	if (count == 0) {
2906	/* no matches, return unchanged */
2907	return return_self(self);
2908	}
2909
2910	/* Check for overflow */
2911	/* result_len = self_len + count * (to_len-from_len) */
2912	product = count * (to_len-from_len);
2913	if (product / (to_len-from_len) != count) {
2914	PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2915	return NULL;
2916	}
2917	result_len = self_len + product;
2918	if (result_len < 0) {
2919	PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2920	return NULL;
2921	}
2922
2923	if ( (result = (PyStringObject *)
2924	PyString_FromStringAndSize(NULL, result_len)) == NULL)
2925	return NULL;
2926	result_s = PyString_AS_STRING(result);
2927
2928	start = self_s;
2929	end = self_s + self_len;
2930	while (count-- > 0) {
2931	offset = findstring(start, end-start,
2932	from_s, from_len,
2933	0, end-start, FORWARD);
2934	if (offset == -1)
2935	break;
2936	next = start+offset;
2937	if (next == start) {
2938	/* replace with the 'to' */
2939	Py_MEMCPY(result_s, to_s, to_len);
2940	result_s += to_len;
2941	start += from_len;
2942	} else {
2943	/* copy the unchanged old then the 'to' */
2944	Py_MEMCPY(result_s, start, next-start);
2945	result_s += (next-start);
2946	Py_MEMCPY(result_s, to_s, to_len);
2947	result_s += to_len;
2948	start = next+from_len;
2949	}
2950	}
2951	/* Copy the remainder of the remaining string */
2952	Py_MEMCPY(result_s, start, end-start);
2953
2954	return result;
2955	}
2956
2957
2958	Py_LOCAL(PyStringObject *)
2959	replace(PyStringObject *self,
2960	const char *from_s, Py_ssize_t from_len,
2961	const char *to_s, Py_ssize_t to_len,
2962	Py_ssize_t maxcount)
2963	{
2964	if (maxcount < 0) {
2965	maxcount = PY_SSIZE_T_MAX;
2966	} else if (maxcount == 0 \|\| PyString_GET_SIZE(self) == 0) {
2967	/* nothing to do; return the original string */
2968	return return_self(self);
2969	}
2970
2971	if (maxcount == 0 \|\|
2972	(from_len == 0 && to_len == 0)) {
2973	/* nothing to do; return the original string */
2974	return return_self(self);
2975	}
2976
2977	/* Handle zero-length special cases */
2978
2979	if (from_len == 0) {
2980	/* insert the 'to' string everywhere. */
2981	/* >>> "Python".replace("", ".") */
2982	/* '.P.y.t.h.o.n.' */
2983	return replace_interleave(self, to_s, to_len, maxcount);
2984	}
2985
2986	/* Except for "".replace("", "A") == "A" there is no way beyond this */
2987	/* point for an empty self string to generate a non-empty string */
2988	/* Special case so the remaining code always gets a non-empty string */
2989	if (PyString_GET_SIZE(self) == 0) {
2990	return return_self(self);
2991	}
2992
2993	if (to_len == 0) {
2994	/* delete all occurances of 'from' string */
2995	if (from_len == 1) {
2996	return replace_delete_single_character(
2997	self, from_s[0], maxcount);
2998	} else {
2999	return replace_delete_substring(self, from_s, from_len, maxcount);
3000	}
3001	}
3002
3003	/* Handle special case where both strings have the same length */
3004
3005	if (from_len == to_len) {
3006	if (from_len == 1) {
3007	return replace_single_character_in_place(
3008	self,
3009	from_s[0],
3010	to_s[0],
3011	maxcount);
3012	} else {
3013	return replace_substring_in_place(
3014	self, from_s, from_len, to_s, to_len, maxcount);
3015	}
3016	}
3017
3018	/* Otherwise use the more generic algorithms */
3019	if (from_len == 1) {
3020	return replace_single_character(self, from_s[0],
3021	to_s, to_len, maxcount);
3022	} else {
3023	/* len('from')>=2, len('to')>=1 */
3024	return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
3025	}
3026	}
3027
3028	PyDoc_STRVAR(replace__doc__,
3029	"S.replace (old, new[, count]) -> string\n\
3030	\n\
3031	Return a copy of string S with all occurrences of substring\n\
3032	old replaced by new. If the optional argument count is\n\
3033	given, only the first count occurrences are replaced.");
3034
3035	static PyObject *
3036	string_replace(PyStringObject self, PyObject args)
3037	{
3038	Py_ssize_t count = -1;
3039	PyObject from, to;
3040	const char from_s, to_s;
3041	Py_ssize_t from_len, to_len;
3042
3043	if (!PyArg_ParseTuple(args, "OO\|n:replace", &from, &to, &count))
3044	return NULL;
3045
3046	if (PyString_Check(from)) {
3047	from_s = PyString_AS_STRING(from);
3048	from_len = PyString_GET_SIZE(from);
3049	}
3050	#ifdef Py_USING_UNICODE
3051	if (PyUnicode_Check(from))
3052	return PyUnicode_Replace((PyObject *)self,
3053	from, to, count);
3054	#endif
3055	else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
3056	return NULL;
3057
3058	if (PyString_Check(to)) {
3059	to_s = PyString_AS_STRING(to);
3060	to_len = PyString_GET_SIZE(to);
3061	}
3062	#ifdef Py_USING_UNICODE
3063	else if (PyUnicode_Check(to))
3064	return PyUnicode_Replace((PyObject *)self,
3065	from, to, count);
3066	#endif
3067	else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
3068	return NULL;
3069
3070	return (PyObject )replace((PyStringObject ) self,
3071	from_s, from_len,
3072	to_s, to_len, count);
3073	}
3074
3075	/* End DALKE */
3076
3077	/* Matches the end (direction >= 0) or start (direction < 0) of self
3078	* against substr, using the start and end arguments. Returns
3079	* -1 on error, 0 if not found and 1 if found.
3080	*/
3081	Py_LOCAL(int)
3082	_string_tailmatch(PyStringObject self, PyObject substr, Py_ssize_t start,
3083	Py_ssize_t end, int direction)
3084	{
3085	Py_ssize_t len = PyString_GET_SIZE(self);
3086	Py_ssize_t slen;
3087	const char* sub;
3088	const char* str;
3089
3090	if (PyString_Check(substr)) {
3091	sub = PyString_AS_STRING(substr);
3092	slen = PyString_GET_SIZE(substr);
3093	}
3094	#ifdef Py_USING_UNICODE
3095	else if (PyUnicode_Check(substr))
3096	return PyUnicode_Tailmatch((PyObject *)self,
3097	substr, start, end, direction);
3098	#endif
3099	else if (PyObject_AsCharBuffer(substr, &sub, &slen))
3100	return -1;
3101	str = PyString_AS_STRING(self);
3102
3103	string_adjust_indices(&start, &end, len);
3104
3105	if (direction < 0) {
3106	/* startswith */
3107	if (start+slen > len)
3108	return 0;
3109	} else {
3110	/* endswith */
3111	if (end-start < slen \|\| start > len)
3112	return 0;
3113
3114	if (end-slen > start)
3115	start = end - slen;
3116	}
3117	if (end-start >= slen)
3118	return ! memcmp(str+start, sub, slen);
3119	return 0;
3120	}
3121
3122
3123	PyDoc_STRVAR(startswith__doc__,
3124	"S.startswith(prefix[, start[, end]]) -> bool\n\
3125	\n\
3126	Return True if S starts with the specified prefix, False otherwise.\n\
3127	With optional start, test S beginning at that position.\n\
3128	With optional end, stop comparing S at that position.\n\
3129	prefix can also be a tuple of strings to try.");
3130
3131	static PyObject *
3132	string_startswith(PyStringObject self, PyObject args)
3133	{
3134	Py_ssize_t start = 0;
3135	Py_ssize_t end = PY_SSIZE_T_MAX;
3136	PyObject *subobj;
3137	int result;
3138
3139	if (!PyArg_ParseTuple(args, "O\|O&O&:startswith", &subobj,
3140	_PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
3141	return NULL;
3142	if (PyTuple_Check(subobj)) {
3143	Py_ssize_t i;
3144	for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3145	result = _string_tailmatch(self,
3146	PyTuple_GET_ITEM(subobj, i),
3147	start, end, -1);
3148	if (result == -1)
3149	return NULL;
3150	else if (result) {
3151	Py_RETURN_TRUE;
3152	}
3153	}
3154	Py_RETURN_FALSE;
3155	}
3156	result = _string_tailmatch(self, subobj, start, end, -1);
3157	if (result == -1)
3158	return NULL;
3159	else
3160	return PyBool_FromLong(result);
3161	}
3162
3163
3164	PyDoc_STRVAR(endswith__doc__,
3165	"S.endswith(suffix[, start[, end]]) -> bool\n\
3166	\n\
3167	Return True if S ends with the specified suffix, False otherwise.\n\
3168	With optional start, test S beginning at that position.\n\
3169	With optional end, stop comparing S at that position.\n\
3170	suffix can also be a tuple of strings to try.");
3171
3172	static PyObject *
3173	string_endswith(PyStringObject self, PyObject args)
3174	{
3175	Py_ssize_t start = 0;
3176	Py_ssize_t end = PY_SSIZE_T_MAX;
3177	PyObject *subobj;
3178	int result;
3179
3180	if (!PyArg_ParseTuple(args, "O\|O&O&:endswith", &subobj,
3181	_PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
3182	return NULL;
3183	if (PyTuple_Check(subobj)) {
3184	Py_ssize_t i;
3185	for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3186	result = _string_tailmatch(self,
3187	PyTuple_GET_ITEM(subobj, i),
3188	start, end, +1);
3189	if (result == -1)
3190	return NULL;
3191	else if (result) {
3192	Py_RETURN_TRUE;
3193	}
3194	}
3195	Py_RETURN_FALSE;
3196	}
3197	result = _string_tailmatch(self, subobj, start, end, +1);
3198	if (result == -1)
3199	return NULL;
3200	else
3201	return PyBool_FromLong(result);
3202	}
3203
3204
3205	PyDoc_STRVAR(encode__doc__,
3206	"S.encode([encoding[,errors]]) -> object\n\
3207	\n\
3208	Encodes S using the codec registered for encoding. encoding defaults\n\
3209	to the default encoding. errors may be given to set a different error\n\
3210	handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3211	a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
3212	'xmlcharrefreplace' as well as any other name registered with\n\
3213	codecs.register_error that is able to handle UnicodeEncodeErrors.");
3214
3215	static PyObject *
3216	string_encode(PyStringObject self, PyObject args)
3217	{
3218	char *encoding = NULL;
3219	char *errors = NULL;
3220	PyObject *v;
3221
3222	if (!PyArg_ParseTuple(args, "\|ss:encode", &encoding, &errors))
3223	return NULL;
3224	v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
3225	if (v == NULL)
3226	goto onError;
3227	if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3228	PyErr_Format(PyExc_TypeError,
3229	"encoder did not return a string/unicode object "
3230	"(type=%.400s)",
3231	v->ob_type->tp_name);
3232	Py_DECREF(v);
3233	return NULL;
3234	}
3235	return v;
3236
3237	onError:
3238	return NULL;
3239	}
3240
3241
3242	PyDoc_STRVAR(decode__doc__,
3243	"S.decode([encoding[,errors]]) -> object\n\
3244	\n\
3245	Decodes S using the codec registered for encoding. encoding defaults\n\
3246	to the default encoding. errors may be given to set a different error\n\
3247	handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3248	a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
3249	as well as any other name registerd with codecs.register_error that is\n\
3250	able to handle UnicodeDecodeErrors.");
3251
3252	static PyObject *
3253	string_decode(PyStringObject self, PyObject args)
3254	{
3255	char *encoding = NULL;
3256	char *errors = NULL;
3257	PyObject *v;
3258
3259	if (!PyArg_ParseTuple(args, "\|ss:decode", &encoding, &errors))
3260	return NULL;
3261	v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
3262	if (v == NULL)
3263	goto onError;
3264	if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3265	PyErr_Format(PyExc_TypeError,
3266	"decoder did not return a string/unicode object "
3267	"(type=%.400s)",
3268	v->ob_type->tp_name);
3269	Py_DECREF(v);
3270	return NULL;
3271	}
3272	return v;
3273
3274	onError:
3275	return NULL;
3276	}
3277
3278
3279	PyDoc_STRVAR(expandtabs__doc__,
3280	"S.expandtabs([tabsize]) -> string\n\
3281	\n\
3282	Return a copy of S where all tab characters are expanded using spaces.\n\
3283	If tabsize is not given, a tab size of 8 characters is assumed.");
3284
3285	static PyObject*
3286	string_expandtabs(PyStringObject self, PyObject args)
3287	{
3288	const char e, p;
3289	char *q;
3290	Py_ssize_t i, j;
3291	PyObject *u;
3292	int tabsize = 8;
3293
3294	if (!PyArg_ParseTuple(args, "\|i:expandtabs", &tabsize))
3295	return NULL;
3296
3297	/* First pass: determine size of output string */
3298	i = j = 0;
3299	e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
3300	for (p = PyString_AS_STRING(self); p < e; p++)
3301	if (*p == '\t') {
3302	if (tabsize > 0)
3303	j += tabsize - (j % tabsize);
3304	}
3305	else {
3306	j++;
3307	if (p == '\n' \|\| p == '\r') {
3308	i += j;
3309	j = 0;
3310	}
3311	}
3312
3313	/* Second pass: create output string and fill it */
3314	u = PyString_FromStringAndSize(NULL, i + j);
3315	if (!u)
3316	return NULL;
3317
3318	j = 0;
3319	q = PyString_AS_STRING(u);
3320
3321	for (p = PyString_AS_STRING(self); p < e; p++)
3322	if (*p == '\t') {
3323	if (tabsize > 0) {
3324	i = tabsize - (j % tabsize);
3325	j += i;
3326	while (i--)
3327	*q++ = ' ';
3328	}
3329	}
3330	else {
3331	j++;
3332	q++ = p;
3333	if (p == '\n' \|\| p == '\r')
3334	j = 0;
3335	}
3336
3337	return u;
3338	}
3339
3340	Py_LOCAL_INLINE(PyObject *)
3341	pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
3342	{
3343	PyObject *u;
3344
3345	if (left < 0)
3346	left = 0;
3347	if (right < 0)
3348	right = 0;
3349
3350	if (left == 0 && right == 0 && PyString_CheckExact(self)) {
3351	Py_INCREF(self);
3352	return (PyObject *)self;
3353	}
3354
3355	u = PyString_FromStringAndSize(NULL,
3356	left + PyString_GET_SIZE(self) + right);
3357	if (u) {
3358	if (left)
3359	memset(PyString_AS_STRING(u), fill, left);
3360	Py_MEMCPY(PyString_AS_STRING(u) + left,
3361	PyString_AS_STRING(self),
3362	PyString_GET_SIZE(self));
3363	if (right)
3364	memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
3365	fill, right);
3366	}
3367
3368	return u;
3369	}
3370
3371	PyDoc_STRVAR(ljust__doc__,
3372	"S.ljust(width[, fillchar]) -> string\n"
3373	"\n"
3374	"Return S left justified in a string of length width. Padding is\n"
3375	"done using the specified fill character (default is a space).");
3376
3377	static PyObject *
3378	string_ljust(PyStringObject self, PyObject args)
3379	{
3380	Py_ssize_t width;
3381	char fillchar = ' ';
3382
3383	if (!PyArg_ParseTuple(args, "n\|c:ljust", &width, &fillchar))
3384	return NULL;
3385
3386	if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
3387	Py_INCREF(self);
3388	return (PyObject*) self;
3389	}
3390
3391	return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
3392	}
3393
3394
3395	PyDoc_STRVAR(rjust__doc__,
3396	"S.rjust(width[, fillchar]) -> string\n"
3397	"\n"
3398	"Return S right justified in a string of length width. Padding is\n"
3399	"done using the specified fill character (default is a space)");
3400
3401	static PyObject *
3402	string_rjust(PyStringObject self, PyObject args)
3403	{
3404	Py_ssize_t width;
3405	char fillchar = ' ';
3406
3407	if (!PyArg_ParseTuple(args, "n\|c:rjust", &width, &fillchar))
3408	return NULL;
3409
3410	if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
3411	Py_INCREF(self);
3412	return (PyObject*) self;
3413	}
3414
3415	return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
3416	}
3417
3418
3419	PyDoc_STRVAR(center__doc__,
3420	"S.center(width[, fillchar]) -> string\n"
3421	"\n"
3422	"Return S centered in a string of length width. Padding is\n"
3423	"done using the specified fill character (default is a space)");
3424
3425	static PyObject *
3426	string_center(PyStringObject self, PyObject args)
3427	{
3428	Py_ssize_t marg, left;
3429	Py_ssize_t width;
3430	char fillchar = ' ';
3431
3432	if (!PyArg_ParseTuple(args, "n\|c:center", &width, &fillchar))
3433	return NULL;
3434
3435	if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
3436	Py_INCREF(self);
3437	return (PyObject*) self;
3438	}
3439
3440	marg = width - PyString_GET_SIZE(self);
3441	left = marg / 2 + (marg & width & 1);
3442
3443	return pad(self, left, marg - left, fillchar);
3444	}
3445
3446	PyDoc_STRVAR(zfill__doc__,
3447	"S.zfill(width) -> string\n"
3448	"\n"
3449	"Pad a numeric string S with zeros on the left, to fill a field\n"
3450	"of the specified width. The string S is never truncated.");
3451
3452	static PyObject *
3453	string_zfill(PyStringObject self, PyObject args)
3454	{
3455	Py_ssize_t fill;
3456	PyObject *s;
3457	char *p;
3458	Py_ssize_t width;
3459
3460	if (!PyArg_ParseTuple(args, "n:zfill", &width))
3461	return NULL;
3462
3463	if (PyString_GET_SIZE(self) >= width) {
3464	if (PyString_CheckExact(self)) {
3465	Py_INCREF(self);
3466	return (PyObject*) self;
3467	}
3468	else
3469	return PyString_FromStringAndSize(
3470	PyString_AS_STRING(self),
3471	PyString_GET_SIZE(self)
3472	);
3473	}
3474
3475	fill = width - PyString_GET_SIZE(self);
3476
3477	s = pad(self, fill, 0, '0');
3478
3479	if (s == NULL)
3480	return NULL;
3481
3482	p = PyString_AS_STRING(s);
3483	if (p[fill] == '+' \|\| p[fill] == '-') {
3484	/* move sign to beginning of string */
3485	p[0] = p[fill];
3486	p[fill] = '0';
3487	}
3488
3489	return (PyObject*) s;
3490	}
3491
3492	PyDoc_STRVAR(isspace__doc__,
3493	"S.isspace() -> bool\n\
3494	\n\
3495	Return True if all characters in S are whitespace\n\
3496	and there is at least one character in S, False otherwise.");
3497
3498	static PyObject*
3499	string_isspace(PyStringObject *self)
3500	{
3501	register const unsigned char *p
3502	= (unsigned char *) PyString_AS_STRING(self);
3503	register const unsigned char *e;
3504
3505	/* Shortcut for single character strings */
3506	if (PyString_GET_SIZE(self) == 1 &&
3507	isspace(*p))
3508	return PyBool_FromLong(1);
3509
3510	/* Special case for empty strings */
3511	if (PyString_GET_SIZE(self) == 0)
3512	return PyBool_FromLong(0);
3513
3514	e = p + PyString_GET_SIZE(self);
3515	for (; p < e; p++) {
3516	if (!isspace(*p))
3517	return PyBool_FromLong(0);
3518	}
3519	return PyBool_FromLong(1);
3520	}
3521
3522
3523	PyDoc_STRVAR(isalpha__doc__,
3524	"S.isalpha() -> bool\n\
3525	\n\
3526	Return True if all characters in S are alphabetic\n\
3527	and there is at least one character in S, False otherwise.");
3528
3529	static PyObject*
3530	string_isalpha(PyStringObject *self)
3531	{
3532	register const unsigned char *p
3533	= (unsigned char *) PyString_AS_STRING(self);
3534	register const unsigned char *e;
3535
3536	/* Shortcut for single character strings */
3537	if (PyString_GET_SIZE(self) == 1 &&
3538	isalpha(*p))
3539	return PyBool_FromLong(1);
3540
3541	/* Special case for empty strings */
3542	if (PyString_GET_SIZE(self) == 0)
3543	return PyBool_FromLong(0);
3544
3545	e = p + PyString_GET_SIZE(self);
3546	for (; p < e; p++) {
3547	if (!isalpha(*p))
3548	return PyBool_FromLong(0);
3549	}
3550	return PyBool_FromLong(1);
3551	}
3552
3553
3554	PyDoc_STRVAR(isalnum__doc__,
3555	"S.isalnum() -> bool\n\
3556	\n\
3557	Return True if all characters in S are alphanumeric\n\
3558	and there is at least one character in S, False otherwise.");
3559
3560	static PyObject*
3561	string_isalnum(PyStringObject *self)
3562	{
3563	register const unsigned char *p
3564	= (unsigned char *) PyString_AS_STRING(self);
3565	register const unsigned char *e;
3566
3567	/* Shortcut for single character strings */
3568	if (PyString_GET_SIZE(self) == 1 &&
3569	isalnum(*p))
3570	return PyBool_FromLong(1);
3571
3572	/* Special case for empty strings */
3573	if (PyString_GET_SIZE(self) == 0)
3574	return PyBool_FromLong(0);
3575
3576	e = p + PyString_GET_SIZE(self);
3577	for (; p < e; p++) {
3578	if (!isalnum(*p))
3579	return PyBool_FromLong(0);
3580	}
3581	return PyBool_FromLong(1);
3582	}
3583
3584
3585	PyDoc_STRVAR(isdigit__doc__,
3586	"S.isdigit() -> bool\n\
3587	\n\
3588	Return True if all characters in S are digits\n\
3589	and there is at least one character in S, False otherwise.");
3590
3591	static PyObject*
3592	string_isdigit(PyStringObject *self)
3593	{
3594	register const unsigned char *p
3595	= (unsigned char *) PyString_AS_STRING(self);
3596	register const unsigned char *e;
3597
3598	/* Shortcut for single character strings */
3599	if (PyString_GET_SIZE(self) == 1 &&
3600	isdigit(*p))
3601	return PyBool_FromLong(1);
3602
3603	/* Special case for empty strings */
3604	if (PyString_GET_SIZE(self) == 0)
3605	return PyBool_FromLong(0);
3606
3607	e = p + PyString_GET_SIZE(self);
3608	for (; p < e; p++) {
3609	if (!isdigit(*p))
3610	return PyBool_FromLong(0);
3611	}
3612	return PyBool_FromLong(1);
3613	}
3614
3615
3616	PyDoc_STRVAR(islower__doc__,
3617	"S.islower() -> bool\n\
3618	\n\
3619	Return True if all cased characters in S are lowercase and there is\n\
3620	at least one cased character in S, False otherwise.");
3621
3622	static PyObject*
3623	string_islower(PyStringObject *self)
3624	{
3625	register const unsigned char *p
3626	= (unsigned char *) PyString_AS_STRING(self);
3627	register const unsigned char *e;
3628	int cased;
3629
3630	/* Shortcut for single character strings */
3631	if (PyString_GET_SIZE(self) == 1)
3632	return PyBool_FromLong(islower(*p) != 0);
3633
3634	/* Special case for empty strings */
3635	if (PyString_GET_SIZE(self) == 0)
3636	return PyBool_FromLong(0);
3637
3638	e = p + PyString_GET_SIZE(self);
3639	cased = 0;
3640	for (; p < e; p++) {
3641	if (isupper(*p))
3642	return PyBool_FromLong(0);
3643	else if (!cased && islower(*p))
3644	cased = 1;
3645	}
3646	return PyBool_FromLong(cased);
3647	}
3648
3649
3650	PyDoc_STRVAR(isupper__doc__,
3651	"S.isupper() -> bool\n\
3652	\n\
3653	Return True if all cased characters in S are uppercase and there is\n\
3654	at least one cased character in S, False otherwise.");
3655
3656	static PyObject*
3657	string_isupper(PyStringObject *self)
3658	{
3659	register const unsigned char *p
3660	= (unsigned char *) PyString_AS_STRING(self);
3661	register const unsigned char *e;
3662	int cased;
3663
3664	/* Shortcut for single character strings */
3665	if (PyString_GET_SIZE(self) == 1)
3666	return PyBool_FromLong(isupper(*p) != 0);
3667
3668	/* Special case for empty strings */
3669	if (PyString_GET_SIZE(self) == 0)
3670	return PyBool_FromLong(0);
3671
3672	e = p + PyString_GET_SIZE(self);
3673	cased = 0;
3674	for (; p < e; p++) {
3675	if (islower(*p))
3676	return PyBool_FromLong(0);
3677	else if (!cased && isupper(*p))
3678	cased = 1;
3679	}
3680	return PyBool_FromLong(cased);
3681	}
3682
3683
3684	PyDoc_STRVAR(istitle__doc__,
3685	"S.istitle() -> bool\n\
3686	\n\
3687	Return True if S is a titlecased string and there is at least one\n\
3688	character in S, i.e. uppercase characters may only follow uncased\n\
3689	characters and lowercase characters only cased ones. Return False\n\
3690	otherwise.");
3691
3692	static PyObject*
3693	string_istitle(PyStringObject self, PyObject uncased)
3694	{
3695	register const unsigned char *p
3696	= (unsigned char *) PyString_AS_STRING(self);
3697	register const unsigned char *e;
3698	int cased, previous_is_cased;
3699
3700	/* Shortcut for single character strings */
3701	if (PyString_GET_SIZE(self) == 1)
3702	return PyBool_FromLong(isupper(*p) != 0);
3703
3704	/* Special case for empty strings */
3705	if (PyString_GET_SIZE(self) == 0)
3706	return PyBool_FromLong(0);
3707
3708	e = p + PyString_GET_SIZE(self);
3709	cased = 0;
3710	previous_is_cased = 0;
3711	for (; p < e; p++) {
3712	register const unsigned char ch = *p;
3713
3714	if (isupper(ch)) {
3715	if (previous_is_cased)
3716	return PyBool_FromLong(0);
3717	previous_is_cased = 1;
3718	cased = 1;
3719	}
3720	else if (islower(ch)) {
3721	if (!previous_is_cased)
3722	return PyBool_FromLong(0);
3723	previous_is_cased = 1;
3724	cased = 1;
3725	}
3726	else
3727	previous_is_cased = 0;
3728	}
3729	return PyBool_FromLong(cased);
3730	}
3731
3732
3733	PyDoc_STRVAR(splitlines__doc__,
3734	"S.splitlines([keepends]) -> list of strings\n\
3735	\n\
3736	Return a list of the lines in S, breaking at line boundaries.\n\
3737	Line breaks are not included in the resulting list unless keepends\n\
3738	is given and true.");
3739
3740	static PyObject*
3741	string_splitlines(PyStringObject self, PyObject args)
3742	{
3743	register Py_ssize_t i;
3744	register Py_ssize_t j;
3745	Py_ssize_t len;
3746	int keepends = 0;
3747	PyObject *list;
3748	PyObject *str;
3749	char *data;
3750
3751	if (!PyArg_ParseTuple(args, "\|i:splitlines", &keepends))
3752	return NULL;
3753
3754	data = PyString_AS_STRING(self);
3755	len = PyString_GET_SIZE(self);
3756
3757	/* This does not use the preallocated list because splitlines is
3758	usually run with hundreds of newlines. The overhead of
3759	switching between PyList_SET_ITEM and append causes about a
3760	2-3% slowdown for that common case. A smarter implementation
3761	could move the if check out, so the SET_ITEMs are done first
3762	and the appends only done when the prealloc buffer is full.
3763	That's too much work for little gain.*/
3764
3765	list = PyList_New(0);
3766	if (!list)
3767	goto onError;
3768
3769	for (i = j = 0; i < len; ) {
3770	Py_ssize_t eol;
3771
3772	/* Find a line and append it */
3773	while (i < len && data[i] != '\n' && data[i] != '\r')
3774	i++;
3775
3776	/* Skip the line break reading CRLF as one line break */
3777	eol = i;
3778	if (i < len) {
3779	if (data[i] == '\r' && i + 1 < len &&
3780	data[i+1] == '\n')
3781	i += 2;
3782	else
3783	i++;
3784	if (keepends)
3785	eol = i;
3786	}
3787	SPLIT_APPEND(data, j, eol);
3788	j = i;
3789	}
3790	if (j < len) {
3791	SPLIT_APPEND(data, j, len);
3792	}
3793
3794	return list;
3795
3796	onError:
3797	Py_XDECREF(list);
3798	return NULL;
3799	}
3800
/* Retire the split helper macros.  SPLIT_APPEND is last used by
   string_splitlines() above; the other three are presumably its
   companions defined earlier in the file (not visible from here). */
#undef SPLIT_APPEND
#undef SPLIT_ADD
#undef MAX_PREALLOC
#undef PREALLOC_SIZE
3805
3806	static PyObject *
3807	string_getnewargs(PyStringObject *v)
3808	{
3809	return Py_BuildValue("(s#)", v->ob_sval, v->ob_size);
3810	}
3811
3812
3813
3814	static PyMethodDef
3815	string_methods[] = {
3816	/* Counterparts of the obsolete stropmodule functions; except
3817	string.maketrans(). */
3818	{"join", (PyCFunction)string_join, METH_O, join__doc__},
3819	{"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
3820	{"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
3821	{"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
3822	{"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
3823	{"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
3824	{"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
3825	{"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
3826	{"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
3827	{"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
3828	{"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
3829	{"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
3830	{"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
3831	capitalize__doc__},
3832	{"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
3833	{"endswith", (PyCFunction)string_endswith, METH_VARARGS,
3834	endswith__doc__},
3835	{"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
3836	{"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
3837	{"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
3838	{"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
3839	{"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
3840	{"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
3841	{"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
3842	{"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
3843	{"rpartition", (PyCFunction)string_rpartition, METH_O,
3844	rpartition__doc__},
3845	{"startswith", (PyCFunction)string_startswith, METH_VARARGS,
3846	startswith__doc__},
3847	{"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
3848	{"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
3849	swapcase__doc__},
3850	{"translate", (PyCFunction)string_translate, METH_VARARGS,
3851	translate__doc__},
3852	{"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
3853	{"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
3854	{"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
3855	{"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
3856	{"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
3857	{"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
3858	{"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
3859	{"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
3860	expandtabs__doc__},
3861	{"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
3862	splitlines__doc__},
3863	{"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
3864	{NULL, NULL} /* sentinel */
3865	};
3866
3867	static PyObject *
3868	str_subtype_new(PyTypeObject type, PyObject args, PyObject *kwds);
3869
3870	static PyObject *
3871	string_new(PyTypeObject type, PyObject args, PyObject *kwds)
3872	{
3873	PyObject *x = NULL;
3874	static char *kwlist[] = {"object", 0};
3875
3876	if (type != &PyString_Type)
3877	return str_subtype_new(type, args, kwds);
3878	if (!PyArg_ParseTupleAndKeywords(args, kwds, "\|O:str", kwlist, &x))
3879	return NULL;
3880	if (x == NULL)
3881	return PyString_FromString("");
3882	return PyObject_Str(x);
3883	}
3884
3885	static PyObject *
3886	str_subtype_new(PyTypeObject type, PyObject args, PyObject *kwds)
3887	{
3888	PyObject tmp, pnew;
3889	Py_ssize_t n;
3890
3891	assert(PyType_IsSubtype(type, &PyString_Type));
3892	tmp = string_new(&PyString_Type, args, kwds);
3893	if (tmp == NULL)
3894	return NULL;
3895	assert(PyString_CheckExact(tmp));
3896	n = PyString_GET_SIZE(tmp);
3897	pnew = type->tp_alloc(type, n);
3898	if (pnew != NULL) {
3899	Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
3900	((PyStringObject *)pnew)->ob_shash =
3901	((PyStringObject *)tmp)->ob_shash;
3902	((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
3903	}
3904	Py_DECREF(tmp);
3905	return pnew;
3906	}
3907
3908	static PyObject *
3909	basestring_new(PyTypeObject type, PyObject args, PyObject *kwds)
3910	{
3911	PyErr_SetString(PyExc_TypeError,
3912	"The basestring type cannot be instantiated");
3913	return NULL;
3914	}
3915
3916	static PyObject *
3917	string_mod(PyObject v, PyObject w)
3918	{
3919	if (!PyString_Check(v)) {
3920	Py_INCREF(Py_NotImplemented);
3921	return Py_NotImplemented;
3922	}
3923	return PyString_Format(v, w);
3924	}
3925
3926	PyDoc_STRVAR(basestring_doc,
3927	"Type basestring cannot be instantiated; it is the base for str and unicode.");
3928
3929	static PyNumberMethods string_as_number = {
3930	0, /nb_add/
3931	0, /nb_subtract/
3932	0, /nb_multiply/
3933	0, /nb_divide/
3934	string_mod, /nb_remainder/
3935	};
3936
3937
3938	PyTypeObject PyBaseString_Type = {
3939	PyObject_HEAD_INIT(&PyType_Type)
3940	0,
3941	"basestring",
3942	0,
3943	0,
3944	0, /* tp_dealloc */
3945	0, /* tp_print */
3946	0, /* tp_getattr */
3947	0, /* tp_setattr */
3948	0, /* tp_compare */
3949	0, /* tp_repr */
3950	0, /* tp_as_number */
3951	0, /* tp_as_sequence */
3952	0, /* tp_as_mapping */
3953	0, /* tp_hash */
3954	0, /* tp_call */
3955	0, /* tp_str */
3956	0, /* tp_getattro */
3957	0, /* tp_setattro */
3958	0, /* tp_as_buffer */
3959	Py_TPFLAGS_DEFAULT \| Py_TPFLAGS_BASETYPE, /* tp_flags */
3960	basestring_doc, /* tp_doc */
3961	0, /* tp_traverse */
3962	0, /* tp_clear */
3963	0, /* tp_richcompare */
3964	0, /* tp_weaklistoffset */
3965	0, /* tp_iter */
3966	0, /* tp_iternext */
3967	0, /* tp_methods */
3968	0, /* tp_members */
3969	0, /* tp_getset */
3970	&PyBaseObject_Type, /* tp_base */
3971	0, /* tp_dict */
3972	0, /* tp_descr_get */
3973	0, /* tp_descr_set */
3974	0, /* tp_dictoffset */
3975	0, /* tp_init */
3976	0, /* tp_alloc */
3977	basestring_new, /* tp_new */
3978	0, /* tp_free */
3979	};
3980
3981	PyDoc_STRVAR(string_doc,
3982	"str(object) -> string\n\
3983	\n\
3984	Return a nice string representation of the object.\n\
3985	If the argument is a string, the return value is the same object.");
3986
3987	PyTypeObject PyString_Type = {
3988	PyObject_HEAD_INIT(&PyType_Type)
3989	0,
3990	"str",
3991	sizeof(PyStringObject),
3992	sizeof(char),
3993	string_dealloc, /* tp_dealloc */
3994	(printfunc)string_print, /* tp_print */
3995	0, /* tp_getattr */
3996	0, /* tp_setattr */
3997	0, /* tp_compare */
3998	string_repr, /* tp_repr */
3999	&string_as_number, /* tp_as_number */
4000	&string_as_sequence, /* tp_as_sequence */
4001	&string_as_mapping, /* tp_as_mapping */
4002	(hashfunc)string_hash, /* tp_hash */
4003	0, /* tp_call */
4004	string_str, /* tp_str */
4005	PyObject_GenericGetAttr, /* tp_getattro */
4006	0, /* tp_setattro */
4007	&string_as_buffer, /* tp_as_buffer */
4008	Py_TPFLAGS_DEFAULT \| Py_TPFLAGS_CHECKTYPES \|
4009	Py_TPFLAGS_BASETYPE, /* tp_flags */
4010	string_doc, /* tp_doc */
4011	0, /* tp_traverse */
4012	0, /* tp_clear */
4013	(richcmpfunc)string_richcompare, /* tp_richcompare */
4014	0, /* tp_weaklistoffset */
4015	0, /* tp_iter */
4016	0, /* tp_iternext */
4017	string_methods, /* tp_methods */
4018	0, /* tp_members */
4019	0, /* tp_getset */
4020	&PyBaseString_Type, /* tp_base */
4021	0, /* tp_dict */
4022	0, /* tp_descr_get */
4023	0, /* tp_descr_set */
4024	0, /* tp_dictoffset */
4025	0, /* tp_init */
4026	0, /* tp_alloc */
4027	string_new, /* tp_new */
4028	PyObject_Del, /* tp_free */
4029	};
4030
4031	void
4032	PyString_Concat(register PyObject *pv, register PyObject w)
4033	{
4034	register PyObject *v;
4035	if (*pv == NULL)
4036	return;
4037	if (w == NULL \|\| !PyString_Check(*pv)) {
4038	Py_DECREF(*pv);
4039	*pv = NULL;
4040	return;
4041	}
4042	v = string_concat((PyStringObject ) pv, w);
4043	Py_DECREF(*pv);
4044	*pv = v;
4045	}
4046
4047	void
4048	PyString_ConcatAndDel(register PyObject *pv, register PyObject w)
4049	{
4050	PyString_Concat(pv, w);
4051	Py_XDECREF(w);
4052	}
4053
4054
4055	/* The following function breaks the notion that strings are immutable:
4056	it changes the size of a string. We get away with this only if there
4057	is only one module referencing the object. You can also think of it
4058	as creating a new string object and destroying the old one, only
4059	more efficiently. In any case, don't use this if the string may
4060	already be known to some other part of the code...
4061	Note that if there's not enough memory to resize the string, the original
4062	string object at pv is deallocated, pv is set to NULL, an "out of
4063	memory" exception is set, and -1 is returned. Else (on success) 0 is
4064	returned, and the value in *pv may or may not be the same as on input.
4065	As always, an extra byte is allocated for a trailing \0 byte (newsize
4066	does not include that), and a trailing \0 byte is stored.
4067	*/
4068
4069	int
4070	_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
4071	{
4072	register PyObject *v;
4073	register PyStringObject *sv;
4074	v = *pv;
4075	if (!PyString_Check(v) \|\| v->ob_refcnt != 1 \|\| newsize < 0 \|\|
4076	PyString_CHECK_INTERNED(v)) {
4077	*pv = 0;
4078	Py_DECREF(v);
4079	PyErr_BadInternalCall();
4080	return -1;
4081	}
4082	/* XXX UNREF/NEWREF interface should be more symmetrical */
4083	_Py_DEC_REFTOTAL;
4084	_Py_ForgetReference(v);
4085	pv = (PyObject )
4086	PyObject_REALLOC((char *)v, sizeof(PyStringObject) + newsize);
4087	if (*pv == NULL) {
4088	PyObject_Del(v);
4089	PyErr_NoMemory();
4090	return -1;
4091	}
4092	_Py_NewReference(*pv);
4093	sv = (PyStringObject ) pv;
4094	sv->ob_size = newsize;
4095	sv->ob_sval[newsize] = '\0';
4096	sv->ob_shash = -1; /* invalidate cached hash value */
4097	return 0;
4098	}
4099
4100	/* Helpers for formatstring */
4101
4102	Py_LOCAL_INLINE(PyObject *)
4103	getnextarg(PyObject args, Py_ssize_t arglen, Py_ssize_t p_argidx)
4104	{
4105	Py_ssize_t argidx = *p_argidx;
4106	if (argidx < arglen) {
4107	(*p_argidx)++;
4108	if (arglen < 0)
4109	return args;
4110	else
4111	return PyTuple_GetItem(args, argidx);
4112	}
4113	PyErr_SetString(PyExc_TypeError,
4114	"not enough arguments for format string");
4115	return NULL;
4116	}
4117
/* Format flag bits, one per flag character that PyString_Format() accepts
 * right after '%'.  They are OR-ed together into a single `flags` int.
 *
 *   F_LJUST  '-'  (also set when a '*' width argument is negative)
 *   F_SIGN   '+'
 *   F_BLANK  ' '
 *   F_ALT    '#'
 *   F_ZERO   '0'
 */
#define F_LJUST (1<<0)
#define F_SIGN (1<<1)
#define F_BLANK (1<<2)
#define F_ALT (1<<3)
#define F_ZERO (1<<4)
4130
4131	Py_LOCAL_INLINE(int)
4132	formatfloat(char *buf, size_t buflen, int flags,
4133	int prec, int type, PyObject *v)
4134	{
4135	/* fmt = '%#.' + `prec` + `type`
4136	worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
4137	char fmt[20];
4138	double x;
4139	x = PyFloat_AsDouble(v);
4140	if (x == -1.0 && PyErr_Occurred()) {
4141	PyErr_SetString(PyExc_TypeError, "float argument required");
4142	return -1;
4143	}
4144	if (prec < 0)
4145	prec = 6;
4146	if (type == 'f' && fabs(x)/1e25 >= 1e25)
4147	type = 'g';
4148	/* Worst case length calc to ensure no buffer overrun:
4149
4150	'g' formats:
4151	fmt = %#.<prec>g
4152	buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
4153	for any double rep.)
4154	len = 1 + prec + 1 + 2 + 5 = 9 + prec
4155
4156	'f' formats:
4157	buf = '-' + [0-9]x + '.' + [0-9]prec (with x < 50)
4158	len = 1 + 50 + 1 + prec = 52 + prec
4159
4160	If prec=0 the effective precision is 1 (the leading digit is
4161	always given), therefore increase the length by one.
4162
4163	*/
4164	if ((type == 'g' && buflen <= (size_t)10 + (size_t)prec) \|\|
4165	(type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
4166	PyErr_SetString(PyExc_OverflowError,
4167	"formatted float is too long (precision too large?)");
4168	return -1;
4169	}
4170	PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
4171	(flags&F_ALT) ? "#" : "",
4172	prec, type);
4173	PyOS_ascii_formatd(buf, buflen, fmt, x);
4174	return (int)strlen(buf);
4175	}
4176
4177	/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
4178	* the F_ALT flag, for Python's long (unbounded) ints. It's not used for
4179	* Python's regular ints.
4180	* Return value: a new PyString*, or NULL if error.
4181	* . *pbuf is set to point into it,
4182	* *plen set to the # of chars following that.
4183	* Caller must decref it when done using pbuf.
4184	* The string starting at *pbuf is of the form
4185	* "-"? ("0x" \| "0X")? digit+
4186	* "0x"/"0X" are present only for x and X conversions, with F_ALT
4187	* set in flags. The case of hex digits will be correct,
4188	* There will be at least prec digits, zero-filled on the left if
4189	* necessary to get that many.
4190	* val object to be converted
4191	* flags bitmask of format flags; only F_ALT is looked at
4192	* prec minimum number of digits; 0-fill on left if needed
4193	* type a character in [duoxX]; u acts the same as d
4194	*
4195	* CAUTION: o, x and X conversions on regular ints can never
4196	* produce a '-' sign, but can for Python's unbounded ints.
4197	*/
4198	PyObject*
4199	_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
4200	char *pbuf, int plen)
4201	{
4202	PyObject *result = NULL;
4203	char *buf;
4204	Py_ssize_t i;
4205	int sign; /* 1 if '-', else 0 */
4206	int len; /* number of characters */
4207	Py_ssize_t llen;
4208	int numdigits; /* len == numnondigits + numdigits */
4209	int numnondigits = 0;
4210
4211	switch (type) {
4212	case 'd':
4213	case 'u':
4214	result = val->ob_type->tp_str(val);
4215	break;
4216	case 'o':
4217	result = val->ob_type->tp_as_number->nb_oct(val);
4218	break;
4219	case 'x':
4220	case 'X':
4221	numnondigits = 2;
4222	result = val->ob_type->tp_as_number->nb_hex(val);
4223	break;
4224	default:
4225	assert(!"'type' not in [duoxX]");
4226	}
4227	if (!result)
4228	return NULL;
4229
4230	buf = PyString_AsString(result);
4231	if (!buf) {
4232	Py_DECREF(result);
4233	return NULL;
4234	}
4235
4236	/* To modify the string in-place, there can only be one reference. */
4237	if (result->ob_refcnt != 1) {
4238	PyErr_BadInternalCall();
4239	return NULL;
4240	}
4241	llen = PyString_Size(result);
4242	if (llen > PY_SSIZE_T_MAX) {
4243	PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
4244	return NULL;
4245	}
4246	len = (int)llen;
4247	if (buf[len-1] == 'L') {
4248	--len;
4249	buf[len] = '\0';
4250	}
4251	sign = buf[0] == '-';
4252	numnondigits += sign;
4253	numdigits = len - numnondigits;
4254	assert(numdigits > 0);
4255
4256	/* Get rid of base marker unless F_ALT */
4257	if ((flags & F_ALT) == 0) {
4258	/* Need to skip 0x, 0X or 0. */
4259	int skipped = 0;
4260	switch (type) {
4261	case 'o':
4262	assert(buf[sign] == '0');
4263	/* If 0 is only digit, leave it alone. */
4264	if (numdigits > 1) {
4265	skipped = 1;
4266	--numdigits;
4267	}
4268	break;
4269	case 'x':
4270	case 'X':
4271	assert(buf[sign] == '0');
4272	assert(buf[sign + 1] == 'x');
4273	skipped = 2;
4274	numnondigits -= 2;
4275	break;
4276	}
4277	if (skipped) {
4278	buf += skipped;
4279	len -= skipped;
4280	if (sign)
4281	buf[0] = '-';
4282	}
4283	assert(len == numnondigits + numdigits);
4284	assert(numdigits > 0);
4285	}
4286
4287	/* Fill with leading zeroes to meet minimum width. */
4288	if (prec > numdigits) {
4289	PyObject *r1 = PyString_FromStringAndSize(NULL,
4290	numnondigits + prec);
4291	char *b1;
4292	if (!r1) {
4293	Py_DECREF(result);
4294	return NULL;
4295	}
4296	b1 = PyString_AS_STRING(r1);
4297	for (i = 0; i < numnondigits; ++i)
4298	b1++ = buf++;
4299	for (i = 0; i < prec - numdigits; i++)
4300	*b1++ = '0';
4301	for (i = 0; i < numdigits; i++)
4302	b1++ = buf++;
4303	*b1 = '\0';
4304	Py_DECREF(result);
4305	result = r1;
4306	buf = PyString_AS_STRING(result);
4307	len = numnondigits + prec;
4308	}
4309
4310	/* Fix up case for hex conversions. */
4311	if (type == 'X') {
4312	/* Need to convert all lower case letters to upper case.
4313	and need to convert 0x to 0X (and -0x to -0X). */
4314	for (i = 0; i < len; i++)
4315	if (buf[i] >= 'a' && buf[i] <= 'x')
4316	buf[i] -= 'a'-'A';
4317	}
4318	*pbuf = buf;
4319	*plen = len;
4320	return result;
4321	}
4322
4323	Py_LOCAL_INLINE(int)
4324	formatint(char *buf, size_t buflen, int flags,
4325	int prec, int type, PyObject *v)
4326	{
4327	/* fmt = '%#.' + `prec` + 'l' + `type`
4328	worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4329	+ 1 + 1 = 24 */
4330	char fmt[64]; /* plenty big enough! */
4331	char *sign;
4332	long x;
4333
4334	x = PyInt_AsLong(v);
4335	if (x == -1 && PyErr_Occurred()) {
4336	PyErr_SetString(PyExc_TypeError, "int argument required");
4337	return -1;
4338	}
4339	if (x < 0 && type == 'u') {
4340	type = 'd';
4341	}
4342	if (x < 0 && (type == 'x' \|\| type == 'X' \|\| type == 'o'))
4343	sign = "-";
4344	else
4345	sign = "";
4346	if (prec < 0)
4347	prec = 1;
4348
4349	if ((flags & F_ALT) &&
4350	(type == 'x' \|\| type == 'X')) {
4351	/* When converting under %#x or %#X, there are a number
4352	* of issues that cause pain:
4353	* - when 0 is being converted, the C standard leaves off
4354	* the '0x' or '0X', which is inconsistent with other
4355	* %#x/%#X conversions and inconsistent with Python's
4356	* hex() function
4357	* - there are platforms that violate the standard and
4358	* convert 0 with the '0x' or '0X'
4359	* (Metrowerks, Compaq Tru64)
4360	* - there are platforms that give '0x' when converting
4361	* under %#X, but convert 0 in accordance with the
4362	* standard (OS/2 EMX)
4363	*
4364	* We can achieve the desired consistency by inserting our
4365	* own '0x' or '0X' prefix, and substituting %x/%X in place
4366	* of %#x/%#X.
4367	*
4368	* Note that this is the same approach as used in
4369	* formatint() in unicodeobject.c
4370	*/
4371	PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4372	sign, type, prec, type);
4373	}
4374	else {
4375	PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4376	sign, (flags&F_ALT) ? "#" : "",
4377	prec, type);
4378	}
4379
4380	/* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4381	* worst case buf = '-0x' + [0-9]*prec, where prec >= 11
4382	*/
4383	if (buflen <= 14 \|\| buflen <= (size_t)3 + (size_t)prec) {
4384	PyErr_SetString(PyExc_OverflowError,
4385	"formatted integer is too long (precision too large?)");
4386	return -1;
4387	}
4388	if (sign[0])
4389	PyOS_snprintf(buf, buflen, fmt, -x);
4390	else
4391	PyOS_snprintf(buf, buflen, fmt, x);
4392	return (int)strlen(buf);
4393	}
4394
4395	Py_LOCAL_INLINE(int)
4396	formatchar(char buf, size_t buflen, PyObject v)
4397	{
4398	/* presume that the buffer is at least 2 characters long */
4399	if (PyString_Check(v)) {
4400	if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
4401	return -1;
4402	}
4403	else {
4404	if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
4405	return -1;
4406	}
4407	buf[1] = '\0';
4408	return 1;
4409	}
4410
/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)

   FORMATBUFLEN is the length of the buffer in which the floats, ints, &
   chars are formatted.  XXX This is a magic number.  Each formatting
   routine does bounds checking to ensure no overflow, but a better
   solution may be to malloc a buffer of appropriate size for each
   format.  For now, the current solution is sufficient.

   The expansion is fully parenthesized so that the cast cannot bind to a
   neighbouring operator at the point of use.
*/
#define FORMATBUFLEN ((size_t)120)
4420
4421	PyObject *
4422	PyString_Format(PyObject format, PyObject args)
4423	{
4424	char fmt, res;
4425	Py_ssize_t arglen, argidx;
4426	Py_ssize_t reslen, rescnt, fmtcnt;
4427	int args_owned = 0;
4428	PyObject result, orig_args;
4429	#ifdef Py_USING_UNICODE
4430	PyObject v, w;
4431	#endif
4432	PyObject *dict = NULL;
4433	if (format == NULL \|\| !PyString_Check(format) \|\| args == NULL) {
4434	PyErr_BadInternalCall();
4435	return NULL;
4436	}
4437	orig_args = args;
4438	fmt = PyString_AS_STRING(format);
4439	fmtcnt = PyString_GET_SIZE(format);
4440	reslen = rescnt = fmtcnt + 100;
4441	result = PyString_FromStringAndSize((char *)NULL, reslen);
4442	if (result == NULL)
4443	return NULL;
4444	res = PyString_AsString(result);
4445	if (PyTuple_Check(args)) {
4446	arglen = PyTuple_GET_SIZE(args);
4447	argidx = 0;
4448	}
4449	else {
4450	arglen = -1;
4451	argidx = -2;
4452	}
4453	if (args->ob_type->tp_as_mapping && !PyTuple_Check(args) &&
4454	!PyObject_TypeCheck(args, &PyBaseString_Type))
4455	dict = args;
4456	while (--fmtcnt >= 0) {
4457	if (*fmt != '%') {
4458	if (--rescnt < 0) {
4459	rescnt = fmtcnt + 100;
4460	reslen += rescnt;
4461	if (_PyString_Resize(&result, reslen) < 0)
4462	return NULL;
4463	res = PyString_AS_STRING(result)
4464	+ reslen - rescnt;
4465	--rescnt;
4466	}
4467	res++ = fmt++;
4468	}
4469	else {
4470	/* Got a format specifier */
4471	int flags = 0;
4472	Py_ssize_t width = -1;
4473	int prec = -1;
4474	int c = '\0';
4475	int fill;
4476	PyObject *v = NULL;
4477	PyObject *temp = NULL;
4478	char *pbuf;
4479	int sign;
4480	Py_ssize_t len;
4481	char formatbuf[FORMATBUFLEN];
4482	/* For format{float,int,char}() */
4483	#ifdef Py_USING_UNICODE
4484	char *fmt_start = fmt;
4485	Py_ssize_t argidx_start = argidx;
4486	#endif
4487
4488	fmt++;
4489	if (*fmt == '(') {
4490	char *keystart;
4491	Py_ssize_t keylen;
4492	PyObject *key;
4493	int pcount = 1;
4494
4495	if (dict == NULL) {
4496	PyErr_SetString(PyExc_TypeError,
4497	"format requires a mapping");
4498	goto error;
4499	}
4500	++fmt;
4501	--fmtcnt;
4502	keystart = fmt;
4503	/* Skip over balanced parentheses */
4504	while (pcount > 0 && --fmtcnt >= 0) {
4505	if (*fmt == ')')
4506	--pcount;
4507	else if (*fmt == '(')
4508	++pcount;
4509	fmt++;
4510	}
4511	keylen = fmt - keystart - 1;
4512	if (fmtcnt < 0 \|\| pcount > 0) {
4513	PyErr_SetString(PyExc_ValueError,
4514	"incomplete format key");
4515	goto error;
4516	}
4517	key = PyString_FromStringAndSize(keystart,
4518	keylen);
4519	if (key == NULL)
4520	goto error;
4521	if (args_owned) {
4522	Py_DECREF(args);
4523	args_owned = 0;
4524	}
4525	args = PyObject_GetItem(dict, key);
4526	Py_DECREF(key);
4527	if (args == NULL) {
4528	goto error;
4529	}
4530	args_owned = 1;
4531	arglen = -1;
4532	argidx = -2;
4533	}
4534	while (--fmtcnt >= 0) {
4535	switch (c = *fmt++) {
4536	case '-': flags \|= F_LJUST; continue;
4537	case '+': flags \|= F_SIGN; continue;
4538	case ' ': flags \|= F_BLANK; continue;
4539	case '#': flags \|= F_ALT; continue;
4540	case '0': flags \|= F_ZERO; continue;
4541	}
4542	break;
4543	}
4544	if (c == '*') {
4545	v = getnextarg(args, arglen, &argidx);
4546	if (v == NULL)
4547	goto error;
4548	if (!PyInt_Check(v)) {
4549	PyErr_SetString(PyExc_TypeError,
4550	"* wants int");
4551	goto error;
4552	}
4553	width = PyInt_AsLong(v);
4554	if (width < 0) {
4555	flags \|= F_LJUST;
4556	width = -width;
4557	}
4558	if (--fmtcnt >= 0)
4559	c = *fmt++;
4560	}
4561	else if (c >= 0 && isdigit(c)) {
4562	width = c - '0';
4563	while (--fmtcnt >= 0) {
4564	c = Py_CHARMASK(*fmt++);
4565	if (!isdigit(c))
4566	break;
4567	if ((width*10) / 10 != width) {
4568	PyErr_SetString(
4569	PyExc_ValueError,
4570	"width too big");
4571	goto error;
4572	}
4573	width = width*10 + (c - '0');
4574	}
4575	}
4576	if (c == '.') {
4577	prec = 0;
4578	if (--fmtcnt >= 0)
4579	c = *fmt++;
4580	if (c == '*') {
4581	v = getnextarg(args, arglen, &argidx);
4582	if (v == NULL)
4583	goto error;
4584	if (!PyInt_Check(v)) {
4585	PyErr_SetString(
4586	PyExc_TypeError,
4587	"* wants int");
4588	goto error;
4589	}
4590	prec = PyInt_AsLong(v);
4591	if (prec < 0)
4592	prec = 0;
4593	if (--fmtcnt >= 0)
4594	c = *fmt++;
4595	}
4596	else if (c >= 0 && isdigit(c)) {
4597	prec = c - '0';
4598	while (--fmtcnt >= 0) {
4599	c = Py_CHARMASK(*fmt++);
4600	if (!isdigit(c))
4601	break;
4602	if ((prec*10) / 10 != prec) {
4603	PyErr_SetString(
4604	PyExc_ValueError,
4605	"prec too big");
4606	goto error;
4607	}
4608	prec = prec*10 + (c - '0');
4609	}
4610	}
4611	} /* prec */
4612	if (fmtcnt >= 0) {
4613	if (c == 'h' \|\| c == 'l' \|\| c == 'L') {
4614	if (--fmtcnt >= 0)
4615	c = *fmt++;
4616	}
4617	}
4618	if (fmtcnt < 0) {
4619	PyErr_SetString(PyExc_ValueError,
4620	"incomplete format");
4621	goto error;
4622	}
4623	if (c != '%') {
4624	v = getnextarg(args, arglen, &argidx);
4625	if (v == NULL)
4626	goto error;
4627	}
4628	sign = 0;
4629	fill = ' ';
4630	switch (c) {
4631	case '%':
4632	pbuf = "%";
4633	len = 1;
4634	break;
4635	case 's':
4636	#ifdef Py_USING_UNICODE
4637	if (PyUnicode_Check(v)) {
4638	fmt = fmt_start;
4639	argidx = argidx_start;
4640	goto unicode;
4641	}
4642	#endif
4643	temp = _PyObject_Str(v);
4644	#ifdef Py_USING_UNICODE
4645	if (temp != NULL && PyUnicode_Check(temp)) {
4646	Py_DECREF(temp);
4647	fmt = fmt_start;
4648	argidx = argidx_start;
4649	goto unicode;
4650	}
4651	#endif
4652	/* Fall through */
4653	case 'r':
4654	if (c == 'r')
4655	temp = PyObject_Repr(v);
4656	if (temp == NULL)
4657	goto error;
4658	if (!PyString_Check(temp)) {
4659	PyErr_SetString(PyExc_TypeError,
4660	"%s argument has non-string str()");
4661	Py_DECREF(temp);
4662	goto error;
4663	}
4664	pbuf = PyString_AS_STRING(temp);
4665	len = PyString_GET_SIZE(temp);
4666	if (prec >= 0 && len > prec)
4667	len = prec;
4668	break;
4669	case 'i':
4670	case 'd':
4671	case 'u':
4672	case 'o':
4673	case 'x':
4674	case 'X':
4675	if (c == 'i')
4676	c = 'd';
4677	if (PyLong_Check(v)) {
4678	int ilen;
4679	temp = _PyString_FormatLong(v, flags,
4680	prec, c, &pbuf, &ilen);
4681	len = ilen;
4682	if (!temp)
4683	goto error;
4684	sign = 1;
4685	}
4686	else {
4687	pbuf = formatbuf;
4688	len = formatint(pbuf,
4689	sizeof(formatbuf),
4690	flags, prec, c, v);
4691	if (len < 0)
4692	goto error;
4693	sign = 1;
4694	}
4695	if (flags & F_ZERO)
4696	fill = '0';
4697	break;
4698	case 'e':
4699	case 'E':
4700	case 'f':
4701	case 'F':
4702	case 'g':
4703	case 'G':
4704	if (c == 'F')
4705	c = 'f';
4706	pbuf = formatbuf;
4707	len = formatfloat(pbuf, sizeof(formatbuf),
4708	flags, prec, c, v);
4709	if (len < 0)
4710	goto error;
4711	sign = 1;
4712	if (flags & F_ZERO)
4713	fill = '0';
4714	break;
4715	case 'c':
4716	#ifdef Py_USING_UNICODE
4717	if (PyUnicode_Check(v)) {
4718	fmt = fmt_start;
4719	argidx = argidx_start;
4720	goto unicode;
4721	}
4722	#endif
4723	pbuf = formatbuf;
4724	len = formatchar(pbuf, sizeof(formatbuf), v);
4725	if (len < 0)
4726	goto error;
4727	break;
4728	default:
4729	PyErr_Format(PyExc_ValueError,
4730	"unsupported format character '%c' (0x%x) "
4731	"at index %i",
4732	c, c,
4733	(int)(fmt - 1 - PyString_AsString(format)));
4734	goto error;
4735	}
4736	if (sign) {
4737	if (pbuf == '-' \|\| pbuf == '+') {
4738	sign = *pbuf++;
4739	len--;
4740	}
4741	else if (flags & F_SIGN)
4742	sign = '+';
4743	else if (flags & F_BLANK)
4744	sign = ' ';
4745	else
4746	sign = 0;
4747	}
4748	if (width < len)
4749	width = len;
4750	if (rescnt - (sign != 0) < width) {
4751	reslen -= rescnt;
4752	rescnt = width + fmtcnt + 100;
4753	reslen += rescnt;
4754	if (reslen < 0) {
4755	Py_DECREF(result);
4756	return PyErr_NoMemory();
4757	}
4758	if (_PyString_Resize(&result, reslen) < 0)
4759	return NULL;
4760	res = PyString_AS_STRING(result)
4761	+ reslen - rescnt;
4762	}
4763	if (sign) {
4764	if (fill != ' ')
4765	*res++ = sign;
4766	rescnt--;
4767	if (width > len)
4768	width--;
4769	}
4770	if ((flags & F_ALT) && (c == 'x' \|\| c == 'X')) {
4771	assert(pbuf[0] == '0');
4772	assert(pbuf[1] == c);
4773	if (fill != ' ') {
4774	res++ = pbuf++;
4775	res++ = pbuf++;
4776	}
4777	rescnt -= 2;
4778	width -= 2;
4779	if (width < 0)
4780	width = 0;
4781	len -= 2;
4782	}
4783	if (width > len && !(flags & F_LJUST)) {
4784	do {
4785	--rescnt;
4786	*res++ = fill;
4787	} while (--width > len);
4788	}
4789	if (fill == ' ') {
4790	if (sign)
4791	*res++ = sign;
4792	if ((flags & F_ALT) &&
4793	(c == 'x' \|\| c == 'X')) {
4794	assert(pbuf[0] == '0');
4795	assert(pbuf[1] == c);
4796	res++ = pbuf++;
4797	res++ = pbuf++;
4798	}
4799	}
4800	Py_MEMCPY(res, pbuf, len);
4801	res += len;
4802	rescnt -= len;
4803	while (--width >= len) {
4804	--rescnt;
4805	*res++ = ' ';
4806	}
4807	if (dict && (argidx < arglen) && c != '%') {
4808	PyErr_SetString(PyExc_TypeError,
4809	"not all arguments converted during string formatting");
4810	goto error;
4811	}
4812	Py_XDECREF(temp);
4813	} /* '%' */
4814	} /* until end */
4815	if (argidx < arglen && !dict) {
4816	PyErr_SetString(PyExc_TypeError,
4817	"not all arguments converted during string formatting");
4818	goto error;
4819	}
4820	if (args_owned) {
4821	Py_DECREF(args);
4822	}
4823	_PyString_Resize(&result, reslen - rescnt);
4824	return result;
4825
4826	#ifdef Py_USING_UNICODE
4827	unicode:
4828	if (args_owned) {
4829	Py_DECREF(args);
4830	args_owned = 0;
4831	}
4832	/* Fiddle args right (remove the first argidx arguments) */
4833	if (PyTuple_Check(orig_args) && argidx > 0) {
4834	PyObject *v;
4835	Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
4836	v = PyTuple_New(n);
4837	if (v == NULL)
4838	goto error;
4839	while (--n >= 0) {
4840	PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4841	Py_INCREF(w);
4842	PyTuple_SET_ITEM(v, n, w);
4843	}
4844	args = v;
4845	} else {
4846	Py_INCREF(orig_args);
4847	args = orig_args;
4848	}
4849	args_owned = 1;
4850	/* Take what we have of the result and let the Unicode formatting
4851	function format the rest of the input. */
4852	rescnt = res - PyString_AS_STRING(result);
4853	if (_PyString_Resize(&result, rescnt))
4854	goto error;
4855	fmtcnt = PyString_GET_SIZE(format) - \
4856	(fmt - PyString_AS_STRING(format));
4857	format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4858	if (format == NULL)
4859	goto error;
4860	v = PyUnicode_Format(format, args);
4861	Py_DECREF(format);
4862	if (v == NULL)
4863	goto error;
4864	/* Paste what we have (result) to what the Unicode formatting
4865	function returned (v) and return the result (or error) */
4866	w = PyUnicode_Concat(result, v);
4867	Py_DECREF(result);
4868	Py_DECREF(v);
4869	Py_DECREF(args);
4870	return w;
4871	#endif /* Py_USING_UNICODE */
4872
4873	error:
4874	Py_DECREF(result);
4875	if (args_owned) {
4876	Py_DECREF(args);
4877	}
4878	return NULL;
4879	}
4880
4881	void
4882	PyString_InternInPlace(PyObject **p)
4883	{
4884	register PyStringObject s = (PyStringObject )(*p);
4885	PyObject *t;
4886	if (s == NULL \|\| !PyString_Check(s))
4887	Py_FatalError("PyString_InternInPlace: strings only please!");
4888	/* If it's a string subclass, we don't really know what putting
4889	it in the interned dict might do. */
4890	if (!PyString_CheckExact(s))
4891	return;
4892	if (PyString_CHECK_INTERNED(s))
4893	return;
4894	if (interned == NULL) {
4895	interned = PyDict_New();
4896	if (interned == NULL) {
4897	PyErr_Clear(); /* Don't leave an exception */
4898	return;
4899	}
4900	}
4901	t = PyDict_GetItem(interned, (PyObject *)s);
4902	if (t) {
4903	Py_INCREF(t);
4904	Py_DECREF(*p);
4905	*p = t;
4906	return;
4907	}
4908
4909	if (PyDict_SetItem(interned, (PyObject )s, (PyObject )s) < 0) {
4910	PyErr_Clear();
4911	return;
4912	}
4913	/* The two references in interned are not counted by refcnt.
4914	The string deallocator will take care of this */
4915	s->ob_refcnt -= 2;
4916	PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
4917	}
4918
4919	void
4920	PyString_InternImmortal(PyObject **p)
4921	{
4922	PyString_InternInPlace(p);
4923	if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4924	PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4925	Py_INCREF(*p);
4926	}
4927	}
4928
4929
4930	PyObject *
4931	PyString_InternFromString(const char *cp)
4932	{
4933	PyObject *s = PyString_FromString(cp);
4934	if (s == NULL)
4935	return NULL;
4936	PyString_InternInPlace(&s);
4937	return s;
4938	}
4939
4940	void
4941	PyString_Fini(void)
4942	{
4943	int i;
4944	for (i = 0; i < UCHAR_MAX + 1; i++) {
4945	Py_XDECREF(characters[i]);
4946	characters[i] = NULL;
4947	}
4948	Py_XDECREF(nullstring);
4949	nullstring = NULL;
4950	}
4951
4952	void _Py_ReleaseInternedStrings(void)
4953	{
4954	PyObject *keys;
4955	PyStringObject *s;
4956	Py_ssize_t i, n;
4957
4958	if (interned == NULL \|\| !PyDict_Check(interned))
4959	return;
4960	keys = PyDict_Keys(interned);
4961	if (keys == NULL \|\| !PyList_Check(keys)) {
4962	PyErr_Clear();
4963	return;
4964	}
4965
4966	/* Since _Py_ReleaseInternedStrings() is intended to help a leak
4967	detector, interned strings are not forcibly deallocated; rather, we
4968	give them their stolen references back, and then clear and DECREF
4969	the interned dict. */
4970
4971	fprintf(stderr, "releasing interned strings\n");
4972	n = PyList_GET_SIZE(keys);
4973	for (i = 0; i < n; i++) {
4974	s = (PyStringObject *) PyList_GET_ITEM(keys, i);
4975	switch (s->ob_sstate) {
4976	case SSTATE_NOT_INTERNED:
4977	/* XXX Shouldn't happen */
4978	break;
4979	case SSTATE_INTERNED_IMMORTAL:
4980	s->ob_refcnt += 1;
4981	break;
4982	case SSTATE_INTERNED_MORTAL:
4983	s->ob_refcnt += 2;
4984	break;
4985	default:
4986	Py_FatalError("Inconsistent interned string state.");
4987	}
4988	s->ob_sstate = SSTATE_NOT_INTERNED;
4989	}
4990	Py_DECREF(keys);
4991	PyDict_Clear(interned);
4992	Py_DECREF(interned);
4993	interned = NULL;
4994	}

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: vendor/python/2.5/Objects/stringobject.c

Download in other formats: