source: vendor/python/2.5/Objects/stringobject.c

Last change on this file was 3225, checked in by bird, 18 years ago

Python 2.5

File size: 119.5 KB
Line 
1/* String object implementation */
2
3#define PY_SSIZE_T_CLEAN
4
5#include "Python.h"
6
7#include <ctype.h>
8
9#ifdef COUNT_ALLOCS
10int null_strings, one_strings;
11#endif
12
13static PyStringObject *characters[UCHAR_MAX + 1];
14static PyStringObject *nullstring;
15
16/* This dictionary holds all interned strings. Note that references to
17 strings in this dictionary are *not* counted in the string's ob_refcnt.
18 When the interned string reaches a refcnt of 0 the string deallocation
19 function will delete the reference from this dictionary.
20
21 Another way to look at this is to say that the actual reference
22 count of a string is: s->ob_refcnt + (s->ob_sstate?2:0)
23*/
24static PyObject *interned;
25
26/*
27 For both PyString_FromString() and PyString_FromStringAndSize(), the
28 parameter `size' denotes number of characters to allocate, not counting any
29 null terminating character.
30
31 For PyString_FromString(), the parameter `str' points to a null-terminated
32 string containing exactly `size' bytes.
33
34 For PyString_FromStringAndSize(), the parameter `str' is
35 either NULL or else points to a string containing at least `size' bytes.
36 For PyString_FromStringAndSize(), the string in the `str' parameter does
37 not have to be null-terminated. (Therefore it is safe to construct a
38 substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
39 If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
40 bytes (setting the last byte to the null terminating character) and you can
41 fill in the data yourself. If `str' is non-NULL then the resulting
42 PyString object must be treated as immutable and you must not fill in nor
43 alter the data yourself, since the strings may be shared.
44
45 The PyObject member `op->ob_size', which denotes the number of "extra
46 items" in a variable-size object, will contain the number of bytes
47 allocated for string data, not counting the null terminating character. It
48 is therefore equal to the `size' parameter (for
49 PyString_FromStringAndSize()) or the length of the string in the `str'
50 parameter (for PyString_FromString()).
51*/
52PyObject *
53PyString_FromStringAndSize(const char *str, Py_ssize_t size)
54{
55 register PyStringObject *op;
56 assert(size >= 0);
57 if (size == 0 && (op = nullstring) != NULL) {
58#ifdef COUNT_ALLOCS
59 null_strings++;
60#endif
61 Py_INCREF(op);
62 return (PyObject *)op;
63 }
64 if (size == 1 && str != NULL &&
65 (op = characters[*str & UCHAR_MAX]) != NULL)
66 {
67#ifdef COUNT_ALLOCS
68 one_strings++;
69#endif
70 Py_INCREF(op);
71 return (PyObject *)op;
72 }
73
74 /* Inline PyObject_NewVar */
75 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
76 if (op == NULL)
77 return PyErr_NoMemory();
78 PyObject_INIT_VAR(op, &PyString_Type, size);
79 op->ob_shash = -1;
80 op->ob_sstate = SSTATE_NOT_INTERNED;
81 if (str != NULL)
82 Py_MEMCPY(op->ob_sval, str, size);
83 op->ob_sval[size] = '\0';
84 /* share short strings */
85 if (size == 0) {
86 PyObject *t = (PyObject *)op;
87 PyString_InternInPlace(&t);
88 op = (PyStringObject *)t;
89 nullstring = op;
90 Py_INCREF(op);
91 } else if (size == 1 && str != NULL) {
92 PyObject *t = (PyObject *)op;
93 PyString_InternInPlace(&t);
94 op = (PyStringObject *)t;
95 characters[*str & UCHAR_MAX] = op;
96 Py_INCREF(op);
97 }
98 return (PyObject *) op;
99}
100
101PyObject *
102PyString_FromString(const char *str)
103{
104 register size_t size;
105 register PyStringObject *op;
106
107 assert(str != NULL);
108 size = strlen(str);
109 if (size > PY_SSIZE_T_MAX) {
110 PyErr_SetString(PyExc_OverflowError,
111 "string is too long for a Python string");
112 return NULL;
113 }
114 if (size == 0 && (op = nullstring) != NULL) {
115#ifdef COUNT_ALLOCS
116 null_strings++;
117#endif
118 Py_INCREF(op);
119 return (PyObject *)op;
120 }
121 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
122#ifdef COUNT_ALLOCS
123 one_strings++;
124#endif
125 Py_INCREF(op);
126 return (PyObject *)op;
127 }
128
129 /* Inline PyObject_NewVar */
130 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
131 if (op == NULL)
132 return PyErr_NoMemory();
133 PyObject_INIT_VAR(op, &PyString_Type, size);
134 op->ob_shash = -1;
135 op->ob_sstate = SSTATE_NOT_INTERNED;
136 Py_MEMCPY(op->ob_sval, str, size+1);
137 /* share short strings */
138 if (size == 0) {
139 PyObject *t = (PyObject *)op;
140 PyString_InternInPlace(&t);
141 op = (PyStringObject *)t;
142 nullstring = op;
143 Py_INCREF(op);
144 } else if (size == 1) {
145 PyObject *t = (PyObject *)op;
146 PyString_InternInPlace(&t);
147 op = (PyStringObject *)t;
148 characters[*str & UCHAR_MAX] = op;
149 Py_INCREF(op);
150 }
151 return (PyObject *) op;
152}
153
154PyObject *
155PyString_FromFormatV(const char *format, va_list vargs)
156{
157 va_list count;
158 Py_ssize_t n = 0;
159 const char* f;
160 char *s;
161 PyObject* string;
162
163#ifdef VA_LIST_IS_ARRAY
164 Py_MEMCPY(count, vargs, sizeof(va_list));
165#else
166#ifdef __va_copy
167 __va_copy(count, vargs);
168#else
169 count = vargs;
170#endif
171#endif
172 /* step 1: figure out how large a buffer we need */
173 for (f = format; *f; f++) {
174 if (*f == '%') {
175 const char* p = f;
176 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
177 ;
178
179 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
180 * they don't affect the amount of space we reserve.
181 */
182 if ((*f == 'l' || *f == 'z') &&
183 (f[1] == 'd' || f[1] == 'u'))
184 ++f;
185
186 switch (*f) {
187 case 'c':
188 (void)va_arg(count, int);
189 /* fall through... */
190 case '%':
191 n++;
192 break;
193 case 'd': case 'u': case 'i': case 'x':
194 (void) va_arg(count, int);
195 /* 20 bytes is enough to hold a 64-bit
196 integer. Decimal takes the most space.
197 This isn't enough for octal. */
198 n += 20;
199 break;
200 case 's':
201 s = va_arg(count, char*);
202 n += strlen(s);
203 break;
204 case 'p':
205 (void) va_arg(count, int);
206 /* maximum 64-bit pointer representation:
207 * 0xffffffffffffffff
208 * so 19 characters is enough.
209 * XXX I count 18 -- what's the extra for?
210 */
211 n += 19;
212 break;
213 default:
214 /* if we stumble upon an unknown
215 formatting code, copy the rest of
216 the format string to the output
217 string. (we cannot just skip the
218 code, since there's no way to know
219 what's in the argument list) */
220 n += strlen(p);
221 goto expand;
222 }
223 } else
224 n++;
225 }
226 expand:
227 /* step 2: fill the buffer */
228 /* Since we've analyzed how much space we need for the worst case,
229 use sprintf directly instead of the slower PyOS_snprintf. */
230 string = PyString_FromStringAndSize(NULL, n);
231 if (!string)
232 return NULL;
233
234 s = PyString_AsString(string);
235
236 for (f = format; *f; f++) {
237 if (*f == '%') {
238 const char* p = f++;
239 Py_ssize_t i;
240 int longflag = 0;
241 int size_tflag = 0;
242 /* parse the width.precision part (we're only
243 interested in the precision value, if any) */
244 n = 0;
245 while (isdigit(Py_CHARMASK(*f)))
246 n = (n*10) + *f++ - '0';
247 if (*f == '.') {
248 f++;
249 n = 0;
250 while (isdigit(Py_CHARMASK(*f)))
251 n = (n*10) + *f++ - '0';
252 }
253 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
254 f++;
255 /* handle the long flag, but only for %ld and %lu.
256 others can be added when necessary. */
257 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
258 longflag = 1;
259 ++f;
260 }
261 /* handle the size_t flag. */
262 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
263 size_tflag = 1;
264 ++f;
265 }
266
267 switch (*f) {
268 case 'c':
269 *s++ = va_arg(vargs, int);
270 break;
271 case 'd':
272 if (longflag)
273 sprintf(s, "%ld", va_arg(vargs, long));
274 else if (size_tflag)
275 sprintf(s, "%" PY_FORMAT_SIZE_T "d",
276 va_arg(vargs, Py_ssize_t));
277 else
278 sprintf(s, "%d", va_arg(vargs, int));
279 s += strlen(s);
280 break;
281 case 'u':
282 if (longflag)
283 sprintf(s, "%lu",
284 va_arg(vargs, unsigned long));
285 else if (size_tflag)
286 sprintf(s, "%" PY_FORMAT_SIZE_T "u",
287 va_arg(vargs, size_t));
288 else
289 sprintf(s, "%u",
290 va_arg(vargs, unsigned int));
291 s += strlen(s);
292 break;
293 case 'i':
294 sprintf(s, "%i", va_arg(vargs, int));
295 s += strlen(s);
296 break;
297 case 'x':
298 sprintf(s, "%x", va_arg(vargs, int));
299 s += strlen(s);
300 break;
301 case 's':
302 p = va_arg(vargs, char*);
303 i = strlen(p);
304 if (n > 0 && i > n)
305 i = n;
306 Py_MEMCPY(s, p, i);
307 s += i;
308 break;
309 case 'p':
310 sprintf(s, "%p", va_arg(vargs, void*));
311 /* %p is ill-defined: ensure leading 0x. */
312 if (s[1] == 'X')
313 s[1] = 'x';
314 else if (s[1] != 'x') {
315 memmove(s+2, s, strlen(s)+1);
316 s[0] = '0';
317 s[1] = 'x';
318 }
319 s += strlen(s);
320 break;
321 case '%':
322 *s++ = '%';
323 break;
324 default:
325 strcpy(s, p);
326 s += strlen(s);
327 goto end;
328 }
329 } else
330 *s++ = *f;
331 }
332
333 end:
334 _PyString_Resize(&string, s - PyString_AS_STRING(string));
335 return string;
336}
337
338PyObject *
339PyString_FromFormat(const char *format, ...)
340{
341 PyObject* ret;
342 va_list vargs;
343
344#ifdef HAVE_STDARG_PROTOTYPES
345 va_start(vargs, format);
346#else
347 va_start(vargs);
348#endif
349 ret = PyString_FromFormatV(format, vargs);
350 va_end(vargs);
351 return ret;
352}
353
354
355PyObject *PyString_Decode(const char *s,
356 Py_ssize_t size,
357 const char *encoding,
358 const char *errors)
359{
360 PyObject *v, *str;
361
362 str = PyString_FromStringAndSize(s, size);
363 if (str == NULL)
364 return NULL;
365 v = PyString_AsDecodedString(str, encoding, errors);
366 Py_DECREF(str);
367 return v;
368}
369
370PyObject *PyString_AsDecodedObject(PyObject *str,
371 const char *encoding,
372 const char *errors)
373{
374 PyObject *v;
375
376 if (!PyString_Check(str)) {
377 PyErr_BadArgument();
378 goto onError;
379 }
380
381 if (encoding == NULL) {
382#ifdef Py_USING_UNICODE
383 encoding = PyUnicode_GetDefaultEncoding();
384#else
385 PyErr_SetString(PyExc_ValueError, "no encoding specified");
386 goto onError;
387#endif
388 }
389
390 /* Decode via the codec registry */
391 v = PyCodec_Decode(str, encoding, errors);
392 if (v == NULL)
393 goto onError;
394
395 return v;
396
397 onError:
398 return NULL;
399}
400
401PyObject *PyString_AsDecodedString(PyObject *str,
402 const char *encoding,
403 const char *errors)
404{
405 PyObject *v;
406
407 v = PyString_AsDecodedObject(str, encoding, errors);
408 if (v == NULL)
409 goto onError;
410
411#ifdef Py_USING_UNICODE
412 /* Convert Unicode to a string using the default encoding */
413 if (PyUnicode_Check(v)) {
414 PyObject *temp = v;
415 v = PyUnicode_AsEncodedString(v, NULL, NULL);
416 Py_DECREF(temp);
417 if (v == NULL)
418 goto onError;
419 }
420#endif
421 if (!PyString_Check(v)) {
422 PyErr_Format(PyExc_TypeError,
423 "decoder did not return a string object (type=%.400s)",
424 v->ob_type->tp_name);
425 Py_DECREF(v);
426 goto onError;
427 }
428
429 return v;
430
431 onError:
432 return NULL;
433}
434
435PyObject *PyString_Encode(const char *s,
436 Py_ssize_t size,
437 const char *encoding,
438 const char *errors)
439{
440 PyObject *v, *str;
441
442 str = PyString_FromStringAndSize(s, size);
443 if (str == NULL)
444 return NULL;
445 v = PyString_AsEncodedString(str, encoding, errors);
446 Py_DECREF(str);
447 return v;
448}
449
450PyObject *PyString_AsEncodedObject(PyObject *str,
451 const char *encoding,
452 const char *errors)
453{
454 PyObject *v;
455
456 if (!PyString_Check(str)) {
457 PyErr_BadArgument();
458 goto onError;
459 }
460
461 if (encoding == NULL) {
462#ifdef Py_USING_UNICODE
463 encoding = PyUnicode_GetDefaultEncoding();
464#else
465 PyErr_SetString(PyExc_ValueError, "no encoding specified");
466 goto onError;
467#endif
468 }
469
470 /* Encode via the codec registry */
471 v = PyCodec_Encode(str, encoding, errors);
472 if (v == NULL)
473 goto onError;
474
475 return v;
476
477 onError:
478 return NULL;
479}
480
481PyObject *PyString_AsEncodedString(PyObject *str,
482 const char *encoding,
483 const char *errors)
484{
485 PyObject *v;
486
487 v = PyString_AsEncodedObject(str, encoding, errors);
488 if (v == NULL)
489 goto onError;
490
491#ifdef Py_USING_UNICODE
492 /* Convert Unicode to a string using the default encoding */
493 if (PyUnicode_Check(v)) {
494 PyObject *temp = v;
495 v = PyUnicode_AsEncodedString(v, NULL, NULL);
496 Py_DECREF(temp);
497 if (v == NULL)
498 goto onError;
499 }
500#endif
501 if (!PyString_Check(v)) {
502 PyErr_Format(PyExc_TypeError,
503 "encoder did not return a string object (type=%.400s)",
504 v->ob_type->tp_name);
505 Py_DECREF(v);
506 goto onError;
507 }
508
509 return v;
510
511 onError:
512 return NULL;
513}
514
515static void
516string_dealloc(PyObject *op)
517{
518 switch (PyString_CHECK_INTERNED(op)) {
519 case SSTATE_NOT_INTERNED:
520 break;
521
522 case SSTATE_INTERNED_MORTAL:
523 /* revive dead object temporarily for DelItem */
524 op->ob_refcnt = 3;
525 if (PyDict_DelItem(interned, op) != 0)
526 Py_FatalError(
527 "deletion of interned string failed");
528 break;
529
530 case SSTATE_INTERNED_IMMORTAL:
531 Py_FatalError("Immortal interned string died.");
532
533 default:
534 Py_FatalError("Inconsistent interned string state.");
535 }
536 op->ob_type->tp_free(op);
537}
538
539/* Unescape a backslash-escaped string. If unicode is non-zero,
540 the string is a u-literal. If recode_encoding is non-zero,
541 the string is UTF-8 encoded and should be re-encoded in the
542 specified encoding. */
543
544PyObject *PyString_DecodeEscape(const char *s,
545 Py_ssize_t len,
546 const char *errors,
547 Py_ssize_t unicode,
548 const char *recode_encoding)
549{
550 int c;
551 char *p, *buf;
552 const char *end;
553 PyObject *v;
554 Py_ssize_t newlen = recode_encoding ? 4*len:len;
555 v = PyString_FromStringAndSize((char *)NULL, newlen);
556 if (v == NULL)
557 return NULL;
558 p = buf = PyString_AsString(v);
559 end = s + len;
560 while (s < end) {
561 if (*s != '\\') {
562 non_esc:
563#ifdef Py_USING_UNICODE
564 if (recode_encoding && (*s & 0x80)) {
565 PyObject *u, *w;
566 char *r;
567 const char* t;
568 Py_ssize_t rn;
569 t = s;
570 /* Decode non-ASCII bytes as UTF-8. */
571 while (t < end && (*t & 0x80)) t++;
572 u = PyUnicode_DecodeUTF8(s, t - s, errors);
573 if(!u) goto failed;
574
575 /* Recode them in target encoding. */
576 w = PyUnicode_AsEncodedString(
577 u, recode_encoding, errors);
578 Py_DECREF(u);
579 if (!w) goto failed;
580
581 /* Append bytes to output buffer. */
582 assert(PyString_Check(w));
583 r = PyString_AS_STRING(w);
584 rn = PyString_GET_SIZE(w);
585 Py_MEMCPY(p, r, rn);
586 p += rn;
587 Py_DECREF(w);
588 s = t;
589 } else {
590 *p++ = *s++;
591 }
592#else
593 *p++ = *s++;
594#endif
595 continue;
596 }
597 s++;
598 if (s==end) {
599 PyErr_SetString(PyExc_ValueError,
600 "Trailing \\ in string");
601 goto failed;
602 }
603 switch (*s++) {
604 /* XXX This assumes ASCII! */
605 case '\n': break;
606 case '\\': *p++ = '\\'; break;
607 case '\'': *p++ = '\''; break;
608 case '\"': *p++ = '\"'; break;
609 case 'b': *p++ = '\b'; break;
610 case 'f': *p++ = '\014'; break; /* FF */
611 case 't': *p++ = '\t'; break;
612 case 'n': *p++ = '\n'; break;
613 case 'r': *p++ = '\r'; break;
614 case 'v': *p++ = '\013'; break; /* VT */
615 case 'a': *p++ = '\007'; break; /* BEL, not classic C */
616 case '0': case '1': case '2': case '3':
617 case '4': case '5': case '6': case '7':
618 c = s[-1] - '0';
619 if ('0' <= *s && *s <= '7') {
620 c = (c<<3) + *s++ - '0';
621 if ('0' <= *s && *s <= '7')
622 c = (c<<3) + *s++ - '0';
623 }
624 *p++ = c;
625 break;
626 case 'x':
627 if (isxdigit(Py_CHARMASK(s[0]))
628 && isxdigit(Py_CHARMASK(s[1]))) {
629 unsigned int x = 0;
630 c = Py_CHARMASK(*s);
631 s++;
632 if (isdigit(c))
633 x = c - '0';
634 else if (islower(c))
635 x = 10 + c - 'a';
636 else
637 x = 10 + c - 'A';
638 x = x << 4;
639 c = Py_CHARMASK(*s);
640 s++;
641 if (isdigit(c))
642 x += c - '0';
643 else if (islower(c))
644 x += 10 + c - 'a';
645 else
646 x += 10 + c - 'A';
647 *p++ = x;
648 break;
649 }
650 if (!errors || strcmp(errors, "strict") == 0) {
651 PyErr_SetString(PyExc_ValueError,
652 "invalid \\x escape");
653 goto failed;
654 }
655 if (strcmp(errors, "replace") == 0) {
656 *p++ = '?';
657 } else if (strcmp(errors, "ignore") == 0)
658 /* do nothing */;
659 else {
660 PyErr_Format(PyExc_ValueError,
661 "decoding error; "
662 "unknown error handling code: %.400s",
663 errors);
664 goto failed;
665 }
666#ifndef Py_USING_UNICODE
667 case 'u':
668 case 'U':
669 case 'N':
670 if (unicode) {
671 PyErr_SetString(PyExc_ValueError,
672 "Unicode escapes not legal "
673 "when Unicode disabled");
674 goto failed;
675 }
676#endif
677 default:
678 *p++ = '\\';
679 s--;
680 goto non_esc; /* an arbitry number of unescaped
681 UTF-8 bytes may follow. */
682 }
683 }
684 if (p-buf < newlen)
685 _PyString_Resize(&v, p - buf);
686 return v;
687 failed:
688 Py_DECREF(v);
689 return NULL;
690}
691
692/* -------------------------------------------------------------------- */
693/* object api */
694
695static Py_ssize_t
696string_getsize(register PyObject *op)
697{
698 char *s;
699 Py_ssize_t len;
700 if (PyString_AsStringAndSize(op, &s, &len))
701 return -1;
702 return len;
703}
704
705static /*const*/ char *
706string_getbuffer(register PyObject *op)
707{
708 char *s;
709 Py_ssize_t len;
710 if (PyString_AsStringAndSize(op, &s, &len))
711 return NULL;
712 return s;
713}
714
715Py_ssize_t
716PyString_Size(register PyObject *op)
717{
718 if (!PyString_Check(op))
719 return string_getsize(op);
720 return ((PyStringObject *)op) -> ob_size;
721}
722
723/*const*/ char *
724PyString_AsString(register PyObject *op)
725{
726 if (!PyString_Check(op))
727 return string_getbuffer(op);
728 return ((PyStringObject *)op) -> ob_sval;
729}
730
731int
732PyString_AsStringAndSize(register PyObject *obj,
733 register char **s,
734 register Py_ssize_t *len)
735{
736 if (s == NULL) {
737 PyErr_BadInternalCall();
738 return -1;
739 }
740
741 if (!PyString_Check(obj)) {
742#ifdef Py_USING_UNICODE
743 if (PyUnicode_Check(obj)) {
744 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
745 if (obj == NULL)
746 return -1;
747 }
748 else
749#endif
750 {
751 PyErr_Format(PyExc_TypeError,
752 "expected string or Unicode object, "
753 "%.200s found", obj->ob_type->tp_name);
754 return -1;
755 }
756 }
757
758 *s = PyString_AS_STRING(obj);
759 if (len != NULL)
760 *len = PyString_GET_SIZE(obj);
761 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
762 PyErr_SetString(PyExc_TypeError,
763 "expected string without null bytes");
764 return -1;
765 }
766 return 0;
767}
768
769/* -------------------------------------------------------------------- */
770/* Methods */
771
772#define STRINGLIB_CHAR char
773
774#define STRINGLIB_CMP memcmp
775#define STRINGLIB_LEN PyString_GET_SIZE
776#define STRINGLIB_NEW PyString_FromStringAndSize
777#define STRINGLIB_STR PyString_AS_STRING
778
779#define STRINGLIB_EMPTY nullstring
780
781#include "stringlib/fastsearch.h"
782
783#include "stringlib/count.h"
784#include "stringlib/find.h"
785#include "stringlib/partition.h"
786
787
788static int
789string_print(PyStringObject *op, FILE *fp, int flags)
790{
791 Py_ssize_t i;
792 char c;
793 int quote;
794
795 /* XXX Ought to check for interrupts when writing long strings */
796 if (! PyString_CheckExact(op)) {
797 int ret;
798 /* A str subclass may have its own __str__ method. */
799 op = (PyStringObject *) PyObject_Str((PyObject *)op);
800 if (op == NULL)
801 return -1;
802 ret = string_print(op, fp, flags);
803 Py_DECREF(op);
804 return ret;
805 }
806 if (flags & Py_PRINT_RAW) {
807#ifdef __VMS
808 if (op->ob_size) fwrite(op->ob_sval, (int) op->ob_size, 1, fp);
809#else
810 fwrite(op->ob_sval, 1, (int) op->ob_size, fp);
811#endif
812 return 0;
813 }
814
815 /* figure out which quote to use; single is preferred */
816 quote = '\'';
817 if (memchr(op->ob_sval, '\'', op->ob_size) &&
818 !memchr(op->ob_sval, '"', op->ob_size))
819 quote = '"';
820
821 fputc(quote, fp);
822 for (i = 0; i < op->ob_size; i++) {
823 c = op->ob_sval[i];
824 if (c == quote || c == '\\')
825 fprintf(fp, "\\%c", c);
826 else if (c == '\t')
827 fprintf(fp, "\\t");
828 else if (c == '\n')
829 fprintf(fp, "\\n");
830 else if (c == '\r')
831 fprintf(fp, "\\r");
832 else if (c < ' ' || c >= 0x7f)
833 fprintf(fp, "\\x%02x", c & 0xff);
834 else
835 fputc(c, fp);
836 }
837 fputc(quote, fp);
838 return 0;
839}
840
841PyObject *
842PyString_Repr(PyObject *obj, int smartquotes)
843{
844 register PyStringObject* op = (PyStringObject*) obj;
845 size_t newsize = 2 + 4 * op->ob_size;
846 PyObject *v;
847 if (newsize > PY_SSIZE_T_MAX) {
848 PyErr_SetString(PyExc_OverflowError,
849 "string is too large to make repr");
850 }
851 v = PyString_FromStringAndSize((char *)NULL, newsize);
852 if (v == NULL) {
853 return NULL;
854 }
855 else {
856 register Py_ssize_t i;
857 register char c;
858 register char *p;
859 int quote;
860
861 /* figure out which quote to use; single is preferred */
862 quote = '\'';
863 if (smartquotes &&
864 memchr(op->ob_sval, '\'', op->ob_size) &&
865 !memchr(op->ob_sval, '"', op->ob_size))
866 quote = '"';
867
868 p = PyString_AS_STRING(v);
869 *p++ = quote;
870 for (i = 0; i < op->ob_size; i++) {
871 /* There's at least enough room for a hex escape
872 and a closing quote. */
873 assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
874 c = op->ob_sval[i];
875 if (c == quote || c == '\\')
876 *p++ = '\\', *p++ = c;
877 else if (c == '\t')
878 *p++ = '\\', *p++ = 't';
879 else if (c == '\n')
880 *p++ = '\\', *p++ = 'n';
881 else if (c == '\r')
882 *p++ = '\\', *p++ = 'r';
883 else if (c < ' ' || c >= 0x7f) {
884 /* For performance, we don't want to call
885 PyOS_snprintf here (extra layers of
886 function call). */
887 sprintf(p, "\\x%02x", c & 0xff);
888 p += 4;
889 }
890 else
891 *p++ = c;
892 }
893 assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
894 *p++ = quote;
895 *p = '\0';
896 _PyString_Resize(
897 &v, (p - PyString_AS_STRING(v)));
898 return v;
899 }
900}
901
902static PyObject *
903string_repr(PyObject *op)
904{
905 return PyString_Repr(op, 1);
906}
907
908static PyObject *
909string_str(PyObject *s)
910{
911 assert(PyString_Check(s));
912 if (PyString_CheckExact(s)) {
913 Py_INCREF(s);
914 return s;
915 }
916 else {
917 /* Subtype -- return genuine string with the same value. */
918 PyStringObject *t = (PyStringObject *) s;
919 return PyString_FromStringAndSize(t->ob_sval, t->ob_size);
920 }
921}
922
923static Py_ssize_t
924string_length(PyStringObject *a)
925{
926 return a->ob_size;
927}
928
929static PyObject *
930string_concat(register PyStringObject *a, register PyObject *bb)
931{
932 register Py_ssize_t size;
933 register PyStringObject *op;
934 if (!PyString_Check(bb)) {
935#ifdef Py_USING_UNICODE
936 if (PyUnicode_Check(bb))
937 return PyUnicode_Concat((PyObject *)a, bb);
938#endif
939 PyErr_Format(PyExc_TypeError,
940 "cannot concatenate 'str' and '%.200s' objects",
941 bb->ob_type->tp_name);
942 return NULL;
943 }
944#define b ((PyStringObject *)bb)
945 /* Optimize cases with empty left or right operand */
946 if ((a->ob_size == 0 || b->ob_size == 0) &&
947 PyString_CheckExact(a) && PyString_CheckExact(b)) {
948 if (a->ob_size == 0) {
949 Py_INCREF(bb);
950 return bb;
951 }
952 Py_INCREF(a);
953 return (PyObject *)a;
954 }
955 size = a->ob_size + b->ob_size;
956 if (size < 0) {
957 PyErr_SetString(PyExc_OverflowError,
958 "strings are too large to concat");
959 return NULL;
960 }
961
962 /* Inline PyObject_NewVar */
963 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject) + size);
964 if (op == NULL)
965 return PyErr_NoMemory();
966 PyObject_INIT_VAR(op, &PyString_Type, size);
967 op->ob_shash = -1;
968 op->ob_sstate = SSTATE_NOT_INTERNED;
969 Py_MEMCPY(op->ob_sval, a->ob_sval, a->ob_size);
970 Py_MEMCPY(op->ob_sval + a->ob_size, b->ob_sval, b->ob_size);
971 op->ob_sval[size] = '\0';
972 return (PyObject *) op;
973#undef b
974}
975
976static PyObject *
977string_repeat(register PyStringObject *a, register Py_ssize_t n)
978{
979 register Py_ssize_t i;
980 register Py_ssize_t j;
981 register Py_ssize_t size;
982 register PyStringObject *op;
983 size_t nbytes;
984 if (n < 0)
985 n = 0;
986 /* watch out for overflows: the size can overflow int,
987 * and the # of bytes needed can overflow size_t
988 */
989 size = a->ob_size * n;
990 if (n && size / n != a->ob_size) {
991 PyErr_SetString(PyExc_OverflowError,
992 "repeated string is too long");
993 return NULL;
994 }
995 if (size == a->ob_size && PyString_CheckExact(a)) {
996 Py_INCREF(a);
997 return (PyObject *)a;
998 }
999 nbytes = (size_t)size;
1000 if (nbytes + sizeof(PyStringObject) <= nbytes) {
1001 PyErr_SetString(PyExc_OverflowError,
1002 "repeated string is too long");
1003 return NULL;
1004 }
1005 op = (PyStringObject *)
1006 PyObject_MALLOC(sizeof(PyStringObject) + nbytes);
1007 if (op == NULL)
1008 return PyErr_NoMemory();
1009 PyObject_INIT_VAR(op, &PyString_Type, size);
1010 op->ob_shash = -1;
1011 op->ob_sstate = SSTATE_NOT_INTERNED;
1012 op->ob_sval[size] = '\0';
1013 if (a->ob_size == 1 && n > 0) {
1014 memset(op->ob_sval, a->ob_sval[0] , n);
1015 return (PyObject *) op;
1016 }
1017 i = 0;
1018 if (i < size) {
1019 Py_MEMCPY(op->ob_sval, a->ob_sval, a->ob_size);
1020 i = a->ob_size;
1021 }
1022 while (i < size) {
1023 j = (i <= size-i) ? i : size-i;
1024 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
1025 i += j;
1026 }
1027 return (PyObject *) op;
1028}
1029
1030/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1031
1032static PyObject *
1033string_slice(register PyStringObject *a, register Py_ssize_t i,
1034 register Py_ssize_t j)
1035 /* j -- may be negative! */
1036{
1037 if (i < 0)
1038 i = 0;
1039 if (j < 0)
1040 j = 0; /* Avoid signed/unsigned bug in next line */
1041 if (j > a->ob_size)
1042 j = a->ob_size;
1043 if (i == 0 && j == a->ob_size && PyString_CheckExact(a)) {
1044 /* It's the same as a */
1045 Py_INCREF(a);
1046 return (PyObject *)a;
1047 }
1048 if (j < i)
1049 j = i;
1050 return PyString_FromStringAndSize(a->ob_sval + i, j-i);
1051}
1052
1053static int
1054string_contains(PyObject *str_obj, PyObject *sub_obj)
1055{
1056 if (!PyString_CheckExact(sub_obj)) {
1057#ifdef Py_USING_UNICODE
1058 if (PyUnicode_Check(sub_obj))
1059 return PyUnicode_Contains(str_obj, sub_obj);
1060#endif
1061 if (!PyString_Check(sub_obj)) {
1062 PyErr_SetString(PyExc_TypeError,
1063 "'in <string>' requires string as left operand");
1064 return -1;
1065 }
1066 }
1067
1068 return stringlib_contains_obj(str_obj, sub_obj);
1069}
1070
1071static PyObject *
1072string_item(PyStringObject *a, register Py_ssize_t i)
1073{
1074 char pchar;
1075 PyObject *v;
1076 if (i < 0 || i >= a->ob_size) {
1077 PyErr_SetString(PyExc_IndexError, "string index out of range");
1078 return NULL;
1079 }
1080 pchar = a->ob_sval[i];
1081 v = (PyObject *)characters[pchar & UCHAR_MAX];
1082 if (v == NULL)
1083 v = PyString_FromStringAndSize(&pchar, 1);
1084 else {
1085#ifdef COUNT_ALLOCS
1086 one_strings++;
1087#endif
1088 Py_INCREF(v);
1089 }
1090 return v;
1091}
1092
1093static PyObject*
1094string_richcompare(PyStringObject *a, PyStringObject *b, int op)
1095{
1096 int c;
1097 Py_ssize_t len_a, len_b;
1098 Py_ssize_t min_len;
1099 PyObject *result;
1100
1101 /* Make sure both arguments are strings. */
1102 if (!(PyString_Check(a) && PyString_Check(b))) {
1103 result = Py_NotImplemented;
1104 goto out;
1105 }
1106 if (a == b) {
1107 switch (op) {
1108 case Py_EQ:case Py_LE:case Py_GE:
1109 result = Py_True;
1110 goto out;
1111 case Py_NE:case Py_LT:case Py_GT:
1112 result = Py_False;
1113 goto out;
1114 }
1115 }
1116 if (op == Py_EQ) {
1117 /* Supporting Py_NE here as well does not save
1118 much time, since Py_NE is rarely used. */
1119 if (a->ob_size == b->ob_size
1120 && (a->ob_sval[0] == b->ob_sval[0]
1121 && memcmp(a->ob_sval, b->ob_sval,
1122 a->ob_size) == 0)) {
1123 result = Py_True;
1124 } else {
1125 result = Py_False;
1126 }
1127 goto out;
1128 }
1129 len_a = a->ob_size; len_b = b->ob_size;
1130 min_len = (len_a < len_b) ? len_a : len_b;
1131 if (min_len > 0) {
1132 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1133 if (c==0)
1134 c = memcmp(a->ob_sval, b->ob_sval, min_len);
1135 }else
1136 c = 0;
1137 if (c == 0)
1138 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1139 switch (op) {
1140 case Py_LT: c = c < 0; break;
1141 case Py_LE: c = c <= 0; break;
1142 case Py_EQ: assert(0); break; /* unreachable */
1143 case Py_NE: c = c != 0; break;
1144 case Py_GT: c = c > 0; break;
1145 case Py_GE: c = c >= 0; break;
1146 default:
1147 result = Py_NotImplemented;
1148 goto out;
1149 }
1150 result = c ? Py_True : Py_False;
1151 out:
1152 Py_INCREF(result);
1153 return result;
1154}
1155
1156int
1157_PyString_Eq(PyObject *o1, PyObject *o2)
1158{
1159 PyStringObject *a = (PyStringObject*) o1;
1160 PyStringObject *b = (PyStringObject*) o2;
1161 return a->ob_size == b->ob_size
1162 && *a->ob_sval == *b->ob_sval
1163 && memcmp(a->ob_sval, b->ob_sval, a->ob_size) == 0;
1164}
1165
1166static long
1167string_hash(PyStringObject *a)
1168{
1169 register Py_ssize_t len;
1170 register unsigned char *p;
1171 register long x;
1172
1173 if (a->ob_shash != -1)
1174 return a->ob_shash;
1175 len = a->ob_size;
1176 p = (unsigned char *) a->ob_sval;
1177 x = *p << 7;
1178 while (--len >= 0)
1179 x = (1000003*x) ^ *p++;
1180 x ^= a->ob_size;
1181 if (x == -1)
1182 x = -2;
1183 a->ob_shash = x;
1184 return x;
1185}
1186
1187static PyObject*
1188string_subscript(PyStringObject* self, PyObject* item)
1189{
1190 if (PyIndex_Check(item)) {
1191 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1192 if (i == -1 && PyErr_Occurred())
1193 return NULL;
1194 if (i < 0)
1195 i += PyString_GET_SIZE(self);
1196 return string_item(self, i);
1197 }
1198 else if (PySlice_Check(item)) {
1199 Py_ssize_t start, stop, step, slicelength, cur, i;
1200 char* source_buf;
1201 char* result_buf;
1202 PyObject* result;
1203
1204 if (PySlice_GetIndicesEx((PySliceObject*)item,
1205 PyString_GET_SIZE(self),
1206 &start, &stop, &step, &slicelength) < 0) {
1207 return NULL;
1208 }
1209
1210 if (slicelength <= 0) {
1211 return PyString_FromStringAndSize("", 0);
1212 }
1213 else {
1214 source_buf = PyString_AsString((PyObject*)self);
1215 result_buf = (char *)PyMem_Malloc(slicelength);
1216 if (result_buf == NULL)
1217 return PyErr_NoMemory();
1218
1219 for (cur = start, i = 0; i < slicelength;
1220 cur += step, i++) {
1221 result_buf[i] = source_buf[cur];
1222 }
1223
1224 result = PyString_FromStringAndSize(result_buf,
1225 slicelength);
1226 PyMem_Free(result_buf);
1227 return result;
1228 }
1229 }
1230 else {
1231 PyErr_SetString(PyExc_TypeError,
1232 "string indices must be integers");
1233 return NULL;
1234 }
1235}
1236
1237static Py_ssize_t
1238string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
1239{
1240 if ( index != 0 ) {
1241 PyErr_SetString(PyExc_SystemError,
1242 "accessing non-existent string segment");
1243 return -1;
1244 }
1245 *ptr = (void *)self->ob_sval;
1246 return self->ob_size;
1247}
1248
1249static Py_ssize_t
1250string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
1251{
1252 PyErr_SetString(PyExc_TypeError,
1253 "Cannot use string as modifiable buffer");
1254 return -1;
1255}
1256
1257static Py_ssize_t
1258string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
1259{
1260 if ( lenp )
1261 *lenp = self->ob_size;
1262 return 1;
1263}
1264
1265static Py_ssize_t
1266string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
1267{
1268 if ( index != 0 ) {
1269 PyErr_SetString(PyExc_SystemError,
1270 "accessing non-existent string segment");
1271 return -1;
1272 }
1273 *ptr = self->ob_sval;
1274 return self->ob_size;
1275}
1276
1277static PySequenceMethods string_as_sequence = {
1278 (lenfunc)string_length, /*sq_length*/
1279 (binaryfunc)string_concat, /*sq_concat*/
1280 (ssizeargfunc)string_repeat, /*sq_repeat*/
1281 (ssizeargfunc)string_item, /*sq_item*/
1282 (ssizessizeargfunc)string_slice, /*sq_slice*/
1283 0, /*sq_ass_item*/
1284 0, /*sq_ass_slice*/
1285 (objobjproc)string_contains /*sq_contains*/
1286};
1287
1288static PyMappingMethods string_as_mapping = {
1289 (lenfunc)string_length,
1290 (binaryfunc)string_subscript,
1291 0,
1292};
1293
1294static PyBufferProcs string_as_buffer = {
1295 (readbufferproc)string_buffer_getreadbuf,
1296 (writebufferproc)string_buffer_getwritebuf,
1297 (segcountproc)string_buffer_getsegcount,
1298 (charbufferproc)string_buffer_getcharbuf,
1299};
1300
1301
1302
1303
/* Strip directions.  The values double as indices into stripformat[]. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Argument-parsing format strings, indexed by the strip direction above.
   Both the strings and the table itself are const, so no entry can be
   reassigned at run time. */
static const char * const stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};

/* Method name for strip direction i: the format string with its three
   leading characters ("|O:") skipped. */
#define STRIPNAME(i) (stripformat[i]+3)
1312
1313
/* Evaluates to non-zero when the `length` bytes at `pattern` equal the bytes
   of `target` starting at `offset`.  The first and last bytes are compared
   directly; the bytes in between are compared with memcmp().
   Don't call if length < 2: the memcmp() size is computed as length-2.
   Every parameter is parenthesized so that expression arguments expand
   with the intended precedence. */
#define Py_STRING_MATCH(target, offset, pattern, length)              \
    ((target)[(offset)] == (pattern)[0] &&                             \
     (target)[(offset)+(length)-1] == (pattern)[(length)-1] &&         \
     !memcmp((target)+(offset)+1, (pattern)+1, (length)-2) )
1319
1320
/* Overallocate the initial list to reduce the number of reallocs for small
   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three
   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
   text (roughly 11 words per line) and field delimited data (usually 1-10
   fields).  For large strings the split algorithms are bandwidth limited
   so increasing the preallocation likely will not improve things. */

#define MAX_PREALLOC 12

/* Initial list size for a split limited to `maxsplit` splits, capped at
   MAX_PREALLOC.  5 splits gives 6 elements.  The parameter is parenthesized
   so that expression arguments are evaluated as a unit. */
#define PREALLOC_SIZE(maxsplit) \
    ((maxsplit) >= MAX_PREALLOC ? MAX_PREALLOC : (maxsplit)+1)

/* Build the substring data[left:right] and append it to `list`.
   Relies on the expansion site providing the variables `str` and `list`
   and an `onError` label, which is jumped to on any failure. */
#define SPLIT_APPEND(data, left, right)                         \
    str = PyString_FromStringAndSize((data) + (left),           \
                                     (right) - (left));         \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (PyList_Append(list, str)) {                             \
        Py_DECREF(str);                                         \
        goto onError;                                           \
    }                                                           \
    else                                                        \
        Py_DECREF(str);

/* Like SPLIT_APPEND, but additionally relies on a `count` variable at the
   expansion site.  While count < MAX_PREALLOC the new string is stored
   directly into slot `count` of the list with PyList_SET_ITEM; after that
   it is appended.  `count` is incremented on success. */
#define SPLIT_ADD(data, left, right) {                          \
    str = PyString_FromStringAndSize((data) + (left),           \
                                     (right) - (left));         \
    if (str == NULL)                                            \
        goto onError;                                           \
    if (count < MAX_PREALLOC) {                                 \
        PyList_SET_ITEM(list, count, str);                      \
    } else {                                                    \
        if (PyList_Append(list, str)) {                         \
            Py_DECREF(str);                                     \
            goto onError;                                       \
        }                                                       \
        else                                                    \
            Py_DECREF(str);                                     \
    }                                                           \
    count++; }

/* Always force the list to the expected size (the caller's `count`). */
#define FIX_PREALLOC_SIZE(list) (((PyListObject *)(list))->ob_size = count)

/* Cursor helpers.  `i` must be a modifiable lvalue; it is advanced (or, for
   the RSKIP_ variants, moved backwards) across whitespace / non-whitespace
   bytes of `s`, stopping at `len` (forward) or below 0 (backward). */
#define SKIP_SPACE(s, i, len)    { while ((i)<(len) && isspace(Py_CHARMASK((s)[(i)]))) (i)++; }
#define SKIP_NONSPACE(s, i, len) { while ((i)<(len) && !isspace(Py_CHARMASK((s)[(i)]))) (i)++; }
#define RSKIP_SPACE(s, i)        { while ((i)>=0 && isspace(Py_CHARMASK((s)[(i)]))) (i)--; }
#define RSKIP_NONSPACE(s, i)     { while ((i)>=0 && !isspace(Py_CHARMASK((s)[(i)]))) (i)--; }
1370
1371Py_LOCAL_INLINE(PyObject *)
1372split_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
1373{
1374 Py_ssize_t i, j, count=0;
1375 PyObject *str;
1376 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
1377
1378 if (list == NULL)
1379 return NULL;
1380
1381 i = j = 0;
1382
1383 while (maxsplit-- > 0) {
1384 SKIP_SPACE(s, i, len);
1385 if (i==len) break;
1386 j = i; i++;
1387 SKIP_NONSPACE(s, i, len);
1388 SPLIT_ADD(s, j, i);
1389 }
1390
1391 if (i < len) {
1392 /* Only occurs when maxsplit was reached */
1393 /* Skip any remaining whitespace and copy to end of string */
1394 SKIP_SPACE(s, i, len);
1395 if (i != len)
1396 SPLIT_ADD(s, i, len);
1397 }
1398 FIX_PREALLOC_SIZE(list);
1399 return list;
1400 onError:
1401 Py_DECREF(list);
1402 return NULL;
1403}
1404
1405Py_LOCAL_INLINE(PyObject *)
1406split_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
1407{
1408 register Py_ssize_t i, j, count=0;
1409 PyObject *str;
1410 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
1411
1412 if (list == NULL)
1413 return NULL;
1414
1415 i = j = 0;
1416 while ((j < len) && (maxcount-- > 0)) {
1417 for(; j<len; j++) {
1418 /* I found that using memchr makes no difference */
1419 if (s[j] == ch) {
1420 SPLIT_ADD(s, i, j);
1421 i = j = j + 1;
1422 break;
1423 }
1424 }
1425 }
1426 if (i <= len) {
1427 SPLIT_ADD(s, i, len);
1428 }
1429 FIX_PREALLOC_SIZE(list);
1430 return list;
1431
1432 onError:
1433 Py_DECREF(list);
1434 return NULL;
1435}
1436
1437PyDoc_STRVAR(split__doc__,
1438"S.split([sep [,maxsplit]]) -> list of strings\n\
1439\n\
1440Return a list of the words in the string S, using sep as the\n\
1441delimiter string. If maxsplit is given, at most maxsplit\n\
1442splits are done. If sep is not specified or is None, any\n\
1443whitespace string is a separator.");
1444
1445static PyObject *
1446string_split(PyStringObject *self, PyObject *args)
1447{
1448 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
1449 Py_ssize_t maxsplit = -1, count=0;
1450 const char *s = PyString_AS_STRING(self), *sub;
1451 PyObject *list, *str, *subobj = Py_None;
1452#ifdef USE_FAST
1453 Py_ssize_t pos;
1454#endif
1455
1456 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
1457 return NULL;
1458 if (maxsplit < 0)
1459 maxsplit = PY_SSIZE_T_MAX;
1460 if (subobj == Py_None)
1461 return split_whitespace(s, len, maxsplit);
1462 if (PyString_Check(subobj)) {
1463 sub = PyString_AS_STRING(subobj);
1464 n = PyString_GET_SIZE(subobj);
1465 }
1466#ifdef Py_USING_UNICODE
1467 else if (PyUnicode_Check(subobj))
1468 return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
1469#endif
1470 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1471 return NULL;
1472
1473 if (n == 0) {
1474 PyErr_SetString(PyExc_ValueError, "empty separator");
1475 return NULL;
1476 }
1477 else if (n == 1)
1478 return split_char(s, len, sub[0], maxsplit);
1479
1480 list = PyList_New(PREALLOC_SIZE(maxsplit));
1481 if (list == NULL)
1482 return NULL;
1483
1484#ifdef USE_FAST
1485 i = j = 0;
1486 while (maxsplit-- > 0) {
1487 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH);
1488 if (pos < 0)
1489 break;
1490 j = i+pos;
1491 SPLIT_ADD(s, i, j);
1492 i = j + n;
1493 }
1494#else
1495 i = j = 0;
1496 while ((j+n <= len) && (maxsplit-- > 0)) {
1497 for (; j+n <= len; j++) {
1498 if (Py_STRING_MATCH(s, j, sub, n)) {
1499 SPLIT_ADD(s, i, j);
1500 i = j = j + n;
1501 break;
1502 }
1503 }
1504 }
1505#endif
1506 SPLIT_ADD(s, i, len);
1507 FIX_PREALLOC_SIZE(list);
1508 return list;
1509
1510 onError:
1511 Py_DECREF(list);
1512 return NULL;
1513}
1514
1515PyDoc_STRVAR(partition__doc__,
1516"S.partition(sep) -> (head, sep, tail)\n\
1517\n\
1518Searches for the separator sep in S, and returns the part before it,\n\
1519the separator itself, and the part after it. If the separator is not\n\
1520found, returns S and two empty strings.");
1521
1522static PyObject *
1523string_partition(PyStringObject *self, PyObject *sep_obj)
1524{
1525 const char *sep;
1526 Py_ssize_t sep_len;
1527
1528 if (PyString_Check(sep_obj)) {
1529 sep = PyString_AS_STRING(sep_obj);
1530 sep_len = PyString_GET_SIZE(sep_obj);
1531 }
1532#ifdef Py_USING_UNICODE
1533 else if (PyUnicode_Check(sep_obj))
1534 return PyUnicode_Partition((PyObject *) self, sep_obj);
1535#endif
1536 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1537 return NULL;
1538
1539 return stringlib_partition(
1540 (PyObject*) self,
1541 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1542 sep_obj, sep, sep_len
1543 );
1544}
1545
1546PyDoc_STRVAR(rpartition__doc__,
1547"S.rpartition(sep) -> (tail, sep, head)\n\
1548\n\
1549Searches for the separator sep in S, starting at the end of S, and returns\n\
1550the part before it, the separator itself, and the part after it. If the\n\
1551separator is not found, returns two empty strings and S.");
1552
1553static PyObject *
1554string_rpartition(PyStringObject *self, PyObject *sep_obj)
1555{
1556 const char *sep;
1557 Py_ssize_t sep_len;
1558
1559 if (PyString_Check(sep_obj)) {
1560 sep = PyString_AS_STRING(sep_obj);
1561 sep_len = PyString_GET_SIZE(sep_obj);
1562 }
1563#ifdef Py_USING_UNICODE
1564 else if (PyUnicode_Check(sep_obj))
1565 return PyUnicode_Partition((PyObject *) self, sep_obj);
1566#endif
1567 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1568 return NULL;
1569
1570 return stringlib_rpartition(
1571 (PyObject*) self,
1572 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1573 sep_obj, sep, sep_len
1574 );
1575}
1576
1577Py_LOCAL_INLINE(PyObject *)
1578rsplit_whitespace(const char *s, Py_ssize_t len, Py_ssize_t maxsplit)
1579{
1580 Py_ssize_t i, j, count=0;
1581 PyObject *str;
1582 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));
1583
1584 if (list == NULL)
1585 return NULL;
1586
1587 i = j = len-1;
1588
1589 while (maxsplit-- > 0) {
1590 RSKIP_SPACE(s, i);
1591 if (i<0) break;
1592 j = i; i--;
1593 RSKIP_NONSPACE(s, i);
1594 SPLIT_ADD(s, i + 1, j + 1);
1595 }
1596 if (i >= 0) {
1597 /* Only occurs when maxsplit was reached */
1598 /* Skip any remaining whitespace and copy to beginning of string */
1599 RSKIP_SPACE(s, i);
1600 if (i >= 0)
1601 SPLIT_ADD(s, 0, i + 1);
1602
1603 }
1604 FIX_PREALLOC_SIZE(list);
1605 if (PyList_Reverse(list) < 0)
1606 goto onError;
1607 return list;
1608 onError:
1609 Py_DECREF(list);
1610 return NULL;
1611}
1612
1613Py_LOCAL_INLINE(PyObject *)
1614rsplit_char(const char *s, Py_ssize_t len, char ch, Py_ssize_t maxcount)
1615{
1616 register Py_ssize_t i, j, count=0;
1617 PyObject *str;
1618 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
1619
1620 if (list == NULL)
1621 return NULL;
1622
1623 i = j = len - 1;
1624 while ((i >= 0) && (maxcount-- > 0)) {
1625 for (; i >= 0; i--) {
1626 if (s[i] == ch) {
1627 SPLIT_ADD(s, i + 1, j + 1);
1628 j = i = i - 1;
1629 break;
1630 }
1631 }
1632 }
1633 if (j >= -1) {
1634 SPLIT_ADD(s, 0, j + 1);
1635 }
1636 FIX_PREALLOC_SIZE(list);
1637 if (PyList_Reverse(list) < 0)
1638 goto onError;
1639 return list;
1640
1641 onError:
1642 Py_DECREF(list);
1643 return NULL;
1644}
1645
1646PyDoc_STRVAR(rsplit__doc__,
1647"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1648\n\
1649Return a list of the words in the string S, using sep as the\n\
1650delimiter string, starting at the end of the string and working\n\
1651to the front. If maxsplit is given, at most maxsplit splits are\n\
1652done. If sep is not specified or is None, any whitespace string\n\
1653is a separator.");
1654
1655static PyObject *
1656string_rsplit(PyStringObject *self, PyObject *args)
1657{
1658 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;
1659 Py_ssize_t maxsplit = -1, count=0;
1660 const char *s = PyString_AS_STRING(self), *sub;
1661 PyObject *list, *str, *subobj = Py_None;
1662
1663 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
1664 return NULL;
1665 if (maxsplit < 0)
1666 maxsplit = PY_SSIZE_T_MAX;
1667 if (subobj == Py_None)
1668 return rsplit_whitespace(s, len, maxsplit);
1669 if (PyString_Check(subobj)) {
1670 sub = PyString_AS_STRING(subobj);
1671 n = PyString_GET_SIZE(subobj);
1672 }
1673#ifdef Py_USING_UNICODE
1674 else if (PyUnicode_Check(subobj))
1675 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
1676#endif
1677 else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1678 return NULL;
1679
1680 if (n == 0) {
1681 PyErr_SetString(PyExc_ValueError, "empty separator");
1682 return NULL;
1683 }
1684 else if (n == 1)
1685 return rsplit_char(s, len, sub[0], maxsplit);
1686
1687 list = PyList_New(PREALLOC_SIZE(maxsplit));
1688 if (list == NULL)
1689 return NULL;
1690
1691 j = len;
1692 i = j - n;
1693
1694 while ( (i >= 0) && (maxsplit-- > 0) ) {
1695 for (; i>=0; i--) {
1696 if (Py_STRING_MATCH(s, i, sub, n)) {
1697 SPLIT_ADD(s, i + n, j);
1698 j = i;
1699 i -= n;
1700 break;
1701 }
1702 }
1703 }
1704 SPLIT_ADD(s, 0, j);
1705 FIX_PREALLOC_SIZE(list);
1706 if (PyList_Reverse(list) < 0)
1707 goto onError;
1708 return list;
1709
1710onError:
1711 Py_DECREF(list);
1712 return NULL;
1713}
1714
1715
1716PyDoc_STRVAR(join__doc__,
1717"S.join(sequence) -> string\n\
1718\n\
1719Return a string which is the concatenation of the strings in the\n\
1720sequence. The separator between elements is S.");
1721
1722static PyObject *
1723string_join(PyStringObject *self, PyObject *orig)
1724{
1725 char *sep = PyString_AS_STRING(self);
1726 const Py_ssize_t seplen = PyString_GET_SIZE(self);
1727 PyObject *res = NULL;
1728 char *p;
1729 Py_ssize_t seqlen = 0;
1730 size_t sz = 0;
1731 Py_ssize_t i;
1732 PyObject *seq, *item;
1733
1734 seq = PySequence_Fast(orig, "");
1735 if (seq == NULL) {
1736 return NULL;
1737 }
1738
1739 seqlen = PySequence_Size(seq);
1740 if (seqlen == 0) {
1741 Py_DECREF(seq);
1742 return PyString_FromString("");
1743 }
1744 if (seqlen == 1) {
1745 item = PySequence_Fast_GET_ITEM(seq, 0);
1746 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1747 Py_INCREF(item);
1748 Py_DECREF(seq);
1749 return item;
1750 }
1751 }
1752
1753 /* There are at least two things to join, or else we have a subclass
1754 * of the builtin types in the sequence.
1755 * Do a pre-pass to figure out the total amount of space we'll
1756 * need (sz), see whether any argument is absurd, and defer to
1757 * the Unicode join if appropriate.
1758 */
1759 for (i = 0; i < seqlen; i++) {
1760 const size_t old_sz = sz;
1761 item = PySequence_Fast_GET_ITEM(seq, i);
1762 if (!PyString_Check(item)){
1763#ifdef Py_USING_UNICODE
1764 if (PyUnicode_Check(item)) {
1765 /* Defer to Unicode join.
1766 * CAUTION: There's no gurantee that the
1767 * original sequence can be iterated over
1768 * again, so we must pass seq here.
1769 */
1770 PyObject *result;
1771 result = PyUnicode_Join((PyObject *)self, seq);
1772 Py_DECREF(seq);
1773 return result;
1774 }
1775#endif
1776 PyErr_Format(PyExc_TypeError,
1777 "sequence item %zd: expected string,"
1778 " %.80s found",
1779 i, item->ob_type->tp_name);
1780 Py_DECREF(seq);
1781 return NULL;
1782 }
1783 sz += PyString_GET_SIZE(item);
1784 if (i != 0)
1785 sz += seplen;
1786 if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
1787 PyErr_SetString(PyExc_OverflowError,
1788 "join() result is too long for a Python string");
1789 Py_DECREF(seq);
1790 return NULL;
1791 }
1792 }
1793
1794 /* Allocate result space. */
1795 res = PyString_FromStringAndSize((char*)NULL, sz);
1796 if (res == NULL) {
1797 Py_DECREF(seq);
1798 return NULL;
1799 }
1800
1801 /* Catenate everything. */
1802 p = PyString_AS_STRING(res);
1803 for (i = 0; i < seqlen; ++i) {
1804 size_t n;
1805 item = PySequence_Fast_GET_ITEM(seq, i);
1806 n = PyString_GET_SIZE(item);
1807 Py_MEMCPY(p, PyString_AS_STRING(item), n);
1808 p += n;
1809 if (i < seqlen - 1) {
1810 Py_MEMCPY(p, sep, seplen);
1811 p += seplen;
1812 }
1813 }
1814
1815 Py_DECREF(seq);
1816 return res;
1817}
1818
1819PyObject *
1820_PyString_Join(PyObject *sep, PyObject *x)
1821{
1822 assert(sep != NULL && PyString_Check(sep));
1823 assert(x != NULL);
1824 return string_join((PyStringObject *)sep, x);
1825}
1826
1827Py_LOCAL_INLINE(void)
1828string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len)
1829{
1830 if (*end > len)
1831 *end = len;
1832 else if (*end < 0)
1833 *end += len;
1834 if (*end < 0)
1835 *end = 0;
1836 if (*start < 0)
1837 *start += len;
1838 if (*start < 0)
1839 *start = 0;
1840}
1841
1842Py_LOCAL_INLINE(Py_ssize_t)
1843string_find_internal(PyStringObject *self, PyObject *args, int dir)
1844{
1845 PyObject *subobj;
1846 const char *sub;
1847 Py_ssize_t sub_len;
1848 Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
1849
1850 if (!PyArg_ParseTuple(args, "O|O&O&:find/rfind/index/rindex", &subobj,
1851 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
1852 return -2;
1853 if (PyString_Check(subobj)) {
1854 sub = PyString_AS_STRING(subobj);
1855 sub_len = PyString_GET_SIZE(subobj);
1856 }
1857#ifdef Py_USING_UNICODE
1858 else if (PyUnicode_Check(subobj))
1859 return PyUnicode_Find(
1860 (PyObject *)self, subobj, start, end, dir);
1861#endif
1862 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1863 /* XXX - the "expected a character buffer object" is pretty
1864 confusing for a non-expert. remap to something else ? */
1865 return -2;
1866
1867 if (dir > 0)
1868 return stringlib_find_slice(
1869 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1870 sub, sub_len, start, end);
1871 else
1872 return stringlib_rfind_slice(
1873 PyString_AS_STRING(self), PyString_GET_SIZE(self),
1874 sub, sub_len, start, end);
1875}
1876
1877
1878PyDoc_STRVAR(find__doc__,
1879"S.find(sub [,start [,end]]) -> int\n\
1880\n\
1881Return the lowest index in S where substring sub is found,\n\
1882such that sub is contained within s[start,end]. Optional\n\
1883arguments start and end are interpreted as in slice notation.\n\
1884\n\
1885Return -1 on failure.");
1886
1887static PyObject *
1888string_find(PyStringObject *self, PyObject *args)
1889{
1890 Py_ssize_t result = string_find_internal(self, args, +1);
1891 if (result == -2)
1892 return NULL;
1893 return PyInt_FromSsize_t(result);
1894}
1895
1896
1897PyDoc_STRVAR(index__doc__,
1898"S.index(sub [,start [,end]]) -> int\n\
1899\n\
1900Like S.find() but raise ValueError when the substring is not found.");
1901
1902static PyObject *
1903string_index(PyStringObject *self, PyObject *args)
1904{
1905 Py_ssize_t result = string_find_internal(self, args, +1);
1906 if (result == -2)
1907 return NULL;
1908 if (result == -1) {
1909 PyErr_SetString(PyExc_ValueError,
1910 "substring not found");
1911 return NULL;
1912 }
1913 return PyInt_FromSsize_t(result);
1914}
1915
1916
1917PyDoc_STRVAR(rfind__doc__,
1918"S.rfind(sub [,start [,end]]) -> int\n\
1919\n\
1920Return the highest index in S where substring sub is found,\n\
1921such that sub is contained within s[start,end]. Optional\n\
1922arguments start and end are interpreted as in slice notation.\n\
1923\n\
1924Return -1 on failure.");
1925
1926static PyObject *
1927string_rfind(PyStringObject *self, PyObject *args)
1928{
1929 Py_ssize_t result = string_find_internal(self, args, -1);
1930 if (result == -2)
1931 return NULL;
1932 return PyInt_FromSsize_t(result);
1933}
1934
1935
1936PyDoc_STRVAR(rindex__doc__,
1937"S.rindex(sub [,start [,end]]) -> int\n\
1938\n\
1939Like S.rfind() but raise ValueError when the substring is not found.");
1940
1941static PyObject *
1942string_rindex(PyStringObject *self, PyObject *args)
1943{
1944 Py_ssize_t result = string_find_internal(self, args, -1);
1945 if (result == -2)
1946 return NULL;
1947 if (result == -1) {
1948 PyErr_SetString(PyExc_ValueError,
1949 "substring not found");
1950 return NULL;
1951 }
1952 return PyInt_FromSsize_t(result);
1953}
1954
1955
1956Py_LOCAL_INLINE(PyObject *)
1957do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1958{
1959 char *s = PyString_AS_STRING(self);
1960 Py_ssize_t len = PyString_GET_SIZE(self);
1961 char *sep = PyString_AS_STRING(sepobj);
1962 Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
1963 Py_ssize_t i, j;
1964
1965 i = 0;
1966 if (striptype != RIGHTSTRIP) {
1967 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1968 i++;
1969 }
1970 }
1971
1972 j = len;
1973 if (striptype != LEFTSTRIP) {
1974 do {
1975 j--;
1976 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1977 j++;
1978 }
1979
1980 if (i == 0 && j == len && PyString_CheckExact(self)) {
1981 Py_INCREF(self);
1982 return (PyObject*)self;
1983 }
1984 else
1985 return PyString_FromStringAndSize(s+i, j-i);
1986}
1987
1988
1989Py_LOCAL_INLINE(PyObject *)
1990do_strip(PyStringObject *self, int striptype)
1991{
1992 char *s = PyString_AS_STRING(self);
1993 Py_ssize_t len = PyString_GET_SIZE(self), i, j;
1994
1995 i = 0;
1996 if (striptype != RIGHTSTRIP) {
1997 while (i < len && isspace(Py_CHARMASK(s[i]))) {
1998 i++;
1999 }
2000 }
2001
2002 j = len;
2003 if (striptype != LEFTSTRIP) {
2004 do {
2005 j--;
2006 } while (j >= i && isspace(Py_CHARMASK(s[j])));
2007 j++;
2008 }
2009
2010 if (i == 0 && j == len && PyString_CheckExact(self)) {
2011 Py_INCREF(self);
2012 return (PyObject*)self;
2013 }
2014 else
2015 return PyString_FromStringAndSize(s+i, j-i);
2016}
2017
2018
2019Py_LOCAL_INLINE(PyObject *)
2020do_argstrip(PyStringObject *self, int striptype, PyObject *args)
2021{
2022 PyObject *sep = NULL;
2023
2024 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
2025 return NULL;
2026
2027 if (sep != NULL && sep != Py_None) {
2028 if (PyString_Check(sep))
2029 return do_xstrip(self, striptype, sep);
2030#ifdef Py_USING_UNICODE
2031 else if (PyUnicode_Check(sep)) {
2032 PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
2033 PyObject *res;
2034 if (uniself==NULL)
2035 return NULL;
2036 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
2037 striptype, sep);
2038 Py_DECREF(uniself);
2039 return res;
2040 }
2041#endif
2042 PyErr_Format(PyExc_TypeError,
2043#ifdef Py_USING_UNICODE
2044 "%s arg must be None, str or unicode",
2045#else
2046 "%s arg must be None or str",
2047#endif
2048 STRIPNAME(striptype));
2049 return NULL;
2050 }
2051
2052 return do_strip(self, striptype);
2053}
2054
2055
2056PyDoc_STRVAR(strip__doc__,
2057"S.strip([chars]) -> string or unicode\n\
2058\n\
2059Return a copy of the string S with leading and trailing\n\
2060whitespace removed.\n\
2061If chars is given and not None, remove characters in chars instead.\n\
2062If chars is unicode, S will be converted to unicode before stripping");
2063
2064static PyObject *
2065string_strip(PyStringObject *self, PyObject *args)
2066{
2067 if (PyTuple_GET_SIZE(args) == 0)
2068 return do_strip(self, BOTHSTRIP); /* Common case */
2069 else
2070 return do_argstrip(self, BOTHSTRIP, args);
2071}
2072
2073
2074PyDoc_STRVAR(lstrip__doc__,
2075"S.lstrip([chars]) -> string or unicode\n\
2076\n\
2077Return a copy of the string S with leading whitespace removed.\n\
2078If chars is given and not None, remove characters in chars instead.\n\
2079If chars is unicode, S will be converted to unicode before stripping");
2080
2081static PyObject *
2082string_lstrip(PyStringObject *self, PyObject *args)
2083{
2084 if (PyTuple_GET_SIZE(args) == 0)
2085 return do_strip(self, LEFTSTRIP); /* Common case */
2086 else
2087 return do_argstrip(self, LEFTSTRIP, args);
2088}
2089
2090
2091PyDoc_STRVAR(rstrip__doc__,
2092"S.rstrip([chars]) -> string or unicode\n\
2093\n\
2094Return a copy of the string S with trailing whitespace removed.\n\
2095If chars is given and not None, remove characters in chars instead.\n\
2096If chars is unicode, S will be converted to unicode before stripping");
2097
2098static PyObject *
2099string_rstrip(PyStringObject *self, PyObject *args)
2100{
2101 if (PyTuple_GET_SIZE(args) == 0)
2102 return do_strip(self, RIGHTSTRIP); /* Common case */
2103 else
2104 return do_argstrip(self, RIGHTSTRIP, args);
2105}
2106
2107
2108PyDoc_STRVAR(lower__doc__,
2109"S.lower() -> string\n\
2110\n\
2111Return a copy of the string S converted to lowercase.");
2112
2113/* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
2114#ifndef _tolower
2115#define _tolower tolower
2116#endif
2117
2118static PyObject *
2119string_lower(PyStringObject *self)
2120{
2121 char *s;
2122 Py_ssize_t i, n = PyString_GET_SIZE(self);
2123 PyObject *newobj;
2124
2125 newobj = PyString_FromStringAndSize(NULL, n);
2126 if (!newobj)
2127 return NULL;
2128
2129 s = PyString_AS_STRING(newobj);
2130
2131 Py_MEMCPY(s, PyString_AS_STRING(self), n);
2132
2133 for (i = 0; i < n; i++) {
2134 int c = Py_CHARMASK(s[i]);
2135 if (isupper(c))
2136 s[i] = _tolower(c);
2137 }
2138
2139 return newobj;
2140}
2141
2142PyDoc_STRVAR(upper__doc__,
2143"S.upper() -> string\n\
2144\n\
2145Return a copy of the string S converted to uppercase.");
2146
2147#ifndef _toupper
2148#define _toupper toupper
2149#endif
2150
2151static PyObject *
2152string_upper(PyStringObject *self)
2153{
2154 char *s;
2155 Py_ssize_t i, n = PyString_GET_SIZE(self);
2156 PyObject *newobj;
2157
2158 newobj = PyString_FromStringAndSize(NULL, n);
2159 if (!newobj)
2160 return NULL;
2161
2162 s = PyString_AS_STRING(newobj);
2163
2164 Py_MEMCPY(s, PyString_AS_STRING(self), n);
2165
2166 for (i = 0; i < n; i++) {
2167 int c = Py_CHARMASK(s[i]);
2168 if (islower(c))
2169 s[i] = _toupper(c);
2170 }
2171
2172 return newobj;
2173}
2174
2175PyDoc_STRVAR(title__doc__,
2176"S.title() -> string\n\
2177\n\
2178Return a titlecased version of S, i.e. words start with uppercase\n\
2179characters, all remaining cased characters have lowercase.");
2180
2181static PyObject*
2182string_title(PyStringObject *self)
2183{
2184 char *s = PyString_AS_STRING(self), *s_new;
2185 Py_ssize_t i, n = PyString_GET_SIZE(self);
2186 int previous_is_cased = 0;
2187 PyObject *newobj;
2188
2189 newobj = PyString_FromStringAndSize(NULL, n);
2190 if (newobj == NULL)
2191 return NULL;
2192 s_new = PyString_AsString(newobj);
2193 for (i = 0; i < n; i++) {
2194 int c = Py_CHARMASK(*s++);
2195 if (islower(c)) {
2196 if (!previous_is_cased)
2197 c = toupper(c);
2198 previous_is_cased = 1;
2199 } else if (isupper(c)) {
2200 if (previous_is_cased)
2201 c = tolower(c);
2202 previous_is_cased = 1;
2203 } else
2204 previous_is_cased = 0;
2205 *s_new++ = c;
2206 }
2207 return newobj;
2208}
2209
2210PyDoc_STRVAR(capitalize__doc__,
2211"S.capitalize() -> string\n\
2212\n\
2213Return a copy of the string S with only its first character\n\
2214capitalized.");
2215
2216static PyObject *
2217string_capitalize(PyStringObject *self)
2218{
2219 char *s = PyString_AS_STRING(self), *s_new;
2220 Py_ssize_t i, n = PyString_GET_SIZE(self);
2221 PyObject *newobj;
2222
2223 newobj = PyString_FromStringAndSize(NULL, n);
2224 if (newobj == NULL)
2225 return NULL;
2226 s_new = PyString_AsString(newobj);
2227 if (0 < n) {
2228 int c = Py_CHARMASK(*s++);
2229 if (islower(c))
2230 *s_new = toupper(c);
2231 else
2232 *s_new = c;
2233 s_new++;
2234 }
2235 for (i = 1; i < n; i++) {
2236 int c = Py_CHARMASK(*s++);
2237 if (isupper(c))
2238 *s_new = tolower(c);
2239 else
2240 *s_new = c;
2241 s_new++;
2242 }
2243 return newobj;
2244}
2245
2246
2247PyDoc_STRVAR(count__doc__,
2248"S.count(sub[, start[, end]]) -> int\n\
2249\n\
2250Return the number of non-overlapping occurrences of substring sub in\n\
2251string S[start:end]. Optional arguments start and end are interpreted\n\
2252as in slice notation.");
2253
2254static PyObject *
2255string_count(PyStringObject *self, PyObject *args)
2256{
2257 PyObject *sub_obj;
2258 const char *str = PyString_AS_STRING(self), *sub;
2259 Py_ssize_t sub_len;
2260 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
2261
2262 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj,
2263 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
2264 return NULL;
2265
2266 if (PyString_Check(sub_obj)) {
2267 sub = PyString_AS_STRING(sub_obj);
2268 sub_len = PyString_GET_SIZE(sub_obj);
2269 }
2270#ifdef Py_USING_UNICODE
2271 else if (PyUnicode_Check(sub_obj)) {
2272 Py_ssize_t count;
2273 count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
2274 if (count == -1)
2275 return NULL;
2276 else
2277 return PyInt_FromSsize_t(count);
2278 }
2279#endif
2280 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
2281 return NULL;
2282
2283 string_adjust_indices(&start, &end, PyString_GET_SIZE(self));
2284
2285 return PyInt_FromSsize_t(
2286 stringlib_count(str + start, end - start, sub, sub_len)
2287 );
2288}
2289
2290PyDoc_STRVAR(swapcase__doc__,
2291"S.swapcase() -> string\n\
2292\n\
2293Return a copy of the string S with uppercase characters\n\
2294converted to lowercase and vice versa.");
2295
2296static PyObject *
2297string_swapcase(PyStringObject *self)
2298{
2299 char *s = PyString_AS_STRING(self), *s_new;
2300 Py_ssize_t i, n = PyString_GET_SIZE(self);
2301 PyObject *newobj;
2302
2303 newobj = PyString_FromStringAndSize(NULL, n);
2304 if (newobj == NULL)
2305 return NULL;
2306 s_new = PyString_AsString(newobj);
2307 for (i = 0; i < n; i++) {
2308 int c = Py_CHARMASK(*s++);
2309 if (islower(c)) {
2310 *s_new = toupper(c);
2311 }
2312 else if (isupper(c)) {
2313 *s_new = tolower(c);
2314 }
2315 else
2316 *s_new = c;
2317 s_new++;
2318 }
2319 return newobj;
2320}
2321
2322
2323PyDoc_STRVAR(translate__doc__,
2324"S.translate(table [,deletechars]) -> string\n\
2325\n\
2326Return a copy of the string S, where all characters occurring\n\
2327in the optional argument deletechars are removed, and the\n\
2328remaining characters have been mapped through the given\n\
2329translation table, which must be a string of length 256.");
2330
2331static PyObject *
2332string_translate(PyStringObject *self, PyObject *args)
2333{
2334 register char *input, *output;
2335 register const char *table;
2336 register Py_ssize_t i, c, changed = 0;
2337 PyObject *input_obj = (PyObject*)self;
2338 const char *table1, *output_start, *del_table=NULL;
2339 Py_ssize_t inlen, tablen, dellen = 0;
2340 PyObject *result;
2341 int trans_table[256];
2342 PyObject *tableobj, *delobj = NULL;
2343
2344 if (!PyArg_UnpackTuple(args, "translate", 1, 2,
2345 &tableobj, &delobj))
2346 return NULL;
2347
2348 if (PyString_Check(tableobj)) {
2349 table1 = PyString_AS_STRING(tableobj);
2350 tablen = PyString_GET_SIZE(tableobj);
2351 }
2352#ifdef Py_USING_UNICODE
2353 else if (PyUnicode_Check(tableobj)) {
2354 /* Unicode .translate() does not support the deletechars
2355 parameter; instead a mapping to None will cause characters
2356 to be deleted. */
2357 if (delobj != NULL) {
2358 PyErr_SetString(PyExc_TypeError,
2359 "deletions are implemented differently for unicode");
2360 return NULL;
2361 }
2362 return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2363 }
2364#endif
2365 else if (PyObject_AsCharBuffer(tableobj, &table1, &tablen))
2366 return NULL;
2367
2368 if (tablen != 256) {
2369 PyErr_SetString(PyExc_ValueError,
2370 "translation table must be 256 characters long");
2371 return NULL;
2372 }
2373
2374 if (delobj != NULL) {
2375 if (PyString_Check(delobj)) {
2376 del_table = PyString_AS_STRING(delobj);
2377 dellen = PyString_GET_SIZE(delobj);
2378 }
2379#ifdef Py_USING_UNICODE
2380 else if (PyUnicode_Check(delobj)) {
2381 PyErr_SetString(PyExc_TypeError,
2382 "deletions are implemented differently for unicode");
2383 return NULL;
2384 }
2385#endif
2386 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2387 return NULL;
2388 }
2389 else {
2390 del_table = NULL;
2391 dellen = 0;
2392 }
2393
2394 table = table1;
2395 inlen = PyString_GET_SIZE(input_obj);
2396 result = PyString_FromStringAndSize((char *)NULL, inlen);
2397 if (result == NULL)
2398 return NULL;
2399 output_start = output = PyString_AsString(result);
2400 input = PyString_AS_STRING(input_obj);
2401
2402 if (dellen == 0) {
2403 /* If no deletions are required, use faster code */
2404 for (i = inlen; --i >= 0; ) {
2405 c = Py_CHARMASK(*input++);
2406 if (Py_CHARMASK((*output++ = table[c])) != c)
2407 changed = 1;
2408 }
2409 if (changed || !PyString_CheckExact(input_obj))
2410 return result;
2411 Py_DECREF(result);
2412 Py_INCREF(input_obj);
2413 return input_obj;
2414 }
2415
2416 for (i = 0; i < 256; i++)
2417 trans_table[i] = Py_CHARMASK(table[i]);
2418
2419 for (i = 0; i < dellen; i++)
2420 trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2421
2422 for (i = inlen; --i >= 0; ) {
2423 c = Py_CHARMASK(*input++);
2424 if (trans_table[c] != -1)
2425 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2426 continue;
2427 changed = 1;
2428 }
2429 if (!changed && PyString_CheckExact(input_obj)) {
2430 Py_DECREF(result);
2431 Py_INCREF(input_obj);
2432 return input_obj;
2433 }
2434 /* Fix the size of the resulting string */
2435 if (inlen > 0)
2436 _PyString_Resize(&result, output - output_start);
2437 return result;
2438}
2439
2440
/* Search directions.  REVERSE is parenthesized so that the negative
   constant stays a single operand wherever the macro is expanded. */
#define FORWARD 1
#define REVERSE (-1)

/* find and count characters and substrings */

/* Pointer to the first occurrence of byte `c` within the first
   `target_len` bytes at `target`, or NULL when there is none (a thin
   wrapper around memchr()).  All parameters are parenthesized so that
   expression arguments expand as intended. */
#define findchar(target, target_len, c)                         \
    ((char *)memchr((const void *)(target), (c), (target_len)))
2448
2449/* String ops must return a string. */
2450/* If the object is subclass of string, create a copy */
2451Py_LOCAL(PyStringObject *)
2452return_self(PyStringObject *self)
2453{
2454 if (PyString_CheckExact(self)) {
2455 Py_INCREF(self);
2456 return self;
2457 }
2458 return (PyStringObject *)PyString_FromStringAndSize(
2459 PyString_AS_STRING(self),
2460 PyString_GET_SIZE(self));
2461}
2462
2463Py_LOCAL_INLINE(Py_ssize_t)
2464countchar(const char *target, int target_len, char c, Py_ssize_t maxcount)
2465{
2466 Py_ssize_t count=0;
2467 const char *start=target;
2468 const char *end=target+target_len;
2469
2470 while ( (start=findchar(start, end-start, c)) != NULL ) {
2471 count++;
2472 if (count >= maxcount)
2473 break;
2474 start += 1;
2475 }
2476 return count;
2477}
2478
2479Py_LOCAL(Py_ssize_t)
2480findstring(const char *target, Py_ssize_t target_len,
2481 const char *pattern, Py_ssize_t pattern_len,
2482 Py_ssize_t start,
2483 Py_ssize_t end,
2484 int direction)
2485{
2486 if (start < 0) {
2487 start += target_len;
2488 if (start < 0)
2489 start = 0;
2490 }
2491 if (end > target_len) {
2492 end = target_len;
2493 } else if (end < 0) {
2494 end += target_len;
2495 if (end < 0)
2496 end = 0;
2497 }
2498
2499 /* zero-length substrings always match at the first attempt */
2500 if (pattern_len == 0)
2501 return (direction > 0) ? start : end;
2502
2503 end -= pattern_len;
2504
2505 if (direction < 0) {
2506 for (; end >= start; end--)
2507 if (Py_STRING_MATCH(target, end, pattern, pattern_len))
2508 return end;
2509 } else {
2510 for (; start <= end; start++)
2511 if (Py_STRING_MATCH(target, start, pattern, pattern_len))
2512 return start;
2513 }
2514 return -1;
2515}
2516
2517Py_LOCAL_INLINE(Py_ssize_t)
2518countstring(const char *target, Py_ssize_t target_len,
2519 const char *pattern, Py_ssize_t pattern_len,
2520 Py_ssize_t start,
2521 Py_ssize_t end,
2522 int direction, Py_ssize_t maxcount)
2523{
2524 Py_ssize_t count=0;
2525
2526 if (start < 0) {
2527 start += target_len;
2528 if (start < 0)
2529 start = 0;
2530 }
2531 if (end > target_len) {
2532 end = target_len;
2533 } else if (end < 0) {
2534 end += target_len;
2535 if (end < 0)
2536 end = 0;
2537 }
2538
2539 /* zero-length substrings match everywhere */
2540 if (pattern_len == 0 || maxcount == 0) {
2541 if (target_len+1 < maxcount)
2542 return target_len+1;
2543 return maxcount;
2544 }
2545
2546 end -= pattern_len;
2547 if (direction < 0) {
2548 for (; (end >= start); end--)
2549 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) {
2550 count++;
2551 if (--maxcount <= 0) break;
2552 end -= pattern_len-1;
2553 }
2554 } else {
2555 for (; (start <= end); start++)
2556 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) {
2557 count++;
2558 if (--maxcount <= 0)
2559 break;
2560 start += pattern_len-1;
2561 }
2562 }
2563 return count;
2564}
2565
2566
2567/* Algorithms for different cases of string replacement */
2568
2569/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2570Py_LOCAL(PyStringObject *)
2571replace_interleave(PyStringObject *self,
2572 const char *to_s, Py_ssize_t to_len,
2573 Py_ssize_t maxcount)
2574{
2575 char *self_s, *result_s;
2576 Py_ssize_t self_len, result_len;
2577 Py_ssize_t count, i, product;
2578 PyStringObject *result;
2579
2580 self_len = PyString_GET_SIZE(self);
2581
2582 /* 1 at the end plus 1 after every character */
2583 count = self_len+1;
2584 if (maxcount < count)
2585 count = maxcount;
2586
2587 /* Check for overflow */
2588 /* result_len = count * to_len + self_len; */
2589 product = count * to_len;
2590 if (product / to_len != count) {
2591 PyErr_SetString(PyExc_OverflowError,
2592 "replace string is too long");
2593 return NULL;
2594 }
2595 result_len = product + self_len;
2596 if (result_len < 0) {
2597 PyErr_SetString(PyExc_OverflowError,
2598 "replace string is too long");
2599 return NULL;
2600 }
2601
2602 if (! (result = (PyStringObject *)
2603 PyString_FromStringAndSize(NULL, result_len)) )
2604 return NULL;
2605
2606 self_s = PyString_AS_STRING(self);
2607 result_s = PyString_AS_STRING(result);
2608
2609 /* TODO: special case single character, which doesn't need memcpy */
2610
2611 /* Lay the first one down (guaranteed this will occur) */
2612 Py_MEMCPY(result_s, to_s, to_len);
2613 result_s += to_len;
2614 count -= 1;
2615
2616 for (i=0; i<count; i++) {
2617 *result_s++ = *self_s++;
2618 Py_MEMCPY(result_s, to_s, to_len);
2619 result_s += to_len;
2620 }
2621
2622 /* Copy the rest of the original string */
2623 Py_MEMCPY(result_s, self_s, self_len-i);
2624
2625 return result;
2626}
2627
2628/* Special case for deleting a single character */
2629/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2630Py_LOCAL(PyStringObject *)
2631replace_delete_single_character(PyStringObject *self,
2632 char from_c, Py_ssize_t maxcount)
2633{
2634 char *self_s, *result_s;
2635 char *start, *next, *end;
2636 Py_ssize_t self_len, result_len;
2637 Py_ssize_t count;
2638 PyStringObject *result;
2639
2640 self_len = PyString_GET_SIZE(self);
2641 self_s = PyString_AS_STRING(self);
2642
2643 count = countchar(self_s, self_len, from_c, maxcount);
2644 if (count == 0) {
2645 return return_self(self);
2646 }
2647
2648 result_len = self_len - count; /* from_len == 1 */
2649 assert(result_len>=0);
2650
2651 if ( (result = (PyStringObject *)
2652 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2653 return NULL;
2654 result_s = PyString_AS_STRING(result);
2655
2656 start = self_s;
2657 end = self_s + self_len;
2658 while (count-- > 0) {
2659 next = findchar(start, end-start, from_c);
2660 if (next == NULL)
2661 break;
2662 Py_MEMCPY(result_s, start, next-start);
2663 result_s += (next-start);
2664 start = next+1;
2665 }
2666 Py_MEMCPY(result_s, start, end-start);
2667
2668 return result;
2669}
2670
2671/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2672
2673Py_LOCAL(PyStringObject *)
2674replace_delete_substring(PyStringObject *self,
2675 const char *from_s, Py_ssize_t from_len,
2676 Py_ssize_t maxcount) {
2677 char *self_s, *result_s;
2678 char *start, *next, *end;
2679 Py_ssize_t self_len, result_len;
2680 Py_ssize_t count, offset;
2681 PyStringObject *result;
2682
2683 self_len = PyString_GET_SIZE(self);
2684 self_s = PyString_AS_STRING(self);
2685
2686 count = countstring(self_s, self_len,
2687 from_s, from_len,
2688 0, self_len, 1,
2689 maxcount);
2690
2691 if (count == 0) {
2692 /* no matches */
2693 return return_self(self);
2694 }
2695
2696 result_len = self_len - (count * from_len);
2697 assert (result_len>=0);
2698
2699 if ( (result = (PyStringObject *)
2700 PyString_FromStringAndSize(NULL, result_len)) == NULL )
2701 return NULL;
2702
2703 result_s = PyString_AS_STRING(result);
2704
2705 start = self_s;
2706 end = self_s + self_len;
2707 while (count-- > 0) {
2708 offset = findstring(start, end-start,
2709 from_s, from_len,
2710 0, end-start, FORWARD);
2711 if (offset == -1)
2712 break;
2713 next = start + offset;
2714
2715 Py_MEMCPY(result_s, start, next-start);
2716
2717 result_s += (next-start);
2718 start = next+from_len;
2719 }
2720 Py_MEMCPY(result_s, start, end-start);
2721 return result;
2722}
2723
2724/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2725Py_LOCAL(PyStringObject *)
2726replace_single_character_in_place(PyStringObject *self,
2727 char from_c, char to_c,
2728 Py_ssize_t maxcount)
2729{
2730 char *self_s, *result_s, *start, *end, *next;
2731 Py_ssize_t self_len;
2732 PyStringObject *result;
2733
2734 /* The result string will be the same size */
2735 self_s = PyString_AS_STRING(self);
2736 self_len = PyString_GET_SIZE(self);
2737
2738 next = findchar(self_s, self_len, from_c);
2739
2740 if (next == NULL) {
2741 /* No matches; return the original string */
2742 return return_self(self);
2743 }
2744
2745 /* Need to make a new string */
2746 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2747 if (result == NULL)
2748 return NULL;
2749 result_s = PyString_AS_STRING(result);
2750 Py_MEMCPY(result_s, self_s, self_len);
2751
2752 /* change everything in-place, starting with this one */
2753 start = result_s + (next-self_s);
2754 *start = to_c;
2755 start++;
2756 end = result_s + self_len;
2757
2758 while (--maxcount > 0) {
2759 next = findchar(start, end-start, from_c);
2760 if (next == NULL)
2761 break;
2762 *next = to_c;
2763 start = next+1;
2764 }
2765
2766 return result;
2767}
2768
2769/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2770Py_LOCAL(PyStringObject *)
2771replace_substring_in_place(PyStringObject *self,
2772 const char *from_s, Py_ssize_t from_len,
2773 const char *to_s, Py_ssize_t to_len,
2774 Py_ssize_t maxcount)
2775{
2776 char *result_s, *start, *end;
2777 char *self_s;
2778 Py_ssize_t self_len, offset;
2779 PyStringObject *result;
2780
2781 /* The result string will be the same size */
2782
2783 self_s = PyString_AS_STRING(self);
2784 self_len = PyString_GET_SIZE(self);
2785
2786 offset = findstring(self_s, self_len,
2787 from_s, from_len,
2788 0, self_len, FORWARD);
2789 if (offset == -1) {
2790 /* No matches; return the original string */
2791 return return_self(self);
2792 }
2793
2794 /* Need to make a new string */
2795 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2796 if (result == NULL)
2797 return NULL;
2798 result_s = PyString_AS_STRING(result);
2799 Py_MEMCPY(result_s, self_s, self_len);
2800
2801 /* change everything in-place, starting with this one */
2802 start = result_s + offset;
2803 Py_MEMCPY(start, to_s, from_len);
2804 start += from_len;
2805 end = result_s + self_len;
2806
2807 while ( --maxcount > 0) {
2808 offset = findstring(start, end-start,
2809 from_s, from_len,
2810 0, end-start, FORWARD);
2811 if (offset==-1)
2812 break;
2813 Py_MEMCPY(start+offset, to_s, from_len);
2814 start += offset+from_len;
2815 }
2816
2817 return result;
2818}
2819
2820/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2821Py_LOCAL(PyStringObject *)
2822replace_single_character(PyStringObject *self,
2823 char from_c,
2824 const char *to_s, Py_ssize_t to_len,
2825 Py_ssize_t maxcount)
2826{
2827 char *self_s, *result_s;
2828 char *start, *next, *end;
2829 Py_ssize_t self_len, result_len;
2830 Py_ssize_t count, product;
2831 PyStringObject *result;
2832
2833 self_s = PyString_AS_STRING(self);
2834 self_len = PyString_GET_SIZE(self);
2835
2836 count = countchar(self_s, self_len, from_c, maxcount);
2837 if (count == 0) {
2838 /* no matches, return unchanged */
2839 return return_self(self);
2840 }
2841
2842 /* use the difference between current and new, hence the "-1" */
2843 /* result_len = self_len + count * (to_len-1) */
2844 product = count * (to_len-1);
2845 if (product / (to_len-1) != count) {
2846 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2847 return NULL;
2848 }
2849 result_len = self_len + product;
2850 if (result_len < 0) {
2851 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2852 return NULL;
2853 }
2854
2855 if ( (result = (PyStringObject *)
2856 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2857 return NULL;
2858 result_s = PyString_AS_STRING(result);
2859
2860 start = self_s;
2861 end = self_s + self_len;
2862 while (count-- > 0) {
2863 next = findchar(start, end-start, from_c);
2864 if (next == NULL)
2865 break;
2866
2867 if (next == start) {
2868 /* replace with the 'to' */
2869 Py_MEMCPY(result_s, to_s, to_len);
2870 result_s += to_len;
2871 start += 1;
2872 } else {
2873 /* copy the unchanged old then the 'to' */
2874 Py_MEMCPY(result_s, start, next-start);
2875 result_s += (next-start);
2876 Py_MEMCPY(result_s, to_s, to_len);
2877 result_s += to_len;
2878 start = next+1;
2879 }
2880 }
2881 /* Copy the remainder of the remaining string */
2882 Py_MEMCPY(result_s, start, end-start);
2883
2884 return result;
2885}
2886
2887/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2888Py_LOCAL(PyStringObject *)
2889replace_substring(PyStringObject *self,
2890 const char *from_s, Py_ssize_t from_len,
2891 const char *to_s, Py_ssize_t to_len,
2892 Py_ssize_t maxcount) {
2893 char *self_s, *result_s;
2894 char *start, *next, *end;
2895 Py_ssize_t self_len, result_len;
2896 Py_ssize_t count, offset, product;
2897 PyStringObject *result;
2898
2899 self_s = PyString_AS_STRING(self);
2900 self_len = PyString_GET_SIZE(self);
2901
2902 count = countstring(self_s, self_len,
2903 from_s, from_len,
2904 0, self_len, FORWARD, maxcount);
2905 if (count == 0) {
2906 /* no matches, return unchanged */
2907 return return_self(self);
2908 }
2909
2910 /* Check for overflow */
2911 /* result_len = self_len + count * (to_len-from_len) */
2912 product = count * (to_len-from_len);
2913 if (product / (to_len-from_len) != count) {
2914 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2915 return NULL;
2916 }
2917 result_len = self_len + product;
2918 if (result_len < 0) {
2919 PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2920 return NULL;
2921 }
2922
2923 if ( (result = (PyStringObject *)
2924 PyString_FromStringAndSize(NULL, result_len)) == NULL)
2925 return NULL;
2926 result_s = PyString_AS_STRING(result);
2927
2928 start = self_s;
2929 end = self_s + self_len;
2930 while (count-- > 0) {
2931 offset = findstring(start, end-start,
2932 from_s, from_len,
2933 0, end-start, FORWARD);
2934 if (offset == -1)
2935 break;
2936 next = start+offset;
2937 if (next == start) {
2938 /* replace with the 'to' */
2939 Py_MEMCPY(result_s, to_s, to_len);
2940 result_s += to_len;
2941 start += from_len;
2942 } else {
2943 /* copy the unchanged old then the 'to' */
2944 Py_MEMCPY(result_s, start, next-start);
2945 result_s += (next-start);
2946 Py_MEMCPY(result_s, to_s, to_len);
2947 result_s += to_len;
2948 start = next+from_len;
2949 }
2950 }
2951 /* Copy the remainder of the remaining string */
2952 Py_MEMCPY(result_s, start, end-start);
2953
2954 return result;
2955}
2956
2957
2958Py_LOCAL(PyStringObject *)
2959replace(PyStringObject *self,
2960 const char *from_s, Py_ssize_t from_len,
2961 const char *to_s, Py_ssize_t to_len,
2962 Py_ssize_t maxcount)
2963{
2964 if (maxcount < 0) {
2965 maxcount = PY_SSIZE_T_MAX;
2966 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
2967 /* nothing to do; return the original string */
2968 return return_self(self);
2969 }
2970
2971 if (maxcount == 0 ||
2972 (from_len == 0 && to_len == 0)) {
2973 /* nothing to do; return the original string */
2974 return return_self(self);
2975 }
2976
2977 /* Handle zero-length special cases */
2978
2979 if (from_len == 0) {
2980 /* insert the 'to' string everywhere. */
2981 /* >>> "Python".replace("", ".") */
2982 /* '.P.y.t.h.o.n.' */
2983 return replace_interleave(self, to_s, to_len, maxcount);
2984 }
2985
2986 /* Except for "".replace("", "A") == "A" there is no way beyond this */
2987 /* point for an empty self string to generate a non-empty string */
2988 /* Special case so the remaining code always gets a non-empty string */
2989 if (PyString_GET_SIZE(self) == 0) {
2990 return return_self(self);
2991 }
2992
2993 if (to_len == 0) {
2994 /* delete all occurances of 'from' string */
2995 if (from_len == 1) {
2996 return replace_delete_single_character(
2997 self, from_s[0], maxcount);
2998 } else {
2999 return replace_delete_substring(self, from_s, from_len, maxcount);
3000 }
3001 }
3002
3003 /* Handle special case where both strings have the same length */
3004
3005 if (from_len == to_len) {
3006 if (from_len == 1) {
3007 return replace_single_character_in_place(
3008 self,
3009 from_s[0],
3010 to_s[0],
3011 maxcount);
3012 } else {
3013 return replace_substring_in_place(
3014 self, from_s, from_len, to_s, to_len, maxcount);
3015 }
3016 }
3017
3018 /* Otherwise use the more generic algorithms */
3019 if (from_len == 1) {
3020 return replace_single_character(self, from_s[0],
3021 to_s, to_len, maxcount);
3022 } else {
3023 /* len('from')>=2, len('to')>=1 */
3024 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
3025 }
3026}
3027
3028PyDoc_STRVAR(replace__doc__,
3029"S.replace (old, new[, count]) -> string\n\
3030\n\
3031Return a copy of string S with all occurrences of substring\n\
3032old replaced by new. If the optional argument count is\n\
3033given, only the first count occurrences are replaced.");
3034
3035static PyObject *
3036string_replace(PyStringObject *self, PyObject *args)
3037{
3038 Py_ssize_t count = -1;
3039 PyObject *from, *to;
3040 const char *from_s, *to_s;
3041 Py_ssize_t from_len, to_len;
3042
3043 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
3044 return NULL;
3045
3046 if (PyString_Check(from)) {
3047 from_s = PyString_AS_STRING(from);
3048 from_len = PyString_GET_SIZE(from);
3049 }
3050#ifdef Py_USING_UNICODE
3051 if (PyUnicode_Check(from))
3052 return PyUnicode_Replace((PyObject *)self,
3053 from, to, count);
3054#endif
3055 else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
3056 return NULL;
3057
3058 if (PyString_Check(to)) {
3059 to_s = PyString_AS_STRING(to);
3060 to_len = PyString_GET_SIZE(to);
3061 }
3062#ifdef Py_USING_UNICODE
3063 else if (PyUnicode_Check(to))
3064 return PyUnicode_Replace((PyObject *)self,
3065 from, to, count);
3066#endif
3067 else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
3068 return NULL;
3069
3070 return (PyObject *)replace((PyStringObject *) self,
3071 from_s, from_len,
3072 to_s, to_len, count);
3073}
3074
3075/** End DALKE **/
3076
3077/* Matches the end (direction >= 0) or start (direction < 0) of self
3078 * against substr, using the start and end arguments. Returns
3079 * -1 on error, 0 if not found and 1 if found.
3080 */
3081Py_LOCAL(int)
3082_string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
3083 Py_ssize_t end, int direction)
3084{
3085 Py_ssize_t len = PyString_GET_SIZE(self);
3086 Py_ssize_t slen;
3087 const char* sub;
3088 const char* str;
3089
3090 if (PyString_Check(substr)) {
3091 sub = PyString_AS_STRING(substr);
3092 slen = PyString_GET_SIZE(substr);
3093 }
3094#ifdef Py_USING_UNICODE
3095 else if (PyUnicode_Check(substr))
3096 return PyUnicode_Tailmatch((PyObject *)self,
3097 substr, start, end, direction);
3098#endif
3099 else if (PyObject_AsCharBuffer(substr, &sub, &slen))
3100 return -1;
3101 str = PyString_AS_STRING(self);
3102
3103 string_adjust_indices(&start, &end, len);
3104
3105 if (direction < 0) {
3106 /* startswith */
3107 if (start+slen > len)
3108 return 0;
3109 } else {
3110 /* endswith */
3111 if (end-start < slen || start > len)
3112 return 0;
3113
3114 if (end-slen > start)
3115 start = end - slen;
3116 }
3117 if (end-start >= slen)
3118 return ! memcmp(str+start, sub, slen);
3119 return 0;
3120}
3121
3122
3123PyDoc_STRVAR(startswith__doc__,
3124"S.startswith(prefix[, start[, end]]) -> bool\n\
3125\n\
3126Return True if S starts with the specified prefix, False otherwise.\n\
3127With optional start, test S beginning at that position.\n\
3128With optional end, stop comparing S at that position.\n\
3129prefix can also be a tuple of strings to try.");
3130
3131static PyObject *
3132string_startswith(PyStringObject *self, PyObject *args)
3133{
3134 Py_ssize_t start = 0;
3135 Py_ssize_t end = PY_SSIZE_T_MAX;
3136 PyObject *subobj;
3137 int result;
3138
3139 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj,
3140 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
3141 return NULL;
3142 if (PyTuple_Check(subobj)) {
3143 Py_ssize_t i;
3144 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3145 result = _string_tailmatch(self,
3146 PyTuple_GET_ITEM(subobj, i),
3147 start, end, -1);
3148 if (result == -1)
3149 return NULL;
3150 else if (result) {
3151 Py_RETURN_TRUE;
3152 }
3153 }
3154 Py_RETURN_FALSE;
3155 }
3156 result = _string_tailmatch(self, subobj, start, end, -1);
3157 if (result == -1)
3158 return NULL;
3159 else
3160 return PyBool_FromLong(result);
3161}
3162
3163
3164PyDoc_STRVAR(endswith__doc__,
3165"S.endswith(suffix[, start[, end]]) -> bool\n\
3166\n\
3167Return True if S ends with the specified suffix, False otherwise.\n\
3168With optional start, test S beginning at that position.\n\
3169With optional end, stop comparing S at that position.\n\
3170suffix can also be a tuple of strings to try.");
3171
3172static PyObject *
3173string_endswith(PyStringObject *self, PyObject *args)
3174{
3175 Py_ssize_t start = 0;
3176 Py_ssize_t end = PY_SSIZE_T_MAX;
3177 PyObject *subobj;
3178 int result;
3179
3180 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj,
3181 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end))
3182 return NULL;
3183 if (PyTuple_Check(subobj)) {
3184 Py_ssize_t i;
3185 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
3186 result = _string_tailmatch(self,
3187 PyTuple_GET_ITEM(subobj, i),
3188 start, end, +1);
3189 if (result == -1)
3190 return NULL;
3191 else if (result) {
3192 Py_RETURN_TRUE;
3193 }
3194 }
3195 Py_RETURN_FALSE;
3196 }
3197 result = _string_tailmatch(self, subobj, start, end, +1);
3198 if (result == -1)
3199 return NULL;
3200 else
3201 return PyBool_FromLong(result);
3202}
3203
3204
3205PyDoc_STRVAR(encode__doc__,
3206"S.encode([encoding[,errors]]) -> object\n\
3207\n\
3208Encodes S using the codec registered for encoding. encoding defaults\n\
3209to the default encoding. errors may be given to set a different error\n\
3210handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3211a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
3212'xmlcharrefreplace' as well as any other name registered with\n\
3213codecs.register_error that is able to handle UnicodeEncodeErrors.");
3214
3215static PyObject *
3216string_encode(PyStringObject *self, PyObject *args)
3217{
3218 char *encoding = NULL;
3219 char *errors = NULL;
3220 PyObject *v;
3221
3222 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors))
3223 return NULL;
3224 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
3225 if (v == NULL)
3226 goto onError;
3227 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3228 PyErr_Format(PyExc_TypeError,
3229 "encoder did not return a string/unicode object "
3230 "(type=%.400s)",
3231 v->ob_type->tp_name);
3232 Py_DECREF(v);
3233 return NULL;
3234 }
3235 return v;
3236
3237 onError:
3238 return NULL;
3239}
3240
3241
3242PyDoc_STRVAR(decode__doc__,
3243"S.decode([encoding[,errors]]) -> object\n\
3244\n\
3245Decodes S using the codec registered for encoding. encoding defaults\n\
3246to the default encoding. errors may be given to set a different error\n\
3247handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3248a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
3249as well as any other name registerd with codecs.register_error that is\n\
3250able to handle UnicodeDecodeErrors.");
3251
3252static PyObject *
3253string_decode(PyStringObject *self, PyObject *args)
3254{
3255 char *encoding = NULL;
3256 char *errors = NULL;
3257 PyObject *v;
3258
3259 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors))
3260 return NULL;
3261 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
3262 if (v == NULL)
3263 goto onError;
3264 if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3265 PyErr_Format(PyExc_TypeError,
3266 "decoder did not return a string/unicode object "
3267 "(type=%.400s)",
3268 v->ob_type->tp_name);
3269 Py_DECREF(v);
3270 return NULL;
3271 }
3272 return v;
3273
3274 onError:
3275 return NULL;
3276}
3277
3278
3279PyDoc_STRVAR(expandtabs__doc__,
3280"S.expandtabs([tabsize]) -> string\n\
3281\n\
3282Return a copy of S where all tab characters are expanded using spaces.\n\
3283If tabsize is not given, a tab size of 8 characters is assumed.");
3284
3285static PyObject*
3286string_expandtabs(PyStringObject *self, PyObject *args)
3287{
3288 const char *e, *p;
3289 char *q;
3290 Py_ssize_t i, j;
3291 PyObject *u;
3292 int tabsize = 8;
3293
3294 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
3295 return NULL;
3296
3297 /* First pass: determine size of output string */
3298 i = j = 0;
3299 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self);
3300 for (p = PyString_AS_STRING(self); p < e; p++)
3301 if (*p == '\t') {
3302 if (tabsize > 0)
3303 j += tabsize - (j % tabsize);
3304 }
3305 else {
3306 j++;
3307 if (*p == '\n' || *p == '\r') {
3308 i += j;
3309 j = 0;
3310 }
3311 }
3312
3313 /* Second pass: create output string and fill it */
3314 u = PyString_FromStringAndSize(NULL, i + j);
3315 if (!u)
3316 return NULL;
3317
3318 j = 0;
3319 q = PyString_AS_STRING(u);
3320
3321 for (p = PyString_AS_STRING(self); p < e; p++)
3322 if (*p == '\t') {
3323 if (tabsize > 0) {
3324 i = tabsize - (j % tabsize);
3325 j += i;
3326 while (i--)
3327 *q++ = ' ';
3328 }
3329 }
3330 else {
3331 j++;
3332 *q++ = *p;
3333 if (*p == '\n' || *p == '\r')
3334 j = 0;
3335 }
3336
3337 return u;
3338}
3339
3340Py_LOCAL_INLINE(PyObject *)
3341pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
3342{
3343 PyObject *u;
3344
3345 if (left < 0)
3346 left = 0;
3347 if (right < 0)
3348 right = 0;
3349
3350 if (left == 0 && right == 0 && PyString_CheckExact(self)) {
3351 Py_INCREF(self);
3352 return (PyObject *)self;
3353 }
3354
3355 u = PyString_FromStringAndSize(NULL,
3356 left + PyString_GET_SIZE(self) + right);
3357 if (u) {
3358 if (left)
3359 memset(PyString_AS_STRING(u), fill, left);
3360 Py_MEMCPY(PyString_AS_STRING(u) + left,
3361 PyString_AS_STRING(self),
3362 PyString_GET_SIZE(self));
3363 if (right)
3364 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
3365 fill, right);
3366 }
3367
3368 return u;
3369}
3370
3371PyDoc_STRVAR(ljust__doc__,
3372"S.ljust(width[, fillchar]) -> string\n"
3373"\n"
3374"Return S left justified in a string of length width. Padding is\n"
3375"done using the specified fill character (default is a space).");
3376
3377static PyObject *
3378string_ljust(PyStringObject *self, PyObject *args)
3379{
3380 Py_ssize_t width;
3381 char fillchar = ' ';
3382
3383 if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
3384 return NULL;
3385
3386 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
3387 Py_INCREF(self);
3388 return (PyObject*) self;
3389 }
3390
3391 return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
3392}
3393
3394
3395PyDoc_STRVAR(rjust__doc__,
3396"S.rjust(width[, fillchar]) -> string\n"
3397"\n"
3398"Return S right justified in a string of length width. Padding is\n"
3399"done using the specified fill character (default is a space)");
3400
3401static PyObject *
3402string_rjust(PyStringObject *self, PyObject *args)
3403{
3404 Py_ssize_t width;
3405 char fillchar = ' ';
3406
3407 if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
3408 return NULL;
3409
3410 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
3411 Py_INCREF(self);
3412 return (PyObject*) self;
3413 }
3414
3415 return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
3416}
3417
3418
3419PyDoc_STRVAR(center__doc__,
3420"S.center(width[, fillchar]) -> string\n"
3421"\n"
3422"Return S centered in a string of length width. Padding is\n"
3423"done using the specified fill character (default is a space)");
3424
3425static PyObject *
3426string_center(PyStringObject *self, PyObject *args)
3427{
3428 Py_ssize_t marg, left;
3429 Py_ssize_t width;
3430 char fillchar = ' ';
3431
3432 if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
3433 return NULL;
3434
3435 if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
3436 Py_INCREF(self);
3437 return (PyObject*) self;
3438 }
3439
3440 marg = width - PyString_GET_SIZE(self);
3441 left = marg / 2 + (marg & width & 1);
3442
3443 return pad(self, left, marg - left, fillchar);
3444}
3445
3446PyDoc_STRVAR(zfill__doc__,
3447"S.zfill(width) -> string\n"
3448"\n"
3449"Pad a numeric string S with zeros on the left, to fill a field\n"
3450"of the specified width. The string S is never truncated.");
3451
3452static PyObject *
3453string_zfill(PyStringObject *self, PyObject *args)
3454{
3455 Py_ssize_t fill;
3456 PyObject *s;
3457 char *p;
3458 Py_ssize_t width;
3459
3460 if (!PyArg_ParseTuple(args, "n:zfill", &width))
3461 return NULL;
3462
3463 if (PyString_GET_SIZE(self) >= width) {
3464 if (PyString_CheckExact(self)) {
3465 Py_INCREF(self);
3466 return (PyObject*) self;
3467 }
3468 else
3469 return PyString_FromStringAndSize(
3470 PyString_AS_STRING(self),
3471 PyString_GET_SIZE(self)
3472 );
3473 }
3474
3475 fill = width - PyString_GET_SIZE(self);
3476
3477 s = pad(self, fill, 0, '0');
3478
3479 if (s == NULL)
3480 return NULL;
3481
3482 p = PyString_AS_STRING(s);
3483 if (p[fill] == '+' || p[fill] == '-') {
3484 /* move sign to beginning of string */
3485 p[0] = p[fill];
3486 p[fill] = '0';
3487 }
3488
3489 return (PyObject*) s;
3490}
3491
3492PyDoc_STRVAR(isspace__doc__,
3493"S.isspace() -> bool\n\
3494\n\
3495Return True if all characters in S are whitespace\n\
3496and there is at least one character in S, False otherwise.");
3497
3498static PyObject*
3499string_isspace(PyStringObject *self)
3500{
3501 register const unsigned char *p
3502 = (unsigned char *) PyString_AS_STRING(self);
3503 register const unsigned char *e;
3504
3505 /* Shortcut for single character strings */
3506 if (PyString_GET_SIZE(self) == 1 &&
3507 isspace(*p))
3508 return PyBool_FromLong(1);
3509
3510 /* Special case for empty strings */
3511 if (PyString_GET_SIZE(self) == 0)
3512 return PyBool_FromLong(0);
3513
3514 e = p + PyString_GET_SIZE(self);
3515 for (; p < e; p++) {
3516 if (!isspace(*p))
3517 return PyBool_FromLong(0);
3518 }
3519 return PyBool_FromLong(1);
3520}
3521
3522
3523PyDoc_STRVAR(isalpha__doc__,
3524"S.isalpha() -> bool\n\
3525\n\
3526Return True if all characters in S are alphabetic\n\
3527and there is at least one character in S, False otherwise.");
3528
3529static PyObject*
3530string_isalpha(PyStringObject *self)
3531{
3532 register const unsigned char *p
3533 = (unsigned char *) PyString_AS_STRING(self);
3534 register const unsigned char *e;
3535
3536 /* Shortcut for single character strings */
3537 if (PyString_GET_SIZE(self) == 1 &&
3538 isalpha(*p))
3539 return PyBool_FromLong(1);
3540
3541 /* Special case for empty strings */
3542 if (PyString_GET_SIZE(self) == 0)
3543 return PyBool_FromLong(0);
3544
3545 e = p + PyString_GET_SIZE(self);
3546 for (; p < e; p++) {
3547 if (!isalpha(*p))
3548 return PyBool_FromLong(0);
3549 }
3550 return PyBool_FromLong(1);
3551}
3552
3553
3554PyDoc_STRVAR(isalnum__doc__,
3555"S.isalnum() -> bool\n\
3556\n\
3557Return True if all characters in S are alphanumeric\n\
3558and there is at least one character in S, False otherwise.");
3559
3560static PyObject*
3561string_isalnum(PyStringObject *self)
3562{
3563 register const unsigned char *p
3564 = (unsigned char *) PyString_AS_STRING(self);
3565 register const unsigned char *e;
3566
3567 /* Shortcut for single character strings */
3568 if (PyString_GET_SIZE(self) == 1 &&
3569 isalnum(*p))
3570 return PyBool_FromLong(1);
3571
3572 /* Special case for empty strings */
3573 if (PyString_GET_SIZE(self) == 0)
3574 return PyBool_FromLong(0);
3575
3576 e = p + PyString_GET_SIZE(self);
3577 for (; p < e; p++) {
3578 if (!isalnum(*p))
3579 return PyBool_FromLong(0);
3580 }
3581 return PyBool_FromLong(1);
3582}
3583
3584
3585PyDoc_STRVAR(isdigit__doc__,
3586"S.isdigit() -> bool\n\
3587\n\
3588Return True if all characters in S are digits\n\
3589and there is at least one character in S, False otherwise.");
3590
3591static PyObject*
3592string_isdigit(PyStringObject *self)
3593{
3594 register const unsigned char *p
3595 = (unsigned char *) PyString_AS_STRING(self);
3596 register const unsigned char *e;
3597
3598 /* Shortcut for single character strings */
3599 if (PyString_GET_SIZE(self) == 1 &&
3600 isdigit(*p))
3601 return PyBool_FromLong(1);
3602
3603 /* Special case for empty strings */
3604 if (PyString_GET_SIZE(self) == 0)
3605 return PyBool_FromLong(0);
3606
3607 e = p + PyString_GET_SIZE(self);
3608 for (; p < e; p++) {
3609 if (!isdigit(*p))
3610 return PyBool_FromLong(0);
3611 }
3612 return PyBool_FromLong(1);
3613}
3614
3615
3616PyDoc_STRVAR(islower__doc__,
3617"S.islower() -> bool\n\
3618\n\
3619Return True if all cased characters in S are lowercase and there is\n\
3620at least one cased character in S, False otherwise.");
3621
3622static PyObject*
3623string_islower(PyStringObject *self)
3624{
3625 register const unsigned char *p
3626 = (unsigned char *) PyString_AS_STRING(self);
3627 register const unsigned char *e;
3628 int cased;
3629
3630 /* Shortcut for single character strings */
3631 if (PyString_GET_SIZE(self) == 1)
3632 return PyBool_FromLong(islower(*p) != 0);
3633
3634 /* Special case for empty strings */
3635 if (PyString_GET_SIZE(self) == 0)
3636 return PyBool_FromLong(0);
3637
3638 e = p + PyString_GET_SIZE(self);
3639 cased = 0;
3640 for (; p < e; p++) {
3641 if (isupper(*p))
3642 return PyBool_FromLong(0);
3643 else if (!cased && islower(*p))
3644 cased = 1;
3645 }
3646 return PyBool_FromLong(cased);
3647}
3648
3649
3650PyDoc_STRVAR(isupper__doc__,
3651"S.isupper() -> bool\n\
3652\n\
3653Return True if all cased characters in S are uppercase and there is\n\
3654at least one cased character in S, False otherwise.");
3655
3656static PyObject*
3657string_isupper(PyStringObject *self)
3658{
3659 register const unsigned char *p
3660 = (unsigned char *) PyString_AS_STRING(self);
3661 register const unsigned char *e;
3662 int cased;
3663
3664 /* Shortcut for single character strings */
3665 if (PyString_GET_SIZE(self) == 1)
3666 return PyBool_FromLong(isupper(*p) != 0);
3667
3668 /* Special case for empty strings */
3669 if (PyString_GET_SIZE(self) == 0)
3670 return PyBool_FromLong(0);
3671
3672 e = p + PyString_GET_SIZE(self);
3673 cased = 0;
3674 for (; p < e; p++) {
3675 if (islower(*p))
3676 return PyBool_FromLong(0);
3677 else if (!cased && isupper(*p))
3678 cased = 1;
3679 }
3680 return PyBool_FromLong(cased);
3681}
3682
3683
3684PyDoc_STRVAR(istitle__doc__,
3685"S.istitle() -> bool\n\
3686\n\
3687Return True if S is a titlecased string and there is at least one\n\
3688character in S, i.e. uppercase characters may only follow uncased\n\
3689characters and lowercase characters only cased ones. Return False\n\
3690otherwise.");
3691
3692static PyObject*
3693string_istitle(PyStringObject *self, PyObject *uncased)
3694{
3695 register const unsigned char *p
3696 = (unsigned char *) PyString_AS_STRING(self);
3697 register const unsigned char *e;
3698 int cased, previous_is_cased;
3699
3700 /* Shortcut for single character strings */
3701 if (PyString_GET_SIZE(self) == 1)
3702 return PyBool_FromLong(isupper(*p) != 0);
3703
3704 /* Special case for empty strings */
3705 if (PyString_GET_SIZE(self) == 0)
3706 return PyBool_FromLong(0);
3707
3708 e = p + PyString_GET_SIZE(self);
3709 cased = 0;
3710 previous_is_cased = 0;
3711 for (; p < e; p++) {
3712 register const unsigned char ch = *p;
3713
3714 if (isupper(ch)) {
3715 if (previous_is_cased)
3716 return PyBool_FromLong(0);
3717 previous_is_cased = 1;
3718 cased = 1;
3719 }
3720 else if (islower(ch)) {
3721 if (!previous_is_cased)
3722 return PyBool_FromLong(0);
3723 previous_is_cased = 1;
3724 cased = 1;
3725 }
3726 else
3727 previous_is_cased = 0;
3728 }
3729 return PyBool_FromLong(cased);
3730}
3731
3732
3733PyDoc_STRVAR(splitlines__doc__,
3734"S.splitlines([keepends]) -> list of strings\n\
3735\n\
3736Return a list of the lines in S, breaking at line boundaries.\n\
3737Line breaks are not included in the resulting list unless keepends\n\
3738is given and true.");
3739
3740static PyObject*
3741string_splitlines(PyStringObject *self, PyObject *args)
3742{
3743 register Py_ssize_t i;
3744 register Py_ssize_t j;
3745 Py_ssize_t len;
3746 int keepends = 0;
3747 PyObject *list;
3748 PyObject *str;
3749 char *data;
3750
3751 if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
3752 return NULL;
3753
3754 data = PyString_AS_STRING(self);
3755 len = PyString_GET_SIZE(self);
3756
3757 /* This does not use the preallocated list because splitlines is
3758 usually run with hundreds of newlines. The overhead of
3759 switching between PyList_SET_ITEM and append causes about a
3760 2-3% slowdown for that common case. A smarter implementation
3761 could move the if check out, so the SET_ITEMs are done first
3762 and the appends only done when the prealloc buffer is full.
3763 That's too much work for little gain.*/
3764
3765 list = PyList_New(0);
3766 if (!list)
3767 goto onError;
3768
3769 for (i = j = 0; i < len; ) {
3770 Py_ssize_t eol;
3771
3772 /* Find a line and append it */
3773 while (i < len && data[i] != '\n' && data[i] != '\r')
3774 i++;
3775
3776 /* Skip the line break reading CRLF as one line break */
3777 eol = i;
3778 if (i < len) {
3779 if (data[i] == '\r' && i + 1 < len &&
3780 data[i+1] == '\n')
3781 i += 2;
3782 else
3783 i++;
3784 if (keepends)
3785 eol = i;
3786 }
3787 SPLIT_APPEND(data, j, eol);
3788 j = i;
3789 }
3790 if (j < len) {
3791 SPLIT_APPEND(data, j, len);
3792 }
3793
3794 return list;
3795
3796 onError:
3797 Py_XDECREF(list);
3798 return NULL;
3799}
3800
3801#undef SPLIT_APPEND
3802#undef SPLIT_ADD
3803#undef MAX_PREALLOC
3804#undef PREALLOC_SIZE
3805
3806static PyObject *
3807string_getnewargs(PyStringObject *v)
3808{
3809 return Py_BuildValue("(s#)", v->ob_sval, v->ob_size);
3810}
3811
3812
3813
/* Method table for the str type (installed as tp_methods of
   PyString_Type below).  Each entry gives the Python-visible method
   name, the C function implementing it, the calling-convention flag
   (METH_NOARGS, METH_O or METH_VARARGS) and, for all but
   __getnewargs__, its docstring.  The table ends with an all-NULL
   sentinel entry. */
static PyMethodDef
string_methods[] = {
	/* Counterparts of the obsolete stropmodule functions; except
	   string.maketrans(). */
	{"join", (PyCFunction)string_join, METH_O, join__doc__},
	{"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
	{"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
	{"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
	{"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
	{"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
	{"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
	{"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
	{"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
	{"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
	{"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
	{"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
	{"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
	 capitalize__doc__},
	{"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
	{"endswith", (PyCFunction)string_endswith, METH_VARARGS,
	 endswith__doc__},
	{"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
	{"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
	{"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
	{"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
	{"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
	{"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
	{"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
	{"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
	{"rpartition", (PyCFunction)string_rpartition, METH_O,
	 rpartition__doc__},
	{"startswith", (PyCFunction)string_startswith, METH_VARARGS,
	 startswith__doc__},
	{"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
	{"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
	 swapcase__doc__},
	{"translate", (PyCFunction)string_translate, METH_VARARGS,
	 translate__doc__},
	{"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
	{"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
	{"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
	{"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
	{"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
	{"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},
	{"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},
	{"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
	 expandtabs__doc__},
	{"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
	 splitlines__doc__},
	/* Registered without a docstring. */
	{"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS},
	{NULL, NULL} /* sentinel */
};
3866
3867static PyObject *
3868str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3869
3870static PyObject *
3871string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3872{
3873 PyObject *x = NULL;
3874 static char *kwlist[] = {"object", 0};
3875
3876 if (type != &PyString_Type)
3877 return str_subtype_new(type, args, kwds);
3878 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3879 return NULL;
3880 if (x == NULL)
3881 return PyString_FromString("");
3882 return PyObject_Str(x);
3883}
3884
3885static PyObject *
3886str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3887{
3888 PyObject *tmp, *pnew;
3889 Py_ssize_t n;
3890
3891 assert(PyType_IsSubtype(type, &PyString_Type));
3892 tmp = string_new(&PyString_Type, args, kwds);
3893 if (tmp == NULL)
3894 return NULL;
3895 assert(PyString_CheckExact(tmp));
3896 n = PyString_GET_SIZE(tmp);
3897 pnew = type->tp_alloc(type, n);
3898 if (pnew != NULL) {
3899 Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
3900 ((PyStringObject *)pnew)->ob_shash =
3901 ((PyStringObject *)tmp)->ob_shash;
3902 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
3903 }
3904 Py_DECREF(tmp);
3905 return pnew;
3906}
3907
3908static PyObject *
3909basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3910{
3911 PyErr_SetString(PyExc_TypeError,
3912 "The basestring type cannot be instantiated");
3913 return NULL;
3914}
3915
3916static PyObject *
3917string_mod(PyObject *v, PyObject *w)
3918{
3919 if (!PyString_Check(v)) {
3920 Py_INCREF(Py_NotImplemented);
3921 return Py_NotImplemented;
3922 }
3923 return PyString_Format(v, w);
3924}
3925
3926PyDoc_STRVAR(basestring_doc,
3927"Type basestring cannot be instantiated; it is the base for str and unicode.");
3928
/* Number protocol for str.  Only the remainder slot is filled in, so
   that `format % args' reaches string_mod(); the slots listed before
   it are explicitly 0 and the ones after it are left to default
   zero-initialization. */
static PyNumberMethods string_as_number = {
	0,			/*nb_add*/
	0,			/*nb_subtract*/
	0,			/*nb_multiply*/
	0, 			/*nb_divide*/
	string_mod,		/*nb_remainder*/
};
3936
3937
/* Type object for `basestring'.  Almost every slot is empty: the type
   allows subclassing (Py_TPFLAGS_BASETYPE), derives from
   PyBaseObject_Type, and its tp_new (basestring_new) always raises
   TypeError. */
PyTypeObject PyBaseString_Type = {
	PyObject_HEAD_INIT(&PyType_Type)
	0,					/* ob_size */
	"basestring",				/* tp_name */
	0,					/* tp_basicsize */
	0,					/* tp_itemsize */
	0,			 		/* tp_dealloc */
	0,			 		/* tp_print */
	0,					/* tp_getattr */
	0,					/* tp_setattr */
	0,					/* tp_compare */
	0,		 			/* tp_repr */
	0,					/* tp_as_number */
	0,					/* tp_as_sequence */
	0,					/* tp_as_mapping */
	0,		 			/* tp_hash */
	0,					/* tp_call */
	0,					/* tp_str */
	0,					/* tp_getattro */
	0,					/* tp_setattro */
	0,					/* tp_as_buffer */
	Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
	basestring_doc,				/* tp_doc */
	0,					/* tp_traverse */
	0,					/* tp_clear */
	0,					/* tp_richcompare */
	0,					/* tp_weaklistoffset */
	0,					/* tp_iter */
	0,					/* tp_iternext */
	0,					/* tp_methods */
	0,					/* tp_members */
	0,					/* tp_getset */
	&PyBaseObject_Type,			/* tp_base */
	0,					/* tp_dict */
	0,					/* tp_descr_get */
	0,					/* tp_descr_set */
	0,					/* tp_dictoffset */
	0,					/* tp_init */
	0,					/* tp_alloc */
	basestring_new,				/* tp_new */
	0,		                	/* tp_free */
};
3980
3981PyDoc_STRVAR(string_doc,
3982"str(object) -> string\n\
3983\n\
3984Return a nice string representation of the object.\n\
3985If the argument is a string, the return value is the same object.");
3986
/* Type object for `str'.  It is a variable-size type (one char per
   item on top of sizeof(PyStringObject)), derives from
   PyBaseString_Type, allows subclassing, and wires in the number,
   sequence, mapping and buffer protocol tables together with the
   method table string_methods. */
PyTypeObject PyString_Type = {
	PyObject_HEAD_INIT(&PyType_Type)
	0,					/* ob_size */
	"str",					/* tp_name */
	sizeof(PyStringObject),			/* tp_basicsize */
	sizeof(char),				/* tp_itemsize */
 	string_dealloc, 			/* tp_dealloc */
	(printfunc)string_print, 		/* tp_print */
	0,					/* tp_getattr */
	0,					/* tp_setattr */
	0,					/* tp_compare */
	string_repr, 				/* tp_repr */
	&string_as_number,			/* tp_as_number */
	&string_as_sequence,			/* tp_as_sequence */
	&string_as_mapping,			/* tp_as_mapping */
	(hashfunc)string_hash, 			/* tp_hash */
	0,					/* tp_call */
	string_str,				/* tp_str */
	PyObject_GenericGetAttr,		/* tp_getattro */
	0,					/* tp_setattro */
	&string_as_buffer,			/* tp_as_buffer */
	Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
		Py_TPFLAGS_BASETYPE,		/* tp_flags */
	string_doc,				/* tp_doc */
	0,					/* tp_traverse */
	0,					/* tp_clear */
	(richcmpfunc)string_richcompare,	/* tp_richcompare */
	0,					/* tp_weaklistoffset */
	0,					/* tp_iter */
	0,					/* tp_iternext */
	string_methods,				/* tp_methods */
	0,					/* tp_members */
	0,					/* tp_getset */
	&PyBaseString_Type,			/* tp_base */
	0,					/* tp_dict */
	0,					/* tp_descr_get */
	0,					/* tp_descr_set */
	0,					/* tp_dictoffset */
	0,					/* tp_init */
	0,					/* tp_alloc */
	string_new,				/* tp_new */
	PyObject_Del,	                	/* tp_free */
};
4030
4031void
4032PyString_Concat(register PyObject **pv, register PyObject *w)
4033{
4034 register PyObject *v;
4035 if (*pv == NULL)
4036 return;
4037 if (w == NULL || !PyString_Check(*pv)) {
4038 Py_DECREF(*pv);
4039 *pv = NULL;
4040 return;
4041 }
4042 v = string_concat((PyStringObject *) *pv, w);
4043 Py_DECREF(*pv);
4044 *pv = v;
4045}
4046
4047void
4048PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
4049{
4050 PyString_Concat(pv, w);
4051 Py_XDECREF(w);
4052}
4053
4054
4055/* The following function breaks the notion that strings are immutable:
4056 it changes the size of a string. We get away with this only if there
4057 is only one module referencing the object. You can also think of it
4058 as creating a new string object and destroying the old one, only
4059 more efficiently. In any case, don't use this if the string may
4060 already be known to some other part of the code...
4061 Note that if there's not enough memory to resize the string, the original
4062 string object at *pv is deallocated, *pv is set to NULL, an "out of
4063 memory" exception is set, and -1 is returned. Else (on success) 0 is
4064 returned, and the value in *pv may or may not be the same as on input.
4065 As always, an extra byte is allocated for a trailing \0 byte (newsize
4066 does *not* include that), and a trailing \0 byte is stored.
4067*/
4068
/* Resize the string object at *pv to hold `newsize' bytes (plus the
   trailing \0).  Returns 0 on success, in which case *pv may point to
   a different address than before.  Returns -1 with *pv set to NULL
   and an exception set on failure. */
int
_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
{
	register PyObject *v;
	register PyStringObject *sv;
	v = *pv;
	/* In-place resizing is only permitted for a string object that
	   has exactly one reference and is not interned, and only to a
	   non-negative size.  Anything else is a caller error: drop the
	   reference, clear *pv and report a bad internal call. */
	if (!PyString_Check(v) || v->ob_refcnt != 1 || newsize < 0 ||
	    PyString_CHECK_INTERNED(v)) {
		*pv = 0;
		Py_DECREF(v);
		PyErr_BadInternalCall();
		return -1;
	}
	/* XXX UNREF/NEWREF interface should be more symmetrical */
	/* NOTE(review): the two calls below presumably undo the
	   reference bookkeeping for the old address before the object
	   is (possibly) moved by realloc -- confirm against object.h. */
	_Py_DEC_REFTOTAL;
	_Py_ForgetReference(v);
	*pv = (PyObject *)
		PyObject_REALLOC((char *)v, sizeof(PyStringObject) + newsize);
	if (*pv == NULL) {
		/* Reallocation failed: free the original object; *pv is
		   already NULL at this point. */
		PyObject_Del(v);
		PyErr_NoMemory();
		return -1;
	}
	/* Re-register the object at its (possibly new) address. */
	_Py_NewReference(*pv);
	sv = (PyStringObject *) *pv;
	sv->ob_size = newsize;
	sv->ob_sval[newsize] = '\0';
	sv->ob_shash = -1;	/* invalidate cached hash value */
	return 0;
}
4099
4100/* Helpers for formatstring */
4101
4102Py_LOCAL_INLINE(PyObject *)
4103getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
4104{
4105 Py_ssize_t argidx = *p_argidx;
4106 if (argidx < arglen) {
4107 (*p_argidx)++;
4108 if (arglen < 0)
4109 return args;
4110 else
4111 return PyTuple_GetItem(args, argidx);
4112 }
4113 PyErr_SetString(PyExc_TypeError,
4114 "not enough arguments for format string");
4115 return NULL;
4116}
4117
/* Format codes
 *
 * Bit flags collected by PyString_Format() from the flag characters
 * that may follow '%' in a format specifier:
 *
 * F_LJUST	'-'	(also set when a '*' width is negative)
 * F_SIGN	'+'
 * F_BLANK	' '
 * F_ALT	'#'
 * F_ZERO	'0'
 */
#define F_LJUST (1<<0)
#define F_SIGN	(1<<1)
#define F_BLANK (1<<2)
#define F_ALT	(1<<3)
#define F_ZERO	(1<<4)
4130
4131Py_LOCAL_INLINE(int)
4132formatfloat(char *buf, size_t buflen, int flags,
4133 int prec, int type, PyObject *v)
4134{
4135 /* fmt = '%#.' + `prec` + `type`
4136 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/
4137 char fmt[20];
4138 double x;
4139 x = PyFloat_AsDouble(v);
4140 if (x == -1.0 && PyErr_Occurred()) {
4141 PyErr_SetString(PyExc_TypeError, "float argument required");
4142 return -1;
4143 }
4144 if (prec < 0)
4145 prec = 6;
4146 if (type == 'f' && fabs(x)/1e25 >= 1e25)
4147 type = 'g';
4148 /* Worst case length calc to ensure no buffer overrun:
4149
4150 'g' formats:
4151 fmt = %#.<prec>g
4152 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp
4153 for any double rep.)
4154 len = 1 + prec + 1 + 2 + 5 = 9 + prec
4155
4156 'f' formats:
4157 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50)
4158 len = 1 + 50 + 1 + prec = 52 + prec
4159
4160 If prec=0 the effective precision is 1 (the leading digit is
4161 always given), therefore increase the length by one.
4162
4163 */
4164 if ((type == 'g' && buflen <= (size_t)10 + (size_t)prec) ||
4165 (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) {
4166 PyErr_SetString(PyExc_OverflowError,
4167 "formatted float is too long (precision too large?)");
4168 return -1;
4169 }
4170 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c",
4171 (flags&F_ALT) ? "#" : "",
4172 prec, type);
4173 PyOS_ascii_formatd(buf, buflen, fmt, x);
4174 return (int)strlen(buf);
4175}
4176
4177/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
4178 * the F_ALT flag, for Python's long (unbounded) ints. It's not used for
4179 * Python's regular ints.
4180 * Return value: a new PyString*, or NULL if error.
4181 * . *pbuf is set to point into it,
4182 * *plen set to the # of chars following that.
4183 * Caller must decref it when done using pbuf.
4184 * The string starting at *pbuf is of the form
4185 * "-"? ("0x" | "0X")? digit+
4186 * "0x"/"0X" are present only for x and X conversions, with F_ALT
4187 * set in flags. The case of hex digits will be correct,
4188 * There will be at least prec digits, zero-filled on the left if
4189 * necessary to get that many.
4190 * val object to be converted
4191 * flags bitmask of format flags; only F_ALT is looked at
4192 * prec minimum number of digits; 0-fill on left if needed
4193 * type a character in [duoxX]; u acts the same as d
4194 *
4195 * CAUTION: o, x and X conversions on regular ints can never
4196 * produce a '-' sign, but can for Python's unbounded ints.
4197 */
4198PyObject*
4199_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
4200 char **pbuf, int *plen)
4201{
4202 PyObject *result = NULL;
4203 char *buf;
4204 Py_ssize_t i;
4205 int sign; /* 1 if '-', else 0 */
4206 int len; /* number of characters */
4207 Py_ssize_t llen;
4208 int numdigits; /* len == numnondigits + numdigits */
4209 int numnondigits = 0;
4210
4211 switch (type) {
4212 case 'd':
4213 case 'u':
4214 result = val->ob_type->tp_str(val);
4215 break;
4216 case 'o':
4217 result = val->ob_type->tp_as_number->nb_oct(val);
4218 break;
4219 case 'x':
4220 case 'X':
4221 numnondigits = 2;
4222 result = val->ob_type->tp_as_number->nb_hex(val);
4223 break;
4224 default:
4225 assert(!"'type' not in [duoxX]");
4226 }
4227 if (!result)
4228 return NULL;
4229
4230 buf = PyString_AsString(result);
4231 if (!buf) {
4232 Py_DECREF(result);
4233 return NULL;
4234 }
4235
4236 /* To modify the string in-place, there can only be one reference. */
4237 if (result->ob_refcnt != 1) {
4238 PyErr_BadInternalCall();
4239 return NULL;
4240 }
4241 llen = PyString_Size(result);
4242 if (llen > PY_SSIZE_T_MAX) {
4243 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
4244 return NULL;
4245 }
4246 len = (int)llen;
4247 if (buf[len-1] == 'L') {
4248 --len;
4249 buf[len] = '\0';
4250 }
4251 sign = buf[0] == '-';
4252 numnondigits += sign;
4253 numdigits = len - numnondigits;
4254 assert(numdigits > 0);
4255
4256 /* Get rid of base marker unless F_ALT */
4257 if ((flags & F_ALT) == 0) {
4258 /* Need to skip 0x, 0X or 0. */
4259 int skipped = 0;
4260 switch (type) {
4261 case 'o':
4262 assert(buf[sign] == '0');
4263 /* If 0 is only digit, leave it alone. */
4264 if (numdigits > 1) {
4265 skipped = 1;
4266 --numdigits;
4267 }
4268 break;
4269 case 'x':
4270 case 'X':
4271 assert(buf[sign] == '0');
4272 assert(buf[sign + 1] == 'x');
4273 skipped = 2;
4274 numnondigits -= 2;
4275 break;
4276 }
4277 if (skipped) {
4278 buf += skipped;
4279 len -= skipped;
4280 if (sign)
4281 buf[0] = '-';
4282 }
4283 assert(len == numnondigits + numdigits);
4284 assert(numdigits > 0);
4285 }
4286
4287 /* Fill with leading zeroes to meet minimum width. */
4288 if (prec > numdigits) {
4289 PyObject *r1 = PyString_FromStringAndSize(NULL,
4290 numnondigits + prec);
4291 char *b1;
4292 if (!r1) {
4293 Py_DECREF(result);
4294 return NULL;
4295 }
4296 b1 = PyString_AS_STRING(r1);
4297 for (i = 0; i < numnondigits; ++i)
4298 *b1++ = *buf++;
4299 for (i = 0; i < prec - numdigits; i++)
4300 *b1++ = '0';
4301 for (i = 0; i < numdigits; i++)
4302 *b1++ = *buf++;
4303 *b1 = '\0';
4304 Py_DECREF(result);
4305 result = r1;
4306 buf = PyString_AS_STRING(result);
4307 len = numnondigits + prec;
4308 }
4309
4310 /* Fix up case for hex conversions. */
4311 if (type == 'X') {
4312 /* Need to convert all lower case letters to upper case.
4313 and need to convert 0x to 0X (and -0x to -0X). */
4314 for (i = 0; i < len; i++)
4315 if (buf[i] >= 'a' && buf[i] <= 'x')
4316 buf[i] -= 'a'-'A';
4317 }
4318 *pbuf = buf;
4319 *plen = len;
4320 return result;
4321}
4322
4323Py_LOCAL_INLINE(int)
4324formatint(char *buf, size_t buflen, int flags,
4325 int prec, int type, PyObject *v)
4326{
4327 /* fmt = '%#.' + `prec` + 'l' + `type`
4328 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4329 + 1 + 1 = 24 */
4330 char fmt[64]; /* plenty big enough! */
4331 char *sign;
4332 long x;
4333
4334 x = PyInt_AsLong(v);
4335 if (x == -1 && PyErr_Occurred()) {
4336 PyErr_SetString(PyExc_TypeError, "int argument required");
4337 return -1;
4338 }
4339 if (x < 0 && type == 'u') {
4340 type = 'd';
4341 }
4342 if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4343 sign = "-";
4344 else
4345 sign = "";
4346 if (prec < 0)
4347 prec = 1;
4348
4349 if ((flags & F_ALT) &&
4350 (type == 'x' || type == 'X')) {
4351 /* When converting under %#x or %#X, there are a number
4352 * of issues that cause pain:
4353 * - when 0 is being converted, the C standard leaves off
4354 * the '0x' or '0X', which is inconsistent with other
4355 * %#x/%#X conversions and inconsistent with Python's
4356 * hex() function
4357 * - there are platforms that violate the standard and
4358 * convert 0 with the '0x' or '0X'
4359 * (Metrowerks, Compaq Tru64)
4360 * - there are platforms that give '0x' when converting
4361 * under %#X, but convert 0 in accordance with the
4362 * standard (OS/2 EMX)
4363 *
4364 * We can achieve the desired consistency by inserting our
4365 * own '0x' or '0X' prefix, and substituting %x/%X in place
4366 * of %#x/%#X.
4367 *
4368 * Note that this is the same approach as used in
4369 * formatint() in unicodeobject.c
4370 */
4371 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4372 sign, type, prec, type);
4373 }
4374 else {
4375 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4376 sign, (flags&F_ALT) ? "#" : "",
4377 prec, type);
4378 }
4379
4380 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4381 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
4382 */
4383 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
4384 PyErr_SetString(PyExc_OverflowError,
4385 "formatted integer is too long (precision too large?)");
4386 return -1;
4387 }
4388 if (sign[0])
4389 PyOS_snprintf(buf, buflen, fmt, -x);
4390 else
4391 PyOS_snprintf(buf, buflen, fmt, x);
4392 return (int)strlen(buf);
4393}
4394
4395Py_LOCAL_INLINE(int)
4396formatchar(char *buf, size_t buflen, PyObject *v)
4397{
4398 /* presume that the buffer is at least 2 characters long */
4399 if (PyString_Check(v)) {
4400 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
4401 return -1;
4402 }
4403 else {
4404 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
4405 return -1;
4406 }
4407 buf[1] = '\0';
4408 return 1;
4409}
4410
4411/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
4412
4413 FORMATBUFLEN is the length of the buffer in which the floats, ints, &
4414 chars are formatted. XXX This is a magic number. Each formatting
4415 routine does bounds checking to ensure no overflow, but a better
4416 solution may be to malloc a buffer of appropriate size for each
4417 format. For now, the current solution is sufficient.
4418*/
4419#define FORMATBUFLEN (size_t)120
4420
4421PyObject *
4422PyString_Format(PyObject *format, PyObject *args)
4423{
4424 char *fmt, *res;
4425 Py_ssize_t arglen, argidx;
4426 Py_ssize_t reslen, rescnt, fmtcnt;
4427 int args_owned = 0;
4428 PyObject *result, *orig_args;
4429#ifdef Py_USING_UNICODE
4430 PyObject *v, *w;
4431#endif
4432 PyObject *dict = NULL;
4433 if (format == NULL || !PyString_Check(format) || args == NULL) {
4434 PyErr_BadInternalCall();
4435 return NULL;
4436 }
4437 orig_args = args;
4438 fmt = PyString_AS_STRING(format);
4439 fmtcnt = PyString_GET_SIZE(format);
4440 reslen = rescnt = fmtcnt + 100;
4441 result = PyString_FromStringAndSize((char *)NULL, reslen);
4442 if (result == NULL)
4443 return NULL;
4444 res = PyString_AsString(result);
4445 if (PyTuple_Check(args)) {
4446 arglen = PyTuple_GET_SIZE(args);
4447 argidx = 0;
4448 }
4449 else {
4450 arglen = -1;
4451 argidx = -2;
4452 }
4453 if (args->ob_type->tp_as_mapping && !PyTuple_Check(args) &&
4454 !PyObject_TypeCheck(args, &PyBaseString_Type))
4455 dict = args;
4456 while (--fmtcnt >= 0) {
4457 if (*fmt != '%') {
4458 if (--rescnt < 0) {
4459 rescnt = fmtcnt + 100;
4460 reslen += rescnt;
4461 if (_PyString_Resize(&result, reslen) < 0)
4462 return NULL;
4463 res = PyString_AS_STRING(result)
4464 + reslen - rescnt;
4465 --rescnt;
4466 }
4467 *res++ = *fmt++;
4468 }
4469 else {
4470 /* Got a format specifier */
4471 int flags = 0;
4472 Py_ssize_t width = -1;
4473 int prec = -1;
4474 int c = '\0';
4475 int fill;
4476 PyObject *v = NULL;
4477 PyObject *temp = NULL;
4478 char *pbuf;
4479 int sign;
4480 Py_ssize_t len;
4481 char formatbuf[FORMATBUFLEN];
4482 /* For format{float,int,char}() */
4483#ifdef Py_USING_UNICODE
4484 char *fmt_start = fmt;
4485 Py_ssize_t argidx_start = argidx;
4486#endif
4487
4488 fmt++;
4489 if (*fmt == '(') {
4490 char *keystart;
4491 Py_ssize_t keylen;
4492 PyObject *key;
4493 int pcount = 1;
4494
4495 if (dict == NULL) {
4496 PyErr_SetString(PyExc_TypeError,
4497 "format requires a mapping");
4498 goto error;
4499 }
4500 ++fmt;
4501 --fmtcnt;
4502 keystart = fmt;
4503 /* Skip over balanced parentheses */
4504 while (pcount > 0 && --fmtcnt >= 0) {
4505 if (*fmt == ')')
4506 --pcount;
4507 else if (*fmt == '(')
4508 ++pcount;
4509 fmt++;
4510 }
4511 keylen = fmt - keystart - 1;
4512 if (fmtcnt < 0 || pcount > 0) {
4513 PyErr_SetString(PyExc_ValueError,
4514 "incomplete format key");
4515 goto error;
4516 }
4517 key = PyString_FromStringAndSize(keystart,
4518 keylen);
4519 if (key == NULL)
4520 goto error;
4521 if (args_owned) {
4522 Py_DECREF(args);
4523 args_owned = 0;
4524 }
4525 args = PyObject_GetItem(dict, key);
4526 Py_DECREF(key);
4527 if (args == NULL) {
4528 goto error;
4529 }
4530 args_owned = 1;
4531 arglen = -1;
4532 argidx = -2;
4533 }
4534 while (--fmtcnt >= 0) {
4535 switch (c = *fmt++) {
4536 case '-': flags |= F_LJUST; continue;
4537 case '+': flags |= F_SIGN; continue;
4538 case ' ': flags |= F_BLANK; continue;
4539 case '#': flags |= F_ALT; continue;
4540 case '0': flags |= F_ZERO; continue;
4541 }
4542 break;
4543 }
4544 if (c == '*') {
4545 v = getnextarg(args, arglen, &argidx);
4546 if (v == NULL)
4547 goto error;
4548 if (!PyInt_Check(v)) {
4549 PyErr_SetString(PyExc_TypeError,
4550 "* wants int");
4551 goto error;
4552 }
4553 width = PyInt_AsLong(v);
4554 if (width < 0) {
4555 flags |= F_LJUST;
4556 width = -width;
4557 }
4558 if (--fmtcnt >= 0)
4559 c = *fmt++;
4560 }
4561 else if (c >= 0 && isdigit(c)) {
4562 width = c - '0';
4563 while (--fmtcnt >= 0) {
4564 c = Py_CHARMASK(*fmt++);
4565 if (!isdigit(c))
4566 break;
4567 if ((width*10) / 10 != width) {
4568 PyErr_SetString(
4569 PyExc_ValueError,
4570 "width too big");
4571 goto error;
4572 }
4573 width = width*10 + (c - '0');
4574 }
4575 }
4576 if (c == '.') {
4577 prec = 0;
4578 if (--fmtcnt >= 0)
4579 c = *fmt++;
4580 if (c == '*') {
4581 v = getnextarg(args, arglen, &argidx);
4582 if (v == NULL)
4583 goto error;
4584 if (!PyInt_Check(v)) {
4585 PyErr_SetString(
4586 PyExc_TypeError,
4587 "* wants int");
4588 goto error;
4589 }
4590 prec = PyInt_AsLong(v);
4591 if (prec < 0)
4592 prec = 0;
4593 if (--fmtcnt >= 0)
4594 c = *fmt++;
4595 }
4596 else if (c >= 0 && isdigit(c)) {
4597 prec = c - '0';
4598 while (--fmtcnt >= 0) {
4599 c = Py_CHARMASK(*fmt++);
4600 if (!isdigit(c))
4601 break;
4602 if ((prec*10) / 10 != prec) {
4603 PyErr_SetString(
4604 PyExc_ValueError,
4605 "prec too big");
4606 goto error;
4607 }
4608 prec = prec*10 + (c - '0');
4609 }
4610 }
4611 } /* prec */
4612 if (fmtcnt >= 0) {
4613 if (c == 'h' || c == 'l' || c == 'L') {
4614 if (--fmtcnt >= 0)
4615 c = *fmt++;
4616 }
4617 }
4618 if (fmtcnt < 0) {
4619 PyErr_SetString(PyExc_ValueError,
4620 "incomplete format");
4621 goto error;
4622 }
4623 if (c != '%') {
4624 v = getnextarg(args, arglen, &argidx);
4625 if (v == NULL)
4626 goto error;
4627 }
4628 sign = 0;
4629 fill = ' ';
4630 switch (c) {
4631 case '%':
4632 pbuf = "%";
4633 len = 1;
4634 break;
4635 case 's':
4636#ifdef Py_USING_UNICODE
4637 if (PyUnicode_Check(v)) {
4638 fmt = fmt_start;
4639 argidx = argidx_start;
4640 goto unicode;
4641 }
4642#endif
4643 temp = _PyObject_Str(v);
4644#ifdef Py_USING_UNICODE
4645 if (temp != NULL && PyUnicode_Check(temp)) {
4646 Py_DECREF(temp);
4647 fmt = fmt_start;
4648 argidx = argidx_start;
4649 goto unicode;
4650 }
4651#endif
4652 /* Fall through */
4653 case 'r':
4654 if (c == 'r')
4655 temp = PyObject_Repr(v);
4656 if (temp == NULL)
4657 goto error;
4658 if (!PyString_Check(temp)) {
4659 PyErr_SetString(PyExc_TypeError,
4660 "%s argument has non-string str()");
4661 Py_DECREF(temp);
4662 goto error;
4663 }
4664 pbuf = PyString_AS_STRING(temp);
4665 len = PyString_GET_SIZE(temp);
4666 if (prec >= 0 && len > prec)
4667 len = prec;
4668 break;
4669 case 'i':
4670 case 'd':
4671 case 'u':
4672 case 'o':
4673 case 'x':
4674 case 'X':
4675 if (c == 'i')
4676 c = 'd';
4677 if (PyLong_Check(v)) {
4678 int ilen;
4679 temp = _PyString_FormatLong(v, flags,
4680 prec, c, &pbuf, &ilen);
4681 len = ilen;
4682 if (!temp)
4683 goto error;
4684 sign = 1;
4685 }
4686 else {
4687 pbuf = formatbuf;
4688 len = formatint(pbuf,
4689 sizeof(formatbuf),
4690 flags, prec, c, v);
4691 if (len < 0)
4692 goto error;
4693 sign = 1;
4694 }
4695 if (flags & F_ZERO)
4696 fill = '0';
4697 break;
4698 case 'e':
4699 case 'E':
4700 case 'f':
4701 case 'F':
4702 case 'g':
4703 case 'G':
4704 if (c == 'F')
4705 c = 'f';
4706 pbuf = formatbuf;
4707 len = formatfloat(pbuf, sizeof(formatbuf),
4708 flags, prec, c, v);
4709 if (len < 0)
4710 goto error;
4711 sign = 1;
4712 if (flags & F_ZERO)
4713 fill = '0';
4714 break;
4715 case 'c':
4716#ifdef Py_USING_UNICODE
4717 if (PyUnicode_Check(v)) {
4718 fmt = fmt_start;
4719 argidx = argidx_start;
4720 goto unicode;
4721 }
4722#endif
4723 pbuf = formatbuf;
4724 len = formatchar(pbuf, sizeof(formatbuf), v);
4725 if (len < 0)
4726 goto error;
4727 break;
4728 default:
4729 PyErr_Format(PyExc_ValueError,
4730 "unsupported format character '%c' (0x%x) "
4731 "at index %i",
4732 c, c,
4733 (int)(fmt - 1 - PyString_AsString(format)));
4734 goto error;
4735 }
4736 if (sign) {
4737 if (*pbuf == '-' || *pbuf == '+') {
4738 sign = *pbuf++;
4739 len--;
4740 }
4741 else if (flags & F_SIGN)
4742 sign = '+';
4743 else if (flags & F_BLANK)
4744 sign = ' ';
4745 else
4746 sign = 0;
4747 }
4748 if (width < len)
4749 width = len;
4750 if (rescnt - (sign != 0) < width) {
4751 reslen -= rescnt;
4752 rescnt = width + fmtcnt + 100;
4753 reslen += rescnt;
4754 if (reslen < 0) {
4755 Py_DECREF(result);
4756 return PyErr_NoMemory();
4757 }
4758 if (_PyString_Resize(&result, reslen) < 0)
4759 return NULL;
4760 res = PyString_AS_STRING(result)
4761 + reslen - rescnt;
4762 }
4763 if (sign) {
4764 if (fill != ' ')
4765 *res++ = sign;
4766 rescnt--;
4767 if (width > len)
4768 width--;
4769 }
4770 if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4771 assert(pbuf[0] == '0');
4772 assert(pbuf[1] == c);
4773 if (fill != ' ') {
4774 *res++ = *pbuf++;
4775 *res++ = *pbuf++;
4776 }
4777 rescnt -= 2;
4778 width -= 2;
4779 if (width < 0)
4780 width = 0;
4781 len -= 2;
4782 }
4783 if (width > len && !(flags & F_LJUST)) {
4784 do {
4785 --rescnt;
4786 *res++ = fill;
4787 } while (--width > len);
4788 }
4789 if (fill == ' ') {
4790 if (sign)
4791 *res++ = sign;
4792 if ((flags & F_ALT) &&
4793 (c == 'x' || c == 'X')) {
4794 assert(pbuf[0] == '0');
4795 assert(pbuf[1] == c);
4796 *res++ = *pbuf++;
4797 *res++ = *pbuf++;
4798 }
4799 }
4800 Py_MEMCPY(res, pbuf, len);
4801 res += len;
4802 rescnt -= len;
4803 while (--width >= len) {
4804 --rescnt;
4805 *res++ = ' ';
4806 }
4807 if (dict && (argidx < arglen) && c != '%') {
4808 PyErr_SetString(PyExc_TypeError,
4809 "not all arguments converted during string formatting");
4810 goto error;
4811 }
4812 Py_XDECREF(temp);
4813 } /* '%' */
4814 } /* until end */
4815 if (argidx < arglen && !dict) {
4816 PyErr_SetString(PyExc_TypeError,
4817 "not all arguments converted during string formatting");
4818 goto error;
4819 }
4820 if (args_owned) {
4821 Py_DECREF(args);
4822 }
4823 _PyString_Resize(&result, reslen - rescnt);
4824 return result;
4825
4826#ifdef Py_USING_UNICODE
4827 unicode:
4828 if (args_owned) {
4829 Py_DECREF(args);
4830 args_owned = 0;
4831 }
4832 /* Fiddle args right (remove the first argidx arguments) */
4833 if (PyTuple_Check(orig_args) && argidx > 0) {
4834 PyObject *v;
4835 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
4836 v = PyTuple_New(n);
4837 if (v == NULL)
4838 goto error;
4839 while (--n >= 0) {
4840 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4841 Py_INCREF(w);
4842 PyTuple_SET_ITEM(v, n, w);
4843 }
4844 args = v;
4845 } else {
4846 Py_INCREF(orig_args);
4847 args = orig_args;
4848 }
4849 args_owned = 1;
4850 /* Take what we have of the result and let the Unicode formatting
4851 function format the rest of the input. */
4852 rescnt = res - PyString_AS_STRING(result);
4853 if (_PyString_Resize(&result, rescnt))
4854 goto error;
4855 fmtcnt = PyString_GET_SIZE(format) - \
4856 (fmt - PyString_AS_STRING(format));
4857 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4858 if (format == NULL)
4859 goto error;
4860 v = PyUnicode_Format(format, args);
4861 Py_DECREF(format);
4862 if (v == NULL)
4863 goto error;
4864 /* Paste what we have (result) to what the Unicode formatting
4865 function returned (v) and return the result (or error) */
4866 w = PyUnicode_Concat(result, v);
4867 Py_DECREF(result);
4868 Py_DECREF(v);
4869 Py_DECREF(args);
4870 return w;
4871#endif /* Py_USING_UNICODE */
4872
4873 error:
4874 Py_DECREF(result);
4875 if (args_owned) {
4876 Py_DECREF(args);
4877 }
4878 return NULL;
4879}
4880
4881void
4882PyString_InternInPlace(PyObject **p)
4883{
4884 register PyStringObject *s = (PyStringObject *)(*p);
4885 PyObject *t;
4886 if (s == NULL || !PyString_Check(s))
4887 Py_FatalError("PyString_InternInPlace: strings only please!");
4888 /* If it's a string subclass, we don't really know what putting
4889 it in the interned dict might do. */
4890 if (!PyString_CheckExact(s))
4891 return;
4892 if (PyString_CHECK_INTERNED(s))
4893 return;
4894 if (interned == NULL) {
4895 interned = PyDict_New();
4896 if (interned == NULL) {
4897 PyErr_Clear(); /* Don't leave an exception */
4898 return;
4899 }
4900 }
4901 t = PyDict_GetItem(interned, (PyObject *)s);
4902 if (t) {
4903 Py_INCREF(t);
4904 Py_DECREF(*p);
4905 *p = t;
4906 return;
4907 }
4908
4909 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
4910 PyErr_Clear();
4911 return;
4912 }
4913 /* The two references in interned are not counted by refcnt.
4914 The string deallocator will take care of this */
4915 s->ob_refcnt -= 2;
4916 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
4917}
4918
4919void
4920PyString_InternImmortal(PyObject **p)
4921{
4922 PyString_InternInPlace(p);
4923 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4924 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4925 Py_INCREF(*p);
4926 }
4927}
4928
4929
4930PyObject *
4931PyString_InternFromString(const char *cp)
4932{
4933 PyObject *s = PyString_FromString(cp);
4934 if (s == NULL)
4935 return NULL;
4936 PyString_InternInPlace(&s);
4937 return s;
4938}
4939
4940void
4941PyString_Fini(void)
4942{
4943 int i;
4944 for (i = 0; i < UCHAR_MAX + 1; i++) {
4945 Py_XDECREF(characters[i]);
4946 characters[i] = NULL;
4947 }
4948 Py_XDECREF(nullstring);
4949 nullstring = NULL;
4950}
4951
4952void _Py_ReleaseInternedStrings(void)
4953{
4954 PyObject *keys;
4955 PyStringObject *s;
4956 Py_ssize_t i, n;
4957
4958 if (interned == NULL || !PyDict_Check(interned))
4959 return;
4960 keys = PyDict_Keys(interned);
4961 if (keys == NULL || !PyList_Check(keys)) {
4962 PyErr_Clear();
4963 return;
4964 }
4965
4966 /* Since _Py_ReleaseInternedStrings() is intended to help a leak
4967 detector, interned strings are not forcibly deallocated; rather, we
4968 give them their stolen references back, and then clear and DECREF
4969 the interned dict. */
4970
4971 fprintf(stderr, "releasing interned strings\n");
4972 n = PyList_GET_SIZE(keys);
4973 for (i = 0; i < n; i++) {
4974 s = (PyStringObject *) PyList_GET_ITEM(keys, i);
4975 switch (s->ob_sstate) {
4976 case SSTATE_NOT_INTERNED:
4977 /* XXX Shouldn't happen */
4978 break;
4979 case SSTATE_INTERNED_IMMORTAL:
4980 s->ob_refcnt += 1;
4981 break;
4982 case SSTATE_INTERNED_MORTAL:
4983 s->ob_refcnt += 2;
4984 break;
4985 default:
4986 Py_FatalError("Inconsistent interned string state.");
4987 }
4988 s->ob_sstate = SSTATE_NOT_INTERNED;
4989 }
4990 Py_DECREF(keys);
4991 PyDict_Clear(interned);
4992 Py_DECREF(interned);
4993 interned = NULL;
4994}
Note: See TracBrowser for help on using the repository browser.