Changeset 388 for python/vendor/current/Objects/stringobject.c
- Timestamp:
- Mar 19, 2014, 11:11:30 AM (11 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
python/vendor/current/Objects/stringobject.c
r2 r388 5 5 #include "Python.h" 6 6 #include <ctype.h> 7 #include <stddef.h> 7 8 8 9 #ifdef COUNT_ALLOCS 9 int null_strings, one_strings;10 Py_ssize_t null_strings, one_strings; 10 11 #endif 11 12 … … 23 24 static PyObject *interned; 24 25 26 /* PyStringObject_SIZE gives the basic size of a string; any memory allocation 27 for a string of length n should request PyStringObject_SIZE + n bytes. 28 29 Using PyStringObject_SIZE instead of sizeof(PyStringObject) saves 30 3 bytes per string allocation on a typical system. 31 */ 32 #define PyStringObject_SIZE (offsetof(PyStringObject, ob_sval) + 1) 33 25 34 /* 26 For both PyString_FromString() and PyString_FromStringAndSize(), the27 parameter `size' denotes number of characters to allocate, not counting any28 null terminating character.29 30 35 For PyString_FromString(), the parameter `str' points to a null-terminated 31 36 string containing exactly `size' bytes. … … 44 49 The PyObject member `op->ob_size', which denotes the number of "extra 45 50 items" in a variable-size object, will contain the number of bytes 46 allocated for string data, not counting the null terminating character. It47 is therefore equal to the equal to the `size' parameter (for51 allocated for string data, not counting the null terminating character. 52 It is therefore equal to the `size' parameter (for 48 53 PyString_FromStringAndSize()) or the length of the string in the `str' 49 54 parameter (for PyString_FromString()). 
… … 52 57 PyString_FromStringAndSize(const char *str, Py_ssize_t size) 53 58 { 54 55 56 57 58 59 60 59 register PyStringObject *op; 60 if (size < 0) { 61 PyErr_SetString(PyExc_SystemError, 62 "Negative size passed to PyString_FromStringAndSize"); 63 return NULL; 64 } 65 if (size == 0 && (op = nullstring) != NULL) { 61 66 #ifdef COUNT_ALLOCS 62 67 null_strings++; 63 68 #endif 64 65 66 67 68 69 69 Py_INCREF(op); 70 return (PyObject *)op; 71 } 72 if (size == 1 && str != NULL && 73 (op = characters[*str & UCHAR_MAX]) != NULL) 74 { 70 75 #ifdef COUNT_ALLOCS 71 76 one_strings++; 72 77 #endif 73 74 75 76 77 if (size > PY_SSIZE_T_MAX - sizeof(PyStringObject)) {78 79 80 81 82 83 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject)+ size);84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 78 Py_INCREF(op); 79 return (PyObject *)op; 80 } 81 82 if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) { 83 PyErr_SetString(PyExc_OverflowError, "string is too large"); 84 return NULL; 85 } 86 87 /* Inline PyObject_NewVar */ 88 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size); 89 if (op == NULL) 90 return PyErr_NoMemory(); 91 PyObject_INIT_VAR(op, &PyString_Type, size); 92 op->ob_shash = -1; 93 op->ob_sstate = SSTATE_NOT_INTERNED; 94 if (str != NULL) 95 Py_MEMCPY(op->ob_sval, str, size); 96 op->ob_sval[size] = '\0'; 97 /* share short strings */ 98 if (size == 0) { 99 PyObject *t = (PyObject *)op; 100 PyString_InternInPlace(&t); 101 op = (PyStringObject *)t; 102 nullstring = op; 103 Py_INCREF(op); 104 } else if (size == 1 && str != NULL) { 105 PyObject *t = (PyObject *)op; 106 PyString_InternInPlace(&t); 107 op = (PyStringObject *)t; 108 characters[*str & UCHAR_MAX] = op; 109 Py_INCREF(op); 110 } 111 return (PyObject *) op; 107 112 } 108 113 … … 110 115 PyString_FromString(const char *str) 111 116 { 112 113 114 115 116 117 if (size > PY_SSIZE_T_MAX - sizeof(PyStringObject)) {118 119 120 121 122 117 register size_t size; 118 register 
PyStringObject *op; 119 120 assert(str != NULL); 121 size = strlen(str); 122 if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) { 123 PyErr_SetString(PyExc_OverflowError, 124 "string is too long for a Python string"); 125 return NULL; 126 } 127 if (size == 0 && (op = nullstring) != NULL) { 123 128 #ifdef COUNT_ALLOCS 124 129 null_strings++; 125 130 #endif 126 127 128 129 131 Py_INCREF(op); 132 return (PyObject *)op; 133 } 134 if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) { 130 135 #ifdef COUNT_ALLOCS 131 136 one_strings++; 132 137 #endif 133 134 135 136 137 138 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject)+ size);139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 138 Py_INCREF(op); 139 return (PyObject *)op; 140 } 141 142 /* Inline PyObject_NewVar */ 143 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size); 144 if (op == NULL) 145 return PyErr_NoMemory(); 146 PyObject_INIT_VAR(op, &PyString_Type, size); 147 op->ob_shash = -1; 148 op->ob_sstate = SSTATE_NOT_INTERNED; 149 Py_MEMCPY(op->ob_sval, str, size+1); 150 /* share short strings */ 151 if (size == 0) { 152 PyObject *t = (PyObject *)op; 153 PyString_InternInPlace(&t); 154 op = (PyStringObject *)t; 155 nullstring = op; 156 Py_INCREF(op); 157 } else if (size == 1) { 158 PyObject *t = (PyObject *)op; 159 PyString_InternInPlace(&t); 160 op = (PyStringObject *)t; 161 characters[*str & UCHAR_MAX] = op; 162 Py_INCREF(op); 163 } 164 return (PyObject *) op; 160 165 } 161 166 … … 163 168 PyString_FromFormatV(const char *format, va_list vargs) 164 169 { 165 166 167 168 169 170 va_list count; 171 Py_ssize_t n = 0; 172 const char* f; 173 char *s; 174 PyObject* string; 170 175 171 176 #ifdef VA_LIST_IS_ARRAY 172 177 Py_MEMCPY(count, vargs, sizeof(va_list)); 173 178 #else 174 179 #ifdef __va_copy 175 180 __va_copy(count, vargs); 176 181 #else 177 182 count = vargs; 178 183 #endif 179 184 #endif 180 /* step 1: figure out how large a buffer we need */ 
181 for (f = format; *f; f++) { 182 if (*f == '%') { 183 const char* p = f; 184 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f))) 185 ; 186 187 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since 188 * they don't affect the amount of space we reserve. 189 */ 190 if ((*f == 'l' || *f == 'z') && 191 (f[1] == 'd' || f[1] == 'u')) 192 ++f; 193 194 switch (*f) { 195 case 'c': 196 (void)va_arg(count, int); 197 /* fall through... */ 198 case '%': 199 n++; 200 break; 201 case 'd': case 'u': case 'i': case 'x': 202 (void) va_arg(count, int); 203 /* 20 bytes is enough to hold a 64-bit 204 integer. Decimal takes the most space. 205 This isn't enough for octal. */ 206 n += 20; 207 break; 208 case 's': 209 s = va_arg(count, char*); 210 n += strlen(s); 211 break; 212 case 'p': 213 (void) va_arg(count, int); 214 /* maximum 64-bit pointer representation: 215 * 0xffffffffffffffff 216 * so 19 characters is enough. 217 * XXX I count 18 -- what's the extra for? 218 */ 219 n += 19; 220 break; 221 default: 222 /* if we stumble upon an unknown 223 formatting code, copy the rest of 224 the format string to the output 225 string. (we cannot just skip the 226 code, since there's no way to know 227 what's in the argument list) */ 228 n += strlen(p); 229 goto expand; 230 } 231 } else 232 n++; 233 } 185 /* step 1: figure out how large a buffer we need */ 186 for (f = format; *f; f++) { 187 if (*f == '%') { 188 #ifdef HAVE_LONG_LONG 189 int longlongflag = 0; 190 #endif 191 const char* p = f; 192 while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f))) 193 ; 194 195 /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since 196 * they don't affect the amount of space we reserve. 
197 */ 198 if (*f == 'l') { 199 if (f[1] == 'd' || f[1] == 'u') { 200 ++f; 201 } 202 #ifdef HAVE_LONG_LONG 203 else if (f[1] == 'l' && 204 (f[2] == 'd' || f[2] == 'u')) { 205 longlongflag = 1; 206 f += 2; 207 } 208 #endif 209 } 210 else if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) { 211 ++f; 212 } 213 214 switch (*f) { 215 case 'c': 216 (void)va_arg(count, int); 217 /* fall through... */ 218 case '%': 219 n++; 220 break; 221 case 'd': case 'u': case 'i': case 'x': 222 (void) va_arg(count, int); 223 #ifdef HAVE_LONG_LONG 224 /* Need at most 225 ceil(log10(256)*SIZEOF_LONG_LONG) digits, 226 plus 1 for the sign. 53/22 is an upper 227 bound for log10(256). */ 228 if (longlongflag) 229 n += 2 + (SIZEOF_LONG_LONG*53-1) / 22; 230 else 231 #endif 232 /* 20 bytes is enough to hold a 64-bit 233 integer. Decimal takes the most 234 space. This isn't enough for 235 octal. */ 236 n += 20; 237 238 break; 239 case 's': 240 s = va_arg(count, char*); 241 n += strlen(s); 242 break; 243 case 'p': 244 (void) va_arg(count, int); 245 /* maximum 64-bit pointer representation: 246 * 0xffffffffffffffff 247 * so 19 characters is enough. 248 * XXX I count 18 -- what's the extra for? 249 */ 250 n += 19; 251 break; 252 default: 253 /* if we stumble upon an unknown 254 formatting code, copy the rest of 255 the format string to the output 256 string. (we cannot just skip the 257 code, since there's no way to know 258 what's in the argument list) */ 259 n += strlen(p); 260 goto expand; 261 } 262 } else 263 n++; 264 } 234 265 expand: 235 /* step 2: fill the buffer */ 236 /* Since we've analyzed how much space we need for the worst case, 237 use sprintf directly instead of the slower PyOS_snprintf. 
*/ 238 string = PyString_FromStringAndSize(NULL, n); 239 if (!string) 240 return NULL; 241 242 s = PyString_AsString(string); 243 244 for (f = format; *f; f++) { 245 if (*f == '%') { 246 const char* p = f++; 247 Py_ssize_t i; 248 int longflag = 0; 249 int size_tflag = 0; 250 /* parse the width.precision part (we're only 251 interested in the precision value, if any) */ 252 n = 0; 253 while (isdigit(Py_CHARMASK(*f))) 254 n = (n*10) + *f++ - '0'; 255 if (*f == '.') { 256 f++; 257 n = 0; 258 while (isdigit(Py_CHARMASK(*f))) 259 n = (n*10) + *f++ - '0'; 260 } 261 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f))) 262 f++; 263 /* handle the long flag, but only for %ld and %lu. 264 others can be added when necessary. */ 265 if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) { 266 longflag = 1; 267 ++f; 268 } 269 /* handle the size_t flag. */ 270 if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) { 271 size_tflag = 1; 272 ++f; 273 } 274 275 switch (*f) { 276 case 'c': 277 *s++ = va_arg(vargs, int); 278 break; 279 case 'd': 280 if (longflag) 281 sprintf(s, "%ld", va_arg(vargs, long)); 282 else if (size_tflag) 283 sprintf(s, "%" PY_FORMAT_SIZE_T "d", 284 va_arg(vargs, Py_ssize_t)); 285 else 286 sprintf(s, "%d", va_arg(vargs, int)); 287 s += strlen(s); 288 break; 289 case 'u': 290 if (longflag) 291 sprintf(s, "%lu", 292 va_arg(vargs, unsigned long)); 293 else if (size_tflag) 294 sprintf(s, "%" PY_FORMAT_SIZE_T "u", 295 va_arg(vargs, size_t)); 296 else 297 sprintf(s, "%u", 298 va_arg(vargs, unsigned int)); 299 s += strlen(s); 300 break; 301 case 'i': 302 sprintf(s, "%i", va_arg(vargs, int)); 303 s += strlen(s); 304 break; 305 case 'x': 306 sprintf(s, "%x", va_arg(vargs, int)); 307 s += strlen(s); 308 break; 309 case 's': 310 p = va_arg(vargs, char*); 311 i = strlen(p); 312 if (n > 0 && i > n) 313 i = n; 314 Py_MEMCPY(s, p, i); 315 s += i; 316 break; 317 case 'p': 318 sprintf(s, "%p", va_arg(vargs, void*)); 319 /* %p is ill-defined: ensure leading 0x. 
*/ 320 if (s[1] == 'X') 321 s[1] = 'x'; 322 else if (s[1] != 'x') { 323 memmove(s+2, s, strlen(s)+1); 324 s[0] = '0'; 325 s[1] = 'x'; 326 } 327 s += strlen(s); 328 break; 329 case '%': 330 *s++ = '%'; 331 break; 332 default: 333 strcpy(s, p); 334 s += strlen(s); 335 goto end; 336 } 337 } else 338 *s++ = *f; 339 } 266 /* step 2: fill the buffer */ 267 /* Since we've analyzed how much space we need for the worst case, 268 use sprintf directly instead of the slower PyOS_snprintf. */ 269 string = PyString_FromStringAndSize(NULL, n); 270 if (!string) 271 return NULL; 272 273 s = PyString_AsString(string); 274 275 for (f = format; *f; f++) { 276 if (*f == '%') { 277 const char* p = f++; 278 Py_ssize_t i; 279 int longflag = 0; 280 #ifdef HAVE_LONG_LONG 281 int longlongflag = 0; 282 #endif 283 int size_tflag = 0; 284 /* parse the width.precision part (we're only 285 interested in the precision value, if any) */ 286 n = 0; 287 while (isdigit(Py_CHARMASK(*f))) 288 n = (n*10) + *f++ - '0'; 289 if (*f == '.') { 290 f++; 291 n = 0; 292 while (isdigit(Py_CHARMASK(*f))) 293 n = (n*10) + *f++ - '0'; 294 } 295 while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f))) 296 f++; 297 /* Handle %ld, %lu, %lld and %llu. */ 298 if (*f == 'l') { 299 if (f[1] == 'd' || f[1] == 'u') { 300 longflag = 1; 301 ++f; 302 } 303 #ifdef HAVE_LONG_LONG 304 else if (f[1] == 'l' && 305 (f[2] == 'd' || f[2] == 'u')) { 306 longlongflag = 1; 307 f += 2; 308 } 309 #endif 310 } 311 /* handle the size_t flag. 
*/ 312 else if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) { 313 size_tflag = 1; 314 ++f; 315 } 316 317 switch (*f) { 318 case 'c': 319 *s++ = va_arg(vargs, int); 320 break; 321 case 'd': 322 if (longflag) 323 sprintf(s, "%ld", va_arg(vargs, long)); 324 #ifdef HAVE_LONG_LONG 325 else if (longlongflag) 326 sprintf(s, "%" PY_FORMAT_LONG_LONG "d", 327 va_arg(vargs, PY_LONG_LONG)); 328 #endif 329 else if (size_tflag) 330 sprintf(s, "%" PY_FORMAT_SIZE_T "d", 331 va_arg(vargs, Py_ssize_t)); 332 else 333 sprintf(s, "%d", va_arg(vargs, int)); 334 s += strlen(s); 335 break; 336 case 'u': 337 if (longflag) 338 sprintf(s, "%lu", 339 va_arg(vargs, unsigned long)); 340 #ifdef HAVE_LONG_LONG 341 else if (longlongflag) 342 sprintf(s, "%" PY_FORMAT_LONG_LONG "u", 343 va_arg(vargs, PY_LONG_LONG)); 344 #endif 345 else if (size_tflag) 346 sprintf(s, "%" PY_FORMAT_SIZE_T "u", 347 va_arg(vargs, size_t)); 348 else 349 sprintf(s, "%u", 350 va_arg(vargs, unsigned int)); 351 s += strlen(s); 352 break; 353 case 'i': 354 sprintf(s, "%i", va_arg(vargs, int)); 355 s += strlen(s); 356 break; 357 case 'x': 358 sprintf(s, "%x", va_arg(vargs, int)); 359 s += strlen(s); 360 break; 361 case 's': 362 p = va_arg(vargs, char*); 363 i = strlen(p); 364 if (n > 0 && i > n) 365 i = n; 366 Py_MEMCPY(s, p, i); 367 s += i; 368 break; 369 case 'p': 370 sprintf(s, "%p", va_arg(vargs, void*)); 371 /* %p is ill-defined: ensure leading 0x. 
*/ 372 if (s[1] == 'X') 373 s[1] = 'x'; 374 else if (s[1] != 'x') { 375 memmove(s+2, s, strlen(s)+1); 376 s[0] = '0'; 377 s[1] = 'x'; 378 } 379 s += strlen(s); 380 break; 381 case '%': 382 *s++ = '%'; 383 break; 384 default: 385 strcpy(s, p); 386 s += strlen(s); 387 goto end; 388 } 389 } else 390 *s++ = *f; 391 } 340 392 341 393 end: 342 _PyString_Resize(&string, s - PyString_AS_STRING(string)); 343 return string; 394 if (_PyString_Resize(&string, s - PyString_AS_STRING(string))) 395 return NULL; 396 return string; 344 397 } 345 398 … … 347 400 PyString_FromFormat(const char *format, ...) 348 401 { 349 350 402 PyObject* ret; 403 va_list vargs; 351 404 352 405 #ifdef HAVE_STDARG_PROTOTYPES 353 406 va_start(vargs, format); 354 407 #else 355 408 va_start(vargs); 356 409 #endif 357 358 359 410 ret = PyString_FromFormatV(format, vargs); 411 va_end(vargs); 412 return ret; 360 413 } 361 414 362 415 363 416 PyObject *PyString_Decode(const char *s, 364 365 366 417 Py_ssize_t size, 418 const char *encoding, 419 const char *errors) 367 420 { 368 421 PyObject *v, *str; … … 370 423 str = PyString_FromStringAndSize(s, size); 371 424 if (str == NULL) 372 425 return NULL; 373 426 v = PyString_AsDecodedString(str, encoding, errors); 374 427 Py_DECREF(str); … … 377 430 378 431 PyObject *PyString_AsDecodedObject(PyObject *str, 379 380 432 const char *encoding, 433 const char *errors) 381 434 { 382 435 PyObject *v; … … 389 442 if (encoding == NULL) { 390 443 #ifdef Py_USING_UNICODE 391 444 encoding = PyUnicode_GetDefaultEncoding(); 392 445 #else 393 394 446 PyErr_SetString(PyExc_ValueError, "no encoding specified"); 447 goto onError; 395 448 #endif 396 449 } … … 408 461 409 462 PyObject *PyString_AsDecodedString(PyObject *str, 410 411 463 const char *encoding, 464 const char *errors) 412 465 { 413 466 PyObject *v; … … 420 473 /* Convert Unicode to a string using the default encoding */ 421 474 if (PyUnicode_Check(v)) { 422 423 424 425 426 475 PyObject *temp = v; 476 v = 
PyUnicode_AsEncodedString(v, NULL, NULL); 477 Py_DECREF(temp); 478 if (v == NULL) 479 goto onError; 427 480 } 428 481 #endif … … 442 495 443 496 PyObject *PyString_Encode(const char *s, 444 445 446 497 Py_ssize_t size, 498 const char *encoding, 499 const char *errors) 447 500 { 448 501 PyObject *v, *str; … … 450 503 str = PyString_FromStringAndSize(s, size); 451 504 if (str == NULL) 452 505 return NULL; 453 506 v = PyString_AsEncodedString(str, encoding, errors); 454 507 Py_DECREF(str); … … 457 510 458 511 PyObject *PyString_AsEncodedObject(PyObject *str, 459 460 512 const char *encoding, 513 const char *errors) 461 514 { 462 515 PyObject *v; … … 469 522 if (encoding == NULL) { 470 523 #ifdef Py_USING_UNICODE 471 524 encoding = PyUnicode_GetDefaultEncoding(); 472 525 #else 473 474 526 PyErr_SetString(PyExc_ValueError, "no encoding specified"); 527 goto onError; 475 528 #endif 476 529 } … … 488 541 489 542 PyObject *PyString_AsEncodedString(PyObject *str, 490 491 543 const char *encoding, 544 const char *errors) 492 545 { 493 546 PyObject *v; … … 500 553 /* Convert Unicode to a string using the default encoding */ 501 554 if (PyUnicode_Check(v)) { 502 503 504 505 506 555 PyObject *temp = v; 556 v = PyUnicode_AsEncodedString(v, NULL, NULL); 557 Py_DECREF(temp); 558 if (v == NULL) 559 goto onError; 507 560 } 508 561 #endif … … 524 577 string_dealloc(PyObject *op) 525 578 { 526 527 528 529 530 531 532 533 534 535 536 537 538 539 540 541 542 543 544 579 switch (PyString_CHECK_INTERNED(op)) { 580 case SSTATE_NOT_INTERNED: 581 break; 582 583 case SSTATE_INTERNED_MORTAL: 584 /* revive dead object temporarily for DelItem */ 585 Py_REFCNT(op) = 3; 586 if (PyDict_DelItem(interned, op) != 0) 587 Py_FatalError( 588 "deletion of interned string failed"); 589 break; 590 591 case SSTATE_INTERNED_IMMORTAL: 592 Py_FatalError("Immortal interned string died."); 593 594 default: 595 Py_FatalError("Inconsistent interned string state."); 596 } 597 Py_TYPE(op)->tp_free(op); 545 598 } 546 
599 … … 551 604 552 605 PyObject *PyString_DecodeEscape(const char *s, 553 554 555 556 557 { 558 559 560 561 562 563 564 565 566 567 568 569 570 606 Py_ssize_t len, 607 const char *errors, 608 Py_ssize_t unicode, 609 const char *recode_encoding) 610 { 611 int c; 612 char *p, *buf; 613 const char *end; 614 PyObject *v; 615 Py_ssize_t newlen = recode_encoding ? 4*len:len; 616 v = PyString_FromStringAndSize((char *)NULL, newlen); 617 if (v == NULL) 618 return NULL; 619 p = buf = PyString_AsString(v); 620 end = s + len; 621 while (s < end) { 622 if (*s != '\\') { 623 non_esc: 571 624 #ifdef Py_USING_UNICODE 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 if (!w)goto failed;588 589 590 591 592 593 594 595 596 597 598 599 625 if (recode_encoding && (*s & 0x80)) { 626 PyObject *u, *w; 627 char *r; 628 const char* t; 629 Py_ssize_t rn; 630 t = s; 631 /* Decode non-ASCII bytes as UTF-8. */ 632 while (t < end && (*t & 0x80)) t++; 633 u = PyUnicode_DecodeUTF8(s, t - s, errors); 634 if(!u) goto failed; 635 636 /* Recode them in target encoding. */ 637 w = PyUnicode_AsEncodedString( 638 u, recode_encoding, errors); 639 Py_DECREF(u); 640 if (!w) goto failed; 641 642 /* Append bytes to output buffer. */ 643 assert(PyString_Check(w)); 644 r = PyString_AS_STRING(w); 645 rn = PyString_GET_SIZE(w); 646 Py_MEMCPY(p, r, rn); 647 p += rn; 648 Py_DECREF(w); 649 s = t; 650 } else { 651 *p++ = *s++; 652 } 600 653 #else 601 654 *p++ = *s++; 602 655 #endif 603 continue; 604 } 605 s++; 606 if (s==end) { 607 PyErr_SetString(PyExc_ValueError, 608 "Trailing \\ in string"); 609 goto failed; 610 } 611 switch (*s++) { 612 /* XXX This assumes ASCII! 
*/ 613 case '\n': break; 614 case '\\': *p++ = '\\'; break; 615 case '\'': *p++ = '\''; break; 616 case '\"': *p++ = '\"'; break; 617 case 'b': *p++ = '\b'; break; 618 case 'f': *p++ = '\014'; break; /* FF */ 619 case 't': *p++ = '\t'; break; 620 case 'n': *p++ = '\n'; break; 621 case 'r': *p++ = '\r'; break; 622 case 'v': *p++ = '\013'; break; /* VT */ 623 case 'a': *p++ = '\007'; break; /* BEL, not classic C */ 624 case '0': case '1': case '2': case '3': 625 case '4': case '5': case '6': case '7': 626 c = s[-1] - '0'; 627 if (s < end && '0' <= *s && *s <= '7') { 628 c = (c<<3) + *s++ - '0'; 629 if (s < end && '0' <= *s && *s <= '7') 630 c = (c<<3) + *s++ - '0'; 631 } 632 *p++ = c; 633 break; 634 case 'x': 635 if (s+1 < end && 636 isxdigit(Py_CHARMASK(s[0])) && 637 isxdigit(Py_CHARMASK(s[1]))) 638 { 639 unsigned int x = 0; 640 c = Py_CHARMASK(*s); 641 s++; 642 if (isdigit(c)) 643 x = c - '0'; 644 else if (islower(c)) 645 x = 10 + c - 'a'; 646 else 647 x = 10 + c - 'A'; 648 x = x << 4; 649 c = Py_CHARMASK(*s); 650 s++; 651 if (isdigit(c)) 652 x += c - '0'; 653 else if (islower(c)) 654 x += 10 + c - 'a'; 655 else 656 x += 10 + c - 'A'; 657 *p++ = x; 658 break; 659 } 660 if (!errors || strcmp(errors, "strict") == 0) { 661 PyErr_SetString(PyExc_ValueError, 662 "invalid \\x escape"); 663 goto failed; 664 } 665 if (strcmp(errors, "replace") == 0) { 666 *p++ = '?'; 667 } else if (strcmp(errors, "ignore") == 0) 668 /* do nothing */; 669 else { 670 PyErr_Format(PyExc_ValueError, 671 "decoding error; " 672 "unknown error handling code: %.400s", 673 errors); 674 goto failed; 675 } 656 continue; 657 } 658 s++; 659 if (s==end) { 660 PyErr_SetString(PyExc_ValueError, 661 "Trailing \\ in string"); 662 goto failed; 663 } 664 switch (*s++) { 665 /* XXX This assumes ASCII! 
*/ 666 case '\n': break; 667 case '\\': *p++ = '\\'; break; 668 case '\'': *p++ = '\''; break; 669 case '\"': *p++ = '\"'; break; 670 case 'b': *p++ = '\b'; break; 671 case 'f': *p++ = '\014'; break; /* FF */ 672 case 't': *p++ = '\t'; break; 673 case 'n': *p++ = '\n'; break; 674 case 'r': *p++ = '\r'; break; 675 case 'v': *p++ = '\013'; break; /* VT */ 676 case 'a': *p++ = '\007'; break; /* BEL, not classic C */ 677 case '0': case '1': case '2': case '3': 678 case '4': case '5': case '6': case '7': 679 c = s[-1] - '0'; 680 if (s < end && '0' <= *s && *s <= '7') { 681 c = (c<<3) + *s++ - '0'; 682 if (s < end && '0' <= *s && *s <= '7') 683 c = (c<<3) + *s++ - '0'; 684 } 685 *p++ = c; 686 break; 687 case 'x': 688 if (s+1 < end && 689 isxdigit(Py_CHARMASK(s[0])) && 690 isxdigit(Py_CHARMASK(s[1]))) 691 { 692 unsigned int x = 0; 693 c = Py_CHARMASK(*s); 694 s++; 695 if (isdigit(c)) 696 x = c - '0'; 697 else if (islower(c)) 698 x = 10 + c - 'a'; 699 else 700 x = 10 + c - 'A'; 701 x = x << 4; 702 c = Py_CHARMASK(*s); 703 s++; 704 if (isdigit(c)) 705 x += c - '0'; 706 else if (islower(c)) 707 x += 10 + c - 'a'; 708 else 709 x += 10 + c - 'A'; 710 *p++ = x; 711 break; 712 } 713 if (!errors || strcmp(errors, "strict") == 0) { 714 PyErr_SetString(PyExc_ValueError, 715 "invalid \\x escape"); 716 goto failed; 717 } 718 if (strcmp(errors, "replace") == 0) { 719 *p++ = '?'; 720 } else if (strcmp(errors, "ignore") == 0) 721 /* do nothing */; 722 else { 723 PyErr_Format(PyExc_ValueError, 724 "decoding error; " 725 "unknown error handling code: %.400s", 726 errors); 727 goto failed; 728 } 729 /* skip \x */ 730 if (s < end && isxdigit(Py_CHARMASK(s[0]))) 731 s++; /* and a hexdigit */ 732 break; 676 733 #ifndef Py_USING_UNICODE 677 678 679 680 681 682 683 684 685 734 case 'u': 735 case 'U': 736 case 'N': 737 if (unicode) { 738 PyErr_SetString(PyExc_ValueError, 739 "Unicode escapes not legal " 740 "when Unicode disabled"); 741 goto failed; 742 } 686 743 #endif 687 688 689 690 goto 
non_esc; /* an arbitry number of unescaped691 692 693 694 if (p-buf < newlen)695 _PyString_Resize(&v, p - buf);696 744 default: 745 *p++ = '\\'; 746 s--; 747 goto non_esc; /* an arbitrary number of unescaped 748 UTF-8 bytes may follow. */ 749 } 750 } 751 if (p-buf < newlen && _PyString_Resize(&v, p - buf)) 752 goto failed; 753 return v; 697 754 failed: 698 699 755 Py_DECREF(v); 756 return NULL; 700 757 } 701 758 … … 706 763 string_getsize(register PyObject *op) 707 764 { 708 709 710 711 712 765 char *s; 766 Py_ssize_t len; 767 if (PyString_AsStringAndSize(op, &s, &len)) 768 return -1; 769 return len; 713 770 } 714 771 … … 716 773 string_getbuffer(register PyObject *op) 717 774 { 718 719 720 721 722 775 char *s; 776 Py_ssize_t len; 777 if (PyString_AsStringAndSize(op, &s, &len)) 778 return NULL; 779 return s; 723 780 } 724 781 … … 726 783 PyString_Size(register PyObject *op) 727 784 { 728 729 730 785 if (!PyString_Check(op)) 786 return string_getsize(op); 787 return Py_SIZE(op); 731 788 } 732 789 … … 734 791 PyString_AsString(register PyObject *op) 735 792 { 736 737 738 793 if (!PyString_Check(op)) 794 return string_getbuffer(op); 795 return ((PyStringObject *)op) -> ob_sval; 739 796 } 740 797 741 798 int 742 799 PyString_AsStringAndSize(register PyObject *obj, 743 744 745 { 746 747 748 749 750 751 800 register char **s, 801 register Py_ssize_t *len) 802 { 803 if (s == NULL) { 804 PyErr_BadInternalCall(); 805 return -1; 806 } 807 808 if (!PyString_Check(obj)) { 752 809 #ifdef Py_USING_UNICODE 753 754 755 756 757 758 810 if (PyUnicode_Check(obj)) { 811 obj = _PyUnicode_AsDefaultEncodedString(obj, NULL); 812 if (obj == NULL) 813 return -1; 814 } 815 else 759 816 #endif 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 817 { 818 PyErr_Format(PyExc_TypeError, 819 "expected string or Unicode object, " 820 "%.200s found", Py_TYPE(obj)->tp_name); 821 return -1; 822 } 823 } 824 825 *s = PyString_AS_STRING(obj); 826 if (len != NULL) 827 *len = 
PyString_GET_SIZE(obj); 828 else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) { 829 PyErr_SetString(PyExc_TypeError, 830 "expected string without null bytes"); 831 return -1; 832 } 833 return 0; 777 834 } 778 835 … … 786 843 #include "stringlib/find.h" 787 844 #include "stringlib/partition.h" 845 #include "stringlib/split.h" 788 846 789 847 #define _Py_InsertThousandsGrouping _PyString_InsertThousandsGrouping … … 795 853 string_print(PyStringObject *op, FILE *fp, int flags) 796 854 { 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 855 Py_ssize_t i, str_len; 856 char c; 857 int quote; 858 859 /* XXX Ought to check for interrupts when writing long strings */ 860 if (! PyString_CheckExact(op)) { 861 int ret; 862 /* A str subclass may have its own __str__ method. */ 863 op = (PyStringObject *) PyObject_Str((PyObject *)op); 864 if (op == NULL) 865 return -1; 866 ret = string_print(op, fp, flags); 867 Py_DECREF(op); 868 return ret; 869 } 870 if (flags & Py_PRINT_RAW) { 871 char *data = op->ob_sval; 872 Py_ssize_t size = Py_SIZE(op); 873 Py_BEGIN_ALLOW_THREADS 874 while (size > INT_MAX) { 875 /* Very long strings cannot be written atomically. 876 * But don't write exactly INT_MAX bytes at a time 877 * to avoid memory alignment issues. 
878 */ 879 const int chunk_size = INT_MAX & ~0x3FFF; 880 fwrite(data, 1, chunk_size, fp); 881 data += chunk_size; 882 size -= chunk_size; 883 } 826 884 #ifdef __VMS 827 if (size) fwrite(data, (int)size, 1, fp);885 if (size) fwrite(data, (size_t)size, 1, fp); 828 886 #else 829 fwrite(data, 1, (int)size, fp);887 fwrite(data, 1, (size_t)size, fp); 830 888 #endif 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851 852 853 854 855 856 857 858 859 860 861 862 863 864 889 Py_END_ALLOW_THREADS 890 return 0; 891 } 892 893 /* figure out which quote to use; single is preferred */ 894 quote = '\''; 895 if (memchr(op->ob_sval, '\'', Py_SIZE(op)) && 896 !memchr(op->ob_sval, '"', Py_SIZE(op))) 897 quote = '"'; 898 899 str_len = Py_SIZE(op); 900 Py_BEGIN_ALLOW_THREADS 901 fputc(quote, fp); 902 for (i = 0; i < str_len; i++) { 903 /* Since strings are immutable and the caller should have a 904 reference, accessing the internal buffer should not be an issue 905 with the GIL released. 
*/ 906 c = op->ob_sval[i]; 907 if (c == quote || c == '\\') 908 fprintf(fp, "\\%c", c); 909 else if (c == '\t') 910 fprintf(fp, "\\t"); 911 else if (c == '\n') 912 fprintf(fp, "\\n"); 913 else if (c == '\r') 914 fprintf(fp, "\\r"); 915 else if (c < ' ' || c >= 0x7f) 916 fprintf(fp, "\\x%02x", c & 0xff); 917 else 918 fputc(c, fp); 919 } 920 fputc(quote, fp); 921 Py_END_ALLOW_THREADS 922 return 0; 865 923 } 866 924 … … 868 926 PyString_Repr(PyObject *obj, int smartquotes) 869 927 { 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893 894 895 896 897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 _PyString_Resize( 924 &v, (p - PyString_AS_STRING(v)));925 926 928 register PyStringObject* op = (PyStringObject*) obj; 929 size_t newsize = 2 + 4 * Py_SIZE(op); 930 PyObject *v; 931 if (newsize > PY_SSIZE_T_MAX || newsize / 4 != Py_SIZE(op)) { 932 PyErr_SetString(PyExc_OverflowError, 933 "string is too large to make repr"); 934 return NULL; 935 } 936 v = PyString_FromStringAndSize((char *)NULL, newsize); 937 if (v == NULL) { 938 return NULL; 939 } 940 else { 941 register Py_ssize_t i; 942 register char c; 943 register char *p; 944 int quote; 945 946 /* figure out which quote to use; single is preferred */ 947 quote = '\''; 948 if (smartquotes && 949 memchr(op->ob_sval, '\'', Py_SIZE(op)) && 950 !memchr(op->ob_sval, '"', Py_SIZE(op))) 951 quote = '"'; 952 953 p = PyString_AS_STRING(v); 954 *p++ = quote; 955 for (i = 0; i < Py_SIZE(op); i++) { 956 /* There's at least enough room for a hex escape 957 and a closing quote. 
*/ 958 assert(newsize - (p - PyString_AS_STRING(v)) >= 5); 959 c = op->ob_sval[i]; 960 if (c == quote || c == '\\') 961 *p++ = '\\', *p++ = c; 962 else if (c == '\t') 963 *p++ = '\\', *p++ = 't'; 964 else if (c == '\n') 965 *p++ = '\\', *p++ = 'n'; 966 else if (c == '\r') 967 *p++ = '\\', *p++ = 'r'; 968 else if (c < ' ' || c >= 0x7f) { 969 /* For performance, we don't want to call 970 PyOS_snprintf here (extra layers of 971 function call). */ 972 sprintf(p, "\\x%02x", c & 0xff); 973 p += 4; 974 } 975 else 976 *p++ = c; 977 } 978 assert(newsize - (p - PyString_AS_STRING(v)) >= 1); 979 *p++ = quote; 980 *p = '\0'; 981 if (_PyString_Resize(&v, (p - PyString_AS_STRING(v)))) 982 return NULL; 983 return v; 984 } 927 985 } 928 986 … … 930 988 string_repr(PyObject *op) 931 989 { 932 990 return PyString_Repr(op, 1); 933 991 } 934 992 … … 936 994 string_str(PyObject *s) 937 995 { 938 939 940 941 942 943 944 945 946 947 996 assert(PyString_Check(s)); 997 if (PyString_CheckExact(s)) { 998 Py_INCREF(s); 999 return s; 1000 } 1001 else { 1002 /* Subtype -- return genuine string with the same value. 
*/ 1003 PyStringObject *t = (PyStringObject *) s; 1004 return PyString_FromStringAndSize(t->ob_sval, Py_SIZE(t)); 1005 } 948 1006 } 949 1007 … … 951 1009 string_length(PyStringObject *a) 952 1010 { 953 1011 return Py_SIZE(a); 954 1012 } 955 1013 … … 957 1015 string_concat(register PyStringObject *a, register PyObject *bb) 958 1016 { 959 960 961 1017 register Py_ssize_t size; 1018 register PyStringObject *op; 1019 if (!PyString_Check(bb)) { 962 1020 #ifdef Py_USING_UNICODE 963 964 1021 if (PyUnicode_Check(bb)) 1022 return PyUnicode_Concat((PyObject *)a, bb); 965 1023 #endif 966 967 968 969 970 971 972 1024 if (PyByteArray_Check(bb)) 1025 return PyByteArray_Concat((PyObject *)a, bb); 1026 PyErr_Format(PyExc_TypeError, 1027 "cannot concatenate 'str' and '%.200s' objects", 1028 Py_TYPE(bb)->tp_name); 1029 return NULL; 1030 } 973 1031 #define b ((PyStringObject *)bb) 974 975 976 977 978 979 980 981 982 983 984 985 986 987 988 989 990 991 992 993 994 995 996 997 if (size > PY_SSIZE_T_MAX - sizeof(PyStringObject)) {998 999 1000 1001 1002 op = (PyStringObject *)PyObject_MALLOC(sizeof(PyStringObject)+ size);1003 1004 1005 1006 1007 1008 1009 1010 1011 1032 /* Optimize cases with empty left or right operand */ 1033 if ((Py_SIZE(a) == 0 || Py_SIZE(b) == 0) && 1034 PyString_CheckExact(a) && PyString_CheckExact(b)) { 1035 if (Py_SIZE(a) == 0) { 1036 Py_INCREF(bb); 1037 return bb; 1038 } 1039 Py_INCREF(a); 1040 return (PyObject *)a; 1041 } 1042 size = Py_SIZE(a) + Py_SIZE(b); 1043 /* Check that string sizes are not negative, to prevent an 1044 overflow in cases where we are passed incorrectly-created 1045 strings with negative lengths (due to a bug in other code). 
1046 */ 1047 if (Py_SIZE(a) < 0 || Py_SIZE(b) < 0 || 1048 Py_SIZE(a) > PY_SSIZE_T_MAX - Py_SIZE(b)) { 1049 PyErr_SetString(PyExc_OverflowError, 1050 "strings are too large to concat"); 1051 return NULL; 1052 } 1053 1054 /* Inline PyObject_NewVar */ 1055 if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) { 1056 PyErr_SetString(PyExc_OverflowError, 1057 "strings are too large to concat"); 1058 return NULL; 1059 } 1060 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size); 1061 if (op == NULL) 1062 return PyErr_NoMemory(); 1063 PyObject_INIT_VAR(op, &PyString_Type, size); 1064 op->ob_shash = -1; 1065 op->ob_sstate = SSTATE_NOT_INTERNED; 1066 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a)); 1067 Py_MEMCPY(op->ob_sval + Py_SIZE(a), b->ob_sval, Py_SIZE(b)); 1068 op->ob_sval[size] = '\0'; 1069 return (PyObject *) op; 1012 1070 #undef b 1013 1071 } … … 1016 1074 string_repeat(register PyStringObject *a, register Py_ssize_t n) 1017 1075 { 1018 register Py_ssize_t i; 1019 register Py_ssize_t j; 1020 register Py_ssize_t size; 1021 register PyStringObject *op; 1022 size_t nbytes; 1023 if (n < 0) 1024 n = 0; 1025 /* watch out for overflows: the size can overflow int, 1026 * and the # of bytes needed can overflow size_t 1027 */ 1028 size = Py_SIZE(a) * n; 1029 if (n && size / n != Py_SIZE(a)) { 1030 PyErr_SetString(PyExc_OverflowError, 1031 "repeated string is too long"); 1032 return NULL; 1033 } 1034 if (size == Py_SIZE(a) && PyString_CheckExact(a)) { 1035 Py_INCREF(a); 1036 return (PyObject *)a; 1037 } 1038 nbytes = (size_t)size; 1039 if (nbytes + sizeof(PyStringObject) <= nbytes) { 1040 PyErr_SetString(PyExc_OverflowError, 1041 "repeated string is too long"); 1042 return NULL; 1043 } 1044 op = (PyStringObject *) 1045 PyObject_MALLOC(sizeof(PyStringObject) + nbytes); 1046 if (op == NULL) 1047 return PyErr_NoMemory(); 1048 PyObject_INIT_VAR(op, &PyString_Type, size); 1049 op->ob_shash = -1; 1050 op->ob_sstate = SSTATE_NOT_INTERNED; 1051 op->ob_sval[size] = '\0'; 1052 
if (Py_SIZE(a) == 1 && n > 0) { 1053 memset(op->ob_sval, a->ob_sval[0] , n); 1054 return (PyObject *) op; 1055 } 1056 i = 0; 1057 if (i < size) { 1058 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a)); 1059 i = Py_SIZE(a); 1060 } 1061 while (i < size) { 1062 j = (i <= size-i) ? i : size-i; 1063 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j); 1064 i += j; 1065 } 1066 return (PyObject *) op; 1076 register Py_ssize_t i; 1077 register Py_ssize_t j; 1078 register Py_ssize_t size; 1079 register PyStringObject *op; 1080 size_t nbytes; 1081 if (n < 0) 1082 n = 0; 1083 /* watch out for overflows: the size can overflow int, 1084 * and the # of bytes needed can overflow size_t 1085 */ 1086 size = Py_SIZE(a) * n; 1087 if (n && size / n != Py_SIZE(a)) { 1088 PyErr_SetString(PyExc_OverflowError, 1089 "repeated string is too long"); 1090 return NULL; 1091 } 1092 if (size == Py_SIZE(a) && PyString_CheckExact(a)) { 1093 Py_INCREF(a); 1094 return (PyObject *)a; 1095 } 1096 nbytes = (size_t)size; 1097 if (nbytes + PyStringObject_SIZE <= nbytes) { 1098 PyErr_SetString(PyExc_OverflowError, 1099 "repeated string is too long"); 1100 return NULL; 1101 } 1102 op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + nbytes); 1103 if (op == NULL) 1104 return PyErr_NoMemory(); 1105 PyObject_INIT_VAR(op, &PyString_Type, size); 1106 op->ob_shash = -1; 1107 op->ob_sstate = SSTATE_NOT_INTERNED; 1108 op->ob_sval[size] = '\0'; 1109 if (Py_SIZE(a) == 1 && n > 0) { 1110 memset(op->ob_sval, a->ob_sval[0] , n); 1111 return (PyObject *) op; 1112 } 1113 i = 0; 1114 if (i < size) { 1115 Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a)); 1116 i = Py_SIZE(a); 1117 } 1118 while (i < size) { 1119 j = (i <= size-i) ? i : size-i; 1120 Py_MEMCPY(op->ob_sval+i, op->ob_sval, j); 1121 i += j; 1122 } 1123 return (PyObject *) op; 1067 1124 } 1068 1125 … … 1071 1128 static PyObject * 1072 1129 string_slice(register PyStringObject *a, register Py_ssize_t i, 1073 1130 register Py_ssize_t j) 1074 1131 /* j -- may be negative! 
*/ 1075 1132 { 1076 1077 1078 1079 1080 1081 1082 1083 1084 1085 1086 1087 1088 1089 1133 if (i < 0) 1134 i = 0; 1135 if (j < 0) 1136 j = 0; /* Avoid signed/unsigned bug in next line */ 1137 if (j > Py_SIZE(a)) 1138 j = Py_SIZE(a); 1139 if (i == 0 && j == Py_SIZE(a) && PyString_CheckExact(a)) { 1140 /* It's the same as a */ 1141 Py_INCREF(a); 1142 return (PyObject *)a; 1143 } 1144 if (j < i) 1145 j = i; 1146 return PyString_FromStringAndSize(a->ob_sval + i, j-i); 1090 1147 } 1091 1148 … … 1093 1150 string_contains(PyObject *str_obj, PyObject *sub_obj) 1094 1151 { 1095 1152 if (!PyString_CheckExact(sub_obj)) { 1096 1153 #ifdef Py_USING_UNICODE 1097 1098 1154 if (PyUnicode_Check(sub_obj)) 1155 return PyUnicode_Contains(str_obj, sub_obj); 1099 1156 #endif 1100 1101 1102 1103 1104 1105 1106 1107 1108 1157 if (!PyString_Check(sub_obj)) { 1158 PyErr_Format(PyExc_TypeError, 1159 "'in <string>' requires string as left operand, " 1160 "not %.200s", Py_TYPE(sub_obj)->tp_name); 1161 return -1; 1162 } 1163 } 1164 1165 return stringlib_contains_obj(str_obj, sub_obj); 1109 1166 } 1110 1167 … … 1112 1169 string_item(PyStringObject *a, register Py_ssize_t i) 1113 1170 { 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123 1124 1171 char pchar; 1172 PyObject *v; 1173 if (i < 0 || i >= Py_SIZE(a)) { 1174 PyErr_SetString(PyExc_IndexError, "string index out of range"); 1175 return NULL; 1176 } 1177 pchar = a->ob_sval[i]; 1178 v = (PyObject *)characters[pchar & UCHAR_MAX]; 1179 if (v == NULL) 1180 v = PyString_FromStringAndSize(&pchar, 1); 1181 else { 1125 1182 #ifdef COUNT_ALLOCS 1126 1183 one_strings++; 1127 1184 #endif 1128 1129 1130 1185 Py_INCREF(v); 1186 } 1187 return v; 1131 1188 } 1132 1189 … … 1134 1191 string_richcompare(PyStringObject *a, PyStringObject *b, int op) 1135 1192 { 1136 1137 1138 1139 1140 1141 1142 1143 1144 1145 1146 1147 1148 1149 1150 1151 1152 1153 1154 1155 1156 1157 1158 1159 1160 1161 1162 1163 1164 1165 1166 1167 1168 1169 1170 1171 1172 1173 1174 1175 1176 
1177 1178 1179 1180 1181 1182 1183 1184 1185 1186 1187 1188 1189 1193 int c; 1194 Py_ssize_t len_a, len_b; 1195 Py_ssize_t min_len; 1196 PyObject *result; 1197 1198 /* Make sure both arguments are strings. */ 1199 if (!(PyString_Check(a) && PyString_Check(b))) { 1200 result = Py_NotImplemented; 1201 goto out; 1202 } 1203 if (a == b) { 1204 switch (op) { 1205 case Py_EQ:case Py_LE:case Py_GE: 1206 result = Py_True; 1207 goto out; 1208 case Py_NE:case Py_LT:case Py_GT: 1209 result = Py_False; 1210 goto out; 1211 } 1212 } 1213 if (op == Py_EQ) { 1214 /* Supporting Py_NE here as well does not save 1215 much time, since Py_NE is rarely used. */ 1216 if (Py_SIZE(a) == Py_SIZE(b) 1217 && (a->ob_sval[0] == b->ob_sval[0] 1218 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) { 1219 result = Py_True; 1220 } else { 1221 result = Py_False; 1222 } 1223 goto out; 1224 } 1225 len_a = Py_SIZE(a); len_b = Py_SIZE(b); 1226 min_len = (len_a < len_b) ? len_a : len_b; 1227 if (min_len > 0) { 1228 c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval); 1229 if (c==0) 1230 c = memcmp(a->ob_sval, b->ob_sval, min_len); 1231 } else 1232 c = 0; 1233 if (c == 0) 1234 c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0; 1235 switch (op) { 1236 case Py_LT: c = c < 0; break; 1237 case Py_LE: c = c <= 0; break; 1238 case Py_EQ: assert(0); break; /* unreachable */ 1239 case Py_NE: c = c != 0; break; 1240 case Py_GT: c = c > 0; break; 1241 case Py_GE: c = c >= 0; break; 1242 default: 1243 result = Py_NotImplemented; 1244 goto out; 1245 } 1246 result = c ? 
Py_True : Py_False; 1190 1247 out: 1191 1192 1248 Py_INCREF(result); 1249 return result; 1193 1250 } 1194 1251 … … 1196 1253 _PyString_Eq(PyObject *o1, PyObject *o2) 1197 1254 { 1198 PyStringObject *a = (PyStringObject*) o1; 1199 PyStringObject *b = (PyStringObject*) o2; 1200 return Py_SIZE(a) == Py_SIZE(b) 1201 && *a->ob_sval == *b->ob_sval 1202 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0; 1255 PyStringObject *a = (PyStringObject*) o1; 1256 PyStringObject *b = (PyStringObject*) o2; 1257 return Py_SIZE(a) == Py_SIZE(b) 1258 && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0; 1203 1259 } 1204 1260 … … 1206 1262 string_hash(PyStringObject *a) 1207 1263 { 1208 register Py_ssize_t len; 1209 register unsigned char *p; 1210 register long x; 1211 1212 if (a->ob_shash != -1) 1213 return a->ob_shash; 1214 len = Py_SIZE(a); 1215 p = (unsigned char *) a->ob_sval; 1216 x = *p << 7; 1217 while (--len >= 0) 1218 x = (1000003*x) ^ *p++; 1219 x ^= Py_SIZE(a); 1220 if (x == -1) 1221 x = -2; 1222 a->ob_shash = x; 1223 return x; 1264 register Py_ssize_t len; 1265 register unsigned char *p; 1266 register long x; 1267 1268 #ifdef Py_DEBUG 1269 assert(_Py_HashSecret_Initialized); 1270 #endif 1271 if (a->ob_shash != -1) 1272 return a->ob_shash; 1273 len = Py_SIZE(a); 1274 /* 1275 We make the hash of the empty string be 0, rather than using 1276 (prefix ^ suffix), since this slightly obfuscates the hash secret 1277 */ 1278 if (len == 0) { 1279 a->ob_shash = 0; 1280 return 0; 1281 } 1282 p = (unsigned char *) a->ob_sval; 1283 x = _Py_HashSecret.prefix; 1284 x ^= *p << 7; 1285 while (--len >= 0) 1286 x = (1000003*x) ^ *p++; 1287 x ^= Py_SIZE(a); 1288 x ^= _Py_HashSecret.suffix; 1289 if (x == -1) 1290 x = -2; 1291 a->ob_shash = x; 1292 return x; 1224 1293 } 1225 1294 … … 1227 1296 string_subscript(PyStringObject* self, PyObject* item) 1228 1297 { 1229 1230 1231 1232 1233 1234 1235 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 
1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1298 if (PyIndex_Check(item)) { 1299 Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError); 1300 if (i == -1 && PyErr_Occurred()) 1301 return NULL; 1302 if (i < 0) 1303 i += PyString_GET_SIZE(self); 1304 return string_item(self, i); 1305 } 1306 else if (PySlice_Check(item)) { 1307 Py_ssize_t start, stop, step, slicelength, cur, i; 1308 char* source_buf; 1309 char* result_buf; 1310 PyObject* result; 1311 1312 if (PySlice_GetIndicesEx((PySliceObject*)item, 1313 PyString_GET_SIZE(self), 1314 &start, &stop, &step, &slicelength) < 0) { 1315 return NULL; 1316 } 1317 1318 if (slicelength <= 0) { 1319 return PyString_FromStringAndSize("", 0); 1320 } 1321 else if (start == 0 && step == 1 && 1322 slicelength == PyString_GET_SIZE(self) && 1323 PyString_CheckExact(self)) { 1324 Py_INCREF(self); 1325 return (PyObject *)self; 1326 } 1327 else if (step == 1) { 1328 return PyString_FromStringAndSize( 1329 PyString_AS_STRING(self) + start, 1330 slicelength); 1331 } 1332 else { 1333 source_buf = PyString_AsString((PyObject*)self); 1334 result_buf = (char *)PyMem_Malloc(slicelength); 1335 if (result_buf == NULL) 1336 return PyErr_NoMemory(); 1337 1338 for (cur = start, i = 0; i < slicelength; 1339 cur += step, i++) { 1340 result_buf[i] = source_buf[cur]; 1341 } 1342 1343 result = PyString_FromStringAndSize(result_buf, 1344 slicelength); 1345 PyMem_Free(result_buf); 1346 return result; 1347 } 1348 } 1349 else { 1350 PyErr_Format(PyExc_TypeError, 1351 "string indices must be integers, not %.200s", 1352 Py_TYPE(item)->tp_name); 1353 return NULL; 1354 } 1286 1355 } 1287 1356 … … 1289 1358 string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr) 1290 1359 { 1291 1292 1293 1294 1295 1296 1297 1360 if ( index != 0 ) { 1361 PyErr_SetString(PyExc_SystemError, 1362 "accessing non-existent string segment"); 1363 return -1; 
1364 } 1365 *ptr = (void *)self->ob_sval; 1366 return Py_SIZE(self); 1298 1367 } 1299 1368 … … 1301 1370 string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr) 1302 1371 { 1303 1304 1305 1372 PyErr_SetString(PyExc_TypeError, 1373 "Cannot use string as modifiable buffer"); 1374 return -1; 1306 1375 } 1307 1376 … … 1309 1378 string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp) 1310 1379 { 1311 1312 1313 1380 if ( lenp ) 1381 *lenp = Py_SIZE(self); 1382 return 1; 1314 1383 } 1315 1384 … … 1317 1386 string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr) 1318 1387 { 1319 1320 1321 1322 1323 1324 1325 1388 if ( index != 0 ) { 1389 PyErr_SetString(PyExc_SystemError, 1390 "accessing non-existent string segment"); 1391 return -1; 1392 } 1393 *ptr = self->ob_sval; 1394 return Py_SIZE(self); 1326 1395 } 1327 1396 … … 1329 1398 string_buffer_getbuffer(PyStringObject *self, Py_buffer *view, int flags) 1330 1399 { 1331 1332 1333 1400 return PyBuffer_FillInfo(view, (PyObject*)self, 1401 (void *)self->ob_sval, Py_SIZE(self), 1402 1, flags); 1334 1403 } 1335 1404 1336 1405 static PySequenceMethods string_as_sequence = { 1337 1338 1339 1340 1341 1342 0,/*sq_ass_item*/1343 0,/*sq_ass_slice*/1344 1406 (lenfunc)string_length, /*sq_length*/ 1407 (binaryfunc)string_concat, /*sq_concat*/ 1408 (ssizeargfunc)string_repeat, /*sq_repeat*/ 1409 (ssizeargfunc)string_item, /*sq_item*/ 1410 (ssizessizeargfunc)string_slice, /*sq_slice*/ 1411 0, /*sq_ass_item*/ 1412 0, /*sq_ass_slice*/ 1413 (objobjproc)string_contains /*sq_contains*/ 1345 1414 }; 1346 1415 1347 1416 static PyMappingMethods string_as_mapping = { 1348 1349 1350 1417 (lenfunc)string_length, 1418 (binaryfunc)string_subscript, 1419 0, 1351 1420 }; 1352 1421 1353 1422 static PyBufferProcs string_as_buffer = { 1354 1355 1356 1357 1358 1359 1423 (readbufferproc)string_buffer_getreadbuf, 1424 (writebufferproc)string_buffer_getwritebuf, 1425 
(segcountproc)string_buffer_getsegcount, 1426 (charbufferproc)string_buffer_getcharbuf, 1427 (getbufferproc)string_buffer_getbuffer, 1428 0, /* XXX */ 1360 1429 }; 1361 1430 … … 1370 1439 1371 1440 #define STRIPNAME(i) (stripformat[i]+3) 1372 1373 1374 /* Don't call if length < 2 */1375 #define Py_STRING_MATCH(target, offset, pattern, length) \1376 (target[offset] == pattern[0] && \1377 target[offset+length-1] == pattern[length-1] && \1378 !memcmp(target+offset+1, pattern+1, length-2) )1379 1380 1381 /* Overallocate the initial list to reduce the number of reallocs for small1382 split sizes. Eg, "A A A A A A A A A A".split() (10 elements) has three1383 resizes, to sizes 4, 8, then 16. Most observed string splits are for human1384 text (roughly 11 words per line) and field delimited data (usually 1-101385 fields). For large strings the split algorithms are bandwidth limited1386 so increasing the preallocation likely will not improve things.*/1387 1388 #define MAX_PREALLOC 121389 1390 /* 5 splits gives 6 elements */1391 #define PREALLOC_SIZE(maxsplit) \1392 (maxsplit >= MAX_PREALLOC ? MAX_PREALLOC : maxsplit+1)1393 1394 #define SPLIT_APPEND(data, left, right) \1395 str = PyString_FromStringAndSize((data) + (left), \1396 (right) - (left)); \1397 if (str == NULL) \1398 goto onError; \1399 if (PyList_Append(list, str)) { \1400 Py_DECREF(str); \1401 goto onError; \1402 } \1403 else \1404 Py_DECREF(str);1405 1406 #define SPLIT_ADD(data, left, right) { \1407 str = PyString_FromStringAndSize((data) + (left), \1408 (right) - (left)); \1409 if (str == NULL) \1410 goto onError; \1411 if (count < MAX_PREALLOC) { \1412 PyList_SET_ITEM(list, count, str); \1413 } else { \1414 if (PyList_Append(list, str)) { \1415 Py_DECREF(str); \1416 goto onError; \1417 } \1418 else \1419 Py_DECREF(str); \1420 } \1421 count++; }1422 1423 /* Always force the list to the expected size. 
*/1424 #define FIX_PREALLOC_SIZE(list) Py_SIZE(list) = count1425 1426 #define SKIP_SPACE(s, i, len) { while (i<len && isspace(Py_CHARMASK(s[i]))) i++; }1427 #define SKIP_NONSPACE(s, i, len) { while (i<len && !isspace(Py_CHARMASK(s[i]))) i++; }1428 #define RSKIP_SPACE(s, i) { while (i>=0 && isspace(Py_CHARMASK(s[i]))) i--; }1429 #define RSKIP_NONSPACE(s, i) { while (i>=0 && !isspace(Py_CHARMASK(s[i]))) i--; }1430 1431 Py_LOCAL_INLINE(PyObject *)1432 split_whitespace(PyStringObject *self, Py_ssize_t len, Py_ssize_t maxsplit)1433 {1434 const char *s = PyString_AS_STRING(self);1435 Py_ssize_t i, j, count=0;1436 PyObject *str;1437 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit));1438 1439 if (list == NULL)1440 return NULL;1441 1442 i = j = 0;1443 1444 while (maxsplit-- > 0) {1445 SKIP_SPACE(s, i, len);1446 if (i==len) break;1447 j = i; i++;1448 SKIP_NONSPACE(s, i, len);1449 if (j == 0 && i == len && PyString_CheckExact(self)) {1450 /* No whitespace in self, so just use it as list[0] */1451 Py_INCREF(self);1452 PyList_SET_ITEM(list, 0, (PyObject *)self);1453 count++;1454 break;1455 }1456 SPLIT_ADD(s, j, i);1457 }1458 1459 if (i < len) {1460 /* Only occurs when maxsplit was reached */1461 /* Skip any remaining whitespace and copy to end of string */1462 SKIP_SPACE(s, i, len);1463 if (i != len)1464 SPLIT_ADD(s, i, len);1465 }1466 FIX_PREALLOC_SIZE(list);1467 return list;1468 onError:1469 Py_DECREF(list);1470 return NULL;1471 }1472 1473 Py_LOCAL_INLINE(PyObject *)1474 split_char(PyStringObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount)1475 {1476 const char *s = PyString_AS_STRING(self);1477 register Py_ssize_t i, j, count=0;1478 PyObject *str;1479 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));1480 1481 if (list == NULL)1482 return NULL;1483 1484 i = j = 0;1485 while ((j < len) && (maxcount-- > 0)) {1486 for(; j<len; j++) {1487 /* I found that using memchr makes no difference */1488 if (s[j] == ch) {1489 SPLIT_ADD(s, i, j);1490 i = j = j + 1;1491 
break;1492 }1493 }1494 }1495 if (i == 0 && count == 0 && PyString_CheckExact(self)) {1496 /* ch not in self, so just use self as list[0] */1497 Py_INCREF(self);1498 PyList_SET_ITEM(list, 0, (PyObject *)self);1499 count++;1500 }1501 else if (i <= len) {1502 SPLIT_ADD(s, i, len);1503 }1504 FIX_PREALLOC_SIZE(list);1505 return list;1506 1507 onError:1508 Py_DECREF(list);1509 return NULL;1510 }1511 1441 1512 1442 PyDoc_STRVAR(split__doc__, … … 1522 1452 string_split(PyStringObject *self, PyObject *args) 1523 1453 { 1524 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j; 1525 Py_ssize_t maxsplit = -1, count=0; 1526 const char *s = PyString_AS_STRING(self), *sub; 1527 PyObject *list, *str, *subobj = Py_None; 1528 #ifdef USE_FAST 1529 Py_ssize_t pos; 1454 Py_ssize_t len = PyString_GET_SIZE(self), n; 1455 Py_ssize_t maxsplit = -1; 1456 const char *s = PyString_AS_STRING(self), *sub; 1457 PyObject *subobj = Py_None; 1458 1459 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit)) 1460 return NULL; 1461 if (maxsplit < 0) 1462 maxsplit = PY_SSIZE_T_MAX; 1463 if (subobj == Py_None) 1464 return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit); 1465 if (PyString_Check(subobj)) { 1466 sub = PyString_AS_STRING(subobj); 1467 n = PyString_GET_SIZE(subobj); 1468 } 1469 #ifdef Py_USING_UNICODE 1470 else if (PyUnicode_Check(subobj)) 1471 return PyUnicode_Split((PyObject *)self, subobj, maxsplit); 1530 1472 #endif 1531 1532 if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit)) 1533 return NULL; 1534 if (maxsplit < 0) 1535 maxsplit = PY_SSIZE_T_MAX; 1536 if (subobj == Py_None) 1537 return split_whitespace(self, len, maxsplit); 1538 if (PyString_Check(subobj)) { 1539 sub = PyString_AS_STRING(subobj); 1540 n = PyString_GET_SIZE(subobj); 1541 } 1542 #ifdef Py_USING_UNICODE 1543 else if (PyUnicode_Check(subobj)) 1544 return PyUnicode_Split((PyObject *)self, subobj, maxsplit); 1545 #endif 1546 else if (PyObject_AsCharBuffer(subobj, &sub, &n)) 1547 return NULL; 1548 
1549 if (n == 0) { 1550 PyErr_SetString(PyExc_ValueError, "empty separator"); 1551 return NULL; 1552 } 1553 else if (n == 1) 1554 return split_char(self, len, sub[0], maxsplit); 1555 1556 list = PyList_New(PREALLOC_SIZE(maxsplit)); 1557 if (list == NULL) 1558 return NULL; 1559 1560 #ifdef USE_FAST 1561 i = j = 0; 1562 while (maxsplit-- > 0) { 1563 pos = fastsearch(s+i, len-i, sub, n, FAST_SEARCH); 1564 if (pos < 0) 1565 break; 1566 j = i+pos; 1567 SPLIT_ADD(s, i, j); 1568 i = j + n; 1569 } 1570 #else 1571 i = j = 0; 1572 while ((j+n <= len) && (maxsplit-- > 0)) { 1573 for (; j+n <= len; j++) { 1574 if (Py_STRING_MATCH(s, j, sub, n)) { 1575 SPLIT_ADD(s, i, j); 1576 i = j = j + n; 1577 break; 1578 } 1579 } 1580 } 1581 #endif 1582 SPLIT_ADD(s, i, len); 1583 FIX_PREALLOC_SIZE(list); 1584 return list; 1585 1586 onError: 1587 Py_DECREF(list); 1588 return NULL; 1473 else if (PyObject_AsCharBuffer(subobj, &sub, &n)) 1474 return NULL; 1475 1476 return stringlib_split((PyObject*) self, s, len, sub, n, maxsplit); 1589 1477 } 1590 1478 … … 1599 1487 string_partition(PyStringObject *self, PyObject *sep_obj) 1600 1488 { 1601 1602 1603 1604 1605 1606 1607 1489 const char *sep; 1490 Py_ssize_t sep_len; 1491 1492 if (PyString_Check(sep_obj)) { 1493 sep = PyString_AS_STRING(sep_obj); 1494 sep_len = PyString_GET_SIZE(sep_obj); 1495 } 1608 1496 #ifdef Py_USING_UNICODE 1609 1610 1497 else if (PyUnicode_Check(sep_obj)) 1498 return PyUnicode_Partition((PyObject *) self, sep_obj); 1611 1499 #endif 1612 1613 1614 1615 1616 1617 1618 1619 1500 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len)) 1501 return NULL; 1502 1503 return stringlib_partition( 1504 (PyObject*) self, 1505 PyString_AS_STRING(self), PyString_GET_SIZE(self), 1506 sep_obj, sep, sep_len 1507 ); 1620 1508 } 1621 1509 … … 1630 1518 string_rpartition(PyStringObject *self, PyObject *sep_obj) 1631 1519 { 1632 1633 1634 1635 1636 1637 1638 1520 const char *sep; 1521 Py_ssize_t sep_len; 1522 1523 if (PyString_Check(sep_obj)) 
{ 1524 sep = PyString_AS_STRING(sep_obj); 1525 sep_len = PyString_GET_SIZE(sep_obj); 1526 } 1639 1527 #ifdef Py_USING_UNICODE 1640 1641 1528 else if (PyUnicode_Check(sep_obj)) 1529 return PyUnicode_RPartition((PyObject *) self, sep_obj); 1642 1530 #endif 1643 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len)) 1644 return NULL; 1645 1646 return stringlib_rpartition( 1647 (PyObject*) self, 1648 PyString_AS_STRING(self), PyString_GET_SIZE(self), 1649 sep_obj, sep, sep_len 1650 ); 1651 } 1652 1653 Py_LOCAL_INLINE(PyObject *) 1654 rsplit_whitespace(PyStringObject *self, Py_ssize_t len, Py_ssize_t maxsplit) 1655 { 1656 const char *s = PyString_AS_STRING(self); 1657 Py_ssize_t i, j, count=0; 1658 PyObject *str; 1659 PyObject *list = PyList_New(PREALLOC_SIZE(maxsplit)); 1660 1661 if (list == NULL) 1662 return NULL; 1663 1664 i = j = len-1; 1665 1666 while (maxsplit-- > 0) { 1667 RSKIP_SPACE(s, i); 1668 if (i<0) break; 1669 j = i; i--; 1670 RSKIP_NONSPACE(s, i); 1671 if (j == len-1 && i < 0 && PyString_CheckExact(self)) { 1672 /* No whitespace in self, so just use it as list[0] */ 1673 Py_INCREF(self); 1674 PyList_SET_ITEM(list, 0, (PyObject *)self); 1675 count++; 1676 break; 1677 } 1678 SPLIT_ADD(s, i + 1, j + 1); 1679 } 1680 if (i >= 0) { 1681 /* Only occurs when maxsplit was reached */ 1682 /* Skip any remaining whitespace and copy to beginning of string */ 1683 RSKIP_SPACE(s, i); 1684 if (i >= 0) 1685 SPLIT_ADD(s, 0, i + 1); 1686 1687 } 1688 FIX_PREALLOC_SIZE(list); 1689 if (PyList_Reverse(list) < 0) 1690 goto onError; 1691 return list; 1692 onError: 1693 Py_DECREF(list); 1694 return NULL; 1695 } 1696 1697 Py_LOCAL_INLINE(PyObject *) 1698 rsplit_char(PyStringObject *self, Py_ssize_t len, char ch, Py_ssize_t maxcount) 1699 { 1700 const char *s = PyString_AS_STRING(self); 1701 register Py_ssize_t i, j, count=0; 1702 PyObject *str; 1703 PyObject *list = PyList_New(PREALLOC_SIZE(maxcount)); 1704 1705 if (list == NULL) 1706 return NULL; 1707 1708 i = j = len - 1; 1709 
while ((i >= 0) && (maxcount-- > 0)) { 1710 for (; i >= 0; i--) { 1711 if (s[i] == ch) { 1712 SPLIT_ADD(s, i + 1, j + 1); 1713 j = i = i - 1; 1714 break; 1715 } 1716 } 1717 } 1718 if (i < 0 && count == 0 && PyString_CheckExact(self)) { 1719 /* ch not in self, so just use self as list[0] */ 1720 Py_INCREF(self); 1721 PyList_SET_ITEM(list, 0, (PyObject *)self); 1722 count++; 1723 } 1724 else if (j >= -1) { 1725 SPLIT_ADD(s, 0, j + 1); 1726 } 1727 FIX_PREALLOC_SIZE(list); 1728 if (PyList_Reverse(list) < 0) 1729 goto onError; 1730 return list; 1731 1732 onError: 1733 Py_DECREF(list); 1734 return NULL; 1531 else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len)) 1532 return NULL; 1533 1534 return stringlib_rpartition( 1535 (PyObject*) self, 1536 PyString_AS_STRING(self), PyString_GET_SIZE(self), 1537 sep_obj, sep, sep_len 1538 ); 1735 1539 } 1736 1540 … … 1747 1551 string_rsplit(PyStringObject *self, PyObject *args) 1748 1552 { 1749 Py_ssize_t len = PyString_GET_SIZE(self), n, i, j;1750 Py_ssize_t maxsplit = -1, count=0;1751 const char *s, *sub;1752 PyObject *list, *str,*subobj = Py_None;1753 1754 1755 1756 1757 1758 1759 return rsplit_whitespace(self, len, maxsplit);1760 1761 1762 1763 1553 Py_ssize_t len = PyString_GET_SIZE(self), n; 1554 Py_ssize_t maxsplit = -1; 1555 const char *s = PyString_AS_STRING(self), *sub; 1556 PyObject *subobj = Py_None; 1557 1558 if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit)) 1559 return NULL; 1560 if (maxsplit < 0) 1561 maxsplit = PY_SSIZE_T_MAX; 1562 if (subobj == Py_None) 1563 return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit); 1564 if (PyString_Check(subobj)) { 1565 sub = PyString_AS_STRING(subobj); 1566 n = PyString_GET_SIZE(subobj); 1567 } 1764 1568 #ifdef Py_USING_UNICODE 1765 1766 1569 else if (PyUnicode_Check(subobj)) 1570 return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit); 1767 1571 #endif 1768 else if (PyObject_AsCharBuffer(subobj, &sub, &n)) 1769 return NULL; 1770 1771 if (n == 0) { 
1772 PyErr_SetString(PyExc_ValueError, "empty separator"); 1773 return NULL; 1774 } 1775 else if (n == 1) 1776 return rsplit_char(self, len, sub[0], maxsplit); 1777 1778 list = PyList_New(PREALLOC_SIZE(maxsplit)); 1779 if (list == NULL) 1780 return NULL; 1781 1782 j = len; 1783 i = j - n; 1784 1785 s = PyString_AS_STRING(self); 1786 while ( (i >= 0) && (maxsplit-- > 0) ) { 1787 for (; i>=0; i--) { 1788 if (Py_STRING_MATCH(s, i, sub, n)) { 1789 SPLIT_ADD(s, i + n, j); 1790 j = i; 1791 i -= n; 1792 break; 1793 } 1794 } 1795 } 1796 SPLIT_ADD(s, 0, j); 1797 FIX_PREALLOC_SIZE(list); 1798 if (PyList_Reverse(list) < 0) 1799 goto onError; 1800 return list; 1801 1802 onError: 1803 Py_DECREF(list); 1804 return NULL; 1572 else if (PyObject_AsCharBuffer(subobj, &sub, &n)) 1573 return NULL; 1574 1575 return stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit); 1805 1576 } 1806 1577 … … 1815 1586 string_join(PyStringObject *self, PyObject *orig) 1816 1587 { 1817 1818 1819 1820 1821 1822 1823 1824 1825 1826 1827 1828 1829 1830 1831 1832 1833 1834 1835 1836 1837 1838 1839 1840 1841 1842 1843 1844 1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1588 char *sep = PyString_AS_STRING(self); 1589 const Py_ssize_t seplen = PyString_GET_SIZE(self); 1590 PyObject *res = NULL; 1591 char *p; 1592 Py_ssize_t seqlen = 0; 1593 size_t sz = 0; 1594 Py_ssize_t i; 1595 PyObject *seq, *item; 1596 1597 seq = PySequence_Fast(orig, ""); 1598 if (seq == NULL) { 1599 return NULL; 1600 } 1601 1602 seqlen = PySequence_Size(seq); 1603 if (seqlen == 0) { 1604 Py_DECREF(seq); 1605 return PyString_FromString(""); 1606 } 1607 if (seqlen == 1) { 1608 item = PySequence_Fast_GET_ITEM(seq, 0); 1609 if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) { 1610 Py_INCREF(item); 1611 Py_DECREF(seq); 1612 return item; 1613 } 1614 } 1615 1616 /* There are at least two things to join, or else we have a subclass 1617 * of the builtin types in the sequence. 
1618 * Do a pre-pass to figure out the total amount of space we'll 1619 * need (sz), see whether any argument is absurd, and defer to 1620 * the Unicode join if appropriate. 1621 */ 1622 for (i = 0; i < seqlen; i++) { 1623 const size_t old_sz = sz; 1624 item = PySequence_Fast_GET_ITEM(seq, i); 1625 if (!PyString_Check(item)){ 1855 1626 #ifdef Py_USING_UNICODE 1856 1857 1858 1859 1860 1861 1862 1863 1864 1865 1866 1627 if (PyUnicode_Check(item)) { 1628 /* Defer to Unicode join. 1629 * CAUTION: There's no gurantee that the 1630 * original sequence can be iterated over 1631 * again, so we must pass seq here. 1632 */ 1633 PyObject *result; 1634 result = PyUnicode_Join((PyObject *)self, seq); 1635 Py_DECREF(seq); 1636 return result; 1637 } 1867 1638 #endif 1868 1869 1870 1871 1872 1873 1874 1875 1876 1877 1878 1879 1880 1881 1882 1883 1884 1885 1886 1887 1888 1889 1890 1891 1892 1893 1894 1895 1896 1897 1898 1899 1900 1901 1902 1903 1904 1905 1906 1907 1908 1639 PyErr_Format(PyExc_TypeError, 1640 "sequence item %zd: expected string," 1641 " %.80s found", 1642 i, Py_TYPE(item)->tp_name); 1643 Py_DECREF(seq); 1644 return NULL; 1645 } 1646 sz += PyString_GET_SIZE(item); 1647 if (i != 0) 1648 sz += seplen; 1649 if (sz < old_sz || sz > PY_SSIZE_T_MAX) { 1650 PyErr_SetString(PyExc_OverflowError, 1651 "join() result is too long for a Python string"); 1652 Py_DECREF(seq); 1653 return NULL; 1654 } 1655 } 1656 1657 /* Allocate result space. */ 1658 res = PyString_FromStringAndSize((char*)NULL, sz); 1659 if (res == NULL) { 1660 Py_DECREF(seq); 1661 return NULL; 1662 } 1663 1664 /* Catenate everything. 
*/ 1665 p = PyString_AS_STRING(res); 1666 for (i = 0; i < seqlen; ++i) { 1667 size_t n; 1668 item = PySequence_Fast_GET_ITEM(seq, i); 1669 n = PyString_GET_SIZE(item); 1670 Py_MEMCPY(p, PyString_AS_STRING(item), n); 1671 p += n; 1672 if (i < seqlen - 1) { 1673 Py_MEMCPY(p, sep, seplen); 1674 p += seplen; 1675 } 1676 } 1677 1678 Py_DECREF(seq); 1679 return res; 1909 1680 } 1910 1681 … … 1912 1683 _PyString_Join(PyObject *sep, PyObject *x) 1913 1684 { 1914 1915 1916 1917 } 1918 1919 Py_LOCAL_INLINE(void) 1920 string_adjust_indices(Py_ssize_t *start, Py_ssize_t *end, Py_ssize_t len) 1921 { 1922 if (*end > len) 1923 *end = len; 1924 else if (*end < 0) 1925 *end += len; 1926 if (*end < 0) 1927 *end = 0; 1928 if (*start < 0) 1929 *start += len; 1930 if (*start < 0) 1931 *start = 0; 1932 }1685 assert(sep != NULL && PyString_Check(sep)); 1686 assert(x != NULL); 1687 return string_join((PyStringObject *)sep, x); 1688 } 1689 1690 /* helper macro to fixup start/end slice values */ 1691 #define ADJUST_INDICES(start, end, len) \ 1692 if (end > len) \ 1693 end = len; \ 1694 else if (end < 0) { \ 1695 end += len; \ 1696 if (end < 0) \ 1697 end = 0; \ 1698 } \ 1699 if (start < 0) { \ 1700 start += len; \ 1701 if (start < 0) \ 1702 start = 0; \ 1703 } 1933 1704 1934 1705 Py_LOCAL_INLINE(Py_ssize_t) 1935 1706 string_find_internal(PyStringObject *self, PyObject *args, int dir) 1936 1707 { 1937 PyObject *subobj; 1938 const char *sub; 1939 Py_ssize_t sub_len; 1940 Py_ssize_t start=0, end=PY_SSIZE_T_MAX; 1941 PyObject *obj_start=Py_None, *obj_end=Py_None; 1942 1943 if (!PyArg_ParseTuple(args, "O|OO:find/rfind/index/rindex", &subobj, 1944 &obj_start, &obj_end)) 1945 return -2; 1946 /* To support None in "start" and "end" arguments, meaning 1947 the same as if they were not passed. 
1948 */ 1949 if (obj_start != Py_None) 1950 if (!_PyEval_SliceIndex(obj_start, &start)) 1951 return -2; 1952 if (obj_end != Py_None) 1953 if (!_PyEval_SliceIndex(obj_end, &end)) 1954 return -2; 1955 1956 if (PyString_Check(subobj)) { 1957 sub = PyString_AS_STRING(subobj); 1958 sub_len = PyString_GET_SIZE(subobj); 1959 } 1708 PyObject *subobj; 1709 const char *sub; 1710 Py_ssize_t sub_len; 1711 Py_ssize_t start=0, end=PY_SSIZE_T_MAX; 1712 1713 if (!stringlib_parse_args_finds("find/rfind/index/rindex", 1714 args, &subobj, &start, &end)) 1715 return -2; 1716 1717 if (PyString_Check(subobj)) { 1718 sub = PyString_AS_STRING(subobj); 1719 sub_len = PyString_GET_SIZE(subobj); 1720 } 1960 1721 #ifdef Py_USING_UNICODE 1961 1962 1963 1722 else if (PyUnicode_Check(subobj)) 1723 return PyUnicode_Find( 1724 (PyObject *)self, subobj, start, end, dir); 1964 1725 #endif 1965 1966 1967 1968 1969 1970 1971 1972 1973 1974 1975 1976 1977 1726 else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len)) 1727 /* XXX - the "expected a character buffer object" is pretty 1728 confusing for a non-expert. remap to something else ? */ 1729 return -2; 1730 1731 if (dir > 0) 1732 return stringlib_find_slice( 1733 PyString_AS_STRING(self), PyString_GET_SIZE(self), 1734 sub, sub_len, start, end); 1735 else 1736 return stringlib_rfind_slice( 1737 PyString_AS_STRING(self), PyString_GET_SIZE(self), 1738 sub, sub_len, start, end); 1978 1739 } 1979 1740 … … 1983 1744 \n\ 1984 1745 Return the lowest index in S where substring sub is found,\n\ 1985 such that sub is contained within s[start:end]. Optional\n\1746 such that sub is contained within S[start:end]. 
Optional\n\ 1986 1747 arguments start and end are interpreted as in slice notation.\n\ 1987 1748 \n\ … … 1991 1752 string_find(PyStringObject *self, PyObject *args) 1992 1753 { 1993 1994 1995 1996 1754 Py_ssize_t result = string_find_internal(self, args, +1); 1755 if (result == -2) 1756 return NULL; 1757 return PyInt_FromSsize_t(result); 1997 1758 } 1998 1759 … … 2006 1767 string_index(PyStringObject *self, PyObject *args) 2007 1768 { 2008 2009 2010 2011 2012 2013 2014 2015 2016 1769 Py_ssize_t result = string_find_internal(self, args, +1); 1770 if (result == -2) 1771 return NULL; 1772 if (result == -1) { 1773 PyErr_SetString(PyExc_ValueError, 1774 "substring not found"); 1775 return NULL; 1776 } 1777 return PyInt_FromSsize_t(result); 2017 1778 } 2018 1779 … … 2022 1783 \n\ 2023 1784 Return the highest index in S where substring sub is found,\n\ 2024 such that sub is contained within s[start:end]. Optional\n\1785 such that sub is contained within S[start:end]. Optional\n\ 2025 1786 arguments start and end are interpreted as in slice notation.\n\ 2026 1787 \n\ … … 2030 1791 string_rfind(PyStringObject *self, PyObject *args) 2031 1792 { 2032 2033 2034 2035 1793 Py_ssize_t result = string_find_internal(self, args, -1); 1794 if (result == -2) 1795 return NULL; 1796 return PyInt_FromSsize_t(result); 2036 1797 } 2037 1798 … … 2045 1806 string_rindex(PyStringObject *self, PyObject *args) 2046 1807 { 2047 2048 2049 2050 2051 2052 2053 2054 2055 1808 Py_ssize_t result = string_find_internal(self, args, -1); 1809 if (result == -2) 1810 return NULL; 1811 if (result == -1) { 1812 PyErr_SetString(PyExc_ValueError, 1813 "substring not found"); 1814 return NULL; 1815 } 1816 return PyInt_FromSsize_t(result); 2056 1817 } 2057 1818 … … 2060 1821 do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj) 2061 1822 { 2062 2063 2064 2065 2066 2067 2068 2069 2070 2071 2072 2073 2074 2075 2076 2077 2078 2079 2080 2081 2082 2083 2084 2085 2086 2087 2088 1823 char *s = 
PyString_AS_STRING(self); 1824 Py_ssize_t len = PyString_GET_SIZE(self); 1825 char *sep = PyString_AS_STRING(sepobj); 1826 Py_ssize_t seplen = PyString_GET_SIZE(sepobj); 1827 Py_ssize_t i, j; 1828 1829 i = 0; 1830 if (striptype != RIGHTSTRIP) { 1831 while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) { 1832 i++; 1833 } 1834 } 1835 1836 j = len; 1837 if (striptype != LEFTSTRIP) { 1838 do { 1839 j--; 1840 } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen)); 1841 j++; 1842 } 1843 1844 if (i == 0 && j == len && PyString_CheckExact(self)) { 1845 Py_INCREF(self); 1846 return (PyObject*)self; 1847 } 1848 else 1849 return PyString_FromStringAndSize(s+i, j-i); 2089 1850 } 2090 1851 … … 2093 1854 do_strip(PyStringObject *self, int striptype) 2094 1855 { 2095 2096 2097 2098 2099 2100 2101 2102 2103 2104 2105 2106 2107 2108 2109 2110 2111 2112 2113 2114 2115 2116 2117 2118 1856 char *s = PyString_AS_STRING(self); 1857 Py_ssize_t len = PyString_GET_SIZE(self), i, j; 1858 1859 i = 0; 1860 if (striptype != RIGHTSTRIP) { 1861 while (i < len && isspace(Py_CHARMASK(s[i]))) { 1862 i++; 1863 } 1864 } 1865 1866 j = len; 1867 if (striptype != LEFTSTRIP) { 1868 do { 1869 j--; 1870 } while (j >= i && isspace(Py_CHARMASK(s[j]))); 1871 j++; 1872 } 1873 1874 if (i == 0 && j == len && PyString_CheckExact(self)) { 1875 Py_INCREF(self); 1876 return (PyObject*)self; 1877 } 1878 else 1879 return PyString_FromStringAndSize(s+i, j-i); 2119 1880 } 2120 1881 … … 2123 1884 do_argstrip(PyStringObject *self, int striptype, PyObject *args) 2124 1885 { 2125 2126 2127 2128 2129 2130 2131 2132 1886 PyObject *sep = NULL; 1887 1888 if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep)) 1889 return NULL; 1890 1891 if (sep != NULL && sep != Py_None) { 1892 if (PyString_Check(sep)) 1893 return do_xstrip(self, striptype, sep); 2133 1894 #ifdef Py_USING_UNICODE 2134 2135 2136 2137 2138 2139 2140 2141 2142 2143 1895 else if (PyUnicode_Check(sep)) { 1896 PyObject *uniself = 
PyUnicode_FromObject((PyObject *)self); 1897 PyObject *res; 1898 if (uniself==NULL) 1899 return NULL; 1900 res = _PyUnicode_XStrip((PyUnicodeObject *)uniself, 1901 striptype, sep); 1902 Py_DECREF(uniself); 1903 return res; 1904 } 2144 1905 #endif 2145 1906 PyErr_Format(PyExc_TypeError, 2146 1907 #ifdef Py_USING_UNICODE 2147 1908 "%s arg must be None, str or unicode", 2148 1909 #else 2149 1910 "%s arg must be None or str", 2150 1911 #endif 2151 2152 2153 2154 2155 1912 STRIPNAME(striptype)); 1913 return NULL; 1914 } 1915 1916 return do_strip(self, striptype); 2156 1917 } 2157 1918 … … 2168 1929 string_strip(PyStringObject *self, PyObject *args) 2169 1930 { 2170 2171 2172 2173 1931 if (PyTuple_GET_SIZE(args) == 0) 1932 return do_strip(self, BOTHSTRIP); /* Common case */ 1933 else 1934 return do_argstrip(self, BOTHSTRIP, args); 2174 1935 } 2175 1936 … … 2185 1946 string_lstrip(PyStringObject *self, PyObject *args) 2186 1947 { 2187 2188 2189 2190 1948 if (PyTuple_GET_SIZE(args) == 0) 1949 return do_strip(self, LEFTSTRIP); /* Common case */ 1950 else 1951 return do_argstrip(self, LEFTSTRIP, args); 2191 1952 } 2192 1953 … … 2202 1963 string_rstrip(PyStringObject *self, PyObject *args) 2203 1964 { 2204 2205 2206 2207 1965 if (PyTuple_GET_SIZE(args) == 0) 1966 return do_strip(self, RIGHTSTRIP); /* Common case */ 1967 else 1968 return do_argstrip(self, RIGHTSTRIP, args); 2208 1969 } 2209 1970 … … 2222 1983 string_lower(PyStringObject *self) 2223 1984 { 2224 2225 2226 2227 2228 2229 2230 2231 2232 2233 2234 2235 2236 2237 2238 2239 2240 2241 2242 1985 char *s; 1986 Py_ssize_t i, n = PyString_GET_SIZE(self); 1987 PyObject *newobj; 1988 1989 newobj = PyString_FromStringAndSize(NULL, n); 1990 if (!newobj) 1991 return NULL; 1992 1993 s = PyString_AS_STRING(newobj); 1994 1995 Py_MEMCPY(s, PyString_AS_STRING(self), n); 1996 1997 for (i = 0; i < n; i++) { 1998 int c = Py_CHARMASK(s[i]); 1999 if (isupper(c)) 2000 s[i] = _tolower(c); 2001 } 2002 2003 return newobj; 2243 2004 } 2244 
2005 … … 2255 2016 string_upper(PyStringObject *self) 2256 2017 { 2257 2258 2259 2260 2261 2262 2263 2264 2265 2266 2267 2268 2269 2270 2271 2272 2273 2274 2275 2018 char *s; 2019 Py_ssize_t i, n = PyString_GET_SIZE(self); 2020 PyObject *newobj; 2021 2022 newobj = PyString_FromStringAndSize(NULL, n); 2023 if (!newobj) 2024 return NULL; 2025 2026 s = PyString_AS_STRING(newobj); 2027 2028 Py_MEMCPY(s, PyString_AS_STRING(self), n); 2029 2030 for (i = 0; i < n; i++) { 2031 int c = Py_CHARMASK(s[i]); 2032 if (islower(c)) 2033 s[i] = _toupper(c); 2034 } 2035 2036 return newobj; 2276 2037 } 2277 2038 … … 2285 2046 string_title(PyStringObject *self) 2286 2047 { 2287 2288 2289 2290 2291 2292 2293 2294 2295 2296 2297 2298 2299 2300 2301 2302 2303 2304 2305 2306 2307 2308 2309 2310 2048 char *s = PyString_AS_STRING(self), *s_new; 2049 Py_ssize_t i, n = PyString_GET_SIZE(self); 2050 int previous_is_cased = 0; 2051 PyObject *newobj; 2052 2053 newobj = PyString_FromStringAndSize(NULL, n); 2054 if (newobj == NULL) 2055 return NULL; 2056 s_new = PyString_AsString(newobj); 2057 for (i = 0; i < n; i++) { 2058 int c = Py_CHARMASK(*s++); 2059 if (islower(c)) { 2060 if (!previous_is_cased) 2061 c = toupper(c); 2062 previous_is_cased = 1; 2063 } else if (isupper(c)) { 2064 if (previous_is_cased) 2065 c = tolower(c); 2066 previous_is_cased = 1; 2067 } else 2068 previous_is_cased = 0; 2069 *s_new++ = c; 2070 } 2071 return newobj; 2311 2072 } 2312 2073 … … 2320 2081 string_capitalize(PyStringObject *self) 2321 2082 { 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 2333 2334 2335 2336 2337 2338 2339 2340 2341 2342 2343 2344 2345 2346 2083 char *s = PyString_AS_STRING(self), *s_new; 2084 Py_ssize_t i, n = PyString_GET_SIZE(self); 2085 PyObject *newobj; 2086 2087 newobj = PyString_FromStringAndSize(NULL, n); 2088 if (newobj == NULL) 2089 return NULL; 2090 s_new = PyString_AsString(newobj); 2091 if (0 < n) { 2092 int c = Py_CHARMASK(*s++); 2093 if (islower(c)) 2094 *s_new = toupper(c); 
2095 else 2096 *s_new = c; 2097 s_new++; 2098 } 2099 for (i = 1; i < n; i++) { 2100 int c = Py_CHARMASK(*s++); 2101 if (isupper(c)) 2102 *s_new = tolower(c); 2103 else 2104 *s_new = c; 2105 s_new++; 2106 } 2107 return newobj; 2347 2108 } 2348 2109 … … 2358 2119 string_count(PyStringObject *self, PyObject *args) 2359 2120 { 2360 PyObject *sub_obj; 2361 const char *str = PyString_AS_STRING(self), *sub; 2362 Py_ssize_t sub_len; 2363 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX; 2364 2365 if (!PyArg_ParseTuple(args, "O|O&O&:count", &sub_obj, 2366 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end)) 2367 return NULL; 2368 2369 if (PyString_Check(sub_obj)) { 2370 sub = PyString_AS_STRING(sub_obj); 2371 sub_len = PyString_GET_SIZE(sub_obj); 2372 } 2121 PyObject *sub_obj; 2122 const char *str = PyString_AS_STRING(self), *sub; 2123 Py_ssize_t sub_len; 2124 Py_ssize_t start = 0, end = PY_SSIZE_T_MAX; 2125 2126 if (!stringlib_parse_args_finds("count", args, &sub_obj, &start, &end)) 2127 return NULL; 2128 2129 if (PyString_Check(sub_obj)) { 2130 sub = PyString_AS_STRING(sub_obj); 2131 sub_len = PyString_GET_SIZE(sub_obj); 2132 } 2373 2133 #ifdef Py_USING_UNICODE 2374 2375 2376 2377 2378 2379 2380 2381 2134 else if (PyUnicode_Check(sub_obj)) { 2135 Py_ssize_t count; 2136 count = PyUnicode_Count((PyObject *)self, sub_obj, start, end); 2137 if (count == -1) 2138 return NULL; 2139 else 2140 return PyInt_FromSsize_t(count); 2141 } 2382 2142 #endif 2383 2384 2385 2386 string_adjust_indices(&start, &end, PyString_GET_SIZE(self));2387 2388 2389 stringlib_count(str + start, end - start, sub, sub_len)2390 2143 else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len)) 2144 return NULL; 2145 2146 ADJUST_INDICES(start, end, PyString_GET_SIZE(self)); 2147 2148 return PyInt_FromSsize_t( 2149 stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX) 2150 ); 2391 2151 } 2392 2152 … … 2400 2160 string_swapcase(PyStringObject *self) 2401 2161 { 2402 2403 2404 2405 2406 2407 2408 
2409 2410 2411 2412 2413 2414 2415 2416 2417 2418 2419 2420 2421 2422 2162 char *s = PyString_AS_STRING(self), *s_new; 2163 Py_ssize_t i, n = PyString_GET_SIZE(self); 2164 PyObject *newobj; 2165 2166 newobj = PyString_FromStringAndSize(NULL, n); 2167 if (newobj == NULL) 2168 return NULL; 2169 s_new = PyString_AsString(newobj); 2170 for (i = 0; i < n; i++) { 2171 int c = Py_CHARMASK(*s++); 2172 if (islower(c)) { 2173 *s_new = toupper(c); 2174 } 2175 else if (isupper(c)) { 2176 *s_new = tolower(c); 2177 } 2178 else 2179 *s_new = c; 2180 s_new++; 2181 } 2182 return newobj; 2423 2183 } 2424 2184 … … 2430 2190 in the optional argument deletechars are removed, and the\n\ 2431 2191 remaining characters have been mapped through the given\n\ 2432 translation table, which must be a string of length 256."); 2192 translation table, which must be a string of length 256 or None.\n\ 2193 If the table argument is None, no translation is applied and\n\ 2194 the operation simply removes the characters in deletechars."); 2433 2195 2434 2196 static PyObject * 2435 2197 string_translate(PyStringObject *self, PyObject *args) 2436 2198 { 2437 2438 2439 2440 2441 2442 2443 2444 2445 2446 2447 2448 2449 2450 2451 2452 2453 2454 2455 2456 2457 2458 2199 register char *input, *output; 2200 const char *table; 2201 register Py_ssize_t i, c, changed = 0; 2202 PyObject *input_obj = (PyObject*)self; 2203 const char *output_start, *del_table=NULL; 2204 Py_ssize_t inlen, tablen, dellen = 0; 2205 PyObject *result; 2206 int trans_table[256]; 2207 PyObject *tableobj, *delobj = NULL; 2208 2209 if (!PyArg_UnpackTuple(args, "translate", 1, 2, 2210 &tableobj, &delobj)) 2211 return NULL; 2212 2213 if (PyString_Check(tableobj)) { 2214 table = PyString_AS_STRING(tableobj); 2215 tablen = PyString_GET_SIZE(tableobj); 2216 } 2217 else if (tableobj == Py_None) { 2218 table = NULL; 2219 tablen = 256; 2220 } 2459 2221 #ifdef Py_USING_UNICODE 2460 2461 2462 2463 2464 2465 2466 2467 2468 2469 2470 2222 else if 
(PyUnicode_Check(tableobj)) { 2223 /* Unicode .translate() does not support the deletechars 2224 parameter; instead a mapping to None will cause characters 2225 to be deleted. */ 2226 if (delobj != NULL) { 2227 PyErr_SetString(PyExc_TypeError, 2228 "deletions are implemented differently for unicode"); 2229 return NULL; 2230 } 2231 return PyUnicode_Translate((PyObject *)self, tableobj, NULL); 2232 } 2471 2233 #endif 2472 2473 2474 2475 2476 2477 2478 2479 2480 2481 2482 2483 2484 2485 2234 else if (PyObject_AsCharBuffer(tableobj, &table, &tablen)) 2235 return NULL; 2236 2237 if (tablen != 256) { 2238 PyErr_SetString(PyExc_ValueError, 2239 "translation table must be 256 characters long"); 2240 return NULL; 2241 } 2242 2243 if (delobj != NULL) { 2244 if (PyString_Check(delobj)) { 2245 del_table = PyString_AS_STRING(delobj); 2246 dellen = PyString_GET_SIZE(delobj); 2247 } 2486 2248 #ifdef Py_USING_UNICODE 2487 2488 2489 2490 2491 2249 else if (PyUnicode_Check(delobj)) { 2250 PyErr_SetString(PyExc_TypeError, 2251 "deletions are implemented differently for unicode"); 2252 return NULL; 2253 } 2492 2254 #endif 2493 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen)) 2494 return NULL; 2495 } 2496 else { 2497 del_table = NULL; 2498 dellen = 0; 2499 } 2500 2501 inlen = PyString_GET_SIZE(input_obj); 2502 result = PyString_FromStringAndSize((char *)NULL, inlen); 2503 if (result == NULL) 2504 return NULL; 2505 output_start = output = PyString_AsString(result); 2506 input = PyString_AS_STRING(input_obj); 2507 2508 if (dellen == 0 && table != NULL) { 2509 /* If no deletions are required, use faster code */ 2510 for (i = inlen; --i >= 0; ) { 2511 c = Py_CHARMASK(*input++); 2512 if (Py_CHARMASK((*output++ = table[c])) != c) 2513 changed = 1; 2514 } 2515 if (changed || !PyString_CheckExact(input_obj)) 2516 return result; 2517 Py_DECREF(result); 2518 Py_INCREF(input_obj); 2519 return input_obj; 2520 } 2521 2522 if (table == NULL) { 2523 for (i = 0; i < 256; i++) 2524 
trans_table[i] = Py_CHARMASK(i); 2525 } else { 2526 for (i = 0; i < 256; i++) 2527 trans_table[i] = Py_CHARMASK(table[i]); 2528 } 2529 2530 for (i = 0; i < dellen; i++) 2531 trans_table[(int) Py_CHARMASK(del_table[i])] = -1; 2532 2533 for (i = inlen; --i >= 0; ) { 2534 c = Py_CHARMASK(*input++); 2535 if (trans_table[c] != -1) 2536 if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c) 2537 continue; 2538 changed = 1; 2539 } 2540 if (!changed && PyString_CheckExact(input_obj)) { 2541 Py_DECREF(result); 2542 Py_INCREF(input_obj); 2543 return input_obj; 2544 } 2545 /* Fix the size of the resulting string */ 2546 if (inlen > 0) 2547 _PyString_Resize(&result, output - output_start); 2548 return result; 2549 } 2550 2551 2552 #define FORWARD 1 2553 #define REVERSE -1 2255 else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen)) 2256 return NULL; 2257 } 2258 else { 2259 del_table = NULL; 2260 dellen = 0; 2261 } 2262 2263 inlen = PyString_GET_SIZE(input_obj); 2264 result = PyString_FromStringAndSize((char *)NULL, inlen); 2265 if (result == NULL) 2266 return NULL; 2267 output_start = output = PyString_AsString(result); 2268 input = PyString_AS_STRING(input_obj); 2269 2270 if (dellen == 0 && table != NULL) { 2271 /* If no deletions are required, use faster code */ 2272 for (i = inlen; --i >= 0; ) { 2273 c = Py_CHARMASK(*input++); 2274 if (Py_CHARMASK((*output++ = table[c])) != c) 2275 changed = 1; 2276 } 2277 if (changed || !PyString_CheckExact(input_obj)) 2278 return result; 2279 Py_DECREF(result); 2280 Py_INCREF(input_obj); 2281 return input_obj; 2282 } 2283 2284 if (table == NULL) { 2285 for (i = 0; i < 256; i++) 2286 trans_table[i] = Py_CHARMASK(i); 2287 } else { 2288 for (i = 0; i < 256; i++) 2289 trans_table[i] = Py_CHARMASK(table[i]); 2290 } 2291 2292 for (i = 0; i < dellen; i++) 2293 trans_table[(int) Py_CHARMASK(del_table[i])] = -1; 2294 2295 for (i = inlen; --i >= 0; ) { 2296 c = Py_CHARMASK(*input++); 2297 if (trans_table[c] != -1) 2298 if 
(Py_CHARMASK(*output++ = (char)trans_table[c]) == c) 2299 continue; 2300 changed = 1; 2301 } 2302 if (!changed && PyString_CheckExact(input_obj)) { 2303 Py_DECREF(result); 2304 Py_INCREF(input_obj); 2305 return input_obj; 2306 } 2307 /* Fix the size of the resulting string */ 2308 if (inlen > 0 && _PyString_Resize(&result, output - output_start)) 2309 return NULL; 2310 return result; 2311 } 2312 2554 2313 2555 2314 /* find and count characters and substrings */ 2556 2315 2557 #define findchar(target, target_len, c) 2316 #define findchar(target, target_len, c) \ 2558 2317 ((char *)memchr((const void *)(target), c, target_len)) 2559 2318 … … 2563 2322 return_self(PyStringObject *self) 2564 2323 { 2565 2566 2567 2568 2569 2570 2571 2324 if (PyString_CheckExact(self)) { 2325 Py_INCREF(self); 2326 return self; 2327 } 2328 return (PyStringObject *)PyString_FromStringAndSize( 2329 PyString_AS_STRING(self), 2330 PyString_GET_SIZE(self)); 2572 2331 } 2573 2332 2574 2333 Py_LOCAL_INLINE(Py_ssize_t) 2575 countchar(const char *target, int target_len, char c, Py_ssize_t maxcount) 2576 { 2577 Py_ssize_t count=0; 2578 const char *start=target; 2579 const char *end=target+target_len; 2580 2581 while ( (start=findchar(start, end-start, c)) != NULL ) { 2582 count++; 2583 if (count >= maxcount) 2584 break; 2585 start += 1; 2586 } 2587 return count; 2588 } 2589 2590 Py_LOCAL(Py_ssize_t) 2591 findstring(const char *target, Py_ssize_t target_len, 2592 const char *pattern, Py_ssize_t pattern_len, 2593 Py_ssize_t start, 2594 Py_ssize_t end, 2595 int direction) 2596 { 2597 if (start < 0) { 2598 start += target_len; 2599 if (start < 0) 2600 start = 0; 2601 } 2602 if (end > target_len) { 2603 end = target_len; 2604 } else if (end < 0) { 2605 end += target_len; 2606 if (end < 0) 2607 end = 0; 2608 } 2609 2610 /* zero-length substrings always match at the first attempt */ 2611 if (pattern_len == 0) 2612 return (direction > 0) ? 
start : end; 2613 2614 end -= pattern_len; 2615 2616 if (direction < 0) { 2617 for (; end >= start; end--) 2618 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) 2619 return end; 2620 } else { 2621 for (; start <= end; start++) 2622 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) 2623 return start; 2624 } 2625 return -1; 2626 } 2627 2628 Py_LOCAL_INLINE(Py_ssize_t) 2629 countstring(const char *target, Py_ssize_t target_len, 2630 const char *pattern, Py_ssize_t pattern_len, 2631 Py_ssize_t start, 2632 Py_ssize_t end, 2633 int direction, Py_ssize_t maxcount) 2634 { 2635 Py_ssize_t count=0; 2636 2637 if (start < 0) { 2638 start += target_len; 2639 if (start < 0) 2640 start = 0; 2641 } 2642 if (end > target_len) { 2643 end = target_len; 2644 } else if (end < 0) { 2645 end += target_len; 2646 if (end < 0) 2647 end = 0; 2648 } 2649 2650 /* zero-length substrings match everywhere */ 2651 if (pattern_len == 0 || maxcount == 0) { 2652 if (target_len+1 < maxcount) 2653 return target_len+1; 2654 return maxcount; 2655 } 2656 2657 end -= pattern_len; 2658 if (direction < 0) { 2659 for (; (end >= start); end--) 2660 if (Py_STRING_MATCH(target, end, pattern, pattern_len)) { 2661 count++; 2662 if (--maxcount <= 0) break; 2663 end -= pattern_len-1; 2664 } 2665 } else { 2666 for (; (start <= end); start++) 2667 if (Py_STRING_MATCH(target, start, pattern, pattern_len)) { 2668 count++; 2669 if (--maxcount <= 0) 2670 break; 2671 start += pattern_len-1; 2672 } 2673 } 2674 return count; 2334 countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount) 2335 { 2336 Py_ssize_t count=0; 2337 const char *start=target; 2338 const char *end=target+target_len; 2339 2340 while ( (start=findchar(start, end-start, c)) != NULL ) { 2341 count++; 2342 if (count >= maxcount) 2343 break; 2344 start += 1; 2345 } 2346 return count; 2675 2347 } 2676 2348 … … 2681 2353 Py_LOCAL(PyStringObject *) 2682 2354 replace_interleave(PyStringObject *self, 2683 2684 2685 { 2686 
2687 2688 2689 2690 2691 2692 2693 2694 2695 if (maxcount < count) 2696 2697 2698 2699 2700 2701 2702 2703 2704 2705 2706 2707 2708 2709 2710 2711 2712 2713 2714 2715 2716 2717 2718 2719 2720 2721 2722 2723 2724 2725 2726 2727 2728 2729 2730 2731 2732 2733 2734 2735 2736 2355 const char *to_s, Py_ssize_t to_len, 2356 Py_ssize_t maxcount) 2357 { 2358 char *self_s, *result_s; 2359 Py_ssize_t self_len, result_len; 2360 Py_ssize_t count, i, product; 2361 PyStringObject *result; 2362 2363 self_len = PyString_GET_SIZE(self); 2364 2365 /* 1 at the end plus 1 after every character */ 2366 count = self_len+1; 2367 if (maxcount < count) 2368 count = maxcount; 2369 2370 /* Check for overflow */ 2371 /* result_len = count * to_len + self_len; */ 2372 product = count * to_len; 2373 if (product / to_len != count) { 2374 PyErr_SetString(PyExc_OverflowError, 2375 "replace string is too long"); 2376 return NULL; 2377 } 2378 result_len = product + self_len; 2379 if (result_len < 0) { 2380 PyErr_SetString(PyExc_OverflowError, 2381 "replace string is too long"); 2382 return NULL; 2383 } 2384 2385 if (! 
(result = (PyStringObject *) 2386 PyString_FromStringAndSize(NULL, result_len)) ) 2387 return NULL; 2388 2389 self_s = PyString_AS_STRING(self); 2390 result_s = PyString_AS_STRING(result); 2391 2392 /* TODO: special case single character, which doesn't need memcpy */ 2393 2394 /* Lay the first one down (guaranteed this will occur) */ 2395 Py_MEMCPY(result_s, to_s, to_len); 2396 result_s += to_len; 2397 count -= 1; 2398 2399 for (i=0; i<count; i++) { 2400 *result_s++ = *self_s++; 2401 Py_MEMCPY(result_s, to_s, to_len); 2402 result_s += to_len; 2403 } 2404 2405 /* Copy the rest of the original string */ 2406 Py_MEMCPY(result_s, self_s, self_len-i); 2407 2408 return result; 2737 2409 } 2738 2410 … … 2741 2413 Py_LOCAL(PyStringObject *) 2742 2414 replace_delete_single_character(PyStringObject *self, 2743 2744 { 2745 2746 2747 2748 2749 2750 2751 2752 2753 2754 2755 2756 2757 2758 2759 2760 2761 2762 2763 2764 2765 2766 2767 2768 2769 2770 2771 2772 2773 2774 2775 2776 2777 2778 2779 2415 char from_c, Py_ssize_t maxcount) 2416 { 2417 char *self_s, *result_s; 2418 char *start, *next, *end; 2419 Py_ssize_t self_len, result_len; 2420 Py_ssize_t count; 2421 PyStringObject *result; 2422 2423 self_len = PyString_GET_SIZE(self); 2424 self_s = PyString_AS_STRING(self); 2425 2426 count = countchar(self_s, self_len, from_c, maxcount); 2427 if (count == 0) { 2428 return return_self(self); 2429 } 2430 2431 result_len = self_len - count; /* from_len == 1 */ 2432 assert(result_len>=0); 2433 2434 if ( (result = (PyStringObject *) 2435 PyString_FromStringAndSize(NULL, result_len)) == NULL) 2436 return NULL; 2437 result_s = PyString_AS_STRING(result); 2438 2439 start = self_s; 2440 end = self_s + self_len; 2441 while (count-- > 0) { 2442 next = findchar(start, end-start, from_c); 2443 if (next == NULL) 2444 break; 2445 Py_MEMCPY(result_s, start, next-start); 2446 result_s += (next-start); 2447 start = next+1; 2448 } 2449 Py_MEMCPY(result_s, start, end-start); 2450 2451 return result; 
2780 2452 } 2781 2453 … … 2784 2456 Py_LOCAL(PyStringObject *) 2785 2457 replace_delete_substring(PyStringObject *self, 2786 const char *from_s, Py_ssize_t from_len, 2787 Py_ssize_t maxcount) { 2788 char *self_s, *result_s; 2789 char *start, *next, *end; 2790 Py_ssize_t self_len, result_len; 2791 Py_ssize_t count, offset; 2792 PyStringObject *result; 2793 2794 self_len = PyString_GET_SIZE(self); 2795 self_s = PyString_AS_STRING(self); 2796 2797 count = countstring(self_s, self_len, 2798 from_s, from_len, 2799 0, self_len, 1, 2800 maxcount); 2801 2802 if (count == 0) { 2803 /* no matches */ 2804 return return_self(self); 2805 } 2806 2807 result_len = self_len - (count * from_len); 2808 assert (result_len>=0); 2809 2810 if ( (result = (PyStringObject *) 2811 PyString_FromStringAndSize(NULL, result_len)) == NULL ) 2812 return NULL; 2813 2814 result_s = PyString_AS_STRING(result); 2815 2816 start = self_s; 2817 end = self_s + self_len; 2818 while (count-- > 0) { 2819 offset = findstring(start, end-start, 2820 from_s, from_len, 2821 0, end-start, FORWARD); 2822 if (offset == -1) 2823 break; 2824 next = start + offset; 2825 2826 Py_MEMCPY(result_s, start, next-start); 2827 2828 result_s += (next-start); 2829 start = next+from_len; 2830 } 2831 Py_MEMCPY(result_s, start, end-start); 2832 return result; 2458 const char *from_s, Py_ssize_t from_len, 2459 Py_ssize_t maxcount) { 2460 char *self_s, *result_s; 2461 char *start, *next, *end; 2462 Py_ssize_t self_len, result_len; 2463 Py_ssize_t count, offset; 2464 PyStringObject *result; 2465 2466 self_len = PyString_GET_SIZE(self); 2467 self_s = PyString_AS_STRING(self); 2468 2469 count = stringlib_count(self_s, self_len, 2470 from_s, from_len, 2471 maxcount); 2472 2473 if (count == 0) { 2474 /* no matches */ 2475 return return_self(self); 2476 } 2477 2478 result_len = self_len - (count * from_len); 2479 assert (result_len>=0); 2480 2481 if ( (result = (PyStringObject *) 2482 PyString_FromStringAndSize(NULL, result_len)) == NULL 
) 2483 return NULL; 2484 2485 result_s = PyString_AS_STRING(result); 2486 2487 start = self_s; 2488 end = self_s + self_len; 2489 while (count-- > 0) { 2490 offset = stringlib_find(start, end-start, 2491 from_s, from_len, 2492 0); 2493 if (offset == -1) 2494 break; 2495 next = start + offset; 2496 2497 Py_MEMCPY(result_s, start, next-start); 2498 2499 result_s += (next-start); 2500 start = next+from_len; 2501 } 2502 Py_MEMCPY(result_s, start, end-start); 2503 return result; 2833 2504 } 2834 2505 … … 2836 2507 Py_LOCAL(PyStringObject *) 2837 2508 replace_single_character_in_place(PyStringObject *self, 2838 2839 2840 { 2841 2842 2843 2844 2845 2846 2847 2848 2849 2850 2851 2852 2853 2854 2855 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 2869 2870 2871 2872 2873 2874 2875 2876 2877 2509 char from_c, char to_c, 2510 Py_ssize_t maxcount) 2511 { 2512 char *self_s, *result_s, *start, *end, *next; 2513 Py_ssize_t self_len; 2514 PyStringObject *result; 2515 2516 /* The result string will be the same size */ 2517 self_s = PyString_AS_STRING(self); 2518 self_len = PyString_GET_SIZE(self); 2519 2520 next = findchar(self_s, self_len, from_c); 2521 2522 if (next == NULL) { 2523 /* No matches; return the original string */ 2524 return return_self(self); 2525 } 2526 2527 /* Need to make a new string */ 2528 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len); 2529 if (result == NULL) 2530 return NULL; 2531 result_s = PyString_AS_STRING(result); 2532 Py_MEMCPY(result_s, self_s, self_len); 2533 2534 /* change everything in-place, starting with this one */ 2535 start = result_s + (next-self_s); 2536 *start = to_c; 2537 start++; 2538 end = result_s + self_len; 2539 2540 while (--maxcount > 0) { 2541 next = findchar(start, end-start, from_c); 2542 if (next == NULL) 2543 break; 2544 *next = to_c; 2545 start = next+1; 2546 } 2547 2548 return result; 2878 2549 } 2879 2550 … … 2881 2552 Py_LOCAL(PyStringObject *) 2882 2553 
replace_substring_in_place(PyStringObject *self, 2883 2884 2885 2886 { 2887 2888 2889 2890 2891 2892 2893 2894 2895 2896 2897 offset = findstring(self_s, self_len,2898 2899 0, self_len, FORWARD);2900 2901 2902 2903 2904 2905 2906 2907 2908 2909 2910 2911 2912 2913 2914 2915 2916 2917 2918 2919 offset = findstring(start, end-start,2920 2921 0, end-start, FORWARD);2922 2923 2924 2925 2926 2927 2928 2554 const char *from_s, Py_ssize_t from_len, 2555 const char *to_s, Py_ssize_t to_len, 2556 Py_ssize_t maxcount) 2557 { 2558 char *result_s, *start, *end; 2559 char *self_s; 2560 Py_ssize_t self_len, offset; 2561 PyStringObject *result; 2562 2563 /* The result string will be the same size */ 2564 2565 self_s = PyString_AS_STRING(self); 2566 self_len = PyString_GET_SIZE(self); 2567 2568 offset = stringlib_find(self_s, self_len, 2569 from_s, from_len, 2570 0); 2571 if (offset == -1) { 2572 /* No matches; return the original string */ 2573 return return_self(self); 2574 } 2575 2576 /* Need to make a new string */ 2577 result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len); 2578 if (result == NULL) 2579 return NULL; 2580 result_s = PyString_AS_STRING(result); 2581 Py_MEMCPY(result_s, self_s, self_len); 2582 2583 /* change everything in-place, starting with this one */ 2584 start = result_s + offset; 2585 Py_MEMCPY(start, to_s, from_len); 2586 start += from_len; 2587 end = result_s + self_len; 2588 2589 while ( --maxcount > 0) { 2590 offset = stringlib_find(start, end-start, 2591 from_s, from_len, 2592 0); 2593 if (offset==-1) 2594 break; 2595 Py_MEMCPY(start+offset, to_s, from_len); 2596 start += offset+from_len; 2597 } 2598 2599 return result; 2929 2600 } 2930 2601 … … 2932 2603 Py_LOCAL(PyStringObject *) 2933 2604 replace_single_character(PyStringObject *self, 2934 2935 2936 2937 { 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 2969 2970 2971 2972 2973 
2974 2975 if (next == NULL) 2976 2977 2978 2979 2980 2981 2982 2983 2984 2985 2986 2987 2988 2989 2990 2991 2992 2993 2994 2995 2605 char from_c, 2606 const char *to_s, Py_ssize_t to_len, 2607 Py_ssize_t maxcount) 2608 { 2609 char *self_s, *result_s; 2610 char *start, *next, *end; 2611 Py_ssize_t self_len, result_len; 2612 Py_ssize_t count, product; 2613 PyStringObject *result; 2614 2615 self_s = PyString_AS_STRING(self); 2616 self_len = PyString_GET_SIZE(self); 2617 2618 count = countchar(self_s, self_len, from_c, maxcount); 2619 if (count == 0) { 2620 /* no matches, return unchanged */ 2621 return return_self(self); 2622 } 2623 2624 /* use the difference between current and new, hence the "-1" */ 2625 /* result_len = self_len + count * (to_len-1) */ 2626 product = count * (to_len-1); 2627 if (product / (to_len-1) != count) { 2628 PyErr_SetString(PyExc_OverflowError, "replace string is too long"); 2629 return NULL; 2630 } 2631 result_len = self_len + product; 2632 if (result_len < 0) { 2633 PyErr_SetString(PyExc_OverflowError, "replace string is too long"); 2634 return NULL; 2635 } 2636 2637 if ( (result = (PyStringObject *) 2638 PyString_FromStringAndSize(NULL, result_len)) == NULL) 2639 return NULL; 2640 result_s = PyString_AS_STRING(result); 2641 2642 start = self_s; 2643 end = self_s + self_len; 2644 while (count-- > 0) { 2645 next = findchar(start, end-start, from_c); 2646 if (next == NULL) 2647 break; 2648 2649 if (next == start) { 2650 /* replace with the 'to' */ 2651 Py_MEMCPY(result_s, to_s, to_len); 2652 result_s += to_len; 2653 start += 1; 2654 } else { 2655 /* copy the unchanged old then the 'to' */ 2656 Py_MEMCPY(result_s, start, next-start); 2657 result_s += (next-start); 2658 Py_MEMCPY(result_s, to_s, to_len); 2659 result_s += to_len; 2660 start = next+1; 2661 } 2662 } 2663 /* Copy the remainder of the remaining string */ 2664 Py_MEMCPY(result_s, start, end-start); 2665 2666 return result; 2996 2667 } 2997 2668 … … 2999 2670 Py_LOCAL(PyStringObject 
*) 3000 2671 replace_substring(PyStringObject *self, 3001 const char *from_s, Py_ssize_t from_len, 3002 const char *to_s, Py_ssize_t to_len, 3003 Py_ssize_t maxcount) { 3004 char *self_s, *result_s; 3005 char *start, *next, *end; 3006 Py_ssize_t self_len, result_len; 3007 Py_ssize_t count, offset, product; 3008 PyStringObject *result; 3009 3010 self_s = PyString_AS_STRING(self); 3011 self_len = PyString_GET_SIZE(self); 3012 3013 count = countstring(self_s, self_len, 3014 from_s, from_len, 3015 0, self_len, FORWARD, maxcount); 3016 if (count == 0) { 3017 /* no matches, return unchanged */ 3018 return return_self(self); 3019 } 3020 3021 /* Check for overflow */ 3022 /* result_len = self_len + count * (to_len-from_len) */ 3023 product = count * (to_len-from_len); 3024 if (product / (to_len-from_len) != count) { 3025 PyErr_SetString(PyExc_OverflowError, "replace string is too long"); 3026 return NULL; 3027 } 3028 result_len = self_len + product; 3029 if (result_len < 0) { 3030 PyErr_SetString(PyExc_OverflowError, "replace string is too long"); 3031 return NULL; 3032 } 3033 3034 if ( (result = (PyStringObject *) 3035 PyString_FromStringAndSize(NULL, result_len)) == NULL) 3036 return NULL; 3037 result_s = PyString_AS_STRING(result); 3038 3039 start = self_s; 3040 end = self_s + self_len; 3041 while (count-- > 0) { 3042 offset = findstring(start, end-start, 3043 from_s, from_len, 3044 0, end-start, FORWARD); 3045 if (offset == -1) 3046 break; 3047 next = start+offset; 3048 if (next == start) { 3049 /* replace with the 'to' */ 3050 Py_MEMCPY(result_s, to_s, to_len); 3051 result_s += to_len; 3052 start += from_len; 3053 } else { 3054 /* copy the unchanged old then the 'to' */ 3055 Py_MEMCPY(result_s, start, next-start); 3056 result_s += (next-start); 3057 Py_MEMCPY(result_s, to_s, to_len); 3058 result_s += to_len; 3059 start = next+from_len; 3060 } 3061 } 3062 /* Copy the remainder of the remaining string */ 3063 Py_MEMCPY(result_s, start, end-start); 3064 3065 return 
result; 2672 const char *from_s, Py_ssize_t from_len, 2673 const char *to_s, Py_ssize_t to_len, 2674 Py_ssize_t maxcount) { 2675 char *self_s, *result_s; 2676 char *start, *next, *end; 2677 Py_ssize_t self_len, result_len; 2678 Py_ssize_t count, offset, product; 2679 PyStringObject *result; 2680 2681 self_s = PyString_AS_STRING(self); 2682 self_len = PyString_GET_SIZE(self); 2683 2684 count = stringlib_count(self_s, self_len, 2685 from_s, from_len, 2686 maxcount); 2687 2688 if (count == 0) { 2689 /* no matches, return unchanged */ 2690 return return_self(self); 2691 } 2692 2693 /* Check for overflow */ 2694 /* result_len = self_len + count * (to_len-from_len) */ 2695 product = count * (to_len-from_len); 2696 if (product / (to_len-from_len) != count) { 2697 PyErr_SetString(PyExc_OverflowError, "replace string is too long"); 2698 return NULL; 2699 } 2700 result_len = self_len + product; 2701 if (result_len < 0) { 2702 PyErr_SetString(PyExc_OverflowError, "replace string is too long"); 2703 return NULL; 2704 } 2705 2706 if ( (result = (PyStringObject *) 2707 PyString_FromStringAndSize(NULL, result_len)) == NULL) 2708 return NULL; 2709 result_s = PyString_AS_STRING(result); 2710 2711 start = self_s; 2712 end = self_s + self_len; 2713 while (count-- > 0) { 2714 offset = stringlib_find(start, end-start, 2715 from_s, from_len, 2716 0); 2717 if (offset == -1) 2718 break; 2719 next = start+offset; 2720 if (next == start) { 2721 /* replace with the 'to' */ 2722 Py_MEMCPY(result_s, to_s, to_len); 2723 result_s += to_len; 2724 start += from_len; 2725 } else { 2726 /* copy the unchanged old then the 'to' */ 2727 Py_MEMCPY(result_s, start, next-start); 2728 result_s += (next-start); 2729 Py_MEMCPY(result_s, to_s, to_len); 2730 result_s += to_len; 2731 start = next+from_len; 2732 } 2733 } 2734 /* Copy the remainder of the remaining string */ 2735 Py_MEMCPY(result_s, start, end-start); 2736 2737 return result; 3066 2738 } 3067 2739 … … 3069 2741 Py_LOCAL(PyStringObject *) 3070 
2742 replace(PyStringObject *self, 3071 3072 3073 3074 { 3075 3076 3077 3078 3079 3080 3081 3082 3083 3084 3085 3086 3087 3088 3089 3090 3091 3092 3093 3094 3095 3096 3097 3098 3099 3100 3101 3102 3103 3104 3105 3106 3107 3108 3109 3110 3111 3112 3113 3114 3115 3116 3117 3118 3119 3120 3121 3122 3123 3124 3125 3126 3127 3128 3129 3130 3131 3132 3133 3134 3135 3136 2743 const char *from_s, Py_ssize_t from_len, 2744 const char *to_s, Py_ssize_t to_len, 2745 Py_ssize_t maxcount) 2746 { 2747 if (maxcount < 0) { 2748 maxcount = PY_SSIZE_T_MAX; 2749 } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) { 2750 /* nothing to do; return the original string */ 2751 return return_self(self); 2752 } 2753 2754 if (maxcount == 0 || 2755 (from_len == 0 && to_len == 0)) { 2756 /* nothing to do; return the original string */ 2757 return return_self(self); 2758 } 2759 2760 /* Handle zero-length special cases */ 2761 2762 if (from_len == 0) { 2763 /* insert the 'to' string everywhere. */ 2764 /* >>> "Python".replace("", ".") */ 2765 /* '.P.y.t.h.o.n.' 
*/ 2766 return replace_interleave(self, to_s, to_len, maxcount); 2767 } 2768 2769 /* Except for "".replace("", "A") == "A" there is no way beyond this */ 2770 /* point for an empty self string to generate a non-empty string */ 2771 /* Special case so the remaining code always gets a non-empty string */ 2772 if (PyString_GET_SIZE(self) == 0) { 2773 return return_self(self); 2774 } 2775 2776 if (to_len == 0) { 2777 /* delete all occurances of 'from' string */ 2778 if (from_len == 1) { 2779 return replace_delete_single_character( 2780 self, from_s[0], maxcount); 2781 } else { 2782 return replace_delete_substring(self, from_s, from_len, maxcount); 2783 } 2784 } 2785 2786 /* Handle special case where both strings have the same length */ 2787 2788 if (from_len == to_len) { 2789 if (from_len == 1) { 2790 return replace_single_character_in_place( 2791 self, 2792 from_s[0], 2793 to_s[0], 2794 maxcount); 2795 } else { 2796 return replace_substring_in_place( 2797 self, from_s, from_len, to_s, to_len, maxcount); 2798 } 2799 } 2800 2801 /* Otherwise use the more generic algorithms */ 2802 if (from_len == 1) { 2803 return replace_single_character(self, from_s[0], 2804 to_s, to_len, maxcount); 2805 } else { 2806 /* len('from')>=2, len('to')>=1 */ 2807 return replace_substring(self, from_s, from_len, to_s, to_len, maxcount); 2808 } 3137 2809 } 3138 2810 3139 2811 PyDoc_STRVAR(replace__doc__, 3140 "S.replace 2812 "S.replace(old, new[, count]) -> string\n\ 3141 2813 \n\ 3142 2814 Return a copy of string S with all occurrences of substring\n\ … … 3147 2819 string_replace(PyStringObject *self, PyObject *args) 3148 2820 { 3149 3150 3151 3152 3153 3154 3155 3156 3157 3158 3159 3160 2821 Py_ssize_t count = -1; 2822 PyObject *from, *to; 2823 const char *from_s, *to_s; 2824 Py_ssize_t from_len, to_len; 2825 2826 if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count)) 2827 return NULL; 2828 2829 if (PyString_Check(from)) { 2830 from_s = PyString_AS_STRING(from); 2831 from_len = 
PyString_GET_SIZE(from); 2832 } 3161 2833 #ifdef Py_USING_UNICODE 3162 3163 3164 2834 if (PyUnicode_Check(from)) 2835 return PyUnicode_Replace((PyObject *)self, 2836 from, to, count); 3165 2837 #endif 3166 3167 3168 3169 3170 3171 3172 2838 else if (PyObject_AsCharBuffer(from, &from_s, &from_len)) 2839 return NULL; 2840 2841 if (PyString_Check(to)) { 2842 to_s = PyString_AS_STRING(to); 2843 to_len = PyString_GET_SIZE(to); 2844 } 3173 2845 #ifdef Py_USING_UNICODE 3174 3175 3176 2846 else if (PyUnicode_Check(to)) 2847 return PyUnicode_Replace((PyObject *)self, 2848 from, to, count); 3177 2849 #endif 3178 3179 3180 3181 3182 3183 2850 else if (PyObject_AsCharBuffer(to, &to_s, &to_len)) 2851 return NULL; 2852 2853 return (PyObject *)replace((PyStringObject *) self, 2854 from_s, from_len, 2855 to_s, to_len, count); 3184 2856 } 3185 2857 … … 3192 2864 Py_LOCAL(int) 3193 2865 _string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start, 3194 3195 { 3196 3197 3198 3199 3200 3201 3202 3203 3204 2866 Py_ssize_t end, int direction) 2867 { 2868 Py_ssize_t len = PyString_GET_SIZE(self); 2869 Py_ssize_t slen; 2870 const char* sub; 2871 const char* str; 2872 2873 if (PyString_Check(substr)) { 2874 sub = PyString_AS_STRING(substr); 2875 slen = PyString_GET_SIZE(substr); 2876 } 3205 2877 #ifdef Py_USING_UNICODE 3206 3207 3208 2878 else if (PyUnicode_Check(substr)) 2879 return PyUnicode_Tailmatch((PyObject *)self, 2880 substr, start, end, direction); 3209 2881 #endif 3210 3211 3212 3213 3214 string_adjust_indices(&start, &end, len);3215 3216 3217 3218 3219 3220 3221 3222 3223 3224 3225 3226 3227 3228 3229 3230 2882 else if (PyObject_AsCharBuffer(substr, &sub, &slen)) 2883 return -1; 2884 str = PyString_AS_STRING(self); 2885 2886 ADJUST_INDICES(start, end, len); 2887 2888 if (direction < 0) { 2889 /* startswith */ 2890 if (start+slen > len) 2891 return 0; 2892 } else { 2893 /* endswith */ 2894 if (end-start < slen || start > len) 2895 return 0; 2896 2897 if (end-slen > 
start) 2898 start = end - slen; 2899 } 2900 if (end-start >= slen) 2901 return ! memcmp(str+start, sub, slen); 2902 return 0; 3231 2903 } 3232 2904 … … 3243 2915 string_startswith(PyStringObject *self, PyObject *args) 3244 2916 { 3245 Py_ssize_t start = 0; 3246 Py_ssize_t end = PY_SSIZE_T_MAX; 3247 PyObject *subobj; 3248 int result; 3249 3250 if (!PyArg_ParseTuple(args, "O|O&O&:startswith", &subobj, 3251 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end)) 3252 return NULL; 3253 if (PyTuple_Check(subobj)) { 3254 Py_ssize_t i; 3255 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) { 3256 result = _string_tailmatch(self, 3257 PyTuple_GET_ITEM(subobj, i), 3258 start, end, -1); 3259 if (result == -1) 3260 return NULL; 3261 else if (result) { 3262 Py_RETURN_TRUE; 3263 } 3264 } 3265 Py_RETURN_FALSE; 3266 } 3267 result = _string_tailmatch(self, subobj, start, end, -1); 3268 if (result == -1) 3269 return NULL; 3270 else 3271 return PyBool_FromLong(result); 2917 Py_ssize_t start = 0; 2918 Py_ssize_t end = PY_SSIZE_T_MAX; 2919 PyObject *subobj; 2920 int result; 2921 2922 if (!stringlib_parse_args_finds("startswith", args, &subobj, &start, &end)) 2923 return NULL; 2924 if (PyTuple_Check(subobj)) { 2925 Py_ssize_t i; 2926 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) { 2927 result = _string_tailmatch(self, 2928 PyTuple_GET_ITEM(subobj, i), 2929 start, end, -1); 2930 if (result == -1) 2931 return NULL; 2932 else if (result) { 2933 Py_RETURN_TRUE; 2934 } 2935 } 2936 Py_RETURN_FALSE; 2937 } 2938 result = _string_tailmatch(self, subobj, start, end, -1); 2939 if (result == -1) { 2940 if (PyErr_ExceptionMatches(PyExc_TypeError)) 2941 PyErr_Format(PyExc_TypeError, "startswith first arg must be str, " 2942 "unicode, or tuple, not %s", Py_TYPE(subobj)->tp_name); 2943 return NULL; 2944 } 2945 else 2946 return PyBool_FromLong(result); 3272 2947 } 3273 2948 … … 3284 2959 string_endswith(PyStringObject *self, PyObject *args) 3285 2960 { 3286 Py_ssize_t start = 0; 3287 Py_ssize_t end = 
PY_SSIZE_T_MAX; 3288 PyObject *subobj; 3289 int result; 3290 3291 if (!PyArg_ParseTuple(args, "O|O&O&:endswith", &subobj, 3292 _PyEval_SliceIndex, &start, _PyEval_SliceIndex, &end)) 3293 return NULL; 3294 if (PyTuple_Check(subobj)) { 3295 Py_ssize_t i; 3296 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) { 3297 result = _string_tailmatch(self, 3298 PyTuple_GET_ITEM(subobj, i), 3299 start, end, +1); 3300 if (result == -1) 3301 return NULL; 3302 else if (result) { 3303 Py_RETURN_TRUE; 3304 } 3305 } 3306 Py_RETURN_FALSE; 3307 } 3308 result = _string_tailmatch(self, subobj, start, end, +1); 3309 if (result == -1) 3310 return NULL; 3311 else 3312 return PyBool_FromLong(result); 2961 Py_ssize_t start = 0; 2962 Py_ssize_t end = PY_SSIZE_T_MAX; 2963 PyObject *subobj; 2964 int result; 2965 2966 if (!stringlib_parse_args_finds("endswith", args, &subobj, &start, &end)) 2967 return NULL; 2968 if (PyTuple_Check(subobj)) { 2969 Py_ssize_t i; 2970 for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) { 2971 result = _string_tailmatch(self, 2972 PyTuple_GET_ITEM(subobj, i), 2973 start, end, +1); 2974 if (result == -1) 2975 return NULL; 2976 else if (result) { 2977 Py_RETURN_TRUE; 2978 } 2979 } 2980 Py_RETURN_FALSE; 2981 } 2982 result = _string_tailmatch(self, subobj, start, end, +1); 2983 if (result == -1) { 2984 if (PyErr_ExceptionMatches(PyExc_TypeError)) 2985 PyErr_Format(PyExc_TypeError, "endswith first arg must be str, " 2986 "unicode, or tuple, not %s", Py_TYPE(subobj)->tp_name); 2987 return NULL; 2988 } 2989 else 2990 return PyBool_FromLong(result); 3313 2991 } 3314 2992 … … 3325 3003 3326 3004 static PyObject * 3327 string_encode(PyStringObject *self, PyObject *args) 3328 { 3005 string_encode(PyStringObject *self, PyObject *args, PyObject *kwargs) 3006 { 3007 static char *kwlist[] = {"encoding", "errors", 0}; 3329 3008 char *encoding = NULL; 3330 3009 char *errors = NULL; 3331 3010 PyObject *v; 3332 3011 3333 if (!PyArg_ParseTuple(args, "|ss:encode", &encoding, &errors)) 3012 if 
(!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:encode", 3013 kwlist, &encoding, &errors)) 3334 3014 return NULL; 3335 3015 v = PyString_AsEncodedObject((PyObject *)self, encoding, errors); … … 3362 3042 3363 3043 static PyObject * 3364 string_decode(PyStringObject *self, PyObject *args) 3365 { 3044 string_decode(PyStringObject *self, PyObject *args, PyObject *kwargs) 3045 { 3046 static char *kwlist[] = {"encoding", "errors", 0}; 3366 3047 char *encoding = NULL; 3367 3048 char *errors = NULL; 3368 3049 PyObject *v; 3369 3050 3370 if (!PyArg_ParseTuple(args, "|ss:decode", &encoding, &errors)) 3051 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:decode", 3052 kwlist, &encoding, &errors)) 3371 3053 return NULL; 3372 3054 v = PyString_AsDecodedObject((PyObject *)self, encoding, errors); … … 3404 3086 3405 3087 if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize)) 3406 3088 return NULL; 3407 3089 3408 3090 /* First pass: determine size of output string */ … … 3411 3093 e = PyString_AS_STRING(self) + PyString_GET_SIZE(self); /* end of input */ 3412 3094 for (p = PyString_AS_STRING(self); p < e; p++) 3413 if (*p == '\t') { 3414 if (tabsize > 0) { 3415 incr = tabsize - (j % tabsize); 3416 if (j > PY_SSIZE_T_MAX - incr) 3417 goto overflow1; 3418 j += incr; 3419 } 3420 } 3421 else { 3422 if (j > PY_SSIZE_T_MAX - 1) 3423 goto overflow1; 3424 j++; 3425 if (*p == '\n' || *p == '\r') { 3426 if (i > PY_SSIZE_T_MAX - j) 3427 goto overflow1; 3428 i += j; 3429 j = 0; 3430 } 3095 if (*p == '\t') { 3096 if (tabsize > 0) { 3097 incr = tabsize - (j % tabsize); 3098 if (j > PY_SSIZE_T_MAX - incr) 3099 goto overflow1; 3100 j += incr; 3431 3101 } 3102 } 3103 else { 3104 if (j > PY_SSIZE_T_MAX - 1) 3105 goto overflow1; 3106 j++; 3107 if (*p == '\n' || *p == '\r') { 3108 if (i > PY_SSIZE_T_MAX - j) 3109 goto overflow1; 3110 i += j; 3111 j = 0; 3112 } 3113 } 3432 3114 3433 3115 if (i > PY_SSIZE_T_MAX - j) 3434 3116 goto overflow1; 3435 3117 3436 3118 /* Second pass: create output 
string and fill it */ … … 3444 3126 3445 3127 for (p = PyString_AS_STRING(self); p < e; p++) 3446 if (*p == '\t') { 3447 if (tabsize > 0) { 3448 i = tabsize - (j % tabsize); 3449 j += i; 3450 while (i--) { 3451 if (q >= qe) 3452 goto overflow2; 3453 *q++ = ' '; 3454 } 3455 } 3456 } 3457 else { 3458 if (q >= qe) 3459 goto overflow2; 3460 *q++ = *p; 3461 j++; 3462 if (*p == '\n' || *p == '\r') 3463 j = 0; 3128 if (*p == '\t') { 3129 if (tabsize > 0) { 3130 i = tabsize - (j % tabsize); 3131 j += i; 3132 while (i--) { 3133 if (q >= qe) 3134 goto overflow2; 3135 *q++ = ' '; 3136 } 3464 3137 } 3138 } 3139 else { 3140 if (q >= qe) 3141 goto overflow2; 3142 *q++ = *p; 3143 j++; 3144 if (*p == '\n' || *p == '\r') 3145 j = 0; 3146 } 3465 3147 3466 3148 return u; … … 3489 3171 3490 3172 u = PyString_FromStringAndSize(NULL, 3491 3173 left + PyString_GET_SIZE(self) + right); 3492 3174 if (u) { 3493 3175 if (left) 3494 3176 memset(PyString_AS_STRING(u), fill, left); 3495 3177 Py_MEMCPY(PyString_AS_STRING(u) + left, 3496 3497 3178 PyString_AS_STRING(self), 3179 PyString_GET_SIZE(self)); 3498 3180 if (right) 3499 3181 memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self), 3500 3182 fill, right); 3501 3183 } 3502 3184 … … 3603 3285 else 3604 3286 return PyString_FromStringAndSize( 3605 3606 3287 PyString_AS_STRING(self), 3288 PyString_GET_SIZE(self) 3607 3289 ); 3608 3290 } … … 3640 3322 /* Shortcut for single character strings */ 3641 3323 if (PyString_GET_SIZE(self) == 1 && 3642 3643 3324 isspace(*p)) 3325 return PyBool_FromLong(1); 3644 3326 3645 3327 /* Special case for empty strings */ 3646 3328 if (PyString_GET_SIZE(self) == 0) 3647 3329 return PyBool_FromLong(0); 3648 3330 3649 3331 e = p + PyString_GET_SIZE(self); 3650 3332 for (; p < e; p++) { 3651 3652 3333 if (!isspace(*p)) 3334 return PyBool_FromLong(0); 3653 3335 } 3654 3336 return PyBool_FromLong(1); … … 3671 3353 /* Shortcut for single character strings */ 3672 3354 if (PyString_GET_SIZE(self) == 1 && 3673 
3674 3355 isalpha(*p)) 3356 return PyBool_FromLong(1); 3675 3357 3676 3358 /* Special case for empty strings */ 3677 3359 if (PyString_GET_SIZE(self) == 0) 3678 3360 return PyBool_FromLong(0); 3679 3361 3680 3362 e = p + PyString_GET_SIZE(self); 3681 3363 for (; p < e; p++) { 3682 3683 3364 if (!isalpha(*p)) 3365 return PyBool_FromLong(0); 3684 3366 } 3685 3367 return PyBool_FromLong(1); … … 3702 3384 /* Shortcut for single character strings */ 3703 3385 if (PyString_GET_SIZE(self) == 1 && 3704 3705 3386 isalnum(*p)) 3387 return PyBool_FromLong(1); 3706 3388 3707 3389 /* Special case for empty strings */ 3708 3390 if (PyString_GET_SIZE(self) == 0) 3709 3391 return PyBool_FromLong(0); 3710 3392 3711 3393 e = p + PyString_GET_SIZE(self); 3712 3394 for (; p < e; p++) { 3713 3714 3395 if (!isalnum(*p)) 3396 return PyBool_FromLong(0); 3715 3397 } 3716 3398 return PyBool_FromLong(1); … … 3733 3415 /* Shortcut for single character strings */ 3734 3416 if (PyString_GET_SIZE(self) == 1 && 3735 3736 3417 isdigit(*p)) 3418 return PyBool_FromLong(1); 3737 3419 3738 3420 /* Special case for empty strings */ 3739 3421 if (PyString_GET_SIZE(self) == 0) 3740 3422 return PyBool_FromLong(0); 3741 3423 3742 3424 e = p + PyString_GET_SIZE(self); 3743 3425 for (; p < e; p++) { 3744 3745 3426 if (!isdigit(*p)) 3427 return PyBool_FromLong(0); 3746 3428 } 3747 3429 return PyBool_FromLong(1); … … 3765 3447 /* Shortcut for single character strings */ 3766 3448 if (PyString_GET_SIZE(self) == 1) 3767 3449 return PyBool_FromLong(islower(*p) != 0); 3768 3450 3769 3451 /* Special case for empty strings */ 3770 3452 if (PyString_GET_SIZE(self) == 0) 3771 3453 return PyBool_FromLong(0); 3772 3454 3773 3455 e = p + PyString_GET_SIZE(self); 3774 3456 cased = 0; 3775 3457 for (; p < e; p++) { 3776 3777 3778 3779 3458 if (isupper(*p)) 3459 return PyBool_FromLong(0); 3460 else if (!cased && islower(*p)) 3461 cased = 1; 3780 3462 } 3781 3463 return PyBool_FromLong(cased); … … 3799 3481 /* Shortcut for 
single character strings */ 3800 3482 if (PyString_GET_SIZE(self) == 1) 3801 3483 return PyBool_FromLong(isupper(*p) != 0); 3802 3484 3803 3485 /* Special case for empty strings */ 3804 3486 if (PyString_GET_SIZE(self) == 0) 3805 3487 return PyBool_FromLong(0); 3806 3488 3807 3489 e = p + PyString_GET_SIZE(self); 3808 3490 cased = 0; 3809 3491 for (; p < e; p++) { 3810 3811 3812 3813 3492 if (islower(*p)) 3493 return PyBool_FromLong(0); 3494 else if (!cased && isupper(*p)) 3495 cased = 1; 3814 3496 } 3815 3497 return PyBool_FromLong(cased); … … 3835 3517 /* Shortcut for single character strings */ 3836 3518 if (PyString_GET_SIZE(self) == 1) 3837 3519 return PyBool_FromLong(isupper(*p) != 0); 3838 3520 3839 3521 /* Special case for empty strings */ 3840 3522 if (PyString_GET_SIZE(self) == 0) 3841 3523 return PyBool_FromLong(0); 3842 3524 3843 3525 e = p + PyString_GET_SIZE(self); … … 3845 3527 previous_is_cased = 0; 3846 3528 for (; p < e; p++) { 3847 3848 3849 3850 3851 3852 3853 3854 3855 3856 3857 3858 3859 3860 3861 3862 3529 register const unsigned char ch = *p; 3530 3531 if (isupper(ch)) { 3532 if (previous_is_cased) 3533 return PyBool_FromLong(0); 3534 previous_is_cased = 1; 3535 cased = 1; 3536 } 3537 else if (islower(ch)) { 3538 if (!previous_is_cased) 3539 return PyBool_FromLong(0); 3540 previous_is_cased = 1; 3541 cased = 1; 3542 } 3543 else 3544 previous_is_cased = 0; 3863 3545 } 3864 3546 return PyBool_FromLong(cased); … … 3867 3549 3868 3550 PyDoc_STRVAR(splitlines__doc__, 3869 "S.splitlines( [keepends]) -> list of strings\n\3551 "S.splitlines(keepends=False) -> list of strings\n\ 3870 3552 \n\ 3871 3553 Return a list of the lines in S, breaking at line boundaries.\n\ … … 3876 3558 string_splitlines(PyStringObject *self, PyObject *args) 3877 3559 { 3878 register Py_ssize_t i;3879 register Py_ssize_t j;3880 Py_ssize_t len;3881 3560 int keepends = 0; 3882 PyObject *list;3883 PyObject *str;3884 char *data;3885 3561 3886 3562 if (!PyArg_ParseTuple(args, 
"|i:splitlines", &keepends)) 3887 3563 return NULL; 3888 3564 3889 data = PyString_AS_STRING(self); 3890 len = PyString_GET_SIZE(self); 3891 3892 /* This does not use the preallocated list because splitlines is 3893 usually run with hundreds of newlines. The overhead of 3894 switching between PyList_SET_ITEM and append causes about a 3895 2-3% slowdown for that common case. A smarter implementation 3896 could move the if check out, so the SET_ITEMs are done first 3897 and the appends only done when the prealloc buffer is full. 3898 That's too much work for little gain.*/ 3899 3900 list = PyList_New(0); 3901 if (!list) 3902 goto onError; 3903 3904 for (i = j = 0; i < len; ) { 3905 Py_ssize_t eol; 3906 3907 /* Find a line and append it */ 3908 while (i < len && data[i] != '\n' && data[i] != '\r') 3909 i++; 3910 3911 /* Skip the line break reading CRLF as one line break */ 3912 eol = i; 3913 if (i < len) { 3914 if (data[i] == '\r' && i + 1 < len && 3915 data[i+1] == '\n') 3916 i += 2; 3917 else 3918 i++; 3919 if (keepends) 3920 eol = i; 3921 } 3922 SPLIT_APPEND(data, j, eol); 3923 j = i; 3924 } 3925 if (j < len) { 3926 SPLIT_APPEND(data, j, len); 3927 } 3928 3929 return list; 3930 3931 onError: 3932 Py_XDECREF(list); 3933 return NULL; 3565 return stringlib_splitlines( 3566 (PyObject*) self, PyString_AS_STRING(self), PyString_GET_SIZE(self), 3567 keepends 3568 ); 3934 3569 } 3935 3570 … … 3940 3575 string_sizeof(PyStringObject *v) 3941 3576 { 3942 Py_ssize_t res; 3943 res = sizeof(PyStringObject) + v->ob_size * v->ob_type->tp_itemsize; 3944 return PyInt_FromSsize_t(res); 3945 } 3946 3947 #undef SPLIT_APPEND 3948 #undef SPLIT_ADD 3949 #undef MAX_PREALLOC 3950 #undef PREALLOC_SIZE 3577 Py_ssize_t res; 3578 res = PyStringObject_SIZE + PyString_GET_SIZE(v) * Py_TYPE(v)->tp_itemsize; 3579 return PyInt_FromSsize_t(res); 3580 } 3951 3581 3952 3582 static PyObject * 3953 3583 string_getnewargs(PyStringObject *v) 3954 3584 { 3955 3585 return Py_BuildValue("(s#)", v->ob_sval, 
Py_SIZE(v)); 3956 3586 } 3957 3587 … … 3960 3590 3961 3591 PyDoc_STRVAR(format__doc__, 3962 "S.format(*args, **kwargs) -> unicode\n\3592 "S.format(*args, **kwargs) -> string\n\ 3963 3593 \n\ 3964 "); 3594 Return a formatted version of S, using substitutions from args and kwargs.\n\ 3595 The substitutions are identified by braces ('{' and '}')."); 3965 3596 3966 3597 static PyObject * … … 3977 3608 if (!(PyString_Check(format_spec) || PyUnicode_Check(format_spec))) { 3978 3609 PyErr_Format(PyExc_TypeError, "__format__ arg must be str " 3979 3980 3610 "or unicode, not %s", Py_TYPE(format_spec)->tp_name); 3611 goto done; 3981 3612 } 3982 3613 tmp = PyObject_Str(format_spec); … … 3986 3617 3987 3618 result = _PyBytes_FormatAdvanced(self, 3988 3989 3619 PyString_AS_STRING(format_spec), 3620 PyString_GET_SIZE(format_spec)); 3990 3621 done: 3991 3622 Py_XDECREF(tmp); … … 3994 3625 3995 3626 PyDoc_STRVAR(p_format__doc__, 3996 "S.__format__(format_spec) -> unicode\n\3627 "S.__format__(format_spec) -> string\n\ 3997 3628 \n\ 3998 ");3629 Return a formatted version of S as described by format_spec."); 3999 3630 4000 3631 4001 3632 static PyMethodDef 4002 3633 string_methods[] = { 4003 4004 4005 4006 4007 4008 4009 4010 4011 4012 4013 4014 4015 4016 4017 4018 4019 4020 4021 4022 4023 4024 4025 4026 4027 4028 4029 4030 4031 4032 4033 4034 4035 4036 4037 4038 4039 4040 4041 4042 4043 4044 4045 4046 4047 4048 {"encode", (PyCFunction)string_encode, METH_VARARGS, encode__doc__},4049 {"decode", (PyCFunction)string_decode, METH_VARARGS, decode__doc__},4050 4051 4052 4053 4054 4055 4056 {"__getnewargs__", (PyCFunction)string_getnewargs,METH_NOARGS},4057 {NULL, NULL}/* sentinel */3634 /* Counterparts of the obsolete stropmodule functions; except 3635 string.maketrans(). 
*/ 3636 {"join", (PyCFunction)string_join, METH_O, join__doc__}, 3637 {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__}, 3638 {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__}, 3639 {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__}, 3640 {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__}, 3641 {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__}, 3642 {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__}, 3643 {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__}, 3644 {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__}, 3645 {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__}, 3646 {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__}, 3647 {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__}, 3648 {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS, 3649 capitalize__doc__}, 3650 {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__}, 3651 {"endswith", (PyCFunction)string_endswith, METH_VARARGS, 3652 endswith__doc__}, 3653 {"partition", (PyCFunction)string_partition, METH_O, partition__doc__}, 3654 {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__}, 3655 {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__}, 3656 {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__}, 3657 {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__}, 3658 {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__}, 3659 {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__}, 3660 {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__}, 3661 {"rpartition", (PyCFunction)string_rpartition, METH_O, 3662 rpartition__doc__}, 3663 {"startswith", (PyCFunction)string_startswith, METH_VARARGS, 3664 startswith__doc__}, 3665 {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__}, 3666 {"swapcase", 
(PyCFunction)string_swapcase, METH_NOARGS, 3667 swapcase__doc__}, 3668 {"translate", (PyCFunction)string_translate, METH_VARARGS, 3669 translate__doc__}, 3670 {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__}, 3671 {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__}, 3672 {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__}, 3673 {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__}, 3674 {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__}, 3675 {"format", (PyCFunction) do_string_format, METH_VARARGS | METH_KEYWORDS, format__doc__}, 3676 {"__format__", (PyCFunction) string__format__, METH_VARARGS, p_format__doc__}, 3677 {"_formatter_field_name_split", (PyCFunction) formatter_field_name_split, METH_NOARGS}, 3678 {"_formatter_parser", (PyCFunction) formatter_parser, METH_NOARGS}, 3679 {"encode", (PyCFunction)string_encode, METH_VARARGS | METH_KEYWORDS, encode__doc__}, 3680 {"decode", (PyCFunction)string_decode, METH_VARARGS | METH_KEYWORDS, decode__doc__}, 3681 {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS, 3682 expandtabs__doc__}, 3683 {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS, 3684 splitlines__doc__}, 3685 {"__sizeof__", (PyCFunction)string_sizeof, METH_NOARGS, 3686 sizeof__doc__}, 3687 {"__getnewargs__", (PyCFunction)string_getnewargs, METH_NOARGS}, 3688 {NULL, NULL} /* sentinel */ 4058 3689 }; 4059 3690 … … 4064 3695 string_new(PyTypeObject *type, PyObject *args, PyObject *kwds) 4065 3696 { 4066 4067 4068 4069 4070 4071 4072 4073 4074 4075 3697 PyObject *x = NULL; 3698 static char *kwlist[] = {"object", 0}; 3699 3700 if (type != &PyString_Type) 3701 return str_subtype_new(type, args, kwds); 3702 if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x)) 3703 return NULL; 3704 if (x == NULL) 3705 return PyString_FromString(""); 3706 return PyObject_Str(x); 4076 3707 } 4077 3708 … … 4079 3710 str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds) 
4080 3711 { 4081 4082 4083 4084 4085 4086 4087 4088 4089 4090 4091 4092 4093 4094 4095 4096 4097 4098 3712 PyObject *tmp, *pnew; 3713 Py_ssize_t n; 3714 3715 assert(PyType_IsSubtype(type, &PyString_Type)); 3716 tmp = string_new(&PyString_Type, args, kwds); 3717 if (tmp == NULL) 3718 return NULL; 3719 assert(PyString_CheckExact(tmp)); 3720 n = PyString_GET_SIZE(tmp); 3721 pnew = type->tp_alloc(type, n); 3722 if (pnew != NULL) { 3723 Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1); 3724 ((PyStringObject *)pnew)->ob_shash = 3725 ((PyStringObject *)tmp)->ob_shash; 3726 ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED; 3727 } 3728 Py_DECREF(tmp); 3729 return pnew; 4099 3730 } 4100 3731 … … 4102 3733 basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds) 4103 3734 { 4104 4105 4106 3735 PyErr_SetString(PyExc_TypeError, 3736 "The basestring type cannot be instantiated"); 3737 return NULL; 4107 3738 } 4108 3739 … … 4110 3741 string_mod(PyObject *v, PyObject *w) 4111 3742 { 4112 4113 4114 4115 4116 3743 if (!PyString_Check(v)) { 3744 Py_INCREF(Py_NotImplemented); 3745 return Py_NotImplemented; 3746 } 3747 return PyString_Format(v, w); 4117 3748 } 4118 3749 … … 4121 3752 4122 3753 static PyNumberMethods string_as_number = { 4123 0,/*nb_add*/4124 0,/*nb_subtract*/4125 0,/*nb_multiply*/4126 0,/*nb_divide*/4127 string_mod,/*nb_remainder*/3754 0, /*nb_add*/ 3755 0, /*nb_subtract*/ 3756 0, /*nb_multiply*/ 3757 0, /*nb_divide*/ 3758 string_mod, /*nb_remainder*/ 4128 3759 }; 4129 3760 4130 3761 4131 3762 PyTypeObject PyBaseString_Type = { 4132 4133 4134 4135 4136 0,/* tp_dealloc */4137 0,/* tp_print */4138 0,/* tp_getattr */4139 0,/* tp_setattr */4140 0,/* tp_compare */4141 0,/* tp_repr */4142 0,/* tp_as_number */4143 0,/* tp_as_sequence */4144 0,/* tp_as_mapping */4145 0,/* tp_hash */4146 0,/* tp_call */4147 0,/* tp_str */4148 0,/* tp_getattro */4149 0,/* tp_setattro */4150 0,/* tp_as_buffer */4151 4152 basestring_doc,/* tp_doc */4153 0,/* 
tp_traverse */4154 0,/* tp_clear */4155 0,/* tp_richcompare */4156 0,/* tp_weaklistoffset */4157 0,/* tp_iter */4158 0,/* tp_iternext */4159 0,/* tp_methods */4160 0,/* tp_members */4161 0,/* tp_getset */4162 &PyBaseObject_Type,/* tp_base */4163 0,/* tp_dict */4164 0,/* tp_descr_get */4165 0,/* tp_descr_set */4166 0,/* tp_dictoffset */4167 0,/* tp_init */4168 0,/* tp_alloc */4169 basestring_new,/* tp_new */4170 0,/* tp_free */3763 PyVarObject_HEAD_INIT(&PyType_Type, 0) 3764 "basestring", 3765 0, 3766 0, 3767 0, /* tp_dealloc */ 3768 0, /* tp_print */ 3769 0, /* tp_getattr */ 3770 0, /* tp_setattr */ 3771 0, /* tp_compare */ 3772 0, /* tp_repr */ 3773 0, /* tp_as_number */ 3774 0, /* tp_as_sequence */ 3775 0, /* tp_as_mapping */ 3776 0, /* tp_hash */ 3777 0, /* tp_call */ 3778 0, /* tp_str */ 3779 0, /* tp_getattro */ 3780 0, /* tp_setattro */ 3781 0, /* tp_as_buffer */ 3782 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */ 3783 basestring_doc, /* tp_doc */ 3784 0, /* tp_traverse */ 3785 0, /* tp_clear */ 3786 0, /* tp_richcompare */ 3787 0, /* tp_weaklistoffset */ 3788 0, /* tp_iter */ 3789 0, /* tp_iternext */ 3790 0, /* tp_methods */ 3791 0, /* tp_members */ 3792 0, /* tp_getset */ 3793 &PyBaseObject_Type, /* tp_base */ 3794 0, /* tp_dict */ 3795 0, /* tp_descr_get */ 3796 0, /* tp_descr_set */ 3797 0, /* tp_dictoffset */ 3798 0, /* tp_init */ 3799 0, /* tp_alloc */ 3800 basestring_new, /* tp_new */ 3801 0, /* tp_free */ 4171 3802 }; 4172 3803 4173 3804 PyDoc_STRVAR(string_doc, 4174 "str(object ) -> string\n\3805 "str(object='') -> string\n\ 4175 3806 \n\ 4176 3807 Return a nice string representation of the object.\n\ … … 4178 3809 4179 3810 PyTypeObject PyString_Type = { 4180 4181 4182 sizeof(PyStringObject),4183 4184 string_dealloc,/* tp_dealloc */4185 (printfunc)string_print,/* tp_print */4186 0,/* tp_getattr */4187 0,/* tp_setattr */4188 0,/* tp_compare */4189 string_repr,/* tp_repr */4190 &string_as_number,/* tp_as_number */4191 &string_as_sequence,/* 
tp_as_sequence */4192 &string_as_mapping,/* tp_as_mapping */4193 (hashfunc)string_hash,/* tp_hash */4194 0,/* tp_call */4195 string_str,/* tp_str */4196 PyObject_GenericGetAttr,/* tp_getattro */4197 0,/* tp_setattro */4198 &string_as_buffer,/* tp_as_buffer */4199 4200 4201 Py_TPFLAGS_HAVE_NEWBUFFER,/* tp_flags */4202 string_doc,/* tp_doc */4203 0,/* tp_traverse */4204 0,/* tp_clear */4205 (richcmpfunc)string_richcompare,/* tp_richcompare */4206 0,/* tp_weaklistoffset */4207 0,/* tp_iter */4208 0,/* tp_iternext */4209 string_methods,/* tp_methods */4210 0,/* tp_members */4211 0,/* tp_getset */4212 &PyBaseString_Type,/* tp_base */4213 0,/* tp_dict */4214 0,/* tp_descr_get */4215 0,/* tp_descr_set */4216 0,/* tp_dictoffset */4217 0,/* tp_init */4218 0,/* tp_alloc */4219 string_new,/* tp_new */4220 PyObject_Del,/* tp_free */3811 PyVarObject_HEAD_INIT(&PyType_Type, 0) 3812 "str", 3813 PyStringObject_SIZE, 3814 sizeof(char), 3815 string_dealloc, /* tp_dealloc */ 3816 (printfunc)string_print, /* tp_print */ 3817 0, /* tp_getattr */ 3818 0, /* tp_setattr */ 3819 0, /* tp_compare */ 3820 string_repr, /* tp_repr */ 3821 &string_as_number, /* tp_as_number */ 3822 &string_as_sequence, /* tp_as_sequence */ 3823 &string_as_mapping, /* tp_as_mapping */ 3824 (hashfunc)string_hash, /* tp_hash */ 3825 0, /* tp_call */ 3826 string_str, /* tp_str */ 3827 PyObject_GenericGetAttr, /* tp_getattro */ 3828 0, /* tp_setattro */ 3829 &string_as_buffer, /* tp_as_buffer */ 3830 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES | 3831 Py_TPFLAGS_BASETYPE | Py_TPFLAGS_STRING_SUBCLASS | 3832 Py_TPFLAGS_HAVE_NEWBUFFER, /* tp_flags */ 3833 string_doc, /* tp_doc */ 3834 0, /* tp_traverse */ 3835 0, /* tp_clear */ 3836 (richcmpfunc)string_richcompare, /* tp_richcompare */ 3837 0, /* tp_weaklistoffset */ 3838 0, /* tp_iter */ 3839 0, /* tp_iternext */ 3840 string_methods, /* tp_methods */ 3841 0, /* tp_members */ 3842 0, /* tp_getset */ 3843 &PyBaseString_Type, /* tp_base */ 3844 0, /* tp_dict */ 3845 0, /* 
tp_descr_get */ 3846 0, /* tp_descr_set */ 3847 0, /* tp_dictoffset */ 3848 0, /* tp_init */ 3849 0, /* tp_alloc */ 3850 string_new, /* tp_new */ 3851 PyObject_Del, /* tp_free */ 4221 3852 }; 4222 3853 … … 4224 3855 PyString_Concat(register PyObject **pv, register PyObject *w) 4225 3856 { 4226 register PyObject *v; 4227 if (*pv == NULL) 4228 return; 4229 if (w == NULL || !PyString_Check(*pv)) { 4230 Py_DECREF(*pv); 4231 *pv = NULL; 4232 return; 4233 } 4234 v = string_concat((PyStringObject *) *pv, w); 4235 Py_DECREF(*pv); 4236 *pv = v; 3857 register PyObject *v; 3858 if (*pv == NULL) 3859 return; 3860 if (w == NULL || !PyString_Check(*pv)) { 3861 Py_CLEAR(*pv); 3862 return; 3863 } 3864 v = string_concat((PyStringObject *) *pv, w); 3865 Py_DECREF(*pv); 3866 *pv = v; 4237 3867 } 4238 3868 … … 4240 3870 PyString_ConcatAndDel(register PyObject **pv, register PyObject *w) 4241 3871 { 4242 4243 3872 PyString_Concat(pv, w); 3873 Py_XDECREF(w); 4244 3874 } 4245 3875 … … 4262 3892 _PyString_Resize(PyObject **pv, Py_ssize_t newsize) 4263 3893 { 4264 4265 4266 4267 4268 4269 4270 4271 4272 4273 4274 4275 4276 4277 4278 PyObject_REALLOC((char *)v, sizeof(PyStringObject)+ newsize);4279 4280 4281 4282 4283 4284 4285 4286 4287 4288 sv->ob_shash = -1;/* invalidate cached hash value */4289 3894 register PyObject *v; 3895 register PyStringObject *sv; 3896 v = *pv; 3897 if (!PyString_Check(v) || Py_REFCNT(v) != 1 || newsize < 0 || 3898 PyString_CHECK_INTERNED(v)) { 3899 *pv = 0; 3900 Py_DECREF(v); 3901 PyErr_BadInternalCall(); 3902 return -1; 3903 } 3904 /* XXX UNREF/NEWREF interface should be more symmetrical */ 3905 _Py_DEC_REFTOTAL; 3906 _Py_ForgetReference(v); 3907 *pv = (PyObject *) 3908 PyObject_REALLOC((char *)v, PyStringObject_SIZE + newsize); 3909 if (*pv == NULL) { 3910 PyObject_Del(v); 3911 PyErr_NoMemory(); 3912 return -1; 3913 } 3914 _Py_NewReference(*pv); 3915 sv = (PyStringObject *) *pv; 3916 Py_SIZE(sv) = newsize; 3917 sv->ob_sval[newsize] = '\0'; 3918 sv->ob_shash = 
-1; /* invalidate cached hash value */ 3919 return 0; 4290 3920 } 4291 3921 … … 4295 3925 getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx) 4296 3926 { 4297 4298 4299 4300 4301 4302 4303 4304 4305 4306 4307 3927 Py_ssize_t argidx = *p_argidx; 3928 if (argidx < arglen) { 3929 (*p_argidx)++; 3930 if (arglen < 0) 3931 return args; 3932 else 3933 return PyTuple_GetItem(args, argidx); 3934 } 3935 PyErr_SetString(PyExc_TypeError, 3936 "not enough arguments for format string"); 3937 return NULL; 4308 3938 } 4309 3939 4310 3940 /* Format codes 4311 * F_LJUST 4312 * F_SIGN 4313 * F_BLANK 4314 * F_ALT 4315 * F_ZERO 3941 * F_LJUST '-' 3942 * F_SIGN '+' 3943 * F_BLANK ' ' 3944 * F_ALT '#' 3945 * F_ZERO '0' 4316 3946 */ 4317 3947 #define F_LJUST (1<<0) 4318 #define F_SIGN 3948 #define F_SIGN (1<<1) 4319 3949 #define F_BLANK (1<<2) 4320 #define F_ALT (1<<3) 4321 #define F_ZERO (1<<4) 4322 4323 Py_LOCAL_INLINE(int) 4324 formatfloat(char *buf, size_t buflen, int flags, 4325 int prec, int type, PyObject *v) 4326 { 4327 /* fmt = '%#.' + `prec` + `type` 4328 worst case length = 3 + 10 (len of INT_MAX) + 1 = 14 (use 20)*/ 4329 char fmt[20]; 4330 double x; 4331 x = PyFloat_AsDouble(v); 4332 if (x == -1.0 && PyErr_Occurred()) { 4333 PyErr_Format(PyExc_TypeError, "float argument required, " 4334 "not %.200s", Py_TYPE(v)->tp_name); 4335 return -1; 4336 } 4337 if (prec < 0) 4338 prec = 6; 4339 #if SIZEOF_INT > 4 4340 /* make sure that the decimal representation of precision really does 4341 need at most 10 digits: platforms with sizeof(int) == 8 exist! */ 4342 if (prec > 0x7fffffff) { 4343 PyErr_SetString(PyExc_OverflowError, 4344 "outrageously large precision " 4345 "for formatted float"); 4346 return -1; 4347 } 4348 #endif 4349 4350 if (type == 'f' && fabs(x) >= 1e50) 4351 type = 'g'; 4352 /* Worst case length calc to ensure no buffer overrun: 4353 4354 'g' formats: 4355 fmt = %#.<prec>g 4356 buf = '-' + [0-9]*prec + '.' + 'e+' + (longest exp 4357 for any double rep.) 
4358 len = 1 + prec + 1 + 2 + 5 = 9 + prec 4359 4360 'f' formats: 4361 buf = '-' + [0-9]*x + '.' + [0-9]*prec (with x < 50) 4362 len = 1 + 50 + 1 + prec = 52 + prec 4363 4364 If prec=0 the effective precision is 1 (the leading digit is 4365 always given), therefore increase the length by one. 4366 4367 */ 4368 if (((type == 'g' || type == 'G') && 4369 buflen <= (size_t)10 + (size_t)prec) || 4370 (type == 'f' && buflen <= (size_t)53 + (size_t)prec)) { 4371 PyErr_SetString(PyExc_OverflowError, 4372 "formatted float is too long (precision too large?)"); 4373 return -1; 4374 } 4375 PyOS_snprintf(fmt, sizeof(fmt), "%%%s.%d%c", 4376 (flags&F_ALT) ? "#" : "", 4377 prec, type); 4378 PyOS_ascii_formatd(buf, buflen, fmt, x); 4379 return (int)strlen(buf); 3950 #define F_ALT (1<<3) 3951 #define F_ZERO (1<<4) 3952 3953 /* Returns a new reference to a PyString object, or NULL on failure. */ 3954 3955 static PyObject * 3956 formatfloat(PyObject *v, int flags, int prec, int type) 3957 { 3958 char *p; 3959 PyObject *result; 3960 double x; 3961 3962 x = PyFloat_AsDouble(v); 3963 if (x == -1.0 && PyErr_Occurred()) { 3964 PyErr_Format(PyExc_TypeError, "float argument required, " 3965 "not %.200s", Py_TYPE(v)->tp_name); 3966 return NULL; 3967 } 3968 3969 if (prec < 0) 3970 prec = 6; 3971 3972 p = PyOS_double_to_string(x, type, prec, 3973 (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL); 3974 3975 if (p == NULL) 3976 return NULL; 3977 result = PyString_FromStringAndSize(p, strlen(p)); 3978 PyMem_Free(p); 3979 return result; 4380 3980 } 4381 3981 … … 4393 3993 * There will be at least prec digits, zero-filled on the left if 4394 3994 * necessary to get that many. 
4395 * val 4396 * flags 4397 * prec 4398 * type 3995 * val object to be converted 3996 * flags bitmask of format flags; only F_ALT is looked at 3997 * prec minimum number of digits; 0-fill on left if needed 3998 * type a character in [duoxX]; u acts the same as d 4399 3999 * 4400 4000 * CAUTION: o, x and X conversions on regular ints can never … … 4403 4003 PyObject* 4404 4004 _PyString_FormatLong(PyObject *val, int flags, int prec, int type, 4405 4406 { 4407 4408 4409 4410 int sign;/* 1 if '-', else 0 */4411 int len;/* number of characters */4412 4413 int numdigits;/* len == numnondigits + numdigits */4414 4415 4416 4417 4418 4419 4420 4421 4422 4423 4424 4425 4426 4427 4428 4429 4430 4431 4432 4433 4434 4435 4436 4437 4438 4439 4440 4441 4442 4443 4444 4445 4446 4447 4448 4449 4450 4451 4452 4453 4454 4455 4456 4457 4458 4459 4460 4461 4462 4463 4464 4465 4466 4467 4468 4469 4470 4471 4472 4473 4474 4475 4476 4477 4478 4479 4480 4481 4482 4483 4484 4485 4486 4487 4488 4489 4490 4491 4492 4493 4494 4495 4496 4497 4498 4499 4500 4501 4502 4503 4504 4505 4506 4507 4508 4509 4510 4511 4512 4513 4514 4515 4516 4517 4518 4519 4520 4521 4522 4523 4524 4525 4005 char **pbuf, int *plen) 4006 { 4007 PyObject *result = NULL; 4008 char *buf; 4009 Py_ssize_t i; 4010 int sign; /* 1 if '-', else 0 */ 4011 int len; /* number of characters */ 4012 Py_ssize_t llen; 4013 int numdigits; /* len == numnondigits + numdigits */ 4014 int numnondigits = 0; 4015 4016 switch (type) { 4017 case 'd': 4018 case 'u': 4019 result = Py_TYPE(val)->tp_str(val); 4020 break; 4021 case 'o': 4022 result = Py_TYPE(val)->tp_as_number->nb_oct(val); 4023 break; 4024 case 'x': 4025 case 'X': 4026 numnondigits = 2; 4027 result = Py_TYPE(val)->tp_as_number->nb_hex(val); 4028 break; 4029 default: 4030 assert(!"'type' not in [duoxX]"); 4031 } 4032 if (!result) 4033 return NULL; 4034 4035 buf = PyString_AsString(result); 4036 if (!buf) { 4037 Py_DECREF(result); 4038 return NULL; 4039 } 4040 4041 /* To modify the 
string in-place, there can only be one reference. */ 4042 if (Py_REFCNT(result) != 1) { 4043 PyErr_BadInternalCall(); 4044 return NULL; 4045 } 4046 llen = PyString_Size(result); 4047 if (llen > INT_MAX) { 4048 PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong"); 4049 return NULL; 4050 } 4051 len = (int)llen; 4052 if (buf[len-1] == 'L') { 4053 --len; 4054 buf[len] = '\0'; 4055 } 4056 sign = buf[0] == '-'; 4057 numnondigits += sign; 4058 numdigits = len - numnondigits; 4059 assert(numdigits > 0); 4060 4061 /* Get rid of base marker unless F_ALT */ 4062 if ((flags & F_ALT) == 0) { 4063 /* Need to skip 0x, 0X or 0. */ 4064 int skipped = 0; 4065 switch (type) { 4066 case 'o': 4067 assert(buf[sign] == '0'); 4068 /* If 0 is only digit, leave it alone. */ 4069 if (numdigits > 1) { 4070 skipped = 1; 4071 --numdigits; 4072 } 4073 break; 4074 case 'x': 4075 case 'X': 4076 assert(buf[sign] == '0'); 4077 assert(buf[sign + 1] == 'x'); 4078 skipped = 2; 4079 numnondigits -= 2; 4080 break; 4081 } 4082 if (skipped) { 4083 buf += skipped; 4084 len -= skipped; 4085 if (sign) 4086 buf[0] = '-'; 4087 } 4088 assert(len == numnondigits + numdigits); 4089 assert(numdigits > 0); 4090 } 4091 4092 /* Fill with leading zeroes to meet minimum width. */ 4093 if (prec > numdigits) { 4094 PyObject *r1 = PyString_FromStringAndSize(NULL, 4095 numnondigits + prec); 4096 char *b1; 4097 if (!r1) { 4098 Py_DECREF(result); 4099 return NULL; 4100 } 4101 b1 = PyString_AS_STRING(r1); 4102 for (i = 0; i < numnondigits; ++i) 4103 *b1++ = *buf++; 4104 for (i = 0; i < prec - numdigits; i++) 4105 *b1++ = '0'; 4106 for (i = 0; i < numdigits; i++) 4107 *b1++ = *buf++; 4108 *b1 = '\0'; 4109 Py_DECREF(result); 4110 result = r1; 4111 buf = PyString_AS_STRING(result); 4112 len = numnondigits + prec; 4113 } 4114 4115 /* Fix up case for hex conversions. */ 4116 if (type == 'X') { 4117 /* Need to convert all lower case letters to upper case. 4118 and need to convert 0x to 0X (and -0x to -0X). 
*/ 4119 for (i = 0; i < len; i++) 4120 if (buf[i] >= 'a' && buf[i] <= 'x') 4121 buf[i] -= 'a'-'A'; 4122 } 4123 *pbuf = buf; 4124 *plen = len; 4125 return result; 4526 4126 } 4527 4127 … … 4530 4130 int prec, int type, PyObject *v) 4531 4131 { 4532 4533 4534 4535 char fmt[64];/* plenty big enough! */4536 4537 4538 4539 4540 4541 4542 4543 4544 4545 4546 4547 4548 4549 4550 4551 4552 4553 4554 4555 4556 4557 4558 4559 4560 4561 4562 4563 4564 4565 4566 4567 4568 4569 4570 4571 4572 4573 4574 4575 4576 4577 4578 4579 4580 4581 4582 4583 4584 4585 4586 4587 4588 4589 4590 4591 4592 4593 4594 4595 4596 4597 4598 4132 /* fmt = '%#.' + `prec` + 'l' + `type` 4133 worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine) 4134 + 1 + 1 = 24 */ 4135 char fmt[64]; /* plenty big enough! */ 4136 char *sign; 4137 long x; 4138 4139 x = PyInt_AsLong(v); 4140 if (x == -1 && PyErr_Occurred()) { 4141 PyErr_Format(PyExc_TypeError, "int argument required, not %.200s", 4142 Py_TYPE(v)->tp_name); 4143 return -1; 4144 } 4145 if (x < 0 && type == 'u') { 4146 type = 'd'; 4147 } 4148 if (x < 0 && (type == 'x' || type == 'X' || type == 'o')) 4149 sign = "-"; 4150 else 4151 sign = ""; 4152 if (prec < 0) 4153 prec = 1; 4154 4155 if ((flags & F_ALT) && 4156 (type == 'x' || type == 'X')) { 4157 /* When converting under %#x or %#X, there are a number 4158 * of issues that cause pain: 4159 * - when 0 is being converted, the C standard leaves off 4160 * the '0x' or '0X', which is inconsistent with other 4161 * %#x/%#X conversions and inconsistent with Python's 4162 * hex() function 4163 * - there are platforms that violate the standard and 4164 * convert 0 with the '0x' or '0X' 4165 * (Metrowerks, Compaq Tru64) 4166 * - there are platforms that give '0x' when converting 4167 * under %#X, but convert 0 in accordance with the 4168 * standard (OS/2 EMX) 4169 * 4170 * We can achieve the desired consistency by inserting our 4171 * own '0x' or '0X' prefix, and substituting %x/%X in place 4172 * of 
%#x/%#X. 4173 * 4174 * Note that this is the same approach as used in 4175 * formatint() in unicodeobject.c 4176 */ 4177 PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c", 4178 sign, type, prec, type); 4179 } 4180 else { 4181 PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c", 4182 sign, (flags&F_ALT) ? "#" : "", 4183 prec, type); 4184 } 4185 4186 /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal)) 4187 * worst case buf = '-0x' + [0-9]*prec, where prec >= 11 4188 */ 4189 if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) { 4190 PyErr_SetString(PyExc_OverflowError, 4191 "formatted integer is too long (precision too large?)"); 4192 return -1; 4193 } 4194 if (sign[0]) 4195 PyOS_snprintf(buf, buflen, fmt, -x); 4196 else 4197 PyOS_snprintf(buf, buflen, fmt, x); 4198 return (int)strlen(buf); 4599 4199 } 4600 4200 … … 4602 4202 formatchar(char *buf, size_t buflen, PyObject *v) 4603 4203 { 4604 4605 4606 4607 4608 4609 4610 4611 4612 4613 4614 4204 /* presume that the buffer is at least 2 characters long */ 4205 if (PyString_Check(v)) { 4206 if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0])) 4207 return -1; 4208 } 4209 else { 4210 if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0])) 4211 return -1; 4212 } 4213 buf[1] = '\0'; 4214 return 1; 4615 4215 } 4616 4216 4617 4217 /* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) 4618 4218 4619 FORMATBUFLEN is the length of the buffer in which the floats, ints,&4219 FORMATBUFLEN is the length of the buffer in which the ints & 4620 4220 chars are formatted. XXX This is a magic number. 
Each formatting 4621 4221 routine does bounds checking to ensure no overflow, but a better … … 4628 4228 PyString_Format(PyObject *format, PyObject *args) 4629 4229 { 4630 4631 4632 4633 4634 4230 char *fmt, *res; 4231 Py_ssize_t arglen, argidx; 4232 Py_ssize_t reslen, rescnt, fmtcnt; 4233 int args_owned = 0; 4234 PyObject *result, *orig_args; 4635 4235 #ifdef Py_USING_UNICODE 4636 4236 PyObject *v, *w; 4637 4237 #endif 4638 4639 4640 4641 4642 4643 4644 4645 4646 4647 4648 4649 4650 4651 4652 4653 4654 4655 4656 4657 4658 4659 if (Py_TYPE(args)->tp_as_mapping && !PyTuple_Check(args)&&4660 4661 4662 4663 4664 4665 4666 4667 if (_PyString_Resize(&result, reslen) < 0)4668 4669 4670 4671 4672 4673 4674 4675 4676 4677 4678 4679 4680 4681 4682 4683 4684 4685 4686 4687 4688 4689 /* For format{float,int,char}() */4238 PyObject *dict = NULL; 4239 if (format == NULL || !PyString_Check(format) || args == NULL) { 4240 PyErr_BadInternalCall(); 4241 return NULL; 4242 } 4243 orig_args = args; 4244 fmt = PyString_AS_STRING(format); 4245 fmtcnt = PyString_GET_SIZE(format); 4246 reslen = rescnt = fmtcnt + 100; 4247 result = PyString_FromStringAndSize((char *)NULL, reslen); 4248 if (result == NULL) 4249 return NULL; 4250 res = PyString_AsString(result); 4251 if (PyTuple_Check(args)) { 4252 arglen = PyTuple_GET_SIZE(args); 4253 argidx = 0; 4254 } 4255 else { 4256 arglen = -1; 4257 argidx = -2; 4258 } 4259 if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript && 4260 !PyTuple_Check(args) && !PyObject_TypeCheck(args, &PyBaseString_Type)) 4261 dict = args; 4262 while (--fmtcnt >= 0) { 4263 if (*fmt != '%') { 4264 if (--rescnt < 0) { 4265 rescnt = fmtcnt + 100; 4266 reslen += rescnt; 4267 if (_PyString_Resize(&result, reslen)) 4268 return NULL; 4269 res = PyString_AS_STRING(result) 4270 + reslen - rescnt; 4271 --rescnt; 4272 } 4273 *res++ = *fmt++; 4274 } 4275 else { 4276 /* Got a format specifier */ 4277 int flags = 0; 4278 Py_ssize_t width = -1; 4279 int prec = 
-1; 4280 int c = '\0'; 4281 int fill; 4282 int isnumok; 4283 PyObject *v = NULL; 4284 PyObject *temp = NULL; 4285 char *pbuf; 4286 int sign; 4287 Py_ssize_t len; 4288 char formatbuf[FORMATBUFLEN]; 4289 /* For format{int,char}() */ 4690 4290 #ifdef Py_USING_UNICODE 4691 4692 4291 char *fmt_start = fmt; 4292 Py_ssize_t argidx_start = argidx; 4693 4293 #endif 4694 4294 4695 fmt++; 4696 if (*fmt == '(') { 4697 char *keystart; 4698 Py_ssize_t keylen; 4699 PyObject *key; 4700 int pcount = 1; 4701 4702 if (dict == NULL) { 4703 PyErr_SetString(PyExc_TypeError, 4704 "format requires a mapping"); 4705 goto error; 4706 } 4707 ++fmt; 4708 --fmtcnt; 4709 keystart = fmt; 4710 /* Skip over balanced parentheses */ 4711 while (pcount > 0 && --fmtcnt >= 0) { 4712 if (*fmt == ')') 4713 --pcount; 4714 else if (*fmt == '(') 4715 ++pcount; 4716 fmt++; 4717 } 4718 keylen = fmt - keystart - 1; 4719 if (fmtcnt < 0 || pcount > 0) { 4720 PyErr_SetString(PyExc_ValueError, 4721 "incomplete format key"); 4722 goto error; 4723 } 4724 key = PyString_FromStringAndSize(keystart, 4725 keylen); 4726 if (key == NULL) 4727 goto error; 4728 if (args_owned) { 4729 Py_DECREF(args); 4730 args_owned = 0; 4731 } 4732 args = PyObject_GetItem(dict, key); 4733 Py_DECREF(key); 4734 if (args == NULL) { 4735 goto error; 4736 } 4737 args_owned = 1; 4738 arglen = -1; 4739 argidx = -2; 4740 } 4741 while (--fmtcnt >= 0) { 4742 switch (c = *fmt++) { 4743 case '-': flags |= F_LJUST; continue; 4744 case '+': flags |= F_SIGN; continue; 4745 case ' ': flags |= F_BLANK; continue; 4746 case '#': flags |= F_ALT; continue; 4747 case '0': flags |= F_ZERO; continue; 4748 } 4749 break; 4750 } 4751 if (c == '*') { 4752 v = getnextarg(args, arglen, &argidx); 4753 if (v == NULL) 4754 goto error; 4755 if (!PyInt_Check(v)) { 4756 PyErr_SetString(PyExc_TypeError, 4757 "* wants int"); 4758 goto error; 4759 } 4760 width = PyInt_AsLong(v); 4761 if (width < 0) { 4762 flags |= F_LJUST; 4763 width = -width; 4764 } 4765 if (--fmtcnt >= 0) 
4766 c = *fmt++; 4767 } 4768 else if (c >= 0 && isdigit(c)) { 4769 width = c - '0'; 4770 while (--fmtcnt >= 0) { 4771 c = Py_CHARMASK(*fmt++); 4772 if (!isdigit(c)) 4773 break; 4774 if ((width*10) / 10 != width) { 4775 PyErr_SetString( 4776 PyExc_ValueError, 4777 "width too big"); 4778 goto error; 4779 } 4780 width = width*10 + (c - '0'); 4781 } 4782 } 4783 if (c == '.') { 4784 prec = 0; 4785 if (--fmtcnt >= 0) 4786 c = *fmt++; 4787 if (c == '*') { 4788 v = getnextarg(args, arglen, &argidx); 4789 if (v == NULL) 4790 goto error; 4791 if (!PyInt_Check(v)) { 4792 PyErr_SetString( 4793 PyExc_TypeError, 4794 "* wants int"); 4795 goto error; 4796 } 4797 prec = PyInt_AsLong(v); 4798 if (prec < 0) 4799 prec = 0; 4800 if (--fmtcnt >= 0) 4801 c = *fmt++; 4802 } 4803 else if (c >= 0 && isdigit(c)) { 4804 prec = c - '0'; 4805 while (--fmtcnt >= 0) { 4806 c = Py_CHARMASK(*fmt++); 4807 if (!isdigit(c)) 4808 break; 4809 if ((prec*10) / 10 != prec) { 4810 PyErr_SetString( 4811 PyExc_ValueError, 4812 "prec too big"); 4813 goto error; 4814 } 4815 prec = prec*10 + (c - '0'); 4816 } 4817 } 4818 } /* prec */ 4819 if (fmtcnt >= 0) { 4820 if (c == 'h' || c == 'l' || c == 'L') { 4821 if (--fmtcnt >= 0) 4822 c = *fmt++; 4823 } 4824 } 4825 if (fmtcnt < 0) { 4826 PyErr_SetString(PyExc_ValueError, 4827 "incomplete format"); 4828 goto error; 4829 } 4830 if (c != '%') { 4831 v = getnextarg(args, arglen, &argidx); 4832 if (v == NULL) 4833 goto error; 4834 } 4835 sign = 0; 4836 fill = ' '; 4837 switch (c) { 4838 case '%': 4839 pbuf = "%"; 4840 len = 1; 4841 break; 4842 case 's': 4295 fmt++; 4296 if (*fmt == '(') { 4297 char *keystart; 4298 Py_ssize_t keylen; 4299 PyObject *key; 4300 int pcount = 1; 4301 4302 if (dict == NULL) { 4303 PyErr_SetString(PyExc_TypeError, 4304 "format requires a mapping"); 4305 goto error; 4306 } 4307 ++fmt; 4308 --fmtcnt; 4309 keystart = fmt; 4310 /* Skip over balanced parentheses */ 4311 while (pcount > 0 && --fmtcnt >= 0) { 4312 if (*fmt == ')') 4313 --pcount; 4314 
else if (*fmt == '(') 4315 ++pcount; 4316 fmt++; 4317 } 4318 keylen = fmt - keystart - 1; 4319 if (fmtcnt < 0 || pcount > 0) { 4320 PyErr_SetString(PyExc_ValueError, 4321 "incomplete format key"); 4322 goto error; 4323 } 4324 key = PyString_FromStringAndSize(keystart, 4325 keylen); 4326 if (key == NULL) 4327 goto error; 4328 if (args_owned) { 4329 Py_DECREF(args); 4330 args_owned = 0; 4331 } 4332 args = PyObject_GetItem(dict, key); 4333 Py_DECREF(key); 4334 if (args == NULL) { 4335 goto error; 4336 } 4337 args_owned = 1; 4338 arglen = -1; 4339 argidx = -2; 4340 } 4341 while (--fmtcnt >= 0) { 4342 switch (c = *fmt++) { 4343 case '-': flags |= F_LJUST; continue; 4344 case '+': flags |= F_SIGN; continue; 4345 case ' ': flags |= F_BLANK; continue; 4346 case '#': flags |= F_ALT; continue; 4347 case '0': flags |= F_ZERO; continue; 4348 } 4349 break; 4350 } 4351 if (c == '*') { 4352 v = getnextarg(args, arglen, &argidx); 4353 if (v == NULL) 4354 goto error; 4355 if (!PyInt_Check(v)) { 4356 PyErr_SetString(PyExc_TypeError, 4357 "* wants int"); 4358 goto error; 4359 } 4360 width = PyInt_AsSsize_t(v); 4361 if (width == -1 && PyErr_Occurred()) 4362 goto error; 4363 if (width < 0) { 4364 flags |= F_LJUST; 4365 width = -width; 4366 } 4367 if (--fmtcnt >= 0) 4368 c = *fmt++; 4369 } 4370 else if (c >= 0 && isdigit(c)) { 4371 width = c - '0'; 4372 while (--fmtcnt >= 0) { 4373 c = Py_CHARMASK(*fmt++); 4374 if (!isdigit(c)) 4375 break; 4376 if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) { 4377 PyErr_SetString( 4378 PyExc_ValueError, 4379 "width too big"); 4380 goto error; 4381 } 4382 width = width*10 + (c - '0'); 4383 } 4384 } 4385 if (c == '.') { 4386 prec = 0; 4387 if (--fmtcnt >= 0) 4388 c = *fmt++; 4389 if (c == '*') { 4390 v = getnextarg(args, arglen, &argidx); 4391 if (v == NULL) 4392 goto error; 4393 if (!PyInt_Check(v)) { 4394 PyErr_SetString( 4395 PyExc_TypeError, 4396 "* wants int"); 4397 goto error; 4398 } 4399 prec = _PyInt_AsInt(v); 4400 if (prec == -1 && 
PyErr_Occurred()) 4401 goto error; 4402 if (prec < 0) 4403 prec = 0; 4404 if (--fmtcnt >= 0) 4405 c = *fmt++; 4406 } 4407 else if (c >= 0 && isdigit(c)) { 4408 prec = c - '0'; 4409 while (--fmtcnt >= 0) { 4410 c = Py_CHARMASK(*fmt++); 4411 if (!isdigit(c)) 4412 break; 4413 if (prec > (INT_MAX - ((int)c - '0')) / 10) { 4414 PyErr_SetString( 4415 PyExc_ValueError, 4416 "prec too big"); 4417 goto error; 4418 } 4419 prec = prec*10 + (c - '0'); 4420 } 4421 } 4422 } /* prec */ 4423 if (fmtcnt >= 0) { 4424 if (c == 'h' || c == 'l' || c == 'L') { 4425 if (--fmtcnt >= 0) 4426 c = *fmt++; 4427 } 4428 } 4429 if (fmtcnt < 0) { 4430 PyErr_SetString(PyExc_ValueError, 4431 "incomplete format"); 4432 goto error; 4433 } 4434 if (c != '%') { 4435 v = getnextarg(args, arglen, &argidx); 4436 if (v == NULL) 4437 goto error; 4438 } 4439 sign = 0; 4440 fill = ' '; 4441 switch (c) { 4442 case '%': 4443 pbuf = "%"; 4444 len = 1; 4445 break; 4446 case 's': 4843 4447 #ifdef Py_USING_UNICODE 4844 4845 4846 4847 4848 4448 if (PyUnicode_Check(v)) { 4449 fmt = fmt_start; 4450 argidx = argidx_start; 4451 goto unicode; 4452 } 4849 4453 #endif 4850 4454 temp = _PyObject_Str(v); 4851 4455 #ifdef Py_USING_UNICODE 4852 4853 4854 4855 4856 4857 4456 if (temp != NULL && PyUnicode_Check(temp)) { 4457 Py_DECREF(temp); 4458 fmt = fmt_start; 4459 argidx = argidx_start; 4460 goto unicode; 4461 } 4858 4462 #endif 4859 /* Fall through */ 4860 case 'r': 4861 if (c == 'r') 4862 temp = PyObject_Repr(v); 4863 if (temp == NULL) 4864 goto error; 4865 if (!PyString_Check(temp)) { 4866 PyErr_SetString(PyExc_TypeError, 4867 "%s argument has non-string str()"); 4868 Py_DECREF(temp); 4869 goto error; 4870 } 4871 pbuf = PyString_AS_STRING(temp); 4872 len = PyString_GET_SIZE(temp); 4873 if (prec >= 0 && len > prec) 4874 len = prec; 4875 break; 4876 case 'i': 4877 case 'd': 4878 case 'u': 4879 case 'o': 4880 case 'x': 4881 case 'X': 4882 if (c == 'i') 4883 c = 'd'; 4884 isnumok = 0; 4885 if (PyNumber_Check(v)) { 4886 
PyObject *iobj=NULL; 4887 4888 if (PyInt_Check(v) || (PyLong_Check(v))) { 4889 iobj = v; 4890 Py_INCREF(iobj); 4891 } 4892 else { 4893 iobj = PyNumber_Int(v); 4894 if (iobj==NULL) iobj = PyNumber_Long(v); 4895 } 4896 if (iobj!=NULL) { 4897 if (PyInt_Check(iobj)) { 4898 isnumok = 1; 4899 pbuf = formatbuf; 4900 len = formatint(pbuf, 4901 sizeof(formatbuf), 4902 flags, prec, c, iobj); 4903 Py_DECREF(iobj); 4904 if (len < 0) 4905 goto error; 4906 sign = 1; 4907 } 4908 else if (PyLong_Check(iobj)) { 4909 int ilen; 4910 4911 isnumok = 1; 4912 temp = _PyString_FormatLong(iobj, flags, 4913 prec, c, &pbuf, &ilen); 4914 Py_DECREF(iobj); 4915 len = ilen; 4916 if (!temp) 4917 goto error; 4918 sign = 1; 4919 } 4920 else { 4921 Py_DECREF(iobj); 4922 } 4923 } 4924 } 4925 if (!isnumok) { 4926 PyErr_Format(PyExc_TypeError, 4927 "%%%c format: a number is required, " 4928 "not %.200s", c, Py_TYPE(v)->tp_name); 4929 goto error; 4930 } 4931 if (flags & F_ZERO) 4932 fill = '0'; 4933 break; 4934 case 'e': 4935 case 'E': 4936 case 'f': 4937 case 'F': 4938 case 'g': 4939 case 'G': 4940 if (c == 'F') 4941 c = 'f'; 4942 pbuf = formatbuf; 4943 len = formatfloat(pbuf, sizeof(formatbuf), 4944 flags, prec, c, v); 4945 if (len < 0) 4946 goto error; 4947 sign = 1; 4948 if (flags & F_ZERO) 4949 fill = '0'; 4950 break; 4951 case 'c': 4463 /* Fall through */ 4464 case 'r': 4465 if (c == 'r') 4466 temp = PyObject_Repr(v); 4467 if (temp == NULL) 4468 goto error; 4469 if (!PyString_Check(temp)) { 4470 PyErr_SetString(PyExc_TypeError, 4471 "%s argument has non-string str()"); 4472 Py_DECREF(temp); 4473 goto error; 4474 } 4475 pbuf = PyString_AS_STRING(temp); 4476 len = PyString_GET_SIZE(temp); 4477 if (prec >= 0 && len > prec) 4478 len = prec; 4479 break; 4480 case 'i': 4481 case 'd': 4482 case 'u': 4483 case 'o': 4484 case 'x': 4485 case 'X': 4486 if (c == 'i') 4487 c = 'd'; 4488 isnumok = 0; 4489 if (PyNumber_Check(v)) { 4490 PyObject *iobj=NULL; 4491 4492 if (PyInt_Check(v) || (PyLong_Check(v))) { 
4493 iobj = v; 4494 Py_INCREF(iobj); 4495 } 4496 else { 4497 iobj = PyNumber_Int(v); 4498 if (iobj==NULL) { 4499 PyErr_Clear(); 4500 iobj = PyNumber_Long(v); 4501 } 4502 } 4503 if (iobj!=NULL) { 4504 if (PyInt_Check(iobj)) { 4505 isnumok = 1; 4506 pbuf = formatbuf; 4507 len = formatint(pbuf, 4508 sizeof(formatbuf), 4509 flags, prec, c, iobj); 4510 Py_DECREF(iobj); 4511 if (len < 0) 4512 goto error; 4513 sign = 1; 4514 } 4515 else if (PyLong_Check(iobj)) { 4516 int ilen; 4517 4518 isnumok = 1; 4519 temp = _PyString_FormatLong(iobj, flags, 4520 prec, c, &pbuf, &ilen); 4521 Py_DECREF(iobj); 4522 len = ilen; 4523 if (!temp) 4524 goto error; 4525 sign = 1; 4526 } 4527 else { 4528 Py_DECREF(iobj); 4529 } 4530 } 4531 } 4532 if (!isnumok) { 4533 PyErr_Format(PyExc_TypeError, 4534 "%%%c format: a number is required, " 4535 "not %.200s", c, Py_TYPE(v)->tp_name); 4536 goto error; 4537 } 4538 if (flags & F_ZERO) 4539 fill = '0'; 4540 break; 4541 case 'e': 4542 case 'E': 4543 case 'f': 4544 case 'F': 4545 case 'g': 4546 case 'G': 4547 temp = formatfloat(v, flags, prec, c); 4548 if (temp == NULL) 4549 goto error; 4550 pbuf = PyString_AS_STRING(temp); 4551 len = PyString_GET_SIZE(temp); 4552 sign = 1; 4553 if (flags & F_ZERO) 4554 fill = '0'; 4555 break; 4556 case 'c': 4952 4557 #ifdef Py_USING_UNICODE 4953 4954 4955 4956 4957 4558 if (PyUnicode_Check(v)) { 4559 fmt = fmt_start; 4560 argidx = argidx_start; 4561 goto unicode; 4562 } 4958 4563 #endif 4959 pbuf = formatbuf; 4960 len = formatchar(pbuf, sizeof(formatbuf), v); 4961 if (len < 0) 4962 goto error; 4963 break; 4964 default: 4965 PyErr_Format(PyExc_ValueError, 4966 "unsupported format character '%c' (0x%x) " 4967 "at index %zd", 4968 c, c, 4969 (Py_ssize_t)(fmt - 1 - 4970 PyString_AsString(format))); 4971 goto error; 4972 } 4973 if (sign) { 4974 if (*pbuf == '-' || *pbuf == '+') { 4975 sign = *pbuf++; 4976 len--; 4977 } 4978 else if (flags & F_SIGN) 4979 sign = '+'; 4980 else if (flags & F_BLANK) 4981 sign = ' '; 4982 else 
4983 sign = 0; 4984 } 4985 if (width < len) 4986 width = len; 4987 if (rescnt - (sign != 0) < width) { 4988 reslen -= rescnt; 4989 rescnt = width + fmtcnt + 100; 4990 reslen += rescnt; 4991 if (reslen < 0) { 4992 Py_DECREF(result); 4993 Py_XDECREF(temp); 4994 return PyErr_NoMemory(); 4995 } 4996 if (_PyString_Resize(&result, reslen) < 0) { 4997 Py_XDECREF(temp); 4998 return NULL; 4999 } 5000 res = PyString_AS_STRING(result) 5001 + reslen - rescnt; 5002 } 5003 if (sign) { 5004 if (fill != ' ') 5005 *res++ = sign; 5006 rescnt--; 5007 if (width > len) 5008 width--; 5009 } 5010 if ((flags & F_ALT) && (c == 'x' || c == 'X')) { 5011 assert(pbuf[0] == '0'); 5012 assert(pbuf[1] == c); 5013 if (fill != ' ') { 5014 *res++ = *pbuf++; 5015 *res++ = *pbuf++; 5016 } 5017 rescnt -= 2; 5018 width -= 2; 5019 if (width < 0) 5020 width = 0; 5021 len -= 2; 5022 } 5023 if (width > len && !(flags & F_LJUST)) { 5024 do { 5025 --rescnt; 5026 *res++ = fill; 5027 } while (--width > len); 5028 } 5029 if (fill == ' ') { 5030 if (sign) 5031 *res++ = sign; 5032 if ((flags & F_ALT) && 5033 (c == 'x' || c == 'X')) { 5034 assert(pbuf[0] == '0'); 5035 assert(pbuf[1] == c); 5036 *res++ = *pbuf++; 5037 *res++ = *pbuf++; 5038 } 5039 } 5040 Py_MEMCPY(res, pbuf, len); 5041 res += len; 5042 rescnt -= len; 5043 while (--width >= len) { 5044 --rescnt; 5045 *res++ = ' '; 5046 } 5047 if (dict && (argidx < arglen) && c != '%') { 5048 PyErr_SetString(PyExc_TypeError, 5049 "not all arguments converted during string formatting"); 5050 Py_XDECREF(temp); 5051 goto error; 5052 } 5053 Py_XDECREF(temp); 5054 } /* '%' */ 5055 } /* until end */ 5056 if (argidx < arglen && !dict) { 5057 PyErr_SetString(PyExc_TypeError, 5058 "not all arguments converted during string formatting"); 5059 goto error; 5060 } 5061 if (args_owned) { 5062 Py_DECREF(args); 5063 } 5064 _PyString_Resize(&result, reslen - rescnt); 5065 return result; 4564 pbuf = formatbuf; 4565 len = formatchar(pbuf, sizeof(formatbuf), v); 4566 if (len < 0) 4567 
goto error; 4568 break; 4569 default: 4570 PyErr_Format(PyExc_ValueError, 4571 "unsupported format character '%c' (0x%x) " 4572 "at index %zd", 4573 c, c, 4574 (Py_ssize_t)(fmt - 1 - 4575 PyString_AsString(format))); 4576 goto error; 4577 } 4578 if (sign) { 4579 if (*pbuf == '-' || *pbuf == '+') { 4580 sign = *pbuf++; 4581 len--; 4582 } 4583 else if (flags & F_SIGN) 4584 sign = '+'; 4585 else if (flags & F_BLANK) 4586 sign = ' '; 4587 else 4588 sign = 0; 4589 } 4590 if (width < len) 4591 width = len; 4592 if (rescnt - (sign != 0) < width) { 4593 reslen -= rescnt; 4594 rescnt = width + fmtcnt + 100; 4595 reslen += rescnt; 4596 if (reslen < 0) { 4597 Py_DECREF(result); 4598 Py_XDECREF(temp); 4599 return PyErr_NoMemory(); 4600 } 4601 if (_PyString_Resize(&result, reslen)) { 4602 Py_XDECREF(temp); 4603 return NULL; 4604 } 4605 res = PyString_AS_STRING(result) 4606 + reslen - rescnt; 4607 } 4608 if (sign) { 4609 if (fill != ' ') 4610 *res++ = sign; 4611 rescnt--; 4612 if (width > len) 4613 width--; 4614 } 4615 if ((flags & F_ALT) && (c == 'x' || c == 'X')) { 4616 assert(pbuf[0] == '0'); 4617 assert(pbuf[1] == c); 4618 if (fill != ' ') { 4619 *res++ = *pbuf++; 4620 *res++ = *pbuf++; 4621 } 4622 rescnt -= 2; 4623 width -= 2; 4624 if (width < 0) 4625 width = 0; 4626 len -= 2; 4627 } 4628 if (width > len && !(flags & F_LJUST)) { 4629 do { 4630 --rescnt; 4631 *res++ = fill; 4632 } while (--width > len); 4633 } 4634 if (fill == ' ') { 4635 if (sign) 4636 *res++ = sign; 4637 if ((flags & F_ALT) && 4638 (c == 'x' || c == 'X')) { 4639 assert(pbuf[0] == '0'); 4640 assert(pbuf[1] == c); 4641 *res++ = *pbuf++; 4642 *res++ = *pbuf++; 4643 } 4644 } 4645 Py_MEMCPY(res, pbuf, len); 4646 res += len; 4647 rescnt -= len; 4648 while (--width >= len) { 4649 --rescnt; 4650 *res++ = ' '; 4651 } 4652 if (dict && (argidx < arglen) && c != '%') { 4653 PyErr_SetString(PyExc_TypeError, 4654 "not all arguments converted during string formatting"); 4655 Py_XDECREF(temp); 4656 goto error; 4657 } 4658 
Py_XDECREF(temp); 4659 } /* '%' */ 4660 } /* until end */ 4661 if (argidx < arglen && !dict) { 4662 PyErr_SetString(PyExc_TypeError, 4663 "not all arguments converted during string formatting"); 4664 goto error; 4665 } 4666 if (args_owned) { 4667 Py_DECREF(args); 4668 } 4669 if (_PyString_Resize(&result, reslen - rescnt)) 4670 return NULL; 4671 return result; 5066 4672 5067 4673 #ifdef Py_USING_UNICODE 5068 4674 unicode: 5069 5070 5071 5072 5073 5074 5075 5076 5077 5078 5079 5080 5081 5082 5083 5084 5085 5086 5087 5088 5089 5090 5091 5092 5093 5094 5095 5096 5097 5098 5099 5100 5101 5102 5103 5104 5105 5106 5107 5108 5109 5110 5111 4675 if (args_owned) { 4676 Py_DECREF(args); 4677 args_owned = 0; 4678 } 4679 /* Fiddle args right (remove the first argidx arguments) */ 4680 if (PyTuple_Check(orig_args) && argidx > 0) { 4681 PyObject *v; 4682 Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx; 4683 v = PyTuple_New(n); 4684 if (v == NULL) 4685 goto error; 4686 while (--n >= 0) { 4687 PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx); 4688 Py_INCREF(w); 4689 PyTuple_SET_ITEM(v, n, w); 4690 } 4691 args = v; 4692 } else { 4693 Py_INCREF(orig_args); 4694 args = orig_args; 4695 } 4696 args_owned = 1; 4697 /* Take what we have of the result and let the Unicode formatting 4698 function format the rest of the input. 
*/ 4699 rescnt = res - PyString_AS_STRING(result); 4700 if (_PyString_Resize(&result, rescnt)) 4701 goto error; 4702 fmtcnt = PyString_GET_SIZE(format) - \ 4703 (fmt - PyString_AS_STRING(format)); 4704 format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL); 4705 if (format == NULL) 4706 goto error; 4707 v = PyUnicode_Format(format, args); 4708 Py_DECREF(format); 4709 if (v == NULL) 4710 goto error; 4711 /* Paste what we have (result) to what the Unicode formatting 4712 function returned (v) and return the result (or error) */ 4713 w = PyUnicode_Concat(result, v); 4714 Py_DECREF(result); 4715 Py_DECREF(v); 4716 Py_DECREF(args); 4717 return w; 5112 4718 #endif /* Py_USING_UNICODE */ 5113 4719 5114 4720 error: 5115 5116 5117 5118 5119 4721 Py_DECREF(result); 4722 if (args_owned) { 4723 Py_DECREF(args); 4724 } 4725 return NULL; 5120 4726 } 5121 4727 … … 5123 4729 PyString_InternInPlace(PyObject **p) 5124 4730 { 5125 5126 5127 5128 5129 5130 5131 5132 5133 5134 5135 5136 5137 5138 5139 5140 5141 5142 5143 5144 5145 5146 5147 5148 5149 5150 5151 5152 5153 5154 5155 5156 5157 4731 register PyStringObject *s = (PyStringObject *)(*p); 4732 PyObject *t; 4733 if (s == NULL || !PyString_Check(s)) 4734 Py_FatalError("PyString_InternInPlace: strings only please!"); 4735 /* If it's a string subclass, we don't really know what putting 4736 it in the interned dict might do. */ 4737 if (!PyString_CheckExact(s)) 4738 return; 4739 if (PyString_CHECK_INTERNED(s)) 4740 return; 4741 if (interned == NULL) { 4742 interned = PyDict_New(); 4743 if (interned == NULL) { 4744 PyErr_Clear(); /* Don't leave an exception */ 4745 return; 4746 } 4747 } 4748 t = PyDict_GetItem(interned, (PyObject *)s); 4749 if (t) { 4750 Py_INCREF(t); 4751 Py_DECREF(*p); 4752 *p = t; 4753 return; 4754 } 4755 4756 if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) { 4757 PyErr_Clear(); 4758 return; 4759 } 4760 /* The two references in interned are not counted by refcnt. 
4761 The string deallocator will take care of this */ 4762 Py_REFCNT(s) -= 2; 4763 PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL; 5158 4764 } 5159 4765 … … 5161 4767 PyString_InternImmortal(PyObject **p) 5162 4768 { 5163 5164 5165 5166 5167 4769 PyString_InternInPlace(p); 4770 if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) { 4771 PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL; 4772 Py_INCREF(*p); 4773 } 5168 4774 } 5169 4775 … … 5172 4778 PyString_InternFromString(const char *cp) 5173 4779 { 5174 5175 5176 5177 5178 4780 PyObject *s = PyString_FromString(cp); 4781 if (s == NULL) 4782 return NULL; 4783 PyString_InternInPlace(&s); 4784 return s; 5179 4785 } 5180 4786 … … 5182 4788 PyString_Fini(void) 5183 4789 { 5184 int i; 5185 for (i = 0; i < UCHAR_MAX + 1; i++) { 5186 Py_XDECREF(characters[i]); 5187 characters[i] = NULL; 5188 } 5189 Py_XDECREF(nullstring); 5190 nullstring = NULL; 4790 int i; 4791 for (i = 0; i < UCHAR_MAX + 1; i++) 4792 Py_CLEAR(characters[i]); 4793 Py_CLEAR(nullstring); 5191 4794 } 5192 4795 5193 4796 void _Py_ReleaseInternedStrings(void) 5194 4797 { 5195 PyObject *keys; 5196 PyStringObject *s; 5197 Py_ssize_t i, n; 5198 Py_ssize_t immortal_size = 0, mortal_size = 0; 5199 5200 if (interned == NULL || !PyDict_Check(interned)) 5201 return; 5202 keys = PyDict_Keys(interned); 5203 if (keys == NULL || !PyList_Check(keys)) { 5204 PyErr_Clear(); 5205 return; 5206 } 5207 5208 /* Since _Py_ReleaseInternedStrings() is intended to help a leak 5209 detector, interned strings are not forcibly deallocated; rather, we 5210 give them their stolen references back, and then clear and DECREF 5211 the interned dict. 
*/ 5212 5213 n = PyList_GET_SIZE(keys); 5214 fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n", 5215 n); 5216 for (i = 0; i < n; i++) { 5217 s = (PyStringObject *) PyList_GET_ITEM(keys, i); 5218 switch (s->ob_sstate) { 5219 case SSTATE_NOT_INTERNED: 5220 /* XXX Shouldn't happen */ 5221 break; 5222 case SSTATE_INTERNED_IMMORTAL: 5223 Py_REFCNT(s) += 1; 5224 immortal_size += Py_SIZE(s); 5225 break; 5226 case SSTATE_INTERNED_MORTAL: 5227 Py_REFCNT(s) += 2; 5228 mortal_size += Py_SIZE(s); 5229 break; 5230 default: 5231 Py_FatalError("Inconsistent interned string state."); 5232 } 5233 s->ob_sstate = SSTATE_NOT_INTERNED; 5234 } 5235 fprintf(stderr, "total size of all interned strings: " 5236 "%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d " 5237 "mortal/immortal\n", mortal_size, immortal_size); 5238 Py_DECREF(keys); 5239 PyDict_Clear(interned); 5240 Py_DECREF(interned); 5241 interned = NULL; 5242 } 4798 PyObject *keys; 4799 PyStringObject *s; 4800 Py_ssize_t i, n; 4801 Py_ssize_t immortal_size = 0, mortal_size = 0; 4802 4803 if (interned == NULL || !PyDict_Check(interned)) 4804 return; 4805 keys = PyDict_Keys(interned); 4806 if (keys == NULL || !PyList_Check(keys)) { 4807 PyErr_Clear(); 4808 return; 4809 } 4810 4811 /* Since _Py_ReleaseInternedStrings() is intended to help a leak 4812 detector, interned strings are not forcibly deallocated; rather, we 4813 give them their stolen references back, and then clear and DECREF 4814 the interned dict. 
*/ 4815 4816 n = PyList_GET_SIZE(keys); 4817 fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n", 4818 n); 4819 for (i = 0; i < n; i++) { 4820 s = (PyStringObject *) PyList_GET_ITEM(keys, i); 4821 switch (s->ob_sstate) { 4822 case SSTATE_NOT_INTERNED: 4823 /* XXX Shouldn't happen */ 4824 break; 4825 case SSTATE_INTERNED_IMMORTAL: 4826 Py_REFCNT(s) += 1; 4827 immortal_size += Py_SIZE(s); 4828 break; 4829 case SSTATE_INTERNED_MORTAL: 4830 Py_REFCNT(s) += 2; 4831 mortal_size += Py_SIZE(s); 4832 break; 4833 default: 4834 Py_FatalError("Inconsistent interned string state."); 4835 } 4836 s->ob_sstate = SSTATE_NOT_INTERNED; 4837 } 4838 fprintf(stderr, "total size of all interned strings: " 4839 "%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d " 4840 "mortal/immortal\n", mortal_size, immortal_size); 4841 Py_DECREF(keys); 4842 PyDict_Clear(interned); 4843 Py_CLEAR(interned); 4844 }
Note: See TracChangeset for help on using the changeset viewer.