Changeset 391 for python/trunk/Modules/_json.c
- Timestamp:
- Mar 19, 2014, 11:31:01 PM (11 years ago)
- Location:
- python/trunk
- Files:
-
- 2 edited
Legend:
- Unmodified
- Added
- Removed
-
python/trunk
-
Property svn:mergeinfo
set to
/python/vendor/Python-2.7.6 merged eligible /python/vendor/current merged eligible
-
Property svn:mergeinfo
set to
-
python/trunk/Modules/_json.c
r2 r391 1 1 #include "Python.h" 2 #include "structmember.h" 3 #if PY_VERSION_HEX < 0x02060000 && !defined(Py_TYPE) 4 #define Py_TYPE(ob) (((PyObject*)(ob))->ob_type) 5 #endif 6 #if PY_VERSION_HEX < 0x02050000 && !defined(PY_SSIZE_T_MIN) 7 typedef int Py_ssize_t; 8 #define PY_SSIZE_T_MAX INT_MAX 9 #define PY_SSIZE_T_MIN INT_MIN 10 #define PyInt_FromSsize_t PyInt_FromLong 11 #define PyInt_AsSsize_t PyInt_AsLong 12 #endif 13 #ifndef Py_IS_FINITE 14 #define Py_IS_FINITE(X) (!Py_IS_INFINITY(X) && !Py_IS_NAN(X)) 15 #endif 16 17 #ifdef __GNUC__ 18 #define UNUSED __attribute__((__unused__)) 19 #else 20 #define UNUSED 21 #endif 2 22 3 23 #define DEFAULT_ENCODING "utf-8" 24 25 #define PyScanner_Check(op) PyObject_TypeCheck(op, &PyScannerType) 26 #define PyScanner_CheckExact(op) (Py_TYPE(op) == &PyScannerType) 27 #define PyEncoder_Check(op) PyObject_TypeCheck(op, &PyEncoderType) 28 #define PyEncoder_CheckExact(op) (Py_TYPE(op) == &PyEncoderType) 29 30 static PyTypeObject PyScannerType; 31 static PyTypeObject PyEncoderType; 32 33 typedef struct _PyScannerObject { 34 PyObject_HEAD 35 PyObject *encoding; 36 PyObject *strict; 37 PyObject *object_hook; 38 PyObject *pairs_hook; 39 PyObject *parse_float; 40 PyObject *parse_int; 41 PyObject *parse_constant; 42 } PyScannerObject; 43 44 static PyMemberDef scanner_members[] = { 45 {"encoding", T_OBJECT, offsetof(PyScannerObject, encoding), READONLY, "encoding"}, 46 {"strict", T_OBJECT, offsetof(PyScannerObject, strict), READONLY, "strict"}, 47 {"object_hook", T_OBJECT, offsetof(PyScannerObject, object_hook), READONLY, "object_hook"}, 48 {"object_pairs_hook", T_OBJECT, offsetof(PyScannerObject, pairs_hook), READONLY, "object_pairs_hook"}, 49 {"parse_float", T_OBJECT, offsetof(PyScannerObject, parse_float), READONLY, "parse_float"}, 50 {"parse_int", T_OBJECT, offsetof(PyScannerObject, parse_int), READONLY, "parse_int"}, 51 {"parse_constant", T_OBJECT, offsetof(PyScannerObject, parse_constant), READONLY, "parse_constant"}, 52 {NULL} 53 }; 54 55 typedef struct _PyEncoderObject { 56 PyObject_HEAD 57 PyObject *markers; 58 PyObject *defaultfn; 59 PyObject *encoder; 60 PyObject *indent; 61 PyObject *key_separator; 62 PyObject *item_separator; 63 PyObject *sort_keys; 64 PyObject *skipkeys; 65 int fast_encode; 66 int allow_nan; 67 } PyEncoderObject; 68 69 static PyMemberDef encoder_members[] = { 70 {"markers", T_OBJECT, offsetof(PyEncoderObject, markers), READONLY, "markers"}, 71 {"default", T_OBJECT, offsetof(PyEncoderObject, defaultfn), READONLY, "default"}, 72 {"encoder", T_OBJECT, offsetof(PyEncoderObject, encoder), READONLY, "encoder"}, 73 {"indent", T_OBJECT, offsetof(PyEncoderObject, indent), READONLY, "indent"}, 74 {"key_separator", T_OBJECT, offsetof(PyEncoderObject, key_separator), READONLY, "key_separator"}, 75 {"item_separator", T_OBJECT, offsetof(PyEncoderObject, item_separator), READONLY, "item_separator"}, 76 {"sort_keys", T_OBJECT, offsetof(PyEncoderObject, sort_keys), READONLY, "sort_keys"}, 77 {"skipkeys", T_OBJECT, offsetof(PyEncoderObject, skipkeys), READONLY, "skipkeys"}, 78 {NULL} 79 }; 80 81 static Py_ssize_t 82 ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars); 83 static PyObject * 84 ascii_escape_unicode(PyObject *pystr); 85 static PyObject * 86 ascii_escape_str(PyObject *pystr); 87 static PyObject * 88 py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr); 89 void init_json(void); 90 static PyObject * 91 scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr); 92 static PyObject * 93 scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr); 94 static PyObject * 95 _build_rval_index_tuple(PyObject *rval, Py_ssize_t idx); 96 static PyObject * 97 scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds); 98 static int 99 scanner_init(PyObject *self, PyObject *args, PyObject *kwds); 100 static void 101 scanner_dealloc(PyObject *self); 102 static int 103 scanner_clear(PyObject *self); 104 static PyObject * 105 encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds); 106 static int 107 encoder_init(PyObject *self, PyObject *args, PyObject *kwds); 108 static void 109 encoder_dealloc(PyObject *self); 110 static int 111 encoder_clear(PyObject *self); 112 static int 113 encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level); 114 static int 115 encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level); 116 static int 117 encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level); 118 static PyObject * 119 _encoded_const(PyObject *obj); 120 static void 121 raise_errmsg(char *msg, PyObject *s, Py_ssize_t end); 122 static PyObject * 123 encoder_encode_string(PyEncoderObject *s, PyObject *obj); 124 static int 125 _convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr); 126 static PyObject * 127 _convertPyInt_FromSsize_t(Py_ssize_t *size_ptr); 128 static PyObject * 129 encoder_encode_float(PyEncoderObject *s, PyObject *obj); 130 4 131 #define S_CHAR(c) (c >= ' ' && c <= '~' && c != '\\' && c != '"') 132 #define IS_WHITESPACE(c) (((c) == ' ') || ((c) == '\t') || ((c) == '\n') || ((c) == '\r')) 133 5 134 #define MIN_EXPANSION 6 6 7 135 #ifdef Py_UNICODE_WIDE 8 136 #define MAX_EXPANSION (2 * MIN_EXPANSION) … … 11 139 #endif 12 140 141 static int 142 _convertPyInt_AsSsize_t(PyObject *o, Py_ssize_t *size_ptr) 143 { 144 /* PyObject to Py_ssize_t converter */ 145 *size_ptr = PyInt_AsSsize_t(o); 146 if (*size_ptr == -1 && PyErr_Occurred()) 147 return 0; 148 return 1; 149 } 150 151 static PyObject * 152 _convertPyInt_FromSsize_t(Py_ssize_t *size_ptr) 153 { 154 /* Py_ssize_t to PyObject converter */ 155 return PyInt_FromSsize_t(*size_ptr); 156 } 157 13 158 static Py_ssize_t 14 159 ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars) 15 160 { 16 Py_UNICODE x; 161 /* Escape unicode code point c to ASCII escape sequences 162 in char *output. output must have at least 12 bytes unused to 163 accommodate an escaped surrogate pair "\uXXXX\uXXXX" */ 17 164 output[chars++] = '\\'; 18 165 switch (c) { … … 31 178 c = 0xd800 | ((v >> 10) & 0x3ff); 32 179 output[chars++] = 'u'; 33 x = (c & 0xf000) >> 12; 34 output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10); 35 x = (c & 0x0f00) >> 8; 36 output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10); 37 x = (c & 0x00f0) >> 4; 38 output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10); 39 x = (c & 0x000f); 40 output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10); 180 output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf]; 181 output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf]; 182 output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf]; 183 output[chars++] = "0123456789abcdef"[(c ) & 0xf]; 41 184 c = 0xdc00 | (v & 0x3ff); 42 185 output[chars++] = '\\'; … … 44 187 #endif 45 188 output[chars++] = 'u'; 46 x = (c & 0xf000) >> 12; 47 output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10); 48 x = (c & 0x0f00) >> 8; 49 output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10); 50 x = (c & 0x00f0) >> 4; 51 output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10); 52 x = (c & 0x000f); 53 output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10); 189 output[chars++] = "0123456789abcdef"[(c >> 12) & 0xf]; 190 output[chars++] = "0123456789abcdef"[(c >> 8) & 0xf]; 191 output[chars++] = "0123456789abcdef"[(c >> 4) & 0xf]; 192 output[chars++] = "0123456789abcdef"[(c ) & 0xf]; 54 193 } 55 194 return chars; … … 59 198 ascii_escape_unicode(PyObject *pystr) 60 199 { 200 /* Take a PyUnicode pystr and return a new ASCII-only escaped PyString */ 61 201 Py_ssize_t i; 62 202 Py_ssize_t input_chars; 63 203 Py_ssize_t output_size; 204 Py_ssize_t max_output_size; 64 205 Py_ssize_t chars; 65 206 PyObject *rval; … … 69 210 input_chars = PyUnicode_GET_SIZE(pystr); 70 211 input_unicode = PyUnicode_AS_UNICODE(pystr); 212 71 213 /* One char input can be up to 6 chars output, estimate 4 of these */ 72 214 output_size = 2 + (MIN_EXPANSION * 4) + input_chars; 215 max_output_size = 2 + (input_chars * MAX_EXPANSION); 73 216 rval = PyString_FromStringAndSize(NULL, output_size); 74 217 if (rval == NULL) { … … 83 226 output[chars++] = (char)c; 84 227 } 85 228 else { 86 229 chars = ascii_escape_char(c, output, chars); 87 230 } 88 231 if (output_size - chars < (1 + MAX_EXPANSION)) { 89 232 /* There's more than four, so let's resize by a lot */ 90 output_size *=2;233 Py_ssize_t new_output_size = output_size * 2; 91 234 /* This is an upper bound */ 92 if (output_size > 2 + (input_chars * MAX_EXPANSION)) { 93 output_size = 2 + (input_chars * MAX_EXPANSION); 94 } 95 if (_PyString_Resize(&rval, output_size) == -1) { 96 return NULL; 97 } 98 output = PyString_AS_STRING(rval); 235 if (new_output_size > max_output_size) { 236 new_output_size = max_output_size; 237 } 238 /* Make sure that the output size changed before resizing */ 239 if (new_output_size != output_size) { 240 output_size = new_output_size; 241 if (_PyString_Resize(&rval, output_size) == -1) { 242 return NULL; 243 } 244 output = PyString_AS_STRING(rval); 245 } 99 246 } 100 247 } … … 109 256 ascii_escape_str(PyObject *pystr) 110 257 { 258 /* Take a PyString pystr and return a new ASCII-only escaped PyString */ 111 259 Py_ssize_t i; 112 260 Py_ssize_t input_chars; … … 119 267 input_chars = PyString_GET_SIZE(pystr); 120 268 input_str = PyString_AS_STRING(pystr); 121 /* One char input can be up to 6 chars output, estimate 4 of these */ 122 output_size = 2 + (MIN_EXPANSION * 4) + input_chars; 269 270 /* Fast path for a string that's already ASCII */ 271 for (i = 0; i < input_chars; i++) { 272 Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i]; 273 if (!S_CHAR(c)) { 274 /* If we have to escape something, scan the string for unicode */ 275 Py_ssize_t j; 276 for (j = i; j < input_chars; j++) { 277 c = (Py_UNICODE)(unsigned char)input_str[j]; 278 if (c > 0x7f) { 279 /* We hit a non-ASCII character, bail to unicode mode */ 280 PyObject *uni; 281 uni = PyUnicode_DecodeUTF8(input_str, input_chars, "strict"); 282 if (uni == NULL) { 283 return NULL; 284 } 285 rval = ascii_escape_unicode(uni); 286 Py_DECREF(uni); 287 return rval; 288 } 289 } 290 break; 291 } 292 } 293 294 if (i == input_chars) { 295 /* Input is already ASCII */ 296 output_size = 2 + input_chars; 297 } 298 else { 299 /* One char input can be up to 6 chars output, estimate 4 of these */ 300 output_size = 2 + (MIN_EXPANSION * 4) + input_chars; 301 } 123 302 rval = PyString_FromStringAndSize(NULL, output_size); 124 303 if (rval == NULL) { … … 126 305 } 127 306 output = PyString_AS_STRING(rval); 128 chars = 0; 129 output[chars++] = '"'; 130 for (i = 0; i < input_chars; i++) { 131 Py_UNICODE c = (Py_UNICODE)input_str[i]; 307 output[0] = '"'; 308 309 /* We know that everything up to i is ASCII already */ 310 chars = i + 1; 311 memcpy(&output[1], input_str, i); 312 313 for (; i < input_chars; i++) { 314 Py_UNICODE c = (Py_UNICODE)(unsigned char)input_str[i]; 132 315 if (S_CHAR(c)) { 133 316 output[chars++] = (char)c; 134 317 } 135 else if (c > 0x7F) { 136 /* We hit a non-ASCII character, bail to unicode mode */ 137 PyObject *uni; 138 Py_DECREF(rval); 139 uni = PyUnicode_DecodeUTF8(input_str, input_chars, "strict"); 140 if (uni == NULL) { 141 return NULL; 142 } 143 rval = ascii_escape_unicode(uni); 144 Py_DECREF(uni); 145 return rval; 146 } 147 else { 318 else { 148 319 chars = ascii_escape_char(c, output, chars); 149 320 } … … 168 339 } 169 340 170 void341 static void 171 342 raise_errmsg(char *msg, PyObject *s, Py_ssize_t end) 172 343 { 344 /* Use the Python function json.decoder.errmsg to raise a nice 345 looking ValueError exception */ 173 346 static PyObject *errmsg_fn = NULL; 174 347 PyObject *pymsg; … … 178 351 return; 179 352 errmsg_fn = PyObject_GetAttrString(decoder, "errmsg"); 353 Py_DECREF(decoder); 180 354 if (errmsg_fn == NULL) 181 355 return; 182 Py_DECREF(decoder); 183 } 184 pymsg = PyObject_CallFunction(errmsg_fn, "(zOn)", msg, s, end); 356 } 357 pymsg = PyObject_CallFunction(errmsg_fn, "(zOO&)", msg, s, _convertPyInt_FromSsize_t, &end); 185 358 if (pymsg) { 186 359 PyErr_SetObject(PyExc_ValueError, pymsg); 187 360 Py_DECREF(pymsg); 188 361 } 189 /*190 191 def linecol(doc, pos):192 lineno = doc.count('\n', 0, pos) + 1193 if lineno == 1:194 colno = pos195 else:196 colno = pos - doc.rindex('\n', 0, pos)197 return lineno, colno198 199 def errmsg(msg, doc, pos, end=None):200 lineno, colno = linecol(doc, pos)201 if end is None:202 return '%s: line %d column %d (char %d)' % (msg, lineno, colno, pos)203 endlineno, endcolno = linecol(doc, end)204 return '%s: line %d column %d - line %d column %d (char %d - %d)' % (205 msg, lineno, colno, endlineno, endcolno, pos, end)206 207 */208 362 } 209 363 … … 211 365 join_list_unicode(PyObject *lst) 212 366 { 213 static PyObject *ustr = NULL; 214 static PyObject *joinstr = NULL; 215 if (ustr == NULL) { 216 Py_UNICODE c = 0; 217 ustr = PyUnicode_FromUnicode(&c, 0); 218 } 219 if (joinstr == NULL) { 220 joinstr = PyString_InternFromString("join"); 221 } 222 if (joinstr == NULL || ustr == NULL) { 223 return NULL; 224 } 225 return PyObject_CallMethodObjArgs(ustr, joinstr, lst, NULL); 226 } 227 228 static PyObject * 229 scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict) 230 { 367 /* return u''.join(lst) */ 368 static PyObject *joinfn = NULL; 369 if (joinfn == NULL) { 370 PyObject *ustr = PyUnicode_FromUnicode(NULL, 0); 371 if (ustr == NULL) 372 return NULL; 373 374 joinfn = PyObject_GetAttrString(ustr, "join"); 375 Py_DECREF(ustr); 376 if (joinfn == NULL) 377 return NULL; 378 } 379 return PyObject_CallFunctionObjArgs(joinfn, lst, NULL); 380 } 381 382 static PyObject * 383 _build_rval_index_tuple(PyObject *rval, Py_ssize_t idx) { 384 /* return (rval, idx) tuple, stealing reference to rval */ 385 PyObject *tpl; 386 PyObject *pyidx; 387 /* 388 steal a reference to rval, returns (rval, idx) 389 */ 390 if (rval == NULL) { 391 return NULL; 392 } 393 pyidx = PyInt_FromSsize_t(idx); 394 if (pyidx == NULL) { 395 Py_DECREF(rval); 396 return NULL; 397 } 398 tpl = PyTuple_New(2); 399 if (tpl == NULL) { 400 Py_DECREF(pyidx); 401 Py_DECREF(rval); 402 return NULL; 403 } 404 PyTuple_SET_ITEM(tpl, 0, rval); 405 PyTuple_SET_ITEM(tpl, 1, pyidx); 406 return tpl; 407 } 408 409 static PyObject * 410 scanstring_str(PyObject *pystr, Py_ssize_t end, char *encoding, int strict, Py_ssize_t *next_end_ptr) 411 { 412 /* Read the JSON string from PyString pystr. 413 end is the index of the first character after the quote. 414 encoding is the encoding of pystr (must be an ASCII superset) 415 if strict is zero then literal control characters are allowed 416 *next_end_ptr is a return-by-reference index of the character 417 after the end quote 418 419 Return value is a new PyString (if ASCII-only) or PyUnicode 420 */ 231 421 PyObject *rval; 232 422 Py_ssize_t len = PyString_GET_SIZE(pystr); 233 423 Py_ssize_t begin = end - 1; 234 Py_ssize_t next = begin;424 Py_ssize_t next; 235 425 char *buf = PyString_AS_STRING(pystr); 236 426 PyObject *chunks = PyList_New(0); … … 247 437 PyObject *chunk = NULL; 248 438 for (next = end; next < len; next++) { 249 c = buf[next];439 c = (unsigned char)buf[next]; 250 440 if (c == '"' || c == '\\') { 251 441 break; … … 262 452 /* Pick up this chunk if it's not zero length */ 263 453 if (next != end) { 264 PyObject *strchunk = Py Buffer_FromMemory(&buf[end], next - end);454 PyObject *strchunk = PyString_FromStringAndSize(&buf[end], next - end); 265 455 if (strchunk == NULL) { 266 456 goto bail; … … 316 506 /* Decode 4 hex digits */ 317 507 for (; next < end; next++) { 318 Py_ssize_t shl = (end - next - 1) << 2;319 508 Py_UNICODE digit = buf[next]; 509 c <<= 4; 320 510 switch (digit) { 321 511 case '0': case '1': case '2': case '3': case '4': 322 512 case '5': case '6': case '7': case '8': case '9': 323 c |= (digit - '0') << shl; break;513 c |= (digit - '0'); break; 324 514 case 'a': case 'b': case 'c': case 'd': case 'e': 325 515 case 'f': 326 c |= (digit - 'a' + 10) << shl; break;516 c |= (digit - 'a' + 10); break; 327 517 case 'A': case 'B': case 'C': case 'D': case 'E': 328 518 case 'F': 329 c |= (digit - 'A' + 10) << shl; break;519 c |= (digit - 'A' + 10); break; 330 520 default: 331 521 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5); … … 335 525 #ifdef Py_UNICODE_WIDE 336 526 /* Surrogate pair */ 337 if ( c >= 0xd800 && c <= 0xdbff) {527 if ((c & 0xfc00) == 0xd800) { 338 528 Py_UNICODE c2 = 0; 339 529 if (end + 6 >= len) { 340 raise_errmsg(" Invalid \\uXXXX\\uXXXX surrogate pair", pystr,341 end - 5);530 raise_errmsg("Unpaired high surrogate", pystr, end - 5); 531 goto bail; 342 532 } 343 533 if (buf[next++] != '\\' || buf[next++] != 'u') { 344 raise_errmsg(" Invalid \\uXXXX\\uXXXX surrogate pair", pystr,345 end - 5);534 raise_errmsg("Unpaired high surrogate", pystr, end - 5); 535 goto bail; 346 536 } 347 537 end += 6; 348 538 /* Decode 4 hex digits */ 349 539 for (; next < end; next++) { 350 Py_ssize_t shl = (end - next - 1) << 2;351 540 Py_UNICODE digit = buf[next]; 541 c2 <<= 4; 352 542 switch (digit) { 353 543 case '0': case '1': case '2': case '3': case '4': 354 544 case '5': case '6': case '7': case '8': case '9': 355 c2 |= (digit - '0') << shl; break;545 c2 |= (digit - '0'); break; 356 546 case 'a': case 'b': case 'c': case 'd': case 'e': 357 547 case 'f': 358 c2 |= (digit - 'a' + 10) << shl; break;548 c2 |= (digit - 'a' + 10); break; 359 549 case 'A': case 'B': case 'C': case 'D': case 'E': 360 550 case 'F': 361 c2 |= (digit - 'A' + 10) << shl; break;551 c2 |= (digit - 'A' + 10); break; 362 552 default: 363 553 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5); … … 365 555 } 366 556 } 557 if ((c2 & 0xfc00) != 0xdc00) { 558 raise_errmsg("Unpaired high surrogate", pystr, end - 5); 559 goto bail; 560 } 367 561 c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00)); 562 } 563 else if ((c & 0xfc00) == 0xdc00) { 564 raise_errmsg("Unpaired low surrogate", pystr, end - 5); 565 goto bail; 368 566 } 369 567 #endif … … 385 583 } 386 584 Py_CLEAR(chunks); 387 return Py_BuildValue("(Nn)", rval, end); 585 *next_end_ptr = end; 586 return rval; 388 587 bail: 588 *next_end_ptr = -1; 389 589 Py_XDECREF(chunks); 390 590 return NULL; … … 393 593 394 594 static PyObject * 395 scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict) 396 { 595 scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict, Py_ssize_t *next_end_ptr) 596 { 597 /* Read the JSON string from PyUnicode pystr. 598 end is the index of the first character after the quote. 599 if strict is zero then literal control characters are allowed 600 *next_end_ptr is a return-by-reference index of the character 601 after the end quote 602 603 Return value is a new PyUnicode 604 */ 397 605 PyObject *rval; 398 606 Py_ssize_t len = PyUnicode_GET_SIZE(pystr); 399 607 Py_ssize_t begin = end - 1; 400 Py_ssize_t next = begin;608 Py_ssize_t next; 401 609 const Py_UNICODE *buf = PyUnicode_AS_UNICODE(pystr); 402 610 PyObject *chunks = PyList_New(0); … … 477 685 /* Decode 4 hex digits */ 478 686 for (; next < end; next++) { 479 Py_ssize_t shl = (end - next - 1) << 2;480 687 Py_UNICODE digit = buf[next]; 688 c <<= 4; 481 689 switch (digit) { 482 690 case '0': case '1': case '2': case '3': case '4': 483 691 case '5': case '6': case '7': case '8': case '9': 484 c |= (digit - '0') << shl; break;692 c |= (digit - '0'); break; 485 693 case 'a': case 'b': case 'c': case 'd': case 'e': 486 694 case 'f': 487 c |= (digit - 'a' + 10) << shl; break;695 c |= (digit - 'a' + 10); break; 488 696 case 'A': case 'B': case 'C': case 'D': case 'E': 489 697 case 'F': 490 c |= (digit - 'A' + 10) << shl; break;698 c |= (digit - 'A' + 10); break; 491 699 default: 492 700 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5); … … 496 704 #ifdef Py_UNICODE_WIDE 497 705 /* Surrogate pair */ 498 if ( c >= 0xd800 && c <= 0xdbff) {706 if ((c & 0xfc00) == 0xd800) { 499 707 Py_UNICODE c2 = 0; 500 708 if (end + 6 >= len) { 501 raise_errmsg(" Invalid \\uXXXX\\uXXXX surrogate pair", pystr,502 end - 5);709 raise_errmsg("Unpaired high surrogate", pystr, end - 5); 710 goto bail; 503 711 } 504 712 if (buf[next++] != '\\' || buf[next++] != 'u') { 505 raise_errmsg(" Invalid \\uXXXX\\uXXXX surrogate pair", pystr,506 end - 5);713 raise_errmsg("Unpaired high surrogate", pystr, end - 5); 714 goto bail; 507 715 } 508 716 end += 6; 509 717 /* Decode 4 hex digits */ 510 718 for (; next < end; next++) { 511 Py_ssize_t shl = (end - next - 1) << 2;512 719 Py_UNICODE digit = buf[next]; 720 c2 <<= 4; 513 721 switch (digit) { 514 722 case '0': case '1': case '2': case '3': case '4': 515 723 case '5': case '6': case '7': case '8': case '9': 516 c2 |= (digit - '0') << shl; break;724 c2 |= (digit - '0'); break; 517 725 case 'a': case 'b': case 'c': case 'd': case 'e': 518 726 case 'f': 519 c2 |= (digit - 'a' + 10) << shl; break;727 c2 |= (digit - 'a' + 10); break; 520 728 case 'A': case 'B': case 'C': case 'D': case 'E': 521 729 case 'F': 522 c2 |= (digit - 'A' + 10) << shl; break;730 c2 |= (digit - 'A' + 10); break; 523 731 default: 524 732 raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5); … … 526 734 } 527 735 } 736 if ((c2 & 0xfc00) != 0xdc00) { 737 raise_errmsg("Unpaired high surrogate", pystr, end - 5); 738 goto bail; 739 } 528 740 c = 0x10000 + (((c - 0xd800) << 10) | (c2 - 0xdc00)); 741 } 742 else if ((c & 0xfc00) == 0xdc00) { 743 raise_errmsg("Unpaired low surrogate", pystr, end - 5); 744 goto bail; 529 745 } 530 746 #endif … … 545 761 goto bail; 546 762 } 547 Py_CLEAR(chunks); 548 return Py_BuildValue("(Nn)", rval, end); 763 Py_DECREF(chunks); 764 *next_end_ptr = end; 765 return rval; 549 766 bail: 767 *next_end_ptr = -1; 550 768 Py_XDECREF(chunks); 551 769 return NULL; … … 553 771 554 772 PyDoc_STRVAR(pydoc_scanstring, 555 "scanstring(basestring, end, encoding) -> (str, end)\n"); 556 557 static PyObject * 558 py_scanstring(PyObject* self, PyObject *args) 773 "scanstring(basestring, end, encoding, strict=True) -> (str, end)\n" 774 "\n" 775 "Scan the string s for a JSON string. End is the index of the\n" 776 "character in s after the quote that started the JSON string.\n" 777 "Unescapes all valid JSON string escape sequences and raises ValueError\n" 778 "on attempt to decode an invalid string. If strict is False then literal\n" 779 "control characters are allowed in the string.\n" 780 "\n" 781 "Returns a tuple of the decoded string and the index of the character in s\n" 782 "after the end quote." 783 ); 784 785 static PyObject * 786 py_scanstring(PyObject* self UNUSED, PyObject *args) 559 787 { 560 788 PyObject *pystr; 789 PyObject *rval; 561 790 Py_ssize_t end; 791 Py_ssize_t next_end = -1; 562 792 char *encoding = NULL; 563 int strict = 0;564 if (!PyArg_ParseTuple(args, "O n|zi:scanstring", &pystr, &end, &encoding, &strict)) {793 int strict = 1; 794 if (!PyArg_ParseTuple(args, "OO&|zi:scanstring", &pystr, _convertPyInt_AsSsize_t, &end, &encoding, &strict)) { 565 795 return NULL; 566 796 } … … 569 799 } 570 800 if (PyString_Check(pystr)) { 571 r eturn scanstring_str(pystr, end, encoding, strict);801 rval = scanstring_str(pystr, end, encoding, strict, &next_end); 572 802 } 573 803 else if (PyUnicode_Check(pystr)) { 574 r eturn scanstring_unicode(pystr, end, strict);804 rval = scanstring_unicode(pystr, end, strict, &next_end); 575 805 } 576 806 else { 577 PyErr_Format(PyExc_TypeError, 578 "first argument must be a string or unicode, not %.80s",807 PyErr_Format(PyExc_TypeError, 808 "first argument must be a string, not %.80s", 579 809 Py_TYPE(pystr)->tp_name); 580 810 return NULL; 581 811 } 812 return _build_rval_index_tuple(rval, next_end); 582 813 } 583 814 584 815 PyDoc_STRVAR(pydoc_encode_basestring_ascii, 585 "encode_basestring_ascii(basestring) -> str\n"); 586 587 static PyObject * 588 py_encode_basestring_ascii(PyObject* self, PyObject *pystr) 589 { 816 "encode_basestring_ascii(basestring) -> str\n" 817 "\n" 818 "Return an ASCII-only JSON representation of a Python string" 819 ); 820 821 static PyObject * 822 py_encode_basestring_ascii(PyObject* self UNUSED, PyObject *pystr) 823 { 824 /* Return an ASCII-only JSON representation of a Python string */ 590 825 /* METH_O */ 591 826 if (PyString_Check(pystr)) { … … 596 831 } 597 832 else { 598 PyErr_Format(PyExc_TypeError, 599 "first argument must be a string or unicode, not %.80s",833 PyErr_Format(PyExc_TypeError, 834 "first argument must be a string, not %.80s", 600 835 Py_TYPE(pystr)->tp_name); 601 836 return NULL; … … 603 838 } 604 839 605 static PyMethodDef json_methods[] = { 606 {"encode_basestring_ascii", (PyCFunction)py_encode_basestring_ascii, 607 METH_O, pydoc_encode_basestring_ascii}, 608 {"scanstring", (PyCFunction)py_scanstring, METH_VARARGS, 609 pydoc_scanstring}, 840 static void 841 scanner_dealloc(PyObject *self) 842 { 843 /* Deallocate scanner object */ 844 scanner_clear(self); 845 Py_TYPE(self)->tp_free(self); 846 } 847 848 static int 849 scanner_traverse(PyObject *self, visitproc visit, void *arg) 850 { 851 PyScannerObject *s; 852 assert(PyScanner_Check(self)); 853 s = (PyScannerObject *)self; 854 Py_VISIT(s->encoding); 855 Py_VISIT(s->strict); 856 Py_VISIT(s->object_hook); 857 Py_VISIT(s->pairs_hook); 858 Py_VISIT(s->parse_float); 859 Py_VISIT(s->parse_int); 860 Py_VISIT(s->parse_constant); 861 return 0; 862 } 863 864 static int 865 scanner_clear(PyObject *self) 866 { 867 PyScannerObject *s; 868 assert(PyScanner_Check(self)); 869 s = (PyScannerObject *)self; 870 Py_CLEAR(s->encoding); 871 Py_CLEAR(s->strict); 872 Py_CLEAR(s->object_hook); 873 Py_CLEAR(s->pairs_hook); 874 Py_CLEAR(s->parse_float); 875 Py_CLEAR(s->parse_int); 876 Py_CLEAR(s->parse_constant); 877 return 0; 878 } 879 880 static PyObject * 881 _parse_object_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { 882 /* Read a JSON object from PyString pystr. 883 idx is the index of the first character after the opening curly brace. 884 *next_idx_ptr is a return-by-reference index to the first character after 885 the closing curly brace. 886 887 Returns a new PyObject (usually a dict, but object_hook can change that) 888 */ 889 char *str = PyString_AS_STRING(pystr); 890 Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1; 891 PyObject *rval; 892 PyObject *pairs; 893 PyObject *item; 894 PyObject *key = NULL; 895 PyObject *val = NULL; 896 char *encoding = PyString_AS_STRING(s->encoding); 897 int strict = PyObject_IsTrue(s->strict); 898 Py_ssize_t next_idx; 899 900 pairs = PyList_New(0); 901 if (pairs == NULL) 902 return NULL; 903 904 /* skip whitespace after { */ 905 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; 906 907 /* only loop if the object is non-empty */ 908 if (idx <= end_idx && str[idx] != '}') { 909 while (idx <= end_idx) { 910 /* read key */ 911 if (str[idx] != '"') { 912 raise_errmsg("Expecting property name", pystr, idx); 913 goto bail; 914 } 915 key = scanstring_str(pystr, idx + 1, encoding, strict, &next_idx); 916 if (key == NULL) 917 goto bail; 918 idx = next_idx; 919 920 /* skip whitespace between key and : delimiter, read :, skip whitespace */ 921 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; 922 if (idx > end_idx || str[idx] != ':') { 923 raise_errmsg("Expecting : delimiter", pystr, idx); 924 goto bail; 925 } 926 idx++; 927 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; 928 929 /* read any JSON data type */ 930 val = scan_once_str(s, pystr, idx, &next_idx); 931 if (val == NULL) 932 goto bail; 933 934 item = PyTuple_Pack(2, key, val); 935 if (item == NULL) 936 goto bail; 937 Py_CLEAR(key); 938 Py_CLEAR(val); 939 if (PyList_Append(pairs, item) == -1) { 940 Py_DECREF(item); 941 goto bail; 942 } 943 Py_DECREF(item); 944 idx = next_idx; 945 946 /* skip whitespace before } or , */ 947 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; 948 949 /* bail if the object is closed or we didn't get the , delimiter */ 950 if (idx > end_idx) break; 951 if (str[idx] == '}') { 952 break; 953 } 954 else if (str[idx] != ',') { 955 raise_errmsg("Expecting , delimiter", pystr, idx); 956 goto bail; 957 } 958 idx++; 959 960 /* skip whitespace after , delimiter */ 961 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; 962 } 963 } 964 /* verify that idx < end_idx, str[idx] should be '}' */ 965 if (idx > end_idx || str[idx] != '}') { 966 raise_errmsg("Expecting object", pystr, end_idx); 967 goto bail; 968 } 969 970 /* if pairs_hook is not None: rval = object_pairs_hook(pairs) */ 971 if (s->pairs_hook != Py_None) { 972 val = PyObject_CallFunctionObjArgs(s->pairs_hook, pairs, NULL); 973 if (val == NULL) 974 goto bail; 975 Py_DECREF(pairs); 976 *next_idx_ptr = idx + 1; 977 return val; 978 } 979 980 rval = PyObject_CallFunctionObjArgs((PyObject *)(&PyDict_Type), 981 pairs, NULL); 982 if (rval == NULL) 983 goto bail; 984 Py_CLEAR(pairs); 985 986 /* if object_hook is not None: rval = object_hook(rval) */ 987 if (s->object_hook != Py_None) { 988 val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL); 989 if (val == NULL) 990 goto bail; 991 Py_DECREF(rval); 992 rval = val; 993 val = NULL; 994 } 995 *next_idx_ptr = idx + 1; 996 return rval; 997 bail: 998 Py_XDECREF(key); 999 Py_XDECREF(val); 1000 Py_XDECREF(pairs); 1001 return NULL; 1002 } 1003 1004 static PyObject * 1005 _parse_object_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { 1006 /* Read a JSON object from PyUnicode pystr. 1007 idx is the index of the first character after the opening curly brace. 1008 *next_idx_ptr is a return-by-reference index to the first character after 1009 the closing curly brace. 1010 1011 Returns a new PyObject (usually a dict, but object_hook can change that) 1012 */ 1013 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr); 1014 Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1; 1015 PyObject *rval; 1016 PyObject *pairs; 1017 PyObject *item; 1018 PyObject *key = NULL; 1019 PyObject *val = NULL; 1020 int strict = PyObject_IsTrue(s->strict); 1021 Py_ssize_t next_idx; 1022 1023 pairs = PyList_New(0); 1024 if (pairs == NULL) 1025 return NULL; 1026 1027 /* skip whitespace after { */ 1028 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; 1029 1030 /* only loop if the object is non-empty */ 1031 if (idx <= end_idx && str[idx] != '}') { 1032 while (idx <= end_idx) { 1033 /* read key */ 1034 if (str[idx] != '"') { 1035 raise_errmsg("Expecting property name enclosed in double quotes", pystr, idx); 1036 goto bail; 1037 } 1038 key = scanstring_unicode(pystr, idx + 1, strict, &next_idx); 1039 if (key == NULL) 1040 goto bail; 1041 idx = next_idx; 1042 1043 /* skip whitespace between key and : delimiter, read :, skip whitespace */ 1044 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; 1045 if (idx > end_idx || str[idx] != ':') { 1046 raise_errmsg("Expecting ':' delimiter", pystr, idx); 1047 goto bail; 1048 } 1049 idx++; 1050 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; 1051 1052 /* read any JSON term */ 1053 val = scan_once_unicode(s, pystr, idx, &next_idx); 1054 if (val == NULL) 1055 goto bail; 1056 1057 item = PyTuple_Pack(2, key, val); 1058 if (item == NULL) 1059 goto bail; 1060 Py_CLEAR(key); 1061 Py_CLEAR(val); 1062 if (PyList_Append(pairs, item) == -1) { 1063 Py_DECREF(item); 1064 goto bail; 1065 } 1066 Py_DECREF(item); 1067 idx = next_idx; 1068 1069 /* skip whitespace before } or , */ 1070 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; 1071 1072 /* bail if the object is closed or we didn't get the , delimiter */ 1073 if (idx > end_idx) break; 1074 if (str[idx] == '}') { 1075 break; 1076 } 1077 else if (str[idx] != ',') { 1078 raise_errmsg("Expecting ',' delimiter", pystr, idx); 1079 goto bail; 1080 } 1081 idx++; 1082 1083 /* skip whitespace after , delimiter */ 1084 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; 1085 } 1086 } 1087 1088 /* verify that idx < end_idx, str[idx] should be '}' */ 1089 if (idx > end_idx || str[idx] != '}') { 1090 raise_errmsg("Expecting object", pystr, end_idx); 1091 goto bail; 1092 } 1093 1094 /* if pairs_hook is not None: rval = object_pairs_hook(pairs) */ 1095 if (s->pairs_hook != Py_None) { 1096 val = PyObject_CallFunctionObjArgs(s->pairs_hook, pairs, NULL); 1097 if (val == NULL) 1098 goto bail; 1099 Py_DECREF(pairs); 1100 *next_idx_ptr = idx + 1; 1101 return val; 1102 } 1103 1104 rval = PyObject_CallFunctionObjArgs((PyObject *)(&PyDict_Type), 1105 pairs, NULL); 1106 if (rval == NULL) 1107 goto bail; 1108 Py_CLEAR(pairs); 1109 1110 /* if object_hook is not None: rval = object_hook(rval) */ 1111 if (s->object_hook != Py_None) { 1112 val = PyObject_CallFunctionObjArgs(s->object_hook, rval, NULL); 1113 if (val == NULL) 1114 goto bail; 1115 Py_DECREF(rval); 1116 rval = val; 1117 val = NULL; 1118 } 1119 *next_idx_ptr = idx + 1; 1120 return rval; 1121 bail: 1122 Py_XDECREF(key); 1123 Py_XDECREF(val); 1124 Py_XDECREF(pairs); 1125 return NULL; 1126 } 1127 1128 static PyObject * 1129 _parse_array_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { 1130 /* Read a JSON array from PyString pystr. 1131 idx is the index of the first character after the opening brace. 1132 *next_idx_ptr is a return-by-reference index to the first character after 1133 the closing brace. 1134 1135 Returns a new PyList 1136 */ 1137 char *str = PyString_AS_STRING(pystr); 1138 Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1; 1139 PyObject *val = NULL; 1140 PyObject *rval = PyList_New(0); 1141 Py_ssize_t next_idx; 1142 if (rval == NULL) 1143 return NULL; 1144 1145 /* skip whitespace after [ */ 1146 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; 1147 1148 /* only loop if the array is non-empty */ 1149 if (idx <= end_idx && str[idx] != ']') { 1150 while (idx <= end_idx) { 1151 1152 /* read any JSON term and de-tuplefy the (rval, idx) */ 1153 val = scan_once_str(s, pystr, idx, &next_idx); 1154 if (val == NULL) 1155 goto bail; 1156 1157 if (PyList_Append(rval, val) == -1) 1158 goto bail; 1159 1160 Py_CLEAR(val); 1161 idx = next_idx; 1162 1163 /* skip whitespace between term and , */ 1164 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; 1165 1166 /* bail if the array is closed or we didn't get the , delimiter */ 1167 if (idx > end_idx) break; 1168 if (str[idx] == ']') { 1169 break; 1170 } 1171 else if (str[idx] != ',') { 1172 raise_errmsg("Expecting , delimiter", pystr, idx); 1173 goto bail; 1174 } 1175 idx++; 1176 1177 /* skip whitespace after , */ 1178 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; 1179 } 1180 } 1181 1182 /* verify that idx < end_idx, str[idx] should be ']' */ 1183 if (idx > end_idx || str[idx] != ']') { 1184 raise_errmsg("Expecting object", pystr, end_idx); 1185 goto bail; 1186 } 1187 *next_idx_ptr = idx + 1; 1188 return rval; 1189 bail: 1190 Py_XDECREF(val); 1191 Py_DECREF(rval); 1192 return NULL; 1193 } 1194 1195 static PyObject * 1196 _parse_array_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { 1197 /* Read a JSON array from PyString pystr. 1198 idx is the index of the first character after the opening brace. 1199 *next_idx_ptr is a return-by-reference index to the first character after 1200 the closing brace. 1201 1202 Returns a new PyList 1203 */ 1204 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr); 1205 Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1; 1206 PyObject *val = NULL; 1207 PyObject *rval = PyList_New(0); 1208 Py_ssize_t next_idx; 1209 if (rval == NULL) 1210 return NULL; 1211 1212 /* skip whitespace after [ */ 1213 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; 1214 1215 /* only loop if the array is non-empty */ 1216 if (idx <= end_idx && str[idx] != ']') { 1217 while (idx <= end_idx) { 1218 1219 /* read any JSON term */ 1220 val = scan_once_unicode(s, pystr, idx, &next_idx); 1221 if (val == NULL) 1222 goto bail; 1223 1224 if (PyList_Append(rval, val) == -1) 1225 goto bail; 1226 1227 Py_CLEAR(val); 1228 idx = next_idx; 1229 1230 /* skip whitespace between term and , */ 1231 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; 1232 1233 /* bail if the array is closed or we didn't get the , delimiter */ 1234 if (idx > end_idx) break; 1235 if (str[idx] == ']') { 1236 break; 1237 } 1238 else if (str[idx] != ',') { 1239 raise_errmsg("Expecting ',' delimiter", pystr, idx); 1240 goto bail; 1241 } 1242 idx++; 1243 1244 /* skip whitespace after , */ 1245 while (idx <= end_idx && IS_WHITESPACE(str[idx])) idx++; 1246 } 1247 } 1248 1249 /* verify that idx < end_idx, str[idx] should be ']' */ 1250 if (idx > end_idx || str[idx] != ']') { 1251 raise_errmsg("Expecting object", pystr, end_idx); 1252 goto bail; 1253 } 1254 *next_idx_ptr = idx + 1; 1255 return rval; 1256 bail: 1257 Py_XDECREF(val); 1258 Py_DECREF(rval); 1259 return NULL; 1260 } 1261 1262 static PyObject * 1263 _parse_constant(PyScannerObject *s, char *constant, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) { 1264 /* Read a JSON constant from PyString pystr. 1265 constant is the constant string that was found 1266 ("NaN", "Infinity", "-Infinity"). 1267 idx is the index of the first character of the constant 1268 *next_idx_ptr is a return-by-reference index to the first character after 1269 the constant. 1270 1271 Returns the result of parse_constant 1272 */ 1273 PyObject *cstr; 1274 PyObject *rval; 1275 /* constant is "NaN", "Infinity", or "-Infinity" */ 1276 cstr = PyString_InternFromString(constant); 1277 if (cstr == NULL) 1278 return NULL; 1279 1280 /* rval = parse_constant(constant) */ 1281 rval = PyObject_CallFunctionObjArgs(s->parse_constant, cstr, NULL); 1282 idx += PyString_GET_SIZE(cstr); 1283 Py_DECREF(cstr); 1284 *next_idx_ptr = idx; 1285 return rval; 1286 } 1287 1288 static PyObject * 1289 _match_number_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) { 1290 /* Read a JSON number from PyString pystr. 1291 idx is the index of the first character of the number 1292 *next_idx_ptr is a return-by-reference index to the first character after 1293 the number. 1294 1295 Returns a new PyObject representation of that number: 1296 PyInt, PyLong, or PyFloat. 1297 May return other types if parse_int or parse_float are set 1298 */ 1299 char *str = PyString_AS_STRING(pystr); 1300 Py_ssize_t end_idx = PyString_GET_SIZE(pystr) - 1; 1301 Py_ssize_t idx = start; 1302 int is_float = 0; 1303 PyObject *rval; 1304 PyObject *numstr; 1305 1306 /* read a sign if it's there, make sure it's not the end of the string */ 1307 if (str[idx] == '-') { 1308 idx++; 1309 if (idx > end_idx) { 1310 PyErr_SetNone(PyExc_StopIteration); 1311 return NULL; 1312 } 1313 } 1314 1315 /* read as many integer digits as we find as long as it doesn't start with 0 */ 1316 if (str[idx] >= '1' && str[idx] <= '9') { 1317 idx++; 1318 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; 1319 } 1320 /* if it starts with 0 we only expect one integer digit */ 1321 else if (str[idx] == '0') { 1322 idx++; 1323 } 1324 /* no integer digits, error */ 1325 else { 1326 PyErr_SetNone(PyExc_StopIteration); 1327 return NULL; 1328 } 1329 1330 /* if the next char is '.' followed by a digit then read all float digits */ 1331 if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') { 1332 is_float = 1; 1333 idx += 2; 1334 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; 1335 } 1336 1337 /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */ 1338 if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) { 1339 1340 /* save the index of the 'e' or 'E' just in case we need to backtrack */ 1341 Py_ssize_t e_start = idx; 1342 idx++; 1343 1344 /* read an exponent sign if present */ 1345 if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++; 1346 1347 /* read all digits */ 1348 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; 1349 1350 /* if we got a digit, then parse as float. if not, backtrack */ 1351 if (str[idx - 1] >= '0' && str[idx - 1] <= '9') { 1352 is_float = 1; 1353 } 1354 else { 1355 idx = e_start; 1356 } 1357 } 1358 1359 /* copy the section we determined to be a number */ 1360 numstr = PyString_FromStringAndSize(&str[start], idx - start); 1361 if (numstr == NULL) 1362 return NULL; 1363 if (is_float) { 1364 /* parse as a float using a fast path if available, otherwise call user defined method */ 1365 if (s->parse_float != (PyObject *)&PyFloat_Type) { 1366 rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL); 1367 } 1368 else { 1369 double d = PyOS_string_to_double(PyString_AS_STRING(numstr), 1370 NULL, NULL); 1371 if (d == -1.0 && PyErr_Occurred()) 1372 return NULL; 1373 rval = PyFloat_FromDouble(d); 1374 } 1375 } 1376 else { 1377 /* parse as an int using a fast path if available, otherwise call user defined method */ 1378 if (s->parse_int != (PyObject *)&PyInt_Type) { 1379 rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL); 1380 } 1381 else { 1382 rval = PyInt_FromString(PyString_AS_STRING(numstr), NULL, 10); 1383 } 1384 } 1385 Py_DECREF(numstr); 1386 *next_idx_ptr = idx; 1387 return rval; 1388 } 1389 1390 static PyObject * 1391 _match_number_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t start, Py_ssize_t *next_idx_ptr) { 1392 /* Read a JSON number from PyUnicode pystr. 1393 idx is the index of the first character of the number 1394 *next_idx_ptr is a return-by-reference index to the first character after 1395 the number. 1396 1397 Returns a new PyObject representation of that number: 1398 PyInt, PyLong, or PyFloat. 1399 May return other types if parse_int or parse_float are set 1400 */ 1401 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr); 1402 Py_ssize_t end_idx = PyUnicode_GET_SIZE(pystr) - 1; 1403 Py_ssize_t idx = start; 1404 int is_float = 0; 1405 PyObject *rval; 1406 PyObject *numstr; 1407 1408 /* read a sign if it's there, make sure it's not the end of the string */ 1409 if (str[idx] == '-') { 1410 idx++; 1411 if (idx > end_idx) { 1412 PyErr_SetNone(PyExc_StopIteration); 1413 return NULL; 1414 } 1415 } 1416 1417 /* read as many integer digits as we find as long as it doesn't start with 0 */ 1418 if (str[idx] >= '1' && str[idx] <= '9') { 1419 idx++; 1420 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; 1421 } 1422 /* if it starts with 0 we only expect one integer digit */ 1423 else if (str[idx] == '0') { 1424 idx++; 1425 } 1426 /* no integer digits, error */ 1427 else { 1428 PyErr_SetNone(PyExc_StopIteration); 1429 return NULL; 1430 } 1431 1432 /* if the next char is '.' followed by a digit then read all float digits */ 1433 if (idx < end_idx && str[idx] == '.' && str[idx + 1] >= '0' && str[idx + 1] <= '9') { 1434 is_float = 1; 1435 idx += 2; 1436 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; 1437 } 1438 1439 /* if the next char is 'e' or 'E' then maybe read the exponent (or backtrack) */ 1440 if (idx < end_idx && (str[idx] == 'e' || str[idx] == 'E')) { 1441 Py_ssize_t e_start = idx; 1442 idx++; 1443 1444 /* read an exponent sign if present */ 1445 if (idx < end_idx && (str[idx] == '-' || str[idx] == '+')) idx++; 1446 1447 /* read all digits */ 1448 while (idx <= end_idx && str[idx] >= '0' && str[idx] <= '9') idx++; 1449 1450 /* if we got a digit, then parse as float. if not, backtrack */ 1451 if (str[idx - 1] >= '0' && str[idx - 1] <= '9') { 1452 is_float = 1; 1453 } 1454 else { 1455 idx = e_start; 1456 } 1457 } 1458 1459 /* copy the section we determined to be a number */ 1460 numstr = PyUnicode_FromUnicode(&str[start], idx - start); 1461 if (numstr == NULL) 1462 return NULL; 1463 if (is_float) { 1464 /* parse as a float using a fast path if available, otherwise call user defined method */ 1465 if (s->parse_float != (PyObject *)&PyFloat_Type) { 1466 rval = PyObject_CallFunctionObjArgs(s->parse_float, numstr, NULL); 1467 } 1468 else { 1469 rval = PyFloat_FromString(numstr, NULL); 1470 } 1471 } 1472 else { 1473 /* no fast path for unicode -> int, just call */ 1474 rval = PyObject_CallFunctionObjArgs(s->parse_int, numstr, NULL); 1475 } 1476 Py_DECREF(numstr); 1477 *next_idx_ptr = idx; 1478 return rval; 1479 } 1480 1481 static PyObject * 1482 scan_once_str(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) 1483 { 1484 /* Read one JSON term (of any kind) from PyString pystr. 1485 idx is the index of the first character of the term 1486 *next_idx_ptr is a return-by-reference index to the first character after 1487 the number. 1488 1489 Returns a new PyObject representation of the term. 1490 */ 1491 PyObject *res; 1492 char *str = PyString_AS_STRING(pystr); 1493 Py_ssize_t length = PyString_GET_SIZE(pystr); 1494 if (idx >= length) { 1495 PyErr_SetNone(PyExc_StopIteration); 1496 return NULL; 1497 } 1498 switch (str[idx]) { 1499 case '"': 1500 /* string */ 1501 return scanstring_str(pystr, idx + 1, 1502 PyString_AS_STRING(s->encoding), 1503 PyObject_IsTrue(s->strict), 1504 next_idx_ptr); 1505 case '{': 1506 /* object */ 1507 if (Py_EnterRecursiveCall(" while decoding a JSON object " 1508 "from a byte string")) 1509 return NULL; 1510 res = _parse_object_str(s, pystr, idx + 1, next_idx_ptr); 1511 Py_LeaveRecursiveCall(); 1512 return res; 1513 case '[': 1514 /* array */ 1515 if (Py_EnterRecursiveCall(" while decoding a JSON array " 1516 "from a byte string")) 1517 return NULL; 1518 res = _parse_array_str(s, pystr, idx + 1, next_idx_ptr); 1519 Py_LeaveRecursiveCall(); 1520 return res; 1521 case 'n': 1522 /* null */ 1523 if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') { 1524 Py_INCREF(Py_None); 1525 *next_idx_ptr = idx + 4; 1526 return Py_None; 1527 } 1528 break; 1529 case 't': 1530 /* true */ 1531 if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') { 1532 Py_INCREF(Py_True); 1533 *next_idx_ptr = idx + 4; 1534 return Py_True; 1535 } 1536 break; 1537 case 'f': 1538 /* false */ 1539 if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') { 1540 Py_INCREF(Py_False); 1541 *next_idx_ptr = idx + 5; 1542 return Py_False; 1543 } 1544 break; 1545 case 'N': 1546 /* NaN */ 1547 if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') { 1548 return _parse_constant(s, "NaN", idx, next_idx_ptr); 1549 } 1550 break; 1551 case 'I': 1552 /* Infinity */ 1553 if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') { 1554 return _parse_constant(s, "Infinity", idx, next_idx_ptr); 1555 } 1556 break; 1557 case '-': 1558 /* -Infinity */ 1559 if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') { 1560 return _parse_constant(s, "-Infinity", idx, next_idx_ptr); 1561 } 1562 break; 1563 } 1564 /* Didn't find a string, object, array, or named constant. Look for a number. */ 1565 return _match_number_str(s, pystr, idx, next_idx_ptr); 1566 } 1567 1568 static PyObject * 1569 scan_once_unicode(PyScannerObject *s, PyObject *pystr, Py_ssize_t idx, Py_ssize_t *next_idx_ptr) 1570 { 1571 /* Read one JSON term (of any kind) from PyUnicode pystr. 1572 idx is the index of the first character of the term 1573 *next_idx_ptr is a return-by-reference index to the first character after 1574 the number. 1575 1576 Returns a new PyObject representation of the term. 1577 */ 1578 PyObject *res; 1579 Py_UNICODE *str = PyUnicode_AS_UNICODE(pystr); 1580 Py_ssize_t length = PyUnicode_GET_SIZE(pystr); 1581 if (idx >= length) { 1582 PyErr_SetNone(PyExc_StopIteration); 1583 return NULL; 1584 } 1585 switch (str[idx]) { 1586 case '"': 1587 /* string */ 1588 return scanstring_unicode(pystr, idx + 1, 1589 PyObject_IsTrue(s->strict), 1590 next_idx_ptr); 1591 case '{': 1592 /* object */ 1593 if (Py_EnterRecursiveCall(" while decoding a JSON object " 1594 "from a unicode string")) 1595 return NULL; 1596 res = _parse_object_unicode(s, pystr, idx + 1, next_idx_ptr); 1597 Py_LeaveRecursiveCall(); 1598 return res; 1599 case '[': 1600 /* array */ 1601 if (Py_EnterRecursiveCall(" while decoding a JSON array " 1602 "from a unicode string")) 1603 return NULL; 1604 res = _parse_array_unicode(s, pystr, idx + 1, next_idx_ptr); 1605 Py_LeaveRecursiveCall(); 1606 return res; 1607 case 'n': 1608 /* null */ 1609 if ((idx + 3 < length) && str[idx + 1] == 'u' && str[idx + 2] == 'l' && str[idx + 3] == 'l') { 1610 Py_INCREF(Py_None); 1611 *next_idx_ptr = idx + 4; 1612 return Py_None; 1613 } 1614 break; 1615 case 't': 1616 /* true */ 1617 if ((idx + 3 < length) && str[idx + 1] == 'r' && str[idx + 2] == 'u' && str[idx + 3] == 'e') { 1618 Py_INCREF(Py_True); 1619 *next_idx_ptr = idx + 4; 1620 return Py_True; 1621 } 1622 break; 1623 case 'f': 1624 /* false */ 1625 if ((idx + 4 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'l' && str[idx + 3] == 's' && str[idx + 4] == 'e') { 1626 Py_INCREF(Py_False); 1627 *next_idx_ptr = idx + 5; 1628 return Py_False; 1629 } 1630 break; 1631 case 'N': 1632 /* NaN */ 1633 if ((idx + 2 < length) && str[idx + 1] == 'a' && str[idx + 2] == 'N') { 1634 return _parse_constant(s, "NaN", idx, next_idx_ptr); 1635 } 1636 break; 1637 case 'I': 1638 /* Infinity */ 1639 if ((idx + 7 < length) && str[idx + 1] == 'n' && str[idx + 2] == 'f' && str[idx + 3] == 'i' && str[idx + 4] == 'n' && str[idx + 5] == 'i' && str[idx + 6] == 't' && str[idx + 7] == 'y') { 1640 return _parse_constant(s, "Infinity", idx, next_idx_ptr); 1641 } 1642 break; 1643 case '-': 1644 /* -Infinity */ 1645 if ((idx + 8 < length) && str[idx + 1] == 'I' && str[idx + 2] == 'n' && str[idx + 3] == 'f' && str[idx + 4] == 'i' && str[idx + 5] == 'n' && str[idx + 6] == 'i' && str[idx + 7] == 't' && str[idx + 8] == 'y') { 1646 return _parse_constant(s, "-Infinity", idx, next_idx_ptr); 1647 } 1648 break; 1649 } 1650 /* Didn't find a string, object, array, or named constant. Look for a number. */ 1651 return _match_number_unicode(s, pystr, idx, next_idx_ptr); 1652 } 1653 1654 static PyObject * 1655 scanner_call(PyObject *self, PyObject *args, PyObject *kwds) 1656 { 1657 /* Python callable interface to scan_once_{str,unicode} */ 1658 PyObject *pystr; 1659 PyObject *rval; 1660 Py_ssize_t idx; 1661 Py_ssize_t next_idx = -1; 1662 static char *kwlist[] = {"string", "idx", NULL}; 1663 PyScannerObject *s; 1664 assert(PyScanner_Check(self)); 1665 s = (PyScannerObject *)self; 1666 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:scan_once", kwlist, &pystr, _convertPyInt_AsSsize_t, &idx)) 1667 return NULL; 1668 1669 if (PyString_Check(pystr)) { 1670 rval = scan_once_str(s, pystr, idx, &next_idx); 1671 } 1672 else if (PyUnicode_Check(pystr)) { 1673 rval = scan_once_unicode(s, pystr, idx, &next_idx); 1674 } 1675 else { 1676 PyErr_Format(PyExc_TypeError, 1677 "first argument must be a string, not %.80s", 1678 Py_TYPE(pystr)->tp_name); 1679 return NULL; 1680 } 1681 return _build_rval_index_tuple(rval, next_idx); 1682 } 1683 1684 static PyObject * 1685 scanner_new(PyTypeObject *type, PyObject *args, PyObject *kwds) 1686 { 1687 PyScannerObject *s; 1688 s = (PyScannerObject *)type->tp_alloc(type, 0); 1689 if (s != NULL) { 1690 s->encoding = NULL; 1691 s->strict = NULL; 1692 s->object_hook = NULL; 1693 s->pairs_hook = NULL; 1694 s->parse_float = NULL; 1695 s->parse_int = NULL; 1696 s->parse_constant = NULL; 1697 } 1698 return (PyObject *)s; 1699 } 1700 1701 static int 1702 scanner_init(PyObject *self, PyObject *args, PyObject *kwds) 1703 { 1704 /* Initialize Scanner object */ 1705 PyObject *ctx; 1706 static char *kwlist[] = {"context", NULL}; 1707 PyScannerObject *s; 1708 1709 assert(PyScanner_Check(self)); 1710 s = (PyScannerObject *)self; 1711 1712 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O:make_scanner", kwlist, &ctx)) 1713 return -1; 1714 1715 /* PyString_AS_STRING is used on encoding */ 1716 s->encoding = PyObject_GetAttrString(ctx, "encoding"); 1717 if (s->encoding == NULL) 1718 goto bail; 1719 if (s->encoding == Py_None) { 1720 Py_DECREF(Py_None); 1721 s->encoding = PyString_InternFromString(DEFAULT_ENCODING); 1722 } 1723 else if (PyUnicode_Check(s->encoding)) { 1724 PyObject *tmp = PyUnicode_AsEncodedString(s->encoding, NULL, NULL); 1725 Py_DECREF(s->encoding); 1726 s->encoding = tmp; 1727 } 1728 if (s->encoding == NULL) 1729 goto bail; 1730 if (!PyString_Check(s->encoding)) { 1731 PyErr_Format(PyExc_TypeError, 1732 "encoding must be a string, not %.80s", 1733 Py_TYPE(s->encoding)->tp_name); 1734 goto bail; 1735 } 1736 1737 1738 /* All of these will fail "gracefully" so we don't need to verify them */ 1739 s->strict = PyObject_GetAttrString(ctx, "strict"); 1740 if (s->strict == NULL) 1741 goto bail; 1742 s->object_hook = PyObject_GetAttrString(ctx, "object_hook"); 1743 if (s->object_hook == NULL) 1744 goto bail; 1745 s->pairs_hook = PyObject_GetAttrString(ctx, "object_pairs_hook"); 1746 if (s->pairs_hook == NULL) 1747 goto bail; 1748 s->parse_float = PyObject_GetAttrString(ctx, "parse_float"); 1749 if (s->parse_float == NULL) 1750 goto bail; 1751 s->parse_int = PyObject_GetAttrString(ctx, "parse_int"); 1752 if (s->parse_int == NULL) 1753 goto bail; 1754 s->parse_constant = PyObject_GetAttrString(ctx, "parse_constant"); 1755 if (s->parse_constant == NULL) 1756 goto bail; 1757 1758 return 0; 1759 1760 bail: 1761 Py_CLEAR(s->encoding); 1762 Py_CLEAR(s->strict); 1763 Py_CLEAR(s->object_hook); 1764 Py_CLEAR(s->pairs_hook); 1765 Py_CLEAR(s->parse_float); 1766 Py_CLEAR(s->parse_int); 1767 Py_CLEAR(s->parse_constant); 1768 return -1; 1769 } 1770 1771 PyDoc_STRVAR(scanner_doc, "JSON scanner object"); 1772 1773 static 1774 PyTypeObject PyScannerType = { 1775 PyObject_HEAD_INIT(NULL) 1776 0, /* tp_internal */ 1777 "_json.Scanner", /* tp_name */ 1778 sizeof(PyScannerObject), /* tp_basicsize */ 1779 0, /* tp_itemsize */ 1780 scanner_dealloc, /* tp_dealloc */ 1781 0, /* tp_print */ 1782 0, /* tp_getattr */ 1783 0, /* tp_setattr */ 1784 0, /* tp_compare */ 1785 0, /* tp_repr */ 1786 0, /* tp_as_number */ 1787 0, /* tp_as_sequence */ 1788 0, /* tp_as_mapping */ 1789 0, /* tp_hash */ 1790 scanner_call, /* tp_call */ 1791 0, /* tp_str */ 1792 0,/* PyObject_GenericGetAttr, */ /* tp_getattro */ 1793 0,/* PyObject_GenericSetAttr, */ /* tp_setattro */ 1794 0, /* tp_as_buffer */ 1795 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */ 1796 scanner_doc, /* tp_doc */ 1797 scanner_traverse, /* tp_traverse */ 1798 scanner_clear, /* tp_clear */ 1799 0, /* tp_richcompare */ 1800 0, /* tp_weaklistoffset */ 1801 0, /* tp_iter */ 1802 0, /* tp_iternext */ 1803 0, /* tp_methods */ 1804 scanner_members, /* tp_members */ 1805 0, /* tp_getset */ 1806 0, /* tp_base */ 1807 0, /* tp_dict */ 1808 0, /* tp_descr_get */ 1809 0, /* tp_descr_set */ 1810 0, /* tp_dictoffset */ 1811 scanner_init, /* tp_init */ 1812 0,/* PyType_GenericAlloc, */ /* tp_alloc */ 1813 scanner_new, /* tp_new */ 1814 0,/* PyObject_GC_Del, */ /* tp_free */ 1815 }; 1816 1817 static PyObject * 1818 encoder_new(PyTypeObject *type, PyObject *args, PyObject *kwds) 1819 { 1820 PyEncoderObject *s; 1821 s = (PyEncoderObject *)type->tp_alloc(type, 0); 1822 if (s != NULL) { 1823 s->markers = NULL; 1824 s->defaultfn = NULL; 1825 s->encoder = NULL; 1826 s->indent = NULL; 1827 s->key_separator = NULL; 1828 s->item_separator = NULL; 1829 s->sort_keys = NULL; 1830 s->skipkeys = NULL; 1831 } 1832 return (PyObject *)s; 1833 } 1834 1835 static int 1836 encoder_init(PyObject *self, PyObject *args, PyObject *kwds) 1837 { 1838 /* initialize Encoder object */ 1839 static char *kwlist[] = {"markers", "default", "encoder", "indent", "key_separator", "item_separator", "sort_keys", "skipkeys", "allow_nan", NULL}; 1840 1841 PyEncoderObject *s; 1842 PyObject *markers, *defaultfn, *encoder, *indent, *key_separator; 1843 PyObject *item_separator, *sort_keys, *skipkeys, *allow_nan; 1844 1845 assert(PyEncoder_Check(self)); 1846 s = (PyEncoderObject *)self; 1847 1848 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OOOOOOOOO:make_encoder", kwlist, 1849 &markers, &defaultfn, &encoder, &indent, &key_separator, &item_separator, 1850 &sort_keys, &skipkeys, &allow_nan)) 1851 return -1; 1852 1853 s->markers = markers; 1854 s->defaultfn = defaultfn; 1855 s->encoder = encoder; 1856 s->indent = indent; 1857 s->key_separator = key_separator; 1858 s->item_separator = item_separator; 1859 s->sort_keys = sort_keys; 1860 s->skipkeys = skipkeys; 1861 s->fast_encode = (PyCFunction_Check(s->encoder) && PyCFunction_GetFunction(s->encoder) == (PyCFunction)py_encode_basestring_ascii); 1862 s->allow_nan = PyObject_IsTrue(allow_nan); 1863 1864 Py_INCREF(s->markers); 1865 Py_INCREF(s->defaultfn); 1866 Py_INCREF(s->encoder); 1867 Py_INCREF(s->indent); 1868 Py_INCREF(s->key_separator); 1869 Py_INCREF(s->item_separator); 1870 Py_INCREF(s->sort_keys); 1871 Py_INCREF(s->skipkeys); 1872 return 0; 1873 } 1874 1875 static PyObject * 1876 encoder_call(PyObject *self, PyObject *args, PyObject *kwds) 1877 { 1878 /* Python callable interface to encode_listencode_obj */ 1879 static char *kwlist[] = {"obj", "_current_indent_level", NULL}; 1880 PyObject *obj; 1881 PyObject *rval; 1882 Py_ssize_t indent_level; 1883 PyEncoderObject *s; 1884 assert(PyEncoder_Check(self)); 1885 s = (PyEncoderObject *)self; 1886 if (!PyArg_ParseTupleAndKeywords(args, kwds, "OO&:_iterencode", kwlist, 1887 &obj, _convertPyInt_AsSsize_t, &indent_level)) 1888 return NULL; 1889 rval = PyList_New(0); 1890 if (rval == NULL) 1891 return NULL; 1892 if (encoder_listencode_obj(s, rval, obj, indent_level)) { 1893 Py_DECREF(rval); 1894 return NULL; 1895 } 1896 return rval; 1897 } 1898 1899 static PyObject * 1900 _encoded_const(PyObject *obj) 1901 { 1902 /* Return the JSON string representation of None, True, False */ 1903 if (obj == Py_None) { 1904 static PyObject *s_null = NULL; 1905 if (s_null == NULL) { 1906 s_null = PyString_InternFromString("null"); 1907 } 1908 Py_INCREF(s_null); 1909 return s_null; 1910 } 1911 else if (obj == Py_True) { 1912 static PyObject *s_true = NULL; 1913 if (s_true == NULL) { 1914 s_true = PyString_InternFromString("true"); 1915 } 1916 Py_INCREF(s_true); 1917 return s_true; 1918 } 1919 else if (obj == Py_False) { 1920 static PyObject *s_false = NULL; 1921 if (s_false == NULL) { 1922 s_false = PyString_InternFromString("false"); 1923 } 1924 Py_INCREF(s_false); 1925 return s_false; 1926 } 1927 else { 1928 PyErr_SetString(PyExc_ValueError, "not a const"); 1929 return NULL; 1930 } 1931 } 1932 1933 static PyObject * 1934 encoder_encode_float(PyEncoderObject *s, PyObject *obj) 1935 { 1936 /* Return the JSON representation of a PyFloat */ 1937 double i = PyFloat_AS_DOUBLE(obj); 1938 if (!Py_IS_FINITE(i)) { 1939 if (!s->allow_nan) { 1940 PyErr_SetString(PyExc_ValueError, "Out of range float values are not JSON compliant"); 1941 return NULL; 1942 } 1943 if (i > 0) { 1944 return PyString_FromString("Infinity"); 1945 } 1946 else if (i < 0) { 1947 return PyString_FromString("-Infinity"); 1948 } 1949 else { 1950 return PyString_FromString("NaN"); 1951 } 1952 } 1953 /* Use a better float format here? */ 1954 return PyObject_Repr(obj); 1955 } 1956 1957 static PyObject * 1958 encoder_encode_string(PyEncoderObject *s, PyObject *obj) 1959 { 1960 /* Return the JSON representation of a string */ 1961 if (s->fast_encode) 1962 return py_encode_basestring_ascii(NULL, obj); 1963 else 1964 return PyObject_CallFunctionObjArgs(s->encoder, obj, NULL); 1965 } 1966 1967 static int 1968 _steal_list_append(PyObject *lst, PyObject *stolen) 1969 { 1970 /* Append stolen and then decrement its reference count */ 1971 int rval = PyList_Append(lst, stolen); 1972 Py_DECREF(stolen); 1973 return rval; 1974 } 1975 1976 static int 1977 encoder_listencode_obj(PyEncoderObject *s, PyObject *rval, PyObject *obj, Py_ssize_t indent_level) 1978 { 1979 /* Encode Python object obj to a JSON term, rval is a PyList */ 1980 PyObject *newobj; 1981 int rv; 1982 1983 if (obj == Py_None || obj == Py_True || obj == Py_False) { 1984 PyObject *cstr = _encoded_const(obj); 1985 if (cstr == NULL) 1986 return -1; 1987 return _steal_list_append(rval, cstr); 1988 } 1989 else if (PyString_Check(obj) || PyUnicode_Check(obj)) 1990 { 1991 PyObject *encoded = encoder_encode_string(s, obj); 1992 if (encoded == NULL) 1993 return -1; 1994 return _steal_list_append(rval, encoded); 1995 } 1996 else if (PyInt_Check(obj) || PyLong_Check(obj)) { 1997 PyObject *encoded = PyObject_Str(obj); 1998 if (encoded == NULL) 1999 return -1; 2000 return _steal_list_append(rval, encoded); 2001 } 2002 else if (PyFloat_Check(obj)) { 2003 PyObject *encoded = encoder_encode_float(s, obj); 2004 if (encoded == NULL) 2005 return -1; 2006 return _steal_list_append(rval, encoded); 2007 } 2008 else if (PyList_Check(obj) || PyTuple_Check(obj)) { 2009 if (Py_EnterRecursiveCall(" while encoding a JSON object")) 2010 return -1; 2011 rv = encoder_listencode_list(s, rval, obj, indent_level); 2012 Py_LeaveRecursiveCall(); 2013 return rv; 2014 } 2015 else if (PyDict_Check(obj)) { 2016 if (Py_EnterRecursiveCall(" while encoding a JSON object")) 2017 return -1; 2018 rv = encoder_listencode_dict(s, rval, obj, indent_level); 2019 Py_LeaveRecursiveCall(); 2020 return rv; 2021 } 2022 else { 2023 PyObject *ident = NULL; 2024 if (s->markers != Py_None) { 2025 int has_key; 2026 ident = PyLong_FromVoidPtr(obj); 2027 if (ident == NULL) 2028 return -1; 2029 has_key = PyDict_Contains(s->markers, ident); 2030 if (has_key) { 2031 if (has_key != -1) 2032 PyErr_SetString(PyExc_ValueError, "Circular reference detected"); 2033 Py_DECREF(ident); 2034 return -1; 2035 } 2036 if (PyDict_SetItem(s->markers, ident, obj)) { 2037 Py_DECREF(ident); 2038 return -1; 2039 } 2040 } 2041 newobj = PyObject_CallFunctionObjArgs(s->defaultfn, obj, NULL); 2042 if (newobj == NULL) { 2043 Py_XDECREF(ident); 2044 return -1; 2045 } 2046 2047 if (Py_EnterRecursiveCall(" while encoding a JSON object")) 2048 return -1; 2049 rv = encoder_listencode_obj(s, rval, newobj, indent_level); 2050 Py_LeaveRecursiveCall(); 2051 2052 Py_DECREF(newobj); 2053 if (rv) { 2054 Py_XDECREF(ident); 2055 return -1; 2056 } 2057 if (ident != NULL) { 2058 if (PyDict_DelItem(s->markers, ident)) { 2059 Py_XDECREF(ident); 2060 return -1; 2061 } 2062 Py_XDECREF(ident); 2063 } 2064 return rv; 2065 } 2066 } 2067 2068 static int 2069 encoder_listencode_dict(PyEncoderObject *s, PyObject *rval, PyObject *dct, Py_ssize_t indent_level) 2070 { 2071 /* Encode Python dict dct a JSON term, rval is a PyList */ 2072 static PyObject *open_dict = NULL; 2073 static PyObject *close_dict = NULL; 2074 static PyObject *empty_dict = NULL; 2075 PyObject *kstr = NULL; 2076 PyObject *ident = NULL; 2077 PyObject *key = NULL; 2078 PyObject *value = NULL; 2079 PyObject *it = NULL; 2080 int skipkeys; 2081 Py_ssize_t idx; 2082 2083 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) { 2084 open_dict = PyString_InternFromString("{"); 2085 close_dict = PyString_InternFromString("}"); 2086 empty_dict = PyString_InternFromString("{}"); 2087 if (open_dict == NULL || close_dict == NULL || empty_dict == NULL) 2088 return -1; 2089 } 2090 if (Py_SIZE(dct) == 0) 2091 return PyList_Append(rval, empty_dict); 2092 2093 if (s->markers != Py_None) { 2094 int has_key; 2095 ident = PyLong_FromVoidPtr(dct); 2096 if (ident == NULL) 2097 goto bail; 2098 has_key = PyDict_Contains(s->markers, ident); 2099 if (has_key) { 2100 if (has_key != -1) 2101 PyErr_SetString(PyExc_ValueError, "Circular reference detected"); 2102 goto bail; 2103 } 2104 if (PyDict_SetItem(s->markers, ident, dct)) { 2105 goto bail; 2106 } 2107 } 2108 2109 if (PyList_Append(rval, open_dict)) 2110 goto bail; 2111 2112 if (s->indent != Py_None) { 2113 /* TODO: DOES NOT RUN */ 2114 indent_level += 1; 2115 /* 2116 newline_indent = '\n' + (' ' * (_indent * _current_indent_level)) 2117 separator = _item_separator + newline_indent 2118 buf += newline_indent 2119 */ 2120 } 2121 2122 /* TODO: C speedup not implemented for sort_keys */ 2123 2124 it = PyObject_GetIter(dct); 2125 if (it == NULL) 2126 goto bail; 2127 skipkeys = PyObject_IsTrue(s->skipkeys); 2128 idx = 0; 2129 while ((key = PyIter_Next(it)) != NULL) { 2130 PyObject *encoded; 2131 2132 if (PyString_Check(key) || PyUnicode_Check(key)) { 2133 Py_INCREF(key); 2134 kstr = key; 2135 } 2136 else if (PyFloat_Check(key)) { 2137 kstr = encoder_encode_float(s, key); 2138 if (kstr == NULL) 2139 goto bail; 2140 } 2141 else if (PyInt_Check(key) || PyLong_Check(key)) { 2142 kstr = PyObject_Str(key); 2143 if (kstr == NULL) 2144 goto bail; 2145 } 2146 else if (key == Py_True || key == Py_False || key == Py_None) { 2147 kstr = _encoded_const(key); 2148 if (kstr == NULL) 2149 goto bail; 2150 } 2151 else if (skipkeys) { 2152 Py_DECREF(key); 2153 continue; 2154 } 2155 else { 2156 /* TODO: include repr of key */ 2157 PyErr_SetString(PyExc_TypeError, "keys must be a string"); 2158 goto bail; 2159 } 2160 2161 if (idx) { 2162 if (PyList_Append(rval, s->item_separator)) 2163 goto bail; 2164 } 2165 2166 value = PyObject_GetItem(dct, key); 2167 if (value == NULL) 2168 goto bail; 2169 2170 encoded = encoder_encode_string(s, kstr); 2171 Py_CLEAR(kstr); 2172 if (encoded == NULL) 2173 goto bail; 2174 if (PyList_Append(rval, encoded)) { 2175 Py_DECREF(encoded); 2176 goto bail; 2177 } 2178 Py_DECREF(encoded); 2179 if (PyList_Append(rval, s->key_separator)) 2180 goto bail; 2181 if (encoder_listencode_obj(s, rval, value, indent_level)) 2182 goto bail; 2183 idx += 1; 2184 Py_CLEAR(value); 2185 Py_DECREF(key); 2186 } 2187 if (PyErr_Occurred()) 2188 goto bail; 2189 Py_CLEAR(it); 2190 2191 if (ident != NULL) { 2192 if (PyDict_DelItem(s->markers, ident)) 2193 goto bail; 2194 Py_CLEAR(ident); 2195 } 2196 if (s->indent != Py_None) { 2197 /* TODO: DOES NOT RUN */ 2198 /* 2199 indent_level -= 1; 2200 2201 yield '\n' + (' ' * (_indent * _current_indent_level)) 2202 */ 2203 } 2204 if (PyList_Append(rval, close_dict)) 2205 goto bail; 2206 return 0; 2207 2208 bail: 2209 Py_XDECREF(it); 2210 Py_XDECREF(key); 2211 Py_XDECREF(value); 2212 Py_XDECREF(kstr); 2213 Py_XDECREF(ident); 2214 return -1; 2215 } 2216 2217 2218 static int 2219 encoder_listencode_list(PyEncoderObject *s, PyObject *rval, PyObject *seq, Py_ssize_t indent_level) 2220 { 2221 /* Encode Python list seq to a JSON term, rval is a PyList */ 2222 static PyObject *open_array = NULL; 2223 static PyObject *close_array = NULL; 2224 static PyObject *empty_array = NULL; 2225 PyObject *ident = NULL; 2226 PyObject *s_fast = NULL; 2227 Py_ssize_t i; 2228 2229 if (open_array == NULL || close_array == NULL || empty_array == NULL) { 2230 open_array = PyString_InternFromString("["); 2231 close_array = PyString_InternFromString("]"); 2232 empty_array = PyString_InternFromString("[]"); 2233 if (open_array == NULL || close_array == NULL || empty_array == NULL) 2234 return -1; 2235 } 2236 ident = NULL; 2237 s_fast = PySequence_Fast(seq, "_iterencode_list needs a sequence"); 2238 if (s_fast == NULL) 2239 return -1; 2240 if (PySequence_Fast_GET_SIZE(s_fast) == 0) { 2241 Py_DECREF(s_fast); 2242 return PyList_Append(rval, empty_array); 2243 } 2244 2245 if (s->markers != Py_None) { 2246 int has_key; 2247 ident = PyLong_FromVoidPtr(seq); 2248 if (ident == NULL) 2249 goto bail; 2250 has_key = PyDict_Contains(s->markers, ident); 2251 if (has_key) { 2252 if (has_key != -1) 2253 PyErr_SetString(PyExc_ValueError, "Circular reference detected"); 2254 goto bail; 2255 } 2256 if (PyDict_SetItem(s->markers, ident, seq)) { 2257 goto bail; 2258 } 2259 } 2260 2261 if (PyList_Append(rval, open_array)) 2262 goto bail; 2263 if (s->indent != Py_None) { 2264 /* TODO: DOES NOT RUN */ 2265 indent_level += 1; 2266 /* 2267 newline_indent = '\n' + (' ' * (_indent * _current_indent_level)) 2268 separator = _item_separator + newline_indent 2269 buf += newline_indent 2270 */ 2271 } 2272 for (i = 0; i < PySequence_Fast_GET_SIZE(s_fast); i++) { 2273 PyObject *obj = PySequence_Fast_GET_ITEM(s_fast, i); 2274 if (i) { 2275 if (PyList_Append(rval, s->item_separator)) 2276 goto bail; 2277 } 2278 if (encoder_listencode_obj(s, rval, obj, indent_level)) 2279 goto bail; 2280 } 2281 if (ident != NULL) { 2282 if (PyDict_DelItem(s->markers, ident)) 2283 goto bail; 2284 Py_CLEAR(ident); 2285 } 2286 if (s->indent != Py_None) { 2287 /* TODO: DOES NOT RUN */ 2288 /* 2289 indent_level -= 1; 2290 2291 yield '\n' + (' ' * (_indent * _current_indent_level)) 2292 */ 2293 } 2294 if (PyList_Append(rval, close_array)) 2295 goto bail; 2296 Py_DECREF(s_fast); 2297 return 0; 2298 2299 bail: 2300 Py_XDECREF(ident); 2301 Py_DECREF(s_fast); 2302 return -1; 2303 } 2304 2305 static void 2306 encoder_dealloc(PyObject *self) 2307 { 2308 /* Deallocate Encoder */ 2309 encoder_clear(self); 2310 Py_TYPE(self)->tp_free(self); 2311 } 2312 2313 static int 2314 encoder_traverse(PyObject *self, visitproc visit, void *arg) 2315 { 2316 PyEncoderObject *s; 2317 assert(PyEncoder_Check(self)); 2318 s = (PyEncoderObject *)self; 2319 Py_VISIT(s->markers); 2320 Py_VISIT(s->defaultfn); 2321 Py_VISIT(s->encoder); 2322 Py_VISIT(s->indent); 2323 Py_VISIT(s->key_separator); 2324 Py_VISIT(s->item_separator); 2325 Py_VISIT(s->sort_keys); 2326 Py_VISIT(s->skipkeys); 2327 return 0; 2328 } 2329 2330 static int 2331 encoder_clear(PyObject *self) 2332 { 2333 /* Deallocate Encoder */ 2334 PyEncoderObject *s; 2335 assert(PyEncoder_Check(self)); 2336 s = (PyEncoderObject *)self; 2337 Py_CLEAR(s->markers); 2338 Py_CLEAR(s->defaultfn); 2339 Py_CLEAR(s->encoder); 2340 Py_CLEAR(s->indent); 2341 Py_CLEAR(s->key_separator); 2342 Py_CLEAR(s->item_separator); 2343 Py_CLEAR(s->sort_keys); 2344 Py_CLEAR(s->skipkeys); 2345 return 0; 2346 } 2347 2348 PyDoc_STRVAR(encoder_doc, "_iterencode(obj, _current_indent_level) -> iterable"); 2349 2350 static 2351 PyTypeObject PyEncoderType = { 2352 PyObject_HEAD_INIT(NULL) 2353 0, /* tp_internal */ 2354 "_json.Encoder", /* tp_name */ 2355 sizeof(PyEncoderObject), /* tp_basicsize */ 2356 0, /* tp_itemsize */ 2357 encoder_dealloc, /* tp_dealloc */ 2358 0, /* tp_print */ 2359 0, /* tp_getattr */ 2360 0, /* tp_setattr */ 2361 0, /* tp_compare */ 2362 0, /* tp_repr */ 2363 0, /* tp_as_number */ 2364 0, /* tp_as_sequence */ 2365 0, /* tp_as_mapping */ 2366 0, /* tp_hash */ 2367 encoder_call, /* tp_call */ 2368 0, /* tp_str */ 2369 0, /* tp_getattro */ 2370 0, /* tp_setattro */ 2371 0, /* tp_as_buffer */ 2372 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC, /* tp_flags */ 2373 encoder_doc, /* tp_doc */ 2374 encoder_traverse, /* tp_traverse */ 2375 encoder_clear, /* tp_clear */ 2376 0, /* tp_richcompare */ 2377 0, /* tp_weaklistoffset */ 2378 0, /* tp_iter */ 2379 0, /* tp_iternext */ 2380 0, /* tp_methods */ 2381 encoder_members, /* tp_members */ 2382 0, /* tp_getset */ 2383 0, /* tp_base */ 2384 0, /* tp_dict */ 2385 0, /* tp_descr_get */ 2386 0, /* tp_descr_set */ 2387 0, /* tp_dictoffset */ 2388 encoder_init, /* tp_init */ 2389 0, /* tp_alloc */ 2390 encoder_new, /* tp_new */ 2391 0, /* tp_free */ 2392 }; 2393 2394 static PyMethodDef speedups_methods[] = { 2395 {"encode_basestring_ascii", 2396 (PyCFunction)py_encode_basestring_ascii, 2397 METH_O, 2398 pydoc_encode_basestring_ascii}, 2399 {"scanstring", 2400 (PyCFunction)py_scanstring, 2401 METH_VARARGS, 2402 pydoc_scanstring}, 610 2403 {NULL, NULL, 0, NULL} 611 2404 }; … … 618 2411 { 619 2412 PyObject *m; 620 m = Py_InitModule3("_json", json_methods, module_doc); 621 } 2413 PyScannerType.tp_new = PyType_GenericNew; 2414 if (PyType_Ready(&PyScannerType) < 0) 2415 return; 2416 PyEncoderType.tp_new = PyType_GenericNew; 2417 if (PyType_Ready(&PyEncoderType) < 0) 2418 return; 2419 m = Py_InitModule3("_json", speedups_methods, module_doc); 2420 Py_INCREF((PyObject*)&PyScannerType); 2421 PyModule_AddObject(m, "make_scanner", (PyObject*)&PyScannerType); 2422 Py_INCREF((PyObject*)&PyEncoderType); 2423 PyModule_AddObject(m, "make_encoder", (PyObject*)&PyEncoderType); 2424 }
Note:
See TracChangeset
for help on using the changeset viewer.