Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

_json.c@ 389

Last change on this file since 389 was 2, checked in by Yuri Dario, 15 years ago
Initial import for vendor code.
Property svn:eol-style set to `native`
File size: 19.7 KB

Line
1	#include "Python.h"
2
/* Encoding assumed for byte strings when the caller does not name one. */
#define DEFAULT_ENCODING "utf-8"
/*
 * True when c is a printable ASCII character (' ' through '~') that can be
 * copied into a JSON string verbatim, i.e. it is neither a backslash nor a
 * double quote.  The argument is parenthesized so that expression arguments
 * are evaluated as a unit; note that it is still evaluated more than once.
 */
#define S_CHAR(c) ((c) >= ' ' && (c) <= '~' && (c) != '\\' && (c) != '"')
/* Longest escape for a single UTF-16 code unit: backslash, 'u', 4 hex digits. */
#define MIN_EXPANSION 6

/* Wide builds may need a surrogate pair, i.e. two \uXXXX escapes per input. */
#ifdef Py_UNICODE_WIDE
#define MAX_EXPANSION (2 * MIN_EXPANSION)
#else
#define MAX_EXPANSION MIN_EXPANSION
#endif
12
13	static Py_ssize_t
14	ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars)
15	{
16	Py_UNICODE x;
17	output[chars++] = '\\';
18	switch (c) {
19	case '\\': output[chars++] = (char)c; break;
20	case '"': output[chars++] = (char)c; break;
21	case '\b': output[chars++] = 'b'; break;
22	case '\f': output[chars++] = 'f'; break;
23	case '\n': output[chars++] = 'n'; break;
24	case '\r': output[chars++] = 'r'; break;
25	case '\t': output[chars++] = 't'; break;
26	default:
27	#ifdef Py_UNICODE_WIDE
28	if (c >= 0x10000) {
29	/* UTF-16 surrogate pair */
30	Py_UNICODE v = c - 0x10000;
31	c = 0xd800 \| ((v >> 10) & 0x3ff);
32	output[chars++] = 'u';
33	x = (c & 0xf000) >> 12;
34	output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
35	x = (c & 0x0f00) >> 8;
36	output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
37	x = (c & 0x00f0) >> 4;
38	output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
39	x = (c & 0x000f);
40	output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
41	c = 0xdc00 \| (v & 0x3ff);
42	output[chars++] = '\\';
43	}
44	#endif
45	output[chars++] = 'u';
46	x = (c & 0xf000) >> 12;
47	output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
48	x = (c & 0x0f00) >> 8;
49	output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
50	x = (c & 0x00f0) >> 4;
51	output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
52	x = (c & 0x000f);
53	output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
54	}
55	return chars;
56	}
57
58	static PyObject *
59	ascii_escape_unicode(PyObject *pystr)
60	{
61	Py_ssize_t i;
62	Py_ssize_t input_chars;
63	Py_ssize_t output_size;
64	Py_ssize_t chars;
65	PyObject *rval;
66	char *output;
67	Py_UNICODE *input_unicode;
68
69	input_chars = PyUnicode_GET_SIZE(pystr);
70	input_unicode = PyUnicode_AS_UNICODE(pystr);
71	/* One char input can be up to 6 chars output, estimate 4 of these */
72	output_size = 2 + (MIN_EXPANSION * 4) + input_chars;
73	rval = PyString_FromStringAndSize(NULL, output_size);
74	if (rval == NULL) {
75	return NULL;
76	}
77	output = PyString_AS_STRING(rval);
78	chars = 0;
79	output[chars++] = '"';
80	for (i = 0; i < input_chars; i++) {
81	Py_UNICODE c = input_unicode[i];
82	if (S_CHAR(c)) {
83	output[chars++] = (char)c;
84	}
85	else {
86	chars = ascii_escape_char(c, output, chars);
87	}
88	if (output_size - chars < (1 + MAX_EXPANSION)) {
89	/* There's more than four, so let's resize by a lot */
90	output_size *= 2;
91	/* This is an upper bound */
92	if (output_size > 2 + (input_chars * MAX_EXPANSION)) {
93	output_size = 2 + (input_chars * MAX_EXPANSION);
94	}
95	if (_PyString_Resize(&rval, output_size) == -1) {
96	return NULL;
97	}
98	output = PyString_AS_STRING(rval);
99	}
100	}
101	output[chars++] = '"';
102	if (_PyString_Resize(&rval, chars) == -1) {
103	return NULL;
104	}
105	return rval;
106	}
107
108	static PyObject *
109	ascii_escape_str(PyObject *pystr)
110	{
111	Py_ssize_t i;
112	Py_ssize_t input_chars;
113	Py_ssize_t output_size;
114	Py_ssize_t chars;
115	PyObject *rval;
116	char *output;
117	char *input_str;
118
119	input_chars = PyString_GET_SIZE(pystr);
120	input_str = PyString_AS_STRING(pystr);
121	/* One char input can be up to 6 chars output, estimate 4 of these */
122	output_size = 2 + (MIN_EXPANSION * 4) + input_chars;
123	rval = PyString_FromStringAndSize(NULL, output_size);
124	if (rval == NULL) {
125	return NULL;
126	}
127	output = PyString_AS_STRING(rval);
128	chars = 0;
129	output[chars++] = '"';
130	for (i = 0; i < input_chars; i++) {
131	Py_UNICODE c = (Py_UNICODE)input_str[i];
132	if (S_CHAR(c)) {
133	output[chars++] = (char)c;
134	}
135	else if (c > 0x7F) {
136	/* We hit a non-ASCII character, bail to unicode mode */
137	PyObject *uni;
138	Py_DECREF(rval);
139	uni = PyUnicode_DecodeUTF8(input_str, input_chars, "strict");
140	if (uni == NULL) {
141	return NULL;
142	}
143	rval = ascii_escape_unicode(uni);
144	Py_DECREF(uni);
145	return rval;
146	}
147	else {
148	chars = ascii_escape_char(c, output, chars);
149	}
150	/* An ASCII char can't possibly expand to a surrogate! */
151	if (output_size - chars < (1 + MIN_EXPANSION)) {
152	/* There's more than four, so let's resize by a lot */
153	output_size *= 2;
154	if (output_size > 2 + (input_chars * MIN_EXPANSION)) {
155	output_size = 2 + (input_chars * MIN_EXPANSION);
156	}
157	if (_PyString_Resize(&rval, output_size) == -1) {
158	return NULL;
159	}
160	output = PyString_AS_STRING(rval);
161	}
162	}
163	output[chars++] = '"';
164	if (_PyString_Resize(&rval, chars) == -1) {
165	return NULL;
166	}
167	return rval;
168	}
169
170	void
171	raise_errmsg(char msg, PyObject s, Py_ssize_t end)
172	{
173	static PyObject *errmsg_fn = NULL;
174	PyObject *pymsg;
175	if (errmsg_fn == NULL) {
176	PyObject *decoder = PyImport_ImportModule("json.decoder");
177	if (decoder == NULL)
178	return;
179	errmsg_fn = PyObject_GetAttrString(decoder, "errmsg");
180	if (errmsg_fn == NULL)
181	return;
182	Py_DECREF(decoder);
183	}
184	pymsg = PyObject_CallFunction(errmsg_fn, "(zOn)", msg, s, end);
185	if (pymsg) {
186	PyErr_SetObject(PyExc_ValueError, pymsg);
187	Py_DECREF(pymsg);
188	}
189	/*
190
191	def linecol(doc, pos):
192	lineno = doc.count('\n', 0, pos) + 1
193	if lineno == 1:
194	colno = pos
195	else:
196	colno = pos - doc.rindex('\n', 0, pos)
197	return lineno, colno
198
199	def errmsg(msg, doc, pos, end=None):
200	lineno, colno = linecol(doc, pos)
201	if end is None:
202	return '%s: line %d column %d (char %d)' % (msg, lineno, colno, pos)
203	endlineno, endcolno = linecol(doc, end)
204	return '%s: line %d column %d - line %d column %d (char %d - %d)' % (
205	msg, lineno, colno, endlineno, endcolno, pos, end)
206
207	*/
208	}
209
210	static PyObject *
211	join_list_unicode(PyObject *lst)
212	{
213	static PyObject *ustr = NULL;
214	static PyObject *joinstr = NULL;
215	if (ustr == NULL) {
216	Py_UNICODE c = 0;
217	ustr = PyUnicode_FromUnicode(&c, 0);
218	}
219	if (joinstr == NULL) {
220	joinstr = PyString_InternFromString("join");
221	}
222	if (joinstr == NULL \|\| ustr == NULL) {
223	return NULL;
224	}
225	return PyObject_CallMethodObjArgs(ustr, joinstr, lst, NULL);
226	}
227
228	static PyObject *
229	scanstring_str(PyObject pystr, Py_ssize_t end, char encoding, int strict)
230	{
231	PyObject *rval;
232	Py_ssize_t len = PyString_GET_SIZE(pystr);
233	Py_ssize_t begin = end - 1;
234	Py_ssize_t next = begin;
235	char *buf = PyString_AS_STRING(pystr);
236	PyObject *chunks = PyList_New(0);
237	if (chunks == NULL) {
238	goto bail;
239	}
240	if (end < 0 \|\| len <= end) {
241	PyErr_SetString(PyExc_ValueError, "end is out of bounds");
242	goto bail;
243	}
244	while (1) {
245	/* Find the end of the string or the next escape */
246	Py_UNICODE c = 0;
247	PyObject *chunk = NULL;
248	for (next = end; next < len; next++) {
249	c = buf[next];
250	if (c == '"' \|\| c == '\\') {
251	break;
252	}
253	else if (strict && c <= 0x1f) {
254	raise_errmsg("Invalid control character at", pystr, next);
255	goto bail;
256	}
257	}
258	if (!(c == '"' \|\| c == '\\')) {
259	raise_errmsg("Unterminated string starting at", pystr, begin);
260	goto bail;
261	}
262	/* Pick up this chunk if it's not zero length */
263	if (next != end) {
264	PyObject *strchunk = PyBuffer_FromMemory(&buf[end], next - end);
265	if (strchunk == NULL) {
266	goto bail;
267	}
268	chunk = PyUnicode_FromEncodedObject(strchunk, encoding, NULL);
269	Py_DECREF(strchunk);
270	if (chunk == NULL) {
271	goto bail;
272	}
273	if (PyList_Append(chunks, chunk)) {
274	Py_DECREF(chunk);
275	goto bail;
276	}
277	Py_DECREF(chunk);
278	}
279	next++;
280	if (c == '"') {
281	end = next;
282	break;
283	}
284	if (next == len) {
285	raise_errmsg("Unterminated string starting at", pystr, begin);
286	goto bail;
287	}
288	c = buf[next];
289	if (c != 'u') {
290	/* Non-unicode backslash escapes */
291	end = next + 1;
292	switch (c) {
293	case '"': break;
294	case '\\': break;
295	case '/': break;
296	case 'b': c = '\b'; break;
297	case 'f': c = '\f'; break;
298	case 'n': c = '\n'; break;
299	case 'r': c = '\r'; break;
300	case 't': c = '\t'; break;
301	default: c = 0;
302	}
303	if (c == 0) {
304	raise_errmsg("Invalid \\escape", pystr, end - 2);
305	goto bail;
306	}
307	}
308	else {
309	c = 0;
310	next++;
311	end = next + 4;
312	if (end >= len) {
313	raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
314	goto bail;
315	}
316	/* Decode 4 hex digits */
317	for (; next < end; next++) {
318	Py_ssize_t shl = (end - next - 1) << 2;
319	Py_UNICODE digit = buf[next];
320	switch (digit) {
321	case '0': case '1': case '2': case '3': case '4':
322	case '5': case '6': case '7': case '8': case '9':
323	c \|= (digit - '0') << shl; break;
324	case 'a': case 'b': case 'c': case 'd': case 'e':
325	case 'f':
326	c \|= (digit - 'a' + 10) << shl; break;
327	case 'A': case 'B': case 'C': case 'D': case 'E':
328	case 'F':
329	c \|= (digit - 'A' + 10) << shl; break;
330	default:
331	raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
332	goto bail;
333	}
334	}
335	#ifdef Py_UNICODE_WIDE
336	/* Surrogate pair */
337	if (c >= 0xd800 && c <= 0xdbff) {
338	Py_UNICODE c2 = 0;
339	if (end + 6 >= len) {
340	raise_errmsg("Invalid \\uXXXX\\uXXXX surrogate pair", pystr,
341	end - 5);
342	}
343	if (buf[next++] != '\\' \|\| buf[next++] != 'u') {
344	raise_errmsg("Invalid \\uXXXX\\uXXXX surrogate pair", pystr,
345	end - 5);
346	}
347	end += 6;
348	/* Decode 4 hex digits */
349	for (; next < end; next++) {
350	Py_ssize_t shl = (end - next - 1) << 2;
351	Py_UNICODE digit = buf[next];
352	switch (digit) {
353	case '0': case '1': case '2': case '3': case '4':
354	case '5': case '6': case '7': case '8': case '9':
355	c2 \|= (digit - '0') << shl; break;
356	case 'a': case 'b': case 'c': case 'd': case 'e':
357	case 'f':
358	c2 \|= (digit - 'a' + 10) << shl; break;
359	case 'A': case 'B': case 'C': case 'D': case 'E':
360	case 'F':
361	c2 \|= (digit - 'A' + 10) << shl; break;
362	default:
363	raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
364	goto bail;
365	}
366	}
367	c = 0x10000 + (((c - 0xd800) << 10) \| (c2 - 0xdc00));
368	}
369	#endif
370	}
371	chunk = PyUnicode_FromUnicode(&c, 1);
372	if (chunk == NULL) {
373	goto bail;
374	}
375	if (PyList_Append(chunks, chunk)) {
376	Py_DECREF(chunk);
377	goto bail;
378	}
379	Py_DECREF(chunk);
380	}
381
382	rval = join_list_unicode(chunks);
383	if (rval == NULL) {
384	goto bail;
385	}
386	Py_CLEAR(chunks);
387	return Py_BuildValue("(Nn)", rval, end);
388	bail:
389	Py_XDECREF(chunks);
390	return NULL;
391	}
392
393
394	static PyObject *
395	scanstring_unicode(PyObject *pystr, Py_ssize_t end, int strict)
396	{
397	PyObject *rval;
398	Py_ssize_t len = PyUnicode_GET_SIZE(pystr);
399	Py_ssize_t begin = end - 1;
400	Py_ssize_t next = begin;
401	const Py_UNICODE *buf = PyUnicode_AS_UNICODE(pystr);
402	PyObject *chunks = PyList_New(0);
403	if (chunks == NULL) {
404	goto bail;
405	}
406	if (end < 0 \|\| len <= end) {
407	PyErr_SetString(PyExc_ValueError, "end is out of bounds");
408	goto bail;
409	}
410	while (1) {
411	/* Find the end of the string or the next escape */
412	Py_UNICODE c = 0;
413	PyObject *chunk = NULL;
414	for (next = end; next < len; next++) {
415	c = buf[next];
416	if (c == '"' \|\| c == '\\') {
417	break;
418	}
419	else if (strict && c <= 0x1f) {
420	raise_errmsg("Invalid control character at", pystr, next);
421	goto bail;
422	}
423	}
424	if (!(c == '"' \|\| c == '\\')) {
425	raise_errmsg("Unterminated string starting at", pystr, begin);
426	goto bail;
427	}
428	/* Pick up this chunk if it's not zero length */
429	if (next != end) {
430	chunk = PyUnicode_FromUnicode(&buf[end], next - end);
431	if (chunk == NULL) {
432	goto bail;
433	}
434	if (PyList_Append(chunks, chunk)) {
435	Py_DECREF(chunk);
436	goto bail;
437	}
438	Py_DECREF(chunk);
439	}
440	next++;
441	if (c == '"') {
442	end = next;
443	break;
444	}
445	if (next == len) {
446	raise_errmsg("Unterminated string starting at", pystr, begin);
447	goto bail;
448	}
449	c = buf[next];
450	if (c != 'u') {
451	/* Non-unicode backslash escapes */
452	end = next + 1;
453	switch (c) {
454	case '"': break;
455	case '\\': break;
456	case '/': break;
457	case 'b': c = '\b'; break;
458	case 'f': c = '\f'; break;
459	case 'n': c = '\n'; break;
460	case 'r': c = '\r'; break;
461	case 't': c = '\t'; break;
462	default: c = 0;
463	}
464	if (c == 0) {
465	raise_errmsg("Invalid \\escape", pystr, end - 2);
466	goto bail;
467	}
468	}
469	else {
470	c = 0;
471	next++;
472	end = next + 4;
473	if (end >= len) {
474	raise_errmsg("Invalid \\uXXXX escape", pystr, next - 1);
475	goto bail;
476	}
477	/* Decode 4 hex digits */
478	for (; next < end; next++) {
479	Py_ssize_t shl = (end - next - 1) << 2;
480	Py_UNICODE digit = buf[next];
481	switch (digit) {
482	case '0': case '1': case '2': case '3': case '4':
483	case '5': case '6': case '7': case '8': case '9':
484	c \|= (digit - '0') << shl; break;
485	case 'a': case 'b': case 'c': case 'd': case 'e':
486	case 'f':
487	c \|= (digit - 'a' + 10) << shl; break;
488	case 'A': case 'B': case 'C': case 'D': case 'E':
489	case 'F':
490	c \|= (digit - 'A' + 10) << shl; break;
491	default:
492	raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
493	goto bail;
494	}
495	}
496	#ifdef Py_UNICODE_WIDE
497	/* Surrogate pair */
498	if (c >= 0xd800 && c <= 0xdbff) {
499	Py_UNICODE c2 = 0;
500	if (end + 6 >= len) {
501	raise_errmsg("Invalid \\uXXXX\\uXXXX surrogate pair", pystr,
502	end - 5);
503	}
504	if (buf[next++] != '\\' \|\| buf[next++] != 'u') {
505	raise_errmsg("Invalid \\uXXXX\\uXXXX surrogate pair", pystr,
506	end - 5);
507	}
508	end += 6;
509	/* Decode 4 hex digits */
510	for (; next < end; next++) {
511	Py_ssize_t shl = (end - next - 1) << 2;
512	Py_UNICODE digit = buf[next];
513	switch (digit) {
514	case '0': case '1': case '2': case '3': case '4':
515	case '5': case '6': case '7': case '8': case '9':
516	c2 \|= (digit - '0') << shl; break;
517	case 'a': case 'b': case 'c': case 'd': case 'e':
518	case 'f':
519	c2 \|= (digit - 'a' + 10) << shl; break;
520	case 'A': case 'B': case 'C': case 'D': case 'E':
521	case 'F':
522	c2 \|= (digit - 'A' + 10) << shl; break;
523	default:
524	raise_errmsg("Invalid \\uXXXX escape", pystr, end - 5);
525	goto bail;
526	}
527	}
528	c = 0x10000 + (((c - 0xd800) << 10) \| (c2 - 0xdc00));
529	}
530	#endif
531	}
532	chunk = PyUnicode_FromUnicode(&c, 1);
533	if (chunk == NULL) {
534	goto bail;
535	}
536	if (PyList_Append(chunks, chunk)) {
537	Py_DECREF(chunk);
538	goto bail;
539	}
540	Py_DECREF(chunk);
541	}
542
543	rval = join_list_unicode(chunks);
544	if (rval == NULL) {
545	goto bail;
546	}
547	Py_CLEAR(chunks);
548	return Py_BuildValue("(Nn)", rval, end);
549	bail:
550	Py_XDECREF(chunks);
551	return NULL;
552	}
553
554	PyDoc_STRVAR(pydoc_scanstring,
555	"scanstring(basestring, end, encoding) -> (str, end)\n");
556
557	static PyObject *
558	py_scanstring(PyObject* self, PyObject *args)
559	{
560	PyObject *pystr;
561	Py_ssize_t end;
562	char *encoding = NULL;
563	int strict = 0;
564	if (!PyArg_ParseTuple(args, "On\|zi:scanstring", &pystr, &end, &encoding, &strict)) {
565	return NULL;
566	}
567	if (encoding == NULL) {
568	encoding = DEFAULT_ENCODING;
569	}
570	if (PyString_Check(pystr)) {
571	return scanstring_str(pystr, end, encoding, strict);
572	}
573	else if (PyUnicode_Check(pystr)) {
574	return scanstring_unicode(pystr, end, strict);
575	}
576	else {
577	PyErr_Format(PyExc_TypeError,
578	"first argument must be a string or unicode, not %.80s",
579	Py_TYPE(pystr)->tp_name);
580	return NULL;
581	}
582	}
583
584	PyDoc_STRVAR(pydoc_encode_basestring_ascii,
585	"encode_basestring_ascii(basestring) -> str\n");
586
587	static PyObject *
588	py_encode_basestring_ascii(PyObject* self, PyObject *pystr)
589	{
590	/* METH_O */
591	if (PyString_Check(pystr)) {
592	return ascii_escape_str(pystr);
593	}
594	else if (PyUnicode_Check(pystr)) {
595	return ascii_escape_unicode(pystr);
596	}
597	else {
598	PyErr_Format(PyExc_TypeError,
599	"first argument must be a string or unicode, not %.80s",
600	Py_TYPE(pystr)->tp_name);
601	return NULL;
602	}
603	}
604
605	static PyMethodDef json_methods[] = {
606	{"encode_basestring_ascii", (PyCFunction)py_encode_basestring_ascii,
607	METH_O, pydoc_encode_basestring_ascii},
608	{"scanstring", (PyCFunction)py_scanstring, METH_VARARGS,
609	pydoc_scanstring},
610	{NULL, NULL, 0, NULL}
611	};
612
613	PyDoc_STRVAR(module_doc,
614	"json speedups\n");
615
616	void
617	init_json(void)
618	{
619	PyObject *m;
620	m = Py_InitModule3("_json", json_methods, module_doc);
621	}

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: python/trunk/Modules/_json.c@ 389

Download in other formats: