Context Navigation

← Previous Revision
Next Revision →
Blame
Revision Log

codecs.c

Last change on this file was 388, checked in by dmik, 11 years ago
python: Update vendor to 2.7.6.
Property svn:eol-style set to `native`
File size: 24.7 KB

Line
1	/* ------------------------------------------------------------------------
2
3	Python Codec Registry and support functions
4
5	Written by Marc-Andre Lemburg (mal@lemburg.com).
6
7	Copyright (c) Corporation for National Research Initiatives.
8
9	------------------------------------------------------------------------ */
10
11	#include "Python.h"
12	#include <ctype.h>
13
14	/* --- Codec Registry ----------------------------------------------------- */
15
16	/* Import the standard encodings package which will register the first
17	codec search function.
18
19	This is done in a lazy way so that the Unicode implementation does
20	not downgrade startup time of scripts not needing it.
21
22	ImportErrors are silently ignored by this function. Only one try is
23	made.
24
25	*/
26
27	static int _PyCodecRegistry_Init(void); /* Forward */
28
29	int PyCodec_Register(PyObject *search_function)
30	{
31	PyInterpreterState *interp = PyThreadState_GET()->interp;
32	if (interp->codec_search_path == NULL && _PyCodecRegistry_Init())
33	goto onError;
34	if (search_function == NULL) {
35	PyErr_BadArgument();
36	goto onError;
37	}
38	if (!PyCallable_Check(search_function)) {
39	PyErr_SetString(PyExc_TypeError, "argument must be callable");
40	goto onError;
41	}
42	return PyList_Append(interp->codec_search_path, search_function);
43
44	onError:
45	return -1;
46	}
47
/* Convert a string to a normalized Python string: all characters are
   converted to lower case, spaces are replaced with hyphens. */
50
51	static
52	PyObject normalizestring(const char string)
53	{
54	register size_t i;
55	size_t len = strlen(string);
56	char *p;
57	PyObject *v;
58
59	if (len > PY_SSIZE_T_MAX) {
60	PyErr_SetString(PyExc_OverflowError, "string is too large");
61	return NULL;
62	}
63
64	v = PyString_FromStringAndSize(NULL, len);
65	if (v == NULL)
66	return NULL;
67	p = PyString_AS_STRING(v);
68	for (i = 0; i < len; i++) {
69	register char ch = string[i];
70	if (ch == ' ')
71	ch = '-';
72	else
73	ch = Py_TOLOWER(Py_CHARMASK(ch));
74	p[i] = ch;
75	}
76	return v;
77	}
78
79	/* Lookup the given encoding and return a tuple providing the codec
80	facilities.
81
82	The encoding string is looked up converted to all lower-case
83	characters. This makes encodings looked up through this mechanism
84	effectively case-insensitive.
85
86	If no codec is found, a LookupError is set and NULL returned.
87
88	As side effect, this tries to load the encodings package, if not
89	yet done. This is part of the lazy load strategy for the encodings
90	package.
91
92	*/
93
94	PyObject _PyCodec_Lookup(const char encoding)
95	{
96	PyInterpreterState *interp;
97	PyObject result, args = NULL, *v;
98	Py_ssize_t i, len;
99
100	if (encoding == NULL) {
101	PyErr_BadArgument();
102	goto onError;
103	}
104
105	interp = PyThreadState_GET()->interp;
106	if (interp->codec_search_path == NULL && _PyCodecRegistry_Init())
107	goto onError;
108
109	/* Convert the encoding to a normalized Python string: all
110	characters are converted to lower case, spaces and hyphens are
111	replaced with underscores. */
112	v = normalizestring(encoding);
113	if (v == NULL)
114	goto onError;
115	PyString_InternInPlace(&v);
116
117	/* First, try to lookup the name in the registry dictionary */
118	result = PyDict_GetItem(interp->codec_search_cache, v);
119	if (result != NULL) {
120	Py_INCREF(result);
121	Py_DECREF(v);
122	return result;
123	}
124
125	/* Next, scan the search functions in order of registration */
126	args = PyTuple_New(1);
127	if (args == NULL)
128	goto onError;
129	PyTuple_SET_ITEM(args,0,v);
130
131	len = PyList_Size(interp->codec_search_path);
132	if (len < 0)
133	goto onError;
134	if (len == 0) {
135	PyErr_SetString(PyExc_LookupError,
136	"no codec search functions registered: "
137	"can't find encoding");
138	goto onError;
139	}
140
141	for (i = 0; i < len; i++) {
142	PyObject *func;
143
144	func = PyList_GetItem(interp->codec_search_path, i);
145	if (func == NULL)
146	goto onError;
147	result = PyEval_CallObject(func, args);
148	if (result == NULL)
149	goto onError;
150	if (result == Py_None) {
151	Py_DECREF(result);
152	continue;
153	}
154	if (!PyTuple_Check(result) \|\| PyTuple_GET_SIZE(result) != 4) {
155	PyErr_SetString(PyExc_TypeError,
156	"codec search functions must return 4-tuples");
157	Py_DECREF(result);
158	goto onError;
159	}
160	break;
161	}
162	if (i == len) {
163	/* XXX Perhaps we should cache misses too ? */
164	PyErr_Format(PyExc_LookupError,
165	"unknown encoding: %s", encoding);
166	goto onError;
167	}
168
169	/* Cache and return the result */
170	PyDict_SetItem(interp->codec_search_cache, v, result);
171	Py_DECREF(args);
172	return result;
173
174	onError:
175	Py_XDECREF(args);
176	return NULL;
177	}
178
179	static
180	PyObject args_tuple(PyObject object,
181	const char *errors)
182	{
183	PyObject *args;
184
185	args = PyTuple_New(1 + (errors != NULL));
186	if (args == NULL)
187	return NULL;
188	Py_INCREF(object);
189	PyTuple_SET_ITEM(args,0,object);
190	if (errors) {
191	PyObject *v;
192
193	v = PyString_FromString(errors);
194	if (v == NULL) {
195	Py_DECREF(args);
196	return NULL;
197	}
198	PyTuple_SET_ITEM(args, 1, v);
199	}
200	return args;
201	}
202
203	/* Helper function to get a codec item */
204
205	static
206	PyObject codec_getitem(const char encoding, int index)
207	{
208	PyObject *codecs;
209	PyObject *v;
210
211	codecs = _PyCodec_Lookup(encoding);
212	if (codecs == NULL)
213	return NULL;
214	v = PyTuple_GET_ITEM(codecs, index);
215	Py_DECREF(codecs);
216	Py_INCREF(v);
217	return v;
218	}
219
220	/* Helper function to create an incremental codec. */
221
222	static
223	PyObject codec_getincrementalcodec(const char encoding,
224	const char *errors,
225	const char *attrname)
226	{
227	PyObject codecs, ret, *inccodec;
228
229	codecs = _PyCodec_Lookup(encoding);
230	if (codecs == NULL)
231	return NULL;
232	inccodec = PyObject_GetAttrString(codecs, attrname);
233	Py_DECREF(codecs);
234	if (inccodec == NULL)
235	return NULL;
236	if (errors)
237	ret = PyObject_CallFunction(inccodec, "s", errors);
238	else
239	ret = PyObject_CallFunction(inccodec, NULL);
240	Py_DECREF(inccodec);
241	return ret;
242	}
243
244	/* Helper function to create a stream codec. */
245
246	static
247	PyObject codec_getstreamcodec(const char encoding,
248	PyObject *stream,
249	const char *errors,
250	const int index)
251	{
252	PyObject codecs, streamcodec, *codeccls;
253
254	codecs = _PyCodec_Lookup(encoding);
255	if (codecs == NULL)
256	return NULL;
257
258	codeccls = PyTuple_GET_ITEM(codecs, index);
259	if (errors != NULL)
260	streamcodec = PyObject_CallFunction(codeccls, "Os", stream, errors);
261	else
262	streamcodec = PyObject_CallFunction(codeccls, "O", stream);
263	Py_DECREF(codecs);
264	return streamcodec;
265	}
266
267	/* Convenience APIs to query the Codec registry.
268
269	All APIs return a codec object with incremented refcount.
270
271	*/
272
273	PyObject PyCodec_Encoder(const char encoding)
274	{
275	return codec_getitem(encoding, 0);
276	}
277
278	PyObject PyCodec_Decoder(const char encoding)
279	{
280	return codec_getitem(encoding, 1);
281	}
282
283	PyObject PyCodec_IncrementalEncoder(const char encoding,
284	const char *errors)
285	{
286	return codec_getincrementalcodec(encoding, errors, "incrementalencoder");
287	}
288
289	PyObject PyCodec_IncrementalDecoder(const char encoding,
290	const char *errors)
291	{
292	return codec_getincrementalcodec(encoding, errors, "incrementaldecoder");
293	}
294
295	PyObject PyCodec_StreamReader(const char encoding,
296	PyObject *stream,
297	const char *errors)
298	{
299	return codec_getstreamcodec(encoding, stream, errors, 2);
300	}
301
302	PyObject PyCodec_StreamWriter(const char encoding,
303	PyObject *stream,
304	const char *errors)
305	{
306	return codec_getstreamcodec(encoding, stream, errors, 3);
307	}
308
309	/* Encode an object (e.g. an Unicode object) using the given encoding
310	and return the resulting encoded object (usually a Python string).
311
312	errors is passed to the encoder factory as argument if non-NULL. */
313
314	PyObject PyCodec_Encode(PyObject object,
315	const char *encoding,
316	const char *errors)
317	{
318	PyObject *encoder = NULL;
319	PyObject args = NULL, result = NULL;
320	PyObject *v;
321
322	encoder = PyCodec_Encoder(encoding);
323	if (encoder == NULL)
324	goto onError;
325
326	args = args_tuple(object, errors);
327	if (args == NULL)
328	goto onError;
329
330	result = PyEval_CallObject(encoder,args);
331	if (result == NULL)
332	goto onError;
333
334	if (!PyTuple_Check(result) \|\|
335	PyTuple_GET_SIZE(result) != 2) {
336	PyErr_SetString(PyExc_TypeError,
337	"encoder must return a tuple (object,integer)");
338	goto onError;
339	}
340	v = PyTuple_GET_ITEM(result,0);
341	Py_INCREF(v);
342	/* We don't check or use the second (integer) entry. */
343
344	Py_DECREF(args);
345	Py_DECREF(encoder);
346	Py_DECREF(result);
347	return v;
348
349	onError:
350	Py_XDECREF(result);
351	Py_XDECREF(args);
352	Py_XDECREF(encoder);
353	return NULL;
354	}
355
356	/* Decode an object (usually a Python string) using the given encoding
357	and return an equivalent object (e.g. an Unicode object).
358
359	errors is passed to the decoder factory as argument if non-NULL. */
360
361	PyObject PyCodec_Decode(PyObject object,
362	const char *encoding,
363	const char *errors)
364	{
365	PyObject *decoder = NULL;
366	PyObject args = NULL, result = NULL;
367	PyObject *v;
368
369	decoder = PyCodec_Decoder(encoding);
370	if (decoder == NULL)
371	goto onError;
372
373	args = args_tuple(object, errors);
374	if (args == NULL)
375	goto onError;
376
377	result = PyEval_CallObject(decoder,args);
378	if (result == NULL)
379	goto onError;
380	if (!PyTuple_Check(result) \|\|
381	PyTuple_GET_SIZE(result) != 2) {
382	PyErr_SetString(PyExc_TypeError,
383	"decoder must return a tuple (object,integer)");
384	goto onError;
385	}
386	v = PyTuple_GET_ITEM(result,0);
387	Py_INCREF(v);
388	/* We don't check or use the second (integer) entry. */
389
390	Py_DECREF(args);
391	Py_DECREF(decoder);
392	Py_DECREF(result);
393	return v;
394
395	onError:
396	Py_XDECREF(args);
397	Py_XDECREF(decoder);
398	Py_XDECREF(result);
399	return NULL;
400	}
401
/* Register the error handling callback function `error` under the name
   `name`. This function will be called by the codec when it encounters
   unencodable characters/undecodable bytes and doesn't know the
   callback name itself, when `name` is specified as the error parameter
   in the call to the encode/decode function.
   Return 0 on success, -1 on error */
408	int PyCodec_RegisterError(const char name, PyObject error)
409	{
410	PyInterpreterState *interp = PyThreadState_GET()->interp;
411	if (interp->codec_search_path == NULL && _PyCodecRegistry_Init())
412	return -1;
413	if (!PyCallable_Check(error)) {
414	PyErr_SetString(PyExc_TypeError, "handler must be callable");
415	return -1;
416	}
417	return PyDict_SetItemString(interp->codec_error_registry,
418	(char *)name, error);
419	}
420
/* Look up the error handling callback function registered under
   `name`. As a special case NULL can be passed, in which case
   the error handling callback for strict encoding will be returned. */
424	PyObject PyCodec_LookupError(const char name)
425	{
426	PyObject *handler = NULL;
427
428	PyInterpreterState *interp = PyThreadState_GET()->interp;
429	if (interp->codec_search_path == NULL && _PyCodecRegistry_Init())
430	return NULL;
431
432	if (name==NULL)
433	name = "strict";
434	handler = PyDict_GetItemString(interp->codec_error_registry, (char *)name);
435	if (!handler)
436	PyErr_Format(PyExc_LookupError, "unknown error handler name '%.400s'", name);
437	else
438	Py_INCREF(handler);
439	return handler;
440	}
441
442	static void wrong_exception_type(PyObject *exc)
443	{
444	PyObject *type = PyObject_GetAttrString(exc, "__class__");
445	if (type != NULL) {
446	PyObject *name = PyObject_GetAttrString(type, "__name__");
447	Py_DECREF(type);
448	if (name != NULL) {
449	PyObject *string = PyObject_Str(name);
450	Py_DECREF(name);
451	if (string != NULL) {
452	PyErr_Format(PyExc_TypeError,
453	"don't know how to handle %.400s in error callback",
454	PyString_AS_STRING(string));
455	Py_DECREF(string);
456	}
457	}
458	}
459	}
460
461	PyObject PyCodec_StrictErrors(PyObject exc)
462	{
463	if (PyExceptionInstance_Check(exc))
464	PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
465	else
466	PyErr_SetString(PyExc_TypeError, "codec must pass exception instance");
467	return NULL;
468	}
469
470
471	#ifdef Py_USING_UNICODE
472	PyObject PyCodec_IgnoreErrors(PyObject exc)
473	{
474	Py_ssize_t end;
475	if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
476	if (PyUnicodeEncodeError_GetEnd(exc, &end))
477	return NULL;
478	}
479	else if (PyObject_IsInstance(exc, PyExc_UnicodeDecodeError)) {
480	if (PyUnicodeDecodeError_GetEnd(exc, &end))
481	return NULL;
482	}
483	else if (PyObject_IsInstance(exc, PyExc_UnicodeTranslateError)) {
484	if (PyUnicodeTranslateError_GetEnd(exc, &end))
485	return NULL;
486	}
487	else {
488	wrong_exception_type(exc);
489	return NULL;
490	}
491	/* ouch: passing NULL, 0, pos gives None instead of u'' */
492	return Py_BuildValue("(u#n)", &end, 0, end);
493	}
494
495
496	PyObject PyCodec_ReplaceErrors(PyObject exc)
497	{
498	PyObject *restuple;
499	Py_ssize_t start;
500	Py_ssize_t end;
501	Py_ssize_t i;
502
503	if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
504	PyObject *res;
505	Py_UNICODE *p;
506	if (PyUnicodeEncodeError_GetStart(exc, &start))
507	return NULL;
508	if (PyUnicodeEncodeError_GetEnd(exc, &end))
509	return NULL;
510	res = PyUnicode_FromUnicode(NULL, end-start);
511	if (res == NULL)
512	return NULL;
513	for (p = PyUnicode_AS_UNICODE(res), i = start;
514	i<end; ++p, ++i)
515	*p = '?';
516	restuple = Py_BuildValue("(On)", res, end);
517	Py_DECREF(res);
518	return restuple;
519	}
520	else if (PyObject_IsInstance(exc, PyExc_UnicodeDecodeError)) {
521	Py_UNICODE res = Py_UNICODE_REPLACEMENT_CHARACTER;
522	if (PyUnicodeDecodeError_GetEnd(exc, &end))
523	return NULL;
524	return Py_BuildValue("(u#n)", &res, (Py_ssize_t)1, end);
525	}
526	else if (PyObject_IsInstance(exc, PyExc_UnicodeTranslateError)) {
527	PyObject *res;
528	Py_UNICODE *p;
529	if (PyUnicodeTranslateError_GetStart(exc, &start))
530	return NULL;
531	if (PyUnicodeTranslateError_GetEnd(exc, &end))
532	return NULL;
533	res = PyUnicode_FromUnicode(NULL, end-start);
534	if (res == NULL)
535	return NULL;
536	for (p = PyUnicode_AS_UNICODE(res), i = start;
537	i<end; ++p, ++i)
538	*p = Py_UNICODE_REPLACEMENT_CHARACTER;
539	restuple = Py_BuildValue("(On)", res, end);
540	Py_DECREF(res);
541	return restuple;
542	}
543	else {
544	wrong_exception_type(exc);
545	return NULL;
546	}
547	}
548
549	PyObject PyCodec_XMLCharRefReplaceErrors(PyObject exc)
550	{
551	if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
552	PyObject *restuple;
553	PyObject *object;
554	Py_ssize_t start;
555	Py_ssize_t end;
556	PyObject *res;
557	Py_UNICODE *p;
558	Py_UNICODE *startp;
559	Py_UNICODE *e;
560	Py_UNICODE *outp;
561	int ressize;
562	if (PyUnicodeEncodeError_GetStart(exc, &start))
563	return NULL;
564	if (PyUnicodeEncodeError_GetEnd(exc, &end))
565	return NULL;
566	if (!(object = PyUnicodeEncodeError_GetObject(exc)))
567	return NULL;
568	startp = PyUnicode_AS_UNICODE(object);
569	e = startp + end;
570	for (p = startp+start, ressize = 0; p < e;) {
571	Py_UCS4 ch = *p++;
572	#ifndef Py_UNICODE_WIDE
573	if ((0xD800 <= ch && ch <= 0xDBFF) &&
574	(p < e) &&
575	(0xDC00 <= p && p <= 0xDFFF)) {
576	ch = ((((ch & 0x03FF) << 10) \|
577	((Py_UCS4)*p++ & 0x03FF)) + 0x10000);
578	}
579	#endif
580	if (ch < 10)
581	ressize += 2+1+1;
582	else if (ch < 100)
583	ressize += 2+2+1;
584	else if (ch < 1000)
585	ressize += 2+3+1;
586	else if (ch < 10000)
587	ressize += 2+4+1;
588	else if (ch < 100000)
589	ressize += 2+5+1;
590	else if (ch < 1000000)
591	ressize += 2+6+1;
592	else
593	ressize += 2+7+1;
594	}
595	/* allocate replacement */
596	res = PyUnicode_FromUnicode(NULL, ressize);
597	if (res == NULL) {
598	Py_DECREF(object);
599	return NULL;
600	}
601	/* generate replacement */
602	for (p = startp+start, outp = PyUnicode_AS_UNICODE(res); p < e;) {
603	int digits;
604	int base;
605	Py_UCS4 ch = *p++;
606	#ifndef Py_UNICODE_WIDE
607	if ((0xD800 <= ch && ch <= 0xDBFF) &&
608	(p < startp+end) &&
609	(0xDC00 <= p && p <= 0xDFFF)) {
610	ch = ((((ch & 0x03FF) << 10) \|
611	((Py_UCS4)*p++ & 0x03FF)) + 0x10000);
612	}
613	#endif
614	*outp++ = '&';
615	*outp++ = '#';
616	if (ch < 10) {
617	digits = 1;
618	base = 1;
619	}
620	else if (ch < 100) {
621	digits = 2;
622	base = 10;
623	}
624	else if (ch < 1000) {
625	digits = 3;
626	base = 100;
627	}
628	else if (ch < 10000) {
629	digits = 4;
630	base = 1000;
631	}
632	else if (ch < 100000) {
633	digits = 5;
634	base = 10000;
635	}
636	else if (ch < 1000000) {
637	digits = 6;
638	base = 100000;
639	}
640	else {
641	digits = 7;
642	base = 1000000;
643	}
644	while (digits-->0) {
645	*outp++ = '0' + ch/base;
646	ch %= base;
647	base /= 10;
648	}
649	*outp++ = ';';
650	}
651	restuple = Py_BuildValue("(On)", res, end);
652	Py_DECREF(res);
653	Py_DECREF(object);
654	return restuple;
655	}
656	else {
657	wrong_exception_type(exc);
658	return NULL;
659	}
660	}
661
662	static Py_UNICODE hexdigits[] = {
663	'0', '1', '2', '3', '4', '5', '6', '7',
664	'8', '9', 'a', 'b', 'c', 'd', 'e', 'f'
665	};
666
667	PyObject PyCodec_BackslashReplaceErrors(PyObject exc)
668	{
669	if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
670	PyObject *restuple;
671	PyObject *object;
672	Py_ssize_t start;
673	Py_ssize_t end;
674	PyObject *res;
675	Py_UNICODE *p;
676	Py_UNICODE *startp;
677	Py_UNICODE *outp;
678	int ressize;
679	if (PyUnicodeEncodeError_GetStart(exc, &start))
680	return NULL;
681	if (PyUnicodeEncodeError_GetEnd(exc, &end))
682	return NULL;
683	if (!(object = PyUnicodeEncodeError_GetObject(exc)))
684	return NULL;
685	startp = PyUnicode_AS_UNICODE(object);
686	for (p = startp+start, ressize = 0; p < startp+end; ++p) {
687	#ifdef Py_UNICODE_WIDE
688	if (*p >= 0x00010000)
689	ressize += 1+1+8;
690	else
691	#endif
692	if (*p >= 0x100) {
693	ressize += 1+1+4;
694	}
695	else
696	ressize += 1+1+2;
697	}
698	res = PyUnicode_FromUnicode(NULL, ressize);
699	if (res==NULL)
700	return NULL;
701	for (p = startp+start, outp = PyUnicode_AS_UNICODE(res);
702	p < startp+end; ++p) {
703	Py_UNICODE c = *p;
704	*outp++ = '\\';
705	#ifdef Py_UNICODE_WIDE
706	if (c >= 0x00010000) {
707	*outp++ = 'U';
708	*outp++ = hexdigits[(c>>28)&0xf];
709	*outp++ = hexdigits[(c>>24)&0xf];
710	*outp++ = hexdigits[(c>>20)&0xf];
711	*outp++ = hexdigits[(c>>16)&0xf];
712	*outp++ = hexdigits[(c>>12)&0xf];
713	*outp++ = hexdigits[(c>>8)&0xf];
714	}
715	else
716	#endif
717	if (c >= 0x100) {
718	*outp++ = 'u';
719	*outp++ = hexdigits[(c>>12)&0xf];
720	*outp++ = hexdigits[(c>>8)&0xf];
721	}
722	else
723	*outp++ = 'x';
724	*outp++ = hexdigits[(c>>4)&0xf];
725	*outp++ = hexdigits[c&0xf];
726	}
727
728	restuple = Py_BuildValue("(On)", res, end);
729	Py_DECREF(res);
730	Py_DECREF(object);
731	return restuple;
732	}
733	else {
734	wrong_exception_type(exc);
735	return NULL;
736	}
737	}
738	#endif
739
740	static PyObject strict_errors(PyObject self, PyObject *exc)
741	{
742	return PyCodec_StrictErrors(exc);
743	}
744
745
746	#ifdef Py_USING_UNICODE
747	static PyObject ignore_errors(PyObject self, PyObject *exc)
748	{
749	return PyCodec_IgnoreErrors(exc);
750	}
751
752
753	static PyObject replace_errors(PyObject self, PyObject *exc)
754	{
755	return PyCodec_ReplaceErrors(exc);
756	}
757
758
759	static PyObject xmlcharrefreplace_errors(PyObject self, PyObject *exc)
760	{
761	return PyCodec_XMLCharRefReplaceErrors(exc);
762	}
763
764
765	static PyObject backslashreplace_errors(PyObject self, PyObject *exc)
766	{
767	return PyCodec_BackslashReplaceErrors(exc);
768	}
769	#endif
770
771	static int _PyCodecRegistry_Init(void)
772	{
773	static struct {
774	char *name;
775	PyMethodDef def;
776	} methods[] =
777	{
778	{
779	"strict",
780	{
781	"strict_errors",
782	strict_errors,
783	METH_O,
784	PyDoc_STR("Implements the 'strict' error handling, which "
785	"raises a UnicodeError on coding errors.")
786	}
787	},
788	#ifdef Py_USING_UNICODE
789	{
790	"ignore",
791	{
792	"ignore_errors",
793	ignore_errors,
794	METH_O,
795	PyDoc_STR("Implements the 'ignore' error handling, which "
796	"ignores malformed data and continues.")
797	}
798	},
799	{
800	"replace",
801	{
802	"replace_errors",
803	replace_errors,
804	METH_O,
805	PyDoc_STR("Implements the 'replace' error handling, which "
806	"replaces malformed data with a replacement marker.")
807	}
808	},
809	{
810	"xmlcharrefreplace",
811	{
812	"xmlcharrefreplace_errors",
813	xmlcharrefreplace_errors,
814	METH_O,
815	PyDoc_STR("Implements the 'xmlcharrefreplace' error handling, "
816	"which replaces an unencodable character with the "
817	"appropriate XML character reference.")
818	}
819	},
820	{
821	"backslashreplace",
822	{
823	"backslashreplace_errors",
824	backslashreplace_errors,
825	METH_O,
826	PyDoc_STR("Implements the 'backslashreplace' error handling, "
827	"which replaces an unencodable character with a "
828	"backslashed escape sequence.")
829	}
830	}
831	#endif
832	};
833
834	PyInterpreterState *interp = PyThreadState_GET()->interp;
835	PyObject *mod;
836	unsigned i;
837
838	if (interp->codec_search_path != NULL)
839	return 0;
840
841	interp->codec_search_path = PyList_New(0);
842	interp->codec_search_cache = PyDict_New();
843	interp->codec_error_registry = PyDict_New();
844
845	if (interp->codec_error_registry) {
846	for (i = 0; i < sizeof(methods)/sizeof(methods[0]); ++i) {
847	PyObject *func = PyCFunction_New(&methods[i].def, NULL);
848	int res;
849	if (!func)
850	Py_FatalError("can't initialize codec error registry");
851	res = PyCodec_RegisterError(methods[i].name, func);
852	Py_DECREF(func);
853	if (res)
854	Py_FatalError("can't initialize codec error registry");
855	}
856	}
857
858	if (interp->codec_search_path == NULL \|\|
859	interp->codec_search_cache == NULL \|\|
860	interp->codec_error_registry == NULL)
861	Py_FatalError("can't initialize codec registry");
862
863	mod = PyImport_ImportModuleLevel("encodings", NULL, NULL, NULL, 0);
864	if (mod == NULL) {
865	if (PyErr_ExceptionMatches(PyExc_ImportError)) {
866	/* Ignore ImportErrors... this is done so that
867	distributions can disable the encodings package. Note
868	that other errors are not masked, e.g. SystemErrors
869	raised to inform the user of an error in the Python
870	configuration are still reported back to the user. */
871	PyErr_Clear();
872	return 0;
873	}
874	return -1;
875	}
876	Py_DECREF(mod);
877	return 0;
878	}

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: python/vendor/current/Python/codecs.c

Download in other formats: