Context Navigation

← Previous Revision
Next Revision →
Normal
Revision Log

codecs.c

Last change on this file was 391, checked in by dmik, 11 years ago
python: Merge vendor 2.7.6 to trunk.
Property svn:eol-style set to `native`
File size: 24.7 KB

Rev	Line
[2]	1	/* ------------------------------------------------------------------------
	2
	3	Python Codec Registry and support functions
	4
	5	Written by Marc-Andre Lemburg (mal@lemburg.com).
	6
	7	Copyright (c) Corporation for National Research Initiatives.
	8
	9	------------------------------------------------------------------------ */
	10
	11	#include "Python.h"
	12	#include <ctype.h>
	13
	14	/* --- Codec Registry ----------------------------------------------------- */
	15
	16	/* Import the standard encodings package which will register the first
[391]	17	codec search function.
[2]	18
	19	This is done in a lazy way so that the Unicode implementation does
	20	not downgrade startup time of scripts not needing it.
	21
	22	ImportErrors are silently ignored by this function. Only one try is
	23	made.
	24
	25	*/
	26
	27	static int _PyCodecRegistry_Init(void); /* Forward */
	28
	29	int PyCodec_Register(PyObject *search_function)
	30	{
	31	PyInterpreterState *interp = PyThreadState_GET()->interp;
	32	if (interp->codec_search_path == NULL && _PyCodecRegistry_Init())
[391]	33	goto onError;
[2]	34	if (search_function == NULL) {
[391]	35	PyErr_BadArgument();
	36	goto onError;
[2]	37	}
	38	if (!PyCallable_Check(search_function)) {
[391]	39	PyErr_SetString(PyExc_TypeError, "argument must be callable");
	40	goto onError;
[2]	41	}
	42	return PyList_Append(interp->codec_search_path, search_function);
	43
	44	onError:
	45	return -1;
	46	}
	47
	48	/* Convert a string to a normalized Python string: all characters are
	49	converted to lower case, spaces are replaced with underscores. */
	50
	51	static
	52	PyObject normalizestring(const char string)
	53	{
	54	register size_t i;
	55	size_t len = strlen(string);
	56	char *p;
	57	PyObject *v;
[391]	58
[2]	59	if (len > PY_SSIZE_T_MAX) {
[391]	60	PyErr_SetString(PyExc_OverflowError, "string is too large");
	61	return NULL;
[2]	62	}
[391]	63
[2]	64	v = PyString_FromStringAndSize(NULL, len);
	65	if (v == NULL)
[391]	66	return NULL;
[2]	67	p = PyString_AS_STRING(v);
	68	for (i = 0; i < len; i++) {
	69	register char ch = string[i];
	70	if (ch == ' ')
	71	ch = '-';
	72	else
[391]	73	ch = Py_TOLOWER(Py_CHARMASK(ch));
	74	p[i] = ch;
[2]	75	}
	76	return v;
	77	}
	78
	79	/* Lookup the given encoding and return a tuple providing the codec
	80	facilities.
	81
	82	The encoding string is looked up converted to all lower-case
	83	characters. This makes encodings looked up through this mechanism
	84	effectively case-insensitive.
	85
[391]	86	If no codec is found, a LookupError is set and NULL returned.
[2]	87
	88	As side effect, this tries to load the encodings package, if not
	89	yet done. This is part of the lazy load strategy for the encodings
	90	package.
	91
	92	*/
	93
	94	PyObject _PyCodec_Lookup(const char encoding)
	95	{
	96	PyInterpreterState *interp;
	97	PyObject result, args = NULL, *v;
	98	Py_ssize_t i, len;
	99
	100	if (encoding == NULL) {
[391]	101	PyErr_BadArgument();
	102	goto onError;
[2]	103	}
	104
	105	interp = PyThreadState_GET()->interp;
	106	if (interp->codec_search_path == NULL && _PyCodecRegistry_Init())
[391]	107	goto onError;
[2]	108
	109	/* Convert the encoding to a normalized Python string: all
	110	characters are converted to lower case, spaces and hyphens are
	111	replaced with underscores. */
	112	v = normalizestring(encoding);
	113	if (v == NULL)
[391]	114	goto onError;
[2]	115	PyString_InternInPlace(&v);
	116
	117	/* First, try to lookup the name in the registry dictionary */
	118	result = PyDict_GetItem(interp->codec_search_cache, v);
	119	if (result != NULL) {
[391]	120	Py_INCREF(result);
	121	Py_DECREF(v);
	122	return result;
[2]	123	}
[391]	124
[2]	125	/* Next, scan the search functions in order of registration */
	126	args = PyTuple_New(1);
	127	if (args == NULL)
[391]	128	goto onError;
[2]	129	PyTuple_SET_ITEM(args,0,v);
	130
	131	len = PyList_Size(interp->codec_search_path);
	132	if (len < 0)
[391]	133	goto onError;
[2]	134	if (len == 0) {
[391]	135	PyErr_SetString(PyExc_LookupError,
	136	"no codec search functions registered: "
	137	"can't find encoding");
	138	goto onError;
[2]	139	}
	140
	141	for (i = 0; i < len; i++) {
[391]	142	PyObject *func;
	143
	144	func = PyList_GetItem(interp->codec_search_path, i);
	145	if (func == NULL)
	146	goto onError;
	147	result = PyEval_CallObject(func, args);
	148	if (result == NULL)
	149	goto onError;
	150	if (result == Py_None) {
	151	Py_DECREF(result);
	152	continue;
	153	}
	154	if (!PyTuple_Check(result) \|\| PyTuple_GET_SIZE(result) != 4) {
	155	PyErr_SetString(PyExc_TypeError,
	156	"codec search functions must return 4-tuples");
	157	Py_DECREF(result);
	158	goto onError;
	159	}
	160	break;
[2]	161	}
	162	if (i == len) {
[391]	163	/* XXX Perhaps we should cache misses too ? */
	164	PyErr_Format(PyExc_LookupError,
[2]	165	"unknown encoding: %s", encoding);
[391]	166	goto onError;
[2]	167	}
	168
	169	/* Cache and return the result */
	170	PyDict_SetItem(interp->codec_search_cache, v, result);
	171	Py_DECREF(args);
	172	return result;
	173
	174	onError:
	175	Py_XDECREF(args);
	176	return NULL;
	177	}
	178
	179	static
	180	PyObject args_tuple(PyObject object,
[391]	181	const char *errors)
[2]	182	{
	183	PyObject *args;
[391]	184
[2]	185	args = PyTuple_New(1 + (errors != NULL));
	186	if (args == NULL)
[391]	187	return NULL;
[2]	188	Py_INCREF(object);
	189	PyTuple_SET_ITEM(args,0,object);
	190	if (errors) {
[391]	191	PyObject *v;
	192
	193	v = PyString_FromString(errors);
	194	if (v == NULL) {
	195	Py_DECREF(args);
	196	return NULL;
	197	}
	198	PyTuple_SET_ITEM(args, 1, v);
[2]	199	}
	200	return args;
	201	}
	202
	203	/* Helper function to get a codec item */
	204
	205	static
	206	PyObject codec_getitem(const char encoding, int index)
	207	{
	208	PyObject *codecs;
	209	PyObject *v;
	210
	211	codecs = _PyCodec_Lookup(encoding);
	212	if (codecs == NULL)
[391]	213	return NULL;
[2]	214	v = PyTuple_GET_ITEM(codecs, index);
	215	Py_DECREF(codecs);
	216	Py_INCREF(v);
	217	return v;
	218	}
	219
	220	/* Helper function to create an incremental codec. */
	221
	222	static
	223	PyObject codec_getincrementalcodec(const char encoding,
[391]	224	const char *errors,
	225	const char *attrname)
[2]	226	{
	227	PyObject codecs, ret, *inccodec;
	228
	229	codecs = _PyCodec_Lookup(encoding);
	230	if (codecs == NULL)
[391]	231	return NULL;
[2]	232	inccodec = PyObject_GetAttrString(codecs, attrname);
	233	Py_DECREF(codecs);
	234	if (inccodec == NULL)
[391]	235	return NULL;
[2]	236	if (errors)
[391]	237	ret = PyObject_CallFunction(inccodec, "s", errors);
[2]	238	else
[391]	239	ret = PyObject_CallFunction(inccodec, NULL);
[2]	240	Py_DECREF(inccodec);
	241	return ret;
	242	}
	243
	244	/* Helper function to create a stream codec. */
	245
	246	static
	247	PyObject codec_getstreamcodec(const char encoding,
[391]	248	PyObject *stream,
	249	const char *errors,
	250	const int index)
[2]	251	{
	252	PyObject codecs, streamcodec, *codeccls;
	253
	254	codecs = _PyCodec_Lookup(encoding);
	255	if (codecs == NULL)
[391]	256	return NULL;
[2]	257
	258	codeccls = PyTuple_GET_ITEM(codecs, index);
	259	if (errors != NULL)
[391]	260	streamcodec = PyObject_CallFunction(codeccls, "Os", stream, errors);
[2]	261	else
[391]	262	streamcodec = PyObject_CallFunction(codeccls, "O", stream);
[2]	263	Py_DECREF(codecs);
	264	return streamcodec;
	265	}
	266
[391]	267	/* Convenience APIs to query the Codec registry.
	268
[2]	269	All APIs return a codec object with incremented refcount.
[391]	270
[2]	271	*/
	272
	273	PyObject PyCodec_Encoder(const char encoding)
	274	{
	275	return codec_getitem(encoding, 0);
	276	}
	277
	278	PyObject PyCodec_Decoder(const char encoding)
	279	{
	280	return codec_getitem(encoding, 1);
	281	}
	282
	283	PyObject PyCodec_IncrementalEncoder(const char encoding,
[391]	284	const char *errors)
[2]	285	{
	286	return codec_getincrementalcodec(encoding, errors, "incrementalencoder");
	287	}
	288
	289	PyObject PyCodec_IncrementalDecoder(const char encoding,
[391]	290	const char *errors)
[2]	291	{
	292	return codec_getincrementalcodec(encoding, errors, "incrementaldecoder");
	293	}
	294
	295	PyObject PyCodec_StreamReader(const char encoding,
[391]	296	PyObject *stream,
	297	const char *errors)
[2]	298	{
	299	return codec_getstreamcodec(encoding, stream, errors, 2);
	300	}
	301
	302	PyObject PyCodec_StreamWriter(const char encoding,
[391]	303	PyObject *stream,
	304	const char *errors)
[2]	305	{
	306	return codec_getstreamcodec(encoding, stream, errors, 3);
	307	}
	308
	309	/* Encode an object (e.g. an Unicode object) using the given encoding
	310	and return the resulting encoded object (usually a Python string).
	311
	312	errors is passed to the encoder factory as argument if non-NULL. */
	313
	314	PyObject PyCodec_Encode(PyObject object,
[391]	315	const char *encoding,
	316	const char *errors)
[2]	317	{
	318	PyObject *encoder = NULL;
	319	PyObject args = NULL, result = NULL;
	320	PyObject *v;
	321
	322	encoder = PyCodec_Encoder(encoding);
	323	if (encoder == NULL)
[391]	324	goto onError;
[2]	325
	326	args = args_tuple(object, errors);
	327	if (args == NULL)
[391]	328	goto onError;
	329
[2]	330	result = PyEval_CallObject(encoder,args);
	331	if (result == NULL)
[391]	332	goto onError;
[2]	333
[391]	334	if (!PyTuple_Check(result) \|\|
	335	PyTuple_GET_SIZE(result) != 2) {
	336	PyErr_SetString(PyExc_TypeError,
	337	"encoder must return a tuple (object,integer)");
	338	goto onError;
[2]	339	}
	340	v = PyTuple_GET_ITEM(result,0);
	341	Py_INCREF(v);
	342	/* We don't check or use the second (integer) entry. */
	343
	344	Py_DECREF(args);
	345	Py_DECREF(encoder);
	346	Py_DECREF(result);
	347	return v;
[391]	348
[2]	349	onError:
	350	Py_XDECREF(result);
	351	Py_XDECREF(args);
	352	Py_XDECREF(encoder);
	353	return NULL;
	354	}
	355
	356	/* Decode an object (usually a Python string) using the given encoding
	357	and return an equivalent object (e.g. an Unicode object).
	358
	359	errors is passed to the decoder factory as argument if non-NULL. */
	360
	361	PyObject PyCodec_Decode(PyObject object,
[391]	362	const char *encoding,
	363	const char *errors)
[2]	364	{
	365	PyObject *decoder = NULL;
	366	PyObject args = NULL, result = NULL;
	367	PyObject *v;
	368
	369	decoder = PyCodec_Decoder(encoding);
	370	if (decoder == NULL)
[391]	371	goto onError;
[2]	372
	373	args = args_tuple(object, errors);
	374	if (args == NULL)
[391]	375	goto onError;
	376
[2]	377	result = PyEval_CallObject(decoder,args);
	378	if (result == NULL)
[391]	379	goto onError;
	380	if (!PyTuple_Check(result) \|\|
	381	PyTuple_GET_SIZE(result) != 2) {
	382	PyErr_SetString(PyExc_TypeError,
	383	"decoder must return a tuple (object,integer)");
	384	goto onError;
[2]	385	}
	386	v = PyTuple_GET_ITEM(result,0);
	387	Py_INCREF(v);
	388	/* We don't check or use the second (integer) entry. */
	389
	390	Py_DECREF(args);
	391	Py_DECREF(decoder);
	392	Py_DECREF(result);
	393	return v;
[391]	394
[2]	395	onError:
	396	Py_XDECREF(args);
	397	Py_XDECREF(decoder);
	398	Py_XDECREF(result);
	399	return NULL;
	400	}
	401
	402	/* Register the error handling callback function error under the name
	403	name. This function will be called by the codec when it encounters
	404	an unencodable characters/undecodable bytes and doesn't know the
	405	callback name, when name is specified as the error parameter
	406	in the call to the encode/decode function.
	407	Return 0 on success, -1 on error */
	408	int PyCodec_RegisterError(const char name, PyObject error)
	409	{
	410	PyInterpreterState *interp = PyThreadState_GET()->interp;
	411	if (interp->codec_search_path == NULL && _PyCodecRegistry_Init())
[391]	412	return -1;
[2]	413	if (!PyCallable_Check(error)) {
[391]	414	PyErr_SetString(PyExc_TypeError, "handler must be callable");
	415	return -1;
[2]	416	}
	417	return PyDict_SetItemString(interp->codec_error_registry,
[391]	418	(char *)name, error);
[2]	419	}
	420
	421	/* Lookup the error handling callback function registered under the
	422	name error. As a special case NULL can be passed, in which case
	423	the error handling callback for strict encoding will be returned. */
	424	PyObject PyCodec_LookupError(const char name)
	425	{
	426	PyObject *handler = NULL;
	427
	428	PyInterpreterState *interp = PyThreadState_GET()->interp;
	429	if (interp->codec_search_path == NULL && _PyCodecRegistry_Init())
[391]	430	return NULL;
[2]	431
	432	if (name==NULL)
[391]	433	name = "strict";
[2]	434	handler = PyDict_GetItemString(interp->codec_error_registry, (char *)name);
	435	if (!handler)
[391]	436	PyErr_Format(PyExc_LookupError, "unknown error handler name '%.400s'", name);
[2]	437	else
[391]	438	Py_INCREF(handler);
[2]	439	return handler;
	440	}
	441
	442	static void wrong_exception_type(PyObject *exc)
	443	{
	444	PyObject *type = PyObject_GetAttrString(exc, "__class__");
	445	if (type != NULL) {
[391]	446	PyObject *name = PyObject_GetAttrString(type, "__name__");
	447	Py_DECREF(type);
	448	if (name != NULL) {
	449	PyObject *string = PyObject_Str(name);
	450	Py_DECREF(name);
	451	if (string != NULL) {
	452	PyErr_Format(PyExc_TypeError,
	453	"don't know how to handle %.400s in error callback",
	454	PyString_AS_STRING(string));
	455	Py_DECREF(string);
	456	}
	457	}
[2]	458	}
	459	}
	460
	461	PyObject PyCodec_StrictErrors(PyObject exc)
	462	{
	463	if (PyExceptionInstance_Check(exc))
	464	PyErr_SetObject(PyExceptionInstance_Class(exc), exc);
	465	else
[391]	466	PyErr_SetString(PyExc_TypeError, "codec must pass exception instance");
[2]	467	return NULL;
	468	}
	469
	470
	471	#ifdef Py_USING_UNICODE
	472	PyObject PyCodec_IgnoreErrors(PyObject exc)
	473	{
	474	Py_ssize_t end;
	475	if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
[391]	476	if (PyUnicodeEncodeError_GetEnd(exc, &end))
	477	return NULL;
[2]	478	}
	479	else if (PyObject_IsInstance(exc, PyExc_UnicodeDecodeError)) {
[391]	480	if (PyUnicodeDecodeError_GetEnd(exc, &end))
	481	return NULL;
[2]	482	}
	483	else if (PyObject_IsInstance(exc, PyExc_UnicodeTranslateError)) {
[391]	484	if (PyUnicodeTranslateError_GetEnd(exc, &end))
	485	return NULL;
[2]	486	}
	487	else {
[391]	488	wrong_exception_type(exc);
	489	return NULL;
[2]	490	}
	491	/* ouch: passing NULL, 0, pos gives None instead of u'' */
	492	return Py_BuildValue("(u#n)", &end, 0, end);
	493	}
	494
	495
	496	PyObject PyCodec_ReplaceErrors(PyObject exc)
	497	{
	498	PyObject *restuple;
	499	Py_ssize_t start;
	500	Py_ssize_t end;
	501	Py_ssize_t i;
	502
	503	if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
[391]	504	PyObject *res;
	505	Py_UNICODE *p;
	506	if (PyUnicodeEncodeError_GetStart(exc, &start))
	507	return NULL;
	508	if (PyUnicodeEncodeError_GetEnd(exc, &end))
	509	return NULL;
	510	res = PyUnicode_FromUnicode(NULL, end-start);
	511	if (res == NULL)
	512	return NULL;
	513	for (p = PyUnicode_AS_UNICODE(res), i = start;
	514	i<end; ++p, ++i)
	515	*p = '?';
	516	restuple = Py_BuildValue("(On)", res, end);
	517	Py_DECREF(res);
	518	return restuple;
[2]	519	}
	520	else if (PyObject_IsInstance(exc, PyExc_UnicodeDecodeError)) {
[391]	521	Py_UNICODE res = Py_UNICODE_REPLACEMENT_CHARACTER;
	522	if (PyUnicodeDecodeError_GetEnd(exc, &end))
	523	return NULL;
	524	return Py_BuildValue("(u#n)", &res, (Py_ssize_t)1, end);
[2]	525	}
	526	else if (PyObject_IsInstance(exc, PyExc_UnicodeTranslateError)) {
[391]	527	PyObject *res;
	528	Py_UNICODE *p;
	529	if (PyUnicodeTranslateError_GetStart(exc, &start))
	530	return NULL;
	531	if (PyUnicodeTranslateError_GetEnd(exc, &end))
	532	return NULL;
	533	res = PyUnicode_FromUnicode(NULL, end-start);
	534	if (res == NULL)
	535	return NULL;
	536	for (p = PyUnicode_AS_UNICODE(res), i = start;
	537	i<end; ++p, ++i)
	538	*p = Py_UNICODE_REPLACEMENT_CHARACTER;
	539	restuple = Py_BuildValue("(On)", res, end);
	540	Py_DECREF(res);
	541	return restuple;
[2]	542	}
	543	else {
[391]	544	wrong_exception_type(exc);
	545	return NULL;
[2]	546	}
	547	}
	548
	549	PyObject PyCodec_XMLCharRefReplaceErrors(PyObject exc)
	550	{
	551	if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
[391]	552	PyObject *restuple;
	553	PyObject *object;
	554	Py_ssize_t start;
	555	Py_ssize_t end;
	556	PyObject *res;
	557	Py_UNICODE *p;
	558	Py_UNICODE *startp;
	559	Py_UNICODE *e;
	560	Py_UNICODE *outp;
	561	int ressize;
	562	if (PyUnicodeEncodeError_GetStart(exc, &start))
	563	return NULL;
	564	if (PyUnicodeEncodeError_GetEnd(exc, &end))
	565	return NULL;
	566	if (!(object = PyUnicodeEncodeError_GetObject(exc)))
	567	return NULL;
	568	startp = PyUnicode_AS_UNICODE(object);
	569	e = startp + end;
	570	for (p = startp+start, ressize = 0; p < e;) {
	571	Py_UCS4 ch = *p++;
[2]	572	#ifndef Py_UNICODE_WIDE
[391]	573	if ((0xD800 <= ch && ch <= 0xDBFF) &&
	574	(p < e) &&
	575	(0xDC00 <= p && p <= 0xDFFF)) {
	576	ch = ((((ch & 0x03FF) << 10) \|
	577	((Py_UCS4)*p++ & 0x03FF)) + 0x10000);
	578	}
[2]	579	#endif
[391]	580	if (ch < 10)
	581	ressize += 2+1+1;
	582	else if (ch < 100)
	583	ressize += 2+2+1;
	584	else if (ch < 1000)
	585	ressize += 2+3+1;
	586	else if (ch < 10000)
	587	ressize += 2+4+1;
	588	else if (ch < 100000)
	589	ressize += 2+5+1;
	590	else if (ch < 1000000)
	591	ressize += 2+6+1;
	592	else
	593	ressize += 2+7+1;
	594	}
	595	/* allocate replacement */
	596	res = PyUnicode_FromUnicode(NULL, ressize);
	597	if (res == NULL) {
	598	Py_DECREF(object);
	599	return NULL;
	600	}
	601	/* generate replacement */
	602	for (p = startp+start, outp = PyUnicode_AS_UNICODE(res); p < e;) {
	603	int digits;
	604	int base;
	605	Py_UCS4 ch = *p++;
[2]	606	#ifndef Py_UNICODE_WIDE
[391]	607	if ((0xD800 <= ch && ch <= 0xDBFF) &&
	608	(p < startp+end) &&
	609	(0xDC00 <= p && p <= 0xDFFF)) {
	610	ch = ((((ch & 0x03FF) << 10) \|
	611	((Py_UCS4)*p++ & 0x03FF)) + 0x10000);
	612	}
[2]	613	#endif
[391]	614	*outp++ = '&';
	615	*outp++ = '#';
	616	if (ch < 10) {
	617	digits = 1;
	618	base = 1;
	619	}
	620	else if (ch < 100) {
	621	digits = 2;
	622	base = 10;
	623	}
	624	else if (ch < 1000) {
	625	digits = 3;
	626	base = 100;
	627	}
	628	else if (ch < 10000) {
	629	digits = 4;
	630	base = 1000;
	631	}
	632	else if (ch < 100000) {
	633	digits = 5;
	634	base = 10000;
	635	}
	636	else if (ch < 1000000) {
	637	digits = 6;
	638	base = 100000;
	639	}
	640	else {
	641	digits = 7;
	642	base = 1000000;
	643	}
	644	while (digits-->0) {
	645	*outp++ = '0' + ch/base;
	646	ch %= base;
	647	base /= 10;
	648	}
	649	*outp++ = ';';
	650	}
	651	restuple = Py_BuildValue("(On)", res, end);
	652	Py_DECREF(res);
	653	Py_DECREF(object);
	654	return restuple;
[2]	655	}
	656	else {
[391]	657	wrong_exception_type(exc);
	658	return NULL;
[2]	659	}
	660	}
	661
	662	static Py_UNICODE hexdigits[] = {
	663	'0', '1', '2', '3', '4', '5', '6', '7',
	664	'8', '9', 'a', 'b', 'c', 'd', 'e', 'f'
	665	};
	666
	667	PyObject PyCodec_BackslashReplaceErrors(PyObject exc)
	668	{
	669	if (PyObject_IsInstance(exc, PyExc_UnicodeEncodeError)) {
[391]	670	PyObject *restuple;
	671	PyObject *object;
	672	Py_ssize_t start;
	673	Py_ssize_t end;
	674	PyObject *res;
	675	Py_UNICODE *p;
	676	Py_UNICODE *startp;
	677	Py_UNICODE *outp;
	678	int ressize;
	679	if (PyUnicodeEncodeError_GetStart(exc, &start))
	680	return NULL;
	681	if (PyUnicodeEncodeError_GetEnd(exc, &end))
	682	return NULL;
	683	if (!(object = PyUnicodeEncodeError_GetObject(exc)))
	684	return NULL;
	685	startp = PyUnicode_AS_UNICODE(object);
	686	for (p = startp+start, ressize = 0; p < startp+end; ++p) {
[2]	687	#ifdef Py_UNICODE_WIDE
[391]	688	if (*p >= 0x00010000)
	689	ressize += 1+1+8;
	690	else
[2]	691	#endif
[391]	692	if (*p >= 0x100) {
	693	ressize += 1+1+4;
	694	}
	695	else
	696	ressize += 1+1+2;
	697	}
	698	res = PyUnicode_FromUnicode(NULL, ressize);
	699	if (res==NULL)
	700	return NULL;
	701	for (p = startp+start, outp = PyUnicode_AS_UNICODE(res);
	702	p < startp+end; ++p) {
	703	Py_UNICODE c = *p;
	704	*outp++ = '\\';
[2]	705	#ifdef Py_UNICODE_WIDE
[391]	706	if (c >= 0x00010000) {
	707	*outp++ = 'U';
	708	*outp++ = hexdigits[(c>>28)&0xf];
	709	*outp++ = hexdigits[(c>>24)&0xf];
	710	*outp++ = hexdigits[(c>>20)&0xf];
	711	*outp++ = hexdigits[(c>>16)&0xf];
	712	*outp++ = hexdigits[(c>>12)&0xf];
	713	*outp++ = hexdigits[(c>>8)&0xf];
	714	}
	715	else
[2]	716	#endif
[391]	717	if (c >= 0x100) {
	718	*outp++ = 'u';
	719	*outp++ = hexdigits[(c>>12)&0xf];
	720	*outp++ = hexdigits[(c>>8)&0xf];
	721	}
	722	else
	723	*outp++ = 'x';
	724	*outp++ = hexdigits[(c>>4)&0xf];
	725	*outp++ = hexdigits[c&0xf];
	726	}
[2]	727
[391]	728	restuple = Py_BuildValue("(On)", res, end);
	729	Py_DECREF(res);
	730	Py_DECREF(object);
	731	return restuple;
[2]	732	}
	733	else {
[391]	734	wrong_exception_type(exc);
	735	return NULL;
[2]	736	}
	737	}
	738	#endif
	739
	740	static PyObject strict_errors(PyObject self, PyObject *exc)
	741	{
	742	return PyCodec_StrictErrors(exc);
	743	}
	744
	745
	746	#ifdef Py_USING_UNICODE
	747	static PyObject ignore_errors(PyObject self, PyObject *exc)
	748	{
	749	return PyCodec_IgnoreErrors(exc);
	750	}
	751
	752
	753	static PyObject replace_errors(PyObject self, PyObject *exc)
	754	{
	755	return PyCodec_ReplaceErrors(exc);
	756	}
	757
	758
	759	static PyObject xmlcharrefreplace_errors(PyObject self, PyObject *exc)
	760	{
	761	return PyCodec_XMLCharRefReplaceErrors(exc);
	762	}
	763
	764
	765	static PyObject backslashreplace_errors(PyObject self, PyObject *exc)
	766	{
	767	return PyCodec_BackslashReplaceErrors(exc);
	768	}
	769	#endif
	770
	771	static int _PyCodecRegistry_Init(void)
	772	{
	773	static struct {
[391]	774	char *name;
	775	PyMethodDef def;
[2]	776	} methods[] =
	777	{
[391]	778	{
	779	"strict",
	780	{
	781	"strict_errors",
	782	strict_errors,
	783	METH_O,
	784	PyDoc_STR("Implements the 'strict' error handling, which "
	785	"raises a UnicodeError on coding errors.")
	786	}
	787	},
[2]	788	#ifdef Py_USING_UNICODE
[391]	789	{
	790	"ignore",
	791	{
	792	"ignore_errors",
	793	ignore_errors,
	794	METH_O,
	795	PyDoc_STR("Implements the 'ignore' error handling, which "
	796	"ignores malformed data and continues.")
	797	}
	798	},
	799	{
	800	"replace",
	801	{
	802	"replace_errors",
	803	replace_errors,
	804	METH_O,
	805	PyDoc_STR("Implements the 'replace' error handling, which "
	806	"replaces malformed data with a replacement marker.")
	807	}
	808	},
	809	{
	810	"xmlcharrefreplace",
	811	{
	812	"xmlcharrefreplace_errors",
	813	xmlcharrefreplace_errors,
	814	METH_O,
	815	PyDoc_STR("Implements the 'xmlcharrefreplace' error handling, "
	816	"which replaces an unencodable character with the "
	817	"appropriate XML character reference.")
	818	}
	819	},
	820	{
	821	"backslashreplace",
	822	{
	823	"backslashreplace_errors",
	824	backslashreplace_errors,
	825	METH_O,
	826	PyDoc_STR("Implements the 'backslashreplace' error handling, "
	827	"which replaces an unencodable character with a "
	828	"backslashed escape sequence.")
	829	}
	830	}
[2]	831	#endif
	832	};
	833
	834	PyInterpreterState *interp = PyThreadState_GET()->interp;
	835	PyObject *mod;
	836	unsigned i;
	837
	838	if (interp->codec_search_path != NULL)
[391]	839	return 0;
[2]	840
	841	interp->codec_search_path = PyList_New(0);
	842	interp->codec_search_cache = PyDict_New();
	843	interp->codec_error_registry = PyDict_New();
	844
	845	if (interp->codec_error_registry) {
[391]	846	for (i = 0; i < sizeof(methods)/sizeof(methods[0]); ++i) {
	847	PyObject *func = PyCFunction_New(&methods[i].def, NULL);
	848	int res;
	849	if (!func)
	850	Py_FatalError("can't initialize codec error registry");
	851	res = PyCodec_RegisterError(methods[i].name, func);
	852	Py_DECREF(func);
	853	if (res)
	854	Py_FatalError("can't initialize codec error registry");
	855	}
[2]	856	}
	857
	858	if (interp->codec_search_path == NULL \|\|
[391]	859	interp->codec_search_cache == NULL \|\|
	860	interp->codec_error_registry == NULL)
	861	Py_FatalError("can't initialize codec registry");
[2]	862
	863	mod = PyImport_ImportModuleLevel("encodings", NULL, NULL, NULL, 0);
	864	if (mod == NULL) {
[391]	865	if (PyErr_ExceptionMatches(PyExc_ImportError)) {
	866	/* Ignore ImportErrors... this is done so that
	867	distributions can disable the encodings package. Note
	868	that other errors are not masked, e.g. SystemErrors
	869	raised to inform the user of an error in the Python
	870	configuration are still reported back to the user. */
	871	PyErr_Clear();
	872	return 0;
	873	}
	874	return -1;
[2]	875	}
	876	Py_DECREF(mod);
	877	return 0;
	878	}

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: python/trunk/Python/codecs.c

Download in other formats: