source: vendor/python/2.5/Modules/_codecsmodule.c

Last change on this file was 3225, checked in by bird, 18 years ago

Python 2.5

File size: 23.4 KB
Line 
1/* ------------------------------------------------------------------------
2
3 _codecs -- Provides access to the codec registry and the builtin
4 codecs.
5
6 This module should never be imported directly. The standard library
7 module "codecs" wraps this builtin module for use within Python.
8
9 The codec registry is accessible via:
10
11 register(search_function) -> None
12
13 lookup(encoding) -> (encoder, decoder, stream_reader, stream_writer)
14
15 The builtin Unicode codecs use the following interface:
16
17 <encoding>_encode(Unicode_object[,errors='strict']) ->
18 (string object, bytes consumed)
19
20 <encoding>_decode(char_buffer_obj[,errors='strict']) ->
21 (Unicode object, bytes consumed)
22
23 <encoding>_encode() interfaces also accept non-Unicode object as
24 input. The objects are then converted to Unicode using
25 PyUnicode_FromObject() prior to applying the conversion.
26
27 These <encoding>s are available: utf_8, unicode_escape,
28 raw_unicode_escape, unicode_internal, latin_1, ascii (7-bit),
29 mbcs (on win32).
30
31
32Written by Marc-Andre Lemburg (mal@lemburg.com).
33
34Copyright (c) Corporation for National Research Initiatives.
35
36 ------------------------------------------------------------------------ */
37
38#define PY_SSIZE_T_CLEAN
39#include "Python.h"
40
41/* --- Registry ----------------------------------------------------------- */
42
43PyDoc_STRVAR(register__doc__,
44"register(search_function)\n\
45\n\
46Register a codec search function. Search functions are expected to take\n\
47one argument, the encoding name in all lower case letters, and return\n\
48a tuple of functions (encoder, decoder, stream_reader, stream_writer).");
49
50static
51PyObject *codec_register(PyObject *self, PyObject *search_function)
52{
53 if (PyCodec_Register(search_function))
54 return NULL;
55
56 Py_RETURN_NONE;
57}
58
59PyDoc_STRVAR(lookup__doc__,
60"lookup(encoding) -> (encoder, decoder, stream_reader, stream_writer)\n\
61\n\
62Looks up a codec tuple in the Python codec registry and returns\n\
63a tuple of functions.");
64
65static
66PyObject *codec_lookup(PyObject *self, PyObject *args)
67{
68 char *encoding;
69
70 if (!PyArg_ParseTuple(args, "s:lookup", &encoding))
71 return NULL;
72
73 return _PyCodec_Lookup(encoding);
74}
75
76PyDoc_STRVAR(encode__doc__,
77"encode(obj, [encoding[,errors]]) -> object\n\
78\n\
79Encodes obj using the codec registered for encoding. encoding defaults\n\
80to the default encoding. errors may be given to set a different error\n\
81handling scheme. Default is 'strict' meaning that encoding errors raise\n\
82a ValueError. Other possible values are 'ignore', 'replace' and\n\
83'xmlcharrefreplace' as well as any other name registered with\n\
84codecs.register_error that can handle ValueErrors.");
85
86static PyObject *
87codec_encode(PyObject *self, PyObject *args)
88{
89 const char *encoding = NULL;
90 const char *errors = NULL;
91 PyObject *v;
92
93 if (!PyArg_ParseTuple(args, "O|ss:encode", &v, &encoding, &errors))
94 return NULL;
95
96#ifdef Py_USING_UNICODE
97 if (encoding == NULL)
98 encoding = PyUnicode_GetDefaultEncoding();
99#else
100 if (encoding == NULL) {
101 PyErr_SetString(PyExc_ValueError, "no encoding specified");
102 return NULL;
103 }
104#endif
105
106 /* Encode via the codec registry */
107 return PyCodec_Encode(v, encoding, errors);
108}
109
110PyDoc_STRVAR(decode__doc__,
111"decode(obj, [encoding[,errors]]) -> object\n\
112\n\
113Decodes obj using the codec registered for encoding. encoding defaults\n\
114to the default encoding. errors may be given to set a different error\n\
115handling scheme. Default is 'strict' meaning that encoding errors raise\n\
116a ValueError. Other possible values are 'ignore' and 'replace'\n\
117as well as any other name registerd with codecs.register_error that is\n\
118able to handle ValueErrors.");
119
120static PyObject *
121codec_decode(PyObject *self, PyObject *args)
122{
123 const char *encoding = NULL;
124 const char *errors = NULL;
125 PyObject *v;
126
127 if (!PyArg_ParseTuple(args, "O|ss:decode", &v, &encoding, &errors))
128 return NULL;
129
130#ifdef Py_USING_UNICODE
131 if (encoding == NULL)
132 encoding = PyUnicode_GetDefaultEncoding();
133#else
134 if (encoding == NULL) {
135 PyErr_SetString(PyExc_ValueError, "no encoding specified");
136 return NULL;
137 }
138#endif
139
140 /* Decode via the codec registry */
141 return PyCodec_Decode(v, encoding, errors);
142}
143
144/* --- Helpers ------------------------------------------------------------ */
145
146static
147PyObject *codec_tuple(PyObject *unicode,
148 Py_ssize_t len)
149{
150 PyObject *v;
151 if (unicode == NULL)
152 return NULL;
153 v = Py_BuildValue("On", unicode, len);
154 Py_DECREF(unicode);
155 return v;
156}
157
158/* --- String codecs ------------------------------------------------------ */
159static PyObject *
160escape_decode(PyObject *self,
161 PyObject *args)
162{
163 const char *errors = NULL;
164 const char *data;
165 Py_ssize_t size;
166
167 if (!PyArg_ParseTuple(args, "s#|z:escape_decode",
168 &data, &size, &errors))
169 return NULL;
170 return codec_tuple(PyString_DecodeEscape(data, size, errors, 0, NULL),
171 size);
172}
173
174static PyObject *
175escape_encode(PyObject *self,
176 PyObject *args)
177{
178 PyObject *str;
179 const char *errors = NULL;
180 char *buf;
181 Py_ssize_t len;
182
183 if (!PyArg_ParseTuple(args, "O!|z:escape_encode",
184 &PyString_Type, &str, &errors))
185 return NULL;
186
187 str = PyString_Repr(str, 0);
188 if (!str)
189 return NULL;
190
191 /* The string will be quoted. Unquote, similar to unicode-escape. */
192 buf = PyString_AS_STRING (str);
193 len = PyString_GET_SIZE (str);
194 memmove(buf, buf+1, len-2);
195 if (_PyString_Resize(&str, len-2) < 0)
196 return NULL;
197
198 return codec_tuple(str, PyString_Size(str));
199}
200
201#ifdef Py_USING_UNICODE
202/* --- Decoder ------------------------------------------------------------ */
203
204static PyObject *
205unicode_internal_decode(PyObject *self,
206 PyObject *args)
207{
208 PyObject *obj;
209 const char *errors = NULL;
210 const char *data;
211 Py_ssize_t size;
212
213 if (!PyArg_ParseTuple(args, "O|z:unicode_internal_decode",
214 &obj, &errors))
215 return NULL;
216
217 if (PyUnicode_Check(obj)) {
218 Py_INCREF(obj);
219 return codec_tuple(obj, PyUnicode_GET_SIZE(obj));
220 }
221 else {
222 if (PyObject_AsReadBuffer(obj, (const void **)&data, &size))
223 return NULL;
224
225 return codec_tuple(_PyUnicode_DecodeUnicodeInternal(data, size, errors),
226 size);
227 }
228}
229
230static PyObject *
231utf_7_decode(PyObject *self,
232 PyObject *args)
233{
234 const char *data;
235 Py_ssize_t size;
236 const char *errors = NULL;
237
238 if (!PyArg_ParseTuple(args, "t#|z:utf_7_decode",
239 &data, &size, &errors))
240 return NULL;
241
242 return codec_tuple(PyUnicode_DecodeUTF7(data, size, errors),
243 size);
244}
245
246static PyObject *
247utf_8_decode(PyObject *self,
248 PyObject *args)
249{
250 const char *data;
251 Py_ssize_t size;
252 const char *errors = NULL;
253 int final = 0;
254 Py_ssize_t consumed;
255 PyObject *decoded = NULL;
256
257 if (!PyArg_ParseTuple(args, "t#|zi:utf_8_decode",
258 &data, &size, &errors, &final))
259 return NULL;
260 if (size < 0) {
261 PyErr_SetString(PyExc_ValueError, "negative argument");
262 return 0;
263 }
264 consumed = size;
265
266 decoded = PyUnicode_DecodeUTF8Stateful(data, size, errors,
267 final ? NULL : &consumed);
268 if (decoded == NULL)
269 return NULL;
270 return codec_tuple(decoded, consumed);
271}
272
273static PyObject *
274utf_16_decode(PyObject *self,
275 PyObject *args)
276{
277 const char *data;
278 Py_ssize_t size;
279 const char *errors = NULL;
280 int byteorder = 0;
281 int final = 0;
282 Py_ssize_t consumed;
283 PyObject *decoded;
284
285 if (!PyArg_ParseTuple(args, "t#|zi:utf_16_decode",
286 &data, &size, &errors, &final))
287 return NULL;
288 if (size < 0) {
289 PyErr_SetString(PyExc_ValueError, "negative argument");
290 return 0;
291 }
292 consumed = size; /* This is overwritten unless final is true. */
293 decoded = PyUnicode_DecodeUTF16Stateful(data, size, errors, &byteorder,
294 final ? NULL : &consumed);
295 if (decoded == NULL)
296 return NULL;
297 return codec_tuple(decoded, consumed);
298}
299
300static PyObject *
301utf_16_le_decode(PyObject *self,
302 PyObject *args)
303{
304 const char *data;
305 Py_ssize_t size;
306 const char *errors = NULL;
307 int byteorder = -1;
308 int final = 0;
309 Py_ssize_t consumed;
310 PyObject *decoded = NULL;
311
312 if (!PyArg_ParseTuple(args, "t#|zi:utf_16_le_decode",
313 &data, &size, &errors, &final))
314 return NULL;
315
316 if (size < 0) {
317 PyErr_SetString(PyExc_ValueError, "negative argument");
318 return 0;
319 }
320 consumed = size; /* This is overwritten unless final is true. */
321 decoded = PyUnicode_DecodeUTF16Stateful(data, size, errors,
322 &byteorder, final ? NULL : &consumed);
323 if (decoded == NULL)
324 return NULL;
325 return codec_tuple(decoded, consumed);
326
327}
328
329static PyObject *
330utf_16_be_decode(PyObject *self,
331 PyObject *args)
332{
333 const char *data;
334 Py_ssize_t size;
335 const char *errors = NULL;
336 int byteorder = 1;
337 int final = 0;
338 Py_ssize_t consumed;
339 PyObject *decoded = NULL;
340
341 if (!PyArg_ParseTuple(args, "t#|zi:utf_16_be_decode",
342 &data, &size, &errors, &final))
343 return NULL;
344 if (size < 0) {
345 PyErr_SetString(PyExc_ValueError, "negative argument");
346 return 0;
347 }
348 consumed = size; /* This is overwritten unless final is true. */
349 decoded = PyUnicode_DecodeUTF16Stateful(data, size, errors,
350 &byteorder, final ? NULL : &consumed);
351 if (decoded == NULL)
352 return NULL;
353 return codec_tuple(decoded, consumed);
354}
355
356/* This non-standard version also provides access to the byteorder
357 parameter of the builtin UTF-16 codec.
358
359 It returns a tuple (unicode, bytesread, byteorder) with byteorder
360 being the value in effect at the end of data.
361
362*/
363
364static PyObject *
365utf_16_ex_decode(PyObject *self,
366 PyObject *args)
367{
368 const char *data;
369 Py_ssize_t size;
370 const char *errors = NULL;
371 int byteorder = 0;
372 PyObject *unicode, *tuple;
373 int final = 0;
374 Py_ssize_t consumed;
375
376 if (!PyArg_ParseTuple(args, "t#|zii:utf_16_ex_decode",
377 &data, &size, &errors, &byteorder, &final))
378 return NULL;
379 if (size < 0) {
380 PyErr_SetString(PyExc_ValueError, "negative argument");
381 return 0;
382 }
383 consumed = size; /* This is overwritten unless final is true. */
384 unicode = PyUnicode_DecodeUTF16Stateful(data, size, errors, &byteorder,
385 final ? NULL : &consumed);
386 if (unicode == NULL)
387 return NULL;
388 tuple = Py_BuildValue("Oni", unicode, consumed, byteorder);
389 Py_DECREF(unicode);
390 return tuple;
391}
392
393static PyObject *
394unicode_escape_decode(PyObject *self,
395 PyObject *args)
396{
397 const char *data;
398 Py_ssize_t size;
399 const char *errors = NULL;
400
401 if (!PyArg_ParseTuple(args, "t#|z:unicode_escape_decode",
402 &data, &size, &errors))
403 return NULL;
404
405 return codec_tuple(PyUnicode_DecodeUnicodeEscape(data, size, errors),
406 size);
407}
408
409static PyObject *
410raw_unicode_escape_decode(PyObject *self,
411 PyObject *args)
412{
413 const char *data;
414 Py_ssize_t size;
415 const char *errors = NULL;
416
417 if (!PyArg_ParseTuple(args, "t#|z:raw_unicode_escape_decode",
418 &data, &size, &errors))
419 return NULL;
420
421 return codec_tuple(PyUnicode_DecodeRawUnicodeEscape(data, size, errors),
422 size);
423}
424
425static PyObject *
426latin_1_decode(PyObject *self,
427 PyObject *args)
428{
429 const char *data;
430 Py_ssize_t size;
431 const char *errors = NULL;
432
433 if (!PyArg_ParseTuple(args, "t#|z:latin_1_decode",
434 &data, &size, &errors))
435 return NULL;
436
437 return codec_tuple(PyUnicode_DecodeLatin1(data, size, errors),
438 size);
439}
440
441static PyObject *
442ascii_decode(PyObject *self,
443 PyObject *args)
444{
445 const char *data;
446 Py_ssize_t size;
447 const char *errors = NULL;
448
449 if (!PyArg_ParseTuple(args, "t#|z:ascii_decode",
450 &data, &size, &errors))
451 return NULL;
452
453 return codec_tuple(PyUnicode_DecodeASCII(data, size, errors),
454 size);
455}
456
457static PyObject *
458charmap_decode(PyObject *self,
459 PyObject *args)
460{
461 const char *data;
462 Py_ssize_t size;
463 const char *errors = NULL;
464 PyObject *mapping = NULL;
465
466 if (!PyArg_ParseTuple(args, "t#|zO:charmap_decode",
467 &data, &size, &errors, &mapping))
468 return NULL;
469 if (mapping == Py_None)
470 mapping = NULL;
471
472 return codec_tuple(PyUnicode_DecodeCharmap(data, size, mapping, errors),
473 size);
474}
475
476#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
477
478static PyObject *
479mbcs_decode(PyObject *self,
480 PyObject *args)
481{
482 const char *data;
483 Py_ssize_t size, consumed;
484 const char *errors = NULL;
485 int final = 0;
486 PyObject *decoded;
487
488 if (!PyArg_ParseTuple(args, "t#|zi:mbcs_decode",
489 &data, &size, &errors, &final))
490 return NULL;
491
492 decoded = PyUnicode_DecodeMBCSStateful(
493 data, size, errors, final ? NULL : &consumed);
494 if (!decoded)
495 return NULL;
496 return codec_tuple(decoded, final ? size : consumed);
497}
498
499#endif /* MS_WINDOWS */
500
501/* --- Encoder ------------------------------------------------------------ */
502
503static PyObject *
504readbuffer_encode(PyObject *self,
505 PyObject *args)
506{
507 const char *data;
508 Py_ssize_t size;
509 const char *errors = NULL;
510
511 if (!PyArg_ParseTuple(args, "s#|z:readbuffer_encode",
512 &data, &size, &errors))
513 return NULL;
514
515 return codec_tuple(PyString_FromStringAndSize(data, size),
516 size);
517}
518
519static PyObject *
520charbuffer_encode(PyObject *self,
521 PyObject *args)
522{
523 const char *data;
524 Py_ssize_t size;
525 const char *errors = NULL;
526
527 if (!PyArg_ParseTuple(args, "t#|z:charbuffer_encode",
528 &data, &size, &errors))
529 return NULL;
530
531 return codec_tuple(PyString_FromStringAndSize(data, size),
532 size);
533}
534
535static PyObject *
536unicode_internal_encode(PyObject *self,
537 PyObject *args)
538{
539 PyObject *obj;
540 const char *errors = NULL;
541 const char *data;
542 Py_ssize_t size;
543
544 if (!PyArg_ParseTuple(args, "O|z:unicode_internal_encode",
545 &obj, &errors))
546 return NULL;
547
548 if (PyUnicode_Check(obj)) {
549 data = PyUnicode_AS_DATA(obj);
550 size = PyUnicode_GET_DATA_SIZE(obj);
551 return codec_tuple(PyString_FromStringAndSize(data, size),
552 size);
553 }
554 else {
555 if (PyObject_AsReadBuffer(obj, (const void **)&data, &size))
556 return NULL;
557 return codec_tuple(PyString_FromStringAndSize(data, size),
558 size);
559 }
560}
561
562static PyObject *
563utf_7_encode(PyObject *self,
564 PyObject *args)
565{
566 PyObject *str, *v;
567 const char *errors = NULL;
568
569 if (!PyArg_ParseTuple(args, "O|z:utf_7_encode",
570 &str, &errors))
571 return NULL;
572
573 str = PyUnicode_FromObject(str);
574 if (str == NULL)
575 return NULL;
576 v = codec_tuple(PyUnicode_EncodeUTF7(PyUnicode_AS_UNICODE(str),
577 PyUnicode_GET_SIZE(str),
578 0,
579 0,
580 errors),
581 PyUnicode_GET_SIZE(str));
582 Py_DECREF(str);
583 return v;
584}
585
586static PyObject *
587utf_8_encode(PyObject *self,
588 PyObject *args)
589{
590 PyObject *str, *v;
591 const char *errors = NULL;
592
593 if (!PyArg_ParseTuple(args, "O|z:utf_8_encode",
594 &str, &errors))
595 return NULL;
596
597 str = PyUnicode_FromObject(str);
598 if (str == NULL)
599 return NULL;
600 v = codec_tuple(PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(str),
601 PyUnicode_GET_SIZE(str),
602 errors),
603 PyUnicode_GET_SIZE(str));
604 Py_DECREF(str);
605 return v;
606}
607
608/* This version provides access to the byteorder parameter of the
609 builtin UTF-16 codecs as optional third argument. It defaults to 0
610 which means: use the native byte order and prepend the data with a
611 BOM mark.
612
613*/
614
615static PyObject *
616utf_16_encode(PyObject *self,
617 PyObject *args)
618{
619 PyObject *str, *v;
620 const char *errors = NULL;
621 int byteorder = 0;
622
623 if (!PyArg_ParseTuple(args, "O|zi:utf_16_encode",
624 &str, &errors, &byteorder))
625 return NULL;
626
627 str = PyUnicode_FromObject(str);
628 if (str == NULL)
629 return NULL;
630 v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
631 PyUnicode_GET_SIZE(str),
632 errors,
633 byteorder),
634 PyUnicode_GET_SIZE(str));
635 Py_DECREF(str);
636 return v;
637}
638
639static PyObject *
640utf_16_le_encode(PyObject *self,
641 PyObject *args)
642{
643 PyObject *str, *v;
644 const char *errors = NULL;
645
646 if (!PyArg_ParseTuple(args, "O|z:utf_16_le_encode",
647 &str, &errors))
648 return NULL;
649
650 str = PyUnicode_FromObject(str);
651 if (str == NULL)
652 return NULL;
653 v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
654 PyUnicode_GET_SIZE(str),
655 errors,
656 -1),
657 PyUnicode_GET_SIZE(str));
658 Py_DECREF(str);
659 return v;
660}
661
662static PyObject *
663utf_16_be_encode(PyObject *self,
664 PyObject *args)
665{
666 PyObject *str, *v;
667 const char *errors = NULL;
668
669 if (!PyArg_ParseTuple(args, "O|z:utf_16_be_encode",
670 &str, &errors))
671 return NULL;
672
673 str = PyUnicode_FromObject(str);
674 if (str == NULL)
675 return NULL;
676 v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
677 PyUnicode_GET_SIZE(str),
678 errors,
679 +1),
680 PyUnicode_GET_SIZE(str));
681 Py_DECREF(str);
682 return v;
683}
684
685static PyObject *
686unicode_escape_encode(PyObject *self,
687 PyObject *args)
688{
689 PyObject *str, *v;
690 const char *errors = NULL;
691
692 if (!PyArg_ParseTuple(args, "O|z:unicode_escape_encode",
693 &str, &errors))
694 return NULL;
695
696 str = PyUnicode_FromObject(str);
697 if (str == NULL)
698 return NULL;
699 v = codec_tuple(PyUnicode_EncodeUnicodeEscape(PyUnicode_AS_UNICODE(str),
700 PyUnicode_GET_SIZE(str)),
701 PyUnicode_GET_SIZE(str));
702 Py_DECREF(str);
703 return v;
704}
705
706static PyObject *
707raw_unicode_escape_encode(PyObject *self,
708 PyObject *args)
709{
710 PyObject *str, *v;
711 const char *errors = NULL;
712
713 if (!PyArg_ParseTuple(args, "O|z:raw_unicode_escape_encode",
714 &str, &errors))
715 return NULL;
716
717 str = PyUnicode_FromObject(str);
718 if (str == NULL)
719 return NULL;
720 v = codec_tuple(PyUnicode_EncodeRawUnicodeEscape(
721 PyUnicode_AS_UNICODE(str),
722 PyUnicode_GET_SIZE(str)),
723 PyUnicode_GET_SIZE(str));
724 Py_DECREF(str);
725 return v;
726}
727
728static PyObject *
729latin_1_encode(PyObject *self,
730 PyObject *args)
731{
732 PyObject *str, *v;
733 const char *errors = NULL;
734
735 if (!PyArg_ParseTuple(args, "O|z:latin_1_encode",
736 &str, &errors))
737 return NULL;
738
739 str = PyUnicode_FromObject(str);
740 if (str == NULL)
741 return NULL;
742 v = codec_tuple(PyUnicode_EncodeLatin1(
743 PyUnicode_AS_UNICODE(str),
744 PyUnicode_GET_SIZE(str),
745 errors),
746 PyUnicode_GET_SIZE(str));
747 Py_DECREF(str);
748 return v;
749}
750
751static PyObject *
752ascii_encode(PyObject *self,
753 PyObject *args)
754{
755 PyObject *str, *v;
756 const char *errors = NULL;
757
758 if (!PyArg_ParseTuple(args, "O|z:ascii_encode",
759 &str, &errors))
760 return NULL;
761
762 str = PyUnicode_FromObject(str);
763 if (str == NULL)
764 return NULL;
765 v = codec_tuple(PyUnicode_EncodeASCII(
766 PyUnicode_AS_UNICODE(str),
767 PyUnicode_GET_SIZE(str),
768 errors),
769 PyUnicode_GET_SIZE(str));
770 Py_DECREF(str);
771 return v;
772}
773
774static PyObject *
775charmap_encode(PyObject *self,
776 PyObject *args)
777{
778 PyObject *str, *v;
779 const char *errors = NULL;
780 PyObject *mapping = NULL;
781
782 if (!PyArg_ParseTuple(args, "O|zO:charmap_encode",
783 &str, &errors, &mapping))
784 return NULL;
785 if (mapping == Py_None)
786 mapping = NULL;
787
788 str = PyUnicode_FromObject(str);
789 if (str == NULL)
790 return NULL;
791 v = codec_tuple(PyUnicode_EncodeCharmap(
792 PyUnicode_AS_UNICODE(str),
793 PyUnicode_GET_SIZE(str),
794 mapping,
795 errors),
796 PyUnicode_GET_SIZE(str));
797 Py_DECREF(str);
798 return v;
799}
800
801static PyObject*
802charmap_build(PyObject *self, PyObject *args)
803{
804 PyObject *map;
805 if (!PyArg_ParseTuple(args, "U:charmap_build", &map))
806 return NULL;
807 return PyUnicode_BuildEncodingMap(map);
808}
809
810#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
811
812static PyObject *
813mbcs_encode(PyObject *self,
814 PyObject *args)
815{
816 PyObject *str, *v;
817 const char *errors = NULL;
818
819 if (!PyArg_ParseTuple(args, "O|z:mbcs_encode",
820 &str, &errors))
821 return NULL;
822
823 str = PyUnicode_FromObject(str);
824 if (str == NULL)
825 return NULL;
826 v = codec_tuple(PyUnicode_EncodeMBCS(
827 PyUnicode_AS_UNICODE(str),
828 PyUnicode_GET_SIZE(str),
829 errors),
830 PyUnicode_GET_SIZE(str));
831 Py_DECREF(str);
832 return v;
833}
834
835#endif /* MS_WINDOWS */
836#endif /* Py_USING_UNICODE */
837
838/* --- Error handler registry --------------------------------------------- */
839
840PyDoc_STRVAR(register_error__doc__,
841"register_error(errors, handler)\n\
842\n\
843Register the specified error handler under the name\n\
844errors. handler must be a callable object, that\n\
845will be called with an exception instance containing\n\
846information about the location of the encoding/decoding\n\
847error and must return a (replacement, new position) tuple.");
848
849static PyObject *register_error(PyObject *self, PyObject *args)
850{
851 const char *name;
852 PyObject *handler;
853
854 if (!PyArg_ParseTuple(args, "sO:register_error",
855 &name, &handler))
856 return NULL;
857 if (PyCodec_RegisterError(name, handler))
858 return NULL;
859 Py_RETURN_NONE;
860}
861
862PyDoc_STRVAR(lookup_error__doc__,
863"lookup_error(errors) -> handler\n\
864\n\
865Return the error handler for the specified error handling name\n\
866or raise a LookupError, if no handler exists under this name.");
867
868static PyObject *lookup_error(PyObject *self, PyObject *args)
869{
870 const char *name;
871
872 if (!PyArg_ParseTuple(args, "s:lookup_error",
873 &name))
874 return NULL;
875 return PyCodec_LookupError(name);
876}
877
878/* --- Module API --------------------------------------------------------- */
879
880static PyMethodDef _codecs_functions[] = {
881 {"register", codec_register, METH_O,
882 register__doc__},
883 {"lookup", codec_lookup, METH_VARARGS,
884 lookup__doc__},
885 {"encode", codec_encode, METH_VARARGS,
886 encode__doc__},
887 {"decode", codec_decode, METH_VARARGS,
888 decode__doc__},
889 {"escape_encode", escape_encode, METH_VARARGS},
890 {"escape_decode", escape_decode, METH_VARARGS},
891#ifdef Py_USING_UNICODE
892 {"utf_8_encode", utf_8_encode, METH_VARARGS},
893 {"utf_8_decode", utf_8_decode, METH_VARARGS},
894 {"utf_7_encode", utf_7_encode, METH_VARARGS},
895 {"utf_7_decode", utf_7_decode, METH_VARARGS},
896 {"utf_16_encode", utf_16_encode, METH_VARARGS},
897 {"utf_16_le_encode", utf_16_le_encode, METH_VARARGS},
898 {"utf_16_be_encode", utf_16_be_encode, METH_VARARGS},
899 {"utf_16_decode", utf_16_decode, METH_VARARGS},
900 {"utf_16_le_decode", utf_16_le_decode, METH_VARARGS},
901 {"utf_16_be_decode", utf_16_be_decode, METH_VARARGS},
902 {"utf_16_ex_decode", utf_16_ex_decode, METH_VARARGS},
903 {"unicode_escape_encode", unicode_escape_encode, METH_VARARGS},
904 {"unicode_escape_decode", unicode_escape_decode, METH_VARARGS},
905 {"unicode_internal_encode", unicode_internal_encode, METH_VARARGS},
906 {"unicode_internal_decode", unicode_internal_decode, METH_VARARGS},
907 {"raw_unicode_escape_encode", raw_unicode_escape_encode, METH_VARARGS},
908 {"raw_unicode_escape_decode", raw_unicode_escape_decode, METH_VARARGS},
909 {"latin_1_encode", latin_1_encode, METH_VARARGS},
910 {"latin_1_decode", latin_1_decode, METH_VARARGS},
911 {"ascii_encode", ascii_encode, METH_VARARGS},
912 {"ascii_decode", ascii_decode, METH_VARARGS},
913 {"charmap_encode", charmap_encode, METH_VARARGS},
914 {"charmap_decode", charmap_decode, METH_VARARGS},
915 {"charmap_build", charmap_build, METH_VARARGS},
916 {"readbuffer_encode", readbuffer_encode, METH_VARARGS},
917 {"charbuffer_encode", charbuffer_encode, METH_VARARGS},
918#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
919 {"mbcs_encode", mbcs_encode, METH_VARARGS},
920 {"mbcs_decode", mbcs_decode, METH_VARARGS},
921#endif
922#endif /* Py_USING_UNICODE */
923 {"register_error", register_error, METH_VARARGS,
924 register_error__doc__},
925 {"lookup_error", lookup_error, METH_VARARGS,
926 lookup_error__doc__},
927 {NULL, NULL} /* sentinel */
928};
929
930PyMODINIT_FUNC
931init_codecs(void)
932{
933 Py_InitModule("_codecs", _codecs_functions);
934}
Note: See TracBrowser for help on using the repository browser.