source: python/vendor/Python-2.6.5/Modules/_codecsmodule.c

Last change on this file was 2, checked in by Yuri Dario, 15 years ago

Initial import for vendor code.

  • Property svn:eol-style set to native
File size: 28.1 KB
Line 
1/* ------------------------------------------------------------------------
2
3 _codecs -- Provides access to the codec registry and the builtin
4 codecs.
5
6 This module should never be imported directly. The standard library
7 module "codecs" wraps this builtin module for use within Python.
8
9 The codec registry is accessible via:
10
11 register(search_function) -> None
12
13 lookup(encoding) -> CodecInfo object
14
15 The builtin Unicode codecs use the following interface:
16
17 <encoding>_encode(Unicode_object[,errors='strict']) ->
18 (string object, bytes consumed)
19
20 <encoding>_decode(char_buffer_obj[,errors='strict']) ->
21 (Unicode object, bytes consumed)
22
23 <encoding>_encode() interfaces also accept non-Unicode objects as
24 input. The objects are then converted to Unicode using
25 PyUnicode_FromObject() prior to applying the conversion.
26
27 These <encoding>s are available: utf_8, unicode_escape,
28 raw_unicode_escape, unicode_internal, latin_1, ascii (7-bit),
29 mbcs (on win32).
30
31
32Written by Marc-Andre Lemburg (mal@lemburg.com).
33
34Copyright (c) Corporation for National Research Initiatives.
35
36 ------------------------------------------------------------------------ */
37
38#define PY_SSIZE_T_CLEAN
39#include "Python.h"
40
41/* --- Registry ----------------------------------------------------------- */
42
43PyDoc_STRVAR(register__doc__,
44"register(search_function)\n\
45\n\
46Register a codec search function. Search functions are expected to take\n\
47one argument, the encoding name in all lower case letters, and return\n\
48a tuple of functions (encoder, decoder, stream_reader, stream_writer)\n\
49(or a CodecInfo object).");
50
51static
52PyObject *codec_register(PyObject *self, PyObject *search_function)
53{
54 if (PyCodec_Register(search_function))
55 return NULL;
56
57 Py_RETURN_NONE;
58}
59
60PyDoc_STRVAR(lookup__doc__,
61"lookup(encoding) -> CodecInfo\n\
62\n\
63Looks up a codec tuple in the Python codec registry and returns\n\
64a CodecInfo object.");
65
66static
67PyObject *codec_lookup(PyObject *self, PyObject *args)
68{
69 char *encoding;
70
71 if (!PyArg_ParseTuple(args, "s:lookup", &encoding))
72 return NULL;
73
74 return _PyCodec_Lookup(encoding);
75}
76
77PyDoc_STRVAR(encode__doc__,
78"encode(obj, [encoding[,errors]]) -> object\n\
79\n\
80Encodes obj using the codec registered for encoding. encoding defaults\n\
81to the default encoding. errors may be given to set a different error\n\
82handling scheme. Default is 'strict' meaning that encoding errors raise\n\
83a ValueError. Other possible values are 'ignore', 'replace' and\n\
84'xmlcharrefreplace' as well as any other name registered with\n\
85codecs.register_error that can handle ValueErrors.");
86
87static PyObject *
88codec_encode(PyObject *self, PyObject *args)
89{
90 const char *encoding = NULL;
91 const char *errors = NULL;
92 PyObject *v;
93
94 if (!PyArg_ParseTuple(args, "O|ss:encode", &v, &encoding, &errors))
95 return NULL;
96
97#ifdef Py_USING_UNICODE
98 if (encoding == NULL)
99 encoding = PyUnicode_GetDefaultEncoding();
100#else
101 if (encoding == NULL) {
102 PyErr_SetString(PyExc_ValueError, "no encoding specified");
103 return NULL;
104 }
105#endif
106
107 /* Encode via the codec registry */
108 return PyCodec_Encode(v, encoding, errors);
109}
110
111PyDoc_STRVAR(decode__doc__,
112"decode(obj, [encoding[,errors]]) -> object\n\
113\n\
114Decodes obj using the codec registered for encoding. encoding defaults\n\
115to the default encoding. errors may be given to set a different error\n\
116handling scheme. Default is 'strict' meaning that encoding errors raise\n\
117a ValueError. Other possible values are 'ignore' and 'replace'\n\
118as well as any other name registered with codecs.register_error that is\n\
119able to handle ValueErrors.");
120
121static PyObject *
122codec_decode(PyObject *self, PyObject *args)
123{
124 const char *encoding = NULL;
125 const char *errors = NULL;
126 PyObject *v;
127
128 if (!PyArg_ParseTuple(args, "O|ss:decode", &v, &encoding, &errors))
129 return NULL;
130
131#ifdef Py_USING_UNICODE
132 if (encoding == NULL)
133 encoding = PyUnicode_GetDefaultEncoding();
134#else
135 if (encoding == NULL) {
136 PyErr_SetString(PyExc_ValueError, "no encoding specified");
137 return NULL;
138 }
139#endif
140
141 /* Decode via the codec registry */
142 return PyCodec_Decode(v, encoding, errors);
143}
144
145/* --- Helpers ------------------------------------------------------------ */
146
147static
148PyObject *codec_tuple(PyObject *unicode,
149 Py_ssize_t len)
150{
151 PyObject *v;
152 if (unicode == NULL)
153 return NULL;
154 v = Py_BuildValue("On", unicode, len);
155 Py_DECREF(unicode);
156 return v;
157}
158
159/* --- String codecs ------------------------------------------------------ */
160static PyObject *
161escape_decode(PyObject *self,
162 PyObject *args)
163{
164 const char *errors = NULL;
165 const char *data;
166 Py_ssize_t size;
167
168 if (!PyArg_ParseTuple(args, "s#|z:escape_decode",
169 &data, &size, &errors))
170 return NULL;
171 return codec_tuple(PyString_DecodeEscape(data, size, errors, 0, NULL),
172 size);
173}
174
175static PyObject *
176escape_encode(PyObject *self,
177 PyObject *args)
178{
179 PyObject *str;
180 const char *errors = NULL;
181 char *buf;
182 Py_ssize_t len;
183
184 if (!PyArg_ParseTuple(args, "O!|z:escape_encode",
185 &PyString_Type, &str, &errors))
186 return NULL;
187
188 str = PyString_Repr(str, 0);
189 if (!str)
190 return NULL;
191
192 /* The string will be quoted. Unquote, similar to unicode-escape. */
193 buf = PyString_AS_STRING (str);
194 len = PyString_GET_SIZE (str);
195 memmove(buf, buf+1, len-2);
196 if (_PyString_Resize(&str, len-2) < 0)
197 return NULL;
198
199 return codec_tuple(str, PyString_Size(str));
200}
201
202#ifdef Py_USING_UNICODE
203/* --- Decoder ------------------------------------------------------------ */
204
205static PyObject *
206unicode_internal_decode(PyObject *self,
207 PyObject *args)
208{
209 PyObject *obj;
210 const char *errors = NULL;
211 const char *data;
212 Py_ssize_t size;
213
214 if (!PyArg_ParseTuple(args, "O|z:unicode_internal_decode",
215 &obj, &errors))
216 return NULL;
217
218 if (PyUnicode_Check(obj)) {
219 Py_INCREF(obj);
220 return codec_tuple(obj, PyUnicode_GET_SIZE(obj));
221 }
222 else {
223 if (PyObject_AsReadBuffer(obj, (const void **)&data, &size))
224 return NULL;
225
226 return codec_tuple(_PyUnicode_DecodeUnicodeInternal(data, size, errors),
227 size);
228 }
229}
230
231static PyObject *
232utf_7_decode(PyObject *self,
233 PyObject *args)
234{
235 Py_buffer pbuf;
236 const char *errors = NULL;
237 int final = 0;
238 Py_ssize_t consumed;
239 PyObject *decoded = NULL;
240
241 if (!PyArg_ParseTuple(args, "s*|zi:utf_7_decode",
242 &pbuf, &errors, &final))
243 return NULL;
244 consumed = pbuf.len;
245
246 decoded = PyUnicode_DecodeUTF7Stateful(pbuf.buf, pbuf.len, errors,
247 final ? NULL : &consumed);
248 PyBuffer_Release(&pbuf);
249 if (decoded == NULL)
250 return NULL;
251 return codec_tuple(decoded, consumed);
252}
253
254static PyObject *
255utf_8_decode(PyObject *self,
256 PyObject *args)
257{
258 Py_buffer pbuf;
259 const char *errors = NULL;
260 int final = 0;
261 Py_ssize_t consumed;
262 PyObject *decoded = NULL;
263
264 if (!PyArg_ParseTuple(args, "s*|zi:utf_8_decode",
265 &pbuf, &errors, &final))
266 return NULL;
267 consumed = pbuf.len;
268
269 decoded = PyUnicode_DecodeUTF8Stateful(pbuf.buf, pbuf.len, errors,
270 final ? NULL : &consumed);
271 PyBuffer_Release(&pbuf);
272 if (decoded == NULL)
273 return NULL;
274 return codec_tuple(decoded, consumed);
275}
276
277static PyObject *
278utf_16_decode(PyObject *self,
279 PyObject *args)
280{
281 Py_buffer pbuf;
282 const char *errors = NULL;
283 int byteorder = 0;
284 int final = 0;
285 Py_ssize_t consumed;
286 PyObject *decoded;
287
288 if (!PyArg_ParseTuple(args, "s*|zi:utf_16_decode",
289 &pbuf, &errors, &final))
290 return NULL;
291 consumed = pbuf.len; /* This is overwritten unless final is true. */
292 decoded = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors,
293 &byteorder, final ? NULL : &consumed);
294 PyBuffer_Release(&pbuf);
295 if (decoded == NULL)
296 return NULL;
297 return codec_tuple(decoded, consumed);
298}
299
300static PyObject *
301utf_16_le_decode(PyObject *self,
302 PyObject *args)
303{
304 Py_buffer pbuf;
305 const char *errors = NULL;
306 int byteorder = -1;
307 int final = 0;
308 Py_ssize_t consumed;
309 PyObject *decoded = NULL;
310
311 if (!PyArg_ParseTuple(args, "s*|zi:utf_16_le_decode",
312 &pbuf, &errors, &final))
313 return NULL;
314
315 consumed = pbuf.len; /* This is overwritten unless final is true. */
316 decoded = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors,
317 &byteorder, final ? NULL : &consumed);
318 PyBuffer_Release(&pbuf);
319 if (decoded == NULL)
320 return NULL;
321 return codec_tuple(decoded, consumed);
322}
323
324static PyObject *
325utf_16_be_decode(PyObject *self,
326 PyObject *args)
327{
328 Py_buffer pbuf;
329 const char *errors = NULL;
330 int byteorder = 1;
331 int final = 0;
332 Py_ssize_t consumed;
333 PyObject *decoded = NULL;
334
335 if (!PyArg_ParseTuple(args, "s*|zi:utf_16_be_decode",
336 &pbuf, &errors, &final))
337 return NULL;
338
339 consumed = pbuf.len; /* This is overwritten unless final is true. */
340 decoded = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors,
341 &byteorder, final ? NULL : &consumed);
342 PyBuffer_Release(&pbuf);
343 if (decoded == NULL)
344 return NULL;
345 return codec_tuple(decoded, consumed);
346}
347
348/* This non-standard version also provides access to the byteorder
349 parameter of the builtin UTF-16 codec.
350
351 It returns a tuple (unicode, bytesread, byteorder) with byteorder
352 being the value in effect at the end of data.
353
354*/
355
356static PyObject *
357utf_16_ex_decode(PyObject *self,
358 PyObject *args)
359{
360 Py_buffer pbuf;
361 const char *errors = NULL;
362 int byteorder = 0;
363 PyObject *unicode, *tuple;
364 int final = 0;
365 Py_ssize_t consumed;
366
367 if (!PyArg_ParseTuple(args, "s*|zii:utf_16_ex_decode",
368 &pbuf, &errors, &byteorder, &final))
369 return NULL;
370 consumed = pbuf.len; /* This is overwritten unless final is true. */
371 unicode = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors,
372 &byteorder, final ? NULL : &consumed);
373 PyBuffer_Release(&pbuf);
374 if (unicode == NULL)
375 return NULL;
376 tuple = Py_BuildValue("Oni", unicode, consumed, byteorder);
377 Py_DECREF(unicode);
378 return tuple;
379}
380
381static PyObject *
382utf_32_decode(PyObject *self,
383 PyObject *args)
384{
385 Py_buffer pbuf;
386 const char *errors = NULL;
387 int byteorder = 0;
388 int final = 0;
389 Py_ssize_t consumed;
390 PyObject *decoded;
391
392 if (!PyArg_ParseTuple(args, "s*|zi:utf_32_decode",
393 &pbuf, &errors, &final))
394 return NULL;
395 consumed = pbuf.len; /* This is overwritten unless final is true. */
396 decoded = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors,
397 &byteorder, final ? NULL : &consumed);
398 PyBuffer_Release(&pbuf);
399 if (decoded == NULL)
400 return NULL;
401 return codec_tuple(decoded, consumed);
402}
403
404static PyObject *
405utf_32_le_decode(PyObject *self,
406 PyObject *args)
407{
408 Py_buffer pbuf;
409 const char *errors = NULL;
410 int byteorder = -1;
411 int final = 0;
412 Py_ssize_t consumed;
413 PyObject *decoded;
414
415 if (!PyArg_ParseTuple(args, "s*|zi:utf_32_le_decode",
416 &pbuf, &errors, &final))
417 return NULL;
418 consumed = pbuf.len; /* This is overwritten unless final is true. */
419 decoded = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors,
420 &byteorder, final ? NULL : &consumed);
421 PyBuffer_Release(&pbuf);
422 if (decoded == NULL)
423 return NULL;
424 return codec_tuple(decoded, consumed);
425}
426
427static PyObject *
428utf_32_be_decode(PyObject *self,
429 PyObject *args)
430{
431 Py_buffer pbuf;
432 const char *errors = NULL;
433 int byteorder = 1;
434 int final = 0;
435 Py_ssize_t consumed;
436 PyObject *decoded;
437
438 if (!PyArg_ParseTuple(args, "s*|zi:utf_32_be_decode",
439 &pbuf, &errors, &final))
440 return NULL;
441 consumed = pbuf.len; /* This is overwritten unless final is true. */
442 decoded = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors,
443 &byteorder, final ? NULL : &consumed);
444 PyBuffer_Release(&pbuf);
445 if (decoded == NULL)
446 return NULL;
447 return codec_tuple(decoded, consumed);
448}
449
450/* This non-standard version also provides access to the byteorder
451 parameter of the builtin UTF-32 codec.
452
453 It returns a tuple (unicode, bytesread, byteorder) with byteorder
454 being the value in effect at the end of data.
455
456*/
457
458static PyObject *
459utf_32_ex_decode(PyObject *self,
460 PyObject *args)
461{
462 Py_buffer pbuf;
463 const char *errors = NULL;
464 int byteorder = 0;
465 PyObject *unicode, *tuple;
466 int final = 0;
467 Py_ssize_t consumed;
468
469 if (!PyArg_ParseTuple(args, "s*|zii:utf_32_ex_decode",
470 &pbuf, &errors, &byteorder, &final))
471 return NULL;
472 consumed = pbuf.len; /* This is overwritten unless final is true. */
473 unicode = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors,
474 &byteorder, final ? NULL : &consumed);
475 PyBuffer_Release(&pbuf);
476 if (unicode == NULL)
477 return NULL;
478 tuple = Py_BuildValue("Oni", unicode, consumed, byteorder);
479 Py_DECREF(unicode);
480 return tuple;
481}
482
483static PyObject *
484unicode_escape_decode(PyObject *self,
485 PyObject *args)
486{
487 Py_buffer pbuf;
488 const char *errors = NULL;
489 PyObject *unicode;
490
491 if (!PyArg_ParseTuple(args, "s*|z:unicode_escape_decode",
492 &pbuf, &errors))
493 return NULL;
494
495 unicode = PyUnicode_DecodeUnicodeEscape(pbuf.buf, pbuf.len, errors);
496 PyBuffer_Release(&pbuf);
497 return codec_tuple(unicode, pbuf.len);
498}
499
500static PyObject *
501raw_unicode_escape_decode(PyObject *self,
502 PyObject *args)
503{
504 Py_buffer pbuf;
505 const char *errors = NULL;
506 PyObject *unicode;
507
508 if (!PyArg_ParseTuple(args, "s*|z:raw_unicode_escape_decode",
509 &pbuf, &errors))
510 return NULL;
511
512 unicode = PyUnicode_DecodeRawUnicodeEscape(pbuf.buf, pbuf.len, errors);
513 PyBuffer_Release(&pbuf);
514 return codec_tuple(unicode, pbuf.len);
515}
516
517static PyObject *
518latin_1_decode(PyObject *self,
519 PyObject *args)
520{
521 Py_buffer pbuf;
522 PyObject *unicode;
523 const char *errors = NULL;
524
525 if (!PyArg_ParseTuple(args, "s*|z:latin_1_decode",
526 &pbuf, &errors))
527 return NULL;
528
529 unicode = PyUnicode_DecodeLatin1(pbuf.buf, pbuf.len, errors);
530 PyBuffer_Release(&pbuf);
531 return codec_tuple(unicode, pbuf.len);
532}
533
534static PyObject *
535ascii_decode(PyObject *self,
536 PyObject *args)
537{
538 Py_buffer pbuf;
539 PyObject *unicode;
540 const char *errors = NULL;
541
542 if (!PyArg_ParseTuple(args, "s*|z:ascii_decode",
543 &pbuf, &errors))
544 return NULL;
545
546 unicode = PyUnicode_DecodeASCII(pbuf.buf, pbuf.len, errors);
547 PyBuffer_Release(&pbuf);
548 return codec_tuple(unicode, pbuf.len);
549}
550
551static PyObject *
552charmap_decode(PyObject *self,
553 PyObject *args)
554{
555 Py_buffer pbuf;
556 PyObject *unicode;
557 const char *errors = NULL;
558 PyObject *mapping = NULL;
559
560 if (!PyArg_ParseTuple(args, "s*|zO:charmap_decode",
561 &pbuf, &errors, &mapping))
562 return NULL;
563 if (mapping == Py_None)
564 mapping = NULL;
565
566 unicode = PyUnicode_DecodeCharmap(pbuf.buf, pbuf.len, mapping, errors);
567 PyBuffer_Release(&pbuf);
568 return codec_tuple(unicode, pbuf.len);
569}
570
571#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
572
573static PyObject *
574mbcs_decode(PyObject *self,
575 PyObject *args)
576{
577 Py_buffer pbuf;
578 const char *errors = NULL;
579 int final = 0;
580 Py_ssize_t consumed;
581 PyObject *decoded = NULL;
582
583 if (!PyArg_ParseTuple(args, "s*|zi:mbcs_decode",
584 &pbuf, &errors, &final))
585 return NULL;
586 consumed = pbuf.len;
587
588 decoded = PyUnicode_DecodeMBCSStateful(pbuf.buf, pbuf.len, errors,
589 final ? NULL : &consumed);
590 PyBuffer_Release(&pbuf);
591 if (decoded == NULL)
592 return NULL;
593 return codec_tuple(decoded, consumed);
594}
595
596#endif /* MS_WINDOWS */
597
598/* --- Encoder ------------------------------------------------------------ */
599
600static PyObject *
601readbuffer_encode(PyObject *self,
602 PyObject *args)
603{
604 const char *data;
605 Py_ssize_t size;
606 const char *errors = NULL;
607
608 if (!PyArg_ParseTuple(args, "s#|z:readbuffer_encode",
609 &data, &size, &errors))
610 return NULL;
611
612 return codec_tuple(PyString_FromStringAndSize(data, size),
613 size);
614}
615
616static PyObject *
617charbuffer_encode(PyObject *self,
618 PyObject *args)
619{
620 const char *data;
621 Py_ssize_t size;
622 const char *errors = NULL;
623
624 if (!PyArg_ParseTuple(args, "t#|z:charbuffer_encode",
625 &data, &size, &errors))
626 return NULL;
627
628 return codec_tuple(PyString_FromStringAndSize(data, size),
629 size);
630}
631
632static PyObject *
633unicode_internal_encode(PyObject *self,
634 PyObject *args)
635{
636 PyObject *obj;
637 const char *errors = NULL;
638 const char *data;
639 Py_ssize_t size;
640
641 if (!PyArg_ParseTuple(args, "O|z:unicode_internal_encode",
642 &obj, &errors))
643 return NULL;
644
645 if (PyUnicode_Check(obj)) {
646 data = PyUnicode_AS_DATA(obj);
647 size = PyUnicode_GET_DATA_SIZE(obj);
648 return codec_tuple(PyString_FromStringAndSize(data, size),
649 size);
650 }
651 else {
652 if (PyObject_AsReadBuffer(obj, (const void **)&data, &size))
653 return NULL;
654 return codec_tuple(PyString_FromStringAndSize(data, size),
655 size);
656 }
657}
658
659static PyObject *
660utf_7_encode(PyObject *self,
661 PyObject *args)
662{
663 PyObject *str, *v;
664 const char *errors = NULL;
665
666 if (!PyArg_ParseTuple(args, "O|z:utf_7_encode",
667 &str, &errors))
668 return NULL;
669
670 str = PyUnicode_FromObject(str);
671 if (str == NULL)
672 return NULL;
673 v = codec_tuple(PyUnicode_EncodeUTF7(PyUnicode_AS_UNICODE(str),
674 PyUnicode_GET_SIZE(str),
675 0,
676 0,
677 errors),
678 PyUnicode_GET_SIZE(str));
679 Py_DECREF(str);
680 return v;
681}
682
683static PyObject *
684utf_8_encode(PyObject *self,
685 PyObject *args)
686{
687 PyObject *str, *v;
688 const char *errors = NULL;
689
690 if (!PyArg_ParseTuple(args, "O|z:utf_8_encode",
691 &str, &errors))
692 return NULL;
693
694 str = PyUnicode_FromObject(str);
695 if (str == NULL)
696 return NULL;
697 v = codec_tuple(PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(str),
698 PyUnicode_GET_SIZE(str),
699 errors),
700 PyUnicode_GET_SIZE(str));
701 Py_DECREF(str);
702 return v;
703}
704
705/* This version provides access to the byteorder parameter of the
706 builtin UTF-16 codecs as optional third argument. It defaults to 0
707 which means: use the native byte order and prepend the data with a
708 BOM mark.
709
710*/
711
712static PyObject *
713utf_16_encode(PyObject *self,
714 PyObject *args)
715{
716 PyObject *str, *v;
717 const char *errors = NULL;
718 int byteorder = 0;
719
720 if (!PyArg_ParseTuple(args, "O|zi:utf_16_encode",
721 &str, &errors, &byteorder))
722 return NULL;
723
724 str = PyUnicode_FromObject(str);
725 if (str == NULL)
726 return NULL;
727 v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
728 PyUnicode_GET_SIZE(str),
729 errors,
730 byteorder),
731 PyUnicode_GET_SIZE(str));
732 Py_DECREF(str);
733 return v;
734}
735
736static PyObject *
737utf_16_le_encode(PyObject *self,
738 PyObject *args)
739{
740 PyObject *str, *v;
741 const char *errors = NULL;
742
743 if (!PyArg_ParseTuple(args, "O|z:utf_16_le_encode",
744 &str, &errors))
745 return NULL;
746
747 str = PyUnicode_FromObject(str);
748 if (str == NULL)
749 return NULL;
750 v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
751 PyUnicode_GET_SIZE(str),
752 errors,
753 -1),
754 PyUnicode_GET_SIZE(str));
755 Py_DECREF(str);
756 return v;
757}
758
759static PyObject *
760utf_16_be_encode(PyObject *self,
761 PyObject *args)
762{
763 PyObject *str, *v;
764 const char *errors = NULL;
765
766 if (!PyArg_ParseTuple(args, "O|z:utf_16_be_encode",
767 &str, &errors))
768 return NULL;
769
770 str = PyUnicode_FromObject(str);
771 if (str == NULL)
772 return NULL;
773 v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
774 PyUnicode_GET_SIZE(str),
775 errors,
776 +1),
777 PyUnicode_GET_SIZE(str));
778 Py_DECREF(str);
779 return v;
780}
781
782/* This version provides access to the byteorder parameter of the
783 builtin UTF-32 codecs as optional third argument. It defaults to 0
784 which means: use the native byte order and prepend the data with a
785 BOM mark.
786
787*/
788
789static PyObject *
790utf_32_encode(PyObject *self,
791 PyObject *args)
792{
793 PyObject *str, *v;
794 const char *errors = NULL;
795 int byteorder = 0;
796
797 if (!PyArg_ParseTuple(args, "O|zi:utf_32_encode",
798 &str, &errors, &byteorder))
799 return NULL;
800
801 str = PyUnicode_FromObject(str);
802 if (str == NULL)
803 return NULL;
804 v = codec_tuple(PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(str),
805 PyUnicode_GET_SIZE(str),
806 errors,
807 byteorder),
808 PyUnicode_GET_SIZE(str));
809 Py_DECREF(str);
810 return v;
811}
812
813static PyObject *
814utf_32_le_encode(PyObject *self,
815 PyObject *args)
816{
817 PyObject *str, *v;
818 const char *errors = NULL;
819
820 if (!PyArg_ParseTuple(args, "O|z:utf_32_le_encode",
821 &str, &errors))
822 return NULL;
823
824 str = PyUnicode_FromObject(str);
825 if (str == NULL)
826 return NULL;
827 v = codec_tuple(PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(str),
828 PyUnicode_GET_SIZE(str),
829 errors,
830 -1),
831 PyUnicode_GET_SIZE(str));
832 Py_DECREF(str);
833 return v;
834}
835
836static PyObject *
837utf_32_be_encode(PyObject *self,
838 PyObject *args)
839{
840 PyObject *str, *v;
841 const char *errors = NULL;
842
843 if (!PyArg_ParseTuple(args, "O|z:utf_32_be_encode",
844 &str, &errors))
845 return NULL;
846
847 str = PyUnicode_FromObject(str);
848 if (str == NULL)
849 return NULL;
850 v = codec_tuple(PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(str),
851 PyUnicode_GET_SIZE(str),
852 errors,
853 +1),
854 PyUnicode_GET_SIZE(str));
855 Py_DECREF(str);
856 return v;
857}
858
859static PyObject *
860unicode_escape_encode(PyObject *self,
861 PyObject *args)
862{
863 PyObject *str, *v;
864 const char *errors = NULL;
865
866 if (!PyArg_ParseTuple(args, "O|z:unicode_escape_encode",
867 &str, &errors))
868 return NULL;
869
870 str = PyUnicode_FromObject(str);
871 if (str == NULL)
872 return NULL;
873 v = codec_tuple(PyUnicode_EncodeUnicodeEscape(PyUnicode_AS_UNICODE(str),
874 PyUnicode_GET_SIZE(str)),
875 PyUnicode_GET_SIZE(str));
876 Py_DECREF(str);
877 return v;
878}
879
880static PyObject *
881raw_unicode_escape_encode(PyObject *self,
882 PyObject *args)
883{
884 PyObject *str, *v;
885 const char *errors = NULL;
886
887 if (!PyArg_ParseTuple(args, "O|z:raw_unicode_escape_encode",
888 &str, &errors))
889 return NULL;
890
891 str = PyUnicode_FromObject(str);
892 if (str == NULL)
893 return NULL;
894 v = codec_tuple(PyUnicode_EncodeRawUnicodeEscape(
895 PyUnicode_AS_UNICODE(str),
896 PyUnicode_GET_SIZE(str)),
897 PyUnicode_GET_SIZE(str));
898 Py_DECREF(str);
899 return v;
900}
901
902static PyObject *
903latin_1_encode(PyObject *self,
904 PyObject *args)
905{
906 PyObject *str, *v;
907 const char *errors = NULL;
908
909 if (!PyArg_ParseTuple(args, "O|z:latin_1_encode",
910 &str, &errors))
911 return NULL;
912
913 str = PyUnicode_FromObject(str);
914 if (str == NULL)
915 return NULL;
916 v = codec_tuple(PyUnicode_EncodeLatin1(
917 PyUnicode_AS_UNICODE(str),
918 PyUnicode_GET_SIZE(str),
919 errors),
920 PyUnicode_GET_SIZE(str));
921 Py_DECREF(str);
922 return v;
923}
924
925static PyObject *
926ascii_encode(PyObject *self,
927 PyObject *args)
928{
929 PyObject *str, *v;
930 const char *errors = NULL;
931
932 if (!PyArg_ParseTuple(args, "O|z:ascii_encode",
933 &str, &errors))
934 return NULL;
935
936 str = PyUnicode_FromObject(str);
937 if (str == NULL)
938 return NULL;
939 v = codec_tuple(PyUnicode_EncodeASCII(
940 PyUnicode_AS_UNICODE(str),
941 PyUnicode_GET_SIZE(str),
942 errors),
943 PyUnicode_GET_SIZE(str));
944 Py_DECREF(str);
945 return v;
946}
947
948static PyObject *
949charmap_encode(PyObject *self,
950 PyObject *args)
951{
952 PyObject *str, *v;
953 const char *errors = NULL;
954 PyObject *mapping = NULL;
955
956 if (!PyArg_ParseTuple(args, "O|zO:charmap_encode",
957 &str, &errors, &mapping))
958 return NULL;
959 if (mapping == Py_None)
960 mapping = NULL;
961
962 str = PyUnicode_FromObject(str);
963 if (str == NULL)
964 return NULL;
965 v = codec_tuple(PyUnicode_EncodeCharmap(
966 PyUnicode_AS_UNICODE(str),
967 PyUnicode_GET_SIZE(str),
968 mapping,
969 errors),
970 PyUnicode_GET_SIZE(str));
971 Py_DECREF(str);
972 return v;
973}
974
975static PyObject*
976charmap_build(PyObject *self, PyObject *args)
977{
978 PyObject *map;
979 if (!PyArg_ParseTuple(args, "U:charmap_build", &map))
980 return NULL;
981 return PyUnicode_BuildEncodingMap(map);
982}
983
984#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
985
986static PyObject *
987mbcs_encode(PyObject *self,
988 PyObject *args)
989{
990 PyObject *str, *v;
991 const char *errors = NULL;
992
993 if (!PyArg_ParseTuple(args, "O|z:mbcs_encode",
994 &str, &errors))
995 return NULL;
996
997 str = PyUnicode_FromObject(str);
998 if (str == NULL)
999 return NULL;
1000 v = codec_tuple(PyUnicode_EncodeMBCS(
1001 PyUnicode_AS_UNICODE(str),
1002 PyUnicode_GET_SIZE(str),
1003 errors),
1004 PyUnicode_GET_SIZE(str));
1005 Py_DECREF(str);
1006 return v;
1007}
1008
1009#endif /* MS_WINDOWS */
1010#endif /* Py_USING_UNICODE */
1011
1012/* --- Error handler registry --------------------------------------------- */
1013
1014PyDoc_STRVAR(register_error__doc__,
1015"register_error(errors, handler)\n\
1016\n\
1017Register the specified error handler under the name\n\
1018errors. handler must be a callable object, that\n\
1019will be called with an exception instance containing\n\
1020information about the location of the encoding/decoding\n\
1021error and must return a (replacement, new position) tuple.");
1022
1023static PyObject *register_error(PyObject *self, PyObject *args)
1024{
1025 const char *name;
1026 PyObject *handler;
1027
1028 if (!PyArg_ParseTuple(args, "sO:register_error",
1029 &name, &handler))
1030 return NULL;
1031 if (PyCodec_RegisterError(name, handler))
1032 return NULL;
1033 Py_RETURN_NONE;
1034}
1035
1036PyDoc_STRVAR(lookup_error__doc__,
1037"lookup_error(errors) -> handler\n\
1038\n\
1039Return the error handler for the specified error handling name\n\
1040or raise a LookupError, if no handler exists under this name.");
1041
1042static PyObject *lookup_error(PyObject *self, PyObject *args)
1043{
1044 const char *name;
1045
1046 if (!PyArg_ParseTuple(args, "s:lookup_error",
1047 &name))
1048 return NULL;
1049 return PyCodec_LookupError(name);
1050}
1051
1052/* --- Module API --------------------------------------------------------- */
1053
1054static PyMethodDef _codecs_functions[] = {
1055 {"register", codec_register, METH_O,
1056 register__doc__},
1057 {"lookup", codec_lookup, METH_VARARGS,
1058 lookup__doc__},
1059 {"encode", codec_encode, METH_VARARGS,
1060 encode__doc__},
1061 {"decode", codec_decode, METH_VARARGS,
1062 decode__doc__},
1063 {"escape_encode", escape_encode, METH_VARARGS},
1064 {"escape_decode", escape_decode, METH_VARARGS},
1065#ifdef Py_USING_UNICODE
1066 {"utf_8_encode", utf_8_encode, METH_VARARGS},
1067 {"utf_8_decode", utf_8_decode, METH_VARARGS},
1068 {"utf_7_encode", utf_7_encode, METH_VARARGS},
1069 {"utf_7_decode", utf_7_decode, METH_VARARGS},
1070 {"utf_16_encode", utf_16_encode, METH_VARARGS},
1071 {"utf_16_le_encode", utf_16_le_encode, METH_VARARGS},
1072 {"utf_16_be_encode", utf_16_be_encode, METH_VARARGS},
1073 {"utf_16_decode", utf_16_decode, METH_VARARGS},
1074 {"utf_16_le_decode", utf_16_le_decode, METH_VARARGS},
1075 {"utf_16_be_decode", utf_16_be_decode, METH_VARARGS},
1076 {"utf_16_ex_decode", utf_16_ex_decode, METH_VARARGS},
1077 {"utf_32_encode", utf_32_encode, METH_VARARGS},
1078 {"utf_32_le_encode", utf_32_le_encode, METH_VARARGS},
1079 {"utf_32_be_encode", utf_32_be_encode, METH_VARARGS},
1080 {"utf_32_decode", utf_32_decode, METH_VARARGS},
1081 {"utf_32_le_decode", utf_32_le_decode, METH_VARARGS},
1082 {"utf_32_be_decode", utf_32_be_decode, METH_VARARGS},
1083 {"utf_32_ex_decode", utf_32_ex_decode, METH_VARARGS},
1084 {"unicode_escape_encode", unicode_escape_encode, METH_VARARGS},
1085 {"unicode_escape_decode", unicode_escape_decode, METH_VARARGS},
1086 {"unicode_internal_encode", unicode_internal_encode, METH_VARARGS},
1087 {"unicode_internal_decode", unicode_internal_decode, METH_VARARGS},
1088 {"raw_unicode_escape_encode", raw_unicode_escape_encode, METH_VARARGS},
1089 {"raw_unicode_escape_decode", raw_unicode_escape_decode, METH_VARARGS},
1090 {"latin_1_encode", latin_1_encode, METH_VARARGS},
1091 {"latin_1_decode", latin_1_decode, METH_VARARGS},
1092 {"ascii_encode", ascii_encode, METH_VARARGS},
1093 {"ascii_decode", ascii_decode, METH_VARARGS},
1094 {"charmap_encode", charmap_encode, METH_VARARGS},
1095 {"charmap_decode", charmap_decode, METH_VARARGS},
1096 {"charmap_build", charmap_build, METH_VARARGS},
1097 {"readbuffer_encode", readbuffer_encode, METH_VARARGS},
1098 {"charbuffer_encode", charbuffer_encode, METH_VARARGS},
1099#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
1100 {"mbcs_encode", mbcs_encode, METH_VARARGS},
1101 {"mbcs_decode", mbcs_decode, METH_VARARGS},
1102#endif
1103#endif /* Py_USING_UNICODE */
1104 {"register_error", register_error, METH_VARARGS,
1105 register_error__doc__},
1106 {"lookup_error", lookup_error, METH_VARARGS,
1107 lookup_error__doc__},
1108 {NULL, NULL} /* sentinel */
1109};
1110
1111PyMODINIT_FUNC
1112init_codecs(void)
1113{
1114 Py_InitModule("_codecs", _codecs_functions);
1115}
Note: See TracBrowser for help on using the repository browser.