source: python/vendor/Python-2.7.6/Modules/_codecsmodule.c

Last change on this file was 388, checked in by dmik, 11 years ago

python: Update vendor to 2.7.6.

  • Property svn:eol-style set to native
File size: 32.7 KB
Line 
1/* ------------------------------------------------------------------------
2
3 _codecs -- Provides access to the codec registry and the builtin
4 codecs.
5
6 This module should never be imported directly. The standard library
7 module "codecs" wraps this builtin module for use within Python.
8
9 The codec registry is accessible via:
10
11 register(search_function) -> None
12
13 lookup(encoding) -> CodecInfo object
14
15 The builtin Unicode codecs use the following interface:
16
17 <encoding>_encode(Unicode_object[,errors='strict']) ->
18 (string object, bytes consumed)
19
20 <encoding>_decode(char_buffer_obj[,errors='strict']) ->
21 (Unicode object, bytes consumed)
22
23 <encoding>_encode() interfaces also accept non-Unicode object as
24 input. The objects are then converted to Unicode using
25 PyUnicode_FromObject() prior to applying the conversion.
26
27 These <encoding>s are available: utf_8, unicode_escape,
28 raw_unicode_escape, unicode_internal, latin_1, ascii (7-bit),
29 mbcs (on win32).
30
31
32Written by Marc-Andre Lemburg (mal@lemburg.com).
33
34Copyright (c) Corporation for National Research Initiatives.
35
36 ------------------------------------------------------------------------ */
37
38#define PY_SSIZE_T_CLEAN
39#include "Python.h"
40
41/* --- Registry ----------------------------------------------------------- */
42
43PyDoc_STRVAR(register__doc__,
44"register(search_function)\n\
45\n\
46Register a codec search function. Search functions are expected to take\n\
47one argument, the encoding name in all lower case letters, and return\n\
48a tuple of functions (encoder, decoder, stream_reader, stream_writer)\n\
49(or a CodecInfo object).");
50
51static
52PyObject *codec_register(PyObject *self, PyObject *search_function)
53{
54 if (PyCodec_Register(search_function))
55 return NULL;
56
57 Py_RETURN_NONE;
58}
59
60PyDoc_STRVAR(lookup__doc__,
61"lookup(encoding) -> CodecInfo\n\
62\n\
63Looks up a codec tuple in the Python codec registry and returns\n\
64a CodecInfo object.");
65
66static
67PyObject *codec_lookup(PyObject *self, PyObject *args)
68{
69 char *encoding;
70
71 if (!PyArg_ParseTuple(args, "s:lookup", &encoding))
72 return NULL;
73
74 return _PyCodec_Lookup(encoding);
75}
76
77PyDoc_STRVAR(encode__doc__,
78"encode(obj, [encoding[,errors]]) -> object\n\
79\n\
80Encodes obj using the codec registered for encoding. encoding defaults\n\
81to the default encoding. errors may be given to set a different error\n\
82handling scheme. Default is 'strict' meaning that encoding errors raise\n\
83a ValueError. Other possible values are 'ignore', 'replace' and\n\
84'xmlcharrefreplace' as well as any other name registered with\n\
85codecs.register_error that can handle ValueErrors.");
86
87static PyObject *
88codec_encode(PyObject *self, PyObject *args)
89{
90 const char *encoding = NULL;
91 const char *errors = NULL;
92 PyObject *v;
93
94 if (!PyArg_ParseTuple(args, "O|ss:encode", &v, &encoding, &errors))
95 return NULL;
96
97#ifdef Py_USING_UNICODE
98 if (encoding == NULL)
99 encoding = PyUnicode_GetDefaultEncoding();
100#else
101 if (encoding == NULL) {
102 PyErr_SetString(PyExc_ValueError, "no encoding specified");
103 return NULL;
104 }
105#endif
106
107 /* Encode via the codec registry */
108 return PyCodec_Encode(v, encoding, errors);
109}
110
111PyDoc_STRVAR(decode__doc__,
112"decode(obj, [encoding[,errors]]) -> object\n\
113\n\
114Decodes obj using the codec registered for encoding. encoding defaults\n\
115to the default encoding. errors may be given to set a different error\n\
116handling scheme. Default is 'strict' meaning that encoding errors raise\n\
117a ValueError. Other possible values are 'ignore' and 'replace'\n\
118as well as any other name registered with codecs.register_error that is\n\
119able to handle ValueErrors.");
120
121static PyObject *
122codec_decode(PyObject *self, PyObject *args)
123{
124 const char *encoding = NULL;
125 const char *errors = NULL;
126 PyObject *v;
127
128 if (!PyArg_ParseTuple(args, "O|ss:decode", &v, &encoding, &errors))
129 return NULL;
130
131#ifdef Py_USING_UNICODE
132 if (encoding == NULL)
133 encoding = PyUnicode_GetDefaultEncoding();
134#else
135 if (encoding == NULL) {
136 PyErr_SetString(PyExc_ValueError, "no encoding specified");
137 return NULL;
138 }
139#endif
140
141 /* Decode via the codec registry */
142 return PyCodec_Decode(v, encoding, errors);
143}
144
145/* --- Helpers ------------------------------------------------------------ */
146
147static
148PyObject *codec_tuple(PyObject *unicode,
149 Py_ssize_t len)
150{
151 PyObject *v;
152 if (unicode == NULL)
153 return NULL;
154 v = Py_BuildValue("On", unicode, len);
155 Py_DECREF(unicode);
156 return v;
157}
158
159/* --- String codecs ------------------------------------------------------ */
160static PyObject *
161escape_decode(PyObject *self,
162 PyObject *args)
163{
164 const char *errors = NULL;
165 const char *data;
166 Py_ssize_t size;
167
168 if (!PyArg_ParseTuple(args, "s#|z:escape_decode",
169 &data, &size, &errors))
170 return NULL;
171 return codec_tuple(PyString_DecodeEscape(data, size, errors, 0, NULL),
172 size);
173}
174
175static PyObject *
176escape_encode(PyObject *self,
177 PyObject *args)
178{
179 PyObject *str;
180 const char *errors = NULL;
181 char *buf;
182 Py_ssize_t consumed, len;
183
184 if (!PyArg_ParseTuple(args, "S|z:escape_encode",
185 &str, &errors))
186 return NULL;
187
188 consumed = PyString_GET_SIZE(str);
189 str = PyString_Repr(str, 0);
190 if (!str)
191 return NULL;
192
193 /* The string will be quoted. Unquote, similar to unicode-escape. */
194 buf = PyString_AS_STRING (str);
195 len = PyString_GET_SIZE (str);
196 memmove(buf, buf+1, len-2);
197 if (_PyString_Resize(&str, len-2) < 0)
198 return NULL;
199
200 return codec_tuple(str, consumed);
201}
202
203#ifdef Py_USING_UNICODE
204/* --- Decoder ------------------------------------------------------------ */
205
206static PyObject *
207unicode_internal_decode(PyObject *self,
208 PyObject *args)
209{
210 PyObject *obj;
211 const char *errors = NULL;
212 const char *data;
213 Py_ssize_t size;
214
215 if (!PyArg_ParseTuple(args, "O|z:unicode_internal_decode",
216 &obj, &errors))
217 return NULL;
218
219 if (PyUnicode_Check(obj)) {
220 Py_INCREF(obj);
221 return codec_tuple(obj, PyUnicode_GET_SIZE(obj));
222 }
223 else {
224 if (PyObject_AsReadBuffer(obj, (const void **)&data, &size))
225 return NULL;
226
227 return codec_tuple(_PyUnicode_DecodeUnicodeInternal(data, size, errors),
228 size);
229 }
230}
231
232static PyObject *
233utf_7_decode(PyObject *self,
234 PyObject *args)
235{
236 Py_buffer pbuf;
237 const char *errors = NULL;
238 int final = 0;
239 Py_ssize_t consumed;
240 PyObject *decoded = NULL;
241
242 if (!PyArg_ParseTuple(args, "s*|zi:utf_7_decode",
243 &pbuf, &errors, &final))
244 return NULL;
245 consumed = pbuf.len;
246
247 decoded = PyUnicode_DecodeUTF7Stateful(pbuf.buf, pbuf.len, errors,
248 final ? NULL : &consumed);
249 PyBuffer_Release(&pbuf);
250 if (decoded == NULL)
251 return NULL;
252 return codec_tuple(decoded, consumed);
253}
254
255static PyObject *
256utf_8_decode(PyObject *self,
257 PyObject *args)
258{
259 Py_buffer pbuf;
260 const char *errors = NULL;
261 int final = 0;
262 Py_ssize_t consumed;
263 PyObject *decoded = NULL;
264
265 if (!PyArg_ParseTuple(args, "s*|zi:utf_8_decode",
266 &pbuf, &errors, &final))
267 return NULL;
268 consumed = pbuf.len;
269
270 decoded = PyUnicode_DecodeUTF8Stateful(pbuf.buf, pbuf.len, errors,
271 final ? NULL : &consumed);
272 PyBuffer_Release(&pbuf);
273 if (decoded == NULL)
274 return NULL;
275 return codec_tuple(decoded, consumed);
276}
277
278static PyObject *
279utf_16_decode(PyObject *self,
280 PyObject *args)
281{
282 Py_buffer pbuf;
283 const char *errors = NULL;
284 int byteorder = 0;
285 int final = 0;
286 Py_ssize_t consumed;
287 PyObject *decoded;
288
289 if (!PyArg_ParseTuple(args, "s*|zi:utf_16_decode",
290 &pbuf, &errors, &final))
291 return NULL;
292 consumed = pbuf.len; /* This is overwritten unless final is true. */
293 decoded = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors,
294 &byteorder, final ? NULL : &consumed);
295 PyBuffer_Release(&pbuf);
296 if (decoded == NULL)
297 return NULL;
298 return codec_tuple(decoded, consumed);
299}
300
301static PyObject *
302utf_16_le_decode(PyObject *self,
303 PyObject *args)
304{
305 Py_buffer pbuf;
306 const char *errors = NULL;
307 int byteorder = -1;
308 int final = 0;
309 Py_ssize_t consumed;
310 PyObject *decoded = NULL;
311
312 if (!PyArg_ParseTuple(args, "s*|zi:utf_16_le_decode",
313 &pbuf, &errors, &final))
314 return NULL;
315
316 consumed = pbuf.len; /* This is overwritten unless final is true. */
317 decoded = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors,
318 &byteorder, final ? NULL : &consumed);
319 PyBuffer_Release(&pbuf);
320 if (decoded == NULL)
321 return NULL;
322 return codec_tuple(decoded, consumed);
323}
324
325static PyObject *
326utf_16_be_decode(PyObject *self,
327 PyObject *args)
328{
329 Py_buffer pbuf;
330 const char *errors = NULL;
331 int byteorder = 1;
332 int final = 0;
333 Py_ssize_t consumed;
334 PyObject *decoded = NULL;
335
336 if (!PyArg_ParseTuple(args, "s*|zi:utf_16_be_decode",
337 &pbuf, &errors, &final))
338 return NULL;
339
340 consumed = pbuf.len; /* This is overwritten unless final is true. */
341 decoded = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors,
342 &byteorder, final ? NULL : &consumed);
343 PyBuffer_Release(&pbuf);
344 if (decoded == NULL)
345 return NULL;
346 return codec_tuple(decoded, consumed);
347}
348
349/* This non-standard version also provides access to the byteorder
350 parameter of the builtin UTF-16 codec.
351
352 It returns a tuple (unicode, bytesread, byteorder) with byteorder
353 being the value in effect at the end of data.
354
355*/
356
357static PyObject *
358utf_16_ex_decode(PyObject *self,
359 PyObject *args)
360{
361 Py_buffer pbuf;
362 const char *errors = NULL;
363 int byteorder = 0;
364 PyObject *unicode, *tuple;
365 int final = 0;
366 Py_ssize_t consumed;
367
368 if (!PyArg_ParseTuple(args, "s*|zii:utf_16_ex_decode",
369 &pbuf, &errors, &byteorder, &final))
370 return NULL;
371 consumed = pbuf.len; /* This is overwritten unless final is true. */
372 unicode = PyUnicode_DecodeUTF16Stateful(pbuf.buf, pbuf.len, errors,
373 &byteorder, final ? NULL : &consumed);
374 PyBuffer_Release(&pbuf);
375 if (unicode == NULL)
376 return NULL;
377 tuple = Py_BuildValue("Oni", unicode, consumed, byteorder);
378 Py_DECREF(unicode);
379 return tuple;
380}
381
382static PyObject *
383utf_32_decode(PyObject *self,
384 PyObject *args)
385{
386 Py_buffer pbuf;
387 const char *errors = NULL;
388 int byteorder = 0;
389 int final = 0;
390 Py_ssize_t consumed;
391 PyObject *decoded;
392
393 if (!PyArg_ParseTuple(args, "s*|zi:utf_32_decode",
394 &pbuf, &errors, &final))
395 return NULL;
396 consumed = pbuf.len; /* This is overwritten unless final is true. */
397 decoded = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors,
398 &byteorder, final ? NULL : &consumed);
399 PyBuffer_Release(&pbuf);
400 if (decoded == NULL)
401 return NULL;
402 return codec_tuple(decoded, consumed);
403}
404
405static PyObject *
406utf_32_le_decode(PyObject *self,
407 PyObject *args)
408{
409 Py_buffer pbuf;
410 const char *errors = NULL;
411 int byteorder = -1;
412 int final = 0;
413 Py_ssize_t consumed;
414 PyObject *decoded;
415
416 if (!PyArg_ParseTuple(args, "s*|zi:utf_32_le_decode",
417 &pbuf, &errors, &final))
418 return NULL;
419 consumed = pbuf.len; /* This is overwritten unless final is true. */
420 decoded = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors,
421 &byteorder, final ? NULL : &consumed);
422 PyBuffer_Release(&pbuf);
423 if (decoded == NULL)
424 return NULL;
425 return codec_tuple(decoded, consumed);
426}
427
428static PyObject *
429utf_32_be_decode(PyObject *self,
430 PyObject *args)
431{
432 Py_buffer pbuf;
433 const char *errors = NULL;
434 int byteorder = 1;
435 int final = 0;
436 Py_ssize_t consumed;
437 PyObject *decoded;
438
439 if (!PyArg_ParseTuple(args, "s*|zi:utf_32_be_decode",
440 &pbuf, &errors, &final))
441 return NULL;
442 consumed = pbuf.len; /* This is overwritten unless final is true. */
443 decoded = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors,
444 &byteorder, final ? NULL : &consumed);
445 PyBuffer_Release(&pbuf);
446 if (decoded == NULL)
447 return NULL;
448 return codec_tuple(decoded, consumed);
449}
450
451/* This non-standard version also provides access to the byteorder
452 parameter of the builtin UTF-32 codec.
453
454 It returns a tuple (unicode, bytesread, byteorder) with byteorder
455 being the value in effect at the end of data.
456
457*/
458
459static PyObject *
460utf_32_ex_decode(PyObject *self,
461 PyObject *args)
462{
463 Py_buffer pbuf;
464 const char *errors = NULL;
465 int byteorder = 0;
466 PyObject *unicode, *tuple;
467 int final = 0;
468 Py_ssize_t consumed;
469
470 if (!PyArg_ParseTuple(args, "s*|zii:utf_32_ex_decode",
471 &pbuf, &errors, &byteorder, &final))
472 return NULL;
473 consumed = pbuf.len; /* This is overwritten unless final is true. */
474 unicode = PyUnicode_DecodeUTF32Stateful(pbuf.buf, pbuf.len, errors,
475 &byteorder, final ? NULL : &consumed);
476 PyBuffer_Release(&pbuf);
477 if (unicode == NULL)
478 return NULL;
479 tuple = Py_BuildValue("Oni", unicode, consumed, byteorder);
480 Py_DECREF(unicode);
481 return tuple;
482}
483
484static PyObject *
485unicode_escape_decode(PyObject *self,
486 PyObject *args)
487{
488 Py_buffer pbuf;
489 const char *errors = NULL;
490 PyObject *unicode;
491
492 if (!PyArg_ParseTuple(args, "s*|z:unicode_escape_decode",
493 &pbuf, &errors))
494 return NULL;
495
496 unicode = PyUnicode_DecodeUnicodeEscape(pbuf.buf, pbuf.len, errors);
497 PyBuffer_Release(&pbuf);
498 return codec_tuple(unicode, pbuf.len);
499}
500
501static PyObject *
502raw_unicode_escape_decode(PyObject *self,
503 PyObject *args)
504{
505 Py_buffer pbuf;
506 const char *errors = NULL;
507 PyObject *unicode;
508
509 if (!PyArg_ParseTuple(args, "s*|z:raw_unicode_escape_decode",
510 &pbuf, &errors))
511 return NULL;
512
513 unicode = PyUnicode_DecodeRawUnicodeEscape(pbuf.buf, pbuf.len, errors);
514 PyBuffer_Release(&pbuf);
515 return codec_tuple(unicode, pbuf.len);
516}
517
518static PyObject *
519latin_1_decode(PyObject *self,
520 PyObject *args)
521{
522 Py_buffer pbuf;
523 PyObject *unicode;
524 const char *errors = NULL;
525
526 if (!PyArg_ParseTuple(args, "s*|z:latin_1_decode",
527 &pbuf, &errors))
528 return NULL;
529
530 unicode = PyUnicode_DecodeLatin1(pbuf.buf, pbuf.len, errors);
531 PyBuffer_Release(&pbuf);
532 return codec_tuple(unicode, pbuf.len);
533}
534
535static PyObject *
536ascii_decode(PyObject *self,
537 PyObject *args)
538{
539 Py_buffer pbuf;
540 PyObject *unicode;
541 const char *errors = NULL;
542
543 if (!PyArg_ParseTuple(args, "s*|z:ascii_decode",
544 &pbuf, &errors))
545 return NULL;
546
547 unicode = PyUnicode_DecodeASCII(pbuf.buf, pbuf.len, errors);
548 PyBuffer_Release(&pbuf);
549 return codec_tuple(unicode, pbuf.len);
550}
551
552static PyObject *
553charmap_decode(PyObject *self,
554 PyObject *args)
555{
556 Py_buffer pbuf;
557 PyObject *unicode;
558 const char *errors = NULL;
559 PyObject *mapping = NULL;
560
561 if (!PyArg_ParseTuple(args, "s*|zO:charmap_decode",
562 &pbuf, &errors, &mapping))
563 return NULL;
564 if (mapping == Py_None)
565 mapping = NULL;
566
567 unicode = PyUnicode_DecodeCharmap(pbuf.buf, pbuf.len, mapping, errors);
568 PyBuffer_Release(&pbuf);
569 return codec_tuple(unicode, pbuf.len);
570}
571
572#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
573
574static PyObject *
575mbcs_decode(PyObject *self,
576 PyObject *args)
577{
578 Py_buffer pbuf;
579 const char *errors = NULL;
580 int final = 0;
581 Py_ssize_t consumed;
582 PyObject *decoded = NULL;
583
584 if (!PyArg_ParseTuple(args, "s*|zi:mbcs_decode",
585 &pbuf, &errors, &final))
586 return NULL;
587 consumed = pbuf.len;
588
589 decoded = PyUnicode_DecodeMBCSStateful(pbuf.buf, pbuf.len, errors,
590 final ? NULL : &consumed);
591 PyBuffer_Release(&pbuf);
592 if (decoded == NULL)
593 return NULL;
594 return codec_tuple(decoded, consumed);
595}
596
597#endif /* MS_WINDOWS */
598
599/* --- Encoder ------------------------------------------------------------ */
600
601static PyObject *
602readbuffer_encode(PyObject *self,
603 PyObject *args)
604{
605 const char *data;
606 Py_ssize_t size;
607 const char *errors = NULL;
608
609 if (!PyArg_ParseTuple(args, "s#|z:readbuffer_encode",
610 &data, &size, &errors))
611 return NULL;
612
613 return codec_tuple(PyString_FromStringAndSize(data, size),
614 size);
615}
616
617static PyObject *
618charbuffer_encode(PyObject *self,
619 PyObject *args)
620{
621 const char *data;
622 Py_ssize_t size;
623 const char *errors = NULL;
624
625 if (!PyArg_ParseTuple(args, "t#|z:charbuffer_encode",
626 &data, &size, &errors))
627 return NULL;
628
629 return codec_tuple(PyString_FromStringAndSize(data, size),
630 size);
631}
632
633static PyObject *
634unicode_internal_encode(PyObject *self,
635 PyObject *args)
636{
637 PyObject *obj;
638 const char *errors = NULL;
639 const char *data;
640 Py_ssize_t size;
641
642 if (!PyArg_ParseTuple(args, "O|z:unicode_internal_encode",
643 &obj, &errors))
644 return NULL;
645
646 if (PyUnicode_Check(obj)) {
647 data = PyUnicode_AS_DATA(obj);
648 size = PyUnicode_GET_DATA_SIZE(obj);
649 return codec_tuple(PyString_FromStringAndSize(data, size),
650 PyUnicode_GET_SIZE(obj));
651 }
652 else {
653 if (PyObject_AsReadBuffer(obj, (const void **)&data, &size))
654 return NULL;
655 return codec_tuple(PyString_FromStringAndSize(data, size),
656 size);
657 }
658}
659
660static PyObject *
661utf_7_encode(PyObject *self,
662 PyObject *args)
663{
664 PyObject *str, *v;
665 const char *errors = NULL;
666
667 if (!PyArg_ParseTuple(args, "O|z:utf_7_encode",
668 &str, &errors))
669 return NULL;
670
671 str = PyUnicode_FromObject(str);
672 if (str == NULL)
673 return NULL;
674 v = codec_tuple(PyUnicode_EncodeUTF7(PyUnicode_AS_UNICODE(str),
675 PyUnicode_GET_SIZE(str),
676 0,
677 0,
678 errors),
679 PyUnicode_GET_SIZE(str));
680 Py_DECREF(str);
681 return v;
682}
683
684static PyObject *
685utf_8_encode(PyObject *self,
686 PyObject *args)
687{
688 PyObject *str, *v;
689 const char *errors = NULL;
690
691 if (!PyArg_ParseTuple(args, "O|z:utf_8_encode",
692 &str, &errors))
693 return NULL;
694
695 str = PyUnicode_FromObject(str);
696 if (str == NULL)
697 return NULL;
698 v = codec_tuple(PyUnicode_EncodeUTF8(PyUnicode_AS_UNICODE(str),
699 PyUnicode_GET_SIZE(str),
700 errors),
701 PyUnicode_GET_SIZE(str));
702 Py_DECREF(str);
703 return v;
704}
705
706/* This version provides access to the byteorder parameter of the
707 builtin UTF-16 codecs as optional third argument. It defaults to 0
708 which means: use the native byte order and prepend the data with a
709 BOM mark.
710
711*/
712
713static PyObject *
714utf_16_encode(PyObject *self,
715 PyObject *args)
716{
717 PyObject *str, *v;
718 const char *errors = NULL;
719 int byteorder = 0;
720
721 if (!PyArg_ParseTuple(args, "O|zi:utf_16_encode",
722 &str, &errors, &byteorder))
723 return NULL;
724
725 str = PyUnicode_FromObject(str);
726 if (str == NULL)
727 return NULL;
728 v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
729 PyUnicode_GET_SIZE(str),
730 errors,
731 byteorder),
732 PyUnicode_GET_SIZE(str));
733 Py_DECREF(str);
734 return v;
735}
736
737static PyObject *
738utf_16_le_encode(PyObject *self,
739 PyObject *args)
740{
741 PyObject *str, *v;
742 const char *errors = NULL;
743
744 if (!PyArg_ParseTuple(args, "O|z:utf_16_le_encode",
745 &str, &errors))
746 return NULL;
747
748 str = PyUnicode_FromObject(str);
749 if (str == NULL)
750 return NULL;
751 v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
752 PyUnicode_GET_SIZE(str),
753 errors,
754 -1),
755 PyUnicode_GET_SIZE(str));
756 Py_DECREF(str);
757 return v;
758}
759
760static PyObject *
761utf_16_be_encode(PyObject *self,
762 PyObject *args)
763{
764 PyObject *str, *v;
765 const char *errors = NULL;
766
767 if (!PyArg_ParseTuple(args, "O|z:utf_16_be_encode",
768 &str, &errors))
769 return NULL;
770
771 str = PyUnicode_FromObject(str);
772 if (str == NULL)
773 return NULL;
774 v = codec_tuple(PyUnicode_EncodeUTF16(PyUnicode_AS_UNICODE(str),
775 PyUnicode_GET_SIZE(str),
776 errors,
777 +1),
778 PyUnicode_GET_SIZE(str));
779 Py_DECREF(str);
780 return v;
781}
782
783/* This version provides access to the byteorder parameter of the
784 builtin UTF-32 codecs as optional third argument. It defaults to 0
785 which means: use the native byte order and prepend the data with a
786 BOM mark.
787
788*/
789
790static PyObject *
791utf_32_encode(PyObject *self,
792 PyObject *args)
793{
794 PyObject *str, *v;
795 const char *errors = NULL;
796 int byteorder = 0;
797
798 if (!PyArg_ParseTuple(args, "O|zi:utf_32_encode",
799 &str, &errors, &byteorder))
800 return NULL;
801
802 str = PyUnicode_FromObject(str);
803 if (str == NULL)
804 return NULL;
805 v = codec_tuple(PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(str),
806 PyUnicode_GET_SIZE(str),
807 errors,
808 byteorder),
809 PyUnicode_GET_SIZE(str));
810 Py_DECREF(str);
811 return v;
812}
813
814static PyObject *
815utf_32_le_encode(PyObject *self,
816 PyObject *args)
817{
818 PyObject *str, *v;
819 const char *errors = NULL;
820
821 if (!PyArg_ParseTuple(args, "O|z:utf_32_le_encode",
822 &str, &errors))
823 return NULL;
824
825 str = PyUnicode_FromObject(str);
826 if (str == NULL)
827 return NULL;
828 v = codec_tuple(PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(str),
829 PyUnicode_GET_SIZE(str),
830 errors,
831 -1),
832 PyUnicode_GET_SIZE(str));
833 Py_DECREF(str);
834 return v;
835}
836
837static PyObject *
838utf_32_be_encode(PyObject *self,
839 PyObject *args)
840{
841 PyObject *str, *v;
842 const char *errors = NULL;
843
844 if (!PyArg_ParseTuple(args, "O|z:utf_32_be_encode",
845 &str, &errors))
846 return NULL;
847
848 str = PyUnicode_FromObject(str);
849 if (str == NULL)
850 return NULL;
851 v = codec_tuple(PyUnicode_EncodeUTF32(PyUnicode_AS_UNICODE(str),
852 PyUnicode_GET_SIZE(str),
853 errors,
854 +1),
855 PyUnicode_GET_SIZE(str));
856 Py_DECREF(str);
857 return v;
858}
859
860static PyObject *
861unicode_escape_encode(PyObject *self,
862 PyObject *args)
863{
864 PyObject *str, *v;
865 const char *errors = NULL;
866
867 if (!PyArg_ParseTuple(args, "O|z:unicode_escape_encode",
868 &str, &errors))
869 return NULL;
870
871 str = PyUnicode_FromObject(str);
872 if (str == NULL)
873 return NULL;
874 v = codec_tuple(PyUnicode_EncodeUnicodeEscape(PyUnicode_AS_UNICODE(str),
875 PyUnicode_GET_SIZE(str)),
876 PyUnicode_GET_SIZE(str));
877 Py_DECREF(str);
878 return v;
879}
880
881static PyObject *
882raw_unicode_escape_encode(PyObject *self,
883 PyObject *args)
884{
885 PyObject *str, *v;
886 const char *errors = NULL;
887
888 if (!PyArg_ParseTuple(args, "O|z:raw_unicode_escape_encode",
889 &str, &errors))
890 return NULL;
891
892 str = PyUnicode_FromObject(str);
893 if (str == NULL)
894 return NULL;
895 v = codec_tuple(PyUnicode_EncodeRawUnicodeEscape(
896 PyUnicode_AS_UNICODE(str),
897 PyUnicode_GET_SIZE(str)),
898 PyUnicode_GET_SIZE(str));
899 Py_DECREF(str);
900 return v;
901}
902
903static PyObject *
904latin_1_encode(PyObject *self,
905 PyObject *args)
906{
907 PyObject *str, *v;
908 const char *errors = NULL;
909
910 if (!PyArg_ParseTuple(args, "O|z:latin_1_encode",
911 &str, &errors))
912 return NULL;
913
914 str = PyUnicode_FromObject(str);
915 if (str == NULL)
916 return NULL;
917 v = codec_tuple(PyUnicode_EncodeLatin1(
918 PyUnicode_AS_UNICODE(str),
919 PyUnicode_GET_SIZE(str),
920 errors),
921 PyUnicode_GET_SIZE(str));
922 Py_DECREF(str);
923 return v;
924}
925
926static PyObject *
927ascii_encode(PyObject *self,
928 PyObject *args)
929{
930 PyObject *str, *v;
931 const char *errors = NULL;
932
933 if (!PyArg_ParseTuple(args, "O|z:ascii_encode",
934 &str, &errors))
935 return NULL;
936
937 str = PyUnicode_FromObject(str);
938 if (str == NULL)
939 return NULL;
940 v = codec_tuple(PyUnicode_EncodeASCII(
941 PyUnicode_AS_UNICODE(str),
942 PyUnicode_GET_SIZE(str),
943 errors),
944 PyUnicode_GET_SIZE(str));
945 Py_DECREF(str);
946 return v;
947}
948
949static PyObject *
950charmap_encode(PyObject *self,
951 PyObject *args)
952{
953 PyObject *str, *v;
954 const char *errors = NULL;
955 PyObject *mapping = NULL;
956
957 if (!PyArg_ParseTuple(args, "O|zO:charmap_encode",
958 &str, &errors, &mapping))
959 return NULL;
960 if (mapping == Py_None)
961 mapping = NULL;
962
963 str = PyUnicode_FromObject(str);
964 if (str == NULL)
965 return NULL;
966 v = codec_tuple(PyUnicode_EncodeCharmap(
967 PyUnicode_AS_UNICODE(str),
968 PyUnicode_GET_SIZE(str),
969 mapping,
970 errors),
971 PyUnicode_GET_SIZE(str));
972 Py_DECREF(str);
973 return v;
974}
975
976static PyObject*
977charmap_build(PyObject *self, PyObject *args)
978{
979 PyObject *map;
980 if (!PyArg_ParseTuple(args, "U:charmap_build", &map))
981 return NULL;
982 return PyUnicode_BuildEncodingMap(map);
983}
984
985#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
986
987static PyObject *
988mbcs_encode(PyObject *self,
989 PyObject *args)
990{
991 PyObject *str, *v;
992 const char *errors = NULL;
993
994 if (!PyArg_ParseTuple(args, "O|z:mbcs_encode",
995 &str, &errors))
996 return NULL;
997
998 str = PyUnicode_FromObject(str);
999 if (str == NULL)
1000 return NULL;
1001 v = codec_tuple(PyUnicode_EncodeMBCS(
1002 PyUnicode_AS_UNICODE(str),
1003 PyUnicode_GET_SIZE(str),
1004 errors),
1005 PyUnicode_GET_SIZE(str));
1006 Py_DECREF(str);
1007 return v;
1008}
1009
1010#endif /* MS_WINDOWS */
1011#endif /* Py_USING_UNICODE */
1012
1013/* --- Error handler registry --------------------------------------------- */
1014
1015PyDoc_STRVAR(register_error__doc__,
1016"register_error(errors, handler)\n\
1017\n\
1018Register the specified error handler under the name\n\
1019errors. handler must be a callable object, that\n\
1020will be called with an exception instance containing\n\
1021information about the location of the encoding/decoding\n\
1022error and must return a (replacement, new position) tuple.");
1023
1024static PyObject *register_error(PyObject *self, PyObject *args)
1025{
1026 const char *name;
1027 PyObject *handler;
1028
1029 if (!PyArg_ParseTuple(args, "sO:register_error",
1030 &name, &handler))
1031 return NULL;
1032 if (PyCodec_RegisterError(name, handler))
1033 return NULL;
1034 Py_RETURN_NONE;
1035}
1036
1037PyDoc_STRVAR(lookup_error__doc__,
1038"lookup_error(errors) -> handler\n\
1039\n\
1040Return the error handler for the specified error handling name\n\
1041or raise a LookupError, if no handler exists under this name.");
1042
1043static PyObject *lookup_error(PyObject *self, PyObject *args)
1044{
1045 const char *name;
1046
1047 if (!PyArg_ParseTuple(args, "s:lookup_error",
1048 &name))
1049 return NULL;
1050 return PyCodec_LookupError(name);
1051}
1052
1053/* --- Module API --------------------------------------------------------- */
1054
1055static PyMethodDef _codecs_functions[] = {
1056 {"register", codec_register, METH_O,
1057 register__doc__},
1058 {"lookup", codec_lookup, METH_VARARGS,
1059 lookup__doc__},
1060 {"encode", codec_encode, METH_VARARGS,
1061 encode__doc__},
1062 {"decode", codec_decode, METH_VARARGS,
1063 decode__doc__},
1064 {"escape_encode", escape_encode, METH_VARARGS},
1065 {"escape_decode", escape_decode, METH_VARARGS},
1066#ifdef Py_USING_UNICODE
1067 {"utf_8_encode", utf_8_encode, METH_VARARGS},
1068 {"utf_8_decode", utf_8_decode, METH_VARARGS},
1069 {"utf_7_encode", utf_7_encode, METH_VARARGS},
1070 {"utf_7_decode", utf_7_decode, METH_VARARGS},
1071 {"utf_16_encode", utf_16_encode, METH_VARARGS},
1072 {"utf_16_le_encode", utf_16_le_encode, METH_VARARGS},
1073 {"utf_16_be_encode", utf_16_be_encode, METH_VARARGS},
1074 {"utf_16_decode", utf_16_decode, METH_VARARGS},
1075 {"utf_16_le_decode", utf_16_le_decode, METH_VARARGS},
1076 {"utf_16_be_decode", utf_16_be_decode, METH_VARARGS},
1077 {"utf_16_ex_decode", utf_16_ex_decode, METH_VARARGS},
1078 {"utf_32_encode", utf_32_encode, METH_VARARGS},
1079 {"utf_32_le_encode", utf_32_le_encode, METH_VARARGS},
1080 {"utf_32_be_encode", utf_32_be_encode, METH_VARARGS},
1081 {"utf_32_decode", utf_32_decode, METH_VARARGS},
1082 {"utf_32_le_decode", utf_32_le_decode, METH_VARARGS},
1083 {"utf_32_be_decode", utf_32_be_decode, METH_VARARGS},
1084 {"utf_32_ex_decode", utf_32_ex_decode, METH_VARARGS},
1085 {"unicode_escape_encode", unicode_escape_encode, METH_VARARGS},
1086 {"unicode_escape_decode", unicode_escape_decode, METH_VARARGS},
1087 {"unicode_internal_encode", unicode_internal_encode, METH_VARARGS},
1088 {"unicode_internal_decode", unicode_internal_decode, METH_VARARGS},
1089 {"raw_unicode_escape_encode", raw_unicode_escape_encode, METH_VARARGS},
1090 {"raw_unicode_escape_decode", raw_unicode_escape_decode, METH_VARARGS},
1091 {"latin_1_encode", latin_1_encode, METH_VARARGS},
1092 {"latin_1_decode", latin_1_decode, METH_VARARGS},
1093 {"ascii_encode", ascii_encode, METH_VARARGS},
1094 {"ascii_decode", ascii_decode, METH_VARARGS},
1095 {"charmap_encode", charmap_encode, METH_VARARGS},
1096 {"charmap_decode", charmap_decode, METH_VARARGS},
1097 {"charmap_build", charmap_build, METH_VARARGS},
1098 {"readbuffer_encode", readbuffer_encode, METH_VARARGS},
1099 {"charbuffer_encode", charbuffer_encode, METH_VARARGS},
1100#if defined(MS_WINDOWS) && defined(HAVE_USABLE_WCHAR_T)
1101 {"mbcs_encode", mbcs_encode, METH_VARARGS},
1102 {"mbcs_decode", mbcs_decode, METH_VARARGS},
1103#endif
1104#endif /* Py_USING_UNICODE */
1105 {"register_error", register_error, METH_VARARGS,
1106 register_error__doc__},
1107 {"lookup_error", lookup_error, METH_VARARGS,
1108 lookup_error__doc__},
1109 {NULL, NULL} /* sentinel */
1110};
1111
1112PyMODINIT_FUNC
1113init_codecs(void)
1114{
1115 Py_InitModule("_codecs", _codecs_functions);
1116}
Note: See TracBrowser for help on using the repository browser.