Context Navigation

binascii.c

Visit:

Last change on this file was 3225, checked in by bird, 18 years ago
Python 2.5
File size: 41.6 KB

Line
1	/*
2	** Routines to represent binary data in ASCII and vice-versa
3	**
4	** This module currently supports the following encodings:
5	** uuencode:
6	** each line encodes 45 bytes (except possibly the last)
7	** First char encodes (binary) length, rest data
8	** each char encodes 6 bits, as follows:
9	** binary: 01234567 abcdefgh ijklmnop
10	** ascii: 012345 67abcd efghij klmnop
11	** ASCII encoding method is "excess-space": 000000 is encoded as ' ', etc.
12	** short binary data is zero-extended (so the bits are always in the
13	** right place), this does not reflect in the length.
14	** base64:
15	** Line breaks are insignificant, but lines are at most 76 chars
16	** each char encodes 6 bits, in similar order as uucode/hqx. Encoding
17	** is done via a table.
18	** Short binary data is filled (in ASCII) with '='.
19	** hqx:
20	** File starts with introductory text, real data starts and ends
21	** with colons.
22	** Data consists of three similar parts: info, datafork, resourcefork.
23	** Each part is protected (at the end) with a 16-bit crc
24	** The binary data is run-length encoded, and then ascii-fied:
25	** binary: 01234567 abcdefgh ijklmnop
26	** ascii: 012345 67abcd efghij klmnop
27	** ASCII encoding is table-driven, see the code.
28	** Short binary data results in the runt ascii-byte being output with
29	** the bits in the right place.
30	**
31	** While I was reading dozens of programs that encode or decode the formats
32	** here (documentation? hihi:-) I have formulated Jansen's Observation:
33	**
34	** Programs that encode binary data in ASCII are written in
35	** such a style that they are as unreadable as possible. Devices used
36	** include unnecessary global variables, burying important tables
37	** in unrelated sourcefiles, putting functions in include files,
38	** using seemingly-descriptive variable names for different purposes,
39	** calls to empty subroutines and a host of others.
40	**
41	** I have attempted to break with this tradition, but I guess that that
42	** does make the performance sub-optimal. Oh well, too bad...
43	**
44	** Jack Jansen, CWI, July 1995.
45	**
46	** Added support for quoted-printable encoding, based on rfc 1521 et al
47	** quoted-printable encoding specifies that non printable characters (anything
48	** below 32 and above 126) be encoded as =XX where XX is the hexadecimal value
49	** of the character. It also specifies some other behavior to enable 8bit data
50	** in a mail message with little difficulty (maximum line sizes, protecting
51	** some cases of whitespace, etc).
52	**
53	** Brandon Long, September 2001.
54	*/
55
56	#define PY_SSIZE_T_CLEAN
57
58	#include "Python.h"
59
60	static PyObject *Error;
61	static PyObject *Incomplete;
62
/*
** hqx lookup table, ascii->binary.
**
** Indexed by the input byte.  Entries 0x00..0x3F are 6-bit data values;
** DONE marks the terminating colon, SKIP marks characters to ignore
** (newline and carriage return) and FAIL marks illegal characters.
*/

#define RUNCHAR 0x90

#define DONE 0x7F
#define SKIP 0x7E
#define FAIL 0x7D

static unsigned char table_a2b_hqx[256] = {
/*       ^@    ^A    ^B    ^C    ^D    ^E    ^F    ^G   */
/* 0*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/*       \b    \t    \n    ^K    ^L    \r    ^N    ^O   */
/* 1*/  FAIL, FAIL, SKIP, FAIL, FAIL, SKIP, FAIL, FAIL,
/*       ^P    ^Q    ^R    ^S    ^T    ^U    ^V    ^W   */
/* 2*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/*       ^X    ^Y    ^Z    ^[    ^\    ^]    ^^    ^_   */
/* 3*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/*      ' '    !     "     #     $     %     &     '    */
/* 4*/  FAIL, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
/*       (     )     *     +     ,     -     .     /    */
/* 5*/  0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, FAIL, FAIL,
/*       0     1     2     3     4     5     6     7    */
/* 6*/  0x0D, 0x0E, 0x0F, 0x10, 0x11, 0x12, 0x13, FAIL,
/*       8     9     :     ;     <     =     >     ?    */
/* 7*/  0x14, 0x15, DONE, FAIL, FAIL, FAIL, FAIL, FAIL,
/*       @     A     B     C     D     E     F     G    */
/* 8*/  0x16, 0x17, 0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D,
/*       H     I     J     K     L     M     N     O    */
/* 9*/  0x1E, 0x1F, 0x20, 0x21, 0x22, 0x23, 0x24, FAIL,
/*       P     Q     R     S     T     U     V     W    */
/*10*/  0x25, 0x26, 0x27, 0x28, 0x29, 0x2A, 0x2B, FAIL,
/*       X     Y     Z     [     \     ]     ^     _    */
/*11*/  0x2C, 0x2D, 0x2E, 0x2F, FAIL, FAIL, FAIL, FAIL,
/*       `     a     b     c     d     e     f     g    */
/*12*/  0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, FAIL,
/*       h     i     j     k     l     m     n     o    */
/*13*/  0x37, 0x38, 0x39, 0x3A, 0x3B, 0x3C, FAIL, FAIL,
/*       p     q     r     s     t     u     v     w    */
/*14*/  0x3D, 0x3E, 0x3F, FAIL, FAIL, FAIL, FAIL, FAIL,
/*       x     y     z     {     |     }     ~    ^?    */
/*15*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
/*16*/  FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
        FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL, FAIL,
};
123
/* hqx lookup table, binary->ascii: entry i is the character that encodes
** the 6-bit value i (64 characters plus the implicit terminating NUL). */
static unsigned char table_b2a_hqx[] =
"!\"#$%&'()*+,-012345689@ABCDEFGHIJKLMNPQRSTUVXYZ[`abcdefhijklmpqr";
126
/* base64 lookup table, ascii->binary, indexed by a 7-bit character.
** -1 marks characters that are not part of the base64 alphabet; the pad
** character '=' maps to 0. */
static char table_a2b_base64[] = {
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
    -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,62, -1,-1,-1,63,
    52,53,54,55, 56,57,58,59, 60,61,-1,-1, -1, 0,-1,-1, /* Note PAD->0 */
    -1, 0, 1, 2,  3, 4, 5, 6,  7, 8, 9,10, 11,12,13,14,
    15,16,17,18, 19,20,21,22, 23,24,25,-1, -1,-1,-1,-1,
    -1,26,27,28, 29,30,31,32, 33,34,35,36, 37,38,39,40,
    41,42,43,44, 45,46,47,48, 49,50,51,-1, -1,-1,-1,-1
};
137
/* Character used to pad a short final base64 quad. */
#define BASE64_PAD '='

/* Max binary chunk size; limited only by available memory */
#define BASE64_MAXBIN (INT_MAX/2 - sizeof(PyStringObject) - 3)

/* base64 lookup table, binary->ascii: entry i is the character that
** encodes the 6-bit value i. */
static unsigned char table_b2a_base64[] =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
145
146
147
/* Byte-wise lookup table used by binascii_crc_hqx() for the 16-bit hqx
** CRC.  Entry i is the CRC of the single byte i, processed most
** significant bit first with generator polynomial 0x1021. */
static unsigned short crctab_hqx[256] = {
    0x0000, 0x1021, 0x2042, 0x3063, 0x4084, 0x50a5, 0x60c6, 0x70e7,
    0x8108, 0x9129, 0xa14a, 0xb16b, 0xc18c, 0xd1ad, 0xe1ce, 0xf1ef,
    0x1231, 0x0210, 0x3273, 0x2252, 0x52b5, 0x4294, 0x72f7, 0x62d6,
    0x9339, 0x8318, 0xb37b, 0xa35a, 0xd3bd, 0xc39c, 0xf3ff, 0xe3de,
    0x2462, 0x3443, 0x0420, 0x1401, 0x64e6, 0x74c7, 0x44a4, 0x5485,
    0xa56a, 0xb54b, 0x8528, 0x9509, 0xe5ee, 0xf5cf, 0xc5ac, 0xd58d,
    0x3653, 0x2672, 0x1611, 0x0630, 0x76d7, 0x66f6, 0x5695, 0x46b4,
    0xb75b, 0xa77a, 0x9719, 0x8738, 0xf7df, 0xe7fe, 0xd79d, 0xc7bc,
    0x48c4, 0x58e5, 0x6886, 0x78a7, 0x0840, 0x1861, 0x2802, 0x3823,
    0xc9cc, 0xd9ed, 0xe98e, 0xf9af, 0x8948, 0x9969, 0xa90a, 0xb92b,
    0x5af5, 0x4ad4, 0x7ab7, 0x6a96, 0x1a71, 0x0a50, 0x3a33, 0x2a12,
    0xdbfd, 0xcbdc, 0xfbbf, 0xeb9e, 0x9b79, 0x8b58, 0xbb3b, 0xab1a,
    0x6ca6, 0x7c87, 0x4ce4, 0x5cc5, 0x2c22, 0x3c03, 0x0c60, 0x1c41,
    0xedae, 0xfd8f, 0xcdec, 0xddcd, 0xad2a, 0xbd0b, 0x8d68, 0x9d49,
    0x7e97, 0x6eb6, 0x5ed5, 0x4ef4, 0x3e13, 0x2e32, 0x1e51, 0x0e70,
    0xff9f, 0xefbe, 0xdfdd, 0xcffc, 0xbf1b, 0xaf3a, 0x9f59, 0x8f78,
    0x9188, 0x81a9, 0xb1ca, 0xa1eb, 0xd10c, 0xc12d, 0xf14e, 0xe16f,
    0x1080, 0x00a1, 0x30c2, 0x20e3, 0x5004, 0x4025, 0x7046, 0x6067,
    0x83b9, 0x9398, 0xa3fb, 0xb3da, 0xc33d, 0xd31c, 0xe37f, 0xf35e,
    0x02b1, 0x1290, 0x22f3, 0x32d2, 0x4235, 0x5214, 0x6277, 0x7256,
    0xb5ea, 0xa5cb, 0x95a8, 0x8589, 0xf56e, 0xe54f, 0xd52c, 0xc50d,
    0x34e2, 0x24c3, 0x14a0, 0x0481, 0x7466, 0x6447, 0x5424, 0x4405,
    0xa7db, 0xb7fa, 0x8799, 0x97b8, 0xe75f, 0xf77e, 0xc71d, 0xd73c,
    0x26d3, 0x36f2, 0x0691, 0x16b0, 0x6657, 0x7676, 0x4615, 0x5634,
    0xd94c, 0xc96d, 0xf90e, 0xe92f, 0x99c8, 0x89e9, 0xb98a, 0xa9ab,
    0x5844, 0x4865, 0x7806, 0x6827, 0x18c0, 0x08e1, 0x3882, 0x28a3,
    0xcb7d, 0xdb5c, 0xeb3f, 0xfb1e, 0x8bf9, 0x9bd8, 0xabbb, 0xbb9a,
    0x4a75, 0x5a54, 0x6a37, 0x7a16, 0x0af1, 0x1ad0, 0x2ab3, 0x3a92,
    0xfd2e, 0xed0f, 0xdd6c, 0xcd4d, 0xbdaa, 0xad8b, 0x9de8, 0x8dc9,
    0x7c26, 0x6c07, 0x5c64, 0x4c45, 0x3ca2, 0x2c83, 0x1ce0, 0x0cc1,
    0xef1f, 0xff3e, 0xcf5d, 0xdf7c, 0xaf9b, 0xbfba, 0x8fd9, 0x9ff8,
    0x6e17, 0x7e36, 0x4e55, 0x5e74, 0x2e93, 0x3eb2, 0x0ed1, 0x1ef0,
};
182
183	PyDoc_STRVAR(doc_a2b_uu, "(ascii) -> bin. Decode a line of uuencoded data");
184
185	static PyObject *
186	binascii_a2b_uu(PyObject self, PyObject args)
187	{
188	unsigned char ascii_data, bin_data;
189	int leftbits = 0;
190	unsigned char this_ch;
191	unsigned int leftchar = 0;
192	PyObject *rv;
193	Py_ssize_t ascii_len, bin_len;
194
195	if ( !PyArg_ParseTuple(args, "t#:a2b_uu", &ascii_data, &ascii_len) )
196	return NULL;
197
198	/* First byte: binary data length (in bytes) */
199	bin_len = (*ascii_data++ - ' ') & 077;
200	ascii_len--;
201
202	/* Allocate the buffer */
203	if ( (rv=PyString_FromStringAndSize(NULL, bin_len)) == NULL )
204	return NULL;
205	bin_data = (unsigned char *)PyString_AsString(rv);
206
207	for( ; bin_len > 0 ; ascii_len--, ascii_data++ ) {
208	/* XXX is it really best to add NULs if there's no more data */
209	this_ch = (ascii_len > 0) ? *ascii_data : 0;
210	if ( this_ch == '\n' \|\| this_ch == '\r' \|\| ascii_len <= 0) {
211	/*
212	** Whitespace. Assume some spaces got eaten at
213	** end-of-line. (We check this later)
214	*/
215	this_ch = 0;
216	} else {
217	/* Check the character for legality
218	** The 64 in stead of the expected 63 is because
219	** there are a few uuencodes out there that use
220	** '`' as zero instead of space.
221	*/
222	if ( this_ch < ' ' \|\| this_ch > (' ' + 64)) {
223	PyErr_SetString(Error, "Illegal char");
224	Py_DECREF(rv);
225	return NULL;
226	}
227	this_ch = (this_ch - ' ') & 077;
228	}
229	/*
230	** Shift it in on the low end, and see if there's
231	** a byte ready for output.
232	*/
233	leftchar = (leftchar << 6) \| (this_ch);
234	leftbits += 6;
235	if ( leftbits >= 8 ) {
236	leftbits -= 8;
237	*bin_data++ = (leftchar >> leftbits) & 0xff;
238	leftchar &= ((1 << leftbits) - 1);
239	bin_len--;
240	}
241	}
242	/*
243	** Finally, check that if there's anything left on the line
244	** that it's whitespace only.
245	*/
246	while( ascii_len-- > 0 ) {
247	this_ch = *ascii_data++;
248	/* Extra '`' may be written as padding in some cases */
249	if ( this_ch != ' ' && this_ch != ' '+64 &&
250	this_ch != '\n' && this_ch != '\r' ) {
251	PyErr_SetString(Error, "Trailing garbage");
252	Py_DECREF(rv);
253	return NULL;
254	}
255	}
256	return rv;
257	}
258
259	PyDoc_STRVAR(doc_b2a_uu, "(bin) -> ascii. Uuencode line of data");
260
261	static PyObject *
262	binascii_b2a_uu(PyObject self, PyObject args)
263	{
264	unsigned char ascii_data, bin_data;
265	int leftbits = 0;
266	unsigned char this_ch;
267	unsigned int leftchar = 0;
268	PyObject *rv;
269	Py_ssize_t bin_len;
270
271	if ( !PyArg_ParseTuple(args, "s#:b2a_uu", &bin_data, &bin_len) )
272	return NULL;
273	if ( bin_len > 45 ) {
274	/* The 45 is a limit that appears in all uuencode's */
275	PyErr_SetString(Error, "At most 45 bytes at once");
276	return NULL;
277	}
278
279	/* We're lazy and allocate to much (fixed up later) */
280	if ( (rv=PyString_FromStringAndSize(NULL, bin_len*2+2)) == NULL )
281	return NULL;
282	ascii_data = (unsigned char *)PyString_AsString(rv);
283
284	/* Store the length */
285	*ascii_data++ = ' ' + (bin_len & 077);
286
287	for( ; bin_len > 0 \|\| leftbits != 0 ; bin_len--, bin_data++ ) {
288	/* Shift the data (or padding) into our buffer */
289	if ( bin_len > 0 ) /* Data */
290	leftchar = (leftchar << 8) \| *bin_data;
291	else /* Padding */
292	leftchar <<= 8;
293	leftbits += 8;
294
295	/* See if there are 6-bit groups ready */
296	while ( leftbits >= 6 ) {
297	this_ch = (leftchar >> (leftbits-6)) & 0x3f;
298	leftbits -= 6;
299	*ascii_data++ = this_ch + ' ';
300	}
301	}
302	ascii_data++ = '\n'; / Append a courtesy newline */
303
304	_PyString_Resize(&rv, (ascii_data -
305	(unsigned char *)PyString_AsString(rv)));
306	return rv;
307	}
308
309
310	static int
311	binascii_find_valid(unsigned char *s, Py_ssize_t slen, int num)
312	{
313	/* Finds & returns the (num+1)th
314	** valid character for base64, or -1 if none.
315	*/
316
317	int ret = -1;
318	unsigned char c, b64val;
319
320	while ((slen > 0) && (ret == -1)) {
321	c = *s;
322	b64val = table_a2b_base64[c & 0x7f];
323	if ( ((c <= 0x7f) && (b64val != (unsigned char)-1)) ) {
324	if (num == 0)
325	ret = *s;
326	num--;
327	}
328
329	s++;
330	slen--;
331	}
332	return ret;
333	}
334
335	PyDoc_STRVAR(doc_a2b_base64, "(ascii) -> bin. Decode a line of base64 data");
336
337	static PyObject *
338	binascii_a2b_base64(PyObject self, PyObject args)
339	{
340	unsigned char ascii_data, bin_data;
341	int leftbits = 0;
342	unsigned char this_ch;
343	unsigned int leftchar = 0;
344	PyObject *rv;
345	Py_ssize_t ascii_len, bin_len;
346	int quad_pos = 0;
347
348	if ( !PyArg_ParseTuple(args, "t#:a2b_base64", &ascii_data, &ascii_len) )
349	return NULL;
350
351	bin_len = ((ascii_len+3)/4)3; / Upper bound, corrected later */
352
353	/* Allocate the buffer */
354	if ( (rv=PyString_FromStringAndSize(NULL, bin_len)) == NULL )
355	return NULL;
356	bin_data = (unsigned char *)PyString_AsString(rv);
357	bin_len = 0;
358
359	for( ; ascii_len > 0; ascii_len--, ascii_data++) {
360	this_ch = *ascii_data;
361
362	if (this_ch > 0x7f \|\|
363	this_ch == '\r' \|\| this_ch == '\n' \|\| this_ch == ' ')
364	continue;
365
366	/* Check for pad sequences and ignore
367	** the invalid ones.
368	*/
369	if (this_ch == BASE64_PAD) {
370	if ( (quad_pos < 2) \|\|
371	((quad_pos == 2) &&
372	(binascii_find_valid(ascii_data, ascii_len, 1)
373	!= BASE64_PAD)) )
374	{
375	continue;
376	}
377	else {
378	/* A pad sequence means no more input.
379	** We've already interpreted the data
380	** from the quad at this point.
381	*/
382	leftbits = 0;
383	break;
384	}
385	}
386
387	this_ch = table_a2b_base64[*ascii_data];
388	if ( this_ch == (unsigned char) -1 )
389	continue;
390
391	/*
392	** Shift it in on the low end, and see if there's
393	** a byte ready for output.
394	*/
395	quad_pos = (quad_pos + 1) & 0x03;
396	leftchar = (leftchar << 6) \| (this_ch);
397	leftbits += 6;
398
399	if ( leftbits >= 8 ) {
400	leftbits -= 8;
401	*bin_data++ = (leftchar >> leftbits) & 0xff;
402	bin_len++;
403	leftchar &= ((1 << leftbits) - 1);
404	}
405	}
406
407	if (leftbits != 0) {
408	PyErr_SetString(Error, "Incorrect padding");
409	Py_DECREF(rv);
410	return NULL;
411	}
412
413	/* And set string size correctly. If the result string is empty
414	** (because the input was all invalid) return the shared empty
415	** string instead; _PyString_Resize() won't do this for us.
416	*/
417	if (bin_len > 0)
418	_PyString_Resize(&rv, bin_len);
419	else {
420	Py_DECREF(rv);
421	rv = PyString_FromString("");
422	}
423	return rv;
424	}
425
426	PyDoc_STRVAR(doc_b2a_base64, "(bin) -> ascii. Base64-code line of data");
427
428	static PyObject *
429	binascii_b2a_base64(PyObject self, PyObject args)
430	{
431	unsigned char ascii_data, bin_data;
432	int leftbits = 0;
433	unsigned char this_ch;
434	unsigned int leftchar = 0;
435	PyObject *rv;
436	Py_ssize_t bin_len;
437
438	if ( !PyArg_ParseTuple(args, "s#:b2a_base64", &bin_data, &bin_len) )
439	return NULL;
440	if ( bin_len > BASE64_MAXBIN ) {
441	PyErr_SetString(Error, "Too much data for base64 line");
442	return NULL;
443	}
444
445	/* We're lazy and allocate too much (fixed up later).
446	"+3" leaves room for up to two pad characters and a trailing
447	newline. Note that 'b' gets encoded as 'Yg==\n' (1 in, 5 out). */
448	if ( (rv=PyString_FromStringAndSize(NULL, bin_len*2 + 3)) == NULL )
449	return NULL;
450	ascii_data = (unsigned char *)PyString_AsString(rv);
451
452	for( ; bin_len > 0 ; bin_len--, bin_data++ ) {
453	/* Shift the data into our buffer */
454	leftchar = (leftchar << 8) \| *bin_data;
455	leftbits += 8;
456
457	/* See if there are 6-bit groups ready */
458	while ( leftbits >= 6 ) {
459	this_ch = (leftchar >> (leftbits-6)) & 0x3f;
460	leftbits -= 6;
461	*ascii_data++ = table_b2a_base64[this_ch];
462	}
463	}
464	if ( leftbits == 2 ) {
465	*ascii_data++ = table_b2a_base64[(leftchar&3) << 4];
466	*ascii_data++ = BASE64_PAD;
467	*ascii_data++ = BASE64_PAD;
468	} else if ( leftbits == 4 ) {
469	*ascii_data++ = table_b2a_base64[(leftchar&0xf) << 2];
470	*ascii_data++ = BASE64_PAD;
471	}
472	ascii_data++ = '\n'; / Append a courtesy newline */
473
474	_PyString_Resize(&rv, (ascii_data -
475	(unsigned char *)PyString_AsString(rv)));
476	return rv;
477	}
478
479	PyDoc_STRVAR(doc_a2b_hqx, "ascii -> bin, done. Decode .hqx coding");
480
481	static PyObject *
482	binascii_a2b_hqx(PyObject self, PyObject args)
483	{
484	unsigned char ascii_data, bin_data;
485	int leftbits = 0;
486	unsigned char this_ch;
487	unsigned int leftchar = 0;
488	PyObject *rv;
489	Py_ssize_t len;
490	int done = 0;
491
492	if ( !PyArg_ParseTuple(args, "t#:a2b_hqx", &ascii_data, &len) )
493	return NULL;
494
495	/* Allocate a string that is too big (fixed later)
496	Add two to the initial length to prevent interning which
497	would preclude subsequent resizing. */
498	if ( (rv=PyString_FromStringAndSize(NULL, len+2)) == NULL )
499	return NULL;
500	bin_data = (unsigned char *)PyString_AsString(rv);
501
502	for( ; len > 0 ; len--, ascii_data++ ) {
503	/* Get the byte and look it up */
504	this_ch = table_a2b_hqx[*ascii_data];
505	if ( this_ch == SKIP )
506	continue;
507	if ( this_ch == FAIL ) {
508	PyErr_SetString(Error, "Illegal char");
509	Py_DECREF(rv);
510	return NULL;
511	}
512	if ( this_ch == DONE ) {
513	/* The terminating colon */
514	done = 1;
515	break;
516	}
517
518	/* Shift it into the buffer and see if any bytes are ready */
519	leftchar = (leftchar << 6) \| (this_ch);
520	leftbits += 6;
521	if ( leftbits >= 8 ) {
522	leftbits -= 8;
523	*bin_data++ = (leftchar >> leftbits) & 0xff;
524	leftchar &= ((1 << leftbits) - 1);
525	}
526	}
527
528	if ( leftbits && !done ) {
529	PyErr_SetString(Incomplete,
530	"String has incomplete number of bytes");
531	Py_DECREF(rv);
532	return NULL;
533	}
534	_PyString_Resize(
535	&rv, (bin_data - (unsigned char *)PyString_AsString(rv)));
536	if (rv) {
537	PyObject *rrv = Py_BuildValue("Oi", rv, done);
538	Py_DECREF(rv);
539	return rrv;
540	}
541
542	return NULL;
543	}
544
545	PyDoc_STRVAR(doc_rlecode_hqx, "Binhex RLE-code binary data");
546
547	static PyObject *
548	binascii_rlecode_hqx(PyObject self, PyObject args)
549	{
550	unsigned char in_data, out_data;
551	PyObject *rv;
552	unsigned char ch;
553	Py_ssize_t in, inend, len;
554
555	if ( !PyArg_ParseTuple(args, "s#:rlecode_hqx", &in_data, &len) )
556	return NULL;
557
558	/* Worst case: output is twice as big as input (fixed later) */
559	if ( (rv=PyString_FromStringAndSize(NULL, len*2+2)) == NULL )
560	return NULL;
561	out_data = (unsigned char *)PyString_AsString(rv);
562
563	for( in=0; in<len; in++) {
564	ch = in_data[in];
565	if ( ch == RUNCHAR ) {
566	/* RUNCHAR. Escape it. */
567	*out_data++ = RUNCHAR;
568	*out_data++ = 0;
569	} else {
570	/* Check how many following are the same */
571	for(inend=in+1;
572	inend<len && in_data[inend] == ch &&
573	inend < in+255;
574	inend++) ;
575	if ( inend - in > 3 ) {
576	/* More than 3 in a row. Output RLE. */
577	*out_data++ = ch;
578	*out_data++ = RUNCHAR;
579	*out_data++ = inend-in;
580	in = inend-1;
581	} else {
582	/* Less than 3. Output the byte itself */
583	*out_data++ = ch;
584	}
585	}
586	}
587	_PyString_Resize(&rv, (out_data -
588	(unsigned char *)PyString_AsString(rv)));
589	return rv;
590	}
591
592	PyDoc_STRVAR(doc_b2a_hqx, "Encode .hqx data");
593
594	static PyObject *
595	binascii_b2a_hqx(PyObject self, PyObject args)
596	{
597	unsigned char ascii_data, bin_data;
598	int leftbits = 0;
599	unsigned char this_ch;
600	unsigned int leftchar = 0;
601	PyObject *rv;
602	Py_ssize_t len;
603
604	if ( !PyArg_ParseTuple(args, "s#:b2a_hqx", &bin_data, &len) )
605	return NULL;
606
607	/* Allocate a buffer that is at least large enough */
608	if ( (rv=PyString_FromStringAndSize(NULL, len*2+2)) == NULL )
609	return NULL;
610	ascii_data = (unsigned char *)PyString_AsString(rv);
611
612	for( ; len > 0 ; len--, bin_data++ ) {
613	/* Shift into our buffer, and output any 6bits ready */
614	leftchar = (leftchar << 8) \| *bin_data;
615	leftbits += 8;
616	while ( leftbits >= 6 ) {
617	this_ch = (leftchar >> (leftbits-6)) & 0x3f;
618	leftbits -= 6;
619	*ascii_data++ = table_b2a_hqx[this_ch];
620	}
621	}
622	/* Output a possible runt byte */
623	if ( leftbits ) {
624	leftchar <<= (6-leftbits);
625	*ascii_data++ = table_b2a_hqx[leftchar & 0x3f];
626	}
627	_PyString_Resize(&rv, (ascii_data -
628	(unsigned char *)PyString_AsString(rv)));
629	return rv;
630	}
631
632	PyDoc_STRVAR(doc_rledecode_hqx, "Decode hexbin RLE-coded string");
633
634	static PyObject *
635	binascii_rledecode_hqx(PyObject self, PyObject args)
636	{
637	unsigned char in_data, out_data;
638	unsigned char in_byte, in_repeat;
639	PyObject *rv;
640	Py_ssize_t in_len, out_len, out_len_left;
641
642	if ( !PyArg_ParseTuple(args, "s#:rledecode_hqx", &in_data, &in_len) )
643	return NULL;
644
645	/* Empty string is a special case */
646	if ( in_len == 0 )
647	return PyString_FromString("");
648
649	/* Allocate a buffer of reasonable size. Resized when needed */
650	out_len = in_len*2;
651	if ( (rv=PyString_FromStringAndSize(NULL, out_len)) == NULL )
652	return NULL;
653	out_len_left = out_len;
654	out_data = (unsigned char *)PyString_AsString(rv);
655
656	/*
657	** We need two macros here to get/put bytes and handle
658	** end-of-buffer for input and output strings.
659	*/
660	#define INBYTE(b) \
661	do { \
662	if ( --in_len < 0 ) { \
663	PyErr_SetString(Incomplete, ""); \
664	Py_DECREF(rv); \
665	return NULL; \
666	} \
667	b = *in_data++; \
668	} while(0)
669
670	#define OUTBYTE(b) \
671	do { \
672	if ( --out_len_left < 0 ) { \
673	_PyString_Resize(&rv, 2*out_len); \
674	if ( rv == NULL ) return NULL; \
675	out_data = (unsigned char *)PyString_AsString(rv) \
676	+ out_len; \
677	out_len_left = out_len-1; \
678	out_len = out_len * 2; \
679	} \
680	*out_data++ = b; \
681	} while(0)
682
683	/*
684	** Handle first byte separately (since we have to get angry
685	** in case of an orphaned RLE code).
686	*/
687	INBYTE(in_byte);
688
689	if (in_byte == RUNCHAR) {
690	INBYTE(in_repeat);
691	if (in_repeat != 0) {
692	/* Note Error, not Incomplete (which is at the end
693	** of the string only). This is a programmer error.
694	*/
695	PyErr_SetString(Error, "Orphaned RLE code at start");
696	Py_DECREF(rv);
697	return NULL;
698	}
699	OUTBYTE(RUNCHAR);
700	} else {
701	OUTBYTE(in_byte);
702	}
703
704	while( in_len > 0 ) {
705	INBYTE(in_byte);
706
707	if (in_byte == RUNCHAR) {
708	INBYTE(in_repeat);
709	if ( in_repeat == 0 ) {
710	/* Just an escaped RUNCHAR value */
711	OUTBYTE(RUNCHAR);
712	} else {
713	/* Pick up value and output a sequence of it */
714	in_byte = out_data[-1];
715	while ( --in_repeat > 0 )
716	OUTBYTE(in_byte);
717	}
718	} else {
719	/* Normal byte */
720	OUTBYTE(in_byte);
721	}
722	}
723	_PyString_Resize(&rv, (out_data -
724	(unsigned char *)PyString_AsString(rv)));
725	return rv;
726	}
727
728	PyDoc_STRVAR(doc_crc_hqx,
729	"(data, oldcrc) -> newcrc. Compute hqx CRC incrementally");
730
731	static PyObject *
732	binascii_crc_hqx(PyObject self, PyObject args)
733	{
734	unsigned char *bin_data;
735	unsigned int crc;
736	Py_ssize_t len;
737
738	if ( !PyArg_ParseTuple(args, "s#i:crc_hqx", &bin_data, &len, &crc) )
739	return NULL;
740
741	while(len--) {
742	crc=((crc<<8)&0xff00)^crctab_hqx[((crc>>8)&0xff)^*bin_data++];
743	}
744
745	return Py_BuildValue("i", crc);
746	}
747
748	PyDoc_STRVAR(doc_crc32,
749	"(data, oldcrc = 0) -> newcrc. Compute CRC-32 incrementally");
750
751	/* Crc - 32 BIT ANSI X3.66 CRC checksum files
752	Also known as: ISO 3307
753	**********************************************************************\|
754	* *\|
755	* Demonstration program to compute the 32-bit CRC used as the frame *\|
756	* check sequence in ADCCP (ANSI X3.66, also known as FIPS PUB 71 *\|
757	* and FED-STD-1003, the U.S. versions of CCITT's X.25 link-level *\|
758	* protocol). The 32-bit FCS was added via the Federal Register, *\|
759	* 1 June 1982, p.23798. I presume but don't know for certain that *\|
760	* this polynomial is or will be included in CCITT V.41, which *\|
761	* defines the 16-bit CRC (often called CRC-CCITT) polynomial. FIPS *\|
762	* PUB 78 says that the 32-bit FCS reduces otherwise undetected *\|
763	* errors by a factor of 10^-5 over 16-bit FCS. *\|
764	* *\|
765	**********************************************************************\|
766
767	Copyright (C) 1986 Gary S. Brown. You may use this program, or
768	code or tables extracted from it, as desired without restriction.
769
770	First, the polynomial itself and its table of feedback terms. The
771	polynomial is
772	X^32+X^26+X^23+X^22+X^16+X^12+X^11+X^10+X^8+X^7+X^5+X^4+X^2+X^1+X^0
773	Note that we take it "backwards" and put the highest-order term in
774	the lowest-order bit. The X^32 term is "implied"; the LSB is the
775	X^31 term, etc. The X^0 term (usually shown as "+1") results in
776	the MSB being 1.
777
778	Note that the usual hardware shift register implementation, which
779	is what we're using (we're merely optimizing it by doing eight-bit
780	chunks at a time) shifts bits into the lowest-order term. In our
781	implementation, that means shifting towards the right. Why do we
782	do it this way? Because the calculated CRC must be transmitted in
783	order from highest-order term to lowest-order term. UARTs transmit
784	characters in order from LSB to MSB. By storing the CRC this way,
785	we hand it to the UART in the order low-byte to high-byte; the UART
786	sends each low-bit to high-bit; and the result is transmission bit
787	by bit from highest- to lowest-order term without requiring any bit
788	shuffling on our part. Reception works similarly.
789
790	The feedback terms table consists of 256, 32-bit entries. Notes:
791
792	1. The table can be generated at runtime if desired; code to do so
793	is shown later. It might not be obvious, but the feedback
794	terms simply represent the results of eight shift/xor opera-
795	tions for all combinations of data and CRC register values.
796
797	2. The CRC accumulation logic is the same for all CRC polynomials,
798	be they sixteen or thirty-two bits wide. You simply choose the
799	appropriate table. Alternatively, because the table can be
800	generated at runtime, you can start by generating the table for
801	the polynomial in question and use exactly the same "updcrc",
802	if your application needn't simultaneously handle two CRC
803	polynomials. (Note, however, that XMODEM is strange.)
804
805	3. For 16-bit CRCs, the table entries need be only 16 bits wide;
806	of course, 32-bit entries work OK if the high 16 bits are zero.
807
808	4. The values must be right-shifted by eight bits by the "updcrc"
809	logic; the shift must be unsigned (bring in zeroes). On some
810	hardware you could probably optimize the shift in assembler by
811	using byte-swap instructions.
812	********************************************************************/
813
/* Byte-wise feedback table for the CRC-32 computed by binascii_crc32().
** Entry i is the result of eight shift-right/xor steps on the value i
** using the reversed polynomial 0xedb88320. */
static unsigned long crc_32_tab[256] = {
    0x00000000UL, 0x77073096UL, 0xee0e612cUL, 0x990951baUL, 0x076dc419UL,
    0x706af48fUL, 0xe963a535UL, 0x9e6495a3UL, 0x0edb8832UL, 0x79dcb8a4UL,
    0xe0d5e91eUL, 0x97d2d988UL, 0x09b64c2bUL, 0x7eb17cbdUL, 0xe7b82d07UL,
    0x90bf1d91UL, 0x1db71064UL, 0x6ab020f2UL, 0xf3b97148UL, 0x84be41deUL,
    0x1adad47dUL, 0x6ddde4ebUL, 0xf4d4b551UL, 0x83d385c7UL, 0x136c9856UL,
    0x646ba8c0UL, 0xfd62f97aUL, 0x8a65c9ecUL, 0x14015c4fUL, 0x63066cd9UL,
    0xfa0f3d63UL, 0x8d080df5UL, 0x3b6e20c8UL, 0x4c69105eUL, 0xd56041e4UL,
    0xa2677172UL, 0x3c03e4d1UL, 0x4b04d447UL, 0xd20d85fdUL, 0xa50ab56bUL,
    0x35b5a8faUL, 0x42b2986cUL, 0xdbbbc9d6UL, 0xacbcf940UL, 0x32d86ce3UL,
    0x45df5c75UL, 0xdcd60dcfUL, 0xabd13d59UL, 0x26d930acUL, 0x51de003aUL,
    0xc8d75180UL, 0xbfd06116UL, 0x21b4f4b5UL, 0x56b3c423UL, 0xcfba9599UL,
    0xb8bda50fUL, 0x2802b89eUL, 0x5f058808UL, 0xc60cd9b2UL, 0xb10be924UL,
    0x2f6f7c87UL, 0x58684c11UL, 0xc1611dabUL, 0xb6662d3dUL, 0x76dc4190UL,
    0x01db7106UL, 0x98d220bcUL, 0xefd5102aUL, 0x71b18589UL, 0x06b6b51fUL,
    0x9fbfe4a5UL, 0xe8b8d433UL, 0x7807c9a2UL, 0x0f00f934UL, 0x9609a88eUL,
    0xe10e9818UL, 0x7f6a0dbbUL, 0x086d3d2dUL, 0x91646c97UL, 0xe6635c01UL,
    0x6b6b51f4UL, 0x1c6c6162UL, 0x856530d8UL, 0xf262004eUL, 0x6c0695edUL,
    0x1b01a57bUL, 0x8208f4c1UL, 0xf50fc457UL, 0x65b0d9c6UL, 0x12b7e950UL,
    0x8bbeb8eaUL, 0xfcb9887cUL, 0x62dd1ddfUL, 0x15da2d49UL, 0x8cd37cf3UL,
    0xfbd44c65UL, 0x4db26158UL, 0x3ab551ceUL, 0xa3bc0074UL, 0xd4bb30e2UL,
    0x4adfa541UL, 0x3dd895d7UL, 0xa4d1c46dUL, 0xd3d6f4fbUL, 0x4369e96aUL,
    0x346ed9fcUL, 0xad678846UL, 0xda60b8d0UL, 0x44042d73UL, 0x33031de5UL,
    0xaa0a4c5fUL, 0xdd0d7cc9UL, 0x5005713cUL, 0x270241aaUL, 0xbe0b1010UL,
    0xc90c2086UL, 0x5768b525UL, 0x206f85b3UL, 0xb966d409UL, 0xce61e49fUL,
    0x5edef90eUL, 0x29d9c998UL, 0xb0d09822UL, 0xc7d7a8b4UL, 0x59b33d17UL,
    0x2eb40d81UL, 0xb7bd5c3bUL, 0xc0ba6cadUL, 0xedb88320UL, 0x9abfb3b6UL,
    0x03b6e20cUL, 0x74b1d29aUL, 0xead54739UL, 0x9dd277afUL, 0x04db2615UL,
    0x73dc1683UL, 0xe3630b12UL, 0x94643b84UL, 0x0d6d6a3eUL, 0x7a6a5aa8UL,
    0xe40ecf0bUL, 0x9309ff9dUL, 0x0a00ae27UL, 0x7d079eb1UL, 0xf00f9344UL,
    0x8708a3d2UL, 0x1e01f268UL, 0x6906c2feUL, 0xf762575dUL, 0x806567cbUL,
    0x196c3671UL, 0x6e6b06e7UL, 0xfed41b76UL, 0x89d32be0UL, 0x10da7a5aUL,
    0x67dd4accUL, 0xf9b9df6fUL, 0x8ebeeff9UL, 0x17b7be43UL, 0x60b08ed5UL,
    0xd6d6a3e8UL, 0xa1d1937eUL, 0x38d8c2c4UL, 0x4fdff252UL, 0xd1bb67f1UL,
    0xa6bc5767UL, 0x3fb506ddUL, 0x48b2364bUL, 0xd80d2bdaUL, 0xaf0a1b4cUL,
    0x36034af6UL, 0x41047a60UL, 0xdf60efc3UL, 0xa867df55UL, 0x316e8eefUL,
    0x4669be79UL, 0xcb61b38cUL, 0xbc66831aUL, 0x256fd2a0UL, 0x5268e236UL,
    0xcc0c7795UL, 0xbb0b4703UL, 0x220216b9UL, 0x5505262fUL, 0xc5ba3bbeUL,
    0xb2bd0b28UL, 0x2bb45a92UL, 0x5cb36a04UL, 0xc2d7ffa7UL, 0xb5d0cf31UL,
    0x2cd99e8bUL, 0x5bdeae1dUL, 0x9b64c2b0UL, 0xec63f226UL, 0x756aa39cUL,
    0x026d930aUL, 0x9c0906a9UL, 0xeb0e363fUL, 0x72076785UL, 0x05005713UL,
    0x95bf4a82UL, 0xe2b87a14UL, 0x7bb12baeUL, 0x0cb61b38UL, 0x92d28e9bUL,
    0xe5d5be0dUL, 0x7cdcefb7UL, 0x0bdbdf21UL, 0x86d3d2d4UL, 0xf1d4e242UL,
    0x68ddb3f8UL, 0x1fda836eUL, 0x81be16cdUL, 0xf6b9265bUL, 0x6fb077e1UL,
    0x18b74777UL, 0x88085ae6UL, 0xff0f6a70UL, 0x66063bcaUL, 0x11010b5cUL,
    0x8f659effUL, 0xf862ae69UL, 0x616bffd3UL, 0x166ccf45UL, 0xa00ae278UL,
    0xd70dd2eeUL, 0x4e048354UL, 0x3903b3c2UL, 0xa7672661UL, 0xd06016f7UL,
    0x4969474dUL, 0x3e6e77dbUL, 0xaed16a4aUL, 0xd9d65adcUL, 0x40df0b66UL,
    0x37d83bf0UL, 0xa9bcae53UL, 0xdebb9ec5UL, 0x47b2cf7fUL, 0x30b5ffe9UL,
    0xbdbdf21cUL, 0xcabac28aUL, 0x53b39330UL, 0x24b4a3a6UL, 0xbad03605UL,
    0xcdd70693UL, 0x54de5729UL, 0x23d967bfUL, 0xb3667a2eUL, 0xc4614ab8UL,
    0x5d681b02UL, 0x2a6f2b94UL, 0xb40bbe37UL, 0xc30c8ea1UL, 0x5a05df1bUL,
    0x2d02ef8dUL
};
868
869	static PyObject *
870	binascii_crc32(PyObject self, PyObject args)
871	{ /* By Jim Ahlstrom; All rights transferred to CNRI */
872	unsigned char *bin_data;
873	unsigned long crc = 0UL; /* initial value of CRC */
874	Py_ssize_t len;
875	long result;
876
877	if ( !PyArg_ParseTuple(args, "s#\|l:crc32", &bin_data, &len, &crc) )
878	return NULL;
879
880	crc = ~ crc;
881	#if SIZEOF_LONG > 4
882	/* only want the trailing 32 bits */
883	crc &= 0xFFFFFFFFUL;
884	#endif
885	while (len--)
886	crc = crc_32_tab[(crc ^ *bin_data++) & 0xffUL] ^ (crc >> 8);
887	/* Note: (crc >> 8) MUST zero fill on left */
888
889	result = (long)(crc ^ 0xFFFFFFFFUL);
890	#if SIZEOF_LONG > 4
891	/* Extend the sign bit. This is one way to ensure the result is the
892	* same across platforms. The other way would be to return an
893	* unbounded unsigned long, but the evidence suggests that lots of
894	* code outside this treats the result as if it were a signed 4-byte
895	* integer.
896	*/
897	result \|= -(result & (1L << 31));
898	#endif
899	return PyInt_FromLong(result);
900	}
901
902
903	static PyObject *
904	binascii_hexlify(PyObject self, PyObject args)
905	{
906	char* argbuf;
907	Py_ssize_t arglen;
908	PyObject *retval;
909	char* retbuf;
910	Py_ssize_t i, j;
911
912	if (!PyArg_ParseTuple(args, "s#:b2a_hex", &argbuf, &arglen))
913	return NULL;
914
915	retval = PyString_FromStringAndSize(NULL, arglen*2);
916	if (!retval)
917	return NULL;
918	retbuf = PyString_AsString(retval);
919	if (!retbuf)
920	goto finally;
921
922	/* make hex version of string, taken from shamodule.c */
923	for (i=j=0; i < arglen; i++) {
924	char c;
925	c = (argbuf[i] >> 4) & 0xf;
926	c = (c>9) ? c+'a'-10 : c + '0';
927	retbuf[j++] = c;
928	c = argbuf[i] & 0xf;
929	c = (c>9) ? c+'a'-10 : c + '0';
930	retbuf[j++] = c;
931	}
932	return retval;
933
934	finally:
935	Py_DECREF(retval);
936	return NULL;
937	}
938
939	PyDoc_STRVAR(doc_hexlify,
940	"b2a_hex(data) -> s; Hexadecimal representation of binary data.\n\
941	\n\
942	This function is also available as \"hexlify()\".");
943
944
/* Convert one hexadecimal digit character to its numeric value.
 *
 * Returns 0..15 for '0'-'9', 'a'-'f' and 'A'-'F', and -1 for anything
 * else.  Plain range comparisons are used instead of <ctype.h> so the
 * result does not depend on the current locale and every int argument
 * (including negative values) is handled safely.
 */
static int
to_int(int c)
{
    if (c >= '0' && c <= '9')
        return c - '0';
    if (c >= 'a' && c <= 'f')
        return c - 'a' + 10;
    if (c >= 'A' && c <= 'F')
        return c - 'A' + 10;
    return -1;
}
958
959
960	static PyObject *
961	binascii_unhexlify(PyObject self, PyObject args)
962	{
963	char* argbuf;
964	Py_ssize_t arglen;
965	PyObject *retval;
966	char* retbuf;
967	Py_ssize_t i, j;
968
969	if (!PyArg_ParseTuple(args, "s#:a2b_hex", &argbuf, &arglen))
970	return NULL;
971
972	/* XXX What should we do about strings with an odd length? Should
973	* we add an implicit leading zero, or a trailing zero? For now,
974	* raise an exception.
975	*/
976	if (arglen % 2) {
977	PyErr_SetString(PyExc_TypeError, "Odd-length string");
978	return NULL;
979	}
980
981	retval = PyString_FromStringAndSize(NULL, (arglen/2));
982	if (!retval)
983	return NULL;
984	retbuf = PyString_AsString(retval);
985	if (!retbuf)
986	goto finally;
987
988	for (i=j=0; i < arglen; i += 2) {
989	int top = to_int(Py_CHARMASK(argbuf[i]));
990	int bot = to_int(Py_CHARMASK(argbuf[i+1]));
991	if (top == -1 \|\| bot == -1) {
992	PyErr_SetString(PyExc_TypeError,
993	"Non-hexadecimal digit found");
994	goto finally;
995	}
996	retbuf[j++] = (top << 4) + bot;
997	}
998	return retval;
999
1000	finally:
1001	Py_DECREF(retval);
1002	return NULL;
1003	}
1004
1005	PyDoc_STRVAR(doc_unhexlify,
1006	"a2b_hex(hexstr) -> s; Binary data of hexadecimal representation.\n\
1007	\n\
1008	hexstr must contain an even number of hex digits (upper or lower case).\n\
1009	This function is also available as \"unhexlify()\"");
1010
/* Value of each 7-bit character code when read as a hexadecimal digit:
 * '0'-'9' map to 0..9, 'A'-'F' and 'a'-'f' map to 10..15, and every
 * other code maps to -1.  The table is never modified, hence const.
 */
static const int table_hex[128] = {
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
   0, 1, 2, 3,  4, 5, 6, 7,  8, 9,-1,-1, -1,-1,-1,-1,
  -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,10,11,12, 13,14,15,-1, -1,-1,-1,-1, -1,-1,-1,-1,
  -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1, -1,-1,-1,-1
};
1021
1022	#define hexval(c) table_hex[(unsigned int)(c)]
1023
1024	#define MAXLINESIZE 76
1025
1026	PyDoc_STRVAR(doc_a2b_qp, "Decode a string of qp-encoded data");
1027
1028	static PyObject*
1029	binascii_a2b_qp(PyObject self, PyObject args, PyObject *kwargs)
1030	{
1031	Py_ssize_t in, out;
1032	char ch;
1033	unsigned char data, odata;
1034	Py_ssize_t datalen = 0;
1035	PyObject *rv;
1036	static char *kwlist[] = {"data", "header", NULL};
1037	int header = 0;
1038
1039	if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#\|i", kwlist, &data,
1040	&datalen, &header))
1041	return NULL;
1042
1043	/* We allocate the output same size as input, this is overkill.
1044	* The previous implementation used calloc() so we'll zero out the
1045	* memory here too, since PyMem_Malloc() does not guarantee that.
1046	*/
1047	odata = (unsigned char *) PyMem_Malloc(datalen);
1048	if (odata == NULL) {
1049	PyErr_NoMemory();
1050	return NULL;
1051	}
1052	memset(odata, 0, datalen);
1053
1054	in = out = 0;
1055	while (in < datalen) {
1056	if (data[in] == '=') {
1057	in++;
1058	if (in >= datalen) break;
1059	/* Soft line breaks */
1060	if ((data[in] == '\n') \|\| (data[in] == '\r') \|\|
1061	(data[in] == ' ') \|\| (data[in] == '\t')) {
1062	if (data[in] != '\n') {
1063	while (in < datalen && data[in] != '\n') in++;
1064	}
1065	if (in < datalen) in++;
1066	}
1067	else if (data[in] == '=') {
1068	/* broken case from broken python qp */
1069	odata[out++] = '=';
1070	in++;
1071	}
1072	else if (((data[in] >= 'A' && data[in] <= 'F') \|\|
1073	(data[in] >= 'a' && data[in] <= 'f') \|\|
1074	(data[in] >= '0' && data[in] <= '9')) &&
1075	((data[in+1] >= 'A' && data[in+1] <= 'F') \|\|
1076	(data[in+1] >= 'a' && data[in+1] <= 'f') \|\|
1077	(data[in+1] >= '0' && data[in+1] <= '9'))) {
1078	/* hexval */
1079	ch = hexval(data[in]) << 4;
1080	in++;
1081	ch \|= hexval(data[in]);
1082	in++;
1083	odata[out++] = ch;
1084	}
1085	else {
1086	odata[out++] = '=';
1087	}
1088	}
1089	else if (header && data[in] == '_') {
1090	odata[out++] = ' ';
1091	in++;
1092	}
1093	else {
1094	odata[out] = data[in];
1095	in++;
1096	out++;
1097	}
1098	}
1099	if ((rv = PyString_FromStringAndSize((char *)odata, out)) == NULL) {
1100	PyMem_Free(odata);
1101	return NULL;
1102	}
1103	PyMem_Free(odata);
1104	return rv;
1105	}
1106
/* Store the two uppercase hexadecimal digits of ch in s[0] (high
 * nibble) and s[1] (low nibble).  No terminator is written.
 * Always returns 0.
 */
static int
to_hex (unsigned char ch, unsigned char *s)
{
    static const char digits[] = "0123456789ABCDEF";

    s[0] = digits[(ch >> 4) & 0x0F];
    s[1] = digits[ch & 0x0F];
    return 0;
}
1117
1118	PyDoc_STRVAR(doc_b2a_qp,
1119	"b2a_qp(data, quotetabs=0, istext=1, header=0) -> s; \n\
1120	Encode a string using quoted-printable encoding. \n\
1121	\n\
1122	On encoding, when istext is set, newlines are not encoded, and white \n\
1123	space at end of lines is. When istext is not set, \\r and \\n (CR/LF) are \n\
1124	both encoded. When quotetabs is set, space and tabs are encoded.");
1125
1126	/* XXX: This is ridiculously complicated to be backward compatible
1127	* (mostly) with the quopri module. It doesn't re-create the quopri
1128	* module bug where text ending in CRLF has the CR encoded */
1129	static PyObject*
1130	binascii_b2a_qp (PyObject self, PyObject args, PyObject *kwargs)
1131	{
1132	Py_ssize_t in, out;
1133	unsigned char data, odata;
1134	Py_ssize_t datalen = 0, odatalen = 0;
1135	PyObject *rv;
1136	unsigned int linelen = 0;
1137	static char *kwlist[] = {"data", "quotetabs", "istext",
1138	"header", NULL};
1139	int istext = 1;
1140	int quotetabs = 0;
1141	int header = 0;
1142	unsigned char ch;
1143	int crlf = 0;
1144	unsigned char *p;
1145
1146	if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s#\|iii", kwlist, &data,
1147	&datalen, &quotetabs, &istext, &header))
1148	return NULL;
1149
1150	/* See if this string is using CRLF line ends */
1151	/* XXX: this function has the side effect of converting all of
1152	* the end of lines to be the same depending on this detection
1153	* here */
1154	p = (unsigned char ) strchr((char )data, '\n');
1155	if ((p != NULL) && (p > data) && (*(p-1) == '\r'))
1156	crlf = 1;
1157
1158	/* First, scan to see how many characters need to be encoded */
1159	in = 0;
1160	while (in < datalen) {
1161	if ((data[in] > 126) \|\|
1162	(data[in] == '=') \|\|
1163	(header && data[in] == '_') \|\|
1164	((data[in] == '.') && (linelen == 1)) \|\|
1165	(!istext && ((data[in] == '\r') \|\| (data[in] == '\n'))) \|\|
1166	((data[in] == '\t' \|\| data[in] == ' ') && (in + 1 == datalen)) \|\|
1167	((data[in] < 33) &&
1168	(data[in] != '\r') && (data[in] != '\n') &&
1169	(quotetabs && ((data[in] != '\t') \|\| (data[in] != ' ')))))
1170	{
1171	if ((linelen + 3) >= MAXLINESIZE) {
1172	linelen = 0;
1173	if (crlf)
1174	odatalen += 3;
1175	else
1176	odatalen += 2;
1177	}
1178	linelen += 3;
1179	odatalen += 3;
1180	in++;
1181	}
1182	else {
1183	if (istext &&
1184	((data[in] == '\n') \|\|
1185	((in+1 < datalen) && (data[in] == '\r') &&
1186	(data[in+1] == '\n'))))
1187	{
1188	linelen = 0;
1189	/* Protect against whitespace on end of line */
1190	if (in && ((data[in-1] == ' ') \|\| (data[in-1] == '\t')))
1191	odatalen += 2;
1192	if (crlf)
1193	odatalen += 2;
1194	else
1195	odatalen += 1;
1196	if (data[in] == '\r')
1197	in += 2;
1198	else
1199	in++;
1200	}
1201	else {
1202	if ((in + 1 != datalen) &&
1203	(data[in+1] != '\n') &&
1204	(linelen + 1) >= MAXLINESIZE) {
1205	linelen = 0;
1206	if (crlf)
1207	odatalen += 3;
1208	else
1209	odatalen += 2;
1210	}
1211	linelen++;
1212	odatalen++;
1213	in++;
1214	}
1215	}
1216	}
1217
1218	/* We allocate the output same size as input, this is overkill.
1219	* The previous implementation used calloc() so we'll zero out the
1220	* memory here too, since PyMem_Malloc() does not guarantee that.
1221	*/
1222	odata = (unsigned char *) PyMem_Malloc(odatalen);
1223	if (odata == NULL) {
1224	PyErr_NoMemory();
1225	return NULL;
1226	}
1227	memset(odata, 0, odatalen);
1228
1229	in = out = linelen = 0;
1230	while (in < datalen) {
1231	if ((data[in] > 126) \|\|
1232	(data[in] == '=') \|\|
1233	(header && data[in] == '_') \|\|
1234	((data[in] == '.') && (linelen == 1)) \|\|
1235	(!istext && ((data[in] == '\r') \|\| (data[in] == '\n'))) \|\|
1236	((data[in] == '\t' \|\| data[in] == ' ') && (in + 1 == datalen)) \|\|
1237	((data[in] < 33) &&
1238	(data[in] != '\r') && (data[in] != '\n') &&
1239	(quotetabs && ((data[in] != '\t') \|\| (data[in] != ' ')))))
1240	{
1241	if ((linelen + 3 )>= MAXLINESIZE) {
1242	odata[out++] = '=';
1243	if (crlf) odata[out++] = '\r';
1244	odata[out++] = '\n';
1245	linelen = 0;
1246	}
1247	odata[out++] = '=';
1248	to_hex(data[in], &odata[out]);
1249	out += 2;
1250	in++;
1251	linelen += 3;
1252	}
1253	else {
1254	if (istext &&
1255	((data[in] == '\n') \|\|
1256	((in+1 < datalen) && (data[in] == '\r') &&
1257	(data[in+1] == '\n'))))
1258	{
1259	linelen = 0;
1260	/* Protect against whitespace on end of line */
1261	if (out && ((odata[out-1] == ' ') \|\| (odata[out-1] == '\t'))) {
1262	ch = odata[out-1];
1263	odata[out-1] = '=';
1264	to_hex(ch, &odata[out]);
1265	out += 2;
1266	}
1267
1268	if (crlf) odata[out++] = '\r';
1269	odata[out++] = '\n';
1270	if (data[in] == '\r')
1271	in += 2;
1272	else
1273	in++;
1274	}
1275	else {
1276	if ((in + 1 != datalen) &&
1277	(data[in+1] != '\n') &&
1278	(linelen + 1) >= MAXLINESIZE) {
1279	odata[out++] = '=';
1280	if (crlf) odata[out++] = '\r';
1281	odata[out++] = '\n';
1282	linelen = 0;
1283	}
1284	linelen++;
1285	if (header && data[in] == ' ') {
1286	odata[out++] = '_';
1287	in++;
1288	}
1289	else {
1290	odata[out++] = data[in++];
1291	}
1292	}
1293	}
1294	}
1295	if ((rv = PyString_FromStringAndSize((char *)odata, out)) == NULL) {
1296	PyMem_Free(odata);
1297	return NULL;
1298	}
1299	PyMem_Free(odata);
1300	return rv;
1301	}
1302
1303	/* List of functions defined in the module */
1304
1305	static struct PyMethodDef binascii_module_methods[] = {
1306	{"a2b_uu", binascii_a2b_uu, METH_VARARGS, doc_a2b_uu},
1307	{"b2a_uu", binascii_b2a_uu, METH_VARARGS, doc_b2a_uu},
1308	{"a2b_base64", binascii_a2b_base64, METH_VARARGS, doc_a2b_base64},
1309	{"b2a_base64", binascii_b2a_base64, METH_VARARGS, doc_b2a_base64},
1310	{"a2b_hqx", binascii_a2b_hqx, METH_VARARGS, doc_a2b_hqx},
1311	{"b2a_hqx", binascii_b2a_hqx, METH_VARARGS, doc_b2a_hqx},
1312	{"b2a_hex", binascii_hexlify, METH_VARARGS, doc_hexlify},
1313	{"a2b_hex", binascii_unhexlify, METH_VARARGS, doc_unhexlify},
1314	{"hexlify", binascii_hexlify, METH_VARARGS, doc_hexlify},
1315	{"unhexlify", binascii_unhexlify, METH_VARARGS, doc_unhexlify},
1316	{"rlecode_hqx", binascii_rlecode_hqx, METH_VARARGS, doc_rlecode_hqx},
1317	{"rledecode_hqx", binascii_rledecode_hqx, METH_VARARGS,
1318	doc_rledecode_hqx},
1319	{"crc_hqx", binascii_crc_hqx, METH_VARARGS, doc_crc_hqx},
1320	{"crc32", binascii_crc32, METH_VARARGS, doc_crc32},
1321	{"a2b_qp", (PyCFunction)binascii_a2b_qp, METH_VARARGS \| METH_KEYWORDS,
1322	doc_a2b_qp},
1323	{"b2a_qp", (PyCFunction)binascii_b2a_qp, METH_VARARGS \| METH_KEYWORDS,
1324	doc_b2a_qp},
1325	{NULL, NULL} /* sentinel */
1326	};
1327
1328
1329	/* Initialization function for the module (must be called initbinascii) */
1330	PyDoc_STRVAR(doc_binascii, "Conversion between binary data and ASCII");
1331
1332	PyMODINIT_FUNC
1333	initbinascii(void)
1334	{
1335	PyObject m, d, *x;
1336
1337	/* Create the module and add the functions */
1338	m = Py_InitModule("binascii", binascii_module_methods);
1339	if (m == NULL)
1340	return;
1341
1342	d = PyModule_GetDict(m);
1343	x = PyString_FromString(doc_binascii);
1344	PyDict_SetItemString(d, "__doc__", x);
1345	Py_XDECREF(x);
1346
1347	Error = PyErr_NewException("binascii.Error", NULL, NULL);
1348	PyDict_SetItemString(d, "Error", Error);
1349	Incomplete = PyErr_NewException("binascii.Incomplete", NULL, NULL);
1350	PyDict_SetItemString(d, "Incomplete", Incomplete);
1351	}

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: vendor/python/2.5/Modules/binascii.c

Download in other formats: