Context Navigation

py_tdbpack.c

Visit:

Last change on this file was 1, checked in by Paul Smedley, 18 years ago
Initial code import
File size: 18.7 KB

Line
1	/* -*- c-file-style: "python"; indent-tabs-mode: nil; -*-
2
3	Python wrapper for Samba tdb pack/unpack functions
4	Copyright (C) Martin Pool 2002, 2003
5
6
7	NOTE PYTHON STYLE GUIDE
8	http://www.python.org/peps/pep-0007.html
9
10
11	This program is free software; you can redistribute it and/or modify
12	it under the terms of the GNU General Public License as published by
13	the Free Software Foundation; either version 2 of the License, or
14	(at your option) any later version.
15
16	This program is distributed in the hope that it will be useful,
17	but WITHOUT ANY WARRANTY; without even the implied warranty of
18	MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19	GNU General Public License for more details.
20
21	You should have received a copy of the GNU General Public License
22	along with this program; if not, write to the Free Software
23	Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24	*/
25
26	#include "Python.h"
27
28	/* This symbol is used in both config.h and Python.h which causes an
29	annoying compiler warning. */
30
31	#ifdef HAVE_FSTAT
32	#undef HAVE_FSTAT
33	#endif
34
35	/* This module is supposed to be standalone, however for portability
36	it would be good to use the FUNCTION_MACRO preprocessor define. */
37
38	#include "include/config.h"
39
40	#ifdef HAVE_FUNCTION_MACRO
41	#define FUNCTION_MACRO (__FUNCTION__)
42	#else
43	#define FUNCTION_MACRO (__FILE__)
44	#endif
45
46	static PyObject * pytdbpack_number(char ch, PyObject val_iter, PyObject packed_list);
47	static PyObject * pytdbpack_str(char ch,
48	PyObject val_iter, PyObject packed_list,
49	const char *encoding);
50	static PyObject * pytdbpack_buffer(PyObject val_iter, PyObject packed_list);
51
52	static PyObject pytdbunpack_item(char, char pbuf, int plen, PyObject *);
53
54	static PyObject pytdbpack_data(const char format_str,
55	PyObject *val_seq,
56	PyObject *val_list);
57
58	static PyObject *
59	pytdbunpack_string(char *pbuf, int plen, const char *encoding);
60
61	static void pack_le_uint32(unsigned long val_long, unsigned char *pbuf);
62
63
64	static PyObject *pytdbpack_bad_type(char ch,
65	const char *expected,
66	PyObject *val_obj);
67
/* Module docstring; handed to Py_InitModule3() by inittdbpack().

   The pointer-flag code is lowercase 'p': that is the character the pack
   and unpack dispatchers treat as a 32-bit number, while uppercase 'P' is
   handled as a NUL-terminated string.

   NOTE(review): the 'F' code is described below, but neither
   pytdbpack_data() nor pytdbunpack_item() has a case for it -- confirm
   whether it should be implemented or removed from this text. */
static const char * pytdbpack_docstring =
"Convert between Python values and Samba binary encodings.\n"
"\n"
"This module is conceptually similar to the standard 'struct' module, but it\n"
"uses both a different binary format and a different description string.\n"
"\n"
"Samba's encoding is based on that used inside DCE-RPC and SMB: a\n"
"little-endian, unpadded, non-self-describing binary format. It is intended\n"
"that these functions be as similar as possible to the routines in Samba's\n"
"tdb/tdbutil module, with appropriate adjustments for Python datatypes.\n"
"\n"
"Python strings are used to specify the format of data to be packed or\n"
"unpacked.\n"
"\n"
"String encodings are implied by the database format: they may be either DOS\n"
"codepage (currently hardcoded to 850), or Unix codepage (currently hardcoded\n"
"to be the same as the default Python encoding).\n"
"\n"
"tdbpack format strings:\n"
"\n"
" 'f': NUL-terminated string in codepage iso8859-1\n"
" \n"
" 'P': same as 'f'\n"
"\n"
" 'F': NUL-terminated string in iso-8859-1\n"
"\n"
" 'd': 4 byte little-endian unsigned number\n"
"\n"
" 'w': 2 byte little-endian unsigned number\n"
"\n"
" 'p': \"Pointer\" value -- in the subset of DCERPC used by Samba, this is\n"
" really just an \"exists\" or \"does not exist\" flag. The boolean\n"
" value of the Python object is used.\n"
" \n"
" 'B': 4-byte LE length, followed by that many bytes of binary data.\n"
" Corresponds to a Python integer giving the length, followed by a byte\n"
" string of the appropriate length.\n"
"\n"
" '$': Special flag indicating that the preceding format code should be\n"
" repeated while data remains. This is only supported for unpacking.\n"
"\n"
" Every code corresponds to a single Python object, except 'B' which\n"
" corresponds to two values (length and contents), and '$', which produces\n"
" however many make sense.\n";
112
/* Docstring for the Python-level pack() function; it is attached to the
   "pack" entry of pytdbpack_methods. */
113	static char const pytdbpack_doc[] =
114	"pack(format, values) -> buffer\n"
115	"Pack Python objects into Samba binary format according to format string.\n"
116	"\n"
117	"arguments:\n"
118	" format -- string of tdbpack format characters\n"
119	" values -- sequence of value objects corresponding 1:1 to format characters\n"
120	"\n"
121	"returns:\n"
122	" buffer -- string containing packed data\n"
123	"\n"
124	"raises:\n"
125	" IndexError -- if there are too few values for the format\n"
126	" ValueError -- if any of the format characters is illegal\n"
127	" TypeError -- if the format is not a string, or values is not a sequence,\n"
128	" or any of the values is of the wrong type for the corresponding\n"
129	" format character\n"
130	"\n"
131	"notes:\n"
132	" For historical reasons, it is not an error to pass more values than are consumed\n"
133	" by the format.\n";
134
135
/* Docstring for the Python-level unpack() function; it is attached to the
   "unpack" entry of pytdbpack_methods. */
136	static char const pytdbunpack_doc[] =
137	"unpack(format, buffer) -> (values, rest)\n"
138	"Unpack Samba binary data according to format string.\n"
139	"\n"
140	"arguments:\n"
141	" format -- string of tdbpack characters\n"
142	" buffer -- string of packed binary data\n"
143	"\n"
144	"returns:\n"
145	" 2-tuple of:\n"
146	" values -- sequence of values corresponding 1:1 to format characters\n"
147	" rest -- string containing data that was not decoded, or '' if the\n"
148	" whole string was consumed\n"
149	"\n"
150	"raises:\n"
151	" IndexError -- if there is insufficient data in the buffer for the\n"
152	" format (or if the data is corrupt and contains a variable-length\n"
153	" field extending past the end)\n"
154	" ValueError -- if any of the format characters is illegal\n"
155	"\n"
156	"notes:\n"
157	" Because unconsumed data is returned, you can feed it back in to the\n"
158	" unpacker to extract further fields. Alternatively, if you wish to modify\n"
159	" some fields near the start of the data, you may be able to save time by\n"
160	" only unpacking and repacking the necessary part.\n";
161
162
/* Codec name for the DOS codepage.
   NOTE(review): no function in the visible part of this file reads this
   variable -- confirm whether it is still needed. */
163	const char *pytdb_dos_encoding = "cp850";
164
/* Encoding argument passed to pytdbpack_str() and pytdbunpack_string() for
   the 'f' and 'P' format codes. */
165	/* NULL, meaning that the Samba default encoding must be the same as the
166	Python default encoding. */
167	const char *pytdb_unix_encoding = NULL;
168
169
170	/*
171	* Pack objects to bytes.
172	*
173	* All objects are first individually encoded onto a list, and then the list
174	* of strings is concatenated. This is faster than concatenating strings,
175	* and reasonably simple to code.
176	*/
177	static PyObject *
178	pytdbpack(PyObject *self,
179	PyObject *args)
180	{
181	char *format_str;
182	PyObject val_seq, val_iter = NULL,
183	packed_list = NULL, packed_str = NULL,
184	*empty_str = NULL;
185
186	/* TODO: Test passing wrong types or too many arguments */
187	if (!PyArg_ParseTuple(args, "sO", &format_str, &val_seq))
188	return NULL;
189
190	if (!(val_iter = PyObject_GetIter(val_seq)))
191	goto out;
192
193	/* Create list to hold strings until we're done, then join them all. */
194	if (!(packed_list = PyList_New(0)))
195	goto out;
196
197	if (!pytdbpack_data(format_str, val_iter, packed_list))
198	goto out;
199
200	/* this function is not officially documented but it works */
201	if (!(empty_str = PyString_InternFromString("")))
202	goto out;
203
204	packed_str = _PyString_Join(empty_str, packed_list);
205
206	out:
207	Py_XDECREF(empty_str);
208	Py_XDECREF(val_iter);
209	Py_XDECREF(packed_list);
210
211	return packed_str;
212	}
213
214
215	/*
216	Pack data according to FORMAT_STR from the elements of VAL_SEQ into
217	PACKED_BUF.
218
219	The string has already been checked out, so we know that VAL_SEQ is large
220	enough to hold the packed data, and that there are enough value items.
221	(However, their types may not have been thoroughly checked yet.)
222
223	In addition, val_seq is a Python Fast sequence.
224
225	Returns NULL for error (with exception set), or None.
226	*/
227	PyObject *
228	pytdbpack_data(const char *format_str,
229	PyObject *val_iter,
230	PyObject *packed_list)
231	{
232	int format_i, val_i = 0;
233
234	for (format_i = 0, val_i = 0; format_str[format_i]; format_i++) {
235	char ch = format_str[format_i];
236
237	switch (ch) {
238	/* dispatch to the appropriate packer for this type,
239	which should pull things off the iterator, and
240	append them to the packed_list */
241	case 'w':
242	case 'd':
243	case 'p':
244	if (!(packed_list = pytdbpack_number(ch, val_iter, packed_list)))
245	return NULL;
246	break;
247
248	case 'f':
249	case 'P':
250	if (!(packed_list = pytdbpack_str(ch, val_iter, packed_list, pytdb_unix_encoding)))
251	return NULL;
252	break;
253
254	case 'B':
255	if (!(packed_list = pytdbpack_buffer(val_iter, packed_list)))
256	return NULL;
257	break;
258
259	default:
260	PyErr_Format(PyExc_ValueError,
261	"%s: format character '%c' is not supported",
262	FUNCTION_MACRO, ch);
263	return NULL;
264	}
265	}
266
267	return packed_list;
268	}
269
270
271	static PyObject *
272	pytdbpack_number(char ch, PyObject val_iter, PyObject packed_list)
273	{
274	unsigned long val_long;
275	PyObject val_obj = NULL, long_obj = NULL, *result_obj = NULL;
276	PyObject *new_list = NULL;
277	unsigned char pack_buf[4];
278
279	if (!(val_obj = PyIter_Next(val_iter)))
280	goto out;
281
282	if (!(long_obj = PyNumber_Long(val_obj))) {
283	pytdbpack_bad_type(ch, "Number", val_obj);
284	goto out;
285	}
286
287	val_long = PyLong_AsUnsignedLong(long_obj);
288	pack_le_uint32(val_long, pack_buf);
289
290	/* pack as 32-bit; if just packing a 'w' 16-bit word then only take
291	the first two bytes. */
292
293	if (!(result_obj = PyString_FromStringAndSize(pack_buf, ch == 'w' ? 2 : 4)))
294	goto out;
295
296	if (PyList_Append(packed_list, result_obj) != -1)
297	new_list = packed_list;
298
299	out:
300	Py_XDECREF(val_obj);
301	Py_XDECREF(long_obj);
302	Py_XDECREF(result_obj);
303
304	return new_list;
305	}
306
307
308	/*
309	* Take one string from the iterator val_iter, convert it to 8-bit, and return
310	* it.
311	*
312	* If the input is neither a string nor Unicode, an exception is raised.
313	*
314	* If the input is Unicode, then it is converted to the appropriate encoding.
315	*
316	* If the input is a String, and encoding is not null, then it is converted to
317	* Unicode using the default decoding method, and then converted to the
318	* encoding. If the encoding is NULL, then the string is written out as-is --
319	* this is used when the default Python encoding is the same as the Samba
320	* encoding.
321	*
322	* I hope this approach avoids being too fragile w.r.t. being passed either
323	* Unicode or String objects.
324	*/
325	static PyObject *
326	pytdbpack_str(char ch,
327	PyObject val_iter, PyObject packed_list, const char *encoding)
328	{
329	PyObject *val_obj = NULL;
330	PyObject *unicode_obj = NULL;
331	PyObject *coded_str = NULL;
332	PyObject *nul_str = NULL;
333	PyObject *new_list = NULL;
334
335	if (!(val_obj = PyIter_Next(val_iter)))
336	goto out;
337
338	if (PyUnicode_Check(val_obj)) {
339	if (!(coded_str = PyUnicode_AsEncodedString(val_obj, encoding, NULL)))
340	goto out;
341	}
342	else if (PyString_Check(val_obj) && !encoding) {
343	/* For efficiency, we assume that the Python interpreter has
344	the same default string encoding as Samba's native string
345	encoding. On the PSA, both are always 8859-1. */
346	coded_str = val_obj;
347	Py_INCREF(coded_str);
348	}
349	else if (PyString_Check(val_obj)) {
350	/* String, but needs to be converted */
351	if (!(unicode_obj = PyString_AsDecodedObject(val_obj, NULL, NULL)))
352	goto out;
353	if (!(coded_str = PyUnicode_AsEncodedString(unicode_obj, encoding, NULL)))
354	goto out;
355	}
356	else {
357	pytdbpack_bad_type(ch, "String or Unicode", val_obj);
358	goto out;
359	}
360
361	if (!nul_str)
362	/* this is constant and often-used; hold it forever */
363	if (!(nul_str = PyString_FromStringAndSize("", 1)))
364	goto out;
365
366	if ((PyList_Append(packed_list, coded_str) != -1)
367	&& (PyList_Append(packed_list, nul_str) != -1))
368	new_list = packed_list;
369
370	out:
371	Py_XDECREF(val_obj);
372	Py_XDECREF(unicode_obj);
373	Py_XDECREF(coded_str);
374
375	return new_list;
376	}
377
378
379	/*
380	* Pack (LENGTH, BUFFER) pair onto the list.
381	*
382	* The buffer must already be a String, not Unicode, because it contains 8-bit
383	* untranslated data. In some cases it will actually be UTF_16_LE data.
384	*/
385	static PyObject *
386	pytdbpack_buffer(PyObject val_iter, PyObject packed_list)
387	{
388	PyObject *val_obj;
389	PyObject *new_list = NULL;
390
391	/* pull off integer and stick onto list */
392	if (!(packed_list = pytdbpack_number('d', val_iter, packed_list)))
393	return NULL;
394
395	/* this assumes that the string is the right length; the old code did
396	the same. */
397	if (!(val_obj = PyIter_Next(val_iter)))
398	return NULL;
399
400	if (!PyString_Check(val_obj)) {
401	pytdbpack_bad_type('B', "String", val_obj);
402	goto out;
403	}
404
405	if (PyList_Append(packed_list, val_obj) != -1)
406	new_list = packed_list;
407
408	out:
409	Py_XDECREF(val_obj);
410	return new_list;
411	}
412
413
414	static PyObject *pytdbpack_bad_type(char ch,
415	const char *expected,
416	PyObject *val_obj)
417	{
418	PyObject *r = PyObject_Repr(val_obj);
419	if (!r)
420	return NULL;
421	PyErr_Format(PyExc_TypeError,
422	"tdbpack: format '%c' requires %s, not %s",
423	ch, expected, PyString_AS_STRING(r));
424	Py_DECREF(r);
425	return val_obj;
426	}
427
428
/*
  Store the low 32 bits of VAL_LONG into PBUF[0..3], least significant
  byte first.

  XXX: glib and Samba have quicker macros for endianness conversion, but
  plain libc has none, and doing it byte by byte is correct on any host
  byte order.
*/
static void pack_le_uint32(unsigned long val_long, unsigned char *pbuf)
{
    int byte_i;

    for (byte_i = 0; byte_i < 4; byte_i++) {
        pbuf[byte_i] = val_long & 0xff;
        val_long >>= 8;
    }
}
442
443
/* Disabled helper (compiled out by "#if 0"): copies LEN bytes from FROM to
   the write cursor *PBUF, then advances the cursor past them. */
444	#if 0 /* not used */
445	static void pack_bytes(long len, const char *from,
446	unsigned char **pbuf)
447	{
448	memcpy(*pbuf, from, len);
449	(*pbuf) += len;
450	}
451	#endif
452
453
454	static PyObject *
455	pytdbunpack(PyObject *self,
456	PyObject *args)
457	{
458	char format_str, packed_str, *ppacked;
459	PyObject val_list = NULL, ret_tuple = NULL;
460	PyObject *rest_string = NULL;
461	int format_len, packed_len;
462	char last_format = '#'; /* invalid */
463	int i;
464
465	/* get arguments */
466	if (!PyArg_ParseTuple(args, "ss#", &format_str, &packed_str, &packed_len))
467	return NULL;
468
469	format_len = strlen(format_str);
470
471	/* Allocate list to hold results. Initially empty, and we append
472	results as we go along. */
473	val_list = PyList_New(0);
474	if (!val_list)
475	goto failed;
476	ret_tuple = PyTuple_New(2);
477	if (!ret_tuple)
478	goto failed;
479
480	/* For every object, unpack. */
481	for (ppacked = packed_str, i = 0; i < format_len && format_str[i] != '$'; i++) {
482	last_format = format_str[i];
483	/* packed_len is reduced in place */
484	if (!pytdbunpack_item(format_str[i], &ppacked, &packed_len, val_list))
485	goto failed;
486	}
487
488	/* If the last character was '$', keep going until out of space */
489	if (format_str[i] == '$') {
490	if (i == 0) {
491	PyErr_Format(PyExc_ValueError,
492	"%s: '$' may not be first character in format",
493	FUNCTION_MACRO);
494	return NULL;
495	}
496	while (packed_len > 0)
497	if (!pytdbunpack_item(last_format, &ppacked, &packed_len, val_list))
498	goto failed;
499	}
500
501	/* save leftovers for next time */
502	rest_string = PyString_FromStringAndSize(ppacked, packed_len);
503	if (!rest_string)
504	goto failed;
505
506	/* return (values, rest) tuple; give up references to them */
507	PyTuple_SET_ITEM(ret_tuple, 0, val_list);
508	val_list = NULL;
509	PyTuple_SET_ITEM(ret_tuple, 1, rest_string);
510	val_list = NULL;
511	return ret_tuple;
512
513	failed:
514	/* handle failure: deallocate anything. XDECREF forms handle NULL
515	pointers for objects that haven't been allocated yet. */
516	Py_XDECREF(val_list);
517	Py_XDECREF(ret_tuple);
518	Py_XDECREF(rest_string);
519	return NULL;
520	}
521
522
523	static void
524	pytdbunpack_err_too_short(void)
525	{
526	PyErr_Format(PyExc_IndexError,
527	"%s: data too short for unpack format", FUNCTION_MACRO);
528	}
529
530
531	static PyObject *
532	pytdbunpack_uint32(char *pbuf, int plen)
533	{
534	unsigned long v;
535	unsigned char *b;
536
537	if (*plen < 4) {
538	pytdbunpack_err_too_short();
539	return NULL;
540	}
541
542	b = *pbuf;
543	v = b[0] \| b[1]<<8 \| b[2]<<16 \| b[3]<<24;
544
545	(*pbuf) += 4;
546	(*plen) -= 4;
547
548	return PyLong_FromUnsignedLong(v);
549	}
550
551
552	static PyObject pytdbunpack_int16(char pbuf, int plen)
553	{
554	long v;
555	unsigned char *b;
556
557	if (*plen < 2) {
558	pytdbunpack_err_too_short();
559	return NULL;
560	}
561
562	b = *pbuf;
563	v = b[0] \| b[1]<<8;
564
565	(*pbuf) += 2;
566	(*plen) -= 2;
567
568	return PyInt_FromLong(v);
569	}
570
571
572	static PyObject *
573	pytdbunpack_string(char *pbuf, int plen, const char *encoding)
574	{
575	int len;
576	char nul_ptr, start;
577
578	start = *pbuf;
579
580	nul_ptr = memchr(start, '\0', *plen);
581	if (!nul_ptr) {
582	pytdbunpack_err_too_short();
583	return NULL;
584	}
585
586	len = nul_ptr - start;
587
588	pbuf += len + 1; / skip \0 */
589	*plen -= len + 1;
590
591	return PyString_Decode(start, len, encoding, NULL);
592	}
593
594
595	static PyObject *
596	pytdbunpack_buffer(char *pbuf, int plen, PyObject *val_list)
597	{
598	/* first get 32-bit len */
599	long slen;
600	unsigned char *b;
601	unsigned char *start;
602	PyObject str_obj = NULL, len_obj = NULL;
603
604	if (*plen < 4) {
605	pytdbunpack_err_too_short();
606	return NULL;
607	}
608
609	b = *pbuf;
610	slen = b[0] \| b[1]<<8 \| b[2]<<16 \| b[3]<<24;
611
612	if (slen < 0) { /* surely you jest */
613	PyErr_Format(PyExc_ValueError,
614	"%s: buffer seems to have negative length", FUNCTION_MACRO);
615	return NULL;
616	}
617
618	(*pbuf) += 4;
619	(*plen) -= 4;
620	start = *pbuf;
621
622	if (*plen < slen) {
623	PyErr_Format(PyExc_IndexError,
624	"%s: not enough data to unpack buffer: "
625	"need %d bytes, have %d", FUNCTION_MACRO,
626	(int) slen, *plen);
627	return NULL;
628	}
629
630	(*pbuf) += slen;
631	(*plen) -= slen;
632
633	if (!(len_obj = PyInt_FromLong(slen)))
634	goto failed;
635
636	if (PyList_Append(val_list, len_obj) == -1)
637	goto failed;
638
639	if (!(str_obj = PyString_FromStringAndSize(start, slen)))
640	goto failed;
641
642	if (PyList_Append(val_list, str_obj) == -1)
643	goto failed;
644
645	Py_DECREF(len_obj);
646	Py_DECREF(str_obj);
647
648	return val_list;
649
650	failed:
651	Py_XDECREF(len_obj); /* handles NULL */
652	Py_XDECREF(str_obj);
653	return NULL;
654	}
655
656
657	/* Unpack a single field from packed data, according to format character CH.
658	Remaining data is at PBUF, of PLEN.
659
660	PBUF is advanced, and PLEN reduced to reflect the amount of data that has
661	been consumed.
662
663	Returns a reference to None, or NULL for failure.
664	*/
665	static PyObject *pytdbunpack_item(char ch,
666	char **pbuf,
667	int *plen,
668	PyObject *val_list)
669	{
670	PyObject *unpacked;
671
672	if (ch == 'w') { /* 16-bit int */
673	unpacked = pytdbunpack_int16(pbuf, plen);
674	}
675	else if (ch == 'd' \|\| ch == 'p') { /* 32-bit int */
676	/* pointers can just come through as integers */
677	unpacked = pytdbunpack_uint32(pbuf, plen);
678	}
679	else if (ch == 'f' \|\| ch == 'P') { /* nul-term string */
680	unpacked = pytdbunpack_string(pbuf, plen, pytdb_unix_encoding);
681	}
682	else if (ch == 'B') { /* length, buffer */
683	return pytdbunpack_buffer(pbuf, plen, val_list);
684	}
685	else {
686	PyErr_Format(PyExc_ValueError,
687	"%s: format character '%c' is not supported",
688	FUNCTION_MACRO, ch);
689
690	return NULL;
691	}
692
693	/* otherwise OK */
694	if (!unpacked)
695	return NULL;
696
697	if (PyList_Append(val_list, unpacked) == -1)
698	val_list = NULL;
699
700	/* PyList_Append takes a new reference to the inserted object.
701	Therefore, we no longer need the original reference. */
702	Py_DECREF(unpacked);
703
704	return val_list;
705	}
706
707
708
709
710
711
712	static PyMethodDef pytdbpack_methods[] = {
713	{ "pack", pytdbpack, METH_VARARGS, (char *) pytdbpack_doc },
714	{ "unpack", pytdbunpack, METH_VARARGS, (char *) pytdbunpack_doc },
715	};
716
717	DL_EXPORT(void)
718	inittdbpack(void)
719	{
720	Py_InitModule3("tdbpack", pytdbpack_methods,
721	(char *) pytdbpack_docstring);
722	}

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: branches/samba-3.0/source/python/py_tdbpack.c

Download in other formats: