source: branches/samba-3.0/source/python/py_tdbpack.c

Last change on this file was 1, checked in by Paul Smedley, 18 years ago

Initial code import

File size: 18.7 KB
Line 
1/* -*- c-file-style: "python"; indent-tabs-mode: nil; -*-
2
3 Python wrapper for Samba tdb pack/unpack functions
4 Copyright (C) Martin Pool 2002, 2003
5
6
7 NOTE PYTHON STYLE GUIDE
8 http://www.python.org/peps/pep-0007.html
9
10
11 This program is free software; you can redistribute it and/or modify
12 it under the terms of the GNU General Public License as published by
13 the Free Software Foundation; either version 2 of the License, or
14 (at your option) any later version.
15
16 This program is distributed in the hope that it will be useful,
17 but WITHOUT ANY WARRANTY; without even the implied warranty of
18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 GNU General Public License for more details.
20
21 You should have received a copy of the GNU General Public License
22 along with this program; if not, write to the Free Software
23 Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
24*/
25
26#include "Python.h"
27
28/* This symbol is used in both config.h and Python.h which causes an
29 annoying compiler warning. */
30
31#ifdef HAVE_FSTAT
32#undef HAVE_FSTAT
33#endif
34
35/* This module is supposed to be standalone, however for portability
36 it would be good to use the FUNCTION_MACRO preprocessor define. */
37
38#include "include/config.h"
39
40#ifdef HAVE_FUNCTION_MACRO
41#define FUNCTION_MACRO (__FUNCTION__)
42#else
43#define FUNCTION_MACRO (__FILE__)
44#endif
45
46static PyObject * pytdbpack_number(char ch, PyObject *val_iter, PyObject *packed_list);
47static PyObject * pytdbpack_str(char ch,
48 PyObject *val_iter, PyObject *packed_list,
49 const char *encoding);
50static PyObject * pytdbpack_buffer(PyObject *val_iter, PyObject *packed_list);
51
52static PyObject *pytdbunpack_item(char, char **pbuf, int *plen, PyObject *);
53
54static PyObject *pytdbpack_data(const char *format_str,
55 PyObject *val_seq,
56 PyObject *val_list);
57
58static PyObject *
59pytdbunpack_string(char **pbuf, int *plen, const char *encoding);
60
61static void pack_le_uint32(unsigned long val_long, unsigned char *pbuf);
62
63
64static PyObject *pytdbpack_bad_type(char ch,
65 const char *expected,
66 PyObject *val_obj);
67
/*
 * Module-level docstring, passed to Py_InitModule3().
 *
 * The "Pointer" entry is documented under lowercase 'p', which is the code
 * the packer and unpacker in this file treat as a 32-bit number; uppercase
 * 'P' is handled as a NUL-terminated string, like 'f'.
 *
 * NOTE(review): 'F' is listed below, but neither the pack nor the unpack
 * dispatcher in this file accepts it.  The 'p' entry also says the boolean
 * value is used, whereas the packer converts the value with PyNumber_Long()
 * -- confirm the intended behaviour before rewording further.
 */
static const char * pytdbpack_docstring =
"Convert between Python values and Samba binary encodings.\n"
"\n"
"This module is conceptually similar to the standard 'struct' module, but it\n"
"uses both a different binary format and a different description string.\n"
"\n"
"Samba's encoding is based on that used inside DCE-RPC and SMB: a\n"
"little-endian, unpadded, non-self-describing binary format. It is intended\n"
"that these functions be as similar as possible to the routines in Samba's\n"
"tdb/tdbutil module, with appropriate adjustments for Python datatypes.\n"
"\n"
"Python strings are used to specify the format of data to be packed or\n"
"unpacked.\n"
"\n"
"String encodings are implied by the database format: they may be either DOS\n"
"codepage (currently hardcoded to 850), or Unix codepage (currently hardcoded\n"
"to be the same as the default Python encoding).\n"
"\n"
"tdbpack format strings:\n"
"\n"
" 'f': NUL-terminated string in codepage iso8859-1\n"
" \n"
" 'P': same as 'f'\n"
"\n"
" 'F': NUL-terminated string in iso-8859-1\n"
"\n"
" 'd': 4 byte little-endian unsigned number\n"
"\n"
" 'w': 2 byte little-endian unsigned number\n"
"\n"
" 'p': \"Pointer\" value -- in the subset of DCERPC used by Samba, this is\n"
" really just an \"exists\" or \"does not exist\" flag. The boolean\n"
" value of the Python object is used.\n"
" \n"
" 'B': 4-byte LE length, followed by that many bytes of binary data.\n"
" Corresponds to a Python integer giving the length, followed by a byte\n"
" string of the appropriate length.\n"
"\n"
" '$': Special flag indicating that the preceding format code should be\n"
" repeated while data remains. This is only supported for unpacking.\n"
"\n"
" Every code corresponds to a single Python object, except 'B' which\n"
" corresponds to two values (length and contents), and '$', which produces\n"
" however many make sense.\n";
112
/* Docstring attached to the module's "pack" entry in the method table. */
static const char pytdbpack_doc[] =
    "pack(format, values) -> buffer\n"
    "Pack Python objects into Samba binary format according to format string.\n"
    "\n"
    "arguments:\n"
    " format -- string of tdbpack format characters\n"
    " values -- sequence of value objects corresponding 1:1 to format characters\n"
    "\n"
    "returns:\n"
    " buffer -- string containing packed data\n"
    "\n"
    "raises:\n"
    " IndexError -- if there are too few values for the format\n"
    " ValueError -- if any of the format characters is illegal\n"
    " TypeError -- if the format is not a string, or values is not a sequence,\n"
    " or any of the values is of the wrong type for the corresponding\n"
    " format character\n"
    "\n"
    "notes:\n"
    " For historical reasons, it is not an error to pass more values than are consumed\n"
    " by the format.\n";
134
135
/* Docstring attached to the module's "unpack" entry in the method table. */
static const char pytdbunpack_doc[] =
    "unpack(format, buffer) -> (values, rest)\n"
    "Unpack Samba binary data according to format string.\n"
    "\n"
    "arguments:\n"
    " format -- string of tdbpack characters\n"
    " buffer -- string of packed binary data\n"
    "\n"
    "returns:\n"
    " 2-tuple of:\n"
    " values -- sequence of values corresponding 1:1 to format characters\n"
    " rest -- string containing data that was not decoded, or '' if the\n"
    " whole string was consumed\n"
    "\n"
    "raises:\n"
    " IndexError -- if there is insufficient data in the buffer for the\n"
    " format (or if the data is corrupt and contains a variable-length\n"
    " field extending past the end)\n"
    " ValueError -- if any of the format characters is illegal\n"
    "\n"
    "notes:\n"
    " Because unconsumed data is returned, you can feed it back in to the\n"
    " unpacker to extract further fields. Alternatively, if you wish to modify\n"
    " some fields near the start of the data, you may be able to save time by\n"
    " only unpacking and repacking the necessary part.\n";
161
162
/* Name of the codec for the DOS codepage. */
const char *pytdb_dos_encoding = "cp850";

/* Codec for "Unix" strings.  Left NULL so that the Python default encoding
   is used, which means the Samba default encoding *must* be the same as
   the Python default encoding. */
const char *pytdb_unix_encoding = NULL;
168
169
170/*
171 * Pack objects to bytes.
172 *
173 * All objects are first individually encoded onto a list, and then the list
174 * of strings is concatenated. This is faster than concatenating strings,
175 * and reasonably simple to code.
176 */
177static PyObject *
178pytdbpack(PyObject *self,
179 PyObject *args)
180{
181 char *format_str;
182 PyObject *val_seq, *val_iter = NULL,
183 *packed_list = NULL, *packed_str = NULL,
184 *empty_str = NULL;
185
186 /* TODO: Test passing wrong types or too many arguments */
187 if (!PyArg_ParseTuple(args, "sO", &format_str, &val_seq))
188 return NULL;
189
190 if (!(val_iter = PyObject_GetIter(val_seq)))
191 goto out;
192
193 /* Create list to hold strings until we're done, then join them all. */
194 if (!(packed_list = PyList_New(0)))
195 goto out;
196
197 if (!pytdbpack_data(format_str, val_iter, packed_list))
198 goto out;
199
200 /* this function is not officially documented but it works */
201 if (!(empty_str = PyString_InternFromString("")))
202 goto out;
203
204 packed_str = _PyString_Join(empty_str, packed_list);
205
206 out:
207 Py_XDECREF(empty_str);
208 Py_XDECREF(val_iter);
209 Py_XDECREF(packed_list);
210
211 return packed_str;
212}
213
214
215/*
216 Pack data according to FORMAT_STR from the elements of VAL_SEQ into
217 PACKED_BUF.
218
219 The string has already been checked out, so we know that VAL_SEQ is large
220 enough to hold the packed data, and that there are enough value items.
221 (However, their types may not have been thoroughly checked yet.)
222
223 In addition, val_seq is a Python Fast sequence.
224
225 Returns NULL for error (with exception set), or None.
226*/
227PyObject *
228pytdbpack_data(const char *format_str,
229 PyObject *val_iter,
230 PyObject *packed_list)
231{
232 int format_i, val_i = 0;
233
234 for (format_i = 0, val_i = 0; format_str[format_i]; format_i++) {
235 char ch = format_str[format_i];
236
237 switch (ch) {
238 /* dispatch to the appropriate packer for this type,
239 which should pull things off the iterator, and
240 append them to the packed_list */
241 case 'w':
242 case 'd':
243 case 'p':
244 if (!(packed_list = pytdbpack_number(ch, val_iter, packed_list)))
245 return NULL;
246 break;
247
248 case 'f':
249 case 'P':
250 if (!(packed_list = pytdbpack_str(ch, val_iter, packed_list, pytdb_unix_encoding)))
251 return NULL;
252 break;
253
254 case 'B':
255 if (!(packed_list = pytdbpack_buffer(val_iter, packed_list)))
256 return NULL;
257 break;
258
259 default:
260 PyErr_Format(PyExc_ValueError,
261 "%s: format character '%c' is not supported",
262 FUNCTION_MACRO, ch);
263 return NULL;
264 }
265 }
266
267 return packed_list;
268}
269
270
271static PyObject *
272pytdbpack_number(char ch, PyObject *val_iter, PyObject *packed_list)
273{
274 unsigned long val_long;
275 PyObject *val_obj = NULL, *long_obj = NULL, *result_obj = NULL;
276 PyObject *new_list = NULL;
277 unsigned char pack_buf[4];
278
279 if (!(val_obj = PyIter_Next(val_iter)))
280 goto out;
281
282 if (!(long_obj = PyNumber_Long(val_obj))) {
283 pytdbpack_bad_type(ch, "Number", val_obj);
284 goto out;
285 }
286
287 val_long = PyLong_AsUnsignedLong(long_obj);
288 pack_le_uint32(val_long, pack_buf);
289
290 /* pack as 32-bit; if just packing a 'w' 16-bit word then only take
291 the first two bytes. */
292
293 if (!(result_obj = PyString_FromStringAndSize(pack_buf, ch == 'w' ? 2 : 4)))
294 goto out;
295
296 if (PyList_Append(packed_list, result_obj) != -1)
297 new_list = packed_list;
298
299 out:
300 Py_XDECREF(val_obj);
301 Py_XDECREF(long_obj);
302 Py_XDECREF(result_obj);
303
304 return new_list;
305}
306
307
308/*
309 * Take one string from the iterator val_iter, convert it to 8-bit, and return
310 * it.
311 *
312 * If the input is neither a string nor Unicode, an exception is raised.
313 *
314 * If the input is Unicode, then it is converted to the appropriate encoding.
315 *
316 * If the input is a String, and encoding is not null, then it is converted to
317 * Unicode using the default decoding method, and then converted to the
318 * encoding. If the encoding is NULL, then the string is written out as-is --
319 * this is used when the default Python encoding is the same as the Samba
320 * encoding.
321 *
322 * I hope this approach avoids being too fragile w.r.t. being passed either
323 * Unicode or String objects.
324 */
325static PyObject *
326pytdbpack_str(char ch,
327 PyObject *val_iter, PyObject *packed_list, const char *encoding)
328{
329 PyObject *val_obj = NULL;
330 PyObject *unicode_obj = NULL;
331 PyObject *coded_str = NULL;
332 PyObject *nul_str = NULL;
333 PyObject *new_list = NULL;
334
335 if (!(val_obj = PyIter_Next(val_iter)))
336 goto out;
337
338 if (PyUnicode_Check(val_obj)) {
339 if (!(coded_str = PyUnicode_AsEncodedString(val_obj, encoding, NULL)))
340 goto out;
341 }
342 else if (PyString_Check(val_obj) && !encoding) {
343 /* For efficiency, we assume that the Python interpreter has
344 the same default string encoding as Samba's native string
345 encoding. On the PSA, both are always 8859-1. */
346 coded_str = val_obj;
347 Py_INCREF(coded_str);
348 }
349 else if (PyString_Check(val_obj)) {
350 /* String, but needs to be converted */
351 if (!(unicode_obj = PyString_AsDecodedObject(val_obj, NULL, NULL)))
352 goto out;
353 if (!(coded_str = PyUnicode_AsEncodedString(unicode_obj, encoding, NULL)))
354 goto out;
355 }
356 else {
357 pytdbpack_bad_type(ch, "String or Unicode", val_obj);
358 goto out;
359 }
360
361 if (!nul_str)
362 /* this is constant and often-used; hold it forever */
363 if (!(nul_str = PyString_FromStringAndSize("", 1)))
364 goto out;
365
366 if ((PyList_Append(packed_list, coded_str) != -1)
367 && (PyList_Append(packed_list, nul_str) != -1))
368 new_list = packed_list;
369
370 out:
371 Py_XDECREF(val_obj);
372 Py_XDECREF(unicode_obj);
373 Py_XDECREF(coded_str);
374
375 return new_list;
376}
377
378
379/*
380 * Pack (LENGTH, BUFFER) pair onto the list.
381 *
382 * The buffer must already be a String, not Unicode, because it contains 8-bit
383 * untranslated data. In some cases it will actually be UTF_16_LE data.
384 */
385static PyObject *
386pytdbpack_buffer(PyObject *val_iter, PyObject *packed_list)
387{
388 PyObject *val_obj;
389 PyObject *new_list = NULL;
390
391 /* pull off integer and stick onto list */
392 if (!(packed_list = pytdbpack_number('d', val_iter, packed_list)))
393 return NULL;
394
395 /* this assumes that the string is the right length; the old code did
396 the same. */
397 if (!(val_obj = PyIter_Next(val_iter)))
398 return NULL;
399
400 if (!PyString_Check(val_obj)) {
401 pytdbpack_bad_type('B', "String", val_obj);
402 goto out;
403 }
404
405 if (PyList_Append(packed_list, val_obj) != -1)
406 new_list = packed_list;
407
408 out:
409 Py_XDECREF(val_obj);
410 return new_list;
411}
412
413
414static PyObject *pytdbpack_bad_type(char ch,
415 const char *expected,
416 PyObject *val_obj)
417{
418 PyObject *r = PyObject_Repr(val_obj);
419 if (!r)
420 return NULL;
421 PyErr_Format(PyExc_TypeError,
422 "tdbpack: format '%c' requires %s, not %s",
423 ch, expected, PyString_AS_STRING(r));
424 Py_DECREF(r);
425 return val_obj;
426}
427
428
/*
  Store the low 32 bits of val_long into pbuf[0..3], least significant byte
  first.

  XXX: glib and Samba have quicker macros for endianness conversion, but
  plain libc has none that I know of, and it is probably not a big deal.
  Doing it byte by byte is endian-safe, even though we will almost always
  be on x86.
*/
static void pack_le_uint32(unsigned long val_long, unsigned char *pbuf)
{
    int byte_no;

    for (byte_no = 0; byte_no < 4; byte_no++)
        pbuf[byte_no] = (unsigned char) ((val_long >> (8 * byte_no)) & 0xff);
}
442
443
#if 0                           /* not used */
/* Copy len bytes from `from` to *pbuf, then advance *pbuf past them. */
static void pack_bytes(long len, const char *from,
                       unsigned char **pbuf)
{
    unsigned char *dest = *pbuf;

    memcpy(dest, from, len);
    *pbuf = dest + len;
}
#endif
452
453
454static PyObject *
455pytdbunpack(PyObject *self,
456 PyObject *args)
457{
458 char *format_str, *packed_str, *ppacked;
459 PyObject *val_list = NULL, *ret_tuple = NULL;
460 PyObject *rest_string = NULL;
461 int format_len, packed_len;
462 char last_format = '#'; /* invalid */
463 int i;
464
465 /* get arguments */
466 if (!PyArg_ParseTuple(args, "ss#", &format_str, &packed_str, &packed_len))
467 return NULL;
468
469 format_len = strlen(format_str);
470
471 /* Allocate list to hold results. Initially empty, and we append
472 results as we go along. */
473 val_list = PyList_New(0);
474 if (!val_list)
475 goto failed;
476 ret_tuple = PyTuple_New(2);
477 if (!ret_tuple)
478 goto failed;
479
480 /* For every object, unpack. */
481 for (ppacked = packed_str, i = 0; i < format_len && format_str[i] != '$'; i++) {
482 last_format = format_str[i];
483 /* packed_len is reduced in place */
484 if (!pytdbunpack_item(format_str[i], &ppacked, &packed_len, val_list))
485 goto failed;
486 }
487
488 /* If the last character was '$', keep going until out of space */
489 if (format_str[i] == '$') {
490 if (i == 0) {
491 PyErr_Format(PyExc_ValueError,
492 "%s: '$' may not be first character in format",
493 FUNCTION_MACRO);
494 return NULL;
495 }
496 while (packed_len > 0)
497 if (!pytdbunpack_item(last_format, &ppacked, &packed_len, val_list))
498 goto failed;
499 }
500
501 /* save leftovers for next time */
502 rest_string = PyString_FromStringAndSize(ppacked, packed_len);
503 if (!rest_string)
504 goto failed;
505
506 /* return (values, rest) tuple; give up references to them */
507 PyTuple_SET_ITEM(ret_tuple, 0, val_list);
508 val_list = NULL;
509 PyTuple_SET_ITEM(ret_tuple, 1, rest_string);
510 val_list = NULL;
511 return ret_tuple;
512
513 failed:
514 /* handle failure: deallocate anything. XDECREF forms handle NULL
515 pointers for objects that haven't been allocated yet. */
516 Py_XDECREF(val_list);
517 Py_XDECREF(ret_tuple);
518 Py_XDECREF(rest_string);
519 return NULL;
520}
521
522
523static void
524pytdbunpack_err_too_short(void)
525{
526 PyErr_Format(PyExc_IndexError,
527 "%s: data too short for unpack format", FUNCTION_MACRO);
528}
529
530
531static PyObject *
532pytdbunpack_uint32(char **pbuf, int *plen)
533{
534 unsigned long v;
535 unsigned char *b;
536
537 if (*plen < 4) {
538 pytdbunpack_err_too_short();
539 return NULL;
540 }
541
542 b = *pbuf;
543 v = b[0] | b[1]<<8 | b[2]<<16 | b[3]<<24;
544
545 (*pbuf) += 4;
546 (*plen) -= 4;
547
548 return PyLong_FromUnsignedLong(v);
549}
550
551
552static PyObject *pytdbunpack_int16(char **pbuf, int *plen)
553{
554 long v;
555 unsigned char *b;
556
557 if (*plen < 2) {
558 pytdbunpack_err_too_short();
559 return NULL;
560 }
561
562 b = *pbuf;
563 v = b[0] | b[1]<<8;
564
565 (*pbuf) += 2;
566 (*plen) -= 2;
567
568 return PyInt_FromLong(v);
569}
570
571
572static PyObject *
573pytdbunpack_string(char **pbuf, int *plen, const char *encoding)
574{
575 int len;
576 char *nul_ptr, *start;
577
578 start = *pbuf;
579
580 nul_ptr = memchr(start, '\0', *plen);
581 if (!nul_ptr) {
582 pytdbunpack_err_too_short();
583 return NULL;
584 }
585
586 len = nul_ptr - start;
587
588 *pbuf += len + 1; /* skip \0 */
589 *plen -= len + 1;
590
591 return PyString_Decode(start, len, encoding, NULL);
592}
593
594
595static PyObject *
596pytdbunpack_buffer(char **pbuf, int *plen, PyObject *val_list)
597{
598 /* first get 32-bit len */
599 long slen;
600 unsigned char *b;
601 unsigned char *start;
602 PyObject *str_obj = NULL, *len_obj = NULL;
603
604 if (*plen < 4) {
605 pytdbunpack_err_too_short();
606 return NULL;
607 }
608
609 b = *pbuf;
610 slen = b[0] | b[1]<<8 | b[2]<<16 | b[3]<<24;
611
612 if (slen < 0) { /* surely you jest */
613 PyErr_Format(PyExc_ValueError,
614 "%s: buffer seems to have negative length", FUNCTION_MACRO);
615 return NULL;
616 }
617
618 (*pbuf) += 4;
619 (*plen) -= 4;
620 start = *pbuf;
621
622 if (*plen < slen) {
623 PyErr_Format(PyExc_IndexError,
624 "%s: not enough data to unpack buffer: "
625 "need %d bytes, have %d", FUNCTION_MACRO,
626 (int) slen, *plen);
627 return NULL;
628 }
629
630 (*pbuf) += slen;
631 (*plen) -= slen;
632
633 if (!(len_obj = PyInt_FromLong(slen)))
634 goto failed;
635
636 if (PyList_Append(val_list, len_obj) == -1)
637 goto failed;
638
639 if (!(str_obj = PyString_FromStringAndSize(start, slen)))
640 goto failed;
641
642 if (PyList_Append(val_list, str_obj) == -1)
643 goto failed;
644
645 Py_DECREF(len_obj);
646 Py_DECREF(str_obj);
647
648 return val_list;
649
650 failed:
651 Py_XDECREF(len_obj); /* handles NULL */
652 Py_XDECREF(str_obj);
653 return NULL;
654}
655
656
657/* Unpack a single field from packed data, according to format character CH.
658 Remaining data is at *PBUF, of *PLEN.
659
660 *PBUF is advanced, and *PLEN reduced to reflect the amount of data that has
661 been consumed.
662
663 Returns a reference to None, or NULL for failure.
664*/
665static PyObject *pytdbunpack_item(char ch,
666 char **pbuf,
667 int *plen,
668 PyObject *val_list)
669{
670 PyObject *unpacked;
671
672 if (ch == 'w') { /* 16-bit int */
673 unpacked = pytdbunpack_int16(pbuf, plen);
674 }
675 else if (ch == 'd' || ch == 'p') { /* 32-bit int */
676 /* pointers can just come through as integers */
677 unpacked = pytdbunpack_uint32(pbuf, plen);
678 }
679 else if (ch == 'f' || ch == 'P') { /* nul-term string */
680 unpacked = pytdbunpack_string(pbuf, plen, pytdb_unix_encoding);
681 }
682 else if (ch == 'B') { /* length, buffer */
683 return pytdbunpack_buffer(pbuf, plen, val_list);
684 }
685 else {
686 PyErr_Format(PyExc_ValueError,
687 "%s: format character '%c' is not supported",
688 FUNCTION_MACRO, ch);
689
690 return NULL;
691 }
692
693 /* otherwise OK */
694 if (!unpacked)
695 return NULL;
696
697 if (PyList_Append(val_list, unpacked) == -1)
698 val_list = NULL;
699
700 /* PyList_Append takes a new reference to the inserted object.
701 Therefore, we no longer need the original reference. */
702 Py_DECREF(unpacked);
703
704 return val_list;
705}
706
707
708
709
710
711
712static PyMethodDef pytdbpack_methods[] = {
713 { "pack", pytdbpack, METH_VARARGS, (char *) pytdbpack_doc },
714 { "unpack", pytdbunpack, METH_VARARGS, (char *) pytdbunpack_doc },
715};
716
717DL_EXPORT(void)
718inittdbpack(void)
719{
720 Py_InitModule3("tdbpack", pytdbpack_methods,
721 (char *) pytdbpack_docstring);
722}
Note: See TracBrowser for help on using the repository browser.