source: python/vendor/Python-2.7.6/Modules/bz2module.c

Last change on this file was revision 388, checked in by dmik, 11 years ago

python: Update vendor to 2.7.6.

  • Property svn:eol-style set to native
File size: 68.0 KB
Line 
1/*
2
3python-bz2 - python bz2 library interface
4
5Copyright (c) 2002 Gustavo Niemeyer <niemeyer@conectiva.com>
6Copyright (c) 2002 Python Software Foundation; All Rights Reserved
7
8*/
9
10#include "Python.h"
11#include <stdio.h>
12#include <bzlib.h>
13#include "structmember.h"
14
15#ifdef WITH_THREAD
16#include "pythread.h"
17#endif
18
/* Author credit text for the module, stored as a NUL-terminated C string.
 * NOTE(review): presumably exported as the module's __author__ attribute by
 * the module init function, which is not visible in this part of the file. */
19static char __author__[] =
20"The bz2 python module was written by:\n\
21\n\
22 Gustavo Niemeyer <niemeyer@conectiva.com>\n\
23";
24
25/* Our very own off_t-like type, 64-bit if possible */
26/* copied from Objects/fileobject.c */
/* Selection order: plain off_t when large-file support is not configured or
 * when off_t is already at least 8 bytes; otherwise fpos_t if that is at
 * least 8 bytes; otherwise the build is stopped with #error. */
27#if !defined(HAVE_LARGEFILE_SUPPORT)
28typedef off_t Py_off_t;
29#elif SIZEOF_OFF_T >= 8
30typedef off_t Py_off_t;
31#elif SIZEOF_FPOS_T >= 8
32typedef fpos_t Py_off_t;
33#else
34#error "Large file support, but neither off_t nor fpos_t is large enough."
35#endif
36
/* BUF(v): character data pointer of the Python string object v (no type
 * check is performed; the argument is simply cast). */
37#define BUF(v) PyString_AS_STRING((PyStringObject *)v)
38
/* Values stored in BZ2FileObject.mode.  MODE_READ_EOF is entered once the
 * decompressor reports BZ_STREAM_END while reading. */
39#define MODE_CLOSED 0
40#define MODE_READ 1
41#define MODE_READ_EOF 2
42#define MODE_WRITE 3
43
44
/* When <bzlib.h> does not define BZ_CONFIG_ERROR, map the BZ2_-prefixed
 * names used throughout this file onto the unprefixed names.
 * NOTE(review): presumably this targets older libbz2 releases whose API had
 * no BZ2_ prefix -- confirm against the libbz2 versions being supported. */
45#ifndef BZ_CONFIG_ERROR
46
47#define BZ2_bzRead bzRead
48#define BZ2_bzReadOpen bzReadOpen
49#define BZ2_bzReadClose bzReadClose
50#define BZ2_bzWrite bzWrite
51#define BZ2_bzWriteOpen bzWriteOpen
52#define BZ2_bzWriteClose bzWriteClose
53#define BZ2_bzCompress bzCompress
54#define BZ2_bzCompressInit bzCompressInit
55#define BZ2_bzCompressEnd bzCompressEnd
56#define BZ2_bzDecompress bzDecompress
57#define BZ2_bzDecompressInit bzDecompressInit
58#define BZ2_bzDecompressEnd bzDecompressEnd
59
60#endif /* ! BZ_CONFIG_ERROR */
61
62
/* Per-object locking helpers; obj must have a `lock` member.
 * ACQUIRE_LOCK first calls PyThread_acquire_lock() with waitflag 0; only if
 * that fails does it repeat the call with waitflag 1 between
 * Py_BEGIN_ALLOW_THREADS / Py_END_ALLOW_THREADS.
 * Without WITH_THREAD both macros expand to nothing. */
63#ifdef WITH_THREAD
64#define ACQUIRE_LOCK(obj) do { \
65 if (!PyThread_acquire_lock(obj->lock, 0)) { \
66 Py_BEGIN_ALLOW_THREADS \
67 PyThread_acquire_lock(obj->lock, 1); \
68 Py_END_ALLOW_THREADS \
69 } } while(0)
70#define RELEASE_LOCK(obj) PyThread_release_lock(obj->lock)
71#else
72#define ACQUIRE_LOCK(obj)
73#define RELEASE_LOCK(obj)
74#endif
75
/* Smaller of X and Y.  Both arguments may be evaluated more than once, so
 * do not pass expressions with side effects. */
76#define MIN(X, Y) (((X) < (Y)) ? (X) : (Y))
77
/* The NEWLINE_* values are OR-ed together into f_newlinetypes as each
 * newline convention is encountered while reading. */
78/* Bits in f_newlinetypes */
79#define NEWLINE_UNKNOWN 0 /* No newline seen, yet */
80#define NEWLINE_CR 1 /* \r newline seen */
81#define NEWLINE_LF 2 /* \n newline seen */
82#define NEWLINE_CRLF 4 /* \r\n newline seen */
83
84/* ===================================================================== */
85/* Structure definitions. */
86
/* Instance state of a BZ2File object. */
87typedef struct {
88 PyObject_HEAD
89 PyObject *file; /* Underlying Python file object; seek()/close() are called on it */
90
91 char* f_buf; /* Allocated readahead buffer */
92 char* f_bufend; /* Points after last occupied position */
93 char* f_bufptr; /* Current buffer position */
94
95 int f_softspace; /* Flag used by 'print' command */
96
97 int f_univ_newline; /* Handle any newline convention */
98 int f_newlinetypes; /* Types of newlines seen */
99 int f_skipnextlf; /* Skip next \n */
100
101 BZFILE *fp; /* libbz2 stream handle handed to BZ2_bzRead/BZ2_bzWrite */
102 int mode; /* One of the MODE_* constants */
103 Py_off_t pos; /* Running byte count maintained by the read/write methods */
104 Py_off_t size; /* Set to pos when BZ_STREAM_END is seen; seek() treats -1 as "unknown" */
105#ifdef WITH_THREAD
106 PyThread_type_lock lock; /* Taken via ACQUIRE_LOCK/RELEASE_LOCK by the methods */
107#endif
108} BZ2FileObject;
109
/* Instance state holding a libbz2 bz_stream plus a `running` flag.
 * NOTE(review): judging by the name this backs the incremental compressor
 * object; the code that uses it is not visible in this part of the file. */
110typedef struct {
111 PyObject_HEAD
112 bz_stream bzs;
113 int running;
114#ifdef WITH_THREAD
115 PyThread_type_lock lock;
116#endif
117} BZ2CompObject;
118
/* Instance state holding a libbz2 bz_stream, a `running` flag and an
 * `unused_data` object reference.
 * NOTE(review): judging by the name this backs the incremental decompressor
 * object; the code that uses it is not visible in this part of the file. */
119typedef struct {
120 PyObject_HEAD
121 bz_stream bzs;
122 int running;
123 PyObject *unused_data;
124#ifdef WITH_THREAD
125 PyThread_type_lock lock;
126#endif
127} BZ2DecompObject;
128
129/* ===================================================================== */
130/* Utility functions. */
131
132/* Refuse regular I/O if there's data in the iteration-buffer.
133 * Mixing them would cause data to arrive out of order, as the read*
134 * methods don't use the iteration buffer. */
135static int
136check_iterbuffered(BZ2FileObject *f)
137{
138 if (f->f_buf != NULL &&
139 (f->f_bufend - f->f_bufptr) > 0 &&
140 f->f_buf[0] != '\0') {
141 PyErr_SetString(PyExc_ValueError,
142 "Mixing iteration and read methods would lose data");
143 return -1;
144 }
145 return 0;
146}
147
148static int
149Util_CatchBZ2Error(int bzerror)
150{
151 int ret = 0;
152 switch(bzerror) {
153 case BZ_OK:
154 case BZ_STREAM_END:
155 break;
156
157#ifdef BZ_CONFIG_ERROR
158 case BZ_CONFIG_ERROR:
159 PyErr_SetString(PyExc_SystemError,
160 "the bz2 library was not compiled "
161 "correctly");
162 ret = 1;
163 break;
164#endif
165
166 case BZ_PARAM_ERROR:
167 PyErr_SetString(PyExc_ValueError,
168 "the bz2 library has received wrong "
169 "parameters");
170 ret = 1;
171 break;
172
173 case BZ_MEM_ERROR:
174 PyErr_NoMemory();
175 ret = 1;
176 break;
177
178 case BZ_DATA_ERROR:
179 case BZ_DATA_ERROR_MAGIC:
180 PyErr_SetString(PyExc_IOError, "invalid data stream");
181 ret = 1;
182 break;
183
184 case BZ_IO_ERROR:
185 PyErr_SetString(PyExc_IOError, "unknown IO error");
186 ret = 1;
187 break;
188
189 case BZ_UNEXPECTED_EOF:
190 PyErr_SetString(PyExc_EOFError,
191 "compressed file ended before the "
192 "logical end-of-stream was detected");
193 ret = 1;
194 break;
195
196 case BZ_SEQUENCE_ERROR:
197 PyErr_SetString(PyExc_RuntimeError,
198 "wrong sequence of bz2 library "
199 "commands used");
200 ret = 1;
201 break;
202 }
203 return ret;
204}
205
/* SMALLCHUNK is the larger of BUFSIZ and 8192.  It sizes the on-stack
 * scratch buffers declared in BZ2File_readlines() and BZ2File_seek(). */
206#if BUFSIZ < 8192
207#define SMALLCHUNK 8192
208#else
209#define SMALLCHUNK BUFSIZ
210#endif
211
/* This is a hacked version of Python's fileobject.c:new_buffersize().
 *
 * Return the next buffer size to try after `currentsize`: the current size
 * plus one eighth of it plus a constant 6.  Growing by a fraction of the
 * current size keeps repeated growth amortized linear while staying well
 * below doubling.  The arithmetic is unsigned, so on overflow the result
 * wraps and ends up not larger than `currentsize`. */
static size_t
Util_NewBufferSize(size_t currentsize)
{
    size_t growth = currentsize / 8;

    return currentsize + growth + 6;
}
221
222static int
223Util_GrowBuffer(PyObject **buf)
224{
225 size_t size = PyString_GET_SIZE(*buf);
226 size_t new_size = Util_NewBufferSize(size);
227 if (new_size > size) {
228 return _PyString_Resize(buf, new_size);
229 } else { /* overflow */
230 PyErr_SetString(PyExc_OverflowError,
231 "Unable to allocate buffer - output too large");
232 return -1;
233 }
234}
235
236/* This is a hacked version of Python's fileobject.c:get_line(). */
237static PyObject *
238Util_GetLine(BZ2FileObject *f, int n)
239{
240 char c;
241 char *buf, *end;
242 size_t total_v_size; /* total # of slots in buffer */
243 size_t used_v_size; /* # used slots in buffer */
244 size_t increment; /* amount to increment the buffer */
245 PyObject *v;
246 int bzerror;
247 int bytes_read;
248 int newlinetypes = f->f_newlinetypes;
249 int skipnextlf = f->f_skipnextlf;
250 int univ_newline = f->f_univ_newline;
251
252 total_v_size = n > 0 ? n : 100;
253 v = PyString_FromStringAndSize((char *)NULL, total_v_size);
254 if (v == NULL)
255 return NULL;
256
257 buf = BUF(v);
258 end = buf + total_v_size;
259
260 for (;;) {
261 Py_BEGIN_ALLOW_THREADS
262 while (buf != end) {
263 bytes_read = BZ2_bzRead(&bzerror, f->fp, &c, 1);
264 f->pos++;
265 if (bytes_read == 0) break;
266 if (univ_newline) {
267 if (skipnextlf) {
268 skipnextlf = 0;
269 if (c == '\n') {
270 /* Seeing a \n here with skipnextlf true means we
271 * saw a \r before.
272 */
273 newlinetypes |= NEWLINE_CRLF;
274 if (bzerror != BZ_OK) break;
275 bytes_read = BZ2_bzRead(&bzerror, f->fp, &c, 1);
276 f->pos++;
277 if (bytes_read == 0) break;
278 } else {
279 newlinetypes |= NEWLINE_CR;
280 }
281 }
282 if (c == '\r') {
283 skipnextlf = 1;
284 c = '\n';
285 } else if (c == '\n')
286 newlinetypes |= NEWLINE_LF;
287 }
288 *buf++ = c;
289 if (bzerror != BZ_OK || c == '\n') break;
290 }
291 if (univ_newline && bzerror == BZ_STREAM_END && skipnextlf)
292 newlinetypes |= NEWLINE_CR;
293 Py_END_ALLOW_THREADS
294 f->f_newlinetypes = newlinetypes;
295 f->f_skipnextlf = skipnextlf;
296 if (bzerror == BZ_STREAM_END) {
297 f->size = f->pos;
298 f->mode = MODE_READ_EOF;
299 break;
300 } else if (bzerror != BZ_OK) {
301 Util_CatchBZ2Error(bzerror);
302 Py_DECREF(v);
303 return NULL;
304 }
305 if (c == '\n')
306 break;
307 /* Must be because buf == end */
308 if (n > 0)
309 break;
310 used_v_size = total_v_size;
311 increment = total_v_size >> 2; /* mild exponential growth */
312 total_v_size += increment;
313 if (total_v_size > INT_MAX) {
314 PyErr_SetString(PyExc_OverflowError,
315 "line is longer than a Python string can hold");
316 Py_DECREF(v);
317 return NULL;
318 }
319 if (_PyString_Resize(&v, total_v_size) < 0)
320 return NULL;
321 buf = BUF(v) + used_v_size;
322 end = BUF(v) + total_v_size;
323 }
324
325 used_v_size = buf - BUF(v);
326 if (used_v_size != total_v_size)
327 _PyString_Resize(&v, used_v_size);
328 return v;
329}
330
331/* This is a hacked version of Python's
332 * fileobject.c:Py_UniversalNewlineFread(). */
333size_t
334Util_UnivNewlineRead(int *bzerror, BZFILE *stream,
335 char* buf, size_t n, BZ2FileObject *f)
336{
337 char *dst = buf;
338 int newlinetypes, skipnextlf;
339
340 assert(buf != NULL);
341 assert(stream != NULL);
342
343 if (!f->f_univ_newline)
344 return BZ2_bzRead(bzerror, stream, buf, n);
345
346 newlinetypes = f->f_newlinetypes;
347 skipnextlf = f->f_skipnextlf;
348
349 /* Invariant: n is the number of bytes remaining to be filled
350 * in the buffer.
351 */
352 while (n) {
353 size_t nread;
354 int shortread;
355 char *src = dst;
356
357 nread = BZ2_bzRead(bzerror, stream, dst, n);
358 assert(nread <= n);
359 n -= nread; /* assuming 1 byte out for each in; will adjust */
360 shortread = n != 0; /* true iff EOF or error */
361 while (nread--) {
362 char c = *src++;
363 if (c == '\r') {
364 /* Save as LF and set flag to skip next LF. */
365 *dst++ = '\n';
366 skipnextlf = 1;
367 }
368 else if (skipnextlf && c == '\n') {
369 /* Skip LF, and remember we saw CR LF. */
370 skipnextlf = 0;
371 newlinetypes |= NEWLINE_CRLF;
372 ++n;
373 }
374 else {
375 /* Normal char to be stored in buffer. Also
376 * update the newlinetypes flag if either this
377 * is an LF or the previous char was a CR.
378 */
379 if (c == '\n')
380 newlinetypes |= NEWLINE_LF;
381 else if (skipnextlf)
382 newlinetypes |= NEWLINE_CR;
383 *dst++ = c;
384 skipnextlf = 0;
385 }
386 }
387 if (shortread) {
388 /* If this is EOF, update type flags. */
389 if (skipnextlf && *bzerror == BZ_STREAM_END)
390 newlinetypes |= NEWLINE_CR;
391 break;
392 }
393 }
394 f->f_newlinetypes = newlinetypes;
395 f->f_skipnextlf = skipnextlf;
396 return dst - buf;
397}
398
399/* This is a hacked version of Python's fileobject.c:drop_readahead(). */
400static void
401Util_DropReadAhead(BZ2FileObject *f)
402{
403 if (f->f_buf != NULL) {
404 PyMem_Free(f->f_buf);
405 f->f_buf = NULL;
406 }
407}
408
409/* This is a hacked version of Python's fileobject.c:readahead(). */
410static int
411Util_ReadAhead(BZ2FileObject *f, int bufsize)
412{
413 int chunksize;
414 int bzerror;
415
416 if (f->f_buf != NULL) {
417 if((f->f_bufend - f->f_bufptr) >= 1)
418 return 0;
419 else
420 Util_DropReadAhead(f);
421 }
422 if (f->mode == MODE_READ_EOF) {
423 f->f_bufptr = f->f_buf;
424 f->f_bufend = f->f_buf;
425 return 0;
426 }
427 if ((f->f_buf = PyMem_Malloc(bufsize)) == NULL) {
428 PyErr_NoMemory();
429 return -1;
430 }
431 Py_BEGIN_ALLOW_THREADS
432 chunksize = Util_UnivNewlineRead(&bzerror, f->fp, f->f_buf,
433 bufsize, f);
434 Py_END_ALLOW_THREADS
435 f->pos += chunksize;
436 if (bzerror == BZ_STREAM_END) {
437 f->size = f->pos;
438 f->mode = MODE_READ_EOF;
439 } else if (bzerror != BZ_OK) {
440 Util_CatchBZ2Error(bzerror);
441 Util_DropReadAhead(f);
442 return -1;
443 }
444 f->f_bufptr = f->f_buf;
445 f->f_bufend = f->f_buf + chunksize;
446 return 0;
447}
448
449/* This is a hacked version of Python's
450 * fileobject.c:readahead_get_line_skip(). */
451static PyStringObject *
452Util_ReadAheadGetLineSkip(BZ2FileObject *f, int skip, int bufsize)
453{
454 PyStringObject* s;
455 char *bufptr;
456 char *buf;
457 int len;
458
459 if (f->f_buf == NULL)
460 if (Util_ReadAhead(f, bufsize) < 0)
461 return NULL;
462
463 len = f->f_bufend - f->f_bufptr;
464 if (len == 0)
465 return (PyStringObject *)
466 PyString_FromStringAndSize(NULL, skip);
467 bufptr = memchr(f->f_bufptr, '\n', len);
468 if (bufptr != NULL) {
469 bufptr++; /* Count the '\n' */
470 len = bufptr - f->f_bufptr;
471 s = (PyStringObject *)
472 PyString_FromStringAndSize(NULL, skip+len);
473 if (s == NULL)
474 return NULL;
475 memcpy(PyString_AS_STRING(s)+skip, f->f_bufptr, len);
476 f->f_bufptr = bufptr;
477 if (bufptr == f->f_bufend)
478 Util_DropReadAhead(f);
479 } else {
480 bufptr = f->f_bufptr;
481 buf = f->f_buf;
482 f->f_buf = NULL; /* Force new readahead buffer */
483 s = Util_ReadAheadGetLineSkip(f, skip+len,
484 bufsize + (bufsize>>2));
485 if (s == NULL) {
486 PyMem_Free(buf);
487 return NULL;
488 }
489 memcpy(PyString_AS_STRING(s)+skip, bufptr, len);
490 PyMem_Free(buf);
491 }
492 return s;
493}
494
495/* ===================================================================== */
496/* Methods of BZ2File. */
497
498PyDoc_STRVAR(BZ2File_read__doc__,
499"read([size]) -> string\n\
500\n\
501Read at most size uncompressed bytes, returned as a string. If the size\n\
502argument is negative or omitted, read until EOF is reached.\n\
503");
504
505/* This is a hacked version of Python's fileobject.c:file_read(). */
506static PyObject *
507BZ2File_read(BZ2FileObject *self, PyObject *args)
508{
509 long bytesrequested = -1;
510 size_t bytesread, buffersize, chunksize;
511 int bzerror;
512 PyObject *ret = NULL;
513
514 if (!PyArg_ParseTuple(args, "|l:read", &bytesrequested))
515 return NULL;
516
517 ACQUIRE_LOCK(self);
518 switch (self->mode) {
519 case MODE_READ:
520 break;
521 case MODE_READ_EOF:
522 ret = PyString_FromString("");
523 goto cleanup;
524 case MODE_CLOSED:
525 PyErr_SetString(PyExc_ValueError,
526 "I/O operation on closed file");
527 goto cleanup;
528 default:
529 PyErr_SetString(PyExc_IOError,
530 "file is not ready for reading");
531 goto cleanup;
532 }
533
534 /* refuse to mix with f.next() */
535 if (check_iterbuffered(self))
536 goto cleanup;
537
538 if (bytesrequested < 0)
539 buffersize = Util_NewBufferSize((size_t)0);
540 else
541 buffersize = bytesrequested;
542 if (buffersize > INT_MAX) {
543 PyErr_SetString(PyExc_OverflowError,
544 "requested number of bytes is "
545 "more than a Python string can hold");
546 goto cleanup;
547 }
548 ret = PyString_FromStringAndSize((char *)NULL, buffersize);
549 if (ret == NULL)
550 goto cleanup;
551 bytesread = 0;
552
553 for (;;) {
554 Py_BEGIN_ALLOW_THREADS
555 chunksize = Util_UnivNewlineRead(&bzerror, self->fp,
556 BUF(ret)+bytesread,
557 buffersize-bytesread,
558 self);
559 self->pos += chunksize;
560 Py_END_ALLOW_THREADS
561 bytesread += chunksize;
562 if (bzerror == BZ_STREAM_END) {
563 self->size = self->pos;
564 self->mode = MODE_READ_EOF;
565 break;
566 } else if (bzerror != BZ_OK) {
567 Util_CatchBZ2Error(bzerror);
568 Py_DECREF(ret);
569 ret = NULL;
570 goto cleanup;
571 }
572 if (bytesrequested < 0) {
573 buffersize = Util_NewBufferSize(buffersize);
574 if (_PyString_Resize(&ret, buffersize) < 0)
575 goto cleanup;
576 } else {
577 break;
578 }
579 }
580 if (bytesread != buffersize)
581 _PyString_Resize(&ret, bytesread);
582
583cleanup:
584 RELEASE_LOCK(self);
585 return ret;
586}
587
588PyDoc_STRVAR(BZ2File_readline__doc__,
589"readline([size]) -> string\n\
590\n\
591Return the next line from the file, as a string, retaining newline.\n\
592A non-negative size argument will limit the maximum number of bytes to\n\
593return (an incomplete line may be returned then). Return an empty\n\
594string at EOF.\n\
595");
596
597static PyObject *
598BZ2File_readline(BZ2FileObject *self, PyObject *args)
599{
600 PyObject *ret = NULL;
601 int sizehint = -1;
602
603 if (!PyArg_ParseTuple(args, "|i:readline", &sizehint))
604 return NULL;
605
606 ACQUIRE_LOCK(self);
607 switch (self->mode) {
608 case MODE_READ:
609 break;
610 case MODE_READ_EOF:
611 ret = PyString_FromString("");
612 goto cleanup;
613 case MODE_CLOSED:
614 PyErr_SetString(PyExc_ValueError,
615 "I/O operation on closed file");
616 goto cleanup;
617 default:
618 PyErr_SetString(PyExc_IOError,
619 "file is not ready for reading");
620 goto cleanup;
621 }
622
623 /* refuse to mix with f.next() */
624 if (check_iterbuffered(self))
625 goto cleanup;
626
627 if (sizehint == 0)
628 ret = PyString_FromString("");
629 else
630 ret = Util_GetLine(self, (sizehint < 0) ? 0 : sizehint);
631
632cleanup:
633 RELEASE_LOCK(self);
634 return ret;
635}
636
637PyDoc_STRVAR(BZ2File_readlines__doc__,
638"readlines([size]) -> list\n\
639\n\
640Call readline() repeatedly and return a list of lines read.\n\
641The optional size argument, if given, is an approximate bound on the\n\
642total number of bytes in the lines returned.\n\
643");
644
645/* This is a hacked version of Python's fileobject.c:file_readlines(). */
646static PyObject *
647BZ2File_readlines(BZ2FileObject *self, PyObject *args)
648{
649 long sizehint = 0;
650 PyObject *list = NULL;
651 PyObject *line;
652 char small_buffer[SMALLCHUNK];
653 char *buffer = small_buffer;
654 size_t buffersize = SMALLCHUNK;
655 PyObject *big_buffer = NULL;
656 size_t nfilled = 0;
657 size_t nread;
658 size_t totalread = 0;
659 char *p, *q, *end;
660 int err;
661 int shortread = 0;
662 int bzerror;
663
664 if (!PyArg_ParseTuple(args, "|l:readlines", &sizehint))
665 return NULL;
666
667 ACQUIRE_LOCK(self);
668 switch (self->mode) {
669 case MODE_READ:
670 break;
671 case MODE_READ_EOF:
672 list = PyList_New(0);
673 goto cleanup;
674 case MODE_CLOSED:
675 PyErr_SetString(PyExc_ValueError,
676 "I/O operation on closed file");
677 goto cleanup;
678 default:
679 PyErr_SetString(PyExc_IOError,
680 "file is not ready for reading");
681 goto cleanup;
682 }
683
684 /* refuse to mix with f.next() */
685 if (check_iterbuffered(self))
686 goto cleanup;
687
688 if ((list = PyList_New(0)) == NULL)
689 goto cleanup;
690
691 for (;;) {
692 Py_BEGIN_ALLOW_THREADS
693 nread = Util_UnivNewlineRead(&bzerror, self->fp,
694 buffer+nfilled,
695 buffersize-nfilled, self);
696 self->pos += nread;
697 Py_END_ALLOW_THREADS
698 if (bzerror == BZ_STREAM_END) {
699 self->size = self->pos;
700 self->mode = MODE_READ_EOF;
701 if (nread == 0) {
702 sizehint = 0;
703 break;
704 }
705 shortread = 1;
706 } else if (bzerror != BZ_OK) {
707 Util_CatchBZ2Error(bzerror);
708 error:
709 Py_DECREF(list);
710 list = NULL;
711 goto cleanup;
712 }
713 totalread += nread;
714 p = memchr(buffer+nfilled, '\n', nread);
715 if (!shortread && p == NULL) {
716 /* Need a larger buffer to fit this line */
717 nfilled += nread;
718 buffersize *= 2;
719 if (buffersize > INT_MAX) {
720 PyErr_SetString(PyExc_OverflowError,
721 "line is longer than a Python string can hold");
722 goto error;
723 }
724 if (big_buffer == NULL) {
725 /* Create the big buffer */
726 big_buffer = PyString_FromStringAndSize(
727 NULL, buffersize);
728 if (big_buffer == NULL)
729 goto error;
730 buffer = PyString_AS_STRING(big_buffer);
731 memcpy(buffer, small_buffer, nfilled);
732 }
733 else {
734 /* Grow the big buffer */
735 _PyString_Resize(&big_buffer, buffersize);
736 buffer = PyString_AS_STRING(big_buffer);
737 }
738 continue;
739 }
740 end = buffer+nfilled+nread;
741 q = buffer;
742 while (p != NULL) {
743 /* Process complete lines */
744 p++;
745 line = PyString_FromStringAndSize(q, p-q);
746 if (line == NULL)
747 goto error;
748 err = PyList_Append(list, line);
749 Py_DECREF(line);
750 if (err != 0)
751 goto error;
752 q = p;
753 p = memchr(q, '\n', end-q);
754 }
755 /* Move the remaining incomplete line to the start */
756 nfilled = end-q;
757 memmove(buffer, q, nfilled);
758 if (sizehint > 0)
759 if (totalread >= (size_t)sizehint)
760 break;
761 if (shortread) {
762 sizehint = 0;
763 break;
764 }
765 }
766 if (nfilled != 0) {
767 /* Partial last line */
768 line = PyString_FromStringAndSize(buffer, nfilled);
769 if (line == NULL)
770 goto error;
771 if (sizehint > 0) {
772 /* Need to complete the last line */
773 PyObject *rest = Util_GetLine(self, 0);
774 if (rest == NULL) {
775 Py_DECREF(line);
776 goto error;
777 }
778 PyString_Concat(&line, rest);
779 Py_DECREF(rest);
780 if (line == NULL)
781 goto error;
782 }
783 err = PyList_Append(list, line);
784 Py_DECREF(line);
785 if (err != 0)
786 goto error;
787 }
788
789 cleanup:
790 RELEASE_LOCK(self);
791 if (big_buffer) {
792 Py_DECREF(big_buffer);
793 }
794 return list;
795}
796
/* Docstring for the xreadlines() method; the method table further down
 * binds that name to BZ2File_getiter(). */
797PyDoc_STRVAR(BZ2File_xreadlines__doc__,
798"xreadlines() -> self\n\
799\n\
800For backward compatibility. BZ2File objects now include the performance\n\
801optimizations previously implemented in the xreadlines module.\n\
802");
803
804PyDoc_STRVAR(BZ2File_write__doc__,
805"write(data) -> None\n\
806\n\
807Write the 'data' string to file. Note that due to buffering, close() may\n\
808be needed before the file on disk reflects the data written.\n\
809");
810
811/* This is a hacked version of Python's fileobject.c:file_write(). */
812static PyObject *
813BZ2File_write(BZ2FileObject *self, PyObject *args)
814{
815 PyObject *ret = NULL;
816 Py_buffer pbuf;
817 char *buf;
818 int len;
819 int bzerror;
820
821 if (!PyArg_ParseTuple(args, "s*:write", &pbuf))
822 return NULL;
823 buf = pbuf.buf;
824 len = pbuf.len;
825
826 ACQUIRE_LOCK(self);
827 switch (self->mode) {
828 case MODE_WRITE:
829 break;
830
831 case MODE_CLOSED:
832 PyErr_SetString(PyExc_ValueError,
833 "I/O operation on closed file");
834 goto cleanup;
835
836 default:
837 PyErr_SetString(PyExc_IOError,
838 "file is not ready for writing");
839 goto cleanup;
840 }
841
842 self->f_softspace = 0;
843
844 Py_BEGIN_ALLOW_THREADS
845 BZ2_bzWrite (&bzerror, self->fp, buf, len);
846 self->pos += len;
847 Py_END_ALLOW_THREADS
848
849 if (bzerror != BZ_OK) {
850 Util_CatchBZ2Error(bzerror);
851 goto cleanup;
852 }
853
854 Py_INCREF(Py_None);
855 ret = Py_None;
856
857cleanup:
858 PyBuffer_Release(&pbuf);
859 RELEASE_LOCK(self);
860 return ret;
861}
862
863PyDoc_STRVAR(BZ2File_writelines__doc__,
864"writelines(sequence_of_strings) -> None\n\
865\n\
866Write the sequence of strings to the file. Note that newlines are not\n\
867added. The sequence can be any iterable object producing strings. This is\n\
868equivalent to calling write() for each string.\n\
869");
870
871/* This is a hacked version of Python's fileobject.c:file_writelines(). */
872static PyObject *
873BZ2File_writelines(BZ2FileObject *self, PyObject *seq)
874{
875#define CHUNKSIZE 1000
876 PyObject *list = NULL;
877 PyObject *iter = NULL;
878 PyObject *ret = NULL;
879 PyObject *line;
880 int i, j, index, len, islist;
881 int bzerror;
882
883 ACQUIRE_LOCK(self);
884 switch (self->mode) {
885 case MODE_WRITE:
886 break;
887
888 case MODE_CLOSED:
889 PyErr_SetString(PyExc_ValueError,
890 "I/O operation on closed file");
891 goto error;
892
893 default:
894 PyErr_SetString(PyExc_IOError,
895 "file is not ready for writing");
896 goto error;
897 }
898
899 islist = PyList_Check(seq);
900 if (!islist) {
901 iter = PyObject_GetIter(seq);
902 if (iter == NULL) {
903 PyErr_SetString(PyExc_TypeError,
904 "writelines() requires an iterable argument");
905 goto error;
906 }
907 list = PyList_New(CHUNKSIZE);
908 if (list == NULL)
909 goto error;
910 }
911
912 /* Strategy: slurp CHUNKSIZE lines into a private list,
913 checking that they are all strings, then write that list
914 without holding the interpreter lock, then come back for more. */
915 for (index = 0; ; index += CHUNKSIZE) {
916 if (islist) {
917 Py_XDECREF(list);
918 list = PyList_GetSlice(seq, index, index+CHUNKSIZE);
919 if (list == NULL)
920 goto error;
921 j = PyList_GET_SIZE(list);
922 }
923 else {
924 for (j = 0; j < CHUNKSIZE; j++) {
925 line = PyIter_Next(iter);
926 if (line == NULL) {
927 if (PyErr_Occurred())
928 goto error;
929 break;
930 }
931 PyList_SetItem(list, j, line);
932 }
933 }
934 if (j == 0)
935 break;
936
937 /* Check that all entries are indeed strings. If not,
938 apply the same rules as for file.write() and
939 convert the rets to strings. This is slow, but
940 seems to be the only way since all conversion APIs
941 could potentially execute Python code. */
942 for (i = 0; i < j; i++) {
943 PyObject *v = PyList_GET_ITEM(list, i);
944 if (!PyString_Check(v)) {
945 const char *buffer;
946 Py_ssize_t len;
947 if (PyObject_AsCharBuffer(v, &buffer, &len)) {
948 PyErr_SetString(PyExc_TypeError,
949 "writelines() "
950 "argument must be "
951 "a sequence of "
952 "strings");
953 goto error;
954 }
955 line = PyString_FromStringAndSize(buffer,
956 len);
957 if (line == NULL)
958 goto error;
959 Py_DECREF(v);
960 PyList_SET_ITEM(list, i, line);
961 }
962 }
963
964 self->f_softspace = 0;
965
966 /* Since we are releasing the global lock, the
967 following code may *not* execute Python code. */
968 Py_BEGIN_ALLOW_THREADS
969 for (i = 0; i < j; i++) {
970 line = PyList_GET_ITEM(list, i);
971 len = PyString_GET_SIZE(line);
972 BZ2_bzWrite (&bzerror, self->fp,
973 PyString_AS_STRING(line), len);
974 if (bzerror != BZ_OK) {
975 Py_BLOCK_THREADS
976 Util_CatchBZ2Error(bzerror);
977 goto error;
978 }
979 }
980 Py_END_ALLOW_THREADS
981
982 if (j < CHUNKSIZE)
983 break;
984 }
985
986 Py_INCREF(Py_None);
987 ret = Py_None;
988
989 error:
990 RELEASE_LOCK(self);
991 Py_XDECREF(list);
992 Py_XDECREF(iter);
993 return ret;
994#undef CHUNKSIZE
995}
996
997PyDoc_STRVAR(BZ2File_seek__doc__,
998"seek(offset [, whence]) -> None\n\
999\n\
1000Move to new file position. Argument offset is a byte count. Optional\n\
1001argument whence defaults to 0 (offset from start of file, offset\n\
1002should be >= 0); other values are 1 (move relative to current position,\n\
1003positive or negative), and 2 (move relative to end of file, usually\n\
1004negative, although many platforms allow seeking beyond the end of a file).\n\
1005\n\
1006Note that seeking of bz2 files is emulated, and depending on the parameters\n\
1007the operation may be extremely slow.\n\
1008");
1009
1010static PyObject *
1011BZ2File_seek(BZ2FileObject *self, PyObject *args)
1012{
1013 int where = 0;
1014 PyObject *offobj;
1015 Py_off_t offset;
1016 char small_buffer[SMALLCHUNK];
1017 char *buffer = small_buffer;
1018 size_t buffersize = SMALLCHUNK;
1019 Py_off_t bytesread = 0;
1020 size_t readsize;
1021 int chunksize;
1022 int bzerror;
1023 PyObject *ret = NULL;
1024
1025 if (!PyArg_ParseTuple(args, "O|i:seek", &offobj, &where))
1026 return NULL;
1027#if !defined(HAVE_LARGEFILE_SUPPORT)
1028 offset = PyInt_AsLong(offobj);
1029#else
1030 offset = PyLong_Check(offobj) ?
1031 PyLong_AsLongLong(offobj) : PyInt_AsLong(offobj);
1032#endif
1033 if (PyErr_Occurred())
1034 return NULL;
1035
1036 ACQUIRE_LOCK(self);
1037 Util_DropReadAhead(self);
1038 switch (self->mode) {
1039 case MODE_READ:
1040 case MODE_READ_EOF:
1041 break;
1042
1043 case MODE_CLOSED:
1044 PyErr_SetString(PyExc_ValueError,
1045 "I/O operation on closed file");
1046 goto cleanup;
1047
1048 default:
1049 PyErr_SetString(PyExc_IOError,
1050 "seek works only while reading");
1051 goto cleanup;
1052 }
1053
1054 if (where == 2) {
1055 if (self->size == -1) {
1056 assert(self->mode != MODE_READ_EOF);
1057 for (;;) {
1058 Py_BEGIN_ALLOW_THREADS
1059 chunksize = Util_UnivNewlineRead(
1060 &bzerror, self->fp,
1061 buffer, buffersize,
1062 self);
1063 self->pos += chunksize;
1064 Py_END_ALLOW_THREADS
1065
1066 bytesread += chunksize;
1067 if (bzerror == BZ_STREAM_END) {
1068 break;
1069 } else if (bzerror != BZ_OK) {
1070 Util_CatchBZ2Error(bzerror);
1071 goto cleanup;
1072 }
1073 }
1074 self->mode = MODE_READ_EOF;
1075 self->size = self->pos;
1076 bytesread = 0;
1077 }
1078 offset = self->size + offset;
1079 } else if (where == 1) {
1080 offset = self->pos + offset;
1081 }
1082
1083 /* Before getting here, offset must be the absolute position the file
1084 * pointer should be set to. */
1085
1086 if (offset >= self->pos) {
1087 /* we can move forward */
1088 offset -= self->pos;
1089 } else {
1090 /* we cannot move back, so rewind the stream */
1091 BZ2_bzReadClose(&bzerror, self->fp);
1092 if (self->fp) {
1093 PyFile_DecUseCount((PyFileObject *)self->file);
1094 self->fp = NULL;
1095 }
1096 if (bzerror != BZ_OK) {
1097 Util_CatchBZ2Error(bzerror);
1098 goto cleanup;
1099 }
1100 ret = PyObject_CallMethod(self->file, "seek", "(i)", 0);
1101 if (!ret)
1102 goto cleanup;
1103 Py_DECREF(ret);
1104 ret = NULL;
1105 self->pos = 0;
1106 self->fp = BZ2_bzReadOpen(&bzerror, PyFile_AsFile(self->file),
1107 0, 0, NULL, 0);
1108 if (self->fp)
1109 PyFile_IncUseCount((PyFileObject *)self->file);
1110 if (bzerror != BZ_OK) {
1111 Util_CatchBZ2Error(bzerror);
1112 goto cleanup;
1113 }
1114 self->mode = MODE_READ;
1115 }
1116
1117 if (offset <= 0 || self->mode == MODE_READ_EOF)
1118 goto exit;
1119
1120 /* Before getting here, offset must be set to the number of bytes
1121 * to walk forward. */
1122 for (;;) {
1123 if (offset-bytesread > buffersize)
1124 readsize = buffersize;
1125 else
1126 /* offset might be wider that readsize, but the result
1127 * of the subtraction is bound by buffersize (see the
1128 * condition above). buffersize is 8192. */
1129 readsize = (size_t)(offset-bytesread);
1130 Py_BEGIN_ALLOW_THREADS
1131 chunksize = Util_UnivNewlineRead(&bzerror, self->fp,
1132 buffer, readsize, self);
1133 self->pos += chunksize;
1134 Py_END_ALLOW_THREADS
1135 bytesread += chunksize;
1136 if (bzerror == BZ_STREAM_END) {
1137 self->size = self->pos;
1138 self->mode = MODE_READ_EOF;
1139 break;
1140 } else if (bzerror != BZ_OK) {
1141 Util_CatchBZ2Error(bzerror);
1142 goto cleanup;
1143 }
1144 if (bytesread == offset)
1145 break;
1146 }
1147
1148exit:
1149 Py_INCREF(Py_None);
1150 ret = Py_None;
1151
1152cleanup:
1153 RELEASE_LOCK(self);
1154 return ret;
1155}
1156
1157PyDoc_STRVAR(BZ2File_tell__doc__,
1158"tell() -> int\n\
1159\n\
1160Return the current file position, an integer (may be a long integer).\n\
1161");
1162
1163static PyObject *
1164BZ2File_tell(BZ2FileObject *self, PyObject *args)
1165{
1166 PyObject *ret = NULL;
1167
1168 if (self->mode == MODE_CLOSED) {
1169 PyErr_SetString(PyExc_ValueError,
1170 "I/O operation on closed file");
1171 goto cleanup;
1172 }
1173
1174#if !defined(HAVE_LARGEFILE_SUPPORT)
1175 ret = PyInt_FromLong(self->pos);
1176#else
1177 ret = PyLong_FromLongLong(self->pos);
1178#endif
1179
1180cleanup:
1181 return ret;
1182}
1183
1184PyDoc_STRVAR(BZ2File_close__doc__,
1185"close() -> None or (perhaps) an integer\n\
1186\n\
1187Close the file. Sets data attribute .closed to true. A closed file\n\
1188cannot be used for further I/O operations. close() may be called more\n\
1189than once without error.\n\
1190");
1191
1192static PyObject *
1193BZ2File_close(BZ2FileObject *self)
1194{
1195 PyObject *ret = NULL;
1196 int bzerror = BZ_OK;
1197
1198 ACQUIRE_LOCK(self);
1199 switch (self->mode) {
1200 case MODE_READ:
1201 case MODE_READ_EOF:
1202 BZ2_bzReadClose(&bzerror, self->fp);
1203 break;
1204 case MODE_WRITE:
1205 BZ2_bzWriteClose(&bzerror, self->fp,
1206 0, NULL, NULL);
1207 break;
1208 }
1209 if (self->fp) {
1210 PyFile_DecUseCount((PyFileObject *)self->file);
1211 self->fp = NULL;
1212 }
1213 self->mode = MODE_CLOSED;
1214 ret = PyObject_CallMethod(self->file, "close", NULL);
1215 if (bzerror != BZ_OK) {
1216 Util_CatchBZ2Error(bzerror);
1217 Py_XDECREF(ret);
1218 ret = NULL;
1219 }
1220
1221 RELEASE_LOCK(self);
1222 return ret;
1223}
1224
1225PyDoc_STRVAR(BZ2File_enter_doc,
1226"__enter__() -> self.");
1227
1228static PyObject *
1229BZ2File_enter(BZ2FileObject *self)
1230{
1231 if (self->mode == MODE_CLOSED) {
1232 PyErr_SetString(PyExc_ValueError,
1233 "I/O operation on closed file");
1234 return NULL;
1235 }
1236 Py_INCREF(self);
1237 return (PyObject *) self;
1238}
1239
1240PyDoc_STRVAR(BZ2File_exit_doc,
1241"__exit__(*excinfo) -> None. Closes the file.");
1242
1243static PyObject *
1244BZ2File_exit(BZ2FileObject *self, PyObject *args)
1245{
1246 PyObject *ret = PyObject_CallMethod((PyObject *) self, "close", NULL);
1247 if (!ret)
1248 /* If error occurred, pass through */
1249 return NULL;
1250 Py_DECREF(ret);
1251 Py_RETURN_NONE;
1252}
1253
1254
1255static PyObject *BZ2File_getiter(BZ2FileObject *self);
1256
1257static PyMethodDef BZ2File_methods[] = {
1258 {"read", (PyCFunction)BZ2File_read, METH_VARARGS, BZ2File_read__doc__},
1259 {"readline", (PyCFunction)BZ2File_readline, METH_VARARGS, BZ2File_readline__doc__},
1260 {"readlines", (PyCFunction)BZ2File_readlines, METH_VARARGS, BZ2File_readlines__doc__},
1261 {"xreadlines", (PyCFunction)BZ2File_getiter, METH_VARARGS, BZ2File_xreadlines__doc__},
1262 {"write", (PyCFunction)BZ2File_write, METH_VARARGS, BZ2File_write__doc__},
1263 {"writelines", (PyCFunction)BZ2File_writelines, METH_O, BZ2File_writelines__doc__},
1264 {"seek", (PyCFunction)BZ2File_seek, METH_VARARGS, BZ2File_seek__doc__},
1265 {"tell", (PyCFunction)BZ2File_tell, METH_NOARGS, BZ2File_tell__doc__},
1266 {"close", (PyCFunction)BZ2File_close, METH_NOARGS, BZ2File_close__doc__},
1267 {"__enter__", (PyCFunction)BZ2File_enter, METH_NOARGS, BZ2File_enter_doc},
1268 {"__exit__", (PyCFunction)BZ2File_exit, METH_VARARGS, BZ2File_exit_doc},
1269 {NULL, NULL} /* sentinel */
1270};
1271
1272
1273/* ===================================================================== */
1274/* Getters and setters of BZ2File. */
1275
1276/* This is a hacked version of Python's fileobject.c:get_newlines(). */
1277static PyObject *
1278BZ2File_get_newlines(BZ2FileObject *self, void *closure)
1279{
1280 switch (self->f_newlinetypes) {
1281 case NEWLINE_UNKNOWN:
1282 Py_INCREF(Py_None);
1283 return Py_None;
1284 case NEWLINE_CR:
1285 return PyString_FromString("\r");
1286 case NEWLINE_LF:
1287 return PyString_FromString("\n");
1288 case NEWLINE_CR|NEWLINE_LF:
1289 return Py_BuildValue("(ss)", "\r", "\n");
1290 case NEWLINE_CRLF:
1291 return PyString_FromString("\r\n");
1292 case NEWLINE_CR|NEWLINE_CRLF:
1293 return Py_BuildValue("(ss)", "\r", "\r\n");
1294 case NEWLINE_LF|NEWLINE_CRLF:
1295 return Py_BuildValue("(ss)", "\n", "\r\n");
1296 case NEWLINE_CR|NEWLINE_LF|NEWLINE_CRLF:
1297 return Py_BuildValue("(sss)", "\r", "\n", "\r\n");
1298 default:
1299 PyErr_Format(PyExc_SystemError,
1300 "Unknown newlines value 0x%x\n",
1301 self->f_newlinetypes);
1302 return NULL;
1303 }
1304}
1305
1306static PyObject *
1307BZ2File_get_closed(BZ2FileObject *self, void *closure)
1308{
1309 return PyInt_FromLong(self->mode == MODE_CLOSED);
1310}
1311
1312static PyObject *
1313BZ2File_get_mode(BZ2FileObject *self, void *closure)
1314{
1315 return PyObject_GetAttrString(self->file, "mode");
1316}
1317
1318static PyObject *
1319BZ2File_get_name(BZ2FileObject *self, void *closure)
1320{
1321 return PyObject_GetAttrString(self->file, "name");
1322}
1323
1324static PyGetSetDef BZ2File_getset[] = {
1325 {"closed", (getter)BZ2File_get_closed, NULL,
1326 "True if the file is closed"},
1327 {"newlines", (getter)BZ2File_get_newlines, NULL,
1328 "end-of-line convention used in this file"},
1329 {"mode", (getter)BZ2File_get_mode, NULL,
1330 "file mode ('r', 'w', or 'U')"},
1331 {"name", (getter)BZ2File_get_name, NULL,
1332 "file name"},
1333 {NULL} /* Sentinel */
1334};
1335
1336
1337/* ===================================================================== */
1338/* Members of BZ2File_Type. */
1339
1340#undef OFF
1341#define OFF(x) offsetof(BZ2FileObject, x)
1342
1343static PyMemberDef BZ2File_members[] = {
1344 {"softspace", T_INT, OFF(f_softspace), 0,
1345 "flag indicating that a space needs to be printed; used by print"},
1346 {NULL} /* Sentinel */
1347};
1348
1349/* ===================================================================== */
1350/* Slot definitions for BZ2File_Type. */
1351
1352static int
1353BZ2File_init(BZ2FileObject *self, PyObject *args, PyObject *kwargs)
1354{
1355 static char *kwlist[] = {"filename", "mode", "buffering",
1356 "compresslevel", 0};
1357 PyObject *name;
1358 char *mode = "r";
1359 int buffering = -1;
1360 int compresslevel = 9;
1361 int bzerror;
1362 int mode_char = 0;
1363
1364 self->size = -1;
1365
1366 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "O|sii:BZ2File",
1367 kwlist, &name, &mode, &buffering,
1368 &compresslevel))
1369 return -1;
1370
1371 if (compresslevel < 1 || compresslevel > 9) {
1372 PyErr_SetString(PyExc_ValueError,
1373 "compresslevel must be between 1 and 9");
1374 return -1;
1375 }
1376
1377 for (;;) {
1378 int error = 0;
1379 switch (*mode) {
1380 case 'r':
1381 case 'w':
1382 if (mode_char)
1383 error = 1;
1384 mode_char = *mode;
1385 break;
1386
1387 case 'b':
1388 break;
1389
1390 case 'U':
1391#ifdef __VMS
1392 self->f_univ_newline = 0;
1393#else
1394 self->f_univ_newline = 1;
1395#endif
1396 break;
1397
1398 default:
1399 error = 1;
1400 break;
1401 }
1402 if (error) {
1403 PyErr_Format(PyExc_ValueError,
1404 "invalid mode char %c", *mode);
1405 return -1;
1406 }
1407 mode++;
1408 if (*mode == '\0')
1409 break;
1410 }
1411
1412 if (mode_char == 0) {
1413 mode_char = 'r';
1414 }
1415
1416 mode = (mode_char == 'r') ? "rb" : "wb";
1417
1418 self->file = PyObject_CallFunction((PyObject*)&PyFile_Type, "(Osi)",
1419 name, mode, buffering);
1420 if (self->file == NULL)
1421 return -1;
1422
1423 /* From now on, we have stuff to dealloc, so jump to error label
1424 * instead of returning */
1425
1426#ifdef WITH_THREAD
1427 self->lock = PyThread_allocate_lock();
1428 if (!self->lock) {
1429 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
1430 goto error;
1431 }
1432#endif
1433
1434 if (mode_char == 'r')
1435 self->fp = BZ2_bzReadOpen(&bzerror,
1436 PyFile_AsFile(self->file),
1437 0, 0, NULL, 0);
1438 else
1439 self->fp = BZ2_bzWriteOpen(&bzerror,
1440 PyFile_AsFile(self->file),
1441 compresslevel, 0, 0);
1442
1443 if (bzerror != BZ_OK) {
1444 Util_CatchBZ2Error(bzerror);
1445 goto error;
1446 }
1447 PyFile_IncUseCount((PyFileObject *)self->file);
1448
1449 self->mode = (mode_char == 'r') ? MODE_READ : MODE_WRITE;
1450
1451 return 0;
1452
1453error:
1454 Py_CLEAR(self->file);
1455#ifdef WITH_THREAD
1456 if (self->lock) {
1457 PyThread_free_lock(self->lock);
1458 self->lock = NULL;
1459 }
1460#endif
1461 return -1;
1462}
1463
1464static void
1465BZ2File_dealloc(BZ2FileObject *self)
1466{
1467 int bzerror;
1468#ifdef WITH_THREAD
1469 if (self->lock)
1470 PyThread_free_lock(self->lock);
1471#endif
1472 switch (self->mode) {
1473 case MODE_READ:
1474 case MODE_READ_EOF:
1475 BZ2_bzReadClose(&bzerror, self->fp);
1476 break;
1477 case MODE_WRITE:
1478 BZ2_bzWriteClose(&bzerror, self->fp,
1479 0, NULL, NULL);
1480 break;
1481 }
1482 if (self->fp) {
1483 PyFile_DecUseCount((PyFileObject *)self->file);
1484 self->fp = NULL;
1485 }
1486 Util_DropReadAhead(self);
1487 Py_XDECREF(self->file);
1488 Py_TYPE(self)->tp_free((PyObject *)self);
1489}
1490
1491/* This is a hacked version of Python's fileobject.c:file_getiter(). */
1492static PyObject *
1493BZ2File_getiter(BZ2FileObject *self)
1494{
1495 if (self->mode == MODE_CLOSED) {
1496 PyErr_SetString(PyExc_ValueError,
1497 "I/O operation on closed file");
1498 return NULL;
1499 }
1500 Py_INCREF((PyObject*)self);
1501 return (PyObject *)self;
1502}
1503
1504/* This is a hacked version of Python's fileobject.c:file_iternext(). */
1505#define READAHEAD_BUFSIZE 8192
1506static PyObject *
1507BZ2File_iternext(BZ2FileObject *self)
1508{
1509 PyStringObject* ret;
1510 ACQUIRE_LOCK(self);
1511 if (self->mode == MODE_CLOSED) {
1512 RELEASE_LOCK(self);
1513 PyErr_SetString(PyExc_ValueError,
1514 "I/O operation on closed file");
1515 return NULL;
1516 }
1517 ret = Util_ReadAheadGetLineSkip(self, 0, READAHEAD_BUFSIZE);
1518 RELEASE_LOCK(self);
1519 if (ret == NULL || PyString_GET_SIZE(ret) == 0) {
1520 Py_XDECREF(ret);
1521 return NULL;
1522 }
1523 return (PyObject *)ret;
1524}
1525
1526/* ===================================================================== */
1527/* BZ2File_Type definition. */
1528
1529PyDoc_VAR(BZ2File__doc__) =
1530PyDoc_STR(
1531"BZ2File(name [, mode='r', buffering=0, compresslevel=9]) -> file object\n\
1532\n\
1533Open a bz2 file. The mode can be 'r' or 'w', for reading (default) or\n\
1534writing. When opened for writing, the file will be created if it doesn't\n\
1535exist, and truncated otherwise. If the buffering argument is given, 0 means\n\
1536unbuffered, and larger numbers specify the buffer size. If compresslevel\n\
1537is given, must be a number between 1 and 9.\n\
1538")
1539PyDoc_STR(
1540"\n\
1541Add a 'U' to mode to open the file for input with universal newline\n\
1542support. Any line ending in the input file will be seen as a '\\n' in\n\
1543Python. Also, a file so opened gains the attribute 'newlines'; the value\n\
1544for this attribute is one of None (no newline read yet), '\\r', '\\n',\n\
1545'\\r\\n' or a tuple containing all the newline types seen. Universal\n\
1546newlines are available only when reading.\n\
1547")
1548;
1549
1550static PyTypeObject BZ2File_Type = {
1551 PyVarObject_HEAD_INIT(NULL, 0)
1552 "bz2.BZ2File", /*tp_name*/
1553 sizeof(BZ2FileObject), /*tp_basicsize*/
1554 0, /*tp_itemsize*/
1555 (destructor)BZ2File_dealloc, /*tp_dealloc*/
1556 0, /*tp_print*/
1557 0, /*tp_getattr*/
1558 0, /*tp_setattr*/
1559 0, /*tp_compare*/
1560 0, /*tp_repr*/
1561 0, /*tp_as_number*/
1562 0, /*tp_as_sequence*/
1563 0, /*tp_as_mapping*/
1564 0, /*tp_hash*/
1565 0, /*tp_call*/
1566 0, /*tp_str*/
1567 PyObject_GenericGetAttr,/*tp_getattro*/
1568 PyObject_GenericSetAttr,/*tp_setattro*/
1569 0, /*tp_as_buffer*/
1570 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1571 BZ2File__doc__, /*tp_doc*/
1572 0, /*tp_traverse*/
1573 0, /*tp_clear*/
1574 0, /*tp_richcompare*/
1575 0, /*tp_weaklistoffset*/
1576 (getiterfunc)BZ2File_getiter, /*tp_iter*/
1577 (iternextfunc)BZ2File_iternext, /*tp_iternext*/
1578 BZ2File_methods, /*tp_methods*/
1579 BZ2File_members, /*tp_members*/
1580 BZ2File_getset, /*tp_getset*/
1581 0, /*tp_base*/
1582 0, /*tp_dict*/
1583 0, /*tp_descr_get*/
1584 0, /*tp_descr_set*/
1585 0, /*tp_dictoffset*/
1586 (initproc)BZ2File_init, /*tp_init*/
1587 PyType_GenericAlloc, /*tp_alloc*/
1588 PyType_GenericNew, /*tp_new*/
1589 _PyObject_Del, /*tp_free*/
1590 0, /*tp_is_gc*/
1591};
1592
1593
1594/* ===================================================================== */
1595/* Methods of BZ2Comp. */
1596
1597PyDoc_STRVAR(BZ2Comp_compress__doc__,
1598"compress(data) -> string\n\
1599\n\
1600Provide more data to the compressor object. It will return chunks of\n\
1601compressed data whenever possible. When you've finished providing data\n\
1602to compress, call the flush() method to finish the compression process,\n\
1603and return what is left in the internal buffers.\n\
1604");
1605
1606static PyObject *
1607BZ2Comp_compress(BZ2CompObject *self, PyObject *args)
1608{
1609 Py_buffer pdata;
1610 size_t input_left;
1611 size_t output_size = 0;
1612 PyObject *ret = NULL;
1613 bz_stream *bzs = &self->bzs;
1614 int bzerror;
1615
1616 if (!PyArg_ParseTuple(args, "s*:compress", &pdata))
1617 return NULL;
1618
1619 if (pdata.len == 0) {
1620 PyBuffer_Release(&pdata);
1621 return PyString_FromString("");
1622 }
1623
1624 ACQUIRE_LOCK(self);
1625 if (!self->running) {
1626 PyErr_SetString(PyExc_ValueError,
1627 "this object was already flushed");
1628 goto error;
1629 }
1630
1631 ret = PyString_FromStringAndSize(NULL, SMALLCHUNK);
1632 if (!ret)
1633 goto error;
1634
1635 bzs->next_in = pdata.buf;
1636 bzs->avail_in = MIN(pdata.len, UINT_MAX);
1637 input_left = pdata.len - bzs->avail_in;
1638
1639 bzs->next_out = BUF(ret);
1640 bzs->avail_out = PyString_GET_SIZE(ret);
1641
1642 for (;;) {
1643 char *saved_next_out;
1644
1645 Py_BEGIN_ALLOW_THREADS
1646 saved_next_out = bzs->next_out;
1647 bzerror = BZ2_bzCompress(bzs, BZ_RUN);
1648 output_size += bzs->next_out - saved_next_out;
1649 Py_END_ALLOW_THREADS
1650
1651 if (bzerror != BZ_RUN_OK) {
1652 Util_CatchBZ2Error(bzerror);
1653 goto error;
1654 }
1655 if (bzs->avail_in == 0) {
1656 if (input_left == 0)
1657 break; /* no more input data */
1658 bzs->avail_in = MIN(input_left, UINT_MAX);
1659 input_left -= bzs->avail_in;
1660 }
1661 if (bzs->avail_out == 0) {
1662 size_t buffer_left = PyString_GET_SIZE(ret) - output_size;
1663 if (buffer_left == 0) {
1664 if (Util_GrowBuffer(&ret) < 0) {
1665 BZ2_bzCompressEnd(bzs);
1666 goto error;
1667 }
1668 bzs->next_out = BUF(ret) + output_size;
1669 buffer_left = PyString_GET_SIZE(ret) - output_size;
1670 }
1671 bzs->avail_out = MIN(buffer_left, UINT_MAX);
1672 }
1673 }
1674
1675 if (_PyString_Resize(&ret, output_size) < 0)
1676 goto error;
1677
1678 RELEASE_LOCK(self);
1679 PyBuffer_Release(&pdata);
1680 return ret;
1681
1682error:
1683 RELEASE_LOCK(self);
1684 PyBuffer_Release(&pdata);
1685 Py_XDECREF(ret);
1686 return NULL;
1687}
1688
1689PyDoc_STRVAR(BZ2Comp_flush__doc__,
1690"flush() -> string\n\
1691\n\
1692Finish the compression process and return what is left in internal buffers.\n\
1693You must not use the compressor object after calling this method.\n\
1694");
1695
1696static PyObject *
1697BZ2Comp_flush(BZ2CompObject *self)
1698{
1699 size_t output_size = 0;
1700 PyObject *ret = NULL;
1701 bz_stream *bzs = &self->bzs;
1702 int bzerror;
1703
1704 ACQUIRE_LOCK(self);
1705 if (!self->running) {
1706 PyErr_SetString(PyExc_ValueError, "object was already flushed");
1707 goto error;
1708 }
1709 self->running = 0;
1710
1711 ret = PyString_FromStringAndSize(NULL, SMALLCHUNK);
1712 if (!ret)
1713 goto error;
1714
1715 bzs->next_out = BUF(ret);
1716 bzs->avail_out = PyString_GET_SIZE(ret);
1717
1718 for (;;) {
1719 char *saved_next_out;
1720
1721 Py_BEGIN_ALLOW_THREADS
1722 saved_next_out = bzs->next_out;
1723 bzerror = BZ2_bzCompress(bzs, BZ_FINISH);
1724 output_size += bzs->next_out - saved_next_out;
1725 Py_END_ALLOW_THREADS
1726
1727 if (bzerror == BZ_STREAM_END) {
1728 break;
1729 } else if (bzerror != BZ_FINISH_OK) {
1730 Util_CatchBZ2Error(bzerror);
1731 goto error;
1732 }
1733 if (bzs->avail_out == 0) {
1734 size_t buffer_left = PyString_GET_SIZE(ret) - output_size;
1735 if (buffer_left == 0) {
1736 if (Util_GrowBuffer(&ret) < 0)
1737 goto error;
1738 bzs->next_out = BUF(ret) + output_size;
1739 buffer_left = PyString_GET_SIZE(ret) - output_size;
1740 }
1741 bzs->avail_out = MIN(buffer_left, UINT_MAX);
1742 }
1743 }
1744
1745 if (output_size != PyString_GET_SIZE(ret))
1746 if (_PyString_Resize(&ret, output_size) < 0)
1747 goto error;
1748
1749 RELEASE_LOCK(self);
1750 return ret;
1751
1752error:
1753 RELEASE_LOCK(self);
1754 Py_XDECREF(ret);
1755 return NULL;
1756}
1757
1758static PyMethodDef BZ2Comp_methods[] = {
1759 {"compress", (PyCFunction)BZ2Comp_compress, METH_VARARGS,
1760 BZ2Comp_compress__doc__},
1761 {"flush", (PyCFunction)BZ2Comp_flush, METH_NOARGS,
1762 BZ2Comp_flush__doc__},
1763 {NULL, NULL} /* sentinel */
1764};
1765
1766
1767/* ===================================================================== */
1768/* Slot definitions for BZ2Comp_Type. */
1769
1770static int
1771BZ2Comp_init(BZ2CompObject *self, PyObject *args, PyObject *kwargs)
1772{
1773 int compresslevel = 9;
1774 int bzerror;
1775 static char *kwlist[] = {"compresslevel", 0};
1776
1777 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|i:BZ2Compressor",
1778 kwlist, &compresslevel))
1779 return -1;
1780
1781 if (compresslevel < 1 || compresslevel > 9) {
1782 PyErr_SetString(PyExc_ValueError,
1783 "compresslevel must be between 1 and 9");
1784 goto error;
1785 }
1786
1787#ifdef WITH_THREAD
1788 self->lock = PyThread_allocate_lock();
1789 if (!self->lock) {
1790 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
1791 goto error;
1792 }
1793#endif
1794
1795 memset(&self->bzs, 0, sizeof(bz_stream));
1796 bzerror = BZ2_bzCompressInit(&self->bzs, compresslevel, 0, 0);
1797 if (bzerror != BZ_OK) {
1798 Util_CatchBZ2Error(bzerror);
1799 goto error;
1800 }
1801
1802 self->running = 1;
1803
1804 return 0;
1805error:
1806#ifdef WITH_THREAD
1807 if (self->lock) {
1808 PyThread_free_lock(self->lock);
1809 self->lock = NULL;
1810 }
1811#endif
1812 return -1;
1813}
1814
1815static void
1816BZ2Comp_dealloc(BZ2CompObject *self)
1817{
1818#ifdef WITH_THREAD
1819 if (self->lock)
1820 PyThread_free_lock(self->lock);
1821#endif
1822 BZ2_bzCompressEnd(&self->bzs);
1823 Py_TYPE(self)->tp_free((PyObject *)self);
1824}
1825
1826
1827/* ===================================================================== */
1828/* BZ2Comp_Type definition. */
1829
1830PyDoc_STRVAR(BZ2Comp__doc__,
1831"BZ2Compressor([compresslevel=9]) -> compressor object\n\
1832\n\
1833Create a new compressor object. This object may be used to compress\n\
1834data sequentially. If you want to compress data in one shot, use the\n\
1835compress() function instead. The compresslevel parameter, if given,\n\
1836must be a number between 1 and 9.\n\
1837");
1838
1839static PyTypeObject BZ2Comp_Type = {
1840 PyVarObject_HEAD_INIT(NULL, 0)
1841 "bz2.BZ2Compressor", /*tp_name*/
1842 sizeof(BZ2CompObject), /*tp_basicsize*/
1843 0, /*tp_itemsize*/
1844 (destructor)BZ2Comp_dealloc, /*tp_dealloc*/
1845 0, /*tp_print*/
1846 0, /*tp_getattr*/
1847 0, /*tp_setattr*/
1848 0, /*tp_compare*/
1849 0, /*tp_repr*/
1850 0, /*tp_as_number*/
1851 0, /*tp_as_sequence*/
1852 0, /*tp_as_mapping*/
1853 0, /*tp_hash*/
1854 0, /*tp_call*/
1855 0, /*tp_str*/
1856 PyObject_GenericGetAttr,/*tp_getattro*/
1857 PyObject_GenericSetAttr,/*tp_setattro*/
1858 0, /*tp_as_buffer*/
1859 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
1860 BZ2Comp__doc__, /*tp_doc*/
1861 0, /*tp_traverse*/
1862 0, /*tp_clear*/
1863 0, /*tp_richcompare*/
1864 0, /*tp_weaklistoffset*/
1865 0, /*tp_iter*/
1866 0, /*tp_iternext*/
1867 BZ2Comp_methods, /*tp_methods*/
1868 0, /*tp_members*/
1869 0, /*tp_getset*/
1870 0, /*tp_base*/
1871 0, /*tp_dict*/
1872 0, /*tp_descr_get*/
1873 0, /*tp_descr_set*/
1874 0, /*tp_dictoffset*/
1875 (initproc)BZ2Comp_init, /*tp_init*/
1876 PyType_GenericAlloc, /*tp_alloc*/
1877 PyType_GenericNew, /*tp_new*/
1878 _PyObject_Del, /*tp_free*/
1879 0, /*tp_is_gc*/
1880};
1881
1882
1883/* ===================================================================== */
1884/* Members of BZ2Decomp. */
1885
1886#undef OFF
1887#define OFF(x) offsetof(BZ2DecompObject, x)
1888
1889static PyMemberDef BZ2Decomp_members[] = {
1890 {"unused_data", T_OBJECT, OFF(unused_data), RO},
1891 {NULL} /* Sentinel */
1892};
1893
1894
1895/* ===================================================================== */
1896/* Methods of BZ2Decomp. */
1897
1898PyDoc_STRVAR(BZ2Decomp_decompress__doc__,
1899"decompress(data) -> string\n\
1900\n\
1901Provide more data to the decompressor object. It will return chunks\n\
1902of decompressed data whenever possible. If you try to decompress data\n\
1903after the end of stream is found, EOFError will be raised. If any data\n\
1904was found after the end of stream, it'll be ignored and saved in\n\
1905unused_data attribute.\n\
1906");
1907
1908static PyObject *
1909BZ2Decomp_decompress(BZ2DecompObject *self, PyObject *args)
1910{
1911 Py_buffer pdata;
1912 size_t input_left;
1913 size_t output_size = 0;
1914 PyObject *ret = NULL;
1915 bz_stream *bzs = &self->bzs;
1916 int bzerror;
1917
1918 if (!PyArg_ParseTuple(args, "s*:decompress", &pdata))
1919 return NULL;
1920
1921 ACQUIRE_LOCK(self);
1922 if (!self->running) {
1923 PyErr_SetString(PyExc_EOFError, "end of stream was "
1924 "already found");
1925 goto error;
1926 }
1927
1928 ret = PyString_FromStringAndSize(NULL, SMALLCHUNK);
1929 if (!ret)
1930 goto error;
1931
1932 bzs->next_in = pdata.buf;
1933 bzs->avail_in = MIN(pdata.len, UINT_MAX);
1934 input_left = pdata.len - bzs->avail_in;
1935
1936 bzs->next_out = BUF(ret);
1937 bzs->avail_out = PyString_GET_SIZE(ret);
1938
1939 for (;;) {
1940 char *saved_next_out;
1941
1942 Py_BEGIN_ALLOW_THREADS
1943 saved_next_out = bzs->next_out;
1944 bzerror = BZ2_bzDecompress(bzs);
1945 output_size += bzs->next_out - saved_next_out;
1946 Py_END_ALLOW_THREADS
1947
1948 if (bzerror == BZ_STREAM_END) {
1949 self->running = 0;
1950 input_left += bzs->avail_in;
1951 if (input_left != 0) {
1952 Py_DECREF(self->unused_data);
1953 self->unused_data =
1954 PyString_FromStringAndSize(bzs->next_in, input_left);
1955 if (self->unused_data == NULL)
1956 goto error;
1957 }
1958 break;
1959 }
1960 if (bzerror != BZ_OK) {
1961 Util_CatchBZ2Error(bzerror);
1962 goto error;
1963 }
1964 if (bzs->avail_in == 0) {
1965 if (input_left == 0)
1966 break; /* no more input data */
1967 bzs->avail_in = MIN(input_left, UINT_MAX);
1968 input_left -= bzs->avail_in;
1969 }
1970 if (bzs->avail_out == 0) {
1971 size_t buffer_left = PyString_GET_SIZE(ret) - output_size;
1972 if (buffer_left == 0) {
1973 if (Util_GrowBuffer(&ret) < 0) {
1974 BZ2_bzDecompressEnd(bzs);
1975 goto error;
1976 }
1977 bzs->next_out = BUF(ret) + output_size;
1978 buffer_left = PyString_GET_SIZE(ret) - output_size;
1979 }
1980 bzs->avail_out = MIN(buffer_left, UINT_MAX);
1981 }
1982 }
1983
1984 if (output_size != PyString_GET_SIZE(ret))
1985 if (_PyString_Resize(&ret, output_size) < 0)
1986 goto error;
1987
1988 RELEASE_LOCK(self);
1989 PyBuffer_Release(&pdata);
1990 return ret;
1991
1992error:
1993 RELEASE_LOCK(self);
1994 PyBuffer_Release(&pdata);
1995 Py_XDECREF(ret);
1996 return NULL;
1997}
1998
1999static PyMethodDef BZ2Decomp_methods[] = {
2000 {"decompress", (PyCFunction)BZ2Decomp_decompress, METH_VARARGS, BZ2Decomp_decompress__doc__},
2001 {NULL, NULL} /* sentinel */
2002};
2003
2004
2005/* ===================================================================== */
2006/* Slot definitions for BZ2Decomp_Type. */
2007
2008static int
2009BZ2Decomp_init(BZ2DecompObject *self, PyObject *args, PyObject *kwargs)
2010{
2011 int bzerror;
2012
2013 if (!PyArg_ParseTuple(args, ":BZ2Decompressor"))
2014 return -1;
2015
2016#ifdef WITH_THREAD
2017 self->lock = PyThread_allocate_lock();
2018 if (!self->lock) {
2019 PyErr_SetString(PyExc_MemoryError, "unable to allocate lock");
2020 goto error;
2021 }
2022#endif
2023
2024 self->unused_data = PyString_FromString("");
2025 if (!self->unused_data)
2026 goto error;
2027
2028 memset(&self->bzs, 0, sizeof(bz_stream));
2029 bzerror = BZ2_bzDecompressInit(&self->bzs, 0, 0);
2030 if (bzerror != BZ_OK) {
2031 Util_CatchBZ2Error(bzerror);
2032 goto error;
2033 }
2034
2035 self->running = 1;
2036
2037 return 0;
2038
2039error:
2040#ifdef WITH_THREAD
2041 if (self->lock) {
2042 PyThread_free_lock(self->lock);
2043 self->lock = NULL;
2044 }
2045#endif
2046 Py_CLEAR(self->unused_data);
2047 return -1;
2048}
2049
2050static void
2051BZ2Decomp_dealloc(BZ2DecompObject *self)
2052{
2053#ifdef WITH_THREAD
2054 if (self->lock)
2055 PyThread_free_lock(self->lock);
2056#endif
2057 Py_XDECREF(self->unused_data);
2058 BZ2_bzDecompressEnd(&self->bzs);
2059 Py_TYPE(self)->tp_free((PyObject *)self);
2060}
2061
2062
2063/* ===================================================================== */
2064/* BZ2Decomp_Type definition. */
2065
2066PyDoc_STRVAR(BZ2Decomp__doc__,
2067"BZ2Decompressor() -> decompressor object\n\
2068\n\
2069Create a new decompressor object. This object may be used to decompress\n\
2070data sequentially. If you want to decompress data in one shot, use the\n\
2071decompress() function instead.\n\
2072");
2073
2074static PyTypeObject BZ2Decomp_Type = {
2075 PyVarObject_HEAD_INIT(NULL, 0)
2076 "bz2.BZ2Decompressor", /*tp_name*/
2077 sizeof(BZ2DecompObject), /*tp_basicsize*/
2078 0, /*tp_itemsize*/
2079 (destructor)BZ2Decomp_dealloc, /*tp_dealloc*/
2080 0, /*tp_print*/
2081 0, /*tp_getattr*/
2082 0, /*tp_setattr*/
2083 0, /*tp_compare*/
2084 0, /*tp_repr*/
2085 0, /*tp_as_number*/
2086 0, /*tp_as_sequence*/
2087 0, /*tp_as_mapping*/
2088 0, /*tp_hash*/
2089 0, /*tp_call*/
2090 0, /*tp_str*/
2091 PyObject_GenericGetAttr,/*tp_getattro*/
2092 PyObject_GenericSetAttr,/*tp_setattro*/
2093 0, /*tp_as_buffer*/
2094 Py_TPFLAGS_DEFAULT|Py_TPFLAGS_BASETYPE, /*tp_flags*/
2095 BZ2Decomp__doc__, /*tp_doc*/
2096 0, /*tp_traverse*/
2097 0, /*tp_clear*/
2098 0, /*tp_richcompare*/
2099 0, /*tp_weaklistoffset*/
2100 0, /*tp_iter*/
2101 0, /*tp_iternext*/
2102 BZ2Decomp_methods, /*tp_methods*/
2103 BZ2Decomp_members, /*tp_members*/
2104 0, /*tp_getset*/
2105 0, /*tp_base*/
2106 0, /*tp_dict*/
2107 0, /*tp_descr_get*/
2108 0, /*tp_descr_set*/
2109 0, /*tp_dictoffset*/
2110 (initproc)BZ2Decomp_init, /*tp_init*/
2111 PyType_GenericAlloc, /*tp_alloc*/
2112 PyType_GenericNew, /*tp_new*/
2113 _PyObject_Del, /*tp_free*/
2114 0, /*tp_is_gc*/
2115};
2116
2117
2118/* ===================================================================== */
2119/* Module functions. */
2120
2121PyDoc_STRVAR(bz2_compress__doc__,
2122"compress(data [, compresslevel=9]) -> string\n\
2123\n\
2124Compress data in one shot. If you want to compress data sequentially,\n\
2125use an instance of BZ2Compressor instead. The compresslevel parameter, if\n\
2126given, must be a number between 1 and 9.\n\
2127");
2128
2129static PyObject *
2130bz2_compress(PyObject *self, PyObject *args, PyObject *kwargs)
2131{
2132 int compresslevel=9;
2133 int action;
2134 Py_buffer pdata;
2135 size_t input_left;
2136 size_t output_size = 0;
2137 PyObject *ret = NULL;
2138 bz_stream _bzs;
2139 bz_stream *bzs = &_bzs;
2140 int bzerror;
2141 static char *kwlist[] = {"data", "compresslevel", 0};
2142
2143 if (!PyArg_ParseTupleAndKeywords(args, kwargs, "s*|i",
2144 kwlist, &pdata,
2145 &compresslevel))
2146 return NULL;
2147
2148 if (compresslevel < 1 || compresslevel > 9) {
2149 PyErr_SetString(PyExc_ValueError,
2150 "compresslevel must be between 1 and 9");
2151 PyBuffer_Release(&pdata);
2152 return NULL;
2153 }
2154
2155 ret = PyString_FromStringAndSize(NULL, SMALLCHUNK);
2156 if (!ret) {
2157 PyBuffer_Release(&pdata);
2158 return NULL;
2159 }
2160
2161 memset(bzs, 0, sizeof(bz_stream));
2162
2163 bzs->next_in = pdata.buf;
2164 bzs->avail_in = MIN(pdata.len, UINT_MAX);
2165 input_left = pdata.len - bzs->avail_in;
2166
2167 bzs->next_out = BUF(ret);
2168 bzs->avail_out = PyString_GET_SIZE(ret);
2169
2170 bzerror = BZ2_bzCompressInit(bzs, compresslevel, 0, 0);
2171 if (bzerror != BZ_OK) {
2172 Util_CatchBZ2Error(bzerror);
2173 PyBuffer_Release(&pdata);
2174 Py_DECREF(ret);
2175 return NULL;
2176 }
2177
2178 action = input_left > 0 ? BZ_RUN : BZ_FINISH;
2179
2180 for (;;) {
2181 char *saved_next_out;
2182
2183 Py_BEGIN_ALLOW_THREADS
2184 saved_next_out = bzs->next_out;
2185 bzerror = BZ2_bzCompress(bzs, action);
2186 output_size += bzs->next_out - saved_next_out;
2187 Py_END_ALLOW_THREADS
2188
2189 if (bzerror == BZ_STREAM_END) {
2190 break;
2191 } else if (bzerror != BZ_RUN_OK && bzerror != BZ_FINISH_OK) {
2192 BZ2_bzCompressEnd(bzs);
2193 Util_CatchBZ2Error(bzerror);
2194 PyBuffer_Release(&pdata);
2195 Py_DECREF(ret);
2196 return NULL;
2197 }
2198 if (action == BZ_RUN && bzs->avail_in == 0) {
2199 if (input_left == 0) {
2200 action = BZ_FINISH;
2201 } else {
2202 bzs->avail_in = MIN(input_left, UINT_MAX);
2203 input_left -= bzs->avail_in;
2204 }
2205 }
2206 if (bzs->avail_out == 0) {
2207 size_t buffer_left = PyString_GET_SIZE(ret) - output_size;
2208 if (buffer_left == 0) {
2209 if (Util_GrowBuffer(&ret) < 0) {
2210 BZ2_bzCompressEnd(bzs);
2211 PyBuffer_Release(&pdata);
2212 return NULL;
2213 }
2214 bzs->next_out = BUF(ret) + output_size;
2215 buffer_left = PyString_GET_SIZE(ret) - output_size;
2216 }
2217 bzs->avail_out = MIN(buffer_left, UINT_MAX);
2218 }
2219 }
2220
2221 if (output_size != PyString_GET_SIZE(ret))
2222 _PyString_Resize(&ret, output_size); /* Sets ret to NULL on failure. */
2223
2224 BZ2_bzCompressEnd(bzs);
2225 PyBuffer_Release(&pdata);
2226 return ret;
2227}
2228
2229PyDoc_STRVAR(bz2_decompress__doc__,
2230"decompress(data) -> decompressed data\n\
2231\n\
2232Decompress data in one shot. If you want to decompress data sequentially,\n\
2233use an instance of BZ2Decompressor instead.\n\
2234");
2235
2236static PyObject *
2237bz2_decompress(PyObject *self, PyObject *args)
2238{
2239 Py_buffer pdata;
2240 size_t input_left;
2241 size_t output_size = 0;
2242 PyObject *ret;
2243 bz_stream _bzs;
2244 bz_stream *bzs = &_bzs;
2245 int bzerror;
2246
2247 if (!PyArg_ParseTuple(args, "s*:decompress", &pdata))
2248 return NULL;
2249
2250 if (pdata.len == 0) {
2251 PyBuffer_Release(&pdata);
2252 return PyString_FromString("");
2253 }
2254
2255 ret = PyString_FromStringAndSize(NULL, SMALLCHUNK);
2256 if (!ret) {
2257 PyBuffer_Release(&pdata);
2258 return NULL;
2259 }
2260
2261 memset(bzs, 0, sizeof(bz_stream));
2262
2263 bzs->next_in = pdata.buf;
2264 bzs->avail_in = MIN(pdata.len, UINT_MAX);
2265 input_left = pdata.len - bzs->avail_in;
2266
2267 bzs->next_out = BUF(ret);
2268 bzs->avail_out = PyString_GET_SIZE(ret);
2269
2270 bzerror = BZ2_bzDecompressInit(bzs, 0, 0);
2271 if (bzerror != BZ_OK) {
2272 Util_CatchBZ2Error(bzerror);
2273 Py_DECREF(ret);
2274 PyBuffer_Release(&pdata);
2275 return NULL;
2276 }
2277
2278 for (;;) {
2279 char *saved_next_out;
2280
2281 Py_BEGIN_ALLOW_THREADS
2282 saved_next_out = bzs->next_out;
2283 bzerror = BZ2_bzDecompress(bzs);
2284 output_size += bzs->next_out - saved_next_out;
2285 Py_END_ALLOW_THREADS
2286
2287 if (bzerror == BZ_STREAM_END) {
2288 break;
2289 } else if (bzerror != BZ_OK) {
2290 BZ2_bzDecompressEnd(bzs);
2291 Util_CatchBZ2Error(bzerror);
2292 PyBuffer_Release(&pdata);
2293 Py_DECREF(ret);
2294 return NULL;
2295 }
2296 if (bzs->avail_in == 0) {
2297 if (input_left == 0) {
2298 BZ2_bzDecompressEnd(bzs);
2299 PyErr_SetString(PyExc_ValueError,
2300 "couldn't find end of stream");
2301 PyBuffer_Release(&pdata);
2302 Py_DECREF(ret);
2303 return NULL;
2304 }
2305 bzs->avail_in = MIN(input_left, UINT_MAX);
2306 input_left -= bzs->avail_in;
2307 }
2308 if (bzs->avail_out == 0) {
2309 size_t buffer_left = PyString_GET_SIZE(ret) - output_size;
2310 if (buffer_left == 0) {
2311 if (Util_GrowBuffer(&ret) < 0) {
2312 BZ2_bzDecompressEnd(bzs);
2313 PyBuffer_Release(&pdata);
2314 return NULL;
2315 }
2316 bzs->next_out = BUF(ret) + output_size;
2317 buffer_left = PyString_GET_SIZE(ret) - output_size;
2318 }
2319 bzs->avail_out = MIN(buffer_left, UINT_MAX);
2320 }
2321 }
2322
2323 if (output_size != PyString_GET_SIZE(ret))
2324 _PyString_Resize(&ret, output_size); /* Sets ret to NULL on failure. */
2325
2326 BZ2_bzDecompressEnd(bzs);
2327 PyBuffer_Release(&pdata);
2328 return ret;
2329}
2330
2331static PyMethodDef bz2_methods[] = {
2332 {"compress", (PyCFunction) bz2_compress, METH_VARARGS|METH_KEYWORDS,
2333 bz2_compress__doc__},
2334 {"decompress", (PyCFunction) bz2_decompress, METH_VARARGS,
2335 bz2_decompress__doc__},
2336 {NULL, NULL} /* sentinel */
2337};
2338
2339/* ===================================================================== */
2340/* Initialization function. */
2341
2342PyDoc_STRVAR(bz2__doc__,
2343"The python bz2 module provides a comprehensive interface for\n\
2344the bz2 compression library. It implements a complete file\n\
2345interface, one shot (de)compression functions, and types for\n\
2346sequential (de)compression.\n\
2347");
2348
2349PyMODINIT_FUNC
2350initbz2(void)
2351{
2352 PyObject *m;
2353
2354 if (PyType_Ready(&BZ2File_Type) < 0)
2355 return;
2356 if (PyType_Ready(&BZ2Comp_Type) < 0)
2357 return;
2358 if (PyType_Ready(&BZ2Decomp_Type) < 0)
2359 return;
2360
2361 m = Py_InitModule3("bz2", bz2_methods, bz2__doc__);
2362 if (m == NULL)
2363 return;
2364
2365 PyModule_AddObject(m, "__author__", PyString_FromString(__author__));
2366
2367 Py_INCREF(&BZ2File_Type);
2368 PyModule_AddObject(m, "BZ2File", (PyObject *)&BZ2File_Type);
2369
2370 Py_INCREF(&BZ2Comp_Type);
2371 PyModule_AddObject(m, "BZ2Compressor", (PyObject *)&BZ2Comp_Type);
2372
2373 Py_INCREF(&BZ2Decomp_Type);
2374 PyModule_AddObject(m, "BZ2Decompressor", (PyObject *)&BZ2Decomp_Type);
2375}
Note: See TracBrowser for help on using the repository browser.