source: vendor/python/2.5/Objects/fileobject.c

Last change on this file was 3225, checked in by bird, 18 years ago

Python 2.5

File size: 62.1 KB
Line 
1/* File object implementation */
2
3#define PY_SSIZE_T_CLEAN
4#include "Python.h"
5#include "structmember.h"
6
7#ifdef HAVE_SYS_TYPES_H
8#include <sys/types.h>
9#endif /* HAVE_SYS_TYPES_H */
10
11#ifdef MS_WINDOWS
12#define fileno _fileno
13/* can simulate truncate with Win32 API functions; see file_truncate */
14#define HAVE_FTRUNCATE
15#define WIN32_LEAN_AND_MEAN
16#include <windows.h>
17#endif
18
19#ifdef _MSC_VER
20/* Need GetVersion to see if on NT so safe to use _wfopen */
21#define WIN32_LEAN_AND_MEAN
22#include <windows.h>
23#endif /* _MSC_VER */
24
25#if defined(PYOS_OS2) && defined(PYCC_GCC)
26#include <io.h>
27#endif
28
/* Shorthand for the character buffer of string object v, obtained through
   PyString_AS_STRING.  The argument is parenthesized so that the cast
   applies to the whole argument expression, whatever its form. */
#define BUF(v) PyString_AS_STRING((PyStringObject *)(v))
30
31#ifndef DONT_HAVE_ERRNO_H
32#include <errno.h>
33#endif
34
/* Character input primitives.  Without getc_unlocked(), GETC() is plain
 * getc() and the stream lock/unlock macros expand to nothing; with it,
 * GETC() is the unlocked variant and callers bracket their reads with
 * FLOCKFILE()/FUNLOCKFILE(). */
#ifndef HAVE_GETC_UNLOCKED
#define GETC(f) getc(f)
#define FLOCKFILE(f)
#define FUNLOCKFILE(f)
#else
#define GETC(f) getc_unlocked(f)
#define FLOCKFILE(f) flockfile(f)
#define FUNLOCKFILE(f) funlockfile(f)
#endif
44
/* Flag bits stored in f_newlinetypes; one bit per newline convention
   that has been seen so far. */
#define NEWLINE_UNKNOWN 0x0     /* no newline seen yet */
#define NEWLINE_CR      0x1     /* a lone \r was seen */
#define NEWLINE_LF      0x2     /* a lone \n was seen */
#define NEWLINE_CRLF    0x4     /* a \r\n pair was seen */
51#ifdef __cplusplus
52extern "C" {
53#endif
54
55FILE *
56PyFile_AsFile(PyObject *f)
57{
58 if (f == NULL || !PyFile_Check(f))
59 return NULL;
60 else
61 return ((PyFileObject *)f)->f_fp;
62}
63
64PyObject *
65PyFile_Name(PyObject *f)
66{
67 if (f == NULL || !PyFile_Check(f))
68 return NULL;
69 else
70 return ((PyFileObject *)f)->f_name;
71}
72
73/* On Unix, fopen will succeed for directories.
74 In Python, there should be no file objects referring to
75 directories, so we need a check. */
76
77static PyFileObject*
78dircheck(PyFileObject* f)
79{
80#if defined(HAVE_FSTAT) && defined(S_IFDIR) && defined(EISDIR)
81 struct stat buf;
82 if (f->f_fp == NULL)
83 return f;
84 if (fstat(fileno(f->f_fp), &buf) == 0 &&
85 S_ISDIR(buf.st_mode)) {
86#ifdef HAVE_STRERROR
87 char *msg = strerror(EISDIR);
88#else
89 char *msg = "Is a directory";
90#endif
91 PyObject *exc = PyObject_CallFunction(PyExc_IOError, "(is)",
92 EISDIR, msg);
93 PyErr_SetObject(PyExc_IOError, exc);
94 Py_XDECREF(exc);
95 return NULL;
96 }
97#endif
98 return f;
99}
100
101
102static PyObject *
103fill_file_fields(PyFileObject *f, FILE *fp, PyObject *name, char *mode,
104 int (*close)(FILE *))
105{
106 assert(name != NULL);
107 assert(f != NULL);
108 assert(PyFile_Check(f));
109 assert(f->f_fp == NULL);
110
111 Py_DECREF(f->f_name);
112 Py_DECREF(f->f_mode);
113 Py_DECREF(f->f_encoding);
114
115 Py_INCREF(name);
116 f->f_name = name;
117
118 f->f_mode = PyString_FromString(mode);
119
120 f->f_close = close;
121 f->f_softspace = 0;
122 f->f_binary = strchr(mode,'b') != NULL;
123 f->f_buf = NULL;
124 f->f_univ_newline = (strchr(mode, 'U') != NULL);
125 f->f_newlinetypes = NEWLINE_UNKNOWN;
126 f->f_skipnextlf = 0;
127 Py_INCREF(Py_None);
128 f->f_encoding = Py_None;
129
130 if (f->f_mode == NULL)
131 return NULL;
132 f->f_fp = fp;
133 f = dircheck(f);
134 return (PyObject *) f;
135}
136
137/* check for known incorrect mode strings - problem is, platforms are
138 free to accept any mode characters they like and are supposed to
139 ignore stuff they don't understand... write or append mode with
140 universal newline support is expressly forbidden by PEP 278.
141 Additionally, remove the 'U' from the mode string as platforms
142 won't know what it is. */
143/* zero return is kewl - one is un-kewl */
144static int
145sanitize_the_mode(char *mode)
146{
147 char *upos;
148 size_t len = strlen(mode);
149
150 if (!len) {
151 PyErr_SetString(PyExc_ValueError, "empty mode string");
152 return 1;
153 }
154
155 upos = strchr(mode, 'U');
156 if (upos) {
157 memmove(upos, upos+1, len-(upos-mode)); /* incl null char */
158
159 if (mode[0] == 'w' || mode[0] == 'a') {
160 PyErr_Format(PyExc_ValueError, "universal newline "
161 "mode can only be used with modes "
162 "starting with 'r'");
163 return 1;
164 }
165
166 if (mode[0] != 'r') {
167 memmove(mode+1, mode, strlen(mode)+1);
168 mode[0] = 'r';
169 }
170
171 if (!strchr(mode, 'b')) {
172 memmove(mode+2, mode+1, strlen(mode));
173 mode[1] = 'b';
174 }
175 } else if (mode[0] != 'r' && mode[0] != 'w' && mode[0] != 'a') {
176 PyErr_Format(PyExc_ValueError, "mode string must begin with "
177 "one of 'r', 'w', 'a' or 'U', not '%.200s'", mode);
178 return 1;
179 }
180
181 return 0;
182}
183
184static PyObject *
185open_the_file(PyFileObject *f, char *name, char *mode)
186{
187 char *newmode;
188 assert(f != NULL);
189 assert(PyFile_Check(f));
190#ifdef MS_WINDOWS
191 /* windows ignores the passed name in order to support Unicode */
192 assert(f->f_name != NULL);
193#else
194 assert(name != NULL);
195#endif
196 assert(mode != NULL);
197 assert(f->f_fp == NULL);
198
199 /* probably need to replace 'U' by 'rb' */
200 newmode = PyMem_MALLOC(strlen(mode) + 3);
201 if (!newmode) {
202 PyErr_NoMemory();
203 return NULL;
204 }
205 strcpy(newmode, mode);
206
207 if (sanitize_the_mode(newmode)) {
208 f = NULL;
209 goto cleanup;
210 }
211
212 /* rexec.py can't stop a user from getting the file() constructor --
213 all they have to do is get *any* file object f, and then do
214 type(f). Here we prevent them from doing damage with it. */
215 if (PyEval_GetRestricted()) {
216 PyErr_SetString(PyExc_IOError,
217 "file() constructor not accessible in restricted mode");
218 f = NULL;
219 goto cleanup;
220 }
221 errno = 0;
222
223#ifdef MS_WINDOWS
224 if (PyUnicode_Check(f->f_name)) {
225 PyObject *wmode;
226 wmode = PyUnicode_DecodeASCII(newmode, strlen(newmode), NULL);
227 if (f->f_name && wmode) {
228 Py_BEGIN_ALLOW_THREADS
229 /* PyUnicode_AS_UNICODE OK without thread
230 lock as it is a simple dereference. */
231 f->f_fp = _wfopen(PyUnicode_AS_UNICODE(f->f_name),
232 PyUnicode_AS_UNICODE(wmode));
233 Py_END_ALLOW_THREADS
234 }
235 Py_XDECREF(wmode);
236 }
237#endif
238 if (NULL == f->f_fp && NULL != name) {
239 Py_BEGIN_ALLOW_THREADS
240 f->f_fp = fopen(name, newmode);
241 Py_END_ALLOW_THREADS
242 }
243
244 if (f->f_fp == NULL) {
245#if defined _MSC_VER && (_MSC_VER < 1400 || !defined(__STDC_SECURE_LIB__))
246 /* MSVC 6 (Microsoft) leaves errno at 0 for bad mode strings,
247 * across all Windows flavors. When it sets EINVAL varies
248 * across Windows flavors, the exact conditions aren't
249 * documented, and the answer lies in the OS's implementation
250 * of Win32's CreateFile function (whose source is secret).
251 * Seems the best we can do is map EINVAL to ENOENT.
252 * Starting with Visual Studio .NET 2005, EINVAL is correctly
253 * set by our CRT error handler (set in exceptions.c.)
254 */
255 if (errno == 0) /* bad mode string */
256 errno = EINVAL;
257 else if (errno == EINVAL) /* unknown, but not a mode string */
258 errno = ENOENT;
259#endif
260 if (errno == EINVAL)
261 PyErr_Format(PyExc_IOError, "invalid mode: %s",
262 mode);
263 else
264 PyErr_SetFromErrnoWithFilenameObject(PyExc_IOError, f->f_name);
265 f = NULL;
266 }
267 if (f != NULL)
268 f = dircheck(f);
269
270cleanup:
271 PyMem_FREE(newmode);
272
273 return (PyObject *)f;
274}
275
276PyObject *
277PyFile_FromFile(FILE *fp, char *name, char *mode, int (*close)(FILE *))
278{
279 PyFileObject *f = (PyFileObject *)PyFile_Type.tp_new(&PyFile_Type,
280 NULL, NULL);
281 if (f != NULL) {
282 PyObject *o_name = PyString_FromString(name);
283 if (o_name == NULL)
284 return NULL;
285 if (fill_file_fields(f, fp, o_name, mode, close) == NULL) {
286 Py_DECREF(f);
287 f = NULL;
288 }
289 Py_DECREF(o_name);
290 }
291 return (PyObject *) f;
292}
293
294PyObject *
295PyFile_FromString(char *name, char *mode)
296{
297 extern int fclose(FILE *);
298 PyFileObject *f;
299
300 f = (PyFileObject *)PyFile_FromFile((FILE *)NULL, name, mode, fclose);
301 if (f != NULL) {
302 if (open_the_file(f, name, mode) == NULL) {
303 Py_DECREF(f);
304 f = NULL;
305 }
306 }
307 return (PyObject *)f;
308}
309
310void
311PyFile_SetBufSize(PyObject *f, int bufsize)
312{
313 PyFileObject *file = (PyFileObject *)f;
314 if (bufsize >= 0) {
315 int type;
316 switch (bufsize) {
317 case 0:
318 type = _IONBF;
319 break;
320#ifdef HAVE_SETVBUF
321 case 1:
322 type = _IOLBF;
323 bufsize = BUFSIZ;
324 break;
325#endif
326 default:
327 type = _IOFBF;
328#ifndef HAVE_SETVBUF
329 bufsize = BUFSIZ;
330#endif
331 break;
332 }
333 fflush(file->f_fp);
334 if (type == _IONBF) {
335 PyMem_Free(file->f_setbuf);
336 file->f_setbuf = NULL;
337 } else {
338 file->f_setbuf = (char *)PyMem_Realloc(file->f_setbuf,
339 bufsize);
340 }
341#ifdef HAVE_SETVBUF
342 setvbuf(file->f_fp, file->f_setbuf, type, bufsize);
343#else /* !HAVE_SETVBUF */
344 setbuf(file->f_fp, file->f_setbuf);
345#endif /* !HAVE_SETVBUF */
346 }
347}
348
349/* Set the encoding used to output Unicode strings.
350 Returh 1 on success, 0 on failure. */
351
352int
353PyFile_SetEncoding(PyObject *f, const char *enc)
354{
355 PyFileObject *file = (PyFileObject*)f;
356 PyObject *str = PyString_FromString(enc);
357 if (!str)
358 return 0;
359 Py_DECREF(file->f_encoding);
360 file->f_encoding = str;
361 return 1;
362}
363
364static PyObject *
365err_closed(void)
366{
367 PyErr_SetString(PyExc_ValueError, "I/O operation on closed file");
368 return NULL;
369}
370
371/* Refuse regular file I/O if there's data in the iteration-buffer.
372 * Mixing them would cause data to arrive out of order, as the read*
373 * methods don't use the iteration buffer. */
374static PyObject *
375err_iterbuffered(void)
376{
377 PyErr_SetString(PyExc_ValueError,
378 "Mixing iteration and read methods would lose data");
379 return NULL;
380}
381
382static void drop_readahead(PyFileObject *);
383
384/* Methods */
385
386static void
387file_dealloc(PyFileObject *f)
388{
389 int sts = 0;
390 if (f->weakreflist != NULL)
391 PyObject_ClearWeakRefs((PyObject *) f);
392 if (f->f_fp != NULL && f->f_close != NULL) {
393 Py_BEGIN_ALLOW_THREADS
394 sts = (*f->f_close)(f->f_fp);
395 Py_END_ALLOW_THREADS
396 if (sts == EOF)
397#ifdef HAVE_STRERROR
398 PySys_WriteStderr("close failed: [Errno %d] %s\n", errno, strerror(errno));
399#else
400 PySys_WriteStderr("close failed: [Errno %d]\n", errno);
401#endif
402 }
403 PyMem_Free(f->f_setbuf);
404 Py_XDECREF(f->f_name);
405 Py_XDECREF(f->f_mode);
406 Py_XDECREF(f->f_encoding);
407 drop_readahead(f);
408 f->ob_type->tp_free((PyObject *)f);
409}
410
411static PyObject *
412file_repr(PyFileObject *f)
413{
414 if (PyUnicode_Check(f->f_name)) {
415#ifdef Py_USING_UNICODE
416 PyObject *ret = NULL;
417 PyObject *name = PyUnicode_AsUnicodeEscapeString(f->f_name);
418 const char *name_str = name ? PyString_AsString(name) : "?";
419 ret = PyString_FromFormat("<%s file u'%s', mode '%s' at %p>",
420 f->f_fp == NULL ? "closed" : "open",
421 name_str,
422 PyString_AsString(f->f_mode),
423 f);
424 Py_XDECREF(name);
425 return ret;
426#endif
427 } else {
428 return PyString_FromFormat("<%s file '%s', mode '%s' at %p>",
429 f->f_fp == NULL ? "closed" : "open",
430 PyString_AsString(f->f_name),
431 PyString_AsString(f->f_mode),
432 f);
433 }
434}
435
436static PyObject *
437file_close(PyFileObject *f)
438{
439 int sts = 0;
440 if (f->f_fp != NULL) {
441 if (f->f_close != NULL) {
442 Py_BEGIN_ALLOW_THREADS
443 errno = 0;
444 sts = (*f->f_close)(f->f_fp);
445 Py_END_ALLOW_THREADS
446 }
447 f->f_fp = NULL;
448 }
449 PyMem_Free(f->f_setbuf);
450 f->f_setbuf = NULL;
451 if (sts == EOF)
452 return PyErr_SetFromErrno(PyExc_IOError);
453 if (sts != 0)
454 return PyInt_FromLong((long)sts);
455 Py_INCREF(Py_None);
456 return Py_None;
457}
458
459
/* Py_off_t: the file offset type used throughout this file, 64-bit if
   possible.  It is off_t unless large file support is enabled and off_t
   is too small, in which case a sufficiently wide fpos_t is used. */
#if !defined(HAVE_LARGEFILE_SUPPORT) || SIZEOF_OFF_T >= 8
typedef off_t Py_off_t;
#elif SIZEOF_FPOS_T >= 8
typedef fpos_t Py_off_t;
#else
#error "Large file support, but neither off_t nor fpos_t is large enough."
#endif
470
471
472/* a portable fseek() function
473 return 0 on success, non-zero on failure (with errno set) */
474static int
475_portable_fseek(FILE *fp, Py_off_t offset, int whence)
476{
477#if !defined(HAVE_LARGEFILE_SUPPORT)
478 return fseek(fp, offset, whence);
479#elif defined(HAVE_FSEEKO) && SIZEOF_OFF_T >= 8
480 return fseeko(fp, offset, whence);
481#elif defined(HAVE_FSEEK64)
482 return fseek64(fp, offset, whence);
483#elif defined(__BEOS__)
484 return _fseek(fp, offset, whence);
485#elif SIZEOF_FPOS_T >= 8
486 /* lacking a 64-bit capable fseek(), use a 64-bit capable fsetpos()
487 and fgetpos() to implement fseek()*/
488 fpos_t pos;
489 switch (whence) {
490 case SEEK_END:
491#ifdef MS_WINDOWS
492 fflush(fp);
493 if (_lseeki64(fileno(fp), 0, 2) == -1)
494 return -1;
495#else
496 if (fseek(fp, 0, SEEK_END) != 0)
497 return -1;
498#endif
499 /* fall through */
500 case SEEK_CUR:
501 if (fgetpos(fp, &pos) != 0)
502 return -1;
503 offset += pos;
504 break;
505 /* case SEEK_SET: break; */
506 }
507 return fsetpos(fp, &offset);
508#else
509#error "Large file support, but no way to fseek."
510#endif
511}
512
513
514/* a portable ftell() function
515 Return -1 on failure with errno set appropriately, current file
516 position on success */
517static Py_off_t
518_portable_ftell(FILE* fp)
519{
520#if !defined(HAVE_LARGEFILE_SUPPORT)
521 return ftell(fp);
522#elif defined(HAVE_FTELLO) && SIZEOF_OFF_T >= 8
523 return ftello(fp);
524#elif defined(HAVE_FTELL64)
525 return ftell64(fp);
526#elif SIZEOF_FPOS_T >= 8
527 fpos_t pos;
528 if (fgetpos(fp, &pos) != 0)
529 return -1;
530 return pos;
531#else
532#error "Large file support, but no way to ftell."
533#endif
534}
535
536
537static PyObject *
538file_seek(PyFileObject *f, PyObject *args)
539{
540 int whence;
541 int ret;
542 Py_off_t offset;
543 PyObject *offobj;
544
545 if (f->f_fp == NULL)
546 return err_closed();
547 drop_readahead(f);
548 whence = 0;
549 if (!PyArg_ParseTuple(args, "O|i:seek", &offobj, &whence))
550 return NULL;
551#if !defined(HAVE_LARGEFILE_SUPPORT)
552 offset = PyInt_AsLong(offobj);
553#else
554 offset = PyLong_Check(offobj) ?
555 PyLong_AsLongLong(offobj) : PyInt_AsLong(offobj);
556#endif
557 if (PyErr_Occurred())
558 return NULL;
559
560 Py_BEGIN_ALLOW_THREADS
561 errno = 0;
562 ret = _portable_fseek(f->f_fp, offset, whence);
563 Py_END_ALLOW_THREADS
564
565 if (ret != 0) {
566 PyErr_SetFromErrno(PyExc_IOError);
567 clearerr(f->f_fp);
568 return NULL;
569 }
570 f->f_skipnextlf = 0;
571 Py_INCREF(Py_None);
572 return Py_None;
573}
574
575
#ifdef HAVE_FTRUNCATE
/* truncate([size]) method: set the file's size to size, or to the
   current position when no size is given.  The stream position seen by
   the caller is left where it was.
   Returns None, or NULL with ValueError (closed file), a conversion
   error for the size, or IOError set. */
static PyObject *
file_truncate(PyFileObject *f, PyObject *args)
{
    PyObject *size_arg = NULL;
    Py_off_t target;
    Py_off_t saved_pos;
    int rc;

    if (f->f_fp == NULL)
        return err_closed();
    if (!PyArg_UnpackTuple(args, "truncate", 0, 1, &size_arg))
        return NULL;

    /* Remember the current file position.  If the file happens to be
     * open for update and the last operation was an input operation, C
     * doesn't define what the later fflush() will do, but we promise
     * truncate() won't change the current position (and fflush() *does*
     * change it then at least on Windows).  The easiest thing is to
     * capture the position now and seek back to it at the end.
     */
    Py_BEGIN_ALLOW_THREADS
    errno = 0;
    saved_pos = _portable_ftell(f->f_fp);
    Py_END_ALLOW_THREADS
    if (saved_pos == -1)
        goto onioerror;

    /* The new size is the current position unless a size argument was
     * passed in.
     */
    if (size_arg == NULL) {
        target = saved_pos;
    }
    else {
#if !defined(HAVE_LARGEFILE_SUPPORT)
        target = PyInt_AsLong(size_arg);
#else
        target = PyLong_Check(size_arg) ?
            PyLong_AsLongLong(size_arg) :
            PyInt_AsLong(size_arg);
#endif
        if (PyErr_Occurred())
            return NULL;
    }

    /* Flush the stream.  We're mixing stream-level I/O with lower-level
     * I/O, and a flush may be necessary to synch both platform views
     * of the current file state.
     */
    Py_BEGIN_ALLOW_THREADS
    errno = 0;
    rc = fflush(f->f_fp);
    Py_END_ALLOW_THREADS
    if (rc != 0)
        goto onioerror;

#ifdef MS_WINDOWS
    /* MS _chsize doesn't work if newsize doesn't fit in 32 bits,
       so don't even try using it. */
    {
        HANDLE hFile;

        /* On Windows the current position has to be moved to the
           desired end point first. */
        Py_BEGIN_ALLOW_THREADS
        errno = 0;
        rc = _portable_fseek(f->f_fp, target, SEEK_SET) != 0;
        Py_END_ALLOW_THREADS
        if (rc)
            goto onioerror;

        /* Truncate.  Note that this may grow the file! */
        Py_BEGIN_ALLOW_THREADS
        errno = 0;
        hFile = (HANDLE)_get_osfhandle(fileno(f->f_fp));
        rc = hFile == (HANDLE)-1;
        if (rc == 0) {
            rc = SetEndOfFile(hFile) == 0;
            if (rc)
                errno = EACCES;
        }
        Py_END_ALLOW_THREADS
        if (rc)
            goto onioerror;
    }
#else
    Py_BEGIN_ALLOW_THREADS
    errno = 0;
    rc = ftruncate(fileno(f->f_fp), target);
    Py_END_ALLOW_THREADS
    if (rc != 0)
        goto onioerror;
#endif /* !MS_WINDOWS */

    /* Put the file position back where it was on entry. */
    Py_BEGIN_ALLOW_THREADS
    errno = 0;
    rc = _portable_fseek(f->f_fp, saved_pos, SEEK_SET) != 0;
    Py_END_ALLOW_THREADS
    if (rc)
        goto onioerror;

    Py_INCREF(Py_None);
    return Py_None;

onioerror:
    PyErr_SetFromErrno(PyExc_IOError);
    clearerr(f->f_fp);
    return NULL;
}
#endif /* HAVE_FTRUNCATE */
686
687static PyObject *
688file_tell(PyFileObject *f)
689{
690 Py_off_t pos;
691
692 if (f->f_fp == NULL)
693 return err_closed();
694 Py_BEGIN_ALLOW_THREADS
695 errno = 0;
696 pos = _portable_ftell(f->f_fp);
697 Py_END_ALLOW_THREADS
698 if (pos == -1) {
699 PyErr_SetFromErrno(PyExc_IOError);
700 clearerr(f->f_fp);
701 return NULL;
702 }
703 if (f->f_skipnextlf) {
704 int c;
705 c = GETC(f->f_fp);
706 if (c == '\n') {
707 pos++;
708 f->f_skipnextlf = 0;
709 } else if (c != EOF) ungetc(c, f->f_fp);
710 }
711#if !defined(HAVE_LARGEFILE_SUPPORT)
712 return PyInt_FromLong(pos);
713#else
714 return PyLong_FromLongLong(pos);
715#endif
716}
717
718static PyObject *
719file_fileno(PyFileObject *f)
720{
721 if (f->f_fp == NULL)
722 return err_closed();
723 return PyInt_FromLong((long) fileno(f->f_fp));
724}
725
726static PyObject *
727file_flush(PyFileObject *f)
728{
729 int res;
730
731 if (f->f_fp == NULL)
732 return err_closed();
733 Py_BEGIN_ALLOW_THREADS
734 errno = 0;
735 res = fflush(f->f_fp);
736 Py_END_ALLOW_THREADS
737 if (res != 0) {
738 PyErr_SetFromErrno(PyExc_IOError);
739 clearerr(f->f_fp);
740 return NULL;
741 }
742 Py_INCREF(Py_None);
743 return Py_None;
744}
745
746static PyObject *
747file_isatty(PyFileObject *f)
748{
749 long res;
750 if (f->f_fp == NULL)
751 return err_closed();
752 Py_BEGIN_ALLOW_THREADS
753 res = isatty((int)fileno(f->f_fp));
754 Py_END_ALLOW_THREADS
755 return PyBool_FromLong(res);
756}
757
758
/* Buffer growth steps used by new_buffersize().
   SMALLCHUNK is BUFSIZ, but never less than 8192. */
#if BUFSIZ >= 8192
#define SMALLCHUNK BUFSIZ
#else
#define SMALLCHUNK 8192
#endif

/* BIGCHUNK is 512K, or 16K where int is narrower than 4 bytes. */
#if SIZEOF_INT >= 4
#define BIGCHUNK (512 * 1024)
#else
#define BIGCHUNK (512 * 32)
#endif
770
771static size_t
772new_buffersize(PyFileObject *f, size_t currentsize)
773{
774#ifdef HAVE_FSTAT
775 off_t pos, end;
776 struct stat st;
777 if (fstat(fileno(f->f_fp), &st) == 0) {
778 end = st.st_size;
779 /* The following is not a bug: we really need to call lseek()
780 *and* ftell(). The reason is that some stdio libraries
781 mistakenly flush their buffer when ftell() is called and
782 the lseek() call it makes fails, thereby throwing away
783 data that cannot be recovered in any way. To avoid this,
784 we first test lseek(), and only call ftell() if lseek()
785 works. We can't use the lseek() value either, because we
786 need to take the amount of buffered data into account.
787 (Yet another reason why stdio stinks. :-) */
788 pos = lseek(fileno(f->f_fp), 0L, SEEK_CUR);
789 if (pos >= 0) {
790 pos = ftell(f->f_fp);
791 }
792 if (pos < 0)
793 clearerr(f->f_fp);
794 if (end > pos && pos >= 0)
795 return currentsize + end - pos + 1;
796 /* Add 1 so if the file were to grow we'd notice. */
797 }
798#endif
799 if (currentsize > SMALLCHUNK) {
800 /* Keep doubling until we reach BIGCHUNK;
801 then keep adding BIGCHUNK. */
802 if (currentsize <= BIGCHUNK)
803 return currentsize + currentsize;
804 else
805 return currentsize + BIGCHUNK;
806 }
807 return currentsize + SMALLCHUNK;
808}
809
/* BLOCKED_ERRNO(x): true if errno value x is EWOULDBLOCK or EAGAIN,
   testing only the codes this platform defines (and only once when both
   share a value); always false when neither is defined. */
#if defined(EWOULDBLOCK) && defined(EAGAIN) && EWOULDBLOCK != EAGAIN
#define BLOCKED_ERRNO(x) ((x) == EWOULDBLOCK || (x) == EAGAIN)
#elif defined(EWOULDBLOCK)
#define BLOCKED_ERRNO(x) ((x) == EWOULDBLOCK)
#elif defined(EAGAIN)
#define BLOCKED_ERRNO(x) ((x) == EAGAIN)
#else
#define BLOCKED_ERRNO(x) 0
#endif
823
824static PyObject *
825file_read(PyFileObject *f, PyObject *args)
826{
827 long bytesrequested = -1;
828 size_t bytesread, buffersize, chunksize;
829 PyObject *v;
830
831 if (f->f_fp == NULL)
832 return err_closed();
833 /* refuse to mix with f.next() */
834 if (f->f_buf != NULL &&
835 (f->f_bufend - f->f_bufptr) > 0 &&
836 f->f_buf[0] != '\0')
837 return err_iterbuffered();
838 if (!PyArg_ParseTuple(args, "|l:read", &bytesrequested))
839 return NULL;
840 if (bytesrequested < 0)
841 buffersize = new_buffersize(f, (size_t)0);
842 else
843 buffersize = bytesrequested;
844 if (buffersize > PY_SSIZE_T_MAX) {
845 PyErr_SetString(PyExc_OverflowError,
846 "requested number of bytes is more than a Python string can hold");
847 return NULL;
848 }
849 v = PyString_FromStringAndSize((char *)NULL, buffersize);
850 if (v == NULL)
851 return NULL;
852 bytesread = 0;
853 for (;;) {
854 Py_BEGIN_ALLOW_THREADS
855 errno = 0;
856 chunksize = Py_UniversalNewlineFread(BUF(v) + bytesread,
857 buffersize - bytesread, f->f_fp, (PyObject *)f);
858 Py_END_ALLOW_THREADS
859 if (chunksize == 0) {
860 if (!ferror(f->f_fp))
861 break;
862 clearerr(f->f_fp);
863 /* When in non-blocking mode, data shouldn't
864 * be discarded if a blocking signal was
865 * received. That will also happen if
866 * chunksize != 0, but bytesread < buffersize. */
867 if (bytesread > 0 && BLOCKED_ERRNO(errno))
868 break;
869 PyErr_SetFromErrno(PyExc_IOError);
870 Py_DECREF(v);
871 return NULL;
872 }
873 bytesread += chunksize;
874 if (bytesread < buffersize) {
875 clearerr(f->f_fp);
876 break;
877 }
878 if (bytesrequested < 0) {
879 buffersize = new_buffersize(f, buffersize);
880 if (_PyString_Resize(&v, buffersize) < 0)
881 return NULL;
882 } else {
883 /* Got what was requested. */
884 break;
885 }
886 }
887 if (bytesread != buffersize)
888 _PyString_Resize(&v, bytesread);
889 return v;
890}
891
892static PyObject *
893file_readinto(PyFileObject *f, PyObject *args)
894{
895 char *ptr;
896 Py_ssize_t ntodo;
897 Py_ssize_t ndone, nnow;
898
899 if (f->f_fp == NULL)
900 return err_closed();
901 /* refuse to mix with f.next() */
902 if (f->f_buf != NULL &&
903 (f->f_bufend - f->f_bufptr) > 0 &&
904 f->f_buf[0] != '\0')
905 return err_iterbuffered();
906 if (!PyArg_ParseTuple(args, "w#", &ptr, &ntodo))
907 return NULL;
908 ndone = 0;
909 while (ntodo > 0) {
910 Py_BEGIN_ALLOW_THREADS
911 errno = 0;
912 nnow = Py_UniversalNewlineFread(ptr+ndone, ntodo, f->f_fp,
913 (PyObject *)f);
914 Py_END_ALLOW_THREADS
915 if (nnow == 0) {
916 if (!ferror(f->f_fp))
917 break;
918 PyErr_SetFromErrno(PyExc_IOError);
919 clearerr(f->f_fp);
920 return NULL;
921 }
922 ndone += nnow;
923 ntodo -= nnow;
924 }
925 return PyInt_FromSsize_t(ndone);
926}
927
928/**************************************************************************
929Routine to get next line using platform fgets().
930
931Under MSVC 6:
932
933+ MS threadsafe getc is very slow (multiple layers of function calls before+
934 after each character, to lock+unlock the stream).
935+ The stream-locking functions are MS-internal -- can't access them from user
936 code.
937+ There's nothing Tim could find in the MS C or platform SDK libraries that
938 can worm around this.
939+ MS fgets locks/unlocks only once per line; it's the only hook we have.
940
941So we use fgets for speed(!), despite that it's painful.
942
943MS realloc is also slow.
944
945Reports from other platforms on this method vs getc_unlocked (which MS doesn't
946have):
947 Linux a wash
948 Solaris a wash
949 Tru64 Unix getline_via_fgets significantly faster
950
951CAUTION: The C std isn't clear about this: in those cases where fgets
952writes something into the buffer, can it write into any position beyond the
953required trailing null byte? MSVC 6 fgets does not, and no platform is (yet)
954known on which it does; and it would be a strange way to code fgets. Still,
955getline_via_fgets may not work correctly if it does. The std test
956test_bufio.py should fail if platform fgets() routinely writes beyond the
957trailing null byte. #define DONT_USE_FGETS_IN_GETLINE to disable this code.
958**************************************************************************/
959
/* Decide whether getline_via_fgets() is compiled in.  It is used if
 * asked for, or by default on platforms without getc_unlocked(), unless
 * explicitly turned off.  Spelled out:
 *     On a platform with getc_unlocked():
 *         By default, use getc_unlocked().
 *         If you want to use fgets() instead, #define USE_FGETS_IN_GETLINE.
 *     On a platform without getc_unlocked():
 *         By default, use fgets().
 *         If you don't want to use fgets(), #define DONT_USE_FGETS_IN_GETLINE.
 * DONT_USE_FGETS_IN_GETLINE wins over USE_FGETS_IN_GETLINE.
 */
#if defined(DONT_USE_FGETS_IN_GETLINE)
#undef USE_FGETS_IN_GETLINE
#elif !defined(HAVE_GETC_UNLOCKED) && !defined(USE_FGETS_IN_GETLINE)
#define USE_FGETS_IN_GETLINE
#endif
976
#ifdef USE_FGETS_IN_GETLINE
/* Read one line of arbitrary length from fp using fgets() and return it
 * as a new string object, including the trailing newline if there was
 * one.  At end of file the result is whatever was read so far (possibly
 * an empty string).  Returns NULL with an exception set if a signal
 * handler raised, if memory ran out, or if the line is longer than a
 * Python string can hold.
 *
 * fgets() does not report how much it stored, so the free part of the
 * buffer is filled with '\n' before every call and then inspected to
 * find out what fgets() overwrote.
 */
static PyObject*
getline_via_fgets(FILE *fp)
{
/* INITBUFSIZE is the maximum line length that lets us get away with the fast
 * no-realloc, one-fgets()-call path.  Boosting it isn't free, because we have
 * to fill this much of the buffer with a known value in order to figure out
 * how much of the buffer fgets() overwrites.  So if INITBUFSIZE is larger
 * than "most" lines, we waste time filling unused buffer slots.  100 is
 * surely adequate for most peoples' email archives, chewing over source code,
 * etc -- "regular old text files".
 * MAXBUFSIZE is the maximum line length that lets us get away with the less
 * fast (but still zippy) no-realloc, two-fgets()-call path.  See above for
 * cautions about boosting that.  300 was chosen because the worst real-life
 * text-crunching job reported on Python-Dev was a mail-log crawler where over
 * half the lines were 254 chars.
 */
#define INITBUFSIZE 100
#define MAXBUFSIZE 300
    char* p;                /* temp */
    char buf[MAXBUFSIZE];
    PyObject* v;            /* the string object result */
    char* pvfree;           /* address of next free slot */
    char* pvend;            /* address one beyond last free slot */
    size_t nfree;           /* # of free buffer slots; pvend-pvfree */
    size_t total_v_size;    /* total # of slots in buffer */
    size_t increment;       /* amount to increment the buffer */

    /* Optimize for normal case:  avoid _PyString_Resize if at all
     * possible via first reading into stack buffer "buf".
     */
    total_v_size = INITBUFSIZE;     /* start small and pray */
    pvfree = buf;
    for (;;) {
        Py_BEGIN_ALLOW_THREADS
        pvend = buf + total_v_size;
        nfree = pvend - pvfree;
        memset(pvfree, '\n', nfree);
        assert(nfree < INT_MAX); /* Should be at most MAXBUFSIZE */
        p = fgets(pvfree, (int)nfree, fp);
        Py_END_ALLOW_THREADS

        if (p == NULL) {
            /* Nothing more could be read: return what we have. */
            clearerr(fp);
            if (PyErr_CheckSignals())
                return NULL;
            v = PyString_FromStringAndSize(buf, pvfree - buf);
            return v;
        }
        /* fgets read *something* */
        p = memchr(pvfree, '\n', nfree);
        if (p != NULL) {
            /* Did the \n come from fgets or from us?
             * Since fgets stops at the first \n, and then writes
             * \0, if it's from fgets a \0 must be next.  But if
             * that's so, it could not have come from us, since
             * the \n's we filled the buffer with have only more
             * \n's to the right.
             */
            if (p+1 < pvend && *(p+1) == '\0') {
                /* It's from fgets:  we win!  In particular,
                 * we haven't done any mallocs yet, and can
                 * build the final result on the first try.
                 */
                ++p;    /* include \n from fgets */
            }
            else {
                /* Must be from us:  fgets didn't fill the
                 * buffer and didn't find a newline, so it
                 * must be the last and newline-free line of
                 * the file.
                 */
                assert(p > pvfree && *(p-1) == '\0');
                --p;    /* don't include \0 from fgets */
            }
            v = PyString_FromStringAndSize(buf, p - buf);
            return v;
        }
        /* yuck:  fgets overwrote all the newlines, i.e. the entire
         * buffer.  So this line isn't over yet, or maybe it is but
         * we're exactly at EOF.  If we haven't already, try using the
         * rest of the stack buffer.
         */
        assert(*(pvend-1) == '\0');
        if (pvfree == buf) {
            pvfree = pvend - 1;     /* overwrite trailing null */
            total_v_size = MAXBUFSIZE;
        }
        else
            break;
    }

    /* The stack buffer isn't big enough; malloc a string object and read
     * into its buffer.  Sizes are passed as Py_ssize_t, matching the
     * PY_SSIZE_T_MAX limit checked below; a cast to int would truncate
     * sizes above INT_MAX on 64-bit builds.
     */
    total_v_size = MAXBUFSIZE << 1;
    v = PyString_FromStringAndSize((char*)NULL, (Py_ssize_t)total_v_size);
    if (v == NULL)
        return v;
    /* copy over everything except the last null byte */
    memcpy(BUF(v), buf, MAXBUFSIZE-1);
    pvfree = BUF(v) + MAXBUFSIZE - 1;

    /* Keep reading stuff into v; if it ever ends successfully, break
     * after setting p one beyond the end of the line.  The code here is
     * very much like the code above, except reads into v's buffer; see
     * the code above for detailed comments about the logic.
     */
    for (;;) {
        Py_BEGIN_ALLOW_THREADS
        pvend = BUF(v) + total_v_size;
        nfree = pvend - pvfree;
        memset(pvfree, '\n', nfree);
        assert(nfree < INT_MAX);
        p = fgets(pvfree, (int)nfree, fp);
        Py_END_ALLOW_THREADS

        if (p == NULL) {
            clearerr(fp);
            if (PyErr_CheckSignals()) {
                Py_DECREF(v);
                return NULL;
            }
            p = pvfree;
            break;
        }
        p = memchr(pvfree, '\n', nfree);
        if (p != NULL) {
            if (p+1 < pvend && *(p+1) == '\0') {
                /* \n came from fgets */
                ++p;
                break;
            }
            /* \n came from us; last line of file, no newline */
            assert(p > pvfree && *(p-1) == '\0');
            --p;
            break;
        }
        /* expand buffer and try again */
        assert(*(pvend-1) == '\0');
        increment = total_v_size >> 2;  /* mild exponential growth */
        total_v_size += increment;
        if (total_v_size > PY_SSIZE_T_MAX) {
            PyErr_SetString(PyExc_OverflowError,
                "line is longer than a Python string can hold");
            Py_DECREF(v);
            return NULL;
        }
        /* On failure v has already been set to NULL by the resize. */
        if (_PyString_Resize(&v, (Py_ssize_t)total_v_size) < 0)
            return NULL;
        /* overwrite the trailing null byte */
        pvfree = BUF(v) + (total_v_size - increment - 1);
    }
    /* Trim v to the line's real length (p is one beyond its end). */
    if (BUF(v) + total_v_size != p)
        _PyString_Resize(&v, p - BUF(v));
    return v;
#undef INITBUFSIZE
#undef MAXBUFSIZE
}
#endif /* ifdef USE_FGETS_IN_GETLINE */
1137
1138/* Internal routine to get a line.
1139 Size argument interpretation:
1140 > 0: max length;
1141 <= 0: read arbitrary line
1142*/
1143
1144static PyObject *
1145get_line(PyFileObject *f, int n)
1146{
1147 FILE *fp = f->f_fp;
1148 int c;
1149 char *buf, *end;
1150 size_t total_v_size; /* total # of slots in buffer */
1151 size_t used_v_size; /* # used slots in buffer */
1152 size_t increment; /* amount to increment the buffer */
1153 PyObject *v;
1154 int newlinetypes = f->f_newlinetypes;
1155 int skipnextlf = f->f_skipnextlf;
1156 int univ_newline = f->f_univ_newline;
1157
1158#if defined(USE_FGETS_IN_GETLINE)
1159 if (n <= 0 && !univ_newline )
1160 return getline_via_fgets(fp);
1161#endif
1162 total_v_size = n > 0 ? n : 100;
1163 v = PyString_FromStringAndSize((char *)NULL, total_v_size);
1164 if (v == NULL)
1165 return NULL;
1166 buf = BUF(v);
1167 end = buf + total_v_size;
1168
1169 for (;;) {
1170 Py_BEGIN_ALLOW_THREADS
1171 FLOCKFILE(fp);
1172 if (univ_newline) {
1173 c = 'x'; /* Shut up gcc warning */
1174 while ( buf != end && (c = GETC(fp)) != EOF ) {
1175 if (skipnextlf ) {
1176 skipnextlf = 0;
1177 if (c == '\n') {
1178 /* Seeing a \n here with
1179 * skipnextlf true means we
1180 * saw a \r before.
1181 */
1182 newlinetypes |= NEWLINE_CRLF;
1183 c = GETC(fp);
1184 if (c == EOF) break;
1185 } else {
1186 newlinetypes |= NEWLINE_CR;
1187 }
1188 }
1189 if (c == '\r') {
1190 skipnextlf = 1;
1191 c = '\n';
1192 } else if ( c == '\n')
1193 newlinetypes |= NEWLINE_LF;
1194 *buf++ = c;
1195 if (c == '\n') break;
1196 }
1197 if ( c == EOF && skipnextlf )
1198 newlinetypes |= NEWLINE_CR;
1199 } else /* If not universal newlines use the normal loop */
1200 while ((c = GETC(fp)) != EOF &&
1201 (*buf++ = c) != '\n' &&
1202 buf != end)
1203 ;
1204 FUNLOCKFILE(fp);
1205 Py_END_ALLOW_THREADS
1206 f->f_newlinetypes = newlinetypes;
1207 f->f_skipnextlf = skipnextlf;
1208 if (c == '\n')
1209 break;
1210 if (c == EOF) {
1211 if (ferror(fp)) {
1212 PyErr_SetFromErrno(PyExc_IOError);
1213 clearerr(fp);
1214 Py_DECREF(v);
1215 return NULL;
1216 }
1217 clearerr(fp);
1218 if (PyErr_CheckSignals()) {
1219 Py_DECREF(v);
1220 return NULL;
1221 }
1222 break;
1223 }
1224 /* Must be because buf == end */
1225 if (n > 0)
1226 break;
1227 used_v_size = total_v_size;
1228 increment = total_v_size >> 2; /* mild exponential growth */
1229 total_v_size += increment;
1230 if (total_v_size > PY_SSIZE_T_MAX) {
1231 PyErr_SetString(PyExc_OverflowError,
1232 "line is longer than a Python string can hold");
1233 Py_DECREF(v);
1234 return NULL;
1235 }
1236 if (_PyString_Resize(&v, total_v_size) < 0)
1237 return NULL;
1238 buf = BUF(v) + used_v_size;
1239 end = BUF(v) + total_v_size;
1240 }
1241
1242 used_v_size = buf - BUF(v);
1243 if (used_v_size != total_v_size)
1244 _PyString_Resize(&v, used_v_size);
1245 return v;
1246}
1247
1248/* External C interface */
1249
1250PyObject *
1251PyFile_GetLine(PyObject *f, int n)
1252{
1253 PyObject *result;
1254
1255 if (f == NULL) {
1256 PyErr_BadInternalCall();
1257 return NULL;
1258 }
1259
1260 if (PyFile_Check(f)) {
1261 PyFileObject *fo = (PyFileObject *)f;
1262 if (fo->f_fp == NULL)
1263 return err_closed();
1264 /* refuse to mix with f.next() */
1265 if (fo->f_buf != NULL &&
1266 (fo->f_bufend - fo->f_bufptr) > 0 &&
1267 fo->f_buf[0] != '\0')
1268 return err_iterbuffered();
1269 result = get_line(fo, n);
1270 }
1271 else {
1272 PyObject *reader;
1273 PyObject *args;
1274
1275 reader = PyObject_GetAttrString(f, "readline");
1276 if (reader == NULL)
1277 return NULL;
1278 if (n <= 0)
1279 args = PyTuple_New(0);
1280 else
1281 args = Py_BuildValue("(i)", n);
1282 if (args == NULL) {
1283 Py_DECREF(reader);
1284 return NULL;
1285 }
1286 result = PyEval_CallObject(reader, args);
1287 Py_DECREF(reader);
1288 Py_DECREF(args);
1289 if (result != NULL && !PyString_Check(result) &&
1290 !PyUnicode_Check(result)) {
1291 Py_DECREF(result);
1292 result = NULL;
1293 PyErr_SetString(PyExc_TypeError,
1294 "object.readline() returned non-string");
1295 }
1296 }
1297
1298 if (n < 0 && result != NULL && PyString_Check(result)) {
1299 char *s = PyString_AS_STRING(result);
1300 Py_ssize_t len = PyString_GET_SIZE(result);
1301 if (len == 0) {
1302 Py_DECREF(result);
1303 result = NULL;
1304 PyErr_SetString(PyExc_EOFError,
1305 "EOF when reading a line");
1306 }
1307 else if (s[len-1] == '\n') {
1308 if (result->ob_refcnt == 1)
1309 _PyString_Resize(&result, len-1);
1310 else {
1311 PyObject *v;
1312 v = PyString_FromStringAndSize(s, len-1);
1313 Py_DECREF(result);
1314 result = v;
1315 }
1316 }
1317 }
1318#ifdef Py_USING_UNICODE
1319 if (n < 0 && result != NULL && PyUnicode_Check(result)) {
1320 Py_UNICODE *s = PyUnicode_AS_UNICODE(result);
1321 Py_ssize_t len = PyUnicode_GET_SIZE(result);
1322 if (len == 0) {
1323 Py_DECREF(result);
1324 result = NULL;
1325 PyErr_SetString(PyExc_EOFError,
1326 "EOF when reading a line");
1327 }
1328 else if (s[len-1] == '\n') {
1329 if (result->ob_refcnt == 1)
1330 PyUnicode_Resize(&result, len-1);
1331 else {
1332 PyObject *v;
1333 v = PyUnicode_FromUnicode(s, len-1);
1334 Py_DECREF(result);
1335 result = v;
1336 }
1337 }
1338 }
1339#endif
1340 return result;
1341}
1342
1343/* Python method */
1344
1345static PyObject *
1346file_readline(PyFileObject *f, PyObject *args)
1347{
1348 int n = -1;
1349
1350 if (f->f_fp == NULL)
1351 return err_closed();
1352 /* refuse to mix with f.next() */
1353 if (f->f_buf != NULL &&
1354 (f->f_bufend - f->f_bufptr) > 0 &&
1355 f->f_buf[0] != '\0')
1356 return err_iterbuffered();
1357 if (!PyArg_ParseTuple(args, "|i:readline", &n))
1358 return NULL;
1359 if (n == 0)
1360 return PyString_FromString("");
1361 if (n < 0)
1362 n = 0;
1363 return get_line(f, n);
1364}
1365
1366static PyObject *
1367file_readlines(PyFileObject *f, PyObject *args)
1368{
1369 long sizehint = 0;
1370 PyObject *list;
1371 PyObject *line;
1372 char small_buffer[SMALLCHUNK];
1373 char *buffer = small_buffer;
1374 size_t buffersize = SMALLCHUNK;
1375 PyObject *big_buffer = NULL;
1376 size_t nfilled = 0;
1377 size_t nread;
1378 size_t totalread = 0;
1379 char *p, *q, *end;
1380 int err;
1381 int shortread = 0;
1382
1383 if (f->f_fp == NULL)
1384 return err_closed();
1385 /* refuse to mix with f.next() */
1386 if (f->f_buf != NULL &&
1387 (f->f_bufend - f->f_bufptr) > 0 &&
1388 f->f_buf[0] != '\0')
1389 return err_iterbuffered();
1390 if (!PyArg_ParseTuple(args, "|l:readlines", &sizehint))
1391 return NULL;
1392 if ((list = PyList_New(0)) == NULL)
1393 return NULL;
1394 for (;;) {
1395 if (shortread)
1396 nread = 0;
1397 else {
1398 Py_BEGIN_ALLOW_THREADS
1399 errno = 0;
1400 nread = Py_UniversalNewlineFread(buffer+nfilled,
1401 buffersize-nfilled, f->f_fp, (PyObject *)f);
1402 Py_END_ALLOW_THREADS
1403 shortread = (nread < buffersize-nfilled);
1404 }
1405 if (nread == 0) {
1406 sizehint = 0;
1407 if (!ferror(f->f_fp))
1408 break;
1409 PyErr_SetFromErrno(PyExc_IOError);
1410 clearerr(f->f_fp);
1411 error:
1412 Py_DECREF(list);
1413 list = NULL;
1414 goto cleanup;
1415 }
1416 totalread += nread;
1417 p = (char *)memchr(buffer+nfilled, '\n', nread);
1418 if (p == NULL) {
1419 /* Need a larger buffer to fit this line */
1420 nfilled += nread;
1421 buffersize *= 2;
1422 if (buffersize > PY_SSIZE_T_MAX) {
1423 PyErr_SetString(PyExc_OverflowError,
1424 "line is longer than a Python string can hold");
1425 goto error;
1426 }
1427 if (big_buffer == NULL) {
1428 /* Create the big buffer */
1429 big_buffer = PyString_FromStringAndSize(
1430 NULL, buffersize);
1431 if (big_buffer == NULL)
1432 goto error;
1433 buffer = PyString_AS_STRING(big_buffer);
1434 memcpy(buffer, small_buffer, nfilled);
1435 }
1436 else {
1437 /* Grow the big buffer */
1438 if ( _PyString_Resize(&big_buffer, buffersize) < 0 )
1439 goto error;
1440 buffer = PyString_AS_STRING(big_buffer);
1441 }
1442 continue;
1443 }
1444 end = buffer+nfilled+nread;
1445 q = buffer;
1446 do {
1447 /* Process complete lines */
1448 p++;
1449 line = PyString_FromStringAndSize(q, p-q);
1450 if (line == NULL)
1451 goto error;
1452 err = PyList_Append(list, line);
1453 Py_DECREF(line);
1454 if (err != 0)
1455 goto error;
1456 q = p;
1457 p = (char *)memchr(q, '\n', end-q);
1458 } while (p != NULL);
1459 /* Move the remaining incomplete line to the start */
1460 nfilled = end-q;
1461 memmove(buffer, q, nfilled);
1462 if (sizehint > 0)
1463 if (totalread >= (size_t)sizehint)
1464 break;
1465 }
1466 if (nfilled != 0) {
1467 /* Partial last line */
1468 line = PyString_FromStringAndSize(buffer, nfilled);
1469 if (line == NULL)
1470 goto error;
1471 if (sizehint > 0) {
1472 /* Need to complete the last line */
1473 PyObject *rest = get_line(f, 0);
1474 if (rest == NULL) {
1475 Py_DECREF(line);
1476 goto error;
1477 }
1478 PyString_Concat(&line, rest);
1479 Py_DECREF(rest);
1480 if (line == NULL)
1481 goto error;
1482 }
1483 err = PyList_Append(list, line);
1484 Py_DECREF(line);
1485 if (err != 0)
1486 goto error;
1487 }
1488 cleanup:
1489 Py_XDECREF(big_buffer);
1490 return list;
1491}
1492
1493static PyObject *
1494file_write(PyFileObject *f, PyObject *args)
1495{
1496 char *s;
1497 Py_ssize_t n, n2;
1498 if (f->f_fp == NULL)
1499 return err_closed();
1500 if (!PyArg_ParseTuple(args, f->f_binary ? "s#" : "t#", &s, &n))
1501 return NULL;
1502 f->f_softspace = 0;
1503 Py_BEGIN_ALLOW_THREADS
1504 errno = 0;
1505 n2 = fwrite(s, 1, n, f->f_fp);
1506 Py_END_ALLOW_THREADS
1507 if (n2 != n) {
1508 PyErr_SetFromErrno(PyExc_IOError);
1509 clearerr(f->f_fp);
1510 return NULL;
1511 }
1512 Py_INCREF(Py_None);
1513 return Py_None;
1514}
1515
1516static PyObject *
1517file_writelines(PyFileObject *f, PyObject *seq)
1518{
1519#define CHUNKSIZE 1000
1520 PyObject *list, *line;
1521 PyObject *it; /* iter(seq) */
1522 PyObject *result;
1523 int index, islist;
1524 Py_ssize_t i, j, nwritten, len;
1525
1526 assert(seq != NULL);
1527 if (f->f_fp == NULL)
1528 return err_closed();
1529
1530 result = NULL;
1531 list = NULL;
1532 islist = PyList_Check(seq);
1533 if (islist)
1534 it = NULL;
1535 else {
1536 it = PyObject_GetIter(seq);
1537 if (it == NULL) {
1538 PyErr_SetString(PyExc_TypeError,
1539 "writelines() requires an iterable argument");
1540 return NULL;
1541 }
1542 /* From here on, fail by going to error, to reclaim "it". */
1543 list = PyList_New(CHUNKSIZE);
1544 if (list == NULL)
1545 goto error;
1546 }
1547
1548 /* Strategy: slurp CHUNKSIZE lines into a private list,
1549 checking that they are all strings, then write that list
1550 without holding the interpreter lock, then come back for more. */
1551 for (index = 0; ; index += CHUNKSIZE) {
1552 if (islist) {
1553 Py_XDECREF(list);
1554 list = PyList_GetSlice(seq, index, index+CHUNKSIZE);
1555 if (list == NULL)
1556 goto error;
1557 j = PyList_GET_SIZE(list);
1558 }
1559 else {
1560 for (j = 0; j < CHUNKSIZE; j++) {
1561 line = PyIter_Next(it);
1562 if (line == NULL) {
1563 if (PyErr_Occurred())
1564 goto error;
1565 break;
1566 }
1567 PyList_SetItem(list, j, line);
1568 }
1569 }
1570 if (j == 0)
1571 break;
1572
1573 /* Check that all entries are indeed strings. If not,
1574 apply the same rules as for file.write() and
1575 convert the results to strings. This is slow, but
1576 seems to be the only way since all conversion APIs
1577 could potentially execute Python code. */
1578 for (i = 0; i < j; i++) {
1579 PyObject *v = PyList_GET_ITEM(list, i);
1580 if (!PyString_Check(v)) {
1581 const char *buffer;
1582 if (((f->f_binary &&
1583 PyObject_AsReadBuffer(v,
1584 (const void**)&buffer,
1585 &len)) ||
1586 PyObject_AsCharBuffer(v,
1587 &buffer,
1588 &len))) {
1589 PyErr_SetString(PyExc_TypeError,
1590 "writelines() argument must be a sequence of strings");
1591 goto error;
1592 }
1593 line = PyString_FromStringAndSize(buffer,
1594 len);
1595 if (line == NULL)
1596 goto error;
1597 Py_DECREF(v);
1598 PyList_SET_ITEM(list, i, line);
1599 }
1600 }
1601
1602 /* Since we are releasing the global lock, the
1603 following code may *not* execute Python code. */
1604 Py_BEGIN_ALLOW_THREADS
1605 f->f_softspace = 0;
1606 errno = 0;
1607 for (i = 0; i < j; i++) {
1608 line = PyList_GET_ITEM(list, i);
1609 len = PyString_GET_SIZE(line);
1610 nwritten = fwrite(PyString_AS_STRING(line),
1611 1, len, f->f_fp);
1612 if (nwritten != len) {
1613 Py_BLOCK_THREADS
1614 PyErr_SetFromErrno(PyExc_IOError);
1615 clearerr(f->f_fp);
1616 goto error;
1617 }
1618 }
1619 Py_END_ALLOW_THREADS
1620
1621 if (j < CHUNKSIZE)
1622 break;
1623 }
1624
1625 Py_INCREF(Py_None);
1626 result = Py_None;
1627 error:
1628 Py_XDECREF(list);
1629 Py_XDECREF(it);
1630 return result;
1631#undef CHUNKSIZE
1632}
1633
1634static PyObject *
1635file_self(PyFileObject *f)
1636{
1637 if (f->f_fp == NULL)
1638 return err_closed();
1639 Py_INCREF(f);
1640 return (PyObject *)f;
1641}
1642
1643static PyObject *
1644file_exit(PyFileObject *f, PyObject *args)
1645{
1646 PyObject *ret = file_close(f);
1647 if (!ret)
1648 /* If error occurred, pass through */
1649 return NULL;
1650 Py_DECREF(ret);
1651 /* We cannot return the result of close since a true
1652 * value will be interpreted as "yes, swallow the
1653 * exception if one was raised inside the with block". */
1654 Py_RETURN_NONE;
1655}
1656
1657PyDoc_STRVAR(readline_doc,
1658"readline([size]) -> next line from the file, as a string.\n"
1659"\n"
1660"Retain newline. A non-negative size argument limits the maximum\n"
1661"number of bytes to return (an incomplete line may be returned then).\n"
1662"Return an empty string at EOF.");
1663
1664PyDoc_STRVAR(read_doc,
1665"read([size]) -> read at most size bytes, returned as a string.\n"
1666"\n"
1667"If the size argument is negative or omitted, read until EOF is reached.\n"
1668"Notice that when in non-blocking mode, less data than what was requested\n"
1669"may be returned, even if no size parameter was given.");
1670
1671PyDoc_STRVAR(write_doc,
1672"write(str) -> None. Write string str to file.\n"
1673"\n"
1674"Note that due to buffering, flush() or close() may be needed before\n"
1675"the file on disk reflects the data written.");
1676
1677PyDoc_STRVAR(fileno_doc,
1678"fileno() -> integer \"file descriptor\".\n"
1679"\n"
1680"This is needed for lower-level file interfaces, such os.read().");
1681
1682PyDoc_STRVAR(seek_doc,
1683"seek(offset[, whence]) -> None. Move to new file position.\n"
1684"\n"
1685"Argument offset is a byte count. Optional argument whence defaults to\n"
1686"0 (offset from start of file, offset should be >= 0); other values are 1\n"
1687"(move relative to current position, positive or negative), and 2 (move\n"
1688"relative to end of file, usually negative, although many platforms allow\n"
1689"seeking beyond the end of a file). If the file is opened in text mode,\n"
1690"only offsets returned by tell() are legal. Use of other offsets causes\n"
1691"undefined behavior."
1692"\n"
1693"Note that not all file objects are seekable.");
1694
1695#ifdef HAVE_FTRUNCATE
1696PyDoc_STRVAR(truncate_doc,
1697"truncate([size]) -> None. Truncate the file to at most size bytes.\n"
1698"\n"
1699"Size defaults to the current file position, as returned by tell().");
1700#endif
1701
1702PyDoc_STRVAR(tell_doc,
1703"tell() -> current file position, an integer (may be a long integer).");
1704
1705PyDoc_STRVAR(readinto_doc,
1706"readinto() -> Undocumented. Don't use this; it may go away.");
1707
1708PyDoc_STRVAR(readlines_doc,
1709"readlines([size]) -> list of strings, each a line from the file.\n"
1710"\n"
1711"Call readline() repeatedly and return a list of the lines so read.\n"
1712"The optional size argument, if given, is an approximate bound on the\n"
1713"total number of bytes in the lines returned.");
1714
1715PyDoc_STRVAR(xreadlines_doc,
1716"xreadlines() -> returns self.\n"
1717"\n"
1718"For backward compatibility. File objects now include the performance\n"
1719"optimizations previously implemented in the xreadlines module.");
1720
1721PyDoc_STRVAR(writelines_doc,
1722"writelines(sequence_of_strings) -> None. Write the strings to the file.\n"
1723"\n"
1724"Note that newlines are not added. The sequence can be any iterable object\n"
1725"producing strings. This is equivalent to calling write() for each string.");
1726
1727PyDoc_STRVAR(flush_doc,
1728"flush() -> None. Flush the internal I/O buffer.");
1729
1730PyDoc_STRVAR(close_doc,
1731"close() -> None or (perhaps) an integer. Close the file.\n"
1732"\n"
1733"Sets data attribute .closed to True. A closed file cannot be used for\n"
1734"further I/O operations. close() may be called more than once without\n"
1735"error. Some kinds of file objects (for example, opened by popen())\n"
1736"may return an exit status upon closing.");
1737
1738PyDoc_STRVAR(isatty_doc,
1739"isatty() -> true or false. True if the file is connected to a tty device.");
1740
1741PyDoc_STRVAR(enter_doc,
1742 "__enter__() -> self.");
1743
1744PyDoc_STRVAR(exit_doc,
1745 "__exit__(*excinfo) -> None. Closes the file.");
1746
1747static PyMethodDef file_methods[] = {
1748 {"readline", (PyCFunction)file_readline, METH_VARARGS, readline_doc},
1749 {"read", (PyCFunction)file_read, METH_VARARGS, read_doc},
1750 {"write", (PyCFunction)file_write, METH_VARARGS, write_doc},
1751 {"fileno", (PyCFunction)file_fileno, METH_NOARGS, fileno_doc},
1752 {"seek", (PyCFunction)file_seek, METH_VARARGS, seek_doc},
1753#ifdef HAVE_FTRUNCATE
1754 {"truncate", (PyCFunction)file_truncate, METH_VARARGS, truncate_doc},
1755#endif
1756 {"tell", (PyCFunction)file_tell, METH_NOARGS, tell_doc},
1757 {"readinto", (PyCFunction)file_readinto, METH_VARARGS, readinto_doc},
1758 {"readlines", (PyCFunction)file_readlines,METH_VARARGS, readlines_doc},
1759 {"xreadlines",(PyCFunction)file_self, METH_NOARGS, xreadlines_doc},
1760 {"writelines",(PyCFunction)file_writelines, METH_O, writelines_doc},
1761 {"flush", (PyCFunction)file_flush, METH_NOARGS, flush_doc},
1762 {"close", (PyCFunction)file_close, METH_NOARGS, close_doc},
1763 {"isatty", (PyCFunction)file_isatty, METH_NOARGS, isatty_doc},
1764 {"__enter__", (PyCFunction)file_self, METH_NOARGS, enter_doc},
1765 {"__exit__", (PyCFunction)file_exit, METH_VARARGS, exit_doc},
1766 {NULL, NULL} /* sentinel */
1767};
1768
1769#define OFF(x) offsetof(PyFileObject, x)
1770
1771static PyMemberDef file_memberlist[] = {
1772 {"softspace", T_INT, OFF(f_softspace), 0,
1773 "flag indicating that a space needs to be printed; used by print"},
1774 {"mode", T_OBJECT, OFF(f_mode), RO,
1775 "file mode ('r', 'U', 'w', 'a', possibly with 'b' or '+' added)"},
1776 {"name", T_OBJECT, OFF(f_name), RO,
1777 "file name"},
1778 {"encoding", T_OBJECT, OFF(f_encoding), RO,
1779 "file encoding"},
1780 /* getattr(f, "closed") is implemented without this table */
1781 {NULL} /* Sentinel */
1782};
1783
1784static PyObject *
1785get_closed(PyFileObject *f, void *closure)
1786{
1787 return PyBool_FromLong((long)(f->f_fp == 0));
1788}
1789static PyObject *
1790get_newlines(PyFileObject *f, void *closure)
1791{
1792 switch (f->f_newlinetypes) {
1793 case NEWLINE_UNKNOWN:
1794 Py_INCREF(Py_None);
1795 return Py_None;
1796 case NEWLINE_CR:
1797 return PyString_FromString("\r");
1798 case NEWLINE_LF:
1799 return PyString_FromString("\n");
1800 case NEWLINE_CR|NEWLINE_LF:
1801 return Py_BuildValue("(ss)", "\r", "\n");
1802 case NEWLINE_CRLF:
1803 return PyString_FromString("\r\n");
1804 case NEWLINE_CR|NEWLINE_CRLF:
1805 return Py_BuildValue("(ss)", "\r", "\r\n");
1806 case NEWLINE_LF|NEWLINE_CRLF:
1807 return Py_BuildValue("(ss)", "\n", "\r\n");
1808 case NEWLINE_CR|NEWLINE_LF|NEWLINE_CRLF:
1809 return Py_BuildValue("(sss)", "\r", "\n", "\r\n");
1810 default:
1811 PyErr_Format(PyExc_SystemError,
1812 "Unknown newlines value 0x%x\n",
1813 f->f_newlinetypes);
1814 return NULL;
1815 }
1816}
1817
1818static PyGetSetDef file_getsetlist[] = {
1819 {"closed", (getter)get_closed, NULL, "True if the file is closed"},
1820 {"newlines", (getter)get_newlines, NULL,
1821 "end-of-line convention used in this file"},
1822 {0},
1823};
1824
1825static void
1826drop_readahead(PyFileObject *f)
1827{
1828 if (f->f_buf != NULL) {
1829 PyMem_Free(f->f_buf);
1830 f->f_buf = NULL;
1831 }
1832}
1833
1834/* Make sure that file has a readahead buffer with at least one byte
1835 (unless at EOF) and no more than bufsize. Returns negative value on
1836 error, will set MemoryError if bufsize bytes cannot be allocated. */
1837static int
1838readahead(PyFileObject *f, int bufsize)
1839{
1840 Py_ssize_t chunksize;
1841
1842 if (f->f_buf != NULL) {
1843 if( (f->f_bufend - f->f_bufptr) >= 1)
1844 return 0;
1845 else
1846 drop_readahead(f);
1847 }
1848 if ((f->f_buf = (char *)PyMem_Malloc(bufsize)) == NULL) {
1849 PyErr_NoMemory();
1850 return -1;
1851 }
1852 Py_BEGIN_ALLOW_THREADS
1853 errno = 0;
1854 chunksize = Py_UniversalNewlineFread(
1855 f->f_buf, bufsize, f->f_fp, (PyObject *)f);
1856 Py_END_ALLOW_THREADS
1857 if (chunksize == 0) {
1858 if (ferror(f->f_fp)) {
1859 PyErr_SetFromErrno(PyExc_IOError);
1860 clearerr(f->f_fp);
1861 drop_readahead(f);
1862 return -1;
1863 }
1864 }
1865 f->f_bufptr = f->f_buf;
1866 f->f_bufend = f->f_buf + chunksize;
1867 return 0;
1868}
1869
1870/* Used by file_iternext. The returned string will start with 'skip'
1871 uninitialized bytes followed by the remainder of the line. Don't be
1872 horrified by the recursive call: maximum recursion depth is limited by
1873 logarithmic buffer growth to about 50 even when reading a 1gb line. */
1874
1875static PyStringObject *
1876readahead_get_line_skip(PyFileObject *f, int skip, int bufsize)
1877{
1878 PyStringObject* s;
1879 char *bufptr;
1880 char *buf;
1881 Py_ssize_t len;
1882
1883 if (f->f_buf == NULL)
1884 if (readahead(f, bufsize) < 0)
1885 return NULL;
1886
1887 len = f->f_bufend - f->f_bufptr;
1888 if (len == 0)
1889 return (PyStringObject *)
1890 PyString_FromStringAndSize(NULL, skip);
1891 bufptr = (char *)memchr(f->f_bufptr, '\n', len);
1892 if (bufptr != NULL) {
1893 bufptr++; /* Count the '\n' */
1894 len = bufptr - f->f_bufptr;
1895 s = (PyStringObject *)
1896 PyString_FromStringAndSize(NULL, skip+len);
1897 if (s == NULL)
1898 return NULL;
1899 memcpy(PyString_AS_STRING(s)+skip, f->f_bufptr, len);
1900 f->f_bufptr = bufptr;
1901 if (bufptr == f->f_bufend)
1902 drop_readahead(f);
1903 } else {
1904 bufptr = f->f_bufptr;
1905 buf = f->f_buf;
1906 f->f_buf = NULL; /* Force new readahead buffer */
1907 assert(skip+len < INT_MAX);
1908 s = readahead_get_line_skip(
1909 f, (int)(skip+len), bufsize + (bufsize>>2) );
1910 if (s == NULL) {
1911 PyMem_Free(buf);
1912 return NULL;
1913 }
1914 memcpy(PyString_AS_STRING(s)+skip, bufptr, len);
1915 PyMem_Free(buf);
1916 }
1917 return s;
1918}
1919
1920/* A larger buffer size may actually decrease performance. */
1921#define READAHEAD_BUFSIZE 8192
1922
1923static PyObject *
1924file_iternext(PyFileObject *f)
1925{
1926 PyStringObject* l;
1927
1928 if (f->f_fp == NULL)
1929 return err_closed();
1930
1931 l = readahead_get_line_skip(f, 0, READAHEAD_BUFSIZE);
1932 if (l == NULL || PyString_GET_SIZE(l) == 0) {
1933 Py_XDECREF(l);
1934 return NULL;
1935 }
1936 return (PyObject *)l;
1937}
1938
1939
1940static PyObject *
1941file_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
1942{
1943 PyObject *self;
1944 static PyObject *not_yet_string;
1945
1946 assert(type != NULL && type->tp_alloc != NULL);
1947
1948 if (not_yet_string == NULL) {
1949 not_yet_string = PyString_FromString("<uninitialized file>");
1950 if (not_yet_string == NULL)
1951 return NULL;
1952 }
1953
1954 self = type->tp_alloc(type, 0);
1955 if (self != NULL) {
1956 /* Always fill in the name and mode, so that nobody else
1957 needs to special-case NULLs there. */
1958 Py_INCREF(not_yet_string);
1959 ((PyFileObject *)self)->f_name = not_yet_string;
1960 Py_INCREF(not_yet_string);
1961 ((PyFileObject *)self)->f_mode = not_yet_string;
1962 Py_INCREF(Py_None);
1963 ((PyFileObject *)self)->f_encoding = Py_None;
1964 ((PyFileObject *)self)->weakreflist = NULL;
1965 }
1966 return self;
1967}
1968
1969static int
1970file_init(PyObject *self, PyObject *args, PyObject *kwds)
1971{
1972 PyFileObject *foself = (PyFileObject *)self;
1973 int ret = 0;
1974 static char *kwlist[] = {"name", "mode", "buffering", 0};
1975 char *name = NULL;
1976 char *mode = "r";
1977 int bufsize = -1;
1978 int wideargument = 0;
1979
1980 assert(PyFile_Check(self));
1981 if (foself->f_fp != NULL) {
1982 /* Have to close the existing file first. */
1983 PyObject *closeresult = file_close(foself);
1984 if (closeresult == NULL)
1985 return -1;
1986 Py_DECREF(closeresult);
1987 }
1988
1989#ifdef Py_WIN_WIDE_FILENAMES
1990 if (GetVersion() < 0x80000000) { /* On NT, so wide API available */
1991 PyObject *po;
1992 if (PyArg_ParseTupleAndKeywords(args, kwds, "U|si:file",
1993 kwlist, &po, &mode, &bufsize)) {
1994 wideargument = 1;
1995 if (fill_file_fields(foself, NULL, po, mode,
1996 fclose) == NULL)
1997 goto Error;
1998 } else {
1999 /* Drop the argument parsing error as narrow
2000 strings are also valid. */
2001 PyErr_Clear();
2002 }
2003 }
2004#endif
2005
2006 if (!wideargument) {
2007 PyObject *o_name;
2008
2009 if (!PyArg_ParseTupleAndKeywords(args, kwds, "et|si:file", kwlist,
2010 Py_FileSystemDefaultEncoding,
2011 &name,
2012 &mode, &bufsize))
2013 return -1;
2014
2015 /* We parse again to get the name as a PyObject */
2016 if (!PyArg_ParseTupleAndKeywords(args, kwds, "O|si:file",
2017 kwlist, &o_name, &mode,
2018 &bufsize))
2019 goto Error;
2020
2021 if (fill_file_fields(foself, NULL, o_name, mode,
2022 fclose) == NULL)
2023 goto Error;
2024 }
2025 if (open_the_file(foself, name, mode) == NULL)
2026 goto Error;
2027 foself->f_setbuf = NULL;
2028 PyFile_SetBufSize(self, bufsize);
2029 goto Done;
2030
2031Error:
2032 ret = -1;
2033 /* fall through */
2034Done:
2035 PyMem_Free(name); /* free the encoded string */
2036 return ret;
2037}
2038
2039PyDoc_VAR(file_doc) =
2040PyDoc_STR(
2041"file(name[, mode[, buffering]]) -> file object\n"
2042"\n"
2043"Open a file. The mode can be 'r', 'w' or 'a' for reading (default),\n"
2044"writing or appending. The file will be created if it doesn't exist\n"
2045"when opened for writing or appending; it will be truncated when\n"
2046"opened for writing. Add a 'b' to the mode for binary files.\n"
2047"Add a '+' to the mode to allow simultaneous reading and writing.\n"
2048"If the buffering argument is given, 0 means unbuffered, 1 means line\n"
2049"buffered, and larger numbers specify the buffer size.\n"
2050)
2051PyDoc_STR(
2052"Add a 'U' to mode to open the file for input with universal newline\n"
2053"support. Any line ending in the input file will be seen as a '\\n'\n"
2054"in Python. Also, a file so opened gains the attribute 'newlines';\n"
2055"the value for this attribute is one of None (no newline read yet),\n"
2056"'\\r', '\\n', '\\r\\n' or a tuple containing all the newline types seen.\n"
2057"\n"
2058"'U' cannot be combined with 'w' or '+' mode.\n"
2059);
2060
2061PyTypeObject PyFile_Type = {
2062 PyObject_HEAD_INIT(&PyType_Type)
2063 0,
2064 "file",
2065 sizeof(PyFileObject),
2066 0,
2067 (destructor)file_dealloc, /* tp_dealloc */
2068 0, /* tp_print */
2069 0, /* tp_getattr */
2070 0, /* tp_setattr */
2071 0, /* tp_compare */
2072 (reprfunc)file_repr, /* tp_repr */
2073 0, /* tp_as_number */
2074 0, /* tp_as_sequence */
2075 0, /* tp_as_mapping */
2076 0, /* tp_hash */
2077 0, /* tp_call */
2078 0, /* tp_str */
2079 PyObject_GenericGetAttr, /* tp_getattro */
2080 /* softspace is writable: we must supply tp_setattro */
2081 PyObject_GenericSetAttr, /* tp_setattro */
2082 0, /* tp_as_buffer */
2083 Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE | Py_TPFLAGS_HAVE_WEAKREFS, /* tp_flags */
2084 file_doc, /* tp_doc */
2085 0, /* tp_traverse */
2086 0, /* tp_clear */
2087 0, /* tp_richcompare */
2088 offsetof(PyFileObject, weakreflist), /* tp_weaklistoffset */
2089 (getiterfunc)file_self, /* tp_iter */
2090 (iternextfunc)file_iternext, /* tp_iternext */
2091 file_methods, /* tp_methods */
2092 file_memberlist, /* tp_members */
2093 file_getsetlist, /* tp_getset */
2094 0, /* tp_base */
2095 0, /* tp_dict */
2096 0, /* tp_descr_get */
2097 0, /* tp_descr_set */
2098 0, /* tp_dictoffset */
2099 file_init, /* tp_init */
2100 PyType_GenericAlloc, /* tp_alloc */
2101 file_new, /* tp_new */
2102 PyObject_Del, /* tp_free */
2103};
2104
2105/* Interface for the 'soft space' between print items. */
2106
2107int
2108PyFile_SoftSpace(PyObject *f, int newflag)
2109{
2110 long oldflag = 0;
2111 if (f == NULL) {
2112 /* Do nothing */
2113 }
2114 else if (PyFile_Check(f)) {
2115 oldflag = ((PyFileObject *)f)->f_softspace;
2116 ((PyFileObject *)f)->f_softspace = newflag;
2117 }
2118 else {
2119 PyObject *v;
2120 v = PyObject_GetAttrString(f, "softspace");
2121 if (v == NULL)
2122 PyErr_Clear();
2123 else {
2124 if (PyInt_Check(v))
2125 oldflag = PyInt_AsLong(v);
2126 assert(oldflag < INT_MAX);
2127 Py_DECREF(v);
2128 }
2129 v = PyInt_FromLong((long)newflag);
2130 if (v == NULL)
2131 PyErr_Clear();
2132 else {
2133 if (PyObject_SetAttrString(f, "softspace", v) != 0)
2134 PyErr_Clear();
2135 Py_DECREF(v);
2136 }
2137 }
2138 return (int)oldflag;
2139}
2140
2141/* Interfaces to write objects/strings to file-like objects */
2142
2143int
2144PyFile_WriteObject(PyObject *v, PyObject *f, int flags)
2145{
2146 PyObject *writer, *value, *args, *result;
2147 if (f == NULL) {
2148 PyErr_SetString(PyExc_TypeError, "writeobject with NULL file");
2149 return -1;
2150 }
2151 else if (PyFile_Check(f)) {
2152 FILE *fp = PyFile_AsFile(f);
2153#ifdef Py_USING_UNICODE
2154 PyObject *enc = ((PyFileObject*)f)->f_encoding;
2155 int result;
2156#endif
2157 if (fp == NULL) {
2158 err_closed();
2159 return -1;
2160 }
2161#ifdef Py_USING_UNICODE
2162 if ((flags & Py_PRINT_RAW) &&
2163 PyUnicode_Check(v) && enc != Py_None) {
2164 char *cenc = PyString_AS_STRING(enc);
2165 value = PyUnicode_AsEncodedString(v, cenc, "strict");
2166 if (value == NULL)
2167 return -1;
2168 } else {
2169 value = v;
2170 Py_INCREF(value);
2171 }
2172 result = PyObject_Print(value, fp, flags);
2173 Py_DECREF(value);
2174 return result;
2175#else
2176 return PyObject_Print(v, fp, flags);
2177#endif
2178 }
2179 writer = PyObject_GetAttrString(f, "write");
2180 if (writer == NULL)
2181 return -1;
2182 if (flags & Py_PRINT_RAW) {
2183 if (PyUnicode_Check(v)) {
2184 value = v;
2185 Py_INCREF(value);
2186 } else
2187 value = PyObject_Str(v);
2188 }
2189 else
2190 value = PyObject_Repr(v);
2191 if (value == NULL) {
2192 Py_DECREF(writer);
2193 return -1;
2194 }
2195 args = PyTuple_Pack(1, value);
2196 if (args == NULL) {
2197 Py_DECREF(value);
2198 Py_DECREF(writer);
2199 return -1;
2200 }
2201 result = PyEval_CallObject(writer, args);
2202 Py_DECREF(args);
2203 Py_DECREF(value);
2204 Py_DECREF(writer);
2205 if (result == NULL)
2206 return -1;
2207 Py_DECREF(result);
2208 return 0;
2209}
2210
2211int
2212PyFile_WriteString(const char *s, PyObject *f)
2213{
2214 if (f == NULL) {
2215 /* Should be caused by a pre-existing error */
2216 if (!PyErr_Occurred())
2217 PyErr_SetString(PyExc_SystemError,
2218 "null file for PyFile_WriteString");
2219 return -1;
2220 }
2221 else if (PyFile_Check(f)) {
2222 FILE *fp = PyFile_AsFile(f);
2223 if (fp == NULL) {
2224 err_closed();
2225 return -1;
2226 }
2227 fputs(s, fp);
2228 return 0;
2229 }
2230 else if (!PyErr_Occurred()) {
2231 PyObject *v = PyString_FromString(s);
2232 int err;
2233 if (v == NULL)
2234 return -1;
2235 err = PyFile_WriteObject(v, f, Py_PRINT_RAW);
2236 Py_DECREF(v);
2237 return err;
2238 }
2239 else
2240 return -1;
2241}
2242
2243/* Try to get a file-descriptor from a Python object. If the object
2244 is an integer or long integer, its value is returned. If not, the
2245 object's fileno() method is called if it exists; the method must return
2246 an integer or long integer, which is returned as the file descriptor value.
2247 -1 is returned on failure.
2248*/
2249
2250int PyObject_AsFileDescriptor(PyObject *o)
2251{
2252 int fd;
2253 PyObject *meth;
2254
2255 if (PyInt_Check(o)) {
2256 fd = PyInt_AsLong(o);
2257 }
2258 else if (PyLong_Check(o)) {
2259 fd = PyLong_AsLong(o);
2260 }
2261 else if ((meth = PyObject_GetAttrString(o, "fileno")) != NULL)
2262 {
2263 PyObject *fno = PyEval_CallObject(meth, NULL);
2264 Py_DECREF(meth);
2265 if (fno == NULL)
2266 return -1;
2267
2268 if (PyInt_Check(fno)) {
2269 fd = PyInt_AsLong(fno);
2270 Py_DECREF(fno);
2271 }
2272 else if (PyLong_Check(fno)) {
2273 fd = PyLong_AsLong(fno);
2274 Py_DECREF(fno);
2275 }
2276 else {
2277 PyErr_SetString(PyExc_TypeError,
2278 "fileno() returned a non-integer");
2279 Py_DECREF(fno);
2280 return -1;
2281 }
2282 }
2283 else {
2284 PyErr_SetString(PyExc_TypeError,
2285 "argument must be an int, or have a fileno() method.");
2286 return -1;
2287 }
2288
2289 if (fd < 0) {
2290 PyErr_Format(PyExc_ValueError,
2291 "file descriptor cannot be a negative integer (%i)",
2292 fd);
2293 return -1;
2294 }
2295 return fd;
2296}
2297
2298/* From here on we need access to the real fgets and fread */
2299#undef fgets
2300#undef fread
2301
2302/*
2303** Py_UniversalNewlineFgets is an fgets variation that understands
2304** all of \r, \n and \r\n conventions.
2305** The stream should be opened in binary mode.
2306** If fobj is NULL the routine always does newline conversion, and
2307** it may peek one char ahead to gobble the second char in \r\n.
2308** If fobj is non-NULL it must be a PyFileObject. In this case there
2309** is no readahead but in stead a flag is used to skip a following
2310** \n on the next read. Also, if the file is open in binary mode
2311** the whole conversion is skipped. Finally, the routine keeps track of
2312** the different types of newlines seen.
2313** Note that we need no error handling: fgets() treats error and eof
2314** identically.
2315*/
2316char *
2317Py_UniversalNewlineFgets(char *buf, int n, FILE *stream, PyObject *fobj)
2318{
2319 char *p = buf;
2320 int c;
2321 int newlinetypes = 0;
2322 int skipnextlf = 0;
2323 int univ_newline = 1;
2324
2325 if (fobj) {
2326 if (!PyFile_Check(fobj)) {
2327 errno = ENXIO; /* What can you do... */
2328 return NULL;
2329 }
2330 univ_newline = ((PyFileObject *)fobj)->f_univ_newline;
2331 if ( !univ_newline )
2332 return fgets(buf, n, stream);
2333 newlinetypes = ((PyFileObject *)fobj)->f_newlinetypes;
2334 skipnextlf = ((PyFileObject *)fobj)->f_skipnextlf;
2335 }
2336 FLOCKFILE(stream);
2337 c = 'x'; /* Shut up gcc warning */
2338 while (--n > 0 && (c = GETC(stream)) != EOF ) {
2339 if (skipnextlf ) {
2340 skipnextlf = 0;
2341 if (c == '\n') {
2342 /* Seeing a \n here with skipnextlf true
2343 ** means we saw a \r before.
2344 */
2345 newlinetypes |= NEWLINE_CRLF;
2346 c = GETC(stream);
2347 if (c == EOF) break;
2348 } else {
2349 /*
2350 ** Note that c == EOF also brings us here,
2351 ** so we're okay if the last char in the file
2352 ** is a CR.
2353 */
2354 newlinetypes |= NEWLINE_CR;
2355 }
2356 }
2357 if (c == '\r') {
2358 /* A \r is translated into a \n, and we skip
2359 ** an adjacent \n, if any. We don't set the
2360 ** newlinetypes flag until we've seen the next char.
2361 */
2362 skipnextlf = 1;
2363 c = '\n';
2364 } else if ( c == '\n') {
2365 newlinetypes |= NEWLINE_LF;
2366 }
2367 *p++ = c;
2368 if (c == '\n') break;
2369 }
2370 if ( c == EOF && skipnextlf )
2371 newlinetypes |= NEWLINE_CR;
2372 FUNLOCKFILE(stream);
2373 *p = '\0';
2374 if (fobj) {
2375 ((PyFileObject *)fobj)->f_newlinetypes = newlinetypes;
2376 ((PyFileObject *)fobj)->f_skipnextlf = skipnextlf;
2377 } else if ( skipnextlf ) {
2378 /* If we have no file object we cannot save the
2379 ** skipnextlf flag. We have to readahead, which
2380 ** will cause a pause if we're reading from an
2381 ** interactive stream, but that is very unlikely
2382 ** unless we're doing something silly like
2383 ** execfile("/dev/tty").
2384 */
2385 c = GETC(stream);
2386 if ( c != '\n' )
2387 ungetc(c, stream);
2388 }
2389 if (p == buf)
2390 return NULL;
2391 return buf;
2392}
2393
2394/*
2395** Py_UniversalNewlineFread is an fread variation that understands
2396** all of \r, \n and \r\n conventions.
2397** The stream should be opened in binary mode.
2398** fobj must be a PyFileObject. In this case there
2399** is no readahead but in stead a flag is used to skip a following
2400** \n on the next read. Also, if the file is open in binary mode
2401** the whole conversion is skipped. Finally, the routine keeps track of
2402** the different types of newlines seen.
2403*/
2404size_t
2405Py_UniversalNewlineFread(char *buf, size_t n,
2406 FILE *stream, PyObject *fobj)
2407{
2408 char *dst = buf;
2409 PyFileObject *f = (PyFileObject *)fobj;
2410 int newlinetypes, skipnextlf;
2411
2412 assert(buf != NULL);
2413 assert(stream != NULL);
2414
2415 if (!fobj || !PyFile_Check(fobj)) {
2416 errno = ENXIO; /* What can you do... */
2417 return 0;
2418 }
2419 if (!f->f_univ_newline)
2420 return fread(buf, 1, n, stream);
2421 newlinetypes = f->f_newlinetypes;
2422 skipnextlf = f->f_skipnextlf;
2423 /* Invariant: n is the number of bytes remaining to be filled
2424 * in the buffer.
2425 */
2426 while (n) {
2427 size_t nread;
2428 int shortread;
2429 char *src = dst;
2430
2431 nread = fread(dst, 1, n, stream);
2432 assert(nread <= n);
2433 if (nread == 0)
2434 break;
2435
2436 n -= nread; /* assuming 1 byte out for each in; will adjust */
2437 shortread = n != 0; /* true iff EOF or error */
2438 while (nread--) {
2439 char c = *src++;
2440 if (c == '\r') {
2441 /* Save as LF and set flag to skip next LF. */
2442 *dst++ = '\n';
2443 skipnextlf = 1;
2444 }
2445 else if (skipnextlf && c == '\n') {
2446 /* Skip LF, and remember we saw CR LF. */
2447 skipnextlf = 0;
2448 newlinetypes |= NEWLINE_CRLF;
2449 ++n;
2450 }
2451 else {
2452 /* Normal char to be stored in buffer. Also
2453 * update the newlinetypes flag if either this
2454 * is an LF or the previous char was a CR.
2455 */
2456 if (c == '\n')
2457 newlinetypes |= NEWLINE_LF;
2458 else if (skipnextlf)
2459 newlinetypes |= NEWLINE_CR;
2460 *dst++ = c;
2461 skipnextlf = 0;
2462 }
2463 }
2464 if (shortread) {
2465 /* If this is EOF, update type flags. */
2466 if (skipnextlf && feof(stream))
2467 newlinetypes |= NEWLINE_CR;
2468 break;
2469 }
2470 }
2471 f->f_newlinetypes = newlinetypes;
2472 f->f_skipnextlf = skipnextlf;
2473 return dst - buf;
2474}
2475
2476#ifdef __cplusplus
2477}
2478#endif
Note: See TracBrowser for help on using the repository browser.