1 | """
|
---|
2 | Python implementation of the io module.
|
---|
3 | """
|
---|
4 |
|
---|
5 | from __future__ import (print_function, unicode_literals)
|
---|
6 |
|
---|
7 | import os
|
---|
8 | import abc
|
---|
9 | import codecs
|
---|
10 | import warnings
|
---|
11 | import errno
|
---|
12 | # Import thread instead of threading to reduce startup cost
|
---|
13 | try:
|
---|
14 | from thread import allocate_lock as Lock
|
---|
15 | except ImportError:
|
---|
16 | from dummy_thread import allocate_lock as Lock
|
---|
17 |
|
---|
18 | import io
|
---|
19 | from io import (__all__, SEEK_SET, SEEK_CUR, SEEK_END)
|
---|
20 | from errno import EINTR
|
---|
21 |
|
---|
# Python 2: make every class in this module new-style by default.
__metaclass__ = type

# open() uses st_blksize whenever we can
DEFAULT_BUFFER_SIZE = 8 * 1024  # bytes

# NOTE: Base classes defined here are registered with the "official" ABCs
# defined in io.py. We don't use real inheritance though, because we don't
# want to inherit the C implementations.
|
---|
31 |
|
---|
class BlockingIOError(IOError):

    """Exception raised when I/O would block on a non-blocking I/O stream."""

    def __init__(self, errno, strerror, characters_written=0):
        # Initialise the IOError part (errno / strerror) first, then
        # record how much output made it through before blocking.
        super(IOError, self).__init__(errno, strerror)
        if isinstance(characters_written, (int, long)):
            self.characters_written = characters_written
        else:
            raise TypeError("characters_written must be a integer")
|
---|
41 |
|
---|
42 |
|
---|
def open(file, mode="r", buffering=-1,
         encoding=None, errors=None,
         newline=None, closefd=True):

    r"""Open file and return a stream.  Raise IOError upon failure.

    file is a text or byte string naming the file (with path if needed),
    or an integer file descriptor to wrap.  A wrapped descriptor is
    closed with the returned object unless closefd is False (closefd
    must be True when a file name is given).

    mode combines the characters 'r' (read, the default), 'w' (write,
    truncating), 'a' (append), 'b' (binary), 't' (text, the default),
    '+' (updating) and 'U' (universal newlines; legacy, implies 'r').
    Exactly one of read/write/append must be selected, and text and
    binary are mutually exclusive.

    buffering selects the buffering policy: 0 switches buffering off
    (binary mode only), 1 selects line buffering (text mode only), and
    an integer > 1 gives the size of a fixed-size buffer.  When
    negative (the default), binary files get a buffer of the device
    "block size" falling back on io.DEFAULT_BUFFER_SIZE, and
    interactive text files (isatty() true) get line buffering.

    encoding names the text encoding and errors the encoding error
    policy (see codecs.register); both are only valid in text mode.
    newline controls universal-newline handling in text mode and may be
    None, '', '\n', '\r' or '\r\n'.

    The returned object is a FileIO for unbuffered binary I/O, a
    BufferedReader / BufferedWriter / BufferedRandom for buffered
    binary I/O, or a TextIOWrapper for text mode.  StringIO and BytesIO
    offer the same interfaces over in-memory strings and bytes.
    """
    # --- validate argument types, mirroring the C implementation ---
    if not isinstance(file, (basestring, int, long)):
        raise TypeError("invalid file: %r" % file)
    if not isinstance(mode, basestring):
        raise TypeError("invalid mode: %r" % mode)
    if not isinstance(buffering, (int, long)):
        raise TypeError("invalid buffering: %r" % buffering)
    if encoding is not None and not isinstance(encoding, basestring):
        raise TypeError("invalid encoding: %r" % encoding)
    if errors is not None and not isinstance(errors, basestring):
        raise TypeError("invalid errors: %r" % errors)
    # Each mode character must come from the legal set and appear once.
    mode_chars = set(mode)
    if mode_chars - set("arwb+tU") or len(mode) > len(mode_chars):
        raise ValueError("invalid mode: %r" % mode)
    want_read = "r" in mode_chars
    want_write = "w" in mode_chars
    want_append = "a" in mode_chars
    want_update = "+" in mode_chars
    is_text = "t" in mode_chars
    is_binary = "b" in mode_chars
    if "U" in mode_chars:
        if want_write or want_append:
            raise ValueError("can't use U and writing mode at once")
        want_read = True  # 'U' implies reading
    # --- check mode-combination constraints ---
    if is_text and is_binary:
        raise ValueError("can't have text and binary mode at once")
    if want_read + want_write + want_append > 1:
        raise ValueError("can't have read/write/append mode at once")
    if not (want_read or want_write or want_append):
        raise ValueError("must have exactly one of read/write/append mode")
    if is_binary and encoding is not None:
        raise ValueError("binary mode doesn't take an encoding argument")
    if is_binary and errors is not None:
        raise ValueError("binary mode doesn't take an errors argument")
    if is_binary and newline is not None:
        raise ValueError("binary mode doesn't take a newline argument")
    # --- open the raw (unbuffered) file object ---
    rawmode = (("r" if want_read else "") +
               ("w" if want_write else "") +
               ("a" if want_append else "") +
               ("+" if want_update else ""))
    raw = FileIO(file, rawmode, closefd)
    # --- resolve the effective buffering policy ---
    line_buffering = False
    if buffering == 1 or buffering < 0 and raw.isatty():
        buffering = -1
        line_buffering = True
    if buffering < 0:
        buffering = DEFAULT_BUFFER_SIZE
        try:
            # Prefer the filesystem's preferred block size when known.
            bs = os.fstat(raw.fileno()).st_blksize
        except (os.error, AttributeError):
            pass
        else:
            if bs > 1:
                buffering = bs
    if buffering < 0:
        raise ValueError("invalid buffering size")
    if buffering == 0:
        if is_binary:
            return raw
        raise ValueError("can't have unbuffered text I/O")
    # --- wrap in the appropriate buffered class ---
    if want_update:
        buffer = BufferedRandom(raw, buffering)
    elif want_write or want_append:
        buffer = BufferedWriter(raw, buffering)
    elif want_read:
        buffer = BufferedReader(raw, buffering)
    else:
        raise ValueError("unknown mode: %r" % mode)
    if is_binary:
        return buffer
    # --- text mode: add the decoding/newline-translating layer ---
    wrapper = TextIOWrapper(buffer, encoding, errors, newline,
                            line_buffering)
    wrapper.mode = mode
    return wrapper
|
---|
227 |
|
---|
228 |
|
---|
class DocDescriptor:
    """Helper for builtins.open.__doc__

    A descriptor that builds the docstring lazily, so OpenWrapper shows
    the full call signature followed by open()'s own documentation.
    """

    def __get__(self, obj, typ):
        signature = ("open(file, mode='r', buffering=-1, encoding=None, "
                     "errors=None, newline=None, closefd=True)\n\n")
        return signature + open.__doc__
|
---|
237 |
|
---|
class OpenWrapper:
    """Wrapper for builtins.open

    Trick so that open won't become a bound method when stored
    as a class variable (as dbm.dumb does).

    See initstdio() in Python/pythonrun.c.
    """
    # The descriptor recomputes __doc__ on access from open.__doc__,
    # replacing the literal docstring above.
    __doc__ = DocDescriptor()

    def __new__(cls, *args, **kwargs):
        # "Constructing" an OpenWrapper just performs a regular open();
        # no OpenWrapper instance is ever actually created.
        return open(*args, **kwargs)
|
---|
250 |
|
---|
251 |
|
---|
class UnsupportedOperation(ValueError, IOError):
    """Raised for operations a stream does not support (see IOBase._unsupported).

    Derives from both ValueError and IOError, so callers may catch
    either.
    """
    pass
|
---|
254 |
|
---|
255 |
|
---|
class IOBase:
    # Must be set before any other class body statement so the class is
    # created through abc.ABCMeta (Python 2 metaclass hook).
    __metaclass__ = abc.ABCMeta

    # NOTE(review): because __metaclass__ precedes it, the string below
    # is an ordinary expression statement, not the class __doc__.
    """The abstract base class for all I/O classes, acting on streams of
    bytes. There is no public constructor.

    This class provides dummy implementations for many methods that
    derived classes can override selectively; the default implementations
    represent a file that cannot be read, written or seeked.

    Even though IOBase does not declare read, readinto, or write because
    their signatures will vary, implementations and clients should
    consider those methods part of the interface. Also, implementations
    may raise a IOError when operations they do not support are called.

    The basic type used for binary data read from or written to a file is
    bytes. bytearrays are accepted too, and in some cases (such as
    readinto) needed. Text I/O classes work with str data.

    Note that calling any method (even inquiries) on a closed stream is
    undefined. Implementations may raise IOError in this case.

    IOBase (and its subclasses) support the iterator protocol, meaning
    that an IOBase object can be iterated over yielding the lines in a
    stream.

    IOBase also supports the :keyword:`with` statement. In this example,
    fp is closed after the suite of the with statement is complete:

        with open('spam.txt', 'r') as fp:
            fp.write('Spam and eggs!')
    """

    ### Internal ###

    def _unsupported(self, name):
        """Internal: raise an exception for unsupported operations."""
        raise UnsupportedOperation("%s.%s() not supported" %
                                   (self.__class__.__name__, name))

    ### Positioning ###

    def seek(self, pos, whence=0):
        """Change stream position.

        Change the stream position to byte offset pos. Argument pos is
        interpreted relative to the position indicated by whence.  Values
        for whence are:

        * 0 -- start of stream (the default); offset should be zero or positive
        * 1 -- current stream position; offset may be negative
        * 2 -- end of stream; offset is usually negative

        Return the new absolute position.
        """
        self._unsupported("seek")

    def tell(self):
        """Return current stream position."""
        # Expressed in terms of seek() so only seek() needs overriding.
        return self.seek(0, 1)

    def truncate(self, pos=None):
        """Truncate file to size bytes.

        Size defaults to the current IO position as reported by tell().  Return
        the new size.
        """
        self._unsupported("truncate")

    ### Flush and close ###

    def flush(self):
        """Flush write buffers, if applicable.

        This is not implemented for read-only and non-blocking streams.
        """
        self._checkClosed()
        # XXX Should this return the number of bytes written???

    # Name-mangled (_IOBase__closed) so subclasses cannot accidentally
    # shadow the flag.
    __closed = False

    def close(self):
        """Flush and close the IO object.

        This method has no effect if the file is already closed.
        """
        if not self.__closed:
            try:
                self.flush()
            finally:
                # Mark closed even if flush() raised.
                self.__closed = True

    def __del__(self):
        """Destructor.  Calls close()."""
        # The try/except block is in case this is called at program
        # exit time, when it's possible that globals have already been
        # deleted, and then the close() call might fail.  Since
        # there's nothing we can do about such failures and they annoy
        # the end users, we suppress the traceback.
        try:
            self.close()
        except:
            pass

    ### Inquiries ###

    def seekable(self):
        """Return whether object supports random access.

        If False, seek(), tell() and truncate() will raise IOError.
        This method may need to do a test seek().
        """
        return False

    def _checkSeekable(self, msg=None):
        """Internal: raise an IOError if file is not seekable
        """
        if not self.seekable():
            raise IOError("File or stream is not seekable."
                          if msg is None else msg)

    def readable(self):
        """Return whether object was opened for reading.

        If False, read() will raise IOError.
        """
        return False

    def _checkReadable(self, msg=None):
        """Internal: raise an IOError if file is not readable
        """
        if not self.readable():
            raise IOError("File or stream is not readable."
                          if msg is None else msg)

    def writable(self):
        """Return whether object was opened for writing.

        If False, write() and truncate() will raise IOError.
        """
        return False

    def _checkWritable(self, msg=None):
        """Internal: raise an IOError if file is not writable
        """
        if not self.writable():
            raise IOError("File or stream is not writable."
                          if msg is None else msg)

    @property
    def closed(self):
        """closed: bool.  True iff the file has been closed.

        For backwards compatibility, this is a property, not a predicate.
        """
        return self.__closed

    def _checkClosed(self, msg=None):
        """Internal: raise an ValueError if file is closed
        """
        if self.closed:
            raise ValueError("I/O operation on closed file."
                             if msg is None else msg)

    ### Context manager ###

    def __enter__(self):
        """Context management protocol.  Returns self."""
        self._checkClosed()
        return self

    def __exit__(self, *args):
        """Context management protocol.  Calls close()"""
        self.close()

    ### Lower-level APIs ###

    # XXX Should these be present even if unimplemented?

    def fileno(self):
        """Returns underlying file descriptor if one exists.

        An IOError is raised if the IO object does not use a file descriptor.
        """
        self._unsupported("fileno")

    def isatty(self):
        """Return whether this is an 'interactive' stream.

        Return False if it can't be determined.
        """
        self._checkClosed()
        return False

    ### Readline[s] and writelines ###

    def readline(self, limit=-1):
        r"""Read and return a line from the stream.

        If limit is specified, at most limit bytes will be read.

        The line terminator is always b'\n' for binary files; for text
        files, the newlines argument to open can be used to select the line
        terminator(s) recognized.
        """
        # For backwards compatibility, a (slowish) readline().
        if hasattr(self, "peek"):
            # With peek() available, compute how many bytes can be read
            # in one go without overshooting the next newline (or limit).
            def nreadahead():
                readahead = self.peek(1)
                if not readahead:
                    return 1
                n = (readahead.find(b"\n") + 1) or len(readahead)
                if limit >= 0:
                    n = min(n, limit)
                return n
        else:
            # No peek(): fall back to reading one byte at a time.
            def nreadahead():
                return 1
        if limit is None:
            limit = -1
        elif not isinstance(limit, (int, long)):
            raise TypeError("limit must be an integer")
        res = bytearray()
        while limit < 0 or len(res) < limit:
            b = self.read(nreadahead())
            if not b:
                break
            res += b
            if res.endswith(b"\n"):
                break
        return bytes(res)

    def __iter__(self):
        self._checkClosed()
        return self

    def next(self):
        # Python 2 iterator protocol: yield lines until readline()
        # returns an empty result (EOF).
        line = self.readline()
        if not line:
            raise StopIteration
        return line

    def readlines(self, hint=None):
        """Return a list of lines from the stream.

        hint can be specified to control the number of lines read: no more
        lines will be read if the total size (in bytes/characters) of all
        lines so far exceeds hint.
        """
        if hint is not None and not isinstance(hint, (int, long)):
            raise TypeError("integer or None expected")
        if hint is None or hint <= 0:
            return list(self)
        n = 0
        lines = []
        for line in self:
            lines.append(line)
            n += len(line)
            if n >= hint:
                break
        return lines

    def writelines(self, lines):
        # Note: no newlines are added; lines are written verbatim.
        self._checkClosed()
        for line in lines:
            self.write(line)
|
---|
523 |
|
---|
# Register with the C implementation's ABC so isinstance checks against
# io.IOBase accept instances of this pure-Python class (see NOTE above).
io.IOBase.register(IOBase)
525 |
|
---|
526 |
|
---|
class RawIOBase(IOBase):

    """Base class for raw binary I/O."""

    # The read() method is implemented by calling readinto(); derived
    # classes that want to support read() only need to implement
    # readinto() as a primitive operation.  In general, readinto() can be
    # more efficient than read().

    # (It would be tempting to also provide an implementation of
    # readinto() in terms of read(), in case the latter is a more suitable
    # primitive operation, but that would lead to nasty recursion in case
    # a subclass doesn't implement either.)

    def read(self, n=-1):
        """Read and return up to n bytes.

        Returns an empty bytes object on EOF, or None if the object is
        set not to block and has no data to read.
        """
        if n is None:
            n = -1
        if n < 0:
            return self.readall()
        # Pre-size a mutable buffer; __index__() rejects non-integral n.
        b = bytearray(n.__index__())
        n = self.readinto(b)
        if n is None:
            # Non-blocking stream with no data available right now.
            return None
        # Trim the unused tail before converting to immutable bytes.
        del b[n:]
        return bytes(b)

    def readall(self):
        """Read until EOF, using multiple read() call."""
        res = bytearray()
        while True:
            data = self.read(DEFAULT_BUFFER_SIZE)
            if not data:
                break
            res += data
        if res:
            return bytes(res)
        else:
            # b'' or None
            # (propagates the last read()'s result so a non-blocking
            # stream's None is distinguishable from plain EOF)
            return data

    def readinto(self, b):
        """Read up to len(b) bytes into b.

        Returns number of bytes read (0 for EOF), or None if the object
        is set not to block and has no data to read.
        """
        self._unsupported("readinto")

    def write(self, b):
        """Write the given buffer to the IO stream.

        Returns the number of bytes written, which may be less than len(b).
        """
        self._unsupported("write")
|
---|
586 |
|
---|
io.RawIOBase.register(RawIOBase)
# FileIO comes from the C accelerator module; register it as a virtual
# subclass of the pure-Python RawIOBase as well so isinstance agrees.
from _io import FileIO
RawIOBase.register(FileIO)
|
---|
590 |
|
---|
591 |
|
---|
class BufferedIOBase(IOBase):

    """Base class for buffered IO objects.

    The main difference with RawIOBase is that the read() method
    supports omitting the size argument, and does not have a default
    implementation that defers to readinto().

    In addition, read(), readinto() and write() may raise
    BlockingIOError if the underlying raw stream is in non-blocking
    mode and not ready; unlike their raw counterparts, they will never
    return None.

    A typical implementation should not inherit from a RawIOBase
    implementation, but wrap one.
    """

    def read(self, n=None):
        """Read and return up to n bytes.

        If the argument is omitted, None, or negative, reads and
        returns all data until EOF.

        If the argument is positive, and the underlying raw stream is
        not 'interactive', multiple raw reads may be issued to satisfy
        the byte count (unless EOF is reached first).  But for
        interactive raw streams (XXX and for pipes?), at most one raw
        read will be issued, and a short result does not imply that
        EOF is imminent.

        Returns an empty bytes array on EOF.

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        self._unsupported("read")

    def read1(self, n=None):
        """Read up to n bytes with at most one read() system call."""
        self._unsupported("read1")

    def readinto(self, b):
        """Read up to len(b) bytes into b.

        Like read(), this may issue multiple reads to the underlying raw
        stream, unless the latter is 'interactive'.

        Returns the number of bytes read (0 for EOF).

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        # XXX This ought to work with anything that supports the buffer API
        data = self.read(len(b))
        n = len(data)
        try:
            b[:n] = data
        except TypeError as err:
            import array
            if not isinstance(b, array.array):
                raise err
            # For array.array targets, retry the slice assignment with
            # the data wrapped in an array of signed bytes.
            b[:n] = array.array(b'b', data)
        return n

    def write(self, b):
        """Write the given buffer to the IO stream.

        Return the number of bytes written, which is never less than
        len(b).

        Raises BlockingIOError if the buffer is full and the
        underlying raw stream cannot accept more data at the moment.
        """
        self._unsupported("write")

    def detach(self):
        """
        Separate the underlying raw stream from the buffer and return it.

        After the raw stream has been detached, the buffer is in an unusable
        state.
        """
        self._unsupported("detach")
|
---|
675 |
|
---|
# Register with the C implementation's ABC (see NOTE near top of file).
io.BufferedIOBase.register(BufferedIOBase)
677 |
|
---|
678 |
|
---|
class _BufferedIOMixin(BufferedIOBase):

    """A mixin implementation of BufferedIOBase with an underlying raw stream.

    This passes most requests on to the underlying raw stream.  It
    does *not* provide implementations of read(), readinto() or
    write().
    """

    def __init__(self, raw):
        # raw: the underlying stream, exposed read-only via the `raw`
        # property and set to None once detach() is called.
        self._raw = raw

    ### Positioning ###

    def seek(self, pos, whence=0):
        # Delegate to the raw stream, rejecting nonsensical results.
        new_position = self.raw.seek(pos, whence)
        if new_position < 0:
            raise IOError("seek() returned an invalid position")
        return new_position

    def tell(self):
        pos = self.raw.tell()
        if pos < 0:
            raise IOError("tell() returned an invalid position")
        return pos

    def truncate(self, pos=None):
        # Flush the stream.  We're mixing buffered I/O with lower-level I/O,
        # and a flush may be necessary to synch both views of the current
        # file state.
        self.flush()

        if pos is None:
            pos = self.tell()
        # XXX: Should seek() be used, instead of passing the position
        # XXX directly to truncate?
        return self.raw.truncate(pos)

    ### Flush and close ###

    def flush(self):
        if self.closed:
            raise ValueError("flush of closed file")
        self.raw.flush()

    def close(self):
        if self.raw is not None and not self.closed:
            try:
                # may raise BlockingIOError or BrokenPipeError etc
                self.flush()
            finally:
                # Close the raw stream even if flushing failed.
                self.raw.close()

    def detach(self):
        if self.raw is None:
            raise ValueError("raw stream already detached")
        # Flush pending data out through the raw stream before letting go.
        self.flush()
        raw = self._raw
        self._raw = None
        return raw

    ### Inquiries ###
    # All capability checks are delegated to the raw stream.

    def seekable(self):
        return self.raw.seekable()

    def readable(self):
        return self.raw.readable()

    def writable(self):
        return self.raw.writable()

    @property
    def raw(self):
        # The wrapped stream (None after detach()).
        return self._raw

    @property
    def closed(self):
        return self.raw.closed

    @property
    def name(self):
        return self.raw.name

    @property
    def mode(self):
        return self.raw.mode

    def __repr__(self):
        clsname = self.__class__.__name__
        try:
            name = self.name
        except AttributeError:
            # Raw stream has no name (e.g. wrapped fd); omit it.
            return "<_pyio.{0}>".format(clsname)
        else:
            return "<_pyio.{0} name={1!r}>".format(clsname, name)

    ### Lower-level APIs ###

    def fileno(self):
        return self.raw.fileno()

    def isatty(self):
        return self.raw.isatty()
|
---|
783 |
|
---|
784 |
|
---|
class BytesIO(BufferedIOBase):

    """Buffered I/O implementation using an in-memory bytes buffer."""

    def __init__(self, initial_bytes=None):
        # Backing store is a mutable bytearray; _pos is the read/write cursor.
        storage = bytearray()
        if initial_bytes is not None:
            storage.extend(initial_bytes)
        self._buffer = storage
        self._pos = 0

    def __getstate__(self):
        """Support pickling; refuse once the stream is closed."""
        if self.closed:
            raise ValueError("__getstate__ on closed file")
        return self.__dict__.copy()

    def getvalue(self):
        """Return the bytes value (contents) of the buffer
        """
        if self.closed:
            raise ValueError("getvalue on closed file")
        return bytes(self._buffer)

    def read(self, n=None):
        """Read and return up to n bytes (everything left if n is None or < 0)."""
        if self.closed:
            raise ValueError("read from closed file")
        if n is None:
            n = -1
        if not isinstance(n, (int, long)):
            raise TypeError("integer argument expected, got {0!r}".format(
                type(n)))
        size = len(self._buffer)
        if n < 0:
            n = size
        if size <= self._pos:
            return b""
        end = min(size, self._pos + n)
        data = self._buffer[self._pos:end]
        self._pos = end
        return bytes(data)

    def read1(self, n):
        """This is the same as read.
        """
        return self.read(n)

    def write(self, b):
        """Write bytes b at the current position; return the count written."""
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, unicode):
            raise TypeError("can't write unicode to binary stream")
        count = len(b)
        if not count:
            return 0
        cursor = self._pos
        gap = cursor - len(self._buffer)
        if gap > 0:
            # A seek past the end leaves a hole between the old end and the
            # write position; fill it with null bytes.
            self._buffer += b'\x00' * gap
        self._buffer[cursor:cursor + count] = b
        self._pos = cursor + count
        return count

    def seek(self, pos, whence=0):
        """Move the cursor; whence is 0 (absolute), 1 (relative) or 2 (from end)."""
        if self.closed:
            raise ValueError("seek on closed file")
        try:
            pos.__index__
        except AttributeError:
            raise TypeError("an integer is required")
        if whence == 0:
            if pos < 0:
                raise ValueError("negative seek position %r" % (pos,))
            new_pos = pos
        elif whence == 1:
            new_pos = max(0, self._pos + pos)
        elif whence == 2:
            new_pos = max(0, len(self._buffer) + pos)
        else:
            raise ValueError("invalid whence value")
        self._pos = new_pos
        return new_pos

    def tell(self):
        """Return the current cursor position."""
        if self.closed:
            raise ValueError("tell on closed file")
        return self._pos

    def truncate(self, pos=None):
        """Drop everything past pos (default: current position); return pos."""
        if self.closed:
            raise ValueError("truncate on closed file")
        if pos is None:
            pos = self._pos
        else:
            try:
                pos.__index__
            except AttributeError:
                raise TypeError("an integer is required")
            if pos < 0:
                raise ValueError("negative truncate position %r" % (pos,))
        del self._buffer[pos:]
        return pos

    def readable(self):
        """In-memory streams are always readable while open."""
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return True

    def writable(self):
        """In-memory streams are always writable while open."""
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return True

    def seekable(self):
        """In-memory streams are always seekable while open."""
        if self.closed:
            raise ValueError("I/O operation on closed file.")
        return True
901 |
|
---|
902 |
|
---|
class BufferedReader(_BufferedIOMixin):

    """BufferedReader(raw[, buffer_size])

    A buffer for a readable, sequential BaseRawIO object.

    The constructor creates a BufferedReader for the given readable raw
    stream and buffer_size. If buffer_size is omitted, DEFAULT_BUFFER_SIZE
    is used.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        """Create a new buffered reader using the given readable raw IO object.
        """
        if not raw.readable():
            raise IOError('"raw" argument must be readable.')

        _BufferedIOMixin.__init__(self, raw)
        if buffer_size <= 0:
            raise ValueError("invalid buffer size")
        self.buffer_size = buffer_size
        self._reset_read_buf()
        # Serializes access to the read buffer across threads.
        self._read_lock = Lock()

    def _reset_read_buf(self):
        # Drop all buffered data.  _read_buf holds bytes already fetched from
        # the raw stream; _read_pos is the consumption offset into it.
        self._read_buf = b""
        self._read_pos = 0

    def read(self, n=None):
        """Read n bytes.

        Returns exactly n bytes of data unless the underlying raw IO
        stream reaches EOF or if the call would block in non-blocking
        mode. If n is negative, read until EOF or until read() would
        block.
        """
        if n is not None and n < -1:
            raise ValueError("invalid number of bytes to read")
        with self._read_lock:
            return self._read_unlocked(n)

    def _read_unlocked(self, n=None):
        # Caller must hold _read_lock.
        nodata_val = b""
        # raw.read() returns None when it would block in non-blocking mode.
        empty_values = (b"", None)
        buf = self._read_buf
        pos = self._read_pos

        # Special case for when the number of bytes to read is unspecified.
        if n is None or n == -1:
            self._reset_read_buf()
            chunks = [buf[pos:]]  # Strip the consumed bytes.
            current_size = 0
            while True:
                # Read until EOF or until read() would block.
                try:
                    chunk = self.raw.read()
                except IOError as e:
                    # Retry reads interrupted by a signal.
                    if e.errno != EINTR:
                        raise
                    continue
                if chunk in empty_values:
                    nodata_val = chunk
                    break
                current_size += len(chunk)
                chunks.append(chunk)
            return b"".join(chunks) or nodata_val

        # The number of bytes to read is specified, return at most n bytes.
        avail = len(buf) - pos  # Length of the available buffered data.
        if n <= avail:
            # Fast path: the data to read is fully buffered.
            self._read_pos += n
            return buf[pos:pos+n]
        # Slow path: read from the stream until enough bytes are read,
        # or until an EOF occurs or until read() would block.
        chunks = [buf[pos:]]
        wanted = max(self.buffer_size, n)
        while avail < n:
            try:
                chunk = self.raw.read(wanted)
            except IOError as e:
                # Retry reads interrupted by a signal.
                if e.errno != EINTR:
                    raise
                continue
            if chunk in empty_values:
                nodata_val = chunk
                break
            avail += len(chunk)
            chunks.append(chunk)
        # n is more than avail only when an EOF occurred or when
        # read() would have blocked.
        n = min(n, avail)
        out = b"".join(chunks)
        self._read_buf = out[n:]  # Save the extra data in the buffer.
        self._read_pos = 0
        return out[:n] if out else nodata_val

    def peek(self, n=0):
        """Returns buffered bytes without advancing the position.

        The argument indicates a desired minimal number of bytes; we
        do at most one raw read to satisfy it. We never return more
        than self.buffer_size.
        """
        with self._read_lock:
            return self._peek_unlocked(n)

    def _peek_unlocked(self, n=0):
        # Caller must hold _read_lock.
        want = min(n, self.buffer_size)
        have = len(self._read_buf) - self._read_pos
        if have < want or have <= 0:
            to_read = self.buffer_size - have
            while True:
                try:
                    current = self.raw.read(to_read)
                except IOError as e:
                    # Retry reads interrupted by a signal.
                    if e.errno != EINTR:
                        raise
                    continue
                break
            if current:
                self._read_buf = self._read_buf[self._read_pos:] + current
                self._read_pos = 0
        return self._read_buf[self._read_pos:]

    def read1(self, n):
        """Reads up to n bytes, with at most one read() system call."""
        # Returns up to n bytes. If at least one byte is buffered, we
        # only return buffered bytes. Otherwise, we do one raw read.
        if n < 0:
            raise ValueError("number of bytes to read must be positive")
        if n == 0:
            return b""
        with self._read_lock:
            self._peek_unlocked(1)
            return self._read_unlocked(
                min(n, len(self._read_buf) - self._read_pos))

    def tell(self):
        # Logical position = raw position minus data buffered but not yet
        # consumed by the caller.
        return _BufferedIOMixin.tell(self) - len(self._read_buf) + self._read_pos

    def seek(self, pos, whence=0):
        # For relative seeks, compensate for read-ahead still in the buffer.
        if not (0 <= whence <= 2):
            raise ValueError("invalid whence value")
        with self._read_lock:
            if whence == 1:
                pos -= len(self._read_buf) - self._read_pos
            pos = _BufferedIOMixin.seek(self, pos, whence)
            self._reset_read_buf()
            return pos
1053 |
|
---|
class BufferedWriter(_BufferedIOMixin):

    """A buffer for a writeable sequential RawIO object.

    The constructor creates a BufferedWriter for the given writeable raw
    stream. If the buffer_size is not given, it defaults to
    DEFAULT_BUFFER_SIZE.
    """

    # Stack depth for the max_buffer_size DeprecationWarning so it points at
    # the user's call site (BufferedRandom overrides this with 3).
    _warning_stack_offset = 2

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        if not raw.writable():
            raise IOError('"raw" argument must be writable.')

        _BufferedIOMixin.__init__(self, raw)
        if buffer_size <= 0:
            raise ValueError("invalid buffer size")
        if max_buffer_size is not None:
            warnings.warn("max_buffer_size is deprecated", DeprecationWarning,
                          self._warning_stack_offset)
        self.buffer_size = buffer_size
        self._write_buf = bytearray()
        # Serializes buffer mutation and flushing across threads.
        self._write_lock = Lock()

    def write(self, b):
        """Buffer the bytes b; return the number of bytes accepted."""
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, unicode):
            raise TypeError("can't write unicode to binary stream")
        with self._write_lock:
            # XXX we can implement some more tricks to try and avoid
            # partial writes
            if len(self._write_buf) > self.buffer_size:
                # We're full, so let's pre-flush the buffer. (This may
                # raise BlockingIOError with characters_written == 0.)
                self._flush_unlocked()
            before = len(self._write_buf)
            self._write_buf.extend(b)
            written = len(self._write_buf) - before
            if len(self._write_buf) > self.buffer_size:
                try:
                    self._flush_unlocked()
                except BlockingIOError as e:
                    if len(self._write_buf) > self.buffer_size:
                        # We've hit the buffer_size. We have to accept a partial
                        # write and cut back our buffer.
                        overage = len(self._write_buf) - self.buffer_size
                        written -= overage
                        self._write_buf = self._write_buf[:self.buffer_size]
                    raise BlockingIOError(e.errno, e.strerror, written)
            return written

    def truncate(self, pos=None):
        # Flush first so the raw stream sees every pending byte before the
        # size change.
        with self._write_lock:
            self._flush_unlocked()
            if pos is None:
                pos = self.raw.tell()
            return self.raw.truncate(pos)

    def flush(self):
        with self._write_lock:
            self._flush_unlocked()

    def _flush_unlocked(self):
        # Caller must hold _write_lock.
        if self.closed:
            raise ValueError("flush of closed file")
        while self._write_buf:
            try:
                n = self.raw.write(self._write_buf)
            except BlockingIOError:
                # A raw stream signals "would block" by returning None,
                # never by raising BlockingIOError.
                raise RuntimeError("self.raw should implement RawIOBase: it "
                                   "should not raise BlockingIOError")
            except IOError as e:
                # Retry writes interrupted by a signal.
                if e.errno != EINTR:
                    raise
                continue
            if n is None:
                raise BlockingIOError(
                    errno.EAGAIN,
                    "write could not complete without blocking", 0)
            if n > len(self._write_buf) or n < 0:
                raise IOError("write() returned incorrect number of bytes")
            del self._write_buf[:n]

    def tell(self):
        # Logical position = raw position plus bytes buffered but not yet
        # flushed to the raw stream.
        return _BufferedIOMixin.tell(self) + len(self._write_buf)

    def seek(self, pos, whence=0):
        if not (0 <= whence <= 2):
            raise ValueError("invalid whence")
        with self._write_lock:
            # Flush so the raw offset matches the logical offset.
            self._flush_unlocked()
            return _BufferedIOMixin.seek(self, pos, whence)
1149 |
|
---|
1150 |
|
---|
class BufferedRWPair(BufferedIOBase):

    """A buffered reader and writer object together.

    A buffered reader object and buffered writer object put together to
    form a sequential IO object that can read and write. This is typically
    used with a socket or two-way pipe.

    reader and writer are RawIOBase objects that are readable and
    writeable respectively. If the buffer_size is omitted it defaults to
    DEFAULT_BUFFER_SIZE.
    """

    # XXX The usefulness of this (compared to having two separate IO
    # objects) is questionable.

    def __init__(self, reader, writer,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Constructor.

        The arguments are two RawIO instances.
        """
        if max_buffer_size is not None:
            warnings.warn("max_buffer_size is deprecated", DeprecationWarning, 2)

        if not reader.readable():
            raise IOError('"reader" argument must be readable.')

        if not writer.writable():
            raise IOError('"writer" argument must be writable.')

        # Wrap each side in its own buffered object; both share buffer_size.
        self.reader = BufferedReader(reader, buffer_size)
        self.writer = BufferedWriter(writer, buffer_size)

    def read(self, n=None):
        """Read up to n bytes from the reader side (until EOF if n is None)."""
        if n is None:
            n = -1
        return self.reader.read(n)

    def readinto(self, b):
        """Read bytes into the pre-allocated buffer b via the reader side."""
        return self.reader.readinto(b)

    def write(self, b):
        """Buffer the bytes b on the writer side."""
        return self.writer.write(b)

    def peek(self, n=0):
        """Return buffered reader-side bytes without consuming them."""
        return self.reader.peek(n)

    def read1(self, n):
        """Read up to n bytes with at most one raw read on the reader side."""
        return self.reader.read1(n)

    def readable(self):
        return self.reader.readable()

    def writable(self):
        return self.writer.writable()

    def flush(self):
        """Flush pending data on the writer side."""
        return self.writer.flush()

    def close(self):
        """Close both sides.

        The reader is closed even if closing the writer raises, so the
        underlying file descriptor is not leaked.
        """
        try:
            self.writer.close()
        finally:
            self.reader.close()

    def isatty(self):
        """True if either side is attached to a terminal."""
        return self.reader.isatty() or self.writer.isatty()

    @property
    def closed(self):
        # By convention the pair reports the writer's state; close() closes
        # the reader at the same time.
        return self.writer.closed
1221 |
|
---|
1222 |
|
---|
class BufferedRandom(BufferedWriter, BufferedReader):

    """A buffered interface to random access streams.

    The constructor creates a reader and writer for a seekable stream,
    raw, given in the first argument. If the buffer_size is omitted it
    defaults to DEFAULT_BUFFER_SIZE.
    """

    # One level deeper than BufferedWriter, because our __init__ forwards
    # to BufferedWriter.__init__.
    _warning_stack_offset = 3

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        raw._checkSeekable()
        BufferedReader.__init__(self, raw, buffer_size)
        BufferedWriter.__init__(self, raw, buffer_size, max_buffer_size)

    def seek(self, pos, whence=0):
        if not (0 <= whence <= 2):
            raise ValueError("invalid whence")
        # Push out pending writes before moving the raw offset.
        self.flush()
        if self._read_buf:
            # Undo read ahead.
            with self._read_lock:
                self.raw.seek(self._read_pos - len(self._read_buf), 1)
        # First do the raw seek, then empty the read buffer, so that
        # if the raw seek fails, we don't lose buffered data forever.
        pos = self.raw.seek(pos, whence)
        with self._read_lock:
            self._reset_read_buf()
        if pos < 0:
            raise IOError("seek() returned invalid position")
        return pos

    def tell(self):
        # Pending writes take precedence: as long as data sits in the write
        # buffer, the writer's accounting reflects the logical position.
        if self._write_buf:
            return BufferedWriter.tell(self)
        else:
            return BufferedReader.tell(self)

    def truncate(self, pos=None):
        if pos is None:
            pos = self.tell()
        # Use seek to flush the read buffer.
        return BufferedWriter.truncate(self, pos)

    def read(self, n=None):
        if n is None:
            n = -1
        # Writes must reach the raw stream before we read from it.
        self.flush()
        return BufferedReader.read(self, n)

    def readinto(self, b):
        # Writes must reach the raw stream before we read from it.
        self.flush()
        return BufferedReader.readinto(self, b)

    def peek(self, n=0):
        self.flush()
        return BufferedReader.peek(self, n)

    def read1(self, n):
        self.flush()
        return BufferedReader.read1(self, n)

    def write(self, b):
        if self._read_buf:
            # Undo readahead
            with self._read_lock:
                self.raw.seek(self._read_pos - len(self._read_buf), 1)
                self._reset_read_buf()
        return BufferedWriter.write(self, b)
1294 |
|
---|
1295 |
|
---|
class TextIOBase(IOBase):

    """Base class for text I/O.

    This class provides a character and line based interface to stream
    I/O. There is no readinto method because Python's character strings
    are immutable. There is no public constructor.
    """

    # The concrete methods below report "unsupported" via
    # IOBase._unsupported; subclasses must override them to be usable.

    def read(self, n=-1):
        """Read at most n characters from stream.

        Read from underlying buffer until we have n characters or we hit EOF.
        If n is negative or omitted, read until EOF.
        """
        self._unsupported("read")

    def write(self, s):
        """Write string s to stream."""
        self._unsupported("write")

    def truncate(self, pos=None):
        """Truncate size to pos."""
        self._unsupported("truncate")

    def readline(self):
        """Read until newline or EOF.

        Returns an empty string if EOF is hit immediately.
        """
        self._unsupported("readline")

    def detach(self):
        """
        Separate the underlying buffer from the TextIOBase and return it.

        After the underlying buffer has been detached, the TextIO is in an
        unusable state.
        """
        self._unsupported("detach")

    @property
    def encoding(self):
        """The name of the encoding used (None here; subclasses should
        override."""
        return None

    @property
    def newlines(self):
        """Line endings translated so far.

        Only line endings translated during reading are considered.

        Subclasses should override.
        """
        return None

    @property
    def errors(self):
        """Error setting of the decoder or encoder.

        Subclasses should override."""
        return None
1358 |
|
---|
# Register as a virtual subclass so isinstance(x, io.TextIOBase) also
# accepts this pure-Python implementation (no real inheritance, to avoid
# pulling in the C implementation -- see the note at the top of the file).
io.TextIOBase.register(TextIOBase)
1360 |
|
---|
1361 |
|
---|
class IncrementalNewlineDecoder(codecs.IncrementalDecoder):
    r"""Codec used when reading a file in universal newlines mode. It wraps
    another incremental decoder, translating \r\n and \r into \n. It also
    records the types of newlines encountered. When used with
    translate=False, it ensures that the newline sequence is returned in
    one piece.
    """

    # Bit flags accumulated in self.seennl for each newline style observed.
    _LF = 1
    _CR = 2
    _CRLF = 4

    def __init__(self, decoder, translate, errors='strict'):
        codecs.IncrementalDecoder.__init__(self, errors=errors)
        self.translate = translate
        self.decoder = decoder
        self.seennl = 0          # bitmask of newline kinds seen so far
        self.pendingcr = False   # True while a trailing '\r' is held back

    def decode(self, input, final=False):
        """Decode one chunk, tracking and (optionally) translating newlines."""
        # Run the wrapped decoder (if any), re-attaching a '\r' held over
        # from the previous call.
        if self.decoder is None:
            output = input
        else:
            output = self.decoder.decode(input, final=final)
        if self.pendingcr and (output or final):
            output = "\r" + output
            self.pendingcr = False

        # Hold back a trailing '\r' unless this is the final chunk, so that
        # a '\r\n' split across chunks reaches readline() in one piece.
        if not final and output.endswith("\r"):
            output = output[:-1]
            self.pendingcr = True

        # Tally which newline conventions appear in this chunk.
        crlf_count = output.count('\r\n')
        cr_count = output.count('\r') - crlf_count
        lf_count = output.count('\n') - crlf_count
        self.seennl |= (lf_count and self._LF) | (cr_count and self._CR) \
                    | (crlf_count and self._CRLF)

        if self.translate:
            if crlf_count:
                output = output.replace("\r\n", "\n")
            if cr_count:
                output = output.replace("\r", "\n")

        return output

    def getstate(self):
        """Return (buffered_input, flags); bit 0 of flags is pendingcr."""
        if self.decoder is None:
            buf, flag = b"", 0
        else:
            buf, flag = self.decoder.getstate()
        flag <<= 1
        if self.pendingcr:
            flag |= 1
        return buf, flag

    def setstate(self, state):
        """Restore a state previously produced by getstate()."""
        buf, flag = state
        self.pendingcr = bool(flag & 1)
        if self.decoder is not None:
            self.decoder.setstate((buf, flag >> 1))

    def reset(self):
        """Clear newline statistics and reset the wrapped decoder."""
        self.seennl = 0
        self.pendingcr = False
        if self.decoder is not None:
            self.decoder.reset()

    @property
    def newlines(self):
        """Newline(s) seen so far: None, a single str, or a tuple of strs."""
        return (None,
                "\n",
                "\r",
                ("\r", "\n"),
                "\r\n",
                ("\n", "\r\n"),
                ("\r", "\r\n"),
                ("\r", "\n", "\r\n")
                )[self.seennl]
1445 |
|
---|
1446 |
|
---|
1447 | class TextIOWrapper(TextIOBase):
|
---|
1448 |
|
---|
1449 | r"""Character and line based layer over a BufferedIOBase object, buffer.
|
---|
1450 |
|
---|
1451 | encoding gives the name of the encoding that the stream will be
|
---|
1452 | decoded or encoded with. It defaults to locale.getpreferredencoding.
|
---|
1453 |
|
---|
1454 | errors determines the strictness of encoding and decoding (see the
|
---|
1455 | codecs.register) and defaults to "strict".
|
---|
1456 |
|
---|
1457 | newline can be None, '', '\n', '\r', or '\r\n'. It controls the
|
---|
1458 | handling of line endings. If it is None, universal newlines is
|
---|
1459 | enabled. With this enabled, on input, the lines endings '\n', '\r',
|
---|
1460 | or '\r\n' are translated to '\n' before being returned to the
|
---|
1461 | caller. Conversely, on output, '\n' is translated to the system
|
---|
1462 | default line separator, os.linesep. If newline is any other of its
|
---|
1463 | legal values, that newline becomes the newline when the file is read
|
---|
1464 | and it is returned untranslated. On output, '\n' is converted to the
|
---|
1465 | newline.
|
---|
1466 |
|
---|
1467 | If line_buffering is True, a call to flush is implied when a call to
|
---|
1468 | write contains a newline character.
|
---|
1469 | """
|
---|
1470 |
|
---|
1471 | _CHUNK_SIZE = 2048
|
---|
1472 |
|
---|
    def __init__(self, buffer, encoding=None, errors=None, newline=None,
                 line_buffering=False):
        if newline is not None and not isinstance(newline, basestring):
            raise TypeError("illegal newline type: %r" % (type(newline),))
        if newline not in (None, "", "\n", "\r", "\r\n"):
            raise ValueError("illegal newline value: %r" % (newline,))
        if encoding is None:
            try:
                import locale
            except ImportError:
                # Importing locale may fail if Python is being built
                encoding = "ascii"
            else:
                encoding = locale.getpreferredencoding()

        if not isinstance(encoding, basestring):
            raise ValueError("invalid encoding: %r" % encoding)

        if errors is None:
            errors = "strict"
        else:
            if not isinstance(errors, basestring):
                raise ValueError("invalid errors: %r" % errors)

        self._buffer = buffer
        self._line_buffering = line_buffering
        self._encoding = encoding
        self._errors = errors
        # Newline handling flags:
        # _readuniversal: accept any of '\n', '\r', '\r\n' on input
        # _readtranslate: additionally translate them to '\n' (newline=None)
        # _writetranslate/_writenl: on output, translate '\n' to _writenl
        self._readuniversal = not newline
        self._readtranslate = newline is None
        self._readnl = newline
        self._writetranslate = newline != ''
        self._writenl = newline or os.linesep
        # Encoder/decoder are created lazily on first write/read.
        self._encoder = None
        self._decoder = None
        self._decoded_chars = ''  # buffer for text returned from decoder
        self._decoded_chars_used = 0  # offset into _decoded_chars for read()
        self._snapshot = None  # info for reconstructing decoder state
        self._seekable = self._telling = self.buffer.seekable()

        if self._seekable and self.writable():
            position = self.buffer.tell()
            if position != 0:
                # Appending to a non-empty stream: put the encoder into a
                # non-initial state so it won't emit a BOM mid-stream.
                try:
                    self._get_encoder().setstate(0)
                except LookupError:
                    # Sometimes the encoder doesn't exist
                    pass
1521 |
|
---|
1522 | # self._snapshot is either None, or a tuple (dec_flags, next_input)
|
---|
1523 | # where dec_flags is the second (integer) item of the decoder state
|
---|
1524 | # and next_input is the chunk of input bytes that comes next after the
|
---|
1525 | # snapshot point. We use this to reconstruct decoder states in tell().
|
---|
1526 |
|
---|
1527 | # Naming convention:
|
---|
1528 | # - "bytes_..." for integer variables that count input bytes
|
---|
1529 | # - "chars_..." for integer variables that count decoded characters
|
---|
1530 |
|
---|
1531 | def __repr__(self):
|
---|
1532 | try:
|
---|
1533 | name = self.name
|
---|
1534 | except AttributeError:
|
---|
1535 | return "<_pyio.TextIOWrapper encoding='{0}'>".format(self.encoding)
|
---|
1536 | else:
|
---|
1537 | return "<_pyio.TextIOWrapper name={0!r} encoding='{1}'>".format(
|
---|
1538 | name, self.encoding)
|
---|
1539 |
|
---|
    @property
    def encoding(self):
        """Name of the character encoding used by this stream."""
        return self._encoding
1543 |
|
---|
    @property
    def errors(self):
        """Error handling scheme passed to the encoder/decoder."""
        return self._errors
1547 |
|
---|
    @property
    def line_buffering(self):
        """Whether a newline in written text triggers an implicit flush."""
        return self._line_buffering
1551 |
|
---|
    @property
    def buffer(self):
        """The underlying binary buffer this text stream wraps."""
        return self._buffer
1555 |
|
---|
1556 | def seekable(self):
|
---|
1557 | if self.closed:
|
---|
1558 | raise ValueError("I/O operation on closed file.")
|
---|
1559 | return self._seekable
|
---|
1560 |
|
---|
1561 | def readable(self):
|
---|
1562 | return self.buffer.readable()
|
---|
1563 |
|
---|
1564 | def writable(self):
|
---|
1565 | return self.buffer.writable()
|
---|
1566 |
|
---|
1567 | def flush(self):
|
---|
1568 | self.buffer.flush()
|
---|
1569 | self._telling = self._seekable
|
---|
1570 |
|
---|
1571 | def close(self):
|
---|
1572 | if self.buffer is not None and not self.closed:
|
---|
1573 | try:
|
---|
1574 | self.flush()
|
---|
1575 | finally:
|
---|
1576 | self.buffer.close()
|
---|
1577 |
|
---|
    @property
    def closed(self):
        """Closed state, delegated to the underlying buffer."""
        return self.buffer.closed
1581 |
|
---|
    @property
    def name(self):
        """Name of the underlying buffer (AttributeError if it has none)."""
        return self.buffer.name
1585 |
|
---|
1586 | def fileno(self):
|
---|
1587 | return self.buffer.fileno()
|
---|
1588 |
|
---|
1589 | def isatty(self):
|
---|
1590 | return self.buffer.isatty()
|
---|
1591 |
|
---|
    def write(self, s):
        """Encode string s, write it to the buffer, and return len(s)."""
        if self.closed:
            raise ValueError("write to closed file")
        if not isinstance(s, unicode):
            raise TypeError("can't write %s to text stream" %
                            s.__class__.__name__)
        length = len(s)
        haslf = (self._writetranslate or self._line_buffering) and "\n" in s
        if haslf and self._writetranslate and self._writenl != "\n":
            # Translate "\n" to the configured newline before encoding.
            s = s.replace("\n", self._writenl)
        encoder = self._encoder or self._get_encoder()
        # XXX What if we were just reading?
        b = encoder.encode(s)
        self.buffer.write(b)
        if self._line_buffering and (haslf or "\r" in s):
            # Line buffering: a newline in the output forces a flush.
            self.flush()
        # Writing invalidates any saved decoder state used by tell().
        self._snapshot = None
        if self._decoder:
            self._decoder.reset()
        return length
1612 |
|
---|
1613 | def _get_encoder(self):
|
---|
1614 | make_encoder = codecs.getincrementalencoder(self._encoding)
|
---|
1615 | self._encoder = make_encoder(self._errors)
|
---|
1616 | return self._encoder
|
---|
1617 |
|
---|
1618 | def _get_decoder(self):
|
---|
1619 | make_decoder = codecs.getincrementaldecoder(self._encoding)
|
---|
1620 | decoder = make_decoder(self._errors)
|
---|
1621 | if self._readuniversal:
|
---|
1622 | decoder = IncrementalNewlineDecoder(decoder, self._readtranslate)
|
---|
1623 | self._decoder = decoder
|
---|
1624 | return decoder
|
---|
1625 |
|
---|
1626 | # The following three methods implement an ADT for _decoded_chars.
|
---|
1627 | # Text returned from the decoder is buffered here until the client
|
---|
1628 | # requests it by calling our read() or readline() method.
|
---|
1629 | def _set_decoded_chars(self, chars):
|
---|
1630 | """Set the _decoded_chars buffer."""
|
---|
1631 | self._decoded_chars = chars
|
---|
1632 | self._decoded_chars_used = 0
|
---|
1633 |
|
---|
1634 | def _get_decoded_chars(self, n=None):
|
---|
1635 | """Advance into the _decoded_chars buffer."""
|
---|
1636 | offset = self._decoded_chars_used
|
---|
1637 | if n is None:
|
---|
1638 | chars = self._decoded_chars[offset:]
|
---|
1639 | else:
|
---|
1640 | chars = self._decoded_chars[offset:offset + n]
|
---|
1641 | self._decoded_chars_used += len(chars)
|
---|
1642 | return chars
|
---|
1643 |
|
---|
1644 | def _rewind_decoded_chars(self, n):
|
---|
1645 | """Rewind the _decoded_chars buffer."""
|
---|
1646 | if self._decoded_chars_used < n:
|
---|
1647 | raise AssertionError("rewind decoded_chars out of bounds")
|
---|
1648 | self._decoded_chars_used -= n
|
---|
1649 |
|
---|
def _read_chunk(self):
    """
    Read and decode the next chunk of data from the BufferedReader.

    The return value is True unless EOF was reached.  The decoded
    string is placed in self._decoded_chars (replacing its previous
    value).  The entire input chunk is sent to the decoder, though
    some of it may remain buffered in the decoder, yet to be
    converted.
    """
    if self._decoder is None:
        raise ValueError("no decoder")

    if self._telling:
        # To prepare for tell(), we need to snapshot a point in the
        # file where the decoder's input buffer is empty.
        dec_buffer, dec_flags = self._decoder.getstate()
        # Given this, we know there was a valid snapshot point
        # len(dec_buffer) bytes ago with decoder state (b'', dec_flags).

    # Read a chunk, decode it, and put the result in self._decoded_chars.
    # read1() returns at most one raw read's worth, so this cannot block
    # for more data than is already available.
    input_chunk = self.buffer.read1(self._CHUNK_SIZE)
    eof = not input_chunk
    self._set_decoded_chars(self._decoder.decode(input_chunk, eof))

    if self._telling:
        # At the snapshot point, len(dec_buffer) bytes before the read,
        # the next input to be decoded is dec_buffer + input_chunk.
        self._snapshot = (dec_flags, dec_buffer + input_chunk)

    return not eof
1683 |
|
---|
1684 | def _pack_cookie(self, position, dec_flags=0,
|
---|
1685 | bytes_to_feed=0, need_eof=0, chars_to_skip=0):
|
---|
1686 | # The meaning of a tell() cookie is: seek to position, set the
|
---|
1687 | # decoder flags to dec_flags, read bytes_to_feed bytes, feed them
|
---|
1688 | # into the decoder with need_eof as the EOF flag, then skip
|
---|
1689 | # chars_to_skip characters of the decoded result. For most simple
|
---|
1690 | # decoders, tell() will often just give a byte offset in the file.
|
---|
1691 | return (position | (dec_flags<<64) | (bytes_to_feed<<128) |
|
---|
1692 | (chars_to_skip<<192) | bool(need_eof)<<256)
|
---|
1693 |
|
---|
1694 | def _unpack_cookie(self, bigint):
|
---|
1695 | rest, position = divmod(bigint, 1<<64)
|
---|
1696 | rest, dec_flags = divmod(rest, 1<<64)
|
---|
1697 | rest, bytes_to_feed = divmod(rest, 1<<64)
|
---|
1698 | need_eof, chars_to_skip = divmod(rest, 1<<64)
|
---|
1699 | return position, dec_flags, bytes_to_feed, need_eof, chars_to_skip
|
---|
1700 |
|
---|
def tell(self):
    """Return an opaque integer cookie describing the current position.

    The cookie packs (see _pack_cookie) a safe byte position plus the
    decoder state needed for seek() to restore this exact logical
    position; for simple encodings it is often just the byte offset.

    Raises IOError if the stream is not seekable, or if tell() was
    disabled by an in-progress next() iteration.
    """
    if not self._seekable:
        raise IOError("underlying stream is not seekable")
    if not self._telling:
        raise IOError("telling position disabled by next() call")
    self.flush()
    position = self.buffer.tell()
    decoder = self._decoder
    if decoder is None or self._snapshot is None:
        if self._decoded_chars:
            # This should never happen.
            raise AssertionError("pending decoded text")
        return position

    # Skip backward to the snapshot point (see _read_chunk).
    dec_flags, next_input = self._snapshot
    position -= len(next_input)

    # How many decoded characters have been used up since the snapshot?
    chars_to_skip = self._decoded_chars_used
    if chars_to_skip == 0:
        # We haven't moved from the snapshot point.
        return self._pack_cookie(position, dec_flags)

    # Starting from the snapshot position, we will walk the decoder
    # forward until it gives us enough decoded characters.
    saved_state = decoder.getstate()
    try:
        # Note our initial start point.
        decoder.setstate((b'', dec_flags))
        start_pos = position
        start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
        need_eof = 0

        # Feed the decoder one byte at a time.  As we go, note the
        # nearest "safe start point" before the current location
        # (a point where the decoder has nothing buffered, so seek()
        # can safely start from there and advance to this location).
        for next_byte in next_input:
            bytes_fed += 1
            chars_decoded += len(decoder.decode(next_byte))
            dec_buffer, dec_flags = decoder.getstate()
            if not dec_buffer and chars_decoded <= chars_to_skip:
                # Decoder buffer is empty, so this is a safe start point.
                start_pos += bytes_fed
                chars_to_skip -= chars_decoded
                start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
            if chars_decoded >= chars_to_skip:
                break
        else:
            # We didn't get enough decoded data; signal EOF to get more.
            chars_decoded += len(decoder.decode(b'', final=True))
            need_eof = 1
            if chars_decoded < chars_to_skip:
                raise IOError("can't reconstruct logical file position")

        # The returned cookie corresponds to the last safe start point.
        return self._pack_cookie(
            start_pos, start_flags, bytes_fed, need_eof, chars_to_skip)
    finally:
        # Always restore the decoder: tell() must not disturb read state.
        decoder.setstate(saved_state)
1762 |
|
---|
def truncate(self, pos=None):
    """Truncate the underlying buffer at *pos* (current position if None).

    Pending data is flushed first; returns the buffer's result.
    """
    self.flush()
    target = self.tell() if pos is None else pos
    return self.buffer.truncate(target)
1768 |
|
---|
def detach(self):
    """Flush, disconnect and return the underlying buffer.

    Raises ValueError if the buffer was already detached.
    """
    if self.buffer is None:
        raise ValueError("buffer is already detached")
    self.flush()
    buffer, self._buffer = self._buffer, None
    return buffer
1776 |
|
---|
def seek(self, cookie, whence=0):
    """Seek to a position described by a tell() cookie (whence=0).

    Only zero offsets are accepted for whence=1 (current position)
    and whence=2 (end of file).  Returns the new position/cookie.
    """
    if self.closed:
        raise ValueError("tell on closed file")
    if not self._seekable:
        raise IOError("underlying stream is not seekable")
    if whence == 1:  # seek relative to current position
        if cookie != 0:
            raise IOError("can't do nonzero cur-relative seeks")
        # Seeking to the current position should attempt to
        # sync the underlying buffer with the current position.
        whence = 0
        cookie = self.tell()
    if whence == 2:  # seek relative to end of file
        if cookie != 0:
            raise IOError("can't do nonzero end-relative seeks")
        self.flush()
        position = self.buffer.seek(0, 2)
        # Drop buffered text and decoder state: they describe the old
        # position, not the end of file.
        self._set_decoded_chars('')
        self._snapshot = None
        if self._decoder:
            self._decoder.reset()
        return position
    if whence != 0:
        raise ValueError("invalid whence (%r, should be 0, 1 or 2)" %
                         (whence,))
    if cookie < 0:
        raise ValueError("negative seek position %r" % (cookie,))
    self.flush()

    # The strategy of seek() is to go back to the safe start point
    # and replay the effect of read(chars_to_skip) from there.
    start_pos, dec_flags, bytes_to_feed, need_eof, chars_to_skip = \
        self._unpack_cookie(cookie)

    # Seek back to the safe start point.
    self.buffer.seek(start_pos)
    self._set_decoded_chars('')
    self._snapshot = None

    # Restore the decoder to its state from the safe start point.
    if cookie == 0 and self._decoder:
        self._decoder.reset()
    elif self._decoder or dec_flags or chars_to_skip:
        self._decoder = self._decoder or self._get_decoder()
        self._decoder.setstate((b'', dec_flags))
        self._snapshot = (dec_flags, b'')

    if chars_to_skip:
        # Just like _read_chunk, feed the decoder and save a snapshot.
        input_chunk = self.buffer.read(bytes_to_feed)
        self._set_decoded_chars(
            self._decoder.decode(input_chunk, need_eof))
        self._snapshot = (dec_flags, input_chunk)

        # Skip chars_to_skip of the decoded characters.
        if len(self._decoded_chars) < chars_to_skip:
            raise IOError("can't restore logical file position")
        self._decoded_chars_used = chars_to_skip

    # Finally, reset the encoder (merely useful for proper BOM handling)
    try:
        encoder = self._encoder or self._get_encoder()
    except LookupError:
        # Sometimes the encoder doesn't exist
        pass
    else:
        if cookie != 0:
            encoder.setstate(0)
        else:
            encoder.reset()
    return cookie
1848 |
|
---|
def read(self, n=None):
    """Read and return at most *n* characters.

    With n None or negative, everything up to EOF is returned.
    Raises TypeError when n is not an integer-like object.
    """
    self._checkReadable()
    if n is None:
        n = -1
    decoder = self._decoder or self._get_decoder()
    # Accept anything integer-like: probe for __index__ rather than
    # checking concrete int types.
    try:
        n.__index__
    except AttributeError:
        raise TypeError("an integer is required")
    if n < 0:
        # Read everything.
        result = (self._get_decoded_chars() +
                  decoder.decode(self.buffer.read(), final=True))
        # At EOF there is no pending decoded text or snapshot to keep.
        self._set_decoded_chars('')
        self._snapshot = None
        return result
    else:
        # Keep reading chunks until we have n characters to return.
        eof = False
        result = self._get_decoded_chars(n)
        while len(result) < n and not eof:
            eof = not self._read_chunk()
            result += self._get_decoded_chars(n - len(result))
        return result
1873 |
|
---|
def next(self):
    """Return the next line, raising StopIteration at end of file.

    tell() is disabled while iterating and re-enabled (for seekable
    streams) once EOF is reached.
    """
    self._telling = False
    line = self.readline()
    if line:
        return line
    # EOF: restore the tell() machinery before stopping iteration.
    self._snapshot = None
    self._telling = self._seekable
    raise StopIteration
1882 |
|
---|
def readline(self, limit=None):
    """Read and return one line, including its line ending.

    If *limit* is non-negative, at most limit characters are returned
    (in which case the result may lack a line ending).  An empty
    string signals end of file.
    """
    if self.closed:
        raise ValueError("read from closed file")
    if limit is None:
        limit = -1
    elif not isinstance(limit, (int, long)):
        raise TypeError("limit must be an integer")

    # Grab all the decoded text (we will rewind any extra bits later).
    line = self._get_decoded_chars()

    start = 0
    # Make the decoder if it doesn't already exist.
    if not self._decoder:
        self._get_decoder()

    pos = endpos = None
    while True:
        if self._readtranslate:
            # Newlines are already translated, only search for \n
            pos = line.find('\n', start)
            if pos >= 0:
                endpos = pos + 1
                break
            else:
                start = len(line)

        elif self._readuniversal:
            # Universal newline search. Find any of \r, \r\n, \n
            # The decoder ensures that \r\n are not split in two pieces

            # In C we'd look for these in parallel of course.
            nlpos = line.find("\n", start)
            crpos = line.find("\r", start)
            if crpos == -1:
                if nlpos == -1:
                    # Nothing found
                    start = len(line)
                else:
                    # Found \n
                    endpos = nlpos + 1
                    break
            elif nlpos == -1:
                # Found lone \r
                endpos = crpos + 1
                break
            elif nlpos < crpos:
                # Found \n
                endpos = nlpos + 1
                break
            elif nlpos == crpos + 1:
                # Found \r\n
                endpos = crpos + 2
                break
            else:
                # Found \r
                endpos = crpos + 1
                break
        else:
            # non-universal: search for the configured newline string
            pos = line.find(self._readnl)
            if pos >= 0:
                endpos = pos + len(self._readnl)
                break

        if limit >= 0 and len(line) >= limit:
            endpos = limit  # reached length limit
            break

        # No line ending seen yet - get more data
        while self._read_chunk():
            if self._decoded_chars:
                break
        if self._decoded_chars:
            line += self._get_decoded_chars()
        else:
            # end of file
            self._set_decoded_chars('')
            self._snapshot = None
            return line

    if limit >= 0 and endpos > limit:
        endpos = limit  # don't exceed limit

    # Rewind _decoded_chars to just after the line ending we found.
    self._rewind_decoded_chars(len(line) - endpos)
    return line[:endpos]
1970 |
|
---|
@property
def newlines(self):
    """Line endings observed so far by the decoder; None before any decoding."""
    if self._decoder:
        return self._decoder.newlines
    return None
1974 |
|
---|
1975 |
|
---|
1976 | class StringIO(TextIOWrapper):
|
---|
1977 | """Text I/O implementation using an in-memory buffer.
|
---|
1978 |
|
---|
1979 | The initial_value argument sets the value of the object. The newline
|
---|
1980 | argument is like the one of TextIOWrapper's constructor.
|
---|
1981 | """
|
---|
1982 |
|
---|
def __init__(self, initial_value="", newline="\n"):
    # Back the text stream with an in-memory BytesIO; utf-8 can encode
    # any text, so the round-trip through bytes is lossless.
    super(StringIO, self).__init__(BytesIO(),
                                   encoding="utf-8",
                                   errors="strict",
                                   newline=newline)
    # Issue #5645: make universal newlines semantics the same as in the
    # C version, even under Windows.
    if newline is None:
        self._writetranslate = False
    if initial_value:
        if not isinstance(initial_value, unicode):
            initial_value = unicode(initial_value)
        self.write(initial_value)
        # Rewind so the initial value can be read back immediately.
        self.seek(0)
1997 |
|
---|
def getvalue(self):
    """Return the entire buffer contents as a text string."""
    self.flush()
    raw = self.buffer.getvalue()
    return raw.decode(self._encoding, self._errors)
2001 |
|
---|
2002 | def __repr__(self):
|
---|
2003 | # TextIOWrapper tells the encoding in its repr. In StringIO,
|
---|
2004 | # that's a implementation detail.
|
---|
2005 | return object.__repr__(self)
|
---|
2006 |
|
---|
@property
def errors(self):
    """Always None: StringIO exposes no error handler."""
    return None
2010 |
|
---|
@property
def encoding(self):
    """Always None: StringIO exposes no encoding."""
    return None
2014 |
|
---|
def detach(self):
    """Unsupported on StringIO: there is no buffer worth exposing."""
    self._unsupported("detach")