Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

io.py@ 389

Last change on this file since 389 was 2, checked in by Yuri Dario, 15 years ago
Initial import for vendor code.
Property svn:eol-style set to `native`
File size: 63.6 KB

Line
1	"""
2	The io module provides the Python interfaces to stream handling. The
3	builtin open function is defined in this module.
4
5	At the top of the I/O hierarchy is the abstract base class IOBase. It
6	defines the basic interface to a stream. Note, however, that there is no
7	separation between reading and writing to streams; implementations are
8	allowed to throw an IOError if they do not support a given operation.
9
10	Extending IOBase is RawIOBase which deals simply with the reading and
11	writing of raw bytes to a stream. FileIO subclasses RawIOBase to provide
12	an interface to OS files.
13
14	BufferedIOBase deals with buffering on a raw byte stream (RawIOBase). Its
15	subclasses, BufferedWriter, BufferedReader, and BufferedRWPair buffer
16	streams that are readable, writable, and both respectively.
17	BufferedRandom provides a buffered interface to random access
18	streams. BytesIO is a simple stream of in-memory bytes.
19
20	Another IOBase subclass, TextIOBase, deals with the encoding and decoding
21	of streams into text. TextIOWrapper, which extends it, is a buffered text
22	interface to a buffered raw stream (`BufferedIOBase`). Finally, StringIO
23	is an in-memory stream for text.
24
25	Argument names are not part of the specification, and only the arguments
26	of open() are intended to be used as keyword arguments.
27
28	data:
29
30	DEFAULT_BUFFER_SIZE
31
32	An int containing the default buffer size used by the module's buffered
33	I/O classes. open() uses the file's blksize (as obtained by os.stat) if
34	possible.
35	"""
36	# New I/O library conforming to PEP 3116.
37
38	# This is a prototype; hopefully eventually some of this will be
39	# reimplemented in C.
40
41	# XXX edge cases when switching between reading/writing
42	# XXX need to support 1 meaning line-buffered
43	# XXX whenever an argument is None, use the default value
44	# XXX read/write ops should check readable/writable
45	# XXX buffered readinto should work with arbitrary buffer objects
46	# XXX use incremental encoder for text output, at least for UTF-16 and UTF-8-SIG
47	# XXX check writable, readable and seekable in appropriate places
48	from __future__ import print_function
49	from __future__ import unicode_literals
50
# Authors of this prototype implementation.
__author__ = ("Guido van Rossum <guido@python.org>, "
              "Mike Verdone <mike.verdone@gmail.com>, "
              "Mark Russell <mark.russell@zen.co.uk>")

# Public names of the module (what "from io import *" exports).
__all__ = ["BlockingIOError", "open", "IOBase", "RawIOBase", "FileIO",
           "BytesIO", "StringIO", "BufferedIOBase",
           "BufferedReader", "BufferedWriter", "BufferedRWPair",
           "BufferedRandom", "TextIOBase", "TextIOWrapper"]
59
60	import os
61	import abc
62	import codecs
63	import _fileio
64	import threading
65
# Fallback buffer size for the buffered classes.
# open() uses st_blksize whenever we can
DEFAULT_BUFFER_SIZE = 8 * 1024  # bytes

# py3k has only new style classes
# (under Python 2 this makes base-less classes in this module new-style too)
__metaclass__ = type
71
class BlockingIOError(IOError):

    """Exception raised when I/O would block on a non-blocking I/O stream.

    Besides the usual errno/strerror pair, the instance records in
    characters_written how much was written before the stream blocked.
    """

    def __init__(self, errno, strerror, characters_written=0):
        # Only errno and strerror are handed to IOError; the count is kept
        # as a plain attribute on the instance.
        super(BlockingIOError, self).__init__(errno, strerror)
        self.characters_written = characters_written
79
80
def open(file, mode="r", buffering=None, encoding=None, errors=None,
         newline=None, closefd=True):
    r"""Open file and return a stream. If the file cannot be opened, an IOError is
    raised.

    file is either a string giving the name (and the path if the file
    isn't in the current working directory) of the file to be opened or an
    integer file descriptor of the file to be wrapped. (If a file
    descriptor is given, it is closed when the returned I/O object is
    closed, unless closefd is set to False.)

    mode is an optional string that specifies the mode in which the file
    is opened. It defaults to 'r' which means open for reading in text
    mode. Other common values are 'w' for writing (truncating the file if
    it already exists), and 'a' for appending (which on some Unix systems,
    means that all writes append to the end of the file regardless of the
    current seek position). In text mode, if encoding is not specified the
    encoding used is platform dependent. (For reading and writing raw
    bytes use binary mode and leave encoding unspecified.) The available
    modes are:

    ========= ===============================================================
    Character Meaning
    --------- ---------------------------------------------------------------
    'r'       open for reading (default)
    'w'       open for writing, truncating the file first
    'a'       open for writing, appending to the end of the file if it exists
    'b'       binary mode
    't'       text mode (default)
    '+'       open a disk file for updating (reading and writing)
    'U'       universal newline mode (for backwards compatibility; unneeded
              for new code)
    ========= ===============================================================

    The default mode is 'rt' (open for reading text). For binary random
    access, the mode 'w+b' opens and truncates the file to 0 bytes, while
    'r+b' opens the file without truncation.

    Python distinguishes between files opened in binary and text modes,
    even when the underlying operating system doesn't. Files opened in
    binary mode (appending 'b' to the mode argument) return contents as
    bytes objects without any decoding. In text mode (the default, or when
    't' is appended to the mode argument), the contents of the file are
    returned as strings, the bytes having been first decoded using a
    platform-dependent encoding or using the specified encoding if given.

    buffering is an optional integer used to set the buffering policy.
    Pass 0 to switch buffering off (only allowed in binary mode), 1 to select
    line buffering (only usable in text mode), and an integer > 1 to indicate
    the size of a fixed-size chunk buffer. When no buffering argument is
    given, the default buffering policy works as follows:

    * Binary files are buffered in fixed-size chunks; the size of the buffer
      is chosen using a heuristic trying to determine the underlying device's
      "block size" and falling back on `io.DEFAULT_BUFFER_SIZE`.
      On many systems, the buffer will typically be 4096 or 8192 bytes long.

    * "Interactive" text files (files for which isatty() returns True)
      use line buffering. Other text files use the policy described above
      for binary files.

    encoding is the name of the encoding used to decode or encode the
    file. This should only be used in text mode. The default encoding is
    platform dependent, but any encoding supported by Python can be
    passed. See the codecs module for the list of supported encodings.

    errors is an optional string that specifies how encoding errors are to
    be handled---this argument should not be used in binary mode. Pass
    'strict' to raise a ValueError exception if there is an encoding error
    (the default of None has the same effect), or pass 'ignore' to ignore
    errors. (Note that ignoring encoding errors can lead to data loss.)
    See the documentation for codecs.register for a list of the permitted
    encoding error strings.

    newline controls how universal newlines works (it only applies to text
    mode). It can be None, '', '\n', '\r', and '\r\n'. It works as
    follows:

    * On input, if newline is None, universal newlines mode is
      enabled. Lines in the input can end in '\n', '\r', or '\r\n', and
      these are translated into '\n' before being returned to the
      caller. If it is '', universal newline mode is enabled, but line
      endings are returned to the caller untranslated. If it has any of
      the other legal values, input lines are only terminated by the given
      string, and the line ending is returned to the caller untranslated.

    * On output, if newline is None, any '\n' characters written are
      translated to the system default line separator, os.linesep. If
      newline is '', no translation takes place. If newline is any of the
      other legal values, any '\n' characters written are translated to
      the given string.

    If closefd is False, the underlying file descriptor will be kept open
    when the file is closed. This does not work when a file name is given
    and must be True in that case.

    open() returns a file object whose type depends on the mode, and
    through which the standard file operations such as reading and writing
    are performed. When open() is used to open a file in a text mode ('w',
    'r', 'wt', 'rt', etc.), it returns a TextIOWrapper. When used to open
    a file in a binary mode, the returned class varies: in read binary
    mode, it returns a BufferedReader; in write binary and append binary
    modes, it returns a BufferedWriter, and in read/write mode, it returns
    a BufferedRandom.

    It is also possible to use a string or bytearray as a file for both
    reading and writing. For strings StringIO can be used like a file
    opened in a text mode, and for bytes a BytesIO can be used like a file
    opened in a binary mode.

    Raises TypeError for arguments of the wrong type and ValueError for
    an invalid mode or an invalid combination of arguments. If an error
    occurs after the raw file has been opened, the raw file is closed
    again before the error is propagated.
    """
    # --- Argument type checks -------------------------------------------
    if not isinstance(file, (basestring, int)):
        raise TypeError("invalid file: %r" % file)
    if not isinstance(mode, basestring):
        raise TypeError("invalid mode: %r" % mode)
    if buffering is not None and not isinstance(buffering, int):
        raise TypeError("invalid buffering: %r" % buffering)
    if encoding is not None and not isinstance(encoding, basestring):
        raise TypeError("invalid encoding: %r" % encoding)
    if errors is not None and not isinstance(errors, basestring):
        raise TypeError("invalid errors: %r" % errors)

    # --- Mode parsing ---------------------------------------------------
    modes = set(mode)
    # Reject unknown characters and repeated characters.
    if modes - set("arwb+tU") or len(mode) > len(modes):
        raise ValueError("invalid mode: %r" % mode)
    reading = "r" in modes
    writing = "w" in modes
    appending = "a" in modes
    updating = "+" in modes
    text = "t" in modes
    binary = "b" in modes
    if "U" in modes:
        if writing or appending:
            raise ValueError("can't use U and writing mode at once")
        reading = True  # 'U' implies reading
    if text and binary:
        raise ValueError("can't have text and binary mode at once")
    if reading + writing + appending > 1:
        raise ValueError("can't have read/write/append mode at once")
    if not (reading or writing or appending):
        raise ValueError("must have exactly one of read/write/append mode")
    if binary and encoding is not None:
        raise ValueError("binary mode doesn't take an encoding argument")
    if binary and errors is not None:
        raise ValueError("binary mode doesn't take an errors argument")
    if binary and newline is not None:
        raise ValueError("binary mode doesn't take a newline argument")

    # --- Raw layer ------------------------------------------------------
    raw_mode = (("r" if reading else "") +
                ("w" if writing else "") +
                ("a" if appending else "") +
                ("+" if updating else ""))
    raw = FileIO(file, raw_mode, closefd)

    # The raw file is open from here on. If building the buffered or text
    # layer fails, close it again so the error does not leak it.
    try:
        if buffering is None:
            buffering = -1
        line_buffering = False
        if buffering == 1 or buffering < 0 and raw.isatty():
            buffering = -1
            line_buffering = True
        if buffering < 0:
            buffering = DEFAULT_BUFFER_SIZE
            try:
                bs = os.fstat(raw.fileno()).st_blksize
            except (os.error, AttributeError):
                pass  # no usable block size: keep the default
            else:
                if bs > 1:
                    buffering = bs
        if buffering < 0:
            raise ValueError("invalid buffering size")
        if buffering == 0:
            if binary:
                return raw
            raise ValueError("can't have unbuffered text I/O")

        # --- Buffered layer ---------------------------------------------
        if updating:
            buffer = BufferedRandom(raw, buffering)
        elif writing or appending:
            buffer = BufferedWriter(raw, buffering)
        elif reading:
            buffer = BufferedReader(raw, buffering)
        else:
            raise ValueError("unknown mode: %r" % mode)
        if binary:
            return buffer

        # --- Text layer -------------------------------------------------
        wrapper = TextIOWrapper(buffer, encoding, errors, newline,
                                line_buffering)
        wrapper.mode = mode
        return wrapper
    except:
        raw.close()
        raise
266
class _DocDescriptor:
    """Helper for builtins.open.__doc__

    Descriptor whose value is the docstring of open() prefixed with the
    call signature of open(); it is computed on every access.
    """
    def __get__(self, obj, typ):
        signature = ("open(file, mode='r', buffering=None, encoding=None, "
                     "errors=None, newline=None, closefd=True)\n\n")
        return signature + open.__doc__


class OpenWrapper:
    """Wrapper for builtins.open

    Trick so that open won't become a bound method when stored
    as a class variable (as dumbdbm does).

    See initstdio() in Python/pythonrun.c.
    """
    # Replaces the class docstring above with the open() documentation.
    __doc__ = _DocDescriptor()

    def __new__(cls, *args, **kwargs):
        # "Instantiating" the wrapper just calls open(). Positional and
        # keyword arguments are both forwarded unchanged, so for example
        # OpenWrapper(path, mode="rb") behaves exactly like open().
        return open(*args, **kwargs)
288
289
class UnsupportedOperation(ValueError, IOError):
    """Raised when an operation is not supported by a stream.

    Inherits from both ValueError and IOError, so handlers for either
    exception type will catch it.
    """
292
293
class IOBase(object):

    """The abstract base class for all I/O classes, acting on streams of
    bytes. There is no public constructor.

    This class provides dummy implementations for many methods that
    derived classes can override selectively; the default implementations
    represent a file that cannot be read, written or seeked.

    Even though IOBase does not declare read, readinto, or write because
    their signatures will vary, implementations and clients should
    consider those methods part of the interface. Also, implementations
    may raise an IOError when operations they do not support are called.

    The basic type used for binary data read from or written to a file is
    bytes. bytearrays are accepted too, and in some cases (such as
    readinto) needed. Text I/O classes work with str data.

    Note that calling any method (even inquiries) on a closed stream is
    undefined. Implementations may raise IOError in this case.

    IOBase (and its subclasses) support the iterator protocol, meaning
    that an IOBase object can be iterated over yielding the lines in a
    stream.

    IOBase also supports the :keyword:`with` statement. In this example,
    fp is closed after the suite of the with statement is complete:

    with open('spam.txt', 'w') as fp:
        fp.write('Spam and eggs!')
    """

    __metaclass__ = abc.ABCMeta

    ### Internal ###

    def _unsupported(self, name):
        """Internal: raise an exception for unsupported operations."""
        raise UnsupportedOperation("%s.%s() not supported" %
                                   (self.__class__.__name__, name))

    ### Positioning ###

    def seek(self, pos, whence=0):
        """Change stream position.

        Change the stream position to byte offset pos. pos is
        interpreted relative to the position indicated by whence. Values
        for whence are:

        * 0 -- start of stream (the default); pos should be zero or positive
        * 1 -- current stream position; pos may be negative
        * 2 -- end of stream; pos is usually negative

        Return the new absolute position.
        """
        self._unsupported("seek")

    def tell(self):
        """Return current stream position."""
        # A relative seek by zero bytes reports the position without moving.
        return self.seek(0, 1)

    def truncate(self, pos=None):
        """Truncate file to pos bytes.

        pos defaults to the current IO position as reported by tell().
        Return the new size.
        """
        self._unsupported("truncate")

    ### Flush and close ###

    def flush(self):
        """Flush write buffers, if applicable.

        This is not implemented for read-only and non-blocking streams.
        """
        # XXX Should this return the number of bytes written???

    # Name-mangled to _IOBase__closed, so subclasses cannot clobber it by
    # accident; the class-level default means "not closed yet".
    __closed = False

    def close(self):
        """Flush and close the IO object.

        This method has no effect if the file is already closed.

        An IOError raised by flush() is ignored. Any other exception
        propagates, but the object is marked closed in either case.
        """
        if not self.__closed:
            try:
                self.flush()
            except IOError:
                pass  # If flush() fails, just give up
            finally:
                # Mark the stream closed even when flush() raised something
                # other than IOError; otherwise every later close(),
                # including the one from __del__, would retry the flush.
                self.__closed = True

    def __del__(self):
        """Destructor. Calls close()."""
        # The try/except block is in case this is called at program
        # exit time, when it's possible that globals have already been
        # deleted, and then the close() call might fail. Since
        # there's nothing we can do about such failures and they annoy
        # the end users, we suppress the traceback.
        try:
            self.close()
        except:
            pass

    ### Inquiries ###

    def seekable(self):
        """Return whether object supports random access.

        If False, seek(), tell() and truncate() will raise IOError.
        This method may need to do a test seek().
        """
        return False

    def _checkSeekable(self, msg=None):
        """Internal: raise an IOError if file is not seekable
        """
        if not self.seekable():
            raise IOError("File or stream is not seekable."
                          if msg is None else msg)

    def readable(self):
        """Return whether object was opened for reading.

        If False, read() will raise IOError.
        """
        return False

    def _checkReadable(self, msg=None):
        """Internal: raise an IOError if file is not readable
        """
        if not self.readable():
            raise IOError("File or stream is not readable."
                          if msg is None else msg)

    def writable(self):
        """Return whether object was opened for writing.

        If False, write() and truncate() will raise IOError.
        """
        return False

    def _checkWritable(self, msg=None):
        """Internal: raise an IOError if file is not writable
        """
        if not self.writable():
            raise IOError("File or stream is not writable."
                          if msg is None else msg)

    @property
    def closed(self):
        """closed: bool. True iff the file has been closed.

        For backwards compatibility, this is a property, not a predicate.
        """
        return self.__closed

    def _checkClosed(self, msg=None):
        """Internal: raise a ValueError if file is closed
        """
        if self.closed:
            raise ValueError("I/O operation on closed file."
                             if msg is None else msg)

    ### Context manager ###

    def __enter__(self):
        """Context management protocol. Returns self."""
        self._checkClosed()
        return self

    def __exit__(self, *args):
        """Context management protocol. Calls close()"""
        self.close()

    ### Lower-level APIs ###

    # XXX Should these be present even if unimplemented?

    def fileno(self):
        """Returns underlying file descriptor if one exists.

        An IOError is raised if the IO object does not use a file descriptor.
        """
        self._unsupported("fileno")

    def isatty(self):
        """Return whether this is an 'interactive' stream.

        Return False if it can't be determined.
        """
        self._checkClosed()
        return False

    ### Readline[s] and writelines ###

    def readline(self, limit=-1):
        r"""Read and return a line from the stream.

        If limit is specified, at most limit bytes will be read.
        A limit of None or a negative limit means "no limit".

        The line terminator is always b'\n' for binary files; for text
        files, the newlines argument to open can be used to select the line
        terminator(s) recognized.

        Raises TypeError if limit is neither None nor an integer.
        """
        self._checkClosed()
        if limit is None:
            limit = -1
        if not isinstance(limit, (int, long)):
            raise TypeError("limit must be an integer")
        if hasattr(self, "peek"):
            # With peek() available we can look ahead and read up to and
            # including the next newline in a single read() call.
            def nreadahead():
                readahead = self.peek(1)
                if not readahead:
                    return 1
                n = (readahead.find(b"\n") + 1) or len(readahead)
                if limit >= 0:
                    n = min(n, limit)
                return n
        else:
            # Without peek() we must not overshoot the newline, so read
            # one byte at a time.
            def nreadahead():
                return 1
        res = bytearray()
        while limit < 0 or len(res) < limit:
            b = self.read(nreadahead())
            if not b:
                break
            res += b
            if res.endswith(b"\n"):
                break
        return bytes(res)

    def __iter__(self):
        self._checkClosed()
        return self

    def next(self):
        """Return the next line; raise StopIteration at end of stream."""
        line = self.readline()
        if not line:
            raise StopIteration
        return line

    def readlines(self, hint=None):
        """Return a list of lines from the stream.

        hint can be specified to control the number of lines read: no more
        lines will be read if the total size (in bytes/characters) of all
        lines so far exceeds hint.

        Raises TypeError if hint is neither None nor an integer.
        """
        if hint is None:
            hint = -1
        if not isinstance(hint, (int, long)):
            raise TypeError("hint must be an integer")
        if hint <= 0:
            return list(self)
        n = 0
        lines = []
        for line in self:
            lines.append(line)
            n += len(line)
            if n >= hint:
                break
        return lines

    def writelines(self, lines):
        """Write each item of lines to the stream using write()."""
        self._checkClosed()
        for line in lines:
            self.write(line)
564
565
class RawIOBase(IOBase):

    """Base class for raw binary I/O."""

    # The read() method is implemented by calling readinto(); derived
    # classes that want to support read() only need to implement
    # readinto() as a primitive operation. In general, readinto() can be
    # more efficient than read().

    # (It would be tempting to also provide an implementation of
    # readinto() in terms of read(), in case the latter is a more suitable
    # primitive operation, but that would lead to nasty recursion in case
    # a subclass doesn't implement either.)

    def read(self, n=-1):
        """Read and return up to n bytes.

        If n is None or negative, defer to readall().

        Returns an empty bytes array on EOF, or None if the object is
        set not to block and has no data to read.
        """
        if n is None:
            n = -1
        if n < 0:
            return self.readall()
        b = bytearray(n.__index__())
        n = self.readinto(b)
        if n is None:
            # readinto() reports "no data available right now". Pass that
            # on as None; "del b[None:]" would empty the buffer and make
            # this case indistinguishable from EOF.
            return None
        del b[n:]
        return bytes(b)

    def readall(self):
        """Read until EOF, using multiple read() call."""
        res = bytearray()
        while True:
            data = self.read(DEFAULT_BUFFER_SIZE)
            if not data:
                # b"" (EOF) or None (no data available): stop reading.
                break
            res += data
        return bytes(res)

    def readinto(self, b):
        """Read up to len(b) bytes into b.

        Returns number of bytes read (0 for EOF), or None if the object
        is set not to block and has no data to read.
        """
        self._unsupported("readinto")

    def write(self, b):
        """Write the given buffer to the IO stream.

        Returns the number of bytes written, which may be less than len(b).
        """
        self._unsupported("write")
619
620
class FileIO(_fileio._FileIO, RawIOBase):

    """Raw I/O implementation for OS files."""

    # This multiply inherits from _FileIO and RawIOBase to make
    # isinstance(io.FileIO(), io.RawIOBase) return True without requiring
    # that _fileio._FileIO inherits from io.RawIOBase (which would be hard
    # to do since _fileio.c is written in C).

    def __init__(self, name, mode="r", closefd=True):
        """Open name through _fileio._FileIO and remember name."""
        _fileio._FileIO.__init__(self, name, mode, closefd)
        self._name = name

    def close(self):
        """Close via the C implementation first, then via RawIOBase."""
        for base in (_fileio._FileIO, RawIOBase):
            base.close(self)

    # Read-only access to the name given to the constructor.
    name = property(lambda self: self._name)
641
642
class BufferedIOBase(IOBase):

    """Base class for buffered IO objects.

    The main difference with RawIOBase is that the read() method
    supports omitting the size argument, and does not have a default
    implementation that defers to readinto().

    In addition, read(), readinto() and write() may raise
    BlockingIOError if the underlying raw stream is in non-blocking
    mode and not ready; unlike their raw counterparts, they will never
    return None.

    A typical implementation should not inherit from a RawIOBase
    implementation, but wrap one.
    """

    def read(self, n=None):
        """Read and return up to n bytes.

        If the argument is omitted, None, or negative, reads and
        returns all data until EOF.

        If the argument is positive, and the underlying raw stream is
        not 'interactive', multiple raw reads may be issued to satisfy
        the byte count (unless EOF is reached first). But for
        interactive raw streams (XXX and for pipes?), at most one raw
        read will be issued, and a short result does not imply that
        EOF is imminent.

        Returns an empty bytes array on EOF.

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        self._unsupported("read")

    def readinto(self, b):
        """Read up to len(b) bytes into b.

        Like read(), this may issue multiple reads to the underlying raw
        stream, unless the latter is 'interactive'.

        Returns the number of bytes read (0 for EOF).

        Raises BlockingIOError if the underlying raw stream has no
        data at the moment.
        """
        # XXX This ought to work with anything that supports the buffer API
        chunk = self.read(len(b))
        count = len(chunk)
        try:
            b[:count] = chunk
        except TypeError as err:
            # array.array rejects a bytes slice assignment; retry with the
            # data wrapped in an array. Any other target re-raises.
            import array
            if isinstance(b, array.array):
                b[:count] = array.array(b'b', chunk)
            else:
                raise err
        return count

    def write(self, b):
        """Write the given buffer to the IO stream.

        Return the number of bytes written, which is never less than
        len(b).

        Raises BlockingIOError if the buffer is full and the
        underlying raw stream cannot accept more data at the moment.
        """
        self._unsupported("write")
713
714
715	class _BufferedIOMixin(BufferedIOBase):
716
717	"""A mixin implementation of BufferedIOBase with an underlying raw stream.
718
719	This passes most requests on to the underlying raw stream. It
720	does not provide implementations of read(), readinto() or
721	write().
722	"""
723
724	def __init__(self, raw):
725	self.raw = raw
726
727	### Positioning ###
728
729	def seek(self, pos, whence=0):
730	return self.raw.seek(pos, whence)
731
732	def tell(self):
733	return self.raw.tell()
734
735	def truncate(self, pos=None):
736	# Flush the stream. We're mixing buffered I/O with lower-level I/O,
737	# and a flush may be necessary to synch both views of the current
738	# file state.
739	self.flush()
740
741	if pos is None:
742	pos = self.tell()
743	# XXX: Should seek() be used, instead of passing the position
744	# XXX directly to truncate?
745	return self.raw.truncate(pos)
746
747	### Flush and close ###
748
749	def flush(self):
750	self.raw.flush()
751
752	def close(self):
753	if not self.closed:
754	try:
755	self.flush()
756	except IOError:
757	pass # If flush() fails, just give up
758	self.raw.close()
759
760	### Inquiries ###
761
762	def seekable(self):
763	return self.raw.seekable()
764
765	def readable(self):
766	return self.raw.readable()
767
768	def writable(self):
769	return self.raw.writable()
770
771	@property
772	def closed(self):
773	return self.raw.closed
774
775	@property
776	def name(self):
777	return self.raw.name
778
779	@property
780	def mode(self):
781	return self.raw.mode
782
783	### Lower-level APIs ###
784
785	def fileno(self):
786	return self.raw.fileno()
787
788	def isatty(self):
789	return self.raw.isatty()
790
791
792	class _BytesIO(BufferedIOBase):
793
794	"""Buffered I/O implementation using an in-memory bytes buffer."""
795
796	# XXX More docs
797
798	def __init__(self, initial_bytes=None):
799	buf = bytearray()
800	if initial_bytes is not None:
801	buf += bytearray(initial_bytes)
802	self._buffer = buf
803	self._pos = 0
804
805	def getvalue(self):
806	"""Return the bytes value (contents) of the buffer
807	"""
808	if self.closed:
809	raise ValueError("getvalue on closed file")
810	return bytes(self._buffer)
811
812	def read(self, n=None):
813	if self.closed:
814	raise ValueError("read from closed file")
815	if n is None:
816	n = -1
817	if not isinstance(n, (int, long)):
818	raise TypeError("argument must be an integer")
819	if n < 0:
820	n = len(self._buffer)
821	if len(self._buffer) <= self._pos:
822	return b""
823	newpos = min(len(self._buffer), self._pos + n)
824	b = self._buffer[self._pos : newpos]
825	self._pos = newpos
826	return bytes(b)
827
828	def read1(self, n):
829	"""this is the same as read.
830	"""
831	return self.read(n)
832
833	def write(self, b):
834	if self.closed:
835	raise ValueError("write to closed file")
836	if isinstance(b, unicode):
837	raise TypeError("can't write unicode to binary stream")
838	n = len(b)
839	if n == 0:
840	return 0
841	pos = self._pos
842	if pos > len(self._buffer):
843	# Inserts null bytes between the current end of the file
844	# and the new write position.
845	padding = b'\x00' * (pos - len(self._buffer))
846	self._buffer += padding
847	self._buffer[pos:pos + n] = b
848	self._pos += n
849	return n
850
851	def seek(self, pos, whence=0):
852	if self.closed:
853	raise ValueError("seek on closed file")
854	try:
855	pos = pos.__index__()
856	except AttributeError as err:
857	raise TypeError("an integer is required") # from err
858	if whence == 0:
859	if pos < 0:
860	raise ValueError("negative seek position %r" % (pos,))
861	self._pos = pos
862	elif whence == 1:
863	self._pos = max(0, self._pos + pos)
864	elif whence == 2:
865	self._pos = max(0, len(self._buffer) + pos)
866	else:
867	raise ValueError("invalid whence value")
868	return self._pos
869
870	def tell(self):
871	if self.closed:
872	raise ValueError("tell on closed file")
873	return self._pos
874
875	def truncate(self, pos=None):
876	if self.closed:
877	raise ValueError("truncate on closed file")
878	if pos is None:
879	pos = self._pos
880	elif pos < 0:
881	raise ValueError("negative truncate position %r" % (pos,))
882	del self._buffer[pos:]
883	return pos
884
885	def readable(self):
886	return True
887
888	def writable(self):
889	return True
890
891	def seekable(self):
892	return True
893
# Use the faster implementation of BytesIO if available
try:
    import _bytesio

    # Also inherit from BufferedIOBase so that instances of the
    # _bytesio-based class are BufferedIOBase instances as well.
    class BytesIO(_bytesio._BytesIO, BufferedIOBase):
        __doc__ = _bytesio._BytesIO.__doc__

except ImportError:
    # No _bytesio module: fall back to the pure-Python class.
    BytesIO = _BytesIO
903
904
905	class BufferedReader(_BufferedIOMixin):
906
907	"""BufferedReader(raw[, buffer_size])
908
909	A buffer for a readable, sequential BaseRawIO object.
910
911	The constructor creates a BufferedReader for the given readable raw
912	stream and buffer_size. If buffer_size is omitted, DEFAULT_BUFFER_SIZE
913	is used.
914	"""
915
916	def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
917	"""Create a new buffered reader using the given readable raw IO object.
918	"""
919	raw._checkReadable()
920	_BufferedIOMixin.__init__(self, raw)
921	self.buffer_size = buffer_size
922	self._reset_read_buf()
923	self._read_lock = threading.Lock()
924
925	def _reset_read_buf(self):
926	self._read_buf = b""
927	self._read_pos = 0
928
929	def read(self, n=None):
930	"""Read n bytes.
931
932	Returns exactly n bytes of data unless the underlying raw IO
933	stream reaches EOF or if the call would block in non-blocking
934	mode. If n is negative, read until EOF or until read() would
935	block.
936	"""
937	with self._read_lock:
938	return self._read_unlocked(n)
939
940	def _read_unlocked(self, n=None):
941	nodata_val = b""
942	empty_values = (b"", None)
943	buf = self._read_buf
944	pos = self._read_pos
945
946	# Special case for when the number of bytes to read is unspecified.
947	if n is None or n == -1:
948	self._reset_read_buf()
949	chunks = [buf[pos:]] # Strip the consumed bytes.
950	current_size = 0
951	while True:
952	# Read until EOF or until read() would block.
953	chunk = self.raw.read()
954	if chunk in empty_values:
955	nodata_val = chunk
956	break
957	current_size += len(chunk)
958	chunks.append(chunk)
959	return b"".join(chunks) or nodata_val
960
961	# The number of bytes to read is specified, return at most n bytes.
962	avail = len(buf) - pos # Length of the available buffered data.
963	if n <= avail:
964	# Fast path: the data to read is fully buffered.
965	self._read_pos += n
966	return buf[pos:pos+n]
967	# Slow path: read from the stream until enough bytes are read,
968	# or until an EOF occurs or until read() would block.
969	chunks = [buf[pos:]]
970	wanted = max(self.buffer_size, n)
971	while avail < n:
972	chunk = self.raw.read(wanted)
973	if chunk in empty_values:
974	nodata_val = chunk
975	break
976	avail += len(chunk)
977	chunks.append(chunk)
978	# n is more then avail only when an EOF occurred or when
979	# read() would have blocked.
980	n = min(n, avail)
981	out = b"".join(chunks)
982	self._read_buf = out[n:] # Save the extra data in the buffer.
983	self._read_pos = 0
984	return out[:n] if out else nodata_val
985
def peek(self, n=0):
    """Return buffered bytes without advancing the position.

    n is the desired minimum number of bytes.  The work is done by
    _peek_unlocked() while holding the read lock; it issues at most one
    raw read to satisfy the request and never asks the raw stream for
    more than is needed to fill self.buffer_size.
    """
    with self._read_lock:
        pending = self._peek_unlocked(n)
    return pending
995
def _peek_unlocked(self, n=0):
    """Return the unread part of the buffer; caller holds the read lock.

    If fewer than min(n, self.buffer_size) bytes are buffered, a single
    raw read is attempted to top the buffer up to self.buffer_size.
    """
    buffered = len(self._read_buf) - self._read_pos
    if buffered < min(n, self.buffer_size):
        fresh = self.raw.read(self.buffer_size - buffered)
        if fresh:
            # Drop the consumed prefix and append the new data.
            self._read_buf = self._read_buf[self._read_pos:] + fresh
            self._read_pos = 0
    return self._read_buf[self._read_pos:]
1006
def read1(self, n):
    """Read up to n bytes using at most one raw read() call.

    If at least one byte is already buffered, only buffered bytes are
    returned.  Otherwise a single raw read is performed first.  A
    non-positive n yields b"" without touching the stream.
    """
    if n <= 0:
        return b""
    with self._read_lock:
        # Make sure at least one byte is buffered (one raw read at most).
        self._peek_unlocked(1)
        buffered = len(self._read_buf) - self._read_pos
        return self._read_unlocked(min(n, buffered))
1017
def tell(self):
    """Return the logical position: the raw position minus unread read-ahead."""
    raw_pos = self.raw.tell()
    unread = len(self._read_buf) - self._read_pos
    return raw_pos - unread
1020
def seek(self, pos, whence=0):
    """Seek the raw stream and drop the read buffer.

    For a relative seek (whence == 1) the offset is first reduced by the
    number of buffered-but-unread bytes, because the raw stream is that
    far ahead of the logical position.  Returns the position reported
    by the raw stream.
    """
    with self._read_lock:
        target = pos
        if whence == 1:
            target = pos - (len(self._read_buf) - self._read_pos)
        new_pos = self.raw.seek(target, whence)
        self._reset_read_buf()
    return new_pos
1028
1029
class BufferedWriter(_BufferedIOMixin):

    """A buffer for a writeable sequential RawIO object.

    The constructor creates a BufferedWriter for the given writeable raw
    stream. If the buffer_size is not given, it defaults to
    DEFAULT_BUFFER_SIZE. If max_buffer_size is omitted, it defaults to
    twice the buffer size.
    """

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Wrap raw, which must pass its _checkWritable() check."""
        raw._checkWritable()
        _BufferedIOMixin.__init__(self, raw)
        self.buffer_size = buffer_size
        self.max_buffer_size = (2*buffer_size
                                if max_buffer_size is None
                                else max_buffer_size)
        self._write_buf = bytearray()
        self._write_lock = threading.Lock()

    def write(self, b):
        """Append b to the write buffer, flushing when it grows too large.

        Returns the number of bytes accepted.  Raises ValueError if the
        file is closed and TypeError if b is a unicode object.  Raises
        BlockingIOError when the raw stream would block and the buffer
        cannot take (all of) b; its characters_written attribute is the
        number of bytes of b that were accepted.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, unicode):
            raise TypeError("can't write unicode to binary stream")
        with self._write_lock:
            # XXX we can implement some more tricks to try and avoid
            # partial writes
            if len(self._write_buf) > self.buffer_size:
                # We're full, so let's pre-flush the buffer
                try:
                    self._flush_unlocked()
                except BlockingIOError as e:
                    # We can't accept anything else.
                    # XXX Why not just let the exception pass through?
                    raise BlockingIOError(e.errno, e.strerror, 0)
            before = len(self._write_buf)
            self._write_buf.extend(b)
            written = len(self._write_buf) - before
            if len(self._write_buf) > self.buffer_size:
                try:
                    self._flush_unlocked()
                except BlockingIOError as e:
                    if len(self._write_buf) > self.max_buffer_size:
                        # We've hit max_buffer_size. We have to accept a
                        # partial write and cut back our buffer.
                        overage = len(self._write_buf) - self.max_buffer_size
                        # The discarded tail is the end of b, so only
                        # written - overage bytes of b were accepted.
                        written -= overage
                        self._write_buf = self._write_buf[:self.max_buffer_size]
                        raise BlockingIOError(e.errno, e.strerror, written)
            return written

    def truncate(self, pos=None):
        """Flush pending data, then truncate the raw stream at pos.

        pos defaults to the raw stream's position after the flush.
        """
        with self._write_lock:
            self._flush_unlocked()
            if pos is None:
                pos = self.raw.tell()
            return self.raw.truncate(pos)

    def flush(self):
        """Write the buffered data to the raw stream."""
        with self._write_lock:
            self._flush_unlocked()

    def _flush_unlocked(self):
        """Drain the write buffer; the caller must hold the write lock.

        Raises ValueError if the file is closed.  If the raw stream
        raises BlockingIOError, the bytes it reports as written are
        removed from the buffer and a BlockingIOError carrying the total
        number of bytes flushed by this call is raised.
        """
        if self.closed:
            raise ValueError("flush of closed file")
        written = 0
        try:
            while self._write_buf:
                n = self.raw.write(self._write_buf)
                del self._write_buf[:n]
                written += n
        except BlockingIOError as e:
            n = e.characters_written
            del self._write_buf[:n]
            written += n
            raise BlockingIOError(e.errno, e.strerror, written)

    def tell(self):
        """Return the raw position plus the number of unflushed bytes."""
        return self.raw.tell() + len(self._write_buf)

    def seek(self, pos, whence=0):
        """Flush pending data, then seek the raw stream."""
        with self._write_lock:
            self._flush_unlocked()
            return self.raw.seek(pos, whence)
1115
1116
class BufferedRWPair(BufferedIOBase):

    """A buffered reader and writer object together.

    A buffered reader object and buffered writer object put together to
    form a sequential IO object that can read and write. This is typically
    used with a socket or two-way pipe.

    reader and writer are RawIOBase objects that are readable and
    writeable respectively. If the buffer_size is omitted it defaults to
    DEFAULT_BUFFER_SIZE. The max_buffer_size (for the buffered writer)
    defaults to twice the buffer size.
    """

    # XXX The usefulness of this (compared to having two separate IO
    # objects) is questionable.

    def __init__(self, reader, writer,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Constructor.

        The arguments are two RawIO instances.
        """
        reader._checkReadable()
        writer._checkWritable()
        self.reader = BufferedReader(reader, buffer_size)
        self.writer = BufferedWriter(writer, buffer_size, max_buffer_size)

    def read(self, n=None):
        """Read from the reader side; None is passed on as -1."""
        if n is None:
            n = -1
        return self.reader.read(n)

    def readinto(self, b):
        """Delegate to the reader's readinto()."""
        return self.reader.readinto(b)

    def write(self, b):
        """Delegate to the writer's write()."""
        return self.writer.write(b)

    def peek(self, n=0):
        """Delegate to the reader's peek()."""
        return self.reader.peek(n)

    def read1(self, n):
        """Delegate to the reader's read1()."""
        return self.reader.read1(n)

    def readable(self):
        """Report whether the reader side is readable."""
        return self.reader.readable()

    def writable(self):
        """Report whether the writer side is writable."""
        return self.writer.writable()

    def flush(self):
        """Flush the writer side."""
        return self.writer.flush()

    def close(self):
        """Close the writer, then the reader.

        The reader is closed even if closing the writer raises, so a
        failure on the write side does not leak the read side.
        """
        try:
            self.writer.close()
        finally:
            self.reader.close()

    def isatty(self):
        """Return True if either side reports being a tty."""
        return self.reader.isatty() or self.writer.isatty()

    @property
    def closed(self):
        """The closed state of the writer side."""
        return self.writer.closed
1181
1182
class BufferedRandom(BufferedWriter, BufferedReader):

    """A buffered interface to random access streams.

    The constructor creates a reader and writer for a seekable stream,
    raw, given in the first argument. If the buffer_size is omitted it
    defaults to DEFAULT_BUFFER_SIZE. The max_buffer_size (for the buffered
    writer) defaults to twice the buffer size.
    """

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Wrap raw, which must pass its _checkSeekable() check."""
        raw._checkSeekable()
        BufferedReader.__init__(self, raw, buffer_size)
        BufferedWriter.__init__(self, raw, buffer_size, max_buffer_size)

    def seek(self, pos, whence=0):
        """Flush pending writes, seek the raw stream, drop the read buffer.

        Returns the position reported by the raw stream.
        """
        self.flush()
        with self._read_lock:
            if whence == 1:
                # The raw stream is ahead of the logical position by the
                # amount of unread read-ahead; compensate so a relative
                # seek starts from where the caller actually is.
                pos -= len(self._read_buf) - self._read_pos
            # First do the raw seek, then empty the read buffer, so that
            # if the raw seek fails, we don't lose buffered data forever.
            pos = self.raw.seek(pos, whence)
            self._reset_read_buf()
        return pos

    def tell(self):
        """Return the logical position, honouring whichever buffer is in use."""
        if self._write_buf:
            return self.raw.tell() + len(self._write_buf)
        else:
            return BufferedReader.tell(self)

    def truncate(self, pos=None):
        """Truncate at pos, which defaults to the current logical position."""
        if pos is None:
            pos = self.tell()
        # BufferedWriter.truncate() flushes pending writes before
        # truncating the raw stream.
        return BufferedWriter.truncate(self, pos)

    def read(self, n=None):
        """Flush pending writes, then read; None is passed on as -1."""
        if n is None:
            n = -1
        self.flush()
        return BufferedReader.read(self, n)

    def readinto(self, b):
        """Flush pending writes, then read into b."""
        self.flush()
        return BufferedReader.readinto(self, b)

    def peek(self, n=0):
        """Flush pending writes, then peek at buffered data."""
        self.flush()
        return BufferedReader.peek(self, n)

    def read1(self, n):
        """Flush pending writes, then do a read1()."""
        self.flush()
        return BufferedReader.read1(self, n)

    def write(self, b):
        """Undo any read-ahead on the raw stream, then buffer b for writing."""
        if self._read_buf:
            # Undo readahead
            with self._read_lock:
                self.raw.seek(self._read_pos - len(self._read_buf), 1)
                self._reset_read_buf()
        return BufferedWriter.write(self, b)
1245
1246
class TextIOBase(IOBase):

    """Base class for text I/O.

    Offers a character- and line-oriented view of a stream.  No readinto
    method is provided because Python character strings are immutable.
    There is no public constructor.
    """

    def read(self, n=-1):
        """Read at most n characters from the stream.

        Data is taken from the underlying buffer until n characters are
        available or EOF is reached.  A negative or omitted n means
        "read until EOF".
        """
        self._unsupported("read")

    def write(self, s):
        """Write the string s to the stream."""
        self._unsupported("write")

    def truncate(self, pos=None):
        """Truncate the stream's size to pos."""
        self._unsupported("truncate")

    def readline(self):
        """Read up to and including a newline, or up to EOF.

        An empty string is returned when EOF is hit immediately.
        """
        self._unsupported("readline")

    @property
    def encoding(self):
        """Name of the encoding; None here, subclasses should override."""
        return None

    @property
    def newlines(self):
        """Line endings translated so far.

        Only endings translated while reading count.  None here;
        subclasses should override.
        """
        return None
1293
1294
class IncrementalNewlineDecoder(codecs.IncrementalDecoder):
    r"""Codec used when reading a file in universal newlines mode.

    Wraps another incremental decoder and, when translate is true,
    rewrites \r\n and \r as \n.  The kinds of newline seen are recorded
    in seennl.  With translate=False the text is passed through
    unchanged, but a trailing \r is still held back so that a \r\n pair
    is always delivered in one piece.
    """

    # Bit flags accumulated in seennl.
    _LF = 1
    _CR = 2
    _CRLF = 4

    def __init__(self, decoder, translate, errors='strict'):
        codecs.IncrementalDecoder.__init__(self, errors=errors)
        self.decoder = decoder
        self.translate = translate
        self.pendingcr = False
        self.seennl = 0

    def decode(self, input, final=False):
        """Decode input and return the text, newline-translated if enabled."""
        text = self.decoder.decode(input, final=final)

        # Re-attach a \r held back by the previous call.
        if self.pendingcr and (text or final):
            text = "\r" + text
            self.pendingcr = False

        # Hold back a trailing \r (even when not translating) so that
        # readline() is sure to get \r\n in one pass.
        if text.endswith("\r") and not final:
            text = text[:-1]
            self.pendingcr = True

        # Record which kinds of newline were read.
        n_crlf = text.count('\r\n')
        n_cr = text.count('\r') - n_crlf
        n_lf = text.count('\n') - n_crlf
        seen = 0
        if n_lf:
            seen |= self._LF
        if n_cr:
            seen |= self._CR
        if n_crlf:
            seen |= self._CRLF
        self.seennl |= seen

        if self.translate:
            if n_crlf:
                text = text.replace("\r\n", "\n")
            if n_cr:
                text = text.replace("\r", "\n")

        return text

    def getstate(self):
        """Return (buffer, flags); the low flag bit stores pendingcr."""
        buf, flag = self.decoder.getstate()
        flag = flag << 1
        if self.pendingcr:
            flag = flag | 1
        return buf, flag

    def setstate(self, state):
        """Restore a state previously produced by getstate()."""
        buf, flag = state
        inner_flag = flag >> 1
        self.pendingcr = bool(flag & 1)
        self.decoder.setstate((buf, inner_flag))

    def reset(self):
        """Forget seen newlines and any pending \\r; reset the wrapped decoder."""
        self.seennl = 0
        self.pendingcr = False
        self.decoder.reset()

    @property
    def newlines(self):
        """The newline kinds seen so far: None, a string, or a tuple of strings."""
        by_flags = (
            None,
            "\n",
            "\r",
            ("\r", "\n"),
            "\r\n",
            ("\n", "\r\n"),
            ("\r", "\r\n"),
            ("\r", "\n", "\r\n"),
        )
        return by_flags[self.seennl]
1369
1370
class TextIOWrapper(TextIOBase):

    r"""Character and line based layer over a BufferedIOBase object, buffer.

    encoding gives the name of the encoding that the stream will be
    decoded or encoded with. It defaults to locale.getpreferredencoding.

    errors determines the strictness of encoding and decoding (see the
    codecs.register) and defaults to "strict".

    newline can be None, '', '\n', '\r', or '\r\n'.  It controls the
    handling of line endings. If it is None, universal newlines is
    enabled.  With this enabled, on input, the lines endings '\n', '\r',
    or '\r\n' are translated to '\n' before being returned to the
    caller. Conversely, on output, '\n' is translated to the system
    default line separator, os.linesep. If newline is any other of its
    legal values, that newline becomes the newline when the file is read
    and it is returned untranslated. On output, '\n' is converted to the
    newline.

    If line_buffering is True, a call to flush is implied when a call to
    write contains a newline character.
    """

    _CHUNK_SIZE = 128

    def __init__(self, buffer, encoding=None, errors=None, newline=None,
                 line_buffering=False):
        """Wrap buffer.

        Raises ValueError for an illegal newline value or when encoding
        or errors is not a string.
        """
        if newline not in (None, "", "\n", "\r", "\r\n"):
            raise ValueError("illegal newline value: %r" % (newline,))
        if encoding is None:
            try:
                encoding = os.device_encoding(buffer.fileno())
            except (AttributeError, UnsupportedOperation):
                pass
            if encoding is None:
                try:
                    import locale
                except ImportError:
                    # Importing locale may fail if Python is being built
                    encoding = "ascii"
                else:
                    encoding = locale.getpreferredencoding()

        if not isinstance(encoding, basestring):
            raise ValueError("invalid encoding: %r" % encoding)

        if errors is None:
            errors = "strict"
        else:
            if not isinstance(errors, basestring):
                raise ValueError("invalid errors: %r" % errors)

        self.buffer = buffer
        self._line_buffering = line_buffering
        self._encoding = encoding
        self._errors = errors
        self._readuniversal = not newline
        self._readtranslate = newline is None
        self._readnl = newline
        self._writetranslate = newline != ''
        self._writenl = newline or os.linesep
        self._encoder = None
        self._decoder = None
        self._decoded_chars = ''  # buffer for text returned from decoder
        self._decoded_chars_used = 0  # offset into _decoded_chars for read()
        self._snapshot = None  # info for reconstructing decoder state
        self._seekable = self._telling = self.buffer.seekable()

    # self._snapshot is either None, or a tuple (dec_flags, next_input)
    # where dec_flags is the second (integer) item of the decoder state
    # and next_input is the chunk of input bytes that comes next after the
    # snapshot point.  We use this to reconstruct decoder states in tell().

    # Naming convention:
    #   - "bytes_..." for integer variables that count input bytes
    #   - "chars_..." for integer variables that count decoded characters

    @property
    def encoding(self):
        """The encoding name chosen in the constructor."""
        return self._encoding

    @property
    def errors(self):
        """The error-handling scheme chosen in the constructor."""
        return self._errors

    @property
    def line_buffering(self):
        """Whether write() flushes when the text contains a newline."""
        return self._line_buffering

    def seekable(self):
        """Return the seekable() result of the buffer, cached at construction."""
        return self._seekable

    def readable(self):
        """Delegate to the underlying buffer."""
        return self.buffer.readable()

    def writable(self):
        """Delegate to the underlying buffer."""
        return self.buffer.writable()

    def flush(self):
        """Flush the underlying buffer and re-enable tell() if seekable."""
        self.buffer.flush()
        self._telling = self._seekable

    def close(self):
        """Flush on a best-effort basis, then close the underlying buffer."""
        try:
            self.flush()
        except Exception:
            pass  # If flush() fails, just give up
        finally:
            # Always close the buffer, whatever happened during the flush.
            self.buffer.close()

    @property
    def closed(self):
        """The closed state of the underlying buffer."""
        return self.buffer.closed

    @property
    def name(self):
        """The name attribute of the underlying buffer."""
        return self.buffer.name

    def fileno(self):
        """Delegate to the underlying buffer."""
        return self.buffer.fileno()

    def isatty(self):
        """Delegate to the underlying buffer."""
        return self.buffer.isatty()

    def write(self, s):
        """Encode the unicode string s and write it to the buffer.

        Returns len(s) as measured before newline translation.  Raises
        ValueError if the file is closed and TypeError if s is not a
        unicode object.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if not isinstance(s, unicode):
            raise TypeError("can't write %s to text stream" %
                            s.__class__.__name__)
        length = len(s)
        haslf = (self._writetranslate or self._line_buffering) and "\n" in s
        if haslf and self._writetranslate and self._writenl != "\n":
            s = s.replace("\n", self._writenl)
        encoder = self._encoder or self._get_encoder()
        # XXX What if we were just reading?
        b = encoder.encode(s)
        self.buffer.write(b)
        if self._line_buffering and (haslf or "\r" in s):
            self.flush()
        # Writing invalidates whatever read-side state was recorded.
        self._snapshot = None
        if self._decoder:
            self._decoder.reset()
        return length

    def _get_encoder(self):
        """Create, cache and return the incremental encoder."""
        make_encoder = codecs.getincrementalencoder(self._encoding)
        self._encoder = make_encoder(self._errors)
        return self._encoder

    def _get_decoder(self):
        """Create, cache and return the incremental decoder.

        When newline is None or '', the codec's decoder is wrapped in an
        IncrementalNewlineDecoder.
        """
        make_decoder = codecs.getincrementaldecoder(self._encoding)
        decoder = make_decoder(self._errors)
        if self._readuniversal:
            decoder = IncrementalNewlineDecoder(decoder, self._readtranslate)
        self._decoder = decoder
        return decoder

    # The following three methods implement an ADT for _decoded_chars.
    # Text returned from the decoder is buffered here until the client
    # requests it by calling our read() or readline() method.
    def _set_decoded_chars(self, chars):
        """Set the _decoded_chars buffer."""
        self._decoded_chars = chars
        self._decoded_chars_used = 0

    def _get_decoded_chars(self, n=None):
        """Advance into the _decoded_chars buffer."""
        offset = self._decoded_chars_used
        if n is None:
            chars = self._decoded_chars[offset:]
        else:
            chars = self._decoded_chars[offset:offset + n]
        self._decoded_chars_used += len(chars)
        return chars

    def _rewind_decoded_chars(self, n):
        """Rewind the _decoded_chars buffer."""
        if self._decoded_chars_used < n:
            raise AssertionError("rewind decoded_chars out of bounds")
        self._decoded_chars_used -= n

    def _read_chunk(self):
        """
        Read and decode the next chunk of data from the BufferedReader.

        The return value is True unless EOF was reached.  The decoded string
        is placed in self._decoded_chars (replacing its previous value).
        The entire input chunk is sent to the decoder, though some of it
        may remain buffered in the decoder, yet to be converted.
        """

        if self._decoder is None:
            raise ValueError("no decoder")

        if self._telling:
            # To prepare for tell(), we need to snapshot a point in the
            # file where the decoder's input buffer is empty.

            dec_buffer, dec_flags = self._decoder.getstate()
            # Given this, we know there was a valid snapshot point
            # len(dec_buffer) bytes ago with decoder state (b'', dec_flags).

        # Read a chunk, decode it, and put the result in self._decoded_chars.
        input_chunk = self.buffer.read1(self._CHUNK_SIZE)
        eof = not input_chunk
        self._set_decoded_chars(self._decoder.decode(input_chunk, eof))

        if self._telling:
            # At the snapshot point, len(dec_buffer) bytes before the read,
            # the next input to be decoded is dec_buffer + input_chunk.
            self._snapshot = (dec_flags, dec_buffer + input_chunk)

        return not eof

    def _pack_cookie(self, position, dec_flags=0,
                     bytes_to_feed=0, need_eof=0, chars_to_skip=0):
        """Pack the components of a tell() cookie into one integer."""
        # The meaning of a tell() cookie is: seek to position, set the
        # decoder flags to dec_flags, read bytes_to_feed bytes, feed them
        # into the decoder with need_eof as the EOF flag, then skip
        # chars_to_skip characters of the decoded result.  For most simple
        # decoders, tell() will often just give a byte offset in the file.
        return (position | (dec_flags<<64) | (bytes_to_feed<<128) |
                (chars_to_skip<<192) | bool(need_eof)<<256)

    def _unpack_cookie(self, bigint):
        """Split a cookie into the five fields packed by _pack_cookie()."""
        rest, position = divmod(bigint, 1<<64)
        rest, dec_flags = divmod(rest, 1<<64)
        rest, bytes_to_feed = divmod(rest, 1<<64)
        need_eof, chars_to_skip = divmod(rest, 1<<64)
        return position, dec_flags, bytes_to_feed, need_eof, chars_to_skip

    def tell(self):
        """Return an opaque cookie describing the current position.

        Raises IOError if the stream is not seekable, if telling was
        disabled by next(), or if the position cannot be reconstructed.
        """
        if not self._seekable:
            raise IOError("underlying stream is not seekable")
        if not self._telling:
            raise IOError("telling position disabled by next() call")
        self.flush()
        position = self.buffer.tell()
        decoder = self._decoder
        if decoder is None or self._snapshot is None:
            if self._decoded_chars:
                # This should never happen.
                raise AssertionError("pending decoded text")
            return position

        # Skip backward to the snapshot point (see _read_chunk).
        dec_flags, next_input = self._snapshot
        position -= len(next_input)

        # How many decoded characters have been used up since the snapshot?
        chars_to_skip = self._decoded_chars_used
        if chars_to_skip == 0:
            # We haven't moved from the snapshot point.
            return self._pack_cookie(position, dec_flags)

        # Starting from the snapshot position, we will walk the decoder
        # forward until it gives us enough decoded characters.
        saved_state = decoder.getstate()
        try:
            # Note our initial start point.
            decoder.setstate((b'', dec_flags))
            start_pos = position
            start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
            need_eof = 0

            # Feed the decoder one byte at a time.  As we go, note the
            # nearest "safe start point" before the current location
            # (a point where the decoder has nothing buffered, so seek()
            # can safely start from there and advance to this location).
            for next_byte in next_input:
                bytes_fed += 1
                chars_decoded += len(decoder.decode(next_byte))
                dec_buffer, dec_flags = decoder.getstate()
                if not dec_buffer and chars_decoded <= chars_to_skip:
                    # Decoder buffer is empty, so this is a safe start point.
                    start_pos += bytes_fed
                    chars_to_skip -= chars_decoded
                    start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
                if chars_decoded >= chars_to_skip:
                    break
            else:
                # We didn't get enough decoded data; signal EOF to get more.
                chars_decoded += len(decoder.decode(b'', final=True))
                need_eof = 1
                if chars_decoded < chars_to_skip:
                    raise IOError("can't reconstruct logical file position")

            # The returned cookie corresponds to the last safe start point.
            return self._pack_cookie(
                start_pos, start_flags, bytes_fed, need_eof, chars_to_skip)
        finally:
            # The probing above must not disturb the live decoder.
            decoder.setstate(saved_state)

    def truncate(self, pos=None):
        """Flush, then truncate the buffer at pos (default: tell())."""
        self.flush()
        if pos is None:
            pos = self.tell()
        return self.buffer.truncate(pos)

    def seek(self, cookie, whence=0):
        """Move to the position described by cookie.

        With whence 0, cookie must be a non-negative value as returned
        by tell().  With whence 1 or 2 only a zero offset is accepted.
        Raises ValueError on a closed file, a bad whence or a negative
        cookie, and IOError if the stream is not seekable, the offset is
        unsupported or the position cannot be restored.
        """
        if self.closed:
            raise ValueError("seek on closed file")
        if not self._seekable:
            raise IOError("underlying stream is not seekable")
        if whence == 1:  # seek relative to current position
            if cookie != 0:
                raise IOError("can't do nonzero cur-relative seeks")
            # Seeking to the current position should attempt to
            # sync the underlying buffer with the current position.
            whence = 0
            cookie = self.tell()
        if whence == 2:  # seek relative to end of file
            if cookie != 0:
                raise IOError("can't do nonzero end-relative seeks")
            self.flush()
            position = self.buffer.seek(0, 2)
            self._set_decoded_chars('')
            self._snapshot = None
            if self._decoder:
                self._decoder.reset()
            return position
        if whence != 0:
            raise ValueError("invalid whence (%r, should be 0, 1 or 2)" %
                             (whence,))
        if cookie < 0:
            raise ValueError("negative seek position %r" % (cookie,))
        self.flush()

        # The strategy of seek() is to go back to the safe start point
        # and replay the effect of read(chars_to_skip) from there.
        start_pos, dec_flags, bytes_to_feed, need_eof, chars_to_skip = \
            self._unpack_cookie(cookie)

        # Seek back to the safe start point.
        self.buffer.seek(start_pos)
        self._set_decoded_chars('')
        self._snapshot = None

        # Restore the decoder to its state from the safe start point.
        if self._decoder or dec_flags or chars_to_skip:
            self._decoder = self._decoder or self._get_decoder()
            self._decoder.setstate((b'', dec_flags))
            self._snapshot = (dec_flags, b'')

        if chars_to_skip:
            # Just like _read_chunk, feed the decoder and save a snapshot.
            input_chunk = self.buffer.read(bytes_to_feed)
            self._set_decoded_chars(
                self._decoder.decode(input_chunk, need_eof))
            self._snapshot = (dec_flags, input_chunk)

            # Skip chars_to_skip of the decoded characters.
            if len(self._decoded_chars) < chars_to_skip:
                raise IOError("can't restore logical file position")
            self._decoded_chars_used = chars_to_skip

        return cookie

    def read(self, n=None):
        """Read and return at most n characters; None or negative reads all."""
        if n is None:
            n = -1
        decoder = self._decoder or self._get_decoder()
        if n < 0:
            # Read everything.
            result = (self._get_decoded_chars() +
                      decoder.decode(self.buffer.read(), final=True))
            self._set_decoded_chars('')
            self._snapshot = None
            return result
        else:
            # Keep reading chunks until we have n characters to return.
            eof = False
            result = self._get_decoded_chars(n)
            while len(result) < n and not eof:
                eof = not self._read_chunk()
                result += self._get_decoded_chars(n - len(result))
            return result

    def next(self):
        """Return the next line; tell() is disabled until iteration ends."""
        self._telling = False
        line = self.readline()
        if not line:
            self._snapshot = None
            self._telling = self._seekable
            raise StopIteration
        return line

    def readline(self, limit=None):
        """Read and return one line, at most limit characters if limit >= 0.

        Returns whatever text is left (possibly '') when EOF is reached
        before a line ending.  Raises ValueError on a closed file and
        TypeError if limit is not an integer.
        """
        if self.closed:
            raise ValueError("read from closed file")
        if limit is None:
            limit = -1
        if not isinstance(limit, (int, long)):
            raise TypeError("limit must be an integer")

        # Grab all the decoded text (we will rewind any extra bits later).
        line = self._get_decoded_chars()

        start = 0
        decoder = self._decoder or self._get_decoder()

        pos = endpos = None
        while True:
            if self._readtranslate:
                # Newlines are already translated, only search for \n
                pos = line.find('\n', start)
                if pos >= 0:
                    endpos = pos + 1
                    break
                else:
                    start = len(line)

            elif self._readuniversal:
                # Universal newline search. Find any of \r, \r\n, \n
                # The decoder ensures that \r\n are not split in two pieces

                # In C we'd look for these in parallel of course.
                nlpos = line.find("\n", start)
                crpos = line.find("\r", start)
                if crpos == -1:
                    if nlpos == -1:
                        # Nothing found
                        start = len(line)
                    else:
                        # Found \n
                        endpos = nlpos + 1
                        break
                elif nlpos == -1:
                    # Found lone \r
                    endpos = crpos + 1
                    break
                elif nlpos < crpos:
                    # Found \n
                    endpos = nlpos + 1
                    break
                elif nlpos == crpos + 1:
                    # Found \r\n
                    endpos = crpos + 2
                    break
                else:
                    # Found \r
                    endpos = crpos + 1
                    break
            else:
                # non-universal
                pos = line.find(self._readnl)
                if pos >= 0:
                    endpos = pos + len(self._readnl)
                    break

            if limit >= 0 and len(line) >= limit:
                endpos = limit  # reached length limit
                break

            # No line ending seen yet - get more data
            while self._read_chunk():
                if self._decoded_chars:
                    break
            if self._decoded_chars:
                line += self._get_decoded_chars()
            else:
                # end of file
                self._set_decoded_chars('')
                self._snapshot = None
                return line

        if limit >= 0 and endpos > limit:
            endpos = limit  # don't exceed limit

        # Rewind _decoded_chars to just after the line ending we found.
        self._rewind_decoded_chars(len(line) - endpos)
        return line[:endpos]

    @property
    def newlines(self):
        """Newline kinds seen while reading, or None if not tracked.

        Only the IncrementalNewlineDecoder used for newline=None or ''
        records newlines; a plain codec decoder has no such attribute,
        in which case None is returned.
        """
        if not self._decoder:
            return None
        return getattr(self._decoder, "newlines", None)
1849
class StringIO(TextIOWrapper):

    """In-memory text stream.

    initial_value seeds the contents of the stream; encoding, errors and
    newline have the same meaning as for TextIOWrapper's constructor.
    """

    def __init__(self, initial_value="", encoding="utf-8",
                 errors="strict", newline="\n"):
        super(StringIO, self).__init__(BytesIO(),
                                       encoding=encoding,
                                       errors=errors,
                                       newline=newline)
        # Issue #5645: make universal newlines semantics the same as in the
        # C version, even under Windows.
        if newline is None:
            self._writetranslate = False
        if not initial_value:
            return
        # Seed the stream, then rewind so reads start at the beginning.
        text = initial_value
        if not isinstance(text, unicode):
            text = unicode(text)
        self.write(text)
        self.seek(0)

    def getvalue(self):
        """Flush and return the whole contents of the stream as text."""
        self.flush()
        encoded = self.buffer.getvalue()
        return encoded.decode(self._encoding, self._errors)

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: python/trunk/Lib/io.py@ 389

Download in other formats: