Changeset 388 for python/vendor/current/Lib/io.py
- Timestamp:
- Mar 19, 2014, 11:11:30 AM (11 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
python/vendor/current/Lib/io.py
r2 r388 1 """ 2 The io module provides the Python interfaces to stream handling. The 1 """The io module provides the Python interfaces to stream handling. The 3 2 builtin open function is defined in this module. 4 3 … … 6 5 defines the basic interface to a stream. Note, however, that there is no 7 6 separation between reading and writing to streams; implementations are 8 allowed to throwan IOError if they do not support a given operation.7 allowed to raise an IOError if they do not support a given operation. 9 8 10 9 Extending IOBase is RawIOBase which deals simply with the reading and … … 36 35 # New I/O library conforming to PEP 3116. 37 36 38 # This is a prototype; hopefully eventually some of this will be39 # reimplemented in C.40 41 # XXX edge cases when switching between reading/writing42 # XXX need to support 1 meaning line-buffered43 # XXX whenever an argument is None, use the default value44 # XXX read/write ops should check readable/writable45 # XXX buffered readinto should work with arbitrary buffer objects46 # XXX use incremental encoder for text output, at least for UTF-16 and UTF-8-SIG47 # XXX check writable, readable and seekable in appropriate places48 from __future__ import print_function49 from __future__ import unicode_literals50 51 37 __author__ = ("Guido van Rossum <guido@python.org>, " 52 38 "Mike Verdone <mike.verdone@gmail.com>, " 53 "Mark Russell <mark.russell@zen.co.uk>") 39 "Mark Russell <mark.russell@zen.co.uk>, " 40 "Antoine Pitrou <solipsis@pitrou.net>, " 41 "Amaury Forgeot d'Arc <amauryfa@gmail.com>, " 42 "Benjamin Peterson <benjamin@python.org>") 54 43 55 44 __all__ = ["BlockingIOError", "open", "IOBase", "RawIOBase", "FileIO", 56 45 "BytesIO", "StringIO", "BufferedIOBase", 57 46 "BufferedReader", "BufferedWriter", "BufferedRWPair", 58 "BufferedRandom", "TextIOBase", "TextIOWrapper"] 59 60 import os 61 import abc 62 import codecs 63 import _fileio 64 import threading 65 66 # open() uses st_blksize whenever we can 67 
DEFAULT_BUFFER_SIZE = 8 * 1024 # bytes 68 69 # py3k has only new style classes 70 __metaclass__ = type 71 72 class BlockingIOError(IOError): 73 74 """Exception raised when I/O would block on a non-blocking I/O stream.""" 75 76 def __init__(self, errno, strerror, characters_written=0): 77 IOError.__init__(self, errno, strerror) 78 self.characters_written = characters_written 47 "BufferedRandom", "TextIOBase", "TextIOWrapper", 48 "UnsupportedOperation", "SEEK_SET", "SEEK_CUR", "SEEK_END"] 79 49 80 50 81 def open(file, mode="r", buffering=None, encoding=None, errors=None, 82 newline=None, closefd=True): 83 r"""Open file and return a stream. If the file cannot be opened, an IOError is 84 raised. 51 import _io 52 import abc 85 53 86 file is either a string giving the name (and the path if the file 87 isn't in the current working directory) of the file to be opened or an 88 integer file descriptor of the file to be wrapped. (If a file 89 descriptor is given, it is closed when the returned I/O object is 90 closed, unless closefd is set to False.) 54 from _io import (DEFAULT_BUFFER_SIZE, BlockingIOError, UnsupportedOperation, 55 open, FileIO, BytesIO, StringIO, BufferedReader, 56 BufferedWriter, BufferedRWPair, BufferedRandom, 57 IncrementalNewlineDecoder, TextIOWrapper) 91 58 92 mode is an optional string that specifies the mode in which the file 93 is opened. It defaults to 'r' which means open for reading in text 94 mode. Other common values are 'w' for writing (truncating the file if 95 it already exists), and 'a' for appending (which on some Unix systems, 96 means that all writes append to the end of the file regardless of the 97 current seek position). In text mode, if encoding is not specified the 98 encoding used is platform dependent. (For reading and writing raw 99 bytes use binary mode and leave encoding unspecified.) 
The available 100 modes are: 59 OpenWrapper = _io.open # for compatibility with _pyio 101 60 102 ========= =============================================================== 103 Character Meaning 104 --------- --------------------------------------------------------------- 105 'r' open for reading (default) 106 'w' open for writing, truncating the file first 107 'a' open for writing, appending to the end of the file if it exists 108 'b' binary mode 109 't' text mode (default) 110 '+' open a disk file for updating (reading and writing) 111 'U' universal newline mode (for backwards compatibility; unneeded 112 for new code) 113 ========= =============================================================== 61 # for seek() 62 SEEK_SET = 0 63 SEEK_CUR = 1 64 SEEK_END = 2 114 65 115 The default mode is 'rt' (open for reading text). For binary random 116 access, the mode 'w+b' opens and truncates the file to 0 bytes, while 117 'r+b' opens the file without truncation. 66 # Declaring ABCs in C is tricky so we do it here. 67 # Method descriptions and default implementations are inherited from the C 68 # version however. 69 class IOBase(_io._IOBase): 70 __metaclass__ = abc.ABCMeta 118 71 119 Python distinguishes between files opened in binary and text modes, 120 even when the underlying operating system doesn't. Files opened in 121 binary mode (appending 'b' to the mode argument) return contents as 122 bytes objects without any decoding. In text mode (the default, or when 123 't' is appended to the mode argument), the contents of the file are 124 returned as strings, the bytes having been first decoded using a 125 platform-dependent encoding or using the specified encoding if given. 126 127 buffering is an optional integer used to set the buffering policy. 128 Pass 0 to switch buffering off (only allowed in binary mode), 1 to select 129 line buffering (only usable in text mode), and an integer > 1 to indicate 130 the size of a fixed-size chunk buffer. 
When no buffering argument is 131 given, the default buffering policy works as follows: 132 133 * Binary files are buffered in fixed-size chunks; the size of the buffer 134 is chosen using a heuristic trying to determine the underlying device's 135 "block size" and falling back on `io.DEFAULT_BUFFER_SIZE`. 136 On many systems, the buffer will typically be 4096 or 8192 bytes long. 137 138 * "Interactive" text files (files for which isatty() returns True) 139 use line buffering. Other text files use the policy described above 140 for binary files. 141 142 encoding is the name of the encoding used to decode or encode the 143 file. This should only be used in text mode. The default encoding is 144 platform dependent, but any encoding supported by Python can be 145 passed. See the codecs module for the list of supported encodings. 146 147 errors is an optional string that specifies how encoding errors are to 148 be handled---this argument should not be used in binary mode. Pass 149 'strict' to raise a ValueError exception if there is an encoding error 150 (the default of None has the same effect), or pass 'ignore' to ignore 151 errors. (Note that ignoring encoding errors can lead to data loss.) 152 See the documentation for codecs.register for a list of the permitted 153 encoding error strings. 154 155 newline controls how universal newlines works (it only applies to text 156 mode). It can be None, '', '\n', '\r', and '\r\n'. It works as 157 follows: 158 159 * On input, if newline is None, universal newlines mode is 160 enabled. Lines in the input can end in '\n', '\r', or '\r\n', and 161 these are translated into '\n' before being returned to the 162 caller. If it is '', universal newline mode is enabled, but line 163 endings are returned to the caller untranslated. If it has any of 164 the other legal values, input lines are only terminated by the given 165 string, and the line ending is returned to the caller untranslated. 
166 167 * On output, if newline is None, any '\n' characters written are 168 translated to the system default line separator, os.linesep. If 169 newline is '', no translation takes place. If newline is any of the 170 other legal values, any '\n' characters written are translated to 171 the given string. 172 173 If closefd is False, the underlying file descriptor will be kept open 174 when the file is closed. This does not work when a file name is given 175 and must be True in that case. 176 177 open() returns a file object whose type depends on the mode, and 178 through which the standard file operations such as reading and writing 179 are performed. When open() is used to open a file in a text mode ('w', 180 'r', 'wt', 'rt', etc.), it returns a TextIOWrapper. When used to open 181 a file in a binary mode, the returned class varies: in read binary 182 mode, it returns a BufferedReader; in write binary and append binary 183 modes, it returns a BufferedWriter, and in read/write mode, it returns 184 a BufferedRandom. 185 186 It is also possible to use a string or bytearray as a file for both 187 reading and writing. For strings StringIO can be used like a file 188 opened in a text mode, and for bytes a BytesIO can be used like a file 189 opened in a binary mode. 
190 """ 191 if not isinstance(file, (basestring, int)): 192 raise TypeError("invalid file: %r" % file) 193 if not isinstance(mode, basestring): 194 raise TypeError("invalid mode: %r" % mode) 195 if buffering is not None and not isinstance(buffering, int): 196 raise TypeError("invalid buffering: %r" % buffering) 197 if encoding is not None and not isinstance(encoding, basestring): 198 raise TypeError("invalid encoding: %r" % encoding) 199 if errors is not None and not isinstance(errors, basestring): 200 raise TypeError("invalid errors: %r" % errors) 201 modes = set(mode) 202 if modes - set("arwb+tU") or len(mode) > len(modes): 203 raise ValueError("invalid mode: %r" % mode) 204 reading = "r" in modes 205 writing = "w" in modes 206 appending = "a" in modes 207 updating = "+" in modes 208 text = "t" in modes 209 binary = "b" in modes 210 if "U" in modes: 211 if writing or appending: 212 raise ValueError("can't use U and writing mode at once") 213 reading = True 214 if text and binary: 215 raise ValueError("can't have text and binary mode at once") 216 if reading + writing + appending > 1: 217 raise ValueError("can't have read/write/append mode at once") 218 if not (reading or writing or appending): 219 raise ValueError("must have exactly one of read/write/append mode") 220 if binary and encoding is not None: 221 raise ValueError("binary mode doesn't take an encoding argument") 222 if binary and errors is not None: 223 raise ValueError("binary mode doesn't take an errors argument") 224 if binary and newline is not None: 225 raise ValueError("binary mode doesn't take a newline argument") 226 raw = FileIO(file, 227 (reading and "r" or "") + 228 (writing and "w" or "") + 229 (appending and "a" or "") + 230 (updating and "+" or ""), 231 closefd) 232 if buffering is None: 233 buffering = -1 234 line_buffering = False 235 if buffering == 1 or buffering < 0 and raw.isatty(): 236 buffering = -1 237 line_buffering = True 238 if buffering < 0: 239 buffering = DEFAULT_BUFFER_SIZE 
240 try: 241 bs = os.fstat(raw.fileno()).st_blksize 242 except (os.error, AttributeError): 243 pass 244 else: 245 if bs > 1: 246 buffering = bs 247 if buffering < 0: 248 raise ValueError("invalid buffering size") 249 if buffering == 0: 250 if binary: 251 return raw 252 raise ValueError("can't have unbuffered text I/O") 253 if updating: 254 buffer = BufferedRandom(raw, buffering) 255 elif writing or appending: 256 buffer = BufferedWriter(raw, buffering) 257 elif reading: 258 buffer = BufferedReader(raw, buffering) 259 else: 260 raise ValueError("unknown mode: %r" % mode) 261 if binary: 262 return buffer 263 text = TextIOWrapper(buffer, encoding, errors, newline, line_buffering) 264 text.mode = mode 265 return text 266 267 class _DocDescriptor: 268 """Helper for builtins.open.__doc__ 269 """ 270 def __get__(self, obj, typ): 271 return ( 272 "open(file, mode='r', buffering=None, encoding=None, " 273 "errors=None, newline=None, closefd=True)\n\n" + 274 open.__doc__) 275 276 class OpenWrapper: 277 """Wrapper for builtins.open 278 279 Trick so that open won't become a bound method when stored 280 as a class variable (as dumbdbm does). 281 282 See initstdio() in Python/pythonrun.c. 283 """ 284 __doc__ = _DocDescriptor() 285 286 def __new__(cls, *args, **kwargs): 287 return open(*args, **kwargs) 288 289 290 class UnsupportedOperation(ValueError, IOError): 72 class RawIOBase(_io._RawIOBase, IOBase): 291 73 pass 292 74 75 class BufferedIOBase(_io._BufferedIOBase, IOBase): 76 pass 293 77 294 class IOBase(object): 78 class TextIOBase(_io._TextIOBase, IOBase): 79 pass 295 80 296 """The abstract base class for all I/O classes, acting on streams of 297 bytes. There is no public constructor. 
81 RawIOBase.register(FileIO) 298 82 299 This class provides dummy implementations for many methods that 300 derived classes can override selectively; the default implementations301 represent a file that cannot be read, written or seeked.83 for klass in (BytesIO, BufferedReader, BufferedWriter, BufferedRandom, 84 BufferedRWPair): 85 BufferedIOBase.register(klass) 302 86 303 Even though IOBase does not declare read, readinto, or write because 304 their signatures will vary, implementations and clients should 305 consider those methods part of the interface. Also, implementations 306 may raise a IOError when operations they do not support are called. 307 308 The basic type used for binary data read from or written to a file is 309 bytes. bytearrays are accepted too, and in some cases (such as 310 readinto) needed. Text I/O classes work with str data. 311 312 Note that calling any method (even inquiries) on a closed stream is 313 undefined. Implementations may raise IOError in this case. 314 315 IOBase (and its subclasses) support the iterator protocol, meaning 316 that an IOBase object can be iterated over yielding the lines in a 317 stream. 318 319 IOBase also supports the :keyword:`with` statement. In this example, 320 fp is closed after the suite of the with statment is complete: 321 322 with open('spam.txt', 'r') as fp: 323 fp.write('Spam and eggs!') 324 """ 325 326 __metaclass__ = abc.ABCMeta 327 328 ### Internal ### 329 330 def _unsupported(self, name): 331 """Internal: raise an exception for unsupported operations.""" 332 raise UnsupportedOperation("%s.%s() not supported" % 333 (self.__class__.__name__, name)) 334 335 ### Positioning ### 336 337 def seek(self, pos, whence = 0): 338 """Change stream position. 339 340 Change the stream position to byte offset offset. offset is 341 interpreted relative to the position indicated by whence. 
Values 342 for whence are: 343 344 * 0 -- start of stream (the default); offset should be zero or positive 345 * 1 -- current stream position; offset may be negative 346 * 2 -- end of stream; offset is usually negative 347 348 Return the new absolute position. 349 """ 350 self._unsupported("seek") 351 352 def tell(self): 353 """Return current stream position.""" 354 return self.seek(0, 1) 355 356 def truncate(self, pos = None): 357 """Truncate file to size bytes. 358 359 Size defaults to the current IO position as reported by tell(). Return 360 the new size. 361 """ 362 self._unsupported("truncate") 363 364 ### Flush and close ### 365 366 def flush(self): 367 """Flush write buffers, if applicable. 368 369 This is not implemented for read-only and non-blocking streams. 370 """ 371 # XXX Should this return the number of bytes written??? 372 373 __closed = False 374 375 def close(self): 376 """Flush and close the IO object. 377 378 This method has no effect if the file is already closed. 379 """ 380 if not self.__closed: 381 try: 382 self.flush() 383 except IOError: 384 pass # If flush() fails, just give up 385 self.__closed = True 386 387 def __del__(self): 388 """Destructor. Calls close().""" 389 # The try/except block is in case this is called at program 390 # exit time, when it's possible that globals have already been 391 # deleted, and then the close() call might fail. Since 392 # there's nothing we can do about such failures and they annoy 393 # the end users, we suppress the traceback. 394 try: 395 self.close() 396 except: 397 pass 398 399 ### Inquiries ### 400 401 def seekable(self): 402 """Return whether object supports random access. 403 404 If False, seek(), tell() and truncate() will raise IOError. 405 This method may need to do a test seek(). 406 """ 407 return False 408 409 def _checkSeekable(self, msg=None): 410 """Internal: raise an IOError if file is not seekable 411 """ 412 if not self.seekable(): 413 raise IOError("File or stream is not seekable." 
414 if msg is None else msg) 415 416 417 def readable(self): 418 """Return whether object was opened for reading. 419 420 If False, read() will raise IOError. 421 """ 422 return False 423 424 def _checkReadable(self, msg=None): 425 """Internal: raise an IOError if file is not readable 426 """ 427 if not self.readable(): 428 raise IOError("File or stream is not readable." 429 if msg is None else msg) 430 431 def writable(self): 432 """Return whether object was opened for writing. 433 434 If False, write() and truncate() will raise IOError. 435 """ 436 return False 437 438 def _checkWritable(self, msg=None): 439 """Internal: raise an IOError if file is not writable 440 """ 441 if not self.writable(): 442 raise IOError("File or stream is not writable." 443 if msg is None else msg) 444 445 @property 446 def closed(self): 447 """closed: bool. True iff the file has been closed. 448 449 For backwards compatibility, this is a property, not a predicate. 450 """ 451 return self.__closed 452 453 def _checkClosed(self, msg=None): 454 """Internal: raise an ValueError if file is closed 455 """ 456 if self.closed: 457 raise ValueError("I/O operation on closed file." 458 if msg is None else msg) 459 460 ### Context manager ### 461 462 def __enter__(self): 463 """Context management protocol. Returns self.""" 464 self._checkClosed() 465 return self 466 467 def __exit__(self, *args): 468 """Context management protocol. Calls close()""" 469 self.close() 470 471 ### Lower-level APIs ### 472 473 # XXX Should these be present even if unimplemented? 474 475 def fileno(self): 476 """Returns underlying file descriptor if one exists. 477 478 An IOError is raised if the IO object does not use a file descriptor. 479 """ 480 self._unsupported("fileno") 481 482 def isatty(self): 483 """Return whether this is an 'interactive' stream. 484 485 Return False if it can't be determined. 
486 """ 487 self._checkClosed() 488 return False 489 490 ### Readline[s] and writelines ### 491 492 def readline(self, limit = -1): 493 r"""Read and return a line from the stream. 494 495 If limit is specified, at most limit bytes will be read. 496 497 The line terminator is always b'\n' for binary files; for text 498 files, the newlines argument to open can be used to select the line 499 terminator(s) recognized. 500 """ 501 self._checkClosed() 502 if hasattr(self, "peek"): 503 def nreadahead(): 504 readahead = self.peek(1) 505 if not readahead: 506 return 1 507 n = (readahead.find(b"\n") + 1) or len(readahead) 508 if limit >= 0: 509 n = min(n, limit) 510 return n 511 else: 512 def nreadahead(): 513 return 1 514 if limit is None: 515 limit = -1 516 if not isinstance(limit, (int, long)): 517 raise TypeError("limit must be an integer") 518 res = bytearray() 519 while limit < 0 or len(res) < limit: 520 b = self.read(nreadahead()) 521 if not b: 522 break 523 res += b 524 if res.endswith(b"\n"): 525 break 526 return bytes(res) 527 528 def __iter__(self): 529 self._checkClosed() 530 return self 531 532 def next(self): 533 line = self.readline() 534 if not line: 535 raise StopIteration 536 return line 537 538 def readlines(self, hint=None): 539 """Return a list of lines from the stream. 540 541 hint can be specified to control the number of lines read: no more 542 lines will be read if the total size (in bytes/characters) of all 543 lines so far exceeds hint. 
544 """ 545 if hint is None: 546 hint = -1 547 if not isinstance(hint, (int, long)): 548 raise TypeError("hint must be an integer") 549 if hint <= 0: 550 return list(self) 551 n = 0 552 lines = [] 553 for line in self: 554 lines.append(line) 555 n += len(line) 556 if n >= hint: 557 break 558 return lines 559 560 def writelines(self, lines): 561 self._checkClosed() 562 for line in lines: 563 self.write(line) 564 565 566 class RawIOBase(IOBase): 567 568 """Base class for raw binary I/O.""" 569 570 # The read() method is implemented by calling readinto(); derived 571 # classes that want to support read() only need to implement 572 # readinto() as a primitive operation. In general, readinto() can be 573 # more efficient than read(). 574 575 # (It would be tempting to also provide an implementation of 576 # readinto() in terms of read(), in case the latter is a more suitable 577 # primitive operation, but that would lead to nasty recursion in case 578 # a subclass doesn't implement either.) 579 580 def read(self, n = -1): 581 """Read and return up to n bytes. 582 583 Returns an empty bytes array on EOF, or None if the object is 584 set not to block and has no data to read. 585 """ 586 if n is None: 587 n = -1 588 if n < 0: 589 return self.readall() 590 b = bytearray(n.__index__()) 591 n = self.readinto(b) 592 del b[n:] 593 return bytes(b) 594 595 def readall(self): 596 """Read until EOF, using multiple read() call.""" 597 res = bytearray() 598 while True: 599 data = self.read(DEFAULT_BUFFER_SIZE) 600 if not data: 601 break 602 res += data 603 return bytes(res) 604 605 def readinto(self, b): 606 """Read up to len(b) bytes into b. 607 608 Returns number of bytes read (0 for EOF), or None if the object 609 is set not to block as has no data to read. 610 """ 611 self._unsupported("readinto") 612 613 def write(self, b): 614 """Write the given buffer to the IO stream. 615 616 Returns the number of bytes written, which may be less than len(b). 
617 """ 618 self._unsupported("write") 619 620 621 class FileIO(_fileio._FileIO, RawIOBase): 622 623 """Raw I/O implementation for OS files.""" 624 625 # This multiply inherits from _FileIO and RawIOBase to make 626 # isinstance(io.FileIO(), io.RawIOBase) return True without requiring 627 # that _fileio._FileIO inherits from io.RawIOBase (which would be hard 628 # to do since _fileio.c is written in C). 629 630 def __init__(self, name, mode="r", closefd=True): 631 _fileio._FileIO.__init__(self, name, mode, closefd) 632 self._name = name 633 634 def close(self): 635 _fileio._FileIO.close(self) 636 RawIOBase.close(self) 637 638 @property 639 def name(self): 640 return self._name 641 642 643 class BufferedIOBase(IOBase): 644 645 """Base class for buffered IO objects. 646 647 The main difference with RawIOBase is that the read() method 648 supports omitting the size argument, and does not have a default 649 implementation that defers to readinto(). 650 651 In addition, read(), readinto() and write() may raise 652 BlockingIOError if the underlying raw stream is in non-blocking 653 mode and not ready; unlike their raw counterparts, they will never 654 return None. 655 656 A typical implementation should not inherit from a RawIOBase 657 implementation, but wrap one. 658 """ 659 660 def read(self, n = None): 661 """Read and return up to n bytes. 662 663 If the argument is omitted, None, or negative, reads and 664 returns all data until EOF. 665 666 If the argument is positive, and the underlying raw stream is 667 not 'interactive', multiple raw reads may be issued to satisfy 668 the byte count (unless EOF is reached first). But for 669 interactive raw streams (XXX and for pipes?), at most one raw 670 read will be issued, and a short result does not imply that 671 EOF is imminent. 672 673 Returns an empty bytes array on EOF. 674 675 Raises BlockingIOError if the underlying raw stream has no 676 data at the moment. 
677 """ 678 self._unsupported("read") 679 680 def readinto(self, b): 681 """Read up to len(b) bytes into b. 682 683 Like read(), this may issue multiple reads to the underlying raw 684 stream, unless the latter is 'interactive'. 685 686 Returns the number of bytes read (0 for EOF). 687 688 Raises BlockingIOError if the underlying raw stream has no 689 data at the moment. 690 """ 691 # XXX This ought to work with anything that supports the buffer API 692 data = self.read(len(b)) 693 n = len(data) 694 try: 695 b[:n] = data 696 except TypeError as err: 697 import array 698 if not isinstance(b, array.array): 699 raise err 700 b[:n] = array.array(b'b', data) 701 return n 702 703 def write(self, b): 704 """Write the given buffer to the IO stream. 705 706 Return the number of bytes written, which is never less than 707 len(b). 708 709 Raises BlockingIOError if the buffer is full and the 710 underlying raw stream cannot accept more data at the moment. 711 """ 712 self._unsupported("write") 713 714 715 class _BufferedIOMixin(BufferedIOBase): 716 717 """A mixin implementation of BufferedIOBase with an underlying raw stream. 718 719 This passes most requests on to the underlying raw stream. It 720 does *not* provide implementations of read(), readinto() or 721 write(). 722 """ 723 724 def __init__(self, raw): 725 self.raw = raw 726 727 ### Positioning ### 728 729 def seek(self, pos, whence=0): 730 return self.raw.seek(pos, whence) 731 732 def tell(self): 733 return self.raw.tell() 734 735 def truncate(self, pos=None): 736 # Flush the stream. We're mixing buffered I/O with lower-level I/O, 737 # and a flush may be necessary to synch both views of the current 738 # file state. 739 self.flush() 740 741 if pos is None: 742 pos = self.tell() 743 # XXX: Should seek() be used, instead of passing the position 744 # XXX directly to truncate? 
745 return self.raw.truncate(pos) 746 747 ### Flush and close ### 748 749 def flush(self): 750 self.raw.flush() 751 752 def close(self): 753 if not self.closed: 754 try: 755 self.flush() 756 except IOError: 757 pass # If flush() fails, just give up 758 self.raw.close() 759 760 ### Inquiries ### 761 762 def seekable(self): 763 return self.raw.seekable() 764 765 def readable(self): 766 return self.raw.readable() 767 768 def writable(self): 769 return self.raw.writable() 770 771 @property 772 def closed(self): 773 return self.raw.closed 774 775 @property 776 def name(self): 777 return self.raw.name 778 779 @property 780 def mode(self): 781 return self.raw.mode 782 783 ### Lower-level APIs ### 784 785 def fileno(self): 786 return self.raw.fileno() 787 788 def isatty(self): 789 return self.raw.isatty() 790 791 792 class _BytesIO(BufferedIOBase): 793 794 """Buffered I/O implementation using an in-memory bytes buffer.""" 795 796 # XXX More docs 797 798 def __init__(self, initial_bytes=None): 799 buf = bytearray() 800 if initial_bytes is not None: 801 buf += bytearray(initial_bytes) 802 self._buffer = buf 803 self._pos = 0 804 805 def getvalue(self): 806 """Return the bytes value (contents) of the buffer 807 """ 808 if self.closed: 809 raise ValueError("getvalue on closed file") 810 return bytes(self._buffer) 811 812 def read(self, n=None): 813 if self.closed: 814 raise ValueError("read from closed file") 815 if n is None: 816 n = -1 817 if not isinstance(n, (int, long)): 818 raise TypeError("argument must be an integer") 819 if n < 0: 820 n = len(self._buffer) 821 if len(self._buffer) <= self._pos: 822 return b"" 823 newpos = min(len(self._buffer), self._pos + n) 824 b = self._buffer[self._pos : newpos] 825 self._pos = newpos 826 return bytes(b) 827 828 def read1(self, n): 829 """this is the same as read. 
830 """ 831 return self.read(n) 832 833 def write(self, b): 834 if self.closed: 835 raise ValueError("write to closed file") 836 if isinstance(b, unicode): 837 raise TypeError("can't write unicode to binary stream") 838 n = len(b) 839 if n == 0: 840 return 0 841 pos = self._pos 842 if pos > len(self._buffer): 843 # Inserts null bytes between the current end of the file 844 # and the new write position. 845 padding = b'\x00' * (pos - len(self._buffer)) 846 self._buffer += padding 847 self._buffer[pos:pos + n] = b 848 self._pos += n 849 return n 850 851 def seek(self, pos, whence=0): 852 if self.closed: 853 raise ValueError("seek on closed file") 854 try: 855 pos = pos.__index__() 856 except AttributeError as err: 857 raise TypeError("an integer is required") # from err 858 if whence == 0: 859 if pos < 0: 860 raise ValueError("negative seek position %r" % (pos,)) 861 self._pos = pos 862 elif whence == 1: 863 self._pos = max(0, self._pos + pos) 864 elif whence == 2: 865 self._pos = max(0, len(self._buffer) + pos) 866 else: 867 raise ValueError("invalid whence value") 868 return self._pos 869 870 def tell(self): 871 if self.closed: 872 raise ValueError("tell on closed file") 873 return self._pos 874 875 def truncate(self, pos=None): 876 if self.closed: 877 raise ValueError("truncate on closed file") 878 if pos is None: 879 pos = self._pos 880 elif pos < 0: 881 raise ValueError("negative truncate position %r" % (pos,)) 882 del self._buffer[pos:] 883 return pos 884 885 def readable(self): 886 return True 887 888 def writable(self): 889 return True 890 891 def seekable(self): 892 return True 893 894 # Use the faster implementation of BytesIO if available 895 try: 896 import _bytesio 897 898 class BytesIO(_bytesio._BytesIO, BufferedIOBase): 899 __doc__ = _bytesio._BytesIO.__doc__ 900 901 except ImportError: 902 BytesIO = _BytesIO 903 904 905 class BufferedReader(_BufferedIOMixin): 906 907 """BufferedReader(raw[, buffer_size]) 908 909 A buffer for a readable, sequential 
    BaseRawIO object.

    The constructor creates a BufferedReader for the given readable raw
    stream and buffer_size. If buffer_size is omitted, DEFAULT_BUFFER_SIZE
    is used.
    """

    def __init__(self, raw, buffer_size=DEFAULT_BUFFER_SIZE):
        """Create a new buffered reader using the given readable raw IO object.
        """
        raw._checkReadable()
        _BufferedIOMixin.__init__(self, raw)
        self.buffer_size = buffer_size
        self._reset_read_buf()
        # Serializes all read-side operations on the shared read buffer.
        self._read_lock = threading.Lock()

    def _reset_read_buf(self):
        # Discard any buffered data; the next read starts at the raw position.
        self._read_buf = b""
        self._read_pos = 0

    def read(self, n=None):
        """Read n bytes.

        Returns exactly n bytes of data unless the underlying raw IO
        stream reaches EOF or if the call would block in non-blocking
        mode. If n is negative, read until EOF or until read() would
        block.
        """
        with self._read_lock:
            return self._read_unlocked(n)

    def _read_unlocked(self, n=None):
        # nodata_val distinguishes EOF (b"") from "would block" (None)
        # as reported by the raw stream.
        nodata_val = b""
        empty_values = (b"", None)
        buf = self._read_buf
        pos = self._read_pos

        # Special case for when the number of bytes to read is unspecified.
        if n is None or n == -1:
            self._reset_read_buf()
            chunks = [buf[pos:]]  # Strip the consumed bytes.
            current_size = 0
            while True:
                # Read until EOF or until read() would block.
                chunk = self.raw.read()
                if chunk in empty_values:
                    nodata_val = chunk
                    break
                current_size += len(chunk)
                chunks.append(chunk)
            return b"".join(chunks) or nodata_val

        # The number of bytes to read is specified, return at most n bytes.
        avail = len(buf) - pos  # Length of the available buffered data.
        if n <= avail:
            # Fast path: the data to read is fully buffered.
            self._read_pos += n
            return buf[pos:pos+n]
        # Slow path: read from the stream until enough bytes are read,
        # or until an EOF occurs or until read() would block.
        chunks = [buf[pos:]]
        wanted = max(self.buffer_size, n)
        while avail < n:
            chunk = self.raw.read(wanted)
            if chunk in empty_values:
                nodata_val = chunk
                break
            avail += len(chunk)
            chunks.append(chunk)
        # n is more than avail only when an EOF occurred or when
        # read() would have blocked.
        n = min(n, avail)
        out = b"".join(chunks)
        self._read_buf = out[n:]  # Save the extra data in the buffer.
        self._read_pos = 0
        return out[:n] if out else nodata_val

    def peek(self, n=0):
        """Returns buffered bytes without advancing the position.

        The argument indicates a desired minimal number of bytes; we
        do at most one raw read to satisfy it.  We never return more
        than self.buffer_size.
        """
        with self._read_lock:
            return self._peek_unlocked(n)

    def _peek_unlocked(self, n=0):
        want = min(n, self.buffer_size)
        have = len(self._read_buf) - self._read_pos
        if have < want:
            # Top up the buffer with at most one raw read.
            to_read = self.buffer_size - have
            current = self.raw.read(to_read)
            if current:
                self._read_buf = self._read_buf[self._read_pos:] + current
                self._read_pos = 0
        return self._read_buf[self._read_pos:]

    def read1(self, n):
        """Reads up to n bytes, with at most one read() system call."""
        # Returns up to n bytes.  If at least one byte is buffered, we
        # only return buffered bytes.  Otherwise, we do one raw read.
        if n <= 0:
            return b""
        with self._read_lock:
            self._peek_unlocked(1)
            return self._read_unlocked(
                min(n, len(self._read_buf) - self._read_pos))

    def tell(self):
        # Logical position: raw position minus the data buffered ahead of it.
        return self.raw.tell() - len(self._read_buf) + self._read_pos

    def seek(self, pos, whence=0):
        with self._read_lock:
            if whence == 1:
                # Adjust relative seeks for data already consumed from the
                # read buffer, so the offset is relative to the logical
                # position, not the raw stream's position.
                pos -= len(self._read_buf) - self._read_pos
            pos = self.raw.seek(pos, whence)
            self._reset_read_buf()
            return pos


class BufferedWriter(_BufferedIOMixin):

    """A buffer for a writeable sequential RawIO object.

    The constructor creates a BufferedWriter for the given writeable raw
    stream.  If the buffer_size is not given, it defaults to
    DEFAULT_BUFFER_SIZE.  If max_buffer_size is omitted, it defaults to
    twice the buffer size.
    """

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        raw._checkWritable()
        _BufferedIOMixin.__init__(self, raw)
        self.buffer_size = buffer_size
        self.max_buffer_size = (2*buffer_size
                                if max_buffer_size is None
                                else max_buffer_size)
        self._write_buf = bytearray()
        self._write_lock = threading.Lock()

    def write(self, b):
        """Buffer b, flushing to the raw stream when the buffer overflows.

        Returns the number of bytes accepted.  May raise BlockingIOError
        (with the overage count) if the raw stream is non-blocking and the
        buffer would exceed max_buffer_size.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if isinstance(b, unicode):
            raise TypeError("can't write unicode to binary stream")
        with self._write_lock:
            # XXX we can implement some more tricks to try and avoid
            # partial writes
            if len(self._write_buf) > self.buffer_size:
                # We're full, so let's pre-flush the buffer
                try:
                    self._flush_unlocked()
                except BlockingIOError as e:
                    # We can't accept anything else.
                    # XXX Why not just let the exception pass through?
                    raise BlockingIOError(e.errno, e.strerror, 0)
            before = len(self._write_buf)
            self._write_buf.extend(b)
            written = len(self._write_buf) - before
            if len(self._write_buf) > self.buffer_size:
                try:
                    self._flush_unlocked()
                except BlockingIOError as e:
                    if len(self._write_buf) > self.max_buffer_size:
                        # We've hit max_buffer_size. We have to accept a
                        # partial write and cut back our buffer.
                        overage = len(self._write_buf) - self.max_buffer_size
                        self._write_buf = self._write_buf[:self.max_buffer_size]
                        raise BlockingIOError(e.errno, e.strerror, overage)
            return written

    def truncate(self, pos=None):
        with self._write_lock:
            # Flush first so buffered data doesn't land past the new size.
            self._flush_unlocked()
            if pos is None:
                pos = self.raw.tell()
            return self.raw.truncate(pos)

    def flush(self):
        with self._write_lock:
            self._flush_unlocked()

    def _flush_unlocked(self):
        # Caller must hold self._write_lock.
        if self.closed:
            raise ValueError("flush of closed file")
        written = 0
        try:
            while self._write_buf:
                n = self.raw.write(self._write_buf)
                del self._write_buf[:n]
                written += n
        except BlockingIOError as e:
            # Record the partial progress, then re-raise with the total
            # number of bytes actually written so far.
            n = e.characters_written
            del self._write_buf[:n]
            written += n
            raise BlockingIOError(e.errno, e.strerror, written)

    def tell(self):
        # Logical position: raw position plus not-yet-flushed bytes.
        return self.raw.tell() + len(self._write_buf)

    def seek(self, pos, whence=0):
        with self._write_lock:
            self._flush_unlocked()
            return self.raw.seek(pos, whence)


class BufferedRWPair(BufferedIOBase):

    """A buffered reader and writer object together.

    A buffered reader object and buffered writer object put together to
    form a sequential IO object that can read and write. This is typically
    used with a socket or two-way pipe.

    reader and writer are RawIOBase objects that are readable and
    writeable respectively. If the buffer_size is omitted it defaults to
    DEFAULT_BUFFER_SIZE. The max_buffer_size (for the buffered writer)
    defaults to twice the buffer size.
    """

    # XXX The usefulness of this (compared to having two separate IO
    # objects) is questionable.

    def __init__(self, reader, writer,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        """Constructor.

        The arguments are two RawIO instances.
        """
        reader._checkReadable()
        writer._checkWritable()
        self.reader = BufferedReader(reader, buffer_size)
        self.writer = BufferedWriter(writer, buffer_size, max_buffer_size)

    # All operations below simply delegate to the appropriate wrapped side.

    def read(self, n=None):
        if n is None:
            n = -1
        return self.reader.read(n)

    def readinto(self, b):
        return self.reader.readinto(b)

    def write(self, b):
        return self.writer.write(b)

    def peek(self, n=0):
        return self.reader.peek(n)

    def read1(self, n):
        return self.reader.read1(n)

    def readable(self):
        return self.reader.readable()

    def writable(self):
        return self.writer.writable()

    def flush(self):
        return self.writer.flush()

    def close(self):
        self.writer.close()
        self.reader.close()

    def isatty(self):
        return self.reader.isatty() or self.writer.isatty()

    @property
    def closed(self):
        # NOTE(review): only reflects the writer's state; the reader may
        # still be open or already closed independently -- confirm intent.
        return self.writer.closed


class BufferedRandom(BufferedWriter, BufferedReader):

    """A buffered interface to random access streams.

    The constructor creates a reader and writer for a seekable stream,
    raw, given in the first argument. If the buffer_size is omitted it
    defaults to DEFAULT_BUFFER_SIZE. The max_buffer_size (for the buffered
    writer) defaults to twice the buffer size.
    """

    def __init__(self, raw,
                 buffer_size=DEFAULT_BUFFER_SIZE, max_buffer_size=None):
        raw._checkSeekable()
        BufferedReader.__init__(self, raw, buffer_size)
        BufferedWriter.__init__(self, raw, buffer_size, max_buffer_size)

    def seek(self, pos, whence=0):
        self.flush()
        # First do the raw seek, then empty the read buffer, so that
        # if the raw seek fails, we don't lose buffered data forever.
        pos = self.raw.seek(pos, whence)
        with self._read_lock:
            self._reset_read_buf()
        return pos

    def tell(self):
        # Pending writes take precedence: if anything is buffered for
        # writing, the write-side position is the logical one.
        if self._write_buf:
            return self.raw.tell() + len(self._write_buf)
        else:
            return BufferedReader.tell(self)

    def truncate(self, pos=None):
        if pos is None:
            pos = self.tell()
        # Use seek to flush the read buffer.
        return BufferedWriter.truncate(self, pos)

    # Reads flush pending writes first so data is visible to the reader.

    def read(self, n=None):
        if n is None:
            n = -1
        self.flush()
        return BufferedReader.read(self, n)

    def readinto(self, b):
        self.flush()
        return BufferedReader.readinto(self, b)

    def peek(self, n=0):
        self.flush()
        return BufferedReader.peek(self, n)

    def read1(self, n):
        self.flush()
        return BufferedReader.read1(self, n)

    def write(self, b):
        if self._read_buf:
            # Undo readahead
            with self._read_lock:
                self.raw.seek(self._read_pos - len(self._read_buf), 1)
                self._reset_read_buf()
        return BufferedWriter.write(self, b)


class TextIOBase(IOBase):

    """Base class for text I/O.

    This class provides a character and line based interface to stream
    I/O. There is no readinto method because Python's character strings
    are immutable. There is no public constructor.
    """

    def read(self, n = -1):
        """Read at most n characters from stream.

        Read from underlying buffer until we have n characters or we hit EOF.
        If n is negative or omitted, read until EOF.
        """
        self._unsupported("read")

    def write(self, s):
        """Write string s to stream."""
        self._unsupported("write")

    def truncate(self, pos = None):
        """Truncate size to pos."""
        self._unsupported("truncate")

    def readline(self):
        """Read until newline or EOF.

        Returns an empty string if EOF is hit immediately.
        """
        self._unsupported("readline")

    @property
    def encoding(self):
        """Subclasses should override."""
        return None

    @property
    def newlines(self):
        """Line endings translated so far.

        Only line endings translated during reading are considered.

        Subclasses should override.
        """
        return None


class IncrementalNewlineDecoder(codecs.IncrementalDecoder):
    """Codec used when reading a file in universal newlines mode.
    It wraps another incremental decoder, translating \\r\\n and \\r into \\n.
    It also records the types of newlines encountered.
    When used with translate=False, it ensures that the newline sequence is
    returned in one piece.
    """
    def __init__(self, decoder, translate, errors='strict'):
        codecs.IncrementalDecoder.__init__(self, errors=errors)
        self.translate = translate
        self.decoder = decoder
        # Bitmask of _LF/_CR/_CRLF newline kinds seen so far.
        self.seennl = 0
        # True when the last chunk ended in '\r' and we're waiting to see
        # whether the next chunk begins with '\n'.
        self.pendingcr = False

    def decode(self, input, final=False):
        # decode input (with the eventual \r from a previous pass)
        output = self.decoder.decode(input, final=final)
        if self.pendingcr and (output or final):
            output = "\r" + output
            self.pendingcr = False

        # retain last \r even when not translating data:
        # then readline() is sure to get \r\n in one pass
        if output.endswith("\r") and not final:
            output = output[:-1]
            self.pendingcr = True

        # Record which newlines are read
        crlf = output.count('\r\n')
        cr = output.count('\r') - crlf
        lf = output.count('\n') - crlf
        self.seennl |= (lf and self._LF) | (cr and self._CR) \
                    | (crlf and self._CRLF)

        if self.translate:
            if crlf:
                output = output.replace("\r\n", "\n")
            if cr:
                output = output.replace("\r", "\n")

        return output

    def getstate(self):
        # Fold pendingcr into the low bit of the wrapped decoder's flags.
        buf, flag = self.decoder.getstate()
        flag <<= 1
        if self.pendingcr:
            flag |= 1
        return buf, flag

    def setstate(self, state):
        buf, flag = state
        self.pendingcr = bool(flag & 1)
        self.decoder.setstate((buf, flag >> 1))

    def reset(self):
        self.seennl = 0
        self.pendingcr = False
        self.decoder.reset()

    _LF = 1
    _CR = 2
    _CRLF = 4

    @property
    def newlines(self):
        # seennl indexes into every combination of the three newline kinds.
        return (None,
                "\n",
                "\r",
                ("\r", "\n"),
                "\r\n",
                ("\n", "\r\n"),
                ("\r", "\r\n"),
                ("\r", "\n", "\r\n")
               )[self.seennl]


class TextIOWrapper(TextIOBase):

    r"""Character and line based layer over a BufferedIOBase object, buffer.

    encoding gives the name of the encoding that the stream will be
    decoded or encoded with. It defaults to locale.getpreferredencoding.

    errors determines the strictness of encoding and decoding (see the
    codecs.register) and defaults to "strict".

    newline can be None, '', '\n', '\r', or '\r\n'.  It controls the
    handling of line endings. If it is None, universal newlines is
    enabled.  With this enabled, on input, the line endings '\n', '\r',
    or '\r\n' are translated to '\n' before being returned to the
    caller. Conversely, on output, '\n' is translated to the system
    default line separator, os.linesep. If newline is any other of its
    legal values, that newline becomes the newline when the file is read
    and it is returned untranslated. On output, '\n' is converted to the
    newline.

    If line_buffering is True, a call to flush is implied when a call to
    write contains a newline character.
    """

    _CHUNK_SIZE = 128

    def __init__(self, buffer, encoding=None, errors=None, newline=None,
                 line_buffering=False):
        if newline not in (None, "", "\n", "\r", "\r\n"):
            raise ValueError("illegal newline value: %r" % (newline,))
        if encoding is None:
            try:
                encoding = os.device_encoding(buffer.fileno())
            except (AttributeError, UnsupportedOperation):
                pass
        if encoding is None:
            try:
                import locale
            except ImportError:
                # Importing locale may fail if Python is being built
                encoding = "ascii"
            else:
                encoding = locale.getpreferredencoding()

        if not isinstance(encoding, basestring):
            raise ValueError("invalid encoding: %r" % encoding)

        if errors is None:
            errors = "strict"
        else:
            if not isinstance(errors, basestring):
                raise ValueError("invalid errors: %r" % errors)

        self.buffer = buffer
        self._line_buffering = line_buffering
        self._encoding = encoding
        self._errors = errors
        self._readuniversal = not newline
        self._readtranslate = newline is None
        self._readnl = newline
        self._writetranslate = newline != ''
        self._writenl = newline or os.linesep
        self._encoder = None
        self._decoder = None
        self._decoded_chars = ''  # buffer for text returned from decoder
        self._decoded_chars_used = 0  # offset into _decoded_chars for read()
        self._snapshot = None  # info for reconstructing decoder state
        self._seekable = self._telling = self.buffer.seekable()

    # self._snapshot is either None, or a tuple (dec_flags, next_input)
    # where dec_flags is the second (integer) item of the decoder state
    # and next_input is the chunk of input bytes that comes next after the
    # snapshot point.  We use this to reconstruct decoder states in tell().

    # Naming convention:
    #   - "bytes_..." for integer variables that count input bytes
    #   - "chars_..." for integer variables that count decoded characters

    @property
    def encoding(self):
        return self._encoding

    @property
    def errors(self):
        return self._errors

    @property
    def line_buffering(self):
        return self._line_buffering

    def seekable(self):
        return self._seekable

    def readable(self):
        return self.buffer.readable()

    def writable(self):
        return self.buffer.writable()

    def flush(self):
        self.buffer.flush()
        # Flushing re-enables tell(), which next() may have disabled.
        self._telling = self._seekable

    def close(self):
        try:
            self.flush()
        except:
            pass  # If flush() fails, just give up
        self.buffer.close()

    @property
    def closed(self):
        return self.buffer.closed

    @property
    def name(self):
        return self.buffer.name

    def fileno(self):
        return self.buffer.fileno()

    def isatty(self):
        return self.buffer.isatty()

    def write(self, s):
        """Encode s and write it to the underlying buffer.

        Returns the number of characters written (before any newline
        translation).  Writing invalidates the read-side snapshot.
        """
        if self.closed:
            raise ValueError("write to closed file")
        if not isinstance(s, unicode):
            raise TypeError("can't write %s to text stream" %
                            s.__class__.__name__)
        length = len(s)
        haslf = (self._writetranslate or self._line_buffering) and "\n" in s
        if haslf and self._writetranslate and self._writenl != "\n":
            s = s.replace("\n", self._writenl)
        encoder = self._encoder or self._get_encoder()
        # XXX What if we were just reading?
        b = encoder.encode(s)
        self.buffer.write(b)
        if self._line_buffering and (haslf or "\r" in s):
            self.flush()
        self._snapshot = None
        if self._decoder:
            self._decoder.reset()
        return length

    def _get_encoder(self):
        # Lazily create and cache the incremental encoder.
        make_encoder = codecs.getincrementalencoder(self._encoding)
        self._encoder = make_encoder(self._errors)
        return self._encoder

    def _get_decoder(self):
        # Lazily create and cache the incremental decoder, wrapping it in
        # the universal-newline decoder when needed.
        make_decoder = codecs.getincrementaldecoder(self._encoding)
        decoder = make_decoder(self._errors)
        if self._readuniversal:
            decoder = IncrementalNewlineDecoder(decoder, self._readtranslate)
        self._decoder = decoder
        return decoder

    # The following three methods implement an ADT for _decoded_chars.
    # Text returned from the decoder is buffered here until the client
    # requests it by calling our read() or readline() method.
    def _set_decoded_chars(self, chars):
        """Set the _decoded_chars buffer."""
        self._decoded_chars = chars
        self._decoded_chars_used = 0

    def _get_decoded_chars(self, n=None):
        """Advance into the _decoded_chars buffer."""
        offset = self._decoded_chars_used
        if n is None:
            chars = self._decoded_chars[offset:]
        else:
            chars = self._decoded_chars[offset:offset + n]
        self._decoded_chars_used += len(chars)
        return chars

    def _rewind_decoded_chars(self, n):
        """Rewind the _decoded_chars buffer."""
        if self._decoded_chars_used < n:
            raise AssertionError("rewind decoded_chars out of bounds")
        self._decoded_chars_used -= n

    def _read_chunk(self):
        """
        Read and decode the next chunk of data from the BufferedReader.

        The return value is True unless EOF was reached.  The decoded string
        is placed in self._decoded_chars (replacing its previous value).
        The entire input chunk is sent to the decoder, though some of it
        may remain buffered in the decoder, yet to be converted.
        """

        if self._decoder is None:
            raise ValueError("no decoder")

        if self._telling:
            # To prepare for tell(), we need to snapshot a point in the
            # file where the decoder's input buffer is empty.

            dec_buffer, dec_flags = self._decoder.getstate()
            # Given this, we know there was a valid snapshot point
            # len(dec_buffer) bytes ago with decoder state (b'', dec_flags).

        # Read a chunk, decode it, and put the result in self._decoded_chars.
        input_chunk = self.buffer.read1(self._CHUNK_SIZE)
        eof = not input_chunk
        self._set_decoded_chars(self._decoder.decode(input_chunk, eof))

        if self._telling:
            # At the snapshot point, len(dec_buffer) bytes before the read,
            # the next input to be decoded is dec_buffer + input_chunk.
            self._snapshot = (dec_flags, dec_buffer + input_chunk)

        return not eof

    def _pack_cookie(self, position, dec_flags=0,
                           bytes_to_feed=0, need_eof=0, chars_to_skip=0):
        # The meaning of a tell() cookie is: seek to position, set the
        # decoder flags to dec_flags, read bytes_to_feed bytes, feed them
        # into the decoder with need_eof as the EOF flag, then skip
        # chars_to_skip characters of the decoded result.  For most simple
        # decoders, tell() will often just give a byte offset in the file.
        return (position | (dec_flags<<64) | (bytes_to_feed<<128) |
               (chars_to_skip<<192) | bool(need_eof)<<256)

    def _unpack_cookie(self, bigint):
        # Inverse of _pack_cookie: peel off each 64-bit field in turn.
        rest, position = divmod(bigint, 1<<64)
        rest, dec_flags = divmod(rest, 1<<64)
        rest, bytes_to_feed = divmod(rest, 1<<64)
        need_eof, chars_to_skip = divmod(rest, 1<<64)
        return position, dec_flags, bytes_to_feed, need_eof, chars_to_skip

    def tell(self):
        """Return a cookie encoding the current logical stream position.

        The cookie packs the byte position of the last "safe start point"
        together with the decoder replay information (see _pack_cookie).
        Raises IOError if the stream is not seekable or telling was
        disabled by iteration (next()).
        """
        if not self._seekable:
            raise IOError("underlying stream is not seekable")
        if not self._telling:
            raise IOError("telling position disabled by next() call")
        self.flush()
        position = self.buffer.tell()
        decoder = self._decoder
        if decoder is None or self._snapshot is None:
            if self._decoded_chars:
                # This should never happen.
                raise AssertionError("pending decoded text")
            return position

        # Skip backward to the snapshot point (see _read_chunk).
        dec_flags, next_input = self._snapshot
        position -= len(next_input)

        # How many decoded characters have been used up since the snapshot?
        chars_to_skip = self._decoded_chars_used
        if chars_to_skip == 0:
            # We haven't moved from the snapshot point.
            return self._pack_cookie(position, dec_flags)

        # Starting from the snapshot position, we will walk the decoder
        # forward until it gives us enough decoded characters.
        saved_state = decoder.getstate()
        try:
            # Note our initial start point.
            decoder.setstate((b'', dec_flags))
            start_pos = position
            start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
            need_eof = 0

            # Feed the decoder one byte at a time.  As we go, note the
            # nearest "safe start point" before the current location
            # (a point where the decoder has nothing buffered, so seek()
            # can safely start from there and advance to this location).
            for next_byte in next_input:
                bytes_fed += 1
                chars_decoded += len(decoder.decode(next_byte))
                dec_buffer, dec_flags = decoder.getstate()
                if not dec_buffer and chars_decoded <= chars_to_skip:
                    # Decoder buffer is empty, so this is a safe start point.
                    start_pos += bytes_fed
                    chars_to_skip -= chars_decoded
                    start_flags, bytes_fed, chars_decoded = dec_flags, 0, 0
                if chars_decoded >= chars_to_skip:
                    break
            else:
                # We didn't get enough decoded data; signal EOF to get more.
                chars_decoded += len(decoder.decode(b'', final=True))
                need_eof = 1
                if chars_decoded < chars_to_skip:
                    raise IOError("can't reconstruct logical file position")

            # The returned cookie corresponds to the last safe start point.
            return self._pack_cookie(
                start_pos, start_flags, bytes_fed, need_eof, chars_to_skip)
        finally:
            # Always restore the decoder; tell() must not disturb it.
            decoder.setstate(saved_state)

    def truncate(self, pos=None):
        self.flush()
        if pos is None:
            pos = self.tell()
        return self.buffer.truncate(pos)

    def seek(self, cookie, whence=0):
        """Seek to a position previously returned by tell().

        Only cookies from tell() (whence=0), zero cur-relative, and zero
        end-relative seeks are supported.
        """
        if self.closed:
            # NOTE(review): message says "tell" but this is seek() --
            # looks like a copy/paste slip; confirm before changing the
            # user-visible text.
            raise ValueError("tell on closed file")
        if not self._seekable:
            raise IOError("underlying stream is not seekable")
        if whence == 1:  # seek relative to current position
            if cookie != 0:
                raise IOError("can't do nonzero cur-relative seeks")
            # Seeking to the current position should attempt to
            # sync the underlying buffer with the current position.
            whence = 0
            cookie = self.tell()
        if whence == 2:  # seek relative to end of file
            if cookie != 0:
                raise IOError("can't do nonzero end-relative seeks")
            self.flush()
            position = self.buffer.seek(0, 2)
            self._set_decoded_chars('')
            self._snapshot = None
            if self._decoder:
                self._decoder.reset()
            return position
        if whence != 0:
            raise ValueError("invalid whence (%r, should be 0, 1 or 2)" %
                             (whence,))
        if cookie < 0:
            raise ValueError("negative seek position %r" % (cookie,))
        self.flush()

        # The strategy of seek() is to go back to the safe start point
        # and replay the effect of read(chars_to_skip) from there.
        start_pos, dec_flags, bytes_to_feed, need_eof, chars_to_skip = \
            self._unpack_cookie(cookie)

        # Seek back to the safe start point.
        self.buffer.seek(start_pos)
        self._set_decoded_chars('')
        self._snapshot = None

        # Restore the decoder to its state from the safe start point.
        if self._decoder or dec_flags or chars_to_skip:
            self._decoder = self._decoder or self._get_decoder()
            self._decoder.setstate((b'', dec_flags))
            self._snapshot = (dec_flags, b'')

        if chars_to_skip:
            # Just like _read_chunk, feed the decoder and save a snapshot.
            input_chunk = self.buffer.read(bytes_to_feed)
            self._set_decoded_chars(
                self._decoder.decode(input_chunk, need_eof))
            self._snapshot = (dec_flags, input_chunk)

            # Skip chars_to_skip of the decoded characters.
            if len(self._decoded_chars) < chars_to_skip:
                raise IOError("can't restore logical file position")
            self._decoded_chars_used = chars_to_skip

        return cookie

    def read(self, n=None):
        """Read and return at most n characters (all of them if n < 0)."""
        if n is None:
            n = -1
        decoder = self._decoder or self._get_decoder()
        if n < 0:
            # Read everything.
            result = (self._get_decoded_chars() +
                      decoder.decode(self.buffer.read(), final=True))
            self._set_decoded_chars('')
            self._snapshot = None
            return result
        else:
            # Keep reading chunks until we have n characters to return.
            eof = False
            result = self._get_decoded_chars(n)
            while len(result) < n and not eof:
                eof = not self._read_chunk()
                result += self._get_decoded_chars(n - len(result))
            return result

    def next(self):
        # Iteration disables tell() (no snapshots are kept) for speed;
        # flush() or exhausting the iterator re-enables it.
        self._telling = False
        line = self.readline()
        if not line:
            self._snapshot = None
            self._telling = self._seekable
            raise StopIteration
        return line

    def readline(self, limit=None):
        """Read until a line ending (per the newline mode) or EOF.

        If limit is non-negative, at most limit characters are returned.
        """
        if self.closed:
            raise ValueError("read from closed file")
        if limit is None:
            limit = -1
        if not isinstance(limit, (int, long)):
            raise TypeError("limit must be an integer")

        # Grab all the decoded text (we will rewind any extra bits later).
        line = self._get_decoded_chars()

        start = 0
        decoder = self._decoder or self._get_decoder()

        pos = endpos = None
        while True:
            if self._readtranslate:
                # Newlines are already translated, only search for \n
                pos = line.find('\n', start)
                if pos >= 0:
                    endpos = pos + 1
                    break
                else:
                    start = len(line)

            elif self._readuniversal:
                # Universal newline search. Find any of \r, \r\n, \n
                # The decoder ensures that \r\n are not split in two pieces

                # In C we'd look for these in parallel of course.
                nlpos = line.find("\n", start)
                crpos = line.find("\r", start)
                if crpos == -1:
                    if nlpos == -1:
                        # Nothing found
                        start = len(line)
                    else:
                        # Found \n
                        endpos = nlpos + 1
                        break
                elif nlpos == -1:
                    # Found lone \r
                    endpos = crpos + 1
                    break
                elif nlpos < crpos:
                    # Found \n
                    endpos = nlpos + 1
                    break
                elif nlpos == crpos + 1:
                    # Found \r\n
                    endpos = crpos + 2
                    break
                else:
                    # Found \r
                    endpos = crpos + 1
                    break
            else:
                # non-universal
                pos = line.find(self._readnl)
                if pos >= 0:
                    endpos = pos + len(self._readnl)
                    break

            if limit >= 0 and len(line) >= limit:
                endpos = limit  # reached length limit
                break

            # No line ending seen yet - get more data
            # NOTE(review): more_line is never used below -- dead variable.
            more_line = ''
            while self._read_chunk():
                if self._decoded_chars:
                    break
            if self._decoded_chars:
                line += self._get_decoded_chars()
            else:
                # end of file
                self._set_decoded_chars('')
                self._snapshot = None
                return line

        if limit >= 0 and endpos > limit:
            endpos = limit  # don't exceed limit

        # Rewind _decoded_chars to just after the line ending we found.
        self._rewind_decoded_chars(len(line) - endpos)
        return line[:endpos]

    @property
    def newlines(self):
        # Delegates to the (universal-newline) decoder, if one exists yet.
        return self._decoder.newlines if self._decoder else None


class StringIO(TextIOWrapper):

    """An in-memory stream for text. The initial_value argument sets the
    value of the object. The other arguments are like those of
    TextIOWrapper's constructor.
    """

    def __init__(self, initial_value="", encoding="utf-8",
                 errors="strict", newline="\n"):
        super(StringIO, self).__init__(BytesIO(),
                                       encoding=encoding,
                                       errors=errors,
                                       newline=newline)
        # Issue #5645: make universal newlines semantics the same as in the
        # C version, even under Windows.
        if newline is None:
            self._writetranslate = False
        if initial_value:
            if not isinstance(initial_value, unicode):
                initial_value = unicode(initial_value)
            self.write(initial_value)
            self.seek(0)

    def getvalue(self):
        """Return the entire contents of the stream as a unicode string."""
        self.flush()
        return self.buffer.getvalue().decode(self._encoding, self._errors)


# Register the concrete text classes as virtual subclasses of TextIOBase.
for klass in (StringIO, TextIOWrapper):
    TextIOBase.register(klass)
del klass
Note: See TracChangeset for help on using the changeset viewer.