r"""File-like objects that read from or write to a string buffer.

This implements (nearly) all stdio methods.

f = StringIO()      # ready for writing
f = StringIO(buf)   # ready for reading
f.close()           # explicitly release resources held
flag = f.isatty()   # always false
pos = f.tell()      # get current position
f.seek(pos)         # set current position
f.seek(pos, mode)   # mode 0: absolute; 1: relative; 2: relative to EOF
buf = f.read()      # read until EOF
buf = f.read(n)     # read up to n bytes
buf = f.readline()  # read until end of line ('\n') or EOF
list = f.readlines() # list of f.readline() results until EOF
f.truncate([size])  # truncate file to at most size (default: current pos)
f.write(buf)        # write at current position
f.writelines(list)  # for line in list: f.write(line)
f.getvalue()        # return whole file's contents as a string

Notes:
- Using a real file is often faster (but less convenient).
- There's also a much faster implementation in C, called cStringIO, but
  it's not subclassable.
- fileno() is left unimplemented so that code which uses it triggers
  an exception early.
- Seeking far beyond EOF and then writing will insert real null
  bytes that occupy space in the buffer.
- There's a simple test set (see end of this file).
"""
try:
    from errno import EINVAL
except ImportError:
    # The errno module could not be imported; fall back to a hard-coded
    # number (presumably the usual numeric value of EINVAL).
    EINVAL = 22

# Names exported by a star-import of this module.
__all__ = ["StringIO"]

| 38 | def _complain_ifclosed(closed): | 
|---|
| 39 | if closed: | 
|---|
| 40 | raise ValueError, "I/O operation on closed file" | 
|---|
| 41 |  | 
|---|
class StringIO:
    """class StringIO([buffer])

    When a StringIO object is created, it can be initialized to an existing
    string by passing the string to the constructor. If no string is given,
    the StringIO will start empty.

    The StringIO object can accept either Unicode or 8-bit strings, but
    mixing the two may take some care. If both are used, 8-bit strings that
    cannot be interpreted as 7-bit ASCII (that use the 8th bit) will cause
    a UnicodeError to be raised when getvalue() is called.

    Internal state: ``buf`` holds the consolidated contents, ``buflist``
    holds pieces written since the last consolidation, ``len`` is the
    combined length of both, and ``pos`` is the current position.
    """
    def __init__(self, buf=''):
        """Create the object, optionally preloaded with *buf*.

        A *buf* that is not a string is converted with str().  The
        position starts at 0.
        """
        # Force self.buf to be a string or unicode
        if not isinstance(buf, basestring):
            buf = str(buf)
        self.buf = buf
        self.len = len(buf)
        self.buflist = []
        self.pos = 0
        self.closed = False
        self.softspace = 0

    def _consolidate(self):
        # Fold the pieces queued in buflist into buf so that buf alone
        # holds the whole contents.
        if self.buflist:
            self.buf += ''.join(self.buflist)
            self.buflist = []

    def __iter__(self):
        """Return self; the object is its own line iterator."""
        return self

    def next(self):
        """A file object is its own iterator, for example iter(f) returns f
        (unless f is closed). When a file is used as an iterator, typically
        in a for loop (for example, for line in f: print line), the next()
        method is called repeatedly. This method returns the next input line,
        or raises StopIteration when EOF is hit.
        """
        _complain_ifclosed(self.closed)
        r = self.readline()
        if not r:
            raise StopIteration
        return r

    def close(self):
        """Free the memory buffer.

        Closing an object that is already closed does nothing.
        """
        if not self.closed:
            self.closed = True
            del self.buf, self.pos
            # Pieces written but not yet consolidated hold memory too.
            self.buflist = []

    def isatty(self):
        """Returns False because StringIO objects are not connected to a
        tty-like device.
        """
        _complain_ifclosed(self.closed)
        return False

    def seek(self, pos, mode=0):
        """Set the file's current position.

        The mode argument is optional and defaults to 0 (absolute file
        positioning); other values are 1 (seek relative to the current
        position) and 2 (seek relative to the file's end).  Any other
        mode value is treated like 0.  A resulting negative position is
        clamped to 0.

        There is no return value.
        """
        _complain_ifclosed(self.closed)
        self._consolidate()
        if mode == 1:
            pos += self.pos
        elif mode == 2:
            pos += self.len
        self.pos = max(0, pos)

    def tell(self):
        """Return the file's current position."""
        _complain_ifclosed(self.closed)
        return self.pos

    def read(self, n=-1):
        """Read at most n bytes from the file
        (less if the read hits EOF before obtaining n bytes).

        If the n argument is negative, None or omitted, read all data
        until EOF is reached. The bytes are returned as a string object.
        An empty string is returned when EOF is encountered immediately.
        """
        _complain_ifclosed(self.closed)
        self._consolidate()
        if n is None or n < 0:
            newpos = self.len
        else:
            newpos = min(self.pos + n, self.len)
        # A read must never move the position backwards: after seeking
        # past EOF it returns '' and leaves the position where it was.
        newpos = max(newpos, self.pos)
        r = self.buf[self.pos:newpos]
        self.pos = newpos
        return r

    def readline(self, length=None):
        r"""Read one entire line from the file.

        A trailing newline character is kept in the string (but may be absent
        when a file ends with an incomplete line). If the length argument is
        present and non-negative, it is a maximum byte count (including the
        trailing newline) and an incomplete line may be returned.

        An empty string is returned only when EOF is encountered immediately.

        Note: Unlike stdio's fgets(), the returned string contains null
        characters ('\0') if they occurred in the input.
        """
        _complain_ifclosed(self.closed)
        self._consolidate()
        i = self.buf.find('\n', self.pos)
        if i < 0:
            # No newline left: read to EOF, but (as in read()) never move
            # the position backwards when it is already past EOF.
            newpos = max(self.len, self.pos)
        else:
            newpos = i + 1
        if length is not None and length >= 0:
            if self.pos + length < newpos:
                newpos = self.pos + length
        r = self.buf[self.pos:newpos]
        self.pos = newpos
        return r

    def readlines(self, sizehint=0):
        """Read until EOF using readline() and return a list containing the
        lines thus read.

        If the optional sizehint argument is present and positive, instead
        of reading up to EOF, whole lines totalling approximately sizehint
        bytes (or more to accommodate a final whole line) are read.
        """
        total = 0
        lines = []
        line = self.readline()
        while line:
            lines.append(line)
            total += len(line)
            if 0 < sizehint <= total:
                break
            line = self.readline()
        return lines

    def truncate(self, size=None):
        """Truncate the file's size.

        If the optional size argument is present, the file is truncated to
        (at most) that size. The size defaults to the current position.
        The current file position is not changed unless the position
        is beyond the new file size.

        If the specified size exceeds the file's current size, the
        file remains unchanged.

        Raises IOError(EINVAL) if size is negative.
        """
        _complain_ifclosed(self.closed)
        if size is None:
            size = self.pos
        elif size < 0:
            raise IOError(EINVAL, "Negative size not allowed")
        elif size < self.pos:
            self.pos = size
        self.buf = self.getvalue()[:size]
        # Record the real length rather than the requested size, so that
        # len stays equal to the contents when size exceeds them.
        self.len = len(self.buf)

    def write(self, s):
        """Write a string to the file.

        A false value (empty string, but also e.g. None or 0) is ignored;
        any other non-string is converted with str().  Writing after a
        seek past EOF first pads the gap with null bytes.

        There is no return value.
        """
        _complain_ifclosed(self.closed)
        if not s: return
        # Force s to be a string or unicode
        if not isinstance(s, basestring):
            s = str(s)
        spos = self.pos
        slen = self.len
        if spos == slen:
            # Fast path: appending at EOF just queues the piece.
            self.buflist.append(s)
            self.len = self.pos = spos + len(s)
            return
        if spos > slen:
            # Position is past EOF: fill the gap with null bytes.
            self.buflist.append('\0'*(spos - slen))
            slen = spos
        newpos = spos + len(s)
        if spos < slen:
            # Overwrite in the middle: keep the text before the position
            # and whatever follows the overwritten span.
            self._consolidate()
            self.buflist = [self.buf[:spos], s, self.buf[newpos:]]
            self.buf = ''
            if newpos > slen:
                slen = newpos
        else:
            self.buflist.append(s)
            slen = newpos
        self.len = slen
        self.pos = newpos

    def writelines(self, iterable):
        """Write a sequence of strings to the file. The sequence can be any
        iterable object producing strings, typically a list of strings. There
        is no return value.

        (The name is intended to match readlines(); writelines() does not add
        line separators.)
        """
        write = self.write
        for line in iterable:
            write(line)

    def flush(self):
        """Flush the internal buffer

        Nothing is actually flushed; the call only checks that the object
        is still open.
        """
        _complain_ifclosed(self.closed)

    def getvalue(self):
        """
        Retrieve the entire contents of the "file" at any time before
        the StringIO object's close() method is called.

        The StringIO object can accept either Unicode or 8-bit strings,
        but mixing the two may take some care. If both are used, 8-bit
        strings that cannot be interpreted as 7-bit ASCII (that use the
        8th bit) will cause a UnicodeError to be raised when getvalue()
        is called.
        """
        _complain_ifclosed(self.closed)
        self._consolidate()
        return self.buf


# A little test suite

def test():
    """Exercise StringIO against the contents of a real text file.

    The file named by the first command-line argument (default:
    /etc/passwd) is copied into a StringIO, which is then seeked,
    partly overwritten, re-read and truncated.  Progress is printed and
    RuntimeError is raised on the first mismatch.  The file needs at
    least two lines, because the second line is written over the text
    following the first.
    """
    import sys
    if sys.argv[1:]:
        path = sys.argv[1]
    else:
        path = '/etc/passwd'
    # Read the file once and close the handle straight away; the full
    # text is just the lines joined back together.
    fp = open(path, 'r')
    try:
        lines = fp.readlines()
    finally:
        fp.close()
    text = ''.join(lines)
    f = StringIO()
    for line in lines[:-2]:
        f.write(line)
    f.writelines(lines[-2:])
    if f.getvalue() != text:
        raise RuntimeError('write failed')
    length = f.tell()
    print('File length = %s' % length)
    # Overwrite the text after the first line with a copy of line two.
    f.seek(len(lines[0]))
    f.write(lines[1])
    f.seek(0)
    print('First line = %s' % repr(f.readline()))
    print('Position = %s' % f.tell())
    line = f.readline()
    print('Second line = %s' % repr(line))
    # Step back over the line just read and read it again.
    f.seek(-len(line), 1)
    line2 = f.read(len(line))
    if line != line2:
        raise RuntimeError('bad result after seek back')
    f.seek(len(line2), 1)
    rest = f.readlines()
    line = rest[-1]
    # Step back over the final line and re-read it up to EOF.
    f.seek(f.tell() - len(line))
    line2 = f.read()
    if line != line2:
        raise RuntimeError('bad result after seek back from EOF')
    print('Read %s more lines' % len(rest))
    print('File length = %s' % f.tell())
    if f.tell() != length:
        raise RuntimeError('bad length')
    # Floor division keeps the size an integer on every Python version.
    half = length // 2
    f.truncate(half)
    f.seek(0, 2)
    print('Truncated length = %s' % f.tell())
    if f.tell() != half:
        raise RuntimeError('truncate did not adjust length')
    f.close()

# Run the self-test when this module is executed as a script.
if __name__ == '__main__':
    test()