[2] | 1 | r"""File-like objects that read from or write to a string buffer.
|
---|
| 2 |
|
---|
| 3 | This implements (nearly) all stdio methods.
|
---|
| 4 |
|
---|
| 5 | f = StringIO() # ready for writing
|
---|
| 6 | f = StringIO(buf) # ready for reading
|
---|
| 7 | f.close() # explicitly release resources held
|
---|
| 8 | flag = f.isatty() # always false
|
---|
| 9 | pos = f.tell() # get current position
|
---|
| 10 | f.seek(pos) # set current position
|
---|
| 11 | f.seek(pos, mode) # mode 0: absolute; 1: relative; 2: relative to EOF
|
---|
| 12 | buf = f.read() # read until EOF
|
---|
| 13 | buf = f.read(n) # read up to n bytes
|
---|
| 14 | buf = f.readline() # read until end of line ('\n') or EOF
|
---|
| 15 | list = f.readlines() # list of f.readline() results until EOF
|
---|
| 16 | f.truncate([size]) # truncate file to at most size (default: current pos)
|
---|
| 17 | f.write(buf) # write at current position
|
---|
| 18 | f.writelines(list) # for line in list: f.write(line)
|
---|
| 19 | f.getvalue() # return whole file's contents as a string
|
---|
| 20 |
|
---|
| 21 | Notes:
|
---|
| 22 | - Using a real file is often faster (but less convenient).
|
---|
| 23 | - There's also a much faster implementation in C, called cStringIO, but
|
---|
| 24 | it's not subclassable.
|
---|
| 25 | - fileno() is left unimplemented so that code which uses it triggers
|
---|
| 26 | an exception early.
|
---|
| 27 | - Seeking far beyond EOF and then writing will insert real null
|
---|
| 28 | bytes that occupy space in the buffer.
|
---|
| 29 | - There's a simple test set (see end of this file).
|
---|
| 30 | """
|
---|
| 31 | try:
|
---|
| 32 | from errno import EINVAL
|
---|
| 33 | except ImportError:
|
---|
| 34 | EINVAL = 22
|
---|
| 35 |
|
---|
| 36 | __all__ = ["StringIO"]
|
---|
| 37 |
|
---|
| 38 | def _complain_ifclosed(closed):
|
---|
| 39 | if closed:
|
---|
| 40 | raise ValueError, "I/O operation on closed file"
|
---|
| 41 |
|
---|
class StringIO:
    """class StringIO([buffer])

    When a StringIO object is created, it can be initialized to an existing
    string by passing the string to the constructor. If no string is given,
    the StringIO will start empty.

    The StringIO object can accept either Unicode or 8-bit strings, but
    mixing the two may take some care. If both are used, 8-bit strings that
    cannot be interpreted as 7-bit ASCII (that use the 8th bit) will cause
    a UnicodeError to be raised when getvalue() is called.

    Instance attributes:

    buf       -- the consolidated contents
    buflist   -- written fragments that have not been joined onto buf yet
    len       -- total length of the contents (buf plus pending fragments)
    pos       -- current position
    closed    -- true once close() has been called
    softspace -- initialised to 0
    """
    def __init__(self, buf=''):
        """Create the object, optionally pre-loaded with *buf*.

        A *buf* that is not a string is converted with str().
        """
        # Force self.buf to be a string or unicode
        if not isinstance(buf, basestring):
            buf = str(buf)
        self.buf = buf
        self.len = len(buf)
        self.buflist = []
        self.pos = 0
        self.closed = False
        self.softspace = 0

    def _flush_buflist(self):
        """Join the pending write fragments in self.buflist onto self.buf."""
        if self.buflist:
            self.buf += ''.join(self.buflist)
            self.buflist = []

    def __iter__(self):
        """Return self; the object is its own iterator."""
        return self

    def next(self):
        """A file object is its own iterator, for example iter(f) returns f
        (unless f is closed). When a file is used as an iterator, typically
        in a for loop (for example, for line in f: print line), the next()
        method is called repeatedly. This method returns the next input line,
        or raises StopIteration when EOF is hit.
        """
        _complain_ifclosed(self.closed)
        r = self.readline()
        if not r:
            raise StopIteration
        return r

    def close(self):
        """Free the memory buffer.

        Calling close() more than once is allowed; later calls do nothing.
        """
        if not self.closed:
            self.closed = True
            # Writes at EOF are parked in buflist, so it must be dropped
            # too; otherwise the written data stays referenced after close.
            self.buflist = []
            del self.buf, self.pos

    def isatty(self):
        """Returns False because StringIO objects are not connected to a
        tty-like device.

        Raises ValueError if the object is closed.
        """
        _complain_ifclosed(self.closed)
        return False

    def seek(self, pos, mode=0):
        """Set the file's current position.

        The mode argument is optional and defaults to 0 (absolute file
        positioning); other values are 1 (seek relative to the current
        position) and 2 (seek relative to the file's end).

        A resulting position below zero is clamped to 0.

        There is no return value. Raises ValueError if the object is closed.
        """
        _complain_ifclosed(self.closed)
        self._flush_buflist()
        if mode == 1:
            pos += self.pos
        elif mode == 2:
            pos += self.len
        self.pos = max(0, pos)

    def tell(self):
        """Return the file's current position."""
        _complain_ifclosed(self.closed)
        return self.pos

    def read(self, n=-1):
        """Read at most n bytes from the file
        (less if the read hits EOF before obtaining n bytes).

        If n is negative, None or omitted, read all data until EOF
        is reached. The bytes are returned as a string object. An empty
        string is returned when EOF is encountered immediately.

        Raises ValueError if the object is closed.
        """
        _complain_ifclosed(self.closed)
        self._flush_buflist()
        if n is None or n < 0:
            newpos = self.len
        else:
            newpos = min(self.pos+n, self.len)
        r = self.buf[self.pos:newpos]
        self.pos = newpos
        return r

    def readline(self, length=None):
        r"""Read one entire line from the file.

        A trailing newline character is kept in the string (but may be absent
        when a file ends with an incomplete line). If the length argument is
        present and non-negative, it is a maximum byte count (including the
        trailing newline) and an incomplete line may be returned.

        An empty string is returned only when EOF is encountered immediately.

        Note: Unlike stdio's fgets(), the returned string contains null
        characters ('\0') if they occurred in the input.

        Raises ValueError if the object is closed.
        """
        _complain_ifclosed(self.closed)
        self._flush_buflist()
        i = self.buf.find('\n', self.pos)
        if i < 0:
            # No newline left: the line runs to the end of the data.
            newpos = self.len
        else:
            newpos = i+1
        if length is not None and length >= 0:
            if self.pos + length < newpos:
                newpos = self.pos + length
        r = self.buf[self.pos:newpos]
        self.pos = newpos
        return r

    def readlines(self, sizehint=0):
        """Read until EOF using readline() and return a list containing the
        lines thus read.

        If the optional sizehint argument is present and positive, instead
        of reading up to EOF, whole lines totalling approximately sizehint
        bytes (or more to accommodate a final whole line) are read.
        """
        total = 0
        lines = []
        line = self.readline()
        while line:
            lines.append(line)
            total += len(line)
            if 0 < sizehint <= total:
                break
            line = self.readline()
        return lines

    def truncate(self, size=None):
        """Truncate the file's size.

        If the optional size argument is present, the file is truncated to
        (at most) that size. The size defaults to the current position.
        The current file position is not changed unless the position
        is beyond the new file size.

        If the specified size exceeds the file's current size, the
        file remains unchanged.

        Raises IOError (errno EINVAL) if size is negative, and ValueError
        if the object is closed.
        """
        _complain_ifclosed(self.closed)
        if size is None:
            size = self.pos
        elif size < 0:
            raise IOError(EINVAL, "Negative size not allowed")
        elif size < self.pos:
            self.pos = size
        self.buf = self.getvalue()[:size]
        # Slicing can only shorten the data, so take the length from the
        # result. Recording size itself would overstate the length whenever
        # size is larger than the current contents.
        self.len = len(self.buf)

    def write(self, s):
        """Write a string to the file.

        A false argument (such as the empty string) is ignored. Any other
        non-string argument is converted with str(). Writing at a position
        beyond EOF pads the gap with null bytes first.

        There is no return value. Raises ValueError if the object is closed.
        """
        _complain_ifclosed(self.closed)
        if not s: return
        # Force s to be a string or unicode
        if not isinstance(s, basestring):
            s = str(s)
        spos = self.pos
        slen = self.len
        if spos == slen:
            # Fast path: appending at EOF only queues the fragment.
            self.buflist.append(s)
            self.len = self.pos = spos + len(s)
            return
        if spos > slen:
            # Position is past EOF: fill the gap with null bytes.
            self.buflist.append('\0'*(spos - slen))
            slen = spos
        newpos = spos + len(s)
        if spos < slen:
            # Overwrite inside existing data: splice s between the part
            # before the position and whatever follows the overwritten span.
            self._flush_buflist()
            self.buflist = [self.buf[:spos], s, self.buf[newpos:]]
            self.buf = ''
            if newpos > slen:
                slen = newpos
        else:
            self.buflist.append(s)
            slen = newpos
        self.len = slen
        self.pos = newpos

    def writelines(self, iterable):
        """Write a sequence of strings to the file. The sequence can be any
        iterable object producing strings, typically a list of strings. There
        is no return value.

        (The name is intended to match readlines(); writelines() does not add
        line separators.)
        """
        write = self.write
        for line in iterable:
            write(line)

    def flush(self):
        """Flush the internal buffer.

        Nothing is actually flushed; the call only raises ValueError if the
        object is closed.
        """
        _complain_ifclosed(self.closed)

    def getvalue(self):
        """
        Retrieve the entire contents of the "file" at any time before
        the StringIO object's close() method is called.

        The StringIO object can accept either Unicode or 8-bit strings,
        but mixing the two may take some care. If both are used, 8-bit
        strings that cannot be interpreted as 7-bit ASCII (that use the
        8th bit) will cause a UnicodeError to be raised when getvalue()
        is called.

        Raises ValueError if the object is closed.
        """
        _complain_ifclosed(self.closed)
        self._flush_buflist()
        return self.buf
|
---|
| 274 |
|
---|
| 275 |
|
---|
| 276 | # A little test suite
|
---|
| 277 |
|
---|
def test():
    """Exercise StringIO against the contents of a text file.

    The file named by the first command-line argument is used, or
    '/etc/passwd' when no argument is given. Its lines are written into a
    StringIO, and the write, seek, read, readline, readlines and truncate
    results are compared with what is expected. Progress is printed to
    standard output.

    Raises RuntimeError as soon as one of the checks fails.
    """
    import sys
    if sys.argv[1:]:
        path = sys.argv[1]
    else:
        path = '/etc/passwd'
    # Read the file both as lines and as one string through a single handle,
    # and close it explicitly instead of leaking it. try/finally is used so
    # the code does not depend on the "with" statement being available.
    fp = open(path, 'r')
    try:
        lines = fp.readlines()
        fp.seek(0)
        text = fp.read()
    finally:
        fp.close()
    f = StringIO()
    for line in lines[:-2]:
        f.write(line)
    f.writelines(lines[-2:])
    if f.getvalue() != text:
        raise RuntimeError('write failed')
    length = f.tell()
    print('File length = %s' % length)
    # Overwrite the second line with itself; the contents must not change.
    f.seek(len(lines[0]))
    f.write(lines[1])
    f.seek(0)
    print('First line = %s' % repr(f.readline()))
    print('Position = %s' % f.tell())
    line = f.readline()
    print('Second line = %s' % repr(line))
    f.seek(-len(line), 1)
    line2 = f.read(len(line))
    if line != line2:
        raise RuntimeError('bad result after seek back')
    f.seek(len(line2), 1)
    remaining = f.readlines()
    line = remaining[-1]
    f.seek(f.tell() - len(line))
    line2 = f.read()
    if line != line2:
        raise RuntimeError('bad result after seek back from EOF')
    print('Read %s more lines' % len(remaining))
    print('File length = %s' % f.tell())
    if f.tell() != length:
        raise RuntimeError('bad length')
    # Floor division keeps the size an integer even under true division.
    half = length // 2
    f.truncate(half)
    f.seek(0, 2)
    print('Truncated length = %s' % f.tell())
    if f.tell() != half:
        raise RuntimeError('truncate did not adjust length')
    f.close()


if __name__ == '__main__':
    test()
|
---|