source: python/trunk/Lib/fileinput.py@ 602

Last change on this file since 602 was 391, checked in by dmik, 12 years ago

python: Merge vendor 2.7.6 to trunk.

  • Property svn:eol-style set to native
File size: 13.7 KB
Line 
1"""Helper class to quickly write a loop over all standard input files.
2
3Typical use is:
4
5    import fileinput
6    for line in fileinput.input():
7        process(line)
8
9This iterates over the lines of all files listed in sys.argv[1:],
10defaulting to sys.stdin if the list is empty. If a filename is '-' it
11is also replaced by sys.stdin. To specify an alternative list of
12filenames, pass it as the argument to input(). A single file name is
13also allowed.
14
15Functions filename(), lineno() return the filename and cumulative line
16number of the line that has just been read; filelineno() returns its
17line number in the current file; isfirstline() returns true iff the
18line just read is the first line of its file; isstdin() returns true
19iff the line was read from sys.stdin. Function nextfile() closes the
20current file so that the next iteration will read the first line from
21the next file (if any); lines not read from the file will not count
22towards the cumulative line count; the filename is not changed until
23after the first line of the next file has been read. Function close()
24closes the sequence.
25
26Before any lines have been read, filename() returns None and both line
27numbers are zero; nextfile() has no effect. After all lines have been
28read, filename() and the line number functions return the values
29pertaining to the last line read; nextfile() has no effect.
30
31All files are opened in text mode by default, you can override this by
32setting the mode parameter to input() or FileInput.__init__().
33If an I/O error occurs during opening or reading a file, the IOError
34exception is raised.
35
36If sys.stdin is used more than once, the second and further use will
37return no lines, except perhaps for interactive use, or if it has been
38explicitly reset (e.g. using sys.stdin.seek(0)).
39
40Empty files are opened and immediately closed; the only time their
41presence in the list of filenames is noticeable at all is when the
42last file opened is empty.
43
44It is possible that the last line of a file doesn't end in a newline
45character; otherwise lines are returned including the trailing
46newline.
47
48Class FileInput is the implementation; its methods filename(),
49lineno(), filelineno(), isfirstline(), isstdin(), nextfile() and close()
50correspond to the functions in the module. In addition it has a
51readline() method which returns the next input line, and a
52__getitem__() method which implements the sequence behavior. The
53sequence must be accessed in strictly sequential order; sequence
54access and readline() cannot be mixed.
55
56Optional in-place filtering: if the keyword argument inplace=1 is
57passed to input() or to the FileInput constructor, the file is moved
58to a backup file and standard output is directed to the input file.
59This makes it possible to write a filter that rewrites its input file
60in place. If the keyword argument backup=".<some extension>" is also
61given, it specifies the extension for the backup file, and the backup
62file remains around; by default, the extension is ".bak" and it is
63deleted when the output file is closed. In-place filtering is
64disabled when standard input is read. XXX The current implementation
65does not work for MS-DOS 8+3 filesystems.
66
67Performance: this module is unfortunately one of the slower ways of
68processing large numbers of input lines. Nevertheless, a significant
69speed-up has been obtained by using readlines(bufsize) instead of
70readline(). A new keyword argument, bufsize=N, is present on the
71input() function and the FileInput() class to override the default
72buffer size.
73
74XXX Possible additions:
75
76- optional getopt argument processing
77- isatty()
78- read(), read(size), even readlines()
79
80"""
81
82import sys, os
83
# Public API of the module.  fileno() and the two open hooks are public,
# module-level callables as well, so they are exported alongside the
# other helpers.
__all__ = ["input", "close", "nextfile", "filename", "lineno", "filelineno",
           "fileno", "isfirstline", "isstdin", "FileInput",
           "hook_compressed", "hook_encoded"]

# The FileInput instance shared by the module-level functions; it is
# assigned by input() and reset to None by close().
_state = None

# Buffer size used by FileInput when the caller passes a false bufsize
# (the default of 0).
DEFAULT_BUFSIZE = 8*1024
90
def input(files=None, inplace=0, backup="", bufsize=0,
          mode="r", openhook=None):
    """Return an instance of the FileInput class, which can be iterated.

    The parameters are passed unchanged to the constructor of the
    FileInput class.  The returned instance, in addition to being an
    iterator, is stored as the global state used by the module-level
    functions of this module.

    Raises RuntimeError if the previously created instance still has a
    file open.
    """
    global _state
    if _state and _state._file:
        raise RuntimeError("input() already active")
    _state = FileInput(files, inplace, backup, bufsize, mode, openhook)
    return _state
104
def close():
    """Close the sequence.

    The global state is cleared before the FileInput instance is closed,
    so the module is reset even if closing raises.  Does nothing when no
    input() is active.
    """
    global _state
    active, _state = _state, None
    if active:
        active.close()
112
def nextfile():
    """
    Close the current file so that the next iteration will read the first
    line from the next file (if any); lines not read from the file will
    not count towards the cumulative line count. The filename is not
    changed until after the first line of the next file has been read.
    Before the first line has been read, this function has no effect;
    it cannot be used to skip the first file. After the last line of the
    last file has been read, this function has no effect.

    Raises RuntimeError if there is no active input().
    """
    if not _state:
        raise RuntimeError("no active input()")
    return _state.nextfile()
126
def filename():
    """
    Return the name of the file currently being read.
    Before the first line has been read, returns None.

    Raises RuntimeError if there is no active input().
    """
    if not _state:
        raise RuntimeError("no active input()")
    return _state.filename()
135
def lineno():
    """
    Return the cumulative line number of the line that has just been read.
    Before the first line has been read, returns 0. After the last line
    of the last file has been read, returns the line number of that line.

    Raises RuntimeError if there is no active input().
    """
    if not _state:
        raise RuntimeError("no active input()")
    return _state.lineno()
145
def filelineno():
    """
    Return the line number in the current file. Before the first line
    has been read, returns 0. After the last line of the last file has
    been read, returns the line number of that line within the file.

    Raises RuntimeError if there is no active input().
    """
    if not _state:
        raise RuntimeError("no active input()")
    return _state.filelineno()
155
def fileno():
    """
    Return the file number of the current file. When no file is currently
    opened, returns -1.

    Raises RuntimeError if there is no active input().
    """
    if not _state:
        raise RuntimeError("no active input()")
    return _state.fileno()
164
def isfirstline():
    """
    Returns true if the line just read is the first line of its file,
    otherwise returns false.

    Raises RuntimeError if there is no active input().
    """
    if not _state:
        raise RuntimeError("no active input()")
    return _state.isfirstline()
173
def isstdin():
    """
    Returns true if the last line was read from sys.stdin,
    otherwise returns false.

    Raises RuntimeError if there is no active input().
    """
    if not _state:
        raise RuntimeError("no active input()")
    return _state.isstdin()
182
class FileInput:
    """FileInput([files[, inplace[, backup[, bufsize[, mode[, openhook]]]]]])

    Class FileInput is the implementation of the module; its methods
    filename(), lineno(), filelineno(), isfirstline(), isstdin(), fileno(),
    nextfile() and close() correspond to the functions of the same name
    in the module.
    In addition it has a readline() method which returns the next
    input line, and a __getitem__() method which implements the
    sequence behavior.  The sequence must be accessed in strictly
    sequential order; random access and readline() cannot be mixed.
    """

    def __init__(self, files=None, inplace=0, backup="", bufsize=0,
                 mode="r", openhook=None):
        """Record the file list and options; no file is opened yet.

        files may be a single name, an iterable of names, or None (use
        sys.argv[1:]); an empty list falls back to ('-',), i.e. stdin.
        Raises ValueError for a mode outside 'r', 'rU', 'U', 'rb', for
        an openhook combined with inplace, or a non-callable openhook.
        """
        if isinstance(files, basestring):
            files = (files,)
        else:
            if files is None:
                files = sys.argv[1:]
            if not files:
                files = ('-',)
            else:
                files = tuple(files)
        self._files = files
        self._inplace = inplace
        self._backup = backup
        self._bufsize = bufsize or DEFAULT_BUFSIZE
        self._savestdout = None
        self._output = None
        self._filename = None
        self._lineno = 0
        self._filelineno = 0
        self._file = None
        self._isstdin = False
        self._backupfilename = None
        self._buffer = []
        self._bufindex = 0
        # restrict mode argument to reading modes
        if mode not in ('r', 'rU', 'U', 'rb'):
            raise ValueError("FileInput opening mode must be one of "
                             "'r', 'rU', 'U' and 'rb'")
        self._mode = mode
        if inplace and openhook:
            raise ValueError("FileInput cannot use an opening hook in inplace mode")
        elif openhook and not hasattr(openhook, '__call__'):
            raise ValueError("FileInput openhook must be callable")
        self._openhook = openhook

    def __del__(self):
        self.close()

    def close(self):
        """Close the current file and drop all remaining file names."""
        try:
            self.nextfile()
        finally:
            # Forget the pending files even if closing the current one
            # failed, so the sequence is finished either way.
            self._files = ()

    def __iter__(self):
        return self

    def next(self):
        """Return the next line; raise StopIteration at end of input."""
        # Fast path: serve the line straight from the current buffer.
        try:
            line = self._buffer[self._bufindex]
        except IndexError:
            pass
        else:
            self._bufindex += 1
            self._lineno += 1
            self._filelineno += 1
            return line
        line = self.readline()
        if not line:
            raise StopIteration
        return line

    def __getitem__(self, i):
        """Sequence access; i must equal the number of lines read so far.

        Raises RuntimeError for any other index and IndexError once the
        input is exhausted.
        """
        if i != self._lineno:
            raise RuntimeError("accessing lines out of order")
        try:
            return self.next()
        except StopIteration:
            raise IndexError("end of input reached")

    def nextfile(self):
        """Close the current file and undo any in-place redirection.

        Restores sys.stdout, closes the in-place output file and the
        input file (unless it is stdin), removes the backup file when no
        backup extension was requested, and empties the line buffer.
        Each step runs even if an earlier close() raises; the first
        error still propagates to the caller.
        """
        savestdout = self._savestdout
        self._savestdout = None
        if savestdout:
            sys.stdout = savestdout

        output = self._output
        self._output = None
        try:
            if output:
                output.close()
        finally:
            current = self._file
            self._file = None
            try:
                if current and not self._isstdin:
                    current.close()
            finally:
                backupfilename = self._backupfilename
                self._backupfilename = None
                if backupfilename and not self._backup:
                    # Best effort: a backup that cannot be removed is
                    # deliberately ignored.
                    try:
                        os.unlink(backupfilename)
                    except OSError:
                        pass

                self._isstdin = False
                self._buffer = []
                self._bufindex = 0

    def readline(self):
        """Return the next input line, or "" once all files are exhausted.

        Lines are served from a buffer filled with readlines(bufsize);
        when the buffer runs dry the current file is refilled from, or
        the next file is opened.  Implemented as a loop so that a long
        run of empty files cannot exhaust the recursion limit.
        """
        while True:
            try:
                line = self._buffer[self._bufindex]
            except IndexError:
                pass
            else:
                self._bufindex += 1
                self._lineno += 1
                self._filelineno += 1
                return line
            if not self._file:
                if not self._files:
                    return ""
                self._open_next()
            self._buffer = self._file.readlines(self._bufsize)
            self._bufindex = 0
            if not self._buffer:
                # Current file is exhausted (or empty): close it and let
                # the loop move on to the next one.
                self.nextfile()

    def _open_next(self):
        """Pop the next name off the file list and open it for reading."""
        self._filename = self._files[0]
        self._files = self._files[1:]
        self._filelineno = 0
        self._file = None
        self._isstdin = False
        self._backupfilename = None
        if self._filename == '-':
            self._filename = '<stdin>'
            self._file = sys.stdin
            self._isstdin = True
        elif self._inplace:
            self._backupfilename = (
                self._filename + (self._backup or os.extsep+"bak"))
            try:
                os.unlink(self._backupfilename)
            except os.error:
                pass
            # The next few lines may raise IOError
            os.rename(self._filename, self._backupfilename)
            self._file = open(self._backupfilename, self._mode)
            try:
                perm = os.fstat(self._file.fileno()).st_mode
            except OSError:
                self._output = open(self._filename, "w")
            else:
                # Recreate the original name with the permission bits of
                # the file that was just moved aside.
                fd = os.open(self._filename,
                             os.O_CREAT | os.O_WRONLY | os.O_TRUNC,
                             perm)
                self._output = os.fdopen(fd, "w")
                try:
                    if hasattr(os, 'chmod'):
                        os.chmod(self._filename, perm)
                except OSError:
                    pass
            # Redirect standard output into the rewritten file until
            # nextfile() restores it.
            self._savestdout = sys.stdout
            sys.stdout = self._output
        else:
            # This may raise IOError
            if self._openhook:
                self._file = self._openhook(self._filename, self._mode)
            else:
                self._file = open(self._filename, self._mode)

    def filename(self):
        """Return the name of the file last opened, or None before that."""
        return self._filename

    def lineno(self):
        """Return the cumulative number of lines read so far."""
        return self._lineno

    def filelineno(self):
        """Return the number of lines read from the current file."""
        return self._filelineno

    def fileno(self):
        """Return the current file's descriptor, or -1 if unavailable."""
        if self._file:
            try:
                return self._file.fileno()
            except ValueError:
                return -1
        else:
            return -1

    def isfirstline(self):
        """Return true if the line just read is the first of its file."""
        return self._filelineno == 1

    def isstdin(self):
        """Return true if the current file is sys.stdin."""
        return self._isstdin
375
376
def hook_compressed(filename, mode):
    """Open hook that picks an opener from the filename extension.

    Names ending in '.gz' are opened with gzip.open(), names ending in
    '.bz2' with bz2.BZ2File(), and everything else with the builtin
    open().  The compression modules are imported only when needed.
    """
    suffix = os.path.splitext(filename)[1]
    if suffix == '.gz':
        import gzip
        return gzip.open(filename, mode)
    if suffix == '.bz2':
        import bz2
        return bz2.BZ2File(filename, mode)
    return open(filename, mode)
387
388
def hook_encoded(encoding):
    """Return an open hook that opens files with the given encoding.

    The returned callable takes (filename, mode) and delegates to
    codecs.open() with the encoding captured here.
    """
    import codecs

    def openhook(filename, mode):
        return codecs.open(filename, mode, encoding=encoding)

    return openhook
394
395
def _test():
    """Command-line self-test: echo every input line with its counters.

    Options are parsed from sys.argv[1:]: -i turns on in-place mode and
    -b EXT sets the backup extension; remaining arguments are the input
    files.  Each line is printed as
    "<lineno>: <filename>[<filelineno>]<* if first line> <text>",
    followed by a final summary line with the closing counters.
    """
    import getopt
    inplace = 0
    backup = 0
    opts, args = getopt.getopt(sys.argv[1:], "ib:")
    for o, a in opts:
        if o == '-i':
            inplace = 1
        if o == '-b':
            backup = a
    for line in input(args, inplace=inplace, backup=backup):
        # Strip one trailing newline and then one trailing carriage
        # return so the echoed line carries no line terminator.
        if line[-1:] == '\n':
            line = line[:-1]
        if line[-1:] == '\r':
            line = line[:-1]
        # Single parenthesised argument: prints identically whether
        # print is a statement or a function.
        print("%d: %s[%d]%s %s" % (lineno(), filename(), filelineno(),
                                   "*" if isfirstline() else "", line))
    print("%d: %s[%d]" % (lineno(), filename(), filelineno()))


if __name__ == '__main__':
    _test()
Note: See TracBrowser for help on using the repository browser.