Skip to content

Commit 7d5b99d

Browse files
committed
A new standard module, as discussed on comp.lang.python, to simplify
the writing of filters. Typical use is: import fileinput for line in fileinput.input(): process(line) This iterates over the lines of all files listed in sys.argv[1:], defaulting to sys.stdin if the list is empty or when a filename is '-'. There is also an option to use this to direct the output back to the input files.
1 parent 2aa78ef commit 7d5b99d

File tree

1 file changed

+254
-0
lines changed

1 file changed

+254
-0
lines changed

Lib/fileinput.py

Lines changed: 254 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,254 @@
1+
"""Helper class to quickly write a loop over all standard input files.
2+
3+
Typical use is:
4+
5+
import fileinput
6+
for line in fileinput.input():
7+
process(line)
8+
9+
This iterates over the lines of all files listed in sys.argv[1:],
10+
defaulting to sys.stdin if the list is empty. If a filename is '-' it
11+
is also replaced by sys.stdin. To specify an alternative list of
12+
filenames, pass it as the argument to input(). A single file name is
13+
also allowed.
14+
15+
Functions filename(), lineno() return the filename and cumulative line
16+
number of the line that has just been read; filelineno() returns its
17+
line number in the current file; isfirstline() returns true iff the
18+
line just read is the first line of its file; isstdin() returns true
19+
iff the line was read from sys.stdin. Function nextfile() closes the
20+
current file so that the next iteration will read the first line from
21+
the next file (if any); lines not read from the file will not count
22+
towards the cumulative line count; the filename is not changed until
23+
after the first line of the next file has been read. Function close()
24+
closes the sequence.
25+
26+
Before any lines have been read, filename() returns None and both line
27+
numbers are zero; nextfile() has no effect. After all lines have been
28+
read, filename() and the line number functions return the values
29+
pertaining to the last line read; nextfile() has no effect.
30+
31+
All files are opened in text mode. If an I/O error occurs during
32+
opening or reading a file, the IOError exception is raised.
33+
34+
If sys.stdin is used more than once, the second and further use will
35+
return no lines, except perhaps for interactive use, or if it has been
36+
explicitly reset (e.g. using sys.stdin.seek(0)).
37+
38+
Empty files are opened and immediately closed; the only time their
39+
presence in the list of filenames is noticeable at all is when the
40+
last file opened is empty.
41+
42+
It is possible that the last line of a file doesn't end in a newline
43+
character; otherwise lines are returned including the trailing
44+
newline.
45+
46+
Class FileInput is the implementation; its methods filename(),
47+
lineno(), filelineno(), isfirstline(), isstdin(), nextfile() and close()
48+
correspond to the functions in the module. In addition it has a
49+
readline() method which returns the next input line, and a
50+
__getitem__() method which implements the sequence behavior. The
51+
sequence must be accessed in strictly sequential order; sequence
52+
access and readline() cannot be mixed.
53+
54+
Optional in-place filtering: if the keyword argument inplace=1 is
55+
passed to input() or to the FileInput constructor, the file is moved
56+
to a backup file and standard output is directed to the input file.
57+
This makes it possible to write a filter that rewrites its input file
58+
in place. If the keyword argument backup=".<some extension>" is also
59+
given, it specifies the extension for the backup file, and the backup
60+
file remains around; by default, the extension is ".bak" and it is
61+
deleted when the output file is closed. In-place filtering is
62+
disabled when standard input is read. XXX The current implementation
63+
does not work for MS-DOS 8+3 filesystems.
64+
65+
XXX Possible additions:
66+
67+
- optional getopt argument processing
68+
- specify open mode ('r' or 'rb')
69+
- specify buffer size
70+
- fileno()
71+
- isatty()
72+
- read(), read(size), even readlines()
73+
74+
"""
75+
76+
import sys, os
77+
78+
# FileInput instance shared by the module-level functions below; it is set
# by input() and reset to None by close().
_state = None
79+
80+
def input(files=(), inplace=0, backup=""):
    """Create the module-wide FileInput instance and return it.

    The arguments are passed straight through to the FileInput
    constructor.  Raises RuntimeError if a previously created instance
    still has a file open.
    """
    global _state
    previous = _state
    if previous and previous._file:
        raise RuntimeError("input() already active")
    _state = FileInput(files, inplace, backup)
    return _state
86+
87+
def close():
    """Close the module-wide FileInput instance, if there is one.

    The global reference is cleared before the instance is closed, so
    the module is left without an active instance either way.
    """
    global _state
    active, _state = _state, None
    if active:
        active.close()
93+
94+
def nextfile():
    """Call nextfile() on the module-wide FileInput instance.

    Raises RuntimeError if no instance is active.
    """
    active = _state
    if active:
        return active.nextfile()
    raise RuntimeError("no active input()")
98+
99+
def filename():
    """Return filename() of the module-wide FileInput instance.

    Raises RuntimeError if no instance is active.
    """
    active = _state
    if active:
        return active.filename()
    raise RuntimeError("no active input()")
103+
104+
def lineno():
    """Return lineno() of the module-wide FileInput instance.

    Raises RuntimeError if no instance is active.
    """
    active = _state
    if active:
        return active.lineno()
    raise RuntimeError("no active input()")
108+
109+
def filelineno():
    """Return filelineno() of the module-wide FileInput instance.

    Raises RuntimeError if no instance is active.
    """
    active = _state
    if active:
        return active.filelineno()
    raise RuntimeError("no active input()")
113+
114+
def isfirstline():
    """Return isfirstline() of the module-wide FileInput instance.

    Raises RuntimeError if no instance is active.
    """
    active = _state
    if active:
        return active.isfirstline()
    raise RuntimeError("no active input()")
118+
119+
def isstdin():
    """Return isstdin() of the module-wide FileInput instance.

    Raises RuntimeError if no instance is active.
    """
    active = _state
    if active:
        return active.isstdin()
    raise RuntimeError("no active input()")
123+
124+
class FileInput:
    """Read the lines of a sequence of files as one continuous stream.

    files   -- a single filename or a sequence of filenames; when empty,
               sys.argv[1:] is used, and when that is empty too, ('-',).
               The name '-' stands for sys.stdin.
    inplace -- when true, each named file is renamed to a backup file and
               sys.stdout is redirected to a new file with the original
               name while that file is being read.
    backup  -- extension for the backup file; when empty, ".bak" is used
               and the backup is removed once the file is finished.

    Lines are obtained either through readline() or by indexing the
    instance with consecutive integers starting at 0 (which is what a
    for loop does).
    """

    def __init__(self, files=(), inplace=0, backup=""):
        # Establish every state attribute before touching the arguments,
        # so that close() (and therefore __del__) is safe even when the
        # argument handling below raises.
        self._files = ()
        self._inplace = inplace
        self._backup = backup
        self._savestdout = None
        self._output = None
        self._filename = None
        self._lineno = 0
        self._filelineno = 0
        self._file = None
        self._isstdin = 0
        # Read by nextfile(); must exist before any file has been opened.
        self._backupfilename = None

        if isinstance(files, str):
            files = (files,)
        else:
            files = tuple(files)
        if not files:
            files = tuple(sys.argv[1:])
        if not files:
            files = ('-',)
        self._files = files

    def __del__(self):
        self.close()

    def close(self):
        """Finish the current file and discard the remaining filenames."""
        self.nextfile()
        self._files = ()

    def __getitem__(self, i):
        """Return line number i, which must be the next line in sequence.

        Raises RuntimeError when i is not the number of lines read so far,
        and IndexError when the input is exhausted.
        """
        if i != self._lineno:
            raise RuntimeError("accessing lines out of order")
        line = self.readline()
        if not line:
            raise IndexError("end of input reached")
        return line

    def nextfile(self):
        """Close the current file, undoing any in-place redirection.

        Has no effect when no file is open.  Each piece of state is
        cleared before it is acted on, so a failure part-way through does
        not leave a stale reference behind.
        """
        savestdout = self._savestdout
        self._savestdout = None
        if savestdout is not None:
            sys.stdout = savestdout

        output = self._output
        self._output = None
        if output is not None:
            output.close()

        current = self._file
        self._file = None
        # sys.stdin is not ours to close.
        if current is not None and not self._isstdin:
            current.close()

        backupfilename = self._backupfilename
        self._backupfilename = None
        # Without an explicit backup extension the backup is temporary.
        if backupfilename and not self._backup:
            try:
                os.unlink(backupfilename)
            except os.error:
                # Best effort: a backup that cannot be removed is left.
                pass

        self._isstdin = 0

    def readline(self):
        """Return the next line of input, or "" when all files are read.

        Files are opened lazily; exhausted and empty files are closed and
        skipped.  Opening or reading a file may raise IOError.
        """
        # A loop rather than recursion, so that a long run of empty files
        # cannot exhaust the interpreter's recursion limit.
        while True:
            if self._file is None:
                if not self._files:
                    return ""
                self._filename = self._files[0]
                self._files = self._files[1:]
                self._filelineno = 0
                self._isstdin = 0
                self._backupfilename = None
                if self._filename == '-':
                    self._filename = '<stdin>'
                    self._file = sys.stdin
                    self._isstdin = 1
                elif self._inplace:
                    self._backupfilename = (
                        self._filename + (self._backup or ".bak"))
                    # Remove a leftover backup so the rename can succeed.
                    try:
                        os.unlink(self._backupfilename)
                    except os.error:
                        pass
                    # The next three lines may raise IOError
                    os.rename(self._filename, self._backupfilename)
                    self._file = open(self._backupfilename, "r")
                    self._output = open(self._filename, "w")
                    self._savestdout = sys.stdout
                    sys.stdout = self._output
                else:
                    # This may raise IOError
                    self._file = open(self._filename, "r")
            line = self._file.readline()
            if line:
                self._lineno = self._lineno + 1
                self._filelineno = self._filelineno + 1
                return line
            # End of this file: close it and try the next one.
            self.nextfile()

    def filename(self):
        """Return the name of the file most recently opened, or None."""
        return self._filename

    def lineno(self):
        """Return the number of lines read so far across all files."""
        return self._lineno

    def filelineno(self):
        """Return the number of lines read so far from the current file."""
        return self._filelineno

    def isfirstline(self):
        """Return true iff exactly one line of the current file was read."""
        return self._filelineno == 1

    def isstdin(self):
        """Return true iff the current file is sys.stdin."""
        return self._isstdin
237+
238+
def _test():
    """Command-line self-test: number and echo every input line.

    Options: -i enables in-place mode, -b EXT sets the backup extension.
    Remaining arguments are the files to read.
    """
    import getopt
    inplace = 0
    backup = 0
    opts, args = getopt.getopt(sys.argv[1:], "ib:")
    for flag, value in opts:
        if flag == '-i':
            inplace = 1
        if flag == '-b':
            backup = value
    for line in input(args, inplace=inplace, backup=backup):
        # Strip one trailing newline, then one trailing carriage return.
        if line[-1:] == '\n':
            line = line[:-1]
        if line[-1:] == '\r':
            line = line[:-1]
        marker = isfirstline() and "*" or ""
        print("%d: %s[%d]%s %s" % (lineno(), filename(), filelineno(),
                                   marker, line))
    # Final summary showing the state after the last line was read.
    print("%d: %s[%d]" % (lineno(), filename(), filelineno()))
252+
253+
# Run the self-test when the module is executed as a script.
if __name__ == "__main__":
    _test()

0 commit comments

Comments
 (0)