"""Conversion pipeline templates.

The problem:
------------

Suppose you have some data that you want to convert to another format,
such as from GIF image format to PPM image format. Maybe the
conversion involves several steps (e.g. piping it through compress or
uuencode). Some of the conversion steps may require that their input
is a disk file, others may be able to read standard input; similar for
their output. The input to the entire conversion may also be read
from a disk file or from an open file, and similar for its output.

The module lets you construct a pipeline template by sticking one or
more conversion steps together. It will take care of creating and
removing temporary files if they are necessary to hold intermediate
data. You can then use the template to do conversions from many
different sources to many different destinations. The temporary
file names used are different each time the template is used.

The templates are objects so you can create templates for many
different conversion steps and store them in a dictionary, for
instance.


Directions:
-----------

To create a template:
    t = Template()

To add a conversion step to a template:
    t.append(command, kind)
where kind is a string of two characters: the first is '-' if the
command reads its standard input or 'f' if it requires a file; the
second likewise for the output. The command must be valid /bin/sh
syntax. If input or output files are required, they are passed as
$IN and $OUT; otherwise, it must be possible to use the command in
a pipeline.

To add a conversion step at the beginning:
    t.prepend(command, kind)

To convert a file to another file using a template:
    sts = t.copy(infile, outfile)
If infile or outfile are the empty string, standard input is read or
standard output is written, respectively. The return value is the
exit status of the conversion pipeline.

To open a file for reading or writing through a conversion pipeline:
    fp = t.open(file, mode)
where mode is 'r' to read the file, or 'w' to write it -- just like
for the built-in function open() or for os.popen().

To create a new template object initialized to a given one:
    t2 = t.clone()

For an example, see the function test() at the end of the file.
"""  # '


import re
import os
import tempfile
import string

# Only the Template class is exported by "from <module> import *".
__all__ = ["Template"]

# Conversion step kinds
#
# Each kind is a two-character string; the first character describes the
# step's input and the second its output ('f' = needs a real file,
# '-' = uses standard input/output).

FILEIN_FILEOUT = 'ff'  # Must read & write real files
STDIN_FILEOUT = '-f'   # Must write a real file
FILEIN_STDOUT = 'f-'   # Must read a real file
STDIN_STDOUT = '--'    # Normal pipeline element
SOURCE = '.-'          # Must be first, writes stdout
SINK = '-.'            # Must be last, reads stdin

# Every kind accepted by Template.append() / Template.prepend().
stepkinds = [
    FILEIN_FILEOUT,
    STDIN_FILEOUT,
    FILEIN_STDOUT,
    STDIN_STDOUT,
    SOURCE,
    SINK,
]


class Template:
    """Class representing a pipeline template.

    Attributes:
        steps: list of (cmd, kind) tuples, in execution order.
        debugging: when true, makepipeline() prints the generated
            command and prefixes it with 'set -x; '.
    """

    def __init__(self):
        """Template() returns a fresh pipeline template."""
        self.debugging = 0
        self.reset()

    def __repr__(self):
        """t.__repr__() implements repr(t)."""
        return '<Template instance, steps=%r>' % (self.steps,)

    def reset(self):
        """t.reset() restores a pipeline template to its initial state."""
        self.steps = []

    def clone(self):
        """t.clone() returns a new pipeline template with identical
        initial state as the current one."""
        t = Template()
        # Copy the list so later append/prepend calls on either template
        # do not affect the other.
        t.steps = self.steps[:]
        t.debugging = self.debugging
        return t

    def debug(self, flag):
        """t.debug(flag) turns debugging on or off."""
        self.debugging = flag

    def append(self, cmd, kind):
        """t.append(cmd, kind) adds a new step at the end.

        Raises TypeError if cmd is not a string.  Raises ValueError if
        kind is not one of stepkinds, if kind is SOURCE, if the template
        already ends with a SINK step, or if kind requires an input or
        output file but cmd does not mention $IN or $OUT respectively.
        """
        if not isinstance(cmd, str):
            raise TypeError('Template.append: cmd must be a string')
        if kind not in stepkinds:
            raise ValueError('Template.append: bad kind %r' % (kind,))
        if kind == SOURCE:
            raise ValueError('Template.append: SOURCE can only be prepended')
        if self.steps and self.steps[-1][1] == SINK:
            raise ValueError('Template.append: already ends with SINK')
        if kind[0] == 'f' and not re.search(r'\$IN\b', cmd):
            raise ValueError('Template.append: missing $IN in cmd')
        if kind[1] == 'f' and not re.search(r'\$OUT\b', cmd):
            raise ValueError('Template.append: missing $OUT in cmd')
        self.steps.append((cmd, kind))

    def prepend(self, cmd, kind):
        """t.prepend(cmd, kind) adds a new step at the front.

        Raises TypeError if cmd is not a string.  Raises ValueError if
        kind is not one of stepkinds, if kind is SINK, if the template
        already begins with a SOURCE step, or if kind requires an input
        or output file but cmd does not mention $IN or $OUT respectively.
        """
        if not isinstance(cmd, str):
            raise TypeError('Template.prepend: cmd must be a string')
        if kind not in stepkinds:
            raise ValueError('Template.prepend: bad kind %r' % (kind,))
        if kind == SINK:
            raise ValueError('Template.prepend: SINK can only be appended')
        if self.steps and self.steps[0][1] == SOURCE:
            raise ValueError('Template.prepend: already begins with SOURCE')
        if kind[0] == 'f' and not re.search(r'\$IN\b', cmd):
            raise ValueError('Template.prepend: missing $IN in cmd')
        if kind[1] == 'f' and not re.search(r'\$OUT\b', cmd):
            raise ValueError('Template.prepend: missing $OUT in cmd')
        self.steps.insert(0, (cmd, kind))

    def open(self, file, rw):
        """t.open(file, rw) returns a pipe or file object open for
        reading or writing; the file is the other end of the pipeline.

        Raises ValueError if rw is neither 'r' nor 'w'.
        """
        if rw == 'r':
            return self.open_r(file)
        if rw == 'w':
            return self.open_w(file)
        raise ValueError(
            'Template.open: rw must be \'r\' or \'w\', not %r' % (rw,))

    def open_r(self, file):
        """t.open_r(file) and t.open_w(file) implement
        t.open(file, 'r') and t.open(file, 'w') respectively."""
        if not self.steps:
            # Empty template: no pipeline needed, read the file directly.
            return open(file, 'r')
        if self.steps[-1][1] == SINK:
            raise ValueError('Template.open_r: pipeline ends with SINK')
        # The pipeline reads from file; its standard output is the pipe.
        cmd = self.makepipeline(file, '')
        return os.popen(cmd, 'r')

    def open_w(self, file):
        """t.open_w(file) implements t.open(file, 'w'); see open_r()."""
        if not self.steps:
            # Empty template: no pipeline needed, write the file directly.
            return open(file, 'w')
        if self.steps[0][1] == SOURCE:
            raise ValueError('Template.open_w: pipeline begins with SOURCE')
        # The pipe is the pipeline's standard input; it writes to file.
        cmd = self.makepipeline('', file)
        return os.popen(cmd, 'w')

    def copy(self, infile, outfile):
        """t.copy(infile, outfile) runs the pipeline from infile to
        outfile and returns the value of os.system() for it."""
        return os.system(self.makepipeline(infile, outfile))

    def makepipeline(self, infile, outfile):
        """t.makepipeline(infile, outfile) returns the shell command for
        this template's steps, as built by the module-level
        makepipeline() function.  When debugging is on, the command is
        printed and prefixed with 'set -x; '."""
        cmd = makepipeline(infile, self.steps, outfile)
        if self.debugging:
            print(cmd)
            cmd = 'set -x; ' + cmd
        return cmd


def makepipeline(infile, steps, outfile):
    """Return the /bin/sh command text that runs steps from infile to outfile.

    infile and outfile are file names; an empty string means the
    pipeline's own standard input or standard output.  steps is a
    sequence of (cmd, kind) pairs as stored in Template.steps.

    Stages that need real files are connected through temporary files
    created with tempfile.mkstemp(); the returned command removes them
    when it finishes and installs a trap that removes them on signals.
    """
    # Build a list with for each command:
    # [input filename or '', command string, kind, output filename or '']
    stages = [['', cmd, kind, ''] for cmd, kind in steps]
    #
    # Make sure there is at least one step
    #
    if not stages:
        stages.append(['', 'cat', '--', ''])
    #
    # Take care of the input and output ends: a stage that needs a real
    # file cannot sit directly on stdin/stdout, so put a 'cat' in between.
    #
    if stages[0][2][0] == 'f' and not infile:
        stages.insert(0, ['', 'cat', '--', ''])
    stages[0][0] = infile
    #
    if stages[-1][2][1] == 'f' and not outfile:
        stages.append(['', 'cat', '--', ''])
    stages[-1][-1] = outfile
    #
    # Invent temporary files to connect stages that need files
    #
    garbage = []
    for left, right in zip(stages, stages[1:]):
        if left[2][1] == 'f' or right[2][0] == 'f':
            (fd, temp) = tempfile.mkstemp()
            # Only the name is needed; the shell commands reopen the file.
            os.close(fd)
            garbage.append(temp)
            left[-1] = right[0] = temp
    #
    # Attach $IN/$OUT assignments and stdin/stdout redirections
    #
    for stage in stages:
        inf, cmd, kind, outf = stage
        if kind[1] == 'f':
            cmd = 'OUT=' + quote(outf) + '; ' + cmd
        if kind[0] == 'f':
            cmd = 'IN=' + quote(inf) + '; ' + cmd
        if kind[0] == '-' and inf:
            cmd = cmd + ' <' + quote(inf)
        if kind[1] == '-' and outf:
            cmd = cmd + ' >' + quote(outf)
        stage[1] = cmd
    #
    # Join the stages: a stage without an input file reads from the
    # previous stage through a pipe, otherwise it starts a new command line.
    #
    parts = [stages[0][1]]
    for stage in stages[1:]:
        cmd, kind = stage[1:3]
        if stage[0] == '':
            if 'f' in kind:
                # The stage is 'VAR=...; cmd', i.e. several statements;
                # group them so they form one pipeline element.
                cmd = '{ ' + cmd + '; }'
            parts.append(' |\n' + cmd)
        else:
            parts.append('\n' + cmd)
    cmdlist = ''.join(parts)
    #
    # Remove the temporary files at the end and on signals
    #
    if garbage:
        rmcmd = 'rm -f' + ''.join(' ' + quote(temp) for temp in garbage)
        trapcmd = 'trap ' + quote(rmcmd + '; exit') + ' 1 2 3 13 14 15'
        # NOTE(review): rmcmd is the last command in the list, so it
        # presumably determines the exit status when temporary files are
        # used -- confirm whether Template.copy() callers rely on that.
        cmdlist = trapcmd + '\n' + cmdlist + '\n' + rmcmd
    #
    return cmdlist


# Reliably quote a string as a single argument for /bin/sh

# Characters that are safe unquoted
_safechars = string.ascii_letters + string.digits + '!@%_-+=:,./'
# Characters that are unsafe inside "double quotes"
_funnychars = '"`$\\'


def quote(file):
    """Return a shell-escaped version of the file string.

    The empty string becomes ''.  A string made only of characters in
    _safechars is returned unchanged.  Otherwise the string is wrapped
    in single quotes, unless it contains a single quote itself, in which
    case it is wrapped in double quotes with every character from
    _funnychars backslash-escaped.
    """
    if not file:
        return "''"
    if all(c in _safechars for c in file):
        return file
    if '\'' not in file:
        return '\'' + file + '\''
    # Single quotes cannot be escaped inside single quotes, so fall back
    # to double quotes and escape what stays special there.
    escaped = ''.join('\\' + c if c in _funnychars else c for c in file)
    return '"' + escaped + '"'