source: python/trunk/Lib/email/generator.py

Last change on this file was 391, checked in by dmik, 11 years ago

python: Merge vendor 2.7.6 to trunk.

  • Property svn:eol-style set to native
File size: 13.9 KB
Line 
1# Copyright (C) 2001-2010 Python Software Foundation
2# Contact: email-sig@python.org
3
4"""Classes to generate plain text from a message object tree."""
5
6__all__ = ['Generator', 'DecodedGenerator']
7
8import re
9import sys
10import time
11import random
12import warnings
13
14from cStringIO import StringIO
15from email.header import Header
16
# Glue used when building handler-method names out of MIME type components.
UNDERSCORE = '_'
# Line separator used when joining chunks of flattened message text.
NL = '\n'

# Matches a Unix "From " delimiter at the start of any line; used to
# escape such lines in message bodies by prefixing them with '>'.
fcre = re.compile(r'^From ', re.M)
21
22def _is8bitstring(s):
23 if isinstance(s, str):
24 try:
25 unicode(s, 'us-ascii')
26 except UnicodeError:
27 return True
28 return False
29
30
31
32
class Generator:
    """Flatten a Message object tree into plain text.

    The text is written to the file-like object supplied at construction
    time.
    """
    #
    # Public interface
    #

    def __init__(self, outfp, mangle_from_=True, maxheaderlen=78):
        """Set up a generator that writes to outfp.

        outfp is the file-like object receiving the flattened message; it
        has to provide a write() method.

        mangle_from_ (default True) controls whether body lines starting
        with "From " are escaped by prefixing them with `>'.

        maxheaderlen is the longest allowed length of a non-continued header
        line (in characters, with tabs expanded to 8 spaces).  Longer
        headers are split as defined by the Header class.  Zero disables
        header wrapping altogether.  The default of 78 is the value
        recommended (but not required) by RFC 2822.
        """
        self._maxheaderlen = maxheaderlen
        self._mangle_from_ = mangle_from_
        self._fp = outfp

    def write(self, s):
        """Write s straight through to the underlying file object."""
        self._fp.write(s)

    def flatten(self, msg, unixfrom=False):
        """Write the message tree rooted at msg to the output file.

        When unixfrom is true, a Unix From_ delimiter line is emitted before
        the message itself: the one stored on msg if it has one, otherwise a
        `standard' "From nobody <current time>" line.  The default (False)
        suppresses the delimiter.

        Subparts never get a From_ line.
        """
        if unixfrom:
            from_line = msg.get_unixfrom() or (
                'From nobody ' + time.ctime(time.time()))
            print >> self._fp, from_line
        self._write(msg)

    def clone(self, fp):
        """Return a new generator writing to fp with the same options."""
        return self.__class__(fp, self._mangle_from_, self._maxheaderlen)

    #
    # Protected interface - undocumented ;/
    #

    def _write(self, msg):
        # Headers cannot be emitted first: a multipart body may force a new
        # boundary to be chosen, and that boundary has to end up in the
        # Content-Type: header.  So the body is rendered into a scratch
        # StringIO (which lets the _handle_*() methods adjust headers while
        # they work), and only afterwards are the headers written, followed
        # by the buffered body.
        real_fp = self._fp
        body_buffer = StringIO()
        self._fp = body_buffer
        try:
            self._dispatch(msg)
        finally:
            self._fp = real_fp
        # A message object may supply its own header writer; otherwise the
        # generic one is used.
        header_writer = getattr(msg, '_write_headers', None)
        if header_writer is not None:
            header_writer(self)
        else:
            self._write_headers(msg)
        self._fp.write(body_buffer.getvalue())

    def _dispatch(self, msg):
        # Handler lookup order: self._handle_<maintype>_<subtype>(), then
        # self._handle_<maintype>(), then self._writeBody().  Dashes in the
        # MIME type become underscores in the method name.
        maintype = msg.get_content_maintype()
        subtype = msg.get_content_subtype()
        for mime_name in (UNDERSCORE.join((maintype, subtype)), maintype):
            handler = getattr(
                self, '_handle_' + mime_name.replace('-', '_'), None)
            if handler is not None:
                break
        else:
            handler = self._writeBody
        handler(msg)

    #
    # Default handlers
    #

    def _write_headers(self, msg):
        for name, value in msg.items():
            # The trailing comma keeps the value on the same output line.
            print >> self._fp, '%s:' % name,
            if self._maxheaderlen == 0:
                # Wrapping explicitly disabled: emit the value untouched.
                rendered = value
            elif isinstance(value, Header):
                # A Header instance already knows how to fold itself.
                rendered = value.encode()
            elif _is8bitstring(value):
                # Raw 8bit bytes of unknown encoding cannot be split safely
                # (a multibyte sequence might be cut in half), so the least
                # harmful choice is to leave the value unsplit even if the
                # line ends up too long.
                rendered = value
            else:
                # Let Header do the folding.  This is not idempotent: a
                # header that was continued with tabs comes out continued
                # with spaces.  It was broken the other way round before
                # bug 1974 was fixed, so one of the two cases always loses.
                rendered = Header(
                    value, maxlinelen=self._maxheaderlen,
                    header_name=name).encode()
            print >> self._fp, rendered
        # The blank line that separates the headers from the body.
        print >> self._fp

    #
    # Handlers for writing types and subtypes
    #

    def _handle_text(self, msg):
        body = msg.get_payload()
        if body is None:
            return
        if not isinstance(body, basestring):
            raise TypeError('string payload expected: %s' % type(body))
        self._fp.write(
            fcre.sub('>From ', body) if self._mangle_from_ else body)

    # Default body handler
    _writeBody = _handle_text

    def _handle_multipart(self, msg):
        # Strategy: flatten every subpart on its own first, so that a
        # boundary can be picked which does not occur in any of them.
        subparts = msg.get_payload()
        if subparts is None:
            subparts = []
        elif isinstance(subparts, basestring):
            # e.g. a non-strict parse of a message with no starting boundary.
            self._fp.write(subparts)
            return
        elif not isinstance(subparts, list):
            # A single non-list payload is treated as a one-element list.
            subparts = [subparts]
        flattened = []
        for part in subparts:
            part_buffer = StringIO()
            self.clone(part_buffer).flatten(part, unixfrom=False)
            flattened.append(part_buffer.getvalue())
        # BAW: What about boundaries that are wrapped in double-quotes?
        boundary = msg.get_boundary()
        if not boundary:
            # No boundary yet: invent one that is absent from every
            # flattened part and record it on the message.
            boundary = _make_boundary(NL.join(flattened))
            msg.set_boundary(boundary)
        # A preamble, when present, is written followed by a newline.
        if msg.preamble is not None:
            preamble = msg.preamble
            if self._mangle_from_:
                preamble = fcre.sub('>From ', preamble)
            print >> self._fp, preamble
        # dash-boundary transport-padding CRLF
        print >> self._fp, '--' + boundary
        # body-part
        if flattened:
            self._fp.write(flattened[0])
        # *encapsulation
        # --> delimiter transport-padding
        # --> CRLF body-part
        for body_part in flattened[1:]:
            # delimiter transport-padding CRLF
            print >> self._fp, '\n--' + boundary
            # body-part
            self._fp.write(body_part)
        # close-delimiter transport-padding
        self._fp.write('\n--' + boundary + '--')
        if msg.epilogue is not None:
            print >> self._fp
            epilogue = msg.epilogue
            if self._mangle_from_:
                epilogue = fcre.sub('>From ', epilogue)
            self._fp.write(epilogue)

    def _handle_multipart_signed(self, msg):
        # RFC1847 2.1 requires the content of signed parts to stay
        # unmodified so the signature remains valid; header wrapping is
        # therefore switched off while the part is written.
        # RDM: This isn't enough to completely preserve the part, but it helps.
        saved_maxheaderlen = self._maxheaderlen
        self._maxheaderlen = 0
        try:
            self._handle_multipart(msg)
        finally:
            self._maxheaderlen = saved_maxheaderlen

    def _handle_message_delivery_status(self, msg):
        # Writing the header blocks directly to self's file object would
        # leave an extra newline between the last block and the boundary,
        # so each block is rendered separately and trimmed first.
        blocks = []
        for part in msg.get_payload():
            part_buffer = StringIO()
            self.clone(part_buffer).flatten(part, unixfrom=False)
            text = part_buffer.getvalue()
            # Drop one trailing newline, if there is one.
            blocks.append(text[:-1] if text.endswith('\n') else text)
        # Joining with a newline puts an empty line between consecutive
        # blocks without appending one after the final block.
        self._fp.write(NL.join(blocks))

    def _handle_message(self, msg):
        part_buffer = StringIO()
        sub_generator = self.clone(part_buffer)
        # A message/rfc822 payload is normally a list of length 1 whose only
        # element is the Message object of the subpart; that object is
        # flattened and the resulting text written out.
        # The payload is a plain string instead when (and only when)
        # HeaderParser was used on a message/rfc822 message -- Groupwise,
        # for example, produces such messages when forwarding unadorned
        # messages (Issue 7970).  The string is then emitted as it is.
        payload = msg.get_payload()
        if isinstance(payload, list):
            sub_generator.flatten(msg.get_payload(0), unixfrom=False)
            payload = part_buffer.getvalue()
        self._fp.write(payload)
293
294
295
296
# Default template substituted for every non-text, non-multipart part.
_FMT = '[Non-text (%(type)s) part of message omitted, filename %(filename)s]'

class DecodedGenerator(Generator):
    """Generates a text representation of a message.

    Like the Generator base class, except that non-text parts are substituted
    with a format string representing the part.
    """
    def __init__(self, outfp, mangle_from_=True, maxheaderlen=78, fmt=None):
        """Like Generator.__init__() except that an additional optional
        argument is allowed.

        Walks through all subparts of a message.  If the subpart is of main
        type `text', then it prints the decoded payload of the subpart.

        Otherwise, fmt is a format string that is used instead of the message
        payload.  fmt is expanded with the following keywords (in
        %(keyword)s format):

        type       : Full MIME type of the non-text part
        maintype   : Main MIME type of the non-text part
        subtype    : Sub-MIME type of the non-text part
        filename   : Filename of the non-text part
        description: Description associated with the non-text part
        encoding   : Content transfer encoding of the non-text part

        The default value for fmt is None, meaning

        [Non-text (%(type)s) part of message omitted, filename %(filename)s]
        """
        Generator.__init__(self, outfp, mangle_from_, maxheaderlen)
        if fmt is None:
            self._fmt = _FMT
        else:
            self._fmt = fmt

    def clone(self, fp):
        """Clone this generator with the exact same options, fmt included.

        The inherited clone() only forwards mangle_from_ and maxheaderlen,
        so a clone of a generator built with a custom fmt would fall back
        to the default template.  The stored format string is forwarded
        here so the clone renders non-text parts the same way.
        """
        return self.__class__(fp, self._mangle_from_, self._maxheaderlen,
                              self._fmt)

    def _dispatch(self, msg):
        # Visit every part of the tree via msg.walk() instead of dispatching
        # to the per-type _handle_*() methods.
        for part in msg.walk():
            maintype = part.get_content_maintype()
            if maintype == 'text':
                # `print >> self' goes through self.write(), i.e. to the
                # underlying output file.
                print >> self, part.get_payload(decode=True)
            elif maintype == 'multipart':
                # Just skip this
                pass
            else:
                # Any other part is replaced by the expanded format string.
                print >> self, self._fmt % {
                    'type'       : part.get_content_type(),
                    'maintype'   : part.get_content_maintype(),
                    'subtype'    : part.get_content_subtype(),
                    'filename'   : part.get_filename('[no filename]'),
                    'description': part.get('Content-Description',
                                            '[no description]'),
                    'encoding'   : part.get('Content-Transfer-Encoding',
                                            '[no encoding]'),
                    }
352
353
354
355
356# Helper
357_width = len(repr(sys.maxint-1))
358_fmt = '%%0%dd' % _width
359
360def _make_boundary(text=None):
361 # Craft a random boundary. If text is given, ensure that the chosen
362 # boundary doesn't appear in the text.
363 token = random.randrange(sys.maxint)
364 boundary = ('=' * 15) + (_fmt % token) + '=='
365 if text is None:
366 return boundary
367 b = boundary
368 counter = 0
369 while True:
370 cre = re.compile('^--' + re.escape(b) + '(--)?$', re.MULTILINE)
371 if not cre.search(text):
372 break
373 b = boundary + '.' + str(counter)
374 counter += 1
375 return b
Note: See TracBrowser for help on using the repository browser.