# Copyright (C) 2001-2010 Python Software Foundation
# Contact: email-sig@python.org

"""Classes to generate plain text from a message object tree."""

# Names exported by ``from <this module> import *``.
__all__ = ['Generator', 'DecodedGenerator']

import re
import sys
import time
import random
import warnings

from cStringIO import StringIO
from email.header import Header

# Glue used to build the `_handle_<maintype>_<subtype>' method names that
# Generator._dispatch() looks up.
UNDERSCORE = '_'
# Line separator used when joining flattened sub-parts.
NL = '\n'

# Matches the literal text "From " at the start of any line (MULTILINE), i.e.
# the lines that get a `>' put in front of them when From_ mangling is on.
fcre = re.compile(r'^From ', re.MULTILINE)

def _is8bitstring(s):
    """Return True if s is a `str' that cannot be decoded as us-ascii.

    Any object that is not a `str' instance, and any `str' that decodes
    cleanly as us-ascii, yields False.
    """
    if not isinstance(s, str):
        return False
    try:
        # Only the success or failure of the decode matters; the decoded
        # value itself is thrown away.
        unicode(s, 'us-ascii')
    except UnicodeError:
        return True
    return False


class Generator:
    """Generates output from a Message object tree.

    This basic generator writes the message to the given file object as plain
    text.
    """
    #
    # Public interface
    #

    def __init__(self, outfp, mangle_from_=True, maxheaderlen=78):
        """Create the generator for message flattening.

        outfp is the output file-like object for writing the message to.  It
        must have a write() method.

        Optional mangle_from_ is a flag that, when True (the default), escapes
        From_ lines in the body of the message by putting a `>' in front of
        them.

        Optional maxheaderlen specifies the longest length for a non-continued
        header.  When a header line is longer (in characters, with tabs
        expanded to 8 spaces) than maxheaderlen, the header will split as
        defined in the Header class.  Set maxheaderlen to zero to disable
        header wrapping.  The default is 78, as recommended (but not required)
        by RFC 2822.
        """
        self._fp = outfp
        self._mangle_from_ = mangle_from_
        self._maxheaderlen = maxheaderlen

    def write(self, s):
        """Write the string s to the current output file object."""
        # Just delegate to the file object
        self._fp.write(s)

    def flatten(self, msg, unixfrom=False):
        """Print the message object tree rooted at msg to the output file
        specified when the Generator instance was created.

        unixfrom is a flag that forces the printing of a Unix From_ delimiter
        before the first object in the message tree.  If the original message
        has no From_ delimiter, a `standard' one is crafted.  By default, this
        is False to inhibit the printing of any From_ delimiter.

        Note that for subobjects, no From_ line is printed.
        """
        if unixfrom:
            ufrom = msg.get_unixfrom()
            if not ufrom:
                ufrom = 'From nobody ' + time.ctime(time.time())
            print >> self._fp, ufrom
        self._write(msg)

    def clone(self, fp):
        """Clone this generator with the exact same options."""
        return self.__class__(fp, self._mangle_from_, self._maxheaderlen)

    #
    # Protected interface - undocumented ;/
    #

    def _write(self, msg):
        """Write msg's headers followed by its body to the output file."""
        # We can't write the headers yet because of the following scenario:
        # say a multipart message includes the boundary string somewhere in
        # its body.  We'd have to calculate the new boundary /before/ we write
        # the headers so that we can write the correct Content-Type:
        # parameter.
        #
        # The way we do this, so as to make the _handle_*() methods simpler,
        # is to cache any subpart writes into a StringIO.  Then we write the
        # headers and the StringIO contents.  That way, subpart handlers can
        # Do The Right Thing, and can still modify the Content-Type: header if
        # necessary.
        oldfp = self._fp
        try:
            self._fp = sfp = StringIO()
            self._dispatch(msg)
        finally:
            # Always restore the real output, even if a handler raised.
            self._fp = oldfp
        # Write the headers.  First we see if the message object wants to
        # handle that itself.  If not, we'll do it generically.
        meth = getattr(msg, '_write_headers', None)
        if meth is None:
            self._write_headers(msg)
        else:
            meth(self)
        self._fp.write(sfp.getvalue())

    def _dispatch(self, msg):
        """Call the most specific body handler available for msg."""
        # Get the Content-Type: for the message, then try to dispatch to
        # self._handle_<maintype>_<subtype>().  If there's no handler for the
        # full MIME type, then dispatch to self._handle_<maintype>().  If
        # that's missing too, then dispatch to self._writeBody().
        main = msg.get_content_maintype()
        sub = msg.get_content_subtype()
        specific = UNDERSCORE.join((main, sub)).replace('-', '_')
        meth = getattr(self, '_handle_' + specific, None)
        if meth is None:
            generic = main.replace('-', '_')
            meth = getattr(self, '_handle_' + generic, None)
            if meth is None:
                meth = self._writeBody
        meth(msg)

    #
    # Default handlers
    #

    def _write_headers(self, msg):
        """Write every header of msg, then the blank separator line."""
        for h, v in msg.items():
            # The trailing comma keeps the value on the same output line.
            print >> self._fp, '%s:' % h,
            if self._maxheaderlen == 0:
                # Explicit no-wrapping
                print >> self._fp, v
            elif isinstance(v, Header):
                # Header instances know what to do
                print >> self._fp, v.encode()
            elif _is8bitstring(v):
                # If we have raw 8bit data in a byte string, we have no idea
                # what the encoding is.  There is no safe way to split this
                # string.  If it's ascii-subset, then we could do a normal
                # ascii split, but if it's multibyte then we could break the
                # string.  There's no way to know so the least harm seems to
                # be to not split the string and risk it being too long.
                print >> self._fp, v
            else:
                # Header's got lots of smarts, so use it.  Note that this is
                # fundamentally broken though because we lose idempotency when
                # the header string is continued with tabs.  It will now be
                # continued with spaces.  This was reversedly broken before we
                # fixed bug 1974.  Either way, we lose.
                print >> self._fp, Header(
                    v, maxlinelen=self._maxheaderlen, header_name=h).encode()
        # A blank line always separates headers from body
        print >> self._fp

    #
    # Handlers for writing types and subtypes
    #

    def _handle_text(self, msg):
        """Write msg's string payload, mangling From_ lines if enabled.

        A payload of None writes nothing; a non-string payload raises
        TypeError.
        """
        payload = msg.get_payload()
        if payload is None:
            return
        if not isinstance(payload, basestring):
            raise TypeError('string payload expected: %s' % type(payload))
        if self._mangle_from_:
            payload = fcre.sub('>From ', payload)
        self._fp.write(payload)

    # Default body handler
    _writeBody = _handle_text

    def _handle_multipart(self, msg):
        """Write preamble, boundary-delimited sub-parts and epilogue of msg."""
        # The trick here is to write out each part separately, merge them all
        # together, and then make sure that the boundary we've chosen isn't
        # present in the payload.
        msgtexts = []
        subparts = msg.get_payload()
        if subparts is None:
            subparts = []
        elif isinstance(subparts, basestring):
            # e.g. a non-strict parse of a message with no starting boundary.
            self._fp.write(subparts)
            return
        elif not isinstance(subparts, list):
            # Scalar payload
            subparts = [subparts]
        for part in subparts:
            s = StringIO()
            g = self.clone(s)
            g.flatten(part, unixfrom=False)
            msgtexts.append(s.getvalue())
        # BAW: What about boundaries that are wrapped in double-quotes?
        boundary = msg.get_boundary()
        if not boundary:
            # Create a boundary that doesn't appear in any of the
            # message texts.
            alltext = NL.join(msgtexts)
            boundary = _make_boundary(alltext)
            msg.set_boundary(boundary)
        # If there's a preamble, write it out, with a trailing CRLF
        if msg.preamble is not None:
            if self._mangle_from_:
                preamble = fcre.sub('>From ', msg.preamble)
            else:
                preamble = msg.preamble
            print >> self._fp, preamble
        # dash-boundary transport-padding CRLF
        print >> self._fp, '--' + boundary
        # body-part
        if msgtexts:
            self._fp.write(msgtexts.pop(0))
        # *encapsulation
        # --> delimiter transport-padding
        # --> CRLF body-part
        for body_part in msgtexts:
            # delimiter transport-padding CRLF
            print >> self._fp, '\n--' + boundary
            # body-part
            self._fp.write(body_part)
        # close-delimiter transport-padding
        self._fp.write('\n--' + boundary + '--')
        if msg.epilogue is not None:
            print >> self._fp
            if self._mangle_from_:
                epilogue = fcre.sub('>From ', msg.epilogue)
            else:
                epilogue = msg.epilogue
            self._fp.write(epilogue)

    def _handle_multipart_signed(self, msg):
        """Write a multipart/signed message with header wrapping disabled."""
        # The contents of signed parts has to stay unmodified in order to keep
        # the signature intact per RFC1847 2.1, so we disable header wrapping.
        # RDM: This isn't enough to completely preserve the part, but it helps.
        old_maxheaderlen = self._maxheaderlen
        try:
            self._maxheaderlen = 0
            self._handle_multipart(msg)
        finally:
            self._maxheaderlen = old_maxheaderlen

    def _handle_message_delivery_status(self, msg):
        """Write each header block of the payload, newline-separated."""
        # We can't just write the headers directly to self's file object
        # because this will leave an extra newline between the last header
        # block and the boundary.  Sigh.
        blocks = []
        for part in msg.get_payload():
            s = StringIO()
            g = self.clone(s)
            g.flatten(part, unixfrom=False)
            text = s.getvalue()
            lines = text.split('\n')
            # Strip off the unnecessary trailing empty line
            if lines and lines[-1] == '':
                blocks.append(NL.join(lines[:-1]))
            else:
                blocks.append(text)
        # Now join all the blocks with an empty line.  This has the lovely
        # effect of separating each block with an empty line, but not adding
        # an extra one after the last one.
        self._fp.write(NL.join(blocks))

    def _handle_message(self, msg):
        """Write the flattened sub-message, or the raw string payload."""
        s = StringIO()
        g = self.clone(s)
        # The payload of a message/rfc822 part should be a multipart sequence
        # of length 1.  The zeroth element of the list should be the Message
        # object for the subpart.  Extract that object, stringify it, and
        # write it out.
        # Except, it turns out, when it's a string instead, which happens when
        # and only when HeaderParser is used on a message of mime type
        # message/rfc822.  Such messages are generated by, for example,
        # Groupwise when forwarding unadorned messages.  (Issue 7970.)  So
        # in that case we just emit the string body.
        payload = msg.get_payload()
        if isinstance(payload, list):
            g.flatten(msg.get_payload(0), unixfrom=False)
            payload = s.getvalue()
        self._fp.write(payload)


# Default replacement text DecodedGenerator emits for a non-text part; it is
# expanded with %(keyword)s substitutions.
_FMT = '[Non-text (%(type)s) part of message omitted, filename %(filename)s]'

class DecodedGenerator(Generator):
    """Generates a text representation of a message.

    Like the Generator base class, except that non-text parts are substituted
    with a format string representing the part.
    """
    def __init__(self, outfp, mangle_from_=True, maxheaderlen=78, fmt=None):
        """Like Generator.__init__() except that an additional optional
        argument is allowed.

        Walks through all subparts of a message.  If the subpart is of main
        type `text', then it prints the decoded payload of the subpart.

        Otherwise, fmt is a format string that is used instead of the message
        payload.  fmt is expanded with the following keywords (in
        %(keyword)s format):

        type       : Full MIME type of the non-text part
        maintype   : Main MIME type of the non-text part
        subtype    : Sub-MIME type of the non-text part
        filename   : Filename of the non-text part
        description: Description associated with the non-text part
        encoding   : Content transfer encoding of the non-text part

        The default value for fmt is None, meaning

        [Non-text (%(type)s) part of message omitted, filename %(filename)s]
        """
        Generator.__init__(self, outfp, mangle_from_, maxheaderlen)
        if fmt is None:
            self._fmt = _FMT
        else:
            self._fmt = fmt

    def clone(self, fp):
        """Clone this generator with the exact same options, including fmt."""
        # The base clone() only forwards mangle_from_ and maxheaderlen, so a
        # custom fmt would otherwise be silently reset to the default.  Copy
        # it afterwards rather than passing a fourth constructor argument, so
        # subclasses with a three-argument __init__ keep working.
        twin = Generator.clone(self, fp)
        twin._fmt = self._fmt
        return twin

    def _dispatch(self, msg):
        """Print decoded text parts and a fmt placeholder for other parts."""
        for part in msg.walk():
            maintype = part.get_content_maintype()
            if maintype == 'text':
                print >> self, part.get_payload(decode=True)
            elif maintype == 'multipart':
                # Just skip this
                pass
            else:
                print >> self, self._fmt % {
                    'type'       : part.get_content_type(),
                    'maintype'   : maintype,
                    'subtype'    : part.get_content_subtype(),
                    'filename'   : part.get_filename('[no filename]'),
                    'description': part.get('Content-Description',
                                            '[no description]'),
                    'encoding'   : part.get('Content-Transfer-Encoding',
                                            '[no encoding]'),
                    }


# Helper
# sys.maxint is the historical upper bound used for the random token; where
# the interpreter does not provide it, sys.maxsize is used instead so that the
# token width and range stay well defined.
_maxint = getattr(sys, 'maxint', None) or sys.maxsize
_width = len(repr(_maxint-1))
# Zero-padded decimal format, _width digits wide.
_fmt = '%%0%dd' % _width

def _make_boundary(text=None):
    """Return a random MIME boundary string.

    The boundary is fifteen `=' characters, a zero-padded random number and a
    closing `=='.  If text is given, a `.N' suffix (N = 0, 1, 2, ...) is
    appended as needed until neither `--boundary' nor `--boundary--' occurs
    on a line of its own in text.
    """
    # Craft a random boundary.  If text is given, ensure that the chosen
    # boundary doesn't appear in the text.
    token = random.randrange(_maxint)
    boundary = ('=' * 15) + (_fmt % token) + '=='
    if text is None:
        return boundary
    b = boundary
    counter = 0
    while True:
        # Match the candidate used as a delimiter or as a close-delimiter.
        cre = re.compile('^--' + re.escape(b) + '(--)?$', re.MULTILINE)
        if not cre.search(text):
            break
        b = boundary + '.' + str(counter)
        counter += 1
    return b