Context Navigation

← Previous Revision
Next Revision →
Blame
Revision Log

generator.py

Last change on this file was 391, checked in by dmik, 11 years ago
python: Merge vendor 2.7.6 to trunk.
Property svn:eol-style set to `native`
File size: 13.9 KB

Line
1	# Copyright (C) 2001-2010 Python Software Foundation
2	# Contact: email-sig@python.org
3
4	"""Classes to generate plain text from a message object tree."""
5
6	__all__ = ['Generator', 'DecodedGenerator']
7
8	import re
9	import sys
10	import time
11	import random
12	import warnings
13
14	from cStringIO import StringIO
15	from email.header import Header
16
# Small string constants shared by the generator classes below.
UNDERSCORE = '_'
NL = '\n'

# Matches "From " at the start of any line of a body; such lines are
# escaped with a leading ">" when From_ mangling is enabled.
fcre = re.compile(r'^From ', flags=re.MULTILINE)
21
def _is8bitstring(s):
    """Return True if s is a byte string that is not pure us-ascii.

    Anything that is not a `str` instance (including unicode objects)
    yields False.
    """
    if not isinstance(s, str):
        return False
    try:
        # Decoding is used purely as an ASCII check; the result is discarded.
        unicode(s, 'us-ascii')
    except UnicodeError:
        return True
    return False
29
30
31
32
class Generator:
    """Flatten a Message object tree into plain text.

    The text is written to the file-like object handed to the constructor.
    """
    # NOTE: this is deliberately left as a classic class; clone() relies on
    # self.__class__ to re-create instances of the same (sub)class.

    #
    # Public interface
    #

    def __init__(self, outfp, mangle_from_=True, maxheaderlen=78):
        """Set up a generator that flattens messages into outfp.

        outfp is the destination; it only needs to provide a write() method.

        mangle_from_ (default True) controls whether body lines that start
        with "From " are escaped by prefixing them with ">".

        maxheaderlen is the longest allowed non-continued header line
        (counted in characters, tabs expanded to 8 spaces).  Longer headers
        are split as defined by the Header class.  Zero turns header
        wrapping off.  The default of 78 follows the recommendation (not a
        requirement) of RFC 2822.
        """
        self._fp = outfp
        self._mangle_from_ = mangle_from_
        self._maxheaderlen = maxheaderlen

    def write(self, s):
        """Write s to the underlying file object."""
        self._fp.write(s)

    def flatten(self, msg, unixfrom=False):
        """Write the message tree rooted at msg to the output file.

        When unixfrom is true, a Unix From_ delimiter line is emitted before
        the message; if msg carries none, a standard "From nobody <ctime>"
        line is made up.  The default (False) suppresses the delimiter.

        Subobjects never get a From_ line.
        """
        if unixfrom:
            envelope = (msg.get_unixfrom()
                        or 'From nobody ' + time.ctime(time.time()))
            print >> self._fp, envelope
        self._write(msg)

    def clone(self, fp):
        """Return a new generator writing to fp with this one's options."""
        return self.__class__(fp, self._mangle_from_, self._maxheaderlen)

    #
    # Protected interface - undocumented ;/
    #

    def _write(self, msg):
        # The headers cannot be emitted first: a multipart body might
        # contain the current boundary string, in which case a new boundary
        # has to be chosen /before/ the Content-Type: header is written.
        #
        # To keep the _handle_*() methods simple, the body is rendered into
        # a temporary StringIO while self._fp points at it.  Afterwards the
        # headers are written, followed by the buffered body, so handlers
        # remain free to adjust Content-Type: while they run.
        saved_fp = self._fp
        try:
            body_buf = StringIO()
            self._fp = body_buf
            self._dispatch(msg)
        finally:
            self._fp = saved_fp
        # A message object may supply its own _write_headers(); otherwise
        # fall back to the generic implementation.
        header_writer = getattr(msg, '_write_headers', None)
        if header_writer is not None:
            header_writer(self)
        else:
            self._write_headers(msg)
        self._fp.write(body_buf.getvalue())

    def _dispatch(self, msg):
        # Handler lookup order:
        #   1. self._handle_<maintype>_<subtype>()
        #   2. self._handle_<maintype>()
        #   3. self._writeBody()
        # Dashes in the MIME type names are mapped to underscores.
        maintype = msg.get_content_maintype()
        subtype = msg.get_content_subtype()
        for candidate in (maintype + UNDERSCORE + subtype, maintype):
            handler = getattr(
                self, '_handle_' + candidate.replace('-', '_'), None)
            if handler is not None:
                break
        else:
            handler = self._writeBody
        handler(msg)

    #
    # Default handlers
    #

    def _write_headers(self, msg):
        for name, value in msg.items():
            # The trailing comma makes the following print statement emit
            # the separating space between "Name:" and the value.
            print >> self._fp, '%s:' % name,
            if self._maxheaderlen == 0:
                # Wrapping explicitly disabled.
                rendered = value
            elif isinstance(value, Header):
                # Header objects know how to encode themselves.
                rendered = value.encode()
            elif _is8bitstring(value):
                # Raw 8bit bytes of unknown encoding cannot be split safely
                # (a multibyte sequence might be cut in half), so emit the
                # value unsplit and accept that the line may be too long.
                rendered = value
            else:
                # Let Header do the folding.  This is not idempotent: a
                # header that was continued with tabs comes out continued
                # with spaces.  Before the fix for bug 1974 it was broken
                # the other way round; either way something is lost.
                rendered = Header(
                    value, maxlinelen=self._maxheaderlen,
                    header_name=name).encode()
            print >> self._fp, rendered
        # An empty line always terminates the header block.
        print >> self._fp

    #
    # Handlers for writing types and subtypes
    #

    def _handle_text(self, msg):
        text = msg.get_payload()
        if text is None:
            return
        if not isinstance(text, basestring):
            raise TypeError('string payload expected: %s' % type(text))
        if self._mangle_from_:
            text = fcre.sub('>From ', text)
        self._fp.write(text)

    # Default body handler
    _writeBody = _handle_text

    def _handle_multipart(self, msg):
        # Strategy: flatten every subpart on its own, then pick a boundary
        # that does not occur in any of the flattened texts, then write the
        # whole thing out.
        subparts = msg.get_payload()
        if subparts is None:
            subparts = []
        elif isinstance(subparts, basestring):
            # e.g. a non-strict parse of a message with no starting boundary.
            self._fp.write(subparts)
            return
        elif not isinstance(subparts, list):
            # Scalar payload
            subparts = [subparts]
        rendered = []
        for part in subparts:
            part_buf = StringIO()
            self.clone(part_buf).flatten(part, unixfrom=False)
            rendered.append(part_buf.getvalue())
        # BAW: What about boundaries that are wrapped in double-quotes?
        boundary = msg.get_boundary()
        if not boundary:
            # Invent a boundary that is absent from all flattened subparts
            # and record it on the message.
            boundary = _make_boundary(NL.join(rendered))
            msg.set_boundary(boundary)
        # Preamble, if any, followed by a line end.
        preamble = msg.preamble
        if preamble is not None:
            if self._mangle_from_:
                preamble = fcre.sub('>From ', preamble)
            print >> self._fp, preamble
        # dash-boundary transport-padding CRLF
        print >> self._fp, '--' + boundary
        # body-part
        if rendered:
            self._fp.write(rendered[0])
        # *encapsulation
        # --> delimiter transport-padding
        # --> CRLF body-part
        for body_part in rendered[1:]:
            # delimiter transport-padding CRLF
            print >> self._fp, '\n--' + boundary
            # body-part
            self._fp.write(body_part)
        # close-delimiter transport-padding
        self._fp.write('\n--' + boundary + '--')
        epilogue = msg.epilogue
        if epilogue is not None:
            # A line end separates the close-delimiter from the epilogue.
            print >> self._fp
            if self._mangle_from_:
                epilogue = fcre.sub('>From ', epilogue)
            self._fp.write(epilogue)

    def _handle_multipart_signed(self, msg):
        # Signed content must stay unmodified for the signature to remain
        # valid (RFC 1847 2.1), so header wrapping is switched off while the
        # part is written.
        # RDM: This isn't enough to completely preserve the part, but it helps.
        saved_maxheaderlen = self._maxheaderlen
        try:
            self._maxheaderlen = 0
            self._handle_multipart(msg)
        finally:
            self._maxheaderlen = saved_maxheaderlen

    def _handle_message_delivery_status(self, msg):
        # Writing the header blocks straight to self._fp would leave an
        # extra newline between the last block and the boundary, so each
        # block is flattened separately and trimmed first.
        blocks = []
        for part in msg.get_payload():
            part_buf = StringIO()
            self.clone(part_buf).flatten(part, unixfrom=False)
            text = part_buf.getvalue()
            # Drop the single trailing line end, if there is one.
            if text.endswith('\n'):
                text = text[:-1]
            blocks.append(text)
        # Joining with NL puts an empty line between consecutive blocks
        # without adding one after the last block.
        self._fp.write(NL.join(blocks))

    def _handle_message(self, msg):
        sub_buf = StringIO()
        sub_gen = self.clone(sub_buf)
        # Normally the payload of a message/rfc822 part is a one-element
        # list holding the Message object of the subpart; that object is
        # flattened and written out.
        # The payload is a plain string instead when (and only when)
        # HeaderParser was used on a message/rfc822 message, as produced by
        # e.g. Groupwise when forwarding unadorned messages (Issue 7970).
        # In that case the string body is emitted as is.
        body = msg.get_payload()
        if isinstance(body, list):
            sub_gen.flatten(msg.get_payload(0), unixfrom=False)
            body = sub_buf.getvalue()
        self._fp.write(body)
293
294
295
296
# Default replacement text for non-text parts (see DecodedGenerator).
_FMT = '[Non-text (%(type)s) part of message omitted, filename %(filename)s]'


class DecodedGenerator(Generator):
    """Produce a textual rendition of a message.

    Works like Generator, but every non-text part is replaced by a line
    built from a format string describing that part.
    """

    def __init__(self, outfp, mangle_from_=True, maxheaderlen=78, fmt=None):
        """Same as Generator.__init__() plus one optional argument, fmt.

        All subparts of a message are visited.  For a subpart whose main
        type is "text" the decoded payload is printed.  Multipart
        containers themselves produce no output.

        For every other subpart, fmt is printed in place of the payload
        after %(keyword)s expansion with these keywords:

        type       : Full MIME type of the non-text part
        maintype   : Main MIME type of the non-text part
        subtype    : Sub-MIME type of the non-text part
        filename   : Filename of the non-text part
        description: Description associated with the non-text part
        encoding   : Content transfer encoding of the non-text part

        When fmt is None (the default) this template is used:

        [Non-text (%(type)s) part of message omitted, filename %(filename)s]
        """
        Generator.__init__(self, outfp, mangle_from_, maxheaderlen)
        self._fmt = _FMT if fmt is None else fmt

    def _dispatch(self, msg):
        for part in msg.walk():
            maintype = part.get_content_maintype()
            if maintype == 'multipart':
                # Containers are skipped; walk() visits their children.
                continue
            if maintype == 'text':
                print >> self, part.get_payload(decode=True)
                continue
            # Any other part is summarised via the format string.
            fields = {
                'type'       : part.get_content_type(),
                'maintype'   : part.get_content_maintype(),
                'subtype'    : part.get_content_subtype(),
                'filename'   : part.get_filename('[no filename]'),
                'description': part.get('Content-Description',
                                        '[no description]'),
                'encoding'   : part.get('Content-Transfer-Encoding',
                                        '[no encoding]'),
                }
            print >> self, self._fmt % fields
352
353
354
355
356	# Helper
357	_width = len(repr(sys.maxint-1))
358	_fmt = '%%0%dd' % _width
359
360	def _make_boundary(text=None):
361	# Craft a random boundary. If text is given, ensure that the chosen
362	# boundary doesn't appear in the text.
363	token = random.randrange(sys.maxint)
364	boundary = ('=' * 15) + (_fmt % token) + '=='
365	if text is None:
366	return boundary
367	b = boundary
368	counter = 0
369	while True:
370	cre = re.compile('^--' + re.escape(b) + '(--)?$', re.MULTILINE)
371	if not cre.search(text):
372	break
373	b = boundary + '.' + str(counter)
374	counter += 1
375	return b

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: python/trunk/Lib/email/generator.py

Download in other formats: