Context Navigation

utils.py@ 1538

Last change on this file since 1538 was 391, checked in by dmik, 12 years ago
python: Merge vendor 2.7.6 to trunk.
Property svn:eol-style set to `native`
File size: 9.6 KB

Line
1	# Copyright (C) 2001-2010 Python Software Foundation
2	# Author: Barry Warsaw
3	# Contact: email-sig@python.org
4
5	"""Miscellaneous utilities."""
6
7	__all__ = [
8	'collapse_rfc2231_value',
9	'decode_params',
10	'decode_rfc2231',
11	'encode_rfc2231',
12	'formataddr',
13	'formatdate',
14	'getaddresses',
15	'make_msgid',
16	'mktime_tz',
17	'parseaddr',
18	'parsedate',
19	'parsedate_tz',
20	'unquote',
21	]
22
23	import os
24	import re
25	import time
26	import base64
27	import random
28	import socket
29	import urllib
30	import warnings
31
32	from email._parseaddr import quote
33	from email._parseaddr import AddressList as _AddressList
34	from email._parseaddr import mktime_tz
35
36	# We need workarounds for bugs in these methods in older Pythons (see below)
37	from email._parseaddr import parsedate as _parsedate
38	from email._parseaddr import parsedate_tz as _parsedate_tz
39
40	from quopri import decodestring as _qdecode
41
42	# Intrapackage imports
43	from email.encoders import _bencode, _qencode
44
# String constants shared by the helpers in this module.
COMMASPACE = ', '
EMPTYSTRING = ''
UEMPTYSTRING = u''
CRLF = '\r\n'
TICK = "'"

# Characters whose presence in a display name makes formataddr() wrap the
# name in double quotes.
specialsre = re.compile(r'[][\\()<>@,:;".]')
# Characters that formataddr() backslash-escapes inside a display name.
escapesre = re.compile(r'[][\\()"]')
53
54
55
56
57	# Helpers
58
def _identity(s):
    """Return the argument unchanged."""
    return s
61
62
def _bdecode(s):
    """Decode the base64 string s.

    Equivalent to base64.decodestring() and kept only for backward
    compatibility; an earlier version stripped a trailing newline from the
    decoded result (see issue 7143).

    A false value (such as the empty string or None) is handed back
    untouched instead of being decoded.
    """
    return base64.decodestring(s) if s else s
73
74
75
76
def fix_eols(s):
    """Return s with every line ending normalized to CRLF."""
    # First pass: a line feed that is not already preceded by a carriage
    # return becomes a full CRLF pair.
    lf_fixed = re.sub(r'(?<!\r)\n', CRLF, s)
    # Second pass: a carriage return that is not followed by a line feed
    # becomes a full CRLF pair as well.
    return re.sub(r'\r(?!\n)', CRLF, lf_fixed)
84
85
86
87
def formataddr(pair):
    """Build a header-ready address string from (realname, email_address).

    This is the inverse of parseaddr(): the result is suitable for an
    RFC 2822 From, To or Cc header.  When the real name is false (empty or
    None) the address alone is returned, unmodified.

    A real name containing any character matched by specialsre is wrapped
    in double quotes, and every character matched by escapesre is
    backslash-escaped.
    """
    realname, email_address = pair
    if not realname:
        return email_address
    # Decide on quoting before escaping, using the untouched name.
    quotes = '"' if specialsre.search(realname) else ''
    escaped = escapesre.sub(r'\\\g<0>', realname)
    return '%s%s%s <%s>' % (quotes, escaped, quotes, email_address)
104
105
106
107
def getaddresses(fieldvalues):
    """Return a list of (REALNAME, EMAIL) pairs found in fieldvalues.

    The field values are joined with ', ' into a single string, which is
    then parsed as one address list.
    """
    joined = COMMASPACE.join(fieldvalues)
    return _AddressList(joined).addresslist
113
114
115
116
# Matches a single encoded word of the form =?charset?encoding?atom?=
# (the RFC 2047 encoded-word syntax).  The flags must be combined with a
# plain bitwise OR; whitespace and #-comments inside the pattern are ignored
# because of re.VERBOSE.
ecre = re.compile(r'''
  =\?                   # literal =?
  (?P<charset>[^?]*?)   # non-greedy up to the next ? is the charset
  \?                    # literal ?
  (?P<encoding>[qb])    # either a "q" or a "b", case insensitive
  \?                    # literal ?
  (?P<atom>.*?)         # non-greedy up to the next ?= is the atom
  \?=                   # literal ?=
  ''', re.VERBOSE | re.IGNORECASE)
126
127
128
129
def formatdate(timeval=None, localtime=False, usegmt=False):
    """Return a date string in RFC 2822 format, for example:

    Fri, 09 Nov 2001 01:08:47 -0000

    timeval, when given, is a floating point time value as accepted by
    time.gmtime() and time.localtime(); when omitted (None) the current
    time is used.

    localtime, when true, interprets timeval in the local timezone and
    emits a numeric offset that accounts for daylight savings time;
    otherwise the date is expressed in UTC.

    usegmt, when true, writes the zone as the ascii string "GMT" rather
    than the numeric "-0000".  This is needed for HTTP and is only
    honoured when localtime is false.
    """
    # strftime() is deliberately avoided: it honors the locale, while
    # RFC 2822 requires the English day and month abbreviations.
    if timeval is None:
        timeval = time.time()
    if not localtime:
        now = time.gmtime(timeval)
        # UTC output: symbolic name on request, otherwise always -0000.
        zone = 'GMT' if usegmt else '-0000'
    else:
        now = time.localtime(timeval)
        # Choose the DST offset only when the local zone has daylight
        # savings time and DST is in effect for this timestamp.
        dst_active = time.daylight and now[-1]
        offset = time.altzone if dst_active else time.timezone
        hours, leftover = divmod(abs(offset), 3600)
        # offset counts seconds *west* of UTC whereas the printed zone is
        # expressed east of UTC, hence the inverted sign.
        sign = '-' if offset > 0 else '+'
        zone = '%s%02d%02d' % (sign, hours, leftover // 60)
    weekday = ('Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun')[now[6]]
    month = ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
             'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec')[now[1] - 1]
    return '%s, %02d %s %04d %02d:%02d:%02d %s' % (
        weekday, now[2], month, now[0], now[3], now[4], now[5], zone)
180
181
182
183
def make_msgid(idstring=None):
    """Return a string suitable for an RFC 2822 compliant Message-ID, e.g:

    <20020201195627.33539.96671@nightshade.la.mastaler.com>

    The id is assembled from the current UTC timestamp, the process id, a
    random integer below 100000 and the fully qualified host name.  The
    optional idstring, when given, is appended (after a dot) to the local
    part to strengthen the uniqueness of the message id.
    """
    utcdate = time.strftime('%Y%m%d%H%M%S', time.gmtime(time.time()))
    pid = os.getpid()
    randint = random.randrange(100000)
    suffix = '' if idstring is None else '.' + idstring
    idhost = socket.getfqdn()
    return '<%s.%s.%s%s@%s>' % (utcdate, pid, randint, suffix, idhost)
203
204
205
206
207	# These functions are in the standalone mimelib version only because they've
208	# subsequently been fixed in the latest Python versions. We use this to work
209	# around broken older Pythons.
def parsedate(data):
    """Parse a date string with email._parseaddr.parsedate().

    Returns None right away for a false value (empty string or None)
    instead of handing it to the underlying parser.
    """
    return _parsedate(data) if data else None
214
215
def parsedate_tz(data):
    """Parse a date string with email._parseaddr.parsedate_tz().

    Returns None right away for a false value (empty string or None)
    instead of handing it to the underlying parser.
    """
    return _parsedate_tz(data) if data else None
220
221
def parseaddr(addr):
    """Parse addr and return the first address found in it.

    The result is the first entry of the parsed address list, or the pair
    ('', '') when parsing yields no addresses at all.
    """
    parsed = _AddressList(addr).addresslist
    return parsed[0] if parsed else ('', '')
227
228
229	# rfc822.unquote() doesn't properly de-backslash-ify in Python pre-2.3.
def unquote(str):
    """Remove quotes from a string.

    A string enclosed in double quotes loses them and has its backslash
    escapes (\\\\ and \\") undone; a string enclosed in angle brackets just
    loses the brackets.  Anything else, including strings shorter than two
    characters, is returned unchanged.
    """
    if len(str) <= 1:
        return str
    if str.startswith('"') and str.endswith('"'):
        inner = str[1:-1]
        # Undo escaped backslashes first, then escaped double quotes.
        return inner.replace('\\\\', '\\').replace('\\"', '"')
    if str.startswith('<') and str.endswith('>'):
        return str[1:-1]
    return str
238
239
240
241
242	# RFC2231-related functions - parameter encoding and decoding
def decode_rfc2231(s):
    """Decode string according to RFC 2231.

    s is split on the first two single quotes.  When both are present the
    three resulting pieces are returned as a list; otherwise the result is
    the tuple (None, None, s).
    """
    parts = s.split(TICK, 2)
    if len(parts) > 2:
        return parts
    return None, None, s
249
250
def encode_rfc2231(s, charset=None, language=None):
    """Encode string according to RFC 2231.

    s is always %-quoted with no characters treated as safe.  When neither
    charset nor language is given, the quoted string is returned on its
    own.  Otherwise the result has the form charset'language'quoted, with
    the empty string standing in for a language of None.
    """
    import urllib
    quoted = urllib.quote(s, safe='')
    if charset is None and language is None:
        return quoted
    lang = '' if language is None else language
    return "%s'%s'%s" % (charset, lang, quoted)
265
266
# Matches an RFC 2231 extended parameter name: a word followed by '*',
# optionally a continuation number, and optionally a trailing '*' marking a
# %-encoded segment (e.g. "title*", "title*0", "title*1*").  The asterisks
# must be escaped literals; without them the pattern has an unbalanced
# parenthesis and does not compile.
rfc2231_continuation = re.compile(r'^(?P<name>\w+)\*((?P<num>[0-9]+)\*?)?$')
268
def decode_params(params):
    """Decode parameters list according to RFC 2231.

    params is a list of 2-tuples containing (param name, string value).
    The first entry is carried over unchanged.  Every later entry has its
    value unquoted; entries whose name does not match the RFC 2231
    continuation pattern are re-quoted and appended as (name, '"value"').
    Entries that do match are grouped by base name, ordered by continuation
    number and concatenated; if any segment name ended in '*', the result
    is appended as (name, (charset, language, '"value"')), otherwise as
    (name, '"value"').

    Returns the new list; the caller's list is not modified.  An empty
    params list raises IndexError.
    """
    # Operate on a shallow copy so the caller's list is never mutated.
    remaining = params[:]
    # The first entry is passed through as-is.
    name, value = remaining.pop(0)
    new_params = [(name, value)]
    # Maps a parameter's base name to its list of continuations, each a
    # 3-tuple of (continuation number, string value, %-encoded flag).
    rfc2231_params = {}
    for name, value in remaining:
        encoded = name.endswith('*')
        value = unquote(value)
        mo = rfc2231_continuation.match(name)
        if not mo:
            new_params.append((name, '"%s"' % quote(value)))
            continue
        name, num = mo.group('name', 'num')
        if num is not None:
            num = int(num)
        rfc2231_params.setdefault(name, []).append((num, value, encoded))
    for name, continuations in rfc2231_params.items():
        # Put the segments into numerical order before joining them.
        continuations.sort()
        segments = []
        extended = False
        # %-decode the encoded segments.  If any continuation name ended
        # in '*', the joined string must carry the charset and language
        # specifiers at its beginning.
        for num, s, encoded in continuations:
            if encoded:
                s = urllib.unquote(s)
                extended = True
            segments.append(s)
        value = quote(EMPTYSTRING.join(segments))
        if not extended:
            new_params.append((name, '"%s"' % value))
            continue
        charset, language, value = decode_rfc2231(value)
        new_params.append((name, (charset, language, '"%s"' % value)))
    return new_params
321
def collapse_rfc2231_value(value, errors='replace',
                           fallback_charset='us-ascii'):
    """Collapse a parameter value into a single unquoted string.

    A value that is not a tuple is simply unquoted and returned.  A tuple
    is indexed as (charset, language, text): the text is unquoted and
    decoded to unicode using the charset (us-ascii when the charset is
    false) with the given errors handler.  If Python does not know the
    charset (LookupError), fallback_charset is used for decoding instead.
    """
    if not isinstance(value, tuple):
        return unquote(value)
    rawval = unquote(value[2])
    charset = value[0] or 'us-ascii'
    try:
        return unicode(rawval, charset, errors)
    except LookupError:
        # XXX charset is unknown to Python.
        return unicode(rawval, fallback_charset, errors)

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: python/trunk/Lib/email/utils.py@ 1538

Download in other formats: