Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

utils.py@ 380

Last change on this file since 380 was 2, checked in by Yuri Dario, 15 years ago
Initial import for vendor code.
Property svn:eol-style set to `native`
File size: 9.6 KB

Line
1	# Copyright (C) 2001-2009 Python Software Foundation
2	# Author: Barry Warsaw
3	# Contact: email-sig@python.org
4
5	"""Miscellaneous utilities."""
6
# Public names exported by a star-import of this module.  Helpers defined
# below that are not listed here (for example fix_eols) are not exported
# that way.
__all__ = [
    'collapse_rfc2231_value',
    'decode_params',
    'decode_rfc2231',
    'encode_rfc2231',
    'formataddr',
    'formatdate',
    'getaddresses',
    'make_msgid',
    'mktime_tz',
    'parseaddr',
    'parsedate',
    'parsedate_tz',
    'unquote',
    ]
22
23	import os
24	import re
25	import time
26	import base64
27	import random
28	import socket
29	import urllib
30	import warnings
31
32	from email._parseaddr import quote
33	from email._parseaddr import AddressList as _AddressList
34	from email._parseaddr import mktime_tz
35
36	# We need wormarounds for bugs in these methods in older Pythons (see below)
37	from email._parseaddr import parsedate as _parsedate
38	from email._parseaddr import parsedate_tz as _parsedate_tz
39
40	from quopri import decodestring as _qdecode
41
42	# Intrapackage imports
43	from email.encoders import _bencode, _qencode
44
# Shared string constants used by the helpers in this module.
COMMASPACE = ', '
EMPTYSTRING = ''
UEMPTYSTRING = u''
CRLF = '\r\n'
TICK = "'"

# Characters whose presence makes formataddr() wrap the real name in
# double quotes.
specialsre = re.compile(r'[][\\()<>@,:;".]')
# Characters that formataddr() prefixes with a backslash in the real name.
escapesre = re.compile(r'[][\\()"]')
53
54
55
56
57	# Helpers
58
def _identity(s):
    """Return s unchanged."""
    return s
61
62
def _bdecode(s):
    """Decode the base64 string s and return the decoded data.

    A false value (empty string or None) is returned as-is without being
    decoded.  Otherwise the result is exactly what base64.decodestring()
    produces.

    Earlier versions of this helper dropped a trailing newline from the
    decoded data whenever the encoded input did not itself end with one.
    That newline is part of the payload, so removing it corrupted any
    content that legitimately ends with a newline; the data is now returned
    untouched.
    """
    if not s:
        return s
    return base64.decodestring(s)
72
73
74
75
def fix_eols(s):
    r"""Return s with every line ending normalized to CRLF (\r\n)."""
    # First pass: a newline that is not already preceded by a carriage
    # return gets one.
    without_bare_lf = re.sub(r'(?<!\r)\n', CRLF, s)
    # Second pass: a carriage return that is not followed by a newline is
    # expanded to a full CRLF.
    return re.sub(r'\r(?!\n)', CRLF, without_bare_lf)
83
84
85
86
def formataddr(pair):
    """Build an address header value from a (realname, email_address) pair.

    This is the inverse of parseaddr(): the result is a string suitable for
    an RFC 2822 From, To or Cc header.

    When the real name is false, the email address alone is returned,
    unmodified.
    """
    realname, address = pair
    if not realname:
        return address
    # A real name containing any special character must be double-quoted.
    if specialsre.search(realname):
        quotes = '"'
    else:
        quotes = ''
    # Backslash-escape the characters that may not appear bare in the name.
    escaped = escapesre.sub(r'\\\g<0>', realname)
    return '%s%s%s <%s>' % (quotes, escaped, quotes, address)
103
104
105
106
def getaddresses(fieldvalues):
    """Return a list of (REALNAME, EMAIL) for each fieldvalue."""
    # All field values are joined into one comma separated string so that a
    # single address-list parse handles them together.
    joined = COMMASPACE.join(fieldvalues)
    return _AddressList(joined).addresslist
112
113
114
115
# Matches one encoded word of the form =?charset?encoding?atom?= where the
# encoding is "q" or "b" in either case.  The two flags must be combined
# with a plain bitwise OR.
ecre = re.compile(r'''
  =\?                   # literal =?
  (?P<charset>[^?]*?)   # non-greedy up to the next ? is the charset
  \?                    # literal ?
  (?P<encoding>[qb])    # either a "q" or a "b", case insensitive
  \?                    # literal ?
  (?P<atom>.*?)         # non-greedy up to the next ?= is the atom
  \?=                   # literal ?=
  ''', re.VERBOSE | re.IGNORECASE)
125
126
127
128
def formatdate(timeval=None, localtime=False, usegmt=False):
    """Return timeval formatted as an RFC 2822 date string, e.g.:

    Fri, 09 Nov 2001 01:08:47 -0000

    timeval, if given, is a floating point time value as accepted by
    time.gmtime() and time.localtime(); when it is None the current time is
    used.

    When localtime is true, timeval is rendered relative to the local
    timezone instead of UTC, taking daylight savings time into account.

    When usegmt is true the timezone is written as the ascii string "GMT"
    rather than the numeric "-0000".  This is needed for HTTP and only has
    an effect when localtime is false.
    """
    # strftime() is deliberately avoided: it honors the locale, whereas
    # RFC 2822 requires the English day and month abbreviations.
    day_names = ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']
    month_names = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                   'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
    if timeval is None:
        timeval = time.time()
    if localtime:
        now = time.localtime(timeval)
        # Start from the standard offset and switch to the DST offset only
        # when the local zone has DST and it is in effect for this time.
        offset = time.timezone
        if time.daylight and now[-1]:
            offset = time.altzone
        hours, leftover_seconds = divmod(abs(offset), 3600)
        # The offset counts seconds west of UTC while the printed zone is
        # expressed east of UTC, so the sign is flipped.
        sign = '+'
        if offset > 0:
            sign = '-'
        zone = '%s%02d%02d' % (sign, hours, leftover_seconds // 60)
    else:
        now = time.gmtime(timeval)
        # UTC is written as -0000 unless the textual form was requested.
        zone = '-0000'
        if usegmt:
            zone = 'GMT'
    return '%s, %02d %s %04d %02d:%02d:%02d %s' % (
        day_names[now[6]],
        now[2],
        month_names[now[1] - 1],
        now[0], now[3], now[4], now[5],
        zone)
179
180
181
182
def make_msgid(idstring=None):
    """Return a string suitable for an RFC 2822 compliant Message-ID, e.g:

    <20020201195627.33539.96671@nightshade.la.mastaler.com>

    The id is built from the current UTC timestamp, the process id, a
    random number below 100000 and the fully qualified host name.

    idstring, if given, is a string appended (after a dot) to the local
    part to strengthen the uniqueness of the message id.
    """
    stamp = time.strftime('%Y%m%d%H%M%S', time.gmtime(time.time()))
    pid = os.getpid()
    randint = random.randrange(100000)
    suffix = ''
    if idstring is not None:
        suffix = '.' + idstring
    hostname = socket.getfqdn()
    return '<%s.%s.%s%s@%s>' % (stamp, pid, randint, suffix, hostname)
202
203
204
205
206	# These functions are in the standalone mimelib version only because they've
207	# subsequently been fixed in the latest Python versions. We use this to worm
208	# around broken older Pythons.
def parsedate(data):
    """Parse a date string with email._parseaddr.parsedate().

    Returns None for a false value (empty string or None) without calling
    the underlying parser.
    """
    if data:
        return _parsedate(data)
    return None
213
214
def parsedate_tz(data):
    """Parse a date string with email._parseaddr.parsedate_tz().

    Returns None for a false value (empty string or None) without calling
    the underlying parser.
    """
    if data:
        return _parsedate_tz(data)
    return None
219
220
def parseaddr(addr):
    """Parse addr and return its first address as a (realname, email) pair.

    When no address can be parsed from addr, the pair ('', '') is returned.
    """
    parsed = _AddressList(addr).addresslist
    if parsed:
        return parsed[0]
    return '', ''
226
227
228	# rfc822.unquote() doesn't properly de-backslash-ify in Python pre-2.3.
def unquote(str):
    """Remove one level of surrounding quotes from a string.

    A string enclosed in double quotes loses them and has its escaped
    backslashes and escaped double quotes unescaped.  A string enclosed in
    angle brackets only loses the brackets.  Anything else, including
    strings shorter than two characters, is returned unchanged.
    """
    if len(str) <= 1:
        return str
    inner = str[1:-1]
    if str.startswith('"') and str.endswith('"'):
        # Undo backslash escaping: doubled backslashes first, then quotes.
        return inner.replace('\\\\', '\\').replace('\\"', '"')
    if str.startswith('<') and str.endswith('>'):
        return inner
    return str
237
238
239
240
241	# RFC2231-related functions - parameter encoding and decoding
def decode_rfc2231(s):
    """Split an RFC 2231 value into its charset, language and text parts.

    The string is split on the first two single quotes.  When that yields
    three parts they are returned as a list [charset, language, text];
    otherwise the tuple (None, None, s) is returned.
    """
    pieces = s.split(TICK, 2)
    if len(pieces) > 2:
        return pieces
    return None, None, s
248
249
250	def encode_rfc2231(s, charset=None, language=None):
251	"""Encode string according to RFC 2231.
252
253	If neither charset nor language is given, then s is returned as-is. If
254	charset is given but not language, the string is encoded using the empty
255	string for language.
256	"""
257	import urllib
258	s = urllib.quote(s, safe='')
259	if charset is None and language is None:
260	return s
261	if language is None:
262	language = ''
263	return "%s'%s'%s" % (charset, language, s)
264
265
# Matches an RFC 2231 parameter name: a word, a literal "*", then optionally
# a continuation number that may itself be followed by another "*"
# (e.g. "title*", "title*0" or "title*0*").  The asterisks are literal and
# therefore escaped.
rfc2231_continuation = re.compile(r'^(?P<name>\w+)\*((?P<num>[0-9]+)\*?)?$')
267
def decode_params(params):
    """Decode parameters list according to RFC 2231.

    params is a sequence of 2-tuples containing (param name, string value).

    Returns a new list of 2-tuples.  The first parameter is passed through
    untouched.  Every later parameter whose name does not match
    rfc2231_continuation has its value unquoted, re-quoted and wrapped in
    double quotes.  Parameters whose names do match are grouped by base
    name, joined in sorted order and appended at the end; if any segment of
    a group was %-encoded, the value is the 3-tuple
    (charset, language, '"value"') as split by decode_rfc2231(), otherwise
    it is the string '"value"'.

    Raises IndexError if params is empty, because the first element is
    popped unconditionally.
    """
    # Copy params so we don't mess with the original
    params = params[:]
    new_params = []
    # Map parameter's name to a list of continuations.  The values are a
    # 3-tuple of the continuation number, the string value, and a flag
    # specifying whether a particular segment is %-encoded.
    rfc2231_params = {}
    # The first entry is copied over as-is, without unquoting or decoding.
    name, value = params.pop(0)
    new_params.append((name, value))
    while params:
        name, value = params.pop(0)
        # A trailing * on the parameter name marks a %-encoded segment.
        if name.endswith('*'):
            encoded = True
        else:
            encoded = False
        value = unquote(value)
        mo = rfc2231_continuation.match(name)
        if mo:
            # num stays None when the name carries no continuation number.
            name, num = mo.group('name', 'num')
            if num is not None:
                num = int(num)
            rfc2231_params.setdefault(name, []).append((num, value, encoded))
        else:
            new_params.append((name, '"%s"' % quote(value)))
    if rfc2231_params:
        # Groups are appended in dict iteration order, after all of the
        # ordinary parameters.
        for name, continuations in rfc2231_params.items():
            value = []
            extended = False
            # Sort by number (the first element of each tuple)
            continuations.sort()
            # And now append all values in numerical order, converting
            # %-encodings for the encoded segments.  If any of the
            # continuation names ends in a *, then the entire string, after
            # decoding segments and concatenating, must have the charset and
            # language specifiers at the beginning of the string.
            for num, s, encoded in continuations:
                if encoded:
                    s = urllib.unquote(s)
                    extended = True
                value.append(s)
            value = quote(EMPTYSTRING.join(value))
            if extended:
                # charset and language are None when the joined value does
                # not contain two single quotes (see decode_rfc2231).
                charset, language, value = decode_rfc2231(value)
                new_params.append((name, (charset, language, '"%s"' % value)))
            else:
                new_params.append((name, '"%s"' % value))
    return new_params
320
def collapse_rfc2231_value(value, errors='replace',
                           fallback_charset='us-ascii'):
    """Collapse an RFC 2231 parameter value into a single string.

    A value that is not a tuple is simply unquoted and returned.  A tuple
    is taken to be (charset, language, text): the text is unquoted and
    decoded to unicode using the charset, or us-ascii when the charset is
    false, with the given errors policy.  If Python does not know the
    charset, fallback_charset is used for decoding instead.
    """
    if not isinstance(value, tuple):
        return unquote(value)
    text = unquote(value[2])
    codec = value[0] or 'us-ascii'
    try:
        return unicode(text, codec, errors)
    except LookupError:
        # XXX charset is unknown to Python.
        return unicode(text, fallback_charset, errors)

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: python/trunk/Lib/email/utils.py@ 380

Download in other formats: