source: python/trunk/Lib/email/utils.py@ 779

Last change on this file since 779 was 391, checked in by dmik, 11 years ago

python: Merge vendor 2.7.6 to trunk.

  • Property svn:eol-style set to native
File size: 9.6 KB
Line 
1# Copyright (C) 2001-2010 Python Software Foundation
2# Author: Barry Warsaw
3# Contact: email-sig@python.org
4
5"""Miscellaneous utilities."""
6
# Names exported by ``from email.utils import *``.
__all__ = [
    'collapse_rfc2231_value',
    'decode_params',
    'decode_rfc2231',
    'encode_rfc2231',
    'formataddr',
    'formatdate',
    'getaddresses',
    'make_msgid',
    'mktime_tz',
    'parseaddr',
    'parsedate',
    'parsedate_tz',
    'unquote',
]
22
23import os
24import re
25import time
26import base64
27import random
28import socket
29import urllib
30import warnings
31
32from email._parseaddr import quote
33from email._parseaddr import AddressList as _AddressList
34from email._parseaddr import mktime_tz
35
36# We need wormarounds for bugs in these methods in older Pythons (see below)
37from email._parseaddr import parsedate as _parsedate
38from email._parseaddr import parsedate_tz as _parsedate_tz
39
40from quopri import decodestring as _qdecode
41
42# Intrapackage imports
43from email.encoders import _bencode, _qencode
44
# String constants shared by the helpers in this module.
COMMASPACE = ', '
EMPTYSTRING = ''
UEMPTYSTRING = u''
CRLF = '\r\n'
TICK = "'"

# formataddr() wraps a real name in double quotes when this matches it.
specialsre = re.compile(r'[][\\()<>@,:;".]')
# formataddr() prefixes each match of this in a real name with a backslash.
escapesre = re.compile(r'[][\\()"]')
53
54
55
56
57# Helpers
58
def _identity(s):
    """Return s unchanged."""
    return s
61
62
def _bdecode(s):
    """Decode a base64 string.

    Kept only for backward compatibility: it is equivalent to
    base64.decodestring, except that a false value (empty string, None)
    is handed back as-is without being decoded.  Earlier versions also
    stripped a trailing \\n from the decoded data (see issue 7143).
    """
    return base64.decodestring(s) if s else s
73
74
75
76
def fix_eols(s):
    """Return s with every line ending normalized to \\r\\n."""
    # First pass: newlines that are not preceded by a carriage return.
    # Second pass: carriage returns that are not followed by a newline.
    for lone_eol in (r'(?<!\r)\n', r'\r(?!\n)'):
        s = re.sub(lone_eol, CRLF, s)
    return s
84
85
86
87
def formataddr(pair):
    """Build an RFC 2822 address string from a (realname, email_address) pair.

    This is the inverse of parseaddr(); the result is suitable for a From,
    To or Cc header.  When the real name is false, the email address is
    returned unmodified.
    """
    name, address = pair
    if not name:
        return address
    # Names containing any of the "special" characters must be quoted.
    quotes = '"' if specialsre.search(name) else ''
    escaped = escapesre.sub(r'\\\g<0>', name)
    return '%s%s%s <%s>' % (quotes, escaped, quotes, address)
104
105
106
107
def getaddresses(fieldvalues):
    """Return a list of (REALNAME, EMAIL) for each fieldvalue.

    All field values are joined with ", " and parsed as one address list.
    """
    combined = COMMASPACE.join(fieldvalues)
    return _AddressList(combined).addresslist
113
114
115
116
# Matches an encoded word of the form =?charset?encoding?atom?= and exposes
# its three fields as the named groups "charset", "encoding" and "atom".
ecre = re.compile(
    r'''
  =\?                   # literal =?
  (?P<charset>[^?]*?)   # non-greedy up to the next ? is the charset
  \?                    # literal ?
  (?P<encoding>[qb])    # either a "q" or a "b", case insensitive
  \?                    # literal ?
  (?P<atom>.*?)         # non-greedy up to the next ?= is the atom
  \?=                   # literal ?=
  ''',
    re.VERBOSE | re.IGNORECASE)
126
127
128
129
def formatdate(timeval=None, localtime=False, usegmt=False):
    """Return a date string as specified by RFC 2822, e.g.:

    Fri, 09 Nov 2001 01:08:47 -0000

    timeval, when given, is a floating point time value as accepted by
    gmtime() and localtime(); when omitted, the current time is used.

    localtime is a flag: when true, timeval is interpreted in, and the
    result expressed relative to, the local timezone instead of UTC, with
    daylight savings time taken into account.

    usegmt asks for the zone to be spelled as the ascii string "GMT"
    rather than numerically.  This is needed for HTTP, and only has an
    effect when localtime is false.
    """
    # strftime() is deliberately avoided: it honors the locale, whereas RFC
    # 2822 requires the English day and month abbreviations.
    day_names = ('Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun')
    month_names = ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                   'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec')
    if timeval is None:
        timeval = time.time()
    if not localtime:
        stamp = time.gmtime(timeval)
        # The UTC offset is always written as -0000 unless "GMT" is requested.
        zone = 'GMT' if usegmt else '-0000'
    else:
        stamp = time.localtime(timeval)
        # Pick the offset according to whether the local zone has daylight
        # savings time at all and whether it is in effect for this time.
        dst_in_effect = time.daylight and stamp.tm_isdst
        seconds_west = time.altzone if dst_in_effect else time.timezone
        # The offset counts seconds west of UTC while the header zone counts
        # east of UTC, hence the inverted sign.
        sign = '-' if seconds_west > 0 else '+'
        hours, leftover_seconds = divmod(abs(seconds_west), 3600)
        zone = '%s%02d%02d' % (sign, hours, leftover_seconds // 60)
    return '%s, %02d %s %04d %02d:%02d:%02d %s' % (
        day_names[stamp.tm_wday],
        stamp.tm_mday,
        month_names[stamp.tm_mon - 1],
        stamp.tm_year, stamp.tm_hour, stamp.tm_min, stamp.tm_sec,
        zone)
180
181
182
183
def make_msgid(idstring=None, domain=None):
    """Returns a string suitable for RFC 2822 compliant Message-ID, e.g:

    <20020201195627.33539.96671@nightshade.la.mastaler.com>

    Optional idstring if given is a string used to strengthen the
    uniqueness of the message id.

    Optional domain if given provides the portion of the message id after
    the '@'.  It defaults to the fully qualified name of the local host as
    reported by socket.getfqdn(); passing it explicitly avoids that lookup.
    """
    timeval = time.time()
    utcdate = time.strftime('%Y%m%d%H%M%S', time.gmtime(timeval))
    pid = os.getpid()
    randint = random.randrange(100000)
    if idstring is None:
        idstring = ''
    else:
        idstring = '.' + idstring
    if domain is None:
        # Only consult the resolver when the caller did not name a domain.
        domain = socket.getfqdn()
    return '<%s.%s.%s%s@%s>' % (utcdate, pid, randint, idstring, domain)
203
204
205
206
207# These functions are in the standalone mimelib version only because they've
208# subsequently been fixed in the latest Python versions. We use this to worm
209# around broken older Pythons.
def parsedate(data):
    """Return None for a false value, otherwise whatever _parsedate(data) gives."""
    return _parsedate(data) if data else None
214
215
def parsedate_tz(data):
    """Return None for a false value, otherwise whatever _parsedate_tz(data) gives."""
    return _parsedate_tz(data) if data else None
220
221
def parseaddr(addr):
    """Return the first (realname, email_address) pair parsed from addr.

    When nothing can be parsed, a pair of empty strings is returned.
    """
    parsed = _AddressList(addr).addresslist
    return parsed[0] if parsed else ('', '')
227
228
229# rfc822.unquote() doesn't properly de-backslash-ify in Python pre-2.3.
def unquote(str):
    """Remove quotes from a string.

    A value wrapped in double quotes loses them and has its backslash
    escapes (\\\\ and \\") undone; a value wrapped in angle brackets just
    loses the brackets.  Anything else is returned unchanged.
    """
    if len(str) < 2:
        return str
    inner = str[1:-1]
    if str.startswith('"') and str.endswith('"'):
        without_double_backslashes = inner.replace('\\\\', '\\')
        return without_double_backslashes.replace('\\"', '"')
    if str.startswith('<') and str.endswith('>'):
        return inner
    return str
238
239
240
241
242# RFC2231-related functions - parameter encoding and decoding
def decode_rfc2231(s):
    """Decode string according to RFC 2231.

    s is split on its first two single quotes.  When both are present the
    three resulting pieces are returned; otherwise (None, None, s) is.
    """
    pieces = s.split(TICK, 2)
    if len(pieces) == 3:
        return pieces
    return None, None, s
249
250
def encode_rfc2231(s, charset=None, language=None):
    """Encode string according to RFC 2231.

    s is always %-encoded (no character is treated as safe).  If neither
    charset nor language is given, the %-encoded string is returned on its
    own.  Otherwise the result has the form charset'language'encoded-s,
    where whichever of charset and language was not given is written as
    the empty string.
    """
    s = urllib.quote(s, safe='')
    if charset is None and language is None:
        return s
    # Either field may be empty, but a missing one must never be rendered
    # as the literal text "None".
    if charset is None:
        charset = ''
    if language is None:
        language = ''
    return "%s'%s'%s" % (charset, language, s)
265
266
# Matches an RFC 2231 style parameter name: a base name followed by "*" and,
# optionally, a section number that may itself be followed by another "*"
# (e.g. "name*", "name*0", "name*0*").
rfc2231_continuation = re.compile(r'^(?P<name>\w+)\*((?P<num>[0-9]+)\*?)?$')


def decode_params(params):
    """Decode parameters list according to RFC 2231.

    params is a sequence of 2-tuples containing (param name, string value).
    It is not modified.

    Returns a new list of 2-tuples.  The first parameter is passed through
    untouched.  Every other ordinary parameter is unquoted and then
    re-quoted.  Parameters whose names match the RFC 2231 continuation
    syntax are grouped by base name, concatenated in section-number order
    and appended after the ordinary ones; if any segment of a group was
    %-encoded (its name ended in "*"), the value is a
    (charset, language, quoted-string) 3-tuple instead of a plain string.
    An empty params yields an empty list.
    """
    if not params:
        return []
    name, value = params[0]
    new_params = [(name, value)]
    # Map parameter's name to a list of continuations.  The values are a
    # 3-tuple of the continuation number, the string value, and a flag
    # specifying whether a particular segment is %-encoded.
    rfc2231_params = {}
    # Iterate over the remaining items instead of repeatedly popping from
    # the front of a copy, which is quadratic in the number of parameters.
    for name, value in params[1:]:
        encoded = name.endswith('*')
        value = unquote(value)
        mo = rfc2231_continuation.match(name)
        if mo:
            name, num = mo.group('name', 'num')
            if num is not None:
                num = int(num)
            rfc2231_params.setdefault(name, []).append((num, value, encoded))
        else:
            new_params.append((name, '"%s"' % quote(value)))
    for name, continuations in rfc2231_params.items():
        segments = []
        extended = False
        # Sort by number
        continuations.sort()
        # And now append all values in numerical order, converting
        # %-encodings for the encoded segments.  If any of the
        # continuation names ends in a *, then the entire string, after
        # decoding segments and concatenating, must have the charset and
        # language specifiers at the beginning of the string.
        for num, s, encoded in continuations:
            if encoded:
                s = urllib.unquote(s)
                extended = True
            segments.append(s)
        value = quote(EMPTYSTRING.join(segments))
        if extended:
            charset, language, value = decode_rfc2231(value)
            new_params.append((name, (charset, language, '"%s"' % value)))
        else:
            new_params.append((name, '"%s"' % value))
    return new_params
321
def collapse_rfc2231_value(value, errors='replace',
                           fallback_charset='us-ascii'):
    """Collapse a parameter value into a single unquoted string.

    A value that is not a tuple is simply unquoted.  For a tuple, item 2
    is unquoted and decoded to unicode using item 0 as the charset
    ('us-ascii' when item 0 is false), with errors passed as the error
    handling scheme.  If Python does not know that charset,
    fallback_charset is used instead.
    """
    if not isinstance(value, tuple):
        return unquote(value)
    rawval = unquote(value[2])
    charset = value[0] or 'us-ascii'
    try:
        return unicode(rawval, charset, errors)
    except LookupError:
        # XXX charset is unknown to Python.
        return unicode(rawval, fallback_charset, errors)
Note: See TracBrowser for help on using the repository browser.