source: python/trunk/Lib/email/utils.py@ 27

Last change on this file since 27 was 2, checked in by Yuri Dario, 15 years ago

Initial import for vendor code.

  • Property svn:eol-style set to native
File size: 9.6 KB
Line 
1# Copyright (C) 2001-2009 Python Software Foundation
2# Author: Barry Warsaw
3# Contact: email-sig@python.org
4
5"""Miscellaneous utilities."""
6
7__all__ = [
8 'collapse_rfc2231_value',
9 'decode_params',
10 'decode_rfc2231',
11 'encode_rfc2231',
12 'formataddr',
13 'formatdate',
14 'getaddresses',
15 'make_msgid',
16 'mktime_tz',
17 'parseaddr',
18 'parsedate',
19 'parsedate_tz',
20 'unquote',
21 ]
22
23import os
24import re
25import time
26import base64
27import random
28import socket
29import urllib
30import warnings
31
32from email._parseaddr import quote
33from email._parseaddr import AddressList as _AddressList
34from email._parseaddr import mktime_tz
35
36# We need wormarounds for bugs in these methods in older Pythons (see below)
37from email._parseaddr import parsedate as _parsedate
38from email._parseaddr import parsedate_tz as _parsedate_tz
39
40from quopri import decodestring as _qdecode
41
42# Intrapackage imports
43from email.encoders import _bencode, _qencode
44
# String constants shared by the helpers in this module.
COMMASPACE = ', '
EMPTYSTRING = ''
UEMPTYSTRING = u''
CRLF = '\r\n'
TICK = "'"

# Matches any character whose presence makes formataddr() wrap the display
# name in double quotes.
specialsre = re.compile(r'[][\\()<>@,:;".]')
# Matches the characters that formataddr() prefixes with a backslash.
escapesre = re.compile(r'[][\\()"]')
53
54
55
56
57# Helpers
58
59def _identity(s):
60 return s
61
62
63def _bdecode(s):
64 # We can't quite use base64.encodestring() since it tacks on a "courtesy
65 # newline". Blech!
66 if not s:
67 return s
68 value = base64.decodestring(s)
69 if not s.endswith('\n') and value.endswith('\n'):
70 return value[:-1]
71 return value
72
73
74
75
def fix_eols(s):
    """Return s with every line ending normalised to CRLF."""
    # Inner substitution: a newline that is not already preceded by a
    # carriage return.  Outer substitution: a carriage return that is not
    # followed by a newline.
    return re.sub(r'\r(?!\n)', CRLF, re.sub(r'(?<!\r)\n', CRLF, s))
83
84
85
86
def formataddr(pair):
    """Build an address header value from a (realname, email_address) pair.

    This is the inverse of parseaddr(); the result is suitable for an
    RFC 2822 From, To or Cc header.

    When the real name is false the email address is returned unmodified.
    Otherwise the result is 'realname <email_address>'; the name is wrapped
    in double quotes if it contains a character matched by specialsre, and
    characters matched by escapesre are backslash-escaped.
    """
    realname, email_address = pair
    if not realname:
        return email_address
    wrapper = '"' if specialsre.search(realname) else ''
    escaped = escapesre.sub(r'\\\g<0>', realname)
    return '%s%s%s <%s>' % (wrapper, escaped, wrapper, email_address)
103
104
105
106
def getaddresses(fieldvalues):
    """Return a list of (REALNAME, EMAIL) pairs parsed from fieldvalues.

    The field values are joined with COMMASPACE and the combined string is
    parsed as a single address list.
    """
    joined = COMMASPACE.join(fieldvalues)
    return _AddressList(joined).addresslist
112
113
114
115
# Matches an encoded word of the form "=?charset?encoding?atom?=" and
# captures its three parts as named groups.  The pattern is compiled with
# re.IGNORECASE, so the encoding letter is accepted in either case.
ecre = re.compile(r'''
 =\? # literal =?
 (?P<charset>[^?]*?) # non-greedy up to the next ? is the charset
 \? # literal ?
 (?P<encoding>[qb]) # either a "q" or a "b", case insensitive
 \? # literal ?
 (?P<atom>.*?) # non-greedy up to the next ?= is the atom
 \?= # literal ?=
 ''', re.VERBOSE | re.IGNORECASE)
125
126
127
128
def formatdate(timeval=None, localtime=False, usegmt=False):
    """Return a date string in the format specified by RFC 2822, e.g.:

    Fri, 09 Nov 2001 01:08:47 -0000

    timeval, when given, is a floating point time value as accepted by
    time.gmtime() and time.localtime(); the current time is used otherwise.

    localtime, when true, makes the result relative to the local timezone
    rather than UTC, taking daylight savings time into account.

    usegmt, when true, writes the timezone as the ascii string "GMT"
    instead of a numeric offset.  It is only consulted when localtime is
    false; this form is needed for HTTP.
    """
    # strftime() is deliberately avoided: it honors the locale, whereas
    # RFC 2822 requires the English day and month abbreviations.
    day_names = ('Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun')
    month_names = ('Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                   'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec')
    if timeval is None:
        timeval = time.time()
    if not localtime:
        stamp = time.gmtime(timeval)
        # UTC output: the offset is always zero.
        zone = 'GMT' if usegmt else '-0000'
    else:
        stamp = time.localtime(timeval)
        # Pick the DST offset only if the local zone defines DST and it is
        # in effect for this timestamp.
        dst_in_effect = time.daylight and stamp[-1]
        offset = time.altzone if dst_in_effect else time.timezone
        hours, leftover_seconds = divmod(abs(offset), 3600)
        # offset counts seconds west of UTC while the header expresses the
        # zone east of UTC, hence the inverted sign.
        sign = '-' if offset > 0 else '+'
        zone = '%s%02d%02d' % (sign, hours, leftover_seconds // 60)
    return '%s, %02d %s %04d %02d:%02d:%02d %s' % (
        day_names[stamp[6]],
        stamp[2],
        month_names[stamp[1] - 1],
        stamp[0], stamp[3], stamp[4], stamp[5],
        zone)
179
180
181
182
def make_msgid(idstring=None):
    """Return a string suitable for an RFC 2822 compliant Message-ID, e.g:

    <20020201195627.33539.96671@nightshade.la.mastaler.com>

    The id is assembled from the current UTC timestamp, the process id, a
    random integer below 100000 and the fully qualified host name.  If
    idstring is given it is appended, preceded by a dot, to the local part
    to strengthen the uniqueness of the message id.
    """
    stamp = time.strftime('%Y%m%d%H%M%S', time.gmtime(time.time()))
    process_id = os.getpid()
    nonce = random.randrange(100000)
    suffix = '' if idstring is None else '.' + idstring
    host = socket.getfqdn()
    return '<%s.%s.%s%s@%s>' % (stamp, process_id, nonce, suffix, host)
202
203
204
205
206# These functions are in the standalone mimelib version only because they've
207# subsequently been fixed in the latest Python versions. We use this to worm
208# around broken older Pythons.
def parsedate(data):
    """Parse a date string with _parsedate(), or return None if data is false."""
    return _parsedate(data) if data else None
213
214
def parsedate_tz(data):
    """Parse a date string with _parsedate_tz(), or return None if data is false."""
    return _parsedate_tz(data) if data else None
219
220
def parseaddr(addr):
    """Return the first address parsed from addr.

    The result is the first entry of the parsed address list, or the pair
    ('', '') when parsing yields no addresses.
    """
    parsed = _AddressList(addr).addresslist
    return parsed[0] if parsed else ('', '')
226
227
228# rfc822.unquote() doesn't properly de-backslash-ify in Python pre-2.3.
def unquote(str):
    """Remove one level of surrounding quotes from a string.

    A string enclosed in double quotes loses them and has its backslash
    escapes for backslash and double quote undone.  A string enclosed in
    angle brackets just loses the brackets.  Anything else, including
    strings shorter than two characters, is returned unchanged.
    """
    if len(str) <= 1:
        return str
    inner = str[1:-1]
    if str.startswith('"') and str.endswith('"'):
        # Undo the backslash escaping: doubled backslashes first, then
        # escaped double quotes.
        unescaped = inner.replace('\\\\', '\\')
        return unescaped.replace('\\"', '"')
    if str.startswith('<') and str.endswith('>'):
        return inner
    return str
237
238
239
240
241# RFC2231-related functions - parameter encoding and decoding
def decode_rfc2231(s):
    """Split an RFC 2231 value into its charset, language and text parts.

    The string is split on at most the first two TICK characters.  When
    that yields three pieces they are returned as a list; otherwise the
    tuple (None, None, s) is returned.
    """
    pieces = s.split(TICK, 2)
    if len(pieces) > 2:
        return pieces
    return None, None, s
248
249
def encode_rfc2231(s, charset=None, language=None):
    """Encode string according to RFC 2231.

    s is always %-quoted, with no characters treated as safe.  If neither
    charset nor language is given, the quoted string alone is returned.
    Otherwise the result has the form charset'language'quoted-string, with
    the empty string standing in for whichever of charset and language was
    not given.
    """
    # quote() lives in urllib on Python 2 and in urllib.parse on Python 3;
    # accept either location so the helper works on both.
    try:
        from urllib import quote
    except ImportError:
        from urllib.parse import quote
    s = quote(s, safe='')
    if charset is None and language is None:
        return s
    # A missing part must be written as an empty field; formatting None
    # directly would put the literal text "None" into the header value.
    if charset is None:
        charset = ''
    if language is None:
        language = ''
    return "%s'%s'%s" % (charset, language, s)
264
265
# Matches a parameter name of the form NAME*, NAME*NUM or NAME*NUM*,
# capturing the base name and the optional continuation number.
rfc2231_continuation = re.compile(r'^(?P<name>\w+)\*((?P<num>[0-9]+)\*?)?$')
267
def decode_params(params):
    """Decode parameters list according to RFC 2231.

    params is a sequence of 2-tuples containing (param name, string value).
    The sequence itself is not modified.

    Returns a new list of 2-tuples.  The first pair of params is copied
    through unchanged.  Each following parameter whose name does not match
    rfc2231_continuation is unquoted and re-quoted.  Parameters whose names
    do match are grouped by base name, joined in continuation order and
    appended after all the others; if any segment of a group was
    %-encoded (its name ended in '*') the group's value is the 3-tuple
    (charset, language, quoted value), otherwise it is the quoted string.

    Raises IndexError if params is empty.
    """
    name, value = params[0]
    new_params = [(name, value)]
    # Map parameter's name to a list of continuations.  The values are a
    # 3-tuple of the continuation number, the string value, and a flag
    # specifying whether a particular segment is %-encoded.
    rfc2231_params = {}
    # Walk a slice rather than repeatedly popping from the front of a copy:
    # this is linear in the number of parameters and works for any
    # sliceable sequence, not only lists.
    for name, value in params[1:]:
        encoded = name.endswith('*')
        value = unquote(value)
        mo = rfc2231_continuation.match(name)
        if mo:
            name, num = mo.group('name', 'num')
            if num is not None:
                num = int(num)
            rfc2231_params.setdefault(name, []).append((num, value, encoded))
        else:
            new_params.append((name, '"%s"' % quote(value)))
    for name, continuations in rfc2231_params.items():
        value = []
        extended = False
        # Sort by number
        continuations.sort()
        # And now append all values in numerical order, converting
        # %-encodings for the encoded segments.  If any of the
        # continuation names ends in a *, then the entire string, after
        # decoding segments and concatenating, must have the charset and
        # language specifiers at the beginning of the string.
        for num, s, encoded in continuations:
            if encoded:
                s = urllib.unquote(s)
                extended = True
            value.append(s)
        value = quote(EMPTYSTRING.join(value))
        if extended:
            charset, language, value = decode_rfc2231(value)
            new_params.append((name, (charset, language, '"%s"' % value)))
        else:
            new_params.append((name, '"%s"' % value))
    return new_params
320
def collapse_rfc2231_value(value, errors='replace',
                           fallback_charset='us-ascii'):
    """Collapse a parameter value into a single string.

    A value that is not a tuple is simply unquoted and returned.  For a
    tuple, element 2 is unquoted and decoded to unicode using the charset
    in element 0 ('us-ascii' if that element is false), with errors as the
    decoding error handler.  If Python does not know the charset, the
    text is decoded with fallback_charset instead.
    """
    if not isinstance(value, tuple):
        return unquote(value)
    rawval = unquote(value[2])
    charset = value[0] or 'us-ascii'
    try:
        return unicode(rawval, charset, errors)
    except LookupError:
        # The charset is unknown to Python; retry with the fallback.
        return unicode(rawval, fallback_charset, errors)
Note: See TracBrowser for help on using the repository browser.