# Copyright (C) 2002-2007 Python Software Foundation
# Contact: email-sig@python.org

"""Email address parsing code.

Lifted directly from rfc822.py.  This should eventually be rewritten.
"""
|
---|
8 |
|
---|
# Names exported by a star-import of this module.
__all__ = ['mktime_tz', 'parsedate', 'parsedate_tz', 'quote']
|
---|
15 |
|
---|
16 | import time, calendar
|
---|
17 |
|
---|
# Separator strings used when joining parsed pieces back together.
SPACE = ' '
EMPTYSTRING = ''
COMMASPACE = ', '

# Parse a date field
# Abbreviated month names come first and the full names second, so both
# spellings of a month sit exactly twelve entries apart.
_monthnames = [
    'jan', 'feb', 'mar', 'apr', 'may', 'jun',
    'jul', 'aug', 'sep', 'oct', 'nov', 'dec',
    'january', 'february', 'march', 'april', 'may', 'june',
    'july', 'august', 'september', 'october', 'november', 'december',
]

_daynames = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun']

# The timezone table does not include the military time zones defined
# in RFC822, other than Z.  According to RFC1123, the description in
# RFC822 gets the signs wrong, so we can't rely on any such time
# zones.  RFC1123 recommends that numeric timezone indicators be used
# instead of timezone names.
#
# Offsets are written the way they appear in a header, i.e. as a signed
# HHMM number (-500 means five hours behind UTC).
_timezones = {
    'UT': 0,
    'UTC': 0,
    'GMT': 0,
    'Z': 0,
    # Atlantic (used in Canada)
    'AST': -400,
    'ADT': -300,
    # Eastern
    'EST': -500,
    'EDT': -400,
    # Central
    'CST': -600,
    'CDT': -500,
    # Mountain
    'MST': -700,
    'MDT': -600,
    # Pacific
    'PST': -800,
    'PDT': -700,
}
|
---|
43 |
|
---|
44 |
|
---|
def parsedate_tz(data):
    """Convert a date string to a 10-tuple.

    The first nine items are laid out like a time tuple: year, month, day,
    hour, minute, second, followed by the fixed placeholders 0, 1 and -1
    (weekday and day-of-year are not computed; -1 marks DST as unknown).
    The tenth item is the timezone offset from UTC in seconds, or None when
    the zone is missing or not recognized.

    Zone names are looked up in _timezones; of the military zones only 'Z'
    is known.  Any other zone must be numeric, e.g. -0500.

    Returns None when the string cannot be parsed, which includes empty and
    whitespace-only input.
    """
    if not data:
        return None
    data = data.split()
    if not data:
        # Whitespace-only input leaves nothing to look at.
        return None
    # The FWS after the comma after the day-of-week is optional, so search and
    # adjust for this.
    if data[0].endswith(',') or data[0].lower() in _daynames:
        # There's a dayname here. Skip it
        del data[0]
    else:
        i = data[0].rfind(',')
        if i >= 0:
            data[0] = data[0][i+1:]
    if len(data) == 3: # RFC 850 date, deprecated
        stuff = data[0].split('-')
        if len(stuff) == 3:
            data = stuff + data[1:]
    if len(data) == 4:
        # The zone may be glued to the time ("19:12:08-0500"); split it off,
        # keeping the sign so that negative offsets survive.
        s = data[3]
        i = s.find('+')
        if i == -1:
            i = s.find('-')
        if i > 0:
            data[3:] = [s[:i], s[i:]]
        else:
            data.append('') # Dummy tz
    if len(data) < 5:
        return None
    data = data[:5]
    [dd, mm, yy, tm, tz] = data
    if not (dd and mm and yy):
        # An RFC 850 style split can leave empty fields (e.g. "-Nov-95");
        # reject them here instead of indexing into an empty string below.
        return None
    mm = mm.lower()
    if mm not in _monthnames:
        # Day and month may come in either order.
        dd, mm = mm, dd.lower()
        if mm not in _monthnames:
            return None
    mm = _monthnames.index(mm) + 1
    if mm > 12:
        # Full month names occupy the second half of the table.
        mm -= 12
    if dd[-1] == ',':
        dd = dd[:-1]
    i = yy.find(':')
    if i > 0:
        # Year and time are swapped (ctime()-like ordering).
        yy, tm = tm, yy
    if yy[-1] == ',':
        yy = yy[:-1]
    if not yy[0].isdigit():
        # What looked like the year is really the zone.
        yy, tz = tz, yy
    if tm[-1] == ',':
        tm = tm[:-1]
    tm = tm.split(':')
    if len(tm) == 2:
        [thh, tmm] = tm
        tss = '0'
    elif len(tm) == 3:
        [thh, tmm, tss] = tm
    else:
        return None
    try:
        yy = int(yy)
        dd = int(dd)
        thh = int(thh)
        tmm = int(tmm)
        tss = int(tss)
    except ValueError:
        return None
    # Check for a yy specified in two-digit format, then convert it to the
    # appropriate four-digit format, according to the POSIX standard. RFC 822
    # calls for a two-digit yy, but RFC 2822 (which obsoletes RFC 822)
    # mandates a 4-digit yy. For more information, see the documentation for
    # the time module.
    if yy < 100:
        # The year is between 1969 and 1999 (inclusive).
        if yy > 68:
            yy += 1900
        # The year is between 2000 and 2068 (inclusive).
        else:
            yy += 2000
    tzoffset = None
    tz = tz.upper()
    if tz in _timezones:
        tzoffset = _timezones[tz]
    else:
        try:
            tzoffset = int(tz)
        except ValueError:
            # Unknown zone name: leave the offset as None.
            pass
    # Convert a timezone offset into seconds ; -0500 -> -18000
    if tzoffset:
        if tzoffset < 0:
            tzsign = -1
            tzoffset = -tzoffset
        else:
            tzsign = 1
        tzoffset = tzsign * ( (tzoffset//100)*3600 + (tzoffset % 100)*60)
    # Daylight Saving Time flag is set to -1, since DST is unknown.
    return yy, mm, dd, thh, tmm, tss, 0, 1, -1, tzoffset
|
---|
141 |
|
---|
142 |
|
---|
def parsedate(data):
    """Convert a time string to a 9-item time tuple.

    Works like parsedate_tz() but drops the trailing timezone offset.
    Anything parsedate_tz() returns that is not a tuple (i.e. None for an
    unparseable string) is passed through unchanged.
    """
    parsed = parsedate_tz(data)
    if not isinstance(parsed, tuple):
        return parsed
    return parsed[:9]
|
---|
150 |
|
---|
151 |
|
---|
def mktime_tz(data):
    """Turn a 10-tuple as returned by parsedate_tz() into a POSIX timestamp.

    Item 9 of the tuple is the zone offset in seconds.  When it is None the
    fields are interpreted as local time; otherwise they are interpreted as
    UTC and then corrected by the offset.
    """
    offset = data[9]
    if offset is not None:
        return calendar.timegm(data) - offset
    # No zone info, so localtime is better assumption than GMT
    return time.mktime(data[:8] + (-1,))
|
---|
160 |
|
---|
161 |
|
---|
def quote(str):
    """Escape a string for use inside a quoted string.

    Backslashes and double quotes are the only characters that need a
    quoted pair inside a quoted string, so each gets a backslash put in
    front of it.  The surrounding double quotes are not added.
    """
    # Backslashes must be doubled first; otherwise the backslashes added in
    # front of the double quotes would be escaped a second time.
    backslashes_escaped = str.replace('\\', '\\\\')
    return backslashes_escaped.replace('"', '\\"')
|
---|
170 |
|
---|
171 |
|
---|
class AddrlistClass:
    """Address parser class by Ben Escoto.

    To understand what this class does, it helps to have a copy of RFC 2822 in
    front of you.

    The parser is a hand-written scanner: `field' holds the header text and
    `pos' is the index of the next unread character.  Every get*() method
    consumes characters starting at `pos' and leaves `pos' just past what it
    parsed.

    Note: this class interface is deprecated and may be removed in the future.
    Use rfc822.AddressList instead.
    """

    def __init__(self, field):
        """Initialize a new instance.

        `field' is an unparsed address header field, containing
        one or more addresses.
        """
        self.specials = '()<>@,:;.\"[]'
        self.pos = 0
        self.LWS = ' \t'
        # Despite the name this holds both line-ending characters.
        self.CR = '\r\n'
        self.FWS = self.LWS + self.CR
        self.atomends = self.specials + self.LWS + self.CR
        # Note that RFC 2822 now specifies `.' as obs-phrase, meaning that it
        # is obsolete syntax.  RFC 2822 requires that we recognize obsolete
        # syntax, so allow dots in phrases.
        self.phraseends = self.atomends.replace('.', '')
        self.field = field
        # Comments collected while parsing the current address.
        self.commentlist = []

    def gotonext(self):
        """Parse up to the start of the next address.

        Skips whitespace and line endings; parenthesized comments met on the
        way are appended to self.commentlist.
        """
        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS + '\n\r':
                self.pos += 1
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            else:
                break

    def getaddrlist(self):
        """Parse all addresses.

        Returns a list containing all of the addresses, each one a
        (realname, address) pair.  A step that yields no address contributes
        the placeholder pair ('', '').
        """
        result = []
        while self.pos < len(self.field):
            ad = self.getaddress()
            if ad:
                result += ad
            else:
                result.append(('', ''))
        return result

    def getaddress(self):
        """Parse the next address.

        Returns a list of (realname, address) pairs: one pair for a single
        address, any number for a group, and an empty list when nothing
        usable was found.
        """
        self.commentlist = []
        self.gotonext()

        oldpos = self.pos
        # Take a real copy: getphraselist() appends to self.commentlist in
        # place, so an alias could not be used to restore the earlier state.
        oldcl = self.commentlist[:]
        plist = self.getphraselist()

        self.gotonext()
        returnlist = []

        if self.pos >= len(self.field):
            # Bad email address technically, no domain.
            if plist:
                returnlist = [(SPACE.join(self.commentlist), plist[0])]

        elif self.field[self.pos] in '.@':
            # email address is just an addrspec
            # this isn't very efficient since we start over
            self.pos = oldpos
            self.commentlist = oldcl
            addrspec = self.getaddrspec()
            returnlist = [(SPACE.join(self.commentlist), addrspec)]

        elif self.field[self.pos] == ':':
            # address is a group
            returnlist = []

            fieldlen = len(self.field)
            self.pos += 1
            while self.pos < len(self.field):
                self.gotonext()
                if self.pos < fieldlen and self.field[self.pos] == ';':
                    self.pos += 1
                    break
                returnlist = returnlist + self.getaddress()

        elif self.field[self.pos] == '<':
            # Address is a phrase then a route addr
            routeaddr = self.getrouteaddr()

            if self.commentlist:
                returnlist = [(SPACE.join(plist) + ' (' +
                               ' '.join(self.commentlist) + ')', routeaddr)]
            else:
                returnlist = [(SPACE.join(plist), routeaddr)]

        else:
            if plist:
                returnlist = [(SPACE.join(self.commentlist), plist[0])]
            elif self.field[self.pos] in self.specials:
                # Step over a stray special so the caller's loop makes
                # progress.
                self.pos += 1

        self.gotonext()
        if self.pos < len(self.field) and self.field[self.pos] == ',':
            self.pos += 1
        return returnlist

    def getrouteaddr(self):
        """Parse a route address (Return-path value).

        This method just skips all the route stuff and returns the addrspec.
        Returns None when the parser is not positioned on a `<'.
        """
        if self.field[self.pos] != '<':
            return

        expectroute = False
        self.pos += 1
        self.gotonext()
        adlist = ''
        while self.pos < len(self.field):
            if expectroute:
                # The domain of a route element is parsed and thrown away.
                self.getdomain()
                expectroute = False
            elif self.field[self.pos] == '>':
                self.pos += 1
                break
            elif self.field[self.pos] == '@':
                self.pos += 1
                expectroute = True
            elif self.field[self.pos] == ':':
                self.pos += 1
            else:
                adlist = self.getaddrspec()
                # Step over the character after the addr-spec (normally the
                # closing `>').
                self.pos += 1
                break
            self.gotonext()

        return adlist

    def getaddrspec(self):
        """Parse an RFC 2822 addr-spec.

        Returns the local part alone when no `@' follows it, and the empty
        string when an `@' is present but the domain after it is invalid.
        """
        aslist = []

        self.gotonext()
        while self.pos < len(self.field):
            if self.field[self.pos] == '.':
                aslist.append('.')
                self.pos += 1
            elif self.field[self.pos] == '"':
                aslist.append('"%s"' % quote(self.getquote()))
            elif self.field[self.pos] in self.atomends:
                break
            else:
                aslist.append(self.getatom())
            self.gotonext()

        if self.pos >= len(self.field) or self.field[self.pos] != '@':
            return EMPTYSTRING.join(aslist)

        aslist.append('@')
        self.pos += 1
        self.gotonext()
        domain = self.getdomain()
        if not domain:
            # Invalid domain: return an empty address rather than a bare
            # local part, so the failure is visible to the caller.
            return EMPTYSTRING
        return EMPTYSTRING.join(aslist) + domain

    def getdomain(self):
        """Get the complete domain name from an address.

        Returns the empty string when the domain is followed by another `@'.
        """
        sdlist = []
        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS:
                self.pos += 1
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            elif self.field[self.pos] == '[':
                sdlist.append(self.getdomainliteral())
            elif self.field[self.pos] == '.':
                self.pos += 1
                sdlist.append('.')
            elif self.field[self.pos] == '@':
                # Don't accept a domain with a second `@', as in
                # `a@malicious.org@important.com'; quietly stopping here
                # would report `a@malicious.org' as the address.
                return EMPTYSTRING
            elif self.field[self.pos] in self.atomends:
                break
            else:
                sdlist.append(self.getatom())
        return EMPTYSTRING.join(sdlist)

    def getdelimited(self, beginchar, endchars, allowcomments=True):
        """Parse a header fragment delimited by special characters.

        `beginchar' is the start character for the fragment.
        If self is not looking at an instance of `beginchar' then
        getdelimited returns the empty string.

        `endchars' is a sequence of allowable end-delimiting characters.
        Parsing stops when one of these is encountered.

        If `allowcomments' is non-zero, embedded RFC 2822 comments are allowed
        within the parsed fragment.

        Returns the fragment without its delimiters and with the backslashes
        of quoted pairs removed.
        """
        if self.field[self.pos] != beginchar:
            return ''

        slist = ['']
        # True right after a backslash: the next character is taken
        # literally, even if it is an end character.
        escaped = False
        self.pos += 1
        while self.pos < len(self.field):
            if escaped:
                slist.append(self.field[self.pos])
                escaped = False
            elif self.field[self.pos] in endchars:
                self.pos += 1
                break
            elif allowcomments and self.field[self.pos] == '(':
                slist.append(self.getcomment())
                continue        # have already advanced pos from getcomment
            elif self.field[self.pos] == '\\':
                escaped = True
            else:
                slist.append(self.field[self.pos])
            self.pos += 1

        return EMPTYSTRING.join(slist)

    def getquote(self):
        """Get a quote-delimited fragment from self's field."""
        return self.getdelimited('"', '"\r', False)

    def getcomment(self):
        """Get a parenthesis-delimited fragment from self's field."""
        return self.getdelimited('(', ')\r', True)

    def getdomainliteral(self):
        """Parse an RFC 2822 domain-literal."""
        return '[%s]' % self.getdelimited('[', ']\r', False)

    def getatom(self, atomends=None):
        """Parse an RFC 2822 atom.

        Optional atomends specifies a different set of end token delimiters
        (the default is to use self.atomends).  This is used e.g. in
        getphraselist() since phrase endings must not include the `.' (which
        is legal in phrases)."""
        atomlist = ['']
        if atomends is None:
            atomends = self.atomends

        while self.pos < len(self.field):
            if self.field[self.pos] in atomends:
                break
            else:
                atomlist.append(self.field[self.pos])
            self.pos += 1

        return EMPTYSTRING.join(atomlist)

    def getphraselist(self):
        """Parse a sequence of RFC 2822 phrases.

        A phrase is a sequence of words, which are in turn either RFC 2822
        atoms or quoted-strings.  Phrases are canonicalized by squeezing all
        runs of continuous whitespace into one space.

        Returns the list of words; comments met on the way are appended to
        self.commentlist.
        """
        plist = []

        while self.pos < len(self.field):
            if self.field[self.pos] in self.FWS:
                self.pos += 1
            elif self.field[self.pos] == '"':
                plist.append(self.getquote())
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            elif self.field[self.pos] in self.phraseends:
                break
            else:
                plist.append(self.getatom(self.phraseends))

        return plist
|
---|
451 |
|
---|
class AddressList(AddrlistClass):
    """An AddressList encapsulates a list of parsed RFC 2822 addresses.

    The parsed (realname, address) pairs live in the `addresslist'
    attribute.  The arithmetic operators treat two instances like sets of
    such pairs.
    """

    def __init__(self, field):
        AddrlistClass.__init__(self, field)
        # An empty or missing field is not parsed at all.
        self.addresslist = self.getaddrlist() if field else []

    def __len__(self):
        return len(self.addresslist)

    def __add__(self, other):
        # Set union
        union = AddressList(None)
        union.addresslist = self.addresslist[:]
        union.addresslist.extend(
            entry for entry in other.addresslist
            if entry not in self.addresslist)
        return union

    def __iadd__(self, other):
        # Set union, in-place
        for entry in other.addresslist:
            if entry in self.addresslist:
                continue
            self.addresslist.append(entry)
        return self

    def __sub__(self, other):
        # Set difference
        difference = AddressList(None)
        difference.addresslist.extend(
            entry for entry in self.addresslist
            if entry not in other.addresslist)
        return difference

    def __isub__(self, other):
        # Set difference, in-place
        for entry in other.addresslist:
            if entry not in self.addresslist:
                continue
            self.addresslist.remove(entry)
        return self

    def __getitem__(self, index):
        # Make indexing, slices, and 'in' work
        return self.addresslist[index]
|
---|