source: trunk/essentials/dev-lang/python/Lib/rfc822.py

Last change on this file was 3225, checked in by bird, 18 years ago

Python 2.5

File size: 32.3 KB
Line 
1"""RFC 2822 message manipulation.
2
3Note: This is only a very rough sketch of a full RFC-822 parser; in particular
4the tokenizing of addresses does not adhere to all the quoting rules.
5
Note: RFC 2822 is a long-awaited update to RFC 822.  This module should
conform to RFC 2822, and is thus misnamed (it's not worth renaming it).  Some
effort has been made to update it for RFC 2822, but a thorough audit has not
been performed.  Consider any RFC 2822 non-conformance to be a bug.
10
11 RFC 2822: http://www.faqs.org/rfcs/rfc2822.html
12 RFC 822 : http://www.faqs.org/rfcs/rfc822.html (obsolete)
13
14Directions for use:
15
16To create a Message object: first open a file, e.g.:
17
18 fp = open(file, 'r')
19
20You can use any other legal way of getting an open file object, e.g. use
21sys.stdin or call os.popen(). Then pass the open file object to the Message()
22constructor:
23
24 m = Message(fp)
25
26This class can work with any input object that supports a readline method. If
27the input object has seek and tell capability, the rewindbody method will
28work; also illegal lines will be pushed back onto the input stream. If the
29input object lacks seek but has an `unread' method that can push back a line
30of input, Message will use that to push back illegal lines. Thus this class
31can be used to parse messages coming from a buffered stream.
32
The optional `seekable' argument is provided as a workaround for certain stdio
libraries in which tell() discards buffered data before discovering that the
lseek() system call doesn't work.  For maximum portability, you should set the
seekable argument to zero to prevent that initial tell() call when passing in
an unseekable object such as a file object created from a socket object.  If
it is 1 on entry -- which it is by default -- the tell() method of the open
file object is called once; if this raises an exception, seekable is reset to
0.  For other nonzero values of seekable, this test is not made.
41
42To get the text of a particular header there are several methods:
43
44 str = m.getheader(name)
45 str = m.getrawheader(name)
46
47where name is the name of the header, e.g. 'Subject'. The difference is that
48getheader() strips the leading and trailing whitespace, while getrawheader()
49doesn't. Both functions retain embedded whitespace (including newlines)
50exactly as they are specified in the header, and leave the case of the text
51unchanged.
52
53For addresses and address lists there are functions
54
55 realname, mailaddress = m.getaddr(name)
56 list = m.getaddrlist(name)
57
58where the latter returns a list of (realname, mailaddr) tuples.
59
60There is also a method
61
62 time = m.getdate(name)
63
64which parses a Date-like field and returns a time-compatible tuple,
65i.e. a tuple such as returned by time.localtime() or accepted by
66time.mktime().
67
68See the class definition for lower level access methods.
69
70There are also some utility functions here.
71"""
72# Cleanup and extensions by Eric S. Raymond <esr@thyrsus.com>
73
74import time
75
# Names exported by ``from rfc822 import *``.
__all__ = ["Message","AddressList","parsedate","parsedate_tz","mktime_tz"]

# The two line values that Message.islast() accepts as the blank line that
# terminates the headers: a bare newline, and CRLF for data read from sockets.
_blanklines = ('\r\n', '\n')            # Optimization for islast()
79
80
class Message:
    """Represents a single RFC 2822-compliant message.

    The headers are read from a file-like object when the instance is
    created.  They are stored twice: ``self.headers`` is the list of raw
    header lines in the order they were read, and ``self.dict`` maps each
    lower-cased header name to the stripped value of its *last* occurrence.
    The mapping-style methods (``m[name]``, ``name in m``, keys(), ...) all
    work on ``self.dict``.
    """

    def __init__(self, fp, seekable=1):
        """Initialize the class instance and read the headers.

        `fp' is any object with a readline() method.  `seekable' says
        whether fp.tell()/fp.seek() may be used: when it is exactly 1 (the
        default) tell() is probed once and seekable is reset to 0 if the
        probe fails; any other value is trusted as given.
        """
        if seekable == 1:
            # Exercise tell() to make sure it works
            # (and then assume seek() works, too)
            try:
                fp.tell()
            except (AttributeError, IOError):
                seekable = 0
        self.fp = fp
        self.seekable = seekable
        self.startofheaders = None
        self.startofbody = None
        #
        if self.seekable:
            try:
                self.startofheaders = self.fp.tell()
            except IOError:
                self.seekable = 0
        #
        self.readheaders()
        #
        if self.seekable:
            try:
                self.startofbody = self.fp.tell()
            except IOError:
                self.seekable = 0

    def rewindbody(self):
        """Rewind the file to the start of the body (if seekable).

        Raises IOError if the underlying file is not seekable.
        """
        if not self.seekable:
            # Call form of raise: valid on Python 2 and Python 3 alike.
            raise IOError("unseekable file")
        self.fp.seek(self.startofbody)

    def readheaders(self):
        """Read header lines.

        Read header lines up to the entirely blank line that terminates them.
        The (normally blank) line that ends the headers is skipped, but not
        included in the returned list.  If a non-header line ends the headers,
        (which is an error), an attempt is made to backspace over it; it is
        never included in the returned list.

        The variable self.status is set to the empty string if all went well,
        otherwise it is an error message.  The variable self.headers is a
        completely uninterpreted list of lines contained in the header (so
        printing them will reproduce the header exactly as it appears in the
        file).
        """
        self.dict = {}
        self.unixfrom = ''
        self.headers = lst = []
        self.status = ''
        headerseen = ""
        firstline = 1
        startofline = unread = tell = None
        # Pick the push-back strategy once: prefer fp.unread(), else
        # remember each line's offset so we can seek() back to it.
        if hasattr(self.fp, 'unread'):
            unread = self.fp.unread
        elif self.seekable:
            tell = self.fp.tell
        while 1:
            if tell:
                try:
                    startofline = tell()
                except IOError:
                    startofline = tell = None
                    self.seekable = 0
            line = self.fp.readline()
            if not line:
                self.status = 'EOF in headers'
                break
            # Skip unix From name time lines
            if firstline and line.startswith('From '):
                self.unixfrom = self.unixfrom + line
                continue
            firstline = 0
            if headerseen and line[0] in ' \t':
                # It's a continuation line.
                lst.append(line)
                x = (self.dict[headerseen] + "\n " + line.strip())
                self.dict[headerseen] = x.strip()
                continue
            elif self.iscomment(line):
                # It's a comment.  Ignore it.
                continue
            elif self.islast(line):
                # Note! No pushback here!  The delimiter line gets eaten.
                break
            headerseen = self.isheader(line)
            if headerseen:
                # It's a legal header line, save it.
                lst.append(line)
                self.dict[headerseen] = line[len(headerseen)+1:].strip()
                continue
            else:
                # It's not a header line; throw it back and stop here.
                if not self.dict:
                    self.status = 'No headers'
                else:
                    self.status = 'Non-header line where header expected'
                # Try to undo the read.
                if unread:
                    unread(line)
                elif tell:
                    self.fp.seek(startofline)
                else:
                    self.status = self.status + '; bad seek'
                break

    def isheader(self, line):
        """Determine whether a given line is a legal header.

        This method should return the header name, suitably canonicalized.
        You may override this method in order to use Message parsing on tagged
        data in RFC 2822-like formats with special header formats.

        The default returns the lower-cased text before the first colon, or
        None when the line has no colon or starts with one.
        """
        i = line.find(':')
        if i > 0:
            return line[:i].lower()
        return None

    def islast(self, line):
        """Determine whether a line is a legal end of RFC 2822 headers.

        You may override this method if your application wants to bend the
        rules, e.g. to strip trailing whitespace, or to recognize MH template
        separators ('--------').  For convenience (e.g. for code reading from
        sockets) a line consisting of \\r\\n also matches.
        """
        return line in _blanklines

    def iscomment(self, line):
        """Determine whether a line should be skipped entirely.

        You may override this method in order to use Message parsing on tagged
        data in RFC 2822-like formats that support embedded comments or
        free-text data.  The default never treats a line as a comment.
        """
        return False

    def getallmatchingheaders(self, name):
        """Find all header lines matching a given header name.

        Look through the list of headers and find all lines matching a given
        header name (and their continuation lines).  A list of the lines is
        returned, without interpretation.  If the header does not occur, an
        empty list is returned.  If the header occurs multiple times, all
        occurrences are returned.  Case is not important in the header name.
        """
        name = name.lower() + ':'
        n = len(name)
        lst = []
        hit = 0
        for line in self.headers:
            if line[:n].lower() == name:
                hit = 1
            elif not line[:1].isspace():
                # A new, different header starts here; a continuation line
                # (leading whitespace) leaves `hit' as it was.
                hit = 0
            if hit:
                lst.append(line)
        return lst

    def getfirstmatchingheader(self, name):
        """Get the first header line matching name.

        This is similar to getallmatchingheaders, but it returns only the
        first matching header (and its continuation lines).
        """
        name = name.lower() + ':'
        n = len(name)
        lst = []
        hit = 0
        for line in self.headers:
            if hit:
                # Already inside the match: stop at the next non-continuation.
                if not line[:1].isspace():
                    break
            elif line[:n].lower() == name:
                hit = 1
            if hit:
                lst.append(line)
        return lst

    def getrawheader(self, name):
        """A higher-level interface to getfirstmatchingheader().

        Return a string containing the literal text of the header but with the
        keyword stripped.  All leading, trailing and embedded whitespace is
        kept in the string, however.  Return None if the header does not
        occur.
        """
        lst = self.getfirstmatchingheader(name)
        if not lst:
            return None
        # Drop "<name>:" from the first line only.
        lst[0] = lst[0][len(name) + 1:]
        return ''.join(lst)

    def getheader(self, name, default=None):
        """Get the header value for a name.

        This is the normal interface: it returns a stripped version of the
        header value for a given header name, or `default' (None unless
        given) if it doesn't exist.  This uses the dictionary version which
        finds the *last* such header.
        """
        return self.dict.get(name.lower(), default)
    get = getheader

    def getheaders(self, name):
        """Get all values for a header.

        This returns a list of values for headers given more than once; each
        value in the result list is stripped in the same way as the result of
        getheader().  If the header is not given, return an empty list.
        """
        result = []
        current = ''
        have_header = 0
        for s in self.getallmatchingheaders(name):
            if s[0].isspace():
                # Continuation line: fold it into the value being built.
                if current:
                    current = "%s\n %s" % (current, s.strip())
                else:
                    current = s.strip()
            else:
                if have_header:
                    result.append(current)
                current = s[s.find(":") + 1:].strip()
                have_header = 1
        if have_header:
            result.append(current)
        return result

    def getaddr(self, name):
        """Get a single address from a header, as a tuple.

        An example return value:
        ('Guido van Rossum', 'guido@cwi.nl')

        Returns (None, None) when the header yields no address.
        """
        # New, by Ben Escoto
        alist = self.getaddrlist(name)
        if alist:
            return alist[0]
        else:
            return (None, None)

    def getaddrlist(self, name):
        """Get a list of addresses from a header.

        Retrieves a list of addresses from a header, where each address is a
        tuple as returned by getaddr().  Scans all named headers, so it works
        properly with multiple To: or Cc: headers for example.
        """
        raw = []
        for h in self.getallmatchingheaders(name):
            if h[0] in ' \t':
                raw.append(h)
            else:
                if raw:
                    # Separate this header's addresses from the previous one's.
                    raw.append(', ')
                i = h.find(':')
                if i > 0:
                    addr = h[i+1:]
                raw.append(addr)
        alladdrs = ''.join(raw)
        a = AddressList(alladdrs)
        return a.addresslist

    def getdate(self, name):
        """Retrieve a date field from a header.

        Retrieves a date field from the named header, returning a tuple
        compatible with time.mktime().  Returns None when the header is
        absent; otherwise returns whatever parsedate() returns.
        """
        try:
            data = self[name]
        except KeyError:
            return None
        return parsedate(data)

    def getdate_tz(self, name):
        """Retrieve a date field from a header as a 10-tuple.

        The first 9 elements make up a tuple compatible with time.mktime(),
        and the 10th is the offset of the poster's time zone from GMT/UTC.
        Returns None when the header is absent; otherwise returns whatever
        parsedate_tz() returns.
        """
        try:
            data = self[name]
        except KeyError:
            return None
        return parsedate_tz(data)


    # Access as a dictionary (only finds *last* header of each type):

    def __len__(self):
        """Get the number of headers in a message."""
        return len(self.dict)

    def __getitem__(self, name):
        """Get a specific header, as from a dictionary."""
        return self.dict[name.lower()]

    def __setitem__(self, name, value):
        """Set the value of a header.

        Note: This is not a perfect inversion of __getitem__, because any
        changed headers get stuck at the end of the raw-headers list rather
        than where the altered header was.
        """
        del self[name] # Won't fail if it doesn't exist
        self.dict[name.lower()] = value
        text = name + ": " + value
        for line in text.split("\n"):
            self.headers.append(line + "\n")

    def __delitem__(self, name):
        """Delete all occurrences of a specific header, if it is present."""
        name = name.lower()
        if name not in self.dict:
            return
        del self.dict[name]
        name = name + ':'
        n = len(name)
        lst = []
        hit = 0
        for i, line in enumerate(self.headers):
            if line[:n].lower() == name:
                hit = 1
            elif not line[:1].isspace():
                hit = 0
            if hit:
                lst.append(i)
        # Delete from the end so the remaining indices stay valid.
        for i in reversed(lst):
            del self.headers[i]

    def setdefault(self, name, default=""):
        """Return the value of header `name', adding it first if missing.

        When the header is absent it is appended to the raw header list and
        stored in the dictionary with the value `default', which is returned.
        """
        lowername = name.lower()
        if lowername in self.dict:
            return self.dict[lowername]
        else:
            text = name + ": " + default
            for line in text.split("\n"):
                self.headers.append(line + "\n")
            self.dict[lowername] = default
            return default

    def has_key(self, name):
        """Determine whether a message contains the named header."""
        return name.lower() in self.dict

    def __contains__(self, name):
        """Determine whether a message contains the named header."""
        return name.lower() in self.dict

    def __iter__(self):
        """Iterate over the lower-cased header names."""
        return iter(self.dict)

    def keys(self):
        """Get all of a message's header field names."""
        return self.dict.keys()

    def values(self):
        """Get all of a message's header field values."""
        return self.dict.values()

    def items(self):
        """Get all of a message's headers.

        Returns a list of name, value tuples.
        """
        return self.dict.items()

    def __str__(self):
        """Return the raw header lines joined back into one string."""
        return ''.join(self.headers)
459
460
461# Utility functions
462# -----------------
463
464# XXX Should fix unquote() and quote() to be really conformant.
465# XXX The inverses of the parse functions may also be useful.
466
467
def unquote(s):
    """Strip one layer of "..." or <...> delimiters from a string.

    For a double-quoted string the escapes \\\\ and \\" inside it are also
    undone.  Strings shorter than two characters, or without a matching
    pair of delimiters, are returned unchanged.
    """
    if len(s) <= 1:
        return s
    first, last = s[0], s[-1]
    inner = s[1:-1]
    if first == '"' and last == '"':
        unescaped = inner.replace('\\\\', '\\')
        return unescaped.replace('\\"', '"')
    if first == '<' and last == '>':
        return inner
    return s
476
477
def quote(s):
    """Escape a string for use inside a double-quoted string.

    Every backslash is doubled and every double quote gets a backslash in
    front of it.  No surrounding quote characters are added.
    """
    escaped_backslashes = s.replace('\\', '\\\\')
    return escaped_backslashes.replace('"', '\\"')
481
482
def parseaddr(address):
    """Parse an address into a (realname, mailaddr) tuple.

    Only the first address found in `address' is returned; when nothing
    can be parsed the result is (None, None).
    """
    parsed = AddressList(address).addresslist
    if parsed:
        return parsed[0]
    return (None, None)
490
491
class AddrlistClass:
    """Address parser class by Ben Escoto.

    To understand what this class does, it helps to have a copy of
    RFC 2822 in front of you.

    http://www.faqs.org/rfcs/rfc2822.html

    The parser is a cursor (self.pos) moving over self.field; each get*()
    method consumes the construct it is named after and advances the cursor.

    Note: this class interface is deprecated and may be removed in the future.
    Use rfc822.AddressList instead.
    """

    def __init__(self, field):
        """Initialize a new instance.

        `field' is an unparsed address header field, containing one or more
        addresses.
        """
        self.specials = '()<>@,:;.\"[]'
        self.pos = 0
        self.LWS = ' \t'
        self.CR = '\r\n'
        self.atomends = self.specials + self.LWS + self.CR
        # Note that RFC 2822 now specifies `.' as obs-phrase, meaning that it
        # is obsolete syntax.  RFC 2822 requires that we recognize obsolete
        # syntax, so allow dots in phrases.
        self.phraseends = self.atomends.replace('.', '')
        self.field = field
        self.commentlist = []

    def gotonext(self):
        """Parse up to the start of the next address.

        Skips whitespace and line breaks; any comment met on the way is
        appended to self.commentlist.
        """
        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS + '\n\r':
                self.pos = self.pos + 1
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            else:
                break

    def getaddrlist(self):
        """Parse all addresses.

        Returns a list containing all of the addresses, each one a
        (realname, mailaddr) tuple.  An empty or unparsable entry (such as
        the gap in "a@b,, c@d") is skipped and parsing continues with the
        rest of the field.
        """
        result = []
        # getaddress() always consumes at least one character while input
        # remains, so looping on the cursor (rather than on "did the last
        # call return anything") terminates and does not stop early at an
        # empty entry.
        while self.pos < len(self.field):
            result += self.getaddress()
        return result

    def getaddress(self):
        """Parse the next address.

        Returns a list of (realname, mailaddr) tuples: one entry for a
        single mailbox, several for a group, none if nothing usable was
        found.  A trailing comma is consumed.
        """
        self.commentlist = []
        self.gotonext()

        oldpos = self.pos
        # Snapshot, not alias: getphraselist() appends to self.commentlist,
        # and the addr-spec branch below must restore the earlier state.
        oldcl = self.commentlist[:]
        plist = self.getphraselist()

        self.gotonext()
        returnlist = []

        if self.pos >= len(self.field):
            # Bad email address technically, no domain.
            if plist:
                returnlist = [(' '.join(self.commentlist), plist[0])]

        elif self.field[self.pos] in '.@':
            # email address is just an addrspec
            # this isn't very efficient since we start over
            self.pos = oldpos
            self.commentlist = oldcl
            addrspec = self.getaddrspec()
            returnlist = [(' '.join(self.commentlist), addrspec)]

        elif self.field[self.pos] == ':':
            # address is a group
            returnlist = []

            fieldlen = len(self.field)
            self.pos += 1
            while self.pos < len(self.field):
                self.gotonext()
                if self.pos < fieldlen and self.field[self.pos] == ';':
                    self.pos += 1
                    break
                returnlist = returnlist + self.getaddress()

        elif self.field[self.pos] == '<':
            # Address is a phrase then a route addr
            routeaddr = self.getrouteaddr()

            if self.commentlist:
                returnlist = [(' '.join(plist) + ' (' +
                               ' '.join(self.commentlist) + ')', routeaddr)]
            else:
                returnlist = [(' '.join(plist), routeaddr)]

        else:
            if plist:
                returnlist = [(' '.join(self.commentlist), plist[0])]
            elif self.field[self.pos] in self.specials:
                # Stray special character: step over it.
                self.pos += 1

        self.gotonext()
        if self.pos < len(self.field) and self.field[self.pos] == ',':
            self.pos += 1
        return returnlist

    def getrouteaddr(self):
        """Parse a route address (Return-path value).

        This method just skips all the route stuff and returns the addrspec.
        Returns None if the cursor is not on a `<'.
        """
        if self.field[self.pos] != '<':
            return

        expectroute = 0
        self.pos += 1
        self.gotonext()
        adlist = ""
        while self.pos < len(self.field):
            if expectroute:
                self.getdomain()
                expectroute = 0
            elif self.field[self.pos] == '>':
                self.pos += 1
                break
            elif self.field[self.pos] == '@':
                self.pos += 1
                expectroute = 1
            elif self.field[self.pos] == ':':
                self.pos += 1
            else:
                adlist = self.getaddrspec()
                # Step over the character after the addr-spec (normally `>').
                self.pos += 1
                break
            self.gotonext()

        return adlist

    def getaddrspec(self):
        """Parse an RFC 2822 addr-spec."""
        aslist = []

        self.gotonext()
        while self.pos < len(self.field):
            if self.field[self.pos] == '.':
                aslist.append('.')
                self.pos += 1
            elif self.field[self.pos] == '"':
                aslist.append('"%s"' % self.getquote())
            elif self.field[self.pos] in self.atomends:
                break
            else:
                aslist.append(self.getatom())
            self.gotonext()

        if self.pos >= len(self.field) or self.field[self.pos] != '@':
            # Local part only; there is no domain to append.
            return ''.join(aslist)

        aslist.append('@')
        self.pos += 1
        self.gotonext()
        return ''.join(aslist) + self.getdomain()

    def getdomain(self):
        """Get the complete domain name from an address."""
        sdlist = []
        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS:
                self.pos += 1
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            elif self.field[self.pos] == '[':
                sdlist.append(self.getdomainliteral())
            elif self.field[self.pos] == '.':
                self.pos += 1
                sdlist.append('.')
            elif self.field[self.pos] in self.atomends:
                break
            else:
                sdlist.append(self.getatom())
        return ''.join(sdlist)

    def getdelimited(self, beginchar, endchars, allowcomments = 1):
        """Parse a header fragment delimited by special characters.

        `beginchar' is the start character for the fragment.  If self is not
        looking at an instance of `beginchar' then getdelimited returns the
        empty string.

        `endchars' is a sequence of allowable end-delimiting characters.
        Parsing stops when one of these is encountered.

        If `allowcomments' is non-zero, embedded RFC 2822 comments are allowed
        within the parsed fragment.

        Returns the text between the delimiters with backslash escapes
        resolved (the backslash is dropped, the escaped character kept).
        """
        if self.field[self.pos] != beginchar:
            return ''

        slist = ['']
        escaped = 0
        self.pos += 1
        while self.pos < len(self.field):
            if escaped == 1:
                # Previous character was a backslash: take this one literally.
                slist.append(self.field[self.pos])
                escaped = 0
            elif self.field[self.pos] in endchars:
                self.pos += 1
                break
            elif allowcomments and self.field[self.pos] == '(':
                slist.append(self.getcomment())
                continue        # have already advanced pos from getcomment
            elif self.field[self.pos] == '\\':
                escaped = 1
            else:
                slist.append(self.field[self.pos])
            self.pos += 1

        return ''.join(slist)

    def getquote(self):
        """Get a quote-delimited fragment from self's field."""
        return self.getdelimited('"', '"\r', 0)

    def getcomment(self):
        """Get a parenthesis-delimited fragment from self's field."""
        return self.getdelimited('(', ')\r', 1)

    def getdomainliteral(self):
        """Parse an RFC 2822 domain-literal."""
        return '[%s]' % self.getdelimited('[', ']\r', 0)

    def getatom(self, atomends=None):
        """Parse an RFC 2822 atom.

        Optional atomends specifies a different set of end token delimiters
        (the default is to use self.atomends).  This is used e.g. in
        getphraselist() since phrase endings must not include the `.' (which
        is legal in phrases)."""
        atomlist = ['']
        if atomends is None:
            atomends = self.atomends

        while self.pos < len(self.field):
            if self.field[self.pos] in atomends:
                break
            else:
                atomlist.append(self.field[self.pos])
            self.pos += 1

        return ''.join(atomlist)

    def getphraselist(self):
        """Parse a sequence of RFC 2822 phrases.

        A phrase is a sequence of words, which are in turn either RFC 2822
        atoms or quoted-strings.  Phrases are canonicalized by squeezing all
        runs of continuous whitespace into one space.

        Returns the list of words; comments met on the way are appended to
        self.commentlist.
        """
        plist = []

        while self.pos < len(self.field):
            if self.field[self.pos] in self.LWS:
                self.pos += 1
            elif self.field[self.pos] == '"':
                plist.append(self.getquote())
            elif self.field[self.pos] == '(':
                self.commentlist.append(self.getcomment())
            elif self.field[self.pos] in self.phraseends:
                break
            else:
                plist.append(self.getatom(self.phraseends))

        return plist
765
class AddressList(AddrlistClass):
    """An AddressList encapsulates a list of parsed RFC 2822 addresses.

    The parsed (realname, mailaddr) tuples live in self.addresslist.  The
    arithmetic operators treat two lists as sets of such tuples.
    """

    def __init__(self, field):
        """Parse `field'; a false value (e.g. None) gives an empty list."""
        AddrlistClass.__init__(self, field)
        self.addresslist = []
        if field:
            self.addresslist = self.getaddrlist()

    def __len__(self):
        """Return the number of addresses held."""
        return len(self.addresslist)

    def __str__(self):
        """Return the addresses in canonical form, separated by ", "."""
        dumped = [dump_address_pair(pair) for pair in self.addresslist]
        return ", ".join(dumped)

    def __add__(self, other):
        """Set union: a new list with other's addresses not already in self."""
        union = AddressList(None)
        extras = [pair for pair in other.addresslist
                  if pair not in self.addresslist]
        union.addresslist = self.addresslist[:] + extras
        return union

    def __iadd__(self, other):
        """Set union, in-place."""
        mine = self.addresslist
        for pair in other.addresslist:
            if pair not in mine:
                mine.append(pair)
        return self

    def __sub__(self, other):
        """Set difference: a new list of self's addresses not in other."""
        difference = AddressList(None)
        difference.addresslist.extend(
            [pair for pair in self.addresslist
             if pair not in other.addresslist])
        return difference

    def __isub__(self, other):
        """Set difference, in-place."""
        mine = self.addresslist
        for pair in other.addresslist:
            if pair in mine:
                mine.remove(pair)
        return self

    def __getitem__(self, index):
        """Make indexing, slices, and 'in' work."""
        return self.addresslist[index]
815
def dump_address_pair(pair):
    """Dump a (name, address) pair in a canonicalized form.

    With a non-empty name the result is '"name" <address>'; otherwise it
    is the bare address.
    """
    realname = pair[0]
    if not realname:
        return pair[1]
    return ''.join(['"', realname, '" <', pair[1], '>'])
822
# Parse a date field

# Lower-cased month names recognized by parsedate_tz().  The twelve
# abbreviations come first and the twelve full names second, so a name found
# at index i stands for month (i % 12) + 1.
_monthnames = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul',
               'aug', 'sep', 'oct', 'nov', 'dec',
               'january', 'february', 'march', 'april', 'may', 'june', 'july',
               'august', 'september', 'october', 'november', 'december']
# Lower-cased weekday abbreviations; parsedate_tz() drops a leading token
# that matches one of them.
_daynames = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun']

# The timezone table does not include the military time zones defined
# in RFC822, other than Z.  According to RFC1123, the description in
# RFC822 gets the signs wrong, so we can't rely on any such time
# zones.  RFC1123 recommends that numeric timezone indicators be used
# instead of timezone names.
#
# Values are signed +/-HHMM numbers (e.g. -500 is five hours behind UTC);
# parsedate_tz() converts them to seconds.

_timezones = {'UT':0, 'UTC':0, 'GMT':0, 'Z':0,
              'AST': -400, 'ADT': -300,  # Atlantic (used in Canada)
              'EST': -500, 'EDT': -400,  # Eastern
              'CST': -600, 'CDT': -500,  # Central
              'MST': -700, 'MDT': -600,  # Mountain
              'PST': -800, 'PDT': -700   # Pacific
              }
844
845
def parsedate_tz(data):
    """Convert a date string to a 10-tuple.

    Returns (year, month, day, hour, minute, second, 0, 1, 0, tzoffset),
    where the three constants fill the weekday, yearday and DST slots of a
    time tuple and tzoffset is the zone's offset from UTC in seconds, or
    None when no zone was given or it was not recognized.  Zone names are
    looked up in _timezones (of the military zones only `Z' is known);
    anything else is read as a numeric +/-HHMM offset.

    Returns None if `data' is empty, whitespace only, or cannot be parsed.
    """
    if not data:
        return None
    data = data.split()
    if not data:
        # Whitespace-only input: nothing to parse.
        return None
    if data[0][-1] in (',', '.') or data[0].lower() in _daynames:
        # There's a dayname here. Skip it
        del data[0]
    if len(data) == 3: # RFC 850 date, deprecated
        stuff = data[0].split('-')
        if len(stuff) == 3:
            data = stuff + data[1:]
    if len(data) == 4:
        # The zone may be glued to the time, as in "08:49:37+0100" or
        # "08:49:37-0500"; split it off, otherwise add an empty zone.
        s = data[3]
        i = s.find('+')
        if i > 0:
            data[3:] = [s[:i], s[i+1:]]
        else:
            i = s.find('-')
            if i > 0:
                # Keep the sign so the offset stays negative.
                data[3:] = [s[:i], s[i:]]
            else:
                data.append('') # Dummy tz
    if len(data) < 5:
        return None
    data = data[:5]
    [dd, mm, yy, tm, tz] = data
    mm = mm.lower()
    if mm not in _monthnames:
        # Perhaps the month comes before the day ("Nov 6"); swap and retry.
        dd, mm = mm, dd.lower()
        if mm not in _monthnames:
            return None
    mm = _monthnames.index(mm)+1
    if mm > 12:
        # Matched a full month name from the second half of the table.
        mm = mm - 12
    if dd[-1] == ',':
        dd = dd[:-1]
    i = yy.find(':')
    if i > 0:
        # The "year" looks like a time: year and time are swapped.
        yy, tm = tm, yy
    if yy[-1] == ',':
        yy = yy[:-1]
    if not yy[0].isdigit():
        # The "year" is not numeric: year and zone are swapped.
        yy, tz = tz, yy
    if tm[-1] == ',':
        tm = tm[:-1]
    tm = tm.split(':')
    if len(tm) == 2:
        [thh, tmm] = tm
        tss = '0'
    elif len(tm) == 3:
        [thh, tmm, tss] = tm
    else:
        return None
    try:
        yy = int(yy)
        dd = int(dd)
        thh = int(thh)
        tmm = int(tmm)
        tss = int(tss)
    except ValueError:
        return None
    tzoffset = None
    tz = tz.upper()
    if tz in _timezones:
        tzoffset = _timezones[tz]
    else:
        try:
            tzoffset = int(tz)
        except ValueError:
            pass
    # Convert a timezone offset into seconds ; -0500 -> -18000
    if tzoffset:
        if tzoffset < 0:
            tzsign = -1
            tzoffset = -tzoffset
        else:
            tzsign = 1
        tzoffset = tzsign * ( (tzoffset//100)*3600 + (tzoffset % 100)*60)
    return (yy, mm, dd, thh, tmm, tss, 0, 1, 0, tzoffset)
924
925
def parsedate(data):
    """Convert a time string to a 9-element time tuple.

    This is parsedate_tz() without the trailing timezone offset; None is
    returned when parsedate_tz() returns None.
    """
    parsed = parsedate_tz(data)
    if parsed is not None:
        return parsed[:9]
    return None
932
933
def mktime_tz(data):
    """Turn a 10-tuple as returned by parsedate_tz() into a UTC timestamp.

    data[9] is the zone offset from UTC in seconds, or None.  With an
    offset the first six fields are converted with calendar.timegm(), so
    the result does not depend on the local timezone or on the range of
    the platform's mktime().  Without one the fields are taken to be
    local time.  The result is a float.
    """
    if data[9] is None:
        # No zone info, so localtime is better assumption than GMT
        return time.mktime(tuple(data[:8]) + (-1,))
    import calendar
    # timegm() reads the fields as UTC; subtracting the zone's offset
    # moves the zone-local wall-clock time to real UTC.
    return float(calendar.timegm(data)) - data[9]
942
def formatdate(timeval=None):
    """Returns time format preferred for Internet standards.

    Sun, 06 Nov 1994 08:49:37 GMT  ; RFC 822, updated by RFC 1123

    `timeval' is seconds since the epoch; when omitted, the current time
    is used.  The result is always expressed in GMT.

    According to RFC 1123, day and month names must always be in
    English.  If not for that, this code could use strftime().  It
    can't because strftime() honors the locale and could generate
    non-English names.
    """
    if timeval is None:
        timeval = time.time()
    year, month, day, hour, minute, second, weekday = time.gmtime(timeval)[:7]
    daynames = ("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun")
    monthnames = ("Jan", "Feb", "Mar", "Apr", "May", "Jun",
                  "Jul", "Aug", "Sep", "Oct", "Nov", "Dec")
    return "%s, %02d %s %04d %02d:%02d:%02d GMT" % (
        daynames[weekday], day, monthnames[month - 1],
        year, hour, minute, second)
962
963
964# When used as script, run a small test program.
965# The first command line argument must be a filename containing one
966# message in RFC-822 format.
967
if __name__ == '__main__':
    # Small self-test: parse one message file and print what was found.
    # The file is the first command line argument, else ~/Mail/inbox/1.
    import sys
    import os
    path = os.path.join(os.environ['HOME'], 'Mail/inbox/1')
    if sys.argv[1:]:
        path = sys.argv[1]
    f = open(path, 'r')
    try:
        m = Message(f)
        # Each print builds one string so the output is the same whether
        # print is a statement (Python 2) or a function (Python 3).
        print('From: %s' % (m.getaddr('from'),))
        print('To: %s' % (m.getaddrlist('to'),))
        print('Subject: %s' % (m.getheader('subject'),))
        print('Date: %s' % (m.getheader('date'),))
        date = m.getdate_tz('date')
        if date:
            # Only touch the tuple once we know the Date header parsed.
            tz = date[-1]
            local = time.localtime(mktime_tz(date))
            parts = ['ParsedDate:', time.asctime(local)]
            if tz is not None:
                # Format sign and magnitude separately; divmod() on a
                # negative offset would round the hours the wrong way.
                sign = '+'
                if tz < 0:
                    sign = '-'
                hhmm, ss = divmod(abs(tz), 60)
                hh, mm = divmod(hhmm, 60)
                parts.append('%s%02d%02d' % (sign, hh, mm))
                if ss:
                    parts.append('.%02d' % ss)
            print(' '.join(parts))
        else:
            print('ParsedDate: %s' % (None,))
        m.rewindbody()
        n = 0
        while f.readline():
            n += 1
        print('Lines: %s' % (n,))
        print('-' * 70)
        print('len = %s' % (len(m),))
        if 'Date' in m:
            print('Date = %s' % (m['Date'],))
        if 'X-Nonsense' in m:
            pass
        print('keys = %s' % (m.keys(),))
        print('values = %s' % (m.values(),))
        print('items = %s' % (m.items(),))
    finally:
        f.close()
Note: See TracBrowser for help on using the repository browser.