Context Navigation

← Previous Revision
Next Revision →
Normal
Revision Log

rfc822.py

Last change on this file was 391, checked in by dmik, 11 years ago
python: Merge vendor 2.7.6 to trunk.
Property svn:eol-style set to `native`
File size: 32.5 KB

Rev	Line
[2]	1	"""RFC 2822 message manipulation.
	2
	3	Note: This is only a very rough sketch of a full RFC-822 parser; in particular
	4	the tokenizing of addresses does not adhere to all the quoting rules.
	5
	6	Note: RFC 2822 is a long awaited update to RFC 822. This module should
	7	conform to RFC 2822, and is thus mis-named (it's not worth renaming it). Some
	8	effort at RFC 2822 updates has been made, but a thorough audit has not been
	9	performed. Consider any RFC 2822 non-conformance to be a bug.
	10
	11	RFC 2822: http://www.faqs.org/rfcs/rfc2822.html
	12	RFC 822 : http://www.faqs.org/rfcs/rfc822.html (obsolete)
	13
	14	Directions for use:
	15
	16	To create a Message object: first open a file, e.g.:
	17
	18	fp = open(file, 'r')
	19
	20	You can use any other legal way of getting an open file object, e.g. use
	21	sys.stdin or call os.popen(). Then pass the open file object to the Message()
	22	constructor:
	23
	24	m = Message(fp)
	25
	26	This class can work with any input object that supports a readline method. If
	27	the input object has seek and tell capability, the rewindbody method will
	28	work; also illegal lines will be pushed back onto the input stream. If the
	29	input object lacks seek but has an `unread' method that can push back a line
	30	of input, Message will use that to push back illegal lines. Thus this class
	31	can be used to parse messages coming from a buffered stream.
	32
	33	The optional `seekable' argument is provided as a workaround for certain stdio
	34	libraries in which tell() discards buffered data before discovering that the
	35	lseek() system call doesn't work. For maximum portability, you should set the
	36	seekable argument to zero to prevent that initial tell() when passing in
[391]	37	an unseekable object such as a file object created from a socket object. If
[2]	38	it is 1 on entry -- which it is by default -- the tell() method of the open
	39	file object is called once; if this raises an exception, seekable is reset to
	40	0. For other nonzero values of seekable, this test is not made.
	41
	42	To get the text of a particular header there are several methods:
	43
	44	str = m.getheader(name)
	45	str = m.getrawheader(name)
	46
	47	where name is the name of the header, e.g. 'Subject'. The difference is that
	48	getheader() strips the leading and trailing whitespace, while getrawheader()
	49	doesn't. Both functions retain embedded whitespace (including newlines)
	50	exactly as they are specified in the header, and leave the case of the text
	51	unchanged.
	52
	53	For addresses and address lists there are functions
	54
	55	realname, mailaddress = m.getaddr(name)
	56	list = m.getaddrlist(name)
	57
	58	where the latter returns a list of (realname, mailaddr) tuples.
	59
	60	There is also a method
	61
	62	time = m.getdate(name)
	63
	64	which parses a Date-like field and returns a time-compatible tuple,
	65	i.e. a tuple such as returned by time.localtime() or accepted by
	66	time.mktime().
	67
	68	See the class definition for lower level access methods.
	69
	70	There are also some utility functions here.
	71	"""
	72	# Cleanup and extensions by Eric S. Raymond <esr@thyrsus.com>
	73
	74	import time
	75
	76	from warnings import warnpy3k
	77	warnpy3k("in 3.x, rfc822 has been removed in favor of the email package",
	78	stacklevel=2)
	79
# Names exported by "from rfc822 import *".
__all__ = ["Message","AddressList","parsedate","parsedate_tz","mktime_tz"]

# The two line values Message.islast() treats as the end of the headers:
# a bare CRLF or a bare LF.
_blanklines = ('\r\n', '\n') # Optimization for islast()
	83
	84
	85	class Message:
	86	"""Represents a single RFC 2822-compliant message."""
	87
	88	def __init__(self, fp, seekable = 1):
	89	"""Initialize the class instance and read the headers."""
	90	if seekable == 1:
	91	# Exercise tell() to make sure it works
	92	# (and then assume seek() works, too)
	93	try:
	94	fp.tell()
	95	except (AttributeError, IOError):
	96	seekable = 0
	97	self.fp = fp
	98	self.seekable = seekable
	99	self.startofheaders = None
	100	self.startofbody = None
	101	#
	102	if self.seekable:
	103	try:
	104	self.startofheaders = self.fp.tell()
	105	except IOError:
	106	self.seekable = 0
	107	#
	108	self.readheaders()
	109	#
	110	if self.seekable:
	111	try:
	112	self.startofbody = self.fp.tell()
	113	except IOError:
	114	self.seekable = 0
	115
	116	def rewindbody(self):
	117	"""Rewind the file to the start of the body (if seekable)."""
	118	if not self.seekable:
	119	raise IOError, "unseekable file"
	120	self.fp.seek(self.startofbody)
	121
	122	def readheaders(self):
	123	"""Read header lines.
	124
	125	Read header lines up to the entirely blank line that terminates them.
	126	The (normally blank) line that ends the headers is skipped, but not
	127	included in the returned list. If a non-header line ends the headers,
	128	(which is an error), an attempt is made to backspace over it; it is
	129	never included in the returned list.
	130
	131	The variable self.status is set to the empty string if all went well,
	132	otherwise it is an error message. The variable self.headers is a
	133	completely uninterpreted list of lines contained in the header (so
	134	printing them will reproduce the header exactly as it appears in the
	135	file).
	136	"""
	137	self.dict = {}
	138	self.unixfrom = ''
	139	self.headers = lst = []
	140	self.status = ''
	141	headerseen = ""
	142	firstline = 1
	143	startofline = unread = tell = None
	144	if hasattr(self.fp, 'unread'):
	145	unread = self.fp.unread
	146	elif self.seekable:
	147	tell = self.fp.tell
	148	while 1:
	149	if tell:
	150	try:
	151	startofline = tell()
	152	except IOError:
	153	startofline = tell = None
	154	self.seekable = 0
	155	line = self.fp.readline()
	156	if not line:
	157	self.status = 'EOF in headers'
	158	break
	159	# Skip unix From name time lines
	160	if firstline and line.startswith('From '):
	161	self.unixfrom = self.unixfrom + line
	162	continue
	163	firstline = 0
	164	if headerseen and line[0] in ' \t':
	165	# It's a continuation line.
	166	lst.append(line)
	167	x = (self.dict[headerseen] + "\n " + line.strip())
	168	self.dict[headerseen] = x.strip()
	169	continue
	170	elif self.iscomment(line):
	171	# It's a comment. Ignore it.
	172	continue
	173	elif self.islast(line):
	174	# Note! No pushback here! The delimiter line gets eaten.
	175	break
	176	headerseen = self.isheader(line)
	177	if headerseen:
	178	# It's a legal header line, save it.
	179	lst.append(line)
	180	self.dict[headerseen] = line[len(headerseen)+1:].strip()
	181	continue
	182	else:
	183	# It's not a header line; throw it back and stop here.
	184	if not self.dict:
	185	self.status = 'No headers'
	186	else:
	187	self.status = 'Non-header line where header expected'
	188	# Try to undo the read.
	189	if unread:
	190	unread(line)
	191	elif tell:
	192	self.fp.seek(startofline)
	193	else:
	194	self.status = self.status + '; bad seek'
	195	break
	196
	197	def isheader(self, line):
	198	"""Determine whether a given line is a legal header.
	199
	200	This method should return the header name, suitably canonicalized.
	201	You may override this method in order to use Message parsing on tagged
	202	data in RFC 2822-like formats with special header formats.
	203	"""
	204	i = line.find(':')
	205	if i > 0:
	206	return line[:i].lower()
	207	return None
	208
	209	def islast(self, line):
	210	"""Determine whether a line is a legal end of RFC 2822 headers.
	211
	212	You may override this method if your application wants to bend the
	213	rules, e.g. to strip trailing whitespace, or to recognize MH template
	214	separators ('--------'). For convenience (e.g. for code reading from
[391]	215	sockets) a line consisting of \\r\\n also matches.
[2]	216	"""
	217	return line in _blanklines
	218
	219	def iscomment(self, line):
	220	"""Determine whether a line should be skipped entirely.
	221
	222	You may override this method in order to use Message parsing on tagged
	223	data in RFC 2822-like formats that support embedded comments or
	224	free-text data.
	225	"""
	226	return False
	227
	228	def getallmatchingheaders(self, name):
	229	"""Find all header lines matching a given header name.
	230
	231	Look through the list of headers and find all lines matching a given
	232	header name (and their continuation lines). A list of the lines is
	233	returned, without interpretation. If the header does not occur, an
	234	empty list is returned. If the header occurs multiple times, all
	235	occurrences are returned. Case is not important in the header name.
	236	"""
	237	name = name.lower() + ':'
	238	n = len(name)
	239	lst = []
	240	hit = 0
	241	for line in self.headers:
	242	if line[:n].lower() == name:
	243	hit = 1
	244	elif not line[:1].isspace():
	245	hit = 0
	246	if hit:
	247	lst.append(line)
	248	return lst
	249
	250	def getfirstmatchingheader(self, name):
	251	"""Get the first header line matching name.
	252
	253	This is similar to getallmatchingheaders, but it returns only the
	254	first matching header (and its continuation lines).
	255	"""
	256	name = name.lower() + ':'
	257	n = len(name)
	258	lst = []
	259	hit = 0
	260	for line in self.headers:
	261	if hit:
	262	if not line[:1].isspace():
	263	break
	264	elif line[:n].lower() == name:
	265	hit = 1
	266	if hit:
	267	lst.append(line)
	268	return lst
	269
	270	def getrawheader(self, name):
	271	"""A higher-level interface to getfirstmatchingheader().
	272
	273	Return a string containing the literal text of the header but with the
	274	keyword stripped. All leading, trailing and embedded whitespace is
	275	kept in the string, however. Return None if the header does not
	276	occur.
	277	"""
	278
	279	lst = self.getfirstmatchingheader(name)
	280	if not lst:
	281	return None
	282	lst[0] = lst[0][len(name) + 1:]
	283	return ''.join(lst)
	284
	285	def getheader(self, name, default=None):
	286	"""Get the header value for a name.
	287
	288	This is the normal interface: it returns a stripped version of the
	289	header value for a given header name, or None if it doesn't exist.
	290	This uses the dictionary version which finds the last such header.
	291	"""
	292	return self.dict.get(name.lower(), default)
	293	get = getheader
	294
	295	def getheaders(self, name):
	296	"""Get all values for a header.
	297
	298	This returns a list of values for headers given more than once; each
	299	value in the result list is stripped in the same way as the result of
	300	getheader(). If the header is not given, return an empty list.
	301	"""
	302	result = []
	303	current = ''
	304	have_header = 0
	305	for s in self.getallmatchingheaders(name):
	306	if s[0].isspace():
	307	if current:
	308	current = "%s\n %s" % (current, s.strip())
	309	else:
	310	current = s.strip()
	311	else:
	312	if have_header:
	313	result.append(current)
	314	current = s[s.find(":") + 1:].strip()
	315	have_header = 1
	316	if have_header:
	317	result.append(current)
	318	return result
	319
	320	def getaddr(self, name):
	321	"""Get a single address from a header, as a tuple.
	322
	323	An example return value:
	324	('Guido van Rossum', 'guido@cwi.nl')
	325	"""
	326	# New, by Ben Escoto
	327	alist = self.getaddrlist(name)
	328	if alist:
	329	return alist[0]
	330	else:
	331	return (None, None)
	332
	333	def getaddrlist(self, name):
	334	"""Get a list of addresses from a header.
	335
	336	Retrieves a list of addresses from a header, where each address is a
	337	tuple as returned by getaddr(). Scans all named headers, so it works
	338	properly with multiple To: or Cc: headers for example.
	339	"""
	340	raw = []
	341	for h in self.getallmatchingheaders(name):
	342	if h[0] in ' \t':
	343	raw.append(h)
	344	else:
	345	if raw:
	346	raw.append(', ')
	347	i = h.find(':')
	348	if i > 0:
	349	addr = h[i+1:]
	350	raw.append(addr)
	351	alladdrs = ''.join(raw)
	352	a = AddressList(alladdrs)
	353	return a.addresslist
	354
	355	def getdate(self, name):
	356	"""Retrieve a date field from a header.
	357
	358	Retrieves a date field from the named header, returning a tuple
	359	compatible with time.mktime().
	360	"""
	361	try:
	362	data = self[name]
	363	except KeyError:
	364	return None
	365	return parsedate(data)
	366
	367	def getdate_tz(self, name):
	368	"""Retrieve a date field from a header as a 10-tuple.
	369
	370	The first 9 elements make up a tuple compatible with time.mktime(),
	371	and the 10th is the offset of the poster's time zone from GMT/UTC.
	372	"""
	373	try:
	374	data = self[name]
	375	except KeyError:
	376	return None
	377	return parsedate_tz(data)
	378
	379
	380	# Access as a dictionary (only finds last header of each type):
	381
	382	def __len__(self):
	383	"""Get the number of headers in a message."""
	384	return len(self.dict)
	385
	386	def __getitem__(self, name):
	387	"""Get a specific header, as from a dictionary."""
	388	return self.dict[name.lower()]
	389
	390	def __setitem__(self, name, value):
	391	"""Set the value of a header.
	392
	393	Note: This is not a perfect inversion of __getitem__, because any
	394	changed headers get stuck at the end of the raw-headers list rather
	395	than where the altered header was.
	396	"""
	397	del self[name] # Won't fail if it doesn't exist
	398	self.dict[name.lower()] = value
	399	text = name + ": " + value
	400	for line in text.split("\n"):
	401	self.headers.append(line + "\n")
	402
	403	def __delitem__(self, name):
	404	"""Delete all occurrences of a specific header, if it is present."""
	405	name = name.lower()
	406	if not name in self.dict:
	407	return
	408	del self.dict[name]
	409	name = name + ':'
	410	n = len(name)
	411	lst = []
	412	hit = 0
	413	for i in range(len(self.headers)):
	414	line = self.headers[i]
	415	if line[:n].lower() == name:
	416	hit = 1
	417	elif not line[:1].isspace():
	418	hit = 0
	419	if hit:
	420	lst.append(i)
	421	for i in reversed(lst):
	422	del self.headers[i]
	423
	424	def setdefault(self, name, default=""):
	425	lowername = name.lower()
	426	if lowername in self.dict:
	427	return self.dict[lowername]
	428	else:
	429	text = name + ": " + default
	430	for line in text.split("\n"):
	431	self.headers.append(line + "\n")
	432	self.dict[lowername] = default
	433	return default
	434
	435	def has_key(self, name):
	436	"""Determine whether a message contains the named header."""
	437	return name.lower() in self.dict
	438
	439	def __contains__(self, name):
	440	"""Determine whether a message contains the named header."""
	441	return name.lower() in self.dict
	442
	443	def __iter__(self):
	444	return iter(self.dict)
	445
	446	def keys(self):
	447	"""Get all of a message's header field names."""
	448	return self.dict.keys()
	449
	450	def values(self):
	451	"""Get all of a message's header field values."""
	452	return self.dict.values()
	453
	454	def items(self):
	455	"""Get all of a message's headers.
	456
	457	Returns a list of name, value tuples.
	458	"""
	459	return self.dict.items()
	460
	461	def __str__(self):
	462	return ''.join(self.headers)
	463
	464
	465	# Utility functions
	466	# -----------------
	467
	468	# XXX Should fix unquote() and quote() to be really conformant.
	469	# XXX The inverses of the parse functions may also be useful.
	470
	471
	472	def unquote(s):
	473	"""Remove quotes from a string."""
	474	if len(s) > 1:
	475	if s.startswith('"') and s.endswith('"'):
	476	return s[1:-1].replace('\\\\', '\\').replace('\\"', '"')
	477	if s.startswith('<') and s.endswith('>'):
	478	return s[1:-1]
	479	return s
	480
	481
	482	def quote(s):
	483	"""Add quotes around a string."""
	484	return s.replace('\\', '\\\\').replace('"', '\\"')
	485
	486
	487	def parseaddr(address):
	488	"""Parse an address into a (realname, mailaddr) tuple."""
	489	a = AddressList(address)
	490	lst = a.addresslist
	491	if not lst:
	492	return (None, None)
	493	return lst[0]
	494
	495
	496	class AddrlistClass:
	497	"""Address parser class by Ben Escoto.
	498
	499	To understand what this class does, it helps to have a copy of
	500	RFC 2822 in front of you.
	501
	502	http://www.faqs.org/rfcs/rfc2822.html
	503
	504	Note: this class interface is deprecated and may be removed in the future.
	505	Use rfc822.AddressList instead.
	506	"""
	507
	508	def __init__(self, field):
	509	"""Initialize a new instance.
	510
	511	`field' is an unparsed address header field, containing one or more
	512	addresses.
	513	"""
	514	self.specials = '()<>@,:;.\"[]'
	515	self.pos = 0
	516	self.LWS = ' \t'
	517	self.CR = '\r\n'
	518	self.atomends = self.specials + self.LWS + self.CR
	519	# Note that RFC 2822 now specifies `.' as obs-phrase, meaning that it
	520	# is obsolete syntax. RFC 2822 requires that we recognize obsolete
	521	# syntax, so allow dots in phrases.
	522	self.phraseends = self.atomends.replace('.', '')
	523	self.field = field
	524	self.commentlist = []
	525
	526	def gotonext(self):
	527	"""Parse up to the start of the next address."""
	528	while self.pos < len(self.field):
	529	if self.field[self.pos] in self.LWS + '\n\r':
	530	self.pos = self.pos + 1
	531	elif self.field[self.pos] == '(':
	532	self.commentlist.append(self.getcomment())
	533	else: break
	534
	535	def getaddrlist(self):
	536	"""Parse all addresses.
	537
	538	Returns a list containing all of the addresses.
	539	"""
	540	result = []
	541	ad = self.getaddress()
	542	while ad:
	543	result += ad
	544	ad = self.getaddress()
	545	return result
	546
	547	def getaddress(self):
	548	"""Parse the next address."""
	549	self.commentlist = []
	550	self.gotonext()
	551
	552	oldpos = self.pos
	553	oldcl = self.commentlist
	554	plist = self.getphraselist()
	555
	556	self.gotonext()
	557	returnlist = []
	558
	559	if self.pos >= len(self.field):
	560	# Bad email address technically, no domain.
	561	if plist:
	562	returnlist = [(' '.join(self.commentlist), plist[0])]
	563
	564	elif self.field[self.pos] in '.@':
	565	# email address is just an addrspec
	566	# this isn't very efficient since we start over
	567	self.pos = oldpos
	568	self.commentlist = oldcl
	569	addrspec = self.getaddrspec()
	570	returnlist = [(' '.join(self.commentlist), addrspec)]
	571
	572	elif self.field[self.pos] == ':':
	573	# address is a group
	574	returnlist = []
	575
	576	fieldlen = len(self.field)
	577	self.pos += 1
	578	while self.pos < len(self.field):
	579	self.gotonext()
	580	if self.pos < fieldlen and self.field[self.pos] == ';':
	581	self.pos += 1
	582	break
	583	returnlist = returnlist + self.getaddress()
	584
	585	elif self.field[self.pos] == '<':
	586	# Address is a phrase then a route addr
	587	routeaddr = self.getrouteaddr()
	588
	589	if self.commentlist:
	590	returnlist = [(' '.join(plist) + ' (' + \
	591	' '.join(self.commentlist) + ')', routeaddr)]
	592	else: returnlist = [(' '.join(plist), routeaddr)]
	593
	594	else:
	595	if plist:
	596	returnlist = [(' '.join(self.commentlist), plist[0])]
	597	elif self.field[self.pos] in self.specials:
	598	self.pos += 1
	599
	600	self.gotonext()
	601	if self.pos < len(self.field) and self.field[self.pos] == ',':
	602	self.pos += 1
	603	return returnlist
	604
	605	def getrouteaddr(self):
	606	"""Parse a route address (Return-path value).
	607
	608	This method just skips all the route stuff and returns the addrspec.
	609	"""
	610	if self.field[self.pos] != '<':
	611	return
	612
	613	expectroute = 0
	614	self.pos += 1
	615	self.gotonext()
	616	adlist = ""
	617	while self.pos < len(self.field):
	618	if expectroute:
	619	self.getdomain()
	620	expectroute = 0
	621	elif self.field[self.pos] == '>':
	622	self.pos += 1
	623	break
	624	elif self.field[self.pos] == '@':
	625	self.pos += 1
	626	expectroute = 1
	627	elif self.field[self.pos] == ':':
	628	self.pos += 1
	629	else:
	630	adlist = self.getaddrspec()
	631	self.pos += 1
	632	break
	633	self.gotonext()
	634
	635	return adlist
	636
	637	def getaddrspec(self):
	638	"""Parse an RFC 2822 addr-spec."""
	639	aslist = []
	640
	641	self.gotonext()
	642	while self.pos < len(self.field):
	643	if self.field[self.pos] == '.':
	644	aslist.append('.')
	645	self.pos += 1
	646	elif self.field[self.pos] == '"':
	647	aslist.append('"%s"' % self.getquote())
	648	elif self.field[self.pos] in self.atomends:
	649	break
	650	else: aslist.append(self.getatom())
	651	self.gotonext()
	652
	653	if self.pos >= len(self.field) or self.field[self.pos] != '@':
	654	return ''.join(aslist)
	655
	656	aslist.append('@')
	657	self.pos += 1
	658	self.gotonext()
	659	return ''.join(aslist) + self.getdomain()
	660
	661	def getdomain(self):
	662	"""Get the complete domain name from an address."""
	663	sdlist = []
	664	while self.pos < len(self.field):
	665	if self.field[self.pos] in self.LWS:
	666	self.pos += 1
	667	elif self.field[self.pos] == '(':
	668	self.commentlist.append(self.getcomment())
	669	elif self.field[self.pos] == '[':
	670	sdlist.append(self.getdomainliteral())
	671	elif self.field[self.pos] == '.':
	672	self.pos += 1
	673	sdlist.append('.')
	674	elif self.field[self.pos] in self.atomends:
	675	break
	676	else: sdlist.append(self.getatom())
	677	return ''.join(sdlist)
	678
	679	def getdelimited(self, beginchar, endchars, allowcomments = 1):
	680	"""Parse a header fragment delimited by special characters.
	681
	682	`beginchar' is the start character for the fragment. If self is not
	683	looking at an instance of `beginchar' then getdelimited returns the
	684	empty string.
	685
	686	`endchars' is a sequence of allowable end-delimiting characters.
	687	Parsing stops when one of these is encountered.
	688
	689	If `allowcomments' is non-zero, embedded RFC 2822 comments are allowed
	690	within the parsed fragment.
	691	"""
	692	if self.field[self.pos] != beginchar:
	693	return ''
	694
	695	slist = ['']
	696	quote = 0
	697	self.pos += 1
	698	while self.pos < len(self.field):
	699	if quote == 1:
	700	slist.append(self.field[self.pos])
	701	quote = 0
	702	elif self.field[self.pos] in endchars:
	703	self.pos += 1
	704	break
	705	elif allowcomments and self.field[self.pos] == '(':
	706	slist.append(self.getcomment())
	707	continue # have already advanced pos from getcomment
	708	elif self.field[self.pos] == '\\':
	709	quote = 1
	710	else:
	711	slist.append(self.field[self.pos])
	712	self.pos += 1
	713
	714	return ''.join(slist)
	715
	716	def getquote(self):
	717	"""Get a quote-delimited fragment from self's field."""
	718	return self.getdelimited('"', '"\r', 0)
	719
	720	def getcomment(self):
	721	"""Get a parenthesis-delimited fragment from self's field."""
	722	return self.getdelimited('(', ')\r', 1)
	723
	724	def getdomainliteral(self):
	725	"""Parse an RFC 2822 domain-literal."""
	726	return '[%s]' % self.getdelimited('[', ']\r', 0)
	727
	728	def getatom(self, atomends=None):
	729	"""Parse an RFC 2822 atom.
	730
	731	Optional atomends specifies a different set of end token delimiters
	732	(the default is to use self.atomends). This is used e.g. in
	733	getphraselist() since phrase endings must not include the `.' (which
	734	is legal in phrases)."""
	735	atomlist = ['']
	736	if atomends is None:
	737	atomends = self.atomends
	738
	739	while self.pos < len(self.field):
	740	if self.field[self.pos] in atomends:
	741	break
	742	else: atomlist.append(self.field[self.pos])
	743	self.pos += 1
	744
	745	return ''.join(atomlist)
	746
	747	def getphraselist(self):
	748	"""Parse a sequence of RFC 2822 phrases.
	749
	750	A phrase is a sequence of words, which are in turn either RFC 2822
	751	atoms or quoted-strings. Phrases are canonicalized by squeezing all
	752	runs of continuous whitespace into one space.
	753	"""
	754	plist = []
	755
	756	while self.pos < len(self.field):
	757	if self.field[self.pos] in self.LWS:
	758	self.pos += 1
	759	elif self.field[self.pos] == '"':
	760	plist.append(self.getquote())
	761	elif self.field[self.pos] == '(':
	762	self.commentlist.append(self.getcomment())
	763	elif self.field[self.pos] in self.phraseends:
	764	break
	765	else:
	766	plist.append(self.getatom(self.phraseends))
	767
	768	return plist
	769
	770	class AddressList(AddrlistClass):
	771	"""An AddressList encapsulates a list of parsed RFC 2822 addresses."""
	772	def __init__(self, field):
	773	AddrlistClass.__init__(self, field)
	774	if field:
	775	self.addresslist = self.getaddrlist()
	776	else:
	777	self.addresslist = []
	778
	779	def __len__(self):
	780	return len(self.addresslist)
	781
	782	def __str__(self):
	783	return ", ".join(map(dump_address_pair, self.addresslist))
	784
	785	def __add__(self, other):
	786	# Set union
	787	newaddr = AddressList(None)
	788	newaddr.addresslist = self.addresslist[:]
	789	for x in other.addresslist:
	790	if not x in self.addresslist:
	791	newaddr.addresslist.append(x)
	792	return newaddr
	793
	794	def __iadd__(self, other):
	795	# Set union, in-place
	796	for x in other.addresslist:
	797	if not x in self.addresslist:
	798	self.addresslist.append(x)
	799	return self
	800
	801	def __sub__(self, other):
	802	# Set difference
	803	newaddr = AddressList(None)
	804	for x in self.addresslist:
	805	if not x in other.addresslist:
	806	newaddr.addresslist.append(x)
	807	return newaddr
	808
	809	def __isub__(self, other):
	810	# Set difference, in-place
	811	for x in other.addresslist:
	812	if x in self.addresslist:
	813	self.addresslist.remove(x)
	814	return self
	815
	816	def __getitem__(self, index):
	817	# Make indexing, slices, and 'in' work
	818	return self.addresslist[index]
	819
	820	def dump_address_pair(pair):
	821	"""Dump a (name, address) pair in a canonicalized form."""
	822	if pair[0]:
	823	return '"' + pair[0] + '" <' + pair[1] + '>'
	824	else:
	825	return pair[1]
	826
	827	# Parse a date field
	828
	829	_monthnames = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul',
	830	'aug', 'sep', 'oct', 'nov', 'dec',
	831	'january', 'february', 'march', 'april', 'may', 'june', 'july',
	832	'august', 'september', 'october', 'november', 'december']
	833	_daynames = ['mon', 'tue', 'wed', 'thu', 'fri', 'sat', 'sun']
	834
	835	# The timezone table does not include the military time zones defined
	836	# in RFC822, other than Z. According to RFC1123, the description in
	837	# RFC822 gets the signs wrong, so we can't rely on any such time
	838	# zones. RFC1123 recommends that numeric timezone indicators be used
	839	# instead of timezone names.
	840
	841	_timezones = {'UT':0, 'UTC':0, 'GMT':0, 'Z':0,
	842	'AST': -400, 'ADT': -300, # Atlantic (used in Canada)
	843	'EST': -500, 'EDT': -400, # Eastern
	844	'CST': -600, 'CDT': -500, # Central
	845	'MST': -700, 'MDT': -600, # Mountain
	846	'PST': -800, 'PDT': -700 # Pacific
	847	}
	848
	849
	850	def parsedate_tz(data):
	851	"""Convert a date string to a time tuple.
	852
	853	Accounts for military timezones.
	854	"""
	855	if not data:
	856	return None
	857	data = data.split()
	858	if data[0][-1] in (',', '.') or data[0].lower() in _daynames:
	859	# There's a dayname here. Skip it
	860	del data[0]
	861	else:
	862	# no space after the "weekday,"?
	863	i = data[0].rfind(',')
	864	if i >= 0:
	865	data[0] = data[0][i+1:]
	866	if len(data) == 3: # RFC 850 date, deprecated
	867	stuff = data[0].split('-')
	868	if len(stuff) == 3:
	869	data = stuff + data[1:]
	870	if len(data) == 4:
	871	s = data[3]
	872	i = s.find('+')
	873	if i > 0:
	874	data[3:] = [s[:i], s[i+1:]]
	875	else:
	876	data.append('') # Dummy tz
	877	if len(data) < 5:
	878	return None
	879	data = data[:5]
	880	[dd, mm, yy, tm, tz] = data
	881	mm = mm.lower()
	882	if not mm in _monthnames:
	883	dd, mm = mm, dd.lower()
	884	if not mm in _monthnames:
	885	return None
	886	mm = _monthnames.index(mm)+1
	887	if mm > 12: mm = mm - 12
	888	if dd[-1] == ',':
	889	dd = dd[:-1]
	890	i = yy.find(':')
	891	if i > 0:
	892	yy, tm = tm, yy
	893	if yy[-1] == ',':
	894	yy = yy[:-1]
	895	if not yy[0].isdigit():
	896	yy, tz = tz, yy
	897	if tm[-1] == ',':
	898	tm = tm[:-1]
	899	tm = tm.split(':')
	900	if len(tm) == 2:
	901	[thh, tmm] = tm
	902	tss = '0'
	903	elif len(tm) == 3:
	904	[thh, tmm, tss] = tm
	905	else:
	906	return None
	907	try:
	908	yy = int(yy)
	909	dd = int(dd)
	910	thh = int(thh)
	911	tmm = int(tmm)
	912	tss = int(tss)
	913	except ValueError:
	914	return None
	915	tzoffset = None
	916	tz = tz.upper()
	917	if tz in _timezones:
	918	tzoffset = _timezones[tz]
	919	else:
	920	try:
	921	tzoffset = int(tz)
	922	except ValueError:
	923	pass
	924	# Convert a timezone offset into seconds ; -0500 -> -18000
	925	if tzoffset:
	926	if tzoffset < 0:
	927	tzsign = -1
	928	tzoffset = -tzoffset
	929	else:
	930	tzsign = 1
	931	tzoffset = tzsign * ( (tzoffset//100)3600 + (tzoffset % 100)60)
	932	return (yy, mm, dd, thh, tmm, tss, 0, 1, 0, tzoffset)
	933
	934
	935	def parsedate(data):
	936	"""Convert a time string to a time tuple."""
	937	t = parsedate_tz(data)
	938	if t is None:
	939	return t
	940	return t[:9]
	941
	942
	943	def mktime_tz(data):
	944	"""Turn a 10-tuple as returned by parsedate_tz() into a UTC timestamp."""
	945	if data[9] is None:
	946	# No zone info, so localtime is better assumption than GMT
	947	return time.mktime(data[:8] + (-1,))
	948	else:
	949	t = time.mktime(data[:8] + (0,))
	950	return t - data[9] - time.timezone
	951
	952	def formatdate(timeval=None):
	953	"""Returns time format preferred for Internet standards.
	954
	955	Sun, 06 Nov 1994 08:49:37 GMT ; RFC 822, updated by RFC 1123
	956
	957	According to RFC 1123, day and month names must always be in
	958	English. If not for that, this code could use strftime(). It
	959	can't because strftime() honors the locale and could generated
	960	non-English names.
	961	"""
	962	if timeval is None:
	963	timeval = time.time()
	964	timeval = time.gmtime(timeval)
	965	return "%s, %02d %s %04d %02d:%02d:%02d GMT" % (
	966	("Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun")[timeval[6]],
	967	timeval[2],
	968	("Jan", "Feb", "Mar", "Apr", "May", "Jun",
	969	"Jul", "Aug", "Sep", "Oct", "Nov", "Dec")[timeval[1]-1],
	970	timeval[0], timeval[3], timeval[4], timeval[5])
	971
	972
	973	# When used as script, run a small test program.
	974	# The first command line argument must be a filename containing one
	975	# message in RFC-822 format.
	976
	977	if __name__ == '__main__':
	978	import sys, os
	979	file = os.path.join(os.environ['HOME'], 'Mail/inbox/1')
	980	if sys.argv[1:]: file = sys.argv[1]
	981	f = open(file, 'r')
	982	m = Message(f)
	983	print 'From:', m.getaddr('from')
	984	print 'To:', m.getaddrlist('to')
	985	print 'Subject:', m.getheader('subject')
	986	print 'Date:', m.getheader('date')
	987	date = m.getdate_tz('date')
	988	tz = date[-1]
	989	date = time.localtime(mktime_tz(date))
	990	if date:
	991	print 'ParsedDate:', time.asctime(date),
	992	hhmmss = tz
	993	hhmm, ss = divmod(hhmmss, 60)
	994	hh, mm = divmod(hhmm, 60)
	995	print "%+03d%02d" % (hh, mm),
	996	if ss: print ".%02d" % ss,
	997	print
	998	else:
	999	print 'ParsedDate:', None
	1000	m.rewindbody()
	1001	n = 0
	1002	while f.readline():
	1003	n += 1
	1004	print 'Lines:', n
	1005	print '-'*70
	1006	print 'len =', len(m)
	1007	if 'Date' in m: print 'Date =', m['Date']
	1008	if 'X-Nonsense' in m: pass
	1009	print 'keys =', m.keys()
	1010	print 'values =', m.values()
	1011	print 'items =', m.items()

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: python/trunk/Lib/rfc822.py

Download in other formats: