Context Navigation

← Previous Revision
Next Revision →
Normal
Revision Log

urllib.py

Last change on this file was 1257, checked in by dmik, 8 years ago

python: Fix handling drive letters in urllib.url2pathname and pathname2url on OS/2.

It used to use the POSIX code path, which would not recognize drive letters and would treat the
path as non-absolute. Fixes Mozilla's python/mozbuild/mozpack/test/test_mozjar.py.

Property svn:eol-style set to native

File size: 57.4 KB

Rev	Line
[2]	1	"""Open an arbitrary URL.
	2
	3	See the following document for more info on URLs:
	4	"Names and Addresses, URIs, URLs, URNs, URCs", at
	5	http://www.w3.org/pub/WWW/Addressing/Overview.html
	6
	7	See also the HTTP spec (from which the error codes are derived):
	8	"HTTP - Hypertext Transfer Protocol", at
	9	http://www.w3.org/pub/WWW/Protocols/
	10
	11	Related standards and specs:
	12	- RFC1808: the "relative URL" spec. (authoritative status)
	13	- RFC1738 - the "URL standard". (authoritative status)
	14	- RFC1630 - the "URI spec". (informational status)
	15
	16	The object returned by URLopener().open(file) will differ per
	17	protocol. All you know is that it has methods read(), readline(),
	18	readlines(), fileno(), close() and info(). The read*(), fileno()
	19	and close() methods work like those of open files.
	20	The info() method returns a mimetools.Message object which can be
	21	used to query various info about the object, if available.
	22	(mimetools.Message objects are queried with the getheader() method.)
	23	"""
	24
	25	import string
	26	import socket
	27	import os
	28	import time
	29	import sys
[391]	30	import base64
	31	import re
	32
[2]	33	from urlparse import urljoin as basejoin
	34
	35	__all__ = ["urlopen", "URLopener", "FancyURLopener", "urlretrieve",
	36	"urlcleanup", "quote", "quote_plus", "unquote", "unquote_plus",
	37	"urlencode", "url2pathname", "pathname2url", "splittag",
	38	"localhost", "thishost", "ftperrors", "basejoin", "unwrap",
	39	"splittype", "splithost", "splituser", "splitpasswd", "splitport",
	40	"splitnport", "splitquery", "splitattr", "splitvalue",
	41	"getproxies"]
	42
	43	__version__ = '1.17' # XXX This version is not always updated :-(
	44
	45	MAXFTPCACHE = 10 # Trim the ftp cache beyond this size
	46
	47	# Helper for non-unix systems
if os.name == 'nt':
    from nturl2path import url2pathname, pathname2url
elif os.name == 'riscos':
    from rourl2path import url2pathname, pathname2url
elif os.name == 'os2':
    import nturl2path

    def url2pathname(pathname):
        """Turn the path part of a 'file' URL into an OS/2 file system path.

        OS-specific helper; not recommended for general use.
        """
        # nturl2path only expects back slashes, so normalize the
        # separators before handing the path over.
        backslashed = pathname.replace('/', '\\')
        return nturl2path.url2pathname(backslashed)

    def pathname2url(pathname):
        """Turn an OS/2 file system path into the path part of a 'file' URL.

        OS-specific helper; not recommended for general use.
        """
        # nturl2path only expects back slashes, so normalize the
        # separators before handing the path over.
        backslashed = pathname.replace('/', '\\')
        return nturl2path.pathname2url(backslashed)
else:
    def url2pathname(pathname):
        """Turn the path part of a 'file' URL into a file system path.

        On this platform that is just percent-decoding.  OS-specific
        helper; not recommended for general use.
        """
        return unquote(pathname)

    def pathname2url(pathname):
        """Turn a file system path into the path part of a 'file' URL.

        On this platform that is just percent-encoding.  OS-specific
        helper; not recommended for general use.
        """
        return quote(pathname)
	75
	76	# This really consists of two pieces:
	77	# (1) a class which handles opening of all sorts of URLs
	78	# (plus assorted utilities etc.)
	79	# (2) a set of functions for parsing URLs
	80	# XXX Should these be separated out into different modules?
	81
	82
	83	# Shortcut for basic usage
	84	_urlopener = None
	85	def urlopen(url, data=None, proxies=None):
	86	"""Create a file-like object for the specified URL to read from."""
	87	from warnings import warnpy3k
	88	warnpy3k("urllib.urlopen() has been removed in Python 3.0 in "
	89	"favor of urllib2.urlopen()", stacklevel=2)
	90
	91	global _urlopener
	92	if proxies is not None:
	93	opener = FancyURLopener(proxies=proxies)
	94	elif not _urlopener:
	95	opener = FancyURLopener()
	96	_urlopener = opener
	97	else:
	98	opener = _urlopener
	99	if data is None:
	100	return opener.open(url)
	101	else:
	102	return opener.open(url, data)
	103	def urlretrieve(url, filename=None, reporthook=None, data=None):
	104	global _urlopener
	105	if not _urlopener:
	106	_urlopener = FancyURLopener()
	107	return _urlopener.retrieve(url, filename, reporthook, data)
	108	def urlcleanup():
	109	if _urlopener:
	110	_urlopener.cleanup()
[391]	111	_safe_quoters.clear()
	112	ftpcache.clear()
[2]	113
	114	# check for SSL
	115	try:
	116	import ssl
	117	except:
	118	_have_ssl = False
	119	else:
	120	_have_ssl = True
	121
	122	# exception raised when downloaded size does not match content-length
	123	class ContentTooShortError(IOError):
	124	def __init__(self, message, content):
	125	IOError.__init__(self, message)
	126	self.content = content
	127
	128	ftpcache = {}
	129	class URLopener:
	130	"""Class to open URLs.
	131	This is a class rather than just a subroutine because we may need
	132	more than one set of global protocol-specific options.
	133	Note -- this is a base class for those who don't want the
	134	automatic handling of errors type 302 (relocated) and 401
	135	(authorization needed)."""
	136
	137	__tempfiles = None
	138
	139	version = "Python-urllib/%s" % __version__
	140
	141	# Constructor
	142	def __init__(self, proxies=None, **x509):
	143	if proxies is None:
	144	proxies = getproxies()
	145	assert hasattr(proxies, 'has_key'), "proxies must be a mapping"
	146	self.proxies = proxies
	147	self.key_file = x509.get('key_file')
	148	self.cert_file = x509.get('cert_file')
	149	self.addheaders = [('User-Agent', self.version)]
	150	self.__tempfiles = []
	151	self.__unlink = os.unlink # See cleanup()
	152	self.tempcache = None
	153	# Undocumented feature: if you assign {} to tempcache,
	154	# it is used to cache files retrieved with
	155	# self.retrieve(). This is not enabled by default
	156	# since it does not work for changing documents (and I
	157	# haven't got the logic to check expiration headers
	158	# yet).
	159	self.ftpcache = ftpcache
	160	# Undocumented feature: you can use a different
	161	# ftp cache by assigning to the .ftpcache member;
	162	# in case you want logically independent URL openers
	163	# XXX This is not threadsafe. Bah.
	164
	165	def __del__(self):
	166	self.close()
	167
	168	def close(self):
	169	self.cleanup()
	170
	171	def cleanup(self):
	172	# This code sometimes runs when the rest of this module
	173	# has already been deleted, so it can't use any globals
	174	# or import anything.
	175	if self.__tempfiles:
	176	for file in self.__tempfiles:
	177	try:
	178	self.__unlink(file)
	179	except OSError:
	180	pass
	181	del self.__tempfiles[:]
	182	if self.tempcache:
	183	self.tempcache.clear()
	184
	185	def addheader(self, *args):
	186	"""Add a header to be used by the HTTP interface only
	187	e.g. u.addheader('Accept', 'sound/basic')"""
	188	self.addheaders.append(args)
	189
	190	# External interface
	191	def open(self, fullurl, data=None):
	192	"""Use URLopener().open(file) instead of open(file, 'r')."""
	193	fullurl = unwrap(toBytes(fullurl))
[391]	194	# percent encode url, fixing lame server errors for e.g, like space
	195	# within url paths.
[2]	196	fullurl = quote(fullurl, safe="%/:=&?~#+!$,;'@()*[]\|")
	197	if self.tempcache and fullurl in self.tempcache:
	198	filename, headers = self.tempcache[fullurl]
	199	fp = open(filename, 'rb')
	200	return addinfourl(fp, headers, fullurl)
	201	urltype, url = splittype(fullurl)
	202	if not urltype:
	203	urltype = 'file'
	204	if urltype in self.proxies:
	205	proxy = self.proxies[urltype]
	206	urltype, proxyhost = splittype(proxy)
	207	host, selector = splithost(proxyhost)
	208	url = (host, fullurl) # Signal special case to open_*()
	209	else:
	210	proxy = None
	211	name = 'open_' + urltype
	212	self.type = urltype
	213	name = name.replace('-', '_')
	214	if not hasattr(self, name):
	215	if proxy:
	216	return self.open_unknown_proxy(proxy, fullurl, data)
	217	else:
	218	return self.open_unknown(fullurl, data)
	219	try:
	220	if data is None:
	221	return getattr(self, name)(url)
	222	else:
	223	return getattr(self, name)(url, data)
	224	except socket.error, msg:
	225	raise IOError, ('socket error', msg), sys.exc_info()[2]
	226
	227	def open_unknown(self, fullurl, data=None):
	228	"""Overridable interface to open unknown URL type."""
	229	type, url = splittype(fullurl)
	230	raise IOError, ('url error', 'unknown url type', type)
	231
	232	def open_unknown_proxy(self, proxy, fullurl, data=None):
	233	"""Overridable interface to open unknown URL type."""
	234	type, url = splittype(fullurl)
	235	raise IOError, ('url error', 'invalid proxy for %s' % type, proxy)
	236
	237	# External interface
	238	def retrieve(self, url, filename=None, reporthook=None, data=None):
	239	"""retrieve(url) returns (filename, headers) for a local object
	240	or (tempfilename, headers) for a remote object."""
	241	url = unwrap(toBytes(url))
	242	if self.tempcache and url in self.tempcache:
	243	return self.tempcache[url]
	244	type, url1 = splittype(url)
	245	if filename is None and (not type or type == 'file'):
	246	try:
	247	fp = self.open_local_file(url1)
	248	hdrs = fp.info()
[391]	249	fp.close()
[2]	250	return url2pathname(splithost(url1)[1]), hdrs
[391]	251	except IOError:
[2]	252	pass
	253	fp = self.open(url, data)
	254	try:
	255	headers = fp.info()
	256	if filename:
	257	tfp = open(filename, 'wb')
	258	else:
	259	import tempfile
	260	garbage, path = splittype(url)
	261	garbage, path = splithost(path or "")
	262	path, garbage = splitquery(path or "")
	263	path, garbage = splitattr(path or "")
	264	suffix = os.path.splitext(path)[1]
	265	(fd, filename) = tempfile.mkstemp(suffix)
	266	self.__tempfiles.append(filename)
	267	tfp = os.fdopen(fd, 'wb')
	268	try:
	269	result = filename, headers
	270	if self.tempcache is not None:
	271	self.tempcache[url] = result
	272	bs = 1024*8
	273	size = -1
	274	read = 0
	275	blocknum = 0
[391]	276	if "content-length" in headers:
	277	size = int(headers["Content-Length"])
[2]	278	if reporthook:
	279	reporthook(blocknum, bs, size)
	280	while 1:
	281	block = fp.read(bs)
	282	if block == "":
	283	break
	284	read += len(block)
	285	tfp.write(block)
	286	blocknum += 1
	287	if reporthook:
	288	reporthook(blocknum, bs, size)
	289	finally:
	290	tfp.close()
	291	finally:
	292	fp.close()
	293
	294	# raise exception if actual size does not match content-length header
	295	if size >= 0 and read < size:
	296	raise ContentTooShortError("retrieval incomplete: got only %i out "
	297	"of %i bytes" % (read, size), result)
	298
	299	return result
	300
	301	# Each method named open_<type> knows how to open that type of URL
	302
	303	def open_http(self, url, data=None):
	304	"""Use HTTP protocol."""
	305	import httplib
	306	user_passwd = None
	307	proxy_passwd= None
	308	if isinstance(url, str):
	309	host, selector = splithost(url)
	310	if host:
	311	user_passwd, host = splituser(host)
	312	host = unquote(host)
	313	realhost = host
	314	else:
	315	host, selector = url
	316	# check whether the proxy contains authorization information
	317	proxy_passwd, host = splituser(host)
	318	# now we proceed with the url we want to obtain
	319	urltype, rest = splittype(selector)
	320	url = rest
	321	user_passwd = None
	322	if urltype.lower() != 'http':
	323	realhost = None
	324	else:
	325	realhost, rest = splithost(rest)
	326	if realhost:
	327	user_passwd, realhost = splituser(realhost)
	328	if user_passwd:
	329	selector = "%s://%s%s" % (urltype, realhost, rest)
	330	if proxy_bypass(realhost):
	331	host = realhost
	332
	333	#print "proxy via http:", host, selector
	334	if not host: raise IOError, ('http error', 'no host given')
	335
	336	if proxy_passwd:
[391]	337	proxy_passwd = unquote(proxy_passwd)
[2]	338	proxy_auth = base64.b64encode(proxy_passwd).strip()
	339	else:
	340	proxy_auth = None
	341
	342	if user_passwd:
[391]	343	user_passwd = unquote(user_passwd)
[2]	344	auth = base64.b64encode(user_passwd).strip()
	345	else:
	346	auth = None
	347	h = httplib.HTTP(host)
	348	if data is not None:
	349	h.putrequest('POST', selector)
	350	h.putheader('Content-Type', 'application/x-www-form-urlencoded')
	351	h.putheader('Content-Length', '%d' % len(data))
	352	else:
	353	h.putrequest('GET', selector)
	354	if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
	355	if auth: h.putheader('Authorization', 'Basic %s' % auth)
	356	if realhost: h.putheader('Host', realhost)
	357	for args in self.addheaders: h.putheader(*args)
[391]	358	h.endheaders(data)
[2]	359	errcode, errmsg, headers = h.getreply()
	360	fp = h.getfile()
	361	if errcode == -1:
	362	if fp: fp.close()
	363	# something went wrong with the HTTP status line
	364	raise IOError, ('http protocol error', 0,
	365	'got a bad status line', None)
	366	# According to RFC 2616, "2xx" code indicates that the client's
	367	# request was successfully received, understood, and accepted.
	368	if (200 <= errcode < 300):
	369	return addinfourl(fp, headers, "http:" + url, errcode)
	370	else:
	371	if data is None:
	372	return self.http_error(url, fp, errcode, errmsg, headers)
	373	else:
	374	return self.http_error(url, fp, errcode, errmsg, headers, data)
	375
	376	def http_error(self, url, fp, errcode, errmsg, headers, data=None):
	377	"""Handle http errors.
	378	Derived class can override this, or provide specific handlers
	379	named http_error_DDD where DDD is the 3-digit error code."""
	380	# First check if there's a specific handler for this error
	381	name = 'http_error_%d' % errcode
	382	if hasattr(self, name):
	383	method = getattr(self, name)
	384	if data is None:
	385	result = method(url, fp, errcode, errmsg, headers)
	386	else:
	387	result = method(url, fp, errcode, errmsg, headers, data)
	388	if result: return result
	389	return self.http_error_default(url, fp, errcode, errmsg, headers)
	390
	391	def http_error_default(self, url, fp, errcode, errmsg, headers):
	392	"""Default error handler: close the connection and raise IOError."""
	393	fp.close()
	394	raise IOError, ('http error', errcode, errmsg, headers)
	395
	396	if _have_ssl:
	397	def open_https(self, url, data=None):
	398	"""Use HTTPS protocol."""
	399
	400	import httplib
	401	user_passwd = None
	402	proxy_passwd = None
	403	if isinstance(url, str):
	404	host, selector = splithost(url)
	405	if host:
	406	user_passwd, host = splituser(host)
	407	host = unquote(host)
	408	realhost = host
	409	else:
	410	host, selector = url
	411	# here, we determine, whether the proxy contains authorization information
	412	proxy_passwd, host = splituser(host)
	413	urltype, rest = splittype(selector)
	414	url = rest
	415	user_passwd = None
	416	if urltype.lower() != 'https':
	417	realhost = None
	418	else:
	419	realhost, rest = splithost(rest)
	420	if realhost:
	421	user_passwd, realhost = splituser(realhost)
	422	if user_passwd:
	423	selector = "%s://%s%s" % (urltype, realhost, rest)
	424	#print "proxy via https:", host, selector
	425	if not host: raise IOError, ('https error', 'no host given')
	426	if proxy_passwd:
[391]	427	proxy_passwd = unquote(proxy_passwd)
[2]	428	proxy_auth = base64.b64encode(proxy_passwd).strip()
	429	else:
	430	proxy_auth = None
	431	if user_passwd:
[391]	432	user_passwd = unquote(user_passwd)
[2]	433	auth = base64.b64encode(user_passwd).strip()
	434	else:
	435	auth = None
	436	h = httplib.HTTPS(host, 0,
	437	key_file=self.key_file,
	438	cert_file=self.cert_file)
	439	if data is not None:
	440	h.putrequest('POST', selector)
	441	h.putheader('Content-Type',
	442	'application/x-www-form-urlencoded')
	443	h.putheader('Content-Length', '%d' % len(data))
	444	else:
	445	h.putrequest('GET', selector)
	446	if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
	447	if auth: h.putheader('Authorization', 'Basic %s' % auth)
	448	if realhost: h.putheader('Host', realhost)
	449	for args in self.addheaders: h.putheader(*args)
[391]	450	h.endheaders(data)
[2]	451	errcode, errmsg, headers = h.getreply()
	452	fp = h.getfile()
	453	if errcode == -1:
	454	if fp: fp.close()
	455	# something went wrong with the HTTP status line
	456	raise IOError, ('http protocol error', 0,
	457	'got a bad status line', None)
	458	# According to RFC 2616, "2xx" code indicates that the client's
	459	# request was successfully received, understood, and accepted.
	460	if (200 <= errcode < 300):
	461	return addinfourl(fp, headers, "https:" + url, errcode)
	462	else:
	463	if data is None:
	464	return self.http_error(url, fp, errcode, errmsg, headers)
	465	else:
	466	return self.http_error(url, fp, errcode, errmsg, headers,
	467	data)
	468
	469	def open_file(self, url):
	470	"""Use local file or FTP depending on form of URL."""
	471	if not isinstance(url, str):
	472	raise IOError, ('file error', 'proxy support for file protocol currently not implemented')
	473	if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/':
	474	return self.open_ftp(url)
	475	else:
	476	return self.open_local_file(url)
	477
	478	def open_local_file(self, url):
	479	"""Use local file."""
	480	import mimetypes, mimetools, email.utils
	481	try:
	482	from cStringIO import StringIO
	483	except ImportError:
	484	from StringIO import StringIO
	485	host, file = splithost(url)
	486	localname = url2pathname(file)
	487	try:
	488	stats = os.stat(localname)
	489	except OSError, e:
	490	raise IOError(e.errno, e.strerror, e.filename)
	491	size = stats.st_size
	492	modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
	493	mtype = mimetypes.guess_type(url)[0]
	494	headers = mimetools.Message(StringIO(
	495	'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
	496	(mtype or 'text/plain', size, modified)))
	497	if not host:
	498	urlfile = file
	499	if file[:1] == '/':
	500	urlfile = 'file://' + file
[391]	501	elif file[:2] == './':
	502	raise ValueError("local file url may start with / or file:. Unknown url of type: %s" % url)
[2]	503	return addinfourl(open(localname, 'rb'),
	504	headers, urlfile)
	505	host, port = splitport(host)
	506	if not port \
	507	and socket.gethostbyname(host) in (localhost(), thishost()):
	508	urlfile = file
	509	if file[:1] == '/':
	510	urlfile = 'file://' + file
	511	return addinfourl(open(localname, 'rb'),
	512	headers, urlfile)
	513	raise IOError, ('local file error', 'not on local host')
	514
	515	def open_ftp(self, url):
	516	"""Use FTP protocol."""
	517	if not isinstance(url, str):
	518	raise IOError, ('ftp error', 'proxy support for ftp protocol currently not implemented')
	519	import mimetypes, mimetools
	520	try:
	521	from cStringIO import StringIO
	522	except ImportError:
	523	from StringIO import StringIO
	524	host, path = splithost(url)
	525	if not host: raise IOError, ('ftp error', 'no host given')
	526	host, port = splitport(host)
	527	user, host = splituser(host)
	528	if user: user, passwd = splitpasswd(user)
	529	else: passwd = None
	530	host = unquote(host)
[391]	531	user = user or ''
	532	passwd = passwd or ''
[2]	533	host = socket.gethostbyname(host)
	534	if not port:
	535	import ftplib
	536	port = ftplib.FTP_PORT
	537	else:
	538	port = int(port)
	539	path, attrs = splitattr(path)
	540	path = unquote(path)
	541	dirs = path.split('/')
	542	dirs, file = dirs[:-1], dirs[-1]
	543	if dirs and not dirs[0]: dirs = dirs[1:]
	544	if dirs and not dirs[0]: dirs[0] = '/'
	545	key = user, host, port, '/'.join(dirs)
	546	# XXX thread unsafe!
	547	if len(self.ftpcache) > MAXFTPCACHE:
	548	# Prune the cache, rather arbitrarily
	549	for k in self.ftpcache.keys():
	550	if k != key:
	551	v = self.ftpcache[k]
	552	del self.ftpcache[k]
	553	v.close()
	554	try:
	555	if not key in self.ftpcache:
	556	self.ftpcache[key] = \
	557	ftpwrapper(user, passwd, host, port, dirs)
	558	if not file: type = 'D'
	559	else: type = 'I'
	560	for attr in attrs:
	561	attr, value = splitvalue(attr)
	562	if attr.lower() == 'type' and \
	563	value in ('a', 'A', 'i', 'I', 'd', 'D'):
	564	type = value.upper()
	565	(fp, retrlen) = self.ftpcache[key].retrfile(file, type)
	566	mtype = mimetypes.guess_type("ftp:" + url)[0]
	567	headers = ""
	568	if mtype:
	569	headers += "Content-Type: %s\n" % mtype
	570	if retrlen is not None and retrlen >= 0:
	571	headers += "Content-Length: %d\n" % retrlen
	572	headers = mimetools.Message(StringIO(headers))
	573	return addinfourl(fp, headers, "ftp:" + url)
	574	except ftperrors(), msg:
	575	raise IOError, ('ftp error', msg), sys.exc_info()[2]
	576
	577	def open_data(self, url, data=None):
	578	"""Use "data" URL."""
	579	if not isinstance(url, str):
	580	raise IOError, ('data error', 'proxy support for data protocol currently not implemented')
	581	# ignore POSTed data
	582	#
	583	# syntax of data URLs:
	584	# dataurl := "data:" [ mediatype ] [ ";base64" ] "," data
	585	# mediatype := [ type "/" subtype ] *( ";" parameter )
	586	# data := *urlchar
	587	# parameter := attribute "=" value
	588	import mimetools
	589	try:
	590	from cStringIO import StringIO
	591	except ImportError:
	592	from StringIO import StringIO
	593	try:
	594	[type, data] = url.split(',', 1)
	595	except ValueError:
	596	raise IOError, ('data error', 'bad data URL')
	597	if not type:
	598	type = 'text/plain;charset=US-ASCII'
	599	semi = type.rfind(';')
	600	if semi >= 0 and '=' not in type[semi:]:
	601	encoding = type[semi+1:]
	602	type = type[:semi]
	603	else:
	604	encoding = ''
	605	msg = []
[391]	606	msg.append('Date: %s'%time.strftime('%a, %d %b %Y %H:%M:%S GMT',
[2]	607	time.gmtime(time.time())))
	608	msg.append('Content-type: %s' % type)
	609	if encoding == 'base64':
	610	data = base64.decodestring(data)
	611	else:
	612	data = unquote(data)
	613	msg.append('Content-Length: %d' % len(data))
	614	msg.append('')
	615	msg.append(data)
	616	msg = '\n'.join(msg)
	617	f = StringIO(msg)
	618	headers = mimetools.Message(f, 0)
	619	#f.fileno = None # needed for addinfourl
	620	return addinfourl(f, headers, url)
	621
	622
	623	class FancyURLopener(URLopener):
	624	"""Derived class with handlers for errors we can handle (perhaps)."""
	625
	626	def __init__(self, args, *kwargs):
	627	URLopener.__init__(self, args, *kwargs)
	628	self.auth_cache = {}
	629	self.tries = 0
	630	self.maxtries = 10
	631
	632	def http_error_default(self, url, fp, errcode, errmsg, headers):
	633	"""Default error handling -- don't raise an exception."""
	634	return addinfourl(fp, headers, "http:" + url, errcode)
	635
	636	def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
	637	"""Error 302 -- relocated (temporarily)."""
	638	self.tries += 1
	639	if self.maxtries and self.tries >= self.maxtries:
	640	if hasattr(self, "http_error_500"):
	641	meth = self.http_error_500
	642	else:
	643	meth = self.http_error_default
	644	self.tries = 0
	645	return meth(url, fp, 500,
	646	"Internal Server Error: Redirect Recursion", headers)
	647	result = self.redirect_internal(url, fp, errcode, errmsg, headers,
	648	data)
	649	self.tries = 0
	650	return result
	651
	652	def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
	653	if 'location' in headers:
	654	newurl = headers['location']
	655	elif 'uri' in headers:
	656	newurl = headers['uri']
	657	else:
	658	return
	659	fp.close()
	660	# In case the server sent a relative URL, join with original:
	661	newurl = basejoin(self.type + ":" + url, newurl)
[391]	662
	663	# For security reasons we do not allow redirects to protocols
	664	# other than HTTP, HTTPS or FTP.
	665	newurl_lower = newurl.lower()
	666	if not (newurl_lower.startswith('http://') or
	667	newurl_lower.startswith('https://') or
	668	newurl_lower.startswith('ftp://')):
	669	raise IOError('redirect error', errcode,
	670	errmsg + " - Redirection to url '%s' is not allowed" %
	671	newurl,
	672	headers)
	673
[2]	674	return self.open(newurl)
	675
	676	def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
	677	"""Error 301 -- also relocated (permanently)."""
	678	return self.http_error_302(url, fp, errcode, errmsg, headers, data)
	679
	680	def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
	681	"""Error 303 -- also relocated (essentially identical to 302)."""
	682	return self.http_error_302(url, fp, errcode, errmsg, headers, data)
	683
	684	def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
	685	"""Error 307 -- relocated, but turn POST into error."""
	686	if data is None:
	687	return self.http_error_302(url, fp, errcode, errmsg, headers, data)
	688	else:
	689	return self.http_error_default(url, fp, errcode, errmsg, headers)
	690
	691	def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
	692	"""Error 401 -- authentication required.
	693	This function supports Basic authentication only."""
	694	if not 'www-authenticate' in headers:
	695	URLopener.http_error_default(self, url, fp,
	696	errcode, errmsg, headers)
	697	stuff = headers['www-authenticate']
	698	import re
	699	match = re.match('[ \t]([^ \t]+)[ \t]+realm="([^"])"', stuff)
	700	if not match:
	701	URLopener.http_error_default(self, url, fp,
	702	errcode, errmsg, headers)
	703	scheme, realm = match.groups()
	704	if scheme.lower() != 'basic':
	705	URLopener.http_error_default(self, url, fp,
	706	errcode, errmsg, headers)
	707	name = 'retry_' + self.type + '_basic_auth'
	708	if data is None:
	709	return getattr(self,name)(url, realm)
	710	else:
	711	return getattr(self,name)(url, realm, data)
	712
	713	def http_error_407(self, url, fp, errcode, errmsg, headers, data=None):
	714	"""Error 407 -- proxy authentication required.
	715	This function supports Basic authentication only."""
	716	if not 'proxy-authenticate' in headers:
	717	URLopener.http_error_default(self, url, fp,
	718	errcode, errmsg, headers)
	719	stuff = headers['proxy-authenticate']
	720	import re
	721	match = re.match('[ \t]([^ \t]+)[ \t]+realm="([^"])"', stuff)
	722	if not match:
	723	URLopener.http_error_default(self, url, fp,
	724	errcode, errmsg, headers)
	725	scheme, realm = match.groups()
	726	if scheme.lower() != 'basic':
	727	URLopener.http_error_default(self, url, fp,
	728	errcode, errmsg, headers)
	729	name = 'retry_proxy_' + self.type + '_basic_auth'
	730	if data is None:
	731	return getattr(self,name)(url, realm)
	732	else:
	733	return getattr(self,name)(url, realm, data)
	734
	735	def retry_proxy_http_basic_auth(self, url, realm, data=None):
	736	host, selector = splithost(url)
	737	newurl = 'http://' + host + selector
	738	proxy = self.proxies['http']
	739	urltype, proxyhost = splittype(proxy)
	740	proxyhost, proxyselector = splithost(proxyhost)
	741	i = proxyhost.find('@') + 1
	742	proxyhost = proxyhost[i:]
	743	user, passwd = self.get_user_passwd(proxyhost, realm, i)
	744	if not (user or passwd): return None
	745	proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost
	746	self.proxies['http'] = 'http://' + proxyhost + proxyselector
	747	if data is None:
	748	return self.open(newurl)
	749	else:
	750	return self.open(newurl, data)
	751
	752	def retry_proxy_https_basic_auth(self, url, realm, data=None):
	753	host, selector = splithost(url)
	754	newurl = 'https://' + host + selector
	755	proxy = self.proxies['https']
	756	urltype, proxyhost = splittype(proxy)
	757	proxyhost, proxyselector = splithost(proxyhost)
	758	i = proxyhost.find('@') + 1
	759	proxyhost = proxyhost[i:]
	760	user, passwd = self.get_user_passwd(proxyhost, realm, i)
	761	if not (user or passwd): return None
	762	proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost
	763	self.proxies['https'] = 'https://' + proxyhost + proxyselector
	764	if data is None:
	765	return self.open(newurl)
	766	else:
	767	return self.open(newurl, data)
	768
	769	def retry_http_basic_auth(self, url, realm, data=None):
	770	host, selector = splithost(url)
	771	i = host.find('@') + 1
	772	host = host[i:]
	773	user, passwd = self.get_user_passwd(host, realm, i)
	774	if not (user or passwd): return None
	775	host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
	776	newurl = 'http://' + host + selector
	777	if data is None:
	778	return self.open(newurl)
	779	else:
	780	return self.open(newurl, data)
	781
	782	def retry_https_basic_auth(self, url, realm, data=None):
	783	host, selector = splithost(url)
	784	i = host.find('@') + 1
	785	host = host[i:]
	786	user, passwd = self.get_user_passwd(host, realm, i)
	787	if not (user or passwd): return None
	788	host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
	789	newurl = 'https://' + host + selector
	790	if data is None:
	791	return self.open(newurl)
	792	else:
	793	return self.open(newurl, data)
	794
[391]	795	def get_user_passwd(self, host, realm, clear_cache=0):
[2]	796	key = realm + '@' + host.lower()
	797	if key in self.auth_cache:
	798	if clear_cache:
	799	del self.auth_cache[key]
	800	else:
	801	return self.auth_cache[key]
	802	user, passwd = self.prompt_user_passwd(host, realm)
	803	if user or passwd: self.auth_cache[key] = (user, passwd)
	804	return user, passwd
	805
	806	def prompt_user_passwd(self, host, realm):
	807	"""Override this in a GUI environment!"""
	808	import getpass
	809	try:
	810	user = raw_input("Enter username for %s at %s: " % (realm,
	811	host))
	812	passwd = getpass.getpass("Enter password for %s in %s at %s: " %
	813	(user, realm, host))
	814	return user, passwd
	815	except KeyboardInterrupt:
	816	print
	817	return None, None
	818
	819
	820	# Utility functions
	821
	822	_localhost = None
	823	def localhost():
	824	"""Return the IP address of the magic hostname 'localhost'."""
	825	global _localhost
	826	if _localhost is None:
	827	_localhost = socket.gethostbyname('localhost')
	828	return _localhost
	829
	830	_thishost = None
	831	def thishost():
	832	"""Return the IP address of the current host."""
	833	global _thishost
	834	if _thishost is None:
[391]	835	try:
	836	_thishost = socket.gethostbyname(socket.gethostname())
	837	except socket.gaierror:
	838	_thishost = socket.gethostbyname('localhost')
[2]	839	return _thishost
	840
	841	_ftperrors = None
	842	def ftperrors():
	843	"""Return the set of errors raised by the FTP class."""
	844	global _ftperrors
	845	if _ftperrors is None:
	846	import ftplib
	847	_ftperrors = ftplib.all_errors
	848	return _ftperrors
	849
	850	_noheaders = None
	851	def noheaders():
	852	"""Return an empty mimetools.Message object."""
	853	global _noheaders
	854	if _noheaders is None:
	855	import mimetools
	856	try:
	857	from cStringIO import StringIO
	858	except ImportError:
	859	from StringIO import StringIO
	860	_noheaders = mimetools.Message(StringIO(), 0)
	861	_noheaders.fp.close() # Recycle file descriptor
	862	return _noheaders
	863
	864
	865	# Utility classes
	866
class ftpwrapper:
    """Class used by open_ftp() for cache of open FTP connections.

    Wraps one ftplib.FTP session.  `busy` records that a transfer has been
    started and its final response has not yet been read; `refcount` counts
    the file objects handed out by retrfile() that have not been closed yet;
    `keepalive` is the `persistent` constructor flag and is cleared by
    close().
    """

    def __init__(self, user, passwd, host, port, dirs,
                 timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
                 persistent=True):
        """Store the connection parameters and connect immediately via init()."""
        self.user = user
        self.passwd = passwd
        self.host = host
        self.port = port
        self.dirs = dirs
        self.timeout = timeout
        self.refcount = 0
        self.keepalive = persistent
        self.init()

    def init(self):
        """(Re)connect: open a new FTP session, log in and cwd to the joined dirs."""
        import ftplib
        self.busy = 0
        self.ftp = ftplib.FTP()
        self.ftp.connect(self.host, self.port, self.timeout)
        self.ftp.login(self.user, self.passwd)
        _target = '/'.join(self.dirs)
        self.ftp.cwd(_target)

    def retrfile(self, file, type):
        """Start retrieving `file` and return (file object, retrieval length).

        `type` is appended to the 'TYPE ' command; 'd' or 'D' requests a
        directory listing.  If RETR fails with a 550 reply, or no file name
        is given, a LIST is issued instead.  Other permanent FTP errors are
        re-raised as IOError('ftp error', reason) with the original traceback.
        """
        import ftplib
        # Finish any transfer still pending on this connection first.
        self.endtransfer()
        if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
        else: cmd = 'TYPE ' + type; isdir = 0
        try:
            self.ftp.voidcmd(cmd)
        except ftplib.all_errors:
            # The command failed; reconnect once and retry it.
            self.init()
            self.ftp.voidcmd(cmd)
        conn = None
        if file and not isdir:
            # Try to retrieve as a file
            try:
                cmd = 'RETR ' + file
                conn, retrlen = self.ftp.ntransfercmd(cmd)
            except ftplib.error_perm, reason:
                # A 550 reply is swallowed so the listing fallback below runs.
                if str(reason)[:3] != '550':
                    raise IOError, ('ftp error', reason), sys.exc_info()[2]
        if not conn:
            # Set transfer mode to ASCII!
            self.ftp.voidcmd('TYPE A')
            # Try a directory listing. Verify that directory exists.
            if file:
                pwd = self.ftp.pwd()
                try:
                    try:
                        self.ftp.cwd(file)
                    except ftplib.error_perm, reason:
                        raise IOError, ('ftp error', reason), sys.exc_info()[2]
                finally:
                    # Always return to the directory we started in.
                    self.ftp.cwd(pwd)
                cmd = 'LIST ' + file
            else:
                cmd = 'LIST'
            conn, retrlen = self.ftp.ntransfercmd(cmd)
        self.busy = 1
        # file_close() runs when the caller closes the returned object.
        ftpobj = addclosehook(conn.makefile('rb'), self.file_close)
        self.refcount += 1
        conn.close()
        # Pass back both a suitably decorated object and a retrieval length
        return (ftpobj, retrlen)

    def endtransfer(self):
        """Read the final response of a pending transfer, ignoring FTP errors."""
        if not self.busy:
            return
        self.busy = 0
        try:
            self.ftp.voidresp()
        except ftperrors():
            pass

    def close(self):
        """Stop keeping the connection alive; close it now if no files are open."""
        self.keepalive = False
        if self.refcount <= 0:
            self.real_close()

    def file_close(self):
        """Close hook for files returned by retrfile().

        Ends the transfer, drops one reference and really closes the
        connection once no files remain and keepalive has been cleared.
        """
        self.endtransfer()
        self.refcount -= 1
        if self.refcount <= 0 and not self.keepalive:
            self.real_close()

    def real_close(self):
        """End any pending transfer and close the FTP session, ignoring FTP errors."""
        self.endtransfer()
        try:
            self.ftp.close()
        except ftperrors():
            pass
	961
	962	class addbase:
	963	"""Base class for addinfo and addclosehook."""
	964
	965	def __init__(self, fp):
	966	self.fp = fp
	967	self.read = self.fp.read
	968	self.readline = self.fp.readline
	969	if hasattr(self.fp, "readlines"): self.readlines = self.fp.readlines
	970	if hasattr(self.fp, "fileno"):
	971	self.fileno = self.fp.fileno
	972	else:
	973	self.fileno = lambda: None
	974	if hasattr(self.fp, "__iter__"):
	975	self.__iter__ = self.fp.__iter__
	976	if hasattr(self.fp, "next"):
	977	self.next = self.fp.next
	978
	979	def __repr__(self):
	980	return '<%s at %r whose fp = %r>' % (self.__class__.__name__,
	981	id(self), self.fp)
	982
	983	def close(self):
	984	self.read = None
	985	self.readline = None
	986	self.readlines = None
	987	self.fileno = None
	988	if self.fp: self.fp.close()
	989	self.fp = None
	990
	991	class addclosehook(addbase):
	992	"""Class to add a close hook to an open file."""
	993
	994	def __init__(self, fp, closehook, *hookargs):
	995	addbase.__init__(self, fp)
	996	self.closehook = closehook
	997	self.hookargs = hookargs
	998
	999	def close(self):
	1000	if self.closehook:
	1001	self.closehook(*self.hookargs)
	1002	self.closehook = None
	1003	self.hookargs = None
[391]	1004	addbase.close(self)
[2]	1005
	1006	class addinfo(addbase):
	1007	"""class to add an info() method to an open file."""
	1008
	1009	def __init__(self, fp, headers):
	1010	addbase.__init__(self, fp)
	1011	self.headers = headers
	1012
	1013	def info(self):
	1014	return self.headers
	1015
	1016	class addinfourl(addbase):
	1017	"""class to add info() and geturl() methods to an open file."""
	1018
	1019	def __init__(self, fp, headers, url, code=None):
	1020	addbase.__init__(self, fp)
	1021	self.headers = headers
	1022	self.url = url
	1023	self.code = code
	1024
	1025	def info(self):
	1026	return self.headers
	1027
	1028	def getcode(self):
	1029	return self.code
	1030
	1031	def geturl(self):
	1032	return self.url
	1033
	1034
	1035	# Utilities to parse URLs (most of these return None for missing parts):
	1036	# unwrap('<URL:type://host/path>') --> 'type://host/path'
	1037	# splittype('type:opaquestring') --> 'type', 'opaquestring'
	1038	# splithost('//host[:port]/path') --> 'host[:port]', '/path'
	1039	# splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'
	1040	# splitpasswd('user:passwd') -> 'user', 'passwd'
	1041	# splitport('host:port') --> 'host', 'port'
	1042	# splitquery('/path?query') --> '/path', 'query'
	1043	# splittag('/path#tag') --> '/path', 'tag'
	1044	# splitattr('/path;attr1=value1;attr2=value2;...') ->
	1045	# '/path', ['attr1=value1', 'attr2=value2', ...]
	1046	# splitvalue('attr=value') --> 'attr', 'value'
	1047	# unquote('abc%20def') -> 'abc def'
# quote('abc def') -> 'abc%20def'
	1049
	1050	try:
	1051	unicode
	1052	except NameError:
	1053	def _is_unicode(x):
	1054	return 0
	1055	else:
	1056	def _is_unicode(x):
	1057	return isinstance(x, unicode)
	1058
	1059	def toBytes(url):
	1060	"""toBytes(u"URL") --> 'URL'."""
	1061	# Most URL schemes require ASCII. If that changes, the conversion
	1062	# can be relaxed
	1063	if _is_unicode(url):
	1064	try:
	1065	url = url.encode("ASCII")
	1066	except UnicodeError:
	1067	raise UnicodeError("URL " + repr(url) +
	1068	" contains non-ASCII characters")
	1069	return url
	1070
	1071	def unwrap(url):
	1072	"""unwrap('<URL:type://host/path>') --> 'type://host/path'."""
	1073	url = url.strip()
	1074	if url[:1] == '<' and url[-1:] == '>':
	1075	url = url[1:-1].strip()
	1076	if url[:4] == 'URL:': url = url[4:].strip()
	1077	return url
	1078
	1079	_typeprog = None
	1080	def splittype(url):
	1081	"""splittype('type:opaquestring') --> 'type', 'opaquestring'."""
	1082	global _typeprog
	1083	if _typeprog is None:
	1084	import re
	1085	_typeprog = re.compile('^([^/:]+):')
	1086
	1087	match = _typeprog.match(url)
	1088	if match:
	1089	scheme = match.group(1)
	1090	return scheme.lower(), url[len(scheme) + 1:]
	1091	return None, url
	1092
	1093	_hostprog = None
	1094	def splithost(url):
	1095	"""splithost('//host[:port]/path') --> 'host[:port]', '/path'."""
	1096	global _hostprog
	1097	if _hostprog is None:
	1098	import re
	1099	_hostprog = re.compile('^//([^/?])(.)$')
	1100
	1101	match = _hostprog.match(url)
[391]	1102	if match:
	1103	host_port = match.group(1)
	1104	path = match.group(2)
	1105	if path and not path.startswith('/'):
	1106	path = '/' + path
	1107	return host_port, path
[2]	1108	return None, url
	1109
	1110	_userprog = None
	1111	def splituser(host):
	1112	"""splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'."""
	1113	global _userprog
	1114	if _userprog is None:
	1115	import re
	1116	_userprog = re.compile('^(.)@(.)$')
	1117
	1118	match = _userprog.match(host)
[391]	1119	if match: return match.group(1, 2)
[2]	1120	return None, host
	1121
	1122	_passwdprog = None
	1123	def splitpasswd(user):
	1124	"""splitpasswd('user:passwd') -> 'user', 'passwd'."""
	1125	global _passwdprog
	1126	if _passwdprog is None:
	1127	import re
[391]	1128	_passwdprog = re.compile('^([^:]):(.)$',re.S)
[2]	1129
	1130	match = _passwdprog.match(user)
	1131	if match: return match.group(1, 2)
	1132	return user, None
	1133
# splitport('host:port') --> 'host', 'port'
	1135	_portprog = None
	1136	def splitport(host):
	1137	"""splitport('host:port') --> 'host', 'port'."""
	1138	global _portprog
	1139	if _portprog is None:
	1140	import re
	1141	_portprog = re.compile('^(.*):([0-9]+)$')
	1142
	1143	match = _portprog.match(host)
	1144	if match: return match.group(1, 2)
	1145	return host, None
	1146
	1147	_nportprog = None
	1148	def splitnport(host, defport=-1):
	1149	"""Split host and port, returning numeric port.
	1150	Return given default port if no ':' found; defaults to -1.
	1151	Return numerical port if a valid number are found after ':'.
	1152	Return None if ':' but not a valid number."""
	1153	global _nportprog
	1154	if _nportprog is None:
	1155	import re
	1156	_nportprog = re.compile('^(.):(.)$')
	1157
	1158	match = _nportprog.match(host)
	1159	if match:
	1160	host, port = match.group(1, 2)
	1161	try:
	1162	if not port: raise ValueError, "no digits"
	1163	nport = int(port)
	1164	except ValueError:
	1165	nport = None
	1166	return host, nport
	1167	return host, defport
	1168
	1169	_queryprog = None
	1170	def splitquery(url):
	1171	"""splitquery('/path?query') --> '/path', 'query'."""
	1172	global _queryprog
	1173	if _queryprog is None:
	1174	import re
	1175	_queryprog = re.compile('^(.)\?([^?])$')
	1176
	1177	match = _queryprog.match(url)
	1178	if match: return match.group(1, 2)
	1179	return url, None
	1180
	1181	_tagprog = None
	1182	def splittag(url):
	1183	"""splittag('/path#tag') --> '/path', 'tag'."""
	1184	global _tagprog
	1185	if _tagprog is None:
	1186	import re
	1187	_tagprog = re.compile('^(.)#([^#])$')
	1188
	1189	match = _tagprog.match(url)
	1190	if match: return match.group(1, 2)
	1191	return url, None
	1192
	1193	def splitattr(url):
	1194	"""splitattr('/path;attr1=value1;attr2=value2;...') ->
	1195	'/path', ['attr1=value1', 'attr2=value2', ...]."""
	1196	words = url.split(';')
	1197	return words[0], words[1:]
	1198
	1199	_valueprog = None
	1200	def splitvalue(attr):
	1201	"""splitvalue('attr=value') --> 'attr', 'value'."""
	1202	global _valueprog
	1203	if _valueprog is None:
	1204	import re
	1205	_valueprog = re.compile('^([^=])=(.)$')
	1206
	1207	match = _valueprog.match(attr)
	1208	if match: return match.group(1, 2)
	1209	return attr, None
	1210
[391]	1211	# urlparse contains a duplicate of this method to avoid a circular import. If
	1212	# you update this method, also update the copy in urlparse. This code
	1213	# duplication does not exist in Python3.
[2]	1214
_hexdig = '0123456789ABCDEFabcdef'
# Maps every two-digit hex string (in any letter case) to its character.
_hextochr = dict((a + b, chr(int(a + b, 16)))
                 for a in _hexdig for b in _hexdig)
# Splits text into alternating non-ASCII / ASCII runs (ASCII runs captured).
_asciire = re.compile('([\x00-\x7f]+)')

def unquote(s):
    """unquote('abc%20def') -> 'abc def'.

    A '%' that is not followed by two hex digits is left untouched.  For
    unicode input only the ASCII runs are unquoted; the decoded bytes are
    interpreted as latin1.
    """
    if _is_unicode(s):
        if '%' not in s:
            return s
        runs = _asciire.split(s)
        out = [runs[0]]
        # Odd positions hold the captured ASCII runs, even ones the rest.
        for pos in range(1, len(runs), 2):
            out.append(unquote(str(runs[pos])).decode('latin1'))
            out.append(runs[pos + 1])
        return ''.join(out)

    chunks = s.split('%')
    # fastpath
    if len(chunks) == 1:
        return s
    out = [chunks[0]]
    for chunk in chunks[1:]:
        decoded = _hextochr.get(chunk[:2])
        if decoded is None:
            # Not a valid escape: put the '%' back and keep the text as is.
            out.append('%')
            out.append(chunk)
        else:
            out.append(decoded)
            out.append(chunk[2:])
    return ''.join(out)
[2]	1247
	1248	def unquote_plus(s):
	1249	"""unquote('%7e/abc+def') -> '~/abc def'"""
	1250	s = s.replace('+', ' ')
	1251	return unquote(s)
	1252
	1253	always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
	1254	'abcdefghijklmnopqrstuvwxyz'
	1255	'0123456789' '_.-')
[391]	1256	_safe_map = {}
	1257	for i, c in zip(xrange(256), str(bytearray(xrange(256)))):
	1258	_safe_map[c] = c if (i < 128 and c in always_safe) else '%{:02X}'.format(i)
	1259	_safe_quoters = {}
[2]	1260
[391]	1261	def quote(s, safe='/'):
[2]	1262	"""quote('abc def') -> 'abc%20def'
	1263
	1264	Each part of a URL, e.g. the path info, the query, etc., has a
	1265	different set of reserved characters that must be quoted.
	1266
	1267	RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
	1268	the following reserved characters.
	1269
	1270	reserved = ";" \| "/" \| "?" \| ":" \| "@" \| "&" \| "=" \| "+" \|
	1271	"$" \| ","
	1272
	1273	Each of these characters is reserved in some component of a URL,
	1274	but not necessarily in all of them.
	1275
	1276	By default, the quote function is intended for quoting the path
	1277	section of a URL. Thus, it will not encode '/'. This character
	1278	is reserved, but in typical usage the quote function is being
	1279	called on a path where the existing slash characters are used as
	1280	reserved characters.
	1281	"""
[391]	1282	# fastpath
	1283	if not s:
	1284	if s is None:
	1285	raise TypeError('None object cannot be quoted')
	1286	return s
[2]	1287	cachekey = (safe, always_safe)
	1288	try:
[391]	1289	(quoter, safe) = _safe_quoters[cachekey]
[2]	1290	except KeyError:
[391]	1291	safe_map = _safe_map.copy()
	1292	safe_map.update([(c, c) for c in safe])
	1293	quoter = safe_map.__getitem__
	1294	safe = always_safe + safe
	1295	_safe_quoters[cachekey] = (quoter, safe)
	1296	if not s.rstrip(safe):
	1297	return s
	1298	return ''.join(map(quoter, s))
[2]	1299
def quote_plus(s, safe=''):
    """Quote the query fragment of a URL; replacing ' ' with '+'"""
    if ' ' not in s:
        return quote(s, safe)
    # Keep the spaces unescaped while quoting so they can become '+'.
    return quote(s, safe + ' ').replace(' ', '+')
	1306
[391]	1307	def urlencode(query, doseq=0):
[2]	1308	"""Encode a sequence of two-element tuples or dictionary into a URL query string.
	1309
	1310	If any values in the query arg are sequences and doseq is true, each
	1311	sequence element is converted to a separate parameter.
	1312
	1313	If the query arg is a sequence of two-element tuples, the order of the
	1314	parameters in the output will match the order of parameters in the
	1315	input.
	1316	"""
	1317
	1318	if hasattr(query,"items"):
	1319	# mapping objects
	1320	query = query.items()
	1321	else:
	1322	# it's a bother at times that strings and string-like objects are
	1323	# sequences...
	1324	try:
	1325	# non-sequence items should not work with len()
	1326	# non-empty strings will fail this
	1327	if len(query) and not isinstance(query[0], tuple):
	1328	raise TypeError
	1329	# zero-length sequences of all types will get here and succeed,
	1330	# but that's a minor nit - since the original implementation
	1331	# allowed empty dicts that type of behavior probably should be
	1332	# preserved for consistency
	1333	except TypeError:
	1334	ty,va,tb = sys.exc_info()
	1335	raise TypeError, "not a valid non-string sequence or mapping object", tb
	1336
	1337	l = []
	1338	if not doseq:
	1339	# preserve old behavior
	1340	for k, v in query:
	1341	k = quote_plus(str(k))
	1342	v = quote_plus(str(v))
	1343	l.append(k + '=' + v)
	1344	else:
	1345	for k, v in query:
	1346	k = quote_plus(str(k))
	1347	if isinstance(v, str):
	1348	v = quote_plus(v)
	1349	l.append(k + '=' + v)
	1350	elif _is_unicode(v):
	1351	# is there a reasonable way to convert to ASCII?
	1352	# encode generates a string, but "replace" or "ignore"
	1353	# lose information and "strict" can raise UnicodeError
	1354	v = quote_plus(v.encode("ASCII","replace"))
	1355	l.append(k + '=' + v)
	1356	else:
	1357	try:
	1358	# is this a sufficient test for sequence-ness?
[391]	1359	len(v)
[2]	1360	except TypeError:
	1361	# not a sequence
	1362	v = quote_plus(str(v))
	1363	l.append(k + '=' + v)
	1364	else:
	1365	# loop over the sequence
	1366	for elt in v:
	1367	l.append(k + '=' + quote_plus(str(elt)))
	1368	return '&'.join(l)
	1369
	1370	# Proxy handling
	1371	def getproxies_environment():
	1372	"""Return a dictionary of scheme -> proxy server URL mappings.
	1373
	1374	Scan the environment for variables named <scheme>_proxy;
	1375	this seems to be the standard convention. If you need a
	1376	different way, you can pass a proxies dictionary to the
	1377	[Fancy]URLopener constructor.
	1378
	1379	"""
	1380	proxies = {}
	1381	for name, value in os.environ.items():
	1382	name = name.lower()
	1383	if value and name[-6:] == '_proxy':
	1384	proxies[name[:-6]] = value
	1385	return proxies
	1386
	1387	def proxy_bypass_environment(host):
	1388	"""Test if proxies should not be used for a particular host.
	1389
	1390	Checks the environment for a variable named no_proxy, which should
	1391	be a list of DNS suffixes separated by commas, or '*' for all hosts.
	1392	"""
	1393	no_proxy = os.environ.get('no_proxy', '') or os.environ.get('NO_PROXY', '')
	1394	# '*' is special case for always bypass
	1395	if no_proxy == '*':
	1396	return 1
	1397	# strip port off host
	1398	hostonly, port = splitport(host)
	1399	# check if the host ends with any of the DNS suffixes
[391]	1400	no_proxy_list = [proxy.strip() for proxy in no_proxy.split(',')]
	1401	for name in no_proxy_list:
[2]	1402	if name and (hostonly.endswith(name) or host.endswith(name)):
	1403	return 1
	1404	# otherwise, don't bypass
	1405	return 0
	1406
	1407
	1408	if sys.platform == 'darwin':
	1409	from _scproxy import _get_proxy_settings, _get_proxies
	1410
	1411	def proxy_bypass_macosx_sysconf(host):
	1412	"""
	1413	Return True iff this host shouldn't be accessed using a proxy
	1414
	1415	This function uses the MacOSX framework SystemConfiguration
	1416	to fetch the proxy information.
	1417	"""
	1418	import re
	1419	import socket
	1420	from fnmatch import fnmatch
	1421
	1422	hostonly, port = splitport(host)
	1423
	1424	def ip2num(ipAddr):
	1425	parts = ipAddr.split('.')
	1426	parts = map(int, parts)
	1427	if len(parts) != 4:
	1428	parts = (parts + [0, 0, 0, 0])[:4]
	1429	return (parts[0] << 24) \| (parts[1] << 16) \| (parts[2] << 8) \| parts[3]
	1430
	1431	proxy_settings = _get_proxy_settings()
	1432
	1433	# Check for simple host names:
	1434	if '.' not in host:
	1435	if proxy_settings['exclude_simple']:
	1436	return True
	1437
	1438	hostIP = None
	1439
	1440	for value in proxy_settings.get('exceptions', ()):
	1441	# Items in the list are strings like these: *.local, 169.254/16
	1442	if not value: continue
	1443
	1444	m = re.match(r"(\d+(?:\.\d+)*)(/\d+)?", value)
	1445	if m is not None:
	1446	if hostIP is None:
	1447	try:
	1448	hostIP = socket.gethostbyname(hostonly)
	1449	hostIP = ip2num(hostIP)
	1450	except socket.error:
	1451	continue
	1452
	1453	base = ip2num(m.group(1))
[391]	1454	mask = m.group(2)
	1455	if mask is None:
	1456	mask = 8 * (m.group(1).count('.') + 1)
	1457
	1458	else:
	1459	mask = int(mask[1:])
[2]	1460	mask = 32 - mask
	1461
	1462	if (hostIP >> mask) == (base >> mask):
	1463	return True
	1464
	1465	elif fnmatch(host, value):
	1466	return True
	1467
	1468	return False
	1469
	1470	def getproxies_macosx_sysconf():
	1471	"""Return a dictionary of scheme -> proxy server URL mappings.
	1472
	1473	This function uses the MacOSX framework SystemConfiguration
	1474	to fetch the proxy information.
	1475	"""
	1476	return _get_proxies()
	1477
	1478	def proxy_bypass(host):
	1479	if getproxies_environment():
	1480	return proxy_bypass_environment(host)
	1481	else:
	1482	return proxy_bypass_macosx_sysconf(host)
	1483
	1484	def getproxies():
	1485	return getproxies_environment() or getproxies_macosx_sysconf()
	1486
	1487	elif os.name == 'nt':
	1488	def getproxies_registry():
	1489	"""Return a dictionary of scheme -> proxy server URL mappings.
	1490
	1491	Win32 uses the registry to store proxies.
	1492
	1493	"""
	1494	proxies = {}
	1495	try:
	1496	import _winreg
	1497	except ImportError:
	1498	# Std module, so should be around - but you never know!
	1499	return proxies
	1500	try:
	1501	internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
	1502	r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
	1503	proxyEnable = _winreg.QueryValueEx(internetSettings,
	1504	'ProxyEnable')[0]
	1505	if proxyEnable:
	1506	# Returned as Unicode but problems if not converted to ASCII
	1507	proxyServer = str(_winreg.QueryValueEx(internetSettings,
	1508	'ProxyServer')[0])
	1509	if '=' in proxyServer:
	1510	# Per-protocol settings
	1511	for p in proxyServer.split(';'):
	1512	protocol, address = p.split('=', 1)
	1513	# See if address has a type:// prefix
	1514	import re
	1515	if not re.match('^([^/:]+)://', address):
	1516	address = '%s://%s' % (protocol, address)
	1517	proxies[protocol] = address
	1518	else:
	1519	# Use one setting for all protocols
	1520	if proxyServer[:5] == 'http:':
	1521	proxies['http'] = proxyServer
	1522	else:
	1523	proxies['http'] = 'http://%s' % proxyServer
[391]	1524	proxies['https'] = 'https://%s' % proxyServer
[2]	1525	proxies['ftp'] = 'ftp://%s' % proxyServer
	1526	internetSettings.Close()
	1527	except (WindowsError, ValueError, TypeError):
	1528	# Either registry key not found etc, or the value in an
	1529	# unexpected format.
	1530	# proxies already set up to be empty so nothing to do
	1531	pass
	1532	return proxies
	1533
	1534	def getproxies():
	1535	"""Return a dictionary of scheme -> proxy server URL mappings.
	1536
	1537	Returns settings gathered from the environment, if specified,
	1538	or the registry.
	1539
	1540	"""
	1541	return getproxies_environment() or getproxies_registry()
	1542
	1543	def proxy_bypass_registry(host):
	1544	try:
	1545	import _winreg
	1546	import re
	1547	except ImportError:
	1548	# Std modules, so should be around - but you never know!
	1549	return 0
	1550	try:
	1551	internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
	1552	r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
	1553	proxyEnable = _winreg.QueryValueEx(internetSettings,
	1554	'ProxyEnable')[0]
	1555	proxyOverride = str(_winreg.QueryValueEx(internetSettings,
	1556	'ProxyOverride')[0])
	1557	# ^^^^ Returned as Unicode but problems if not converted to ASCII
	1558	except WindowsError:
	1559	return 0
	1560	if not proxyEnable or not proxyOverride:
	1561	return 0
	1562	# try to make a host list from name and IP address.
	1563	rawHost, port = splitport(host)
	1564	host = [rawHost]
	1565	try:
	1566	addr = socket.gethostbyname(rawHost)
	1567	if addr != rawHost:
	1568	host.append(addr)
	1569	except socket.error:
	1570	pass
	1571	try:
	1572	fqdn = socket.getfqdn(rawHost)
	1573	if fqdn != rawHost:
	1574	host.append(fqdn)
	1575	except socket.error:
	1576	pass
	1577	# make a check value list from the registry entry: replace the
	1578	# '<local>' string by the localhost entry and the corresponding
	1579	# canonical entry.
	1580	proxyOverride = proxyOverride.split(';')
	1581	# now check if we match one of the registry values.
	1582	for test in proxyOverride:
[391]	1583	if test == '<local>':
	1584	if '.' not in rawHost:
	1585	return 1
[2]	1586	test = test.replace(".", r"\.") # mask dots
	1587	test = test.replace("", r".") # change glob sequence
	1588	test = test.replace("?", r".") # change glob char
	1589	for val in host:
	1590	# print "%s <--> %s" %( test, val )
	1591	if re.match(test, val, re.I):
	1592	return 1
	1593	return 0
	1594
	1595	def proxy_bypass(host):
	1596	"""Return a dictionary of scheme -> proxy server URL mappings.
	1597
	1598	Returns settings gathered from the environment, if specified,
	1599	or the registry.
	1600
	1601	"""
	1602	if getproxies_environment():
	1603	return proxy_bypass_environment(host)
	1604	else:
	1605	return proxy_bypass_registry(host)
	1606
	1607	else:
	1608	# By default use environment variables
	1609	getproxies = getproxies_environment
	1610	proxy_bypass = proxy_bypass_environment
	1611
	1612	# Test and time quote() and unquote()
	1613	def test1():
	1614	s = ''
	1615	for i in range(256): s = s + chr(i)
	1616	s = s*4
	1617	t0 = time.time()
	1618	qs = quote(s)
	1619	uqs = unquote(qs)
	1620	t1 = time.time()
	1621	if uqs != s:
	1622	print 'Wrong!'
	1623	print repr(s)
	1624	print repr(qs)
	1625	print repr(uqs)
	1626	print round(t1 - t0, 3), 'sec'
	1627
	1628
	1629	def reporthook(blocknum, blocksize, totalsize):
	1630	# Report during remote transfers
	1631	print "Block number: %d, Block size: %d, Total size: %d" % (
	1632	blocknum, blocksize, totalsize)

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: python/trunk/Lib/urllib.py

Download in other formats: