source: python/trunk/Lib/urllib.py@ 1257

Last change on this file since 1257 was 1257, checked in by dmik, 8 years ago

python: Fix handling drive letters in urllib.url2pathname and pathname2url on OS/2.

It used to use Posix code path that would not recongize drive letters and treat the
path as non-absolute. Fixes Mozilla's python/mozbuild/mozpack/test/test_mozjar.py.

  • Property svn:eol-style set to native
File size: 57.4 KB
Line 
1"""Open an arbitrary URL.
2
3See the following document for more info on URLs:
4"Names and Addresses, URIs, URLs, URNs, URCs", at
5http://www.w3.org/pub/WWW/Addressing/Overview.html
6
7See also the HTTP spec (from which the error codes are derived):
8"HTTP - Hypertext Transfer Protocol", at
9http://www.w3.org/pub/WWW/Protocols/
10
11Related standards and specs:
12- RFC1808: the "relative URL" spec. (authoritative status)
13- RFC1738 - the "URL standard". (authoritative status)
14- RFC1630 - the "URI spec". (informational status)
15
16The object returned by URLopener().open(file) will differ per
17protocol. All you know is that is has methods read(), readline(),
18readlines(), fileno(), close() and info(). The read*(), fileno()
19and close() methods work like those of open files.
20The info() method returns a mimetools.Message object which can be
21used to query various info about the object, if available.
22(mimetools.Message objects are queried with the getheader() method.)
23"""
24
25import string
26import socket
27import os
28import time
29import sys
30import base64
31import re
32
33from urlparse import urljoin as basejoin
34
35__all__ = ["urlopen", "URLopener", "FancyURLopener", "urlretrieve",
36 "urlcleanup", "quote", "quote_plus", "unquote", "unquote_plus",
37 "urlencode", "url2pathname", "pathname2url", "splittag",
38 "localhost", "thishost", "ftperrors", "basejoin", "unwrap",
39 "splittype", "splithost", "splituser", "splitpasswd", "splitport",
40 "splitnport", "splitquery", "splitattr", "splitvalue",
41 "getproxies"]
42
43__version__ = '1.17' # XXX This version is not always updated :-(
44
45MAXFTPCACHE = 10 # Trim the ftp cache beyond this size
46
47# Helper for non-unix systems
48if os.name == 'nt':
49 from nturl2path import url2pathname, pathname2url
50elif os.name == 'riscos':
51 from rourl2path import url2pathname, pathname2url
52elif os.name == 'os2':
53 import nturl2path
54 def url2pathname(pathname):
55 """OS-specific conversion from a relative URL of the 'file' scheme
56 to a file system path; not recommended for general use."""
57 # nturl2path only expects back slashes
58 return nturl2path.url2pathname(pathname.replace('/', '\\'))
59
60 def pathname2url(pathname):
61 """OS-specific conversion from a file system path to a relative URL
62 of the 'file' scheme; not recommended for general use."""
63 # nturl2path only expects back slashes
64 return nturl2path.pathname2url(pathname.replace('/', '\\'))
65else:
66 def url2pathname(pathname):
67 """OS-specific conversion from a relative URL of the 'file' scheme
68 to a file system path; not recommended for general use."""
69 return unquote(pathname)
70
71 def pathname2url(pathname):
72 """OS-specific conversion from a file system path to a relative URL
73 of the 'file' scheme; not recommended for general use."""
74 return quote(pathname)
75
76# This really consists of two pieces:
77# (1) a class which handles opening of all sorts of URLs
78# (plus assorted utilities etc.)
79# (2) a set of functions for parsing URLs
80# XXX Should these be separated out into different modules?
81
82
83# Shortcut for basic usage
84_urlopener = None
85def urlopen(url, data=None, proxies=None):
86 """Create a file-like object for the specified URL to read from."""
87 from warnings import warnpy3k
88 warnpy3k("urllib.urlopen() has been removed in Python 3.0 in "
89 "favor of urllib2.urlopen()", stacklevel=2)
90
91 global _urlopener
92 if proxies is not None:
93 opener = FancyURLopener(proxies=proxies)
94 elif not _urlopener:
95 opener = FancyURLopener()
96 _urlopener = opener
97 else:
98 opener = _urlopener
99 if data is None:
100 return opener.open(url)
101 else:
102 return opener.open(url, data)
103def urlretrieve(url, filename=None, reporthook=None, data=None):
104 global _urlopener
105 if not _urlopener:
106 _urlopener = FancyURLopener()
107 return _urlopener.retrieve(url, filename, reporthook, data)
108def urlcleanup():
109 if _urlopener:
110 _urlopener.cleanup()
111 _safe_quoters.clear()
112 ftpcache.clear()
113
114# check for SSL
115try:
116 import ssl
117except:
118 _have_ssl = False
119else:
120 _have_ssl = True
121
122# exception raised when downloaded size does not match content-length
123class ContentTooShortError(IOError):
124 def __init__(self, message, content):
125 IOError.__init__(self, message)
126 self.content = content
127
128ftpcache = {}
129class URLopener:
130 """Class to open URLs.
131 This is a class rather than just a subroutine because we may need
132 more than one set of global protocol-specific options.
133 Note -- this is a base class for those who don't want the
134 automatic handling of errors type 302 (relocated) and 401
135 (authorization needed)."""
136
137 __tempfiles = None
138
139 version = "Python-urllib/%s" % __version__
140
141 # Constructor
142 def __init__(self, proxies=None, **x509):
143 if proxies is None:
144 proxies = getproxies()
145 assert hasattr(proxies, 'has_key'), "proxies must be a mapping"
146 self.proxies = proxies
147 self.key_file = x509.get('key_file')
148 self.cert_file = x509.get('cert_file')
149 self.addheaders = [('User-Agent', self.version)]
150 self.__tempfiles = []
151 self.__unlink = os.unlink # See cleanup()
152 self.tempcache = None
153 # Undocumented feature: if you assign {} to tempcache,
154 # it is used to cache files retrieved with
155 # self.retrieve(). This is not enabled by default
156 # since it does not work for changing documents (and I
157 # haven't got the logic to check expiration headers
158 # yet).
159 self.ftpcache = ftpcache
160 # Undocumented feature: you can use a different
161 # ftp cache by assigning to the .ftpcache member;
162 # in case you want logically independent URL openers
163 # XXX This is not threadsafe. Bah.
164
165 def __del__(self):
166 self.close()
167
168 def close(self):
169 self.cleanup()
170
171 def cleanup(self):
172 # This code sometimes runs when the rest of this module
173 # has already been deleted, so it can't use any globals
174 # or import anything.
175 if self.__tempfiles:
176 for file in self.__tempfiles:
177 try:
178 self.__unlink(file)
179 except OSError:
180 pass
181 del self.__tempfiles[:]
182 if self.tempcache:
183 self.tempcache.clear()
184
185 def addheader(self, *args):
186 """Add a header to be used by the HTTP interface only
187 e.g. u.addheader('Accept', 'sound/basic')"""
188 self.addheaders.append(args)
189
190 # External interface
191 def open(self, fullurl, data=None):
192 """Use URLopener().open(file) instead of open(file, 'r')."""
193 fullurl = unwrap(toBytes(fullurl))
194 # percent encode url, fixing lame server errors for e.g, like space
195 # within url paths.
196 fullurl = quote(fullurl, safe="%/:=&?~#+!$,;'@()*[]|")
197 if self.tempcache and fullurl in self.tempcache:
198 filename, headers = self.tempcache[fullurl]
199 fp = open(filename, 'rb')
200 return addinfourl(fp, headers, fullurl)
201 urltype, url = splittype(fullurl)
202 if not urltype:
203 urltype = 'file'
204 if urltype in self.proxies:
205 proxy = self.proxies[urltype]
206 urltype, proxyhost = splittype(proxy)
207 host, selector = splithost(proxyhost)
208 url = (host, fullurl) # Signal special case to open_*()
209 else:
210 proxy = None
211 name = 'open_' + urltype
212 self.type = urltype
213 name = name.replace('-', '_')
214 if not hasattr(self, name):
215 if proxy:
216 return self.open_unknown_proxy(proxy, fullurl, data)
217 else:
218 return self.open_unknown(fullurl, data)
219 try:
220 if data is None:
221 return getattr(self, name)(url)
222 else:
223 return getattr(self, name)(url, data)
224 except socket.error, msg:
225 raise IOError, ('socket error', msg), sys.exc_info()[2]
226
227 def open_unknown(self, fullurl, data=None):
228 """Overridable interface to open unknown URL type."""
229 type, url = splittype(fullurl)
230 raise IOError, ('url error', 'unknown url type', type)
231
232 def open_unknown_proxy(self, proxy, fullurl, data=None):
233 """Overridable interface to open unknown URL type."""
234 type, url = splittype(fullurl)
235 raise IOError, ('url error', 'invalid proxy for %s' % type, proxy)
236
237 # External interface
238 def retrieve(self, url, filename=None, reporthook=None, data=None):
239 """retrieve(url) returns (filename, headers) for a local object
240 or (tempfilename, headers) for a remote object."""
241 url = unwrap(toBytes(url))
242 if self.tempcache and url in self.tempcache:
243 return self.tempcache[url]
244 type, url1 = splittype(url)
245 if filename is None and (not type or type == 'file'):
246 try:
247 fp = self.open_local_file(url1)
248 hdrs = fp.info()
249 fp.close()
250 return url2pathname(splithost(url1)[1]), hdrs
251 except IOError:
252 pass
253 fp = self.open(url, data)
254 try:
255 headers = fp.info()
256 if filename:
257 tfp = open(filename, 'wb')
258 else:
259 import tempfile
260 garbage, path = splittype(url)
261 garbage, path = splithost(path or "")
262 path, garbage = splitquery(path or "")
263 path, garbage = splitattr(path or "")
264 suffix = os.path.splitext(path)[1]
265 (fd, filename) = tempfile.mkstemp(suffix)
266 self.__tempfiles.append(filename)
267 tfp = os.fdopen(fd, 'wb')
268 try:
269 result = filename, headers
270 if self.tempcache is not None:
271 self.tempcache[url] = result
272 bs = 1024*8
273 size = -1
274 read = 0
275 blocknum = 0
276 if "content-length" in headers:
277 size = int(headers["Content-Length"])
278 if reporthook:
279 reporthook(blocknum, bs, size)
280 while 1:
281 block = fp.read(bs)
282 if block == "":
283 break
284 read += len(block)
285 tfp.write(block)
286 blocknum += 1
287 if reporthook:
288 reporthook(blocknum, bs, size)
289 finally:
290 tfp.close()
291 finally:
292 fp.close()
293
294 # raise exception if actual size does not match content-length header
295 if size >= 0 and read < size:
296 raise ContentTooShortError("retrieval incomplete: got only %i out "
297 "of %i bytes" % (read, size), result)
298
299 return result
300
301 # Each method named open_<type> knows how to open that type of URL
302
303 def open_http(self, url, data=None):
304 """Use HTTP protocol."""
305 import httplib
306 user_passwd = None
307 proxy_passwd= None
308 if isinstance(url, str):
309 host, selector = splithost(url)
310 if host:
311 user_passwd, host = splituser(host)
312 host = unquote(host)
313 realhost = host
314 else:
315 host, selector = url
316 # check whether the proxy contains authorization information
317 proxy_passwd, host = splituser(host)
318 # now we proceed with the url we want to obtain
319 urltype, rest = splittype(selector)
320 url = rest
321 user_passwd = None
322 if urltype.lower() != 'http':
323 realhost = None
324 else:
325 realhost, rest = splithost(rest)
326 if realhost:
327 user_passwd, realhost = splituser(realhost)
328 if user_passwd:
329 selector = "%s://%s%s" % (urltype, realhost, rest)
330 if proxy_bypass(realhost):
331 host = realhost
332
333 #print "proxy via http:", host, selector
334 if not host: raise IOError, ('http error', 'no host given')
335
336 if proxy_passwd:
337 proxy_passwd = unquote(proxy_passwd)
338 proxy_auth = base64.b64encode(proxy_passwd).strip()
339 else:
340 proxy_auth = None
341
342 if user_passwd:
343 user_passwd = unquote(user_passwd)
344 auth = base64.b64encode(user_passwd).strip()
345 else:
346 auth = None
347 h = httplib.HTTP(host)
348 if data is not None:
349 h.putrequest('POST', selector)
350 h.putheader('Content-Type', 'application/x-www-form-urlencoded')
351 h.putheader('Content-Length', '%d' % len(data))
352 else:
353 h.putrequest('GET', selector)
354 if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
355 if auth: h.putheader('Authorization', 'Basic %s' % auth)
356 if realhost: h.putheader('Host', realhost)
357 for args in self.addheaders: h.putheader(*args)
358 h.endheaders(data)
359 errcode, errmsg, headers = h.getreply()
360 fp = h.getfile()
361 if errcode == -1:
362 if fp: fp.close()
363 # something went wrong with the HTTP status line
364 raise IOError, ('http protocol error', 0,
365 'got a bad status line', None)
366 # According to RFC 2616, "2xx" code indicates that the client's
367 # request was successfully received, understood, and accepted.
368 if (200 <= errcode < 300):
369 return addinfourl(fp, headers, "http:" + url, errcode)
370 else:
371 if data is None:
372 return self.http_error(url, fp, errcode, errmsg, headers)
373 else:
374 return self.http_error(url, fp, errcode, errmsg, headers, data)
375
376 def http_error(self, url, fp, errcode, errmsg, headers, data=None):
377 """Handle http errors.
378 Derived class can override this, or provide specific handlers
379 named http_error_DDD where DDD is the 3-digit error code."""
380 # First check if there's a specific handler for this error
381 name = 'http_error_%d' % errcode
382 if hasattr(self, name):
383 method = getattr(self, name)
384 if data is None:
385 result = method(url, fp, errcode, errmsg, headers)
386 else:
387 result = method(url, fp, errcode, errmsg, headers, data)
388 if result: return result
389 return self.http_error_default(url, fp, errcode, errmsg, headers)
390
391 def http_error_default(self, url, fp, errcode, errmsg, headers):
392 """Default error handler: close the connection and raise IOError."""
393 fp.close()
394 raise IOError, ('http error', errcode, errmsg, headers)
395
396 if _have_ssl:
397 def open_https(self, url, data=None):
398 """Use HTTPS protocol."""
399
400 import httplib
401 user_passwd = None
402 proxy_passwd = None
403 if isinstance(url, str):
404 host, selector = splithost(url)
405 if host:
406 user_passwd, host = splituser(host)
407 host = unquote(host)
408 realhost = host
409 else:
410 host, selector = url
411 # here, we determine, whether the proxy contains authorization information
412 proxy_passwd, host = splituser(host)
413 urltype, rest = splittype(selector)
414 url = rest
415 user_passwd = None
416 if urltype.lower() != 'https':
417 realhost = None
418 else:
419 realhost, rest = splithost(rest)
420 if realhost:
421 user_passwd, realhost = splituser(realhost)
422 if user_passwd:
423 selector = "%s://%s%s" % (urltype, realhost, rest)
424 #print "proxy via https:", host, selector
425 if not host: raise IOError, ('https error', 'no host given')
426 if proxy_passwd:
427 proxy_passwd = unquote(proxy_passwd)
428 proxy_auth = base64.b64encode(proxy_passwd).strip()
429 else:
430 proxy_auth = None
431 if user_passwd:
432 user_passwd = unquote(user_passwd)
433 auth = base64.b64encode(user_passwd).strip()
434 else:
435 auth = None
436 h = httplib.HTTPS(host, 0,
437 key_file=self.key_file,
438 cert_file=self.cert_file)
439 if data is not None:
440 h.putrequest('POST', selector)
441 h.putheader('Content-Type',
442 'application/x-www-form-urlencoded')
443 h.putheader('Content-Length', '%d' % len(data))
444 else:
445 h.putrequest('GET', selector)
446 if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
447 if auth: h.putheader('Authorization', 'Basic %s' % auth)
448 if realhost: h.putheader('Host', realhost)
449 for args in self.addheaders: h.putheader(*args)
450 h.endheaders(data)
451 errcode, errmsg, headers = h.getreply()
452 fp = h.getfile()
453 if errcode == -1:
454 if fp: fp.close()
455 # something went wrong with the HTTP status line
456 raise IOError, ('http protocol error', 0,
457 'got a bad status line', None)
458 # According to RFC 2616, "2xx" code indicates that the client's
459 # request was successfully received, understood, and accepted.
460 if (200 <= errcode < 300):
461 return addinfourl(fp, headers, "https:" + url, errcode)
462 else:
463 if data is None:
464 return self.http_error(url, fp, errcode, errmsg, headers)
465 else:
466 return self.http_error(url, fp, errcode, errmsg, headers,
467 data)
468
469 def open_file(self, url):
470 """Use local file or FTP depending on form of URL."""
471 if not isinstance(url, str):
472 raise IOError, ('file error', 'proxy support for file protocol currently not implemented')
473 if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/':
474 return self.open_ftp(url)
475 else:
476 return self.open_local_file(url)
477
478 def open_local_file(self, url):
479 """Use local file."""
480 import mimetypes, mimetools, email.utils
481 try:
482 from cStringIO import StringIO
483 except ImportError:
484 from StringIO import StringIO
485 host, file = splithost(url)
486 localname = url2pathname(file)
487 try:
488 stats = os.stat(localname)
489 except OSError, e:
490 raise IOError(e.errno, e.strerror, e.filename)
491 size = stats.st_size
492 modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
493 mtype = mimetypes.guess_type(url)[0]
494 headers = mimetools.Message(StringIO(
495 'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
496 (mtype or 'text/plain', size, modified)))
497 if not host:
498 urlfile = file
499 if file[:1] == '/':
500 urlfile = 'file://' + file
501 elif file[:2] == './':
502 raise ValueError("local file url may start with / or file:. Unknown url of type: %s" % url)
503 return addinfourl(open(localname, 'rb'),
504 headers, urlfile)
505 host, port = splitport(host)
506 if not port \
507 and socket.gethostbyname(host) in (localhost(), thishost()):
508 urlfile = file
509 if file[:1] == '/':
510 urlfile = 'file://' + file
511 return addinfourl(open(localname, 'rb'),
512 headers, urlfile)
513 raise IOError, ('local file error', 'not on local host')
514
515 def open_ftp(self, url):
516 """Use FTP protocol."""
517 if not isinstance(url, str):
518 raise IOError, ('ftp error', 'proxy support for ftp protocol currently not implemented')
519 import mimetypes, mimetools
520 try:
521 from cStringIO import StringIO
522 except ImportError:
523 from StringIO import StringIO
524 host, path = splithost(url)
525 if not host: raise IOError, ('ftp error', 'no host given')
526 host, port = splitport(host)
527 user, host = splituser(host)
528 if user: user, passwd = splitpasswd(user)
529 else: passwd = None
530 host = unquote(host)
531 user = user or ''
532 passwd = passwd or ''
533 host = socket.gethostbyname(host)
534 if not port:
535 import ftplib
536 port = ftplib.FTP_PORT
537 else:
538 port = int(port)
539 path, attrs = splitattr(path)
540 path = unquote(path)
541 dirs = path.split('/')
542 dirs, file = dirs[:-1], dirs[-1]
543 if dirs and not dirs[0]: dirs = dirs[1:]
544 if dirs and not dirs[0]: dirs[0] = '/'
545 key = user, host, port, '/'.join(dirs)
546 # XXX thread unsafe!
547 if len(self.ftpcache) > MAXFTPCACHE:
548 # Prune the cache, rather arbitrarily
549 for k in self.ftpcache.keys():
550 if k != key:
551 v = self.ftpcache[k]
552 del self.ftpcache[k]
553 v.close()
554 try:
555 if not key in self.ftpcache:
556 self.ftpcache[key] = \
557 ftpwrapper(user, passwd, host, port, dirs)
558 if not file: type = 'D'
559 else: type = 'I'
560 for attr in attrs:
561 attr, value = splitvalue(attr)
562 if attr.lower() == 'type' and \
563 value in ('a', 'A', 'i', 'I', 'd', 'D'):
564 type = value.upper()
565 (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
566 mtype = mimetypes.guess_type("ftp:" + url)[0]
567 headers = ""
568 if mtype:
569 headers += "Content-Type: %s\n" % mtype
570 if retrlen is not None and retrlen >= 0:
571 headers += "Content-Length: %d\n" % retrlen
572 headers = mimetools.Message(StringIO(headers))
573 return addinfourl(fp, headers, "ftp:" + url)
574 except ftperrors(), msg:
575 raise IOError, ('ftp error', msg), sys.exc_info()[2]
576
577 def open_data(self, url, data=None):
578 """Use "data" URL."""
579 if not isinstance(url, str):
580 raise IOError, ('data error', 'proxy support for data protocol currently not implemented')
581 # ignore POSTed data
582 #
583 # syntax of data URLs:
584 # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data
585 # mediatype := [ type "/" subtype ] *( ";" parameter )
586 # data := *urlchar
587 # parameter := attribute "=" value
588 import mimetools
589 try:
590 from cStringIO import StringIO
591 except ImportError:
592 from StringIO import StringIO
593 try:
594 [type, data] = url.split(',', 1)
595 except ValueError:
596 raise IOError, ('data error', 'bad data URL')
597 if not type:
598 type = 'text/plain;charset=US-ASCII'
599 semi = type.rfind(';')
600 if semi >= 0 and '=' not in type[semi:]:
601 encoding = type[semi+1:]
602 type = type[:semi]
603 else:
604 encoding = ''
605 msg = []
606 msg.append('Date: %s'%time.strftime('%a, %d %b %Y %H:%M:%S GMT',
607 time.gmtime(time.time())))
608 msg.append('Content-type: %s' % type)
609 if encoding == 'base64':
610 data = base64.decodestring(data)
611 else:
612 data = unquote(data)
613 msg.append('Content-Length: %d' % len(data))
614 msg.append('')
615 msg.append(data)
616 msg = '\n'.join(msg)
617 f = StringIO(msg)
618 headers = mimetools.Message(f, 0)
619 #f.fileno = None # needed for addinfourl
620 return addinfourl(f, headers, url)
621
622
623class FancyURLopener(URLopener):
624 """Derived class with handlers for errors we can handle (perhaps)."""
625
626 def __init__(self, *args, **kwargs):
627 URLopener.__init__(self, *args, **kwargs)
628 self.auth_cache = {}
629 self.tries = 0
630 self.maxtries = 10
631
632 def http_error_default(self, url, fp, errcode, errmsg, headers):
633 """Default error handling -- don't raise an exception."""
634 return addinfourl(fp, headers, "http:" + url, errcode)
635
636 def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
637 """Error 302 -- relocated (temporarily)."""
638 self.tries += 1
639 if self.maxtries and self.tries >= self.maxtries:
640 if hasattr(self, "http_error_500"):
641 meth = self.http_error_500
642 else:
643 meth = self.http_error_default
644 self.tries = 0
645 return meth(url, fp, 500,
646 "Internal Server Error: Redirect Recursion", headers)
647 result = self.redirect_internal(url, fp, errcode, errmsg, headers,
648 data)
649 self.tries = 0
650 return result
651
652 def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
653 if 'location' in headers:
654 newurl = headers['location']
655 elif 'uri' in headers:
656 newurl = headers['uri']
657 else:
658 return
659 fp.close()
660 # In case the server sent a relative URL, join with original:
661 newurl = basejoin(self.type + ":" + url, newurl)
662
663 # For security reasons we do not allow redirects to protocols
664 # other than HTTP, HTTPS or FTP.
665 newurl_lower = newurl.lower()
666 if not (newurl_lower.startswith('http://') or
667 newurl_lower.startswith('https://') or
668 newurl_lower.startswith('ftp://')):
669 raise IOError('redirect error', errcode,
670 errmsg + " - Redirection to url '%s' is not allowed" %
671 newurl,
672 headers)
673
674 return self.open(newurl)
675
676 def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
677 """Error 301 -- also relocated (permanently)."""
678 return self.http_error_302(url, fp, errcode, errmsg, headers, data)
679
680 def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
681 """Error 303 -- also relocated (essentially identical to 302)."""
682 return self.http_error_302(url, fp, errcode, errmsg, headers, data)
683
684 def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
685 """Error 307 -- relocated, but turn POST into error."""
686 if data is None:
687 return self.http_error_302(url, fp, errcode, errmsg, headers, data)
688 else:
689 return self.http_error_default(url, fp, errcode, errmsg, headers)
690
691 def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
692 """Error 401 -- authentication required.
693 This function supports Basic authentication only."""
694 if not 'www-authenticate' in headers:
695 URLopener.http_error_default(self, url, fp,
696 errcode, errmsg, headers)
697 stuff = headers['www-authenticate']
698 import re
699 match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
700 if not match:
701 URLopener.http_error_default(self, url, fp,
702 errcode, errmsg, headers)
703 scheme, realm = match.groups()
704 if scheme.lower() != 'basic':
705 URLopener.http_error_default(self, url, fp,
706 errcode, errmsg, headers)
707 name = 'retry_' + self.type + '_basic_auth'
708 if data is None:
709 return getattr(self,name)(url, realm)
710 else:
711 return getattr(self,name)(url, realm, data)
712
713 def http_error_407(self, url, fp, errcode, errmsg, headers, data=None):
714 """Error 407 -- proxy authentication required.
715 This function supports Basic authentication only."""
716 if not 'proxy-authenticate' in headers:
717 URLopener.http_error_default(self, url, fp,
718 errcode, errmsg, headers)
719 stuff = headers['proxy-authenticate']
720 import re
721 match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
722 if not match:
723 URLopener.http_error_default(self, url, fp,
724 errcode, errmsg, headers)
725 scheme, realm = match.groups()
726 if scheme.lower() != 'basic':
727 URLopener.http_error_default(self, url, fp,
728 errcode, errmsg, headers)
729 name = 'retry_proxy_' + self.type + '_basic_auth'
730 if data is None:
731 return getattr(self,name)(url, realm)
732 else:
733 return getattr(self,name)(url, realm, data)
734
735 def retry_proxy_http_basic_auth(self, url, realm, data=None):
736 host, selector = splithost(url)
737 newurl = 'http://' + host + selector
738 proxy = self.proxies['http']
739 urltype, proxyhost = splittype(proxy)
740 proxyhost, proxyselector = splithost(proxyhost)
741 i = proxyhost.find('@') + 1
742 proxyhost = proxyhost[i:]
743 user, passwd = self.get_user_passwd(proxyhost, realm, i)
744 if not (user or passwd): return None
745 proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost
746 self.proxies['http'] = 'http://' + proxyhost + proxyselector
747 if data is None:
748 return self.open(newurl)
749 else:
750 return self.open(newurl, data)
751
752 def retry_proxy_https_basic_auth(self, url, realm, data=None):
753 host, selector = splithost(url)
754 newurl = 'https://' + host + selector
755 proxy = self.proxies['https']
756 urltype, proxyhost = splittype(proxy)
757 proxyhost, proxyselector = splithost(proxyhost)
758 i = proxyhost.find('@') + 1
759 proxyhost = proxyhost[i:]
760 user, passwd = self.get_user_passwd(proxyhost, realm, i)
761 if not (user or passwd): return None
762 proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost
763 self.proxies['https'] = 'https://' + proxyhost + proxyselector
764 if data is None:
765 return self.open(newurl)
766 else:
767 return self.open(newurl, data)
768
769 def retry_http_basic_auth(self, url, realm, data=None):
770 host, selector = splithost(url)
771 i = host.find('@') + 1
772 host = host[i:]
773 user, passwd = self.get_user_passwd(host, realm, i)
774 if not (user or passwd): return None
775 host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
776 newurl = 'http://' + host + selector
777 if data is None:
778 return self.open(newurl)
779 else:
780 return self.open(newurl, data)
781
782 def retry_https_basic_auth(self, url, realm, data=None):
783 host, selector = splithost(url)
784 i = host.find('@') + 1
785 host = host[i:]
786 user, passwd = self.get_user_passwd(host, realm, i)
787 if not (user or passwd): return None
788 host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
789 newurl = 'https://' + host + selector
790 if data is None:
791 return self.open(newurl)
792 else:
793 return self.open(newurl, data)
794
795 def get_user_passwd(self, host, realm, clear_cache=0):
796 key = realm + '@' + host.lower()
797 if key in self.auth_cache:
798 if clear_cache:
799 del self.auth_cache[key]
800 else:
801 return self.auth_cache[key]
802 user, passwd = self.prompt_user_passwd(host, realm)
803 if user or passwd: self.auth_cache[key] = (user, passwd)
804 return user, passwd
805
806 def prompt_user_passwd(self, host, realm):
807 """Override this in a GUI environment!"""
808 import getpass
809 try:
810 user = raw_input("Enter username for %s at %s: " % (realm,
811 host))
812 passwd = getpass.getpass("Enter password for %s in %s at %s: " %
813 (user, realm, host))
814 return user, passwd
815 except KeyboardInterrupt:
816 print
817 return None, None
818
819
820# Utility functions
821
822_localhost = None
823def localhost():
824 """Return the IP address of the magic hostname 'localhost'."""
825 global _localhost
826 if _localhost is None:
827 _localhost = socket.gethostbyname('localhost')
828 return _localhost
829
830_thishost = None
831def thishost():
832 """Return the IP address of the current host."""
833 global _thishost
834 if _thishost is None:
835 try:
836 _thishost = socket.gethostbyname(socket.gethostname())
837 except socket.gaierror:
838 _thishost = socket.gethostbyname('localhost')
839 return _thishost
840
841_ftperrors = None
842def ftperrors():
843 """Return the set of errors raised by the FTP class."""
844 global _ftperrors
845 if _ftperrors is None:
846 import ftplib
847 _ftperrors = ftplib.all_errors
848 return _ftperrors
849
850_noheaders = None
851def noheaders():
852 """Return an empty mimetools.Message object."""
853 global _noheaders
854 if _noheaders is None:
855 import mimetools
856 try:
857 from cStringIO import StringIO
858 except ImportError:
859 from StringIO import StringIO
860 _noheaders = mimetools.Message(StringIO(), 0)
861 _noheaders.fp.close() # Recycle file descriptor
862 return _noheaders
863
864
865# Utility classes
866
867class ftpwrapper:
868 """Class used by open_ftp() for cache of open FTP connections."""
869
870 def __init__(self, user, passwd, host, port, dirs,
871 timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
872 persistent=True):
873 self.user = user
874 self.passwd = passwd
875 self.host = host
876 self.port = port
877 self.dirs = dirs
878 self.timeout = timeout
879 self.refcount = 0
880 self.keepalive = persistent
881 self.init()
882
883 def init(self):
884 import ftplib
885 self.busy = 0
886 self.ftp = ftplib.FTP()
887 self.ftp.connect(self.host, self.port, self.timeout)
888 self.ftp.login(self.user, self.passwd)
889 _target = '/'.join(self.dirs)
890 self.ftp.cwd(_target)
891
892 def retrfile(self, file, type):
893 import ftplib
894 self.endtransfer()
895 if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
896 else: cmd = 'TYPE ' + type; isdir = 0
897 try:
898 self.ftp.voidcmd(cmd)
899 except ftplib.all_errors:
900 self.init()
901 self.ftp.voidcmd(cmd)
902 conn = None
903 if file and not isdir:
904 # Try to retrieve as a file
905 try:
906 cmd = 'RETR ' + file
907 conn, retrlen = self.ftp.ntransfercmd(cmd)
908 except ftplib.error_perm, reason:
909 if str(reason)[:3] != '550':
910 raise IOError, ('ftp error', reason), sys.exc_info()[2]
911 if not conn:
912 # Set transfer mode to ASCII!
913 self.ftp.voidcmd('TYPE A')
914 # Try a directory listing. Verify that directory exists.
915 if file:
916 pwd = self.ftp.pwd()
917 try:
918 try:
919 self.ftp.cwd(file)
920 except ftplib.error_perm, reason:
921 raise IOError, ('ftp error', reason), sys.exc_info()[2]
922 finally:
923 self.ftp.cwd(pwd)
924 cmd = 'LIST ' + file
925 else:
926 cmd = 'LIST'
927 conn, retrlen = self.ftp.ntransfercmd(cmd)
928 self.busy = 1
929 ftpobj = addclosehook(conn.makefile('rb'), self.file_close)
930 self.refcount += 1
931 conn.close()
932 # Pass back both a suitably decorated object and a retrieval length
933 return (ftpobj, retrlen)
934
935 def endtransfer(self):
936 if not self.busy:
937 return
938 self.busy = 0
939 try:
940 self.ftp.voidresp()
941 except ftperrors():
942 pass
943
944 def close(self):
945 self.keepalive = False
946 if self.refcount <= 0:
947 self.real_close()
948
949 def file_close(self):
950 self.endtransfer()
951 self.refcount -= 1
952 if self.refcount <= 0 and not self.keepalive:
953 self.real_close()
954
    def real_close(self):
        """Unconditionally close the underlying FTP connection."""
        self.endtransfer()
        try:
            self.ftp.close()
        except ftperrors():
            # Best effort: the connection may already be gone.
            pass
961
class addbase:
    """Base class for addinfo and addclosehook.

    Wraps a file-like object and re-exports its read interface so that
    subclasses can decorate it with extra methods (info(), close hooks).
    """

    def __init__(self, fp):
        self.fp = fp
        # Delegate the read interface straight to the wrapped object.
        self.read = fp.read
        self.readline = fp.readline
        if hasattr(fp, "readlines"):
            self.readlines = fp.readlines
        if hasattr(fp, "fileno"):
            self.fileno = fp.fileno
        else:
            self.fileno = lambda: None
        if hasattr(fp, "__iter__"):
            self.__iter__ = fp.__iter__
            if hasattr(fp, "next"):
                self.next = fp.next

    def __repr__(self):
        return '<%s at %r whose fp = %r>' % (self.__class__.__name__,
                                             id(self), self.fp)

    def close(self):
        # Drop the delegated methods so use-after-close fails loudly.
        self.read = None
        self.readline = None
        self.readlines = None
        self.fileno = None
        if self.fp:
            self.fp.close()
        self.fp = None
990
class addclosehook(addbase):
    """Class to add a close hook to an open file."""

    def __init__(self, fp, closehook, *hookargs):
        addbase.__init__(self, fp)
        # closehook(*hookargs) is invoked exactly once, on the first close().
        self.closehook = closehook
        self.hookargs = hookargs

    def close(self):
        """Run the close hook, then close the wrapped file.

        The base close is performed in a ``finally`` clause so that the
        underlying fp is not leaked when the hook raises (previously an
        exception in the hook skipped addbase.close entirely).
        """
        try:
            closehook = self.closehook
            hookargs = self.hookargs
            if closehook:
                # Clear first so a re-entrant close() cannot fire the
                # hook twice.
                self.closehook = None
                self.hookargs = None
                closehook(*hookargs)
        finally:
            addbase.close(self)
1005
class addinfo(addbase):
    """class to add an info() method to an open file."""

    def __init__(self, fp, headers):
        addbase.__init__(self, fp)
        # headers: a mimetools.Message-like object describing the response.
        self.headers = headers

    def info(self):
        """Return the stored headers object."""
        return self.headers
1015
class addinfourl(addbase):
    """class to add info() and geturl() methods to an open file."""

    def __init__(self, fp, headers, url, code=None):
        addbase.__init__(self, fp)
        # headers: mimetools.Message-like response headers.
        self.headers = headers
        # url: the URL actually opened (after any redirects).
        self.url = url
        # code: HTTP status code, or None for non-HTTP schemes.
        self.code = code

    def info(self):
        """Return the stored headers object."""
        return self.headers

    def getcode(self):
        """Return the HTTP status code (None if not applicable)."""
        return self.code

    def geturl(self):
        """Return the URL of the resource actually retrieved."""
        return self.url
1033
1034
1035# Utilities to parse URLs (most of these return None for missing parts):
1036# unwrap('<URL:type://host/path>') --> 'type://host/path'
1037# splittype('type:opaquestring') --> 'type', 'opaquestring'
1038# splithost('//host[:port]/path') --> 'host[:port]', '/path'
1039# splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'
1040# splitpasswd('user:passwd') -> 'user', 'passwd'
1041# splitport('host:port') --> 'host', 'port'
1042# splitquery('/path?query') --> '/path', 'query'
1043# splittag('/path#tag') --> '/path', 'tag'
1044# splitattr('/path;attr1=value1;attr2=value2;...') ->
1045# '/path', ['attr1=value1', 'attr2=value2', ...]
1046# splitvalue('attr=value') --> 'attr', 'value'
1047# unquote('abc%20def') -> 'abc def'
1048# quote('abc def') -> 'abc%20def')
1049
# Probe once for the Python 2 ``unicode`` type so later code (unquote,
# urlencode, toBytes) can cheaply test argument types; interpreters
# without it get a predicate that is always false.
try:
    unicode
except NameError:
    def _is_unicode(x):
        # No unicode type exists, so nothing can be a unicode instance.
        return 0
else:
    def _is_unicode(x):
        return isinstance(x, unicode)
1058
def toBytes(url):
    """toBytes(u"URL") --> 'URL'."""
    # Most URL schemes require ASCII. If that changes, the conversion
    # can be relaxed.
    if not _is_unicode(url):
        return url
    try:
        return url.encode("ASCII")
    except UnicodeError:
        raise UnicodeError("URL " + repr(url) +
                           " contains non-ASCII characters")
1070
def unwrap(url):
    """unwrap('<URL:type://host/path>') --> 'type://host/path'."""
    url = url.strip()
    # Strip one layer of <...> wrapping, if present.
    if url.startswith('<') and url.endswith('>'):
        url = url[1:-1].strip()
    # Drop an optional leading "URL:" label.
    if url.startswith('URL:'):
        url = url[4:].strip()
    return url
1078
_typeprog = None
def splittype(url):
    """splittype('type:opaquestring') --> 'type', 'opaquestring'."""
    global _typeprog
    if _typeprog is None:
        # Compiled lazily on first use; ``re`` is already imported at module
        # level, so the old function-local ``import re`` was redundant.
        _typeprog = re.compile('^([^/:]+):')

    match = _typeprog.match(url)
    if match:
        scheme = match.group(1)
        # Scheme names are case-insensitive; normalize to lowercase.
        return scheme.lower(), url[len(scheme) + 1:]
    return None, url
1092
_hostprog = None
def splithost(url):
    """splithost('//host[:port]/path') --> 'host[:port]', '/path'."""
    global _hostprog
    if _hostprog is None:
        # ``re`` is imported at module level; no function-local import needed.
        _hostprog = re.compile('^//([^/?]*)(.*)$')

    match = _hostprog.match(url)
    if match:
        host_port = match.group(1)
        path = match.group(2)
        # Ensure a non-empty path always starts with '/'.
        if path and not path.startswith('/'):
            path = '/' + path
        return host_port, path
    return None, url
1109
_userprog = None
def splituser(host):
    """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'."""
    global _userprog
    if _userprog is None:
        # ``re`` is imported at module level; no function-local import needed.
        # Greedy first group, so the split happens at the LAST '@'.
        _userprog = re.compile('^(.*)@(.*)$')

    match = _userprog.match(host)
    if match: return match.group(1, 2)
    return None, host
1121
_passwdprog = None
def splitpasswd(user):
    """splitpasswd('user:passwd') -> 'user', 'passwd'."""
    global _passwdprog
    if _passwdprog is None:
        # ``re`` is imported at module level; no function-local import needed.
        # re.S lets the password part contain newlines.
        _passwdprog = re.compile('^([^:]*):(.*)$', re.S)

    match = _passwdprog.match(user)
    if match: return match.group(1, 2)
    return user, None
1133
_portprog = None
def splitport(host):
    """splitport('host:port') --> 'host', 'port'.

    The port must be all digits; otherwise the input is returned whole.
    """
    global _portprog
    if _portprog is None:
        # ``re`` is imported at module level; no function-local import needed.
        _portprog = re.compile('^(.*):([0-9]+)$')

    match = _portprog.match(host)
    if match: return match.group(1, 2)
    return host, None
1146
_nportprog = None
def splitnport(host, defport=-1):
    """Split host and port, returning numeric port.
    Return given default port if no ':' found; defaults to -1.
    Return numerical port if a valid number are found after ':'.
    Return None if ':' but not a valid number."""
    global _nportprog
    if _nportprog is None:
        # ``re`` is imported at module level; no function-local import needed.
        _nportprog = re.compile('^(.*):(.*)$')

    match = _nportprog.match(host)
    if match:
        host, port = match.group(1, 2)
        try:
            # An empty port ("host:") is treated the same as a non-numeric one.
            if not port:
                raise ValueError("no digits")
            nport = int(port)
        except ValueError:
            nport = None
        return host, nport
    return host, defport
1168
_queryprog = None
def splitquery(url):
    """splitquery('/path?query') --> '/path', 'query'."""
    global _queryprog
    if _queryprog is None:
        # ``re`` is imported at module level; no function-local import
        # needed.  Raw string so the regex escape is explicit.
        _queryprog = re.compile(r'^(.*)\?([^?]*)$')

    match = _queryprog.match(url)
    if match: return match.group(1, 2)
    return url, None
1180
_tagprog = None
def splittag(url):
    """splittag('/path#tag') --> '/path', 'tag'."""
    global _tagprog
    if _tagprog is None:
        # ``re`` is imported at module level; no function-local import needed.
        # The tag group excludes '#', so the split happens at the LAST '#'.
        _tagprog = re.compile('^(.*)#([^#]*)$')

    match = _tagprog.match(url)
    if match: return match.group(1, 2)
    return url, None
1192
def splitattr(url):
    """splitattr('/path;attr1=value1;attr2=value2;...') ->
        '/path', ['attr1=value1', 'attr2=value2', ...]."""
    path, sep, rest = url.partition(';')
    # No ';' at all means no attributes (rest is '' in that case).
    return path, rest.split(';') if sep else []
1198
_valueprog = None
def splitvalue(attr):
    """splitvalue('attr=value') --> 'attr', 'value'."""
    global _valueprog
    if _valueprog is None:
        # ``re`` is imported at module level; no function-local import needed.
        # The attr group excludes '=', so the split happens at the FIRST '='.
        _valueprog = re.compile('^([^=]*)=(.*)$')

    match = _valueprog.match(attr)
    if match: return match.group(1, 2)
    return attr, None
1210
1211# urlparse contains a duplicate of this method to avoid a circular import. If
1212# you update this method, also update the copy in urlparse. This code
1213# duplication does not exist in Python3.
1214
1215_hexdig = '0123456789ABCDEFabcdef'
1216_hextochr = dict((a + b, chr(int(a + b, 16)))
1217 for a in _hexdig for b in _hexdig)
1218_asciire = re.compile('([\x00-\x7f]+)')
1219
def unquote(s):
    """unquote('abc%20def') -> 'abc def'."""
    if _is_unicode(s):
        if '%' not in s:
            return s
        # Split into alternating (non-ASCII, ASCII) runs; only the ASCII
        # runs can contain %XX escapes.  Each ASCII run is unquoted as a
        # byte string and re-decoded as latin-1 so %XX maps back to the
        # code point 0xXX; the non-ASCII runs pass through untouched.
        bits = _asciire.split(s)
        res = [bits[0]]
        append = res.append
        for i in range(1, len(bits), 2):
            append(unquote(str(bits[i])).decode('latin1'))
            append(bits[i + 1])
        return ''.join(res)

    bits = s.split('%')
    # fastpath
    if len(bits) == 1:
        return s
    res = [bits[0]]
    append = res.append
    for item in bits[1:]:
        try:
            # Two valid hex digits after '%' decode to one byte...
            append(_hextochr[item[:2]])
            append(item[2:])
        except KeyError:
            # ...anything else leaves the '%' literally in place.
            append('%')
            append(item)
    return ''.join(res)
1247
def unquote_plus(s):
    """unquote_plus('%7e/abc+def') -> '~/abc def'"""
    # '+' encodes a space in the query-string convention.
    return unquote(s.replace('+', ' '))
1252
always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
               'abcdefghijklmnopqrstuvwxyz'
               '0123456789' '_.-')
# For every possible byte value: always-safe ASCII maps to itself, every
# other byte maps to its %XX escape.
_safe_map = {}
for i, c in enumerate(str(bytearray(xrange(256)))):
    _safe_map[c] = c if (i < 128 and c in always_safe) else '%{:02X}'.format(i)
# Cache of (quoter, safe-string) pairs keyed by the quote() safe argument.
_safe_quoters = {}
1260
def quote(s, safe='/'):
    """quote('abc def') -> 'abc%20def'

    Each part of a URL, e.g. the path info, the query, etc., has a
    different set of reserved characters that must be quoted.

    RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
    the following reserved characters.

    reserved    = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
                  "$" | ","

    Each of these characters is reserved in some component of a URL,
    but not necessarily in all of them.

    By default, the quote function is intended for quoting the path
    section of a URL.  Thus, it will not encode '/'.  This character
    is reserved, but in typical usage the quote function is being
    called on a path where the existing slash characters are used as
    reserved characters.
    """
    # fastpath: empty input needs no work (but None is rejected).
    if not s:
        if s is None:
            raise TypeError('None object cannot be quoted')
        return s
    cachekey = (safe, always_safe)
    cached = _safe_quoters.get(cachekey)
    if cached is None:
        # First use of this safe-set: build a per-character quoting table.
        mapping = _safe_map.copy()
        for ch in safe:
            mapping[ch] = ch
        cached = (mapping.__getitem__, always_safe + safe)
        _safe_quoters[cachekey] = cached
    quoter, safe = cached
    # If stripping every safe character empties the string, nothing needs
    # quoting.
    if not s.rstrip(safe):
        return s
    return ''.join(map(quoter, s))
1299
def quote_plus(s, safe=''):
    """Quote the query fragment of a URL; replacing ' ' with '+'"""
    if ' ' not in s:
        return quote(s, safe)
    # Treat space as safe during quoting, then encode it as '+'.
    return quote(s, safe + ' ').replace(' ', '+')
1306
def urlencode(query, doseq=0):
    """Encode a sequence of two-element tuples or dictionary into a URL query string.

    If any values in the query arg are sequences and doseq is true, each
    sequence element is converted to a separate parameter.

    If the query arg is a sequence of two-element tuples, the order of the
    parameters in the output will match the order of parameters in the
    input.
    """

    if hasattr(query,"items"):
        # mapping objects
        query = query.items()
    else:
        # it's a bother at times that strings and string-like objects are
        # sequences...
        try:
            # non-sequence items should not work with len()
            # non-empty strings will fail this
            if len(query) and not isinstance(query[0], tuple):
                raise TypeError
            # zero-length sequences of all types will get here and succeed,
            # but that's a minor nit - since the original implementation
            # allowed empty dicts that type of behavior probably should be
            # preserved for consistency
        except TypeError:
            # Re-raise with the probe's traceback so the error points at
            # the caller's bad argument.
            ty,va,tb = sys.exc_info()
            raise TypeError, "not a valid non-string sequence or mapping object", tb

    l = []
    if not doseq:
        # preserve old behavior: every value is stringified wholesale,
        # including lists (which come out as e.g. "k=%5B1%2C+2%5D").
        for k, v in query:
            k = quote_plus(str(k))
            v = quote_plus(str(v))
            l.append(k + '=' + v)
    else:
        for k, v in query:
            k = quote_plus(str(k))
            if isinstance(v, str):
                v = quote_plus(v)
                l.append(k + '=' + v)
            elif _is_unicode(v):
                # is there a reasonable way to convert to ASCII?
                # encode generates a string, but "replace" or "ignore"
                # lose information and "strict" can raise UnicodeError
                v = quote_plus(v.encode("ASCII","replace"))
                l.append(k + '=' + v)
            else:
                try:
                    # is this a sufficient test for sequence-ness?
                    len(v)
                except TypeError:
                    # not a sequence
                    v = quote_plus(str(v))
                    l.append(k + '=' + v)
                else:
                    # loop over the sequence, emitting one k=elt pair each
                    for elt in v:
                        l.append(k + '=' + quote_plus(str(elt)))
    return '&'.join(l)
1369
1370# Proxy handling
def getproxies_environment():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Scan the environment for variables named <scheme>_proxy;
    this seems to be the standard convention.  If you need a
    different way, you can pass a proxies dictionary to the
    [Fancy]URLopener constructor.

    """
    proxies = {}
    for name, value in os.environ.items():
        name = name.lower()
        if value and name[-6:] == '_proxy':
            proxies[name[:-6]] = value
    # CVE-2016-1000110 ("httpoxy"): when this process runs as a CGI script,
    # a client-supplied "Proxy:" request header is exposed by the web
    # server as HTTP_PROXY, letting an attacker redirect outgoing HTTP
    # requests.  REQUEST_METHOD is only set in a CGI context, so drop any
    # http proxy picked up from the environment in that case.
    if 'REQUEST_METHOD' in os.environ:
        proxies.pop('http', None)
    return proxies
1386
def proxy_bypass_environment(host):
    """Test if proxies should not be used for a particular host.

    Checks the environment for a variable named no_proxy, which should
    be a list of DNS suffixes separated by commas, or '*' for all hosts.
    """
    no_proxy = os.environ.get('no_proxy', '') or os.environ.get('NO_PROXY', '')
    # '*' is special case for always bypass
    if no_proxy == '*':
        return 1
    # strip port off host
    hostonly, port = splitport(host)
    # bypass when the host (with or without port) ends with a listed suffix
    for name in (entry.strip() for entry in no_proxy.split(',')):
        if name and (hostonly.endswith(name) or host.endswith(name)):
            return 1
    # otherwise, don't bypass
    return 0
1406
1407
1408if sys.platform == 'darwin':
1409 from _scproxy import _get_proxy_settings, _get_proxies
1410
    def proxy_bypass_macosx_sysconf(host):
        """
        Return True iff this host shouldn't be accessed using a proxy

        This function uses the MacOSX framework SystemConfiguration
        to fetch the proxy information.
        """
        import re
        import socket
        from fnmatch import fnmatch

        hostonly, port = splitport(host)

        def ip2num(ipAddr):
            # Pack a dotted-quad string into a 32-bit integer; short forms
            # ("10", "169.254") are zero-padded on the right.
            parts = ipAddr.split('.')
            parts = map(int, parts)
            if len(parts) != 4:
                parts = (parts + [0, 0, 0, 0])[:4]
            return (parts[0] << 24) | (parts[1] << 16) | (parts[2] << 8) | parts[3]

        proxy_settings = _get_proxy_settings()

        # Check for simple host names:
        if '.' not in host:
            if proxy_settings['exclude_simple']:
                return True

        hostIP = None  # resolved lazily, only if a numeric exception appears

        for value in proxy_settings.get('exceptions', ()):
            # Items in the list are strings like these: *.local, 169.254/16
            if not value: continue

            m = re.match(r"(\d+(?:\.\d+)*)(/\d+)?", value)
            if m is not None:
                # Numeric (possibly CIDR) exception: compare by address.
                if hostIP is None:
                    try:
                        hostIP = socket.gethostbyname(hostonly)
                        hostIP = ip2num(hostIP)
                    except socket.error:
                        continue

                base = ip2num(m.group(1))
                mask = m.group(2)
                if mask is None:
                    # No explicit mask: infer one from the dotted groups.
                    # NOTE(review): this yields the prefix length (e.g. 8
                    # for "10") but is then used directly as a shift count,
                    # unlike the explicit-mask branch which converts to
                    # host bits (32 - mask).  Looks inconsistent -- confirm.
                    mask = 8 * (m.group(1).count('.') + 1)

                else:
                    mask = int(mask[1:])
                    mask = 32 - mask

                # Compare the network portions of both addresses.
                if (hostIP >> mask) == (base >> mask):
                    return True

            elif fnmatch(host, value):
                # Non-numeric exception: shell-style wildcard match.
                return True

        return False
1469
    def getproxies_macosx_sysconf():
        """Return a dictionary of scheme -> proxy server URL mappings.

        This function uses the MacOSX framework SystemConfiguration
        to fetch the proxy information.
        """
        # _get_proxies is provided by the _scproxy extension module.
        return _get_proxies()
1477
1478 def proxy_bypass(host):
1479 if getproxies_environment():
1480 return proxy_bypass_environment(host)
1481 else:
1482 return proxy_bypass_macosx_sysconf(host)
1483
1484 def getproxies():
1485 return getproxies_environment() or getproxies_macosx_sysconf()
1486
1487elif os.name == 'nt':
    def getproxies_registry():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Win32 uses the registry to store proxies.

        """
        proxies = {}
        try:
            import _winreg
        except ImportError:
            # Std module, so should be around - but you never know!
            return proxies
        try:
            internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            proxyEnable = _winreg.QueryValueEx(internetSettings,
                                               'ProxyEnable')[0]
            if proxyEnable:
                # Returned as Unicode but problems if not converted to ASCII
                proxyServer = str(_winreg.QueryValueEx(internetSettings,
                                                       'ProxyServer')[0])
                if '=' in proxyServer:
                    # Per-protocol settings, e.g. "http=host:80;ftp=host:21"
                    for p in proxyServer.split(';'):
                        protocol, address = p.split('=', 1)
                        # See if address has a type:// prefix
                        import re
                        if not re.match('^([^/:]+)://', address):
                            address = '%s://%s' % (protocol, address)
                        proxies[protocol] = address
                else:
                    # Use one setting for all protocols
                    if proxyServer[:5] == 'http:':
                        proxies['http'] = proxyServer
                    else:
                        proxies['http'] = 'http://%s' % proxyServer
                        proxies['https'] = 'https://%s' % proxyServer
                        proxies['ftp'] = 'ftp://%s' % proxyServer
            internetSettings.Close()
        except (WindowsError, ValueError, TypeError):
            # Either registry key not found etc, or the value in an
            # unexpected format.
            # proxies already set up to be empty so nothing to do
            pass
        return proxies
1533
1534 def getproxies():
1535 """Return a dictionary of scheme -> proxy server URL mappings.
1536
1537 Returns settings gathered from the environment, if specified,
1538 or the registry.
1539
1540 """
1541 return getproxies_environment() or getproxies_registry()
1542
    def proxy_bypass_registry(host):
        """Return 1 if *host* matches the registry's ProxyOverride list."""
        try:
            import _winreg
            import re
        except ImportError:
            # Std modules, so should be around - but you never know!
            return 0
        try:
            internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            proxyEnable = _winreg.QueryValueEx(internetSettings,
                                               'ProxyEnable')[0]
            proxyOverride = str(_winreg.QueryValueEx(internetSettings,
                                                     'ProxyOverride')[0])
            # ^^^^ Returned as Unicode but problems if not converted to ASCII
        except WindowsError:
            return 0
        if not proxyEnable or not proxyOverride:
            return 0
        # try to make a host list from name and IP address.
        rawHost, port = splitport(host)
        host = [rawHost]
        try:
            addr = socket.gethostbyname(rawHost)
            if addr != rawHost:
                host.append(addr)
        except socket.error:
            pass
        try:
            fqdn = socket.getfqdn(rawHost)
            if fqdn != rawHost:
                host.append(fqdn)
        except socket.error:
            pass
        # make a check value list from the registry entry: replace the
        # '<local>' string by the localhost entry and the corresponding
        # canonical entry.
        proxyOverride = proxyOverride.split(';')
        # now check if we match one of the registry values.
        for test in proxyOverride:
            if test == '<local>':
                # '<local>' means "bypass for plain (dotless) host names".
                if '.' not in rawHost:
                    return 1
            # Translate the glob pattern into a regular expression.
            test = test.replace(".", r"\.")     # mask dots
            test = test.replace("*", r".*")     # change glob sequence
            test = test.replace("?", r".")      # change glob char
            for val in host:
                # print "%s <--> %s" %( test, val )
                if re.match(test, val, re.I):
                    return 1
        return 0
1594
1595 def proxy_bypass(host):
1596 """Return a dictionary of scheme -> proxy server URL mappings.
1597
1598 Returns settings gathered from the environment, if specified,
1599 or the registry.
1600
1601 """
1602 if getproxies_environment():
1603 return proxy_bypass_environment(host)
1604 else:
1605 return proxy_bypass_registry(host)
1606
else:
    # All other platforms have no system-level proxy store, so rely
    # solely on the <scheme>_proxy / no_proxy environment variables.
    getproxies = getproxies_environment
    proxy_bypass = proxy_bypass_environment
1611
1612# Test and time quote() and unquote()
1613def test1():
1614 s = ''
1615 for i in range(256): s = s + chr(i)
1616 s = s*4
1617 t0 = time.time()
1618 qs = quote(s)
1619 uqs = unquote(qs)
1620 t1 = time.time()
1621 if uqs != s:
1622 print 'Wrong!'
1623 print repr(s)
1624 print repr(qs)
1625 print repr(uqs)
1626 print round(t1 - t0, 3), 'sec'
1627
1628
def reporthook(blocknum, blocksize, totalsize):
    """Print one progress line per retrieved block (urlretrieve hook)."""
    # Report during remote transfers
    print "Block number: %d, Block size: %d, Total size: %d" % (
        blocknum, blocksize, totalsize)
# Note: See TracBrowser for help on using the repository browser.