source: python/trunk/Lib/urllib.py@ 608

Last change on this file since 608 was 391, checked in by dmik, 11 years ago

python: Merge vendor 2.7.6 to trunk.

  • Property svn:eol-style set to native
File size: 56.8 KB
Line 
1"""Open an arbitrary URL.
2
3See the following document for more info on URLs:
4"Names and Addresses, URIs, URLs, URNs, URCs", at
5http://www.w3.org/pub/WWW/Addressing/Overview.html
6
7See also the HTTP spec (from which the error codes are derived):
8"HTTP - Hypertext Transfer Protocol", at
9http://www.w3.org/pub/WWW/Protocols/
10
11Related standards and specs:
12- RFC1808: the "relative URL" spec. (authoritative status)
13- RFC1738 - the "URL standard". (authoritative status)
14- RFC1630 - the "URI spec". (informational status)
15
16The object returned by URLopener().open(file) will differ per
17 protocol. All you know is that it has methods read(), readline(),
18readlines(), fileno(), close() and info(). The read*(), fileno()
19and close() methods work like those of open files.
20The info() method returns a mimetools.Message object which can be
21used to query various info about the object, if available.
22(mimetools.Message objects are queried with the getheader() method.)
23"""
24
25import string
26import socket
27import os
28import time
29import sys
30import base64
31import re
32
33from urlparse import urljoin as basejoin
34
# Public names exported by ``from urllib import *``; "basejoin" is the
# urlparse.urljoin re-export from above.
__all__ = ["urlopen", "URLopener", "FancyURLopener", "urlretrieve",
           "urlcleanup", "quote", "quote_plus", "unquote", "unquote_plus",
           "urlencode", "url2pathname", "pathname2url", "splittag",
           "localhost", "thishost", "ftperrors", "basejoin", "unwrap",
           "splittype", "splithost", "splituser", "splitpasswd", "splitport",
           "splitnport", "splitquery", "splitattr", "splitvalue",
           "getproxies"]

__version__ = '1.17'    # XXX This version is not always updated :-(

# Trim the ftp connection cache once it grows beyond this many entries.
MAXFTPCACHE = 10        # Trim the ftp cache beyond this size
46
# Helper for non-unix systems: pick platform-specific path<->URL converters.
if os.name == 'nt':
    # Windows needs drive-letter and backslash handling.
    from nturl2path import url2pathname, pathname2url
elif os.name == 'riscos':
    # RISC OS has its own path syntax.
    from rourl2path import url2pathname, pathname2url
else:
    def url2pathname(pathname):
        """OS-specific conversion from a relative URL of the 'file' scheme
        to a file system path; not recommended for general use."""
        # On POSIX the path maps directly; only percent-escapes need decoding.
        return unquote(pathname)

    def pathname2url(pathname):
        """OS-specific conversion from a file system path to a relative URL
        of the 'file' scheme; not recommended for general use."""
        return quote(pathname)
62
63# This really consists of two pieces:
64# (1) a class which handles opening of all sorts of URLs
65# (plus assorted utilities etc.)
66# (2) a set of functions for parsing URLs
67# XXX Should these be separated out into different modules?
68
69
# Shortcut for basic usage
_urlopener = None
def urlopen(url, data=None, proxies=None):
    """Create a file-like object for the specified URL to read from."""
    from warnings import warnpy3k
    warnpy3k("urllib.urlopen() has been removed in Python 3.0 in "
             "favor of urllib2.urlopen()", stacklevel=2)

    global _urlopener
    if proxies is not None:
        # An explicit proxy mapping gets its own opener and does not
        # disturb the cached module-level one.
        opener = FancyURLopener(proxies=proxies)
    else:
        opener = _urlopener
        if not opener:
            # Lazily create and cache the shared opener on first use.
            opener = FancyURLopener()
            _urlopener = opener
    if data is None:
        return opener.open(url)
    return opener.open(url, data)
def urlretrieve(url, filename=None, reporthook=None, data=None):
    """Retrieve *url* via the shared module-level opener (created lazily);
    returns (filename, headers) like URLopener.retrieve()."""
    global _urlopener
    opener = _urlopener
    if opener is None:
        opener = FancyURLopener()
        _urlopener = opener
    return opener.retrieve(url, filename, reporthook, data)
def urlcleanup():
    """Discard module-level caches: the shared opener's temp files,
    the per-character quoter cache and the ftp connection cache."""
    opener = _urlopener
    if opener is not None:
        opener.cleanup()
    _safe_quoters.clear()
    ftpcache.clear()
100
101# check for SSL
102try:
103 import ssl
104except:
105 _have_ssl = False
106else:
107 _have_ssl = True
108
# exception raised when downloaded size does not match content-length
class ContentTooShortError(IOError):
    """Raised by retrieve() when fewer bytes arrive than the server's
    Content-Length header promised; the partial payload is kept so the
    caller can salvage it."""
    def __init__(self, message, content):
        IOError.__init__(self, message)
        # The (truncated) data downloaded so far.
        self.content = content

# Module-wide cache of open FTP connections, shared by every URLopener
# unless an instance overrides its .ftpcache attribute.
ftpcache = {}
116class URLopener:
117 """Class to open URLs.
118 This is a class rather than just a subroutine because we may need
119 more than one set of global protocol-specific options.
120 Note -- this is a base class for those who don't want the
121 automatic handling of errors type 302 (relocated) and 401
122 (authorization needed)."""
123
124 __tempfiles = None
125
126 version = "Python-urllib/%s" % __version__
127
    # Constructor
    def __init__(self, proxies=None, **x509):
        """Initialise the opener.

        proxies -- mapping of URL scheme to proxy URL; defaults to the
            environment-derived mapping returned by getproxies().
        x509 -- optional 'key_file'/'cert_file' entries used for HTTPS
            client certificate authentication.
        """
        if proxies is None:
            proxies = getproxies()
        assert hasattr(proxies, 'has_key'), "proxies must be a mapping"
        self.proxies = proxies
        self.key_file = x509.get('key_file')
        self.cert_file = x509.get('cert_file')
        # Default request headers; addheader() appends more.
        self.addheaders = [('User-Agent', self.version)]
        self.__tempfiles = []
        self.__unlink = os.unlink # See cleanup()
        self.tempcache = None
        # Undocumented feature: if you assign {} to tempcache,
        # it is used to cache files retrieved with
        # self.retrieve(). This is not enabled by default
        # since it does not work for changing documents (and I
        # haven't got the logic to check expiration headers
        # yet).
        self.ftpcache = ftpcache
        # Undocumented feature: you can use a different
        # ftp cache by assigning to the .ftpcache member;
        # in case you want logically independent URL openers
        # XXX This is not threadsafe.  Bah.
151
    def __del__(self):
        # Garbage-collection hook: make sure temp files get removed.
        self.close()
154
    def close(self):
        # Public close is just cleanup of temp files and the temp cache.
        self.cleanup()
157
158 def cleanup(self):
159 # This code sometimes runs when the rest of this module
160 # has already been deleted, so it can't use any globals
161 # or import anything.
162 if self.__tempfiles:
163 for file in self.__tempfiles:
164 try:
165 self.__unlink(file)
166 except OSError:
167 pass
168 del self.__tempfiles[:]
169 if self.tempcache:
170 self.tempcache.clear()
171
    def addheader(self, *args):
        """Add a header to be used by the HTTP interface only
        e.g. u.addheader('Accept', 'sound/basic')"""
        # Stored as a (name, value) tuple; sent by open_http/open_https.
        self.addheaders.append(args)
176
    # External interface
    def open(self, fullurl, data=None):
        """Use URLopener().open(file) instead of open(file, 'r')."""
        fullurl = unwrap(toBytes(fullurl))
        # Percent-encode characters (e.g. spaces) that lame servers choke
        # on; characters already meaningful in URLs are kept as-is.
        fullurl = quote(fullurl, safe="%/:=&?~#+!$,;'@()*[]|")
        if self.tempcache and fullurl in self.tempcache:
            # Previously retrieve()d and cached on disk: replay from file.
            filename, headers = self.tempcache[fullurl]
            fp = open(filename, 'rb')
            return addinfourl(fp, headers, fullurl)
        urltype, url = splittype(fullurl)
        if not urltype:
            # No scheme at all: treat it as a local file path.
            urltype = 'file'
        if urltype in self.proxies:
            proxy = self.proxies[urltype]
            urltype, proxyhost = splittype(proxy)
            host, selector = splithost(proxyhost)
            url = (host, fullurl) # Signal special case to open_*()
        else:
            proxy = None
        # Dispatch to the scheme-specific handler, e.g. open_http().
        name = 'open_' + urltype
        self.type = urltype
        name = name.replace('-', '_')
        if not hasattr(self, name):
            if proxy:
                return self.open_unknown_proxy(proxy, fullurl, data)
            else:
                return self.open_unknown(fullurl, data)
        try:
            if data is None:
                return getattr(self, name)(url)
            else:
                return getattr(self, name)(url, data)
        except socket.error, msg:
            # Re-raise as IOError but preserve the original traceback.
            raise IOError, ('socket error', msg), sys.exc_info()[2]
213
    def open_unknown(self, fullurl, data=None):
        """Overridable interface to open unknown URL type."""
        # Default behaviour: reject the scheme outright.
        type, url = splittype(fullurl)
        raise IOError, ('url error', 'unknown url type', type)
218
    def open_unknown_proxy(self, proxy, fullurl, data=None):
        """Overridable interface to open unknown URL type."""
        # Reached when a proxy is configured for a scheme we cannot handle.
        type, url = splittype(fullurl)
        raise IOError, ('url error', 'invalid proxy for %s' % type, proxy)
223
    # External interface
    def retrieve(self, url, filename=None, reporthook=None, data=None):
        """retrieve(url) returns (filename, headers) for a local object
        or (tempfilename, headers) for a remote object.

        reporthook, if given, is called as reporthook(blocknum, blocksize,
        totalsize) once before the transfer and once per block read.
        Raises ContentTooShortError when fewer bytes arrive than the
        Content-Length header promised.
        """
        url = unwrap(toBytes(url))
        if self.tempcache and url in self.tempcache:
            return self.tempcache[url]
        type, url1 = splittype(url)
        # A local file needs no copying: hand back its filesystem path.
        if filename is None and (not type or type == 'file'):
            try:
                fp = self.open_local_file(url1)
                hdrs = fp.info()
                fp.close()
                return url2pathname(splithost(url1)[1]), hdrs
            except IOError:
                # Not openable locally; fall through to a real fetch.
                pass
        fp = self.open(url, data)
        try:
            headers = fp.info()
            if filename:
                tfp = open(filename, 'wb')
            else:
                # No target given: download into a temp file whose suffix
                # mirrors the extension of the URL path.
                import tempfile
                garbage, path = splittype(url)
                garbage, path = splithost(path or "")
                path, garbage = splitquery(path or "")
                path, garbage = splitattr(path or "")
                suffix = os.path.splitext(path)[1]
                (fd, filename) = tempfile.mkstemp(suffix)
                self.__tempfiles.append(filename)
                tfp = os.fdopen(fd, 'wb')
            try:
                result = filename, headers
                if self.tempcache is not None:
                    self.tempcache[url] = result
                bs = 1024*8
                size = -1
                read = 0
                blocknum = 0
                if "content-length" in headers:
                    size = int(headers["Content-Length"])
                if reporthook:
                    reporthook(blocknum, bs, size)
                # Copy the body in 8 KiB blocks, reporting progress.
                while 1:
                    block = fp.read(bs)
                    if block == "":
                        break
                    read += len(block)
                    tfp.write(block)
                    blocknum += 1
                    if reporthook:
                        reporthook(blocknum, bs, size)
            finally:
                tfp.close()
        finally:
            fp.close()

        # raise exception if actual size does not match content-length header
        if size >= 0 and read < size:
            raise ContentTooShortError("retrieval incomplete: got only %i out "
                                       "of %i bytes" % (read, size), result)

        return result
287
    # Each method named open_<type> knows how to open that type of URL

    def open_http(self, url, data=None):
        """Use HTTP protocol.

        url is either '//host/selector' (direct) or a (proxyhost, full-url)
        tuple set up by open() for proxied requests.  A non-None data
        triggers a form-encoded POST instead of a GET.
        """
        import httplib
        user_passwd = None
        proxy_passwd= None
        if isinstance(url, str):
            # Direct request.
            host, selector = splithost(url)
            if host:
                user_passwd, host = splituser(host)
                host = unquote(host)
            realhost = host
        else:
            # Proxied request: the selector is the full original URL.
            host, selector = url
            # check whether the proxy contains authorization information
            proxy_passwd, host = splituser(host)
            # now we proceed with the url we want to obtain
            urltype, rest = splittype(selector)
            url = rest
            user_passwd = None
            if urltype.lower() != 'http':
                realhost = None
            else:
                realhost, rest = splithost(rest)
                if realhost:
                    user_passwd, realhost = splituser(realhost)
                if user_passwd:
                    selector = "%s://%s%s" % (urltype, realhost, rest)
                if proxy_bypass(realhost):
                    # Host exempted from proxying: talk to it directly.
                    host = realhost

            #print "proxy via http:", host, selector
        if not host: raise IOError, ('http error', 'no host given')

        # Credentials embedded in the URL / proxy URL become Basic auth
        # headers (base64 of "user:password").
        if proxy_passwd:
            proxy_passwd = unquote(proxy_passwd)
            proxy_auth = base64.b64encode(proxy_passwd).strip()
        else:
            proxy_auth = None

        if user_passwd:
            user_passwd = unquote(user_passwd)
            auth = base64.b64encode(user_passwd).strip()
        else:
            auth = None
        h = httplib.HTTP(host)
        if data is not None:
            h.putrequest('POST', selector)
            h.putheader('Content-Type', 'application/x-www-form-urlencoded')
            h.putheader('Content-Length', '%d' % len(data))
        else:
            h.putrequest('GET', selector)
        if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
        if auth: h.putheader('Authorization', 'Basic %s' % auth)
        if realhost: h.putheader('Host', realhost)
        for args in self.addheaders: h.putheader(*args)
        h.endheaders(data)
        errcode, errmsg, headers = h.getreply()
        fp = h.getfile()
        if errcode == -1:
            if fp: fp.close()
            # something went wrong with the HTTP status line
            raise IOError, ('http protocol error', 0,
                            'got a bad status line', None)
        # According to RFC 2616, "2xx" code indicates that the client's
        # request was successfully received, understood, and accepted.
        if (200 <= errcode < 300):
            return addinfourl(fp, headers, "http:" + url, errcode)
        else:
            # Non-2xx: delegate to the (possibly overridden) error handler.
            if data is None:
                return self.http_error(url, fp, errcode, errmsg, headers)
            else:
                return self.http_error(url, fp, errcode, errmsg, headers, data)
362
363 def http_error(self, url, fp, errcode, errmsg, headers, data=None):
364 """Handle http errors.
365 Derived class can override this, or provide specific handlers
366 named http_error_DDD where DDD is the 3-digit error code."""
367 # First check if there's a specific handler for this error
368 name = 'http_error_%d' % errcode
369 if hasattr(self, name):
370 method = getattr(self, name)
371 if data is None:
372 result = method(url, fp, errcode, errmsg, headers)
373 else:
374 result = method(url, fp, errcode, errmsg, headers, data)
375 if result: return result
376 return self.http_error_default(url, fp, errcode, errmsg, headers)
377
    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Default error handler: close the connection and raise IOError."""
        # Note: FancyURLopener overrides this to return the error page.
        fp.close()
        raise IOError, ('http error', errcode, errmsg, headers)
382
    if _have_ssl:
        def open_https(self, url, data=None):
            """Use HTTPS protocol.

            Only defined when the ssl module imported successfully.
            Mirrors open_http(), adding the key_file/cert_file pair
            given to the constructor for client authentication.
            """

            import httplib
            user_passwd = None
            proxy_passwd = None
            if isinstance(url, str):
                # Direct request.
                host, selector = splithost(url)
                if host:
                    user_passwd, host = splituser(host)
                    host = unquote(host)
                realhost = host
            else:
                # Proxied request.
                host, selector = url
                # here, we determine, whether the proxy contains authorization information
                proxy_passwd, host = splituser(host)
                urltype, rest = splittype(selector)
                url = rest
                user_passwd = None
                if urltype.lower() != 'https':
                    realhost = None
                else:
                    realhost, rest = splithost(rest)
                    if realhost:
                        user_passwd, realhost = splituser(realhost)
                    if user_passwd:
                        selector = "%s://%s%s" % (urltype, realhost, rest)
                #print "proxy via https:", host, selector
            if not host: raise IOError, ('https error', 'no host given')
            # URL-embedded credentials become Basic auth headers.
            if proxy_passwd:
                proxy_passwd = unquote(proxy_passwd)
                proxy_auth = base64.b64encode(proxy_passwd).strip()
            else:
                proxy_auth = None
            if user_passwd:
                user_passwd = unquote(user_passwd)
                auth = base64.b64encode(user_passwd).strip()
            else:
                auth = None
            h = httplib.HTTPS(host, 0,
                              key_file=self.key_file,
                              cert_file=self.cert_file)
            if data is not None:
                h.putrequest('POST', selector)
                h.putheader('Content-Type',
                            'application/x-www-form-urlencoded')
                h.putheader('Content-Length', '%d' % len(data))
            else:
                h.putrequest('GET', selector)
            if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
            if auth: h.putheader('Authorization', 'Basic %s' % auth)
            if realhost: h.putheader('Host', realhost)
            for args in self.addheaders: h.putheader(*args)
            h.endheaders(data)
            errcode, errmsg, headers = h.getreply()
            fp = h.getfile()
            if errcode == -1:
                if fp: fp.close()
                # something went wrong with the HTTP status line
                raise IOError, ('http protocol error', 0,
                                'got a bad status line', None)
            # According to RFC 2616, "2xx" code indicates that the client's
            # request was successfully received, understood, and accepted.
            if (200 <= errcode < 300):
                return addinfourl(fp, headers, "https:" + url, errcode)
            else:
                if data is None:
                    return self.http_error(url, fp, errcode, errmsg, headers)
                else:
                    return self.http_error(url, fp, errcode, errmsg, headers,
                                           data)
455
    def open_file(self, url):
        """Use local file or FTP depending on form of URL."""
        if not isinstance(url, str):
            raise IOError, ('file error', 'proxy support for file protocol currently not implemented')
        # 'file://host/...' with a non-empty, non-'localhost' host is
        # delegated to FTP; 'file:///...' and 'file://localhost/...' are
        # opened as local files.
        if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/':
            return self.open_ftp(url)
        else:
            return self.open_local_file(url)
464
    def open_local_file(self, url):
        """Use local file.

        Builds a mimetools.Message with Content-Type / Content-Length /
        Last-modified headers from the file's stat() info, and returns an
        addinfourl wrapping the open file.  Raises IOError when the URL
        names a host that is not this machine.
        """
        import mimetypes, mimetools, email.utils
        try:
            from cStringIO import StringIO
        except ImportError:
            from StringIO import StringIO
        host, file = splithost(url)
        localname = url2pathname(file)
        try:
            stats = os.stat(localname)
        except OSError, e:
            # Normalise the stat failure to the IOError callers expect.
            raise IOError(e.errno, e.strerror, e.filename)
        size = stats.st_size
        modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
        mtype = mimetypes.guess_type(url)[0]
        headers = mimetools.Message(StringIO(
            'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
            (mtype or 'text/plain', size, modified)))
        if not host:
            urlfile = file
            if file[:1] == '/':
                urlfile = 'file://' + file
            elif file[:2] == './':
                raise ValueError("local file url may start with / or file:. Unknown url of type: %s" % url)
            return addinfourl(open(localname, 'rb'),
                              headers, urlfile)
        host, port = splitport(host)
        # A host is acceptable only when it resolves to this machine.
        if not port \
           and socket.gethostbyname(host) in (localhost(), thishost()):
            urlfile = file
            if file[:1] == '/':
                urlfile = 'file://' + file
            return addinfourl(open(localname, 'rb'),
                              headers, urlfile)
        raise IOError, ('local file error', 'not on local host')
501
    def open_ftp(self, url):
        """Use FTP protocol.

        Parses user:password@host:port and ';type=X' attributes out of the
        URL, reuses a cached ftpwrapper connection keyed on
        (user, host, port, dirs) when possible, and returns an addinfourl
        with synthesised Content-Type/Content-Length headers.
        """
        if not isinstance(url, str):
            raise IOError, ('ftp error', 'proxy support for ftp protocol currently not implemented')
        import mimetypes, mimetools
        try:
            from cStringIO import StringIO
        except ImportError:
            from StringIO import StringIO
        host, path = splithost(url)
        if not host: raise IOError, ('ftp error', 'no host given')
        host, port = splitport(host)
        user, host = splituser(host)
        if user: user, passwd = splitpasswd(user)
        else: passwd = None
        host = unquote(host)
        user = user or ''
        passwd = passwd or ''
        host = socket.gethostbyname(host)
        if not port:
            import ftplib
            port = ftplib.FTP_PORT
        else:
            port = int(port)
        path, attrs = splitattr(path)
        path = unquote(path)
        dirs = path.split('/')
        dirs, file = dirs[:-1], dirs[-1]
        if dirs and not dirs[0]: dirs = dirs[1:]
        if dirs and not dirs[0]: dirs[0] = '/'
        key = user, host, port, '/'.join(dirs)
        # XXX thread unsafe!
        if len(self.ftpcache) > MAXFTPCACHE:
            # Prune the cache, rather arbitrarily
            for k in self.ftpcache.keys():
                if k != key:
                    v = self.ftpcache[k]
                    del self.ftpcache[k]
                    v.close()
        try:
            if not key in self.ftpcache:
                self.ftpcache[key] = \
                    ftpwrapper(user, passwd, host, port, dirs)
            # Default transfer type: directory listing without a file name,
            # binary ('I'mage) otherwise; ';type=X' overrides.
            if not file: type = 'D'
            else: type = 'I'
            for attr in attrs:
                attr, value = splitvalue(attr)
                if attr.lower() == 'type' and \
                   value in ('a', 'A', 'i', 'I', 'd', 'D'):
                    type = value.upper()
            (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
            mtype = mimetypes.guess_type("ftp:" + url)[0]
            headers = ""
            if mtype:
                headers += "Content-Type: %s\n" % mtype
            if retrlen is not None and retrlen >= 0:
                headers += "Content-Length: %d\n" % retrlen
            headers = mimetools.Message(StringIO(headers))
            return addinfourl(fp, headers, "ftp:" + url)
        except ftperrors(), msg:
            # Re-raise any ftplib error as IOError, keeping the traceback.
            raise IOError, ('ftp error', msg), sys.exc_info()[2]
563
    def open_data(self, url, data=None):
        """Use "data" URL."""
        if not isinstance(url, str):
            raise IOError, ('data error', 'proxy support for data protocol currently not implemented')
        # ignore POSTed data
        #
        # syntax of data URLs:
        # dataurl   := "data:" [ mediatype ] [ ";base64" ] "," data
        # mediatype := [ type "/" subtype ] *( ";" parameter )
        # data      := *urlchar
        # parameter := attribute "=" value
        import mimetools
        try:
            from cStringIO import StringIO
        except ImportError:
            from StringIO import StringIO
        try:
            [type, data] = url.split(',', 1)
        except ValueError:
            raise IOError, ('data error', 'bad data URL')
        if not type:
            type = 'text/plain;charset=US-ASCII'
        semi = type.rfind(';')
        # A trailing parameter without '=' (e.g. ";base64") names the
        # payload encoding rather than a mediatype parameter.
        if semi >= 0 and '=' not in type[semi:]:
            encoding = type[semi+1:]
            type = type[:semi]
        else:
            encoding = ''
        # Synthesise an HTTP-like header block so callers' info() works.
        msg = []
        msg.append('Date: %s'%time.strftime('%a, %d %b %Y %H:%M:%S GMT',
                                            time.gmtime(time.time())))
        msg.append('Content-type: %s' % type)
        if encoding == 'base64':
            data = base64.decodestring(data)
        else:
            data = unquote(data)
        msg.append('Content-Length: %d' % len(data))
        msg.append('')
        msg.append(data)
        msg = '\n'.join(msg)
        f = StringIO(msg)
        headers = mimetools.Message(f, 0)
        #f.fileno = None     # needed for addinfourl
        return addinfourl(f, headers, url)
608
609
610class FancyURLopener(URLopener):
611 """Derived class with handlers for errors we can handle (perhaps)."""
612
    def __init__(self, *args, **kwargs):
        """Pass everything through to URLopener, then add state for
        credential caching and redirect-loop detection."""
        URLopener.__init__(self, *args, **kwargs)
        self.auth_cache = {}   # maps "realm@host" to (user, passwd)
        self.tries = 0         # redirects followed for the current request
        self.maxtries = 10     # redirect limit before reporting a 500
618
    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Default error handling -- don't raise an exception."""
        # Unlike the base class, hand the error page back as a normal
        # response; getcode() on the result exposes the status.
        return addinfourl(fp, headers, "http:" + url, errcode)
622
623 def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
624 """Error 302 -- relocated (temporarily)."""
625 self.tries += 1
626 if self.maxtries and self.tries >= self.maxtries:
627 if hasattr(self, "http_error_500"):
628 meth = self.http_error_500
629 else:
630 meth = self.http_error_default
631 self.tries = 0
632 return meth(url, fp, 500,
633 "Internal Server Error: Redirect Recursion", headers)
634 result = self.redirect_internal(url, fp, errcode, errmsg, headers,
635 data)
636 self.tries = 0
637 return result
638
639 def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
640 if 'location' in headers:
641 newurl = headers['location']
642 elif 'uri' in headers:
643 newurl = headers['uri']
644 else:
645 return
646 fp.close()
647 # In case the server sent a relative URL, join with original:
648 newurl = basejoin(self.type + ":" + url, newurl)
649
650 # For security reasons we do not allow redirects to protocols
651 # other than HTTP, HTTPS or FTP.
652 newurl_lower = newurl.lower()
653 if not (newurl_lower.startswith('http://') or
654 newurl_lower.startswith('https://') or
655 newurl_lower.startswith('ftp://')):
656 raise IOError('redirect error', errcode,
657 errmsg + " - Redirection to url '%s' is not allowed" %
658 newurl,
659 headers)
660
661 return self.open(newurl)
662
    def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 301 -- also relocated (permanently)."""
        # Same follow-the-redirect treatment as 302.
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)
666
    def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 303 -- also relocated (essentially identical to 302)."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)
670
671 def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
672 """Error 307 -- relocated, but turn POST into error."""
673 if data is None:
674 return self.http_error_302(url, fp, errcode, errmsg, headers, data)
675 else:
676 return self.http_error_default(url, fp, errcode, errmsg, headers)
677
    def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 401 -- authentication required.
        This function supports Basic authentication only."""
        # Each failed precondition delegates to the BASE CLASS default,
        # which raises IOError -- so the code below a guard never runs
        # when that guard "falls through".
        if not 'www-authenticate' in headers:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        stuff = headers['www-authenticate']
        import re
        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
        if not match:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        scheme, realm = match.groups()
        if scheme.lower() != 'basic':
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        # Retry through e.g. retry_http_basic_auth / retry_https_basic_auth.
        name = 'retry_' + self.type + '_basic_auth'
        if data is None:
            return getattr(self,name)(url, realm)
        else:
            return getattr(self,name)(url, realm, data)
699
    def http_error_407(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 407 -- proxy authentication required.
        This function supports Basic authentication only."""
        # Same guard structure as http_error_401: the base-class default
        # raises IOError, so failed guards never fall through.
        if not 'proxy-authenticate' in headers:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        stuff = headers['proxy-authenticate']
        import re
        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
        if not match:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        scheme, realm = match.groups()
        if scheme.lower() != 'basic':
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        # Retry through e.g. retry_proxy_http_basic_auth.
        name = 'retry_proxy_' + self.type + '_basic_auth'
        if data is None:
            return getattr(self,name)(url, realm)
        else:
            return getattr(self,name)(url, realm, data)
721
722 def retry_proxy_http_basic_auth(self, url, realm, data=None):
723 host, selector = splithost(url)
724 newurl = 'http://' + host + selector
725 proxy = self.proxies['http']
726 urltype, proxyhost = splittype(proxy)
727 proxyhost, proxyselector = splithost(proxyhost)
728 i = proxyhost.find('@') + 1
729 proxyhost = proxyhost[i:]
730 user, passwd = self.get_user_passwd(proxyhost, realm, i)
731 if not (user or passwd): return None
732 proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost
733 self.proxies['http'] = 'http://' + proxyhost + proxyselector
734 if data is None:
735 return self.open(newurl)
736 else:
737 return self.open(newurl, data)
738
739 def retry_proxy_https_basic_auth(self, url, realm, data=None):
740 host, selector = splithost(url)
741 newurl = 'https://' + host + selector
742 proxy = self.proxies['https']
743 urltype, proxyhost = splittype(proxy)
744 proxyhost, proxyselector = splithost(proxyhost)
745 i = proxyhost.find('@') + 1
746 proxyhost = proxyhost[i:]
747 user, passwd = self.get_user_passwd(proxyhost, realm, i)
748 if not (user or passwd): return None
749 proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost
750 self.proxies['https'] = 'https://' + proxyhost + proxyselector
751 if data is None:
752 return self.open(newurl)
753 else:
754 return self.open(newurl, data)
755
756 def retry_http_basic_auth(self, url, realm, data=None):
757 host, selector = splithost(url)
758 i = host.find('@') + 1
759 host = host[i:]
760 user, passwd = self.get_user_passwd(host, realm, i)
761 if not (user or passwd): return None
762 host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
763 newurl = 'http://' + host + selector
764 if data is None:
765 return self.open(newurl)
766 else:
767 return self.open(newurl, data)
768
769 def retry_https_basic_auth(self, url, realm, data=None):
770 host, selector = splithost(url)
771 i = host.find('@') + 1
772 host = host[i:]
773 user, passwd = self.get_user_passwd(host, realm, i)
774 if not (user or passwd): return None
775 host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
776 newurl = 'https://' + host + selector
777 if data is None:
778 return self.open(newurl)
779 else:
780 return self.open(newurl, data)
781
782 def get_user_passwd(self, host, realm, clear_cache=0):
783 key = realm + '@' + host.lower()
784 if key in self.auth_cache:
785 if clear_cache:
786 del self.auth_cache[key]
787 else:
788 return self.auth_cache[key]
789 user, passwd = self.prompt_user_passwd(host, realm)
790 if user or passwd: self.auth_cache[key] = (user, passwd)
791 return user, passwd
792
    def prompt_user_passwd(self, host, realm):
        """Override this in a GUI environment!"""
        # Console fallback: username via raw_input, password via getpass
        # (no echo).  Ctrl-C yields (None, None) so callers can give up.
        import getpass
        try:
            user = raw_input("Enter username for %s at %s: " % (realm,
                                                                host))
            passwd = getpass.getpass("Enter password for %s in %s at %s: " %
                (user, realm, host))
            return user, passwd
        except KeyboardInterrupt:
            print
            return None, None
805
806
807# Utility functions
808
# Cached result of the one-time 'localhost' address lookup.
_localhost = None
def localhost():
    """Return the IP address of the magic hostname 'localhost'."""
    global _localhost
    addr = _localhost
    if addr is None:
        addr = socket.gethostbyname('localhost')
        _localhost = addr
    return addr
816
# Cached result of the one-time lookup of this machine's address.
_thishost = None
def thishost():
    """Return the IP address of the current host."""
    global _thishost
    if _thishost is not None:
        return _thishost
    try:
        addr = socket.gethostbyname(socket.gethostname())
    except socket.gaierror:
        # Hostname does not resolve: fall back to the loopback address.
        addr = socket.gethostbyname('localhost')
    _thishost = addr
    return addr
827
# Cached tuple of ftplib exception classes (lazy, to defer the import).
_ftperrors = None
def ftperrors():
    """Return the set of errors raised by the FTP class."""
    global _ftperrors
    errors = _ftperrors
    if errors is None:
        import ftplib
        errors = ftplib.all_errors
        _ftperrors = errors
    return errors
836
# Cached shared empty message object (lazy, to defer the imports).
_noheaders = None
def noheaders():
    """Return an empty mimetools.Message object."""
    global _noheaders
    if _noheaders is None:
        import mimetools
        try:
            from cStringIO import StringIO
        except ImportError:
            from StringIO import StringIO
        empty = mimetools.Message(StringIO(), 0)
        empty.fp.close() # Recycle file descriptor
        _noheaders = empty
    return _noheaders
850
851
852# Utility classes
853
class ftpwrapper:
    """Class used by open_ftp() for cache of open FTP connections."""

    def __init__(self, user, passwd, host, port, dirs,
                 timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
                 persistent=True):
        self.user = user
        self.passwd = passwd
        self.host = host
        self.port = port
        self.dirs = dirs
        self.timeout = timeout
        # Number of addclosehook file objects still using this connection.
        self.refcount = 0
        # When True, the connection outlives file_close() so the cache
        # in open_ftp() can reuse it; close() flips this off.
        self.keepalive = persistent
        self.init()

    def init(self):
        """(Re)connect, log in and change into the target directory."""
        import ftplib
        self.busy = 0
        self.ftp = ftplib.FTP()
        self.ftp.connect(self.host, self.port, self.timeout)
        self.ftp.login(self.user, self.passwd)
        _target = '/'.join(self.dirs)
        self.ftp.cwd(_target)

    def retrfile(self, file, type):
        """Return (file-like object, length-or-None) for *file*.

        type is 'A' (ascii), 'I' (binary) or 'D' (directory listing).
        Falls back to a LIST when RETR fails with a 550 (not a plain
        file, e.g. a directory was named).
        """
        import ftplib
        self.endtransfer()
        if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
        else: cmd = 'TYPE ' + type; isdir = 0
        try:
            self.ftp.voidcmd(cmd)
        except ftplib.all_errors:
            # The cached connection may have dropped; reconnect and retry.
            self.init()
            self.ftp.voidcmd(cmd)
        conn = None
        if file and not isdir:
            # Try to retrieve as a file
            try:
                cmd = 'RETR ' + file
                conn, retrlen = self.ftp.ntransfercmd(cmd)
            except ftplib.error_perm, reason:
                # 550 means "not a plain file"; anything else is fatal.
                if str(reason)[:3] != '550':
                    raise IOError, ('ftp error', reason), sys.exc_info()[2]
        if not conn:
            # Set transfer mode to ASCII!
            self.ftp.voidcmd('TYPE A')
            # Try a directory listing. Verify that directory exists.
            if file:
                pwd = self.ftp.pwd()
                try:
                    try:
                        self.ftp.cwd(file)
                    except ftplib.error_perm, reason:
                        raise IOError, ('ftp error', reason), sys.exc_info()[2]
                finally:
                    self.ftp.cwd(pwd)
                cmd = 'LIST ' + file
            else:
                cmd = 'LIST'
            conn, retrlen = self.ftp.ntransfercmd(cmd)
        self.busy = 1
        ftpobj = addclosehook(conn.makefile('rb'), self.file_close)
        self.refcount += 1
        conn.close()
        # Pass back both a suitably decorated object and a retrieval length
        return (ftpobj, retrlen)

    def endtransfer(self):
        """Consume the pending end-of-transfer response, if any."""
        if not self.busy:
            return
        self.busy = 0
        try:
            self.ftp.voidresp()
        except ftperrors():
            pass

    def close(self):
        """Stop keeping the connection alive; close it once unused."""
        self.keepalive = False
        if self.refcount <= 0:
            self.real_close()

    def file_close(self):
        """Hook run when a returned file object is closed."""
        self.endtransfer()
        self.refcount -= 1
        if self.refcount <= 0 and not self.keepalive:
            self.real_close()

    def real_close(self):
        """Actually shut down the FTP connection (best effort)."""
        self.endtransfer()
        try:
            self.ftp.close()
        except ftperrors():
            pass
948
class addbase:
    """Base class for addinfo and addclosehook.

    Wraps a file-like object and delegates the file protocol to it by
    copying its bound methods onto the instance."""

    def __init__(self, fp):
        self.fp = fp
        self.read = fp.read
        self.readline = fp.readline
        readlines = getattr(fp, "readlines", None)
        if readlines is not None:
            self.readlines = readlines
        # Always provide fileno(); fall back to a no-op returning None.
        self.fileno = getattr(fp, "fileno", lambda: None)
        iterate = getattr(fp, "__iter__", None)
        if iterate is not None:
            self.__iter__ = iterate
            advance = getattr(fp, "next", None)
            if advance is not None:
                self.next = advance

    def __repr__(self):
        cls = self.__class__.__name__
        return '<%s at %r whose fp = %r>' % (cls, id(self), self.fp)

    def close(self):
        """Invalidate the delegated methods and close the wrapped file."""
        self.read = None
        self.readline = None
        self.readlines = None
        self.fileno = None
        if self.fp:
            self.fp.close()
        self.fp = None
977
class addclosehook(addbase):
    """Class to add a close hook to an open file.

    The hook is invoked (with the stored arguments) at most once, on
    the first call to close().
    """

    def __init__(self, fp, closehook, *hookargs):
        addbase.__init__(self, fp)
        self.closehook = closehook
        self.hookargs = hookargs

    def close(self):
        hook = self.closehook
        if hook:
            # Run the hook first, then disarm it so a second close()
            # cannot fire it again.
            hook(*self.hookargs)
            self.closehook = None
            self.hookargs = None
        addbase.close(self)
992
class addinfo(addbase):
    """class to add an info() method to an open file."""

    def __init__(self, fp, headers):
        addbase.__init__(self, fp)
        self.headers = headers

    def info(self):
        # Return the header object supplied at construction time.
        return self.headers
1002
class addinfourl(addbase):
    """class to add info() and geturl() methods to an open file."""

    def __init__(self, fp, headers, url, code=None):
        addbase.__init__(self, fp)
        self.headers = headers
        self.url = url
        self.code = code

    def info(self):
        # Header object supplied at construction time.
        return self.headers

    def getcode(self):
        # Response code, or None when the opener did not record one.
        return self.code

    def geturl(self):
        # URL recorded at construction time.
        return self.url
1020
1021
1022# Utilities to parse URLs (most of these return None for missing parts):
1023# unwrap('<URL:type://host/path>') --> 'type://host/path'
1024# splittype('type:opaquestring') --> 'type', 'opaquestring'
1025# splithost('//host[:port]/path') --> 'host[:port]', '/path'
1026# splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'
# splitpasswd('user:passwd') --> 'user', 'passwd'
1028# splitport('host:port') --> 'host', 'port'
1029# splitquery('/path?query') --> '/path', 'query'
1030# splittag('/path#tag') --> '/path', 'tag'
1031# splitattr('/path;attr1=value1;attr2=value2;...') ->
1032# '/path', ['attr1=value1', 'attr2=value2', ...]
1033# splitvalue('attr=value') --> 'attr', 'value'
1034# unquote('abc%20def') -> 'abc def'
# quote('abc def') -> 'abc%20def'
1036
# Define _is_unicode() so the rest of the module can test for unicode
# strings without assuming the 'unicode' builtin exists.
try:
    unicode
except NameError:
    # No 'unicode' builtin (e.g. Python 3): nothing here is "unicode".
    def _is_unicode(x):
        return 0
else:
    def _is_unicode(x):
        # Python 2: true for unicode string objects.
        return isinstance(x, unicode)
1045
def toBytes(url):
    """toBytes(u"URL") --> 'URL'."""
    # Most URL schemes require ASCII. If that changes, the conversion
    # can be relaxed.
    if not _is_unicode(url):
        return url
    try:
        return url.encode("ASCII")
    except UnicodeError:
        raise UnicodeError("URL " + repr(url) +
                           " contains non-ASCII characters")
1057
def unwrap(url):
    """unwrap('<URL:type://host/path>') --> 'type://host/path'."""
    url = url.strip()
    # Peel off a surrounding <...> pair, then an optional URL: prefix.
    if url.startswith('<') and url.endswith('>'):
        url = url[1:-1].strip()
        if url.startswith('URL:'):
            url = url[4:].strip()
    return url
1065
_typeprog = None
def splittype(url):
    """splittype('type:opaquestring') --> 'type', 'opaquestring'.

    Split off the scheme (returned lowercased); return (None, url)
    when no scheme is present.
    """
    global _typeprog
    if _typeprog is None:
        # 're' is already imported at module level; the redundant
        # function-local import was removed.  Compile lazily and cache.
        _typeprog = re.compile('^([^/:]+):')

    match = _typeprog.match(url)
    if match:
        scheme = match.group(1)
        return scheme.lower(), url[len(scheme) + 1:]
    return None, url
1079
_hostprog = None
def splithost(url):
    """splithost('//host[:port]/path') --> 'host[:port]', '/path'.

    Return (None, url) when the URL does not start with '//'.  A path
    that does not begin with '/' (e.g. a bare query) gets one prepended.
    """
    global _hostprog
    if _hostprog is None:
        # 're' is already imported at module level; the redundant
        # function-local import was removed.
        _hostprog = re.compile('^//([^/?]*)(.*)$')

    match = _hostprog.match(url)
    if match:
        host_port = match.group(1)
        path = match.group(2)
        if path and not path.startswith('/'):
            path = '/' + path
        return host_port, path
    return None, url
1096
_userprog = None
def splituser(host):
    """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'."""
    global _userprog
    if _userprog is None:
        # 're' is already imported at module level; the redundant
        # function-local import was removed.  The greedy '.*' makes the
        # *last* '@' the separator.
        _userprog = re.compile('^(.*)@(.*)$')

    match = _userprog.match(host)
    if match: return match.group(1, 2)
    return None, host
1108
_passwdprog = None
def splitpasswd(user):
    """splitpasswd('user:passwd') -> 'user', 'passwd'."""
    global _passwdprog
    if _passwdprog is None:
        # 're' is already imported at module level; the redundant
        # function-local import was removed.  re.S lets the password
        # part contain newlines.
        _passwdprog = re.compile('^([^:]*):(.*)$',re.S)

    match = _passwdprog.match(user)
    if match: return match.group(1, 2)
    return user, None
1120
_portprog = None
def splitport(host):
    """splitport('host:port') --> 'host', 'port'.

    The port must be all digits; otherwise the input is returned
    unchanged with a None port.
    """
    global _portprog
    if _portprog is None:
        # 're' is already imported at module level; the redundant
        # function-local import was removed.  (A stray comment that
        # described splittag() was also dropped from above this def.)
        _portprog = re.compile('^(.*):([0-9]+)$')

    match = _portprog.match(host)
    if match: return match.group(1, 2)
    return host, None
1133
_nportprog = None
def splitnport(host, defport=-1):
    """Split host and port, returning numeric port.

    Return given default port if no ':' found; defaults to -1.
    Return numerical port if a valid number is found after ':'.
    Return None if ':' but not a valid number.
    """
    global _nportprog
    if _nportprog is None:
        # 're' is already imported at module level; the redundant
        # function-local import was removed.
        _nportprog = re.compile('^(.*):(.*)$')

    match = _nportprog.match(host)
    if match:
        host, port = match.group(1, 2)
        if port:
            try:
                return host, int(port)
            except ValueError:
                pass
        # Empty or non-numeric port.
        return host, None
    return host, defport
1155
_queryprog = None
def splitquery(url):
    """splitquery('/path?query') --> '/path', 'query'."""
    global _queryprog
    if _queryprog is None:
        # 're' is already imported at module level; the redundant
        # function-local import was removed.  Raw string for the
        # pattern; greedy '.*' splits at the last '?'.
        _queryprog = re.compile(r'^(.*)\?([^?]*)$')

    match = _queryprog.match(url)
    if match: return match.group(1, 2)
    return url, None
1167
_tagprog = None
def splittag(url):
    """splittag('/path#tag') --> '/path', 'tag'."""
    global _tagprog
    if _tagprog is None:
        # 're' is already imported at module level; the redundant
        # function-local import was removed.  Greedy '.*' splits at
        # the last '#'.
        _tagprog = re.compile('^(.*)#([^#]*)$')

    match = _tagprog.match(url)
    if match: return match.group(1, 2)
    return url, None
1179
def splitattr(url):
    """splitattr('/path;attr1=value1;attr2=value2;...') ->
        '/path', ['attr1=value1', 'attr2=value2', ...]."""
    path, sep, rest = url.partition(';')
    # No ';' at all yields an empty attribute list.
    return path, rest.split(';') if sep else []
1185
_valueprog = None
def splitvalue(attr):
    """splitvalue('attr=value') --> 'attr', 'value'."""
    global _valueprog
    if _valueprog is None:
        # 're' is already imported at module level; the redundant
        # function-local import was removed.  Splits at the *first* '='.
        _valueprog = re.compile('^([^=]*)=(.*)$')

    match = _valueprog.match(attr)
    if match: return match.group(1, 2)
    return attr, None
1197
# urlparse contains a duplicate of this method to avoid a circular import. If
# you update this method, also update the copy in urlparse. This code
# duplication does not exist in Python3.

_hexdig = '0123456789ABCDEFabcdef'
# Map every two-hex-digit string (all case combinations) to its byte
# character, e.g. '20' -> ' ', for one-step %XX decoding.
_hextochr = dict((a + b, chr(int(a + b, 16)))
                 for a in _hexdig for b in _hexdig)
# Matches maximal runs of ASCII characters; used to carve unicode input
# into ASCII runs (decodable) and non-ASCII runs (passed through).
_asciire = re.compile('([\x00-\x7f]+)')

def unquote(s):
    """unquote('abc%20def') -> 'abc def'.

    Accepts str or unicode; invalid %-escapes are left in the result
    literally rather than raising.
    """
    if _is_unicode(s):
        if '%' not in s:
            return s
        # _asciire.split alternates non-ASCII and ASCII segments; the
        # odd indices are the captured ASCII runs.
        bits = _asciire.split(s)
        res = [bits[0]]
        append = res.append
        for i in range(1, len(bits), 2):
            # Decode the ASCII run as a byte string, then re-widen via
            # latin-1 (a 1:1 byte -> unicode mapping).
            append(unquote(str(bits[i])).decode('latin1'))
            append(bits[i + 1])
        return ''.join(res)

    bits = s.split('%')
    # fastpath: no '%' at all, nothing to decode
    if len(bits) == 1:
        return s
    res = [bits[0]]
    append = res.append
    for item in bits[1:]:
        try:
            append(_hextochr[item[:2]])
            append(item[2:])
        except KeyError:
            # Not two valid hex digits after the '%': keep it literal.
            append('%')
            append(item)
    return ''.join(res)
1234
def unquote_plus(s):
    """unquote('%7e/abc+def') -> '~/abc def'"""
    # '+' encodes a space in query strings; map it before %-decoding.
    return unquote(s.replace('+', ' '))
1239
# Characters that never need quoting in any part of a URL.
always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
               'abcdefghijklmnopqrstuvwxyz'
               '0123456789' '_.-')
# Map each of the 256 byte values to itself (when always safe) or to
# its '%XX' escape.  str(bytearray(xrange(256))) is the 256 one-byte
# characters in order, pairing each index i with chr(i).
_safe_map = {}
for i, c in zip(xrange(256), str(bytearray(xrange(256)))):
    _safe_map[c] = c if (i < 128 and c in always_safe) else '%{:02X}'.format(i)
# Cache: (safe, always_safe) -> (quoter callable, combined safe string),
# populated lazily by quote().
_safe_quoters = {}
1247
def quote(s, safe='/'):
    """quote('abc def') -> 'abc%20def'

    Percent-encode every character of *s* that is neither always safe
    (letters, digits, '_.-') nor listed in *safe*.

    Each part of a URL (path, query, ...) has its own set of reserved
    characters; RFC 2396 reserves ";" "/" "?" ":" "@" "&" "=" "+" "$"
    "," in various components.  quote() is aimed at the path section,
    which is why '/' is safe by default: existing slashes there are
    normally meant as separators, not data.
    """
    # fastpath: the empty string quotes to itself; None is an error.
    if not s:
        if s is None:
            raise TypeError('None object cannot be quoted')
        return s
    cachekey = (safe, always_safe)
    try:
        quoter, safe = _safe_quoters[cachekey]
    except KeyError:
        # First use of this 'safe' set: extend the default byte map
        # and remember the resulting quoter for later calls.
        safe_map = _safe_map.copy()
        for ch in safe:
            safe_map[ch] = ch
        quoter = safe_map.__getitem__
        safe = always_safe + safe
        _safe_quoters[cachekey] = (quoter, safe)
    # If stripping every safe character empties s, nothing needs quoting.
    if not s.rstrip(safe):
        return s
    return ''.join(map(quoter, s))
1286
def quote_plus(s, safe=''):
    """Quote the query fragment of a URL; replacing ' ' with '+'"""
    if ' ' not in s:
        return quote(s, safe)
    # Let quote() treat ' ' as safe, then turn each space into '+'.
    return quote(s, safe + ' ').replace(' ', '+')
1293
def urlencode(query, doseq=0):
    """Encode a sequence of two-element tuples or dictionary into a URL query string.

    If any values in the query arg are sequences and doseq is true, each
    sequence element is converted to a separate parameter.

    If the query arg is a sequence of two-element tuples, the order of the
    parameters in the output will match the order of parameters in the
    input.

    Raises TypeError when *query* is neither a mapping nor a sequence
    of pairs.
    """

    if hasattr(query,"items"):
        # mapping objects
        query = query.items()
    else:
        # it's a bother at times that strings and string-like objects are
        # sequences...
        try:
            # non-sequence items should not work with len()
            # non-empty strings will fail this
            if len(query) and not isinstance(query[0], tuple):
                raise TypeError
            # zero-length sequences of all types will get here and succeed,
            # but that's a minor nit - since the original implementation
            # allowed empty dicts that type of behavior probably should be
            # preserved for consistency
        except TypeError:
            # Re-raise with a clearer message but keep the original
            # traceback so the failure points into the caller's data.
            ty,va,tb = sys.exc_info()
            raise TypeError, "not a valid non-string sequence or mapping object", tb

    l = []
    if not doseq:
        # preserve old behavior: every value is stringified wholesale
        for k, v in query:
            k = quote_plus(str(k))
            v = quote_plus(str(v))
            l.append(k + '=' + v)
    else:
        for k, v in query:
            k = quote_plus(str(k))
            if isinstance(v, str):
                v = quote_plus(v)
                l.append(k + '=' + v)
            elif _is_unicode(v):
                # is there a reasonable way to convert to ASCII?
                # encode generates a string, but "replace" or "ignore"
                # lose information and "strict" can raise UnicodeError
                v = quote_plus(v.encode("ASCII","replace"))
                l.append(k + '=' + v)
            else:
                try:
                    # is this a sufficient test for sequence-ness?
                    len(v)
                except TypeError:
                    # not a sequence
                    v = quote_plus(str(v))
                    l.append(k + '=' + v)
                else:
                    # loop over the sequence, one k=elt pair per element
                    for elt in v:
                        l.append(k + '=' + quote_plus(str(elt)))
    return '&'.join(l)
1356
# Proxy handling
def getproxies_environment():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Scan the environment for variables named <scheme>_proxy;
    this seems to be the standard convention.  If you need a
    different way, you can pass a proxies dictionary to the
    [Fancy]URLopener constructor.
    """
    proxies = {}
    for name, value in os.environ.items():
        name = name.lower()
        if value and name[-6:] == '_proxy':
            proxies[name[:-6]] = value
    # CVE-2016-1000110 ("httpoxy"): when running as CGI, the client's
    # "Proxy:" request header is exposed as HTTP_PROXY in the
    # environment, letting an attacker redirect outbound HTTP requests.
    # REQUEST_METHOD is set by CGI servers, so treat its presence as
    # "ignore any HTTP_PROXY value".
    if 'REQUEST_METHOD' in os.environ:
        proxies.pop('http', None)
    return proxies
1373
def proxy_bypass_environment(host):
    """Test if proxies should not be used for a particular host.

    Checks the environment for a variable named no_proxy, which should
    be a list of DNS suffixes separated by commas, or '*' for all hosts.
    """
    no_proxy = os.environ.get('no_proxy', '') or os.environ.get('NO_PROXY', '')
    # '*' is special case for always bypass
    if no_proxy == '*':
        return 1
    # strip port off host
    hostonly, port = splitport(host)
    # bypass when the host ends with any of the listed DNS suffixes
    for name in no_proxy.split(','):
        name = name.strip()
        if name and (hostonly.endswith(name) or host.endswith(name)):
            return 1
    # otherwise, don't bypass
    return 0
1393
1394
1395if sys.platform == 'darwin':
1396 from _scproxy import _get_proxy_settings, _get_proxies
1397
    def proxy_bypass_macosx_sysconf(host):
        """
        Return True iff this host shouldn't be accessed using a proxy

        This function uses the MacOSX framework SystemConfiguration
        to fetch the proxy information.
        """
        import re
        import socket
        from fnmatch import fnmatch

        hostonly, port = splitport(host)

        def ip2num(ipAddr):
            # Pack a dotted string into a 32-bit integer; fewer than
            # four components are zero-padded on the right.
            parts = ipAddr.split('.')
            parts = map(int, parts)
            if len(parts) != 4:
                parts = (parts + [0, 0, 0, 0])[:4]
            return (parts[0] << 24) | (parts[1] << 16) | (parts[2] << 8) | parts[3]

        proxy_settings = _get_proxy_settings()

        # Check for simple host names:
        if '.' not in host:
            if proxy_settings['exclude_simple']:
                return True

        # Resolved numeric form of hostonly, computed lazily below.
        hostIP = None

        for value in proxy_settings.get('exceptions', ()):
            # Items in the list are strings like these: *.local, 169.254/16
            if not value: continue

            m = re.match(r"(\d+(?:\.\d+)*)(/\d+)?", value)
            if m is not None:
                # Numeric exception: compare network prefixes.
                if hostIP is None:
                    try:
                        hostIP = socket.gethostbyname(hostonly)
                        hostIP = ip2num(hostIP)
                    except socket.error:
                        continue

                base = ip2num(m.group(1))
                mask = m.group(2)
                if mask is None:
                    # No explicit prefix length: infer the number of
                    # host bits from the dotted components present
                    # (e.g. '169.254' -> shift away 16 bits).
                    mask = 8 * (m.group(1).count('.') + 1)

                else:
                    # Explicit '/N' prefix: N network bits leaves
                    # 32-N host bits to shift away.
                    mask = int(mask[1:])
                    mask = 32 - mask

                if (hostIP >> mask) == (base >> mask):
                    return True

            elif fnmatch(host, value):
                # Non-numeric exception: shell-style wildcard match.
                return True

        return False
1456
    def getproxies_macosx_sysconf():
        """Return a dictionary of scheme -> proxy server URL mappings.

        This function uses the MacOSX framework SystemConfiguration
        to fetch the proxy information.
        """
        # Thin wrapper: _scproxy does all the work.
        return _get_proxies()
1464
1465 def proxy_bypass(host):
1466 if getproxies_environment():
1467 return proxy_bypass_environment(host)
1468 else:
1469 return proxy_bypass_macosx_sysconf(host)
1470
    def getproxies():
        # Environment variables win; fall back to the MacOSX system
        # configuration when none are set.
        return getproxies_environment() or getproxies_macosx_sysconf()
1473
1474elif os.name == 'nt':
    def getproxies_registry():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Win32 uses the registry to store proxies.

        Returns an empty dict when proxying is disabled or the
        registry cannot be read.
        """
        proxies = {}
        try:
            import _winreg
        except ImportError:
            # Std module, so should be around - but you never know!
            return proxies
        try:
            internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            proxyEnable = _winreg.QueryValueEx(internetSettings,
                                               'ProxyEnable')[0]
            if proxyEnable:
                # Returned as Unicode but problems if not converted to ASCII
                proxyServer = str(_winreg.QueryValueEx(internetSettings,
                                                       'ProxyServer')[0])
                if '=' in proxyServer:
                    # Per-protocol settings, e.g. "http=host:80;ftp=host:21"
                    for p in proxyServer.split(';'):
                        protocol, address = p.split('=', 1)
                        # See if address has a type:// prefix
                        import re
                        if not re.match('^([^/:]+)://', address):
                            address = '%s://%s' % (protocol, address)
                        proxies[protocol] = address
                else:
                    # Use one setting for all protocols
                    if proxyServer[:5] == 'http:':
                        proxies['http'] = proxyServer
                    else:
                        proxies['http'] = 'http://%s' % proxyServer
                        proxies['https'] = 'https://%s' % proxyServer
                        proxies['ftp'] = 'ftp://%s' % proxyServer
            internetSettings.Close()
        except (WindowsError, ValueError, TypeError):
            # Either registry key not found etc, or the value in an
            # unexpected format.
            # proxies already set up to be empty so nothing to do
            pass
        return proxies
1520
    def getproxies():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Returns settings gathered from the environment, if specified,
        or the registry.

        """
        # Environment variables win over registry settings.
        return getproxies_environment() or getproxies_registry()
1529
    def proxy_bypass_registry(host):
        # Return 1 when the registry's ProxyOverride list says *host*
        # should bypass the proxy, else 0.
        try:
            import _winreg
            import re
        except ImportError:
            # Std modules, so should be around - but you never know!
            return 0
        try:
            internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            proxyEnable = _winreg.QueryValueEx(internetSettings,
                                               'ProxyEnable')[0]
            proxyOverride = str(_winreg.QueryValueEx(internetSettings,
                                                     'ProxyOverride')[0])
            # ^^^^ Returned as Unicode but problems if not converted to ASCII
        except WindowsError:
            return 0
        if not proxyEnable or not proxyOverride:
            return 0
        # try to make a host list from name and IP address.
        rawHost, port = splitport(host)
        host = [rawHost]
        try:
            addr = socket.gethostbyname(rawHost)
            if addr != rawHost:
                host.append(addr)
        except socket.error:
            pass
        try:
            fqdn = socket.getfqdn(rawHost)
            if fqdn != rawHost:
                host.append(fqdn)
        except socket.error:
            pass
        # make a check value list from the registry entry: replace the
        # '<local>' string by the localhost entry and the corresponding
        # canonical entry.
        proxyOverride = proxyOverride.split(';')
        # now check if we match one of the registry values.
        for test in proxyOverride:
            if test == '<local>':
                # '<local>' means: bypass for plain (dotless) host names.
                if '.' not in rawHost:
                    return 1
            # Translate the override entry's glob syntax into a regex
            # and try it against each candidate form of the host.
            test = test.replace(".", r"\.")     # mask dots
            test = test.replace("*", r".*")     # change glob sequence
            test = test.replace("?", r".")      # change glob char
            for val in host:
                # print "%s <--> %s" %( test, val )
                if re.match(test, val, re.I):
                    return 1
        return 0
1581
    def proxy_bypass(host):
        """Return 1 (true) if *host* should bypass the proxy, else 0.

        Environment no_proxy settings take precedence whenever any
        <scheme>_proxy environment variable is set; otherwise the
        Windows registry's ProxyOverride value is consulted.

        NOTE(review): the previous docstring ("Return a dictionary of
        scheme -> proxy server URL mappings...") was copied from
        getproxies() and did not describe this function.
        """
        if getproxies_environment():
            return proxy_bypass_environment(host)
        else:
            return proxy_bypass_registry(host)
1593
else:
    # All other platforms (e.g. posix): proxies come only from the
    # environment; there is no system-wide proxy store to consult.
    getproxies = getproxies_environment
    proxy_bypass = proxy_bypass_environment
1598
1599# Test and time quote() and unquote()
1600def test1():
1601 s = ''
1602 for i in range(256): s = s + chr(i)
1603 s = s*4
1604 t0 = time.time()
1605 qs = quote(s)
1606 uqs = unquote(qs)
1607 t1 = time.time()
1608 if uqs != s:
1609 print 'Wrong!'
1610 print repr(s)
1611 print repr(qs)
1612 print repr(uqs)
1613 print round(t1 - t0, 3), 'sec'
1614
1615
1616def reporthook(blocknum, blocksize, totalsize):
1617 # Report during remote transfers
1618 print "Block number: %d, Block size: %d, Total size: %d" % (
1619 blocknum, blocksize, totalsize)
# Note: See TracBrowser for help on using the repository browser.