Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

urllib.py@ 388

Last change on this file since 388 was 2, checked in by Yuri Dario, 15 years ago
Initial import for vendor code.
Property svn:eol-style set to `native`
File size: 56.5 KB

Line
1	"""Open an arbitrary URL.
2
3	See the following document for more info on URLs:
4	"Names and Addresses, URIs, URLs, URNs, URCs", at
5	http://www.w3.org/pub/WWW/Addressing/Overview.html
6
7	See also the HTTP spec (from which the error codes are derived):
8	"HTTP - Hypertext Transfer Protocol", at
9	http://www.w3.org/pub/WWW/Protocols/
10
11	Related standards and specs:
12	- RFC1808: the "relative URL" spec. (authoritative status)
13	- RFC1738 - the "URL standard". (authoritative status)
14	- RFC1630 - the "URI spec". (informational status)
15
16	The object returned by URLopener().open(file) will differ per
17	protocol. All you know is that it has methods read(), readline(),
18	readlines(), fileno(), close() and info(). The read*(), fileno()
19	and close() methods work like those of open files.
20	The info() method returns a mimetools.Message object which can be
21	used to query various info about the object, if available.
22	(mimetools.Message objects are queried with the getheader() method.)
23	"""
24
25	import string
26	import socket
27	import os
28	import time
29	import sys
30	from urlparse import urljoin as basejoin
31
32	__all__ = ["urlopen", "URLopener", "FancyURLopener", "urlretrieve",
33	"urlcleanup", "quote", "quote_plus", "unquote", "unquote_plus",
34	"urlencode", "url2pathname", "pathname2url", "splittag",
35	"localhost", "thishost", "ftperrors", "basejoin", "unwrap",
36	"splittype", "splithost", "splituser", "splitpasswd", "splitport",
37	"splitnport", "splitquery", "splitattr", "splitvalue",
38	"getproxies"]
39
40	__version__ = '1.17' # XXX This version is not always updated :-(
41
42	MAXFTPCACHE = 10 # Trim the ftp cache beyond this size
43
44	# Helper for non-unix systems
# Platforms with their own path syntax ship a dedicated converter module;
# everything else falls back to plain quoting/unquoting.
if os.name not in ('mac', 'nt', 'riscos'):
    def url2pathname(pathname):
        """Turn the path part of a 'file' URL into a file system path.

        OS-specific helper; not recommended for general use.
        """
        return unquote(pathname)

    def pathname2url(pathname):
        """Turn a file system path into the path part of a 'file' URL.

        OS-specific helper; not recommended for general use.
        """
        return quote(pathname)
elif os.name == 'mac':
    from macurl2path import url2pathname, pathname2url
elif os.name == 'nt':
    from nturl2path import url2pathname, pathname2url
else:
    from rourl2path import url2pathname, pathname2url
61
62	# This really consists of two pieces:
63	# (1) a class which handles opening of all sorts of URLs
64	# (plus assorted utilities etc.)
65	# (2) a set of functions for parsing URLs
66	# XXX Should these be separated out into different modules?
67
68
69	# Shortcut for basic usage
70	_urlopener = None
def urlopen(url, data=None, proxies=None):
    """Open *url* and return a file-like object to read from.

    url     -- the URL to open.
    data    -- optional request body; when given it is passed on to the
               opener's open() method.
    proxies -- optional proxy mapping; when given, a fresh FancyURLopener
               is built for this call instead of the shared one.
    """
    from warnings import warnpy3k
    warnpy3k("urllib.urlopen() has been removed in Python 3.0 in "
             "favor of urllib2.urlopen()", stacklevel=2)

    global _urlopener
    if proxies is None:
        # Lazily create the module-wide opener and reuse it afterwards.
        if not _urlopener:
            _urlopener = FancyURLopener()
        opener = _urlopener
    else:
        opener = FancyURLopener(proxies=proxies)
    if data is not None:
        return opener.open(url, data)
    return opener.open(url)
def urlretrieve(url, filename=None, reporthook=None, data=None):
    """Fetch *url* through the shared opener's retrieve() method.

    The shared FancyURLopener is created on first use.  All arguments are
    forwarded unchanged and retrieve()'s result is returned.
    """
    global _urlopener
    opener = _urlopener
    if not opener:
        opener = _urlopener = FancyURLopener()
    return opener.retrieve(url, filename, reporthook, data)
def urlcleanup():
    """Run cleanup() on the shared opener, if one has been created."""
    opener = _urlopener
    if not opener:
        return
    opener.cleanup()
97
98	# check for SSL
99	try:
100	import ssl
101	except:
102	_have_ssl = False
103	else:
104	_have_ssl = True
105
106	# exception raised when downloaded size does not match content-length
class ContentTooShortError(IOError):
    """Raised when fewer bytes were downloaded than Content-Length announced.

    The extra *content* value handed to the constructor is kept on the
    instance as the ``content`` attribute.
    """

    def __init__(self, message, content):
        super(ContentTooShortError, self).__init__(message)
        self.content = content
111
112	ftpcache = {}
113	class URLopener:
114	"""Class to open URLs.
115	This is a class rather than just a subroutine because we may need
116	more than one set of global protocol-specific options.
117	Note -- this is a base class for those who don't want the
118	automatic handling of errors type 302 (relocated) and 401
119	(authorization needed)."""
120
121	__tempfiles = None
122
123	version = "Python-urllib/%s" % __version__
124
125	# Constructor
126	def __init__(self, proxies=None, **x509):
127	if proxies is None:
128	proxies = getproxies()
129	assert hasattr(proxies, 'has_key'), "proxies must be a mapping"
130	self.proxies = proxies
131	self.key_file = x509.get('key_file')
132	self.cert_file = x509.get('cert_file')
133	self.addheaders = [('User-Agent', self.version)]
134	self.__tempfiles = []
135	self.__unlink = os.unlink # See cleanup()
136	self.tempcache = None
137	# Undocumented feature: if you assign {} to tempcache,
138	# it is used to cache files retrieved with
139	# self.retrieve(). This is not enabled by default
140	# since it does not work for changing documents (and I
141	# haven't got the logic to check expiration headers
142	# yet).
143	self.ftpcache = ftpcache
144	# Undocumented feature: you can use a different
145	# ftp cache by assigning to the .ftpcache member;
146	# in case you want logically independent URL openers
147	# XXX This is not threadsafe. Bah.
148
149	def __del__(self):
150	self.close()
151
152	def close(self):
153	self.cleanup()
154
155	def cleanup(self):
156	# This code sometimes runs when the rest of this module
157	# has already been deleted, so it can't use any globals
158	# or import anything.
159	if self.__tempfiles:
160	for file in self.__tempfiles:
161	try:
162	self.__unlink(file)
163	except OSError:
164	pass
165	del self.__tempfiles[:]
166	if self.tempcache:
167	self.tempcache.clear()
168
169	def addheader(self, *args):
170	"""Add a header to be used by the HTTP interface only
171	e.g. u.addheader('Accept', 'sound/basic')"""
172	self.addheaders.append(args)
173
174	# External interface
175	def open(self, fullurl, data=None):
176	"""Use URLopener().open(file) instead of open(file, 'r')."""
177	fullurl = unwrap(toBytes(fullurl))
178	# percent encode url. fixing lame server errors like space within url
179	# parts
180	fullurl = quote(fullurl, safe="%/:=&?~#+!$,;'@()*[]\|")
181	if self.tempcache and fullurl in self.tempcache:
182	filename, headers = self.tempcache[fullurl]
183	fp = open(filename, 'rb')
184	return addinfourl(fp, headers, fullurl)
185	urltype, url = splittype(fullurl)
186	if not urltype:
187	urltype = 'file'
188	if urltype in self.proxies:
189	proxy = self.proxies[urltype]
190	urltype, proxyhost = splittype(proxy)
191	host, selector = splithost(proxyhost)
192	url = (host, fullurl) # Signal special case to open_*()
193	else:
194	proxy = None
195	name = 'open_' + urltype
196	self.type = urltype
197	name = name.replace('-', '_')
198	if not hasattr(self, name):
199	if proxy:
200	return self.open_unknown_proxy(proxy, fullurl, data)
201	else:
202	return self.open_unknown(fullurl, data)
203	try:
204	if data is None:
205	return getattr(self, name)(url)
206	else:
207	return getattr(self, name)(url, data)
208	except socket.error, msg:
209	raise IOError, ('socket error', msg), sys.exc_info()[2]
210
211	def open_unknown(self, fullurl, data=None):
212	"""Overridable interface to open unknown URL type."""
213	type, url = splittype(fullurl)
214	raise IOError, ('url error', 'unknown url type', type)
215
216	def open_unknown_proxy(self, proxy, fullurl, data=None):
217	"""Overridable interface to open unknown URL type."""
218	type, url = splittype(fullurl)
219	raise IOError, ('url error', 'invalid proxy for %s' % type, proxy)
220
221	# External interface
222	def retrieve(self, url, filename=None, reporthook=None, data=None):
223	"""retrieve(url) returns (filename, headers) for a local object
224	or (tempfilename, headers) for a remote object."""
225	url = unwrap(toBytes(url))
226	if self.tempcache and url in self.tempcache:
227	return self.tempcache[url]
228	type, url1 = splittype(url)
229	if filename is None and (not type or type == 'file'):
230	try:
231	fp = self.open_local_file(url1)
232	hdrs = fp.info()
233	del fp
234	return url2pathname(splithost(url1)[1]), hdrs
235	except IOError, msg:
236	pass
237	fp = self.open(url, data)
238	try:
239	headers = fp.info()
240	if filename:
241	tfp = open(filename, 'wb')
242	else:
243	import tempfile
244	garbage, path = splittype(url)
245	garbage, path = splithost(path or "")
246	path, garbage = splitquery(path or "")
247	path, garbage = splitattr(path or "")
248	suffix = os.path.splitext(path)[1]
249	(fd, filename) = tempfile.mkstemp(suffix)
250	self.__tempfiles.append(filename)
251	tfp = os.fdopen(fd, 'wb')
252	try:
253	result = filename, headers
254	if self.tempcache is not None:
255	self.tempcache[url] = result
256	bs = 1024*8
257	size = -1
258	read = 0
259	blocknum = 0
260	if reporthook:
261	if "content-length" in headers:
262	size = int(headers["Content-Length"])
263	reporthook(blocknum, bs, size)
264	while 1:
265	block = fp.read(bs)
266	if block == "":
267	break
268	read += len(block)
269	tfp.write(block)
270	blocknum += 1
271	if reporthook:
272	reporthook(blocknum, bs, size)
273	finally:
274	tfp.close()
275	finally:
276	fp.close()
277	del fp
278	del tfp
279
280	# raise exception if actual size does not match content-length header
281	if size >= 0 and read < size:
282	raise ContentTooShortError("retrieval incomplete: got only %i out "
283	"of %i bytes" % (read, size), result)
284
285	return result
286
287	# Each method named open_<type> knows how to open that type of URL
288
289	def open_http(self, url, data=None):
290	"""Use HTTP protocol."""
291	import httplib
292	user_passwd = None
293	proxy_passwd= None
294	if isinstance(url, str):
295	host, selector = splithost(url)
296	if host:
297	user_passwd, host = splituser(host)
298	host = unquote(host)
299	realhost = host
300	else:
301	host, selector = url
302	# check whether the proxy contains authorization information
303	proxy_passwd, host = splituser(host)
304	# now we proceed with the url we want to obtain
305	urltype, rest = splittype(selector)
306	url = rest
307	user_passwd = None
308	if urltype.lower() != 'http':
309	realhost = None
310	else:
311	realhost, rest = splithost(rest)
312	if realhost:
313	user_passwd, realhost = splituser(realhost)
314	if user_passwd:
315	selector = "%s://%s%s" % (urltype, realhost, rest)
316	if proxy_bypass(realhost):
317	host = realhost
318
319	#print "proxy via http:", host, selector
320	if not host: raise IOError, ('http error', 'no host given')
321
322	if proxy_passwd:
323	import base64
324	proxy_auth = base64.b64encode(proxy_passwd).strip()
325	else:
326	proxy_auth = None
327
328	if user_passwd:
329	import base64
330	auth = base64.b64encode(user_passwd).strip()
331	else:
332	auth = None
333	h = httplib.HTTP(host)
334	if data is not None:
335	h.putrequest('POST', selector)
336	h.putheader('Content-Type', 'application/x-www-form-urlencoded')
337	h.putheader('Content-Length', '%d' % len(data))
338	else:
339	h.putrequest('GET', selector)
340	if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
341	if auth: h.putheader('Authorization', 'Basic %s' % auth)
342	if realhost: h.putheader('Host', realhost)
343	for args in self.addheaders: h.putheader(*args)
344	h.endheaders()
345	if data is not None:
346	h.send(data)
347	errcode, errmsg, headers = h.getreply()
348	fp = h.getfile()
349	if errcode == -1:
350	if fp: fp.close()
351	# something went wrong with the HTTP status line
352	raise IOError, ('http protocol error', 0,
353	'got a bad status line', None)
354	# According to RFC 2616, "2xx" code indicates that the client's
355	# request was successfully received, understood, and accepted.
356	if (200 <= errcode < 300):
357	return addinfourl(fp, headers, "http:" + url, errcode)
358	else:
359	if data is None:
360	return self.http_error(url, fp, errcode, errmsg, headers)
361	else:
362	return self.http_error(url, fp, errcode, errmsg, headers, data)
363
364	def http_error(self, url, fp, errcode, errmsg, headers, data=None):
365	"""Handle http errors.
366	Derived class can override this, or provide specific handlers
367	named http_error_DDD where DDD is the 3-digit error code."""
368	# First check if there's a specific handler for this error
369	name = 'http_error_%d' % errcode
370	if hasattr(self, name):
371	method = getattr(self, name)
372	if data is None:
373	result = method(url, fp, errcode, errmsg, headers)
374	else:
375	result = method(url, fp, errcode, errmsg, headers, data)
376	if result: return result
377	return self.http_error_default(url, fp, errcode, errmsg, headers)
378
379	def http_error_default(self, url, fp, errcode, errmsg, headers):
380	"""Default error handler: close the connection and raise IOError."""
381	void = fp.read()
382	fp.close()
383	raise IOError, ('http error', errcode, errmsg, headers)
384
385	if _have_ssl:
386	def open_https(self, url, data=None):
387	"""Use HTTPS protocol."""
388
389	import httplib
390	user_passwd = None
391	proxy_passwd = None
392	if isinstance(url, str):
393	host, selector = splithost(url)
394	if host:
395	user_passwd, host = splituser(host)
396	host = unquote(host)
397	realhost = host
398	else:
399	host, selector = url
400	# here, we determine, whether the proxy contains authorization information
401	proxy_passwd, host = splituser(host)
402	urltype, rest = splittype(selector)
403	url = rest
404	user_passwd = None
405	if urltype.lower() != 'https':
406	realhost = None
407	else:
408	realhost, rest = splithost(rest)
409	if realhost:
410	user_passwd, realhost = splituser(realhost)
411	if user_passwd:
412	selector = "%s://%s%s" % (urltype, realhost, rest)
413	#print "proxy via https:", host, selector
414	if not host: raise IOError, ('https error', 'no host given')
415	if proxy_passwd:
416	import base64
417	proxy_auth = base64.b64encode(proxy_passwd).strip()
418	else:
419	proxy_auth = None
420	if user_passwd:
421	import base64
422	auth = base64.b64encode(user_passwd).strip()
423	else:
424	auth = None
425	h = httplib.HTTPS(host, 0,
426	key_file=self.key_file,
427	cert_file=self.cert_file)
428	if data is not None:
429	h.putrequest('POST', selector)
430	h.putheader('Content-Type',
431	'application/x-www-form-urlencoded')
432	h.putheader('Content-Length', '%d' % len(data))
433	else:
434	h.putrequest('GET', selector)
435	if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
436	if auth: h.putheader('Authorization', 'Basic %s' % auth)
437	if realhost: h.putheader('Host', realhost)
438	for args in self.addheaders: h.putheader(*args)
439	h.endheaders()
440	if data is not None:
441	h.send(data)
442	errcode, errmsg, headers = h.getreply()
443	fp = h.getfile()
444	if errcode == -1:
445	if fp: fp.close()
446	# something went wrong with the HTTP status line
447	raise IOError, ('http protocol error', 0,
448	'got a bad status line', None)
449	# According to RFC 2616, "2xx" code indicates that the client's
450	# request was successfully received, understood, and accepted.
451	if (200 <= errcode < 300):
452	return addinfourl(fp, headers, "https:" + url, errcode)
453	else:
454	if data is None:
455	return self.http_error(url, fp, errcode, errmsg, headers)
456	else:
457	return self.http_error(url, fp, errcode, errmsg, headers,
458	data)
459
460	def open_file(self, url):
461	"""Use local file or FTP depending on form of URL."""
462	if not isinstance(url, str):
463	raise IOError, ('file error', 'proxy support for file protocol currently not implemented')
464	if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/':
465	return self.open_ftp(url)
466	else:
467	return self.open_local_file(url)
468
469	def open_local_file(self, url):
470	"""Use local file."""
471	import mimetypes, mimetools, email.utils
472	try:
473	from cStringIO import StringIO
474	except ImportError:
475	from StringIO import StringIO
476	host, file = splithost(url)
477	localname = url2pathname(file)
478	try:
479	stats = os.stat(localname)
480	except OSError, e:
481	raise IOError(e.errno, e.strerror, e.filename)
482	size = stats.st_size
483	modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
484	mtype = mimetypes.guess_type(url)[0]
485	headers = mimetools.Message(StringIO(
486	'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
487	(mtype or 'text/plain', size, modified)))
488	if not host:
489	urlfile = file
490	if file[:1] == '/':
491	urlfile = 'file://' + file
492	return addinfourl(open(localname, 'rb'),
493	headers, urlfile)
494	host, port = splitport(host)
495	if not port \
496	and socket.gethostbyname(host) in (localhost(), thishost()):
497	urlfile = file
498	if file[:1] == '/':
499	urlfile = 'file://' + file
500	return addinfourl(open(localname, 'rb'),
501	headers, urlfile)
502	raise IOError, ('local file error', 'not on local host')
503
504	def open_ftp(self, url):
505	"""Use FTP protocol."""
506	if not isinstance(url, str):
507	raise IOError, ('ftp error', 'proxy support for ftp protocol currently not implemented')
508	import mimetypes, mimetools
509	try:
510	from cStringIO import StringIO
511	except ImportError:
512	from StringIO import StringIO
513	host, path = splithost(url)
514	if not host: raise IOError, ('ftp error', 'no host given')
515	host, port = splitport(host)
516	user, host = splituser(host)
517	if user: user, passwd = splitpasswd(user)
518	else: passwd = None
519	host = unquote(host)
520	user = unquote(user or '')
521	passwd = unquote(passwd or '')
522	host = socket.gethostbyname(host)
523	if not port:
524	import ftplib
525	port = ftplib.FTP_PORT
526	else:
527	port = int(port)
528	path, attrs = splitattr(path)
529	path = unquote(path)
530	dirs = path.split('/')
531	dirs, file = dirs[:-1], dirs[-1]
532	if dirs and not dirs[0]: dirs = dirs[1:]
533	if dirs and not dirs[0]: dirs[0] = '/'
534	key = user, host, port, '/'.join(dirs)
535	# XXX thread unsafe!
536	if len(self.ftpcache) > MAXFTPCACHE:
537	# Prune the cache, rather arbitrarily
538	for k in self.ftpcache.keys():
539	if k != key:
540	v = self.ftpcache[k]
541	del self.ftpcache[k]
542	v.close()
543	try:
544	if not key in self.ftpcache:
545	self.ftpcache[key] = \
546	ftpwrapper(user, passwd, host, port, dirs)
547	if not file: type = 'D'
548	else: type = 'I'
549	for attr in attrs:
550	attr, value = splitvalue(attr)
551	if attr.lower() == 'type' and \
552	value in ('a', 'A', 'i', 'I', 'd', 'D'):
553	type = value.upper()
554	(fp, retrlen) = self.ftpcache[key].retrfile(file, type)
555	mtype = mimetypes.guess_type("ftp:" + url)[0]
556	headers = ""
557	if mtype:
558	headers += "Content-Type: %s\n" % mtype
559	if retrlen is not None and retrlen >= 0:
560	headers += "Content-Length: %d\n" % retrlen
561	headers = mimetools.Message(StringIO(headers))
562	return addinfourl(fp, headers, "ftp:" + url)
563	except ftperrors(), msg:
564	raise IOError, ('ftp error', msg), sys.exc_info()[2]
565
566	def open_data(self, url, data=None):
567	"""Use "data" URL."""
568	if not isinstance(url, str):
569	raise IOError, ('data error', 'proxy support for data protocol currently not implemented')
570	# ignore POSTed data
571	#
572	# syntax of data URLs:
573	# dataurl := "data:" [ mediatype ] [ ";base64" ] "," data
574	# mediatype := [ type "/" subtype ] *( ";" parameter )
575	# data := *urlchar
576	# parameter := attribute "=" value
577	import mimetools
578	try:
579	from cStringIO import StringIO
580	except ImportError:
581	from StringIO import StringIO
582	try:
583	[type, data] = url.split(',', 1)
584	except ValueError:
585	raise IOError, ('data error', 'bad data URL')
586	if not type:
587	type = 'text/plain;charset=US-ASCII'
588	semi = type.rfind(';')
589	if semi >= 0 and '=' not in type[semi:]:
590	encoding = type[semi+1:]
591	type = type[:semi]
592	else:
593	encoding = ''
594	msg = []
595	msg.append('Date: %s'%time.strftime('%a, %d %b %Y %T GMT',
596	time.gmtime(time.time())))
597	msg.append('Content-type: %s' % type)
598	if encoding == 'base64':
599	import base64
600	data = base64.decodestring(data)
601	else:
602	data = unquote(data)
603	msg.append('Content-Length: %d' % len(data))
604	msg.append('')
605	msg.append(data)
606	msg = '\n'.join(msg)
607	f = StringIO(msg)
608	headers = mimetools.Message(f, 0)
609	#f.fileno = None # needed for addinfourl
610	return addinfourl(f, headers, url)
611
612
613	class FancyURLopener(URLopener):
614	"""Derived class with handlers for errors we can handle (perhaps)."""
615
616	def __init__(self, args, *kwargs):
617	URLopener.__init__(self, args, *kwargs)
618	self.auth_cache = {}
619	self.tries = 0
620	self.maxtries = 10
621
622	def http_error_default(self, url, fp, errcode, errmsg, headers):
623	"""Default error handling -- don't raise an exception."""
624	return addinfourl(fp, headers, "http:" + url, errcode)
625
626	def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
627	"""Error 302 -- relocated (temporarily)."""
628	self.tries += 1
629	if self.maxtries and self.tries >= self.maxtries:
630	if hasattr(self, "http_error_500"):
631	meth = self.http_error_500
632	else:
633	meth = self.http_error_default
634	self.tries = 0
635	return meth(url, fp, 500,
636	"Internal Server Error: Redirect Recursion", headers)
637	result = self.redirect_internal(url, fp, errcode, errmsg, headers,
638	data)
639	self.tries = 0
640	return result
641
642	def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
643	if 'location' in headers:
644	newurl = headers['location']
645	elif 'uri' in headers:
646	newurl = headers['uri']
647	else:
648	return
649	void = fp.read()
650	fp.close()
651	# In case the server sent a relative URL, join with original:
652	newurl = basejoin(self.type + ":" + url, newurl)
653	return self.open(newurl)
654
655	def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
656	"""Error 301 -- also relocated (permanently)."""
657	return self.http_error_302(url, fp, errcode, errmsg, headers, data)
658
659	def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
660	"""Error 303 -- also relocated (essentially identical to 302)."""
661	return self.http_error_302(url, fp, errcode, errmsg, headers, data)
662
663	def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
664	"""Error 307 -- relocated, but turn POST into error."""
665	if data is None:
666	return self.http_error_302(url, fp, errcode, errmsg, headers, data)
667	else:
668	return self.http_error_default(url, fp, errcode, errmsg, headers)
669
670	def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
671	"""Error 401 -- authentication required.
672	This function supports Basic authentication only."""
673	if not 'www-authenticate' in headers:
674	URLopener.http_error_default(self, url, fp,
675	errcode, errmsg, headers)
676	stuff = headers['www-authenticate']
677	import re
678	match = re.match('[ \t]([^ \t]+)[ \t]+realm="([^"])"', stuff)
679	if not match:
680	URLopener.http_error_default(self, url, fp,
681	errcode, errmsg, headers)
682	scheme, realm = match.groups()
683	if scheme.lower() != 'basic':
684	URLopener.http_error_default(self, url, fp,
685	errcode, errmsg, headers)
686	name = 'retry_' + self.type + '_basic_auth'
687	if data is None:
688	return getattr(self,name)(url, realm)
689	else:
690	return getattr(self,name)(url, realm, data)
691
692	def http_error_407(self, url, fp, errcode, errmsg, headers, data=None):
693	"""Error 407 -- proxy authentication required.
694	This function supports Basic authentication only."""
695	if not 'proxy-authenticate' in headers:
696	URLopener.http_error_default(self, url, fp,
697	errcode, errmsg, headers)
698	stuff = headers['proxy-authenticate']
699	import re
700	match = re.match('[ \t]([^ \t]+)[ \t]+realm="([^"])"', stuff)
701	if not match:
702	URLopener.http_error_default(self, url, fp,
703	errcode, errmsg, headers)
704	scheme, realm = match.groups()
705	if scheme.lower() != 'basic':
706	URLopener.http_error_default(self, url, fp,
707	errcode, errmsg, headers)
708	name = 'retry_proxy_' + self.type + '_basic_auth'
709	if data is None:
710	return getattr(self,name)(url, realm)
711	else:
712	return getattr(self,name)(url, realm, data)
713
714	def retry_proxy_http_basic_auth(self, url, realm, data=None):
715	host, selector = splithost(url)
716	newurl = 'http://' + host + selector
717	proxy = self.proxies['http']
718	urltype, proxyhost = splittype(proxy)
719	proxyhost, proxyselector = splithost(proxyhost)
720	i = proxyhost.find('@') + 1
721	proxyhost = proxyhost[i:]
722	user, passwd = self.get_user_passwd(proxyhost, realm, i)
723	if not (user or passwd): return None
724	proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost
725	self.proxies['http'] = 'http://' + proxyhost + proxyselector
726	if data is None:
727	return self.open(newurl)
728	else:
729	return self.open(newurl, data)
730
731	def retry_proxy_https_basic_auth(self, url, realm, data=None):
732	host, selector = splithost(url)
733	newurl = 'https://' + host + selector
734	proxy = self.proxies['https']
735	urltype, proxyhost = splittype(proxy)
736	proxyhost, proxyselector = splithost(proxyhost)
737	i = proxyhost.find('@') + 1
738	proxyhost = proxyhost[i:]
739	user, passwd = self.get_user_passwd(proxyhost, realm, i)
740	if not (user or passwd): return None
741	proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost
742	self.proxies['https'] = 'https://' + proxyhost + proxyselector
743	if data is None:
744	return self.open(newurl)
745	else:
746	return self.open(newurl, data)
747
748	def retry_http_basic_auth(self, url, realm, data=None):
749	host, selector = splithost(url)
750	i = host.find('@') + 1
751	host = host[i:]
752	user, passwd = self.get_user_passwd(host, realm, i)
753	if not (user or passwd): return None
754	host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
755	newurl = 'http://' + host + selector
756	if data is None:
757	return self.open(newurl)
758	else:
759	return self.open(newurl, data)
760
761	def retry_https_basic_auth(self, url, realm, data=None):
762	host, selector = splithost(url)
763	i = host.find('@') + 1
764	host = host[i:]
765	user, passwd = self.get_user_passwd(host, realm, i)
766	if not (user or passwd): return None
767	host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
768	newurl = 'https://' + host + selector
769	if data is None:
770	return self.open(newurl)
771	else:
772	return self.open(newurl, data)
773
774	def get_user_passwd(self, host, realm, clear_cache = 0):
775	key = realm + '@' + host.lower()
776	if key in self.auth_cache:
777	if clear_cache:
778	del self.auth_cache[key]
779	else:
780	return self.auth_cache[key]
781	user, passwd = self.prompt_user_passwd(host, realm)
782	if user or passwd: self.auth_cache[key] = (user, passwd)
783	return user, passwd
784
785	def prompt_user_passwd(self, host, realm):
786	"""Override this in a GUI environment!"""
787	import getpass
788	try:
789	user = raw_input("Enter username for %s at %s: " % (realm,
790	host))
791	passwd = getpass.getpass("Enter password for %s in %s at %s: " %
792	(user, realm, host))
793	return user, passwd
794	except KeyboardInterrupt:
795	print
796	return None, None
797
798
799	# Utility functions
800
_localhost = None
def localhost():
    """Return the IP address of the magic hostname 'localhost'.

    The lookup is done once and the answer remembered in _localhost.
    """
    global _localhost
    if _localhost is not None:
        return _localhost
    _localhost = socket.gethostbyname('localhost')
    return _localhost
808
_thishost = None
def thishost():
    """Return the IP address of the current host.

    The lookup is done once and the answer remembered in _thishost.
    """
    global _thishost
    if _thishost is not None:
        return _thishost
    _thishost = socket.gethostbyname(socket.gethostname())
    return _thishost
816
_ftperrors = None
def ftperrors():
    """Return the set of errors raised by the FTP class.

    ftplib is imported on first use only; the value is remembered in
    _ftperrors.
    """
    global _ftperrors
    if _ftperrors is not None:
        return _ftperrors
    import ftplib
    _ftperrors = ftplib.all_errors
    return _ftperrors
825
826	_noheaders = None
827	def noheaders():
828	"""Return an empty mimetools.Message object."""
829	global _noheaders
830	if _noheaders is None:
831	import mimetools
832	try:
833	from cStringIO import StringIO
834	except ImportError:
835	from StringIO import StringIO
836	_noheaders = mimetools.Message(StringIO(), 0)
837	_noheaders.fp.close() # Recycle file descriptor
838	return _noheaders
839
840
841	# Utility classes
842
843	class ftpwrapper:
844	"""Class used by open_ftp() for cache of open FTP connections."""
845
846	def __init__(self, user, passwd, host, port, dirs,
847	timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
848	self.user = user
849	self.passwd = passwd
850	self.host = host
851	self.port = port
852	self.dirs = dirs
853	self.timeout = timeout
854	self.init()
855
856	def init(self):
857	import ftplib
858	self.busy = 0
859	self.ftp = ftplib.FTP()
860	self.ftp.connect(self.host, self.port, self.timeout)
861	self.ftp.login(self.user, self.passwd)
862	for dir in self.dirs:
863	self.ftp.cwd(dir)
864
865	def retrfile(self, file, type):
866	import ftplib
867	self.endtransfer()
868	if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
869	else: cmd = 'TYPE ' + type; isdir = 0
870	try:
871	self.ftp.voidcmd(cmd)
872	except ftplib.all_errors:
873	self.init()
874	self.ftp.voidcmd(cmd)
875	conn = None
876	if file and not isdir:
877	# Try to retrieve as a file
878	try:
879	cmd = 'RETR ' + file
880	conn = self.ftp.ntransfercmd(cmd)
881	except ftplib.error_perm, reason:
882	if str(reason)[:3] != '550':
883	raise IOError, ('ftp error', reason), sys.exc_info()[2]
884	if not conn:
885	# Set transfer mode to ASCII!
886	self.ftp.voidcmd('TYPE A')
887	# Try a directory listing. Verify that directory exists.
888	if file:
889	pwd = self.ftp.pwd()
890	try:
891	try:
892	self.ftp.cwd(file)
893	except ftplib.error_perm, reason:
894	raise IOError, ('ftp error', reason), sys.exc_info()[2]
895	finally:
896	self.ftp.cwd(pwd)
897	cmd = 'LIST ' + file
898	else:
899	cmd = 'LIST'
900	conn = self.ftp.ntransfercmd(cmd)
901	self.busy = 1
902	# Pass back both a suitably decorated object and a retrieval length
903	return (addclosehook(conn[0].makefile('rb'),
904	self.endtransfer), conn[1])
905	def endtransfer(self):
906	if not self.busy:
907	return
908	self.busy = 0
909	try:
910	self.ftp.voidresp()
911	except ftperrors():
912	pass
913
914	def close(self):
915	self.endtransfer()
916	try:
917	self.ftp.close()
918	except ftperrors():
919	pass
920
class addbase:
    """Base class for addinfo and addclosehook.

    Wraps a file object and re-exports its reading methods as instance
    attributes.
    """

    def __init__(self, fp):
        self.fp = fp
        self.read = fp.read
        self.readline = fp.readline
        if hasattr(fp, "readlines"):
            self.readlines = fp.readlines
        if hasattr(fp, "fileno"):
            self.fileno = fp.fileno
        else:
            # No descriptor available: provide a stand-in returning None.
            self.fileno = lambda: None
        # Forward the iteration protocol only where the wrapped file has it.
        for optional in ("__iter__", "next"):
            if hasattr(fp, optional):
                setattr(self, optional, getattr(fp, optional))

    def __repr__(self):
        details = (self.__class__.__name__, id(self), self.fp)
        return '<%s at %r whose fp = %r>' % details

    def close(self):
        """Drop the forwarded methods and close the wrapped file, if any."""
        self.read = None
        self.readline = None
        self.readlines = None
        self.fileno = None
        if self.fp:
            self.fp.close()
        self.fp = None
949
class addclosehook(addbase):
    """Class to add a close hook to an open file.

    The hook is called once, with the stored arguments, after the wrapped
    file has been closed.
    """

    def __init__(self, fp, closehook, *hookargs):
        addbase.__init__(self, fp)
        self.closehook, self.hookargs = closehook, hookargs

    def close(self):
        addbase.close(self)
        if not self.closehook:
            return
        self.closehook(*self.hookargs)
        # Forget the hook so that a second close() does not call it again.
        self.closehook = None
        self.hookargs = None
964
class addinfo(addbase):
    """Class to add an info() method to an open file."""

    def __init__(self, fp, headers):
        addbase.__init__(self, fp)
        self.headers = headers

    def info(self):
        """Return the headers object given to the constructor."""
        return self.headers
974
class addinfourl(addbase):
    """Class to add info(), getcode() and geturl() methods to an open file."""

    def __init__(self, fp, headers, url, code=None):
        addbase.__init__(self, fp)
        self.headers, self.url, self.code = headers, url, code

    def info(self):
        """Return the headers object given to the constructor."""
        return self.headers

    def getcode(self):
        """Return the status code given to the constructor (None if omitted)."""
        return self.code

    def geturl(self):
        """Return the URL given to the constructor."""
        return self.url
992
993
# Utilities to parse URLs (most of these return None for missing parts):
# unwrap('<URL:type://host/path>') --> 'type://host/path'
# splittype('type:opaquestring') --> 'type', 'opaquestring'
# splithost('//host[:port]/path') --> 'host[:port]', '/path'
# splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'
# splitpasswd('user:passwd') --> 'user', 'passwd'
# splitport('host:port') --> 'host', 'port'
# splitquery('/path?query') --> '/path', 'query'
# splittag('/path#tag') --> '/path', 'tag'
# splitattr('/path;attr1=value1;attr2=value2;...') -->
#   '/path', ['attr1=value1', 'attr2=value2', ...]
# splitvalue('attr=value') --> 'attr', 'value'
# unquote('abc%20def') --> 'abc def'
# quote('abc def') --> 'abc%20def'
1008
1009	try:
1010	unicode
1011	except NameError:
1012	def _is_unicode(x):
1013	return 0
1014	else:
1015	def _is_unicode(x):
1016	return isinstance(x, unicode)
1017
def toBytes(url):
    """toBytes(u"URL") --> 'URL'.

    Non-unicode input is returned unchanged.  Unicode input is encoded
    as ASCII; UnicodeError is raised if it holds non-ASCII characters.
    """
    # Most URL schemes require ASCII. If that changes, the conversion
    # can be relaxed
    if not _is_unicode(url):
        return url
    try:
        return url.encode("ASCII")
    except UnicodeError:
        raise UnicodeError("URL " + repr(url) +
                           " contains non-ASCII characters")
1029
def unwrap(url):
    """unwrap('<URL:type://host/path>') --> 'type://host/path'.

    Strips surrounding whitespace, an enclosing '<...>' pair and a
    leading 'URL:' tag, in that order.
    """
    stripped = url.strip()
    if stripped.startswith('<') and stripped.endswith('>'):
        stripped = stripped[1:-1].strip()
    if stripped.startswith('URL:'):
        stripped = stripped[4:].strip()
    return stripped
1037
_typeprog = None
def splittype(url):
    """splittype('type:opaquestring') --> 'type', 'opaquestring'.

    The type is returned in lower case.  Return (None, url) when url
    does not start with a 'type:' prefix.
    """
    global _typeprog
    if _typeprog is None:
        # Compiled on first use and cached in the module global.
        import re
        _typeprog = re.compile('^([^/:]+):')

    found = _typeprog.match(url)
    if found is None:
        return None, url
    scheme = found.group(1)
    # The match covers the scheme and its ':'; the rest starts at its end.
    return scheme.lower(), url[found.end():]
1051
_hostprog = None
def splithost(url):
    """splithost('//host[:port]/path') --> 'host[:port]', '/path'.

    Return (None, url) when url does not start with '//'.
    """
    global _hostprog
    if _hostprog is None:
        # Compiled on first use and cached in the module global.
        import re
        # Group 1: everything after '//' up to the first '/' or '?'.
        # Group 2: the remainder, possibly empty.
        _hostprog = re.compile('^//([^/?]*)(.*)$')

    match = _hostprog.match(url)
    if match is None:
        return None, url
    return match.group(1, 2)
1063
_userprog = None
def splituser(host):
    """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'.

    The split happens at the last '@'; both parts are passed through
    unquote().  Return (None, host) when there is no '@'.
    """
    global _userprog
    if _userprog is None:
        # Compiled on first use and cached in the module global.
        import re
        # Greedy first group: the user part may itself contain '@'.
        _userprog = re.compile('^(.*)@(.*?)$')

    match = _userprog.match(host)
    if match: return map(unquote, match.group(1, 2))
    return None, host
1075
_passwdprog = None
def splitpasswd(user):
    """splitpasswd('user:passwd') -> 'user', 'passwd'.

    The split happens at the first ':'.  Return (user, None) when
    there is no ':'.
    """
    global _passwdprog
    if _passwdprog is None:
        # Compiled on first use and cached in the module global.
        import re
        _passwdprog = re.compile('^([^:]*):(.*)$')

    match = _passwdprog.match(user)
    if match is None:
        return user, None
    return match.group(1, 2)
1087
# splitport('host:port') --> 'host', 'port'
_portprog = None
def splitport(host):
    """splitport('host:port') --> 'host', 'port'.

    The port is returned as a string of digits.  Return (host, None)
    when host does not end in ':' followed by at least one digit.
    """
    global _portprog
    if _portprog is None:
        # Compiled on first use and cached in the module global.
        import re
        _portprog = re.compile('^(.*):([0-9]+)$')

    found = _portprog.match(host)
    if found is None:
        return host, None
    return found.group(1, 2)
1100
_nportprog = None
def splitnport(host, defport=-1):
    """Split host and port, returning numeric port.
    Return given default port if no ':' found; defaults to -1.
    Return numerical port if a valid number is found after ':'.
    Return None if ':' but not a valid number."""
    global _nportprog
    if _nportprog is None:
        # Compiled on first use and cached in the module global.
        import re
        # Greedy first group: the split happens at the last ':'.
        _nportprog = re.compile('^(.*):(.*)$')

    match = _nportprog.match(host)
    if match is None:
        return host, defport
    host, port = match.group(1, 2)
    try:
        if not port:
            # An empty port ('host:') is reported as None, like any
            # other non-numeric port.
            raise ValueError("no digits")
        nport = int(port)
    except ValueError:
        nport = None
    return host, nport
1122
_queryprog = None
def splitquery(url):
    """splitquery('/path?query') --> '/path', 'query'.

    The split happens at the last '?'.  Return (url, None) when there
    is no '?'.
    """
    global _queryprog
    if _queryprog is None:
        # Compiled on first use and cached in the module global.
        import re
        # Raw string so the regex escape '\?' is not a string escape.
        _queryprog = re.compile(r'^(.*)\?([^?]*)$')

    match = _queryprog.match(url)
    if match is None:
        return url, None
    return match.group(1, 2)
1134
_tagprog = None
def splittag(url):
    """splittag('/path#tag') --> '/path', 'tag'.

    The split happens at the last '#'.  Return (url, None) when there
    is no '#'.
    """
    global _tagprog
    if _tagprog is None:
        # Compiled on first use and cached in the module global.
        import re
        _tagprog = re.compile('^(.*)#([^#]*)$')

    match = _tagprog.match(url)
    if match is None:
        return url, None
    return match.group(1, 2)
1146
def splitattr(url):
    """splitattr('/path;attr1=value1;attr2=value2;...') ->
        '/path', ['attr1=value1', 'attr2=value2', ...]."""
    pieces = url.split(';')
    path = pieces[0]
    attrs = pieces[1:]
    return path, attrs
1152
_valueprog = None
def splitvalue(attr):
    """splitvalue('attr=value') --> 'attr', 'value'.

    The split happens at the first '='.  Return (attr, None) when
    there is no '='.
    """
    global _valueprog
    if _valueprog is None:
        # Compiled on first use and cached in the module global.
        import re
        _valueprog = re.compile('^([^=]*)=(.*)$')

    match = _valueprog.match(attr)
    if match is None:
        return attr, None
    return match.group(1, 2)
1164
# Map every two-digit hex string, in any mix of upper and lower case,
# to the character it encodes.
_hexdig = '0123456789ABCDEFabcdef'
_hextochr = dict((a + b, chr(int(a + b, 16)))
                 for a in _hexdig for b in _hexdig)

def unquote(s):
    """unquote('abc%20def') -> 'abc def'.

    A '%' that is not followed by two hex digits is left in place.
    """
    res = s.split('%')
    # res[0] precedes the first '%'; every later item starts with the
    # two characters that followed a '%'.
    for i in range(1, len(res)):
        item = res[i]
        try:
            res[i] = _hextochr[item[:2]] + item[2:]
        except KeyError:
            # Not a valid escape: put the '%' back untouched.
            res[i] = '%' + item
        except UnicodeDecodeError:
            # Raised when a non-ASCII byte is concatenated with a
            # unicode remainder; use the unicode character instead.
            res[i] = unichr(int(item[:2], 16)) + item[2:]
    return "".join(res)
1180
def unquote_plus(s):
    """unquote_plus('%7e/abc+def') -> '~/abc def'

    Like unquote(), but every '+' is first turned into a space.
    """
    return unquote(s.replace('+', ' '))
1185
always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
               'abcdefghijklmnopqrstuvwxyz'
               '0123456789' '_.-')
# Cache of per-character translation tables, keyed by
# (safe, always_safe).
_safemaps = {}

def quote(s, safe = '/'):
    """quote('abc def') -> 'abc%20def'

    Each part of a URL, e.g. the path info, the query, etc., has a
    different set of reserved characters that must be quoted.

    RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
    the following reserved characters.

    reserved    = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
                  "$" | ","

    Each of these characters is reserved in some component of a URL,
    but not necessarily in all of them.

    By default, the quote function is intended for quoting the path
    section of a URL.  Thus, it will not encode '/'.  This character
    is reserved, but in typical usage the quote function is being
    called on a path where the existing slash characters are used as
    reserved characters.

    Characters in `safe` and in always_safe are left alone; every
    other character with code 0-255 becomes '%XX' (upper-case hex).
    """
    cachekey = (safe, always_safe)
    try:
        safe_map = _safemaps[cachekey]
    except KeyError:
        # First call with this `safe` value: build the table once.
        safe += always_safe
        safe_map = {}
        for i in range(256):
            c = chr(i)
            safe_map[c] = c if c in safe else '%%%02X' % i
        _safemaps[cachekey] = safe_map
    return ''.join(map(safe_map.__getitem__, s))
1224
def quote_plus(s, safe = ''):
    """Quote the query fragment of a URL; replacing ' ' with '+'"""
    if ' ' not in s:
        return quote(s, safe)
    # Keep spaces unquoted while quoting, then swap them for '+'.
    quoted = quote(s, safe + ' ')
    return quoted.replace(' ', '+')
1231
1232	def urlencode(query,doseq=0):
1233	"""Encode a sequence of two-element tuples or dictionary into a URL query string.
1234
1235	If any values in the query arg are sequences and doseq is true, each
1236	sequence element is converted to a separate parameter.
1237
1238	If the query arg is a sequence of two-element tuples, the order of the
1239	parameters in the output will match the order of parameters in the
1240	input.
1241	"""
1242
1243	if hasattr(query,"items"):
1244	# mapping objects
1245	query = query.items()
1246	else:
1247	# it's a bother at times that strings and string-like objects are
1248	# sequences...
1249	try:
1250	# non-sequence items should not work with len()
1251	# non-empty strings will fail this
1252	if len(query) and not isinstance(query[0], tuple):
1253	raise TypeError
1254	# zero-length sequences of all types will get here and succeed,
1255	# but that's a minor nit - since the original implementation
1256	# allowed empty dicts that type of behavior probably should be
1257	# preserved for consistency
1258	except TypeError:
1259	ty,va,tb = sys.exc_info()
1260	raise TypeError, "not a valid non-string sequence or mapping object", tb
1261
1262	l = []
1263	if not doseq:
1264	# preserve old behavior
1265	for k, v in query:
1266	k = quote_plus(str(k))
1267	v = quote_plus(str(v))
1268	l.append(k + '=' + v)
1269	else:
1270	for k, v in query:
1271	k = quote_plus(str(k))
1272	if isinstance(v, str):
1273	v = quote_plus(v)
1274	l.append(k + '=' + v)
1275	elif _is_unicode(v):
1276	# is there a reasonable way to convert to ASCII?
1277	# encode generates a string, but "replace" or "ignore"
1278	# lose information and "strict" can raise UnicodeError
1279	v = quote_plus(v.encode("ASCII","replace"))
1280	l.append(k + '=' + v)
1281	else:
1282	try:
1283	# is this a sufficient test for sequence-ness?
1284	x = len(v)
1285	except TypeError:
1286	# not a sequence
1287	v = quote_plus(str(v))
1288	l.append(k + '=' + v)
1289	else:
1290	# loop over the sequence
1291	for elt in v:
1292	l.append(k + '=' + quote_plus(str(elt)))
1293	return '&'.join(l)
1294
1295	# Proxy handling
def getproxies_environment():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Scan the environment for variables named <scheme>_proxy (in any
    letter case) with a non-empty value; this seems to be the standard
    convention.  If you need a different way, you can pass a proxies
    dictionary to the [Fancy]URLopener constructor.

    """
    suffix = '_proxy'
    proxies = {}
    for name, value in os.environ.items():
        if not value:
            continue
        name = name.lower()
        if name.endswith(suffix):
            # The scheme is the variable name without the suffix.
            proxies[name[:-len(suffix)]] = value
    return proxies
1311
def proxy_bypass_environment(host):
    """Test if proxies should not be used for a particular host.

    Checks the environment for a variable named no_proxy (or NO_PROXY),
    which should be a list of DNS suffixes separated by commas, or '*'
    for all hosts.  Returns 1 to bypass the proxy, 0 otherwise.
    """
    no_proxy = os.environ.get('no_proxy', '') or os.environ.get('NO_PROXY', '')
    # '*' is special case for always bypass
    if no_proxy == '*':
        return 1
    # strip port off host
    hostonly, port = splitport(host)
    # check if the host, with or without its port, ends with any of
    # the DNS suffixes; empty entries are skipped
    for suffix in no_proxy.split(','):
        if not suffix:
            continue
        if hostonly.endswith(suffix) or host.endswith(suffix):
            return 1
    # otherwise, don't bypass
    return 0
1330
1331
# Select the platform-specific implementations of getproxies() and
# proxy_bypass().  Settings from the environment win when present.
if sys.platform == 'darwin':
    from _scproxy import _get_proxy_settings, _get_proxies

    def proxy_bypass_macosx_sysconf(host):
        """
        Return True iff this host shouldn't be accessed using a proxy

        This function uses the MacOSX framework SystemConfiguration
        to fetch the proxy information.
        """
        import re
        import socket
        from fnmatch import fnmatch

        hostonly, port = splitport(host)

        def ip2num(ipAddr):
            # Pack a dotted address into one integer, padding missing
            # trailing octets with zeros ('169.254' -> 169.254.0.0).
            parts = ipAddr.split('.')
            parts = map(int, parts)
            if len(parts) != 4:
                parts = (parts + [0, 0, 0, 0])[:4]
            return (parts[0] << 24) | (parts[1] << 16) | (parts[2] << 8) | parts[3]

        proxy_settings = _get_proxy_settings()

        # Check for simple host names:
        if '.' not in host:
            if proxy_settings['exclude_simple']:
                return True

        hostIP = None

        for value in proxy_settings.get('exceptions', ()):
            # Items in the list are strings like these: *.local, 169.254/16
            if not value: continue

            m = re.match(r"(\d+(?:\.\d+)*)(/\d+)?", value)
            if m is not None:
                if hostIP is None:
                    # Resolve the host once, on the first numeric entry.
                    try:
                        hostIP = socket.gethostbyname(hostonly)
                        hostIP = ip2num(hostIP)
                    except socket.error:
                        continue

                base = ip2num(m.group(1))
                mask = m.group(2)
                if mask is None:
                    # No '/bits' suffix: compare as many bits as the
                    # entry has octets.
                    mask = 8 * (m.group(1).count('.') + 1)
                else:
                    mask = int(mask[1:])
                # Number of low-order bits to ignore.
                mask = 32 - mask

                if (hostIP >> mask) == (base >> mask):
                    return True

            elif fnmatch(host, value):
                return True

        return False


    def getproxies_macosx_sysconf():
        """Return a dictionary of scheme -> proxy server URL mappings.

        This function uses the MacOSX framework SystemConfiguration
        to fetch the proxy information.
        """
        return _get_proxies()



    def proxy_bypass(host):
        """Test if proxies should not be used for a particular host.

        Uses the environment rules if any proxy is configured there,
        otherwise the SystemConfiguration settings.
        """
        if getproxies_environment():
            return proxy_bypass_environment(host)
        else:
            return proxy_bypass_macosx_sysconf(host)

    def getproxies():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Returns settings gathered from the environment, if specified,
        or from SystemConfiguration.
        """
        return getproxies_environment() or getproxies_macosx_sysconf()

elif os.name == 'nt':
    def getproxies_registry():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Win32 uses the registry to store proxies.

        """
        proxies = {}
        try:
            import _winreg
        except ImportError:
            # Std module, so should be around - but you never know!
            return proxies
        try:
            internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            proxyEnable = _winreg.QueryValueEx(internetSettings,
                                               'ProxyEnable')[0]
            if proxyEnable:
                # Returned as Unicode but problems if not converted to ASCII
                proxyServer = str(_winreg.QueryValueEx(internetSettings,
                                                       'ProxyServer')[0])
                if '=' in proxyServer:
                    # Per-protocol settings
                    for p in proxyServer.split(';'):
                        protocol, address = p.split('=', 1)
                        # See if address has a type:// prefix
                        import re
                        if not re.match('^([^/:]+)://', address):
                            address = '%s://%s' % (protocol, address)
                        proxies[protocol] = address
                else:
                    # Use one setting for all protocols
                    if proxyServer[:5] == 'http:':
                        proxies['http'] = proxyServer
                    else:
                        proxies['http'] = 'http://%s' % proxyServer
                        proxies['ftp'] = 'ftp://%s' % proxyServer
            internetSettings.Close()
        except (WindowsError, ValueError, TypeError):
            # Either registry key not found etc, or the value in an
            # unexpected format.
            # proxies already set up to be empty so nothing to do
            pass
        return proxies

    def getproxies():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Returns settings gathered from the environment, if specified,
        or the registry.

        """
        return getproxies_environment() or getproxies_registry()

    def proxy_bypass_registry(host):
        """Test host against the registry's ProxyOverride list.

        Returns 1 if the host name, its resolved address or its fully
        qualified name matches an override entry, 0 otherwise.
        """
        try:
            import _winreg
            import re
        except ImportError:
            # Std modules, so should be around - but you never know!
            return 0
        try:
            internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            proxyEnable = _winreg.QueryValueEx(internetSettings,
                                               'ProxyEnable')[0]
            proxyOverride = str(_winreg.QueryValueEx(internetSettings,
                                                     'ProxyOverride')[0])
            # ^^^^ Returned as Unicode but problems if not converted to ASCII
        except WindowsError:
            return 0
        if not proxyEnable or not proxyOverride:
            return 0
        # try to make a host list from name and IP address.
        rawHost, port = splitport(host)
        host = [rawHost]
        try:
            addr = socket.gethostbyname(rawHost)
            if addr != rawHost:
                host.append(addr)
        except socket.error:
            pass
        try:
            fqdn = socket.getfqdn(rawHost)
            if fqdn != rawHost:
                host.append(fqdn)
        except socket.error:
            pass
        # make a check value list from the registry entry: replace the
        # '<local>' string by the localhost entry and the corresponding
        # canonical entry.
        proxyOverride = proxyOverride.split(';')
        i = 0
        while i < len(proxyOverride):
            if proxyOverride[i] == '<local>':
                proxyOverride[i:i+1] = ['localhost',
                                        '127.0.0.1',
                                        socket.gethostname(),
                                        socket.gethostbyname(
                                            socket.gethostname())]
            i += 1
        # print proxyOverride
        # now check if we match one of the registry values.
        for test in proxyOverride:
            # Turn the glob-style entry into a regular expression.
            test = test.replace(".", r"\.")     # mask dots
            test = test.replace("*", r".*")     # change glob sequence
            test = test.replace("?", r".")      # change glob char
            for val in host:
                # print "%s <--> %s" %( test, val )
                if re.match(test, val, re.I):
                    return 1
        return 0

    def proxy_bypass(host):
        """Test if proxies should not be used for a particular host.

        Uses the environment rules if any proxy is configured there,
        otherwise the registry's ProxyOverride list.

        """
        if getproxies_environment():
            return proxy_bypass_environment(host)
        else:
            return proxy_bypass_registry(host)

else:
    # By default use environment variables
    getproxies = getproxies_environment
    proxy_bypass = proxy_bypass_environment
1539
1540	# Test and time quote() and unquote()
1541	def test1():
1542	s = ''
1543	for i in range(256): s = s + chr(i)
1544	s = s*4
1545	t0 = time.time()
1546	qs = quote(s)
1547	uqs = unquote(qs)
1548	t1 = time.time()
1549	if uqs != s:
1550	print 'Wrong!'
1551	print repr(s)
1552	print repr(qs)
1553	print repr(uqs)
1554	print round(t1 - t0, 3), 'sec'
1555
1556
1557	def reporthook(blocknum, blocksize, totalsize):
1558	# Report during remote transfers
1559	print "Block number: %d, Block size: %d, Total size: %d" % (
1560	blocknum, blocksize, totalsize)
1561
1562	# Test program
1563	def test(args=[]):
1564	if not args:
1565	args = [
1566	'/etc/passwd',
1567	'file:/etc/passwd',
1568	'file://localhost/etc/passwd',
1569	'ftp://ftp.gnu.org/pub/README',
1570	'http://www.python.org/index.html',
1571	]
1572	if hasattr(URLopener, "open_https"):
1573	args.append('https://synergy.as.cmu.edu/~geek/')
1574	try:
1575	for url in args:
1576	print '-'10, url, '-'10
1577	fn, h = urlretrieve(url, None, reporthook)
1578	print fn
1579	if h:
1580	print '======'
1581	for k in h.keys(): print k + ':', h[k]
1582	print '======'
1583	fp = open(fn, 'rb')
1584	data = fp.read()
1585	del fp
1586	if '\r' in data:
1587	table = string.maketrans("", "")
1588	data = data.translate(table, "\r")
1589	print data
1590	fn, h = None, None
1591	print '-'*40
1592	finally:
1593	urlcleanup()
1594
1595	def main():
1596	import getopt, sys
1597	try:
1598	opts, args = getopt.getopt(sys.argv[1:], "th")
1599	except getopt.error, msg:
1600	print msg
1601	print "Use -h for help"
1602	return
1603	t = 0
1604	for o, a in opts:
1605	if o == '-t':
1606	t = t + 1
1607	if o == '-h':
1608	print "Usage: python urllib.py [-t] [url ...]"
1609	print "-t runs self-test;",
1610	print "otherwise, contents of urls are printed"
1611	return
1612	if t:
1613	if t > 1:
1614	test1()
1615	test(args)
1616	else:
1617	if not args:
1618	print "Use -h for help"
1619	for url in args:
1620	print urlopen(url).read(),
1621
1622	# Run test program when run as a script
1623	if __name__ == '__main__':
1624	main()

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: python/trunk/Lib/urllib.py@ 388

Download in other formats: