Context Navigation

urllib.py

Visit:

Last change on this file was 3225, checked in by bird, 18 years ago
Python 2.5
File size: 53.2 KB

Line
1	"""Open an arbitrary URL.
2
3	See the following document for more info on URLs:
4	"Names and Addresses, URIs, URLs, URNs, URCs", at
5	http://www.w3.org/pub/WWW/Addressing/Overview.html
6
7	See also the HTTP spec (from which the error codes are derived):
8	"HTTP - Hypertext Transfer Protocol", at
9	http://www.w3.org/pub/WWW/Protocols/
10
11	Related standards and specs:
12	- RFC1808: the "relative URL" spec. (authoritative status)
13	- RFC1738 - the "URL standard". (authoritative status)
14	- RFC1630 - the "URI spec". (informational status)
15
16	The object returned by URLopener().open(file) will differ per
17	protocol. All you know is that it has methods read(), readline(),
18	readlines(), fileno(), close() and info(). The read*(), fileno()
19	and close() methods work like those of open files.
20	The info() method returns a mimetools.Message object which can be
21	used to query various info about the object, if available.
22	(mimetools.Message objects are queried with the getheader() method.)
23	"""
24
25	import string
26	import socket
27	import os
28	import time
29	import sys
30	from urlparse import urljoin as basejoin
31
# Public API of this module: the names exported by "from urllib import *".
__all__ = ["urlopen", "URLopener", "FancyURLopener", "urlretrieve",
           "urlcleanup", "quote", "quote_plus", "unquote", "unquote_plus",
           "urlencode", "url2pathname", "pathname2url", "splittag",
           "localhost", "thishost", "ftperrors", "basejoin", "unwrap",
           "splittype", "splithost", "splituser", "splitpasswd", "splitport",
           "splitnport", "splitquery", "splitattr", "splitvalue",
           "splitgophertype", "getproxies"]
39
# Reported in the default User-Agent header ("Python-urllib/<version>").
__version__ = '1.17'    # XXX This version is not always updated :-(

# open_ftp() prunes the shared FTP connection cache once it holds more
# than this many entries.
MAXFTPCACHE = 10        # Trim the ftp cache beyond this size
43
44	# Helper for non-unix systems
# Pick the platform-specific URL <-> path converters; every platform that
# has no dedicated module falls back to plain quoting/unquoting.
if os.name == 'mac':
    from macurl2path import url2pathname, pathname2url
elif os.name == 'nt':
    from nturl2path import url2pathname, pathname2url
elif os.name == 'riscos':
    from rourl2path import url2pathname, pathname2url
else:
    def url2pathname(pathname):
        """OS-specific conversion from a relative URL of the 'file' scheme
        to a file system path; not recommended for general use.

        On this platform the conversion is just unquote(pathname)."""
        return unquote(pathname)

    def pathname2url(pathname):
        """OS-specific conversion from a file system path to a relative URL
        of the 'file' scheme; not recommended for general use.

        On this platform the conversion is just quote(pathname)."""
        return quote(pathname)
61
62	# This really consists of two pieces:
63	# (1) a class which handles opening of all sorts of URLs
64	# (plus assorted utilities etc.)
65	# (2) a set of functions for parsing URLs
66	# XXX Should these be separated out into different modules?
67
68
69	# Shortcut for basic usage
# Module-wide FancyURLopener shared by urlopen(), urlretrieve() and
# urlcleanup(); created lazily on first use.
_urlopener = None

def urlopen(url, data=None, proxies=None):
    """urlopen(url [, data [, proxies]]) -> open file-like object

    If proxies is given, a fresh FancyURLopener using that mapping is
    created for this call only; otherwise the shared module-level opener
    is used (and created on first use).  If data is given it is passed on
    to the opener's open() method together with the URL.
    """
    global _urlopener
    if proxies is not None:
        # A caller-specific proxy mapping must not leak into the shared
        # opener, so it gets a private one.
        opener = FancyURLopener(proxies=proxies)
    elif not _urlopener:
        opener = FancyURLopener()
        _urlopener = opener
    else:
        opener = _urlopener
    if data is None:
        return opener.open(url)
    return opener.open(url, data)
def urlretrieve(url, filename=None, reporthook=None, data=None):
    """Retrieve url via the shared module-level opener.

    Creates the shared FancyURLopener on first use and returns whatever
    its retrieve(url, filename, reporthook, data) method returns.
    """
    global _urlopener
    if not _urlopener:
        _urlopener = FancyURLopener()
    return _urlopener.retrieve(url, filename, reporthook, data)
def urlcleanup():
    """Call cleanup() on the shared module-level opener, if one exists."""
    if _urlopener:
        _urlopener.cleanup()
93
94	# exception raised when downloaded size does not match content-length
class ContentTooShortError(IOError):
    """IOError raised when fewer bytes arrive than Content-Length promised.

    The extra 'content' attribute carries whatever the raiser passed as
    the second constructor argument.
    """

    def __init__(self, message, content):
        IOError.__init__(self, message)
        self.content = content
99
100	ftpcache = {}
101	class URLopener:
102	"""Class to open URLs.
103	This is a class rather than just a subroutine because we may need
104	more than one set of global protocol-specific options.
105	Note -- this is a base class for those who don't want the
106	automatic handling of errors type 302 (relocated) and 401
107	(authorization needed)."""
108
109	__tempfiles = None
110
111	version = "Python-urllib/%s" % __version__
112
113	# Constructor
114	def __init__(self, proxies=None, **x509):
115	if proxies is None:
116	proxies = getproxies()
117	assert hasattr(proxies, 'has_key'), "proxies must be a mapping"
118	self.proxies = proxies
119	self.key_file = x509.get('key_file')
120	self.cert_file = x509.get('cert_file')
121	self.addheaders = [('User-Agent', self.version)]
122	self.__tempfiles = []
123	self.__unlink = os.unlink # See cleanup()
124	self.tempcache = None
125	# Undocumented feature: if you assign {} to tempcache,
126	# it is used to cache files retrieved with
127	# self.retrieve(). This is not enabled by default
128	# since it does not work for changing documents (and I
129	# haven't got the logic to check expiration headers
130	# yet).
131	self.ftpcache = ftpcache
132	# Undocumented feature: you can use a different
133	# ftp cache by assigning to the .ftpcache member;
134	# in case you want logically independent URL openers
135	# XXX This is not threadsafe. Bah.
136
137	def __del__(self):
138	self.close()
139
140	def close(self):
141	self.cleanup()
142
143	def cleanup(self):
144	# This code sometimes runs when the rest of this module
145	# has already been deleted, so it can't use any globals
146	# or import anything.
147	if self.__tempfiles:
148	for file in self.__tempfiles:
149	try:
150	self.__unlink(file)
151	except OSError:
152	pass
153	del self.__tempfiles[:]
154	if self.tempcache:
155	self.tempcache.clear()
156
157	def addheader(self, *args):
158	"""Add a header to be used by the HTTP interface only
159	e.g. u.addheader('Accept', 'sound/basic')"""
160	self.addheaders.append(args)
161
162	# External interface
163	def open(self, fullurl, data=None):
164	"""Use URLopener().open(file) instead of open(file, 'r')."""
165	fullurl = unwrap(toBytes(fullurl))
166	if self.tempcache and fullurl in self.tempcache:
167	filename, headers = self.tempcache[fullurl]
168	fp = open(filename, 'rb')
169	return addinfourl(fp, headers, fullurl)
170	urltype, url = splittype(fullurl)
171	if not urltype:
172	urltype = 'file'
173	if urltype in self.proxies:
174	proxy = self.proxies[urltype]
175	urltype, proxyhost = splittype(proxy)
176	host, selector = splithost(proxyhost)
177	url = (host, fullurl) # Signal special case to open_*()
178	else:
179	proxy = None
180	name = 'open_' + urltype
181	self.type = urltype
182	name = name.replace('-', '_')
183	if not hasattr(self, name):
184	if proxy:
185	return self.open_unknown_proxy(proxy, fullurl, data)
186	else:
187	return self.open_unknown(fullurl, data)
188	try:
189	if data is None:
190	return getattr(self, name)(url)
191	else:
192	return getattr(self, name)(url, data)
193	except socket.error, msg:
194	raise IOError, ('socket error', msg), sys.exc_info()[2]
195
196	def open_unknown(self, fullurl, data=None):
197	"""Overridable interface to open unknown URL type."""
198	type, url = splittype(fullurl)
199	raise IOError, ('url error', 'unknown url type', type)
200
201	def open_unknown_proxy(self, proxy, fullurl, data=None):
202	"""Overridable interface to open unknown URL type."""
203	type, url = splittype(fullurl)
204	raise IOError, ('url error', 'invalid proxy for %s' % type, proxy)
205
206	# External interface
207	def retrieve(self, url, filename=None, reporthook=None, data=None):
208	"""retrieve(url) returns (filename, headers) for a local object
209	or (tempfilename, headers) for a remote object."""
210	url = unwrap(toBytes(url))
211	if self.tempcache and url in self.tempcache:
212	return self.tempcache[url]
213	type, url1 = splittype(url)
214	if filename is None and (not type or type == 'file'):
215	try:
216	fp = self.open_local_file(url1)
217	hdrs = fp.info()
218	del fp
219	return url2pathname(splithost(url1)[1]), hdrs
220	except IOError, msg:
221	pass
222	fp = self.open(url, data)
223	headers = fp.info()
224	if filename:
225	tfp = open(filename, 'wb')
226	else:
227	import tempfile
228	garbage, path = splittype(url)
229	garbage, path = splithost(path or "")
230	path, garbage = splitquery(path or "")
231	path, garbage = splitattr(path or "")
232	suffix = os.path.splitext(path)[1]
233	(fd, filename) = tempfile.mkstemp(suffix)
234	self.__tempfiles.append(filename)
235	tfp = os.fdopen(fd, 'wb')
236	result = filename, headers
237	if self.tempcache is not None:
238	self.tempcache[url] = result
239	bs = 1024*8
240	size = -1
241	read = 0
242	blocknum = 0
243	if reporthook:
244	if "content-length" in headers:
245	size = int(headers["Content-Length"])
246	reporthook(blocknum, bs, size)
247	while 1:
248	block = fp.read(bs)
249	if block == "":
250	break
251	read += len(block)
252	tfp.write(block)
253	blocknum += 1
254	if reporthook:
255	reporthook(blocknum, bs, size)
256	fp.close()
257	tfp.close()
258	del fp
259	del tfp
260
261	# raise exception if actual size does not match content-length header
262	if size >= 0 and read < size:
263	raise ContentTooShortError("retrieval incomplete: got only %i out "
264	"of %i bytes" % (read, size), result)
265
266	return result
267
268	# Each method named open_<type> knows how to open that type of URL
269
270	def open_http(self, url, data=None):
271	"""Use HTTP protocol."""
272	import httplib
273	user_passwd = None
274	proxy_passwd= None
275	if isinstance(url, str):
276	host, selector = splithost(url)
277	if host:
278	user_passwd, host = splituser(host)
279	host = unquote(host)
280	realhost = host
281	else:
282	host, selector = url
283	# check whether the proxy contains authorization information
284	proxy_passwd, host = splituser(host)
285	# now we proceed with the url we want to obtain
286	urltype, rest = splittype(selector)
287	url = rest
288	user_passwd = None
289	if urltype.lower() != 'http':
290	realhost = None
291	else:
292	realhost, rest = splithost(rest)
293	if realhost:
294	user_passwd, realhost = splituser(realhost)
295	if user_passwd:
296	selector = "%s://%s%s" % (urltype, realhost, rest)
297	if proxy_bypass(realhost):
298	host = realhost
299
300	#print "proxy via http:", host, selector
301	if not host: raise IOError, ('http error', 'no host given')
302
303	if proxy_passwd:
304	import base64
305	proxy_auth = base64.encodestring(proxy_passwd).strip()
306	else:
307	proxy_auth = None
308
309	if user_passwd:
310	import base64
311	auth = base64.encodestring(user_passwd).strip()
312	else:
313	auth = None
314	h = httplib.HTTP(host)
315	if data is not None:
316	h.putrequest('POST', selector)
317	h.putheader('Content-Type', 'application/x-www-form-urlencoded')
318	h.putheader('Content-Length', '%d' % len(data))
319	else:
320	h.putrequest('GET', selector)
321	if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
322	if auth: h.putheader('Authorization', 'Basic %s' % auth)
323	if realhost: h.putheader('Host', realhost)
324	for args in self.addheaders: h.putheader(*args)
325	h.endheaders()
326	if data is not None:
327	h.send(data)
328	errcode, errmsg, headers = h.getreply()
329	fp = h.getfile()
330	if errcode == 200:
331	return addinfourl(fp, headers, "http:" + url)
332	else:
333	if data is None:
334	return self.http_error(url, fp, errcode, errmsg, headers)
335	else:
336	return self.http_error(url, fp, errcode, errmsg, headers, data)
337
338	def http_error(self, url, fp, errcode, errmsg, headers, data=None):
339	"""Handle http errors.
340	Derived class can override this, or provide specific handlers
341	named http_error_DDD where DDD is the 3-digit error code."""
342	# First check if there's a specific handler for this error
343	name = 'http_error_%d' % errcode
344	if hasattr(self, name):
345	method = getattr(self, name)
346	if data is None:
347	result = method(url, fp, errcode, errmsg, headers)
348	else:
349	result = method(url, fp, errcode, errmsg, headers, data)
350	if result: return result
351	return self.http_error_default(url, fp, errcode, errmsg, headers)
352
353	def http_error_default(self, url, fp, errcode, errmsg, headers):
354	"""Default error handler: close the connection and raise IOError."""
355	void = fp.read()
356	fp.close()
357	raise IOError, ('http error', errcode, errmsg, headers)
358
359	if hasattr(socket, "ssl"):
360	def open_https(self, url, data=None):
361	"""Use HTTPS protocol."""
362	import httplib
363	user_passwd = None
364	proxy_passwd = None
365	if isinstance(url, str):
366	host, selector = splithost(url)
367	if host:
368	user_passwd, host = splituser(host)
369	host = unquote(host)
370	realhost = host
371	else:
372	host, selector = url
373	# here, we determine, whether the proxy contains authorization information
374	proxy_passwd, host = splituser(host)
375	urltype, rest = splittype(selector)
376	url = rest
377	user_passwd = None
378	if urltype.lower() != 'https':
379	realhost = None
380	else:
381	realhost, rest = splithost(rest)
382	if realhost:
383	user_passwd, realhost = splituser(realhost)
384	if user_passwd:
385	selector = "%s://%s%s" % (urltype, realhost, rest)
386	#print "proxy via https:", host, selector
387	if not host: raise IOError, ('https error', 'no host given')
388	if proxy_passwd:
389	import base64
390	proxy_auth = base64.encodestring(proxy_passwd).strip()
391	else:
392	proxy_auth = None
393	if user_passwd:
394	import base64
395	auth = base64.encodestring(user_passwd).strip()
396	else:
397	auth = None
398	h = httplib.HTTPS(host, 0,
399	key_file=self.key_file,
400	cert_file=self.cert_file)
401	if data is not None:
402	h.putrequest('POST', selector)
403	h.putheader('Content-Type',
404	'application/x-www-form-urlencoded')
405	h.putheader('Content-Length', '%d' % len(data))
406	else:
407	h.putrequest('GET', selector)
408	if proxy_auth: h.putheader('Proxy-Authorization: Basic %s' % proxy_auth)
409	if auth: h.putheader('Authorization: Basic %s' % auth)
410	if realhost: h.putheader('Host', realhost)
411	for args in self.addheaders: h.putheader(*args)
412	h.endheaders()
413	if data is not None:
414	h.send(data)
415	errcode, errmsg, headers = h.getreply()
416	fp = h.getfile()
417	if errcode == 200:
418	return addinfourl(fp, headers, "https:" + url)
419	else:
420	if data is None:
421	return self.http_error(url, fp, errcode, errmsg, headers)
422	else:
423	return self.http_error(url, fp, errcode, errmsg, headers,
424	data)
425
426	def open_gopher(self, url):
427	"""Use Gopher protocol."""
428	if not isinstance(url, str):
429	raise IOError, ('gopher error', 'proxy support for gopher protocol currently not implemented')
430	import gopherlib
431	host, selector = splithost(url)
432	if not host: raise IOError, ('gopher error', 'no host given')
433	host = unquote(host)
434	type, selector = splitgophertype(selector)
435	selector, query = splitquery(selector)
436	selector = unquote(selector)
437	if query:
438	query = unquote(query)
439	fp = gopherlib.send_query(selector, query, host)
440	else:
441	fp = gopherlib.send_selector(selector, host)
442	return addinfourl(fp, noheaders(), "gopher:" + url)
443
444	def open_file(self, url):
445	"""Use local file or FTP depending on form of URL."""
446	if not isinstance(url, str):
447	raise IOError, ('file error', 'proxy support for file protocol currently not implemented')
448	if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/':
449	return self.open_ftp(url)
450	else:
451	return self.open_local_file(url)
452
453	def open_local_file(self, url):
454	"""Use local file."""
455	import mimetypes, mimetools, email.Utils
456	try:
457	from cStringIO import StringIO
458	except ImportError:
459	from StringIO import StringIO
460	host, file = splithost(url)
461	localname = url2pathname(file)
462	try:
463	stats = os.stat(localname)
464	except OSError, e:
465	raise IOError(e.errno, e.strerror, e.filename)
466	size = stats.st_size
467	modified = email.Utils.formatdate(stats.st_mtime, usegmt=True)
468	mtype = mimetypes.guess_type(url)[0]
469	headers = mimetools.Message(StringIO(
470	'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
471	(mtype or 'text/plain', size, modified)))
472	if not host:
473	urlfile = file
474	if file[:1] == '/':
475	urlfile = 'file://' + file
476	return addinfourl(open(localname, 'rb'),
477	headers, urlfile)
478	host, port = splitport(host)
479	if not port \
480	and socket.gethostbyname(host) in (localhost(), thishost()):
481	urlfile = file
482	if file[:1] == '/':
483	urlfile = 'file://' + file
484	return addinfourl(open(localname, 'rb'),
485	headers, urlfile)
486	raise IOError, ('local file error', 'not on local host')
487
488	def open_ftp(self, url):
489	"""Use FTP protocol."""
490	if not isinstance(url, str):
491	raise IOError, ('ftp error', 'proxy support for ftp protocol currently not implemented')
492	import mimetypes, mimetools
493	try:
494	from cStringIO import StringIO
495	except ImportError:
496	from StringIO import StringIO
497	host, path = splithost(url)
498	if not host: raise IOError, ('ftp error', 'no host given')
499	host, port = splitport(host)
500	user, host = splituser(host)
501	if user: user, passwd = splitpasswd(user)
502	else: passwd = None
503	host = unquote(host)
504	user = unquote(user or '')
505	passwd = unquote(passwd or '')
506	host = socket.gethostbyname(host)
507	if not port:
508	import ftplib
509	port = ftplib.FTP_PORT
510	else:
511	port = int(port)
512	path, attrs = splitattr(path)
513	path = unquote(path)
514	dirs = path.split('/')
515	dirs, file = dirs[:-1], dirs[-1]
516	if dirs and not dirs[0]: dirs = dirs[1:]
517	if dirs and not dirs[0]: dirs[0] = '/'
518	key = user, host, port, '/'.join(dirs)
519	# XXX thread unsafe!
520	if len(self.ftpcache) > MAXFTPCACHE:
521	# Prune the cache, rather arbitrarily
522	for k in self.ftpcache.keys():
523	if k != key:
524	v = self.ftpcache[k]
525	del self.ftpcache[k]
526	v.close()
527	try:
528	if not key in self.ftpcache:
529	self.ftpcache[key] = \
530	ftpwrapper(user, passwd, host, port, dirs)
531	if not file: type = 'D'
532	else: type = 'I'
533	for attr in attrs:
534	attr, value = splitvalue(attr)
535	if attr.lower() == 'type' and \
536	value in ('a', 'A', 'i', 'I', 'd', 'D'):
537	type = value.upper()
538	(fp, retrlen) = self.ftpcache[key].retrfile(file, type)
539	mtype = mimetypes.guess_type("ftp:" + url)[0]
540	headers = ""
541	if mtype:
542	headers += "Content-Type: %s\n" % mtype
543	if retrlen is not None and retrlen >= 0:
544	headers += "Content-Length: %d\n" % retrlen
545	headers = mimetools.Message(StringIO(headers))
546	return addinfourl(fp, headers, "ftp:" + url)
547	except ftperrors(), msg:
548	raise IOError, ('ftp error', msg), sys.exc_info()[2]
549
550	def open_data(self, url, data=None):
551	"""Use "data" URL."""
552	if not isinstance(url, str):
553	raise IOError, ('data error', 'proxy support for data protocol currently not implemented')
554	# ignore POSTed data
555	#
556	# syntax of data URLs:
557	# dataurl := "data:" [ mediatype ] [ ";base64" ] "," data
558	# mediatype := [ type "/" subtype ] *( ";" parameter )
559	# data := *urlchar
560	# parameter := attribute "=" value
561	import mimetools
562	try:
563	from cStringIO import StringIO
564	except ImportError:
565	from StringIO import StringIO
566	try:
567	[type, data] = url.split(',', 1)
568	except ValueError:
569	raise IOError, ('data error', 'bad data URL')
570	if not type:
571	type = 'text/plain;charset=US-ASCII'
572	semi = type.rfind(';')
573	if semi >= 0 and '=' not in type[semi:]:
574	encoding = type[semi+1:]
575	type = type[:semi]
576	else:
577	encoding = ''
578	msg = []
579	msg.append('Date: %s'%time.strftime('%a, %d %b %Y %T GMT',
580	time.gmtime(time.time())))
581	msg.append('Content-type: %s' % type)
582	if encoding == 'base64':
583	import base64
584	data = base64.decodestring(data)
585	else:
586	data = unquote(data)
587	msg.append('Content-Length: %d' % len(data))
588	msg.append('')
589	msg.append(data)
590	msg = '\n'.join(msg)
591	f = StringIO(msg)
592	headers = mimetools.Message(f, 0)
593	#f.fileno = None # needed for addinfourl
594	return addinfourl(f, headers, url)
595
596
class FancyURLopener(URLopener):
    """Derived class with handlers for errors we can handle (perhaps)."""

    def __init__(self, *args, **kwargs):
        """Accept the same arguments as URLopener and set up the
        authentication cache and the redirect counter."""
        URLopener.__init__(self, *args, **kwargs)
        self.auth_cache = {}
        self.tries = 0
        self.maxtries = 10

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Default error handling -- don't raise an exception."""
        return addinfourl(fp, headers, "http:" + url)

    def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 302 -- relocated (temporarily).

        After maxtries consecutive redirects the request is reported as
        a 500 "Redirect Recursion" error instead of being followed."""
        self.tries += 1
        if self.maxtries and self.tries >= self.maxtries:
            if hasattr(self, "http_error_500"):
                meth = self.http_error_500
            else:
                meth = self.http_error_default
            self.tries = 0
            return meth(url, fp, 500,
                        "Internal Server Error: Redirect Recursion", headers)
        result = self.redirect_internal(url, fp, errcode, errmsg, headers,
                                        data)
        self.tries = 0
        return result

    def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
        """Follow the 'location' (or 'uri') header of a redirect reply.

        Returns None when neither header is present.  Raises IOError
        when the target is not an http, https or ftp URL."""
        if 'location' in headers:
            newurl = headers['location']
        elif 'uri' in headers:
            newurl = headers['uri']
        else:
            return
        # Drain the response body before closing; its content is discarded.
        fp.read()
        fp.close()
        # In case the server sent a relative URL, join with original:
        newurl = basejoin(self.type + ":" + url, newurl)

        # For security reasons we do not allow redirects to protocols
        # other than HTTP, HTTPS or FTP: the target comes from the remote
        # server and could otherwise name e.g. a local file: URL.
        newurl_lower = newurl.lower()
        if not (newurl_lower.startswith('http://') or
                newurl_lower.startswith('https://') or
                newurl_lower.startswith('ftp://')):
            raise IOError('redirect error', errcode,
                          errmsg + " - Redirection to url '%s' is not allowed" %
                          newurl,
                          headers)

        return self.open(newurl)

    def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 301 -- also relocated (permanently)."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 303 -- also relocated (essentially identical to 302)."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 307 -- relocated, but turn POST into error."""
        if data is None:
            return self.http_error_302(url, fp, errcode, errmsg, headers, data)
        else:
            return self.http_error_default(url, fp, errcode, errmsg, headers)

    def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 401 -- authentication required.
        This function supports Basic authentication only.

        Anything else (missing or unparsable www-authenticate header,
        other schemes) is passed to URLopener.http_error_default(),
        which raises IOError."""
        if not 'www-authenticate' in headers:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        stuff = headers['www-authenticate']
        import re
        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
        if not match:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        scheme, realm = match.groups()
        if scheme.lower() != 'basic':
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        # Dispatch to retry_http_basic_auth / retry_https_basic_auth.
        name = 'retry_' + self.type + '_basic_auth'
        if data is None:
            return getattr(self,name)(url, realm)
        else:
            return getattr(self,name)(url, realm, data)

    def http_error_407(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 407 -- proxy authentication required.
        This function supports Basic authentication only.

        Anything else (missing or unparsable proxy-authenticate header,
        other schemes) is passed to URLopener.http_error_default(),
        which raises IOError."""
        if not 'proxy-authenticate' in headers:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        stuff = headers['proxy-authenticate']
        import re
        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
        if not match:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        scheme, realm = match.groups()
        if scheme.lower() != 'basic':
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        # Dispatch to retry_proxy_http_basic_auth /
        # retry_proxy_https_basic_auth.
        name = 'retry_proxy_' + self.type + '_basic_auth'
        if data is None:
            return getattr(self,name)(url, realm)
        else:
            return getattr(self,name)(url, realm, data)

    def retry_proxy_http_basic_auth(self, url, realm, data=None):
        """Retry an http request with user:passwd added to the http proxy.

        The credentials are written into self.proxies['http'].  Returns
        None when no user name or password was obtained."""
        host, selector = splithost(url)
        newurl = 'http://' + host + selector
        proxy = self.proxies['http']
        urltype, proxyhost = splittype(proxy)
        proxyhost, proxyselector = splithost(proxyhost)
        # i is non-zero when the proxy URL already carried credentials;
        # they evidently failed, so ask get_user_passwd() to drop its
        # cached entry.
        i = proxyhost.find('@') + 1
        proxyhost = proxyhost[i:]
        user, passwd = self.get_user_passwd(proxyhost, realm, i)
        if not (user or passwd): return None
        proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost
        self.proxies['http'] = 'http://' + proxyhost + proxyselector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_proxy_https_basic_auth(self, url, realm, data=None):
        """Retry an https request with user:passwd added to the https proxy.

        The credentials are written into self.proxies['https'].  Returns
        None when no user name or password was obtained."""
        host, selector = splithost(url)
        newurl = 'https://' + host + selector
        proxy = self.proxies['https']
        urltype, proxyhost = splittype(proxy)
        proxyhost, proxyselector = splithost(proxyhost)
        i = proxyhost.find('@') + 1
        proxyhost = proxyhost[i:]
        user, passwd = self.get_user_passwd(proxyhost, realm, i)
        if not (user or passwd): return None
        proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost
        self.proxies['https'] = 'https://' + proxyhost + proxyselector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_http_basic_auth(self, url, realm, data=None):
        """Retry an http request with user:passwd embedded in the URL.

        Returns None when no user name or password was obtained."""
        host, selector = splithost(url)
        # i is non-zero when the URL already carried credentials; they
        # evidently failed, so ask get_user_passwd() to drop its cached
        # entry.
        i = host.find('@') + 1
        host = host[i:]
        user, passwd = self.get_user_passwd(host, realm, i)
        if not (user or passwd): return None
        host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
        newurl = 'http://' + host + selector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def retry_https_basic_auth(self, url, realm, data=None):
        """Retry an https request with user:passwd embedded in the URL.

        Returns None when no user name or password was obtained."""
        host, selector = splithost(url)
        i = host.find('@') + 1
        host = host[i:]
        user, passwd = self.get_user_passwd(host, realm, i)
        if not (user or passwd): return None
        host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
        newurl = 'https://' + host + selector
        if data is None:
            return self.open(newurl)
        else:
            return self.open(newurl, data)

    def get_user_passwd(self, host, realm, clear_cache = 0):
        """Return (user, passwd) for realm at host.

        Answers come from self.auth_cache when available; a true
        clear_cache discards the cached entry and prompts again.  A
        non-empty answer from prompt_user_passwd() is cached."""
        key = realm + '@' + host.lower()
        if key in self.auth_cache:
            if clear_cache:
                del self.auth_cache[key]
            else:
                return self.auth_cache[key]
        user, passwd = self.prompt_user_passwd(host, realm)
        if user or passwd: self.auth_cache[key] = (user, passwd)
        return user, passwd

    def prompt_user_passwd(self, host, realm):
        """Override this in a GUI environment!

        Prompts on the terminal; returns (None, None) if the user
        interrupts with Ctrl-C."""
        import getpass
        try:
            user = raw_input("Enter username for %s at %s: " % (realm,
                                                                host))
            passwd = getpass.getpass("Enter password for %s in %s at %s: " %
                (user, realm, host))
            return user, passwd
        except KeyboardInterrupt:
            print
            return None, None
781
782
783	# Utility functions
784
# Cached result of localhost(); filled in on the first call.
_localhost = None

def localhost():
    """Return the IP address of the magic hostname 'localhost'.

    The lookup is done once and the result cached in _localhost."""
    global _localhost
    if _localhost is None:
        _localhost = socket.gethostbyname('localhost')
    return _localhost
792
# Cached result of thishost(); filled in on the first call.
_thishost = None

def thishost():
    """Return the IP address of the current host.

    The lookup is done once and the result cached in _thishost."""
    global _thishost
    if _thishost is None:
        _thishost = socket.gethostbyname(socket.gethostname())
    return _thishost
800
# Cached result of ftperrors(); filled in on the first call.
_ftperrors = None

def ftperrors():
    """Return the set of errors raised by the FTP class.

    This is ftplib.all_errors; ftplib is imported only on first use."""
    global _ftperrors
    if _ftperrors is None:
        import ftplib
        _ftperrors = ftplib.all_errors
    return _ftperrors
809
# Cached result of noheaders(); filled in on the first call.
_noheaders = None

def noheaders():
    """Return an empty mimetools.Message object.

    The same object is returned on every call."""
    global _noheaders
    if _noheaders is None:
        import mimetools
        try:
            from cStringIO import StringIO
        except ImportError:
            from StringIO import StringIO
        _noheaders = mimetools.Message(StringIO(), 0)
        _noheaders.fp.close()   # Recycle file descriptor
    return _noheaders
823
824
825	# Utility classes
826
827	class ftpwrapper:
828	"""Class used by open_ftp() for cache of open FTP connections."""
829
830	def __init__(self, user, passwd, host, port, dirs):
831	self.user = user
832	self.passwd = passwd
833	self.host = host
834	self.port = port
835	self.dirs = dirs
836	self.init()
837
838	def init(self):
839	import ftplib
840	self.busy = 0
841	self.ftp = ftplib.FTP()
842	self.ftp.connect(self.host, self.port)
843	self.ftp.login(self.user, self.passwd)
844	for dir in self.dirs:
845	self.ftp.cwd(dir)
846
847	def retrfile(self, file, type):
848	import ftplib
849	self.endtransfer()
850	if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
851	else: cmd = 'TYPE ' + type; isdir = 0
852	try:
853	self.ftp.voidcmd(cmd)
854	except ftplib.all_errors:
855	self.init()
856	self.ftp.voidcmd(cmd)
857	conn = None
858	if file and not isdir:
859	# Try to retrieve as a file
860	try:
861	cmd = 'RETR ' + file
862	conn = self.ftp.ntransfercmd(cmd)
863	except ftplib.error_perm, reason:
864	if str(reason)[:3] != '550':
865	raise IOError, ('ftp error', reason), sys.exc_info()[2]
866	if not conn:
867	# Set transfer mode to ASCII!
868	self.ftp.voidcmd('TYPE A')
869	# Try a directory listing
870	if file: cmd = 'LIST ' + file
871	else: cmd = 'LIST'
872	conn = self.ftp.ntransfercmd(cmd)
873	self.busy = 1
874	# Pass back both a suitably decorated object and a retrieval length
875	return (addclosehook(conn[0].makefile('rb'),
876	self.endtransfer), conn[1])
877	def endtransfer(self):
878	if not self.busy:
879	return
880	self.busy = 0
881	try:
882	self.ftp.voidresp()
883	except ftperrors():
884	pass
885
886	def close(self):
887	self.endtransfer()
888	try:
889	self.ftp.close()
890	except ftperrors():
891	pass
892
class addbase:
    """Base class for addinfo and addclosehook.

    Wraps a file-like object and re-exports its read(), readline() and,
    when the wrapped object has them, readlines(), fileno(), __iter__()
    and next() as attributes of the wrapper."""

    def __init__(self, fp):
        self.fp = fp
        self.read = self.fp.read
        self.readline = self.fp.readline
        if hasattr(self.fp, "readlines"):
            self.readlines = self.fp.readlines
        if hasattr(self.fp, "fileno"):
            self.fileno = self.fp.fileno
        else:
            # Objects without a real descriptor report None.
            self.fileno = lambda: None
        if hasattr(self.fp, "__iter__"):
            self.__iter__ = self.fp.__iter__
            if hasattr(self.fp, "next"):
                self.next = self.fp.next

    def __repr__(self):
        return '<%s at %r whose fp = %r>' % (self.__class__.__name__,
                                             id(self), self.fp)

    def close(self):
        """Close the wrapped file and drop the re-exported methods."""
        self.read = None
        self.readline = None
        self.readlines = None
        self.fileno = None
        if self.fp: self.fp.close()
        self.fp = None
921
class addclosehook(addbase):
    """Class to add a close hook to an open file.

    closehook(*hookargs) is called once, after the wrapped file has been
    closed."""

    def __init__(self, fp, closehook, *hookargs):
        addbase.__init__(self, fp)
        self.closehook = closehook
        self.hookargs = hookargs

    def close(self):
        addbase.close(self)
        if self.closehook:
            self.closehook(*self.hookargs)
            # Forget the hook so a second close() does not call it again.
            self.closehook = None
            self.hookargs = None
936
class addinfo(addbase):
    """class to add an info() method to an open file."""

    def __init__(self, fp, headers):
        addbase.__init__(self, fp)
        self.headers = headers

    def info(self):
        """Return the headers object given to the constructor."""
        return self.headers
946
class addinfourl(addbase):
    """class to add info() and geturl() methods to an open file."""

    def __init__(self, fp, headers, url):
        addbase.__init__(self, fp)
        self.headers = headers
        self.url = url

    def info(self):
        """Return the headers object given to the constructor."""
        return self.headers

    def geturl(self):
        """Return the URL given to the constructor."""
        return self.url
960
961
962	# Utilities to parse URLs (most of these return None for missing parts):
963	# unwrap('<URL:type://host/path>') --> 'type://host/path'
964	# splittype('type:opaquestring') --> 'type', 'opaquestring'
965	# splithost('//host[:port]/path') --> 'host[:port]', '/path'
966	# splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'
967	# splitpasswd('user:passwd') -> 'user', 'passwd'
968	# splitport('host:port') --> 'host', 'port'
969	# splitquery('/path?query') --> '/path', 'query'
970	# splittag('/path#tag') --> '/path', 'tag'
971	# splitattr('/path;attr1=value1;attr2=value2;...') ->
972	# '/path', ['attr1=value1', 'attr2=value2', ...]
973	# splitvalue('attr=value') --> 'attr', 'value'
974	# splitgophertype('/Xselector') --> 'X', 'selector'
975	# unquote('abc%20def') -> 'abc def'
976	# quote('abc def') -> 'abc%20def'
977
# Pick the _is_unicode() implementation once, at import time, depending on
# whether this interpreter provides the 'unicode' builtin.
try:
    unicode
except NameError:
    def _is_unicode(x):
        """Return 0: this interpreter has no 'unicode' type."""
        return 0
else:
    def _is_unicode(x):
        """Return whether x is an instance of the 'unicode' type."""
        return isinstance(x, unicode)

def toBytes(url):
    """toBytes(u"URL") --> 'URL'.

    Values for which _is_unicode() is false are returned unchanged.
    Unicode URLs are encoded as ASCII; if that fails, UnicodeError is
    raised with a message naming the offending URL.
    """
    # Most URL schemes require ASCII. If that changes, the conversion
    # can be relaxed
    if not _is_unicode(url):
        return url
    try:
        return url.encode("ASCII")
    except UnicodeError:
        raise UnicodeError("URL " + repr(url) +
                           " contains non-ASCII characters")
998
def unwrap(url):
    """unwrap('<URL:type://host/path>') --> 'type://host/path'.

    Surrounding whitespace is stripped, then an enclosing '<...>' pair,
    then a leading 'URL:' prefix; whitespace is stripped again after
    each removal.
    """
    url = url.strip()
    if url.startswith('<') and url.endswith('>'):
        url = url[1:-1].strip()
    if url.startswith('URL:'):
        url = url[4:].strip()
    return url
1006
_typeprog = None
def splittype(url):
    """splittype('type:opaquestring') --> 'type', 'opaquestring'.

    The scheme is returned lower-cased. When the URL does not start
    with 'scheme:', the result is (None, url).
    """
    global _typeprog
    if _typeprog is None:
        # Compiled lazily so that 're' is only imported on first use.
        import re
        _typeprog = re.compile('^([^/:]+):')

    found = _typeprog.match(url)
    if found is None:
        return None, url
    # The match is anchored at 0 and ends just after the ':' separator.
    return found.group(1).lower(), url[found.end():]
1020
_hostprog = None
def splithost(url):
    """splithost('//host[:port]/path') --> 'host[:port]', '/path'.

    The host part runs from the leading '//' up to the first '/' or
    '?'; everything after it is returned as the path. When the URL does
    not start with '//', the result is (None, url).
    """
    global _hostprog
    if _hostprog is None:
        # Compiled lazily so that 're' is only imported on first use.
        import re
        # The '*' quantifiers are required: without them the pattern
        # only matches a one-character host followed by one character.
        _hostprog = re.compile('^//([^/?]*)(.*)$')

    match = _hostprog.match(url)
    if match: return match.group(1, 2)
    return None, url
1032
_userprog = None
def splituser(host):
    """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'.

    The split happens at the last '@'. Both parts are passed through
    unquote(). When there is no '@', the result is (None, host).
    """
    global _userprog
    if _userprog is None:
        # Compiled lazily so that 're' is only imported on first use.
        import re
        # The '*' quantifiers are required: without them only inputs of
        # the form 'x@y' (single characters) would be split.
        _userprog = re.compile('^(.*)@(.*)$')

    match = _userprog.match(host)
    if match: return map(unquote, match.group(1, 2))
    return None, host
1044
_passwdprog = None
def splitpasswd(user):
    """splitpasswd('user:passwd') -> 'user', 'passwd'.

    The split happens at the first ':', so the password may itself
    contain colons. When there is no ':', the result is (user, None).
    """
    global _passwdprog
    if _passwdprog is None:
        # Compiled lazily so that 're' is only imported on first use.
        import re
        # The '*' quantifiers are required: without them only inputs of
        # the form 'u:p' (single characters) would be split.
        _passwdprog = re.compile('^([^:]*):(.*)$')

    match = _passwdprog.match(user)
    if match: return match.group(1, 2)
    return user, None
1056
# splitport('host:port') --> 'host', 'port'
_portprog = None
def splitport(host):
    """splitport('host:port') --> 'host', 'port'.

    The port is returned as a string of digits. When the input does not
    end in ':<digits>', the result is (host, None).
    """
    global _portprog
    if _portprog is None:
        # Compiled lazily so that 're' is only imported on first use.
        import re
        _portprog = re.compile('^(.*):([0-9]+)$')

    found = _portprog.match(host)
    if found is None:
        return host, None
    return found.group(1, 2)
1069
_nportprog = None
def splitnport(host, defport=-1):
    """Split host and port, returning numeric port.

    Return given default port if no ':' found; defaults to -1.
    Return numerical port if a valid number is found after ':'.
    Return None if ':' but not a valid number."""
    global _nportprog
    if _nportprog is None:
        # Compiled lazily so that 're' is only imported on first use.
        import re
        # The '*' quantifiers are required: without them only inputs of
        # the form 'h:p' (single characters) would be split.
        _nportprog = re.compile('^(.*):(.*)$')

    match = _nportprog.match(host)
    if match:
        host, port = match.group(1, 2)
        try:
            # An empty port ('host:') is reported the same way as a
            # non-numeric one.
            if not port: raise ValueError("no digits")
            nport = int(port)
        except ValueError:
            nport = None
        return host, nport
    return host, defport
1091
_queryprog = None
def splitquery(url):
    """splitquery('/path?query') --> '/path', 'query'.

    The split happens at the last '?'. When there is no '?', the result
    is (url, None).
    """
    global _queryprog
    if _queryprog is None:
        # Compiled lazily so that 're' is only imported on first use.
        import re
        # The '*' quantifiers are required: without them only inputs of
        # the form 'p?q' (single characters) would be split.
        _queryprog = re.compile(r'^(.*)\?([^?]*)$')

    match = _queryprog.match(url)
    if match: return match.group(1, 2)
    return url, None
1103
_tagprog = None
def splittag(url):
    """splittag('/path#tag') --> '/path', 'tag'.

    The split happens at the last '#'. When there is no '#', the result
    is (url, None).
    """
    global _tagprog
    if _tagprog is None:
        # Compiled lazily so that 're' is only imported on first use.
        import re
        # The '*' quantifiers are required: without them only inputs of
        # the form 'p#t' (single characters) would be split.
        _tagprog = re.compile('^(.*)#([^#]*)$')

    match = _tagprog.match(url)
    if match: return match.group(1, 2)
    return url, None
1115
def splitattr(url):
    """splitattr('/path;attr1=value1;attr2=value2;...') ->
    '/path', ['attr1=value1', 'attr2=value2', ...].

    The attribute list is empty when the URL contains no ';'.
    """
    attrs = url.split(';')
    path = attrs.pop(0)
    return path, attrs
1121
_valueprog = None
def splitvalue(attr):
    """splitvalue('attr=value') --> 'attr', 'value'.

    The split happens at the first '=', so the value may itself contain
    '='. When there is no '=', the result is (attr, None).
    """
    global _valueprog
    if _valueprog is None:
        # Compiled lazily so that 're' is only imported on first use.
        import re
        # The '*' quantifiers are required: without them only inputs of
        # the form 'a=v' (single characters) would be split.
        _valueprog = re.compile('^([^=]*)=(.*)$')

    match = _valueprog.match(attr)
    if match: return match.group(1, 2)
    return attr, None
1133
def splitgophertype(selector):
    """splitgophertype('/Xselector') --> 'X', 'selector'.

    When the selector does not start with '/' followed by at least one
    more character, the result is (None, selector).
    """
    has_type = selector[:1] == '/' and selector[1:2]
    if not has_type:
        return None, selector
    gtype, rest = selector[1], selector[2:]
    return gtype, rest
1139
# Map every two-character hex escape to its character. All upper/lower
# case combinations are included so that mixed-case escapes such as
# '%aB' are decoded too, not just '%ab' and '%AB'.
_hexdig = '0123456789ABCDEFabcdef'
_hextochr = dict((a + b, chr(int(a + b, 16)))
                 for a in _hexdig for b in _hexdig)

def unquote(s):
    """unquote('abc%20def') -> 'abc def'.

    Every '%XX' escape (XX being two hex digits in any case) is replaced
    by the corresponding character. A '%' that is not followed by two
    hex digits is left in place unchanged.
    """
    res = s.split('%')
    # res[0] is the text before the first '%' and needs no decoding.
    for i in range(1, len(res)):
        item = res[i]
        try:
            res[i] = _hextochr[item[:2]] + item[2:]
        except KeyError:
            # Not a valid escape: put the '%' back.
            res[i] = '%' + item
        except UnicodeDecodeError:
            # Concatenating a non-ASCII byte with a unicode remainder
            # failed; build the character as unicode instead.
            res[i] = unichr(int(item[:2], 16)) + item[2:]
    return "".join(res)
1155
def unquote_plus(s):
    """unquote_plus('%7e/abc+def') -> '~/abc def'

    Every '+' is turned into a space before the result is passed to
    unquote().
    """
    return unquote(s.replace('+', ' '))
1160
always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
               'abcdefghijklmnopqrstuvwxyz'
               '0123456789' '_.-')
# Cache of per-'safe' translation tables, keyed by (safe, always_safe).
_safemaps = {}

def quote(s, safe = '/'):
    """quote('abc def') -> 'abc%20def'

    Each part of a URL, e.g. the path info, the query, etc., has a
    different set of reserved characters that must be quoted.

    RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
    the following reserved characters.

    reserved    = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
                  "$" | ","

    Each of these characters is reserved in some component of a URL,
    but not necessarily in all of them.

    By default, the quote function is intended for quoting the path
    section of a URL.  Thus, it will not encode '/'.  This character
    is reserved, but in typical usage the quote function is being
    called on a path where the existing slash characters are used as
    reserved characters.

    Characters listed in 'safe' or in always_safe are passed through;
    every other character with an ordinal below 256 becomes '%XX' with
    upper-case hex digits.
    """
    cachekey = (safe, always_safe)
    safe_map = _safemaps.get(cachekey)
    if safe_map is None:
        # First call with this 'safe' set: build and remember the table.
        unquoted = safe + always_safe
        safe_map = {}
        for code in range(256):
            char = chr(code)
            if char in unquoted:
                safe_map[char] = char
            else:
                safe_map[char] = '%%%02X' % code
        _safemaps[cachekey] = safe_map
    return ''.join(map(safe_map.__getitem__, s))
1199
def quote_plus(s, safe = ''):
    """Quote the query fragment of a URL; replacing ' ' with '+'

    Spaces are kept out of quote()'s escaping by adding ' ' to the safe
    set, and are then replaced by '+'. A literal '+' in the input is
    still escaped by quote() unless it is listed in 'safe'.
    """
    if ' ' not in s:
        return quote(s, safe)
    return quote(s, safe + ' ').replace(' ', '+')
1206
1207	def urlencode(query,doseq=0):
1208	"""Encode a sequence of two-element tuples or dictionary into a URL query string.
1209
1210	If any values in the query arg are sequences and doseq is true, each
1211	sequence element is converted to a separate parameter.
1212
1213	If the query arg is a sequence of two-element tuples, the order of the
1214	parameters in the output will match the order of parameters in the
1215	input.
1216	"""
1217
1218	if hasattr(query,"items"):
1219	# mapping objects
1220	query = query.items()
1221	else:
1222	# it's a bother at times that strings and string-like objects are
1223	# sequences...
1224	try:
1225	# non-sequence items should not work with len()
1226	# non-empty strings will fail this
1227	if len(query) and not isinstance(query[0], tuple):
1228	raise TypeError
1229	# zero-length sequences of all types will get here and succeed,
1230	# but that's a minor nit - since the original implementation
1231	# allowed empty dicts that type of behavior probably should be
1232	# preserved for consistency
1233	except TypeError:
1234	ty,va,tb = sys.exc_info()
1235	raise TypeError, "not a valid non-string sequence or mapping object", tb
1236
1237	l = []
1238	if not doseq:
1239	# preserve old behavior
1240	for k, v in query:
1241	k = quote_plus(str(k))
1242	v = quote_plus(str(v))
1243	l.append(k + '=' + v)
1244	else:
1245	for k, v in query:
1246	k = quote_plus(str(k))
1247	if isinstance(v, str):
1248	v = quote_plus(v)
1249	l.append(k + '=' + v)
1250	elif _is_unicode(v):
1251	# is there a reasonable way to convert to ASCII?
1252	# encode generates a string, but "replace" or "ignore"
1253	# lose information and "strict" can raise UnicodeError
1254	v = quote_plus(v.encode("ASCII","replace"))
1255	l.append(k + '=' + v)
1256	else:
1257	try:
1258	# is this a sufficient test for sequence-ness?
1259	x = len(v)
1260	except TypeError:
1261	# not a sequence
1262	v = quote_plus(str(v))
1263	l.append(k + '=' + v)
1264	else:
1265	# loop over the sequence
1266	for elt in v:
1267	l.append(k + '=' + quote_plus(str(elt)))
1268	return '&'.join(l)
1269
1270	# Proxy handling
def getproxies_environment():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Scan the environment for variables named <scheme>_proxy;
    this seems to be the standard convention.  If you need a
    different way, you can pass a proxies dictionary to the
    [Fancy]URLopener constructor.

    Variable names are compared case-insensitively and variables with
    an empty value are ignored.
    """
    suffix = '_proxy'
    proxies = {}
    for name, value in os.environ.items():
        if not value:
            continue
        lowered = name.lower()
        if lowered.endswith(suffix):
            scheme = lowered[:-len(suffix)]
            proxies[scheme] = value
    return proxies
1286
# Platform-specific definitions of getproxies() and proxy_bypass().
if sys.platform == 'darwin':
    def getproxies_internetconfig():
        """Return a dictionary of scheme -> proxy server URL mappings.

        By convention the mac uses Internet Config to store
        proxies.  An HTTP proxy, for instance, is stored under
        the HttpProxy key.

        An empty dictionary is returned when the 'ic' module cannot be
        imported or the configuration cannot be opened.
        """
        try:
            import ic
        except ImportError:
            return {}

        try:
            config = ic.IC()
        except ic.error:
            return {}
        proxies = {}
        # HTTP:
        if 'UseHTTPProxy' in config and config['UseHTTPProxy']:
            try:
                value = config['HTTPProxyHost']
            except ic.error:
                pass
            else:
                proxies['http'] = 'http://%s' % value
        # FTP: XXXX To be done.
        # Gopher: XXXX To be done.
        return proxies

    def proxy_bypass(x):
        """Return 0: no bypass list is consulted on this platform."""
        return 0

    def getproxies():
        """Return the environment proxies or, if none are set, those
        from Internet Config."""
        return getproxies_environment() or getproxies_internetconfig()

elif os.name == 'nt':
    def getproxies_registry():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Win32 uses the registry to store proxies.

        An empty dictionary is returned when _winreg is unavailable,
        the registry values are missing or malformed, or ProxyEnable is
        false.
        """
        proxies = {}
        try:
            import _winreg
        except ImportError:
            # Std module, so should be around - but you never know!
            return proxies
        try:
            internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            try:
                proxyEnable = _winreg.QueryValueEx(internetSettings,
                                                   'ProxyEnable')[0]
                if proxyEnable:
                    # Returned as Unicode but problems if not converted to ASCII
                    proxyServer = str(_winreg.QueryValueEx(internetSettings,
                                                           'ProxyServer')[0])
                    if '=' in proxyServer:
                        # Per-protocol settings
                        import re
                        for p in proxyServer.split(';'):
                            protocol, address = p.split('=', 1)
                            # See if address has a type:// prefix
                            if not re.match('^([^/:]+)://', address):
                                address = '%s://%s' % (protocol, address)
                            proxies[protocol] = address
                    else:
                        # Use one setting for all protocols
                        if proxyServer[:5] == 'http:':
                            proxies['http'] = proxyServer
                        else:
                            proxies['http'] = 'http://%s' % proxyServer
                            proxies['ftp'] = 'ftp://%s' % proxyServer
            finally:
                # Release the key even when a query or the parsing fails.
                internetSettings.Close()
        except (WindowsError, ValueError, TypeError):
            # Either registry key not found etc, or the value in an
            # unexpected format.
            # proxies already set up to be empty so nothing to do
            pass
        return proxies

    def getproxies():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Returns settings gathered from the environment, if specified,
        or the registry.

        """
        return getproxies_environment() or getproxies_registry()

    def proxy_bypass(host):
        """Return 1 if host matches the registry's ProxyOverride list, else 0.

        The host name, its IP address and its fully qualified name are
        each tested against every ';'-separated override entry. Entries
        are glob patterns ('*' and '?'); '<local>' stands for the local
        host's names and addresses.
        """
        try:
            import _winreg
            import re
        except ImportError:
            # Std modules, so should be around - but you never know!
            return 0
        try:
            internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            try:
                proxyEnable = _winreg.QueryValueEx(internetSettings,
                                                   'ProxyEnable')[0]
                proxyOverride = str(_winreg.QueryValueEx(internetSettings,
                                                         'ProxyOverride')[0])
                # ^^^^ Returned as Unicode but problems if not converted to ASCII
            finally:
                # Release the key even when a query fails.
                internetSettings.Close()
        except WindowsError:
            return 0
        if not proxyEnable or not proxyOverride:
            return 0
        # try to make a host list from name and IP address.
        rawHost, port = splitport(host)
        host = [rawHost]
        try:
            addr = socket.gethostbyname(rawHost)
            if addr != rawHost:
                host.append(addr)
        except socket.error:
            pass
        try:
            fqdn = socket.getfqdn(rawHost)
            if fqdn != rawHost:
                host.append(fqdn)
        except socket.error:
            pass
        # make a check value list from the registry entry: replace the
        # '<local>' string by the localhost entry and the corresponding
        # canonical entry.
        proxyOverride = proxyOverride.split(';')
        i = 0
        while i < len(proxyOverride):
            if proxyOverride[i] == '<local>':
                proxyOverride[i:i+1] = ['localhost',
                                        '127.0.0.1',
                                        socket.gethostname(),
                                        socket.gethostbyname(
                                            socket.gethostname())]
            i += 1
        # print proxyOverride
        # now check if we match one of the registry values.
        for test in proxyOverride:
            # Translate the glob into a regex. Dots must be escaped
            # before the glob characters introduce new, meaningful dots.
            test = test.replace(".", r"\.")     # mask dots
            test = test.replace("*", r".*")     # change glob sequence
            test = test.replace("?", r".")      # change glob char
            for val in host:
                # print "%s <--> %s" %( test, val )
                if re.match(test, val, re.I):
                    return 1
        return 0

else:
    # By default use environment variables
    getproxies = getproxies_environment

    def proxy_bypass(host):
        """Return 0: no bypass list is consulted on this platform."""
        return 0
1444
1445	# Test and time quote() and unquote()
1446	def test1():
1447	s = ''
1448	for i in range(256): s = s + chr(i)
1449	s = s*4
1450	t0 = time.time()
1451	qs = quote(s)
1452	uqs = unquote(qs)
1453	t1 = time.time()
1454	if uqs != s:
1455	print 'Wrong!'
1456	print repr(s)
1457	print repr(qs)
1458	print repr(uqs)
1459	print round(t1 - t0, 3), 'sec'
1460
1461
1462	def reporthook(blocknum, blocksize, totalsize):
1463	# Report during remote transfers
1464	print "Block number: %d, Block size: %d, Total size: %d" % (
1465	blocknum, blocksize, totalsize)
1466
1467	# Test program
1468	def test(args=[]):
1469	if not args:
1470	args = [
1471	'/etc/passwd',
1472	'file:/etc/passwd',
1473	'file://localhost/etc/passwd',
1474	'ftp://ftp.python.org/pub/python/README',
1475	## 'gopher://gopher.micro.umn.edu/1/',
1476	'http://www.python.org/index.html',
1477	]
1478	if hasattr(URLopener, "open_https"):
1479	args.append('https://synergy.as.cmu.edu/~geek/')
1480	try:
1481	for url in args:
1482	print '-'10, url, '-'10
1483	fn, h = urlretrieve(url, None, reporthook)
1484	print fn
1485	if h:
1486	print '======'
1487	for k in h.keys(): print k + ':', h[k]
1488	print '======'
1489	fp = open(fn, 'rb')
1490	data = fp.read()
1491	del fp
1492	if '\r' in data:
1493	table = string.maketrans("", "")
1494	data = data.translate(table, "\r")
1495	print data
1496	fn, h = None, None
1497	print '-'*40
1498	finally:
1499	urlcleanup()
1500
1501	def main():
1502	import getopt, sys
1503	try:
1504	opts, args = getopt.getopt(sys.argv[1:], "th")
1505	except getopt.error, msg:
1506	print msg
1507	print "Use -h for help"
1508	return
1509	t = 0
1510	for o, a in opts:
1511	if o == '-t':
1512	t = t + 1
1513	if o == '-h':
1514	print "Usage: python urllib.py [-t] [url ...]"
1515	print "-t runs self-test;",
1516	print "otherwise, contents of urls are printed"
1517	return
1518	if t:
1519	if t > 1:
1520	test1()
1521	test(args)
1522	else:
1523	if not args:
1524	print "Use -h for help"
1525	for url in args:
1526	print urlopen(url).read(),
1527
1528	# Run test program when run as a script
1529	if __name__ == '__main__':
1530	main()

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: trunk/essentials/dev-lang/python/Lib/urllib.py

Download in other formats: