source: python/trunk/Lib/urllib.py@ 388

Last change on this file since 388 was 2, checked in by Yuri Dario, 15 years ago

Initial import for vendor code.

  • Property svn:eol-style set to native
File size: 56.5 KB
Line 
1"""Open an arbitrary URL.
2
3See the following document for more info on URLs:
4"Names and Addresses, URIs, URLs, URNs, URCs", at
5http://www.w3.org/pub/WWW/Addressing/Overview.html
6
7See also the HTTP spec (from which the error codes are derived):
8"HTTP - Hypertext Transfer Protocol", at
9http://www.w3.org/pub/WWW/Protocols/
10
11Related standards and specs:
12- RFC1808: the "relative URL" spec. (authoritative status)
13- RFC1738 - the "URL standard". (authoritative status)
14- RFC1630 - the "URI spec". (informational status)
15
16The object returned by URLopener().open(file) will differ per
17protocol. All you know is that it has methods read(), readline(),
18readlines(), fileno(), close() and info(). The read*(), fileno()
19and close() methods work like those of open files.
20The info() method returns a mimetools.Message object which can be
21used to query various info about the object, if available.
22(mimetools.Message objects are queried with the getheader() method.)
23"""
24
25import string
26import socket
27import os
28import time
29import sys
30from urlparse import urljoin as basejoin
31
32__all__ = ["urlopen", "URLopener", "FancyURLopener", "urlretrieve",
33 "urlcleanup", "quote", "quote_plus", "unquote", "unquote_plus",
34 "urlencode", "url2pathname", "pathname2url", "splittag",
35 "localhost", "thishost", "ftperrors", "basejoin", "unwrap",
36 "splittype", "splithost", "splituser", "splitpasswd", "splitport",
37 "splitnport", "splitquery", "splitattr", "splitvalue",
38 "getproxies"]
39
40__version__ = '1.17' # XXX This version is not always updated :-(
41
42MAXFTPCACHE = 10 # Trim the ftp cache beyond this size
43
44# Helper for non-unix systems
if os.name == 'mac':
    from macurl2path import url2pathname, pathname2url
elif os.name == 'nt':
    from nturl2path import url2pathname, pathname2url
elif os.name == 'riscos':
    from rourl2path import url2pathname, pathname2url
else:
    # No platform-specific module applies: a path and its URL form differ
    # only by percent-escaping.
    def url2pathname(pathname):
        """Turn the path part of a 'file' URL into a file system path by
        undoing its percent-escapes.

        OS-specific; not recommended for general use."""
        local_path = unquote(pathname)
        return local_path

    def pathname2url(pathname):
        """Turn a file system path into the path part of a 'file' URL by
        percent-escaping it.

        OS-specific; not recommended for general use."""
        url_path = quote(pathname)
        return url_path
61
62# This really consists of two pieces:
63# (1) a class which handles opening of all sorts of URLs
64# (plus assorted utilities etc.)
65# (2) a set of functions for parsing URLs
66# XXX Should these be separated out into different modules?
67
68
69# Shortcut for basic usage
_urlopener = None
def urlopen(url, data=None, proxies=None):
    """Open *url* and return a file-like object for reading it.

    When *proxies* is given, a fresh FancyURLopener using that mapping is
    built for this call only; otherwise one module-wide FancyURLopener is
    created on first use and shared by later calls.  A non-None *data* is
    passed on to the opener's open() method."""
    from warnings import warnpy3k
    warnpy3k("urllib.urlopen() has been removed in Python 3.0 in "
             "favor of urllib2.urlopen()", stacklevel=2)

    global _urlopener
    if proxies is not None:
        opener = FancyURLopener(proxies=proxies)
    else:
        if not _urlopener:
            _urlopener = FancyURLopener()
        opener = _urlopener
    if data is None:
        return opener.open(url)
    return opener.open(url, data)
def urlretrieve(url, filename=None, reporthook=None, data=None):
    """Retrieve *url* through the shared module-wide opener.

    The shared FancyURLopener is created on first use; the arguments are
    handed unchanged to its retrieve() method, whose result is returned."""
    global _urlopener
    opener = _urlopener
    if not opener:
        opener = _urlopener = FancyURLopener()
    return opener.retrieve(url, filename, reporthook, data)
def urlcleanup():
    """Run cleanup() on the shared module-wide opener, if one exists."""
    opener = _urlopener
    if opener:
        opener.cleanup()
97
98# check for SSL
# _have_ssl records whether the ssl module could be imported; it gates the
# definition of URLopener.open_https below.
try:
    import ssl
except ImportError:
    # Only a failed import means "no SSL support".  A bare except here
    # would also swallow KeyboardInterrupt and SystemExit.
    _have_ssl = False
else:
    _have_ssl = True
105
106# exception raised when downloaded size does not match content-length
class ContentTooShortError(IOError):
    """IOError raised when fewer bytes arrive than Content-Length announced.

    *message* becomes the exception message; *content* is kept unchanged on
    the ``content`` attribute for the caller to inspect."""

    def __init__(self, message, content):
        super(ContentTooShortError, self).__init__(message)
        self.content = content
111
112ftpcache = {}
113class URLopener:
114 """Class to open URLs.
115 This is a class rather than just a subroutine because we may need
116 more than one set of global protocol-specific options.
117 Note -- this is a base class for those who don't want the
118 automatic handling of errors type 302 (relocated) and 401
119 (authorization needed)."""
120
121 __tempfiles = None
122
123 version = "Python-urllib/%s" % __version__
124
125 # Constructor
126 def __init__(self, proxies=None, **x509):
127 if proxies is None:
128 proxies = getproxies()
129 assert hasattr(proxies, 'has_key'), "proxies must be a mapping"
130 self.proxies = proxies
131 self.key_file = x509.get('key_file')
132 self.cert_file = x509.get('cert_file')
133 self.addheaders = [('User-Agent', self.version)]
134 self.__tempfiles = []
135 self.__unlink = os.unlink # See cleanup()
136 self.tempcache = None
137 # Undocumented feature: if you assign {} to tempcache,
138 # it is used to cache files retrieved with
139 # self.retrieve(). This is not enabled by default
140 # since it does not work for changing documents (and I
141 # haven't got the logic to check expiration headers
142 # yet).
143 self.ftpcache = ftpcache
144 # Undocumented feature: you can use a different
145 # ftp cache by assigning to the .ftpcache member;
146 # in case you want logically independent URL openers
147 # XXX This is not threadsafe. Bah.
148
149 def __del__(self):
150 self.close()
151
152 def close(self):
153 self.cleanup()
154
155 def cleanup(self):
156 # This code sometimes runs when the rest of this module
157 # has already been deleted, so it can't use any globals
158 # or import anything.
159 if self.__tempfiles:
160 for file in self.__tempfiles:
161 try:
162 self.__unlink(file)
163 except OSError:
164 pass
165 del self.__tempfiles[:]
166 if self.tempcache:
167 self.tempcache.clear()
168
169 def addheader(self, *args):
170 """Add a header to be used by the HTTP interface only
171 e.g. u.addheader('Accept', 'sound/basic')"""
172 self.addheaders.append(args)
173
174 # External interface
175 def open(self, fullurl, data=None):
176 """Use URLopener().open(file) instead of open(file, 'r')."""
177 fullurl = unwrap(toBytes(fullurl))
178 # percent encode url. fixing lame server errors like space within url
179 # parts
180 fullurl = quote(fullurl, safe="%/:=&?~#+!$,;'@()*[]|")
181 if self.tempcache and fullurl in self.tempcache:
182 filename, headers = self.tempcache[fullurl]
183 fp = open(filename, 'rb')
184 return addinfourl(fp, headers, fullurl)
185 urltype, url = splittype(fullurl)
186 if not urltype:
187 urltype = 'file'
188 if urltype in self.proxies:
189 proxy = self.proxies[urltype]
190 urltype, proxyhost = splittype(proxy)
191 host, selector = splithost(proxyhost)
192 url = (host, fullurl) # Signal special case to open_*()
193 else:
194 proxy = None
195 name = 'open_' + urltype
196 self.type = urltype
197 name = name.replace('-', '_')
198 if not hasattr(self, name):
199 if proxy:
200 return self.open_unknown_proxy(proxy, fullurl, data)
201 else:
202 return self.open_unknown(fullurl, data)
203 try:
204 if data is None:
205 return getattr(self, name)(url)
206 else:
207 return getattr(self, name)(url, data)
208 except socket.error, msg:
209 raise IOError, ('socket error', msg), sys.exc_info()[2]
210
211 def open_unknown(self, fullurl, data=None):
212 """Overridable interface to open unknown URL type."""
213 type, url = splittype(fullurl)
214 raise IOError, ('url error', 'unknown url type', type)
215
216 def open_unknown_proxy(self, proxy, fullurl, data=None):
217 """Overridable interface to open unknown URL type."""
218 type, url = splittype(fullurl)
219 raise IOError, ('url error', 'invalid proxy for %s' % type, proxy)
220
221 # External interface
222 def retrieve(self, url, filename=None, reporthook=None, data=None):
223 """retrieve(url) returns (filename, headers) for a local object
224 or (tempfilename, headers) for a remote object."""
225 url = unwrap(toBytes(url))
226 if self.tempcache and url in self.tempcache:
227 return self.tempcache[url]
228 type, url1 = splittype(url)
229 if filename is None and (not type or type == 'file'):
230 try:
231 fp = self.open_local_file(url1)
232 hdrs = fp.info()
233 del fp
234 return url2pathname(splithost(url1)[1]), hdrs
235 except IOError, msg:
236 pass
237 fp = self.open(url, data)
238 try:
239 headers = fp.info()
240 if filename:
241 tfp = open(filename, 'wb')
242 else:
243 import tempfile
244 garbage, path = splittype(url)
245 garbage, path = splithost(path or "")
246 path, garbage = splitquery(path or "")
247 path, garbage = splitattr(path or "")
248 suffix = os.path.splitext(path)[1]
249 (fd, filename) = tempfile.mkstemp(suffix)
250 self.__tempfiles.append(filename)
251 tfp = os.fdopen(fd, 'wb')
252 try:
253 result = filename, headers
254 if self.tempcache is not None:
255 self.tempcache[url] = result
256 bs = 1024*8
257 size = -1
258 read = 0
259 blocknum = 0
260 if reporthook:
261 if "content-length" in headers:
262 size = int(headers["Content-Length"])
263 reporthook(blocknum, bs, size)
264 while 1:
265 block = fp.read(bs)
266 if block == "":
267 break
268 read += len(block)
269 tfp.write(block)
270 blocknum += 1
271 if reporthook:
272 reporthook(blocknum, bs, size)
273 finally:
274 tfp.close()
275 finally:
276 fp.close()
277 del fp
278 del tfp
279
280 # raise exception if actual size does not match content-length header
281 if size >= 0 and read < size:
282 raise ContentTooShortError("retrieval incomplete: got only %i out "
283 "of %i bytes" % (read, size), result)
284
285 return result
286
287 # Each method named open_<type> knows how to open that type of URL
288
    def open_http(self, url, data=None):
        """Open *url* with the HTTP protocol and return a file-like object.

        url  -- either a string of the form '//host/selector' (direct
                request) or a (proxyhost, fullurl) tuple (request through
                a proxy).
        data -- when not None, sent as the body of a POST with content
                type application/x-www-form-urlencoded; otherwise a GET
                is issued.

        A 2xx reply is returned wrapped in addinfourl; any other status
        is handed to http_error().  Raises IOError when no host can be
        determined or the status line is unusable."""
        import httplib
        user_passwd = None
        proxy_passwd= None
        if isinstance(url, str):
            # Direct request: credentials may be embedded in the host part.
            host, selector = splithost(url)
            if host:
                user_passwd, host = splituser(host)
                host = unquote(host)
            realhost = host
        else:
            # Proxied request: connect to the proxy host and send the full
            # URL as the selector.
            host, selector = url
            # check whether the proxy contains authorization information
            proxy_passwd, host = splituser(host)
            # now we proceed with the url we want to obtain
            urltype, rest = splittype(selector)
            url = rest
            user_passwd = None
            if urltype.lower() != 'http':
                realhost = None
            else:
                realhost, rest = splithost(rest)
                if realhost:
                    user_passwd, realhost = splituser(realhost)
                if user_passwd:
                    # Rebuild the selector without the user:passwd@ part;
                    # the credentials go into an Authorization header.
                    selector = "%s://%s%s" % (urltype, realhost, rest)
                if proxy_bypass(realhost):
                    # Connect straight to the target host instead of the
                    # proxy.
                    host = realhost

            #print "proxy via http:", host, selector
        if not host: raise IOError, ('http error', 'no host given')

        if proxy_passwd:
            import base64
            proxy_auth = base64.b64encode(proxy_passwd).strip()
        else:
            proxy_auth = None

        if user_passwd:
            import base64
            auth = base64.b64encode(user_passwd).strip()
        else:
            auth = None
        h = httplib.HTTP(host)
        if data is not None:
            h.putrequest('POST', selector)
            h.putheader('Content-Type', 'application/x-www-form-urlencoded')
            h.putheader('Content-Length', '%d' % len(data))
        else:
            h.putrequest('GET', selector)
        if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
        if auth: h.putheader('Authorization', 'Basic %s' % auth)
        if realhost: h.putheader('Host', realhost)
        # Headers registered via addheader(), including the default
        # User-Agent.
        for args in self.addheaders: h.putheader(*args)
        h.endheaders()
        if data is not None:
            h.send(data)
        errcode, errmsg, headers = h.getreply()
        fp = h.getfile()
        if errcode == -1:
            if fp: fp.close()
            # something went wrong with the HTTP status line
            raise IOError, ('http protocol error', 0,
                            'got a bad status line', None)
        # According to RFC 2616, "2xx" code indicates that the client's
        # request was successfully received, understood, and accepted.
        if (200 <= errcode < 300):
            return addinfourl(fp, headers, "http:" + url, errcode)
        else:
            if data is None:
                return self.http_error(url, fp, errcode, errmsg, headers)
            else:
                return self.http_error(url, fp, errcode, errmsg, headers, data)
363
364 def http_error(self, url, fp, errcode, errmsg, headers, data=None):
365 """Handle http errors.
366 Derived class can override this, or provide specific handlers
367 named http_error_DDD where DDD is the 3-digit error code."""
368 # First check if there's a specific handler for this error
369 name = 'http_error_%d' % errcode
370 if hasattr(self, name):
371 method = getattr(self, name)
372 if data is None:
373 result = method(url, fp, errcode, errmsg, headers)
374 else:
375 result = method(url, fp, errcode, errmsg, headers, data)
376 if result: return result
377 return self.http_error_default(url, fp, errcode, errmsg, headers)
378
379 def http_error_default(self, url, fp, errcode, errmsg, headers):
380 """Default error handler: close the connection and raise IOError."""
381 void = fp.read()
382 fp.close()
383 raise IOError, ('http error', errcode, errmsg, headers)
384
    # open_https is only defined when the ssl module could be imported.
    if _have_ssl:
        def open_https(self, url, data=None):
            """Open *url* with the HTTPS protocol and return a file-like
            object.

            url  -- either a string of the form '//host/selector' (direct
                    request) or a (proxyhost, fullurl) tuple (request
                    through a proxy).
            data -- when not None, sent as the body of a POST with content
                    type application/x-www-form-urlencoded; otherwise a
                    GET is issued.

            The connection uses self.key_file and self.cert_file.  A 2xx
            reply is returned wrapped in addinfourl; any other status is
            handed to http_error().  Raises IOError when no host can be
            determined or the status line is unusable."""

            import httplib
            user_passwd = None
            proxy_passwd = None
            if isinstance(url, str):
                # Direct request: credentials may be embedded in the host
                # part.
                host, selector = splithost(url)
                if host:
                    user_passwd, host = splituser(host)
                    host = unquote(host)
                realhost = host
            else:
                # Proxied request: connect to the proxy host and send the
                # full URL as the selector.
                host, selector = url
                # here, we determine, whether the proxy contains authorization information
                proxy_passwd, host = splituser(host)
                urltype, rest = splittype(selector)
                url = rest
                user_passwd = None
                if urltype.lower() != 'https':
                    realhost = None
                else:
                    realhost, rest = splithost(rest)
                    if realhost:
                        user_passwd, realhost = splituser(realhost)
                    if user_passwd:
                        # Rebuild the selector without the user:passwd@
                        # part; the credentials go into an Authorization
                        # header.
                        selector = "%s://%s%s" % (urltype, realhost, rest)
                #print "proxy via https:", host, selector
            if not host: raise IOError, ('https error', 'no host given')
            if proxy_passwd:
                import base64
                proxy_auth = base64.b64encode(proxy_passwd).strip()
            else:
                proxy_auth = None
            if user_passwd:
                import base64
                auth = base64.b64encode(user_passwd).strip()
            else:
                auth = None
            h = httplib.HTTPS(host, 0,
                              key_file=self.key_file,
                              cert_file=self.cert_file)
            if data is not None:
                h.putrequest('POST', selector)
                h.putheader('Content-Type',
                            'application/x-www-form-urlencoded')
                h.putheader('Content-Length', '%d' % len(data))
            else:
                h.putrequest('GET', selector)
            if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
            if auth: h.putheader('Authorization', 'Basic %s' % auth)
            if realhost: h.putheader('Host', realhost)
            # Headers registered via addheader(), including the default
            # User-Agent.
            for args in self.addheaders: h.putheader(*args)
            h.endheaders()
            if data is not None:
                h.send(data)
            errcode, errmsg, headers = h.getreply()
            fp = h.getfile()
            if errcode == -1:
                if fp: fp.close()
                # something went wrong with the HTTP status line
                raise IOError, ('http protocol error', 0,
                                'got a bad status line', None)
            # According to RFC 2616, "2xx" code indicates that the client's
            # request was successfully received, understood, and accepted.
            if (200 <= errcode < 300):
                return addinfourl(fp, headers, "https:" + url, errcode)
            else:
                if data is None:
                    return self.http_error(url, fp, errcode, errmsg, headers)
                else:
                    return self.http_error(url, fp, errcode, errmsg, headers,
                                           data)
459
460 def open_file(self, url):
461 """Use local file or FTP depending on form of URL."""
462 if not isinstance(url, str):
463 raise IOError, ('file error', 'proxy support for file protocol currently not implemented')
464 if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/':
465 return self.open_ftp(url)
466 else:
467 return self.open_local_file(url)
468
469 def open_local_file(self, url):
470 """Use local file."""
471 import mimetypes, mimetools, email.utils
472 try:
473 from cStringIO import StringIO
474 except ImportError:
475 from StringIO import StringIO
476 host, file = splithost(url)
477 localname = url2pathname(file)
478 try:
479 stats = os.stat(localname)
480 except OSError, e:
481 raise IOError(e.errno, e.strerror, e.filename)
482 size = stats.st_size
483 modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
484 mtype = mimetypes.guess_type(url)[0]
485 headers = mimetools.Message(StringIO(
486 'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
487 (mtype or 'text/plain', size, modified)))
488 if not host:
489 urlfile = file
490 if file[:1] == '/':
491 urlfile = 'file://' + file
492 return addinfourl(open(localname, 'rb'),
493 headers, urlfile)
494 host, port = splitport(host)
495 if not port \
496 and socket.gethostbyname(host) in (localhost(), thishost()):
497 urlfile = file
498 if file[:1] == '/':
499 urlfile = 'file://' + file
500 return addinfourl(open(localname, 'rb'),
501 headers, urlfile)
502 raise IOError, ('local file error', 'not on local host')
503
    def open_ftp(self, url):
        """Open *url* ('//[user[:passwd]@]host[:port]/path[;attrs]') with
        the FTP protocol.

        Connections are cached in self.ftpcache, keyed by
        (user, host, port, directory).  Returns an addinfourl object whose
        headers carry Content-Type and Content-Length when they are known.
        Raises IOError for the proxy (non-string) form of *url*, for a
        missing host, and for any error reported by ftperrors()."""
        if not isinstance(url, str):
            raise IOError, ('ftp error', 'proxy support for ftp protocol currently not implemented')
        import mimetypes, mimetools
        try:
            from cStringIO import StringIO
        except ImportError:
            from StringIO import StringIO
        host, path = splithost(url)
        if not host: raise IOError, ('ftp error', 'no host given')
        host, port = splitport(host)
        user, host = splituser(host)
        if user: user, passwd = splitpasswd(user)
        else: passwd = None
        host = unquote(host)
        user = unquote(user or '')
        passwd = unquote(passwd or '')
        # The resolved address, not the name, becomes part of the cache key.
        host = socket.gethostbyname(host)
        if not port:
            import ftplib
            port = ftplib.FTP_PORT
        else:
            port = int(port)
        path, attrs = splitattr(path)
        path = unquote(path)
        # Split the path into the directories to cwd into and the final
        # component to retrieve.
        dirs = path.split('/')
        dirs, file = dirs[:-1], dirs[-1]
        # Drop the empty component produced by the leading '/'; if another
        # empty component follows (path started with '//'), treat it as
        # the root directory.
        if dirs and not dirs[0]: dirs = dirs[1:]
        if dirs and not dirs[0]: dirs[0] = '/'
        key = user, host, port, '/'.join(dirs)
        # XXX thread unsafe!
        if len(self.ftpcache) > MAXFTPCACHE:
            # Prune the cache, rather arbitrarily
            for k in self.ftpcache.keys():
                if k != key:
                    v = self.ftpcache[k]
                    del self.ftpcache[k]
                    v.close()
        try:
            if not key in self.ftpcache:
                self.ftpcache[key] = \
                    ftpwrapper(user, passwd, host, port, dirs)
            # Default transfer type: directory listing when no file name
            # was given, binary ('I') otherwise; a ';type=' attribute
            # overrides it.
            if not file: type = 'D'
            else: type = 'I'
            for attr in attrs:
                attr, value = splitvalue(attr)
                if attr.lower() == 'type' and \
                   value in ('a', 'A', 'i', 'I', 'd', 'D'):
                    type = value.upper()
            (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
            mtype = mimetypes.guess_type("ftp:" + url)[0]
            headers = ""
            if mtype:
                headers += "Content-Type: %s\n" % mtype
            if retrlen is not None and retrlen >= 0:
                headers += "Content-Length: %d\n" % retrlen
            headers = mimetools.Message(StringIO(headers))
            return addinfourl(fp, headers, "ftp:" + url)
        except ftperrors(), msg:
            # Re-raise as IOError while keeping the original traceback.
            raise IOError, ('ftp error', msg), sys.exc_info()[2]
565
566 def open_data(self, url, data=None):
567 """Use "data" URL."""
568 if not isinstance(url, str):
569 raise IOError, ('data error', 'proxy support for data protocol currently not implemented')
570 # ignore POSTed data
571 #
572 # syntax of data URLs:
573 # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data
574 # mediatype := [ type "/" subtype ] *( ";" parameter )
575 # data := *urlchar
576 # parameter := attribute "=" value
577 import mimetools
578 try:
579 from cStringIO import StringIO
580 except ImportError:
581 from StringIO import StringIO
582 try:
583 [type, data] = url.split(',', 1)
584 except ValueError:
585 raise IOError, ('data error', 'bad data URL')
586 if not type:
587 type = 'text/plain;charset=US-ASCII'
588 semi = type.rfind(';')
589 if semi >= 0 and '=' not in type[semi:]:
590 encoding = type[semi+1:]
591 type = type[:semi]
592 else:
593 encoding = ''
594 msg = []
595 msg.append('Date: %s'%time.strftime('%a, %d %b %Y %T GMT',
596 time.gmtime(time.time())))
597 msg.append('Content-type: %s' % type)
598 if encoding == 'base64':
599 import base64
600 data = base64.decodestring(data)
601 else:
602 data = unquote(data)
603 msg.append('Content-Length: %d' % len(data))
604 msg.append('')
605 msg.append(data)
606 msg = '\n'.join(msg)
607 f = StringIO(msg)
608 headers = mimetools.Message(f, 0)
609 #f.fileno = None # needed for addinfourl
610 return addinfourl(f, headers, url)
611
612
613class FancyURLopener(URLopener):
614 """Derived class with handlers for errors we can handle (perhaps)."""
615
616 def __init__(self, *args, **kwargs):
617 URLopener.__init__(self, *args, **kwargs)
618 self.auth_cache = {}
619 self.tries = 0
620 self.maxtries = 10
621
622 def http_error_default(self, url, fp, errcode, errmsg, headers):
623 """Default error handling -- don't raise an exception."""
624 return addinfourl(fp, headers, "http:" + url, errcode)
625
626 def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
627 """Error 302 -- relocated (temporarily)."""
628 self.tries += 1
629 if self.maxtries and self.tries >= self.maxtries:
630 if hasattr(self, "http_error_500"):
631 meth = self.http_error_500
632 else:
633 meth = self.http_error_default
634 self.tries = 0
635 return meth(url, fp, 500,
636 "Internal Server Error: Redirect Recursion", headers)
637 result = self.redirect_internal(url, fp, errcode, errmsg, headers,
638 data)
639 self.tries = 0
640 return result
641
642 def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
643 if 'location' in headers:
644 newurl = headers['location']
645 elif 'uri' in headers:
646 newurl = headers['uri']
647 else:
648 return
649 void = fp.read()
650 fp.close()
651 # In case the server sent a relative URL, join with original:
652 newurl = basejoin(self.type + ":" + url, newurl)
653 return self.open(newurl)
654
655 def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
656 """Error 301 -- also relocated (permanently)."""
657 return self.http_error_302(url, fp, errcode, errmsg, headers, data)
658
659 def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
660 """Error 303 -- also relocated (essentially identical to 302)."""
661 return self.http_error_302(url, fp, errcode, errmsg, headers, data)
662
663 def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
664 """Error 307 -- relocated, but turn POST into error."""
665 if data is None:
666 return self.http_error_302(url, fp, errcode, errmsg, headers, data)
667 else:
668 return self.http_error_default(url, fp, errcode, errmsg, headers)
669
670 def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
671 """Error 401 -- authentication required.
672 This function supports Basic authentication only."""
673 if not 'www-authenticate' in headers:
674 URLopener.http_error_default(self, url, fp,
675 errcode, errmsg, headers)
676 stuff = headers['www-authenticate']
677 import re
678 match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
679 if not match:
680 URLopener.http_error_default(self, url, fp,
681 errcode, errmsg, headers)
682 scheme, realm = match.groups()
683 if scheme.lower() != 'basic':
684 URLopener.http_error_default(self, url, fp,
685 errcode, errmsg, headers)
686 name = 'retry_' + self.type + '_basic_auth'
687 if data is None:
688 return getattr(self,name)(url, realm)
689 else:
690 return getattr(self,name)(url, realm, data)
691
692 def http_error_407(self, url, fp, errcode, errmsg, headers, data=None):
693 """Error 407 -- proxy authentication required.
694 This function supports Basic authentication only."""
695 if not 'proxy-authenticate' in headers:
696 URLopener.http_error_default(self, url, fp,
697 errcode, errmsg, headers)
698 stuff = headers['proxy-authenticate']
699 import re
700 match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"', stuff)
701 if not match:
702 URLopener.http_error_default(self, url, fp,
703 errcode, errmsg, headers)
704 scheme, realm = match.groups()
705 if scheme.lower() != 'basic':
706 URLopener.http_error_default(self, url, fp,
707 errcode, errmsg, headers)
708 name = 'retry_proxy_' + self.type + '_basic_auth'
709 if data is None:
710 return getattr(self,name)(url, realm)
711 else:
712 return getattr(self,name)(url, realm, data)
713
714 def retry_proxy_http_basic_auth(self, url, realm, data=None):
715 host, selector = splithost(url)
716 newurl = 'http://' + host + selector
717 proxy = self.proxies['http']
718 urltype, proxyhost = splittype(proxy)
719 proxyhost, proxyselector = splithost(proxyhost)
720 i = proxyhost.find('@') + 1
721 proxyhost = proxyhost[i:]
722 user, passwd = self.get_user_passwd(proxyhost, realm, i)
723 if not (user or passwd): return None
724 proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost
725 self.proxies['http'] = 'http://' + proxyhost + proxyselector
726 if data is None:
727 return self.open(newurl)
728 else:
729 return self.open(newurl, data)
730
731 def retry_proxy_https_basic_auth(self, url, realm, data=None):
732 host, selector = splithost(url)
733 newurl = 'https://' + host + selector
734 proxy = self.proxies['https']
735 urltype, proxyhost = splittype(proxy)
736 proxyhost, proxyselector = splithost(proxyhost)
737 i = proxyhost.find('@') + 1
738 proxyhost = proxyhost[i:]
739 user, passwd = self.get_user_passwd(proxyhost, realm, i)
740 if not (user or passwd): return None
741 proxyhost = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + proxyhost
742 self.proxies['https'] = 'https://' + proxyhost + proxyselector
743 if data is None:
744 return self.open(newurl)
745 else:
746 return self.open(newurl, data)
747
748 def retry_http_basic_auth(self, url, realm, data=None):
749 host, selector = splithost(url)
750 i = host.find('@') + 1
751 host = host[i:]
752 user, passwd = self.get_user_passwd(host, realm, i)
753 if not (user or passwd): return None
754 host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
755 newurl = 'http://' + host + selector
756 if data is None:
757 return self.open(newurl)
758 else:
759 return self.open(newurl, data)
760
761 def retry_https_basic_auth(self, url, realm, data=None):
762 host, selector = splithost(url)
763 i = host.find('@') + 1
764 host = host[i:]
765 user, passwd = self.get_user_passwd(host, realm, i)
766 if not (user or passwd): return None
767 host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
768 newurl = 'https://' + host + selector
769 if data is None:
770 return self.open(newurl)
771 else:
772 return self.open(newurl, data)
773
774 def get_user_passwd(self, host, realm, clear_cache = 0):
775 key = realm + '@' + host.lower()
776 if key in self.auth_cache:
777 if clear_cache:
778 del self.auth_cache[key]
779 else:
780 return self.auth_cache[key]
781 user, passwd = self.prompt_user_passwd(host, realm)
782 if user or passwd: self.auth_cache[key] = (user, passwd)
783 return user, passwd
784
785 def prompt_user_passwd(self, host, realm):
786 """Override this in a GUI environment!"""
787 import getpass
788 try:
789 user = raw_input("Enter username for %s at %s: " % (realm,
790 host))
791 passwd = getpass.getpass("Enter password for %s in %s at %s: " %
792 (user, realm, host))
793 return user, passwd
794 except KeyboardInterrupt:
795 print
796 return None, None
797
798
799# Utility functions
800
_localhost = None
def localhost():
    """Return the IP address of the magic hostname 'localhost'.

    The lookup is done once and remembered for later calls."""
    global _localhost
    if _localhost is not None:
        return _localhost
    _localhost = socket.gethostbyname('localhost')
    return _localhost
808
_thishost = None
def thishost():
    """Return the IP address of the current host.

    The lookup is done once and remembered for later calls."""
    global _thishost
    if _thishost is not None:
        return _thishost
    _thishost = socket.gethostbyname(socket.gethostname())
    return _thishost
816
_ftperrors = None
def ftperrors():
    """Return the set of errors raised by the FTP class.

    ftplib is imported on first use and its all_errors value is
    remembered for later calls."""
    global _ftperrors
    if _ftperrors is not None:
        return _ftperrors
    import ftplib
    _ftperrors = ftplib.all_errors
    return _ftperrors
825
_noheaders = None
def noheaders():
    """Return an empty mimetools.Message object.

    The object is built on first use from an empty StringIO and shared by
    all later calls."""
    global _noheaders
    if _noheaders is not None:
        return _noheaders
    import mimetools
    try:
        from cStringIO import StringIO
    except ImportError:
        from StringIO import StringIO
    _noheaders = mimetools.Message(StringIO(), 0)
    _noheaders.fp.close()   # Recycle file descriptor
    return _noheaders
839
840
841# Utility classes
842
class ftpwrapper:
    """Class used by open_ftp() for cache of open FTP connections.

    An instance holds one logged-in ftplib.FTP connection positioned in a
    given directory, and tracks (via self.busy) whether a data transfer is
    still awaiting its final server response."""

    def __init__(self, user, passwd, host, port, dirs,
                 timeout=socket._GLOBAL_DEFAULT_TIMEOUT):
        """Store the connection parameters and connect immediately."""
        self.user = user
        self.passwd = passwd
        self.host = host
        self.port = port
        self.dirs = dirs
        self.timeout = timeout
        self.init()

    def init(self):
        """(Re)connect, log in and change into each directory of
        self.dirs in turn."""
        import ftplib
        self.busy = 0
        self.ftp = ftplib.FTP()
        self.ftp.connect(self.host, self.port, self.timeout)
        self.ftp.login(self.user, self.passwd)
        for dir in self.dirs:
            self.ftp.cwd(dir)

    def retrfile(self, file, type):
        """Start retrieving *file* (or a directory listing) and return
        (fileobject, length).

        type -- 'd'/'D' requests a listing; any other value is sent to the
                server as 'TYPE <type>'.

        A listing is also produced when *file* is empty or when RETR is
        refused with a 550 reply.  Closing the returned object triggers
        endtransfer().  The length is the second item returned by
        ftplib's ntransfercmd()."""
        import ftplib
        self.endtransfer()
        if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
        else: cmd = 'TYPE ' + type; isdir = 0
        try:
            self.ftp.voidcmd(cmd)
        except ftplib.all_errors:
            # The command failed; reconnect once and repeat it.
            self.init()
            self.ftp.voidcmd(cmd)
        conn = None
        if file and not isdir:
            # Try to retrieve as a file
            try:
                cmd = 'RETR ' + file
                conn = self.ftp.ntransfercmd(cmd)
            except ftplib.error_perm, reason:
                # A 550 reply falls through to the directory-listing
                # attempt below; anything else is a real error.
                if str(reason)[:3] != '550':
                    raise IOError, ('ftp error', reason), sys.exc_info()[2]
        if not conn:
            # Set transfer mode to ASCII!
            self.ftp.voidcmd('TYPE A')
            # Try a directory listing. Verify that directory exists.
            if file:
                pwd = self.ftp.pwd()
                try:
                    try:
                        self.ftp.cwd(file)
                    except ftplib.error_perm, reason:
                        raise IOError, ('ftp error', reason), sys.exc_info()[2]
                finally:
                    # Always return to the directory we started in.
                    self.ftp.cwd(pwd)
                cmd = 'LIST ' + file
            else:
                cmd = 'LIST'
            conn = self.ftp.ntransfercmd(cmd)
        self.busy = 1
        # Pass back both a suitably decorated object and a retrieval length
        return (addclosehook(conn[0].makefile('rb'),
                             self.endtransfer), conn[1])
    def endtransfer(self):
        """Finish a pending transfer by reading the server's final
        response; errors listed by ftperrors() are ignored.  Does nothing
        when no transfer is pending."""
        if not self.busy:
            return
        self.busy = 0
        try:
            self.ftp.voidresp()
        except ftperrors():
            pass

    def close(self):
        """Finish any pending transfer and close the FTP connection,
        ignoring errors listed by ftperrors()."""
        self.endtransfer()
        try:
            self.ftp.close()
        except ftperrors():
            pass
920
class addbase:
    """Base class for addinfo and addclosehook.

    Wraps a file-like object and re-exports its read(), readline() and,
    when the wrapped object has them, readlines(), fileno(), __iter__()
    and next() as instance attributes.  A wrapped object without fileno()
    gets a fileno() that returns None."""

    def __init__(self, fp):
        self.fp = fp
        self.read = fp.read
        self.readline = fp.readline
        if hasattr(fp, "readlines"):
            self.readlines = fp.readlines
        if hasattr(fp, "fileno"):
            self.fileno = fp.fileno
        else:
            self.fileno = lambda: None
        for optional in ("__iter__", "next"):
            if hasattr(fp, optional):
                setattr(self, optional, getattr(fp, optional))

    def __repr__(self):
        details = (self.__class__.__name__, id(self), self.fp)
        return '<%s at %r whose fp = %r>' % details

    def close(self):
        """Close the wrapped file and drop the forwarded methods."""
        self.read = self.readline = self.readlines = self.fileno = None
        if self.fp:
            self.fp.close()
        self.fp = None
949
class addclosehook(addbase):
    """Class to add a close hook to an open file.

    closehook(*hookargs) is called once, after the wrapped file has been
    closed."""

    def __init__(self, fp, closehook, *hookargs):
        addbase.__init__(self, fp)
        self.closehook, self.hookargs = closehook, hookargs

    def close(self):
        """Close the wrapped file, then run and forget the hook."""
        addbase.close(self)
        if not self.closehook:
            return
        self.closehook(*self.hookargs)
        self.closehook = None
        self.hookargs = None
964
class addinfo(addbase):
    """class to add an info() method to an open file."""

    def __init__(self, fp, headers):
        addbase.__init__(self, fp)
        # Returned unchanged by info().
        self.headers = headers

    def info(self):
        """Return the headers object given at construction."""
        return self.headers
974
class addinfourl(addbase):
    """class to add info(), getcode() and geturl() methods to an open
    file."""

    def __init__(self, fp, headers, url, code=None):
        addbase.__init__(self, fp)
        self.headers, self.url, self.code = headers, url, code

    def info(self):
        """Return the headers object given at construction."""
        return self.headers

    def getcode(self):
        """Return the status code given at construction (None if none)."""
        return self.code

    def geturl(self):
        """Return the URL given at construction."""
        return self.url
992
993
994# Utilities to parse URLs (most of these return None for missing parts):
995# unwrap('<URL:type://host/path>') --> 'type://host/path'
996# splittype('type:opaquestring') --> 'type', 'opaquestring'
997# splithost('//host[:port]/path') --> 'host[:port]', '/path'
998# splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'
999# splitpasswd('user:passwd') -> 'user', 'passwd'
1000# splitport('host:port') --> 'host', 'port'
1001# splitquery('/path?query') --> '/path', 'query'
1002# splittag('/path#tag') --> '/path', 'tag'
1003# splitattr('/path;attr1=value1;attr2=value2;...') ->
1004# '/path', ['attr1=value1', 'attr2=value2', ...]
1005# splitvalue('attr=value') --> 'attr', 'value'
1006# unquote('abc%20def') -> 'abc def'
# quote('abc def') -> 'abc%20def'
1008
1009try:
1010 unicode
1011except NameError:
1012 def _is_unicode(x):
1013 return 0
1014else:
1015 def _is_unicode(x):
1016 return isinstance(x, unicode)
1017
def toBytes(url):
    """toBytes(u"URL") --> 'URL'.

    Anything that is not a unicode string is returned unchanged.  A
    unicode string is encoded as ASCII; if that fails a UnicodeError
    naming the offending URL is raised.
    """
    # Most URL schemes require ASCII. If that changes, the conversion
    # can be relaxed
    if not _is_unicode(url):
        return url
    try:
        return url.encode("ASCII")
    except UnicodeError:
        raise UnicodeError("URL " + repr(url) +
                           " contains non-ASCII characters")
1029
def unwrap(url):
    """unwrap('<URL:type://host/path>') --> 'type://host/path'.

    Surrounding whitespace, one pair of enclosing angle brackets and a
    leading 'URL:' marker are removed, in that order.
    """
    stripped = url.strip()
    if stripped.startswith('<') and stripped.endswith('>'):
        stripped = stripped[1:-1].strip()
    if stripped.startswith('URL:'):
        stripped = stripped[4:].strip()
    return stripped
1037
_typeprog = None
def splittype(url):
    """splittype('type:opaquestring') --> 'type', 'opaquestring'.

    The scheme is returned in lower case.  When the URL has no scheme
    the result is (None, url).
    """
    global _typeprog
    if _typeprog is None:
        # Compiled lazily so importing this module does not pull in re.
        import re
        _typeprog = re.compile('^([^/:]+):')

    found = _typeprog.match(url)
    if found is None:
        return None, url
    # The match covers the scheme plus its ':' separator.
    return found.group(1).lower(), url[found.end():]
1051
_hostprog = None
def splithost(url):
    """splithost('//host[:port]/path') --> 'host[:port]', '/path'.

    When the URL does not start with '//' the result is (None, url).
    """
    global _hostprog
    if _hostprog is None:
        # Compiled lazily so importing this module does not pull in re.
        import re
        _hostprog = re.compile('^//([^/?]*)(.*)$')

    found = _hostprog.match(url)
    if found is None:
        return None, url
    return found.group(1, 2)
1063
_userprog = None
def splituser(host):
    """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'.

    Both parts are passed through unquote() and returned as a
    two-element list.  Without an '@' the result is (None, host).
    """
    global _userprog
    if _userprog is None:
        # Compiled lazily so importing this module does not pull in re.
        import re
        _userprog = re.compile('^(.*)@(.*)$')

    found = _userprog.match(host)
    if found is None:
        return None, host
    return [unquote(part) for part in found.group(1, 2)]
1075
_passwdprog = None
def splitpasswd(user):
    """splitpasswd('user:passwd') -> 'user', 'passwd'.

    The split happens at the first ':'; everything after it, including
    further colons and newline characters, belongs to the password.
    Without a ':' the result is (user, None).
    """
    global _passwdprog
    if _passwdprog is None:
        # Compiled lazily so importing this module does not pull in re.
        import re
        # re.S lets '.' match newlines: without it a password that
        # contains '\n' either fails to split or loses a trailing newline.
        _passwdprog = re.compile('^([^:]*):(.*)$', re.S)

    match = _passwdprog.match(user)
    if match: return match.group(1, 2)
    return user, None
1087
# splitport('host:port') --> 'host', 'port'
_portprog = None
def splitport(host):
    """splitport('host:port') --> 'host', 'port'.

    The port is returned as a string of digits.  When the host does not
    end in ':' followed by digits the result is (host, None).
    """
    global _portprog
    if _portprog is None:
        # Compiled lazily so importing this module does not pull in re.
        import re
        _portprog = re.compile('^(.*):([0-9]+)$')

    found = _portprog.match(host)
    if found is None:
        return host, None
    return found.group(1, 2)
1100
_nportprog = None
def splitnport(host, defport=-1):
    """Split host and port, returning numeric port.
    Return given default port if no ':' found; defaults to -1.
    Return numerical port if a valid number is found after ':'.
    Return None if ':' but not a valid number."""
    global _nportprog
    if _nportprog is None:
        # Compiled lazily so importing this module does not pull in re.
        import re
        _nportprog = re.compile('^(.*):(.*)$')

    found = _nportprog.match(host)
    if found is None:
        return host, defport
    host, port = found.group(1, 2)
    # An empty or non-numeric port is reported as None.
    nport = None
    if port:
        try:
            nport = int(port)
        except ValueError:
            pass
    return host, nport
1122
_queryprog = None
def splitquery(url):
    """splitquery('/path?query') --> '/path', 'query'.

    The split happens at the last '?'.  Without a '?' the result is
    (url, None).
    """
    global _queryprog
    if _queryprog is None:
        # Compiled lazily so importing this module does not pull in re.
        import re
        _queryprog = re.compile(r'^(.*)\?([^?]*)$')

    found = _queryprog.match(url)
    if found is None:
        return url, None
    return found.group(1, 2)
1134
_tagprog = None
def splittag(url):
    """splittag('/path#tag') --> '/path', 'tag'.

    The split happens at the last '#'.  Without a '#' the result is
    (url, None).
    """
    global _tagprog
    if _tagprog is None:
        # Compiled lazily so importing this module does not pull in re.
        import re
        _tagprog = re.compile('^(.*)#([^#]*)$')

    found = _tagprog.match(url)
    if found is None:
        return url, None
    return found.group(1, 2)
1146
def splitattr(url):
    """splitattr('/path;attr1=value1;attr2=value2;...') ->
        '/path', ['attr1=value1', 'attr2=value2', ...].

    The attribute list is empty when the URL contains no ';'.
    """
    pieces = url.split(';')
    path, attrs = pieces[0], pieces[1:]
    return path, attrs
1152
_valueprog = None
def splitvalue(attr):
    """splitvalue('attr=value') --> 'attr', 'value'.

    The split happens at the first '='.  Without an '=' the result is
    (attr, None).
    """
    global _valueprog
    if _valueprog is None:
        # Compiled lazily so importing this module does not pull in re.
        import re
        _valueprog = re.compile('^([^=]*)=(.*)$')

    found = _valueprog.match(attr)
    if found is None:
        return attr, None
    return found.group(1, 2)
1164
# Map every two-digit hex string, in any mix of upper and lower case, to
# the character it encodes.  Percent-escapes are case-insensitive, so
# '%aB' and '%Ab' must decode just like '%ab' and '%AB'.
_hexdig = '0123456789ABCDEFabcdef'
_hextochr = dict((a + b, chr(int(a + b, 16)))
                 for a in _hexdig for b in _hexdig)

def unquote(s):
    """unquote('abc%20def') -> 'abc def'.

    Every '%XX' escape with two hexadecimal digits (either case) is
    replaced by the character it encodes.  A '%' that is not followed
    by two hex digits is left in the output unchanged.
    """
    pieces = s.split('%')
    # Text before the first '%' never needs decoding.
    res = [pieces[0]]
    for item in pieces[1:]:
        try:
            res.append(_hextochr[item[:2]] + item[2:])
        except KeyError:
            # Not a valid escape: put the '%' back untouched.
            res.append('%' + item)
        except UnicodeDecodeError:
            # A non-ASCII byte could not be joined to unicode text;
            # use the corresponding unicode character instead.
            res.append(unichr(int(item[:2], 16)) + item[2:])
    return "".join(res)
1180
def unquote_plus(s):
    """unquote_plus('%7e/abc+def') -> '~/abc def'

    Like unquote(), but every '+' is turned into a space before the
    percent-escapes are decoded.
    """
    return unquote(s.replace('+', ' '))
1185
1186always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
1187 'abcdefghijklmnopqrstuvwxyz'
1188 '0123456789' '_.-')
1189_safemaps = {}
1190
1191def quote(s, safe = '/'):
1192 """quote('abc def') -> 'abc%20def'
1193
1194 Each part of a URL, e.g. the path info, the query, etc., has a
1195 different set of reserved characters that must be quoted.
1196
1197 RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
1198 the following reserved characters.
1199
1200 reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
1201 "$" | ","
1202
1203 Each of these characters is reserved in some component of a URL,
1204 but not necessarily in all of them.
1205
1206 By default, the quote function is intended for quoting the path
1207 section of a URL. Thus, it will not encode '/'. This character
1208 is reserved, but in typical usage the quote function is being
1209 called on a path where the existing slash characters are used as
1210 reserved characters.
1211 """
1212 cachekey = (safe, always_safe)
1213 try:
1214 safe_map = _safemaps[cachekey]
1215 except KeyError:
1216 safe += always_safe
1217 safe_map = {}
1218 for i in range(256):
1219 c = chr(i)
1220 safe_map[c] = (c in safe) and c or ('%%%02X' % i)
1221 _safemaps[cachekey] = safe_map
1222 res = map(safe_map.__getitem__, s)
1223 return ''.join(res)
1224
def quote_plus(s, safe = ''):
    """Quote the query fragment of a URL; replacing ' ' with '+'

    Works like quote() with no extra safe characters by default, except
    that spaces come out as '+' instead of '%20'.
    """
    if ' ' not in s:
        return quote(s, safe)
    # Keep spaces literal during quoting, then swap them for '+'.
    return quote(s, safe + ' ').replace(' ', '+')
1231
def urlencode(query,doseq=0):
    """Encode a sequence of two-element tuples or dictionary into a URL query string.

    If any values in the query arg are sequences and doseq is true, each
    sequence element is converted to a separate parameter.

    If the query arg is a sequence of two-element tuples, the order of the
    parameters in the output will match the order of parameters in the
    input.

    Keys and values are converted with str() and quoted with
    quote_plus().  A TypeError is raised when query is neither a
    mapping nor a sequence whose first element is a tuple.
    """

    if hasattr(query,"items"):
        # mapping objects
        query = query.items()
    else:
        # it's a bother at times that strings and string-like objects are
        # sequences...
        try:
            # non-sequence items should not work with len()
            # non-empty strings will fail this
            if len(query) and not isinstance(query[0], tuple):
                raise TypeError
            # zero-length sequences of all types will get here and succeed,
            # but that's a minor nit - since the original implementation
            # allowed empty dicts that type of behavior probably should be
            # preserved for consistency
        except TypeError:
            # Raise a fresh exception instead of re-raising with the saved
            # traceback: keeping the traceback in a local variable creates
            # a reference cycle with this frame.
            raise TypeError("not a valid non-string sequence or mapping object")

    pairs = []
    if not doseq:
        # preserve old behavior
        for k, v in query:
            pairs.append(quote_plus(str(k)) + '=' + quote_plus(str(v)))
    else:
        for k, v in query:
            k = quote_plus(str(k))
            if isinstance(v, str):
                values = [quote_plus(v)]
            elif _is_unicode(v):
                # is there a reasonable way to convert to ASCII?
                # encode generates a string, but "replace" or "ignore"
                # lose information and "strict" can raise UnicodeError
                values = [quote_plus(v.encode("ASCII","replace"))]
            else:
                try:
                    # is this a sufficient test for sequence-ness?
                    len(v)
                except TypeError:
                    # not a sequence
                    values = [quote_plus(str(v))]
                else:
                    # one parameter per element of the sequence
                    values = [quote_plus(str(elt)) for elt in v]
            for value in values:
                pairs.append(k + '=' + value)
    return '&'.join(pairs)
1294
1295# Proxy handling
def getproxies_environment():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Scan the environment for variables named <scheme>_proxy;
    this seems to be the standard convention.  If you need a
    different way, you can pass a proxies dictionary to the
    [Fancy]URLopener constructor.

    Variable names are matched case-insensitively and variables with
    an empty value are skipped.
    """
    proxies = {}
    for name, value in os.environ.items():
        lowered = name.lower()
        if not value or not lowered.endswith('_proxy'):
            continue
        # Strip the 6-character '_proxy' suffix to get the scheme.
        proxies[lowered[:-6]] = value
    return proxies
1311
def proxy_bypass_environment(host):
    """Test if proxies should not be used for a particular host.

    Checks the environment for a variable named no_proxy, which should
    be a list of DNS suffixes separated by commas, or '*' for all hosts.
    Whitespace around the individual suffixes is ignored.

    Returns 1 when the host (with or without its port) ends with one of
    the suffixes, 0 otherwise.
    """
    no_proxy = os.environ.get('no_proxy', '') or os.environ.get('NO_PROXY', '')
    # '*' is special case for always bypass
    if no_proxy == '*':
        return 1
    # strip port off host
    hostonly, port = splitport(host)
    # check if the host ends with any of the DNS suffixes
    for name in no_proxy.split(','):
        # Lists are commonly written "a.com, b.com"; without stripping,
        # the entry " b.com" could never match.
        name = name.strip()
        if name and (hostonly.endswith(name) or host.endswith(name)):
            return 1
    # otherwise, don't bypass
    return 0
1330
1331
# Select the platform-specific getproxies() / proxy_bypass() pair.  In
# every branch the environment variables take precedence over the
# system configuration.
if sys.platform == 'darwin':
    from _scproxy import _get_proxy_settings, _get_proxies

    def proxy_bypass_macosx_sysconf(host):
        """
        Return True iff this host shouldn't be accessed using a proxy

        This function uses the MacOSX framework SystemConfiguration
        to fetch the proxy information.
        """
        import re
        import socket
        from fnmatch import fnmatch

        hostonly, port = splitport(host)

        def ip2num(ipAddr):
            # Pack a dotted address into one integer; short forms such
            # as '169.254' are padded with zero components on the right.
            parts = [int(part) for part in ipAddr.split('.')]
            if len(parts) != 4:
                parts = (parts + [0, 0, 0, 0])[:4]
            return (parts[0] << 24) | (parts[1] << 16) | (parts[2] << 8) | parts[3]

        proxy_settings = _get_proxy_settings()

        # Check for simple host names:
        if '.' not in host:
            if proxy_settings['exclude_simple']:
                return True

        hostIP = None

        for value in proxy_settings.get('exceptions', ()):
            # Items in the list are strings like these: *.local, 169.254/16
            if not value: continue

            m = re.match(r"(\d+(?:\.\d+)*)(/\d+)?", value)
            if m is not None:
                if hostIP is None:
                    try:
                        hostIP = socket.gethostbyname(hostonly)
                        hostIP = ip2num(hostIP)
                    except socket.error:
                        continue

                base = ip2num(m.group(1))
                mask = m.group(2)
                if mask is None:
                    # No explicit '/bits' suffix: use 8 bits for each
                    # address component that was written out.
                    mask = 8 * (m.group(1).count('.') + 1)
                else:
                    mask = int(mask[1:])
                mask = 32 - mask

                if (hostIP >> mask) == (base >> mask):
                    return True

            elif fnmatch(host, value):
                return True

        return False


    def getproxies_macosx_sysconf():
        """Return a dictionary of scheme -> proxy server URL mappings.

        This function uses the MacOSX framework SystemConfiguration
        to fetch the proxy information.
        """
        return _get_proxies()



    def proxy_bypass(host):
        """Return true if the proxy should be bypassed for host.

        Uses the environment when it defines any proxies, otherwise the
        SystemConfiguration settings.
        """
        if getproxies_environment():
            return proxy_bypass_environment(host)
        else:
            return proxy_bypass_macosx_sysconf(host)

    def getproxies():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Returns settings gathered from the environment, if specified,
        or from SystemConfiguration.
        """
        return getproxies_environment() or getproxies_macosx_sysconf()

elif os.name == 'nt':
    def getproxies_registry():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Win32 uses the registry to store proxies.

        """
        proxies = {}
        try:
            import _winreg
        except ImportError:
            # Std module, so should be around - but you never know!
            return proxies
        try:
            internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            try:
                proxyEnable = _winreg.QueryValueEx(internetSettings,
                                                   'ProxyEnable')[0]
                if proxyEnable:
                    # Returned as Unicode but problems if not converted to ASCII
                    proxyServer = str(_winreg.QueryValueEx(internetSettings,
                                                           'ProxyServer')[0])
                    if '=' in proxyServer:
                        # Per-protocol settings
                        for p in proxyServer.split(';'):
                            protocol, address = p.split('=', 1)
                            # See if address has a type:// prefix
                            import re
                            if not re.match('^([^/:]+)://', address):
                                address = '%s://%s' % (protocol, address)
                            proxies[protocol] = address
                    else:
                        # Use one setting for all protocols
                        if proxyServer[:5] == 'http:':
                            proxies['http'] = proxyServer
                        else:
                            proxies['http'] = 'http://%s' % proxyServer
                            proxies['ftp'] = 'ftp://%s' % proxyServer
            finally:
                # Release the key even when one of the queries fails.
                internetSettings.Close()
        except (WindowsError, ValueError, TypeError):
            # Either registry key not found etc, or the value in an
            # unexpected format.
            # proxies already set up to be empty so nothing to do
            pass
        return proxies

    def getproxies():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Returns settings gathered from the environment, if specified,
        or the registry.

        """
        return getproxies_environment() or getproxies_registry()

    def proxy_bypass_registry(host):
        """Return 1 if the registry's ProxyOverride list matches host, else 0.

        The host name, its resolved address and its fully qualified name
        are each tested against the ';'-separated override patterns.
        """
        try:
            import _winreg
            import re
        except ImportError:
            # Std modules, so should be around - but you never know!
            return 0
        try:
            internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            proxyEnable = _winreg.QueryValueEx(internetSettings,
                                               'ProxyEnable')[0]
            proxyOverride = str(_winreg.QueryValueEx(internetSettings,
                                                     'ProxyOverride')[0])
            # ^^^^ Returned as Unicode but problems if not converted to ASCII
        except WindowsError:
            return 0
        if not proxyEnable or not proxyOverride:
            return 0
        # try to make a host list from name and IP address.
        rawHost, port = splitport(host)
        host = [rawHost]
        try:
            addr = socket.gethostbyname(rawHost)
            if addr != rawHost:
                host.append(addr)
        except socket.error:
            pass
        try:
            fqdn = socket.getfqdn(rawHost)
            if fqdn != rawHost:
                host.append(fqdn)
        except socket.error:
            pass
        # make a check value list from the registry entry: replace the
        # '<local>' string by the localhost entry and the corresponding
        # canonical entry.
        proxyOverride = proxyOverride.split(';')
        i = 0
        while i < len(proxyOverride):
            if proxyOverride[i] == '<local>':
                proxyOverride[i:i+1] = ['localhost',
                                        '127.0.0.1',
                                        socket.gethostname(),
                                        socket.gethostbyname(
                                            socket.gethostname())]
            i += 1
        # now check if we match one of the registry values.
        for test in proxyOverride:
            test = test.replace(".", r"\.")     # mask dots
            test = test.replace("*", r".*")     # change glob sequence
            test = test.replace("?", r".")      # change glob char
            for val in host:
                if re.match(test, val, re.I):
                    return 1
        return 0

    def proxy_bypass(host):
        """Return true if the proxy should be bypassed for host.

        Uses the environment when it defines any proxies, otherwise the
        registry settings.

        """
        if getproxies_environment():
            return proxy_bypass_environment(host)
        else:
            return proxy_bypass_registry(host)

else:
    # By default use environment variables
    getproxies = getproxies_environment
    proxy_bypass = proxy_bypass_environment
1539
1540# Test and time quote() and unquote()
def test1():
    """Round-trip every byte value through quote()/unquote() and time it.

    Prints 'Wrong!' when the round trip does not reproduce the input,
    then the input, the quoted form, the unquoted form and the elapsed
    time in seconds.
    """
    # All 256 character codes, four times over.
    s = ''.join([chr(i) for i in range(256)]) * 4
    t0 = time.time()
    qs = quote(s)
    uqs = unquote(qs)
    t1 = time.time()
    if uqs != s:
        print('Wrong!')
    print(repr(s))
    print(repr(qs))
    print(repr(uqs))
    print('%s sec' % round(t1 - t0, 3))
1555
1556
def reporthook(blocknum, blocksize, totalsize):
    """Report during remote transfers: print one progress line."""
    progress = (blocknum, blocksize, totalsize)
    print("Block number: %d, Block size: %d, Total size: %d" % progress)
1561
1562# Test program
def test(args=[]):
    """Test program: retrieve each URL and print its headers and contents.

    With no arguments a built-in list of sample URLs is used (plus an
    https URL when URLopener supports it).  urlcleanup() is always
    called at the end, even when a retrieval fails.

    The default argument list is never modified: it is replaced by a
    fresh list before anything is appended.
    """
    if not args:
        args = [
            '/etc/passwd',
            'file:/etc/passwd',
            'file://localhost/etc/passwd',
            'ftp://ftp.gnu.org/pub/README',
            'http://www.python.org/index.html',
            ]
        if hasattr(URLopener, "open_https"):
            args.append('https://synergy.as.cmu.edu/~geek/')
    try:
        for url in args:
            print('%s %s %s' % ('-'*10, url, '-'*10))
            fn, h = urlretrieve(url, None, reporthook)
            print(fn)
            if h:
                print('======')
                for k in h.keys():
                    print('%s %s' % (k + ':', h[k]))
                print('======')
            fp = open(fn, 'rb')
            try:
                data = fp.read()
            finally:
                # Close explicitly instead of relying on 'del fp'.
                fp.close()
            if '\r' in data:
                # Drop carriage returns before printing.
                table = string.maketrans("", "")
                data = data.translate(table, "\r")
            print(data)
            fn, h = None, None
        print('-'*40)
    finally:
        urlcleanup()
1594
def main():
    """Command-line entry point: python urllib.py [-t] [url ...]

    -h prints a usage message.  -t runs test() on the given URLs; when
    -t is given more than once, test1() runs first.  Without -t the
    contents of every URL given on the command line are printed.
    """
    import getopt, sys
    try:
        opts, args = getopt.getopt(sys.argv[1:], "th")
    except getopt.error, msg:
        print msg
        print "Use -h for help"
        return
    # Number of times -t was given.
    t = 0
    for o, a in opts:
        if o == '-t':
            t = t + 1
        if o == '-h':
            print "Usage: python urllib.py [-t] [url ...]"
            print "-t runs self-test;",
            print "otherwise, contents of urls are printed"
            return
    if t:
        if t > 1:
            test1()
        test(args)
    else:
        if not args:
            print "Use -h for help"
        for url in args:
            # Trailing comma: no newline is added after the document.
            print urlopen(url).read(),
1621
# Run test program when run as a script; importing the module does not
# call main().
if __name__ == '__main__':
    main()
Note: See TracBrowser for help on using the repository browser.