source: python/trunk/Lib/urllib.py@ 1538

Last change on this file since 1538 was 1257, checked in by dmik, 8 years ago

python: Fix handling drive letters in urllib.url2pathname and pathname2url on OS/2.

It used to use the POSIX code path, which would not recognize drive letters and would treat
such a path as non-absolute. Fixes Mozilla's python/mozbuild/mozpack/test/test_mozjar.py.

  • Property svn:eol-style set to native
File size: 57.4 KB
Line 
1"""Open an arbitrary URL.
2
3See the following document for more info on URLs:
4"Names and Addresses, URIs, URLs, URNs, URCs", at
5http://www.w3.org/pub/WWW/Addressing/Overview.html
6
7See also the HTTP spec (from which the error codes are derived):
8"HTTP - Hypertext Transfer Protocol", at
9http://www.w3.org/pub/WWW/Protocols/
10
11Related standards and specs:
12- RFC1808: the "relative URL" spec. (authoritative status)
13- RFC1738 - the "URL standard". (authoritative status)
14- RFC1630 - the "URI spec". (informational status)
15
16The object returned by URLopener().open(file) will differ per
17protocol. All you know is that it has methods read(), readline(),
18readlines(), fileno(), close() and info(). The read*(), fileno()
19and close() methods work like those of open files.
20The info() method returns a mimetools.Message object which can be
21used to query various info about the object, if available.
22(mimetools.Message objects are queried with the getheader() method.)
23"""
24
25import string
26import socket
27import os
28import time
29import sys
30import base64
31import re
32
33from urlparse import urljoin as basejoin
34
# Public names of this module.
__all__ = [
    "urlopen", "URLopener", "FancyURLopener", "urlretrieve",
    "urlcleanup", "quote", "quote_plus", "unquote", "unquote_plus",
    "urlencode", "url2pathname", "pathname2url", "splittag",
    "localhost", "thishost", "ftperrors", "basejoin", "unwrap",
    "splittype", "splithost", "splituser", "splitpasswd", "splitport",
    "splitnport", "splitquery", "splitattr", "splitvalue",
    "getproxies",
]

# XXX This version is not always updated :-(
__version__ = '1.17'

# Trim the ftp cache beyond this size
MAXFTPCACHE = 10
46
47# Helper for non-unix systems
if os.name == 'nt':
    from nturl2path import url2pathname, pathname2url
elif os.name == 'riscos':
    from rourl2path import url2pathname, pathname2url
elif os.name == 'os2':
    import nturl2path

    def url2pathname(pathname):
        """Convert a relative URL of the 'file' scheme to an OS/2 path.

        OS-specific; not recommended for general use.  The work is
        delegated to nturl2path after every forward slash has been
        replaced by a back slash.
        """
        backslashed = pathname.replace('/', '\\')
        return nturl2path.url2pathname(backslashed)

    def pathname2url(pathname):
        """Convert an OS/2 file system path to a relative 'file' URL.

        OS-specific; not recommended for general use.  The work is
        delegated to nturl2path after every forward slash has been
        replaced by a back slash.
        """
        backslashed = pathname.replace('/', '\\')
        return nturl2path.pathname2url(backslashed)
else:
    def url2pathname(pathname):
        """Convert a relative URL of the 'file' scheme to a file system
        path by unquoting it; not recommended for general use."""
        return unquote(pathname)

    def pathname2url(pathname):
        """Convert a file system path to a relative URL of the 'file'
        scheme by quoting it; not recommended for general use."""
        return quote(pathname)
75
76# This really consists of two pieces:
77# (1) a class which handles opening of all sorts of URLs
78# (plus assorted utilities etc.)
79# (2) a set of functions for parsing URLs
80# XXX Should these be separated out into different modules?
81
82
83# Shortcut for basic usage
# Shared default opener, created lazily by urlopen() / urlretrieve().
_urlopener = None


def urlopen(url, data=None, proxies=None):
    """Create a file-like object for the specified URL to read from.

    When *proxies* is given, a fresh FancyURLopener using those proxies
    handles the request; otherwise the module-wide opener is used (and
    created on first use).  *data*, when not None, is forwarded to the
    opener's open() method.
    """
    from warnings import warnpy3k
    warnpy3k("urllib.urlopen() has been removed in Python 3.0 in "
             "favor of urllib2.urlopen()", stacklevel=2)

    global _urlopener
    if proxies is not None:
        opener = FancyURLopener(proxies=proxies)
    else:
        if not _urlopener:
            _urlopener = FancyURLopener()
        opener = _urlopener
    if data is None:
        return opener.open(url)
    return opener.open(url, data)
def urlretrieve(url, filename=None, reporthook=None, data=None):
    """Retrieve *url* through the module-wide opener.

    The shared FancyURLopener is created on first use; all arguments are
    passed on to its retrieve() method, whose result is returned.
    """
    global _urlopener
    opener = _urlopener
    if not opener:
        opener = _urlopener = FancyURLopener()
    return opener.retrieve(url, filename, reporthook, data)
def urlcleanup():
    """Clean up the module-wide opener and empty the module caches.

    Calls cleanup() on the shared opener if one exists, then clears the
    quoter cache and the ftp connection cache.
    """
    opener = _urlopener
    if opener:
        opener.cleanup()
    _safe_quoters.clear()
    ftpcache.clear()
113
114# check for SSL
# Only a missing ssl module means "no HTTPS support".  A bare ``except:``
# would also swallow KeyboardInterrupt and SystemExit raised while the
# import is running.
try:
    import ssl
except ImportError:
    _have_ssl = False
else:
    _have_ssl = True
121
class ContentTooShortError(IOError):
    """Raised when the downloaded size does not match content-length.

    The extra constructor argument is stored unchanged on the ``content``
    attribute; only *message* is passed on to IOError.
    """

    def __init__(self, message, content):
        self.content = content
        IOError.__init__(self, message)
127
128ftpcache = {}
129class URLopener:
130 """Class to open URLs.
131 This is a class rather than just a subroutine because we may need
132 more than one set of global protocol-specific options.
133 Note -- this is a base class for those who don't want the
134 automatic handling of errors type 302 (relocated) and 401
135 (authorization needed)."""
136
137 __tempfiles = None
138
139 version = "Python-urllib/%s" % __version__
140
141 # Constructor
142 def __init__(self, proxies=None, **x509):
143 if proxies is None:
144 proxies = getproxies()
145 assert hasattr(proxies, 'has_key'), "proxies must be a mapping"
146 self.proxies = proxies
147 self.key_file = x509.get('key_file')
148 self.cert_file = x509.get('cert_file')
149 self.addheaders = [('User-Agent', self.version)]
150 self.__tempfiles = []
151 self.__unlink = os.unlink # See cleanup()
152 self.tempcache = None
153 # Undocumented feature: if you assign {} to tempcache,
154 # it is used to cache files retrieved with
155 # self.retrieve(). This is not enabled by default
156 # since it does not work for changing documents (and I
157 # haven't got the logic to check expiration headers
158 # yet).
159 self.ftpcache = ftpcache
160 # Undocumented feature: you can use a different
161 # ftp cache by assigning to the .ftpcache member;
162 # in case you want logically independent URL openers
163 # XXX This is not threadsafe. Bah.
164
165 def __del__(self):
166 self.close()
167
168 def close(self):
169 self.cleanup()
170
171 def cleanup(self):
172 # This code sometimes runs when the rest of this module
173 # has already been deleted, so it can't use any globals
174 # or import anything.
175 if self.__tempfiles:
176 for file in self.__tempfiles:
177 try:
178 self.__unlink(file)
179 except OSError:
180 pass
181 del self.__tempfiles[:]
182 if self.tempcache:
183 self.tempcache.clear()
184
185 def addheader(self, *args):
186 """Add a header to be used by the HTTP interface only
187 e.g. u.addheader('Accept', 'sound/basic')"""
188 self.addheaders.append(args)
189
190 # External interface
191 def open(self, fullurl, data=None):
192 """Use URLopener().open(file) instead of open(file, 'r')."""
193 fullurl = unwrap(toBytes(fullurl))
194 # percent encode url, fixing lame server errors for e.g, like space
195 # within url paths.
196 fullurl = quote(fullurl, safe="%/:=&?~#+!$,;'@()*[]|")
197 if self.tempcache and fullurl in self.tempcache:
198 filename, headers = self.tempcache[fullurl]
199 fp = open(filename, 'rb')
200 return addinfourl(fp, headers, fullurl)
201 urltype, url = splittype(fullurl)
202 if not urltype:
203 urltype = 'file'
204 if urltype in self.proxies:
205 proxy = self.proxies[urltype]
206 urltype, proxyhost = splittype(proxy)
207 host, selector = splithost(proxyhost)
208 url = (host, fullurl) # Signal special case to open_*()
209 else:
210 proxy = None
211 name = 'open_' + urltype
212 self.type = urltype
213 name = name.replace('-', '_')
214 if not hasattr(self, name):
215 if proxy:
216 return self.open_unknown_proxy(proxy, fullurl, data)
217 else:
218 return self.open_unknown(fullurl, data)
219 try:
220 if data is None:
221 return getattr(self, name)(url)
222 else:
223 return getattr(self, name)(url, data)
224 except socket.error, msg:
225 raise IOError, ('socket error', msg), sys.exc_info()[2]
226
227 def open_unknown(self, fullurl, data=None):
228 """Overridable interface to open unknown URL type."""
229 type, url = splittype(fullurl)
230 raise IOError, ('url error', 'unknown url type', type)
231
232 def open_unknown_proxy(self, proxy, fullurl, data=None):
233 """Overridable interface to open unknown URL type."""
234 type, url = splittype(fullurl)
235 raise IOError, ('url error', 'invalid proxy for %s' % type, proxy)
236
237 # External interface
238 def retrieve(self, url, filename=None, reporthook=None, data=None):
239 """retrieve(url) returns (filename, headers) for a local object
240 or (tempfilename, headers) for a remote object."""
241 url = unwrap(toBytes(url))
242 if self.tempcache and url in self.tempcache:
243 return self.tempcache[url]
244 type, url1 = splittype(url)
245 if filename is None and (not type or type == 'file'):
246 try:
247 fp = self.open_local_file(url1)
248 hdrs = fp.info()
249 fp.close()
250 return url2pathname(splithost(url1)[1]), hdrs
251 except IOError:
252 pass
253 fp = self.open(url, data)
254 try:
255 headers = fp.info()
256 if filename:
257 tfp = open(filename, 'wb')
258 else:
259 import tempfile
260 garbage, path = splittype(url)
261 garbage, path = splithost(path or "")
262 path, garbage = splitquery(path or "")
263 path, garbage = splitattr(path or "")
264 suffix = os.path.splitext(path)[1]
265 (fd, filename) = tempfile.mkstemp(suffix)
266 self.__tempfiles.append(filename)
267 tfp = os.fdopen(fd, 'wb')
268 try:
269 result = filename, headers
270 if self.tempcache is not None:
271 self.tempcache[url] = result
272 bs = 1024*8
273 size = -1
274 read = 0
275 blocknum = 0
276 if "content-length" in headers:
277 size = int(headers["Content-Length"])
278 if reporthook:
279 reporthook(blocknum, bs, size)
280 while 1:
281 block = fp.read(bs)
282 if block == "":
283 break
284 read += len(block)
285 tfp.write(block)
286 blocknum += 1
287 if reporthook:
288 reporthook(blocknum, bs, size)
289 finally:
290 tfp.close()
291 finally:
292 fp.close()
293
294 # raise exception if actual size does not match content-length header
295 if size >= 0 and read < size:
296 raise ContentTooShortError("retrieval incomplete: got only %i out "
297 "of %i bytes" % (read, size), result)
298
299 return result
300
301 # Each method named open_<type> knows how to open that type of URL
302
303 def open_http(self, url, data=None):
304 """Use HTTP protocol."""
305 import httplib
306 user_passwd = None
307 proxy_passwd= None
308 if isinstance(url, str):
309 host, selector = splithost(url)
310 if host:
311 user_passwd, host = splituser(host)
312 host = unquote(host)
313 realhost = host
314 else:
315 host, selector = url
316 # check whether the proxy contains authorization information
317 proxy_passwd, host = splituser(host)
318 # now we proceed with the url we want to obtain
319 urltype, rest = splittype(selector)
320 url = rest
321 user_passwd = None
322 if urltype.lower() != 'http':
323 realhost = None
324 else:
325 realhost, rest = splithost(rest)
326 if realhost:
327 user_passwd, realhost = splituser(realhost)
328 if user_passwd:
329 selector = "%s://%s%s" % (urltype, realhost, rest)
330 if proxy_bypass(realhost):
331 host = realhost
332
333 #print "proxy via http:", host, selector
334 if not host: raise IOError, ('http error', 'no host given')
335
336 if proxy_passwd:
337 proxy_passwd = unquote(proxy_passwd)
338 proxy_auth = base64.b64encode(proxy_passwd).strip()
339 else:
340 proxy_auth = None
341
342 if user_passwd:
343 user_passwd = unquote(user_passwd)
344 auth = base64.b64encode(user_passwd).strip()
345 else:
346 auth = None
347 h = httplib.HTTP(host)
348 if data is not None:
349 h.putrequest('POST', selector)
350 h.putheader('Content-Type', 'application/x-www-form-urlencoded')
351 h.putheader('Content-Length', '%d' % len(data))
352 else:
353 h.putrequest('GET', selector)
354 if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
355 if auth: h.putheader('Authorization', 'Basic %s' % auth)
356 if realhost: h.putheader('Host', realhost)
357 for args in self.addheaders: h.putheader(*args)
358 h.endheaders(data)
359 errcode, errmsg, headers = h.getreply()
360 fp = h.getfile()
361 if errcode == -1:
362 if fp: fp.close()
363 # something went wrong with the HTTP status line
364 raise IOError, ('http protocol error', 0,
365 'got a bad status line', None)
366 # According to RFC 2616, "2xx" code indicates that the client's
367 # request was successfully received, understood, and accepted.
368 if (200 <= errcode < 300):
369 return addinfourl(fp, headers, "http:" + url, errcode)
370 else:
371 if data is None:
372 return self.http_error(url, fp, errcode, errmsg, headers)
373 else:
374 return self.http_error(url, fp, errcode, errmsg, headers, data)
375
376 def http_error(self, url, fp, errcode, errmsg, headers, data=None):
377 """Handle http errors.
378 Derived class can override this, or provide specific handlers
379 named http_error_DDD where DDD is the 3-digit error code."""
380 # First check if there's a specific handler for this error
381 name = 'http_error_%d' % errcode
382 if hasattr(self, name):
383 method = getattr(self, name)
384 if data is None:
385 result = method(url, fp, errcode, errmsg, headers)
386 else:
387 result = method(url, fp, errcode, errmsg, headers, data)
388 if result: return result
389 return self.http_error_default(url, fp, errcode, errmsg, headers)
390
391 def http_error_default(self, url, fp, errcode, errmsg, headers):
392 """Default error handler: close the connection and raise IOError."""
393 fp.close()
394 raise IOError, ('http error', errcode, errmsg, headers)
395
396 if _have_ssl:
397 def open_https(self, url, data=None):
398 """Use HTTPS protocol."""
399
400 import httplib
401 user_passwd = None
402 proxy_passwd = None
403 if isinstance(url, str):
404 host, selector = splithost(url)
405 if host:
406 user_passwd, host = splituser(host)
407 host = unquote(host)
408 realhost = host
409 else:
410 host, selector = url
411 # here, we determine, whether the proxy contains authorization information
412 proxy_passwd, host = splituser(host)
413 urltype, rest = splittype(selector)
414 url = rest
415 user_passwd = None
416 if urltype.lower() != 'https':
417 realhost = None
418 else:
419 realhost, rest = splithost(rest)
420 if realhost:
421 user_passwd, realhost = splituser(realhost)
422 if user_passwd:
423 selector = "%s://%s%s" % (urltype, realhost, rest)
424 #print "proxy via https:", host, selector
425 if not host: raise IOError, ('https error', 'no host given')
426 if proxy_passwd:
427 proxy_passwd = unquote(proxy_passwd)
428 proxy_auth = base64.b64encode(proxy_passwd).strip()
429 else:
430 proxy_auth = None
431 if user_passwd:
432 user_passwd = unquote(user_passwd)
433 auth = base64.b64encode(user_passwd).strip()
434 else:
435 auth = None
436 h = httplib.HTTPS(host, 0,
437 key_file=self.key_file,
438 cert_file=self.cert_file)
439 if data is not None:
440 h.putrequest('POST', selector)
441 h.putheader('Content-Type',
442 'application/x-www-form-urlencoded')
443 h.putheader('Content-Length', '%d' % len(data))
444 else:
445 h.putrequest('GET', selector)
446 if proxy_auth: h.putheader('Proxy-Authorization', 'Basic %s' % proxy_auth)
447 if auth: h.putheader('Authorization', 'Basic %s' % auth)
448 if realhost: h.putheader('Host', realhost)
449 for args in self.addheaders: h.putheader(*args)
450 h.endheaders(data)
451 errcode, errmsg, headers = h.getreply()
452 fp = h.getfile()
453 if errcode == -1:
454 if fp: fp.close()
455 # something went wrong with the HTTP status line
456 raise IOError, ('http protocol error', 0,
457 'got a bad status line', None)
458 # According to RFC 2616, "2xx" code indicates that the client's
459 # request was successfully received, understood, and accepted.
460 if (200 <= errcode < 300):
461 return addinfourl(fp, headers, "https:" + url, errcode)
462 else:
463 if data is None:
464 return self.http_error(url, fp, errcode, errmsg, headers)
465 else:
466 return self.http_error(url, fp, errcode, errmsg, headers,
467 data)
468
469 def open_file(self, url):
470 """Use local file or FTP depending on form of URL."""
471 if not isinstance(url, str):
472 raise IOError, ('file error', 'proxy support for file protocol currently not implemented')
473 if url[:2] == '//' and url[2:3] != '/' and url[2:12].lower() != 'localhost/':
474 return self.open_ftp(url)
475 else:
476 return self.open_local_file(url)
477
478 def open_local_file(self, url):
479 """Use local file."""
480 import mimetypes, mimetools, email.utils
481 try:
482 from cStringIO import StringIO
483 except ImportError:
484 from StringIO import StringIO
485 host, file = splithost(url)
486 localname = url2pathname(file)
487 try:
488 stats = os.stat(localname)
489 except OSError, e:
490 raise IOError(e.errno, e.strerror, e.filename)
491 size = stats.st_size
492 modified = email.utils.formatdate(stats.st_mtime, usegmt=True)
493 mtype = mimetypes.guess_type(url)[0]
494 headers = mimetools.Message(StringIO(
495 'Content-Type: %s\nContent-Length: %d\nLast-modified: %s\n' %
496 (mtype or 'text/plain', size, modified)))
497 if not host:
498 urlfile = file
499 if file[:1] == '/':
500 urlfile = 'file://' + file
501 elif file[:2] == './':
502 raise ValueError("local file url may start with / or file:. Unknown url of type: %s" % url)
503 return addinfourl(open(localname, 'rb'),
504 headers, urlfile)
505 host, port = splitport(host)
506 if not port \
507 and socket.gethostbyname(host) in (localhost(), thishost()):
508 urlfile = file
509 if file[:1] == '/':
510 urlfile = 'file://' + file
511 return addinfourl(open(localname, 'rb'),
512 headers, urlfile)
513 raise IOError, ('local file error', 'not on local host')
514
515 def open_ftp(self, url):
516 """Use FTP protocol."""
517 if not isinstance(url, str):
518 raise IOError, ('ftp error', 'proxy support for ftp protocol currently not implemented')
519 import mimetypes, mimetools
520 try:
521 from cStringIO import StringIO
522 except ImportError:
523 from StringIO import StringIO
524 host, path = splithost(url)
525 if not host: raise IOError, ('ftp error', 'no host given')
526 host, port = splitport(host)
527 user, host = splituser(host)
528 if user: user, passwd = splitpasswd(user)
529 else: passwd = None
530 host = unquote(host)
531 user = user or ''
532 passwd = passwd or ''
533 host = socket.gethostbyname(host)
534 if not port:
535 import ftplib
536 port = ftplib.FTP_PORT
537 else:
538 port = int(port)
539 path, attrs = splitattr(path)
540 path = unquote(path)
541 dirs = path.split('/')
542 dirs, file = dirs[:-1], dirs[-1]
543 if dirs and not dirs[0]: dirs = dirs[1:]
544 if dirs and not dirs[0]: dirs[0] = '/'
545 key = user, host, port, '/'.join(dirs)
546 # XXX thread unsafe!
547 if len(self.ftpcache) > MAXFTPCACHE:
548 # Prune the cache, rather arbitrarily
549 for k in self.ftpcache.keys():
550 if k != key:
551 v = self.ftpcache[k]
552 del self.ftpcache[k]
553 v.close()
554 try:
555 if not key in self.ftpcache:
556 self.ftpcache[key] = \
557 ftpwrapper(user, passwd, host, port, dirs)
558 if not file: type = 'D'
559 else: type = 'I'
560 for attr in attrs:
561 attr, value = splitvalue(attr)
562 if attr.lower() == 'type' and \
563 value in ('a', 'A', 'i', 'I', 'd', 'D'):
564 type = value.upper()
565 (fp, retrlen) = self.ftpcache[key].retrfile(file, type)
566 mtype = mimetypes.guess_type("ftp:" + url)[0]
567 headers = ""
568 if mtype:
569 headers += "Content-Type: %s\n" % mtype
570 if retrlen is not None and retrlen >= 0:
571 headers += "Content-Length: %d\n" % retrlen
572 headers = mimetools.Message(StringIO(headers))
573 return addinfourl(fp, headers, "ftp:" + url)
574 except ftperrors(), msg:
575 raise IOError, ('ftp error', msg), sys.exc_info()[2]
576
577 def open_data(self, url, data=None):
578 """Use "data" URL."""
579 if not isinstance(url, str):
580 raise IOError, ('data error', 'proxy support for data protocol currently not implemented')
581 # ignore POSTed data
582 #
583 # syntax of data URLs:
584 # dataurl := "data:" [ mediatype ] [ ";base64" ] "," data
585 # mediatype := [ type "/" subtype ] *( ";" parameter )
586 # data := *urlchar
587 # parameter := attribute "=" value
588 import mimetools
589 try:
590 from cStringIO import StringIO
591 except ImportError:
592 from StringIO import StringIO
593 try:
594 [type, data] = url.split(',', 1)
595 except ValueError:
596 raise IOError, ('data error', 'bad data URL')
597 if not type:
598 type = 'text/plain;charset=US-ASCII'
599 semi = type.rfind(';')
600 if semi >= 0 and '=' not in type[semi:]:
601 encoding = type[semi+1:]
602 type = type[:semi]
603 else:
604 encoding = ''
605 msg = []
606 msg.append('Date: %s'%time.strftime('%a, %d %b %Y %H:%M:%S GMT',
607 time.gmtime(time.time())))
608 msg.append('Content-type: %s' % type)
609 if encoding == 'base64':
610 data = base64.decodestring(data)
611 else:
612 data = unquote(data)
613 msg.append('Content-Length: %d' % len(data))
614 msg.append('')
615 msg.append(data)
616 msg = '\n'.join(msg)
617 f = StringIO(msg)
618 headers = mimetools.Message(f, 0)
619 #f.fileno = None # needed for addinfourl
620 return addinfourl(f, headers, url)
621
622
class FancyURLopener(URLopener):
    """Derived class with handlers for errors we can handle (perhaps).

    Adds redirect handling (301, 302, 303, 307) and Basic authentication
    retries (401, 407) on top of URLopener.  Credentials are cached per
    realm and host in ``auth_cache``.
    """

    def __init__(self, *args, **kwargs):
        URLopener.__init__(self, *args, **kwargs)
        self.auth_cache = {}
        # Redirect depth of the request in progress and its upper bound.
        self.tries = 0
        self.maxtries = 10

    def http_error_default(self, url, fp, errcode, errmsg, headers):
        """Default error handling -- don't raise an exception."""
        return addinfourl(fp, headers, "http:" + url, errcode)

    def http_error_302(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 302 -- relocated (temporarily).

        Once ``maxtries`` nested redirects are reached, the response is
        handed to http_error_500 (or http_error_default) as a 500
        "Redirect Recursion" error instead of being followed.
        """
        self.tries += 1
        try:
            if self.maxtries and self.tries >= self.maxtries:
                if hasattr(self, "http_error_500"):
                    meth = self.http_error_500
                else:
                    meth = self.http_error_default
                return meth(url, fp, 500,
                            "Internal Server Error: Redirect Recursion",
                            headers)
            return self.redirect_internal(url, fp, errcode, errmsg,
                                          headers, data)
        finally:
            # Reset even when following the redirect raised; otherwise
            # the stale count leaks into later requests on this opener
            # and eventually reports a bogus redirect recursion.
            self.tries = 0

    def redirect_internal(self, url, fp, errcode, errmsg, headers, data):
        """Open the redirect target named by 'location' or 'uri'.

        Returns None when neither header is present.  Raises IOError for
        targets that are not http, https or ftp URLs.
        """
        if 'location' in headers:
            newurl = headers['location']
        elif 'uri' in headers:
            newurl = headers['uri']
        else:
            return
        fp.close()
        # In case the server sent a relative URL, join with original:
        newurl = basejoin(self.type + ":" + url, newurl)

        # For security reasons we do not allow redirects to protocols
        # other than HTTP, HTTPS or FTP.
        newurl_lower = newurl.lower()
        if not (newurl_lower.startswith('http://') or
                newurl_lower.startswith('https://') or
                newurl_lower.startswith('ftp://')):
            raise IOError('redirect error', errcode,
                          errmsg + " - Redirection to url '%s' is not allowed" %
                          newurl,
                          headers)

        return self.open(newurl)

    def http_error_301(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 301 -- also relocated (permanently)."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_303(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 303 -- also relocated (essentially identical to 302)."""
        return self.http_error_302(url, fp, errcode, errmsg, headers, data)

    def http_error_307(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 307 -- relocated, but turn POST into error."""
        if data is None:
            return self.http_error_302(url, fp, errcode, errmsg, headers, data)
        return self.http_error_default(url, fp, errcode, errmsg, headers)

    def _basic_realm(self, header, url, fp, errcode, errmsg, headers):
        """Return the realm of the Basic challenge found in *header*.

        A missing header, an unparsable challenge or a scheme other than
        Basic is reported through URLopener.http_error_default(), which
        closes *fp* and raises IOError.
        """
        if header not in headers:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        match = re.match('[ \t]*([^ \t]+)[ \t]+realm="([^"]*)"',
                         headers[header])
        if not match:
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        scheme, realm = match.groups()
        if scheme.lower() != 'basic':
            URLopener.http_error_default(self, url, fp,
                                         errcode, errmsg, headers)
        return realm

    def http_error_401(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 401 -- authentication required.
        This function supports Basic authentication only."""
        realm = self._basic_realm('www-authenticate', url, fp,
                                  errcode, errmsg, headers)
        name = 'retry_' + self.type + '_basic_auth'
        if data is None:
            return getattr(self, name)(url, realm)
        return getattr(self, name)(url, realm, data)

    def http_error_407(self, url, fp, errcode, errmsg, headers, data=None):
        """Error 407 -- proxy authentication required.
        This function supports Basic authentication only."""
        realm = self._basic_realm('proxy-authenticate', url, fp,
                                  errcode, errmsg, headers)
        name = 'retry_proxy_' + self.type + '_basic_auth'
        if data is None:
            return getattr(self, name)(url, realm)
        return getattr(self, name)(url, realm, data)

    def _retry_proxy_basic_auth(self, scheme, url, realm, data):
        """Retry *url* after storing credentials in the *scheme* proxy URL.

        Returns None when no user name and no password were obtained.
        """
        host, selector = splithost(url)
        newurl = scheme + '://' + host + selector
        proxy = self.proxies[scheme]
        urltype, proxyhost = splittype(proxy)
        proxyhost, proxyselector = splithost(proxyhost)
        # i is non-zero when the proxy URL already carried credentials;
        # they evidently failed, so the cached entry is dropped.
        i = proxyhost.find('@') + 1
        proxyhost = proxyhost[i:]
        user, passwd = self.get_user_passwd(proxyhost, realm, i)
        if not (user or passwd):
            return None
        proxyhost = (quote(user, safe='') + ':' + quote(passwd, safe='')
                     + '@' + proxyhost)
        self.proxies[scheme] = scheme + '://' + proxyhost + proxyselector
        if data is None:
            return self.open(newurl)
        return self.open(newurl, data)

    def _retry_basic_auth(self, scheme, url, realm, data):
        """Retry *url* with credentials embedded in its host part.

        Returns None when no user name and no password were obtained.
        """
        host, selector = splithost(url)
        # i is non-zero when the URL already carried credentials; they
        # evidently failed, so the cached entry is dropped.
        i = host.find('@') + 1
        host = host[i:]
        user, passwd = self.get_user_passwd(host, realm, i)
        if not (user or passwd):
            return None
        host = quote(user, safe='') + ':' + quote(passwd, safe='') + '@' + host
        newurl = scheme + '://' + host + selector
        if data is None:
            return self.open(newurl)
        return self.open(newurl, data)

    def retry_proxy_http_basic_auth(self, url, realm, data=None):
        """Retry an http request after a 407 from the proxy."""
        return self._retry_proxy_basic_auth('http', url, realm, data)

    def retry_proxy_https_basic_auth(self, url, realm, data=None):
        """Retry an https request after a 407 from the proxy."""
        return self._retry_proxy_basic_auth('https', url, realm, data)

    def retry_http_basic_auth(self, url, realm, data=None):
        """Retry an http request after a 401 from the server."""
        return self._retry_basic_auth('http', url, realm, data)

    def retry_https_basic_auth(self, url, realm, data=None):
        """Retry an https request after a 401 from the server."""
        return self._retry_basic_auth('https', url, realm, data)

    def get_user_passwd(self, host, realm, clear_cache=0):
        """Return (user, passwd) for *realm* at *host*.

        A cached pair is returned unless *clear_cache* is true, in which
        case the cached entry is deleted and the user is prompted again.
        A newly obtained pair is cached when either part is non-empty.
        """
        key = realm + '@' + host.lower()
        if key in self.auth_cache:
            if clear_cache:
                del self.auth_cache[key]
            else:
                return self.auth_cache[key]
        user, passwd = self.prompt_user_passwd(host, realm)
        if user or passwd:
            self.auth_cache[key] = (user, passwd)
        return user, passwd

    def prompt_user_passwd(self, host, realm):
        """Override this in a GUI environment!"""
        import getpass
        try:
            user = raw_input("Enter username for %s at %s: " % (realm,
                                                                host))
            passwd = getpass.getpass("Enter password for %s in %s at %s: " %
                                     (user, realm, host))
            return user, passwd
        except KeyboardInterrupt:
            print
            return None, None
818
819
820# Utility functions
821
# Cached result of localhost(); filled in on first call.
_localhost = None


def localhost():
    """Return the IP address of the magic hostname 'localhost'.

    The lookup is performed once and the result is reused afterwards.
    """
    global _localhost
    if _localhost is not None:
        return _localhost
    _localhost = socket.gethostbyname('localhost')
    return _localhost
829
# Cached result of thishost(); filled in on first call.
_thishost = None


def thishost():
    """Return the IP address of the current host.

    Falls back to the address of 'localhost' when the machine's own host
    name cannot be resolved.  The result is computed once and reused.
    """
    global _thishost
    if _thishost is not None:
        return _thishost
    try:
        _thishost = socket.gethostbyname(socket.gethostname())
    except socket.gaierror:
        _thishost = socket.gethostbyname('localhost')
    return _thishost
840
# Cached result of ftperrors(); filled in on first call.
_ftperrors = None


def ftperrors():
    """Return the set of errors raised by the FTP class.

    ftplib is imported on first use only; the value is ftplib.all_errors.
    """
    global _ftperrors
    if _ftperrors is not None:
        return _ftperrors
    import ftplib
    _ftperrors = ftplib.all_errors
    return _ftperrors
849
850_noheaders = None
851def noheaders():
852 """Return an empty mimetools.Message object."""
853 global _noheaders
854 if _noheaders is None:
855 import mimetools
856 try:
857 from cStringIO import StringIO
858 except ImportError:
859 from StringIO import StringIO
860 _noheaders = mimetools.Message(StringIO(), 0)
861 _noheaders.fp.close() # Recycle file descriptor
862 return _noheaders
863
864
865# Utility classes
866
867class ftpwrapper:
868 """Class used by open_ftp() for cache of open FTP connections."""
869
870 def __init__(self, user, passwd, host, port, dirs,
871 timeout=socket._GLOBAL_DEFAULT_TIMEOUT,
872 persistent=True):
873 self.user = user
874 self.passwd = passwd
875 self.host = host
876 self.port = port
877 self.dirs = dirs
878 self.timeout = timeout
879 self.refcount = 0
880 self.keepalive = persistent
881 self.init()
882
883 def init(self):
884 import ftplib
885 self.busy = 0
886 self.ftp = ftplib.FTP()
887 self.ftp.connect(self.host, self.port, self.timeout)
888 self.ftp.login(self.user, self.passwd)
889 _target = '/'.join(self.dirs)
890 self.ftp.cwd(_target)
891
892 def retrfile(self, file, type):
893 import ftplib
894 self.endtransfer()
895 if type in ('d', 'D'): cmd = 'TYPE A'; isdir = 1
896 else: cmd = 'TYPE ' + type; isdir = 0
897 try:
898 self.ftp.voidcmd(cmd)
899 except ftplib.all_errors:
900 self.init()
901 self.ftp.voidcmd(cmd)
902 conn = None
903 if file and not isdir:
904 # Try to retrieve as a file
905 try:
906 cmd = 'RETR ' + file
907 conn, retrlen = self.ftp.ntransfercmd(cmd)
908 except ftplib.error_perm, reason:
909 if str(reason)[:3] != '550':
910 raise IOError, ('ftp error', reason), sys.exc_info()[2]
911 if not conn:
912 # Set transfer mode to ASCII!
913 self.ftp.voidcmd('TYPE A')
914 # Try a directory listing. Verify that directory exists.
915 if file:
916 pwd = self.ftp.pwd()
917 try:
918 try:
919 self.ftp.cwd(file)
920 except ftplib.error_perm, reason:
921 raise IOError, ('ftp error', reason), sys.exc_info()[2]
922 finally:
923 self.ftp.cwd(pwd)
924 cmd = 'LIST ' + file
925 else:
926 cmd = 'LIST'
927 conn, retrlen = self.ftp.ntransfercmd(cmd)
928 self.busy = 1
929 ftpobj = addclosehook(conn.makefile('rb'), self.file_close)
930 self.refcount += 1
931 conn.close()
932 # Pass back both a suitably decorated object and a retrieval length
933 return (ftpobj, retrlen)
934
935 def endtransfer(self):
936 if not self.busy:
937 return
938 self.busy = 0
939 try:
940 self.ftp.voidresp()
941 except ftperrors():
942 pass
943
944 def close(self):
945 self.keepalive = False
946 if self.refcount <= 0:
947 self.real_close()
948
def file_close(self):
    """Close hook invoked when a file object handed out by retrfile closes.

    Ends the current transfer, decrements the reference count and closes
    the connection once the count is zero or less and keep-alive is off.
    """
    self.endtransfer()
    remaining = self.refcount - 1
    self.refcount = remaining
    if remaining > 0 or self.keepalive:
        return
    self.real_close()
954
def real_close(self):
    """End any pending transfer, then close the FTP connection.

    FTP errors raised by the close itself are ignored.
    """
    self.endtransfer()
    try:
        self.ftp.close()
    except ftperrors():
        # Nothing useful can be done if closing fails.
        pass
961
class addbase:
    """Common base of the file wrappers (addinfo, addclosehook, ...).

    The wrapper copies the bound read methods of the wrapped file onto
    itself so calls go straight to the underlying object.
    """

    def __init__(self, fp):
        self.fp = fp
        self.read = fp.read
        self.readline = fp.readline
        if hasattr(fp, "readlines"):
            self.readlines = fp.readlines
        # Fall back to a fileno() that returns None when the wrapped
        # object has no descriptor to offer.
        self.fileno = fp.fileno if hasattr(fp, "fileno") else (lambda: None)
        # Forward the iteration protocol only where the file supports it.
        for name in ("__iter__", "next"):
            if hasattr(fp, name):
                setattr(self, name, getattr(fp, name))

    def __repr__(self):
        details = (self.__class__.__name__, id(self), self.fp)
        return '<%s at %r whose fp = %r>' % details

    def close(self):
        """Drop the forwarded methods and close the wrapped file."""
        self.read = self.readline = self.readlines = self.fileno = None
        stream = self.fp
        if stream:
            stream.close()
        self.fp = None
990
class addclosehook(addbase):
    """File wrapper that calls a hook when it is closed.

    closehook is called as closehook(*hookargs) at most once, before the
    wrapped file itself is closed.
    """

    def __init__(self, fp, closehook, *hookargs):
        addbase.__init__(self, fp)
        self.closehook = closehook
        self.hookargs = hookargs

    def close(self):
        """Run the close hook (once), then close the wrapped file.

        The wrapped file is closed even when the hook raises; the hook's
        exception still propagates to the caller.
        """
        hook, args = self.closehook, self.hookargs
        # Forget the hook before calling it so a failing or re-entrant
        # close() can never run it twice.
        self.closehook = None
        self.hookargs = None
        try:
            if hook:
                hook(*args)
        finally:
            # Previously skipped when the hook raised, leaking the file.
            addbase.close(self)
1005
class addinfo(addbase):
    """File wrapper that also carries a headers object.

    The headers passed to the constructor are returned unchanged by
    info().
    """

    def __init__(self, fp, headers):
        addbase.__init__(self, fp)
        self.headers = headers

    def info(self):
        """Return the headers object given at construction time."""
        return self.headers
1015
class addinfourl(addbase):
    """File wrapper carrying headers, the URL and an optional status code.

    All three values are stored as given and handed back by info(),
    geturl() and getcode() respectively.
    """

    def __init__(self, fp, headers, url, code=None):
        addbase.__init__(self, fp)
        self.headers = headers
        self.url = url
        self.code = code

    def info(self):
        """Return the headers object given at construction time."""
        return self.headers

    def getcode(self):
        """Return the stored status code (None when none was given)."""
        return self.code

    def geturl(self):
        """Return the stored URL."""
        return self.url
1033
1034
1035# Utilities to parse URLs (most of these return None for missing parts):
1036# unwrap('<URL:type://host/path>') --> 'type://host/path'
1037# splittype('type:opaquestring') --> 'type', 'opaquestring'
1038# splithost('//host[:port]/path') --> 'host[:port]', '/path'
1039# splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'
1040# splitpasswd('user:passwd') -> 'user', 'passwd'
1041# splitport('host:port') --> 'host', 'port'
1042# splitquery('/path?query') --> '/path', 'query'
1043# splittag('/path#tag') --> '/path', 'tag'
1044# splitattr('/path;attr1=value1;attr2=value2;...') ->
1045# '/path', ['attr1=value1', 'attr2=value2', ...]
1046# splitvalue('attr=value') --> 'attr', 'value'
1047# unquote('abc%20def') -> 'abc def'
# quote('abc def') -> 'abc%20def'
1049
try:
    unicode
except NameError:
    def _is_unicode(x):
        """Always false: this interpreter has no `unicode` type."""
        return 0
else:
    def _is_unicode(x):
        """Tell whether x is an instance of `unicode`."""
        return isinstance(x, unicode)

def toBytes(url):
    """toBytes(u"URL") --> 'URL'.

    Non-unicode input is returned untouched.  Unicode input is encoded
    as ASCII; UnicodeError is raised when that is not possible.
    """
    if not _is_unicode(url):
        return url
    # Most URL schemes require ASCII. If that changes, the conversion
    # can be relaxed
    try:
        return url.encode("ASCII")
    except UnicodeError:
        raise UnicodeError("URL " + repr(url) +
                           " contains non-ASCII characters")
1070
def unwrap(url):
    """unwrap('<URL:type://host/path>') --> 'type://host/path'.

    Strips surrounding whitespace, an enclosing '<...>' pair and a
    leading 'URL:' marker, in that order.
    """
    text = url.strip()
    if text[:1] == '<' and text[-1:] == '>':
        text = text[1:-1].strip()
    if text[:4] != 'URL:':
        return text
    return text[4:].strip()
1078
_typeprog = None
def splittype(url):
    """splittype('type:opaquestring') --> 'type', 'opaquestring'.

    The scheme is returned in lower case; when the URL has no scheme the
    result is (None, url).
    """
    global _typeprog
    if _typeprog is None:
        # Compile lazily on first use and keep the pattern for later calls.
        import re
        _typeprog = re.compile('^([^/:]+):')

    found = _typeprog.match(url)
    if found is None:
        return None, url
    # The match covers the scheme plus its ':'; the rest follows it.
    return found.group(1).lower(), url[found.end():]
1092
_hostprog = None
def splithost(url):
    """splithost('//host[:port]/path') --> 'host[:port]', '/path'.

    The host part ends at the first '/', '?' or '#'.  A non-empty
    remainder that does not start with '/' gets one prepended.  Returns
    (None, url) when url does not start with '//'.
    """
    global _hostprog
    if _hostprog is None:
        import re
        # '#' must terminate the host too, otherwise
        # '//127.0.0.1#@evil.com/' is read as host '127.0.0.1#@evil.com'
        # and later split at '@' into the wrong host.  DOTALL keeps URLs
        # containing newlines from silently failing to match.
        _hostprog = re.compile('//([^/#?]*)(.*)', re.DOTALL)

    match = _hostprog.match(url)
    if match:
        host_port, path = match.groups()
        if path and path[0] != '/':
            path = '/' + path
        return host_port, path
    return None, url
1109
_userprog = None
def splituser(host):
    """splituser('user[:passwd]@host[:port]') --> 'user[:passwd]', 'host[:port]'.

    The split happens at the last '@'.  Returns (None, host) when there
    is no '@' at all.
    """
    global _userprog
    if _userprog is None:
        import re
        # re.S (as in splitpasswd) lets the user-info part contain
        # newlines; without it such input was returned unsplit.
        _userprog = re.compile('^(.*)@(.*)$', re.S)

    match = _userprog.match(host)
    if match: return match.group(1, 2)
    return None, host
1121
_passwdprog = None
def splitpasswd(user):
    """splitpasswd('user:passwd') -> 'user', 'passwd'.

    Splits at the first ':'; the password may itself contain ':' and
    newlines.  Returns (user, None) when there is no ':'.
    """
    global _passwdprog
    if _passwdprog is None:
        # Compile lazily on first use and keep the pattern for later calls.
        import re
        _passwdprog = re.compile('^([^:]*):(.*)$',re.S)

    found = _passwdprog.match(user)
    if found is None:
        return user, None
    return found.group(1, 2)
1133
# splitport('host:port') --> 'host', 'port'
_portprog = None
def splitport(host):
    """splitport('host:port') --> 'host', 'port'.

    The port is returned as a string of digits.  When there is no port,
    or the port is empty ('host:'), the result is (host, None) with the
    trailing ':' removed.  Anything after the last ':' that is not all
    digits is not treated as a port and the input is returned unsplit.
    """
    global _portprog
    if _portprog is None:
        import re
        # '[0-9]*' rather than '[0-9]+': an empty port is legal, and the
        # bare ':' must not be left attached to the host name.
        _portprog = re.compile('^(.*):([0-9]*)$')

    match = _portprog.match(host)
    if match:
        host, port = match.groups()
        if port:
            return host, port
    return host, None
1146
_nportprog = None
def splitnport(host, defport=-1):
    """Split host and port, returning the port as a number.

    Without any ':' the result is (host, defport); defport defaults to -1.
    With a ':' the text after the last one is converted with int(); when
    that fails (including an empty port) the port is returned as None.
    """
    global _nportprog
    if _nportprog is None:
        # Compile lazily on first use and keep the pattern for later calls.
        import re
        _nportprog = re.compile('^(.*):(.*)$')

    found = _nportprog.match(host)
    if found is None:
        return host, defport
    host, port = found.group(1, 2)
    try:
        # int('') raises ValueError as well, so an empty port needs no
        # separate check to end up as None.
        number = int(port)
    except ValueError:
        number = None
    return host, number
1168
_queryprog = None
def splitquery(url):
    """splitquery('/path?query') --> '/path', 'query'.

    Splits at the last '?'.  Returns (url, None) when there is none.
    """
    global _queryprog
    if _queryprog is None:
        # Compile lazily on first use and keep the pattern for later calls.
        import re
        _queryprog = re.compile(r'^(.*)\?([^?]*)$')

    found = _queryprog.match(url)
    if found is None:
        return url, None
    return found.group(1, 2)
1180
_tagprog = None
def splittag(url):
    """splittag('/path#tag') --> '/path', 'tag'.

    Splits at the last '#'.  Returns (url, None) when there is none.
    """
    global _tagprog
    if _tagprog is None:
        # Compile lazily on first use and keep the pattern for later calls.
        import re
        _tagprog = re.compile('^(.*)#([^#]*)$')

    found = _tagprog.match(url)
    if found is None:
        return url, None
    return found.group(1, 2)
1192
def splitattr(url):
    """splitattr('/path;attr1=value1;attr2=value2;...') ->
        '/path', ['attr1=value1', 'attr2=value2', ...].

    The attribute list is empty when the URL contains no ';'.
    """
    pieces = url.split(';')
    path = pieces.pop(0)
    return path, pieces
1198
_valueprog = None
def splitvalue(attr):
    """splitvalue('attr=value') --> 'attr', 'value'.

    Splits at the first '='.  Returns (attr, None) when there is none.
    """
    global _valueprog
    if _valueprog is None:
        # Compile lazily on first use and keep the pattern for later calls.
        import re
        _valueprog = re.compile('^([^=]*)=(.*)$')

    found = _valueprog.match(attr)
    if found is None:
        return attr, None
    return found.group(1, 2)
1210
1211# urlparse contains a duplicate of this method to avoid a circular import. If
1212# you update this method, also update the copy in urlparse. This code
1213# duplication does not exist in Python3.
1214
_hexdig = '0123456789ABCDEFabcdef'
# Maps every two-character hex pair (any letter case) to its character.
_hextochr = dict((hi + lo, chr(int(hi + lo, 16)))
                 for hi in _hexdig for lo in _hexdig)
_asciire = re.compile('([\x00-\x7f]+)')

def unquote(s):
    """unquote('abc%20def') -> 'abc def'.

    '%' followed by two hex digits is replaced by the corresponding
    character; any other '%' is left in place.  For unicode input only
    the ASCII runs are unquoted and the results decoded as latin1.
    """
    if _is_unicode(s):
        if '%' not in s:
            return s
        # Splitting on a capturing group puts the ASCII runs at the odd
        # indexes and the non-ASCII text between them at the even ones.
        pieces = _asciire.split(s)
        out = [pieces[0]]
        for idx in range(1, len(pieces), 2):
            out.append(unquote(str(pieces[idx])).decode('latin1'))
            out.append(pieces[idx + 1])
        return ''.join(out)

    chunks = s.split('%')
    if len(chunks) == 1:
        # No '%' at all: nothing to do.
        return s
    out = [chunks[0]]
    for chunk in chunks[1:]:
        decoded = _hextochr.get(chunk[:2])
        if decoded is None:
            # Not a valid escape: keep the '%' and the text as they were.
            out.append('%')
            out.append(chunk)
        else:
            out.append(decoded)
            out.append(chunk[2:])
    return ''.join(out)
1247
def unquote_plus(s):
    """unquote_plus('%7e/abc+def') -> '~/abc def'

    Like unquote(), but every '+' is turned into a space first.
    """
    return unquote(s.replace('+', ' '))
1252
always_safe = ('ABCDEFGHIJKLMNOPQRSTUVWXYZ'
               'abcdefghijklmnopqrstuvwxyz'
               '0123456789' '_.-')
# Default translation table: each of the 256 single-byte characters maps
# to itself when always safe, otherwise to its '%XX' escape.
_safe_map = {}
for i in range(256):
    c = chr(i)
    _safe_map[c] = c if (i < 128 and c in always_safe) else '%{:02X}'.format(i)
# Cache of (quoter, safe-characters) pairs keyed by (safe, always_safe).
_safe_quoters = {}

def quote(s, safe='/'):
    """quote('abc def') -> 'abc%20def'

    Each part of a URL, e.g. the path info, the query, etc., has a
    different set of reserved characters that must be quoted.

    RFC 2396 Uniform Resource Identifiers (URI): Generic Syntax lists
    the following reserved characters.

    reserved    = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" |
                  "$" | ","

    Each of these characters is reserved in some component of a URL,
    but not necessarily in all of them.

    By default, the quote function is intended for quoting the path
    section of a URL.  Thus, it will not encode '/'.  This character
    is reserved, but in typical usage the quote function is being
    called on a path where the existing slash characters are used as
    reserved characters.

    Characters listed in `safe` are never quoted.  Raises TypeError when
    s is None; any other false value is returned unchanged.
    """
    if s is None:
        raise TypeError('None object cannot be quoted')
    if not s:
        return s
    cachekey = (safe, always_safe)
    cached = _safe_quoters.get(cachekey)
    if cached is None:
        # First use of this `safe` set: derive its table from the default.
        table = _safe_map.copy()
        for ch in safe:
            table[ch] = ch
        cached = (table.__getitem__, always_safe + safe)
        _safe_quoters[cachekey] = cached
    quoter, safe = cached
    # Fast path: a string made only of safe characters needs no work.
    if not s.rstrip(safe):
        return s
    return ''.join([quoter(ch) for ch in s])
1299
def quote_plus(s, safe=''):
    """Quote the query fragment of a URL; replacing ' ' with '+'.

    Works like quote() with the given `safe` set, except that spaces
    become '+' instead of '%20'.
    """
    if ' ' not in s:
        return quote(s, safe)
    # Shield spaces from quoting, then turn them into '+'.
    return quote(s, safe + ' ').replace(' ', '+')
1306
1307def urlencode(query, doseq=0):
1308 """Encode a sequence of two-element tuples or dictionary into a URL query string.
1309
1310 If any values in the query arg are sequences and doseq is true, each
1311 sequence element is converted to a separate parameter.
1312
1313 If the query arg is a sequence of two-element tuples, the order of the
1314 parameters in the output will match the order of parameters in the
1315 input.
1316 """
1317
1318 if hasattr(query,"items"):
1319 # mapping objects
1320 query = query.items()
1321 else:
1322 # it's a bother at times that strings and string-like objects are
1323 # sequences...
1324 try:
1325 # non-sequence items should not work with len()
1326 # non-empty strings will fail this
1327 if len(query) and not isinstance(query[0], tuple):
1328 raise TypeError
1329 # zero-length sequences of all types will get here and succeed,
1330 # but that's a minor nit - since the original implementation
1331 # allowed empty dicts that type of behavior probably should be
1332 # preserved for consistency
1333 except TypeError:
1334 ty,va,tb = sys.exc_info()
1335 raise TypeError, "not a valid non-string sequence or mapping object", tb
1336
1337 l = []
1338 if not doseq:
1339 # preserve old behavior
1340 for k, v in query:
1341 k = quote_plus(str(k))
1342 v = quote_plus(str(v))
1343 l.append(k + '=' + v)
1344 else:
1345 for k, v in query:
1346 k = quote_plus(str(k))
1347 if isinstance(v, str):
1348 v = quote_plus(v)
1349 l.append(k + '=' + v)
1350 elif _is_unicode(v):
1351 # is there a reasonable way to convert to ASCII?
1352 # encode generates a string, but "replace" or "ignore"
1353 # lose information and "strict" can raise UnicodeError
1354 v = quote_plus(v.encode("ASCII","replace"))
1355 l.append(k + '=' + v)
1356 else:
1357 try:
1358 # is this a sufficient test for sequence-ness?
1359 len(v)
1360 except TypeError:
1361 # not a sequence
1362 v = quote_plus(str(v))
1363 l.append(k + '=' + v)
1364 else:
1365 # loop over the sequence
1366 for elt in v:
1367 l.append(k + '=' + quote_plus(str(elt)))
1368 return '&'.join(l)
1369
1370# Proxy handling
def getproxies_environment():
    """Return a dictionary of scheme -> proxy server URL mappings.

    Scan the environment for variables named <scheme>_proxy;
    this seems to be the standard convention.  If you need a
    different way, you can pass a proxies dictionary to the
    [Fancy]URLopener constructor.

    Variable names are matched case-insensitively, but an all-lower-case
    variable always takes precedence over other spellings, and an empty
    lower-case variable removes the scheme.  When REQUEST_METHOD is set
    (i.e. when running as a CGI script) a non-lower-case HTTP_PROXY is
    ignored.
    """
    proxies = {}
    # First pass: accept any letter case.
    for name, value in os.environ.items():
        name = name.lower()
        if value and name[-6:] == '_proxy':
            proxies[name[:-6]] = value

    # CVE-2016-1000110: in a CGI environment HTTP_PROXY may have been
    # created by the web server from a client-supplied "Proxy:" header,
    # so it must not be trusted.  A genuine lower-case http_proxy is
    # picked up again by the pass below.
    if 'REQUEST_METHOD' in os.environ:
        proxies.pop('http', None)

    # Second pass: lower-case names override whatever was found above.
    for name, value in os.environ.items():
        if name[-6:] == '_proxy':
            name = name.lower()
            if value:
                proxies[name[:-6]] = value
            else:
                proxies.pop(name[:-6], None)
    return proxies
1386
def proxy_bypass_environment(host):
    """Test if proxies should not be used for a particular host.

    Checks the environment for a variable named no_proxy (falling back
    to NO_PROXY), which should be a list of DNS suffixes separated by
    commas, or '*' for all hosts.  Returns 1 to bypass, 0 otherwise.
    """
    no_proxy = os.environ.get('no_proxy', '') or os.environ.get('NO_PROXY', '')
    # '*' is special case for always bypass
    if no_proxy == '*':
        return 1
    # The suffixes are compared against the host both with and without
    # its port.
    hostonly = splitport(host)[0]
    for entry in no_proxy.split(','):
        suffix = entry.strip()
        if not suffix:
            continue
        if hostonly.endswith(suffix) or host.endswith(suffix):
            return 1
    # otherwise, don't bypass
    return 0
1406
1407
# Pick the platform-specific implementations of getproxies() and
# proxy_bypass(): Mac OS X system configuration, the Windows registry, or
# (everywhere else) the environment only.
if sys.platform == 'darwin':
    from _scproxy import _get_proxy_settings, _get_proxies

    def proxy_bypass_macosx_sysconf(host):
        """
        Return True iff this host shouldn't be accessed using a proxy

        This function uses the MacOSX framework SystemConfiguration
        to fetch the proxy information.
        """
        import re
        import socket
        from fnmatch import fnmatch

        hostonly, port = splitport(host)

        def ip2num(ipAddr):
            # Pack a dotted address into one integer; addresses with
            # fewer than four parts are padded with zeros on the right.
            parts = ipAddr.split('.')
            parts = map(int, parts)
            if len(parts) != 4:
                parts = (parts + [0, 0, 0, 0])[:4]
            return (parts[0] << 24) | (parts[1] << 16) | (parts[2] << 8) | parts[3]

        proxy_settings = _get_proxy_settings()

        # Check for simple host names:
        if '.' not in host:
            if proxy_settings['exclude_simple']:
                return True

        # Resolved lazily, at most once, when the first numeric exception
        # is encountered.
        hostIP = None

        for value in proxy_settings.get('exceptions', ()):
            # Items in the list are strings like these: *.local, 169.254/16
            if not value: continue

            m = re.match(r"(\d+(?:\.\d+)*)(/\d+)?", value)
            if m is not None:
                if hostIP is None:
                    try:
                        hostIP = socket.gethostbyname(hostonly)
                        hostIP = ip2num(hostIP)
                    except socket.error:
                        continue

                base = ip2num(m.group(1))
                mask = m.group(2)
                if mask is None:
                    # No explicit prefix length: use 8 bits per address
                    # part that was written out.
                    mask = 8 * (m.group(1).count('.') + 1)

                else:
                    mask = int(mask[1:])
                # Turn the prefix length into the number of low bits to
                # discard before comparing.
                mask = 32 - mask

                if (hostIP >> mask) == (base >> mask):
                    return True

            elif fnmatch(host, value):
                return True

        return False

    def getproxies_macosx_sysconf():
        """Return a dictionary of scheme -> proxy server URL mappings.

        This function uses the MacOSX framework SystemConfiguration
        to fetch the proxy information.
        """
        return _get_proxies()

    def proxy_bypass(host):
        """Tell whether `host` should be reached without a proxy.

        Uses the environment rules when any <scheme>_proxy variable is
        set, otherwise the system configuration.
        """
        if getproxies_environment():
            return proxy_bypass_environment(host)
        else:
            return proxy_bypass_macosx_sysconf(host)

    def getproxies():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Settings from the environment win; the system configuration is
        consulted only when the environment yields nothing.
        """
        return getproxies_environment() or getproxies_macosx_sysconf()

elif os.name == 'nt':
    def getproxies_registry():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Win32 uses the registry to store proxies.

        An empty dictionary is returned when _winreg is unavailable, the
        proxy is disabled, or the registry values cannot be read or
        parsed.
        """
        proxies = {}
        try:
            import _winreg
        except ImportError:
            # Std module, so should be around - but you never know!
            return proxies
        try:
            internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            proxyEnable = _winreg.QueryValueEx(internetSettings,
                                               'ProxyEnable')[0]
            if proxyEnable:
                # Returned as Unicode but problems if not converted to ASCII
                proxyServer = str(_winreg.QueryValueEx(internetSettings,
                                                       'ProxyServer')[0])
                if '=' in proxyServer:
                    # Per-protocol settings
                    for p in proxyServer.split(';'):
                        protocol, address = p.split('=', 1)
                        # See if address has a type:// prefix
                        import re
                        if not re.match('^([^/:]+)://', address):
                            address = '%s://%s' % (protocol, address)
                        proxies[protocol] = address
                else:
                    # Use one setting for all protocols
                    if proxyServer[:5] == 'http:':
                        proxies['http'] = proxyServer
                    else:
                        proxies['http'] = 'http://%s' % proxyServer
                        proxies['https'] = 'https://%s' % proxyServer
                        proxies['ftp'] = 'ftp://%s' % proxyServer
            internetSettings.Close()
        except (WindowsError, ValueError, TypeError):
            # Either registry key not found etc, or the value in an
            # unexpected format.
            # proxies already set up to be empty so nothing to do
            pass
        return proxies

    def getproxies():
        """Return a dictionary of scheme -> proxy server URL mappings.

        Returns settings gathered from the environment, if specified,
        or the registry.

        """
        return getproxies_environment() or getproxies_registry()

    def proxy_bypass_registry(host):
        """Tell (1 or 0) whether the registry says to bypass the proxy for host.

        Reads ProxyEnable and ProxyOverride from the Internet Settings
        registry key and matches the host name, its resolved address and
        its fully qualified name against the override entries.
        """
        try:
            import _winreg
            import re
        except ImportError:
            # Std modules, so should be around - but you never know!
            return 0
        try:
            internetSettings = _winreg.OpenKey(_winreg.HKEY_CURRENT_USER,
                r'Software\Microsoft\Windows\CurrentVersion\Internet Settings')
            proxyEnable = _winreg.QueryValueEx(internetSettings,
                                               'ProxyEnable')[0]
            proxyOverride = str(_winreg.QueryValueEx(internetSettings,
                                                     'ProxyOverride')[0])
            # ^^^^ Returned as Unicode but problems if not converted to ASCII
        except WindowsError:
            return 0
        if not proxyEnable or not proxyOverride:
            return 0
        # try to make a host list from name and IP address.
        rawHost, port = splitport(host)
        host = [rawHost]
        try:
            addr = socket.gethostbyname(rawHost)
            if addr != rawHost:
                host.append(addr)
        except socket.error:
            pass
        try:
            fqdn = socket.getfqdn(rawHost)
            if fqdn != rawHost:
                host.append(fqdn)
        except socket.error:
            pass
        # make a check value list from the registry entry: replace the
        # '<local>' string by the localhost entry and the corresponding
        # canonical entry.
        proxyOverride = proxyOverride.split(';')
        # now check if we match one of the registry values.
        for test in proxyOverride:
            if test == '<local>':
                # '<local>' bypasses the proxy for names without a dot.
                if '.' not in rawHost:
                    return 1
            # Translate the glob-style entry into a regular expression.
            test = test.replace(".", r"\.")     # mask dots
            test = test.replace("*", r".*")     # change glob sequence
            test = test.replace("?", r".")      # change glob char
            for val in host:
                # print "%s <--> %s" %( test, val )
                if re.match(test, val, re.I):
                    return 1
        return 0

    def proxy_bypass(host):
        """Tell whether `host` should be reached without a proxy.

        Uses the environment rules when any <scheme>_proxy variable is
        set, otherwise the registry.

        """
        if getproxies_environment():
            return proxy_bypass_environment(host)
        else:
            return proxy_bypass_registry(host)

else:
    # By default use environment variables
    getproxies = getproxies_environment
    proxy_bypass = proxy_bypass_environment
1611
1612# Test and time quote() and unquote()
def test1():
    """Round-trip every byte value through quote()/unquote() and time it.

    Prints 'Wrong!' when the round trip does not reproduce the input,
    then the reprs of the input, the quoted and the unquoted strings, and
    finally the elapsed time in seconds rounded to three places.
    """
    s = ''.join(map(chr, range(256))) * 4
    t0 = time.time()
    qs = quote(s)
    uqs = unquote(qs)
    t1 = time.time()
    if uqs != s:
        print('Wrong!')
    print(repr(s))
    print(repr(qs))
    print(repr(uqs))
    print('%s sec' % round(t1 - t0, 3))
1627
1628
def reporthook(blocknum, blocksize, totalsize):
    """Print one progress line for a remote transfer.

    The line shows the block number, the block size and the total size,
    each formatted as an integer.
    """
    progress = (blocknum, blocksize, totalsize)
    print("Block number: %d, Block size: %d, Total size: %d" % progress)
Note: See TracBrowser for help on using the repository browser.