Changeset 388 for python/vendor/current/Lib/urllib.py
- Timestamp:
- Mar 19, 2014, 11:11:30 AM (11 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
python/vendor/current/Lib/urllib.py
r2 r388 28 28 import time 29 29 import sys 30 import base64 31 import re 32 30 33 from urlparse import urljoin as basejoin 31 34 … … 43 46 44 47 # Helper for non-unix systems 45 if os.name == 'mac': 46 from macurl2path import url2pathname, pathname2url 47 elif os.name == 'nt': 48 if os.name == 'nt': 48 49 from nturl2path import url2pathname, pathname2url 49 50 elif os.name == 'riscos': … … 95 96 if _urlopener: 96 97 _urlopener.cleanup() 98 _safe_quoters.clear() 99 ftpcache.clear() 97 100 98 101 # check for SSL … … 176 179 """Use URLopener().open(file) instead of open(file, 'r').""" 177 180 fullurl = unwrap(toBytes(fullurl)) 178 # percent encode url . fixing lame server errors like space within url179 # parts181 # percent encode url, fixing lame server errors for e.g, like space 182 # within url paths. 180 183 fullurl = quote(fullurl, safe="%/:=&?~#+!$,;'@()*[]|") 181 184 if self.tempcache and fullurl in self.tempcache: … … 231 234 fp = self.open_local_file(url1) 232 235 hdrs = fp.info() 233 del fp236 fp.close() 234 237 return url2pathname(splithost(url1)[1]), hdrs 235 except IOError , msg:238 except IOError: 236 239 pass 237 240 fp = self.open(url, data) … … 258 261 read = 0 259 262 blocknum = 0 263 if "content-length" in headers: 264 size = int(headers["Content-Length"]) 260 265 if reporthook: 261 if "content-length" in headers:262 size = int(headers["Content-Length"])263 266 reporthook(blocknum, bs, size) 264 267 while 1: … … 275 278 finally: 276 279 fp.close() 277 del fp278 del tfp279 280 280 281 # raise exception if actual size does not match content-length header … … 321 322 322 323 if proxy_passwd: 323 import base64324 proxy_passwd = unquote(proxy_passwd) 324 325 proxy_auth = base64.b64encode(proxy_passwd).strip() 325 326 else: … … 327 328 328 329 if user_passwd: 329 import base64330 user_passwd = unquote(user_passwd) 330 331 auth = base64.b64encode(user_passwd).strip() 331 332 else: … … 342 343 if realhost: h.putheader('Host', realhost) 343 344 for args in self.addheaders: h.putheader(*args) 344 h.endheaders() 345 if data is not None: 346 h.send(data) 345 h.endheaders(data) 347 346 errcode, errmsg, headers = h.getreply() 348 347 fp = h.getfile() … … 379 378 def http_error_default(self, url, fp, errcode, errmsg, headers): 380 379 """Default error handler: close the connection and raise IOError.""" 381 void = fp.read()382 380 fp.close() 383 381 raise IOError, ('http error', errcode, errmsg, headers) … … 414 412 if not host: raise IOError, ('https error', 'no host given') 415 413 if proxy_passwd: 416 import base64414 proxy_passwd = unquote(proxy_passwd) 417 415 proxy_auth = base64.b64encode(proxy_passwd).strip() 418 416 else: 419 417 proxy_auth = None 420 418 if user_passwd: 421 import base64419 user_passwd = unquote(user_passwd) 422 420 auth = base64.b64encode(user_passwd).strip() 423 421 else: … … 437 435 if realhost: h.putheader('Host', realhost) 438 436 for args in self.addheaders: h.putheader(*args) 439 h.endheaders() 440 if data is not None: 441 h.send(data) 437 h.endheaders(data) 442 438 errcode, errmsg, headers = h.getreply() 443 439 fp = h.getfile() … … 490 486 if file[:1] == '/': 491 487 urlfile = 'file://' + file 488 elif file[:2] == './': 489 raise ValueError("local file url may start with / or file:. Unknown url of type: %s" % url) 492 490 return addinfourl(open(localname, 'rb'), 493 491 headers, urlfile) … … 518 516 else: passwd = None 519 517 host = unquote(host) 520 user = u nquote(user or '')521 passwd = unquote(passwd or '')518 user = user or '' 519 passwd = passwd or '' 522 520 host = socket.gethostbyname(host) 523 521 if not port: … … 593 591 encoding = '' 594 592 msg = [] 595 msg.append('Date: %s'%time.strftime('%a, %d %b %Y % TGMT',593 msg.append('Date: %s'%time.strftime('%a, %d %b %Y %H:%M:%S GMT', 596 594 time.gmtime(time.time()))) 597 595 msg.append('Content-type: %s' % type) 598 596 if encoding == 'base64': 599 import base64600 597 data = base64.decodestring(data) 601 598 else: … … 647 644 else: 648 645 return 649 void = fp.read()650 646 fp.close() 651 647 # In case the server sent a relative URL, join with original: 652 648 newurl = basejoin(self.type + ":" + url, newurl) 649 650 # For security reasons we do not allow redirects to protocols 651 # other than HTTP, HTTPS or FTP. 652 newurl_lower = newurl.lower() 653 if not (newurl_lower.startswith('http://') or 654 newurl_lower.startswith('https://') or 655 newurl_lower.startswith('ftp://')): 656 raise IOError('redirect error', errcode, 657 errmsg + " - Redirection to url '%s' is not allowed" % 658 newurl, 659 headers) 660 653 661 return self.open(newurl) 654 662 … … 772 780 return self.open(newurl, data) 773 781 774 def get_user_passwd(self, host, realm, clear_cache =0):782 def get_user_passwd(self, host, realm, clear_cache=0): 775 783 key = realm + '@' + host.lower() 776 784 if key in self.auth_cache: … … 812 820 global _thishost 813 821 if _thishost is None: 814 _thishost = socket.gethostbyname(socket.gethostname()) 822 try: 823 _thishost = socket.gethostbyname(socket.gethostname()) 824 except socket.gaierror: 825 _thishost = socket.gethostbyname('localhost') 815 826 return _thishost 816 827 … … 845 856 846 857 def __init__(self, user, passwd, host, port, dirs, 847 timeout=socket._GLOBAL_DEFAULT_TIMEOUT): 858 timeout=socket._GLOBAL_DEFAULT_TIMEOUT, 859 persistent=True): 848 860 self.user = user 849 861 self.passwd = passwd … … 852 864 self.dirs = dirs 853 865 self.timeout = timeout 866 self.refcount = 0 867 self.keepalive = persistent 854 868 self.init() 855 869 … … 860 874 self.ftp.connect(self.host, self.port, self.timeout) 861 875 self.ftp.login(self.user, self.passwd) 862 for dir in self.dirs:863 self.ftp.cwd(dir)876 _target = '/'.join(self.dirs) 877 self.ftp.cwd(_target) 864 878 865 879 def retrfile(self, file, type): … … 878 892 try: 879 893 cmd = 'RETR ' + file 880 conn = self.ftp.ntransfercmd(cmd)894 conn, retrlen = self.ftp.ntransfercmd(cmd) 881 895 except ftplib.error_perm, reason: 882 896 if str(reason)[:3] != '550': … … 898 912 else: 899 913 cmd = 'LIST' 900 conn = self.ftp.ntransfercmd(cmd)914 conn, retrlen = self.ftp.ntransfercmd(cmd) 901 915 self.busy = 1 916 ftpobj = addclosehook(conn.makefile('rb'), self.file_close) 917 self.refcount += 1 918 conn.close() 902 919 # Pass back both a suitably decorated object and a retrieval length 903 return ( addclosehook(conn[0].makefile('rb'),904 self.endtransfer), conn[1]) 920 return (ftpobj, retrlen) 921 905 922 def endtransfer(self): 906 923 if not self.busy: … … 913 930 914 931 def close(self): 932 self.keepalive = False 933 if self.refcount <= 0: 934 self.real_close() 935 936 def file_close(self): 937 self.endtransfer() 938 self.refcount -= 1 939 if self.refcount <= 0 and not self.keepalive: 940 self.real_close() 941 942 def real_close(self): 915 943 self.endtransfer() 916 944 try: … … 957 985 958 986 def close(self): 959 addbase.close(self)960 987 if self.closehook: 961 988 self.closehook(*self.hookargs) 962 989 self.closehook = None 963 990 self.hookargs = None 991 addbase.close(self) 964 992 965 993 class addinfo(addbase): … … 1059 1087 1060 1088 match = _hostprog.match(url) 1061 if match: return match.group(1, 2) 1089 if match: 1090 host_port = match.group(1) 1091 path = match.group(2) 1092 if path and not path.startswith('/'): 1093 path = '/' + path 1094 return host_port, path 1062 1095 return None, url 1063 1096 … … 1071 1104 1072 1105 match = _userprog.match(host) 1073 if match: return ma p(unquote, match.group(1, 2))1106 if match: return match.group(1, 2) 1074 1107 return None, host 1075 1108 … … 1080 1113 if _passwdprog is None: 1081 1114 import re 1082 _passwdprog = re.compile('^([^:]*):(.*)$' )1115 _passwdprog = re.compile('^([^:]*):(.*)$',re.S) 1083 1116 1084 1117 match = _passwdprog.match(user) … … 1163 1196 return attr, None 1164 1197 1165 _hextochr = dict(('%02x' % i, chr(i)) for i in range(256)) 1166 _hextochr.update(('%02X' % i, chr(i)) for i in range(256)) 1198 # urlparse contains a duplicate of this method to avoid a circular import. If 1199 # you update this method, also update the copy in urlparse. This code 1200 # duplication does not exist in Python3. 1201 1202 _hexdig = '0123456789ABCDEFabcdef' 1203 _hextochr = dict((a + b, chr(int(a + b, 16))) 1204 for a in _hexdig for b in _hexdig) 1205 _asciire = re.compile('([\x00-\x7f]+)') 1167 1206 1168 1207 def unquote(s): 1169 1208 """unquote('abc%20def') -> 'abc def'.""" 1170 res = s.split('%') 1171 for i in xrange(1, len(res)): 1172 item = res[i] 1173 try: 1174 res[i] = _hextochr[item[:2]] + item[2:] 1209 if _is_unicode(s): 1210 if '%' not in s: 1211 return s 1212 bits = _asciire.split(s) 1213 res = [bits[0]] 1214 append = res.append 1215 for i in range(1, len(bits), 2): 1216 append(unquote(str(bits[i])).decode('latin1')) 1217 append(bits[i + 1]) 1218 return ''.join(res) 1219 1220 bits = s.split('%') 1221 # fastpath 1222 if len(bits) == 1: 1223 return s 1224 res = [bits[0]] 1225 append = res.append 1226 for item in bits[1:]: 1227 try: 1228 append(_hextochr[item[:2]]) 1229 append(item[2:]) 1175 1230 except KeyError: 1176 res[i] = '%' + item 1177 except UnicodeDecodeError: 1178 res[i] = unichr(int(item[:2], 16)) + item[2:] 1179 return "".join(res) 1231 append('%') 1232 append(item) 1233 return ''.join(res) 1180 1234 1181 1235 def unquote_plus(s): … … 1187 1241 'abcdefghijklmnopqrstuvwxyz' 1188 1242 '0123456789' '_.-') 1189 _safemaps = {} 1190 1191 def quote(s, safe = '/'): 1243 _safe_map = {} 1244 for i, c in zip(xrange(256), str(bytearray(xrange(256)))): 1245 _safe_map[c] = c if (i < 128 and c in always_safe) else '%{:02X}'.format(i) 1246 _safe_quoters = {} 1247 1248 def quote(s, safe='/'): 1192 1249 """quote('abc def') -> 'abc%20def' 1193 1250 … … 1210 1267 reserved characters. 1211 1268 """ 1269 # fastpath 1270 if not s: 1271 if s is None: 1272 raise TypeError('None object cannot be quoted') 1273 return s 1212 1274 cachekey = (safe, always_safe) 1213 1275 try: 1214 safe_map = _safemaps[cachekey]1276 (quoter, safe) = _safe_quoters[cachekey] 1215 1277 except KeyError: 1216 safe += always_safe1217 safe_map = {}1218 for i in range(256):1219 c = chr(i)1220 safe_map[c] = (c in safe) and c or ('%%%02X' % i)1221 _safemaps[cachekey] = safe_map1222 res = map(safe_map.__getitem__, s)1223 return ''.join( res)1224 1225 def quote_plus(s, safe =''):1278 safe_map = _safe_map.copy() 1279 safe_map.update([(c, c) for c in safe]) 1280 quoter = safe_map.__getitem__ 1281 safe = always_safe + safe 1282 _safe_quoters[cachekey] = (quoter, safe) 1283 if not s.rstrip(safe): 1284 return s 1285 return ''.join(map(quoter, s)) 1286 1287 def quote_plus(s, safe=''): 1226 1288 """Quote the query fragment of a URL; replacing ' ' with '+'""" 1227 1289 if ' ' in s: … … 1230 1292 return quote(s, safe) 1231 1293 1232 def urlencode(query, doseq=0):1294 def urlencode(query, doseq=0): 1233 1295 """Encode a sequence of two-element tuples or dictionary into a URL query string. 1234 1296 … … 1282 1344 try: 1283 1345 # is this a sufficient test for sequence-ness? 1284 x =len(v)1346 len(v) 1285 1347 except TypeError: 1286 1348 # not a sequence … … 1323 1385 hostonly, port = splitport(host) 1324 1386 # check if the host ends with any of the DNS suffixes 1325 for name in no_proxy.split(','): 1387 no_proxy_list = [proxy.strip() for proxy in no_proxy.split(',')] 1388 for name in no_proxy_list: 1326 1389 if name and (hostonly.endswith(name) or host.endswith(name)): 1327 1390 return 1 … … 1376 1439 1377 1440 base = ip2num(m.group(1)) 1378 mask = int(m.group(2)[1:]) 1441 mask = m.group(2) 1442 if mask is None: 1443 mask = 8 * (m.group(1).count('.') + 1) 1444 1445 else: 1446 mask = int(mask[1:]) 1379 1447 mask = 32 - mask 1380 1448 … … 1386 1454 1387 1455 return False 1388 1389 1456 1390 1457 def getproxies_macosx_sysconf(): … … 1395 1462 """ 1396 1463 return _get_proxies() 1397 1398 1399 1464 1400 1465 def proxy_bypass(host): … … 1444 1509 else: 1445 1510 proxies['http'] = 'http://%s' % proxyServer 1511 proxies['https'] = 'https://%s' % proxyServer 1446 1512 proxies['ftp'] = 'ftp://%s' % proxyServer 1447 1513 internetSettings.Close() … … 1500 1566 # canonical entry. 1501 1567 proxyOverride = proxyOverride.split(';') 1502 i = 01503 while i < len(proxyOverride):1504 if proxyOverride[i] == '<local>':1505 proxyOverride[i:i+1] = ['localhost',1506 '127.0.0.1',1507 socket.gethostname(),1508 socket.gethostbyname(1509 socket.gethostname())]1510 i += 11511 # print proxyOverride1512 1568 # now check if we match one of the registry values. 1513 1569 for test in proxyOverride: 1570 if test == '<local>': 1571 if '.' not in rawHost: 1572 return 1 1514 1573 test = test.replace(".", r"\.") # mask dots 1515 1574 test = test.replace("*", r".*") # change glob sequence … … 1559 1618 print "Block number: %d, Block size: %d, Total size: %d" % ( 1560 1619 blocknum, blocksize, totalsize) 1561 1562 # Test program1563 def test(args=[]):1564 if not args:1565 args = [1566 '/etc/passwd',1567 'file:/etc/passwd',1568 'file://localhost/etc/passwd',1569 'ftp://ftp.gnu.org/pub/README',1570 'http://www.python.org/index.html',1571 ]1572 if hasattr(URLopener, "open_https"):1573 args.append('https://synergy.as.cmu.edu/~geek/')1574 try:1575 for url in args:1576 print '-'*10, url, '-'*101577 fn, h = urlretrieve(url, None, reporthook)1578 print fn1579 if h:1580 print '======'1581 for k in h.keys(): print k + ':', h[k]1582 print '======'1583 fp = open(fn, 'rb')1584 data = fp.read()1585 del fp1586 if '\r' in data:1587 table = string.maketrans("", "")1588 data = data.translate(table, "\r")1589 print data1590 fn, h = None, None1591 print '-'*401592 finally:1593 urlcleanup()1594 1595 def main():1596 import getopt, sys1597 try:1598 opts, args = getopt.getopt(sys.argv[1:], "th")1599 except getopt.error, msg:1600 print msg1601 print "Use -h for help"1602 return1603 t = 01604 for o, a in opts:1605 if o == '-t':1606 t = t + 11607 if o == '-h':1608 print "Usage: python urllib.py [-t] [url ...]"1609 print "-t runs self-test;",1610 print "otherwise, contents of urls are printed"1611 return1612 if t:1613 if t > 1:1614 test1()1615 test(args)1616 else:1617 if not args:1618 print "Use -h for help"1619 for url in args:1620 print urlopen(url).read(),1621 1622 # Run test program when run as a script1623 if __name__ == '__main__':1624 main()
Note:
See TracChangeset
for help on using the changeset viewer.