Changeset 388 for python/vendor/current/Lib/zipfile.py
- Timestamp:
- Mar 19, 2014, 11:11:30 AM (11 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
python/vendor/current/Lib/zipfile.py
r2 r388 4 4 import struct, os, time, sys, shutil 5 5 import binascii, cStringIO, stat 6 import io 7 import re 8 import string 6 9 7 10 try: … … 129 132 _CD64_OFFSET_START_CENTDIR = 9 130 133 131 def is_zipfile(filename): 132 """Quickly see if file is a ZIP file by checking the magic number.""" 134 def _check_zipfile(fp): 133 135 try: 134 fpin = open(filename, "rb") 135 endrec = _EndRecData(fpin) 136 fpin.close() 137 if endrec: 138 return True # file has correct magic number 136 if _EndRecData(fp): 137 return True # file has correct magic number 139 138 except IOError: 140 139 pass 141 140 return False 141 142 def is_zipfile(filename): 143 """Quickly see if a file is a ZIP file by checking the magic number. 144 145 The filename argument may be a file or file-like object too. 146 """ 147 result = False 148 try: 149 if hasattr(filename, "read"): 150 result = _check_zipfile(fp=filename) 151 else: 152 with open(filename, "rb") as fp: 153 result = _check_zipfile(fp) 154 except IOError: 155 pass 156 return result 142 157 143 158 def _EndRecData64(fpin, offset, endrec): … … 145 160 Read the ZIP64 end-of-archive records and use that to update endrec 146 161 """ 147 fpin.seek(offset - sizeEndCentDir64Locator, 2) 162 try: 163 fpin.seek(offset - sizeEndCentDir64Locator, 2) 164 except IOError: 165 # If the seek fails, the file is not large enough to contain a ZIP64 166 # end-of-archive record, so just return the end record we were given. 
167 return endrec 168 148 169 data = fpin.read(sizeEndCentDir64Locator) 170 if len(data) != sizeEndCentDir64Locator: 171 return endrec 149 172 sig, diskno, reloff, disks = struct.unpack(structEndArchive64Locator, data) 150 173 if sig != stringEndArchive64Locator: … … 157 180 fpin.seek(offset - sizeEndCentDir64Locator - sizeEndCentDir64, 2) 158 181 data = fpin.read(sizeEndCentDir64) 182 if len(data) != sizeEndCentDir64: 183 return endrec 159 184 sig, sz, create_version, read_version, disk_num, disk_dir, \ 160 185 dircount, dircount2, dirsize, diroffset = \ … … 192 217 return None 193 218 data = fpin.read() 194 if data[0:4] == stringEndArchive and data[-2:] == "\000\000": 219 if (len(data) == sizeEndCentDir and 220 data[0:4] == stringEndArchive and 221 data[-2:] == b"\000\000"): 195 222 # the signature is correct and there's no comment, unpack structure 196 223 endrec = struct.unpack(structEndArchive, data) … … 216 243 # found the magic number; attempt to unpack and interpret 217 244 recData = data[start:start+sizeEndCentDir] 245 if len(recData) != sizeEndCentDir: 246 # Zip file is corrupted. 
247 return None 218 248 endrec = list(struct.unpack(structEndArchive, recData)) 219 comment = data[start+sizeEndCentDir:] 220 # check that comment length is correct 221 if endrec[_ECD_COMMENT_SIZE] == len(comment): 222 # Append the archive comment and start offset 223 endrec.append(comment) 224 endrec.append(maxCommentStart + start) 225 226 # Try to read the "Zip64 end of central directory" structure 227 return _EndRecData64(fpin, maxCommentStart + start - filesize, 228 endrec) 249 commentSize = endrec[_ECD_COMMENT_SIZE] #as claimed by the zip file 250 comment = data[start+sizeEndCentDir:start+sizeEndCentDir+commentSize] 251 endrec.append(comment) 252 endrec.append(maxCommentStart + start) 253 254 # Try to read the "Zip64 end of central directory" structure 255 return _EndRecData64(fpin, maxCommentStart + start - filesize, 256 endrec) 229 257 230 258 # Unable to find a valid end of central directory structure 231 return 259 return None 232 260 233 261 … … 273 301 self.filename = filename # Normalized file name 274 302 self.date_time = date_time # year, month, day, hour, min, sec 303 304 if date_time[0] < 1980: 305 raise ValueError('ZIP does not support timestamps before 1980') 306 275 307 # Standard values: 276 308 self.compress_type = ZIP_STORED # Type of compression for the file … … 295 327 # file_size Size of the uncompressed file 296 328 297 def FileHeader(self ):329 def FileHeader(self, zip64=None): 298 330 """Return the per-file header as a string.""" 299 331 dt = self.date_time … … 310 342 extra = self.extra 311 343 312 if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:313 # File is larger than what fits into a 4 byte integer,314 # fall back to the ZIP64 extension344 if zip64 is None: 345 zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT 346 if zip64: 315 347 fmt = '<HHQQ' 316 348 extra = extra + struct.pack(fmt, 317 349 1, struct.calcsize(fmt)-4, file_size, compress_size) 350 if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT: 
351 if not zip64: 352 raise LargeZipFile("Filesize would require ZIP64 extensions") 353 # File is larger than what fits into a 4 byte integer, 354 # fall back to the ZIP64 extension 318 355 file_size = 0xffffffff 319 356 compress_size = 0xffffffff … … 440 477 return c 441 478 442 class ZipExtFile: 479 480 compressor_names = { 481 0: 'store', 482 1: 'shrink', 483 2: 'reduce', 484 3: 'reduce', 485 4: 'reduce', 486 5: 'reduce', 487 6: 'implode', 488 7: 'tokenize', 489 8: 'deflate', 490 9: 'deflate64', 491 10: 'implode', 492 12: 'bzip2', 493 14: 'lzma', 494 18: 'terse', 495 19: 'lz77', 496 97: 'wavpack', 497 98: 'ppmd', 498 } 499 500 501 class ZipExtFile(io.BufferedIOBase): 443 502 """File-like object for reading an archive member. 444 503 Is returned by ZipFile.open(). 445 504 """ 446 505 447 def __init__(self, fileobj, zipinfo, decrypt=None): 448 self.fileobj = fileobj 449 self.decrypter = decrypt 450 self.bytes_read = 0L 451 self.rawbuffer = '' 452 self.readbuffer = '' 453 self.linebuffer = '' 454 self.eof = False 455 self.univ_newlines = False 456 self.nlSeps = ("\n", ) 457 self.lastdiscard = '' 458 459 self.compress_type = zipinfo.compress_type 460 self.compress_size = zipinfo.compress_size 461 462 self.closed = False 463 self.mode = "r" 506 # Max size supported by decompressor. 507 MAX_N = 1 << 31 - 1 508 509 # Read from compressed files in 4k blocks. 510 MIN_READ_SIZE = 4096 511 512 # Search for universal newlines or line chunks. 
513 PATTERN = re.compile(r'^(?P<chunk>[^\r\n]+)|(?P<newline>\n|\r\n?)') 514 515 def __init__(self, fileobj, mode, zipinfo, decrypter=None, 516 close_fileobj=False): 517 self._fileobj = fileobj 518 self._decrypter = decrypter 519 self._close_fileobj = close_fileobj 520 521 self._compress_type = zipinfo.compress_type 522 self._compress_size = zipinfo.compress_size 523 self._compress_left = zipinfo.compress_size 524 525 if self._compress_type == ZIP_DEFLATED: 526 self._decompressor = zlib.decompressobj(-15) 527 elif self._compress_type != ZIP_STORED: 528 descr = compressor_names.get(self._compress_type) 529 if descr: 530 raise NotImplementedError("compression type %d (%s)" % (self._compress_type, descr)) 531 else: 532 raise NotImplementedError("compression type %d" % (self._compress_type,)) 533 self._unconsumed = '' 534 535 self._readbuffer = '' 536 self._offset = 0 537 538 self._universal = 'U' in mode 539 self.newlines = None 540 541 # Adjust read size for encrypted files since the first 12 bytes 542 # are for the encryption/password information. 
543 if self._decrypter is not None: 544 self._compress_left -= 12 545 546 self.mode = mode 464 547 self.name = zipinfo.filename 465 548 466 # read from compressed files in 64k blocks 467 self.compreadsize = 64*1024 468 if self.compress_type == ZIP_DEFLATED: 469 self.dc = zlib.decompressobj(-15) 470 471 def set_univ_newlines(self, univ_newlines): 472 self.univ_newlines = univ_newlines 473 474 # pick line separator char(s) based on universal newlines flag 475 self.nlSeps = ("\n", ) 476 if self.univ_newlines: 477 self.nlSeps = ("\r\n", "\r", "\n") 478 479 def __iter__(self): 480 return self 481 482 def next(self): 483 nextline = self.readline() 484 if not nextline: 485 raise StopIteration() 486 487 return nextline 549 if hasattr(zipinfo, 'CRC'): 550 self._expected_crc = zipinfo.CRC 551 self._running_crc = crc32(b'') & 0xffffffff 552 else: 553 self._expected_crc = None 554 555 def readline(self, limit=-1): 556 """Read and return a line from the stream. 557 558 If limit is specified, at most limit bytes will be read. 559 """ 560 561 if not self._universal and limit < 0: 562 # Shortcut common case - newline found in buffer. 563 i = self._readbuffer.find('\n', self._offset) + 1 564 if i > 0: 565 line = self._readbuffer[self._offset: i] 566 self._offset = i 567 return line 568 569 if not self._universal: 570 return io.BufferedIOBase.readline(self, limit) 571 572 line = '' 573 while limit < 0 or len(line) < limit: 574 readahead = self.peek(2) 575 if readahead == '': 576 return line 577 578 # 579 # Search for universal newlines or line chunks. 580 # 581 # The pattern returns either a line chunk or a newline, but not 582 # both. Combined with peek(2), we are assured that the sequence 583 # '\r\n' is always retrieved completely and never split into 584 # separate newlines - '\r', '\n' due to coincidental readaheads. 
585 # 586 match = self.PATTERN.search(readahead) 587 newline = match.group('newline') 588 if newline is not None: 589 if self.newlines is None: 590 self.newlines = [] 591 if newline not in self.newlines: 592 self.newlines.append(newline) 593 self._offset += len(newline) 594 return line + '\n' 595 596 chunk = match.group('chunk') 597 if limit >= 0: 598 chunk = chunk[: limit - len(line)] 599 600 self._offset += len(chunk) 601 line += chunk 602 603 return line 604 605 def peek(self, n=1): 606 """Returns buffered bytes without advancing the position.""" 607 if n > len(self._readbuffer) - self._offset: 608 chunk = self.read(n) 609 self._offset -= len(chunk) 610 611 # Return up to 512 bytes to reduce allocation overhead for tight loops. 612 return self._readbuffer[self._offset: self._offset + 512] 613 614 def readable(self): 615 return True 616 617 def read(self, n=-1): 618 """Read and return up to n bytes. 619 If the argument is omitted, None, or negative, data is read and returned until EOF is reached.. 620 """ 621 buf = '' 622 if n is None: 623 n = -1 624 while True: 625 if n < 0: 626 data = self.read1(n) 627 elif n > len(buf): 628 data = self.read1(n - len(buf)) 629 else: 630 return buf 631 if len(data) == 0: 632 return buf 633 buf += data 634 635 def _update_crc(self, newdata, eof): 636 # Update the CRC using the given data. 637 if self._expected_crc is None: 638 # No need to compute the CRC if we don't have a reference value 639 return 640 self._running_crc = crc32(newdata, self._running_crc) & 0xffffffff 641 # Check the CRC if we're at the end of the file 642 if eof and self._running_crc != self._expected_crc: 643 raise BadZipfile("Bad CRC-32 for file %r" % self.name) 644 645 def read1(self, n): 646 """Read up to n bytes with at most one read() system call.""" 647 648 # Simplify algorithm (branching) by transforming negative n to large n. 649 if n < 0 or n is None: 650 n = self.MAX_N 651 652 # Bytes available in read buffer. 
653 len_readbuffer = len(self._readbuffer) - self._offset 654 655 # Read from file. 656 if self._compress_left > 0 and n > len_readbuffer + len(self._unconsumed): 657 nbytes = n - len_readbuffer - len(self._unconsumed) 658 nbytes = max(nbytes, self.MIN_READ_SIZE) 659 nbytes = min(nbytes, self._compress_left) 660 661 data = self._fileobj.read(nbytes) 662 self._compress_left -= len(data) 663 664 if data and self._decrypter is not None: 665 data = ''.join(map(self._decrypter, data)) 666 667 if self._compress_type == ZIP_STORED: 668 self._update_crc(data, eof=(self._compress_left==0)) 669 self._readbuffer = self._readbuffer[self._offset:] + data 670 self._offset = 0 671 else: 672 # Prepare deflated bytes for decompression. 673 self._unconsumed += data 674 675 # Handle unconsumed data. 676 if (len(self._unconsumed) > 0 and n > len_readbuffer and 677 self._compress_type == ZIP_DEFLATED): 678 data = self._decompressor.decompress( 679 self._unconsumed, 680 max(n - len_readbuffer, self.MIN_READ_SIZE) 681 ) 682 683 self._unconsumed = self._decompressor.unconsumed_tail 684 eof = len(self._unconsumed) == 0 and self._compress_left == 0 685 if eof: 686 data += self._decompressor.flush() 687 688 self._update_crc(data, eof=eof) 689 self._readbuffer = self._readbuffer[self._offset:] + data 690 self._offset = 0 691 692 # Read from buffer. 693 data = self._readbuffer[self._offset: self._offset + n] 694 self._offset += len(data) 695 return data 488 696 489 697 def close(self): 490 self.closed = True 491 492 def _checkfornewline(self): 493 nl, nllen = -1, -1 494 if self.linebuffer: 495 # ugly check for cases where half of an \r\n pair was 496 # read on the last pass, and the \r was discarded. In this 497 # case we just throw away the \n at the start of the buffer. 
498 if (self.lastdiscard, self.linebuffer[0]) == ('\r','\n'): 499 self.linebuffer = self.linebuffer[1:] 500 501 for sep in self.nlSeps: 502 nl = self.linebuffer.find(sep) 503 if nl >= 0: 504 nllen = len(sep) 505 return nl, nllen 506 507 return nl, nllen 508 509 def readline(self, size = -1): 510 """Read a line with approx. size. If size is negative, 511 read a whole line. 512 """ 513 if size < 0: 514 size = sys.maxint 515 elif size == 0: 516 return '' 517 518 # check for a newline already in buffer 519 nl, nllen = self._checkfornewline() 520 521 if nl >= 0: 522 # the next line was already in the buffer 523 nl = min(nl, size) 524 else: 525 # no line break in buffer - try to read more 526 size -= len(self.linebuffer) 527 while nl < 0 and size > 0: 528 buf = self.read(min(size, 100)) 529 if not buf: 530 break 531 self.linebuffer += buf 532 size -= len(buf) 533 534 # check for a newline in buffer 535 nl, nllen = self._checkfornewline() 536 537 # we either ran out of bytes in the file, or 538 # met the specified size limit without finding a newline, 539 # so return current buffer 540 if nl < 0: 541 s = self.linebuffer 542 self.linebuffer = '' 543 return s 544 545 buf = self.linebuffer[:nl] 546 self.lastdiscard = self.linebuffer[nl:nl + nllen] 547 self.linebuffer = self.linebuffer[nl + nllen:] 548 549 # line is always returned with \n as newline char (except possibly 550 # for a final incomplete line in the file, which is handled above). 551 return buf + "\n" 552 553 def readlines(self, sizehint = -1): 554 """Return a list with all (following) lines. The sizehint parameter 555 is ignored in this implementation. 
556 """ 557 result = [] 558 while True: 559 line = self.readline() 560 if not line: break 561 result.append(line) 562 return result 563 564 def read(self, size = None): 565 # act like file() obj and return empty string if size is 0 566 if size == 0: 567 return '' 568 569 # determine read size 570 bytesToRead = self.compress_size - self.bytes_read 571 572 # adjust read size for encrypted files since the first 12 bytes 573 # are for the encryption/password information 574 if self.decrypter is not None: 575 bytesToRead -= 12 576 577 if size is not None and size >= 0: 578 if self.compress_type == ZIP_STORED: 579 lr = len(self.readbuffer) 580 bytesToRead = min(bytesToRead, size - lr) 581 elif self.compress_type == ZIP_DEFLATED: 582 if len(self.readbuffer) > size: 583 # the user has requested fewer bytes than we've already 584 # pulled through the decompressor; don't read any more 585 bytesToRead = 0 586 else: 587 # user will use up the buffer, so read some more 588 lr = len(self.rawbuffer) 589 bytesToRead = min(bytesToRead, self.compreadsize - lr) 590 591 # avoid reading past end of file contents 592 if bytesToRead + self.bytes_read > self.compress_size: 593 bytesToRead = self.compress_size - self.bytes_read 594 595 # try to read from file (if necessary) 596 if bytesToRead > 0: 597 bytes = self.fileobj.read(bytesToRead) 598 self.bytes_read += len(bytes) 599 self.rawbuffer += bytes 600 601 # handle contents of raw buffer 602 if self.rawbuffer: 603 newdata = self.rawbuffer 604 self.rawbuffer = '' 605 606 # decrypt new data if we were given an object to handle that 607 if newdata and self.decrypter is not None: 608 newdata = ''.join(map(self.decrypter, newdata)) 609 610 # decompress newly read data if necessary 611 if newdata and self.compress_type == ZIP_DEFLATED: 612 newdata = self.dc.decompress(newdata) 613 self.rawbuffer = self.dc.unconsumed_tail 614 if self.eof and len(self.rawbuffer) == 0: 615 # we're out of raw bytes (both from the file and 616 # the local buffer); 
flush just to make sure the 617 # decompressor is done 618 newdata += self.dc.flush() 619 # prevent decompressor from being used again 620 self.dc = None 621 622 self.readbuffer += newdata 623 624 625 # return what the user asked for 626 if size is None or len(self.readbuffer) <= size: 627 bytes = self.readbuffer 628 self.readbuffer = '' 629 else: 630 bytes = self.readbuffer[:size] 631 self.readbuffer = self.readbuffer[size:] 632 633 return bytes 634 635 636 class ZipFile: 698 try : 699 if self._close_fileobj: 700 self._fileobj.close() 701 finally: 702 super(ZipExtFile, self).close() 703 704 705 class ZipFile(object): 637 706 """ Class with methods to open, read, write, close, list zip files. 638 707 … … 673 742 self.mode = key = mode.replace('b', '')[0] 674 743 self.pwd = None 675 self. comment = ''744 self._comment = '' 676 745 677 746 # Check if we were passed a file-like object … … 693 762 self.filename = getattr(file, 'name', None) 694 763 695 if key == 'r': 696 self._GetContents() 697 elif key == 'w': 698 pass 699 elif key == 'a': 700 try: # See if file is a zip file 764 try: 765 if key == 'r': 701 766 self._RealGetContents() 702 # seek to start of directory and overwrite 703 self.fp.seek(self.start_dir, 0) 704 except BadZipfile: # file is not a zip file, just append 705 self.fp.seek(0, 2) 706 else: 767 elif key == 'w': 768 # set the modified flag so central directory gets written 769 # even if no files are added to the archive 770 self._didModify = True 771 elif key == 'a': 772 try: 773 # See if file is a zip file 774 self._RealGetContents() 775 # seek to start of directory and overwrite 776 self.fp.seek(self.start_dir, 0) 777 except BadZipfile: 778 # file is not a zip file, just append 779 self.fp.seek(0, 2) 780 781 # set the modified flag so central directory gets written 782 # even if no files are added to the archive 783 self._didModify = True 784 else: 785 raise RuntimeError('Mode must be "r", "w" or "a"') 786 except: 787 fp = self.fp 788 self.fp = None 
707 789 if not self._filePassed: 708 self.fp.close() 709 self.fp = None 710 raise RuntimeError, 'Mode must be "r", "w" or "a"' 711 712 def _GetContents(self): 713 """Read the directory, making sure we close the file if the format 714 is bad.""" 715 try: 716 self._RealGetContents() 717 except BadZipfile: 718 if not self._filePassed: 719 self.fp.close() 720 self.fp = None 790 fp.close() 721 791 raise 792 793 def __enter__(self): 794 return self 795 796 def __exit__(self, type, value, traceback): 797 self.close() 722 798 723 799 def _RealGetContents(self): 724 800 """Read in the table of contents for the ZIP file.""" 725 801 fp = self.fp 726 endrec = _EndRecData(fp) 802 try: 803 endrec = _EndRecData(fp) 804 except IOError: 805 raise BadZipfile("File is not a zip file") 727 806 if not endrec: 728 807 raise BadZipfile, "File is not a zip file" … … 731 810 size_cd = endrec[_ECD_SIZE] # bytes in central directory 732 811 offset_cd = endrec[_ECD_OFFSET] # offset of central directory 733 self. comment = endrec[_ECD_COMMENT]# archive comment812 self._comment = endrec[_ECD_COMMENT] # archive comment 734 813 735 814 # "concat" is zero, unless zip was concatenated to another file … … 750 829 while total < size_cd: 751 830 centdir = fp.read(sizeCentralDir) 752 if centdir[0:4] != stringCentralDir:753 raise BadZipfile , "Bad magic number for central directory"831 if len(centdir) != sizeCentralDir: 832 raise BadZipfile("Truncated central directory") 754 833 centdir = struct.unpack(structCentralDir, centdir) 834 if centdir[_CD_SIGNATURE] != stringCentralDir: 835 raise BadZipfile("Bad magic number for central directory") 755 836 if self.debug > 2: 756 837 print centdir … … 811 892 # Read by chunks, to avoid an OverflowError or a 812 893 # MemoryError with very large embedded files. 
813 f = self.open(zinfo.filename, "r")814 while f.read(chunk_size): # Check CRC-32815 pass894 with self.open(zinfo.filename, "r") as f: 895 while f.read(chunk_size): # Check CRC-32 896 pass 816 897 except BadZipfile: 817 898 return zinfo.filename … … 829 910 """Set default password for encrypted files.""" 830 911 self.pwd = pwd 912 913 @property 914 def comment(self): 915 """The comment text associated with the ZIP file.""" 916 return self._comment 917 918 @comment.setter 919 def comment(self, comment): 920 # check for valid comment length 921 if len(comment) >= ZIP_MAX_COMMENT: 922 if self.debug: 923 print('Archive comment is too long; truncating to %d bytes' 924 % ZIP_MAX_COMMENT) 925 comment = comment[:ZIP_MAX_COMMENT] 926 self._comment = comment 927 self._didModify = True 831 928 832 929 def read(self, name, pwd=None): … … 846 943 if self._filePassed: 847 944 zef_file = self.fp 945 should_close = False 848 946 else: 849 947 zef_file = open(self.filename, 'rb') 850 851 # Make sure we have an info object 852 if isinstance(name, ZipInfo): 853 # 'name' is already an info object 854 zinfo = name 855 else: 856 # Get info object for name 857 zinfo = self.getinfo(name) 858 859 zef_file.seek(zinfo.header_offset, 0) 860 861 # Skip the file header: 862 fheader = zef_file.read(sizeFileHeader) 863 if fheader[0:4] != stringFileHeader: 864 raise BadZipfile, "Bad magic number for file header" 865 866 fheader = struct.unpack(structFileHeader, fheader) 867 fname = zef_file.read(fheader[_FH_FILENAME_LENGTH]) 868 if fheader[_FH_EXTRA_FIELD_LENGTH]: 869 zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH]) 870 871 if fname != zinfo.orig_filename: 872 raise BadZipfile, \ 873 'File name in directory "%s" and header "%s" differ.' 
% ( 874 zinfo.orig_filename, fname) 875 876 # check for encrypted flag & handle password 877 is_encrypted = zinfo.flag_bits & 0x1 878 zd = None 879 if is_encrypted: 880 if not pwd: 881 pwd = self.pwd 882 if not pwd: 883 raise RuntimeError, "File %s is encrypted, " \ 884 "password required for extraction" % name 885 886 zd = _ZipDecrypter(pwd) 887 # The first 12 bytes in the cypher stream is an encryption header 888 # used to strengthen the algorithm. The first 11 bytes are 889 # completely random, while the 12th contains the MSB of the CRC, 890 # or the MSB of the file time depending on the header type 891 # and is used to check the correctness of the password. 892 bytes = zef_file.read(12) 893 h = map(zd, bytes[0:12]) 894 if zinfo.flag_bits & 0x8: 895 # compare against the file type from extended local headers 896 check_byte = (zinfo._raw_time >> 8) & 0xff 948 should_close = True 949 950 try: 951 # Make sure we have an info object 952 if isinstance(name, ZipInfo): 953 # 'name' is already an info object 954 zinfo = name 897 955 else: 898 # compare against the CRC otherwise 899 check_byte = (zinfo.CRC >> 24) & 0xff 900 if ord(h[11]) != check_byte: 901 raise RuntimeError("Bad password for file", name) 902 903 # build and return a ZipExtFile 904 if zd is None: 905 zef = ZipExtFile(zef_file, zinfo) 906 else: 907 zef = ZipExtFile(zef_file, zinfo, zd) 908 909 # set universal newlines on ZipExtFile if necessary 910 if "U" in mode: 911 zef.set_univ_newlines(True) 912 return zef 956 # Get info object for name 957 zinfo = self.getinfo(name) 958 959 zef_file.seek(zinfo.header_offset, 0) 960 961 # Skip the file header: 962 fheader = zef_file.read(sizeFileHeader) 963 if len(fheader) != sizeFileHeader: 964 raise BadZipfile("Truncated file header") 965 fheader = struct.unpack(structFileHeader, fheader) 966 if fheader[_FH_SIGNATURE] != stringFileHeader: 967 raise BadZipfile("Bad magic number for file header") 968 969 fname = zef_file.read(fheader[_FH_FILENAME_LENGTH]) 970 if 
fheader[_FH_EXTRA_FIELD_LENGTH]: 971 zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH]) 972 973 if fname != zinfo.orig_filename: 974 raise BadZipfile, \ 975 'File name in directory "%s" and header "%s" differ.' % ( 976 zinfo.orig_filename, fname) 977 978 # check for encrypted flag & handle password 979 is_encrypted = zinfo.flag_bits & 0x1 980 zd = None 981 if is_encrypted: 982 if not pwd: 983 pwd = self.pwd 984 if not pwd: 985 raise RuntimeError, "File %s is encrypted, " \ 986 "password required for extraction" % name 987 988 zd = _ZipDecrypter(pwd) 989 # The first 12 bytes in the cypher stream is an encryption header 990 # used to strengthen the algorithm. The first 11 bytes are 991 # completely random, while the 12th contains the MSB of the CRC, 992 # or the MSB of the file time depending on the header type 993 # and is used to check the correctness of the password. 994 bytes = zef_file.read(12) 995 h = map(zd, bytes[0:12]) 996 if zinfo.flag_bits & 0x8: 997 # compare against the file type from extended local headers 998 check_byte = (zinfo._raw_time >> 8) & 0xff 999 else: 1000 # compare against the CRC otherwise 1001 check_byte = (zinfo.CRC >> 24) & 0xff 1002 if ord(h[11]) != check_byte: 1003 raise RuntimeError("Bad password for file", name) 1004 1005 return ZipExtFile(zef_file, mode, zinfo, zd, 1006 close_fileobj=should_close) 1007 except: 1008 if should_close: 1009 zef_file.close() 1010 raise 913 1011 914 1012 def extract(self, member, path=None, pwd=None): … … 944 1042 # build the destination pathname, replacing 945 1043 # forward slashes to platform specific separators. 946 # Strip trailing path separator, unless it represents the root. 
947 if (targetpath[-1:] in (os.path.sep, os.path.altsep) 948 and len(os.path.splitdrive(targetpath)[1]) > 1): 949 targetpath = targetpath[:-1] 950 951 # don't include leading "/" from file name if present 952 if member.filename[0] == '/': 953 targetpath = os.path.join(targetpath, member.filename[1:]) 954 else: 955 targetpath = os.path.join(targetpath, member.filename) 956 1044 arcname = member.filename.replace('/', os.path.sep) 1045 1046 if os.path.altsep: 1047 arcname = arcname.replace(os.path.altsep, os.path.sep) 1048 # interpret absolute pathname as relative, remove drive letter or 1049 # UNC path, redundant separators, "." and ".." components. 1050 arcname = os.path.splitdrive(arcname)[1] 1051 arcname = os.path.sep.join(x for x in arcname.split(os.path.sep) 1052 if x not in ('', os.path.curdir, os.path.pardir)) 1053 if os.path.sep == '\\': 1054 # filter illegal characters on Windows 1055 illegal = ':<>|"?*' 1056 if isinstance(arcname, unicode): 1057 table = {ord(c): ord('_') for c in illegal} 1058 else: 1059 table = string.maketrans(illegal, '_' * len(illegal)) 1060 arcname = arcname.translate(table) 1061 # remove trailing dots 1062 arcname = (x.rstrip('.') for x in arcname.split(os.path.sep)) 1063 arcname = os.path.sep.join(x for x in arcname if x) 1064 1065 targetpath = os.path.join(targetpath, arcname) 957 1066 targetpath = os.path.normpath(targetpath) 958 1067 … … 967 1076 return targetpath 968 1077 969 source = self.open(member, pwd=pwd) 970 target = file(targetpath, "wb") 971 shutil.copyfileobj(source, target) 972 source.close() 973 target.close() 1078 with self.open(member, pwd=pwd) as source, \ 1079 file(targetpath, "wb") as target: 1080 shutil.copyfileobj(source, target) 974 1081 975 1082 return targetpath … … 1037 1144 self.filelist.append(zinfo) 1038 1145 self.NameToInfo[zinfo.filename] = zinfo 1039 self.fp.write(zinfo.FileHeader( ))1146 self.fp.write(zinfo.FileHeader(False)) 1040 1147 return 1041 1148 1042 fp = open(filename, "rb") 1043 # Must 
overwrite CRC and sizes with correct data later 1044 zinfo.CRC = CRC = 0 1045 zinfo.compress_size = compress_size = 0 1046 zinfo.file_size = file_size = 0 1047 self.fp.write(zinfo.FileHeader()) 1048 if zinfo.compress_type == ZIP_DEFLATED: 1049 cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION, 1050 zlib.DEFLATED, -15) 1051 else: 1052 cmpr = None 1053 while 1: 1054 buf = fp.read(1024 * 8) 1055 if not buf: 1056 break 1057 file_size = file_size + len(buf) 1058 CRC = crc32(buf, CRC) & 0xffffffff 1059 if cmpr: 1060 buf = cmpr.compress(buf) 1061 compress_size = compress_size + len(buf) 1062 self.fp.write(buf) 1063 fp.close() 1149 with open(filename, "rb") as fp: 1150 # Must overwrite CRC and sizes with correct data later 1151 zinfo.CRC = CRC = 0 1152 zinfo.compress_size = compress_size = 0 1153 # Compressed size can be larger than uncompressed size 1154 zip64 = self._allowZip64 and \ 1155 zinfo.file_size * 1.05 > ZIP64_LIMIT 1156 self.fp.write(zinfo.FileHeader(zip64)) 1157 if zinfo.compress_type == ZIP_DEFLATED: 1158 cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION, 1159 zlib.DEFLATED, -15) 1160 else: 1161 cmpr = None 1162 file_size = 0 1163 while 1: 1164 buf = fp.read(1024 * 8) 1165 if not buf: 1166 break 1167 file_size = file_size + len(buf) 1168 CRC = crc32(buf, CRC) & 0xffffffff 1169 if cmpr: 1170 buf = cmpr.compress(buf) 1171 compress_size = compress_size + len(buf) 1172 self.fp.write(buf) 1064 1173 if cmpr: 1065 1174 buf = cmpr.flush() … … 1071 1180 zinfo.CRC = CRC 1072 1181 zinfo.file_size = file_size 1073 # Seek backwards and write CRC and file sizes 1182 if not zip64 and self._allowZip64: 1183 if file_size > ZIP64_LIMIT: 1184 raise RuntimeError('File size has increased during compressing') 1185 if compress_size > ZIP64_LIMIT: 1186 raise RuntimeError('Compressed size larger than uncompressed size') 1187 # Seek backwards and write file header (which will now include 1188 # correct CRC and file sizes) 1074 1189 position = self.fp.tell() # Preserve current 
position in file 1075 self.fp.seek(zinfo.header_offset + 14, 0) 1076 self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size, 1077 zinfo.file_size)) 1190 self.fp.seek(zinfo.header_offset, 0) 1191 self.fp.write(zinfo.FileHeader(zip64)) 1078 1192 self.fp.seek(position, 0) 1079 1193 self.filelist.append(zinfo) 1080 1194 self.NameToInfo[zinfo.filename] = zinfo 1081 1195 1082 def writestr(self, zinfo_or_arcname, bytes ):1196 def writestr(self, zinfo_or_arcname, bytes, compress_type=None): 1083 1197 """Write a file into the archive. The contents is the string 1084 1198 'bytes'. 'zinfo_or_arcname' is either a ZipInfo instance or … … 1087 1201 zinfo = ZipInfo(filename=zinfo_or_arcname, 1088 1202 date_time=time.localtime(time.time())[:6]) 1203 1089 1204 zinfo.compress_type = self.compression 1090 1205 zinfo.external_attr = 0600 << 16 … … 1095 1210 raise RuntimeError( 1096 1211 "Attempt to write to ZIP archive that was already closed") 1212 1213 if compress_type is not None: 1214 zinfo.compress_type = compress_type 1097 1215 1098 1216 zinfo.file_size = len(bytes) # Uncompressed size … … 1108 1226 else: 1109 1227 zinfo.compress_size = zinfo.file_size 1110 zinfo.header_offset = self.fp.tell() # Start of header bytes 1111 self.fp.write(zinfo.FileHeader()) 1228 zip64 = zinfo.file_size > ZIP64_LIMIT or \ 1229 zinfo.compress_size > ZIP64_LIMIT 1230 if zip64 and not self._allowZip64: 1231 raise LargeZipFile("Filesize would require ZIP64 extensions") 1232 self.fp.write(zinfo.FileHeader(zip64)) 1112 1233 self.fp.write(bytes) 1113 self.fp.flush()1114 1234 if zinfo.flag_bits & 0x08: 1115 1235 # Write CRC and file sizes after the file data 1116 self.fp.write(struct.pack("<LLL", zinfo.CRC, zinfo.compress_size, 1236 fmt = '<LQQ' if zip64 else '<LLL' 1237 self.fp.write(struct.pack(fmt, zinfo.CRC, zinfo.compress_size, 1117 1238 zinfo.file_size)) 1239 self.fp.flush() 1118 1240 self.filelist.append(zinfo) 1119 1241 self.NameToInfo[zinfo.filename] = zinfo … … 1129 1251 return 1130 
1252 1131 if self.mode in ("w", "a") and self._didModify: # write ending records 1132 count = 0 1133 pos1 = self.fp.tell() 1134 for zinfo in self.filelist: # write central directory 1135 count = count + 1 1136 dt = zinfo.date_time 1137 dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2] 1138 dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2) 1139 extra = [] 1140 if zinfo.file_size > ZIP64_LIMIT \ 1141 or zinfo.compress_size > ZIP64_LIMIT: 1142 extra.append(zinfo.file_size) 1143 extra.append(zinfo.compress_size) 1144 file_size = 0xffffffff 1145 compress_size = 0xffffffff 1146 else: 1147 file_size = zinfo.file_size 1148 compress_size = zinfo.compress_size 1149 1150 if zinfo.header_offset > ZIP64_LIMIT: 1151 extra.append(zinfo.header_offset) 1152 header_offset = 0xffffffffL 1153 else: 1154 header_offset = zinfo.header_offset 1155 1156 extra_data = zinfo.extra 1157 if extra: 1158 # Append a ZIP64 field to the extra's 1159 extra_data = struct.pack( 1160 '<HH' + 'Q'*len(extra), 1161 1, 8*len(extra), *extra) + extra_data 1162 1163 extract_version = max(45, zinfo.extract_version) 1164 create_version = max(45, zinfo.create_version) 1165 else: 1166 extract_version = zinfo.extract_version 1167 create_version = zinfo.create_version 1168 1169 try: 1170 filename, flag_bits = zinfo._encodeFilenameFlags() 1171 centdir = struct.pack(structCentralDir, 1172 stringCentralDir, create_version, 1173 zinfo.create_system, extract_version, zinfo.reserved, 1174 flag_bits, zinfo.compress_type, dostime, dosdate, 1175 zinfo.CRC, compress_size, file_size, 1176 len(filename), len(extra_data), len(zinfo.comment), 1177 0, zinfo.internal_attr, zinfo.external_attr, 1178 header_offset) 1179 except DeprecationWarning: 1180 print >>sys.stderr, (structCentralDir, 1181 stringCentralDir, create_version, 1182 zinfo.create_system, extract_version, zinfo.reserved, 1183 zinfo.flag_bits, zinfo.compress_type, dostime, dosdate, 1184 zinfo.CRC, compress_size, file_size, 1185 len(zinfo.filename), len(extra_data), 
len(zinfo.comment), 1186 0, zinfo.internal_attr, zinfo.external_attr, 1187 header_offset) 1188 raise 1189 self.fp.write(centdir) 1190 self.fp.write(filename) 1191 self.fp.write(extra_data) 1192 self.fp.write(zinfo.comment) 1193 1194 pos2 = self.fp.tell() 1195 # Write end-of-zip-archive record 1196 centDirCount = count 1197 centDirSize = pos2 - pos1 1198 centDirOffset = pos1 1199 if (centDirCount >= ZIP_FILECOUNT_LIMIT or 1200 centDirOffset > ZIP64_LIMIT or 1201 centDirSize > ZIP64_LIMIT): 1202 # Need to write the ZIP64 end-of-archive records 1203 zip64endrec = struct.pack( 1204 structEndArchive64, stringEndArchive64, 1205 44, 45, 45, 0, 0, centDirCount, centDirCount, 1206 centDirSize, centDirOffset) 1207 self.fp.write(zip64endrec) 1208 1209 zip64locrec = struct.pack( 1210 structEndArchive64Locator, 1211 stringEndArchive64Locator, 0, pos2, 1) 1212 self.fp.write(zip64locrec) 1213 centDirCount = min(centDirCount, 0xFFFF) 1214 centDirSize = min(centDirSize, 0xFFFFFFFF) 1215 centDirOffset = min(centDirOffset, 0xFFFFFFFF) 1216 1217 # check for valid comment length 1218 if len(self.comment) >= ZIP_MAX_COMMENT: 1219 if self.debug > 0: 1220 msg = 'Archive comment is too long; truncating to %d bytes' \ 1221 % ZIP_MAX_COMMENT 1222 self.comment = self.comment[:ZIP_MAX_COMMENT] 1223 1224 endrec = struct.pack(structEndArchive, stringEndArchive, 1225 0, 0, centDirCount, centDirCount, 1226 centDirSize, centDirOffset, len(self.comment)) 1227 self.fp.write(endrec) 1228 self.fp.write(self.comment) 1229 self.fp.flush() 1230 1231 if not self._filePassed: 1232 self.fp.close() 1233 self.fp = None 1253 try: 1254 if self.mode in ("w", "a") and self._didModify: # write ending records 1255 count = 0 1256 pos1 = self.fp.tell() 1257 for zinfo in self.filelist: # write central directory 1258 count = count + 1 1259 dt = zinfo.date_time 1260 dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2] 1261 dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2) 1262 extra = [] 1263 if zinfo.file_size > 
ZIP64_LIMIT \ 1264 or zinfo.compress_size > ZIP64_LIMIT: 1265 extra.append(zinfo.file_size) 1266 extra.append(zinfo.compress_size) 1267 file_size = 0xffffffff 1268 compress_size = 0xffffffff 1269 else: 1270 file_size = zinfo.file_size 1271 compress_size = zinfo.compress_size 1272 1273 if zinfo.header_offset > ZIP64_LIMIT: 1274 extra.append(zinfo.header_offset) 1275 header_offset = 0xffffffffL 1276 else: 1277 header_offset = zinfo.header_offset 1278 1279 extra_data = zinfo.extra 1280 if extra: 1281 # Append a ZIP64 field to the extra's 1282 extra_data = struct.pack( 1283 '<HH' + 'Q'*len(extra), 1284 1, 8*len(extra), *extra) + extra_data 1285 1286 extract_version = max(45, zinfo.extract_version) 1287 create_version = max(45, zinfo.create_version) 1288 else: 1289 extract_version = zinfo.extract_version 1290 create_version = zinfo.create_version 1291 1292 try: 1293 filename, flag_bits = zinfo._encodeFilenameFlags() 1294 centdir = struct.pack(structCentralDir, 1295 stringCentralDir, create_version, 1296 zinfo.create_system, extract_version, zinfo.reserved, 1297 flag_bits, zinfo.compress_type, dostime, dosdate, 1298 zinfo.CRC, compress_size, file_size, 1299 len(filename), len(extra_data), len(zinfo.comment), 1300 0, zinfo.internal_attr, zinfo.external_attr, 1301 header_offset) 1302 except DeprecationWarning: 1303 print >>sys.stderr, (structCentralDir, 1304 stringCentralDir, create_version, 1305 zinfo.create_system, extract_version, zinfo.reserved, 1306 zinfo.flag_bits, zinfo.compress_type, dostime, dosdate, 1307 zinfo.CRC, compress_size, file_size, 1308 len(zinfo.filename), len(extra_data), len(zinfo.comment), 1309 0, zinfo.internal_attr, zinfo.external_attr, 1310 header_offset) 1311 raise 1312 self.fp.write(centdir) 1313 self.fp.write(filename) 1314 self.fp.write(extra_data) 1315 self.fp.write(zinfo.comment) 1316 1317 pos2 = self.fp.tell() 1318 # Write end-of-zip-archive record 1319 centDirCount = count 1320 centDirSize = pos2 - pos1 1321 centDirOffset = pos1 1322 if 
(centDirCount >= ZIP_FILECOUNT_LIMIT or 1323 centDirOffset > ZIP64_LIMIT or 1324 centDirSize > ZIP64_LIMIT): 1325 # Need to write the ZIP64 end-of-archive records 1326 zip64endrec = struct.pack( 1327 structEndArchive64, stringEndArchive64, 1328 44, 45, 45, 0, 0, centDirCount, centDirCount, 1329 centDirSize, centDirOffset) 1330 self.fp.write(zip64endrec) 1331 1332 zip64locrec = struct.pack( 1333 structEndArchive64Locator, 1334 stringEndArchive64Locator, 0, pos2, 1) 1335 self.fp.write(zip64locrec) 1336 centDirCount = min(centDirCount, 0xFFFF) 1337 centDirSize = min(centDirSize, 0xFFFFFFFF) 1338 centDirOffset = min(centDirOffset, 0xFFFFFFFF) 1339 1340 endrec = struct.pack(structEndArchive, stringEndArchive, 1341 0, 0, centDirCount, centDirCount, 1342 centDirSize, centDirOffset, len(self._comment)) 1343 self.fp.write(endrec) 1344 self.fp.write(self._comment) 1345 self.fp.flush() 1346 finally: 1347 fp = self.fp 1348 self.fp = None 1349 if not self._filePassed: 1350 fp.close() 1234 1351 1235 1352 … … 1353 1470 print USAGE 1354 1471 sys.exit(1) 1355 zf = ZipFile(args[1], 'r') 1356 zf.printdir() 1357 zf.close() 1472 with ZipFile(args[1], 'r') as zf: 1473 zf.printdir() 1358 1474 1359 1475 elif args[0] == '-t': … … 1361 1477 print USAGE 1362 1478 sys.exit(1) 1363 zf = ZipFile(args[1], 'r') 1364 zf.testzip() 1479 with ZipFile(args[1], 'r') as zf: 1480 badfile = zf.testzip() 1481 if badfile: 1482 print("The following enclosed file is corrupted: {!r}".format(badfile)) 1365 1483 print "Done testing" 1366 1484 … … 1370 1488 sys.exit(1) 1371 1489 1372 zf = ZipFile(args[1], 'r') 1373 out = args[2] 1374 for path in zf.namelist(): 1375 if path.startswith('./'): 1376 tgt = os.path.join(out, path[2:]) 1377 else: 1378 tgt = os.path.join(out, path) 1379 1380 tgtdir = os.path.dirname(tgt) 1381 if not os.path.exists(tgtdir): 1382 os.makedirs(tgtdir) 1383 fp = open(tgt, 'wb') 1384 fp.write(zf.read(path)) 1385 fp.close() 1386 zf.close() 1490 with ZipFile(args[1], 'r') as zf: 1491 out = 
args[2] 1492 for path in zf.namelist(): 1493 if path.startswith('./'): 1494 tgt = os.path.join(out, path[2:]) 1495 else: 1496 tgt = os.path.join(out, path) 1497 1498 tgtdir = os.path.dirname(tgt) 1499 if not os.path.exists(tgtdir): 1500 os.makedirs(tgtdir) 1501 with open(tgt, 'wb') as fp: 1502 fp.write(zf.read(path)) 1387 1503 1388 1504 elif args[0] == '-c': … … 1400 1516 # else: ignore 1401 1517 1402 zf = ZipFile(args[1], 'w', allowZip64=True) 1403 for src in args[2:]: 1404 addToZip(zf, src, os.path.basename(src)) 1405 1406 zf.close() 1518 with ZipFile(args[1], 'w', allowZip64=True) as zf: 1519 for src in args[2:]: 1520 addToZip(zf, src, os.path.basename(src)) 1407 1521 1408 1522 if __name__ == "__main__":
Note: See TracChangeset for help on using the changeset viewer.