source: python/trunk/Lib/zipfile.py@ 611

Last change on this file since 611 was 391, checked in by dmik, 11 years ago

python: Merge vendor 2.7.6 to trunk.

  • Property svn:eol-style set to native
File size: 56.5 KB
Line 
1"""
2Read and write ZIP files.
3"""
4import struct, os, time, sys, shutil
5import binascii, cStringIO, stat
6import io
7import re
8import string
9
10try:
11 import zlib # We may need its compression method
12 crc32 = zlib.crc32
13except ImportError:
14 zlib = None
15 crc32 = binascii.crc32
16
17__all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
18 "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile" ]
19
class BadZipfile(Exception):
    """Raised by this module for malformed or unsupported ZIP data."""


class LargeZipFile(Exception):
    """
    Raised when writing a zipfile, the zipfile requires ZIP64 extensions
    and those extensions are disabled.
    """


# The exception raised by this module (alias of BadZipfile)
error = BadZipfile
31
# Numeric limits used elsewhere in this module
ZIP64_LIMIT = 0x7FFFFFFF            # (1 << 31) - 1
ZIP_FILECOUNT_LIMIT = 0x10000       # 1 << 16
ZIP_MAX_COMMENT = 0xFFFF            # (1 << 16) - 1

# constants for Zip file compression methods
ZIP_STORED = 0
ZIP_DEFLATED = 8
# Other ZIP compression methods not supported
40
# Below are some formats and associated data for reading/writing headers using
# the struct module.  The names and structures of headers/records are those used
# in the PKWARE description of the ZIP file format:
#     http://www.pkware.com/documents/casestudies/APPNOTE.TXT
# (URL valid as of January 2008)

# The "end of central directory" structure, magic number, size, and indices
# (section V.I in the format document)
structEndArchive = "<4s4H2LH"
stringEndArchive = "PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

# Positions of the fields inside the unpacked record (0 .. 9)
(_ECD_SIGNATURE,
 _ECD_DISK_NUMBER,
 _ECD_DISK_START,
 _ECD_ENTRIES_THIS_DISK,
 _ECD_ENTRIES_TOTAL,
 _ECD_SIZE,
 _ECD_OFFSET,
 _ECD_COMMENT_SIZE,
 # These last two indices are not part of the structure as defined in the
 # spec, but they are used internally by this module as a convenience
 _ECD_COMMENT,
 _ECD_LOCATION) = range(10)
65
# The "central directory" structure, magic number, size, and indices
# of entries in the structure (section V.F in the format document)
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = "PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# indexes of entries in the central directory structure (0 .. 18)
(_CD_SIGNATURE,
 _CD_CREATE_VERSION,
 _CD_CREATE_SYSTEM,
 _CD_EXTRACT_VERSION,
 _CD_EXTRACT_SYSTEM,
 _CD_FLAG_BITS,
 _CD_COMPRESS_TYPE,
 _CD_TIME,
 _CD_DATE,
 _CD_CRC,
 _CD_COMPRESSED_SIZE,
 _CD_UNCOMPRESSED_SIZE,
 _CD_FILENAME_LENGTH,
 _CD_EXTRA_FIELD_LENGTH,
 _CD_COMMENT_LENGTH,
 _CD_DISK_NUMBER_START,
 _CD_INTERNAL_FILE_ATTRIBUTES,
 _CD_EXTERNAL_FILE_ATTRIBUTES,
 _CD_LOCAL_HEADER_OFFSET) = range(19)
92
# The "local file header" structure, magic number, size, and indices
# (section V.A in the format document)
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = "PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

# Positions of the fields inside the unpacked local file header (0 .. 11)
(_FH_SIGNATURE,
 _FH_EXTRACT_VERSION,
 _FH_EXTRACT_SYSTEM,
 _FH_GENERAL_PURPOSE_FLAG_BITS,
 _FH_COMPRESSION_METHOD,
 _FH_LAST_MOD_TIME,
 _FH_LAST_MOD_DATE,
 _FH_CRC,
 _FH_COMPRESSED_SIZE,
 _FH_UNCOMPRESSED_SIZE,
 _FH_FILENAME_LENGTH,
 _FH_EXTRA_FIELD_LENGTH) = range(12)
111
# The "Zip64 end of central directory locator" structure, magic number, and size
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = "PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)

# The "Zip64 end of central directory" record, magic number, size, and indices
# (section V.G in the format document)
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = "PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

# Positions of the fields inside the unpacked ZIP64 end record (0 .. 9)
(_CD64_SIGNATURE,
 _CD64_DIRECTORY_RECSIZE,
 _CD64_CREATE_VERSION,
 _CD64_EXTRACT_VERSION,
 _CD64_DISK_NUMBER,
 _CD64_DISK_NUMBER_START,
 _CD64_NUMBER_ENTRIES_THIS_DISK,
 _CD64_NUMBER_ENTRIES_TOTAL,
 _CD64_DIRECTORY_SIZE,
 _CD64_OFFSET_START_CENTDIR) = range(10)
133
def _check_zipfile(fp):
    """Return True when an end-of-central-directory record is found in fp.

    An IOError raised while looking for the record is treated as "not a
    ZIP file" and yields False.
    """
    try:
        found = bool(_EndRecData(fp))   # truthy => correct magic number
    except IOError:
        found = False
    return found
141
def is_zipfile(filename):
    """Quickly see if a file is a ZIP file by checking the magic number.

    The filename argument may be a file or file-like object too (anything
    with a "read" attribute is used as is; otherwise it is opened in "rb"
    mode).  An IOError at any point results in False.
    """
    is_zip = False
    try:
        if not hasattr(filename, "read"):
            with open(filename, "rb") as stream:
                is_zip = _check_zipfile(stream)
        else:
            is_zip = _check_zipfile(fp=filename)
    except IOError:
        pass
    return is_zip
157
def _EndRecData64(fpin, offset, endrec):
    """
    Read the ZIP64 end-of-archive records and use that to update endrec

    fpin:   seekable file object of the archive.
    offset: position of the classic end-of-central-directory record, given
            relative to the end of the file (it is used with whence=2).
    endrec: list produced by _EndRecData(); updated in place.

    Returns endrec.  It is returned unchanged when the ZIP64 locator or the
    ZIP64 end record cannot be read or carries the wrong signature.
    Raises BadZipfile for archives that span multiple disks.
    """
    locator_pos = offset - sizeEndCentDir64Locator
    try:
        fpin.seek(locator_pos, 2)
    except IOError:
        # If the seek fails, the file is not large enough to contain a ZIP64
        # end-of-archive record, so just return the end record we were given.
        return endrec

    raw = fpin.read(sizeEndCentDir64Locator)
    if len(raw) != sizeEndCentDir64Locator:
        return endrec
    locator = struct.unpack(structEndArchive64Locator, raw)
    if locator[0] != stringEndArchive64Locator:
        return endrec

    # locator fields: signature, disk number, relative offset, disk count
    diskno, disks = locator[1], locator[3]
    if diskno != 0 or disks != 1:
        raise BadZipfile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    fpin.seek(locator_pos - sizeEndCentDir64, 2)
    raw = fpin.read(sizeEndCentDir64)
    if len(raw) != sizeEndCentDir64:
        return endrec
    record = struct.unpack(structEndArchive64, raw)
    if record[_CD64_SIGNATURE] != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[_ECD_SIGNATURE] = record[_CD64_SIGNATURE]
    endrec[_ECD_DISK_NUMBER] = record[_CD64_DISK_NUMBER]
    endrec[_ECD_DISK_START] = record[_CD64_DISK_NUMBER_START]
    endrec[_ECD_ENTRIES_THIS_DISK] = record[_CD64_NUMBER_ENTRIES_THIS_DISK]
    endrec[_ECD_ENTRIES_TOTAL] = record[_CD64_NUMBER_ENTRIES_TOTAL]
    endrec[_ECD_SIZE] = record[_CD64_DIRECTORY_SIZE]
    endrec[_ECD_OFFSET] = record[_CD64_OFFSET_START_CENTDIR]
    return endrec
199
200
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The data is a list of the nine items in the ZIP "End of central dir"
    record followed by a tenth item, the file seek offset of this record.
    The list is passed through _EndRecData64() before being returned, so
    its entries may have been replaced by values from a ZIP64 record."""

    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Check to see if this is ZIP file with no archive comment (the
    # "end of central directory" structure should be the last item in the
    # file if this is the case).
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except IOError:
        return None
    tail = fpin.read()
    record_is_last = (len(tail) == sizeEndCentDir and
                      tail[0:4] == stringEndArchive and
                      tail[-2:] == b"\000\000")
    if record_is_last:
        # the signature is correct and there's no comment, unpack structure
        endrec = list(struct.unpack(structEndArchive, tail))

        # Append a blank comment and record start offset
        endrec.append("")
        endrec.append(filesize - sizeEndCentDir)

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature. The comment is the last item in the ZIP file and may be
    # up to 64K long.  It is assumed that the "end of central directory" magic
    # number does not appear in the comment.
    maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(maxCommentStart, 0)
    window = fpin.read()
    start = window.rfind(stringEndArchive)
    if start < 0:
        # Unable to find a valid end of central directory structure
        return None

    # found the magic number; attempt to unpack and interpret
    recData = window[start:start + sizeEndCentDir]
    if len(recData) != sizeEndCentDir:
        # Zip file is corrupted.
        return None
    endrec = list(struct.unpack(structEndArchive, recData))
    commentSize = endrec[_ECD_COMMENT_SIZE]  # as claimed by the zip file
    comment_start = start + sizeEndCentDir
    endrec.append(window[comment_start:comment_start + commentSize])
    endrec.append(maxCommentStart + start)

    # Try to read the "Zip64 end of central directory" structure
    return _EndRecData64(fpin, maxCommentStart + start - filesize,
                         endrec)
260
261
class ZipInfo (object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
            'orig_filename',
            'filename',
            'date_time',
            'compress_type',
            'comment',
            'extra',
            'create_system',
            'create_version',
            'extract_version',
            'reserved',
            'flag_bits',
            'volume',
            'internal_attr',
            'external_attr',
            'header_offset',
            'CRC',
            'compress_size',
            'file_size',
            '_raw_time',
        )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create the record for one archive member.

        filename:  member name.  It is cut at the first null byte and, when
                   os.sep is not "/", os.sep is replaced by "/".  The
                   unmodified value is kept in orig_filename.
        date_time: (year, month, day, hour, min, sec) tuple.

        Raises ValueError when the year is earlier than 1980.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec

        if date_time[0] < 1980:
            raise ValueError('ZIP does not support timestamps before 1980')

        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = ""               # Comment for each file
        self.extra = ""                 # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = 20        # Version which created ZIP archive
        self.extract_version = 20       # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self, zip64=None):
        """Return the per-file header as a string.

        zip64: None  - add the ZIP64 extra record only when one of the sizes
                       exceeds ZIP64_LIMIT;
               true  - always add the ZIP64 extra record;
               false - never add it; LargeZipFile is raised when a size
                       exceeds ZIP64_LIMIT.

        When a size exceeds ZIP64_LIMIT, extract_version and create_version
        of this object are raised to at least 45 as a side effect.
        """
        dt = self.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        if zip64 is None:
            zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT
        if zip64:
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                    1, struct.calcsize(fmt)-4, file_size, compress_size)
        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            if not zip64:
                raise LargeZipFile("Filesize would require ZIP64 extensions")
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            file_size = 0xffffffff
            compress_size = 0xffffffff
            self.extract_version = max(45, self.extract_version)
            # Raise create_version from its own previous value; it used to be
            # overwritten with a value derived from extract_version.
            self.create_version = max(45, self.create_version)

        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                 self.extract_version, self.reserved, flag_bits,
                 self.compress_type, dostime, dosdate, CRC,
                 compress_size, file_size,
                 len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded filename, flag bits) for writing a header.

        A unicode filename is encoded as ASCII when possible; otherwise it
        is encoded as UTF-8 and bit 0x800 is set in the returned flags.
        Other filenames are returned unchanged.
        """
        if isinstance(self.filename, unicode):
            try:
                return self.filename.encode('ascii'), self.flag_bits
            except UnicodeEncodeError:
                return self.filename.encode('utf-8'), self.flag_bits | 0x800
        else:
            return self.filename, self.flag_bits

    def _decodeFilename(self):
        """Return the filename, decoded from UTF-8 when flag bit 0x800 is set."""
        if self.flag_bits & 0x800:
            return self.filename.decode('utf-8')
        else:
            return self.filename

    def _decodeExtra(self):
        """Apply the ZIP64 record (type 1) found in self.extra, if any.

        file_size, compress_size and header_offset are replaced by the
        64-bit values of the record when they hold the 0xffffffff marker.
        Raises RuntimeError when the ZIP64 record has an unexpected length.
        """
        # Try to decode the extra field.
        extra = self.extra
        unpack = struct.unpack
        # Every record starts with a 4 byte (type, length) header; stop when
        # fewer bytes remain instead of failing in unpack() on trailing
        # padding or a truncated record header.
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise RuntimeError("Corrupt extra field %s" % (ln,))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                if self.file_size in (0xffffffffffffffff, 0xffffffff):
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == 0xFFFFFFFF:
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == 0xffffffff:
                    self.header_offset = counts[idx]
                    idx += 1

            extra = extra[ln+4:]
418
419
class _ZipDecrypter:
    """Class to handle decryption of files stored within a ZIP archive.

    ZIP supports a password-based form of encryption. Even though known
    plaintext attacks have been found against it, it is still useful
    to be able to get data out of such a file.

    Usage:
        zd = _ZipDecrypter(mypwd)
        plain_char = zd(cypher_char)
        plain_text = map(zd, cypher_text)
    """

    def _GenerateCRCTable():
        """Generate a CRC-32 table.

        ZIP encryption uses the CRC32 one-byte primitive for scrambling some
        internal keys. We noticed that a direct implementation is faster than
        relying on binascii.crc32().
        """
        poly = 0xedb88320
        table = []
        for byte in range(256):
            crc = byte
            for _ in range(8):
                low_bit = crc & 1
                crc = (crc >> 1) & 0x7FFFFFFF
                if low_bit:
                    crc ^= poly
            table.append(crc)
        return table
    # Built once, while the class body is executed.
    crctable = _GenerateCRCTable()

    def _crc32(self, ch, crc):
        """Compute the CRC32 primitive on one byte."""
        index = (crc ^ ord(ch)) & 0xff
        return ((crc >> 8) & 0xffffff) ^ self.crctable[index]

    def __init__(self, pwd):
        """Initialise the three keys and mix in every character of pwd."""
        self.key0 = 0x12345678
        self.key1 = 0x23456789
        self.key2 = 0x34567890
        for ch in pwd:
            self._UpdateKeys(ch)

    def _UpdateKeys(self, c):
        """Advance the three keys with the (plain) character c."""
        self.key0 = self._crc32(c, self.key0)
        key1 = (self.key1 + (self.key0 & 0xff)) & 0xffffffff
        self.key1 = (key1 * 0x08088405 + 1) & 0xffffffff
        top_byte = chr((self.key1 >> 24) & 0xff)
        self.key2 = self._crc32(top_byte, self.key2)

    def __call__(self, c):
        """Decrypt a single character."""
        k = self.key2 | 2
        keystream = ((k * (k ^ 1)) >> 8) & 0xff
        plain = chr(ord(c) ^ keystream)
        self._UpdateKeys(plain)
        return plain
478
479
# Names of ZIP compression methods, keyed by the numeric method id.
# Used for the error message when a member uses a method that cannot be read.
compressor_names = {
    0: 'store',
    1: 'shrink',
    2: 'reduce', 3: 'reduce', 4: 'reduce', 5: 'reduce',
    6: 'implode',
    7: 'tokenize',
    8: 'deflate',
    9: 'deflate64',
    10: 'implode',
    12: 'bzip2',
    14: 'lzma',
    18: 'terse',
    19: 'lz77',
    97: 'wavpack',
    98: 'ppmd',
}
499
500
class ZipExtFile(io.BufferedIOBase):
    """File-like object for reading an archive member.
       Is returned by ZipFile.open().
    """

    # Max size supported by decompressor.
    # NOTE: "-" binds tighter than "<<", so this value is (and always was)
    # 1 << 30; the parentheses only make that explicit.
    MAX_N = 1 << (31 - 1)

    # Read from compressed files in 4k blocks.
    MIN_READ_SIZE = 4096

    # Search for universal newlines or line chunks.
    PATTERN = re.compile(r'^(?P<chunk>[^\r\n]+)|(?P<newline>\n|\r\n?)')

    def __init__(self, fileobj, mode, zipinfo, decrypter=None,
            close_fileobj=False):
        """Wrap fileobj, positioned at the start of the member's data.

        fileobj:       file object to read the raw member data from.
        mode:          open mode; a 'U' in it enables universal newlines.
        zipinfo:       ZipInfo of the member (compression type, sizes, CRC).
        decrypter:     optional callable decrypting one character at a time.
        close_fileobj: when true, fileobj is closed by close().

        Raises NotImplementedError for compression types other than
        ZIP_STORED and ZIP_DEFLATED.
        """
        self._fileobj = fileobj
        self._decrypter = decrypter
        self._close_fileobj = close_fileobj

        self._compress_type = zipinfo.compress_type
        self._compress_size = zipinfo.compress_size
        self._compress_left = zipinfo.compress_size

        method = self._compress_type
        if method == ZIP_DEFLATED:
            self._decompressor = zlib.decompressobj(-15)
        elif method != ZIP_STORED:
            descr = compressor_names.get(method)
            if descr:
                raise NotImplementedError("compression type %d (%s)" % (method, descr))
            else:
                raise NotImplementedError("compression type %d" % (method,))
        self._unconsumed = ''

        self._readbuffer = ''
        self._offset = 0

        self._universal = 'U' in mode
        self.newlines = None

        # Adjust read size for encrypted files since the first 12 bytes
        # are for the encryption/password information.
        if self._decrypter is not None:
            self._compress_left -= 12

        self.mode = mode
        self.name = zipinfo.filename

        if hasattr(zipinfo, 'CRC'):
            self._expected_crc = zipinfo.CRC
            self._running_crc = crc32(b'') & 0xffffffff
        else:
            self._expected_crc = None

    def readline(self, limit=-1):
        """Read and return a line from the stream.

        If limit is specified, at most limit bytes will be read.
        """
        if not self._universal:
            if limit < 0:
                # Shortcut common case - newline found in buffer.
                end = self._readbuffer.find('\n', self._offset) + 1
                if end > 0:
                    line = self._readbuffer[self._offset: end]
                    self._offset = end
                    return line
            return io.BufferedIOBase.readline(self, limit)

        line = ''
        while limit < 0 or len(line) < limit:
            readahead = self.peek(2)
            if readahead == '':
                return line

            #
            # Search for universal newlines or line chunks.
            #
            # The pattern returns either a line chunk or a newline, but not
            # both. Combined with peek(2), we are assured that the sequence
            # '\r\n' is always retrieved completely and never split into
            # separate newlines - '\r', '\n' due to coincidental readaheads.
            #
            match = self.PATTERN.search(readahead)
            newline = match.group('newline')
            if newline is not None:
                if self.newlines is None:
                    self.newlines = []
                if newline not in self.newlines:
                    self.newlines.append(newline)
                self._offset += len(newline)
                return line + '\n'

            chunk = match.group('chunk')
            if limit >= 0:
                chunk = chunk[: limit - len(line)]

            self._offset += len(chunk)
            line += chunk

        return line

    def peek(self, n=1):
        """Returns buffered bytes without advancing the position."""
        buffered = len(self._readbuffer) - self._offset
        if n > buffered:
            # read() advances the offset; step back so nothing is consumed
            chunk = self.read(n)
            self._offset -= len(chunk)

        # Return up to 512 bytes to reduce allocation overhead for tight loops.
        return self._readbuffer[self._offset: self._offset + 512]

    def readable(self):
        """Always True: the object is only used for reading."""
        return True

    def read(self, n=-1):
        """Read and return up to n bytes.
        If the argument is omitted, None, or negative, data is read and returned until EOF is reached..
        """
        if n is None:
            n = -1
        buf = ''
        while n < 0 or n > len(buf):
            wanted = n if n < 0 else n - len(buf)
            data = self.read1(wanted)
            if len(data) == 0:
                break
            buf += data
        return buf

    def _update_crc(self, newdata, eof):
        """Fold newdata into the running CRC; verify it when eof is true.

        Raises BadZipfile on a CRC mismatch at end of file.
        """
        # Update the CRC using the given data.
        if self._expected_crc is None:
            # No need to compute the CRC if we don't have a reference value
            return
        self._running_crc = crc32(newdata, self._running_crc) & 0xffffffff
        # Check the CRC if we're at the end of the file
        if eof and self._running_crc != self._expected_crc:
            raise BadZipfile("Bad CRC-32 for file %r" % self.name)

    def read1(self, n):
        """Read up to n bytes with at most one read() system call."""

        # Simplify algorithm (branching) by transforming negative n to large n.
        if n is None or n < 0:
            n = self.MAX_N

        # Bytes available in read buffer.
        len_readbuffer = len(self._readbuffer) - self._offset

        # Read from file.
        if self._compress_left > 0 and n > len_readbuffer + len(self._unconsumed):
            nbytes = n - len_readbuffer - len(self._unconsumed)
            nbytes = min(max(nbytes, self.MIN_READ_SIZE), self._compress_left)

            data = self._fileobj.read(nbytes)
            self._compress_left -= len(data)

            if data and self._decrypter is not None:
                data = ''.join(map(self._decrypter, data))

            if self._compress_type == ZIP_STORED:
                self._update_crc(data, eof=(self._compress_left==0))
                self._readbuffer = self._readbuffer[self._offset:] + data
                self._offset = 0
            else:
                # Prepare deflated bytes for decompression.
                self._unconsumed += data

        # Handle unconsumed data.
        if (len(self._unconsumed) > 0 and n > len_readbuffer and
            self._compress_type == ZIP_DEFLATED):
            max_out = max(n - len_readbuffer, self.MIN_READ_SIZE)
            data = self._decompressor.decompress(self._unconsumed, max_out)

            self._unconsumed = self._decompressor.unconsumed_tail
            eof = len(self._unconsumed) == 0 and self._compress_left == 0
            if eof:
                data += self._decompressor.flush()

            self._update_crc(data, eof=eof)
            self._readbuffer = self._readbuffer[self._offset:] + data
            self._offset = 0

        # Read from buffer.
        data = self._readbuffer[self._offset: self._offset + n]
        self._offset += len(data)
        return data

    def close(self):
        """Close the object and, if requested at creation, the wrapped file."""
        try:
            if self._close_fileobj:
                self._fileobj.close()
        finally:
            super(ZipExtFile, self).close()
703
704
705class ZipFile(object):
706 """ Class with methods to open, read, write, close, list zip files.
707
708 z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)
709
710 file: Either the path to the file, or a file-like object.
711 If it is a path, the file will be opened and closed by ZipFile.
712 mode: The mode can be either read "r", write "w" or append "a".
713 compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
714 allowZip64: if True ZipFile will create files with ZIP64 extensions when
715 needed, otherwise it will raise an exception when this would
716 be necessary.
717
718 """
719
720 fp = None # Set here since __del__ checks it
721
722 def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
723 """Open the ZIP file with mode read "r", write "w" or append "a"."""
724 if mode not in ("r", "w", "a"):
725 raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')
726
727 if compression == ZIP_STORED:
728 pass
729 elif compression == ZIP_DEFLATED:
730 if not zlib:
731 raise RuntimeError,\
732 "Compression requires the (missing) zlib module"
733 else:
734 raise RuntimeError, "That compression method is not supported"
735
736 self._allowZip64 = allowZip64
737 self._didModify = False
738 self.debug = 0 # Level of printing: 0 through 3
739 self.NameToInfo = {} # Find file info given name
740 self.filelist = [] # List of ZipInfo instances for archive
741 self.compression = compression # Method of compression
742 self.mode = key = mode.replace('b', '')[0]
743 self.pwd = None
744 self._comment = ''
745
746 # Check if we were passed a file-like object
747 if isinstance(file, basestring):
748 self._filePassed = 0
749 self.filename = file
750 modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
751 try:
752 self.fp = open(file, modeDict[mode])
753 except IOError:
754 if mode == 'a':
755 mode = key = 'w'
756 self.fp = open(file, modeDict[mode])
757 else:
758 raise
759 else:
760 self._filePassed = 1
761 self.fp = file
762 self.filename = getattr(file, 'name', None)
763
764 try:
765 if key == 'r':
766 self._RealGetContents()
767 elif key == 'w':
768 # set the modified flag so central directory gets written
769 # even if no files are added to the archive
770 self._didModify = True
771 elif key == 'a':
772 try:
773 # See if file is a zip file
774 self._RealGetContents()
775 # seek to start of directory and overwrite
776 self.fp.seek(self.start_dir, 0)
777 except BadZipfile:
778 # file is not a zip file, just append
779 self.fp.seek(0, 2)
780
781 # set the modified flag so central directory gets written
782 # even if no files are added to the archive
783 self._didModify = True
784 else:
785 raise RuntimeError('Mode must be "r", "w" or "a"')
786 except:
787 fp = self.fp
788 self.fp = None
789 if not self._filePassed:
790 fp.close()
791 raise
792
793 def __enter__(self):
794 return self
795
796 def __exit__(self, type, value, traceback):
797 self.close()
798
799 def _RealGetContents(self):
800 """Read in the table of contents for the ZIP file."""
801 fp = self.fp
802 try:
803 endrec = _EndRecData(fp)
804 except IOError:
805 raise BadZipfile("File is not a zip file")
806 if not endrec:
807 raise BadZipfile, "File is not a zip file"
808 if self.debug > 1:
809 print endrec
810 size_cd = endrec[_ECD_SIZE] # bytes in central directory
811 offset_cd = endrec[_ECD_OFFSET] # offset of central directory
812 self._comment = endrec[_ECD_COMMENT] # archive comment
813
814 # "concat" is zero, unless zip was concatenated to another file
815 concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
816 if endrec[_ECD_SIGNATURE] == stringEndArchive64:
817 # If Zip64 extension structures are present, account for them
818 concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)
819
820 if self.debug > 2:
821 inferred = concat + offset_cd
822 print "given, inferred, offset", offset_cd, inferred, concat
823 # self.start_dir: Position of start of central directory
824 self.start_dir = offset_cd + concat
825 fp.seek(self.start_dir, 0)
826 data = fp.read(size_cd)
827 fp = cStringIO.StringIO(data)
828 total = 0
829 while total < size_cd:
830 centdir = fp.read(sizeCentralDir)
831 if len(centdir) != sizeCentralDir:
832 raise BadZipfile("Truncated central directory")
833 centdir = struct.unpack(structCentralDir, centdir)
834 if centdir[_CD_SIGNATURE] != stringCentralDir:
835 raise BadZipfile("Bad magic number for central directory")
836 if self.debug > 2:
837 print centdir
838 filename = fp.read(centdir[_CD_FILENAME_LENGTH])
839 # Create ZipInfo instance to store file information
840 x = ZipInfo(filename)
841 x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
842 x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
843 x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
844 (x.create_version, x.create_system, x.extract_version, x.reserved,
845 x.flag_bits, x.compress_type, t, d,
846 x.CRC, x.compress_size, x.file_size) = centdir[1:12]
847 x.volume, x.internal_attr, x.external_attr = centdir[15:18]
848 # Convert date/time code to (year, month, day, hour, min, sec)
849 x._raw_time = t
850 x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
851 t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
852
853 x._decodeExtra()
854 x.header_offset = x.header_offset + concat
855 x.filename = x._decodeFilename()
856 self.filelist.append(x)
857 self.NameToInfo[x.filename] = x
858
859 # update total bytes read from central directory
860 total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
861 + centdir[_CD_EXTRA_FIELD_LENGTH]
862 + centdir[_CD_COMMENT_LENGTH])
863
864 if self.debug > 2:
865 print "total", total
866
867
868 def namelist(self):
869 """Return a list of file names in the archive."""
870 l = []
871 for data in self.filelist:
872 l.append(data.filename)
873 return l
874
875 def infolist(self):
876 """Return a list of class ZipInfo instances for files in the
877 archive."""
878 return self.filelist
879
880 def printdir(self):
881 """Print a table of contents for the zip file."""
882 print "%-46s %19s %12s" % ("File Name", "Modified ", "Size")
883 for zinfo in self.filelist:
884 date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
885 print "%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size)
886
887 def testzip(self):
888 """Read all the files and check the CRC."""
889 chunk_size = 2 ** 20
890 for zinfo in self.filelist:
891 try:
892 # Read by chunks, to avoid an OverflowError or a
893 # MemoryError with very large embedded files.
894 with self.open(zinfo.filename, "r") as f:
895 while f.read(chunk_size): # Check CRC-32
896 pass
897 except BadZipfile:
898 return zinfo.filename
899
900 def getinfo(self, name):
901 """Return the instance of ZipInfo given 'name'."""
902 info = self.NameToInfo.get(name)
903 if info is None:
904 raise KeyError(
905 'There is no item named %r in the archive' % name)
906
907 return info
908
909 def setpassword(self, pwd):
910 """Set default password for encrypted files."""
911 self.pwd = pwd
912
913 @property
914 def comment(self):
915 """The comment text associated with the ZIP file."""
916 return self._comment
917
918 @comment.setter
919 def comment(self, comment):
920 # check for valid comment length
921 if len(comment) >= ZIP_MAX_COMMENT:
922 if self.debug:
923 print('Archive comment is too long; truncating to %d bytes'
924 % ZIP_MAX_COMMENT)
925 comment = comment[:ZIP_MAX_COMMENT]
926 self._comment = comment
927 self._didModify = True
928
929 def read(self, name, pwd=None):
930 """Return file bytes (as a string) for name."""
931 return self.open(name, "r", pwd).read()
932
933 def open(self, name, mode="r", pwd=None):
934 """Return file-like object for 'name'."""
935 if mode not in ("r", "U", "rU"):
936 raise RuntimeError, 'open() requires mode "r", "U", or "rU"'
937 if not self.fp:
938 raise RuntimeError, \
939 "Attempt to read ZIP archive that was already closed"
940
941 # Only open a new file for instances where we were not
942 # given a file object in the constructor
943 if self._filePassed:
944 zef_file = self.fp
945 should_close = False
946 else:
947 zef_file = open(self.filename, 'rb')
948 should_close = True
949
950 try:
951 # Make sure we have an info object
952 if isinstance(name, ZipInfo):
953 # 'name' is already an info object
954 zinfo = name
955 else:
956 # Get info object for name
957 zinfo = self.getinfo(name)
958
959 zef_file.seek(zinfo.header_offset, 0)
960
961 # Skip the file header:
962 fheader = zef_file.read(sizeFileHeader)
963 if len(fheader) != sizeFileHeader:
964 raise BadZipfile("Truncated file header")
965 fheader = struct.unpack(structFileHeader, fheader)
966 if fheader[_FH_SIGNATURE] != stringFileHeader:
967 raise BadZipfile("Bad magic number for file header")
968
969 fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
970 if fheader[_FH_EXTRA_FIELD_LENGTH]:
971 zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])
972
973 if fname != zinfo.orig_filename:
974 raise BadZipfile, \
975 'File name in directory "%s" and header "%s" differ.' % (
976 zinfo.orig_filename, fname)
977
978 # check for encrypted flag & handle password
979 is_encrypted = zinfo.flag_bits & 0x1
980 zd = None
981 if is_encrypted:
982 if not pwd:
983 pwd = self.pwd
984 if not pwd:
985 raise RuntimeError, "File %s is encrypted, " \
986 "password required for extraction" % name
987
988 zd = _ZipDecrypter(pwd)
989 # The first 12 bytes in the cypher stream is an encryption header
990 # used to strengthen the algorithm. The first 11 bytes are
991 # completely random, while the 12th contains the MSB of the CRC,
992 # or the MSB of the file time depending on the header type
993 # and is used to check the correctness of the password.
994 bytes = zef_file.read(12)
995 h = map(zd, bytes[0:12])
996 if zinfo.flag_bits & 0x8:
997 # compare against the file type from extended local headers
998 check_byte = (zinfo._raw_time >> 8) & 0xff
999 else:
1000 # compare against the CRC otherwise
1001 check_byte = (zinfo.CRC >> 24) & 0xff
1002 if ord(h[11]) != check_byte:
1003 raise RuntimeError("Bad password for file", name)
1004
1005 return ZipExtFile(zef_file, mode, zinfo, zd,
1006 close_fileobj=should_close)
1007 except:
1008 if should_close:
1009 zef_file.close()
1010 raise
1011
def extract(self, member, path=None, pwd=None):
    """Extract one archive member and return the path it was written to.

    `member' is either an archive name or a ZipInfo object; a name is
    resolved through getinfo().  The member is written, under its full
    archive name, beneath `path', which defaults to the current working
    directory.  `pwd' is handed on unchanged to the extraction step.
    """
    zinfo = member if isinstance(member, ZipInfo) else self.getinfo(member)
    destination = os.getcwd() if path is None else path
    return self._extract_member(zinfo, destination, pwd)
1025
def extractall(self, path=None, members=None, pwd=None):
    """Extract several members by calling extract() on each in turn.

    `path' is the directory to extract into; when it is None, extract()
    falls back to the current working directory.  `members' is the
    iterable of names or ZipInfo objects to extract; when it is None
    every name returned by namelist() is extracted.  `pwd' is passed to
    each extract() call unchanged.
    """
    selected = self.namelist() if members is None else members
    for entry in selected:
        self.extract(entry, path, pwd)
1037
1038 def _extract_member(self, member, targetpath, pwd):
1039 """Extract the ZipInfo object 'member' to a physical
1040 file on the path targetpath.
1041 """
1042 # build the destination pathname, replacing
1043 # forward slashes to platform specific separators.
1044 arcname = member.filename.replace('/', os.path.sep)
1045
1046 if os.path.altsep:
1047 arcname = arcname.replace(os.path.altsep, os.path.sep)
1048 # interpret absolute pathname as relative, remove drive letter or
1049 # UNC path, redundant separators, "." and ".." components.
1050 arcname = os.path.splitdrive(arcname)[1]
1051 arcname = os.path.sep.join(x for x in arcname.split(os.path.sep)
1052 if x not in ('', os.path.curdir, os.path.pardir))
1053 if os.path.sep == '\\':
1054 # filter illegal characters on Windows
1055 illegal = ':<>|"?*'
1056 if isinstance(arcname, unicode):
1057 table = {ord(c): ord('_') for c in illegal}
1058 else:
1059 table = string.maketrans(illegal, '_' * len(illegal))
1060 arcname = arcname.translate(table)
1061 # remove trailing dots
1062 arcname = (x.rstrip('.') for x in arcname.split(os.path.sep))
1063 arcname = os.path.sep.join(x for x in arcname if x)
1064
1065 targetpath = os.path.join(targetpath, arcname)
1066 targetpath = os.path.normpath(targetpath)
1067
1068 # Create all upper directories if necessary.
1069 upperdirs = os.path.dirname(targetpath)
1070 if upperdirs and not os.path.exists(upperdirs):
1071 os.makedirs(upperdirs)
1072
1073 if member.filename[-1] == '/':
1074 if not os.path.isdir(targetpath):
1075 os.mkdir(targetpath)
1076 return targetpath
1077
1078 with self.open(member, pwd=pwd) as source, \
1079 file(targetpath, "wb") as target:
1080 shutil.copyfileobj(source, target)
1081
1082 return targetpath
1083
1084 def _writecheck(self, zinfo):
1085 """Check for errors before writing a file to the archive."""
1086 if zinfo.filename in self.NameToInfo:
1087 if self.debug: # Warning for duplicate names
1088 print "Duplicate name:", zinfo.filename
1089 if self.mode not in ("w", "a"):
1090 raise RuntimeError, 'write() requires mode "w" or "a"'
1091 if not self.fp:
1092 raise RuntimeError, \
1093 "Attempt to write ZIP archive that was already closed"
1094 if zinfo.compress_type == ZIP_DEFLATED and not zlib:
1095 raise RuntimeError, \
1096 "Compression requires the (missing) zlib module"
1097 if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
1098 raise RuntimeError, \
1099 "That compression method is not supported"
1100 if zinfo.file_size > ZIP64_LIMIT:
1101 if not self._allowZip64:
1102 raise LargeZipFile("Filesize would require ZIP64 extensions")
1103 if zinfo.header_offset > ZIP64_LIMIT:
1104 if not self._allowZip64:
1105 raise LargeZipFile("Zipfile size would require ZIP64 extensions")
1106
def write(self, filename, arcname=None, compress_type=None):
    """Put the bytes from filename into the archive under the name
    arcname.

    filename      -- path of the file or directory on disk to add.
    arcname       -- name to store in the archive; defaults to filename.
                     Any drive prefix and leading path separators are
                     removed and the rest is normalized with
                     os.path.normpath().
    compress_type -- compression method for this entry; defaults to
                     self.compression.

    A directory is stored as a header-only entry whose name ends in
    '/'.  A regular file is streamed in 8 KiB chunks: the local header
    is written first with zero CRC and sizes, and rewritten in place
    once the data has been written.

    Raises RuntimeError if the archive has been closed, or if ZIP64 is
    allowed but was not selected for this entry and the final size or
    compressed size exceeds ZIP64_LIMIT.  _writecheck() is called and
    may raise as well.
    """
    if not self.fp:
        raise RuntimeError(
              "Attempt to write to ZIP archive that was already closed")

    st = os.stat(filename)
    isdir = stat.S_ISDIR(st.st_mode)
    mtime = time.localtime(st.st_mtime)
    # First six fields of the local modification time struct.
    date_time = mtime[0:6]
    # Create ZipInfo instance to store file information
    if arcname is None:
        arcname = filename
    arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
    # NOTE(review): arcname[0] raises IndexError if stripping leaves an
    # empty name (e.g. a filename consisting only of a separator) --
    # confirm whether that input needs handling.
    while arcname[0] in (os.sep, os.altsep):
        arcname = arcname[1:]
    if isdir:
        arcname += '/'
    zinfo = ZipInfo(arcname, date_time)
    # Low 16 bits of the stat mode go into the upper half of external_attr.
    zinfo.external_attr = (st[0] & 0xFFFF) << 16L      # Unix attributes
    if compress_type is None:
        zinfo.compress_type = self.compression
    else:
        zinfo.compress_type = compress_type

    zinfo.file_size = st.st_size
    zinfo.flag_bits = 0x00
    zinfo.header_offset = self.fp.tell()    # Start of header bytes

    self._writecheck(zinfo)
    self._didModify = True

    if isdir:
        # Directory entry: header only, no data, zero sizes and CRC.
        zinfo.file_size = 0
        zinfo.compress_size = 0
        zinfo.CRC = 0
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo
        self.fp.write(zinfo.FileHeader(False))
        return

    with open(filename, "rb") as fp:
        # Must overwrite CRC and sizes with correct data later
        zinfo.CRC = CRC = 0
        zinfo.compress_size = compress_size = 0
        # Compressed size can be larger than uncompressed size
        # (hence the 5% margin when deciding on a ZIP64 header).
        zip64 = self._allowZip64 and \
                zinfo.file_size * 1.05 > ZIP64_LIMIT
        self.fp.write(zinfo.FileHeader(zip64))
        if zinfo.compress_type == ZIP_DEFLATED:
            # NOTE(review): wbits=-15 presumably selects a raw deflate
            # stream without zlib header -- confirm against zlib docs.
            cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                 zlib.DEFLATED, -15)
        else:
            cmpr = None
        file_size = 0
        # Stream the file, tracking the real size and a running CRC of
        # the uncompressed data.
        while 1:
            buf = fp.read(1024 * 8)
            if not buf:
                break
            file_size = file_size + len(buf)
            CRC = crc32(buf, CRC) & 0xffffffff
            if cmpr:
                buf = cmpr.compress(buf)
                compress_size = compress_size + len(buf)
            self.fp.write(buf)
    if cmpr:
        # Emit whatever the compressor is still buffering.
        buf = cmpr.flush()
        compress_size = compress_size + len(buf)
        self.fp.write(buf)
        zinfo.compress_size = compress_size
    else:
        zinfo.compress_size = file_size
    zinfo.CRC = CRC
    zinfo.file_size = file_size
    if not zip64 and self._allowZip64:
        # The header was written without ZIP64 fields, so the sizes
        # actually produced must still fit.
        if file_size > ZIP64_LIMIT:
            raise RuntimeError('File size has increased during compressing')
        if compress_size > ZIP64_LIMIT:
            raise RuntimeError('Compressed size larger than uncompressed size')
    # Seek backwards and write file header (which will now include
    # correct CRC and file sizes)
    position = self.fp.tell()       # Preserve current position in file
    self.fp.seek(zinfo.header_offset, 0)
    self.fp.write(zinfo.FileHeader(zip64))
    self.fp.seek(position, 0)
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
1195
def writestr(self, zinfo_or_arcname, bytes, compress_type=None):
    """Add an in-memory string to the archive as one member.

    `bytes' holds the member contents.  `zinfo_or_arcname' is either a
    ZipInfo instance, used as given, or an archive name, for which a
    ZipInfo is created with the current local time, the archive's
    default compression and permission bits 0600.  A `compress_type'
    other than None overrides the compression method in either case.

    Raises RuntimeError if the archive has been closed and LargeZipFile
    if the sizes need ZIP64 extensions that are not allowed;
    _writecheck() is called and may raise as well.
    """
    if isinstance(zinfo_or_arcname, ZipInfo):
        zinfo = zinfo_or_arcname
    else:
        now = time.localtime(time.time())[:6]
        zinfo = ZipInfo(filename=zinfo_or_arcname, date_time=now)
        zinfo.compress_type = self.compression
        zinfo.external_attr = 0o600 << 16

    if not self.fp:
        raise RuntimeError(
            "Attempt to write to ZIP archive that was already closed")

    if compress_type is not None:
        zinfo.compress_type = compress_type

    zinfo.file_size = len(bytes)            # Uncompressed size
    zinfo.header_offset = self.fp.tell()    # Start of header bytes
    self._writecheck(zinfo)
    self._didModify = True
    zinfo.CRC = crc32(bytes) & 0xffffffff   # CRC-32 of the raw contents

    payload = bytes
    if zinfo.compress_type == ZIP_DEFLATED:
        compressor = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                                      zlib.DEFLATED, -15)
        payload = compressor.compress(payload) + compressor.flush()
        zinfo.compress_size = len(payload)  # Compressed size
    else:
        zinfo.compress_size = zinfo.file_size

    zip64 = (zinfo.file_size > ZIP64_LIMIT or
             zinfo.compress_size > ZIP64_LIMIT)
    if zip64 and not self._allowZip64:
        raise LargeZipFile("Filesize would require ZIP64 extensions")

    self.fp.write(zinfo.FileHeader(zip64))
    self.fp.write(payload)
    if zinfo.flag_bits & 0x08:
        # Flag bit 3 set: CRC and sizes follow the file data.
        if zip64:
            descriptor_format = '<LQQ'
        else:
            descriptor_format = '<LLL'
        self.fp.write(struct.pack(descriptor_format, zinfo.CRC,
                                  zinfo.compress_size, zinfo.file_size))
    self.fp.flush()

    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
1242
def __del__(self):
    """Call the "close()" method in case the user forgot."""
    # Finalizer hook: all the work is delegated to close().
    self.close()
1246
def close(self):
    """Close the file, and for mode "w" and "a" write the ending
    records.

    Does nothing if the archive is already closed (self.fp is None).
    When the archive was opened for writing and has been modified, the
    central directory is written (one record per entry in
    self.filelist), followed by the ZIP64 end-of-archive records when
    a limit is exceeded, then the end-of-archive record and the archive
    comment.  In every case self.fp is reset to None afterwards, and
    the underlying file is closed unless it was passed in by the
    caller (self._filePassed).
    """
    if self.fp is None:
        return

    try:
        if self.mode in ("w", "a") and self._didModify: # write ending records
            count = 0
            # Offset at which the central directory starts.
            pos1 = self.fp.tell()
            for zinfo in self.filelist:         # write central directory
                count = count + 1
                dt = zinfo.date_time
                # Pack the date as (year - 1980) << 9 | field1 << 5 | field2
                # and the time as field3 << 11 | field4 << 5 | field5 // 2.
                dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
                dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
                # Values that do not fit the fixed-width fields; they
                # are moved into an extra field below.
                extra = []
                if zinfo.file_size > ZIP64_LIMIT \
                        or zinfo.compress_size > ZIP64_LIMIT:
                    extra.append(zinfo.file_size)
                    extra.append(zinfo.compress_size)
                    # Placeholder values stored in the fixed fields.
                    file_size = 0xffffffff
                    compress_size = 0xffffffff
                else:
                    file_size = zinfo.file_size
                    compress_size = zinfo.compress_size

                if zinfo.header_offset > ZIP64_LIMIT:
                    extra.append(zinfo.header_offset)
                    header_offset = 0xffffffffL
                else:
                    header_offset = zinfo.header_offset

                extra_data = zinfo.extra
                if extra:
                    # Append a ZIP64 field to the extra's
                    # (tag 1, payload length 8 bytes per value, then
                    # each value as an unsigned 64-bit integer),
                    # placed in front of the entry's own extra data.
                    extra_data = struct.pack(
                            '<HH' + 'Q'*len(extra),
                            1, 8*len(extra), *extra) + extra_data

                    # Entries using the ZIP64 field advertise at least
                    # version 45.
                    extract_version = max(45, zinfo.extract_version)
                    create_version = max(45, zinfo.create_version)
                else:
                    extract_version = zinfo.extract_version
                    create_version = zinfo.create_version

                try:
                    filename, flag_bits = zinfo._encodeFilenameFlags()
                    centdir = struct.pack(structCentralDir,
                    stringCentralDir, create_version,
                    zinfo.create_system, extract_version, zinfo.reserved,
                    flag_bits, zinfo.compress_type, dostime, dosdate,
                    zinfo.CRC, compress_size, file_size,
                    len(filename), len(extra_data), len(zinfo.comment),
                    0, zinfo.internal_attr, zinfo.external_attr,
                    header_offset)
                except DeprecationWarning:
                    # Dump the values being packed to stderr for
                    # diagnosis, then re-raise.
                    print >>sys.stderr, (structCentralDir,
                    stringCentralDir, create_version,
                    zinfo.create_system, extract_version, zinfo.reserved,
                    zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                    zinfo.CRC, compress_size, file_size,
                    len(zinfo.filename), len(extra_data), len(zinfo.comment),
                    0, zinfo.internal_attr, zinfo.external_attr,
                    header_offset)
                    raise
                # Fixed-size record followed by its variable-length parts.
                self.fp.write(centdir)
                self.fp.write(filename)
                self.fp.write(extra_data)
                self.fp.write(zinfo.comment)

            # Offset just past the central directory.
            pos2 = self.fp.tell()
            # Write end-of-zip-archive record
            centDirCount = count
            centDirSize = pos2 - pos1
            centDirOffset = pos1
            if (centDirCount >= ZIP_FILECOUNT_LIMIT or
                centDirOffset > ZIP64_LIMIT or
                centDirSize > ZIP64_LIMIT):
                # Need to write the ZIP64 end-of-archive records
                zip64endrec = struct.pack(
                        structEndArchive64, stringEndArchive64,
                        44, 45, 45, 0, 0, centDirCount, centDirCount,
                        centDirSize, centDirOffset)
                self.fp.write(zip64endrec)

                # The locator carries pos2, the offset at which the
                # ZIP64 end record was just written.
                zip64locrec = struct.pack(
                        structEndArchive64Locator,
                        stringEndArchive64Locator, 0, pos2, 1)
                self.fp.write(zip64locrec)
                # Clamp the values to the maxima of the fixed-width
                # fields of the regular end record.
                centDirCount = min(centDirCount, 0xFFFF)
                centDirSize = min(centDirSize, 0xFFFFFFFF)
                centDirOffset = min(centDirOffset, 0xFFFFFFFF)

            endrec = struct.pack(structEndArchive, stringEndArchive,
                                 0, 0, centDirCount, centDirCount,
                                 centDirSize, centDirOffset, len(self._comment))
            self.fp.write(endrec)
            self.fp.write(self._comment)
            self.fp.flush()
    finally:
        # Mark the archive closed even if writing the records failed;
        # only close file objects this instance opened itself.
        fp = self.fp
        self.fp = None
        if not self._filePassed:
            fp.close()
1351
1352
1353class PyZipFile(ZipFile):
1354 """Class to create ZIP archives with Python library files and packages."""
1355
1356 def writepy(self, pathname, basename = ""):
1357 """Add all files from "pathname" to the ZIP archive.
1358
1359 If pathname is a package directory, search the directory and
1360 all package subdirectories recursively for all *.py and enter
1361 the modules into the archive. If pathname is a plain
1362 directory, listdir *.py and enter all modules. Else, pathname
1363 must be a Python *.py file and the module will be put into the
1364 archive. Added modules are always module.pyo or module.pyc.
1365 This method will compile the module.py into module.pyc if
1366 necessary.
1367 """
1368 dir, name = os.path.split(pathname)
1369 if os.path.isdir(pathname):
1370 initname = os.path.join(pathname, "__init__.py")
1371 if os.path.isfile(initname):
1372 # This is a package directory, add it
1373 if basename:
1374 basename = "%s/%s" % (basename, name)
1375 else:
1376 basename = name
1377 if self.debug:
1378 print "Adding package in", pathname, "as", basename
1379 fname, arcname = self._get_codename(initname[0:-3], basename)
1380 if self.debug:
1381 print "Adding", arcname
1382 self.write(fname, arcname)
1383 dirlist = os.listdir(pathname)
1384 dirlist.remove("__init__.py")
1385 # Add all *.py files and package subdirectories
1386 for filename in dirlist:
1387 path = os.path.join(pathname, filename)
1388 root, ext = os.path.splitext(filename)
1389 if os.path.isdir(path):
1390 if os.path.isfile(os.path.join(path, "__init__.py")):
1391 # This is a package directory, add it
1392 self.writepy(path, basename) # Recursive call
1393 elif ext == ".py":
1394 fname, arcname = self._get_codename(path[0:-3],
1395 basename)
1396 if self.debug:
1397 print "Adding", arcname
1398 self.write(fname, arcname)
1399 else:
1400 # This is NOT a package directory, add its files at top level
1401 if self.debug:
1402 print "Adding files from directory", pathname
1403 for filename in os.listdir(pathname):
1404 path = os.path.join(pathname, filename)
1405 root, ext = os.path.splitext(filename)
1406 if ext == ".py":
1407 fname, arcname = self._get_codename(path[0:-3],
1408 basename)
1409 if self.debug:
1410 print "Adding", arcname
1411 self.write(fname, arcname)
1412 else:
1413 if pathname[-3:] != ".py":
1414 raise RuntimeError, \
1415 'Files added with writepy() must end with ".py"'
1416 fname, arcname = self._get_codename(pathname[0:-3], basename)
1417 if self.debug:
1418 print "Adding file", arcname
1419 self.write(fname, arcname)
1420
1421 def _get_codename(self, pathname, basename):
1422 """Return (filename, archivename) for the path.
1423
1424 Given a module name path, return the correct file path and
1425 archive name, compiling if necessary. For example, given
1426 /python/lib/string, return (/python/lib/string.pyc, string).
1427 """
1428 file_py = pathname + ".py"
1429 file_pyc = pathname + ".pyc"
1430 file_pyo = pathname + ".pyo"
1431 if os.path.isfile(file_pyo) and \
1432 os.stat(file_pyo).st_mtime >= os.stat(file_py).st_mtime:
1433 fname = file_pyo # Use .pyo file
1434 elif not os.path.isfile(file_pyc) or \
1435 os.stat(file_pyc).st_mtime < os.stat(file_py).st_mtime:
1436 import py_compile
1437 if self.debug:
1438 print "Compiling", file_py
1439 try:
1440 py_compile.compile(file_py, file_pyc, None, True)
1441 except py_compile.PyCompileError,err:
1442 print err.msg
1443 fname = file_pyc
1444 else:
1445 fname = file_pyc
1446 archivename = os.path.split(fname)[1]
1447 if basename:
1448 archivename = "%s/%s" % (basename, archivename)
1449 return (fname, archivename)
1450
1451
def main(args = None):
    """Command-line interface: list, test, extract or create a zipfile.

    `args' is the argument list to parse and defaults to sys.argv[1:].
    The first argument selects the action:

        -l zipfile.zip          print the archive listing
        -t zipfile.zip          test the archive for corrupted members
        -e zipfile.zip target   extract the archive into `target'
        -c zipfile.zip src ...  create the archive from files/directories

    On an unknown action or a wrong number of arguments the usage text
    is printed and the process exits with status 1.
    """
    import textwrap
    USAGE=textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip # Show listing of a zipfile
            zipfile.py -t zipfile.zip # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)
    if args is None:
        args = sys.argv[1:]

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        print(USAGE)
        sys.exit(1)

    if args[0] == '-l':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        with ZipFile(args[1], 'r') as zf:
            zf.printdir()

    elif args[0] == '-t':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        with ZipFile(args[1], 'r') as zf:
            badfile = zf.testzip()
        if badfile:
            print("The following enclosed file is corrupted: {!r}".format(badfile))
        print("Done testing")

    elif args[0] == '-e':
        if len(args) != 3:
            print(USAGE)
            sys.exit(1)

        # Delegate to extractall() rather than joining member names onto
        # the target by hand: it creates directory entries instead of
        # trying to open them as files, and it sanitizes member names
        # (absolute paths, "..") before writing.
        with ZipFile(args[1], 'r') as zf:
            zf.extractall(args[2])

    elif args[0] == '-c':
        if len(args) < 3:
            print(USAGE)
            sys.exit(1)

        def addToZip(zf, path, zippath):
            # Add a file, or recurse into a directory; anything else
            # (e.g. a missing path) is silently ignored.
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                for nm in os.listdir(path):
                    addToZip(zf,
                             os.path.join(path, nm), os.path.join(zippath, nm))
            # else: ignore

        with ZipFile(args[1], 'w', allowZip64=True) as zf:
            for src in args[2:]:
                addToZip(zf, src, os.path.basename(src))
1521
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()
Note: See TracBrowser for help on using the repository browser.