Context Navigation

zipfile.py@ 611

Last change on this file since 611 was 391, checked in by dmik, 11 years ago
python: Merge vendor 2.7.6 to trunk.
Property svn:eol-style set to `native`
File size: 56.5 KB

Line
1	"""
2	Read and write ZIP files.
3	"""
4	import struct, os, time, sys, shutil
5	import binascii, cStringIO, stat
6	import io
7	import re
8	import string
9
10	try:
11	import zlib # We may need its compression method
12	crc32 = zlib.crc32
13	except ImportError:
14	zlib = None
15	crc32 = binascii.crc32
16
17	__all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
18	"ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile" ]
19
class BadZipfile(Exception):
    """Raised by this module for malformed or unreadable ZIP archives."""
22
23
class LargeZipFile(Exception):
    """Signal that an archive needs ZIP64 while ZIP64 is not allowed.

    Raised when writing a zipfile that requires ZIP64 extensions and those
    extensions are disabled.
    """
29
# Alias: ``error`` is the very same class as BadZipfile, the exception
# raised by this module.
error = BadZipfile
31
# Size limits.  ZipInfo.FileHeader() switches to the ZIP64 extra field for
# sizes above ZIP64_LIMIT; ZIP_MAX_COMMENT caps the archive comment length.
ZIP64_LIMIT = 0x7FFFFFFF          # (1 << 31) - 1
ZIP_FILECOUNT_LIMIT = 0x10000     # 1 << 16
ZIP_MAX_COMMENT = 0xFFFF          # (1 << 16) - 1

# Compression method codes handled by this module.  Other ZIP compression
# methods are not supported.
ZIP_STORED, ZIP_DEFLATED = 0, 8
40
# Below are some formats and associated data for reading/writing headers
# using the struct module.  The names and structures of headers/records are
# those used in the PKWARE description of the ZIP file format:
#     http://www.pkware.com/documents/casestudies/APPNOTE.TXT
# (URL valid as of January 2008)

# "End of central directory" record (section V.I in the format document):
# struct format, magic number and packed size.
structEndArchive = "<4s4H2LH"
stringEndArchive = "PK\005\006"
sizeEndCentDir = struct.calcsize(structEndArchive)

# Indices into the unpacked record.  The last two entries are not part of
# the structure as defined in the spec; this module appends them to the
# unpacked list as a convenience.
(
    _ECD_SIGNATURE,             # 0
    _ECD_DISK_NUMBER,           # 1
    _ECD_DISK_START,            # 2
    _ECD_ENTRIES_THIS_DISK,     # 3
    _ECD_ENTRIES_TOTAL,         # 4
    _ECD_SIZE,                  # 5
    _ECD_OFFSET,                # 6
    _ECD_COMMENT_SIZE,          # 7
    _ECD_COMMENT,               # 8 (module-internal)
    _ECD_LOCATION,              # 9 (module-internal)
) = range(10)
65
# "Central directory" entry (section V.F in the format document):
# struct format, magic number and packed size.
structCentralDir = "<4s4B4HL2L5H2L"
stringCentralDir = "PK\001\002"
sizeCentralDir = struct.calcsize(structCentralDir)

# Indices of the fields in an unpacked central directory entry.
(
    _CD_SIGNATURE,                  # 0
    _CD_CREATE_VERSION,             # 1
    _CD_CREATE_SYSTEM,              # 2
    _CD_EXTRACT_VERSION,            # 3
    _CD_EXTRACT_SYSTEM,             # 4
    _CD_FLAG_BITS,                  # 5
    _CD_COMPRESS_TYPE,              # 6
    _CD_TIME,                       # 7
    _CD_DATE,                       # 8
    _CD_CRC,                        # 9
    _CD_COMPRESSED_SIZE,            # 10
    _CD_UNCOMPRESSED_SIZE,          # 11
    _CD_FILENAME_LENGTH,            # 12
    _CD_EXTRA_FIELD_LENGTH,         # 13
    _CD_COMMENT_LENGTH,             # 14
    _CD_DISK_NUMBER_START,          # 15
    _CD_INTERNAL_FILE_ATTRIBUTES,   # 16
    _CD_EXTERNAL_FILE_ATTRIBUTES,   # 17
    _CD_LOCAL_HEADER_OFFSET,        # 18
) = range(19)
92
# "Local file header" (section V.A in the format document):
# struct format, magic number and packed size.
structFileHeader = "<4s2B4HL2L2H"
stringFileHeader = "PK\003\004"
sizeFileHeader = struct.calcsize(structFileHeader)

# Indices of the fields in an unpacked local file header.
(
    _FH_SIGNATURE,                  # 0
    _FH_EXTRACT_VERSION,            # 1
    _FH_EXTRACT_SYSTEM,             # 2
    _FH_GENERAL_PURPOSE_FLAG_BITS,  # 3
    _FH_COMPRESSION_METHOD,         # 4
    _FH_LAST_MOD_TIME,              # 5
    _FH_LAST_MOD_DATE,              # 6
    _FH_CRC,                        # 7
    _FH_COMPRESSED_SIZE,            # 8
    _FH_UNCOMPRESSED_SIZE,          # 9
    _FH_FILENAME_LENGTH,            # 10
    _FH_EXTRA_FIELD_LENGTH,         # 11
) = range(12)
111
# "Zip64 end of central directory locator": struct format, magic number
# and packed size.
structEndArchive64Locator = "<4sLQL"
stringEndArchive64Locator = "PK\x06\x07"
sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)

# "Zip64 end of central directory" record (section V.G in the format
# document): struct format, magic number and packed size.
structEndArchive64 = "<4sQ2H2L4Q"
stringEndArchive64 = "PK\x06\x06"
sizeEndCentDir64 = struct.calcsize(structEndArchive64)

# Indices of the fields in an unpacked ZIP64 end-of-central-directory record.
(
    _CD64_SIGNATURE,                  # 0
    _CD64_DIRECTORY_RECSIZE,          # 1
    _CD64_CREATE_VERSION,             # 2
    _CD64_EXTRACT_VERSION,            # 3
    _CD64_DISK_NUMBER,                # 4
    _CD64_DISK_NUMBER_START,          # 5
    _CD64_NUMBER_ENTRIES_THIS_DISK,   # 6
    _CD64_NUMBER_ENTRIES_TOTAL,       # 7
    _CD64_DIRECTORY_SIZE,             # 8
    _CD64_OFFSET_START_CENTDIR,       # 9
) = range(10)
133
def _check_zipfile(fp):
    """Return True when an end-of-central-directory record is found in fp.

    An IOError raised while probing the file object is treated as
    "not a ZIP file" and yields False.
    """
    try:
        endrec = _EndRecData(fp)
    except IOError:
        return False
    # A record was located, i.e. the file has the correct magic number.
    return True if endrec else False
141
def is_zipfile(filename):
    """Quickly see if a file is a ZIP file by checking the magic number.

    The filename argument may be a file or file-like object too: anything
    with a ``read`` attribute is probed directly, everything else is opened
    in binary mode first.  An IOError at any point yields False.
    """
    is_zip = False
    try:
        if not hasattr(filename, "read"):
            with open(filename, "rb") as fp:
                is_zip = _check_zipfile(fp)
        else:
            is_zip = _check_zipfile(fp=filename)
    except IOError:
        pass
    return is_zip
157
def _EndRecData64(fpin, offset, endrec):
    """Read the ZIP64 end-of-archive records and use them to update endrec.

    fpin   -- seekable file object for the archive.
    offset -- position of the classic end-of-central-directory record,
              relative to the end of the file (the callers in this module
              pass a negative value).
    endrec -- list built by _EndRecData(); modified in place.

    Returns endrec, unchanged when no ZIP64 locator/record with the right
    size and magic number is found.  Raises BadZipfile when the locator
    describes an archive spanning several disks.
    """
    locator_pos = offset - sizeEndCentDir64Locator
    try:
        fpin.seek(locator_pos, 2)
    except IOError:
        # If the seek fails, the file is not large enough to contain a ZIP64
        # end-of-archive record, so just return the end record we were given.
        return endrec

    raw = fpin.read(sizeEndCentDir64Locator)
    if len(raw) != sizeEndCentDir64Locator:
        return endrec
    magic, diskno, _reloff, disks = struct.unpack(
        structEndArchive64Locator, raw)
    if magic != stringEndArchive64Locator:
        return endrec

    if not (diskno == 0 and disks == 1):
        raise BadZipfile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data': the ZIP64 record is expected to sit
    # immediately in front of its locator.
    fpin.seek(locator_pos - sizeEndCentDir64, 2)
    raw = fpin.read(sizeEndCentDir64)
    if len(raw) != sizeEndCentDir64:
        return endrec
    record = struct.unpack(structEndArchive64, raw)
    if record[_CD64_SIGNATURE] != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    field_map = (
        (_ECD_SIGNATURE, _CD64_SIGNATURE),
        (_ECD_DISK_NUMBER, _CD64_DISK_NUMBER),
        (_ECD_DISK_START, _CD64_DISK_NUMBER_START),
        (_ECD_ENTRIES_THIS_DISK, _CD64_NUMBER_ENTRIES_THIS_DISK),
        (_ECD_ENTRIES_TOTAL, _CD64_NUMBER_ENTRIES_TOTAL),
        (_ECD_SIZE, _CD64_DIRECTORY_SIZE),
        (_ECD_OFFSET, _CD64_OFFSET_START_CENTDIR),
    )
    for ecd_index, cd64_index in field_map:
        endrec[ecd_index] = record[cd64_index]
    return endrec
199
200
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The result is a list holding the unpacked fields of the ZIP "End of
    central dir" record, then the archive comment, then the file offset at
    which the record starts.  The list is passed through _EndRecData64() so
    that ZIP64 values, when present, replace the classic ones.
    """
    # Determine file size
    fpin.seek(0, 2)
    filesize = fpin.tell()

    # Fast path: a ZIP file with no archive comment has the "end of central
    # directory" structure as the very last item in the file.
    try:
        fpin.seek(-sizeEndCentDir, 2)
    except IOError:
        return None
    tail = fpin.read()
    plain_record = (
        len(tail) == sizeEndCentDir
        and tail[0:4] == stringEndArchive
        and tail[-2:] == b"\000\000"
    )
    if plain_record:
        # The signature is correct and there's no comment: unpack the
        # structure, then append a blank comment and the record offset.
        endrec = list(struct.unpack(structEndArchive, tail))
        endrec += ["", filesize - sizeEndCentDir]

        # Try to read the "Zip64 end of central directory" structure
        return _EndRecData64(fpin, -sizeEndCentDir, endrec)

    # Either this is not a ZIP file, or it is a ZIP file with an archive
    # comment.  Search the end of the file for the "end of central directory"
    # record signature.  The comment is the last item in the ZIP file and may
    # be up to 64K long.  It is assumed that the "end of central directory"
    # magic number does not appear in the comment.
    maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
    fpin.seek(maxCommentStart, 0)
    tail = fpin.read()
    start = tail.rfind(stringEndArchive)
    if start < 0:
        # Unable to find a valid end of central directory structure
        return None

    # Found the magic number; attempt to unpack and interpret
    record_end = start + sizeEndCentDir
    recData = tail[start:record_end]
    if len(recData) != sizeEndCentDir:
        # Zip file is corrupted.
        return None
    endrec = list(struct.unpack(structEndArchive, recData))
    commentSize = endrec[_ECD_COMMENT_SIZE]  # as claimed by the zip file
    endrec.append(tail[record_end:record_end + commentSize])
    endrec.append(maxCommentStart + start)

    # Try to read the "Zip64 end of central directory" structure
    return _EndRecData64(fpin, maxCommentStart + start - filesize, endrec)
260
261
class ZipInfo (object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
        'orig_filename',
        'filename',
        'date_time',
        'compress_type',
        'comment',
        'extra',
        'create_system',
        'create_version',
        'extract_version',
        'reserved',
        'flag_bits',
        'volume',
        'internal_attr',
        'external_attr',
        'header_offset',
        'CRC',
        'compress_size',
        'file_size',
        '_raw_time',
    )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Describe one archive member.

        filename  -- member name.  The value is kept untouched in
                     ``orig_filename``; ``filename`` holds it cut at the
                     first NUL character and with os.sep replaced by "/".
        date_time -- sequence (year, month, day, hour, min, sec).

        Raises ValueError when the year is earlier than 1980.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec

        if date_time[0] < 1980:
            raise ValueError('ZIP does not support timestamps before 1980')

        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = ""               # Comment for each file
        self.extra = ""                 # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0      # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3      # System which created ZIP archive
        self.create_version = 20        # Version which created ZIP archive
        self.extract_version = 20       # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self, zip64=None):
        """Return the per-file header as a string.

        zip64 -- None (default) decides from the sizes whether the ZIP64
                 extra field is needed; a true value always appends it; a
                 false value forbids it.

        Raises LargeZipFile when a size exceeds ZIP64_LIMIT while zip64 is
        false.  When a size exceeds the limit and ZIP64 is used, the
        extract/create versions of this object are raised to at least 45.
        """
        dt = self.date_time
        # MS-DOS packed date and time (seconds are stored halved).
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        if zip64 is None:
            zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT
        if zip64:
            fmt = '<HHQQ'
            extra = extra + struct.pack(fmt,
                    1, struct.calcsize(fmt)-4, file_size, compress_size)
        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            if not zip64:
                raise LargeZipFile("Filesize would require ZIP64 extensions")
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            file_size = 0xffffffff
            compress_size = 0xffffffff
            self.extract_version = max(45, self.extract_version)
            self.create_version = max(45, self.extract_version)

        filename, flag_bits = self._encodeFilenameFlags()
        header = struct.pack(structFileHeader, stringFileHeader,
                 self.extract_version, self.reserved, flag_bits,
                 self.compress_type, dostime, dosdate, CRC,
                 compress_size, file_size,
                 len(filename), len(extra))
        return header + filename + extra

    def _encodeFilenameFlags(self):
        """Return (encoded filename, flag bits) for writing a header.

        A unicode name is encoded as ASCII when possible; otherwise it is
        encoded as UTF-8 and flag bit 0x800 is set in the returned flags.
        Non-unicode names are returned unchanged.
        """
        if isinstance(self.filename, unicode):
            try:
                return self.filename.encode('ascii'), self.flag_bits
            except UnicodeEncodeError:
                return self.filename.encode('utf-8'), self.flag_bits | 0x800
        else:
            return self.filename, self.flag_bits

    def _decodeFilename(self):
        """Return the filename, decoded from UTF-8 when flag bit 0x800 is set."""
        if self.flag_bits & 0x800:
            return self.filename.decode('utf-8')
        else:
            return self.filename

    def _decodeExtra(self):
        """Apply the ZIP64 record (header id 1) found in the extra field.

        Sizes and header offset currently holding the 0xffffffff placeholder
        are replaced, in that order, by the 64-bit values of the record.
        Records with other ids are skipped.

        Raises RuntimeError when the ZIP64 record has an unexpected length.
        """
        extra = self.extra
        unpack = struct.unpack
        # Every record starts with a 4-byte (id, length) header.  A shorter
        # remainder cannot be a record; it is ignored instead of letting
        # unpack() fail on it.
        while len(extra) >= 4:
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                if ln >= 24:
                    counts = unpack('<QQQ', extra[4:28])
                elif ln == 16:
                    counts = unpack('<QQ', extra[4:20])
                elif ln == 8:
                    counts = unpack('<Q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise RuntimeError("Corrupt extra field %s" % (ln,))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                if self.file_size in (0xffffffffffffffff, 0xffffffff):
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == 0xFFFFFFFF:
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == 0xffffffff:
                    self.header_offset = counts[idx]
                    idx += 1

            extra = extra[ln+4:]
418
419
class _ZipDecrypter:
    """Class to handle decryption of files stored within a ZIP archive.

    ZIP supports a password-based form of encryption. Even though known
    plaintext attacks have been found against it, it is still useful
    to be able to get data out of such a file.

    Usage:
        zd = _ZipDecrypter(mypwd)
        plain_char = zd(cypher_char)
        plain_text = map(zd, cypher_text)
    """

    def _GenerateCRCTable():
        """Generate a CRC-32 table.

        ZIP encryption uses the CRC32 one-byte primitive for scrambling some
        internal keys. We noticed that a direct implementation is faster than
        relying on binascii.crc32().
        """
        polynomial = 0xedb88320
        entries = []
        for byte in range(256):
            value = byte
            for _ in range(8):
                low_bit_set = value & 1
                value = (value >> 1) & 0x7FFFFFFF
                if low_bit_set:
                    value ^= polynomial
            entries.append(value)
        return entries
    # Built once, when the class body is executed.
    crctable = _GenerateCRCTable()

    def _crc32(self, ch, crc):
        """Compute the CRC32 primitive on one byte."""
        index = (crc ^ ord(ch)) & 0xff
        return ((crc >> 8) & 0xffffff) ^ self.crctable[index]

    def __init__(self, pwd):
        """Initialise the three keys and mix in every character of pwd."""
        self.key0 = 0x12345678
        self.key1 = 0x23456789
        self.key2 = 0x34567890
        for char in pwd:
            self._UpdateKeys(char)

    def _UpdateKeys(self, c):
        """Advance the key state with the (plain-text) character c."""
        self.key0 = self._crc32(c, self.key0)
        key1 = (self.key1 + (self.key0 & 0xff)) & 0xffffffff
        self.key1 = (key1 * 0x08088405 + 1) & 0xffffffff
        top_byte = chr((self.key1 >> 24) & 0xff)
        self.key2 = self._crc32(top_byte, self.key2)

    def __call__(self, c):
        """Decrypt a single character."""
        k = self.key2 | 2
        keystream = ((k * (k ^ 1)) >> 8) & 255
        plain = chr(ord(c) ^ keystream)
        self._UpdateKeys(plain)
        return plain
478
479
# Human-readable names for ZIP compression method codes; used to build the
# error message for methods this module cannot decompress.
compressor_names = dict((
    (0, 'store'),
    (1, 'shrink'),
    (2, 'reduce'),
    (3, 'reduce'),
    (4, 'reduce'),
    (5, 'reduce'),
    (6, 'implode'),
    (7, 'tokenize'),
    (8, 'deflate'),
    (9, 'deflate64'),
    (10, 'implode'),
    (12, 'bzip2'),
    (14, 'lzma'),
    (18, 'terse'),
    (19, 'lz77'),
    (97, 'wavpack'),
    (98, 'ppmd'),
))
499
500
class ZipExtFile(io.BufferedIOBase):
    """File-like object for reading an archive member.
       Is returned by ZipFile.open().
    """

    # Max size supported by decompressor.  The parentheses matter:
    # "1 << 31 - 1" parses as "1 << 30".
    MAX_N = (1 << 31) - 1

    # Read from compressed files in 4k blocks.
    MIN_READ_SIZE = 4096

    # Search for universal newlines or line chunks.
    PATTERN = re.compile(r'^(?P<chunk>[^\r\n]+)|(?P<newline>\n|\r\n?)')

    def __init__(self, fileobj, mode, zipinfo, decrypter=None,
            close_fileobj=False):
        """Prepare to read one member.

        fileobj       -- file object positioned at the member's data.
        mode          -- open mode; a 'U' in it enables universal newlines.
        zipinfo       -- ZipInfo of the member (compression type, sizes,
                         name and, if present, CRC are taken from it).
        decrypter     -- optional callable decrypting one character.
        close_fileobj -- close fileobj when this object is closed.

        Raises NotImplementedError for compression types other than
        ZIP_STORED and ZIP_DEFLATED.
        """
        self._fileobj = fileobj
        self._decrypter = decrypter
        self._close_fileobj = close_fileobj

        self._compress_type = zipinfo.compress_type
        self._compress_size = zipinfo.compress_size
        self._compress_left = zipinfo.compress_size

        if self._compress_type == ZIP_DEFLATED:
            self._decompressor = zlib.decompressobj(-15)
        elif self._compress_type != ZIP_STORED:
            descr = compressor_names.get(self._compress_type)
            if descr:
                raise NotImplementedError("compression type %d (%s)" % (self._compress_type, descr))
            else:
                raise NotImplementedError("compression type %d" % (self._compress_type,))
        self._unconsumed = ''

        self._readbuffer = ''
        self._offset = 0

        self._universal = 'U' in mode
        self.newlines = None

        # Adjust read size for encrypted files since the first 12 bytes
        # are for the encryption/password information.
        if self._decrypter is not None:
            self._compress_left -= 12

        self.mode = mode
        self.name = zipinfo.filename

        if hasattr(zipinfo, 'CRC'):
            self._expected_crc = zipinfo.CRC
            self._running_crc = crc32(b'') & 0xffffffff
        else:
            self._expected_crc = None

    def readline(self, limit=-1):
        """Read and return a line from the stream.

        If limit is specified, at most limit bytes will be read.
        """

        if not self._universal and limit < 0:
            # Shortcut common case - newline found in buffer.
            i = self._readbuffer.find('\n', self._offset) + 1
            if i > 0:
                line = self._readbuffer[self._offset: i]
                self._offset = i
                return line

        if not self._universal:
            return io.BufferedIOBase.readline(self, limit)

        line = ''
        while limit < 0 or len(line) < limit:
            readahead = self.peek(2)
            if readahead == '':
                return line

            #
            # Search for universal newlines or line chunks.
            #
            # The pattern returns either a line chunk or a newline, but not
            # both. Combined with peek(2), we are assured that the sequence
            # '\r\n' is always retrieved completely and never split into
            # separate newlines - '\r', '\n' due to coincidental readaheads.
            #
            match = self.PATTERN.search(readahead)
            newline = match.group('newline')
            if newline is not None:
                if self.newlines is None:
                    self.newlines = []
                if newline not in self.newlines:
                    self.newlines.append(newline)
                self._offset += len(newline)
                return line + '\n'

            chunk = match.group('chunk')
            if limit >= 0:
                chunk = chunk[: limit - len(line)]

            self._offset += len(chunk)
            line += chunk

        return line

    def peek(self, n=1):
        """Returns buffered bytes without advancing the position."""
        if n > len(self._readbuffer) - self._offset:
            # Not enough buffered: read, then rewind over what was read so
            # that the data stays available.
            chunk = self.read(n)
            self._offset -= len(chunk)

        # Return up to 512 bytes to reduce allocation overhead for tight loops.
        return self._readbuffer[self._offset: self._offset + 512]

    def readable(self):
        """Report that this object supports reading."""
        return True

    def read(self, n=-1):
        """Read and return up to n bytes.
        If the argument is omitted, None, or negative, data is read and returned until EOF is reached.
        """
        buf = ''
        if n is None:
            n = -1
        while True:
            if n < 0:
                data = self.read1(n)
            elif n > len(buf):
                data = self.read1(n - len(buf))
            else:
                return buf
            if len(data) == 0:
                return buf
            buf += data

    def _update_crc(self, newdata, eof):
        """Fold newdata into the running CRC; verify it when eof is true.

        Raises BadZipfile when, at end of file, the running CRC differs
        from the expected one.
        """
        # Update the CRC using the given data.
        if self._expected_crc is None:
            # No need to compute the CRC if we don't have a reference value
            return
        self._running_crc = crc32(newdata, self._running_crc) & 0xffffffff
        # Check the CRC if we're at the end of the file
        if eof and self._running_crc != self._expected_crc:
            raise BadZipfile("Bad CRC-32 for file %r" % self.name)

    def read1(self, n):
        """Read up to n bytes with at most one read() system call."""

        # Simplify algorithm (branching) by transforming negative n to large n.
        # None is tested first so that it is never compared with an integer.
        if n is None or n < 0:
            n = self.MAX_N

        # Bytes available in read buffer.
        len_readbuffer = len(self._readbuffer) - self._offset

        # Read from file.
        if self._compress_left > 0 and n > len_readbuffer + len(self._unconsumed):
            nbytes = n - len_readbuffer - len(self._unconsumed)
            nbytes = max(nbytes, self.MIN_READ_SIZE)
            nbytes = min(nbytes, self._compress_left)

            data = self._fileobj.read(nbytes)
            self._compress_left -= len(data)

            if data and self._decrypter is not None:
                data = ''.join(map(self._decrypter, data))

            if self._compress_type == ZIP_STORED:
                self._update_crc(data, eof=(self._compress_left==0))
                self._readbuffer = self._readbuffer[self._offset:] + data
                self._offset = 0
            else:
                # Prepare deflated bytes for decompression.
                self._unconsumed += data

        # Handle unconsumed data.
        if (len(self._unconsumed) > 0 and n > len_readbuffer and
            self._compress_type == ZIP_DEFLATED):
            data = self._decompressor.decompress(
                self._unconsumed,
                max(n - len_readbuffer, self.MIN_READ_SIZE)
            )

            self._unconsumed = self._decompressor.unconsumed_tail
            eof = len(self._unconsumed) == 0 and self._compress_left == 0
            if eof:
                data += self._decompressor.flush()

            self._update_crc(data, eof=eof)
            self._readbuffer = self._readbuffer[self._offset:] + data
            self._offset = 0

        # Read from buffer.
        data = self._readbuffer[self._offset: self._offset + n]
        self._offset += len(data)
        return data

    def close(self):
        """Close the underlying file object if this object owns it."""
        try :
            if self._close_fileobj:
                self._fileobj.close()
        finally:
            super(ZipExtFile, self).close()
703
704
705	class ZipFile(object):
706	""" Class with methods to open, read, write, close, list zip files.
707
708	z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)
709
710	file: Either the path to the file, or a file-like object.
711	If it is a path, the file will be opened and closed by ZipFile.
712	mode: The mode can be either read "r", write "w" or append "a".
713	compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
714	allowZip64: if True ZipFile will create files with ZIP64 extensions when
715	needed, otherwise it will raise an exception when this would
716	be necessary.
717
718	"""
719
720	fp = None # Set here since __del__ checks it
721
def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
    """Open the ZIP file with mode read "r", write "w" or append "a".

    file may be a path (opened and owned by this object) or a file-like
    object (used as given).  RuntimeError is raised for an invalid mode or
    an unsupported/unavailable compression method.
    """
    if mode not in ("r", "w", "a"):
        raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')

    if compression == ZIP_DEFLATED:
        if not zlib:
            raise RuntimeError(
                "Compression requires the (missing) zlib module")
    elif compression != ZIP_STORED:
        raise RuntimeError("That compression method is not supported")

    self._allowZip64 = allowZip64
    self._didModify = False
    self.debug = 0                  # Level of printing: 0 through 3
    self.NameToInfo = {}            # Find file info given name
    self.filelist = []              # List of ZipInfo instances for archive
    self.compression = compression  # Method of compression
    self.mode = key = mode.replace('b', '')[0]
    self.pwd = None
    self._comment = ''

    # Check if we were passed a file-like object
    if not isinstance(file, basestring):
        self._filePassed = 1
        self.fp = file
        self.filename = getattr(file, 'name', None)
    else:
        self._filePassed = 0
        self.filename = file
        modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
        try:
            self.fp = open(file, modeDict[mode])
        except IOError:
            if mode != 'a':
                raise
            # Appending to a file that cannot be opened for update:
            # fall back to creating it.
            mode = key = 'w'
            self.fp = open(file, modeDict[mode])

    try:
        if key == 'r':
            self._RealGetContents()
        elif key == 'w':
            # set the modified flag so central directory gets written
            # even if no files are added to the archive
            self._didModify = True
        elif key == 'a':
            try:
                # See if file is a zip file
                self._RealGetContents()
                # seek to start of directory and overwrite
                self.fp.seek(self.start_dir, 0)
            except BadZipfile:
                # file is not a zip file, just append
                self.fp.seek(0, 2)

            # set the modified flag so central directory gets written
            # even if no files are added to the archive
            self._didModify = True
        else:
            raise RuntimeError('Mode must be "r", "w" or "a"')
    except:
        # Drop our reference (and close the file if we opened it) before
        # letting the error propagate.
        opened = self.fp
        self.fp = None
        if not self._filePassed:
            opened.close()
        raise
792
def __enter__(self):
    """Enter a ``with`` block; the archive object itself is the target."""
    return self
795
def __exit__(self, type, value, traceback):
    """Leave a ``with`` block by closing the archive.

    Nothing is returned, so an exception raised inside the block is not
    suppressed.
    """
    self.close()
798
def _RealGetContents(self):
    """Read in the table of contents for the ZIP file.

    Fills self.filelist and self.NameToInfo with one ZipInfo per central
    directory entry, and sets self._comment and self.start_dir.

    Raises BadZipfile when no end-of-central-directory record is found,
    when the computed start of the central directory is negative, or when
    a central directory entry is truncated or has a bad magic number.
    """
    fp = self.fp
    try:
        endrec = _EndRecData(fp)
    except IOError:
        raise BadZipfile("File is not a zip file")
    if not endrec:
        raise BadZipfile("File is not a zip file")
    if self.debug > 1:
        print(endrec)
    size_cd = endrec[_ECD_SIZE]             # bytes in central directory
    offset_cd = endrec[_ECD_OFFSET]         # offset of central directory
    self._comment = endrec[_ECD_COMMENT]    # archive comment

    # "concat" is zero, unless zip was concatenated to another file
    concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
    if endrec[_ECD_SIGNATURE] == stringEndArchive64:
        # If Zip64 extension structures are present, account for them
        concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)

    if self.debug > 2:
        inferred = concat + offset_cd
        print("given, inferred, offset %s %s %s"
              % (offset_cd, inferred, concat))
    # self.start_dir:  Position of start of central directory
    self.start_dir = offset_cd + concat
    if self.start_dir < 0:
        # Inconsistent size/offset fields: report a bad archive instead of
        # passing a negative position to seek().
        raise BadZipfile("Bad offset for central directory")
    fp.seek(self.start_dir, 0)
    data = fp.read(size_cd)
    # From here on, parse the central directory from memory.
    fp = cStringIO.StringIO(data)
    total = 0
    while total < size_cd:
        centdir = fp.read(sizeCentralDir)
        if len(centdir) != sizeCentralDir:
            raise BadZipfile("Truncated central directory")
        centdir = struct.unpack(structCentralDir, centdir)
        if centdir[_CD_SIGNATURE] != stringCentralDir:
            raise BadZipfile("Bad magic number for central directory")
        if self.debug > 2:
            print(centdir)
        filename = fp.read(centdir[_CD_FILENAME_LENGTH])
        # Create ZipInfo instance to store file information
        x = ZipInfo(filename)
        x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
        x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
        x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
        (x.create_version, x.create_system, x.extract_version, x.reserved,
            x.flag_bits, x.compress_type, t, d,
            x.CRC, x.compress_size, x.file_size) = centdir[1:12]
        x.volume, x.internal_attr, x.external_attr = centdir[15:18]
        # Convert date/time code to (year, month, day, hour, min, sec)
        x._raw_time = t
        x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
                                 t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )

        x._decodeExtra()
        x.header_offset = x.header_offset + concat
        x.filename = x._decodeFilename()
        self.filelist.append(x)
        self.NameToInfo[x.filename] = x

        # update total bytes read from central directory
        total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
                 + centdir[_CD_EXTRA_FIELD_LENGTH]
                 + centdir[_CD_COMMENT_LENGTH])

        if self.debug > 2:
            print("total %s" % total)
866
867
def namelist(self):
    """Return a new list with the file name of every archive member."""
    return [entry.filename for entry in self.filelist]
874
def infolist(self):
    """Return the list of ZipInfo instances for the files in the archive.

    The list is the archive's own ``filelist`` object, not a copy.
    """
    return self.filelist
879
def printdir(self):
    """Print a table of contents for the zip file.

    One header line is written to standard output, followed by one line
    (name, modification date and time, size) per archive member.
    """
    print("%-46s %19s %12s" % ("File Name", "Modified ", "Size"))
    for entry in self.filelist:
        stamp = "%d-%02d-%02d %02d:%02d:%02d" % entry.date_time[:6]
        print("%-46s %s %12d" % (entry.filename, stamp, entry.file_size))
886
887	def testzip(self):
888	"""Read all the files and check the CRC."""
889	chunk_size = 2 ** 20
890	for zinfo in self.filelist:
891	try:
892	# Read by chunks, to avoid an OverflowError or a
893	# MemoryError with very large embedded files.
894	with self.open(zinfo.filename, "r") as f:
895	while f.read(chunk_size): # Check CRC-32
896	pass
897	except BadZipfile:
898	return zinfo.filename
899
def getinfo(self, name):
    """Return the instance of ZipInfo given 'name'.

    Raises KeyError when the archive has no member with that name.
    """
    entry = self.NameToInfo.get(name)
    if entry is not None:
        return entry
    raise KeyError(
        'There is no item named %r in the archive' % name)
908
def setpassword(self, pwd):
    """Set default password for encrypted files.

    The value is stored in ``self.pwd``; open() uses it when it is called
    without a password of its own.
    """
    self.pwd = pwd
912
@property
def comment(self):
    """The comment text associated with the ZIP file."""
    return self._comment

@comment.setter
def comment(self, comment):
    """Store a new archive comment and mark the archive as modified.

    A comment longer than ZIP_MAX_COMMENT is truncated to that length; a
    notice is printed first when self.debug is set.  A comment of exactly
    ZIP_MAX_COMMENT bytes is kept as is and is not reported.
    """
    # check for valid comment length
    if len(comment) > ZIP_MAX_COMMENT:
        if self.debug:
            print('Archive comment is too long; truncating to %d bytes'
                  % ZIP_MAX_COMMENT)
        comment = comment[:ZIP_MAX_COMMENT]
    self._comment = comment
    self._didModify = True
928
def read(self, name, pwd=None):
    """Return file bytes (as a string) for name.

    name -- member name or ZipInfo object, passed on to open().
    pwd  -- optional password, passed on to open().

    The member file returned by open() is closed before returning, so no
    file handle is left behind.
    """
    with self.open(name, "r", pwd) as member:
        return member.read()
932
def open(self, name, mode="r", pwd=None):
    """Return file-like object for 'name'.

    name -- member name or ZipInfo object.
    mode -- "r", "U" or "rU".
    pwd  -- password for an encrypted member; self.pwd is used when this
            is empty.

    Raises RuntimeError for an invalid mode, a closed archive, a missing
    password or a wrong password, and BadZipfile when the local file
    header is truncated, has a bad magic number or names another file.
    """
    if mode not in ("r", "U", "rU"):
        raise RuntimeError('open() requires mode "r", "U", or "rU"')
    if not self.fp:
        raise RuntimeError(
            "Attempt to read ZIP archive that was already closed")

    # Only open a new file for instances where we were not
    # given a file object in the constructor
    should_close = not self._filePassed
    if should_close:
        zef_file = open(self.filename, 'rb')
    else:
        zef_file = self.fp

    try:
        # Make sure we have an info object: 'name' may already be one,
        # otherwise look it up.
        zinfo = name if isinstance(name, ZipInfo) else self.getinfo(name)

        zef_file.seek(zinfo.header_offset, 0)

        # Skip the file header:
        raw_header = zef_file.read(sizeFileHeader)
        if len(raw_header) != sizeFileHeader:
            raise BadZipfile("Truncated file header")
        fheader = struct.unpack(structFileHeader, raw_header)
        if fheader[_FH_SIGNATURE] != stringFileHeader:
            raise BadZipfile("Bad magic number for file header")

        fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
        extra_length = fheader[_FH_EXTRA_FIELD_LENGTH]
        if extra_length:
            zef_file.read(extra_length)

        if fname != zinfo.orig_filename:
            raise BadZipfile(
                'File name in directory "%s" and header "%s" differ.' % (
                    zinfo.orig_filename, fname))

        # check for encrypted flag & handle password
        zd = None
        if zinfo.flag_bits & 0x1:
            pwd = pwd or self.pwd
            if not pwd:
                raise RuntimeError(
                    "File %s is encrypted, "
                    "password required for extraction" % name)

            zd = _ZipDecrypter(pwd)
            # The first 12 bytes in the cypher stream is an encryption header
            #  used to strengthen the algorithm. The first 11 bytes are
            #  completely random, while the 12th contains the MSB of the CRC,
            #  or the MSB of the file time depending on the header type
            #  and is used to check the correctness of the password.
            crypt_header = zef_file.read(12)
            plain_header = [zd(char) for char in crypt_header[0:12]]
            if zinfo.flag_bits & 0x8:
                # compare against the file type from extended local headers
                check_byte = (zinfo._raw_time >> 8) & 0xff
            else:
                # compare against the CRC otherwise
                check_byte = (zinfo.CRC >> 24) & 0xff
            if ord(plain_header[11]) != check_byte:
                raise RuntimeError("Bad password for file", name)

        return ZipExtFile(zef_file, mode, zinfo, zd,
                          close_fileobj=should_close)
    except:
        # Do not leak the file we opened ourselves.
        if should_close:
            zef_file.close()
        raise
1011
def extract(self, member, path=None, pwd=None):
    """Extract one archive member and return the path it was written to.

    `member' may be an archive name or a ZipInfo object; a name is
    resolved through getinfo().  `path' is the directory to extract
    into and defaults to the current working directory.  `pwd' is
    handed on unchanged to the extraction helper.
    """
    info = member if isinstance(member, ZipInfo) else self.getinfo(member)
    destination = os.getcwd() if path is None else path
    return self._extract_member(info, destination, pwd)
1025
def extractall(self, path=None, members=None, pwd=None):
    """Extract several members of the archive, one extract() call each.

    `path' is the directory to extract to; when it is None, extract()
    falls back to the current working directory.  `members' is the
    optional collection of names (or ZipInfo objects) to extract and
    defaults to everything returned by namelist().  `pwd' is passed
    through to every extract() call.
    """
    selected = self.namelist() if members is None else members
    for entry in selected:
        self.extract(entry, path, pwd)
1037
def _extract_member(self, member, targetpath, pwd):
    """Extract the ZipInfo object 'member' to a physical file below the
    directory 'targetpath' and return the path that was created.

    The archive name is sanitised before use: a drive letter or UNC
    prefix, empty components, "." and ".." are dropped, so the result
    is always interpreted relative to 'targetpath'.  Names ending in
    '/' are created as directories.  'pwd' is passed to self.open().
    """
    # build the destination pathname, replacing
    # forward slashes to platform specific separators.
    arcname = member.filename.replace('/', os.path.sep)

    if os.path.altsep:
        arcname = arcname.replace(os.path.altsep, os.path.sep)
    # interpret absolute pathname as relative, remove drive letter or
    # UNC path, redundant separators, "." and ".." components.
    arcname = os.path.splitdrive(arcname)[1]
    arcname = os.path.sep.join(x for x in arcname.split(os.path.sep)
                               if x not in ('', os.path.curdir,
                                            os.path.pardir))
    if os.path.sep == '\\':
        # filter illegal characters on Windows.  The backslash must NOT
        # be part of this table: it is the path separator here and has
        # to survive the translation.
        illegal = ':<>|"?*'
        if isinstance(arcname, unicode):
            table = {ord(c): ord('_') for c in illegal}
        else:
            table = string.maketrans(illegal, '_' * len(illegal))
        arcname = arcname.translate(table)
        # remove trailing dots
        arcname = (x.rstrip('.') for x in arcname.split(os.path.sep))
        arcname = os.path.sep.join(x for x in arcname if x)

    targetpath = os.path.join(targetpath, arcname)
    targetpath = os.path.normpath(targetpath)

    # Create all upper directories if necessary.
    upperdirs = os.path.dirname(targetpath)
    if upperdirs and not os.path.exists(upperdirs):
        os.makedirs(upperdirs)

    # endswith() rather than [-1] so an empty member name cannot raise
    # IndexError here.
    if member.filename.endswith('/'):
        if not os.path.isdir(targetpath):
            os.mkdir(targetpath)
        return targetpath

    with self.open(member, pwd=pwd) as source, \
         open(targetpath, "wb") as target:
        shutil.copyfileobj(source, target)

    return targetpath
1083
1084	def _writecheck(self, zinfo):
1085	"""Check for errors before writing a file to the archive."""
1086	if zinfo.filename in self.NameToInfo:
1087	if self.debug: # Warning for duplicate names
1088	print "Duplicate name:", zinfo.filename
1089	if self.mode not in ("w", "a"):
1090	raise RuntimeError, 'write() requires mode "w" or "a"'
1091	if not self.fp:
1092	raise RuntimeError, \
1093	"Attempt to write ZIP archive that was already closed"
1094	if zinfo.compress_type == ZIP_DEFLATED and not zlib:
1095	raise RuntimeError, \
1096	"Compression requires the (missing) zlib module"
1097	if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
1098	raise RuntimeError, \
1099	"That compression method is not supported"
1100	if zinfo.file_size > ZIP64_LIMIT:
1101	if not self._allowZip64:
1102	raise LargeZipFile("Filesize would require ZIP64 extensions")
1103	if zinfo.header_offset > ZIP64_LIMIT:
1104	if not self._allowZip64:
1105	raise LargeZipFile("Zipfile size would require ZIP64 extensions")
1106
def write(self, filename, arcname=None, compress_type=None):
    """Add the file (or directory) 'filename' to the archive.

    'arcname' is the name stored in the archive and defaults to
    'filename'; its drive prefix and leading separators are removed and
    the rest is normalised.  A directory is stored as a header-only
    entry whose name ends in '/'.  'compress_type' overrides the
    archive's default compression for this entry.

    Raises RuntimeError when the archive is already closed, or when the
    sizes measured while copying exceed ZIP64_LIMIT although the header
    was written without ZIP64 fields.
    """
    if not self.fp:
        raise RuntimeError(
            "Attempt to write to ZIP archive that was already closed")

    file_stat = os.stat(filename)
    is_directory = stat.S_ISDIR(file_stat.st_mode)
    timestamp = time.localtime(file_stat.st_mtime)[0:6]

    # Work out the name stored in the archive.
    stored_name = filename if arcname is None else arcname
    stored_name = os.path.normpath(os.path.splitdrive(stored_name)[1])
    while stored_name[0] in (os.sep, os.altsep):
        stored_name = stored_name[1:]
    if is_directory:
        stored_name += '/'

    # Create ZipInfo instance to store file information
    zinfo = ZipInfo(stored_name, timestamp)
    zinfo.external_attr = (file_stat[0] & 0xFFFF) << 16   # Unix attributes
    zinfo.compress_type = (self.compression if compress_type is None
                           else compress_type)
    zinfo.file_size = file_stat.st_size
    zinfo.flag_bits = 0x00
    zinfo.header_offset = self.fp.tell()    # Start of header bytes

    self._writecheck(zinfo)
    self._didModify = True

    if is_directory:
        # Directories carry no data: register the entry and emit only
        # its header.
        zinfo.file_size = 0
        zinfo.compress_size = 0
        zinfo.CRC = 0
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo
        self.fp.write(zinfo.FileHeader(False))
        return

    checksum = 0
    raw_total = 0
    packed_total = 0
    with open(filename, "rb") as src:
        # Placeholder values; the header is rewritten below once the
        # real CRC and sizes are known.
        zinfo.CRC = 0
        zinfo.compress_size = 0
        # Compressed size can be larger than uncompressed size
        zip64 = self._allowZip64 and \
                zinfo.file_size * 1.05 > ZIP64_LIMIT
        self.fp.write(zinfo.FileHeader(zip64))
        if zinfo.compress_type == ZIP_DEFLATED:
            compressor = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                                          zlib.DEFLATED, -15)
        else:
            compressor = None
        while True:
            chunk = src.read(1024 * 8)
            if not chunk:
                break
            raw_total += len(chunk)
            checksum = crc32(chunk, checksum) & 0xffffffff
            if compressor:
                chunk = compressor.compress(chunk)
                packed_total += len(chunk)
            self.fp.write(chunk)

    if compressor:
        tail = compressor.flush()
        packed_total += len(tail)
        self.fp.write(tail)
        zinfo.compress_size = packed_total
    else:
        zinfo.compress_size = raw_total
    zinfo.CRC = checksum
    zinfo.file_size = raw_total

    if not zip64 and self._allowZip64:
        if raw_total > ZIP64_LIMIT:
            raise RuntimeError('File size has increased during compressing')
        if packed_total > ZIP64_LIMIT:
            raise RuntimeError('Compressed size larger than uncompressed size')

    # Seek backwards and write file header (which will now include
    # correct CRC and file sizes)
    resume_at = self.fp.tell()      # Preserve current position in file
    self.fp.seek(zinfo.header_offset, 0)
    self.fp.write(zinfo.FileHeader(zip64))
    self.fp.seek(resume_at, 0)
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
1195
def writestr(self, zinfo_or_arcname, bytes, compress_type=None):
    """Write the string 'bytes' into the archive as one entry.

    'zinfo_or_arcname' is either a ZipInfo instance, used as given, or
    the name of the entry; for a plain name a ZipInfo is created with
    the current local time, the archive's default compression and
    external attributes of 0600 << 16.  'compress_type', when given,
    overrides the compression of the entry.

    Raises RuntimeError when the archive is already closed and
    LargeZipFile when the entry needs ZIP64 but ZIP64 is not allowed.
    """
    if isinstance(zinfo_or_arcname, ZipInfo):
        zinfo = zinfo_or_arcname
    else:
        now = time.localtime(time.time())[:6]
        zinfo = ZipInfo(filename=zinfo_or_arcname, date_time=now)
        zinfo.compress_type = self.compression
        zinfo.external_attr = 0o600 << 16

    if not self.fp:
        raise RuntimeError(
            "Attempt to write to ZIP archive that was already closed")

    if compress_type is not None:
        zinfo.compress_type = compress_type

    payload = bytes
    zinfo.file_size = len(payload)          # Uncompressed size
    zinfo.header_offset = self.fp.tell()    # Start of header bytes
    self._writecheck(zinfo)
    self._didModify = True
    zinfo.CRC = crc32(payload) & 0xffffffff     # CRC-32 checksum

    if zinfo.compress_type == ZIP_DEFLATED:
        deflater = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                                    zlib.DEFLATED, -15)
        payload = deflater.compress(payload) + deflater.flush()
        zinfo.compress_size = len(payload)  # Compressed size
    else:
        zinfo.compress_size = zinfo.file_size

    zip64 = (zinfo.file_size > ZIP64_LIMIT or
             zinfo.compress_size > ZIP64_LIMIT)
    if zip64 and not self._allowZip64:
        raise LargeZipFile("Filesize would require ZIP64 extensions")

    self.fp.write(zinfo.FileHeader(zip64))
    self.fp.write(payload)
    if zinfo.flag_bits & 0x08:
        # Write CRC and file sizes after the file data
        if zip64:
            descriptor_format = '<LQQ'
        else:
            descriptor_format = '<LLL'
        descriptor = struct.pack(descriptor_format, zinfo.CRC,
                                 zinfo.compress_size, zinfo.file_size)
        self.fp.write(descriptor)
    self.fp.flush()
    self.filelist.append(zinfo)
    self.NameToInfo[zinfo.filename] = zinfo
1242
def __del__(self):
    """Finaliser: delegate to close() in case the caller never did."""
    self.close()
1246
def close(self):
    """Close the file, and for mode "w" and "a" write the ending
    records.

    Does nothing when the archive is already closed.  When the archive
    was opened for writing and has been modified, the central directory
    is written, followed by the ZIP64 end-of-archive records if the
    entry count, directory size or directory offset requires them, and
    finally the end-of-archive record and the archive comment.  The
    underlying file is closed only if this object opened it itself;
    self.fp is reset to None in every case, even when writing fails.
    """
    if self.fp is None:
        return

    try:
        if self.mode in ("w", "a") and self._didModify: # write ending records
            count = 0
            pos1 = self.fp.tell()
            for zinfo in self.filelist:         # write central directory
                count = count + 1
                dt = zinfo.date_time
                # Pack the date and time into the 16-bit MS-DOS layout.
                dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
                dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
                # Values too large for their 32-bit slot are collected
                # here and stored in a ZIP64 extra field instead.
                extra = []
                if zinfo.file_size > ZIP64_LIMIT \
                        or zinfo.compress_size > ZIP64_LIMIT:
                    extra.append(zinfo.file_size)
                    extra.append(zinfo.compress_size)
                    file_size = 0xffffffff
                    compress_size = 0xffffffff
                else:
                    file_size = zinfo.file_size
                    compress_size = zinfo.compress_size

                if zinfo.header_offset > ZIP64_LIMIT:
                    extra.append(zinfo.header_offset)
                    header_offset = 0xffffffff
                else:
                    header_offset = zinfo.header_offset

                extra_data = zinfo.extra
                if extra:
                    # Append a ZIP64 field to the extra's: header id 1,
                    # payload length of 8 bytes per value, then the
                    # values themselves as unsigned 64-bit integers.
                    extra_data = struct.pack(
                            '<HH' + 'Q'*len(extra),
                            1, 8*len(extra), *extra) + extra_data

                    extract_version = max(45, zinfo.extract_version)
                    create_version = max(45, zinfo.create_version)
                else:
                    extract_version = zinfo.extract_version
                    create_version = zinfo.create_version

                try:
                    filename, flag_bits = zinfo._encodeFilenameFlags()
                    centdir = struct.pack(structCentralDir,
                        stringCentralDir, create_version,
                        zinfo.create_system, extract_version, zinfo.reserved,
                        flag_bits, zinfo.compress_type, dostime, dosdate,
                        zinfo.CRC, compress_size, file_size,
                        len(filename), len(extra_data), len(zinfo.comment),
                        0, zinfo.internal_attr, zinfo.external_attr,
                        header_offset)
                except DeprecationWarning:
                    # Dump the values that were being packed before
                    # re-raising, to help diagnose the failure.
                    print >>sys.stderr, (structCentralDir,
                        stringCentralDir, create_version,
                        zinfo.create_system, extract_version, zinfo.reserved,
                        zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                        zinfo.CRC, compress_size, file_size,
                        len(zinfo.filename), len(extra_data), len(zinfo.comment),
                        0, zinfo.internal_attr, zinfo.external_attr,
                        header_offset)
                    raise
                self.fp.write(centdir)
                self.fp.write(filename)
                self.fp.write(extra_data)
                self.fp.write(zinfo.comment)

            pos2 = self.fp.tell()
            # Write end-of-zip-archive record
            centDirCount = count
            centDirSize = pos2 - pos1
            centDirOffset = pos1
            if (centDirCount >= ZIP_FILECOUNT_LIMIT or
                centDirOffset > ZIP64_LIMIT or
                centDirSize > ZIP64_LIMIT):
                # Need to write the ZIP64 end-of-archive records
                zip64endrec = struct.pack(
                        structEndArchive64, stringEndArchive64,
                        44, 45, 45, 0, 0, centDirCount, centDirCount,
                        centDirSize, centDirOffset)
                self.fp.write(zip64endrec)

                zip64locrec = struct.pack(
                        structEndArchive64Locator,
                        stringEndArchive64Locator, 0, pos2, 1)
                self.fp.write(zip64locrec)
                # The classic record can only hold the clamped values;
                # the real ones live in the ZIP64 record just written.
                centDirCount = min(centDirCount, 0xFFFF)
                centDirSize = min(centDirSize, 0xFFFFFFFF)
                centDirOffset = min(centDirOffset, 0xFFFFFFFF)

            endrec = struct.pack(structEndArchive, stringEndArchive,
                                 0, 0, centDirCount, centDirCount,
                                 centDirSize, centDirOffset, len(self._comment))
            self.fp.write(endrec)
            self.fp.write(self._comment)
            self.fp.flush()
    finally:
        # Detach the file object first so the archive counts as closed
        # even if closing the file raises.
        fp = self.fp
        self.fp = None
        if not self._filePassed:
            fp.close()
1351
1352
1353	class PyZipFile(ZipFile):
1354	"""Class to create ZIP archives with Python library files and packages."""
1355
1356	def writepy(self, pathname, basename = ""):
1357	"""Add all files from "pathname" to the ZIP archive.
1358
1359	If pathname is a package directory, search the directory and
1360	all package subdirectories recursively for all *.py and enter
1361	the modules into the archive. If pathname is a plain
1362	directory, listdir *.py and enter all modules. Else, pathname
1363	must be a Python *.py file and the module will be put into the
1364	archive. Added modules are always module.pyo or module.pyc.
1365	This method will compile the module.py into module.pyc if
1366	necessary.
1367	"""
1368	dir, name = os.path.split(pathname)
1369	if os.path.isdir(pathname):
1370	initname = os.path.join(pathname, "__init__.py")
1371	if os.path.isfile(initname):
1372	# This is a package directory, add it
1373	if basename:
1374	basename = "%s/%s" % (basename, name)
1375	else:
1376	basename = name
1377	if self.debug:
1378	print "Adding package in", pathname, "as", basename
1379	fname, arcname = self._get_codename(initname[0:-3], basename)
1380	if self.debug:
1381	print "Adding", arcname
1382	self.write(fname, arcname)
1383	dirlist = os.listdir(pathname)
1384	dirlist.remove("__init__.py")
1385	# Add all *.py files and package subdirectories
1386	for filename in dirlist:
1387	path = os.path.join(pathname, filename)
1388	root, ext = os.path.splitext(filename)
1389	if os.path.isdir(path):
1390	if os.path.isfile(os.path.join(path, "__init__.py")):
1391	# This is a package directory, add it
1392	self.writepy(path, basename) # Recursive call
1393	elif ext == ".py":
1394	fname, arcname = self._get_codename(path[0:-3],
1395	basename)
1396	if self.debug:
1397	print "Adding", arcname
1398	self.write(fname, arcname)
1399	else:
1400	# This is NOT a package directory, add its files at top level
1401	if self.debug:
1402	print "Adding files from directory", pathname
1403	for filename in os.listdir(pathname):
1404	path = os.path.join(pathname, filename)
1405	root, ext = os.path.splitext(filename)
1406	if ext == ".py":
1407	fname, arcname = self._get_codename(path[0:-3],
1408	basename)
1409	if self.debug:
1410	print "Adding", arcname
1411	self.write(fname, arcname)
1412	else:
1413	if pathname[-3:] != ".py":
1414	raise RuntimeError, \
1415	'Files added with writepy() must end with ".py"'
1416	fname, arcname = self._get_codename(pathname[0:-3], basename)
1417	if self.debug:
1418	print "Adding file", arcname
1419	self.write(fname, arcname)
1420
1421	def _get_codename(self, pathname, basename):
1422	"""Return (filename, archivename) for the path.
1423
1424	Given a module name path, return the correct file path and
1425	archive name, compiling if necessary. For example, given
1426	/python/lib/string, return (/python/lib/string.pyc, string).
1427	"""
1428	file_py = pathname + ".py"
1429	file_pyc = pathname + ".pyc"
1430	file_pyo = pathname + ".pyo"
1431	if os.path.isfile(file_pyo) and \
1432	os.stat(file_pyo).st_mtime >= os.stat(file_py).st_mtime:
1433	fname = file_pyo # Use .pyo file
1434	elif not os.path.isfile(file_pyc) or \
1435	os.stat(file_pyc).st_mtime < os.stat(file_py).st_mtime:
1436	import py_compile
1437	if self.debug:
1438	print "Compiling", file_py
1439	try:
1440	py_compile.compile(file_py, file_pyc, None, True)
1441	except py_compile.PyCompileError,err:
1442	print err.msg
1443	fname = file_pyc
1444	else:
1445	fname = file_pyc
1446	archivename = os.path.split(fname)[1]
1447	if basename:
1448	archivename = "%s/%s" % (basename, archivename)
1449	return (fname, archivename)
1450
1451
def main(args = None):
    """Command-line entry point: list, test, extract or create a zipfile.

    'args' is the argument list without the program name and defaults
    to sys.argv[1:].  The first argument selects the action (-l, -t, -e
    or -c).  On an unknown action or a wrong number of arguments the
    usage text is printed and the process exits with status 1.
    """
    import textwrap
    USAGE=textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip        # Show listing of a zipfile
            zipfile.py -t zipfile.zip        # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)
    if args is None:
        args = sys.argv[1:]

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        print(USAGE)
        sys.exit(1)

    if args[0] == '-l':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        with ZipFile(args[1], 'r') as zf:
            zf.printdir()

    elif args[0] == '-t':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        with ZipFile(args[1], 'r') as zf:
            badfile = zf.testzip()
        if badfile:
            print("The following enclosed file is corrupted: {!r}".format(badfile))
        print("Done testing")

    elif args[0] == '-e':
        if len(args) != 3:
            print(USAGE)
            sys.exit(1)

        with ZipFile(args[1], 'r') as zf:
            # Archive member names are untrusted input.  extractall()
            # strips absolute prefixes and ".." components and creates
            # directory entries; joining the raw names onto the target
            # directory would let an archive write outside of it.
            zf.extractall(args[2])

    elif args[0] == '-c':
        if len(args) < 3:
            print(USAGE)
            sys.exit(1)

        def addToZip(zf, path, zippath):
            # Add a file, or recurse into a directory; anything else
            # is ignored.
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                for nm in os.listdir(path):
                    addToZip(zf,
                            os.path.join(path, nm), os.path.join(zippath, nm))
            # else: ignore

        with ZipFile(args[1], 'w', allowZip64=True) as zf:
            for src in args[2:]:
                addToZip(zf, src, os.path.basename(src))

if __name__ == "__main__":
    main()

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: python/trunk/Lib/zipfile.py@ 611

Download in other formats: