Context Navigation

← Previous Revision
Next Revision →
Normal
Revision Log

zipfile.py

Last change on this file was 391, checked in by dmik, 11 years ago
python: Merge vendor 2.7.6 to trunk.
Property svn:eol-style set to `native`
File size: 56.5 KB

Rev	Line
[2]	1	"""
	2	Read and write ZIP files.
	3	"""
	4	import struct, os, time, sys, shutil
	5	import binascii, cStringIO, stat
[391]	6	import io
	7	import re
	8	import string
[2]	9
	10	try:
	11	import zlib # We may need its compression method
	12	crc32 = zlib.crc32
	13	except ImportError:
	14	zlib = None
	15	crc32 = binascii.crc32
	16
	17	__all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
	18	"ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile" ]
	19
	20	class BadZipfile(Exception):
	21	pass
	22
	23
	24	class LargeZipFile(Exception):
	25	"""
	26	Raised when writing a zipfile, the zipfile requires ZIP64 extensions
	27	and those extensions are disabled.
	28	"""
	29
	30	error = BadZipfile # The exception raised by this module
	31
	32	ZIP64_LIMIT = (1 << 31) - 1
	33	ZIP_FILECOUNT_LIMIT = 1 << 16
	34	ZIP_MAX_COMMENT = (1 << 16) - 1
	35
	36	# constants for Zip file compression methods
	37	ZIP_STORED = 0
	38	ZIP_DEFLATED = 8
	39	# Other ZIP compression methods not supported
	40
	41	# Below are some formats and associated data for reading/writing headers using
	42	# the struct module. The names and structures of headers/records are those used
	43	# in the PKWARE description of the ZIP file format:
	44	# http://www.pkware.com/documents/casestudies/APPNOTE.TXT
	45	# (URL valid as of January 2008)
	46
	47	# The "end of central directory" structure, magic number, size, and indices
	48	# (section V.I in the format document)
	49	structEndArchive = "<4s4H2LH"
	50	stringEndArchive = "PK\005\006"
	51	sizeEndCentDir = struct.calcsize(structEndArchive)
	52
	53	_ECD_SIGNATURE = 0
	54	_ECD_DISK_NUMBER = 1
	55	_ECD_DISK_START = 2
	56	_ECD_ENTRIES_THIS_DISK = 3
	57	_ECD_ENTRIES_TOTAL = 4
	58	_ECD_SIZE = 5
	59	_ECD_OFFSET = 6
	60	_ECD_COMMENT_SIZE = 7
	61	# These last two indices are not part of the structure as defined in the
	62	# spec, but they are used internally by this module as a convenience
	63	_ECD_COMMENT = 8
	64	_ECD_LOCATION = 9
	65
	66	# The "central directory" structure, magic number, size, and indices
	67	# of entries in the structure (section V.F in the format document)
	68	structCentralDir = "<4s4B4HL2L5H2L"
	69	stringCentralDir = "PK\001\002"
	70	sizeCentralDir = struct.calcsize(structCentralDir)
	71
	72	# indexes of entries in the central directory structure
	73	_CD_SIGNATURE = 0
	74	_CD_CREATE_VERSION = 1
	75	_CD_CREATE_SYSTEM = 2
	76	_CD_EXTRACT_VERSION = 3
	77	_CD_EXTRACT_SYSTEM = 4
	78	_CD_FLAG_BITS = 5
	79	_CD_COMPRESS_TYPE = 6
	80	_CD_TIME = 7
	81	_CD_DATE = 8
	82	_CD_CRC = 9
	83	_CD_COMPRESSED_SIZE = 10
	84	_CD_UNCOMPRESSED_SIZE = 11
	85	_CD_FILENAME_LENGTH = 12
	86	_CD_EXTRA_FIELD_LENGTH = 13
	87	_CD_COMMENT_LENGTH = 14
	88	_CD_DISK_NUMBER_START = 15
	89	_CD_INTERNAL_FILE_ATTRIBUTES = 16
	90	_CD_EXTERNAL_FILE_ATTRIBUTES = 17
	91	_CD_LOCAL_HEADER_OFFSET = 18
	92
	93	# The "local file header" structure, magic number, size, and indices
	94	# (section V.A in the format document)
	95	structFileHeader = "<4s2B4HL2L2H"
	96	stringFileHeader = "PK\003\004"
	97	sizeFileHeader = struct.calcsize(structFileHeader)
	98
	99	_FH_SIGNATURE = 0
	100	_FH_EXTRACT_VERSION = 1
	101	_FH_EXTRACT_SYSTEM = 2
	102	_FH_GENERAL_PURPOSE_FLAG_BITS = 3
	103	_FH_COMPRESSION_METHOD = 4
	104	_FH_LAST_MOD_TIME = 5
	105	_FH_LAST_MOD_DATE = 6
	106	_FH_CRC = 7
	107	_FH_COMPRESSED_SIZE = 8
	108	_FH_UNCOMPRESSED_SIZE = 9
	109	_FH_FILENAME_LENGTH = 10
	110	_FH_EXTRA_FIELD_LENGTH = 11
	111
	112	# The "Zip64 end of central directory locator" structure, magic number, and size
	113	structEndArchive64Locator = "<4sLQL"
	114	stringEndArchive64Locator = "PK\x06\x07"
	115	sizeEndCentDir64Locator = struct.calcsize(structEndArchive64Locator)
	116
	117	# The "Zip64 end of central directory" record, magic number, size, and indices
	118	# (section V.G in the format document)
	119	structEndArchive64 = "<4sQ2H2L4Q"
	120	stringEndArchive64 = "PK\x06\x06"
	121	sizeEndCentDir64 = struct.calcsize(structEndArchive64)
	122
	123	_CD64_SIGNATURE = 0
	124	_CD64_DIRECTORY_RECSIZE = 1
	125	_CD64_CREATE_VERSION = 2
	126	_CD64_EXTRACT_VERSION = 3
	127	_CD64_DISK_NUMBER = 4
	128	_CD64_DISK_NUMBER_START = 5
	129	_CD64_NUMBER_ENTRIES_THIS_DISK = 6
	130	_CD64_NUMBER_ENTRIES_TOTAL = 7
	131	_CD64_DIRECTORY_SIZE = 8
	132	_CD64_OFFSET_START_CENTDIR = 9
	133
def _check_zipfile(fp):
    """Return True if _EndRecData() finds an end-of-archive record in fp.

    An IOError raised while probing fp is treated as "not a ZIP file".
    """
    try:
        found = bool(_EndRecData(fp))   # truthy record: correct magic number
    except IOError:
        found = False
    return found
	141
def is_zipfile(filename):
    """Quickly see if a file is a ZIP file by checking the magic number.

    The filename argument may be a file or file-like object too (anything
    with a "read" attribute is probed directly, otherwise it is opened in
    binary mode).  IOError while opening or probing yields False.
    """
    is_zip = False
    try:
        if not hasattr(filename, "read"):
            with open(filename, "rb") as handle:
                is_zip = _check_zipfile(handle)
        else:
            is_zip = _check_zipfile(fp=filename)
    except IOError:
        pass
    return is_zip
	157
def _EndRecData64(fpin, offset, endrec):
    """
    Read the ZIP64 end-of-archive records and use that to update endrec

    offset is the position of the regular end-of-archive record relative to
    the end of the file.  endrec is returned unchanged when no ZIP64 locator
    or record is found there; otherwise its first seven items are
    overwritten with the ZIP64 values.  Raises BadZipfile for archives that
    span multiple disks.
    """
    locator_pos = offset - sizeEndCentDir64Locator
    try:
        fpin.seek(locator_pos, 2)
    except IOError:
        # If the seek fails, the file is not large enough to contain a ZIP64
        # end-of-archive record, so just return the end record we were given.
        return endrec

    raw = fpin.read(sizeEndCentDir64Locator)
    if len(raw) != sizeEndCentDir64Locator:
        return endrec
    sig, diskno, _reloff, disks = struct.unpack(structEndArchive64Locator, raw)
    if sig != stringEndArchive64Locator:
        return endrec

    if not (diskno == 0 and disks == 1):
        raise BadZipfile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    fpin.seek(locator_pos - sizeEndCentDir64, 2)
    raw = fpin.read(sizeEndCentDir64)
    if len(raw) != sizeEndCentDir64:
        return endrec
    rec64 = struct.unpack(structEndArchive64, raw)
    if rec64[_CD64_SIGNATURE] != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[_ECD_SIGNATURE] = rec64[_CD64_SIGNATURE]
    endrec[_ECD_DISK_NUMBER] = rec64[_CD64_DISK_NUMBER]
    endrec[_ECD_DISK_START] = rec64[_CD64_DISK_NUMBER_START]
    endrec[_ECD_ENTRIES_THIS_DISK] = rec64[_CD64_NUMBER_ENTRIES_THIS_DISK]
    endrec[_ECD_ENTRIES_TOTAL] = rec64[_CD64_NUMBER_ENTRIES_TOTAL]
    endrec[_ECD_SIZE] = rec64[_CD64_DIRECTORY_SIZE]
    endrec[_ECD_OFFSET] = rec64[_CD64_OFFSET_START_CENTDIR]
    return endrec
	199
	200
	201	def _EndRecData(fpin):
	202	"""Return data from the "End of Central Directory" record, or None.
	203
	204	The data is a list of the nine items in the ZIP "End of central dir"
	205	record followed by a tenth item, the file seek offset of this record."""
	206
	207	# Determine file size
	208	fpin.seek(0, 2)
	209	filesize = fpin.tell()
	210
	211	# Check to see if this is ZIP file with no archive comment (the
	212	# "end of central directory" structure should be the last item in the
	213	# file if this is the case).
	214	try:
	215	fpin.seek(-sizeEndCentDir, 2)
	216	except IOError:
	217	return None
	218	data = fpin.read()
[391]	219	if (len(data) == sizeEndCentDir and
	220	data[0:4] == stringEndArchive and
	221	data[-2:] == b"\000\000"):
[2]	222	# the signature is correct and there's no comment, unpack structure
	223	endrec = struct.unpack(structEndArchive, data)
	224	endrec=list(endrec)
	225
	226	# Append a blank comment and record start offset
	227	endrec.append("")
	228	endrec.append(filesize - sizeEndCentDir)
	229
	230	# Try to read the "Zip64 end of central directory" structure
	231	return _EndRecData64(fpin, -sizeEndCentDir, endrec)
	232
	233	# Either this is not a ZIP file, or it is a ZIP file with an archive
	234	# comment. Search the end of the file for the "end of central directory"
	235	# record signature. The comment is the last item in the ZIP file and may be
	236	# up to 64K long. It is assumed that the "end of central directory" magic
	237	# number does not appear in the comment.
	238	maxCommentStart = max(filesize - (1 << 16) - sizeEndCentDir, 0)
	239	fpin.seek(maxCommentStart, 0)
	240	data = fpin.read()
	241	start = data.rfind(stringEndArchive)
	242	if start >= 0:
	243	# found the magic number; attempt to unpack and interpret
	244	recData = data[start:start+sizeEndCentDir]
[391]	245	if len(recData) != sizeEndCentDir:
	246	# Zip file is corrupted.
	247	return None
[2]	248	endrec = list(struct.unpack(structEndArchive, recData))
[391]	249	commentSize = endrec[_ECD_COMMENT_SIZE] #as claimed by the zip file
	250	comment = data[start+sizeEndCentDir:start+sizeEndCentDir+commentSize]
	251	endrec.append(comment)
	252	endrec.append(maxCommentStart + start)
[2]	253
[391]	254	# Try to read the "Zip64 end of central directory" structure
	255	return _EndRecData64(fpin, maxCommentStart + start - filesize,
	256	endrec)
[2]	257
	258	# Unable to find a valid end of central directory structure
[391]	259	return None
[2]	260
	261
	262	class ZipInfo (object):
	263	"""Class with attributes describing each file in the ZIP archive."""
	264
	265	__slots__ = (
	266	'orig_filename',
	267	'filename',
	268	'date_time',
	269	'compress_type',
	270	'comment',
	271	'extra',
	272	'create_system',
	273	'create_version',
	274	'extract_version',
	275	'reserved',
	276	'flag_bits',
	277	'volume',
	278	'internal_attr',
	279	'external_attr',
	280	'header_offset',
	281	'CRC',
	282	'compress_size',
	283	'file_size',
	284	'_raw_time',
	285	)
	286
	287	def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
	288	self.orig_filename = filename # Original file name in archive
	289
	290	# Terminate the file name at the first null byte. Null bytes in file
	291	# names are used as tricks by viruses in archives.
	292	null_byte = filename.find(chr(0))
	293	if null_byte >= 0:
	294	filename = filename[0:null_byte]
	295	# This is used to ensure paths in generated ZIP files always use
	296	# forward slashes as the directory separator, as required by the
	297	# ZIP format specification.
	298	if os.sep != "/" and os.sep in filename:
	299	filename = filename.replace(os.sep, "/")
	300
	301	self.filename = filename # Normalized file name
	302	self.date_time = date_time # year, month, day, hour, min, sec
[391]	303
	304	if date_time[0] < 1980:
	305	raise ValueError('ZIP does not support timestamps before 1980')
	306
[2]	307	# Standard values:
	308	self.compress_type = ZIP_STORED # Type of compression for the file
	309	self.comment = "" # Comment for each file
	310	self.extra = "" # ZIP extra data
	311	if sys.platform == 'win32':
	312	self.create_system = 0 # System which created ZIP archive
	313	else:
	314	# Assume everything else is unix-y
	315	self.create_system = 3 # System which created ZIP archive
	316	self.create_version = 20 # Version which created ZIP archive
	317	self.extract_version = 20 # Version needed to extract archive
	318	self.reserved = 0 # Must be zero
	319	self.flag_bits = 0 # ZIP flag bits
	320	self.volume = 0 # Volume number of file header
	321	self.internal_attr = 0 # Internal attributes
	322	self.external_attr = 0 # External file attributes
	323	# Other attributes are set by class ZipFile:
	324	# header_offset Byte offset to the file header
	325	# CRC CRC-32 of the uncompressed file
	326	# compress_size Size of the compressed file
	327	# file_size Size of the uncompressed file
	328
[391]	329	def FileHeader(self, zip64=None):
[2]	330	"""Return the per-file header as a string."""
	331	dt = self.date_time
	332	dosdate = (dt[0] - 1980) << 9 \| dt[1] << 5 \| dt[2]
	333	dostime = dt[3] << 11 \| dt[4] << 5 \| (dt[5] // 2)
	334	if self.flag_bits & 0x08:
	335	# Set these to zero because we write them after the file data
	336	CRC = compress_size = file_size = 0
	337	else:
	338	CRC = self.CRC
	339	compress_size = self.compress_size
	340	file_size = self.file_size
	341
	342	extra = self.extra
	343
[391]	344	if zip64 is None:
	345	zip64 = file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT
	346	if zip64:
	347	fmt = '<HHQQ'
	348	extra = extra + struct.pack(fmt,
	349	1, struct.calcsize(fmt)-4, file_size, compress_size)
[2]	350	if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
[391]	351	if not zip64:
	352	raise LargeZipFile("Filesize would require ZIP64 extensions")
[2]	353	# File is larger than what fits into a 4 byte integer,
	354	# fall back to the ZIP64 extension
	355	file_size = 0xffffffff
	356	compress_size = 0xffffffff
	357	self.extract_version = max(45, self.extract_version)
	358	self.create_version = max(45, self.extract_version)
	359
	360	filename, flag_bits = self._encodeFilenameFlags()
	361	header = struct.pack(structFileHeader, stringFileHeader,
	362	self.extract_version, self.reserved, flag_bits,
	363	self.compress_type, dostime, dosdate, CRC,
	364	compress_size, file_size,
	365	len(filename), len(extra))
	366	return header + filename + extra
	367
	368	def _encodeFilenameFlags(self):
	369	if isinstance(self.filename, unicode):
	370	try:
	371	return self.filename.encode('ascii'), self.flag_bits
	372	except UnicodeEncodeError:
	373	return self.filename.encode('utf-8'), self.flag_bits \| 0x800
	374	else:
	375	return self.filename, self.flag_bits
	376
	377	def _decodeFilename(self):
	378	if self.flag_bits & 0x800:
	379	return self.filename.decode('utf-8')
	380	else:
	381	return self.filename
	382
	383	def _decodeExtra(self):
	384	# Try to decode the extra field.
	385	extra = self.extra
	386	unpack = struct.unpack
	387	while extra:
	388	tp, ln = unpack('<HH', extra[:4])
	389	if tp == 1:
	390	if ln >= 24:
	391	counts = unpack('<QQQ', extra[4:28])
	392	elif ln == 16:
	393	counts = unpack('<QQ', extra[4:20])
	394	elif ln == 8:
	395	counts = unpack('<Q', extra[4:12])
	396	elif ln == 0:
	397	counts = ()
	398	else:
	399	raise RuntimeError, "Corrupt extra field %s"%(ln,)
	400
	401	idx = 0
	402
	403	# ZIP64 extension (large files and/or large archives)
	404	if self.file_size in (0xffffffffffffffffL, 0xffffffffL):
	405	self.file_size = counts[idx]
	406	idx += 1
	407
	408	if self.compress_size == 0xFFFFFFFFL:
	409	self.compress_size = counts[idx]
	410	idx += 1
	411
	412	if self.header_offset == 0xffffffffL:
	413	old = self.header_offset
	414	self.header_offset = counts[idx]
	415	idx+=1
	416
	417	extra = extra[ln+4:]
	418
	419
	420	class _ZipDecrypter:
	421	"""Class to handle decryption of files stored within a ZIP archive.
	422
	423	ZIP supports a password-based form of encryption. Even though known
	424	plaintext attacks have been found against it, it is still useful
	425	to be able to get data out of such a file.
	426
	427	Usage:
	428	zd = _ZipDecrypter(mypwd)
	429	plain_char = zd(cypher_char)
	430	plain_text = map(zd, cypher_text)
	431	"""
	432
	433	def _GenerateCRCTable():
	434	"""Generate a CRC-32 table.
	435
	436	ZIP encryption uses the CRC32 one-byte primitive for scrambling some
	437	internal keys. We noticed that a direct implementation is faster than
	438	relying on binascii.crc32().
	439	"""
	440	poly = 0xedb88320
	441	table = [0] * 256
	442	for i in range(256):
	443	crc = i
	444	for j in range(8):
	445	if crc & 1:
	446	crc = ((crc >> 1) & 0x7FFFFFFF) ^ poly
	447	else:
	448	crc = ((crc >> 1) & 0x7FFFFFFF)
	449	table[i] = crc
	450	return table
	451	crctable = _GenerateCRCTable()
	452
	453	def _crc32(self, ch, crc):
	454	"""Compute the CRC32 primitive on one byte."""
	455	return ((crc >> 8) & 0xffffff) ^ self.crctable[(crc ^ ord(ch)) & 0xff]
	456
	457	def __init__(self, pwd):
	458	self.key0 = 305419896
	459	self.key1 = 591751049
	460	self.key2 = 878082192
	461	for p in pwd:
	462	self._UpdateKeys(p)
	463
	464	def _UpdateKeys(self, c):
	465	self.key0 = self._crc32(c, self.key0)
	466	self.key1 = (self.key1 + (self.key0 & 255)) & 4294967295
	467	self.key1 = (self.key1 * 134775813 + 1) & 4294967295
	468	self.key2 = self._crc32(chr((self.key1 >> 24) & 255), self.key2)
	469
	470	def __call__(self, c):
	471	"""Decrypt a single character."""
	472	c = ord(c)
	473	k = self.key2 \| 2
	474	c = c ^ (((k * (k^1)) >> 8) & 255)
	475	c = chr(c)
	476	self._UpdateKeys(c)
	477	return c
	478
[391]	479
	480	compressor_names = {
	481	0: 'store',
	482	1: 'shrink',
	483	2: 'reduce',
	484	3: 'reduce',
	485	4: 'reduce',
	486	5: 'reduce',
	487	6: 'implode',
	488	7: 'tokenize',
	489	8: 'deflate',
	490	9: 'deflate64',
	491	10: 'implode',
	492	12: 'bzip2',
	493	14: 'lzma',
	494	18: 'terse',
	495	19: 'lz77',
	496	97: 'wavpack',
	497	98: 'ppmd',
	498	}
	499
	500
	501	class ZipExtFile(io.BufferedIOBase):
[2]	502	"""File-like object for reading an archive member.
	503	Is returned by ZipFile.open().
	504	"""
	505
[391]	506	# Max size supported by decompressor.
	507	MAX_N = 1 << 31 - 1
[2]	508
[391]	509	# Read from compressed files in 4k blocks.
	510	MIN_READ_SIZE = 4096
[2]	511
[391]	512	# Search for universal newlines or line chunks.
	513	PATTERN = re.compile(r'^(?P<chunk>[^\r\n]+)\|(?P<newline>\n\|\r\n?)')
[2]	514
[391]	515	def __init__(self, fileobj, mode, zipinfo, decrypter=None,
	516	close_fileobj=False):
	517	self._fileobj = fileobj
	518	self._decrypter = decrypter
	519	self._close_fileobj = close_fileobj
[2]	520
[391]	521	self._compress_type = zipinfo.compress_type
	522	self._compress_size = zipinfo.compress_size
	523	self._compress_left = zipinfo.compress_size
[2]	524
[391]	525	if self._compress_type == ZIP_DEFLATED:
	526	self._decompressor = zlib.decompressobj(-15)
	527	elif self._compress_type != ZIP_STORED:
	528	descr = compressor_names.get(self._compress_type)
	529	if descr:
	530	raise NotImplementedError("compression type %d (%s)" % (self._compress_type, descr))
	531	else:
	532	raise NotImplementedError("compression type %d" % (self._compress_type,))
	533	self._unconsumed = ''
[2]	534
[391]	535	self._readbuffer = ''
	536	self._offset = 0
[2]	537
[391]	538	self._universal = 'U' in mode
	539	self.newlines = None
[2]	540
[391]	541	# Adjust read size for encrypted files since the first 12 bytes
	542	# are for the encryption/password information.
	543	if self._decrypter is not None:
	544	self._compress_left -= 12
[2]	545
[391]	546	self.mode = mode
	547	self.name = zipinfo.filename
[2]	548
[391]	549	if hasattr(zipinfo, 'CRC'):
	550	self._expected_crc = zipinfo.CRC
	551	self._running_crc = crc32(b'') & 0xffffffff
	552	else:
	553	self._expected_crc = None
[2]	554
[391]	555	def readline(self, limit=-1):
	556	"""Read and return a line from the stream.
[2]	557
[391]	558	If limit is specified, at most limit bytes will be read.
[2]	559	"""
	560
[391]	561	if not self._universal and limit < 0:
	562	# Shortcut common case - newline found in buffer.
	563	i = self._readbuffer.find('\n', self._offset) + 1
	564	if i > 0:
	565	line = self._readbuffer[self._offset: i]
	566	self._offset = i
	567	return line
[2]	568
[391]	569	if not self._universal:
	570	return io.BufferedIOBase.readline(self, limit)
[2]	571
[391]	572	line = ''
	573	while limit < 0 or len(line) < limit:
	574	readahead = self.peek(2)
	575	if readahead == '':
	576	return line
[2]	577
[391]	578	#
	579	# Search for universal newlines or line chunks.
	580	#
	581	# The pattern returns either a line chunk or a newline, but not
	582	# both. Combined with peek(2), we are assured that the sequence
	583	# '\r\n' is always retrieved completely and never split into
	584	# separate newlines - '\r', '\n' due to coincidental readaheads.
	585	#
	586	match = self.PATTERN.search(readahead)
	587	newline = match.group('newline')
	588	if newline is not None:
	589	if self.newlines is None:
	590	self.newlines = []
	591	if newline not in self.newlines:
	592	self.newlines.append(newline)
	593	self._offset += len(newline)
	594	return line + '\n'
[2]	595
[391]	596	chunk = match.group('chunk')
	597	if limit >= 0:
	598	chunk = chunk[: limit - len(line)]
[2]	599
[391]	600	self._offset += len(chunk)
	601	line += chunk
[2]	602
[391]	603	return line
	604
	605	def peek(self, n=1):
	606	"""Returns buffered bytes without advancing the position."""
	607	if n > len(self._readbuffer) - self._offset:
	608	chunk = self.read(n)
	609	self._offset -= len(chunk)
	610
	611	# Return up to 512 bytes to reduce allocation overhead for tight loops.
	612	return self._readbuffer[self._offset: self._offset + 512]
	613
	614	def readable(self):
	615	return True
	616
	617	def read(self, n=-1):
	618	"""Read and return up to n bytes.
	619	If the argument is omitted, None, or negative, data is read and returned until EOF is reached..
[2]	620	"""
[391]	621	buf = ''
	622	if n is None:
	623	n = -1
[2]	624	while True:
[391]	625	if n < 0:
	626	data = self.read1(n)
	627	elif n > len(buf):
	628	data = self.read1(n - len(buf))
	629	else:
	630	return buf
	631	if len(data) == 0:
	632	return buf
	633	buf += data
[2]	634
[391]	635	def _update_crc(self, newdata, eof):
	636	# Update the CRC using the given data.
	637	if self._expected_crc is None:
	638	# No need to compute the CRC if we don't have a reference value
	639	return
	640	self._running_crc = crc32(newdata, self._running_crc) & 0xffffffff
	641	# Check the CRC if we're at the end of the file
	642	if eof and self._running_crc != self._expected_crc:
	643	raise BadZipfile("Bad CRC-32 for file %r" % self.name)
[2]	644
[391]	645	def read1(self, n):
	646	"""Read up to n bytes with at most one read() system call."""
[2]	647
[391]	648	# Simplify algorithm (branching) by transforming negative n to large n.
	649	if n < 0 or n is None:
	650	n = self.MAX_N
[2]	651
[391]	652	# Bytes available in read buffer.
	653	len_readbuffer = len(self._readbuffer) - self._offset
[2]	654
[391]	655	# Read from file.
	656	if self._compress_left > 0 and n > len_readbuffer + len(self._unconsumed):
	657	nbytes = n - len_readbuffer - len(self._unconsumed)
	658	nbytes = max(nbytes, self.MIN_READ_SIZE)
	659	nbytes = min(nbytes, self._compress_left)
[2]	660
[391]	661	data = self._fileobj.read(nbytes)
	662	self._compress_left -= len(data)
[2]	663
[391]	664	if data and self._decrypter is not None:
	665	data = ''.join(map(self._decrypter, data))
[2]	666
[391]	667	if self._compress_type == ZIP_STORED:
	668	self._update_crc(data, eof=(self._compress_left==0))
	669	self._readbuffer = self._readbuffer[self._offset:] + data
	670	self._offset = 0
	671	else:
	672	# Prepare deflated bytes for decompression.
	673	self._unconsumed += data
[2]	674
[391]	675	# Handle unconsumed data.
	676	if (len(self._unconsumed) > 0 and n > len_readbuffer and
	677	self._compress_type == ZIP_DEFLATED):
	678	data = self._decompressor.decompress(
	679	self._unconsumed,
	680	max(n - len_readbuffer, self.MIN_READ_SIZE)
	681	)
[2]	682
[391]	683	self._unconsumed = self._decompressor.unconsumed_tail
	684	eof = len(self._unconsumed) == 0 and self._compress_left == 0
	685	if eof:
	686	data += self._decompressor.flush()
[2]	687
[391]	688	self._update_crc(data, eof=eof)
	689	self._readbuffer = self._readbuffer[self._offset:] + data
	690	self._offset = 0
[2]	691
[391]	692	# Read from buffer.
	693	data = self._readbuffer[self._offset: self._offset + n]
	694	self._offset += len(data)
	695	return data
[2]	696
[391]	697	def close(self):
	698	try :
	699	if self._close_fileobj:
	700	self._fileobj.close()
	701	finally:
	702	super(ZipExtFile, self).close()
[2]	703
	704
[391]	705	class ZipFile(object):
[2]	706	""" Class with methods to open, read, write, close, list zip files.
	707
	708	z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)
	709
	710	file: Either the path to the file, or a file-like object.
	711	If it is a path, the file will be opened and closed by ZipFile.
	712	mode: The mode can be either read "r", write "w" or append "a".
	713	compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
	714	allowZip64: if True ZipFile will create files with ZIP64 extensions when
	715	needed, otherwise it will raise an exception when this would
	716	be necessary.
	717
	718	"""
	719
	720	fp = None # Set here since __del__ checks it
	721
	722	def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
	723	"""Open the ZIP file with mode read "r", write "w" or append "a"."""
	724	if mode not in ("r", "w", "a"):
	725	raise RuntimeError('ZipFile() requires mode "r", "w", or "a"')
	726
	727	if compression == ZIP_STORED:
	728	pass
	729	elif compression == ZIP_DEFLATED:
	730	if not zlib:
	731	raise RuntimeError,\
	732	"Compression requires the (missing) zlib module"
	733	else:
	734	raise RuntimeError, "That compression method is not supported"
	735
	736	self._allowZip64 = allowZip64
	737	self._didModify = False
	738	self.debug = 0 # Level of printing: 0 through 3
	739	self.NameToInfo = {} # Find file info given name
	740	self.filelist = [] # List of ZipInfo instances for archive
	741	self.compression = compression # Method of compression
	742	self.mode = key = mode.replace('b', '')[0]
	743	self.pwd = None
[391]	744	self._comment = ''
[2]	745
	746	# Check if we were passed a file-like object
	747	if isinstance(file, basestring):
	748	self._filePassed = 0
	749	self.filename = file
	750	modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
	751	try:
	752	self.fp = open(file, modeDict[mode])
	753	except IOError:
	754	if mode == 'a':
	755	mode = key = 'w'
	756	self.fp = open(file, modeDict[mode])
	757	else:
	758	raise
	759	else:
	760	self._filePassed = 1
	761	self.fp = file
	762	self.filename = getattr(file, 'name', None)
	763
[391]	764	try:
	765	if key == 'r':
[2]	766	self._RealGetContents()
[391]	767	elif key == 'w':
	768	# set the modified flag so central directory gets written
	769	# even if no files are added to the archive
	770	self._didModify = True
	771	elif key == 'a':
	772	try:
	773	# See if file is a zip file
	774	self._RealGetContents()
	775	# seek to start of directory and overwrite
	776	self.fp.seek(self.start_dir, 0)
	777	except BadZipfile:
	778	# file is not a zip file, just append
	779	self.fp.seek(0, 2)
[2]	780
[391]	781	# set the modified flag so central directory gets written
	782	# even if no files are added to the archive
	783	self._didModify = True
	784	else:
	785	raise RuntimeError('Mode must be "r", "w" or "a"')
	786	except:
	787	fp = self.fp
	788	self.fp = None
[2]	789	if not self._filePassed:
[391]	790	fp.close()
[2]	791	raise
	792
[391]	793	def __enter__(self):
	794	return self
	795
	796	def __exit__(self, type, value, traceback):
	797	self.close()
	798
[2]	799	def _RealGetContents(self):
	800	"""Read in the table of contents for the ZIP file."""
	801	fp = self.fp
[391]	802	try:
	803	endrec = _EndRecData(fp)
	804	except IOError:
	805	raise BadZipfile("File is not a zip file")
[2]	806	if not endrec:
	807	raise BadZipfile, "File is not a zip file"
	808	if self.debug > 1:
	809	print endrec
	810	size_cd = endrec[_ECD_SIZE] # bytes in central directory
	811	offset_cd = endrec[_ECD_OFFSET] # offset of central directory
[391]	812	self._comment = endrec[_ECD_COMMENT] # archive comment
[2]	813
	814	# "concat" is zero, unless zip was concatenated to another file
	815	concat = endrec[_ECD_LOCATION] - size_cd - offset_cd
	816	if endrec[_ECD_SIGNATURE] == stringEndArchive64:
	817	# If Zip64 extension structures are present, account for them
	818	concat -= (sizeEndCentDir64 + sizeEndCentDir64Locator)
	819
	820	if self.debug > 2:
	821	inferred = concat + offset_cd
	822	print "given, inferred, offset", offset_cd, inferred, concat
	823	# self.start_dir: Position of start of central directory
	824	self.start_dir = offset_cd + concat
	825	fp.seek(self.start_dir, 0)
	826	data = fp.read(size_cd)
	827	fp = cStringIO.StringIO(data)
	828	total = 0
	829	while total < size_cd:
	830	centdir = fp.read(sizeCentralDir)
[391]	831	if len(centdir) != sizeCentralDir:
	832	raise BadZipfile("Truncated central directory")
[2]	833	centdir = struct.unpack(structCentralDir, centdir)
[391]	834	if centdir[_CD_SIGNATURE] != stringCentralDir:
	835	raise BadZipfile("Bad magic number for central directory")
[2]	836	if self.debug > 2:
	837	print centdir
	838	filename = fp.read(centdir[_CD_FILENAME_LENGTH])
	839	# Create ZipInfo instance to store file information
	840	x = ZipInfo(filename)
	841	x.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
	842	x.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
	843	x.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
	844	(x.create_version, x.create_system, x.extract_version, x.reserved,
	845	x.flag_bits, x.compress_type, t, d,
	846	x.CRC, x.compress_size, x.file_size) = centdir[1:12]
	847	x.volume, x.internal_attr, x.external_attr = centdir[15:18]
	848	# Convert date/time code to (year, month, day, hour, min, sec)
	849	x._raw_time = t
	850	x.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
	851	t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )
	852
	853	x._decodeExtra()
	854	x.header_offset = x.header_offset + concat
	855	x.filename = x._decodeFilename()
	856	self.filelist.append(x)
	857	self.NameToInfo[x.filename] = x
	858
	859	# update total bytes read from central directory
	860	total = (total + sizeCentralDir + centdir[_CD_FILENAME_LENGTH]
	861	+ centdir[_CD_EXTRA_FIELD_LENGTH]
	862	+ centdir[_CD_COMMENT_LENGTH])
	863
	864	if self.debug > 2:
	865	print "total", total
	866
	867
	868	def namelist(self):
	869	"""Return a list of file names in the archive."""
	870	l = []
	871	for data in self.filelist:
	872	l.append(data.filename)
	873	return l
	874
	875	def infolist(self):
	876	"""Return a list of class ZipInfo instances for files in the
	877	archive."""
	878	return self.filelist
	879
	880	def printdir(self):
	881	"""Print a table of contents for the zip file."""
	882	print "%-46s %19s %12s" % ("File Name", "Modified ", "Size")
	883	for zinfo in self.filelist:
	884	date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time[:6]
	885	print "%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size)
	886
	887	def testzip(self):
	888	"""Read all the files and check the CRC."""
	889	chunk_size = 2 ** 20
	890	for zinfo in self.filelist:
	891	try:
	892	# Read by chunks, to avoid an OverflowError or a
	893	# MemoryError with very large embedded files.
[391]	894	with self.open(zinfo.filename, "r") as f:
	895	while f.read(chunk_size): # Check CRC-32
	896	pass
[2]	897	except BadZipfile:
	898	return zinfo.filename
	899
	900	def getinfo(self, name):
	901	"""Return the instance of ZipInfo given 'name'."""
	902	info = self.NameToInfo.get(name)
	903	if info is None:
	904	raise KeyError(
	905	'There is no item named %r in the archive' % name)
	906
	907	return info
	908
	909	def setpassword(self, pwd):
	910	"""Set default password for encrypted files."""
	911	self.pwd = pwd
	912
[391]	913	@property
	914	def comment(self):
	915	"""The comment text associated with the ZIP file."""
	916	return self._comment
	917
	918	@comment.setter
	919	def comment(self, comment):
	920	# check for valid comment length
	921	if len(comment) >= ZIP_MAX_COMMENT:
	922	if self.debug:
	923	print('Archive comment is too long; truncating to %d bytes'
	924	% ZIP_MAX_COMMENT)
	925	comment = comment[:ZIP_MAX_COMMENT]
	926	self._comment = comment
	927	self._didModify = True
	928
[2]	929	def read(self, name, pwd=None):
	930	"""Return file bytes (as a string) for name."""
	931	return self.open(name, "r", pwd).read()
	932
	933	def open(self, name, mode="r", pwd=None):
	934	"""Return file-like object for 'name'."""
	935	if mode not in ("r", "U", "rU"):
	936	raise RuntimeError, 'open() requires mode "r", "U", or "rU"'
	937	if not self.fp:
	938	raise RuntimeError, \
	939	"Attempt to read ZIP archive that was already closed"
	940
	941	# Only open a new file for instances where we were not
	942	# given a file object in the constructor
	943	if self._filePassed:
	944	zef_file = self.fp
[391]	945	should_close = False
[2]	946	else:
	947	zef_file = open(self.filename, 'rb')
[391]	948	should_close = True
[2]	949
[391]	950	try:
	951	# Make sure we have an info object
	952	if isinstance(name, ZipInfo):
	953	# 'name' is already an info object
	954	zinfo = name
	955	else:
	956	# Get info object for name
	957	zinfo = self.getinfo(name)
[2]	958
[391]	959	zef_file.seek(zinfo.header_offset, 0)
[2]	960
[391]	961	# Skip the file header:
	962	fheader = zef_file.read(sizeFileHeader)
	963	if len(fheader) != sizeFileHeader:
	964	raise BadZipfile("Truncated file header")
	965	fheader = struct.unpack(structFileHeader, fheader)
	966	if fheader[_FH_SIGNATURE] != stringFileHeader:
	967	raise BadZipfile("Bad magic number for file header")
[2]	968
[391]	969	fname = zef_file.read(fheader[_FH_FILENAME_LENGTH])
	970	if fheader[_FH_EXTRA_FIELD_LENGTH]:
	971	zef_file.read(fheader[_FH_EXTRA_FIELD_LENGTH])
[2]	972
[391]	973	if fname != zinfo.orig_filename:
	974	raise BadZipfile, \
	975	'File name in directory "%s" and header "%s" differ.' % (
	976	zinfo.orig_filename, fname)
[2]	977
[391]	978	# check for encrypted flag & handle password
	979	is_encrypted = zinfo.flag_bits & 0x1
	980	zd = None
	981	if is_encrypted:
	982	if not pwd:
	983	pwd = self.pwd
	984	if not pwd:
	985	raise RuntimeError, "File %s is encrypted, " \
	986	"password required for extraction" % name
[2]	987
[391]	988	zd = _ZipDecrypter(pwd)
	989	# The first 12 bytes in the cypher stream is an encryption header
	990	# used to strengthen the algorithm. The first 11 bytes are
	991	# completely random, while the 12th contains the MSB of the CRC,
	992	# or the MSB of the file time depending on the header type
	993	# and is used to check the correctness of the password.
	994	bytes = zef_file.read(12)
	995	h = map(zd, bytes[0:12])
	996	if zinfo.flag_bits & 0x8:
	997	# compare against the file type from extended local headers
	998	check_byte = (zinfo._raw_time >> 8) & 0xff
	999	else:
	1000	# compare against the CRC otherwise
	1001	check_byte = (zinfo.CRC >> 24) & 0xff
	1002	if ord(h[11]) != check_byte:
	1003	raise RuntimeError("Bad password for file", name)
[2]	1004
[391]	1005	return ZipExtFile(zef_file, mode, zinfo, zd,
	1006	close_fileobj=should_close)
	1007	except:
	1008	if should_close:
	1009	zef_file.close()
	1010	raise
[2]	1011
	1012	def extract(self, member, path=None, pwd=None):
	1013	"""Extract a member from the archive to the current working directory,
	1014	using its full name. Its file information is extracted as accurately
	1015	as possible. `member' may be a filename or a ZipInfo object. You can
	1016	specify a different directory using `path'.
	1017	"""
	1018	if not isinstance(member, ZipInfo):
	1019	member = self.getinfo(member)
	1020
	1021	if path is None:
	1022	path = os.getcwd()
	1023
	1024	return self._extract_member(member, path, pwd)
	1025
	1026	def extractall(self, path=None, members=None, pwd=None):
	1027	"""Extract all members from the archive to the current working
	1028	directory. `path' specifies a different directory to extract to.
	1029	`members' is optional and must be a subset of the list returned
	1030	by namelist().
	1031	"""
	1032	if members is None:
	1033	members = self.namelist()
	1034
	1035	for zipinfo in members:
	1036	self.extract(zipinfo, path, pwd)
	1037
	1038	def _extract_member(self, member, targetpath, pwd):
	1039	"""Extract the ZipInfo object 'member' to a physical
	1040	file on the path targetpath.
	1041	"""
	1042	# build the destination pathname, replacing
	1043	# forward slashes to platform specific separators.
[391]	1044	arcname = member.filename.replace('/', os.path.sep)
[2]	1045
[391]	1046	if os.path.altsep:
	1047	arcname = arcname.replace(os.path.altsep, os.path.sep)
	1048	# interpret absolute pathname as relative, remove drive letter or
	1049	# UNC path, redundant separators, "." and ".." components.
	1050	arcname = os.path.splitdrive(arcname)[1]
	1051	arcname = os.path.sep.join(x for x in arcname.split(os.path.sep)
	1052	if x not in ('', os.path.curdir, os.path.pardir))
	1053	if os.path.sep == '\\':
	1054	# filter illegal characters on Windows
	1055	illegal = ':<>\|"?*'
	1056	if isinstance(arcname, unicode):
	1057	table = {ord(c): ord('_') for c in illegal}
	1058	else:
	1059	table = string.maketrans(illegal, '_' * len(illegal))
	1060	arcname = arcname.translate(table)
	1061	# remove trailing dots
	1062	arcname = (x.rstrip('.') for x in arcname.split(os.path.sep))
	1063	arcname = os.path.sep.join(x for x in arcname if x)
[2]	1064
[391]	1065	targetpath = os.path.join(targetpath, arcname)
[2]	1066	targetpath = os.path.normpath(targetpath)
	1067
	1068	# Create all upper directories if necessary.
	1069	upperdirs = os.path.dirname(targetpath)
	1070	if upperdirs and not os.path.exists(upperdirs):
	1071	os.makedirs(upperdirs)
	1072
	1073	if member.filename[-1] == '/':
	1074	if not os.path.isdir(targetpath):
	1075	os.mkdir(targetpath)
	1076	return targetpath
	1077
[391]	1078	with self.open(member, pwd=pwd) as source, \
	1079	file(targetpath, "wb") as target:
	1080	shutil.copyfileobj(source, target)
[2]	1081
	1082	return targetpath
	1083
	1084	def _writecheck(self, zinfo):
	1085	"""Check for errors before writing a file to the archive."""
	1086	if zinfo.filename in self.NameToInfo:
	1087	if self.debug: # Warning for duplicate names
	1088	print "Duplicate name:", zinfo.filename
	1089	if self.mode not in ("w", "a"):
	1090	raise RuntimeError, 'write() requires mode "w" or "a"'
	1091	if not self.fp:
	1092	raise RuntimeError, \
	1093	"Attempt to write ZIP archive that was already closed"
	1094	if zinfo.compress_type == ZIP_DEFLATED and not zlib:
	1095	raise RuntimeError, \
	1096	"Compression requires the (missing) zlib module"
	1097	if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
	1098	raise RuntimeError, \
	1099	"That compression method is not supported"
	1100	if zinfo.file_size > ZIP64_LIMIT:
	1101	if not self._allowZip64:
	1102	raise LargeZipFile("Filesize would require ZIP64 extensions")
	1103	if zinfo.header_offset > ZIP64_LIMIT:
	1104	if not self._allowZip64:
	1105	raise LargeZipFile("Zipfile size would require ZIP64 extensions")
	1106
    def write(self, filename, arcname=None, compress_type=None):
        """Put the bytes from filename into the archive under the name
        arcname.

        'arcname' defaults to 'filename'; in either case the drive and any
        leading path separators are stripped.  'compress_type' overrides
        the archive's default compression for this member.  A directory
        is stored as an entry whose name ends in '/' with no data.

        Raises RuntimeError if the archive is already closed, if the file
        size exceeds ZIP64_LIMIT although it was small enough when the
        header was first written, or if the compressed size unexpectedly
        exceeds it; _writecheck() may raise as well.
        """
        if not self.fp:
            raise RuntimeError(
                  "Attempt to write to ZIP archive that was already closed")

        st = os.stat(filename)
        isdir = stat.S_ISDIR(st.st_mode)
        mtime = time.localtime(st.st_mtime)
        date_time = mtime[0:6]
        # Create ZipInfo instance to store file information
        if arcname is None:
            arcname = filename
        arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
        # Member names are stored relative: drop leading separators.
        while arcname[0] in (os.sep, os.altsep):
            arcname = arcname[1:]
        if isdir:
            arcname += '/'
        zinfo = ZipInfo(arcname, date_time)
        zinfo.external_attr = (st[0] & 0xFFFF) << 16L      # Unix attributes
        if compress_type is None:
            zinfo.compress_type = self.compression
        else:
            zinfo.compress_type = compress_type

        zinfo.file_size = st.st_size
        zinfo.flag_bits = 0x00
        zinfo.header_offset = self.fp.tell()    # Start of header bytes

        self._writecheck(zinfo)
        self._didModify = True

        if isdir:
            # Directory entry: header only, no data follows.
            zinfo.file_size = 0
            zinfo.compress_size = 0
            zinfo.CRC = 0
            self.filelist.append(zinfo)
            self.NameToInfo[zinfo.filename] = zinfo
            self.fp.write(zinfo.FileHeader(False))
            return

        with open(filename, "rb") as fp:
            # Must overwrite CRC and sizes with correct data later
            zinfo.CRC = CRC = 0
            zinfo.compress_size = compress_size = 0
            # Compressed size can be larger than uncompressed size
            zip64 = self._allowZip64 and \
                    zinfo.file_size * 1.05 > ZIP64_LIMIT
            # Placeholder header; it is rewritten below once the CRC and
            # sizes are known.
            self.fp.write(zinfo.FileHeader(zip64))
            if zinfo.compress_type == ZIP_DEFLATED:
                cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                     zlib.DEFLATED, -15)
            else:
                cmpr = None
            file_size = 0
            # Stream the file in 8 KiB chunks, updating the CRC and the
            # size counters as we go.
            while 1:
                buf = fp.read(1024 * 8)
                if not buf:
                    break
                file_size = file_size + len(buf)
                CRC = crc32(buf, CRC) & 0xffffffff
                if cmpr:
                    buf = cmpr.compress(buf)
                    compress_size = compress_size + len(buf)
                self.fp.write(buf)
        if cmpr:
            # Emit whatever the compressor is still buffering.
            buf = cmpr.flush()
            compress_size = compress_size + len(buf)
            self.fp.write(buf)
            zinfo.compress_size = compress_size
        else:
            zinfo.compress_size = file_size
        zinfo.CRC = CRC
        zinfo.file_size = file_size
        # The placeholder header was written without ZIP64 fields, so the
        # final sizes must still fit the non-ZIP64 limits.
        if not zip64 and self._allowZip64:
            if file_size > ZIP64_LIMIT:
                raise RuntimeError('File size has increased during compressing')
            if compress_size > ZIP64_LIMIT:
                raise RuntimeError('Compressed size larger than uncompressed size')
        # Seek backwards and write file header (which will now include
        # correct CRC and file sizes)
        position = self.fp.tell()       # Preserve current position in file
        self.fp.seek(zinfo.header_offset, 0)
        self.fp.write(zinfo.FileHeader(zip64))
        self.fp.seek(position, 0)
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo
	1195
[391]	1196	def writestr(self, zinfo_or_arcname, bytes, compress_type=None):
[2]	1197	"""Write a file into the archive. The contents is the string
	1198	'bytes'. 'zinfo_or_arcname' is either a ZipInfo instance or
	1199	the name of the file in the archive."""
	1200	if not isinstance(zinfo_or_arcname, ZipInfo):
	1201	zinfo = ZipInfo(filename=zinfo_or_arcname,
	1202	date_time=time.localtime(time.time())[:6])
[391]	1203
[2]	1204	zinfo.compress_type = self.compression
	1205	zinfo.external_attr = 0600 << 16
	1206	else:
	1207	zinfo = zinfo_or_arcname
	1208
	1209	if not self.fp:
	1210	raise RuntimeError(
	1211	"Attempt to write to ZIP archive that was already closed")
	1212
[391]	1213	if compress_type is not None:
	1214	zinfo.compress_type = compress_type
	1215
[2]	1216	zinfo.file_size = len(bytes) # Uncompressed size
	1217	zinfo.header_offset = self.fp.tell() # Start of header bytes
	1218	self._writecheck(zinfo)
	1219	self._didModify = True
	1220	zinfo.CRC = crc32(bytes) & 0xffffffff # CRC-32 checksum
	1221	if zinfo.compress_type == ZIP_DEFLATED:
	1222	co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
	1223	zlib.DEFLATED, -15)
	1224	bytes = co.compress(bytes) + co.flush()
	1225	zinfo.compress_size = len(bytes) # Compressed size
	1226	else:
	1227	zinfo.compress_size = zinfo.file_size
[391]	1228	zip64 = zinfo.file_size > ZIP64_LIMIT or \
	1229	zinfo.compress_size > ZIP64_LIMIT
	1230	if zip64 and not self._allowZip64:
	1231	raise LargeZipFile("Filesize would require ZIP64 extensions")
	1232	self.fp.write(zinfo.FileHeader(zip64))
[2]	1233	self.fp.write(bytes)
	1234	if zinfo.flag_bits & 0x08:
	1235	# Write CRC and file sizes after the file data
[391]	1236	fmt = '<LQQ' if zip64 else '<LLL'
	1237	self.fp.write(struct.pack(fmt, zinfo.CRC, zinfo.compress_size,
[2]	1238	zinfo.file_size))
[391]	1239	self.fp.flush()
[2]	1240	self.filelist.append(zinfo)
	1241	self.NameToInfo[zinfo.filename] = zinfo
	1242
	1243	def __del__(self):
	1244	"""Call the "close()" method in case the user forgot."""
	1245	self.close()
	1246
	1247	def close(self):
	1248	"""Close the file, and for mode "w" and "a" write the ending
	1249	records."""
	1250	if self.fp is None:
	1251	return
	1252
[391]	1253	try:
	1254	if self.mode in ("w", "a") and self._didModify: # write ending records
	1255	count = 0
	1256	pos1 = self.fp.tell()
	1257	for zinfo in self.filelist: # write central directory
	1258	count = count + 1
	1259	dt = zinfo.date_time
	1260	dosdate = (dt[0] - 1980) << 9 \| dt[1] << 5 \| dt[2]
	1261	dostime = dt[3] << 11 \| dt[4] << 5 \| (dt[5] // 2)
	1262	extra = []
	1263	if zinfo.file_size > ZIP64_LIMIT \
	1264	or zinfo.compress_size > ZIP64_LIMIT:
	1265	extra.append(zinfo.file_size)
	1266	extra.append(zinfo.compress_size)
	1267	file_size = 0xffffffff
	1268	compress_size = 0xffffffff
	1269	else:
	1270	file_size = zinfo.file_size
	1271	compress_size = zinfo.compress_size
[2]	1272
[391]	1273	if zinfo.header_offset > ZIP64_LIMIT:
	1274	extra.append(zinfo.header_offset)
	1275	header_offset = 0xffffffffL
	1276	else:
	1277	header_offset = zinfo.header_offset
[2]	1278
[391]	1279	extra_data = zinfo.extra
	1280	if extra:
	1281	# Append a ZIP64 field to the extra's
	1282	extra_data = struct.pack(
	1283	'<HH' + 'Q'*len(extra),
	1284	1, 8len(extra), extra) + extra_data
[2]	1285
[391]	1286	extract_version = max(45, zinfo.extract_version)
	1287	create_version = max(45, zinfo.create_version)
	1288	else:
	1289	extract_version = zinfo.extract_version
	1290	create_version = zinfo.create_version
[2]	1291
[391]	1292	try:
	1293	filename, flag_bits = zinfo._encodeFilenameFlags()
	1294	centdir = struct.pack(structCentralDir,
	1295	stringCentralDir, create_version,
	1296	zinfo.create_system, extract_version, zinfo.reserved,
	1297	flag_bits, zinfo.compress_type, dostime, dosdate,
	1298	zinfo.CRC, compress_size, file_size,
	1299	len(filename), len(extra_data), len(zinfo.comment),
	1300	0, zinfo.internal_attr, zinfo.external_attr,
	1301	header_offset)
	1302	except DeprecationWarning:
	1303	print >>sys.stderr, (structCentralDir,
	1304	stringCentralDir, create_version,
	1305	zinfo.create_system, extract_version, zinfo.reserved,
	1306	zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
	1307	zinfo.CRC, compress_size, file_size,
	1308	len(zinfo.filename), len(extra_data), len(zinfo.comment),
	1309	0, zinfo.internal_attr, zinfo.external_attr,
	1310	header_offset)
	1311	raise
	1312	self.fp.write(centdir)
	1313	self.fp.write(filename)
	1314	self.fp.write(extra_data)
	1315	self.fp.write(zinfo.comment)
[2]	1316
[391]	1317	pos2 = self.fp.tell()
	1318	# Write end-of-zip-archive record
	1319	centDirCount = count
	1320	centDirSize = pos2 - pos1
	1321	centDirOffset = pos1
	1322	if (centDirCount >= ZIP_FILECOUNT_LIMIT or
	1323	centDirOffset > ZIP64_LIMIT or
	1324	centDirSize > ZIP64_LIMIT):
	1325	# Need to write the ZIP64 end-of-archive records
	1326	zip64endrec = struct.pack(
	1327	structEndArchive64, stringEndArchive64,
	1328	44, 45, 45, 0, 0, centDirCount, centDirCount,
	1329	centDirSize, centDirOffset)
	1330	self.fp.write(zip64endrec)
[2]	1331
[391]	1332	zip64locrec = struct.pack(
	1333	structEndArchive64Locator,
	1334	stringEndArchive64Locator, 0, pos2, 1)
	1335	self.fp.write(zip64locrec)
	1336	centDirCount = min(centDirCount, 0xFFFF)
	1337	centDirSize = min(centDirSize, 0xFFFFFFFF)
	1338	centDirOffset = min(centDirOffset, 0xFFFFFFFF)
[2]	1339
[391]	1340	endrec = struct.pack(structEndArchive, stringEndArchive,
	1341	0, 0, centDirCount, centDirCount,
	1342	centDirSize, centDirOffset, len(self._comment))
	1343	self.fp.write(endrec)
	1344	self.fp.write(self._comment)
	1345	self.fp.flush()
	1346	finally:
	1347	fp = self.fp
	1348	self.fp = None
	1349	if not self._filePassed:
	1350	fp.close()
[2]	1351
	1352
	1353	class PyZipFile(ZipFile):
	1354	"""Class to create ZIP archives with Python library files and packages."""
	1355
	1356	def writepy(self, pathname, basename = ""):
	1357	"""Add all files from "pathname" to the ZIP archive.
	1358
	1359	If pathname is a package directory, search the directory and
	1360	all package subdirectories recursively for all *.py and enter
	1361	the modules into the archive. If pathname is a plain
	1362	directory, listdir *.py and enter all modules. Else, pathname
	1363	must be a Python *.py file and the module will be put into the
	1364	archive. Added modules are always module.pyo or module.pyc.
	1365	This method will compile the module.py into module.pyc if
	1366	necessary.
	1367	"""
	1368	dir, name = os.path.split(pathname)
	1369	if os.path.isdir(pathname):
	1370	initname = os.path.join(pathname, "__init__.py")
	1371	if os.path.isfile(initname):
	1372	# This is a package directory, add it
	1373	if basename:
	1374	basename = "%s/%s" % (basename, name)
	1375	else:
	1376	basename = name
	1377	if self.debug:
	1378	print "Adding package in", pathname, "as", basename
	1379	fname, arcname = self._get_codename(initname[0:-3], basename)
	1380	if self.debug:
	1381	print "Adding", arcname
	1382	self.write(fname, arcname)
	1383	dirlist = os.listdir(pathname)
	1384	dirlist.remove("__init__.py")
	1385	# Add all *.py files and package subdirectories
	1386	for filename in dirlist:
	1387	path = os.path.join(pathname, filename)
	1388	root, ext = os.path.splitext(filename)
	1389	if os.path.isdir(path):
	1390	if os.path.isfile(os.path.join(path, "__init__.py")):
	1391	# This is a package directory, add it
	1392	self.writepy(path, basename) # Recursive call
	1393	elif ext == ".py":
	1394	fname, arcname = self._get_codename(path[0:-3],
	1395	basename)
	1396	if self.debug:
	1397	print "Adding", arcname
	1398	self.write(fname, arcname)
	1399	else:
	1400	# This is NOT a package directory, add its files at top level
	1401	if self.debug:
	1402	print "Adding files from directory", pathname
	1403	for filename in os.listdir(pathname):
	1404	path = os.path.join(pathname, filename)
	1405	root, ext = os.path.splitext(filename)
	1406	if ext == ".py":
	1407	fname, arcname = self._get_codename(path[0:-3],
	1408	basename)
	1409	if self.debug:
	1410	print "Adding", arcname
	1411	self.write(fname, arcname)
	1412	else:
	1413	if pathname[-3:] != ".py":
	1414	raise RuntimeError, \
	1415	'Files added with writepy() must end with ".py"'
	1416	fname, arcname = self._get_codename(pathname[0:-3], basename)
	1417	if self.debug:
	1418	print "Adding file", arcname
	1419	self.write(fname, arcname)
	1420
	1421	def _get_codename(self, pathname, basename):
	1422	"""Return (filename, archivename) for the path.
	1423
	1424	Given a module name path, return the correct file path and
	1425	archive name, compiling if necessary. For example, given
	1426	/python/lib/string, return (/python/lib/string.pyc, string).
	1427	"""
	1428	file_py = pathname + ".py"
	1429	file_pyc = pathname + ".pyc"
	1430	file_pyo = pathname + ".pyo"
	1431	if os.path.isfile(file_pyo) and \
	1432	os.stat(file_pyo).st_mtime >= os.stat(file_py).st_mtime:
	1433	fname = file_pyo # Use .pyo file
	1434	elif not os.path.isfile(file_pyc) or \
	1435	os.stat(file_pyc).st_mtime < os.stat(file_py).st_mtime:
	1436	import py_compile
	1437	if self.debug:
	1438	print "Compiling", file_py
	1439	try:
	1440	py_compile.compile(file_py, file_pyc, None, True)
	1441	except py_compile.PyCompileError,err:
	1442	print err.msg
	1443	fname = file_pyc
	1444	else:
	1445	fname = file_pyc
	1446	archivename = os.path.split(fname)[1]
	1447	if basename:
	1448	archivename = "%s/%s" % (basename, archivename)
	1449	return (fname, archivename)
	1450
	1451
	1452	def main(args = None):
	1453	import textwrap
	1454	USAGE=textwrap.dedent("""\
	1455	Usage:
	1456	zipfile.py -l zipfile.zip # Show listing of a zipfile
	1457	zipfile.py -t zipfile.zip # Test if a zipfile is valid
	1458	zipfile.py -e zipfile.zip target # Extract zipfile into target dir
	1459	zipfile.py -c zipfile.zip src ... # Create zipfile from sources
	1460	""")
	1461	if args is None:
	1462	args = sys.argv[1:]
	1463
	1464	if not args or args[0] not in ('-l', '-c', '-e', '-t'):
	1465	print USAGE
	1466	sys.exit(1)
	1467
	1468	if args[0] == '-l':
	1469	if len(args) != 2:
	1470	print USAGE
	1471	sys.exit(1)
[391]	1472	with ZipFile(args[1], 'r') as zf:
	1473	zf.printdir()
[2]	1474
	1475	elif args[0] == '-t':
	1476	if len(args) != 2:
	1477	print USAGE
	1478	sys.exit(1)
[391]	1479	with ZipFile(args[1], 'r') as zf:
	1480	badfile = zf.testzip()
	1481	if badfile:
	1482	print("The following enclosed file is corrupted: {!r}".format(badfile))
[2]	1483	print "Done testing"
	1484
	1485	elif args[0] == '-e':
	1486	if len(args) != 3:
	1487	print USAGE
	1488	sys.exit(1)
	1489
[391]	1490	with ZipFile(args[1], 'r') as zf:
	1491	out = args[2]
	1492	for path in zf.namelist():
	1493	if path.startswith('./'):
	1494	tgt = os.path.join(out, path[2:])
	1495	else:
	1496	tgt = os.path.join(out, path)
[2]	1497
[391]	1498	tgtdir = os.path.dirname(tgt)
	1499	if not os.path.exists(tgtdir):
	1500	os.makedirs(tgtdir)
	1501	with open(tgt, 'wb') as fp:
	1502	fp.write(zf.read(path))
[2]	1503
	1504	elif args[0] == '-c':
	1505	if len(args) < 3:
	1506	print USAGE
	1507	sys.exit(1)
	1508
	1509	def addToZip(zf, path, zippath):
	1510	if os.path.isfile(path):
	1511	zf.write(path, zippath, ZIP_DEFLATED)
	1512	elif os.path.isdir(path):
	1513	for nm in os.listdir(path):
	1514	addToZip(zf,
	1515	os.path.join(path, nm), os.path.join(zippath, nm))
	1516	# else: ignore
	1517
[391]	1518	with ZipFile(args[1], 'w', allowZip64=True) as zf:
	1519	for src in args[2:]:
	1520	addToZip(zf, src, os.path.basename(src))
[2]	1521
# Run the command-line interface when this file is executed as a script.
if __name__ == "__main__":
    main()

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: python/trunk/Lib/zipfile.py

Download in other formats: