1 | """
|
---|
2 | Read and write ZIP files.
|
---|
3 | """
|
---|
4 | import struct, os, time, sys
|
---|
5 | import binascii, cStringIO
|
---|
6 |
|
---|
7 | try:
|
---|
8 | import zlib # We may need its compression method
|
---|
9 | except ImportError:
|
---|
10 | zlib = None
|
---|
11 |
|
---|
12 | __all__ = ["BadZipfile", "error", "ZIP_STORED", "ZIP_DEFLATED", "is_zipfile",
|
---|
13 | "ZipInfo", "ZipFile", "PyZipFile", "LargeZipFile" ]
|
---|
14 |
|
---|
class BadZipfile(Exception):
    """Raised when a file cannot be processed as a valid ZIP archive."""


class LargeZipFile(Exception):
    """Raised while writing an archive that would need the ZIP64
    extensions when those extensions have been disabled."""


# Historical alias: the exception raised by this module.
error = BadZipfile
|
---|
26 |
|
---|
# Sizes and offsets above this value (2**31 - 1) are handled through the
# ZIP64 extensions.
ZIP64_LIMIT = 0x7FFFFFFF

# Compression methods understood by this module; every other ZIP
# compression method is unsupported.
ZIP_STORED = 0
ZIP_DEFLATED = 8

# struct formats and magic numbers of the on-disk records.
# End of archive record: 8 items, 22 bytes.
structEndArchive = "<4s4H2lH"
stringEndArchive = "PK\005\006"
# Central directory entry: 19 items, 46 bytes.
structCentralDir = "<4s4B4HlLL5HLl"
stringCentralDir = "PK\001\002"
# Local file header: 12 items, 30 bytes.
structFileHeader = "<4s2B4HlLL2H"
stringFileHeader = "PK\003\004"
# ZIP64 end of archive locator: 4 items, 20 bytes.
structEndArchive64Locator = "<4slql"
stringEndArchive64Locator = "PK\x06\x07"
# ZIP64 end of archive record: 10 items, 56 bytes.
structEndArchive64 = "<4sqhhllqqqq"
stringEndArchive64 = "PK\x06\x06"


# Positions of the fields inside an unpacked central directory entry.
(_CD_SIGNATURE,
 _CD_CREATE_VERSION,
 _CD_CREATE_SYSTEM,
 _CD_EXTRACT_VERSION,
 _CD_EXTRACT_SYSTEM,                 # is this meaningful?
 _CD_FLAG_BITS,
 _CD_COMPRESS_TYPE,
 _CD_TIME,
 _CD_DATE,
 _CD_CRC,
 _CD_COMPRESSED_SIZE,
 _CD_UNCOMPRESSED_SIZE,
 _CD_FILENAME_LENGTH,
 _CD_EXTRA_FIELD_LENGTH,
 _CD_COMMENT_LENGTH,
 _CD_DISK_NUMBER_START,
 _CD_INTERNAL_FILE_ATTRIBUTES,
 _CD_EXTERNAL_FILE_ATTRIBUTES,
 _CD_LOCAL_HEADER_OFFSET) = range(19)

# Positions of the fields inside an unpacked local file header.
(_FH_SIGNATURE,
 _FH_EXTRACT_VERSION,
 _FH_EXTRACT_SYSTEM,                 # is this meaningful?
 _FH_GENERAL_PURPOSE_FLAG_BITS,
 _FH_COMPRESSION_METHOD,
 _FH_LAST_MOD_TIME,
 _FH_LAST_MOD_DATE,
 _FH_CRC,
 _FH_COMPRESSED_SIZE,
 _FH_UNCOMPRESSED_SIZE,
 _FH_FILENAME_LENGTH,
 _FH_EXTRA_FIELD_LENGTH) = range(12)
|
---|
81 |
|
---|
def is_zipfile(filename):
    """Quickly see if file is a ZIP file by checking the magic number.

    filename is the path of the file to inspect.  Returns True when an
    "End of Central Directory" record is found, and False otherwise,
    including when the file cannot be opened or read (IOError).
    """
    try:
        fpin = open(filename, "rb")
        try:
            endrec = _EndRecData(fpin)
        finally:
            # Release the handle even when seeking/reading fails.
            fpin.close()
    except IOError:
        return False
    # A non-empty record means the magic number was found.
    return bool(endrec)
|
---|
93 |
|
---|
def _EndRecData64(fpin, offset, endrec):
    """
    Read the ZIP64 end-of-archive records and use that to update endrec

    fpin is the open archive, offset is the position of the classic
    end-of-archive record relative to the end of the file (a negative
    number), and endrec is the list built from that record.  endrec is
    returned unchanged when no ZIP64 locator/record is found in front of
    the classic record; otherwise items 1 to 6 are overwritten with the
    ZIP64 values.  Raises BadZipfile for archives spanning several disks.
    """
    locatorSize = struct.calcsize(structEndArchive64Locator)
    fpin.seek(offset - locatorSize, 2)
    data = fpin.read(locatorSize)
    if len(data) != locatorSize:
        # Not enough bytes for a locator: keep the classic record.
        return endrec
    sig, diskno, _reloff, disks = struct.unpack(structEndArchive64Locator, data)
    if sig != stringEndArchive64Locator:
        return endrec

    if diskno != 0 or disks != 1:
        raise BadZipfile("zipfiles that span multiple disks are not supported")

    # Assume no 'zip64 extensible data'
    endArchiveSize = struct.calcsize(structEndArchive64)
    fpin.seek(offset - locatorSize - endArchiveSize, 2)
    data = fpin.read(endArchiveSize)
    if len(data) != endArchiveSize:
        # Truncated ZIP64 record: keep the classic record.
        return endrec
    (sig, _sz, _create_version, _read_version, disk_num, disk_dir,
     dircount, dircount2, dirsize, diroffset) = \
        struct.unpack(structEndArchive64, data)
    if sig != stringEndArchive64:
        return endrec

    # Update the original endrec using data from the ZIP64 record
    endrec[1:7] = [disk_num, disk_dir, dircount, dircount2,
                   dirsize, diroffset]
    return endrec
|
---|
126 |
|
---|
127 |
|
---|
def _EndRecData(fpin):
    """Return data from the "End of Central Directory" record, or None.

    The data is a list of the eight items in the ZIP "End of central dir"
    record followed by two more items: the archive comment and the file
    seek offset of this record.  When the record carries the ZIP64 marker
    in its directory offset, the list is completed by _EndRecData64().

    None is returned when the file is too short to hold a record or when
    no record with a consistent comment length is found.
    """
    recsize = struct.calcsize(structEndArchive)

    # Determine the file size first so that files shorter than one record
    # are rejected instead of failing on a seek before the start of file.
    fpin.seek(0, 2)
    filesize = fpin.tell()
    if filesize < recsize:
        return None

    # Fast path: assume no archive comment, so the record is the tail.
    fpin.seek(filesize - recsize, 0)
    data = fpin.read()
    if data[0:4] == stringEndArchive and data[-2:] == "\000\000":
        endrec = list(struct.unpack(structEndArchive, data))
        endrec.append("")                   # Append the archive comment
        endrec.append(filesize - recsize)   # Append the record start offset
        if endrec[-4] == -1 or endrec[-4] == 0xffffffff:
            return _EndRecData64(fpin, -recsize, endrec)
        return endrec

    # Slow path: the record is followed by an archive comment.  The comment
    # has a 16 bit length, so the record starts at most 65535 + recsize
    # bytes before the end of the file.  The signature must not appear in
    # the comment itself.
    END_BLOCK = min(filesize, recsize + 0xFFFF)
    fpin.seek(filesize - END_BLOCK, 0)
    data = fpin.read()
    start = data.rfind(stringEndArchive)
    if start >= 0:          # Correct signature string was found
        record = data[start:start + recsize]
        if len(record) == recsize:      # Ignore a truncated trailing match
            endrec = list(struct.unpack(structEndArchive, record))
            comment = data[start + recsize:]
            if endrec[7] == len(comment):   # Comment length checks out
                # Append the archive comment and start offset
                endrec.append(comment)
                endrec.append(filesize - END_BLOCK + start)
                if endrec[-4] == -1 or endrec[-4] == 0xffffffff:
                    return _EndRecData64(fpin, start - END_BLOCK, endrec)
                return endrec
    return None             # Error, no usable record
|
---|
165 |
|
---|
166 |
|
---|
class ZipInfo (object):
    """Class with attributes describing each file in the ZIP archive."""

    __slots__ = (
            'orig_filename',
            'filename',
            'date_time',
            'compress_type',
            'comment',
            'extra',
            'create_system',
            'create_version',
            'extract_version',
            'reserved',
            'flag_bits',
            'volume',
            'internal_attr',
            'external_attr',
            'header_offset',
            'CRC',
            'compress_size',
            'file_size',
        )

    def __init__(self, filename="NoName", date_time=(1980,1,1,0,0,0)):
        """Create an entry named filename, last modified at date_time.

        date_time is a (year, month, day, hour, min, sec) sequence.
        """
        self.orig_filename = filename   # Original file name in archive

        # Terminate the file name at the first null byte.  Null bytes in file
        # names are used as tricks by viruses in archives.
        null_byte = filename.find(chr(0))
        if null_byte >= 0:
            filename = filename[0:null_byte]
        # This is used to ensure paths in generated ZIP files always use
        # forward slashes as the directory separator, as required by the
        # ZIP format specification.
        if os.sep != "/" and os.sep in filename:
            filename = filename.replace(os.sep, "/")

        self.filename = filename        # Normalized file name
        self.date_time = date_time      # year, month, day, hour, min, sec
        # Standard values:
        self.compress_type = ZIP_STORED # Type of compression for the file
        self.comment = ""               # Comment for each file
        self.extra = ""                 # ZIP extra data
        if sys.platform == 'win32':
            self.create_system = 0          # System which created ZIP archive
        else:
            # Assume everything else is unix-y
            self.create_system = 3          # System which created ZIP archive
        self.create_version = 20        # Version which created ZIP archive
        self.extract_version = 20       # Version needed to extract archive
        self.reserved = 0               # Must be zero
        self.flag_bits = 0              # ZIP flag bits
        self.volume = 0                 # Volume number of file header
        self.internal_attr = 0          # Internal attributes
        self.external_attr = 0          # External file attributes
        # Other attributes are set by class ZipFile:
        # header_offset         Byte offset to the file header
        # CRC                   CRC-32 of the uncompressed file
        # compress_size         Size of the compressed file
        # file_size             Size of the uncompressed file

    def FileHeader(self):
        """Return the per-file header as a string.

        The result is the packed local file header followed by the file
        name and the extra data.  When either size exceeds ZIP64_LIMIT a
        ZIP64 extra field is appended and extract_version/create_version
        are raised to at least 45 on this instance.
        """
        dt = self.date_time
        dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
        dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
        if self.flag_bits & 0x08:
            # Set these to zero because we write them after the file data
            CRC = compress_size = file_size = 0
        else:
            CRC = self.CRC
            compress_size = self.compress_size
            file_size = self.file_size

        extra = self.extra

        if file_size > ZIP64_LIMIT or compress_size > ZIP64_LIMIT:
            # File is larger than what fits into a 4 byte integer,
            # fall back to the ZIP64 extension
            fmt = '<hhqq'
            extra = extra + struct.pack(fmt,
                    1, struct.calcsize(fmt)-4, file_size, compress_size)
            file_size = 0xffffffff # -1
            compress_size = 0xffffffff # -1
            self.extract_version = max(45, self.extract_version)
            self.create_version = max(45, self.extract_version)

        header = struct.pack(structFileHeader, stringFileHeader,
                 self.extract_version, self.reserved, self.flag_bits,
                 self.compress_type, dostime, dosdate, CRC,
                 compress_size, file_size,
                 len(self.filename), len(extra))
        return header + self.filename + extra

    def _decodeExtra(self):
        """Decode the extra field and apply any ZIP64 values found in it.

        Only the ZIP64 field (header id 1) is interpreted; its values
        replace file_size, compress_size and header_offset, in that order,
        for each attribute currently holding the -1 / 0xFFFFFFFF marker.
        Other fields are skipped.  Raises RuntimeError when the ZIP64
        field has an unexpected length.
        """
        extra = self.extra
        unpack = struct.unpack
        # Each field starts with a 4 byte header; a shorter trailing
        # fragment cannot be a field and is ignored.
        while len(extra) >= 4:
            # Header id and data size are unsigned 16 bit values.  Reading
            # them as signed would turn large sizes into negative numbers
            # and break the slice that advances to the next field.
            tp, ln = unpack('<HH', extra[:4])
            if tp == 1:
                if ln >= 24:
                    counts = unpack('<qqq', extra[4:28])
                elif ln == 16:
                    counts = unpack('<qq', extra[4:20])
                elif ln == 8:
                    counts = unpack('<q', extra[4:12])
                elif ln == 0:
                    counts = ()
                else:
                    raise RuntimeError("Corrupt extra field %s"%(ln,))

                idx = 0

                # ZIP64 extension (large files and/or large archives)
                if self.file_size == -1 or self.file_size == 0xFFFFFFFF:
                    self.file_size = counts[idx]
                    idx += 1

                if self.compress_size == -1 or self.compress_size == 0xFFFFFFFF:
                    self.compress_size = counts[idx]
                    idx += 1

                if self.header_offset == -1 or self.header_offset == 0xffffffff:
                    self.header_offset = counts[idx]
                    idx += 1

            extra = extra[ln+4:]
|
---|
297 |
|
---|
298 |
|
---|
class ZipFile:
    """ Class with methods to open, read, write, close, list zip files.

    z = ZipFile(file, mode="r", compression=ZIP_STORED, allowZip64=False)

    file: Either the path to the file, or a file-like object.
          If it is a path, the file will be opened and closed by ZipFile.
    mode: The mode can be either read "r", write "w" or append "a".
    compression: ZIP_STORED (no compression) or ZIP_DEFLATED (requires zlib).
    allowZip64: if True ZipFile will create files with ZIP64 extensions when
                needed, otherwise it will raise an exception when this would
                be necessary.

    """

    fp = None                   # Set here since __del__ checks it

    def __init__(self, file, mode="r", compression=ZIP_STORED, allowZip64=False):
        """Open the ZIP file with mode read "r", write "w" or append "a".

        Raises RuntimeError for an unsupported compression method, for
        ZIP_DEFLATED without zlib, and for any mode other than "r", "w"
        or "a" (a "b" in the mode string is ignored).
        """
        self._allowZip64 = allowZip64
        self._didModify = False
        if compression == ZIP_STORED:
            pass
        elif compression == ZIP_DEFLATED:
            if not zlib:
                raise RuntimeError(
                      "Compression requires the (missing) zlib module")
        else:
            raise RuntimeError("That compression method is not supported")
        self.debug = 0  # Level of printing: 0 through 3
        self.NameToInfo = {}    # Find file info given name
        self.filelist = []      # List of ZipInfo instances for archive
        self.compression = compression  # Method of compression
        # Slice instead of index so an empty mode is reported as invalid.
        self.mode = key = mode.replace('b', '')[:1]

        # Validate before touching the file so that nothing is opened (and
        # no KeyError escapes from the mode table) for a bad mode.
        if key not in ('r', 'w', 'a'):
            raise RuntimeError('Mode must be "r", "w" or "a"')

        # Check if we were passed a file-like object
        if isinstance(file, basestring):
            self._filePassed = 0
            self.filename = file
            modeDict = {'r' : 'rb', 'w': 'wb', 'a' : 'r+b'}
            self.fp = open(file, modeDict[key])
        else:
            self._filePassed = 1
            self.fp = file
            self.filename = getattr(file, 'name', None)

        if key == 'r':
            self._GetContents()
        elif key == 'a':
            try:                        # See if file is a zip file
                self._RealGetContents()
                # seek to start of directory and overwrite
                self.fp.seek(self.start_dir, 0)
            except BadZipfile:          # file is not a zip file, just append
                self.fp.seek(0, 2)
        # key == 'w': nothing to read, the archive starts empty.

    def _GetContents(self):
        """Read the directory, making sure we close the file if the format
        is bad."""
        try:
            self._RealGetContents()
        except BadZipfile:
            if not self._filePassed:
                self.fp.close()
                self.fp = None
            raise

    def _RealGetContents(self):
        """Read in the table of contents for the ZIP file.

        Fills self.filelist and self.NameToInfo and sets self.comment and
        self.start_dir.  Raises BadZipfile when no end-of-archive record
        is found or a central directory entry has a bad magic number.
        """
        fp = self.fp
        endrec = _EndRecData(fp)
        if not endrec:
            raise BadZipfile("File is not a zip file")
        if self.debug > 1:
            print(endrec)
        size_cd = endrec[5]             # bytes in central directory
        offset_cd = endrec[6]   # offset of central directory
        self.comment = endrec[8]        # archive comment
        # endrec[9] is the offset of the "End of Central Dir" record
        if endrec[9] > ZIP64_LIMIT:
            # Skip the ZIP64 end record (56 bytes) and locator (20 bytes)
            x = endrec[9] - size_cd - 56 - 20
        else:
            x = endrec[9] - size_cd
        # "concat" is zero, unless zip was concatenated to another file
        concat = x - offset_cd
        if self.debug > 2:
            print("given, inferred, offset %s %s %s" % (offset_cd, x, concat))
        # self.start_dir:  Position of start of central directory
        self.start_dir = offset_cd + concat
        fp.seek(self.start_dir, 0)
        data = fp.read(size_cd)
        fp = cStringIO.StringIO(data)
        total = 0
        while total < size_cd:
            centdir = fp.read(46)
            total = total + 46
            if centdir[0:4] != stringCentralDir:
                raise BadZipfile("Bad magic number for central directory")
            centdir = struct.unpack(structCentralDir, centdir)
            if self.debug > 2:
                print(centdir)
            filename = fp.read(centdir[_CD_FILENAME_LENGTH])
            # Create ZipInfo instance to store file information
            zinfo = ZipInfo(filename)
            zinfo.extra = fp.read(centdir[_CD_EXTRA_FIELD_LENGTH])
            zinfo.comment = fp.read(centdir[_CD_COMMENT_LENGTH])
            total = (total + centdir[_CD_FILENAME_LENGTH]
                     + centdir[_CD_EXTRA_FIELD_LENGTH]
                     + centdir[_CD_COMMENT_LENGTH])
            zinfo.header_offset = centdir[_CD_LOCAL_HEADER_OFFSET]
            (zinfo.create_version, zinfo.create_system,
             zinfo.extract_version, zinfo.reserved,
             zinfo.flag_bits, zinfo.compress_type, t, d,
             zinfo.CRC, zinfo.compress_size, zinfo.file_size) = centdir[1:12]
            zinfo.volume, zinfo.internal_attr, zinfo.external_attr = \
                centdir[15:18]
            # Convert date/time code to (year, month, day, hour, min, sec)
            zinfo.date_time = ( (d>>9)+1980, (d>>5)&0xF, d&0x1F,
                                t>>11, (t>>5)&0x3F, (t&0x1F) * 2 )

            zinfo._decodeExtra()
            zinfo.header_offset = zinfo.header_offset + concat
            self.filelist.append(zinfo)
            self.NameToInfo[zinfo.filename] = zinfo
            if self.debug > 2:
                print("total %s" % (total,))

    def namelist(self):
        """Return a list of file names in the archive."""
        return [data.filename for data in self.filelist]

    def infolist(self):
        """Return a list of class ZipInfo instances for files in the
        archive."""
        return self.filelist

    def printdir(self):
        """Print a table of contents for the zip file."""
        print("%-46s %19s %12s" % ("File Name", "Modified    ", "Size"))
        for zinfo in self.filelist:
            date = "%d-%02d-%02d %02d:%02d:%02d" % zinfo.date_time
            print("%-46s %s %12d" % (zinfo.filename, date, zinfo.file_size))

    def testzip(self):
        """Read all the files and check the CRC.

        Returns the name of the first file for which read() raises
        BadZipfile, or None when every file reads cleanly.
        """
        for zinfo in self.filelist:
            try:
                self.read(zinfo.filename)       # Check CRC-32
            except BadZipfile:
                return zinfo.filename

    def getinfo(self, name):
        """Return the instance of ZipInfo given 'name'.

        Raises KeyError when the archive has no entry called name.
        """
        return self.NameToInfo[name]

    def read(self, name):
        """Return file bytes (as a string) for name.

        Raises RuntimeError when the archive is not open in mode "r" or
        "a", or when zlib is needed but missing; raises BadZipfile for a
        bad local header, a file name mismatch, an unsupported compression
        method or a CRC-32 mismatch.
        """
        if self.mode not in ("r", "a"):
            raise RuntimeError('read() requires mode "r" or "a"')
        if not self.fp:
            raise RuntimeError(
                  "Attempt to read ZIP archive that was already closed")
        zinfo = self.getinfo(name)
        filepos = self.fp.tell()        # Restored once the data is read

        self.fp.seek(zinfo.header_offset, 0)

        # Skip the file header:
        fheader = self.fp.read(30)
        if fheader[0:4] != stringFileHeader:
            raise BadZipfile("Bad magic number for file header")

        fheader = struct.unpack(structFileHeader, fheader)
        fname = self.fp.read(fheader[_FH_FILENAME_LENGTH])
        if fheader[_FH_EXTRA_FIELD_LENGTH]:
            self.fp.read(fheader[_FH_EXTRA_FIELD_LENGTH])

        if fname != zinfo.orig_filename:
            raise BadZipfile(
                  'File name in directory "%s" and header "%s" differ.' % (
                      zinfo.orig_filename, fname))

        data = self.fp.read(zinfo.compress_size)
        self.fp.seek(filepos, 0)
        if zinfo.compress_type == ZIP_STORED:
            pass
        elif zinfo.compress_type == ZIP_DEFLATED:
            if not zlib:
                raise RuntimeError(
                      "De-compression requires the (missing) zlib module")
            # zlib compress/decompress code by Jeremy Hylton of CNRI
            dc = zlib.decompressobj(-15)
            data = dc.decompress(data)
            # need to feed in unused pad byte so that zlib won't choke
            ex = dc.decompress('Z') + dc.flush()
            if ex:
                data = data + ex
        else:
            raise BadZipfile(
                  "Unsupported compression method %d for file %s" %
                  (zinfo.compress_type, name))
        crc = binascii.crc32(data)
        if crc != zinfo.CRC:
            raise BadZipfile("Bad CRC-32 for file %s" % name)
        return data

    def _writecheck(self, zinfo):
        """Check for errors before writing a file to the archive.

        Raises RuntimeError for a wrong mode, a closed archive or an
        unusable compression method, and LargeZipFile when the entry
        would need ZIP64 extensions that are not allowed.
        """
        if zinfo.filename in self.NameToInfo:
            if self.debug:      # Warning for duplicate names
                print("Duplicate name: %s" % (zinfo.filename,))
        if self.mode not in ("w", "a"):
            raise RuntimeError('write() requires mode "w" or "a"')
        if not self.fp:
            raise RuntimeError(
                  "Attempt to write ZIP archive that was already closed")
        if zinfo.compress_type == ZIP_DEFLATED and not zlib:
            raise RuntimeError(
                  "Compression requires the (missing) zlib module")
        if zinfo.compress_type not in (ZIP_STORED, ZIP_DEFLATED):
            raise RuntimeError(
                  "That compression method is not supported")
        if zinfo.file_size > ZIP64_LIMIT:
            if not self._allowZip64:
                raise LargeZipFile("Filesize would require ZIP64 extensions")
        if zinfo.header_offset > ZIP64_LIMIT:
            if not self._allowZip64:
                raise LargeZipFile("Zipfile size would require ZIP64 extensions")

    def write(self, filename, arcname=None, compress_type=None):
        """Put the bytes from filename into the archive under the name
        arcname.

        arcname defaults to filename; the drive and leading separators are
        removed from it.  compress_type defaults to the archive's
        compression method.
        """
        st = os.stat(filename)
        mtime = time.localtime(st.st_mtime)
        date_time = mtime[0:6]
        # Create ZipInfo instance to store file information
        if arcname is None:
            arcname = filename
        arcname = os.path.normpath(os.path.splitdrive(arcname)[1])
        # Guard on emptiness: a name made only of separators would
        # otherwise be indexed after being reduced to "".
        while arcname and arcname[0] in (os.sep, os.altsep):
            arcname = arcname[1:]
        zinfo = ZipInfo(arcname, date_time)
        zinfo.external_attr = (st[0] & 0xFFFF) << 16      # Unix attributes
        if compress_type is None:
            zinfo.compress_type = self.compression
        else:
            zinfo.compress_type = compress_type

        zinfo.file_size = st.st_size
        zinfo.flag_bits = 0x00
        zinfo.header_offset = self.fp.tell()    # Start of header bytes

        self._writecheck(zinfo)
        self._didModify = True
        fp = open(filename, "rb")
        try:
            # Must overwrite CRC and sizes with correct data later
            zinfo.CRC = CRC = 0
            zinfo.compress_size = compress_size = 0
            zinfo.file_size = file_size = 0
            self.fp.write(zinfo.FileHeader())
            if zinfo.compress_type == ZIP_DEFLATED:
                cmpr = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                     zlib.DEFLATED, -15)
            else:
                cmpr = None
            while 1:
                buf = fp.read(1024 * 8)
                if not buf:
                    break
                file_size = file_size + len(buf)
                CRC = binascii.crc32(buf, CRC)
                if cmpr:
                    buf = cmpr.compress(buf)
                    compress_size = compress_size + len(buf)
                self.fp.write(buf)
        finally:
            # Close the source file even when reading or writing fails.
            fp.close()
        if cmpr:
            buf = cmpr.flush()
            compress_size = compress_size + len(buf)
            self.fp.write(buf)
            zinfo.compress_size = compress_size
        else:
            zinfo.compress_size = file_size
        zinfo.CRC = CRC
        zinfo.file_size = file_size
        # Seek backwards and write CRC and file sizes
        position = self.fp.tell()       # Preserve current position in file
        self.fp.seek(zinfo.header_offset + 14, 0)
        self.fp.write(struct.pack("<lLL", zinfo.CRC, zinfo.compress_size,
              zinfo.file_size))
        self.fp.seek(position, 0)
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo

    def writestr(self, zinfo_or_arcname, bytes):
        """Write a file into the archive.  The contents is the string
        'bytes'.  'zinfo_or_arcname' is either a ZipInfo instance or
        the name of the file in the archive."""
        if not isinstance(zinfo_or_arcname, ZipInfo):
            zinfo = ZipInfo(filename=zinfo_or_arcname,
                            date_time=time.localtime(time.time()))
            zinfo.compress_type = self.compression
        else:
            zinfo = zinfo_or_arcname
        zinfo.file_size = len(bytes)            # Uncompressed size
        zinfo.header_offset = self.fp.tell()    # Start of header bytes
        self._writecheck(zinfo)
        self._didModify = True
        zinfo.CRC = binascii.crc32(bytes)       # CRC-32 checksum
        if zinfo.compress_type == ZIP_DEFLATED:
            co = zlib.compressobj(zlib.Z_DEFAULT_COMPRESSION,
                 zlib.DEFLATED, -15)
            bytes = co.compress(bytes) + co.flush()
            zinfo.compress_size = len(bytes)    # Compressed size
        else:
            zinfo.compress_size = zinfo.file_size
        self.fp.write(zinfo.FileHeader())
        self.fp.write(bytes)
        self.fp.flush()
        if zinfo.flag_bits & 0x08:
            # Write CRC and file sizes after the file data
            self.fp.write(struct.pack("<lLL", zinfo.CRC, zinfo.compress_size,
                  zinfo.file_size))
        self.filelist.append(zinfo)
        self.NameToInfo[zinfo.filename] = zinfo

    def __del__(self):
        """Call the "close()" method in case the user forgot."""
        self.close()

    def close(self):
        """Close the file, and for mode "w" and "a" write the ending
        records.

        The central directory and end-of-archive record(s) are only
        written when the archive was modified.  A file object passed to
        the constructor is left open; calling close() again is a no-op.
        """
        if self.fp is None:
            return

        if self.mode in ("w", "a") and self._didModify: # write ending records
            count = 0
            pos1 = self.fp.tell()       # Start of the central directory
            for zinfo in self.filelist:         # write central directory
                count = count + 1
                dt = zinfo.date_time
                dosdate = (dt[0] - 1980) << 9 | dt[1] << 5 | dt[2]
                dostime = dt[3] << 11 | dt[4] << 5 | (dt[5] // 2)
                extra = []      # Values that go into a ZIP64 extra field
                if zinfo.file_size > ZIP64_LIMIT \
                        or zinfo.compress_size > ZIP64_LIMIT:
                    extra.append(zinfo.file_size)
                    extra.append(zinfo.compress_size)
                    file_size = 0xffffffff #-1
                    compress_size = 0xffffffff #-1
                else:
                    file_size = zinfo.file_size
                    compress_size = zinfo.compress_size

                if zinfo.header_offset > ZIP64_LIMIT:
                    extra.append(zinfo.header_offset)
                    header_offset = -1  # struct "l" format:  32 one bits
                else:
                    header_offset = zinfo.header_offset

                extra_data = zinfo.extra
                if extra:
                    # Append a ZIP64 field to the extra's
                    extra_data = struct.pack(
                            '<hh' + 'q'*len(extra),
                            1, 8*len(extra), *extra) + extra_data

                    extract_version = max(45, zinfo.extract_version)
                    create_version = max(45, zinfo.create_version)
                else:
                    extract_version = zinfo.extract_version
                    create_version = zinfo.create_version

                centdir = struct.pack(structCentralDir,
                  stringCentralDir, create_version,
                  zinfo.create_system, extract_version, zinfo.reserved,
                  zinfo.flag_bits, zinfo.compress_type, dostime, dosdate,
                  zinfo.CRC, compress_size, file_size,
                  len(zinfo.filename), len(extra_data), len(zinfo.comment),
                  0, zinfo.internal_attr, zinfo.external_attr,
                  header_offset)
                self.fp.write(centdir)
                self.fp.write(zinfo.filename)
                self.fp.write(extra_data)
                self.fp.write(zinfo.comment)

            pos2 = self.fp.tell()       # End of the central directory
            # Write end-of-zip-archive record
            if pos1 > ZIP64_LIMIT:
                # Need to write the ZIP64 end-of-archive records
                zip64endrec = struct.pack(
                        structEndArchive64, stringEndArchive64,
                        44, 45, 45, 0, 0, count, count, pos2 - pos1, pos1)
                self.fp.write(zip64endrec)

                zip64locrec = struct.pack(
                        structEndArchive64Locator,
                        stringEndArchive64Locator, 0, pos2, 1)
                self.fp.write(zip64locrec)

                # The classic record stores -1 as the directory offset to
                # point readers at the ZIP64 record.
                endrec = struct.pack(structEndArchive, stringEndArchive,
                            0, 0, count, count, pos2 - pos1, -1, 0)
                self.fp.write(endrec)

            else:
                endrec = struct.pack(structEndArchive, stringEndArchive,
                         0, 0, count, count, pos2 - pos1, pos1, 0)
                self.fp.write(endrec)
            self.fp.flush()
        if not self._filePassed:
            self.fp.close()
        self.fp = None
|
---|
725 |
|
---|
726 |
|
---|
class PyZipFile(ZipFile):
    """Class to create ZIP archives with Python library files and packages."""

    def writepy(self, pathname, basename = ""):
        """Add all files from "pathname" to the ZIP archive.

        If pathname is a package directory, search the directory and
        all package subdirectories recursively for all *.py and enter
        the modules into the archive.  If pathname is a plain
        directory, listdir *.py and enter all modules.  Else, pathname
        must be a Python *.py file and the module will be put into the
        archive.  Added modules are always module.pyo or module.pyc.
        This method will compile the module.py into module.pyc if
        necessary.

        "basename" is the archive directory under which the modules are
        stored; for a package directory the package's own name is
        appended to it.  Trailing path separators on "pathname" are
        ignored when working out the package name, so "pkg" and "pkg/"
        produce the same archive layout.

        Raises RuntimeError if pathname is not a directory and does not
        end with ".py".
        """
        if os.path.isdir(pathname):
            initname = os.path.join(pathname, "__init__.py")
            if os.path.isfile(initname):
                # This is a package directory, add it.
                # Strip trailing separators first: os.path.split("pkg/")
                # yields an empty tail, which would silently drop the
                # package name and put its modules at the archive root.
                seps = os.sep + (os.altsep or "")
                name = os.path.split(pathname.rstrip(seps) or pathname)[1]
                if basename:
                    basename = "%s/%s" % (basename, name)
                else:
                    basename = name
                if self.debug:
                    print("Adding package in %s as %s" % (pathname, basename))
                self._add_compiled(initname[0:-3], basename, "Adding")
                dirlist = os.listdir(pathname)
                # __init__.py has just been handled above.
                dirlist.remove("__init__.py")
                # Add all *.py files and package subdirectories
                for filename in dirlist:
                    path = os.path.join(pathname, filename)
                    if os.path.isdir(path):
                        if os.path.isfile(os.path.join(path, "__init__.py")):
                            # This is a package directory, add it
                            self.writepy(path, basename)  # Recursive call
                    elif os.path.splitext(filename)[1] == ".py":
                        self._add_compiled(path[0:-3], basename, "Adding")
            else:
                # This is NOT a package directory, add its files at top level
                if self.debug:
                    print("Adding files from directory %s" % pathname)
                for filename in os.listdir(pathname):
                    if os.path.splitext(filename)[1] == ".py":
                        path = os.path.join(pathname, filename)
                        self._add_compiled(path[0:-3], basename, "Adding")
        else:
            if pathname[-3:] != ".py":
                raise RuntimeError(
                    'Files added with writepy() must end with ".py"')
            self._add_compiled(pathname[0:-3], basename, "Adding file")

    def _add_compiled(self, pathname, basename, label):
        """Write the compiled form of the module at "pathname" to the archive.

        "pathname" is the module path without its ".py" suffix.  "label"
        is the prefix of the message printed when self.debug is set.
        """
        fname, arcname = self._get_codename(pathname, basename)
        if self.debug:
            print("%s %s" % (label, arcname))
        self.write(fname, arcname)

    def _get_codename(self, pathname, basename):
        """Return (filename, archivename) for the path.

        Given a module name path, return the correct file path and
        archive name, compiling if necessary.  For example, given
        /python/lib/string, return (/python/lib/string.pyc, string).

        A .pyo file is preferred when it exists and is at least as new
        as the .py source; otherwise the .pyc is used and is (re)built
        first when it is missing or older than the source.  A compile
        error is printed rather than raised, and the .pyc path is
        returned regardless.
        """
        file_py = pathname + ".py"
        file_pyc = pathname + ".pyc"
        file_pyo = pathname + ".pyo"
        if os.path.isfile(file_pyo) and \
                os.stat(file_pyo).st_mtime >= os.stat(file_py).st_mtime:
            fname = file_pyo    # Use .pyo file
        else:
            fname = file_pyc
            if not os.path.isfile(file_pyc) or \
                    os.stat(file_pyc).st_mtime < os.stat(file_py).st_mtime:
                import py_compile
                if self.debug:
                    print("Compiling %s" % file_py)
                try:
                    # doraise=True so that a syntax error surfaces as
                    # PyCompileError instead of being written to stderr.
                    py_compile.compile(file_py, file_pyc, None, True)
                except py_compile.PyCompileError:
                    # sys.exc_info() keeps this clause valid on every
                    # interpreter version.
                    err = sys.exc_info()[1]
                    print(err.msg)
        archivename = os.path.split(fname)[1]
        if basename:
            archivename = "%s/%s" % (basename, archivename)
        return (fname, archivename)
824 |
|
---|
825 |
|
---|
def main(args = None):
    """Command-line interface: list, test, extract or create a ZIP archive.

    "args" is the argument list without the program name; when it is
    None, sys.argv[1:] is used.  The first argument selects the action
    (-l, -t, -e or -c).  On a missing or unknown action, or a wrong
    number of arguments, the usage text is printed and the process
    exits with status 1.
    """
    import textwrap
    USAGE=textwrap.dedent("""\
        Usage:
            zipfile.py -l zipfile.zip        # Show listing of a zipfile
            zipfile.py -t zipfile.zip        # Test if a zipfile is valid
            zipfile.py -e zipfile.zip target # Extract zipfile into target dir
            zipfile.py -c zipfile.zip src ... # Create zipfile from sources
        """)
    if args is None:
        args = sys.argv[1:]

    if not args or args[0] not in ('-l', '-c', '-e', '-t'):
        print(USAGE)
        sys.exit(1)

    if args[0] == '-l':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            zf.printdir()
        finally:
            zf.close()

    elif args[0] == '-t':
        if len(args) != 2:
            print(USAGE)
            sys.exit(1)
        zf = ZipFile(args[1], 'r')
        try:
            # testzip() returns the name of the first bad member, or None.
            badfile = zf.testzip()
        finally:
            zf.close()
        if badfile:
            print("The following enclosed file is corrupted: %r" % (badfile,))
        print("Done testing")

    elif args[0] == '-e':
        if len(args) != 3:
            print(USAGE)
            sys.exit(1)

        zf = ZipFile(args[1], 'r')
        out = args[2]
        # Absolute target directory with a trailing separator, used to
        # verify that every extracted file stays inside it.
        out_prefix = os.path.join(os.path.abspath(out), '')
        try:
            for path in zf.namelist():
                if path.startswith('./'):
                    tgt = os.path.join(out, path[2:])
                else:
                    tgt = os.path.join(out, path)

                # Member names come from the archive and may be hostile
                # ("../x", absolute paths); never write outside "out".
                if not os.path.abspath(tgt).startswith(out_prefix):
                    print("Skipping unsafe member name: %r" % (path,))
                    continue

                # Explicit directory entries carry no data; just make
                # sure the directory exists.
                if path.endswith('/'):
                    if not os.path.isdir(tgt):
                        os.makedirs(tgt)
                    continue

                tgtdir = os.path.dirname(tgt)
                # tgtdir is '' when extracting a top-level member into
                # the current directory; os.makedirs('') would fail.
                if tgtdir and not os.path.exists(tgtdir):
                    os.makedirs(tgtdir)
                fp = open(tgt, 'wb')
                try:
                    fp.write(zf.read(path))
                finally:
                    fp.close()
        finally:
            zf.close()

    elif args[0] == '-c':
        if len(args) < 3:
            print(USAGE)
            sys.exit(1)

        def addToZip(zf, path, zippath):
            # Add a file, or recurse into a directory; anything else
            # (e.g. a missing path) is ignored.
            if os.path.isfile(path):
                zf.write(path, zippath, ZIP_DEFLATED)
            elif os.path.isdir(path):
                for nm in os.listdir(path):
                    addToZip(zf,
                            os.path.join(path, nm), os.path.join(zippath, nm))
            # else: ignore

        zf = ZipFile(args[1], 'w', allowZip64=True)
        try:
            for src in args[2:]:
                addToZip(zf, src, os.path.basename(src))
        finally:
            # Closing is what finalises the archive.
            zf.close()
898 |
|
---|
# Run the command-line interface only when this file is executed as a
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()