Context Navigation

tarfile.py@ 609

Last change on this file since 609 was 391, checked in by dmik, 11 years ago
python: Merge vendor 2.7.6 to trunk.
Property svn:eol-style set to `native`
File size: 87.0 KB

Line
1	#!/usr/bin/env python
2	# -- coding: iso-8859-1 --
3	#-------------------------------------------------------------------
4	# tarfile.py
5	#-------------------------------------------------------------------
6	# Copyright (C) 2002 Lars Gustäbel <lars@gustaebel.de>
7	# All rights reserved.
8	#
9	# Permission is hereby granted, free of charge, to any person
10	# obtaining a copy of this software and associated documentation
11	# files (the "Software"), to deal in the Software without
12	# restriction, including without limitation the rights to use,
13	# copy, modify, merge, publish, distribute, sublicense, and/or sell
14	# copies of the Software, and to permit persons to whom the
15	# Software is furnished to do so, subject to the following
16	# conditions:
17	#
18	# The above copyright notice and this permission notice shall be
19	# included in all copies or substantial portions of the Software.
20	#
21	# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22	# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
23	# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24	# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
25	# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
26	# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27	# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
28	# OTHER DEALINGS IN THE SOFTWARE.
29	#
30	"""Read from and write to tar format archives.
31	"""
32
33	__version__ = "$Revision: 85213 $"
34	# $Source$
35
36	version = "0.9.0"
37	__author__ = "Lars Gustäbel (lars@gustaebel.de)"
38	__date__ = "$Date$"
39	__cvsid__ = "$Id$"
40	__credits__ = "Gustavo Niemeyer, Niels Gustäbel, Richard Townsend."
41
42	#---------
43	# Imports
44	#---------
45	import sys
46	import os
47	import shutil
48	import stat
49	import errno
50	import time
51	import struct
52	import copy
53	import re
54	import operator
55
56	try:
57	import grp, pwd
58	except ImportError:
59	grp = pwd = None
60
61	# from tarfile import *
62	__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]
63
64	#---------------------------------------------------------
65	# tar constants
66	#---------------------------------------------------------
# Elementary sizes of the tar format.
NUL = "\0"                      # the null character
BLOCKSIZE = 512                 # length of processing blocks
RECORDSIZE = BLOCKSIZE * 20     # length of records

# Magic strings.  Each one fills the 8 byte magic/version area of a header
# block: GNU tar writes "ustar", two blanks and a NUL, whereas POSIX writes
# "ustar", a NUL and the version digits "00".
GNU_MAGIC = "ustar  \0"         # magic gnu tar string
POSIX_MAGIC = "ustar\x0000"     # magic posix tar string

# Field widths of the classic header.
LENGTH_NAME = 100               # maximum length of a filename
LENGTH_LINK = 100               # maximum length of a linkname
LENGTH_PREFIX = 155             # maximum length of the prefix field

# Values of the one character type flag.
REGTYPE = "0"                   # regular file
AREGTYPE = "\0"                 # regular file
LNKTYPE = "1"                   # link (inside tarfile)
SYMTYPE = "2"                   # symbolic link
CHRTYPE = "3"                   # character special device
BLKTYPE = "4"                   # block special device
DIRTYPE = "5"                   # directory
FIFOTYPE = "6"                  # fifo special device
CONTTYPE = "7"                  # contiguous file

GNUTYPE_LONGNAME = "L"          # GNU tar longname
GNUTYPE_LONGLINK = "K"          # GNU tar longlink
GNUTYPE_SPARSE = "S"            # GNU tar sparse file

XHDTYPE = "x"                   # POSIX.1-2001 extended header
XGLTYPE = "g"                   # POSIX.1-2001 global header
SOLARIS_XHDTYPE = "X"           # Solaris extended header

# Archive formats that can be written.
USTAR_FORMAT = 0                # POSIX.1-1988 (ustar) format
GNU_FORMAT = 1                  # GNU tar format
PAX_FORMAT = 2                  # POSIX.1-2001 (pax) format
DEFAULT_FORMAT = GNU_FORMAT
99
100	#---------------------------------------------------------
101	# tarfile constants
102	#---------------------------------------------------------
103	# File types that tarfile supports:
# Every member type that tarfile knows how to handle.
SUPPORTED_TYPES = (
    REGTYPE, AREGTYPE, LNKTYPE,
    SYMTYPE, DIRTYPE, FIFOTYPE,
    CONTTYPE, CHRTYPE, BLKTYPE,
    GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
    GNUTYPE_SPARSE,
)

# Member types whose payload is handled like that of a regular file.
REGULAR_TYPES = (
    REGTYPE, AREGTYPE,
    CONTTYPE, GNUTYPE_SPARSE,
)

# Member types that only exist in the GNU tar format.
GNU_TYPES = (
    GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
    GNUTYPE_SPARSE,
)

# Keywords of a pax header that override a TarInfo attribute.
PAX_FIELDS = (
    "path", "linkpath", "size", "mtime",
    "uid", "gid", "uname", "gname",
)

# Keywords of a pax header that carry numbers, mapped to the type used to
# convert them.  Every keyword not listed here is treated as a string.
PAX_NUMBER_FIELDS = dict(
    atime=float,
    ctime=float,
    mtime=float,
    uid=int,
    gid=int,
    size=int,
)
132
133	#---------------------------------------------------------
134	# Bits used in the mode field, values in octal.
135	#---------------------------------------------------------
# File type bits.
S_IFLNK = 0o120000      # symbolic link
S_IFREG = 0o100000      # regular file
S_IFBLK = 0o060000      # block device
S_IFDIR = 0o040000      # directory
S_IFCHR = 0o020000      # character device
S_IFIFO = 0o010000      # fifo

# Special permission bits.
TSUID = 0o4000          # set UID on execution
TSGID = 0o2000          # set GID on execution
TSVTX = 0o1000          # reserved

# Owner, group and other permission bits.
TUREAD = 0o400          # read by owner
TUWRITE = 0o200         # write by owner
TUEXEC = 0o100          # execute/search by owner
TGREAD = 0o040          # read by group
TGWRITE = 0o020         # write by group
TGEXEC = 0o010          # execute/search by group
TOREAD = 0o004          # read by other
TOWRITE = 0o002         # write by other
TOEXEC = 0o001          # execute/search by other
156
157	#---------------------------------------------------------
158	# initialization
159	#---------------------------------------------------------
# Default encoding for member names: the filesystem encoding when the
# interpreter reports one, the interpreter's default encoding otherwise.
ENCODING = sys.getfilesystemencoding()
ENCODING = sys.getdefaultencoding() if ENCODING is None else ENCODING
163
164	#---------------------------------------------------------
165	# Some useful functions
166	#---------------------------------------------------------
167
def stn(s, length):
    """Return s as a field of exactly length characters.

    A longer s is cut off at length, a shorter one is filled up with
    NUL characters.
    """
    padding = NUL * (length - len(s))
    return s[:length] + padding
172
def nts(s):
    """Return the part of a header field that precedes its first NUL.

    A field without any NUL character is returned unchanged.
    """
    text, _sep, _rest = s.partition("\0")
    return text
181
def nti(s):
    """Convert a number field to a python number.

    Two encodings are understood (see itn()): a string of octal digits
    that ends at the first NUL, and the GNU encoding where a leading 0200
    byte is followed by a big-endian binary number.

    A field that is empty or holds nothing but whitespace before its
    terminator decodes as 0.  Raises InvalidHeaderError when the octal
    text cannot be parsed.
    """
    # Slicing instead of indexing keeps an empty field on the octal path
    # rather than raising IndexError.
    if s[:1] != chr(0o200):
        # Same truncation as nts(): keep everything before the first NUL.
        # Some archivers blank-fill unused number fields, so strip before
        # deciding whether the field is empty.
        digits = s.partition("\0")[0].strip()
        try:
            n = int(digits or "0", 8)
        except ValueError:
            raise InvalidHeaderError("invalid header")
    else:
        n = 0
        for char in s[1:]:
            n = (n << 8) + ord(char)
    return n
198
def itn(n, digits=8, format=DEFAULT_FORMAT):
    """Convert a python number to a number field of digits characters.

    Raises ValueError if the number does not fit into the field with the
    encodings that format permits.
    """
    # POSIX 1003.1-1988 stores a number as octal digits followed by a
    # null-byte, which covers values up to (8**(digits-1))-1.  GNU tar
    # can store larger numbers: a leading 0200 byte marks the field and
    # the remaining digits-1 bytes hold a big-endian representation,
    # which covers values up to (256**(digits-1))-1.
    width = digits - 1

    if 0 <= n < 8 ** width:
        return "%0*o" % (width, n) + NUL

    if format != GNU_FORMAT or n >= 256 ** width:
        raise ValueError("overflow in number field")

    if n < 0:
        # XXX We mimic GNU tar's behaviour with negative numbers,
        # this could raise OverflowError.
        n = struct.unpack("L", struct.pack("l", n))[0]

    # Collect the bytes least significant first, add the marker and flip.
    octets = []
    for _ in range(width):
        octets.append(chr(n & 0o377))
        n >>= 8
    octets.append(chr(0o200))
    octets.reverse()
    return "".join(octets)
225
def uts(s, encoding, errors):
    """Encode the unicode object s with encoding and return the string.

    errors is an ordinary codec error handler name, or the special value
    "utf-8".
    """
    if errors != "utf-8":
        return s.encode(encoding, errors)

    # "utf-8" is an extra error handler similar to the -o invalid=UTF-8
    # option in POSIX.1-2001: characters that encoding cannot express are
    # replaced with their UTF-8 representation.
    try:
        return s.encode(encoding, "strict")
    except UnicodeEncodeError:
        pass

    pieces = []
    for char in s:
        try:
            piece = char.encode(encoding, "strict")
        except UnicodeEncodeError:
            piece = char.encode("utf8")
        pieces.append(piece)
    return "".join(pieces)
245
def calc_chksums(buf):
    """Return the pair (unsigned_chksum, signed_chksum) for a header block.

    All bytes of the block are added up, except that the eight bytes of
    the chksum field (offsets 148 to 155) count as if they were spaces.
    According to the GNU tar sources some tars (Sun and NeXT) add the
    bytes up as signed chars, which gives a different result as soon as a
    byte has its high bit set, hence both sums are computed.
    """
    before, after = buf[:148], buf[156:512]
    blanks = 256    # eight spaces: 8 * 0x20

    unsigned_chksum = blanks
    unsigned_chksum += sum(struct.unpack("148B", before))
    unsigned_chksum += sum(struct.unpack("356B", after))

    signed_chksum = blanks
    signed_chksum += sum(struct.unpack("148b", before))
    signed_chksum += sum(struct.unpack("356b", after))

    return unsigned_chksum, signed_chksum
258
def copyfileobj(src, dst, length=None):
    """Copy length bytes from fileobj src to fileobj dst.

    With length None everything up to the end of src is copied.  Raises
    IOError if src runs dry before length bytes were read.
    """
    if length is None:
        shutil.copyfileobj(src, dst)
        return
    if length == 0:
        return

    BUFSIZE = 16 * 1024

    def transfer(count):
        # Move exactly count bytes; a short read is never written out.
        data = src.read(count)
        if len(data) < count:
            raise IOError("end of file reached")
        dst.write(data)

    pending, remainder = divmod(length, BUFSIZE)
    while pending > 0:
        transfer(BUFSIZE)
        pending -= 1

    if remainder != 0:
        transfer(remainder)
    return
283
# Lookup table for filemode().  There is one inner tuple per character of
# the resulting string; each holds (bits, character) pairs that are tried
# in order, so combined masks have to come before the single bits they
# contain.
filemode_table = (
    ((S_IFLNK, "l"),
     (S_IFREG, "-"),
     (S_IFBLK, "b"),
     (S_IFDIR, "d"),
     (S_IFCHR, "c"),
     (S_IFIFO, "p")),

    ((TUREAD, "r"),),
    ((TUWRITE, "w"),),
    ((TUEXEC | TSUID, "s"),
     (TSUID, "S"),
     (TUEXEC, "x")),

    ((TGREAD, "r"),),
    ((TGWRITE, "w"),),
    ((TGEXEC | TSGID, "s"),
     (TSGID, "S"),
     (TGEXEC, "x")),

    ((TOREAD, "r"),),
    ((TOWRITE, "w"),),
    ((TOEXEC | TSVTX, "t"),
     (TSVTX, "T"),
     (TOEXEC, "x"))
)
310
def filemode(mode):
    """Return the mode of a file as a string of the form
       -rwxrwxrwx.
       Used by TarFile.list()
    """
    chars = []
    for candidates in filemode_table:
        # The first entry whose bits are all set in mode decides the
        # character of this position, "-" stands for no match at all.
        matches = (char for bit, char in candidates if mode & bit == bit)
        chars.append(next(matches, "-"))
    return "".join(chars)
325
class TarError(Exception):
    """Base exception."""


class ExtractError(TarError):
    """General exception for extract errors."""


class ReadError(TarError):
    """Exception for unreadable tar archives."""


class CompressionError(TarError):
    """Exception for unavailable compression methods."""


class StreamError(TarError):
    """Exception for unsupported operations on stream-like TarFiles."""


class HeaderError(TarError):
    """Base exception for header errors."""


class EmptyHeaderError(HeaderError):
    """Exception for empty headers."""


class TruncatedHeaderError(HeaderError):
    """Exception for truncated headers."""


class EOFHeaderError(HeaderError):
    """Exception for end of file headers."""


class InvalidHeaderError(HeaderError):
    """Exception for invalid headers."""


class SubsequentHeaderError(HeaderError):
    """Exception for missing and invalid extended headers."""
359
360	#---------------------------
361	# internal stream interface
362	#---------------------------
class _LowLevelFile:
    """Low-level file object. Supports reading and writing.
       It is used instead of a regular file object for streaming
       access.
    """

    def __init__(self, name, mode):
        """Open the file name with os.open().

        mode is "r" for reading or "w" for writing, the latter creates
        or truncates the file.  Any other mode raises KeyError.
        """
        mode = {
            "r": os.O_RDONLY,
            "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
        }[mode]
        # Request binary mode wherever the os module offers the flag.
        if hasattr(os, "O_BINARY"):
            mode |= os.O_BINARY
        self.fd = os.open(name, mode, 0o666)

    def close(self):
        """Close the file descriptor."""
        os.close(self.fd)

    def read(self, size):
        """Return the result of a single os.read() of up to size bytes."""
        return os.read(self.fd, size)

    def write(self, s):
        """Hand s to a single os.write() call."""
        os.write(self.fd, s)
386
class _Stream:
    """Class that serves as an adapter between TarFile and
       a stream-like object.  The stream-like object only
       needs to have a read() or write() method and is accessed
       blockwise.  Use of gzip or bzip2 compression is possible.
       A stream-like object could be for example: sys.stdin,
       sys.stdout, a socket, a tape device etc.

       _Stream is intended to be used only internally.
    """

    def __init__(self, name, mode, comptype, fileobj, bufsize):
        """Construct a _Stream object.

        name is used to open the file when fileobj is None and is stored
        in the gzip header when writing.  mode is "r" or "w".  comptype
        is "tar", "gz", "bz2" or "*" (detect while reading).  bufsize is
        the chunk size used on the underlying file object.

        Raises CompressionError if the module needed for comptype cannot
        be imported.  A file that was opened here is closed again if the
        setup fails.
        """
        self._extfileobj = True
        if fileobj is None:
            fileobj = _LowLevelFile(name, mode)
            self._extfileobj = False

        if comptype == '*':
            # Enable transparent compression detection for the
            # stream interface
            fileobj = _StreamProxy(fileobj)
            comptype = fileobj.getcomptype()

        self.name = name or ""
        self.mode = mode
        self.comptype = comptype
        self.fileobj = fileobj
        self.bufsize = bufsize
        self.buf = ""
        self.pos = 0
        self.closed = False
        # Exception type that the decompressor raises for corrupt data,
        # _read() turns it into a ReadError.
        self.exception = IOError

        try:
            if comptype == "gz":
                try:
                    import zlib
                except ImportError:
                    raise CompressionError("zlib module is not available")
                self.zlib = zlib
                self.crc = zlib.crc32("") & 0xffffffff
                self.exception = zlib.error
                if mode == "r":
                    self._init_read_gz()
                else:
                    self._init_write_gz()

            if comptype == "bz2":
                try:
                    import bz2
                except ImportError:
                    raise CompressionError("bz2 module is not available")
                if mode == "r":
                    self.dbuf = ""
                    self.cmp = bz2.BZ2Decompressor()
                else:
                    self.cmp = bz2.BZ2Compressor()
        except BaseException:
            # Do not leak the file that was opened above and keep
            # __del__() from flushing a half-initialized stream.
            if not self._extfileobj:
                self.fileobj.close()
            self.closed = True
            raise

    def __del__(self):
        if hasattr(self, "closed") and not self.closed:
            self.close()

    def _init_write_gz(self):
        """Initialize for writing with gzip compression.
        """
        self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
                                         -self.zlib.MAX_WBITS,
                                         self.zlib.DEF_MEM_LEVEL,
                                         0)
        timestamp = struct.pack("<L", int(time.time()))
        self.__write("\037\213\010\010%s\002\377" % timestamp)
        if type(self.name) is unicode:
            self.name = self.name.encode("iso-8859-1", "replace")
        if self.name.endswith(".gz"):
            self.name = self.name[:-3]
        self.__write(self.name + NUL)

    def write(self, s):
        """Write string s to the stream.
        """
        if self.comptype == "gz":
            self.crc = self.zlib.crc32(s, self.crc) & 0xffffffff
        self.pos += len(s)
        if self.comptype != "tar":
            s = self.cmp.compress(s)
        self.__write(s)

    def __write(self, s):
        """Write string s to the stream if a whole new block
           is ready to be written.
        """
        self.buf += s
        while len(self.buf) > self.bufsize:
            self.fileobj.write(self.buf[:self.bufsize])
            self.buf = self.buf[self.bufsize:]

    def close(self):
        """Close the _Stream object. No operation should be
           done on it afterwards.
        """
        if self.closed:
            return

        if self.mode == "w" and self.comptype != "tar":
            self.buf += self.cmp.flush()

        if self.mode == "w" and self.buf:
            self.fileobj.write(self.buf)
            self.buf = ""
            if self.comptype == "gz":
                # The native zlib crc is an unsigned 32-bit integer, but
                # the Python wrapper implicitly casts that to a signed C
                # long.  So, on a 32-bit box self.crc may "look negative",
                # while the same crc on a 64-bit box may "look positive".
                # To avoid irksome warnings from the `struct` module, force
                # it to look positive on all boxes.
                self.fileobj.write(struct.pack("<L", self.crc & 0xffffffff))
                self.fileobj.write(struct.pack("<L", self.pos & 0xffffffff))

        if not self._extfileobj:
            self.fileobj.close()

        self.closed = True

    def _init_read_gz(self):
        """Initialize for reading a gzip compressed fileobj.

        Raises ReadError if the magic number is wrong and
        CompressionError if the compression method is not deflate.
        """
        self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
        self.dbuf = ""

        # taken from gzip.GzipFile with some alterations
        if self.__read(2) != "\037\213":
            raise ReadError("not a gzip file")
        if self.__read(1) != "\010":
            raise CompressionError("unsupported compression method")

        flag = ord(self.__read(1))
        self.__read(6)

        if flag & 4:
            xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
            # The extra field is part of the uncompressed header, so it
            # is skipped on the raw stream.  The public read() would run
            # it through the decompressor and advance self.pos.
            self.__read(xlen)
        if flag & 8:
            while True:
                s = self.__read(1)
                if not s or s == NUL:
                    break
        if flag & 16:
            while True:
                s = self.__read(1)
                if not s or s == NUL:
                    break
        if flag & 2:
            self.__read(2)

    def tell(self):
        """Return the stream's file pointer position.
        """
        return self.pos

    def seek(self, pos=0):
        """Set the stream's file pointer to pos. Negative seeking
           is forbidden.
        """
        if pos - self.pos >= 0:
            blocks, remainder = divmod(pos - self.pos, self.bufsize)
            for i in range(blocks):
                self.read(self.bufsize)
            self.read(remainder)
        else:
            raise StreamError("seeking backwards is not allowed")
        return self.pos

    def read(self, size=None):
        """Return the next size number of bytes from the stream.
           If size is not defined, return all bytes of the stream
           up to EOF.
        """
        if size is None:
            t = []
            while True:
                buf = self._read(self.bufsize)
                if not buf:
                    break
                t.append(buf)
            buf = "".join(t)
        else:
            buf = self._read(size)
        self.pos += len(buf)
        return buf

    def _read(self, size):
        """Return size bytes from the stream.

        Raises ReadError if the compressed data cannot be decoded.
        """
        if self.comptype == "tar":
            return self.__read(size)

        c = len(self.dbuf)
        t = [self.dbuf]
        while c < size:
            buf = self.__read(self.bufsize)
            if not buf:
                break
            try:
                buf = self.cmp.decompress(buf)
            except self.exception:
                raise ReadError("invalid compressed data")
            t.append(buf)
            c += len(buf)
        t = "".join(t)
        self.dbuf = t[size:]
        return t[:size]

    def __read(self, size):
        """Return size bytes from stream. If internal buffer is empty,
           read another block from the stream.
        """
        c = len(self.buf)
        t = [self.buf]
        while c < size:
            buf = self.fileobj.read(self.bufsize)
            if not buf:
                break
            t.append(buf)
            c += len(buf)
        t = "".join(t)
        self.buf = t[size:]
        return t[:size]
# class _Stream
615
class _StreamProxy(object):
    """Small proxy class that enables transparent compression
       detection for the Stream interface (mode 'r|*').
    """

    def __init__(self, fileobj):
        """Wrap fileobj and read its first block for the detection."""
        self.fileobj = fileobj
        self.buf = self.fileobj.read(BLOCKSIZE)

    def read(self, size):
        """Return the block read by __init__(), size is ignored.

        This method replaces itself with fileobj.read on the instance,
        so every later call goes straight to the wrapped object.
        """
        first_block = self.buf
        self.read = self.fileobj.read
        return first_block

    def getcomptype(self):
        """Return "gz", "bz2" or "tar" depending on the first block."""
        head = self.buf
        if head[0:3] == "BZh" and head[4:10] == "1AY&SY":
            kind = "bz2"
        else:
            kind = "tar"
        # The gzip test wins, exactly as if it had been made first.
        if head.startswith("\037\213\010"):
            kind = "gz"
        return kind

    def close(self):
        """Close the wrapped file object."""
        self.fileobj.close()
# class StreamProxy
639
class _BZ2Proxy(object):
    """Small proxy class that enables external file object
       support for "r:bz2" and "w:bz2" modes. This is actually
       a workaround for a limitation in bz2 module's BZ2File
       class which (unlike gzip.GzipFile) has no support for
       a file object argument.
    """

    # Number of compressed bytes fetched per read from the file object.
    blocksize = 16 * 1024

    def __init__(self, fileobj, mode):
        """Wrap fileobj for reading (mode "r") or writing."""
        self.fileobj = fileobj
        self.mode = mode
        self.name = getattr(self.fileobj, "name", None)
        self.init()

    def init(self):
        """Start from scratch: reset the position and create a new
           (de)compressor.  In read mode the file object is rewound too.
        """
        import bz2
        self.pos = 0
        if self.mode != "r":
            self.bz2obj = bz2.BZ2Compressor()
            return
        self.bz2obj = bz2.BZ2Decompressor()
        self.fileobj.seek(0)
        self.buf = ""

    def read(self, size):
        """Return up to size bytes of decompressed data."""
        pieces = [self.buf]
        available = len(self.buf)
        while available < size:
            raw = self.fileobj.read(self.blocksize)
            if not raw:
                break
            data = self.bz2obj.decompress(raw)
            pieces.append(data)
            available += len(data)

        pending = "".join(pieces)
        result, self.buf = pending[:size], pending[size:]
        self.pos += len(result)
        return result

    def seek(self, pos):
        """Move to pos in the decompressed data.

        Going backwards restarts the decompression from the beginning.
        """
        if pos < self.pos:
            self.init()
        self.read(pos - self.pos)

    def tell(self):
        """Return the position in the uncompressed data."""
        return self.pos

    def write(self, data):
        """Compress data and pass the result on to the file object."""
        self.pos += len(data)
        self.fileobj.write(self.bz2obj.compress(data))

    def close(self):
        """Flush the compressor in write mode.  The wrapped file object
           is not closed.
        """
        if self.mode == "w":
            self.fileobj.write(self.bz2obj.flush())
# class _BZ2Proxy
701
702	#------------------------
703	# Extraction file object
704	#------------------------
class _FileInFile(object):
    """A thin wrapper around an existing file object that
       provides a part of its data as an individual file
       object.
    """

    def __init__(self, fileobj, offset, size, sparse=None):
        """Expose size bytes of fileobj that start at offset.

        sparse, if given, is the object whose find() method maps a
        position to a section of a sparse member.
        """
        self.fileobj, self.offset = fileobj, offset
        self.size, self.sparse = size, sparse
        self.position = 0

    def tell(self):
        """Return the current file position.
        """
        return self.position

    def seek(self, position):
        """Seek to a position in the file.
        """
        self.position = position

    def read(self, size=None):
        """Read at most size bytes, or everything that is left if size
           is None.
        """
        remaining = self.size - self.position
        if size is None:
            size = remaining
        else:
            size = min(size, remaining)

        if self.sparse is not None:
            return self.readsparse(size)
        return self.readnormal(size)

    def readnormal(self, size):
        """Read operation for regular files.
        """
        start = self.offset + self.position
        self.fileobj.seek(start)
        self.position += size
        return self.fileobj.read(size)

    def readsparse(self, size):
        """Read operation for sparse files.
        """
        chunks = []
        while size > 0:
            chunk = self.readsparsesection(size)
            if not chunk:
                break
            chunks.append(chunk)
            size -= len(chunk)
        return "".join(chunks)

    def readsparsesection(self, size):
        """Read a single section of a sparse file.
        """
        here = self.position
        section = self.sparse.find(here)
        if section is None:
            return ""

        # Never read beyond the end of the section found.
        size = min(size, section.offset + section.size - here)

        if not isinstance(section, _data):
            # Anything that is not a data section reads as NUL bytes.
            self.position = here + size
            return NUL * size

        realpos = section.realpos + here - section.offset
        self.fileobj.seek(self.offset + realpos)
        self.position = here + size
        return self.fileobj.read(size)
#class _FileInFile
779
780
class ExFileObject(object):
    """File-like object for reading an archive member.
       Is returned by TarFile.extractfile().
    """
    # Chunk size used by readline() when it looks for a newline.
    blocksize = 1024

    def __init__(self, tarfile, tarinfo):
        """Set up read access to the member tarinfo of tarfile."""
        sparse = getattr(tarinfo, "sparse", None)
        self.fileobj = _FileInFile(tarfile.fileobj,
                                   tarinfo.offset_data,
                                   tarinfo.size,
                                   sparse)
        self.name = tarinfo.name
        self.mode = "r"
        self.closed = False
        self.size = tarinfo.size

        self.position = 0
        self.buffer = ""

    def read(self, size=None):
        """Read at most size bytes from the file. If size is not
           present or None, read all data until EOF is reached.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        # Data left over from readline() is handed out first.
        if size is None:
            head, self.buffer = self.buffer, ""
            data = head + self.fileobj.read()
        else:
            head, self.buffer = self.buffer[:size], self.buffer[size:]
            data = head + self.fileobj.read(size - len(head))

        self.position += len(data)
        return data

    def readline(self, size=-1):
        """Read one entire line from the file. If size is present
           and non-negative, return a string with at most that
           size, which may be an incomplete line.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        newline = self.buffer.find("\n")
        if newline == -1:
            # Keep reading until a chunk holds a newline or the data ends.
            pieces = [self.buffer]
            while True:
                chunk = self.fileobj.read(self.blocksize)
                pieces.append(chunk)
                if not chunk or "\n" in chunk:
                    break
            self.buffer = "".join(pieces)
            newline = self.buffer.find("\n")

        if newline == -1:
            # no newline found.
            end = len(self.buffer)
        else:
            end = newline + 1

        if size != -1:
            end = min(size, end)

        line, self.buffer = self.buffer[:end], self.buffer[end:]
        self.position += len(line)
        return line

    def readlines(self):
        """Return a list with all remaining lines.
        """
        lines = []
        line = self.readline()
        while line:
            lines.append(line)
            line = self.readline()
        return lines

    def tell(self):
        """Return the current file position.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        return self.position

    def seek(self, pos, whence=os.SEEK_SET):
        """Seek to a position in the file.
        """
        if self.closed:
            raise ValueError("I/O operation on closed file")

        if whence == os.SEEK_SET:
            target = min(max(pos, 0), self.size)
        elif whence == os.SEEK_CUR:
            target = self.position + pos
            if pos < 0:
                target = max(target, 0)
            else:
                target = min(target, self.size)
        elif whence == os.SEEK_END:
            target = max(min(self.size + pos, self.size), 0)
        else:
            raise ValueError("Invalid argument")

        self.position = target
        # Whatever readline() had buffered belongs to the old position.
        self.buffer = ""
        self.fileobj.seek(self.position)

    def close(self):
        """Close the file object.
        """
        self.closed = True

    def __iter__(self):
        """Get an iterator over the file's lines.
        """
        line = self.readline()
        while line:
            yield line
            line = self.readline()
#class ExFileObject
908
909	#------------------
910	# Exported Classes
911	#------------------
912	class TarInfo(object):
913	"""Informational class which holds the details about an
914	archive member given by a tar header block.
915	TarInfo objects are returned by TarFile.getmember(),
916	TarFile.getmembers() and TarFile.gettarinfo() and are
917	usually created internally.
918	"""
919
def __init__(self, name=""):
    """Construct a TarInfo object. name is the optional name
       of the member.
    """
    # Fields that are stored in the header block.
    self.name = name            # member name
    self.mode = 0o644           # file permissions
    self.uid = 0                # user id
    self.gid = 0                # group id
    self.size = 0               # file size
    self.mtime = 0              # modification time
    self.chksum = 0             # header checksum
    self.type = REGTYPE         # member type
    self.linkname = ""          # link name
    self.uname = ""             # user name
    self.gname = ""             # group name
    self.devmajor = 0           # device major number
    self.devminor = 0           # device minor number

    # Positions within the archive.
    self.offset = 0             # the tar header starts here
    self.offset_data = 0        # the file's data starts here

    self.pax_headers = {}       # pax header information
942
# pax headers use the keyword "path" for what the header block calls
# "name", so "path" is offered as a second spelling of that attribute.
def _getpath(self):
    # Getter of the path property.
    return self.name

def _setpath(self, name):
    # Setter of the path property.
    self.name = name

path = property(fget=_getpath, fset=_setpath)
950
# pax headers use the keyword "linkpath" for what the header block calls
# "linkname", so "linkpath" is offered as a second spelling of it.
def _getlinkpath(self):
    # Getter of the linkpath property.
    return self.linkname

def _setlinkpath(self, linkname):
    # Setter of the linkpath property.
    self.linkname = linkname

linkpath = property(fget=_getlinkpath, fset=_setlinkpath)
956
def __repr__(self):
    """Return a short description built from the class name, the
       member name and the id of the object.
    """
    details = (self.__class__.__name__, self.name, id(self))
    return "<%s %r at %#x>" % details
959
def get_info(self, encoding, errors):
    """Return the TarInfo's attributes as a dictionary.

    The name of a directory always ends with a slash and unicode
    values of the text fields are encoded with encoding and errors.
    """
    name = self.name
    if self.type == DIRTYPE and not name.endswith("/"):
        name += "/"

    info = dict(
        name=name,
        mode=self.mode & 0o7777,
        uid=self.uid,
        gid=self.gid,
        size=self.size,
        mtime=self.mtime,
        chksum=self.chksum,
        type=self.type,
        linkname=self.linkname,
        uname=self.uname,
        gname=self.gname,
        devmajor=self.devmajor,
        devminor=self.devminor,
    )

    for key in ("name", "linkname", "uname", "gname"):
        value = info[key]
        if type(value) is unicode:
            info[key] = value.encode(encoding, errors)

    return info
987
def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="strict"):
    """Return a tar header as a string of 512 byte blocks.

    format selects the header flavour and must be one of USTAR_FORMAT,
    GNU_FORMAT and PAX_FORMAT, anything else raises ValueError.
    """
    info = self.get_info(encoding, errors)

    if format == USTAR_FORMAT:
        return self.create_ustar_header(info)
    if format == GNU_FORMAT:
        return self.create_gnu_header(info)
    if format == PAX_FORMAT:
        return self.create_pax_header(info, encoding, errors)
    raise ValueError("invalid format")
1001
def create_ustar_header(self, info):
    """Return the object as a ustar header block.

    Raises ValueError if the linkname does not fit into its field or
    the name cannot be split into a prefix and a name part.
    """
    info["magic"] = POSIX_MAGIC

    linkname, name = info["linkname"], info["name"]
    if len(linkname) > LENGTH_LINK:
        raise ValueError("linkname is too long")

    if len(name) > LENGTH_NAME:
        # Move the leading directories over to the prefix field.
        prefix, name = self._posix_split_name(name)
        info["prefix"] = prefix
        info["name"] = name

    return self._create_header(info, USTAR_FORMAT)
1014
def create_gnu_header(self, info):
    """Return the object as a GNU header block sequence.
    """
    info["magic"] = GNU_MAGIC

    # A linkname or name that is too long for its field is announced by
    # a long header in front of the regular header block.
    blocks = []
    linkname = info["linkname"]
    if len(linkname) > LENGTH_LINK:
        blocks.append(self._create_gnu_long_header(linkname, GNUTYPE_LONGLINK))

    name = info["name"]
    if len(name) > LENGTH_NAME:
        blocks.append(self._create_gnu_long_header(name, GNUTYPE_LONGNAME))

    blocks.append(self._create_header(info, GNU_FORMAT))
    return "".join(blocks)
1028
def create_pax_header(self, info, encoding, errors):
    """Return the object as a ustar header block. If it cannot be
       represented this way, prepend a pax extended header sequence
       with supplement information.
    """
    info["magic"] = POSIX_MAGIC
    pax_headers = self.pax_headers.copy()

    # String fields need a pax record if they exceed the field length or
    # cannot be represented in ASCII encoding.
    string_fields = (
        ("name", "path", LENGTH_NAME),
        ("linkname", "linkpath", LENGTH_LINK),
        ("uname", "uname", 32),
        ("gname", "gname", 32),
    )
    for name, hname, length in string_fields:
        if hname in pax_headers:
            # The pax header has priority.
            continue

        val = info[name].decode(encoding, errors)

        try:
            val.encode("ascii")
        except UnicodeEncodeError:
            needs_record = True
        else:
            needs_record = len(info[name]) > length

        if needs_record:
            pax_headers[hname] = val

    # Number fields need a pax record if they exceed the field limit or
    # like to be stored as float.
    number_fields = (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12))
    for name, digits in number_fields:
        if name not in pax_headers:
            val = info[name]
            fits = 0 <= val < 8 ** (digits - 1) and not isinstance(val, float)
            if fits:
                continue
            pax_headers[name] = unicode(val)
        # The pax header has priority. Avoid overflow in the ustar field.
        info[name] = 0

    # Create a pax extended header if necessary.
    buf = self._create_pax_generic_header(pax_headers) if pax_headers else ""

    return buf + self._create_header(info, USTAR_FORMAT)
1079
@classmethod
def create_pax_global_header(cls, pax_headers):
    """Return a pax global header block sequence that holds the
       records of pax_headers.
    """
    sequence = cls._create_pax_generic_header(pax_headers, type=XGLTYPE)
    return sequence
1085
def _posix_split_name(self, name):
    """Split a name longer than 100 chars into a prefix
       and a name part.

    The split happens at the last slash within the first
    LENGTH_PREFIX + 1 characters.  Raises ValueError if that leaves an
    empty prefix or a name part of more than LENGTH_NAME characters.
    """
    cut = name.rfind("/", 0, LENGTH_PREFIX + 1)
    if cut == -1:
        prefix, rest = "", name
    else:
        # The slash itself belongs to neither part.
        prefix, rest = name[:cut], name[cut + 1:]

    if not prefix or len(rest) > LENGTH_NAME:
        raise ValueError("name is too long")
    return prefix, rest
1100
@staticmethod
def _create_header(info, format):
    """Return a header block. info is a dictionary with file
       information, format must be one of the *_FORMAT constants.

    Keys missing from info fall back to empty or zero values.  The
    number fields are encoded by itn(), which raises ValueError for a
    value that does not fit.
    """
    parts = [
        stn(info.get("name", ""), 100),
        itn(info.get("mode", 0) & 0o7777, 8, format),
        itn(info.get("uid", 0), 8, format),
        itn(info.get("gid", 0), 8, format),
        itn(info.get("size", 0), 12, format),
        itn(info.get("mtime", 0), 12, format),
        # Checksum field.  It has to be exactly 8 bytes wide: every
        # field behind it depends on that width for its offset, and the
        # checksum is calculated as if the field held spaces.
        "        ",
        info.get("type", REGTYPE),
        stn(info.get("linkname", ""), 100),
        stn(info.get("magic", POSIX_MAGIC), 8),
        stn(info.get("uname", ""), 32),
        stn(info.get("gname", ""), 32),
        itn(info.get("devmajor", 0), 8, format),
        itn(info.get("devminor", 0), 8, format),
        stn(info.get("prefix", ""), 155)
    ]

    # The fields add up to 500 bytes, struct pads the block with NULs.
    buf = struct.pack("%ds" % BLOCKSIZE, "".join(parts))
    chksum = calc_chksums(buf[-BLOCKSIZE:])[0]
    # Overwrite the first seven bytes of the checksum field (offsets 148
    # to 154) with six octal digits and a NUL, its last blank remains.
    buf = buf[:-364] + "%06o\0" % chksum + buf[-357:]
    return buf
1128
@staticmethod
def _create_payload(payload):
    """Pad the string payload with NUL bytes so that its length
       becomes a multiple of BLOCKSIZE. A payload that is already
       aligned is returned unchanged.
    """
    overhang = len(payload) % BLOCKSIZE
    if overhang > 0:
        payload += (BLOCKSIZE - overhang) * NUL
    return payload
1138
@classmethod
def _create_gnu_long_header(cls, name, type):
    """Build a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence for
       name: one header block followed by the NUL-terminated name
       padded to whole blocks.
    """
    name += NUL

    info = {
        "name": "././@LongLink",
        "type": type,
        "size": len(name),
        "magic": GNU_MAGIC,
    }

    # Extended header first, then the blocks holding the name.
    header = cls._create_header(info, USTAR_FORMAT)
    return header + cls._create_payload(name)
1155
@classmethod
def _create_pax_generic_header(cls, pax_headers, type=XHDTYPE):
    """Build a POSIX.1-2001 extended or global header sequence from
       the keyword/value pairs in pax_headers. The values must be
       unicode objects.
    """
    lines = []
    for keyword, value in pax_headers.iteritems():
        key_bytes = keyword.encode("utf8")
        val_bytes = value.encode("utf8")
        # Everything except the leading decimal length field:
        # ' ' + keyword + '=' + value + '\n'
        base = len(key_bytes) + len(val_bytes) + 3
        # The length field counts its own digits as well, so iterate
        # until the total no longer changes.
        total = 0
        while True:
            candidate = base + len(str(total))
            if candidate == total:
                break
            total = candidate
        lines.append("%d %s=%s\n" % (total, key_bytes, val_bytes))
    records = "".join(lines)

    # A hardcoded "././@PaxHeader" name is used like star does,
    # instead of the one that POSIX recommends.
    info = {
        "name": "././@PaxHeader",
        "type": type,
        "size": len(records),
        "magic": POSIX_MAGIC,
    }

    # Pax header block followed by the record blocks.
    header = cls._create_header(info, USTAR_FORMAT)
    return header + cls._create_payload(records)
1187
@classmethod
def frombuf(cls, buf):
    """Create a TarInfo object from a string buffer holding one
       header block.

       Raises EmptyHeaderError for an empty buffer,
       TruncatedHeaderError for a buffer that is not BLOCKSIZE long,
       EOFHeaderError for a block of NULs only and InvalidHeaderError
       if the stored checksum does not match.
    """
    size = len(buf)
    if size == 0:
        raise EmptyHeaderError("empty header")
    if size != BLOCKSIZE:
        raise TruncatedHeaderError("truncated header")
    if buf.count(NUL) == BLOCKSIZE:
        raise EOFHeaderError("end of file header")

    chksum = nti(buf[148:156])
    if chksum not in calc_chksums(buf):
        raise InvalidHeaderError("bad checksum")

    obj = cls()
    obj.buf = buf
    obj.name = nts(buf[0:100])
    obj.mode = nti(buf[100:108])
    obj.uid = nti(buf[108:116])
    obj.gid = nti(buf[116:124])
    obj.size = nti(buf[124:136])
    obj.mtime = nti(buf[136:148])
    obj.chksum = chksum
    obj.type = buf[156:157]
    obj.linkname = nts(buf[157:257])
    obj.uname = nts(buf[265:297])
    obj.gname = nts(buf[297:329])
    obj.devmajor = nti(buf[329:337])
    obj.devminor = nti(buf[337:345])
    prefix = nts(buf[345:500])

    # The old V7 tar format stores a directory as a regular file
    # whose name ends with a slash.
    is_v7_dir = obj.type == AREGTYPE and obj.name.endswith("/")
    if is_v7_dir:
        obj.type = DIRTYPE

    # Directories are stored without trailing slashes.
    if obj.isdir():
        obj.name = obj.name.rstrip("/")

    # Put a ustar long name back together from prefix and name.
    if prefix and obj.type not in GNU_TYPES:
        obj.name = prefix + "/" + obj.name
    return obj
1233
@classmethod
def fromtarfile(cls, tarfile):
    """Read one header block from the TarFile object tarfile and
       return the TarInfo object that results from processing it.
    """
    fileobj = tarfile.fileobj
    header = fileobj.read(BLOCKSIZE)
    obj = cls.frombuf(header)
    # The header that was just consumed started one block earlier.
    obj.offset = fileobj.tell() - BLOCKSIZE
    return obj._proc_member(tarfile)
1243
1244	#--------------------------------------------------------------------------
# The following are methods that are called depending on the type of a
# member. The entry point is _proc_member() which can be overridden in a
# subclass to add custom _proc_*() methods. A _proc_*() method MUST
# implement the following operations:
1250	# 1. Set self.offset_data to the position where the data blocks begin,
1251	# if there is data that follows.
1252	# 2. Set tarfile.offset to the position where the next member's header will
1253	# begin.
1254	# 3. Return self or another valid TarInfo object.
def _proc_member(self, tarfile):
    """Dispatch to the _proc_*() method that is responsible for
       this member's type and return its result.
    """
    kind = self.type
    if kind in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
        handler = self._proc_gnulong
    elif kind == GNUTYPE_SPARSE:
        handler = self._proc_sparse
    elif kind in (XHDTYPE, XGLTYPE, SOLARIS_XHDTYPE):
        handler = self._proc_pax
    else:
        handler = self._proc_builtin
    return handler(tarfile)
1267
def _proc_builtin(self, tarfile):
    """Handle a builtin type, or an unknown type which is treated
       like a regular file.
    """
    data_start = tarfile.fileobj.tell()
    self.offset_data = data_start

    if self.isreg() or self.type not in SUPPORTED_TYPES:
        # Data blocks follow; the next header starts behind them.
        tarfile.offset = data_start + self._block(self.size)
    else:
        tarfile.offset = data_start

    # Apply the information saved from a global pax header.
    self._apply_pax_info(tarfile.pax_headers, tarfile.encoding, tarfile.errors)

    return self
1284
def _proc_gnulong(self, tarfile):
    """Handle a GNU longname or longlink member: read the blocks
       holding the long string, then return the following member
       with that string patched in.
    """
    payload = tarfile.fileobj.read(self._block(self.size))

    # The member the long name belongs to follows immediately.
    try:
        member = self.fromtarfile(tarfile)
    except HeaderError:
        raise SubsequentHeaderError("missing or bad subsequent header")

    # The returned member starts where this helper header started.
    member.offset = self.offset
    if self.type == GNUTYPE_LONGNAME:
        member.name = nts(payload)
    elif self.type == GNUTYPE_LONGLINK:
        member.linkname = nts(payload)

    return member
1306
1307	def _proc_sparse(self, tarfile):
1308	"""Process a GNU sparse header plus extra headers.
1309	"""
1310	buf = self.buf
1311	sp = _ringbuffer()
1312	pos = 386
1313	lastpos = 0L
1314	realpos = 0L
1315	# There are 4 possible sparse structs in the
1316	# first header.
1317	for i in xrange(4):
1318	try:
1319	offset = nti(buf[pos:pos + 12])
1320	numbytes = nti(buf[pos + 12:pos + 24])
1321	except ValueError:
1322	break
1323	if offset > lastpos:
1324	sp.append(_hole(lastpos, offset - lastpos))
1325	sp.append(_data(offset, numbytes, realpos))
1326	realpos += numbytes
1327	lastpos = offset + numbytes
1328	pos += 24
1329
1330	isextended = ord(buf[482])
1331	origsize = nti(buf[483:495])
1332
1333	# If the isextended flag is given,
1334	# there are extra headers to process.
1335	while isextended == 1:
1336	buf = tarfile.fileobj.read(BLOCKSIZE)
1337	pos = 0
1338	for i in xrange(21):
1339	try:
1340	offset = nti(buf[pos:pos + 12])
1341	numbytes = nti(buf[pos + 12:pos + 24])
1342	except ValueError:
1343	break
1344	if offset > lastpos:
1345	sp.append(_hole(lastpos, offset - lastpos))
1346	sp.append(_data(offset, numbytes, realpos))
1347	realpos += numbytes
1348	lastpos = offset + numbytes
1349	pos += 24
1350	isextended = ord(buf[504])
1351
1352	if lastpos < origsize:
1353	sp.append(_hole(lastpos, origsize - lastpos))
1354
1355	self.sparse = sp
1356
1357	self.offset_data = tarfile.fileobj.tell()
1358	tarfile.offset = self.offset_data + self._block(self.size)
1359	self.size = origsize
1360
1361	return self
1362
def _proc_pax(self, tarfile):
    """Process an extended or global header as described in
       POSIX.1-2001.

       The records of the header are parsed into a dictionary, then
       the following header is read and returned. For an extended
       header the parsed values are applied to that following member.
       A global header updates tarfile.pax_headers in place.

       Raises InvalidHeaderError for a record that claims a length
       of zero and SubsequentHeaderError if the following header is
       missing or bad.
    """
    # Read the header information.
    buf = tarfile.fileobj.read(self._block(self.size))

    # A pax header stores supplemental information for either
    # the following file (extended) or all following files
    # (global).
    if self.type == XGLTYPE:
        pax_headers = tarfile.pax_headers
    else:
        pax_headers = tarfile.pax_headers.copy()

    # Parse pax header information. A record looks like this:
    # "%d %s=%s\n" % (length, keyword, value). length is the size
    # of the complete record including the length field itself and
    # the newline. keyword and value are both UTF-8 encoded strings.
    regex = re.compile(r"(\d+) ([^=]+)=", re.U)
    pos = 0
    while True:
        match = regex.match(buf, pos)
        if not match:
            break

        length, keyword = match.groups()
        length = int(length)
        if length == 0:
            # A zero length would never advance pos, so a crafted
            # archive could keep this loop spinning forever.
            raise InvalidHeaderError("invalid header")
        value = buf[match.end(2) + 1:match.start(1) + length - 1]

        keyword = keyword.decode("utf8")
        value = value.decode("utf8")

        pax_headers[keyword] = value
        pos += length

    # Fetch the next header.
    try:
        member = self.fromtarfile(tarfile)
    except HeaderError:
        raise SubsequentHeaderError("missing or bad subsequent header")

    if self.type in (XHDTYPE, SOLARIS_XHDTYPE):
        # Patch the TarInfo object with the extended header info.
        member._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors)
        member.offset = self.offset

        if "size" in pax_headers:
            # If the extended header replaces the size field,
            # we need to recalculate the offset where the next
            # header starts.
            offset = member.offset_data
            if member.isreg() or member.type not in SUPPORTED_TYPES:
                offset += member._block(member.size)
            tarfile.offset = offset

    return member
1420
def _apply_pax_info(self, pax_headers, encoding, errors):
    """Overwrite attributes of this object with the values from a
       previous pax extended or global header and keep a copy of
       pax_headers in self.pax_headers.
    """
    for keyword, value in pax_headers.iteritems():
        # Only keywords listed in PAX_FIELDS map to attributes.
        if keyword in PAX_FIELDS:
            if keyword == "path":
                value = value.rstrip("/")

            if keyword not in PAX_NUMBER_FIELDS:
                value = uts(value, encoding, errors)
            else:
                convert = PAX_NUMBER_FIELDS[keyword]
                try:
                    value = convert(value)
                except ValueError:
                    # Unparsable numbers are replaced by 0.
                    value = 0

            setattr(self, keyword, value)

    self.pax_headers = pax_headers.copy()
1443
def _block(self, count):
    """Return count rounded up to the next multiple of BLOCKSIZE,
       e.g. _block(834) => 1024.
    """
    whole, rest = divmod(count, BLOCKSIZE)
    return (whole + 1 if rest else whole) * BLOCKSIZE
1452
def isreg(self):
    """Return True if the member's type is one of REGULAR_TYPES."""
    kind = self.type
    return kind in REGULAR_TYPES
def isfile(self):
    """Alias of isreg(): return whatever isreg() returns."""
    regular = self.isreg()
    return regular
def isdir(self):
    """Return True if the member's type is DIRTYPE."""
    kind = self.type
    return kind == DIRTYPE
def issym(self):
    """Return True if the member's type is SYMTYPE."""
    kind = self.type
    return kind == SYMTYPE
def islnk(self):
    """Return True if the member's type is LNKTYPE."""
    kind = self.type
    return kind == LNKTYPE
def ischr(self):
    """Return True if the member's type is CHRTYPE."""
    kind = self.type
    return kind == CHRTYPE
def isblk(self):
    """Return True if the member's type is BLKTYPE."""
    kind = self.type
    return kind == BLKTYPE
def isfifo(self):
    """Return True if the member's type is FIFOTYPE."""
    kind = self.type
    return kind == FIFOTYPE
def issparse(self):
    """Return True if the member's type is GNUTYPE_SPARSE."""
    kind = self.type
    return kind == GNUTYPE_SPARSE
def isdev(self):
    """Return True if the member's type is CHRTYPE, BLKTYPE or
       FIFOTYPE.
    """
    device_types = (CHRTYPE, BLKTYPE, FIFOTYPE)
    return self.type in device_types
1473	# class TarInfo
1474
class TarFile(object):
    """The TarFile Class provides an interface to tar archives.

       The class attributes below are the defaults; __init__() replaces
       them per instance for every argument that is not None.
    """

    debug = 0                   # May be set from 0 (no msgs) to 3 (all msgs)

    dereference = False         # If true, add content of linked file to the
                                # tar file, else the link.

    ignore_zeros = False        # If true, skips empty or invalid blocks and
                                # continues processing.

    errorlevel = 1              # If 0, fatal errors only appear in debug
                                # messages (if debug >= 0). If > 0, errors
                                # are passed to the caller as exceptions.

    format = DEFAULT_FORMAT     # The format to use when creating an archive.

    encoding = ENCODING         # Encoding for 8-bit character strings.

    errors = None               # Error handler for unicode conversion.

    tarinfo = TarInfo           # The default TarInfo class to use.

    fileobject = ExFileObject   # The default ExFileObject class to use.
1500
def __init__(self, name=None, mode="r", fileobj=None, format=None,
        tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
        errors=None, pax_headers=None, debug=None, errorlevel=None):
    """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
       read from an existing archive, 'a' to append data to an existing
       file or 'w' to create a new file overwriting an existing one. `mode'
       defaults to 'r'.
       If `fileobj' is given, it is used for reading or writing data. If it
       can be determined, `mode' is overridden by `fileobj's mode.
       `fileobj' is not closed, when TarFile is closed.

       Every other argument that is not None replaces the class
       attribute of the same name. `errors' defaults to "utf-8" in read
       mode and to "strict" otherwise. `pax_headers' is only used if the
       format is PAX_FORMAT.

       Raises ValueError for any mode other than 'r', 'a' or 'w' and
       ReadError if a header cannot be read while scanning an archive
       opened for appending.
    """
    # Look the mode up in the table instead of using a substring
    # test: the empty string is "in" every string and used to slip
    # through, only to fail with a KeyError below.
    modes = {"r": "rb", "a": "r+b", "w": "wb"}
    if mode not in modes:
        raise ValueError("mode must be 'r', 'a' or 'w'")
    self.mode = mode
    self._mode = modes[mode]

    if not fileobj:
        if self.mode == "a" and not os.path.exists(name):
            # Create nonexistent files in append mode.
            self.mode = "w"
            self._mode = "wb"
        fileobj = bltn_open(name, self._mode)
        self._extfileobj = False
    else:
        if name is None and hasattr(fileobj, "name"):
            name = fileobj.name
        if hasattr(fileobj, "mode"):
            self._mode = fileobj.mode
        self._extfileobj = True
    self.name = os.path.abspath(name) if name else None
    self.fileobj = fileobj

    # Init attributes.
    if format is not None:
        self.format = format
    if tarinfo is not None:
        self.tarinfo = tarinfo
    if dereference is not None:
        self.dereference = dereference
    if ignore_zeros is not None:
        self.ignore_zeros = ignore_zeros
    if encoding is not None:
        self.encoding = encoding

    if errors is not None:
        self.errors = errors
    elif mode == "r":
        self.errors = "utf-8"
    else:
        self.errors = "strict"

    if pax_headers is not None and self.format == PAX_FORMAT:
        self.pax_headers = pax_headers
    else:
        self.pax_headers = {}

    if debug is not None:
        self.debug = debug
    if errorlevel is not None:
        self.errorlevel = errorlevel

    # Init datastructures.
    self.closed = False
    self.members = []       # list of members as TarInfo objects
    self._loaded = False    # flag if all members have been read
    self.offset = self.fileobj.tell()
                            # current position in the archive file
    self.inodes = {}        # dictionary caching the inodes of
                            # archive members already added

    try:
        if self.mode == "r":
            self.firstmember = None
            self.firstmember = self.next()

        if self.mode == "a":
            # Move to the end of the archive,
            # before the first empty block.
            while True:
                self.fileobj.seek(self.offset)
                try:
                    tarinfo = self.tarinfo.fromtarfile(self)
                    self.members.append(tarinfo)
                except EOFHeaderError:
                    self.fileobj.seek(self.offset)
                    break
                except HeaderError as e:
                    raise ReadError(str(e))

        if self.mode in "aw":
            self._loaded = True

            if self.pax_headers:
                buf = self.tarinfo.create_pax_global_header(self.pax_headers.copy())
                self.fileobj.write(buf)
                self.offset += len(buf)
    except:
        # Deliberately bare: whatever went wrong, a file opened
        # here must not leak. The exception is re-raised unchanged.
        if not self._extfileobj:
            self.fileobj.close()
        self.closed = True
        raise
1602
def _getposix(self):
    """Getter of the deprecated posix property: True if the format
       is USTAR_FORMAT.
    """
    is_ustar = self.format == USTAR_FORMAT
    return is_ustar
def _setposix(self, value):
    """Setter of the deprecated posix property: warn, then select
       USTAR_FORMAT for a true value and GNU_FORMAT otherwise.
    """
    import warnings
    warnings.warn("use the format attribute instead", DeprecationWarning,
                  2)
    self.format = USTAR_FORMAT if value else GNU_FORMAT
posix = property(_getposix, _setposix)
1614
1615	#--------------------------------------------------------------------------
1616	# Below are the classmethods which act as alternate constructors to the
1617	# TarFile class. The open() method is the only one that is needed for
1618	# public use; it is the "super"-constructor and is able to select an
1619	# adequate "sub"-constructor for a particular compression using the mapping
1620	# from OPEN_METH.
1621	#
1622	# This concept allows one to subclass TarFile without losing the comfort of
1623	# the super-constructor. A sub-constructor is registered and made available
1624	# by adding it to the mapping in OPEN_METH.
1625
@classmethod
def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE, **kwargs):
    """Open a tar archive for reading, writing or appending. Return
       an appropriate TarFile class.

       mode:
       'r' or 'r:*' open for reading with transparent compression
       'r:'         open for reading exclusively uncompressed
       'r:gz'       open for reading with gzip compression
       'r:bz2'      open for reading with bzip2 compression
       'a' or 'a:'  open for appending, creating the file if necessary
       'w' or 'w:'  open for writing without compression
       'w:gz'       open for writing with gzip compression
       'w:bz2'      open for writing with bzip2 compression

       'r|*'        open a stream of tar blocks with transparent compression
       'r|'         open an uncompressed stream of tar blocks for reading
       'r|gz'       open a gzip compressed stream of tar blocks
       'r|bz2'      open a bzip2 compressed stream of tar blocks
       'w|'         open an uncompressed stream for writing
       'w|gz'       open a gzip compressed stream for writing
       'w|bz2'      open a bzip2 compressed stream for writing

       Raises ValueError if neither name nor fileobj is given or the
       mode cannot be interpreted, CompressionError for an unknown
       compression type and ReadError if no registered method can
       read the archive.
    """

    if not name and not fileobj:
        raise ValueError("nothing to open")

    if mode in ("r", "r:*"):
        # Find out which *open() is appropriate for opening the file.
        for comptype in cls.OPEN_METH:
            func = getattr(cls, cls.OPEN_METH[comptype])
            if fileobj is not None:
                saved_pos = fileobj.tell()
            try:
                return func(name, "r", fileobj, **kwargs)
            except (ReadError, CompressionError):
                # Rewind a caller-supplied file object so that the
                # next method starts at the same position.
                if fileobj is not None:
                    fileobj.seek(saved_pos)
                continue
        raise ReadError("file could not be opened successfully")

    elif ":" in mode:
        filemode, comptype = mode.split(":", 1)
        filemode = filemode or "r"
        comptype = comptype or "tar"

        # Select the *open() function according to
        # given compression.
        if comptype in cls.OPEN_METH:
            func = getattr(cls, cls.OPEN_METH[comptype])
        else:
            raise CompressionError("unknown compression type %r" % comptype)
        return func(name, filemode, fileobj, **kwargs)

    elif "|" in mode:
        # The separator is the plain pipe character, exactly as in
        # the documented 'r|gz' style modes.
        filemode, comptype = mode.split("|", 1)
        filemode = filemode or "r"
        comptype = comptype or "tar"

        if filemode not in "rw":
            raise ValueError("mode must be 'r' or 'w'")

        t = cls(name, filemode,
                _Stream(name, filemode, comptype, fileobj, bufsize),
                **kwargs)
        # The _Stream object was created here, so the TarFile owns it.
        t._extfileobj = False
        return t

    elif mode in ("a", "w"):
        # Compared against a tuple: a substring test would also let
        # the empty string through.
        return cls.taropen(name, mode, fileobj, **kwargs)

    raise ValueError("undiscernible mode")
1698
@classmethod
def taropen(cls, name, mode="r", fileobj=None, **kwargs):
    """Open uncompressed tar archive name for reading or writing.

       mode must be exactly 'r', 'a' or 'w', anything else (including
       the empty string) raises ValueError. All other arguments are
       passed on to the class constructor.
    """
    # Tuple membership instead of a substring test, which would
    # accept the empty string.
    if mode not in ("r", "a", "w"):
        raise ValueError("mode must be 'r', 'a' or 'w'")
    return cls(name, mode, fileobj, **kwargs)
1706
@classmethod
def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
    """Open gzip compressed tar archive name for reading or writing.
       Appending is not allowed.

       Raises ValueError for a mode other than 'r' or 'w',
       CompressionError if the gzip module is not usable and ReadError
       if opening the archive fails with an IOError.
    """
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'")

    try:
        import gzip
        gzip.GzipFile
    except (ImportError, AttributeError):
        raise CompressionError("gzip module is not available")

    # Let GzipFile open `name' itself if no fileobj was passed. It
    # then owns that file and closes it together with itself, while
    # GzipFile.close() leaves a caller-supplied fileobj open. Opening
    # the file here and handing it over would leave nobody in charge
    # of closing it.
    gzfile = gzip.GzipFile(name, mode, compresslevel, fileobj)

    try:
        t = cls.taropen(name, mode, gzfile, **kwargs)
    except IOError:
        gzfile.close()
        raise ReadError("not a gzip file")
    except:
        # Do not leak the file on any other failure either.
        gzfile.close()
        raise
    t._extfileobj = False
    return t
1732
@classmethod
def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
    """Open bzip2 compressed tar archive name for reading or writing.
       Appending is not allowed.

       Raises ValueError for a mode other than 'r' or 'w',
       CompressionError if the bz2 module is missing and ReadError if
       opening the archive fails with an IOError or EOFError.
    """
    if mode not in ("r", "w"):
        raise ValueError("mode must be 'r' or 'w'.")

    try:
        import bz2
    except ImportError:
        raise CompressionError("bz2 module is not available")

    if fileobj is not None:
        fileobj = _BZ2Proxy(fileobj, mode)
    else:
        fileobj = bz2.BZ2File(name, mode, compresslevel=compresslevel)

    # fileobj is now an object created in this method (a BZ2File or
    # the proxy), so it is closed here if the archive cannot be
    # opened. NOTE(review): this assumes _BZ2Proxy.close() leaves the
    # wrapped caller-supplied file object open -- confirm.
    try:
        t = cls.taropen(name, mode, fileobj, **kwargs)
    except (IOError, EOFError):
        fileobj.close()
        raise ReadError("not a bzip2 file")
    except:
        fileobj.close()
        raise
    t._extfileobj = False
    return t
1757
# All *open() methods are registered here. The keys are the compression
# types accepted in a mode string, the values are the names of the
# classmethods that open() looks up with getattr().
OPEN_METH = {
    "tar": "taropen",   # uncompressed tar
    "gz":  "gzopen",    # gzip compressed tar
    "bz2": "bz2open"    # bzip2 compressed tar
}
1764
1765	#--------------------------------------------------------------------------
1766	# The public methods which TarFile provides:
1767
def close(self):
    """Close the TarFile. When writing or appending, two zero blocks
       are written first and the archive is padded with zeros to a
       multiple of RECORDSIZE. Closing twice is harmless.
    """
    if self.closed:
        return

    if self.mode in "aw":
        trailer = BLOCKSIZE * 2
        self.fileobj.write(NUL * trailer)
        self.offset += trailer
        # Pad the end with zero blocks
        # (like option -b20 for tar does).
        leftover = self.offset % RECORDSIZE
        if leftover > 0:
            self.fileobj.write(NUL * (RECORDSIZE - leftover))

    if not self._extfileobj:
        self.fileobj.close()
    self.closed = True
1787
def getmember(self, name):
    """Return the TarInfo object of the member `name'. KeyError is
       raised if there is no such member. If a member occurs more than
       once in the archive, its last occurrence is assumed to be the
       most up-to-date version.
    """
    found = self._getmember(name)
    if found is not None:
        return found
    raise KeyError("filename %r not found" % name)
1798
def getmembers(self):
    """Return the list of TarInfo objects of all members, in the
       order in which they appear in the archive.
    """
    self._check()
    if self._loaded:
        return self.members
    # The whole archive has to be scanned before the list of
    # members is complete.
    self._load()
    return self.members
1808
def getnames(self):
    """Return the names of all members as a list, in the same order
       as the list returned by getmembers().
    """
    names = []
    for member in self.getmembers():
        names.append(member.name)
    return names
1814
1815	def gettarinfo(self, name=None, arcname=None, fileobj=None):
1816	"""Create a TarInfo object for either the file `name' or the file
1817	object `fileobj' (using os.fstat on its file descriptor). You can
1818	modify some of the TarInfo's attributes before you add it using
1819	addfile(). If given, `arcname' specifies an alternative name for the
1820	file in the archive.
1821	"""
1822	self._check("aw")
1823
1824	# When fileobj is given, replace name by
1825	# fileobj's real name.
1826	if fileobj is not None:
1827	name = fileobj.name
1828
1829	# Building the name of the member in the archive.
1830	# Backward slashes are converted to forward slashes,
1831	# Absolute paths are turned to relative paths.
1832	if arcname is None:
1833	arcname = name
1834	drv, arcname = os.path.splitdrive(arcname)
1835	arcname = arcname.replace(os.sep, "/")
1836	arcname = arcname.lstrip("/")
1837
1838	# Now, fill the TarInfo object with
1839	# information specific for the file.
1840	tarinfo = self.tarinfo()
1841	tarinfo.tarfile = self
1842
1843	# Use os.stat or os.lstat, depending on platform
1844	# and if symlinks shall be resolved.
1845	if fileobj is None:
1846	if hasattr(os, "lstat") and not self.dereference:
1847	statres = os.lstat(name)
1848	else:
1849	statres = os.stat(name)
1850	else:
1851	statres = os.fstat(fileobj.fileno())
1852	linkname = ""
1853
1854	stmd = statres.st_mode
1855	if stat.S_ISREG(stmd):
1856	inode = (statres.st_ino, statres.st_dev)
1857	if not self.dereference and statres.st_nlink > 1 and \
1858	inode in self.inodes and arcname != self.inodes[inode]:
1859	# Is it a hardlink to an already
1860	# archived file?
1861	type = LNKTYPE
1862	linkname = self.inodes[inode]
1863	else:
1864	# The inode is added only if its valid.
1865	# For win32 it is always 0.
1866	type = REGTYPE
1867	if inode[0]:
1868	self.inodes[inode] = arcname
1869	elif stat.S_ISDIR(stmd):
1870	type = DIRTYPE
1871	elif stat.S_ISFIFO(stmd):
1872	type = FIFOTYPE
1873	elif stat.S_ISLNK(stmd):
1874	type = SYMTYPE
1875	linkname = os.readlink(name)
1876	elif stat.S_ISCHR(stmd):
1877	type = CHRTYPE
1878	elif stat.S_ISBLK(stmd):
1879	type = BLKTYPE
1880	else:
1881	return None
1882
1883	# Fill the TarInfo object with all
1884	# information we can get.
1885	tarinfo.name = arcname
1886	tarinfo.mode = stmd
1887	tarinfo.uid = statres.st_uid
1888	tarinfo.gid = statres.st_gid
1889	if type == REGTYPE:
1890	tarinfo.size = statres.st_size
1891	else:
1892	tarinfo.size = 0L
1893	tarinfo.mtime = statres.st_mtime
1894	tarinfo.type = type
1895	tarinfo.linkname = linkname
1896	if pwd:
1897	try:
1898	tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
1899	except KeyError:
1900	pass
1901	if grp:
1902	try:
1903	tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
1904	except KeyError:
1905	pass
1906
1907	if type in (CHRTYPE, BLKTYPE):
1908	if hasattr(os, "major") and hasattr(os, "minor"):
1909	tarinfo.devmajor = os.major(statres.st_rdev)
1910	tarinfo.devminor = os.minor(statres.st_rdev)
1911	return tarinfo
1912
def list(self, verbose=True):
    """Print a table of contents to sys.stdout. If `verbose' is False, only
       the names of the members are printed. If it is True, an `ls -l'-like
       output is produced.

       One line is printed per member. The trailing commas of the print
       statements keep all parts of a member on the same line; the bare
       print at the end of the loop body terminates that line.
    """
    self._check()

    for tarinfo in self:
        if verbose:
            # Mode string, then owner/group (names are preferred over
            # the numeric ids if they are set).
            print filemode(tarinfo.mode),
            print "%s/%s" % (tarinfo.uname or tarinfo.uid,
                             tarinfo.gname or tarinfo.gid),
            # Character and block devices show "major,minor" in the
            # column that otherwise holds the size.
            if tarinfo.ischr() or tarinfo.isblk():
                print "%10s" % ("%d,%d" \
                                % (tarinfo.devmajor, tarinfo.devminor)),
            else:
                print "%10d" % tarinfo.size,
            # Modification time in local time.
            print "%d-%02d-%02d %02d:%02d:%02d" \
                  % time.localtime(tarinfo.mtime)[:6],

        # Directories are marked with a trailing slash.
        print tarinfo.name + ("/" if tarinfo.isdir() else ""),

        if verbose:
            if tarinfo.issym():
                print "->", tarinfo.linkname,
            if tarinfo.islnk():
                print "link to", tarinfo.linkname,
        print
1941
def add(self, name, arcname=None, recursive=True, exclude=None, filter=None):
    """Add the file `name' to the archive. `name' may be any type of file
       (directory, fifo, symbolic link, etc.). If given, `arcname'
       specifies an alternative name for the file in the archive.
       Directories are added recursively by default. This can be avoided by
       setting `recursive' to False. `exclude' is a function that should
       return True for each filename to be excluded. `filter' is a function
       that expects a TarInfo object argument and returns the changed
       TarInfo object, if it returns None the TarInfo object will be
       excluded from the archive.
    """
    self._check("aw")

    if arcname is None:
        arcname = name

    # The deprecated exclude callback is asked first.
    if exclude is not None:
        import warnings
        warnings.warn("use the filter argument instead",
                DeprecationWarning, 2)
        if exclude(name):
            self._dbg(2, "tarfile: Excluded %r" % name)
            return

    # Never put the archive into itself.
    if self.name is not None and os.path.abspath(name) == self.name:
        self._dbg(2, "tarfile: Skipped %r" % name)
        return

    self._dbg(1, name)

    # Describe the file with a TarInfo object.
    tarinfo = self.gettarinfo(name, arcname)

    if tarinfo is None:
        self._dbg(1, "tarfile: Unsupported type %r" % name)
        return

    # The filter may modify the TarInfo object or drop it.
    if filter is not None:
        tarinfo = filter(tarinfo)
        if tarinfo is None:
            self._dbg(2, "tarfile: Excluded %r" % name)
            return

    # Regular files are written together with their data.
    if tarinfo.isreg():
        with bltn_open(name, "rb") as source:
            self.addfile(tarinfo, source)
        return

    # Everything else consists of a header only.
    self.addfile(tarinfo)
    if tarinfo.isdir() and recursive:
        for entry in os.listdir(name):
            self.add(os.path.join(name, entry), os.path.join(arcname, entry),
                    recursive, exclude, filter)
2002
def addfile(self, tarinfo, fileobj=None):
    """Add the TarInfo object `tarinfo' to the archive. If `fileobj' is
       given, tarinfo.size bytes are read from it and added to the archive.
       You can create TarInfo objects using gettarinfo().
       On Windows platforms, `fileobj' should always be opened with mode
       'rb' to avoid irritation about the file size.
    """
    self._check("aw")

    # Work on a copy; the caller's object is left alone.
    tarinfo = copy.copy(tarinfo)

    header = tarinfo.tobuf(self.format, self.encoding, self.errors)
    self.fileobj.write(header)
    self.offset += len(header)

    # Copy the data, if any, and pad it to a full block.
    if fileobj is not None:
        size = tarinfo.size
        copyfileobj(fileobj, self.fileobj, size)
        full, tail = divmod(size, BLOCKSIZE)
        if tail > 0:
            self.fileobj.write(NUL * (BLOCKSIZE - tail))
            full += 1
        self.offset += full * BLOCKSIZE

    self.members.append(tarinfo)
2028
def extractall(self, path=".", members=None):
    """Extract all members from the archive to the current working
       directory and set owner, modification time and permissions on
       directories afterwards. `path' specifies a different directory
       to extract to. `members' is optional and must be a subset of the
       list returned by getmembers().
    """
    deferred = []

    if members is None:
        members = self

    for member in members:
        if member.isdir():
            # Directories are created with a safe mode first; their
            # real attributes are applied at the very end.
            deferred.append(member)
            member = copy.copy(member)
            member.mode = 0o700
        self.extract(member, path)

    # Sort the directories by name, then reverse the order.
    deferred.sort(key=operator.attrgetter('name'))
    deferred.reverse()

    # Now set owner, mtime and mode of the directories.
    for member in deferred:
        target = os.path.join(path, member.name)
        try:
            self.chown(member, target)
            self.utime(member, target)
            self.chmod(member, target)
        except ExtractError as e:
            if self.errorlevel > 1:
                raise
            self._dbg(1, "tarfile: %s" % e)
2065
def extract(self, member, path=""):
    """Extract a member from the archive to the current working directory,
       using its full name. Its file information is extracted as accurately
       as possible. `member' may be a filename or a TarInfo object. You can
       specify a different directory using `path'.
    """
    self._check("r")

    tarinfo = self.getmember(member) if isinstance(member, basestring) \
              else member

    # makelink() needs to know where the link target ends up.
    if tarinfo.islnk():
        tarinfo._link_target = os.path.join(path, tarinfo.linkname)

    try:
        self._extract_member(tarinfo, os.path.join(path, tarinfo.name))
    except EnvironmentError as e:
        if self.errorlevel > 0:
            raise
        # Below errorlevel 1 the error is only reported.
        if e.filename is None:
            self._dbg(1, "tarfile: %s" % e.strerror)
        else:
            self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
    except ExtractError as e:
        if self.errorlevel > 1:
            raise
        self._dbg(1, "tarfile: %s" % e)
2098
def extractfile(self, member):
    """Extract a member from the archive as a file object. `member' may be
       a filename or a TarInfo object. If `member' is a regular file, a
       file-like object is returned. If `member' is a link, a file-like
       object is constructed from the link's target. If `member' is none of
       the above, None is returned.
       The file-like object is read-only and provides the following
       methods: read(), readline(), readlines(), seek() and tell()
    """
    self._check("r")

    tarinfo = self.getmember(member) if isinstance(member, basestring) \
              else member

    # Regular files, and members of unknown type which are treated
    # like regular files, get a file object of their own.
    if tarinfo.isreg() or tarinfo.type not in SUPPORTED_TYPES:
        return self.fileobject(self, tarinfo)

    if tarinfo.islnk() or tarinfo.issym():
        if isinstance(self.fileobj, _Stream):
            # A small but ugly workaround for the case that someone tries
            # to extract a (sym)link as a file-object from a non-seekable
            # stream of tar blocks.
            raise StreamError("cannot extract (sym)link as file object")
        # A (sym)link's file object is its target's file object.
        return self.extractfile(self._find_link_target(tarinfo))

    # No data is associated with the member (directory, chrdev,
    # blkdev, etc.), so there is no file object to return.
    return None
2136
def _extract_member(self, tarinfo, targetpath):
    """Write the member described by `tarinfo' to the physical file
       `targetpath', then apply owner, mode and modification time.
    """
    # Drop trailing slashes and switch to the platform's separator.
    targetpath = targetpath.rstrip("/").replace("/", os.sep)

    # Parent directories that are not part of the archive are created
    # with default permissions.
    parent = os.path.dirname(targetpath)
    if parent and not os.path.exists(parent):
        os.makedirs(parent)

    if tarinfo.islnk() or tarinfo.issym():
        message = "%s -> %s" % (tarinfo.name, tarinfo.linkname)
    else:
        message = tarinfo.name
    self._dbg(1, message)

    # Pick the make*() method matching the member type. The methods
    # may be replaced in a subclass.
    if tarinfo.isreg():
        maker = self.makefile
    elif tarinfo.isdir():
        maker = self.makedir
    elif tarinfo.isfifo():
        maker = self.makefifo
    elif tarinfo.ischr() or tarinfo.isblk():
        maker = self.makedev
    elif tarinfo.islnk() or tarinfo.issym():
        maker = self.makelink
    elif tarinfo.type not in SUPPORTED_TYPES:
        maker = self.makeunknown
    else:
        maker = self.makefile
    maker(tarinfo, targetpath)

    self.chown(tarinfo, targetpath)
    # Mode and mtime are not applied to symbolic links.
    if not tarinfo.issym():
        self.chmod(tarinfo, targetpath)
        self.utime(tarinfo, targetpath)
2178
2179	#--------------------------------------------------------------------------
2180	# Below are the different file methods. They are called via
2181	# _extract_member() when extract() is called. They can be replaced in a
2182	# subclass to implement other functionality.
2183
def makedir(self, tarinfo, targetpath):
    """Create the directory `targetpath'. A directory that already
       exists is not treated as an error.
    """
    try:
        # The directory is created with a safe mode first; the
        # member's real mode is applied afterwards through chmod().
        os.mkdir(targetpath, 0o700)
    except EnvironmentError as err:
        if err.errno == errno.EEXIST:
            return
        raise
2194
def makefile(self, tarinfo, targetpath):
    """Write the data of the member `tarinfo' to a file called
       targetpath.
    """
    source = self.extractfile(tarinfo)
    try:
        # bltn_open is the builtin open(); the module-level name
        # `open' is rebound to TarFile.open.
        target = bltn_open(targetpath, "wb")
        try:
            copyfileobj(source, target)
        finally:
            target.close()
    finally:
        source.close()
2204
def makeunknown(self, tarinfo, targetpath):
    """Extract a member of unknown type to targetpath as if it were
       a regular file and report that through _dbg().
    """
    self.makefile(tarinfo, targetpath)
    note = ("tarfile: Unknown file type %r, "
            "extracted as regular file." % tarinfo.type)
    self._dbg(1, note)
2212
def makefifo(self, tarinfo, targetpath):
    """Create a fifo called targetpath. Raises ExtractError when the
       os module offers no mkfifo().
    """
    if not hasattr(os, "mkfifo"):
        raise ExtractError("fifo not supported by system")
    os.mkfifo(targetpath)
2220
def makedev(self, tarinfo, targetpath):
    """Create a character or block device node called targetpath.
       Raises ExtractError when the os module lacks mknod() or
       makedev().
    """
    if not (hasattr(os, "mknod") and hasattr(os, "makedev")):
        raise ExtractError("special devices not supported by system")

    # Combine the member's permission bits with the device type bit.
    if tarinfo.isblk():
        mode = tarinfo.mode | stat.S_IFBLK
    else:
        mode = tarinfo.mode | stat.S_IFCHR

    device = os.makedev(tarinfo.devmajor, tarinfo.devminor)
    os.mknod(targetpath, mode, device)
2235
def makelink(self, tarinfo, targetpath):
    """Create the (symbolic) link `targetpath'. When links cannot be
       created (platform limitation), or when the target of a hard
       link does not exist on disk, the referenced member is extracted
       to targetpath instead.
    """
    if not (hasattr(os, "symlink") and hasattr(os, "link")):
        # No link support at all: fall back to a copy of the target.
        try:
            self._extract_member(self._find_link_target(tarinfo), targetpath)
        except KeyError:
            raise ExtractError("unable to resolve link inside archive")
        return

    # For systems that support symbolic and hard links.
    if tarinfo.issym():
        if os.path.lexists(targetpath):
            os.unlink(targetpath)
        os.symlink(tarinfo.linkname, targetpath)
    elif os.path.exists(tarinfo._link_target):
        # _link_target is prepared by extract().
        if os.path.lexists(targetpath):
            os.unlink(targetpath)
        os.link(tarinfo._link_target, targetpath)
    else:
        self._extract_member(self._find_link_target(tarinfo), targetpath)
2260
def chown(self, tarinfo, targetpath):
    """Set the owner of targetpath according to tarinfo. Nothing is
       done unless the pwd module is available and the effective user
       id is 0. Raises ExtractError when the owner cannot be changed.
    """
    # We have to be root to do so.
    if not (pwd and hasattr(os, "geteuid") and os.geteuid() == 0):
        return

    # Names stored in the archive take precedence; the numeric ids are
    # used when a name is unknown on this system.
    try:
        gid = grp.getgrnam(tarinfo.gname)[2]
    except KeyError:
        gid = tarinfo.gid
    try:
        uid = pwd.getpwnam(tarinfo.uname)[2]
    except KeyError:
        uid = tarinfo.uid

    try:
        if tarinfo.issym() and hasattr(os, "lchown"):
            os.lchown(targetpath, uid, gid)
        elif sys.platform != "os2emx":
            # os.chown() is not called on the os2emx platform.
            os.chown(targetpath, uid, gid)
    except EnvironmentError:
        raise ExtractError("could not change owner")
2282
def chmod(self, tarinfo, targetpath):
    """Apply the permission bits stored in tarinfo to targetpath.
       Does nothing when the os module has no chmod(). Raises
       ExtractError when the mode cannot be changed.
    """
    if not hasattr(os, 'chmod'):
        return
    try:
        os.chmod(targetpath, tarinfo.mode)
    except EnvironmentError:
        raise ExtractError("could not change mode")
2291
def utime(self, tarinfo, targetpath):
    """Set access and modification time of targetpath to the mtime
       stored in tarinfo. Does nothing when the os module has no
       utime(). Raises ExtractError when the times cannot be changed.
    """
    if hasattr(os, 'utime'):
        try:
            stamp = tarinfo.mtime
            os.utime(targetpath, (stamp, stamp))
        except EnvironmentError:
            raise ExtractError("could not change modification time")
2301
2302	#--------------------------------------------------------------------------
def next(self):
    """Return the next member of the archive as a TarInfo object, or
       None when no more members are available. Allowed in the modes
       "r" and "a".

       Raises ReadError for a header problem at offset 0 and for any
       SubsequentHeaderError.
    """
    self._check("ra")

    # A member that has already been read ahead is handed out first.
    pending = self.firstmember
    if pending is not None:
        self.firstmember = None
        return pending

    # Read the next block.
    self.fileobj.seek(self.offset)
    tarinfo = None
    while True:
        try:
            tarinfo = self.tarinfo.fromtarfile(self)
        except EOFHeaderError as e:
            if self.ignore_zeros:
                # Skip the block and try again one block further on.
                self._dbg(2, "0x%X: %s" % (self.offset, e))
                self.offset += BLOCKSIZE
                continue
        except InvalidHeaderError as e:
            if self.ignore_zeros:
                self._dbg(2, "0x%X: %s" % (self.offset, e))
                self.offset += BLOCKSIZE
                continue
            elif self.offset == 0:
                raise ReadError(str(e))
        except EmptyHeaderError:
            if self.offset == 0:
                raise ReadError("empty file")
        except TruncatedHeaderError as e:
            if self.offset == 0:
                raise ReadError(str(e))
        except SubsequentHeaderError as e:
            raise ReadError(str(e))
        break

    if tarinfo is None:
        # Nothing more could be read: the member list is complete.
        self._loaded = True
    else:
        self.members.append(tarinfo)

    return tarinfo
2348
2349	#--------------------------------------------------------------------------
2350	# Little helper methods:
2351
def _getmember(self, name, tarinfo=None, normalize=False):
    """Search the archive members from bottom to top for one called
       `name' and return it, or None when there is no match.

       If `tarinfo' is given, only the members located before it are
       searched. With `normalize' set, both `name' and the member
       names are passed through os.path.normpath() before comparing.
    """
    # getmembers() makes sure that all members have been loaded.
    candidates = self.getmembers()

    # Limit the member search list up to tarinfo.
    if tarinfo is not None:
        candidates = candidates[:candidates.index(tarinfo)]

    if normalize:
        name = os.path.normpath(name)

    for candidate in reversed(candidates):
        candidate_name = candidate.name
        if normalize:
            candidate_name = os.path.normpath(candidate_name)
        if name == candidate_name:
            return candidate
    return None
2374
def _load(self):
    """Call next() until it returns None, i.e. read through the
       entire archive, and mark the member list as loaded.
    """
    while self.next() is not None:
        pass
    self._loaded = True
2384
def _check(self, mode=None):
    """Raise IOError if the TarFile is closed or, when `mode' is
       given, if the TarFile's own mode is not contained in it.
    """
    if self.closed:
        raise IOError("%s is closed" % self.__class__.__name__)
    if mode is None:
        return
    if self.mode not in mode:
        raise IOError("bad operation for mode %r" % self.mode)
2393
def _find_link_target(self, tarinfo):
    """Return the archive member that the symlink or hardlink member
       `tarinfo' points to. Raises KeyError when no such member is
       found.
    """
    if tarinfo.issym():
        # A symlink target is relative to the link's own directory.
        # Always search the entire archive.
        parts = (os.path.dirname(tarinfo.name), tarinfo.linkname)
        linkname = "/".join([part for part in parts if part])
        limit = None
    else:
        # Search the archive before the link, because a hard link is
        # just a reference to an already archived file.
        linkname = tarinfo.linkname
        limit = tarinfo

    target = self._getmember(linkname, tarinfo=limit, normalize=True)
    if target is None:
        raise KeyError("linkname %r not found" % linkname)
    return target
2412
def __iter__(self):
    """Return an iterator over the members: a plain iterator over
       self.members once the archive is fully loaded, otherwise a
       TarIter.
    """
    if not self._loaded:
        return TarIter(self)
    return iter(self.members)
2420
def _dbg(self, level, msg):
    """Print `msg' to sys.stderr unless `level' is greater than
       self.debug.
    """
    if not level <= self.debug:
        return
    print >> sys.stderr, msg
2426
def __enter__(self):
    """Enter the runtime context: verify through _check() that the
       TarFile is still open and return the TarFile itself.
    """
    self._check()
    return self
2430
def __exit__(self, type, value, traceback):
    """Leave the runtime context. Without an exception the archive is
       closed normally; with one, only the underlying file object is
       closed (unless it was supplied externally) and the TarFile is
       marked as closed.
    """
    if type is None:
        self.close()
        return

    # An exception occurred. We must not call close() because
    # it would try to write end-of-archive blocks and padding.
    if not self._extfileobj:
        self.fileobj.close()
    self.closed = True
2440	# class TarFile
2441
class TarIter:
    """Iterator over the members of a TarFile.

       for tarinfo in TarFile(...):
           suite...
    """

    def __init__(self, tarfile):
        """Create an iterator positioned at the first member of
           `tarfile'.
        """
        self.tarfile = tarfile
        self.index = 0

    def __iter__(self):
        """Return the iterator itself.
        """
        return self

    def next(self):
        """Return the next member, reading it with TarFile's next()
           method when it has not been loaded yet. When all members
           have been read, mark the TarFile as _loaded and raise
           StopIteration.
        """
        archive = self.tarfile

        # Fix for SF #1100429: Under rare circumstances it can
        # happen that getmembers() is called during iteration,
        # which will cause TarIter to stop prematurely. Members that
        # are already loaded are therefore taken from the list by
        # index.
        if self.index == 0 and archive.firstmember is not None:
            tarinfo = archive.next()
        elif self.index < len(archive.members):
            tarinfo = archive.members[self.index]
        elif not archive._loaded:
            tarinfo = archive.next()
            if not tarinfo:
                archive._loaded = True
                raise StopIteration
        else:
            raise StopIteration

        self.index += 1
        return tarinfo
2479
2480	# Helper classes for sparse file support
class _section:
    """Base class for _data and _hole: a range of `size' bytes that
       starts at `offset'.
    """
    def __init__(self, offset, size):
        self.offset = offset
        self.size = size

    def __contains__(self, offset):
        # The end of the range is exclusive.
        start = self.offset
        return start <= offset < start + self.size
2489
class _data(_section):
    """A data section of a sparse file; in addition to offset and
       size it stores a `realpos' value.
    """
    def __init__(self, offset, size, realpos):
        self.realpos = realpos
        _section.__init__(self, offset, size)
2496
class _hole(_section):
    """A hole section of a sparse file. It adds nothing to _section.
    """
2501
class _ringbuffer(list):
    """List of sections that remembers the index of the last
       successful lookup, so that the next search starts there
       instead of at the beginning.
    """
    def __init__(self):
        # Index of the item returned by the last successful find().
        self.idx = 0

    def find(self, offset):
        """Return the first item, searching circularly from the last
           hit, for which `offset in item' is true. Return None when
           no item contains `offset' or when the buffer is empty.
        """
        count = len(self)
        if not count:
            # Nothing to search; indexing below would raise IndexError.
            return None

        start = self.idx
        idx = start
        while True:
            item = self[idx]
            if offset in item:
                break
            idx = (idx + 1) % count
            if idx == start:
                # End of File: every item was visited once.
                return None
        self.idx = idx
        return item
2522
2523	#---------------------------------------------
2524	# zipfile compatible TarFile class
2525	#---------------------------------------------
TAR_PLAIN = 0           # zipfile.ZIP_STORED
TAR_GZIPPED = 8         # zipfile.ZIP_DEFLATED
class TarFileCompat:
    """TarFile class compatible with standard module zipfile's
       ZipFile class.
    """
    def __init__(self, file, mode="r", compression=TAR_PLAIN):
        """Open `file' with TarFile.taropen() (TAR_PLAIN) or
           TarFile.gzopen() (TAR_GZIPPED). Raises ValueError for any
           other `compression' value.
        """
        from warnings import warnpy3k
        warnpy3k("the TarFileCompat class has been removed in Python 3.0",
                stacklevel=2)
        if compression == TAR_PLAIN:
            opener = TarFile.taropen
        elif compression == TAR_GZIPPED:
            opener = TarFile.gzopen
        else:
            raise ValueError("unknown compression constant")
        self.tarfile = opener(file, mode)
        if mode[0:1] == "r":
            # Give every member the attribute names used by ZipInfo.
            for member in self.tarfile.getmembers():
                member.filename = member.name
                member.file_size = member.size
                member.date_time = time.gmtime(member.mtime)[:6]

    def namelist(self):
        """Return the names of the members listed by infolist()."""
        return [member.name for member in self.infolist()]

    def infolist(self):
        """Return the members whose type is in REGULAR_TYPES."""
        return [member for member in self.tarfile.getmembers()
                if member.type in REGULAR_TYPES]

    def printdir(self):
        """Print the archive listing via TarFile.list()."""
        self.tarfile.list()

    def testzip(self):
        """Do nothing and return None."""
        return

    def getinfo(self, name):
        """Return the member called `name'."""
        return self.tarfile.getmember(name)

    def read(self, name):
        """Return the data of the member called `name'."""
        member = self.tarfile.getmember(name)
        return self.tarfile.extractfile(member).read()

    def write(self, filename, arcname=None, compress_type=None):
        """Add the file `filename' to the archive; `compress_type'
           is accepted but not used.
        """
        self.tarfile.add(filename, arcname)

    def writestr(self, zinfo, bytes):
        """Add the string `bytes' as a member named zinfo.filename
           with the modification time zinfo.date_time.
        """
        try:
            from cStringIO import StringIO
        except ImportError:
            from StringIO import StringIO
        import calendar
        tinfo = TarInfo(zinfo.filename)
        tinfo.size = len(bytes)
        tinfo.mtime = calendar.timegm(zinfo.date_time)
        self.tarfile.addfile(tinfo, StringIO(bytes))

    def close(self):
        """Close the underlying TarFile."""
        self.tarfile.close()
2575	#class TarFileCompat
2576
2577	#--------------------
2578	# exported functions
2579	#--------------------
def is_tarfile(name):
    """Return True if the archive called `name' can be opened and
       closed again without a TarError, else return False.
    """
    try:
        # `open' is the module-level name, rebound at the end of the
        # file to TarFile.open.
        open(name).close()
    except TarError:
        return False
    return True
2590
# Keep the builtin open() reachable as bltn_open (used by makefile())
# before the module-level name `open' is rebound to TarFile.open.
# The order of these two statements matters.
bltn_open = open
open = TarFile.open

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: python/trunk/Lib/tarfile.py@ 609

Download in other formats: