[2] | 1 | #!/usr/bin/env python
|
---|
| 2 | # -*- coding: iso-8859-1 -*-
|
---|
| 3 | #-------------------------------------------------------------------
|
---|
| 4 | # tarfile.py
|
---|
| 5 | #-------------------------------------------------------------------
|
---|
| 6 | # Copyright (C) 2002 Lars Gustäbel <lars@gustaebel.de>
|
---|
| 7 | # All rights reserved.
|
---|
| 8 | #
|
---|
| 9 | # Permission is hereby granted, free of charge, to any person
|
---|
| 10 | # obtaining a copy of this software and associated documentation
|
---|
| 11 | # files (the "Software"), to deal in the Software without
|
---|
| 12 | # restriction, including without limitation the rights to use,
|
---|
| 13 | # copy, modify, merge, publish, distribute, sublicense, and/or sell
|
---|
| 14 | # copies of the Software, and to permit persons to whom the
|
---|
| 15 | # Software is furnished to do so, subject to the following
|
---|
| 16 | # conditions:
|
---|
| 17 | #
|
---|
| 18 | # The above copyright notice and this permission notice shall be
|
---|
| 19 | # included in all copies or substantial portions of the Software.
|
---|
| 20 | #
|
---|
| 21 | # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
---|
| 22 | # EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
---|
| 23 | # OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
---|
| 24 | # NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
---|
| 25 | # HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
---|
| 26 | # WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
---|
| 27 | # FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
---|
| 28 | # OTHER DEALINGS IN THE SOFTWARE.
|
---|
| 29 | #
|
---|
| 30 | """Read from and write to tar format archives.
|
---|
| 31 | """
|
---|
| 32 |
|
---|
[391] | 33 | __version__ = "$Revision: 85213 $"
|
---|
[2] | 34 | # $Source$
|
---|
| 35 |
|
---|
| 36 | version = "0.9.0"
|
---|
| 37 | __author__ = "Lars Gustäbel (lars@gustaebel.de)"
|
---|
[391] | 38 | __date__ = "$Date$"
|
---|
| 39 | __cvsid__ = "$Id$"
|
---|
[2] | 40 | __credits__ = "Gustavo Niemeyer, Niels Gustäbel, Richard Townsend."
|
---|
| 41 |
|
---|
| 42 | #---------
|
---|
| 43 | # Imports
|
---|
| 44 | #---------
|
---|
| 45 | import sys
|
---|
| 46 | import os
|
---|
| 47 | import shutil
|
---|
| 48 | import stat
|
---|
| 49 | import errno
|
---|
| 50 | import time
|
---|
| 51 | import struct
|
---|
| 52 | import copy
|
---|
| 53 | import re
|
---|
| 54 | import operator
|
---|
| 55 |
|
---|
| 56 | try:
|
---|
| 57 | import grp, pwd
|
---|
| 58 | except ImportError:
|
---|
| 59 | grp = pwd = None
|
---|
| 60 |
|
---|
| 61 | # from tarfile import *
|
---|
| 62 | __all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]
|
---|
| 63 |
|
---|
| 64 | #---------------------------------------------------------
|
---|
| 65 | # tar constants
|
---|
| 66 | #---------------------------------------------------------
|
---|
| 67 | NUL = "\0" # the null character
|
---|
| 68 | BLOCKSIZE = 512 # length of processing blocks
|
---|
| 69 | RECORDSIZE = BLOCKSIZE * 20 # length of records
|
---|
| 70 | GNU_MAGIC = "ustar \0" # magic gnu tar string
|
---|
| 71 | POSIX_MAGIC = "ustar\x0000" # magic posix tar string
|
---|
| 72 |
|
---|
| 73 | LENGTH_NAME = 100 # maximum length of a filename
|
---|
| 74 | LENGTH_LINK = 100 # maximum length of a linkname
|
---|
| 75 | LENGTH_PREFIX = 155 # maximum length of the prefix field
|
---|
| 76 |
|
---|
| 77 | REGTYPE = "0" # regular file
|
---|
| 78 | AREGTYPE = "\0" # regular file
|
---|
| 79 | LNKTYPE = "1" # link (inside tarfile)
|
---|
| 80 | SYMTYPE = "2" # symbolic link
|
---|
| 81 | CHRTYPE = "3" # character special device
|
---|
| 82 | BLKTYPE = "4" # block special device
|
---|
| 83 | DIRTYPE = "5" # directory
|
---|
| 84 | FIFOTYPE = "6" # fifo special device
|
---|
| 85 | CONTTYPE = "7" # contiguous file
|
---|
| 86 |
|
---|
| 87 | GNUTYPE_LONGNAME = "L" # GNU tar longname
|
---|
| 88 | GNUTYPE_LONGLINK = "K" # GNU tar longlink
|
---|
| 89 | GNUTYPE_SPARSE = "S" # GNU tar sparse file
|
---|
| 90 |
|
---|
| 91 | XHDTYPE = "x" # POSIX.1-2001 extended header
|
---|
| 92 | XGLTYPE = "g" # POSIX.1-2001 global header
|
---|
| 93 | SOLARIS_XHDTYPE = "X" # Solaris extended header
|
---|
| 94 |
|
---|
| 95 | USTAR_FORMAT = 0 # POSIX.1-1988 (ustar) format
|
---|
| 96 | GNU_FORMAT = 1 # GNU tar format
|
---|
| 97 | PAX_FORMAT = 2 # POSIX.1-2001 (pax) format
|
---|
| 98 | DEFAULT_FORMAT = GNU_FORMAT
|
---|
| 99 |
|
---|
| 100 | #---------------------------------------------------------
|
---|
| 101 | # tarfile constants
|
---|
| 102 | #---------------------------------------------------------
|
---|
| 103 | # File types that tarfile supports:
|
---|
| 104 | SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE,
|
---|
| 105 | SYMTYPE, DIRTYPE, FIFOTYPE,
|
---|
| 106 | CONTTYPE, CHRTYPE, BLKTYPE,
|
---|
| 107 | GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
|
---|
| 108 | GNUTYPE_SPARSE)
|
---|
| 109 |
|
---|
| 110 | # File types that will be treated as a regular file.
|
---|
| 111 | REGULAR_TYPES = (REGTYPE, AREGTYPE,
|
---|
| 112 | CONTTYPE, GNUTYPE_SPARSE)
|
---|
| 113 |
|
---|
| 114 | # File types that are part of the GNU tar format.
|
---|
| 115 | GNU_TYPES = (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
|
---|
| 116 | GNUTYPE_SPARSE)
|
---|
| 117 |
|
---|
| 118 | # Fields from a pax header that override a TarInfo attribute.
|
---|
| 119 | PAX_FIELDS = ("path", "linkpath", "size", "mtime",
|
---|
| 120 | "uid", "gid", "uname", "gname")
|
---|
| 121 |
|
---|
| 122 | # Fields in a pax header that are numbers, all other fields
|
---|
| 123 | # are treated as strings.
|
---|
| 124 | PAX_NUMBER_FIELDS = {
|
---|
| 125 | "atime": float,
|
---|
| 126 | "ctime": float,
|
---|
| 127 | "mtime": float,
|
---|
| 128 | "uid": int,
|
---|
| 129 | "gid": int,
|
---|
| 130 | "size": int
|
---|
| 131 | }
|
---|
| 132 |
|
---|
| 133 | #---------------------------------------------------------
|
---|
| 134 | # Bits used in the mode field, values in octal.
|
---|
| 135 | #---------------------------------------------------------
|
---|
| 136 | S_IFLNK = 0120000 # symbolic link
|
---|
| 137 | S_IFREG = 0100000 # regular file
|
---|
| 138 | S_IFBLK = 0060000 # block device
|
---|
| 139 | S_IFDIR = 0040000 # directory
|
---|
| 140 | S_IFCHR = 0020000 # character device
|
---|
| 141 | S_IFIFO = 0010000 # fifo
|
---|
| 142 |
|
---|
| 143 | TSUID = 04000 # set UID on execution
|
---|
| 144 | TSGID = 02000 # set GID on execution
|
---|
| 145 | TSVTX = 01000 # reserved
|
---|
| 146 |
|
---|
| 147 | TUREAD = 0400 # read by owner
|
---|
| 148 | TUWRITE = 0200 # write by owner
|
---|
| 149 | TUEXEC = 0100 # execute/search by owner
|
---|
| 150 | TGREAD = 0040 # read by group
|
---|
| 151 | TGWRITE = 0020 # write by group
|
---|
| 152 | TGEXEC = 0010 # execute/search by group
|
---|
| 153 | TOREAD = 0004 # read by other
|
---|
| 154 | TOWRITE = 0002 # write by other
|
---|
| 155 | TOEXEC = 0001 # execute/search by other
|
---|
| 156 |
|
---|
| 157 | #---------------------------------------------------------
|
---|
| 158 | # initialization
|
---|
| 159 | #---------------------------------------------------------
|
---|
| 160 | ENCODING = sys.getfilesystemencoding()
|
---|
| 161 | if ENCODING is None:
|
---|
| 162 | ENCODING = sys.getdefaultencoding()
|
---|
| 163 |
|
---|
| 164 | #---------------------------------------------------------
|
---|
| 165 | # Some useful functions
|
---|
| 166 | #---------------------------------------------------------
|
---|
| 167 |
|
---|
| 168 | def stn(s, length):
|
---|
| 169 | """Convert a python string to a null-terminated string buffer.
|
---|
| 170 | """
|
---|
| 171 | return s[:length] + (length - len(s)) * NUL
|
---|
| 172 |
|
---|
| 173 | def nts(s):
|
---|
| 174 | """Convert a null-terminated string field to a python string.
|
---|
| 175 | """
|
---|
| 176 | # Use the string up to the first null char.
|
---|
| 177 | p = s.find("\0")
|
---|
| 178 | if p == -1:
|
---|
| 179 | return s
|
---|
| 180 | return s[:p]
|
---|
| 181 |
|
---|
| 182 | def nti(s):
|
---|
| 183 | """Convert a number field to a python number.
|
---|
| 184 | """
|
---|
| 185 | # There are two possible encodings for a number field, see
|
---|
| 186 | # itn() below.
|
---|
| 187 | if s[0] != chr(0200):
|
---|
| 188 | try:
|
---|
| 189 | n = int(nts(s) or "0", 8)
|
---|
| 190 | except ValueError:
|
---|
[391] | 191 | raise InvalidHeaderError("invalid header")
|
---|
[2] | 192 | else:
|
---|
| 193 | n = 0L
|
---|
| 194 | for i in xrange(len(s) - 1):
|
---|
| 195 | n <<= 8
|
---|
| 196 | n += ord(s[i + 1])
|
---|
| 197 | return n
|
---|
| 198 |
|
---|
| 199 | def itn(n, digits=8, format=DEFAULT_FORMAT):
|
---|
| 200 | """Convert a python number to a number field.
|
---|
| 201 | """
|
---|
| 202 | # POSIX 1003.1-1988 requires numbers to be encoded as a string of
|
---|
| 203 | # octal digits followed by a null-byte, this allows values up to
|
---|
| 204 | # (8**(digits-1))-1. GNU tar allows storing numbers greater than
|
---|
| 205 | # that if necessary. A leading 0200 byte indicates this particular
|
---|
| 206 | # encoding, the following digits-1 bytes are a big-endian
|
---|
| 207 | # representation. This allows values up to (256**(digits-1))-1.
|
---|
| 208 | if 0 <= n < 8 ** (digits - 1):
|
---|
| 209 | s = "%0*o" % (digits - 1, n) + NUL
|
---|
| 210 | else:
|
---|
| 211 | if format != GNU_FORMAT or n >= 256 ** (digits - 1):
|
---|
| 212 | raise ValueError("overflow in number field")
|
---|
| 213 |
|
---|
| 214 | if n < 0:
|
---|
| 215 | # XXX We mimic GNU tar's behaviour with negative numbers,
|
---|
| 216 | # this could raise OverflowError.
|
---|
| 217 | n = struct.unpack("L", struct.pack("l", n))[0]
|
---|
| 218 |
|
---|
| 219 | s = ""
|
---|
| 220 | for i in xrange(digits - 1):
|
---|
| 221 | s = chr(n & 0377) + s
|
---|
| 222 | n >>= 8
|
---|
| 223 | s = chr(0200) + s
|
---|
| 224 | return s
|
---|
| 225 |
|
---|
| 226 | def uts(s, encoding, errors):
|
---|
| 227 | """Convert a unicode object to a string.
|
---|
| 228 | """
|
---|
| 229 | if errors == "utf-8":
|
---|
| 230 | # An extra error handler similar to the -o invalid=UTF-8 option
|
---|
| 231 | # in POSIX.1-2001. Replace untranslatable characters with their
|
---|
| 232 | # UTF-8 representation.
|
---|
| 233 | try:
|
---|
| 234 | return s.encode(encoding, "strict")
|
---|
| 235 | except UnicodeEncodeError:
|
---|
| 236 | x = []
|
---|
| 237 | for c in s:
|
---|
| 238 | try:
|
---|
| 239 | x.append(c.encode(encoding, "strict"))
|
---|
| 240 | except UnicodeEncodeError:
|
---|
| 241 | x.append(c.encode("utf8"))
|
---|
| 242 | return "".join(x)
|
---|
| 243 | else:
|
---|
| 244 | return s.encode(encoding, errors)
|
---|
| 245 |
|
---|
| 246 | def calc_chksums(buf):
|
---|
| 247 | """Calculate the checksum for a member's header by summing up all
|
---|
| 248 | characters except for the chksum field which is treated as if
|
---|
| 249 | it was filled with spaces. According to the GNU tar sources,
|
---|
| 250 | some tars (Sun and NeXT) calculate chksum with signed char,
|
---|
| 251 | which will be different if there are chars in the buffer with
|
---|
| 252 | the high bit set. So we calculate two checksums, unsigned and
|
---|
| 253 | signed.
|
---|
| 254 | """
|
---|
| 255 | unsigned_chksum = 256 + sum(struct.unpack("148B", buf[:148]) + struct.unpack("356B", buf[156:512]))
|
---|
| 256 | signed_chksum = 256 + sum(struct.unpack("148b", buf[:148]) + struct.unpack("356b", buf[156:512]))
|
---|
| 257 | return unsigned_chksum, signed_chksum
|
---|
| 258 |
|
---|
| 259 | def copyfileobj(src, dst, length=None):
|
---|
| 260 | """Copy length bytes from fileobj src to fileobj dst.
|
---|
| 261 | If length is None, copy the entire content.
|
---|
| 262 | """
|
---|
| 263 | if length == 0:
|
---|
| 264 | return
|
---|
| 265 | if length is None:
|
---|
| 266 | shutil.copyfileobj(src, dst)
|
---|
| 267 | return
|
---|
| 268 |
|
---|
| 269 | BUFSIZE = 16 * 1024
|
---|
| 270 | blocks, remainder = divmod(length, BUFSIZE)
|
---|
| 271 | for b in xrange(blocks):
|
---|
| 272 | buf = src.read(BUFSIZE)
|
---|
| 273 | if len(buf) < BUFSIZE:
|
---|
| 274 | raise IOError("end of file reached")
|
---|
| 275 | dst.write(buf)
|
---|
| 276 |
|
---|
| 277 | if remainder != 0:
|
---|
| 278 | buf = src.read(remainder)
|
---|
| 279 | if len(buf) < remainder:
|
---|
| 280 | raise IOError("end of file reached")
|
---|
| 281 | dst.write(buf)
|
---|
| 282 | return
|
---|
| 283 |
|
---|
| 284 | filemode_table = (
|
---|
| 285 | ((S_IFLNK, "l"),
|
---|
| 286 | (S_IFREG, "-"),
|
---|
| 287 | (S_IFBLK, "b"),
|
---|
| 288 | (S_IFDIR, "d"),
|
---|
| 289 | (S_IFCHR, "c"),
|
---|
| 290 | (S_IFIFO, "p")),
|
---|
| 291 |
|
---|
| 292 | ((TUREAD, "r"),),
|
---|
| 293 | ((TUWRITE, "w"),),
|
---|
| 294 | ((TUEXEC|TSUID, "s"),
|
---|
| 295 | (TSUID, "S"),
|
---|
| 296 | (TUEXEC, "x")),
|
---|
| 297 |
|
---|
| 298 | ((TGREAD, "r"),),
|
---|
| 299 | ((TGWRITE, "w"),),
|
---|
| 300 | ((TGEXEC|TSGID, "s"),
|
---|
| 301 | (TSGID, "S"),
|
---|
| 302 | (TGEXEC, "x")),
|
---|
| 303 |
|
---|
| 304 | ((TOREAD, "r"),),
|
---|
| 305 | ((TOWRITE, "w"),),
|
---|
| 306 | ((TOEXEC|TSVTX, "t"),
|
---|
| 307 | (TSVTX, "T"),
|
---|
| 308 | (TOEXEC, "x"))
|
---|
| 309 | )
|
---|
| 310 |
|
---|
| 311 | def filemode(mode):
|
---|
| 312 | """Convert a file's mode to a string of the form
|
---|
| 313 | -rwxrwxrwx.
|
---|
| 314 | Used by TarFile.list()
|
---|
| 315 | """
|
---|
| 316 | perm = []
|
---|
| 317 | for table in filemode_table:
|
---|
| 318 | for bit, char in table:
|
---|
| 319 | if mode & bit == bit:
|
---|
| 320 | perm.append(char)
|
---|
| 321 | break
|
---|
| 322 | else:
|
---|
| 323 | perm.append("-")
|
---|
| 324 | return "".join(perm)
|
---|
| 325 |
|
---|
| 326 | class TarError(Exception):
|
---|
| 327 | """Base exception."""
|
---|
| 328 | pass
|
---|
| 329 | class ExtractError(TarError):
|
---|
| 330 | """General exception for extract errors."""
|
---|
| 331 | pass
|
---|
| 332 | class ReadError(TarError):
|
---|
[391] | 333 | """Exception for unreadable tar archives."""
|
---|
[2] | 334 | pass
|
---|
| 335 | class CompressionError(TarError):
|
---|
| 336 | """Exception for unavailable compression methods."""
|
---|
| 337 | pass
|
---|
| 338 | class StreamError(TarError):
|
---|
| 339 | """Exception for unsupported operations on stream-like TarFiles."""
|
---|
| 340 | pass
|
---|
| 341 | class HeaderError(TarError):
|
---|
[391] | 342 | """Base exception for header errors."""
|
---|
| 343 | pass
|
---|
| 344 | class EmptyHeaderError(HeaderError):
|
---|
| 345 | """Exception for empty headers."""
|
---|
| 346 | pass
|
---|
| 347 | class TruncatedHeaderError(HeaderError):
|
---|
| 348 | """Exception for truncated headers."""
|
---|
| 349 | pass
|
---|
| 350 | class EOFHeaderError(HeaderError):
|
---|
| 351 | """Exception for end of file headers."""
|
---|
| 352 | pass
|
---|
| 353 | class InvalidHeaderError(HeaderError):
|
---|
[2] | 354 | """Exception for invalid headers."""
|
---|
| 355 | pass
|
---|
[391] | 356 | class SubsequentHeaderError(HeaderError):
|
---|
| 357 | """Exception for missing and invalid extended headers."""
|
---|
| 358 | pass
|
---|
[2] | 359 |
|
---|
| 360 | #---------------------------
|
---|
| 361 | # internal stream interface
|
---|
| 362 | #---------------------------
|
---|
| 363 | class _LowLevelFile:
|
---|
| 364 | """Low-level file object. Supports reading and writing.
|
---|
| 365 | It is used instead of a regular file object for streaming
|
---|
| 366 | access.
|
---|
| 367 | """
|
---|
| 368 |
|
---|
| 369 | def __init__(self, name, mode):
|
---|
| 370 | mode = {
|
---|
| 371 | "r": os.O_RDONLY,
|
---|
| 372 | "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
|
---|
| 373 | }[mode]
|
---|
| 374 | if hasattr(os, "O_BINARY"):
|
---|
| 375 | mode |= os.O_BINARY
|
---|
[391] | 376 | self.fd = os.open(name, mode, 0666)
|
---|
[2] | 377 |
|
---|
| 378 | def close(self):
|
---|
| 379 | os.close(self.fd)
|
---|
| 380 |
|
---|
| 381 | def read(self, size):
|
---|
| 382 | return os.read(self.fd, size)
|
---|
| 383 |
|
---|
| 384 | def write(self, s):
|
---|
| 385 | os.write(self.fd, s)
|
---|
| 386 |
|
---|
| 387 | class _Stream:
|
---|
| 388 | """Class that serves as an adapter between TarFile and
|
---|
| 389 | a stream-like object. The stream-like object only
|
---|
| 390 | needs to have a read() or write() method and is accessed
|
---|
| 391 | blockwise. Use of gzip or bzip2 compression is possible.
|
---|
| 392 | A stream-like object could be for example: sys.stdin,
|
---|
| 393 | sys.stdout, a socket, a tape device etc.
|
---|
| 394 |
|
---|
| 395 | _Stream is intended to be used only internally.
|
---|
| 396 | """
|
---|
| 397 |
|
---|
| 398 | def __init__(self, name, mode, comptype, fileobj, bufsize):
|
---|
| 399 | """Construct a _Stream object.
|
---|
| 400 | """
|
---|
| 401 | self._extfileobj = True
|
---|
| 402 | if fileobj is None:
|
---|
| 403 | fileobj = _LowLevelFile(name, mode)
|
---|
| 404 | self._extfileobj = False
|
---|
| 405 |
|
---|
| 406 | if comptype == '*':
|
---|
| 407 | # Enable transparent compression detection for the
|
---|
| 408 | # stream interface
|
---|
| 409 | fileobj = _StreamProxy(fileobj)
|
---|
| 410 | comptype = fileobj.getcomptype()
|
---|
| 411 |
|
---|
| 412 | self.name = name or ""
|
---|
| 413 | self.mode = mode
|
---|
| 414 | self.comptype = comptype
|
---|
| 415 | self.fileobj = fileobj
|
---|
| 416 | self.bufsize = bufsize
|
---|
| 417 | self.buf = ""
|
---|
| 418 | self.pos = 0L
|
---|
| 419 | self.closed = False
|
---|
| 420 |
|
---|
| 421 | if comptype == "gz":
|
---|
| 422 | try:
|
---|
| 423 | import zlib
|
---|
| 424 | except ImportError:
|
---|
| 425 | raise CompressionError("zlib module is not available")
|
---|
| 426 | self.zlib = zlib
|
---|
| 427 | self.crc = zlib.crc32("") & 0xffffffffL
|
---|
| 428 | if mode == "r":
|
---|
| 429 | self._init_read_gz()
|
---|
| 430 | else:
|
---|
| 431 | self._init_write_gz()
|
---|
| 432 |
|
---|
| 433 | if comptype == "bz2":
|
---|
| 434 | try:
|
---|
| 435 | import bz2
|
---|
| 436 | except ImportError:
|
---|
| 437 | raise CompressionError("bz2 module is not available")
|
---|
| 438 | if mode == "r":
|
---|
| 439 | self.dbuf = ""
|
---|
| 440 | self.cmp = bz2.BZ2Decompressor()
|
---|
| 441 | else:
|
---|
| 442 | self.cmp = bz2.BZ2Compressor()
|
---|
| 443 |
|
---|
| 444 | def __del__(self):
|
---|
| 445 | if hasattr(self, "closed") and not self.closed:
|
---|
| 446 | self.close()
|
---|
| 447 |
|
---|
| 448 | def _init_write_gz(self):
|
---|
| 449 | """Initialize for writing with gzip compression.
|
---|
| 450 | """
|
---|
| 451 | self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
|
---|
| 452 | -self.zlib.MAX_WBITS,
|
---|
| 453 | self.zlib.DEF_MEM_LEVEL,
|
---|
| 454 | 0)
|
---|
| 455 | timestamp = struct.pack("<L", long(time.time()))
|
---|
| 456 | self.__write("\037\213\010\010%s\002\377" % timestamp)
|
---|
[391] | 457 | if type(self.name) is unicode:
|
---|
| 458 | self.name = self.name.encode("iso-8859-1", "replace")
|
---|
[2] | 459 | if self.name.endswith(".gz"):
|
---|
| 460 | self.name = self.name[:-3]
|
---|
| 461 | self.__write(self.name + NUL)
|
---|
| 462 |
|
---|
| 463 | def write(self, s):
|
---|
| 464 | """Write string s to the stream.
|
---|
| 465 | """
|
---|
| 466 | if self.comptype == "gz":
|
---|
| 467 | self.crc = self.zlib.crc32(s, self.crc) & 0xffffffffL
|
---|
| 468 | self.pos += len(s)
|
---|
| 469 | if self.comptype != "tar":
|
---|
| 470 | s = self.cmp.compress(s)
|
---|
| 471 | self.__write(s)
|
---|
| 472 |
|
---|
| 473 | def __write(self, s):
|
---|
| 474 | """Write string s to the stream if a whole new block
|
---|
| 475 | is ready to be written.
|
---|
| 476 | """
|
---|
| 477 | self.buf += s
|
---|
| 478 | while len(self.buf) > self.bufsize:
|
---|
| 479 | self.fileobj.write(self.buf[:self.bufsize])
|
---|
| 480 | self.buf = self.buf[self.bufsize:]
|
---|
| 481 |
|
---|
| 482 | def close(self):
|
---|
| 483 | """Close the _Stream object. No operation should be
|
---|
| 484 | done on it afterwards.
|
---|
| 485 | """
|
---|
| 486 | if self.closed:
|
---|
| 487 | return
|
---|
| 488 |
|
---|
| 489 | if self.mode == "w" and self.comptype != "tar":
|
---|
| 490 | self.buf += self.cmp.flush()
|
---|
| 491 |
|
---|
| 492 | if self.mode == "w" and self.buf:
|
---|
| 493 | self.fileobj.write(self.buf)
|
---|
| 494 | self.buf = ""
|
---|
| 495 | if self.comptype == "gz":
|
---|
| 496 | # The native zlib crc is an unsigned 32-bit integer, but
|
---|
| 497 | # the Python wrapper implicitly casts that to a signed C
|
---|
| 498 | # long. So, on a 32-bit box self.crc may "look negative",
|
---|
| 499 | # while the same crc on a 64-bit box may "look positive".
|
---|
| 500 | # To avoid irksome warnings from the `struct` module, force
|
---|
| 501 | # it to look positive on all boxes.
|
---|
| 502 | self.fileobj.write(struct.pack("<L", self.crc & 0xffffffffL))
|
---|
| 503 | self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFFL))
|
---|
| 504 |
|
---|
| 505 | if not self._extfileobj:
|
---|
| 506 | self.fileobj.close()
|
---|
| 507 |
|
---|
| 508 | self.closed = True
|
---|
| 509 |
|
---|
| 510 | def _init_read_gz(self):
|
---|
| 511 | """Initialize for reading a gzip compressed fileobj.
|
---|
| 512 | """
|
---|
| 513 | self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
|
---|
| 514 | self.dbuf = ""
|
---|
| 515 |
|
---|
| 516 | # taken from gzip.GzipFile with some alterations
|
---|
| 517 | if self.__read(2) != "\037\213":
|
---|
| 518 | raise ReadError("not a gzip file")
|
---|
| 519 | if self.__read(1) != "\010":
|
---|
| 520 | raise CompressionError("unsupported compression method")
|
---|
| 521 |
|
---|
| 522 | flag = ord(self.__read(1))
|
---|
| 523 | self.__read(6)
|
---|
| 524 |
|
---|
| 525 | if flag & 4:
|
---|
| 526 | xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
|
---|
| 527 | self.read(xlen)
|
---|
| 528 | if flag & 8:
|
---|
| 529 | while True:
|
---|
| 530 | s = self.__read(1)
|
---|
| 531 | if not s or s == NUL:
|
---|
| 532 | break
|
---|
| 533 | if flag & 16:
|
---|
| 534 | while True:
|
---|
| 535 | s = self.__read(1)
|
---|
| 536 | if not s or s == NUL:
|
---|
| 537 | break
|
---|
| 538 | if flag & 2:
|
---|
| 539 | self.__read(2)
|
---|
| 540 |
|
---|
| 541 | def tell(self):
|
---|
| 542 | """Return the stream's file pointer position.
|
---|
| 543 | """
|
---|
| 544 | return self.pos
|
---|
| 545 |
|
---|
| 546 | def seek(self, pos=0):
|
---|
| 547 | """Set the stream's file pointer to pos. Negative seeking
|
---|
| 548 | is forbidden.
|
---|
| 549 | """
|
---|
| 550 | if pos - self.pos >= 0:
|
---|
| 551 | blocks, remainder = divmod(pos - self.pos, self.bufsize)
|
---|
| 552 | for i in xrange(blocks):
|
---|
| 553 | self.read(self.bufsize)
|
---|
| 554 | self.read(remainder)
|
---|
| 555 | else:
|
---|
| 556 | raise StreamError("seeking backwards is not allowed")
|
---|
| 557 | return self.pos
|
---|
| 558 |
|
---|
| 559 | def read(self, size=None):
|
---|
| 560 | """Return the next size number of bytes from the stream.
|
---|
| 561 | If size is not defined, return all bytes of the stream
|
---|
| 562 | up to EOF.
|
---|
| 563 | """
|
---|
| 564 | if size is None:
|
---|
| 565 | t = []
|
---|
| 566 | while True:
|
---|
| 567 | buf = self._read(self.bufsize)
|
---|
| 568 | if not buf:
|
---|
| 569 | break
|
---|
| 570 | t.append(buf)
|
---|
| 571 | buf = "".join(t)
|
---|
| 572 | else:
|
---|
| 573 | buf = self._read(size)
|
---|
| 574 | self.pos += len(buf)
|
---|
| 575 | return buf
|
---|
| 576 |
|
---|
| 577 | def _read(self, size):
|
---|
| 578 | """Return size bytes from the stream.
|
---|
| 579 | """
|
---|
| 580 | if self.comptype == "tar":
|
---|
| 581 | return self.__read(size)
|
---|
| 582 |
|
---|
| 583 | c = len(self.dbuf)
|
---|
| 584 | t = [self.dbuf]
|
---|
| 585 | while c < size:
|
---|
| 586 | buf = self.__read(self.bufsize)
|
---|
| 587 | if not buf:
|
---|
| 588 | break
|
---|
| 589 | try:
|
---|
| 590 | buf = self.cmp.decompress(buf)
|
---|
| 591 | except IOError:
|
---|
| 592 | raise ReadError("invalid compressed data")
|
---|
| 593 | t.append(buf)
|
---|
| 594 | c += len(buf)
|
---|
| 595 | t = "".join(t)
|
---|
| 596 | self.dbuf = t[size:]
|
---|
| 597 | return t[:size]
|
---|
| 598 |
|
---|
| 599 | def __read(self, size):
|
---|
| 600 | """Return size bytes from stream. If internal buffer is empty,
|
---|
| 601 | read another block from the stream.
|
---|
| 602 | """
|
---|
| 603 | c = len(self.buf)
|
---|
| 604 | t = [self.buf]
|
---|
| 605 | while c < size:
|
---|
| 606 | buf = self.fileobj.read(self.bufsize)
|
---|
| 607 | if not buf:
|
---|
| 608 | break
|
---|
| 609 | t.append(buf)
|
---|
| 610 | c += len(buf)
|
---|
| 611 | t = "".join(t)
|
---|
| 612 | self.buf = t[size:]
|
---|
| 613 | return t[:size]
|
---|
| 614 | # class _Stream
|
---|
| 615 |
|
---|
| 616 | class _StreamProxy(object):
|
---|
| 617 | """Small proxy class that enables transparent compression
|
---|
| 618 | detection for the Stream interface (mode 'r|*').
|
---|
| 619 | """
|
---|
| 620 |
|
---|
| 621 | def __init__(self, fileobj):
|
---|
| 622 | self.fileobj = fileobj
|
---|
| 623 | self.buf = self.fileobj.read(BLOCKSIZE)
|
---|
| 624 |
|
---|
| 625 | def read(self, size):
|
---|
| 626 | self.read = self.fileobj.read
|
---|
| 627 | return self.buf
|
---|
| 628 |
|
---|
| 629 | def getcomptype(self):
|
---|
| 630 | if self.buf.startswith("\037\213\010"):
|
---|
| 631 | return "gz"
|
---|
[391] | 632 | if self.buf[0:3] == "BZh" and self.buf[4:10] == "1AY&SY":
|
---|
[2] | 633 | return "bz2"
|
---|
| 634 | return "tar"
|
---|
| 635 |
|
---|
| 636 | def close(self):
|
---|
| 637 | self.fileobj.close()
|
---|
| 638 | # class StreamProxy
|
---|
| 639 |
|
---|
| 640 | class _BZ2Proxy(object):
|
---|
| 641 | """Small proxy class that enables external file object
|
---|
| 642 | support for "r:bz2" and "w:bz2" modes. This is actually
|
---|
| 643 | a workaround for a limitation in bz2 module's BZ2File
|
---|
| 644 | class which (unlike gzip.GzipFile) has no support for
|
---|
| 645 | a file object argument.
|
---|
| 646 | """
|
---|
| 647 |
|
---|
| 648 | blocksize = 16 * 1024
|
---|
| 649 |
|
---|
| 650 | def __init__(self, fileobj, mode):
|
---|
| 651 | self.fileobj = fileobj
|
---|
| 652 | self.mode = mode
|
---|
| 653 | self.name = getattr(self.fileobj, "name", None)
|
---|
| 654 | self.init()
|
---|
| 655 |
|
---|
| 656 | def init(self):
|
---|
| 657 | import bz2
|
---|
| 658 | self.pos = 0
|
---|
| 659 | if self.mode == "r":
|
---|
| 660 | self.bz2obj = bz2.BZ2Decompressor()
|
---|
| 661 | self.fileobj.seek(0)
|
---|
| 662 | self.buf = ""
|
---|
| 663 | else:
|
---|
| 664 | self.bz2obj = bz2.BZ2Compressor()
|
---|
| 665 |
|
---|
| 666 | def read(self, size):
|
---|
| 667 | b = [self.buf]
|
---|
| 668 | x = len(self.buf)
|
---|
| 669 | while x < size:
|
---|
| 670 | raw = self.fileobj.read(self.blocksize)
|
---|
| 671 | if not raw:
|
---|
| 672 | break
|
---|
[391] | 673 | data = self.bz2obj.decompress(raw)
|
---|
[2] | 674 | b.append(data)
|
---|
| 675 | x += len(data)
|
---|
| 676 | self.buf = "".join(b)
|
---|
| 677 |
|
---|
| 678 | buf = self.buf[:size]
|
---|
| 679 | self.buf = self.buf[size:]
|
---|
| 680 | self.pos += len(buf)
|
---|
| 681 | return buf
|
---|
| 682 |
|
---|
| 683 | def seek(self, pos):
|
---|
| 684 | if pos < self.pos:
|
---|
| 685 | self.init()
|
---|
| 686 | self.read(pos - self.pos)
|
---|
| 687 |
|
---|
| 688 | def tell(self):
|
---|
| 689 | return self.pos
|
---|
| 690 |
|
---|
| 691 | def write(self, data):
|
---|
| 692 | self.pos += len(data)
|
---|
| 693 | raw = self.bz2obj.compress(data)
|
---|
| 694 | self.fileobj.write(raw)
|
---|
| 695 |
|
---|
| 696 | def close(self):
|
---|
| 697 | if self.mode == "w":
|
---|
| 698 | raw = self.bz2obj.flush()
|
---|
| 699 | self.fileobj.write(raw)
|
---|
| 700 | # class _BZ2Proxy
|
---|
| 701 |
|
---|
| 702 | #------------------------
|
---|
| 703 | # Extraction file object
|
---|
| 704 | #------------------------
|
---|
| 705 | class _FileInFile(object):
|
---|
| 706 | """A thin wrapper around an existing file object that
|
---|
| 707 | provides a part of its data as an individual file
|
---|
| 708 | object.
|
---|
| 709 | """
|
---|
| 710 |
|
---|
| 711 | def __init__(self, fileobj, offset, size, sparse=None):
|
---|
| 712 | self.fileobj = fileobj
|
---|
| 713 | self.offset = offset
|
---|
| 714 | self.size = size
|
---|
| 715 | self.sparse = sparse
|
---|
| 716 | self.position = 0
|
---|
| 717 |
|
---|
| 718 | def tell(self):
|
---|
| 719 | """Return the current file position.
|
---|
| 720 | """
|
---|
| 721 | return self.position
|
---|
| 722 |
|
---|
| 723 | def seek(self, position):
|
---|
| 724 | """Seek to a position in the file.
|
---|
| 725 | """
|
---|
| 726 | self.position = position
|
---|
| 727 |
|
---|
| 728 | def read(self, size=None):
|
---|
| 729 | """Read data from the file.
|
---|
| 730 | """
|
---|
| 731 | if size is None:
|
---|
| 732 | size = self.size - self.position
|
---|
| 733 | else:
|
---|
| 734 | size = min(size, self.size - self.position)
|
---|
| 735 |
|
---|
| 736 | if self.sparse is None:
|
---|
| 737 | return self.readnormal(size)
|
---|
| 738 | else:
|
---|
| 739 | return self.readsparse(size)
|
---|
| 740 |
|
---|
| 741 | def readnormal(self, size):
|
---|
| 742 | """Read operation for regular files.
|
---|
| 743 | """
|
---|
| 744 | self.fileobj.seek(self.offset + self.position)
|
---|
| 745 | self.position += size
|
---|
| 746 | return self.fileobj.read(size)
|
---|
| 747 |
|
---|
| 748 | def readsparse(self, size):
|
---|
| 749 | """Read operation for sparse files.
|
---|
| 750 | """
|
---|
| 751 | data = []
|
---|
| 752 | while size > 0:
|
---|
| 753 | buf = self.readsparsesection(size)
|
---|
| 754 | if not buf:
|
---|
| 755 | break
|
---|
| 756 | size -= len(buf)
|
---|
| 757 | data.append(buf)
|
---|
| 758 | return "".join(data)
|
---|
| 759 |
|
---|
| 760 | def readsparsesection(self, size):
|
---|
| 761 | """Read a single section of a sparse file.
|
---|
| 762 | """
|
---|
| 763 | section = self.sparse.find(self.position)
|
---|
| 764 |
|
---|
| 765 | if section is None:
|
---|
| 766 | return ""
|
---|
| 767 |
|
---|
| 768 | size = min(size, section.offset + section.size - self.position)
|
---|
| 769 |
|
---|
| 770 | if isinstance(section, _data):
|
---|
| 771 | realpos = section.realpos + self.position - section.offset
|
---|
| 772 | self.fileobj.seek(self.offset + realpos)
|
---|
| 773 | self.position += size
|
---|
| 774 | return self.fileobj.read(size)
|
---|
| 775 | else:
|
---|
| 776 | self.position += size
|
---|
| 777 | return NUL * size
|
---|
| 778 | #class _FileInFile
|
---|
| 779 |
|
---|
| 780 |
|
---|
| 781 | class ExFileObject(object):
|
---|
| 782 | """File-like object for reading an archive member.
|
---|
| 783 | Is returned by TarFile.extractfile().
|
---|
| 784 | """
|
---|
| 785 | blocksize = 1024
|
---|
| 786 |
|
---|
| 787 | def __init__(self, tarfile, tarinfo):
|
---|
| 788 | self.fileobj = _FileInFile(tarfile.fileobj,
|
---|
| 789 | tarinfo.offset_data,
|
---|
| 790 | tarinfo.size,
|
---|
| 791 | getattr(tarinfo, "sparse", None))
|
---|
| 792 | self.name = tarinfo.name
|
---|
| 793 | self.mode = "r"
|
---|
| 794 | self.closed = False
|
---|
| 795 | self.size = tarinfo.size
|
---|
| 796 |
|
---|
| 797 | self.position = 0
|
---|
| 798 | self.buffer = ""
|
---|
| 799 |
|
---|
| 800 | def read(self, size=None):
|
---|
| 801 | """Read at most size bytes from the file. If size is not
|
---|
| 802 | present or None, read all data until EOF is reached.
|
---|
| 803 | """
|
---|
| 804 | if self.closed:
|
---|
| 805 | raise ValueError("I/O operation on closed file")
|
---|
| 806 |
|
---|
| 807 | buf = ""
|
---|
| 808 | if self.buffer:
|
---|
| 809 | if size is None:
|
---|
| 810 | buf = self.buffer
|
---|
| 811 | self.buffer = ""
|
---|
| 812 | else:
|
---|
| 813 | buf = self.buffer[:size]
|
---|
| 814 | self.buffer = self.buffer[size:]
|
---|
| 815 |
|
---|
| 816 | if size is None:
|
---|
| 817 | buf += self.fileobj.read()
|
---|
| 818 | else:
|
---|
| 819 | buf += self.fileobj.read(size - len(buf))
|
---|
| 820 |
|
---|
| 821 | self.position += len(buf)
|
---|
| 822 | return buf
|
---|
| 823 |
|
---|
| 824 | def readline(self, size=-1):
|
---|
| 825 | """Read one entire line from the file. If size is present
|
---|
| 826 | and non-negative, return a string with at most that
|
---|
| 827 | size, which may be an incomplete line.
|
---|
| 828 | """
|
---|
| 829 | if self.closed:
|
---|
| 830 | raise ValueError("I/O operation on closed file")
|
---|
| 831 |
|
---|
| 832 | if "\n" in self.buffer:
|
---|
| 833 | pos = self.buffer.find("\n") + 1
|
---|
| 834 | else:
|
---|
| 835 | buffers = [self.buffer]
|
---|
| 836 | while True:
|
---|
| 837 | buf = self.fileobj.read(self.blocksize)
|
---|
| 838 | buffers.append(buf)
|
---|
| 839 | if not buf or "\n" in buf:
|
---|
| 840 | self.buffer = "".join(buffers)
|
---|
| 841 | pos = self.buffer.find("\n") + 1
|
---|
| 842 | if pos == 0:
|
---|
| 843 | # no newline found.
|
---|
| 844 | pos = len(self.buffer)
|
---|
| 845 | break
|
---|
| 846 |
|
---|
| 847 | if size != -1:
|
---|
| 848 | pos = min(size, pos)
|
---|
| 849 |
|
---|
| 850 | buf = self.buffer[:pos]
|
---|
| 851 | self.buffer = self.buffer[pos:]
|
---|
| 852 | self.position += len(buf)
|
---|
| 853 | return buf
|
---|
| 854 |
|
---|
| 855 | def readlines(self):
|
---|
| 856 | """Return a list with all remaining lines.
|
---|
| 857 | """
|
---|
| 858 | result = []
|
---|
| 859 | while True:
|
---|
| 860 | line = self.readline()
|
---|
| 861 | if not line: break
|
---|
| 862 | result.append(line)
|
---|
| 863 | return result
|
---|
| 864 |
|
---|
| 865 | def tell(self):
|
---|
| 866 | """Return the current file position.
|
---|
| 867 | """
|
---|
| 868 | if self.closed:
|
---|
| 869 | raise ValueError("I/O operation on closed file")
|
---|
| 870 |
|
---|
| 871 | return self.position
|
---|
| 872 |
|
---|
| 873 | def seek(self, pos, whence=os.SEEK_SET):
|
---|
| 874 | """Seek to a position in the file.
|
---|
| 875 | """
|
---|
| 876 | if self.closed:
|
---|
| 877 | raise ValueError("I/O operation on closed file")
|
---|
| 878 |
|
---|
| 879 | if whence == os.SEEK_SET:
|
---|
| 880 | self.position = min(max(pos, 0), self.size)
|
---|
| 881 | elif whence == os.SEEK_CUR:
|
---|
| 882 | if pos < 0:
|
---|
| 883 | self.position = max(self.position + pos, 0)
|
---|
| 884 | else:
|
---|
| 885 | self.position = min(self.position + pos, self.size)
|
---|
| 886 | elif whence == os.SEEK_END:
|
---|
| 887 | self.position = max(min(self.size + pos, self.size), 0)
|
---|
| 888 | else:
|
---|
| 889 | raise ValueError("Invalid argument")
|
---|
| 890 |
|
---|
| 891 | self.buffer = ""
|
---|
| 892 | self.fileobj.seek(self.position)
|
---|
| 893 |
|
---|
| 894 | def close(self):
|
---|
| 895 | """Close the file object.
|
---|
| 896 | """
|
---|
| 897 | self.closed = True
|
---|
| 898 |
|
---|
| 899 | def __iter__(self):
|
---|
| 900 | """Get an iterator over the file's lines.
|
---|
| 901 | """
|
---|
| 902 | while True:
|
---|
| 903 | line = self.readline()
|
---|
| 904 | if not line:
|
---|
| 905 | break
|
---|
| 906 | yield line
|
---|
| 907 | #class ExFileObject
|
---|
| 908 |
|
---|
| 909 | #------------------
|
---|
| 910 | # Exported Classes
|
---|
| 911 | #------------------
|
---|
| 912 | class TarInfo(object):
|
---|
| 913 | """Informational class which holds the details about an
|
---|
| 914 | archive member given by a tar header block.
|
---|
| 915 | TarInfo objects are returned by TarFile.getmember(),
|
---|
| 916 | TarFile.getmembers() and TarFile.gettarinfo() and are
|
---|
| 917 | usually created internally.
|
---|
| 918 | """
|
---|
| 919 |
|
---|
| 920 | def __init__(self, name=""):
|
---|
| 921 | """Construct a TarInfo object. name is the optional name
|
---|
| 922 | of the member.
|
---|
| 923 | """
|
---|
| 924 | self.name = name # member name
|
---|
| 925 | self.mode = 0644 # file permissions
|
---|
| 926 | self.uid = 0 # user id
|
---|
| 927 | self.gid = 0 # group id
|
---|
| 928 | self.size = 0 # file size
|
---|
| 929 | self.mtime = 0 # modification time
|
---|
| 930 | self.chksum = 0 # header checksum
|
---|
| 931 | self.type = REGTYPE # member type
|
---|
| 932 | self.linkname = "" # link name
|
---|
[391] | 933 | self.uname = "" # user name
|
---|
| 934 | self.gname = "" # group name
|
---|
[2] | 935 | self.devmajor = 0 # device major number
|
---|
| 936 | self.devminor = 0 # device minor number
|
---|
| 937 |
|
---|
| 938 | self.offset = 0 # the tar header starts here
|
---|
| 939 | self.offset_data = 0 # the file's data starts here
|
---|
| 940 |
|
---|
| 941 | self.pax_headers = {} # pax header information
|
---|
| 942 |
|
---|
| 943 | # In pax headers the "name" and "linkname" field are called
|
---|
| 944 | # "path" and "linkpath".
|
---|
| 945 | def _getpath(self):
|
---|
| 946 | return self.name
|
---|
| 947 | def _setpath(self, name):
|
---|
| 948 | self.name = name
|
---|
| 949 | path = property(_getpath, _setpath)
|
---|
| 950 |
|
---|
| 951 | def _getlinkpath(self):
|
---|
| 952 | return self.linkname
|
---|
| 953 | def _setlinkpath(self, linkname):
|
---|
| 954 | self.linkname = linkname
|
---|
| 955 | linkpath = property(_getlinkpath, _setlinkpath)
|
---|
| 956 |
|
---|
| 957 | def __repr__(self):
|
---|
| 958 | return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))
|
---|
| 959 |
|
---|
| 960 | def get_info(self, encoding, errors):
|
---|
| 961 | """Return the TarInfo's attributes as a dictionary.
|
---|
| 962 | """
|
---|
| 963 | info = {
|
---|
[391] | 964 | "name": self.name,
|
---|
[2] | 965 | "mode": self.mode & 07777,
|
---|
| 966 | "uid": self.uid,
|
---|
| 967 | "gid": self.gid,
|
---|
| 968 | "size": self.size,
|
---|
| 969 | "mtime": self.mtime,
|
---|
| 970 | "chksum": self.chksum,
|
---|
| 971 | "type": self.type,
|
---|
[391] | 972 | "linkname": self.linkname,
|
---|
[2] | 973 | "uname": self.uname,
|
---|
| 974 | "gname": self.gname,
|
---|
| 975 | "devmajor": self.devmajor,
|
---|
| 976 | "devminor": self.devminor
|
---|
| 977 | }
|
---|
| 978 |
|
---|
| 979 | if info["type"] == DIRTYPE and not info["name"].endswith("/"):
|
---|
| 980 | info["name"] += "/"
|
---|
| 981 |
|
---|
| 982 | for key in ("name", "linkname", "uname", "gname"):
|
---|
| 983 | if type(info[key]) is unicode:
|
---|
| 984 | info[key] = info[key].encode(encoding, errors)
|
---|
| 985 |
|
---|
| 986 | return info
|
---|
| 987 |
|
---|
| 988 | def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="strict"):
|
---|
| 989 | """Return a tar header as a string of 512 byte blocks.
|
---|
| 990 | """
|
---|
| 991 | info = self.get_info(encoding, errors)
|
---|
| 992 |
|
---|
| 993 | if format == USTAR_FORMAT:
|
---|
| 994 | return self.create_ustar_header(info)
|
---|
| 995 | elif format == GNU_FORMAT:
|
---|
| 996 | return self.create_gnu_header(info)
|
---|
| 997 | elif format == PAX_FORMAT:
|
---|
| 998 | return self.create_pax_header(info, encoding, errors)
|
---|
| 999 | else:
|
---|
| 1000 | raise ValueError("invalid format")
|
---|
| 1001 |
|
---|
| 1002 | def create_ustar_header(self, info):
|
---|
| 1003 | """Return the object as a ustar header block.
|
---|
| 1004 | """
|
---|
| 1005 | info["magic"] = POSIX_MAGIC
|
---|
| 1006 |
|
---|
| 1007 | if len(info["linkname"]) > LENGTH_LINK:
|
---|
| 1008 | raise ValueError("linkname is too long")
|
---|
| 1009 |
|
---|
| 1010 | if len(info["name"]) > LENGTH_NAME:
|
---|
| 1011 | info["prefix"], info["name"] = self._posix_split_name(info["name"])
|
---|
| 1012 |
|
---|
| 1013 | return self._create_header(info, USTAR_FORMAT)
|
---|
| 1014 |
|
---|
| 1015 | def create_gnu_header(self, info):
|
---|
| 1016 | """Return the object as a GNU header block sequence.
|
---|
| 1017 | """
|
---|
| 1018 | info["magic"] = GNU_MAGIC
|
---|
| 1019 |
|
---|
| 1020 | buf = ""
|
---|
| 1021 | if len(info["linkname"]) > LENGTH_LINK:
|
---|
| 1022 | buf += self._create_gnu_long_header(info["linkname"], GNUTYPE_LONGLINK)
|
---|
| 1023 |
|
---|
| 1024 | if len(info["name"]) > LENGTH_NAME:
|
---|
| 1025 | buf += self._create_gnu_long_header(info["name"], GNUTYPE_LONGNAME)
|
---|
| 1026 |
|
---|
| 1027 | return buf + self._create_header(info, GNU_FORMAT)
|
---|
| 1028 |
|
---|
| 1029 | def create_pax_header(self, info, encoding, errors):
|
---|
| 1030 | """Return the object as a ustar header block. If it cannot be
|
---|
| 1031 | represented this way, prepend a pax extended header sequence
|
---|
| 1032 | with supplement information.
|
---|
| 1033 | """
|
---|
| 1034 | info["magic"] = POSIX_MAGIC
|
---|
| 1035 | pax_headers = self.pax_headers.copy()
|
---|
| 1036 |
|
---|
| 1037 | # Test string fields for values that exceed the field length or cannot
|
---|
| 1038 | # be represented in ASCII encoding.
|
---|
| 1039 | for name, hname, length in (
|
---|
| 1040 | ("name", "path", LENGTH_NAME), ("linkname", "linkpath", LENGTH_LINK),
|
---|
| 1041 | ("uname", "uname", 32), ("gname", "gname", 32)):
|
---|
| 1042 |
|
---|
| 1043 | if hname in pax_headers:
|
---|
| 1044 | # The pax header has priority.
|
---|
| 1045 | continue
|
---|
| 1046 |
|
---|
| 1047 | val = info[name].decode(encoding, errors)
|
---|
| 1048 |
|
---|
| 1049 | # Try to encode the string as ASCII.
|
---|
| 1050 | try:
|
---|
| 1051 | val.encode("ascii")
|
---|
| 1052 | except UnicodeEncodeError:
|
---|
| 1053 | pax_headers[hname] = val
|
---|
| 1054 | continue
|
---|
| 1055 |
|
---|
| 1056 | if len(info[name]) > length:
|
---|
| 1057 | pax_headers[hname] = val
|
---|
| 1058 |
|
---|
| 1059 | # Test number fields for values that exceed the field limit or values
|
---|
| 1060 | # that like to be stored as float.
|
---|
| 1061 | for name, digits in (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12)):
|
---|
| 1062 | if name in pax_headers:
|
---|
| 1063 | # The pax header has priority. Avoid overflow.
|
---|
| 1064 | info[name] = 0
|
---|
| 1065 | continue
|
---|
| 1066 |
|
---|
| 1067 | val = info[name]
|
---|
| 1068 | if not 0 <= val < 8 ** (digits - 1) or isinstance(val, float):
|
---|
| 1069 | pax_headers[name] = unicode(val)
|
---|
| 1070 | info[name] = 0
|
---|
| 1071 |
|
---|
| 1072 | # Create a pax extended header if necessary.
|
---|
| 1073 | if pax_headers:
|
---|
| 1074 | buf = self._create_pax_generic_header(pax_headers)
|
---|
| 1075 | else:
|
---|
| 1076 | buf = ""
|
---|
| 1077 |
|
---|
| 1078 | return buf + self._create_header(info, USTAR_FORMAT)
|
---|
| 1079 |
|
---|
| 1080 | @classmethod
|
---|
| 1081 | def create_pax_global_header(cls, pax_headers):
|
---|
| 1082 | """Return the object as a pax global header block sequence.
|
---|
| 1083 | """
|
---|
| 1084 | return cls._create_pax_generic_header(pax_headers, type=XGLTYPE)
|
---|
| 1085 |
|
---|
| 1086 | def _posix_split_name(self, name):
|
---|
| 1087 | """Split a name longer than 100 chars into a prefix
|
---|
| 1088 | and a name part.
|
---|
| 1089 | """
|
---|
| 1090 | prefix = name[:LENGTH_PREFIX + 1]
|
---|
| 1091 | while prefix and prefix[-1] != "/":
|
---|
| 1092 | prefix = prefix[:-1]
|
---|
| 1093 |
|
---|
| 1094 | name = name[len(prefix):]
|
---|
| 1095 | prefix = prefix[:-1]
|
---|
| 1096 |
|
---|
| 1097 | if not prefix or len(name) > LENGTH_NAME:
|
---|
| 1098 | raise ValueError("name is too long")
|
---|
| 1099 | return prefix, name
|
---|
| 1100 |
|
---|
| 1101 | @staticmethod
|
---|
| 1102 | def _create_header(info, format):
|
---|
| 1103 | """Return a header block. info is a dictionary with file
|
---|
| 1104 | information, format must be one of the *_FORMAT constants.
|
---|
| 1105 | """
|
---|
| 1106 | parts = [
|
---|
| 1107 | stn(info.get("name", ""), 100),
|
---|
| 1108 | itn(info.get("mode", 0) & 07777, 8, format),
|
---|
| 1109 | itn(info.get("uid", 0), 8, format),
|
---|
| 1110 | itn(info.get("gid", 0), 8, format),
|
---|
| 1111 | itn(info.get("size", 0), 12, format),
|
---|
| 1112 | itn(info.get("mtime", 0), 12, format),
|
---|
| 1113 | " ", # checksum field
|
---|
| 1114 | info.get("type", REGTYPE),
|
---|
| 1115 | stn(info.get("linkname", ""), 100),
|
---|
| 1116 | stn(info.get("magic", POSIX_MAGIC), 8),
|
---|
[391] | 1117 | stn(info.get("uname", ""), 32),
|
---|
| 1118 | stn(info.get("gname", ""), 32),
|
---|
[2] | 1119 | itn(info.get("devmajor", 0), 8, format),
|
---|
| 1120 | itn(info.get("devminor", 0), 8, format),
|
---|
| 1121 | stn(info.get("prefix", ""), 155)
|
---|
| 1122 | ]
|
---|
| 1123 |
|
---|
| 1124 | buf = struct.pack("%ds" % BLOCKSIZE, "".join(parts))
|
---|
| 1125 | chksum = calc_chksums(buf[-BLOCKSIZE:])[0]
|
---|
| 1126 | buf = buf[:-364] + "%06o\0" % chksum + buf[-357:]
|
---|
| 1127 | return buf
|
---|
| 1128 |
|
---|
| 1129 | @staticmethod
|
---|
| 1130 | def _create_payload(payload):
|
---|
| 1131 | """Return the string payload filled with zero bytes
|
---|
| 1132 | up to the next 512 byte border.
|
---|
| 1133 | """
|
---|
| 1134 | blocks, remainder = divmod(len(payload), BLOCKSIZE)
|
---|
| 1135 | if remainder > 0:
|
---|
| 1136 | payload += (BLOCKSIZE - remainder) * NUL
|
---|
| 1137 | return payload
|
---|
| 1138 |
|
---|
| 1139 | @classmethod
|
---|
| 1140 | def _create_gnu_long_header(cls, name, type):
|
---|
| 1141 | """Return a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence
|
---|
| 1142 | for name.
|
---|
| 1143 | """
|
---|
| 1144 | name += NUL
|
---|
| 1145 |
|
---|
| 1146 | info = {}
|
---|
| 1147 | info["name"] = "././@LongLink"
|
---|
| 1148 | info["type"] = type
|
---|
| 1149 | info["size"] = len(name)
|
---|
| 1150 | info["magic"] = GNU_MAGIC
|
---|
| 1151 |
|
---|
| 1152 | # create extended header + name blocks.
|
---|
| 1153 | return cls._create_header(info, USTAR_FORMAT) + \
|
---|
| 1154 | cls._create_payload(name)
|
---|
| 1155 |
|
---|
| 1156 | @classmethod
|
---|
| 1157 | def _create_pax_generic_header(cls, pax_headers, type=XHDTYPE):
|
---|
| 1158 | """Return a POSIX.1-2001 extended or global header sequence
|
---|
| 1159 | that contains a list of keyword, value pairs. The values
|
---|
| 1160 | must be unicode objects.
|
---|
| 1161 | """
|
---|
| 1162 | records = []
|
---|
| 1163 | for keyword, value in pax_headers.iteritems():
|
---|
| 1164 | keyword = keyword.encode("utf8")
|
---|
| 1165 | value = value.encode("utf8")
|
---|
| 1166 | l = len(keyword) + len(value) + 3 # ' ' + '=' + '\n'
|
---|
| 1167 | n = p = 0
|
---|
| 1168 | while True:
|
---|
| 1169 | n = l + len(str(p))
|
---|
| 1170 | if n == p:
|
---|
| 1171 | break
|
---|
| 1172 | p = n
|
---|
| 1173 | records.append("%d %s=%s\n" % (p, keyword, value))
|
---|
| 1174 | records = "".join(records)
|
---|
| 1175 |
|
---|
| 1176 | # We use a hardcoded "././@PaxHeader" name like star does
|
---|
| 1177 | # instead of the one that POSIX recommends.
|
---|
| 1178 | info = {}
|
---|
| 1179 | info["name"] = "././@PaxHeader"
|
---|
| 1180 | info["type"] = type
|
---|
| 1181 | info["size"] = len(records)
|
---|
| 1182 | info["magic"] = POSIX_MAGIC
|
---|
| 1183 |
|
---|
| 1184 | # Create pax header + record blocks.
|
---|
| 1185 | return cls._create_header(info, USTAR_FORMAT) + \
|
---|
| 1186 | cls._create_payload(records)
|
---|
| 1187 |
|
---|
| 1188 | @classmethod
|
---|
| 1189 | def frombuf(cls, buf):
|
---|
| 1190 | """Construct a TarInfo object from a 512 byte string buffer.
|
---|
| 1191 | """
|
---|
[391] | 1192 | if len(buf) == 0:
|
---|
| 1193 | raise EmptyHeaderError("empty header")
|
---|
[2] | 1194 | if len(buf) != BLOCKSIZE:
|
---|
[391] | 1195 | raise TruncatedHeaderError("truncated header")
|
---|
[2] | 1196 | if buf.count(NUL) == BLOCKSIZE:
|
---|
[391] | 1197 | raise EOFHeaderError("end of file header")
|
---|
[2] | 1198 |
|
---|
| 1199 | chksum = nti(buf[148:156])
|
---|
| 1200 | if chksum not in calc_chksums(buf):
|
---|
[391] | 1201 | raise InvalidHeaderError("bad checksum")
|
---|
[2] | 1202 |
|
---|
| 1203 | obj = cls()
|
---|
| 1204 | obj.buf = buf
|
---|
| 1205 | obj.name = nts(buf[0:100])
|
---|
| 1206 | obj.mode = nti(buf[100:108])
|
---|
| 1207 | obj.uid = nti(buf[108:116])
|
---|
| 1208 | obj.gid = nti(buf[116:124])
|
---|
| 1209 | obj.size = nti(buf[124:136])
|
---|
| 1210 | obj.mtime = nti(buf[136:148])
|
---|
| 1211 | obj.chksum = chksum
|
---|
| 1212 | obj.type = buf[156:157]
|
---|
| 1213 | obj.linkname = nts(buf[157:257])
|
---|
| 1214 | obj.uname = nts(buf[265:297])
|
---|
| 1215 | obj.gname = nts(buf[297:329])
|
---|
| 1216 | obj.devmajor = nti(buf[329:337])
|
---|
| 1217 | obj.devminor = nti(buf[337:345])
|
---|
| 1218 | prefix = nts(buf[345:500])
|
---|
| 1219 |
|
---|
| 1220 | # Old V7 tar format represents a directory as a regular
|
---|
| 1221 | # file with a trailing slash.
|
---|
| 1222 | if obj.type == AREGTYPE and obj.name.endswith("/"):
|
---|
| 1223 | obj.type = DIRTYPE
|
---|
| 1224 |
|
---|
| 1225 | # Remove redundant slashes from directories.
|
---|
| 1226 | if obj.isdir():
|
---|
| 1227 | obj.name = obj.name.rstrip("/")
|
---|
| 1228 |
|
---|
| 1229 | # Reconstruct a ustar longname.
|
---|
| 1230 | if prefix and obj.type not in GNU_TYPES:
|
---|
| 1231 | obj.name = prefix + "/" + obj.name
|
---|
| 1232 | return obj
|
---|
| 1233 |
|
---|
| 1234 | @classmethod
|
---|
| 1235 | def fromtarfile(cls, tarfile):
|
---|
| 1236 | """Return the next TarInfo object from TarFile object
|
---|
| 1237 | tarfile.
|
---|
| 1238 | """
|
---|
| 1239 | buf = tarfile.fileobj.read(BLOCKSIZE)
|
---|
| 1240 | obj = cls.frombuf(buf)
|
---|
| 1241 | obj.offset = tarfile.fileobj.tell() - BLOCKSIZE
|
---|
| 1242 | return obj._proc_member(tarfile)
|
---|
| 1243 |
|
---|
| 1244 | #--------------------------------------------------------------------------
|
---|
| 1245 | # The following are methods that are called depending on the type of a
|
---|
| 1246 | # member. The entry point is _proc_member() which can be overridden in a
|
---|
| 1247 | # subclass to add custom _proc_*() methods. A _proc_*() method MUST
|
---|
| 1248 | # implement the following
|
---|
| 1249 | # operations:
|
---|
| 1250 | # 1. Set self.offset_data to the position where the data blocks begin,
|
---|
| 1251 | # if there is data that follows.
|
---|
| 1252 | # 2. Set tarfile.offset to the position where the next member's header will
|
---|
| 1253 | # begin.
|
---|
| 1254 | # 3. Return self or another valid TarInfo object.
|
---|
| 1255 | def _proc_member(self, tarfile):
|
---|
| 1256 | """Choose the right processing method depending on
|
---|
| 1257 | the type and call it.
|
---|
| 1258 | """
|
---|
| 1259 | if self.type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
|
---|
| 1260 | return self._proc_gnulong(tarfile)
|
---|
| 1261 | elif self.type == GNUTYPE_SPARSE:
|
---|
| 1262 | return self._proc_sparse(tarfile)
|
---|
| 1263 | elif self.type in (XHDTYPE, XGLTYPE, SOLARIS_XHDTYPE):
|
---|
| 1264 | return self._proc_pax(tarfile)
|
---|
| 1265 | else:
|
---|
| 1266 | return self._proc_builtin(tarfile)
|
---|
| 1267 |
|
---|
| 1268 | def _proc_builtin(self, tarfile):
|
---|
| 1269 | """Process a builtin type or an unknown type which
|
---|
| 1270 | will be treated as a regular file.
|
---|
| 1271 | """
|
---|
| 1272 | self.offset_data = tarfile.fileobj.tell()
|
---|
| 1273 | offset = self.offset_data
|
---|
| 1274 | if self.isreg() or self.type not in SUPPORTED_TYPES:
|
---|
| 1275 | # Skip the following data blocks.
|
---|
| 1276 | offset += self._block(self.size)
|
---|
| 1277 | tarfile.offset = offset
|
---|
| 1278 |
|
---|
| 1279 | # Patch the TarInfo object with saved global
|
---|
| 1280 | # header information.
|
---|
| 1281 | self._apply_pax_info(tarfile.pax_headers, tarfile.encoding, tarfile.errors)
|
---|
| 1282 |
|
---|
| 1283 | return self
|
---|
| 1284 |
|
---|
| 1285 | def _proc_gnulong(self, tarfile):
|
---|
| 1286 | """Process the blocks that hold a GNU longname
|
---|
| 1287 | or longlink member.
|
---|
| 1288 | """
|
---|
| 1289 | buf = tarfile.fileobj.read(self._block(self.size))
|
---|
| 1290 |
|
---|
| 1291 | # Fetch the next header and process it.
|
---|
[391] | 1292 | try:
|
---|
| 1293 | next = self.fromtarfile(tarfile)
|
---|
| 1294 | except HeaderError:
|
---|
| 1295 | raise SubsequentHeaderError("missing or bad subsequent header")
|
---|
[2] | 1296 |
|
---|
| 1297 | # Patch the TarInfo object from the next header with
|
---|
| 1298 | # the longname information.
|
---|
| 1299 | next.offset = self.offset
|
---|
| 1300 | if self.type == GNUTYPE_LONGNAME:
|
---|
| 1301 | next.name = nts(buf)
|
---|
| 1302 | elif self.type == GNUTYPE_LONGLINK:
|
---|
| 1303 | next.linkname = nts(buf)
|
---|
| 1304 |
|
---|
| 1305 | return next
|
---|
| 1306 |
|
---|
| 1307 | def _proc_sparse(self, tarfile):
|
---|
| 1308 | """Process a GNU sparse header plus extra headers.
|
---|
| 1309 | """
|
---|
| 1310 | buf = self.buf
|
---|
| 1311 | sp = _ringbuffer()
|
---|
| 1312 | pos = 386
|
---|
| 1313 | lastpos = 0L
|
---|
| 1314 | realpos = 0L
|
---|
| 1315 | # There are 4 possible sparse structs in the
|
---|
| 1316 | # first header.
|
---|
| 1317 | for i in xrange(4):
|
---|
| 1318 | try:
|
---|
| 1319 | offset = nti(buf[pos:pos + 12])
|
---|
| 1320 | numbytes = nti(buf[pos + 12:pos + 24])
|
---|
| 1321 | except ValueError:
|
---|
| 1322 | break
|
---|
| 1323 | if offset > lastpos:
|
---|
| 1324 | sp.append(_hole(lastpos, offset - lastpos))
|
---|
| 1325 | sp.append(_data(offset, numbytes, realpos))
|
---|
| 1326 | realpos += numbytes
|
---|
| 1327 | lastpos = offset + numbytes
|
---|
| 1328 | pos += 24
|
---|
| 1329 |
|
---|
| 1330 | isextended = ord(buf[482])
|
---|
| 1331 | origsize = nti(buf[483:495])
|
---|
| 1332 |
|
---|
| 1333 | # If the isextended flag is given,
|
---|
| 1334 | # there are extra headers to process.
|
---|
| 1335 | while isextended == 1:
|
---|
| 1336 | buf = tarfile.fileobj.read(BLOCKSIZE)
|
---|
| 1337 | pos = 0
|
---|
| 1338 | for i in xrange(21):
|
---|
| 1339 | try:
|
---|
| 1340 | offset = nti(buf[pos:pos + 12])
|
---|
| 1341 | numbytes = nti(buf[pos + 12:pos + 24])
|
---|
| 1342 | except ValueError:
|
---|
| 1343 | break
|
---|
| 1344 | if offset > lastpos:
|
---|
| 1345 | sp.append(_hole(lastpos, offset - lastpos))
|
---|
| 1346 | sp.append(_data(offset, numbytes, realpos))
|
---|
| 1347 | realpos += numbytes
|
---|
| 1348 | lastpos = offset + numbytes
|
---|
| 1349 | pos += 24
|
---|
| 1350 | isextended = ord(buf[504])
|
---|
| 1351 |
|
---|
| 1352 | if lastpos < origsize:
|
---|
| 1353 | sp.append(_hole(lastpos, origsize - lastpos))
|
---|
| 1354 |
|
---|
| 1355 | self.sparse = sp
|
---|
| 1356 |
|
---|
| 1357 | self.offset_data = tarfile.fileobj.tell()
|
---|
| 1358 | tarfile.offset = self.offset_data + self._block(self.size)
|
---|
| 1359 | self.size = origsize
|
---|
| 1360 |
|
---|
| 1361 | return self
|
---|
| 1362 |
|
---|
| 1363 | def _proc_pax(self, tarfile):
|
---|
| 1364 | """Process an extended or global header as described in
|
---|
| 1365 | POSIX.1-2001.
|
---|
| 1366 | """
|
---|
| 1367 | # Read the header information.
|
---|
| 1368 | buf = tarfile.fileobj.read(self._block(self.size))
|
---|
| 1369 |
|
---|
| 1370 | # A pax header stores supplemental information for either
|
---|
| 1371 | # the following file (extended) or all following files
|
---|
| 1372 | # (global).
|
---|
| 1373 | if self.type == XGLTYPE:
|
---|
| 1374 | pax_headers = tarfile.pax_headers
|
---|
| 1375 | else:
|
---|
| 1376 | pax_headers = tarfile.pax_headers.copy()
|
---|
| 1377 |
|
---|
| 1378 | # Parse pax header information. A record looks like that:
|
---|
| 1379 | # "%d %s=%s\n" % (length, keyword, value). length is the size
|
---|
| 1380 | # of the complete record including the length field itself and
|
---|
| 1381 | # the newline. keyword and value are both UTF-8 encoded strings.
|
---|
| 1382 | regex = re.compile(r"(\d+) ([^=]+)=", re.U)
|
---|
| 1383 | pos = 0
|
---|
| 1384 | while True:
|
---|
| 1385 | match = regex.match(buf, pos)
|
---|
| 1386 | if not match:
|
---|
| 1387 | break
|
---|
| 1388 |
|
---|
| 1389 | length, keyword = match.groups()
|
---|
| 1390 | length = int(length)
|
---|
| 1391 | value = buf[match.end(2) + 1:match.start(1) + length - 1]
|
---|
| 1392 |
|
---|
| 1393 | keyword = keyword.decode("utf8")
|
---|
| 1394 | value = value.decode("utf8")
|
---|
| 1395 |
|
---|
| 1396 | pax_headers[keyword] = value
|
---|
| 1397 | pos += length
|
---|
| 1398 |
|
---|
| 1399 | # Fetch the next header.
|
---|
[391] | 1400 | try:
|
---|
| 1401 | next = self.fromtarfile(tarfile)
|
---|
| 1402 | except HeaderError:
|
---|
| 1403 | raise SubsequentHeaderError("missing or bad subsequent header")
|
---|
[2] | 1404 |
|
---|
| 1405 | if self.type in (XHDTYPE, SOLARIS_XHDTYPE):
|
---|
| 1406 | # Patch the TarInfo object with the extended header info.
|
---|
| 1407 | next._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors)
|
---|
| 1408 | next.offset = self.offset
|
---|
| 1409 |
|
---|
| 1410 | if "size" in pax_headers:
|
---|
| 1411 | # If the extended header replaces the size field,
|
---|
| 1412 | # we need to recalculate the offset where the next
|
---|
| 1413 | # header starts.
|
---|
| 1414 | offset = next.offset_data
|
---|
| 1415 | if next.isreg() or next.type not in SUPPORTED_TYPES:
|
---|
| 1416 | offset += next._block(next.size)
|
---|
| 1417 | tarfile.offset = offset
|
---|
| 1418 |
|
---|
| 1419 | return next
|
---|
| 1420 |
|
---|
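# Illustrative sketch, not part of the module: how one pax record of the
# form "%d %s=%s\n" % (length, keyword, value) is parsed, mirroring the
# regex used in _proc_pax() above.  The sample record is made up.
#
#     import re
#
#     record = "20 path=foo/bar.txt\n"            # 20 == len(record)
#     regex = re.compile(r"(\d+) ([^=]+)=", re.U)
#     match = regex.match(record)
#     length, keyword = match.groups()             # "20", "path"
#     length = int(length)
#     value = record[match.end(2) + 1:length - 1]  # "foo/bar.txt"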
| 1421 | def _apply_pax_info(self, pax_headers, encoding, errors):
|
---|
| 1422 | """Replace fields with supplemental information from a previous
|
---|
| 1423 | pax extended or global header.
|
---|
| 1424 | """
|
---|
| 1425 | for keyword, value in pax_headers.iteritems():
|
---|
| 1426 | if keyword not in PAX_FIELDS:
|
---|
| 1427 | continue
|
---|
| 1428 |
|
---|
| 1429 | if keyword == "path":
|
---|
| 1430 | value = value.rstrip("/")
|
---|
| 1431 |
|
---|
| 1432 | if keyword in PAX_NUMBER_FIELDS:
|
---|
| 1433 | try:
|
---|
| 1434 | value = PAX_NUMBER_FIELDS[keyword](value)
|
---|
| 1435 | except ValueError:
|
---|
| 1436 | value = 0
|
---|
| 1437 | else:
|
---|
| 1438 | value = uts(value, encoding, errors)
|
---|
| 1439 |
|
---|
| 1440 | setattr(self, keyword, value)
|
---|
| 1441 |
|
---|
| 1442 | self.pax_headers = pax_headers.copy()
|
---|
| 1443 |
|
---|
| 1444 | def _block(self, count):
|
---|
| 1445 | """Round up a byte count by BLOCKSIZE and return it,
|
---|
| 1446 | e.g. _block(834) => 1024.
|
---|
| 1447 | """
|
---|
| 1448 | blocks, remainder = divmod(count, BLOCKSIZE)
|
---|
| 1449 | if remainder:
|
---|
| 1450 | blocks += 1
|
---|
| 1451 | return blocks * BLOCKSIZE
|
---|
| 1452 |
|
---|
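# Illustrative sketch, not part of the module: _block() rounds a byte count
# up to the next multiple of BLOCKSIZE (512 bytes), which is the space the
# member's data occupies in the archive.
#
#     _block(0)    ->    0
#     _block(1)    ->  512
#     _block(834)  -> 1024    # divmod(834, 512) == (1, 322), so two blocks
#     _block(1024) -> 1024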
| 1453 | def isreg(self):
|
---|
| 1454 | return self.type in REGULAR_TYPES
|
---|
| 1455 | def isfile(self):
|
---|
| 1456 | return self.isreg()
|
---|
| 1457 | def isdir(self):
|
---|
| 1458 | return self.type == DIRTYPE
|
---|
| 1459 | def issym(self):
|
---|
| 1460 | return self.type == SYMTYPE
|
---|
| 1461 | def islnk(self):
|
---|
| 1462 | return self.type == LNKTYPE
|
---|
| 1463 | def ischr(self):
|
---|
| 1464 | return self.type == CHRTYPE
|
---|
| 1465 | def isblk(self):
|
---|
| 1466 | return self.type == BLKTYPE
|
---|
| 1467 | def isfifo(self):
|
---|
| 1468 | return self.type == FIFOTYPE
|
---|
| 1469 | def issparse(self):
|
---|
| 1470 | return self.type == GNUTYPE_SPARSE
|
---|
| 1471 | def isdev(self):
|
---|
| 1472 | return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
|
---|
| 1473 | # class TarInfo
|
---|
| 1474 |
|
---|
| 1475 | class TarFile(object):
|
---|
| 1476 | """The TarFile Class provides an interface to tar archives.
|
---|
| 1477 | """
|
---|
| 1478 |
|
---|
| 1479 | debug = 0 # May be set from 0 (no msgs) to 3 (all msgs)
|
---|
| 1480 |
|
---|
| 1481 | dereference = False # If true, add content of linked file to the
|
---|
| 1482 | # tar file, else the link.
|
---|
| 1483 |
|
---|
| 1484 | ignore_zeros = False # If true, skips empty or invalid blocks and
|
---|
| 1485 | # continues processing.
|
---|
| 1486 |
|
---|
[391] | 1487 | errorlevel = 1 # If 0, fatal errors only appear in debug
|
---|
[2] | 1488 | # messages (if debug >= 0). If > 0, errors
|
---|
| 1489 | # are passed to the caller as exceptions.
|
---|
| 1490 |
|
---|
| 1491 | format = DEFAULT_FORMAT # The format to use when creating an archive.
|
---|
| 1492 |
|
---|
| 1493 | encoding = ENCODING # Encoding for 8-bit character strings.
|
---|
| 1494 |
|
---|
| 1495 | errors = None # Error handler for unicode conversion.
|
---|
| 1496 |
|
---|
| 1497 | tarinfo = TarInfo # The default TarInfo class to use.
|
---|
| 1498 |
|
---|
| 1499 | fileobject = ExFileObject # The default ExFileObject class to use.
|
---|
| 1500 |
|
---|
| 1501 | def __init__(self, name=None, mode="r", fileobj=None, format=None,
|
---|
| 1502 | tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
|
---|
| 1503 | errors=None, pax_headers=None, debug=None, errorlevel=None):
|
---|
| 1504 | """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
|
---|
| 1505 | read from an existing archive, 'a' to append data to an existing
|
---|
| 1506 | file or 'w' to create a new file overwriting an existing one. `mode'
|
---|
| 1507 | defaults to 'r'.
|
---|
| 1508 | If `fileobj' is given, it is used for reading or writing data. If it
|
---|
| 1509 | can be determined, `mode' is overridden by `fileobj's mode.
|
---|
| 1510 | `fileobj' is not closed when TarFile is closed.
|
---|
| 1511 | """
|
---|
| 1512 | if len(mode) > 1 or mode not in "raw":
|
---|
| 1513 | raise ValueError("mode must be 'r', 'a' or 'w'")
|
---|
| 1514 | self.mode = mode
|
---|
| 1515 | self._mode = {"r": "rb", "a": "r+b", "w": "wb"}[mode]
|
---|
| 1516 |
|
---|
| 1517 | if not fileobj:
|
---|
| 1518 | if self.mode == "a" and not os.path.exists(name):
|
---|
| 1519 | # Create nonexistent files in append mode.
|
---|
| 1520 | self.mode = "w"
|
---|
| 1521 | self._mode = "wb"
|
---|
| 1522 | fileobj = bltn_open(name, self._mode)
|
---|
| 1523 | self._extfileobj = False
|
---|
| 1524 | else:
|
---|
| 1525 | if name is None and hasattr(fileobj, "name"):
|
---|
| 1526 | name = fileobj.name
|
---|
| 1527 | if hasattr(fileobj, "mode"):
|
---|
| 1528 | self._mode = fileobj.mode
|
---|
| 1529 | self._extfileobj = True
|
---|
| 1530 | self.name = os.path.abspath(name) if name else None
|
---|
| 1531 | self.fileobj = fileobj
|
---|
| 1532 |
|
---|
| 1533 | # Init attributes.
|
---|
| 1534 | if format is not None:
|
---|
| 1535 | self.format = format
|
---|
| 1536 | if tarinfo is not None:
|
---|
| 1537 | self.tarinfo = tarinfo
|
---|
| 1538 | if dereference is not None:
|
---|
| 1539 | self.dereference = dereference
|
---|
| 1540 | if ignore_zeros is not None:
|
---|
| 1541 | self.ignore_zeros = ignore_zeros
|
---|
| 1542 | if encoding is not None:
|
---|
| 1543 | self.encoding = encoding
|
---|
| 1544 |
|
---|
| 1545 | if errors is not None:
|
---|
| 1546 | self.errors = errors
|
---|
| 1547 | elif mode == "r":
|
---|
| 1548 | self.errors = "utf-8"
|
---|
| 1549 | else:
|
---|
| 1550 | self.errors = "strict"
|
---|
| 1551 |
|
---|
| 1552 | if pax_headers is not None and self.format == PAX_FORMAT:
|
---|
| 1553 | self.pax_headers = pax_headers
|
---|
| 1554 | else:
|
---|
| 1555 | self.pax_headers = {}
|
---|
| 1556 |
|
---|
| 1557 | if debug is not None:
|
---|
| 1558 | self.debug = debug
|
---|
| 1559 | if errorlevel is not None:
|
---|
| 1560 | self.errorlevel = errorlevel
|
---|
| 1561 |
|
---|
| 1562 | # Init datastructures.
|
---|
| 1563 | self.closed = False
|
---|
| 1564 | self.members = [] # list of members as TarInfo objects
|
---|
| 1565 | self._loaded = False # flag if all members have been read
|
---|
| 1566 | self.offset = self.fileobj.tell()
|
---|
| 1567 | # current position in the archive file
|
---|
| 1568 | self.inodes = {} # dictionary caching the inodes of
|
---|
| 1569 | # archive members already added
|
---|
| 1570 |
|
---|
| 1571 | try:
|
---|
| 1572 | if self.mode == "r":
|
---|
| 1573 | self.firstmember = None
|
---|
| 1574 | self.firstmember = self.next()
|
---|
| 1575 |
|
---|
| 1576 | if self.mode == "a":
|
---|
| 1577 | # Move to the end of the archive,
|
---|
| 1578 | # before the first empty block.
|
---|
| 1579 | while True:
|
---|
[391] | 1580 | self.fileobj.seek(self.offset)
|
---|
| 1581 | try:
|
---|
| 1582 | tarinfo = self.tarinfo.fromtarfile(self)
|
---|
| 1583 | self.members.append(tarinfo)
|
---|
| 1584 | except EOFHeaderError:
|
---|
| 1585 | self.fileobj.seek(self.offset)
|
---|
[2] | 1586 | break
|
---|
[391] | 1587 | except HeaderError, e:
|
---|
| 1588 | raise ReadError(str(e))
|
---|
[2] | 1589 |
|
---|
| 1590 | if self.mode in "aw":
|
---|
| 1591 | self._loaded = True
|
---|
| 1592 |
|
---|
| 1593 | if self.pax_headers:
|
---|
| 1594 | buf = self.tarinfo.create_pax_global_header(self.pax_headers.copy())
|
---|
| 1595 | self.fileobj.write(buf)
|
---|
| 1596 | self.offset += len(buf)
|
---|
| 1597 | except:
|
---|
| 1598 | if not self._extfileobj:
|
---|
| 1599 | self.fileobj.close()
|
---|
| 1600 | self.closed = True
|
---|
| 1601 | raise
|
---|
| 1602 |
|
---|
| 1603 | def _getposix(self):
|
---|
| 1604 | return self.format == USTAR_FORMAT
|
---|
| 1605 | def _setposix(self, value):
|
---|
| 1606 | import warnings
|
---|
| 1607 | warnings.warn("use the format attribute instead", DeprecationWarning,
|
---|
| 1608 | 2)
|
---|
| 1609 | if value:
|
---|
| 1610 | self.format = USTAR_FORMAT
|
---|
| 1611 | else:
|
---|
| 1612 | self.format = GNU_FORMAT
|
---|
| 1613 | posix = property(_getposix, _setposix)
|
---|
| 1614 |
|
---|
| 1615 | #--------------------------------------------------------------------------
|
---|
| 1616 | # Below are the classmethods which act as alternate constructors to the
|
---|
| 1617 | # TarFile class. The open() method is the only one that is needed for
|
---|
| 1618 | # public use; it is the "super"-constructor and is able to select an
|
---|
| 1619 | # adequate "sub"-constructor for a particular compression using the mapping
|
---|
| 1620 | # from OPEN_METH.
|
---|
| 1621 | #
|
---|
| 1622 | # This concept allows one to subclass TarFile without losing the comfort of
|
---|
| 1623 | # the super-constructor. A sub-constructor is registered and made available
|
---|
| 1624 | # by adding it to the mapping in OPEN_METH.
|
---|
| 1625 |
|
---|
| 1626 | @classmethod
|
---|
| 1627 | def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE, **kwargs):
|
---|
| 1628 | """Open a tar archive for reading, writing or appending. Return
|
---|
| 1629 | an appropriate TarFile class.
|
---|
| 1630 |
|
---|
| 1631 | mode:
|
---|
| 1632 | 'r' or 'r:*' open for reading with transparent compression
|
---|
| 1633 | 'r:' open for reading exclusively uncompressed
|
---|
| 1634 | 'r:gz' open for reading with gzip compression
|
---|
| 1635 | 'r:bz2' open for reading with bzip2 compression
|
---|
| 1636 | 'a' or 'a:' open for appending, creating the file if necessary
|
---|
| 1637 | 'w' or 'w:' open for writing without compression
|
---|
| 1638 | 'w:gz' open for writing with gzip compression
|
---|
| 1639 | 'w:bz2' open for writing with bzip2 compression
|
---|
| 1640 |
|
---|
| 1641 | 'r|*' open a stream of tar blocks with transparent compression
|
---|
| 1642 | 'r|' open an uncompressed stream of tar blocks for reading
|
---|
| 1643 | 'r|gz' open a gzip compressed stream of tar blocks
|
---|
| 1644 | 'r|bz2' open a bzip2 compressed stream of tar blocks
|
---|
| 1645 | 'w|' open an uncompressed stream for writing
|
---|
| 1646 | 'w|gz' open a gzip compressed stream for writing
|
---|
| 1647 | 'w|bz2' open a bzip2 compressed stream for writing
|
---|
| 1648 | """
|
---|
| 1649 |
|
---|
| 1650 | if not name and not fileobj:
|
---|
| 1651 | raise ValueError("nothing to open")
|
---|
| 1652 |
|
---|
| 1653 | if mode in ("r", "r:*"):
|
---|
| 1654 | # Find out which *open() is appropriate for opening the file.
|
---|
| 1655 | for comptype in cls.OPEN_METH:
|
---|
| 1656 | func = getattr(cls, cls.OPEN_METH[comptype])
|
---|
| 1657 | if fileobj is not None:
|
---|
| 1658 | saved_pos = fileobj.tell()
|
---|
| 1659 | try:
|
---|
| 1660 | return func(name, "r", fileobj, **kwargs)
|
---|
| 1661 | except (ReadError, CompressionError), e:
|
---|
| 1662 | if fileobj is not None:
|
---|
| 1663 | fileobj.seek(saved_pos)
|
---|
| 1664 | continue
|
---|
| 1665 | raise ReadError("file could not be opened successfully")
|
---|
| 1666 |
|
---|
| 1667 | elif ":" in mode:
|
---|
| 1668 | filemode, comptype = mode.split(":", 1)
|
---|
| 1669 | filemode = filemode or "r"
|
---|
| 1670 | comptype = comptype or "tar"
|
---|
| 1671 |
|
---|
| 1672 | # Select the *open() function according to
|
---|
| 1673 | # given compression.
|
---|
| 1674 | if comptype in cls.OPEN_METH:
|
---|
| 1675 | func = getattr(cls, cls.OPEN_METH[comptype])
|
---|
| 1676 | else:
|
---|
| 1677 | raise CompressionError("unknown compression type %r" % comptype)
|
---|
| 1678 | return func(name, filemode, fileobj, **kwargs)
|
---|
| 1679 |
|
---|
| 1680 | elif "|" in mode:
|
---|
| 1681 | filemode, comptype = mode.split("|", 1)
|
---|
| 1682 | filemode = filemode or "r"
|
---|
| 1683 | comptype = comptype or "tar"
|
---|
| 1684 |
|
---|
| 1685 | if filemode not in "rw":
|
---|
| 1686 | raise ValueError("mode must be 'r' or 'w'")
|
---|
| 1687 |
|
---|
| 1688 | t = cls(name, filemode,
|
---|
| 1689 | _Stream(name, filemode, comptype, fileobj, bufsize),
|
---|
| 1690 | **kwargs)
|
---|
| 1691 | t._extfileobj = False
|
---|
| 1692 | return t
|
---|
| 1693 |
|
---|
| 1694 | elif mode in "aw":
|
---|
| 1695 | return cls.taropen(name, mode, fileobj, **kwargs)
|
---|
| 1696 |
|
---|
| 1697 | raise ValueError("undiscernible mode")
|
---|
| 1698 |
|
---|
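# Illustrative usage sketch, not part of the module.  The archive names are
# made up; tarfile.open refers to TarFile.open (see the bottom of this file).
#
#     import tarfile
#
#     tar = tarfile.open("sample.tar.gz", "r:gz")   # explicit gzip mode
#     for member in tar.getmembers():
#         print member.name
#     tar.close()
#
#     # "r" (or "r:*") tries each entry in OPEN_METH until one succeeds,
#     # so the compression does not have to be known in advance.
#     tar = tarfile.open("sample.tar.bz2", "r")
#     tar.close()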
| 1699 | @classmethod
|
---|
| 1700 | def taropen(cls, name, mode="r", fileobj=None, **kwargs):
|
---|
| 1701 | """Open uncompressed tar archive name for reading or writing.
|
---|
| 1702 | """
|
---|
| 1703 | if len(mode) > 1 or mode not in "raw":
|
---|
| 1704 | raise ValueError("mode must be 'r', 'a' or 'w'")
|
---|
| 1705 | return cls(name, mode, fileobj, **kwargs)
|
---|
| 1706 |
|
---|
| 1707 | @classmethod
|
---|
| 1708 | def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
|
---|
| 1709 | """Open gzip compressed tar archive name for reading or writing.
|
---|
| 1710 | Appending is not allowed.
|
---|
| 1711 | """
|
---|
| 1712 | if len(mode) > 1 or mode not in "rw":
|
---|
| 1713 | raise ValueError("mode must be 'r' or 'w'")
|
---|
| 1714 |
|
---|
| 1715 | try:
|
---|
| 1716 | import gzip
|
---|
| 1717 | gzip.GzipFile
|
---|
| 1718 | except (ImportError, AttributeError):
|
---|
| 1719 | raise CompressionError("gzip module is not available")
|
---|
| 1720 |
|
---|
| 1721 | if fileobj is None:
|
---|
| 1722 | fileobj = bltn_open(name, mode + "b")
|
---|
| 1723 |
|
---|
| 1724 | try:
|
---|
| 1725 | t = cls.taropen(name, mode,
|
---|
| 1726 | gzip.GzipFile(name, mode, compresslevel, fileobj),
|
---|
| 1727 | **kwargs)
|
---|
| 1728 | except IOError:
|
---|
| 1729 | raise ReadError("not a gzip file")
|
---|
| 1730 | t._extfileobj = False
|
---|
| 1731 | return t
|
---|
| 1732 |
|
---|
| 1733 | @classmethod
|
---|
| 1734 | def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
|
---|
| 1735 | """Open bzip2 compressed tar archive name for reading or writing.
|
---|
| 1736 | Appending is not allowed.
|
---|
| 1737 | """
|
---|
| 1738 | if len(mode) > 1 or mode not in "rw":
|
---|
| 1739 | raise ValueError("mode must be 'r' or 'w'.")
|
---|
| 1740 |
|
---|
| 1741 | try:
|
---|
| 1742 | import bz2
|
---|
| 1743 | except ImportError:
|
---|
| 1744 | raise CompressionError("bz2 module is not available")
|
---|
| 1745 |
|
---|
| 1746 | if fileobj is not None:
|
---|
| 1747 | fileobj = _BZ2Proxy(fileobj, mode)
|
---|
| 1748 | else:
|
---|
| 1749 | fileobj = bz2.BZ2File(name, mode, compresslevel=compresslevel)
|
---|
| 1750 |
|
---|
| 1751 | try:
|
---|
| 1752 | t = cls.taropen(name, mode, fileobj, **kwargs)
|
---|
[391] | 1753 | except (IOError, EOFError):
|
---|
[2] | 1754 | raise ReadError("not a bzip2 file")
|
---|
| 1755 | t._extfileobj = False
|
---|
| 1756 | return t
|
---|
| 1757 |
|
---|
| 1758 | # All *open() methods are registered here.
|
---|
| 1759 | OPEN_METH = {
|
---|
| 1760 | "tar": "taropen", # uncompressed tar
|
---|
| 1761 | "gz": "gzopen", # gzip compressed tar
|
---|
| 1762 | "bz2": "bz2open" # bzip2 compressed tar
|
---|
| 1763 | }
|
---|
| 1764 |
|
---|
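# Illustrative sketch, not part of the module: as described above open(),
# a subclass can register an additional compression by adding an entry to
# OPEN_METH.  The XZTarFile class and the backports.lzma dependency below
# are assumptions made for this example only.
#
#     from backports import lzma          # assumed third-party module
#
#     class XZTarFile(TarFile):
#         @classmethod
#         def xzopen(cls, name, mode="r", fileobj=None, **kwargs):
#             if fileobj is None:
#                 fileobj = lzma.LZMAFile(name, mode)
#             t = cls.taropen(name, mode, fileobj, **kwargs)
#             t._extfileobj = False       # let close() close the LZMAFile
#             return t
#
#         OPEN_METH = dict(TarFile.OPEN_METH, xz="xzopen")
#
#     # XZTarFile.open("sample.tar.xz", "r:xz") now dispatches to xzopen().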
| 1765 | #--------------------------------------------------------------------------
|
---|
| 1766 | # The public methods which TarFile provides:
|
---|
| 1767 |
|
---|
| 1768 | def close(self):
|
---|
| 1769 | """Close the TarFile. In write-mode, two finishing zero blocks are
|
---|
| 1770 | appended to the archive.
|
---|
| 1771 | """
|
---|
| 1772 | if self.closed:
|
---|
| 1773 | return
|
---|
| 1774 |
|
---|
| 1775 | if self.mode in "aw":
|
---|
| 1776 | self.fileobj.write(NUL * (BLOCKSIZE * 2))
|
---|
| 1777 | self.offset += (BLOCKSIZE * 2)
|
---|
| 1778 | # fill up the end with zero-blocks
|
---|
| 1779 | # (like option -b20 for tar does)
|
---|
| 1780 | blocks, remainder = divmod(self.offset, RECORDSIZE)
|
---|
| 1781 | if remainder > 0:
|
---|
| 1782 | self.fileobj.write(NUL * (RECORDSIZE - remainder))
|
---|
| 1783 |
|
---|
| 1784 | if not self._extfileobj:
|
---|
| 1785 | self.fileobj.close()
|
---|
| 1786 | self.closed = True
|
---|
| 1787 |
|
---|
| 1788 | def getmember(self, name):
|
---|
| 1789 | """Return a TarInfo object for member `name'. If `name' can not be
|
---|
| 1790 | found in the archive, KeyError is raised. If a member occurs more
|
---|
| 1791 | than once in the archive, its last occurrence is assumed to be the
|
---|
| 1792 | most up-to-date version.
|
---|
| 1793 | """
|
---|
| 1794 | tarinfo = self._getmember(name)
|
---|
| 1795 | if tarinfo is None:
|
---|
| 1796 | raise KeyError("filename %r not found" % name)
|
---|
| 1797 | return tarinfo
|
---|
| 1798 |
|
---|
| 1799 | def getmembers(self):
|
---|
| 1800 | """Return the members of the archive as a list of TarInfo objects. The
|
---|
| 1801 | list has the same order as the members in the archive.
|
---|
| 1802 | """
|
---|
| 1803 | self._check()
|
---|
| 1804 | if not self._loaded: # if we want to obtain a list of
|
---|
| 1805 | self._load() # all members, we first have to
|
---|
| 1806 | # scan the whole archive.
|
---|
| 1807 | return self.members
|
---|
| 1808 |
|
---|
| 1809 | def getnames(self):
|
---|
| 1810 | """Return the members of the archive as a list of their names. It has
|
---|
| 1811 | the same order as the list returned by getmembers().
|
---|
| 1812 | """
|
---|
| 1813 | return [tarinfo.name for tarinfo in self.getmembers()]
|
---|
| 1814 |
|
---|
| 1815 | def gettarinfo(self, name=None, arcname=None, fileobj=None):
|
---|
| 1816 | """Create a TarInfo object for either the file `name' or the file
|
---|
| 1817 | object `fileobj' (using os.fstat on its file descriptor). You can
|
---|
| 1818 | modify some of the TarInfo's attributes before you add it using
|
---|
| 1819 | addfile(). If given, `arcname' specifies an alternative name for the
|
---|
| 1820 | file in the archive.
|
---|
| 1821 | """
|
---|
| 1822 | self._check("aw")
|
---|
| 1823 |
|
---|
| 1824 | # When fileobj is given, replace name by
|
---|
| 1825 | # fileobj's real name.
|
---|
| 1826 | if fileobj is not None:
|
---|
| 1827 | name = fileobj.name
|
---|
| 1828 |
|
---|
| 1829 | # Building the name of the member in the archive.
|
---|
| 1830 | # Backward slashes are converted to forward slashes;
|
---|
| 1831 | # absolute paths are turned into relative paths.
|
---|
| 1832 | if arcname is None:
|
---|
| 1833 | arcname = name
|
---|
| 1834 | drv, arcname = os.path.splitdrive(arcname)
|
---|
[391] | 1835 | arcname = arcname.replace(os.sep, "/")
|
---|
| 1836 | arcname = arcname.lstrip("/")
|
---|
[2] | 1837 |
|
---|
| 1838 | # Now, fill the TarInfo object with
|
---|
| 1839 | # information specific for the file.
|
---|
| 1840 | tarinfo = self.tarinfo()
|
---|
| 1841 | tarinfo.tarfile = self
|
---|
| 1842 |
|
---|
| 1843 | # Use os.stat or os.lstat, depending on platform
|
---|
| 1844 | # and if symlinks shall be resolved.
|
---|
| 1845 | if fileobj is None:
|
---|
| 1846 | if hasattr(os, "lstat") and not self.dereference:
|
---|
| 1847 | statres = os.lstat(name)
|
---|
| 1848 | else:
|
---|
| 1849 | statres = os.stat(name)
|
---|
| 1850 | else:
|
---|
| 1851 | statres = os.fstat(fileobj.fileno())
|
---|
| 1852 | linkname = ""
|
---|
| 1853 |
|
---|
| 1854 | stmd = statres.st_mode
|
---|
| 1855 | if stat.S_ISREG(stmd):
|
---|
| 1856 | inode = (statres.st_ino, statres.st_dev)
|
---|
| 1857 | if not self.dereference and statres.st_nlink > 1 and \
|
---|
| 1858 | inode in self.inodes and arcname != self.inodes[inode]:
|
---|
| 1859 | # Is it a hardlink to an already
|
---|
| 1860 | # archived file?
|
---|
| 1861 | type = LNKTYPE
|
---|
| 1862 | linkname = self.inodes[inode]
|
---|
| 1863 | else:
|
---|
| 1864 | # The inode is added only if it's valid.
|
---|
| 1865 | # For win32 it is always 0.
|
---|
| 1866 | type = REGTYPE
|
---|
| 1867 | if inode[0]:
|
---|
| 1868 | self.inodes[inode] = arcname
|
---|
| 1869 | elif stat.S_ISDIR(stmd):
|
---|
| 1870 | type = DIRTYPE
|
---|
| 1871 | elif stat.S_ISFIFO(stmd):
|
---|
| 1872 | type = FIFOTYPE
|
---|
| 1873 | elif stat.S_ISLNK(stmd):
|
---|
| 1874 | type = SYMTYPE
|
---|
| 1875 | linkname = os.readlink(name)
|
---|
| 1876 | elif stat.S_ISCHR(stmd):
|
---|
| 1877 | type = CHRTYPE
|
---|
| 1878 | elif stat.S_ISBLK(stmd):
|
---|
| 1879 | type = BLKTYPE
|
---|
| 1880 | else:
|
---|
| 1881 | return None
|
---|
| 1882 |
|
---|
| 1883 | # Fill the TarInfo object with all
|
---|
| 1884 | # information we can get.
|
---|
| 1885 | tarinfo.name = arcname
|
---|
| 1886 | tarinfo.mode = stmd
|
---|
| 1887 | tarinfo.uid = statres.st_uid
|
---|
| 1888 | tarinfo.gid = statres.st_gid
|
---|
[391] | 1889 | if type == REGTYPE:
|
---|
[2] | 1890 | tarinfo.size = statres.st_size
|
---|
| 1891 | else:
|
---|
| 1892 | tarinfo.size = 0L
|
---|
| 1893 | tarinfo.mtime = statres.st_mtime
|
---|
| 1894 | tarinfo.type = type
|
---|
| 1895 | tarinfo.linkname = linkname
|
---|
| 1896 | if pwd:
|
---|
| 1897 | try:
|
---|
| 1898 | tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
|
---|
| 1899 | except KeyError:
|
---|
| 1900 | pass
|
---|
| 1901 | if grp:
|
---|
| 1902 | try:
|
---|
| 1903 | tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
|
---|
| 1904 | except KeyError:
|
---|
| 1905 | pass
|
---|
| 1906 |
|
---|
| 1907 | if type in (CHRTYPE, BLKTYPE):
|
---|
| 1908 | if hasattr(os, "major") and hasattr(os, "minor"):
|
---|
| 1909 | tarinfo.devmajor = os.major(statres.st_rdev)
|
---|
| 1910 | tarinfo.devminor = os.minor(statres.st_rdev)
|
---|
| 1911 | return tarinfo
|
---|
| 1912 |
|
---|
| 1913 | def list(self, verbose=True):
|
---|
| 1914 | """Print a table of contents to sys.stdout. If `verbose' is False, only
|
---|
| 1915 | the names of the members are printed. If it is True, an `ls -l'-like
|
---|
| 1916 | output is produced.
|
---|
| 1917 | """
|
---|
| 1918 | self._check()
|
---|
| 1919 |
|
---|
| 1920 | for tarinfo in self:
|
---|
| 1921 | if verbose:
|
---|
| 1922 | print filemode(tarinfo.mode),
|
---|
| 1923 | print "%s/%s" % (tarinfo.uname or tarinfo.uid,
|
---|
| 1924 | tarinfo.gname or tarinfo.gid),
|
---|
| 1925 | if tarinfo.ischr() or tarinfo.isblk():
|
---|
| 1926 | print "%10s" % ("%d,%d" \
|
---|
| 1927 | % (tarinfo.devmajor, tarinfo.devminor)),
|
---|
| 1928 | else:
|
---|
| 1929 | print "%10d" % tarinfo.size,
|
---|
| 1930 | print "%d-%02d-%02d %02d:%02d:%02d" \
|
---|
| 1931 | % time.localtime(tarinfo.mtime)[:6],
|
---|
| 1932 |
|
---|
| 1933 | print tarinfo.name + ("/" if tarinfo.isdir() else ""),
|
---|
| 1934 |
|
---|
| 1935 | if verbose:
|
---|
| 1936 | if tarinfo.issym():
|
---|
| 1937 | print "->", tarinfo.linkname,
|
---|
| 1938 | if tarinfo.islnk():
|
---|
| 1939 | print "link to", tarinfo.linkname,
|
---|
| 1940 | print
|
---|
| 1941 |
|
---|
[391] | 1942 | def add(self, name, arcname=None, recursive=True, exclude=None, filter=None):
|
---|
[2] | 1943 | """Add the file `name' to the archive. `name' may be any type of file
|
---|
| 1944 | (directory, fifo, symbolic link, etc.). If given, `arcname'
|
---|
| 1945 | specifies an alternative name for the file in the archive.
|
---|
| 1946 | Directories are added recursively by default. This can be avoided by
|
---|
| 1947 | setting `recursive' to False. `exclude' is a function that should
|
---|
[391] | 1948 | return True for each filename to be excluded. `filter' is a function
|
---|
| 1949 | that expects a TarInfo object argument and returns the changed
|
---|
| 1950 | TarInfo object. If it returns None, the TarInfo object will be
|
---|
| 1951 | excluded from the archive.
|
---|
[2] | 1952 | """
|
---|
| 1953 | self._check("aw")
|
---|
| 1954 |
|
---|
| 1955 | if arcname is None:
|
---|
| 1956 | arcname = name
|
---|
| 1957 |
|
---|
| 1958 | # Exclude pathnames.
|
---|
[391] | 1959 | if exclude is not None:
|
---|
| 1960 | import warnings
|
---|
| 1961 | warnings.warn("use the filter argument instead",
|
---|
| 1962 | DeprecationWarning, 2)
|
---|
| 1963 | if exclude(name):
|
---|
| 1964 | self._dbg(2, "tarfile: Excluded %r" % name)
|
---|
| 1965 | return
|
---|
[2] | 1966 |
|
---|
| 1967 | # Skip if somebody tries to archive the archive...
|
---|
| 1968 | if self.name is not None and os.path.abspath(name) == self.name:
|
---|
| 1969 | self._dbg(2, "tarfile: Skipped %r" % name)
|
---|
| 1970 | return
|
---|
| 1971 |
|
---|
| 1972 | self._dbg(1, name)
|
---|
| 1973 |
|
---|
| 1974 | # Create a TarInfo object from the file.
|
---|
| 1975 | tarinfo = self.gettarinfo(name, arcname)
|
---|
| 1976 |
|
---|
| 1977 | if tarinfo is None:
|
---|
| 1978 | self._dbg(1, "tarfile: Unsupported type %r" % name)
|
---|
| 1979 | return
|
---|
| 1980 |
|
---|
[391] | 1981 | # Change or exclude the TarInfo object.
|
---|
| 1982 | if filter is not None:
|
---|
| 1983 | tarinfo = filter(tarinfo)
|
---|
| 1984 | if tarinfo is None:
|
---|
| 1985 | self._dbg(2, "tarfile: Excluded %r" % name)
|
---|
| 1986 | return
|
---|
| 1987 |
|
---|
[2] | 1988 | # Append the tar header and data to the archive.
|
---|
| 1989 | if tarinfo.isreg():
|
---|
[391] | 1990 | with bltn_open(name, "rb") as f:
|
---|
| 1991 | self.addfile(tarinfo, f)
|
---|
[2] | 1992 |
|
---|
| 1993 | elif tarinfo.isdir():
|
---|
| 1994 | self.addfile(tarinfo)
|
---|
| 1995 | if recursive:
|
---|
| 1996 | for f in os.listdir(name):
|
---|
[391] | 1997 | self.add(os.path.join(name, f), os.path.join(arcname, f),
|
---|
| 1998 | recursive, exclude, filter)
|
---|
[2] | 1999 |
|
---|
| 2000 | else:
|
---|
| 2001 | self.addfile(tarinfo)
|
---|
| 2002 |
|
---|
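# Illustrative usage sketch, not part of the module: using the `filter'
# argument of add() to normalize ownership and to drop editor backup files.
# The directory name is made up.
#
#     import tarfile
#
#     def reset(tarinfo):
#         if tarinfo.name.endswith("~"):
#             return None                 # exclude this member entirely
#         tarinfo.uid = tarinfo.gid = 0
#         tarinfo.uname = tarinfo.gname = "root"
#         return tarinfo
#
#     tar = tarfile.open("sample.tar", "w")
#     tar.add("project", filter=reset)
#     tar.close()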
| 2003 | def addfile(self, tarinfo, fileobj=None):
|
---|
| 2004 | """Add the TarInfo object `tarinfo' to the archive. If `fileobj' is
|
---|
| 2005 | given, tarinfo.size bytes are read from it and added to the archive.
|
---|
| 2006 | You can create TarInfo objects using gettarinfo().
|
---|
| 2007 | On Windows platforms, `fileobj' should always be opened with mode
|
---|
| 2008 | 'rb' so that the number of bytes read matches tarinfo.size.
|
---|
| 2009 | """
|
---|
| 2010 | self._check("aw")
|
---|
| 2011 |
|
---|
| 2012 | tarinfo = copy.copy(tarinfo)
|
---|
| 2013 |
|
---|
| 2014 | buf = tarinfo.tobuf(self.format, self.encoding, self.errors)
|
---|
| 2015 | self.fileobj.write(buf)
|
---|
| 2016 | self.offset += len(buf)
|
---|
| 2017 |
|
---|
| 2018 | # If there's data to follow, append it.
|
---|
| 2019 | if fileobj is not None:
|
---|
| 2020 | copyfileobj(fileobj, self.fileobj, tarinfo.size)
|
---|
| 2021 | blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)
|
---|
| 2022 | if remainder > 0:
|
---|
| 2023 | self.fileobj.write(NUL * (BLOCKSIZE - remainder))
|
---|
| 2024 | blocks += 1
|
---|
| 2025 | self.offset += blocks * BLOCKSIZE
|
---|
| 2026 |
|
---|
| 2027 | self.members.append(tarinfo)
|
---|
| 2028 |
|
---|
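# Illustrative usage sketch, not part of the module: adding in-memory data
# by building a TarInfo by hand and passing a file-like object to addfile().
# Names and contents are made up.
#
#     import time
#     import tarfile
#     from cStringIO import StringIO
#
#     data = "hello world\n"
#     tar = tarfile.open("sample.tar", "w")
#     tarinfo = tarfile.TarInfo("greeting.txt")
#     tarinfo.size = len(data)
#     tarinfo.mtime = int(time.time())
#     tar.addfile(tarinfo, StringIO(data))
#     tar.close()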
| 2029 | def extractall(self, path=".", members=None):
|
---|
| 2030 | """Extract all members from the archive to the current working
|
---|
| 2031 | directory and set owner, modification time and permissions on
|
---|
| 2032 | directories afterwards. `path' specifies a different directory
|
---|
| 2033 | to extract to. `members' is optional and must be a subset of the
|
---|
| 2034 | list returned by getmembers().
|
---|
| 2035 | """
|
---|
| 2036 | directories = []
|
---|
| 2037 |
|
---|
| 2038 | if members is None:
|
---|
| 2039 | members = self
|
---|
| 2040 |
|
---|
| 2041 | for tarinfo in members:
|
---|
| 2042 | if tarinfo.isdir():
|
---|
| 2043 | # Extract directories with a safe mode.
|
---|
| 2044 | directories.append(tarinfo)
|
---|
| 2045 | tarinfo = copy.copy(tarinfo)
|
---|
| 2046 | tarinfo.mode = 0700
|
---|
| 2047 | self.extract(tarinfo, path)
|
---|
| 2048 |
|
---|
| 2049 | # Reverse sort directories.
|
---|
| 2050 | directories.sort(key=operator.attrgetter('name'))
|
---|
| 2051 | directories.reverse()
|
---|
| 2052 |
|
---|
| 2053 | # Set correct owner, mtime and filemode on directories.
|
---|
| 2054 | for tarinfo in directories:
|
---|
| 2055 | dirpath = os.path.join(path, tarinfo.name)
|
---|
| 2056 | try:
|
---|
| 2057 | self.chown(tarinfo, dirpath)
|
---|
| 2058 | self.utime(tarinfo, dirpath)
|
---|
| 2059 | self.chmod(tarinfo, dirpath)
|
---|
| 2060 | except ExtractError, e:
|
---|
| 2061 | if self.errorlevel > 1:
|
---|
| 2062 | raise
|
---|
| 2063 | else:
|
---|
| 2064 | self._dbg(1, "tarfile: %s" % e)
|
---|
| 2065 |
|
---|
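# Illustrative usage sketch, not part of the module: extracting only a
# subset of members.  extractall() writes whatever paths the archive
# contains, so for untrusted archives callers may want to skip members
# with absolute paths or ".." components first.  Names are made up.
#
#     import tarfile
#
#     tar = tarfile.open("sample.tar", "r")
#     safe = [m for m in tar.getmembers()
#             if not m.name.startswith("/") and ".." not in m.name.split("/")]
#     tar.extractall(path="dest", members=safe)
#     tar.close()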
| 2066 | def extract(self, member, path=""):
|
---|
| 2067 | """Extract a member from the archive to the current working directory,
|
---|
| 2068 | using its full name. Its file information is extracted as accurately
|
---|
| 2069 | as possible. `member' may be a filename or a TarInfo object. You can
|
---|
| 2070 | specify a different directory using `path'.
|
---|
| 2071 | """
|
---|
| 2072 | self._check("r")
|
---|
| 2073 |
|
---|
| 2074 | if isinstance(member, basestring):
|
---|
| 2075 | tarinfo = self.getmember(member)
|
---|
| 2076 | else:
|
---|
| 2077 | tarinfo = member
|
---|
| 2078 |
|
---|
| 2079 | # Prepare the link target for makelink().
|
---|
| 2080 | if tarinfo.islnk():
|
---|
| 2081 | tarinfo._link_target = os.path.join(path, tarinfo.linkname)
|
---|
| 2082 |
|
---|
| 2083 | try:
|
---|
| 2084 | self._extract_member(tarinfo, os.path.join(path, tarinfo.name))
|
---|
| 2085 | except EnvironmentError, e:
|
---|
| 2086 | if self.errorlevel > 0:
|
---|
| 2087 | raise
|
---|
| 2088 | else:
|
---|
| 2089 | if e.filename is None:
|
---|
| 2090 | self._dbg(1, "tarfile: %s" % e.strerror)
|
---|
| 2091 | else:
|
---|
| 2092 | self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
|
---|
| 2093 | except ExtractError, e:
|
---|
| 2094 | if self.errorlevel > 1:
|
---|
| 2095 | raise
|
---|
| 2096 | else:
|
---|
| 2097 | self._dbg(1, "tarfile: %s" % e)
|
---|
| 2098 |
|
---|
| 2099 | def extractfile(self, member):
|
---|
| 2100 | """Extract a member from the archive as a file object. `member' may be
|
---|
| 2101 | a filename or a TarInfo object. If `member' is a regular file, a
|
---|
| 2102 | file-like object is returned. If `member' is a link, a file-like
|
---|
| 2103 | object is constructed from the link's target. If `member' is none of
|
---|
| 2104 | the above, None is returned.
|
---|
| 2105 | The file-like object is read-only and provides the following
|
---|
| 2106 | methods: read(), readline(), readlines(), seek() and tell()
|
---|
| 2107 | """
|
---|
| 2108 | self._check("r")
|
---|
| 2109 |
|
---|
| 2110 | if isinstance(member, basestring):
|
---|
| 2111 | tarinfo = self.getmember(member)
|
---|
| 2112 | else:
|
---|
| 2113 | tarinfo = member
|
---|
| 2114 |
|
---|
| 2115 | if tarinfo.isreg():
|
---|
| 2116 | return self.fileobject(self, tarinfo)
|
---|
| 2117 |
|
---|
| 2118 | elif tarinfo.type not in SUPPORTED_TYPES:
|
---|
| 2119 | # If a member's type is unknown, it is treated as a
|
---|
| 2120 | # regular file.
|
---|
| 2121 | return self.fileobject(self, tarinfo)
|
---|
| 2122 |
|
---|
| 2123 | elif tarinfo.islnk() or tarinfo.issym():
|
---|
| 2124 | if isinstance(self.fileobj, _Stream):
|
---|
| 2125 | # A small but ugly workaround for the case that someone tries
|
---|
| 2126 | # to extract a (sym)link as a file-object from a non-seekable
|
---|
| 2127 | # stream of tar blocks.
|
---|
| 2128 | raise StreamError("cannot extract (sym)link as file object")
|
---|
| 2129 | else:
|
---|
| 2130 | # A (sym)link's file object is its target's file object.
|
---|
[391] | 2131 | return self.extractfile(self._find_link_target(tarinfo))
|
---|
[2] | 2132 | else:
|
---|
| 2133 | # If there's no data associated with the member (directory, chrdev,
|
---|
| 2134 | # blkdev, etc.), return None instead of a file object.
|
---|
| 2135 | return None
|
---|
| 2136 |
|
---|
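# Illustrative usage sketch, not part of the module: reading a member
# through the read-only file object returned by extractfile().  The member
# name is made up.
#
#     import tarfile
#
#     tar = tarfile.open("sample.tar", "r")
#     f = tar.extractfile("greeting.txt")
#     if f is not None:                   # None for directories, devices, ...
#         print f.read()
#         f.close()
#     tar.close()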
| 2137 | def _extract_member(self, tarinfo, targetpath):
|
---|
| 2138 | """Extract the TarInfo object tarinfo to a physical
|
---|
| 2139 | file called targetpath.
|
---|
| 2140 | """
|
---|
| 2141 | # Fetch the TarInfo object for the given name
|
---|
| 2142 | # and build the destination pathname, replacing
|
---|
| 2143 | # forward slashes with platform-specific separators.
|
---|
[391] | 2144 | targetpath = targetpath.rstrip("/")
|
---|
| 2145 | targetpath = targetpath.replace("/", os.sep)
|
---|
[2] | 2146 |
|
---|
| 2147 | # Create all upper directories.
|
---|
| 2148 | upperdirs = os.path.dirname(targetpath)
|
---|
| 2149 | if upperdirs and not os.path.exists(upperdirs):
|
---|
| 2150 | # Create directories that are not part of the archive with
|
---|
| 2151 | # default permissions.
|
---|
| 2152 | os.makedirs(upperdirs)
|
---|
| 2153 |
|
---|
| 2154 | if tarinfo.islnk() or tarinfo.issym():
|
---|
| 2155 | self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
|
---|
| 2156 | else:
|
---|
| 2157 | self._dbg(1, tarinfo.name)
|
---|
| 2158 |
|
---|
| 2159 | if tarinfo.isreg():
|
---|
| 2160 | self.makefile(tarinfo, targetpath)
|
---|
| 2161 | elif tarinfo.isdir():
|
---|
| 2162 | self.makedir(tarinfo, targetpath)
|
---|
| 2163 | elif tarinfo.isfifo():
|
---|
| 2164 | self.makefifo(tarinfo, targetpath)
|
---|
| 2165 | elif tarinfo.ischr() or tarinfo.isblk():
|
---|
| 2166 | self.makedev(tarinfo, targetpath)
|
---|
| 2167 | elif tarinfo.islnk() or tarinfo.issym():
|
---|
| 2168 | self.makelink(tarinfo, targetpath)
|
---|
| 2169 | elif tarinfo.type not in SUPPORTED_TYPES:
|
---|
| 2170 | self.makeunknown(tarinfo, targetpath)
|
---|
| 2171 | else:
|
---|
| 2172 | self.makefile(tarinfo, targetpath)
|
---|
| 2173 |
|
---|
| 2174 | self.chown(tarinfo, targetpath)
|
---|
| 2175 | if not tarinfo.issym():
|
---|
| 2176 | self.chmod(tarinfo, targetpath)
|
---|
| 2177 | self.utime(tarinfo, targetpath)
|
---|
| 2178 |
|
---|
| 2179 | #--------------------------------------------------------------------------
|
---|
| 2180 | # Below are the different file methods. They are called via
|
---|
| 2181 | # _extract_member() when extract() is called. They can be replaced in a
|
---|
| 2182 | # subclass to implement other functionality.
|
---|
| 2183 |
|
---|
| 2184 | def makedir(self, tarinfo, targetpath):
|
---|
| 2185 | """Make a directory called targetpath.
|
---|
| 2186 | """
|
---|
| 2187 | try:
|
---|
| 2188 | # Use a safe mode for the directory, the real mode is set
|
---|
| 2189 | # later in _extract_member().
|
---|
| 2190 | os.mkdir(targetpath, 0700)
|
---|
| 2191 | except EnvironmentError, e:
|
---|
| 2192 | if e.errno != errno.EEXIST:
|
---|
| 2193 | raise
|
---|
| 2194 |
|
---|
| 2195 | def makefile(self, tarinfo, targetpath):
|
---|
| 2196 | """Make a file called targetpath.
|
---|
| 2197 | """
|
---|
| 2198 | source = self.extractfile(tarinfo)
|
---|
[391] | 2199 | try:
|
---|
| 2200 | with bltn_open(targetpath, "wb") as target:
|
---|
| 2201 | copyfileobj(source, target)
|
---|
| 2202 | finally:
|
---|
| 2203 | source.close()
|
---|
[2] | 2204 |
|
---|
| 2205 | def makeunknown(self, tarinfo, targetpath):
|
---|
| 2206 | """Make a file from a TarInfo object with an unknown type
|
---|
| 2207 | at targetpath.
|
---|
| 2208 | """
|
---|
| 2209 | self.makefile(tarinfo, targetpath)
|
---|
| 2210 | self._dbg(1, "tarfile: Unknown file type %r, " \
|
---|
| 2211 | "extracted as regular file." % tarinfo.type)
|
---|
| 2212 |
|
---|
| 2213 | def makefifo(self, tarinfo, targetpath):
|
---|
| 2214 | """Make a fifo called targetpath.
|
---|
| 2215 | """
|
---|
| 2216 | if hasattr(os, "mkfifo"):
|
---|
| 2217 | os.mkfifo(targetpath)
|
---|
| 2218 | else:
|
---|
| 2219 | raise ExtractError("fifo not supported by system")
|
---|
| 2220 |
|
---|
| 2221 | def makedev(self, tarinfo, targetpath):
|
---|
| 2222 | """Make a character or block device called targetpath.
|
---|
| 2223 | """
|
---|
| 2224 | if not hasattr(os, "mknod") or not hasattr(os, "makedev"):
|
---|
| 2225 | raise ExtractError("special devices not supported by system")
|
---|
| 2226 |
|
---|
| 2227 | mode = tarinfo.mode
|
---|
| 2228 | if tarinfo.isblk():
|
---|
| 2229 | mode |= stat.S_IFBLK
|
---|
| 2230 | else:
|
---|
| 2231 | mode |= stat.S_IFCHR
|
---|
| 2232 |
|
---|
| 2233 | os.mknod(targetpath, mode,
|
---|
| 2234 | os.makedev(tarinfo.devmajor, tarinfo.devminor))
|
---|
| 2235 |
|
---|
| 2236 | def makelink(self, tarinfo, targetpath):
|
---|
| 2237 | """Make a (symbolic) link called targetpath. If it cannot be created
|
---|
| 2238 | (platform limitation), we try to make a copy of the referenced file
|
---|
| 2239 | instead of a link.
|
---|
| 2240 | """
|
---|
[391] | 2241 | if hasattr(os, "symlink") and hasattr(os, "link"):
|
---|
| 2242 | # For systems that support symbolic and hard links.
|
---|
[2] | 2243 | if tarinfo.issym():
|
---|
[391] | 2244 | if os.path.lexists(targetpath):
|
---|
| 2245 | os.unlink(targetpath)
|
---|
| 2246 | os.symlink(tarinfo.linkname, targetpath)
|
---|
[2] | 2247 | else:
|
---|
| 2248 | # See extract().
|
---|
[391] | 2249 | if os.path.exists(tarinfo._link_target):
|
---|
| 2250 | if os.path.lexists(targetpath):
|
---|
| 2251 | os.unlink(targetpath)
|
---|
| 2252 | os.link(tarinfo._link_target, targetpath)
|
---|
| 2253 | else:
|
---|
| 2254 | self._extract_member(self._find_link_target(tarinfo), targetpath)
|
---|
| 2255 | else:
|
---|
[2] | 2256 | try:
|
---|
[391] | 2257 | self._extract_member(self._find_link_target(tarinfo), targetpath)
|
---|
| 2258 | except KeyError:
|
---|
| 2259 | raise ExtractError("unable to resolve link inside archive")
|
---|
[2] | 2260 |
|
---|
| 2261 | def chown(self, tarinfo, targetpath):
|
---|
| 2262 | """Set owner of targetpath according to tarinfo.
|
---|
| 2263 | """
|
---|
| 2264 | if pwd and hasattr(os, "geteuid") and os.geteuid() == 0:
|
---|
| 2265 | # We have to be root to do so.
|
---|
| 2266 | try:
|
---|
| 2267 | g = grp.getgrnam(tarinfo.gname)[2]
|
---|
| 2268 | except KeyError:
|
---|
[391] | 2269 | g = tarinfo.gid
|
---|
[2] | 2270 | try:
|
---|
| 2271 | u = pwd.getpwnam(tarinfo.uname)[2]
|
---|
| 2272 | except KeyError:
|
---|
[391] | 2273 | u = tarinfo.uid
|
---|
[2] | 2274 | try:
|
---|
| 2275 | if tarinfo.issym() and hasattr(os, "lchown"):
|
---|
| 2276 | os.lchown(targetpath, u, g)
|
---|
| 2277 | else:
|
---|
| 2278 | if sys.platform != "os2emx":
|
---|
| 2279 | os.chown(targetpath, u, g)
|
---|
| 2280 | except EnvironmentError, e:
|
---|
| 2281 | raise ExtractError("could not change owner")
|
---|
| 2282 |
|
---|
| 2283 | def chmod(self, tarinfo, targetpath):
|
---|
| 2284 | """Set file permissions of targetpath according to tarinfo.
|
---|
| 2285 | """
|
---|
| 2286 | if hasattr(os, 'chmod'):
|
---|
| 2287 | try:
|
---|
| 2288 | os.chmod(targetpath, tarinfo.mode)
|
---|
| 2289 | except EnvironmentError, e:
|
---|
| 2290 | raise ExtractError("could not change mode")
|
---|
| 2291 |
|
---|
| 2292 | def utime(self, tarinfo, targetpath):
|
---|
| 2293 | """Set modification time of targetpath according to tarinfo.
|
---|
| 2294 | """
|
---|
| 2295 | if not hasattr(os, 'utime'):
|
---|
| 2296 | return
|
---|
| 2297 | try:
|
---|
| 2298 | os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime))
|
---|
| 2299 | except EnvironmentError, e:
|
---|
| 2300 | raise ExtractError("could not change modification time")
|
---|
| 2301 |
|
---|
| 2302 | #--------------------------------------------------------------------------
|
---|
| 2303 | def next(self):
|
---|
| 2304 | """Return the next member of the archive as a TarInfo object, when
|
---|
| 2305 | TarFile is opened for reading. Return None if there is no more
|
---|
| 2306 | available.
|
---|
| 2307 | """
|
---|
| 2308 | self._check("ra")
|
---|
| 2309 | if self.firstmember is not None:
|
---|
| 2310 | m = self.firstmember
|
---|
| 2311 | self.firstmember = None
|
---|
| 2312 | return m
|
---|
| 2313 |
|
---|
| 2314 | # Read the next block.
|
---|
| 2315 | self.fileobj.seek(self.offset)
|
---|
[391] | 2316 | tarinfo = None
|
---|
[2] | 2317 | while True:
|
---|
| 2318 | try:
|
---|
| 2319 | tarinfo = self.tarinfo.fromtarfile(self)
|
---|
[391] | 2320 | except EOFHeaderError, e:
|
---|
[2] | 2321 | if self.ignore_zeros:
|
---|
| 2322 | self._dbg(2, "0x%X: %s" % (self.offset, e))
|
---|
| 2323 | self.offset += BLOCKSIZE
|
---|
| 2324 | continue
|
---|
[391] | 2325 | except InvalidHeaderError, e:
|
---|
| 2326 | if self.ignore_zeros:
|
---|
| 2327 | self._dbg(2, "0x%X: %s" % (self.offset, e))
|
---|
| 2328 | self.offset += BLOCKSIZE
|
---|
| 2329 | continue
|
---|
| 2330 | elif self.offset == 0:
|
---|
| 2331 | raise ReadError(str(e))
|
---|
| 2332 | except EmptyHeaderError:
|
---|
| 2333 | if self.offset == 0:
|
---|
| 2334 | raise ReadError("empty file")
|
---|
| 2335 | except TruncatedHeaderError, e:
|
---|
| 2336 | if self.offset == 0:
|
---|
| 2337 | raise ReadError(str(e))
|
---|
| 2338 | except SubsequentHeaderError, e:
|
---|
| 2339 | raise ReadError(str(e))
|
---|
[2] | 2340 | break
|
---|
| 2341 |
|
---|
[391] | 2342 | if tarinfo is not None:
|
---|
| 2343 | self.members.append(tarinfo)
|
---|
| 2344 | else:
|
---|
| 2345 | self._loaded = True
|
---|
| 2346 |
|
---|
[2] | 2347 | return tarinfo
|
---|
| 2348 |
|
---|
| 2349 | #--------------------------------------------------------------------------
|
---|
| 2350 | # Little helper methods:
|
---|
| 2351 |
|
---|
[391] | 2352 | def _getmember(self, name, tarinfo=None, normalize=False):
|
---|
[2] | 2353 | """Find an archive member by name from bottom to top.
|
---|
| 2354 | If tarinfo is given, it is used as the starting point.
|
---|
| 2355 | """
|
---|
| 2356 | # Ensure that all members have been loaded.
|
---|
| 2357 | members = self.getmembers()
|
---|
| 2358 |
|
---|
[391] | 2359 | # Limit the member search list up to tarinfo.
|
---|
| 2360 | if tarinfo is not None:
|
---|
| 2361 | members = members[:members.index(tarinfo)]
|
---|
[2] | 2362 |
|
---|
[391] | 2363 | if normalize:
|
---|
| 2364 | name = os.path.normpath(name)
|
---|
[2] | 2365 |
|
---|
[391] | 2366 | for member in reversed(members):
|
---|
| 2367 | if normalize:
|
---|
| 2368 | member_name = os.path.normpath(member.name)
|
---|
| 2369 | else:
|
---|
| 2370 | member_name = member.name
|
---|
| 2371 |
|
---|
| 2372 | if name == member_name:
|
---|
| 2373 | return member
|
---|
| 2374 |
|
---|
[2] | 2375 | def _load(self):
|
---|
| 2376 | """Read through the entire archive file and look for readable
|
---|
| 2377 | members.
|
---|
| 2378 | """
|
---|
| 2379 | while True:
|
---|
| 2380 | tarinfo = self.next()
|
---|
| 2381 | if tarinfo is None:
|
---|
| 2382 | break
|
---|
| 2383 | self._loaded = True
|
---|
| 2384 |
|
---|
| 2385 | def _check(self, mode=None):
|
---|
| 2386 | """Check if TarFile is still open, and if the operation's mode
|
---|
| 2387 | corresponds to TarFile's mode.
|
---|
| 2388 | """
|
---|
| 2389 | if self.closed:
|
---|
| 2390 | raise IOError("%s is closed" % self.__class__.__name__)
|
---|
| 2391 | if mode is not None and self.mode not in mode:
|
---|
| 2392 | raise IOError("bad operation for mode %r" % self.mode)
|
---|
| 2393 |
|
---|
[391] | 2394 | def _find_link_target(self, tarinfo):
|
---|
| 2395 | """Find the target member of a symlink or hardlink member in the
|
---|
| 2396 | archive.
|
---|
| 2397 | """
|
---|
| 2398 | if tarinfo.issym():
|
---|
| 2399 | # Always search the entire archive.
|
---|
| 2400 | linkname = "/".join(filter(None, (os.path.dirname(tarinfo.name), tarinfo.linkname)))
|
---|
| 2401 | limit = None
|
---|
| 2402 | else:
|
---|
| 2403 | # Search the archive before the link, because a hard link is
|
---|
| 2404 | # just a reference to an already archived file.
|
---|
| 2405 | linkname = tarinfo.linkname
|
---|
| 2406 | limit = tarinfo
|
---|
| 2407 |
|
---|
| 2408 | member = self._getmember(linkname, tarinfo=limit, normalize=True)
|
---|
| 2409 | if member is None:
|
---|
| 2410 | raise KeyError("linkname %r not found" % linkname)
|
---|
| 2411 | return member
|
---|
| 2412 |
|
---|
[2] | 2413 | def __iter__(self):
|
---|
| 2414 | """Provide an iterator object.
|
---|
| 2415 | """
|
---|
| 2416 | if self._loaded:
|
---|
| 2417 | return iter(self.members)
|
---|
| 2418 | else:
|
---|
| 2419 | return TarIter(self)
|
---|
| 2420 |
|
---|
| 2421 | def _dbg(self, level, msg):
|
---|
| 2422 | """Write debugging output to sys.stderr.
|
---|
| 2423 | """
|
---|
| 2424 | if level <= self.debug:
|
---|
| 2425 | print >> sys.stderr, msg
|
---|
[391] | 2426 |
|
---|
| 2427 | def __enter__(self):
|
---|
| 2428 | self._check()
|
---|
| 2429 | return self
|
---|
| 2430 |
|
---|
| 2431 | def __exit__(self, type, value, traceback):
|
---|
| 2432 | if type is None:
|
---|
| 2433 | self.close()
|
---|
| 2434 | else:
|
---|
| 2435 | # An exception occurred. We must not call close() because
|
---|
| 2436 | # it would try to write end-of-archive blocks and padding.
|
---|
| 2437 | if not self._extfileobj:
|
---|
| 2438 | self.fileobj.close()
|
---|
| 2439 | self.closed = True
|
---|
[2] | 2440 | # class TarFile
|
---|
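# Illustrative usage sketch, not part of the module: TarFile objects can be
# used as context managers through __enter__/__exit__ above, so close() and
# the end-of-archive padding it writes happen automatically on a clean exit.
# The names are made up.
#
#     import tarfile
#
#     with tarfile.open("sample.tar", "w") as tar:
#         tar.add("project")
#     # If an exception escapes the block, __exit__ only closes the
#     # underlying file and does not write the end-of-archive blocks.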
| 2441 |
|
---|
| 2442 | class TarIter:
|
---|
| 2443 | """Iterator Class.
|
---|
| 2444 |
|
---|
| 2445 | for tarinfo in TarFile(...):
|
---|
| 2446 | suite...
|
---|
| 2447 | """
|
---|
| 2448 |
|
---|
| 2449 | def __init__(self, tarfile):
|
---|
| 2450 | """Construct a TarIter object.
|
---|
| 2451 | """
|
---|
| 2452 | self.tarfile = tarfile
|
---|
| 2453 | self.index = 0
|
---|
| 2454 | def __iter__(self):
|
---|
| 2455 | """Return iterator object.
|
---|
| 2456 | """
|
---|
| 2457 | return self
|
---|
| 2458 | def next(self):
|
---|
| 2459 | """Return the next item using TarFile's next() method.
|
---|
| 2460 | When all members have been read, set TarFile as _loaded.
|
---|
| 2461 | """
|
---|
| 2462 | # Fix for SF #1100429: Under rare circumstances it can
|
---|
| 2463 | # happen that getmembers() is called during iteration,
|
---|
| 2464 | # which will cause TarIter to stop prematurely.
|
---|
[391] | 2465 |
|
---|
| 2466 | if self.index == 0 and self.tarfile.firstmember is not None:
|
---|
[2] | 2467 | tarinfo = self.tarfile.next()
|
---|
[391] | 2468 | elif self.index < len(self.tarfile.members):
|
---|
| 2469 | tarinfo = self.tarfile.members[self.index]
|
---|
| 2470 | elif not self.tarfile._loaded:
|
---|
| 2471 | tarinfo = self.tarfile.next()
|
---|
[2] | 2472 | if not tarinfo:
|
---|
| 2473 | self.tarfile._loaded = True
|
---|
| 2474 | raise StopIteration
|
---|
| 2475 | else:
|
---|
[391] | 2476 | raise StopIteration
|
---|
[2] | 2477 | self.index += 1
|
---|
| 2478 | return tarinfo
|
---|
| 2479 |
|
---|
| 2480 | # Helper classes for sparse file support
|
---|
| 2481 | class _section:
|
---|
| 2482 | """Base class for _data and _hole.
|
---|
| 2483 | """
|
---|
| 2484 | def __init__(self, offset, size):
|
---|
| 2485 | self.offset = offset
|
---|
| 2486 | self.size = size
|
---|
| 2487 | def __contains__(self, offset):
|
---|
| 2488 | return self.offset <= offset < self.offset + self.size
|
---|
| 2489 |
|
---|
| 2490 | class _data(_section):
|
---|
| 2491 | """Represent a data section in a sparse file.
|
---|
| 2492 | """
|
---|
| 2493 | def __init__(self, offset, size, realpos):
|
---|
| 2494 | _section.__init__(self, offset, size)
|
---|
| 2495 | self.realpos = realpos
|
---|
| 2496 |
|
---|
| 2497 | class _hole(_section):
|
---|
| 2498 | """Represent a hole section in a sparse file.
|
---|
| 2499 | """
|
---|
| 2500 | pass
|
---|
| 2501 |
|
---|
| 2502 | class _ringbuffer(list):
|
---|
| 2503 | """Ringbuffer class which increases performance
|
---|
| 2504 | over a regular list.
|
---|
| 2505 | """
|
---|
| 2506 | def __init__(self):
|
---|
| 2507 | self.idx = 0
|
---|
| 2508 | def find(self, offset):
|
---|
| 2509 | idx = self.idx
|
---|
| 2510 | while True:
|
---|
| 2511 | item = self[idx]
|
---|
| 2512 | if offset in item:
|
---|
| 2513 | break
|
---|
| 2514 | idx += 1
|
---|
| 2515 | if idx == len(self):
|
---|
| 2516 | idx = 0
|
---|
| 2517 | if idx == self.idx:
|
---|
| 2518 | # End of File
|
---|
| 2519 | return None
|
---|
| 2520 | self.idx = idx
|
---|
| 2521 | return item
|
---|
| 2522 |
|
---|
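# Illustrative sketch, not part of the module: how the sparse helpers fit
# together.  A sparse map alternates _data and _hole sections over the
# logical file; _ringbuffer.find() remembers the index of the last hit so
# mostly sequential lookups do not rescan from the start.  Offsets are
# made up.
#
#     sp = _ringbuffer()
#     sp.append(_data(0, 512, 0))          # bytes 0..511, stored at realpos 0
#     sp.append(_hole(512, 4096))          # bytes 512..4607 are a hole
#     sp.append(_data(4608, 512, 512))     # next stored chunk
#
#     section = sp.find(5000)              # -> the second _data section
#     if isinstance(section, _data):
#         realpos = section.realpos + (5000 - section.offset)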
| 2523 | #---------------------------------------------
|
---|
| 2524 | # zipfile compatible TarFile class
|
---|
| 2525 | #---------------------------------------------
|
---|
| 2526 | TAR_PLAIN = 0 # zipfile.ZIP_STORED
|
---|
| 2527 | TAR_GZIPPED = 8 # zipfile.ZIP_DEFLATED
|
---|
| 2528 | class TarFileCompat:
|
---|
| 2529 | """TarFile class compatible with standard module zipfile's
|
---|
| 2530 | ZipFile class.
|
---|
| 2531 | """
|
---|
| 2532 | def __init__(self, file, mode="r", compression=TAR_PLAIN):
|
---|
| 2533 | from warnings import warnpy3k
|
---|
| 2534 | warnpy3k("the TarFileCompat class has been removed in Python 3.0",
|
---|
| 2535 | stacklevel=2)
|
---|
| 2536 | if compression == TAR_PLAIN:
|
---|
| 2537 | self.tarfile = TarFile.taropen(file, mode)
|
---|
| 2538 | elif compression == TAR_GZIPPED:
|
---|
| 2539 | self.tarfile = TarFile.gzopen(file, mode)
|
---|
| 2540 | else:
|
---|
| 2541 | raise ValueError("unknown compression constant")
|
---|
| 2542 | if mode[0:1] == "r":
|
---|
| 2543 | members = self.tarfile.getmembers()
|
---|
| 2544 | for m in members:
|
---|
| 2545 | m.filename = m.name
|
---|
| 2546 | m.file_size = m.size
|
---|
| 2547 | m.date_time = time.gmtime(m.mtime)[:6]
|
---|
| 2548 | def namelist(self):
|
---|
| 2549 | return map(lambda m: m.name, self.infolist())
|
---|
| 2550 | def infolist(self):
|
---|
| 2551 | return filter(lambda m: m.type in REGULAR_TYPES,
|
---|
| 2552 | self.tarfile.getmembers())
|
---|
| 2553 | def printdir(self):
|
---|
| 2554 | self.tarfile.list()
|
---|
| 2555 | def testzip(self):
|
---|
| 2556 | return
|
---|
| 2557 | def getinfo(self, name):
|
---|
| 2558 | return self.tarfile.getmember(name)
|
---|
| 2559 | def read(self, name):
|
---|
| 2560 | return self.tarfile.extractfile(self.tarfile.getmember(name)).read()
|
---|
| 2561 | def write(self, filename, arcname=None, compress_type=None):
|
---|
| 2562 | self.tarfile.add(filename, arcname)
|
---|
| 2563 | def writestr(self, zinfo, bytes):
|
---|
| 2564 | try:
|
---|
| 2565 | from cStringIO import StringIO
|
---|
| 2566 | except ImportError:
|
---|
| 2567 | from StringIO import StringIO
|
---|
| 2568 | import calendar
|
---|
| 2569 | tinfo = TarInfo(zinfo.filename)
|
---|
| 2570 | tinfo.size = len(bytes)
|
---|
| 2571 | tinfo.mtime = calendar.timegm(zinfo.date_time)
|
---|
| 2572 | self.tarfile.addfile(tinfo, StringIO(bytes))
|
---|
| 2573 | def close(self):
|
---|
| 2574 | self.tarfile.close()
|
---|
| 2575 | #class TarFileCompat
|
---|
| 2576 |
|
---|
| 2577 | #--------------------
|
---|
| 2578 | # exported functions
|
---|
| 2579 | #--------------------
|
---|
| 2580 | def is_tarfile(name):
|
---|
| 2581 | """Return True if name points to a tar archive that we
|
---|
| 2582 | are able to handle, else return False.
|
---|
| 2583 | """
|
---|
| 2584 | try:
|
---|
| 2585 | t = open(name)
|
---|
| 2586 | t.close()
|
---|
| 2587 | return True
|
---|
| 2588 | except TarError:
|
---|
| 2589 | return False
|
---|
| 2590 |
|
---|
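# Illustrative usage sketch, not part of the module.  The path is made up.
#
#     import tarfile
#
#     if tarfile.is_tarfile("download.bin"):
#         tar = tarfile.open("download.bin")
#         print tar.getnames()
#         tar.close()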
| 2591 | bltn_open = open
|
---|
| 2592 | open = TarFile.open
|
---|