source: python/vendor/Python-2.6.5/Lib/tarfile.py

Last change on this file was changeset 2, checked in by Yuri Dario, 15 years ago

Initial import for vendor code.

  • Property svn:eol-style set to native
File size: 84.8 KB
1#!/usr/bin/env python
2# -*- coding: iso-8859-1 -*-
3#-------------------------------------------------------------------
4# tarfile.py
5#-------------------------------------------------------------------
6# Copyright (C) 2002 Lars Gustäbel <lars@gustaebel.de>
7# All rights reserved.
8#
9# Permission is hereby granted, free of charge, to any person
10# obtaining a copy of this software and associated documentation
11# files (the "Software"), to deal in the Software without
12# restriction, including without limitation the rights to use,
13# copy, modify, merge, publish, distribute, sublicense, and/or sell
14# copies of the Software, and to permit persons to whom the
15# Software is furnished to do so, subject to the following
16# conditions:
17#
18# The above copyright notice and this permission notice shall be
19# included in all copies or substantial portions of the Software.
20#
21# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
22# EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
23# OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
24# NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
25# HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
26# WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
27# FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
28# OTHER DEALINGS IN THE SOFTWARE.
29#
30"""Read from and write to tar format archives.
31"""
32
33__version__ = "$Revision: 76386 $"
34# $Source$
35
36version = "0.9.0"
37__author__ = "Lars Gustäbel (lars@gustaebel.de)"
38__date__ = "$Date: 2009-11-18 16:09:35 -0500 (Wed, 18 Nov 2009) $"
39__cvsid__ = "$Id: tarfile.py 76386 2009-11-18 21:09:35Z lars.gustaebel $"
40__credits__ = "Gustavo Niemeyer, Niels Gustäbel, Richard Townsend."
41
42#---------
43# Imports
44#---------
45import sys
46import os
47import shutil
48import stat
49import errno
50import time
51import struct
52import copy
53import re
54import operator
55
56if sys.platform == 'mac':
57 # This module needs work for MacOS9, especially in the area of pathname
58 # handling. In many places it is assumed a simple substitution of / by the
59 # local os.path.sep is good enough to convert pathnames, but this does not
60 # work with the mac rooted:path:name versus :nonrooted:path:name syntax
61 raise ImportError, "tarfile does not work for platform==mac"
62
63try:
64 import grp, pwd
65except ImportError:
66 grp = pwd = None
67
68# from tarfile import *
69__all__ = ["TarFile", "TarInfo", "is_tarfile", "TarError"]
70
71#---------------------------------------------------------
72# tar constants
73#---------------------------------------------------------
74NUL = "\0" # the null character
75BLOCKSIZE = 512 # length of processing blocks
76RECORDSIZE = BLOCKSIZE * 20 # length of records
77GNU_MAGIC = "ustar  \0" # magic gnu tar string
78POSIX_MAGIC = "ustar\x0000" # magic posix tar string
79
80LENGTH_NAME = 100 # maximum length of a filename
81LENGTH_LINK = 100 # maximum length of a linkname
82LENGTH_PREFIX = 155 # maximum length of the prefix field
83
84REGTYPE = "0" # regular file
85AREGTYPE = "\0" # regular file
86LNKTYPE = "1" # link (inside tarfile)
87SYMTYPE = "2" # symbolic link
88CHRTYPE = "3" # character special device
89BLKTYPE = "4" # block special device
90DIRTYPE = "5" # directory
91FIFOTYPE = "6" # fifo special device
92CONTTYPE = "7" # contiguous file
93
94GNUTYPE_LONGNAME = "L" # GNU tar longname
95GNUTYPE_LONGLINK = "K" # GNU tar longlink
96GNUTYPE_SPARSE = "S" # GNU tar sparse file
97
98XHDTYPE = "x" # POSIX.1-2001 extended header
99XGLTYPE = "g" # POSIX.1-2001 global header
100SOLARIS_XHDTYPE = "X" # Solaris extended header
101
102USTAR_FORMAT = 0 # POSIX.1-1988 (ustar) format
103GNU_FORMAT = 1 # GNU tar format
104PAX_FORMAT = 2 # POSIX.1-2001 (pax) format
105DEFAULT_FORMAT = GNU_FORMAT
106
107#---------------------------------------------------------
108# tarfile constants
109#---------------------------------------------------------
110# File types that tarfile supports:
111SUPPORTED_TYPES = (REGTYPE, AREGTYPE, LNKTYPE,
112 SYMTYPE, DIRTYPE, FIFOTYPE,
113 CONTTYPE, CHRTYPE, BLKTYPE,
114 GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
115 GNUTYPE_SPARSE)
116
117# File types that will be treated as a regular file.
118REGULAR_TYPES = (REGTYPE, AREGTYPE,
119 CONTTYPE, GNUTYPE_SPARSE)
120
121# File types that are part of the GNU tar format.
122GNU_TYPES = (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK,
123 GNUTYPE_SPARSE)
124
125# Fields from a pax header that override a TarInfo attribute.
126PAX_FIELDS = ("path", "linkpath", "size", "mtime",
127 "uid", "gid", "uname", "gname")
128
129# Fields in a pax header that are numbers, all other fields
130# are treated as strings.
131PAX_NUMBER_FIELDS = {
132 "atime": float,
133 "ctime": float,
134 "mtime": float,
135 "uid": int,
136 "gid": int,
137 "size": int
138}
139
140#---------------------------------------------------------
141# Bits used in the mode field, values in octal.
142#---------------------------------------------------------
143S_IFLNK = 0120000 # symbolic link
144S_IFREG = 0100000 # regular file
145S_IFBLK = 0060000 # block device
146S_IFDIR = 0040000 # directory
147S_IFCHR = 0020000 # character device
148S_IFIFO = 0010000 # fifo
149
150TSUID = 04000 # set UID on execution
151TSGID = 02000 # set GID on execution
152TSVTX = 01000 # reserved
153
154TUREAD = 0400 # read by owner
155TUWRITE = 0200 # write by owner
156TUEXEC = 0100 # execute/search by owner
157TGREAD = 0040 # read by group
158TGWRITE = 0020 # write by group
159TGEXEC = 0010 # execute/search by group
160TOREAD = 0004 # read by other
161TOWRITE = 0002 # write by other
162TOEXEC = 0001 # execute/search by other
163
164#---------------------------------------------------------
165# initialization
166#---------------------------------------------------------
167ENCODING = sys.getfilesystemencoding()
168if ENCODING is None:
169 ENCODING = sys.getdefaultencoding()
170
171#---------------------------------------------------------
172# Some useful functions
173#---------------------------------------------------------
174
175def stn(s, length):
176 """Convert a python string to a null-terminated string buffer.
177 """
178 return s[:length] + (length - len(s)) * NUL
179
180def nts(s):
181 """Convert a null-terminated string field to a python string.
182 """
183 # Use the string up to the first null char.
184 p = s.find("\0")
185 if p == -1:
186 return s
187 return s[:p]
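# Illustrative round-trip (added for clarity; the values below are worked out
# from stn()/nts() above, not captured interpreter output):
#   >>> stn("foo", 8)
#   'foo\x00\x00\x00\x00\x00'
#   >>> nts(stn("foo", 8))
#   'foo'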
188
189def nti(s):
190 """Convert a number field to a python number.
191 """
192 # There are two possible encodings for a number field, see
193 # itn() below.
194 if s[0] != chr(0200):
195 try:
196 n = int(nts(s) or "0", 8)
197 except ValueError:
198 raise HeaderError("invalid header")
199 else:
200 n = 0L
201 for i in xrange(len(s) - 1):
202 n <<= 8
203 n += ord(s[i + 1])
204 return n
205
206def itn(n, digits=8, format=DEFAULT_FORMAT):
207 """Convert a python number to a number field.
208 """
209 # POSIX 1003.1-1988 requires numbers to be encoded as a string of
210 # octal digits followed by a null-byte, this allows values up to
211 # (8**(digits-1))-1. GNU tar allows storing numbers greater than
212 # that if necessary. A leading 0200 byte indicates this particular
213 # encoding, the following digits-1 bytes are a big-endian
214 # representation. This allows values up to (256**(digits-1))-1.
215 if 0 <= n < 8 ** (digits - 1):
216 s = "%0*o" % (digits - 1, n) + NUL
217 else:
218 if format != GNU_FORMAT or n >= 256 ** (digits - 1):
219 raise ValueError("overflow in number field")
220
221 if n < 0:
222 # XXX We mimic GNU tar's behaviour with negative numbers,
223 # this could raise OverflowError.
224 n = struct.unpack("L", struct.pack("l", n))[0]
225
226 s = ""
227 for i in xrange(digits - 1):
228 s = chr(n & 0377) + s
229 n >>= 8
230 s = chr(0200) + s
231 return s
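# Illustrative sketch (added; the values below are assumptions derived from
# itn()/nti() above, not captured interpreter output):
#   >>> itn(0644, digits=8)                      # small value: octal digits + NUL
#   '0000644\x00'
#   >>> nti(itn(0644, digits=8))
#   420
#   >>> big = 8 ** 11                            # one too large for 11 octal digits
#   >>> itn(big, digits=12, format=GNU_FORMAT)[0] == chr(0200)
#   True
#   >>> nti(itn(big, digits=12, format=GNU_FORMAT)) == big
#   True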
232
233def uts(s, encoding, errors):
234 """Convert a unicode object to a string.
235 """
236 if errors == "utf-8":
237 # An extra error handler similar to the -o invalid=UTF-8 option
238 # in POSIX.1-2001. Replace untranslatable characters with their
239 # UTF-8 representation.
240 try:
241 return s.encode(encoding, "strict")
242 except UnicodeEncodeError:
243 x = []
244 for c in s:
245 try:
246 x.append(c.encode(encoding, "strict"))
247 except UnicodeEncodeError:
248 x.append(c.encode("utf8"))
249 return "".join(x)
250 else:
251 return s.encode(encoding, errors)
252
253def calc_chksums(buf):
254 """Calculate the checksum for a member's header by summing up all
255 characters except for the chksum field which is treated as if
256 it was filled with spaces. According to the GNU tar sources,
257 some tars (Sun and NeXT) calculate chksum with signed char,
258 which will be different if there are chars in the buffer with
259 the high bit set. So we calculate two checksums, unsigned and
260 signed.
261 """
262 unsigned_chksum = 256 + sum(struct.unpack("148B", buf[:148]) + struct.unpack("356B", buf[156:512]))
263 signed_chksum = 256 + sum(struct.unpack("148b", buf[:148]) + struct.unpack("356b", buf[156:512]))
264 return unsigned_chksum, signed_chksum
265
266def copyfileobj(src, dst, length=None):
267 """Copy length bytes from fileobj src to fileobj dst.
268 If length is None, copy the entire content.
269 """
270 if length == 0:
271 return
272 if length is None:
273 shutil.copyfileobj(src, dst)
274 return
275
276 BUFSIZE = 16 * 1024
277 blocks, remainder = divmod(length, BUFSIZE)
278 for b in xrange(blocks):
279 buf = src.read(BUFSIZE)
280 if len(buf) < BUFSIZE:
281 raise IOError("end of file reached")
282 dst.write(buf)
283
284 if remainder != 0:
285 buf = src.read(remainder)
286 if len(buf) < remainder:
287 raise IOError("end of file reached")
288 dst.write(buf)
289 return
290
291filemode_table = (
292 ((S_IFLNK, "l"),
293 (S_IFREG, "-"),
294 (S_IFBLK, "b"),
295 (S_IFDIR, "d"),
296 (S_IFCHR, "c"),
297 (S_IFIFO, "p")),
298
299 ((TUREAD, "r"),),
300 ((TUWRITE, "w"),),
301 ((TUEXEC|TSUID, "s"),
302 (TSUID, "S"),
303 (TUEXEC, "x")),
304
305 ((TGREAD, "r"),),
306 ((TGWRITE, "w"),),
307 ((TGEXEC|TSGID, "s"),
308 (TSGID, "S"),
309 (TGEXEC, "x")),
310
311 ((TOREAD, "r"),),
312 ((TOWRITE, "w"),),
313 ((TOEXEC|TSVTX, "t"),
314 (TSVTX, "T"),
315 (TOEXEC, "x"))
316)
317
318def filemode(mode):
319 """Convert a file's mode to a string of the form
320 -rwxrwxrwx.
321 Used by TarFile.list()
322 """
323 perm = []
324 for table in filemode_table:
325 for bit, char in table:
326 if mode & bit == bit:
327 perm.append(char)
328 break
329 else:
330 perm.append("-")
331 return "".join(perm)
332
333if os.sep != "/":
334 normpath = lambda path: os.path.normpath(path).replace(os.sep, "/")
335else:
336 normpath = os.path.normpath
337
338class TarError(Exception):
339 """Base exception."""
340 pass
341class ExtractError(TarError):
342 """General exception for extract errors."""
343 pass
344class ReadError(TarError):
345 """Exception for unreadble tar archives."""
346 pass
347class CompressionError(TarError):
348 """Exception for unavailable compression methods."""
349 pass
350class StreamError(TarError):
351 """Exception for unsupported operations on stream-like TarFiles."""
352 pass
353class HeaderError(TarError):
354 """Exception for invalid headers."""
355 pass
356
357#---------------------------
358# internal stream interface
359#---------------------------
360class _LowLevelFile:
361 """Low-level file object. Supports reading and writing.
362 It is used instead of a regular file object for streaming
363 access.
364 """
365
366 def __init__(self, name, mode):
367 mode = {
368 "r": os.O_RDONLY,
369 "w": os.O_WRONLY | os.O_CREAT | os.O_TRUNC,
370 }[mode]
371 if hasattr(os, "O_BINARY"):
372 mode |= os.O_BINARY
373 self.fd = os.open(name, mode)
374
375 def close(self):
376 os.close(self.fd)
377
378 def read(self, size):
379 return os.read(self.fd, size)
380
381 def write(self, s):
382 os.write(self.fd, s)
383
384class _Stream:
385 """Class that serves as an adapter between TarFile and
386 a stream-like object. The stream-like object only
387 needs to have a read() or write() method and is accessed
388 blockwise. Use of gzip or bzip2 compression is possible.
389 A stream-like object could be for example: sys.stdin,
390 sys.stdout, a socket, a tape device etc.
391
392 _Stream is intended to be used only internally.
393 """
394
395 def __init__(self, name, mode, comptype, fileobj, bufsize):
396 """Construct a _Stream object.
397 """
398 self._extfileobj = True
399 if fileobj is None:
400 fileobj = _LowLevelFile(name, mode)
401 self._extfileobj = False
402
403 if comptype == '*':
404 # Enable transparent compression detection for the
405 # stream interface
406 fileobj = _StreamProxy(fileobj)
407 comptype = fileobj.getcomptype()
408
409 self.name = name or ""
410 self.mode = mode
411 self.comptype = comptype
412 self.fileobj = fileobj
413 self.bufsize = bufsize
414 self.buf = ""
415 self.pos = 0L
416 self.closed = False
417
418 if comptype == "gz":
419 try:
420 import zlib
421 except ImportError:
422 raise CompressionError("zlib module is not available")
423 self.zlib = zlib
424 self.crc = zlib.crc32("") & 0xffffffffL
425 if mode == "r":
426 self._init_read_gz()
427 else:
428 self._init_write_gz()
429
430 if comptype == "bz2":
431 try:
432 import bz2
433 except ImportError:
434 raise CompressionError("bz2 module is not available")
435 if mode == "r":
436 self.dbuf = ""
437 self.cmp = bz2.BZ2Decompressor()
438 else:
439 self.cmp = bz2.BZ2Compressor()
440
441 def __del__(self):
442 if hasattr(self, "closed") and not self.closed:
443 self.close()
444
445 def _init_write_gz(self):
446 """Initialize for writing with gzip compression.
447 """
448 self.cmp = self.zlib.compressobj(9, self.zlib.DEFLATED,
449 -self.zlib.MAX_WBITS,
450 self.zlib.DEF_MEM_LEVEL,
451 0)
452 timestamp = struct.pack("<L", long(time.time()))
453 self.__write("\037\213\010\010%s\002\377" % timestamp)
454 if self.name.endswith(".gz"):
455 self.name = self.name[:-3]
456 self.__write(self.name + NUL)
457
458 def write(self, s):
459 """Write string s to the stream.
460 """
461 if self.comptype == "gz":
462 self.crc = self.zlib.crc32(s, self.crc) & 0xffffffffL
463 self.pos += len(s)
464 if self.comptype != "tar":
465 s = self.cmp.compress(s)
466 self.__write(s)
467
468 def __write(self, s):
469 """Write string s to the stream if a whole new block
470 is ready to be written.
471 """
472 self.buf += s
473 while len(self.buf) > self.bufsize:
474 self.fileobj.write(self.buf[:self.bufsize])
475 self.buf = self.buf[self.bufsize:]
476
477 def close(self):
478 """Close the _Stream object. No operation should be
479 done on it afterwards.
480 """
481 if self.closed:
482 return
483
484 if self.mode == "w" and self.comptype != "tar":
485 self.buf += self.cmp.flush()
486
487 if self.mode == "w" and self.buf:
488 self.fileobj.write(self.buf)
489 self.buf = ""
490 if self.comptype == "gz":
491 # The native zlib crc is an unsigned 32-bit integer, but
492 # the Python wrapper implicitly casts that to a signed C
493 # long. So, on a 32-bit box self.crc may "look negative",
494 # while the same crc on a 64-bit box may "look positive".
495 # To avoid irksome warnings from the `struct` module, force
496 # it to look positive on all boxes.
497 self.fileobj.write(struct.pack("<L", self.crc & 0xffffffffL))
498 self.fileobj.write(struct.pack("<L", self.pos & 0xffffFFFFL))
499
500 if not self._extfileobj:
501 self.fileobj.close()
502
503 self.closed = True
504
505 def _init_read_gz(self):
506 """Initialize for reading a gzip compressed fileobj.
507 """
508 self.cmp = self.zlib.decompressobj(-self.zlib.MAX_WBITS)
509 self.dbuf = ""
510
511 # taken from gzip.GzipFile with some alterations
512 if self.__read(2) != "\037\213":
513 raise ReadError("not a gzip file")
514 if self.__read(1) != "\010":
515 raise CompressionError("unsupported compression method")
516
517 flag = ord(self.__read(1))
518 self.__read(6)
519
520 if flag & 4:
521 xlen = ord(self.__read(1)) + 256 * ord(self.__read(1))
522 self.read(xlen)
523 if flag & 8:
524 while True:
525 s = self.__read(1)
526 if not s or s == NUL:
527 break
528 if flag & 16:
529 while True:
530 s = self.__read(1)
531 if not s or s == NUL:
532 break
533 if flag & 2:
534 self.__read(2)
535
536 def tell(self):
537 """Return the stream's file pointer position.
538 """
539 return self.pos
540
541 def seek(self, pos=0):
542 """Set the stream's file pointer to pos. Negative seeking
543 is forbidden.
544 """
545 if pos - self.pos >= 0:
546 blocks, remainder = divmod(pos - self.pos, self.bufsize)
547 for i in xrange(blocks):
548 self.read(self.bufsize)
549 self.read(remainder)
550 else:
551 raise StreamError("seeking backwards is not allowed")
552 return self.pos
553
554 def read(self, size=None):
555 """Return the next size number of bytes from the stream.
556 If size is not defined, return all bytes of the stream
557 up to EOF.
558 """
559 if size is None:
560 t = []
561 while True:
562 buf = self._read(self.bufsize)
563 if not buf:
564 break
565 t.append(buf)
566 buf = "".join(t)
567 else:
568 buf = self._read(size)
569 self.pos += len(buf)
570 return buf
571
572 def _read(self, size):
573 """Return size bytes from the stream.
574 """
575 if self.comptype == "tar":
576 return self.__read(size)
577
578 c = len(self.dbuf)
579 t = [self.dbuf]
580 while c < size:
581 buf = self.__read(self.bufsize)
582 if not buf:
583 break
584 try:
585 buf = self.cmp.decompress(buf)
586 except IOError:
587 raise ReadError("invalid compressed data")
588 t.append(buf)
589 c += len(buf)
590 t = "".join(t)
591 self.dbuf = t[size:]
592 return t[:size]
593
594 def __read(self, size):
595 """Return size bytes from stream. If internal buffer is empty,
596 read another block from the stream.
597 """
598 c = len(self.buf)
599 t = [self.buf]
600 while c < size:
601 buf = self.fileobj.read(self.bufsize)
602 if not buf:
603 break
604 t.append(buf)
605 c += len(buf)
606 t = "".join(t)
607 self.buf = t[size:]
608 return t[:size]
609# class _Stream
610
611class _StreamProxy(object):
612 """Small proxy class that enables transparent compression
613 detection for the Stream interface (mode 'r|*').
614 """
615
616 def __init__(self, fileobj):
617 self.fileobj = fileobj
618 self.buf = self.fileobj.read(BLOCKSIZE)
619
620 def read(self, size):
621 self.read = self.fileobj.read
622 return self.buf
623
624 def getcomptype(self):
625 if self.buf.startswith("\037\213\010"):
626 return "gz"
627 if self.buf.startswith("BZh91"):
628 return "bz2"
629 return "tar"
630
631 def close(self):
632 self.fileobj.close()
633# class StreamProxy
634
635class _BZ2Proxy(object):
636 """Small proxy class that enables external file object
637 support for "r:bz2" and "w:bz2" modes. This is actually
638 a workaround for a limitation in bz2 module's BZ2File
639 class which (unlike gzip.GzipFile) has no support for
640 a file object argument.
641 """
642
643 blocksize = 16 * 1024
644
645 def __init__(self, fileobj, mode):
646 self.fileobj = fileobj
647 self.mode = mode
648 self.name = getattr(self.fileobj, "name", None)
649 self.init()
650
651 def init(self):
652 import bz2
653 self.pos = 0
654 if self.mode == "r":
655 self.bz2obj = bz2.BZ2Decompressor()
656 self.fileobj.seek(0)
657 self.buf = ""
658 else:
659 self.bz2obj = bz2.BZ2Compressor()
660
661 def read(self, size):
662 b = [self.buf]
663 x = len(self.buf)
664 while x < size:
665 raw = self.fileobj.read(self.blocksize)
666 if not raw:
667 break
668 try:
669 data = self.bz2obj.decompress(raw)
670 except EOFError:
671 break
672 b.append(data)
673 x += len(data)
674 self.buf = "".join(b)
675
676 buf = self.buf[:size]
677 self.buf = self.buf[size:]
678 self.pos += len(buf)
679 return buf
680
681 def seek(self, pos):
682 if pos < self.pos:
683 self.init()
684 self.read(pos - self.pos)
685
686 def tell(self):
687 return self.pos
688
689 def write(self, data):
690 self.pos += len(data)
691 raw = self.bz2obj.compress(data)
692 self.fileobj.write(raw)
693
694 def close(self):
695 if self.mode == "w":
696 raw = self.bz2obj.flush()
697 self.fileobj.write(raw)
698# class _BZ2Proxy
699
700#------------------------
701# Extraction file object
702#------------------------
703class _FileInFile(object):
704 """A thin wrapper around an existing file object that
705 provides a part of its data as an individual file
706 object.
707 """
708
709 def __init__(self, fileobj, offset, size, sparse=None):
710 self.fileobj = fileobj
711 self.offset = offset
712 self.size = size
713 self.sparse = sparse
714 self.position = 0
715
716 def tell(self):
717 """Return the current file position.
718 """
719 return self.position
720
721 def seek(self, position):
722 """Seek to a position in the file.
723 """
724 self.position = position
725
726 def read(self, size=None):
727 """Read data from the file.
728 """
729 if size is None:
730 size = self.size - self.position
731 else:
732 size = min(size, self.size - self.position)
733
734 if self.sparse is None:
735 return self.readnormal(size)
736 else:
737 return self.readsparse(size)
738
739 def readnormal(self, size):
740 """Read operation for regular files.
741 """
742 self.fileobj.seek(self.offset + self.position)
743 self.position += size
744 return self.fileobj.read(size)
745
746 def readsparse(self, size):
747 """Read operation for sparse files.
748 """
749 data = []
750 while size > 0:
751 buf = self.readsparsesection(size)
752 if not buf:
753 break
754 size -= len(buf)
755 data.append(buf)
756 return "".join(data)
757
758 def readsparsesection(self, size):
759 """Read a single section of a sparse file.
760 """
761 section = self.sparse.find(self.position)
762
763 if section is None:
764 return ""
765
766 size = min(size, section.offset + section.size - self.position)
767
768 if isinstance(section, _data):
769 realpos = section.realpos + self.position - section.offset
770 self.fileobj.seek(self.offset + realpos)
771 self.position += size
772 return self.fileobj.read(size)
773 else:
774 self.position += size
775 return NUL * size
776#class _FileInFile
777
778
779class ExFileObject(object):
780 """File-like object for reading an archive member.
781 Is returned by TarFile.extractfile().
782 """
783 blocksize = 1024
784
785 def __init__(self, tarfile, tarinfo):
786 self.fileobj = _FileInFile(tarfile.fileobj,
787 tarinfo.offset_data,
788 tarinfo.size,
789 getattr(tarinfo, "sparse", None))
790 self.name = tarinfo.name
791 self.mode = "r"
792 self.closed = False
793 self.size = tarinfo.size
794
795 self.position = 0
796 self.buffer = ""
797
798 def read(self, size=None):
799 """Read at most size bytes from the file. If size is not
800 present or None, read all data until EOF is reached.
801 """
802 if self.closed:
803 raise ValueError("I/O operation on closed file")
804
805 buf = ""
806 if self.buffer:
807 if size is None:
808 buf = self.buffer
809 self.buffer = ""
810 else:
811 buf = self.buffer[:size]
812 self.buffer = self.buffer[size:]
813
814 if size is None:
815 buf += self.fileobj.read()
816 else:
817 buf += self.fileobj.read(size - len(buf))
818
819 self.position += len(buf)
820 return buf
821
822 def readline(self, size=-1):
823 """Read one entire line from the file. If size is present
824 and non-negative, return a string with at most that
825 size, which may be an incomplete line.
826 """
827 if self.closed:
828 raise ValueError("I/O operation on closed file")
829
830 if "\n" in self.buffer:
831 pos = self.buffer.find("\n") + 1
832 else:
833 buffers = [self.buffer]
834 while True:
835 buf = self.fileobj.read(self.blocksize)
836 buffers.append(buf)
837 if not buf or "\n" in buf:
838 self.buffer = "".join(buffers)
839 pos = self.buffer.find("\n") + 1
840 if pos == 0:
841 # no newline found.
842 pos = len(self.buffer)
843 break
844
845 if size != -1:
846 pos = min(size, pos)
847
848 buf = self.buffer[:pos]
849 self.buffer = self.buffer[pos:]
850 self.position += len(buf)
851 return buf
852
853 def readlines(self):
854 """Return a list with all remaining lines.
855 """
856 result = []
857 while True:
858 line = self.readline()
859 if not line: break
860 result.append(line)
861 return result
862
863 def tell(self):
864 """Return the current file position.
865 """
866 if self.closed:
867 raise ValueError("I/O operation on closed file")
868
869 return self.position
870
871 def seek(self, pos, whence=os.SEEK_SET):
872 """Seek to a position in the file.
873 """
874 if self.closed:
875 raise ValueError("I/O operation on closed file")
876
877 if whence == os.SEEK_SET:
878 self.position = min(max(pos, 0), self.size)
879 elif whence == os.SEEK_CUR:
880 if pos < 0:
881 self.position = max(self.position + pos, 0)
882 else:
883 self.position = min(self.position + pos, self.size)
884 elif whence == os.SEEK_END:
885 self.position = max(min(self.size + pos, self.size), 0)
886 else:
887 raise ValueError("Invalid argument")
888
889 self.buffer = ""
890 self.fileobj.seek(self.position)
891
892 def close(self):
893 """Close the file object.
894 """
895 self.closed = True
896
897 def __iter__(self):
898 """Get an iterator over the file's lines.
899 """
900 while True:
901 line = self.readline()
902 if not line:
903 break
904 yield line
905#class ExFileObject
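# Illustrative usage sketch (added; the archive and member names below are
# hypothetical):
#   tar = TarFile.open("example.tar")
#   fobj = tar.extractfile("some/member.txt")   # returns an ExFileObject
#   header = fobj.read(64)                      # file-like read/readline/seek
#   fobj.close()
#   tar.close()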
906
907#------------------
908# Exported Classes
909#------------------
910class TarInfo(object):
911 """Informational class which holds the details about an
912 archive member given by a tar header block.
913 TarInfo objects are returned by TarFile.getmember(),
914 TarFile.getmembers() and TarFile.gettarinfo() and are
915 usually created internally.
916 """
917
918 def __init__(self, name=""):
919 """Construct a TarInfo object. name is the optional name
920 of the member.
921 """
922 self.name = name # member name
923 self.mode = 0644 # file permissions
924 self.uid = 0 # user id
925 self.gid = 0 # group id
926 self.size = 0 # file size
927 self.mtime = 0 # modification time
928 self.chksum = 0 # header checksum
929 self.type = REGTYPE # member type
930 self.linkname = "" # link name
931 self.uname = "root" # user name
932 self.gname = "root" # group name
933 self.devmajor = 0 # device major number
934 self.devminor = 0 # device minor number
935
936 self.offset = 0 # the tar header starts here
937 self.offset_data = 0 # the file's data starts here
938
939 self.pax_headers = {} # pax header information
940
941 # In pax headers the "name" and "linkname" field are called
942 # "path" and "linkpath".
943 def _getpath(self):
944 return self.name
945 def _setpath(self, name):
946 self.name = name
947 path = property(_getpath, _setpath)
948
949 def _getlinkpath(self):
950 return self.linkname
951 def _setlinkpath(self, linkname):
952 self.linkname = linkname
953 linkpath = property(_getlinkpath, _setlinkpath)
954
955 def __repr__(self):
956 return "<%s %r at %#x>" % (self.__class__.__name__,self.name,id(self))
957
958 def get_info(self, encoding, errors):
959 """Return the TarInfo's attributes as a dictionary.
960 """
961 info = {
962 "name": normpath(self.name),
963 "mode": self.mode & 07777,
964 "uid": self.uid,
965 "gid": self.gid,
966 "size": self.size,
967 "mtime": self.mtime,
968 "chksum": self.chksum,
969 "type": self.type,
970 "linkname": normpath(self.linkname) if self.linkname else "",
971 "uname": self.uname,
972 "gname": self.gname,
973 "devmajor": self.devmajor,
974 "devminor": self.devminor
975 }
976
977 if info["type"] == DIRTYPE and not info["name"].endswith("/"):
978 info["name"] += "/"
979
980 for key in ("name", "linkname", "uname", "gname"):
981 if type(info[key]) is unicode:
982 info[key] = info[key].encode(encoding, errors)
983
984 return info
985
986 def tobuf(self, format=DEFAULT_FORMAT, encoding=ENCODING, errors="strict"):
987 """Return a tar header as a string of 512 byte blocks.
988 """
989 info = self.get_info(encoding, errors)
990
991 if format == USTAR_FORMAT:
992 return self.create_ustar_header(info)
993 elif format == GNU_FORMAT:
994 return self.create_gnu_header(info)
995 elif format == PAX_FORMAT:
996 return self.create_pax_header(info, encoding, errors)
997 else:
998 raise ValueError("invalid format")
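        # Illustrative sketch (added; member name and size are hypothetical):
        #   info = TarInfo("hello.txt")
        #   info.size = 5
        #   header = info.tobuf(format=GNU_FORMAT)   # one or more 512-byte blocks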
999
1000 def create_ustar_header(self, info):
1001 """Return the object as a ustar header block.
1002 """
1003 info["magic"] = POSIX_MAGIC
1004
1005 if len(info["linkname"]) > LENGTH_LINK:
1006 raise ValueError("linkname is too long")
1007
1008 if len(info["name"]) > LENGTH_NAME:
1009 info["prefix"], info["name"] = self._posix_split_name(info["name"])
1010
1011 return self._create_header(info, USTAR_FORMAT)
1012
1013 def create_gnu_header(self, info):
1014 """Return the object as a GNU header block sequence.
1015 """
1016 info["magic"] = GNU_MAGIC
1017
1018 buf = ""
1019 if len(info["linkname"]) > LENGTH_LINK:
1020 buf += self._create_gnu_long_header(info["linkname"], GNUTYPE_LONGLINK)
1021
1022 if len(info["name"]) > LENGTH_NAME:
1023 buf += self._create_gnu_long_header(info["name"], GNUTYPE_LONGNAME)
1024
1025 return buf + self._create_header(info, GNU_FORMAT)
1026
1027 def create_pax_header(self, info, encoding, errors):
1028 """Return the object as a ustar header block. If it cannot be
1029 represented this way, prepend a pax extended header sequence
1030 with supplement information.
1031 """
1032 info["magic"] = POSIX_MAGIC
1033 pax_headers = self.pax_headers.copy()
1034
1035 # Test string fields for values that exceed the field length or cannot
1036 # be represented in ASCII encoding.
1037 for name, hname, length in (
1038 ("name", "path", LENGTH_NAME), ("linkname", "linkpath", LENGTH_LINK),
1039 ("uname", "uname", 32), ("gname", "gname", 32)):
1040
1041 if hname in pax_headers:
1042 # The pax header has priority.
1043 continue
1044
1045 val = info[name].decode(encoding, errors)
1046
1047 # Try to encode the string as ASCII.
1048 try:
1049 val.encode("ascii")
1050 except UnicodeEncodeError:
1051 pax_headers[hname] = val
1052 continue
1053
1054 if len(info[name]) > length:
1055 pax_headers[hname] = val
1056
1057 # Test number fields for values that exceed the field limit or values
1058 # that need to be stored as float.
1059 for name, digits in (("uid", 8), ("gid", 8), ("size", 12), ("mtime", 12)):
1060 if name in pax_headers:
1061 # The pax header has priority. Avoid overflow.
1062 info[name] = 0
1063 continue
1064
1065 val = info[name]
1066 if not 0 <= val < 8 ** (digits - 1) or isinstance(val, float):
1067 pax_headers[name] = unicode(val)
1068 info[name] = 0
1069
1070 # Create a pax extended header if necessary.
1071 if pax_headers:
1072 buf = self._create_pax_generic_header(pax_headers)
1073 else:
1074 buf = ""
1075
1076 return buf + self._create_header(info, USTAR_FORMAT)
1077
1078 @classmethod
1079 def create_pax_global_header(cls, pax_headers):
1080 """Return the object as a pax global header block sequence.
1081 """
1082 return cls._create_pax_generic_header(pax_headers, type=XGLTYPE)
1083
1084 def _posix_split_name(self, name):
1085 """Split a name longer than 100 chars into a prefix
1086 and a name part.
1087 """
1088 prefix = name[:LENGTH_PREFIX + 1]
1089 while prefix and prefix[-1] != "/":
1090 prefix = prefix[:-1]
1091
1092 name = name[len(prefix):]
1093 prefix = prefix[:-1]
1094
1095 if not prefix or len(name) > LENGTH_NAME:
1096 raise ValueError("name is too long")
1097 return prefix, name
1098
1099 @staticmethod
1100 def _create_header(info, format):
1101 """Return a header block. info is a dictionary with file
1102 information, format must be one of the *_FORMAT constants.
1103 """
1104 parts = [
1105 stn(info.get("name", ""), 100),
1106 itn(info.get("mode", 0) & 07777, 8, format),
1107 itn(info.get("uid", 0), 8, format),
1108 itn(info.get("gid", 0), 8, format),
1109 itn(info.get("size", 0), 12, format),
1110 itn(info.get("mtime", 0), 12, format),
1111 " ", # checksum field
1112 info.get("type", REGTYPE),
1113 stn(info.get("linkname", ""), 100),
1114 stn(info.get("magic", POSIX_MAGIC), 8),
1115 stn(info.get("uname", "root"), 32),
1116 stn(info.get("gname", "root"), 32),
1117 itn(info.get("devmajor", 0), 8, format),
1118 itn(info.get("devminor", 0), 8, format),
1119 stn(info.get("prefix", ""), 155)
1120 ]
1121
1122 buf = struct.pack("%ds" % BLOCKSIZE, "".join(parts))
1123 chksum = calc_chksums(buf[-BLOCKSIZE:])[0]
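        # (Comment added for clarity.) 512 - 364 = 148 and 512 - 357 = 155: the
        # next line writes six octal digits plus a NUL into bytes 148-154 of the
        # last block and keeps the original space at byte 155, filling in the
        # 8-byte chksum field that was packed as spaces above.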
1124 buf = buf[:-364] + "%06o\0" % chksum + buf[-357:]
1125 return buf
1126
1127 @staticmethod
1128 def _create_payload(payload):
1129 """Return the string payload filled with zero bytes
1130 up to the next 512 byte border.
1131 """
1132 blocks, remainder = divmod(len(payload), BLOCKSIZE)
1133 if remainder > 0:
1134 payload += (BLOCKSIZE - remainder) * NUL
1135 return payload
1136
1137 @classmethod
1138 def _create_gnu_long_header(cls, name, type):
1139 """Return a GNUTYPE_LONGNAME or GNUTYPE_LONGLINK sequence
1140 for name.
1141 """
1142 name += NUL
1143
1144 info = {}
1145 info["name"] = "././@LongLink"
1146 info["type"] = type
1147 info["size"] = len(name)
1148 info["magic"] = GNU_MAGIC
1149
1150 # create extended header + name blocks.
1151 return cls._create_header(info, USTAR_FORMAT) + \
1152 cls._create_payload(name)
1153
1154 @classmethod
1155 def _create_pax_generic_header(cls, pax_headers, type=XHDTYPE):
1156 """Return a POSIX.1-2001 extended or global header sequence
1157 that contains a list of keyword, value pairs. The values
1158 must be unicode objects.
1159 """
1160 records = []
1161 for keyword, value in pax_headers.iteritems():
1162 keyword = keyword.encode("utf8")
1163 value = value.encode("utf8")
1164 l = len(keyword) + len(value) + 3 # ' ' + '=' + '\n'
1165 n = p = 0
1166 while True:
1167 n = l + len(str(p))
1168 if n == p:
1169 break
1170 p = n
1171 records.append("%d %s=%s\n" % (p, keyword, value))
1172 records = "".join(records)
1173
1174 # We use a hardcoded "././@PaxHeader" name like star does
1175 # instead of the one that POSIX recommends.
1176 info = {}
1177 info["name"] = "././@PaxHeader"
1178 info["type"] = type
1179 info["size"] = len(records)
1180 info["magic"] = POSIX_MAGIC
1181
1182 # Create pax header + record blocks.
1183 return cls._create_header(info, USTAR_FORMAT) + \
1184 cls._create_payload(records)
1185
1186 @classmethod
1187 def frombuf(cls, buf):
1188 """Construct a TarInfo object from a 512 byte string buffer.
1189 """
1190 if len(buf) != BLOCKSIZE:
1191 raise HeaderError("truncated header")
1192 if buf.count(NUL) == BLOCKSIZE:
1193 raise HeaderError("empty header")
1194
1195 chksum = nti(buf[148:156])
1196 if chksum not in calc_chksums(buf):
1197 raise HeaderError("bad checksum")
1198
1199 obj = cls()
1200 obj.buf = buf
1201 obj.name = nts(buf[0:100])
1202 obj.mode = nti(buf[100:108])
1203 obj.uid = nti(buf[108:116])
1204 obj.gid = nti(buf[116:124])
1205 obj.size = nti(buf[124:136])
1206 obj.mtime = nti(buf[136:148])
1207 obj.chksum = chksum
1208 obj.type = buf[156:157]
1209 obj.linkname = nts(buf[157:257])
1210 obj.uname = nts(buf[265:297])
1211 obj.gname = nts(buf[297:329])
1212 obj.devmajor = nti(buf[329:337])
1213 obj.devminor = nti(buf[337:345])
1214 prefix = nts(buf[345:500])
1215
1216 # Old V7 tar format represents a directory as a regular
1217 # file with a trailing slash.
1218 if obj.type == AREGTYPE and obj.name.endswith("/"):
1219 obj.type = DIRTYPE
1220
1221 # Remove redundant slashes from directories.
1222 if obj.isdir():
1223 obj.name = obj.name.rstrip("/")
1224
1225 # Reconstruct a ustar longname.
1226 if prefix and obj.type not in GNU_TYPES:
1227 obj.name = prefix + "/" + obj.name
1228 return obj
1229
1230 @classmethod
1231 def fromtarfile(cls, tarfile):
1232 """Return the next TarInfo object from TarFile object
1233 tarfile.
1234 """
1235 buf = tarfile.fileobj.read(BLOCKSIZE)
1236 if not buf:
1237 return
1238 obj = cls.frombuf(buf)
1239 obj.offset = tarfile.fileobj.tell() - BLOCKSIZE
1240 return obj._proc_member(tarfile)
1241
1242 #--------------------------------------------------------------------------
1243 # The following are methods that are called depending on the type of a
1244 # member. The entry point is _proc_member() which can be overridden in a
1245 # subclass to add custom _proc_*() methods. A _proc_*() method MUST
1246 # implement the following
1247 # operations:
1248 # 1. Set self.offset_data to the position where the data blocks begin,
1249 # if there is data that follows.
1250 # 2. Set tarfile.offset to the position where the next member's header will
1251 # begin.
1252 # 3. Return self or another valid TarInfo object.
1253 def _proc_member(self, tarfile):
1254 """Choose the right processing method depending on
1255 the type and call it.
1256 """
1257 if self.type in (GNUTYPE_LONGNAME, GNUTYPE_LONGLINK):
1258 return self._proc_gnulong(tarfile)
1259 elif self.type == GNUTYPE_SPARSE:
1260 return self._proc_sparse(tarfile)
1261 elif self.type in (XHDTYPE, XGLTYPE, SOLARIS_XHDTYPE):
1262 return self._proc_pax(tarfile)
1263 else:
1264 return self._proc_builtin(tarfile)
1265
1266 def _proc_builtin(self, tarfile):
1267 """Process a builtin type or an unknown type which
1268 will be treated as a regular file.
1269 """
1270 self.offset_data = tarfile.fileobj.tell()
1271 offset = self.offset_data
1272 if self.isreg() or self.type not in SUPPORTED_TYPES:
1273 # Skip the following data blocks.
1274 offset += self._block(self.size)
1275 tarfile.offset = offset
1276
1277 # Patch the TarInfo object with saved global
1278 # header information.
1279 self._apply_pax_info(tarfile.pax_headers, tarfile.encoding, tarfile.errors)
1280
1281 return self
1282
1283 def _proc_gnulong(self, tarfile):
1284 """Process the blocks that hold a GNU longname
1285 or longlink member.
1286 """
1287 buf = tarfile.fileobj.read(self._block(self.size))
1288
1289 # Fetch the next header and process it.
1290 next = self.fromtarfile(tarfile)
1291 if next is None:
1292 raise HeaderError("missing subsequent header")
1293
1294 # Patch the TarInfo object from the next header with
1295 # the longname information.
1296 next.offset = self.offset
1297 if self.type == GNUTYPE_LONGNAME:
1298 next.name = nts(buf)
1299 elif self.type == GNUTYPE_LONGLINK:
1300 next.linkname = nts(buf)
1301
1302 return next
1303
1304 def _proc_sparse(self, tarfile):
1305 """Process a GNU sparse header plus extra headers.
1306 """
1307 buf = self.buf
1308 sp = _ringbuffer()
1309 pos = 386
1310 lastpos = 0L
1311 realpos = 0L
1312 # There are 4 possible sparse structs in the
1313 # first header.
1314 for i in xrange(4):
1315 try:
1316 offset = nti(buf[pos:pos + 12])
1317 numbytes = nti(buf[pos + 12:pos + 24])
1318 except ValueError:
1319 break
1320 if offset > lastpos:
1321 sp.append(_hole(lastpos, offset - lastpos))
1322 sp.append(_data(offset, numbytes, realpos))
1323 realpos += numbytes
1324 lastpos = offset + numbytes
1325 pos += 24
1326
1327 isextended = ord(buf[482])
1328 origsize = nti(buf[483:495])
1329
1330 # If the isextended flag is given,
1331 # there are extra headers to process.
1332 while isextended == 1:
1333 buf = tarfile.fileobj.read(BLOCKSIZE)
1334 pos = 0
1335 for i in xrange(21):
1336 try:
1337 offset = nti(buf[pos:pos + 12])
1338 numbytes = nti(buf[pos + 12:pos + 24])
1339 except ValueError:
1340 break
1341 if offset > lastpos:
1342 sp.append(_hole(lastpos, offset - lastpos))
1343 sp.append(_data(offset, numbytes, realpos))
1344 realpos += numbytes
1345 lastpos = offset + numbytes
1346 pos += 24
1347 isextended = ord(buf[504])
1348
1349 if lastpos < origsize:
1350 sp.append(_hole(lastpos, origsize - lastpos))
1351
1352 self.sparse = sp
1353
1354 self.offset_data = tarfile.fileobj.tell()
1355 tarfile.offset = self.offset_data + self._block(self.size)
1356 self.size = origsize
1357
1358 return self
1359
1360 def _proc_pax(self, tarfile):
1361 """Process an extended or global header as described in
1362 POSIX.1-2001.
1363 """
1364 # Read the header information.
1365 buf = tarfile.fileobj.read(self._block(self.size))
1366
1367 # A pax header stores supplemental information for either
1368 # the following file (extended) or all following files
1369 # (global).
1370 if self.type == XGLTYPE:
1371 pax_headers = tarfile.pax_headers
1372 else:
1373 pax_headers = tarfile.pax_headers.copy()
1374
1375 # Parse pax header information. A record looks like this:
1376 # "%d %s=%s\n" % (length, keyword, value). length is the size
1377 # of the complete record including the length field itself and
1378 # the newline. keyword and value are both UTF-8 encoded strings.
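            # Worked example (added; a hypothetical record, not taken from any
            # real archive): for keyword "path" and value "foo/bar.txt" the
            # record reads "20 path=foo/bar.txt\n", where the length 20 counts
            # the "20 " prefix and the trailing newline as well.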
1379 regex = re.compile(r"(\d+) ([^=]+)=", re.U)
1380 pos = 0
1381 while True:
1382 match = regex.match(buf, pos)
1383 if not match:
1384 break
1385
1386 length, keyword = match.groups()
1387 length = int(length)
1388 value = buf[match.end(2) + 1:match.start(1) + length - 1]
1389
1390 keyword = keyword.decode("utf8")
1391 value = value.decode("utf8")
1392
1393 pax_headers[keyword] = value
1394 pos += length
1395
1396 # Fetch the next header.
1397 next = self.fromtarfile(tarfile)
1398
1399 if self.type in (XHDTYPE, SOLARIS_XHDTYPE):
1400 if next is None:
1401 raise HeaderError("missing subsequent header")
1402
1403 # Patch the TarInfo object with the extended header info.
1404 next._apply_pax_info(pax_headers, tarfile.encoding, tarfile.errors)
1405 next.offset = self.offset
1406
1407 if "size" in pax_headers:
1408 # If the extended header replaces the size field,
1409 # we need to recalculate the offset where the next
1410 # header starts.
1411 offset = next.offset_data
1412 if next.isreg() or next.type not in SUPPORTED_TYPES:
1413 offset += next._block(next.size)
1414 tarfile.offset = offset
1415
1416 return next
1417
1418 def _apply_pax_info(self, pax_headers, encoding, errors):
1419 """Replace fields with supplemental information from a previous
1420 pax extended or global header.
1421 """
1422 for keyword, value in pax_headers.iteritems():
1423 if keyword not in PAX_FIELDS:
1424 continue
1425
1426 if keyword == "path":
1427 value = value.rstrip("/")
1428
1429 if keyword in PAX_NUMBER_FIELDS:
1430 try:
1431 value = PAX_NUMBER_FIELDS[keyword](value)
1432 except ValueError:
1433 value = 0
1434 else:
1435 value = uts(value, encoding, errors)
1436
1437 setattr(self, keyword, value)
1438
1439 self.pax_headers = pax_headers.copy()
1440
1441 def _block(self, count):
1442 """Round up a byte count by BLOCKSIZE and return it,
1443 e.g. _block(834) => 1024.
1444 """
1445 blocks, remainder = divmod(count, BLOCKSIZE)
1446 if remainder:
1447 blocks += 1
1448 return blocks * BLOCKSIZE
1449
1450 def isreg(self):
1451 return self.type in REGULAR_TYPES
1452 def isfile(self):
1453 return self.isreg()
1454 def isdir(self):
1455 return self.type == DIRTYPE
1456 def issym(self):
1457 return self.type == SYMTYPE
1458 def islnk(self):
1459 return self.type == LNKTYPE
1460 def ischr(self):
1461 return self.type == CHRTYPE
1462 def isblk(self):
1463 return self.type == BLKTYPE
1464 def isfifo(self):
1465 return self.type == FIFOTYPE
1466 def issparse(self):
1467 return self.type == GNUTYPE_SPARSE
1468 def isdev(self):
1469 return self.type in (CHRTYPE, BLKTYPE, FIFOTYPE)
1470# class TarInfo
1471
1472class TarFile(object):
1473 """The TarFile Class provides an interface to tar archives.
1474 """
1475
1476 debug = 0 # May be set from 0 (no msgs) to 3 (all msgs)
1477
1478 dereference = False # If true, add content of linked file to the
1479 # tar file, else the link.
1480
1481 ignore_zeros = False # If true, skips empty or invalid blocks and
1482 # continues processing.
1483
1484 errorlevel = 0 # If 0, fatal errors only appear in debug
1485 # messages (if debug >= 0). If > 0, errors
1486 # are passed to the caller as exceptions.
1487
1488 format = DEFAULT_FORMAT # The format to use when creating an archive.
1489
1490 encoding = ENCODING # Encoding for 8-bit character strings.
1491
1492 errors = None # Error handler for unicode conversion.
1493
1494 tarinfo = TarInfo # The default TarInfo class to use.
1495
1496 fileobject = ExFileObject # The default ExFileObject class to use.
1497
1498 def __init__(self, name=None, mode="r", fileobj=None, format=None,
1499 tarinfo=None, dereference=None, ignore_zeros=None, encoding=None,
1500 errors=None, pax_headers=None, debug=None, errorlevel=None):
1501 """Open an (uncompressed) tar archive `name'. `mode' is either 'r' to
1502 read from an existing archive, 'a' to append data to an existing
1503 file or 'w' to create a new file overwriting an existing one. `mode'
1504 defaults to 'r'.
1505 If `fileobj' is given, it is used for reading or writing data. If it
1506 can be determined, `mode' is overridden by `fileobj's mode.
1507 `fileobj' is not closed when TarFile is closed.
1508 """
1509 if len(mode) > 1 or mode not in "raw":
1510 raise ValueError("mode must be 'r', 'a' or 'w'")
1511 self.mode = mode
1512 self._mode = {"r": "rb", "a": "r+b", "w": "wb"}[mode]
1513
1514 if not fileobj:
1515 if self.mode == "a" and not os.path.exists(name):
1516 # Create nonexistent files in append mode.
1517 self.mode = "w"
1518 self._mode = "wb"
1519 fileobj = bltn_open(name, self._mode)
1520 self._extfileobj = False
1521 else:
1522 if name is None and hasattr(fileobj, "name"):
1523 name = fileobj.name
1524 if hasattr(fileobj, "mode"):
1525 self._mode = fileobj.mode
1526 self._extfileobj = True
1527 self.name = os.path.abspath(name) if name else None
1528 self.fileobj = fileobj
1529
1530 # Init attributes.
1531 if format is not None:
1532 self.format = format
1533 if tarinfo is not None:
1534 self.tarinfo = tarinfo
1535 if dereference is not None:
1536 self.dereference = dereference
1537 if ignore_zeros is not None:
1538 self.ignore_zeros = ignore_zeros
1539 if encoding is not None:
1540 self.encoding = encoding
1541
1542 if errors is not None:
1543 self.errors = errors
1544 elif mode == "r":
1545 self.errors = "utf-8"
1546 else:
1547 self.errors = "strict"
1548
1549 if pax_headers is not None and self.format == PAX_FORMAT:
1550 self.pax_headers = pax_headers
1551 else:
1552 self.pax_headers = {}
1553
1554 if debug is not None:
1555 self.debug = debug
1556 if errorlevel is not None:
1557 self.errorlevel = errorlevel
1558
1559 # Init datastructures.
1560 self.closed = False
1561 self.members = [] # list of members as TarInfo objects
1562 self._loaded = False # flag if all members have been read
1563 self.offset = self.fileobj.tell()
1564 # current position in the archive file
1565 self.inodes = {} # dictionary caching the inodes of
1566 # archive members already added
1567
1568 try:
1569 if self.mode == "r":
1570 self.firstmember = None
1571 self.firstmember = self.next()
1572
1573 if self.mode == "a":
1574 # Move to the end of the archive,
1575 # before the first empty block.
1576 self.firstmember = None
1577 while True:
1578 if self.next() is None:
1579 if self.offset > 0:
1580 self.fileobj.seek(- BLOCKSIZE, 1)
1581 break
1582
1583 if self.mode in "aw":
1584 self._loaded = True
1585
1586 if self.pax_headers:
1587 buf = self.tarinfo.create_pax_global_header(self.pax_headers.copy())
1588 self.fileobj.write(buf)
1589 self.offset += len(buf)
1590 except:
1591 if not self._extfileobj:
1592 self.fileobj.close()
1593 self.closed = True
1594 raise
1595
1596 def _getposix(self):
1597 return self.format == USTAR_FORMAT
1598 def _setposix(self, value):
1599 import warnings
1600 warnings.warn("use the format attribute instead", DeprecationWarning,
1601 2)
1602 if value:
1603 self.format = USTAR_FORMAT
1604 else:
1605 self.format = GNU_FORMAT
1606 posix = property(_getposix, _setposix)
1607
1608 #--------------------------------------------------------------------------
1609 # Below are the classmethods which act as alternate constructors to the
1610 # TarFile class. The open() method is the only one that is needed for
1611 # public use; it is the "super"-constructor and is able to select an
1612 # adequate "sub"-constructor for a particular compression using the mapping
1613 # from OPEN_METH.
1614 #
1615 # This concept allows one to subclass TarFile without losing the comfort of
1616 # the super-constructor. A sub-constructor is registered and made available
1617 # by adding it to the mapping in OPEN_METH.
1618
1619 @classmethod
1620 def open(cls, name=None, mode="r", fileobj=None, bufsize=RECORDSIZE, **kwargs):
1621 """Open a tar archive for reading, writing or appending. Return
1622 an appropriate TarFile class.
1623
1624 mode:
1625 'r' or 'r:*' open for reading with transparent compression
1626 'r:' open for reading exclusively uncompressed
1627 'r:gz' open for reading with gzip compression
1628 'r:bz2' open for reading with bzip2 compression
1629 'a' or 'a:' open for appending, creating the file if necessary
1630 'w' or 'w:' open for writing without compression
1631 'w:gz' open for writing with gzip compression
1632 'w:bz2' open for writing with bzip2 compression
1633
1634 'r|*' open a stream of tar blocks with transparent compression
1635 'r|' open an uncompressed stream of tar blocks for reading
1636 'r|gz' open a gzip compressed stream of tar blocks
1637 'r|bz2' open a bzip2 compressed stream of tar blocks
1638 'w|' open an uncompressed stream for writing
1639 'w|gz' open a gzip compressed stream for writing
1640 'w|bz2' open a bzip2 compressed stream for writing
1641 """
1642
1643 if not name and not fileobj:
1644 raise ValueError("nothing to open")
1645
1646 if mode in ("r", "r:*"):
1647 # Find out which *open() is appropriate for opening the file.
1648 for comptype in cls.OPEN_METH:
1649 func = getattr(cls, cls.OPEN_METH[comptype])
1650 if fileobj is not None:
1651 saved_pos = fileobj.tell()
1652 try:
1653 return func(name, "r", fileobj, **kwargs)
1654 except (ReadError, CompressionError), e:
1655 if fileobj is not None:
1656 fileobj.seek(saved_pos)
1657 continue
1658 raise ReadError("file could not be opened successfully")
1659
1660 elif ":" in mode:
1661 filemode, comptype = mode.split(":", 1)
1662 filemode = filemode or "r"
1663 comptype = comptype or "tar"
1664
1665 # Select the *open() function according to
1666 # given compression.
1667 if comptype in cls.OPEN_METH:
1668 func = getattr(cls, cls.OPEN_METH[comptype])
1669 else:
1670 raise CompressionError("unknown compression type %r" % comptype)
1671 return func(name, filemode, fileobj, **kwargs)
1672
1673 elif "|" in mode:
1674 filemode, comptype = mode.split("|", 1)
1675 filemode = filemode or "r"
1676 comptype = comptype or "tar"
1677
1678 if filemode not in "rw":
1679 raise ValueError("mode must be 'r' or 'w'")
1680
1681 t = cls(name, filemode,
1682 _Stream(name, filemode, comptype, fileobj, bufsize),
1683 **kwargs)
1684 t._extfileobj = False
1685 return t
1686
1687 elif mode in "aw":
1688 return cls.taropen(name, mode, fileobj, **kwargs)
1689
1690 raise ValueError("undiscernible mode")
1691
1692 @classmethod
1693 def taropen(cls, name, mode="r", fileobj=None, **kwargs):
1694 """Open uncompressed tar archive name for reading or writing.
1695 """
1696 if len(mode) > 1 or mode not in "raw":
1697 raise ValueError("mode must be 'r', 'a' or 'w'")
1698 return cls(name, mode, fileobj, **kwargs)
1699
1700 @classmethod
1701 def gzopen(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
1702 """Open gzip compressed tar archive name for reading or writing.
1703 Appending is not allowed.
1704 """
1705 if len(mode) > 1 or mode not in "rw":
1706 raise ValueError("mode must be 'r' or 'w'")
1707
1708 try:
1709 import gzip
1710 gzip.GzipFile
1711 except (ImportError, AttributeError):
1712 raise CompressionError("gzip module is not available")
1713
1714 if fileobj is None:
1715 fileobj = bltn_open(name, mode + "b")
1716
1717 try:
1718 t = cls.taropen(name, mode,
1719 gzip.GzipFile(name, mode, compresslevel, fileobj),
1720 **kwargs)
1721 except IOError:
1722 raise ReadError("not a gzip file")
1723 t._extfileobj = False
1724 return t
1725
1726 @classmethod
1727 def bz2open(cls, name, mode="r", fileobj=None, compresslevel=9, **kwargs):
1728 """Open bzip2 compressed tar archive name for reading or writing.
1729 Appending is not allowed.
1730 """
1731 if len(mode) > 1 or mode not in "rw":
1732 raise ValueError("mode must be 'r' or 'w'.")
1733
1734 try:
1735 import bz2
1736 except ImportError:
1737 raise CompressionError("bz2 module is not available")
1738
1739 if fileobj is not None:
1740 fileobj = _BZ2Proxy(fileobj, mode)
1741 else:
1742 fileobj = bz2.BZ2File(name, mode, compresslevel=compresslevel)
1743
1744 try:
1745 t = cls.taropen(name, mode, fileobj, **kwargs)
1746 except IOError:
1747 raise ReadError("not a bzip2 file")
1748 t._extfileobj = False
1749 return t
1750
1751 # All *open() methods are registered here.
1752 OPEN_METH = {
1753 "tar": "taropen", # uncompressed tar
1754 "gz": "gzopen", # gzip compressed tar
1755 "bz2": "bz2open" # bzip2 compressed tar
1756 }
1757
1758 #--------------------------------------------------------------------------
1759 # The public methods which TarFile provides:
1760
1761 def close(self):
1762 """Close the TarFile. In write-mode, two finishing zero blocks are
1763 appended to the archive.
1764 """
1765 if self.closed:
1766 return
1767
1768 if self.mode in "aw":
1769 self.fileobj.write(NUL * (BLOCKSIZE * 2))
1770 self.offset += (BLOCKSIZE * 2)
1771 # fill up the end with zero-blocks
1772 # (like option -b20 for tar does)
1773 blocks, remainder = divmod(self.offset, RECORDSIZE)
1774 if remainder > 0:
1775 self.fileobj.write(NUL * (RECORDSIZE - remainder))
1776
1777 if not self._extfileobj:
1778 self.fileobj.close()
1779 self.closed = True
1780
1781 def getmember(self, name):
1782 """Return a TarInfo object for member `name'. If `name' can not be
1783 found in the archive, KeyError is raised. If a member occurs more
1784 than once in the archive, its last occurrence is assumed to be the
1785 most up-to-date version.
1786 """
1787 tarinfo = self._getmember(name)
1788 if tarinfo is None:
1789 raise KeyError("filename %r not found" % name)
1790 return tarinfo
1791
1792 def getmembers(self):
1793 """Return the members of the archive as a list of TarInfo objects. The
1794 list has the same order as the members in the archive.
1795 """
1796 self._check()
1797 if not self._loaded: # if we want to obtain a list of
1798 self._load() # all members, we first have to
1799 # scan the whole archive.
1800 return self.members
1801
1802 def getnames(self):
1803 """Return the members of the archive as a list of their names. It has
1804 the same order as the list returned by getmembers().
1805 """
1806 return [tarinfo.name for tarinfo in self.getmembers()]
1807
1808 def gettarinfo(self, name=None, arcname=None, fileobj=None):
1809 """Create a TarInfo object for either the file `name' or the file
1810 object `fileobj' (using os.fstat on its file descriptor). You can
1811 modify some of the TarInfo's attributes before you add it using
1812 addfile(). If given, `arcname' specifies an alternative name for the
1813 file in the archive.
1814 """
1815 self._check("aw")
1816
1817 # When fileobj is given, replace name by
1818 # fileobj's real name.
1819 if fileobj is not None:
1820 name = fileobj.name
1821
1822 # Building the name of the member in the archive.
1823 # Backward slashes are converted to forward slashes,
1824 # Absolute paths are turned to relative paths.
1825 if arcname is None:
1826 arcname = name
1827 arcname = normpath(arcname)
1828 drv, arcname = os.path.splitdrive(arcname)
1829 while arcname[0:1] == "/":
1830 arcname = arcname[1:]
1831
1832 # Now, fill the TarInfo object with
1833 # information specific for the file.
1834 tarinfo = self.tarinfo()
1835 tarinfo.tarfile = self
1836
1837 # Use os.stat or os.lstat, depending on platform
1838 # and if symlinks shall be resolved.
1839 if fileobj is None:
1840 if hasattr(os, "lstat") and not self.dereference:
1841 statres = os.lstat(name)
1842 else:
1843 statres = os.stat(name)
1844 else:
1845 statres = os.fstat(fileobj.fileno())
1846 linkname = ""
1847
1848 stmd = statres.st_mode
1849 if stat.S_ISREG(stmd):
1850 inode = (statres.st_ino, statres.st_dev)
1851 if not self.dereference and statres.st_nlink > 1 and \
1852 inode in self.inodes and arcname != self.inodes[inode]:
1853 # Is it a hardlink to an already
1854 # archived file?
1855 type = LNKTYPE
1856 linkname = self.inodes[inode]
1857 else:
1858 # The inode is added only if it is valid.
1859 # For win32 it is always 0.
1860 type = REGTYPE
1861 if inode[0]:
1862 self.inodes[inode] = arcname
1863 elif stat.S_ISDIR(stmd):
1864 type = DIRTYPE
1865 elif stat.S_ISFIFO(stmd):
1866 type = FIFOTYPE
1867 elif stat.S_ISLNK(stmd):
1868 type = SYMTYPE
1869 linkname = os.readlink(name)
1870 elif stat.S_ISCHR(stmd):
1871 type = CHRTYPE
1872 elif stat.S_ISBLK(stmd):
1873 type = BLKTYPE
1874 else:
1875 return None
1876
1877 # Fill the TarInfo object with all
1878 # information we can get.
1879 tarinfo.name = arcname
1880 tarinfo.mode = stmd
1881 tarinfo.uid = statres.st_uid
1882 tarinfo.gid = statres.st_gid
1883 if stat.S_ISREG(stmd):
1884 tarinfo.size = statres.st_size
1885 else:
1886 tarinfo.size = 0L
1887 tarinfo.mtime = statres.st_mtime
1888 tarinfo.type = type
1889 tarinfo.linkname = linkname
1890 if pwd:
1891 try:
1892 tarinfo.uname = pwd.getpwuid(tarinfo.uid)[0]
1893 except KeyError:
1894 pass
1895 if grp:
1896 try:
1897 tarinfo.gname = grp.getgrgid(tarinfo.gid)[0]
1898 except KeyError:
1899 pass
1900
1901 if type in (CHRTYPE, BLKTYPE):
1902 if hasattr(os, "major") and hasattr(os, "minor"):
1903 tarinfo.devmajor = os.major(statres.st_rdev)
1904 tarinfo.devminor = os.minor(statres.st_rdev)
1905 return tarinfo
1906
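# Usage sketch: gettarinfo() builds a TarInfo whose attributes can be
# adjusted before the member is written with addfile(); file names are
# hypothetical:
#
#     import tarfile
#     tf = tarfile.open("out.tar", "w")
#     info = tf.gettarinfo("data.txt", arcname="renamed.txt")
#     info.uname = info.gname = "nobody"   # override ownership metadata
#     f = open("data.txt", "rb")
#     tf.addfile(info, f)
#     f.close()
#     tf.close()
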
1907 def list(self, verbose=True):
1908 """Print a table of contents to sys.stdout. If `verbose' is False, only
1909 the names of the members are printed. If it is True, an `ls -l'-like
1910 output is produced.
1911 """
1912 self._check()
1913
1914 for tarinfo in self:
1915 if verbose:
1916 print filemode(tarinfo.mode),
1917 print "%s/%s" % (tarinfo.uname or tarinfo.uid,
1918 tarinfo.gname or tarinfo.gid),
1919 if tarinfo.ischr() or tarinfo.isblk():
1920 print "%10s" % ("%d,%d" \
1921 % (tarinfo.devmajor, tarinfo.devminor)),
1922 else:
1923 print "%10d" % tarinfo.size,
1924 print "%d-%02d-%02d %02d:%02d:%02d" \
1925 % time.localtime(tarinfo.mtime)[:6],
1926
1927 print tarinfo.name + ("/" if tarinfo.isdir() else ""),
1928
1929 if verbose:
1930 if tarinfo.issym():
1931 print "->", tarinfo.linkname,
1932 if tarinfo.islnk():
1933 print "link to", tarinfo.linkname,
1934 print
1935
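# Usage sketch: printing a table of contents; the path is hypothetical:
#
#     import tarfile
#     tf = tarfile.open("sample.tar", "r")
#     tf.list()               # 'ls -l'-style listing on sys.stdout
#     tf.list(verbose=False)  # names only
#     tf.close()
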
1936 def add(self, name, arcname=None, recursive=True, exclude=None):
1937 """Add the file `name' to the archive. `name' may be any type of file
1938 (directory, fifo, symbolic link, etc.). If given, `arcname'
1939 specifies an alternative name for the file in the archive.
1940 Directories are added recursively by default. This can be avoided by
1941 setting `recursive' to False. `exclude' is a function that should
1942 return True for each filename to be excluded.
1943 """
1944 self._check("aw")
1945
1946 if arcname is None:
1947 arcname = name
1948
1949 # Exclude pathnames.
1950 if exclude is not None and exclude(name):
1951 self._dbg(2, "tarfile: Excluded %r" % name)
1952 return
1953
1954 # Skip if somebody tries to archive the archive...
1955 if self.name is not None and os.path.abspath(name) == self.name:
1956 self._dbg(2, "tarfile: Skipped %r" % name)
1957 return
1958
1959 # Special case: The user wants to add the current
1960 # working directory.
1961 if name == ".":
1962 if recursive:
1963 if arcname == ".":
1964 arcname = ""
1965 for f in os.listdir(name):
1966 self.add(f, os.path.join(arcname, f), recursive, exclude)
1967 return
1968
1969 self._dbg(1, name)
1970
1971 # Create a TarInfo object from the file.
1972 tarinfo = self.gettarinfo(name, arcname)
1973
1974 if tarinfo is None:
1975 self._dbg(1, "tarfile: Unsupported type %r" % name)
1976 return
1977
1978 # Append the tar header and data to the archive.
1979 if tarinfo.isreg():
1980 f = bltn_open(name, "rb")
1981 self.addfile(tarinfo, f)
1982 f.close()
1983
1984 elif tarinfo.isdir():
1985 self.addfile(tarinfo)
1986 if recursive:
1987 for f in os.listdir(name):
1988 self.add(os.path.join(name, f), os.path.join(arcname, f), recursive, exclude)
1989
1990 else:
1991 self.addfile(tarinfo)
1992
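# Usage sketch: adding files and directories; all paths are hypothetical:
#
#     import tarfile
#     tf = tarfile.open("backup.tar", "w")
#     tf.add("project")                        # recurses into the directory
#     tf.add("notes.txt", arcname="doc/notes.txt")
#     tf.add("logs", exclude=lambda n: n.endswith(".tmp"))
#     tf.close()
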
1993 def addfile(self, tarinfo, fileobj=None):
1994 """Add the TarInfo object `tarinfo' to the archive. If `fileobj' is
1995 given, tarinfo.size bytes are read from it and added to the archive.
1996 You can create TarInfo objects using gettarinfo().
1997 On Windows platforms, `fileobj' should always be opened with mode
1998 'rb' so that newline translation cannot distort the file size.
1999 """
2000 self._check("aw")
2001
2002 tarinfo = copy.copy(tarinfo)
2003
2004 buf = tarinfo.tobuf(self.format, self.encoding, self.errors)
2005 self.fileobj.write(buf)
2006 self.offset += len(buf)
2007
2008 # If there's data to follow, append it.
2009 if fileobj is not None:
2010 copyfileobj(fileobj, self.fileobj, tarinfo.size)
2011 blocks, remainder = divmod(tarinfo.size, BLOCKSIZE)
2012 if remainder > 0:
2013 self.fileobj.write(NUL * (BLOCKSIZE - remainder))
2014 blocks += 1
2015 self.offset += blocks * BLOCKSIZE
2016
2017 self.members.append(tarinfo)
2018
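# Usage sketch: addfile() with data held in memory; tarinfo.size must match
# the number of bytes that will be read from the file object.  Names are
# hypothetical:
#
#     import tarfile
#     from cStringIO import StringIO
#     data = "hello world\n"
#     info = tarfile.TarInfo("greeting.txt")
#     info.size = len(data)
#     tf = tarfile.open("mem.tar", "w")
#     tf.addfile(info, StringIO(data))
#     tf.close()
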
2019 def extractall(self, path=".", members=None):
2020 """Extract all members from the archive to the current working
2021 directory and set owner, modification time and permissions on
2022 directories afterwards. `path' specifies a different directory
2023 to extract to. `members' is optional and must be a subset of the
2024 list returned by getmembers().
2025 """
2026 directories = []
2027
2028 if members is None:
2029 members = self
2030
2031 for tarinfo in members:
2032 if tarinfo.isdir():
2033 # Extract directories with a safe mode.
2034 directories.append(tarinfo)
2035 tarinfo = copy.copy(tarinfo)
2036 tarinfo.mode = 0700
2037 self.extract(tarinfo, path)
2038
2039 # Reverse sort directories.
2040 directories.sort(key=operator.attrgetter('name'))
2041 directories.reverse()
2042
2043 # Set correct owner, mtime and filemode on directories.
2044 for tarinfo in directories:
2045 dirpath = os.path.join(path, tarinfo.name)
2046 try:
2047 self.chown(tarinfo, dirpath)
2048 self.utime(tarinfo, dirpath)
2049 self.chmod(tarinfo, dirpath)
2050 except ExtractError, e:
2051 if self.errorlevel > 1:
2052 raise
2053 else:
2054 self._dbg(1, "tarfile: %s" % e)
2055
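# Usage sketch: extracting everything, or only a subset of getmembers();
# paths are hypothetical:
#
#     import tarfile
#     tf = tarfile.open("sample.tar.gz", "r:gz")
#     tf.extractall(path="/tmp/unpacked")
#     subset = [m for m in tf.getmembers() if m.name.startswith("docs/")]
#     tf.extractall(path="/tmp/unpacked", members=subset)
#     tf.close()
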
2056 def extract(self, member, path=""):
2057 """Extract a member from the archive to the current working directory,
2058 using its full name. Its file information is extracted as accurately
2059 as possible. `member' may be a filename or a TarInfo object. You can
2060 specify a different directory using `path'.
2061 """
2062 self._check("r")
2063
2064 if isinstance(member, basestring):
2065 tarinfo = self.getmember(member)
2066 else:
2067 tarinfo = member
2068
2069 # Prepare the link target for makelink().
2070 if tarinfo.islnk():
2071 tarinfo._link_target = os.path.join(path, tarinfo.linkname)
2072
2073 try:
2074 self._extract_member(tarinfo, os.path.join(path, tarinfo.name))
2075 except EnvironmentError, e:
2076 if self.errorlevel > 0:
2077 raise
2078 else:
2079 if e.filename is None:
2080 self._dbg(1, "tarfile: %s" % e.strerror)
2081 else:
2082 self._dbg(1, "tarfile: %s %r" % (e.strerror, e.filename))
2083 except ExtractError, e:
2084 if self.errorlevel > 1:
2085 raise
2086 else:
2087 self._dbg(1, "tarfile: %s" % e)
2088
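# Usage sketch: extracting a single member, given as a name or a TarInfo;
# names are hypothetical:
#
#     import tarfile
#     tf = tarfile.open("sample.tar", "r")
#     tf.extract("docs/readme.txt", path="/tmp/out")
#     info = tf.getmember("docs/logo.png")
#     tf.extract(info, path="/tmp/out")
#     tf.close()
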
2089 def extractfile(self, member):
2090 """Extract a member from the archive as a file object. `member' may be
2091 a filename or a TarInfo object. If `member' is a regular file, a
2092 file-like object is returned. If `member' is a link, a file-like
2093 object is constructed from the link's target. If `member' is none of
2094 the above, None is returned.
2095 The file-like object is read-only and provides the following
2096 methods: read(), readline(), readlines(), seek() and tell().
2097 """
2098 self._check("r")
2099
2100 if isinstance(member, basestring):
2101 tarinfo = self.getmember(member)
2102 else:
2103 tarinfo = member
2104
2105 if tarinfo.isreg():
2106 return self.fileobject(self, tarinfo)
2107
2108 elif tarinfo.type not in SUPPORTED_TYPES:
2109 # If a member's type is unknown, it is treated as a
2110 # regular file.
2111 return self.fileobject(self, tarinfo)
2112
2113 elif tarinfo.islnk() or tarinfo.issym():
2114 if isinstance(self.fileobj, _Stream):
2115 # A small but ugly workaround for the case that someone tries
2116 # to extract a (sym)link as a file-object from a non-seekable
2117 # stream of tar blocks.
2118 raise StreamError("cannot extract (sym)link as file object")
2119 else:
2120 # A (sym)link's file object is its target's file object.
2121 return self.extractfile(self._getmember(tarinfo.linkname,
2122 tarinfo))
2123 else:
2124 # If there's no data associated with the member (directory, chrdev,
2125 # blkdev, etc.), return None instead of a file object.
2126 return None
2127
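# Usage sketch: reading a member's contents without writing it to disk;
# names are hypothetical:
#
#     import tarfile
#     tf = tarfile.open("sample.tar", "r")
#     f = tf.extractfile("docs/readme.txt")
#     if f is not None:           # None for directories, devices, etc.
#         print f.read()
#         f.close()
#     tf.close()
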
2128 def _extract_member(self, tarinfo, targetpath):
2129 """Extract the TarInfo object tarinfo to a physical
2130 file called targetpath.
2131 """
2132 # Fetch the TarInfo object for the given name
2133 # and build the destination pathname, replacing
2134 # forward slashes with platform-specific separators.
2135 if targetpath[-1:] == "/":
2136 targetpath = targetpath[:-1]
2137 targetpath = os.path.normpath(targetpath)
2138
2139 # Create all upper directories.
2140 upperdirs = os.path.dirname(targetpath)
2141 if upperdirs and not os.path.exists(upperdirs):
2142 # Create directories that are not part of the archive with
2143 # default permissions.
2144 os.makedirs(upperdirs)
2145
2146 if tarinfo.islnk() or tarinfo.issym():
2147 self._dbg(1, "%s -> %s" % (tarinfo.name, tarinfo.linkname))
2148 else:
2149 self._dbg(1, tarinfo.name)
2150
2151 if tarinfo.isreg():
2152 self.makefile(tarinfo, targetpath)
2153 elif tarinfo.isdir():
2154 self.makedir(tarinfo, targetpath)
2155 elif tarinfo.isfifo():
2156 self.makefifo(tarinfo, targetpath)
2157 elif tarinfo.ischr() or tarinfo.isblk():
2158 self.makedev(tarinfo, targetpath)
2159 elif tarinfo.islnk() or tarinfo.issym():
2160 self.makelink(tarinfo, targetpath)
2161 elif tarinfo.type not in SUPPORTED_TYPES:
2162 self.makeunknown(tarinfo, targetpath)
2163 else:
2164 self.makefile(tarinfo, targetpath)
2165
2166 self.chown(tarinfo, targetpath)
2167 if not tarinfo.issym():
2168 self.chmod(tarinfo, targetpath)
2169 self.utime(tarinfo, targetpath)
2170
2171 #--------------------------------------------------------------------------
2172 # Below are the different file methods. They are called via
2173 # _extract_member() when extract() is called. They can be replaced in a
2174 # subclass to implement other functionality.
2175
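# Sketch of the subclassing hook described above: overriding one of the
# file methods changes how members of that type are extracted.  The class
# name is illustrative:
#
#     class VerboseTarFile(TarFile):
#         def makefile(self, tarinfo, targetpath):
#             print "writing", targetpath
#             TarFile.makefile(self, tarinfo, targetpath)
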
2176 def makedir(self, tarinfo, targetpath):
2177 """Make a directory called targetpath.
2178 """
2179 try:
2180 # Use a safe mode for the directory, the real mode is set
2181 # later in _extract_member().
2182 os.mkdir(targetpath, 0700)
2183 except EnvironmentError, e:
2184 if e.errno != errno.EEXIST:
2185 raise
2186
2187 def makefile(self, tarinfo, targetpath):
2188 """Make a file called targetpath.
2189 """
2190 source = self.extractfile(tarinfo)
2191 target = bltn_open(targetpath, "wb")
2192 copyfileobj(source, target)
2193 source.close()
2194 target.close()
2195
2196 def makeunknown(self, tarinfo, targetpath):
2197 """Make a file from a TarInfo object with an unknown type
2198 at targetpath.
2199 """
2200 self.makefile(tarinfo, targetpath)
2201 self._dbg(1, "tarfile: Unknown file type %r, " \
2202 "extracted as regular file." % tarinfo.type)
2203
2204 def makefifo(self, tarinfo, targetpath):
2205 """Make a fifo called targetpath.
2206 """
2207 if hasattr(os, "mkfifo"):
2208 os.mkfifo(targetpath)
2209 else:
2210 raise ExtractError("fifo not supported by system")
2211
2212 def makedev(self, tarinfo, targetpath):
2213 """Make a character or block device called targetpath.
2214 """
2215 if not hasattr(os, "mknod") or not hasattr(os, "makedev"):
2216 raise ExtractError("special devices not supported by system")
2217
2218 mode = tarinfo.mode
2219 if tarinfo.isblk():
2220 mode |= stat.S_IFBLK
2221 else:
2222 mode |= stat.S_IFCHR
2223
2224 os.mknod(targetpath, mode,
2225 os.makedev(tarinfo.devmajor, tarinfo.devminor))
2226
2227 def makelink(self, tarinfo, targetpath):
2228 """Make a (symbolic) link called targetpath. If it cannot be created
2229 (platform limitation), we try to make a copy of the referenced file
2230 instead of a link.
2231 """
2232 linkpath = tarinfo.linkname
2233 try:
2234 if tarinfo.issym():
2235 os.symlink(linkpath, targetpath)
2236 else:
2237 # See extract().
2238 os.link(tarinfo._link_target, targetpath)
2239 except AttributeError:
2240 if tarinfo.issym():
2241 linkpath = os.path.join(os.path.dirname(tarinfo.name),
2242 linkpath)
2243 linkpath = normpath(linkpath)
2244
2245 try:
2246 self._extract_member(self.getmember(linkpath), targetpath)
2247 except (EnvironmentError, KeyError), e:
2248 linkpath = os.path.normpath(linkpath)
2249 try:
2250 shutil.copy2(linkpath, targetpath)
2251 except EnvironmentError, e:
2252 raise IOError("link could not be created")
2253
2254 def chown(self, tarinfo, targetpath):
2255 """Set owner of targetpath according to tarinfo.
2256 """
2257 if pwd and hasattr(os, "geteuid") and os.geteuid() == 0:
2258 # We have to be root to do so.
2259 try:
2260 g = grp.getgrnam(tarinfo.gname)[2]
2261 except KeyError:
2262 try:
2263 g = grp.getgrgid(tarinfo.gid)[2]
2264 except KeyError:
2265 g = os.getgid()
2266 try:
2267 u = pwd.getpwnam(tarinfo.uname)[2]
2268 except KeyError:
2269 try:
2270 u = pwd.getpwuid(tarinfo.uid)[2]
2271 except KeyError:
2272 u = os.getuid()
2273 try:
2274 if tarinfo.issym() and hasattr(os, "lchown"):
2275 os.lchown(targetpath, u, g)
2276 else:
2277 if sys.platform != "os2emx":
2278 os.chown(targetpath, u, g)
2279 except EnvironmentError, e:
2280 raise ExtractError("could not change owner")
2281
2282 def chmod(self, tarinfo, targetpath):
2283 """Set file permissions of targetpath according to tarinfo.
2284 """
2285 if hasattr(os, 'chmod'):
2286 try:
2287 os.chmod(targetpath, tarinfo.mode)
2288 except EnvironmentError, e:
2289 raise ExtractError("could not change mode")
2290
2291 def utime(self, tarinfo, targetpath):
2292 """Set modification time of targetpath according to tarinfo.
2293 """
2294 if not hasattr(os, 'utime'):
2295 return
2296 try:
2297 os.utime(targetpath, (tarinfo.mtime, tarinfo.mtime))
2298 except EnvironmentError, e:
2299 raise ExtractError("could not change modification time")
2300
2301 #--------------------------------------------------------------------------
2302 def next(self):
2303 """Return the next member of the archive as a TarInfo object, when
2304 TarFile is opened for reading. Return None if there are no more
2305 members available.
2306 """
2307 self._check("ra")
2308 if self.firstmember is not None:
2309 m = self.firstmember
2310 self.firstmember = None
2311 return m
2312
2313 # Read the next block.
2314 self.fileobj.seek(self.offset)
2315 while True:
2316 try:
2317 tarinfo = self.tarinfo.fromtarfile(self)
2318 if tarinfo is None:
2319 return
2320 self.members.append(tarinfo)
2321
2322 except HeaderError, e:
2323 if self.ignore_zeros:
2324 self._dbg(2, "0x%X: %s" % (self.offset, e))
2325 self.offset += BLOCKSIZE
2326 continue
2327 else:
2328 if self.offset == 0:
2329 raise ReadError(str(e))
2330 return None
2331 break
2332
2333 return tarinfo
2334
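# Usage sketch: next() reads members one at a time in archive order and
# returns None at the end; the path is hypothetical:
#
#     import tarfile
#     tf = tarfile.open("sample.tar", "r")
#     info = tf.next()
#     while info is not None:
#         print info.name
#         info = tf.next()
#     tf.close()
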
2335 #--------------------------------------------------------------------------
2336 # Little helper methods:
2337
2338 def _getmember(self, name, tarinfo=None):
2339 """Find an archive member by name from bottom to top.
2340 If tarinfo is given, it is used as the starting point.
2341 """
2342 # Ensure that all members have been loaded.
2343 members = self.getmembers()
2344
2345 if tarinfo is None:
2346 end = len(members)
2347 else:
2348 end = members.index(tarinfo)
2349
2350 for i in xrange(end - 1, -1, -1):
2351 if name == members[i].name:
2352 return members[i]
2353
2354 def _load(self):
2355 """Read through the entire archive file and look for readable
2356 members.
2357 """
2358 while True:
2359 tarinfo = self.next()
2360 if tarinfo is None:
2361 break
2362 self._loaded = True
2363
2364 def _check(self, mode=None):
2365 """Check if TarFile is still open, and if the operation's mode
2366 corresponds to TarFile's mode.
2367 """
2368 if self.closed:
2369 raise IOError("%s is closed" % self.__class__.__name__)
2370 if mode is not None and self.mode not in mode:
2371 raise IOError("bad operation for mode %r" % self.mode)
2372
2373 def __iter__(self):
2374 """Provide an iterator object.
2375 """
2376 if self._loaded:
2377 return iter(self.members)
2378 else:
2379 return TarIter(self)
2380
2381 def _dbg(self, level, msg):
2382 """Write debugging output to sys.stderr.
2383 """
2384 if level <= self.debug:
2385 print >> sys.stderr, msg
2386# class TarFile
2387
2388class TarIter:
2389 """Iterator Class.
2390
2391 for tarinfo in TarFile(...):
2392 suite...
2393 """
2394
2395 def __init__(self, tarfile):
2396 """Construct a TarIter object.
2397 """
2398 self.tarfile = tarfile
2399 self.index = 0
2400 def __iter__(self):
2401 """Return iterator object.
2402 """
2403 return self
2404 def next(self):
2405 """Return the next item using TarFile's next() method.
2406 When all members have been read, set TarFile as _loaded.
2407 """
2408 # Fix for SF #1100429: Under rare circumstances it can
2409 # happen that getmembers() is called during iteration,
2410 # which will cause TarIter to stop prematurely.
2411 if not self.tarfile._loaded:
2412 tarinfo = self.tarfile.next()
2413 if not tarinfo:
2414 self.tarfile._loaded = True
2415 raise StopIteration
2416 else:
2417 try:
2418 tarinfo = self.tarfile.members[self.index]
2419 except IndexError:
2420 raise StopIteration
2421 self.index += 1
2422 return tarinfo
2423
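# Usage sketch: iterating over a TarFile goes through TarIter, so members
# are read lazily, one header at a time; the path is hypothetical:
#
#     import tarfile
#     for info in tarfile.open("sample.tar", "r"):
#         print info.name
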
2424# Helper classes for sparse file support
2425class _section:
2426 """Base class for _data and _hole.
2427 """
2428 def __init__(self, offset, size):
2429 self.offset = offset
2430 self.size = size
2431 def __contains__(self, offset):
2432 return self.offset <= offset < self.offset + self.size
2433
2434class _data(_section):
2435 """Represent a data section in a sparse file.
2436 """
2437 def __init__(self, offset, size, realpos):
2438 _section.__init__(self, offset, size)
2439 self.realpos = realpos
2440
2441class _hole(_section):
2442 """Represent a hole section in a sparse file.
2443 """
2444 pass
2445
2446class _ringbuffer(list):
2447 """Ringbuffer class which increases performance
2448 over a regular list.
2449 """
2450 def __init__(self):
2451 self.idx = 0
2452 def find(self, offset):
2453 idx = self.idx
2454 while True:
2455 item = self[idx]
2456 if offset in item:
2457 break
2458 idx += 1
2459 if idx == len(self):
2460 idx = 0
2461 if idx == self.idx:
2462 # End of File
2463 return None
2464 self.idx = idx
2465 return item
2466
2467#---------------------------------------------
2468# zipfile compatible TarFile class
2469#---------------------------------------------
2470TAR_PLAIN = 0 # zipfile.ZIP_STORED
2471TAR_GZIPPED = 8 # zipfile.ZIP_DEFLATED
2472class TarFileCompat:
2473 """TarFile class compatible with standard module zipfile's
2474 ZipFile class.
2475 """
2476 def __init__(self, file, mode="r", compression=TAR_PLAIN):
2477 from warnings import warnpy3k
2478 warnpy3k("the TarFileCompat class has been removed in Python 3.0",
2479 stacklevel=2)
2480 if compression == TAR_PLAIN:
2481 self.tarfile = TarFile.taropen(file, mode)
2482 elif compression == TAR_GZIPPED:
2483 self.tarfile = TarFile.gzopen(file, mode)
2484 else:
2485 raise ValueError("unknown compression constant")
2486 if mode[0:1] == "r":
2487 members = self.tarfile.getmembers()
2488 for m in members:
2489 m.filename = m.name
2490 m.file_size = m.size
2491 m.date_time = time.gmtime(m.mtime)[:6]
2492 def namelist(self):
2493 return map(lambda m: m.name, self.infolist())
2494 def infolist(self):
2495 return filter(lambda m: m.type in REGULAR_TYPES,
2496 self.tarfile.getmembers())
2497 def printdir(self):
2498 self.tarfile.list()
2499 def testzip(self):
2500 return
2501 def getinfo(self, name):
2502 return self.tarfile.getmember(name)
2503 def read(self, name):
2504 return self.tarfile.extractfile(self.tarfile.getmember(name)).read()
2505 def write(self, filename, arcname=None, compress_type=None):
2506 self.tarfile.add(filename, arcname)
2507 def writestr(self, zinfo, bytes):
2508 try:
2509 from cStringIO import StringIO
2510 except ImportError:
2511 from StringIO import StringIO
2512 import calendar
2513 tinfo = TarInfo(zinfo.filename)
2514 tinfo.size = len(bytes)
2515 tinfo.mtime = calendar.timegm(zinfo.date_time)
2516 self.tarfile.addfile(tinfo, StringIO(bytes))
2517 def close(self):
2518 self.tarfile.close()
2519#class TarFileCompat
2520
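# Usage sketch for the zipfile-style wrapper above (deprecated, see the
# warnpy3k call in __init__); the archive name is hypothetical:
#
#     zf = TarFileCompat("sample.tar", "r", TAR_PLAIN)
#     print zf.namelist()
#     print zf.read("docs/readme.txt")
#     zf.close()
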
2521#--------------------
2522# exported functions
2523#--------------------
2524def is_tarfile(name):
2525 """Return True if name points to a tar archive that we
2526 are able to handle, else return False.
2527 """
2528 try:
2529 t = open(name)
2530 t.close()
2531 return True
2532 except TarError:
2533 return False
2534
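# Usage sketch for is_tarfile(); the path is hypothetical:
#
#     import tarfile
#     if tarfile.is_tarfile("archive.tar"):
#         print "looks like a readable tar archive"
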
2535bltn_open = open
2536open = TarFile.open