source: vendor/python/2.5/Doc/lib/libtarfile.tex

Last change on this file was 3225, checked in by bird, 18 years ago

Python 2.5

File size: 18.2 KB
Line 
1\section{\module{tarfile} --- Read and write tar archive files}
2
3\declaremodule{standard}{tarfile}
4\modulesynopsis{Read and write tar-format archive files.}
5\versionadded{2.3}
6
7\moduleauthor{Lars Gust\"abel}{lars@gustaebel.de}
8\sectionauthor{Lars Gust\"abel}{lars@gustaebel.de}
9
10The \module{tarfile} module makes it possible to read and create tar archives.
11Some facts and figures:
12
13\begin{itemize}
14\item reads and writes \module{gzip} and \module{bzip2} compressed archives.
15\item creates \POSIX{} 1003.1-1990 compliant or GNU tar compatible archives.
16\item reads GNU tar extensions \emph{longname}, \emph{longlink} and
17 \emph{sparse}.
18\item stores pathnames of unlimited length using GNU tar extensions.
19\item handles directories, regular files, hardlinks, symbolic links, fifos,
20 character devices and block devices and is able to acquire and
21 restore file information like timestamp, access permissions and owner.
22\item can handle tape devices.
23\end{itemize}
24
25\begin{funcdesc}{open}{\optional{name\optional{, mode
26 \optional{, fileobj\optional{, bufsize}}}}}
27 Return a \class{TarFile} object for the pathname \var{name}.
28 For detailed information on \class{TarFile} objects,
29 see \citetitle{TarFile Objects} (section~\ref{tarfile-objects}).
30
31 \var{mode} has to be a string of the form \code{'filemode[:compression]'};
32 it defaults to \code{'r'}. Here is a full list of mode combinations:
33
34 \begin{tableii}{c|l}{code}{mode}{action}
35 \lineii{'r' or 'r:*'}{Open for reading with transparent compression (recommended).}
36 \lineii{'r:'}{Open for reading exclusively without compression.}
37 \lineii{'r:gz'}{Open for reading with gzip compression.}
38 \lineii{'r:bz2'}{Open for reading with bzip2 compression.}
39 \lineii{'a' or 'a:'}{Open for appending with no compression.}
40 \lineii{'w' or 'w:'}{Open for uncompressed writing.}
41 \lineii{'w:gz'}{Open for gzip compressed writing.}
42 \lineii{'w:bz2'}{Open for bzip2 compressed writing.}
43 \end{tableii}
44
45 Note that \code{'a:gz'} or \code{'a:bz2'} is not possible.
46 If \var{mode} is not suitable to open a certain (compressed) file for
47 reading, \exception{ReadError} is raised. Use \var{mode} \code{'r'} to
48 avoid this. If a compression method is not supported,
49 \exception{CompressionError} is raised.
50
51 If \var{fileobj} is specified, it is used as an alternative to
52 a file object opened for \var{name}.
53
54 For special purposes, there is a second format for \var{mode}:
55 \code{'filemode|[compression]'}. \function{open()} will return a
56 \class{TarFile} object that processes its data as a stream of
57 blocks. No random seeking will be done on the file. If given,
58 \var{fileobj} may be any object that has a \method{read()} or
59 \method{write()} method (depending on the \var{mode}).
60 \var{bufsize} specifies the blocksize and defaults to \code{20 *
61 512} bytes. Use this variant in combination with
62 e.g. \code{sys.stdin}, a socket file object or a tape device.
63 However, such a \class{TarFile} object is limited in that it does
64 not allow random access; see ``Examples''
65 (section~\ref{tar-examples}). The currently possible modes:
66
67 \begin{tableii}{c|l}{code}{Mode}{Action}
68 \lineii{'r|*'}{Open a \emph{stream} of tar blocks for reading with transparent compression.}
69 \lineii{'r|'}{Open a \emph{stream} of uncompressed tar blocks for reading.}
70 \lineii{'r|gz'}{Open a gzip compressed \emph{stream} for reading.}
71 \lineii{'r|bz2'}{Open a bzip2 compressed \emph{stream} for reading.}
72 \lineii{'w|'}{Open an uncompressed \emph{stream} for writing.}
73 \lineii{'w|gz'}{Open a gzip compressed \emph{stream} for writing.}
74 \lineii{'w|bz2'}{Open a bzip2 compressed \emph{stream} for writing.}
75 \end{tableii}
76\end{funcdesc}
77
78\begin{classdesc*}{TarFile}
79 Class for reading and writing tar archives. Do not use this
80 class directly; use \function{open()} instead.
81 See ``TarFile Objects'' (section~\ref{tarfile-objects}).
82\end{classdesc*}
83
84\begin{funcdesc}{is_tarfile}{name}
85 Return \constant{True} if \var{name} is a tar archive file that
86 the \module{tarfile} module can read.
87\end{funcdesc}
88
89\begin{classdesc}{TarFileCompat}{filename\optional{, mode\optional{,
90 compression}}}
91 Class for limited access to tar archives with a
92 \refmodule{zipfile}-like interface. Please consult the
93 documentation of the \refmodule{zipfile} module for more details.
94 \var{compression} must be one of the following constants:
95 \begin{datadesc}{TAR_PLAIN}
96 Constant for an uncompressed tar archive.
97 \end{datadesc}
98 \begin{datadesc}{TAR_GZIPPED}
99 Constant for a \refmodule{gzip} compressed tar archive.
100 \end{datadesc}
101\end{classdesc}
102
103\begin{excdesc}{TarError}
104 Base class for all \module{tarfile} exceptions.
105\end{excdesc}
106
107\begin{excdesc}{ReadError}
108 Is raised when a tar archive is opened that either cannot be handled by
109 the \module{tarfile} module or is somehow invalid.
110\end{excdesc}
111
112\begin{excdesc}{CompressionError}
113 Is raised when a compression method is not supported or when the data
114 cannot be decoded properly.
115\end{excdesc}
116
117\begin{excdesc}{StreamError}
118 Is raised for the limitations that are typical for stream-like
119 \class{TarFile} objects.
120\end{excdesc}
121
122\begin{excdesc}{ExtractError}
123 Is raised for \emph{non-fatal} errors when using \method{extract()}, but
124 only if \member{TarFile.errorlevel}\code{ == 2}.
125\end{excdesc}
126
127\begin{seealso}
128 \seemodule{zipfile}{Documentation of the \refmodule{zipfile}
129 standard module.}
130
131 \seetitle[http://www.gnu.org/software/tar/manual/html_node/tar_134.html\#SEC134]
132 {GNU tar manual, Basic Tar Format}{Documentation for tar archive files,
133 including GNU tar extensions.}
134\end{seealso}
135
136%-----------------
137% TarFile Objects
138%-----------------
139
140\subsection{TarFile Objects \label{tarfile-objects}}
141
142The \class{TarFile} object provides an interface to a tar archive. A tar
143archive is a sequence of blocks. An archive member (a stored file) is made up
144of a header block followed by data blocks. It is possible to store a file in a
145tar archive several times. Each archive member is represented by a
146\class{TarInfo} object; see \citetitle{TarInfo Objects}
147(section~\ref{tarinfo-objects}) for details.
148
149\begin{classdesc}{TarFile}{\optional{name
150 \optional{, mode\optional{, fileobj}}}}
151 Open an \emph{(uncompressed)} tar archive \var{name}.
152 \var{mode} is either \code{'r'} to read from an existing archive,
153 \code{'a'} to append data to an existing file or \code{'w'} to create a new
154 file overwriting an existing one. \var{mode} defaults to \code{'r'}.
155
156 If \var{fileobj} is given, it is used for reading or writing data.
157 If it can be determined, \var{mode} is overridden by \var{fileobj}'s mode.
158 \begin{notice}
159 \var{fileobj} is not closed when \class{TarFile} is closed.
160 \end{notice}
161\end{classdesc}
162
163\begin{methoddesc}{open}{...}
164 Alternative constructor. The \function{open()} function on module level is
165 actually a shortcut to this classmethod. See section~\ref{module-tarfile}
166 for details.
167\end{methoddesc}
168
169\begin{methoddesc}{getmember}{name}
170 Return a \class{TarInfo} object for member \var{name}. If \var{name}
171 cannot be found in the archive, \exception{KeyError} is raised.
172 \begin{notice}
173 If a member occurs more than once in the archive, its last
174 occurrence is assumed to be the most up-to-date version.
175 \end{notice}
176\end{methoddesc}
177
178\begin{methoddesc}{getmembers}{}
179 Return the members of the archive as a list of \class{TarInfo} objects.
180 The list has the same order as the members in the archive.
181\end{methoddesc}
182
183\begin{methoddesc}{getnames}{}
184 Return the members as a list of their names. It has the same order as
185 the list returned by \method{getmembers()}.
186\end{methoddesc}
187
188\begin{methoddesc}{list}{verbose=True}
189 Print a table of contents to \code{sys.stdout}. If \var{verbose} is
190 \constant{False}, only the names of the members are printed. If it is
191 \constant{True}, output similar to that of \program{ls -l} is produced.
192\end{methoddesc}
193
194\begin{methoddesc}{next}{}
195 Return the next member of the archive as a \class{TarInfo} object, when
196 \class{TarFile} is opened for reading. Return \code{None} if there are no
197 more members available.
198\end{methoddesc}
199
200\begin{methoddesc}{extractall}{\optional{path\optional{, members}}}
201 Extract all members from the archive to the current working directory
202 or directory \var{path}. If optional \var{members} is given, it must be
203 a subset of the list returned by \method{getmembers()}.
204 Directory information like owner, modification time and permissions is
205 set after all members have been extracted. This is done to work around two
206 problems: A directory's modification time is reset each time a file is
207 created in it. And, if a directory's permissions do not allow writing,
208 extracting files to it will fail.
209 \versionadded{2.5}
210\end{methoddesc}
211
212\begin{methoddesc}{extract}{member\optional{, path}}
213 Extract a member from the archive to the current working directory,
214 using its full name. Its file information is extracted as accurately as
215 possible.
216 \var{member} may be a filename or a \class{TarInfo} object.
217 You can specify a different directory using \var{path}.
218 \begin{notice}
219 Because the \method{extract()} method allows random access to a tar
220 archive there are some issues you must take care of yourself. See the
221 description for \method{extractall()} above.
222 \end{notice}
223\end{methoddesc}
224
225\begin{methoddesc}{extractfile}{member}
226 Extract a member from the archive as a file object.
227 \var{member} may be a filename or a \class{TarInfo} object.
228 If \var{member} is a regular file, a file-like object is returned.
229 If \var{member} is a link, a file-like object is constructed from the
230 link's target.
231 If \var{member} is none of the above, \code{None} is returned.
232 \begin{notice}
233 The file-like object is read-only and provides the following methods:
234 \method{read()}, \method{readline()}, \method{readlines()},
235 \method{seek()}, \method{tell()}.
236 \end{notice}
237\end{methoddesc}
238
239\begin{methoddesc}{add}{name\optional{, arcname\optional{, recursive}}}
240 Add the file \var{name} to the archive. \var{name} may be any type
241 of file (directory, fifo, symbolic link, etc.).
242 If given, \var{arcname} specifies an alternative name for the file in the
243 archive. Directories are added recursively by default.
244 This can be avoided by setting \var{recursive} to \constant{False};
245 the default is \constant{True}.
246\end{methoddesc}
247
248\begin{methoddesc}{addfile}{tarinfo\optional{, fileobj}}
249 Add the \class{TarInfo} object \var{tarinfo} to the archive.
250 If \var{fileobj} is given, \code{\var{tarinfo}.size} bytes are read
251 from it and added to the archive. You can create \class{TarInfo} objects
252 using \method{gettarinfo()}.
253 \begin{notice}
254 On Windows platforms, \var{fileobj} should always be opened with mode
255 \code{'rb'} to avoid irritation about the file size.
256 \end{notice}
257\end{methoddesc}
258
259\begin{methoddesc}{gettarinfo}{\optional{name\optional{,
260 arcname\optional{, fileobj}}}}
261 Create a \class{TarInfo} object for either the file \var{name} or
262 the file object \var{fileobj} (using \function{os.fstat()} on its
263 file descriptor). You can modify some of the \class{TarInfo}'s
264 attributes before you add it using \method{addfile()}. If given,
265 \var{arcname} specifies an alternative name for the file in the
266 archive.
267\end{methoddesc}
268
269\begin{methoddesc}{close}{}
270 Close the \class{TarFile}. In write mode, two finishing zero
271 blocks are appended to the archive.
272\end{methoddesc}
273
274\begin{memberdesc}{posix}
275 If true, create a \POSIX{} 1003.1-1990 compliant archive. GNU
276 extensions are not used, because they are not part of the \POSIX{}
277 standard. This limits the length of filenames to at most 256,
278 link names to 100 characters and the maximum file size to 8
279 gigabytes. A \exception{ValueError} is raised if a file exceeds
280 this limit. If false, create a GNU tar compatible archive. It
281 will not be \POSIX{} compliant, but can store files without any
282 of the above restrictions.
283 \versionchanged[\var{posix} defaults to \constant{False}]{2.4}
284\end{memberdesc}
285
286\begin{memberdesc}{dereference}
287 If false, add symbolic and hard links to archive. If true, add the
288 content of the target files to the archive. This has no effect on
289 systems that do not support symbolic links.
290\end{memberdesc}
291
292\begin{memberdesc}{ignore_zeros}
293 If false, treat an empty block as the end of the archive. If true,
294 skip empty (and invalid) blocks and try to get as many members as
295 possible. This is only useful for concatenated or damaged
296 archives.
297\end{memberdesc}
298
299\begin{memberdesc}{debug=0}
300 To be set from \code{0} (no debug messages; the default) up to
301 \code{3} (all debug messages). The messages are written to
302 \code{sys.stderr}.
303\end{memberdesc}
304
305\begin{memberdesc}{errorlevel}
306 If \code{0} (the default), all errors are ignored when using
307 \method{extract()}. Nevertheless, they appear as error messages
308 in the debug output, when debugging is enabled. If \code{1}, all
309 \emph{fatal} errors are raised as \exception{OSError} or
310 \exception{IOError} exceptions. If \code{2}, all \emph{non-fatal}
311 errors are raised as \exception{TarError} exceptions as well.
312\end{memberdesc}
313
314%-----------------
315% TarInfo Objects
316%-----------------
317
318\subsection{TarInfo Objects \label{tarinfo-objects}}
319
320A \class{TarInfo} object represents one member in a
321\class{TarFile}. Aside from storing all required attributes of a file
322(like file type, size, time, permissions, owner etc.), it provides
323some useful methods to determine its type. It does \emph{not} contain
324the file's data itself.
325
326\class{TarInfo} objects are returned by \class{TarFile}'s methods
327\method{getmember()}, \method{getmembers()} and \method{gettarinfo()}.
328
329\begin{classdesc}{TarInfo}{\optional{name}}
330 Create a \class{TarInfo} object.
331\end{classdesc}
332
333\begin{methoddesc}{frombuf}{}
334 Create and return a \class{TarInfo} object from a string buffer.
335\end{methoddesc}
336
337\begin{methoddesc}{tobuf}{posix}
338 Create a string buffer from a \class{TarInfo} object.
339 See \class{TarFile}'s \member{posix} attribute for information
340 on the \var{posix} argument. It defaults to \constant{False}.
341
342 \versionadded[The \var{posix} parameter]{2.5}
343\end{methoddesc}
344
345A \class{TarInfo} object has the following public data attributes:
346
347\begin{memberdesc}{name}
348 Name of the archive member.
349\end{memberdesc}
350
351\begin{memberdesc}{size}
352 Size in bytes.
353\end{memberdesc}
354
355\begin{memberdesc}{mtime}
356 Time of last modification.
357\end{memberdesc}
358
359\begin{memberdesc}{mode}
360 Permission bits.
361\end{memberdesc}
362
363\begin{memberdesc}{type}
364 File type. \var{type} is usually one of these constants:
365 \constant{REGTYPE}, \constant{AREGTYPE}, \constant{LNKTYPE},
366 \constant{SYMTYPE}, \constant{DIRTYPE}, \constant{FIFOTYPE},
367 \constant{CONTTYPE}, \constant{CHRTYPE}, \constant{BLKTYPE},
368 \constant{GNUTYPE_SPARSE}. To determine the type of a
369 \class{TarInfo} object more conveniently, use the \code{is_*()}
370 methods below.
371\end{memberdesc}
372
373\begin{memberdesc}{linkname}
374 Name of the target file, which is only present in
375 \class{TarInfo} objects of type \constant{LNKTYPE} and
376 \constant{SYMTYPE}.
377\end{memberdesc}
378
379\begin{memberdesc}{uid}
380 User ID of the user who originally stored this member.
381\end{memberdesc}
382
383\begin{memberdesc}{gid}
384 Group ID of the user who originally stored this member.
385\end{memberdesc}
386
387\begin{memberdesc}{uname}
388 User name.
389\end{memberdesc}
390
391\begin{memberdesc}{gname}
392 Group name.
393\end{memberdesc}
394
395A \class{TarInfo} object also provides some convenient query methods:
396
397\begin{methoddesc}{isfile}{}
398 Return \constant{True} if the \class{TarInfo} object is a regular
399 file.
400\end{methoddesc}
401
402\begin{methoddesc}{isreg}{}
403 Same as \method{isfile()}.
404\end{methoddesc}
405
406\begin{methoddesc}{isdir}{}
407 Return \constant{True} if it is a directory.
408\end{methoddesc}
409
410\begin{methoddesc}{issym}{}
411 Return \constant{True} if it is a symbolic link.
412\end{methoddesc}
413
414\begin{methoddesc}{islnk}{}
415 Return \constant{True} if it is a hard link.
416\end{methoddesc}
417
418\begin{methoddesc}{ischr}{}
419 Return \constant{True} if it is a character device.
420\end{methoddesc}
421
422\begin{methoddesc}{isblk}{}
423 Return \constant{True} if it is a block device.
424\end{methoddesc}
425
426\begin{methoddesc}{isfifo}{}
427 Return \constant{True} if it is a FIFO.
428\end{methoddesc}
429
430\begin{methoddesc}{isdev}{}
431 Return \constant{True} if it is one of character device, block
432 device or FIFO.
433\end{methoddesc}
434
435%------------------------
436% Examples
437%------------------------
438
439\subsection{Examples \label{tar-examples}}
440
441How to extract an entire tar archive to the current working directory:
442\begin{verbatim}
443import tarfile
444tar = tarfile.open("sample.tar.gz")
445tar.extractall()
446tar.close()
447\end{verbatim}
448
449How to create an uncompressed tar archive from a list of filenames:
450\begin{verbatim}
451import tarfile
452tar = tarfile.open("sample.tar", "w")
453for name in ["foo", "bar", "quux"]:
454 tar.add(name)
455tar.close()
456\end{verbatim}
457
458How to read a gzip compressed tar archive and display some member information:
459\begin{verbatim}
460import tarfile
461tar = tarfile.open("sample.tar.gz", "r:gz")
462for tarinfo in tar:
463 print tarinfo.name, "is", tarinfo.size, "bytes in size and is",
464 if tarinfo.isreg():
465 print "a regular file."
466 elif tarinfo.isdir():
467 print "a directory."
468 else:
469 print "something else."
470tar.close()
471\end{verbatim}
472
473How to create a tar archive with faked information:
474\begin{verbatim}
475import tarfile
476tar = tarfile.open("sample.tar.gz", "w:gz")
477for name in namelist:
478 tarinfo = tar.gettarinfo(name, "fakeproj-1.0/" + name)
479 tarinfo.uid = 123
480 tarinfo.gid = 456
481 tarinfo.uname = "johndoe"
482 tarinfo.gname = "fake"
483 tar.addfile(tarinfo, file(name))
484tar.close()
485\end{verbatim}
486
487The \emph{only} way to extract an uncompressed tar stream from
488\code{sys.stdin}:
489\begin{verbatim}
490import sys
491import tarfile
492tar = tarfile.open(mode="r|", fileobj=sys.stdin)
493for tarinfo in tar:
494 tar.extract(tarinfo)
495tar.close()
496\end{verbatim}
Note: See TracBrowser for help on using the repository browser.