Context Navigation

pickle.py@ 606

Last change on this file since 606 was 391, checked in by dmik, 11 years ago
python: Merge vendor 2.7.6 to trunk.
Property svn:eol-style set to `native`
File size: 44.1 KB

Line
1	"""Create portable serialized representations of Python objects.
2
3	See module cPickle for a (much) faster implementation.
4	See module copy_reg for a mechanism for registering custom picklers.
5	See module pickletools source for extensive comments.
6
7	Classes:
8
9	Pickler
10	Unpickler
11
12	Functions:
13
14	dump(object, file)
15	dumps(object) -> string
16	load(file) -> object
17	loads(string) -> object
18
19	Misc variables:
20
21	__version__
22	format_version
23	compatible_formats
24
25	"""
26
27	__version__ = "$Revision: 72223 $" # Code version
28
29	from types import *
30	from copy_reg import dispatch_table
31	from copy_reg import _extension_registry, _inverted_registry, _extension_cache
32	import marshal
33	import sys
34	import struct
35	import re
36
# Names exported by ``from pickle import *``.  The upper-case opcode
# constants are appended to this list once they have been defined below.
__all__ = ["PickleError", "PicklingError", "UnpicklingError", "Pickler",
           "Unpickler", "dump", "dumps", "load", "loads"]

# These are purely informational; no code uses these.
format_version = "2.0"                  # File format version we write
compatible_formats = ["1.0",            # Original protocol 0
                      "1.1",            # Protocol 0 with INST added
                      "1.2",            # Original protocol 1
                      "1.3",            # Protocol 1 with BINFLOAT added
                      "2.0",            # Protocol 2
                      ]                 # Old format versions we can read

# Keep in sync with cPickle.  This is the highest protocol number we
# know how to read.
HIGHEST_PROTOCOL = 2

# Why use struct.pack() for pickling but marshal.loads() for
# unpickling?  struct.pack() is 40% faster than marshal.dumps(), but
# marshal.loads() is twice as fast as struct.unpack()!
mloads = marshal.loads
57
class PickleError(Exception):
    """Root of the pickle exception hierarchy.

    PicklingError and UnpicklingError both derive from this class, so
    catching PickleError catches either of them.
    """
61
class PicklingError(PickleError):
    """Raised when an object that cannot be pickled is passed to dump()."""
68
class UnpicklingError(PickleError):
    """Raised when an object cannot be unpickled, e.g. on a security
    violation.

    Unpickling may raise other exceptions as well, including (but not
    necessarily limited to) AttributeError, EOFError, ImportError, and
    IndexError.
    """
79
80	# An instance of _Stop is raised by Unpickler.load_stop() in response to
81	# the STOP opcode, passing the object that is the result of unpickling.
82	class _Stop(Exception):
83	def __init__(self, value):
84	self.value = value
85
# Jython has PyStringMap; it's a dict subclass with string keys.
# On other implementations the import fails and the name is set to None,
# which later code tests before registering a dispatch entry for it.
try:
    from org.python.core import PyStringMap
except ImportError:
    PyStringMap = None

# UnicodeType may or may not be exported (normally imported from types).
# When it is missing, bind it to None so that later references to the
# name do not raise NameError.
try:
    UnicodeType
except NameError:
    UnicodeType = None
97
# Pickle opcodes.  See pickletools.py for extensive docs.  The listing
# here is in kind-of alphabetical order of 1-character pickle code.
# pickletools groups them by purpose.

MARK            = '('   # push special markobject on stack
STOP            = '.'   # every pickle ends with STOP
POP             = '0'   # discard topmost stack item
POP_MARK        = '1'   # discard stack top through topmost markobject
DUP             = '2'   # duplicate top stack item
FLOAT           = 'F'   # push float object; decimal string argument
INT             = 'I'   # push integer or bool; decimal string argument
BININT          = 'J'   # push four-byte signed int
BININT1         = 'K'   # push 1-byte unsigned int
LONG            = 'L'   # push long; decimal string argument
BININT2         = 'M'   # push 2-byte unsigned int
NONE            = 'N'   # push None
PERSID          = 'P'   # push persistent object; id is taken from string arg
BINPERSID       = 'Q'   # push persistent object; id is taken from stack
REDUCE          = 'R'   # apply callable to argtuple, both on stack
STRING          = 'S'   # push string; NL-terminated string argument
BINSTRING       = 'T'   # push string; counted binary string argument
SHORT_BINSTRING = 'U'   # push string; counted binary string argument < 256 bytes
UNICODE         = 'V'   # push Unicode string; raw-unicode-escaped argument
BINUNICODE      = 'X'   # push Unicode string; counted UTF-8 string argument
APPEND          = 'a'   # append stack top to list below it
BUILD           = 'b'   # call __setstate__ or __dict__.update()
GLOBAL          = 'c'   # push self.find_class(modname, name); 2 string args
DICT            = 'd'   # build a dict from stack items
EMPTY_DICT      = '}'   # push empty dict
APPENDS         = 'e'   # extend list on stack by topmost stack slice
GET             = 'g'   # push item from memo on stack; index is string arg
BINGET          = 'h'   # push item from memo on stack; index is 1-byte arg
INST            = 'i'   # build & push class instance
LONG_BINGET     = 'j'   # push item from memo on stack; index is 4-byte arg
LIST            = 'l'   # build list from topmost stack items
EMPTY_LIST      = ']'   # push empty list
OBJ             = 'o'   # build & push class instance
PUT             = 'p'   # store stack top in memo; index is string arg
BINPUT          = 'q'   # store stack top in memo; index is 1-byte arg
LONG_BINPUT     = 'r'   # store stack top in memo; index is 4-byte arg
SETITEM         = 's'   # add key+value pair to dict
TUPLE           = 't'   # build tuple from topmost stack items
EMPTY_TUPLE     = ')'   # push empty tuple
SETITEMS        = 'u'   # modify dict by adding topmost key+value pairs
BINFLOAT        = 'G'   # push float; arg is 8-byte float encoding

TRUE            = 'I01\n'  # not an opcode; see INT docs in pickletools.py
FALSE           = 'I00\n'  # not an opcode; see INT docs in pickletools.py

# Protocol 2

PROTO           = '\x80'  # identify pickle protocol
NEWOBJ          = '\x81'  # build object by applying cls.__new__ to argtuple
EXT1            = '\x82'  # push object from extension registry; 1-byte index
EXT2            = '\x83'  # ditto, but 2-byte index
EXT4            = '\x84'  # ditto, but 4-byte index
TUPLE1          = '\x85'  # build 1-tuple from stack top
TUPLE2          = '\x86'  # build 2-tuple from two topmost stack items
TUPLE3          = '\x87'  # build 3-tuple from three topmost stack items
NEWTRUE         = '\x88'  # push True
NEWFALSE        = '\x89'  # push False
LONG1           = '\x8a'  # push long from < 256 bytes
LONG4           = '\x8b'  # push really big long

# Indexed by tuple length (0-3): the opcode that builds a tuple of that
# size without needing a MARK.
_tuplesize2code = [EMPTY_TUPLE, TUPLE1, TUPLE2, TUPLE3]
163
164
# Export every all-caps module-level name (the opcode constants and
# HIGHEST_PROTOCOL).  Using filter() leaves no loop variable behind in
# the module namespace, so nothing needs to be deleted afterwards.
__all__.extend(filter(re.compile("[A-Z][A-Z0-9_]+$").match, dir()))
167
168
169	# Pickling machinery
170
171	class Pickler:
172
def __init__(self, file, protocol=None):
    """Create a pickler that writes a pickle data stream to *file*.

    *file* must provide a write() method accepting a single string
    argument; an open file object, a StringIO object, or any custom
    object with that interface will do.

    *protocol* selects the pickle protocol: 0, 1 or 2.  The default
    (None) means protocol 0, for backwards compatibility.  Protocol 0
    is the only protocol that survives a round trip through a file
    opened in text mode; with any higher protocol the file must be
    opened in binary mode both for pickling and for unpickling.  Each
    higher protocol is more efficient than the one before it, but
    needs a more recent Python to read the result.

    A negative *protocol* selects the highest supported protocol.
    A value above HIGHEST_PROTOCOL raises ValueError.
    """
    protocol = 0 if protocol is None else protocol
    if protocol < 0:
        protocol = HIGHEST_PROTOCOL
    elif not 0 <= protocol <= HIGHEST_PROTOCOL:
        raise ValueError("pickle protocol must be <= %d" % HIGHEST_PROTOCOL)

    self.write = file.write
    # Maps id(obj) -> (memo key, obj); see memoize().
    self.memo = {}
    self.proto = int(protocol)
    # True for the binary protocols (1 and up).
    self.bin = protocol >= 1
    # When true, memoize() records nothing.
    self.fast = 0
208
def clear_memo(self):
    """Empty the pickler's memo.

    The memo records which objects have already been pickled so that
    shared or recursive objects are written by reference rather than
    by value.  Clearing it is useful when a pickler is re-used.
    """
    memo = self.memo
    memo.clear()
219
def dump(self, obj):
    """Write a pickled representation of obj to the open file.

    For protocol 2 and up the stream starts with a PROTO opcode that
    records the protocol number; every pickle ends with STOP.
    """
    emit = self.write
    if self.proto >= 2:
        header = PROTO + chr(self.proto)
        emit(header)
    self.save(obj)
    emit(STOP)
226
def memoize(self, obj):
    """Record obj in the memo and emit the matching PUT opcode.

    The memo maps id(obj) to a (memo key, obj) pair.  The key is what
    gets written to the pickle and becomes the key in the Unpickler's
    memo; the object itself is stored too so that transient objects
    stay alive while pickling is in progress.

    The key is simply the current size of the memo.  That is only a
    convention (keys merely have to be unique), but it lets the
    Unpickler memo be a plain growable array indexed by key.

    Nothing is recorded when self.fast is true.
    """
    if self.fast:
        return
    memo = self.memo
    obj_id = id(obj)
    assert obj_id not in memo
    key = len(memo)
    self.write(self.put(key))
    memo[obj_id] = key, obj
248
def put(self, i, pack=struct.pack):
    """Return the opcode string that stores the stack top under memo key i.

    Text mode uses PUT with a decimal argument; binary mode uses BINPUT
    for keys below 256 and LONG_BINPUT (4-byte argument) otherwise.
    """
    if not self.bin:
        return PUT + repr(i) + '\n'
    if i < 256:
        return BINPUT + chr(i)
    return LONG_BINPUT + pack("<i", i)
258
def get(self, i, pack=struct.pack):
    """Return the opcode string that pushes the memo entry with key i.

    Text mode uses GET with a decimal argument; binary mode uses BINGET
    for keys below 256 and LONG_BINGET (4-byte argument) otherwise.
    """
    if not self.bin:
        return GET + repr(i) + '\n'
    if i < 256:
        return BINGET + chr(i)
    return LONG_BINGET + pack("<i", i)
268
def save(self, obj):
    """Pickle obj, using the first strategy that applies.

    In order: a persistent id supplied by persistent_id(); a reference
    to an object already in the memo; a handler in self.dispatch keyed
    on type(obj); a reducer registered in copy_reg.dispatch_table;
    pickling by name for classes with a custom metaclass; and finally
    obj.__reduce_ex__(proto) or obj.__reduce__().

    Raises PicklingError if no strategy applies or if the reducer
    returns something other than a string or a tuple of 2-5 items.
    """
    # Check for persistent id (defined by a subclass).  Only None means
    # "no persistent id": a falsy id such as 0 or "" is still an id and
    # must not fall through to ordinary pickling.
    pid = self.persistent_id(obj)
    if pid is not None:
        self.save_pers(pid)
        return

    # Check the memo
    x = self.memo.get(id(obj))
    if x:
        self.write(self.get(x[0]))
        return

    # Check the type dispatch table
    t = type(obj)
    f = self.dispatch.get(t)
    if f:
        f(self, obj)  # Call unbound method with explicit self
        return

    # Check copy_reg.dispatch_table
    reduce = dispatch_table.get(t)
    if reduce:
        rv = reduce(obj)
    else:
        # Check for a class with a custom metaclass; treat as regular class
        try:
            issc = issubclass(t, TypeType)
        except TypeError:  # t is not a class (old Boost; see SF #502085)
            issc = 0
        if issc:
            self.save_global(obj)
            return

        # Check for a __reduce_ex__ method, fall back to __reduce__
        reduce = getattr(obj, "__reduce_ex__", None)
        if reduce:
            rv = reduce(self.proto)
        else:
            reduce = getattr(obj, "__reduce__", None)
            if reduce:
                rv = reduce()
            else:
                raise PicklingError("Can't pickle %r object: %r" %
                                    (t.__name__, obj))

    # Check for string returned by reduce(), meaning "save as global"
    if type(rv) is StringType:
        self.save_global(obj, rv)
        return

    # Assert that reduce() returned a tuple
    if type(rv) is not TupleType:
        raise PicklingError("%s must return string or tuple" % reduce)

    # Assert that it returned an appropriately sized tuple
    l = len(rv)
    if not (2 <= l <= 5):
        raise PicklingError("Tuple returned by %s must have "
                            "two to five elements" % reduce)

    # Save the reduce() output and finally memoize the object
    self.save_reduce(obj=obj, *rv)
332
def persistent_id(self, obj):
    """Hook for subclasses: return a persistent id for obj.

    This base implementation always returns None.
    """
    return None
336
def save_pers(self, pid):
    """Write a persistent-id reference.

    Binary protocols pickle pid itself and follow it with BINPERSID;
    protocol 0 writes PERSID followed by str(pid) and a newline.
    """
    if not self.bin:
        self.write(PERSID + str(pid) + '\n')
        return
    self.save(pid)
    self.write(BINPERSID)
344
def save_reduce(self, func, args, state=None,
                listitems=None, dictitems=None, obj=None):
    """Pickle the pieces of a __reduce__-style result.

    This API is also called by some subclasses.

    func      -- callable used to rebuild the object
    args      -- tuple of arguments for func
    state     -- optional state, written with BUILD when not None
    listitems -- optional iterator of items, written via APPEND(S)
    dictitems -- optional iterator of (key, value) pairs, written via
                 SETITEM(S)
    obj       -- the object being pickled; memoized when not None

    Raises PicklingError if args is not a tuple, if func is not
    callable, or if the __newobj__ convention described below is
    violated.
    """
    # args must be a tuple (None is not accepted)
    if not isinstance(args, TupleType):
        raise PicklingError("args from reduce() should be a tuple")

    # func must be callable
    if not hasattr(func, '__call__'):
        raise PicklingError("func from reduce should be callable")

    save = self.save
    write = self.write

    # Protocol 2 special case: if func's name is __newobj__, use NEWOBJ.
    #
    # A __reduce__ implementation can direct protocol 2 to use the more
    # efficient NEWOBJ opcode while still letting protocols 0 and 1
    # work normally.  For that, the function it returns must be called
    # __newobj__ and its first argument must be a new-style class.
    # pickle cannot verify it, but __newobj__ should be implemented as:
    #
    #     def __newobj__(cls, *args):
    #         return cls.__new__(cls, *args)
    #
    # Protocols 0 and 1 pickle a reference to __newobj__; protocol 2
    # (and above) pickles a reference to cls, the remaining args tuple
    # and the NEWOBJ code, which calls cls.__new__(cls, *args) at
    # unpickling time (see load_newobj).  A state item, if present, is
    # pickled regardless of protocol and applied at unpickling time.
    #
    # No standard __newobj__ implementation exists; you have to provide
    # your own.  This is to enforce compatibility with Python 2.2
    # (pickles written using protocol 0 or 1 in Python 2.3 should be
    # unpicklable by Python 2.2).
    wants_newobj = (self.proto >= 2 and
                    getattr(func, "__name__", "") == "__newobj__")
    if wants_newobj:
        cls = args[0]
        if not hasattr(cls, "__new__"):
            raise PicklingError(
                "args[0] from __newobj__ args has no __new__")
        if obj is not None and cls is not obj.__class__:
            raise PicklingError(
                "args[0] from __newobj__ args has the wrong class")
        new_args = args[1:]
        save(cls)
        save(new_args)
        write(NEWOBJ)
    else:
        save(func)
        save(args)
        write(REDUCE)

    if obj is not None:
        self.memoize(obj)

    # More special cases (that work with older protocols as well):
    # when __reduce__ returns a tuple with 4 or 5 items, the 4th and
    # 5th item should be iterators that provide list items and dict
    # items (as (key, value) tuples), or None.
    if listitems is not None:
        self._batch_appends(listitems)

    if dictitems is not None:
        self._batch_setitems(dictitems)

    if state is not None:
        save(state)
        write(BUILD)
421
422	# Methods below this point are dispatched through the dispatch table
423
424	dispatch = {}
425
def save_none(self, obj):
    """Write the NONE opcode; obj itself is not inspected."""
    emit = self.write
    emit(NONE)
428	dispatch[NoneType] = save_none
429
def save_bool(self, obj):
    """Write a bool.

    Protocol 2 and up use the NEWTRUE/NEWFALSE opcodes; older
    protocols write the TRUE/FALSE text forms.
    """
    if self.proto >= 2:
        code = NEWTRUE if obj else NEWFALSE
    else:
        code = TRUE if obj else FALSE
    self.write(code)
435	dispatch[bool] = save_bool
436
def save_int(self, obj, pack=struct.pack):
    """Write an int using the most compact opcode available.

    In binary mode, values 0..0xff use BININT1, values up to 0xffff use
    BININT2, and anything else that fits a signed 4-byte two's-complement
    integer uses BININT.  Text mode, and ints too large for 4 bytes,
    fall back to INT with a decimal argument.
    """
    write = self.write
    if self.bin:
        # One- and two-byte unsigned forms first.
        if obj >= 0:
            if obj <= 0xff:
                write(BININT1 + chr(obj))
                return
            if obj <= 0xffff:
                write("%c%c%c" % (BININT2, obj&0xff, obj>>8))
                return
        # Python's right shift sign-extends, so the value fits a signed
        # 4-byte int exactly when everything above bit 30 is all zeros
        # or all ones.
        if (obj >> 31) in (0, -1):
            write(BININT + pack("<i", obj))
            return
    # Text pickle, or int too big to fit in signed 4-byte format.
    write(INT + repr(obj) + '\n')
449	return
450	# Next check for 4-byte signed ints:
451	high_bits = obj >> 31 # note that Python shift sign-extends
452	if high_bits == 0 or high_bits == -1:
453	# All high bits are copies of bit 2**31, so the value
454	# fits in a 4-byte signed int.
455	self.write(BININT + pack("<i", obj))
456	return
457	# Text pickle, or int too big to fit in signed 4-byte format.
458	self.write(INT + repr(obj) + '\n')
459	dispatch[IntType] = save_int
460
def save_long(self, obj, pack=struct.pack):
    """Write a long.

    Protocol 2 and up write the output of encode_long() behind LONG1
    (fewer than 256 bytes, 1-byte count) or LONG4 (4-byte count).
    Older protocols write LONG followed by repr(obj).
    """
    if self.proto < 2:
        self.write(LONG + repr(obj) + '\n')
        return
    payload = encode_long(obj)
    size = len(payload)
    if size < 256:
        self.write(LONG1 + chr(size) + payload)
    else:
        self.write(LONG4 + pack("<i", size) + payload)
471	dispatch[LongType] = save_long
472
def save_float(self, obj, pack=struct.pack):
    """Write a float: 8-byte big-endian BINFLOAT in binary mode,
    FLOAT followed by repr(obj) in text mode."""
    if not self.bin:
        self.write(FLOAT + repr(obj) + '\n')
        return
    self.write(BINFLOAT + pack('>d', obj))
478	dispatch[FloatType] = save_float
479
def save_string(self, obj, pack=struct.pack):
    """Write a byte string and memoize it.

    Binary mode uses SHORT_BINSTRING (length below 256) or BINSTRING
    (4-byte length); text mode writes STRING followed by repr(obj).
    """
    if not self.bin:
        self.write(STRING + repr(obj) + '\n')
    else:
        size = len(obj)
        if size < 256:
            self.write(SHORT_BINSTRING + chr(size) + obj)
        else:
            self.write(BINSTRING + pack("<i", size) + obj)
    self.memoize(obj)
490	dispatch[StringType] = save_string
491
def save_unicode(self, obj, pack=struct.pack):
    """Write a unicode string and memoize it.

    Binary mode writes BINUNICODE with a 4-byte length and the UTF-8
    encoding.  Text mode writes UNICODE followed by the
    raw-unicode-escape encoding; backslashes and newlines are replaced
    by \\u escapes first so the argument stays on one line.
    """
    if not self.bin:
        obj = obj.replace("\\", "\\u005c")
        obj = obj.replace("\n", "\\u000a")
        self.write(UNICODE + obj.encode('raw-unicode-escape') + '\n')
    else:
        utf8 = obj.encode('utf-8')
        self.write(BINUNICODE + pack("<i", len(utf8)) + utf8)
    self.memoize(obj)
502	dispatch[UnicodeType] = save_unicode
503
if StringType is UnicodeType:
    # This is true for Jython, where one type covers both kinds of
    # string.  Replace the StringType handler with one that asks each
    # object whether it is unicode.
    def save_string(self, obj, pack=struct.pack):
        """Write a string, as unicode or as bytes depending on
        obj.isunicode(), then memoize it."""
        is_unicode = obj.isunicode()
        write = self.write

        if not self.bin:
            if is_unicode:
                obj = obj.replace("\\", "\\u005c")
                obj = obj.replace("\n", "\\u000a")
                obj = obj.encode('raw-unicode-escape')
                write(UNICODE + obj + '\n')
            else:
                write(STRING + repr(obj) + '\n')
        else:
            if is_unicode:
                obj = obj.encode("utf-8")
            size = len(obj)
            if size < 256 and not is_unicode:
                write(SHORT_BINSTRING + chr(size) + obj)
            else:
                packed = pack("<i", size)
                if is_unicode:
                    write(BINUNICODE + packed + obj)
                else:
                    write(BINSTRING + packed + obj)
        self.memoize(obj)
    dispatch[StringType] = save_string
531
def save_tuple(self, obj):
    """Write a tuple, coping with tuples that (indirectly) contain
    themselves.

    An empty tuple is written directly and is not memoized.  With
    protocol 2 and up, tuples of up to three items use the TUPLE1-3
    opcodes; everything else is written as MARK, the items, TUPLE.
    The tuple is memoized only after its items have been saved.
    """
    write = self.write
    proto = self.proto

    n = len(obj)
    if n == 0:
        if proto:
            write(EMPTY_TUPLE)
        else:
            write(MARK + TUPLE)
        return

    save = self.save
    memo = self.memo
    if n <= 3 and proto >= 2:
        for element in obj:
            save(element)
        # Subtle.  Same as in the big comment below.
        if id(obj) in memo:
            # No MARK was written on this path, so drop the n items
            # one by one before fetching the tuple from the memo.
            get = self.get(memo[id(obj)][0])
            write(POP * n + get)
        else:
            write(_tuplesize2code[n])
            self.memoize(obj)
        return

    # proto 0 or proto 1 and tuple isn't empty, or proto > 1 and tuple
    # has more than 3 elements.
    write(MARK)
    for element in obj:
        save(element)

    if id(obj) in memo:
        # Subtle.  obj was not in memo when we entered save_tuple(), so
        # the process of saving the tuple's elements must have saved
        # the tuple itself:  the tuple is recursive.  The proper action
        # now is to throw away everything we put on the stack, and
        # simply GET the tuple (it's already constructed).  This check
        # could have been done in the "for element" loop instead, but
        # recursive tuples are a rare thing.
        get = self.get(memo[id(obj)][0])
        if proto:
            write(POP_MARK + get)
        else:   # proto 0 -- POP_MARK not available
            # n items plus the MARK itself have to be popped.
            write(POP * (n+1) + get)
        return

    # No recursion.
    self.write(TUPLE)
    self.memoize(obj)
582
583	dispatch[TupleType] = save_tuple
584
# Nothing in Python 2.3 uses save_empty_tuple().  A Pickler subclass in
# Zope3 was found to call it, though, so removing it would not be
# harmless.
def save_empty_tuple(self, obj):
    """Write the EMPTY_TUPLE opcode; obj itself is not inspected."""
    emit = self.write
    emit(EMPTY_TUPLE)
590
def save_list(self, obj):
    """Write a list: an empty list first, then its items in batches.

    The list is memoized before its items are saved, so items that
    refer back to it are written as memo references.
    """
    # proto 0 has no EMPTY_LIST opcode, so build one from MARK + LIST.
    opener = EMPTY_LIST if self.bin else MARK + LIST
    self.write(opener)

    self.memoize(obj)
    self._batch_appends(iter(obj))
601
602	dispatch[ListType] = save_list
603
604	# Keep in synch with cPickle's BATCHSIZE. Nothing will break if it gets
605	# out of synch, though.
606	_BATCHSIZE = 1000
607
def _batch_appends(self, items):
    """Write list items taken from the iterator *items*.

    Protocol 0 writes one APPEND per item.  Binary protocols pull up to
    _BATCHSIZE items at a time (via items.next()) and write each batch
    as MARK ... APPENDS, or as a plain APPEND for a batch of one.
    """
    save = self.save
    write = self.write

    if not self.bin:
        for item in items:
            save(item)
            write(APPEND)
        return

    slots = xrange(self._BATCHSIZE)
    while items is not None:
        batch = []
        for _ in slots:
            try:
                item = items.next()
            except StopIteration:
                # Exhausted: finish this batch, then leave the loop.
                items = None
                break
            batch.append(item)
        count = len(batch)
        if count > 1:
            write(MARK)
            for item in batch:
                save(item)
            write(APPENDS)
        elif count:
            save(batch[0])
            write(APPEND)
        # an empty batch means there is nothing left to write
639
def save_dict(self, obj):
    """Write a dict: an empty dict first, then its items in batches.

    The dict is memoized before its items are saved, so values that
    refer back to it are written as memo references.
    """
    # proto 0 has no EMPTY_DICT opcode, so build one from MARK + DICT.
    opener = EMPTY_DICT if self.bin else MARK + DICT
    self.write(opener)

    self.memoize(obj)
    self._batch_setitems(obj.iteritems())
650
651	dispatch[DictionaryType] = save_dict
652	if not PyStringMap is None:
653	dispatch[PyStringMap] = save_dict
654
def _batch_setitems(self, items):
    """Write (key, value) pairs taken from the iterator *items*.

    Protocol 0 writes one SETITEM per pair.  Binary protocols pull up
    to _BATCHSIZE pairs at a time (via items.next()) and write each
    batch as MARK ... SETITEMS, or as a plain SETITEM for a batch of
    one.
    """
    save = self.save
    write = self.write

    if not self.bin:
        for key, value in items:
            save(key)
            save(value)
            write(SETITEM)
        return

    slots = xrange(self._BATCHSIZE)
    while items is not None:
        batch = []
        for _ in slots:
            try:
                batch.append(items.next())
            except StopIteration:
                # Exhausted: finish this batch, then leave the loop.
                items = None
                break
        count = len(batch)
        if count > 1:
            write(MARK)
            for key, value in batch:
                save(key)
                save(value)
            write(SETITEMS)
        elif count:
            key, value = batch[0]
            save(key)
            save(value)
            write(SETITEM)
        # an empty batch means there is nothing left to write
689
def save_inst(self, obj):
    """Write an instance registered under InstanceType.

    The constructor arguments come from obj.__getinitargs__() when that
    method exists, otherwise there are none.  Binary protocols write
    MARK, the class, the arguments and OBJ; protocol 0 writes MARK, the
    arguments and INST with the module and class names.  The state
    (obj.__getstate__() if defined, else obj.__dict__) follows with
    BUILD.
    """
    cls = obj.__class__

    memo = self.memo
    write = self.write
    save = self.save
    binary = self.bin

    if hasattr(obj, '__getinitargs__'):
        args = obj.__getinitargs__()
        len(args)  # XXX Assert it's a sequence
        # args may be a temporary; keep it alive so its id stays unique
        _keep_alive(args, memo)
    else:
        args = ()

    write(MARK)
    if binary:
        save(cls)
    for arg in args:
        save(arg)
    if binary:
        write(OBJ)
    else:
        write(INST + cls.__module__ + '\n' + cls.__name__ + '\n')

    self.memoize(obj)

    try:
        getstate = obj.__getstate__
    except AttributeError:
        state = obj.__dict__
    else:
        state = getstate()
        # same reason as for args above
        _keep_alive(state, memo)
    save(state)
    write(BUILD)
727
728	dispatch[InstanceType] = save_inst
729
def save_global(self, obj, name=None, pack=struct.pack):
    """Write obj as a reference to a module-level name.

    name defaults to obj.__name__; the module comes from obj.__module__
    or, failing that, from whichmodule().  The name is looked up again
    in that module, and PicklingError is raised unless the lookup
    yields obj itself.

    With protocol 2 and up, a (module, name) pair that has a code in
    the copy_reg extension registry is written as EXT1/EXT2/EXT4 and is
    not memoized.  Otherwise GLOBAL is written and obj is memoized.
    """
    write = self.write

    if name is None:
        name = obj.__name__

    module = getattr(obj, "__module__", None)
    if module is None:
        module = whichmodule(obj, name)

    try:
        __import__(module)
        found = getattr(sys.modules[module], name)
    except (ImportError, KeyError, AttributeError):
        raise PicklingError(
            "Can't pickle %r: it's not found as %s.%s" %
            (obj, module, name))
    if found is not obj:
        raise PicklingError(
            "Can't pickle %r: it's not the same object as %s.%s" %
            (obj, module, name))

    if self.proto >= 2:
        code = _extension_registry.get((module, name))
        if code:
            assert code > 0
            # Pick the smallest EXT opcode that can hold the code.
            if code <= 0xff:
                write(EXT1 + chr(code))
            elif code <= 0xffff:
                write("%c%c%c" % (EXT2, code&0xff, code>>8))
            else:
                write(EXT4 + pack("<i", code))
            return

    write(GLOBAL + module + '\n' + name + '\n')
    self.memoize(obj)
769
770	dispatch[ClassType] = save_global
771	dispatch[FunctionType] = save_global
772	dispatch[BuiltinFunctionType] = save_global
773	dispatch[TypeType] = save_global
774
775	# Pickling helpers
776
777	def _keep_alive(x, memo):
778	"""Keeps a reference to the object x in the memo.
779
780	Because we remember objects by their id, we have
781	to assure that possibly temporary objects are kept
782	alive by referencing them.
783	We store a reference at the id of the memo, which should
784	normally not be used unless someone tries to deepcopy
785	the memo itself...
786	"""
787	try:
788	memo[id(memo)].append(x)
789	except KeyError:
790	# aha, this is the first one :-)
791	memo[id(memo)]=[x]
792
793
794	# A cache for whichmodule(), mapping a function object to the name of
795	# the module in which the function was found.
796
797	classmap = {} # called classmap for backwards compatibility
798
def whichmodule(func, funcname):
    """Return the name of the module in which func is defined.

    func.__module__ is used when it is set.  Otherwise the result is
    taken from the classmap cache or found by searching sys.modules for
    a module (other than __main__) whose attribute funcname is func.
    "__main__" is returned when no module matches.  Search results are
    stored in classmap.
    """
    # Python functions should always get an __module__ from their globals.
    declared = getattr(func, "__module__", None)
    if declared is not None:
        return declared
    if func in classmap:
        return classmap[func]

    found = '__main__'
    for name, module in sys.modules.items():
        if module is None:
            continue  # skip dummy package entries
        if name != '__main__' and getattr(module, funcname, None) is func:
            found = name
            break
    classmap[func] = found
    return found
823
824
825	# Unpickling machinery
826
827	class Unpickler:
828
829	def __init__(self, file):
830	"""This takes a file-like object for reading a pickle data stream.
831
832	The protocol version of the pickle is detected automatically, so no
833	proto argument is needed.
834
835	The file-like object must have two methods, a read() method that
836	takes an integer argument, and a readline() method that requires no
837	arguments. Both methods should return a string. Thus file-like
838	object can be a file object opened for reading, a StringIO object,
839	or any other custom object that meets this interface.
840	"""
841	self.readline = file.readline
842	self.read = file.read
843	self.memo = {}
844
def load(self):
    """Read a pickled object representation from the open file.

    Return the reconstituted object hierarchy specified in the file.

    Opcodes are read one byte at a time and run through self.dispatch
    until one of the handlers raises _Stop, whose value is the result.
    """
    self.mark = object()  # any new unique object
    self.stack = []
    self.append = self.stack.append
    next_byte = self.read
    handlers = self.dispatch
    try:
        while True:
            opcode = next_byte(1)
            handlers[opcode](self)
    except _Stop as stopinst:
        return stopinst.value
861
# Find the topmost mark on the stack.
#
# This could be sped up by keeping a second stack holding the indices
# at which marks appear.  That second stack would in fact suffice, and
# no mark objects would need to be pushed on self.stack at all.
# Pushing them is probably still a good thing: if the pickle is corrupt
# (or hostile), finding self.mark embedded in unpickled objects may
# give a clue.
def marker(self):
    """Return the largest index k such that self.stack[k] is self.mark.

    If the stack holds no mark, the downward scan eventually raises
    IndexError.
    """
    stack = self.stack
    mark = self.mark
    k = len(stack) - 1
    while stack[k] is not mark:
        k -= 1
    return k
876
877	dispatch = {}
878
def load_eof(self):
    """Handler for the empty string returned by read(1): raise EOFError."""
    raise EOFError
881	dispatch[''] = load_eof
882
def load_proto(self):
    """Handle PROTO: read the one-byte protocol number and validate it.

    Raises ValueError if the number is greater than HIGHEST_PROTOCOL.
    """
    proto = ord(self.read(1))
    # Compare against the module constant rather than a literal so the
    # reader's limit cannot drift from the advertised one.
    if not 0 <= proto <= HIGHEST_PROTOCOL:
        raise ValueError("unsupported pickle protocol: %d" % proto)
887	dispatch[PROTO] = load_proto
888
def load_persid(self):
    """Handle PERSID: read the id from the rest of the line (last
    character dropped) and push self.persistent_load(id)."""
    line = self.readline()
    resolved = self.persistent_load(line[:-1])
    self.append(resolved)
892	dispatch[PERSID] = load_persid
893
def load_binpersid(self):
    """Handle BINPERSID: pop the id off the stack and push
    self.persistent_load(id)."""
    pid = self.stack.pop()
    resolved = self.persistent_load(pid)
    self.append(resolved)
897	dispatch[BINPERSID] = load_binpersid
898
def load_none(self):
    """Handle NONE: push None."""
    push = self.append
    push(None)
901	dispatch[NONE] = load_none
902
def load_false(self):
    """Handle NEWFALSE: push False."""
    push = self.append
    push(False)
905	dispatch[NEWFALSE] = load_false
906
def load_true(self):
    """Handle NEWTRUE: push True."""
    push = self.append
    push(True)
909	dispatch[NEWTRUE] = load_true
910
def load_int(self):
    """Handle INT: push an int, long or bool read from the line.

    The arguments used by the TRUE and FALSE pseudo-opcodes decode to
    True and False.  Any other line is parsed with int(), falling back
    to long() when int() raises ValueError.
    """
    text = self.readline()
    # TRUE/FALSE are 'I' plus an argument; compare against the argument.
    if text == FALSE[1:]:
        self.append(False)
        return
    if text == TRUE[1:]:
        self.append(True)
        return
    try:
        number = int(text)
    except ValueError:
        number = long(text)
    self.append(number)
923	dispatch[INT] = load_int
924
def load_binint(self):
    """Handle BININT: push the four-byte signed int that follows.

    The bytes are decoded by prefixing marshal's 'i' type code and
    handing them to marshal.loads (mloads).
    """
    raw = self.read(4)
    self.append(mloads('i' + raw))
927	dispatch[BININT] = load_binint
928
def load_binint1(self):
    """Handle BININT1: push the 1-byte unsigned int that follows."""
    raw = self.read(1)
    self.append(ord(raw))
931	dispatch[BININT1] = load_binint1
932
def load_binint2(self):
    """Handle BININT2: push the 2-byte unsigned int that follows.

    Two zero bytes are appended so that the value can be decoded as a
    four-byte marshal int.
    """
    raw = self.read(2)
    self.append(mloads('i' + raw + '\000\000'))
935	dispatch[BININT2] = load_binint2
936
def load_long(self):
    """Handle LONG: push long(text, 0), where text is the rest of the
    line with its last character dropped."""
    text = self.readline()[:-1]
    self.append(long(text, 0))
939	dispatch[LONG] = load_long
940
def load_long1(self):
    """Handle LONG1: read a 1-byte count, then that many bytes, and
    push the result of decode_long() on them."""
    size = ord(self.read(1))
    payload = self.read(size)
    self.append(decode_long(payload))
945	dispatch[LONG1] = load_long1
946
def load_long4(self):
    """Handle LONG4: read a 4-byte signed count, then that many bytes,
    and push the result of decode_long() on them.

    Raises UnpicklingError if the count is negative.
    """
    n = mloads('i' + self.read(4))
    if n < 0:
        # Only a corrupt or malicious pickle can carry a negative
        # count.  Passing it on to read() would, for file objects,
        # consume the rest of the stream.
        raise UnpicklingError("LONG pickle has negative byte count")
    payload = self.read(n)
    self.append(decode_long(payload))
951	dispatch[LONG4] = load_long4
952
def load_float(self):
    """Handle FLOAT: push float(text), where text is the rest of the
    line with its last character dropped."""
    text = self.readline()[:-1]
    self.append(float(text))
955	dispatch[FLOAT] = load_float
956
def load_binfloat(self, unpack=struct.unpack):
    """Handle BINFLOAT: push the float stored in the next 8 bytes as a
    big-endian double."""
    raw = self.read(8)
    (value,) = unpack('>d', raw)
    self.append(value)
959	dispatch[BINFLOAT] = load_binfloat
960
def load_string(self):
    """Handle STRING: push the string whose quoted repr is on the line.

    The argument must begin and end with the same quote character
    (single or double) and be at least two characters long; otherwise
    ValueError("insecure string pickle") is raised.  The text between
    the quotes is decoded with the "string-escape" codec.
    """
    rep = self.readline()[:-1]
    quote = rep[:1]
    if (quote not in ('"', "'")
            or len(rep) < 2
            or not rep.endswith(quote)):
        raise ValueError("insecure string pickle")
    # Drop the opening and closing quote.
    self.append(rep[1:-1].decode("string-escape"))
972	dispatch[STRING] = load_string
973
def load_binstring(self):
    """Handle BINSTRING: read a 4-byte signed length, then push that
    many bytes as a string.

    Raises UnpicklingError if the length is negative.
    """
    size = mloads('i' + self.read(4))
    if size < 0:
        # Only a corrupt or malicious pickle can carry a negative
        # length.  Passing it on to read() would, for file objects,
        # consume the rest of the stream.
        raise UnpicklingError("BINSTRING pickle has negative byte count")
    self.append(self.read(size))
977	dispatch[BINSTRING] = load_binstring
978
def load_unicode(self):
    """Handle UNICODE: push the rest of the line (last character
    dropped) decoded with the raw-unicode-escape codec."""
    raw = self.readline()[:-1]
    self.append(unicode(raw, 'raw-unicode-escape'))
981	dispatch[UNICODE] = load_unicode
982
def load_binunicode(self):
    """Handle BINUNICODE: read a 4-byte signed length, then push that
    many bytes decoded as UTF-8.

    Raises UnpicklingError if the length is negative.
    """
    size = mloads('i' + self.read(4))
    if size < 0:
        # Only a corrupt or malicious pickle can carry a negative
        # length.  Passing it on to read() would, for file objects,
        # consume the rest of the stream.
        raise UnpicklingError("BINUNICODE pickle has negative byte count")
    self.append(unicode(self.read(size), 'utf-8'))
986	dispatch[BINUNICODE] = load_binunicode
987
def load_short_binstring(self):
    """Handle SHORT_BINSTRING: read a 1-byte length, then push that
    many bytes as a string."""
    size = ord(self.read(1))
    self.append(self.read(size))
991	dispatch[SHORT_BINSTRING] = load_short_binstring
992
def load_tuple(self):
    """Handle TUPLE: replace the topmost mark and everything above it
    with one tuple holding the items above the mark."""
    k = self.marker()
    stack = self.stack
    items = tuple(stack[k+1:])
    stack[k:] = [items]
996	dispatch[TUPLE] = load_tuple
997
def load_empty_tuple(self):
    """Handle EMPTY_TUPLE: push an empty tuple."""
    stack = self.stack
    stack.append(())
1000	dispatch[EMPTY_TUPLE] = load_empty_tuple
1001
def load_tuple1(self):
    """Handle TUPLE1: replace the top stack item with a 1-tuple of it."""
    stack = self.stack
    top = stack[-1]
    stack[-1] = (top,)
1004	dispatch[TUPLE1] = load_tuple1
1005
def load_tuple2(self):
    """Handle TUPLE2: replace the two top stack items with a 2-tuple
    of them."""
    stack = self.stack
    pair = (stack[-2], stack[-1])
    stack[-2:] = [pair]
1008	dispatch[TUPLE2] = load_tuple2
1009
def load_tuple3(self):
    """Handle TUPLE3: replace the three top stack items with a 3-tuple
    of them."""
    stack = self.stack
    triple = (stack[-3], stack[-2], stack[-1])
    stack[-3:] = [triple]
1012	dispatch[TUPLE3] = load_tuple3
1013
def load_empty_list(self):
    """Handle EMPTY_LIST: push a new empty list."""
    stack = self.stack
    stack.append([])
1016	dispatch[EMPTY_LIST] = load_empty_list
1017
def load_empty_dictionary(self):
    """Handle EMPTY_DICT: push a new empty dict."""
    stack = self.stack
    stack.append({})
1020	dispatch[EMPTY_DICT] = load_empty_dictionary
1021
def load_list(self):
    """Handle LIST: replace the topmost mark and everything above it
    with one list holding the items above the mark."""
    k = self.marker()
    stack = self.stack
    items = stack[k+1:]
    stack[k:] = [items]
1025	dispatch[LIST] = load_list
1026
def load_dict(self):
    """Handle DICT: replace the topmost mark and everything above it
    with one dict built from the items above the mark, read as
    alternating keys and values."""
    k = self.marker()
    flat = self.stack[k+1:]
    result = {}
    for pos in range(0, len(flat), 2):
        result[flat[pos]] = flat[pos+1]
    self.stack[k:] = [result]
1036	dispatch[DICT] = load_dict
1037
# INST and OBJ differ only in how they get a class object.  It's not
# only sensible to do the rest in a common routine, the two routines
# previously diverged and grew different bugs.
# klass is the class to instantiate, and k points to the topmost mark
# object, following which are the arguments for klass.__init__.
def _instantiate(self, klass, k):
    """Pop the mark at index k and the arguments above it, build an
    instance of klass from them, and push the instance.

    Raises TypeError, with the class name added to the message, if
    calling klass(*args) raises TypeError.
    """
    args = tuple(self.stack[k+1:])
    del self.stack[k:]
    instantiated = 0
    # With no arguments, a classic class that lacks __getinitargs__ is
    # created without calling klass itself: an _EmptyClass instance is
    # made and its __class__ is reassigned.
    # NOTE(review): _EmptyClass is defined outside the visible part of
    # this file; presumably a trivial classic class -- confirm.
    if (not args and
            type(klass) is ClassType and
            not hasattr(klass, "__getinitargs__")):
        try:
            value = _EmptyClass()
            value.__class__ = klass
            instantiated = 1
        except RuntimeError:
            # In restricted execution, assignment to inst.__class__ is
            # prohibited
            pass
    if not instantiated:
        try:
            value = klass(*args)
        except TypeError, err:
            # Re-raise with the class name in the message while keeping
            # the original traceback (three-argument raise).
            raise TypeError, "in constructor for %s: %s" % (
                klass.__name__, str(err)), sys.exc_info()[2]
    self.append(value)
1065
def load_inst(self):
    """Handle INST: read a module name and a class name (one per line),
    resolve them with find_class(), and instantiate the class with the
    arguments above the topmost mark."""
    module = self.readline()[:-1]
    name = self.readline()[:-1]
    klass = self.find_class(module, name)
    k = self.marker()
    self._instantiate(klass, k)
1071	dispatch[INST] = load_inst
1072
def load_obj(self):
    """OBJ: instantiate the class object sitting just above the mark."""
    # Stack layout: ... markobject classobject arg1 arg2 ...
    mark = self.marker()
    # Remove the class so that only the arguments remain above the mark.
    target = self.stack.pop(mark + 1)
    self._instantiate(target, mark)
1078	dispatch[OBJ] = load_obj
1079
def load_newobj(self):
    """NEWOBJ: pop an argument tuple and call cls.__new__(cls, *args).

    The class below the arguments is replaced by the new object.
    """
    stk = self.stack
    ctor_args = stk.pop()
    klass = stk[-1]
    stk[-1] = klass.__new__(klass, *ctor_args)
1085	dispatch[NEWOBJ] = load_newobj
1086
def load_global(self):
    """GLOBAL: read module and attribute names, look them up, push result.

    Each name is one line of input with its final character dropped.
    """
    module_name = self.readline()[:-1]
    attr_name = self.readline()[:-1]
    self.append(self.find_class(module_name, attr_name))
1092	dispatch[GLOBAL] = load_global
1093
def load_ext1(self):
    """EXT1: read a one-byte extension code and resolve it."""
    raw = self.read(1)
    self.get_extension(ord(raw))
1097	dispatch[EXT1] = load_ext1
1098
def load_ext2(self):
    """EXT2: read a two-byte extension code and resolve it."""
    raw = self.read(2)
    # Pad to four bytes so marshal can decode it as an 'i' integer.
    padded = 'i' + raw + '\000\000'
    self.get_extension(mloads(padded))
1102	dispatch[EXT2] = load_ext2
1103
def load_ext4(self):
    """EXT4: read a four-byte extension code and resolve it."""
    raw = self.read(4)
    # Decoded through marshal's 'i' integer format.
    self.get_extension(mloads('i' + raw))
1107	dispatch[EXT4] = load_ext4
1108
def get_extension(self, code):
    """Push the object registered under extension code `code`.

    The extension cache is consulted first.  On a miss the code is
    looked up in the inverted registry, resolved with find_class(), and
    the result is stored in the cache.

    Raises ValueError if no key is registered for the code.
    """
    # Unique sentinel, so that any cached value (even None) counts as a hit.
    missing = []
    found = _extension_cache.get(code, missing)
    if found is missing:
        key = _inverted_registry.get(code)
        if not key:
            raise ValueError("unregistered extension code %d" % code)
        found = self.find_class(*key)
        _extension_cache[code] = found
    self.append(found)
1121
def find_class(self, module, name):
    """Import `module` and return its attribute `name`.

    Subclasses may override this.
    """
    __import__(module)
    # __import__ returns the top-level package, so use sys.modules to
    # obtain the (possibly dotted) module itself.
    return getattr(sys.modules[module], name)
1128
def load_reduce(self):
    """REDUCE: pop an argument tuple, call the callable beneath it.

    The callable on the stack is replaced by its return value.
    """
    stk = self.stack
    call_args = stk.pop()
    callee = stk[-1]
    stk[-1] = callee(*call_args)
1135	dispatch[REDUCE] = load_reduce
1136
def load_pop(self):
    """POP: discard the top stack item."""
    stk = self.stack
    del stk[-1]
1139	dispatch[POP] = load_pop
1140
def load_pop_mark(self):
    """POP_MARK: discard the mark and everything above it."""
    start = self.marker()
    stk = self.stack
    del stk[start:]
1144	dispatch[POP_MARK] = load_pop_mark
1145
def load_dup(self):
    """DUP: push another reference to the top stack item."""
    top = self.stack[-1]
    self.append(top)
1148	dispatch[DUP] = load_dup
1149
def load_get(self):
    """GET: push the memo entry whose key is read as a line of input."""
    # The final character of the line is dropped.
    key = self.readline()[:-1]
    self.append(self.memo[key])
1152	dispatch[GET] = load_get
1153
def load_binget(self):
    """BINGET: push the memo entry indexed by a one-byte integer."""
    index = ord(self.read(1))
    # Memo keys are the repr() of the integer index.
    key = repr(index)
    self.append(self.memo[key])
1157	dispatch[BINGET] = load_binget
1158
def load_long_binget(self):
    """LONG_BINGET: push the memo entry indexed by a four-byte integer."""
    raw = self.read(4)
    index = mloads('i' + raw)
    # Memo keys are the repr() of the integer index.
    key = repr(index)
    self.append(self.memo[key])
1162	dispatch[LONG_BINGET] = load_long_binget
1163
def load_put(self):
    """PUT: store the top stack item in the memo under a key read as a line."""
    # Fetch the stack top first so that an empty stack fails before any
    # input is consumed.
    top = self.stack[-1]
    memo = self.memo
    key = self.readline()[:-1]
    memo[key] = top
1166	dispatch[PUT] = load_put
1167
def load_binput(self):
    """BINPUT: store the top stack item in the memo under a one-byte index."""
    index = ord(self.read(1))
    top = self.stack[-1]
    # Memo keys are the repr() of the integer index.
    self.memo[repr(index)] = top
1171	dispatch[BINPUT] = load_binput
1172
def load_long_binput(self):
    """LONG_BINPUT: store the top stack item under a four-byte memo index."""
    raw = self.read(4)
    index = mloads('i' + raw)
    top = self.stack[-1]
    # Memo keys are the repr() of the integer index.
    self.memo[repr(index)] = top
1176	dispatch[LONG_BINPUT] = load_long_binput
1177
def load_append(self):
    """APPEND: pop the top item and append it to the object beneath it."""
    stk = self.stack
    item = stk.pop()
    target = stk[-1]
    target.append(item)
1183	dispatch[APPEND] = load_append
1184
def load_appends(self):
    """APPENDS: extend the object below the mark with the items above it.

    The mark and the items above it are then removed from the stack.
    """
    stk = self.stack
    pos = self.marker()
    target = stk[pos - 1]
    pending = stk[pos + 1:]
    target.extend(pending)
    del stk[pos:]
1191	dispatch[APPENDS] = load_appends
1192
def load_setitem(self):
    """SETITEM: pop a value and a key, store them in the object beneath."""
    stk = self.stack
    val = stk.pop()
    key = stk.pop()
    target = stk[-1]
    target[key] = val
1199	dispatch[SETITEM] = load_setitem
1200
def load_setitems(self):
    """SETITEMS: store the key/value pairs above the mark in the object below it.

    The items above the mark are read as key, value, key, value, ...
    The mark and the items are then removed from the stack.
    """
    stk = self.stack
    pos = self.marker()
    target = stk[pos - 1]
    idx = pos + 1
    end = len(stk)
    while idx < end:
        target[stk[idx]] = stk[idx + 1]
        idx += 2
    del stk[pos:]
1209	dispatch[SETITEMS] = load_setitems
1210
def load_build(self):
    """BUILD: pop a state object and apply it to the object beneath it.

    If the object has a true-valued __setstate__ attribute, it is called
    with the state and nothing else is done.  Otherwise a 2-tuple state
    is split into (state, slotstate): the entries of state are stored in
    the object's __dict__, and the entries of slotstate are applied with
    setattr().
    """
    stack = self.stack
    state = stack.pop()
    inst = stack[-1]
    # The object's own __setstate__ hook, when present, takes precedence.
    setstate = getattr(inst, "__setstate__", None)
    if setstate:
        setstate(state)
        return
    slotstate = None
    # A 2-tuple carries the dict state plus a separate slot state.
    if isinstance(state, tuple) and len(state) == 2:
        state, slotstate = state
    if state:
        try:
            d = inst.__dict__
            try:
                # Intern the attribute names while copying them over.
                for k, v in state.iteritems():
                    d[intern(k)] = v
            # keys in state don't have to be strings
            # don't blow up, but don't go out of our way
            except TypeError:
                d.update(state)

        except RuntimeError:
            # XXX In restricted execution, the instance's __dict__
            # is not accessible. Use the old way of unpickling
            # the instance variables. This is a semantic
            # difference when unpickling in restricted
            # vs. unrestricted modes.
            # Note, however, that cPickle has never tried to do the
            # .update() business, and always uses
            # PyObject_SetItem(inst.__dict__, key, value) in a
            # loop over state.items().
            for k, v in state.items():
                setattr(inst, k, v)
    if slotstate:
        # Slot values are applied with setattr().
        for k, v in slotstate.items():
            setattr(inst, k, v)
1248	dispatch[BUILD] = load_build
1249
def load_mark(self):
    """MARK: push the mark object."""
    marker_obj = self.mark
    self.append(marker_obj)
1252	dispatch[MARK] = load_mark
1253
def load_stop(self):
    """STOP: pop the top stack item and raise _Stop carrying it."""
    result = self.stack.pop()
    raise _Stop(result)
1257	dispatch[STOP] = load_stop
1258
1259	# Helper class for load_inst/load_obj
1260
# Placeholder with no attributes or __init__.  _instantiate() creates an
# instance of it and then reassigns the instance's __class__, so the
# definition must be left exactly as it is.
class _EmptyClass:
    pass
1263
1264	# Encode/decode longs in linear time.
1265
1266	import binascii as _binascii
1267
def encode_long(x):
    r"""Encode a long to a two's complement little-endian binary string.
    Note that 0L is a special case, returning an empty string, to save a
    byte in the LONG1 pickling context.

    >>> encode_long(0L)
    ''
    >>> encode_long(255L)
    '\xff\x00'
    >>> encode_long(32767L)
    '\xff\x7f'
    >>> encode_long(-256L)
    '\x00\xff'
    >>> encode_long(-32768L)
    '\x00\x80'
    >>> encode_long(-128L)
    '\x80'
    >>> encode_long(127L)
    '\x7f'
    >>>
    """

    if x == 0:
        return ''
    if x > 0:
        ashex = hex(x)
        assert ashex.startswith("0x")
        # Characters that are not hex digits: the "0x" prefix plus a
        # trailing 'L' when hex() emitted one.
        njunkchars = 2 + ashex.endswith('L')
        nibbles = len(ashex) - njunkchars
        if nibbles & 1:
            # need an even # of nibbles for unhexlify
            ashex = "0x0" + ashex[2:]
        elif int(ashex[2], 16) >= 8:
            # "looks negative", so need a byte of sign bits
            ashex = "0x00" + ashex[2:]
    else:
        # Build the 256's-complement: (1L << nbytes) + x. The trick is
        # to find the number of bytes in linear time (although that should
        # really be a constant-time task).
        ashex = hex(-x)
        assert ashex.startswith("0x")
        njunkchars = 2 + ashex.endswith('L')
        nibbles = len(ashex) - njunkchars
        if nibbles & 1:
            # Extend to a full byte.
            nibbles += 1
        nbits = nibbles * 4
        x += 1L << nbits
        assert x > 0
        ashex = hex(x)
        njunkchars = 2 + ashex.endswith('L')
        newnibbles = len(ashex) - njunkchars
        if newnibbles < nibbles:
            # Left-pad with zero digits back to the intended width.
            ashex = "0x" + "0" * (nibbles - newnibbles) + ashex[2:]
        if int(ashex[2], 16) < 8:
            # "looks positive", so need a byte of sign bits
            ashex = "0xff" + ashex[2:]

    # Strip the "0x" prefix, and the 'L' suffix when present.
    if ashex.endswith('L'):
        ashex = ashex[2:-1]
    else:
        ashex = ashex[2:]
    assert len(ashex) & 1 == 0, (x, ashex)
    binary = _binascii.unhexlify(ashex)
    # The hex digits are most-significant first; reverse the bytes to get
    # the little-endian result.
    return binary[::-1]
1333
1334	def decode_long(data):
1335	r"""Decode a long from a two's complement little-endian binary string.
1336
1337	>>> decode_long('')
1338	0L
1339	>>> decode_long("\xff\x00")
1340	255L
1341	>>> decode_long("\xff\x7f")
1342	32767L
1343	>>> decode_long("\x00\xff")
1344	-256L
1345	>>> decode_long("\x00\x80")
1346	-32768L
1347	>>> decode_long("\x80")
1348	-128L
1349	>>> decode_long("\x7f")
1350	127L
1351	"""
1352
1353	nbytes = len(data)
1354	if nbytes == 0:
1355	return 0L
1356	ashex = _binascii.hexlify(data[::-1])
1357	n = long(ashex, 16) # quadratic time before Python 2.3; linear now
1358	if data[-1] >= '\x80':
1359	n -= 1L << (nbytes * 8)
1360	return n
1361
1362	# Shorthands
1363
1364	try:
1365	from cStringIO import StringIO
1366	except ImportError:
1367	from StringIO import StringIO
1368
def dump(obj, file, protocol=None):
    """Pickle obj to file using a Pickler created with the given protocol."""
    pickler = Pickler(file, protocol)
    pickler.dump(obj)
1371
def dumps(obj, protocol=None):
    """Return the pickled representation of obj as a string."""
    buf = StringIO()
    pickler = Pickler(buf, protocol)
    pickler.dump(obj)
    return buf.getvalue()
1376
def load(file):
    """Unpickle one object from file and return it."""
    unpickler = Unpickler(file)
    return unpickler.load()
1379
def loads(str):
    """Unpickle one object from the string str and return it."""
    # Wrap the string in a file-like object for the Unpickler.
    source = StringIO(str)
    unpickler = Unpickler(source)
    return unpickler.load()
1383
1384	# Doctest
1385
def _test():
    """Run doctest.testmod() and return its result."""
    from doctest import testmod
    return testmod()
1389
# Run the doctests when this file is executed as a script.
if __name__ == "__main__":
    _test()

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: python/trunk/Lib/pickle.py@ 606

Download in other formats: