Context Navigation

collections.py@ 603

Last change on this file since 603 was 391, checked in by dmik, 11 years ago
python: Merge vendor 2.7.6 to trunk.
Property svn:eol-style set to `native`
File size: 25.4 KB

Line
1	__all__ = ['Counter', 'deque', 'defaultdict', 'namedtuple', 'OrderedDict']
2	# For bootstrapping reasons, the collection ABCs are defined in _abcoll.py.
3	# They should however be considered an integral part of collections.py.
4	from _abcoll import *
5	import _abcoll
6	__all__ += _abcoll.__all__
7
8	from _collections import deque, defaultdict
9	from operator import itemgetter as _itemgetter, eq as _eq
10	from keyword import iskeyword as _iskeyword
11	import sys as _sys
12	import heapq as _heapq
13	from itertools import repeat as _repeat, chain as _chain, starmap as _starmap
14	from itertools import imap as _imap
15
16	try:
17	from thread import get_ident as _get_ident
18	except ImportError:
19	from dummy_thread import get_ident as _get_ident
20
21
22	################################################################################
23	### OrderedDict
24	################################################################################
25
26	class OrderedDict(dict):
27	'Dictionary that remembers insertion order'
28	# An inherited dict maps keys to values.
29	# The inherited dict provides __getitem__, __len__, __contains__, and get.
30	# The remaining methods are order-aware.
31	# Big-O running times for all methods are the same as regular dictionaries.
32
33	# The internal self.__map dict maps keys to links in a doubly linked list.
34	# The circular doubly linked list starts and ends with a sentinel element.
35	# The sentinel element never gets deleted (this simplifies the algorithm).
36	# Each link is stored as a list of length three: [PREV, NEXT, KEY].
37
38	def __init__(self, args, *kwds):
39	'''Initialize an ordered dictionary. The signature is the same as
40	regular dictionaries, but keyword arguments are not recommended because
41	their insertion order is arbitrary.
42
43	'''
44	if len(args) > 1:
45	raise TypeError('expected at most 1 arguments, got %d' % len(args))
46	try:
47	self.__root
48	except AttributeError:
49	self.__root = root = [] # sentinel node
50	root[:] = [root, root, None]
51	self.__map = {}
52	self.__update(args, *kwds)
53
54	def __setitem__(self, key, value, dict_setitem=dict.__setitem__):
55	'od.__setitem__(i, y) <==> od[i]=y'
56	# Setting a new item creates a new link at the end of the linked list,
57	# and the inherited dictionary is updated with the new key/value pair.
58	if key not in self:
59	root = self.__root
60	last = root[0]
61	last[1] = root[0] = self.__map[key] = [last, root, key]
62	return dict_setitem(self, key, value)
63
64	def __delitem__(self, key, dict_delitem=dict.__delitem__):
65	'od.__delitem__(y) <==> del od[y]'
66	# Deleting an existing item uses self.__map to find the link which gets
67	# removed by updating the links in the predecessor and successor nodes.
68	dict_delitem(self, key)
69	link_prev, link_next, _ = self.__map.pop(key)
70	link_prev[1] = link_next # update link_prev[NEXT]
71	link_next[0] = link_prev # update link_next[PREV]
72
73	def __iter__(self):
74	'od.__iter__() <==> iter(od)'
75	# Traverse the linked list in order.
76	root = self.__root
77	curr = root[1] # start at the first node
78	while curr is not root:
79	yield curr[2] # yield the curr[KEY]
80	curr = curr[1] # move to next node
81
82	def __reversed__(self):
83	'od.__reversed__() <==> reversed(od)'
84	# Traverse the linked list in reverse order.
85	root = self.__root
86	curr = root[0] # start at the last node
87	while curr is not root:
88	yield curr[2] # yield the curr[KEY]
89	curr = curr[0] # move to previous node
90
91	def clear(self):
92	'od.clear() -> None. Remove all items from od.'
93	root = self.__root
94	root[:] = [root, root, None]
95	self.__map.clear()
96	dict.clear(self)
97
98	# -- the following methods do not depend on the internal structure --
99
100	def keys(self):
101	'od.keys() -> list of keys in od'
102	return list(self)
103
104	def values(self):
105	'od.values() -> list of values in od'
106	return [self[key] for key in self]
107
108	def items(self):
109	'od.items() -> list of (key, value) pairs in od'
110	return [(key, self[key]) for key in self]
111
112	def iterkeys(self):
113	'od.iterkeys() -> an iterator over the keys in od'
114	return iter(self)
115
116	def itervalues(self):
117	'od.itervalues -> an iterator over the values in od'
118	for k in self:
119	yield self[k]
120
121	def iteritems(self):
122	'od.iteritems -> an iterator over the (key, value) pairs in od'
123	for k in self:
124	yield (k, self[k])
125
126	update = MutableMapping.update
127
128	__update = update # let subclasses override update without breaking __init__
129
130	__marker = object()
131
132	def pop(self, key, default=__marker):
133	'''od.pop(k[,d]) -> v, remove specified key and return the corresponding
134	value. If key is not found, d is returned if given, otherwise KeyError
135	is raised.
136
137	'''
138	if key in self:
139	result = self[key]
140	del self[key]
141	return result
142	if default is self.__marker:
143	raise KeyError(key)
144	return default
145
146	def setdefault(self, key, default=None):
147	'od.setdefault(k[,d]) -> od.get(k,d), also set od[k]=d if k not in od'
148	if key in self:
149	return self[key]
150	self[key] = default
151	return default
152
153	def popitem(self, last=True):
154	'''od.popitem() -> (k, v), return and remove a (key, value) pair.
155	Pairs are returned in LIFO order if last is true or FIFO order if false.
156
157	'''
158	if not self:
159	raise KeyError('dictionary is empty')
160	key = next(reversed(self) if last else iter(self))
161	value = self.pop(key)
162	return key, value
163
164	def __repr__(self, _repr_running={}):
165	'od.__repr__() <==> repr(od)'
166	call_key = id(self), _get_ident()
167	if call_key in _repr_running:
168	return '...'
169	_repr_running[call_key] = 1
170	try:
171	if not self:
172	return '%s()' % (self.__class__.__name__,)
173	return '%s(%r)' % (self.__class__.__name__, self.items())
174	finally:
175	del _repr_running[call_key]
176
177	def __reduce__(self):
178	'Return state information for pickling'
179	items = [[k, self[k]] for k in self]
180	inst_dict = vars(self).copy()
181	for k in vars(OrderedDict()):
182	inst_dict.pop(k, None)
183	if inst_dict:
184	return (self.__class__, (items,), inst_dict)
185	return self.__class__, (items,)
186
187	def copy(self):
188	'od.copy() -> a shallow copy of od'
189	return self.__class__(self)
190
191	@classmethod
192	def fromkeys(cls, iterable, value=None):
193	'''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S.
194	If not specified, the value defaults to None.
195
196	'''
197	self = cls()
198	for key in iterable:
199	self[key] = value
200	return self
201
202	def __eq__(self, other):
203	'''od.__eq__(y) <==> od==y. Comparison to another OD is order-sensitive
204	while comparison to a regular mapping is order-insensitive.
205
206	'''
207	if isinstance(other, OrderedDict):
208	return dict.__eq__(self, other) and all(_imap(_eq, self, other))
209	return dict.__eq__(self, other)
210
211	def __ne__(self, other):
212	'od.__ne__(y) <==> od!=y'
213	return not self == other
214
215	# -- the following methods support python 3.x style dictionary views --
216
217	def viewkeys(self):
218	"od.viewkeys() -> a set-like object providing a view on od's keys"
219	return KeysView(self)
220
221	def viewvalues(self):
222	"od.viewvalues() -> an object providing a view on od's values"
223	return ValuesView(self)
224
225	def viewitems(self):
226	"od.viewitems() -> a set-like object providing a view on od's items"
227	return ItemsView(self)
228
229
230	################################################################################
231	### namedtuple
232	################################################################################
233
234	_class_template = '''\
235	class {typename}(tuple):
236	'{typename}({arg_list})'
237
238	__slots__ = ()
239
240	_fields = {field_names!r}
241
242	def __new__(_cls, {arg_list}):
243	'Create new instance of {typename}({arg_list})'
244	return _tuple.__new__(_cls, ({arg_list}))
245
246	@classmethod
247	def _make(cls, iterable, new=tuple.__new__, len=len):
248	'Make a new {typename} object from a sequence or iterable'
249	result = new(cls, iterable)
250	if len(result) != {num_fields:d}:
251	raise TypeError('Expected {num_fields:d} arguments, got %d' % len(result))
252	return result
253
254	def __repr__(self):
255	'Return a nicely formatted representation string'
256	return '{typename}({repr_fmt})' % self
257
258	def _asdict(self):
259	'Return a new OrderedDict which maps field names to their values'
260	return OrderedDict(zip(self._fields, self))
261
262	def _replace(_self, **kwds):
263	'Return a new {typename} object replacing specified fields with new values'
264	result = _self._make(map(kwds.pop, {field_names!r}, _self))
265	if kwds:
266	raise ValueError('Got unexpected field names: %r' % kwds.keys())
267	return result
268
269	def __getnewargs__(self):
270	'Return self as a plain tuple. Used by copy and pickle.'
271	return tuple(self)
272
273	__dict__ = _property(_asdict)
274
275	def __getstate__(self):
276	'Exclude the OrderedDict from pickling'
277	pass
278
279	{field_defs}
280	'''
281
282	_repr_template = '{name}=%r'
283
284	_field_template = '''\
285	{name} = _property(_itemgetter({index:d}), doc='Alias for field number {index:d}')
286	'''
287
288	def namedtuple(typename, field_names, verbose=False, rename=False):
289	"""Returns a new subclass of tuple with named fields.
290
291	>>> Point = namedtuple('Point', ['x', 'y'])
292	>>> Point.__doc__ # docstring for the new class
293	'Point(x, y)'
294	>>> p = Point(11, y=22) # instantiate with positional args or keywords
295	>>> p[0] + p[1] # indexable like a plain tuple
296	33
297	>>> x, y = p # unpack like a regular tuple
298	>>> x, y
299	(11, 22)
300	>>> p.x + p.y # fields also accessable by name
301	33
302	>>> d = p._asdict() # convert to a dictionary
303	>>> d['x']
304	11
305	>>> Point(**d) # convert from a dictionary
306	Point(x=11, y=22)
307	>>> p._replace(x=100) # _replace() is like str.replace() but targets named fields
308	Point(x=100, y=22)
309
310	"""
311
312	# Validate the field names. At the user's option, either generate an error
313	# message or automatically replace the field name with a valid name.
314	if isinstance(field_names, basestring):
315	field_names = field_names.replace(',', ' ').split()
316	field_names = map(str, field_names)
317	if rename:
318	seen = set()
319	for index, name in enumerate(field_names):
320	if (not all(c.isalnum() or c=='_' for c in name)
321	or _iskeyword(name)
322	or not name
323	or name[0].isdigit()
324	or name.startswith('_')
325	or name in seen):
326	field_names[index] = '_%d' % index
327	seen.add(name)
328	for name in [typename] + field_names:
329	if not all(c.isalnum() or c=='_' for c in name):
330	raise ValueError('Type names and field names can only contain '
331	'alphanumeric characters and underscores: %r' % name)
332	if _iskeyword(name):
333	raise ValueError('Type names and field names cannot be a '
334	'keyword: %r' % name)
335	if name[0].isdigit():
336	raise ValueError('Type names and field names cannot start with '
337	'a number: %r' % name)
338	seen = set()
339	for name in field_names:
340	if name.startswith('_') and not rename:
341	raise ValueError('Field names cannot start with an underscore: '
342	'%r' % name)
343	if name in seen:
344	raise ValueError('Encountered duplicate field name: %r' % name)
345	seen.add(name)
346
347	# Fill-in the class template
348	class_definition = _class_template.format(
349	typename = typename,
350	field_names = tuple(field_names),
351	num_fields = len(field_names),
352	arg_list = repr(tuple(field_names)).replace("'", "")[1:-1],
353	repr_fmt = ', '.join(_repr_template.format(name=name)
354	for name in field_names),
355	field_defs = '\n'.join(_field_template.format(index=index, name=name)
356	for index, name in enumerate(field_names))
357	)
358	if verbose:
359	print class_definition
360
361	# Execute the template string in a temporary namespace and support
362	# tracing utilities by setting a value for frame.f_globals['__name__']
363	namespace = dict(_itemgetter=_itemgetter, __name__='namedtuple_%s' % typename,
364	OrderedDict=OrderedDict, _property=property, _tuple=tuple)
365	try:
366	exec class_definition in namespace
367	except SyntaxError as e:
368	raise SyntaxError(e.message + ':\n' + class_definition)
369	result = namespace[typename]
370
371	# For pickling to work, the __module__ variable needs to be set to the frame
372	# where the named tuple is created. Bypass this step in environments where
373	# sys._getframe is not defined (Jython for example) or sys._getframe is not
374	# defined for arguments greater than 0 (IronPython).
375	try:
376	result.__module__ = _sys._getframe(1).f_globals.get('__name__', '__main__')
377	except (AttributeError, ValueError):
378	pass
379
380	return result
381
382
383	########################################################################
384	### Counter
385	########################################################################
386
387	class Counter(dict):
388	'''Dict subclass for counting hashable items. Sometimes called a bag
389	or multiset. Elements are stored as dictionary keys and their counts
390	are stored as dictionary values.
391
392	>>> c = Counter('abcdeabcdabcaba') # count elements from a string
393
394	>>> c.most_common(3) # three most common elements
395	[('a', 5), ('b', 4), ('c', 3)]
396	>>> sorted(c) # list all unique elements
397	['a', 'b', 'c', 'd', 'e']
398	>>> ''.join(sorted(c.elements())) # list elements with repetitions
399	'aaaaabbbbcccdde'
400	>>> sum(c.values()) # total of all counts
401	15
402
403	>>> c['a'] # count of letter 'a'
404	5
405	>>> for elem in 'shazam': # update counts from an iterable
406	... c[elem] += 1 # by adding 1 to each element's count
407	>>> c['a'] # now there are seven 'a'
408	7
409	>>> del c['b'] # remove all 'b'
410	>>> c['b'] # now there are zero 'b'
411	0
412
413	>>> d = Counter('simsalabim') # make another counter
414	>>> c.update(d) # add in the second counter
415	>>> c['a'] # now there are nine 'a'
416	9
417
418	>>> c.clear() # empty the counter
419	>>> c
420	Counter()
421
422	Note: If a count is set to zero or reduced to zero, it will remain
423	in the counter until the entry is deleted or the counter is cleared:
424
425	>>> c = Counter('aaabbc')
426	>>> c['b'] -= 2 # reduce the count of 'b' by two
427	>>> c.most_common() # 'b' is still in, but its count is zero
428	[('a', 3), ('c', 1), ('b', 0)]
429
430	'''
431	# References:
432	# http://en.wikipedia.org/wiki/Multiset
433	# http://www.gnu.org/software/smalltalk/manual-base/html_node/Bag.html
434	# http://www.demo2s.com/Tutorial/Cpp/0380__set-multiset/Catalog0380__set-multiset.htm
435	# http://code.activestate.com/recipes/259174/
436	# Knuth, TAOCP Vol. II section 4.6.3
437
438	def __init__(self, iterable=None, **kwds):
439	'''Create a new, empty Counter object. And if given, count elements
440	from an input iterable. Or, initialize the count from another mapping
441	of elements to their counts.
442
443	>>> c = Counter() # a new, empty counter
444	>>> c = Counter('gallahad') # a new counter from an iterable
445	>>> c = Counter({'a': 4, 'b': 2}) # a new counter from a mapping
446	>>> c = Counter(a=4, b=2) # a new counter from keyword args
447
448	'''
449	super(Counter, self).__init__()
450	self.update(iterable, **kwds)
451
452	def __missing__(self, key):
453	'The count of elements not in the Counter is zero.'
454	# Needed so that self[missing_item] does not raise KeyError
455	return 0
456
457	def most_common(self, n=None):
458	'''List the n most common elements and their counts from the most
459	common to the least. If n is None, then list all element counts.
460
461	>>> Counter('abcdeabcdabcaba').most_common(3)
462	[('a', 5), ('b', 4), ('c', 3)]
463
464	'''
465	# Emulate Bag.sortedByCount from Smalltalk
466	if n is None:
467	return sorted(self.iteritems(), key=_itemgetter(1), reverse=True)
468	return _heapq.nlargest(n, self.iteritems(), key=_itemgetter(1))
469
470	def elements(self):
471	'''Iterator over elements repeating each as many times as its count.
472
473	>>> c = Counter('ABCABC')
474	>>> sorted(c.elements())
475	['A', 'A', 'B', 'B', 'C', 'C']
476
477	# Knuth's example for prime factors of 1836: 2*2 3*3 17**1
478	>>> prime_factors = Counter({2: 2, 3: 3, 17: 1})
479	>>> product = 1
480	>>> for factor in prime_factors.elements(): # loop over factors
481	... product *= factor # and multiply them
482	>>> product
483	1836
484
485	Note, if an element's count has been set to zero or is a negative
486	number, elements() will ignore it.
487
488	'''
489	# Emulate Bag.do from Smalltalk and Multiset.begin from C++.
490	return _chain.from_iterable(_starmap(_repeat, self.iteritems()))
491
492	# Override dict methods where necessary
493
494	@classmethod
495	def fromkeys(cls, iterable, v=None):
496	# There is no equivalent method for counters because setting v=1
497	# means that no element can have a count greater than one.
498	raise NotImplementedError(
499	'Counter.fromkeys() is undefined. Use Counter(iterable) instead.')
500
501	def update(self, iterable=None, **kwds):
502	'''Like dict.update() but add counts instead of replacing them.
503
504	Source can be an iterable, a dictionary, or another Counter instance.
505
506	>>> c = Counter('which')
507	>>> c.update('witch') # add elements from another iterable
508	>>> d = Counter('watch')
509	>>> c.update(d) # add elements from another counter
510	>>> c['h'] # four 'h' in which, witch, and watch
511	4
512
513	'''
514	# The regular dict.update() operation makes no sense here because the
515	# replace behavior results in the some of original untouched counts
516	# being mixed-in with all of the other counts for a mismash that
517	# doesn't have a straight-forward interpretation in most counting
518	# contexts. Instead, we implement straight-addition. Both the inputs
519	# and outputs are allowed to contain zero and negative counts.
520
521	if iterable is not None:
522	if isinstance(iterable, Mapping):
523	if self:
524	self_get = self.get
525	for elem, count in iterable.iteritems():
526	self[elem] = self_get(elem, 0) + count
527	else:
528	super(Counter, self).update(iterable) # fast path when counter is empty
529	else:
530	self_get = self.get
531	for elem in iterable:
532	self[elem] = self_get(elem, 0) + 1
533	if kwds:
534	self.update(kwds)
535
536	def subtract(self, iterable=None, **kwds):
537	'''Like dict.update() but subtracts counts instead of replacing them.
538	Counts can be reduced below zero. Both the inputs and outputs are
539	allowed to contain zero and negative counts.
540
541	Source can be an iterable, a dictionary, or another Counter instance.
542
543	>>> c = Counter('which')
544	>>> c.subtract('witch') # subtract elements from another iterable
545	>>> c.subtract(Counter('watch')) # subtract elements from another counter
546	>>> c['h'] # 2 in which, minus 1 in witch, minus 1 in watch
547	0
548	>>> c['w'] # 1 in which, minus 1 in witch, minus 1 in watch
549	-1
550
551	'''
552	if iterable is not None:
553	self_get = self.get
554	if isinstance(iterable, Mapping):
555	for elem, count in iterable.items():
556	self[elem] = self_get(elem, 0) - count
557	else:
558	for elem in iterable:
559	self[elem] = self_get(elem, 0) - 1
560	if kwds:
561	self.subtract(kwds)
562
563	def copy(self):
564	'Return a shallow copy.'
565	return self.__class__(self)
566
567	def __reduce__(self):
568	return self.__class__, (dict(self),)
569
570	def __delitem__(self, elem):
571	'Like dict.__delitem__() but does not raise KeyError for missing values.'
572	if elem in self:
573	super(Counter, self).__delitem__(elem)
574
575	def __repr__(self):
576	if not self:
577	return '%s()' % self.__class__.__name__
578	items = ', '.join(map('%r: %r'.__mod__, self.most_common()))
579	return '%s({%s})' % (self.__class__.__name__, items)
580
581	# Multiset-style mathematical operations discussed in:
582	# Knuth TAOCP Volume II section 4.6.3 exercise 19
583	# and at http://en.wikipedia.org/wiki/Multiset
584	#
585	# Outputs guaranteed to only include positive counts.
586	#
587	# To strip negative and zero counts, add-in an empty counter:
588	# c += Counter()
589
590	def __add__(self, other):
591	'''Add counts from two counters.
592
593	>>> Counter('abbb') + Counter('bcc')
594	Counter({'b': 4, 'c': 2, 'a': 1})
595
596	'''
597	if not isinstance(other, Counter):
598	return NotImplemented
599	result = Counter()
600	for elem, count in self.items():
601	newcount = count + other[elem]
602	if newcount > 0:
603	result[elem] = newcount
604	for elem, count in other.items():
605	if elem not in self and count > 0:
606	result[elem] = count
607	return result
608
609	def __sub__(self, other):
610	''' Subtract count, but keep only results with positive counts.
611
612	>>> Counter('abbbc') - Counter('bccd')
613	Counter({'b': 2, 'a': 1})
614
615	'''
616	if not isinstance(other, Counter):
617	return NotImplemented
618	result = Counter()
619	for elem, count in self.items():
620	newcount = count - other[elem]
621	if newcount > 0:
622	result[elem] = newcount
623	for elem, count in other.items():
624	if elem not in self and count < 0:
625	result[elem] = 0 - count
626	return result
627
628	def __or__(self, other):
629	'''Union is the maximum of value in either of the input counters.
630
631	>>> Counter('abbb') \| Counter('bcc')
632	Counter({'b': 3, 'c': 2, 'a': 1})
633
634	'''
635	if not isinstance(other, Counter):
636	return NotImplemented
637	result = Counter()
638	for elem, count in self.items():
639	other_count = other[elem]
640	newcount = other_count if count < other_count else count
641	if newcount > 0:
642	result[elem] = newcount
643	for elem, count in other.items():
644	if elem not in self and count > 0:
645	result[elem] = count
646	return result
647
648	def __and__(self, other):
649	''' Intersection is the minimum of corresponding counts.
650
651	>>> Counter('abbb') & Counter('bcc')
652	Counter({'b': 1})
653
654	'''
655	if not isinstance(other, Counter):
656	return NotImplemented
657	result = Counter()
658	for elem, count in self.items():
659	other_count = other[elem]
660	newcount = count if count < other_count else other_count
661	if newcount > 0:
662	result[elem] = newcount
663	return result
664
665
666	if __name__ == '__main__':
667	# verify that instances can be pickled
668	from cPickle import loads, dumps
669	Point = namedtuple('Point', 'x, y', True)
670	p = Point(x=10, y=20)
671	assert p == loads(dumps(p))
672
673	# test and demonstrate ability to override methods
674	class Point(namedtuple('Point', 'x y')):
675	__slots__ = ()
676	@property
677	def hypot(self):
678	return (self.x 2 + self.y 2) ** 0.5
679	def __str__(self):
680	return 'Point: x=%6.3f y=%6.3f hypot=%6.3f' % (self.x, self.y, self.hypot)
681
682	for p in Point(3, 4), Point(14, 5/7.):
683	print p
684
685	class Point(namedtuple('Point', 'x y')):
686	'Point class with optimized _make() and _replace() without error-checking'
687	__slots__ = ()
688	_make = classmethod(tuple.__new__)
689	def _replace(self, _map=map, **kwds):
690	return self._make(_map(kwds.get, ('x', 'y'), self))
691
692	print Point(11, 22)._replace(x=100)
693
694	Point3D = namedtuple('Point3D', Point._fields + ('z',))
695	print Point3D.__doc__
696
697	import doctest
698	TestResults = namedtuple('TestResults', 'failed attempted')
699	print TestResults(*doctest.testmod())

Note: See TracBrowser for help on using the repository browser.

Context Navigation

source: python/trunk/Lib/collections.py@ 603

Download in other formats: