source: python/trunk/Lib/collections.py@ 560

Last change on this file since 560 was 391, checked in by dmik, 11 years ago

python: Merge vendor 2.7.6 to trunk.

  • Property svn:eol-style set to native
File size: 25.4 KB
Line 
1__all__ = ['Counter', 'deque', 'defaultdict', 'namedtuple', 'OrderedDict']
2# For bootstrapping reasons, the collection ABCs are defined in _abcoll.py.
3# They should however be considered an integral part of collections.py.
4from _abcoll import *
5import _abcoll
6__all__ += _abcoll.__all__
7
8from _collections import deque, defaultdict
9from operator import itemgetter as _itemgetter, eq as _eq
10from keyword import iskeyword as _iskeyword
11import sys as _sys
12import heapq as _heapq
13from itertools import repeat as _repeat, chain as _chain, starmap as _starmap
14from itertools import imap as _imap
15
16try:
17 from thread import get_ident as _get_ident
18except ImportError:
19 from dummy_thread import get_ident as _get_ident
20
21
22################################################################################
23### OrderedDict
24################################################################################
25
26class OrderedDict(dict):
27 'Dictionary that remembers insertion order'
28 # An inherited dict maps keys to values.
29 # The inherited dict provides __getitem__, __len__, __contains__, and get.
30 # The remaining methods are order-aware.
31 # Big-O running times for all methods are the same as regular dictionaries.
32
33 # The internal self.__map dict maps keys to links in a doubly linked list.
34 # The circular doubly linked list starts and ends with a sentinel element.
35 # The sentinel element never gets deleted (this simplifies the algorithm).
36 # Each link is stored as a list of length three: [PREV, NEXT, KEY].
37
38 def __init__(self, *args, **kwds):
39 '''Initialize an ordered dictionary. The signature is the same as
40 regular dictionaries, but keyword arguments are not recommended because
41 their insertion order is arbitrary.
42
43 '''
44 if len(args) > 1:
45 raise TypeError('expected at most 1 arguments, got %d' % len(args))
46 try:
47 self.__root
48 except AttributeError:
49 self.__root = root = [] # sentinel node
50 root[:] = [root, root, None]
51 self.__map = {}
52 self.__update(*args, **kwds)
53
54 def __setitem__(self, key, value, dict_setitem=dict.__setitem__):
55 'od.__setitem__(i, y) <==> od[i]=y'
56 # Setting a new item creates a new link at the end of the linked list,
57 # and the inherited dictionary is updated with the new key/value pair.
58 if key not in self:
59 root = self.__root
60 last = root[0]
61 last[1] = root[0] = self.__map[key] = [last, root, key]
62 return dict_setitem(self, key, value)
63
64 def __delitem__(self, key, dict_delitem=dict.__delitem__):
65 'od.__delitem__(y) <==> del od[y]'
66 # Deleting an existing item uses self.__map to find the link which gets
67 # removed by updating the links in the predecessor and successor nodes.
68 dict_delitem(self, key)
69 link_prev, link_next, _ = self.__map.pop(key)
70 link_prev[1] = link_next # update link_prev[NEXT]
71 link_next[0] = link_prev # update link_next[PREV]
72
73 def __iter__(self):
74 'od.__iter__() <==> iter(od)'
75 # Traverse the linked list in order.
76 root = self.__root
77 curr = root[1] # start at the first node
78 while curr is not root:
79 yield curr[2] # yield the curr[KEY]
80 curr = curr[1] # move to next node
81
82 def __reversed__(self):
83 'od.__reversed__() <==> reversed(od)'
84 # Traverse the linked list in reverse order.
85 root = self.__root
86 curr = root[0] # start at the last node
87 while curr is not root:
88 yield curr[2] # yield the curr[KEY]
89 curr = curr[0] # move to previous node
90
91 def clear(self):
92 'od.clear() -> None. Remove all items from od.'
93 root = self.__root
94 root[:] = [root, root, None]
95 self.__map.clear()
96 dict.clear(self)
97
98 # -- the following methods do not depend on the internal structure --
99
100 def keys(self):
101 'od.keys() -> list of keys in od'
102 return list(self)
103
104 def values(self):
105 'od.values() -> list of values in od'
106 return [self[key] for key in self]
107
108 def items(self):
109 'od.items() -> list of (key, value) pairs in od'
110 return [(key, self[key]) for key in self]
111
112 def iterkeys(self):
113 'od.iterkeys() -> an iterator over the keys in od'
114 return iter(self)
115
116 def itervalues(self):
117 'od.itervalues -> an iterator over the values in od'
118 for k in self:
119 yield self[k]
120
121 def iteritems(self):
122 'od.iteritems -> an iterator over the (key, value) pairs in od'
123 for k in self:
124 yield (k, self[k])
125
126 update = MutableMapping.update
127
128 __update = update # let subclasses override update without breaking __init__
129
130 __marker = object()
131
132 def pop(self, key, default=__marker):
133 '''od.pop(k[,d]) -> v, remove specified key and return the corresponding
134 value. If key is not found, d is returned if given, otherwise KeyError
135 is raised.
136
137 '''
138 if key in self:
139 result = self[key]
140 del self[key]
141 return result
142 if default is self.__marker:
143 raise KeyError(key)
144 return default
145
146 def setdefault(self, key, default=None):
147 'od.setdefault(k[,d]) -> od.get(k,d), also set od[k]=d if k not in od'
148 if key in self:
149 return self[key]
150 self[key] = default
151 return default
152
153 def popitem(self, last=True):
154 '''od.popitem() -> (k, v), return and remove a (key, value) pair.
155 Pairs are returned in LIFO order if last is true or FIFO order if false.
156
157 '''
158 if not self:
159 raise KeyError('dictionary is empty')
160 key = next(reversed(self) if last else iter(self))
161 value = self.pop(key)
162 return key, value
163
164 def __repr__(self, _repr_running={}):
165 'od.__repr__() <==> repr(od)'
166 call_key = id(self), _get_ident()
167 if call_key in _repr_running:
168 return '...'
169 _repr_running[call_key] = 1
170 try:
171 if not self:
172 return '%s()' % (self.__class__.__name__,)
173 return '%s(%r)' % (self.__class__.__name__, self.items())
174 finally:
175 del _repr_running[call_key]
176
177 def __reduce__(self):
178 'Return state information for pickling'
179 items = [[k, self[k]] for k in self]
180 inst_dict = vars(self).copy()
181 for k in vars(OrderedDict()):
182 inst_dict.pop(k, None)
183 if inst_dict:
184 return (self.__class__, (items,), inst_dict)
185 return self.__class__, (items,)
186
187 def copy(self):
188 'od.copy() -> a shallow copy of od'
189 return self.__class__(self)
190
191 @classmethod
192 def fromkeys(cls, iterable, value=None):
193 '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S.
194 If not specified, the value defaults to None.
195
196 '''
197 self = cls()
198 for key in iterable:
199 self[key] = value
200 return self
201
202 def __eq__(self, other):
203 '''od.__eq__(y) <==> od==y. Comparison to another OD is order-sensitive
204 while comparison to a regular mapping is order-insensitive.
205
206 '''
207 if isinstance(other, OrderedDict):
208 return dict.__eq__(self, other) and all(_imap(_eq, self, other))
209 return dict.__eq__(self, other)
210
211 def __ne__(self, other):
212 'od.__ne__(y) <==> od!=y'
213 return not self == other
214
215 # -- the following methods support python 3.x style dictionary views --
216
217 def viewkeys(self):
218 "od.viewkeys() -> a set-like object providing a view on od's keys"
219 return KeysView(self)
220
221 def viewvalues(self):
222 "od.viewvalues() -> an object providing a view on od's values"
223 return ValuesView(self)
224
225 def viewitems(self):
226 "od.viewitems() -> a set-like object providing a view on od's items"
227 return ItemsView(self)
228
229
230################################################################################
231### namedtuple
232################################################################################
233
234_class_template = '''\
235class {typename}(tuple):
236 '{typename}({arg_list})'
237
238 __slots__ = ()
239
240 _fields = {field_names!r}
241
242 def __new__(_cls, {arg_list}):
243 'Create new instance of {typename}({arg_list})'
244 return _tuple.__new__(_cls, ({arg_list}))
245
246 @classmethod
247 def _make(cls, iterable, new=tuple.__new__, len=len):
248 'Make a new {typename} object from a sequence or iterable'
249 result = new(cls, iterable)
250 if len(result) != {num_fields:d}:
251 raise TypeError('Expected {num_fields:d} arguments, got %d' % len(result))
252 return result
253
254 def __repr__(self):
255 'Return a nicely formatted representation string'
256 return '{typename}({repr_fmt})' % self
257
258 def _asdict(self):
259 'Return a new OrderedDict which maps field names to their values'
260 return OrderedDict(zip(self._fields, self))
261
262 def _replace(_self, **kwds):
263 'Return a new {typename} object replacing specified fields with new values'
264 result = _self._make(map(kwds.pop, {field_names!r}, _self))
265 if kwds:
266 raise ValueError('Got unexpected field names: %r' % kwds.keys())
267 return result
268
269 def __getnewargs__(self):
270 'Return self as a plain tuple. Used by copy and pickle.'
271 return tuple(self)
272
273 __dict__ = _property(_asdict)
274
275 def __getstate__(self):
276 'Exclude the OrderedDict from pickling'
277 pass
278
279{field_defs}
280'''
281
282_repr_template = '{name}=%r'
283
284_field_template = '''\
285 {name} = _property(_itemgetter({index:d}), doc='Alias for field number {index:d}')
286'''
287
288def namedtuple(typename, field_names, verbose=False, rename=False):
289 """Returns a new subclass of tuple with named fields.
290
291 >>> Point = namedtuple('Point', ['x', 'y'])
292 >>> Point.__doc__ # docstring for the new class
293 'Point(x, y)'
294 >>> p = Point(11, y=22) # instantiate with positional args or keywords
295 >>> p[0] + p[1] # indexable like a plain tuple
296 33
297 >>> x, y = p # unpack like a regular tuple
298 >>> x, y
299 (11, 22)
300 >>> p.x + p.y # fields also accessable by name
301 33
302 >>> d = p._asdict() # convert to a dictionary
303 >>> d['x']
304 11
305 >>> Point(**d) # convert from a dictionary
306 Point(x=11, y=22)
307 >>> p._replace(x=100) # _replace() is like str.replace() but targets named fields
308 Point(x=100, y=22)
309
310 """
311
312 # Validate the field names. At the user's option, either generate an error
313 # message or automatically replace the field name with a valid name.
314 if isinstance(field_names, basestring):
315 field_names = field_names.replace(',', ' ').split()
316 field_names = map(str, field_names)
317 if rename:
318 seen = set()
319 for index, name in enumerate(field_names):
320 if (not all(c.isalnum() or c=='_' for c in name)
321 or _iskeyword(name)
322 or not name
323 or name[0].isdigit()
324 or name.startswith('_')
325 or name in seen):
326 field_names[index] = '_%d' % index
327 seen.add(name)
328 for name in [typename] + field_names:
329 if not all(c.isalnum() or c=='_' for c in name):
330 raise ValueError('Type names and field names can only contain '
331 'alphanumeric characters and underscores: %r' % name)
332 if _iskeyword(name):
333 raise ValueError('Type names and field names cannot be a '
334 'keyword: %r' % name)
335 if name[0].isdigit():
336 raise ValueError('Type names and field names cannot start with '
337 'a number: %r' % name)
338 seen = set()
339 for name in field_names:
340 if name.startswith('_') and not rename:
341 raise ValueError('Field names cannot start with an underscore: '
342 '%r' % name)
343 if name in seen:
344 raise ValueError('Encountered duplicate field name: %r' % name)
345 seen.add(name)
346
347 # Fill-in the class template
348 class_definition = _class_template.format(
349 typename = typename,
350 field_names = tuple(field_names),
351 num_fields = len(field_names),
352 arg_list = repr(tuple(field_names)).replace("'", "")[1:-1],
353 repr_fmt = ', '.join(_repr_template.format(name=name)
354 for name in field_names),
355 field_defs = '\n'.join(_field_template.format(index=index, name=name)
356 for index, name in enumerate(field_names))
357 )
358 if verbose:
359 print class_definition
360
361 # Execute the template string in a temporary namespace and support
362 # tracing utilities by setting a value for frame.f_globals['__name__']
363 namespace = dict(_itemgetter=_itemgetter, __name__='namedtuple_%s' % typename,
364 OrderedDict=OrderedDict, _property=property, _tuple=tuple)
365 try:
366 exec class_definition in namespace
367 except SyntaxError as e:
368 raise SyntaxError(e.message + ':\n' + class_definition)
369 result = namespace[typename]
370
371 # For pickling to work, the __module__ variable needs to be set to the frame
372 # where the named tuple is created. Bypass this step in environments where
373 # sys._getframe is not defined (Jython for example) or sys._getframe is not
374 # defined for arguments greater than 0 (IronPython).
375 try:
376 result.__module__ = _sys._getframe(1).f_globals.get('__name__', '__main__')
377 except (AttributeError, ValueError):
378 pass
379
380 return result
381
382
383########################################################################
384### Counter
385########################################################################
386
387class Counter(dict):
388 '''Dict subclass for counting hashable items. Sometimes called a bag
389 or multiset. Elements are stored as dictionary keys and their counts
390 are stored as dictionary values.
391
392 >>> c = Counter('abcdeabcdabcaba') # count elements from a string
393
394 >>> c.most_common(3) # three most common elements
395 [('a', 5), ('b', 4), ('c', 3)]
396 >>> sorted(c) # list all unique elements
397 ['a', 'b', 'c', 'd', 'e']
398 >>> ''.join(sorted(c.elements())) # list elements with repetitions
399 'aaaaabbbbcccdde'
400 >>> sum(c.values()) # total of all counts
401 15
402
403 >>> c['a'] # count of letter 'a'
404 5
405 >>> for elem in 'shazam': # update counts from an iterable
406 ... c[elem] += 1 # by adding 1 to each element's count
407 >>> c['a'] # now there are seven 'a'
408 7
409 >>> del c['b'] # remove all 'b'
410 >>> c['b'] # now there are zero 'b'
411 0
412
413 >>> d = Counter('simsalabim') # make another counter
414 >>> c.update(d) # add in the second counter
415 >>> c['a'] # now there are nine 'a'
416 9
417
418 >>> c.clear() # empty the counter
419 >>> c
420 Counter()
421
422 Note: If a count is set to zero or reduced to zero, it will remain
423 in the counter until the entry is deleted or the counter is cleared:
424
425 >>> c = Counter('aaabbc')
426 >>> c['b'] -= 2 # reduce the count of 'b' by two
427 >>> c.most_common() # 'b' is still in, but its count is zero
428 [('a', 3), ('c', 1), ('b', 0)]
429
430 '''
431 # References:
432 # http://en.wikipedia.org/wiki/Multiset
433 # http://www.gnu.org/software/smalltalk/manual-base/html_node/Bag.html
434 # http://www.demo2s.com/Tutorial/Cpp/0380__set-multiset/Catalog0380__set-multiset.htm
435 # http://code.activestate.com/recipes/259174/
436 # Knuth, TAOCP Vol. II section 4.6.3
437
438 def __init__(self, iterable=None, **kwds):
439 '''Create a new, empty Counter object. And if given, count elements
440 from an input iterable. Or, initialize the count from another mapping
441 of elements to their counts.
442
443 >>> c = Counter() # a new, empty counter
444 >>> c = Counter('gallahad') # a new counter from an iterable
445 >>> c = Counter({'a': 4, 'b': 2}) # a new counter from a mapping
446 >>> c = Counter(a=4, b=2) # a new counter from keyword args
447
448 '''
449 super(Counter, self).__init__()
450 self.update(iterable, **kwds)
451
452 def __missing__(self, key):
453 'The count of elements not in the Counter is zero.'
454 # Needed so that self[missing_item] does not raise KeyError
455 return 0
456
457 def most_common(self, n=None):
458 '''List the n most common elements and their counts from the most
459 common to the least. If n is None, then list all element counts.
460
461 >>> Counter('abcdeabcdabcaba').most_common(3)
462 [('a', 5), ('b', 4), ('c', 3)]
463
464 '''
465 # Emulate Bag.sortedByCount from Smalltalk
466 if n is None:
467 return sorted(self.iteritems(), key=_itemgetter(1), reverse=True)
468 return _heapq.nlargest(n, self.iteritems(), key=_itemgetter(1))
469
470 def elements(self):
471 '''Iterator over elements repeating each as many times as its count.
472
473 >>> c = Counter('ABCABC')
474 >>> sorted(c.elements())
475 ['A', 'A', 'B', 'B', 'C', 'C']
476
477 # Knuth's example for prime factors of 1836: 2**2 * 3**3 * 17**1
478 >>> prime_factors = Counter({2: 2, 3: 3, 17: 1})
479 >>> product = 1
480 >>> for factor in prime_factors.elements(): # loop over factors
481 ... product *= factor # and multiply them
482 >>> product
483 1836
484
485 Note, if an element's count has been set to zero or is a negative
486 number, elements() will ignore it.
487
488 '''
489 # Emulate Bag.do from Smalltalk and Multiset.begin from C++.
490 return _chain.from_iterable(_starmap(_repeat, self.iteritems()))
491
492 # Override dict methods where necessary
493
494 @classmethod
495 def fromkeys(cls, iterable, v=None):
496 # There is no equivalent method for counters because setting v=1
497 # means that no element can have a count greater than one.
498 raise NotImplementedError(
499 'Counter.fromkeys() is undefined. Use Counter(iterable) instead.')
500
501 def update(self, iterable=None, **kwds):
502 '''Like dict.update() but add counts instead of replacing them.
503
504 Source can be an iterable, a dictionary, or another Counter instance.
505
506 >>> c = Counter('which')
507 >>> c.update('witch') # add elements from another iterable
508 >>> d = Counter('watch')
509 >>> c.update(d) # add elements from another counter
510 >>> c['h'] # four 'h' in which, witch, and watch
511 4
512
513 '''
514 # The regular dict.update() operation makes no sense here because the
515 # replace behavior results in the some of original untouched counts
516 # being mixed-in with all of the other counts for a mismash that
517 # doesn't have a straight-forward interpretation in most counting
518 # contexts. Instead, we implement straight-addition. Both the inputs
519 # and outputs are allowed to contain zero and negative counts.
520
521 if iterable is not None:
522 if isinstance(iterable, Mapping):
523 if self:
524 self_get = self.get
525 for elem, count in iterable.iteritems():
526 self[elem] = self_get(elem, 0) + count
527 else:
528 super(Counter, self).update(iterable) # fast path when counter is empty
529 else:
530 self_get = self.get
531 for elem in iterable:
532 self[elem] = self_get(elem, 0) + 1
533 if kwds:
534 self.update(kwds)
535
536 def subtract(self, iterable=None, **kwds):
537 '''Like dict.update() but subtracts counts instead of replacing them.
538 Counts can be reduced below zero. Both the inputs and outputs are
539 allowed to contain zero and negative counts.
540
541 Source can be an iterable, a dictionary, or another Counter instance.
542
543 >>> c = Counter('which')
544 >>> c.subtract('witch') # subtract elements from another iterable
545 >>> c.subtract(Counter('watch')) # subtract elements from another counter
546 >>> c['h'] # 2 in which, minus 1 in witch, minus 1 in watch
547 0
548 >>> c['w'] # 1 in which, minus 1 in witch, minus 1 in watch
549 -1
550
551 '''
552 if iterable is not None:
553 self_get = self.get
554 if isinstance(iterable, Mapping):
555 for elem, count in iterable.items():
556 self[elem] = self_get(elem, 0) - count
557 else:
558 for elem in iterable:
559 self[elem] = self_get(elem, 0) - 1
560 if kwds:
561 self.subtract(kwds)
562
563 def copy(self):
564 'Return a shallow copy.'
565 return self.__class__(self)
566
567 def __reduce__(self):
568 return self.__class__, (dict(self),)
569
570 def __delitem__(self, elem):
571 'Like dict.__delitem__() but does not raise KeyError for missing values.'
572 if elem in self:
573 super(Counter, self).__delitem__(elem)
574
575 def __repr__(self):
576 if not self:
577 return '%s()' % self.__class__.__name__
578 items = ', '.join(map('%r: %r'.__mod__, self.most_common()))
579 return '%s({%s})' % (self.__class__.__name__, items)
580
581 # Multiset-style mathematical operations discussed in:
582 # Knuth TAOCP Volume II section 4.6.3 exercise 19
583 # and at http://en.wikipedia.org/wiki/Multiset
584 #
585 # Outputs guaranteed to only include positive counts.
586 #
587 # To strip negative and zero counts, add-in an empty counter:
588 # c += Counter()
589
590 def __add__(self, other):
591 '''Add counts from two counters.
592
593 >>> Counter('abbb') + Counter('bcc')
594 Counter({'b': 4, 'c': 2, 'a': 1})
595
596 '''
597 if not isinstance(other, Counter):
598 return NotImplemented
599 result = Counter()
600 for elem, count in self.items():
601 newcount = count + other[elem]
602 if newcount > 0:
603 result[elem] = newcount
604 for elem, count in other.items():
605 if elem not in self and count > 0:
606 result[elem] = count
607 return result
608
609 def __sub__(self, other):
610 ''' Subtract count, but keep only results with positive counts.
611
612 >>> Counter('abbbc') - Counter('bccd')
613 Counter({'b': 2, 'a': 1})
614
615 '''
616 if not isinstance(other, Counter):
617 return NotImplemented
618 result = Counter()
619 for elem, count in self.items():
620 newcount = count - other[elem]
621 if newcount > 0:
622 result[elem] = newcount
623 for elem, count in other.items():
624 if elem not in self and count < 0:
625 result[elem] = 0 - count
626 return result
627
628 def __or__(self, other):
629 '''Union is the maximum of value in either of the input counters.
630
631 >>> Counter('abbb') | Counter('bcc')
632 Counter({'b': 3, 'c': 2, 'a': 1})
633
634 '''
635 if not isinstance(other, Counter):
636 return NotImplemented
637 result = Counter()
638 for elem, count in self.items():
639 other_count = other[elem]
640 newcount = other_count if count < other_count else count
641 if newcount > 0:
642 result[elem] = newcount
643 for elem, count in other.items():
644 if elem not in self and count > 0:
645 result[elem] = count
646 return result
647
648 def __and__(self, other):
649 ''' Intersection is the minimum of corresponding counts.
650
651 >>> Counter('abbb') & Counter('bcc')
652 Counter({'b': 1})
653
654 '''
655 if not isinstance(other, Counter):
656 return NotImplemented
657 result = Counter()
658 for elem, count in self.items():
659 other_count = other[elem]
660 newcount = count if count < other_count else other_count
661 if newcount > 0:
662 result[elem] = newcount
663 return result
664
665
666if __name__ == '__main__':
667 # verify that instances can be pickled
668 from cPickle import loads, dumps
669 Point = namedtuple('Point', 'x, y', True)
670 p = Point(x=10, y=20)
671 assert p == loads(dumps(p))
672
673 # test and demonstrate ability to override methods
674 class Point(namedtuple('Point', 'x y')):
675 __slots__ = ()
676 @property
677 def hypot(self):
678 return (self.x ** 2 + self.y ** 2) ** 0.5
679 def __str__(self):
680 return 'Point: x=%6.3f y=%6.3f hypot=%6.3f' % (self.x, self.y, self.hypot)
681
682 for p in Point(3, 4), Point(14, 5/7.):
683 print p
684
685 class Point(namedtuple('Point', 'x y')):
686 'Point class with optimized _make() and _replace() without error-checking'
687 __slots__ = ()
688 _make = classmethod(tuple.__new__)
689 def _replace(self, _map=map, **kwds):
690 return self._make(_map(kwds.get, ('x', 'y'), self))
691
692 print Point(11, 22)._replace(x=100)
693
694 Point3D = namedtuple('Point3D', Point._fields + ('z',))
695 print Point3D.__doc__
696
697 import doctest
698 TestResults = namedtuple('TestResults', 'failed attempted')
699 print TestResults(*doctest.testmod())
Note: See TracBrowser for help on using the repository browser.