source: python/trunk/Lib/sets.py@ 608

Last change on this file since 608 was 391, checked in by dmik, 11 years ago

python: Merge vendor 2.7.6 to trunk.

  • Property svn:eol-style set to native
File size: 18.6 KB
Line 
1"""Classes to represent arbitrary sets (including sets of sets).
2
3This module implements sets using dictionaries whose values are
4ignored. The usual operations (union, intersection, deletion, etc.)
5are provided as both methods and operators.
6
7Important: sets are not sequences! While they support 'x in s',
8'len(s)', and 'for x in s', none of those operations are unique for
9sequences; for example, mappings support all three as well. The
10characteristic operation for sequences is subscripting with small
11integers: s[i], for i in range(len(s)). Sets don't support
12subscripting at all. Also, sequences allow multiple occurrences and
13their elements have a definite order; sets on the other hand don't
14record multiple occurrences and don't remember the order of element
15insertion (which is why they don't support s[i]).
16
17The following classes are provided:
18
19BaseSet -- All the operations common to both mutable and immutable
20 sets. This is an abstract class, not meant to be directly
21 instantiated.
22
23Set -- Mutable sets, subclass of BaseSet; not hashable.
24
25ImmutableSet -- Immutable sets, subclass of BaseSet; hashable.
26 An iterable argument is mandatory to create an ImmutableSet.
27
28_TemporarilyImmutableSet -- A wrapper around a Set, hashable,
29 giving the same hash value as the immutable set equivalent
30 would have. Do not use this class directly.
31
32Only hashable objects can be added to a Set. In particular, you cannot
33really add a Set as an element to another Set; if you try, what is
34actually added is an ImmutableSet built from it (it compares equal to
35the one you tried adding).
36
37When you ask if `x in y' where x is a Set and y is a Set or
38ImmutableSet, x is wrapped into a _TemporarilyImmutableSet z, and
39what's tested is actually `z in y'.
40
41"""
42
43# Code history:
44#
45# - Greg V. Wilson wrote the first version, using a different approach
46# to the mutable/immutable problem, and inheriting from dict.
47#
48# - Alex Martelli modified Greg's version to implement the current
49# Set/ImmutableSet approach, and make the data an attribute.
50#
51# - Guido van Rossum rewrote much of the code, made some API changes,
52# and cleaned up the docstrings.
53#
54# - Raymond Hettinger added a number of speedups and other
55# improvements.
56
57from itertools import ifilter, ifilterfalse
58
59__all__ = ['BaseSet', 'Set', 'ImmutableSet']
60
61import warnings
62warnings.warn("the sets module is deprecated", DeprecationWarning,
63 stacklevel=2)
64
class BaseSet(object):
    """Common base class for mutable and immutable sets.

    The elements are the keys of the dictionary held in ``self._data``;
    the values stored under those keys are never inspected.  This class
    does not create ``_data`` itself -- each concrete subclass must do so
    in its own __init__().
    """

    __slots__ = ['_data']

    # Constructor

    def __init__(self):
        """This is an abstract class."""
        # Don't call this from a concrete subclass!
        if self.__class__ is BaseSet:
            # Call form of raise: same exception as the old
            # "raise TypeError, msg" statement, but valid syntax on
            # every Python version.
            raise TypeError("BaseSet is an abstract class. "
                            "Use Set or ImmutableSet.")

    # Standard protocols: __len__, __repr__, __str__, __iter__

    def __len__(self):
        """Return the number of elements of a set."""
        return len(self._data)

    def __repr__(self):
        """Return string representation of a set.

        This looks like 'Set([<list of elements>])'.
        """
        return self._repr()

    # __str__ is the same as __repr__
    __str__ = __repr__

    def _repr(self, sorted=False):
        """Return '<ClassName>(<list of elements>)'.

        If 'sorted' is true the element list is sorted first, which
        gives a reproducible string.
        """
        elements = list(self._data)
        if sorted:
            elements.sort()
        return '%s(%r)' % (self.__class__.__name__, elements)

    def __iter__(self):
        """Return an iterator over the elements of a set.

        This is the keys iterator for the underlying dict.
        """
        return iter(self._data)

    # Three-way comparison is not supported.  However, because __eq__ is
    # tried before __cmp__, if Set x == Set y, x.__eq__(y) returns True and
    # then cmp(x, y) returns 0 (Python doesn't actually call __cmp__ in this
    # case).

    def __cmp__(self, other):
        """Always raise TypeError: sets have no three-way ordering."""
        raise TypeError("can't compare sets using cmp()")

    # Equality comparisons using the underlying dicts.  Mixed-type comparisons
    # are allowed here, where Set == z for non-Set z always returns False,
    # and Set != z always True.  This allows expressions like "x in y" to
    # give the expected result when y is a sequence of mixed types, not
    # raising a pointless TypeError just because y contains a Set, or x is
    # a Set and y contains a non-set ("in" invokes only __eq__).
    # Subtle:  it would be nicer if __eq__ and __ne__ could return
    # NotImplemented instead of True or False.  Then the other comparand
    # would get a chance to determine the result, and if the other comparand
    # also returned NotImplemented then it would fall back to object address
    # comparison (which would always return False for __eq__ and always
    # True for __ne__).  However, that doesn't work, because this type
    # *also* implements __cmp__:  if, e.g., __eq__ returns NotImplemented,
    # Python tries __cmp__ next, and the __cmp__ here then raises TypeError.

    def __eq__(self, other):
        """Return True if other is a set with exactly the same elements."""
        if isinstance(other, BaseSet):
            return self._data == other._data
        else:
            return False

    def __ne__(self, other):
        """Return True unless other is a set with the same elements."""
        if isinstance(other, BaseSet):
            return self._data != other._data
        else:
            return True

    # Copying operations

    def copy(self):
        """Return a shallow copy of a set."""
        result = self.__class__()
        result._data.update(self._data)
        return result

    __copy__ = copy  # For the copy module

    def __deepcopy__(self, memo):
        """Return a deep copy of a set; used by copy module."""
        # This pre-creates the result and inserts it in the memo
        # early, in case the deep copy recurses into another reference
        # to this same set.  A set can't be an element of itself, but
        # it can certainly contain an object that has a reference to
        # itself.
        from copy import deepcopy
        result = self.__class__()
        memo[id(self)] = result
        data = result._data
        value = True
        for elt in self:
            data[deepcopy(elt, memo)] = value
        return result

    # Standard set operations: union, intersection, both differences.
    # Each has an operator version (e.g. __or__, invoked with |) and a
    # method version (e.g. union).
    # Subtle:  Each pair requires distinct code so that the outcome is
    # correct when the type of other isn't suitable.  For example, if
    # we did "union = __or__" instead, then Set().union(3) would return
    # NotImplemented instead of raising TypeError (albeit that *why* it
    # raises TypeError as-is is also a bit subtle).

    def __or__(self, other):
        """Return the union of two sets as a new set.

        (I.e. all elements that are in either set.)
        """
        if not isinstance(other, BaseSet):
            return NotImplemented
        return self.union(other)

    def union(self, other):
        """Return the union of two sets as a new set.

        (I.e. all elements that are in either set.)
        """
        result = self.__class__(self)
        result._update(other)
        return result

    def __and__(self, other):
        """Return the intersection of two sets as a new set.

        (I.e. all elements that are in both sets.)
        """
        if not isinstance(other, BaseSet):
            return NotImplemented
        return self.intersection(other)

    def intersection(self, other):
        """Return the intersection of two sets as a new set.

        (I.e. all elements that are in both sets.)
        """
        if not isinstance(other, BaseSet):
            other = Set(other)
        # Iterate over the smaller operand and probe the larger one.
        if len(self) <= len(other):
            little, big = self, other
        else:
            little, big = other, self
        common = ifilter(big._data.__contains__, little)
        return self.__class__(common)

    def __xor__(self, other):
        """Return the symmetric difference of two sets as a new set.

        (I.e. all elements that are in exactly one of the sets.)
        """
        if not isinstance(other, BaseSet):
            return NotImplemented
        return self.symmetric_difference(other)

    def symmetric_difference(self, other):
        """Return the symmetric difference of two sets as a new set.

        (I.e. all elements that are in exactly one of the sets.)
        """
        result = self.__class__()
        data = result._data
        value = True
        selfdata = self._data
        try:
            otherdata = other._data
        except AttributeError:
            # Not a set: build a temporary Set from the iterable.
            otherdata = Set(other)._data
        for elt in ifilterfalse(otherdata.__contains__, selfdata):
            data[elt] = value
        for elt in ifilterfalse(selfdata.__contains__, otherdata):
            data[elt] = value
        return result

    def __sub__(self, other):
        """Return the difference of two sets as a new Set.

        (I.e. all elements that are in this set and not in the other.)
        """
        if not isinstance(other, BaseSet):
            return NotImplemented
        return self.difference(other)

    def difference(self, other):
        """Return the difference of two sets as a new Set.

        (I.e. all elements that are in this set and not in the other.)
        """
        result = self.__class__()
        data = result._data
        try:
            otherdata = other._data
        except AttributeError:
            # Not a set: build a temporary Set from the iterable.
            otherdata = Set(other)._data
        value = True
        for elt in ifilterfalse(otherdata.__contains__, self):
            data[elt] = value
        return result

    # Membership test

    def __contains__(self, element):
        """Report whether an element is a member of a set.

        (Called in response to the expression `element in self'.)
        """
        try:
            return element in self._data
        except TypeError:
            # The dict lookup rejected the element; if it offers a
            # temporarily-immutable stand-in, look that up instead.
            transform = getattr(element, "__as_temporarily_immutable__", None)
            if transform is None:
                raise  # re-raise the TypeError exception we caught
            return transform() in self._data

    # Subset and superset test

    def issubset(self, other):
        """Report whether another set contains this set."""
        self._binary_sanity_check(other)
        if len(self) > len(other):  # Fast check for obvious cases
            return False
        # The first element of self missing from other settles it.
        for elt in ifilterfalse(other._data.__contains__, self):
            return False
        return True

    def issuperset(self, other):
        """Report whether this set contains another set."""
        self._binary_sanity_check(other)
        if len(self) < len(other):  # Fast check for obvious cases
            return False
        # The first element of other missing from self settles it.
        for elt in ifilterfalse(self._data.__contains__, other):
            return False
        return True

    # Inequality comparisons using the is-subset relation.
    __le__ = issubset
    __ge__ = issuperset

    def __lt__(self, other):
        """Report whether this set is a proper subset of other."""
        self._binary_sanity_check(other)
        return len(self) < len(other) and self.issubset(other)

    def __gt__(self, other):
        """Report whether this set is a proper superset of other."""
        self._binary_sanity_check(other)
        return len(self) > len(other) and self.issuperset(other)

    # We inherit object.__hash__, so we must deny this explicitly
    __hash__ = None

    # Assorted helpers

    def _binary_sanity_check(self, other):
        """Raise TypeError unless other is a BaseSet instance."""
        # Check that the other argument to a binary operation is also
        # a set, raising a TypeError otherwise.
        if not isinstance(other, BaseSet):
            raise TypeError("Binary operation only permitted between sets")

    def _compute_hash(self):
        """Return the xor of the hash codes of all elements (0 if empty)."""
        # Calculate hash code for a set by xor'ing the hash codes of
        # the elements.  This ensures that the hash code does not depend
        # on the order in which elements are added to the set.  This is
        # not called __hash__ because a BaseSet should not be hashable;
        # only an ImmutableSet is hashable.
        result = 0
        for elt in self:
            result ^= hash(elt)
        return result

    def _update(self, iterable):
        """Add every element of iterable to this set's dictionary.

        An element that raises TypeError when used as a dictionary key
        is replaced by the result of its __as_immutable__() method; if
        it has no such method the TypeError propagates.
        """
        # The main loop for update() and the subclass __init__() methods.
        data = self._data

        # Use the fast update() method when a dictionary is available.
        if isinstance(iterable, BaseSet):
            data.update(iterable._data)
            return

        value = True

        if type(iterable) in (list, tuple, xrange):
            # Optimized: we know that __iter__() and next() can't
            # raise TypeError, so we can move 'try:' out of the loop.
            it = iter(iterable)
            while True:
                try:
                    for element in it:
                        data[element] = value
                    return
                except TypeError:
                    # 'element' is the item that could not be stored;
                    # 'it' resumes after it on the next pass.
                    transform = getattr(element, "__as_immutable__", None)
                    if transform is None:
                        raise  # re-raise the TypeError exception we caught
                    data[transform()] = value
        else:
            # Safe: only catch TypeError where intended
            for element in iterable:
                try:
                    data[element] = value
                except TypeError:
                    transform = getattr(element, "__as_immutable__", None)
                    if transform is None:
                        raise  # re-raise the TypeError exception we caught
                    data[transform()] = value
376
377
class ImmutableSet(BaseSet):
    """Immutable set class.

    Adds hashing to BaseSet.  The hash code is computed on first use and
    cached in ``_hashcode`` (None means "not computed yet").
    """

    __slots__ = ['_hashcode']

    # BaseSet + hashing

    def __init__(self, iterable=None):
        """Construct an immutable set from an optional iterable."""
        self._hashcode = None
        self._data = {}
        if iterable is not None:
            self._update(iterable)

    def __hash__(self):
        """Return the hash code of the set, computing it on first use."""
        if self._hashcode is None:
            self._hashcode = self._compute_hash()
        return self._hashcode

    def __getstate__(self):
        """Return the pickle state: (element dict, cached hash or None).

        The two-item layout is kept unchanged so existing pickles and
        older readers stay compatible.
        """
        return self._data, self._hashcode

    def __setstate__(self, state):
        """Restore the state produced by __getstate__().

        Only the element dictionary is taken from the pickle.  The stored
        hash code is dropped: it was derived from the element hashes of
        the process that created the pickle, and those are not guaranteed
        to be the same here (identity-based hashes, hash randomization).
        Keeping it could make this set hash differently from an equal set
        built in this process.  __hash__() recomputes it on demand.
        """
        self._data, _stale_hashcode = state
        self._hashcode = None
402
class Set(BaseSet):
    """Mutable set class."""

    __slots__ = []

    # BaseSet + operations requiring mutability; no hashing

    def __init__(self, iterable=None):
        """Construct a set from an optional iterable."""
        self._data = {}
        if iterable is None:
            return
        self._update(iterable)

    def __getstate__(self):
        # The dict is wrapped in a 1-tuple -- presumably so the state is
        # never a false value even for an empty set (pickle ignores a
        # false __getstate__ result); confirm against the pickle docs.
        return (self._data,)

    def __setstate__(self, data):
        # Inverse of __getstate__: unwrap the 1-tuple.
        (self._data,) = data

    # In-place union, intersection, differences.
    # Subtle:  The xyz_update() functions deliberately return None,
    # as do all mutating operations on built-in container types.
    # The __xyz__ spellings have to return self, though.

    def __ior__(self, other):
        """In-place union (self |= other); other must be a set."""
        self._binary_sanity_check(other)
        self._data.update(other._data)
        return self

    def union_update(self, other):
        """In-place union with another set or any iterable."""
        self._update(other)

    def __iand__(self, other):
        """In-place intersection (self &= other); other must be a set."""
        self._binary_sanity_check(other)
        common = self & other
        # Adopt the dictionary of the freshly built intersection.
        self._data = common._data
        return self

    def intersection_update(self, other):
        """In-place intersection with another set or any iterable."""
        if not isinstance(other, BaseSet):
            common = self.intersection(other)
            self._data = common._data
        else:
            # Delegates to __iand__, which swaps in the new dictionary.
            self &= other

    def __ixor__(self, other):
        """In-place symmetric difference (self ^= other); needs a set."""
        self._binary_sanity_check(other)
        self.symmetric_difference_update(other)
        return self

    def symmetric_difference_update(self, other):
        """In-place symmetric difference with a set or any iterable."""
        members = self._data
        if not isinstance(other, BaseSet):
            other = Set(other)
        if other is self:
            # s ^ s is empty.  Clearing also empties 'other', so the
            # loop below has nothing left to do.
            self.clear()
        for item in other:
            # Toggle membership of every element of other.
            if item in members:
                del members[item]
            else:
                members[item] = True

    def __isub__(self, other):
        """In-place difference (self -= other); other must be a set."""
        self._binary_sanity_check(other)
        self.difference_update(other)
        return self

    def difference_update(self, other):
        """Remove from this set every element also found in other."""
        members = self._data
        if not isinstance(other, BaseSet):
            other = Set(other)
        if other is self:
            # s - s is empty.  Clearing also empties 'other', so the
            # loop below has nothing left to do.
            self.clear()
        for item in other:
            if item in members:
                del members[item]

    # Python dict-like mass mutations: update, clear

    def update(self, iterable):
        """Insert every value produced by an iterable (list, file, ...)."""
        self._update(iterable)

    def clear(self):
        """Empty the set."""
        self._data.clear()

    # Single-element mutations: add, remove, discard

    def add(self, element):
        """Insert an element into the set.

        Adding an element that is already present changes nothing.
        """
        try:
            self._data[element] = True
        except TypeError:
            # Not usable as a dict key: store its immutable form if it
            # can supply one.
            freeze = getattr(element, "__as_immutable__", None)
            if freeze is None:
                raise  # re-raise the TypeError exception we caught
            self._data[freeze()] = True

    def remove(self, element):
        """Delete an element that must be present.

        A missing element raises KeyError.
        """
        try:
            del self._data[element]
        except TypeError:
            # Not usable as a dict key: delete via its temporarily
            # immutable stand-in if it can supply one.
            wrap = getattr(element, "__as_temporarily_immutable__", None)
            if wrap is None:
                raise  # re-raise the TypeError exception we caught
            del self._data[wrap()]

    def discard(self, element):
        """Delete an element if present.

        A missing element is silently ignored.
        """
        try:
            self.remove(element)
        except KeyError:
            pass

    def pop(self):
        """Remove one arbitrary element and return it."""
        key, _ = self._data.popitem()
        return key

    def __as_immutable__(self):
        # Return a copy of self as an immutable set
        frozen = ImmutableSet(self)
        return frozen

    def __as_temporarily_immutable__(self):
        # Return self wrapped in a temporarily immutable set
        wrapper = _TemporarilyImmutableSet(self)
        return wrapper
546
547
class _TemporarilyImmutableSet(BaseSet):
    """Wrap a mutable set as if it was temporarily immutable.

    This only supplies hashing and equality comparisons.
    """

    def __init__(self, set):
        # Keep a reference to the wrapped set for hashing.
        self._set = set
        # Share (not copy) the wrapped set's dictionary.
        # Needed by ImmutableSet.__eq__()
        self._data = set._data

    def __hash__(self):
        # Computed from the wrapped set's elements on every call;
        # nothing is cached here.
        wrapped = self._set
        return wrapped._compute_hash()
Note: See TracBrowser for help on using the repository browser.