source: python/trunk/Lib/sets.py@ 12

Last change on this file since 12 was 2, checked in by Yuri Dario, 15 years ago

Initial import for vendor code.

  • Property svn:eol-style set to native
File size: 19.2 KB
Line 
1"""Classes to represent arbitrary sets (including sets of sets).
2
3This module implements sets using dictionaries whose values are
4ignored. The usual operations (union, intersection, deletion, etc.)
5are provided as both methods and operators.
6
7Important: sets are not sequences! While they support 'x in s',
8'len(s)', and 'for x in s', none of those operations are unique for
9sequences; for example, mappings support all three as well. The
10characteristic operation for sequences is subscripting with small
11integers: s[i], for i in range(len(s)). Sets don't support
12subscripting at all. Also, sequences allow multiple occurrences and
13their elements have a definite order; sets on the other hand don't
14record multiple occurrences and don't remember the order of element
15insertion (which is why they don't support s[i]).
16
17The following classes are provided:
18
19BaseSet -- All the operations common to both mutable and immutable
20 sets. This is an abstract class, not meant to be directly
21 instantiated.
22
23Set -- Mutable sets, subclass of BaseSet; not hashable.
24
25ImmutableSet -- Immutable sets, subclass of BaseSet; hashable.
26 The iterable argument is optional; without it the set is empty.
27
28_TemporarilyImmutableSet -- A wrapper around a Set, hashable,
29 giving the same hash value as the immutable set equivalent
30 would have. Do not use this class directly.
31
32Only hashable objects can be added to a Set. In particular, you cannot
33really add a Set as an element to another Set; if you try, what is
34actually added is an ImmutableSet built from it (it compares equal to
35the one you tried adding).
36
37When you ask if `x in y' where x is a Set and y is a Set or
38ImmutableSet, x is wrapped into a _TemporarilyImmutableSet z, and
39what's tested is actually `z in y'.
40
41"""
42
43# Code history:
44#
45# - Greg V. Wilson wrote the first version, using a different approach
46# to the mutable/immutable problem, and inheriting from dict.
47#
48# - Alex Martelli modified Greg's version to implement the current
49# Set/ImmutableSet approach, and make the data an attribute.
50#
51# - Guido van Rossum rewrote much of the code, made some API changes,
52# and cleaned up the docstrings.
53#
54# - Raymond Hettinger added a number of speedups and other
55# improvements.
56
57from __future__ import generators
try:
    from itertools import ifilter, ifilterfalse
except ImportError:
    # Code to make the module run under Py2.2
    # The import above failed, so define generator-based stand-ins for
    # the two itertools helpers used by this module.
    def ifilter(predicate, iterable):
        # Yield each item of iterable for which predicate(item) is true.
        # A predicate of None means "test the item's own truth value".
        if predicate is None:
            def predicate(x):
                return x
        for x in iterable:
            if predicate(x):
                yield x
    def ifilterfalse(predicate, iterable):
        # Yield each item of iterable for which predicate(item) is false.
        # A predicate of None means "test the item's own truth value".
        if predicate is None:
            def predicate(x):
                return x
        for x in iterable:
            if not predicate(x):
                yield x
    # Make sure the names True and False exist: if looking them up raises
    # NameError, bind them to the results of two comparisons.
    try:
        True, False
    except NameError:
        True, False = (0==0, 0!=0)
80
# Names exported by "from sets import *"; _TemporarilyImmutableSet is
# deliberately left out.
__all__ = ['BaseSet', 'Set', 'ImmutableSet']

import warnings
# Runs as part of the module body, i.e. when the module is imported.
# stacklevel=2 asks warnings to attribute the message to the caller's
# frame rather than to this line.
warnings.warn("the sets module is deprecated", DeprecationWarning,
              stacklevel=2)
86
class BaseSet(object):
    """Abstract base shared by the mutable and immutable set classes.

    The elements are the keys of the dictionary stored in ``_data``; the
    values of that dictionary are ignored.  Concrete subclasses create
    ``_data`` in their own ``__init__``.
    """

    __slots__ = ['_data']

    # Constructor

    def __init__(self):
        """Refuse direct instantiation: this is an abstract class."""
        # Concrete subclasses must not call this method.
        if self.__class__ is BaseSet:
            raise TypeError("BaseSet is an abstract class. "
                            "Use Set or ImmutableSet.")

    # Standard protocols: __len__, __repr__, __str__, __iter__

    def __len__(self):
        """Return how many elements the set holds."""
        return len(self._data)

    def __repr__(self):
        """Return the string form of the set.

        The result looks like 'Set([<list of elements>])'.
        """
        return self._repr()

    # str() of a set gives the same text as repr()
    __str__ = __repr__

    def _repr(self, sorted=False):
        # Build '<class name>(<list of elements>)'.  A true 'sorted'
        # sorts the element list in place before formatting.
        members = self._data.keys()
        if sorted:
            members.sort()
        class_name = self.__class__.__name__
        return '%s(%r)' % (class_name, members)

    def __iter__(self):
        """Return an iterator over the elements of a set.

        This is the key iterator of the underlying dict.
        """
        return iter(self._data)

    # Three-way comparison is not supported.  However, because __eq__ is
    # tried before __cmp__, if Set x == Set y, x.__eq__(y) returns True and
    # then cmp(x, y) returns 0 (Python doesn't actually call __cmp__ in this
    # case).

    def __cmp__(self, other):
        raise TypeError("can't compare sets using cmp()")

    # Equality comparisons using the underlying dicts.  Mixed-type
    # comparisons are allowed here, where Set == z for non-Set z always
    # returns False, and Set != z always True.  This allows expressions
    # like "x in y" to give the expected result when y is a sequence of
    # mixed types, not raising a pointless TypeError just because y
    # contains a Set, or x is a Set and y contains a non-set ("in" invokes
    # only __eq__).
    # Subtle:  it would be nicer if __eq__ and __ne__ could return
    # NotImplemented instead of True or False.  Then the other comparand
    # would get a chance to determine the result, and if the other
    # comparand also returned NotImplemented then it would fall back to
    # object address comparison (which would always return False for
    # __eq__ and always True for __ne__).  However, that doesn't work,
    # because this type *also* implements __cmp__:  if, e.g., __eq__
    # returns NotImplemented, Python tries __cmp__ next, and the __cmp__
    # here then raises TypeError.

    def __eq__(self, other):
        if not isinstance(other, BaseSet):
            return False
        return self._data == other._data

    def __ne__(self, other):
        if not isinstance(other, BaseSet):
            return True
        return self._data != other._data

    # Copying operations

    def copy(self):
        """Return a shallow copy of a set."""
        clone = self.__class__()
        clone._data.update(self._data)
        return clone

    __copy__ = copy # For the copy module

    def __deepcopy__(self, memo):
        """Return a deep copy of a set; used by the copy module."""
        # The result is created and registered in the memo before any
        # element is copied, in case the deep copy recurses into another
        # reference to this same set.  A set can't be an element of
        # itself, but it can certainly contain an object that has a
        # reference to it.
        from copy import deepcopy
        clone = self.__class__()
        memo[id(self)] = clone
        target = clone._data
        for member in self:
            target[deepcopy(member, memo)] = True
        return clone

    # Standard set operations: union, intersection, both differences.
    # Each has an operator version (e.g. __or__, invoked with |) and a
    # method version (e.g. union).
    # Subtle:  Each pair requires distinct code so that the outcome is
    # correct when the type of other isn't suitable.  For example, if
    # we did "union = __or__" instead, then Set().union(3) would return
    # NotImplemented instead of raising TypeError (albeit that *why* it
    # raises TypeError as-is is also a bit subtle).

    def __or__(self, other):
        """Return the union of two sets as a new set.

        (I.e. all elements that are in either set.)
        """
        if isinstance(other, BaseSet):
            return self.union(other)
        return NotImplemented

    def union(self, other):
        """Return the union of this set and 'other' as a new set.

        (I.e. all elements that are in either one.)
        """
        merged = self.__class__(self)
        merged._update(other)
        return merged

    def __and__(self, other):
        """Return the intersection of two sets as a new set.

        (I.e. all elements that are in both sets.)
        """
        if isinstance(other, BaseSet):
            return self.intersection(other)
        return NotImplemented

    def intersection(self, other):
        """Return the intersection of this set and 'other' as a new set.

        (I.e. all elements that are in both.)  An 'other' that is not
        a set is first converted with Set(other).
        """
        if not isinstance(other, BaseSet):
            other = Set(other)
        # Walk the smaller operand and probe the dict of the larger one.
        if len(self) > len(other):
            smaller, larger = other, self
        else:
            smaller, larger = self, other
        return self.__class__(ifilter(larger._data.has_key, smaller))

    def __xor__(self, other):
        """Return the symmetric difference of two sets as a new set.

        (I.e. all elements that are in exactly one of the sets.)
        """
        if isinstance(other, BaseSet):
            return self.symmetric_difference(other)
        return NotImplemented

    def symmetric_difference(self, other):
        """Return the symmetric difference of two sets as a new set.

        (I.e. all elements that are in exactly one of the sets.)  An
        'other' without a _data attribute is converted with Set(other).
        """
        result = self.__class__()
        collected = result._data
        mine = self._data
        try:
            theirs = other._data
        except AttributeError:
            theirs = Set(other)._data
        # Elements only in self, then elements only in other.
        for member in ifilterfalse(theirs.has_key, mine):
            collected[member] = True
        for member in ifilterfalse(mine.has_key, theirs):
            collected[member] = True
        return result

    def __sub__(self, other):
        """Return the difference of two sets as a new Set.

        (I.e. all elements that are in this set and not in the other.)
        """
        if isinstance(other, BaseSet):
            return self.difference(other)
        return NotImplemented

    def difference(self, other):
        """Return the difference of two sets as a new Set.

        (I.e. all elements that are in this set and not in the other.)
        An 'other' without a _data attribute is converted with
        Set(other).
        """
        result = self.__class__()
        collected = result._data
        try:
            theirs = other._data
        except AttributeError:
            theirs = Set(other)._data
        for member in ifilterfalse(theirs.has_key, self):
            collected[member] = True
        return result

    # Membership test

    def __contains__(self, element):
        """Report whether an element is a member of a set.

        (Called in response to the expression `element in self'.)
        """
        try:
            return element in self._data
        except TypeError:
            # The lookup failed; if the element offers a temporarily
            # immutable stand-in, look that up instead.
            wrap = getattr(element, "__as_temporarily_immutable__", None)
            if wrap is not None:
                return wrap() in self._data
            raise # re-raise the TypeError exception we caught

    # Subset and superset test

    def issubset(self, other):
        """Report whether another set contains this set."""
        self._binary_sanity_check(other)
        if len(self) > len(other):  # Fast check for obvious cases
            return False
        # The first element of self missing from other settles it.
        for missing in ifilterfalse(other._data.has_key, self):
            return False
        return True

    def issuperset(self, other):
        """Report whether this set contains another set."""
        self._binary_sanity_check(other)
        if len(self) < len(other):  # Fast check for obvious cases
            return False
        # The first element of other missing from self settles it.
        for missing in ifilterfalse(self._data.has_key, other):
            return False
        return True

    # Inequality comparisons using the is-subset relation.
    __le__ = issubset
    __ge__ = issuperset

    def __lt__(self, other):
        self._binary_sanity_check(other)
        if len(self) >= len(other):
            return False
        return self.issubset(other)

    def __gt__(self, other):
        self._binary_sanity_check(other)
        if len(self) <= len(other):
            return False
        return self.issuperset(other)

    # Assorted helpers

    def _binary_sanity_check(self, other):
        # Raise TypeError unless the other operand of a binary
        # operation is also a set.
        if not isinstance(other, BaseSet):
            raise TypeError("Binary operation only permitted between sets")

    def _compute_hash(self):
        # Calculate the hash code for a set by xor'ing the hash codes of
        # its elements, so that the result does not depend on the order
        # in which elements were added.  This is not called __hash__
        # because a BaseSet should not be hashable; only an ImmutableSet
        # is hashable.
        combined = 0
        for member in self:
            combined = combined ^ hash(member)
        return combined

    def _update(self, iterable):
        # The main loop for update() and the subclass __init__() methods:
        # every item of 'iterable' becomes a key of self._data.
        data = self._data

        # Another set already carries a dictionary; merge it in one call.
        if isinstance(iterable, BaseSet):
            data.update(iterable._data)
            return

        if type(iterable) in (list, tuple, xrange):
            # Optimized: we know that __iter__() and next() can't raise
            # TypeError for these types, so the 'try:' is moved out of
            # the loop.  When an insertion fails, the element is
            # converted and the outer loop resumes the same iterator.
            pending = iter(iterable)
            while True:
                try:
                    for element in pending:
                        data[element] = True
                    return
                except TypeError:
                    freeze = getattr(element, "__as_immutable__", None)
                    if freeze is None:
                        raise # re-raise the TypeError exception we caught
                    data[freeze()] = True
        else:
            # Safe: only catch TypeError where intended, i.e. around the
            # insertion and not around the iteration itself.
            for element in iterable:
                try:
                    data[element] = True
                except TypeError:
                    freeze = getattr(element, "__as_immutable__", None)
                    if freeze is None:
                        raise # re-raise the TypeError exception we caught
                    data[freeze()] = True
395
396
class ImmutableSet(BaseSet):
    """Immutable, hashable set."""

    __slots__ = ['_hashcode']

    # BaseSet + hashing

    def __init__(self, iterable=None):
        """Build an immutable set, filled from 'iterable' when given."""
        # The hash is computed lazily; None means "not computed yet".
        self._hashcode = None
        self._data = {}
        if iterable is None:
            return
        self._update(iterable)

    def __hash__(self):
        # Compute the hash on first use and remember it afterwards.
        cached = self._hashcode
        if cached is None:
            cached = self._hashcode = self._compute_hash()
        return cached

    def __getstate__(self):
        # Pickle support: the element dict plus the cached hash.
        return (self._data, self._hashcode)

    def __setstate__(self, state):
        # Inverse of __getstate__: unpack the pair, then restore both.
        data, hashcode = state
        self._data = data
        self._hashcode = hashcode
421
class Set(BaseSet):
    """Mutable set class; instances are not hashable."""

    __slots__ = []

    # BaseSet + operations requiring mutability; no hashing

    def __init__(self, iterable=None):
        """Build a set, filled from 'iterable' when one is given."""
        self._data = {}
        if iterable is None:
            return
        self._update(iterable)

    def __getstate__(self):
        # Pickle support.  The dict is wrapped in a 1-tuple, which is
        # always true; pickle skips __setstate__() when __getstate__()
        # returns a false value, as the dict of an empty set would be.
        return (self._data,)

    def __setstate__(self, data):
        # Inverse of __getstate__: unwrap the 1-tuple.
        (self._data,) = data

    # We inherit object.__hash__, so we must deny this explicitly
    __hash__ = None

    # In-place union, intersection, differences.
    # Subtle:  The xyz_update() functions deliberately return None,
    # as do all mutating operations on built-in container types.
    # The __xyz__ spellings have to return self, though.

    def __ior__(self, other):
        """Update a set with the union of itself and another."""
        self._binary_sanity_check(other)
        self._data.update(other._data)
        return self

    def union_update(self, other):
        """Update a set with the union of itself and another."""
        self._update(other)

    def __iand__(self, other):
        """Update a set with the intersection of itself and another."""
        self._binary_sanity_check(other)
        # Adopt the dict of a freshly computed intersection.
        self._data = (self & other)._data
        return self

    def intersection_update(self, other):
        """Update a set with the intersection of itself and another."""
        if not isinstance(other, BaseSet):
            self._data = (self.intersection(other))._data
        else:
            self &= other

    def __ixor__(self, other):
        """Update a set with the symmetric difference of itself and another."""
        self._binary_sanity_check(other)
        self.symmetric_difference_update(other)
        return self

    def symmetric_difference_update(self, other):
        """Update a set with the symmetric difference of itself and another."""
        data = self._data
        if not isinstance(other, BaseSet):
            other = Set(other)
        if self is other:
            # x ^ x is empty; after clearing, the loop below sees
            # nothing to iterate over.
            self.clear()
        for member in other:
            # Toggle membership: drop what is shared, add what is new.
            if member in data:
                del data[member]
            else:
                data[member] = True

    def __isub__(self, other):
        """Remove all elements of another set from this set."""
        self._binary_sanity_check(other)
        self.difference_update(other)
        return self

    def difference_update(self, other):
        """Remove all elements of another set from this set."""
        data = self._data
        if not isinstance(other, BaseSet):
            other = Set(other)
        if self is other:
            # x - x is empty; after clearing, the loop below sees
            # nothing to iterate over.
            self.clear()
        for shared in ifilter(data.has_key, other):
            del data[shared]

    # Python dict-like mass mutations: update, clear

    def update(self, iterable):
        """Add all values from an iterable (such as a list or file)."""
        self._update(iterable)

    def clear(self):
        """Remove all elements from this set."""
        self._data.clear()

    # Single-element mutations: add, remove, discard

    def add(self, element):
        """Add an element to a set.

        This has no effect if the element is already present.
        """
        try:
            self._data[element] = True
        except TypeError:
            # Store an immutable equivalent when the element offers one.
            freeze = getattr(element, "__as_immutable__", None)
            if freeze is None:
                raise # re-raise the TypeError exception we caught
            self._data[freeze()] = True

    def remove(self, element):
        """Remove an element from a set; it must be a member.

        If the element is not a member, raise a KeyError.
        """
        try:
            del self._data[element]
        except TypeError:
            # Look the element up through its temporarily immutable
            # stand-in when it offers one.
            wrap = getattr(element, "__as_temporarily_immutable__", None)
            if wrap is None:
                raise # re-raise the TypeError exception we caught
            del self._data[wrap()]

    def discard(self, element):
        """Remove an element from a set if it is a member.

        If the element is not a member, do nothing.
        """
        try:
            self.remove(element)
        except KeyError:
            pass

    def pop(self):
        """Remove and return an arbitrary set element."""
        member, ignored = self._data.popitem()
        return member

    def __as_immutable__(self):
        # Return a copy of self as an immutable set
        return ImmutableSet(self)

    def __as_temporarily_immutable__(self):
        # Return self wrapped in a temporarily immutable set
        return _TemporarilyImmutableSet(self)
568
569
class _TemporarilyImmutableSet(BaseSet):
    # Hashable wrapper that lets a mutable set be treated as immutable
    # for the duration of a lookup.
    # This only supplies hashing and equality comparisons.

    def __init__(self, set):
        # Keep a reference to the wrapped set; nothing is copied.
        self._set = set
        # Share the wrapped set's dict.  Needed by ImmutableSet.__eq__()
        self._data = set._data

    def __hash__(self):
        # Recomputed from the wrapped set's current elements on each call.
        wrapped = self._set
        return wrapped._compute_hash()
Note: See TracBrowser for help on using the repository browser.