Context Navigation

← Previous Change
Next Change →

difflib.py

Timestamp:

Mar 19, 2014, 11:11:30 AM (11 years ago)

Author:

dmik

Message:

python: Update vendor to 2.7.6.

File:

: 1 edited

python/vendor/current/Lib/difflib.py (modified) (16 diffs)

Legend:

: Unmodified
: Added
: Removed

python/vendor/current/Lib/difflib.py

-              r2
+              r388
     """
     def __init__(self, isjunk=None, a='', b=''):
+    def __init__(self, isjunk=None, a='', b='', autojunk=True):
         """Construct a SequenceMatcher.
 …
         default, an empty string.  The elements of b must be hashable. See
         also .set_seqs() and .set_seq2().
+        Optional arg autojunk should be set to False to disable the
+        "automatic junk heuristic" that treats popular elements as junk
+        (see module documentation for more information).
         """
 …
         # isbpopular
         #      for x in b, isbpopular(x) is true iff b is reasonably long
+        #      (at least 200 elements) and x accounts for more than 1% of
+        #      its elements.  DOES NOT WORK for x in a!
+        #      (at least 200 elements) and x accounts for more than 1 + 1% of
+        #      its elements (when autojunk is enabled).
+        #      DOES NOT WORK for x in a!
         self.isjunk = isjunk
         self.a = self.b = None
+        self.autojunk = autojunk
         self.set_seqs(a, b)
 …
     # also creates the fast isbjunk function ...
     # b2j also does not contain entries for "popular" elements, meaning
     # elements that account for more than 1% of the total elements, and
+    # elements that account for more than 1 + 1% of the total elements, and
     # when the sequence is reasonably large (>= 200 elements); this can
     # be viewed as an adaptive notion of semi-junk, and yields an enormous
 …
         # from the start.
         b = self.b
+        self.b2j = b2j = {}
+        for i, elt in enumerate(b):
+            indices = b2j.setdefault(elt, [])
+            indices.append(i)
+        # Purge junk elements
+        junk = set()
+        isjunk = self.isjunk
+        if isjunk:
+            for elt in list(b2j.keys()):  # using list() since b2j is modified
+                if isjunk(elt):
+                    junk.add(elt)
+                    del b2j[elt]
+        # Purge popular elements that are not junk
+        popular = set()
         n = len(b)
+        self.b2j = b2j = {}
+        populardict = {}
+        for i, elt in enumerate(b):
+            if elt in b2j:
+                indices = b2j[elt]
+                if n >= 200 and len(indices) * 100 > n:
+                    populardict[elt] = 1
+                    del indices[:]
+                else:
+                    indices.append(i)
+            else:
+                b2j[elt] = [i]
+        # Purge leftover indices for popular elements.
+        for elt in populardict:
+            del b2j[elt]
+        # Now b2j.keys() contains elements uniquely, and especially when
+        # the sequence is a string, that's usually a good deal smaller
+        # than len(string).  The difference is the number of isjunk calls
+        # saved.
+        isjunk = self.isjunk
+        junkdict = {}
+        if isjunk:
+            for d in populardict, b2j:
+                for elt in d.keys():
+                    if isjunk(elt):
+                        junkdict[elt] = 1
+                        del d[elt]
+        # Now for x in b, isjunk(x) == x in junkdict, but the
+        # latter is much faster.  Note too that while there may be a
+        # lot of junk in the sequence, the number of *unique* junk
+        # elements is probably small.  So the memory burden of keeping
+        # this dict alive is likely trivial compared to the size of b2j.
+        self.isbjunk = junkdict.__contains__
+        self.isbpopular = populardict.__contains__
+        if self.autojunk and n >= 200:
+            ntest = n // 100 + 1
+            for elt, idxs in list(b2j.items()):
+                if len(idxs) > ntest:
+                    popular.add(elt)
+                    del b2j[elt]
+        # Now for x in b, isjunk(x) == x in junk, but the latter is much faster.
+        # Sicne the number of *unique* junk elements is probably small, the
+        # memory burden of keeping this set alive is likely trivial compared to
+        # the size of b2j.
+        self.isbjunk = junk.__contains__
+        self.isbpopular = popular.__contains__
     def find_longest_match(self, alo, ahi, blo, bhi):
 …
         """ Isolate change clusters by eliminating ranges with no changes.
         Return a generator of groups with upto n lines of context.
+        Return a generator of groups with up to n lines of context.
         Each group is in the same format as returned by get_opcodes().
 …
+########################################################################
+###  Unified Diff
+########################################################################
+def _format_range_unified(start, stop):
+    'Convert range to the "ed" format'
+    # Per the diff spec at http://www.unix.org/single_unix_specification/
+    beginning = start + 1     # lines start numbering with one
+    length = stop - start
+    if length == 1:
+        return '{}'.format(beginning)
+    if not length:
+        beginning -= 1        # empty ranges begin at line just before the range
+    return '{},{}'.format(beginning, length)
 def unified_diff(a, b, fromfile='', tofile='', fromfiledate='',
                  tofiledate='', n=3, lineterm='\n'):
 …
     The unidiff format normally has a header for filenames and modification
     times.  Any or all of these may be specified using strings for
     'fromfile', 'tofile', 'fromfiledate', and 'tofiledate'.  The modification
     times are normally expressed in the format returned by time.ctime().
+    'fromfile', 'tofile', 'fromfiledate', and 'tofiledate'.
+    The modification times are normally expressed in the ISO 8601 format.
     Example:
 …
     >>> for line in unified_diff('one two three four'.split(),
     ...             'zero one tree four'.split(), 'Original', 'Current',
     ...             'Sat Jan 26 23:30:50 1991', 'Fri Jun 06 10:20:52 2003',
+    ...             '2005-01-26 23:30:50', '2010-04-02 10:20:52',
     ...             lineterm=''):
     ...     print line
     --- Original Sat Jan 26 23:30:50 1991
     +++ Current Fri Jun 06 10:20:52 2003
+    ...     print line                  # doctest: +NORMALIZE_WHITESPACE
+    --- Original        2005-01-26 23:30:50
+    +++ Current         2010-04-02 10:20:52
@@ -1,4 +1,4 @@
     +zero
 …
     for group in SequenceMatcher(None,a,b).get_grouped_opcodes(n):
         if not started:
-            yield '--- %s %s%s' % (fromfile, fromfiledate, lineterm)
-            yield '+++ %s %s%s' % (tofile, tofiledate, lineterm)
             started = True
+        i1, i2, j1, j2 = group[0][1], group[-1][2], group[0][3], group[-1][4]
+        yield "@@ -%d,%d +%d,%d @@%s" % (i1+1, i2-i1, j1+1, j2-j1, lineterm)
+            fromdate = '\t{}'.format(fromfiledate) if fromfiledate else ''
+            todate = '\t{}'.format(tofiledate) if tofiledate else ''
+            yield '--- {}{}{}'.format(fromfile, fromdate, lineterm)
+            yield '+++ {}{}{}'.format(tofile, todate, lineterm)
+        first, last = group[0], group[-1]
+        file1_range = _format_range_unified(first[1], last[2])
+        file2_range = _format_range_unified(first[3], last[4])
+        yield '@@ -{} +{} @@{}'.format(file1_range, file2_range, lineterm)
         for tag, i1, i2, j1, j2 in group:
             if tag == 'equal':
 …
                     yield ' ' + line
                 continue
             if tag == 'replace' or tag == 'delete':
+            if tag in ('replace', 'delete'):
                 for line in a[i1:i2]:
                     yield '-' + line
             if tag == 'replace' or tag == 'insert':
+            if tag in ('replace', 'insert'):
                 for line in b[j1:j2]:
                     yield '+' + line
+########################################################################
+###  Context Diff
+########################################################################
+def _format_range_context(start, stop):
+    'Convert range to the "ed" format'
+    # Per the diff spec at http://www.unix.org/single_unix_specification/
+    beginning = start + 1     # lines start numbering with one
+    length = stop - start
+    if not length:
+        beginning -= 1        # empty ranges begin at line just before the range
+    if length <= 1:
+        return '{}'.format(beginning)
+    return '{},{}'.format(beginning, beginning + length - 1)
 # See http://www.unix.org/single_unix_specification/
 …
     modification times.  Any or all of these may be specified using
     strings for 'fromfile', 'tofile', 'fromfiledate', and 'tofiledate'.
     The modification times are normally expressed in the format returned
     by time.ctime().  If not specified, the strings default to blanks.
+    The modification times are normally expressed in the ISO 8601 format.
+    If not specified, the strings default to blanks.
     Example:
     >>> print ''.join(context_diff('one\ntwo\nthree\nfour\n'.splitlines(1),
+    ...       'zero\none\ntree\nfour\n'.splitlines(1), 'Original', 'Current',
+    ...       'Sat Jan 26 23:30:50 1991', 'Fri Jun 06 10:22:46 2003')),
+    *** Original Sat Jan 26 23:30:50 1991
+    --- Current Fri Jun 06 10:22:46 2003
+    ...       'zero\none\ntree\nfour\n'.splitlines(1), 'Original', 'Current')),
+    *** Original
+    --- Current
     ***************
     *** 1,4 ****
 …
     """
+    prefix = dict(insert='+ ', delete='- ', replace='! ', equal='  ')
     started = False
-    prefixmap = {'insert':'+ ', 'delete':'- ', 'replace':'! ', 'equal':'  '}
     for group in SequenceMatcher(None,a,b).get_grouped_opcodes(n):
         if not started:
-            yield '*** %s %s%s' % (fromfile, fromfiledate, lineterm)
-            yield '--- %s %s%s' % (tofile, tofiledate, lineterm)
             started = True
+        yield '***************%s' % (lineterm,)
+        if group[-1][2] - group[0][1] >= 2:
+            yield '*** %d,%d ****%s' % (group[0][1]+1, group[-1][2], lineterm)
+        else:
+            yield '*** %d ****%s' % (group[-1][2], lineterm)
+        visiblechanges = [e for e in group if e[0] in ('replace', 'delete')]
+        if visiblechanges:
+            fromdate = '\t{}'.format(fromfiledate) if fromfiledate else ''
+            todate = '\t{}'.format(tofiledate) if tofiledate else ''
+            yield '*** {}{}{}'.format(fromfile, fromdate, lineterm)
+            yield '--- {}{}{}'.format(tofile, todate, lineterm)
+        first, last = group[0], group[-1]
+        yield '***************' + lineterm
+        file1_range = _format_range_context(first[1], last[2])
+        yield '*** {} ****{}'.format(file1_range, lineterm)
+        if any(tag in ('replace', 'delete') for tag, _, _, _, _ in group):
             for tag, i1, i2, _, _ in group:
                 if tag != 'insert':
                     for line in a[i1:i2]:
+                        yield prefixmap[tag] + line
+        if group[-1][4] - group[0][3] >= 2:
+            yield '--- %d,%d ----%s' % (group[0][3]+1, group[-1][4], lineterm)
+        else:
+            yield '--- %d ----%s' % (group[-1][4], lineterm)
+        visiblechanges = [e for e in group if e[0] in ('replace', 'insert')]
+        if visiblechanges:
+                        yield prefix[tag] + line
+        file2_range = _format_range_context(first[3], last[4])
+        yield '--- {} ----{}'.format(file2_range, lineterm)
+        if any(tag in ('replace', 'insert') for tag, _, _, _, _ in group):
             for tag, _, _, j1, j2 in group:
                 if tag != 'delete':
                     for line in b[j1:j2]:
                         yield prefixmap[tag] + line
+                        yield prefix[tag] + line
 def ndiff(a, b, linejunk=None, charjunk=IS_CHARACTER_JUNK):
 …
     charjunk -- passed on to ndiff (see ndiff documentation)
     This function returns an interator which returns a tuple:
+    This function returns an iterator which returns a tuple:
     (from line tuple, to line tuple, boolean flag)
 …
             # expand tabs into spaces
             line = line.expandtabs(self._tabsize)
             # relace spaces from expanded tabs back into tab characters
+            # replace spaces from expanded tabs back into tab characters
             # (we'll replace them with markup after we do differencing)
             line = line.replace(' ','\t')
 …
         # change tabs to spaces before it gets more difficult after we insert
         # markkup
+        # markup
         fromlines,tolines = self._tab_newline_replace(fromlines,tolines)

Note: See TracChangeset for help on using the changeset viewer.

Context Navigation

Changeset 388 for python/vendor/current/Lib/difflib.py

Legend:

python/vendor/current/Lib/difflib.py

Download in other formats: