Ignore:
Timestamp:
Mar 19, 2014, 11:11:30 AM (11 years ago)
Author:
dmik
Message:

python: Update vendor to 2.7.6.

File:
1 edited

Legend:

Unmodified
Added
Removed
  • python/vendor/current/Lib/difflib.py

    r2 r388  
    152152    """
    153153
    154     def __init__(self, isjunk=None, a='', b=''):
     154    def __init__(self, isjunk=None, a='', b='', autojunk=True):
    155155        """Construct a SequenceMatcher.
    156156
     
    170170        default, an empty string.  The elements of b must be hashable. See
    171171        also .set_seqs() and .set_seq2().
     172
     173        Optional arg autojunk should be set to False to disable the
     174        "automatic junk heuristic" that treats popular elements as junk
     175        (see module documentation for more information).
    172176        """
    173177
     
    208212        # isbpopular
    209213        #      for x in b, isbpopular(x) is true iff b is reasonably long
    210         #      (at least 200 elements) and x accounts for more than 1% of
    211         #      its elements.  DOES NOT WORK for x in a!
     214        #      (at least 200 elements) and x accounts for more than 1 + 1% of
     215        #      its elements (when autojunk is enabled).
     216        #      DOES NOT WORK for x in a!
    212217
    213218        self.isjunk = isjunk
    214219        self.a = self.b = None
     220        self.autojunk = autojunk
    215221        self.set_seqs(a, b)
    216222
     
    289295    # also creates the fast isbjunk function ...
    290296    # b2j also does not contain entries for "popular" elements, meaning
    291     # elements that account for more than 1% of the total elements, and
     297    # elements that account for more than 1 + 1% of the total elements, and
    292298    # when the sequence is reasonably large (>= 200 elements); this can
    293299    # be viewed as an adaptive notion of semi-junk, and yields an enormous
     
    310316        # from the start.
    311317        b = self.b
     318        self.b2j = b2j = {}
     319
     320        for i, elt in enumerate(b):
     321            indices = b2j.setdefault(elt, [])
     322            indices.append(i)
     323
     324        # Purge junk elements
     325        junk = set()
     326        isjunk = self.isjunk
     327        if isjunk:
     328            for elt in list(b2j.keys()):  # using list() since b2j is modified
     329                if isjunk(elt):
     330                    junk.add(elt)
     331                    del b2j[elt]
     332
     333        # Purge popular elements that are not junk
     334        popular = set()
    312335        n = len(b)
    313         self.b2j = b2j = {}
    314         populardict = {}
    315         for i, elt in enumerate(b):
    316             if elt in b2j:
    317                 indices = b2j[elt]
    318                 if n >= 200 and len(indices) * 100 > n:
    319                     populardict[elt] = 1
    320                     del indices[:]
    321                 else:
    322                     indices.append(i)
    323             else:
    324                 b2j[elt] = [i]
    325 
    326         # Purge leftover indices for popular elements.
    327         for elt in populardict:
    328             del b2j[elt]
    329 
    330         # Now b2j.keys() contains elements uniquely, and especially when
    331         # the sequence is a string, that's usually a good deal smaller
    332         # than len(string).  The difference is the number of isjunk calls
    333         # saved.
    334         isjunk = self.isjunk
    335         junkdict = {}
    336         if isjunk:
    337             for d in populardict, b2j:
    338                 for elt in d.keys():
    339                     if isjunk(elt):
    340                         junkdict[elt] = 1
    341                         del d[elt]
    342 
    343         # Now for x in b, isjunk(x) == x in junkdict, but the
    344         # latter is much faster.  Note too that while there may be a
    345         # lot of junk in the sequence, the number of *unique* junk
    346         # elements is probably small.  So the memory burden of keeping
    347         # this dict alive is likely trivial compared to the size of b2j.
    348         self.isbjunk = junkdict.__contains__
    349         self.isbpopular = populardict.__contains__
     336        if self.autojunk and n >= 200:
     337            ntest = n // 100 + 1
     338            for elt, idxs in list(b2j.items()):
     339                if len(idxs) > ntest:
     340                    popular.add(elt)
     341                    del b2j[elt]
     342
     343        # Now for x in b, isjunk(x) == x in junk, but the latter is much faster.
     344        # Since the number of *unique* junk elements is probably small, the
     345        # memory burden of keeping this set alive is likely trivial compared to
     346        # the size of b2j.
     347        self.isbjunk = junk.__contains__
     348        self.isbpopular = popular.__contains__
    350349
    351350    def find_longest_match(self, alo, ahi, blo, bhi):
     
    588587        """ Isolate change clusters by eliminating ranges with no changes.
    589588
    590         Return a generator of groups with upto n lines of context.
     589        Return a generator of groups with up to n lines of context.
    591590        Each group is in the same format as returned by get_opcodes().
    592591
     
    11421141
    11431142
     1143########################################################################
     1144###  Unified Diff
     1145########################################################################
     1146
     1147def _format_range_unified(start, stop):
     1148    'Convert range to the "ed" format'
     1149    # Per the diff spec at http://www.unix.org/single_unix_specification/
     1150    beginning = start + 1     # lines start numbering with one
     1151    length = stop - start
     1152    if length == 1:
     1153        return '{}'.format(beginning)
     1154    if not length:
     1155        beginning -= 1        # empty ranges begin at line just before the range
     1156    return '{},{}'.format(beginning, length)
     1157
    11441158def unified_diff(a, b, fromfile='', tofile='', fromfiledate='',
    11451159                 tofiledate='', n=3, lineterm='\n'):
     
    11621176    The unidiff format normally has a header for filenames and modification
    11631177    times.  Any or all of these may be specified using strings for
    1164     'fromfile', 'tofile', 'fromfiledate', and 'tofiledate'.  The modification
    1165     times are normally expressed in the format returned by time.ctime().
     1178    'fromfile', 'tofile', 'fromfiledate', and 'tofiledate'.
     1179    The modification times are normally expressed in the ISO 8601 format.
    11661180
    11671181    Example:
     
    11691183    >>> for line in unified_diff('one two three four'.split(),
    11701184    ...             'zero one tree four'.split(), 'Original', 'Current',
    1171     ...             'Sat Jan 26 23:30:50 1991', 'Fri Jun 06 10:20:52 2003',
     1185    ...             '2005-01-26 23:30:50', '2010-04-02 10:20:52',
    11721186    ...             lineterm=''):
    1173     ...     print line
    1174     --- Original Sat Jan 26 23:30:50 1991
    1175     +++ Current Fri Jun 06 10:20:52 2003
     1187    ...     print line                  # doctest: +NORMALIZE_WHITESPACE
     1188    --- Original        2005-01-26 23:30:50
     1189    +++ Current         2010-04-02 10:20:52
    11761190    @@ -1,4 +1,4 @@
    11771191    +zero
     
    11861200    for group in SequenceMatcher(None,a,b).get_grouped_opcodes(n):
    11871201        if not started:
    1188             yield '--- %s %s%s' % (fromfile, fromfiledate, lineterm)
    1189             yield '+++ %s %s%s' % (tofile, tofiledate, lineterm)
    11901202            started = True
    1191         i1, i2, j1, j2 = group[0][1], group[-1][2], group[0][3], group[-1][4]
    1192         yield "@@ -%d,%d +%d,%d @@%s" % (i1+1, i2-i1, j1+1, j2-j1, lineterm)
     1203            fromdate = '\t{}'.format(fromfiledate) if fromfiledate else ''
     1204            todate = '\t{}'.format(tofiledate) if tofiledate else ''
     1205            yield '--- {}{}{}'.format(fromfile, fromdate, lineterm)
     1206            yield '+++ {}{}{}'.format(tofile, todate, lineterm)
     1207
     1208        first, last = group[0], group[-1]
     1209        file1_range = _format_range_unified(first[1], last[2])
     1210        file2_range = _format_range_unified(first[3], last[4])
     1211        yield '@@ -{} +{} @@{}'.format(file1_range, file2_range, lineterm)
     1212
    11931213        for tag, i1, i2, j1, j2 in group:
    11941214            if tag == 'equal':
     
    11961216                    yield ' ' + line
    11971217                continue
    1198             if tag == 'replace' or tag == 'delete':
     1218            if tag in ('replace', 'delete'):
    11991219                for line in a[i1:i2]:
    12001220                    yield '-' + line
    1201             if tag == 'replace' or tag == 'insert':
     1221            if tag in ('replace', 'insert'):
    12021222                for line in b[j1:j2]:
    12031223                    yield '+' + line
     1224
     1225
     1226########################################################################
     1227###  Context Diff
     1228########################################################################
     1229
     1230def _format_range_context(start, stop):
     1231    'Convert range to the "ed" format'
     1232    # Per the diff spec at http://www.unix.org/single_unix_specification/
     1233    beginning = start + 1     # lines start numbering with one
     1234    length = stop - start
     1235    if not length:
     1236        beginning -= 1        # empty ranges begin at line just before the range
     1237    if length <= 1:
     1238        return '{}'.format(beginning)
     1239    return '{},{}'.format(beginning, beginning + length - 1)
    12041240
    12051241# See http://www.unix.org/single_unix_specification/
     
    12251261    modification times.  Any or all of these may be specified using
    12261262    strings for 'fromfile', 'tofile', 'fromfiledate', and 'tofiledate'.
    1227     The modification times are normally expressed in the format returned
    1228     by time.ctime().  If not specified, the strings default to blanks.
     1263    The modification times are normally expressed in the ISO 8601 format.
     1264    If not specified, the strings default to blanks.
    12291265
    12301266    Example:
    12311267
    12321268    >>> print ''.join(context_diff('one\ntwo\nthree\nfour\n'.splitlines(1),
    1233     ...       'zero\none\ntree\nfour\n'.splitlines(1), 'Original', 'Current',
    1234     ...       'Sat Jan 26 23:30:50 1991', 'Fri Jun 06 10:22:46 2003')),
    1235     *** Original Sat Jan 26 23:30:50 1991
    1236     --- Current Fri Jun 06 10:22:46 2003
     1269    ...       'zero\none\ntree\nfour\n'.splitlines(1), 'Original', 'Current')),
     1270    *** Original
     1271    --- Current
    12371272    ***************
    12381273    *** 1,4 ****
     
    12481283    """
    12491284
     1285    prefix = dict(insert='+ ', delete='- ', replace='! ', equal='  ')
    12501286    started = False
    1251     prefixmap = {'insert':'+ ', 'delete':'- ', 'replace':'! ', 'equal':'  '}
    12521287    for group in SequenceMatcher(None,a,b).get_grouped_opcodes(n):
    12531288        if not started:
    1254             yield '*** %s %s%s' % (fromfile, fromfiledate, lineterm)
    1255             yield '--- %s %s%s' % (tofile, tofiledate, lineterm)
    12561289            started = True
    1257 
    1258         yield '***************%s' % (lineterm,)
    1259         if group[-1][2] - group[0][1] >= 2:
    1260             yield '*** %d,%d ****%s' % (group[0][1]+1, group[-1][2], lineterm)
    1261         else:
    1262             yield '*** %d ****%s' % (group[-1][2], lineterm)
    1263         visiblechanges = [e for e in group if e[0] in ('replace', 'delete')]
    1264         if visiblechanges:
     1290            fromdate = '\t{}'.format(fromfiledate) if fromfiledate else ''
     1291            todate = '\t{}'.format(tofiledate) if tofiledate else ''
     1292            yield '*** {}{}{}'.format(fromfile, fromdate, lineterm)
     1293            yield '--- {}{}{}'.format(tofile, todate, lineterm)
     1294
     1295        first, last = group[0], group[-1]
     1296        yield '***************' + lineterm
     1297
     1298        file1_range = _format_range_context(first[1], last[2])
     1299        yield '*** {} ****{}'.format(file1_range, lineterm)
     1300
     1301        if any(tag in ('replace', 'delete') for tag, _, _, _, _ in group):
    12651302            for tag, i1, i2, _, _ in group:
    12661303                if tag != 'insert':
    12671304                    for line in a[i1:i2]:
    1268                         yield prefixmap[tag] + line
    1269 
    1270         if group[-1][4] - group[0][3] >= 2:
    1271             yield '--- %d,%d ----%s' % (group[0][3]+1, group[-1][4], lineterm)
    1272         else:
    1273             yield '--- %d ----%s' % (group[-1][4], lineterm)
    1274         visiblechanges = [e for e in group if e[0] in ('replace', 'insert')]
    1275         if visiblechanges:
     1305                        yield prefix[tag] + line
     1306
     1307        file2_range = _format_range_context(first[3], last[4])
     1308        yield '--- {} ----{}'.format(file2_range, lineterm)
     1309
     1310        if any(tag in ('replace', 'insert') for tag, _, _, _, _ in group):
    12761311            for tag, _, _, j1, j2 in group:
    12771312                if tag != 'delete':
    12781313                    for line in b[j1:j2]:
    1279                         yield prefixmap[tag] + line
     1314                        yield prefix[tag] + line
    12801315
    12811316def ndiff(a, b, linejunk=None, charjunk=IS_CHARACTER_JUNK):
     
    13271362    charjunk -- passed on to ndiff (see ndiff documentation)
    13281363
    1329     This function returns an interator which returns a tuple:
     1364    This function returns an iterator which returns a tuple:
    13301365    (from line tuple, to line tuple, boolean flag)
    13311366
     
    17131748            # expand tabs into spaces
    17141749            line = line.expandtabs(self._tabsize)
    1715             # relace spaces from expanded tabs back into tab characters
     1750            # replace spaces from expanded tabs back into tab characters
    17161751            # (we'll replace them with markup after we do differencing)
    17171752            line = line.replace(' ','\t')
     
    19291964
    19301965        # change tabs to spaces before it gets more difficult after we insert
    1931         # markkup
     1966        # markup
    19321967        fromlines,tolines = self._tab_newline_replace(fromlines,tolines)
    19331968
Note: See TracChangeset for help on using the changeset viewer.