"""
HyperParser
===========
This module defines the HyperParser class, which provides advanced parsing
abilities for the ParenMatch and other extensions.
The HyperParser uses PyParse. PyParse is intended mostly to give information
on the proper indentation of code. HyperParser gives some information on the
structure of code, used by extensions to help the user.
"""
| 10 |
|
---|
| 11 | import string
|
---|
| 12 | import keyword
|
---|
[391] | 13 | from idlelib import PyParse
|
---|
[2] | 14 |
|
---|
| 15 | class HyperParser:
|
---|
| 16 |
|
---|
| 17 | def __init__(self, editwin, index):
|
---|
| 18 | """Initialize the HyperParser to analyze the surroundings of the given
|
---|
| 19 | index.
|
---|
| 20 | """
|
---|
| 21 |
|
---|
| 22 | self.editwin = editwin
|
---|
| 23 | self.text = text = editwin.text
|
---|
| 24 |
|
---|
| 25 | parser = PyParse.Parser(editwin.indentwidth, editwin.tabwidth)
|
---|
| 26 |
|
---|
| 27 | def index2line(index):
|
---|
| 28 | return int(float(index))
|
---|
| 29 | lno = index2line(text.index(index))
|
---|
| 30 |
|
---|
| 31 | if not editwin.context_use_ps1:
|
---|
| 32 | for context in editwin.num_context_lines:
|
---|
| 33 | startat = max(lno - context, 1)
|
---|
[391] | 34 | startatindex = repr(startat) + ".0"
|
---|
[2] | 35 | stopatindex = "%d.end" % lno
|
---|
| 36 | # We add the newline because PyParse requires a newline at end.
|
---|
| 37 | # We add a space so that index won't be at end of line, so that
|
---|
| 38 | # its status will be the same as the char before it, if should.
|
---|
| 39 | parser.set_str(text.get(startatindex, stopatindex)+' \n')
|
---|
| 40 | bod = parser.find_good_parse_start(
|
---|
| 41 | editwin._build_char_in_string_func(startatindex))
|
---|
| 42 | if bod is not None or startat == 1:
|
---|
| 43 | break
|
---|
| 44 | parser.set_lo(bod or 0)
|
---|
| 45 | else:
|
---|
| 46 | r = text.tag_prevrange("console", index)
|
---|
| 47 | if r:
|
---|
| 48 | startatindex = r[1]
|
---|
| 49 | else:
|
---|
| 50 | startatindex = "1.0"
|
---|
| 51 | stopatindex = "%d.end" % lno
|
---|
| 52 | # We add the newline because PyParse requires a newline at end.
|
---|
| 53 | # We add a space so that index won't be at end of line, so that
|
---|
| 54 | # its status will be the same as the char before it, if should.
|
---|
| 55 | parser.set_str(text.get(startatindex, stopatindex)+' \n')
|
---|
| 56 | parser.set_lo(0)
|
---|
| 57 |
|
---|
| 58 | # We want what the parser has, except for the last newline and space.
|
---|
| 59 | self.rawtext = parser.str[:-2]
|
---|
| 60 | # As far as I can see, parser.str preserves the statement we are in,
|
---|
| 61 | # so that stopatindex can be used to synchronize the string with the
|
---|
| 62 | # text box indices.
|
---|
| 63 | self.stopatindex = stopatindex
|
---|
| 64 | self.bracketing = parser.get_last_stmt_bracketing()
|
---|
| 65 | # find which pairs of bracketing are openers. These always correspond
|
---|
| 66 | # to a character of rawtext.
|
---|
| 67 | self.isopener = [i>0 and self.bracketing[i][1] > self.bracketing[i-1][1]
|
---|
| 68 | for i in range(len(self.bracketing))]
|
---|
| 69 |
|
---|
| 70 | self.set_index(index)
|
---|
| 71 |
|
---|
| 72 | def set_index(self, index):
|
---|
| 73 | """Set the index to which the functions relate. Note that it must be
|
---|
| 74 | in the same statement.
|
---|
| 75 | """
|
---|
| 76 | indexinrawtext = \
|
---|
| 77 | len(self.rawtext) - len(self.text.get(index, self.stopatindex))
|
---|
| 78 | if indexinrawtext < 0:
|
---|
| 79 | raise ValueError("The index given is before the analyzed statement")
|
---|
| 80 | self.indexinrawtext = indexinrawtext
|
---|
| 81 | # find the rightmost bracket to which index belongs
|
---|
| 82 | self.indexbracket = 0
|
---|
| 83 | while self.indexbracket < len(self.bracketing)-1 and \
|
---|
| 84 | self.bracketing[self.indexbracket+1][0] < self.indexinrawtext:
|
---|
| 85 | self.indexbracket += 1
|
---|
| 86 | if self.indexbracket < len(self.bracketing)-1 and \
|
---|
| 87 | self.bracketing[self.indexbracket+1][0] == self.indexinrawtext and \
|
---|
| 88 | not self.isopener[self.indexbracket+1]:
|
---|
| 89 | self.indexbracket += 1
|
---|
| 90 |
|
---|
| 91 | def is_in_string(self):
|
---|
| 92 | """Is the index given to the HyperParser is in a string?"""
|
---|
| 93 | # The bracket to which we belong should be an opener.
|
---|
| 94 | # If it's an opener, it has to have a character.
|
---|
| 95 | return self.isopener[self.indexbracket] and \
|
---|
| 96 | self.rawtext[self.bracketing[self.indexbracket][0]] in ('"', "'")
|
---|
| 97 |
|
---|
| 98 | def is_in_code(self):
|
---|
| 99 | """Is the index given to the HyperParser is in a normal code?"""
|
---|
| 100 | return not self.isopener[self.indexbracket] or \
|
---|
| 101 | self.rawtext[self.bracketing[self.indexbracket][0]] not in \
|
---|
| 102 | ('#', '"', "'")
|
---|
| 103 |
|
---|
| 104 | def get_surrounding_brackets(self, openers='([{', mustclose=False):
|
---|
| 105 | """If the index given to the HyperParser is surrounded by a bracket
|
---|
| 106 | defined in openers (or at least has one before it), return the
|
---|
| 107 | indices of the opening bracket and the closing bracket (or the
|
---|
| 108 | end of line, whichever comes first).
|
---|
| 109 | If it is not surrounded by brackets, or the end of line comes before
|
---|
| 110 | the closing bracket and mustclose is True, returns None.
|
---|
| 111 | """
|
---|
| 112 | bracketinglevel = self.bracketing[self.indexbracket][1]
|
---|
| 113 | before = self.indexbracket
|
---|
| 114 | while not self.isopener[before] or \
|
---|
| 115 | self.rawtext[self.bracketing[before][0]] not in openers or \
|
---|
| 116 | self.bracketing[before][1] > bracketinglevel:
|
---|
| 117 | before -= 1
|
---|
| 118 | if before < 0:
|
---|
| 119 | return None
|
---|
| 120 | bracketinglevel = min(bracketinglevel, self.bracketing[before][1])
|
---|
| 121 | after = self.indexbracket + 1
|
---|
| 122 | while after < len(self.bracketing) and \
|
---|
| 123 | self.bracketing[after][1] >= bracketinglevel:
|
---|
| 124 | after += 1
|
---|
| 125 |
|
---|
| 126 | beforeindex = self.text.index("%s-%dc" %
|
---|
| 127 | (self.stopatindex, len(self.rawtext)-self.bracketing[before][0]))
|
---|
| 128 | if after >= len(self.bracketing) or \
|
---|
| 129 | self.bracketing[after][0] > len(self.rawtext):
|
---|
| 130 | if mustclose:
|
---|
| 131 | return None
|
---|
| 132 | afterindex = self.stopatindex
|
---|
| 133 | else:
|
---|
| 134 | # We are after a real char, so it is a ')' and we give the index
|
---|
| 135 | # before it.
|
---|
| 136 | afterindex = self.text.index("%s-%dc" %
|
---|
| 137 | (self.stopatindex,
|
---|
| 138 | len(self.rawtext)-(self.bracketing[after][0]-1)))
|
---|
| 139 |
|
---|
| 140 | return beforeindex, afterindex
|
---|
| 141 |
|
---|
| 142 | # This string includes all chars that may be in a white space
|
---|
| 143 | _whitespace_chars = " \t\n\\"
|
---|
| 144 | # This string includes all chars that may be in an identifier
|
---|
| 145 | _id_chars = string.ascii_letters + string.digits + "_"
|
---|
| 146 | # This string includes all chars that may be the first char of an identifier
|
---|
| 147 | _id_first_chars = string.ascii_letters + "_"
|
---|
| 148 |
|
---|
| 149 | # Given a string and pos, return the number of chars in the identifier
|
---|
| 150 | # which ends at pos, or 0 if there is no such one. Saved words are not
|
---|
| 151 | # identifiers.
|
---|
| 152 | def _eat_identifier(self, str, limit, pos):
|
---|
| 153 | i = pos
|
---|
| 154 | while i > limit and str[i-1] in self._id_chars:
|
---|
| 155 | i -= 1
|
---|
| 156 | if i < pos and (str[i] not in self._id_first_chars or \
|
---|
| 157 | keyword.iskeyword(str[i:pos])):
|
---|
| 158 | i = pos
|
---|
| 159 | return pos - i
|
---|
| 160 |
|
---|
| 161 | def get_expression(self):
|
---|
| 162 | """Return a string with the Python expression which ends at the given
|
---|
| 163 | index, which is empty if there is no real one.
|
---|
| 164 | """
|
---|
| 165 | if not self.is_in_code():
|
---|
| 166 | raise ValueError("get_expression should only be called if index "\
|
---|
| 167 | "is inside a code.")
|
---|
| 168 |
|
---|
| 169 | rawtext = self.rawtext
|
---|
| 170 | bracketing = self.bracketing
|
---|
| 171 |
|
---|
| 172 | brck_index = self.indexbracket
|
---|
| 173 | brck_limit = bracketing[brck_index][0]
|
---|
| 174 | pos = self.indexinrawtext
|
---|
| 175 |
|
---|
| 176 | last_identifier_pos = pos
|
---|
| 177 | postdot_phase = True
|
---|
| 178 |
|
---|
| 179 | while 1:
|
---|
| 180 | # Eat whitespaces, comments, and if postdot_phase is False - one dot
|
---|
| 181 | while 1:
|
---|
| 182 | if pos>brck_limit and rawtext[pos-1] in self._whitespace_chars:
|
---|
| 183 | # Eat a whitespace
|
---|
| 184 | pos -= 1
|
---|
| 185 | elif not postdot_phase and \
|
---|
| 186 | pos > brck_limit and rawtext[pos-1] == '.':
|
---|
| 187 | # Eat a dot
|
---|
| 188 | pos -= 1
|
---|
| 189 | postdot_phase = True
|
---|
| 190 | # The next line will fail if we are *inside* a comment, but we
|
---|
| 191 | # shouldn't be.
|
---|
| 192 | elif pos == brck_limit and brck_index > 0 and \
|
---|
| 193 | rawtext[bracketing[brck_index-1][0]] == '#':
|
---|
| 194 | # Eat a comment
|
---|
| 195 | brck_index -= 2
|
---|
| 196 | brck_limit = bracketing[brck_index][0]
|
---|
| 197 | pos = bracketing[brck_index+1][0]
|
---|
| 198 | else:
|
---|
| 199 | # If we didn't eat anything, quit.
|
---|
| 200 | break
|
---|
| 201 |
|
---|
| 202 | if not postdot_phase:
|
---|
| 203 | # We didn't find a dot, so the expression end at the last
|
---|
| 204 | # identifier pos.
|
---|
| 205 | break
|
---|
| 206 |
|
---|
| 207 | ret = self._eat_identifier(rawtext, brck_limit, pos)
|
---|
| 208 | if ret:
|
---|
| 209 | # There is an identifier to eat
|
---|
| 210 | pos = pos - ret
|
---|
| 211 | last_identifier_pos = pos
|
---|
| 212 | # Now, in order to continue the search, we must find a dot.
|
---|
| 213 | postdot_phase = False
|
---|
| 214 | # (the loop continues now)
|
---|
| 215 |
|
---|
| 216 | elif pos == brck_limit:
|
---|
| 217 | # We are at a bracketing limit. If it is a closing bracket,
|
---|
| 218 | # eat the bracket, otherwise, stop the search.
|
---|
| 219 | level = bracketing[brck_index][1]
|
---|
| 220 | while brck_index > 0 and bracketing[brck_index-1][1] > level:
|
---|
| 221 | brck_index -= 1
|
---|
| 222 | if bracketing[brck_index][0] == brck_limit:
|
---|
| 223 | # We were not at the end of a closing bracket
|
---|
| 224 | break
|
---|
| 225 | pos = bracketing[brck_index][0]
|
---|
| 226 | brck_index -= 1
|
---|
| 227 | brck_limit = bracketing[brck_index][0]
|
---|
| 228 | last_identifier_pos = pos
|
---|
| 229 | if rawtext[pos] in "([":
|
---|
| 230 | # [] and () may be used after an identifier, so we
|
---|
| 231 | # continue. postdot_phase is True, so we don't allow a dot.
|
---|
| 232 | pass
|
---|
| 233 | else:
|
---|
| 234 | # We can't continue after other types of brackets
|
---|
[391] | 235 | if rawtext[pos] in "'\"":
|
---|
| 236 | # Scan a string prefix
|
---|
| 237 | while pos > 0 and rawtext[pos - 1] in "rRbBuU":
|
---|
| 238 | pos -= 1
|
---|
| 239 | last_identifier_pos = pos
|
---|
[2] | 240 | break
|
---|
| 241 |
|
---|
| 242 | else:
|
---|
| 243 | # We've found an operator or something.
|
---|
| 244 | break
|
---|
| 245 |
|
---|
| 246 | return rawtext[last_identifier_pos:self.indexinrawtext]
|
---|