Changeset 391 for python/trunk/Parser
- Timestamp:
- Mar 19, 2014, 11:31:01 PM (12 years ago)
- Location:
- python/trunk
- Files:
-
- 1 deleted
- 21 edited
-
. (modified) (1 prop)
-
Parser/Python.asdl (modified) (4 diffs)
-
Parser/acceler.c (modified) (4 diffs)
-
Parser/asdl.py (modified) (11 diffs)
-
Parser/asdl_c.py (modified) (16 diffs)
-
Parser/bitset.c (modified) (6 diffs)
-
Parser/firstsets.c (modified) (2 diffs)
-
Parser/grammar.c (modified) (8 diffs)
-
Parser/grammar.mak (deleted)
-
Parser/grammar1.c (modified) (2 diffs)
-
Parser/intrcheck.c (modified) (11 diffs)
-
Parser/listnode.c (modified) (3 diffs)
-
Parser/metagrammar.c (modified) (2 diffs)
-
Parser/myreadline.c (modified) (3 diffs)
-
Parser/node.c (modified) (6 diffs)
-
Parser/parser.c (modified) (14 diffs)
-
Parser/parsetok.c (modified) (5 diffs)
-
Parser/pgen.c (modified) (23 diffs)
-
Parser/pgenmain.c (modified) (8 diffs)
-
Parser/printgrammar.c (modified) (6 diffs)
-
Parser/tokenizer.c (modified) (35 diffs)
-
Parser/tokenizer.h (modified) (1 diff)
Legend:
- Unmodified
- Added
- Removed
-
python/trunk
-
Property svn:mergeinfo
set to
/python/vendor/Python-2.7.6 merged eligible /python/vendor/current merged eligible
-
Property svn:mergeinfo
set to
-
python/trunk/Parser/Python.asdl
r380 r391 1 1 -- ASDL's five builtin types are identifier, int, string, object, bool 2 2 3 module Python version "$Revision : 62047$"3 module Python version "$Revision$" 4 4 { 5 5 mod = Module(stmt* body) … … 12 12 stmt = FunctionDef(identifier name, arguments args, 13 13 stmt* body, expr* decorator_list) 14 | ClassDef(identifier name, expr* bases, stmt* body, expr *decorator_list)14 | ClassDef(identifier name, expr* bases, stmt* body, expr* decorator_list) 15 15 | Return(expr? value) 16 16 … … 35 35 36 36 | Import(alias* names) 37 | ImportFrom(identifier module, alias* names, int? level)37 | ImportFrom(identifier? module, alias* names, int? level) 38 38 39 39 -- Doesn't capture requirement that locals must be … … 57 57 | IfExp(expr test, expr body, expr orelse) 58 58 | Dict(expr* keys, expr* values) 59 | Set(expr* elts) 59 60 | ListComp(expr elt, comprehension* generators) 61 | SetComp(expr elt, comprehension* generators) 62 | DictComp(expr key, expr value, comprehension* generators) 60 63 | GeneratorExp(expr elt, comprehension* generators) 61 64 -- the grammar constrains where yield expressions can occur -
python/trunk/Parser/acceler.c
r2 r391 24 24 PyGrammar_AddAccelerators(grammar *g) 25 25 { 26 dfa *d;27 int i;28 d = g->g_dfa;29 for (i = g->g_ndfas; --i >= 0; d++)30 fixdfa(g, d);31 g->g_accel = 1;26 dfa *d; 27 int i; 28 d = g->g_dfa; 29 for (i = g->g_ndfas; --i >= 0; d++) 30 fixdfa(g, d); 31 g->g_accel = 1; 32 32 } 33 33 … … 35 35 PyGrammar_RemoveAccelerators(grammar *g) 36 36 { 37 dfa *d;38 int i;39 g->g_accel = 0;40 d = g->g_dfa;41 for (i = g->g_ndfas; --i >= 0; d++) {42 state *s;43 int j;44 s = d->d_state;45 for (j = 0; j < d->d_nstates; j++, s++) {46 if (s->s_accel)47 PyObject_FREE(s->s_accel);48 s->s_accel = NULL;49 }50 }37 dfa *d; 38 int i; 39 g->g_accel = 0; 40 d = g->g_dfa; 41 for (i = g->g_ndfas; --i >= 0; d++) { 42 state *s; 43 int j; 44 s = d->d_state; 45 for (j = 0; j < d->d_nstates; j++, s++) { 46 if (s->s_accel) 47 PyObject_FREE(s->s_accel); 48 s->s_accel = NULL; 49 } 50 } 51 51 } 52 52 … … 54 54 fixdfa(grammar *g, dfa *d) 55 55 { 56 state *s;57 int j;58 s = d->d_state;59 for (j = 0; j < d->d_nstates; j++, s++)60 fixstate(g, s);56 state *s; 57 int j; 58 s = d->d_state; 59 for (j = 0; j < d->d_nstates; j++, s++) 60 fixstate(g, s); 61 61 } 62 62 … … 64 64 fixstate(grammar *g, state *s) 65 65 { 66 arc *a;67 int k;68 int *accel;69 int nl = g->g_ll.ll_nlabels;70 s->s_accept = 0;71 accel = (int *) PyObject_MALLOC(nl * sizeof(int));72 if (accel == NULL) {73 fprintf(stderr, "no mem to build parser accelerators\n");74 exit(1);75 }76 for (k = 0; k < nl; k++)77 accel[k] = -1;78 a = s->s_arc;79 for (k = s->s_narcs; --k >= 0; a++) {80 int lbl = a->a_lbl;81 label *l = &g->g_ll.ll_label[lbl];82 int type = l->lb_type;83 if (a->a_arrow >= (1 << 7)) {84 printf("XXX too many states!\n");85 continue;86 }87 if (ISNONTERMINAL(type)) {88 dfa *d1 = PyGrammar_FindDFA(g, type);89 int ibit;90 if (type - NT_OFFSET >= (1 << 7)) {91 printf("XXX too high nonterminal number!\n");92 continue;93 }94 for (ibit = 0; ibit < g->g_ll.ll_nlabels; ibit++) {95 if (testbit(d1->d_first, ibit)) {96 if (accel[ibit] != -1)97 printf("XXX ambiguity!\n");98 accel[ibit] = a->a_arrow | (1 << 7) |99 ((type - NT_OFFSET) << 8);100 }101 }102 }103 else if (lbl == EMPTY)104 s->s_accept = 1;105 else if (lbl >= 0 && lbl < nl)106 accel[lbl] = a->a_arrow;107 }108 while (nl > 0 && accel[nl-1] == -1)109 nl--;110 for (k = 0; k < nl && accel[k] == -1;)111 k++;112 if (k < nl) {113 int i;114 s->s_accel = (int *) PyObject_MALLOC((nl-k) * sizeof(int));115 if (s->s_accel == NULL) {116 fprintf(stderr, "no mem to add parser accelerators\n");117 exit(1);118 }119 s->s_lower = k;120 s->s_upper = nl;121 for (i = 0; k < nl; i++, k++)122 s->s_accel[i] = accel[k];123 }124 PyObject_FREE(accel);66 arc *a; 67 int k; 68 int *accel; 69 int nl = g->g_ll.ll_nlabels; 70 s->s_accept = 0; 71 accel = (int *) PyObject_MALLOC(nl * sizeof(int)); 72 if (accel == NULL) { 73 fprintf(stderr, "no mem to build parser accelerators\n"); 74 exit(1); 75 } 76 for (k = 0; k < nl; k++) 77 accel[k] = -1; 78 a = s->s_arc; 79 for (k = s->s_narcs; --k >= 0; a++) { 80 int lbl = a->a_lbl; 81 label *l = &g->g_ll.ll_label[lbl]; 82 int type = l->lb_type; 83 if (a->a_arrow >= (1 << 7)) { 84 printf("XXX too many states!\n"); 85 continue; 86 } 87 if (ISNONTERMINAL(type)) { 88 dfa *d1 = PyGrammar_FindDFA(g, type); 89 int ibit; 90 if (type - NT_OFFSET >= (1 << 7)) { 91 printf("XXX too high nonterminal number!\n"); 92 continue; 93 } 94 for (ibit = 0; ibit < g->g_ll.ll_nlabels; ibit++) { 95 if (testbit(d1->d_first, ibit)) { 96 if (accel[ibit] != -1) 97 printf("XXX ambiguity!\n"); 98 accel[ibit] = a->a_arrow | (1 << 7) | 99 ((type - NT_OFFSET) << 8); 100 } 101 } 102 } 103 else if (lbl == EMPTY) 104 s->s_accept = 1; 105 else if (lbl >= 0 && lbl < nl) 106 accel[lbl] = a->a_arrow; 107 } 108 while (nl > 0 && accel[nl-1] == -1) 109 nl--; 110 for (k = 0; k < nl && accel[k] == -1;) 111 k++; 112 if (k < nl) { 113 int i; 114 s->s_accel = (int *) PyObject_MALLOC((nl-k) * sizeof(int)); 115 if (s->s_accel == NULL) { 116 fprintf(stderr, "no mem to add parser accelerators\n"); 117 exit(1); 118 } 119 s->s_lower = k; 120 s->s_upper = nl; 121 for (i = 0; k < nl; i++, k++) 122 s->s_accel[i] = accel[k]; 123 } 124 PyObject_FREE(accel); 125 125 } -
python/trunk/Parser/asdl.py
r2 r391 2 2 3 3 See http://asdl.sourceforge.net/ and 4 http://www.cs.princeton.edu/ ~danwang/Papers/dsl97/dsl97-abstract.html.4 http://www.cs.princeton.edu/research/techreps/TR-554-97 5 5 6 6 Only supports top level module decl, not view. I'm guessing that view … … 11 11 """ 12 12 13 #__metaclass__ = type14 15 13 import os 16 14 import traceback … … 18 16 import spark 19 17 20 class Token :18 class Token(object): 21 19 # spark seems to dispatch in the parser based on a token's 22 20 # type attribute … … 46 44 self.lineno = lineno 47 45 48 class ASDLSyntaxError :46 class ASDLSyntaxError(Exception): 49 47 50 48 def __init__(self, lineno, token=None, msg=None): … … 207 205 def p_field_2(self, (type, _, name)): 208 206 " field ::= Id * Id " 209 return Field(type, name, seq= 1)207 return Field(type, name, seq=True) 210 208 211 209 def p_field_3(self, (type, _, name)): 212 210 " field ::= Id ? Id " 213 return Field(type, name, opt= 1)211 return Field(type, name, opt=True) 214 212 215 213 def p_field_4(self, (type, _)): 216 214 " field ::= Id * " 217 return Field(type, seq= 1)215 return Field(type, seq=True) 218 216 219 217 def p_field_5(self, (type, _)): 220 218 " field ::= Id ? " 221 return Field(type, opt= 1)219 return Field(type, opt=True) 222 220 223 221 builtin_types = ("identifier", "string", "int", "bool", "object") … … 227 225 # piecemeal as they seem helpful 228 226 229 class AST :227 class AST(object): 230 228 pass # a marker class 231 229 … … 259 257 260 258 class Field(AST): 261 def __init__(self, type, name=None, seq= 0, opt=0):259 def __init__(self, type, name=None, seq=False, opt=False): 262 260 self.type = type 263 261 self.name = name … … 267 265 def __repr__(self): 268 266 if self.seq: 269 extra = ", seq= 1"267 extra = ", seq=True" 270 268 elif self.opt: 271 extra = ", opt= 1"269 extra = ", opt=True" 272 270 else: 273 271 extra = "" … … 297 295 class VisitorBase(object): 298 296 299 def __init__(self, skip= 0):297 def __init__(self, skip=False): 300 298 self.cache = {} 301 299 self.skip = skip … … 332 330 333 331 def __init__(self): 334 super(Check, self).__init__(skip= 1)332 super(Check, self).__init__(skip=True) 335 333 self.cons = {} 336 334 self.errors = 0 … … 374 372 375 373 for t in v.types: 376 if not mod.types.has_key(t)and not t in builtin_types:374 if t not in mod.types and not t in builtin_types: 377 375 v.errors += 1 378 376 uses = ", ".join(v.types[t]) -
python/trunk/Parser/asdl_c.py
r2 r391 87 87 super(EmitVisitor, self).__init__() 88 88 89 def emit(self, s, depth, reflow= 1):89 def emit(self, s, depth, reflow=True): 90 90 # XXX reflow long lines? 91 91 if reflow: … … 256 256 self.emit_function(cons.name, ctype, args, attrs) 257 257 258 def emit_function(self, name, ctype, args, attrs, union= 1):258 def emit_function(self, name, ctype, args, attrs, union=True): 259 259 args = args + attrs 260 260 if args: … … 268 268 margs += ", a%d" % i 269 269 self.emit("#define %s(%s) _Py_%s(%s)" % (name, margs, name, margs), 0, 270 reflow = 0)271 self.emit("%s _Py_%s(%s);" % (ctype, name, argstr), 0)270 reflow=False) 271 self.emit("%s _Py_%s(%s);" % (ctype, name, argstr), False) 272 272 273 273 def visitProduct(self, prod, name): 274 274 self.emit_function(name, get_c_type(name), 275 self.get_args(prod.fields), [], union= 0)275 self.get_args(prod.fields), [], union=False) 276 276 277 277 … … 279 279 """Visitor to generate constructor functions for AST.""" 280 280 281 def emit_function(self, name, ctype, args, attrs, union= 1):282 def emit(s, depth=0, reflow= 1):281 def emit_function(self, name, ctype, args, attrs, union=True): 282 def emit(s, depth=0, reflow=True): 283 283 self.emit(s, depth, reflow) 284 284 argstr = ", ".join(["%s %s" % (atype, aname) … … 299 299 msg = "field %s is required for %s" % (argname, name) 300 300 emit(' "%s");' % msg, 301 2, reflow= 0)301 2, reflow=False) 302 302 emit('return NULL;', 2) 303 303 emit('}', 1) … … 315 315 316 316 def emit_body_union(self, name, args, attrs): 317 def emit(s, depth=0, reflow= 1):317 def emit(s, depth=0, reflow=True): 318 318 self.emit(s, depth, reflow) 319 319 emit("p->kind = %s_kind;" % name, 1) … … 324 324 325 325 def emit_body_struct(self, name, args, attrs): 326 def emit(s, depth=0, reflow= 1):326 def emit(s, depth=0, reflow=True): 327 327 self.emit(s, depth, reflow) 328 328 for argtype, argname, opt in args: … … 734 734 int i, result; 735 735 PyObject *s, *l = PyTuple_New(num_fields); 736 if (!l) return 0; 737 for(i = 0; i < num_fields; i++) { 736 if (!l) 737 return 0; 738 for (i = 0; i < num_fields; i++) { 738 739 s = PyString_FromString(attrs[i]); 739 740 if (!s) { … … 800 801 } 801 802 802 #define obj2ast_identifier obj2ast_object 803 #define obj2ast_string obj2ast_object 803 static int obj2ast_identifier(PyObject* obj, PyObject** out, PyArena* arena) 804 { 805 if (!PyString_CheckExact(obj) && obj != Py_None) { 806 PyErr_Format(PyExc_TypeError, 807 "AST identifier must be of type str"); 808 return 1; 809 } 810 return obj2ast_object(obj, out, arena); 811 } 812 813 static int obj2ast_string(PyObject* obj, PyObject** out, PyArena* arena) 814 { 815 if (!PyString_CheckExact(obj) && !PyUnicode_CheckExact(obj)) { 816 PyErr_SetString(PyExc_TypeError, 817 "AST string must be of type str or unicode"); 818 return 1; 819 } 820 return obj2ast_object(obj, out, arena); 821 } 804 822 805 823 static int obj2ast_int(PyObject* obj, int* out, PyArena* arena) … … 902 920 self.emit("if (!%s_singleton) return 0;" % cons.name, 1) 903 921 904 905 def parse_version(mod):906 return mod.version.value[12:-3]907 922 908 923 class ASTModuleVisitor(PickleVisitor): … … 920 935 self.emit('if (PyModule_AddIntConstant(m, "PyCF_ONLY_AST", PyCF_ONLY_AST) < 0)', 1) 921 936 self.emit("return;", 2) 922 # Value of version: "$Revision : 76775$"937 # Value of version: "$Revision$" 923 938 self.emit('if (PyModule_AddStringConstant(m, "__version__", "%s") < 0)' 924 % parse_version(mod), 1)939 % mod.version, 1) 925 940 self.emit("return;", 2) 926 941 for dfn in mod.dfns: … … 963 978 964 979 class StaticVisitor(PickleVisitor): 965 CODE = '''Very simple, always emit this static code. Over ide CODE'''980 CODE = '''Very simple, always emit this static code. Override CODE''' 966 981 967 982 def visit(self, object): … … 1019 1034 self.emit("Py_INCREF(%s_singleton);" % t.name, 3) 1020 1035 self.emit("return %s_singleton;" % t.name, 3) 1021 self.emit("default:" % name, 2)1036 self.emit("default:", 2) 1022 1037 self.emit('/* should never happen, but just in case ... */', 3) 1023 1038 code = "PyErr_Format(PyExc_SystemError, \"unknown %s found\");" % name … … 1103 1118 { 1104 1119 mod_ty res; 1105 PyObject *req_type[] = {(PyObject*)Module_type, (PyObject*)Expression_type, 1106 (PyObject*)Interactive_type}; 1107 char *req_name[] = {"Module", "Expression", "Interactive"}; 1120 PyObject *req_type[3]; 1121 char *req_name[3]; 1108 1122 int isinstance; 1123 1124 req_type[0] = (PyObject*)Module_type; 1125 req_type[1] = (PyObject*)Expression_type; 1126 req_type[2] = (PyObject*)Interactive_type; 1127 1128 req_name[0] = "Module"; 1129 req_name[1] = "Expression"; 1130 req_name[2] = "Interactive"; 1131 1109 1132 assert(0 <= mode && mode <= 2); 1110 1133 … … 1160 1183 auto_gen_msg = common_msg % argv0 1161 1184 mod = asdl.parse(srcfile) 1185 mod.version = "82160" 1162 1186 if not asdl.check(mod): 1163 1187 sys.exit(1) … … 1181 1205 f = open(p, "wb") 1182 1206 f.write(auto_gen_msg) 1183 f.write(c_file_msg % parse_version(mod))1207 f.write(c_file_msg % mod.version) 1184 1208 f.write('#include "Python.h"\n') 1185 1209 f.write('#include "%s-ast.h"\n' % mod.name) -
python/trunk/Parser/bitset.c
r2 r391 8 8 newbitset(int nbits) 9 9 { 10 int nbytes = NBYTES(nbits);11 bitset ss = (char *)PyObject_MALLOC(sizeof(BYTE) * nbytes);12 13 if (ss == NULL)14 Py_FatalError("no mem for bitset");15 16 ss += nbytes;17 while (--nbytes >= 0)18 *--ss = 0;19 return ss;10 int nbytes = NBYTES(nbits); 11 bitset ss = (char *)PyObject_MALLOC(sizeof(BYTE) * nbytes); 12 13 if (ss == NULL) 14 Py_FatalError("no mem for bitset"); 15 16 ss += nbytes; 17 while (--nbytes >= 0) 18 *--ss = 0; 19 return ss; 20 20 } 21 21 … … 23 23 delbitset(bitset ss) 24 24 { 25 PyObject_FREE(ss);25 PyObject_FREE(ss); 26 26 } 27 27 … … 29 29 addbit(bitset ss, int ibit) 30 30 { 31 int ibyte = BIT2BYTE(ibit);32 BYTE mask = BIT2MASK(ibit);33 34 if (ss[ibyte] & mask)35 return 0; /* Bit already set */36 ss[ibyte] |= mask;37 return 1;31 int ibyte = BIT2BYTE(ibit); 32 BYTE mask = BIT2MASK(ibit); 33 34 if (ss[ibyte] & mask) 35 return 0; /* Bit already set */ 36 ss[ibyte] |= mask; 37 return 1; 38 38 } 39 39 … … 42 42 testbit(bitset ss, int ibit) 43 43 { 44 return (ss[BIT2BYTE(ibit)] & BIT2MASK(ibit)) != 0;44 return (ss[BIT2BYTE(ibit)] & BIT2MASK(ibit)) != 0; 45 45 } 46 46 #endif … … 49 49 samebitset(bitset ss1, bitset ss2, int nbits) 50 50 { 51 int i;52 53 for (i = NBYTES(nbits); --i >= 0; )54 if (*ss1++ != *ss2++)55 return 0;56 return 1;51 int i; 52 53 for (i = NBYTES(nbits); --i >= 0; ) 54 if (*ss1++ != *ss2++) 55 return 0; 56 return 1; 57 57 } 58 58 … … 60 60 mergebitset(bitset ss1, bitset ss2, int nbits) 61 61 { 62 int i;63 64 for (i = NBYTES(nbits); --i >= 0; )65 *ss1++ |= *ss2++;62 int i; 63 64 for (i = NBYTES(nbits); --i >= 0; ) 65 *ss1++ |= *ss2++; 66 66 } -
python/trunk/Parser/firstsets.c
r2 r391 14 14 addfirstsets(grammar *g) 15 15 { 16 int i;17 dfa *d;16 int i; 17 dfa *d; 18 18 19 if (Py_DebugFlag)20 printf("Adding FIRST sets ...\n");21 for (i = 0; i < g->g_ndfas; i++) {22 d = &g->g_dfa[i];23 if (d->d_first == NULL)24 calcfirstset(g, d);25 }19 if (Py_DebugFlag) 20 printf("Adding FIRST sets ...\n"); 21 for (i = 0; i < g->g_ndfas; i++) { 22 d = &g->g_dfa[i]; 23 if (d->d_first == NULL) 24 calcfirstset(g, d); 25 } 26 26 } 27 27 … … 29 29 calcfirstset(grammar *g, dfa *d) 30 30 { 31 int i, j; 32 state *s; 33 arc *a; 34 int nsyms; 35 int *sym; 36 int nbits; 37 static bitset dummy; 38 bitset result; 39 int type; 40 dfa *d1; 41 label *l0; 42 43 if (Py_DebugFlag) 44 printf("Calculate FIRST set for '%s'\n", d->d_name); 45 46 if (dummy == NULL) 47 dummy = newbitset(1); 48 if (d->d_first == dummy) { 49 fprintf(stderr, "Left-recursion for '%s'\n", d->d_name); 50 return; 51 } 52 if (d->d_first != NULL) { 53 fprintf(stderr, "Re-calculating FIRST set for '%s' ???\n", 54 d->d_name); 55 } 56 d->d_first = dummy; 57 58 l0 = g->g_ll.ll_label; 59 nbits = g->g_ll.ll_nlabels; 60 result = newbitset(nbits); 61 62 sym = (int *)PyObject_MALLOC(sizeof(int)); 63 if (sym == NULL) 64 Py_FatalError("no mem for new sym in calcfirstset"); 65 nsyms = 1; 66 sym[0] = findlabel(&g->g_ll, d->d_type, (char *)NULL); 67 68 s = &d->d_state[d->d_initial]; 69 for (i = 0; i < s->s_narcs; i++) { 70 a = &s->s_arc[i]; 71 for (j = 0; j < nsyms; j++) { 72 if (sym[j] == a->a_lbl) 73 break; 74 } 75 if (j >= nsyms) { /* New label */ 76 sym = (int *)PyObject_REALLOC(sym, 77 sizeof(int) * (nsyms + 1)); 78 if (sym == NULL) 79 Py_FatalError( 80 "no mem to resize sym in calcfirstset"); 81 sym[nsyms++] = a->a_lbl; 82 type = l0[a->a_lbl].lb_type; 83 if (ISNONTERMINAL(type)) { 84 d1 = PyGrammar_FindDFA(g, type); 85 if (d1->d_first == dummy) { 86 fprintf(stderr, 87 "Left-recursion below '%s'\n", 88 d->d_name); 89 } 90 else { 91 if (d1->d_first == NULL) 92 calcfirstset(g, d1); 93 mergebitset(result, 94 d1->d_first, nbits); 95 } 96 } 97 else if (ISTERMINAL(type)) { 98 addbit(result, a->a_lbl); 99 } 100 } 101 } 102 d->d_first = result; 103 if (Py_DebugFlag) { 104 printf("FIRST set for '%s': {", d->d_name); 105 for (i = 0; i < nbits; i++) { 106 if (testbit(result, i)) 107 printf(" %s", PyGrammar_LabelRepr(&l0[i])); 108 } 109 printf(" }\n"); 110 } 31 int i, j; 32 state *s; 33 arc *a; 34 int nsyms; 35 int *sym; 36 int nbits; 37 static bitset dummy; 38 bitset result; 39 int type; 40 dfa *d1; 41 label *l0; 111 42 112 PyObject_FREE(sym); 43 if (Py_DebugFlag) 44 printf("Calculate FIRST set for '%s'\n", d->d_name); 45 46 if (dummy == NULL) 47 dummy = newbitset(1); 48 if (d->d_first == dummy) { 49 fprintf(stderr, "Left-recursion for '%s'\n", d->d_name); 50 return; 51 } 52 if (d->d_first != NULL) { 53 fprintf(stderr, "Re-calculating FIRST set for '%s' ???\n", 54 d->d_name); 55 } 56 d->d_first = dummy; 57 58 l0 = g->g_ll.ll_label; 59 nbits = g->g_ll.ll_nlabels; 60 result = newbitset(nbits); 61 62 sym = (int *)PyObject_MALLOC(sizeof(int)); 63 if (sym == NULL) 64 Py_FatalError("no mem for new sym in calcfirstset"); 65 nsyms = 1; 66 sym[0] = findlabel(&g->g_ll, d->d_type, (char *)NULL); 67 68 s = &d->d_state[d->d_initial]; 69 for (i = 0; i < s->s_narcs; i++) { 70 a = &s->s_arc[i]; 71 for (j = 0; j < nsyms; j++) { 72 if (sym[j] == a->a_lbl) 73 break; 74 } 75 if (j >= nsyms) { /* New label */ 76 sym = (int *)PyObject_REALLOC(sym, 77 sizeof(int) * (nsyms + 1)); 78 if (sym == NULL) 79 Py_FatalError( 80 "no mem to resize sym in calcfirstset"); 81 sym[nsyms++] = a->a_lbl; 82 type = l0[a->a_lbl].lb_type; 83 if (ISNONTERMINAL(type)) { 84 d1 = PyGrammar_FindDFA(g, type); 85 if (d1->d_first == dummy) { 86 fprintf(stderr, 87 "Left-recursion below '%s'\n", 88 d->d_name); 89 } 90 else { 91 if (d1->d_first == NULL) 92 calcfirstset(g, d1); 93 mergebitset(result, 94 d1->d_first, nbits); 95 } 96 } 97 else if (ISTERMINAL(type)) { 98 addbit(result, a->a_lbl); 99 } 100 } 101 } 102 d->d_first = result; 103 if (Py_DebugFlag) { 104 printf("FIRST set for '%s': {", d->d_name); 105 for (i = 0; i < nbits; i++) { 106 if (testbit(result, i)) 107 printf(" %s", PyGrammar_LabelRepr(&l0[i])); 108 } 109 printf(" }\n"); 110 } 111 112 PyObject_FREE(sym); 113 113 } -
python/trunk/Parser/grammar.c
r2 r391 19 19 newgrammar(int start) 20 20 { 21 grammar *g;22 23 g = (grammar *)PyObject_MALLOC(sizeof(grammar));24 if (g == NULL)25 Py_FatalError("no mem for new grammar");26 g->g_ndfas = 0;27 g->g_dfa = NULL;28 g->g_start = start;29 g->g_ll.ll_nlabels = 0;30 g->g_ll.ll_label = NULL;31 g->g_accel = 0;32 return g;21 grammar *g; 22 23 g = (grammar *)PyObject_MALLOC(sizeof(grammar)); 24 if (g == NULL) 25 Py_FatalError("no mem for new grammar"); 26 g->g_ndfas = 0; 27 g->g_dfa = NULL; 28 g->g_start = start; 29 g->g_ll.ll_nlabels = 0; 30 g->g_ll.ll_label = NULL; 31 g->g_accel = 0; 32 return g; 33 33 } 34 34 … … 36 36 adddfa(grammar *g, int type, char *name) 37 37 { 38 dfa *d;39 40 g->g_dfa = (dfa *)PyObject_REALLOC(g->g_dfa, 41 sizeof(dfa) * (g->g_ndfas + 1));42 if (g->g_dfa == NULL)43 Py_FatalError("no mem to resize dfa in adddfa");44 d = &g->g_dfa[g->g_ndfas++];45 d->d_type = type;46 d->d_name = strdup(name);47 d->d_nstates = 0;48 d->d_state = NULL;49 d->d_initial = -1;50 d->d_first = NULL;51 return d; /* Only use while fresh! */38 dfa *d; 39 40 g->g_dfa = (dfa *)PyObject_REALLOC(g->g_dfa, 41 sizeof(dfa) * (g->g_ndfas + 1)); 42 if (g->g_dfa == NULL) 43 Py_FatalError("no mem to resize dfa in adddfa"); 44 d = &g->g_dfa[g->g_ndfas++]; 45 d->d_type = type; 46 d->d_name = strdup(name); 47 d->d_nstates = 0; 48 d->d_state = NULL; 49 d->d_initial = -1; 50 d->d_first = NULL; 51 return d; /* Only use while fresh! */ 52 52 } 53 53 … … 55 55 addstate(dfa *d) 56 56 { 57 state *s;58 59 d->d_state = (state *)PyObject_REALLOC(d->d_state,60 sizeof(state) * (d->d_nstates + 1));61 if (d->d_state == NULL)62 Py_FatalError("no mem to resize state in addstate");63 s = &d->d_state[d->d_nstates++];64 s->s_narcs = 0;65 s->s_arc = NULL;66 s->s_lower = 0;67 s->s_upper = 0;68 s->s_accel = NULL;69 s->s_accept = 0;70 return s - d->d_state;57 state *s; 58 59 d->d_state = (state *)PyObject_REALLOC(d->d_state, 60 sizeof(state) * (d->d_nstates + 1)); 61 if (d->d_state == NULL) 62 Py_FatalError("no mem to resize state in addstate"); 63 s = &d->d_state[d->d_nstates++]; 64 s->s_narcs = 0; 65 s->s_arc = NULL; 66 s->s_lower = 0; 67 s->s_upper = 0; 68 s->s_accel = NULL; 69 s->s_accept = 0; 70 return s - d->d_state; 71 71 } 72 72 … … 74 74 addarc(dfa *d, int from, int to, int lbl) 75 75 { 76 state *s;77 arc *a;78 79 assert(0 <= from && from < d->d_nstates);80 assert(0 <= to && to < d->d_nstates);81 82 s = &d->d_state[from];83 s->s_arc = (arc *)PyObject_REALLOC(s->s_arc, sizeof(arc) * (s->s_narcs + 1));84 if (s->s_arc == NULL)85 Py_FatalError("no mem to resize arc list in addarc");86 a = &s->s_arc[s->s_narcs++];87 a->a_lbl = lbl;88 a->a_arrow = to;76 state *s; 77 arc *a; 78 79 assert(0 <= from && from < d->d_nstates); 80 assert(0 <= to && to < d->d_nstates); 81 82 s = &d->d_state[from]; 83 s->s_arc = (arc *)PyObject_REALLOC(s->s_arc, sizeof(arc) * (s->s_narcs + 1)); 84 if (s->s_arc == NULL) 85 Py_FatalError("no mem to resize arc list in addarc"); 86 a = &s->s_arc[s->s_narcs++]; 87 a->a_lbl = lbl; 88 a->a_arrow = to; 89 89 } 90 90 … … 92 92 addlabel(labellist *ll, int type, char *str) 93 93 { 94 int i;95 label *lb;96 97 for (i = 0; i < ll->ll_nlabels; i++) {98 if (ll->ll_label[i].lb_type == type &&99 strcmp(ll->ll_label[i].lb_str, str) == 0)100 return i;101 }102 ll->ll_label = (label *)PyObject_REALLOC(ll->ll_label,103 sizeof(label) * (ll->ll_nlabels + 1));104 if (ll->ll_label == NULL)105 Py_FatalError("no mem to resize labellist in addlabel");106 lb = &ll->ll_label[ll->ll_nlabels++];107 lb->lb_type = type;108 lb->lb_str = strdup(str);109 if (Py_DebugFlag)110 printf("Label @ %8p, %d: %s\n", ll, ll->ll_nlabels,111 PyGrammar_LabelRepr(lb));112 return lb - ll->ll_label;94 int i; 95 label *lb; 96 97 for (i = 0; i < ll->ll_nlabels; i++) { 98 if (ll->ll_label[i].lb_type == type && 99 strcmp(ll->ll_label[i].lb_str, str) == 0) 100 return i; 101 } 102 ll->ll_label = (label *)PyObject_REALLOC(ll->ll_label, 103 sizeof(label) * (ll->ll_nlabels + 1)); 104 if (ll->ll_label == NULL) 105 Py_FatalError("no mem to resize labellist in addlabel"); 106 lb = &ll->ll_label[ll->ll_nlabels++]; 107 lb->lb_type = type; 108 lb->lb_str = strdup(str); 109 if (Py_DebugFlag) 110 printf("Label @ %8p, %d: %s\n", ll, ll->ll_nlabels, 111 PyGrammar_LabelRepr(lb)); 112 return lb - ll->ll_label; 113 113 } 114 114 … … 118 118 findlabel(labellist *ll, int type, char *str) 119 119 { 120 int i;121 122 for (i = 0; i < ll->ll_nlabels; i++) {123 if (ll->ll_label[i].lb_type == type /*&&124 strcmp(ll->ll_label[i].lb_str, str) == 0*/)125 return i;126 }127 fprintf(stderr, "Label %d/'%s' not found\n", type, str);128 Py_FatalError("grammar.c:findlabel()");129 return 0; /* Make gcc -Wall happy */120 int i; 121 122 for (i = 0; i < ll->ll_nlabels; i++) { 123 if (ll->ll_label[i].lb_type == type /*&& 124 strcmp(ll->ll_label[i].lb_str, str) == 0*/) 125 return i; 126 } 127 fprintf(stderr, "Label %d/'%s' not found\n", type, str); 128 Py_FatalError("grammar.c:findlabel()"); 129 return 0; /* Make gcc -Wall happy */ 130 130 } 131 131 … … 136 136 translatelabels(grammar *g) 137 137 { 138 int i;138 int i; 139 139 140 140 #ifdef Py_DEBUG 141 printf("Translating labels ...\n");141 printf("Translating labels ...\n"); 142 142 #endif 143 /* Don't translate EMPTY */144 for (i = EMPTY+1; i < g->g_ll.ll_nlabels; i++)145 translabel(g, &g->g_ll.ll_label[i]);143 /* Don't translate EMPTY */ 144 for (i = EMPTY+1; i < g->g_ll.ll_nlabels; i++) 145 translabel(g, &g->g_ll.ll_label[i]); 146 146 } 147 147 … … 149 149 translabel(grammar *g, label *lb) 150 150 { 151 int i;152 153 if (Py_DebugFlag)154 printf("Translating label %s ...\n", PyGrammar_LabelRepr(lb));155 156 if (lb->lb_type == NAME) {157 for (i = 0; i < g->g_ndfas; i++) {158 if (strcmp(lb->lb_str, g->g_dfa[i].d_name) == 0) {159 if (Py_DebugFlag)160 printf(161 "Label %s is non-terminal %d.\n",162 lb->lb_str,163 g->g_dfa[i].d_type);164 lb->lb_type = g->g_dfa[i].d_type;165 free(lb->lb_str);166 lb->lb_str = NULL;167 return;168 }169 }170 for (i = 0; i < (int)N_TOKENS; i++) {171 if (strcmp(lb->lb_str, _PyParser_TokenNames[i]) == 0) {172 if (Py_DebugFlag)173 printf("Label %s is terminal %d.\n",174 lb->lb_str, i);175 lb->lb_type = i;176 free(lb->lb_str);177 lb->lb_str = NULL;178 return;179 }180 }181 printf("Can't translate NAME label '%s'\n", lb->lb_str);182 return;183 }184 185 if (lb->lb_type == STRING) {186 if (isalpha(Py_CHARMASK(lb->lb_str[1])) ||187 lb->lb_str[1] == '_') {188 char *p;189 char *src;190 char *dest;191 size_t name_len;192 if (Py_DebugFlag)193 printf("Label %s is a keyword\n", lb->lb_str);194 lb->lb_type = NAME;195 src = lb->lb_str + 1;196 p = strchr(src, '\'');197 if (p)198 name_len = p - src;199 else200 name_len = strlen(src);201 dest = (char *)malloc(name_len + 1);202 if (!dest) {203 printf("Can't alloc dest '%s'\n", src);204 return;205 }206 strncpy(dest, src, name_len);207 dest[name_len] = '\0';208 free(lb->lb_str);209 lb->lb_str = dest;210 }211 else if (lb->lb_str[2] == lb->lb_str[0]) {212 int type = (int) PyToken_OneChar(lb->lb_str[1]);213 if (type != OP) {214 lb->lb_type = type;215 free(lb->lb_str);216 lb->lb_str = NULL;217 }218 else219 printf("Unknown OP label %s\n",220 lb->lb_str);221 }222 else if (lb->lb_str[2] && lb->lb_str[3] == lb->lb_str[0]) {223 int type = (int) PyToken_TwoChars(lb->lb_str[1],224 lb->lb_str[2]);225 if (type != OP) {226 lb->lb_type = type;227 free(lb->lb_str);228 lb->lb_str = NULL;229 }230 else231 printf("Unknown OP label %s\n",232 lb->lb_str);233 }234 else if (lb->lb_str[2] && lb->lb_str[3] && lb->lb_str[4] == lb->lb_str[0]) {235 int type = (int) PyToken_ThreeChars(lb->lb_str[1],236 lb->lb_str[2],237 lb->lb_str[3]);238 if (type != OP) {239 lb->lb_type = type;240 free(lb->lb_str);241 lb->lb_str = NULL;242 }243 else244 printf("Unknown OP label %s\n",245 lb->lb_str);246 }247 else248 printf("Can't translate STRING label %s\n",249 lb->lb_str);250 }251 else252 printf("Can't translate label '%s'\n",253 PyGrammar_LabelRepr(lb));254 } 151 int i; 152 153 if (Py_DebugFlag) 154 printf("Translating label %s ...\n", PyGrammar_LabelRepr(lb)); 155 156 if (lb->lb_type == NAME) { 157 for (i = 0; i < g->g_ndfas; i++) { 158 if (strcmp(lb->lb_str, g->g_dfa[i].d_name) == 0) { 159 if (Py_DebugFlag) 160 printf( 161 "Label %s is non-terminal %d.\n", 162 lb->lb_str, 163 g->g_dfa[i].d_type); 164 lb->lb_type = g->g_dfa[i].d_type; 165 free(lb->lb_str); 166 lb->lb_str = NULL; 167 return; 168 } 169 } 170 for (i = 0; i < (int)N_TOKENS; i++) { 171 if (strcmp(lb->lb_str, _PyParser_TokenNames[i]) == 0) { 172 if (Py_DebugFlag) 173 printf("Label %s is terminal %d.\n", 174 lb->lb_str, i); 175 lb->lb_type = i; 176 free(lb->lb_str); 177 lb->lb_str = NULL; 178 return; 179 } 180 } 181 printf("Can't translate NAME label '%s'\n", lb->lb_str); 182 return; 183 } 184 185 if (lb->lb_type == STRING) { 186 if (isalpha(Py_CHARMASK(lb->lb_str[1])) || 187 lb->lb_str[1] == '_') { 188 char *p; 189 char *src; 190 char *dest; 191 size_t name_len; 192 if (Py_DebugFlag) 193 printf("Label %s is a keyword\n", lb->lb_str); 194 lb->lb_type = NAME; 195 src = lb->lb_str + 1; 196 p = strchr(src, '\''); 197 if (p) 198 name_len = p - src; 199 else 200 name_len = strlen(src); 201 dest = (char *)malloc(name_len + 1); 202 if (!dest) { 203 printf("Can't alloc dest '%s'\n", src); 204 return; 205 } 206 strncpy(dest, src, name_len); 207 dest[name_len] = '\0'; 208 free(lb->lb_str); 209 lb->lb_str = dest; 210 } 211 else if (lb->lb_str[2] == lb->lb_str[0]) { 212 int type = (int) PyToken_OneChar(lb->lb_str[1]); 213 if (type != OP) { 214 lb->lb_type = type; 215 free(lb->lb_str); 216 lb->lb_str = NULL; 217 } 218 else 219 printf("Unknown OP label %s\n", 220 lb->lb_str); 221 } 222 else if (lb->lb_str[2] && lb->lb_str[3] == lb->lb_str[0]) { 223 int type = (int) PyToken_TwoChars(lb->lb_str[1], 224 lb->lb_str[2]); 225 if (type != OP) { 226 lb->lb_type = type; 227 free(lb->lb_str); 228 lb->lb_str = NULL; 229 } 230 else 231 printf("Unknown OP label %s\n", 232 lb->lb_str); 233 } 234 else if (lb->lb_str[2] && lb->lb_str[3] && lb->lb_str[4] == lb->lb_str[0]) { 235 int type = (int) PyToken_ThreeChars(lb->lb_str[1], 236 lb->lb_str[2], 237 lb->lb_str[3]); 238 if (type != OP) { 239 lb->lb_type = type; 240 free(lb->lb_str); 241 lb->lb_str = NULL; 242 } 243 else 244 printf("Unknown OP label %s\n", 245 lb->lb_str); 246 } 247 else 248 printf("Can't translate STRING label %s\n", 249 lb->lb_str); 250 } 251 else 252 printf("Can't translate label '%s'\n", 253 PyGrammar_LabelRepr(lb)); 254 } -
python/trunk/Parser/grammar1.c
r2 r391 12 12 PyGrammar_FindDFA(grammar *g, register int type) 13 13 { 14 register dfa *d;14 register dfa *d; 15 15 #if 1 16 /* Massive speed-up */17 d = &g->g_dfa[type - NT_OFFSET];18 assert(d->d_type == type);19 return d;16 /* Massive speed-up */ 17 d = &g->g_dfa[type - NT_OFFSET]; 18 assert(d->d_type == type); 19 return d; 20 20 #else 21 /* Old, slow version */22 register int i;23 24 for (i = g->g_ndfas, d = g->g_dfa; --i >= 0; d++) {25 if (d->d_type == type)26 return d;27 }28 assert(0);29 /* NOTREACHED */21 /* Old, slow version */ 22 register int i; 23 24 for (i = g->g_ndfas, d = g->g_dfa; --i >= 0; d++) { 25 if (d->d_type == type) 26 return d; 27 } 28 assert(0); 29 /* NOTREACHED */ 30 30 #endif 31 31 } … … 34 34 PyGrammar_LabelRepr(label *lb) 35 35 { 36 static char buf[100];37 38 if (lb->lb_type == ENDMARKER)39 return "EMPTY";40 else if (ISNONTERMINAL(lb->lb_type)) {41 if (lb->lb_str == NULL) {42 PyOS_snprintf(buf, sizeof(buf), "NT%d", lb->lb_type);43 return buf;44 }45 else46 return lb->lb_str;47 }48 else {49 if (lb->lb_str == NULL)50 return _PyParser_TokenNames[lb->lb_type];51 else {52 PyOS_snprintf(buf, sizeof(buf), "%.32s(%.32s)",53 _PyParser_TokenNames[lb->lb_type], lb->lb_str);54 return buf;55 }56 }36 static char buf[100]; 37 38 if (lb->lb_type == ENDMARKER) 39 return "EMPTY"; 40 else if (ISNONTERMINAL(lb->lb_type)) { 41 if (lb->lb_str == NULL) { 42 PyOS_snprintf(buf, sizeof(buf), "NT%d", lb->lb_type); 43 return buf; 44 } 45 else 46 return lb->lb_str; 47 } 48 else { 49 if (lb->lb_str == NULL) 50 return _PyParser_TokenNames[lb->lb_type]; 51 else { 52 PyOS_snprintf(buf, sizeof(buf), "%.32s(%.32s)", 53 _PyParser_TokenNames[lb->lb_type], lb->lb_str); 54 return buf; 55 } 56 } 57 57 } -
python/trunk/Parser/intrcheck.c
r2 r391 22 22 PyOS_InterruptOccurred(void) 23 23 { 24 _wyield();24 _wyield(); 25 25 } 26 26 … … 48 48 PyOS_InitInterrupts(void) 49 49 { 50 _go32_want_ctrl_break(1 /* TRUE */);50 _go32_want_ctrl_break(1 /* TRUE */); 51 51 } 52 52 … … 59 59 PyOS_InterruptOccurred(void) 60 60 { 61 return _go32_was_ctrl_break_hit();61 return _go32_was_ctrl_break_hit(); 62 62 } 63 63 … … 79 79 PyOS_InterruptOccurred(void) 80 80 { 81 int interrupted = 0;82 while (kbhit()) {83 if (getch() == '\003')84 interrupted = 1;85 }86 return interrupted;81 int interrupted = 0; 82 while (kbhit()) { 83 if (getch() == '\003') 84 interrupted = 1; 85 } 86 return interrupted; 87 87 } 88 88 … … 107 107 PyErr_SetInterrupt(void) 108 108 { 109 interrupted = 1;109 interrupted = 1; 110 110 } 111 111 … … 115 115 checksignals_witharg(void * arg) 116 116 { 117 return PyErr_CheckSignals();117 return PyErr_CheckSignals(); 118 118 } 119 119 … … 121 121 intcatcher(int sig) 122 122 { 123 extern void Py_Exit(int);124 static char message[] =123 extern void Py_Exit(int); 124 static char message[] = 125 125 "python: to interrupt a truly hanging Python program, interrupt once more.\n"; 126 switch (interrupted++) {127 case 0:128 break;129 case 1:126 switch (interrupted++) { 127 case 0: 128 break; 129 case 1: 130 130 #ifdef RISCOS 131 fprintf(stderr, message);131 fprintf(stderr, message); 132 132 #else 133 write(2, message, strlen(message));133 write(2, message, strlen(message)); 134 134 #endif 135 break;136 case 2:137 interrupted = 0;138 Py_Exit(1);139 break;140 }141 PyOS_setsig(SIGINT, intcatcher);142 Py_AddPendingCall(checksignals_witharg, NULL);135 break; 136 case 2: 137 interrupted = 0; 138 Py_Exit(1); 139 break; 140 } 141 PyOS_setsig(SIGINT, intcatcher); 142 Py_AddPendingCall(checksignals_witharg, NULL); 143 143 } 144 144 … … 148 148 PyOS_InitInterrupts(void) 149 149 { 150 if ((old_siginthandler = PyOS_setsig(SIGINT, SIG_IGN)) != SIG_IGN)151 PyOS_setsig(SIGINT, intcatcher);150 if ((old_siginthandler = PyOS_setsig(SIGINT, SIG_IGN)) != SIG_IGN) 151 PyOS_setsig(SIGINT, intcatcher); 152 152 } 153 153 … … 155 155 PyOS_FiniInterrupts(void) 156 156 { 157 PyOS_setsig(SIGINT, old_siginthandler);157 PyOS_setsig(SIGINT, old_siginthandler); 158 158 } 159 159 … … 161 161 PyOS_InterruptOccurred(void) 162 162 { 163 if (!interrupted)164 return 0;165 interrupted = 0;166 return 1;163 if (!interrupted) 164 return 0; 165 interrupted = 0; 166 return 1; 167 167 } 168 168 … … 173 173 { 174 174 #ifdef WITH_THREAD 175 PyEval_ReInitThreads();176 PyThread_ReInitTLS();175 PyEval_ReInitThreads(); 176 PyThread_ReInitTLS(); 177 177 #endif 178 178 } -
python/trunk/Parser/listnode.c
r2 r391 13 13 PyNode_ListTree(node *n) 14 14 { 15 listnode(stdout, n);15 listnode(stdout, n); 16 16 } 17 17 … … 21 21 listnode(FILE *fp, node *n) 22 22 { 23 level = 0;24 atbol = 1;25 list1node(fp, n);23 level = 0; 24 atbol = 1; 25 list1node(fp, n); 26 26 } 27 27 … … 29 29 list1node(FILE *fp, node *n) 30 30 { 31 if (n == 0)32 return;33 if (ISNONTERMINAL(TYPE(n))) {34 int i;35 for (i = 0; i < NCH(n); i++)36 list1node(fp, CHILD(n, i));37 }38 else if (ISTERMINAL(TYPE(n))) {39 switch (TYPE(n)) {40 case INDENT:41 ++level;42 break;43 case DEDENT:44 --level;45 break;46 default:47 if (atbol) {48 int i;49 for (i = 0; i < level; ++i)50 fprintf(fp, "\t");51 atbol = 0;52 }53 if (TYPE(n) == NEWLINE) {54 if (STR(n) != NULL)55 fprintf(fp, "%s", STR(n));56 fprintf(fp, "\n");57 atbol = 1;58 }59 else60 fprintf(fp, "%s ", STR(n));61 break;62 }63 }64 else65 fprintf(fp, "? ");31 if (n == 0) 32 return; 33 if (ISNONTERMINAL(TYPE(n))) { 34 int i; 35 for (i = 0; i < NCH(n); i++) 36 list1node(fp, CHILD(n, i)); 37 } 38 else if (ISTERMINAL(TYPE(n))) { 39 switch (TYPE(n)) { 40 case INDENT: 41 ++level; 42 break; 43 case DEDENT: 44 --level; 45 break; 46 default: 47 if (atbol) { 48 int i; 49 for (i = 0; i < level; ++i) 50 fprintf(fp, "\t"); 51 atbol = 0; 52 } 53 if (TYPE(n) == NEWLINE) { 54 if (STR(n) != NULL) 55 fprintf(fp, "%s", STR(n)); 56 fprintf(fp, "\n"); 57 atbol = 1; 58 } 59 else 60 fprintf(fp, "%s ", STR(n)); 61 break; 62 } 63 } 64 else 65 fprintf(fp, "? "); 66 66 } -
python/trunk/Parser/metagrammar.c
r2 r391 5 5 #include "pgen.h" 6 6 static arc arcs_0_0[3] = { 7 {2, 0},8 {3, 0},9 {4, 1},7 {2, 0}, 8 {3, 0}, 9 {4, 1}, 10 10 }; 11 11 static arc arcs_0_1[1] = { 12 {0, 1},12 {0, 1}, 13 13 }; 14 14 static state states_0[2] = { 15 {3, arcs_0_0},16 {1, arcs_0_1},15 {3, arcs_0_0}, 16 {1, arcs_0_1}, 17 17 }; 18 18 static arc arcs_1_0[1] = { 19 {5, 1},19 {5, 1}, 20 20 }; 21 21 static arc arcs_1_1[1] = { 22 {6, 2},22 {6, 2}, 23 23 }; 24 24 static arc arcs_1_2[1] = { 25 {7, 3},25 {7, 3}, 26 26 }; 27 27 static arc arcs_1_3[1] = { 28 {3, 4},28 {3, 4}, 29 29 }; 30 30 static arc arcs_1_4[1] = { 31 {0, 4},31 {0, 4}, 32 32 }; 33 33 static state states_1[5] = { 34 {1, arcs_1_0},35 {1, arcs_1_1},36 {1, arcs_1_2},37 {1, arcs_1_3},38 {1, arcs_1_4},34 {1, arcs_1_0}, 35 {1, arcs_1_1}, 36 {1, arcs_1_2}, 37 {1, arcs_1_3}, 38 {1, arcs_1_4}, 39 39 }; 40 40 static arc arcs_2_0[1] = { 41 {8, 1},41 {8, 1}, 42 42 }; 43 43 static arc arcs_2_1[2] = { 44 {9, 0},45 {0, 1},44 {9, 0}, 45 {0, 1}, 46 46 }; 47 47 static state states_2[2] = { 48 {1, arcs_2_0},49 {2, arcs_2_1},48 {1, arcs_2_0}, 49 {2, arcs_2_1}, 50 50 }; 51 51 static arc arcs_3_0[1] = { 52 {10, 1},52 {10, 1}, 53 53 }; 54 54 static arc arcs_3_1[2] = { 55 {10, 1},56 {0, 1},55 {10, 1}, 56 {0, 1}, 57 57 }; 58 58 static state states_3[2] = { 59 {1, arcs_3_0},60 {2, arcs_3_1},59 {1, arcs_3_0}, 60 {2, arcs_3_1}, 61 61 }; 62 62 static arc arcs_4_0[2] = { 63 {11, 1},64 {13, 2},63 {11, 1}, 64 {13, 2}, 65 65 }; 66 66 static arc arcs_4_1[1] = { 67 {7, 3},67 {7, 3}, 68 68 }; 69 69 static arc arcs_4_2[3] = { 70 {14, 4},71 {15, 4},72 {0, 2},70 {14, 4}, 71 {15, 4}, 72 {0, 2}, 73 73 }; 74 74 static arc arcs_4_3[1] = { 75 {12, 4},75 {12, 4}, 76 76 }; 77 77 static arc arcs_4_4[1] = { 78 {0, 4},78 {0, 4}, 79 79 }; 80 80 static state states_4[5] = { 81 {2, arcs_4_0},82 {1, arcs_4_1},83 {3, arcs_4_2},84 {1, arcs_4_3},85 {1, arcs_4_4},81 {2, arcs_4_0}, 82 {1, arcs_4_1}, 83 {3, arcs_4_2}, 84 {1, arcs_4_3}, 85 {1, arcs_4_4}, 86 86 }; 87 87 static arc arcs_5_0[3] = { 88 {5, 1},89 {16, 1},90 {17, 2},88 {5, 1}, 89 {16, 1}, 90 {17, 2}, 91 91 }; 92 92 static arc arcs_5_1[1] = { 93 {0, 1},93 {0, 1}, 94 94 }; 95 95 static arc arcs_5_2[1] = { 96 {7, 3},96 {7, 3}, 97 97 }; 98 98 static arc arcs_5_3[1] = { 99 {18, 1},99 {18, 1}, 100 100 }; 101 101 static state states_5[4] = { 102 {3, arcs_5_0},103 {1, arcs_5_1},104 {1, arcs_5_2},105 {1, arcs_5_3},102 {3, arcs_5_0}, 103 {1, arcs_5_1}, 104 {1, arcs_5_2}, 105 {1, arcs_5_3}, 106 106 }; 107 107 static dfa dfas[6] = { 108 {256, "MSTART", 0, 2, states_0,109 "\070\000\000"},110 {257, "RULE", 0, 5, states_1,111 "\040\000\000"},112 {258, "RHS", 0, 2, states_2,113 "\040\010\003"},114 {259, "ALT", 0, 2, states_3,115 "\040\010\003"},116 {260, "ITEM", 0, 5, states_4,117 "\040\010\003"},118 {261, "ATOM", 0, 4, states_5,119 "\040\000\003"},108 {256, "MSTART", 0, 2, states_0, 109 "\070\000\000"}, 110 {257, "RULE", 0, 5, states_1, 111 "\040\000\000"}, 112 {258, "RHS", 0, 2, states_2, 113 "\040\010\003"}, 114 {259, "ALT", 0, 2, states_3, 115 "\040\010\003"}, 116 {260, "ITEM", 0, 5, states_4, 117 "\040\010\003"}, 118 {261, "ATOM", 0, 4, states_5, 119 "\040\000\003"}, 120 120 }; 121 121 static label labels[19] = { 122 {0, "EMPTY"},123 {256, 0},124 {257, 0},125 {4, 0},126 {0, 0},127 {1, 0},128 {11, 0},129 {258, 0},130 {259, 0},131 {18, 0},132 {260, 0},133 {9, 0},134 {10, 0},135 {261, 0},136 {16, 0},137 {14, 0},138 {3, 0},139 {7, 0},140 {8, 0},122 {0, "EMPTY"}, 123 {256, 0}, 124 {257, 0}, 125 {4, 0}, 126 {0, 0}, 127 {1, 0}, 128 {11, 0}, 129 {258, 0}, 130 {259, 0}, 131 {18, 0}, 132 {260, 0}, 133 {9, 0}, 134 {10, 0}, 135 {261, 0}, 136 {16, 0}, 137 {14, 0}, 138 {3, 0}, 139 {7, 0}, 140 {8, 0}, 141 141 }; 142 142 static grammar _PyParser_Grammar = { 143 6,144 dfas,145 {19, labels},146 256143 6, 144 dfas, 145 {19, labels}, 146 256 147 147 }; 148 148 … … 150 150 meta_grammar(void) 151 151 { 152 return &_PyParser_Grammar;152 return &_PyParser_Grammar; 153 153 } 154 154 -
python/trunk/Parser/myreadline.c
r2 r391 40 40 my_fgets(char *buf, int len, FILE *fp) 41 41 { 42 char *p; 43 for (;;) { 44 if (PyOS_InputHook != NULL) 45 (void)(PyOS_InputHook)(); 46 errno = 0; 47 p = fgets(buf, len, fp); 48 if (p != NULL) 49 return 0; /* No error */ 42 char *p; 50 43 #ifdef MS_WINDOWS 51 /* In the case of a Ctrl+C or some other external event 52 interrupting the operation: 53 Win2k/NT: ERROR_OPERATION_ABORTED is the most recent Win32 54 error code (and feof() returns TRUE). 55 Win9x: Ctrl+C seems to have no effect on fgets() returning 56 early - the signal handler is called, but the fgets() 57 only returns "normally" (ie, when Enter hit or feof()) 58 */ 59 if (GetLastError()==ERROR_OPERATION_ABORTED) { 60 /* Signals come asynchronously, so we sleep a brief 61 moment before checking if the handler has been 62 triggered (we cant just return 1 before the 63 signal handler has been called, as the later 64 signal may be treated as a separate interrupt). 65 */ 66 Sleep(1); 67 if (PyOS_InterruptOccurred()) { 68 return 1; /* Interrupt */ 69 } 70 /* Either the sleep wasn't long enough (need a 71 short loop retrying?) or not interrupted at all 72 (in which case we should revisit the whole thing!) 73 Logging some warning would be nice. assert is not 74 viable as under the debugger, the various dialogs 75 mean the condition is not true. 76 */ 77 } 44 int i; 45 #endif 46 47 while (1) { 48 if (PyOS_InputHook != NULL) 49 (void)(PyOS_InputHook)(); 50 errno = 0; 51 clearerr(fp); 52 p = fgets(buf, len, fp); 53 if (p != NULL) 54 return 0; /* No error */ 55 #ifdef MS_WINDOWS 56 /* Ctrl-C anywhere on the line or Ctrl-Z if the only character 57 on a line will set ERROR_OPERATION_ABORTED. Under normal 58 circumstances Ctrl-C will also have caused the SIGINT handler 59 to fire. This signal fires in another thread and is not 60 guaranteed to have occurred before this point in the code. 61 62 Therefore: check in a small loop to see if the trigger has 63 fired, in which case assume this is a Ctrl-C event. If it 64 hasn't fired within 10ms assume that this is a Ctrl-Z on its 65 own or that the signal isn't going to fire for some other 66 reason and drop through to check for EOF. 67 */ 68 if (GetLastError()==ERROR_OPERATION_ABORTED) { 69 for (i = 0; i < 10; i++) { 70 if (PyOS_InterruptOccurred()) 71 return 1; 72 Sleep(1); 73 } 74 } 78 75 #endif /* MS_WINDOWS */ 79 if (feof(fp)) { 80 return -1; /* EOF */ 81 } 76 if (feof(fp)) { 77 clearerr(fp); 78 return -1; /* EOF */ 79 } 82 80 #ifdef EINTR 83 if (errno == EINTR) { 84 int s; 85 #ifdef WITH_THREAD 86 PyEval_RestoreThread(_PyOS_ReadlineTState); 87 #endif 88 s = PyErr_CheckSignals(); 89 #ifdef WITH_THREAD 90 PyEval_SaveThread(); 91 #endif 92 if (s < 0) { 93 return 1; 94 } 95 } 96 #endif 97 if (PyOS_InterruptOccurred()) { 98 return 1; /* Interrupt */ 99 } 100 return -2; /* Error */ 101 } 102 /* NOTREACHED */ 81 if (errno == EINTR) { 82 int s; 83 #ifdef WITH_THREAD 84 PyEval_RestoreThread(_PyOS_ReadlineTState); 85 #endif 86 s = PyErr_CheckSignals(); 87 #ifdef WITH_THREAD 88 PyEval_SaveThread(); 89 #endif 90 if (s < 0) 91 return 1; 92 /* try again */ 93 continue; 94 } 95 #endif 96 if (PyOS_InterruptOccurred()) { 97 return 1; /* Interrupt */ 98 } 99 return -2; /* Error */ 100 } 101 /* NOTREACHED */ 103 102 } 104 103 … … 109 108 PyOS_StdioReadline(FILE *sys_stdin, FILE *sys_stdout, char *prompt) 110 109 { 111 size_t n;112 char *p;113 n = 100;114 if ((p = (char *)PyMem_MALLOC(n)) == NULL)115 return NULL;116 fflush(sys_stdout);110 size_t n; 111 char *p; 112 n = 100; 113 if ((p = (char *)PyMem_MALLOC(n)) == NULL) 114 return NULL; 115 fflush(sys_stdout); 117 116 #ifndef RISCOS 118 if (prompt)119 fprintf(stderr, "%s", prompt);117 if (prompt) 118 fprintf(stderr, "%s", prompt); 120 119 #else 121 if (prompt) {122 if(Py_RISCOSWimpFlag)123 fprintf(stderr, "\x0cr%s\x0c", prompt);124 else125 fprintf(stderr, "%s", prompt);126 }127 #endif 128 fflush(stderr);129 switch (my_fgets(p, (int)n, sys_stdin)) {130 case 0: /* Normal case */131 break;132 case 1: /* Interrupt */133 PyMem_FREE(p);134 return NULL;135 case -1: /* EOF */136 case -2: /* Error */137 default: /* Shouldn't happen */138 *p = '\0';139 break;140 }141 n = strlen(p);142 while (n > 0 && p[n-1] != '\n') {143 size_t incr = n+2;144 p = (char *)PyMem_REALLOC(p, n + incr);145 if (p == NULL)146 return NULL;147 if (incr > INT_MAX) {148 PyErr_SetString(PyExc_OverflowError, "input line too long");149 }150 if (my_fgets(p+n, (int)incr, sys_stdin) != 0)151 break;152 n += strlen(p+n);153 }154 return (char *)PyMem_REALLOC(p, n+1);120 if (prompt) { 121 if(Py_RISCOSWimpFlag) 122 fprintf(stderr, "\x0cr%s\x0c", prompt); 123 else 124 fprintf(stderr, "%s", prompt); 125 } 126 #endif 127 fflush(stderr); 128 switch (my_fgets(p, (int)n, sys_stdin)) { 129 case 0: /* Normal case */ 130 break; 131 case 1: /* Interrupt */ 132 PyMem_FREE(p); 133 return NULL; 134 case -1: /* EOF */ 135 case -2: /* Error */ 136 default: /* Shouldn't happen */ 137 *p = '\0'; 138 break; 139 } 140 n = strlen(p); 141 while (n > 0 && p[n-1] != '\n') { 142 size_t incr = n+2; 143 p = (char *)PyMem_REALLOC(p, n + incr); 144 if (p == NULL) 145 return NULL; 146 if (incr > INT_MAX) { 147 PyErr_SetString(PyExc_OverflowError, "input line too long"); 148 } 149 if (my_fgets(p+n, (int)incr, sys_stdin) != 0) 150 break; 151 n += strlen(p+n); 152 } 153 return (char *)PyMem_REALLOC(p, n+1); 155 154 } 156 155 … … 169 168 PyOS_Readline(FILE *sys_stdin, FILE *sys_stdout, char *prompt) 170 169 { 171 char *rv;172 173 if (_PyOS_ReadlineTState == PyThreadState_GET()) {174 PyErr_SetString(PyExc_RuntimeError,175 "can't re-enter readline");176 return NULL;177 }178 179 180 if (PyOS_ReadlineFunctionPointer == NULL) {170 char *rv; 171 172 if (_PyOS_ReadlineTState == PyThreadState_GET()) { 173 PyErr_SetString(PyExc_RuntimeError, 174 "can't re-enter readline"); 175 return NULL; 176 } 177 178 179 if (PyOS_ReadlineFunctionPointer == NULL) { 181 180 #ifdef __VMS 182 PyOS_ReadlineFunctionPointer = vms__StdioReadline;181 PyOS_ReadlineFunctionPointer = vms__StdioReadline; 183 182 #else 184 PyOS_ReadlineFunctionPointer = PyOS_StdioReadline;185 #endif 186 }187 188 #ifdef WITH_THREAD 189 if (_PyOS_ReadlineLock == NULL) {190 _PyOS_ReadlineLock = PyThread_allocate_lock(); 191 }192 #endif 193 194 _PyOS_ReadlineTState = PyThreadState_GET();195 Py_BEGIN_ALLOW_THREADS196 #ifdef WITH_THREAD 197 PyThread_acquire_lock(_PyOS_ReadlineLock, 1);198 #endif 199 200 /* This is needed to handle the unlikely case that the201 * interpreter is in interactive mode *and* stdin/out are not202 * a tty. This can happen, for example if python is run like203 * this: python -i < test1.py204 */205 if (!isatty (fileno (sys_stdin)) || !isatty (fileno (sys_stdout)))206 rv = PyOS_StdioReadline (sys_stdin, sys_stdout, prompt);207 else208 rv = (*PyOS_ReadlineFunctionPointer)(sys_stdin, sys_stdout,209 prompt);210 Py_END_ALLOW_THREADS211 212 #ifdef WITH_THREAD 213 PyThread_release_lock(_PyOS_ReadlineLock);214 #endif 215 216 _PyOS_ReadlineTState = NULL;217 218 return rv;183 PyOS_ReadlineFunctionPointer = PyOS_StdioReadline; 184 #endif 185 } 186 187 #ifdef WITH_THREAD 188 if (_PyOS_ReadlineLock == NULL) { 189 _PyOS_ReadlineLock = PyThread_allocate_lock(); 190 } 191 #endif 192 193 _PyOS_ReadlineTState = PyThreadState_GET(); 194 Py_BEGIN_ALLOW_THREADS 195 #ifdef WITH_THREAD 196 PyThread_acquire_lock(_PyOS_ReadlineLock, 1); 197 #endif 198 199 /* This is needed to handle the unlikely case that the 200 * interpreter is in interactive mode *and* stdin/out are not 201 * a tty. This can happen, for example if python is run like 202 * this: python -i < test1.py 203 */ 204 if (!isatty (fileno (sys_stdin)) || !isatty (fileno (sys_stdout))) 205 rv = PyOS_StdioReadline (sys_stdin, sys_stdout, prompt); 206 else 207 rv = (*PyOS_ReadlineFunctionPointer)(sys_stdin, sys_stdout, 208 prompt); 209 Py_END_ALLOW_THREADS 210 211 #ifdef WITH_THREAD 212 PyThread_release_lock(_PyOS_ReadlineLock); 213 #endif 214 215 _PyOS_ReadlineTState = NULL; 216 217 return rv; 219 218 } -
python/trunk/Parser/node.c
r2 r391 8 8 PyNode_New(int type) 9 9 { 10 node *n = (node *) PyObject_MALLOC(1 * sizeof(node));11 if (n == NULL)12 return NULL;13 n->n_type = type;14 n->n_str = NULL;15 n->n_lineno = 0;16 n->n_nchildren = 0;17 n->n_child = NULL;18 return n;10 node *n = (node *) PyObject_MALLOC(1 * sizeof(node)); 11 if (n == NULL) 12 return NULL; 13 n->n_type = type; 14 n->n_str = NULL; 15 n->n_lineno = 0; 16 n->n_nchildren = 0; 17 n->n_child = NULL; 18 return n; 19 19 } 20 20 … … 23 23 fancy_roundup(int n) 24 24 { 25 /* Round up to the closest power of 2 >= n. */26 int result = 256;27 assert(n > 128);28 while (result < n) {29 result <<= 1;30 if (result <= 0)31 return -1;32 }33 return result;25 /* Round up to the closest power of 2 >= n. */ 26 int result = 256; 27 assert(n > 128); 28 while (result < n) { 29 result <<= 1; 30 if (result <= 0) 31 return -1; 32 } 33 return result; 34 34 } 35 35 … … 71 71 * capacity. The code is tricky to avoid that. 72 72 */ 73 #define XXXROUNDUP(n) ((n) <= 1 ? (n) : \74 (n) <= 128 ? (((n) + 3) & ~3) :\75 fancy_roundup(n))73 #define XXXROUNDUP(n) ((n) <= 1 ? (n) : \ 74 (n) <= 128 ? (((n) + 3) & ~3) : \ 75 fancy_roundup(n)) 76 76 77 77 … … 79 79 PyNode_AddChild(register node *n1, int type, char *str, int lineno, int col_offset) 80 80 { 81 const int nch = n1->n_nchildren;82 int current_capacity;83 int required_capacity;84 node *n;81 const int nch = n1->n_nchildren; 82 int current_capacity; 83 int required_capacity; 84 node *n; 85 85 86 if (nch == INT_MAX || nch < 0)87 return E_OVERFLOW;86 if (nch == INT_MAX || nch < 0) 87 return E_OVERFLOW; 88 88 89 current_capacity = XXXROUNDUP(nch);90 required_capacity = XXXROUNDUP(nch + 1);91 if (current_capacity < 0 || required_capacity < 0)92 return E_OVERFLOW;93 if (current_capacity < required_capacity) {94 if (required_capacity > PY_SIZE_MAX / sizeof(node)) {95 return E_NOMEM;96 }97 n = n1->n_child;98 n = (node *) PyObject_REALLOC(n,99 required_capacity * sizeof(node));100 if (n == NULL)101 return E_NOMEM;102 n1->n_child = n;103 }89 current_capacity = XXXROUNDUP(nch); 90 required_capacity = XXXROUNDUP(nch + 1); 91 if (current_capacity < 0 || required_capacity < 0) 92 return E_OVERFLOW; 93 if (current_capacity < required_capacity) { 94 if (required_capacity > PY_SIZE_MAX / sizeof(node)) { 95 return E_NOMEM; 96 } 97 n = n1->n_child; 98 n = (node *) PyObject_REALLOC(n, 99 required_capacity * sizeof(node)); 100 if (n == NULL) 101 return E_NOMEM; 102 n1->n_child = n; 103 } 104 104 105 n = &n1->n_child[n1->n_nchildren++];106 n->n_type = type;107 n->n_str = str;108 n->n_lineno = lineno;109 n->n_col_offset = col_offset;110 n->n_nchildren = 0;111 n->n_child = NULL;112 return 0;105 n = &n1->n_child[n1->n_nchildren++]; 106 n->n_type = type; 107 n->n_str = str; 108 n->n_lineno = lineno; 109 n->n_col_offset = col_offset; 110 n->n_nchildren = 0; 111 n->n_child = NULL; 112 return 0; 113 113 } 114 114 115 115 /* Forward */ 116 116 static void freechildren(node *); 117 static Py_ssize_t sizeofchildren(node *n); 117 118 118 119 … … 120 121 PyNode_Free(node *n) 121 122 { 122 if (n != NULL) { 123 freechildren(n); 124 PyObject_FREE(n); 125 } 123 if (n != NULL) { 124 freechildren(n); 125 PyObject_FREE(n); 126 } 127 } 128 129 Py_ssize_t 130 _PyNode_SizeOf(node *n) 131 { 132 Py_ssize_t res = 0; 133 134 if (n != NULL) 135 res = sizeof(node) + sizeofchildren(n); 136 return res; 126 137 } 127 138 … … 129 140 freechildren(node *n) 130 141 { 131 int i;132 for (i = NCH(n); --i >= 0; )133 freechildren(CHILD(n, i));134 if (n->n_child != NULL)135 PyObject_FREE(n->n_child);136 if (STR(n) != NULL)137 PyObject_FREE(STR(n));142 int i; 143 for (i = NCH(n); --i >= 0; ) 144 freechildren(CHILD(n, i)); 145 if (n->n_child != NULL) 146 PyObject_FREE(n->n_child); 147 if (STR(n) != NULL) 148 PyObject_FREE(STR(n)); 138 149 } 150 151 static Py_ssize_t 152 sizeofchildren(node *n) 153 { 154 Py_ssize_t res = 0; 155 int i; 156 for (i = NCH(n); --i >= 0; ) 157 res += sizeofchildren(CHILD(n, i)); 158 if (n->n_child != NULL) 159 /* allocated size of n->n_child array */ 160 res += XXXROUNDUP(NCH(n)) * sizeof(node); 161 if (STR(n) != NULL) 162 res += strlen(STR(n)) + 1; 163 return res; 164 } -
python/trunk/Parser/parser.c
r2 r391 30 30 s_reset(stack *s) 31 31 { 32 s->s_top = &s->s_base[MAXSTACK];32 s->s_top = &s->s_base[MAXSTACK]; 33 33 } 34 34 … … 38 38 s_push(register stack *s, dfa *d, node *parent) 39 39 { 40 register stackentry *top;41 if (s->s_top == s->s_base) {42 fprintf(stderr, "s_push: parser stack overflow\n");43 return E_NOMEM;44 }45 top = --s->s_top;46 top->s_dfa = d;47 top->s_parent = parent;48 top->s_state = 0;49 return 0;40 register stackentry *top; 41 if (s->s_top == s->s_base) { 42 fprintf(stderr, "s_push: parser stack overflow\n"); 43 return E_NOMEM; 44 } 45 top = --s->s_top; 46 top->s_dfa = d; 47 top->s_parent = parent; 48 top->s_state = 0; 49 return 0; 50 50 } 51 51 … … 55 55 s_pop(register stack *s) 56 56 { 57 if (s_empty(s))58 Py_FatalError("s_pop: parser stack underflow -- FATAL");59 s->s_top++;57 if (s_empty(s)) 58 Py_FatalError("s_pop: parser stack underflow -- FATAL"); 59 s->s_top++; 60 60 } 61 61 … … 72 72 PyParser_New(grammar *g, int start) 73 73 { 74 parser_state *ps;75 76 if (!g->g_accel)77 PyGrammar_AddAccelerators(g);78 ps = (parser_state *)PyMem_MALLOC(sizeof(parser_state));79 if (ps == NULL)80 return NULL;81 ps->p_grammar = g;74 parser_state *ps; 75 76 if (!g->g_accel) 77 PyGrammar_AddAccelerators(g); 78 ps = (parser_state *)PyMem_MALLOC(sizeof(parser_state)); 79 if (ps == NULL) 80 return NULL; 81 ps->p_grammar = g; 82 82 #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD 83 ps->p_flags = 0;84 #endif 85 ps->p_tree = PyNode_New(start);86 if (ps->p_tree == NULL) {87 PyMem_FREE(ps);88 return NULL;89 }90 s_reset(&ps->p_stack);91 (void) s_push(&ps->p_stack, PyGrammar_FindDFA(g, start), ps->p_tree);92 return ps;83 ps->p_flags = 0; 84 #endif 85 ps->p_tree = PyNode_New(start); 86 if (ps->p_tree == NULL) { 87 PyMem_FREE(ps); 88 return NULL; 89 } 90 s_reset(&ps->p_stack); 91 (void) s_push(&ps->p_stack, PyGrammar_FindDFA(g, start), ps->p_tree); 92 return ps; 93 93 } 94 94 … … 96 96 PyParser_Delete(parser_state *ps) 97 97 { 98 /* NB If you want to save the parse tree,99 you must set p_tree to NULL before calling delparser! */100 PyNode_Free(ps->p_tree);101 PyMem_FREE(ps);98 /* NB If you want to save the parse tree, 99 you must set p_tree to NULL before calling delparser! */ 100 PyNode_Free(ps->p_tree); 101 PyMem_FREE(ps); 102 102 } 103 103 … … 108 108 shift(register stack *s, int type, char *str, int newstate, int lineno, int col_offset) 109 109 { 110 int err;111 assert(!s_empty(s));112 err = PyNode_AddChild(s->s_top->s_parent, type, str, lineno, col_offset);113 if (err)114 return err;115 s->s_top->s_state = newstate;116 return 0;110 int err; 111 assert(!s_empty(s)); 112 err = PyNode_AddChild(s->s_top->s_parent, type, str, lineno, col_offset); 113 if (err) 114 return err; 115 s->s_top->s_state = newstate; 116 return 0; 117 117 } 118 118 … … 120 120 push(register stack *s, int type, dfa *d, int newstate, int lineno, int col_offset) 121 121 { 122 int err;123 register node *n;124 n = s->s_top->s_parent;125 assert(!s_empty(s));126 err = PyNode_AddChild(n, type, (char *)NULL, lineno, col_offset);127 if (err)128 return err;129 s->s_top->s_state = newstate;130 return s_push(s, d, CHILD(n, NCH(n)-1));122 int err; 123 register node *n; 124 n = s->s_top->s_parent; 125 assert(!s_empty(s)); 126 err = PyNode_AddChild(n, type, (char *)NULL, lineno, col_offset); 127 if (err) 128 return err; 129 s->s_top->s_state = newstate; 130 return s_push(s, d, CHILD(n, NCH(n)-1)); 131 131 } 132 132 … … 137 137 classify(parser_state *ps, int type, char *str) 138 138 { 139 grammar *g = ps->p_grammar;140 register int n = g->g_ll.ll_nlabels;141 142 if (type == NAME) {143 register char *s = str;144 register label *l = g->g_ll.ll_label;145 register int i;146 for (i = n; i > 0; i--, l++) {147 if (l->lb_type != NAME || l->lb_str == NULL ||148 l->lb_str[0] != s[0] ||149 strcmp(l->lb_str, s) != 0)150 continue;139 grammar *g = ps->p_grammar; 140 register int n = g->g_ll.ll_nlabels; 141 142 if (type == NAME) { 143 register char *s = str; 144 register label *l = g->g_ll.ll_label; 145 register int i; 146 for (i = n; i > 0; i--, l++) { 147 if (l->lb_type != NAME || l->lb_str == NULL || 148 l->lb_str[0] != s[0] || 149 strcmp(l->lb_str, s) != 0) 150 continue; 151 151 #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD 152 if (ps->p_flags & CO_FUTURE_PRINT_FUNCTION &&153 s[0] == 'p' && strcmp(s, "print") == 0) {154 break; /* no longer a keyword */155 }156 #endif 157 D(printf("It's a keyword\n"));158 return n - i;159 }160 }161 162 {163 register label *l = g->g_ll.ll_label;164 register int i;165 for (i = n; i > 0; i--, l++) {166 if (l->lb_type == type && l->lb_str == NULL) {167 D(printf("It's a token we know\n"));168 return n - i;169 }170 }171 }172 173 D(printf("Illegal token\n"));174 return -1;152 if (ps->p_flags & CO_FUTURE_PRINT_FUNCTION && 153 s[0] == 'p' && strcmp(s, "print") == 0) { 154 break; /* no longer a keyword */ 155 } 156 #endif 157 D(printf("It's a keyword\n")); 158 return n - i; 159 } 160 } 161 162 { 163 register label *l = g->g_ll.ll_label; 164 register int i; 165 for (i = n; i > 0; i--, l++) { 166 if (l->lb_type == type && l->lb_str == NULL) { 167 D(printf("It's a token we know\n")); 168 return n - i; 169 } 170 } 171 } 172 173 D(printf("Illegal token\n")); 174 return -1; 175 175 } 176 176 … … 179 179 future_hack(parser_state *ps) 180 180 { 181 node *n = ps->p_stack.s_top->s_parent;182 node *ch, *cch;183 int i;184 185 /* from __future__ import ..., must have at least 4 children */186 n = CHILD(n, 0);187 if (NCH(n) < 4)188 return;189 ch = CHILD(n, 0);190 if (STR(ch) == NULL || strcmp(STR(ch), "from") != 0)191 return;192 ch = CHILD(n, 1);193 if (NCH(ch) == 1 && STR(CHILD(ch, 0)) &&194 strcmp(STR(CHILD(ch, 0)), "__future__") != 0)195 return;196 ch = CHILD(n, 3);197 /* ch can be a star, a parenthesis or import_as_names */198 if (TYPE(ch) == STAR)199 return;200 if (TYPE(ch) == LPAR)201 ch = CHILD(n, 4);202 203 for (i = 0; i < NCH(ch); i += 2) {204 cch = CHILD(ch, i);205 if (NCH(cch) >= 1 && TYPE(CHILD(cch, 0)) == NAME) {206 char *str_ch = STR(CHILD(cch, 0));207 if (strcmp(str_ch, FUTURE_WITH_STATEMENT) == 0) {208 ps->p_flags |= CO_FUTURE_WITH_STATEMENT;209 } else if (strcmp(str_ch, FUTURE_PRINT_FUNCTION) == 0) {210 ps->p_flags |= CO_FUTURE_PRINT_FUNCTION;211 } else if (strcmp(str_ch, FUTURE_UNICODE_LITERALS) == 0) {212 ps->p_flags |= CO_FUTURE_UNICODE_LITERALS;213 }214 }215 }181 node *n = ps->p_stack.s_top->s_parent; 182 node *ch, *cch; 183 int i; 184 185 /* from __future__ import ..., must have at least 4 children */ 186 n = CHILD(n, 0); 187 if (NCH(n) < 4) 188 return; 189 ch = CHILD(n, 0); 190 if (STR(ch) == NULL || strcmp(STR(ch), "from") != 0) 191 return; 192 ch = CHILD(n, 1); 193 if (NCH(ch) == 1 && STR(CHILD(ch, 0)) && 194 strcmp(STR(CHILD(ch, 0)), "__future__") != 0) 195 return; 196 ch = CHILD(n, 3); 197 /* ch can be a star, a parenthesis or import_as_names */ 198 if (TYPE(ch) == STAR) 199 return; 200 if (TYPE(ch) == LPAR) 201 ch = CHILD(n, 4); 202 203 for (i = 0; i < NCH(ch); i += 2) { 204 cch = CHILD(ch, i); 205 if (NCH(cch) >= 1 && TYPE(CHILD(cch, 0)) == NAME) { 206 char *str_ch = STR(CHILD(cch, 0)); 207 if (strcmp(str_ch, FUTURE_WITH_STATEMENT) == 0) { 208 ps->p_flags |= CO_FUTURE_WITH_STATEMENT; 209 } else if (strcmp(str_ch, FUTURE_PRINT_FUNCTION) == 0) { 210 ps->p_flags |= CO_FUTURE_PRINT_FUNCTION; 211 } else if (strcmp(str_ch, FUTURE_UNICODE_LITERALS) == 0) { 212 ps->p_flags |= CO_FUTURE_UNICODE_LITERALS; 213 } 214 } 215 } 216 216 } 217 217 #endif /* future keyword */ … … 219 219 int 220 220 PyParser_AddToken(register parser_state *ps, register int type, char *str, 221 int lineno, int col_offset, int *expected_ret)222 { 223 register int ilabel;224 int err;225 226 D(printf("Token %s/'%s' ... ", _PyParser_TokenNames[type], str));227 228 /* Find out which label this token is */229 ilabel = classify(ps, type, str);230 if (ilabel < 0)231 return E_SYNTAX;232 233 /* Loop until the token is shifted or an error occurred */234 for (;;) {235 /* Fetch the current dfa and state */236 register dfa *d = ps->p_stack.s_top->s_dfa;237 register state *s = &d->d_state[ps->p_stack.s_top->s_state];238 239 D(printf(" DFA '%s', state %d:",240 d->d_name, ps->p_stack.s_top->s_state));241 242 /* Check accelerator */243 if (s->s_lower <= ilabel && ilabel < s->s_upper) {244 register int x = s->s_accel[ilabel - s->s_lower];245 if (x != -1) {246 if (x & (1<<7)) {247 /* Push non-terminal */248 int nt = (x >> 8) + NT_OFFSET;249 int arrow = x & ((1<<7)-1);250 dfa *d1 = PyGrammar_FindDFA(251 ps->p_grammar, nt);252 if ((err = push(&ps->p_stack, nt, d1,253 arrow, lineno, col_offset)) > 0) {254 D(printf(" MemError: push\n"));255 return err;256 }257 D(printf(" Push ...\n"));258 continue;259 }260 261 /* Shift the token */262 if ((err = shift(&ps->p_stack, type, str,263 x, lineno, col_offset)) > 0) {264 D(printf(" MemError: shift.\n"));265 return err;266 }267 D(printf(" Shift.\n"));268 /* Pop while we are in an accept-only state */269 while (s = &d->d_state270 [ps->p_stack.s_top->s_state],271 s->s_accept && s->s_narcs == 1) {272 D(printf(" DFA '%s', state %d: "273 "Direct pop.\n",274 d->d_name,275 ps->p_stack.s_top->s_state));221 int lineno, int col_offset, int *expected_ret) 222 { 223 register int ilabel; 224 int err; 225 226 D(printf("Token %s/'%s' ... ", _PyParser_TokenNames[type], str)); 227 228 /* Find out which label this token is */ 229 ilabel = classify(ps, type, str); 230 if (ilabel < 0) 231 return E_SYNTAX; 232 233 /* Loop until the token is shifted or an error occurred */ 234 for (;;) { 235 /* Fetch the current dfa and state */ 236 register dfa *d = ps->p_stack.s_top->s_dfa; 237 register state *s = &d->d_state[ps->p_stack.s_top->s_state]; 238 239 D(printf(" DFA '%s', state %d:", 240 d->d_name, ps->p_stack.s_top->s_state)); 241 242 /* Check accelerator */ 243 if (s->s_lower <= ilabel && ilabel < s->s_upper) { 244 register int x = s->s_accel[ilabel - s->s_lower]; 245 if (x != -1) { 246 if (x & (1<<7)) { 247 /* Push non-terminal */ 248 int nt = (x >> 8) + NT_OFFSET; 249 int arrow = x & ((1<<7)-1); 250 dfa *d1 = PyGrammar_FindDFA( 251 ps->p_grammar, nt); 252 if ((err = push(&ps->p_stack, nt, d1, 253 arrow, lineno, col_offset)) > 0) { 254 D(printf(" MemError: push\n")); 255 return err; 256 } 257 D(printf(" Push ...\n")); 258 continue; 259 } 260 261 /* Shift the token */ 262 if ((err = shift(&ps->p_stack, type, str, 263 x, lineno, col_offset)) > 0) { 264 D(printf(" MemError: shift.\n")); 265 return err; 266 } 267 D(printf(" Shift.\n")); 268 /* Pop while we are in an accept-only state */ 269 while (s = &d->d_state 270 [ps->p_stack.s_top->s_state], 271 s->s_accept && s->s_narcs == 1) { 272 D(printf(" DFA '%s', state %d: " 273 "Direct pop.\n", 274 d->d_name, 275 ps->p_stack.s_top->s_state)); 276 276 #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD 277 if (d->d_name[0] == 'i' &&278 strcmp(d->d_name,279 "import_stmt") == 0)280 future_hack(ps);281 #endif 282 s_pop(&ps->p_stack);283 if (s_empty(&ps->p_stack)) {284 D(printf(" ACCEPT.\n"));285 return E_DONE;286 }287 d = ps->p_stack.s_top->s_dfa;288 }289 return E_OK;290 }291 }292 293 if (s->s_accept) {277 if (d->d_name[0] == 'i' && 278 strcmp(d->d_name, 279 "import_stmt") == 0) 280 future_hack(ps); 281 #endif 282 s_pop(&ps->p_stack); 283 if (s_empty(&ps->p_stack)) { 284 D(printf(" ACCEPT.\n")); 285 return E_DONE; 286 } 287 d = ps->p_stack.s_top->s_dfa; 288 } 289 return E_OK; 290 } 291 } 292 293 if (s->s_accept) { 294 294 #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD 295 if (d->d_name[0] == 'i' &&296 strcmp(d->d_name, "import_stmt") == 0)297 future_hack(ps);298 #endif 299 /* Pop this dfa and try again */300 s_pop(&ps->p_stack);301 D(printf(" Pop ...\n"));302 if (s_empty(&ps->p_stack)) {303 D(printf(" Error: bottom of stack.\n"));304 return E_SYNTAX;305 }306 continue;307 }308 309 /* Stuck, report syntax error */310 D(printf(" Error.\n"));311 if (expected_ret) {312 if (s->s_lower == s->s_upper - 1) {313 /* Only one possible expected token */314 *expected_ret = ps->p_grammar->315 g_ll.ll_label[s->s_lower].lb_type;316 }317 else 318 *expected_ret = -1;319 }320 return E_SYNTAX;321 }295 if (d->d_name[0] == 'i' && 296 strcmp(d->d_name, "import_stmt") == 0) 297 future_hack(ps); 298 #endif 299 /* Pop this dfa and try again */ 300 s_pop(&ps->p_stack); 301 D(printf(" Pop ...\n")); 302 if (s_empty(&ps->p_stack)) { 303 D(printf(" Error: bottom of stack.\n")); 304 return E_SYNTAX; 305 } 306 continue; 307 } 308 309 /* Stuck, report syntax error */ 310 D(printf(" Error.\n")); 311 if (expected_ret) { 312 if (s->s_lower == s->s_upper - 1) { 313 /* Only one possible expected token */ 314 *expected_ret = ps->p_grammar-> 315 g_ll.ll_label[s->s_lower].lb_type; 316 } 317 else 318 *expected_ret = -1; 319 } 320 return E_SYNTAX; 321 } 322 322 } 323 323 … … 330 330 dumptree(grammar *g, node *n) 331 331 { 332 int i;333 334 if (n == NULL)335 printf("NIL");336 else {337 label l;338 l.lb_type = TYPE(n);339 l.lb_str = STR(n);340 printf("%s", PyGrammar_LabelRepr(&l));341 if (ISNONTERMINAL(TYPE(n))) {342 printf("(");343 for (i = 0; i < NCH(n); i++) {344 if (i > 0)345 printf(",");346 dumptree(g, CHILD(n, i));347 }348 printf(")");349 }350 }332 int i; 333 334 if (n == NULL) 335 printf("NIL"); 336 else { 337 label l; 338 l.lb_type = TYPE(n); 339 l.lb_str = STR(n); 340 printf("%s", PyGrammar_LabelRepr(&l)); 341 if (ISNONTERMINAL(TYPE(n))) { 342 printf("("); 343 for (i = 0; i < NCH(n); i++) { 344 if (i > 0) 345 printf(","); 346 dumptree(g, CHILD(n, i)); 347 } 348 printf(")"); 349 } 350 } 351 351 } 352 352 … … 354 354 showtree(grammar *g, node *n) 355 355 { 356 int i;357 358 if (n == NULL)359 return;360 if (ISNONTERMINAL(TYPE(n))) {361 for (i = 0; i < NCH(n); i++)362 showtree(g, CHILD(n, i));363 }364 else if (ISTERMINAL(TYPE(n))) {365 printf("%s", _PyParser_TokenNames[TYPE(n)]);366 if (TYPE(n) == NUMBER || TYPE(n) == NAME)367 printf("(%s)", STR(n));368 printf(" ");369 }370 else371 printf("? ");356 int i; 357 358 if (n == NULL) 359 return; 360 if (ISNONTERMINAL(TYPE(n))) { 361 for (i = 0; i < NCH(n); i++) 362 showtree(g, CHILD(n, i)); 363 } 364 else if (ISTERMINAL(TYPE(n))) { 365 printf("%s", _PyParser_TokenNames[TYPE(n)]); 366 if (TYPE(n) == NUMBER || TYPE(n) == NAME) 367 printf("(%s)", STR(n)); 368 printf(" "); 369 } 370 else 371 printf("? "); 372 372 } 373 373 … … 375 375 printtree(parser_state *ps) 376 376 { 377 if (Py_DebugFlag) {378 printf("Parse tree:\n");379 dumptree(ps->p_grammar, ps->p_tree);380 printf("\n");381 printf("Tokens:\n");382 showtree(ps->p_grammar, ps->p_tree);383 printf("\n");384 }385 printf("Listing:\n");386 PyNode_ListTree(ps->p_tree);387 printf("\n");377 if (Py_DebugFlag) { 378 printf("Parse tree:\n"); 379 dumptree(ps->p_grammar, ps->p_tree); 380 printf("\n"); 381 printf("Tokens:\n"); 382 showtree(ps->p_grammar, ps->p_tree); 383 printf("\n"); 384 } 385 printf("Listing:\n"); 386 PyNode_ListTree(ps->p_tree); 387 printf("\n"); 388 388 } 389 389 … … 420 420 As an example, consider this grammar: 421 421 422 expr: term (OP term)*423 term: CONSTANT | '(' expr ')'422 expr: term (OP term)* 423 term: CONSTANT | '(' expr ')' 424 424 425 425 The DFA corresponding to the rule for expr is: 426 426 427 427 ------->.---term-->.-------> 428 ^ |429 | |430 \----OP----/428 ^ | 429 | | 430 \----OP----/ 431 431 432 432 The parse tree generated for the input a+b is: -
python/trunk/Parser/parsetok.c
r2 r391 22 22 PyParser_ParseString(const char *s, grammar *g, int start, perrdetail *err_ret) 23 23 { 24 return PyParser_ParseStringFlagsFilename(s, NULL, g, start, err_ret, 0);24 return PyParser_ParseStringFlagsFilename(s, NULL, g, start, err_ret, 0); 25 25 } 26 26 27 27 node * 28 28 PyParser_ParseStringFlags(const char *s, grammar *g, int start, 29 perrdetail *err_ret, int flags)30 { 31 return PyParser_ParseStringFlagsFilename(s, NULL,32 g, start, err_ret, flags);29 perrdetail *err_ret, int flags) 30 { 31 return PyParser_ParseStringFlagsFilename(s, NULL, 32 g, start, err_ret, flags); 33 33 } 34 34 35 35 node * 36 36 PyParser_ParseStringFlagsFilename(const char *s, const char *filename, 37 grammar *g, int start,38 perrdetail *err_ret, int flags)39 { 40 int iflags = flags;41 return PyParser_ParseStringFlagsFilenameEx(s, filename, g, start,42 err_ret, &iflags);37 grammar *g, int start, 38 perrdetail *err_ret, int flags) 39 { 40 int iflags = flags; 41 return PyParser_ParseStringFlagsFilenameEx(s, filename, g, start, 42 err_ret, &iflags); 43 43 } 44 44 45 45 node * 46 46 PyParser_ParseStringFlagsFilenameEx(const char *s, const char *filename, 47 grammar *g, int start,48 perrdetail *err_ret, int *flags)49 { 50 struct tok_state *tok;51 52 initerr(err_ret, filename);53 54 if ((tok = PyTokenizer_FromString(s)) == NULL) {55 err_ret->error = PyErr_Occurred() ? E_DECODE : E_NOMEM;56 return NULL;57 }58 59 tok->filename = filename ? filename : "<string>";60 if (Py_TabcheckFlag || Py_VerboseFlag) {61 tok->altwarning = (tok->filename != NULL);62 if (Py_TabcheckFlag >= 2)63 tok->alterror++;64 }65 66 return parsetok(tok, g, start, err_ret, flags);47 grammar *g, int start, 48 perrdetail *err_ret, int *flags) 49 { 50 struct tok_state *tok; 51 52 initerr(err_ret, filename); 53 54 if ((tok = PyTokenizer_FromString(s, start == file_input)) == NULL) { 55 err_ret->error = PyErr_Occurred() ? E_DECODE : E_NOMEM; 56 return NULL; 57 } 58 59 tok->filename = filename ? filename : "<string>"; 60 if (Py_TabcheckFlag || Py_VerboseFlag) { 61 tok->altwarning = (tok->filename != NULL); 62 if (Py_TabcheckFlag >= 2) 63 tok->alterror++; 64 } 65 66 return parsetok(tok, g, start, err_ret, flags); 67 67 } 68 68 … … 71 71 node * 72 72 PyParser_ParseFile(FILE *fp, const char *filename, grammar *g, int start, 73 char *ps1, char *ps2, perrdetail *err_ret)74 { 75 return PyParser_ParseFileFlags(fp, filename, g, start, ps1, ps2,76 err_ret, 0);73 char *ps1, char *ps2, perrdetail *err_ret) 74 { 75 return PyParser_ParseFileFlags(fp, filename, g, start, ps1, ps2, 76 err_ret, 0); 77 77 } 78 78 79 79 node * 80 80 PyParser_ParseFileFlags(FILE *fp, const char *filename, grammar *g, int start, 81 char *ps1, char *ps2, perrdetail *err_ret, int flags)82 { 83 int iflags = flags;84 return PyParser_ParseFileFlagsEx(fp, filename, g, start, ps1, ps2, err_ret, &iflags);81 char *ps1, char *ps2, perrdetail *err_ret, int flags) 82 { 83 int iflags = flags; 84 return PyParser_ParseFileFlagsEx(fp, filename, g, start, ps1, ps2, err_ret, &iflags); 85 85 } 86 86 87 87 node * 88 88 PyParser_ParseFileFlagsEx(FILE *fp, const char *filename, grammar *g, int start, 89 char *ps1, char *ps2, perrdetail *err_ret, int *flags)90 { 91 struct tok_state *tok;92 93 initerr(err_ret, filename);94 95 if ((tok = PyTokenizer_FromFile(fp, ps1, ps2)) == NULL) {96 err_ret->error = E_NOMEM;97 return NULL;98 }99 tok->filename = filename;100 if (Py_TabcheckFlag || Py_VerboseFlag) {101 tok->altwarning = (filename != NULL);102 if (Py_TabcheckFlag >= 2)103 tok->alterror++;104 }105 106 return parsetok(tok, g, start, err_ret, flags);89 char *ps1, char *ps2, perrdetail *err_ret, int *flags) 90 { 91 struct tok_state *tok; 92 93 initerr(err_ret, filename); 94 95 if ((tok = PyTokenizer_FromFile(fp, ps1, ps2)) == NULL) { 96 err_ret->error = E_NOMEM; 97 return NULL; 98 } 99 tok->filename = filename; 100 if (Py_TabcheckFlag || Py_VerboseFlag) { 101 tok->altwarning = (filename != NULL); 102 if (Py_TabcheckFlag >= 2) 103 tok->alterror++; 104 } 105 106 return parsetok(tok, g, start, err_ret, flags); 107 107 } 108 108 … … 117 117 warn(const char *msg, const char *filename, int lineno) 118 118 { 119 if (filename == NULL)120 filename = "<string>";121 PySys_WriteStderr(msg, filename, lineno);119 if (filename == NULL) 120 filename = "<string>"; 121 PySys_WriteStderr(msg, filename, lineno); 122 122 } 123 123 #endif … … 128 128 static node * 129 129 parsetok(struct tok_state *tok, grammar *g, int start, perrdetail *err_ret, 130 int *flags)131 { 132 parser_state *ps;133 node *n;134 int started = 0, handling_import = 0, handling_with= 0;135 136 if ((ps = PyParser_New(g, start)) == NULL) {137 fprintf(stderr, "no mem for new parser\n");138 err_ret->error = E_NOMEM;139 PyTokenizer_Free(tok);140 return NULL;141 }130 int *flags) 131 { 132 parser_state *ps; 133 node *n; 134 int started = 0; 135 136 if ((ps = PyParser_New(g, start)) == NULL) { 137 fprintf(stderr, "no mem for new parser\n"); 138 err_ret->error = E_NOMEM; 139 PyTokenizer_Free(tok); 140 return NULL; 141 } 142 142 #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD 143 if (*flags & PyPARSE_PRINT_IS_FUNCTION) { 144 ps->p_flags |= CO_FUTURE_PRINT_FUNCTION; 145 } 146 if (*flags & PyPARSE_UNICODE_LITERALS) { 147 ps->p_flags |= CO_FUTURE_UNICODE_LITERALS; 148 } 149 150 #endif 151 152 for (;;) { 153 char *a, *b; 154 int type; 155 size_t len; 156 char *str; 157 int col_offset; 158 159 type = PyTokenizer_Get(tok, &a, &b); 160 if (type == ERRORTOKEN) { 161 err_ret->error = tok->done; 162 break; 163 } 164 if (type == ENDMARKER && started) { 165 type = NEWLINE; /* Add an extra newline */ 166 handling_with = handling_import = 0; 167 started = 0; 168 /* Add the right number of dedent tokens, 169 except if a certain flag is given -- 170 codeop.py uses this. */ 171 if (tok->indent && 172 !(*flags & PyPARSE_DONT_IMPLY_DEDENT)) 173 { 174 tok->pendin = -tok->indent; 175 tok->indent = 0; 176 } 177 } 178 else 179 started = 1; 180 len = b - a; /* XXX this may compute NULL - NULL */ 181 str = (char *) PyObject_MALLOC(len + 1); 182 if (str == NULL) { 183 fprintf(stderr, "no mem for next token\n"); 184 err_ret->error = E_NOMEM; 185 break; 186 } 187 if (len > 0) 188 strncpy(str, a, len); 189 str[len] = '\0'; 143 if (*flags & PyPARSE_PRINT_IS_FUNCTION) { 144 ps->p_flags |= CO_FUTURE_PRINT_FUNCTION; 145 } 146 if (*flags & PyPARSE_UNICODE_LITERALS) { 147 ps->p_flags |= CO_FUTURE_UNICODE_LITERALS; 148 } 149 150 #endif 151 152 for (;;) { 153 char *a, *b; 154 int type; 155 size_t len; 156 char *str; 157 int col_offset; 158 159 type = PyTokenizer_Get(tok, &a, &b); 160 if (type == ERRORTOKEN) { 161 err_ret->error = tok->done; 162 break; 163 } 164 if (type == ENDMARKER && started) { 165 type = NEWLINE; /* Add an extra newline */ 166 started = 0; 167 /* Add the right number of dedent tokens, 168 except if a certain flag is given -- 169 codeop.py uses this. */ 170 if (tok->indent && 171 !(*flags & PyPARSE_DONT_IMPLY_DEDENT)) 172 { 173 tok->pendin = -tok->indent; 174 tok->indent = 0; 175 } 176 } 177 else 178 started = 1; 179 len = b - a; /* XXX this may compute NULL - NULL */ 180 str = (char *) PyObject_MALLOC(len + 1); 181 if (str == NULL) { 182 fprintf(stderr, "no mem for next token\n"); 183 err_ret->error = E_NOMEM; 184 break; 185 } 186 if (len > 0) 187 strncpy(str, a, len); 188 str[len] = '\0'; 190 189 191 190 #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD 192 191 #endif 193 if (a >= tok->line_start)194 col_offset = a - tok->line_start;195 else196 col_offset = -1;197 198 if ((err_ret->error =199 PyParser_AddToken(ps, (int)type, str, tok->lineno, col_offset,200 &(err_ret->expected))) != E_OK) {201 if (err_ret->error != E_DONE) {202 PyObject_FREE(str);203 err_ret->token = type;204 } 205 break;206 }207 }208 209 if (err_ret->error == E_DONE) {210 n = ps->p_tree;211 ps->p_tree = NULL;212 }213 else214 n = NULL;192 if (a >= tok->line_start) 193 col_offset = a - tok->line_start; 194 else 195 col_offset = -1; 196 197 if ((err_ret->error = 198 PyParser_AddToken(ps, (int)type, str, tok->lineno, col_offset, 199 &(err_ret->expected))) != E_OK) { 200 if (err_ret->error != E_DONE) { 201 PyObject_FREE(str); 202 err_ret->token = type; 203 } 204 break; 205 } 206 } 207 208 if (err_ret->error == E_DONE) { 209 n = ps->p_tree; 210 ps->p_tree = NULL; 211 } 212 else 213 n = NULL; 215 214 216 215 #ifdef PY_PARSER_REQUIRES_FUTURE_KEYWORD 217 *flags = ps->p_flags;218 #endif 219 PyParser_Delete(ps);220 221 if (n == NULL) {222 if (tok->lineno <= 1 && tok->done == E_EOF)223 err_ret->error = E_EOF;224 err_ret->lineno = tok->lineno;225 if (tok->buf != NULL) {226 char *text = NULL;227 size_t len;228 assert(tok->cur - tok->buf < INT_MAX);229 err_ret->offset = (int)(tok->cur - tok->buf);230 len = tok->inp - tok->buf;216 *flags = ps->p_flags; 217 #endif 218 PyParser_Delete(ps); 219 220 if (n == NULL) { 221 if (tok->lineno <= 1 && tok->done == E_EOF) 222 err_ret->error = E_EOF; 223 err_ret->lineno = tok->lineno; 224 if (tok->buf != NULL) { 225 char *text = NULL; 226 size_t len; 227 assert(tok->cur - tok->buf < INT_MAX); 228 err_ret->offset = (int)(tok->cur - tok->buf); 229 len = tok->inp - tok->buf; 231 230 #ifdef Py_USING_UNICODE 232 text = PyTokenizer_RestoreEncoding(tok, len, &err_ret->offset); 233 234 #endif 235 if (text == NULL) { 236 text = (char *) PyObject_MALLOC(len + 1); 237 if (text != NULL) { 238 if (len > 0) 239 strncpy(text, tok->buf, len); 240 text[len] = '\0'; 241 } 242 } 243 err_ret->text = text; 244 } 245 } else if (tok->encoding != NULL) { 246 node* r = PyNode_New(encoding_decl); 247 if (!r) { 248 err_ret->error = E_NOMEM; 249 n = NULL; 250 goto done; 251 } 252 r->n_str = tok->encoding; 253 r->n_nchildren = 1; 254 r->n_child = n; 255 tok->encoding = NULL; 256 n = r; 257 } 231 text = PyTokenizer_RestoreEncoding(tok, len, &err_ret->offset); 232 233 #endif 234 if (text == NULL) { 235 text = (char *) PyObject_MALLOC(len + 1); 236 if (text != NULL) { 237 if (len > 0) 238 strncpy(text, tok->buf, len); 239 text[len] = '\0'; 240 } 241 } 242 err_ret->text = text; 243 } 244 } else if (tok->encoding != NULL) { 245 /* 'nodes->n_str' uses PyObject_*, while 'tok->encoding' was 246 * allocated using PyMem_ 247 */ 248 node* r = PyNode_New(encoding_decl); 249 if (r) 250 r->n_str = PyObject_MALLOC(strlen(tok->encoding)+1); 251 if (!r || !r->n_str) { 252 err_ret->error = E_NOMEM; 253 if (r) 254 PyObject_FREE(r); 255 n = NULL; 256 goto done; 257 } 258 strcpy(r->n_str, tok->encoding); 259 PyMem_FREE(tok->encoding); 260 tok->encoding = NULL; 261 r->n_nchildren = 1; 262 r->n_child = n; 263 n = r; 264 } 258 265 259 266 done: 260 PyTokenizer_Free(tok);261 262 return n;267 PyTokenizer_Free(tok); 268 269 return n; 263 270 } 264 271 … … 266 273 initerr(perrdetail *err_ret, const char *filename) 267 274 { 268 err_ret->error = E_OK;269 err_ret->filename = filename;270 err_ret->lineno = 0;271 err_ret->offset = 0;272 err_ret->text = NULL;273 err_ret->token = -1;274 err_ret->expected = -1;275 } 275 err_ret->error = E_OK; 276 err_ret->filename = filename; 277 err_ret->lineno = 0; 278 err_ret->offset = 0; 279 err_ret->text = NULL; 280 err_ret->token = -1; 281 err_ret->expected = -1; 282 } -
python/trunk/Parser/pgen.c
r2 r391 18 18 19 19 typedef struct _nfaarc { 20 intar_label;21 intar_arrow;20 int ar_label; 21 int ar_arrow; 22 22 } nfaarc; 23 23 24 24 typedef struct _nfastate { 25 intst_narcs;26 nfaarc*st_arc;25 int st_narcs; 26 nfaarc *st_arc; 27 27 } nfastate; 28 28 29 29 typedef struct _nfa { 30 intnf_type;31 char*nf_name;32 intnf_nstates;33 nfastate*nf_state;34 intnf_start, nf_finish;30 int nf_type; 31 char *nf_name; 32 int nf_nstates; 33 nfastate *nf_state; 34 int nf_start, nf_finish; 35 35 } nfa; 36 36 37 37 /* Forward */ 38 38 static void compile_rhs(labellist *ll, 39 nfa *nf, node *n, int *pa, int *pb);39 nfa *nf, node *n, int *pa, int *pb); 40 40 static void compile_alt(labellist *ll, 41 nfa *nf, node *n, int *pa, int *pb);41 nfa *nf, node *n, int *pa, int *pb); 42 42 static void compile_item(labellist *ll, 43 nfa *nf, node *n, int *pa, int *pb);43 nfa *nf, node *n, int *pa, int *pb); 44 44 static void compile_atom(labellist *ll, 45 nfa *nf, node *n, int *pa, int *pb);45 nfa *nf, node *n, int *pa, int *pb); 46 46 47 47 static int 48 48 addnfastate(nfa *nf) 49 49 { 50 nfastate *st;51 52 nf->nf_state = (nfastate *)PyObject_REALLOC(nf->nf_state, 53 sizeof(nfastate) * (nf->nf_nstates + 1));54 if (nf->nf_state == NULL)55 Py_FatalError("out of mem");56 st = &nf->nf_state[nf->nf_nstates++];57 st->st_narcs = 0;58 st->st_arc = NULL;59 return st - nf->nf_state;50 nfastate *st; 51 52 nf->nf_state = (nfastate *)PyObject_REALLOC(nf->nf_state, 53 sizeof(nfastate) * (nf->nf_nstates + 1)); 54 if (nf->nf_state == NULL) 55 Py_FatalError("out of mem"); 56 st = &nf->nf_state[nf->nf_nstates++]; 57 st->st_narcs = 0; 58 st->st_arc = NULL; 59 return st - nf->nf_state; 60 60 } 61 61 … … 63 63 addnfaarc(nfa *nf, int from, int to, int lbl) 64 64 { 65 nfastate *st;66 nfaarc *ar;67 68 st = &nf->nf_state[from];69 st->st_arc = (nfaarc *)PyObject_REALLOC(st->st_arc,70 sizeof(nfaarc) * (st->st_narcs + 1));71 if (st->st_arc == NULL)72 Py_FatalError("out of mem");73 ar = &st->st_arc[st->st_narcs++];74 ar->ar_label = lbl;75 ar->ar_arrow = to;65 nfastate *st; 66 nfaarc *ar; 67 68 st = &nf->nf_state[from]; 69 st->st_arc = (nfaarc *)PyObject_REALLOC(st->st_arc, 70 sizeof(nfaarc) * (st->st_narcs + 1)); 71 if (st->st_arc == NULL) 72 Py_FatalError("out of mem"); 73 ar = &st->st_arc[st->st_narcs++]; 74 ar->ar_label = lbl; 75 ar->ar_arrow = to; 76 76 } 77 77 … … 79 79 newnfa(char *name) 80 80 { 81 nfa *nf;82 static int type = NT_OFFSET; /* All types will be disjunct */83 84 nf = (nfa *)PyObject_MALLOC(sizeof(nfa));85 if (nf == NULL)86 Py_FatalError("no mem for new nfa");87 nf->nf_type = type++;88 nf->nf_name = name; /* XXX strdup(name) ??? */89 nf->nf_nstates = 0;90 nf->nf_state = NULL;91 nf->nf_start = nf->nf_finish = -1;92 return nf;81 nfa *nf; 82 static int type = NT_OFFSET; /* All types will be disjunct */ 83 84 nf = (nfa *)PyObject_MALLOC(sizeof(nfa)); 85 if (nf == NULL) 86 Py_FatalError("no mem for new nfa"); 87 nf->nf_type = type++; 88 nf->nf_name = name; /* XXX strdup(name) ??? */ 89 nf->nf_nstates = 0; 90 nf->nf_state = NULL; 91 nf->nf_start = nf->nf_finish = -1; 92 return nf; 93 93 } 94 94 95 95 typedef struct _nfagrammar { 96 intgr_nnfas;97 nfa**gr_nfa;98 labellistgr_ll;96 int gr_nnfas; 97 nfa **gr_nfa; 98 labellist gr_ll; 99 99 } nfagrammar; 100 100 … … 105 105 newnfagrammar(void) 106 106 { 107 nfagrammar *gr;108 109 gr = (nfagrammar *)PyObject_MALLOC(sizeof(nfagrammar));110 if (gr == NULL)111 Py_FatalError("no mem for new nfa grammar");112 gr->gr_nnfas = 0;113 gr->gr_nfa = NULL;114 gr->gr_ll.ll_nlabels = 0;115 gr->gr_ll.ll_label = NULL;116 addlabel(&gr->gr_ll, ENDMARKER, "EMPTY");117 return gr;107 nfagrammar *gr; 108 109 gr = (nfagrammar *)PyObject_MALLOC(sizeof(nfagrammar)); 110 if (gr == NULL) 111 Py_FatalError("no mem for new nfa grammar"); 112 gr->gr_nnfas = 0; 113 gr->gr_nfa = NULL; 114 gr->gr_ll.ll_nlabels = 0; 115 gr->gr_ll.ll_label = NULL; 116 addlabel(&gr->gr_ll, ENDMARKER, "EMPTY"); 117 return gr; 118 118 } 119 119 … … 121 121 addnfa(nfagrammar *gr, char *name) 122 122 { 123 nfa *nf;124 125 nf = newnfa(name);126 gr->gr_nfa = (nfa **)PyObject_REALLOC(gr->gr_nfa,127 sizeof(nfa*) * (gr->gr_nnfas + 1));128 if (gr->gr_nfa == NULL)129 Py_FatalError("out of mem");130 gr->gr_nfa[gr->gr_nnfas++] = nf;131 addlabel(&gr->gr_ll, NAME, nf->nf_name);132 return nf;123 nfa *nf; 124 125 nf = newnfa(name); 126 gr->gr_nfa = (nfa **)PyObject_REALLOC(gr->gr_nfa, 127 sizeof(nfa*) * (gr->gr_nnfas + 1)); 128 if (gr->gr_nfa == NULL) 129 Py_FatalError("out of mem"); 130 gr->gr_nfa[gr->gr_nnfas++] = nf; 131 addlabel(&gr->gr_ll, NAME, nf->nf_name); 132 return nf; 133 133 } 134 134 … … 138 138 139 139 #define REQN(i, count) \ 140 if (i < count) { \141 fprintf(stderr, REQNFMT, count); \142 Py_FatalError("REQN"); \143 } else140 if (i < count) { \ 141 fprintf(stderr, REQNFMT, count); \ 142 Py_FatalError("REQN"); \ 143 } else 144 144 145 145 #else 146 #define REQN(i, count) /* empty */146 #define REQN(i, count) /* empty */ 147 147 #endif 148 148 … … 150 150 metacompile(node *n) 151 151 { 152 nfagrammar *gr;153 int i;154 155 if (Py_DebugFlag)156 printf("Compiling (meta-) parse tree into NFA grammar\n");157 gr = newnfagrammar();158 REQ(n, MSTART);159 i = n->n_nchildren - 1; /* Last child is ENDMARKER */160 n = n->n_child;161 for (; --i >= 0; n++) {162 if (n->n_type != NEWLINE)163 compile_rule(gr, n);164 }165 return gr;152 nfagrammar *gr; 153 int i; 154 155 if (Py_DebugFlag) 156 printf("Compiling (meta-) parse tree into NFA grammar\n"); 157 gr = newnfagrammar(); 158 REQ(n, MSTART); 159 i = n->n_nchildren - 1; /* Last child is ENDMARKER */ 160 n = n->n_child; 161 for (; --i >= 0; n++) { 162 if (n->n_type != NEWLINE) 163 compile_rule(gr, n); 164 } 165 return gr; 166 166 } 167 167 … … 169 169 compile_rule(nfagrammar *gr, node *n) 170 170 { 171 nfa *nf;172 173 REQ(n, RULE);174 REQN(n->n_nchildren, 4);175 n = n->n_child;176 REQ(n, NAME);177 nf = addnfa(gr, n->n_str);178 n++;179 REQ(n, COLON);180 n++;181 REQ(n, RHS);182 compile_rhs(&gr->gr_ll, nf, n, &nf->nf_start, &nf->nf_finish);183 n++;184 REQ(n, NEWLINE);171 nfa *nf; 172 173 REQ(n, RULE); 174 REQN(n->n_nchildren, 4); 175 n = n->n_child; 176 REQ(n, NAME); 177 nf = addnfa(gr, n->n_str); 178 n++; 179 REQ(n, COLON); 180 n++; 181 REQ(n, RHS); 182 compile_rhs(&gr->gr_ll, nf, n, &nf->nf_start, &nf->nf_finish); 183 n++; 184 REQ(n, NEWLINE); 185 185 } 186 186 … … 188 188 compile_rhs(labellist *ll, nfa *nf, node *n, int *pa, int *pb) 189 189 { 190 int i;191 int a, b;192 193 REQ(n, RHS);194 i = n->n_nchildren;195 REQN(i, 1);196 n = n->n_child;197 REQ(n, ALT);198 compile_alt(ll, nf, n, pa, pb);199 if (--i <= 0)200 return;201 n++;202 a = *pa;203 b = *pb;204 *pa = addnfastate(nf);205 *pb = addnfastate(nf);206 addnfaarc(nf, *pa, a, EMPTY);207 addnfaarc(nf, b, *pb, EMPTY);208 for (; --i >= 0; n++) {209 REQ(n, VBAR);210 REQN(i, 1);211 --i;212 n++;213 REQ(n, ALT);214 compile_alt(ll, nf, n, &a, &b);215 addnfaarc(nf, *pa, a, EMPTY);216 addnfaarc(nf, b, *pb, EMPTY);217 }190 int i; 191 int a, b; 192 193 REQ(n, RHS); 194 i = n->n_nchildren; 195 REQN(i, 1); 196 n = n->n_child; 197 REQ(n, ALT); 198 compile_alt(ll, nf, n, pa, pb); 199 if (--i <= 0) 200 return; 201 n++; 202 a = *pa; 203 b = *pb; 204 *pa = addnfastate(nf); 205 *pb = addnfastate(nf); 206 addnfaarc(nf, *pa, a, EMPTY); 207 addnfaarc(nf, b, *pb, EMPTY); 208 for (; --i >= 0; n++) { 209 REQ(n, VBAR); 210 REQN(i, 1); 211 --i; 212 n++; 213 REQ(n, ALT); 214 compile_alt(ll, nf, n, &a, &b); 215 addnfaarc(nf, *pa, a, EMPTY); 216 addnfaarc(nf, b, *pb, EMPTY); 217 } 218 218 } 219 219 … … 221 221 compile_alt(labellist *ll, nfa *nf, node *n, int *pa, int *pb) 222 222 { 223 int i;224 int a, b;225 226 REQ(n, ALT);227 i = n->n_nchildren;228 REQN(i, 1);229 n = n->n_child;230 REQ(n, ITEM);231 compile_item(ll, nf, n, pa, pb);232 --i;233 n++;234 for (; --i >= 0; n++) {235 REQ(n, ITEM);236 compile_item(ll, nf, n, &a, &b);237 addnfaarc(nf, *pb, a, EMPTY);238 *pb = b;239 }223 int i; 224 int a, b; 225 226 REQ(n, ALT); 227 i = n->n_nchildren; 228 REQN(i, 1); 229 n = n->n_child; 230 REQ(n, ITEM); 231 compile_item(ll, nf, n, pa, pb); 232 --i; 233 n++; 234 for (; --i >= 0; n++) { 235 REQ(n, ITEM); 236 compile_item(ll, nf, n, &a, &b); 237 addnfaarc(nf, *pb, a, EMPTY); 238 *pb = b; 239 } 240 240 } 241 241 … … 243 243 compile_item(labellist *ll, nfa *nf, node *n, int *pa, int *pb) 244 244 { 245 int i;246 int a, b;247 248 REQ(n, ITEM);249 i = n->n_nchildren;250 REQN(i, 1);251 n = n->n_child;252 if (n->n_type == LSQB) {253 REQN(i, 3);254 n++;255 REQ(n, RHS);256 *pa = addnfastate(nf);257 *pb = addnfastate(nf);258 addnfaarc(nf, *pa, *pb, EMPTY);259 compile_rhs(ll, nf, n, &a, &b);260 addnfaarc(nf, *pa, a, EMPTY);261 addnfaarc(nf, b, *pb, EMPTY);262 REQN(i, 1);263 n++;264 REQ(n, RSQB);265 }266 else {267 compile_atom(ll, nf, n, pa, pb);268 if (--i <= 0)269 return;270 n++;271 addnfaarc(nf, *pb, *pa, EMPTY);272 if (n->n_type == STAR)273 *pb = *pa;274 else275 REQ(n, PLUS);276 }245 int i; 246 int a, b; 247 248 REQ(n, ITEM); 249 i = n->n_nchildren; 250 REQN(i, 1); 251 n = n->n_child; 252 if (n->n_type == LSQB) { 253 REQN(i, 3); 254 n++; 255 REQ(n, RHS); 256 *pa = addnfastate(nf); 257 *pb = addnfastate(nf); 258 addnfaarc(nf, *pa, *pb, EMPTY); 259 compile_rhs(ll, nf, n, &a, &b); 260 addnfaarc(nf, *pa, a, EMPTY); 261 addnfaarc(nf, b, *pb, EMPTY); 262 REQN(i, 1); 263 n++; 264 REQ(n, RSQB); 265 } 266 else { 267 compile_atom(ll, nf, n, pa, pb); 268 if (--i <= 0) 269 return; 270 n++; 271 addnfaarc(nf, *pb, *pa, EMPTY); 272 if (n->n_type == STAR) 273 *pb = *pa; 274 else 275 REQ(n, PLUS); 276 } 277 277 } 278 278 … … 280 280 compile_atom(labellist *ll, nfa *nf, node *n, int *pa, int *pb) 281 281 { 282 int i;283 284 REQ(n, ATOM);285 i = n->n_nchildren;286 REQN(i, 1);287 n = n->n_child;288 if (n->n_type == LPAR) {289 REQN(i, 3);290 n++;291 REQ(n, RHS);292 compile_rhs(ll, nf, n, pa, pb);293 n++;294 REQ(n, RPAR);295 }296 else if (n->n_type == NAME || n->n_type == STRING) {297 *pa = addnfastate(nf);298 *pb = addnfastate(nf);299 addnfaarc(nf, *pa, *pb, addlabel(ll, n->n_type, n->n_str));300 }301 else302 REQ(n, NAME);282 int i; 283 284 REQ(n, ATOM); 285 i = n->n_nchildren; 286 REQN(i, 1); 287 n = n->n_child; 288 if (n->n_type == LPAR) { 289 REQN(i, 3); 290 n++; 291 REQ(n, RHS); 292 compile_rhs(ll, nf, n, pa, pb); 293 n++; 294 REQ(n, RPAR); 295 } 296 else if (n->n_type == NAME || n->n_type == STRING) { 297 *pa = addnfastate(nf); 298 *pb = addnfastate(nf); 299 addnfaarc(nf, *pa, *pb, addlabel(ll, n->n_type, n->n_str)); 300 } 301 else 302 REQ(n, NAME); 303 303 } 304 304 … … 306 306 dumpstate(labellist *ll, nfa *nf, int istate) 307 307 { 308 nfastate *st;309 int i;310 nfaarc *ar;311 312 printf("%c%2d%c",313 istate == nf->nf_start ? '*' : ' ',314 istate,315 istate == nf->nf_finish ? '.' : ' ');316 st = &nf->nf_state[istate];317 ar = st->st_arc;318 for (i = 0; i < st->st_narcs; i++) {319 if (i > 0)320 printf("\n ");321 printf("-> %2d %s", ar->ar_arrow,322 PyGrammar_LabelRepr(&ll->ll_label[ar->ar_label]));323 ar++;324 }325 printf("\n");308 nfastate *st; 309 int i; 310 nfaarc *ar; 311 312 printf("%c%2d%c", 313 istate == nf->nf_start ? '*' : ' ', 314 istate, 315 istate == nf->nf_finish ? '.' : ' '); 316 st = &nf->nf_state[istate]; 317 ar = st->st_arc; 318 for (i = 0; i < st->st_narcs; i++) { 319 if (i > 0) 320 printf("\n "); 321 printf("-> %2d %s", ar->ar_arrow, 322 PyGrammar_LabelRepr(&ll->ll_label[ar->ar_label])); 323 ar++; 324 } 325 printf("\n"); 326 326 } 327 327 … … 329 329 dumpnfa(labellist *ll, nfa *nf) 330 330 { 331 int i;332 333 printf("NFA '%s' has %d states; start %d, finish %d\n",334 nf->nf_name, nf->nf_nstates, nf->nf_start, nf->nf_finish);335 for (i = 0; i < nf->nf_nstates; i++)336 dumpstate(ll, nf, i);331 int i; 332 333 printf("NFA '%s' has %d states; start %d, finish %d\n", 334 nf->nf_name, nf->nf_nstates, nf->nf_start, nf->nf_finish); 335 for (i = 0; i < nf->nf_nstates; i++) 336 dumpstate(ll, nf, i); 337 337 } 338 338 … … 343 343 addclosure(bitset ss, nfa *nf, int istate) 344 344 { 345 if (addbit(ss, istate)) {346 nfastate *st = &nf->nf_state[istate];347 nfaarc *ar = st->st_arc;348 int i;349 350 for (i = st->st_narcs; --i >= 0; ) {351 if (ar->ar_label == EMPTY)352 addclosure(ss, nf, ar->ar_arrow);353 ar++;354 }355 }345 if (addbit(ss, istate)) { 346 nfastate *st = &nf->nf_state[istate]; 347 nfaarc *ar = st->st_arc; 348 int i; 349 350 for (i = st->st_narcs; --i >= 0; ) { 351 if (ar->ar_label == EMPTY) 352 addclosure(ss, nf, ar->ar_arrow); 353 ar++; 354 } 355 } 356 356 } 357 357 358 358 typedef struct _ss_arc { 359 bitsetsa_bitset;360 intsa_arrow;361 intsa_label;359 bitset sa_bitset; 360 int sa_arrow; 361 int sa_label; 362 362 } ss_arc; 363 363 364 364 typedef struct _ss_state { 365 bitsetss_ss;366 intss_narcs;367 struct _ss_arc*ss_arc;368 intss_deleted;369 intss_finish;370 intss_rename;365 bitset ss_ss; 366 int ss_narcs; 367 struct _ss_arc *ss_arc; 368 int ss_deleted; 369 int ss_finish; 370 int ss_rename; 371 371 } ss_state; 372 372 373 373 typedef struct _ss_dfa { 374 intsd_nstates;375 ss_state *sd_state;374 int sd_nstates; 375 ss_state *sd_state; 376 376 } ss_dfa; 377 377 378 378 /* Forward */ 379 379 static void printssdfa(int xx_nstates, ss_state *xx_state, int nbits, 380 labellist *ll, char *msg);380 labellist *ll, char *msg); 381 381 static void simplify(int xx_nstates, ss_state *xx_state); 382 382 static void convert(dfa *d, int xx_nstates, ss_state *xx_state); … … 385 385 makedfa(nfagrammar *gr, nfa *nf, dfa *d) 386 386 { 387 int nbits = nf->nf_nstates;388 bitset ss;389 int xx_nstates;390 ss_state *xx_state, *yy;391 ss_arc *zz;392 int istate, jstate, iarc, jarc, ibit;393 nfastate *st;394 nfaarc *ar;395 396 ss = newbitset(nbits);397 addclosure(ss, nf, nf->nf_start);398 xx_state = (ss_state *)PyObject_MALLOC(sizeof(ss_state));399 if (xx_state == NULL)400 Py_FatalError("no mem for xx_state in makedfa");401 xx_nstates = 1;402 yy = &xx_state[0];403 yy->ss_ss = ss;404 yy->ss_narcs = 0;405 yy->ss_arc = NULL;406 yy->ss_deleted = 0;407 yy->ss_finish = testbit(ss, nf->nf_finish);408 if (yy->ss_finish)409 printf("Error: nonterminal '%s' may produce empty.\n",410 nf->nf_name);411 412 /* This algorithm is from a book written before413 the invention of structured programming... */414 415 /* For each unmarked state... */416 for (istate = 0; istate < xx_nstates; ++istate) {417 size_t size;418 yy = &xx_state[istate];419 ss = yy->ss_ss;420 /* For all its states... */421 for (ibit = 0; ibit < nf->nf_nstates; ++ibit) {422 if (!testbit(ss, ibit))423 continue;424 st = &nf->nf_state[ibit];425 /* For all non-empty arcs from this state... */426 for (iarc = 0; iarc < st->st_narcs; iarc++) {427 ar = &st->st_arc[iarc];428 if (ar->ar_label == EMPTY)429 continue;430 /* Look up in list of arcs from this state */431 for (jarc = 0; jarc < yy->ss_narcs; ++jarc) {432 zz = &yy->ss_arc[jarc];433 if (ar->ar_label == zz->sa_label)434 goto found;435 }436 /* Add new arc for this state */437 size = sizeof(ss_arc) * (yy->ss_narcs + 1);438 yy->ss_arc = (ss_arc *)PyObject_REALLOC(439 yy->ss_arc, size);440 if (yy->ss_arc == NULL)441 Py_FatalError("out of mem");442 zz = &yy->ss_arc[yy->ss_narcs++];443 zz->sa_label = ar->ar_label;444 zz->sa_bitset = newbitset(nbits);445 zz->sa_arrow = -1;446 found:;447 /* Add destination */448 addclosure(zz->sa_bitset, nf, ar->ar_arrow);449 }450 }451 /* Now look up all the arrow states */452 for (jarc = 0; jarc < xx_state[istate].ss_narcs; jarc++) {453 zz = &xx_state[istate].ss_arc[jarc];454 for (jstate = 0; jstate < xx_nstates; jstate++) {455 if (samebitset(zz->sa_bitset,456 xx_state[jstate].ss_ss, nbits)) {457 zz->sa_arrow = jstate;458 goto done;459 }460 }461 size = sizeof(ss_state) * (xx_nstates + 1);462 xx_state = (ss_state *)PyObject_REALLOC(xx_state, 463 size);464 if (xx_state == NULL)465 Py_FatalError("out of mem");466 zz->sa_arrow = xx_nstates;467 yy = &xx_state[xx_nstates++];468 yy->ss_ss = zz->sa_bitset;469 yy->ss_narcs = 0;470 yy->ss_arc = NULL;471 yy->ss_deleted = 0;472 yy->ss_finish = testbit(yy->ss_ss, nf->nf_finish);473 done:;474 }475 }476 477 if (Py_DebugFlag)478 printssdfa(xx_nstates, xx_state, nbits, &gr->gr_ll,479 "before minimizing");480 481 simplify(xx_nstates, xx_state);482 483 if (Py_DebugFlag)484 printssdfa(xx_nstates, xx_state, nbits, &gr->gr_ll,485 "after minimizing");486 487 convert(d, xx_nstates, xx_state);488 489 /* XXX cleanup */490 PyObject_FREE(xx_state);387 int nbits = nf->nf_nstates; 388 bitset ss; 389 int xx_nstates; 390 ss_state *xx_state, *yy; 391 ss_arc *zz; 392 int istate, jstate, iarc, jarc, ibit; 393 nfastate *st; 394 nfaarc *ar; 395 396 ss = newbitset(nbits); 397 addclosure(ss, nf, nf->nf_start); 398 xx_state = (ss_state *)PyObject_MALLOC(sizeof(ss_state)); 399 if (xx_state == NULL) 400 Py_FatalError("no mem for xx_state in makedfa"); 401 xx_nstates = 1; 402 yy = &xx_state[0]; 403 yy->ss_ss = ss; 404 yy->ss_narcs = 0; 405 yy->ss_arc = NULL; 406 yy->ss_deleted = 0; 407 yy->ss_finish = testbit(ss, nf->nf_finish); 408 if (yy->ss_finish) 409 printf("Error: nonterminal '%s' may produce empty.\n", 410 nf->nf_name); 411 412 /* This algorithm is from a book written before 413 the invention of structured programming... */ 414 415 /* For each unmarked state... */ 416 for (istate = 0; istate < xx_nstates; ++istate) { 417 size_t size; 418 yy = &xx_state[istate]; 419 ss = yy->ss_ss; 420 /* For all its states... */ 421 for (ibit = 0; ibit < nf->nf_nstates; ++ibit) { 422 if (!testbit(ss, ibit)) 423 continue; 424 st = &nf->nf_state[ibit]; 425 /* For all non-empty arcs from this state... */ 426 for (iarc = 0; iarc < st->st_narcs; iarc++) { 427 ar = &st->st_arc[iarc]; 428 if (ar->ar_label == EMPTY) 429 continue; 430 /* Look up in list of arcs from this state */ 431 for (jarc = 0; jarc < yy->ss_narcs; ++jarc) { 432 zz = &yy->ss_arc[jarc]; 433 if (ar->ar_label == zz->sa_label) 434 goto found; 435 } 436 /* Add new arc for this state */ 437 size = sizeof(ss_arc) * (yy->ss_narcs + 1); 438 yy->ss_arc = (ss_arc *)PyObject_REALLOC( 439 yy->ss_arc, size); 440 if (yy->ss_arc == NULL) 441 Py_FatalError("out of mem"); 442 zz = &yy->ss_arc[yy->ss_narcs++]; 443 zz->sa_label = ar->ar_label; 444 zz->sa_bitset = newbitset(nbits); 445 zz->sa_arrow = -1; 446 found: ; 447 /* Add destination */ 448 addclosure(zz->sa_bitset, nf, ar->ar_arrow); 449 } 450 } 451 /* Now look up all the arrow states */ 452 for (jarc = 0; jarc < xx_state[istate].ss_narcs; jarc++) { 453 zz = &xx_state[istate].ss_arc[jarc]; 454 for (jstate = 0; jstate < xx_nstates; jstate++) { 455 if (samebitset(zz->sa_bitset, 456 xx_state[jstate].ss_ss, nbits)) { 457 zz->sa_arrow = jstate; 458 goto done; 459 } 460 } 461 size = sizeof(ss_state) * (xx_nstates + 1); 462 xx_state = (ss_state *)PyObject_REALLOC(xx_state, 463 size); 464 if (xx_state == NULL) 465 Py_FatalError("out of mem"); 466 zz->sa_arrow = xx_nstates; 467 yy = &xx_state[xx_nstates++]; 468 yy->ss_ss = zz->sa_bitset; 469 yy->ss_narcs = 0; 470 yy->ss_arc = NULL; 471 yy->ss_deleted = 0; 472 yy->ss_finish = testbit(yy->ss_ss, nf->nf_finish); 473 done: ; 474 } 475 } 476 477 if (Py_DebugFlag) 478 printssdfa(xx_nstates, xx_state, nbits, &gr->gr_ll, 479 "before minimizing"); 480 481 simplify(xx_nstates, xx_state); 482 483 if (Py_DebugFlag) 484 printssdfa(xx_nstates, xx_state, nbits, &gr->gr_ll, 485 "after minimizing"); 486 487 convert(d, xx_nstates, xx_state); 488 489 /* XXX cleanup */ 490 PyObject_FREE(xx_state); 491 491 } 492 492 493 493 static void 494 494 printssdfa(int xx_nstates, ss_state *xx_state, int nbits, 495 labellist *ll, char *msg)496 { 497 int i, ibit, iarc;498 ss_state *yy;499 ss_arc *zz;500 501 printf("Subset DFA %s\n", msg);502 for (i = 0; i < xx_nstates; i++) {503 yy = &xx_state[i];504 if (yy->ss_deleted)505 continue;506 printf(" Subset %d", i);507 if (yy->ss_finish)508 printf(" (finish)");509 printf(" { ");510 for (ibit = 0; ibit < nbits; ibit++) {511 if (testbit(yy->ss_ss, ibit))512 printf("%d ", ibit);513 }514 printf("}\n");515 for (iarc = 0; iarc < yy->ss_narcs; iarc++) {516 zz = &yy->ss_arc[iarc];517 printf(" Arc to state %d, label %s\n",518 zz->sa_arrow,519 PyGrammar_LabelRepr(520 &ll->ll_label[zz->sa_label]));521 }522 }495 labellist *ll, char *msg) 496 { 497 int i, ibit, iarc; 498 ss_state *yy; 499 ss_arc *zz; 500 501 printf("Subset DFA %s\n", msg); 502 for (i = 0; i < xx_nstates; i++) { 503 yy = &xx_state[i]; 504 if (yy->ss_deleted) 505 continue; 506 printf(" Subset %d", i); 507 if (yy->ss_finish) 508 printf(" (finish)"); 509 printf(" { "); 510 for (ibit = 0; ibit < nbits; ibit++) { 511 if (testbit(yy->ss_ss, ibit)) 512 printf("%d ", ibit); 513 } 514 printf("}\n"); 515 for (iarc = 0; iarc < yy->ss_narcs; iarc++) { 516 zz = &yy->ss_arc[iarc]; 517 printf(" Arc to state %d, label %s\n", 518 zz->sa_arrow, 519 PyGrammar_LabelRepr( 520 &ll->ll_label[zz->sa_label])); 521 } 522 } 523 523 } 524 524 … … 536 536 samestate(ss_state *s1, ss_state *s2) 537 537 { 538 int i;539 540 if (s1->ss_narcs != s2->ss_narcs || s1->ss_finish != s2->ss_finish)541 return 0;542 for (i = 0; i < s1->ss_narcs; i++) {543 if (s1->ss_arc[i].sa_arrow != s2->ss_arc[i].sa_arrow ||544 s1->ss_arc[i].sa_label != s2->ss_arc[i].sa_label)545 return 0;546 }547 return 1;538 int i; 539 540 if (s1->ss_narcs != s2->ss_narcs || s1->ss_finish != s2->ss_finish) 541 return 0; 542 for (i = 0; i < s1->ss_narcs; i++) { 543 if (s1->ss_arc[i].sa_arrow != s2->ss_arc[i].sa_arrow || 544 s1->ss_arc[i].sa_label != s2->ss_arc[i].sa_label) 545 return 0; 546 } 547 return 1; 548 548 } 549 549 … … 551 551 renamestates(int xx_nstates, ss_state *xx_state, int from, int to) 552 552 { 553 int i, j;554 555 if (Py_DebugFlag)556 printf("Rename state %d to %d.\n", from, to);557 for (i = 0; i < xx_nstates; i++) {558 if (xx_state[i].ss_deleted)559 continue;560 for (j = 0; j < xx_state[i].ss_narcs; j++) {561 if (xx_state[i].ss_arc[j].sa_arrow == from)562 xx_state[i].ss_arc[j].sa_arrow = to;563 }564 }553 int i, j; 554 555 if (Py_DebugFlag) 556 printf("Rename state %d to %d.\n", from, to); 557 for (i = 0; i < xx_nstates; i++) { 558 if (xx_state[i].ss_deleted) 559 continue; 560 for (j = 0; j < xx_state[i].ss_narcs; j++) { 561 if (xx_state[i].ss_arc[j].sa_arrow == from) 562 xx_state[i].ss_arc[j].sa_arrow = to; 563 } 564 } 565 565 } 566 566 … … 568 568 simplify(int xx_nstates, ss_state *xx_state) 569 569 { 570 int changes;571 int i, j;572 573 do {574 changes = 0;575 for (i = 1; i < xx_nstates; i++) {576 if (xx_state[i].ss_deleted)577 continue;578 for (j = 0; j < i; j++) {579 if (xx_state[j].ss_deleted)580 continue;581 if (samestate(&xx_state[i], &xx_state[j])) {582 xx_state[i].ss_deleted++;583 renamestates(xx_nstates, xx_state,584 i, j);585 changes++;586 break;587 }588 }589 }590 } while (changes);570 int changes; 571 int i, j; 572 573 do { 574 changes = 0; 575 for (i = 1; i < xx_nstates; i++) { 576 if (xx_state[i].ss_deleted) 577 continue; 578 for (j = 0; j < i; j++) { 579 if (xx_state[j].ss_deleted) 580 continue; 581 if (samestate(&xx_state[i], &xx_state[j])) { 582 xx_state[i].ss_deleted++; 583 renamestates(xx_nstates, xx_state, 584 i, j); 585 changes++; 586 break; 587 } 588 } 589 } 590 } while (changes); 591 591 } 592 592 … … 599 599 convert(dfa *d, int xx_nstates, ss_state *xx_state) 600 600 { 601 int i, j;602 ss_state *yy;603 ss_arc *zz;604 605 for (i = 0; i < xx_nstates; i++) {606 yy = &xx_state[i];607 if (yy->ss_deleted)608 continue;609 yy->ss_rename = addstate(d);610 }611 612 for (i = 0; i < xx_nstates; i++) {613 yy = &xx_state[i];614 if (yy->ss_deleted)615 continue;616 for (j = 0; j < yy->ss_narcs; j++) {617 zz = &yy->ss_arc[j];618 addarc(d, yy->ss_rename,619 xx_state[zz->sa_arrow].ss_rename,620 zz->sa_label);621 }622 if (yy->ss_finish)623 addarc(d, yy->ss_rename, yy->ss_rename, 0);624 }625 626 d->d_initial = 0;601 int i, j; 602 ss_state *yy; 603 ss_arc *zz; 604 605 for (i = 0; i < xx_nstates; i++) { 606 yy = &xx_state[i]; 607 if (yy->ss_deleted) 608 continue; 609 yy->ss_rename = addstate(d); 610 } 611 612 for (i = 0; i < xx_nstates; i++) { 613 yy = &xx_state[i]; 614 if (yy->ss_deleted) 615 continue; 616 for (j = 0; j < yy->ss_narcs; j++) { 617 zz = &yy->ss_arc[j]; 618 addarc(d, yy->ss_rename, 619 xx_state[zz->sa_arrow].ss_rename, 620 zz->sa_label); 621 } 622 if (yy->ss_finish) 623 addarc(d, yy->ss_rename, yy->ss_rename, 0); 624 } 625 626 d->d_initial = 0; 627 627 } 628 628 … … 633 633 maketables(nfagrammar *gr) 634 634 { 635 int i;636 nfa *nf;637 dfa *d;638 grammar *g;639 640 if (gr->gr_nnfas == 0)641 return NULL;642 g = newgrammar(gr->gr_nfa[0]->nf_type);643 /* XXX first rule must be start rule */644 g->g_ll = gr->gr_ll;645 646 for (i = 0; i < gr->gr_nnfas; i++) {647 nf = gr->gr_nfa[i];648 if (Py_DebugFlag) {649 printf("Dump of NFA for '%s' ...\n", nf->nf_name);650 dumpnfa(&gr->gr_ll, nf);651 printf("Making DFA for '%s' ...\n", nf->nf_name);652 }653 d = adddfa(g, nf->nf_type, nf->nf_name);654 makedfa(gr, gr->gr_nfa[i], d);655 }656 657 return g;635 int i; 636 nfa *nf; 637 dfa *d; 638 grammar *g; 639 640 if (gr->gr_nnfas == 0) 641 return NULL; 642 g = newgrammar(gr->gr_nfa[0]->nf_type); 643 /* XXX first rule must be start rule */ 644 g->g_ll = gr->gr_ll; 645 646 for (i = 0; i < gr->gr_nnfas; i++) { 647 nf = gr->gr_nfa[i]; 648 if (Py_DebugFlag) { 649 printf("Dump of NFA for '%s' ...\n", nf->nf_name); 650 dumpnfa(&gr->gr_ll, nf); 651 printf("Making DFA for '%s' ...\n", nf->nf_name); 652 } 653 d = adddfa(g, nf->nf_type, nf->nf_name); 654 makedfa(gr, gr->gr_nfa[i], d); 655 } 656 657 return g; 658 658 } 659 659 … … 661 661 pgen(node *n) 662 662 { 663 nfagrammar *gr;664 grammar *g;665 666 gr = metacompile(n);667 g = maketables(gr);668 translatelabels(g);669 addfirstsets(g);670 PyObject_FREE(gr);671 return g;663 nfagrammar *gr; 664 grammar *g; 665 666 gr = metacompile(n); 667 g = maketables(gr); 668 translatelabels(g); 669 addfirstsets(g); 670 PyObject_FREE(gr); 671 return g; 672 672 } 673 673 … … 703 703 704 704 [Aho&Ullman 77] 705 Aho&Ullman, Principles of Compiler Design, Addison-Wesley 1977706 (first edition)705 Aho&Ullman, Principles of Compiler Design, Addison-Wesley 1977 706 (first edition) 707 707 708 708 */ -
python/trunk/Parser/pgenmain.c
r2 r391 31 31 Py_Exit(int sts) 32 32 { 33 exit(sts);33 exit(sts); 34 34 } 35 35 … … 37 37 main(int argc, char **argv) 38 38 { 39 grammar *g;40 FILE *fp;41 char *filename, *graminit_h, *graminit_c;42 43 if (argc != 4) {44 fprintf(stderr,45 "usage: %s grammar graminit.h graminit.c\n", argv[0]);46 Py_Exit(2);47 }48 filename = argv[1];49 graminit_h = argv[2];50 graminit_c = argv[3];51 g = getgrammar(filename);52 fp = fopen(graminit_c, "w");53 if (fp == NULL) {54 perror(graminit_c);55 Py_Exit(1);56 }57 if (Py_DebugFlag)58 printf("Writing %s ...\n", graminit_c);59 printgrammar(g, fp);60 fclose(fp);61 fp = fopen(graminit_h, "w");62 if (fp == NULL) {63 perror(graminit_h);64 Py_Exit(1);65 }66 if (Py_DebugFlag)67 printf("Writing %s ...\n", graminit_h);68 printnonterminals(g, fp);69 fclose(fp);70 Py_Exit(0);71 return 0; /* Make gcc -Wall happy */39 grammar *g; 40 FILE *fp; 41 char *filename, *graminit_h, *graminit_c; 42 43 if (argc != 4) { 44 fprintf(stderr, 45 "usage: %s grammar graminit.h graminit.c\n", argv[0]); 46 Py_Exit(2); 47 } 48 filename = argv[1]; 49 graminit_h = argv[2]; 50 graminit_c = argv[3]; 51 g = getgrammar(filename); 52 fp = fopen(graminit_c, "w"); 53 if (fp == NULL) { 54 perror(graminit_c); 55 Py_Exit(1); 56 } 57 if (Py_DebugFlag) 58 printf("Writing %s ...\n", graminit_c); 59 printgrammar(g, fp); 60 fclose(fp); 61 fp = fopen(graminit_h, "w"); 62 if (fp == NULL) { 63 perror(graminit_h); 64 Py_Exit(1); 65 } 66 if (Py_DebugFlag) 67 printf("Writing %s ...\n", graminit_h); 68 printnonterminals(g, fp); 69 fclose(fp); 70 Py_Exit(0); 71 return 0; /* Make gcc -Wall happy */ 72 72 } 73 73 … … 75 75 getgrammar(char *filename) 76 76 { 77 FILE *fp;78 node *n;79 grammar *g0, *g;80 perrdetail err;81 82 fp = fopen(filename, "r");83 if (fp == NULL) {84 perror(filename);85 Py_Exit(1);86 }87 g0 = meta_grammar();88 n = PyParser_ParseFile(fp, filename, g0, g0->g_start,89 (char *)NULL, (char *)NULL, &err);90 fclose(fp);91 if (n == NULL) {92 fprintf(stderr, "Parsing error %d, line %d.\n",93 err.error, err.lineno);94 if (err.text != NULL) {95 size_t i;96 fprintf(stderr, "%s", err.text);97 i = strlen(err.text);98 if (i == 0 || err.text[i-1] != '\n')99 fprintf(stderr, "\n");100 for (i = 0; i < err.offset; i++) {101 if (err.text[i] == '\t')102 putc('\t', stderr);103 else104 putc(' ', stderr);105 }106 fprintf(stderr, "^\n");107 PyObject_FREE(err.text);108 }109 Py_Exit(1);110 }111 g = pgen(n);112 if (g == NULL) {113 printf("Bad grammar.\n");114 Py_Exit(1);115 }116 return g;77 FILE *fp; 78 node *n; 79 grammar *g0, *g; 80 perrdetail err; 81 82 fp = fopen(filename, "r"); 83 if (fp == NULL) { 84 perror(filename); 85 Py_Exit(1); 86 } 87 g0 = meta_grammar(); 88 n = PyParser_ParseFile(fp, filename, g0, g0->g_start, 89 (char *)NULL, (char *)NULL, &err); 90 fclose(fp); 91 if (n == NULL) { 92 fprintf(stderr, "Parsing error %d, line %d.\n", 93 err.error, err.lineno); 94 if (err.text != NULL) { 95 size_t i; 96 fprintf(stderr, "%s", err.text); 97 i = strlen(err.text); 98 if (i == 0 || err.text[i-1] != '\n') 99 fprintf(stderr, "\n"); 100 for (i = 0; i < err.offset; i++) { 101 if (err.text[i] == '\t') 102 putc('\t', stderr); 103 else 104 putc(' ', stderr); 105 } 106 fprintf(stderr, "^\n"); 107 PyObject_FREE(err.text); 108 } 109 Py_Exit(1); 110 } 111 g = pgen(n); 112 if (g == NULL) { 113 printf("Bad grammar.\n"); 114 Py_Exit(1); 115 } 116 return g; 117 117 } 118 118 … … 121 121 PyErr_Occurred() 122 122 { 123 return 0;123 return 0; 124 124 } 125 125 … … 127 127 Py_FatalError(const char *msg) 128 128 { 129 fprintf(stderr, "pgen: FATAL ERROR: %s\n", msg);130 Py_Exit(1);129 fprintf(stderr, "pgen: FATAL ERROR: %s\n", msg); 130 Py_Exit(1); 131 131 } 132 132 … … 136 136 PyOS_Readline(FILE *sys_stdin, FILE *sys_stdout, char *prompt) 137 137 { 138 size_t n = 1000;139 char *p = (char *)PyMem_MALLOC(n);140 char *q;141 if (p == NULL)142 return NULL;143 fprintf(stderr, "%s", prompt);144 q = fgets(p, n, sys_stdin);145 if (q == NULL) {146 *p = '\0';147 return p;148 }149 n = strlen(p);150 if (n > 0 && p[n-1] != '\n')151 p[n-1] = '\n';152 return (char *)PyMem_REALLOC(p, n+1);138 size_t n = 1000; 139 char *p = (char *)PyMem_MALLOC(n); 140 char *q; 141 if (p == NULL) 142 return NULL; 143 fprintf(stderr, "%s", prompt); 144 q = fgets(p, n, sys_stdin); 145 if (q == NULL) { 146 *p = '\0'; 147 return p; 148 } 149 n = strlen(p); 150 if (n > 0 && p[n-1] != '\n') 151 p[n-1] = '\n'; 152 return (char *)PyMem_REALLOC(p, n+1); 153 153 } 154 154 … … 157 157 Py_UniversalNewlineFgets(char *buf, int n, FILE *stream, PyObject *fobj) 158 158 { 159 return fgets(buf, n, stream);159 return fgets(buf, n, stream); 160 160 } 161 161 … … 166 166 PySys_WriteStderr(const char *format, ...) 167 167 { 168 va_list va;168 va_list va; 169 169 170 va_start(va, format);171 vfprintf(stderr, format, va);172 va_end(va);170 va_start(va, format); 171 vfprintf(stderr, format, va); 172 va_end(va); 173 173 } -
python/trunk/Parser/printgrammar.c
r2 r391 14 14 printgrammar(grammar *g, FILE *fp) 15 15 { 16 fprintf(fp, "/* Generated by Parser/pgen */\n\n");17 fprintf(fp, "#include \"pgenheaders.h\"\n");18 fprintf(fp, "#include \"grammar.h\"\n");19 fprintf(fp, "PyAPI_DATA(grammar) _PyParser_Grammar;\n");20 printdfas(g, fp);21 printlabels(g, fp);22 fprintf(fp, "grammar _PyParser_Grammar = {\n");23 fprintf(fp, "\t%d,\n", g->g_ndfas);24 fprintf(fp, "\tdfas,\n");25 fprintf(fp, "\t{%d, labels},\n", g->g_ll.ll_nlabels);26 fprintf(fp, "\t%d\n", g->g_start);27 fprintf(fp, "};\n");16 fprintf(fp, "/* Generated by Parser/pgen */\n\n"); 17 fprintf(fp, "#include \"pgenheaders.h\"\n"); 18 fprintf(fp, "#include \"grammar.h\"\n"); 19 fprintf(fp, "PyAPI_DATA(grammar) _PyParser_Grammar;\n"); 20 printdfas(g, fp); 21 printlabels(g, fp); 22 fprintf(fp, "grammar _PyParser_Grammar = {\n"); 23 fprintf(fp, " %d,\n", g->g_ndfas); 24 fprintf(fp, " dfas,\n"); 25 fprintf(fp, " {%d, labels},\n", g->g_ll.ll_nlabels); 26 fprintf(fp, " %d\n", g->g_start); 27 fprintf(fp, "};\n"); 28 28 } 29 29 … … 31 31 printnonterminals(grammar *g, FILE *fp) 32 32 { 33 dfa *d;34 int i;35 36 fprintf(fp, "/* Generated by Parser/pgen */\n\n");37 38 d = g->g_dfa;39 for (i = g->g_ndfas; --i >= 0; d++)40 fprintf(fp, "#define %s %d\n", d->d_name, d->d_type);33 dfa *d; 34 int i; 35 36 fprintf(fp, "/* Generated by Parser/pgen */\n\n"); 37 38 d = g->g_dfa; 39 for (i = g->g_ndfas; --i >= 0; d++) 40 fprintf(fp, "#define %s %d\n", d->d_name, d->d_type); 41 41 } 42 42 … … 44 44 printarcs(int i, dfa *d, FILE *fp) 45 45 { 46 arc *a;47 state *s;48 int j, k;49 50 s = d->d_state;51 for (j = 0; j < d->d_nstates; j++, s++) {52 fprintf(fp, "static arc arcs_%d_%d[%d] = {\n",53 i, j, s->s_narcs);54 a = s->s_arc;55 for (k = 0; k < s->s_narcs; k++, a++)56 fprintf(fp, "\t{%d, %d},\n", a->a_lbl, a->a_arrow);57 fprintf(fp, "};\n");58 }46 arc *a; 47 state *s; 48 int j, k; 49 50 s = d->d_state; 51 for (j = 0; j < d->d_nstates; j++, s++) { 52 fprintf(fp, "static arc arcs_%d_%d[%d] = {\n", 53 i, j, s->s_narcs); 54 a = s->s_arc; 55 for (k = 0; k < s->s_narcs; k++, a++) 56 fprintf(fp, " {%d, %d},\n", a->a_lbl, a->a_arrow); 57 fprintf(fp, "};\n"); 58 } 59 59 } 60 60 … … 62 62 printstates(grammar *g, FILE *fp) 63 63 { 64 state *s;65 dfa *d;66 int i, j;67 68 d = g->g_dfa;69 for (i = 0; i < g->g_ndfas; i++, d++) {70 printarcs(i, d, fp);71 fprintf(fp, "static state states_%d[%d] = {\n",72 i, d->d_nstates);73 s = d->d_state;74 for (j = 0; j < d->d_nstates; j++, s++)75 fprintf(fp, "\t{%d, arcs_%d_%d},\n",76 s->s_narcs, i, j);77 fprintf(fp, "};\n");78 }64 state *s; 65 dfa *d; 66 int i, j; 67 68 d = g->g_dfa; 69 for (i = 0; i < g->g_ndfas; i++, d++) { 70 printarcs(i, d, fp); 71 fprintf(fp, "static state states_%d[%d] = {\n", 72 i, d->d_nstates); 73 s = d->d_state; 74 for (j = 0; j < d->d_nstates; j++, s++) 75 fprintf(fp, " {%d, arcs_%d_%d},\n", 76 s->s_narcs, i, j); 77 fprintf(fp, "};\n"); 78 } 79 79 } 80 80 … … 82 82 printdfas(grammar *g, FILE *fp) 83 83 { 84 dfa *d;85 int i, j;86 87 printstates(g, fp);88 fprintf(fp, "static dfa dfas[%d] = {\n", g->g_ndfas);89 d = g->g_dfa;90 for (i = 0; i < g->g_ndfas; i++, d++) {91 fprintf(fp, "\t{%d, \"%s\", %d, %d, states_%d,\n",92 d->d_type, d->d_name, d->d_initial, d->d_nstates, i);93 fprintf(fp, "\t\"");94 for (j = 0; j < NBYTES(g->g_ll.ll_nlabels); j++)95 fprintf(fp, "\\%03o", d->d_first[j] & 0xff);96 fprintf(fp, "\"},\n");97 }98 fprintf(fp, "};\n");84 dfa *d; 85 int i, j; 86 87 printstates(g, fp); 88 fprintf(fp, "static dfa dfas[%d] = {\n", g->g_ndfas); 89 d = g->g_dfa; 90 for (i = 0; i < g->g_ndfas; i++, d++) { 91 fprintf(fp, " {%d, \"%s\", %d, %d, states_%d,\n", 92 d->d_type, d->d_name, d->d_initial, d->d_nstates, i); 93 fprintf(fp, " \""); 94 for (j = 0; j < NBYTES(g->g_ll.ll_nlabels); j++) 95 fprintf(fp, "\\%03o", d->d_first[j] & 0xff); 96 fprintf(fp, "\"},\n"); 97 } 98 fprintf(fp, "};\n"); 99 99 } 100 100 … … 102 102 printlabels(grammar *g, FILE *fp) 103 103 { 104 label *l;105 int i;106 107 fprintf(fp, "static label labels[%d] = {\n", g->g_ll.ll_nlabels);108 l = g->g_ll.ll_label;109 for (i = g->g_ll.ll_nlabels; --i >= 0; l++) {110 if (l->lb_str == NULL)111 fprintf(fp, "\t{%d, 0},\n", l->lb_type);112 else113 fprintf(fp, "\t{%d, \"%s\"},\n",114 l->lb_type, l->lb_str);115 }116 fprintf(fp, "};\n");104 label *l; 105 int i; 106 107 fprintf(fp, "static label labels[%d] = {\n", g->g_ll.ll_nlabels); 108 l = g->g_ll.ll_label; 109 for (i = g->g_ll.ll_nlabels; --i >= 0; l++) { 110 if (l->lb_str == NULL) 111 fprintf(fp, " {%d, 0},\n", l->lb_type); 112 else 113 fprintf(fp, " {%d, \"%s\"},\n", 114 l->lb_type, l->lb_str); 115 } 116 fprintf(fp, "};\n"); 117 117 } -
python/trunk/Parser/tokenizer.c
r105 r391 36 36 37 37 char *_PyParser_TokenNames[] = { 38 "ENDMARKER",39 "NAME",40 "NUMBER",41 "STRING",42 "NEWLINE",43 "INDENT",44 "DEDENT",45 "LPAR",46 "RPAR",47 "LSQB",48 "RSQB",49 "COLON",50 "COMMA",51 "SEMI",52 "PLUS",53 "MINUS",54 "STAR",55 "SLASH",56 "VBAR",57 "AMPER",58 "LESS",59 "GREATER",60 "EQUAL",61 "DOT",62 "PERCENT",63 "BACKQUOTE",64 "LBRACE",65 "RBRACE",66 "EQEQUAL",67 "NOTEQUAL",68 "LESSEQUAL",69 "GREATEREQUAL",70 "TILDE",71 "CIRCUMFLEX",72 "LEFTSHIFT",73 "RIGHTSHIFT",74 "DOUBLESTAR",75 "PLUSEQUAL",76 "MINEQUAL",77 "STAREQUAL",78 "SLASHEQUAL",79 "PERCENTEQUAL",80 "AMPEREQUAL",81 "VBAREQUAL",82 "CIRCUMFLEXEQUAL",83 "LEFTSHIFTEQUAL",84 "RIGHTSHIFTEQUAL",85 "DOUBLESTAREQUAL",86 "DOUBLESLASH",87 "DOUBLESLASHEQUAL",88 "AT",89 /* This table must match the #defines in token.h! */90 "OP",91 "<ERRORTOKEN>",92 "<N_TOKENS>"38 "ENDMARKER", 39 "NAME", 40 "NUMBER", 41 "STRING", 42 "NEWLINE", 43 "INDENT", 44 "DEDENT", 45 "LPAR", 46 "RPAR", 47 "LSQB", 48 "RSQB", 49 "COLON", 50 "COMMA", 51 "SEMI", 52 "PLUS", 53 "MINUS", 54 "STAR", 55 "SLASH", 56 "VBAR", 57 "AMPER", 58 "LESS", 59 "GREATER", 60 "EQUAL", 61 "DOT", 62 "PERCENT", 63 "BACKQUOTE", 64 "LBRACE", 65 "RBRACE", 66 "EQEQUAL", 67 "NOTEQUAL", 68 "LESSEQUAL", 69 "GREATEREQUAL", 70 "TILDE", 71 "CIRCUMFLEX", 72 "LEFTSHIFT", 73 "RIGHTSHIFT", 74 "DOUBLESTAR", 75 "PLUSEQUAL", 76 "MINEQUAL", 77 "STAREQUAL", 78 "SLASHEQUAL", 79 "PERCENTEQUAL", 80 "AMPEREQUAL", 81 "VBAREQUAL", 82 "CIRCUMFLEXEQUAL", 83 "LEFTSHIFTEQUAL", 84 "RIGHTSHIFTEQUAL", 85 "DOUBLESTAREQUAL", 86 "DOUBLESLASH", 87 "DOUBLESLASHEQUAL", 88 "AT", 89 /* This table must match the #defines in token.h! */ 90 "OP", 91 "<ERRORTOKEN>", 92 "<N_TOKENS>" 93 93 }; 94 95 94 96 95 /* Create and initialize a new tok_state structure */ … … 99 98 tok_new(void) 100 99 { 101 struct tok_state *tok = (struct tok_state *)PyMem_MALLOC( 102 sizeof(struct tok_state)); 103 if (tok == NULL) 104 return NULL; 105 tok->buf = tok->cur = tok->end = tok->inp = tok->start = NULL; 106 tok->done = E_OK; 107 tok->fp = NULL; 108 tok->tabsize = TABSIZE; 109 tok->indent = 0; 110 tok->indstack[0] = 0; 111 tok->atbol = 1; 112 tok->pendin = 0; 113 tok->prompt = tok->nextprompt = NULL; 114 tok->lineno = 0; 115 tok->level = 0; 116 tok->filename = NULL; 117 tok->altwarning = 0; 118 tok->alterror = 0; 119 tok->alttabsize = 1; 120 tok->altindstack[0] = 0; 121 tok->decoding_state = 0; 122 tok->decoding_erred = 0; 123 tok->read_coding_spec = 0; 124 tok->encoding = NULL; 125 tok->cont_line = 0; 100 struct tok_state *tok = (struct tok_state *)PyMem_MALLOC( 101 sizeof(struct tok_state)); 102 if (tok == NULL) 103 return NULL; 104 tok->buf = tok->cur = tok->end = tok->inp = tok->start = NULL; 105 tok->done = E_OK; 106 tok->fp = NULL; 107 tok->input = NULL; 108 tok->tabsize = TABSIZE; 109 tok->indent = 0; 110 tok->indstack[0] = 0; 111 tok->atbol = 1; 112 tok->pendin = 0; 113 tok->prompt = tok->nextprompt = NULL; 114 tok->lineno = 0; 115 tok->level = 0; 116 tok->filename = NULL; 117 tok->altwarning = 0; 118 tok->alterror = 0; 119 tok->alttabsize = 1; 120 tok->altindstack[0] = 0; 121 tok->decoding_state = 0; 122 tok->decoding_erred = 0; 123 tok->read_coding_spec = 0; 124 tok->encoding = NULL; 125 tok->cont_line = 0; 126 126 #ifndef PGEN 127 tok->decoding_readline = NULL;128 tok->decoding_buffer = NULL;127 tok->decoding_readline = NULL; 128 tok->decoding_buffer = NULL; 129 129 #endif 130 return tok; 130 return tok; 131 } 132 133 static char * 134 new_string(const char *s, Py_ssize_t len) 135 { 136 char* result = (char *)PyMem_MALLOC(len + 1); 137 if (result != NULL) { 138 memcpy(result, s, len); 139 result[len] = '\0'; 140 } 141 return result; 131 142 } 132 143 … … 136 147 decoding_fgets(char *s, int size, struct tok_state *tok) 137 148 { 138 return fgets(s, size, tok->fp);149 return fgets(s, size, tok->fp); 139 150 } 140 151 … … 142 153 decoding_feof(struct tok_state *tok) 143 154 { 144 return feof(tok->fp);145 } 146 147 static c onst char *148 decode_str(const char *str, struct tok_state *tok)149 { 150 return str;155 return feof(tok->fp); 156 } 157 158 static char * 159 decode_str(const char *str, int exec_input, struct tok_state *tok) 160 { 161 return new_string(str, strlen(str)); 151 162 } 152 163 … … 156 167 error_ret(struct tok_state *tok) /* XXX */ 157 168 { 158 tok->decoding_erred = 1; 159 if (tok->fp != NULL && tok->buf != NULL) /* see PyTokenizer_Free */ 160 PyMem_FREE(tok->buf); 161 tok->buf = NULL; 162 return NULL; /* as if it were EOF */ 163 } 169 tok->decoding_erred = 1; 170 if (tok->fp != NULL && tok->buf != NULL) /* see PyTokenizer_Free */ 171 PyMem_FREE(tok->buf); 172 tok->buf = NULL; 173 return NULL; /* as if it were EOF */ 174 } 175 164 176 165 177 static char * 166 new_string(const char *s, Py_ssize_t len) 167 { 168 char* result = (char *)PyMem_MALLOC(len + 1); 169 if (result != NULL) { 170 memcpy(result, s, len); 171 result[len] = '\0'; 172 } 173 return result; 174 } 175 176 static char * 177 get_normal_name(char *s) /* for utf-8 and latin-1 */ 178 { 179 char buf[13]; 180 int i; 181 for (i = 0; i < 12; i++) { 182 int c = s[i]; 183 if (c == '\0') break; 184 else if (c == '_') buf[i] = '-'; 185 else buf[i] = tolower(c); 186 } 187 buf[i] = '\0'; 188 if (strcmp(buf, "utf-8") == 0 || 189 strncmp(buf, "utf-8-", 6) == 0) return "utf-8"; 190 else if (strcmp(buf, "latin-1") == 0 || 191 strcmp(buf, "iso-8859-1") == 0 || 192 strcmp(buf, "iso-latin-1") == 0 || 193 strncmp(buf, "latin-1-", 8) == 0 || 194 strncmp(buf, "iso-8859-1-", 11) == 0 || 195 strncmp(buf, "iso-latin-1-", 12) == 0) return "iso-8859-1"; 196 else return s; 178 get_normal_name(char *s) /* for utf-8 and latin-1 */ 179 { 180 char buf[13]; 181 int i; 182 for (i = 0; i < 12; i++) { 183 int c = s[i]; 184 if (c == '\0') 185 break; 186 else if (c == '_') 187 buf[i] = '-'; 188 else 189 buf[i] = tolower(c); 190 } 191 buf[i] = '\0'; 192 if (strcmp(buf, "utf-8") == 0 || 193 strncmp(buf, "utf-8-", 6) == 0) 194 return "utf-8"; 195 else if (strcmp(buf, "latin-1") == 0 || 196 strcmp(buf, "iso-8859-1") == 0 || 197 strcmp(buf, "iso-latin-1") == 0 || 198 strncmp(buf, "latin-1-", 8) == 0 || 199 strncmp(buf, "iso-8859-1-", 11) == 0 || 200 strncmp(buf, "iso-latin-1-", 12) == 0) 201 return "iso-8859-1"; 202 else 203 return s; 197 204 } 198 205 … … 202 209 get_coding_spec(const char *s, Py_ssize_t size) 203 210 { 204 Py_ssize_t i;205 /* Coding spec must be in a comment, and that comment must be206 * the only statement on the source code line. */207 for (i = 0; i < size - 6; i++) {208 if (s[i] == '#')209 break;210 if (s[i] != ' ' && s[i] != '\t' && s[i] != '\014')211 return NULL;212 }213 for (; i < size - 6; i++) { /* XXX inefficient search */214 const char* t = s + i;215 if (strncmp(t, "coding", 6) == 0) {216 const char* begin = NULL;217 t += 6;218 if (t[0] != ':' && t[0] != '=')219 continue;220 do {221 t++;222 } while (t[0] == '\x20' || t[0] == '\t');223 224 begin = t;225 while (isalnum(Py_CHARMASK(t[0])) ||226 t[0] == '-' || t[0] == '_' || t[0] == '.')227 t++;228 229 if (begin < t) {230 char* r = new_string(begin, t - begin);231 char* q = get_normal_name(r);232 if (r != q) {233 PyMem_FREE(r);234 r = new_string(q, strlen(q));235 }236 return r;237 }238 }239 }240 return NULL;211 Py_ssize_t i; 212 /* Coding spec must be in a comment, and that comment must be 213 * the only statement on the source code line. */ 214 for (i = 0; i < size - 6; i++) { 215 if (s[i] == '#') 216 break; 217 if (s[i] != ' ' && s[i] != '\t' && s[i] != '\014') 218 return NULL; 219 } 220 for (; i < size - 6; i++) { /* XXX inefficient search */ 221 const char* t = s + i; 222 if (strncmp(t, "coding", 6) == 0) { 223 const char* begin = NULL; 224 t += 6; 225 if (t[0] != ':' && t[0] != '=') 226 continue; 227 do { 228 t++; 229 } while (t[0] == '\x20' || t[0] == '\t'); 230 231 begin = t; 232 while (Py_ISALNUM(t[0]) || 233 t[0] == '-' || t[0] == '_' || t[0] == '.') 234 t++; 235 236 if (begin < t) { 237 char* r = new_string(begin, t - begin); 238 char* q = get_normal_name(r); 239 if (r != q) { 240 PyMem_FREE(r); 241 r = new_string(q, strlen(q)); 242 } 243 return r; 244 } 245 } 246 } 247 return NULL; 241 248 } 242 249 … … 248 255 static int 249 256 check_coding_spec(const char* line, Py_ssize_t size, struct tok_state *tok, 250 int set_readline(struct tok_state *, const char *))251 { 252 char * cs;253 int r = 1;254 255 if (tok->cont_line)256 /* It's a continuation line, so it can't be a coding spec. */257 return 1;258 cs = get_coding_spec(line, size);259 if (cs != NULL) {260 tok->read_coding_spec = 1;261 if (tok->encoding == NULL) {262 assert(tok->decoding_state == 1); /* raw */263 if (strcmp(cs, "utf-8") == 0 ||264 strcmp(cs, "iso-8859-1") == 0) {265 tok->encoding = cs;266 } else {257 int set_readline(struct tok_state *, const char *)) 258 { 259 char * cs; 260 int r = 1; 261 262 if (tok->cont_line) 263 /* It's a continuation line, so it can't be a coding spec. */ 264 return 1; 265 cs = get_coding_spec(line, size); 266 if (cs != NULL) { 267 tok->read_coding_spec = 1; 268 if (tok->encoding == NULL) { 269 assert(tok->decoding_state == 1); /* raw */ 270 if (strcmp(cs, "utf-8") == 0 || 271 strcmp(cs, "iso-8859-1") == 0) { 272 tok->encoding = cs; 273 } else { 267 274 #ifdef Py_USING_UNICODE 268 r = set_readline(tok, cs); 269 if (r) { 270 tok->encoding = cs; 271 tok->decoding_state = -1; 272 } 273 else 274 PyMem_FREE(cs); 275 r = set_readline(tok, cs); 276 if (r) { 277 tok->encoding = cs; 278 tok->decoding_state = -1; 279 } 280 else { 281 PyErr_Format(PyExc_SyntaxError, 282 "encoding problem: %s", cs); 283 PyMem_FREE(cs); 284 } 275 285 #else 276 /* Without Unicode support, we cannot277 process the coding spec. Since there278 won't be any Unicode literals, that279 won't matter. */280 PyMem_FREE(cs);286 /* Without Unicode support, we cannot 287 process the coding spec. Since there 288 won't be any Unicode literals, that 289 won't matter. */ 290 PyMem_FREE(cs); 281 291 #endif 282 } 283 } else { /* then, compare cs with BOM */ 284 r = (strcmp(tok->encoding, cs) == 0); 285 PyMem_FREE(cs); 286 } 287 } 288 if (!r) { 289 cs = tok->encoding; 290 if (!cs) 291 cs = "with BOM"; 292 PyErr_Format(PyExc_SyntaxError, "encoding problem: %s", cs); 293 } 294 return r; 292 } 293 } else { /* then, compare cs with BOM */ 294 r = (strcmp(tok->encoding, cs) == 0); 295 if (!r) 296 PyErr_Format(PyExc_SyntaxError, 297 "encoding problem: %s with BOM", cs); 298 PyMem_FREE(cs); 299 } 300 } 301 return r; 295 302 } 296 303 … … 301 308 static int 302 309 check_bom(int get_char(struct tok_state *), 303 void unget_char(int, struct tok_state *), 304 int set_readline(struct tok_state *, const char *), 305 struct tok_state *tok) 306 { 307 int ch = get_char(tok); 308 tok->decoding_state = 1; 309 if (ch == EOF) { 310 return 1; 311 } else if (ch == 0xEF) { 312 ch = get_char(tok); if (ch != 0xBB) goto NON_BOM; 313 ch = get_char(tok); if (ch != 0xBF) goto NON_BOM; 310 void unget_char(int, struct tok_state *), 311 int set_readline(struct tok_state *, const char *), 312 struct tok_state *tok) 313 { 314 int ch1, ch2, ch3; 315 ch1 = get_char(tok); 316 tok->decoding_state = 1; 317 if (ch1 == EOF) { 318 return 1; 319 } else if (ch1 == 0xEF) { 320 ch2 = get_char(tok); 321 if (ch2 != 0xBB) { 322 unget_char(ch2, tok); 323 unget_char(ch1, tok); 324 return 1; 325 } 326 ch3 = get_char(tok); 327 if (ch3 != 0xBF) { 328 unget_char(ch3, tok); 329 unget_char(ch2, tok); 330 unget_char(ch1, tok); 331 return 1; 332 } 314 333 #if 0 315 /* Disable support for UTF-16 BOMs until a decision 316 is made whether this needs to be supported. */ 317 } else if (ch == 0xFE) { 318 ch = get_char(tok); if (ch != 0xFF) goto NON_BOM; 319 if (!set_readline(tok, "utf-16-be")) return 0; 320 tok->decoding_state = -1; 321 } else if (ch == 0xFF) { 322 ch = get_char(tok); if (ch != 0xFE) goto NON_BOM; 323 if (!set_readline(tok, "utf-16-le")) return 0; 324 tok->decoding_state = -1; 334 /* Disable support for UTF-16 BOMs until a decision 335 is made whether this needs to be supported. */ 336 } else if (ch1 == 0xFE) { 337 ch2 = get_char(tok); 338 if (ch2 != 0xFF) { 339 unget_char(ch2, tok); 340 unget_char(ch1, tok); 341 return 1; 342 } 343 if (!set_readline(tok, "utf-16-be")) 344 return 0; 345 tok->decoding_state = -1; 346 } else if (ch1 == 0xFF) { 347 ch2 = get_char(tok); 348 if (ch2 != 0xFE) { 349 unget_char(ch2, tok); 350 unget_char(ch1, tok); 351 return 1; 352 } 353 if (!set_readline(tok, "utf-16-le")) 354 return 0; 355 tok->decoding_state = -1; 325 356 #endif 326 } else { 327 unget_char(ch, tok); 328 return 1; 329 } 330 if (tok->encoding != NULL) 331 PyMem_FREE(tok->encoding); 332 tok->encoding = new_string("utf-8", 5); /* resulting is in utf-8 */ 333 return 1; 334 NON_BOM: 335 /* any token beginning with '\xEF', '\xFE', '\xFF' is a bad token */ 336 unget_char(0xFF, tok); /* XXX this will cause a syntax error */ 337 return 1; 357 } else { 358 unget_char(ch1, tok); 359 return 1; 360 } 361 if (tok->encoding != NULL) 362 PyMem_FREE(tok->encoding); 363 tok->encoding = new_string("utf-8", 5); /* resulting is in utf-8 */ 364 return 1; 338 365 } 339 366 … … 344 371 1) NULL: need to call tok->decoding_readline to get a new line 345 372 2) PyUnicodeObject *: decoding_feof has called tok->decoding_readline and 346 stored the result in tok->decoding_buffer373 stored the result in tok->decoding_buffer 347 374 3) PyStringObject *: previous call to fp_readl did not have enough room 348 (in the s buffer) to copy entire contents of the line read349 by tok->decoding_readline. tok->decoding_buffer has the overflow.350 In this case, fp_readl is called in a loop (with an expanded buffer)351 until the buffer ends with a '\n' (or until the end of the file is352 reached): see tok_nextc and its calls to decoding_fgets.375 (in the s buffer) to copy entire contents of the line read 376 by tok->decoding_readline. tok->decoding_buffer has the overflow. 377 In this case, fp_readl is called in a loop (with an expanded buffer) 378 until the buffer ends with a '\n' (or until the end of the file is 379 reached): see tok_nextc and its calls to decoding_fgets. 353 380 */ 354 381 … … 357 384 { 358 385 #ifndef Py_USING_UNICODE 359 /* In a non-Unicode built, this should never be called. */360 Py_FatalError("fp_readl should not be called in this build.");361 return NULL; /* Keep compiler happy (not reachable) */386 /* In a non-Unicode built, this should never be called. */ 387 Py_FatalError("fp_readl should not be called in this build."); 388 return NULL; /* Keep compiler happy (not reachable) */ 362 389 #else 363 PyObject* utf8 = NULL; 364 PyObject* buf = tok->decoding_buffer; 365 char *str; 366 Py_ssize_t utf8len; 367 368 /* Ask for one less byte so we can terminate it */ 369 assert(size > 0); 370 size--; 371 372 if (buf == NULL) { 373 buf = PyObject_CallObject(tok->decoding_readline, NULL); 374 if (buf == NULL) 375 return error_ret(tok); 376 } else { 377 tok->decoding_buffer = NULL; 378 if (PyString_CheckExact(buf)) 379 utf8 = buf; 380 } 381 if (utf8 == NULL) { 382 utf8 = PyUnicode_AsUTF8String(buf); 383 Py_DECREF(buf); 384 if (utf8 == NULL) 385 return error_ret(tok); 386 } 387 str = PyString_AsString(utf8); 388 utf8len = PyString_GET_SIZE(utf8); 389 if (utf8len > size) { 390 tok->decoding_buffer = PyString_FromStringAndSize(str+size, utf8len-size); 391 if (tok->decoding_buffer == NULL) { 392 Py_DECREF(utf8); 393 return error_ret(tok); 394 } 395 utf8len = size; 396 } 397 memcpy(s, str, utf8len); 398 s[utf8len] = '\0'; 399 Py_DECREF(utf8); 400 if (utf8len == 0) return NULL; /* EOF */ 401 return s; 390 PyObject* utf8 = NULL; 391 PyObject* buf = tok->decoding_buffer; 392 char *str; 393 Py_ssize_t utf8len; 394 395 /* Ask for one less byte so we can terminate it */ 396 assert(size > 0); 397 size--; 398 399 if (buf == NULL) { 400 buf = PyObject_CallObject(tok->decoding_readline, NULL); 401 if (buf == NULL) 402 return error_ret(tok); 403 } else { 404 tok->decoding_buffer = NULL; 405 if (PyString_CheckExact(buf)) 406 utf8 = buf; 407 } 408 if (utf8 == NULL) { 409 utf8 = PyUnicode_AsUTF8String(buf); 410 Py_DECREF(buf); 411 if (utf8 == NULL) 412 return error_ret(tok); 413 } 414 str = PyString_AsString(utf8); 415 utf8len = PyString_GET_SIZE(utf8); 416 if (utf8len > size) { 417 tok->decoding_buffer = PyString_FromStringAndSize(str+size, utf8len-size); 418 if (tok->decoding_buffer == NULL) { 419 Py_DECREF(utf8); 420 return error_ret(tok); 421 } 422 utf8len = size; 423 } 424 memcpy(s, str, utf8len); 425 s[utf8len] = '\0'; 426 Py_DECREF(utf8); 427 if (utf8len == 0) 428 return NULL; /* EOF */ 429 return s; 402 430 #endif 403 431 } … … 416 444 fp_setreadl(struct tok_state *tok, const char* enc) 417 445 { 418 PyObject *reader, *stream, *readline;419 420 /* XXX: constify filename argument. */421 stream = PyFile_FromFile(tok->fp, (char*)tok->filename, "rb", NULL);422 if (stream == NULL)423 return 0;424 425 reader = PyCodec_StreamReader(enc, stream, NULL);426 Py_DECREF(stream);427 if (reader == NULL)428 return 0;429 430 readline = PyObject_GetAttrString(reader, "readline");431 Py_DECREF(reader);432 if (readline == NULL)433 return 0;434 435 tok->decoding_readline = readline;436 return 1;446 PyObject *reader, *stream, *readline; 447 448 /* XXX: constify filename argument. */ 449 stream = PyFile_FromFile(tok->fp, (char*)tok->filename, "rb", NULL); 450 if (stream == NULL) 451 return 0; 452 453 reader = PyCodec_StreamReader(enc, stream, NULL); 454 Py_DECREF(stream); 455 if (reader == NULL) 456 return 0; 457 458 readline = PyObject_GetAttrString(reader, "readline"); 459 Py_DECREF(reader); 460 if (readline == NULL) 461 return 0; 462 463 tok->decoding_readline = readline; 464 return 1; 437 465 } 438 466 … … 440 468 441 469 static int fp_getc(struct tok_state *tok) { 442 return getc(tok->fp);470 return getc(tok->fp); 443 471 } 444 472 … … 446 474 447 475 static void fp_ungetc(int c, struct tok_state *tok) { 448 ungetc(c, tok->fp);476 ungetc(c, tok->fp); 449 477 } 450 478 … … 455 483 decoding_fgets(char *s, int size, struct tok_state *tok) 456 484 { 457 char *line = NULL;458 int badchar = 0;459 for (;;) {460 if (tok->decoding_state < 0) {461 /* We already have a codec associated with462 this input. */463 line = fp_readl(s, size, tok);464 break;465 } else if (tok->decoding_state > 0) {466 /* We want a 'raw' read. */467 line = Py_UniversalNewlineFgets(s, size,468 tok->fp, NULL);469 break;470 } else {471 /* We have not yet determined the encoding.472 If an encoding is found, use the file-pointer473 reader functions from now on. */474 if (!check_bom(fp_getc, fp_ungetc, fp_setreadl, tok))475 return error_ret(tok);476 assert(tok->decoding_state != 0);477 }478 }479 if (line != NULL && tok->lineno < 2 && !tok->read_coding_spec) {480 if (!check_coding_spec(line, strlen(line), tok, fp_setreadl)) {481 return error_ret(tok);482 }483 }485 char *line = NULL; 486 int badchar = 0; 487 for (;;) { 488 if (tok->decoding_state < 0) { 489 /* We already have a codec associated with 490 this input. */ 491 line = fp_readl(s, size, tok); 492 break; 493 } else if (tok->decoding_state > 0) { 494 /* We want a 'raw' read. */ 495 line = Py_UniversalNewlineFgets(s, size, 496 tok->fp, NULL); 497 break; 498 } else { 499 /* We have not yet determined the encoding. 500 If an encoding is found, use the file-pointer 501 reader functions from now on. */ 502 if (!check_bom(fp_getc, fp_ungetc, fp_setreadl, tok)) 503 return error_ret(tok); 504 assert(tok->decoding_state != 0); 505 } 506 } 507 if (line != NULL && tok->lineno < 2 && !tok->read_coding_spec) { 508 if (!check_coding_spec(line, strlen(line), tok, fp_setreadl)) { 509 return error_ret(tok); 510 } 511 } 484 512 #ifndef PGEN 485 /* The default encoding is ASCII, so make sure we don't have any486 non-ASCII bytes in it. */487 if (line && !tok->encoding) {488 unsigned char *c;489 for (c = (unsigned char *)line; *c; c++)490 if (*c > 127) {491 badchar = *c;492 break;493 }494 }495 if (badchar) {496 char buf[500];497 /* Need to add 1 to the line number, since this line498 has not been counted, yet. */499 sprintf(buf,500 "Non-ASCII character '\\x%.2x' "501 "in file %.200s on line %i, "502 "but no encoding declared; "503 "see http://www.python.org/peps/pep-0263.html for details",504 badchar, tok->filename, tok->lineno + 1);505 PyErr_SetString(PyExc_SyntaxError, buf);506 return error_ret(tok);507 }513 /* The default encoding is ASCII, so make sure we don't have any 514 non-ASCII bytes in it. */ 515 if (line && !tok->encoding) { 516 unsigned char *c; 517 for (c = (unsigned char *)line; *c; c++) 518 if (*c > 127) { 519 badchar = *c; 520 break; 521 } 522 } 523 if (badchar) { 524 char buf[500]; 525 /* Need to add 1 to the line number, since this line 526 has not been counted, yet. */ 527 sprintf(buf, 528 "Non-ASCII character '\\x%.2x' " 529 "in file %.200s on line %i, " 530 "but no encoding declared; " 531 "see http://www.python.org/peps/pep-0263.html for details", 532 badchar, tok->filename, tok->lineno + 1); 533 PyErr_SetString(PyExc_SyntaxError, buf); 534 return error_ret(tok); 535 } 508 536 #endif 509 return line;537 return line; 510 538 } 511 539 … … 513 541 decoding_feof(struct tok_state *tok) 514 542 { 515 if (tok->decoding_state >= 0) {516 return feof(tok->fp);517 } else {518 PyObject* buf = tok->decoding_buffer;519 if (buf == NULL) {520 buf = PyObject_CallObject(tok->decoding_readline, NULL);521 if (buf == NULL) {522 error_ret(tok);523 return 1;524 } else {525 tok->decoding_buffer = buf;526 }527 }528 return PyObject_Length(buf) == 0;529 }543 if (tok->decoding_state >= 0) { 544 return feof(tok->fp); 545 } else { 546 PyObject* buf = tok->decoding_buffer; 547 if (buf == NULL) { 548 buf = PyObject_CallObject(tok->decoding_readline, NULL); 549 if (buf == NULL) { 550 error_ret(tok); 551 return 1; 552 } else { 553 tok->decoding_buffer = buf; 554 } 555 } 556 return PyObject_Length(buf) == 0; 557 } 530 558 } 531 559 … … 534 562 static int 535 563 buf_getc(struct tok_state *tok) { 536 return Py_CHARMASK(*tok->str++);564 return Py_CHARMASK(*tok->str++); 537 565 } 538 566 … … 541 569 static void 542 570 buf_ungetc(int c, struct tok_state *tok) { 543 tok->str--;544 assert(Py_CHARMASK(*tok->str) == c);/* tok->cur may point to read-only segment */571 tok->str--; 572 assert(Py_CHARMASK(*tok->str) == c); /* tok->cur may point to read-only segment */ 545 573 } 546 574 … … 550 578 static int 551 579 buf_setreadl(struct tok_state *tok, const char* enc) { 552 tok->enc = enc;553 return 1;580 tok->enc = enc; 581 return 1; 554 582 } 555 583 … … 560 588 static PyObject * 561 589 translate_into_utf8(const char* str, const char* enc) { 562 PyObject *utf8;563 PyObject* buf = PyUnicode_Decode(str, strlen(str), enc, NULL);564 if (buf == NULL)565 return NULL;566 utf8 = PyUnicode_AsUTF8String(buf);567 Py_DECREF(buf);568 return utf8;590 PyObject *utf8; 591 PyObject* buf = PyUnicode_Decode(str, strlen(str), enc, NULL); 592 if (buf == NULL) 593 return NULL; 594 utf8 = PyUnicode_AsUTF8String(buf); 595 Py_DECREF(buf); 596 return utf8; 569 597 } 570 598 #endif 599 600 601 static char * 602 translate_newlines(const char *s, int exec_input, struct tok_state *tok) { 603 int skip_next_lf = 0, needed_length = strlen(s) + 2, final_length; 604 char *buf, *current; 605 char c = '\0'; 606 buf = PyMem_MALLOC(needed_length); 607 if (buf == NULL) { 608 tok->done = E_NOMEM; 609 return NULL; 610 } 611 for (current = buf; *s; s++, current++) { 612 c = *s; 613 if (skip_next_lf) { 614 skip_next_lf = 0; 615 if (c == '\n') { 616 c = *++s; 617 if (!c) 618 break; 619 } 620 } 621 if (c == '\r') { 622 skip_next_lf = 1; 623 c = '\n'; 624 } 625 *current = c; 626 } 627 /* If this is exec input, add a newline to the end of the string if 628 there isn't one already. */ 629 if (exec_input && c != '\n') { 630 *current = '\n'; 631 current++; 632 } 633 *current = '\0'; 634 final_length = current - buf + 1; 635 if (final_length < needed_length && final_length) 636 /* should never fail */ 637 buf = PyMem_REALLOC(buf, final_length); 638 return buf; 639 } 571 640 572 641 /* Decode a byte string STR for use as the buffer of TOK. … … 575 644 576 645 static const char * 577 decode_str(const char *str, struct tok_state *tok) 578 { 579 PyObject* utf8 = NULL; 580 const char *s; 581 const char *newl[2] = {NULL, NULL}; 582 int lineno = 0; 583 tok->enc = NULL; 584 tok->str = str; 585 if (!check_bom(buf_getc, buf_ungetc, buf_setreadl, tok)) 586 return error_ret(tok); 587 str = tok->str; /* string after BOM if any */ 588 assert(str); 646 decode_str(const char *input, int single, struct tok_state *tok) 647 { 648 PyObject* utf8 = NULL; 649 const char *str; 650 const char *s; 651 const char *newl[2] = {NULL, NULL}; 652 int lineno = 0; 653 tok->input = str = translate_newlines(input, single, tok); 654 if (str == NULL) 655 return NULL; 656 tok->enc = NULL; 657 tok->str = str; 658 if (!check_bom(buf_getc, buf_ungetc, buf_setreadl, tok)) 659 return error_ret(tok); 660 str = tok->str; /* string after BOM if any */ 661 assert(str); 589 662 #ifdef Py_USING_UNICODE 590 if (tok->enc != NULL) {591 utf8 = translate_into_utf8(str, tok->enc);592 if (utf8 == NULL)593 return error_ret(tok);594 str = PyString_AsString(utf8);595 }663 if (tok->enc != NULL) { 664 utf8 = translate_into_utf8(str, tok->enc); 665 if (utf8 == NULL) 666 return error_ret(tok); 667 str = PyString_AsString(utf8); 668 } 596 669 #endif 597 for (s = str;; s++) {598 if (*s == '\0') break;599 else if (*s == '\n') {600 assert(lineno < 2);601 newl[lineno] = s;602 lineno++;603 if (lineno == 2) break;604 }605 }606 tok->enc = NULL;607 /* need to check line 1 and 2 separately since check_coding_spec608 assumes a single line as input */609 if (newl[0]) {610 if (!check_coding_spec(str, newl[0] - str, tok, buf_setreadl))611 return error_ret(tok);612 if (tok->enc == NULL && newl[1]) {613 if (!check_coding_spec(newl[0]+1, newl[1] - newl[0],614 tok, buf_setreadl))615 return error_ret(tok);616 }617 }670 for (s = str;; s++) { 671 if (*s == '\0') break; 672 else if (*s == '\n') { 673 assert(lineno < 2); 674 newl[lineno] = s; 675 lineno++; 676 if (lineno == 2) break; 677 } 678 } 679 tok->enc = NULL; 680 /* need to check line 1 and 2 separately since check_coding_spec 681 assumes a single line as input */ 682 if (newl[0]) { 683 if (!check_coding_spec(str, newl[0] - str, tok, buf_setreadl)) 684 return error_ret(tok); 685 if (tok->enc == NULL && newl[1]) { 686 if (!check_coding_spec(newl[0]+1, newl[1] - newl[0], 687 tok, buf_setreadl)) 688 return error_ret(tok); 689 } 690 } 618 691 #ifdef Py_USING_UNICODE 619 if (tok->enc != NULL) { 620 assert(utf8 == NULL); 621 utf8 = translate_into_utf8(str, tok->enc); 622 if (utf8 == NULL) { 623 PyErr_Format(PyExc_SyntaxError, 624 "unknown encoding: %s", tok->enc); 625 return error_ret(tok); 626 } 627 str = PyString_AsString(utf8); 628 } 692 if (tok->enc != NULL) { 693 assert(utf8 == NULL); 694 utf8 = translate_into_utf8(str, tok->enc); 695 if (utf8 == NULL) 696 return error_ret(tok); 697 str = PyString_AsString(utf8); 698 } 629 699 #endif 630 assert(tok->decoding_buffer == NULL);631 tok->decoding_buffer = utf8; /* CAUTION */632 return str;700 assert(tok->decoding_buffer == NULL); 701 tok->decoding_buffer = utf8; /* CAUTION */ 702 return str; 633 703 } 634 704 … … 638 708 639 709 struct tok_state * 640 PyTokenizer_FromString(const char *str )641 { 642 struct tok_state *tok = tok_new();643 if (tok == NULL)644 return NULL;645 str = (char *)decode_str(str, tok);646 if (str == NULL) {647 PyTokenizer_Free(tok);648 return NULL;649 }650 651 /* XXX: constify members. */652 tok->buf = tok->cur = tok->end = tok->inp = (char*)str;653 return tok;710 PyTokenizer_FromString(const char *str, int exec_input) 711 { 712 struct tok_state *tok = tok_new(); 713 if (tok == NULL) 714 return NULL; 715 str = (char *)decode_str(str, exec_input, tok); 716 if (str == NULL) { 717 PyTokenizer_Free(tok); 718 return NULL; 719 } 720 721 /* XXX: constify members. */ 722 tok->buf = tok->cur = tok->end = tok->inp = (char*)str; 723 return tok; 654 724 } 655 725 … … 660 730 PyTokenizer_FromFile(FILE *fp, char *ps1, char *ps2) 661 731 { 662 struct tok_state *tok = tok_new();663 if (tok == NULL)664 return NULL;665 if ((tok->buf = (char *)PyMem_MALLOC(BUFSIZ)) == NULL) {666 PyTokenizer_Free(tok);667 return NULL;668 }669 tok->cur = tok->inp = tok->buf;670 tok->end = tok->buf + BUFSIZ;671 tok->fp = fp;672 tok->prompt = ps1;673 tok->nextprompt = ps2;674 return tok;732 struct tok_state *tok = tok_new(); 733 if (tok == NULL) 734 return NULL; 735 if ((tok->buf = (char *)PyMem_MALLOC(BUFSIZ)) == NULL) { 736 PyTokenizer_Free(tok); 737 return NULL; 738 } 739 tok->cur = tok->inp = tok->buf; 740 tok->end = tok->buf + BUFSIZ; 741 tok->fp = fp; 742 tok->prompt = ps1; 743 tok->nextprompt = ps2; 744 return tok; 675 745 } 676 746 … … 681 751 PyTokenizer_Free(struct tok_state *tok) 682 752 { 683 if (tok->encoding != NULL)684 PyMem_FREE(tok->encoding);753 if (tok->encoding != NULL) 754 PyMem_FREE(tok->encoding); 685 755 #ifndef PGEN 686 Py_XDECREF(tok->decoding_readline);687 Py_XDECREF(tok->decoding_buffer);756 Py_XDECREF(tok->decoding_readline); 757 Py_XDECREF(tok->decoding_buffer); 688 758 #endif 689 if (tok->fp != NULL && tok->buf != NULL) 690 PyMem_FREE(tok->buf); 691 PyMem_FREE(tok); 759 if (tok->fp != NULL && tok->buf != NULL) 760 PyMem_FREE(tok->buf); 761 if (tok->input) 762 PyMem_FREE((char *)tok->input); 763 PyMem_FREE(tok); 692 764 } 693 765 … … 696 768 tok_stdin_decode(struct tok_state *tok, char **inp) 697 769 { 698 PyObject *enc, *sysstdin, *decoded, *utf8;699 const char *encoding;700 char *converted;701 702 if (PySys_GetFile((char *)"stdin", NULL) != stdin)703 return 0;704 sysstdin = PySys_GetObject("stdin");705 if (sysstdin == NULL || !PyFile_Check(sysstdin))706 return 0;707 708 enc = ((PyFileObject *)sysstdin)->f_encoding;709 if (enc == NULL || !PyString_Check(enc))710 return 0;711 Py_INCREF(enc);712 713 encoding = PyString_AsString(enc);714 decoded = PyUnicode_Decode(*inp, strlen(*inp), encoding, NULL);715 if (decoded == NULL)716 goto error_clear;717 718 utf8 = PyUnicode_AsEncodedString(decoded, "utf-8", NULL);719 Py_DECREF(decoded);720 if (utf8 == NULL)721 goto error_clear;722 723 assert(PyString_Check(utf8));724 converted = new_string(PyString_AS_STRING(utf8),725 PyString_GET_SIZE(utf8));726 Py_DECREF(utf8);727 if (converted == NULL)728 goto error_nomem;729 730 PyMem_FREE(*inp);731 *inp = converted;732 if (tok->encoding != NULL)733 PyMem_FREE(tok->encoding);734 tok->encoding = new_string(encoding, strlen(encoding));735 if (tok->encoding == NULL)736 goto error_nomem;737 738 Py_DECREF(enc);739 return 0;770 PyObject *enc, *sysstdin, *decoded, *utf8; 771 const char *encoding; 772 char *converted; 773 774 if (PySys_GetFile((char *)"stdin", NULL) != stdin) 775 return 0; 776 sysstdin = PySys_GetObject("stdin"); 777 if (sysstdin == NULL || !PyFile_Check(sysstdin)) 778 return 0; 779 780 enc = ((PyFileObject *)sysstdin)->f_encoding; 781 if (enc == NULL || !PyString_Check(enc)) 782 return 0; 783 Py_INCREF(enc); 784 785 encoding = PyString_AsString(enc); 786 decoded = PyUnicode_Decode(*inp, strlen(*inp), encoding, NULL); 787 if (decoded == NULL) 788 goto error_clear; 789 790 utf8 = PyUnicode_AsEncodedString(decoded, "utf-8", NULL); 791 Py_DECREF(decoded); 792 if (utf8 == NULL) 793 goto error_clear; 794 795 assert(PyString_Check(utf8)); 796 converted = new_string(PyString_AS_STRING(utf8), 797 PyString_GET_SIZE(utf8)); 798 Py_DECREF(utf8); 799 if (converted == NULL) 800 goto error_nomem; 801 802 PyMem_FREE(*inp); 803 *inp = converted; 804 if (tok->encoding != NULL) 805 PyMem_FREE(tok->encoding); 806 tok->encoding = new_string(encoding, strlen(encoding)); 807 if (tok->encoding == NULL) 808 goto error_nomem; 809 810 Py_DECREF(enc); 811 return 0; 740 812 741 813 error_nomem: 742 Py_DECREF(enc);743 tok->done = E_NOMEM;744 return -1;814 Py_DECREF(enc); 815 tok->done = E_NOMEM; 816 return -1; 745 817 746 818 error_clear: 747 /* Fallback to iso-8859-1: for backward compatibility */ 748 Py_DECREF(enc); 749 PyErr_Clear(); 750 return 0; 819 Py_DECREF(enc); 820 if (!PyErr_ExceptionMatches(PyExc_UnicodeDecodeError)) { 821 tok->done = E_ERROR; 822 return -1; 823 } 824 /* Fallback to iso-8859-1: for backward compatibility */ 825 PyErr_Clear(); 826 return 0; 751 827 } 752 828 #endif … … 757 833 tok_nextc(register struct tok_state *tok) 758 834 { 759 for (;;) {760 if (tok->cur != tok->inp) {761 return Py_CHARMASK(*tok->cur++); /* Fast path */762 }763 if (tok->done != E_OK)764 return EOF;765 if (tok->fp == NULL) {766 char *end = strchr(tok->inp, '\n');767 if (end != NULL)768 end++;769 else {770 end = strchr(tok->inp, '\0');771 if (end == tok->inp) {772 tok->done = E_EOF;773 return EOF;774 }775 }776 if (tok->start == NULL)777 tok->buf = tok->cur;778 tok->line_start = tok->cur;779 tok->lineno++;780 tok->inp = end;781 return Py_CHARMASK(*tok->cur++);782 }783 if (tok->prompt != NULL) {784 char *newtok = PyOS_Readline(stdin, stdout, tok->prompt);785 if (tok->nextprompt != NULL)786 tok->prompt = tok->nextprompt;787 if (newtok == NULL)788 tok->done = E_INTR;789 else if (*newtok == '\0') {790 PyMem_FREE(newtok);791 tok->done = E_EOF;792 }835 for (;;) { 836 if (tok->cur != tok->inp) { 837 return Py_CHARMASK(*tok->cur++); /* Fast path */ 838 } 839 if (tok->done != E_OK) 840 return EOF; 841 if (tok->fp == NULL) { 842 char *end = strchr(tok->inp, '\n'); 843 if (end != NULL) 844 end++; 845 else { 846 end = strchr(tok->inp, '\0'); 847 if (end == tok->inp) { 848 tok->done = E_EOF; 849 return EOF; 850 } 851 } 852 if (tok->start == NULL) 853 tok->buf = tok->cur; 854 tok->line_start = tok->cur; 855 tok->lineno++; 856 tok->inp = end; 857 return Py_CHARMASK(*tok->cur++); 858 } 859 if (tok->prompt != NULL) { 860 char *newtok = PyOS_Readline(stdin, stdout, tok->prompt); 861 if (tok->nextprompt != NULL) 862 tok->prompt = tok->nextprompt; 863 if (newtok == NULL) 864 tok->done = E_INTR; 865 else if (*newtok == '\0') { 866 PyMem_FREE(newtok); 867 tok->done = E_EOF; 868 } 793 869 #if !defined(PGEN) && defined(Py_USING_UNICODE) 794 else if (tok_stdin_decode(tok, &newtok) != 0)795 PyMem_FREE(newtok);870 else if (tok_stdin_decode(tok, &newtok) != 0) 871 PyMem_FREE(newtok); 796 872 #endif 797 else if (tok->start != NULL) {798 size_t start = tok->start - tok->buf;799 size_t oldlen = tok->cur - tok->buf;800 size_t newlen = oldlen + strlen(newtok);801 char *buf = tok->buf;802 buf = (char *)PyMem_REALLOC(buf, newlen+1);803 tok->lineno++;804 if (buf == NULL) {805 PyMem_FREE(tok->buf);806 tok->buf = NULL;807 PyMem_FREE(newtok);808 tok->done = E_NOMEM;809 return EOF;810 }811 tok->buf = buf;812 tok->cur = tok->buf + oldlen;813 tok->line_start = tok->cur;814 strcpy(tok->buf + oldlen, newtok);815 PyMem_FREE(newtok);816 tok->inp = tok->buf + newlen;817 tok->end = tok->inp + 1;818 tok->start = tok->buf + start;819 }820 else {821 tok->lineno++;822 if (tok->buf != NULL)823 PyMem_FREE(tok->buf);824 tok->buf = newtok;825 tok->line_start = tok->buf;826 tok->cur = tok->buf;827 tok->line_start = tok->buf;828 tok->inp = strchr(tok->buf, '\0');829 tok->end = tok->inp + 1;830 }831 }832 else {833 int done = 0;834 Py_ssize_t cur = 0;835 char *pt;836 if (tok->start == NULL) {837 if (tok->buf == NULL) {838 tok->buf = (char *)839 PyMem_MALLOC(BUFSIZ);840 if (tok->buf == NULL) {841 tok->done = E_NOMEM;842 return EOF;843 }844 tok->end = tok->buf + BUFSIZ;845 }846 if (decoding_fgets(tok->buf, (int)(tok->end - tok->buf),847 tok) == NULL) {848 tok->done = E_EOF;849 done = 1;850 }851 else {852 tok->done = E_OK;853 tok->inp = strchr(tok->buf, '\0');854 done = tok->inp[-1] == '\n';855 }856 }857 else {858 cur = tok->cur - tok->buf;859 if (decoding_feof(tok)) {860 tok->done = E_EOF;861 done = 1;862 }863 else864 tok->done = E_OK;865 }866 tok->lineno++;867 /* Read until '\n' or EOF */868 while (!done) {869 Py_ssize_t curstart = tok->start == NULL ? -1 :870 tok->start - tok->buf;871 Py_ssize_t curvalid = tok->inp - tok->buf;872 Py_ssize_t newsize = curvalid + BUFSIZ;873 char *newbuf = tok->buf;874 newbuf = (char *)PyMem_REALLOC(newbuf,875 newsize);876 if (newbuf == NULL) {877 tok->done = E_NOMEM;878 tok->cur = tok->inp;879 return EOF;880 }881 tok->buf = newbuf;882 tok->inp = tok->buf + curvalid;883 tok->end = tok->buf + newsize;884 tok->start = curstart < 0 ? NULL :885 tok->buf + curstart;886 if (decoding_fgets(tok->inp,887 (int)(tok->end - tok->inp),888 tok) == NULL) {889 /* Break out early on decoding890 errors, as tok->buf will be NULL891 */892 if (tok->decoding_erred)893 return EOF;894 /* Last line does not end in \n,895 fake one */896 strcpy(tok->inp, "\n");897 }898 tok->inp = strchr(tok->inp, '\0');899 done = tok->inp[-1] == '\n';900 }901 if (tok->buf != NULL) {902 tok->cur = tok->buf + cur;903 tok->line_start = tok->cur;904 /* replace "\r\n" with "\n" */905 /* For Mac leave the \r, giving a syntax error */906 pt = tok->inp - 2;907 if (pt >= tok->buf && *pt == '\r') {908 *pt++ = '\n';909 *pt = '\0';910 tok->inp = pt;911 }912 }913 }914 if (tok->done != E_OK) {915 if (tok->prompt != NULL)916 PySys_WriteStderr("\n");917 tok->cur = tok->inp;918 return EOF;919 }920 }921 /*NOTREACHED*/873 else if (tok->start != NULL) { 874 size_t start = tok->start - tok->buf; 875 size_t oldlen = tok->cur - tok->buf; 876 size_t newlen = oldlen + strlen(newtok); 877 char *buf = tok->buf; 878 buf = (char *)PyMem_REALLOC(buf, newlen+1); 879 tok->lineno++; 880 if (buf == NULL) { 881 PyMem_FREE(tok->buf); 882 tok->buf = NULL; 883 PyMem_FREE(newtok); 884 tok->done = E_NOMEM; 885 return EOF; 886 } 887 tok->buf = buf; 888 tok->cur = tok->buf + oldlen; 889 tok->line_start = tok->cur; 890 strcpy(tok->buf + oldlen, newtok); 891 PyMem_FREE(newtok); 892 tok->inp = tok->buf + newlen; 893 tok->end = tok->inp + 1; 894 tok->start = tok->buf + start; 895 } 896 else { 897 tok->lineno++; 898 if (tok->buf != NULL) 899 PyMem_FREE(tok->buf); 900 tok->buf = newtok; 901 tok->line_start = tok->buf; 902 tok->cur = tok->buf; 903 tok->line_start = tok->buf; 904 tok->inp = strchr(tok->buf, '\0'); 905 tok->end = tok->inp + 1; 906 } 907 } 908 else { 909 int done = 0; 910 Py_ssize_t cur = 0; 911 char *pt; 912 if (tok->start == NULL) { 913 if (tok->buf == NULL) { 914 tok->buf = (char *) 915 PyMem_MALLOC(BUFSIZ); 916 if (tok->buf == NULL) { 917 tok->done = E_NOMEM; 918 return EOF; 919 } 920 tok->end = tok->buf + BUFSIZ; 921 } 922 if (decoding_fgets(tok->buf, (int)(tok->end - tok->buf), 923 tok) == NULL) { 924 tok->done = E_EOF; 925 done = 1; 926 } 927 else { 928 tok->done = E_OK; 929 tok->inp = strchr(tok->buf, '\0'); 930 done = tok->inp[-1] == '\n'; 931 } 932 } 933 else { 934 cur = tok->cur - tok->buf; 935 if (decoding_feof(tok)) { 936 tok->done = E_EOF; 937 done = 1; 938 } 939 else 940 tok->done = E_OK; 941 } 942 tok->lineno++; 943 /* Read until '\n' or EOF */ 944 while (!done) { 945 Py_ssize_t curstart = tok->start == NULL ? -1 : 946 tok->start - tok->buf; 947 Py_ssize_t curvalid = tok->inp - tok->buf; 948 Py_ssize_t newsize = curvalid + BUFSIZ; 949 char *newbuf = tok->buf; 950 newbuf = (char *)PyMem_REALLOC(newbuf, 951 newsize); 952 if (newbuf == NULL) { 953 tok->done = E_NOMEM; 954 tok->cur = tok->inp; 955 return EOF; 956 } 957 tok->buf = newbuf; 958 tok->inp = tok->buf + curvalid; 959 tok->end = tok->buf + newsize; 960 tok->start = curstart < 0 ? NULL : 961 tok->buf + curstart; 962 if (decoding_fgets(tok->inp, 963 (int)(tok->end - tok->inp), 964 tok) == NULL) { 965 /* Break out early on decoding 966 errors, as tok->buf will be NULL 967 */ 968 if (tok->decoding_erred) 969 return EOF; 970 /* Last line does not end in \n, 971 fake one */ 972 strcpy(tok->inp, "\n"); 973 } 974 tok->inp = strchr(tok->inp, '\0'); 975 done = tok->inp[-1] == '\n'; 976 } 977 if (tok->buf != NULL) { 978 tok->cur = tok->buf + cur; 979 tok->line_start = tok->cur; 980 /* replace "\r\n" with "\n" */ 981 /* For Mac leave the \r, giving a syntax error */ 982 pt = tok->inp - 2; 983 if (pt >= tok->buf && *pt == '\r') { 984 *pt++ = '\n'; 985 *pt = '\0'; 986 tok->inp = pt; 987 } 988 } 989 } 990 if (tok->done != E_OK) { 991 if (tok->prompt != NULL) 992 PySys_WriteStderr("\n"); 993 tok->cur = tok->inp; 994 return EOF; 995 } 996 } 997 /*NOTREACHED*/ 922 998 } 923 999 … … 928 1004 tok_backup(register struct tok_state *tok, register int c) 929 1005 { 930 if (c != EOF) {931 if (--tok->cur < tok->buf)932 Py_FatalError("tok_backup: beginof buffer");933 if (*tok->cur != c)934 *tok->cur = c;935 }1006 if (c != EOF) { 1007 if (--tok->cur < tok->buf) 1008 Py_FatalError("tok_backup: beginning of buffer"); 1009 if (*tok->cur != c) 1010 *tok->cur = c; 1011 } 936 1012 } 937 1013 … … 942 1018 PyToken_OneChar(int c) 943 1019 { 944 switch (c) {945 case '(':return LPAR;946 case ')':return RPAR;947 case '[':return LSQB;948 case ']':return RSQB;949 case ':':return COLON;950 case ',':return COMMA;951 case ';':return SEMI;952 case '+':return PLUS;953 case '-':return MINUS;954 case '*':return STAR;955 case '/':return SLASH;956 case '|':return VBAR;957 case '&':return AMPER;958 case '<':return LESS;959 case '>':return GREATER;960 case '=':return EQUAL;961 case '.':return DOT;962 case '%':return PERCENT;963 case '`':return BACKQUOTE;964 case '{':return LBRACE;965 case '}':return RBRACE;966 case '^':return CIRCUMFLEX;967 case '~':return TILDE;968 case '@': return AT;969 default:return OP;970 }1020 switch (c) { 1021 case '(': return LPAR; 1022 case ')': return RPAR; 1023 case '[': return LSQB; 1024 case ']': return RSQB; 1025 case ':': return COLON; 1026 case ',': return COMMA; 1027 case ';': return SEMI; 1028 case '+': return PLUS; 1029 case '-': return MINUS; 1030 case '*': return STAR; 1031 case '/': return SLASH; 1032 case '|': return VBAR; 1033 case '&': return AMPER; 1034 case '<': return LESS; 1035 case '>': return GREATER; 1036 case '=': return EQUAL; 1037 case '.': return DOT; 1038 case '%': return PERCENT; 1039 case '`': return BACKQUOTE; 1040 case '{': return LBRACE; 1041 case '}': return RBRACE; 1042 case '^': return CIRCUMFLEX; 1043 case '~': return TILDE; 1044 case '@': return AT; 1045 default: return OP; 1046 } 971 1047 } 972 1048 … … 975 1051 PyToken_TwoChars(int c1, int c2) 976 1052 { 977 switch (c1) {978 case '=':979 switch (c2) {980 case '=':return EQEQUAL;981 }982 break;983 case '!':984 switch (c2) {985 case '=':return NOTEQUAL;986 }987 break;988 case '<':989 switch (c2) {990 case '>':return NOTEQUAL;991 case '=':return LESSEQUAL;992 case '<':return LEFTSHIFT;993 }994 break;995 case '>':996 switch (c2) {997 case '=':return GREATEREQUAL;998 case '>':return RIGHTSHIFT;999 }1000 break;1001 case '+':1002 switch (c2) {1003 case '=':return PLUSEQUAL;1004 }1005 break;1006 case '-':1007 switch (c2) {1008 case '=':return MINEQUAL;1009 }1010 break;1011 case '*':1012 switch (c2) {1013 case '*':return DOUBLESTAR;1014 case '=':return STAREQUAL;1015 }1016 break;1017 case '/':1018 switch (c2) {1019 case '/':return DOUBLESLASH;1020 case '=':return SLASHEQUAL;1021 }1022 break;1023 case '|':1024 switch (c2) {1025 case '=':return VBAREQUAL;1026 }1027 break;1028 case '%':1029 switch (c2) {1030 case '=':return PERCENTEQUAL;1031 }1032 break;1033 case '&':1034 switch (c2) {1035 case '=':return AMPEREQUAL;1036 }1037 break;1038 case '^':1039 switch (c2) {1040 case '=':return CIRCUMFLEXEQUAL;1041 }1042 break;1043 }1044 return OP;1053 switch (c1) { 1054 case '=': 1055 switch (c2) { 1056 case '=': return EQEQUAL; 1057 } 1058 break; 1059 case '!': 1060 switch (c2) { 1061 case '=': return NOTEQUAL; 1062 } 1063 break; 1064 case '<': 1065 switch (c2) { 1066 case '>': return NOTEQUAL; 1067 case '=': return LESSEQUAL; 1068 case '<': return LEFTSHIFT; 1069 } 1070 break; 1071 case '>': 1072 switch (c2) { 1073 case '=': return GREATEREQUAL; 1074 case '>': return RIGHTSHIFT; 1075 } 1076 break; 1077 case '+': 1078 switch (c2) { 1079 case '=': return PLUSEQUAL; 1080 } 1081 break; 1082 case '-': 1083 switch (c2) { 1084 case '=': return MINEQUAL; 1085 } 1086 break; 1087 case '*': 1088 switch (c2) { 1089 case '*': return DOUBLESTAR; 1090 case '=': return STAREQUAL; 1091 } 1092 break; 1093 case '/': 1094 switch (c2) { 1095 case '/': return DOUBLESLASH; 1096 case '=': return SLASHEQUAL; 1097 } 1098 break; 1099 case '|': 1100 switch (c2) { 1101 case '=': return VBAREQUAL; 1102 } 1103 break; 1104 case '%': 1105 switch (c2) { 1106 case '=': return PERCENTEQUAL; 1107 } 1108 break; 1109 case '&': 1110 switch (c2) { 1111 case '=': return AMPEREQUAL; 1112 } 1113 break; 1114 case '^': 1115 switch (c2) { 1116 case '=': return CIRCUMFLEXEQUAL; 1117 } 1118 break; 1119 } 1120 return OP; 1045 1121 } 1046 1122 … … 1048 1124 PyToken_ThreeChars(int c1, int c2, int c3) 1049 1125 { 1050 switch (c1) {1051 case '<':1052 switch (c2) {1053 case '<':1054 switch (c3) {1055 case '=':1056 return LEFTSHIFTEQUAL;1057 }1058 break;1059 }1060 break;1061 case '>':1062 switch (c2) {1063 case '>':1064 switch (c3) {1065 case '=':1066 return RIGHTSHIFTEQUAL;1067 }1068 break;1069 }1070 break;1071 case '*':1072 switch (c2) {1073 case '*':1074 switch (c3) {1075 case '=':1076 return DOUBLESTAREQUAL;1077 }1078 break;1079 }1080 break;1081 case '/':1082 switch (c2) {1083 case '/':1084 switch (c3) {1085 case '=':1086 return DOUBLESLASHEQUAL;1087 }1088 break;1089 }1090 break;1091 }1092 return OP;1126 switch (c1) { 1127 case '<': 1128 switch (c2) { 1129 case '<': 1130 switch (c3) { 1131 case '=': 1132 return LEFTSHIFTEQUAL; 1133 } 1134 break; 1135 } 1136 break; 1137 case '>': 1138 switch (c2) { 1139 case '>': 1140 switch (c3) { 1141 case '=': 1142 return RIGHTSHIFTEQUAL; 1143 } 1144 break; 1145 } 1146 break; 1147 case '*': 1148 switch (c2) { 1149 case '*': 1150 switch (c3) { 1151 case '=': 1152 return DOUBLESTAREQUAL; 1153 } 1154 break; 1155 } 1156 break; 1157 case '/': 1158 switch (c2) { 1159 case '/': 1160 switch (c3) { 1161 case '=': 1162 return DOUBLESLASHEQUAL; 1163 } 1164 break; 1165 } 1166 break; 1167 } 1168 return OP; 1093 1169 } 1094 1170 … … 1096 1172 indenterror(struct tok_state *tok) 1097 1173 { 1098 if (tok->alterror) { 1099 tok->done = E_TABSPACE; 1100 tok->cur = tok->inp; 1101 return 1; 1102 } 1103 if (tok->altwarning) { 1104 PySys_WriteStderr("%s: inconsistent use of tabs and spaces " 1105 "in indentation\n", tok->filename); 1106 tok->altwarning = 0; 1107 } 1108 return 0; 1109 } 1110 1174 if (tok->alterror) { 1175 tok->done = E_TABSPACE; 1176 tok->cur = tok->inp; 1177 return 1; 1178 } 1179 if (tok->altwarning) { 1180 PySys_WriteStderr("%s: inconsistent use of tabs and spaces " 1181 "in indentation\n", tok->filename); 1182 tok->altwarning = 0; 1183 } 1184 return 0; 1185 } 1111 1186 1112 1187 /* Get next token, after space stripping etc. */ … … 1115 1190 tok_get(register struct tok_state *tok, char **p_start, char **p_end) 1116 1191 { 1117 register int c;1118 int blankline;1119 1120 *p_start = *p_end = NULL;1192 register int c; 1193 int blankline; 1194 1195 *p_start = *p_end = NULL; 1121 1196 nextline: 1122 tok->start = NULL;1123 blankline = 0;1124 1125 /* Get indentation level */1126 if (tok->atbol) {1127 register int col = 0;1128 register int altcol = 0;1129 tok->atbol = 0;1130 for (;;) {1131 c = tok_nextc(tok);1132 if (c == ' ')1133 col++, altcol++;1134 else if (c == '\t') {1135 col = (col/tok->tabsize + 1) * tok->tabsize;1136 altcol = (altcol/tok->alttabsize + 1)1137 * tok->alttabsize;1138 }1139 else if (c == '\014') /* Control-L (formfeed) */1140 col = altcol = 0; /* For Emacs users */1141 else1142 break;1143 }1144 tok_backup(tok, c);1145 if (c == '#' || c == '\n') {1146 /* Lines with only whitespace and/or comments1147 shouldn't affect the indentation and are1148 not passed to the parser as NEWLINE tokens,1149 except *totally* empty lines in interactive1150 mode, which signal the end of a command group. */1151 if (col == 0 && c == '\n' && tok->prompt != NULL)1152 blankline = 0; /* Let it through */1153 else1154 blankline = 1; /* Ignore completely */1155 /* We can't jump back right here since we still1156 may need to skip to the end of a comment */1157 }1158 if (!blankline && tok->level == 0) {1159 if (col == tok->indstack[tok->indent]) {1160 /* No change */1161 if (altcol != tok->altindstack[tok->indent]) {1162 if (indenterror(tok))1163 return ERRORTOKEN;1164 }1165 }1166 else if (col > tok->indstack[tok->indent]) {1167 /* Indent -- always one */1168 if (tok->indent+1 >= MAXINDENT) {1169 tok->done = E_TOODEEP;1170 tok->cur = tok->inp;1171 return ERRORTOKEN;1172 }1173 if (altcol <= tok->altindstack[tok->indent]) {1174 if (indenterror(tok))1175 return ERRORTOKEN;1176 }1177 tok->pendin++;1178 tok->indstack[++tok->indent] = col;1179 tok->altindstack[tok->indent] = altcol;1180 }1181 else /* col < tok->indstack[tok->indent] */ {1182 /* Dedent -- any number, must be consistent */1183 while (tok->indent > 0 &&1184 col < tok->indstack[tok->indent]) {1185 tok->pendin--;1186 tok->indent--;1187 }1188 if (col != tok->indstack[tok->indent]) {1189 tok->done = E_DEDENT;1190 tok->cur = tok->inp;1191 return ERRORTOKEN;1192 }1193 if (altcol != tok->altindstack[tok->indent]) {1194 if (indenterror(tok))1195 return ERRORTOKEN;1196 }1197 }1198 }1199 }1200 1201 tok->start = tok->cur;1202 1203 /* Return pending indents/dedents */1204 if (tok->pendin != 0) {1205 if (tok->pendin < 0) {1206 tok->pendin++;1207 return DEDENT;1208 }1209 else {1210 tok->pendin--;1211 return INDENT;1212 }1213 }1197 tok->start = NULL; 1198 blankline = 0; 1199 1200 /* Get indentation level */ 1201 if (tok->atbol) { 1202 register int col = 0; 1203 register int altcol = 0; 1204 tok->atbol = 0; 1205 for (;;) { 1206 c = tok_nextc(tok); 1207 if (c == ' ') 1208 col++, altcol++; 1209 else if (c == '\t') { 1210 col = (col/tok->tabsize + 1) * tok->tabsize; 1211 altcol = (altcol/tok->alttabsize + 1) 1212 * tok->alttabsize; 1213 } 1214 else if (c == '\014') /* Control-L (formfeed) */ 1215 col = altcol = 0; /* For Emacs users */ 1216 else 1217 break; 1218 } 1219 tok_backup(tok, c); 1220 if (c == '#' || c == '\n') { 1221 /* Lines with only whitespace and/or comments 1222 shouldn't affect the indentation and are 1223 not passed to the parser as NEWLINE tokens, 1224 except *totally* empty lines in interactive 1225 mode, which signal the end of a command group. */ 1226 if (col == 0 && c == '\n' && tok->prompt != NULL) 1227 blankline = 0; /* Let it through */ 1228 else 1229 blankline = 1; /* Ignore completely */ 1230 /* We can't jump back right here since we still 1231 may need to skip to the end of a comment */ 1232 } 1233 if (!blankline && tok->level == 0) { 1234 if (col == tok->indstack[tok->indent]) { 1235 /* No change */ 1236 if (altcol != tok->altindstack[tok->indent]) { 1237 if (indenterror(tok)) 1238 return ERRORTOKEN; 1239 } 1240 } 1241 else if (col > tok->indstack[tok->indent]) { 1242 /* Indent -- always one */ 1243 if (tok->indent+1 >= MAXINDENT) { 1244 tok->done = E_TOODEEP; 1245 tok->cur = tok->inp; 1246 return ERRORTOKEN; 1247 } 1248 if (altcol <= tok->altindstack[tok->indent]) { 1249 if (indenterror(tok)) 1250 return ERRORTOKEN; 1251 } 1252 tok->pendin++; 1253 tok->indstack[++tok->indent] = col; 1254 tok->altindstack[tok->indent] = altcol; 1255 } 1256 else /* col < tok->indstack[tok->indent] */ { 1257 /* Dedent -- any number, must be consistent */ 1258 while (tok->indent > 0 && 1259 col < tok->indstack[tok->indent]) { 1260 tok->pendin--; 1261 tok->indent--; 1262 } 1263 if (col != tok->indstack[tok->indent]) { 1264 tok->done = E_DEDENT; 1265 tok->cur = tok->inp; 1266 return ERRORTOKEN; 1267 } 1268 if (altcol != tok->altindstack[tok->indent]) { 1269 if (indenterror(tok)) 1270 return ERRORTOKEN; 1271 } 1272 } 1273 } 1274 } 1275 1276 tok->start = tok->cur; 1277 1278 /* Return pending indents/dedents */ 1279 if (tok->pendin != 0) { 1280 if (tok->pendin < 0) { 1281 tok->pendin++; 1282 return DEDENT; 1283 } 1284 else { 1285 tok->pendin--; 1286 return INDENT; 1287 } 1288 } 1214 1289 1215 1290 again: 1216 tok->start = NULL;1217 /* Skip spaces */1218 do {1219 c = tok_nextc(tok);1220 } while (c == ' ' || c == '\t' || c == '\014');1221 1222 /* Set start of current token */1223 tok->start = tok->cur - 1;1224 1225 /* Skip comment, while looking for tab-setting magic */1226 if (c == '#') {1227 static char *tabforms[] = {1228 "tab-width:",/* Emacs */1229 ":tabstop=",/* vim, full form */1230 ":ts=",/* vim, abbreviated form */1231 "set tabsize=",/* will vi never die? */1232 /* more templates can be added here to support other editors */1233 };1234 char cbuf[80];1235 char *tp, **cp;1236 tp = cbuf;1237 do {1238 *tp++ = c = tok_nextc(tok);1239 } while (c != EOF && c != '\n' &&1240 (size_t)(tp - cbuf + 1) < sizeof(cbuf));1241 *tp = '\0';1242 for (cp = tabforms;1243 cp < tabforms + sizeof(tabforms)/sizeof(tabforms[0]);1244 cp++) {1245 if ((tp = strstr(cbuf, *cp))) {1246 int newsize = atoi(tp + strlen(*cp));1247 1248 if (newsize >= 1 && newsize <= 40) {1249 tok->tabsize = newsize;1250 if (Py_VerboseFlag)1251 PySys_WriteStderr(1252 "Tab size set to %d\n",1253 newsize);1254 }1255 }1256 }1257 while (c != EOF && c != '\n')1258 c = tok_nextc(tok);1259 }1260 1261 /* Check for EOF and errors now */1262 if (c == EOF) {1263 return tok->done == E_EOF ? ENDMARKER : ERRORTOKEN;1264 }1265 1266 /* Identifier (most frequent token!) */1267 if (isalpha(c) || c == '_') {1268 /* Process r"", u"" and ur"" */1269 switch (c) {1270 case 'b':1271 case 'B':1272 c = tok_nextc(tok);1273 if (c == 'r' || c == 'R')1274 c = tok_nextc(tok);1275 if (c == '"' || c == '\'')1276 goto letter_quote;1277 break;1278 case 'r':1279 case 'R':1280 c = tok_nextc(tok);1281 if (c == '"' || c == '\'')1282 goto letter_quote;1283 break;1284 case 'u':1285 case 'U':1286 c = tok_nextc(tok);1287 if (c == 'r' || c == 'R')1288 c = tok_nextc(tok);1289 if (c == '"' || c == '\'')1290 goto letter_quote;1291 break;1292 }1293 while (isalnum(c) || c == '_') {1294 c = tok_nextc(tok);1291 tok->start = NULL; 1292 /* Skip spaces */ 1293 do { 1294 c = tok_nextc(tok); 1295 } while (c == ' ' || c == '\t' || c == '\014'); 1296 1297 /* Set start of current token */ 1298 tok->start = tok->cur - 1; 1299 1300 /* Skip comment, while looking for tab-setting magic */ 1301 if (c == '#') { 1302 static char *tabforms[] = { 1303 "tab-width:", /* Emacs */ 1304 ":tabstop=", /* vim, full form */ 1305 ":ts=", /* vim, abbreviated form */ 1306 "set tabsize=", /* will vi never die? */ 1307 /* more templates can be added here to support other editors */ 1308 }; 1309 char cbuf[80]; 1310 char *tp, **cp; 1311 tp = cbuf; 1312 do { 1313 *tp++ = c = tok_nextc(tok); 1314 } while (c != EOF && c != '\n' && 1315 (size_t)(tp - cbuf + 1) < sizeof(cbuf)); 1316 *tp = '\0'; 1317 for (cp = tabforms; 1318 cp < tabforms + sizeof(tabforms)/sizeof(tabforms[0]); 1319 cp++) { 1320 if ((tp = strstr(cbuf, *cp))) { 1321 int newsize = atoi(tp + strlen(*cp)); 1322 1323 if (newsize >= 1 && newsize <= 40) { 1324 tok->tabsize = newsize; 1325 if (Py_VerboseFlag) 1326 PySys_WriteStderr( 1327 "Tab size set to %d\n", 1328 newsize); 1329 } 1330 } 1331 } 1332 while (c != EOF && c != '\n') 1333 c = tok_nextc(tok); 1334 } 1335 1336 /* Check for EOF and errors now */ 1337 if (c == EOF) { 1338 return tok->done == E_EOF ? ENDMARKER : ERRORTOKEN; 1339 } 1340 1341 /* Identifier (most frequent token!) */ 1342 if (Py_ISALPHA(c) || c == '_') { 1343 /* Process r"", u"" and ur"" */ 1344 switch (c) { 1345 case 'b': 1346 case 'B': 1347 c = tok_nextc(tok); 1348 if (c == 'r' || c == 'R') 1349 c = tok_nextc(tok); 1350 if (c == '"' || c == '\'') 1351 goto letter_quote; 1352 break; 1353 case 'r': 1354 case 'R': 1355 c = tok_nextc(tok); 1356 if (c == '"' || c == '\'') 1357 goto letter_quote; 1358 break; 1359 case 'u': 1360 case 'U': 1361 c = tok_nextc(tok); 1362 if (c == 'r' || c == 'R') 1363 c = tok_nextc(tok); 1364 if (c == '"' || c == '\'') 1365 goto letter_quote; 1366 break; 1367 } 1368 while (c != EOF && (Py_ISALNUM(c) || c == '_')) { 1369 c = tok_nextc(tok); 1295 1370 #ifdef __KLIBC__ 1296 if (c == EOF)1297 break;1371 if (c == EOF) 1372 break; 1298 1373 #endif 1299 }1300 tok_backup(tok, c);1301 *p_start = tok->start;1302 *p_end = tok->cur;1303 return NAME;1304 }1305 1306 /* Newline */1307 if (c == '\n') {1308 tok->atbol = 1;1309 if (blankline || tok->level > 0)1310 goto nextline;1311 *p_start = tok->start;1312 *p_end = tok->cur - 1; /* Leave '\n' out of the string */1313 tok->cont_line = 0;1314 return NEWLINE;1315 }1316 1317 /* Period or number starting with period? */1318 if (c == '.') {1319 c = tok_nextc(tok);1320 if (isdigit(c)) {1321 goto fraction;1322 }1323 else {1324 tok_backup(tok, c);1325 *p_start = tok->start;1326 *p_end = tok->cur;1327 return DOT;1328 }1329 }1330 1331 /* Number */1332 if (isdigit(c)) {1333 if (c == '0') {1334 /* Hex, octal or binary -- maybe. */1335 c = tok_nextc(tok);1336 if (c == '.')1337 goto fraction;1374 } 1375 tok_backup(tok, c); 1376 *p_start = tok->start; 1377 *p_end = tok->cur; 1378 return NAME; 1379 } 1380 1381 /* Newline */ 1382 if (c == '\n') { 1383 tok->atbol = 1; 1384 if (blankline || tok->level > 0) 1385 goto nextline; 1386 *p_start = tok->start; 1387 *p_end = tok->cur - 1; /* Leave '\n' out of the string */ 1388 tok->cont_line = 0; 1389 return NEWLINE; 1390 } 1391 1392 /* Period or number starting with period? */ 1393 if (c == '.') { 1394 c = tok_nextc(tok); 1395 if (isdigit(c)) { 1396 goto fraction; 1397 } 1398 else { 1399 tok_backup(tok, c); 1400 *p_start = tok->start; 1401 *p_end = tok->cur; 1402 return DOT; 1403 } 1404 } 1405 1406 /* Number */ 1407 if (isdigit(c)) { 1408 if (c == '0') { 1409 /* Hex, octal or binary -- maybe. */ 1410 c = tok_nextc(tok); 1411 if (c == '.') 1412 goto fraction; 1338 1413 #ifndef WITHOUT_COMPLEX 1339 if (c == 'j' || c == 'J')1340 goto imaginary;1414 if (c == 'j' || c == 'J') 1415 goto imaginary; 1341 1416 #endif 1342 if (c == 'x' || c == 'X') {1343 1344 /* Hex */1345 c = tok_nextc(tok);1346 if (!isxdigit(c)) {1347 tok->done = E_TOKEN;1348 tok_backup(tok, c);1349 return ERRORTOKEN;1350 }1351 do {1352 c = tok_nextc(tok);1353 } while (isxdigit(c));1354 }1355 else if (c == 'o' || c == 'O') {1356 /* Octal */1357 c = tok_nextc(tok);1358 if (c < '0' || c >= '8') {1359 tok->done = E_TOKEN;1360 tok_backup(tok, c);1361 return ERRORTOKEN;1362 }1363 do {1364 c = tok_nextc(tok);1365 } while ('0' <= c && c < '8');1366 }1367 else if (c == 'b' || c == 'B') {1368 /* Binary */1369 c = tok_nextc(tok);1370 if (c != '0' && c != '1') {1371 tok->done = E_TOKEN;1372 tok_backup(tok, c);1373 return ERRORTOKEN;1374 }1375 do {1376 c = tok_nextc(tok);1377 } while (c == '0' || c == '1');1378 }1379 else {1380 int found_decimal = 0;1381 /* Octal; c is first char of it */1382 /* There's no 'isoctdigit' macro, sigh */1383 while ('0' <= c && c < '8') {1384 c = tok_nextc(tok);1385 }1386 if (isdigit(c)) {1387 found_decimal = 1;1388 do {1389 c = tok_nextc(tok);1390 } while (isdigit(c));1391 }1392 if (c == '.')1393 goto fraction;1394 else if (c == 'e' || c == 'E')1395 goto exponent;1417 if (c == 'x' || c == 'X') { 1418 1419 /* Hex */ 1420 c = tok_nextc(tok); 1421 if (!isxdigit(c)) { 1422 tok->done = E_TOKEN; 1423 tok_backup(tok, c); 1424 return ERRORTOKEN; 1425 } 1426 do { 1427 c = tok_nextc(tok); 1428 } while (isxdigit(c)); 1429 } 1430 else if (c == 'o' || c == 'O') { 1431 /* Octal */ 1432 c = tok_nextc(tok); 1433 if (c < '0' || c >= '8') { 1434 tok->done = E_TOKEN; 1435 tok_backup(tok, c); 1436 return ERRORTOKEN; 1437 } 1438 do { 1439 c = tok_nextc(tok); 1440 } while ('0' <= c && c < '8'); 1441 } 1442 else if (c == 'b' || c == 'B') { 1443 /* Binary */ 1444 c = tok_nextc(tok); 1445 if (c != '0' && c != '1') { 1446 tok->done = E_TOKEN; 1447 tok_backup(tok, c); 1448 return ERRORTOKEN; 1449 } 1450 do { 1451 c = tok_nextc(tok); 1452 } while (c == '0' || c == '1'); 1453 } 1454 else { 1455 int found_decimal = 0; 1456 /* Octal; c is first char of it */ 1457 /* There's no 'isoctdigit' macro, sigh */ 1458 while ('0' <= c && c < '8') { 1459 c = tok_nextc(tok); 1460 } 1461 if (isdigit(c)) { 1462 found_decimal = 1; 1463 do { 1464 c = tok_nextc(tok); 1465 } while (isdigit(c)); 1466 } 1467 if (c == '.') 1468 goto fraction; 1469 else if (c == 'e' || c == 'E') 1470 goto exponent; 1396 1471 #ifndef WITHOUT_COMPLEX 1397 else if (c == 'j' || c == 'J')1398 goto imaginary;1472 else if (c == 'j' || c == 'J') 1473 goto imaginary; 1399 1474 #endif 1400 else if (found_decimal) {1401 tok->done = E_TOKEN;1402 tok_backup(tok, c);1403 return ERRORTOKEN;1404 }1405 }1406 if (c == 'l' || c == 'L')1407 c = tok_nextc(tok);1408 }1409 else {1410 /* Decimal */1411 do {1412 c = tok_nextc(tok);1413 } while (isdigit(c));1414 if (c == 'l' || c == 'L')1415 c = tok_nextc(tok);1416 else {1417 /* Accept floating point numbers. */1418 if (c == '.') {1419 fraction:1420 /* Fraction */1421 do {1422 c = tok_nextc(tok);1423 } while (isdigit(c));1424 }1425 if (c == 'e' || c == 'E') {1426 exponent:1427 /* Exponent part */1428 c = tok_nextc(tok);1429 if (c == '+' || c == '-')1430 c = tok_nextc(tok);1431 if (!isdigit(c)) {1432 tok->done = E_TOKEN;1433 tok_backup(tok, c);1434 return ERRORTOKEN;1435 }1436 do {1437 c = tok_nextc(tok);1438 } while (isdigit(c));1439 }1475 else if (found_decimal) { 1476 tok->done = E_TOKEN; 1477 tok_backup(tok, c); 1478 return ERRORTOKEN; 1479 } 1480 } 1481 if (c == 'l' || c == 'L') 1482 c = tok_nextc(tok); 1483 } 1484 else { 1485 /* Decimal */ 1486 do { 1487 c = tok_nextc(tok); 1488 } while (isdigit(c)); 1489 if (c == 'l' || c == 'L') 1490 c = tok_nextc(tok); 1491 else { 1492 /* Accept floating point numbers. */ 1493 if (c == '.') { 1494 fraction: 1495 /* Fraction */ 1496 do { 1497 c = tok_nextc(tok); 1498 } while (isdigit(c)); 1499 } 1500 if (c == 'e' || c == 'E') { 1501 exponent: 1502 /* Exponent part */ 1503 c = tok_nextc(tok); 1504 if (c == '+' || c == '-') 1505 c = tok_nextc(tok); 1506 if (!isdigit(c)) { 1507 tok->done = E_TOKEN; 1508 tok_backup(tok, c); 1509 return ERRORTOKEN; 1510 } 1511 do { 1512 c = tok_nextc(tok); 1513 } while (isdigit(c)); 1514 } 1440 1515 #ifndef WITHOUT_COMPLEX 1441 if (c == 'j' || c == 'J')1442 /* Imaginary part */1443 imaginary:1444 c = tok_nextc(tok);1516 if (c == 'j' || c == 'J') 1517 /* Imaginary part */ 1518 imaginary: 1519 c = tok_nextc(tok); 1445 1520 #endif 1446 }1447 }1448 tok_backup(tok, c);1449 *p_start = tok->start;1450 *p_end = tok->cur;1451 return NUMBER;1452 }1521 } 1522 } 1523 tok_backup(tok, c); 1524 *p_start = tok->start; 1525 *p_end = tok->cur; 1526 return NUMBER; 1527 } 1453 1528 1454 1529 letter_quote: 1455 /* String */1456 if (c == '\'' || c == '"') {1457 Py_ssize_t quote2 = tok->cur - tok->start + 1;1458 int quote = c;1459 int triple = 0;1460 int tripcount = 0;1461 for (;;) {1462 c = tok_nextc(tok);1463 if (c == '\n') {1464 if (!triple) {1465 tok->done = E_EOLS;1466 tok_backup(tok, c);1467 return ERRORTOKEN;1468 }1469 tripcount = 0;1470 tok->cont_line = 1; /* multiline string. */1471 }1472 else if (c == EOF) {1473 if (triple)1474 tok->done = E_EOFS;1475 else1476 tok->done = E_EOLS;1477 tok->cur = tok->inp;1478 return ERRORTOKEN;1479 }1480 else if (c == quote) {1481 tripcount++;1482 if (tok->cur - tok->start == quote2) {1483 c = tok_nextc(tok);1484 if (c == quote) {1485 triple = 1;1486 tripcount = 0;1487 continue;1488 }1489 tok_backup(tok, c);1490 }1491 if (!triple || tripcount == 3)1492 break;1493 }1494 else if (c == '\\') {1495 tripcount = 0;1496 c = tok_nextc(tok);1497 if (c == EOF) {1498 tok->done = E_EOLS;1499 tok->cur = tok->inp;1500 return ERRORTOKEN;1501 }1502 }1503 else1504 tripcount = 0;1505 }1506 *p_start = tok->start;1507 *p_end = tok->cur;1508 return STRING;1509 }1510 1511 /* Line continuation */1512 if (c == '\\') {1513 c = tok_nextc(tok);1514 if (c != '\n') {1515 tok->done = E_LINECONT;1516 tok->cur = tok->inp;1517 return ERRORTOKEN;1518 }1519 tok->cont_line = 1;1520 goto again; /* Read next line */1521 }1522 1523 /* Check for two-character token */1524 {1525 int c2 = tok_nextc(tok);1526 int token = PyToken_TwoChars(c, c2);1530 /* String */ 1531 if (c == '\'' || c == '"') { 1532 Py_ssize_t quote2 = tok->cur - tok->start + 1; 1533 int quote = c; 1534 int triple = 0; 1535 int tripcount = 0; 1536 for (;;) { 1537 c = tok_nextc(tok); 1538 if (c == '\n') { 1539 if (!triple) { 1540 tok->done = E_EOLS; 1541 tok_backup(tok, c); 1542 return ERRORTOKEN; 1543 } 1544 tripcount = 0; 1545 tok->cont_line = 1; /* multiline string. */ 1546 } 1547 else if (c == EOF) { 1548 if (triple) 1549 tok->done = E_EOFS; 1550 else 1551 tok->done = E_EOLS; 1552 tok->cur = tok->inp; 1553 return ERRORTOKEN; 1554 } 1555 else if (c == quote) { 1556 tripcount++; 1557 if (tok->cur - tok->start == quote2) { 1558 c = tok_nextc(tok); 1559 if (c == quote) { 1560 triple = 1; 1561 tripcount = 0; 1562 continue; 1563 } 1564 tok_backup(tok, c); 1565 } 1566 if (!triple || tripcount == 3) 1567 break; 1568 } 1569 else if (c == '\\') { 1570 tripcount = 0; 1571 c = tok_nextc(tok); 1572 if (c == EOF) { 1573 tok->done = E_EOLS; 1574 tok->cur = tok->inp; 1575 return ERRORTOKEN; 1576 } 1577 } 1578 else 1579 tripcount = 0; 1580 } 1581 *p_start = tok->start; 1582 *p_end = tok->cur; 1583 return STRING; 1584 } 1585 1586 /* Line continuation */ 1587 if (c == '\\') { 1588 c = tok_nextc(tok); 1589 if (c != '\n') { 1590 tok->done = E_LINECONT; 1591 tok->cur = tok->inp; 1592 return ERRORTOKEN; 1593 } 1594 tok->cont_line = 1; 1595 goto again; /* Read next line */ 1596 } 1597 1598 /* Check for two-character token */ 1599 { 1600 int c2 = tok_nextc(tok); 1601 int token = PyToken_TwoChars(c, c2); 1527 1602 #ifndef PGEN 1528 if (Py_Py3kWarningFlag && token == NOTEQUAL && c == '<') {1529 if (PyErr_WarnExplicit(PyExc_DeprecationWarning,1530 "<> not supported in 3.x; use !=",1531 tok->filename, tok->lineno,1532 NULL, NULL)) {1533 return ERRORTOKEN;1534 }1535 }1603 if (Py_Py3kWarningFlag && token == NOTEQUAL && c == '<') { 1604 if (PyErr_WarnExplicit(PyExc_DeprecationWarning, 1605 "<> not supported in 3.x; use !=", 1606 tok->filename, tok->lineno, 1607 NULL, NULL)) { 1608 return ERRORTOKEN; 1609 } 1610 } 1536 1611 #endif 1537 if (token != OP) {1538 int c3 = tok_nextc(tok);1539 int token3 = PyToken_ThreeChars(c, c2, c3);1540 if (token3 != OP) {1541 token = token3;1542 } else {1543 tok_backup(tok, c3);1544 }1545 *p_start = tok->start;1546 *p_end = tok->cur;1547 return token;1548 }1549 tok_backup(tok, c2);1550 }1551 1552 /* Keep track of parentheses nesting level */1553 switch (c) {1554 case '(':1555 case '[':1556 case '{':1557 tok->level++;1558 break;1559 case ')':1560 case ']':1561 case '}':1562 tok->level--;1563 break;1564 }1565 1566 /* Punctuation character */1567 *p_start = tok->start;1568 *p_end = tok->cur;1569 return PyToken_OneChar(c);1612 if (token != OP) { 1613 int c3 = tok_nextc(tok); 1614 int token3 = PyToken_ThreeChars(c, c2, c3); 1615 if (token3 != OP) { 1616 token = token3; 1617 } else { 1618 tok_backup(tok, c3); 1619 } 1620 *p_start = tok->start; 1621 *p_end = tok->cur; 1622 return token; 1623 } 1624 tok_backup(tok, c2); 1625 } 1626 1627 /* Keep track of parentheses nesting level */ 1628 switch (c) { 1629 case '(': 1630 case '[': 1631 case '{': 1632 tok->level++; 1633 break; 1634 case ')': 1635 case ']': 1636 case '}': 1637 tok->level--; 1638 break; 1639 } 1640 1641 /* Punctuation character */ 1642 *p_start = tok->start; 1643 *p_end = tok->cur; 1644 return PyToken_OneChar(c); 1570 1645 } 1571 1646 … … 1573 1648 PyTokenizer_Get(struct tok_state *tok, char **p_start, char **p_end) 1574 1649 { 1575 int result = tok_get(tok, p_start, p_end);1576 if (tok->decoding_erred) {1577 result = ERRORTOKEN;1578 tok->done = E_DECODE;1579 }1580 return result;1650 int result = tok_get(tok, p_start, p_end); 1651 if (tok->decoding_erred) { 1652 result = ERRORTOKEN; 1653 tok->done = E_DECODE; 1654 } 1655 return result; 1581 1656 } 1582 1657 … … 1589 1664 PyTokenizer_RestoreEncoding(struct tok_state* tok, int len, int* offset) 1590 1665 { 1591 return NULL;1666 return NULL; 1592 1667 } 1593 1668 #else … … 1595 1670 static PyObject * 1596 1671 dec_utf8(const char *enc, const char *text, size_t len) { 1597 PyObject *ret = NULL; 1598 PyObject *unicode_text = PyUnicode_DecodeUTF8(text, len, "replace");1599 if (unicode_text) {1600 ret = PyUnicode_AsEncodedString(unicode_text, enc, "replace");1601 Py_DECREF(unicode_text);1602 }1603 if (!ret) {1604 PyErr_Clear();1605 }1606 return ret;1672 PyObject *ret = NULL; 1673 PyObject *unicode_text = PyUnicode_DecodeUTF8(text, len, "replace"); 1674 if (unicode_text) { 1675 ret = PyUnicode_AsEncodedString(unicode_text, enc, "replace"); 1676 Py_DECREF(unicode_text); 1677 } 1678 if (!ret) { 1679 PyErr_Clear(); 1680 } 1681 return ret; 1607 1682 } 1608 1683 char * 1609 1684 PyTokenizer_RestoreEncoding(struct tok_state* tok, int len, int *offset) 1610 1685 { 1611 char *text = NULL;1612 if (tok->encoding) {1613 /* convert source to original encondig */1614 PyObject *lineobj = dec_utf8(tok->encoding, tok->buf, len);1615 if (lineobj != NULL) {1616 int linelen = PyString_Size(lineobj);1617 const char *line = PyString_AsString(lineobj);1618 text = PyObject_MALLOC(linelen + 1);1619 if (text != NULL && line != NULL) {1620 if (linelen)1621 strncpy(text, line, linelen);1622 text[linelen] = '\0';1623 }1624 Py_DECREF(lineobj);1625 1626 /* adjust error offset */1627 if (*offset > 1) {1628 PyObject *offsetobj = dec_utf8(tok->encoding, 1629 tok->buf, *offset-1);1630 if (offsetobj) {1631 *offset = PyString_Size(offsetobj) + 1;1632 Py_DECREF(offsetobj);1633 }1634 }1635 1636 }1637 }1638 return text;1686 char *text = NULL; 1687 if (tok->encoding) { 1688 /* convert source to original encondig */ 1689 PyObject *lineobj = dec_utf8(tok->encoding, tok->buf, len); 1690 if (lineobj != NULL) { 1691 int linelen = PyString_Size(lineobj); 1692 const char *line = PyString_AsString(lineobj); 1693 text = PyObject_MALLOC(linelen + 1); 1694 if (text != NULL && line != NULL) { 1695 if (linelen) 1696 strncpy(text, line, linelen); 1697 text[linelen] = '\0'; 1698 } 1699 Py_DECREF(lineobj); 1700 1701 /* adjust error offset */ 1702 if (*offset > 1) { 1703 PyObject *offsetobj = dec_utf8(tok->encoding, 1704 tok->buf, *offset-1); 1705 if (offsetobj) { 1706 *offset = PyString_Size(offsetobj) + 1; 1707 Py_DECREF(offsetobj); 1708 } 1709 } 1710 1711 } 1712 } 1713 return text; 1639 1714 1640 1715 } … … 1648 1723 tok_dump(int type, char *start, char *end) 1649 1724 { 1650 printf("%s", _PyParser_TokenNames[type]);1651 if (type == NAME || type == NUMBER || type == STRING || type == OP)1652 printf("(%.*s)", (int)(end - start), start);1725 printf("%s", _PyParser_TokenNames[type]); 1726 if (type == NAME || type == NUMBER || type == STRING || type == OP) 1727 printf("(%.*s)", (int)(end - start), start); 1653 1728 } 1654 1729 -
python/trunk/Parser/tokenizer.h
r2 r391 9 9 /* Tokenizer interface */ 10 10 11 #include "token.h" /* For token types */11 #include "token.h" /* For token types */ 12 12 13 #define MAXINDENT 100 /* Max indentation level */13 #define MAXINDENT 100 /* Max indentation level */ 14 14 15 15 /* Tokenizer state */ 16 16 struct tok_state { 17 /* Input state; buf <= cur <= inp <= end */18 /* NB an entire line is held in the buffer */19 char *buf;/* Input buffer, or NULL; malloc'ed if fp != NULL */20 char *cur;/* Next character in buffer */21 char *inp;/* End of data in buffer */22 char *end;/* End of input buffer if buf != NULL */23 char *start;/* Start of current token if not NULL */24 int done;/* E_OK normally, E_EOF at EOF, otherwise error code */25 /* NB If done != E_OK, cur must be == inp!!! */26 FILE *fp;/* Rest of input; NULL if tokenizing a string */27 int tabsize;/* Tab spacing */28 int indent;/* Current indentation index */29 int indstack[MAXINDENT];/* Stack of indents */30 int atbol;/* Nonzero if at begin of new line */31 int pendin;/* Pending indents (if > 0) or dedents (if < 0) */32 char *prompt, *nextprompt;/* For interactive prompting */33 int lineno;/* Current line number */34 int level;/* () [] {} Parentheses nesting level */35 /* Used to allow free continuations inside them */36 /* Stuff for checking on different tab sizes */37 const char *filename;/* For error messages */38 int altwarning;/* Issue warning if alternate tabs don't match */39 int alterror;/* Issue error if alternate tabs don't match */40 int alttabsize;/* Alternate tab spacing */41 int altindstack[MAXINDENT];/* Stack of alternate indents */42 /* Stuff for PEP 0263 */43 int decoding_state;/* -1:decoding, 0:init, 1:raw */44 int decoding_erred;/* whether erred in decoding */45 int read_coding_spec;/* whether 'coding:...' has been read */46 char *encoding;47 int cont_line; /* whether we are in a continuation line. */48 const char* line_start;/* pointer to start of current line */17 /* Input state; buf <= cur <= inp <= end */ 18 /* NB an entire line is held in the buffer */ 19 char *buf; /* Input buffer, or NULL; malloc'ed if fp != NULL */ 20 char *cur; /* Next character in buffer */ 21 char *inp; /* End of data in buffer */ 22 char *end; /* End of input buffer if buf != NULL */ 23 char *start; /* Start of current token if not NULL */ 24 int done; /* E_OK normally, E_EOF at EOF, otherwise error code */ 25 /* NB If done != E_OK, cur must be == inp!!! */ 26 FILE *fp; /* Rest of input; NULL if tokenizing a string */ 27 int tabsize; /* Tab spacing */ 28 int indent; /* Current indentation index */ 29 int indstack[MAXINDENT]; /* Stack of indents */ 30 int atbol; /* Nonzero if at begin of new line */ 31 int pendin; /* Pending indents (if > 0) or dedents (if < 0) */ 32 char *prompt, *nextprompt; /* For interactive prompting */ 33 int lineno; /* Current line number */ 34 int level; /* () [] {} Parentheses nesting level */ 35 /* Used to allow free continuations inside them */ 36 /* Stuff for checking on different tab sizes */ 37 const char *filename; /* For error messages */ 38 int altwarning; /* Issue warning if alternate tabs don't match */ 39 int alterror; /* Issue error if alternate tabs don't match */ 40 int alttabsize; /* Alternate tab spacing */ 41 int altindstack[MAXINDENT]; /* Stack of alternate indents */ 42 /* Stuff for PEP 0263 */ 43 int decoding_state; /* -1:decoding, 0:init, 1:raw */ 44 int decoding_erred; /* whether erred in decoding */ 45 int read_coding_spec; /* whether 'coding:...' has been read */ 46 char *encoding; 47 int cont_line; /* whether we are in a continuation line. */ 48 const char* line_start; /* pointer to start of current line */ 49 49 #ifndef PGEN 50 PyObject *decoding_readline; /* codecs.open(...).readline */51 PyObject *decoding_buffer;50 PyObject *decoding_readline; /* codecs.open(...).readline */ 51 PyObject *decoding_buffer; 52 52 #endif 53 const char* enc; 54 const char* str; 53 const char* enc; 54 const char* str; 55 const char* input; /* Tokenizer's newline translated copy of the string. */ 55 56 }; 56 57 57 extern struct tok_state *PyTokenizer_FromString(const char * );58 extern struct tok_state *PyTokenizer_FromString(const char *, int); 58 59 extern struct tok_state *PyTokenizer_FromFile(FILE *, char *, char *); 59 60 extern void PyTokenizer_Free(struct tok_state *); 60 61 extern int PyTokenizer_Get(struct tok_state *, char **, char **); 61 62 #if defined(PGEN) || defined(Py_USING_UNICODE) 62 extern char * PyTokenizer_RestoreEncoding(struct tok_state* tok, 63 int len, int *offset);63 extern char * PyTokenizer_RestoreEncoding(struct tok_state* tok, 64 int len, int *offset); 64 65 #endif 65 66
Note:
See TracChangeset
for help on using the changeset viewer.
