Changeset 388 for python/vendor/current/Parser/pgen.c
- Timestamp:
- Mar 19, 2014, 11:11:30 AM (11 years ago)
- File:
-
- 1 edited
Legend:
- Unmodified
- Added
- Removed
-
python/vendor/current/Parser/pgen.c
r2 r388 18 18 19 19 typedef struct _nfaarc { 20 intar_label;21 intar_arrow;20 int ar_label; 21 int ar_arrow; 22 22 } nfaarc; 23 23 24 24 typedef struct _nfastate { 25 intst_narcs;26 nfaarc*st_arc;25 int st_narcs; 26 nfaarc *st_arc; 27 27 } nfastate; 28 28 29 29 typedef struct _nfa { 30 intnf_type;31 char*nf_name;32 intnf_nstates;33 nfastate*nf_state;34 intnf_start, nf_finish;30 int nf_type; 31 char *nf_name; 32 int nf_nstates; 33 nfastate *nf_state; 34 int nf_start, nf_finish; 35 35 } nfa; 36 36 37 37 /* Forward */ 38 38 static void compile_rhs(labellist *ll, 39 39 nfa *nf, node *n, int *pa, int *pb); 40 40 static void compile_alt(labellist *ll, 41 41 nfa *nf, node *n, int *pa, int *pb); 42 42 static void compile_item(labellist *ll, 43 43 nfa *nf, node *n, int *pa, int *pb); 44 44 static void compile_atom(labellist *ll, 45 45 nfa *nf, node *n, int *pa, int *pb); 46 46 47 47 static int 48 48 addnfastate(nfa *nf) 49 49 { 50 51 52 nf->nf_state = (nfastate *)PyObject_REALLOC(nf->nf_state, 53 54 55 56 57 58 59 50 nfastate *st; 51 52 nf->nf_state = (nfastate *)PyObject_REALLOC(nf->nf_state, 53 sizeof(nfastate) * (nf->nf_nstates + 1)); 54 if (nf->nf_state == NULL) 55 Py_FatalError("out of mem"); 56 st = &nf->nf_state[nf->nf_nstates++]; 57 st->st_narcs = 0; 58 st->st_arc = NULL; 59 return st - nf->nf_state; 60 60 } 61 61 … … 63 63 addnfaarc(nfa *nf, int from, int to, int lbl) 64 64 { 65 66 67 68 69 70 71 72 73 74 75 65 nfastate *st; 66 nfaarc *ar; 67 68 st = &nf->nf_state[from]; 69 st->st_arc = (nfaarc *)PyObject_REALLOC(st->st_arc, 70 sizeof(nfaarc) * (st->st_narcs + 1)); 71 if (st->st_arc == NULL) 72 Py_FatalError("out of mem"); 73 ar = &st->st_arc[st->st_narcs++]; 74 ar->ar_label = lbl; 75 ar->ar_arrow = to; 76 76 } 77 77 … … 79 79 newnfa(char *name) 80 80 { 81 82 83 84 85 86 87 88 89 90 91 92 81 nfa *nf; 82 static int type = NT_OFFSET; /* All types will be disjunct */ 83 84 nf = (nfa *)PyObject_MALLOC(sizeof(nfa)); 85 if (nf == NULL) 86 Py_FatalError("no mem for new nfa"); 87 nf->nf_type = type++; 88 nf->nf_name = name; /* XXX strdup(name) ??? */ 89 nf->nf_nstates = 0; 90 nf->nf_state = NULL; 91 nf->nf_start = nf->nf_finish = -1; 92 return nf; 93 93 } 94 94 95 95 typedef struct _nfagrammar { 96 intgr_nnfas;97 nfa**gr_nfa;98 labellistgr_ll;96 int gr_nnfas; 97 nfa **gr_nfa; 98 labellist gr_ll; 99 99 } nfagrammar; 100 100 … … 105 105 newnfagrammar(void) 106 106 { 107 108 109 110 111 112 113 114 115 116 117 107 nfagrammar *gr; 108 109 gr = (nfagrammar *)PyObject_MALLOC(sizeof(nfagrammar)); 110 if (gr == NULL) 111 Py_FatalError("no mem for new nfa grammar"); 112 gr->gr_nnfas = 0; 113 gr->gr_nfa = NULL; 114 gr->gr_ll.ll_nlabels = 0; 115 gr->gr_ll.ll_label = NULL; 116 addlabel(&gr->gr_ll, ENDMARKER, "EMPTY"); 117 return gr; 118 118 } 119 119 … … 121 121 addnfa(nfagrammar *gr, char *name) 122 122 { 123 124 125 126 127 128 129 130 131 132 123 nfa *nf; 124 125 nf = newnfa(name); 126 gr->gr_nfa = (nfa **)PyObject_REALLOC(gr->gr_nfa, 127 sizeof(nfa*) * (gr->gr_nnfas + 1)); 128 if (gr->gr_nfa == NULL) 129 Py_FatalError("out of mem"); 130 gr->gr_nfa[gr->gr_nnfas++] = nf; 131 addlabel(&gr->gr_ll, NAME, nf->nf_name); 132 return nf; 133 133 } 134 134 … … 138 138 139 139 #define REQN(i, count) \ 140 141 142 143 140 if (i < count) { \ 141 fprintf(stderr, REQNFMT, count); \ 142 Py_FatalError("REQN"); \ 143 } else 144 144 145 145 #else 146 #define REQN(i, count) 146 #define REQN(i, count) /* empty */ 147 147 #endif 148 148 … … 150 150 metacompile(node *n) 151 151 { 152 153 154 155 156 157 158 159 160 161 162 163 164 165 152 nfagrammar *gr; 153 int i; 154 155 if (Py_DebugFlag) 156 printf("Compiling (meta-) parse tree into NFA grammar\n"); 157 gr = newnfagrammar(); 158 REQ(n, MSTART); 159 i = n->n_nchildren - 1; /* Last child is ENDMARKER */ 160 n = n->n_child; 161 for (; --i >= 0; n++) { 162 if (n->n_type != NEWLINE) 163 compile_rule(gr, n); 164 } 165 return gr; 166 166 } 167 167 … … 169 169 compile_rule(nfagrammar *gr, node *n) 170 170 { 171 172 173 174 175 176 177 178 179 180 181 182 183 184 171 nfa *nf; 172 173 REQ(n, RULE); 174 REQN(n->n_nchildren, 4); 175 n = n->n_child; 176 REQ(n, NAME); 177 nf = addnfa(gr, n->n_str); 178 n++; 179 REQ(n, COLON); 180 n++; 181 REQ(n, RHS); 182 compile_rhs(&gr->gr_ll, nf, n, &nf->nf_start, &nf->nf_finish); 183 n++; 184 REQ(n, NEWLINE); 185 185 } 186 186 … … 188 188 compile_rhs(labellist *ll, nfa *nf, node *n, int *pa, int *pb) 189 189 { 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 190 int i; 191 int a, b; 192 193 REQ(n, RHS); 194 i = n->n_nchildren; 195 REQN(i, 1); 196 n = n->n_child; 197 REQ(n, ALT); 198 compile_alt(ll, nf, n, pa, pb); 199 if (--i <= 0) 200 return; 201 n++; 202 a = *pa; 203 b = *pb; 204 *pa = addnfastate(nf); 205 *pb = addnfastate(nf); 206 addnfaarc(nf, *pa, a, EMPTY); 207 addnfaarc(nf, b, *pb, EMPTY); 208 for (; --i >= 0; n++) { 209 REQ(n, VBAR); 210 REQN(i, 1); 211 --i; 212 n++; 213 REQ(n, ALT); 214 compile_alt(ll, nf, n, &a, &b); 215 addnfaarc(nf, *pa, a, EMPTY); 216 addnfaarc(nf, b, *pb, EMPTY); 217 } 218 218 } 219 219 … … 221 221 compile_alt(labellist *ll, nfa *nf, node *n, int *pa, int *pb) 222 222 { 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 223 int i; 224 int a, b; 225 226 REQ(n, ALT); 227 i = n->n_nchildren; 228 REQN(i, 1); 229 n = n->n_child; 230 REQ(n, ITEM); 231 compile_item(ll, nf, n, pa, pb); 232 --i; 233 n++; 234 for (; --i >= 0; n++) { 235 REQ(n, ITEM); 236 compile_item(ll, nf, n, &a, &b); 237 addnfaarc(nf, *pb, a, EMPTY); 238 *pb = b; 239 } 240 240 } 241 241 … … 243 243 compile_item(labellist *ll, nfa *nf, node *n, int *pa, int *pb) 244 244 { 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 245 int i; 246 int a, b; 247 248 REQ(n, ITEM); 249 i = n->n_nchildren; 250 REQN(i, 1); 251 n = n->n_child; 252 if (n->n_type == LSQB) { 253 REQN(i, 3); 254 n++; 255 REQ(n, RHS); 256 *pa = addnfastate(nf); 257 *pb = addnfastate(nf); 258 addnfaarc(nf, *pa, *pb, EMPTY); 259 compile_rhs(ll, nf, n, &a, &b); 260 addnfaarc(nf, *pa, a, EMPTY); 261 addnfaarc(nf, b, *pb, EMPTY); 262 REQN(i, 1); 263 n++; 264 REQ(n, RSQB); 265 } 266 else { 267 compile_atom(ll, nf, n, pa, pb); 268 if (--i <= 0) 269 return; 270 n++; 271 addnfaarc(nf, *pb, *pa, EMPTY); 272 if (n->n_type == STAR) 273 *pb = *pa; 274 else 275 REQ(n, PLUS); 276 } 277 277 } 278 278 … … 280 280 compile_atom(labellist *ll, nfa *nf, node *n, int *pa, int *pb) 281 281 { 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 282 int i; 283 284 REQ(n, ATOM); 285 i = n->n_nchildren; 286 REQN(i, 1); 287 n = n->n_child; 288 if (n->n_type == LPAR) { 289 REQN(i, 3); 290 n++; 291 REQ(n, RHS); 292 compile_rhs(ll, nf, n, pa, pb); 293 n++; 294 REQ(n, RPAR); 295 } 296 else if (n->n_type == NAME || n->n_type == STRING) { 297 *pa = addnfastate(nf); 298 *pb = addnfastate(nf); 299 addnfaarc(nf, *pa, *pb, addlabel(ll, n->n_type, n->n_str)); 300 } 301 else 302 REQ(n, NAME); 303 303 } 304 304 … … 306 306 dumpstate(labellist *ll, nfa *nf, int istate) 307 307 { 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 308 nfastate *st; 309 int i; 310 nfaarc *ar; 311 312 printf("%c%2d%c", 313 istate == nf->nf_start ? '*' : ' ', 314 istate, 315 istate == nf->nf_finish ? '.' : ' '); 316 st = &nf->nf_state[istate]; 317 ar = st->st_arc; 318 for (i = 0; i < st->st_narcs; i++) { 319 if (i > 0) 320 printf("\n "); 321 printf("-> %2d %s", ar->ar_arrow, 322 PyGrammar_LabelRepr(&ll->ll_label[ar->ar_label])); 323 ar++; 324 } 325 printf("\n"); 326 326 } 327 327 … … 329 329 dumpnfa(labellist *ll, nfa *nf) 330 330 { 331 332 333 334 335 336 331 int i; 332 333 printf("NFA '%s' has %d states; start %d, finish %d\n", 334 nf->nf_name, nf->nf_nstates, nf->nf_start, nf->nf_finish); 335 for (i = 0; i < nf->nf_nstates; i++) 336 dumpstate(ll, nf, i); 337 337 } 338 338 … … 343 343 addclosure(bitset ss, nfa *nf, int istate) 344 344 { 345 346 347 348 349 350 351 352 353 354 355 345 if (addbit(ss, istate)) { 346 nfastate *st = &nf->nf_state[istate]; 347 nfaarc *ar = st->st_arc; 348 int i; 349 350 for (i = st->st_narcs; --i >= 0; ) { 351 if (ar->ar_label == EMPTY) 352 addclosure(ss, nf, ar->ar_arrow); 353 ar++; 354 } 355 } 356 356 } 357 357 358 358 typedef struct _ss_arc { 359 bitsetsa_bitset;360 intsa_arrow;361 intsa_label;359 bitset sa_bitset; 360 int sa_arrow; 361 int sa_label; 362 362 } ss_arc; 363 363 364 364 typedef struct _ss_state { 365 bitsetss_ss;366 intss_narcs;367 struct _ss_arc*ss_arc;368 intss_deleted;369 intss_finish;370 intss_rename;365 bitset ss_ss; 366 int ss_narcs; 367 struct _ss_arc *ss_arc; 368 int ss_deleted; 369 int ss_finish; 370 int ss_rename; 371 371 } ss_state; 372 372 373 373 typedef struct _ss_dfa { 374 intsd_nstates;375 374 int sd_nstates; 375 ss_state *sd_state; 376 376 } ss_dfa; 377 377 378 378 /* Forward */ 379 379 static void printssdfa(int xx_nstates, ss_state *xx_state, int nbits, 380 380 labellist *ll, char *msg); 381 381 static void simplify(int xx_nstates, ss_state *xx_state); 382 382 static void convert(dfa *d, int xx_nstates, ss_state *xx_state); … … 385 385 makedfa(nfagrammar *gr, nfa *nf, dfa *d) 386 386 { 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 found:;447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 xx_state = (ss_state *)PyObject_REALLOC(xx_state, 463 464 465 466 467 468 469 470 471 472 473 done:;474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 387 int nbits = nf->nf_nstates; 388 bitset ss; 389 int xx_nstates; 390 ss_state *xx_state, *yy; 391 ss_arc *zz; 392 int istate, jstate, iarc, jarc, ibit; 393 nfastate *st; 394 nfaarc *ar; 395 396 ss = newbitset(nbits); 397 addclosure(ss, nf, nf->nf_start); 398 xx_state = (ss_state *)PyObject_MALLOC(sizeof(ss_state)); 399 if (xx_state == NULL) 400 Py_FatalError("no mem for xx_state in makedfa"); 401 xx_nstates = 1; 402 yy = &xx_state[0]; 403 yy->ss_ss = ss; 404 yy->ss_narcs = 0; 405 yy->ss_arc = NULL; 406 yy->ss_deleted = 0; 407 yy->ss_finish = testbit(ss, nf->nf_finish); 408 if (yy->ss_finish) 409 printf("Error: nonterminal '%s' may produce empty.\n", 410 nf->nf_name); 411 412 /* This algorithm is from a book written before 413 the invention of structured programming... */ 414 415 /* For each unmarked state... */ 416 for (istate = 0; istate < xx_nstates; ++istate) { 417 size_t size; 418 yy = &xx_state[istate]; 419 ss = yy->ss_ss; 420 /* For all its states... */ 421 for (ibit = 0; ibit < nf->nf_nstates; ++ibit) { 422 if (!testbit(ss, ibit)) 423 continue; 424 st = &nf->nf_state[ibit]; 425 /* For all non-empty arcs from this state... */ 426 for (iarc = 0; iarc < st->st_narcs; iarc++) { 427 ar = &st->st_arc[iarc]; 428 if (ar->ar_label == EMPTY) 429 continue; 430 /* Look up in list of arcs from this state */ 431 for (jarc = 0; jarc < yy->ss_narcs; ++jarc) { 432 zz = &yy->ss_arc[jarc]; 433 if (ar->ar_label == zz->sa_label) 434 goto found; 435 } 436 /* Add new arc for this state */ 437 size = sizeof(ss_arc) * (yy->ss_narcs + 1); 438 yy->ss_arc = (ss_arc *)PyObject_REALLOC( 439 yy->ss_arc, size); 440 if (yy->ss_arc == NULL) 441 Py_FatalError("out of mem"); 442 zz = &yy->ss_arc[yy->ss_narcs++]; 443 zz->sa_label = ar->ar_label; 444 zz->sa_bitset = newbitset(nbits); 445 zz->sa_arrow = -1; 446 found: ; 447 /* Add destination */ 448 addclosure(zz->sa_bitset, nf, ar->ar_arrow); 449 } 450 } 451 /* Now look up all the arrow states */ 452 for (jarc = 0; jarc < xx_state[istate].ss_narcs; jarc++) { 453 zz = &xx_state[istate].ss_arc[jarc]; 454 for (jstate = 0; jstate < xx_nstates; jstate++) { 455 if (samebitset(zz->sa_bitset, 456 xx_state[jstate].ss_ss, nbits)) { 457 zz->sa_arrow = jstate; 458 goto done; 459 } 460 } 461 size = sizeof(ss_state) * (xx_nstates + 1); 462 xx_state = (ss_state *)PyObject_REALLOC(xx_state, 463 size); 464 if (xx_state == NULL) 465 Py_FatalError("out of mem"); 466 zz->sa_arrow = xx_nstates; 467 yy = &xx_state[xx_nstates++]; 468 yy->ss_ss = zz->sa_bitset; 469 yy->ss_narcs = 0; 470 yy->ss_arc = NULL; 471 yy->ss_deleted = 0; 472 yy->ss_finish = testbit(yy->ss_ss, nf->nf_finish); 473 done: ; 474 } 475 } 476 477 if (Py_DebugFlag) 478 printssdfa(xx_nstates, xx_state, nbits, &gr->gr_ll, 479 "before minimizing"); 480 481 simplify(xx_nstates, xx_state); 482 483 if (Py_DebugFlag) 484 printssdfa(xx_nstates, xx_state, nbits, &gr->gr_ll, 485 "after minimizing"); 486 487 convert(d, xx_nstates, xx_state); 488 489 /* XXX cleanup */ 490 PyObject_FREE(xx_state); 491 491 } 492 492 493 493 static void 494 494 printssdfa(int xx_nstates, ss_state *xx_state, int nbits, 495 496 { 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521 522 495 labellist *ll, char *msg) 496 { 497 int i, ibit, iarc; 498 ss_state *yy; 499 ss_arc *zz; 500 501 printf("Subset DFA %s\n", msg); 502 for (i = 0; i < xx_nstates; i++) { 503 yy = &xx_state[i]; 504 if (yy->ss_deleted) 505 continue; 506 printf(" Subset %d", i); 507 if (yy->ss_finish) 508 printf(" (finish)"); 509 printf(" { "); 510 for (ibit = 0; ibit < nbits; ibit++) { 511 if (testbit(yy->ss_ss, ibit)) 512 printf("%d ", ibit); 513 } 514 printf("}\n"); 515 for (iarc = 0; iarc < yy->ss_narcs; iarc++) { 516 zz = &yy->ss_arc[iarc]; 517 printf(" Arc to state %d, label %s\n", 518 zz->sa_arrow, 519 PyGrammar_LabelRepr( 520 &ll->ll_label[zz->sa_label])); 521 } 522 } 523 523 } 524 524 … … 536 536 samestate(ss_state *s1, ss_state *s2) 537 537 { 538 539 540 541 542 543 544 545 546 547 538 int i; 539 540 if (s1->ss_narcs != s2->ss_narcs || s1->ss_finish != s2->ss_finish) 541 return 0; 542 for (i = 0; i < s1->ss_narcs; i++) { 543 if (s1->ss_arc[i].sa_arrow != s2->ss_arc[i].sa_arrow || 544 s1->ss_arc[i].sa_label != s2->ss_arc[i].sa_label) 545 return 0; 546 } 547 return 1; 548 548 } 549 549 … … 551 551 renamestates(int xx_nstates, ss_state *xx_state, int from, int to) 552 552 { 553 554 555 556 557 558 559 560 561 562 563 564 553 int i, j; 554 555 if (Py_DebugFlag) 556 printf("Rename state %d to %d.\n", from, to); 557 for (i = 0; i < xx_nstates; i++) { 558 if (xx_state[i].ss_deleted) 559 continue; 560 for (j = 0; j < xx_state[i].ss_narcs; j++) { 561 if (xx_state[i].ss_arc[j].sa_arrow == from) 562 xx_state[i].ss_arc[j].sa_arrow = to; 563 } 564 } 565 565 } 566 566 … … 568 568 simplify(int xx_nstates, ss_state *xx_state) 569 569 { 570 571 572 573 574 575 576 577 578 579 580 581 582 583 584 585 586 587 588 589 590 570 int changes; 571 int i, j; 572 573 do { 574 changes = 0; 575 for (i = 1; i < xx_nstates; i++) { 576 if (xx_state[i].ss_deleted) 577 continue; 578 for (j = 0; j < i; j++) { 579 if (xx_state[j].ss_deleted) 580 continue; 581 if (samestate(&xx_state[i], &xx_state[j])) { 582 xx_state[i].ss_deleted++; 583 renamestates(xx_nstates, xx_state, 584 i, j); 585 changes++; 586 break; 587 } 588 } 589 } 590 } while (changes); 591 591 } 592 592 … … 599 599 convert(dfa *d, int xx_nstates, ss_state *xx_state) 600 600 { 601 602 603 604 605 606 607 608 609 610 611 612 613 614 615 616 617 618 619 620 621 622 623 624 625 626 601 int i, j; 602 ss_state *yy; 603 ss_arc *zz; 604 605 for (i = 0; i < xx_nstates; i++) { 606 yy = &xx_state[i]; 607 if (yy->ss_deleted) 608 continue; 609 yy->ss_rename = addstate(d); 610 } 611 612 for (i = 0; i < xx_nstates; i++) { 613 yy = &xx_state[i]; 614 if (yy->ss_deleted) 615 continue; 616 for (j = 0; j < yy->ss_narcs; j++) { 617 zz = &yy->ss_arc[j]; 618 addarc(d, yy->ss_rename, 619 xx_state[zz->sa_arrow].ss_rename, 620 zz->sa_label); 621 } 622 if (yy->ss_finish) 623 addarc(d, yy->ss_rename, yy->ss_rename, 0); 624 } 625 626 d->d_initial = 0; 627 627 } 628 628 … … 633 633 maketables(nfagrammar *gr) 634 634 { 635 636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 635 int i; 636 nfa *nf; 637 dfa *d; 638 grammar *g; 639 640 if (gr->gr_nnfas == 0) 641 return NULL; 642 g = newgrammar(gr->gr_nfa[0]->nf_type); 643 /* XXX first rule must be start rule */ 644 g->g_ll = gr->gr_ll; 645 646 for (i = 0; i < gr->gr_nnfas; i++) { 647 nf = gr->gr_nfa[i]; 648 if (Py_DebugFlag) { 649 printf("Dump of NFA for '%s' ...\n", nf->nf_name); 650 dumpnfa(&gr->gr_ll, nf); 651 printf("Making DFA for '%s' ...\n", nf->nf_name); 652 } 653 d = adddfa(g, nf->nf_type, nf->nf_name); 654 makedfa(gr, gr->gr_nfa[i], d); 655 } 656 657 return g; 658 658 } 659 659 … … 661 661 pgen(node *n) 662 662 { 663 664 665 666 667 668 669 670 671 663 nfagrammar *gr; 664 grammar *g; 665 666 gr = metacompile(n); 667 g = maketables(gr); 668 translatelabels(g); 669 addfirstsets(g); 670 PyObject_FREE(gr); 671 return g; 672 672 } 673 673 … … 703 703 704 704 [Aho&Ullman 77] 705 706 705 Aho&Ullman, Principles of Compiler Design, Addison-Wesley 1977 706 (first edition) 707 707 708 708 */
Note:
See TracChangeset
for help on using the changeset viewer.