[3031] | 1 | /* nfa - NFA construction routines */
|
---|
| 2 |
|
---|
| 3 | /* Copyright (c) 1990 The Regents of the University of California. */
|
---|
| 4 | /* All rights reserved. */
|
---|
| 5 |
|
---|
| 6 | /* This code is derived from software contributed to Berkeley by */
|
---|
| 7 | /* Vern Paxson. */
|
---|
| 8 |
|
---|
| 9 | /* The United States Government has rights in this work pursuant */
|
---|
| 10 | /* to contract no. DE-AC03-76SF00098 between the United States */
|
---|
| 11 | /* Department of Energy and the University of California. */
|
---|
| 12 |
|
---|
| 13 | /* This file is part of flex. */
|
---|
| 14 |
|
---|
| 15 | /* Redistribution and use in source and binary forms, with or without */
|
---|
| 16 | /* modification, are permitted provided that the following conditions */
|
---|
| 17 | /* are met: */
|
---|
| 18 |
|
---|
| 19 | /* 1. Redistributions of source code must retain the above copyright */
|
---|
| 20 | /* notice, this list of conditions and the following disclaimer. */
|
---|
| 21 | /* 2. Redistributions in binary form must reproduce the above copyright */
|
---|
| 22 | /* notice, this list of conditions and the following disclaimer in the */
|
---|
| 23 | /* documentation and/or other materials provided with the distribution. */
|
---|
| 24 |
|
---|
| 25 | /* Neither the name of the University nor the names of its contributors */
|
---|
| 26 | /* may be used to endorse or promote products derived from this software */
|
---|
| 27 | /* without specific prior written permission. */
|
---|
| 28 |
|
---|
| 29 | /* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR */
|
---|
| 30 | /* IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED */
|
---|
| 31 | /* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR */
|
---|
| 32 | /* PURPOSE. */
|
---|
| 33 |
|
---|
| 34 | #include "flexdef.h"
|
---|
| 35 |
|
---|
| 36 |
|
---|
| 37 | /* declare functions that have forward references */
|
---|
| 38 |
|
---|
| 39 | int dupmachine PROTO ((int));
|
---|
| 40 | void mkxtion PROTO ((int, int));
|
---|
| 41 |
|
---|
| 42 |
|
---|
| 43 | /* add_accept - add an accepting state to a machine
|
---|
| 44 | *
|
---|
| 45 | * accepting_number becomes mach's accepting number.
|
---|
| 46 | */
|
---|
| 47 |
|
---|
| 48 | void add_accept (mach, accepting_number)
|
---|
| 49 | int mach, accepting_number;
|
---|
| 50 | {
|
---|
| 51 | /* Hang the accepting number off an epsilon state. if it is associated
|
---|
| 52 | * with a state that has a non-epsilon out-transition, then the state
|
---|
| 53 | * will accept BEFORE it makes that transition, i.e., one character
|
---|
| 54 | * too soon.
|
---|
| 55 | */
|
---|
| 56 |
|
---|
| 57 | if (transchar[finalst[mach]] == SYM_EPSILON)
|
---|
| 58 | accptnum[finalst[mach]] = accepting_number;
|
---|
| 59 |
|
---|
| 60 | else {
|
---|
| 61 | int astate = mkstate (SYM_EPSILON);
|
---|
| 62 |
|
---|
| 63 | accptnum[astate] = accepting_number;
|
---|
| 64 | (void) link_machines (mach, astate);
|
---|
| 65 | }
|
---|
| 66 | }
|
---|
| 67 |
|
---|
| 68 |
|
---|
| 69 | /* copysingl - make a given number of copies of a singleton machine
|
---|
| 70 | *
|
---|
| 71 | * synopsis
|
---|
| 72 | *
|
---|
| 73 | * newsng = copysingl( singl, num );
|
---|
| 74 | *
|
---|
| 75 | * newsng - a new singleton composed of num copies of singl
|
---|
| 76 | * singl - a singleton machine
|
---|
| 77 | * num - the number of copies of singl to be present in newsng
|
---|
| 78 | */
|
---|
| 79 |
|
---|
| 80 | int copysingl (singl, num)
|
---|
| 81 | int singl, num;
|
---|
| 82 | {
|
---|
| 83 | int copy, i;
|
---|
| 84 |
|
---|
| 85 | copy = mkstate (SYM_EPSILON);
|
---|
| 86 |
|
---|
| 87 | for (i = 1; i <= num; ++i)
|
---|
| 88 | copy = link_machines (copy, dupmachine (singl));
|
---|
| 89 |
|
---|
| 90 | return copy;
|
---|
| 91 | }
|
---|
| 92 |
|
---|
| 93 |
|
---|
| 94 | /* dumpnfa - debugging routine to write out an nfa */
|
---|
| 95 |
|
---|
| 96 | void dumpnfa (state1)
|
---|
| 97 | int state1;
|
---|
| 98 |
|
---|
| 99 | {
|
---|
| 100 | int sym, tsp1, tsp2, anum, ns;
|
---|
| 101 |
|
---|
| 102 | fprintf (stderr,
|
---|
| 103 | _
|
---|
| 104 | ("\n\n********** beginning dump of nfa with start state %d\n"),
|
---|
| 105 | state1);
|
---|
| 106 |
|
---|
| 107 | /* We probably should loop starting at firstst[state1] and going to
|
---|
| 108 | * lastst[state1], but they're not maintained properly when we "or"
|
---|
| 109 | * all of the rules together. So we use our knowledge that the machine
|
---|
| 110 | * starts at state 1 and ends at lastnfa.
|
---|
| 111 | */
|
---|
| 112 |
|
---|
| 113 | /* for ( ns = firstst[state1]; ns <= lastst[state1]; ++ns ) */
|
---|
| 114 | for (ns = 1; ns <= lastnfa; ++ns) {
|
---|
| 115 | fprintf (stderr, _("state # %4d\t"), ns);
|
---|
| 116 |
|
---|
| 117 | sym = transchar[ns];
|
---|
| 118 | tsp1 = trans1[ns];
|
---|
| 119 | tsp2 = trans2[ns];
|
---|
| 120 | anum = accptnum[ns];
|
---|
| 121 |
|
---|
| 122 | fprintf (stderr, "%3d: %4d, %4d", sym, tsp1, tsp2);
|
---|
| 123 |
|
---|
| 124 | if (anum != NIL)
|
---|
| 125 | fprintf (stderr, " [%d]", anum);
|
---|
| 126 |
|
---|
| 127 | fprintf (stderr, "\n");
|
---|
| 128 | }
|
---|
| 129 |
|
---|
| 130 | fprintf (stderr, _("********** end of dump\n"));
|
---|
| 131 | }
|
---|
| 132 |
|
---|
| 133 |
|
---|
| 134 | /* dupmachine - make a duplicate of a given machine
|
---|
| 135 | *
|
---|
| 136 | * synopsis
|
---|
| 137 | *
|
---|
| 138 | * copy = dupmachine( mach );
|
---|
| 139 | *
|
---|
| 140 | * copy - holds duplicate of mach
|
---|
| 141 | * mach - machine to be duplicated
|
---|
| 142 | *
|
---|
| 143 | * note that the copy of mach is NOT an exact duplicate; rather, all the
|
---|
| 144 | * transition states values are adjusted so that the copy is self-contained,
|
---|
| 145 | * as the original should have been.
|
---|
| 146 | *
|
---|
| 147 | * also note that the original MUST be contiguous, with its low and high
|
---|
| 148 | * states accessible by the arrays firstst and lastst
|
---|
| 149 | */
|
---|
| 150 |
|
---|
| 151 | int dupmachine (mach)
|
---|
| 152 | int mach;
|
---|
| 153 | {
|
---|
| 154 | int i, init, state_offset;
|
---|
| 155 | int state = 0;
|
---|
| 156 | int last = lastst[mach];
|
---|
| 157 |
|
---|
| 158 | for (i = firstst[mach]; i <= last; ++i) {
|
---|
| 159 | state = mkstate (transchar[i]);
|
---|
| 160 |
|
---|
| 161 | if (trans1[i] != NO_TRANSITION) {
|
---|
| 162 | mkxtion (finalst[state], trans1[i] + state - i);
|
---|
| 163 |
|
---|
| 164 | if (transchar[i] == SYM_EPSILON &&
|
---|
| 165 | trans2[i] != NO_TRANSITION)
|
---|
| 166 | mkxtion (finalst[state],
|
---|
| 167 | trans2[i] + state - i);
|
---|
| 168 | }
|
---|
| 169 |
|
---|
| 170 | accptnum[state] = accptnum[i];
|
---|
| 171 | }
|
---|
| 172 |
|
---|
| 173 | if (state == 0)
|
---|
| 174 | flexfatal (_("empty machine in dupmachine()"));
|
---|
| 175 |
|
---|
| 176 | state_offset = state - i + 1;
|
---|
| 177 |
|
---|
| 178 | init = mach + state_offset;
|
---|
| 179 | firstst[init] = firstst[mach] + state_offset;
|
---|
| 180 | finalst[init] = finalst[mach] + state_offset;
|
---|
| 181 | lastst[init] = lastst[mach] + state_offset;
|
---|
| 182 |
|
---|
| 183 | return init;
|
---|
| 184 | }
|
---|
| 185 |
|
---|
| 186 |
|
---|
| 187 | /* finish_rule - finish up the processing for a rule
|
---|
| 188 | *
|
---|
| 189 | * An accepting number is added to the given machine. If variable_trail_rule
|
---|
| 190 | * is true then the rule has trailing context and both the head and trail
|
---|
| 191 | * are variable size. Otherwise if headcnt or trailcnt is non-zero then
|
---|
| 192 | * the machine recognizes a pattern with trailing context and headcnt is
|
---|
| 193 | * the number of characters in the matched part of the pattern, or zero
|
---|
| 194 | * if the matched part has variable length. trailcnt is the number of
|
---|
| 195 | * trailing context characters in the pattern, or zero if the trailing
|
---|
| 196 | * context has variable length.
|
---|
| 197 | */
|
---|
| 198 |
|
---|
| 199 | void finish_rule (mach, variable_trail_rule, headcnt, trailcnt,
|
---|
| 200 | pcont_act)
|
---|
| 201 | int mach, variable_trail_rule, headcnt, trailcnt, pcont_act;
|
---|
| 202 | {
|
---|
| 203 | char action_text[MAXLINE];
|
---|
| 204 |
|
---|
| 205 | add_accept (mach, num_rules);
|
---|
| 206 |
|
---|
| 207 | /* We did this in new_rule(), but it often gets the wrong
|
---|
| 208 | * number because we do it before we start parsing the current rule.
|
---|
| 209 | */
|
---|
| 210 | rule_linenum[num_rules] = linenum;
|
---|
| 211 |
|
---|
| 212 | /* If this is a continued action, then the line-number has already
|
---|
| 213 | * been updated, giving us the wrong number.
|
---|
| 214 | */
|
---|
| 215 | if (continued_action)
|
---|
| 216 | --rule_linenum[num_rules];
|
---|
| 217 |
|
---|
| 218 |
|
---|
| 219 | /* If the previous rule was continued action, then we inherit the
|
---|
| 220 | * previous newline flag, possibly overriding the current one.
|
---|
| 221 | */
|
---|
| 222 | if (pcont_act && rule_has_nl[num_rules - 1])
|
---|
| 223 | rule_has_nl[num_rules] = true;
|
---|
| 224 |
|
---|
| 225 | sprintf (action_text, "case %d:\n", num_rules);
|
---|
| 226 | add_action (action_text);
|
---|
| 227 | if (rule_has_nl[num_rules]) {
|
---|
| 228 | sprintf (action_text, "/* rule %d can match eol */\n",
|
---|
| 229 | num_rules);
|
---|
| 230 | add_action (action_text);
|
---|
| 231 | }
|
---|
| 232 |
|
---|
| 233 |
|
---|
| 234 | if (variable_trail_rule) {
|
---|
| 235 | rule_type[num_rules] = RULE_VARIABLE;
|
---|
| 236 |
|
---|
| 237 | if (performance_report > 0)
|
---|
| 238 | fprintf (stderr,
|
---|
| 239 | _
|
---|
| 240 | ("Variable trailing context rule at line %d\n"),
|
---|
| 241 | rule_linenum[num_rules]);
|
---|
| 242 |
|
---|
| 243 | variable_trailing_context_rules = true;
|
---|
| 244 | }
|
---|
| 245 |
|
---|
| 246 | else {
|
---|
| 247 | rule_type[num_rules] = RULE_NORMAL;
|
---|
| 248 |
|
---|
| 249 | if (headcnt > 0 || trailcnt > 0) {
|
---|
| 250 | /* Do trailing context magic to not match the trailing
|
---|
| 251 | * characters.
|
---|
| 252 | */
|
---|
| 253 | char *scanner_cp = "YY_G(yy_c_buf_p) = yy_cp";
|
---|
| 254 | char *scanner_bp = "yy_bp";
|
---|
| 255 |
|
---|
| 256 | add_action
|
---|
| 257 | ("*yy_cp = YY_G(yy_hold_char); /* undo effects of setting up yytext */\n");
|
---|
| 258 |
|
---|
| 259 | if (headcnt > 0) {
|
---|
| 260 | sprintf (action_text, "%s = %s + %d;\n",
|
---|
| 261 | scanner_cp, scanner_bp, headcnt);
|
---|
| 262 | add_action (action_text);
|
---|
| 263 | }
|
---|
| 264 |
|
---|
| 265 | else {
|
---|
| 266 | sprintf (action_text, "%s -= %d;\n",
|
---|
| 267 | scanner_cp, trailcnt);
|
---|
| 268 | add_action (action_text);
|
---|
| 269 | }
|
---|
| 270 |
|
---|
| 271 | add_action
|
---|
| 272 | ("YY_DO_BEFORE_ACTION; /* set up yytext again */\n");
|
---|
| 273 | }
|
---|
| 274 | }
|
---|
| 275 |
|
---|
| 276 | /* Okay, in the action code at this point yytext and yyleng have
|
---|
| 277 | * their proper final values for this rule, so here's the point
|
---|
| 278 | * to do any user action. But don't do it for continued actions,
|
---|
| 279 | * as that'll result in multiple YY_RULE_SETUP's.
|
---|
| 280 | */
|
---|
| 281 | if (!continued_action)
|
---|
| 282 | add_action ("YY_RULE_SETUP\n");
|
---|
| 283 |
|
---|
| 284 | line_directive_out ((FILE *) 0, 1);
|
---|
| 285 | }
|
---|
| 286 |
|
---|
| 287 |
|
---|
| 288 | /* link_machines - connect two machines together
|
---|
| 289 | *
|
---|
| 290 | * synopsis
|
---|
| 291 | *
|
---|
| 292 | * new = link_machines( first, last );
|
---|
| 293 | *
|
---|
| 294 | * new - a machine constructed by connecting first to last
|
---|
| 295 | * first - the machine whose successor is to be last
|
---|
| 296 | * last - the machine whose predecessor is to be first
|
---|
| 297 | *
|
---|
| 298 | * note: this routine concatenates the machine first with the machine
|
---|
| 299 | * last to produce a machine new which will pattern-match first first
|
---|
| 300 | * and then last, and will fail if either of the sub-patterns fails.
|
---|
| 301 | * FIRST is set to new by the operation. last is unmolested.
|
---|
| 302 | */
|
---|
| 303 |
|
---|
| 304 | int link_machines (first, last)
|
---|
| 305 | int first, last;
|
---|
| 306 | {
|
---|
| 307 | if (first == NIL)
|
---|
| 308 | return last;
|
---|
| 309 |
|
---|
| 310 | else if (last == NIL)
|
---|
| 311 | return first;
|
---|
| 312 |
|
---|
| 313 | else {
|
---|
| 314 | mkxtion (finalst[first], last);
|
---|
| 315 | finalst[first] = finalst[last];
|
---|
| 316 | lastst[first] = MAX (lastst[first], lastst[last]);
|
---|
| 317 | firstst[first] = MIN (firstst[first], firstst[last]);
|
---|
| 318 |
|
---|
| 319 | return first;
|
---|
| 320 | }
|
---|
| 321 | }
|
---|
| 322 |
|
---|
| 323 |
|
---|
| 324 | /* mark_beginning_as_normal - mark each "beginning" state in a machine
|
---|
| 325 | * as being a "normal" (i.e., not trailing context-
|
---|
| 326 | * associated) states
|
---|
| 327 | *
|
---|
| 328 | * The "beginning" states are the epsilon closure of the first state
|
---|
| 329 | */
|
---|
| 330 |
|
---|
| 331 | void mark_beginning_as_normal (mach)
|
---|
| 332 | register int mach;
|
---|
| 333 | {
|
---|
| 334 | switch (state_type[mach]) {
|
---|
| 335 | case STATE_NORMAL:
|
---|
| 336 | /* Oh, we've already visited here. */
|
---|
| 337 | return;
|
---|
| 338 |
|
---|
| 339 | case STATE_TRAILING_CONTEXT:
|
---|
| 340 | state_type[mach] = STATE_NORMAL;
|
---|
| 341 |
|
---|
| 342 | if (transchar[mach] == SYM_EPSILON) {
|
---|
| 343 | if (trans1[mach] != NO_TRANSITION)
|
---|
| 344 | mark_beginning_as_normal (trans1[mach]);
|
---|
| 345 |
|
---|
| 346 | if (trans2[mach] != NO_TRANSITION)
|
---|
| 347 | mark_beginning_as_normal (trans2[mach]);
|
---|
| 348 | }
|
---|
| 349 | break;
|
---|
| 350 |
|
---|
| 351 | default:
|
---|
| 352 | flexerror (_
|
---|
| 353 | ("bad state type in mark_beginning_as_normal()"));
|
---|
| 354 | break;
|
---|
| 355 | }
|
---|
| 356 | }
|
---|
| 357 |
|
---|
| 358 |
|
---|
| 359 | /* mkbranch - make a machine that branches to two machines
|
---|
| 360 | *
|
---|
| 361 | * synopsis
|
---|
| 362 | *
|
---|
| 363 | * branch = mkbranch( first, second );
|
---|
| 364 | *
|
---|
| 365 | * branch - a machine which matches either first's pattern or second's
|
---|
| 366 | * first, second - machines whose patterns are to be or'ed (the | operator)
|
---|
| 367 | *
|
---|
| 368 | * Note that first and second are NEITHER destroyed by the operation. Also,
|
---|
| 369 | * the resulting machine CANNOT be used with any other "mk" operation except
|
---|
| 370 | * more mkbranch's. Compare with mkor()
|
---|
| 371 | */
|
---|
| 372 |
|
---|
| 373 | int mkbranch (first, second)
|
---|
| 374 | int first, second;
|
---|
| 375 | {
|
---|
| 376 | int eps;
|
---|
| 377 |
|
---|
| 378 | if (first == NO_TRANSITION)
|
---|
| 379 | return second;
|
---|
| 380 |
|
---|
| 381 | else if (second == NO_TRANSITION)
|
---|
| 382 | return first;
|
---|
| 383 |
|
---|
| 384 | eps = mkstate (SYM_EPSILON);
|
---|
| 385 |
|
---|
| 386 | mkxtion (eps, first);
|
---|
| 387 | mkxtion (eps, second);
|
---|
| 388 |
|
---|
| 389 | return eps;
|
---|
| 390 | }
|
---|
| 391 |
|
---|
| 392 |
|
---|
| 393 | /* mkclos - convert a machine into a closure
|
---|
| 394 | *
|
---|
| 395 | * synopsis
|
---|
| 396 | * new = mkclos( state );
|
---|
| 397 | *
|
---|
| 398 | * new - a new state which matches the closure of "state"
|
---|
| 399 | */
|
---|
| 400 |
|
---|
| 401 | int mkclos (state)
|
---|
| 402 | int state;
|
---|
| 403 | {
|
---|
| 404 | return mkopt (mkposcl (state));
|
---|
| 405 | }
|
---|
| 406 |
|
---|
| 407 |
|
---|
| 408 | /* mkopt - make a machine optional
|
---|
| 409 | *
|
---|
| 410 | * synopsis
|
---|
| 411 | *
|
---|
| 412 | * new = mkopt( mach );
|
---|
| 413 | *
|
---|
| 414 | * new - a machine which optionally matches whatever mach matched
|
---|
| 415 | * mach - the machine to make optional
|
---|
| 416 | *
|
---|
| 417 | * notes:
|
---|
| 418 | * 1. mach must be the last machine created
|
---|
| 419 | * 2. mach is destroyed by the call
|
---|
| 420 | */
|
---|
| 421 |
|
---|
| 422 | int mkopt (mach)
|
---|
| 423 | int mach;
|
---|
| 424 | {
|
---|
| 425 | int eps;
|
---|
| 426 |
|
---|
| 427 | if (!SUPER_FREE_EPSILON (finalst[mach])) {
|
---|
| 428 | eps = mkstate (SYM_EPSILON);
|
---|
| 429 | mach = link_machines (mach, eps);
|
---|
| 430 | }
|
---|
| 431 |
|
---|
| 432 | /* Can't skimp on the following if FREE_EPSILON(mach) is true because
|
---|
| 433 | * some state interior to "mach" might point back to the beginning
|
---|
| 434 | * for a closure.
|
---|
| 435 | */
|
---|
| 436 | eps = mkstate (SYM_EPSILON);
|
---|
| 437 | mach = link_machines (eps, mach);
|
---|
| 438 |
|
---|
| 439 | mkxtion (mach, finalst[mach]);
|
---|
| 440 |
|
---|
| 441 | return mach;
|
---|
| 442 | }
|
---|
| 443 |
|
---|
| 444 |
|
---|
| 445 | /* mkor - make a machine that matches either one of two machines
|
---|
| 446 | *
|
---|
| 447 | * synopsis
|
---|
| 448 | *
|
---|
| 449 | * new = mkor( first, second );
|
---|
| 450 | *
|
---|
| 451 | * new - a machine which matches either first's pattern or second's
|
---|
| 452 | * first, second - machines whose patterns are to be or'ed (the | operator)
|
---|
| 453 | *
|
---|
| 454 | * note that first and second are both destroyed by the operation
|
---|
| 455 | * the code is rather convoluted because an attempt is made to minimize
|
---|
| 456 | * the number of epsilon states needed
|
---|
| 457 | */
|
---|
| 458 |
|
---|
| 459 | int mkor (first, second)
|
---|
| 460 | int first, second;
|
---|
| 461 | {
|
---|
| 462 | int eps, orend;
|
---|
| 463 |
|
---|
| 464 | if (first == NIL)
|
---|
| 465 | return second;
|
---|
| 466 |
|
---|
| 467 | else if (second == NIL)
|
---|
| 468 | return first;
|
---|
| 469 |
|
---|
| 470 | else {
|
---|
| 471 | /* See comment in mkopt() about why we can't use the first
|
---|
| 472 | * state of "first" or "second" if they satisfy "FREE_EPSILON".
|
---|
| 473 | */
|
---|
| 474 | eps = mkstate (SYM_EPSILON);
|
---|
| 475 |
|
---|
| 476 | first = link_machines (eps, first);
|
---|
| 477 |
|
---|
| 478 | mkxtion (first, second);
|
---|
| 479 |
|
---|
| 480 | if (SUPER_FREE_EPSILON (finalst[first]) &&
|
---|
| 481 | accptnum[finalst[first]] == NIL) {
|
---|
| 482 | orend = finalst[first];
|
---|
| 483 | mkxtion (finalst[second], orend);
|
---|
| 484 | }
|
---|
| 485 |
|
---|
| 486 | else if (SUPER_FREE_EPSILON (finalst[second]) &&
|
---|
| 487 | accptnum[finalst[second]] == NIL) {
|
---|
| 488 | orend = finalst[second];
|
---|
| 489 | mkxtion (finalst[first], orend);
|
---|
| 490 | }
|
---|
| 491 |
|
---|
| 492 | else {
|
---|
| 493 | eps = mkstate (SYM_EPSILON);
|
---|
| 494 |
|
---|
| 495 | first = link_machines (first, eps);
|
---|
| 496 | orend = finalst[first];
|
---|
| 497 |
|
---|
| 498 | mkxtion (finalst[second], orend);
|
---|
| 499 | }
|
---|
| 500 | }
|
---|
| 501 |
|
---|
| 502 | finalst[first] = orend;
|
---|
| 503 | return first;
|
---|
| 504 | }
|
---|
| 505 |
|
---|
| 506 |
|
---|
| 507 | /* mkposcl - convert a machine into a positive closure
|
---|
| 508 | *
|
---|
| 509 | * synopsis
|
---|
| 510 | * new = mkposcl( state );
|
---|
| 511 | *
|
---|
| 512 | * new - a machine matching the positive closure of "state"
|
---|
| 513 | */
|
---|
| 514 |
|
---|
| 515 | int mkposcl (state)
|
---|
| 516 | int state;
|
---|
| 517 | {
|
---|
| 518 | int eps;
|
---|
| 519 |
|
---|
| 520 | if (SUPER_FREE_EPSILON (finalst[state])) {
|
---|
| 521 | mkxtion (finalst[state], state);
|
---|
| 522 | return state;
|
---|
| 523 | }
|
---|
| 524 |
|
---|
| 525 | else {
|
---|
| 526 | eps = mkstate (SYM_EPSILON);
|
---|
| 527 | mkxtion (eps, state);
|
---|
| 528 | return link_machines (state, eps);
|
---|
| 529 | }
|
---|
| 530 | }
|
---|
| 531 |
|
---|
| 532 |
|
---|
| 533 | /* mkrep - make a replicated machine
|
---|
| 534 | *
|
---|
| 535 | * synopsis
|
---|
| 536 | * new = mkrep( mach, lb, ub );
|
---|
| 537 | *
|
---|
| 538 | * new - a machine that matches whatever "mach" matched from "lb"
|
---|
| 539 | * number of times to "ub" number of times
|
---|
| 540 | *
|
---|
| 541 | * note
|
---|
| 542 | * if "ub" is INFINITE_REPEAT then "new" matches "lb" or more occurrences of "mach"
|
---|
| 543 | */
|
---|
| 544 |
|
---|
| 545 | int mkrep (mach, lb, ub)
|
---|
| 546 | int mach, lb, ub;
|
---|
| 547 | {
|
---|
| 548 | int base_mach, tail, copy, i;
|
---|
| 549 |
|
---|
| 550 | base_mach = copysingl (mach, lb - 1);
|
---|
| 551 |
|
---|
| 552 | if (ub == INFINITE_REPEAT) {
|
---|
| 553 | copy = dupmachine (mach);
|
---|
| 554 | mach = link_machines (mach,
|
---|
| 555 | link_machines (base_mach,
|
---|
| 556 | mkclos (copy)));
|
---|
| 557 | }
|
---|
| 558 |
|
---|
| 559 | else {
|
---|
| 560 | tail = mkstate (SYM_EPSILON);
|
---|
| 561 |
|
---|
| 562 | for (i = lb; i < ub; ++i) {
|
---|
| 563 | copy = dupmachine (mach);
|
---|
| 564 | tail = mkopt (link_machines (copy, tail));
|
---|
| 565 | }
|
---|
| 566 |
|
---|
| 567 | mach =
|
---|
| 568 | link_machines (mach,
|
---|
| 569 | link_machines (base_mach, tail));
|
---|
| 570 | }
|
---|
| 571 |
|
---|
| 572 | return mach;
|
---|
| 573 | }
|
---|
| 574 |
|
---|
| 575 |
|
---|
| 576 | /* mkstate - create a state with a transition on a given symbol
|
---|
| 577 | *
|
---|
| 578 | * synopsis
|
---|
| 579 | *
|
---|
| 580 | * state = mkstate( sym );
|
---|
| 581 | *
|
---|
| 582 | * state - a new state matching sym
|
---|
| 583 | * sym - the symbol the new state is to have an out-transition on
|
---|
| 584 | *
|
---|
| 585 | * note that this routine makes new states in ascending order through the
|
---|
| 586 | * state array (and increments LASTNFA accordingly). The routine DUPMACHINE
|
---|
| 587 | * relies on machines being made in ascending order and that they are
|
---|
| 588 | * CONTIGUOUS. Change it and you will have to rewrite DUPMACHINE (kludge
|
---|
| 589 | * that it admittedly is)
|
---|
| 590 | */
|
---|
| 591 |
|
---|
| 592 | int mkstate (sym)
|
---|
| 593 | int sym;
|
---|
| 594 | {
|
---|
| 595 | if (++lastnfa >= current_mns) {
|
---|
| 596 | if ((current_mns += MNS_INCREMENT) >= maximum_mns)
|
---|
| 597 | lerrif (_
|
---|
| 598 | ("input rules are too complicated (>= %d NFA states)"),
|
---|
| 599 | current_mns);
|
---|
| 600 |
|
---|
| 601 | ++num_reallocs;
|
---|
| 602 |
|
---|
| 603 | firstst = reallocate_integer_array (firstst, current_mns);
|
---|
| 604 | lastst = reallocate_integer_array (lastst, current_mns);
|
---|
| 605 | finalst = reallocate_integer_array (finalst, current_mns);
|
---|
| 606 | transchar =
|
---|
| 607 | reallocate_integer_array (transchar, current_mns);
|
---|
| 608 | trans1 = reallocate_integer_array (trans1, current_mns);
|
---|
| 609 | trans2 = reallocate_integer_array (trans2, current_mns);
|
---|
| 610 | accptnum =
|
---|
| 611 | reallocate_integer_array (accptnum, current_mns);
|
---|
| 612 | assoc_rule =
|
---|
| 613 | reallocate_integer_array (assoc_rule, current_mns);
|
---|
| 614 | state_type =
|
---|
| 615 | reallocate_integer_array (state_type, current_mns);
|
---|
| 616 | }
|
---|
| 617 |
|
---|
| 618 | firstst[lastnfa] = lastnfa;
|
---|
| 619 | finalst[lastnfa] = lastnfa;
|
---|
| 620 | lastst[lastnfa] = lastnfa;
|
---|
| 621 | transchar[lastnfa] = sym;
|
---|
| 622 | trans1[lastnfa] = NO_TRANSITION;
|
---|
| 623 | trans2[lastnfa] = NO_TRANSITION;
|
---|
| 624 | accptnum[lastnfa] = NIL;
|
---|
| 625 | assoc_rule[lastnfa] = num_rules;
|
---|
| 626 | state_type[lastnfa] = current_state_type;
|
---|
| 627 |
|
---|
| 628 | /* Fix up equivalence classes base on this transition. Note that any
|
---|
| 629 | * character which has its own transition gets its own equivalence
|
---|
| 630 | * class. Thus only characters which are only in character classes
|
---|
| 631 | * have a chance at being in the same equivalence class. E.g. "a|b"
|
---|
| 632 | * puts 'a' and 'b' into two different equivalence classes. "[ab]"
|
---|
| 633 | * puts them in the same equivalence class (barring other differences
|
---|
| 634 | * elsewhere in the input).
|
---|
| 635 | */
|
---|
| 636 |
|
---|
| 637 | if (sym < 0) {
|
---|
| 638 | /* We don't have to update the equivalence classes since
|
---|
| 639 | * that was already done when the ccl was created for the
|
---|
| 640 | * first time.
|
---|
| 641 | */
|
---|
| 642 | }
|
---|
| 643 |
|
---|
| 644 | else if (sym == SYM_EPSILON)
|
---|
| 645 | ++numeps;
|
---|
| 646 |
|
---|
| 647 | else {
|
---|
| 648 | check_char (sym);
|
---|
| 649 |
|
---|
| 650 | if (useecs)
|
---|
| 651 | /* Map NUL's to csize. */
|
---|
| 652 | mkechar (sym ? sym : csize, nextecm, ecgroup);
|
---|
| 653 | }
|
---|
| 654 |
|
---|
| 655 | return lastnfa;
|
---|
| 656 | }
|
---|
| 657 |
|
---|
| 658 |
|
---|
| 659 | /* mkxtion - make a transition from one state to another
|
---|
| 660 | *
|
---|
| 661 | * synopsis
|
---|
| 662 | *
|
---|
| 663 | * mkxtion( statefrom, stateto );
|
---|
| 664 | *
|
---|
| 665 | * statefrom - the state from which the transition is to be made
|
---|
| 666 | * stateto - the state to which the transition is to be made
|
---|
| 667 | */
|
---|
| 668 |
|
---|
| 669 | void mkxtion (statefrom, stateto)
|
---|
| 670 | int statefrom, stateto;
|
---|
| 671 | {
|
---|
| 672 | if (trans1[statefrom] == NO_TRANSITION)
|
---|
| 673 | trans1[statefrom] = stateto;
|
---|
| 674 |
|
---|
| 675 | else if ((transchar[statefrom] != SYM_EPSILON) ||
|
---|
| 676 | (trans2[statefrom] != NO_TRANSITION))
|
---|
| 677 | flexfatal (_("found too many transitions in mkxtion()"));
|
---|
| 678 |
|
---|
| 679 | else { /* second out-transition for an epsilon state */
|
---|
| 680 | ++eps2;
|
---|
| 681 | trans2[statefrom] = stateto;
|
---|
| 682 | }
|
---|
| 683 | }
|
---|
| 684 |
|
---|
| 685 | /* new_rule - initialize for a new rule */
|
---|
| 686 |
|
---|
| 687 | void new_rule ()
|
---|
| 688 | {
|
---|
| 689 | if (++num_rules >= current_max_rules) {
|
---|
| 690 | ++num_reallocs;
|
---|
| 691 | current_max_rules += MAX_RULES_INCREMENT;
|
---|
| 692 | rule_type = reallocate_integer_array (rule_type,
|
---|
| 693 | current_max_rules);
|
---|
| 694 | rule_linenum = reallocate_integer_array (rule_linenum,
|
---|
| 695 | current_max_rules);
|
---|
| 696 | rule_useful = reallocate_integer_array (rule_useful,
|
---|
| 697 | current_max_rules);
|
---|
| 698 | rule_has_nl = reallocate_bool_array (rule_has_nl,
|
---|
| 699 | current_max_rules);
|
---|
| 700 | }
|
---|
| 701 |
|
---|
| 702 | if (num_rules > MAX_RULE)
|
---|
| 703 | lerrif (_("too many rules (> %d)!"), MAX_RULE);
|
---|
| 704 |
|
---|
| 705 | rule_linenum[num_rules] = linenum;
|
---|
| 706 | rule_useful[num_rules] = false;
|
---|
| 707 | rule_has_nl[num_rules] = false;
|
---|
| 708 | }
|
---|