CocoSourcesCS 2
/*-------------------------------------------------------------------------
DFA.cs -- Generation of the Scanner Automaton
Compiler Generator Coco/R,
Copyright (c) 1990, 2004 Hanspeter Moessenboeck, University of Linz
extended by M. Loeberbauer & A. Woess, Univ. of Linz
with improvements by Pat Terry, Rhodes University

This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2, or (at your option) any
later version.

This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
for more details.

You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.

As an exception, it is allowed to write an extension of Coco/R that is
used as a plugin in non-free software.

If not otherwise stated, any source code generated by Coco/R (other than
Coco/R itself) does not fall under the GNU General Public License.
-------------------------------------------------------------------------*/
using System;
using System.IO;
using System.Text;
using System.Collections;

namespace at.jku.ssw.Coco {

//-----------------------------------------------------------------------------
//  State
//-----------------------------------------------------------------------------

// A state of the scanner automaton. States form a singly linked list via
// 'next'; the outgoing transitions of a state form a singly linked list of
// actions starting at 'firstAction'.
public class State {               // state of finite automaton
	public int nr;                 // state number
	public Action firstAction;     // to first action of this state
	public Symbol endOf;           // recognized token if state is final
	public bool ctx;               // true if state is reached via contextTrans
	public State next;

	// Inserts act into the action list, keeping the list ordered by
	// ascending 'typ'. Among actions of equal typ the new one goes last.
	public void AddAction(Action act) {
		Action lasta = null, a = firstAction;
		while (a != null && act.typ >= a.typ) {lasta = a; a = a.next;}
		// collecting classes at the beginning gives better performance
		act.next = a;
		if (a == firstAction) firstAction = act; else lasta.next = act;
	}

	// Unlinks act from the action list; does nothing if act is not in it.
	public void DetachAction(Action act) {
		Action lasta = null, a = firstAction;
		while (a != null && a != act) {lasta = a; a = a.next;}
		if (a != null) {
			if (a == firstAction) firstAction = a.next; else lasta.next = a.next;
		}
	}

	// Adds a copy of every action of s (including copies of its targets)
	// to this state; s itself is left unchanged.
	public void MeltWith(State s) { // copy actions of s to state
		for (Action action = s.firstAction; action != null; action = action.next) {
			Action a = new Action(action.typ, action.sym, action.tc);
			a.AddTargets(action);
			AddAction(a);
		}
	}

}

//-----------------------------------------------------------------------------
//  Action
//-----------------------------------------------------------------------------

// A transition of the automaton: on symbol 'sym' (a single character or a
// character class, depending on 'typ') go to the states listed in 'target'.
public class Action {      // action of finite automaton
	public int typ;         // type of action symbol: clas, chr
	public int sym;         // action symbol
	public int tc;          // transition code: normalTrans, contextTrans
	public Target target;   // states reached from this action
	public Action next;

	public Action(int typ, int sym, int tc) {
		this.typ = typ; this.sym = sym; this.tc = tc;
	}

	// Inserts t into the target list, which is kept sorted by ascending
	// state number. A target whose state is already present is ignored.
	public void AddTarget(Target t) { // add t to the action.targets
		Target last = null;
		Target p = target;
		while (p != null && t.state.nr >= p.state.nr) {
			if (t.state == p.state) return;
			last = p; p = p.next;
		}
		t.next = p;
		if (p == target) target = t; else last.next = t;
	}

	// Adds a fresh Target for every target state of a. If a is a context
	// transition, this action becomes a context transition as well.
	public void AddTargets(Action a) { // add copy of a.targets to action.targets
		for (Target p = a.target; p != null; p = p.next) {
			Target t = new Target(p.state);
			AddTarget(t);
		}
		if (a.tc == Node.contextTrans) tc = Node.contextTrans;
	}

	// Returns a new CharSet with the characters that trigger this action:
	// a clone of the class set for a clas action, otherwise the set {sym}.
	public CharSet Symbols(Tab tab) {
		CharSet s;
		if (typ == Node.clas)
			s = tab.CharClassSet(sym).Clone();
		else {
			s = new CharSet(); s.Set(sym);
		}
		return s;
	}

	// Makes this action trigger on exactly the characters in s: a chr action
	// if s has one element, otherwise a clas action on a class equal to s
	// (an existing class if tab knows one, else a new class named "#").
	public void ShiftWith(CharSet s, Tab tab) {
		if (s.Elements() == 1) {
			typ = Node.chr; sym = s.First();
		} else {
			CharClass c = tab.FindCharClass(s);
			if (c == null) c = tab.NewCharClass("#", s); // class with dummy name
			typ = Node.clas; sym = c.n;
		}
	}

}

//-----------------------------------------------------------------------------
//  Target
//-----------------------------------------------------------------------------

// One element of the linked list of states that an action leads to.
public class Target {      // set of states that are reached by an action
	public State state;     // target state
	public Target next;

	public Target (State s) {
		state = s;
	}
}

//-----------------------------------------------------------------------------
//  Melted
//-----------------------------------------------------------------------------

// Records which set of old states a melted (combined) state stands for.
public class Melted {      // info about melted states
	public BitArray set;    // set of old states
	public State state;     // new state
	public Melted next;

	public Melted(BitArray set, State state) {
		this.set = set; this.state = state;
	}
}

//-----------------------------------------------------------------------------
//  Comment
//-----------------------------------------------------------------------------

// Start and stop delimiter of one comment form; entries are chained via 'next'.
public class Comment {     // info about comment syntax
	public string start;
	public string stop;
	public bool nested;
	public Comment next;

	public Comment(string start, string stop, bool nested) {
		this.start = start; this.stop = stop; this.nested = nested;
	}

}

//-----------------------------------------------------------------------------
//  CharSet
//-----------------------------------------------------------------------------

// Set of character codes stored as a linked list of inclusive ranges.
// The list is kept sorted by 'from'; Set() extends or merges ranges so
// that adjacent values end up in the same range.
public class CharSet {

	// Inclusive interval [from, to] of character codes.
	public class Range {
		public int from, to;
		public Range next;
		public Range(int from, int to) { this.from = from; this.to = to; }
	}

	public Range head;

	// True if i is a member of the set. Relies on the ranges being sorted:
	// the search stops at the first range that starts beyond i.
	public bool this[int i] {
		get {
			for (Range p = head; p != null; p = p.next) {
				if (i < p.from) return false;
				else if (i <= p.to) return true; // p.from <= i <= p.to
			}
			return false;
		}
	}

	// Adds i to the set.
	public void Set(int i) {
		Range cur = head, prev = null;
		while (cur != null && i >= cur.from-1) {
			if (i <= cur.to + 1) { // (cur.from-1) <= i <= (cur.to+1)
				if (i == cur.from - 1) cur.from--;
				else if (i == cur.to + 1) {
					cur.to++;
					// growing upwards may close the gap to the following range
					Range next = cur.next;
					if (next != null && cur.to == next.from - 1) { cur.to = next.to; cur.next = next.next; }
				}
				// otherwise i already lies inside cur
				return;
			}
			prev = cur; cur = cur.next;
		}
		// i touches no existing range: insert a new one before cur
		Range n = new Range(i, i);
		n.next = cur;
		if (prev == null) head = n; else prev.next = n;
	}

	// Returns a copy of this set that shares no Range objects with it.
	public CharSet Clone() {
		CharSet s = new CharSet();
		Range prev = null;
		for (Range cur = head; cur != null; cur = cur.next) {
			Range r = new Range(cur.from, cur.to);
			if (prev == null) s.head = r; else prev.next = r;
			prev = r;
		}
		return s;
	}

	// True if both sets consist of the same sequence of ranges.
	// A null argument compares unequal instead of raising an exception.
	public bool Equals(CharSet s) {
		if (s == null) return false;
		Range p = head, q = s.head;
		while (p != null && q != null) {
			if (p.from != q.from || p.to != q.to) return false;
			p = p.next; q = q.next;
		}
		return p == q; // equal only if both lists ended together
	}

	// Number of characters in the set.
	public int Elements() {
		int n = 0;
		for (Range p = head; p != null; p = p.next) n += p.to - p.from + 1;
		return n;
	}

	// Smallest element of the set, or -1 if the set is empty.
	public int First() {
		if (head != null) return head.from;
		return -1;
	}

	// this = this united with s
	public void Or(CharSet s) {
		for (Range p = s.head; p != null; p = p.next)
			for (int i = p.from; i <= p.to; i++) Set(i);
	}

	// this = this intersected with s
	public void And(CharSet s) {
		CharSet x = new CharSet();
		for (Range p = head; p != null; p = p.next)
			for (int i = p.from; i <= p.to; i++)
				if (s[i]) x.Set(i);
		head = x.head;
	}

	// this = this without the elements of s
	public void Subtract(CharSet s) {
		CharSet x = new CharSet();
		for (Range p = head; p != null; p = p.next)
			for (int i = p.from; i <= p.to; i++)
				if (!s[i]) x.Set(i);
		head = x.head;
	}

	// True if every element of s is also in this set.
	public bool Includes(CharSet s) {
		for (Range p = s.head; p != null; p = p.next)
			for (int i = p.from; i <= p.to; i++)
				if (!this[i]) return false;
		return true;
	}

	// True if this set and s have at least one element in common.
	public bool Intersects(CharSet s) {
		for (Range p = s.head; p != null; p = p.next)
			for (int i = p.from; i <= p.to; i++)
				if (this[i]) return true;
		return false;
	}

	// Makes the set contain every value from Char.MinValue to Char.MaxValue.
	public void Fill() {
		head = new Range(Char.MinValue, Char.MaxValue);
	}
}


//-----------------------------------------------------------------------------
//  Generator
//-----------------------------------------------------------------------------

// Helper that opens a frame file and an output file and copies sections of
// the frame file, delimited by marker strings, to the output.
class Generator {
	private const int EOF = -1;

	private FileStream fram;
	private StreamWriter gen;
	private readonly Tab tab;
	private string frameFile;

	public Generator(Tab tab) {
		this.tab = tab;
	}

	// Opens the frame file 'frame', looking in tab.frameDir first (if set)
	// and then in tab.srcDir. Throws FatalError if it is found in neither.
	public FileStream OpenFrame(String frame) {
		if (tab.frameDir != null) frameFile = Path.Combine(tab.frameDir, frame);
		if (frameFile == null || !File.Exists(frameFile)) frameFile = Path.Combine(tab.srcDir, frame);
		if (frameFile == null || !File.Exists(frameFile)) throw new FatalError("Cannot find : " + frame);

		try {
			fram = new FileStream(frameFile, FileMode.Open, FileAccess.Read, FileShare.Read);
		} catch (FileNotFoundException) {
			throw new FatalError("Cannot open frame file: " + frameFile);
		}
		return fram;
	}



	// Creates the output file 'target' in tab.outDir. An existing file of
	// that name is first copied to "<name>.old". Throws FatalError on I/O errors.
	public StreamWriter OpenGen(string target) {
		string fn = Path.Combine(tab.outDir, target);
		try {
			if (File.Exists(fn)) File.Copy(fn, fn + ".old", true);
			gen = new StreamWriter(new FileStream(fn, FileMode.Create)); /* pdt */
		} catch (IOException) {
			throw new FatalError("Cannot generate file: " + fn);
		}
		return gen;
	}


	// Copies the whole of Copyright.frame (searched in tab.frameDir, then
	// tab.srcDir) to the output; does nothing if no such file exists.
	// The copyright stream is closed afterwards and the previously opened
	// frame stream is reinstated even if copying fails.
	public void GenCopyright() {
		string copyFr = null;
		if (tab.frameDir != null) copyFr = Path.Combine(tab.frameDir, "Copyright.frame");
		if (copyFr == null || !File.Exists(copyFr)) copyFr = Path.Combine(tab.srcDir, "Copyright.frame");
		if (copyFr == null || !File.Exists(copyFr)) return;

		FileStream scannerFram = fram;
		try {
			fram = new FileStream(copyFr, FileMode.Open, FileAccess.Read, FileShare.Read);
			try {
				CopyFramePart(null);
			} finally {
				fram.Close(); // fram is the copyright stream here
			}
		} catch (FileNotFoundException) {
			throw new FatalError("Cannot open Copyright.frame");
		} finally {
			fram = scannerFram;
		}
	}

	// Reads the frame file up to and including 'stop' without producing output.
	public void SkipFramePart(String stop) {
		CopyFramePart(stop, false);
	}


	// Copies the frame file to the output up to (but not including) 'stop';
	// the marker itself is consumed.
	public void CopyFramePart(String stop) {
		CopyFramePart(stop, true);
	}

	// if stop == null, copies until end of file
	// Otherwise reaching end of file before 'stop' was seen is a FatalError.
	private void CopyFramePart(string stop, bool generateOutput) {
		char startCh = (char) 0;
		int endOfStopString = 0;

		if (stop != null) {
			startCh = stop[0];
			endOfStopString = stop.Length - 1;
		}

		int ch = framRead();
		while (ch != EOF) {
			if (stop != null && ch == startCh) {
				int i = 0;
				do {
					if (i == endOfStopString) return; // stop[0..i] found
					ch = framRead(); i++;
				} while (ch == stop[i]);
				// stop[0..i-1] found; continue with last read character
				if (generateOutput) gen.Write(stop.Substring(0, i));
			} else {
				if (generateOutput) gen.Write((char) ch);
				ch = framRead();
			}
		}

		if (stop != null) throw new FatalError("Incomplete or corrupt frame file: " + frameFile);
	}

	// Reads one byte from the frame file (-1 at end of file); any exception
	// is reported as a FatalError.
	private int framRead() {
		try {
			return fram.ReadByte();
		} catch (Exception) {
			throw new FatalError("Error reading frame file: " + frameFile);
		}
	}
}

//-----------------------------------------------------------------------------
//  DFA
//-----------------------------------------------------------------------------

public class DFA {
	private int maxStates;
	private int lastStateNr;   // highest state number
	private State firstState;
	private State lastState;   // last allocated state
	private int lastSimState;  // last non melted state
	private FileStream fram;   // scanner frame input
	private StreamWriter gen;  // generated scanner file
	private Symbol curSy;      // current token to be recognized (in FindTrans)
	private bool dirtyDFA;     // DFA may become nondeterministic in MatchLiteral

	public bool ignoreCase;    // true if input should be treated case-insensitively
	public bool hasCtxMoves;   // DFA has context transitions

	// other Coco objects
	private Parser parser;
	private Tab tab;
	private Errors errors;
	private TextWriter trace;

	//---------- Output primitives

	// Text for character code ch in generated code: a quoted char literal for
	// printable ASCII other than ' and \, otherwise the decimal code.
	private string Ch(int ch) {
		if (ch < ' ' || ch >= 127 || ch == '\'' || ch == '\\') return Convert.ToString(ch);
		else return String.Format("'{0}'", (char)ch);
	}

	private string ChCond(char ch) {
		return String.Format("ch == {0}", Ch(ch));
	}

	// Writes a boolean expression over 'ch' that is true for the members of s.
	private void PutRange(CharSet s) {
		for (CharSet.Range r = s.head; r != null; r = r.next) {
			if (r.from == r.to) { gen.Write("ch == " + Ch(r.from)); }
			else if (r.from == 0) { gen.Write("ch <= " + Ch(r.to)); }
			else { gen.Write("ch >= " + Ch(r.from) + " && ch <= " + Ch(r.to)); }
			if (r.next != null) gen.Write(" || ");
		}
	}

	//---------- State handling

	// Allocates a state with the next free number and appends it to the state list.
	State NewState() {
		State s = new State(); s.nr = ++lastStateNr;
		if (firstState == null) firstState = s; else lastState.next = s;
		lastState = s;
		return s;
	}

	// Adds a transition from -> to on (typ, sym). A class transition marks
	// the current token as a classToken.
	void NewTransition(State from, State to, int typ, int sym, int tc) {
		Target t = new Target(to);
		Action a = new Action(typ, sym, tc); a.target = t;
		from.AddAction(a);
		if (typ == Node.clas) curSy.tokenKind = Symbol.classToken;
	}

	// Within each state, merges actions that lead to the same state with the
	// same transition code into one action on the union of their symbols.
	void CombineShifts() {
		State state;
		Action a, b, c;
		CharSet seta, setb;
		for (state = firstState; state != null; state = state.next) {
			for (a = state.firstAction; a != null; a = a.next) {
				b = a.next;
				while (b != null) {
					if (a.target.state == b.target.state && a.tc == b.tc) {
						seta = a.Symbols(tab); setb = b.Symbols(tab);
						seta.Or(setb);
						a.ShiftWith(seta, tab);
						// advance before detaching so the walk is not disturbed
						c = b; b = b.next; state.DetachAction(c);
					} else b = b.next;
				}
			}
		}
	}

	// Marks in 'used' every state reachable from 'state' via first targets.
	void FindUsedStates(State state, BitArray used) {
		if (used[state.nr]) return;
		used[state.nr] = true;
		for (Action a = state.firstAction; a != null; a = a.next)
			FindUsedStates(a.target.state, used);
	}

	// Removes unreachable states, merges final states without actions that
	// recognize the same token, and renumbers the remaining states.
	void DeleteRedundantStates() {
		State[] newState = new State[lastStateNr + 1];
		BitArray used = new BitArray(lastStateNr + 1);
		FindUsedStates(firstState, used);
		// combine equal final states
		for (State s1 = firstState.next; s1 != null; s1 = s1.next) // firstState cannot be final
			if (used[s1.nr] && s1.endOf != null && s1.firstAction == null && !s1.ctx)
				for (State s2 = s1.next; s2 != null; s2 = s2.next)
					if (used[s2.nr] && s1.endOf == s2.endOf && s2.firstAction == null && !s2.ctx) {
						used[s2.nr] = false; newState[s2.nr] = s1;
					}
		// redirect transitions that led to a merged-away state
		for (State state = firstState; state != null; state = state.next)
			if (used[state.nr])
				for (Action a = state.firstAction; a != null; a = a.next)
					if (!used[a.target.state.nr])
						a.target.state = newState[a.target.state.nr];
		// delete unused states
		lastState = firstState; lastStateNr = 0; // firstState has number 0
		for (State state = firstState.next; state != null; state = state.next)
			if (used[state.nr]) {state.nr = ++lastStateNr; lastState = state;}
			else lastState.next = state.next;
	}

	// State belonging to node p; for p == null a new final state for the
	// current token is created.
	State TheState(Node p) {
		State state;
		if (p == null) {state = NewState(); state.endOf = curSy; return state;}
		else return p.state;
	}

	// Adds to 'from' the transitions that can be taken starting at node p.
	// 'stepped' records visited nodes so that successors are not stepped twice.
	void Step(State from, Node p, BitArray stepped) {
		if (p == null) return;
		stepped[p.n] = true;
		switch (p.typ) {
			case Node.clas: case Node.chr: {
				NewTransition(from, TheState(p.next), p.typ, p.val, p.code);
				break;
			}
			case Node.alt: {
				Step(from, p.sub, stepped); Step(from, p.down, stepped);
				break;
			}
			case Node.iter: {
				if (Tab.DelSubGraph(p.sub)) {
					parser.SemErr("contents of {...} must not be deletable");
					return;
				}
				if (p.next != null && !stepped[p.next.n]) Step(from, p.next, stepped);
				Step(from, p.sub, stepped);
				if (p.state != from) {
					Step(p.state, p, new BitArray(tab.nodes.Count));
				}
				break;
			}
			case Node.opt: {
				if (p.next != null && !stepped[p.next.n]) Step(from, p.next, stepped);
				Step(from, p.sub, stepped);
				break;
			}
		}
	}

	// Assigns a state n.state to every node n. There will be a transition from
	// n.state to n.next.state triggered by n.val. All nodes in an alternative
	// chain are represented by the same state.
// Numbering scheme:
	// - any node after a chr, clas, opt, or alt, must get a new number
	// - if a nested structure starts with an iteration the iter node must get a new number
	// - if an iteration follows an iteration, it must get a new number
	void NumberNodes(Node p, State state, bool renumIter) {
		if (p == null) return;
		if (p.state != null) return; // already visited;
		if (state == null || (p.typ == Node.iter && renumIter)) state = NewState();
		p.state = state;
		if (Tab.DelGraph(p)) state.endOf = curSy;
		switch (p.typ) {
			case Node.clas: case Node.chr: {
				NumberNodes(p.next, null, false);
				break;
			}
			case Node.opt: {
				NumberNodes(p.next, null, false);
				NumberNodes(p.sub, state, true);
				break;
			}
			case Node.iter: {
				NumberNodes(p.next, state, true);
				NumberNodes(p.sub, state, true);
				break;
			}
			case Node.alt: {
				NumberNodes(p.next, null, false);
				NumberNodes(p.sub, state, true);
				NumberNodes(p.down, state, renumIter);
				break;
			}
		}
	}

	// Walks the node graph once ('marked' guards against revisits) and calls
	// Step for every node that starts a group of equally numbered nodes.
	void FindTrans (Node p, bool start, BitArray marked) {
		if (p == null || marked[p.n]) return;
		marked[p.n] = true;
		if (start) Step(p.state, p, new BitArray(tab.nodes.Count)); // start of group of equally numbered nodes
		switch (p.typ) {
			case Node.clas: case Node.chr: {
				FindTrans(p.next, true, marked);
				break;
			}
			case Node.opt: {
				FindTrans(p.next, true, marked); FindTrans(p.sub, false, marked);
				break;
			}
			case Node.iter: {
				FindTrans(p.next, false, marked); FindTrans(p.sub, false, marked);
				break;
			}
			case Node.alt: {
				FindTrans(p.sub, false, marked); FindTrans(p.down, false, marked);
				break;
			}
		}
	}

	// Adds the states and transitions for the token sym, whose syntax graph
	// starts at node p. Reports an error if the token might be empty.
	public void ConvertToStates(Node p, Symbol sym) {
		curSy = sym;
		if (Tab.DelGraph(p)) {
			parser.SemErr("token might be empty");
			return;
		}
		NumberNodes(p, firstState, true);
		FindTrans(p, true, new BitArray(tab.nodes.Count));
		if (p.typ == Node.iter) {
			Step(firstState, p, new BitArray(tab.nodes.Count));
		}
	}

	// match string against current automaton; store it either as a fixedToken or as a litToken
	// s is the literal including its enclosing quote characters.
	public void MatchLiteral(string s, Symbol sym) {
		s = tab.Unescape(s.Substring(1, s.Length-2));
		int i, len = s.Length;
		State state = firstState;
		Action a = null;
		for (i = 0; i < len; i++) { // try to match s against existing DFA
			a = FindAction(state, s[i]);
			if (a == null) break;
			state = a.target.state;
		}
		// if s was not totally consumed or leads to a non-final state => make new DFA from it
		if (i != len || state.endOf == null) {
			state = firstState; i = 0; a = null;
			dirtyDFA = true;
		}
		for (; i < len; i++) { // make new DFA for s[i..len-1], ML: i is either 0 or len
			State to = NewState();
			NewTransition(state, to, Node.chr, s[i], Node.normalTrans);
			state = to;
		}
		Symbol matchedSym = state.endOf;
		if (state.endOf == null) {
			state.endOf = sym;
		} else if (matchedSym.tokenKind == Symbol.fixedToken || (a != null && a.tc == Node.contextTrans)) {
			// s matched a token with a fixed definition or a token with an appendix that will be cut off
			parser.SemErr("tokens " + sym.name + " and " + matchedSym.name + " cannot be distinguished");
		} else { // matchedSym == classToken || classLitToken
			matchedSym.tokenKind = Symbol.classLitToken;
			sym.tokenKind = Symbol.litToken;
		}
	}

	// Makes the symbol sets of the overlapping actions a and b disjoint.
	// The shared symbols end up in one action that carries the targets of both.
	void SplitActions(State state, Action a, Action b) {
		Action c; CharSet seta, setb, setc;
		seta = a.Symbols(tab); setb = b.Symbols(tab);
		if (seta.Equals(setb)) {
			a.AddTargets(b);
			state.DetachAction(b);
		} else if (seta.Includes(setb)) {
			setc = seta.Clone(); setc.Subtract(setb);
			b.AddTargets(a);
			a.ShiftWith(setc, tab);
		} else if (setb.Includes(seta)) {
			setc = setb.Clone(); setc.Subtract(seta);
			a.AddTargets(b);
			b.ShiftWith(setc, tab);
		} else {
			setc = seta.Clone(); setc.And(setb);
			seta.Subtract(setc);
			setb.Subtract(setc);
			a.ShiftWith(seta, tab);
			b.ShiftWith(setb, tab);
			c = new Action(0, 0, Node.normalTrans);  // typ and sym are set in ShiftWith
			c.AddTargets(a);
			c.AddTargets(b);
			c.ShiftWith(setc, tab);
			state.AddAction(c);
		}
	}

	// True if the actions a and b share at least one triggering character.
	bool Overlap(Action a, Action b) {
		CharSet seta, setb;
		if (a.typ == Node.chr) {
			if (b.typ == Node.chr) return a.sym == b.sym;
			else {setb = tab.CharClassSet(b.sym); return setb[a.sym];}
		} else {
			seta = tab.CharClassSet(a.sym);
			if (b.typ == Node.chr) return seta[b.sym];
			else {setb = tab.CharClassSet(b.sym); return seta.Intersects(setb);}
		}
	}

	// Splits overlapping actions of 'state' and rescans after every pass
	// that split something, until a pass finds no overlapping pair.
	void MakeUnique(State state) {
		bool changed;
		do {
			changed = false;
			for (Action a = state.firstAction; a != null; a = a.next)
				for (Action b = a.next; b != null; b = b.next)
					if (Overlap(a, b)) { SplitActions(state, a, b); changed = true; }
		} while (changed);
	}

	// Replaces the targets of every action of 'state' that has more than one
	// target by a single melted state; a melted state is created only if none
	// exists yet for that set of target states.
	void MeltStates(State state) {
		bool ctx;
		BitArray targets;
		Symbol endOf;
		for (Action action = state.firstAction; action != null; action = action.next) {
			if (action.target.next != null) {
				GetTargetStates(action, out targets, out endOf, out ctx);
				Melted melt = StateWithSet(targets);
				if (melt == null) {
					State s = NewState(); s.endOf = endOf; s.ctx = ctx;
					for (Target targ = action.target; targ != null; targ = targ.next)
						s.MeltWith(targ.state);
					MakeUnique(s);
					melt = NewMelted(targets, s);
				}
				action.target.next = null;
				action.target.state = melt.state;
			}
		}
	}

	// Sets ctx on every state that is the first target of a context transition.
	void FindCtxStates() {
		for (State state = firstState; state != null; state = state.next)
			for (Action a = state.firstAction; a != null; a = a.next)
				if (a.tc == Node.contextTrans) a.target.state.ctx = true;
	}

	// Makes the automaton deterministic. States appended by MeltStates are
	// visited by the same loop because it follows the growing state list.
	public void MakeDeterministic() {
		State state;
		lastSimState = lastState.nr;
		maxStates = 2 * lastSimState; // heuristic for set size in Melted.set
		FindCtxStates();
		for (state = firstState; state != null; state = state.next)
			MakeUnique(state);
		for (state = firstState; state != null; state = state.next)
			MeltStates(state);
		DeleteRedundantStates();
		CombineShifts();
	}

	// Writes all states with their actions, followed by the character
	// classes, to the trace output.
	public void PrintStates() {
		trace.WriteLine();
		trace.WriteLine("---------- states ----------");
		for (State state = firstState; state != null; state = state.next) {
			bool first = true;
			// "E({0,12})" is 15 columns wide; pad non-final states to the same
			// width so that the state numbers line up
			if (state.endOf == null) trace.Write(new String(' ', 15));
			else trace.Write("E({0,12})", tab.Name(state.endOf.name));
			trace.Write("{0,3}:", state.nr);
			if (state.firstAction == null) trace.WriteLine();
			for (Action action = state.firstAction; action != null; action = action.next) {
				// further actions start below the first one:
				// 15 (token column) + 4 ("{0,3}:") + 1 (separator) = 20 columns
				if (first) {trace.Write(" "); first = false;} else trace.Write(new String(' ', 20));
				if (action.typ == Node.clas) trace.Write(((CharClass)tab.classes[action.sym]).name);
				else trace.Write("{0, 3}", Ch(action.sym));
				for (Target targ = action.target; targ != null; targ = targ.next)
					trace.Write(" {0, 3}", targ.state.nr);
				if (action.tc == Node.contextTrans) trace.WriteLine(" context"); else trace.WriteLine();
			}
		}
		trace.WriteLine();
		trace.WriteLine("---------- character classes ----------");
		tab.WriteCharClasses();
	}

	//---------------------------- actions --------------------------------

	// Returns the first action of 'state' that is triggered by ch, or null.
	public Action FindAction(State state, char ch) {
		for (Action a = state.firstAction; a != null; a = a.next) {
			if (a.typ == Node.chr && ch == a.sym) return a;
			else if (a.typ == Node.clas) {
				CharSet s = tab.CharClassSet(a.sym);
				if (s[ch]) return a;
			}
		}
		return null;
	}

	// Computes for action a: the set of non-melted states its targets stand
	// for, the token recognized in those targets (an error is reported if
	// they recognize different tokens), and whether any target is a context state.
	public void GetTargetStates(Action a, out BitArray targets, out Symbol endOf, out bool ctx) {
		// compute the set of target states
		targets = new BitArray(maxStates); endOf = null;
		ctx = false;
		for (Target t = a.target; t != null; t = t.next) {
			int stateNr = t.state.nr;
			if (stateNr <= lastSimState) targets[stateNr] = true;
			else targets.Or(MeltedSet(stateNr));
			if (t.state.endOf != null) {
				if (endOf == null || endOf == t.state.endOf)
					endOf = t.state.endOf;
				else
					errors.SemErr("Tokens " + endOf.name + " and " + t.state.endOf.name + " cannot be distinguished");
			}
			if (t.state.ctx) {
				ctx = true;
				// The following check seems to be unnecessary. It reported an error
				// if a symbol + context was the prefix of another symbol, e.g.
				//   s1 = "a" "b" "c".
				//   s2 = "a" CONTEXT("b").
				// But this is ok.
				// if (t.state.endOf != null) {
				//   Console.WriteLine("Ambiguous context clause");
				//   errors.count++;
				// }
			}
		}
	}

	//------------------------- melted states ------------------------------

	Melted firstMelted;	// head of melted state list

	// Registers 'state' as the melted state for 'set' (prepended to the list).
	Melted NewMelted(BitArray set, State state) {
		Melted m = new Melted(set, state);
		m.next = firstMelted; firstMelted = m;
		return m;
	}

	// Set of old states behind the melted state with number nr;
	// FatalError if no such melted state is registered.
	BitArray MeltedSet(int nr) {
		Melted m = firstMelted;
		while (m != null) {
			if (m.state.nr == nr) return m.set; else m = m.next;
		}
		throw new FatalError("compiler error in Melted.Set");
	}

	// Melted entry whose set equals s, or null if there is none.
	Melted StateWithSet(BitArray s) {
		for (Melted m = firstMelted; m != null; m = m.next)
			if (Sets.Equals(s, m.set)) return m;
		return null;
	}

	//------------------------ comments --------------------------------

	public Comment firstComment;	// list of comments

	// Builds a comment delimiter string from the node chain starting at p.
	// Reports errors for structured delimiters, for classes with more than
	// one character and for lengths other than 1 or 2 (then "?" is returned).
	string CommentStr(Node p) {
		StringBuilder s = new StringBuilder();
		while (p != null) {
			if (p.typ == Node.chr) {
				s.Append((char)p.val);
			} else if (p.typ == Node.clas) {
				CharSet set = tab.CharClassSet(p.val);
				if (set.Elements() != 1) parser.SemErr("character set contains more than 1 character");
				s.Append((char)set.First());
			} else parser.SemErr("comment delimiters may not be structured");
			p = p.next;
		}
		if (s.Length == 0 || s.Length > 2) {
			parser.SemErr("comment delimiters must be 1 or 2 characters long");
			s = new StringBuilder("?");
		}
		return s.ToString();
	}

	// Prepends a comment description to the list of comments.
	public void NewComment(Node from, Node to, bool nested) {
		Comment c = new Comment(CommentStr(from), CommentStr(to), nested);
		c.next = firstComment; firstComment = c;
	}


	//------------------------ scanner generation ----------------------

	// Emits the loop of a generated CommentN() method that scans for the stop
	// delimiter (and, for nested comments, for further start delimiters).
	void GenComBody(Comment com) {
		gen.WriteLine(  "\t\t\tfor(;;) {");
		gen.Write    (  "\t\t\t\tif ({0}) ", ChCond(com.stop[0])); gen.WriteLine("{");
		if (com.stop.Length == 1) {
			gen.WriteLine("\t\t\t\t\tlevel--;");
			gen.WriteLine("\t\t\t\t\tif (level == 0) { oldEols = line - line0; NextCh(); return true; }");
			gen.WriteLine("\t\t\t\t\tNextCh();");
		} else {
			gen.WriteLine("\t\t\t\t\tNextCh();");
			gen.WriteLine("\t\t\t\t\tif ({0}) {{", ChCond(com.stop[1]));
			gen.WriteLine("\t\t\t\t\t\tlevel--;");
			gen.WriteLine("\t\t\t\t\t\tif (level == 0) { oldEols = line - line0; NextCh(); return true; }");
			gen.WriteLine("\t\t\t\t\t\tNextCh();");
			gen.WriteLine("\t\t\t\t\t}");
		}
		if (com.nested) {
			gen.Write    ("\t\t\t\t}"); gen.Write(" else if ({0}) ", ChCond(com.start[0])); gen.WriteLine("{");
			if (com.start.Length == 1)
				gen.WriteLine("\t\t\t\t\tlevel++; NextCh();");
			else {
				gen.WriteLine("\t\t\t\t\tNextCh();");
				gen.Write    ("\t\t\t\t\tif ({0}) ", ChCond(com.start[1])); gen.WriteLine("{");
				gen.WriteLine("\t\t\t\t\t\tlevel++; NextCh();");
				gen.WriteLine("\t\t\t\t\t}");
			}
		}
		gen.WriteLine(    "\t\t\t\t} else if (ch == Buffer.EOF) return false;");
		gen.WriteLine(    "\t\t\t\telse NextCh();");
		gen.WriteLine(    "\t\t\t}");
	}

	// Emits the method "bool Comment<i>()" for the comment form com.
	void GenComment(Comment com, int i) {
		gen.WriteLine();
		gen.Write    ("\tbool Comment{0}() ", i); gen.WriteLine("{");
		gen.WriteLine("\t\tint level = 1, pos0 = pos, line0 = line, col0 = col, charPos0 = charPos;");
		if (com.start.Length == 1) {
			gen.WriteLine("\t\tNextCh();");
			GenComBody(com);
		} else {
			gen.WriteLine("\t\tNextCh();");
			gen.Write    ("\t\tif ({0}) ", ChCond(com.start[1])); gen.WriteLine("{");
			gen.WriteLine("\t\t\tNextCh();");
			GenComBody(com);
			gen.WriteLine("\t\t} else {");
			gen.WriteLine("\t\t\tbuffer.Pos = pos0; NextCh(); line = line0; col = col0; charPos = charPos0;");
			gen.WriteLine("\t\t}");
			gen.WriteLine("\t\treturn false;");
		}
		gen.WriteLine("\t}");
	}

	// Name of sym as it has to appear in a generated case label: the key
	// under which sym is stored in tab.literals if sym.name starts with a
	// letter and such an entry exists, otherwise sym.name itself.
	string SymName(Symbol sym) {
		if (Char.IsLetter(sym.name[0])) { // real name value is stored in Tab.literals
			foreach (DictionaryEntry e in tab.literals)
				if ((Symbol)e.Value == sym) return (string)e.Key;
		}
		return sym.name;
	}

	// Emits the switch statement that maps literal token values to token kinds.
	void GenLiterals () {
		if (ignoreCase) {
			gen.WriteLine("\t\tswitch (t.val.ToLower()) {");
		} else {
			gen.WriteLine("\t\tswitch (t.val) {");
		}
		foreach (IList ts in new IList[] { tab.terminals, tab.pragmas }) {
			foreach (Symbol sym in ts) {
				if (sym.tokenKind == Symbol.litToken) {
					string name = SymName(sym);
					if (ignoreCase) name = name.ToLower();
					// sym.name stores literals with quotes, e.g. "\"Literal\""
					gen.WriteLine("\t\t\tcase {0}: t.kind = {1}; break;", name, sym.n);
				}
			}
		}
		gen.WriteLine("\t\t\tdefault: break;");
		gen.Write("\t\t}");
	}

	// Emits the case branch of the generated scanner for 'state'.
	void WriteState(State state) {
		Symbol endOf = state.endOf;
		gen.WriteLine("\t\t\tcase {0}:", state.nr);
		if (endOf != null && state.firstAction != null) {
			gen.WriteLine("\t\t\t\trecEnd = pos; recKind = {0};", endOf.n);
		}
		bool ctxEnd = state.ctx;
		for (Action action = state.firstAction; action != null; action = action.next) {
			if (action == state.firstAction) gen.Write("\t\t\t\tif (");
			else gen.Write("\t\t\t\telse if (");
			if (action.typ == Node.chr) gen.Write(ChCond((char)action.sym));
			else PutRange(tab.CharClassSet(action.sym));
			gen.Write(") {");
			if (action.tc == Node.contextTrans) {
				gen.Write("apx++; "); ctxEnd = false;
			} else if (state.ctx)
				gen.Write("apx = 0; ");
			gen.Write("AddCh(); goto case {0};", action.target.state.nr);
			gen.WriteLine("}");
		}
		if (state.firstAction == null)
			gen.Write("\t\t\t\t{");
		else
			gen.Write("\t\t\t\telse {");
		if (ctxEnd) { // final context state: cut appendix
			gen.WriteLine();
			gen.WriteLine("\t\t\t\t\ttlen -= apx;");
			gen.WriteLine("\t\t\t\t\tSetScannerBehindT();");
			gen.Write("\t\t\t\t\t");
		}
		if (endOf == null) {
			gen.WriteLine("goto case 0;}");
		} else {
			gen.Write("t.kind = {0}; ", endOf.n);
			if (endOf.tokenKind == Symbol.classLitToken) {
				gen.WriteLine("t.val = new String(tval, 0, tlen); CheckLiteral(); return t;}");
			} else {
				gen.WriteLine("break;}");
			}
		}
	}

	// Emits the initialization of the generated start table from the actions
	// of the start state.
	void WriteStartTab() {
		for (Action action = firstState.firstAction; action != null; action = action.next) {
			int targetState = action.target.state.nr;
			if (action.typ == Node.chr) {
				gen.WriteLine("\t\tstart[" + action.sym + "] = " + targetState + "; ");
			} else {
				CharSet s = tab.CharClassSet(action.sym);
				for (CharSet.Range r = s.head; r != null; r = r.next) {
					gen.WriteLine("\t\tfor (int i = " + r.from + "; i <= " + r.to + "; ++i) start[i] = " + targetState + ";");
				}
			}
		}
		gen.WriteLine("\t\tstart[Buffer.EOF] = -1;");
	}

	// Generates Scanner.cs from Scanner.frame. Both files are closed when
	// generation ends, also if it is aborted by an exception.
	public void WriteScanner() {
		Generator g = new Generator(tab);
		fram = g.OpenFrame("Scanner.frame");
		try {
			gen = g.OpenGen("Scanner.cs");
			if (dirtyDFA) MakeDeterministic();

			g.GenCopyright();
			g.SkipFramePart("-->begin");

			g.CopyFramePart("-->namespace");
			if (tab.nsName != null && tab.nsName.Length > 0) {
				gen.Write("namespace ");
				gen.Write(tab.nsName);
				gen.Write(" {");
			}
			g.CopyFramePart("-->declarations");
			gen.WriteLine("\tconst int maxT = {0};", tab.terminals.Count - 1);
			gen.WriteLine("\tconst int noSym = {0};", tab.noSym.n);
			if (ignoreCase)
				gen.Write("\tchar valCh;       // current input character (for token.val)");
			g.CopyFramePart("-->initialization");
			WriteStartTab();
			g.CopyFramePart("-->casing1");
			if (ignoreCase) {
				gen.WriteLine("\t\tif (ch != Buffer.EOF) {");
				gen.WriteLine("\t\t\tvalCh = (char) ch;");
				gen.WriteLine("\t\t\tch = char.ToLower((char) ch);");
				gen.WriteLine("\t\t}");
			}
			g.CopyFramePart("-->casing2");
			gen.Write("\t\t\ttval[tlen++] = ");
			if (ignoreCase) gen.Write("valCh;"); else gen.Write("(char) ch;");
			g.CopyFramePart("-->comments");
			Comment com = firstComment;
			int comIdx = 0;
			while (com != null) {
				GenComment(com, comIdx);
				com = com.next; comIdx++;
			}
			g.CopyFramePart("-->literals"); GenLiterals();
			g.CopyFramePart("-->scan1");
			gen.Write("\t\t\t");
			if (tab.ignored.Elements() > 0) { PutRange(tab.ignored); } else { gen.Write("false"); }
			g.CopyFramePart("-->scan2");
			if (firstComment != null) {
				gen.Write("\t\tif (");
				com = firstComment; comIdx = 0;
				while (com != null) {
					gen.Write(ChCond(com.start[0]));
					gen.Write(" && Comment{0}()", comIdx);
					if (com.next != null) gen.Write(" ||");
					com = com.next; comIdx++;
				}
				gen.Write(") return NextToken();");
			}
			if (hasCtxMoves) { gen.WriteLine(); gen.Write("\t\tint apx = 0;"); } /* pdt */
			g.CopyFramePart("-->scan3");
			for (State state = firstState.next; state != null; state = state.next)
				WriteState(state);
			g.CopyFramePart(null);
			if (tab.nsName != null && tab.nsName.Length > 0) gen.Write("}");
		} finally {
			// gen is still null if OpenGen failed
			if (gen != null) gen.Close();
			fram.Close();
		}
	}

	// Creates an automaton that consists of the start state (number 0) only.
	public DFA (Parser parser) {
		this.parser = parser;
		tab = parser.tab;
		errors = parser.errors;
		trace = parser.trace;
		firstState = null; lastState = null; lastStateNr = -1;
		firstState = NewState();
		firstMelted = null; firstComment = null;
		ignoreCase = false;
		dirtyDFA = false;
		hasCtxMoves = false;
	}

} // end DFA

} // end namespace