Computer Laboratory

Automaton.cpp
Go to the documentation of this file.
1 
2 /*
3  * Copyright (c) 2013 Jonathan Anderson
4  * All rights reserved.
5  *
6  * This software was developed by SRI International and the University of
7  * Cambridge Computer Laboratory under DARPA/AFRL contract (FA8750-10-C-0237)
8  * ("CTSRD"), as part of the DARPA CRASH research programme.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  * notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  * notice, this list of conditions and the following disclaimer in the
17  * documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
20  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
23  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29  * SUCH DAMAGE.
30  */
31 
32 #include "Automaton.h"
33 #include "Debug.h"
34 #include "Names.h"
35 #include "State.h"
36 #include "Transition.h"
37 
38 #include "tesla.pb.h"
39 
40 #include <llvm/ADT/SmallPtrSet.h>
41 #include <llvm/ADT/Twine.h>
42 #include <llvm/Support/raw_ostream.h> // TODO: remove once TODOs below fixed
43 
44 #include <google/protobuf/text_format.h>
45 
46 #include <sstream>
47 #include <set>
48 #if __has_include(<unordered_map>) and !defined(__linux__)
49 #include <unordered_map>
50 #include <unordered_set>
51 using std::unordered_map;
52 using std::unordered_set;
53 #else
54 #include <tr1/unordered_map>
55 #include <tr1/unordered_set>
56 using std::tr1::unordered_map;
57 using std::tr1::unordered_set;
58 #endif
59 
60 #include <stdio.h>
61 #ifndef NDEBUG
62 #include <stdlib.h>
63 #endif
64 
65 using google::protobuf::TextFormat;
66 
67 using namespace llvm;
68 
69 using std::map;
70 using std::string;
71 using std::stringstream;
72 using std::vector;
73 
74 namespace tesla {
75 
76 namespace internal {
77 
78 class NFAParser {
79 public:
81  const Usage* Use,
82  const AutomataMap* Descriptions = NULL)
83  : Automaton(A), Use(Use), Descriptions(Descriptions),
84  SubAutomataAllowed(true)
85  {
86  }
87 
88  NFAParser& AllowSubAutomata(bool Allow);
89 
96  void Parse(OwningPtr<NFA>& Out, unsigned int id);
97 
98 private:
99  State* Parse(const Expression&, State& InitialState,
100  bool Init = false, bool Cleanup = false);
101 
102  State* Parse(const BooleanExpr&, State& InitialState);
103  State* Parse(const Sequence&, State& InitialState);
104  State* Parse(const NowEvent&, State& InitialState, bool, bool);
105  State* Parse(const FunctionEvent&, State& InitialState, bool, bool);
106  State* Parse(const FieldAssignment&, State& InitialState, bool, bool);
107  State* SubAutomaton(const Identifier&, State& InitialState);
108 
109  // Inclusive-Or stuff
110  void ConvertIncOrToExcOr(State& InitialState, State& EndState);
111  void CalculateReachableTransitionsBetween(const State& InitialState, State& EndState, SmallVector<Transition*,16>& Ts);
112  TransitionVectors GenerateTransitionPrefixesOf(SmallVector<Transition*,16>& Ts);
113  void CreateParallelAutomata(TransitionVectors& prefixes, SmallVector<Transition*,16>& rhs, State& InitialState, State& EndState);
114  void CreateParallelAutomaton(SmallVector<Transition*,16>& lhs, SmallVector<Transition*,16>& rhs, State& InitialState, State& EndState);
115  void CreateTransitionChainCopy(SmallVector<Transition*,16>& chain, State& InitialState, State& EndState);
116 
118  const Usage* Use;
119  const AutomataMap* Descriptions;
120 
121  bool SubAutomataAllowed;
122  State* Start;
123  StateVector States;
124  TransitionVector Transitions;
125 };
126 
127 }
128 
129 
130 // ---- Automaton implementation ----------------------------------------------
131 Automaton::Automaton(size_t id, const AutomatonDescription& A,
132  const Usage *Use, StringRef Name,
133  ArrayRef<State*> S, const TransitionSets& Transitions)
134  : id(id), assertion(A), use(Use), name(Name), Transitions(Transitions)
135 {
136  assert(!Use || A.identifier() == Use->identifier());
137  States.insert(States.begin(), S.begin(), S.end());
138 }
139 
141  for (auto i : Transitions)
142  for (const Transition *T : i)
143  if (!T->IsRealisable())
144  return false;
145 
146  return true;
147 }
148 
149 string Automaton::String() const {
150  stringstream ss;
151  ss << "automaton '" << Name() << "' {\n";
152 
153  for (State *S : States) {
154  assert(S != NULL);
155  ss << "\t" << S->String() << "\n";
156  }
157 
158  ss << "}";
159 
160  return ss.str();
161 }
162 
163 string Automaton::Dot() const {
164  stringstream ss;
165  ss
166  << "/*\n"
167  << " * " << Name() << "\n"
168  << " */\n"
169  << "digraph automaton_" << id << " {\n"
170  << "\tgraph [ truecolor=true, bgcolor=\"transparent\", dpi = 60 ];\n"
171  << "\tnode ["
172  << " shape = circle,"
173  << " fontname = \"Monospace\","
174  << " style = filled, fillcolor = \"white\""
175  << "];\n"
176  << "\tedge [ fontname = \"Monospace\" ];\n"
177  << "\n"
178  ;
179 
180  for (State *S : States) {
181  ss << "\t" << S->Dot() << "\n";
182  }
183 
184  int i = 0;
185  for (auto EquivalenceClass : Transitions) {
186  string color = ("\"/dark28/" + Twine(i++ % 8 + 1) + "\"").str();
187  auto *Head = *EquivalenceClass.begin();
188 
189  ss
190  << "\n\t/*\n"
191  << "\t * " << Head->ShortLabel() << "\n"
192  << "\t */\n"
193  << "\tedge [ "
194  << "color = " << color << ", "
195  << "fontcolor = " << color << ",\n\t\t"
196  << "label = \"" << Head->DotLabel();
197 
198  if (Head->RequiresInit())
199  ss << "\\n&laquo;init&raquo;";
200 
201  if (Head->RequiresCleanup())
202  ss << "\\n&laquo;cleanup&raquo;";
203 
204  ss
205  << "\""
206  << " ];\n";
207 
208  for (auto *T : EquivalenceClass)
209  ss
210  << "\t" << T->Source().ID() << " -> " << T->Destination().ID() << ";\n"
211  ;
212  }
213 
214  ss
215  << "\n\t/*\n"
216  << "\t * Footer:\n"
217  << "\t */\n"
218  << "\tfontname = \"Monospace\";\n"
219  << "\tlabel = \"" << Name() << "\";\n"
220  << "\tlabelloc = top;\n"
221  << "\tlabeljust = left;\n"
222  << "}";
223 
224  return ss.str();
225 }
226 
227 
228 
229 // ---- NFA implementation ----------------------------------------------------
230 NFA* NFA::Parse(const AutomatonDescription *A, const Usage *Use,
231  unsigned int id) {
232  OwningPtr<NFA> N;
233  internal::NFAParser(*A, Use).Parse(N, id);
234  assert(N);
235 
236  return N.take();
237 }
238 
239 NFA* NFA::Link(const AutomataMap& Descriptions) {
240  assert(id < 1000);
241 
242  OwningPtr<NFA> N;
243  internal::NFAParser(assertion, use, &Descriptions)
244  .AllowSubAutomata(false)
245  .Parse(N, id);
246  assert(N);
247 
248  return N.take();
249 }
250 
251 NFA::NFA(size_t id, const AutomatonDescription& A,
252  const Usage *Use, StringRef Name,
253  ArrayRef<State*> S, const TransitionSets& T)
254  : Automaton(id, A, Use, Name, S, T)
255 {
256 }
257 
258 
259 namespace internal {
261  assert(Allow || Descriptions != NULL && "need a source of subautomata");
262  SubAutomataAllowed = Allow;
263 
264  return *this;
265 }
266 
267 void NFAParser::Parse(OwningPtr<NFA>& Out, unsigned int id) {
268  debugs("tesla.automata.parsing")
269  << "Parsing '" << ShortName(Automaton.identifier()) << "'...\n";
270 
271  size_t VariableRefs = 0;
272  for (auto A : Automaton.argument())
273  if (A.type() == Argument::Variable)
274  VariableRefs++;
275 
276  Start = State::NewBuilder(States)
277  .SetStartState()
278  .SetRefCount(VariableRefs)
279  .Build();
280 
281  // Parse the automaton entry point, if provided...
282  if (Use && Use->has_beginning()) {
283  Start = Parse(Use->beginning(), *Start, true);
284  if (!Start) {
285  string Str;
286  TextFormat::PrintToString(Use->beginning(), &Str);
287  panic("failed to parse automaton 'beginning' event: " + Str);
288  }
289  }
290 
291  // Parse the main automaton itself.
292  State *End = Parse(Automaton.expression(), *Start);
293  if (!End)
294  panic("failed to parse automaton '" + ShortName(Automaton.identifier()));
295 
296  // Parse the automaton finalisation point, if provided...
297  if (Use && Use->has_end()) {
298  End = Parse(Use->end(), *End, false, true);
299  if (!End) {
300  string Str;
301  TextFormat::PrintToString(Use->end(), &Str);
302  panic("failed to parse automaton 'end' event: " + Str);
303  }
304  }
305 
306  // Handle out-of-scope events: if we observe one, we it should cause us to
307  // stay in the post-initialisation state.
308  debugs("tesla.automata.parsing.out-of-scope") << "out-of-scope events:\n";
309  vector<const Transition*> OutOfScope;
310  for (const Transition *T : Transitions) {
311  // Have we already noted an equivalent out-of-scope transition?
312  //
313  // This check is subtly different from looping over the equivalence
314  // classes created below: that equivalence is based on the transition's
315  // input event only, so transitions in the same equivalence class can have
316  // different in-scope vs out-of-scope characteristics.
317  bool AlreadyHave = false;
318  for (const Transition *Existing : OutOfScope)
319  if (Existing->EquivalentTo(*T)) {
320  AlreadyHave = true;
321  break;
322  }
323 
324  if (!AlreadyHave && !T->IsStrict() && !T->RequiresInit()) {
325  OutOfScope.push_back(T);
326  debugs("tesla.automata.parsing.out-of-scope")
327  << " " << T->String() << "\n";
328  }
329  }
330 
331  for (auto *T : OutOfScope) {
332  State& Destination = *(T->RequiresCleanup() ? End : Start);
333 
334  switch (T->getKind()) {
335  case Transition::Now: // fall through
336  case Transition::Null: // fall through
338  break;
339 
340  case Transition::FieldAssign: // fall through
341  case Transition::Fn:
342  Transition::Copy(*Start, Destination, T, Transitions, true);
343  break;
344  }
345  }
346 
347  const Identifier &ID = Automaton.identifier();
348 
349  string Description;
350  TextFormat::PrintToString(Automaton, &Description);
351 
352  TransitionSets TEquivClasses;
353  Transition::GroupClasses(Transitions, TEquivClasses);
354 
355  Out.reset(new NFA(id, Automaton, Use, ShortName(ID), States, TEquivClasses));
356 
357  debugs("tesla.automata.parsing") << "parsed '" << Out->Name() << "'.\n\n";
358 }
359 
360 State* NFAParser::Parse(const Expression& Expr, State& Start,
361  bool Init, bool Cleanup) {
362 
363  switch (Expr.type()) {
365  if (Init)
366  panic("boolean expression cannot do initialisation");
367 
368  if (Cleanup)
369  panic("boolean expression cannot do cleanup");
370 
371  return Parse(Expr.booleanexpr(), Start);
372 
374  if (Init)
375  panic("sequence transition cannot do initialisation");
376 
377  if (Cleanup)
378  panic("sequence transition cannot do cleanup");
379 
380  return Parse(Expr.sequence(), Start);
381 
382  case Expression::NULL_EXPR:
383  if (Init)
384  panic("epsilon transition cannot do initialisation");
385 
386  if (Cleanup)
387  panic("epsilon transition cannot do cleanup");
388 
389  return &Start;
390 
391  case Expression::NOW:
392  return Parse(Expr.now(), Start, Init, Cleanup);
393 
394  case Expression::FUNCTION:
395  return Parse(Expr.function(), Start, Init, Cleanup);
396 
397  case Expression::FIELD_ASSIGN:
398  return Parse(Expr.fieldassign(), Start, Init, Cleanup);
399 
401  if (Init)
402  panic("sub-automaton transition cannot do initialisation");
403 
404  if (Cleanup)
405  panic("sub-automaton transition cannot do cleanup");
406 
407  if (SubAutomataAllowed)
408  return SubAutomaton(Expr.subautomaton(), Start);
409 
410  else {
411  // If sub-automata are not allowed, find and parse the sub's definition.
412  assert(Descriptions != NULL);
413  const Identifier& ID = Expr.subautomaton();
414  auto i = Descriptions->find(ID);
415  if (i == Descriptions->end())
416  panic("subautomaton '" + ShortName(ID) + "' not defined");
417 
418  return Parse(i->second->expression(), Start);
419  }
420  }
421 }
422 
423 State* NFAParser::Parse(const BooleanExpr& Expr, State& Branch) {
424  assert(Expr.expression_size() == 2);
425  const Expression& LHS = Expr.expression(0);
426  const Expression& RHS = Expr.expression(1);
427 
428  State *LHSFinal = Parse(LHS, Branch);
429  State *RHSFinal = Parse(RHS, Branch);
430 
431  if (!LHSFinal || !RHSFinal)
432  return NULL;
433 
434  State *Join = State::NewBuilder(States).Build();
435  Transition::Create(*LHSFinal, *Join, Transitions);
436  Transition::Create(*RHSFinal, *Join, Transitions);
437 
438  switch (Expr.operation()) {
439  case BooleanExpr::BE_Xor:
440  return Join;
441 
442  case BooleanExpr::BE_Or:
443  ConvertIncOrToExcOr(Branch, *Join);
444  return Join;
445 
446  case BooleanExpr::BE_And:
447  // TODO: join two (sets of) final states together
448  errs() << "TESLA WARNING: using unsupported AND feature\n";
449  return Join;
450  }
451 }
452 
453 State* NFAParser::Parse(const Sequence& Seq, State& Start) {
454  State *Current = &Start;
455  for (const Expression& E : Seq.expression())
456  Current = Parse(E, *Current);
457 
458  return Current;
459 }
460 
461 State* NFAParser::Parse(const NowEvent& now, State& InitialState,
462  bool Init, bool Cleanup) {
463  State *Final = State::NewBuilder(States).SetAccepting(Cleanup).Build();
464  Transition::Create(InitialState, *Final, now, Automaton, Transitions,
465  Init, Cleanup);
466  return Final;
467 }
468 
469 State* NFAParser::Parse(const FunctionEvent& Ev, State& From,
470  bool Init, bool Cleanup) {
471  State *Final = State::NewBuilder(States).SetAccepting(Cleanup).Build();
472  Transition::Create(From, *Final, Ev, Transitions, Init, Cleanup);
473  return Final;
474 }
475 
476 State* NFAParser::Parse(const FieldAssignment& Assign, State& From,
477  bool Init, bool Cleanup) {
478  State *Final = State::NewBuilder(States).SetAccepting(Cleanup).Build();
479  Transition::Create(From, *Final, Assign, Transitions, Init, Cleanup);
480  return Final;
481 }
482 
483 State* NFAParser::SubAutomaton(const Identifier& ID, State& InitialState) {
484  State *Final = State::NewBuilder(States).Build();
485  Transition::CreateSubAutomaton(InitialState, *Final, ID, Transitions);
486  return Final;
487 }
488 
489 string stringifyTransitionVector(SmallVector<Transition*,16>& Ts) {
490  stringstream ss;
491  for (auto T : Ts) {
492  ss << T->String() << " ";
493  }
494  return ss.str();
495 }
496 
498  stringstream ss;
499  ss << "[";
500  for (auto& TV : TVs) {
501  ss << "{";
502  ss << stringifyTransitionVector(TV);
503  ss << "}, ";
504  }
505  ss << "]";
506  return ss.str();
507 }
508 
509 void NFAParser::ConvertIncOrToExcOr(State& InitialState, State& EndState) {
510  debugs("tesla.automata.inclusive_or")
511  << "Converting inclusive-or to exclusive-or\n";
512 
513  /*
514  * X `inclusive-or` Y is computed as:
515  * (prefix*(X) || Y) | (X || prefix*(Y)) | (X || Y)
516  * where
517  * X and Y are sequences of transitions
518  * prefix*(X) is the set of sets of transition prefixes of X
519  * (This is equivalent to the powerset of X minus X itself.)
520  * || is the parallel operator that allow possible interleavings of the
521  * lhs and rhs
522  *
523  * For example, ab `inclusive-or` cd refers to the following automaton:
524  * = (prefix*(ab) || cd) | (ab || prefix*(cd)) | (ab || cd)
525  * = (ø || cd) | (a || cd) | (ab || ø) | (ab || c) | (ab || cd)
526  * = (cd) | (a || cd) | (ab) | (ab || c) | (ab || cd)
527  * = (cd) | (acd | cad | cda) | (ab) | abc | acb | cab | abcd | acbd | acdb | cdab | cabd | cadb
528  * = cd | acd | cad | cda | ab | abc | acb | cab | abcd | acbd | acdb | cdab | cabd | cadb
529  */
530  // separate lhs and rhs into different vectors
531  SmallVector<Transition*,16> lhs, rhs;
532  auto TI = InitialState.begin();
533  Transition *LhsFirstT = *TI;
534  Transition *RhsFirstT = *(TI+1);
535 
536  debugs("tesla.automata.inclusive_or")
537  << "Lhs: " << LhsFirstT->String() << "\n"
538  << "Rhs: " << RhsFirstT->String() << "\n"
539  ;
540 
541  lhs.push_back(LhsFirstT);
542  rhs.push_back(RhsFirstT);
543  CalculateReachableTransitionsBetween(LhsFirstT->Destination(), EndState, lhs);
544  CalculateReachableTransitionsBetween(RhsFirstT->Destination(), EndState, rhs);
545 
546  debugs("tesla.automata.inclusive_or")
547  << "Calculated reachable transitions for lhs: "
548  << stringifyTransitionVector(lhs) << "\n"
549  << "Calculated reachable transitions for rhs: "
550  << stringifyTransitionVector(rhs) << "\n"
551  ;
552 
553  // End is already the result of lhs | rhs
554  // We need to add to this:
555  // (prefix*(lhs) || rhs) | (lhs || prefix*(rhs)) | (lhs || rhs)
556  TransitionVectors lhsPrefixes = GenerateTransitionPrefixesOf(lhs);
557  TransitionVectors rhsPrefixes = GenerateTransitionPrefixesOf(rhs);
558  CreateParallelAutomata(lhsPrefixes, rhs, InitialState, EndState);
559  CreateParallelAutomata(rhsPrefixes, lhs, InitialState, EndState);
560  CreateParallelAutomaton(lhs, rhs, InitialState, EndState);
561 }
562 
563 void NFAParser::CalculateReachableTransitionsBetween(const State& Start, State& End, SmallVector<Transition*,16>& Ts) {
564  Transition* T = *Start.begin();
565  if (Start.ID() == End.ID()) {
566  return;
567  }
568  if (!isa<NullTransition>(T)) { // skip epsilon edges
569  Ts.push_back(T);
570  }
571  CalculateReachableTransitionsBetween(T->Destination(), End, Ts);
572 }
573 
574 TransitionVectors NFAParser::GenerateTransitionPrefixesOf(SmallVector<Transition*,16>& Ts) {
575 
576  debugs("tesla.automata.inclusive_or")
577  << "Generating transition prefixes of "
578  << stringifyTransitionVector(Ts) << "\n"
579  ;
580 
581  TransitionVectors prefixes;
582  SmallVector<Transition*,16> lastPrefix;
583  // add empty prefix
584  prefixes.push_back(lastPrefix);
585  if (Ts.size() > 1) { // catch corner case where length of Ts is 1
586  bool nonEmptyPrefix = false;
587  for (auto T : Ts) {
588  lastPrefix.push_back(T);
589  prefixes.push_back(lastPrefix);
590  nonEmptyPrefix = true;
591  }
592  if (nonEmptyPrefix) {
593  prefixes.pop_back(); // delete the last element as it is not a prefix
594  }
595  }
596 
597  debugs("tesla.automata.inclusive_or")
598  << "Computed prefixes: " << stringifyTransitionVectors(prefixes) << "\n";
599 
600  return prefixes;
601 }
602 
603 void NFAParser::CreateParallelAutomata(TransitionVectors& prefixes, SmallVector<Transition*,16>& rhs, State& InitialState, State& EndState) {
604  for (auto prefix : prefixes) {
605  // Skip the empty prefix because lhs | rhs has already been constructed by
606  // Parse(const BooleanExpr& Expr, State& Branch) above.
607  if (!prefix.empty()) {
608  CreateParallelAutomaton(prefix, rhs, InitialState, EndState);
609  }
610  }
611 }
612 
613 // Compute the automaton for lhs || rhs (where || is the parallel operator)
614 void NFAParser::CreateParallelAutomaton(SmallVector<Transition*,16>& lhs, SmallVector<Transition*,16>& rhs, State& InitialState, State& EndState) {
615  /*
616  * We build the automaton recursively by repeatedly decomposing lhs || rhs,
617  * until it cannot be decomposed any further.
618  * For example, ab || cd can be decomposed as follows:
619  * ab || cd = a (b || cd) | c (ab || d)
620  * = a ( b (ø || cd) | c (b || d) ) | c ( a (b || d) | d ( ab || ø) )
621  * = a ( bcd | c (bd | db) ) | c ( a (bd | db) | dab )
622  * = a ( bcd | cbd | cdb ) | c ( abd | adb | dab )
623  * = abcd | acbd | acdb | cabd | cadb | cdab
624  */
625 
626  debugs("tesla.automata.inclusive_or")
627  << "Entering CreateParallelAutomaton("
628  << stringifyTransitionVector(lhs) << ","
629  << stringifyTransitionVector(rhs) << ")\n"
630  ;
631 
632  if (lhs.empty()) {
633  CreateTransitionChainCopy(rhs, InitialState, EndState);
634  }
635  else if (rhs.empty()) {
636  CreateTransitionChainCopy(lhs, InitialState, EndState);
637  }
638  else {
639  // Decompose as per above comment
640  // a (b ||cd)
641  Transition *LhsFirstT = lhs.front();
642  State *LhsFirstTNewDest = State::NewBuilder(States).Build();
643  Transition::Copy(InitialState, *LhsFirstTNewDest, LhsFirstT, Transitions);
644  SmallVector<Transition*,16> lhsCopy = lhs;
645  lhsCopy.erase(lhsCopy.begin()); // TODO: use a more efficient data structure (deque?)
646  CreateParallelAutomaton(lhsCopy, rhs, *LhsFirstTNewDest, EndState);
647 
648  // c (ab || d)
649  Transition *RhsFirstT = rhs.front();
650  State *RhsFirstTNewDest = State::NewBuilder(States).Build();
651  Transition::Copy(InitialState, *RhsFirstTNewDest, RhsFirstT, Transitions);
652  SmallVector<Transition*,16> rhsCopy = rhs;
653  rhsCopy.erase(rhsCopy.begin()); // TODO: use a more efficient data structure (deque?)
654  CreateParallelAutomaton(lhs, rhsCopy, *RhsFirstTNewDest, EndState);
655  }
656 
657  debugs("tesla.automata.inclusive_or")
658  << "Exiting CreateParallelAutomaton("
659  << stringifyTransitionVector(lhs) << ","
660  << stringifyTransitionVector(rhs) << ")\n"
661  ;
662 }
663 
664 void NFAParser::CreateTransitionChainCopy(SmallVector<Transition*,16>& chain, State& InitialState, State& EndState) {
665  State* CurrSource = &InitialState;
666 
667  debugs("tesla.automata.inclusive_or")
668  << "Creating copy of transition chain: "
669  << stringifyTransitionVector(chain) << "\n"
670  ;
671 
672  for (auto TI=chain.begin(), TE=chain.end()-1; TI != TE; TI++) {
673  Transition* T = *TI;
674  debugs("tesla.automata.inclusive_or")
675  << "Creating copy of transition: " << T->String() << "\n";
676 
677  State* TDest = State::NewBuilder(States).Build();
678  Transition::Copy(*CurrSource, *TDest, T, Transitions);
679  CurrSource = TDest;
680  }
681  // last transition copy goes from CurrSource -> End
682  Transition::Copy(*CurrSource, EndState, chain.back(), Transitions);
683 }
684 
685 typedef std::set<unsigned> NFAState;
686 raw_ostream& operator << (raw_ostream& Out, const NFAState& S) {
687  for (unsigned i : S)
688  Out << i << " ";
689  Out << "\n";
690  return Out;
691 }
693 {
694  size_t operator()(const NFAState &S) const {
695  if (S.size() == 0) return 0;
696  return *S.begin();
697  }
698 };
699 
700 raw_ostream& operator << (
701  raw_ostream& Out, const unordered_map<NFAState, State*, NFAStateHash>& M) {
702  for (auto I : M) Out << (I.second->ID()) << " = " << I.first;
703  return Out;
704 }
705 
706 class DFABuilder {
709  unordered_map<NFAState, State*, NFAStateHash> DFAStates;
711  unordered_set<unsigned> FinishedStates;
713  llvm::SmallVector<
714  std::pair<NFAState, std::pair<bool,bool> >, 16> UnfinishedStates;
715  StateVector States;
716  TransitionVector Transitions;
720  void collectFrontier(NFAState& N, const State* S, bool& Start, bool& Final) {
721  N.insert(S->ID());
722  Start |= S->IsStartState();
723  Final |= S->IsAcceptingState();
724  for (Transition *T : *S)
725  if (T->getKind() == Transition::Null)
726  collectFrontier(N, &T->Destination());
727  }
728 
729  void collectFrontier(NFAState& N, const State* S) {
730  bool Start = true;
731  bool Final = true;
732  collectFrontier(N, S, Start, Final);
733  }
734  State *stateForNFAStates(NFAState& NStates, bool Start, bool Final) {
735  auto Existing = DFAStates.find(NStates);
736  if (Existing != DFAStates.end())
737  return Existing->second;
738 
739  auto Builder = State::NewBuilder(States);
740  Builder.SetStartState(Start);
741  Builder.SetAccepting(Final);
742  if (Start)
743  Builder.SetRefCount(RefCount);
744 
745  std::stringstream Name;
746  std::copy(NStates.begin(), NStates.end(),
747  std::ostream_iterator<int>(Name,","));
748  Builder.SetName(StringRef("NFA:" + Name.str()).drop_back(1));
749 
750  State *DS = Builder.Build();
751  DFAStates.insert(std::make_pair(NStates, DS));
752  UnfinishedStates.push_back(
753  std::make_pair(NStates, std::make_pair(Start, Final)));
754  return DS;
755  }
756 
757  State *stateForNFAState(const State *S) {
758  if (RefCount == 0)
759  RefCount = S->References().size();
760 
761  NFAState NStates;
762  bool Start = false;
763  bool Final = false;
764  collectFrontier(NStates, S, Start, Final);
765  return stateForNFAStates(NStates, Start, Final);
766  }
767 
768  public:
770  DFA *ConstructDFA(const NFA *N) {
771  assert(N != NULL);
772  // We can't reuse these currently.
773  assert(States.empty());
774  // The set of DFA states that correspond to the current location
775  NFAState CurrentState;
776  stateForNFAState(N->States.front());
777  while (!UnfinishedStates.empty()) {
778  CurrentState = UnfinishedStates.back().first;
779  auto StartFinal = UnfinishedStates.back().second;
780  bool Start = StartFinal.first;
781  bool Final = StartFinal.second;
782  UnfinishedStates.pop_back();
783  // Find the NFA states that correspond to the current state.
784  State *DS = stateForNFAStates(CurrentState, Start, Final);
785  if (FinishedStates.find(DS->ID()) != FinishedStates.end())
786  continue;
787  FinishedStates.insert(DS->ID());
788  // Now we have a state, we need to handle its transitions.
789  // If there is only one state in the current state set, then we can just
790  // copy all of the transitions from it.
791  unordered_set<Transition*> FinishedTransitions;
792  // Loop over all of the NFA states and find their outgoing transitions
793  for (auto SI=CurrentState.begin(), SE=CurrentState.end() ; SI!=SE ; ++SI) {
794  unsigned I = *SI;
795  State *NState = N->States[I];
796  Start = false;
797  Final = false;
798  for (auto TI=NState->begin(), TE=NState->end() ; TI!=TE ; ++TI) {
799  Transition *T = *TI;
800  if (T->getKind() == Transition::Null) continue;
801  assert(T->IsRealisable());
802  if (FinishedTransitions.count(T) != 0) continue;
803  NFAState Destinations;
804  collectFrontier(Destinations, &T->Destination(), Start, Final);
805  FinishedTransitions.insert(T);
806  // Find the other transitions from this state that are equivalent to this one.
807  auto DTI = TI;
808  for (++DTI ; DTI!=TE ; ++DTI) {
809  if ((*DTI)->EquivalentTo(*T)) {
810  assert(FinishedTransitions.count(*DTI) == 0);
811  FinishedTransitions.insert(*DTI);
812  collectFrontier(Destinations, &(*DTI)->Destination(), Start, Final);
813  }
814  }
815  auto DSI = SI;
816  for (++DSI ; DSI!=SE ; ++DSI)
817  for (Transition *DT : *N->States[*DSI]) {
818  if (DT->EquivalentTo(*T)) {
819  assert(FinishedTransitions.count(DT) == 0);
820  FinishedTransitions.insert(DT);
821  collectFrontier(Destinations, &DT->Destination(), Start, Final);
822  }
823  }
824 
825  State *Dest = stateForNFAStates(Destinations, Start, Final);
826  Transition::Copy(*DS, *Dest, T, Transitions);
827  }
828  }
829  }
830  TransitionSets TEquivClasses;
831  Transition::GroupClasses(Transitions, TEquivClasses);
832 
833  DFA *D = new DFA(N->ID(),
834  const_cast<AutomatonDescription&>(N->getAssertion()),
835  N->Use(), N->Name(), States, TEquivClasses);
836 
837  debugs("tesla.automata.dfa")
838  << "DFA conversion results:\n"
839  << "> NFA: " << N->String() << "\n"
840  << "> DFA: " << D->String() << "\n"
841  << ">> DFA state map:\n"
842  << DFAStates
843  << ">> DFA transition equivalence classes:\n"
844  ;
845 
846  return D;
847  }
848 
849 private:
850  size_t RefCount = 0;
851 };
852 
853 
854 } // internal namespace
855 
856 // ---- DFA implementation ----------------------------------------------------
857 DFA* DFA::Convert(const NFA* N) {
859  return B.ConstructDFA(N);
860 }
861 
862 DFA::DFA(size_t id, AutomatonDescription& A, const Usage* Use, StringRef Name,
863  ArrayRef<State*> S, const TransitionSets& T)
864  : Automaton(id, A, Use, Name, S, T)
865 {
866 #ifndef NDEBUG
867  for (auto i : T)
868  for (const Transition* T: i)
869  assert(T->IsRealisable());
870 #endif
871 }
872 
873 } // namespace tesla
874