{-- SLR(1) parser as in the Compiler Construction notes section 14 examples at end --} import Data.List as L import Data.Set as S import Data.Map as M import Control.Monad.State import Control.Monad import Control.Monad.Error -- import System.IO.Unsafe as Unsafe -- debugMonad s = Unsafe.unsafePerformIO(putStrLn s >> return (return ())) partitionBy :: (Ord k, Ord a) => (a -> k) -> Set a -> Map k (Set a) partitionBy f s = S.fold (\a -> M.insertWith (S.union) (f a) (S.singleton a)) M.empty s class (Eq a, Ord a) => Alphabet a where nonterminals :: [a] terminals :: [a] {----------------------------- basic data structures ------------------------------} data Symbol a = Letter a | EOF | Marker | Start deriving (Eq,Ord) data Production a = Production (Symbol a) [Symbol a] deriving (Eq,Ord) data Item a = Item (Symbol a) ([Symbol a], [Symbol a]) deriving (Eq,Ord) data ItemSet a = ItemSet (Set (Item a)) deriving (Eq,Ord) data Transition a = TransStart | TransSucc (Symbol a) | TransFinal (Production a) | TransAccept data CFSMState a = ItemSetState (Set (Item a)) | FinalState | AcceptState | StartState deriving (Eq,Ord,Show) data CFSM a = CFSM (Map (CFSMState a) [(Transition a, CFSMState a)]) cfsmMap (CFSM m) = m firstState :: (Ord a) => CFSM a -> Maybe (CFSMState a) firstState (CFSM m) = case M.lookup StartState m of (Just ((_,st):_)) -> Just st _ -> Nothing subject (Production s _) = s instance (Ord a, Show a) => Show (CFSM a) where show (CFSM cm) = let numberStates m = M.fromList (zip (M.keys m) [1..]) showJint (Just i) = show i showTransitions (Just tl) = concat (intersperse "\n" (L.map (\(t,s') -> show t ++ " => " ++ (case t of TransAccept -> "accept" TransFinal _ -> "$" _ -> (showJint (M.lookup s' ns)))) tl)) showState ns cm s@(ItemSetState (is)) = (showJint (M.lookup s ns)) ++ ": {\n\t" ++ (concat (L.intersperse "\n\t" (L.map show (S.toList is)))) ++ "\n}\n" ++ (showTransitions (M.lookup s cm)) ++ "\n" showState _ _ _ = "" ns = numberStates cm in concat (intersperse "\n" (L.map (showState ns cm) (M.keys cm))) instance (Show a) => Show (Transition a) where show TransStart = "S" show (TransSucc a) = show a show (TransFinal p) = show p show TransAccept = "eof" instance (Show a) => Show (Symbol a) where show (Letter a) = show a show EOF = "EOF" show Marker = "." show Start = "Start" instance (Show a) => Show (Production a) where show (Production s sl) = (show s) ++ " --> " ++ (concat (L.intersperse " " (L.map show sl))) instance (Show a) => Show (Item a) where show (Item s (l,r)) = (show s) ++ " --> " ++ (concat (L.intersperse " " (L.map show l))) ++ "." ++ (concat (L.intersperse " " (L.map show r))) instance (Show a) => Show (ItemSet a) where show (ItemSet il) = concat (L.intersperse "\n" (L.map show (S.toList il))) (-->) :: a -> [a] -> Production a n --> l = Production (Letter n) (L.map Letter l) {----------------------------- closure of an item in a set of productions ------------------------------} nonTerminalAfterMarker :: (Alphabet a) => Item a -> Maybe a nonTerminalAfterMarker (Item _ (_,r)) = case r of [] -> Nothing x:_ -> case x of Letter a -> if elem a nonterminals then Just a else Nothing _ -> Nothing startsWith :: (Alphabet a) => a -> Set (Production a) -> Set (Production a) startsWith e pl = S.filter (\(Production s _) -> case s of {Letter a -> a == e ; _ -> False}) pl initialItem :: (Alphabet a) => Production a -> Item a initialItem (Production s pl) = Item s ([],pl) startProduction :: a -> Production a startProduction e = Production Start [Letter e, EOF] grammar :: (Alphabet a) => Set (Production a) -> a -> Set (Production a) grammar g e = S.insert (startProduction e) g startState :: (Alphabet a) => Set (Production a) -> a -> CFSMState a startState g e = ItemSetState $ itemClosure g (initialItem (startProduction e)) nextItem :: (Alphabet a) => Item a -> Item a nextItem i@(Item _ (_, [])) = i nextItem (Item s (sl, x:sr)) = Item s (sl ++ [x], sr) --gets symbol after marker, if there is one nextSymbol :: (Alphabet a) => Item a -> Maybe (Symbol a) nextSymbol (Item _ (_,sr)) = case sr of [] -> Nothing s:_ -> Just s closeItem :: (Alphabet a) => Set (Production a) -> Item a -> Set (Item a) closeItem g p = case (nonTerminalAfterMarker p) of Nothing -> S.singleton p Just e -> S.insert p (S.map initialItem (startsWith e g)) itemClosure :: (Alphabet a) => Set (Production a) -> Item a -> Set (Item a) itemClosure g i = let itemClosure' isAcc is = if S.null is then isAcc else let is' = S.difference (S.unions (S.toList (S.map (closeItem g) is))) isAcc in itemClosure' (S.union isAcc is') is' in itemClosure' S.empty (closeItem g i) closure :: (Alphabet a) => Set (Production a) -> Set (Item a) -> Set (Item a) closure g ps = S.unions (S.toList (S.map (itemClosure g) ps)) itemToProduction :: (Alphabet a) => Item a -> Production a itemToProduction (Item s (sl,sr)) = Production s (sl ++ sr) successorStates :: (Alphabet a) => Set (Production a) -> Set (Item a) -> [(Transition a, CFSMState a)] successorStates g is = L.map ssm (M.toList (partitionBy nextSymbol is)) where ssm (Just EOF, is) = (TransAccept, AcceptState) ssm (Just e, is) = (TransSucc e, ItemSetState (closure g (S.map nextItem is))) ssm (Nothing, is) = (TransFinal (itemToProduction (head (S.toList is))), FinalState) {----------------------------- build the CFSM ------------------------------} data CFSMCalcState a = CFSMCalcState {s_seen :: [CFSMState a], s_map :: Map (CFSMState a) [(Transition a, CFSMState a)]} cfsm :: (Alphabet a) => Set (Production a) -> a -> CFSM a cfsm g start = let ss = startState g start in CFSM $ s_map (execState (buildCsfm ss) (CFSMCalcState {s_seen = [], s_map = (M.fromList [(StartState, [(TransStart, ss)])])})) where buildCsfm s = case s of ItemSetState is -> do sn <- isSeen s if sn then return () else do setSeen s let nextStates = case s of {ItemSetState is -> successorStates g is; _ -> []} addToMap s nextStates mapM_ (\(t,s) -> buildCsfm s) nextStates _ -> return () isSeen s = gets s_seen >>= return . (L.elem s) setSeen s = modify (\st -> st{s_seen = s:(s_seen st)}) addToMap s stl = modify (\st -> st{s_map = M.insert s stl (s_map st)}) {----------------------------- Build left and follow map ------------------------------} whileChangeM_ m = do {s <- get; m; s' <- get; if (s /= s') then whileChangeM_ m else return ()} whileM_ mb m = do b <- mb if b then do {m ; whileM_ mb m} else return () leftMap :: (Alphabet a) => (Set (Production a)) -> Map (Symbol a) (Set (Symbol a)) leftMap g = execState buildLeft M.empty where buildLeft = do mapM_ step2 (S.toList g) whileChangeM_ (mapM_ step3 (S.toList g)) step2 (Production uu@(Letter u) (x:xl)) = if elem u nonterminals then enterIntoSet (S.singleton x) uu else return () step2 (Production Start (x:xl)) = enterIntoSet (S.singleton x) Start step2 _ = return () step3 (Production uu@(Letter u) (xx@(Letter x):xl)) = if (elem u nonterminals) && (elem x nonterminals) then insertAllIntoMap xx uu else return () step3 (Production Start (xx@(Letter x):xl)) = if elem x nonterminals then insertAllIntoMap xx Start else return () step3 _ = return () enterIntoSet x u = modify (M.insertWith (S.union) u x) insertAllIntoMap x u = do mxl <- get >>= return . (M.lookup x) case mxl of Just xl -> enterIntoSet xl u Nothing -> return () followPairs :: (Alphabet a) => Production a -> [(Symbol a, Symbol a)] followPairs (Production s (xx@(Letter x):y:xl)) = if elem x nonterminals then (xx,y) : (followPairs (Production s (y:xl))) else (followPairs (Production s (y:xl))) followPairs (Production s (x:xl)) = followPairs (Production s xl) followPairs _ = [] follow :: (Alphabet a) => (Set (Production a)) -> Map (Symbol a) (Set (Symbol a)) follow g = execState buildFollow M.empty where buildFollow = do let left = leftMap g mapM_ (\(v,b) -> do enterIntoSet (S.singleton b) v case b of Letter bb -> if elem bb nonterminals then case (M.lookup b left) of Nothing -> return () Just b -> enterIntoSet b v else return () _ -> return () ) (concat (L.map followPairs (S.toList g))) whileChangeM_ (mapM_ step3 (S.toList g)) step3 (Production _ []) = return () step3 (Production uu@(Letter u) l) = case last l of vv@(Letter v) -> if (elem u nonterminals) && (elem v nonterminals) then insertAllIntoMap uu vv else return () _ -> return () step3 (Production Start l) = case last l of vv@(Letter v) -> if (elem v nonterminals) then insertAllIntoMap Start vv else return () _ -> return () enterIntoSet x u = modify (M.insertWith (S.union) u x) insertAllIntoMap x u = do mxl <- get >>= return . (M.lookup x) case mxl of Just xl -> enterIntoSet xl u Nothing -> return () {----------------------------- SLR(1) action and goto tables ------------------------------} data TableEntry a = TableStateNumber (CFSMState a) | TableProduction (Production a) | TableAccept deriving (Ord,Eq) data ActionGotoTables a = ActionGotoTables {actionTable :: Map (CFSMState a, Symbol a) (TableEntry a), gotoTable :: Map (CFSMState a, a) (CFSMState a), actionFirstState :: CFSMState a} data CfsmState a = CfsmState {cfsm_follow :: Map (Symbol a) (Set (Symbol a)), cfsm_action :: Map (CFSMState a, Symbol a) (TableEntry a), cfsm_goto :: Map (CFSMState a, a) (CFSMState a), cfsm_cfsm :: CFSM a} data CfsmEx a = ShiftReduceConflict (CFSMState a, Symbol a) | NoFirstState deriving Show instance Error (CfsmEx a) actionGoto :: (Alphabet a, Show a) => Set (Production a) -> a -> Either (CfsmEx a) (ActionGotoTables a) actionGoto g e = let c = cfsm g e in case runState (runErrorT buildTables) (CfsmState (follow (grammar g e)) M.empty M.empty c) of (Left e, _) -> Left e (Right _, st) -> case firstState (cfsm_cfsm st) of Just sf -> Right (ActionGotoTables (cfsm_action st) (cfsm_goto st) sf) Nothing -> Left NoFirstState where buildTables = getCfsmStates >>= mapM_ buildTablesFromState buildTablesFromState si = getTransitions si >>= mapM_ (addTrans si) getCfsmStates = gets cfsm_cfsm >>= return . M.keys . cfsmMap addTrans si (t,sj) = case t of TransSucc sym -> case sym of Letter a -> if elem a terminals then setActionTerminal si a sj else if elem a nonterminals then setGoto si a sj else return () _ -> return () TransFinal p -> (getFollowSymbols (subject p)) >>= mapM_ (\k -> setActionProduction si k p) TransAccept -> setActionEOF si _ -> return () setActionTerminal si k sj = modifyActionTable (si, Letter k) (TableStateNumber sj) setActionProduction si k p = modifyActionTable (si, k) (TableProduction p) setActionEOF si = modifyActionTable (si, EOF) TableAccept setGoto si a sj = modifyGotoTable (si, a) sj modifyActionTable k v = do act <- gets cfsm_action case M.lookup k act of Nothing -> return () Just _ -> throwError (ShiftReduceConflict k) modify $ \st -> st{cfsm_action = M.insert k v act} modifyGotoTable k v = do goto <- gets cfsm_goto modify $ \st -> st{cfsm_goto = M.insert k v goto} getTransitions si = do cmap <- gets cfsm_cfsm >>= return . cfsmMap case M.lookup si cmap of Nothing -> return [] Just r -> return r getFollowSymbols e = do follow <- gets cfsm_follow case M.lookup e follow of Nothing -> return [] Just l -> return (S.toList l) {----------------------------- SLR parser ------------------------------} data ParseError a = ParseError String | CfsmError (CfsmEx a) deriving Show instance Error (ParseError a) -- b could be Token when slrParser runs with a lexer data Tree a b = Leaf b | Branch (Production a) [Tree a b] data StackItem a b = StackSymbol (Tree a b) | StackState (CFSMState a) data TokenListItem b = Token b | TEOF maxlist z = foldl max z {-- ---Printing the tree--- we first create a map (Int,Int) -> String then lay it out. For example, the result of `slrParse productions E id [INT, PLUS, LBRAK, INT, RBRAK];` is (Branch E --> E + T [Branch E --> T [Branch T --> P [Branch P --> i [Leaf i]]], Leaf +, Branch T --> P [Branch P --> ( E ) [Leaf (,Branch E --> T [Branch T --> P [Branch P --> i [Leaf i]]], Leaf )]]]) which is layed out like this, with the first E at grid position (0,0), the final right brace at position (3,4) [3 down, 4 across] and the final 'i' at position (6,3) E E+T T P P (E) i T P i first we construct this grid as a `Map (Int,Int) String` then we lay it out. --} instance (Show a, Show b) => Show (Tree a b) where show (Leaf b) = show b show b = let incr = modify (\(y,m) -> (y+1,m)) ins x s = modify (\(y,m) -> (y, M.insert (x,y) s m)) mm (Leaf b) x = ins x (show b) >> incr mm (Branch p tl) x = ins x (show (subject p)) >> mapM_ (\b -> mm b (x+1)) tl grid = snd (execState (mm b 0) (0, M.empty)) maxlen = maxlist 0 (L.map length (M.elems grid)) x = maxlist 0 (L.map fst (M.keys grid)) y = maxlist 0 (L.map snd (M.keys grid)) block s y = s ++ (take (maxlen - (length s)) $ repeat ' ') printElem x y = case M.lookup (x,y) grid of Just e -> block e y Nothing -> block "" y in "\n" ++ (concat $ concat $ L.map (\x -> (L.map (\y -> (printElem x y)) [0..y]) ++ ["\n"]) [0..x]) data ParseState a b = ParseState {p_result :: Maybe (Tree a b), p_stack :: [StackItem a b], p_tokens :: [TokenListItem b], p_actionGoto :: ActionGotoTables a} slrParse :: (Alphabet a, Show a, Show b) => Set (Production a) -> a -> (b -> a) -> [b] -> Either (ParseError a) (Tree a b) slrParse grammar start t2t toks = case actionGoto grammar start of Left e -> Left (CfsmError e) Right ag -> case runState (runErrorT parse) (ParseState Nothing [StackState (actionFirstState ag)] (L.map Token toks) ag) of (Left e, _) -> Left e (Right _, st) -> case p_result st of Just res -> Right res _ -> throwError (ParseError "no result ?!?") where parse = whileM_ (do {res <- gets p_result ; case res of Nothing -> return True ; _ -> return False}) parseStep parseStep = do f <- getStateOnTopOfStack u <- getNextToken act <- actionLookup f (tokenToSymbol u) case act of TableStateNumber i -> shift i TableProduction p -> reduce p TableAccept -> accept getStateOnTopOfStack = do stack <- gets p_stack case stack of ((StackState f) : _) -> return f _ -> throwError (ParseError "Top of Stack is not a state") getNextToken = do toks <- gets p_tokens case toks of (u : _) -> return u _ -> return TEOF actionLookup f k = do ag <- gets p_actionGoto case M.lookup (f, k) (actionTable ag) of Just act -> return act _ -> throwError (ParseError ("No action at state " ++ (show f) ++ "; terminal " ++ (show k))) tokenToSymbol (Token b) = Letter (t2t b) tokenToSymbol TEOF = EOF shift i = do stack <- gets p_stack tkns <- gets p_tokens (u,tkns') <- do case tkns of (Token u) : tl -> return (u,tl) _ -> throwError (ParseError "unexpected end of token list encountered") modify (\st -> st{p_stack = ((StackState i):(StackSymbol (Leaf u)):stack), p_tokens = tkns'}) reduce pd@(Production p cde) = do syms <- takeNSymbolsFromStack (length cde) stack' <- gets p_stack g <- case stack' of ((StackState c):_) -> case p of (Letter ps) -> gotoLookup c ps _ -> throwError (ParseError ("symbol " ++ (show p) ++ " not in alphabet at reduce")) _ -> throwError (ParseError ("top of stack is not a state at reduce")) modify (\st -> st{p_stack = (StackState g):(StackSymbol (Branch pd syms)):stack'}) accept = do stack <- gets p_stack case stack of (_:(StackSymbol q):_) -> modify (\st -> st{p_result = Just q}) _ -> throwError (ParseError "stack incorrect at accept") takeNSymbolsFromStack n = do let tk 0 r s = Just (r,s) tk n r (x:s) = case x of StackSymbol symb -> tk (n-1) (symb:r) s _ -> tk n r s stack <- gets p_stack case tk n [] stack of Just (ret,stack') -> do modify (\st -> st{p_stack = stack'}) return ret Nothing -> throwError (ParseError ("not " ++ (show n) ++ " symbols on stack")) gotoLookup c ps = do ag <- gets p_actionGoto case M.lookup (c, ps) (gotoTable ag) of Just act -> return act _ -> throwError (ParseError ("No goto at state " ++ (show c) ++ "; symbol " ++ (show ps))) {----------------------------- example from the notes ------------------------------} data ToyAlphabet = E | T | P | INT | PLUS | TIMES | LBRAK | RBRAK deriving (Eq,Ord) instance Show ToyAlphabet where show c = case c of {E -> "E"; T -> "T"; P -> "P"; INT -> "i"; PLUS -> "+"; TIMES -> "**"; LBRAK -> "("; RBRAK -> ")"} instance Alphabet ToyAlphabet where nonterminals = [E,T,P] terminals = [INT, PLUS, TIMES, LBRAK, RBRAK] productions = S.fromList [ E --> [E, PLUS, T] ,E --> [T] ,T --> [P, TIMES, T] ,T --> [P] ,P --> [INT] ,P --> [LBRAK, E, RBRAK] ] toyCfsm = cfsm productions E toyLeftMap = leftMap (grammar productions E) toyFollow = follow (grammar productions E) toyActionGoto = actionGoto productions E toyParse = slrParse productions E id [INT, PLUS, LBRAK, INT, RBRAK]; {----------------------------- example from exam 2008, Paper 5, Q 6 ------------------------------} data ExamAlphabet = Ex | Tx | Ux | Px | ONEx | TWOx | IDx | LBRAKx | RBRAKx | PLUSx | DIVx | POWx deriving (Eq,Ord) instance Show ExamAlphabet where show c = case c of {Ex -> "E"; Tx -> "T"; Px -> "P"; Ux -> "U" ; PLUSx -> "+"; DIVx -> "/"; LBRAKx -> "("; RBRAKx -> ")" ; POWx -> "^" ; ONEx -> "1" ; TWOx -> "2" ; IDx -> "id"} instance Alphabet ExamAlphabet where nonterminals = [Ex,Tx,Px,Ux] terminals = [ONEx, TWOx, PLUSx, LBRAKx, RBRAKx, POWx, IDx, DIVx] eproductions = S.fromList [ Ex --> [Ex, PLUSx, Tx] ,Ex --> [Tx] ,Tx --> [Ux, DIVx, Ux] ,Tx --> [Ux] ,Ux --> [Px,POWx,Ux] ,Ux --> [Px] ,Px --> [LBRAKx, Ex, RBRAKx] ,Px --> [ONEx] ,Px --> [TWOx] ,Px --> [IDx] ] eproductions' = S.fromList [ Ex --> [Ex, PLUSx, Ex] ,Ex --> [Ex, DIVx, Ex] ,Ex --> [Ex,POWx,Ex] ,Ex --> [LBRAKx, Ex, RBRAKx] ,Ex --> [ONEx] ,Ex --> [TWOx] ,Ex --> [IDx] ] eproductions'' = S.fromList [ Ex --> [Ex, PLUSx, Tx] ,Ex --> [Tx] ,Ex --> [Tx, DIVx, Tx] ,Ex --> [Ex,POWx,Tx] ,Ux --> [Px] ,Tx --> [LBRAKx, Ex, RBRAKx] ,Tx --> [ONEx] ,Tx --> [TWOx] ,Tx --> [IDx] ] powParse = slrParse eproductions Ex id [IDx, POWx, IDx, POWx, IDx]; plusParse = slrParse eproductions Ex id [IDx, PLUSx, IDx, PLUSx, IDx]; divParse = slrParse eproductions Ex id [IDx, DIVx, IDx, DIVx, IDx]; -- get shift reduce conflicts with this grammar powParse' = slrParse eproductions' Ex id [IDx, POWx, IDx, POWx, IDx]; -- if ^ is made right associative, we would get shift reduce conflict, -- but with it being left associative, we're fine. powParse'' = slrParse eproductions'' Ex id [IDx, POWx, IDx, POWx, IDx];