(* * CBG squanderer: A two-stack operator precedence parser. * This parser pushes operators and operands alternately on the two stacks. * The operator stack must remain in increasing precedence and when an operator of * lower precedence needs to be pushed, the reduce operation is instead called. Reduce * pops an operator and two operands and pushes the resulting syntax tree fragment * on the operand stack, thereby reducing each stack depth by one and * reducing the precedence on the top of the operand stack. * * There are various special cases for monadic operators, arrays and parenthesis. *) datatype uc_t = uc_rname of string | uc_digit of string | uc_var of string | uc_diop of (char * uc_t) * (char * uc_t) * (char * uc_t) | ucd_equals | ucd_times | ucd_divide | ucd_mod | ucd_lbra | ucd_rbra | ucd_plus | ucd_minus | ucd_xor | ucd_neg | ucd_query | ucd_colon | ucd_semi | ucd_not | ucd_logor | ucd_bitor | ucd_logand | ucd_bitand | ucd_comma | ucd_lpar | ucd_rpar | ucd_subs | ucd_lshift | ucd_rshift | ucd_deqd | ucd_dned | ucd_dltd | ucd_dgtd | ucd_dled | ucd_dged | uc_filler ; fun ucomp_lex(nil) = nil | ucomp_lex(h::t) = if #"A" <= h andalso h <= #"Z" then (h, uc_rname(implode[h]))::(ucomp_lex t) else if h = #"s" andalso t<>nil andalso hd t = #"p" then (h, uc_var "sp")::(ucomp_lex(tl t)) else if h = #"p" andalso t<>nil andalso hd t = #"c" then (h, uc_var "pc")::(ucomp_lex(tl t)) else if #"a" <= h andalso h <= #"z" then (h, uc_var(implode[h]))::(ucomp_lex t) else if #"0" <= h andalso h <= #"9" then (h, uc_digit(implode[h]))::(ucomp_lex t) else if h = #"=" andalso (hd t) = #"=" then (h, ucd_deqd)::(ucomp_lex(tl t)) else if h = #"=" then (h, ucd_equals)::(ucomp_lex t) else if h = #"&" andalso (hd t) = #"&" then (h, ucd_logand)::(ucomp_lex(tl t)) else if h = #"&" then (h, ucd_bitand)::(ucomp_lex t) else if h = #"|" andalso (hd t) = #"|" then (h, ucd_logor)::(ucomp_lex(tl t)) else if h = #"|" then (h, ucd_bitor)::(ucomp_lex t) else if h = #"[" then (h, ucd_lbra)::(ucomp_lex t) else if h = #"]" then (h, ucd_rbra)::(ucomp_lex t) else if h = #"(" then (h, ucd_lpar)::(ucomp_lex t) else if h = #")" then (h, ucd_rpar)::(ucomp_lex t) else if h = #"/" then (h, ucd_divide)::(ucomp_lex t) else if h = #"*" then (h, ucd_times)::(ucomp_lex t) else if h = #"%" then (h, ucd_mod)::(ucomp_lex t) else if h = #"+" then (h, ucd_plus)::(ucomp_lex t) else if h = #"-" then (h, ucd_minus)::(ucomp_lex t) else if h = #"^" then (h, ucd_xor)::(ucomp_lex t) else if h = #"!" andalso (hd t) = #"=" then (h, ucd_dned)::(ucomp_lex(tl t)) else if h = #"!" then (h, ucd_not)::(ucomp_lex t) else if h = #";" then (h, ucd_semi)::(ucomp_lex t) else if h = #"~" then (h, ucd_neg)::(ucomp_lex t) else if h = #"?" then (h, ucd_query)::(ucomp_lex t) else if h = #":" then (h, ucd_colon)::(ucomp_lex t) else if h = #"," then (h, ucd_comma)::(ucomp_lex t) else if h = #"<" andalso (hd t) = #"<" then (h, ucd_lshift)::(ucomp_lex(tl t)) else if h = #"<" andalso (hd t) = #"=" then (h, ucd_dled)::(ucomp_lex(tl t)) else if h = #"<" then (h, ucd_dltd)::(ucomp_lex t) else if h = #">" andalso (hd t) = #">" then (h, ucd_rshift)::(ucomp_lex(tl t)) else if h = #">" andalso (hd t) = #"=" then (h, ucd_dged)::(ucomp_lex(tl t)) else if h = #">" then (h, ucd_dgtd)::(ucomp_lex t) else raise sfault("Bad ucomp_lex character: " ^ (implode[h])) ; val precedence_order = [ ucd_subs, ucd_not, ucd_neg, ucd_times, ucd_divide, ucd_mod, ucd_plus, ucd_minus, ucd_lshift, ucd_rshift, ucd_xor, ucd_bitor, ucd_bitand, ucd_dltd, ucd_dgtd, ucd_dled, ucd_dged, ucd_deqd, ucd_dned, ucd_logor, ucd_logand, ucd_colon, ucd_query, ucd_equals, ucd_comma, ucd_semi ] ; fun bs_ast2 (l,r) h = (#".", uc_diop(h, l, r)) ; val monadic_filler = (#"_", uc_filler) fun bs_reduce_item (r::l::st1, h::st2) = ((bs_ast2 (l, r) h)::st1, st2) | bs_reduce_item (r::st1, _) = raise sfault("bs_reduce_item: missing arg") | bs_reduce_item _ = raise sfault("bs_reduce_item: no args") ; fun bs_reduce k (items, nil) = (items, nil) | bs_reduce NONE (st1, st2) = bs_reduce NONE (bs_reduce_item(st1, st2)) | bs_reduce (SOME k) (st1, A as ((v, j)::st2)) = if k=j then (st1, st2) else bs_reduce (SOME k) (bs_reduce_item(st1, A)) | bs_reduce NONE _ = raise sfault("bs_reduce: eoi syntax error") | bs_reduce (SOME v) _ = raise sfault("bs_reduce: syntax error") ; fun bs_push v (st1, nil) t = bs_parse1(st1, [v]) t | bs_push v (st1, h::st2) t = let fun hs nil = raise sfault("operator not in precedence order list") | hs (b::bs) = if b=(snd v) orelse b=(snd h) then b else hs bs val higher = hs precedence_order in if higher<>(snd h) then bs_parse1(st1, v::h::st2) t else bs_push v (bs_reduce_item(st1, h::st2)) t end and bs_parse1(st1,st2)((h as (k, uc_rname a))::t) = bs_parse2(h::st1,st2)t | bs_parse1(st1,st2)((h as (k, uc_var a))::t) = bs_parse2(h::st1,st2)t | bs_parse1(st1,st2)((h as (k, uc_digit a))::t) = bs_parse2(h::st1,st2)t | bs_parse1(st1,st2)((h as (k, ucd_lpar))::t) = bs_parse1(st1,h::st2)t | bs_parse1(st1,st2)((h as (k, ucd_lbra))::t) = bs_parse1((#"M", uc_var "mem")::st1,h::(#"[", ucd_subs)::st2)t | bs_parse1(st1,st2)((h as (k, ucd_not))::t) = bs_parse1(monadic_filler::st1,h::st2)t | bs_parse1(st1,st2)((k, other)::t) = raise sfault("ucomp parse 1 error: " ^ (implode [k])) and bs_parse2(st1,st2)(nil) = bs_reduce NONE (st1, st2) | bs_parse2(st1,st2)((h as (k, ucd_query))::t) = bs_push h (st1,st2)t | bs_parse2(st1,st2)((h as (k, ucd_colon))::t) = bs_push h (st1,st2)t | bs_parse2(st1,st2)((h as (k, ucd_equals))::t) = bs_push h (st1,st2)t | bs_parse2(st1,st2)((h as (k, ucd_plus))::t) = bs_push h (st1,st2)t | bs_parse2(st1,st2)((h as (k, ucd_comma))::t) = bs_push h (st1,st2)t | bs_parse2(st1,st2)((h as (k, ucd_xor))::t) = bs_push h (st1,st2)t | bs_parse2(st1,st2)((h as (k, ucd_dltd))::t) = bs_push h (st1,st2)t | bs_parse2(st1,st2)((h as (k, ucd_dled))::t) = bs_push h (st1,st2)t | bs_parse2(st1,st2)((h as (k, ucd_minus))::t) = bs_push h (st1,st2)t | bs_parse2(st1,st2)((h as (k, ucd_times))::t) = bs_push h (st1,st2)t | bs_parse2(st1,st2)((h as (k, ucd_divide))::t) = bs_push h (st1,st2)t | bs_parse2(st1,st2)((h as (k, ucd_lshift))::t) = bs_push h (st1,st2)t | bs_parse2(st1,st2)((h as (k, ucd_semi))::t) = bs_push h (st1,st2)t | bs_parse2(st1,st2)((h as (k, ucd_rshift))::t) = bs_push h (st1,st2)t | bs_parse2(st1,st2)((h as (k, ucd_rbra))::t) = bs_parse2(bs_reduce (SOME ucd_lbra) (st1,st2))t | bs_parse2(st1,st2)((h as (k, ucd_rpar))::t) = bs_parse2(bs_reduce (SOME ucd_lpar) (st1,st2))t | bs_parse2(st1,st2)((k, other)::t) = raise sfault("ucomp parse 2 error: " ^ (implode [k])) ; fun ucomp_p s = let val toks = (ucomp_lex(explode s)) val (items, ops) = bs_parse1(nil, nil) toks val _ = if ops<>nil then raise sfault("bs_parse: dangle ops") else () val _ = if length items <>1 then raise sfault("bs_parse: dangle args") else () in () end ; (* (C) 1986 DJ Greaves *) Exercises: 1. Make this parser into a running program and generate some example runs. 2. Give a BNF or other syntax for the language accepted. 3. Extend the parser to handle and ignore monadic plus (i.e +4 means the same as 4). 4. Extend the parser to handled multi-digit numbers or multi-letter variable names. 5. Write an evaluator to run programs that have been parsed. You may need to add one or two more forms to make it a Turing-complete language! 6. Implement function application. END