(*
 * CBG squanderer:  A two-stack operator precedence parser.
 * This parser pushes operators and operands alternately on the two stacks.
 * The operator stack must remain in increasing precedence and when an operator of
 * lower precedence needs to be pushed, the reduce operation is instead called. Reduce
 * pops an operator and two operands and pushes the resulting syntax tree fragment
 * on the operand stack, thereby reducing each stack depth by one and
 * reducing the precedence on the top of the operand stack.  
 *
 * There are various special cases for monadic operators, arrays and parenthesis.
 *)
datatype uc_t = 
  uc_rname of string
| uc_digit of string
| uc_var of string
| uc_diop of (char * uc_t) * (char * uc_t) * (char * uc_t)
| ucd_equals
| ucd_times | ucd_divide | ucd_mod
| ucd_lbra | ucd_rbra
| ucd_plus | ucd_minus
| ucd_xor
| ucd_neg
| ucd_query | ucd_colon
| ucd_semi
| ucd_not
| ucd_logor | ucd_bitor | ucd_logand | ucd_bitand
| ucd_comma
| ucd_lpar | ucd_rpar | ucd_subs
| ucd_lshift
| ucd_rshift
| ucd_deqd | ucd_dned
| ucd_dltd | ucd_dgtd
| ucd_dled | ucd_dged
| uc_filler
;


fun 
  ucomp_lex(nil) = nil
| ucomp_lex(h::t) = 
	if #"A" <= h andalso h <= #"Z" then (h, uc_rname(implode[h]))::(ucomp_lex t)
	else if h = #"s" andalso t<>nil andalso hd t = #"p" then (h, uc_var "sp")::(ucomp_lex(tl t))
	else if h = #"p" andalso t<>nil andalso hd t = #"c" then (h, uc_var "pc")::(ucomp_lex(tl t))
	else if #"a" <= h andalso h <= #"z" then (h, uc_var(implode[h]))::(ucomp_lex t)
	else if #"0" <= h andalso h <= #"9" then (h, uc_digit(implode[h]))::(ucomp_lex t)
	else if h = #"=" andalso (hd t) = #"=" then (h, ucd_deqd)::(ucomp_lex(tl t))
	else if h = #"=" then (h, ucd_equals)::(ucomp_lex t)
	else if h = #"&" andalso (hd t) = #"&" then (h, ucd_logand)::(ucomp_lex(tl t))
	else if h = #"&" then (h, ucd_bitand)::(ucomp_lex t)
	else if h = #"|" andalso (hd t) = #"|" then (h, ucd_logor)::(ucomp_lex(tl t))
	else if h = #"|" then (h, ucd_bitor)::(ucomp_lex t)
	else if h = #"[" then (h, ucd_lbra)::(ucomp_lex t)
	else if h = #"]" then (h, ucd_rbra)::(ucomp_lex t)
	else if h = #"(" then (h, ucd_lpar)::(ucomp_lex t)
	else if h = #")" then (h, ucd_rpar)::(ucomp_lex t)
	else if h = #"/" then (h, ucd_divide)::(ucomp_lex t)
	else if h = #"*" then (h, ucd_times)::(ucomp_lex t)
	else if h = #"%" then (h, ucd_mod)::(ucomp_lex t)
	else if h = #"+" then (h, ucd_plus)::(ucomp_lex t)
	else if h = #"-" then (h, ucd_minus)::(ucomp_lex t)
	else if h = #"^" then (h, ucd_xor)::(ucomp_lex t)
	else if h = #"!" andalso (hd t) = #"=" then (h, ucd_dned)::(ucomp_lex(tl t))
	else if h = #"!" then (h, ucd_not)::(ucomp_lex t)
	else if h = #";" then (h, ucd_semi)::(ucomp_lex t)
	else if h = #"~" then (h, ucd_neg)::(ucomp_lex t)
	else if h = #"?" then (h, ucd_query)::(ucomp_lex t)
	else if h = #":" then (h, ucd_colon)::(ucomp_lex t)
	else if h = #"," then (h, ucd_comma)::(ucomp_lex t)
	else if h = #"<" andalso (hd t) = #"<" then (h, ucd_lshift)::(ucomp_lex(tl t))
	else if h = #"<" andalso (hd t) = #"=" then (h, ucd_dled)::(ucomp_lex(tl t))
	else if h = #"<" then (h, ucd_dltd)::(ucomp_lex t)
	else if h = #">" andalso (hd t) = #">" then (h, ucd_rshift)::(ucomp_lex(tl t))
	else if h = #">" andalso (hd t) = #"=" then (h, ucd_dged)::(ucomp_lex(tl t))
	else if h = #">" then (h, ucd_dgtd)::(ucomp_lex t)
	else raise sfault("Bad ucomp_lex character: " ^ (implode[h]))
;

val precedence_order = [
	ucd_subs,
	ucd_not, ucd_neg,
	ucd_times, ucd_divide, ucd_mod,
	ucd_plus, ucd_minus,
	ucd_lshift, ucd_rshift,
	ucd_xor,
	ucd_bitor,
	ucd_bitand,
	ucd_dltd, ucd_dgtd, ucd_dled, ucd_dged, ucd_deqd, ucd_dned,
	ucd_logor,
	ucd_logand,
	ucd_colon, ucd_query,
	ucd_equals, ucd_comma, ucd_semi
]
;

fun bs_ast2 (l,r) h = (#".", uc_diop(h, l, r))
;

val monadic_filler = (#"_", uc_filler)

fun bs_reduce_item (r::l::st1,  h::st2) = ((bs_ast2 (l, r) h)::st1, st2)
|   bs_reduce_item (r::st1, _) = raise sfault("bs_reduce_item: missing arg")
|   bs_reduce_item _ = raise sfault("bs_reduce_item: no args")
;

fun bs_reduce k (items, nil) = (items, nil)
|   bs_reduce NONE (st1, st2) = bs_reduce NONE (bs_reduce_item(st1, st2))
|   bs_reduce (SOME k) (st1, A as ((v, j)::st2)) =
	 if k=j then (st1, st2)
	 else bs_reduce (SOME k) (bs_reduce_item(st1, A))
|   bs_reduce NONE _ = raise sfault("bs_reduce: eoi syntax error")
|   bs_reduce (SOME v) _ = raise sfault("bs_reduce: syntax error")
;

fun
  bs_push v (st1, nil) t = bs_parse1(st1, [v]) t 
| bs_push v (st1, h::st2) t = 
	let fun hs nil = raise sfault("operator not in precedence order list")
	|   hs (b::bs) = if b=(snd v) orelse b=(snd h) then b else hs bs
	val higher = hs precedence_order
	in if higher<>(snd h) then bs_parse1(st1, v::h::st2) t
	else bs_push v (bs_reduce_item(st1, h::st2)) t
	end

and
  bs_parse1(st1,st2)((h as (k, uc_rname a))::t) = 	bs_parse2(h::st1,st2)t
| bs_parse1(st1,st2)((h as (k, uc_var a))::t) = 	bs_parse2(h::st1,st2)t
| bs_parse1(st1,st2)((h as (k, uc_digit a))::t) = 	bs_parse2(h::st1,st2)t
| bs_parse1(st1,st2)((h as (k, ucd_lpar))::t) = 	bs_parse1(st1,h::st2)t
| bs_parse1(st1,st2)((h as (k, ucd_lbra))::t) = 
		bs_parse1((#"M", uc_var "mem")::st1,h::(#"[", ucd_subs)::st2)t
| bs_parse1(st1,st2)((h as (k, ucd_not))::t) = 		bs_parse1(monadic_filler::st1,h::st2)t
| bs_parse1(st1,st2)((k, other)::t) = raise sfault("ucomp parse 1 error: " ^ (implode [k]))

and
  bs_parse2(st1,st2)(nil) = bs_reduce NONE (st1, st2)
| bs_parse2(st1,st2)((h as (k, ucd_query))::t)  = bs_push h (st1,st2)t
| bs_parse2(st1,st2)((h as (k, ucd_colon))::t)  = bs_push h (st1,st2)t
| bs_parse2(st1,st2)((h as (k, ucd_equals))::t) = bs_push h (st1,st2)t
| bs_parse2(st1,st2)((h as (k, ucd_plus))::t)   = bs_push h (st1,st2)t 
| bs_parse2(st1,st2)((h as (k, ucd_comma))::t)  = bs_push h (st1,st2)t 
| bs_parse2(st1,st2)((h as (k, ucd_xor))::t)    = bs_push h (st1,st2)t 
| bs_parse2(st1,st2)((h as (k, ucd_dltd))::t)   = bs_push h (st1,st2)t 
| bs_parse2(st1,st2)((h as (k, ucd_dled))::t)   = bs_push h (st1,st2)t 
| bs_parse2(st1,st2)((h as (k, ucd_minus))::t)  = bs_push h (st1,st2)t
| bs_parse2(st1,st2)((h as (k, ucd_times))::t)  = bs_push h (st1,st2)t
| bs_parse2(st1,st2)((h as (k, ucd_divide))::t) = bs_push h (st1,st2)t
| bs_parse2(st1,st2)((h as (k, ucd_lshift))::t) = bs_push h (st1,st2)t
| bs_parse2(st1,st2)((h as (k, ucd_semi))::t)   = bs_push h (st1,st2)t
| bs_parse2(st1,st2)((h as (k, ucd_rshift))::t) = bs_push h (st1,st2)t
| bs_parse2(st1,st2)((h as (k, ucd_rbra))::t)    = bs_parse2(bs_reduce (SOME ucd_lbra) (st1,st2))t
| bs_parse2(st1,st2)((h as (k, ucd_rpar))::t)    = bs_parse2(bs_reduce (SOME ucd_lpar) (st1,st2))t
| bs_parse2(st1,st2)((k, other)::t) = raise sfault("ucomp parse 2 error: " ^ (implode [k]))
;

fun ucomp_p s =
    let val toks = (ucomp_lex(explode s))
	val (items, ops) = bs_parse1(nil, nil) toks
	val _ = if ops<>nil then raise sfault("bs_parse: dangle ops") else ()
	val _ = if length items <>1 then raise sfault("bs_parse: dangle args") else ()
	in () end
;

(* (C) 1986 DJ Greaves *)


Exercises:
  1. Make this parser into a running program and generate some example runs.

  2. Give a BNF or other syntax for the language accepted.

  3. Extend the parser to handle and ignore monadic plus  (i.e  +4 means the same as 4).

  4. Extend the parser to handled multi-digit numbers or multi-letter variable names.

  5. Write an evaluator to run programs that have been parsed. You may need to add
one or two more forms to make it a Turing-complete language!

  6. Implement function application.

END