(*  Title: 	syntax
    Author: 	Lawrence C Paulson, Cambridge University Computer Laboratory
    Copyright   1988  University of Cambridge

Syntax operations: lexical analysis, parsing, and printing.

  Standard syntax for higher order logic:...
    %(a)t for abstraction,  	%(a,b,c)t for nested abstraction
    !(a)t for quantification,	!(a,b,c)t for nested quantification
  The standard table of infixes contains implication and equality

  IMPLEMENT prefix operators with precedence?  (EXAMPLE: negation)

  SHOULD ADD THE FOLLOWING FUNCTION?
fun pureprin t =
    (Pretty.init();  Syntax.prin Syntax.pure t;  Pretty.nl());
*)

infix thenp;
infix 7 thenkey;

(*Any "syntax" structure must include the following components:
  type syntax -- holds syntactic description of a theory
  extend syn stuff -- extend syntax with various stuff (impl dependent)
  merge(syn1,syn2) -- combine two syntaxes
  pure -- syntax of the pure theory
  prin syn t -- print term t under syntax "syn"
  read syn nt a -- read string "a" as nonterminal nt in "syn"
*)

signature SYNTAX = 
sig
  structure Symtab: SYMTAB
  datatype lexsy = DelimSy | IdentSy | InfixSy of int * int | VarSy of int

  (*Required components*)
  type syntax
  val extend: syntax -> string list * (string list * string * int) list * ((string*lexsy)list -> term * (string*lexsy)list) * (lexsy Symtab.table -> term -> unit) -> syntax
  val merge: syntax*syntax->syntax
  val pure: syntax 
  val prin: syntax -> term -> unit
  val read: syntax -> string -> string -> term

  (*Gadgets for building parsers*)
  val enclose_if: bool -> ('a -> unit) -> 'a -> unit
  val parepeat: string * ((string*lexsy)list -> 'a * (string*lexsy)list) -> (string*lexsy)list -> 'a list * (string*lexsy)list
  val parepin: string * ((string*lexsy)list -> 'a * (string*lexsy)list) -> (string*lexsy)list -> 'a list * (string*lexsy)list
  val parserr: string * (string*lexsy)list -> 'a
  val parserror: (string*lexsy)list -> 'a
  val parse_end: 'a * (string * lexsy) list -> 'a
  val parse_functl:  ((string*lexsy)list -> term * (string*lexsy)list) -> int -> (string*lexsy)list -> term * (string*lexsy)list
  val parse_ident: (string*lexsy)list -> (string*typ) * (string*lexsy)list
  val print_functl:  (int*int -> term -> unit) * lexsy Symtab.table -> (int*int -> term -> unit)
  val thenkey: ((string*lexsy)list -> 'a * (string*lexsy)list) * string -> (string*lexsy)list -> 'a * (string * lexsy) list
  val thenp: ((string*lexsy)list -> 'a*'b) * ('b -> 'c*'d) -> (string*lexsy)list -> ('a*'c)*'d
end;


functor SyntaxFun(structure Symtab: SYMTAB and Pretty: PRETTY) : SYNTAX = 
struct

structure Symtab = Symtab;



(*Infixes, identifiers, variables, keywords*)
datatype lexsy =
    InfixSy  of int*int		(*left, right precedences*)
  | IdentSy  
  | VarSy    of int
  | DelimSy; 


type lexemes = (string*lexsy) list;


(*Syntactic information, including parsing and printing functions.*)
type syntax = 
  {lextab: lexsy Symtab.table, 
   parser: (string*lexsy)list -> term * (string*lexsy)list,
   printer: lexsy Symtab.table -> term -> unit};



fun prec_error (cs, msg) =
    error (msg ^ " for infix operator " ^ space_implode " " cs);


(*Precedences should be from 1 to 9.  Higher values bind tighter.
  The arithmetic below converts precedence/associativity into
    separate left and right precedences. *)
fun make_infix (cs, "left", k) = 
	if 1<=k andalso k<=9 then (cs, InfixSy(10*k, 10*k + 1)) 
	else  prec_error (cs, "Precedence must be 1 to 9")
  | make_infix (cs, "right", k) = 
	if 1<=k andalso k<=9 then (cs, InfixSy(10*k, 10*k - 1)) 
	else  prec_error (cs, "Precedence must be 1 to 9")
  | make_infix (cs, _, _) = 
	prec_error (cs, "Precedence must be \"left\" or \"right\"");


(*Lexical analysis maps the characters, a string list, to a lexeme list.*)


(*Look up the identifier in lextab, generate polymorphic one if absent.
  Symbol could be Const, Infix, Keyword, Ident, Other*)
fun default_look (lextab,a) =
    case Symtab.lookup(lextab,a) of
	  None => IdentSy 
       |  Some ld => ld;


fun get_lexemes (lextab, cs: string list)  : lexemes = 
  let fun idfix a = (a, default_look(lextab,a));
      fun make_var (a,i) =
	    case default_look(lextab,a) of
	          IdentSy => (a, VarSy i)  
		| _ => lexerr("Cannot use reserved word as variable", cs)
      fun getlex (lxs, []) = rev lxs
	| getlex (lxs, "%"::"("::cs) = (*lambda*)
	      getlex(("%(",DelimSy) :: lxs, cs)
	| getlex (lxs, "!"::"("::cs) = (*meta-forall*)
	      getlex(("!(",DelimSy) :: lxs, cs)
	| getlex (lxs, "?"::cs) =  (*variables*)
	    getlex (apfst (fn (a,i) => make_var(a,i) :: lxs)
			  (scan_varname cs))
	| getlex (lxs, "["::"|"::cs) = (*left meta-bracket*)
	      getlex(("[|",DelimSy) ::lxs, cs)
	| getlex (lxs, "|"::"]"::cs) = (*right meta-bracket*)
	      getlex(("|]",DelimSy) ::lxs, cs)
	| getlex (lxs, c::cs) = (*single character delimiter*)
	   if is_delimchar c then getlex((c,DelimSy)::lxs, cs) else
	   if is_letter c		(*alphanumeric identifier/infix*)
	   then let val (a,ds) = scan_ident(c::cs)
		in  getlex (idfix a :: lxs, ds)  end
	   else
	   if is_special c	(*symbolic identifier/infix*)
	   then let val (scs,ds) = take_prefix is_special (c::cs)
		in  getlex (idfix(implode scs) :: lxs, ds)  end else
	   if is_digit c		(*number*)
	   then let val (digs,ds) = take_prefix is_digit (c::cs)
		in  getlex (idfix(implode digs) :: lxs, ds)  end else
	   if is_blank c  then  getlex (lxs, strip_prefix is_blank cs)  
	   else lexerr ("Illegal character", cs)
  in  getlex ([],cs)  end;



(* P A R S I N G *)


(*syntax errors*)
fun parserr (msg, lxs: lexemes) =
    raise PARSERR (msg ^ ".  Location: "
		  ^ space_implode " " (map fst lxs));


(*sequential parsing, accumulating results*)
fun parsefn1 thenp parsefn2 = fn lxs: lexemes =>
  let val (x1,lxs1) = parsefn1 lxs;
      val (x2,lxs2) = parsefn2 lxs1
  in  ( (x1,x2), lxs2)  end;


(*Parse a phrase, then skip the given symbol, which need not be a DelimSy.
  May use e.g. an InfixSy as a keyword;  caller must be careful. *)
fun parsefn thenkey a = fn lxs: lexemes =>
  let val (x,lxs1) = parsefn lxs
  in  case lxs1 of
      (b,_) :: lxs2 =>
	  if a=b then (x,lxs2)
	  else parserr ("Symbol "^ a ^" expected", lxs1)
      | _ =>   parserr ("Symbol "^ a ^" expected", lxs1)
  end;


(*This code calls apfst on (x,lxs) to apply a function to x 
  while passing the remaining lexemes lxs.*)

(*Parse a phrase repeatedly (0 or more times), using parse function,
  so long as the first symbol is the delimiter key
  Returns the list of results  *)
fun parepeat (a, parse) (lxs: lexemes) = case lxs of
      ((b,DelimSy) :: ls) =>    
	  if a=b then  apfst (op::) ((parse thenp parepeat (a, parse)) ls)
	  else ([], lxs)
    | _ => ([], lxs);


(*parses    <phrase> key ... key <phrase>    *)
fun parepin (a,parse) (lxs: lexemes) =
    apfst (op::) ((parse thenp parepeat (a, parse)) lxs);





(*check that no lexical symbols remain*)
fun parse_end (x, []) = x
  | parse_end (_,lxs) = parserr ("Leftover text", lxs);



(*Parse an identifier, return name and dummy type. *)
fun parse_ident ((a, IdentSy) :: lxs) =  ((a,Adummy), lxs)
  | parse_ident lxs = parserr("Variable name expected", lxs);


(*Parse list of distinct identifiers as x,y,z *)
fun parse_boundvars lxs =
  let val (bns,ls) = (parepin (",", parse_ident)) lxs
  in  case findrep(map fst bns) of
	  [] => (bns,ls)
        | s::_ => parserr ("Repeated bound variable: "^s, lxs)
  end;


(*Functional for writing parsers
  prec is the precedence of the operator to the left;
    parsing stops at an operator with lower (= weaker) precedence
    so that a*b+c is parsed as (a*b)+c. *)
fun parse_functl parsetm (prec:int)   : lexemes -> term * lexemes =
  let fun recparse lxs = parse_functl parsetm 0 lxs;
      fun parse_atom lxs0 = case lxs0 of
	  (a, IdentSy) :: lxs => (Free(a,Adummy), lxs)
	| (a, VarSy(i)) :: lxs => (Var((a,i), Adummy) , lxs)
	| ("op",DelimSy) :: (a, InfixSy _) :: lxs =>
	    (Const(a,Adummy), lxs)   (*infix-as-constant, like ML*)
	| ("(",DelimSy) :: lxs => (recparse thenkey ")") lxs
	| lxs => parsetm lxs;
      fun parse_combtail (t,lxs) = case lxs of
	  ("(",DelimSy)::ls =>
	    let val (args, lxs2) = 
		    (parepin (",", recparse) thenkey ")") ls
	    in  parse_combtail (list_comb(t, args), lxs2)  end
	| (a, InfixSy(left,right)) :: ls =>
	    if left < prec then  (t,lxs)
	    else
	      let val (u,lxs2) = parse_functl parsetm right ls
	      in  parse_combtail (Const(a,Adummy)$t$u, lxs2)
	      end
	| _ => (t,lxs)
  in fn ("%(",DelimSy) :: lxs =>
	  apfst list_abs_free 
		((parse_boundvars thenkey ")" thenp
		  parse_functl parsetm 0) lxs)
      | ("!(",DelimSy) :: lxs =>
	  apfst list_all_free 
		((parse_boundvars thenkey ")" thenp
		  parse_functl parsetm 0) lxs)
      | lxs =>  parse_combtail (parse_atom lxs)
  end;


(*NOTE: nt CURRENTLY UNUSED!!! *)
fun read (syn: syntax) nt a = 
    parse_end (#parser syn (get_lexemes (#lextab syn, explode a)));



(*P R I N T I N G*)


(*Put brackets around the printed result if condition is true.*)
fun enclose_if true  pfn x = (Pretty.bgs"(";  pfn x;  Pretty.ens")")
  | enclose_if false pfn x =  pfn x; 


fun needs_parens (Abs _) = true
  | needs_parens (_ $ _) = true
  | needs_parens   _     = false;


(*maps  (vars,v)  to   v'::vars    this reverses the names, as required*)
fun add_boundname oldnames (vars, (a,T)) : (string*typ) list =
    (variant (map fst vars @ oldnames) a, T) :: vars;


(* Printing.  Renames bound variables if anonymous or clashes exist.
   Handles infixes: for f(a,b), looks up constant's name (f) in lextab.
   Uses operator precedences to minimize use of parens. *)
fun print_functl(recprint, lextab) (outleft,outright) t =
  let fun print_abs (a,vars,body) =
	let val oldnames = add_term_names(body,[]);
	    val newvars = itlist_left (add_boundname oldnames) ([], vars)
	in  case  map fst (rev newvars) of
	      b::bs =>
	       (Pretty.bg 3;  Pretty.bgs (a ^ "(");  Pretty.st b;
		seq (fn x => (Pretty.brks "," 0;  Pretty.st x)) bs;
		Pretty.ens")";	Pretty.brk(0,0); 
		recprint (0,outright) (subst_bounds(map Free newvars, body));
		Pretty.en())
	    | [] => error"print_functl"
        end
      fun print_atom t = enclose_if (needs_parens t) (recprint (0,0)) t
      fun print_comb t =
	let val (f,ts) = strip_comb t
	in  print_atom f;
	    Pretty.list ("(", ")", recprint (0,0)) ts  
	end
  in case t of
       Const (a,_)   => 
	(case (Symtab.lookup(lextab, a)) of
	    Some (InfixSy _) => Pretty.st ("op "^a)
	  | _ => Pretty.st a)
     | Free (name,_) =>  Pretty.st name
     | Bound bno => Pretty.st("B." ^ string_of_int bno) 
     | Var (uname,_) =>  Pretty.st ("?" ^ string_of_xname uname)
     | Abs _ =>  enclose_if (outright>0) print_abs
		    ("%", strip_abs_vars t, strip_abs_body t)
     | Const("all",_) $ (Abs _) =>
		enclose_if (outright>0) print_abs
		    ("!", strip_all_vars t, strip_all_body t)
     | Const(a,_) $ A $ B =>
	(case (Symtab.lookup(lextab,a)) of
	    Some (InfixSy(left,right)) =>
		if  left<=outleft orelse right<=outright 
		then (Pretty.bgs"(";  recprint (0, left) A; 
		      Pretty.st (" "^a);  Pretty.brk(1,0);  
		      recprint (right, 0) B;  Pretty.ens")")
		else (Pretty.bg 0;  recprint (outleft, left) A; 
		      Pretty.st (" "^a);  Pretty.brk(1,0);  
		      recprint (right, outright) B;  Pretty.en())
	  | _ => print_comb t)    (*No entry or not InfixSy*)
     | _ $ _ =>  print_comb t
  end;


fun prin ({lextab,printer,...}: syntax) = printer lextab;


(*** Theory constructions ***)


(** Syntax of pure type theory **)

fun parserror lxs = parserr ("Illegal expression", lxs);

fun print1 lextab pair = print_functl (print1 lextab, lextab) pair;
 
(*A basic printing function*)
fun pure_printer lextab = print1 lextab (0,0);

val pure : syntax =
   {lextab= Symtab.st_of_declist
     ([ make_infix(["=="], "right", 2) ,	(*equality*)
        make_infix(["==>"], "right", 1) ,	(*implication*)
        (["op"],	DelimSy)		(*nonfixing keyword*) ],
      Symtab.null),
    parser= parse_functl parserror 0,
    printer= pure_printer};


(*Extend a syntax by adding delimeters, infixes, and new parsing/printing fns*)
fun extend (syn: syntax) (delims,infix_decs,parser,printer) : syntax = 
  let val decs = (delims, DelimSy) :: (map make_infix infix_decs);
      val tab = Symtab.st_of_declist (decs, #lextab syn);
  in  {lextab= tab, parser=parser, printer=printer}  end
  handle Symtab.TABLE ["update_new", key] =>
	 error("Syntax.extend: Multiple declaration of " ^ key);


(*Update table with (a,x) providing any existing asgt to "a" equals x. *)
fun update_eq ((a,x),tab) =
    case Symtab.lookup(tab,a) of
	None => Symtab.update((a,x), tab)
      | Some y => if x=y then tab 
	    else  error("Syntax.merge: Incompatible declarations of " ^ a);


(*Combine tables, updating tab2 by tab1 and checking.*)
fun merge_tabs (tab1,tab2) = 
    Symtab.balance (itlist_right update_eq (Symtab.alist_of tab1, tab2));


(*Merge two signatures.  Forms unions of tables.
  Takes syntax functions of syn1 if at all possible.*)
fun merge ({lextab, parser, printer}: syntax, syn2: syntax) : syntax =
  {lextab= merge_tabs(lextab, #lextab syn2),
   parser=parser,  printer=printer};



end;
