(* ========================================================================= *)
(* Lexical analyzer, type and preterm parsers.                               *)
(* ========================================================================= *)

(* ------------------------------------------------------------------------- *)
(* Character discrimination.                                                 *)
(* ------------------------------------------------------------------------- *)

let isspace,isbra,issymb,isalnum,isnum =
  let charcode s = int_of_char(nth_char s 0) in
  let spaces = " \t\n"
  and brackets = "()[]{};,"
  and symbs = "\\!@#$%^&*-+|\\<=>/?~.:"
  and alnums =
    "'abcdefghijklmnopqrstuvwxyz_ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789"
  and nums = "0123456789" in
  let allchars = spaces^brackets^symbs^alnums^nums in
  let csetsize = itlist (max o charcode) (explode allchars) 256 in
  let ctable = make_vect csetsize 0 in
  do_list (fun c -> vect_assign ctable(charcode c) 1) (explode spaces);
  do_list (fun c -> vect_assign ctable(charcode c) 2) (explode brackets);
  do_list (fun c -> vect_assign ctable(charcode c) 4) (explode symbs);
  do_list (fun c -> vect_assign ctable(charcode c) 8) (explode alnums);
  do_list (fun c -> vect_assign ctable(charcode c) 16) (explode nums);
  let isspace c = vect_item ctable (charcode c) = 1
  and isbra c  = vect_item ctable (charcode c) = 2
  and issymb c = vect_item ctable (charcode c) = 4
  and isalnum c = vect_item ctable (charcode c) >= 8
  and isnum c = vect_item ctable (charcode c) = 16 in
  isspace,isbra,issymb,isalnum,isnum;;

(* ------------------------------------------------------------------------- *)
(* Reserved words.                                                           *)
(* ------------------------------------------------------------------------- *)

let reserve_words,unreserve_words,is_reserved_word,reserved_words =
  let reswords = ref ["(";  ")"; "[";   "]";  "{";   "}";
                      ":";  ";";  "."; "=>";  "|";
                      "let"; "in"; "and"; "if"; "then"; "else"] in
  (fun ns  -> reswords := union (!reswords) ns),
  (fun ns  -> reswords := subtract (!reswords) ns),
  (fun n  -> mem n (!reswords)),
  (fun () -> !reswords);;

(* ------------------------------------------------------------------------- *)
(* Functions to access the global tables controlling special parse status.   *)
(*                                                                           *)
(*  o List of binders;                                                       *)
(*                                                                           *)
(*  o List of prefixes (right-associated unary functions like negation).     *)
(*                                                                           *)
(*  o List of infixes with their precedences and associations.               *)
(*                                                                           *)
(* Note that these tables are independent of constant/variable status or     *)
(* whether an identifier is symbolic.                                        *)
(*                                                                           *)
(* Lookup of dollared identifiers won't find anything (unless the user       *)
(* cussedly puts them in in their dollared form) which is as desired.        *)
(* ------------------------------------------------------------------------- *)

let unparse_as_binder,parse_as_binder,parses_as_binder,binders =
  let binder_list = ref ([]:string list) in
  (fun n  -> binder_list := subtract (!binder_list) [n]),
  (fun n  -> binder_list := union (!binder_list) [n]),
  (fun n  -> mem n (!binder_list)),
  (fun () -> !binder_list);;

let unparse_as_prefix,parse_as_prefix,is_prefix,prefixes =
  let prefix_list = ref ([]:string list) in
  (fun n  -> prefix_list := subtract (!prefix_list) [n]),
  (fun n  -> prefix_list := union (!prefix_list) [n]),
  (fun n  -> mem n (!prefix_list)),
  (fun () -> !prefix_list);;

let unparse_as_infix,parse_as_infix,get_infix_status,infixes =
  let cmp ((_,(x,_)),(_,(y,_))) = x < y in
  let infix_list = ref ([]:(string * (int * string)) list) in
  (fun n     -> infix_list := filter (prefix not o prefix = n o fst)
                                     (!infix_list)),
  (fun (n,d) -> infix_list := sort cmp
     ((n,d)::(filter (prefix not o prefix = n o fst)(!infix_list)))),
  (fun n     -> assoc n (!infix_list)),
  (fun ()    -> !infix_list);;

(* ------------------------------------------------------------------------- *)
(* Need to have this now for set enums, since "," isn't a reserved word.     *)
(* ------------------------------------------------------------------------- *)

parse_as_infix (",",(14,"right"));;

(* ------------------------------------------------------------------------- *)
(* Interface maps. Very similar to HOL88 -- someday should do better.        *)
(* Note that translation is done in the lexer, so affects types too.         *)
(* ------------------------------------------------------------------------- *)

let set_interface_map,interface_map,
    apply_interface_map,reverse_interface_map,
    augment_interface_map,reduce_interface_map =
  let cmap = ref ([]:(string * string)list) in
  let set_interface_map imap = cmap := imap
  and interface_map() = !cmap
  and apply_interface_map n = try assoc n (!cmap) with Failure _ -> n
  and reverse_interface_map n = try rev_assoc n (!cmap) with Failure _ -> n
  and augment_interface_map ps = cmap := union ps (!cmap)
  and reduce_interface_map ps = cmap :=  subtract (!cmap) ps in
  set_interface_map,interface_map,
  apply_interface_map,reverse_interface_map,
  augment_interface_map,reduce_interface_map;;

(* ------------------------------------------------------------------------- *)
(* Basic parser combinators.                                                 *)
(* ------------------------------------------------------------------------- *)

exception Noparse;;

let prefix || parser1 parser2 input =
  try parser1 input
  with Noparse -> parser2 input;;

let prefix ++ parser1 parser2 input =
  let result1,rest1 = parser1 input in
  let result2,rest2 = parser2 rest1 in
  (result1,result2),rest2;;

let rec many parser input =
  try let result,next = parser input in
      let results,rest = many parser next in
      (result::results),rest
  with Noparse -> [],input;;

let prefix >> parser treatment input =
  let result,rest = parser input in
  treatment(result),rest;;

let fix err parser input =
  try parser input
  with Noparse -> failwith (err ^ " expected");;

let rec listof parser sep err =
  parser ++ many (sep ++ fix err parser >> snd) >> (prefix ::);;

let nothing input = [],input;;

let elistof parser sep err =
  listof parser sep err || nothing;;

let rightbin parser sep cons err =
  listof parser sep err >> end_itlist cons;;

let leftbin parser sep cons err =
  listof parser sep err >> end_itlist (C cons) o rev;;

let possibly parser input =
  try let x,rest = parser input in [x],rest
  with Noparse -> [],input;;

let some p =
  fun [] -> raise Noparse
    | (h::t) -> if p h then (h,t) else raise Noparse;;

let a tok = some (fun item -> item = tok);;

let finished input =
  if input = [] then 0,input else failwith "Unparsed input";;

(* ------------------------------------------------------------------------- *)
(* The basic lexical classes: identifiers, strings and reserved words.       *)
(* ------------------------------------------------------------------------- *)

type lexcode = Ident of string
             | Resword of string;;

(* ------------------------------------------------------------------------- *)
(* Lexical analyzer. Apart from some special bracket symbols, each           *)
(* identifier is made up of the longest string of alphanumerics or           *)
(* the longest string of symbolics, possibly with a dollar in either case.   *)
(* ------------------------------------------------------------------------- *)

let lex =
  let collect (h,t) = end_itlist (prefix^) (h::t) in
  let dollar = fun (Ident n) -> Ident("$"^n) in
  let reserve =
    fun (Ident n as tok) ->
        if is_reserved_word n then Resword(n) else tok
      | t -> t in
  let rawident = (some isalnum ++ many (some isalnum)
               || some issymb ++ many (some issymb))
    >> (Ident o apply_interface_map o collect) in
  let ident = a "$" ++ rawident >> (dollar o snd) || rawident in
  let bracket = some isbra >> Ident in
  let rawtoken = bracket || ident in
  let token = rawtoken ++ many (some isspace) >> (reserve o fst) in
  fst o (many (some isspace) ++ many token ++ finished >> (snd o fst));;

(* ------------------------------------------------------------------------- *)
(* Parser for pretypes. Concrete syntax:                                     *)
(*                                                                           *)
(* TYPE        :: SUMTYPE -> TYPE                                            *)
(*              | SUMTYPE                                                    *)
(*                                                                           *)
(* SUMTYPE     :: PRODTYPE + SUMTYPE                                         *)
(*              | PRODTYPE                                                   *)
(*                                                                           *)
(* PRODTYPE    :: APPTYPE # PRODTYPE                                         *)
(*              | APPTYPE                                                    *)
(*                                                                           *)
(* APPTYPE     :: ATOMICTYPES type-constructor  [Provided arity matches]     *)
(*              | ATOMICTYPES                   [Provided only 1 ATOMICTYPE] *)
(*                                                                           *)
(* ATOMICTYPES :: type-constructor              [Provided arity zero]        *)
(*              | type-variable                                              *)
(*              | ( TYPE )                                                   *)
(*              | ( TYPE LIST )                                              *)
(*                                                                           *)
(* TYPELIST    :: TYPE , TYPELIST                                            *)
(*              | TYPE                                                       *)
(*                                                                           *)
(* Two features make this different from previous HOL type syntax:           *)
(*                                                                           *)
(*  o Any identifier not in use as a type constant will be parsed as a       *)
(*    type variable; a ' is not needed and a * is not allowed.               *)
(*                                                                           *)
(*  o Antiquotation is not yet supported.                                    *)
(* ------------------------------------------------------------------------- *)

let parse_pretype =
  let btyop n x y = Ptycon(n,[x;y])
  and mk_apptype =
    fun ([s],[]) -> s
      | (tys,[c]) -> Ptycon(c,tys)
      | _ -> failwith "Bad type construction"
  and type_atom input =
    match input with
      (Ident s)::rest ->
          (if try get_type_arity s = 0 with Failure _ -> false
           then Ptycon(s,[]) else Utv(s)),rest
    | _ -> raise Noparse
  and type_constructor input =
    match input with
      (Ident s)::rest -> if try get_type_arity s > 0 with Failure _ -> false
                         then s,rest else raise Noparse
    | _ -> raise Noparse in
  let rec pretype i = rightbin sumtype (a (Ident "->")) (btyop "fun") "type" i
  and sumtype i = rightbin prodtype (a (Ident "+")) (btyop "sum") "type" i
  and prodtype i = rightbin apptype (a (Ident "#")) (btyop "prod") "type" i
  and apptype i = (atomictypes ++ (type_constructor >> (fun x -> [x])
                                || nothing) >> mk_apptype) i
  and atomictypes i =
        (((a (Resword "(")) ++ typelist ++ a (Resword ")") >> (snd o fst))
      || type_atom >> (fun x -> [x])) i
  and typelist i = listof pretype (a (Ident ",")) "type" i in
  pretype;;

(* ------------------------------------------------------------------------- *)
(* Precedence parsing.                                                       *)
(* ------------------------------------------------------------------------- *)

let rec mk_precedence infixes parser input =
  match infixes with
    (s,(p,at))::rest ->
        (if at = "right" then rightbin else leftbin)
        (mk_precedence (tl infixes) parser)
        (a (Ident s))
        (fun x y -> Combp(Combp(Varp(s,Dpty),x),y))
        ("term after "^s)
        input
  | _ -> parser input;;

(* ------------------------------------------------------------------------- *)
(* Hook to allow installation of user parsers.                               *)
(* ------------------------------------------------------------------------- *)

let install_parser,delete_parser,installed_parsers,try_user_parser =
  let rec try_parsers ps i =
    if ps = [] then raise Noparse else
    try snd(hd ps) i with Noparse -> try_parsers (tl ps) i in
  let parser_list = ref [] in
  (fun dat -> parser_list := dat::(!parser_list)),
  (fun (key,_) -> try parser_list := snd (remove (fun (key',_) -> key = key')
                                                 (!parser_list))
                  with Failure _ -> ()),
  (fun () -> !parser_list),
  (fun i -> try_parsers (!parser_list) i);;

(* ------------------------------------------------------------------------- *)
(* Initial preterm parsing. This uses binder and precedence/associativity/   *)
(* prefix status to guide parsing and preterm construction, but treats all   *)
(* identifiers as variables.                                                 *)
(*                                                                           *)
(* PRETERM            :: APPL_PRETERM => PRETERM | PRETERM                   *)
(*                     | APPL_PRETERM                                        *)
(*                                                                           *)
(* APPL_PRETERM       :: APPL_PRETERM : type                                 *)
(*                     | APPL_PRETERM BINDER_PRETERM                         *)
(*                     | BINDER_PRETERM                                      *)
(*                                                                           *)
(* BINDER_PRETERM     :: binder VARSTRUCT_PRETERMS . PRETERM                 *)
(*                     | let PRETERM and ... and PRETERM in PRETERM          *)
(*                     | ATOMIC_PRETERM                                      *)
(*                                                                           *)
(* VARSTRUCT_PRETERMS :: TYPED_PRETERM VARSTRUCT_PRETERMS                    *)
(*                     | TYPED_PRETERM                                       *)
(*                                                                           *)
(* TYPED_PRETERM      :: TYPED_PRETERM : type                                *)
(*                     | ATOMIC_PRETERM                                      *)
(*                                                                           *)
(* ATOMIC_PRETERM     :: ( PRETERM )                                         *)
(*                     | [ PRETERM; .. ; PRETERM ]                           *)
(*                     | { PRETERM; .. ; PRETERM }                           *)
(*                     | { PRETERM | PRETERM }                               *)
(*                     | identifier                                          *)
(*                                                                           *)
(* Note that arbitrary preterms are allowed as varstructs. This allows       *)
(* more general forms of matching and considerably regularizes the syntax.   *)
(* ------------------------------------------------------------------------- *)

let parse_typed_apreterm,parse_preterm =
  let pretype = parse_pretype
  and singleton1 x = [x]
  and singleton2 (x,y) = [y]
  and lmk_cond =
    fun (b,[_,((l,_),r)]) ->
          Combp(Combp(Combp(Varp("COND",Dpty),b),l),r)
      | (b,_) -> b
  and lmk_ite (((((_,b),_),l),_),r) =
          Combp(Combp(Combp(Varp("COND",Dpty),b),l),r)
  and lmk_typed =
    fun (p,[]) -> p | (p,[ty]) -> Typing(p,ty) | _ -> fail()
  and lmk_let (((_,bnds),_),ptm) = pmk_let (bnds,ptm)
  and lmk_binder ((((s,h),t),_),p) = pmk_binder(s,h::t,p)
  and lmk_setenum(l,_) = pmk_set_enum l
  and lmk_setabs(((l,_),r),_) = pmk_setabs(l,r)
  and nullset _ = Varp("EMPTY",Dpty)
  and identifier =
    fun ((Ident s):: rest) ->
        if can get_infix_status s or is_prefix s or parses_as_binder s
        then raise Noparse else s,rest
      | _ -> raise Noparse
  and binder =
    fun ((Ident s):: rest) ->
        if parses_as_binder s then s,rest else raise Noparse
      | _ -> raise Noparse
  and pre_fix =
    fun ((Ident s):: rest) ->
        if is_prefix s then s,rest else raise Noparse
      | _ -> raise Noparse in
  let rec preterm i =
    leftbin nepreterm (a (Ident "=")) (curry pmk_eq) "term" i
  and nepreterm i =
    (binary_preterm ++
     possibly (a (Resword "=>") ++
               fix "term" (preterm ++ a (Resword "|") ++ preterm))
     >> lmk_cond
  || a (Resword "if") ++
     preterm ++
     a (Resword "then") ++
     preterm ++
     a (Resword "else") ++
     preterm
     >> lmk_ite) i
  and binary_preterm i =
    mk_precedence (tl (infixes())) typed_appl_preterm i
  and typed_appl_preterm i =
    (appl_preterm ++
     possibly (a (Resword ":") ++ pretype >> snd)
    >> lmk_typed) i
  and appl_preterm i =
    (pre_fix ++ appl_preterm
    >> (fun (x,y) -> Combp(Varp(x,Dpty),y))
  || binder_preterm ++ many binder_preterm >>
        (fun (h,t) -> itlist (fun x y -> Combp(y,x)) (rev t) h)) i
  and binder_preterm i =
    (a (Resword "let") ++
     leftbin (preterm >> singleton1) (a (Resword "and")) (prefix@) "binding" ++
     a (Resword "in") ++
     preterm
    >> lmk_let
  || binder ++
     typed_apreterm ++
     many typed_apreterm ++
     a (Resword ".") ++
     preterm
    >> lmk_binder
  || atomic_preterm) i
  and typed_apreterm i =
    (atomic_preterm ++
     possibly (a (Resword ":") ++ pretype >> snd)
    >> lmk_typed) i
  and atomic_preterm i =
    (try_user_parser
  || a (Resword "(") ++
     preterm ++
     a (Resword ")")
    >> (snd o fst)
  || a (Resword "[") ++
     elistof preterm (a (Resword ";")) "term" ++
     a (Resword "]")
    >> (pmk_list o snd o fst)
  || a (Resword "{") ++
     (elistof typed_apreterm (a (Ident ",")) "term" ++  a (Resword "}")
        >> lmk_setenum
   || preterm ++ a (Resword "|") ++ preterm ++ a (Resword "}")
        >> lmk_setabs)
    >> snd
  || identifier
    >> (fun s -> Varp(s,Dpty))) i in
  typed_apreterm,preterm;;

(* ------------------------------------------------------------------------- *)
(* Type and term parsers.                                                    *)
(* ------------------------------------------------------------------------- *)

let parse_type s =
  let pty,l = (parse_pretype o lex o explode) s in
  if l = [] then type_of_pretype pty
  else failwith "Unexpected junk after type";;

let parse_term s =
  let ptm,l = (parse_preterm o lex o explode) s in
  if l = [] then
   (term_of_preterm o retypecheck [] o undollar) ptm
  else failwith "Unexpected junk after term";;

(* ------------------------------------------------------------------------- *)
(* Short forms.                                                              *)
(* ------------------------------------------------------------------------- *)

let X = parse_term and Y = parse_type;;
