structure Parse_support = Parse_support;
structure Tokens = Tokens;

type pos = int;
val line:pos = 0;
type svalue = Tokens.svalue;
type ('a,'b) token = ('a,'b) Tokens.token;
type lexresult = (svalue,pos) Tokens.token;


fun error(s,_,_) = output(std_out,"HOL lexer error: "^s^"\n");

fun eof (_:Tokens.Parse_support.Preterm.Term.term list ref) = Tokens.EOF(line,line);
val type_paren_count = ref 0;
val comment_paren_count = ref 0;
val string_list = ref ([]:string list);
exception AQ_ERR of string;
exception LEX_ERR of string;

fun has_tilde s =
   let val tilde = ord "~"
       fun f i = ((ordof(s,i) = tilde) orelse f(i+1))
   in f 0 handle _ => false
   end;

(*
fun break s = snd
   (itlist (fn ch => fn (current_frag,seen) =>
             if (ch = "~")
             then if (null current_frag)
                  then ([],(ch::seen))
                  else ([],(ch::(implode current_frag)::seen))
             else ((ch::current_frag),seen))
           (""::explode s) ([],[]));
*)


(* Confusion warning: symbolic_ident means roughly 

       Maybe a $ followed by any sequence of symbols
*)
%%
%header (functor HOL_LEX(structure Tokens : hol_TOKENS
                         structure Parse_support : Parse_support_sig
                         sharing
                           Tokens.Parse_support = Parse_support));

%arg (lala : Parse_support.Preterm.Term.term list ref);
%s TYPE TYCOMMENT COMMENT STRING;
%reject
ident = [\$]?([A-Za-z0-9] [A-Za-z0-9_']*);
symbolic_ident_or_tilde = [\$]?([#\?\+\*\/\\\=\<\>&%@!:\,\;_\|\~-]+);
symbolic_ident = [\$]?([#\?\+\*\/\\\=\<\>&%@!:\,\;_\|-]+);
type_var_ident = ['] [A-Za-z][A-Za-z0-9_]*;
ws = [\ \t \010 \012];
%%
<INITIAL>\n => (continue());
<INITIAL>{ws}+ => (continue());
<INITIAL>"(*" => ( YYBEGIN COMMENT; comment_paren_count := 1; continue());
<INITIAL>"\"" => ( YYBEGIN STRING; string_list := [yytext]; continue());
<INITIAL>"." => (Tokens.dot(line,line));
<INITIAL>"(" => (Tokens.lparen(line,line));
<INITIAL>")" => (Tokens.rparen(line,line));
<INITIAL>"{" => (Tokens.lbrace(line,line));
<INITIAL>"}" => (Tokens.rbrace(line,line));
<INITIAL>"[" => (Tokens.lbracket(line,line));
<INITIAL>"]" => (Tokens.rbracket(line,line));

<INITIAL>"{|" => (Tokens.hoare_lbrace(line,line));
<INITIAL>"|}" => (Tokens.hoare_rbrace(line,line));
<INITIAL>"[|" => (Tokens.hoare_lbracket(line,line));
<INITIAL>"|]" => (Tokens.hoare_rbracket(line,line));

<INITIAL>"^" => (let val (L as ref (x::t)) = lala
                     val _ = L := t
                 in Tokens.aq(x,line,line)
                 end
                 handle _ => raise AQ_ERR "lexer.INITIAL");
<INITIAL>{ident} => (case yytext
         of 
     (* Hoare logic identifiers *)
            "skip" => Tokens.hoare_skip(line,line)
          | "abort" => Tokens.hoare_abort(line,line)
          | "if" => Tokens.hoare_if(line,line)
          | "then" => Tokens.hoare_then(line,line)
          | "else" => Tokens.hoare_else(line,line)
          | "fi" => Tokens.hoare_fi(line,line)
          | "assert" => Tokens.hoare_assert(line,line)
          | "invariant" => Tokens.hoare_invariant(line,line)
          | "variant" => Tokens.hoare_variant(line,line)
          | "while" => Tokens.hoare_while(line,line)
          | "do" => Tokens.hoare_do(line,line)
          | "done" => Tokens.hoare_done(line,line)
     (* end Hoare logic identifiers *)

          | "let" => Tokens.let_(line,line)
          | "in" => Tokens.in_(line,line)
          | "and" => Tokens.and_(line,line)
          | "of" => (case (!Globals.in_type_spec)
                       of NONE => raise LEX_ERR(quote "of"^" is a keyword.")
                        |(SOME"")=>raise LEX_ERR(quote "of"^" is a keyword.")
                        |(SOME _) => (YYBEGIN TYPE; type_paren_count := 0;
                                      Tokens.of_(line,line)))
          |    _ => if (Parse_support.is_binder yytext)
                    then Tokens.binder(yytext,line,line)
                    else Tokens.ident(yytext,line,line));

<INITIAL>{symbolic_ident_or_tilde} => 
   ( if ((has_tilde yytext) andalso (mem yytext (!Globals.tilde_symbols)))
     then if (Parse_support.is_binder yytext)
          then Tokens.binder(yytext,line,line)
          else Tokens.symbolic_ident(yytext,line,line)
     else REJECT());

<INITIAL>{symbolic_ident} => ( case yytext 
         of 
      (* Hoare logic symbolic identifiers *)
            ";;" => Tokens.hoare_command_sep(line,line)          
          | ":=" => Tokens.hoare_assign(line,line)          
      (* end Hoare logic symbolic identifiers *)

          | ";" => Tokens.semi_colon(line,line)
          | "=>" => Tokens.eq_gt(line,line)
          | "="  => Tokens.eq(line,line)
          | "|"  => Tokens.bar(line,line)
          | ":" => ((case(!Globals.in_type_spec)
                       of NONE => (YYBEGIN TYPE;  type_paren_count := 0)
                        | _ => ());
                    Tokens.colon(line,line))
          |   _  => if (Parse_support.is_binder yytext)
                    then Tokens.binder(yytext,line,line)
                    else Tokens.symbolic_ident(yytext,line,line));

<INITIAL>"$~" => (Tokens.ident("~",line,line));
<INITIAL>"~"  => (Tokens.ident("~",line,line));

<INITIAL>. => (raise LEX_ERR "INITIAL.catchall");

<TYPE>\n => (continue());
<TYPE>{ws}+ => (continue());
<TYPE>"(*" => ( YYBEGIN TYCOMMENT; comment_paren_count := 1; continue());
<TYPE>{type_var_ident} => (Tokens.type_var_ident(yytext,line,line));
<TYPE>{ident} => (case yytext
                    of 
                (* Hoare logic keywords *)
                       "skip" => Tokens.hoare_skip(line,line)
                     | "abort" => Tokens.hoare_abort(line,line)
                     | "if" => Tokens.hoare_if(line,line)
                     | "then" => Tokens.hoare_then(line,line)
                     | "else" => Tokens.hoare_else(line,line)
                     | "fi" => Tokens.hoare_fi(line,line)
                     | "assert" => Tokens.hoare_assert(line,line)
                     | "invariant" => Tokens.hoare_invariant(line,line)
                     | "variant" => Tokens.hoare_variant(line,line)
                     | "while" => Tokens.hoare_while(line,line)
                     | "do" => Tokens.hoare_do(line,line)
                     | "done" => Tokens.hoare_done(line,line)
                (* end Hoare logic keywords *)

                     | "let" => (YYBEGIN INITIAL; Tokens.let_(line,line))
                     | "in" => (YYBEGIN INITIAL; Tokens.in_(line,line))
                     | "and" => (YYBEGIN INITIAL; Tokens.and_(line,line))
                     |    _ => Tokens.type_ident(yytext,line,line));

<TYPE>"^" => (let val (L as ref (x::t)) = lala
                  val () = L := t
              in Tokens.aq(x,line,line)
              end
              handle _ => raise AQ_ERR "lexer.TYPE");

<TYPE>"->" => (Tokens.type_right_arrow(line,line));
<TYPE>"+" => (Tokens.type_plus(line,line));
<TYPE>"#" => (Tokens.type_hash(line,line));
<TYPE>"," => (if (!type_paren_count = 0)
              then (YYBEGIN INITIAL; Tokens.symbolic_ident(",",line,line))
              else Tokens.type_comma(line,line));

<TYPE>"."  =>  (YYBEGIN INITIAL; Tokens.dot(line,line));
<TYPE>"("  => (inc type_paren_count; Tokens.type_lparen(line,line));
<TYPE>")"  => (if (!type_paren_count = 0)
               then (YYBEGIN INITIAL; Tokens.rparen(line,line))
               else (dec type_paren_count; Tokens.type_rparen(line,line)));

<TYPE>"[" => (YYBEGIN INITIAL; Tokens.lbracket(line,line));
<TYPE>"]" => (YYBEGIN INITIAL; Tokens.rbracket(line,line));
<TYPE>"{" => (YYBEGIN INITIAL; Tokens.lbrace(line,line));
<TYPE>"}" => (YYBEGIN INITIAL; Tokens.rbrace(line,line));

<TYPE>"[|" => (YYBEGIN INITIAL; Tokens.hoare_lbracket(line,line));
<TYPE>"|]" => (YYBEGIN INITIAL; Tokens.hoare_rbracket(line,line));
<TYPE>"{|" => (YYBEGIN INITIAL; Tokens.hoare_lbrace(line,line));
<TYPE>"|}" => (YYBEGIN INITIAL; Tokens.hoare_rbrace(line,line));

<TYPE>"=>" => ((case (!Globals.in_type_spec)
                  of (SOME _) => ()
                   | NONE => YYBEGIN INITIAL);
               Tokens.eq_gt(line,line));

<TYPE>{symbolic_ident_or_tilde} => 
   ( if ((has_tilde yytext) andalso not(mem yytext (!Globals.tilde_symbols)))
     then( YYBEGIN INITIAL;
           if (Parse_support.is_binder yytext)
           then Tokens.binder(yytext,line,line)
           else Tokens.symbolic_ident(yytext,line,line))
     else REJECT());

<TYPE>{symbolic_ident} => 
        ( YYBEGIN INITIAL;
          case yytext 
            of 
      (* Hoare logic symbolic identifiers *)
               ";;" => Tokens.hoare_command_sep(line,line)          
             | ":=" => Tokens.hoare_assign(line,line)          
      (* end Hoare logic symbolic identifiers *)

             | ";"  => Tokens.semi_colon(line,line)
             | "=>" => Tokens.eq_gt(line,line)
             | "=" => Tokens.eq(line,line)
             | "|"  => Tokens.bar(line,line)
             |   _  => if (Parse_support.is_binder yytext)
                       then Tokens.binder(yytext,line,line)
                       else Tokens.symbolic_ident(yytext,line,line));
<TYPE>"$~" => (YYBEGIN INITIAL;Tokens.ident("~",line,line));
<TYPE>"~"  => (YYBEGIN INITIAL;Tokens.ident("~",line,line));

<TYPE>. => (raise LEX_ERR "TYPE.catchall");

<TYCOMMENT>"\n" => (continue());
<TYCOMMENT>"(*" => (inc comment_paren_count; continue());
<TYCOMMENT>"*)" => (dec comment_paren_count;
                    if (!comment_paren_count = 0)
                    then YYBEGIN TYPE
                    else ();
                    continue());
<TYCOMMENT>. => (continue());

<COMMENT>"\n" => (continue());
<COMMENT>"(*" => (inc comment_paren_count; continue());
<COMMENT>"*)" => (dec comment_paren_count;
                  if (!comment_paren_count = 0)
                  then YYBEGIN INITIAL
                  else ();
                  continue());
<COMMENT>. => (continue());

<STRING>"\n" => (string_list :=  yytext::(!string_list); continue());
<STRING>"\\\"" =>(string_list := yytext::(!string_list); continue());
<STRING>"\"" => (YYBEGIN INITIAL; 
              Tokens.string_(implode(rev(yytext::(!string_list))),line,line));
<STRING>. => (string_list :=  yytext::(!string_list); continue());
