(**************************************************************************)
(*         x86 Multiprocessor Machine Code Semantics: HOL sources         *)
(*                                                                        *)
(*                                                                        *)
(*  Susmit Sarkar (1), Peter Sewell (1), Francesco Zappa Nardelli (2),    *)
(*  Scott Owens (1), Thomas Braibant (2), Magnus Myreen (1),              *)
(*  Jade Alglave (2)                                                      *)
(*                                                                        *)
(*   (1) Computer Laboratory, University of Cambridge                     *)
(*   (2) Moscova project, INRIA Paris-Rocquencourt                        *)
(*                                                                        *)
(*    Copyright 2007-2008                                                 *)
(*                                                                        *)
(*  Redistribution and use in source and binary forms, with or without    *)
(*  modification, are permitted provided that the following conditions    *)
(*  are met:                                                              *)
(*                                                                        *)
(*  1. Redistributions of source code must retain the above copyright     *)
(*     notice, this list of conditions and the following disclaimer.      *)
(*  2. Redistributions in binary form must reproduce the above copyright  *)
(*     notice, this list of conditions and the following disclaimer in    *)
(*     the documentation and/or other materials provided with the         *)
(*     distribution.                                                      *)
(*  3. The names of the authors may not be used to endorse or promote     *)
(*     products derived from this software without specific prior         *)
(*     written permission.                                                *)
(*                                                                        *)
(*  THIS SOFTWARE IS PROVIDED BY THE AUTHORS ``AS IS'' AND ANY EXPRESS    *)
(*  OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED     *)
(*  WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE    *)
(*  ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS BE LIABLE FOR ANY       *)
(*  DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL    *)
(*  DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE     *)
(*  GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS         *)
(*  INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,          *)
(*  WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING             *)
(*  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS    *)
(*  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.          *)
(*                                                                        *)
(**************************************************************************)

(* *** types *)
exception Exit

type arg = 
  | A_eax
  | A_r32
  | A_r_m32
  | A_imm8
  | A_imm16
  | A_imm32
  | A_rel8
  | A_rel32

type instruction_skel = 
    { mnemonic : string;
      arg1 : arg option;
      arg2 : arg option }

type instruction_set = instruction_skel list

type init_type = 
  | Reg of string
  | Loc of string

type arg_instance =
    { arg_segment : (string*string*int*string*int) list;   (* base, size, factor, index, constant *)
      arg_init : (init_type*string) list;
      arg_code : string }

type instruction_instance =
    { segment : (string*string*int*string*int) list;
      init : (init_type*string) list;
      pre : string;
      code : string;
      post : string
    }

(* statistics *)
type stat_arg =
  { s_const : int ref;
    s_reg : int ref;
    s_mem_abs : int ref;
    s_mem_ind : int ref }

let inc_s_const s   = s.s_const := !(s.s_const)+1
let inc_s_reg s     = s.s_reg := !(s.s_reg)+1
let inc_s_mem_abs s = s.s_mem_abs := !(s.s_mem_abs)+1
let inc_s_mem_ind s = s.s_mem_ind := !(s.s_mem_ind)+1


type stat =
    { s_instruction : string;
      s_no_ok : int ref;
      s_no_fail : int ref }

type stats = 
    { s_instr_stat : stat list;
      s_arg_stat : stat_arg }

(* *** options *)
let hol_sem_dir = ref "../tarball-spec_public"
let x86_decoder = ref None 
let debug_f = ref false
let no_asm_f = ref false
let no_test_f = ref false
let report = ref "report"
let repeat_tests = ref 1

(* *** state, used to build the statistic *)
let current_instruction_skel = ref ""
let current_instruction_arg = { s_const = ref 0; s_reg = ref 0; s_mem_abs = ref 0; s_mem_ind = ref 0; }

(* *** aux functions *)
let debug s =
  if !debug_f then print_endline ("x86sem - debug: "^s)

let string_option s =
  match s with Some s -> s | None -> ""

let internal s =
  print_endline ("x86sem - internal: "^s)

let warning s = ()
(*  if !warning_f then print_endline ("x86sem - warning: "^s) *)

let error s = 
  print_endline ("x86sem - error: "^s); 
  raise Exit

let exec c =
 match Unix.system c with
 | Unix.WEXITED 0 -> ()
 | _ -> error ("command "^c^" failed.")

let rec option_map f l =
  match l with
  | [] -> []
  | h::t -> 
      let r = f h in
      ( match r with
      | None -> option_map f t
      | Some v -> v :: (option_map f t) )

let hex_to_int h =
  Scanf.sscanf h "%x" (fun i -> i)

let int_to_hex i =
  Printf.sprintf "%X" i

let rnd_no =
  Random.self_init ();
  fun n -> Random.int n

let pp_arg_option ao =
  match ao with
  | Some A_eax   -> "EAX"
  | Some A_r32   -> "r32" 
  | Some A_r_m32 -> "r/m32"
  | Some A_imm32 -> "imm32"
  | Some A_imm16 -> "imm16"
  | Some A_imm8  -> "imm8" 
  | Some A_rel8  -> "rel8"
  | Some A_rel32 -> "rel32"
  | None         -> ""

(* *** detect Holmake executable *)

let holmake_dir = 
  try
    let ch = Unix.open_process_in "which Holmake" in
    let holmake_guess = Filename.dirname (input_line ch) in
    ignore (Unix.close_process_in ch);
    holmake_guess 
  with End_of_file -> 
    error "Holmake must be in the PATH"

(* *** parsing *)

let parse_arg s =
  match s with
  | "EAX"    -> Some A_eax
  | "r32"    -> Some A_r32
  | "r/m32"  -> Some A_r_m32
  | "imm8"   -> Some A_imm8
  | "imm16"  -> Some A_imm16
  | "imm32"  -> Some A_imm32
  | "rel8"   -> Some A_rel8
  | "rel32"   -> Some A_rel32
  | _ -> debug ("warning: cannot parse argument: "^s); None

let parse_instruction_set () = 
  try
    let ch = open_in 
	( match !x86_decoder with
	| Some s -> s
	| None -> Filename.concat !hol_sem_dir "x86_decoderScript.sml" ) in
    (* first skip plenty of lines *)
    let line_no = ref 1 in
    let regexp = Str.regexp_string "val x86_syntax_list = `` [" in
    let rec skip () =
      let l = input_line ch in
      line_no := !line_no + 1;
      if Str.string_match regexp l 0 then () else skip () in
    skip ();
    (* debug ("skipped until line: "^(string_of_int !line_no)); *)
    let rec parse_instr_def instr_set =
      let l = input_line ch in
      line_no := !line_no + 1;
      if String.contains l ']'
      then instr_set
      else (
	try 
	  let pos_start = (String.index l '|') + 2 in
	  let pos_end = (String.rindex l '"')  in 
          let instr_string = String.sub l pos_start (pos_end-pos_start) in
	  let mne,arg_string =  (* parse mnemonics *)
	    let mne_end = String.index instr_string ' ' in
	    ( String.sub instr_string 0 mne_end,
	      String.sub instr_string (mne_end+1) ((String.length instr_string)-mne_end-1) ) in

  	  let arg1, arg_string = (* parse first arg *)
	    try
	      let arg1_end = String.index arg_string ',' in
	      ( parse_arg (String.sub arg_string 0 arg1_end),
		let n_end = (* skip at most one space after the comma *)
		  match String.get arg_string (arg1_end+1) with 
		  | ' ' -> arg1_end + 2
		  | _ -> arg1_end + 1 in
		String.sub arg_string n_end ((String.length arg_string)-n_end) ) 
	    with Not_found -> (None, arg_string) in

	  let arg2 = (* parse second arg *)
	    let arg2_end = 
	      try String.index arg_string ' ' 
	      with Not_found -> String.length arg_string in
	    if arg2_end = 0 
	    then None
	    else parse_arg (String.sub arg_string 0 arg2_end) in
  
	  let arg1, arg2 = (* shift left args: ugly, sorry *)
	    ( match arg1, arg2 with
	    | None, None | Some _ , Some _ -> arg1, arg2
	    | None, Some _ -> arg2, arg1
	    | Some _ , None -> internal "never happen in parse_instr_set"; arg1, arg2 ) in

(* 	  let arg1_debug = pp_arg_option arg1 in *)
(* 	  let arg2_debug = pp_arg_option arg2 in *)
(* 	  debug ((string_of_int (!line_no-1))^": "^mne^" "^arg1_debug^" "^arg2_debug); *)

          parse_instr_def 
	    ( { mnemonic = mne; arg1 = arg1; arg2 = arg2 } :: instr_set )
        with Not_found -> 
	  ( debug ("warning: skipped decoder line "^(string_of_int (!line_no - 1))^"."); 
	    parse_instr_def instr_set ) )

    in
    let instr_set = parse_instr_def [] in
    let no_instr = List.length instr_set in
    debug ("parsed "^(string_of_int no_instr)^" instructions from x86_decoder.");
    (instr_set, no_instr)
    
  with Sys_error s -> error ("Cannot find the HOL x86 infrastructure\n   "^s)

let mnemonic_of_instruction instruction =
  try String.sub instruction.code 0 (String.index instruction.code ' ')
  with Not_found -> instruction.code

let extract_instruction_hex instruction =
  let ch = open_in "fztest.txt" in
  let regexp = Str.regexp_string "<x86sem>:" in
  let rec skip () =
    let l = input_line ch in
    try let _ = Str.search_forward regexp l 0 in ()
    with Not_found -> skip () in
  skip ();
  let line1 = 
    let l = input_line ch in
    let ib = (String.index l ':') + 1 in
    let ie = 
      let mne = String.lowercase (mnemonic_of_instruction instruction) in
      let _ = Str.search_forward (Str.regexp_string mne) l 0 in
      Str.match_beginning () in
    let hex = String.sub l ib (ie-ib) in
    String.uppercase (String.concat "" (Str.split (Str.regexp "[ \t]+") hex)) in
  let line2 =
    try 
      let l = input_line ch in
      let ib = (String.index l ':') + 1 in
      let parse = 
	try let _ = String.get l 40 in false
	with Not_found -> true in
      if parse 
      then     
	let hex = String.sub l ib ((String.length l)-ib) in
	String.uppercase (String.concat "" (Str.split (Str.regexp "[ \t]+") hex))
      else "" 
    with Not_found -> "" in
  line1 ^ line2

(* *** pp *)
let pp_init l =
  String.concat "\n        " 
    (List.map 
       (fun (x,y) -> 
	 ( match x with
	 | Reg r -> "MOV "^r^", "^y
	 | Loc l -> "MOV "^l^", "^y ) )
       l)

let pp_segment s = 
  String.concat "\n" (List.map (fun (l,size,_,_,_) -> l^" resd "^size) s)

(* *** build instructions *)
let available_registers : string list ref = ref []

let flags = [ "CF"; "PF"; "AF"; "ZF"; "SF"; "OF" ]

let init_available_registers () =
  available_registers := [ "EAX"; "ECX"; "EDX"; "EBX"; "ESP"; "EBP"; "ESI"; "EDI" ]

let available_index_registers () = 
  List.filter (fun x -> not ((String.compare x "ESP") = 0)) !available_registers
  
let update_available_registers r =
  available_registers := List.filter (fun x -> not ((String.compare x r) = 0)) !available_registers
  
let build_imm8 () =
  Printf.sprintf "0x%X" (Random.int (1 lsl 8))

let build_imm16 () =
  Printf.sprintf "0x%X" (Random.int (1 lsl 16))

let build_imm32 () =
  (* attempts to generate more often corner cases *)
  let rd = Printf.sprintf "0x%lX" (Random.int32 (Int32.max_int)) in
  let imm32_list = [ "0x0"; "0x1"; "0x2"; rd; "0xFFFFFFFE"; "0xFFFFFFFF" ] in
  let imm32_no = List.length imm32_list in
  List.nth imm32_list (rnd_no imm32_no)

let build_reg r32 =
  let no_r32 = List.length r32 in
  let r = List.nth r32 (rnd_no no_r32) in
  update_available_registers r; 
  r

let build_base_reg () = 
  build_reg (!available_registers)

let build_index_reg () =
  build_reg (available_index_registers ())

let build_r32 o =
  let reg =
    match o with
    | None -> build_base_reg ()
    | Some r -> update_available_registers r; r in
  let init_value = build_imm32 () in
  let init = (Reg reg,init_value) in
  { arg_segment = []; arg_init = [ init ]; arg_code = "DWORD "^reg }

let build_m32 =
  let index = ref (-1) in 
  fun () ->
    (* build a label, put it in the bss segment *)
    ( index := !index + 1;
      let base_address = "loc"^(string_of_int !index) in
      let init_value = build_imm32 () in

      let (address, size, init, fact, offset, const) = 
	if Random.bool ()
	then ( (* direct addressing mode *)
	  inc_s_mem_abs current_instruction_arg; (* stat *)
	  "DWORD ["^base_address^"]", "1", [ (Loc ("DWORD ["^base_address^"]"), init_value) ], 1, "0", 0 ) 
	else ( (* indirect addressing mode *)
	  inc_s_mem_ind current_instruction_arg; (* stat *)
	  let br = build_base_reg () in
	  let ir = build_index_reg () in 
	  let fact, fact_s = 
	    let allowed_fact = [ 1; 1; 2; 4; 8 ] in (* generate 1 more often *)
	    let candidate = List.nth allowed_fact (rnd_no (List.length allowed_fact)) in
	    if candidate = 1 then (1,"") else (candidate,(string_of_int candidate)^"*") in
	  let const, const_s = 
	    if Random.bool () 
	    then (0, "")
	    else let c = rnd_no 40 in (c, "+"^(string_of_int c)) in (* SHOULD GENERATE NEGATIVE CONSTANTS *)
	  let address = "DWORD ["^br^" + "^fact_s^ir^const_s^"]" in
	  let init_base_reg = (Reg br, base_address) in
	  let size = Random.int (1 lsl 8) in
	  let content_index_reg = string_of_int (Random.int ((size - 4 - const) / fact)) in 
	  let init_index_reg = (Reg ir, content_index_reg) in
	  let init_mem = (Loc address, init_value) in
	  ( address, string_of_int size, [ init_base_reg; init_index_reg; init_mem ], fact, content_index_reg, const) 
	 ) in

      { arg_segment = [ (base_address,size,fact,offset,const) ]; arg_init = init; arg_code = address } )

let build_arg arg_skel =
  match arg_skel with
  | A_eax -> 
      inc_s_reg current_instruction_arg;
      build_r32 (Some "EAX")
  | A_r32 -> 
      inc_s_reg current_instruction_arg;
      build_r32 None
  | A_r_m32 -> 
      if Random.bool () 
      then (
	inc_s_reg current_instruction_arg;
	build_r32 None )
      else build_m32 ()
  | A_imm8  -> 
      inc_s_const current_instruction_arg;
      { arg_segment = []; arg_init = []; arg_code = "BYTE "^(build_imm8()) }
  | A_imm16 -> 
      inc_s_const current_instruction_arg;
      { arg_segment = []; arg_init = []; arg_code = "WORD "^(build_imm16()) }
  | A_imm32 -> 
      inc_s_const current_instruction_arg;
      { arg_segment = []; arg_init = []; arg_code = "DWORD "^(build_imm32()) }
  | A_rel8
  | A_rel32 -> 
      { arg_segment = []; arg_init = []; arg_code = "*rel*" }

let current_instruction_init skel =
  current_instruction_skel := skel.mnemonic ^" "^(pp_arg_option skel.arg1)^" "^(pp_arg_option skel.arg2);
  current_instruction_arg.s_const := 0; 
  current_instruction_arg.s_reg := 0; 
  current_instruction_arg.s_mem_abs := 0; 
  current_instruction_arg.s_mem_ind := 0
  
let build_instr skel =
  current_instruction_init skel;
  let pre_instr = 
    match skel.arg1, skel.arg2 with
    | None, None -> 
	{ segment = []; init = []; pre = ""; code = skel.mnemonic; post = "" }
    | Some arg1, None      -> 
	let ai1 = build_arg arg1 in
	{ segment = ai1.arg_segment;
	  init = ai1.arg_init;
	  pre = "";
	  code = skel.mnemonic^" "^ai1.arg_code;
	  post = "" }
    | Some arg1, Some arg2 -> 
	let ai1,ai2 = build_arg arg1, build_arg arg2 in
	{ segment = ai1.arg_segment@ai2.arg_segment;
	  init = ai1.arg_init@ai2.arg_init;
	  pre = "";
	  code = skel.mnemonic^" "^ai1.arg_code^", "^ai2.arg_code;
	  post = "" }
    | None, Some _ -> 
	internal ("never happen in build_instr"); 
	{ segment = [ ("EE","0",0,"0",0) ]; init = [ Reg "EE","EE" ]; pre = ""; code = "EE"; post = "" }
  in
  match skel.mnemonic with        (* some instructions require special care *)
  | "POP" -> 
      { pre_instr with pre = ("PUSH "^(if Random.bool () then (build_r32 None).arg_code else build_imm32 ())^"\n") }
  | "PUSH" -> 
      { pre_instr with post = "POP eax" }
  | "JMP" | "JE" | "JNE" ->
      { pre_instr with code = skel.mnemonic ^" jmp_lbl\n" }
  | "LOOP" | "LOOPE" | "LOOPNE" -> 
      update_available_registers "ECX";
      { pre_instr with 
	init = (Reg "ECX", build_imm32 ()) :: pre_instr.init;
	code = skel.mnemonic ^" jmp_lbl\n" }
  | "CALL" ->
      { pre_instr with 
	post = "POP eax";
	code = skel.mnemonic ^" jmp_lbl\n" }
  | "RET" ->
      { pre_instr with
	pre = "lea eax, [ret_label]\n      push dword eax" }
  | _ -> debug "NONE"; pre_instr

let build_rnd_instr instr_set no_instr =
  init_available_registers ();
  let instr_skel = List.nth instr_set (rnd_no no_instr) in
  debug ( "generating: "^instr_skel.mnemonic^" "
	  ^(pp_arg_option instr_skel.arg1) ^" "
	  ^(pp_arg_option instr_skel.arg2) );

  let instr = build_instr instr_skel in
  debug ( "\nsegment: "
	  ^(pp_segment instr.segment)
	  ^"\ninit   : "^(pp_init instr.init)
	  ^"\ncode   : "^instr.code );
  instr

(* *** more pp *)
let pp_dump_mem s =     
  (String.concat "\n" 
     (List.map 
	(fun (l,sz,_,_,_) -> 
	  "        dump_mem 1, "^l^", "
	  ^(string_of_int ((int_of_string (sz) / 4) +1)))
	s))^"\n"
  
let pp_randomize_flags () =
  let flag_bits = 
    [ ("0x00000001","0xFFFFFFFE");  (* CF *)
      ("0x00000004","0xFFFFFFFB");  (* PF *)
      ("0x00000010","0xFFFFFFEF");  (* AF *)
      ("0x00000040","0xFFFFFF7F");  (* ZF *)
      ("0x00000080","0xFFFFFEFF");  (* SF *)
      ("0x00000800","0xFFFFEFFF");  (* OF *)
    ] in
  let flag_set = 
    String.concat "\n"
      (List.map 
	 (fun fb -> 
	   if Random.bool () 
	   then "        or  eax, "^(fst fb)
	   else "        and eax, "^(snd fb))
	 flag_bits) in
  "pushf                     ; randomize flags
        pop eax
"^flag_set^"
        push eax
        popf

"

let stack_size = ref 0

let pp_randomize_stack () =
  if !stack_size = 0 then ""
  else (
    (* pushes a few values on the stack (at most 10) *)
    let n = ref(rnd_no 10) in
    stack_size := !stack_size + !n;
    debug ("***"^(string_of_int !stack_size));
    let buf = ref "" in
    while !n > 0 do
      buf := "        push "^(build_imm32 ())^"\n"^ !buf;
      n := !n - 1
    done;
    !buf )

let pp_revert_stack () =
  if !stack_size = 0 then ""
  else (
    let buf = ref "" in
    while !stack_size > 0 do
      buf := "        pop eax\n"^ !buf;
      stack_size := !stack_size - 1
    done;
    !buf )
  
let pp_dump_stack instr =
  match (mnemonic_of_instruction instr) with
  | "PUSH" | "POP" | "CALL" | "RET" -> "        dump_stack 1,9,-7\n"
  | _ -> ""

(* for POP and RET we move the stack pointer so that the old content
of the stack is not override by the calls to the i/o functions. *)

let pp_stack_push instr =
  match mnemonic_of_instruction instr with
  | "POP" | "RET" -> "ret_label: lea esp, [esp - 4]\n"
  | _ -> ""

let pp_stack_pop instr =
  match mnemonic_of_instruction instr with
  | "POP" | "RET" -> "        lea esp, [esp + 4]\n"
  | _ -> ""

(* *** skeleton *)
let skeleton_end instr rev_stack = 
  (pp_stack_push instr) 
  ^"        dump_regs 2
"^(pp_dump_mem instr.segment) ^"
        mov eax, x86sem
        dump_regs 3
"^(pp_dump_stack instr) ^"
"^(rev_stack)^"
"^instr.post^"
"^(pp_stack_pop instr) 

let pp_nop instr =
  match (mnemonic_of_instruction instr) with
  | "JMP" | "JE" | "JNE" | "LOOP" | "LOOPE" | "LOOPNE" | "CALL" -> 
      let rec nops n = 
	if n = 0 then "" else "        nop\n"^(nops (n-1)) in
      "        jmp end\n" ^(nops (rnd_no 30))^
      "\njmp_lbl:\n"^(skeleton_end instr "")
  | _ -> ""

let skeleton instr =
  let rnd_stack = pp_randomize_stack () in
  let rev_stack = pp_revert_stack () in

"
%include \"asm_io.inc\"

segment .data
        
segment .bss
"^(pp_segment instr.segment)^"

segment .text
        global  asm_main
        
asm_main:
        enter   0,0               ; setup routine
        pusha
"^(rnd_stack)^"

        "^(pp_randomize_flags ())^"
        "^(pp_init instr.init)^"
        "^instr.pre^" 

        dump_regs 1
"^(pp_dump_mem instr.segment) ^"
"^(pp_dump_stack instr) ^"
x86sem: "^instr.code^"
        
"^(skeleton_end instr rev_stack)^" 
"^(pp_nop instr) ^"

end:    popa
        mov     eax, 0            ; return back to C
        leave                     
        ret
"

(* *** parsing the output of fztest *)
let save_register_dump ch = 
  try
    let _ = input_line ch in 
    let l1 = input_line ch in
    let l2 = input_line ch in
    let l3 = input_line ch in
    l1^" "^l2^" "^l3
  with End_of_file -> ""

let save_memory_dumps ch instruction =
  try
    List.map      
      ( fun (a,sz,f,b,c) -> 
	let address = 
	  let l = input_line ch in
	  let _ = Str.search_forward (Str.regexp "Address = \\([^ ]*\\)") l 0 in
	  let addr_hex = (Str.matched_group 1 l) in
	  debug ("smd- address: " ^addr_hex);
	  (hex_to_int addr_hex) in
	
	let l1 = input_line ch in
	let base = 
	  let base_hex = String.sub l1 0 8 in
	  debug ("smd - base: "^base_hex);
	  (hex_to_int base_hex) in 
	debug ("l1: "^l1); 
	let buffer = ref (String.sub l1 9 47) in
	for i = 1 to (((int_of_string (sz) / 4) +1)) do
	  let l = input_line ch in
 	  debug ("l : "^l); 
	  buffer := !buffer ^ " " ^ (String.sub l 9 47)
	done;

(*	debug (!buffer); *)

	((a,sz,f,b,c), address, base, !buffer)
       ) instruction.segment 
  with End_of_file -> []

let save_stack_dumps ch instruction =
  match (mnemonic_of_instruction instruction) with
  | "PUSH" | "POP" | "CALL" | "RET" -> 
      let _ = input_line ch in  (* skip two lines *)
      let _ = input_line ch in
      let l1 = input_line ch in
      let l2 = input_line ch in
      let l3 = input_line ch in
      debug l1;
      debug l2;
      debug l3;
      debug (String.sub l3 6 8);  
      let addr = Scanf.sscanf (String.sub l3 6 8) "%lx" (fun i -> i)  in
      let bits = (String.sub l1 16 8) ^ (String.sub l2 16 8) ^ (String.sub l3 16 8) in
      debug bits;
      [ (addr, bits) ]
  | _ -> []
      
(* *** report *)
let report_memory memory =
  List.map 
    (fun ((_,_,f,ofs,c),addr,base,bits) ->
      let ofs = int_of_string ofs in
      let n = addr + (f*ofs+c) - base in
      let rec cut b i =
	if i = -1 then String.concat ", " b
	else (
	  let v = String.sub bits ((n+i)*3) 2 in
	  let s = "(\"" ^ (int_to_hex (addr+(f*ofs+c)+i)) ^"\",\""^v^"\")" in
	  cut (s::b) (i-1) ) in
      cut [] 3)
    memory

let report_stack stack =
  List.map 
    (fun (addr,bits) ->
      let rec cut b i =
	if i = -1 then String.concat ", " b
	else (
	  let v = String.sub bits (i*2) 2 in
	  let naddr = Printf.sprintf "%lX" (Int32.add addr (Int32.of_int (11-i))) in
	  let s = "(\"" ^ naddr ^"\",\""^v^"\")" in
	  cut (s::b) (i-1) ) in
      cut [] 11)
    stack

let hex_sum h i =
  Scanf.sscanf h "%lx" (fun j -> Printf.sprintf "%lX" (Int32.add j (Int32.of_int i)))

let report_flags registers =
  let s = Str.last_chars registers 21 in
  let flags_set = Str.split (Str.regexp "[ \t]+") s in
  let flags_unset = List.filter (fun f -> not(List.mem f flags_set)) flags in
  (List.map (fun f -> "(\""^f^"\",\"T\")") flags_set)
  @ (List.map (fun f -> "(\""^f^"\",\"F\")") flags_unset)

type rr_register = Rr_initial | Rr_final

let report_register instruction r registers rr =
  try 
    let regexp_string = r^" = \\([^ ]*\\)" in
    let regexp = Str.regexp regexp_string in
    let _ = Str.search_forward regexp registers 0 in
    let v = Str.matched_group 1 registers in
    ( match rr, (mnemonic_of_instruction instruction), r with    (* special treatment for POP and RET *)
    | Rr_final, "POP", "EIP" -> hex_sum v (-4)
    | Rr_final, "POP", "ESP" -> hex_sum v 4
    | Rr_final, "RET", "EIP" -> hex_sum v (-4)
    | Rr_final, "RET", "ESP" -> hex_sum v 4
    | _, _,_ -> v )
  with End_of_file -> (debug ("warning: cannot detect the final state of "^r); "")

let report_registers instruction registers rr =
  let registers_initialised =
    List.map (fun (x,_) -> x) instruction.init in
  let registers_extra =
    match mnemonic_of_instruction instruction with        (* some instructions require special care *)
    | "POP" | "PUSH" | "CALL" | "RET" -> 
	(if List.mem (Reg "ESP") registers_initialised then [] else [ (Reg "ESP") ])
	@ (if List.mem (Reg "EBP") registers_initialised then [] else [ (Reg "EBP") ])
    | "CMPXCHG" -> 
	if List.mem (Reg "EAX") registers_initialised then [] else [ (Reg "EAX") ]
    | _ -> [] in
  option_map 
    (fun x ->
      (match x with
      | Reg r -> Some ("(\""^r^"\",\""^(report_register instruction r registers rr)^"\")")
      | Loc l -> None  ))
    (registers_initialised @ registers_extra)
    
let report_init instr initial_registers initial_memory initial_stack eip_registers =
  String.concat ", " 
    (( report_registers instr initial_registers Rr_initial)
     @ ( report_memory initial_memory )
     @ ( report_stack initial_stack )
     @ ( report_flags initial_registers ) 
     @ ["(\"EIP\",\"" ^ report_register instr "EAX" eip_registers Rr_initial ^"\")"] )

let report_final instr final_registers final_memory final_stack =
  String.concat ", "
    (( report_registers instr final_registers Rr_final)
     @ (report_memory final_memory ) 
     @ (report_stack final_stack ) 
     @ (report_flags final_registers )
     @ ["(\"EIP\",\"" ^ report_register instr "EIP" final_registers Rr_final ^"\")"] )

(* *** all together: given an instruction, build the test, run it, and report *)
let run_test instruction = 
  let asm_ch = open_out "fztest.asm" in 
  output_string asm_ch (skeleton instruction);
  close_out asm_ch;
  if !no_asm_f then exit 0;
  (* compile and link *)
  if not (Sys.file_exists "driver.o") then exec "gcc -c driver.c";
  if not (Sys.file_exists "asm_io.o") then exec "nasm -f elf -D ELF_TYPE asm_io.asm";
  exec "nasm -f elf fztest.asm";
  exec "gcc -o fztest driver.o fztest.o asm_io.o";
  exec "objdump -d fztest > fztest.txt";
  let instruction_hex = extract_instruction_hex instruction in
  (* execute and parse stdout *)
  let out_ch = Unix.open_process_in "./fztest" in
  (* save the registers before instr *)
  let initial_registers = save_register_dump out_ch in
  debug ("initial_registers: "^initial_registers);
  (* save the initial memory dump *)
  let initial_memory = save_memory_dumps out_ch instruction in
  (* save the initial stack (if needed) *)
  let initial_stack = save_stack_dumps out_ch instruction in
  (* save the registers after the instruction being tested  *)
  let final_registers = save_register_dump out_ch in
  debug ("final_registers: "^final_registers);
  (* save the memory dumps *)
  let final_memory = save_memory_dumps out_ch instruction in
  (* save the EIP of the instruction being tested (in eax) *)
  let eip_registers = save_register_dump out_ch in
  debug ("eip_in_eax: "^eip_registers);
  (* save the final stack (if needed) *)
  let final_stack = save_stack_dumps out_ch instruction in
  (* complete the execution and generate the report *)
  match Unix.close_process_in out_ch with
  | Unix.WEXITED 0 ->
      (* generate the report *)
      "(* " ^ instruction.code ^ " *)\n"
      ^ "val _ = x86_test \""^instruction_hex^"\"\n"
      ^ "   [" 
      ^ report_init instruction initial_registers initial_memory initial_stack eip_registers
      ^ "]\n"
      ^ "   [" 
      ^ report_final instruction final_registers final_memory final_stack
      ^ "];\n"
  | _ -> 
      error "*** fztest did not exit with return value 0."

(* *** generate x86_testScript.sml and prove it *)
let testScript_skeleton s =
  "open HolKernel boolLib bossLib Parse x86_Lib; val _ = new_theory \"x86_auto_test\";\n\n"
  ^ s
  ^ "\n\nval _ = export_theory ();\n"

let build_x86_testScript instr_set no_instr =
  let rec build_hol_code () = 
    try 
      let instruction = build_rnd_instr instr_set no_instr in
      run_test instruction 
    with e -> 
      if (!no_test_f || !no_asm_f)
      then raise e 
      else (
	exec ("cat fztest.asm >> "^ !report^"_invalid.txt"); 
	build_hol_code ()) in
  
  if !no_test_f
  then (
    print_endline (build_hol_code ()); exit 0)
  else (
    let ch = open_out (Filename.concat !hol_sem_dir "x86_auto_testScript.sml") in
    let hol_test = build_hol_code () in
    output_string ch (testScript_skeleton hol_test);
    close_out ch;
    let current_dir = Unix.getcwd () in
    Unix.chdir !hol_sem_dir;
    List.iter (fun s -> if (Sys.file_exists s) then Sys.remove s) 
      [ "x86_auto_testTheory.sig"; "x86_auto_testTheory.sml"; "x86_auto_testTheory.ui"; "x86_auto_testTheory.uo" ];
    let hol_ch = Unix.open_process_in ((Filename.concat holmake_dir "Holmake")^" x86_auto_testTheory.uo") in
    let lines = ref [] in
    ( try 
      while true do
	lines := input_line hol_ch :: !lines
      done
    with End_of_file -> ignore (Unix.close_process_in hol_ch));
    let report_text = hol_test ^ (String.concat "\n" (List.rev !lines)) in
    Unix.chdir current_dir;
   
   let dump report_file report_text =
     let ch = Unix.out_channel_of_descr (Unix.openfile report_file
					   [Unix.O_WRONLY; Unix.O_APPEND; Unix.O_CREAT]  0o640) in
     output_string ch "\n\n*** *** *** *** *** *** *** *** *** *** *** ***\n\n";
     output_string ch report_text;
     close_out ch in
   
   let success = 
     try ignore(Str.search_forward (Str.regexp_string "Test failed") report_text 0); false
     with Not_found -> (
       try ignore (Str.search_forward (Str.regexp_string "unsolved goal") report_text 0); false
       with Not_found -> ( 
	 try ignore(Str.search_forward (Str.regexp_string "Test successful") report_text 0); true
	 with Not_found -> ( 
	   internal ("internal: malformed report from hol\n\n"^report_text); 
	   exit 1 ))) in

   (* success/failure reporting *)
   if success 
   then dump (!report^"_ok.txt") report_text
   else dump (!report^"_fail.txt") report_text;

   (* stats *)
   let statistics = 
     try 
       let ch = open_in (!report^"_stat.bin") in
       let s = (Marshal.from_channel ch : stats) in
       close_in ch;
       s
     with _ -> 
       { s_instr_stat = [];
         s_arg_stat = { s_const = ref 0; s_reg = ref 0; s_mem_abs = ref 0; s_mem_ind = ref 0 } } in

   let new_instr_statistics =    (* UGLY UGLY UGLY REWRITE REWRITE REWRITE *)
     try
       let s = List.find (fun r -> (String.compare r.s_instruction !current_instruction_skel) = 0) statistics.s_instr_stat in
       if success 
       then s.s_no_ok := !(s.s_no_ok) + 1
       else s.s_no_fail := !(s.s_no_fail) + 1;
       statistics.s_instr_stat
     with Not_found -> 
       if success 
       then { s_instruction = !current_instruction_skel; s_no_ok = ref 1; s_no_fail = ref 0 } :: statistics.s_instr_stat
       else { s_instruction = !current_instruction_skel; s_no_ok = ref 0; s_no_fail = ref 1 } :: statistics.s_instr_stat
   in

   let new_arg_statistics =
     statistics.s_arg_stat.s_const := !(current_instruction_arg.s_const) + !(statistics.s_arg_stat.s_const);
     statistics.s_arg_stat.s_reg := !(current_instruction_arg.s_reg) + !(statistics.s_arg_stat.s_reg);
     statistics.s_arg_stat.s_mem_abs := !(current_instruction_arg.s_mem_abs) + !(statistics.s_arg_stat.s_mem_abs);
     statistics.s_arg_stat.s_mem_ind := !(current_instruction_arg.s_mem_ind) + !(statistics.s_arg_stat.s_mem_ind);
     statistics.s_arg_stat in

   let new_statistics =
     { s_instr_stat = new_instr_statistics;
       s_arg_stat = new_arg_statistics } in

   let ch = open_out (!report^"_stat.bin") in
   Marshal.to_channel ch new_statistics [];
   close_out ch )

let dump_stats () =
  let n_tests = ref 0 in
  let n_instr = ref 0 in
  
  let rec spaces n =
    if n = 0 then "" else " "^(spaces (n-1)) in

  let statistics = 
    try 
      let ch = open_in (!report^"_stat.bin") in
      let s = (Marshal.from_channel ch : stats) in
      close_in ch;
      s
    with _ -> error "statistic file not found" in

  print_endline ("Statistics source: "^ (!report^"_stat.bin")^"\n");
  
  let is = List.sort (fun e1 e2 -> String.compare e1.s_instruction e2.s_instruction) statistics.s_instr_stat in

  List.iter (fun r -> 
    print_endline (r.s_instruction^(spaces (30-(String.length r.s_instruction)))
		   ^(string_of_int !(r.s_no_ok)) ^(spaces (10-(String.length (string_of_int !(r.s_no_ok)))))
		   ^(string_of_int !(r.s_no_fail)));
    n_tests := !n_tests + !(r.s_no_ok) + !(r.s_no_fail);
    n_instr := !n_instr + 1)
    is;
  
  print_endline ("\nInstructions: "^(string_of_int !n_instr));
  print_endline ("Tests: "^(string_of_int !n_tests));
  print_newline ();
  print_endline ("Constants: "^(string_of_int !(statistics.s_arg_stat.s_const)));
  print_endline ("Registers: "^(string_of_int !(statistics.s_arg_stat.s_reg)));
  print_endline ("Mem. Abs.: "^(string_of_int !(statistics.s_arg_stat.s_mem_abs)));
  print_endline ("Mem. Ind.: "^(string_of_int !(statistics.s_arg_stat.s_mem_ind)));
  exit 0
  
  
(* *** all together *)
let usage =
  "\nx86sem <opts>"

let _ =
  let args = 
    Arg.align
      [ ("-x86_dir", 
	 Arg.Set_string hol_sem_dir, 
	 "<dir> Specify the directory of HOL infrastructure");
	("-report",
	 Arg.Set_string report,
	 "<string> Name of report file");
	("-repeat",
	 Arg.Set_int repeat_tests,
	 "<int> Number of tests to execute");
	("-dump_stats",
	 Arg.Unit dump_stats,
	 " Dump the statistics and exit");
        ("-x86_decoder", 
	 Arg.String (fun s -> x86_decoder := Some s), 
	 "<file> (debug) Specify the decoder file");
	("-no_asm",
	 Arg.Set no_asm_f,
	 " (debug) Do not assemble and run the test program");
	("-no_hol",
	 Arg.Set no_test_f,
	 " (debug) Do not perform symbolic evaluation");
	("-debug",
	 Arg.Set debug_f,
	 " (debug) Turn on debug messages")
      ] in
  Arg.parse args (fun s -> ()) usage;
  (* parse instruction set *)
  let (instr_set, no_instr) = parse_instruction_set () in
  for i = 1 to !repeat_tests do
    (* build and prove x86_testScript *)
    build_x86_testScript instr_set no_instr
  done
  
(* ATTIC *)
(*     if n = 0 then () *)
(*     else (f (); repeat (n-1) f) in *)
(*   repeat 5 (fun () -> build_rnd_instr instr_set no_instr) *)

(* 	  (( option_map  *)
(* 	      (fun (x,v) ->  *)
(* 		(match x with *)
(* 		| Reg r -> *)
(* 		    let v = *)
(* 		      let i = try (String.rindex v ' ') + 3 with Not_found -> 2 in *)
(* 		      String.sub v i ((String.length v)-i) in *)
(* 		    Some ("(\""^r^"\",\""^v^"\")") *)
(* 		| Loc l -> None)) *)
(* 	      instr.init ) *)
(* 	("-warning", *)
(* 	 Arg.Set warning_f, *)
(* 	 " Turn on warning messages"); *)
