//
// $Id: $
//  Hardware ML processor core and testbench.
//  (C) 2012 DJ Greaves, M Myrene, University of Cambridge Computer Laboratory.
//  Bluespec version (C) 2013 DJ Greaves.
//
//

package Hwmlcore;


import BRAM::*;

typedef Bit#(8) InoutValue_t;
typedef Bit#(8) Instruction_t;
typedef Bit#(16) CodeAddr_t;
typedef Bit#(16) StackAddr_t;
typedef Bit#(16) HeapAddr_t;
typedef Bit#(32) Word_t;


//

typedef enum { Fetch, Execute, WriteBack, CompareExch, Pop2a, Pop2b, Abort } Phase_t;
typedef enum { WBS_Smem, WBS_Hmem, WBS_IOmem } WriteBackSrc_t;
typedef enum { WBD_ToS, WBD_arg0, WBD_arg1, WBD_arg2 } WriteBackDest_t;


interface InoutWiring;
   method Action  input_req(InoutValue_t addr);
   method ActionValue#(InoutValue_t) input_response_get();
   method Action output(InoutValue_t addr, InoutValue_t data);
endinterface



(* synthesize *)
module mkHwmlCore(BRAM1Port#(CodeAddr_t, Instruction_t) c_mem,
                  BRAM1Port#(StackAddr_t, Word_t) s_mem,
                  BRAM1Port#(HeapAddr_t, Word_t) h_mem,
                  InoutWiring io_mem,
                  Empty plugh);

  Reg#(WriteBackSrc_t) wb_src <- mkReg(0);
  Reg#(WriteBackDest_t) wb_dest <- mkReg(0);
  Reg#(CodeAddr_t) pc_reg <- mkReg(32'h8000_0000);
  Reg#(StackAddr_t) sp_reg <- mkReg(0);
  Reg#(StackAddr_t) hp_reg <- mkReg(0);
  Reg#(Instruction_t) ins <- mkReg(0);

  Reg#(Phase_t) phase <- mkReg(Fetch);

  Reg#(Word_t) arg0 <- mkRegU;
  Reg#(Word_t) arg1 <- mkRegU;
  Reg#(Word_t) arg2 <- mkRegU;

  Reg#(Bool) failed <- mkReg(0);


   function Action h_mem_load_req(HeapAddr_t addr);
           h_mem.portA.request.put(BRAMRequest{write: False, responseOnWrite: False, address: addr, datain:0 });
   endfunction

   function Action c_mem_load_req(CodeAddr_t addr);
           //$display("c_mem_load_req %h", addr);
           c_mem.portA.request.put(BRAMRequest{write: False, responseOnWrite: False, address: addr, datain:0 });
   endfunction

   function Action s_mem_load_req(StackAddr_t addr);
           s_mem.portA.request.put(BRAMRequest{write: False, responseOnWrite: False, address: addr, datain:0 });
   endfunction


   function Action h_mem_store(HeapAddr_t addr, Word_t vale);
           h_mem.portA.request.put(BRAMRequest{write: True, responseOnWrite: False, address: addr, datain:vale });
   endfunction

   function Action s_mem_store(StackAddr_t addr, Word_t vale);
           $display("s_mem store [ 'h%h ] := 'h%h", addr, vale);
           s_mem.portA.request.put(BRAMRequest{write: True, responseOnWrite: False, address: addr, datain:vale });
   endfunction


   
   rule fetch(phase==Fetch && !failed);
      c_mem_load_req(pc_reg);
      //$display("%t: fetch pc=%h", $time, pc_reg);
      phase <= Execute;
   endrule

   function Action complete(); // Retire current instruction - reset to fetch
     phase <= Fetch;
   endfunction

   rule aborter(phase == Abort);
      let dd <- c_mem.portA.response.get();
      ins <= dd;
      $display("%t: abort pc=%h  ins=%h", $time, pc_reg, dd);
      $display("Aborted");
     complete();
   endrule

   function Action push(Word_t arg);
     //     $display("Pushing sp=%h", sp_reg);
     arg0 <= arg;
     arg1 <= arg0;
     arg2 <= arg1;
     sp_reg <= sp_reg + 1;
     if (sp_reg >= 3) s_mem_store(sp_reg-3, arg2);
     complete();
   endfunction

   Reg#(Bool) deb <- mkReg(0);

   function Action wb_needed(WriteBackDest_t dest, WriteBackSrc_t src);
      phase <= WriteBack;
      wb_dest <= dest;
      wb_src <= src;
   endfunction

   function Action io_read(Word_t port_no);
     io_mem.input_req(port_no & 8'hFF);
   endfunction

   function Action stack_mindepth(Integer x);
     if (sp_reg < x) failed <= 1;
   endfunction

   function ActionValue#(Word_t) pop(Bool fill_tos);
     if (fill_tos) arg0 <= arg1;
     arg1 <= arg2;
     sp_reg <= sp_reg - 1;
     if (sp_reg >= 4) begin
           s_mem_load_req(sp_reg-4);
           wb_needed(WBD_arg2, WBS_Smem);
           end
     else complete();
   endfunction

   function ActionValue#(Word_t) pop2();
     arg0 <= arg2;
     sp_reg <= sp_reg - 2;
     if (sp_reg >= 5) begin
           s_mem_load_req(sp_reg-4);
	   phase <= Pop2a;
           end
     else if (sp_reg >= 4) begin
           s_mem_load_req(sp_reg-4);
           wb_needed(WBD_arg1, WBS_Smem); 
           end
    else complete();
   endfunction

   rule pop2a(phase == Pop2a);
           let dd <- s_mem.portA.response.get(); 
	   arg1 <= dd;
	   $display("Pop2 arg1 := 'h%h", dd);
	   phase <= Pop2b;
   endrule

   rule pop2b(phase == Pop2b);
           s_mem_load_req(sp_reg-3); // NB: sp already adjusted by two.
           wb_needed(WBD_arg2, WBS_Smem); 
   endrule

   rule write_back(phase == WriteBack);
     int wb_data = wb_src == WBS_Smem ? s_mem.portA.response.get(): // clearly this needs be non-strict -aggresive-conditions=enable
                   wb_src == WBS_IOmem ? io_mem.input_response_get():
                   wb_src == WBS_Hmem ? h_mem.portA.response.get():
                   0;
     case (wb_dest) matches
       //WBD_ToS:  push(wb_data);
       WBD_arg0: arg0 <= wb_data;
       WBD_arg1: arg1 <= wb_data;
       WBD_arg2: arg2 <= wb_data;
     endcase
     $display("Write back d=%h s=%h data=%h", wb_dest, wb_src, wb_data);
     complete();
   endrule


   rule instruction_decode(phase==Execute);
      Maybe#(CodeAddr_t) branch_or_call_dest;
      //      branch_or_call_dest = Invalid(0); // Should not need this (0) here!
      branch_or_call_dest = tagged Invalid; // Should not need this (0) here!

      let dd <- c_mem.portA.response.get();
      ins <= dd;
      $write("%t: sp=%h tos = [[ %h %h %h ]] Decode/execute pc='h%h  ins='h%h", $time, sp_reg, arg0, arg1, arg2,  pc_reg, dd);


     if ((dd & 8'hC0) == 8'h40)
       begin
             $display(" Push immediate %h", dd & 32'h3F);
             push(dd & 32'h3F); // Push immediate
       end


     else if ((dd & 8'hC0) == 8'hC0) // Shift Add Immediate	
      begin
	 arg0 <= (arg0 << 7) + { 26'd0, dd[5:0] };
	 complete();
      end
     else case (dd) matches
    8'h00: // Abort
        begin
	   $display(" Abort");
           failed <= 1;
	   complete();
        end

    8'h01: // Pop and discard
       begin
             pop(True);
       end

     8'd2: //StackLoad: Load value from inside stack at address on the stack: 
      // Offset zero is top of stack once top is removed.
      begin
	  if (arg0 == 0) begin
             $display(" Stack load 0 (dup)");
	     arg0 <= arg1;
	     complete();
	  end
	  else if (arg0 == 1) begin
             $display(" Stack load 1");
	     arg0 <= arg2;
	     complete();
	  end
	  else begin
               $display(" Stack load 'h%h", arg0);
               s_mem_load_req(sp_reg-arg0 - 2);
               wb_needed(WBD_ToS, WBS_Smem);
  // TODO check sp >= arg0
          end
	  
       end


      8'd3: //StackStore: Store a value on the stack at a stack address on the stack.
       // arg0=offset. arg1=value. Zero offset refers to top of stack once args are removed.
       begin
	  stack_mindepth(2);// Need two operands.
	  let offset = arg0;
          let value = arg1;
          pop2(); //
          s_mem_store(sp_reg - offset - 3, value);
       end


      8'd4: // Pop element below top and discard
	begin 
	   stack_mindepth(2);// Need two operands.
	   sp_reg <= sp_reg - 1;
	   arg1 <= arg2;
           if (sp_reg >= 4) begin
               s_mem_load_req(sp_reg-4);
               wb_needed(WBD_arg2, WBS_Smem); 
	      end
        end

     8'd8, //  Equal
     8'd9, //  Less
     8'd10, //  Add
     8'd11: //  Sub
        begin
	   stack_mindepth(2);// Need two operands.
	   /* verilator lint_off CASEINCOMPLETE */
	   let res =
	     (dd==8'd8)  ?  (arg1 == arg0) ? 1:0: //  Equal
	     (dd==8'd9)  ?  (arg1 <  arg0) ? 1:0: //  Less
             (dd==8'd10) ?  (arg1 + arg0):        //  Add
	     (dd==8'd11) ?  (arg1 - arg0):        //  Sub
             32'b0;
	   $display(" ALU == < + -  -> %h ", res);
           arg0 <= res;
	   pop(False);
	   /* verilator lint_on CASEINCOMPLETE */
	   // TODO add overflow detection and fail on it.
        end


      8'd12: // Swap (* swaps top two stack elements *)
       begin
	   $display(" Swap %h %h", arg0, arg1);
          stack_mindepth(2);// Need two operands.
          arg0 <= arg1;
	  arg1 <= arg0;
          complete();
       end

      8'd16:  // Jump to address on top of stack
       begin
          $display(" Jmp/Ret 'h%h", arg0);
	  branch_or_call_dest = Valid(arg0);
	  pop(True);
       end

       8'd17: // JumpIfNotZero
       begin
	  stack_mindepth(2);// Need two operands.
	  if (arg1 != 32'd0) begin
  	      $display(" JNZ (taken to 'h%h)", arg0);
	      branch_or_call_dest = Valid(arg0);
	      end
          else $display(" JNZ (not taken)");
	  pop2();
       end

     8'd18:  // Call : swap pc and second entry on stack while swapping pc with rla. 
       begin
          $display(" Call 'h%h", arg0);
	  branch_or_call_dest = Valid(arg0);
	  //arg0 <= arg1;
      	  arg0 <= pc_reg + 1; // older semantics?
          complete();
       end


     8'd32: //HeapLoad 
        begin
  	  h_mem_load_req(arg0);
          wb_needed(WBD_ToS, WBS_Hmem);
        end

      8'd33: //HeapStore to fresh heap location
        begin
	   stack_mindepth(1);// Need one operand.
           let value = arg0;
           h_mem_store(hp_reg, value);
	   pop(True);
	   hp_reg <= hp_reg + 1; // Word adressed heap
	   //   if (hp_reg  >= `G_HEAP_END) failed <= True;
       end

     8'd34: //HeapAdress: Load heap pointer to top of stack
        begin
	  push(hp_reg);
        end

      8'd35: //Read               (* read NV memory / UART regs *)
        begin
           stack_mindepth(1);// Need one operand.
           io_read(arg0);
           wb_needed(WBD_ToS, WBS_IOmem);
        end
/*
      8'd36: //Write             (* write NV memory / UART regs *)
        begin
	   stack_mindepth(2);// Need two operands.
	   k_store_0 = arg0;
	   k_store_cmd_0 = `HC_io_store;
	   k_store_adr_0 = arg1;
 	   k_sp_reg = sp_reg - 2;
	   k_arg0 = arg2;
	   if (sp_reg >= 5) begin
	      { k_sload_adr_0, k_sload_cmd_0, k_arg1 }  = { sp_reg-32'd4, 1'b1, sload_0 };
	      { k_sload_adr_1, k_sload_cmd_1, k_arg2 }  = { sp_reg-32'd5, 1'b1, sload_1 };
              k_complete = sload_0_rdy && sload_1_rdy && store_0_rdy;
	   end
	   else if (sp_reg >= 4) begin
	      { k_sload_adr_0, k_sload_cmd_0, k_arg1 }  = { sp_reg-32'd4, 1'b1, sload_0 };
              k_complete = sload_0_rdy && store_0_rdy;
	  end
          else k_complete = store_0_rdy;
        end
*/

      8'd37: //Compare Exchange   atomic compare-and-exchange on heap value.
        begin
           stack_mindepth(3);     // Need three operands.
	   let address = arg2;    // Address
	   let store_val = arg0;  // Value to be stored
           let refval = arg1;     // Value to be comared against
	   h_mem_load_req(address);
	   phase <= CompareExch;
        end
 
     8'd38: //Heap alloc: pop one value and store on heap at next free location.
      begin
         stack_mindepth(1);// Need one operand.
	 hp_reg <= hp_reg + 1; // Word adressed heap
	 h_mem_store(hp_reg, arg0);
	 sp_reg <= sp_reg - 1;
	 pop(True);
      end

      default:
          begin
            failed <= 1;
            $display("Illegal instruction %h\n", dd); $finish;
	    complete();
          end

     endcase

     case (branch_or_call_dest) matches
       tagged Valid .ddest: pc_reg <= ddest;
	    //$display("branch taken to 'h%h", branch_dest);
	    //end
       tagged Invalid:  pc_reg <= pc_reg+1;
    endcase

  endrule


  rule compare_exchange(phase == CompareExch); 
    let address = arg2;    // Address
    let store_val = arg0;  // Value to be stored
    let refval = arg1;     // Value to be comared against
    let r <- h_mem.portA.response.get();
    arg0 <= r;
    if (r == refval) h_mem_store(address, store_val);
    pop2();
  endrule

endmodule


endpackage
