//
//  Hardware ML processor core and testbench.
//  (C) 2012 DJ Greaves, M Myrene, University of Cambridge Computer Laboratory.
//


// File-level helper constants.  None of the three is referenced by the
// modules visible in this file.
// NOTE(review): presumably used by testbench file-I/O code elsewhere
// (end-of-file marker, null file handle, line-buffer size) - confirm.
`define EOF 32'hFFFF_FFFF
`define NULL 0
`define MAX_LINE_LENGTH 1000


`include "memcmds.v"

// CORE0_TOP
//
// Top-level wrapper that ties one HWML_CPU_CORE to:
//   * a private stack SRAM   (SRAM32 stackmem),
//   * a private heap SRAM    (SRAM32 heapmem),
//   * the IO subsystem       (IOSUBSYS: UART + DIL switches + LEDs),
//   * an external byte-wide code memory (c_mem_* ports).
//
// The two SRAMs carry no acknowledge output in this wrapper, so one is
// synthesised locally: an access is acknowledged on the clock edge after
// its command is seen to be non-idle.
//
// `logging` is a verbosity level: heap tracing is enabled at >=1, core
// instruction tracing at >=2 and stack tracing at >=3.
module CORE0_TOP(
   input         clk,
   input         reset,
   input  [31:0] logging,       // Verbosity level, see header comment.
   input  [7:0]  dil8_sw_data,  // DIL switch inputs, passed to IOSUBSYS.
   output [7:0]  leds8,         // LED outputs, driven by IOSUBSYS.

   // Code memory connections
   input         c_mem_ack,
   output [1:0]  c_mem_cmd,
   output [31:0] c_mem_adr,
   output [7:0]  c_mem_wdata,
   input  [7:0]  c_mem_rdata,

   // UART connections
   input         uart_rxav,
   input         uart_txav,
   output        uart_read,
   output        uart_write,    // Data transfer qualifiers
   output [7:0]  uart_parin8,
   input  [7:0]  uart_parout8,
   output        failed);

   // Interrupts are not wired up: the core's irq input is tied low.
   wire irq = 0;

   // Stack memory bus.
   reg         st_mem_ack;
   wire [1:0]  st_mem_cmd;
   wire [31:0] st_mem_adr, st_mem_wdata, st_mem_rdata;

   // Heap memory bus.
   reg         hp_mem_ack;
   wire [1:0]  hp_mem_cmd;
   wire [31:0] hp_mem_adr, hp_mem_wdata, hp_mem_rdata;

   // IO bus between the core and IOSUBSYS.
   wire        io_ack;
   wire [1:0]  io_cmd;
   wire [31:0] io_adr, io_wdata, io_rdata;

   // Only the low 16 address bits reach each memory.
   SRAM32 stackmem(
      .clk(clk),
      .adr(st_mem_adr[15:0]),
      .wdata(st_mem_wdata),
      .rdata(st_mem_rdata),
      .cmd(st_mem_cmd),
      .logging(logging>=3));

   SRAM32 heapmem(
      .clk(clk),
      .adr(hp_mem_adr[15:0]),
      .wdata(hp_mem_wdata),
      .rdata(hp_mem_rdata),
      .cmd(hp_mem_cmd),
      .logging(logging>=1));

   // Substitute dil-sw data for immediate operand in instruction no 0.
   //wire [7:0] c_mem_rdata1 = (c_mem_adr[15:0] == 0) ? { 2'b01, dil8_sw_data[5:0]}: c_mem_rdata;

   // Locally generated acknowledges: asserted the cycle after a non-idle
   // command is presented to the corresponding SRAM.
   always @(posedge clk) begin
      st_mem_ack <= st_mem_cmd != 0;
      hp_mem_ack <= hp_mem_cmd != 0;
   end

   IOSUBSYS the_IOSUBSYS(
      .clk(clk),
      .reset(reset),
      .uart_write(uart_write),
      .uart_read(uart_read),
      .uart_parin8(uart_parin8),
      .uart_parout8(uart_parout8),
      .uart_txav(uart_txav),
      .uart_rxav(uart_rxav),
      .gpio_dil8(dil8_sw_data),
      .gpio_led8(leds8),
      .ack(io_ack),
      .adr(io_adr[15:0]), .wdata(io_wdata), .rdata(io_rdata), .cmd(io_cmd)
      );

   // The core's own leds8 output is intentionally left unconnected here;
   // the LEDs are driven by IOSUBSYS.
   HWML_CPU_CORE core0(
      .clk(clk), .reset(reset), .failed(failed), .logging(logging>=2), .irq(irq),

      // Fix: this port was previously tied to the undeclared name
      // io_mem_ack, which created an undriven implicit net so the core
      // never observed an IO acknowledge.
      .io_adr(io_adr), .io_wdata(io_wdata), .io_rdata(io_rdata), .io_cmd(io_cmd), .io_ack(io_ack),

      .c_mem_adr(c_mem_adr), .c_mem_wdata(c_mem_wdata), .c_mem_rdata(c_mem_rdata), .c_mem_cmd(c_mem_cmd), .c_mem_ack(c_mem_ack),

      .st_mem_adr(st_mem_adr), .st_mem_wdata(st_mem_wdata), .st_mem_rdata(st_mem_rdata), .st_mem_cmd(st_mem_cmd), .st_mem_ack(st_mem_ack),

      .hp_mem_adr(hp_mem_adr), .hp_mem_wdata(hp_mem_wdata), .hp_mem_rdata(hp_mem_rdata), .hp_mem_cmd(hp_mem_cmd), .hp_mem_ack(hp_mem_ack)
      );

endmodule




// HWML_CPU_CORE
//
// Stack-machine processor core.  The top three stack entries are cached in
// registers (arg0 = top, arg1, arg2); deeper entries live in the external
// stack memory.  With sp_reg counting the number of entries on the stack,
// the entry that must be refilled into arg2 after a single pop is at stack
// memory address sp_reg-4, and the next one down at sp_reg-5.
//
// Structure:
//   * A combinational decode block computes, for the instruction held in
//     current_instruction, the next-state values (k_*) and the memory
//     requests needed (k_sload_*, k_load_*, k_store_*).  It sets k_complete
//     once every result the instruction needs is available.
//   * A clocked block fetches the instruction byte from code memory, issues
//     the requested stack/heap/IO transfers, latches returned data, and
//     retires the instruction (commits k_* into the architectural
//     registers) when k_complete is set and no transfer is outstanding.
//
// Ports:
//   clk, reset  - clock and synchronous reset.
//   irq         - interrupt request; not used by the logic below.
//   logging     - when set, each fetched instruction is traced via $write.
//   leds8       - low byte of arg0 (top of stack).
//   hp_mem_*    - heap memory bus (32-bit data).
//   c_mem_*     - code memory bus (8-bit data, one byte per instruction).
//   st_mem_*    - stack memory bus (32-bit data).
//   io_*        - IO bus (32-bit data).
//   failed      - sticky flag, set when an instruction raises k_fail.
module HWML_CPU_CORE(
 input clk,
 input reset,
 input irq,
 input logging,
 output [7:0] leds8,

 input hp_mem_ack,
 output reg [1:0] hp_mem_cmd,
 output reg [31:0] hp_mem_adr,
 output reg [31:0] hp_mem_wdata,
 input [31:0] hp_mem_rdata,

 input c_mem_ack,
 output reg [1:0] c_mem_cmd,
 output reg [31:0] c_mem_adr,
 output reg [7:0] c_mem_wdata,
 input [7:0] c_mem_rdata,

 input st_mem_ack,
 output reg [1:0] st_mem_cmd,
 output reg [31:0] st_mem_adr,
 output reg [31:0] st_mem_wdata,
 input [31:0] st_mem_rdata,

 input io_ack,
 output reg [1:0] io_cmd,
 output reg [31:0] io_adr,
 output reg [31:0] io_wdata,
 input [31:0] io_rdata,
 output reg failed
   );


// Address-map constants.
`define G_HEAP_BASE   32'h0000_0000
`define G_HEAP_END    32'h0000_FFFF
`define G_STACK_BASE  32'h0000_0000
`define G_STACK_END   32'h0000_FFFF
`define G_CODE_BASE   32'h1000_0000
`define G_CODE_END    32'h1000_FFFF

`define G_CODE_PC_RESET_VALUE    32'h0000_0000
`define G_CODE_PC_ISR_VALUE      32'h0000_0008


// Encodings for the k_load_cmd_0 / k_store_cmd_0 request fields.
`define HC_idle      3'd0
`define HC_hp_load   3'd1
`define HC_hp_store  3'd2
`define HC_io_store  3'd3
`define HC_io_load   3'd4
`define HC_st_store  3'd5

  reg [31:0] arg0; // Top of the stack
  reg [31:0] arg1; // Second value on stack
  reg [31:0] arg2; // Third
  reg [31:0] k_arg0; // Next value for top of the stack
  reg [31:0] k_arg1; // ditto second value on stack
  reg [31:0] k_arg2; //
  reg [31:0] sp_reg;
  reg [31:0] pc_reg;
  reg [31:0] hp_reg;


   reg [2:0] k_store_cmd_0; // Store unit for heap, stack or io.
   reg [31:0] k_store_0;
   reg [31:0] k_store_adr_0;

   reg [2:0] k_load_cmd_0; // Load unit for heap or io.
   reg [31:0] load_0;
   reg [31:0] k_load_adr_0;

   reg  k_sload_cmd_0; // Two load units for the stack.
   reg  k_sload_cmd_1;
   reg [31:0] sload_0;
   reg [31:0] sload_1;
   reg [31:0] k_sload_adr_0;
   reg [31:0] k_sload_adr_1;

   // *_reqd: the transfer has been issued on its bus for this instruction.
   reg 	      sload_0_reqd;
   reg 	      sload_1_reqd;
   reg 	      store_0_reqd;
   reg 	      load_0_reqd;

   // *_rdy: the transfer has been acknowledged (and load data latched).
   reg 	      sload_0_rdy;
   reg 	      sload_1_rdy;
   reg 	      store_0_rdy;
   reg 	      load_0_rdy;

  reg sr_int_enabled;


  reg [7:0] current_instruction;

  reg k_fail;

  reg [31:0] k_hp;
  reg [31:0] k_pc;
  reg [31:0] k_sp;
  reg k_complete;
  reg decode;  // Set while current_instruction holds a fetched, unretired instruction.

  // ------------------------------------------------------------------
  // Combinational decode / execute.
  // Every k_* signal is given a default first so no latches are inferred.
  // Only blocking assignments to k_* signals are allowed in this block.
  // ------------------------------------------------------------------
  always @(*) begin
     k_arg0 = arg0;
     k_arg1 = arg1;
     k_arg2 = arg2;
     k_store_0 = 0;
     k_store_adr_0 = 0;
     k_load_adr_0 = 0;
     k_sload_adr_0 = 0;
     k_sload_adr_1 = 0;
     k_load_cmd_0 = 0;
     k_store_cmd_0 = 0;
     k_sload_cmd_0 = 0;
     k_sload_cmd_1 = 0;
     k_fail = 0;
     k_pc = pc_reg + 1;
     k_sp = sp_reg;
     k_hp = hp_reg;
     k_complete = 0;
   if (reset) begin
      k_pc     = `G_CODE_PC_RESET_VALUE;
      k_hp     = `G_HEAP_BASE;
      k_sp     = `G_STACK_BASE;
      end
   else if (decode)

   casez (current_instruction)


      8'd0: // Abort
        begin
           k_fail = k_fail || 1;
	   k_complete = 1;
        end

      8'd1: // Pop and discard
	begin
           k_fail = k_fail || sp_reg < 1; // Need one operand to discard.
	   k_sp = sp_reg - 1;
	   k_arg0 = arg1;
	   k_arg1 = arg2;
           if (sp_reg >= 4) begin
	      // Refill arg2 from stack memory.
	      { k_sload_adr_0, k_sload_cmd_0, k_arg2 }  = { sp_reg-32'd4, 1'b1, sload_0 };
              k_complete = sload_0_rdy;
	      end
           else k_complete = 1;
        end

     8'd2: //StackLoad: Load value from inside stack at address on the stack:
      // Offset zero is top of stack once top is removed.
      // The offset in arg0 is replaced by the selected stack entry.
      begin
	  if (arg0 == 0) begin
	     k_arg0 = arg1;
	     k_complete = 1;
	  end
	  else if (arg0 == 1) begin
	     k_arg0 = arg2;
	     k_complete = 1;
	  end
	  else begin
	     // Fix: the loaded word is the instruction result and belongs in
	     // k_arg0 (it was previously written to k_arg2, leaving the
	     // offset on top of the stack and clobbering the third entry).
	     { k_sload_adr_0, k_sload_cmd_0, k_arg0 }  = { sp_reg - arg0 - 32'd2, 1'b1, sload_0 };
             k_fail = k_fail || arg0 + sp_reg > `G_STACK_END-3;
             k_complete = sload_0_rdy;
          end
       end

      8'd3: //StackStore: Store a value on the stack at a stack address on the stack.
       // arg0=offset. arg1=value. Zero offset refers to top of stack once args are removed.
       begin
	  k_fail = k_fail || sp_reg < 2; // Need two operands.
	  k_sp = sp_reg - 2;
	  k_arg0 = arg2;

	  if (arg0 == 32'd0) begin
	     // Fix: offset zero names the new top of stack, which is k_arg0
	     // (the value was previously written to k_arg2 and then lost to
	     // the refill below whenever sp_reg >= 5).
	     k_arg0 = arg1;
	     k_complete = 1;
	     end
	  else begin
	     // NOTE(review): for offsets 1 and 2 the target word is also
	     // being refilled into k_arg1/k_arg2 below, and the store is
	     // held off until those loads finish, so the cached copies get
	     // the pre-store value - confirm intended behaviour.
	     { k_store_cmd_0, k_store_adr_0, k_store_0 } = { `HC_st_store, sp_reg - arg0 - 32'd3, arg1 };
	     k_complete =  store_0_rdy;
	  end

	  // Two entries were popped, so refill arg1 and arg2 from memory.
	  if (sp_reg >= 5) begin
	     { k_sload_adr_0, k_sload_cmd_0, k_arg1 }  = { sp_reg-32'd4, 1'b1, sload_0 };
	     { k_sload_adr_1, k_sload_cmd_1, k_arg2 }  = { sp_reg-32'd5, 1'b1, sload_1 };
	     k_complete = k_complete && sload_0_rdy && sload_1_rdy;
	  end
	  else if (sp_reg >= 4) begin
	     { k_sload_adr_0, k_sload_cmd_0, k_arg1 }  = { sp_reg-32'd4, 1'b1, sload_0 };
	     k_complete = k_complete && sload_0_rdy;
	  end
       end

      8'd4: // Pop element below top and discard
	begin
	   k_fail = k_fail || sp_reg < 2; // Need two on stack.
	   k_sp = sp_reg - 1;
	   k_arg1 = arg2;
           if (sp_reg >= 4) begin
	      { k_sload_adr_0, k_sload_cmd_0, k_arg2 }  = { sp_reg-32'd4, 1'b1, sload_0 };
              k_complete = sload_0_rdy;
	      end
           else k_complete = 1;
        end

     8'd8, //  Equal
     8'd9, //  Less
     8'd10, //  Add
     8'd11: //  Sub
        begin
           k_fail = k_fail || sp_reg < 2; // Need two operands.
	   k_sp = sp_reg - 1;
	   /* verilator lint_off CASEINCOMPLETE */
	   case (current_instruction)
	     8'd8:  k_arg0 = (arg1 == arg0) ? 1:0; //  Equal
	     8'd9:  k_arg0 = (arg1 <  arg0) ? 1:0; //  Less
             8'd10: k_arg0 = (arg1 + arg0);        //  Add
	     8'd11: k_arg0 = (arg1 - arg0);        //  Sub
	   endcase /*  */
	   /* verilator lint_on CASEINCOMPLETE */
	   // TODO add overflow detection and fail on it.
	   k_arg1 = arg2;
	   if (sp_reg >= 4) begin
	      { k_sload_adr_0, k_sload_cmd_0, k_arg2 }  = { sp_reg-32'd4, 1'b1, sload_0 };
              k_complete = sload_0_rdy;
	      end
           else k_complete = 1;
        end


      8'd12: // Swap (* swaps top two stack elements *)
       begin
          k_fail = k_fail || sp_reg < 2; // Need two operands.
          k_arg0 = arg1;
	  k_arg1 = arg0;
          k_complete = 1;
       end

      8'd16:  // Jump to address on top of stack
       begin
	  k_pc = arg0;
	  k_arg0 = arg1;
	  k_arg1 = arg2;
          k_fail = k_fail || sp_reg < 1; // Need one operand.
	  k_sp = sp_reg - 1;
          k_complete = 1;
	  if (sp_reg >= 4) begin
	      { k_sload_adr_0, k_sload_cmd_0, k_arg2 }  = { sp_reg-32'd4, 1'b1, sload_0 };
             k_complete = k_complete && sload_0_rdy;
	  end

       end

       8'd17: // JumpIfNotZero: arg0 = target, arg1 = condition.
       begin
	  if (arg1 != 32'd0) k_pc = arg0;
          k_fail = k_fail || sp_reg < 2; // Need two operands.
	  k_sp = sp_reg - 2;
	  k_arg0 = arg2;
	  k_complete = 1;
	  if (sp_reg >= 5) begin
	     { k_sload_adr_0, k_sload_cmd_0, k_arg1 }  = { sp_reg-32'd4, 1'b1, sload_0 };
	     { k_sload_adr_1, k_sload_cmd_1, k_arg2 }  = { sp_reg-32'd5, 1'b1, sload_1 };
             k_complete = k_complete && sload_0_rdy && sload_1_rdy;
	  end
	  else if (sp_reg >= 4) begin
	     { k_sload_adr_0, k_sload_cmd_0, k_arg1 }  = { sp_reg-32'd4, 1'b1, sload_0 };
	     k_complete = k_complete && sload_0_rdy;
	  end
       end

     8'd18: // Call: jump to arg0; old arg1 becomes the new top and the
       // return address (pc_reg+1) is placed in the second stack slot.
       // The stack depth is unchanged.
       begin
	  k_pc = arg0;
	  k_arg0 = arg1;
      	  k_arg1 = pc_reg+32'd1;
          k_complete = 1;
       end


     8'd32: //HeapLoad: replace the heap address in arg0 with the word read from it.
        begin
	   // Fix: read through the heap load unit.  The request was
	   // previously issued on the stack load unit, which reads the
	   // stack memory rather than the heap memory.
	   k_load_cmd_0 = `HC_hp_load;
	   k_load_adr_0 = arg0;
	   k_arg0 = load_0;
	   k_complete = load_0_rdy;
        end

      8'd33: //HeapStore to fresh heap location
        begin
	   { k_store_0, k_store_cmd_0, k_store_adr_0 } = { arg0, `HC_hp_store, hp_reg };
	   k_arg0 = arg1;
	   k_hp = hp_reg + 1; // Word addressed heap
	   k_arg1 = arg2;
           k_fail = k_fail || hp_reg  >= `G_HEAP_END;
           k_fail = k_fail || sp_reg < 1; // Need one operand.
	   k_sp = sp_reg - 1;
           k_complete = store_0_rdy;
	   if (sp_reg >= 4) begin
 	      { k_sload_adr_0, k_sload_cmd_0, k_arg2 }  = { sp_reg-32'd4, 1'b1, sload_0 };
              k_complete = k_complete && sload_0_rdy;
	   end
       end

     8'd34: //HeapAddress: Load heap pointer to top of stack
        begin
	   k_arg0 = hp_reg;
	   k_arg1 = arg0;
	   k_arg2 = arg1;
	   k_sp = sp_reg + 1;
  	   k_fail = k_fail || sp_reg >= `G_STACK_END-1;
	   if (sp_reg >= 3) begin
	      // Spill the old arg2 to stack memory to make room.
	    { k_store_cmd_0, k_store_adr_0, k_store_0 } = { `HC_st_store, sp_reg - 32'd3, arg2 };
	      k_complete = store_0_rdy;
	   end
	   else k_complete = 1;
        end

      8'd35: //Read               (* read NV memory / UART regs *)
        begin
	   k_arg0 = load_0;
	   k_load_cmd_0 = `HC_io_load;
	   k_load_adr_0 = arg0;
  	   k_complete = load_0_rdy;
        end

      8'd36: //Write             (* write NV memory / UART regs *)
        // arg0 = data, arg1 = IO address.
        begin
	   k_store_0 = arg0;
	   k_store_cmd_0 = `HC_io_store;
	   k_store_adr_0 = arg1;
           k_fail = k_fail || sp_reg < 2; // Need two operands.
 	   k_sp = sp_reg - 2;
	   k_arg0 = arg2;
	   if (sp_reg >= 5) begin
	      { k_sload_adr_0, k_sload_cmd_0, k_arg1 }  = { sp_reg-32'd4, 1'b1, sload_0 };
	      { k_sload_adr_1, k_sload_cmd_1, k_arg2 }  = { sp_reg-32'd5, 1'b1, sload_1 };
              k_complete = sload_0_rdy && sload_1_rdy && store_0_rdy;
	   end
	   else if (sp_reg >= 4) begin
	      { k_sload_adr_0, k_sload_cmd_0, k_arg1 }  = { sp_reg-32'd4, 1'b1, sload_0 };
              k_complete = sload_0_rdy && store_0_rdy;
	  end
          else k_complete = store_0_rdy;


        end

      8'd37: //Compare Exchange  atomic compare-and-exchange on heap value.
        // arg2 = heap address, arg1 = expected value, arg0 = new value.
        // The word read from the heap becomes the new top of stack.
        begin
           k_fail = k_fail || sp_reg < 3; // Need three operands.
	   k_load_cmd_0 = `HC_hp_load;
	   k_load_adr_0 = arg2; // Address

	   k_arg0 = load_0;

	   if (load_0_rdy && load_0 == arg1) begin
	      k_store_0 = arg0; // Value to be stored
	      k_store_cmd_0 = `HC_hp_store;
	      k_store_adr_0 = arg2; // Address
	      k_complete = store_0_rdy;
	   end
	   else if (load_0_rdy) k_complete = 1; // updated below

	   // Fix: three operands are consumed and one result produced, so
	   // the stack shrinks by two.  This must go through k_sp; it was
	   // previously a non-blocking write to sp_reg from inside this
	   // combinational block, giving sp_reg a second driver.
	   k_sp = sp_reg - 2;

	   if (sp_reg >= 5) begin
	      { k_sload_adr_0, k_sload_cmd_0, k_arg1 }  = { sp_reg-32'd4, 1'b1, sload_0 };
	      { k_sload_adr_1, k_sload_cmd_1, k_arg2 }  = { sp_reg-32'd5, 1'b1, sload_1 };
              k_complete = k_complete && sload_0_rdy && sload_1_rdy;
	   end
	   else if (sp_reg >= 4) begin
	      { k_sload_adr_0, k_sload_cmd_0, k_arg1 }  = { sp_reg-32'd4, 1'b1, sload_0 };
              k_complete = k_complete && sload_0_rdy;
	   end

        end


     8'd38: //Heap alloc: pop one value and store on heap at next free location.
      begin
	 k_fail = k_fail || sp_reg < 1; // Need one operand

	 { k_store_0, k_store_cmd_0, k_store_adr_0 } = { arg0, `HC_hp_store, hp_reg };
	 k_complete = store_0_rdy;

	 k_sp = sp_reg - 1;
	 k_arg0 = arg1;
	 k_arg1 = arg2;

	 if (sp_reg >= 4) begin
	    { k_sload_adr_0, k_sload_cmd_0, k_arg2 }  = { sp_reg-32'd4, 1'b1, sload_0 };
	    k_complete = k_complete && sload_0_rdy;
	 end

	 k_hp = hp_reg + 1; // Word addressed heap

      end

     8'b01??_????: // Push Immediate: push the 6-bit literal, zero-extended.
      begin
	 k_arg0 = { 26'd0, current_instruction[5:0] };
	 k_arg1 = arg0;
	 k_arg2 = arg1;
	 k_fail = k_fail || sp_reg >= `G_STACK_END-1;
	 k_sp = sp_reg + 1;
	 if (sp_reg >= 3) begin
	    // Spill the old arg2 to stack memory to make room.
	    { k_store_cmd_0, k_store_adr_0, k_store_0 } = { `HC_st_store, sp_reg - 32'd3, arg2 };
	    k_complete = store_0_rdy;
	 end
	 else k_complete = 1;
      end

    8'b11??_????: // Shift Add Immediate
      // NOTE(review): the literal is 6 bits wide but the shift is 7 -
      // confirm against the instruction-set definition / assembler.
      begin
	 k_arg0 = (arg0 << 7) + { 26'd0, current_instruction[5:0] };
	 k_complete = 1;
      end


   default: // Unrecognised opcode: flag failure; never completes.
     begin
	k_fail = k_fail || 1;
     end

   endcase
   end

   // ------------------------------------------------------------------
   // Clocked fetch / memory-transfer / retire logic.
   // Statement order matters: later non-blocking assignments to the same
   // register override earlier ones in the same clock cycle.
   // ------------------------------------------------------------------
   always @(posedge clk)
       if (reset) begin
          $display("Reset");
          c_mem_cmd <= `M_idle;
          st_mem_cmd <= `M_idle;
          hp_mem_cmd <= `M_idle;
          io_cmd <= `M_idle;  // Fix: IO command was left undriven during reset.
          failed <= 0;
	  hp_reg <= `G_HEAP_BASE;
          sp_reg <= `G_STACK_BASE;
          pc_reg <= `G_CODE_PC_RESET_VALUE;
          sr_int_enabled <= 0;
	  decode <= 0;
	  load_0_rdy <= 0;
	  sload_0_rdy <= 0;
	  sload_1_rdy <= 0;
	  store_0_rdy <= 0;
	  load_0_reqd <= 0;
	  sload_0_reqd <= 0;
	  sload_1_reqd <= 0;
	  store_0_reqd <= 0;
       end
       else // if (irq && sr_int_enabled) begin   -- interrupt dispatch is not implemented.


       begin
	  // Data-bus commands default to idle; each request below is a
	  // one-cycle pulse.
	  st_mem_cmd <= `M_idle;
	  io_cmd <= `M_idle;
	  hp_mem_cmd <= `M_idle;

	  // Issue stack loads, unit 0 first; at most one per cycle.
	  if (k_sload_cmd_0 && !sload_0_reqd) begin
             st_mem_cmd <= `M_read;
             st_mem_adr <= k_sload_adr_0;
	     sload_0_reqd <= 1;
	  end

	  else if (k_sload_cmd_1 && !sload_1_reqd) begin
             st_mem_cmd <= `M_read;
             st_mem_adr <= k_sload_adr_1;
	     sload_1_reqd <= 1;
	  end


	  // Stack acknowledges are attributed in issue order: the first
	  // goes to unit 0, the next to unit 1.
	  if (k_sload_cmd_0 && !sload_0_rdy && st_mem_ack) begin
	     sload_0 <= st_mem_rdata; sload_0_rdy <= 1;
	  end
	  else if (k_sload_cmd_1 && !sload_1_rdy && st_mem_ack) begin
             sload_1 <= st_mem_rdata; sload_1_rdy <= 1;
	  end

	  // Heap load.
	  if (k_load_cmd_0 == `HC_hp_load && !load_0_reqd) begin
	     hp_mem_cmd <= `M_read;
	     hp_mem_adr <= k_load_adr_0;
	     load_0_reqd <= 1;
	  end

          if (k_load_cmd_0 == `HC_hp_load && !load_0_rdy && hp_mem_ack) begin
	     load_0 <= hp_mem_rdata; load_0_rdy <= 1;
	  end

	  // IO load.
	  if (k_load_cmd_0 == `HC_io_load && !load_0_reqd) begin
	     io_cmd <= `M_read;
	     io_adr <= k_load_adr_0;
	     load_0_reqd <= 1;
	  end

          if (k_load_cmd_0 == `HC_io_load && !load_0_rdy && io_ack) begin
	     load_0 <= io_rdata; load_0_rdy <= 1;
	  end



       // Issue the store on whichever bus the decode block selected.
       if (k_store_cmd_0 == `HC_io_store && !store_0_reqd) begin // todo interlock with io load
	  io_cmd <= `M_write;
	  io_adr <= k_store_adr_0;
	  io_wdata <= k_store_0;
	  store_0_reqd <= 1;
       end
       else if (k_store_cmd_0 == `HC_hp_store && !store_0_reqd) begin
	  {hp_mem_cmd, hp_mem_adr, hp_mem_wdata, store_0_reqd} <= { `M_write, k_store_adr_0, k_store_0, 1'd1};
       end
       // Here we interlock on simultaneous load and store to the stack - not that any current instructions attempt this
       else if (k_store_cmd_0 == `HC_st_store && !store_0_reqd && !(k_sload_cmd_0 && !sload_0_rdy)&& !(k_sload_cmd_1 && !sload_1_rdy)) begin
	  st_mem_cmd <= `M_write;
	  st_mem_adr <= k_store_adr_0;
	  st_mem_wdata <= k_store_0;
	  store_0_reqd <= 1;
       end


	  // Store completion: wait for the acknowledge of the selected bus.
	  if (k_store_cmd_0 == `HC_hp_store && !store_0_rdy) begin
	     if (hp_mem_ack) store_0_rdy <= 1;
	  end
	     else if (k_store_cmd_0 == `HC_io_store && !store_0_rdy) begin
		if (io_ack) store_0_rdy <= 1;
	     end
	  else if (k_store_cmd_0 == `HC_st_store && !store_0_rdy && !(k_sload_cmd_0 && !sload_0_rdy)&& !(k_sload_cmd_1 && !sload_1_rdy)) begin
	     if (st_mem_ack) store_0_rdy <= 1;
	  end


       // failed is sticky until the next reset.
       if (k_fail) failed <= 1;

       // Instruction fetch: request the byte at pc_reg while no
       // instruction is being decoded and none has been returned yet.
       if (!decode && !c_mem_ack) begin
          c_mem_adr <= pc_reg;
   	  c_mem_cmd <= `M_read;
          end
       else c_mem_cmd <= `M_idle;

       if (!decode && c_mem_ack) begin
          current_instruction <= c_mem_rdata;

	  if (logging) begin
	  $write("%t sp=%h tos= [[ %h %h %h  ]] Decode instruction pc=%h  i=%h ",
	  $time, sp_reg, arg0, arg1, arg2, pc_reg, c_mem_rdata);
	  if ((c_mem_rdata & 8'hC0) == 8'h40) $display("LOAD %h", c_mem_rdata & (8'h3F));
	  else if ((c_mem_rdata & 8'hC0) == 8'hC0) $display("ORI %h", c_mem_rdata & (8'h3F));
	  else case (c_mem_rdata)
	    8'd0  : $display("ABORT");
	    8'd1  : $display("POP");
	    8'd2  : $display("STACK-LOAD");
	    8'd3  : $display("STACK-STORE");
	    8'd4  : $display("POP1");
            8'd8  : $display("EQUAL");
	    8'd9  : $display("LESS");
	    8'd10 : $display("ADD");
	    8'd11 : $display("SUB");
	    8'd12 : $display("SWAP");
	    8'd16 : $display("JMP/ret");
	    8'd17 : $display("JNZ");
	    8'd18 : $display("CALL");

	    8'd32 : $display("HEAP-LOAD");
	    8'd33 : $display("HEAP-STORE");
	    8'd34 : $display("HEAP-ADDR");
	    8'd35 : $display("IO READ");
	    8'd36 : $display("IO WRITE");
	    8'd37 : $display("COMPARE-EXCHANGE");
	    8'd38 : $display("HEAP-ALLOC");
	    default: $display("..");
	    endcase
	  end

          decode <= 1;
          end


       // Retire: commit the next-state values once the instruction is
       // complete and every requested transfer has been acknowledged,
       // then clear the per-instruction handshake flags.
       if (k_complete && !failed &&
            !(k_sload_cmd_0 && !sload_0_rdy) &&
            !(k_sload_cmd_1 && !sload_1_rdy) &&
            !(k_load_cmd_0  != `HC_idle && !load_0_rdy) &&
            !(k_store_cmd_0 != `HC_idle && !store_0_rdy))
         begin // retire when all writes complete
	    pc_reg <= k_pc;
	    arg0 <= k_arg0;
	    arg1 <= k_arg1;
	    arg2 <= k_arg2;
	    hp_reg <= k_hp;
	    sp_reg <= k_sp;
            decode <= 0;
	    load_0_rdy <= 0;
	    store_0_rdy <= 0;
	    sload_0_rdy <= 0;
	    sload_1_rdy <= 0;
	    load_0_reqd <= 0;
	    store_0_reqd <= 0;
	    sload_0_reqd <= 0;
	    sload_1_reqd <= 0;
         end

       end


   assign leds8 = arg0[7:0]; // Top-of-stack is output to leds.

endmodule



// eof
