Computer Laboratory

Bluespec Examples

Cyan Processor

Overview

This package contains the processor. Please do read the slides to better understand the simple instruction format used. Also, please do remember that this is a simple example designed to keep the code short rather than being particularly efficient. That said, I have had Masters students use this code as a basis for more elaborate designs.

The code

package CyanProc;

import FIFO::*;
import GetPut::*;
import ClientServer::*;
import Connectable::*;
import Vector::*;
import CyanTypes::*;
import CyanAsm::*;
import FibProg::*;

// Number of WordT entries in the data memory built by mkMemoryAccess.
typedef 1024 MemSize;


// Decode stage.
//
// Accepts a control token, looks up the control signals for the token's
// opcode in a ROM with one row per possible opcode encoding, stores them in
// the token's `dec` field and forwards the token through an output FIFO.
//
// Each row holds:
//   imm - use the instruction immediate as the second ALU operand
//   alu - ALU operation to perform
//   mem - data-memory operation (none / load / store)
//   br  - branch class (none / conditional / jump)
//   wb  - write the result back to the register file
module mkDecode(Server#(ControlTokenT,ControlTokenT));
   
   FIFO#(ControlTokenT) out_fifo <- mkLFIFO;

   // define decode table (i.e. a ROM), indexed by the packed opcode;
   // rows must stay in opcode-encoding order
   DecodedT dt[valueOf(TExp#(SizeOf#(OpcodeT)))] = 
   {DecodedT{imm: False, alu: ALU_or,  mem: MEM_nop,   br: BR_nop, wb: True},  // OpOR
    DecodedT{imm: False, alu: ALU_and, mem: MEM_nop,   br: BR_nop, wb: True},  // OpAND
    DecodedT{imm: False, alu: ALU_add, mem: MEM_nop,   br: BR_nop, wb: True},  // OpADD
    DecodedT{imm: False, alu: ALU_sub, mem: MEM_nop,   br: BR_nop, wb: True},  // OpSUB
    DecodedT{imm: False, alu: ALU_nop, mem: MEM_nop,   br: BR_nop, wb: False}, // Op04
    DecodedT{imm: False, alu: ALU_nop, mem: MEM_nop,   br: BR_nop, wb: False}, // Op05
    DecodedT{imm: False, alu: ALU_nop, mem: MEM_nop,   br: BR_nop, wb: False}, // Op06
    DecodedT{imm: False, alu: ALU_nop, mem: MEM_nop,   br: BR_nop, wb: False}, // Op07
    
    DecodedT{imm: True,  alu: ALU_or,  mem: MEM_nop,   br: BR_nop, wb: True},  // OpORI
    DecodedT{imm: True,  alu: ALU_and, mem: MEM_nop,   br: BR_nop, wb: True},  // OpANDI
    DecodedT{imm: True,  alu: ALU_add, mem: MEM_nop,   br: BR_nop, wb: True},  // OpADDI
    DecodedT{imm: True,  alu: ALU_sub, mem: MEM_nop,   br: BR_nop, wb: False}, // Op0b
    DecodedT{imm: True,  alu: ALU_nop, mem: MEM_nop,   br: BR_nop, wb: False}, // Op0c
    DecodedT{imm: True,  alu: ALU_nop, mem: MEM_nop,   br: BR_nop, wb: False}, // Op0d
    DecodedT{imm: True,  alu: ALU_nop, mem: MEM_nop,   br: BR_nop, wb: False}, // Op0e
    DecodedT{imm: True,  alu: ALU_nop, mem: MEM_nop,   br: BR_nop, wb: False}, // Op0f
    
    // loads/stores use the ALU to form the address (ra + immediate)
    DecodedT{imm: True,  alu: ALU_add, mem: MEM_load,  br: BR_nop, wb: True},  // OpLOAD
    DecodedT{imm: True,  alu: ALU_add, mem: MEM_store, br: BR_nop, wb: False}, // OpSTORE
    DecodedT{imm: True,  alu: ALU_nop, mem: MEM_nop,   br: BR_nop, wb: False}, // Op12
    DecodedT{imm: True,  alu: ALU_nop, mem: MEM_nop,   br: BR_nop, wb: False}, // Op13
    DecodedT{imm: True,  alu: ALU_nop, mem: MEM_nop,   br: BR_nop, wb: False}, // Op14
    DecodedT{imm: True,  alu: ALU_nop, mem: MEM_nop,   br: BR_nop, wb: False}, // Op15
    DecodedT{imm: True,  alu: ALU_nop, mem: MEM_nop,   br: BR_nop, wb: False}, // Op16
    DecodedT{imm: True,  alu: ALU_nop, mem: MEM_nop,   br: BR_nop, wb: False}, // Op17
    
    // conditional branches compare ra and rb by subtracting them
    DecodedT{imm: False, alu: ALU_sub, mem: MEM_nop,   br: BR_con, wb: False}, // OpBEQ
    DecodedT{imm: False, alu: ALU_sub, mem: MEM_nop,   br: BR_con, wb: False}, // OpBNE
    DecodedT{imm: False, alu: ALU_sub, mem: MEM_nop,   br: BR_con, wb: False}, // OpBLT
    DecodedT{imm: False, alu: ALU_sub, mem: MEM_nop,   br: BR_con, wb: False}, // OpBLE
    DecodedT{imm: False, alu: ALU_nop, mem: MEM_nop,   br: BR_jmp, wb: True},  // OpJSR
    DecodedT{imm: False, alu: ALU_nop, mem: MEM_nop,   br: BR_jmp, wb: False}, // OpJR
    // OpJMP is an unconditional jump, so it is classed as BR_jmp like
    // OpJSR and OpJR (it was previously marked BR_nop)
    DecodedT{imm: False, alu: ALU_nop, mem: MEM_nop,   br: BR_jmp, wb: False}, // OpJMP
    DecodedT{imm: False, alu: ALU_nop, mem: MEM_nop,   br: BR_nop, wb: False}};// Op1F
    
   interface Put request;
      // Copy the incoming token, attach the decoded control signals and
      // queue it for the next stage.
      method Action put(cin);
         ControlTokenT cout = cin;
         cout.dec = dt[pack(cin.inst.opcode)];
         out_fifo.enq(cout);
      endmethod
   endinterface
   interface response = toGet(out_fifo);
endmodule


// Execute stage.
//
// Takes a control token carrying the decoded ALU operation and the two
// source register values, computes the ALU result, stores it in the token's
// `result` field and forwards the token through an output FIFO.
module mkALU(Server#(ControlTokenT, ControlTokenT));
   FIFO#(ControlTokenT) result_q <- mkLFIFO;

   // Combinational ALU. ALU_nop passes operand `a` through unchanged; any
   // operation not listed yields a don't-care value.
   function WordT alu_func(ALUopT op, WordT a, WordT b);
      WordT value = case (op)
                       ALU_nop: a;
                       ALU_or:  (a | b);
                       ALU_and: (a & b);
                       ALU_add: (a + b);
                       ALU_sub: (a - b);
                       default: ?;
                    endcase;
      return value;
   endfunction

   interface Put request;
      method Action put(cin);
         // second operand: register rb by default, or the widened
         // immediate when the decoder requested it
         WordT operand_b = cin.rb_src;
         if (cin.dec.imm)
            operand_b = extend(cin.inst.imm);

         ControlTokenT cout = cin;
         cout.result = alu_func(cin.dec.alu, cin.ra_src, operand_b);
         result_q.enq(cout);
      endmethod
   endinterface
   interface response = toGet(result_q);
endmodule


// Memory-access stage.
//
// Treats the token's `result` field (set by the ALU) as a data-memory
// address. Loads replace `result` with the addressed word; stores write
// `rb_src` to the addressed word. A store to Address_magic_output also
// prints the stored value in a highlighted box. Tokens with any other
// memory operation pass through unchanged.
module mkMemoryAccess(Server#(ControlTokenT, ControlTokenT));

   // data memory built from registers, separate from the instruction memory
   Vector#(MemSize, Reg#(WordT)) dmem <- replicateM(mkReg(0));
   // deliberately a plain FIFO rather than an LFIFO
   FIFO#(ControlTokenT) out_fifo <- mkFIFO;

   interface Put request;
      method Action put(control_in);
         ControlTokenT tok = control_in;
         let addr = control_in.result;

         if (tok.dec.mem == MEM_load)
            tok.result = dmem[addr];
         else if (tok.dec.mem == MEM_store)
            begin
               dmem[addr] <= tok.rb_src;
               // widen the magic address constant to a full word for the compare
               WordT magic_addr = unpack(extend(pack(Address_magic_output)));
               if (addr == magic_addr)
                  begin
                     $display("%05t:                     +-------------+",$time);
                     $display("%05t: --OUTPUT------------| \033[44m\033[37m\033[1m%11d\033[0m |--------------------",
                              $time, tok.rb_src);
                     $display("%05t:                     +-------------+",$time);
                  end
            end

         out_fifo.enq(tok);
      endmethod
   endinterface
   interface response = toGet(out_fifo);
endmodule



// Interface to the register file.
interface RegisterFileIfc;
   // Read port: the request is an instruction; the response is the pair of
   // values held in the registers named by its ra and rb fields.
   interface Server#(InstructionT, Tuple2#(WordT,WordT)) read;
   // Write port: (thread id, destination register, value to write).
   interface Put#(Tuple3#(ThreadidT,RegT,WordT)) writeback;
   // Debug aid: prints the register file contents. When t is Valid, the
   // register it names is printed highlighted with the value from t.
   method Action display_registers(Maybe#(Tuple3#(ThreadidT,RegT,WordT)) t);
endinterface

// Register file implemented as a vector of registers, one per RegT encoding.
//
// read:              returns the current values of the instruction's ra and rb
// writeback:         stores a value into rd, except that writes to RgZero are dropped
// display_registers: prints the registers as a 4x8 table; an optional
//                    just-written register is highlighted with its new value
module mkRegisterFile(RegisterFileIfc);
   FIFO#(Tuple2#(WordT,WordT)) read_fifo <- mkLFIFO;
   // A vector of registers is simple, but not the most efficient option.
   // A multithreaded version would also need to index by thread ID.
   Vector#(TExp#(SizeOf#(RegT)),Reg#(WordT)) rf <- replicateM(mkReg(0));

   interface Server read;
      interface Put request;
         method Action put(inst);
            read_fifo.enq(tuple2(rf[pack(inst.ra)], rf[pack(inst.rb)]));
         endmethod
      endinterface
      interface Get response = toGet(read_fifo);
   endinterface

   interface Put writeback;
      method Action put(wb);
         // tpl_1(wb) is the thread id, which is currently unused
         let dest = tpl_2(wb);
         let value = tpl_3(wb);
         if (dest != RgZero)
            rf[pack(dest)] <= value;
      endmethod
   endinterface

   method Action display_registers(t);
      // fields of the (optional) update; only meaningful when t is Valid
      let update = fromMaybe(?, t);
      let changed_reg = pack(tpl_2(update));
      let new_value = tpl_3(update);
      Bool have_update = isValid(t);

      // header row: timestamp followed by column offsets 0..7
      $write("%05t|",$time);
      for (Bit#(5) col = 0; col < 8; col = col + 1)
         $write("%6d|",col);
      $display("");

      // separator row
      $write("     +");
      for (Bit#(5) col = 0; col < 8; col = col + 1)
         $write("------+");
      $display("");

      // four rows of eight registers each
      for (Bit#(5) row = 0; row < 4; row = row + 1)
         begin
            Bit#(5) row_base = row*8;
            $write("R%2d+ |",row_base);
            for (Bit#(5) col = 0; col < 8; col = col + 1)
               begin
                  Bit#(5) regnum = row_base + col;
                  if (have_update && (regnum == changed_reg))
                     // changed value, highlighted with ANSI terminal codes
                     $write("\033[47m\033[31m\033[7m\033[1m%6d\033[0m<",new_value);
                  else
                     $write("%6d|",rf[regnum]);
               end
            $display("");
         end
   endmethod
endmodule



// Branch stage.
//
// Decides whether the instruction in the token changes control flow and
// computes the thread's next pc:
//   * OpBEQ/OpBNE/OpBLT/OpBLE branch to the immediate when the ALU result
//     (ra - rb, as set up by the decoder) satisfies the condition;
//   * OpJMP and OpJSR always jump to the immediate; OpJSR additionally
//     redirects the write-back to Rgra with the return address pc+1;
//   * OpJR jumps to the value of register ra;
//   * everything else advances to pc+1.
// A taken branch/jump whose target equals the current pc (a jump-to-self)
// terminates the thread: a message is printed and the token is dropped
// instead of being forwarded.
module mkBranch(Server#(ControlTokenT,ControlTokenT));
   FIFO#(ControlTokenT) out_fifo <- mkLFIFO;
   
   interface Put request;
      method Action put(control_in);
         ControlTokenT c = control_in;
         let oldpc = c.contx.pc;
         Bool branch = False;
         // NOTE(review): the < and <= comparisons with 0 assume WordT is a
         // signed type - confirm against CyanTypes.
         case(c.inst.opcode)
            OpBEQ: branch = c.result==0;
            OpBNE: branch = c.result!=0;
            OpBLT: branch = c.result<0;
            OpBLE: branch = c.result<=0;
            // OpJMP was missing from this list, so an unconditional jump
            // was never taken and simply fell through to pc+1.
            OpJMP, OpJSR, OpJR: branch = True;
         endcase
   
         if(c.inst.opcode==OpJSR) // jump to subroutine
            begin
               c.inst.rd = Rgra;
               c.result = c.contx.pc+1; // return address
            end
         
         if(c.inst.opcode==OpJR)
            begin
               c.contx.pc = c.ra_src; // jump to a register
               $display("%05t: return from subroutine to pc=%d",$time,c.ra_src);
            end
         else if(branch)
            c.contx.pc = extend(c.inst.imm); // branch taken
         else
            c.contx.pc = c.contx.pc+1; // branch not taken
            
         if(branch && (oldpc==c.contx.pc)) // terminate thread
            $display("%05t: Thread %1d terminated after executing %1d instructions",
               $time, c.contx.id, c.contx.inst_count);
         else
            out_fifo.enq(c);
      endmethod
   endinterface
   interface response = toGet(out_fifo);
endmodule



// Top-level processor.
//
// Thread contexts circulate in a loop:
//   run_queue -> fetch -> (decode || register read) -> execute
//             -> data memory -> branch -> write-back -> run_queue
// A single context (pc 0, thread 0) is injected at start-up.
module mkProc(Empty);

   FIFO#(ContextT) run_queue <- mkSizedFIFO(4);
   Reg#(Bool) init <- mkReg(True);

   // pipeline stages
   Server#(ContextT, IFtokenT)           fetch   <- mkInstructionROM(fib_program);
   RegisterFileIfc                       rf      <- mkRegisterFile;
   Server#(ControlTokenT, ControlTokenT) decode  <- mkDecode;
   Server#(ControlTokenT, ControlTokenT) execute <- mkALU;
   Server#(ControlTokenT, ControlTokenT) dmemory <- mkMemoryAccess;
   Server#(ControlTokenT, ControlTokenT) branch  <- mkBranch;

   // Fires once: seed the run queue with the initial thread context.
   rule initialise(init);
      init <= False;
      run_queue.enq(ContextT{pc: 0, id: 0, inst_count: 0});
   endrule

   mkConnection(toGet(run_queue), fetch.request);

   // Send each fetched instruction to the decoder and, in parallel, to the
   // register file read port; also trace the instruction.
   rule connect_fetch_to_decode_and_register_file;
      let fetched <- fetch.response.get();
      let ctx   = tpl_1(fetched);
      let instr = tpl_2(fetched);

      // only the context and instruction are known at this point
      ControlTokenT token = ?;
      token.inst  = instr;
      token.contx = ctx;
      decode.request.put(token);
      rf.read.request.put(instr);

      // some debug output
      $display("______________________________________________________________________________");
      $write("%05t: \033[32mpc=%3d\033[0m : %5s",$time,ctx.pc,opcode2string(instr.opcode));
      $write(" rd=", reg2fmt(instr.rd));
      $write(" ra=", reg2fmt(instr.ra));
      $write(" rb=", reg2fmt(instr.rb));
      $write(" imm=%5d\n", instr.imm);
   endrule

   // Join the decoded token with the register values and pass it to the ALU.
   rule merge_decode_and_resigisters_then_connect_to_execute;
      let operands <- rf.read.response.get;
      let token    <- decode.response.get;
      token.ra_src = tpl_1(operands);
      token.rb_src = tpl_2(operands);
      execute.request.put(token);
   endrule

   mkConnection(execute.response, dmemory.request);
   mkConnection(dmemory.response, branch.request);

   // Retire an instruction: count it, requeue the thread context, write the
   // result back when required and dump the register file.
   rule connect_branch_to_writeback(!init);
      let token <- branch.response.get();

      let next_ctx = token.contx;
      next_ctx.inst_count = next_ctx.inst_count+1;
      run_queue.enq(next_ctx);

      let wb_info = tuple3(next_ctx.id, token.inst.rd, token.result);
      Maybe#(Tuple3#(ThreadidT,RegT,WordT)) shown = tagged Invalid;
      if (token.dec.wb)
         begin
            shown = tagged Valid wb_info;
            rf.writeback.put(wb_info);
            $display("%05t: wrote result=%d to rd=", $time, token.result, token.inst.rd);
         end
      rf.display_registers(shown);
   endrule

endmodule


endpackage: CyanProc

Link to the CyanProc.bsv source