Bluespec Examples
Cyan Processor
Overview
This package contains the processor. Please read the slides to better understand the simple instruction format used. Also, please remember that this is a simple example designed to keep the code short rather than to be particularly efficient. That said, I have had Master's students use this code as a basis for more elaborate designs.
The code
package CyanProc;

import FIFO::*;
import GetPut::*;
import ClientServer::*;
import Connectable::*;
import Vector::*;
import CyanTypes::*;
import CyanAsm::*;
import FibProg::*;

// Number of entries in the data memory built by mkMemoryAccess.
typedef 1024 MemSize;


// Decode stage: copies the incoming control token and fills in its `dec`
// field from a table indexed by the packed opcode of the instruction.
module mkDecode(Server#(ControlTokenT,ControlTokenT));
  FIFO#(ControlTokenT) out_fifo <- mkLFIFO;

  // define decode table (i.e. a ROM); one entry per possible opcode encoding
  DecodedT dt[valueOf(TExp#(SizeOf#(OpcodeT)))] =
    {DecodedT{imm: False, alu: ALU_or,  mem: MEM_nop,   br: BR_nop, wb: True},   // OpOR
     DecodedT{imm: False, alu: ALU_and, mem: MEM_nop,   br: BR_nop, wb: True},   // OpAND
     DecodedT{imm: False, alu: ALU_add, mem: MEM_nop,   br: BR_nop, wb: True},   // OpADD
     DecodedT{imm: False, alu: ALU_sub, mem: MEM_nop,   br: BR_nop, wb: True},   // OpSUB
     DecodedT{imm: False, alu: ALU_nop, mem: MEM_nop,   br: BR_nop, wb: False},  // Op04
     DecodedT{imm: False, alu: ALU_nop, mem: MEM_nop,   br: BR_nop, wb: False},  // Op05
     DecodedT{imm: False, alu: ALU_nop, mem: MEM_nop,   br: BR_nop, wb: False},  // Op06
     DecodedT{imm: False, alu: ALU_nop, mem: MEM_nop,   br: BR_nop, wb: False},  // Op07
     DecodedT{imm: True,  alu: ALU_or,  mem: MEM_nop,   br: BR_nop, wb: True},   // OpORI
     DecodedT{imm: True,  alu: ALU_and, mem: MEM_nop,   br: BR_nop, wb: True},   // OpANDI
     DecodedT{imm: True,  alu: ALU_add, mem: MEM_nop,   br: BR_nop, wb: True},   // OpADDI
     DecodedT{imm: True,  alu: ALU_sub, mem: MEM_nop,   br: BR_nop, wb: False},  // Op0b
     DecodedT{imm: True,  alu: ALU_nop, mem: MEM_nop,   br: BR_nop, wb: False},  // Op0c
     DecodedT{imm: True,  alu: ALU_nop, mem: MEM_nop,   br: BR_nop, wb: False},  // Op0d
     DecodedT{imm: True,  alu: ALU_nop, mem: MEM_nop,   br: BR_nop, wb: False},  // Op0e
     DecodedT{imm: True,  alu: ALU_nop, mem: MEM_nop,   br: BR_nop, wb: False},  // Op0f
     DecodedT{imm: True,  alu: ALU_add, mem: MEM_load,  br: BR_nop, wb: True},   // OpLOAD
     DecodedT{imm: True,  alu: ALU_add, mem: MEM_store, br: BR_nop, wb: False},  // OpSTORE
     DecodedT{imm: True,  alu: ALU_nop, mem: MEM_nop,   br: BR_nop, wb: False},  // Op12
     DecodedT{imm: True,  alu: ALU_nop, mem: MEM_nop,   br: BR_nop, wb: False},  // Op13
     DecodedT{imm: True,  alu: ALU_nop, mem: MEM_nop,   br: BR_nop, wb: False},  // Op14
     DecodedT{imm: True,  alu: ALU_nop, mem: MEM_nop,   br: BR_nop, wb: False},  // Op15
     DecodedT{imm: True,  alu: ALU_nop, mem: MEM_nop,   br: BR_nop, wb: False},  // Op16
     DecodedT{imm: True,  alu: ALU_nop, mem: MEM_nop,   br: BR_nop, wb: False},  // Op17
     DecodedT{imm: False, alu: ALU_sub, mem: MEM_nop,   br: BR_con, wb: False},  // OpBEQ
     DecodedT{imm: False, alu: ALU_sub, mem: MEM_nop,   br: BR_con, wb: False},  // OpBNE
     DecodedT{imm: False, alu: ALU_sub, mem: MEM_nop,   br: BR_con, wb: False},  // OpBLT
     DecodedT{imm: False, alu: ALU_sub, mem: MEM_nop,   br: BR_con, wb: False},  // OpBLE
     DecodedT{imm: False, alu: ALU_nop, mem: MEM_nop,   br: BR_jmp, wb: True},   // OpJSR
     DecodedT{imm: False, alu: ALU_nop, mem: MEM_nop,   br: BR_jmp, wb: False},  // OpJR
     // OpJMP is an unconditional jump, so it is tagged BR_jmp like JSR/JR
     // (it was previously tagged BR_nop).
     DecodedT{imm: False, alu: ALU_nop, mem: MEM_nop,   br: BR_jmp, wb: False},  // OpJMP
     DecodedT{imm: False, alu: ALU_nop, mem: MEM_nop,   br: BR_nop, wb: False}}; // Op1F

  interface Put request;
    method Action put(cin);
      ControlTokenT cout = cin;
      cout.dec = dt[pack(cin.inst.opcode)];
      out_fifo.enq(cout);
    endmethod
  endinterface

  interface response = toGet(out_fifo);
endmodule


// Execute stage: applies the decoded ALU operation to ra_src and either the
// extended immediate or rb_src, and stores the outcome in the token's
// `result` field.
module mkALU(Server#(ControlTokenT, ControlTokenT));
  FIFO#(ControlTokenT) out_fifo <- mkLFIFO;

  // Combinational ALU.  ALU_nop passes operand `a` through unchanged.
  function WordT alu_func(ALUopT op, WordT a, WordT b);
    WordT r;
    case (op) // look at decoded ALU opcode
      ALU_nop: r = a;
      ALU_or:  r = a | b;
      ALU_and: r = a & b;
      ALU_add: r = a + b;
      ALU_sub: r = a - b;
      default: r = ?; // default case is "don't care"
    endcase
    return r;
  endfunction

  interface Put request;
    method Action put(cin); // input control token
      ControlTokenT cout = cin;
      let a = cin.ra_src;
      // second operand: immediate for "imm" instructions, otherwise register rb
      let b = cin.dec.imm ? extend(cin.inst.imm) : cin.rb_src;
      cout.result = alu_func(cin.dec.alu,a,b);
      out_fifo.enq(cout);
    endmethod
  endinterface

  interface response = toGet(out_fifo);
endmodule


// Memory stage: the ALU result held in the token is used as the data memory
// address.  A load replaces `result` with the memory contents; a store
// writes rb_src.  A store whose address equals Address_magic_output also
// prints the stored value.
module mkMemoryAccess(Server#(ControlTokenT, ControlTokenT));
  // use embedded memory which is separate from instruction memory for now
  Vector#(MemSize, Reg#(WordT)) dmem <- replicateM(mkReg(0));
  FIFO#(ControlTokenT) out_fifo <- mkFIFO; // N.B. not an LFIFO

  interface Put request;
    method Action put(control_in);
      ControlTokenT c = control_in;
      case(c.dec.mem)
        MEM_load  : c.result = dmem[c.result];
        MEM_store :
          begin
            dmem[c.result] <= c.rb_src;
            let magic_output = unpack(extend(pack(Address_magic_output)));
            if(c.result==magic_output)
              begin
                $display("%05t: +-------------+",$time);
                $display("%05t: --OUTPUT------------| \033[44m\033[37m\033[1m%11d\033[0m |--------------------", $time, c.rb_src);
                $display("%05t: +-------------+",$time);
              end
          end
      endcase
      out_fifo.enq(c);
    endmethod
  endinterface

  interface response = toGet(out_fifo);
endmodule


// Register file interface:
//   read              - request takes an instruction, response returns the
//                       values of its ra and rb registers
//   writeback         - (thread id, destination register, value) to write
//   display_registers - debug dump of all registers; if the argument is
//                       Valid, the given register is shown with the given
//                       value, highlighted
interface RegisterFileIfc;
  interface Server#(InstructionT, Tuple2#(WordT,WordT)) read;
  interface Put#(Tuple3#(ThreadidT,RegT,WordT)) writeback;
  method Action display_registers(Maybe#(Tuple3#(ThreadidT,RegT,WordT)) t);
endinterface


module mkRegisterFile(RegisterFileIfc);
  FIFO#(Tuple2#(WordT,WordT)) read_fifo <- mkLFIFO;
  // create register file as a vector of registers
  // simple, but not the most efficient option
  // need to include thread ID in the indexing for a multithreaded version
  Vector#(TExp#(SizeOf#(RegT)),Reg#(WordT)) rf <- replicateM(mkReg(0));

  interface Server read;
    interface Put request;
      method Action put(inst);
        let a = rf[pack(inst.ra)];
        let b = rf[pack(inst.rb)];
        read_fifo.enq(tuple2(a,b));
      endmethod
    endinterface
    interface Get response = toGet(read_fifo);
  endinterface

  interface Put writeback;
    method Action put(wb);
      let id = tpl_1(wb); // thread id - currently unused
      let rd = tpl_2(wb);
      // writes to RgZero are discarded
      if(rd!=RgZero)
        rf[pack(rd)] <= tpl_3(wb);
    endmethod
  endinterface

  method Action display_registers(t);
    let id  = tpl_1(fromMaybe(?,t)); // thread id - currently unused
    let rd  = pack(tpl_2(fromMaybe(?,t)));
    let val = tpl_3(fromMaybe(?,t));
    // The writeback port ignores writes to RgZero, so never present such a
    // write as a changed register in the dump.
    Bool changed = isValid(t) && (rd!=pack(RgZero));

    // column header
    $write("%05t|",$time);
    for(Bit#(5) x=0; x<8; x=x+1)
      $write("%6d|",x);
    $display("");
    $write(" +");
    for(Bit#(5) x=0; x<8; x=x+1)
      $write("------+");
    $display("");

    // four rows of eight registers
    for(Bit#(5) y=0; y<4; y=y+1)
      begin
        $write("R%2d+ |",y*8);
        for(Bit#(5) x=0; x<8; x=x+1)
          begin
            let r = y*8+x; // register to output
            if(changed && (r==rd))
              // write changed value (ANSI terminal codes used to highlight)
              $write("\033[47m\033[31m\033[7m\033[1m%6d\033[0m<",val);
            else
              $write("%6d|",rf[y*8+x]);
          end
        $display("");
      end
  endmethod
endmodule


// Branch stage: computes the next pc held in the token's context.
//   - conditional branches test the ALU result (a subtraction, per the
//     decode table) against zero
//   - JSR, JR and JMP are always taken; JSR additionally sets rd to Rgra and
//     result to pc+1 (the return address)
//   - JR jumps to ra_src, other taken branches jump to the extended immediate
// A taken branch whose target equals its own pc terminates the thread: a
// message is printed and the token is not forwarded.
module mkBranch(Server#(ControlTokenT,ControlTokenT));
  FIFO#(ControlTokenT) out_fifo <- mkLFIFO;

  interface Put request;
    method Action put(control_in);
      ControlTokenT c = control_in;
      let oldpc = c.contx.pc;
      Bool branch = False;
      case(c.inst.opcode)
        OpBEQ: branch = c.result==0;
        OpBNE: branch = c.result!=0;
        OpBLT: branch = c.result<0;
        OpBLE: branch = c.result<=0;
        // OpJMP added: it was previously never taken, so a JMP simply fell
        // through to pc+1.
        OpJSR, OpJR, OpJMP: branch = True;
      endcase

      if(c.inst.opcode==OpJSR) // jump to subroutine
        begin
          c.inst.rd = Rgra;
          c.result = c.contx.pc+1; // return address
        end

      if(c.inst.opcode==OpJR)
        begin
          c.contx.pc = c.ra_src; // jump to a register
          $display("%05t: return from subroutine to pc=%d",$time,c.ra_src);
        end
      else if(branch)
        c.contx.pc = extend(c.inst.imm); // branch taken
      else
        c.contx.pc = c.contx.pc+1; // branch not taken

      if(branch && (oldpc==c.contx.pc)) // terminate thread
        $display("%05t: Thread %1d terminated after executing %1d instructions", $time, c.contx.id, c.contx.inst_count);
      else
        out_fifo.enq(c);
    endmethod
  endinterface

  interface response = toGet(out_fifo);
endmodule


// Top level: connects run queue -> fetch -> (decode + register read) ->
// execute -> data memory -> branch -> writeback, with the context returned
// to the run queue after each instruction.
module mkProc(Empty);
  FIFO#(ContextT) run_queue <- mkSizedFIFO(4);
  Reg#(Bool) init <- mkReg(True);
  Server#(ContextT, IFtokenT) fetch <- mkInstructionROM(fib_program);
  RegisterFileIfc rf <- mkRegisterFile;
  Server#(ControlTokenT, ControlTokenT) decode  <- mkDecode;
  Server#(ControlTokenT, ControlTokenT) execute <- mkALU;
  Server#(ControlTokenT, ControlTokenT) dmemory <- mkMemoryAccess;
  Server#(ControlTokenT, ControlTokenT) branch  <- mkBranch;

  // fires once: seed the run queue with a single context starting at pc 0
  rule initialise(init);
    run_queue.enq(ContextT{pc: 0, id: 0, inst_count: 0});
    init <= False;
  endrule

  mkConnection(toGet(run_queue), fetch.request);

  rule connect_fetch_to_decode_and_register_file;
    let f <- fetch.response.get();
    let c = tpl_1(f); // context
    let i = tpl_2(f); // instruction
    rf.read.request.put(i);
    ControlTokenT dectoken = ?;
    dectoken.contx = c;
    dectoken.inst = i;
    decode.request.put(dectoken);
    // some debug output
    $display("______________________________________________________________________________");
    $write("%05t: \033[32mpc=%3d\033[0m : %5s",$time,c.pc,opcode2string(i.opcode));
    $write(" rd=", reg2fmt(i.rd));
    $write(" ra=", reg2fmt(i.ra));
    $write(" rb=", reg2fmt(i.rb));
    $write(" imm=%5d\n", i.imm);
  endrule

  rule merge_decode_and_registers_then_connect_to_execute;
    let d <- decode.response.get;
    let r <- rf.read.response.get;
    d.ra_src = tpl_1(r);
    d.rb_src = tpl_2(r);
    execute.request.put(d);
  endrule

  mkConnection(execute.response, dmemory.request);
  mkConnection(dmemory.response, branch.request);

  rule connect_branch_to_writeback(!init);
    let c <- branch.response.get();
    c.contx.inst_count = c.contx.inst_count+1;
    run_queue.enq(c.contx);
    if(c.dec.wb)
      begin
        rf.writeback.put(tuple3(c.contx.id, c.inst.rd, c.result));
        // print the destination register with reg2fmt, matching the fetch
        // debug output above
        $write("%05t: wrote result=%d to rd=", $time, c.result);
        $display(reg2fmt(c.inst.rd));
        rf.display_registers(tagged Valid tuple3(c.contx.id, c.inst.rd, c.result));
      end
    else
      rf.display_registers(tagged Invalid);
  endrule
endmodule

endpackage: CyanProc
Link to the CyanProc.bsv source