Bluespec Examples
Cyan Processor
Overview
This package contains the processor. Please do read the slides to better understand the simple instruction format used. Also, please do remember that this is a simple example designed to keep the code short rather than being particularly efficient. That said, I have had Masters students use this code as a basis for more elaborate designs.
The code
package CyanProc;
import FIFO::*;
import GetPut::*;
import ClientServer::*;
import Connectable::*;
import Vector::*;
import CyanTypes::*;
import CyanAsm::*;
import FibProg::*;
// Number of WordT entries in the data memory that mkMemoryAccess builds
// (used as the length of its register vector).
typedef 1024 MemSize;
// Decode stage.
//
// Accepts a control token, looks up the control signals for the token's
// opcode in a constant table, stores them in the token's `dec` field and
// queues the annotated token for the next stage.
module mkDecode(Server#(ControlTokenT,ControlTokenT));

   // Annotated tokens wait here until the next stage collects them.
   FIFO#(ControlTokenT) decoded_q <- mkLFIFO;

   // Decode ROM: one entry for every possible opcode encoding, listed in
   // encoding order (the table is indexed with pack(opcode)).
   // Entries labelled OpNN correspond to encodings without a named opcode.
   // NOTE(review): OpJMP is decoded with br: BR_nop, unlike OpJSR/OpJR which
   // use BR_jmp -- confirm this is intended.
   DecodedT decode_rom[valueOf(TExp#(SizeOf#(OpcodeT)))] = {
      // register-register ALU operations
      DecodedT{imm: False, alu: ALU_or,  mem: MEM_nop,   br: BR_nop, wb: True},   // OpOR
      DecodedT{imm: False, alu: ALU_and, mem: MEM_nop,   br: BR_nop, wb: True},   // OpAND
      DecodedT{imm: False, alu: ALU_add, mem: MEM_nop,   br: BR_nop, wb: True},   // OpADD
      DecodedT{imm: False, alu: ALU_sub, mem: MEM_nop,   br: BR_nop, wb: True},   // OpSUB
      DecodedT{imm: False, alu: ALU_nop, mem: MEM_nop,   br: BR_nop, wb: False},  // Op04
      DecodedT{imm: False, alu: ALU_nop, mem: MEM_nop,   br: BR_nop, wb: False},  // Op05
      DecodedT{imm: False, alu: ALU_nop, mem: MEM_nop,   br: BR_nop, wb: False},  // Op06
      DecodedT{imm: False, alu: ALU_nop, mem: MEM_nop,   br: BR_nop, wb: False},  // Op07
      // register-immediate ALU operations
      DecodedT{imm: True,  alu: ALU_or,  mem: MEM_nop,   br: BR_nop, wb: True},   // OpORI
      DecodedT{imm: True,  alu: ALU_and, mem: MEM_nop,   br: BR_nop, wb: True},   // OpANDI
      DecodedT{imm: True,  alu: ALU_add, mem: MEM_nop,   br: BR_nop, wb: True},   // OpADDI
      DecodedT{imm: True,  alu: ALU_sub, mem: MEM_nop,   br: BR_nop, wb: False},  // Op0b
      DecodedT{imm: True,  alu: ALU_nop, mem: MEM_nop,   br: BR_nop, wb: False},  // Op0c
      DecodedT{imm: True,  alu: ALU_nop, mem: MEM_nop,   br: BR_nop, wb: False},  // Op0d
      DecodedT{imm: True,  alu: ALU_nop, mem: MEM_nop,   br: BR_nop, wb: False},  // Op0e
      DecodedT{imm: True,  alu: ALU_nop, mem: MEM_nop,   br: BR_nop, wb: False},  // Op0f
      // memory operations (the ALU adds the immediate to form the address)
      DecodedT{imm: True,  alu: ALU_add, mem: MEM_load,  br: BR_nop, wb: True},   // OpLOAD
      DecodedT{imm: True,  alu: ALU_add, mem: MEM_store, br: BR_nop, wb: False},  // OpSTORE
      DecodedT{imm: True,  alu: ALU_nop, mem: MEM_nop,   br: BR_nop, wb: False},  // Op12
      DecodedT{imm: True,  alu: ALU_nop, mem: MEM_nop,   br: BR_nop, wb: False},  // Op13
      DecodedT{imm: True,  alu: ALU_nop, mem: MEM_nop,   br: BR_nop, wb: False},  // Op14
      DecodedT{imm: True,  alu: ALU_nop, mem: MEM_nop,   br: BR_nop, wb: False},  // Op15
      DecodedT{imm: True,  alu: ALU_nop, mem: MEM_nop,   br: BR_nop, wb: False},  // Op16
      DecodedT{imm: True,  alu: ALU_nop, mem: MEM_nop,   br: BR_nop, wb: False},  // Op17
      // conditional branches (the ALU subtracts the two source registers)
      DecodedT{imm: False, alu: ALU_sub, mem: MEM_nop,   br: BR_con, wb: False},  // OpBEQ
      DecodedT{imm: False, alu: ALU_sub, mem: MEM_nop,   br: BR_con, wb: False},  // OpBNE
      DecodedT{imm: False, alu: ALU_sub, mem: MEM_nop,   br: BR_con, wb: False},  // OpBLT
      DecodedT{imm: False, alu: ALU_sub, mem: MEM_nop,   br: BR_con, wb: False},  // OpBLE
      // jumps
      DecodedT{imm: False, alu: ALU_nop, mem: MEM_nop,   br: BR_jmp, wb: True},   // OpJSR
      DecodedT{imm: False, alu: ALU_nop, mem: MEM_nop,   br: BR_jmp, wb: False},  // OpJR
      DecodedT{imm: False, alu: ALU_nop, mem: MEM_nop,   br: BR_nop, wb: False},  // OpJMP
      DecodedT{imm: False, alu: ALU_nop, mem: MEM_nop,   br: BR_nop, wb: False}   // Op1F
   };

   interface Put request;
      // Copy the incoming token, attach the decoded control signals, queue it.
      method Action put(cin);
         ControlTokenT annotated = cin;
         annotated.dec = decode_rom[pack(cin.inst.opcode)];
         decoded_q.enq(annotated);
      endmethod
   endinterface

   interface response = toGet(decoded_q);

endmodule
// Execute stage.
//
// Selects the two ALU operands from the incoming token, applies the decoded
// ALU operation and stores the outcome in the token's `result` field.
module mkALU(Server#(ControlTokenT, ControlTokenT));

   // Tokens carrying a computed result, waiting for the next stage.
   FIFO#(ControlTokenT) result_q <- mkLFIFO;

   // Combinational ALU: ALU_nop passes operand `a` through unchanged; any
   // operation not listed yields a "don't care" value.
   function WordT alu_func(ALUopT op, WordT a, WordT b);
      return case (op)
                ALU_nop: a;
                ALU_or:  (a | b);
                ALU_and: (a & b);
                ALU_add: (a + b);
                ALU_sub: (a - b);
                default: ?;
             endcase;
   endfunction

   interface Put request;
      method Action put(cin);
         // Operand A is always the first source register value.
         WordT operand_a = cin.ra_src;

         // Operand B is the second source register value, unless the decoded
         // instruction asks for the (extended) immediate instead.
         WordT operand_b = cin.rb_src;
         if (cin.dec.imm)
            operand_b = extend(cin.inst.imm);

         ControlTokenT computed = cin;
         computed.result = alu_func(cin.dec.alu, operand_a, operand_b);
         result_q.enq(computed);
      endmethod
   endinterface

   interface response = toGet(result_q);

endmodule
// Memory-access stage.
//
// The token's `result` field (computed by the ALU) is used as the data-memory
// index. Loads replace `result` with the value read; stores write `rb_src`.
// A store whose address equals Address_magic_output additionally prints the
// stored value on the simulation console. All other tokens pass through
// unchanged.
module mkMemoryAccess(Server#(ControlTokenT, ControlTokenT));

   // Data memory, built from registers; separate from instruction memory for now.
   Vector#(MemSize, Reg#(WordT)) dmem <- replicateM(mkReg(0));

   // Outgoing tokens. N.B. a plain FIFO, not an LFIFO.
   FIFO#(ControlTokenT) done_q <- mkFIFO;

   interface Put request;
      method Action put(control_in);
         ControlTokenT tok = control_in;

         // Index into the data memory, as computed by the ALU.
         WordT addr = control_in.result;

         // The magic output address, converted to the width/type of a word.
         WordT magic_output = unpack(extend(pack(Address_magic_output)));

         case (control_in.dec.mem)
            MEM_load:
               tok.result = dmem[addr];
            MEM_store:
               begin
                  dmem[addr] <= control_in.rb_src;
                  if (addr == magic_output)
                     begin
                        // ANSI terminal codes highlight the value being output
                        $display("%05t: +-------------+",$time);
                        $display("%05t: --OUTPUT------------| \033[44m\033[37m\033[1m%11d\033[0m |--------------------",
                                 $time, control_in.rb_src);
                        $display("%05t: +-------------+",$time);
                     end
               end
         endcase

         done_q.enq(tok);
      endmethod
   endinterface

   interface response = toGet(done_q);

endmodule
// Interface to the register file used by the processor.
interface RegisterFileIfc;
// read: the request is an instruction; the response is the pair of values
// held in the registers named by the instruction's ra and rb fields.
interface Server#(InstructionT, Tuple2#(WordT,WordT)) read;
// writeback: takes (thread id, destination register, value to write).
interface Put#(Tuple3#(ThreadidT,RegT,WordT)) writeback;
// display_registers: prints the register file for debugging; when t is Valid
// the register it names is shown highlighted with the supplied value.
method Action display_registers(Maybe#(Tuple3#(ThreadidT,RegT,WordT)) t);
endinterface
// Register file.
//
// Provides a two-operand read port (driven by an instruction's ra/rb fields),
// a write-back port, and a debug method that prints every register as a
// table of 4 rows by 8 columns.
module mkRegisterFile(RegisterFileIfc);

   // Operand pairs produced by read requests, awaiting collection.
   FIFO#(Tuple2#(WordT,WordT)) operand_q <- mkLFIFO;

   // The register file is simply a vector of registers: simple, but not the
   // most efficient option. A multithreaded version would need to include
   // the thread ID in the indexing.
   Vector#(TExp#(SizeOf#(RegT)),Reg#(WordT)) rf <- replicateM(mkReg(0));

   interface Server read;
      interface Put request;
         // Read both source registers named by the instruction.
         method Action put(inst);
            operand_q.enq(tuple2(rf[pack(inst.ra)], rf[pack(inst.rb)]));
         endmethod
      endinterface
      interface Get response = toGet(operand_q);
   endinterface

   interface Put writeback;
      // Write a result to its destination register; writes that target
      // RgZero are discarded. The thread id is currently unused.
      method Action put(wb);
         match {.*, .dest, .value} = wb;
         if (dest != RgZero)
            rf[pack(dest)] <= value;
      endmethod
   endinterface

   // Print the register file. When `t` is Valid, the register it names is
   // shown with the supplied value, highlighted, instead of its stored value.
   method Action display_registers(t);
      // Fields are "don't care" when t is Invalid; they are only used when
      // has_update is True. The thread id is currently unused.
      match {.*, .changed_reg, .changed_val} = fromMaybe(?, t);
      let  changed_idx = pack(changed_reg);
      Bool has_update  = isValid(t);

      // header row: column offsets 0..7
      $write("%05t|",$time);
      for (Bit#(5) col = 0; col < 8; col = col + 1)
         $write("%6d|",col);
      $display("");

      // separator row
      $write(" +");
      for (Bit#(5) col = 0; col < 8; col = col + 1)
         $write("------+");
      $display("");

      // four rows of eight registers each
      for (Bit#(5) row = 0; row < 4; row = row + 1)
         begin
            $write("R%2d+ |",row*8);
            for (Bit#(5) col = 0; col < 8; col = col + 1)
               begin
                  let r = row*8 + col; // register to output
                  if (!has_update || (r != changed_idx))
                     $write("%6d|",rf[r]);
                  else
                     // write changed value (ANSI terminal codes used to highlight)
                     $write("\033[47m\033[31m\033[7m\033[1m%6d\033[0m<",changed_val);
               end
            $display("");
         end
   endmethod

endmodule
// Branch stage.
//
// Decides whether the instruction in the incoming token changes control flow
// and updates the program counter held in the token's context:
//   * OpBEQ/OpBNE/OpBLT/OpBLE are taken depending on the token's `result`
//     compared with zero;
//   * OpJSR, OpJR and OpJMP are always taken;
//   * OpJSR additionally sets the destination register to Rgra and the
//     result to pc+1 (the return address);
//   * OpJR jumps to the value in `ra_src`; every other taken branch jumps to
//     the (extended) immediate; anything else advances to pc+1.
// A taken branch whose target equals the instruction's own pc terminates the
// thread: a message is printed and the token is dropped instead of forwarded.
module mkBranch(Server#(ControlTokenT,ControlTokenT));

   FIFO#(ControlTokenT) out_fifo <- mkLFIFO;

   interface Put request;
      method Action put(control_in);
         ControlTokenT c = control_in;
         let oldpc = c.contx.pc;

         // NOTE(review): the BLT/BLE comparisons with 0 are only meaningful
         // if WordT is a signed type -- confirm against CyanTypes.
         Bool branch = False;
         case(c.inst.opcode)
            OpBEQ: branch = c.result==0;
            OpBNE: branch = c.result!=0;
            OpBLT: branch = c.result<0;
            OpBLE: branch = c.result<=0;
            // OpJMP was previously missing here, so an unconditional jump
            // fell through to pc+1 and behaved like a no-op.
            OpJSR, OpJR, OpJMP: branch = True;
         endcase

         if(c.inst.opcode==OpJSR) // jump to subroutine
            begin
               c.inst.rd = Rgra;
               c.result = c.contx.pc+1; // return address
            end

         if(c.inst.opcode==OpJR)
            begin
               c.contx.pc = c.ra_src; // jump to a register
               $display("%05t: return from subroutine to pc=%d",$time,c.ra_src);
            end
         else if(branch)
            c.contx.pc = extend(c.inst.imm); // branch taken
         else
            c.contx.pc = c.contx.pc+1; // branch not taken

         if(branch && (oldpc==c.contx.pc)) // branch to self: terminate thread
            $display("%05t: Thread %1d terminated after executing %1d instructions",
                     $time, c.contx.id, c.contx.inst_count);
         else
            out_fifo.enq(c);
      endmethod
   endinterface

   interface response = toGet(out_fifo);

endmodule
// Top level of the processor.
//
// Contexts circulate through: run queue -> instruction fetch -> decode and
// register read (in parallel) -> execute -> data memory -> branch -> write
// back, after which the updated context re-enters the run queue.
module mkProc(Empty);

   // Contexts that are ready to fetch their next instruction.
   FIFO#(ContextT) run_queue <- mkSizedFIFO(4);
   // True until the initial context has been placed in the run queue.
   Reg#(Bool) init <- mkReg(True);

   // Pipeline stages.
   Server#(ContextT, IFtokenT) fetch <- mkInstructionROM(fib_program);
   RegisterFileIfc rf <- mkRegisterFile;
   Server#(ControlTokenT, ControlTokenT) decode <- mkDecode;
   Server#(ControlTokenT, ControlTokenT) execute <- mkALU;
   Server#(ControlTokenT, ControlTokenT) dmemory <- mkMemoryAccess;
   Server#(ControlTokenT, ControlTokenT) branch <- mkBranch;

   // Start a single thread (id 0) at pc 0; fires only while init is True.
   rule initialise(init);
      init <= False;
      run_queue.enq(ContextT{pc: 0, id: 0, inst_count: 0});
   endrule

   mkConnection(toGet(run_queue), fetch.request);

   // Send each fetched instruction both to the register file (operand read)
   // and, wrapped in a fresh control token, to the decoder.
   rule connect_fetch_to_decode_and_register_file;
      let fetched <- fetch.response.get();
      match {.ctx, .instr} = fetched; // context, instruction

      ControlTokenT token = ?;
      token.contx = ctx;
      token.inst = instr;

      rf.read.request.put(instr);
      decode.request.put(token);

      // some debug output
      $display("______________________________________________________________________________");
      $write("%05t: \033[32mpc=%3d\033[0m : %5s",$time,ctx.pc,opcode2string(instr.opcode));
      $write(" rd=", reg2fmt(instr.rd));
      $write(" ra=", reg2fmt(instr.ra));
      $write(" rb=", reg2fmt(instr.rb));
      $write(" imm=%5d\n", instr.imm);
   endrule

   // Combine the decoded token with the operands read from the register
   // file and hand the result to the execute stage.
   rule merge_decode_and_registers_then_connect_to_execute;
      let token <- decode.response.get;
      let operands <- rf.read.response.get;
      match {.src_a, .src_b} = operands;
      token.ra_src = src_a;
      token.rb_src = src_b;
      execute.request.put(token);
   endrule

   mkConnection(execute.response, dmemory.request);
   mkConnection(dmemory.response, branch.request);

   // Retire an instruction: count it, return the context to the run queue
   // and, if the decoded instruction writes back, update the register file.
   // The register file is printed in either case.
   rule connect_branch_to_writeback(!init);
      let token <- branch.response.get();

      let ctx = token.contx;
      ctx.inst_count = ctx.inst_count + 1;
      run_queue.enq(ctx);

      if (token.dec.wb)
         begin
            let update = tuple3(ctx.id, token.inst.rd, token.result);
            rf.writeback.put(update);
            $display("%05t: wrote result=%d to rd=", $time, token.result, token.inst.rd);
            rf.display_registers(tagged Valid update);
         end
      else
         rf.display_registers(tagged Invalid);
   endrule

endmodule
endpackage: CyanProc
Link to the CyanProc.bsv source