Adding a Gem5 instruction (for research)
Table of Contents
This post looks at how I added a zero-region instruction, which is a modified version of a store. It adds a full instruction, rather than just a pseudo-instruction as this does. Also see this for RISC-V.
1 Decode
I used the ARMv8 Architecture Reference Manual (table C4-4) to find an unallocated instruction encoding, and then found the place in the decoder where that encoding currently produces an unknown instruction:
31–28 | 27–24 | 23–20 | 19–16 | 15–12 | 11–10 | 9–5 | 4–0 |
0011 | 1000 | 0010 | 0000 | 0000 | 00 | start reg | size reg |
@@ -782,8 +783,18 @@ namespace Aarch64 return new Unknown64(machInst); } } else if (bits(machInst, 21) == 1) { - if (bits(machInst, 11, 10) != 0x2) + // Have XX11 1X00 XX1X XXXX XXXX 00SS SSSZ ZZZZ + // Need 0011 1000 0010 0000 0000 00SS SSSZ ZZZZ + if (bits(machInst, 11, 10) == 0x0 && + bits(machInst, 31,12) == 0x38200) { + IntRegIndex rstart = + (IntRegIndex)(uint32_t)bits(machInst, 9, 5); + IntRegIndex roffset = + (IntRegIndex)(uint32_t)bits(machInst, 4, 0); + return new STRZERO(machInst, rstart, roffset); + } else if (bits(machInst, 11, 10) != 0x2) { return new Unknown64(machInst); + } if (!bits(machInst, 14)) return new Unknown64(machInst); IntRegIndex rt = (IntRegIndex)(uint32_t)bits(machInst, 4, 0);
2 Template
I have just copied the LoadStoreImm64{Declare,Constructor} and Store64{Execute,InitiateAcc,CompleteAcc} templates. I don't think I made any changes to them, but it is still helpful to see how the ISA language works.
@@ -705,3 +706,97 @@ def template LoadStoreLitU64Constructor {{ setExcAcRel(exclusive, acrel); } }}; + +def template ZeroRegionDeclare {{ + class %(class_name)s : public %(base_class)s + { + public: + + /// Constructor. + %(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, IntRegIndex _offset); + + Fault execute(ExecContext *, Trace::InstRecord *) const override; + Fault initiateAcc(ExecContext *, Trace::InstRecord *) const override; + Fault completeAcc(PacketPtr, ExecContext *, + Trace::InstRecord *) const override; + + void + annotateFault(ArmFault *fault) override + { + %(fa_code)s + } + }; +}}; + +def template ZeroRegionConstructor {{ + %(class_name)s::%(class_name)s(ExtMachInst machInst, + IntRegIndex _dest, IntRegIndex _offset) + : %(base_class)s("%(mnemonic)s", machInst, %(op_class)s, + _offset, _dest, 0) + { + %(constructor)s; + } +}}; + +def template ZeroRegionExecute {{ + Fault %(class_name)s::execute(ExecContext *xc, + Trace::InstRecord *traceData) const + { + Addr EA; + Fault fault = NoFault; + + %(op_decl)s; + %(op_rd)s; + unsigned memAccessFlags = %(mem_flags)s; + %(ea_code)s; + + if (fault == NoFault) { + %(memacc_code)s; + } + + if (fault == NoFault) { + fault = writeMemAtomic(xc, traceData, Mem, EA, + memAccessFlags, NULL); + } + + if (fault == NoFault) { + %(op_wb)s; + } + + return fault; + } +}}; + +def template ZeroRegionInitiateAcc {{ + Fault %(class_name)s::initiateAcc(ExecContext *xc, + Trace::InstRecord *traceData) const + { + Addr EA; + Fault fault = NoFault; + + %(op_decl)s; + %(op_rd)s; + unsigned memAccessFlags = %(mem_flags)s; + %(ea_code)s; + + if (fault == NoFault) { + %(memacc_code)s; + } + + if (fault == NoFault) { + fault = writeMemTiming(xc, traceData, Mem, EA, memAccessFlags, + NULL); + } + + return fault; + } +}}; + +def template ZeroRegionCompleteAcc {{ + Fault %(class_name)s::completeAcc(PacketPtr pkt, ExecContext *xc, + Trace::InstRecord *traceData) const + { + return NoFault; + } +}};
3 Filling in the template
The code that fills in the templates is in a global let block, which means it is just literal Python code, according to the gem5 wiki. What I have below is just a version of the StoreImm64 class, with the parent methods also expanded, and the code simplified. The InstObjParams is what is used for the substitution, with the first three arguments being the mnemonic, class name and base class name, then a dictionary of miscellaneous substitutions (codeBlobs) and instruction flags.
@@ -290,6 +291,50 @@ let {{ StorePost64(mnem, NameBase + "_POST", size, flavor=flavor).emit() StoreReg64(mnem, NameBase + "_REG", size, flavor=flavor).emit() + class StoreZeroReg64(object): + def __init__(self, mnemonic, class_name): + self.mnemonic = mnemonic + self.class_name = class_name + + def emit(self): + codeBlobs = {} + codeBlobs["mem_flags"] = "ArmISA::TLB::MustBeOne" + \ + "|ArmISA::TLB::AllowUnaligned" + \ + "|Request::REGION_ZERO" + # Address computation + eaCode = SPAlignmentCheckCode + "EA = XBase;" + + codeBlobs["ea_code"] = eaCode + + # Code that actually handles the access + accCode = 'Mem_ud = cSwap(XDest_ud, isBigEndian64(xc->tcBase()));' + codeBlobs["memacc_code"] = accCode + # TODO: RMK35 + codeBlobs["fa_code"] = ''' + fault->annotate(ArmFault::SAS, 2); + fault->annotate(ArmFault::SSE, false); + fault->annotate(ArmFault::SRT, dest); + fault->annotate(ArmFault::SF, false); + fault->annotate(ArmFault::AR, false); + ''' + + instFlags = [] + + iop = InstObjParams(self.mnemonic, self.class_name, + "ArmISA::MemoryImm64", + codeBlobs, instFlags) + + # templates in src/arch/arm/isa/templates/mem64.isa + global header_output, decoder_output, exec_output + header_output += eval('ZeroRegionDeclare').subst(iop) + decoder_output += eval('ZeroRegionConstructor').subst(iop) + exec_output += eval('ZeroRegionExecute').subst(iop) + \ + eval('ZeroRegionInitiateAcc').subst(iop) + \ + eval('ZeroRegionCompleteAcc').subst(iop) + + StoreZeroReg64("strz", "STRZERO").emit() + buildStores64("strb", "STRB64", 1) buildStores64("strh", "STRH64", 2) buildStores64("str", "STRW64", 4)