Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding support for ZCMT Extension for Code-Size Reduction in CVA6 #2659

Open
wants to merge 18 commits into
base: master
Choose a base branch
from
Open
1 change: 1 addition & 0 deletions core/Flist.cva6
Original file line number Diff line number Diff line change
Expand Up @@ -112,6 +112,7 @@ ${CVA6_REPO_DIR}/core/branch_unit.sv
${CVA6_REPO_DIR}/core/compressed_decoder.sv
${CVA6_REPO_DIR}/core/macro_decoder.sv
${CVA6_REPO_DIR}/core/controller.sv
${CVA6_REPO_DIR}/core/zcmt_decoder.sv
${CVA6_REPO_DIR}/core/csr_buffer.sv
${CVA6_REPO_DIR}/core/csr_regfile.sv
${CVA6_REPO_DIR}/core/decoder.sv
Expand Down
14 changes: 12 additions & 2 deletions core/branch_unit.sv
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,8 @@ module branch_unit #(
input fu_data_t fu_data_i,
// Instruction PC - ISSUE_STAGE
input logic [CVA6Cfg.VLEN-1:0] pc_i,
// Is zcmt instruction - ISSUE_STAGE
input logic is_zcmt_i,
// Instruction is compressed - ISSUE_STAGE
input logic is_compressed_instr_i,
// Branch unit instruction is valid - ISSUE_STAGE
Expand Down Expand Up @@ -75,13 +77,21 @@ module branch_unit #(
// we need to put the branch target address into rd, this is the result of this unit
branch_result_o = next_pc;
resolved_branch_o.pc = pc_i;
// There are only two sources of mispredicts:
// There are only three sources of mispredicts:
// 1. Branches
// 2. Jumps to register addresses
// 3. Zcmt instructions
if (branch_valid_i) begin
// write target address which goes to PC Gen
// write target address which goes to PC Gen or select target address if zcmt
resolved_branch_o.target_address = (branch_comp_res_i) ? target_address : next_pc;
resolved_branch_o.is_taken = branch_comp_res_i;
if (CVA6Cfg.RVZCMT) begin
if (is_zcmt_i) begin
// Unconditional jump handling
resolved_branch_o.is_mispredict = 1'b1; // miss prediction for ZCMT
resolved_branch_o.cf_type = ariane_pkg::JumpR;
end
end
// check the outcome of the branch speculation
if (ariane_pkg::op_is_branch(fu_data_i.operation)) begin
// Set the `cf_type` of the output as `branch`, this will update the BHT.
Expand Down
4 changes: 2 additions & 2 deletions core/cache_subsystem/wt_dcache.sv
Original file line number Diff line number Diff line change
Expand Up @@ -188,10 +188,10 @@ module wt_dcache
// read controllers (LD unit and PTW/MMU)
///////////////////////////////////////////////////////

// 0 is used by MMU, 1 by READ access requests
// 0 is used by MMU or implicit read by zcmt, 1 by READ access requests
for (genvar k = 0; k < NumPorts - 1; k++) begin : gen_rd_ports
// set these to high prio ports
if ((k == 0 && CVA6Cfg.MmuPresent) || (k == 1) || (k == 2 && CVA6Cfg.EnableAccelerator)) begin
if ((k == 0 && (CVA6Cfg.MmuPresent || CVA6Cfg.RVZCMT )) || (k == 1) || (k == 2 && CVA6Cfg.EnableAccelerator)) begin
assign rd_prio[k] = 1'b1;
wt_dcache_ctrl #(
.CVA6Cfg(CVA6Cfg),
Expand Down
22 changes: 10 additions & 12 deletions core/compressed_decoder.sv
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,9 @@ module compressed_decoder #(
// Output instruction is macro - decoder
output logic is_macro_instr_o,
// Output instruction is compressed - decoder
output logic is_compressed_o
output logic is_compressed_o,
// Output instruction is macro - decoder
output logic is_zcmt_instr_o
);

// -------------------
Expand All @@ -42,6 +44,7 @@ module compressed_decoder #(
is_compressed_o = 1'b1;
instr_o = instr_i;
is_macro_instr_o = 0;
is_zcmt_instr_o = 1'b0;

// I: | imm[11:0] | rs1 | funct3 | rd | opcode |
// S: | imm[11:5] | rs2 | rs1 | funct3 | imm[4:0] | opcode |
Expand Down Expand Up @@ -867,18 +870,13 @@ module compressed_decoder #(
3'b000,
riscv::OpcodeStoreFp
};
end else if (CVA6Cfg.RVZCMP) begin
if (instr_i[12:10] == 3'b110 || instr_i[12:10] == 3'b111 || instr_i[12:10] == 3'b011) begin //is a push/pop instruction
is_macro_instr_o = 1;
instr_o = instr_i;
end else begin
illegal_instr_o = 1'b1;
end
end else begin
illegal_instr_o = 1'b1;
end
end else if (CVA6Cfg.RVZCMP && (instr_i[12:10] == 3'b110 || instr_i[12:10] == 3'b111 || instr_i[12:10] == 3'b011)) begin
is_macro_instr_o = 1;
instr_o = instr_i;
end else if (CVA6Cfg.RVZCMT && (instr_i[12:10] == 3'b000)) //jt/jalt instruction
is_zcmt_instr_o = 1'b1;
else illegal_instr_o = 1'b1;
end

riscv::OpcodeC2Swsp: begin
// c.swsp -> sw rs2, imm(x2)
instr_o = {
Expand Down
54 changes: 45 additions & 9 deletions core/csr_regfile.sv
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ module csr_regfile
#(
parameter config_pkg::cva6_cfg_t CVA6Cfg = config_pkg::cva6_cfg_empty,
parameter type exception_t = logic,
parameter type jvt_t = logic,
parameter type irq_ctrl_t = logic,
parameter type scoreboard_entry_t = logic,
parameter type rvfi_probes_csr_t = logic,
Expand Down Expand Up @@ -167,7 +168,9 @@ module csr_regfile
// TO_BE_COMPLETED - PERF_COUNTERS
output logic [31:0] mcountinhibit_o,
// RVFI
output rvfi_probes_csr_t rvfi_csr_o
output rvfi_probes_csr_t rvfi_csr_o,
//jvt output
output jvt_t jvt_o
);

localparam logic [63:0] SMODE_STATUS_READ_MASK = ariane_pkg::smode_status_read_mask(CVA6Cfg);
Expand Down Expand Up @@ -295,6 +298,7 @@ module csr_regfile
assign pmpaddr_o = pmpaddr_q[(CVA6Cfg.NrPMPEntries>0?CVA6Cfg.NrPMPEntries-1 : 0):0];

riscv::fcsr_t fcsr_q, fcsr_d;
jvt_t jvt_q, jvt_d;
// ----------------
// Assignments
// ----------------
Expand Down Expand Up @@ -350,6 +354,13 @@ module csr_regfile
read_access_exception = 1'b1;
end
end
riscv::CSR_JVT: begin
if (CVA6Cfg.RVZCMT) begin
csr_rdata = {jvt_q.base, jvt_q.mode};
end else begin
read_access_exception = 1'b1;
end
end
// non-standard extension
riscv::CSR_FTRAN: begin
if (CVA6Cfg.FpPresent && !(mstatus_q.fs == riscv::Off || (CVA6Cfg.RVH && v_q && vsstatus_q.fs == riscv::Off))) begin
Expand Down Expand Up @@ -908,12 +919,14 @@ module csr_regfile

perf_we_o = 1'b0;
perf_data_o = 'b0;
if (CVA6Cfg.RVZCMT) begin
jvt_d = jvt_q;
end
fcsr_d = fcsr_q;

fcsr_d = fcsr_q;

priv_lvl_d = priv_lvl_q;
v_d = v_q;
debug_mode_d = debug_mode_q;
priv_lvl_d = priv_lvl_q;
v_d = v_q;
debug_mode_d = debug_mode_q;

if (CVA6Cfg.DebugEn) begin
dcsr_d = dcsr_q;
Expand Down Expand Up @@ -1060,6 +1073,14 @@ module csr_regfile
riscv::CSR_DSCRATCH1:
if (CVA6Cfg.DebugEn) dscratch1_d = csr_wdata;
else update_access_exception = 1'b1;
riscv::CSR_JVT: begin
if (CVA6Cfg.RVZCMT) begin
jvt_d.base = csr_wdata[CVA6Cfg.XLEN-1:6];
jvt_d.mode = 6'b000000;
end else begin
update_access_exception = 1'b1;
end
end
// trigger module CSRs
riscv::CSR_TSELECT: update_access_exception = 1'b1; // not implemented
riscv::CSR_TDATA1: update_access_exception = 1'b1; // not implemented
Expand Down Expand Up @@ -2443,8 +2464,16 @@ module csr_regfile
assign fflags_o = fcsr_q.fflags;
assign frm_o = fcsr_q.frm;
assign fprec_o = fcsr_q.fprec;
//JVT outputs
if (CVA6Cfg.RVZCMT) begin
assign jvt_o.base = jvt_q.base;
assign jvt_o.mode = jvt_q.mode;
end else begin
assign jvt_o.base = '0;
assign jvt_o.mode = '0;
end
// MMU outputs
assign satp_ppn_o = CVA6Cfg.RVS ? satp_q.ppn : '0;
assign satp_ppn_o = CVA6Cfg.RVS ? satp_q.ppn : '0;
assign vsatp_ppn_o = CVA6Cfg.RVH ? vsatp_q.ppn : '0;
assign hgatp_ppn_o = CVA6Cfg.RVH ? hgatp_q.ppn : '0;
if (CVA6Cfg.RVS) begin
Expand Down Expand Up @@ -2506,9 +2535,12 @@ module csr_regfile
// sequential process
always_ff @(posedge clk_i or negedge rst_ni) begin
if (~rst_ni) begin
priv_lvl_q <= riscv::PRIV_LVL_M;
priv_lvl_q <= riscv::PRIV_LVL_M;
// floating-point registers
fcsr_q <= '0;
fcsr_q <= '0;
if (CVA6Cfg.RVZCMT) begin
jvt_q <= '0;
end
// debug signals
debug_mode_q <= 1'b0;
if (CVA6Cfg.DebugEn) begin
Expand Down Expand Up @@ -2592,6 +2624,9 @@ module csr_regfile
priv_lvl_q <= priv_lvl_d;
// floating-point registers
fcsr_q <= fcsr_d;
if (CVA6Cfg.RVZCMT) begin
jvt_q <= jvt_d;
end
// debug signals
if (CVA6Cfg.DebugEn) begin
debug_mode_q <= debug_mode_d;
Expand Down Expand Up @@ -2713,6 +2748,7 @@ module csr_regfile
// RVFI
//-------------
assign rvfi_csr_o.fcsr_q = CVA6Cfg.FpPresent ? fcsr_q : '0;
assign rvfi_csr_o.jvt_q = CVA6Cfg.RVZCMT ? jvt_q : '0;
assign rvfi_csr_o.dcsr_q = CVA6Cfg.DebugEn ? dcsr_q : '0;
assign rvfi_csr_o.dpc_q = CVA6Cfg.DebugEn ? dpc_q : '0;
assign rvfi_csr_o.dscratch0_q = CVA6Cfg.DebugEn ? dscratch0_q : '0;
Expand Down
44 changes: 40 additions & 4 deletions core/cva6.sv
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,11 @@ module cva6
branchpredict_sbe_t branch_predict; // this field contains branch prediction information regarding the forward branch path
exception_t ex; // this field contains exceptions which might have happened earlier, e.g.: fetch exceptions
},
//JVT struct{base,mode}
localparam type jvt_t = struct packed {
logic [CVA6Cfg.XLEN-7:0] base;
logic [5:0] mode;
},

// ID/EX/WB Stage
localparam type scoreboard_entry_t = struct packed {
Expand Down Expand Up @@ -113,6 +118,7 @@ module cva6
logic is_last_macro_instr; // is last decoded 32bit instruction of macro definition
logic is_double_rd_macro_instr; // is double move decoded 32bit instruction of macro definition
logic vfp; // is this a vector floating-point instruction?
logic is_zcmt; //is a zcmt instruction
},
localparam type writeback_t = struct packed {
logic valid; // wb data is valid
Expand Down Expand Up @@ -415,6 +421,7 @@ module cva6

fu_data_t [CVA6Cfg.NrIssuePorts-1:0] fu_data_id_ex;
logic [CVA6Cfg.VLEN-1:0] pc_id_ex;
logic zcmt_id_ex;
logic is_compressed_instr_id_ex;
logic [CVA6Cfg.NrIssuePorts-1:0][31:0] tinst_ex;
// fixed latency units
Expand Down Expand Up @@ -563,6 +570,8 @@ module cva6
riscv::pmpcfg_t [(CVA6Cfg.NrPMPEntries > 0 ? CVA6Cfg.NrPMPEntries-1 : 0):0] pmpcfg;
logic [(CVA6Cfg.NrPMPEntries > 0 ? CVA6Cfg.NrPMPEntries-1 : 0):0][CVA6Cfg.PLEN-3:0] pmpaddr;
logic [31:0] mcountinhibit_csr_perf;
//jvt
jvt_t jvt;
// ----------------------------
// Performance Counters <-> *
// ----------------------------
Expand Down Expand Up @@ -617,6 +626,8 @@ module cva6
// ----------------
dcache_req_i_t [2:0] dcache_req_ports_ex_cache;
dcache_req_o_t [2:0] dcache_req_ports_cache_ex;
dcache_req_i_t dcache_req_ports_id_cache;
dcache_req_o_t dcache_req_ports_cache_id;
dcache_req_i_t [1:0] dcache_req_ports_acc_cache;
dcache_req_o_t [1:0] dcache_req_ports_cache_acc;
logic dcache_commit_wbuffer_empty;
Expand Down Expand Up @@ -671,8 +682,11 @@ module cva6
id_stage #(
.CVA6Cfg(CVA6Cfg),
.branchpredict_sbe_t(branchpredict_sbe_t),
.dcache_req_i_t(dcache_req_i_t),
.dcache_req_o_t(dcache_req_o_t),
.exception_t(exception_t),
.fetch_entry_t(fetch_entry_t),
.jvt_t(jvt_t),
.irq_ctrl_t(irq_ctrl_t),
.scoreboard_entry_t(scoreboard_entry_t),
.interrupts_t(interrupts_t),
Expand Down Expand Up @@ -716,7 +730,11 @@ module cva6
.compressed_ready_i(x_compressed_ready),
.compressed_resp_i (x_compressed_resp),
.compressed_valid_o(x_compressed_valid),
.compressed_req_o (x_compressed_req)
.compressed_req_o (x_compressed_req),
.jvt_i (jvt),
// DCACHE interfaces
.dcache_req_ports_i(dcache_req_ports_cache_id),
.dcache_req_ports_o(dcache_req_ports_id_cache)
);

logic [CVA6Cfg.NrWbPorts-1:0][CVA6Cfg.TRANS_ID_BITS-1:0] trans_id_ex_id;
Expand Down Expand Up @@ -817,6 +835,7 @@ module cva6
.rs2_forwarding_o (rs2_forwarding_id_ex),
.fu_data_o (fu_data_id_ex),
.pc_o (pc_id_ex),
.is_zcmt_o (zcmt_id_ex),
.is_compressed_instr_o (is_compressed_instr_id_ex),
.tinst_o (tinst_ex),
// fixed latency unit ready
Expand Down Expand Up @@ -908,6 +927,7 @@ module cva6
.rs2_forwarding_i(rs2_forwarding_id_ex),
.fu_data_i(fu_data_id_ex),
.pc_i(pc_id_ex),
.is_zcmt_i(zcmt_id_ex),
.is_compressed_instr_i(is_compressed_instr_id_ex),
.tinst_i(tinst_ex),
// fixed latency units
Expand Down Expand Up @@ -1078,6 +1098,7 @@ module cva6
csr_regfile #(
.CVA6Cfg (CVA6Cfg),
.exception_t (exception_t),
.jvt_t (jvt_t),
.irq_ctrl_t (irq_ctrl_t),
.scoreboard_entry_t(scoreboard_entry_t),
.rvfi_probes_csr_t (rvfi_probes_csr_t),
Expand Down Expand Up @@ -1154,6 +1175,7 @@ module cva6
.pmpcfg_o (pmpcfg),
.pmpaddr_o (pmpaddr),
.mcountinhibit_o (mcountinhibit_csr_perf),
.jvt_o (jvt),
//RVFI
.rvfi_csr_o (rvfi_csr)
);
Expand Down Expand Up @@ -1258,15 +1280,29 @@ module cva6
dcache_req_o_t [NumPorts-1:0] dcache_req_from_cache;

// D$ request
assign dcache_req_to_cache[0] = dcache_req_ports_ex_cache[0];
// Since ZCMT is only enable for embdeed class so MMU should be disable.
// Cache port 0 is being ultilize in implicit read access in ZCMT extension.
if (CVA6Cfg.RVZCMT & ~(CVA6Cfg.MmuPresent)) begin
assign dcache_req_to_cache[0] = dcache_req_ports_id_cache;
end else begin
assign dcache_req_to_cache[0] = dcache_req_ports_ex_cache[0];
end
assign dcache_req_to_cache[1] = dcache_req_ports_ex_cache[1];
assign dcache_req_to_cache[2] = dcache_req_ports_acc_cache[0];
assign dcache_req_to_cache[3] = dcache_req_ports_ex_cache[2].data_req ? dcache_req_ports_ex_cache [2] :
dcache_req_ports_acc_cache[1];

// D$ response
assign dcache_req_ports_cache_ex[0] = dcache_req_from_cache[0];
assign dcache_req_ports_cache_ex[1] = dcache_req_from_cache[1];
// Since ZCMT is only enable for embdeed class so MMU should be disable.
// Cache port 0 is being ultilized in implicit read access in ZCMT extension.
if (CVA6Cfg.RVZCMT & ~(CVA6Cfg.MmuPresent)) begin
assign dcache_req_ports_cache_id = dcache_req_from_cache[0];
assign dcache_req_ports_cache_ex[0] = '0;
end else begin
assign dcache_req_ports_cache_ex[0] = dcache_req_from_cache[0];
assign dcache_req_ports_cache_id = '0;
end
assign dcache_req_ports_cache_ex[1] = dcache_req_from_cache[1];
assign dcache_req_ports_cache_acc[0] = dcache_req_from_cache[2];
always_comb begin : gen_dcache_req_store_data_gnt
dcache_req_ports_cache_ex[2] = dcache_req_from_cache[3];
Expand Down
2 changes: 1 addition & 1 deletion core/cva6_rvfi.sv
Original file line number Diff line number Diff line change
Expand Up @@ -418,7 +418,7 @@ module cva6_rvfi
`CONNECT_RVFI_SAME(1'b1, icache)

`CONNECT_RVFI_SAME(CVA6Cfg.EnableAccelerator, acc_cons)

`CONNECT_RVFI_SAME(CVA6Cfg.RVZCMT, jvt)
`CONNECT_RVFI_FULL(1'b1, pmpcfg0, csr.pmpcfg_q[CVA6Cfg.XLEN/8-1:0])
`CONNECT_RVFI_FULL(CVA6Cfg.XLEN == 32, pmpcfg1, csr.pmpcfg_q[7:4])

Expand Down
Loading
Loading