Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[RVV] CVA6 re-parametrization and MMU interface #2652

Open
wants to merge 12 commits into
base: master
Choose a base branch
from
Open
10 changes: 9 additions & 1 deletion Bender.yml
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
package:
name: ariane
name: cva6
authors:
- "Florian Zaruba <[email protected]>"
- "Michael Schaffner <[email protected]>"
Expand Down Expand Up @@ -68,6 +68,7 @@ sources:
- target: any(cv64a6_imafdcv_sv39, cv64a6_imafdc_sv39, cv64a6_imafdc_sv39_wb, cv64a6_imafdch_sv39, cv64a6_imafdch_sv39_wb, cv32a6_imac_sv0, cv32a6_imac_sv32, cv32a6_imafc_sv32)
files:
- core/cva6_mmu/cva6_tlb.sv
- core/cva6_mmu/cva6_shared_tlb.sv
- core/cva6_mmu/cva6_mmu.sv
- core/cva6_mmu/cva6_ptw.sv

Expand All @@ -78,6 +79,8 @@ sources:
# Extension Interface
- core/cvxif_example/include/cvxif_instr_pkg.sv
- core/cvxif_fu.sv
- core/cvxif_issue_register_commit_if_driver.sv
- core/cvxif_compressed_if_driver.sv
- core/cvxif_example/cvxif_example_coprocessor.sv
- core/cvxif_example/instr_decoder.sv

Expand All @@ -95,6 +98,7 @@ sources:
- core/csr_regfile.sv
- core/decoder.sv
- core/ex_stage.sv
- core/acc_dispatcher.sv
- core/instr_realign.sv
- core/id_stage.sv
- core/issue_read_operands.sv
Expand Down Expand Up @@ -142,6 +146,7 @@ sources:
# Physical Memory Protection
- core/pmp/src/pmp.sv
- core/pmp/src/pmp_entry.sv
- core/pmp/src/pmp_data_if.sv

- include_dirs:
- common/local/util
Expand All @@ -153,12 +158,15 @@ sources:
- common/local/util
files:
- common/local/util/tc_sram_wrapper.sv
- common/local/util/sram_cache.sv

- target: all(fpga, xilinx)
include_dirs:
- common/local/util
files:
- common/local/util/sram_cache.sv
- common/local/util/tc_sram_fpga_wrapper.sv
- vendor/pulp-platform/fpga-support/rtl/SyncSpRamBeNx64.sv

- target: not(synthesis)
include_dirs:
Expand Down
50 changes: 25 additions & 25 deletions common/local/util/instr_tracer.sv
Original file line number Diff line number Diff line change
Expand Up @@ -25,32 +25,32 @@ module instr_tracer #(
parameter type exception_t = logic,
parameter interrupts_t INTERRUPTS = '0
)(
input logic pck,
input logic rstn,
input logic flush_unissued,
input logic flush_all,
input logic [31:0] instruction,
input logic fetch_valid,
input logic fetch_ack,
input logic issue_ack, // issue acknowledged
input scoreboard_entry_t issue_sbe, // issue scoreboard entry
input logic [1:0][4:0] waddr, // WB stage
input logic [1:0][63:0] wdata,
input logic [1:0] we_gpr,
input logic [1:0] we_fpr,
input scoreboard_entry_t [1:0] commit_instr, // commit instruction
input logic [1:0] commit_ack,
input logic st_valid, // stores - address translation
input logic [CVA6Cfg.PLEN-1:0] st_paddr,
input logic ld_valid, // loads
input logic ld_kill,
input logic [CVA6Cfg.PLEN-1:0] ld_paddr,
input bp_resolve_t resolve_branch, // misprediction
input exception_t commit_exception,
input riscv::priv_lvl_t priv_lvl, // current privilege level
input logic debug_mode,
input logic pck,
input logic rstn,
input logic flush_unissued,
input logic flush_all,
input logic [31:0] instruction,
input logic fetch_valid,
input logic fetch_ack,
input logic issue_ack, // issue acknowledged
input scoreboard_entry_t issue_sbe, // issue scoreboard entry
input logic [CVA6Cfg.NrCommitPorts-1:0][4:0] waddr, // WB stage
input logic [CVA6Cfg.NrCommitPorts-1:0][63:0] wdata,
input logic [CVA6Cfg.NrCommitPorts-1:0] we_gpr,
input logic [CVA6Cfg.NrCommitPorts-1:0] we_fpr,
input scoreboard_entry_t [CVA6Cfg.NrCommitPorts-1:0] commit_instr, // commit instruction
input logic [CVA6Cfg.NrCommitPorts-1:0] commit_ack,
input logic st_valid, // stores - address translation
input logic [CVA6Cfg.PLEN-1:0] st_paddr,
input logic ld_valid, // loads
input logic ld_kill,
input logic [CVA6Cfg.PLEN-1:0] ld_paddr,
input bp_resolve_t resolve_branch, // misprediction
input exception_t commit_exception,
input riscv::priv_lvl_t priv_lvl, // current privilege level
input logic debug_mode,

input logic[CVA6Cfg.XLEN-1:0] hart_id_i
input logic[CVA6Cfg.XLEN-1:0] hart_id_i
);

// keep the decoded instructions in a queue
Expand Down
126 changes: 56 additions & 70 deletions core/acc_dispatcher.sv
Original file line number Diff line number Diff line change
Expand Up @@ -23,36 +23,12 @@ module acc_dispatcher
parameter type exception_t = logic,
parameter type fu_data_t = logic,
parameter type scoreboard_entry_t = logic,
localparam type accelerator_req_t = struct packed {
logic req_valid;
logic resp_ready;
riscv::instruction_t insn;
logic [CVA6Cfg.XLEN-1:0] rs1;
logic [CVA6Cfg.XLEN-1:0] rs2;
fpnew_pkg::roundmode_e frm;
logic [CVA6Cfg.TRANS_ID_BITS-1:0] trans_id;
logic store_pending;
// Invalidation interface
logic acc_cons_en;
logic inval_ready;
},
parameter type acc_req_t = accelerator_req_t,
parameter type acc_resp_t = struct packed {
logic req_ready;
logic resp_valid;
logic [CVA6Cfg.XLEN-1:0] result;
logic [CVA6Cfg.TRANS_ID_BITS-1:0] trans_id;
exception_t exception;
// Metadata
logic store_pending;
logic store_complete;
logic load_complete;
logic [4:0] fflags;
logic fflags_valid;
// Invalidation interface
logic inval_valid;
logic [63:0] inval_addr;
},
parameter type acc_req_t = logic,
parameter type acc_resp_t = logic,
parameter type accelerator_req_t = logic,
parameter type accelerator_resp_t = logic,
parameter type acc_mmu_req_t = logic,
parameter type acc_mmu_resp_t = logic,
parameter type acc_cfg_t = logic,
parameter acc_cfg_t AccCfg = '0
) (
Expand All @@ -65,10 +41,11 @@ module acc_dispatcher
// Interface with the CSRs
input priv_lvl_t ld_st_priv_lvl_i,
input logic sum_i,
input pmpcfg_t [CVA6Cfg.NrPMPEntries-1:0] pmpcfg_i,
input logic [CVA6Cfg.NrPMPEntries-1:0][CVA6Cfg.PLEN-3:0] pmpaddr_i,
input pmpcfg_t [avoid_neg(CVA6Cfg.NrPMPEntries-1):0] pmpcfg_i,
input logic [avoid_neg(CVA6Cfg.NrPMPEntries-1):0][CVA6Cfg.PLEN-3:0] pmpaddr_i,
input logic [2:0] fcsr_frm_i,
output logic dirty_v_state_o,
input logic acc_mmu_en_i,
// Interface with the issue stage
input scoreboard_entry_t issue_instr_i,
input logic issue_instr_hs_i,
Expand All @@ -88,6 +65,9 @@ module acc_dispatcher
output logic acc_stall_st_pending_o,
input logic acc_no_st_pending_i,
input dcache_req_i_t [2:0] dcache_req_ports_i,
// Interface with the MMU
output acc_mmu_req_t acc_mmu_req_o,
input acc_mmu_resp_t acc_mmu_resp_i,
// Interface with the controller
output logic ctrl_halt_o,
input logic [11:0] csr_addr_i,
Expand Down Expand Up @@ -219,7 +199,7 @@ module acc_dispatcher
end

// An accelerator instruction was issued.
if (acc_req_o.req_valid) insn_ready_d[acc_req_o.trans_id] = 1'b0;
if (acc_req_o.acc_req.req_valid) insn_ready_d[acc_req_o.acc_req.trans_id] = 1'b0;
end : p_non_speculative_ff

/*************************
Expand All @@ -231,29 +211,31 @@ module acc_dispatcher
logic acc_req_ready;

accelerator_req_t acc_req_int;
fall_through_register #(
spill_register #(
.T(accelerator_req_t)
) i_accelerator_req_register (
.clk_i (clk_i),
.rst_ni (rst_ni),
.clr_i (1'b0),
.testmode_i(1'b0),
.data_i (acc_req),
.valid_i (acc_req_valid),
.ready_o (acc_req_ready),
.data_o (acc_req_int),
.valid_o (acc_req_o.req_valid),
.ready_i (acc_resp_i.req_ready)
.clk_i (clk_i),
.rst_ni (rst_ni),
.data_i (acc_req),
.valid_i(acc_req_valid),
.ready_o(acc_req_ready),
.data_o (acc_req_int),
.valid_o(acc_req_o.acc_req.req_valid),
.ready_i(acc_resp_i.acc_resp.req_ready)
);

assign acc_req_o.insn = acc_req_int.insn;
assign acc_req_o.rs1 = acc_req_int.rs1;
assign acc_req_o.rs2 = acc_req_int.rs2;
assign acc_req_o.frm = acc_req_int.frm;
assign acc_req_o.trans_id = acc_req_int.trans_id;
assign acc_req_o.store_pending = !acc_no_st_pending_i && acc_cons_en_i;
assign acc_req_o.acc_cons_en = acc_cons_en_i;
assign acc_req_o.inval_ready = inval_ready_i;
assign acc_req_o.acc_req.insn = acc_req_int.insn;
assign acc_req_o.acc_req.rs1 = acc_req_int.rs1;
assign acc_req_o.acc_req.rs2 = acc_req_int.rs2;
assign acc_req_o.acc_req.frm = acc_req_int.frm;
assign acc_req_o.acc_req.trans_id = acc_req_int.trans_id;
assign acc_req_o.acc_req.store_pending = !acc_no_st_pending_i && acc_cons_en_i;
assign acc_req_o.acc_req.acc_cons_en = acc_cons_en_i;
assign acc_req_o.acc_req.inval_ready = inval_ready_i;

// MMU interface
assign acc_req_o.acc_mmu_resp = acc_mmu_resp_i;
assign acc_req_o.acc_mmu_en = acc_mmu_en_i;

always_comb begin : accelerator_req_dispatcher
// Do not fetch from the instruction queue
Expand All @@ -263,7 +245,7 @@ module acc_dispatcher
acc_req = '0;
acc_req_valid = 1'b0;

// Unpack fu_data_t into accelerator_req_t
// Unpack fu_data_t into accelerator_req_t (the payload type carried by i_accelerator_req_register)
if (!acc_insn_queue_empty) begin
acc_req = '{
// Instruction is forwarded from the decoder as an immediate
Expand Down Expand Up @@ -297,23 +279,27 @@ module acc_dispatcher
logic acc_ld_disp;
logic acc_st_disp;

assign acc_trans_id_o = acc_resp_i.trans_id;
assign acc_result_o = acc_resp_i.result;
assign acc_valid_o = acc_resp_i.resp_valid;
assign acc_exception_o = acc_resp_i.exception;
assign acc_trans_id_o = acc_resp_i.acc_resp.trans_id;
assign acc_result_o = acc_resp_i.acc_resp.result;
assign acc_valid_o = acc_resp_i.acc_resp.resp_valid;
assign acc_exception_o = acc_resp_i.acc_resp.exception;
// Unpack the accelerator response
assign acc_fflags_valid_o = acc_resp_i.fflags_valid;
assign acc_fflags_o = acc_resp_i.fflags;
assign acc_fflags_valid_o = acc_resp_i.acc_resp.fflags_valid;
assign acc_fflags_o = acc_resp_i.acc_resp.fflags;

// MMU interface
assign acc_mmu_req_o = acc_resp_i.acc_mmu_req;

// Always ready to receive responses
assign acc_req_o.resp_ready = 1'b1;
assign acc_req_o.acc_req.resp_ready = 1'b1;

// Signal dispatched load/store to issue stage
assign acc_ld_disp = acc_req_valid && (acc_insn_queue_o.operation == ACCEL_OP_LOAD);
assign acc_st_disp = acc_req_valid && (acc_insn_queue_o.operation == ACCEL_OP_STORE);
assign acc_ld_disp = acc_req_valid && (acc_insn_queue_o.operation == ACCEL_OP_LOAD);
assign acc_st_disp = acc_req_valid && (acc_insn_queue_o.operation == ACCEL_OP_STORE);

// Cache invalidation
assign inval_valid_o = acc_resp_i.inval_valid;
assign inval_addr_o = acc_resp_i.inval_addr;
assign inval_valid_o = acc_resp_i.acc_resp.inval_valid;
assign inval_addr_o = acc_resp_i.acc_resp.inval_addr;

/**************************
* Accelerator commit *
Expand Down Expand Up @@ -351,8 +337,8 @@ module acc_dispatcher
`FF(wait_acc_store_q, wait_acc_store_d, '0)

// Set on store barrier. Clear when no store is pending.
assign wait_acc_store_d = (wait_acc_store_q | commit_st_barrier_i) & acc_resp_i.store_pending;
assign ctrl_halt_o = wait_acc_store_q;
assign wait_acc_store_d = (wait_acc_store_q | commit_st_barrier_i) & acc_resp_i.acc_resp.store_pending;
assign ctrl_halt_o = wait_acc_store_q;

/**************************
* Load/Store tracking *
Expand Down Expand Up @@ -390,9 +376,9 @@ module acc_dispatcher
.clk_i (clk_i),
.rst_ni (rst_ni),
.clear_i (1'b0),
.en_i (acc_ld_disp ^ acc_resp_i.load_complete),
.en_i (acc_ld_disp ^ acc_resp_i.acc_resp.load_complete),
.load_i (1'b0),
.down_i (acc_resp_i.load_complete),
.down_i (acc_resp_i.acc_resp.load_complete),
.d_i ('0),
.q_o (acc_disp_loads_pending),
.overflow_o(acc_disp_loads_overflow)
Expand Down Expand Up @@ -435,9 +421,9 @@ module acc_dispatcher
.clk_i (clk_i),
.rst_ni (rst_ni),
.clear_i (1'b0),
.en_i (acc_st_disp ^ acc_resp_i.store_complete),
.en_i (acc_st_disp ^ acc_resp_i.acc_resp.store_complete),
.load_i (1'b0),
.down_i (acc_resp_i.store_complete),
.down_i (acc_resp_i.acc_resp.store_complete),
.d_i ('0),
.q_o (acc_disp_stores_pending),
.overflow_o(acc_disp_stores_overflow)
Expand Down
6 changes: 3 additions & 3 deletions core/cache_subsystem/cva6_icache.sv
Original file line number Diff line number Diff line change
Expand Up @@ -426,7 +426,7 @@ module cva6_icache
for (genvar i = 0; i < CVA6Cfg.ICACHE_SET_ASSOC; i++) begin : gen_tag_cmpsel
assign cl_hit[i] = (cl_tag_rdata[i] == cl_tag_d) & vld_rdata[i];
assign cl_sel[i] = cl_rdata[i][{cl_offset_q, 3'b0}+:CVA6Cfg.FETCH_WIDTH];
Comment on lines 427 to 428
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

[verible-verilog-format] reported by reviewdog 🐶

Suggested change
assign cl_hit[i] = (cl_tag_rdata[i] == cl_tag_d) & vld_rdata[i];
assign cl_sel[i] = cl_rdata[i][{cl_offset_q, 3'b0}+:CVA6Cfg.FETCH_WIDTH];
assign cl_hit[i] = (cl_tag_rdata[i] == cl_tag_d) & vld_rdata[i];
assign cl_sel[i] = cl_rdata[i][{cl_offset_q, 3'b0}+:CVA6Cfg.FETCH_WIDTH];

assign cl_user[i] = cl_ruser[i][{cl_offset_q, 3'b0}+:CVA6Cfg.FETCH_USER_WIDTH];
assign cl_user[i] = CVA6Cfg.FETCH_USER_EN ? cl_ruser[i][{cl_offset_q, 3'b0}+:CVA6Cfg.FETCH_USER_WIDTH] : '0;
end


Expand All @@ -441,10 +441,10 @@ module cva6_icache
always_comb begin
if (cmp_en_q) begin
dreq_o.data = cl_sel[hit_idx];
dreq_o.user = cl_user[hit_idx];
dreq_o.user = CVA6Cfg.FETCH_USER_EN ? cl_user[hit_idx] : '0;
end else begin
dreq_o.data = mem_rtrn_i.data[{cl_offset_q, 3'b0}+:CVA6Cfg.FETCH_WIDTH];
dreq_o.user = mem_rtrn_i.user[{cl_offset_q, 3'b0}+:CVA6Cfg.FETCH_USER_WIDTH];
dreq_o.user = CVA6Cfg.FETCH_USER_EN ? mem_rtrn_i.user[{cl_offset_q, 3'b0}+:CVA6Cfg.FETCH_USER_WIDTH] : '0;
end
end

Expand Down
8 changes: 5 additions & 3 deletions core/csr_regfile.sv
Original file line number Diff line number Diff line change
Expand Up @@ -161,9 +161,9 @@ module csr_regfile
// TO_BE_COMPLETED - PERF_COUNTERS
output logic perf_we_o,
// PMP configuration containing pmpcfg for max 64 PMPs - ACC_DISPATCHER
output riscv::pmpcfg_t [(CVA6Cfg.NrPMPEntries > 0 ? CVA6Cfg.NrPMPEntries-1 : 0):0] pmpcfg_o,
output riscv::pmpcfg_t [avoid_neg(CVA6Cfg.NrPMPEntries-1):0] pmpcfg_o,
// PMP addresses - ACC_DISPATCHER
output logic [(CVA6Cfg.NrPMPEntries > 0 ? CVA6Cfg.NrPMPEntries-1 : 0):0][CVA6Cfg.PLEN-3:0] pmpaddr_o,
output logic [avoid_neg(CVA6Cfg.NrPMPEntries-1):0][CVA6Cfg.PLEN-3:0] pmpaddr_o,
// TO_BE_COMPLETED - PERF_COUNTERS
output logic [31:0] mcountinhibit_o,
// RVFI
Expand Down Expand Up @@ -773,11 +773,13 @@ module csr_regfile
riscv::CSR_PMPCFG14,
riscv::CSR_PMPCFG15: begin
// index is calculated using PMPCFG0 as the offset
automatic logic [11:0] index = csr_addr.address[11:0] - riscv::CSR_PMPCFG0;
automatic logic [3:0] index = csr_addr.address[11:0] - riscv::CSR_PMPCFG0;

// if index is not even and XLEN==64, raise exception
if (CVA6Cfg.XLEN == 64 && index[0] == 1'b1) read_access_exception = 1'b1;
else begin
// Functional no-op: on this path index[0] is already 0 when XLEN == 64, so clearing the LSB leaves index unchanged. It is kept only as a workaround to prevent the synthesizer from crashing.
if (CVA6Cfg.XLEN == 64) index = (index >> 1) << 1;
csr_rdata = pmpcfg_q[index*4+:CVA6Cfg.XLEN/8];
end
end
Expand Down
Loading
Loading