1
0
mirror of https://github.com/corundum/corundum.git synced 2025-01-30 08:32:52 +08:00

Add completion buffer management logic to DMA interface modu

Signed-off-by: Alex Forencich <alex@alexforencich.com>
This commit is contained in:
Alex Forencich 2023-05-30 18:37:44 -07:00
parent 972ec36ce8
commit 3d2feb36dc
4 changed files with 104 additions and 4 deletions

View File

@ -75,6 +75,10 @@ module dma_if_pcie #
parameter READ_OP_TABLE_SIZE = PCIE_TAG_COUNT,
// In-flight transmit limit (read)
parameter READ_TX_LIMIT = 2**TX_SEQ_NUM_WIDTH,
// Completion header flow control credit limit (read)
parameter READ_CPLH_FC_LIMIT = 0,
// Completion data flow control credit limit (read)
parameter READ_CPLD_FC_LIMIT = READ_CPLH_FC_LIMIT*4,
// Operation table size (write)
parameter WRITE_OP_TABLE_SIZE = 2**TX_SEQ_NUM_WIDTH,
// In-flight transmit limit (write)
@ -191,6 +195,7 @@ module dma_if_pcie #
input wire read_enable,
input wire write_enable,
input wire ext_tag_enable,
input wire rcb_128b,
input wire [15:0] requester_id,
input wire [2:0] max_read_request_size,
input wire [2:0] max_payload_size,
@ -259,6 +264,8 @@ dma_if_pcie_rd #(
.TAG_WIDTH(TAG_WIDTH),
.OP_TABLE_SIZE(READ_OP_TABLE_SIZE),
.TX_LIMIT(READ_TX_LIMIT),
.CPLH_FC_LIMIT(READ_CPLH_FC_LIMIT),
.CPLD_FC_LIMIT(READ_CPLD_FC_LIMIT),
.TLP_FORCE_64_BIT_ADDR(TLP_FORCE_64_BIT_ADDR),
.CHECK_BUS_NUMBER(CHECK_BUS_NUMBER)
)
@ -327,6 +334,7 @@ dma_if_pcie_rd_inst (
*/
.enable(read_enable),
.ext_tag_enable(ext_tag_enable),
.rcb_128b(rcb_128b),
.requester_id(requester_id),
.max_read_request_size(max_read_request_size),

View File

@ -69,6 +69,10 @@ module dma_if_pcie_rd #
parameter OP_TABLE_SIZE = PCIE_TAG_COUNT,
// In-flight transmit limit
parameter TX_LIMIT = 2**TX_SEQ_NUM_WIDTH,
// Completion header flow control credit limit
parameter CPLH_FC_LIMIT = 0,
// Completion data flow control credit limit
parameter CPLD_FC_LIMIT = CPLH_FC_LIMIT*4,
// Force 64 bit address
parameter TLP_FORCE_64_BIT_ADDR = 0,
// Requester ID mash
@ -139,6 +143,7 @@ module dma_if_pcie_rd #
*/
input wire enable,
input wire ext_tag_enable,
input wire rcb_128b,
input wire [15:0] requester_id,
input wire [2:0] max_read_request_size,
@ -192,6 +197,9 @@ parameter OP_TABLE_READ_COUNT_WIDTH = PCIE_TAG_WIDTH+1;
parameter TX_COUNT_WIDTH = $clog2(TX_LIMIT+1);
parameter CL_CPLH_FC_LIMIT = $clog2(CPLH_FC_LIMIT);
parameter CL_CPLD_FC_LIMIT = $clog2(CPLD_FC_LIMIT);
parameter STATUS_FIFO_ADDR_WIDTH = 5;
parameter OUTPUT_FIFO_ADDR_WIDTH = 5;
@ -302,6 +310,8 @@ reg [3:0] req_first_be;
reg [3:0] req_last_be;
reg [12:0] req_tlp_count;
reg [10:0] req_dword_count;
reg [6:0] req_cplh_fc_count;
reg [8:0] req_cpld_fc_count;
reg req_last_tlp;
reg [PCIE_ADDR_WIDTH-1:0] req_pcie_addr;
@ -371,6 +381,7 @@ reg [6:0] rx_cpl_tlp_hdr_lower_addr;
reg [127:0] tlp_hdr;
reg [10:0] max_read_request_size_dw_reg = 11'd0;
reg rcb_128b_reg = 1'b0;
reg [STATUS_FIFO_ADDR_WIDTH+1-1:0] status_fifo_wr_ptr_reg = 0;
reg [STATUS_FIFO_ADDR_WIDTH+1-1:0] status_fifo_rd_ptr_reg = 0;
@ -411,6 +422,16 @@ reg [OP_TAG_WIDTH+1-1:0] active_op_count_reg = 0;
reg inc_active_op;
reg dec_active_op;
reg [CL_CPLH_FC_LIMIT+1-1:0] active_cplh_fc_count_reg = 0;
reg active_cplh_fc_av_reg = 1'b1;
reg [6:0] inc_active_cplh_fc_count;
reg [6:0] dec_active_cplh_fc_count;
reg [CL_CPLD_FC_LIMIT+1-1:0] active_cpld_fc_count_reg = 0;
reg active_cpld_fc_av_reg = 1'b1;
reg [8:0] inc_active_cpld_fc_count;
reg [8:0] dec_active_cpld_fc_count;
reg rx_cpl_tlp_ready_reg = 1'b0, rx_cpl_tlp_ready_next;
reg [TLP_SEG_COUNT*TLP_HDR_WIDTH-1:0] tx_rd_req_tlp_hdr_reg = 0, tx_rd_req_tlp_hdr_next;
@ -496,6 +517,8 @@ reg [PCIE_TAG_WIDTH-1:0] pcie_tag_table_start_ptr_reg = 0, pcie_tag_table_start_
reg [RAM_SEL_WIDTH-1:0] pcie_tag_table_start_ram_sel_reg = 0, pcie_tag_table_start_ram_sel_next;
reg [RAM_ADDR_WIDTH-1:0] pcie_tag_table_start_ram_addr_reg = 0, pcie_tag_table_start_ram_addr_next;
reg [OP_TAG_WIDTH-1:0] pcie_tag_table_start_op_tag_reg = 0, pcie_tag_table_start_op_tag_next;
reg [6:0] pcie_tag_table_start_cplh_fc_reg = 0, pcie_tag_table_start_cplh_fc_next;
reg [8:0] pcie_tag_table_start_cpld_fc_reg = 0, pcie_tag_table_start_cpld_fc_next;
reg pcie_tag_table_start_zero_len_reg = 1'b0, pcie_tag_table_start_zero_len_next;
reg pcie_tag_table_start_en_reg = 1'b0, pcie_tag_table_start_en_next;
reg [PCIE_TAG_WIDTH-1:0] pcie_tag_table_finish_ptr;
@ -508,6 +531,10 @@ reg [RAM_ADDR_WIDTH-1:0] pcie_tag_table_ram_addr[(2**PCIE_TAG_WIDTH)-1:0];
(* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *)
reg [OP_TAG_WIDTH-1:0] pcie_tag_table_op_tag[(2**PCIE_TAG_WIDTH)-1:0];
(* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *)
reg [6:0] pcie_tag_table_cplh_fc[(2**PCIE_TAG_WIDTH)-1:0];
(* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *)
reg [8:0] pcie_tag_table_cpld_fc[(2**PCIE_TAG_WIDTH)-1:0];
(* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *)
reg pcie_tag_table_zero_len[(2**PCIE_TAG_WIDTH)-1:0];
(* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *)
reg pcie_tag_table_active_a[(2**PCIE_TAG_WIDTH)-1:0];
@ -621,6 +648,8 @@ always @* begin
inc_active_tx = 1'b0;
inc_active_tag = 1'b0;
inc_active_op = 1'b0;
inc_active_cplh_fc_count = 0;
inc_active_cpld_fc_count = 0;
op_table_start_ptr = req_op_tag_reg;
op_table_start_tag = s_axis_read_desc_tag;
@ -637,14 +666,25 @@ always @* begin
// crosses 4k boundary, split on 4K boundary
req_tlp_count = 13'h1000 - req_pcie_addr_reg[11:0];
req_dword_count = 11'h400 - req_pcie_addr_reg[11:2];
req_cpld_fc_count = 9'h100 - req_pcie_addr_reg[11:4];
if (rcb_128b_reg) begin
req_cplh_fc_count = 6'h20 - req_pcie_addr_reg[11:7];
end else begin
req_cplh_fc_count = 7'h40 - req_pcie_addr_reg[11:6];
end
req_last_tlp = (((req_pcie_addr_reg & 12'hfff) + (req_op_count_reg & 12'hfff)) & 12'hfff) == 0 && req_op_count_reg >> 12 == 0;
// optimized req_pcie_addr = req_pcie_addr_reg + req_tlp_count
req_pcie_addr[PCIE_ADDR_WIDTH-1:12] = req_pcie_addr_reg[PCIE_ADDR_WIDTH-1:12]+1;
req_pcie_addr[11:0] = 12'd0;
end else begin
// does not cross 4k boundary, send one TLP
req_tlp_count = req_op_count_reg;
req_dword_count = (req_op_count_reg + req_pcie_addr_reg[1:0] + 3) >> 2;
req_cpld_fc_count = (req_op_count_reg + req_pcie_addr_reg[1:0] + 15) >> 4;
if (rcb_128b_reg) begin
req_cplh_fc_count = (req_pcie_addr_reg[6:0]+req_op_count_reg+127) >> 7;
end else begin
req_cplh_fc_count = (req_pcie_addr_reg[5:0]+req_op_count_reg+63) >> 6;
end
req_last_tlp = 1'b1;
// always last TLP, so next address is irrelevant
req_pcie_addr[PCIE_ADDR_WIDTH-1:12] = req_pcie_addr_reg[PCIE_ADDR_WIDTH-1:12];
@ -656,25 +696,47 @@ always @* begin
// crosses 4k boundary, split on 4K boundary
req_tlp_count = 13'h1000 - req_pcie_addr_reg[11:0];
req_dword_count = 11'h400 - req_pcie_addr_reg[11:2];
req_cpld_fc_count = 9'h100 - req_pcie_addr_reg[11:4];
if (rcb_128b_reg) begin
req_cplh_fc_count = 6'h20 - req_pcie_addr_reg[11:7];
end else begin
req_cplh_fc_count = 7'h40 - req_pcie_addr_reg[11:6];
end
req_last_tlp = 1'b0;
// optimized req_pcie_addr = req_pcie_addr_reg + req_tlp_count
req_pcie_addr[PCIE_ADDR_WIDTH-1:12] = req_pcie_addr_reg[PCIE_ADDR_WIDTH-1:12]+1;
req_pcie_addr[11:0] = 12'd0;
end else begin
// does not cross 4k boundary, split on 128-byte read completion boundary
req_tlp_count = {max_read_request_size_dw_reg, 2'b00} - req_pcie_addr_reg[6:0];
req_dword_count = max_read_request_size_dw_reg - req_pcie_addr_reg[6:2];
req_cpld_fc_count = max_read_request_size_dw_reg[10:2] - req_pcie_addr_reg[6:4];
if (rcb_128b_reg) begin
req_cplh_fc_count = max_read_request_size_dw_reg[10:5];
end else begin
req_cplh_fc_count = max_read_request_size_dw_reg[10:4] - req_pcie_addr_reg[6];
end
req_last_tlp = 1'b0;
// optimized req_pcie_addr = req_pcie_addr_reg + req_tlp_count
req_pcie_addr[PCIE_ADDR_WIDTH-1:12] = req_pcie_addr_reg[PCIE_ADDR_WIDTH-1:12];
req_pcie_addr[11:0] = {{req_pcie_addr_reg[11:7], 5'd0} + max_read_request_size_dw_reg, 2'b00};
end
end
// un-optimized TLP size computations (for reference)
// req_dword_count = (req_tlp_count + req_pcie_addr_reg[1:0] + 3) >> 2
// req_cpld_fc_count = (req_dword_count + 3) >> 2
// if (rcb_128b_reg) begin
// req_cplh_fc_count = (req_pcie_addr_reg[6:0]+req_tlp_count+127) >> 7
// end lse begin
// req_cplh_fc_count = (req_pcie_addr_reg[5:0]+req_tlp_count+63) >> 6
// end
// req_pcie_addr = req_pcie_addr_reg + req_tlp_count
pcie_tag_table_start_ptr_next = req_pcie_tag_reg;
pcie_tag_table_start_ram_sel_next = req_ram_sel_reg;
pcie_tag_table_start_ram_addr_next = req_ram_addr_reg + req_tlp_count;
pcie_tag_table_start_op_tag_next = req_op_tag_reg;
pcie_tag_table_start_cplh_fc_next = req_cplh_fc_count;
pcie_tag_table_start_cpld_fc_next = req_cpld_fc_count;
pcie_tag_table_start_zero_len_next = req_zero_len_reg;
pcie_tag_table_start_en_next = 1'b0;
@ -755,7 +817,7 @@ always @* begin
tx_rd_req_tlp_hdr_next = tlp_hdr;
end
if ((!tx_rd_req_tlp_valid_reg || tx_rd_req_tlp_ready) && req_pcie_tag_valid_reg && (!TX_SEQ_NUM_ENABLE || active_tx_count_av_reg)) begin
if ((!tx_rd_req_tlp_valid_reg || tx_rd_req_tlp_ready) && req_pcie_tag_valid_reg && (!TX_SEQ_NUM_ENABLE || active_tx_count_av_reg) && active_cplh_fc_av_reg && active_cpld_fc_av_reg) begin
tx_rd_req_tlp_valid_next = 1'b1;
inc_active_tx = 1'b1;
@ -768,9 +830,13 @@ always @* begin
pcie_tag_table_start_ram_sel_next = req_ram_sel_reg;
pcie_tag_table_start_ram_addr_next = req_ram_addr_reg + req_tlp_count;
pcie_tag_table_start_op_tag_next = req_op_tag_reg;
pcie_tag_table_start_cplh_fc_next = req_cplh_fc_count;
pcie_tag_table_start_cpld_fc_next = req_cpld_fc_count;
pcie_tag_table_start_zero_len_next = req_zero_len_reg;
pcie_tag_table_start_en_next = 1'b1;
inc_active_tag = 1'b1;
inc_active_cplh_fc_count = req_cplh_fc_count;
inc_active_cpld_fc_count = req_cpld_fc_count;
op_table_read_start_ptr = req_op_tag_reg;
op_table_read_start_commit = req_last_tlp;
@ -868,6 +934,8 @@ always @* begin
dec_active_tag = 1'b0;
dec_active_op = 1'b0;
dec_active_cplh_fc_count = 0;
dec_active_cpld_fc_count = 0;
// Write generation
ram_wr_cmd_sel_pipe_next = {RAM_SEG_COUNT{ram_sel_reg}};
@ -1214,6 +1282,8 @@ always @* begin
pcie_tag_table_finish_ptr = pcie_tag_reg;
pcie_tag_table_finish_en = 1'b1;
dec_active_tag = 1'b1;
dec_active_cplh_fc_count = pcie_tag_table_cplh_fc[pcie_tag_reg];
dec_active_cpld_fc_count = pcie_tag_table_cpld_fc[pcie_tag_reg];
pcie_tag_fifo_wr_tag = pcie_tag_reg;
if (pcie_tag_fifo_wr_tag < PCIE_TAG_COUNT_1 || !PCIE_TAG_COUNT_2) begin
@ -1398,6 +1468,7 @@ always @(posedge clk) begin
stat_rd_tx_stall_reg <= stat_rd_tx_stall_next;
max_read_request_size_dw_reg <= 11'd32 << (max_read_request_size > 5 ? 5 : max_read_request_size);
rcb_128b_reg <= rcb_128b;
if (status_fifo_wr_en) begin
status_fifo_op_tag[status_fifo_wr_ptr_reg[STATUS_FIFO_ADDR_WIDTH-1:0]] <= status_fifo_wr_op_tag;
@ -1430,10 +1501,18 @@ always @(posedge clk) begin
active_tag_count_reg <= active_tag_count_reg + inc_active_tag - dec_active_tag;
active_op_count_reg <= active_op_count_reg + inc_active_op - dec_active_op;
active_cplh_fc_count_reg <= active_cplh_fc_count_reg + inc_active_cplh_fc_count - dec_active_cplh_fc_count;
active_cplh_fc_av_reg <= !CPLH_FC_LIMIT || active_cplh_fc_count_reg < CPLH_FC_LIMIT;
active_cpld_fc_count_reg <= active_cpld_fc_count_reg + inc_active_cpld_fc_count - dec_active_cpld_fc_count;
active_cpld_fc_av_reg <= !CPLD_FC_LIMIT || active_cpld_fc_count_reg < CPLD_FC_LIMIT;
pcie_tag_table_start_ptr_reg <= pcie_tag_table_start_ptr_next;
pcie_tag_table_start_ram_sel_reg <= pcie_tag_table_start_ram_sel_next;
pcie_tag_table_start_ram_addr_reg <= pcie_tag_table_start_ram_addr_next;
pcie_tag_table_start_op_tag_reg <= pcie_tag_table_start_op_tag_next;
pcie_tag_table_start_cplh_fc_reg <= pcie_tag_table_start_cplh_fc_next;
pcie_tag_table_start_cpld_fc_reg <= pcie_tag_table_start_cpld_fc_next;
pcie_tag_table_start_zero_len_reg <= pcie_tag_table_start_zero_len_next;
pcie_tag_table_start_en_reg <= pcie_tag_table_start_en_next;
@ -1443,6 +1522,8 @@ always @(posedge clk) begin
pcie_tag_table_ram_sel[pcie_tag_table_start_ptr_reg] <= pcie_tag_table_start_ram_sel_reg;
pcie_tag_table_ram_addr[pcie_tag_table_start_ptr_reg] <= pcie_tag_table_start_ram_addr_reg;
pcie_tag_table_op_tag[pcie_tag_table_start_ptr_reg] <= pcie_tag_table_start_op_tag_reg;
pcie_tag_table_cplh_fc[pcie_tag_table_start_ptr_reg] <= pcie_tag_table_start_cplh_fc_reg;
pcie_tag_table_cpld_fc[pcie_tag_table_start_ptr_reg] <= pcie_tag_table_start_cpld_fc_reg;
pcie_tag_table_zero_len[pcie_tag_table_start_ptr_reg] <= pcie_tag_table_start_zero_len_reg;
pcie_tag_table_active_a[pcie_tag_table_start_ptr_reg] <= !pcie_tag_table_active_b[pcie_tag_table_start_ptr_reg];
end
@ -1558,6 +1639,12 @@ always @(posedge clk) begin
active_tag_count_reg <= 0;
active_op_count_reg <= 0;
active_cplh_fc_count_reg <= 0;
active_cplh_fc_av_reg <= 1'b1;
active_cpld_fc_count_reg <= 0;
active_cpld_fc_av_reg <= 1'b1;
pcie_tag_table_start_en_reg <= 1'b0;
pcie_tag_fifo_1_wr_ptr_reg <= 0;

View File

@ -50,6 +50,8 @@ export PARAM_LEN_WIDTH := 20
export PARAM_TAG_WIDTH := 8
export PARAM_OP_TABLE_SIZE := $(PARAM_PCIE_TAG_COUNT)
export PARAM_TX_LIMIT := $(shell echo "$$(( 1 << ($(PARAM_TX_SEQ_NUM_WIDTH)-1) ))" )
export PARAM_CPLH_FC_LIMIT := 512
export PARAM_CPLD_FC_LIMIT := $(shell expr $(PARAM_CPLH_FC_LIMIT) \* 4 )
export PARAM_TLP_FORCE_64_BIT_ADDR := 0
export PARAM_CHECK_BUS_NUMBER := 1

View File

@ -85,6 +85,7 @@ class TB(object):
cfg_max_read_req=dut.max_read_request_size,
cfg_ext_tag_enable=dut.ext_tag_enable,
cfg_rcb=dut.rcb_128b,
)
self.dev.log.setLevel(logging.DEBUG)
@ -330,6 +331,8 @@ def test_dma_if_pcie_rd(request, pcie_data_width, pcie_offset):
parameters['TAG_WIDTH'] = 8
parameters['OP_TABLE_SIZE'] = parameters['PCIE_TAG_COUNT']
parameters['TX_LIMIT'] = 2**(parameters['TX_SEQ_NUM_WIDTH']-1)
parameters['CPLH_FC_LIMIT'] = 512
parameters['CPLD_FC_LIMIT'] = parameters['CPLH_FC_LIMIT']*4
parameters['TLP_FORCE_64_BIT_ADDR'] = 0
parameters['CHECK_BUS_NUMBER'] = 0