mirror of
https://github.com/corundum/corundum.git
synced 2025-01-30 08:32:52 +08:00
Add completion buffer management logic to DMA interface modu
Signed-off-by: Alex Forencich <alex@alexforencich.com>
This commit is contained in:
parent
972ec36ce8
commit
3d2feb36dc
@ -75,6 +75,10 @@ module dma_if_pcie #
|
||||
parameter READ_OP_TABLE_SIZE = PCIE_TAG_COUNT,
|
||||
// In-flight transmit limit (read)
|
||||
parameter READ_TX_LIMIT = 2**TX_SEQ_NUM_WIDTH,
|
||||
// Completion header flow control credit limit (read)
|
||||
parameter READ_CPLH_FC_LIMIT = 0,
|
||||
// Completion data flow control credit limit (read)
|
||||
parameter READ_CPLD_FC_LIMIT = READ_CPLH_FC_LIMIT*4,
|
||||
// Operation table size (write)
|
||||
parameter WRITE_OP_TABLE_SIZE = 2**TX_SEQ_NUM_WIDTH,
|
||||
// In-flight transmit limit (write)
|
||||
@ -191,6 +195,7 @@ module dma_if_pcie #
|
||||
input wire read_enable,
|
||||
input wire write_enable,
|
||||
input wire ext_tag_enable,
|
||||
input wire rcb_128b,
|
||||
input wire [15:0] requester_id,
|
||||
input wire [2:0] max_read_request_size,
|
||||
input wire [2:0] max_payload_size,
|
||||
@ -259,6 +264,8 @@ dma_if_pcie_rd #(
|
||||
.TAG_WIDTH(TAG_WIDTH),
|
||||
.OP_TABLE_SIZE(READ_OP_TABLE_SIZE),
|
||||
.TX_LIMIT(READ_TX_LIMIT),
|
||||
.CPLH_FC_LIMIT(READ_CPLH_FC_LIMIT),
|
||||
.CPLD_FC_LIMIT(READ_CPLD_FC_LIMIT),
|
||||
.TLP_FORCE_64_BIT_ADDR(TLP_FORCE_64_BIT_ADDR),
|
||||
.CHECK_BUS_NUMBER(CHECK_BUS_NUMBER)
|
||||
)
|
||||
@ -327,6 +334,7 @@ dma_if_pcie_rd_inst (
|
||||
*/
|
||||
.enable(read_enable),
|
||||
.ext_tag_enable(ext_tag_enable),
|
||||
.rcb_128b(rcb_128b),
|
||||
.requester_id(requester_id),
|
||||
.max_read_request_size(max_read_request_size),
|
||||
|
||||
|
@ -69,6 +69,10 @@ module dma_if_pcie_rd #
|
||||
parameter OP_TABLE_SIZE = PCIE_TAG_COUNT,
|
||||
// In-flight transmit limit
|
||||
parameter TX_LIMIT = 2**TX_SEQ_NUM_WIDTH,
|
||||
// Completion header flow control credit limit
|
||||
parameter CPLH_FC_LIMIT = 0,
|
||||
// Completion data flow control credit limit
|
||||
parameter CPLD_FC_LIMIT = CPLH_FC_LIMIT*4,
|
||||
// Force 64 bit address
|
||||
parameter TLP_FORCE_64_BIT_ADDR = 0,
|
||||
// Requester ID mash
|
||||
@ -139,6 +143,7 @@ module dma_if_pcie_rd #
|
||||
*/
|
||||
input wire enable,
|
||||
input wire ext_tag_enable,
|
||||
input wire rcb_128b,
|
||||
input wire [15:0] requester_id,
|
||||
input wire [2:0] max_read_request_size,
|
||||
|
||||
@ -192,6 +197,9 @@ parameter OP_TABLE_READ_COUNT_WIDTH = PCIE_TAG_WIDTH+1;
|
||||
|
||||
parameter TX_COUNT_WIDTH = $clog2(TX_LIMIT+1);
|
||||
|
||||
parameter CL_CPLH_FC_LIMIT = $clog2(CPLH_FC_LIMIT);
|
||||
parameter CL_CPLD_FC_LIMIT = $clog2(CPLD_FC_LIMIT);
|
||||
|
||||
parameter STATUS_FIFO_ADDR_WIDTH = 5;
|
||||
parameter OUTPUT_FIFO_ADDR_WIDTH = 5;
|
||||
|
||||
@ -302,6 +310,8 @@ reg [3:0] req_first_be;
|
||||
reg [3:0] req_last_be;
|
||||
reg [12:0] req_tlp_count;
|
||||
reg [10:0] req_dword_count;
|
||||
reg [6:0] req_cplh_fc_count;
|
||||
reg [8:0] req_cpld_fc_count;
|
||||
reg req_last_tlp;
|
||||
reg [PCIE_ADDR_WIDTH-1:0] req_pcie_addr;
|
||||
|
||||
@ -371,6 +381,7 @@ reg [6:0] rx_cpl_tlp_hdr_lower_addr;
|
||||
reg [127:0] tlp_hdr;
|
||||
|
||||
reg [10:0] max_read_request_size_dw_reg = 11'd0;
|
||||
reg rcb_128b_reg = 1'b0;
|
||||
|
||||
reg [STATUS_FIFO_ADDR_WIDTH+1-1:0] status_fifo_wr_ptr_reg = 0;
|
||||
reg [STATUS_FIFO_ADDR_WIDTH+1-1:0] status_fifo_rd_ptr_reg = 0;
|
||||
@ -411,6 +422,16 @@ reg [OP_TAG_WIDTH+1-1:0] active_op_count_reg = 0;
|
||||
reg inc_active_op;
|
||||
reg dec_active_op;
|
||||
|
||||
reg [CL_CPLH_FC_LIMIT+1-1:0] active_cplh_fc_count_reg = 0;
|
||||
reg active_cplh_fc_av_reg = 1'b1;
|
||||
reg [6:0] inc_active_cplh_fc_count;
|
||||
reg [6:0] dec_active_cplh_fc_count;
|
||||
|
||||
reg [CL_CPLD_FC_LIMIT+1-1:0] active_cpld_fc_count_reg = 0;
|
||||
reg active_cpld_fc_av_reg = 1'b1;
|
||||
reg [8:0] inc_active_cpld_fc_count;
|
||||
reg [8:0] dec_active_cpld_fc_count;
|
||||
|
||||
reg rx_cpl_tlp_ready_reg = 1'b0, rx_cpl_tlp_ready_next;
|
||||
|
||||
reg [TLP_SEG_COUNT*TLP_HDR_WIDTH-1:0] tx_rd_req_tlp_hdr_reg = 0, tx_rd_req_tlp_hdr_next;
|
||||
@ -496,6 +517,8 @@ reg [PCIE_TAG_WIDTH-1:0] pcie_tag_table_start_ptr_reg = 0, pcie_tag_table_start_
|
||||
reg [RAM_SEL_WIDTH-1:0] pcie_tag_table_start_ram_sel_reg = 0, pcie_tag_table_start_ram_sel_next;
|
||||
reg [RAM_ADDR_WIDTH-1:0] pcie_tag_table_start_ram_addr_reg = 0, pcie_tag_table_start_ram_addr_next;
|
||||
reg [OP_TAG_WIDTH-1:0] pcie_tag_table_start_op_tag_reg = 0, pcie_tag_table_start_op_tag_next;
|
||||
reg [6:0] pcie_tag_table_start_cplh_fc_reg = 0, pcie_tag_table_start_cplh_fc_next;
|
||||
reg [8:0] pcie_tag_table_start_cpld_fc_reg = 0, pcie_tag_table_start_cpld_fc_next;
|
||||
reg pcie_tag_table_start_zero_len_reg = 1'b0, pcie_tag_table_start_zero_len_next;
|
||||
reg pcie_tag_table_start_en_reg = 1'b0, pcie_tag_table_start_en_next;
|
||||
reg [PCIE_TAG_WIDTH-1:0] pcie_tag_table_finish_ptr;
|
||||
@ -508,6 +531,10 @@ reg [RAM_ADDR_WIDTH-1:0] pcie_tag_table_ram_addr[(2**PCIE_TAG_WIDTH)-1:0];
|
||||
(* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *)
|
||||
reg [OP_TAG_WIDTH-1:0] pcie_tag_table_op_tag[(2**PCIE_TAG_WIDTH)-1:0];
|
||||
(* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *)
|
||||
reg [6:0] pcie_tag_table_cplh_fc[(2**PCIE_TAG_WIDTH)-1:0];
|
||||
(* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *)
|
||||
reg [8:0] pcie_tag_table_cpld_fc[(2**PCIE_TAG_WIDTH)-1:0];
|
||||
(* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *)
|
||||
reg pcie_tag_table_zero_len[(2**PCIE_TAG_WIDTH)-1:0];
|
||||
(* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *)
|
||||
reg pcie_tag_table_active_a[(2**PCIE_TAG_WIDTH)-1:0];
|
||||
@ -621,6 +648,8 @@ always @* begin
|
||||
inc_active_tx = 1'b0;
|
||||
inc_active_tag = 1'b0;
|
||||
inc_active_op = 1'b0;
|
||||
inc_active_cplh_fc_count = 0;
|
||||
inc_active_cpld_fc_count = 0;
|
||||
|
||||
op_table_start_ptr = req_op_tag_reg;
|
||||
op_table_start_tag = s_axis_read_desc_tag;
|
||||
@ -637,14 +666,25 @@ always @* begin
|
||||
// crosses 4k boundary, split on 4K boundary
|
||||
req_tlp_count = 13'h1000 - req_pcie_addr_reg[11:0];
|
||||
req_dword_count = 11'h400 - req_pcie_addr_reg[11:2];
|
||||
req_cpld_fc_count = 9'h100 - req_pcie_addr_reg[11:4];
|
||||
if (rcb_128b_reg) begin
|
||||
req_cplh_fc_count = 6'h20 - req_pcie_addr_reg[11:7];
|
||||
end else begin
|
||||
req_cplh_fc_count = 7'h40 - req_pcie_addr_reg[11:6];
|
||||
end
|
||||
req_last_tlp = (((req_pcie_addr_reg & 12'hfff) + (req_op_count_reg & 12'hfff)) & 12'hfff) == 0 && req_op_count_reg >> 12 == 0;
|
||||
// optimized req_pcie_addr = req_pcie_addr_reg + req_tlp_count
|
||||
req_pcie_addr[PCIE_ADDR_WIDTH-1:12] = req_pcie_addr_reg[PCIE_ADDR_WIDTH-1:12]+1;
|
||||
req_pcie_addr[11:0] = 12'd0;
|
||||
end else begin
|
||||
// does not cross 4k boundary, send one TLP
|
||||
req_tlp_count = req_op_count_reg;
|
||||
req_dword_count = (req_op_count_reg + req_pcie_addr_reg[1:0] + 3) >> 2;
|
||||
req_cpld_fc_count = (req_op_count_reg + req_pcie_addr_reg[1:0] + 15) >> 4;
|
||||
if (rcb_128b_reg) begin
|
||||
req_cplh_fc_count = (req_pcie_addr_reg[6:0]+req_op_count_reg+127) >> 7;
|
||||
end else begin
|
||||
req_cplh_fc_count = (req_pcie_addr_reg[5:0]+req_op_count_reg+63) >> 6;
|
||||
end
|
||||
req_last_tlp = 1'b1;
|
||||
// always last TLP, so next address is irrelevant
|
||||
req_pcie_addr[PCIE_ADDR_WIDTH-1:12] = req_pcie_addr_reg[PCIE_ADDR_WIDTH-1:12];
|
||||
@ -656,25 +696,47 @@ always @* begin
|
||||
// crosses 4k boundary, split on 4K boundary
|
||||
req_tlp_count = 13'h1000 - req_pcie_addr_reg[11:0];
|
||||
req_dword_count = 11'h400 - req_pcie_addr_reg[11:2];
|
||||
req_cpld_fc_count = 9'h100 - req_pcie_addr_reg[11:4];
|
||||
if (rcb_128b_reg) begin
|
||||
req_cplh_fc_count = 6'h20 - req_pcie_addr_reg[11:7];
|
||||
end else begin
|
||||
req_cplh_fc_count = 7'h40 - req_pcie_addr_reg[11:6];
|
||||
end
|
||||
req_last_tlp = 1'b0;
|
||||
// optimized req_pcie_addr = req_pcie_addr_reg + req_tlp_count
|
||||
req_pcie_addr[PCIE_ADDR_WIDTH-1:12] = req_pcie_addr_reg[PCIE_ADDR_WIDTH-1:12]+1;
|
||||
req_pcie_addr[11:0] = 12'd0;
|
||||
end else begin
|
||||
// does not cross 4k boundary, split on 128-byte read completion boundary
|
||||
req_tlp_count = {max_read_request_size_dw_reg, 2'b00} - req_pcie_addr_reg[6:0];
|
||||
req_dword_count = max_read_request_size_dw_reg - req_pcie_addr_reg[6:2];
|
||||
req_cpld_fc_count = max_read_request_size_dw_reg[10:2] - req_pcie_addr_reg[6:4];
|
||||
if (rcb_128b_reg) begin
|
||||
req_cplh_fc_count = max_read_request_size_dw_reg[10:5];
|
||||
end else begin
|
||||
req_cplh_fc_count = max_read_request_size_dw_reg[10:4] - req_pcie_addr_reg[6];
|
||||
end
|
||||
req_last_tlp = 1'b0;
|
||||
// optimized req_pcie_addr = req_pcie_addr_reg + req_tlp_count
|
||||
req_pcie_addr[PCIE_ADDR_WIDTH-1:12] = req_pcie_addr_reg[PCIE_ADDR_WIDTH-1:12];
|
||||
req_pcie_addr[11:0] = {{req_pcie_addr_reg[11:7], 5'd0} + max_read_request_size_dw_reg, 2'b00};
|
||||
end
|
||||
end
|
||||
|
||||
// un-optimized TLP size computations (for reference)
|
||||
// req_dword_count = (req_tlp_count + req_pcie_addr_reg[1:0] + 3) >> 2
|
||||
// req_cpld_fc_count = (req_dword_count + 3) >> 2
|
||||
// if (rcb_128b_reg) begin
|
||||
// req_cplh_fc_count = (req_pcie_addr_reg[6:0]+req_tlp_count+127) >> 7
|
||||
// end lse begin
|
||||
// req_cplh_fc_count = (req_pcie_addr_reg[5:0]+req_tlp_count+63) >> 6
|
||||
// end
|
||||
// req_pcie_addr = req_pcie_addr_reg + req_tlp_count
|
||||
|
||||
pcie_tag_table_start_ptr_next = req_pcie_tag_reg;
|
||||
pcie_tag_table_start_ram_sel_next = req_ram_sel_reg;
|
||||
pcie_tag_table_start_ram_addr_next = req_ram_addr_reg + req_tlp_count;
|
||||
pcie_tag_table_start_op_tag_next = req_op_tag_reg;
|
||||
pcie_tag_table_start_cplh_fc_next = req_cplh_fc_count;
|
||||
pcie_tag_table_start_cpld_fc_next = req_cpld_fc_count;
|
||||
pcie_tag_table_start_zero_len_next = req_zero_len_reg;
|
||||
pcie_tag_table_start_en_next = 1'b0;
|
||||
|
||||
@ -755,7 +817,7 @@ always @* begin
|
||||
tx_rd_req_tlp_hdr_next = tlp_hdr;
|
||||
end
|
||||
|
||||
if ((!tx_rd_req_tlp_valid_reg || tx_rd_req_tlp_ready) && req_pcie_tag_valid_reg && (!TX_SEQ_NUM_ENABLE || active_tx_count_av_reg)) begin
|
||||
if ((!tx_rd_req_tlp_valid_reg || tx_rd_req_tlp_ready) && req_pcie_tag_valid_reg && (!TX_SEQ_NUM_ENABLE || active_tx_count_av_reg) && active_cplh_fc_av_reg && active_cpld_fc_av_reg) begin
|
||||
tx_rd_req_tlp_valid_next = 1'b1;
|
||||
|
||||
inc_active_tx = 1'b1;
|
||||
@ -768,9 +830,13 @@ always @* begin
|
||||
pcie_tag_table_start_ram_sel_next = req_ram_sel_reg;
|
||||
pcie_tag_table_start_ram_addr_next = req_ram_addr_reg + req_tlp_count;
|
||||
pcie_tag_table_start_op_tag_next = req_op_tag_reg;
|
||||
pcie_tag_table_start_cplh_fc_next = req_cplh_fc_count;
|
||||
pcie_tag_table_start_cpld_fc_next = req_cpld_fc_count;
|
||||
pcie_tag_table_start_zero_len_next = req_zero_len_reg;
|
||||
pcie_tag_table_start_en_next = 1'b1;
|
||||
inc_active_tag = 1'b1;
|
||||
inc_active_cplh_fc_count = req_cplh_fc_count;
|
||||
inc_active_cpld_fc_count = req_cpld_fc_count;
|
||||
|
||||
op_table_read_start_ptr = req_op_tag_reg;
|
||||
op_table_read_start_commit = req_last_tlp;
|
||||
@ -868,6 +934,8 @@ always @* begin
|
||||
|
||||
dec_active_tag = 1'b0;
|
||||
dec_active_op = 1'b0;
|
||||
dec_active_cplh_fc_count = 0;
|
||||
dec_active_cpld_fc_count = 0;
|
||||
|
||||
// Write generation
|
||||
ram_wr_cmd_sel_pipe_next = {RAM_SEG_COUNT{ram_sel_reg}};
|
||||
@ -1214,6 +1282,8 @@ always @* begin
|
||||
pcie_tag_table_finish_ptr = pcie_tag_reg;
|
||||
pcie_tag_table_finish_en = 1'b1;
|
||||
dec_active_tag = 1'b1;
|
||||
dec_active_cplh_fc_count = pcie_tag_table_cplh_fc[pcie_tag_reg];
|
||||
dec_active_cpld_fc_count = pcie_tag_table_cpld_fc[pcie_tag_reg];
|
||||
|
||||
pcie_tag_fifo_wr_tag = pcie_tag_reg;
|
||||
if (pcie_tag_fifo_wr_tag < PCIE_TAG_COUNT_1 || !PCIE_TAG_COUNT_2) begin
|
||||
@ -1398,6 +1468,7 @@ always @(posedge clk) begin
|
||||
stat_rd_tx_stall_reg <= stat_rd_tx_stall_next;
|
||||
|
||||
max_read_request_size_dw_reg <= 11'd32 << (max_read_request_size > 5 ? 5 : max_read_request_size);
|
||||
rcb_128b_reg <= rcb_128b;
|
||||
|
||||
if (status_fifo_wr_en) begin
|
||||
status_fifo_op_tag[status_fifo_wr_ptr_reg[STATUS_FIFO_ADDR_WIDTH-1:0]] <= status_fifo_wr_op_tag;
|
||||
@ -1430,10 +1501,18 @@ always @(posedge clk) begin
|
||||
active_tag_count_reg <= active_tag_count_reg + inc_active_tag - dec_active_tag;
|
||||
active_op_count_reg <= active_op_count_reg + inc_active_op - dec_active_op;
|
||||
|
||||
active_cplh_fc_count_reg <= active_cplh_fc_count_reg + inc_active_cplh_fc_count - dec_active_cplh_fc_count;
|
||||
active_cplh_fc_av_reg <= !CPLH_FC_LIMIT || active_cplh_fc_count_reg < CPLH_FC_LIMIT;
|
||||
|
||||
active_cpld_fc_count_reg <= active_cpld_fc_count_reg + inc_active_cpld_fc_count - dec_active_cpld_fc_count;
|
||||
active_cpld_fc_av_reg <= !CPLD_FC_LIMIT || active_cpld_fc_count_reg < CPLD_FC_LIMIT;
|
||||
|
||||
pcie_tag_table_start_ptr_reg <= pcie_tag_table_start_ptr_next;
|
||||
pcie_tag_table_start_ram_sel_reg <= pcie_tag_table_start_ram_sel_next;
|
||||
pcie_tag_table_start_ram_addr_reg <= pcie_tag_table_start_ram_addr_next;
|
||||
pcie_tag_table_start_op_tag_reg <= pcie_tag_table_start_op_tag_next;
|
||||
pcie_tag_table_start_cplh_fc_reg <= pcie_tag_table_start_cplh_fc_next;
|
||||
pcie_tag_table_start_cpld_fc_reg <= pcie_tag_table_start_cpld_fc_next;
|
||||
pcie_tag_table_start_zero_len_reg <= pcie_tag_table_start_zero_len_next;
|
||||
pcie_tag_table_start_en_reg <= pcie_tag_table_start_en_next;
|
||||
|
||||
@ -1443,6 +1522,8 @@ always @(posedge clk) begin
|
||||
pcie_tag_table_ram_sel[pcie_tag_table_start_ptr_reg] <= pcie_tag_table_start_ram_sel_reg;
|
||||
pcie_tag_table_ram_addr[pcie_tag_table_start_ptr_reg] <= pcie_tag_table_start_ram_addr_reg;
|
||||
pcie_tag_table_op_tag[pcie_tag_table_start_ptr_reg] <= pcie_tag_table_start_op_tag_reg;
|
||||
pcie_tag_table_cplh_fc[pcie_tag_table_start_ptr_reg] <= pcie_tag_table_start_cplh_fc_reg;
|
||||
pcie_tag_table_cpld_fc[pcie_tag_table_start_ptr_reg] <= pcie_tag_table_start_cpld_fc_reg;
|
||||
pcie_tag_table_zero_len[pcie_tag_table_start_ptr_reg] <= pcie_tag_table_start_zero_len_reg;
|
||||
pcie_tag_table_active_a[pcie_tag_table_start_ptr_reg] <= !pcie_tag_table_active_b[pcie_tag_table_start_ptr_reg];
|
||||
end
|
||||
@ -1558,6 +1639,12 @@ always @(posedge clk) begin
|
||||
active_tag_count_reg <= 0;
|
||||
active_op_count_reg <= 0;
|
||||
|
||||
active_cplh_fc_count_reg <= 0;
|
||||
active_cplh_fc_av_reg <= 1'b1;
|
||||
|
||||
active_cpld_fc_count_reg <= 0;
|
||||
active_cpld_fc_av_reg <= 1'b1;
|
||||
|
||||
pcie_tag_table_start_en_reg <= 1'b0;
|
||||
|
||||
pcie_tag_fifo_1_wr_ptr_reg <= 0;
|
||||
|
@ -50,6 +50,8 @@ export PARAM_LEN_WIDTH := 20
|
||||
export PARAM_TAG_WIDTH := 8
|
||||
export PARAM_OP_TABLE_SIZE := $(PARAM_PCIE_TAG_COUNT)
|
||||
export PARAM_TX_LIMIT := $(shell echo "$$(( 1 << ($(PARAM_TX_SEQ_NUM_WIDTH)-1) ))" )
|
||||
export PARAM_CPLH_FC_LIMIT := 512
|
||||
export PARAM_CPLD_FC_LIMIT := $(shell expr $(PARAM_CPLH_FC_LIMIT) \* 4 )
|
||||
export PARAM_TLP_FORCE_64_BIT_ADDR := 0
|
||||
export PARAM_CHECK_BUS_NUMBER := 1
|
||||
|
||||
|
@ -85,6 +85,7 @@ class TB(object):
|
||||
|
||||
cfg_max_read_req=dut.max_read_request_size,
|
||||
cfg_ext_tag_enable=dut.ext_tag_enable,
|
||||
cfg_rcb=dut.rcb_128b,
|
||||
)
|
||||
|
||||
self.dev.log.setLevel(logging.DEBUG)
|
||||
@ -330,6 +331,8 @@ def test_dma_if_pcie_rd(request, pcie_data_width, pcie_offset):
|
||||
parameters['TAG_WIDTH'] = 8
|
||||
parameters['OP_TABLE_SIZE'] = parameters['PCIE_TAG_COUNT']
|
||||
parameters['TX_LIMIT'] = 2**(parameters['TX_SEQ_NUM_WIDTH']-1)
|
||||
parameters['CPLH_FC_LIMIT'] = 512
|
||||
parameters['CPLD_FC_LIMIT'] = parameters['CPLH_FC_LIMIT']*4
|
||||
parameters['TLP_FORCE_64_BIT_ADDR'] = 0
|
||||
parameters['CHECK_BUS_NUMBER'] = 0
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user