From 32fe17ad91821b52a5fa405e0770eac756a5bf42 Mon Sep 17 00:00:00 2001 From: Alex Forencich Date: Fri, 25 Mar 2022 23:56:42 -0700 Subject: [PATCH 01/21] Return 0 for unmatched registers --- example/common/rtl/example_core.v | 1 + 1 file changed, 1 insertion(+) diff --git a/example/common/rtl/example_core.v b/example/common/rtl/example_core.v index a67fb6bcc..5d11aacf5 100644 --- a/example/common/rtl/example_core.v +++ b/example/common/rtl/example_core.v @@ -411,6 +411,7 @@ always @* begin axil_ctrl_arready_next = 1'b1; axil_ctrl_rresp_next = 2'b00; axil_ctrl_rvalid_next = 1'b1; + axil_ctrl_rdata_next = 32'd0; case ({s_axil_ctrl_araddr[15:2], 2'b00}) // control From 3f967c673fd37150e8601d121491a04733a7ab57 Mon Sep 17 00:00:00 2001 From: Alex Forencich Date: Wed, 30 Mar 2022 23:39:34 -0700 Subject: [PATCH 02/21] Read zero length flag on all paths --- rtl/dma_if_pcie_us_wr.v | 2 ++ rtl/dma_if_pcie_wr.v | 1 + rtl/pcie_us_axi_dma_wr.v | 2 ++ 3 files changed, 5 insertions(+) diff --git a/rtl/dma_if_pcie_us_wr.v b/rtl/dma_if_pcie_us_wr.v index 3179cbfd7..5b351900e 100644 --- a/rtl/dma_if_pcie_us_wr.v +++ b/rtl/dma_if_pcie_us_wr.v @@ -1074,6 +1074,7 @@ always @* begin // skip idle state if possible tlp_addr_next = op_table_pcie_addr[op_table_tx_start_ptr_reg[OP_TAG_WIDTH-1:0]]; tlp_len_next = op_table_len[op_table_tx_start_ptr_reg[OP_TAG_WIDTH-1:0]]; + tlp_zero_len_next = op_table_zero_len[op_table_tx_start_ptr_reg[OP_TAG_WIDTH-1:0]]; dword_count_next = op_table_dword_len[op_table_tx_start_ptr_reg[OP_TAG_WIDTH-1:0]]; offset_next = op_table_offset[op_table_tx_start_ptr_reg[OP_TAG_WIDTH-1:0]]; cycle_count_next = op_table_cycle_count[op_table_tx_start_ptr_reg[OP_TAG_WIDTH-1:0]]; @@ -1139,6 +1140,7 @@ always @* begin // skip idle state if possible tlp_addr_next = op_table_pcie_addr[op_table_tx_start_ptr_reg[OP_TAG_WIDTH-1:0]]; tlp_len_next = op_table_len[op_table_tx_start_ptr_reg[OP_TAG_WIDTH-1:0]]; + tlp_zero_len_next = 
op_table_zero_len[op_table_tx_start_ptr_reg[OP_TAG_WIDTH-1:0]]; dword_count_next = op_table_dword_len[op_table_tx_start_ptr_reg[OP_TAG_WIDTH-1:0]]; offset_next = op_table_offset[op_table_tx_start_ptr_reg[OP_TAG_WIDTH-1:0]]; cycle_count_next = op_table_cycle_count[op_table_tx_start_ptr_reg[OP_TAG_WIDTH-1:0]]; diff --git a/rtl/dma_if_pcie_wr.v b/rtl/dma_if_pcie_wr.v index 55c9779f9..dc0ad81af 100644 --- a/rtl/dma_if_pcie_wr.v +++ b/rtl/dma_if_pcie_wr.v @@ -896,6 +896,7 @@ always @* begin // skip idle state if possible tlp_addr_next = op_table_pcie_addr[op_table_tx_start_ptr_reg[OP_TAG_WIDTH-1:0]]; tlp_len_next = op_table_len[op_table_tx_start_ptr_reg[OP_TAG_WIDTH-1:0]]; + tlp_zero_len_next = op_table_zero_len[op_table_tx_start_ptr_reg[OP_TAG_WIDTH-1:0]]; dword_count_next = op_table_dword_len[op_table_tx_start_ptr_reg[OP_TAG_WIDTH-1:0]]; offset_next = op_table_offset[op_table_tx_start_ptr_reg[OP_TAG_WIDTH-1:0]]; cycle_count_next = op_table_cycle_count[op_table_tx_start_ptr_reg[OP_TAG_WIDTH-1:0]]; diff --git a/rtl/pcie_us_axi_dma_wr.v b/rtl/pcie_us_axi_dma_wr.v index d23c80427..1796b6387 100644 --- a/rtl/pcie_us_axi_dma_wr.v +++ b/rtl/pcie_us_axi_dma_wr.v @@ -864,6 +864,7 @@ always @* begin // skip idle state if possible tlp_addr_next = op_table_pcie_addr[op_table_tx_start_ptr_reg[OP_TAG_WIDTH-1:0]]; tlp_len_next = op_table_len[op_table_tx_start_ptr_reg[OP_TAG_WIDTH-1:0]]; + tlp_zero_len_next = op_table_zero_len[op_table_tx_start_ptr_reg[OP_TAG_WIDTH-1:0]]; dword_count_next = op_table_dword_len[op_table_tx_start_ptr_reg[OP_TAG_WIDTH-1:0]]; offset_next = op_table_offset[op_table_tx_start_ptr_reg[OP_TAG_WIDTH-1:0]]; input_cycle_count_next = op_table_input_cycle_count[op_table_tx_start_ptr_reg[OP_TAG_WIDTH-1:0]]; @@ -990,6 +991,7 @@ always @* begin // skip idle state if possible tlp_addr_next = op_table_pcie_addr[op_table_tx_start_ptr_reg[OP_TAG_WIDTH-1:0]]; tlp_len_next = op_table_len[op_table_tx_start_ptr_reg[OP_TAG_WIDTH-1:0]]; + tlp_zero_len_next = 
op_table_zero_len[op_table_tx_start_ptr_reg[OP_TAG_WIDTH-1:0]]; dword_count_next = op_table_dword_len[op_table_tx_start_ptr_reg[OP_TAG_WIDTH-1:0]]; offset_next = op_table_offset[op_table_tx_start_ptr_reg[OP_TAG_WIDTH-1:0]]; input_cycle_count_next = op_table_input_cycle_count[op_table_tx_start_ptr_reg[OP_TAG_WIDTH-1:0]]; From 7cae50fa10a675a14f5ea5f8e3a54486baa47159 Mon Sep 17 00:00:00 2001 From: Alex Forencich Date: Wed, 30 Mar 2022 23:40:02 -0700 Subject: [PATCH 03/21] Support zero-length operations in AXI DMA interface modules --- rtl/dma_if_axi_rd.v | 41 ++++++++++++++++++++++---- rtl/dma_if_axi_wr.v | 28 ++++++++++++++++-- tb/dma_if_axi_rd/test_dma_if_axi_rd.py | 2 +- tb/dma_if_axi_wr/test_dma_if_axi_wr.py | 2 +- 4 files changed, 63 insertions(+), 10 deletions(-) diff --git a/rtl/dma_if_axi_rd.v b/rtl/dma_if_axi_rd.v index bbeb97e2d..8380286da 100644 --- a/rtl/dma_if_axi_rd.v +++ b/rtl/dma_if_axi_rd.v @@ -223,12 +223,14 @@ reg [RAM_SEL_WIDTH-1:0] req_ram_sel_reg = {RAM_SEL_WIDTH{1'b0}}, req_ram_sel_nex reg [RAM_ADDR_WIDTH-1:0] req_ram_addr_reg = {RAM_ADDR_WIDTH{1'b0}}, req_ram_addr_next; reg [LEN_WIDTH-1:0] req_op_count_reg = {LEN_WIDTH{1'b0}}, req_op_count_next; reg [LEN_WIDTH-1:0] req_tr_count_reg = {LEN_WIDTH{1'b0}}, req_tr_count_next; +reg req_zero_len_reg = 1'b0, req_zero_len_next; reg [TAG_WIDTH-1:0] req_tag_reg = {TAG_WIDTH{1'b0}}, req_tag_next; reg [RAM_SEL_WIDTH-1:0] ram_sel_reg = {RAM_SEL_WIDTH{1'b0}}, ram_sel_next; reg [RAM_ADDR_WIDTH-1:0] addr_reg = {RAM_ADDR_WIDTH{1'b0}}, addr_next; reg [RAM_ADDR_WIDTH-1:0] addr_delay_reg = {RAM_ADDR_WIDTH{1'b0}}, addr_delay_next; reg [12:0] op_count_reg = 13'd0, op_count_next; +reg zero_len_reg = 1'b0, zero_len_next; reg [RAM_SEG_COUNT-1:0] ram_mask_reg = {RAM_SEG_COUNT{1'b0}}, ram_mask_next; reg [RAM_SEG_COUNT-1:0] ram_mask_0_reg = {RAM_SEG_COUNT{1'b0}}, ram_mask_0_next; reg [RAM_SEG_COUNT-1:0] ram_mask_1_reg = {RAM_SEG_COUNT{1'b0}}, ram_mask_1_next; @@ -251,6 +253,7 @@ reg [OP_TAG_WIDTH-1:0] 
status_fifo_wr_op_tag; reg [RAM_SEG_COUNT-1:0] status_fifo_wr_mask; reg status_fifo_wr_finish; reg status_fifo_we; +reg status_fifo_mask_reg = 1'b0, status_fifo_mask_next; reg status_fifo_finish_reg = 1'b0, status_fifo_finish_next; reg status_fifo_we_reg = 1'b0, status_fifo_we_next; reg status_fifo_half_full_reg = 1'b0; @@ -308,6 +311,7 @@ reg [AXI_ADDR_WIDTH-1:0] op_table_start_axi_addr; reg [RAM_SEL_WIDTH-1:0] op_table_start_ram_sel; reg [RAM_ADDR_WIDTH-1:0] op_table_start_ram_addr; reg [11:0] op_table_start_len; +reg op_table_start_zero_len; reg [CYCLE_COUNT_WIDTH-1:0] op_table_start_cycle_count; reg [TAG_WIDTH-1:0] op_table_start_tag; reg op_table_start_last; @@ -329,6 +333,8 @@ reg [RAM_ADDR_WIDTH-1:0] op_table_ram_addr [2**OP_TAG_WIDTH-1:0]; (* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *) reg [11:0] op_table_len[2**OP_TAG_WIDTH-1:0]; (* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *) +reg op_table_zero_len[2**OP_TAG_WIDTH-1:0]; +(* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *) reg [CYCLE_COUNT_WIDTH-1:0] op_table_cycle_count[2**OP_TAG_WIDTH-1:0]; (* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *) reg [TAG_WIDTH-1:0] op_table_tag[2**OP_TAG_WIDTH-1:0]; @@ -345,6 +351,7 @@ initial begin op_table_ram_sel[i] = 0; op_table_ram_addr[i] = 0; op_table_len[i] = 0; + op_table_zero_len[i] = 1'b0; op_table_cycle_count[i] = 0; op_table_tag[i] = 0; op_table_last[i] = 0; @@ -362,6 +369,7 @@ always @* begin req_ram_addr_next = req_ram_addr_reg; req_op_count_next = req_op_count_reg; req_tr_count_next = req_tr_count_reg; + req_zero_len_next = req_zero_len_reg; req_tag_next = req_tag_reg; m_axi_arid_next = m_axi_arid_reg; @@ -373,6 +381,7 @@ always @* begin op_table_start_ram_sel = req_ram_sel_reg; op_table_start_ram_addr = req_ram_addr_reg; op_table_start_len = 0; + op_table_start_zero_len = req_zero_len_reg; op_table_start_tag = req_tag_reg; op_table_start_cycle_count = 0; op_table_start_last = 0; @@ -386,7 +395,14 @@ 
always @* begin req_axi_addr_next = s_axis_read_desc_axi_addr; req_ram_sel_next = s_axis_read_desc_ram_sel; req_ram_addr_next = s_axis_read_desc_ram_addr; - req_op_count_next = s_axis_read_desc_len; + if (s_axis_read_desc_len == 0) begin + // zero-length operation + req_op_count_next = 1; + req_zero_len_next = 1'b1; + end else begin + req_op_count_next = s_axis_read_desc_len; + req_zero_len_next = 1'b0; + end req_tag_next = s_axis_read_desc_tag; if (req_op_count_next <= AXI_MAX_BURST_SIZE - (req_axi_addr_next & OFFSET_MASK) || AXI_MAX_BURST_SIZE >= 4096) begin @@ -426,6 +442,7 @@ always @* begin op_table_start_ram_sel = req_ram_sel_reg; op_table_start_ram_addr = req_ram_addr_reg; op_table_start_len = req_tr_count_next; + op_table_start_zero_len = req_zero_len_reg; op_table_start_tag = req_tag_reg; op_table_start_cycle_count = (req_tr_count_next + (req_axi_addr_reg & OFFSET_MASK) - 1) >> AXI_BURST_SIZE; op_table_start_last = req_op_count_reg == req_tr_count_next; @@ -478,6 +495,7 @@ always @* begin addr_next = addr_reg; addr_delay_next = addr_delay_reg; op_count_next = op_count_reg; + zero_len_next = zero_len_reg; ram_mask_next = ram_mask_reg; ram_mask_0_next = ram_mask_0_reg; ram_mask_1_next = ram_mask_1_reg; @@ -495,6 +513,7 @@ always @* begin m_axi_rdata_int_next = m_axi_rdata_int_reg; m_axi_rvalid_int_next = 1'b0; + status_fifo_mask_next = 1'b1; status_fifo_finish_next = 1'b0; status_fifo_we_next = 1'b0; @@ -534,6 +553,7 @@ always @* begin ram_sel_next = op_table_ram_sel[op_tag_next]; addr_next = op_table_ram_addr[op_tag_next]; op_count_next = op_table_len[op_tag_next]; + zero_len_next = op_table_zero_len[op_tag_next]; offset_next = op_table_ram_addr[op_tag_next][RAM_OFFSET_WIDTH-1:0]-(op_table_axi_addr[op_tag_next] & OFFSET_MASK); if (m_axi_rready && m_axi_rvalid) begin @@ -560,8 +580,14 @@ always @* begin addr_next = addr_next + cycle_byte_count_next; op_count_next = op_count_next - cycle_byte_count_next; - m_axi_rdata_int_next = m_axi_rdata; - 
m_axi_rvalid_int_next = 1'b1; + if (zero_len_next) begin + status_fifo_mask_next = 1'b0; + end else begin + m_axi_rdata_int_next = m_axi_rdata; + m_axi_rvalid_int_next = 1'b1; + + status_fifo_mask_next = 1'b1; + end status_fifo_finish_next = 1'b0; status_fifo_we_next = 1'b1; @@ -612,6 +638,7 @@ always @* begin m_axi_rdata_int_next = m_axi_rdata; m_axi_rvalid_int_next = 1'b1; + status_fifo_mask_next = 1'b1; status_fifo_finish_next = 1'b0; status_fifo_we_next = 1'b1; @@ -630,13 +657,13 @@ always @* begin status_fifo_rd_ptr_next = status_fifo_rd_ptr_reg; status_fifo_wr_op_tag = op_tag_reg; - status_fifo_wr_mask = ram_mask_reg; + status_fifo_wr_mask = status_fifo_mask_reg ? ram_mask_reg : 0; status_fifo_wr_finish = status_fifo_finish_reg; status_fifo_we = 1'b0; if (status_fifo_we_reg) begin status_fifo_wr_op_tag = op_tag_reg; - status_fifo_wr_mask = ram_mask_reg; + status_fifo_wr_mask = status_fifo_mask_reg ? ram_mask_reg : 0; status_fifo_wr_finish = status_fifo_finish_reg; status_fifo_we = 1'b1; end @@ -698,12 +725,14 @@ always @(posedge clk) begin req_ram_addr_reg <= req_ram_addr_next; req_op_count_reg <= req_op_count_next; req_tr_count_reg <= req_tr_count_next; + req_zero_len_reg <= req_zero_len_next; req_tag_reg <= req_tag_next; ram_sel_reg <= ram_sel_next; addr_reg <= addr_next; addr_delay_reg <= addr_delay_next; op_count_reg <= op_count_next; + zero_len_reg <= zero_len_next; ram_mask_reg <= ram_mask_next; ram_mask_0_reg <= ram_mask_0_next; ram_mask_1_reg <= ram_mask_1_next; @@ -737,6 +766,7 @@ always @(posedge clk) begin end status_fifo_rd_ptr_reg <= status_fifo_rd_ptr_next; + status_fifo_mask_reg <= status_fifo_mask_next; status_fifo_finish_reg <= status_fifo_finish_next; status_fifo_we_reg <= status_fifo_we_next; @@ -754,6 +784,7 @@ always @(posedge clk) begin op_table_ram_sel[op_table_start_ptr_reg[OP_TAG_WIDTH-1:0]] <= op_table_start_ram_sel; op_table_ram_addr[op_table_start_ptr_reg[OP_TAG_WIDTH-1:0]] <= op_table_start_ram_addr; 
op_table_len[op_table_start_ptr_reg[OP_TAG_WIDTH-1:0]] <= op_table_start_len; + op_table_zero_len[op_table_start_ptr_reg[OP_TAG_WIDTH-1:0]] <= op_table_start_zero_len; op_table_cycle_count[op_table_start_ptr_reg[OP_TAG_WIDTH-1:0]] <= op_table_start_cycle_count; op_table_tag[op_table_start_ptr_reg[OP_TAG_WIDTH-1:0]] <= op_table_start_tag; op_table_last[op_table_start_ptr_reg[OP_TAG_WIDTH-1:0]] <= op_table_start_last; diff --git a/rtl/dma_if_axi_wr.v b/rtl/dma_if_axi_wr.v index 1f2de6679..abdb784a6 100644 --- a/rtl/dma_if_axi_wr.v +++ b/rtl/dma_if_axi_wr.v @@ -235,6 +235,7 @@ reg [AXI_ADDR_WIDTH-1:0] req_axi_addr_reg = {AXI_ADDR_WIDTH{1'b0}}, req_axi_addr reg [RAM_SEL_WIDTH-1:0] ram_sel_reg = {RAM_SEL_WIDTH{1'b0}}, ram_sel_next; reg [RAM_ADDR_WIDTH-1:0] ram_addr_reg = {RAM_ADDR_WIDTH{1'b0}}, ram_addr_next; reg [LEN_WIDTH-1:0] op_count_reg = {LEN_WIDTH{1'b0}}, op_count_next; +reg zero_len_reg = 1'b0, zero_len_next; reg [LEN_WIDTH-1:0] tr_count_reg = {LEN_WIDTH{1'b0}}, tr_count_next; reg [12:0] tr_word_count_reg = 13'd0, tr_word_count_next; reg [TAG_WIDTH-1:0] tag_reg = {TAG_WIDTH{1'b0}}, tag_next; @@ -255,6 +256,7 @@ reg [RAM_OFFSET_WIDTH-1:0] end_offset_reg = {RAM_OFFSET_WIDTH{1'b0}}, end_offset reg [AXI_ADDR_WIDTH-1:0] axi_addr_reg = {AXI_ADDR_WIDTH{1'b0}}, axi_addr_next; reg [12:0] axi_len_reg = 13'd0, axi_len_next; +reg axi_zero_len_reg = 1'b0, axi_zero_len_next; reg [RAM_OFFSET_WIDTH-1:0] offset_reg = {RAM_OFFSET_WIDTH{1'b0}}, offset_next; reg [AXI_STRB_WIDTH-1:0] strb_offset_mask_reg = {AXI_STRB_WIDTH{1'b1}}, strb_offset_mask_next; reg [OFFSET_WIDTH-1:0] last_cycle_offset_reg = {OFFSET_WIDTH{1'b0}}, last_cycle_offset_next; @@ -330,6 +332,7 @@ assign ram_rd_resp_ready = ram_rd_resp_ready_cmb; reg [OP_TAG_WIDTH+1-1:0] op_table_start_ptr_reg = 0; reg [AXI_ADDR_WIDTH-1:0] op_table_start_axi_addr; reg [11:0] op_table_start_len; +reg op_table_start_zero_len; reg [CYCLE_COUNT_WIDTH-1:0] op_table_start_cycle_count; reg [RAM_OFFSET_WIDTH-1:0] op_table_start_offset; reg 
[TAG_WIDTH-1:0] op_table_start_tag; @@ -351,6 +354,8 @@ reg [AXI_ADDR_WIDTH-1:0] op_table_axi_addr[2**OP_TAG_WIDTH-1:0]; (* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *) reg [11:0] op_table_len[2**OP_TAG_WIDTH-1:0]; (* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *) +reg op_table_zero_len[2**OP_TAG_WIDTH-1:0]; +(* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *) reg [CYCLE_COUNT_WIDTH-1:0] op_table_cycle_count[2**OP_TAG_WIDTH-1:0]; (* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *) reg [RAM_OFFSET_WIDTH-1:0] op_table_offset[2**OP_TAG_WIDTH-1:0]; @@ -365,6 +370,7 @@ initial begin for (i = 0; i < 2**OP_TAG_WIDTH; i = i + 1) begin op_table_axi_addr[i] = 0; op_table_len[i] = 0; + op_table_zero_len[i] = 1'b0; op_table_cycle_count[i] = 0; op_table_offset[i] = 0; op_table_tag[i] = 0; @@ -382,6 +388,7 @@ always @* begin ram_sel_next = ram_sel_reg; ram_addr_next = ram_addr_reg; op_count_next = op_count_reg; + zero_len_next = zero_len_reg; tr_count_next = tr_count_reg; tr_word_count_next = tr_word_count_reg; @@ -395,6 +402,7 @@ always @* begin op_table_start_axi_addr = req_axi_addr_reg; op_table_start_len = 0; + op_table_start_zero_len = zero_len_reg; op_table_start_cycle_count = 0; op_table_start_offset = (req_axi_addr_reg & OFFSET_MASK)-ram_addr_reg[RAM_OFFSET_WIDTH-1:0]; op_table_start_tag = tag_reg; @@ -410,7 +418,14 @@ always @* begin req_axi_addr_next = s_axis_write_desc_axi_addr; ram_sel_next = s_axis_write_desc_ram_sel; ram_addr_next = s_axis_write_desc_ram_addr; - op_count_next = s_axis_write_desc_len; + if (s_axis_write_desc_len == 0) begin + // zero-length operation + op_count_next = 1; + zero_len_next = 1'b1; + end else begin + op_count_next = s_axis_write_desc_len; + zero_len_next = 1'b0; + end tag_next = s_axis_write_desc_tag; if (op_count_next <= AXI_MAX_BURST_SIZE - (req_axi_addr_next & OFFSET_MASK) || AXI_MAX_BURST_SIZE >= 4096) begin @@ -458,6 +473,7 @@ always @* begin op_table_start_axi_addr = 
req_axi_addr_reg; op_table_start_len = tr_word_count_next; + op_table_start_zero_len = zero_len_reg; op_table_start_offset = (req_axi_addr_reg & OFFSET_MASK)-ram_addr_reg[RAM_OFFSET_WIDTH-1:0]; op_table_start_tag = tag_reg; op_table_start_last = op_count_reg == tr_word_count_next; @@ -656,6 +672,7 @@ always @* begin axi_addr_next = axi_addr_reg; axi_len_next = axi_len_reg; + axi_zero_len_next = axi_zero_len_reg; offset_next = offset_reg; strb_offset_mask_next = strb_offset_mask_reg; last_cycle_offset_next = last_cycle_offset_reg; @@ -691,8 +708,9 @@ always @* begin axi_addr_next = op_table_axi_addr[op_table_tx_start_ptr_reg[OP_TAG_WIDTH-1:0]]; axi_len_next = op_table_len[op_table_tx_start_ptr_reg[OP_TAG_WIDTH-1:0]]; + axi_zero_len_next = op_table_zero_len[op_table_tx_start_ptr_reg[OP_TAG_WIDTH-1:0]]; offset_next = op_table_offset[op_table_tx_start_ptr_reg[OP_TAG_WIDTH-1:0]]; - strb_offset_mask_next = {AXI_STRB_WIDTH{1'b1}} << (axi_addr_next & OFFSET_MASK); + strb_offset_mask_next = axi_zero_len_next ? {AXI_STRB_WIDTH{1'b0}} : ({AXI_STRB_WIDTH{1'b1}} << (axi_addr_next & OFFSET_MASK)); last_cycle_offset_next = axi_addr_next + (axi_len_next & OFFSET_MASK); cycle_count_next = op_table_cycle_count[op_table_tx_start_ptr_reg[OP_TAG_WIDTH-1:0]]; last_cycle_next = op_table_cycle_count[op_table_tx_start_ptr_reg[OP_TAG_WIDTH-1:0]] == 0; @@ -739,8 +757,9 @@ always @* begin // skip idle state if possible axi_addr_next = op_table_axi_addr[op_table_tx_start_ptr_reg[OP_TAG_WIDTH-1:0]]; axi_len_next = op_table_len[op_table_tx_start_ptr_reg[OP_TAG_WIDTH-1:0]]; + axi_zero_len_next = op_table_zero_len[op_table_tx_start_ptr_reg[OP_TAG_WIDTH-1:0]]; offset_next = op_table_offset[op_table_tx_start_ptr_reg[OP_TAG_WIDTH-1:0]]; - strb_offset_mask_next = {AXI_STRB_WIDTH{1'b1}} << (axi_addr_next & OFFSET_MASK); + strb_offset_mask_next = axi_zero_len_next ? 
{AXI_STRB_WIDTH{1'b0}} : ({AXI_STRB_WIDTH{1'b1}} << (axi_addr_next & OFFSET_MASK)); last_cycle_offset_next = axi_addr_next + (axi_len_next & OFFSET_MASK); cycle_count_next = op_table_cycle_count[op_table_tx_start_ptr_reg[OP_TAG_WIDTH-1:0]]; last_cycle_next = op_table_cycle_count[op_table_tx_start_ptr_reg[OP_TAG_WIDTH-1:0]] == 0; @@ -824,6 +843,7 @@ always @(posedge clk) begin ram_sel_reg <= ram_sel_next; ram_addr_reg <= ram_addr_next; op_count_reg <= op_count_next; + zero_len_reg <= zero_len_next; tr_count_reg <= tr_count_next; tr_word_count_reg <= tr_word_count_next; tag_reg <= tag_next; @@ -844,6 +864,7 @@ always @(posedge clk) begin axi_addr_reg <= axi_addr_next; axi_len_reg <= axi_len_next; + axi_zero_len_reg <= axi_zero_len_next; offset_reg <= offset_next; strb_offset_mask_reg <= strb_offset_mask_next; last_cycle_offset_reg <= last_cycle_offset_next; @@ -888,6 +909,7 @@ always @(posedge clk) begin op_table_write_complete[op_table_start_ptr_reg[OP_TAG_WIDTH-1:0]] <= 1'b0; op_table_axi_addr[op_table_start_ptr_reg[OP_TAG_WIDTH-1:0]] <= op_table_start_axi_addr; op_table_len[op_table_start_ptr_reg[OP_TAG_WIDTH-1:0]] <= op_table_start_len; + op_table_zero_len[op_table_start_ptr_reg[OP_TAG_WIDTH-1:0]] <= op_table_start_zero_len; op_table_cycle_count[op_table_start_ptr_reg[OP_TAG_WIDTH-1:0]] <= op_table_start_cycle_count; op_table_offset[op_table_start_ptr_reg[OP_TAG_WIDTH-1:0]] <= op_table_start_offset; op_table_tag[op_table_start_ptr_reg[OP_TAG_WIDTH-1:0]] <= op_table_start_tag; diff --git a/tb/dma_if_axi_rd/test_dma_if_axi_rd.py b/tb/dma_if_axi_rd/test_dma_if_axi_rd.py index 734914c6f..559bba5a3 100644 --- a/tb/dma_if_axi_rd/test_dma_if_axi_rd.py +++ b/tb/dma_if_axi_rd/test_dma_if_axi_rd.py @@ -117,7 +117,7 @@ async def run_test_read(dut, idle_inserter=None, backpressure_inserter=None): tb.dut.enable.value = 1 - for length in list(range(1, ram_byte_lanes+3))+list(range(128-4, 128+4))+[1024]: + for length in list(range(0, ram_byte_lanes+3))+list(range(128-4, 
128+4))+[1024]: for axi_offset in list(range(axi_byte_lanes+1))+list(range(4096-axi_byte_lanes, 4096)): for ram_offset in range(ram_byte_lanes+1): tb.log.info("length %d, axi_offset %d, ram_offset %d", length, axi_offset, ram_offset) diff --git a/tb/dma_if_axi_wr/test_dma_if_axi_wr.py b/tb/dma_if_axi_wr/test_dma_if_axi_wr.py index f26206fe4..2a349de8f 100644 --- a/tb/dma_if_axi_wr/test_dma_if_axi_wr.py +++ b/tb/dma_if_axi_wr/test_dma_if_axi_wr.py @@ -118,7 +118,7 @@ async def run_test_write(dut, idle_inserter=None, backpressure_inserter=None): tb.dut.enable.value = 1 - for length in list(range(1, ram_byte_lanes+3))+list(range(128-4, 128+4))+[1024]: + for length in list(range(0, ram_byte_lanes+3))+list(range(128-4, 128+4))+[1024]: # for axi_offset in axi_offsets: for axi_offset in list(range(axi_byte_lanes+1))+list(range(4096-axi_byte_lanes, 4096)): for ram_offset in range(ram_byte_lanes+1): From 0b9c7671fb35d240e01ecec337eb6b254c995f0f Mon Sep 17 00:00:00 2001 From: Alex Forencich Date: Thu, 31 Mar 2022 00:05:55 -0700 Subject: [PATCH 04/21] Minor refactor of zero-length handling logic --- rtl/dma_if_axi_rd.v | 16 ++++++++-------- rtl/dma_if_pcie_rd.v | 16 ++++++++-------- rtl/dma_if_pcie_us_rd.v | 16 ++++++++-------- 3 files changed, 24 insertions(+), 24 deletions(-) diff --git a/rtl/dma_if_axi_rd.v b/rtl/dma_if_axi_rd.v index 8380286da..eb3dab345 100644 --- a/rtl/dma_if_axi_rd.v +++ b/rtl/dma_if_axi_rd.v @@ -580,18 +580,18 @@ always @* begin addr_next = addr_next + cycle_byte_count_next; op_count_next = op_count_next - cycle_byte_count_next; - if (zero_len_next) begin - status_fifo_mask_next = 1'b0; - end else begin - m_axi_rdata_int_next = m_axi_rdata; - m_axi_rvalid_int_next = 1'b1; - - status_fifo_mask_next = 1'b1; - end + m_axi_rdata_int_next = m_axi_rdata; + m_axi_rvalid_int_next = 1'b1; + status_fifo_mask_next = 1'b1; status_fifo_finish_next = 1'b0; status_fifo_we_next = 1'b1; + if (zero_len_next) begin + m_axi_rvalid_int_next = 1'b0; + status_fifo_mask_next 
= 1'b0; + end + if (!USE_AXI_ID) begin op_table_read_complete_en = 1'b1; end diff --git a/rtl/dma_if_pcie_rd.v b/rtl/dma_if_pcie_rd.v index b0172e3cd..eb482e20b 100644 --- a/rtl/dma_if_pcie_rd.v +++ b/rtl/dma_if_pcie_rd.v @@ -1061,19 +1061,19 @@ always @* begin end else begin // no error - if (zero_len_next) begin - status_fifo_mask_next = 1'b0; - end else begin - tlp_data_int_next = rx_cpl_tlp_data; - tlp_data_valid_int_next = 1'b1; - - status_fifo_mask_next = 1'b1; - end + tlp_data_int_next = rx_cpl_tlp_data; + tlp_data_valid_int_next = 1'b1; + status_fifo_mask_next = 1'b1; status_fifo_finish_next = 1'b0; status_fifo_error_next = DMA_ERROR_NONE; status_fifo_we_next = 1'b1; + if (zero_len_next) begin + tlp_data_valid_int_next = 1'b0; + status_fifo_mask_next = 1'b0; + end + stat_rd_req_finish_tag_next = pcie_tag_next; stat_rd_req_finish_status_next = DMA_ERROR_NONE; diff --git a/rtl/dma_if_pcie_us_rd.v b/rtl/dma_if_pcie_us_rd.v index ae87aa1f1..0fa07b1ff 100644 --- a/rtl/dma_if_pcie_us_rd.v +++ b/rtl/dma_if_pcie_us_rd.v @@ -1064,19 +1064,19 @@ always @* begin end else begin // no error - if (zero_len_next) begin - status_fifo_mask_next = 1'b0; - end else begin - rc_tdata_int_next = s_axis_rc_tdata; - rc_tvalid_int_next = 1'b1; - - status_fifo_mask_next = 1'b1; - end + rc_tdata_int_next = s_axis_rc_tdata; + rc_tvalid_int_next = 1'b1; + status_fifo_mask_next = 1'b1; status_fifo_finish_next = 1'b0; status_fifo_error_next = DMA_ERROR_NONE; status_fifo_we_next = 1'b1; + if (zero_len_next) begin + rc_tvalid_int_next = 1'b0; + status_fifo_mask_next = 1'b0; + end + if (last_cycle) begin if (final_cpl_next) begin // last completion in current read request (PCIe tag) From ac5f942128622a1e42f090c39de8eea8db618a52 Mon Sep 17 00:00:00 2001 From: Alex Forencich Date: Thu, 31 Mar 2022 01:48:36 -0700 Subject: [PATCH 05/21] Support error reporting in AXI DMA interface modules --- rtl/dma_if_axi_rd.v | 79 +++++++++++++++++++++++++++++++++++++++++---- rtl/dma_if_axi_wr.v | 51 
++++++++++++++++++++--------- 2 files changed, 109 insertions(+), 21 deletions(-) diff --git a/rtl/dma_if_axi_rd.v b/rtl/dma_if_axi_rd.v index eb3dab345..e416bf258 100644 --- a/rtl/dma_if_axi_rd.v +++ b/rtl/dma_if_axi_rd.v @@ -249,17 +249,22 @@ reg [OP_TAG_WIDTH-1:0] status_fifo_op_tag[(2**STATUS_FIFO_ADDR_WIDTH)-1:0]; reg [RAM_SEG_COUNT-1:0] status_fifo_mask[(2**STATUS_FIFO_ADDR_WIDTH)-1:0]; (* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *) reg status_fifo_finish[(2**STATUS_FIFO_ADDR_WIDTH)-1:0]; +(* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *) +reg [3:0] status_fifo_error[(2**STATUS_FIFO_ADDR_WIDTH)-1:0]; reg [OP_TAG_WIDTH-1:0] status_fifo_wr_op_tag; reg [RAM_SEG_COUNT-1:0] status_fifo_wr_mask; reg status_fifo_wr_finish; +reg [3:0] status_fifo_wr_error; reg status_fifo_we; reg status_fifo_mask_reg = 1'b0, status_fifo_mask_next; reg status_fifo_finish_reg = 1'b0, status_fifo_finish_next; +reg [3:0] status_fifo_error_reg = 4'd0, status_fifo_error_next; reg status_fifo_we_reg = 1'b0, status_fifo_we_next; reg status_fifo_half_full_reg = 1'b0; reg [OP_TAG_WIDTH-1:0] status_fifo_rd_op_tag_reg = 0, status_fifo_rd_op_tag_next; reg [RAM_SEG_COUNT-1:0] status_fifo_rd_mask_reg = 0, status_fifo_rd_mask_next; reg status_fifo_rd_finish_reg = 1'b0, status_fifo_rd_finish_next; +reg [3:0] status_fifo_rd_error_reg = 4'd0, status_fifo_rd_error_next; reg status_fifo_rd_valid_reg = 1'b0, status_fifo_rd_valid_next; reg [AXI_DATA_WIDTH-1:0] m_axi_rdata_int_reg = {AXI_DATA_WIDTH{1'b0}}, m_axi_rdata_int_next; @@ -316,10 +321,13 @@ reg [CYCLE_COUNT_WIDTH-1:0] op_table_start_cycle_count; reg [TAG_WIDTH-1:0] op_table_start_tag; reg op_table_start_last; reg op_table_start_en; -reg op_table_read_complete_en; reg [OP_TAG_WIDTH+1-1:0] op_table_read_complete_ptr_reg = 0; -reg op_table_write_complete_en; +reg op_table_read_complete_en; +reg [OP_TAG_WIDTH-1:0] op_table_update_status_ptr; +reg [3:0] op_table_update_status_error; +reg op_table_update_status_en; reg 
[OP_TAG_WIDTH-1:0] op_table_write_complete_ptr; +reg op_table_write_complete_en; reg [OP_TAG_WIDTH+1-1:0] op_table_finish_ptr_reg = 0; reg op_table_finish_en; @@ -342,6 +350,12 @@ reg [TAG_WIDTH-1:0] op_table_tag[2**OP_TAG_WIDTH-1:0]; reg op_table_last[2**OP_TAG_WIDTH-1:0]; (* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *) reg op_table_write_complete[2**OP_TAG_WIDTH-1:0]; +(* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *) +reg op_table_error_a [2**OP_TAG_WIDTH-1:0]; +(* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *) +reg op_table_error_b [2**OP_TAG_WIDTH-1:0]; +(* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *) +reg [3:0] op_table_error_code [2**OP_TAG_WIDTH-1:0]; integer i; @@ -356,6 +370,9 @@ initial begin op_table_tag[i] = 0; op_table_last[i] = 0; op_table_write_complete[i] = 0; + op_table_error_a[i] = 0; + op_table_error_b[i] = 0; + op_table_error_code[i] = 0; end end @@ -507,14 +524,13 @@ always @* begin op_tag_next = op_tag_reg; op_table_read_complete_en = 1'b0; - op_table_write_complete_en = 1'b0; - op_table_write_complete_ptr = m_axi_rid; m_axi_rdata_int_next = m_axi_rdata_int_reg; m_axi_rvalid_int_next = 1'b0; status_fifo_mask_next = 1'b1; status_fifo_finish_next = 1'b0; + status_fifo_error_next = DMA_ERROR_NONE; status_fifo_we_next = 1'b0; out_done_ack = {RAM_SEG_COUNT{1'b0}}; @@ -585,6 +601,7 @@ always @* begin status_fifo_mask_next = 1'b1; status_fifo_finish_next = 1'b0; + status_fifo_error_next = DMA_ERROR_NONE; status_fifo_we_next = 1'b1; if (zero_len_next) begin @@ -592,6 +609,16 @@ always @* begin status_fifo_mask_next = 1'b0; end + if (m_axi_rresp == AXI_RESP_SLVERR) begin + m_axi_rvalid_int_next = 1'b0; + status_fifo_mask_next = 1'b0; + status_fifo_error_next = DMA_ERROR_AXI_RD_SLVERR; + end else if (m_axi_rresp == AXI_RESP_DECERR) begin + m_axi_rvalid_int_next = 1'b0; + status_fifo_mask_next = 1'b0; + status_fifo_error_next = DMA_ERROR_AXI_RD_DECERR; + end + if (!USE_AXI_ID) begin 
op_table_read_complete_en = 1'b1; end @@ -640,8 +667,19 @@ always @* begin status_fifo_mask_next = 1'b1; status_fifo_finish_next = 1'b0; + status_fifo_error_next = DMA_ERROR_NONE; status_fifo_we_next = 1'b1; + if (m_axi_rresp == AXI_RESP_SLVERR) begin + m_axi_rvalid_int_next = 1'b0; + status_fifo_mask_next = 1'b0; + status_fifo_error_next = DMA_ERROR_AXI_RD_SLVERR; + end else if (m_axi_rresp == AXI_RESP_DECERR) begin + m_axi_rvalid_int_next = 1'b0; + status_fifo_mask_next = 1'b0; + status_fifo_error_next = DMA_ERROR_AXI_RD_DECERR; + end + if (m_axi_rlast) begin status_fifo_finish_next = 1'b1; axi_state_next = AXI_STATE_IDLE; @@ -659,12 +697,14 @@ always @* begin status_fifo_wr_op_tag = op_tag_reg; status_fifo_wr_mask = status_fifo_mask_reg ? ram_mask_reg : 0; status_fifo_wr_finish = status_fifo_finish_reg; + status_fifo_wr_error = status_fifo_error_reg; status_fifo_we = 1'b0; if (status_fifo_we_reg) begin status_fifo_wr_op_tag = op_tag_reg; status_fifo_wr_mask = status_fifo_mask_reg ? ram_mask_reg : 0; status_fifo_wr_finish = status_fifo_finish_reg; + status_fifo_wr_error = status_fifo_error_reg; status_fifo_we = 1'b1; end @@ -672,6 +712,11 @@ always @* begin status_fifo_rd_mask_next = status_fifo_rd_mask_reg; status_fifo_rd_finish_next = status_fifo_rd_finish_reg; status_fifo_rd_valid_next = status_fifo_rd_valid_reg; + status_fifo_rd_error_next = status_fifo_rd_error_reg; + + op_table_update_status_ptr = status_fifo_rd_op_tag_reg; + op_table_update_status_error = status_fifo_rd_error_reg; + op_table_update_status_en = 1'b0; op_table_write_complete_ptr = status_fifo_rd_op_tag_reg; op_table_write_complete_en = 1'b0; @@ -679,6 +724,7 @@ always @* begin if (status_fifo_rd_valid_reg && (status_fifo_rd_mask_reg & ~out_done) == 0) begin // got write completion, pop and return status status_fifo_rd_valid_next = 1'b0; + op_table_update_status_en = 1'b1; out_done_ack = status_fifo_rd_mask_reg; @@ -694,6 +740,7 @@ always @* begin status_fifo_rd_op_tag_next = 
status_fifo_op_tag[status_fifo_rd_ptr_reg[STATUS_FIFO_ADDR_WIDTH-1:0]]; status_fifo_rd_mask_next = status_fifo_mask[status_fifo_rd_ptr_reg[STATUS_FIFO_ADDR_WIDTH-1:0]]; status_fifo_rd_finish_next = status_fifo_finish[status_fifo_rd_ptr_reg[STATUS_FIFO_ADDR_WIDTH-1:0]]; + status_fifo_rd_error_next = status_fifo_error[status_fifo_rd_ptr_reg[STATUS_FIFO_ADDR_WIDTH-1:0]]; status_fifo_rd_valid_next = 1'b1; status_fifo_rd_ptr_next = status_fifo_rd_ptr_reg + 1; end @@ -701,16 +748,24 @@ always @* begin // commit operations in-order op_table_finish_en = 1'b0; + if (m_axis_read_desc_status_valid_reg) begin + m_axis_read_desc_status_error_next = DMA_ERROR_NONE; + end else begin + m_axis_read_desc_status_error_next = m_axis_read_desc_status_error_reg; + end + m_axis_read_desc_status_tag_next = op_table_tag[op_table_finish_ptr_reg[OP_TAG_WIDTH-1:0]]; - m_axis_read_desc_status_error_next = 0; m_axis_read_desc_status_valid_next = 1'b0; if (op_table_active[op_table_finish_ptr_reg[OP_TAG_WIDTH-1:0]] && op_table_write_complete[op_table_finish_ptr_reg[OP_TAG_WIDTH-1:0]] && op_table_finish_ptr_reg != op_table_start_ptr_reg) begin op_table_finish_en = 1'b1; + if (op_table_error_a[op_table_finish_ptr_reg[OP_TAG_WIDTH-1:0]] != op_table_error_b[op_table_finish_ptr_reg[OP_TAG_WIDTH-1:0]]) begin + m_axis_read_desc_status_error_next = op_table_error_code[op_table_finish_ptr_reg[OP_TAG_WIDTH-1:0]]; + end + if (op_table_last[op_table_finish_ptr_reg[OP_TAG_WIDTH-1:0]]) begin m_axis_read_desc_status_tag_next = op_table_tag[op_table_finish_ptr_reg[OP_TAG_WIDTH-1:0]]; - m_axis_read_desc_status_error_next = 0; m_axis_read_desc_status_valid_next = 1'b1; end end @@ -762,17 +817,20 @@ always @(posedge clk) begin status_fifo_op_tag[status_fifo_wr_ptr_reg[STATUS_FIFO_ADDR_WIDTH-1:0]] <= status_fifo_wr_op_tag; status_fifo_mask[status_fifo_wr_ptr_reg[STATUS_FIFO_ADDR_WIDTH-1:0]] <= status_fifo_wr_mask; status_fifo_finish[status_fifo_wr_ptr_reg[STATUS_FIFO_ADDR_WIDTH-1:0]] <= status_fifo_wr_finish; + 
status_fifo_error[status_fifo_wr_ptr_reg[STATUS_FIFO_ADDR_WIDTH-1:0]] <= status_fifo_wr_error; status_fifo_wr_ptr_reg <= status_fifo_wr_ptr_reg + 1; end status_fifo_rd_ptr_reg <= status_fifo_rd_ptr_next; status_fifo_mask_reg <= status_fifo_mask_next; status_fifo_finish_reg <= status_fifo_finish_next; + status_fifo_error_reg <= status_fifo_error_next; status_fifo_we_reg <= status_fifo_we_next; status_fifo_rd_op_tag_reg <= status_fifo_rd_op_tag_next; status_fifo_rd_mask_reg <= status_fifo_rd_mask_next; status_fifo_rd_finish_reg <= status_fifo_rd_finish_next; + status_fifo_rd_error_reg <= status_fifo_rd_error_next; status_fifo_rd_valid_reg <= status_fifo_rd_valid_next; status_fifo_half_full_reg <= $unsigned(status_fifo_wr_ptr_reg - status_fifo_rd_ptr_reg) >= 2**(STATUS_FIFO_ADDR_WIDTH-1); @@ -789,12 +847,20 @@ always @(posedge clk) begin op_table_tag[op_table_start_ptr_reg[OP_TAG_WIDTH-1:0]] <= op_table_start_tag; op_table_last[op_table_start_ptr_reg[OP_TAG_WIDTH-1:0]] <= op_table_start_last; op_table_write_complete[op_table_start_ptr_reg[OP_TAG_WIDTH-1:0]] <= 1'b0; + op_table_error_a[op_table_start_ptr_reg[OP_TAG_WIDTH-1:0]] <= op_table_error_b[op_table_start_ptr_reg[OP_TAG_WIDTH-1:0]]; end if (!USE_AXI_ID && op_table_read_complete_en) begin op_table_read_complete_ptr_reg <= op_table_read_complete_ptr_reg + 1; end + if (op_table_update_status_en) begin + if (op_table_update_status_error != 0) begin + op_table_error_code[op_table_update_status_ptr] <= op_table_update_status_error; + op_table_error_b[op_table_update_status_ptr] <= !op_table_error_a[op_table_update_status_ptr]; + end + end + if (op_table_write_complete_en) begin op_table_write_complete[op_table_write_complete_ptr] <= 1'b1; end @@ -814,6 +880,7 @@ always @(posedge clk) begin m_axi_rready_reg <= 1'b0; s_axis_read_desc_ready_reg <= 1'b0; + m_axis_read_desc_status_error_reg <= 4'd0; m_axis_read_desc_status_valid_reg <= 1'b0; status_fifo_wr_ptr_reg <= 0; diff --git a/rtl/dma_if_axi_wr.v b/rtl/dma_if_axi_wr.v 
index abdb784a6..a23520acc 100644 --- a/rtl/dma_if_axi_wr.v +++ b/rtl/dma_if_axi_wr.v @@ -342,8 +342,9 @@ reg [OP_TAG_WIDTH+1-1:0] op_table_tx_start_ptr_reg = 0; reg op_table_tx_start_en; reg [OP_TAG_WIDTH+1-1:0] op_table_tx_finish_ptr_reg = 0; reg op_table_tx_finish_en; -reg op_table_write_complete_en; reg [OP_TAG_WIDTH-1:0] op_table_write_complete_ptr; +reg [3:0] op_table_write_complete_error; +reg op_table_write_complete_en; reg [OP_TAG_WIDTH+1-1:0] op_table_finish_ptr_reg = 0; reg op_table_finish_en; @@ -363,6 +364,8 @@ reg [RAM_OFFSET_WIDTH-1:0] op_table_offset[2**OP_TAG_WIDTH-1:0]; reg [TAG_WIDTH-1:0] op_table_tag[2**OP_TAG_WIDTH-1:0]; (* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *) reg op_table_last[2**OP_TAG_WIDTH-1:0]; +(* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *) +reg [3:0] op_table_error_code [2**OP_TAG_WIDTH-1:0]; integer i; @@ -375,6 +378,7 @@ initial begin op_table_offset[i] = 0; op_table_tag[i] = 0; op_table_last[i] = 0; + op_table_error_code[i] = 0; end end @@ -686,9 +690,6 @@ always @* begin op_table_tx_start_en = 1'b0; op_table_tx_finish_en = 1'b0; - op_table_write_complete_en = 1'b0; - op_table_write_complete_ptr = m_axi_bid; - m_axi_awid_next = m_axi_awid_reg; m_axi_awaddr_next = m_axi_awaddr_reg; m_axi_awlen_next = m_axi_awlen_reg; @@ -789,27 +790,44 @@ always @* begin mask_fifo_rd_ptr_next = mask_fifo_rd_ptr_reg+1; end + op_table_write_complete_ptr = m_axi_bid; + if (m_axi_bresp == AXI_RESP_SLVERR) begin + op_table_write_complete_error = DMA_ERROR_AXI_WR_SLVERR; + end else if (m_axi_bresp == AXI_RESP_DECERR) begin + op_table_write_complete_error = DMA_ERROR_AXI_WR_DECERR; + end else begin + op_table_write_complete_error = DMA_ERROR_NONE; + end + op_table_write_complete_en = 1'b0; + + m_axis_write_desc_status_tag_next = op_table_tag[op_table_finish_ptr_reg[OP_TAG_WIDTH-1:0]]; + if (m_axis_write_desc_status_valid_reg) begin + m_axis_write_desc_status_error_next = DMA_ERROR_NONE; + end else begin + 
m_axis_write_desc_status_error_next = m_axis_write_desc_status_error_reg; + end + m_axis_write_desc_status_valid_next = 1'b0; + if (USE_AXI_ID) begin // accept write completions m_axi_bready_next = 1'b1; if (m_axi_bready && m_axi_bvalid) begin - op_table_write_complete_en = 1'b1; op_table_write_complete_ptr = m_axi_bid; + op_table_write_complete_en = 1'b1; end // commit operations in-order op_table_finish_en = 1'b0; - m_axis_write_desc_status_tag_next = op_table_tag[op_table_finish_ptr_reg[OP_TAG_WIDTH-1:0]]; - m_axis_write_desc_status_error_next = 0; - m_axis_write_desc_status_valid_next = 1'b0; - if (op_table_active[op_table_finish_ptr_reg[OP_TAG_WIDTH-1:0]] && op_table_write_complete[op_table_finish_ptr_reg[OP_TAG_WIDTH-1:0]] && op_table_finish_ptr_reg != op_table_tx_finish_ptr_reg) begin op_table_finish_en = 1'b1; + if (op_table_error_code[op_table_finish_ptr_reg[OP_TAG_WIDTH-1:0]] != DMA_ERROR_NONE) begin + m_axis_write_desc_status_error_next = op_table_error_code[op_table_finish_ptr_reg[OP_TAG_WIDTH-1:0]]; + end + if (op_table_last[op_table_finish_ptr_reg[OP_TAG_WIDTH-1:0]]) begin m_axis_write_desc_status_tag_next = op_table_tag[op_table_finish_ptr_reg[OP_TAG_WIDTH-1:0]]; - m_axis_write_desc_status_error_next = 0; m_axis_write_desc_status_valid_next = 1'b1; end end @@ -817,17 +835,18 @@ always @* begin // accept write completions op_table_finish_en = 1'b0; - m_axis_write_desc_status_tag_next = op_table_tag[op_table_finish_ptr_reg[OP_TAG_WIDTH-1:0]]; - m_axis_write_desc_status_error_next = 0; - m_axis_write_desc_status_valid_next = 1'b0; - m_axi_bready_next = 1'b1; if (m_axi_bready && m_axi_bvalid) begin op_table_finish_en = 1'b1; + if (m_axi_bresp == AXI_RESP_SLVERR) begin + m_axis_write_desc_status_error_next = DMA_ERROR_AXI_WR_SLVERR; + end else if (m_axi_bresp == AXI_RESP_DECERR) begin + m_axis_write_desc_status_error_next = DMA_ERROR_AXI_WR_DECERR; + end + if (op_table_last[op_table_finish_ptr_reg[OP_TAG_WIDTH-1:0]]) begin 
m_axis_write_desc_status_tag_next = op_table_tag[op_table_finish_ptr_reg[OP_TAG_WIDTH-1:0]]; - m_axis_write_desc_status_error_next = 0; m_axis_write_desc_status_valid_next = 1'b1; end end @@ -926,6 +945,7 @@ always @(posedge clk) begin if (USE_AXI_ID && op_table_write_complete_en) begin op_table_write_complete[op_table_write_complete_ptr] <= 1'b1; + op_table_error_code[op_table_write_complete_ptr] <= op_table_write_complete_error; end if (op_table_finish_en) begin @@ -946,6 +966,7 @@ always @(posedge clk) begin m_axi_bready_reg <= 1'b0; s_axis_write_desc_ready_reg <= 1'b0; + m_axis_write_desc_status_error_reg <= 4'd0; m_axis_write_desc_status_valid_reg <= 1'b0; ram_rd_cmd_valid_reg <= {RAM_SEG_COUNT{1'b0}}; From 2aeb820d35dec59078efcd15245a4e643ef0dd20 Mon Sep 17 00:00:00 2001 From: Alex Forencich Date: Thu, 31 Mar 2022 16:42:46 -0700 Subject: [PATCH 06/21] Add operation table size assertion in AXI DMA IF modules --- rtl/dma_if_axi_rd.v | 5 +++++ rtl/dma_if_axi_wr.v | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/rtl/dma_if_axi_rd.v b/rtl/dma_if_axi_rd.v index e416bf258..4b02f36f0 100644 --- a/rtl/dma_if_axi_rd.v +++ b/rtl/dma_if_axi_rd.v @@ -185,6 +185,11 @@ initial begin $error("Error: RAM_ADDR_WIDTH does not match RAM configuration (instance %m)"); $finish; end + + if (OP_TABLE_SIZE > 2**AXI_ID_WIDTH) begin + $error("Error: AXI_ID_WIDTH insufficient for requested OP_TABLE_SIZE (instance %m)"); + $finish; + end end localparam [1:0] diff --git a/rtl/dma_if_axi_wr.v b/rtl/dma_if_axi_wr.v index a23520acc..1008b4297 100644 --- a/rtl/dma_if_axi_wr.v +++ b/rtl/dma_if_axi_wr.v @@ -187,6 +187,11 @@ initial begin $error("Error: RAM_ADDR_WIDTH does not match RAM configuration (instance %m)"); $finish; end + + if (OP_TABLE_SIZE > 2**AXI_ID_WIDTH) begin + $error("Error: AXI_ID_WIDTH insufficient for requested OP_TABLE_SIZE (instance %m)"); + $finish; + end end localparam [1:0] From dd7cc63d55a868dfccbd41f3dbf0f08f3083fa3d Mon Sep 17 00:00:00 2001 From: Alex 
Forencich Date: Thu, 31 Mar 2022 17:04:03 -0700 Subject: [PATCH 07/21] Correct reporting of request length statistics for zero-length operations in PCIe DMA IF modules --- rtl/dma_if_pcie_rd.v | 2 +- rtl/dma_if_pcie_wr.v | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/rtl/dma_if_pcie_rd.v b/rtl/dma_if_pcie_rd.v index eb482e20b..7e483f437 100644 --- a/rtl/dma_if_pcie_rd.v +++ b/rtl/dma_if_pcie_rd.v @@ -772,7 +772,7 @@ always @* begin req_pcie_tag_valid_next = 1'b0; stat_rd_req_start_tag_next = req_pcie_tag_reg; - stat_rd_req_start_len_next = req_tlp_count_next; + stat_rd_req_start_len_next = req_zero_len_reg ? 0 : req_tlp_count_next; stat_rd_req_start_valid_next = 1'b1; if (!req_last_tlp) begin diff --git a/rtl/dma_if_pcie_wr.v b/rtl/dma_if_pcie_wr.v index dc0ad81af..c746120a6 100644 --- a/rtl/dma_if_pcie_wr.v +++ b/rtl/dma_if_pcie_wr.v @@ -565,7 +565,7 @@ always @* begin op_table_start_en = 1'b1; stat_wr_req_start_tag_next = op_table_start_ptr_reg; - stat_wr_req_start_len_next = tlp_count_reg; + stat_wr_req_start_len_next = zero_len_reg ? 
0 : tlp_count_reg; stat_wr_req_start_valid_next = 1'b1; // TLP size computation From 4bbd187567b1534459d39dd33653463dd2821f0a Mon Sep 17 00:00:00 2001 From: Alex Forencich Date: Thu, 31 Mar 2022 17:56:05 -0700 Subject: [PATCH 08/21] Add statistics outputs to AXI DMA IF modules --- rtl/dma_if_axi.v | 74 +++++++++++++++++++++++++-- rtl/dma_if_axi_rd.v | 117 +++++++++++++++++++++++++++++++++++++++++- rtl/dma_if_axi_wr.v | 120 +++++++++++++++++++++++++++++++++++++++++++- 3 files changed, 306 insertions(+), 5 deletions(-) diff --git a/rtl/dma_if_axi.v b/rtl/dma_if_axi.v index 1a2dd824d..e40bc1ad8 100644 --- a/rtl/dma_if_axi.v +++ b/rtl/dma_if_axi.v @@ -169,7 +169,39 @@ module dma_if_axi # * Configuration */ input wire read_enable, - input wire write_enable + input wire write_enable, + + /* + * Statistics + */ + output wire [$clog2(READ_OP_TABLE_SIZE)-1:0] stat_rd_op_start_tag, + output wire [LEN_WIDTH-1:0] stat_rd_op_start_len, + output wire stat_rd_op_start_valid, + output wire [$clog2(READ_OP_TABLE_SIZE)-1:0] stat_rd_op_finish_tag, + output wire [3:0] stat_rd_op_finish_status, + output wire stat_rd_op_finish_valid, + output wire [$clog2(READ_OP_TABLE_SIZE)-1:0] stat_rd_req_start_tag, + output wire [12:0] stat_rd_req_start_len, + output wire stat_rd_req_start_valid, + output wire [$clog2(READ_OP_TABLE_SIZE)-1:0] stat_rd_req_finish_tag, + output wire [3:0] stat_rd_req_finish_status, + output wire stat_rd_req_finish_valid, + output wire stat_rd_op_table_full, + output wire stat_rd_tx_stall, + output wire [$clog2(WRITE_OP_TABLE_SIZE)-1:0] stat_wr_op_start_tag, + output wire [LEN_WIDTH-1:0] stat_wr_op_start_len, + output wire stat_wr_op_start_valid, + output wire [$clog2(WRITE_OP_TABLE_SIZE)-1:0] stat_wr_op_finish_tag, + output wire [3:0] stat_wr_op_finish_status, + output wire stat_wr_op_finish_valid, + output wire [$clog2(WRITE_OP_TABLE_SIZE)-1:0] stat_wr_req_start_tag, + output wire [12:0] stat_wr_req_start_len, + output wire stat_wr_req_start_valid, + output wire 
[$clog2(WRITE_OP_TABLE_SIZE)-1:0] stat_wr_req_finish_tag, + output wire [3:0] stat_wr_req_finish_status, + output wire stat_wr_req_finish_valid, + output wire stat_wr_op_table_full, + output wire stat_wr_tx_stall ); dma_if_axi_rd #( @@ -245,7 +277,25 @@ dma_if_axi_rd_inst ( /* * Configuration */ - .enable(read_enable) + .enable(read_enable), + + /* + * Statistics + */ + .stat_rd_op_start_tag(stat_rd_op_start_tag), + .stat_rd_op_start_len(stat_rd_op_start_len), + .stat_rd_op_start_valid(stat_rd_op_start_valid), + .stat_rd_op_finish_tag(stat_rd_op_finish_tag), + .stat_rd_op_finish_status(stat_rd_op_finish_status), + .stat_rd_op_finish_valid(stat_rd_op_finish_valid), + .stat_rd_req_start_tag(stat_rd_req_start_tag), + .stat_rd_req_start_len(stat_rd_req_start_len), + .stat_rd_req_start_valid(stat_rd_req_start_valid), + .stat_rd_req_finish_tag(stat_rd_req_finish_tag), + .stat_rd_req_finish_status(stat_rd_req_finish_status), + .stat_rd_req_finish_valid(stat_rd_req_finish_valid), + .stat_rd_op_table_full(stat_rd_op_table_full), + .stat_rd_tx_stall(stat_rd_tx_stall) ); dma_if_axi_wr #( @@ -324,7 +374,25 @@ dma_if_axi_wr_inst ( /* * Configuration */ - .enable(write_enable) + .enable(write_enable), + + /* + * Statistics + */ + .stat_wr_op_start_tag(stat_wr_op_start_tag), + .stat_wr_op_start_len(stat_wr_op_start_len), + .stat_wr_op_start_valid(stat_wr_op_start_valid), + .stat_wr_op_finish_tag(stat_wr_op_finish_tag), + .stat_wr_op_finish_status(stat_wr_op_finish_status), + .stat_wr_op_finish_valid(stat_wr_op_finish_valid), + .stat_wr_req_start_tag(stat_wr_req_start_tag), + .stat_wr_req_start_len(stat_wr_req_start_len), + .stat_wr_req_start_valid(stat_wr_req_start_valid), + .stat_wr_req_finish_tag(stat_wr_req_finish_tag), + .stat_wr_req_finish_status(stat_wr_req_finish_status), + .stat_wr_req_finish_valid(stat_wr_req_finish_valid), + .stat_wr_op_table_full(stat_wr_op_table_full), + .stat_wr_tx_stall(stat_wr_tx_stall) ); endmodule diff --git a/rtl/dma_if_axi_rd.v 
b/rtl/dma_if_axi_rd.v index 4b02f36f0..a976220de 100644 --- a/rtl/dma_if_axi_rd.v +++ b/rtl/dma_if_axi_rd.v @@ -120,7 +120,25 @@ module dma_if_axi_rd # /* * Configuration */ - input wire enable + input wire enable, + + /* + * Statistics + */ + output wire [$clog2(OP_TABLE_SIZE)-1:0] stat_rd_op_start_tag, + output wire [LEN_WIDTH-1:0] stat_rd_op_start_len, + output wire stat_rd_op_start_valid, + output wire [$clog2(OP_TABLE_SIZE)-1:0] stat_rd_op_finish_tag, + output wire [3:0] stat_rd_op_finish_status, + output wire stat_rd_op_finish_valid, + output wire [$clog2(OP_TABLE_SIZE)-1:0] stat_rd_req_start_tag, + output wire [12:0] stat_rd_req_start_len, + output wire stat_rd_req_start_valid, + output wire [$clog2(OP_TABLE_SIZE)-1:0] stat_rd_req_finish_tag, + output wire [3:0] stat_rd_req_finish_status, + output wire stat_rd_req_finish_valid, + output wire stat_rd_op_table_full, + output wire stat_rd_tx_stall ); parameter RAM_WORD_WIDTH = RAM_SEG_BE_WIDTH; @@ -287,6 +305,21 @@ reg [TAG_WIDTH-1:0] m_axis_read_desc_status_tag_reg = {TAG_WIDTH{1'b0}}, m_axis_ reg [3:0] m_axis_read_desc_status_error_reg = 4'd0, m_axis_read_desc_status_error_next; reg m_axis_read_desc_status_valid_reg = 1'b0, m_axis_read_desc_status_valid_next; +reg [OP_TAG_WIDTH-1:0] stat_rd_op_start_tag_reg = 0, stat_rd_op_start_tag_next; +reg [LEN_WIDTH-1:0] stat_rd_op_start_len_reg = 0, stat_rd_op_start_len_next; +reg stat_rd_op_start_valid_reg = 1'b0, stat_rd_op_start_valid_next; +reg [OP_TAG_WIDTH-1:0] stat_rd_op_finish_tag_reg = 0, stat_rd_op_finish_tag_next; +reg [3:0] stat_rd_op_finish_status_reg = 4'd0, stat_rd_op_finish_status_next; +reg stat_rd_op_finish_valid_reg = 1'b0, stat_rd_op_finish_valid_next; +reg [OP_TAG_WIDTH-1:0] stat_rd_req_start_tag_reg = 0, stat_rd_req_start_tag_next; +reg [12:0] stat_rd_req_start_len_reg = 13'd0, stat_rd_req_start_len_next; +reg stat_rd_req_start_valid_reg = 1'b0, stat_rd_req_start_valid_next; +reg [OP_TAG_WIDTH-1:0] stat_rd_req_finish_tag_reg = 0, 
stat_rd_req_finish_tag_next; +reg [3:0] stat_rd_req_finish_status_reg = 4'd0, stat_rd_req_finish_status_next; +reg stat_rd_req_finish_valid_reg = 1'b0, stat_rd_req_finish_valid_next; +reg stat_rd_op_table_full_reg = 1'b0, stat_rd_op_table_full_next; +reg stat_rd_tx_stall_reg = 1'b0, stat_rd_tx_stall_next; + // internal datapath reg [RAM_SEG_COUNT*RAM_SEL_WIDTH-1:0] ram_wr_cmd_sel_int; reg [RAM_SEG_COUNT*RAM_SEG_BE_WIDTH-1:0] ram_wr_cmd_be_int; @@ -315,6 +348,21 @@ assign m_axis_read_desc_status_tag = m_axis_read_desc_status_tag_reg; assign m_axis_read_desc_status_error = m_axis_read_desc_status_error_reg; assign m_axis_read_desc_status_valid = m_axis_read_desc_status_valid_reg; +assign stat_rd_op_start_tag = stat_rd_op_start_tag_reg; +assign stat_rd_op_start_len = stat_rd_op_start_len_reg; +assign stat_rd_op_start_valid = stat_rd_op_start_valid_reg; +assign stat_rd_op_finish_tag = stat_rd_op_finish_tag_reg; +assign stat_rd_op_finish_status = stat_rd_op_finish_status_reg; +assign stat_rd_op_finish_valid = stat_rd_op_finish_valid_reg; +assign stat_rd_req_start_tag = stat_rd_req_start_tag_reg; +assign stat_rd_req_start_len = stat_rd_req_start_len_reg; +assign stat_rd_req_start_valid = stat_rd_req_start_valid_reg; +assign stat_rd_req_finish_tag = stat_rd_req_finish_tag_reg; +assign stat_rd_req_finish_status = stat_rd_req_finish_status_reg; +assign stat_rd_req_finish_valid = stat_rd_req_finish_valid_reg; +assign stat_rd_op_table_full = stat_rd_op_table_full_reg; +assign stat_rd_tx_stall = stat_rd_tx_stall_reg; + // operation tag management reg [OP_TAG_WIDTH+1-1:0] op_table_start_ptr_reg = 0; reg [AXI_ADDR_WIDTH-1:0] op_table_start_axi_addr; @@ -386,6 +434,15 @@ always @* begin s_axis_read_desc_ready_next = 1'b0; + stat_rd_op_start_tag_next = stat_rd_op_start_tag_reg; + stat_rd_op_start_len_next = stat_rd_op_start_len_reg; + stat_rd_op_start_valid_next = 1'b0; + stat_rd_req_start_tag_next = stat_rd_req_start_tag_reg; + stat_rd_req_start_len_next = 
stat_rd_req_start_len_reg; + stat_rd_req_start_valid_next = 1'b0; + stat_rd_op_table_full_next = !(!op_table_active[op_table_start_ptr_reg[OP_TAG_WIDTH-1:0]] && ($unsigned(op_table_start_ptr_reg - op_table_finish_ptr_reg) < 2**OP_TAG_WIDTH)); + stat_rd_tx_stall_next = m_axi_arvalid_reg && !m_axi_arready; + req_axi_addr_next = req_axi_addr_reg; req_ram_sel_next = req_ram_sel_reg; req_ram_addr_next = req_ram_addr_reg; @@ -449,6 +506,11 @@ always @* begin if (s_axis_read_desc_ready && s_axis_read_desc_valid) begin s_axis_read_desc_ready_next = 1'b0; + + stat_rd_op_start_tag_next = stat_rd_op_start_tag_reg+1; + stat_rd_op_start_len_next = s_axis_read_desc_len; + stat_rd_op_start_valid_next = 1'b1; + req_state_next = REQ_STATE_START; end else begin req_state_next = REQ_STATE_IDLE; @@ -470,6 +532,10 @@ always @* begin op_table_start_last = req_op_count_reg == req_tr_count_next; op_table_start_en = 1'b1; + stat_rd_req_start_tag_next = op_table_start_ptr_reg[OP_TAG_WIDTH-1:0]; + stat_rd_req_start_len_next = req_zero_len_reg ? 
0 : req_tr_count_reg; + stat_rd_req_start_valid_next = 1'b1; + m_axi_arid_next = op_table_start_ptr_reg[OP_TAG_WIDTH-1:0]; m_axi_araddr_next = req_axi_addr_reg; m_axi_arlen_next = op_table_start_cycle_count; @@ -513,6 +579,13 @@ always @* begin m_axi_rready_next = 1'b0; + stat_rd_op_finish_tag_next = stat_rd_op_finish_tag_reg; + stat_rd_op_finish_status_next = stat_rd_op_finish_status_reg; + stat_rd_op_finish_valid_next = 1'b0; + stat_rd_req_finish_tag_next = stat_rd_req_finish_tag_reg; + stat_rd_req_finish_status_next = stat_rd_req_finish_status_reg; + stat_rd_req_finish_valid_next = 1'b0; + ram_sel_next = ram_sel_reg; addr_next = addr_reg; addr_delay_next = addr_delay_reg; @@ -624,12 +697,17 @@ always @* begin status_fifo_error_next = DMA_ERROR_AXI_RD_DECERR; end + stat_rd_req_finish_tag_next = op_tag_next; + stat_rd_req_finish_status_next = status_fifo_error_next; + stat_rd_req_finish_valid_next = 1'b0; + if (!USE_AXI_ID) begin op_table_read_complete_en = 1'b1; end if (m_axi_rlast) begin status_fifo_finish_next = 1'b1; + stat_rd_req_finish_valid_next = 1'b1; axi_state_next = AXI_STATE_IDLE; end else begin axi_state_next = AXI_STATE_WRITE; @@ -685,8 +763,13 @@ always @* begin status_fifo_error_next = DMA_ERROR_AXI_RD_DECERR; end + stat_rd_req_finish_tag_next = op_tag_next; + stat_rd_req_finish_status_next = status_fifo_error_next; + stat_rd_req_finish_valid_next = 1'b0; + if (m_axi_rlast) begin status_fifo_finish_next = 1'b1; + stat_rd_req_finish_valid_next = 1'b1; axi_state_next = AXI_STATE_IDLE; end else begin axi_state_next = AXI_STATE_WRITE; @@ -762,6 +845,10 @@ always @* begin m_axis_read_desc_status_tag_next = op_table_tag[op_table_finish_ptr_reg[OP_TAG_WIDTH-1:0]]; m_axis_read_desc_status_valid_next = 1'b0; + stat_rd_op_finish_tag_next = stat_rd_op_finish_tag_reg; + stat_rd_op_finish_status_next = m_axis_read_desc_status_error_next; + stat_rd_op_finish_valid_next = 1'b0; + if (op_table_active[op_table_finish_ptr_reg[OP_TAG_WIDTH-1:0]] && 
op_table_write_complete[op_table_finish_ptr_reg[OP_TAG_WIDTH-1:0]] && op_table_finish_ptr_reg != op_table_start_ptr_reg) begin op_table_finish_en = 1'b1; @@ -769,9 +856,13 @@ always @* begin m_axis_read_desc_status_error_next = op_table_error_code[op_table_finish_ptr_reg[OP_TAG_WIDTH-1:0]]; end + stat_rd_op_finish_status_next = m_axis_read_desc_status_error_next; + if (op_table_last[op_table_finish_ptr_reg[OP_TAG_WIDTH-1:0]]) begin m_axis_read_desc_status_tag_next = op_table_tag[op_table_finish_ptr_reg[OP_TAG_WIDTH-1:0]]; m_axis_read_desc_status_valid_next = 1'b1; + stat_rd_op_finish_tag_next = stat_rd_op_finish_tag_reg + 1; + stat_rd_op_finish_valid_next = 1'b1; end end end @@ -818,6 +909,21 @@ always @(posedge clk) begin m_axis_read_desc_status_error_reg <= m_axis_read_desc_status_error_next; m_axis_read_desc_status_valid_reg <= m_axis_read_desc_status_valid_next; + stat_rd_op_start_tag_reg <= stat_rd_op_start_tag_next; + stat_rd_op_start_len_reg <= stat_rd_op_start_len_next; + stat_rd_op_start_valid_reg <= stat_rd_op_start_valid_next; + stat_rd_op_finish_tag_reg <= stat_rd_op_finish_tag_next; + stat_rd_op_finish_status_reg <= stat_rd_op_finish_status_next; + stat_rd_op_finish_valid_reg <= stat_rd_op_finish_valid_next; + stat_rd_req_start_tag_reg <= stat_rd_req_start_tag_next; + stat_rd_req_start_len_reg <= stat_rd_req_start_len_next; + stat_rd_req_start_valid_reg <= stat_rd_req_start_valid_next; + stat_rd_req_finish_tag_reg <= stat_rd_req_finish_tag_next; + stat_rd_req_finish_status_reg <= stat_rd_req_finish_status_next; + stat_rd_req_finish_valid_reg <= stat_rd_req_finish_valid_next; + stat_rd_op_table_full_reg <= stat_rd_op_table_full_next; + stat_rd_tx_stall_reg <= stat_rd_tx_stall_next; + if (status_fifo_we) begin status_fifo_op_tag[status_fifo_wr_ptr_reg[STATUS_FIFO_ADDR_WIDTH-1:0]] <= status_fifo_wr_op_tag; status_fifo_mask[status_fifo_wr_ptr_reg[STATUS_FIFO_ADDR_WIDTH-1:0]] <= status_fifo_wr_mask; @@ -888,6 +994,15 @@ always @(posedge clk) begin 
m_axis_read_desc_status_error_reg = 4'd0; m_axis_read_desc_status_valid_reg <= 1'b0; + stat_rd_op_start_tag_reg <= 0; + stat_rd_op_start_valid_reg <= 1'b0; + stat_rd_op_finish_tag_reg <= 0; + stat_rd_op_finish_valid_reg <= 1'b0; + stat_rd_req_start_valid_reg <= 1'b0; + stat_rd_req_finish_valid_reg <= 1'b0; + stat_rd_op_table_full_reg <= 1'b0; + stat_rd_tx_stall_reg <= 1'b0; + status_fifo_wr_ptr_reg <= 0; status_fifo_rd_ptr_reg <= 0; status_fifo_we_reg <= 1'b0; diff --git a/rtl/dma_if_axi_wr.v b/rtl/dma_if_axi_wr.v index 1008b4297..79e084fee 100644 --- a/rtl/dma_if_axi_wr.v +++ b/rtl/dma_if_axi_wr.v @@ -123,7 +123,25 @@ module dma_if_axi_wr # /* * Configuration */ - input wire enable + input wire enable, + + /* + * Statistics + */ + output wire [$clog2(OP_TABLE_SIZE)-1:0] stat_wr_op_start_tag, + output wire [LEN_WIDTH-1:0] stat_wr_op_start_len, + output wire stat_wr_op_start_valid, + output wire [$clog2(OP_TABLE_SIZE)-1:0] stat_wr_op_finish_tag, + output wire [3:0] stat_wr_op_finish_status, + output wire stat_wr_op_finish_valid, + output wire [$clog2(OP_TABLE_SIZE)-1:0] stat_wr_req_start_tag, + output wire [12:0] stat_wr_req_start_len, + output wire stat_wr_req_start_valid, + output wire [$clog2(OP_TABLE_SIZE)-1:0] stat_wr_req_finish_tag, + output wire [3:0] stat_wr_req_finish_status, + output wire stat_wr_req_finish_valid, + output wire stat_wr_op_table_full, + output wire stat_wr_tx_stall ); parameter RAM_WORD_WIDTH = RAM_SEG_BE_WIDTH; @@ -304,6 +322,21 @@ reg [RAM_SEG_COUNT*RAM_SEG_ADDR_WIDTH-1:0] ram_rd_cmd_addr_reg = 0, ram_rd_cmd_a reg [RAM_SEG_COUNT-1:0] ram_rd_cmd_valid_reg = 0, ram_rd_cmd_valid_next; reg [RAM_SEG_COUNT-1:0] ram_rd_resp_ready_cmb; +reg [OP_TAG_WIDTH-1:0] stat_wr_op_start_tag_reg = 0, stat_wr_op_start_tag_next; +reg [LEN_WIDTH-1:0] stat_wr_op_start_len_reg = 0, stat_wr_op_start_len_next; +reg stat_wr_op_start_valid_reg = 1'b0, stat_wr_op_start_valid_next; +reg [OP_TAG_WIDTH-1:0] stat_wr_op_finish_tag_reg = 0, stat_wr_op_finish_tag_next; +reg 
[3:0] stat_wr_op_finish_status_reg = 0, stat_wr_op_finish_status_next; +reg stat_wr_op_finish_valid_reg = 1'b0, stat_wr_op_finish_valid_next; +reg [OP_TAG_WIDTH-1:0] stat_wr_req_start_tag_reg = 0, stat_wr_req_start_tag_next; +reg [12:0] stat_wr_req_start_len_reg = 13'd0, stat_wr_req_start_len_next; +reg stat_wr_req_start_valid_reg = 1'b0, stat_wr_req_start_valid_next; +reg [OP_TAG_WIDTH-1:0] stat_wr_req_finish_tag_reg = 0, stat_wr_req_finish_tag_next; +reg [3:0] stat_wr_req_finish_status_reg = 0, stat_wr_req_finish_status_next; +reg stat_wr_req_finish_valid_reg = 1'b0, stat_wr_req_finish_valid_next; +reg stat_wr_op_table_full_reg = 1'b0, stat_wr_op_table_full_next; +reg stat_wr_tx_stall_reg = 1'b0, stat_wr_tx_stall_next; + // internal datapath reg [AXI_DATA_WIDTH-1:0] m_axi_wdata_int; reg [AXI_STRB_WIDTH-1:0] m_axi_wstrb_int; @@ -333,6 +366,21 @@ assign ram_rd_cmd_addr = ram_rd_cmd_addr_reg; assign ram_rd_cmd_valid = ram_rd_cmd_valid_reg; assign ram_rd_resp_ready = ram_rd_resp_ready_cmb; +assign stat_wr_op_start_tag = stat_wr_op_start_tag_reg; +assign stat_wr_op_start_len = stat_wr_op_start_len_reg; +assign stat_wr_op_start_valid = stat_wr_op_start_valid_reg; +assign stat_wr_op_finish_tag = stat_wr_op_finish_tag_reg; +assign stat_wr_op_finish_status = stat_wr_op_finish_status_reg; +assign stat_wr_op_finish_valid = stat_wr_op_finish_valid_reg; +assign stat_wr_req_start_tag = stat_wr_req_start_tag_reg; +assign stat_wr_req_start_len = stat_wr_req_start_len_reg; +assign stat_wr_req_start_valid = stat_wr_req_start_valid_reg; +assign stat_wr_req_finish_tag = stat_wr_req_finish_tag_reg; +assign stat_wr_req_finish_status = stat_wr_req_finish_status_reg; +assign stat_wr_req_finish_valid = stat_wr_req_finish_valid_reg; +assign stat_wr_op_table_full = stat_wr_op_table_full_reg; +assign stat_wr_tx_stall = stat_wr_tx_stall_reg; + // operation tag management reg [OP_TAG_WIDTH+1-1:0] op_table_start_ptr_reg = 0; reg [AXI_ADDR_WIDTH-1:0] op_table_start_axi_addr; @@ -392,6 +440,15 
@@ always @* begin s_axis_write_desc_ready_next = 1'b0; + stat_wr_op_start_tag_next = stat_wr_op_start_tag_reg; + stat_wr_op_start_len_next = stat_wr_op_start_len_reg; + stat_wr_op_start_valid_next = 1'b0; + stat_wr_req_start_tag_next = stat_wr_req_start_tag_reg; + stat_wr_req_start_len_next = stat_wr_req_start_len_reg; + stat_wr_req_start_valid_next = 1'b0; + stat_wr_op_table_full_next = !(!op_table_active[op_table_start_ptr_reg[OP_TAG_WIDTH-1:0]] && ($unsigned(op_table_start_ptr_reg - op_table_finish_ptr_reg) < 2**OP_TAG_WIDTH)); + stat_wr_tx_stall_next = (m_axi_awvalid && !m_axi_awready) || (m_axi_wvalid && !m_axi_wready); + tag_next = tag_reg; req_axi_addr_next = req_axi_addr_reg; ram_sel_next = ram_sel_reg; @@ -459,6 +516,11 @@ always @* begin if (s_axis_write_desc_ready & s_axis_write_desc_valid) begin s_axis_write_desc_ready_next = 1'b0; + + stat_wr_op_start_tag_next = stat_wr_op_start_tag_reg+1; + stat_wr_op_start_len_next = s_axis_write_desc_len; + stat_wr_op_start_valid_next = 1'b1; + req_state_next = REQ_STATE_START; end else begin req_state_next = REQ_STATE_IDLE; @@ -488,6 +550,10 @@ always @* begin op_table_start_last = op_count_reg == tr_word_count_next; op_table_start_en = 1'b1; + stat_wr_req_start_tag_next = op_table_start_ptr_reg[OP_TAG_WIDTH-1:0]; + stat_wr_req_start_len_next = zero_len_reg ? 
0 : tr_word_count_next; + stat_wr_req_start_valid_next = 1'b1; + if (op_count_next <= AXI_MAX_BURST_SIZE - (req_axi_addr_next & OFFSET_MASK) || AXI_MAX_BURST_SIZE >= 4096) begin // packet smaller than max burst size if (((req_axi_addr_next & 12'hfff) + (op_count_next & 12'hfff)) >> 12 != 0 || op_count_next >> 12 != 0) begin @@ -679,6 +745,13 @@ always @* begin ram_rd_resp_ready_cmb = {RAM_SEG_COUNT{1'b0}}; + stat_wr_op_finish_tag_next = stat_wr_op_finish_tag_reg; + stat_wr_op_finish_status_next = stat_wr_op_finish_status_reg; + stat_wr_op_finish_valid_next = 1'b0; + stat_wr_req_finish_tag_next = stat_wr_req_finish_tag_reg; + stat_wr_req_finish_status_next = stat_wr_req_finish_status_reg; + stat_wr_req_finish_valid_next = 1'b0; + axi_addr_next = axi_addr_reg; axi_len_next = axi_len_reg; axi_zero_len_next = axi_zero_len_reg; @@ -813,12 +886,22 @@ always @* begin end m_axis_write_desc_status_valid_next = 1'b0; + stat_wr_req_finish_status_next = op_table_write_complete_error; + stat_wr_req_finish_valid_next = 1'b0; + + stat_wr_op_finish_tag_next = stat_wr_op_finish_tag_reg; + stat_wr_op_finish_status_next = m_axis_write_desc_status_error_next; + stat_wr_op_finish_valid_next = 1'b0; + if (USE_AXI_ID) begin // accept write completions + stat_wr_req_finish_tag_next = m_axi_bid; + m_axi_bready_next = 1'b1; if (m_axi_bready && m_axi_bvalid) begin op_table_write_complete_ptr = m_axi_bid; op_table_write_complete_en = 1'b1; + stat_wr_req_finish_valid_next = 1'b1; end // commit operations in-order @@ -831,18 +914,25 @@ always @* begin m_axis_write_desc_status_error_next = op_table_error_code[op_table_finish_ptr_reg[OP_TAG_WIDTH-1:0]]; end + stat_wr_op_finish_status_next = m_axis_write_desc_status_error_next; + if (op_table_last[op_table_finish_ptr_reg[OP_TAG_WIDTH-1:0]]) begin m_axis_write_desc_status_tag_next = op_table_tag[op_table_finish_ptr_reg[OP_TAG_WIDTH-1:0]]; m_axis_write_desc_status_valid_next = 1'b1; + stat_wr_op_finish_tag_next = stat_wr_op_finish_tag_reg + 1; + 
stat_wr_op_finish_valid_next = 1'b1; end end end else begin // accept write completions op_table_finish_en = 1'b0; + stat_wr_req_finish_tag_next = op_table_finish_ptr_reg[OP_TAG_WIDTH-1:0]; + m_axi_bready_next = 1'b1; if (m_axi_bready && m_axi_bvalid) begin op_table_finish_en = 1'b1; + stat_wr_req_finish_valid_next = 1'b1; if (m_axi_bresp == AXI_RESP_SLVERR) begin m_axis_write_desc_status_error_next = DMA_ERROR_AXI_WR_SLVERR; @@ -850,9 +940,13 @@ always @* begin m_axis_write_desc_status_error_next = DMA_ERROR_AXI_WR_DECERR; end + stat_wr_op_finish_status_next = m_axis_write_desc_status_error_next; + if (op_table_last[op_table_finish_ptr_reg[OP_TAG_WIDTH-1:0]]) begin m_axis_write_desc_status_tag_next = op_table_tag[op_table_finish_ptr_reg[OP_TAG_WIDTH-1:0]]; m_axis_write_desc_status_valid_next = 1'b1; + stat_wr_op_finish_tag_next = stat_wr_op_finish_tag_reg + 1; + stat_wr_op_finish_valid_next = 1'b1; end end end @@ -917,6 +1011,21 @@ always @(posedge clk) begin m_axis_write_desc_status_error_reg <= m_axis_write_desc_status_error_next; m_axis_write_desc_status_valid_reg <= m_axis_write_desc_status_valid_next; + stat_wr_op_start_tag_reg <= stat_wr_op_start_tag_next; + stat_wr_op_start_len_reg <= stat_wr_op_start_len_next; + stat_wr_op_start_valid_reg <= stat_wr_op_start_valid_next; + stat_wr_op_finish_tag_reg <= stat_wr_op_finish_tag_next; + stat_wr_op_finish_status_reg <= stat_wr_op_finish_status_next; + stat_wr_op_finish_valid_reg <= stat_wr_op_finish_valid_next; + stat_wr_req_start_tag_reg <= stat_wr_req_start_tag_next; + stat_wr_req_start_len_reg <= stat_wr_req_start_len_next; + stat_wr_req_start_valid_reg <= stat_wr_req_start_valid_next; + stat_wr_req_finish_tag_reg <= stat_wr_req_finish_tag_next; + stat_wr_req_finish_status_reg <= stat_wr_req_finish_status_next; + stat_wr_req_finish_valid_reg <= stat_wr_req_finish_valid_next; + stat_wr_op_table_full_reg <= stat_wr_op_table_full_next; + stat_wr_tx_stall_reg <= stat_wr_tx_stall_next; + ram_rd_cmd_sel_reg <= 
ram_rd_cmd_sel_next; ram_rd_cmd_addr_reg <= ram_rd_cmd_addr_next; ram_rd_cmd_valid_reg <= ram_rd_cmd_valid_next; @@ -974,6 +1083,15 @@ always @(posedge clk) begin m_axis_write_desc_status_error_reg <= 4'd0; m_axis_write_desc_status_valid_reg <= 1'b0; + stat_wr_op_start_tag_reg <= 0; + stat_wr_op_start_valid_reg <= 1'b0; + stat_wr_op_finish_tag_reg <= 0; + stat_wr_op_finish_valid_reg <= 1'b0; + stat_wr_req_start_valid_reg <= 1'b0; + stat_wr_req_finish_valid_reg <= 1'b0; + stat_wr_op_table_full_reg <= 1'b0; + stat_wr_tx_stall_reg <= 1'b0; + ram_rd_cmd_valid_reg <= {RAM_SEG_COUNT{1'b0}}; mask_fifo_wr_ptr_reg <= 0; From 1f46987ed8d95705d5bb338c994624a22b937808 Mon Sep 17 00:00:00 2001 From: Alex Forencich Date: Thu, 31 Mar 2022 23:19:50 -0700 Subject: [PATCH 09/21] Fix typo in Stratix 10 shim --- rtl/pcie_s10_if_tx.v | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rtl/pcie_s10_if_tx.v b/rtl/pcie_s10_if_tx.v index 6cec15d1a..1d6bb2f30 100644 --- a/rtl/pcie_s10_if_tx.v +++ b/rtl/pcie_s10_if_tx.v @@ -318,7 +318,7 @@ always @* begin wr_req_fifo_wr_data[63:32] = tx_wr_req_tlp_hdr[95:64]; wr_req_fifo_wr_data[95:64] = tx_wr_req_tlp_hdr[63:32]; if (wr_req_payload_offset_next) begin - wr_req_fifo_wr_data[127:96] = tx_rd_req_tlp_hdr[31:0]; + wr_req_fifo_wr_data[127:96] = tx_wr_req_tlp_hdr[31:0]; wr_req_fifo_wr_data[SEG_COUNT*SEG_DATA_WIDTH-1:128] = tx_wr_req_tlp_data; end else begin wr_req_fifo_wr_data[SEG_COUNT*SEG_DATA_WIDTH-1:96] = tx_wr_req_tlp_data; From 7fcec1096104106935968355c2d49666a87151dc Mon Sep 17 00:00:00 2001 From: Alex Forencich Date: Fri, 1 Apr 2022 01:11:30 -0700 Subject: [PATCH 10/21] Add internal RAM_DATA_WIDTH parameter --- rtl/dma_if_axi_rd.v | 3 ++- rtl/dma_if_axi_wr.v | 5 +++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/rtl/dma_if_axi_rd.v b/rtl/dma_if_axi_rd.v index a976220de..bdeb4b73d 100644 --- a/rtl/dma_if_axi_rd.v +++ b/rtl/dma_if_axi_rd.v @@ -141,6 +141,7 @@ module dma_if_axi_rd # output wire stat_rd_tx_stall ); 
+parameter RAM_DATA_WIDTH = RAM_SEG_COUNT*RAM_SEG_DATA_WIDTH; parameter RAM_WORD_WIDTH = RAM_SEG_BE_WIDTH; parameter RAM_WORD_SIZE = RAM_SEG_DATA_WIDTH/RAM_WORD_WIDTH; @@ -189,7 +190,7 @@ initial begin $finish; end - if (RAM_SEG_COUNT*RAM_SEG_DATA_WIDTH != AXI_DATA_WIDTH*2) begin + if (RAM_DATA_WIDTH != AXI_DATA_WIDTH*2) begin $error("Error: RAM interface width must be double the AXI interface width (instance %m)"); $finish; end diff --git a/rtl/dma_if_axi_wr.v b/rtl/dma_if_axi_wr.v index 79e084fee..6a4e4d8c2 100644 --- a/rtl/dma_if_axi_wr.v +++ b/rtl/dma_if_axi_wr.v @@ -144,6 +144,7 @@ module dma_if_axi_wr # output wire stat_wr_tx_stall ); +parameter RAM_DATA_WIDTH = RAM_SEG_COUNT*RAM_SEG_DATA_WIDTH; parameter RAM_WORD_WIDTH = RAM_SEG_BE_WIDTH; parameter RAM_WORD_SIZE = RAM_SEG_DATA_WIDTH/RAM_WORD_WIDTH; @@ -191,7 +192,7 @@ initial begin $finish; end - if (RAM_SEG_COUNT*RAM_SEG_DATA_WIDTH != AXI_DATA_WIDTH*2) begin + if (RAM_DATA_WIDTH != AXI_DATA_WIDTH*2) begin $error("Error: RAM interface width must be double the AXI interface width (instance %m)"); $finish; end @@ -820,7 +821,7 @@ always @* begin offset_next = offset_reg + AXI_STRB_WIDTH; strb_offset_mask_next = {AXI_STRB_WIDTH{1'b1}}; - m_axi_wdata_int = {2{ram_rd_resp_data}} >> (RAM_SEG_COUNT*RAM_SEG_DATA_WIDTH-offset_reg*AXI_WORD_SIZE); + m_axi_wdata_int = {2{ram_rd_resp_data}} >> (RAM_DATA_WIDTH-offset_reg*AXI_WORD_SIZE); m_axi_wstrb_int = strb_offset_mask_reg; m_axi_wvalid_int = 1'b1; From 34fe24287d4b7b74af4bacf6d7111f92dedd98f1 Mon Sep 17 00:00:00 2001 From: Alex Forencich Date: Fri, 1 Apr 2022 01:42:25 -0700 Subject: [PATCH 11/21] Simplify logic --- rtl/dma_if_axi_wr.v | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/rtl/dma_if_axi_wr.v b/rtl/dma_if_axi_wr.v index 6a4e4d8c2..50ef4378d 100644 --- a/rtl/dma_if_axi_wr.v +++ b/rtl/dma_if_axi_wr.v @@ -664,15 +664,13 @@ always @* begin read_last_cycle_next = read_cycle_count_next == 0; for (i = 0; i < RAM_SEG_COUNT; i = i + 1) begin - if 
(read_ram_mask_0_reg[i]) begin + if (read_ram_mask_reg[i]) begin ram_rd_cmd_sel_next[i*RAM_SEL_WIDTH +: RAM_SEL_WIDTH] = read_ram_sel_reg; ram_rd_cmd_addr_next[i*RAM_SEG_ADDR_WIDTH +: RAM_SEG_ADDR_WIDTH] = read_ram_addr_reg[RAM_ADDR_WIDTH-1:RAM_ADDR_WIDTH-RAM_SEG_ADDR_WIDTH]; ram_rd_cmd_valid_next[i] = 1'b1; end if (read_ram_mask_1_reg[i]) begin - ram_rd_cmd_sel_next[i*RAM_SEL_WIDTH +: RAM_SEL_WIDTH] = read_ram_sel_reg; ram_rd_cmd_addr_next[i*RAM_SEG_ADDR_WIDTH +: RAM_SEG_ADDR_WIDTH] = read_ram_addr_reg[RAM_ADDR_WIDTH-1:RAM_ADDR_WIDTH-RAM_SEG_ADDR_WIDTH]+1; - ram_rd_cmd_valid_next[i] = 1'b1; end end From a5dcb3d27c0330c8bdecf372aa431d1fcc83bb1b Mon Sep 17 00:00:00 2001 From: Alex Forencich Date: Mon, 4 Apr 2022 12:40:42 -0700 Subject: [PATCH 12/21] Add support for writing immediate data to DMA IF modules --- rtl/dma_if_axi.v | 10 ++++ rtl/dma_if_axi_wr.v | 72 +++++++++++++++++++++--- rtl/dma_if_desc_mux.v | 26 +++++++++ rtl/dma_if_mux.v | 14 +++++ rtl/dma_if_mux_rd.v | 5 ++ rtl/dma_if_mux_wr.v | 14 +++++ rtl/dma_if_pcie.v | 10 ++++ rtl/dma_if_pcie_wr.v | 67 ++++++++++++++++++++-- tb/dma_if_axi_wr/Makefile | 6 ++ tb/dma_if_axi_wr/test_dma_if_axi_wr.py | 57 ++++++++++++++++++- tb/dma_if_pcie_wr/Makefile | 6 ++ tb/dma_if_pcie_wr/test_dma_if_pcie_wr.py | 66 +++++++++++++++++++++- 12 files changed, 339 insertions(+), 14 deletions(-) diff --git a/rtl/dma_if_axi.v b/rtl/dma_if_axi.v index e40bc1ad8..582635ca1 100644 --- a/rtl/dma_if_axi.v +++ b/rtl/dma_if_axi.v @@ -55,6 +55,10 @@ module dma_if_axi # parameter RAM_SEG_BE_WIDTH = RAM_SEG_DATA_WIDTH/8, // RAM segment address width parameter RAM_SEG_ADDR_WIDTH = RAM_ADDR_WIDTH-$clog2(RAM_SEG_COUNT*RAM_SEG_BE_WIDTH), + // Immediate enable + parameter IMM_ENABLE = 0, + // Immediate width + parameter IMM_WIDTH = 32, // Length field width parameter LEN_WIDTH = 16, // Tag field width @@ -135,6 +139,8 @@ module dma_if_axi # input wire [AXI_ADDR_WIDTH-1:0] s_axis_write_desc_axi_addr, input wire [RAM_SEL_WIDTH-1:0] 
s_axis_write_desc_ram_sel, input wire [RAM_ADDR_WIDTH-1:0] s_axis_write_desc_ram_addr, + input wire [IMM_WIDTH-1:0] s_axis_write_desc_imm, + input wire s_axis_write_desc_imm_en, input wire [LEN_WIDTH-1:0] s_axis_write_desc_len, input wire [TAG_WIDTH-1:0] s_axis_write_desc_tag, input wire s_axis_write_desc_valid, @@ -310,6 +316,8 @@ dma_if_axi_wr #( .RAM_SEG_DATA_WIDTH(RAM_SEG_DATA_WIDTH), .RAM_SEG_BE_WIDTH(RAM_SEG_BE_WIDTH), .RAM_SEG_ADDR_WIDTH(RAM_SEG_ADDR_WIDTH), + .IMM_ENABLE(IMM_ENABLE), + .IMM_WIDTH(IMM_WIDTH), .LEN_WIDTH(LEN_WIDTH), .TAG_WIDTH(TAG_WIDTH), .OP_TABLE_SIZE(WRITE_OP_TABLE_SIZE), @@ -348,6 +356,8 @@ dma_if_axi_wr_inst ( .s_axis_write_desc_axi_addr(s_axis_write_desc_axi_addr), .s_axis_write_desc_ram_sel(s_axis_write_desc_ram_sel), .s_axis_write_desc_ram_addr(s_axis_write_desc_ram_addr), + .s_axis_write_desc_imm(s_axis_write_desc_imm), + .s_axis_write_desc_imm_en(s_axis_write_desc_imm_en), .s_axis_write_desc_len(s_axis_write_desc_len), .s_axis_write_desc_tag(s_axis_write_desc_tag), .s_axis_write_desc_valid(s_axis_write_desc_valid), diff --git a/rtl/dma_if_axi_wr.v b/rtl/dma_if_axi_wr.v index 50ef4378d..f50cc2ea5 100644 --- a/rtl/dma_if_axi_wr.v +++ b/rtl/dma_if_axi_wr.v @@ -55,6 +55,10 @@ module dma_if_axi_wr # parameter RAM_SEG_BE_WIDTH = RAM_SEG_DATA_WIDTH/8, // RAM segment address width parameter RAM_SEG_ADDR_WIDTH = RAM_ADDR_WIDTH-$clog2(RAM_SEG_COUNT*RAM_SEG_BE_WIDTH), + // Immediate enable + parameter IMM_ENABLE = 0, + // Immediate width + parameter IMM_WIDTH = 32, // Length field width parameter LEN_WIDTH = 16, // Tag field width @@ -97,6 +101,8 @@ module dma_if_axi_wr # input wire [AXI_ADDR_WIDTH-1:0] s_axis_write_desc_axi_addr, input wire [RAM_SEL_WIDTH-1:0] s_axis_write_desc_ram_sel, input wire [RAM_ADDR_WIDTH-1:0] s_axis_write_desc_ram_addr, + input wire [IMM_WIDTH-1:0] s_axis_write_desc_imm, + input wire s_axis_write_desc_imm_en, input wire [LEN_WIDTH-1:0] s_axis_write_desc_len, input wire [TAG_WIDTH-1:0] s_axis_write_desc_tag, input 
wire s_axis_write_desc_valid, @@ -211,6 +217,11 @@ initial begin $error("Error: AXI_ID_WIDTH insufficient for requested OP_TABLE_SIZE (instance %m)"); $finish; end + + if (IMM_ENABLE && IMM_WIDTH > AXI_DATA_WIDTH) begin + $error("Error: IMM_WIDTH must not be larger than the AXI interface width (instance %m)"); + $finish; + end end localparam [1:0] @@ -258,6 +269,8 @@ reg read_cmd_ready; reg [AXI_ADDR_WIDTH-1:0] req_axi_addr_reg = {AXI_ADDR_WIDTH{1'b0}}, req_axi_addr_next; reg [RAM_SEL_WIDTH-1:0] ram_sel_reg = {RAM_SEL_WIDTH{1'b0}}, ram_sel_next; reg [RAM_ADDR_WIDTH-1:0] ram_addr_reg = {RAM_ADDR_WIDTH{1'b0}}, ram_addr_next; +reg [IMM_WIDTH-1:0] imm_reg = {IMM_WIDTH{1'b0}}, imm_next; +reg imm_en_reg = 1'b0, imm_en_next; reg [LEN_WIDTH-1:0] op_count_reg = {LEN_WIDTH{1'b0}}, op_count_next; reg zero_len_reg = 1'b0, zero_len_next; reg [LEN_WIDTH-1:0] tr_count_reg = {LEN_WIDTH{1'b0}}, tr_count_next; @@ -267,6 +280,7 @@ reg [TAG_WIDTH-1:0] tag_reg = {TAG_WIDTH{1'b0}}, tag_next; reg [AXI_ADDR_WIDTH-1:0] read_axi_addr_reg = {AXI_ADDR_WIDTH{1'b0}}, read_axi_addr_next; reg [RAM_SEL_WIDTH-1:0] read_ram_sel_reg = {RAM_SEL_WIDTH{1'b0}}, read_ram_sel_next; reg [RAM_ADDR_WIDTH-1:0] read_ram_addr_reg = {RAM_ADDR_WIDTH{1'b0}}, read_ram_addr_next; +reg read_imm_en_reg = 1'b0, read_imm_en_next; reg [LEN_WIDTH-1:0] read_len_reg = {LEN_WIDTH{1'b0}}, read_len_next; reg [RAM_SEG_COUNT-1:0] read_ram_mask_reg = {RAM_SEG_COUNT{1'b0}}, read_ram_mask_next; reg [RAM_SEG_COUNT-1:0] read_ram_mask_0_reg = {RAM_SEG_COUNT{1'b0}}, read_ram_mask_0_next; @@ -279,6 +293,8 @@ reg [RAM_OFFSET_WIDTH-1:0] start_offset_reg = {RAM_OFFSET_WIDTH{1'b0}}, start_of reg [RAM_OFFSET_WIDTH-1:0] end_offset_reg = {RAM_OFFSET_WIDTH{1'b0}}, end_offset_next; reg [AXI_ADDR_WIDTH-1:0] axi_addr_reg = {AXI_ADDR_WIDTH{1'b0}}, axi_addr_next; +reg [IMM_WIDTH-1:0] axi_imm_reg = {IMM_WIDTH{1'b0}}, axi_imm_next; +reg axi_imm_en_reg = 1'b0, axi_imm_en_next; reg [12:0] axi_len_reg = 13'd0, axi_len_next; reg axi_zero_len_reg = 1'b0, 
axi_zero_len_next; reg [RAM_OFFSET_WIDTH-1:0] offset_reg = {RAM_OFFSET_WIDTH{1'b0}}, offset_next; @@ -292,6 +308,7 @@ reg last_cycle_reg = 1'b0, last_cycle_next; reg [AXI_ADDR_WIDTH-1:0] read_cmd_axi_addr_reg = {AXI_ADDR_WIDTH{1'b0}}, read_cmd_axi_addr_next; reg [RAM_SEL_WIDTH-1:0] read_cmd_ram_sel_reg = {RAM_SEL_WIDTH{1'b0}}, read_cmd_ram_sel_next; reg [RAM_ADDR_WIDTH-1:0] read_cmd_ram_addr_reg = {RAM_ADDR_WIDTH{1'b0}}, read_cmd_ram_addr_next; +reg read_cmd_imm_en_reg = 1'b0, read_cmd_imm_en_next; reg [12:0] read_cmd_len_reg = 13'd0, read_cmd_len_next; reg [CYCLE_COUNT_WIDTH-1:0] read_cmd_cycle_count_reg = {CYCLE_COUNT_WIDTH{1'b0}}, read_cmd_cycle_count_next; reg read_cmd_last_cycle_reg = 1'b0, read_cmd_last_cycle_next; @@ -385,6 +402,8 @@ assign stat_wr_tx_stall = stat_wr_tx_stall_reg; // operation tag management reg [OP_TAG_WIDTH+1-1:0] op_table_start_ptr_reg = 0; reg [AXI_ADDR_WIDTH-1:0] op_table_start_axi_addr; +reg [IMM_WIDTH-1:0] op_table_start_imm; +reg op_table_start_imm_en; reg [11:0] op_table_start_len; reg op_table_start_zero_len; reg [CYCLE_COUNT_WIDTH-1:0] op_table_start_cycle_count; @@ -407,6 +426,10 @@ reg [2**OP_TAG_WIDTH-1:0] op_table_write_complete = 0; (* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *) reg [AXI_ADDR_WIDTH-1:0] op_table_axi_addr[2**OP_TAG_WIDTH-1:0]; (* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *) +reg [IMM_WIDTH-1:0] op_table_imm[2**OP_TAG_WIDTH-1:0]; +(* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *) +reg op_table_imm_en[2**OP_TAG_WIDTH-1:0]; +(* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *) reg [11:0] op_table_len[2**OP_TAG_WIDTH-1:0]; (* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *) reg op_table_zero_len[2**OP_TAG_WIDTH-1:0]; @@ -426,6 +449,8 @@ integer i; initial begin for (i = 0; i < 2**OP_TAG_WIDTH; i = i + 1) begin op_table_axi_addr[i] = 0; + op_table_imm[i] = 0; + op_table_imm_en[i] = 0; op_table_len[i] = 0; op_table_zero_len[i] = 1'b0; 
op_table_cycle_count[i] = 0; @@ -454,6 +479,8 @@ always @* begin req_axi_addr_next = req_axi_addr_reg; ram_sel_next = ram_sel_reg; ram_addr_next = ram_addr_reg; + imm_next = imm_reg; + imm_en_next = imm_en_reg; op_count_next = op_count_reg; zero_len_next = zero_len_reg; tr_count_next = tr_count_reg; @@ -462,12 +489,15 @@ always @* begin read_cmd_axi_addr_next = read_cmd_axi_addr_reg; read_cmd_ram_sel_next = read_cmd_ram_sel_reg; read_cmd_ram_addr_next = read_cmd_ram_addr_reg; + read_cmd_imm_en_next = read_cmd_imm_en_reg; read_cmd_len_next = read_cmd_len_reg; read_cmd_cycle_count_next = read_cmd_cycle_count_reg; read_cmd_last_cycle_next = read_cmd_last_cycle_reg; read_cmd_valid_next = read_cmd_valid_reg && !read_cmd_ready; op_table_start_axi_addr = req_axi_addr_reg; + op_table_start_imm = imm_reg; + op_table_start_imm_en = imm_en_reg; op_table_start_len = 0; op_table_start_zero_len = zero_len_reg; op_table_start_cycle_count = 0; @@ -483,8 +513,15 @@ always @* begin s_axis_write_desc_ready_next = !op_table_active[op_table_start_ptr_reg[OP_TAG_WIDTH-1:0]] && ($unsigned(op_table_start_ptr_reg - op_table_finish_ptr_reg) < 2**OP_TAG_WIDTH) && enable; req_axi_addr_next = s_axis_write_desc_axi_addr; - ram_sel_next = s_axis_write_desc_ram_sel; - ram_addr_next = s_axis_write_desc_ram_addr; + if (IMM_ENABLE && s_axis_write_desc_imm_en) begin + ram_sel_next = 0; + ram_addr_next = 0; + end else begin + ram_sel_next = s_axis_write_desc_ram_sel; + ram_addr_next = s_axis_write_desc_ram_addr; + end + imm_next = s_axis_write_desc_imm; + imm_en_next = IMM_ENABLE && s_axis_write_desc_imm_en; if (s_axis_write_desc_len == 0) begin // zero-length operation op_count_next = 1; @@ -533,6 +570,7 @@ always @* begin read_cmd_axi_addr_next = req_axi_addr_reg; read_cmd_ram_sel_next = ram_sel_reg; read_cmd_ram_addr_next = ram_addr_reg; + read_cmd_imm_en_next = imm_en_reg; read_cmd_len_next = tr_word_count_next; read_cmd_cycle_count_next = (tr_word_count_next + (req_axi_addr_reg & OFFSET_MASK) - 
1) >> AXI_BURST_SIZE; op_table_start_cycle_count = read_cmd_cycle_count_next; @@ -544,6 +582,8 @@ always @* begin op_count_next = op_count_reg - tr_word_count_next; op_table_start_axi_addr = req_axi_addr_reg; + op_table_start_imm = imm_reg; + op_table_start_imm_en = imm_en_reg; op_table_start_len = tr_word_count_next; op_table_start_zero_len = zero_len_reg; op_table_start_offset = (req_axi_addr_reg & OFFSET_MASK)-ram_addr_reg[RAM_OFFSET_WIDTH-1:0]; @@ -600,6 +640,7 @@ always @* begin read_axi_addr_next = read_axi_addr_reg; read_ram_sel_next = read_ram_sel_reg; read_ram_addr_next = read_ram_addr_reg; + read_imm_en_next = read_imm_en_reg; read_len_next = read_len_reg; read_ram_mask_next = read_ram_mask_reg; read_ram_mask_0_next = read_ram_mask_0_reg; @@ -622,6 +663,7 @@ always @* begin read_axi_addr_next = read_cmd_axi_addr_reg; read_ram_sel_next = read_cmd_ram_sel_reg; read_ram_addr_next = read_cmd_ram_addr_reg; + read_imm_en_next = read_cmd_imm_en_reg; read_len_next = read_cmd_len_reg; read_cycle_count_next = read_cmd_cycle_count_reg; read_last_cycle_next = read_cmd_last_cycle_reg; @@ -667,14 +709,14 @@ always @* begin if (read_ram_mask_reg[i]) begin ram_rd_cmd_sel_next[i*RAM_SEL_WIDTH +: RAM_SEL_WIDTH] = read_ram_sel_reg; ram_rd_cmd_addr_next[i*RAM_SEG_ADDR_WIDTH +: RAM_SEG_ADDR_WIDTH] = read_ram_addr_reg[RAM_ADDR_WIDTH-1:RAM_ADDR_WIDTH-RAM_SEG_ADDR_WIDTH]; - ram_rd_cmd_valid_next[i] = 1'b1; + ram_rd_cmd_valid_next[i] = !(IMM_ENABLE && read_imm_en_reg); end if (read_ram_mask_1_reg[i]) begin ram_rd_cmd_addr_next[i*RAM_SEG_ADDR_WIDTH +: RAM_SEG_ADDR_WIDTH] = read_ram_addr_reg[RAM_ADDR_WIDTH-1:RAM_ADDR_WIDTH-RAM_SEG_ADDR_WIDTH]+1; end end - mask_fifo_wr_mask = read_ram_mask_reg; + mask_fifo_wr_mask = (IMM_ENABLE && read_imm_en_reg) ? 
0 : read_ram_mask_reg; mask_fifo_we = 1'b1; if (read_len_next > AXI_STRB_WIDTH) begin @@ -703,6 +745,7 @@ always @* begin read_axi_addr_next = read_cmd_axi_addr_reg; read_ram_sel_next = read_cmd_ram_sel_reg; read_ram_addr_next = read_cmd_ram_addr_reg; + read_imm_en_next = read_cmd_imm_en_reg; read_len_next = read_cmd_len_reg; read_cycle_count_next = read_cmd_cycle_count_reg; read_last_cycle_next = read_cmd_last_cycle_reg; @@ -752,6 +795,8 @@ always @* begin stat_wr_req_finish_valid_next = 1'b0; axi_addr_next = axi_addr_reg; + axi_imm_next = axi_imm_reg; + axi_imm_en_next = axi_imm_en_reg; axi_len_next = axi_len_reg; axi_zero_len_next = axi_zero_len_reg; offset_next = offset_reg; @@ -773,8 +818,8 @@ always @* begin m_axi_awvalid_next = m_axi_awvalid_reg && !m_axi_awready; m_axi_bready_next = 1'b0; - m_axi_wdata_int = 0; - m_axi_wstrb_int = 0; + m_axi_wdata_int = ((IMM_ENABLE && axi_imm_en_reg) ? {2{{RAM_DATA_WIDTH{1'b0}} | axi_imm_reg}} : {2{ram_rd_resp_data}}) >> (RAM_DATA_WIDTH-offset_reg*AXI_WORD_SIZE); + m_axi_wstrb_int = strb_offset_mask_reg; m_axi_wlast_int = 1'b0; m_axi_wvalid_int = 1'b0; @@ -785,6 +830,8 @@ always @* begin ram_rd_resp_ready_cmb = {RAM_SEG_COUNT{1'b0}}; axi_addr_next = op_table_axi_addr[op_table_tx_start_ptr_reg[OP_TAG_WIDTH-1:0]]; + axi_imm_next = op_table_imm[op_table_tx_start_ptr_reg[OP_TAG_WIDTH-1:0]]; + axi_imm_en_next = op_table_imm_en[op_table_tx_start_ptr_reg[OP_TAG_WIDTH-1:0]]; axi_len_next = op_table_len[op_table_tx_start_ptr_reg[OP_TAG_WIDTH-1:0]]; axi_zero_len_next = op_table_zero_len[op_table_tx_start_ptr_reg[OP_TAG_WIDTH-1:0]]; offset_next = op_table_offset[op_table_tx_start_ptr_reg[OP_TAG_WIDTH-1:0]]; @@ -819,8 +866,9 @@ always @* begin offset_next = offset_reg + AXI_STRB_WIDTH; strb_offset_mask_next = {AXI_STRB_WIDTH{1'b1}}; - m_axi_wdata_int = {2{ram_rd_resp_data}} >> (RAM_DATA_WIDTH-offset_reg*AXI_WORD_SIZE); + m_axi_wdata_int = ((IMM_ENABLE && axi_imm_en_reg) ? 
{2{{RAM_DATA_WIDTH{1'b0}} | axi_imm_reg}} : {2{ram_rd_resp_data}}) >> (RAM_DATA_WIDTH-offset_reg*AXI_WORD_SIZE); m_axi_wstrb_int = strb_offset_mask_reg; + m_axi_wlast_int = 1'b0; m_axi_wvalid_int = 1'b1; if (last_cycle_reg) begin @@ -834,6 +882,8 @@ always @* begin // skip idle state if possible axi_addr_next = op_table_axi_addr[op_table_tx_start_ptr_reg[OP_TAG_WIDTH-1:0]]; + axi_imm_next = op_table_imm[op_table_tx_start_ptr_reg[OP_TAG_WIDTH-1:0]]; + axi_imm_en_next = op_table_imm_en[op_table_tx_start_ptr_reg[OP_TAG_WIDTH-1:0]]; axi_len_next = op_table_len[op_table_tx_start_ptr_reg[OP_TAG_WIDTH-1:0]]; axi_zero_len_next = op_table_zero_len[op_table_tx_start_ptr_reg[OP_TAG_WIDTH-1:0]]; offset_next = op_table_offset[op_table_tx_start_ptr_reg[OP_TAG_WIDTH-1:0]]; @@ -959,6 +1009,8 @@ always @(posedge clk) begin req_axi_addr_reg <= req_axi_addr_next; ram_sel_reg <= ram_sel_next; ram_addr_reg <= ram_addr_next; + imm_reg <= imm_next; + imm_en_reg <= imm_en_next; op_count_reg <= op_count_next; zero_len_reg <= zero_len_next; tr_count_reg <= tr_count_next; @@ -968,6 +1020,7 @@ always @(posedge clk) begin read_axi_addr_reg <= read_axi_addr_next; read_ram_sel_reg <= read_ram_sel_next; read_ram_addr_reg <= read_ram_addr_next; + read_imm_en_reg <= read_imm_en_next; read_len_reg <= read_len_next; read_ram_mask_reg <= read_ram_mask_next; read_ram_mask_0_reg <= read_ram_mask_0_next; @@ -980,6 +1033,8 @@ always @(posedge clk) begin end_offset_reg <= end_offset_next; axi_addr_reg <= axi_addr_next; + axi_imm_reg <= axi_imm_next; + axi_imm_en_reg <= axi_imm_en_next; axi_len_reg <= axi_len_next; axi_zero_len_reg <= axi_zero_len_next; offset_reg <= offset_next; @@ -993,6 +1048,7 @@ always @(posedge clk) begin read_cmd_axi_addr_reg <= read_cmd_axi_addr_next; read_cmd_ram_sel_reg <= read_cmd_ram_sel_next; read_cmd_ram_addr_reg <= read_cmd_ram_addr_next; + read_cmd_imm_en_reg <= read_cmd_imm_en_next; read_cmd_len_reg <= read_cmd_len_next; read_cmd_cycle_count_reg <= 
read_cmd_cycle_count_next; read_cmd_last_cycle_reg <= read_cmd_last_cycle_next; @@ -1040,6 +1096,8 @@ always @(posedge clk) begin op_table_active[op_table_start_ptr_reg[OP_TAG_WIDTH-1:0]] <= 1'b1; op_table_write_complete[op_table_start_ptr_reg[OP_TAG_WIDTH-1:0]] <= 1'b0; op_table_axi_addr[op_table_start_ptr_reg[OP_TAG_WIDTH-1:0]] <= op_table_start_axi_addr; + op_table_imm[op_table_start_ptr_reg[OP_TAG_WIDTH-1:0]] <= op_table_start_imm; + op_table_imm_en[op_table_start_ptr_reg[OP_TAG_WIDTH-1:0]] <= op_table_start_imm_en; op_table_len[op_table_start_ptr_reg[OP_TAG_WIDTH-1:0]] <= op_table_start_len; op_table_zero_len[op_table_start_ptr_reg[OP_TAG_WIDTH-1:0]] <= op_table_start_zero_len; op_table_cycle_count[op_table_start_ptr_reg[OP_TAG_WIDTH-1:0]] <= op_table_start_cycle_count; diff --git a/rtl/dma_if_desc_mux.v b/rtl/dma_if_desc_mux.v index 253c6c92b..f3fa59071 100644 --- a/rtl/dma_if_desc_mux.v +++ b/rtl/dma_if_desc_mux.v @@ -46,6 +46,10 @@ module dma_if_desc_mux # parameter RAM_ADDR_WIDTH = 16, // DMA address width parameter DMA_ADDR_WIDTH = 64, + // Immediate enable + parameter IMM_ENABLE = 0, + // Immediate width + parameter IMM_WIDTH = 32, // Length field width parameter LEN_WIDTH = 16, // Input tag field width @@ -68,6 +72,8 @@ module dma_if_desc_mux # output wire [DMA_ADDR_WIDTH-1:0] m_axis_desc_dma_addr, output wire [M_RAM_SEL_WIDTH-1:0] m_axis_desc_ram_sel, output wire [RAM_ADDR_WIDTH-1:0] m_axis_desc_ram_addr, + output wire [IMM_WIDTH-1:0] m_axis_desc_imm, + output wire m_axis_desc_imm_en, output wire [LEN_WIDTH-1:0] m_axis_desc_len, output wire [M_TAG_WIDTH-1:0] m_axis_desc_tag, output wire m_axis_desc_valid, @@ -86,6 +92,8 @@ module dma_if_desc_mux # input wire [PORTS*DMA_ADDR_WIDTH-1:0] s_axis_desc_dma_addr, input wire [PORTS*S_RAM_SEL_WIDTH-1:0] s_axis_desc_ram_sel, input wire [PORTS*RAM_ADDR_WIDTH-1:0] s_axis_desc_ram_addr, + input wire [PORTS*IMM_WIDTH-1:0] s_axis_desc_imm, + input wire [PORTS-1:0] s_axis_desc_imm_en, input wire [PORTS*LEN_WIDTH-1:0] 
s_axis_desc_len, input wire [PORTS*S_TAG_WIDTH-1:0] s_axis_desc_tag, input wire [PORTS-1:0] s_axis_desc_valid, @@ -136,6 +144,8 @@ wire [CL_PORTS-1:0] grant_encoded; reg [DMA_ADDR_WIDTH-1:0] m_axis_desc_dma_addr_int; reg [M_RAM_SEL_WIDTH-1:0] m_axis_desc_ram_sel_int; reg [RAM_ADDR_WIDTH-1:0] m_axis_desc_ram_addr_int; +reg [IMM_WIDTH-1:0] m_axis_desc_imm_int; +reg m_axis_desc_imm_en_int; reg [LEN_WIDTH-1:0] m_axis_desc_len_int; reg [M_TAG_WIDTH-1:0] m_axis_desc_tag_int; reg m_axis_desc_valid_int; @@ -148,6 +158,8 @@ assign s_axis_desc_ready = (m_axis_desc_ready_int_reg && grant_valid) << grant_e wire [DMA_ADDR_WIDTH-1:0] current_s_desc_dma_addr = s_axis_desc_dma_addr[grant_encoded*DMA_ADDR_WIDTH +: DMA_ADDR_WIDTH]; wire [S_RAM_SEL_WIDTH-1:0] current_s_desc_ram_sel = s_axis_desc_ram_sel[grant_encoded*S_RAM_SEL_WIDTH +: S_RAM_SEL_WIDTH_INT]; wire [RAM_ADDR_WIDTH-1:0] current_s_desc_ram_addr = s_axis_desc_ram_addr[grant_encoded*RAM_ADDR_WIDTH +: RAM_ADDR_WIDTH]; +wire [IMM_WIDTH-1:0] current_s_desc_imm = s_axis_desc_imm[grant_encoded*IMM_WIDTH +: IMM_WIDTH]; +wire current_s_desc_imm_en = s_axis_desc_imm_en[grant_encoded]; wire [LEN_WIDTH-1:0] current_s_desc_len = s_axis_desc_len[grant_encoded*LEN_WIDTH +: LEN_WIDTH]; wire [S_TAG_WIDTH-1:0] current_s_desc_tag = s_axis_desc_tag[grant_encoded*S_TAG_WIDTH +: S_TAG_WIDTH]; wire current_s_desc_valid = s_axis_desc_valid[grant_encoded]; @@ -182,6 +194,8 @@ always @* begin m_axis_desc_ram_sel_int[M_RAM_SEL_WIDTH-1:M_RAM_SEL_WIDTH-CL_PORTS] = grant_encoded; end m_axis_desc_ram_addr_int = current_s_desc_ram_addr; + m_axis_desc_imm_int = current_s_desc_imm; + m_axis_desc_imm_en_int = current_s_desc_imm_en; m_axis_desc_len_int = current_s_desc_len; m_axis_desc_tag_int = {grant_encoded, current_s_desc_tag}; m_axis_desc_valid_int = current_s_desc_valid && m_axis_desc_ready_int_reg && grant_valid; @@ -191,6 +205,8 @@ end reg [DMA_ADDR_WIDTH-1:0] m_axis_desc_dma_addr_reg = {DMA_ADDR_WIDTH{1'b0}}; reg [M_RAM_SEL_WIDTH-1:0] 
m_axis_desc_ram_sel_reg = {M_RAM_SEL_WIDTH{1'b0}}; reg [RAM_ADDR_WIDTH-1:0] m_axis_desc_ram_addr_reg = {RAM_ADDR_WIDTH{1'b0}}; +reg [IMM_WIDTH-1:0] m_axis_desc_imm_reg = {IMM_WIDTH{1'b0}}; +reg m_axis_desc_imm_en_reg = 1'b0; reg [LEN_WIDTH-1:0] m_axis_desc_len_reg = {LEN_WIDTH{1'b0}}; reg [M_TAG_WIDTH-1:0] m_axis_desc_tag_reg = {M_TAG_WIDTH{1'b0}}; reg m_axis_desc_valid_reg = 1'b0, m_axis_desc_valid_next; @@ -198,6 +214,8 @@ reg m_axis_desc_valid_reg = 1'b0, m_axis_desc_valid_ne reg [DMA_ADDR_WIDTH-1:0] temp_m_axis_desc_dma_addr_reg = {DMA_ADDR_WIDTH{1'b0}}; reg [M_RAM_SEL_WIDTH-1:0] temp_m_axis_desc_ram_sel_reg = {M_RAM_SEL_WIDTH{1'b0}}; reg [RAM_ADDR_WIDTH-1:0] temp_m_axis_desc_ram_addr_reg = {RAM_ADDR_WIDTH{1'b0}}; +reg [IMM_WIDTH-1:0] temp_m_axis_desc_imm_reg = {IMM_WIDTH{1'b0}}; +reg temp_m_axis_desc_imm_en_reg = 1'b0; reg [LEN_WIDTH-1:0] temp_m_axis_desc_len_reg = {LEN_WIDTH{1'b0}}; reg [M_TAG_WIDTH-1:0] temp_m_axis_desc_tag_reg = {M_TAG_WIDTH{1'b0}}; reg temp_m_axis_desc_valid_reg = 1'b0, temp_m_axis_desc_valid_next; @@ -210,6 +228,8 @@ reg store_axis_temp_to_output; assign m_axis_desc_dma_addr = m_axis_desc_dma_addr_reg; assign m_axis_desc_ram_sel = m_axis_desc_ram_sel_reg; assign m_axis_desc_ram_addr = m_axis_desc_ram_addr_reg; +assign m_axis_desc_imm = IMM_ENABLE ? m_axis_desc_imm_reg : {IMM_WIDTH{1'b0}}; +assign m_axis_desc_imm_en = IMM_ENABLE ? 
m_axis_desc_imm_en_reg : 1'b0; assign m_axis_desc_len = m_axis_desc_len_reg; assign m_axis_desc_tag = m_axis_desc_tag_reg; assign m_axis_desc_valid = m_axis_desc_valid_reg; @@ -261,12 +281,16 @@ always @(posedge clk) begin m_axis_desc_dma_addr_reg <= m_axis_desc_dma_addr_int; m_axis_desc_ram_sel_reg <= m_axis_desc_ram_sel_int; m_axis_desc_ram_addr_reg <= m_axis_desc_ram_addr_int; + m_axis_desc_imm_reg <= m_axis_desc_imm_int; + m_axis_desc_imm_en_reg <= m_axis_desc_imm_en_int; m_axis_desc_len_reg <= m_axis_desc_len_int; m_axis_desc_tag_reg <= m_axis_desc_tag_int; end else if (store_axis_temp_to_output) begin m_axis_desc_dma_addr_reg <= temp_m_axis_desc_dma_addr_reg; m_axis_desc_ram_sel_reg <= temp_m_axis_desc_ram_sel_reg; m_axis_desc_ram_addr_reg <= temp_m_axis_desc_ram_addr_reg; + m_axis_desc_imm_reg <= temp_m_axis_desc_imm_reg; + m_axis_desc_imm_en_reg <= temp_m_axis_desc_imm_en_reg; m_axis_desc_len_reg <= temp_m_axis_desc_len_reg; m_axis_desc_tag_reg <= temp_m_axis_desc_tag_reg; end @@ -275,6 +299,8 @@ always @(posedge clk) begin temp_m_axis_desc_dma_addr_reg <= m_axis_desc_dma_addr_int; temp_m_axis_desc_ram_sel_reg <= m_axis_desc_ram_sel_int; temp_m_axis_desc_ram_addr_reg <= m_axis_desc_ram_addr_int; + temp_m_axis_desc_imm_reg <= m_axis_desc_imm_int; + temp_m_axis_desc_imm_en_reg <= m_axis_desc_imm_en_int; temp_m_axis_desc_len_reg <= m_axis_desc_len_int; temp_m_axis_desc_tag_reg <= m_axis_desc_tag_int; end diff --git a/rtl/dma_if_mux.v b/rtl/dma_if_mux.v index 3df283628..c6ba26d97 100644 --- a/rtl/dma_if_mux.v +++ b/rtl/dma_if_mux.v @@ -52,6 +52,10 @@ module dma_if_mux # parameter RAM_ADDR_WIDTH = SEG_ADDR_WIDTH+$clog2(SEG_COUNT)+$clog2(SEG_BE_WIDTH), // DMA address width parameter DMA_ADDR_WIDTH = 64, + // Immediate enable + parameter IMM_ENABLE = 0, + // Immediate width + parameter IMM_WIDTH = 32, // Length field width parameter LEN_WIDTH = 16, // Input tag field width @@ -92,6 +96,8 @@ module dma_if_mux # output wire [DMA_ADDR_WIDTH-1:0] 
m_axis_write_desc_dma_addr, output wire [M_RAM_SEL_WIDTH-1:0] m_axis_write_desc_ram_sel, output wire [RAM_ADDR_WIDTH-1:0] m_axis_write_desc_ram_addr, + output wire [IMM_WIDTH-1:0] m_axis_write_desc_imm, + output wire m_axis_write_desc_imm_en, output wire [LEN_WIDTH-1:0] m_axis_write_desc_len, output wire [M_TAG_WIDTH-1:0] m_axis_write_desc_tag, output wire m_axis_write_desc_valid, @@ -128,6 +134,8 @@ module dma_if_mux # input wire [PORTS*DMA_ADDR_WIDTH-1:0] s_axis_write_desc_dma_addr, input wire [PORTS*S_RAM_SEL_WIDTH-1:0] s_axis_write_desc_ram_sel, input wire [PORTS*RAM_ADDR_WIDTH-1:0] s_axis_write_desc_ram_addr, + input wire [PORTS*IMM_WIDTH-1:0] s_axis_write_desc_imm, + input wire [PORTS-1:0] s_axis_write_desc_imm_en, input wire [PORTS*LEN_WIDTH-1:0] s_axis_write_desc_len, input wire [PORTS*S_TAG_WIDTH-1:0] s_axis_write_desc_tag, input wire [PORTS-1:0] s_axis_write_desc_valid, @@ -266,6 +274,8 @@ dma_if_mux_wr #( .M_RAM_SEL_WIDTH(M_RAM_SEL_WIDTH), .RAM_ADDR_WIDTH(RAM_ADDR_WIDTH), .DMA_ADDR_WIDTH(DMA_ADDR_WIDTH), + .IMM_ENABLE(IMM_ENABLE), + .IMM_WIDTH(IMM_WIDTH), .LEN_WIDTH(LEN_WIDTH), .S_TAG_WIDTH(S_TAG_WIDTH), .M_TAG_WIDTH(M_TAG_WIDTH), @@ -282,6 +292,8 @@ dma_if_mux_wr_inst ( .m_axis_write_desc_dma_addr(m_axis_write_desc_dma_addr), .m_axis_write_desc_ram_sel(m_axis_write_desc_ram_sel), .m_axis_write_desc_ram_addr(m_axis_write_desc_ram_addr), + .m_axis_write_desc_imm(m_axis_write_desc_imm), + .m_axis_write_desc_imm_en(m_axis_write_desc_imm_en), .m_axis_write_desc_len(m_axis_write_desc_len), .m_axis_write_desc_tag(m_axis_write_desc_tag), .m_axis_write_desc_valid(m_axis_write_desc_valid), @@ -300,6 +312,8 @@ dma_if_mux_wr_inst ( .s_axis_write_desc_dma_addr(s_axis_write_desc_dma_addr), .s_axis_write_desc_ram_sel(s_axis_write_desc_ram_sel), .s_axis_write_desc_ram_addr(s_axis_write_desc_ram_addr), + .s_axis_write_desc_imm(s_axis_write_desc_imm), + .s_axis_write_desc_imm_en(s_axis_write_desc_imm_en), .s_axis_write_desc_len(s_axis_write_desc_len), 
.s_axis_write_desc_tag(s_axis_write_desc_tag), .s_axis_write_desc_valid(s_axis_write_desc_valid), diff --git a/rtl/dma_if_mux_rd.v b/rtl/dma_if_mux_rd.v index 5882e9c11..de1c753a1 100644 --- a/rtl/dma_if_mux_rd.v +++ b/rtl/dma_if_mux_rd.v @@ -134,6 +134,7 @@ dma_if_desc_mux #( .M_RAM_SEL_WIDTH(M_RAM_SEL_WIDTH), .RAM_ADDR_WIDTH(RAM_ADDR_WIDTH), .DMA_ADDR_WIDTH(DMA_ADDR_WIDTH), + .IMM_ENABLE(0), .LEN_WIDTH(LEN_WIDTH), .S_TAG_WIDTH(S_TAG_WIDTH), .M_TAG_WIDTH(M_TAG_WIDTH), @@ -150,6 +151,8 @@ dma_if_desc_mux_inst ( .m_axis_desc_dma_addr(m_axis_read_desc_dma_addr), .m_axis_desc_ram_sel(m_axis_read_desc_ram_sel), .m_axis_desc_ram_addr(m_axis_read_desc_ram_addr), + .m_axis_desc_imm(), + .m_axis_desc_imm_en(), .m_axis_desc_len(m_axis_read_desc_len), .m_axis_desc_tag(m_axis_read_desc_tag), .m_axis_desc_valid(m_axis_read_desc_valid), @@ -168,6 +171,8 @@ dma_if_desc_mux_inst ( .s_axis_desc_dma_addr(s_axis_read_desc_dma_addr), .s_axis_desc_ram_sel(s_axis_read_desc_ram_sel), .s_axis_desc_ram_addr(s_axis_read_desc_ram_addr), + .s_axis_desc_imm(32'd0), + .s_axis_desc_imm_en(1'b0), .s_axis_desc_len(s_axis_read_desc_len), .s_axis_desc_tag(s_axis_read_desc_tag), .s_axis_desc_valid(s_axis_read_desc_valid), diff --git a/rtl/dma_if_mux_wr.v b/rtl/dma_if_mux_wr.v index d8259c0a6..e3700d1ec 100644 --- a/rtl/dma_if_mux_wr.v +++ b/rtl/dma_if_mux_wr.v @@ -52,6 +52,10 @@ module dma_if_mux_wr # parameter RAM_ADDR_WIDTH = SEG_ADDR_WIDTH+$clog2(SEG_COUNT)+$clog2(SEG_BE_WIDTH), // DMA address width parameter DMA_ADDR_WIDTH = 64, + // Immediate enable + parameter IMM_ENABLE = 0, + // Immediate width + parameter IMM_WIDTH = 32, // Length field width parameter LEN_WIDTH = 16, // Input tag field width @@ -74,6 +78,8 @@ module dma_if_mux_wr # output wire [DMA_ADDR_WIDTH-1:0] m_axis_write_desc_dma_addr, output wire [M_RAM_SEL_WIDTH-1:0] m_axis_write_desc_ram_sel, output wire [RAM_ADDR_WIDTH-1:0] m_axis_write_desc_ram_addr, + output wire [IMM_WIDTH-1:0] m_axis_write_desc_imm, + output wire 
m_axis_write_desc_imm_en, output wire [LEN_WIDTH-1:0] m_axis_write_desc_len, output wire [M_TAG_WIDTH-1:0] m_axis_write_desc_tag, output wire m_axis_write_desc_valid, @@ -92,6 +98,8 @@ module dma_if_mux_wr # input wire [PORTS*DMA_ADDR_WIDTH-1:0] s_axis_write_desc_dma_addr, input wire [PORTS*S_RAM_SEL_WIDTH-1:0] s_axis_write_desc_ram_sel, input wire [PORTS*RAM_ADDR_WIDTH-1:0] s_axis_write_desc_ram_addr, + input wire [PORTS*IMM_WIDTH-1:0] s_axis_write_desc_imm, + input wire [PORTS-1:0] s_axis_write_desc_imm_en, input wire [PORTS*LEN_WIDTH-1:0] s_axis_write_desc_len, input wire [PORTS*S_TAG_WIDTH-1:0] s_axis_write_desc_tag, input wire [PORTS-1:0] s_axis_write_desc_valid, @@ -134,6 +142,8 @@ dma_if_desc_mux #( .M_RAM_SEL_WIDTH(M_RAM_SEL_WIDTH), .RAM_ADDR_WIDTH(RAM_ADDR_WIDTH), .DMA_ADDR_WIDTH(DMA_ADDR_WIDTH), + .IMM_ENABLE(IMM_ENABLE), + .IMM_WIDTH(IMM_WIDTH), .LEN_WIDTH(LEN_WIDTH), .S_TAG_WIDTH(S_TAG_WIDTH), .M_TAG_WIDTH(M_TAG_WIDTH), @@ -150,6 +160,8 @@ dma_if_desc_mux_inst ( .m_axis_desc_dma_addr(m_axis_write_desc_dma_addr), .m_axis_desc_ram_sel(m_axis_write_desc_ram_sel), .m_axis_desc_ram_addr(m_axis_write_desc_ram_addr), + .m_axis_desc_imm(m_axis_write_desc_imm), + .m_axis_desc_imm_en(m_axis_write_desc_imm_en), .m_axis_desc_len(m_axis_write_desc_len), .m_axis_desc_tag(m_axis_write_desc_tag), .m_axis_desc_valid(m_axis_write_desc_valid), @@ -168,6 +180,8 @@ dma_if_desc_mux_inst ( .s_axis_desc_dma_addr(s_axis_write_desc_dma_addr), .s_axis_desc_ram_sel(s_axis_write_desc_ram_sel), .s_axis_desc_ram_addr(s_axis_write_desc_ram_addr), + .s_axis_desc_imm(s_axis_write_desc_imm), + .s_axis_desc_imm_en(s_axis_write_desc_imm_en), .s_axis_desc_len(s_axis_write_desc_len), .s_axis_desc_tag(s_axis_write_desc_tag), .s_axis_desc_valid(s_axis_write_desc_valid), diff --git a/rtl/dma_if_pcie.v b/rtl/dma_if_pcie.v index b8a743204..3792aafa2 100644 --- a/rtl/dma_if_pcie.v +++ b/rtl/dma_if_pcie.v @@ -63,6 +63,10 @@ module dma_if_pcie # parameter PCIE_ADDR_WIDTH = 64, // PCIe tag count 
parameter PCIE_TAG_COUNT = 256, + // Immediate enable + parameter IMM_ENABLE = 0, + // Immediate width + parameter IMM_WIDTH = 32, // Length field width parameter LEN_WIDTH = 16, // Tag field width @@ -160,6 +164,8 @@ module dma_if_pcie # input wire [PCIE_ADDR_WIDTH-1:0] s_axis_write_desc_pcie_addr, input wire [RAM_SEL_WIDTH-1:0] s_axis_write_desc_ram_sel, input wire [RAM_ADDR_WIDTH-1:0] s_axis_write_desc_ram_addr, + input wire [IMM_WIDTH-1:0] s_axis_write_desc_imm, + input wire s_axis_write_desc_imm_en, input wire [LEN_WIDTH-1:0] s_axis_write_desc_len, input wire [TAG_WIDTH-1:0] s_axis_write_desc_tag, input wire s_axis_write_desc_valid, @@ -385,6 +391,8 @@ dma_if_pcie_wr #( .RAM_SEG_BE_WIDTH(RAM_SEG_BE_WIDTH), .RAM_SEG_ADDR_WIDTH(RAM_SEG_ADDR_WIDTH), .PCIE_ADDR_WIDTH(PCIE_ADDR_WIDTH), + .IMM_ENABLE(IMM_ENABLE), + .IMM_WIDTH(IMM_WIDTH), .LEN_WIDTH(LEN_WIDTH), .TAG_WIDTH(TAG_WIDTH), .OP_TABLE_SIZE(WRITE_OP_TABLE_SIZE), @@ -426,6 +434,8 @@ dma_if_pcie_wr_inst ( .s_axis_write_desc_pcie_addr(s_axis_write_desc_pcie_addr), .s_axis_write_desc_ram_sel(s_axis_write_desc_ram_sel), .s_axis_write_desc_ram_addr(s_axis_write_desc_ram_addr), + .s_axis_write_desc_imm(s_axis_write_desc_imm), + .s_axis_write_desc_imm_en(s_axis_write_desc_imm_en), .s_axis_write_desc_len(s_axis_write_desc_len), .s_axis_write_desc_tag(s_axis_write_desc_tag), .s_axis_write_desc_valid(s_axis_write_desc_valid), diff --git a/rtl/dma_if_pcie_wr.v b/rtl/dma_if_pcie_wr.v index c746120a6..5016740aa 100644 --- a/rtl/dma_if_pcie_wr.v +++ b/rtl/dma_if_pcie_wr.v @@ -61,6 +61,10 @@ module dma_if_pcie_wr # parameter RAM_SEG_ADDR_WIDTH = RAM_ADDR_WIDTH-$clog2(RAM_SEG_COUNT*RAM_SEG_BE_WIDTH), // PCIe address width parameter PCIE_ADDR_WIDTH = 64, + // Immediate enable + parameter IMM_ENABLE = 0, + // Immediate width + parameter IMM_WIDTH = 32, // Length field width parameter LEN_WIDTH = 16, // Tag field width @@ -108,6 +112,8 @@ module dma_if_pcie_wr # input wire [PCIE_ADDR_WIDTH-1:0] s_axis_write_desc_pcie_addr, input 
wire [RAM_SEL_WIDTH-1:0] s_axis_write_desc_ram_sel, input wire [RAM_ADDR_WIDTH-1:0] s_axis_write_desc_ram_addr, + input wire [IMM_WIDTH-1:0] s_axis_write_desc_imm, + input wire s_axis_write_desc_imm_en, input wire [LEN_WIDTH-1:0] s_axis_write_desc_len, input wire [TAG_WIDTH-1:0] s_axis_write_desc_tag, input wire s_axis_write_desc_valid, @@ -230,6 +236,11 @@ initial begin $error("Error: RAM_ADDR_WIDTH does not match RAM configuration (instance %m)"); $finish; end + + if (IMM_ENABLE && IMM_WIDTH > TLP_DATA_WIDTH) begin + $error("Error: IMM_WIDTH must not be larger than the PCIe interface width (instance %m)"); + $finish; + end end localparam [2:0] @@ -265,6 +276,8 @@ reg read_cmd_ready; reg [PCIE_ADDR_WIDTH-1:0] pcie_addr_reg = {PCIE_ADDR_WIDTH{1'b0}}, pcie_addr_next; reg [RAM_SEL_WIDTH-1:0] ram_sel_reg = {RAM_SEL_WIDTH{1'b0}}, ram_sel_next; reg [RAM_ADDR_WIDTH-1:0] ram_addr_reg = {RAM_ADDR_WIDTH{1'b0}}, ram_addr_next; +reg [IMM_WIDTH-1:0] imm_reg = {IMM_WIDTH{1'b0}}, imm_next; +reg imm_en_reg = 1'b0, imm_en_next; reg [LEN_WIDTH-1:0] op_count_reg = {LEN_WIDTH{1'b0}}, op_count_next; reg [LEN_WIDTH-1:0] tr_count_reg = {LEN_WIDTH{1'b0}}, tr_count_next; reg [12:0] tlp_count_reg = 13'd0, tlp_count_next; @@ -274,6 +287,7 @@ reg zero_len_reg = 1'b0, zero_len_next; reg [PCIE_ADDR_WIDTH-1:0] read_pcie_addr_reg = {PCIE_ADDR_WIDTH{1'b0}}, read_pcie_addr_next; reg [RAM_SEL_WIDTH-1:0] read_ram_sel_reg = {RAM_SEL_WIDTH{1'b0}}, read_ram_sel_next; reg [RAM_ADDR_WIDTH-1:0] read_ram_addr_reg = {RAM_ADDR_WIDTH{1'b0}}, read_ram_addr_next; +reg read_imm_en_reg = 1'b0, read_imm_en_next; reg [LEN_WIDTH-1:0] read_len_reg = {LEN_WIDTH{1'b0}}, read_len_next; reg [RAM_SEG_COUNT-1:0] read_ram_mask_reg = {RAM_SEG_COUNT{1'b0}}, read_ram_mask_next; reg [RAM_SEG_COUNT-1:0] read_ram_mask_0_reg = {RAM_SEG_COUNT{1'b0}}, read_ram_mask_0_next; @@ -286,6 +300,8 @@ reg [RAM_OFFSET_WIDTH-1:0] start_offset_reg = {RAM_OFFSET_WIDTH{1'b0}}, start_of reg [RAM_OFFSET_WIDTH-1:0] end_offset_reg = 
{RAM_OFFSET_WIDTH{1'b0}}, end_offset_next; reg [PCIE_ADDR_WIDTH-1:0] tlp_addr_reg = {PCIE_ADDR_WIDTH{1'b0}}, tlp_addr_next; +reg [IMM_WIDTH-1:0] tlp_imm_reg = {IMM_WIDTH{1'b0}}, tlp_imm_next; +reg tlp_imm_en_reg = 1'b0, tlp_imm_en_next; reg [11:0] tlp_len_reg = 12'd0, tlp_len_next; reg tlp_zero_len_reg = 1'b0, tlp_zero_len_next; reg [RAM_OFFSET_WIDTH-1:0] offset_reg = {RAM_OFFSET_WIDTH{1'b0}}, offset_next; @@ -299,6 +315,7 @@ reg tlp_frame_reg = 1'b0, tlp_frame_next; reg [PCIE_ADDR_WIDTH-1:0] read_cmd_pcie_addr_reg = {PCIE_ADDR_WIDTH{1'b0}}, read_cmd_pcie_addr_next; reg [RAM_SEL_WIDTH-1:0] read_cmd_ram_sel_reg = {RAM_SEL_WIDTH{1'b0}}, read_cmd_ram_sel_next; reg [RAM_ADDR_WIDTH-1:0] read_cmd_ram_addr_reg = {RAM_ADDR_WIDTH{1'b0}}, read_cmd_ram_addr_next; +reg read_cmd_imm_en_reg = 1'b0, read_cmd_imm_en_next; reg [11:0] read_cmd_len_reg = 12'd0, read_cmd_len_next; reg [CYCLE_COUNT_WIDTH-1:0] read_cmd_cycle_count_reg = {CYCLE_COUNT_WIDTH{1'b0}}, read_cmd_cycle_count_next; reg read_cmd_last_cycle_reg = 1'b0, read_cmd_last_cycle_next; @@ -395,6 +412,8 @@ assign stat_wr_tx_stall = stat_wr_tx_stall_reg; // operation tag management reg [OP_TAG_WIDTH+1-1:0] op_table_start_ptr_reg = 0; reg [PCIE_ADDR_WIDTH-1:0] op_table_start_pcie_addr; +reg [IMM_WIDTH-1:0] op_table_start_imm; +reg op_table_start_imm_en; reg [11:0] op_table_start_len; reg op_table_start_zero_len; reg [9:0] op_table_start_dword_len; @@ -415,6 +434,10 @@ reg [2**OP_TAG_WIDTH-1:0] op_table_tx_done = 0; (* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *) reg [PCIE_ADDR_WIDTH-1:0] op_table_pcie_addr[2**OP_TAG_WIDTH-1:0]; (* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *) +reg [IMM_WIDTH-1:0] op_table_imm[2**OP_TAG_WIDTH-1:0]; +(* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *) +reg op_table_imm_en[2**OP_TAG_WIDTH-1:0]; +(* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *) reg [11:0] op_table_len[2**OP_TAG_WIDTH-1:0]; (* ram_style = "distributed", ramstyle = 
"no_rw_check, mlab" *) reg op_table_zero_len[2**OP_TAG_WIDTH-1:0]; @@ -434,6 +457,8 @@ integer i; initial begin for (i = 0; i < 2**OP_TAG_WIDTH; i = i + 1) begin op_table_pcie_addr[i] = 0; + op_table_imm[i] = 0; + op_table_imm_en[i] = 0; op_table_len[i] = 0; op_table_zero_len[i] = 0; op_table_dword_len[i] = 0; @@ -463,6 +488,8 @@ always @* begin pcie_addr_next = pcie_addr_reg; ram_sel_next = ram_sel_reg; ram_addr_next = ram_addr_reg; + imm_next = imm_reg; + imm_en_next = imm_en_reg; op_count_next = op_count_reg; tr_count_next = tr_count_reg; tlp_count_next = tlp_count_reg; @@ -472,12 +499,15 @@ always @* begin read_cmd_pcie_addr_next = read_cmd_pcie_addr_reg; read_cmd_ram_sel_next = read_cmd_ram_sel_reg; read_cmd_ram_addr_next = read_cmd_ram_addr_reg; + read_cmd_imm_en_next = read_cmd_imm_en_reg; read_cmd_len_next = read_cmd_len_reg; read_cmd_cycle_count_next = read_cmd_cycle_count_reg; read_cmd_last_cycle_next = read_cmd_last_cycle_reg; read_cmd_valid_next = read_cmd_valid_reg && !read_cmd_ready; op_table_start_pcie_addr = pcie_addr_reg; + op_table_start_imm = imm_reg; + op_table_start_imm_en = imm_en_reg; op_table_start_len = tlp_count_reg; op_table_start_zero_len = zero_len_reg; op_table_start_dword_len = (tlp_count_reg + pcie_addr_reg[1:0] + 3) >> 2; @@ -494,8 +524,15 @@ always @* begin s_axis_write_desc_ready_next = !op_table_active[op_table_start_ptr_reg[OP_TAG_WIDTH-1:0]] && ($unsigned(op_table_start_ptr_reg - op_table_finish_ptr_reg) < 2**OP_TAG_WIDTH) && enable; pcie_addr_next = s_axis_write_desc_pcie_addr; - ram_sel_next = s_axis_write_desc_ram_sel; - ram_addr_next = s_axis_write_desc_ram_addr; + if (IMM_ENABLE && s_axis_write_desc_imm_en) begin + ram_sel_next = 0; + ram_addr_next = 0; + end else begin + ram_sel_next = s_axis_write_desc_ram_sel; + ram_addr_next = s_axis_write_desc_ram_addr; + end + imm_next = s_axis_write_desc_imm; + imm_en_next = IMM_ENABLE && s_axis_write_desc_imm_en; if (s_axis_write_desc_len == 0) begin // zero-length operation 
op_count_next = 1; @@ -544,6 +581,7 @@ always @* begin read_cmd_pcie_addr_next = pcie_addr_reg; read_cmd_ram_sel_next = ram_sel_reg; read_cmd_ram_addr_next = ram_addr_reg; + read_cmd_imm_en_next = imm_en_reg; read_cmd_len_next = tlp_count_reg; read_cmd_cycle_count_next = (tlp_count_reg + pcie_addr_reg[1:0] - 1) >> $clog2(TLP_DATA_WIDTH_BYTES); op_table_start_cycle_count = read_cmd_cycle_count_next; @@ -555,6 +593,8 @@ always @* begin op_count_next = op_count_reg - tlp_count_reg; op_table_start_pcie_addr = pcie_addr_reg; + op_table_start_imm = imm_reg; + op_table_start_imm_en = imm_en_reg; op_table_start_len = tlp_count_reg; op_table_start_zero_len = zero_len_reg; op_table_start_dword_len = (tlp_count_reg + pcie_addr_reg[1:0] + 3) >> 2; @@ -614,6 +654,7 @@ always @* begin read_pcie_addr_next = read_pcie_addr_reg; read_ram_sel_next = read_ram_sel_reg; read_ram_addr_next = read_ram_addr_reg; + read_imm_en_next = read_imm_en_reg; read_len_next = read_len_reg; read_ram_mask_next = read_ram_mask_reg; read_ram_mask_0_next = read_ram_mask_0_reg; @@ -636,6 +677,7 @@ always @* begin read_pcie_addr_next = read_cmd_pcie_addr_reg; read_ram_sel_next = read_cmd_ram_sel_reg; read_ram_addr_next = read_cmd_ram_addr_reg; + read_imm_en_next = read_cmd_imm_en_reg; read_len_next = read_cmd_len_reg; read_cycle_count_next = read_cmd_cycle_count_reg; read_last_cycle_next = read_cmd_last_cycle_reg; @@ -681,14 +723,14 @@ always @* begin if (read_ram_mask_reg[i]) begin ram_rd_cmd_sel_next[i*RAM_SEL_WIDTH +: RAM_SEL_WIDTH] = read_ram_sel_reg; ram_rd_cmd_addr_next[i*RAM_SEG_ADDR_WIDTH +: RAM_SEG_ADDR_WIDTH] = read_ram_addr_reg[RAM_ADDR_WIDTH-1:RAM_ADDR_WIDTH-RAM_SEG_ADDR_WIDTH]; - ram_rd_cmd_valid_next[i] = 1'b1; + ram_rd_cmd_valid_next[i] = !(IMM_ENABLE && read_imm_en_reg); end if (read_ram_mask_1_reg[i]) begin ram_rd_cmd_addr_next[i*RAM_SEG_ADDR_WIDTH +: RAM_SEG_ADDR_WIDTH] = read_ram_addr_reg[RAM_ADDR_WIDTH-1:RAM_ADDR_WIDTH-RAM_SEG_ADDR_WIDTH]+1; end end - mask_fifo_wr_mask = 
read_ram_mask_reg; + mask_fifo_wr_mask = (IMM_ENABLE && read_imm_en_reg) ? 0 : read_ram_mask_reg; mask_fifo_we = 1'b1; if (read_len_next > TLP_DATA_WIDTH_BYTES) begin @@ -718,6 +760,7 @@ always @* begin read_pcie_addr_next = read_cmd_pcie_addr_reg; read_ram_sel_next = read_cmd_ram_sel_reg; read_ram_addr_next = read_cmd_ram_addr_reg; + read_imm_en_next = read_cmd_imm_en_reg; read_len_next = read_cmd_len_reg; read_cycle_count_next = read_cmd_cycle_count_reg; read_last_cycle_next = read_cmd_last_cycle_reg; @@ -769,6 +812,8 @@ always @* begin stat_wr_req_finish_valid_next = 1'b0; tlp_addr_next = tlp_addr_reg; + tlp_imm_next = tlp_imm_reg; + tlp_imm_en_next = tlp_imm_en_reg; tlp_len_next = tlp_len_reg; tlp_zero_len_next = tlp_zero_len_reg; dword_count_next = dword_count_reg; @@ -839,6 +884,8 @@ always @* begin tlp_frame_next = 1'b0; tlp_addr_next = op_table_pcie_addr[op_table_tx_start_ptr_reg[OP_TAG_WIDTH-1:0]]; + tlp_imm_next = op_table_imm[op_table_tx_start_ptr_reg[OP_TAG_WIDTH-1:0]]; + tlp_imm_en_next = op_table_imm_en[op_table_tx_start_ptr_reg[OP_TAG_WIDTH-1:0]]; tlp_len_next = op_table_len[op_table_tx_start_ptr_reg[OP_TAG_WIDTH-1:0]]; tlp_zero_len_next = op_table_zero_len[op_table_tx_start_ptr_reg[OP_TAG_WIDTH-1:0]]; dword_count_next = op_table_dword_len[op_table_tx_start_ptr_reg[OP_TAG_WIDTH-1:0]]; @@ -857,7 +904,7 @@ always @* begin // transfer state, transfer data if (!tx_wr_req_tlp_valid_reg || tx_wr_req_tlp_ready) begin - tx_wr_req_tlp_data_next = {2{ram_rd_resp_data}} >> (RAM_DATA_WIDTH-offset_reg*8); + tx_wr_req_tlp_data_next = ((IMM_ENABLE && tlp_imm_en_reg) ? 
{2{{RAM_DATA_WIDTH{1'b0}} | tlp_imm_reg}} : {2{ram_rd_resp_data}}) >> (RAM_DATA_WIDTH-offset_reg*8); if (dword_count_reg >= TLP_STRB_WIDTH) begin tx_wr_req_tlp_strb_next = {TLP_STRB_WIDTH{1'b1}}; end else begin @@ -895,6 +942,8 @@ always @* begin // skip idle state if possible tlp_addr_next = op_table_pcie_addr[op_table_tx_start_ptr_reg[OP_TAG_WIDTH-1:0]]; + tlp_imm_next = op_table_imm[op_table_tx_start_ptr_reg[OP_TAG_WIDTH-1:0]]; + tlp_imm_en_next = op_table_imm_en[op_table_tx_start_ptr_reg[OP_TAG_WIDTH-1:0]]; tlp_len_next = op_table_len[op_table_tx_start_ptr_reg[OP_TAG_WIDTH-1:0]]; tlp_zero_len_next = op_table_zero_len[op_table_tx_start_ptr_reg[OP_TAG_WIDTH-1:0]]; dword_count_next = op_table_dword_len[op_table_tx_start_ptr_reg[OP_TAG_WIDTH-1:0]]; @@ -977,6 +1026,8 @@ always @(posedge clk) begin pcie_addr_reg <= pcie_addr_next; ram_sel_reg <= ram_sel_next; ram_addr_reg <= ram_addr_next; + imm_reg <= imm_next; + imm_en_reg <= imm_en_next; op_count_reg <= op_count_next; tr_count_reg <= tr_count_next; tlp_count_reg <= tlp_count_next; @@ -986,6 +1037,7 @@ always @(posedge clk) begin read_pcie_addr_reg <= read_pcie_addr_next; read_ram_sel_reg <= read_ram_sel_next; read_ram_addr_reg <= read_ram_addr_next; + read_imm_en_reg <= read_imm_en_next; read_len_reg <= read_len_next; read_ram_mask_reg <= read_ram_mask_next; read_ram_mask_0_reg <= read_ram_mask_0_next; @@ -998,6 +1050,8 @@ always @(posedge clk) begin end_offset_reg <= end_offset_next; tlp_addr_reg <= tlp_addr_next; + tlp_imm_reg <= tlp_imm_next; + tlp_imm_en_reg <= tlp_imm_en_next; tlp_len_reg <= tlp_len_next; tlp_zero_len_reg <= tlp_zero_len_next; dword_count_reg <= dword_count_next; @@ -1011,6 +1065,7 @@ always @(posedge clk) begin read_cmd_pcie_addr_reg <= read_cmd_pcie_addr_next; read_cmd_ram_sel_reg <= read_cmd_ram_sel_next; read_cmd_ram_addr_reg <= read_cmd_ram_addr_next; + read_cmd_imm_en_reg <= read_cmd_imm_en_next; read_cmd_len_reg <= read_cmd_len_next; read_cmd_cycle_count_reg <= 
read_cmd_cycle_count_next; read_cmd_last_cycle_reg <= read_cmd_last_cycle_next; @@ -1066,6 +1121,8 @@ always @(posedge clk) begin op_table_active[op_table_start_ptr_reg[OP_TAG_WIDTH-1:0]] <= 1'b1; op_table_tx_done[op_table_start_ptr_reg[OP_TAG_WIDTH-1:0]] <= 1'b0; op_table_pcie_addr[op_table_start_ptr_reg[OP_TAG_WIDTH-1:0]] <= op_table_start_pcie_addr; + op_table_imm[op_table_start_ptr_reg[OP_TAG_WIDTH-1:0]] <= op_table_start_imm; + op_table_imm_en[op_table_start_ptr_reg[OP_TAG_WIDTH-1:0]] <= op_table_start_imm_en; op_table_len[op_table_start_ptr_reg[OP_TAG_WIDTH-1:0]] <= op_table_start_len; op_table_zero_len[op_table_start_ptr_reg[OP_TAG_WIDTH-1:0]] <= op_table_start_zero_len; op_table_dword_len[op_table_start_ptr_reg[OP_TAG_WIDTH-1:0]] <= op_table_start_dword_len; diff --git a/tb/dma_if_axi_wr/Makefile b/tb/dma_if_axi_wr/Makefile index 7f19735db..8bccb276a 100644 --- a/tb/dma_if_axi_wr/Makefile +++ b/tb/dma_if_axi_wr/Makefile @@ -42,6 +42,8 @@ export PARAM_RAM_SEG_COUNT ?= 2 export PARAM_RAM_SEG_DATA_WIDTH ?= $(shell expr $(PARAM_AXI_DATA_WIDTH) \* 2 / $(PARAM_RAM_SEG_COUNT) ) export PARAM_RAM_SEG_BE_WIDTH ?= $(shell expr $(PARAM_RAM_SEG_DATA_WIDTH) / 8 ) export PARAM_RAM_SEG_ADDR_WIDTH ?= $(shell python -c "print($(PARAM_RAM_ADDR_WIDTH) - ($(PARAM_RAM_SEG_COUNT)*$(PARAM_RAM_SEG_BE_WIDTH)-1).bit_length())") +export PARAM_IMM_ENABLE ?= 1 +export PARAM_IMM_WIDTH ?= $(PARAM_AXI_DATA_WIDTH) export PARAM_LEN_WIDTH ?= 16 export PARAM_TAG_WIDTH ?= 8 export PARAM_OP_TABLE_SIZE ?= $(shell python -c "print(2**$(PARAM_AXI_ID_WIDTH))") @@ -60,6 +62,8 @@ ifeq ($(SIM), icarus) COMPILE_ARGS += -P $(TOPLEVEL).RAM_SEG_DATA_WIDTH=$(PARAM_RAM_SEG_DATA_WIDTH) COMPILE_ARGS += -P $(TOPLEVEL).RAM_SEG_BE_WIDTH=$(PARAM_RAM_SEG_BE_WIDTH) COMPILE_ARGS += -P $(TOPLEVEL).RAM_SEG_ADDR_WIDTH=$(PARAM_RAM_SEG_ADDR_WIDTH) + COMPILE_ARGS += -P $(TOPLEVEL).IMM_ENABLE=$(PARAM_IMM_ENABLE) + COMPILE_ARGS += -P $(TOPLEVEL).IMM_WIDTH=$(PARAM_IMM_WIDTH) COMPILE_ARGS += -P 
async def run_test_write_imm(dut, idle_inserter=None, backpressure_inserter=None):
    """Check immediate-data write descriptors against the AXI RAM model.

    For every payload length from 1 byte up to, but not including, the byte
    width of ``s_axis_write_desc_imm``, and for every AXI offset in the first
    ``byte_lanes + 1`` and the last ``byte_lanes`` positions of a 4096-byte
    window based at 0x1000, the test pre-fills the target region with 0xAA,
    sends a descriptor with ``imm_en=1`` that carries the payload in ``imm``,
    and then checks the returned status and the resulting RAM contents,
    including one guard byte on each side of the payload.

    Args:
        dut: Design-under-test handle.
        idle_inserter: Value forwarded to ``TB.set_idle_generator``.
        backpressure_inserter: Value forwarded to
            ``TB.set_backpressure_generator``.
    """
    tb = TB(dut)

    lanes = tb.axi_ram.byte_lanes
    num_tags = 2**len(tb.write_desc_source.bus.tag)

    tag = 1

    tb.set_idle_generator(idle_inserter)
    tb.set_backpressure_generator(backpressure_inserter)

    await tb.cycle_reset()

    tb.dut.enable.value = 1

    imm_bytes = len(dut.s_axis_write_desc_imm) // 8

    # Offsets hugging both ends of the 4096-byte window.
    offsets = [*range(lanes + 1), *range(4096 - lanes, 4096)]

    # NOTE(review): the upper bound excludes the full immediate width and the
    # lower bound excludes zero-length operations -- confirm this is intended.
    for length in range(1, imm_bytes):
        for offset in offsets:
            tb.log.info("length %d, axi_offset %d", length, offset)
            addr = 0x1000 + offset

            # Incrementing byte pattern, packed little-endian into the
            # immediate field.
            payload = bytearray(i % 256 for i in range(length))
            imm = int.from_bytes(payload, 'little')

            # Paint the surrounding region so stray writes are detectable.
            tb.axi_ram.write(addr - 128, b'\xaa' * (length + 256))

            tb.log.debug("Immediate: 0x%x", imm)

            await tb.write_desc_source.send(
                DescTransaction(
                    axi_addr=addr,
                    ram_addr=0,
                    ram_sel=0,
                    imm=imm,
                    imm_en=1,
                    len=length,
                    tag=tag,
                )
            )

            status = await tb.write_desc_status_sink.recv()

            tb.log.info("status: %s", status)

            assert int(status.tag) == tag
            assert int(status.error) == 0

            dump_start = (addr & ~0xf) - 16
            dump_span = (((addr & 0xf) + length - 1) & ~0xf) + 48
            tb.log.debug("%s", tb.axi_ram.hexdump_str(dump_start, dump_span, prefix="AXI "))

            # One untouched 0xAA guard byte must remain on each side.
            expected = b'\xaa' + payload + b'\xaa'
            assert tb.axi_ram.read(addr - 1, length + 2) == expected

            tag = (tag + 1) % num_tags

    for _ in range(2):
        await RisingEdge(dut.clk)
$(PARAM_TLP_SEG_COUNT) \* $(PARA export PARAM_RAM_SEG_BE_WIDTH ?= $(shell expr $(PARAM_RAM_SEG_DATA_WIDTH) / 8 ) export PARAM_RAM_SEG_ADDR_WIDTH ?= $(shell python -c "print($(PARAM_RAM_ADDR_WIDTH) - ($(PARAM_RAM_SEG_COUNT)*$(PARAM_RAM_SEG_BE_WIDTH)-1).bit_length())") export PARAM_PCIE_ADDR_WIDTH ?= 64 +export PARAM_IMM_ENABLE ?= 1 +export PARAM_IMM_WIDTH ?= $(shell expr $(PARAM_TLP_SEG_COUNT) \* $(PARAM_TLP_SEG_DATA_WIDTH) ) export PARAM_LEN_WIDTH ?= 20 export PARAM_TAG_WIDTH ?= 8 export PARAM_OP_TABLE_SIZE ?= $(shell echo "$$(( 1 << ($(PARAM_TX_SEQ_NUM_WIDTH)-1) ))" ) @@ -70,6 +72,8 @@ ifeq ($(SIM), icarus) COMPILE_ARGS += -P $(TOPLEVEL).RAM_SEG_BE_WIDTH=$(PARAM_RAM_SEG_BE_WIDTH) COMPILE_ARGS += -P $(TOPLEVEL).RAM_SEG_ADDR_WIDTH=$(PARAM_RAM_SEG_ADDR_WIDTH) COMPILE_ARGS += -P $(TOPLEVEL).PCIE_ADDR_WIDTH=$(PARAM_PCIE_ADDR_WIDTH) + COMPILE_ARGS += -P $(TOPLEVEL).IMM_ENABLE=$(PARAM_IMM_ENABLE) + COMPILE_ARGS += -P $(TOPLEVEL).IMM_WIDTH=$(PARAM_IMM_WIDTH) COMPILE_ARGS += -P $(TOPLEVEL).LEN_WIDTH=$(PARAM_LEN_WIDTH) COMPILE_ARGS += -P $(TOPLEVEL).TAG_WIDTH=$(PARAM_TAG_WIDTH) COMPILE_ARGS += -P $(TOPLEVEL).OP_TABLE_SIZE=$(PARAM_OP_TABLE_SIZE) @@ -98,6 +102,8 @@ else ifeq ($(SIM), verilator) COMPILE_ARGS += -GRAM_SEG_BE_WIDTH=$(PARAM_RAM_SEG_BE_WIDTH) COMPILE_ARGS += -GRAM_SEG_ADDR_WIDTH=$(PARAM_RAM_SEG_ADDR_WIDTH) COMPILE_ARGS += -GPCIE_ADDR_WIDTH=$(PARAM_PCIE_ADDR_WIDTH) + COMPILE_ARGS += -GIMM_WIDTH=$(PARAM_IMM_WIDTH) + COMPILE_ARGS += -GIMM_ENABLE=$(PARAM_IMM_ENABLE) COMPILE_ARGS += -GLEN_WIDTH=$(PARAM_LEN_WIDTH) COMPILE_ARGS += -GTAG_WIDTH=$(PARAM_TAG_WIDTH) COMPILE_ARGS += -GOP_TABLE_SIZE=$(PARAM_OP_TABLE_SIZE) diff --git a/tb/dma_if_pcie_wr/test_dma_if_pcie_wr.py b/tb/dma_if_pcie_wr/test_dma_if_pcie_wr.py index 8e3fa2cff..63ced66f8 100644 --- a/tb/dma_if_pcie_wr/test_dma_if_pcie_wr.py +++ b/tb/dma_if_pcie_wr/test_dma_if_pcie_wr.py @@ -53,7 +53,7 @@ except ImportError: del sys.path[0] DescBus, DescTransaction, DescSource, DescSink, DescMonitor = define_stream("Desc", 
async def run_test_write_imm(dut, idle_inserter=None, backpressure_inserter=None):
    """Check immediate-data write descriptors over the PCIe DMA interface.

    For every payload length from 0 up to and including the byte width of
    ``s_axis_write_desc_imm``, and for every PCIe offset under test, the test
    pre-fills the host memory region with 0xAA, sends a descriptor with
    ``imm_en=1`` that carries the payload in ``imm``, and then checks the
    returned status and the resulting host memory contents, including one
    guard byte on each side of the payload.

    The offsets default to the first and last four byte positions of a
    4096-byte window based at 0x1000; setting the ``PCIE_OFFSET`` environment
    variable restricts the run to that single offset.

    Args:
        dut: Design-under-test handle.
        idle_inserter: Value forwarded to ``TB.set_idle_generator``.
        backpressure_inserter: Value forwarded to
            ``TB.set_backpressure_generator``.
    """
    tb = TB(dut)

    # Look the override up once instead of querying the environment twice.
    pcie_offset_env = os.getenv("PCIE_OFFSET")
    if pcie_offset_env is None:
        pcie_offsets = list(range(4)) + list(range(4096 - 4, 4096))
    else:
        pcie_offsets = [int(pcie_offset_env)]

    byte_lanes = tb.dma_ram.byte_lanes
    tag_count = 2**len(tb.write_desc_source.bus.tag)

    cur_tag = 1

    tb.set_idle_generator(idle_inserter)
    tb.set_backpressure_generator(backpressure_inserter)

    await tb.cycle_reset()

    await tb.rc.enumerate(enable_bus_mastering=True)

    mem = tb.rc.mem_pool.alloc_region(16*1024*1024)
    mem_base = mem.get_absolute_address(0)

    # Assign through ``.value``: the ``<=`` operator form is deprecated in
    # cocotb and is not used by the sibling AXI immediate-write test.
    tb.dut.enable.value = 1

    imm_bytes = len(dut.s_axis_write_desc_imm) // 8

    for length in range(imm_bytes + 1):
        for pcie_offset in pcie_offsets:
            tb.log.info("length %d, pcie_offset %d", length, pcie_offset)
            pcie_addr = pcie_offset+0x1000

            # Incrementing byte pattern, packed little-endian into the
            # immediate field.
            test_data = bytearray([x % 256 for x in range(length)])
            imm = int.from_bytes(test_data, 'little')

            # Paint the surrounding region so stray writes are detectable.
            mem[pcie_addr-128:pcie_addr-128+len(test_data)+256] = b'\xaa'*(len(test_data)+256)

            tb.log.debug("Immediate: 0x%x", imm)

            desc = DescTransaction(
                pcie_addr=mem_base+pcie_addr,
                ram_addr=0,
                ram_sel=0,
                imm=imm,
                imm_en=1,
                len=len(test_data),
                tag=cur_tag,
            )
            await tb.write_desc_source.send(desc)

            status = await tb.write_desc_status_sink.recv()
            # NOTE(review): presumably lets the write land in host memory
            # before it is inspected -- confirm against the TB model.
            await Timer(100 + (length // byte_lanes), 'ns')

            tb.log.info("status: %s", status)

            assert int(status.tag) == cur_tag
            assert int(status.error) == 0

            tb.log.debug("%s", hexdump_str(mem, (pcie_addr & ~0xf)-16, (((pcie_addr & 0xf)+length-1) & ~0xf)+48, prefix="PCIe "))

            # One untouched 0xAA guard byte must remain on each side.
            assert mem[pcie_addr-1:pcie_addr+len(test_data)+1] == b'\xaa'+test_data+b'\xaa'

            cur_tag = (cur_tag + 1) % tag_count

    await RisingEdge(dut.clk)
    await RisingEdge(dut.clk)
a/example/common/driver/example/example_driver.c b/example/common/driver/example/example_driver.c index b354be3a2..8f8a193ec 100644 --- a/example/common/driver/example/example_driver.c +++ b/example/common/driver/example/example_driver.c @@ -234,6 +234,23 @@ static int edev_probe(struct pci_dev *pdev, const struct pci_device_id *ent) print_hex_dump(KERN_INFO, "", DUMP_PREFIX_NONE, 16, 1, edev->dma_region + 0x0200, 256, true); + dev_info(dev, "start immediate write to host"); + iowrite32((edev->dma_region_addr + 0x0200) & 0xffffffff, edev->bar[0] + 0x000200); + iowrite32(((edev->dma_region_addr + 0x0200) >> 32) & 0xffffffff, edev->bar[0] + 0x000204); + iowrite32(0x44332211, edev->bar[0] + 0x000208); + iowrite32(0, edev->bar[0] + 0x00020C); + iowrite32(0x4, edev->bar[0] + 0x000210); + iowrite32(0x100AA, edev->bar[0] + 0x000214); + + msleep(1); + + dev_info(dev, "Read status"); + dev_info(dev, "%08x", ioread32(edev->bar[0] + 0x000218)); + + dev_info(dev, "read data"); + print_hex_dump(KERN_INFO, "", DUMP_PREFIX_NONE, 16, 1, + edev->dma_region + 0x0200, 4, true); + // probe complete return 0; diff --git a/example/common/rtl/example_core.v b/example/common/rtl/example_core.v index 5d11aacf5..538945fb5 100644 --- a/example/common/rtl/example_core.v +++ b/example/common/rtl/example_core.v @@ -41,6 +41,10 @@ module example_core # parameter AXIL_STRB_WIDTH = (AXIL_DATA_WIDTH/8), // DMA address width parameter DMA_ADDR_WIDTH = 64, + // DMA immediate enable + parameter DMA_IMM_ENABLE = 0, + // DMA immediate width + parameter DMA_IMM_WIDTH = 32, // DMA Length field width parameter DMA_LEN_WIDTH = 16, // DMA Tag field width @@ -109,6 +113,8 @@ module example_core # output wire [DMA_ADDR_WIDTH-1:0] m_axis_dma_write_desc_dma_addr, output wire [RAM_SEL_WIDTH-1:0] m_axis_dma_write_desc_ram_sel, output wire [RAM_ADDR_WIDTH-1:0] m_axis_dma_write_desc_ram_addr, + output wire [DMA_IMM_WIDTH-1:0] m_axis_dma_write_desc_imm, + output wire m_axis_dma_write_desc_imm_en, output wire 
[DMA_LEN_WIDTH-1:0] m_axis_dma_write_desc_len, output wire [DMA_TAG_WIDTH-1:0] m_axis_dma_write_desc_tag, output wire m_axis_dma_write_desc_valid, @@ -145,6 +151,8 @@ module example_core # output wire [31:0] msi_irq ); +localparam RAM_ADDR_IMM_WIDTH = (DMA_IMM_ENABLE && (DMA_IMM_WIDTH > RAM_ADDR_WIDTH)) ? DMA_IMM_WIDTH : RAM_ADDR_WIDTH; + dma_psdpram #( .SIZE(16384), .SEG_COUNT(RAM_SEG_COUNT), @@ -203,7 +211,8 @@ reg [3:0] dma_read_desc_status_error_reg = 0, dma_read_desc_status_error_next; reg dma_read_desc_status_valid_reg = 0, dma_read_desc_status_valid_next; reg [DMA_ADDR_WIDTH-1:0] dma_write_desc_dma_addr_reg = 0, dma_write_desc_dma_addr_next; -reg [RAM_ADDR_WIDTH-1:0] dma_write_desc_ram_addr_reg = 0, dma_write_desc_ram_addr_next; +reg [RAM_ADDR_IMM_WIDTH-1:0] dma_write_desc_ram_addr_imm_reg = 0, dma_write_desc_ram_addr_imm_next; +reg dma_write_desc_imm_en_reg = 0, dma_write_desc_imm_en_next; reg [DMA_LEN_WIDTH-1:0] dma_write_desc_len_reg = 0, dma_write_desc_len_next; reg [DMA_TAG_WIDTH-1:0] dma_write_desc_tag_reg = 0, dma_write_desc_tag_next; reg dma_write_desc_valid_reg = 0, dma_write_desc_valid_next; @@ -260,7 +269,9 @@ assign m_axis_dma_read_desc_valid = dma_read_desc_valid_reg; assign m_axis_dma_write_desc_dma_addr = dma_write_desc_dma_addr_reg; assign m_axis_dma_write_desc_ram_sel = 0; -assign m_axis_dma_write_desc_ram_addr = dma_write_desc_ram_addr_reg; +assign m_axis_dma_write_desc_ram_addr = dma_write_desc_ram_addr_imm_reg; +assign m_axis_dma_write_desc_imm = dma_write_desc_ram_addr_imm_reg; +assign m_axis_dma_write_desc_imm_en = dma_write_desc_imm_en_reg; assign m_axis_dma_write_desc_len = dma_write_desc_len_reg; assign m_axis_dma_write_desc_tag = dma_write_desc_tag_reg; assign m_axis_dma_write_desc_valid = dma_write_desc_valid_reg; @@ -289,7 +300,8 @@ always @* begin dma_read_desc_status_valid_next = dma_read_desc_status_valid_reg; dma_write_desc_dma_addr_next = dma_write_desc_dma_addr_reg; - dma_write_desc_ram_addr_next = 
dma_write_desc_ram_addr_reg; + dma_write_desc_ram_addr_imm_next = dma_write_desc_ram_addr_imm_reg; + dma_write_desc_imm_en_next = dma_write_desc_imm_en_reg; dma_write_desc_len_next = dma_write_desc_len_reg; dma_write_desc_tag_next = dma_write_desc_tag_reg; dma_write_desc_valid_next = dma_write_desc_valid_reg && !m_axis_dma_write_desc_ready; @@ -357,10 +369,11 @@ always @* begin // single write 16'h0200: dma_write_desc_dma_addr_next[31:0] = s_axil_ctrl_wdata; 16'h0204: dma_write_desc_dma_addr_next[63:32] = s_axil_ctrl_wdata; - 16'h0208: dma_write_desc_ram_addr_next = s_axil_ctrl_wdata; + 16'h0208: dma_write_desc_ram_addr_imm_next = s_axil_ctrl_wdata; 16'h0210: dma_write_desc_len_next = s_axil_ctrl_wdata; 16'h0214: begin - dma_write_desc_tag_next = s_axil_ctrl_wdata; + dma_write_desc_tag_next = s_axil_ctrl_wdata[15:0]; + dma_write_desc_imm_en_next = s_axil_ctrl_wdata[16]; dma_write_desc_valid_next = 1'b1; end // block read @@ -442,12 +455,13 @@ always @* begin // single write 16'h0200: axil_ctrl_rdata_next = dma_write_desc_dma_addr_reg; 16'h0204: axil_ctrl_rdata_next = dma_write_desc_dma_addr_reg >> 32; - 16'h0208: axil_ctrl_rdata_next = dma_write_desc_ram_addr_reg; - 16'h020c: axil_ctrl_rdata_next = dma_write_desc_ram_addr_reg >> 32; + 16'h0208: axil_ctrl_rdata_next = dma_write_desc_ram_addr_imm_reg; + 16'h020c: axil_ctrl_rdata_next = dma_write_desc_ram_addr_imm_reg >> 32; 16'h0210: axil_ctrl_rdata_next = dma_write_desc_len_reg; 16'h0214: axil_ctrl_rdata_next = dma_write_desc_tag_reg; 16'h0218: begin axil_ctrl_rdata_next[15:0] = dma_write_desc_status_tag_reg; + axil_ctrl_rdata_next[16] = dma_write_desc_imm_en_reg; axil_ctrl_rdata_next[27:24] = dma_write_desc_status_error_reg; axil_ctrl_rdata_next[31] = dma_write_desc_status_valid_reg; dma_write_desc_status_valid_next = 1'b0; @@ -554,7 +568,8 @@ always @* begin dma_write_block_dma_offset_next = dma_write_block_dma_offset_reg + dma_write_block_dma_stride_reg; dma_write_desc_dma_addr_next = 
dma_write_block_dma_base_addr_reg + (dma_write_block_dma_offset_reg & dma_write_block_dma_offset_mask_reg); dma_write_block_ram_offset_next = dma_write_block_ram_offset_reg + dma_write_block_ram_stride_reg; - dma_write_desc_ram_addr_next = dma_write_block_ram_base_addr_reg + (dma_write_block_ram_offset_reg & dma_write_block_ram_offset_mask_reg); + dma_write_desc_ram_addr_imm_next = dma_write_block_ram_base_addr_reg + (dma_write_block_ram_offset_reg & dma_write_block_ram_offset_mask_reg); + dma_write_desc_imm_en_next = 1'b0; dma_write_desc_len_next = dma_write_block_len_reg; dma_write_block_count_next = dma_write_block_count_reg - 1; dma_write_desc_tag_next = dma_write_block_count_reg; @@ -594,7 +609,8 @@ always @(posedge clk) begin dma_read_desc_status_valid_reg <= dma_read_desc_status_valid_next; dma_write_desc_dma_addr_reg <= dma_write_desc_dma_addr_next; - dma_write_desc_ram_addr_reg <= dma_write_desc_ram_addr_next; + dma_write_desc_ram_addr_imm_reg <= dma_write_desc_ram_addr_imm_next; + dma_write_desc_imm_en_reg <= dma_write_desc_imm_en_next; dma_write_desc_len_reg <= dma_write_desc_len_next; dma_write_desc_tag_reg <= dma_write_desc_tag_next; dma_write_desc_valid_reg <= dma_write_desc_valid_next; diff --git a/example/common/rtl/example_core_pcie.v b/example/common/rtl/example_core_pcie.v index 5fa978aed..30133195d 100644 --- a/example/common/rtl/example_core_pcie.v +++ b/example/common/rtl/example_core_pcie.v @@ -47,6 +47,10 @@ module example_core_pcie # parameter TX_SEQ_NUM_WIDTH = 5, // TX sequence number tracking enable parameter TX_SEQ_NUM_ENABLE = 1, + // Immediate enable + parameter IMM_ENABLE = 1, + // Immediate width + parameter IMM_WIDTH = 32, // PCIe tag count parameter PCIE_TAG_COUNT = 256, // Operation table size (read) @@ -220,6 +224,8 @@ wire axis_dma_read_desc_status_valid; wire [PCIE_ADDR_WIDTH-1:0] axis_dma_write_desc_dma_addr; wire [RAM_SEL_WIDTH-1:0] axis_dma_write_desc_ram_sel; wire [RAM_ADDR_WIDTH-1:0] axis_dma_write_desc_ram_addr; +wire 
[IMM_WIDTH-1:0] axis_dma_write_desc_imm; +wire axis_dma_write_desc_imm_en; wire [DMA_LEN_WIDTH-1:0] axis_dma_write_desc_len; wire [DMA_TAG_WIDTH-1:0] axis_dma_write_desc_tag; wire axis_dma_write_desc_valid; @@ -629,6 +635,8 @@ dma_if_pcie #( .RAM_SEG_ADDR_WIDTH(RAM_SEG_ADDR_WIDTH), .PCIE_ADDR_WIDTH(PCIE_ADDR_WIDTH), .PCIE_TAG_COUNT(PCIE_TAG_COUNT), + .IMM_ENABLE(IMM_ENABLE), + .IMM_WIDTH(IMM_WIDTH), .LEN_WIDTH(DMA_LEN_WIDTH), .TAG_WIDTH(DMA_TAG_WIDTH), .READ_OP_TABLE_SIZE(READ_OP_TABLE_SIZE), @@ -716,6 +724,8 @@ dma_if_pcie_inst ( .s_axis_write_desc_pcie_addr(axis_dma_write_desc_dma_addr), .s_axis_write_desc_ram_sel(axis_dma_write_desc_ram_sel), .s_axis_write_desc_ram_addr(axis_dma_write_desc_ram_addr), + .s_axis_write_desc_imm(axis_dma_write_desc_imm), + .s_axis_write_desc_imm_en(axis_dma_write_desc_imm_en), .s_axis_write_desc_len(axis_dma_write_desc_len), .s_axis_write_desc_tag(axis_dma_write_desc_tag), .s_axis_write_desc_valid(axis_dma_write_desc_valid), @@ -794,6 +804,8 @@ example_core #( .AXIL_ADDR_WIDTH(AXIL_ADDR_WIDTH), .AXIL_STRB_WIDTH(AXIL_STRB_WIDTH), .DMA_ADDR_WIDTH(PCIE_ADDR_WIDTH), + .DMA_IMM_ENABLE(IMM_ENABLE), + .DMA_IMM_WIDTH(IMM_WIDTH), .DMA_LEN_WIDTH(DMA_LEN_WIDTH), .DMA_TAG_WIDTH(DMA_TAG_WIDTH), .RAM_SEL_WIDTH(RAM_SEL_WIDTH), @@ -854,6 +866,8 @@ core_inst ( .m_axis_dma_write_desc_dma_addr(axis_dma_write_desc_dma_addr), .m_axis_dma_write_desc_ram_sel(axis_dma_write_desc_ram_sel), .m_axis_dma_write_desc_ram_addr(axis_dma_write_desc_ram_addr), + .m_axis_dma_write_desc_imm(axis_dma_write_desc_imm), + .m_axis_dma_write_desc_imm_en(axis_dma_write_desc_imm_en), .m_axis_dma_write_desc_len(axis_dma_write_desc_len), .m_axis_dma_write_desc_tag(axis_dma_write_desc_tag), .m_axis_dma_write_desc_valid(axis_dma_write_desc_valid), diff --git a/example/common/rtl/example_core_pcie_s10.v b/example/common/rtl/example_core_pcie_s10.v index 710db45af..48615c408 100644 --- a/example/common/rtl/example_core_pcie_s10.v +++ b/example/common/rtl/example_core_pcie_s10.v @@ 
-39,6 +39,10 @@ module example_core_pcie_s10 # parameter SEG_DATA_WIDTH = 256, // H-Tile/L-Tile AVST segment empty signal width parameter SEG_EMPTY_WIDTH = $clog2(SEG_DATA_WIDTH/32), + // Immediate enable + parameter IMM_ENABLE = 1, + // Immediate width + parameter IMM_WIDTH = 32, // TX sequence number width parameter TX_SEQ_NUM_WIDTH = 6, // TX sequence number tracking enable @@ -366,6 +370,8 @@ example_core_pcie #( .TX_SEQ_NUM_COUNT(TX_SEQ_NUM_COUNT), .TX_SEQ_NUM_WIDTH(TX_SEQ_NUM_WIDTH), .TX_SEQ_NUM_ENABLE(TX_SEQ_NUM_ENABLE), + .IMM_ENABLE(IMM_ENABLE), + .IMM_WIDTH(IMM_WIDTH), .PCIE_TAG_COUNT(PCIE_TAG_COUNT), .READ_OP_TABLE_SIZE(READ_OP_TABLE_SIZE), .READ_TX_LIMIT(READ_TX_LIMIT), diff --git a/example/common/rtl/example_core_pcie_us.v b/example/common/rtl/example_core_pcie_us.v index f439c8f52..ddae9b7d9 100644 --- a/example/common/rtl/example_core_pcie_us.v +++ b/example/common/rtl/example_core_pcie_us.v @@ -49,6 +49,10 @@ module example_core_pcie_us # parameter RQ_SEQ_NUM_WIDTH = AXIS_PCIE_RQ_USER_WIDTH == 60 ? 
4 : 6, // RQ sequence number tracking enable parameter RQ_SEQ_NUM_ENABLE = 1, + // Immediate enable + parameter IMM_ENABLE = 1, + // Immediate width + parameter IMM_WIDTH = 32, // PCIe tag count parameter PCIE_TAG_COUNT = 256, // Operation table size (read) @@ -461,6 +465,8 @@ example_core_pcie #( .TX_SEQ_NUM_COUNT(TX_SEQ_NUM_COUNT), .TX_SEQ_NUM_WIDTH(TX_SEQ_NUM_WIDTH), .TX_SEQ_NUM_ENABLE(TX_SEQ_NUM_ENABLE), + .IMM_ENABLE(IMM_ENABLE), + .IMM_WIDTH(IMM_WIDTH), .PCIE_TAG_COUNT(PCIE_TAG_COUNT), .READ_OP_TABLE_SIZE(READ_OP_TABLE_SIZE), .READ_TX_LIMIT(READ_TX_LIMIT), diff --git a/example/common/tb/example_core_pcie/Makefile b/example/common/tb/example_core_pcie/Makefile index 080dab6b3..1af20cb83 100644 --- a/example/common/tb/example_core_pcie/Makefile +++ b/example/common/tb/example_core_pcie/Makefile @@ -56,6 +56,8 @@ export PARAM_TX_SEQ_NUM_COUNT ?= 1 export PARAM_TX_SEQ_NUM_WIDTH ?= 6 export PARAM_TX_SEQ_NUM_ENABLE ?= 1 export PARAM_PCIE_TAG_COUNT ?= 256 +export PARAM_IMM_ENABLE ?= 1 +export PARAM_IMM_WIDTH ?= 32 export PARAM_READ_OP_TABLE_SIZE ?= $(PARAM_PCIE_TAG_COUNT) export PARAM_READ_TX_LIMIT ?= $(shell echo "$$(( 1 << $(PARAM_TX_SEQ_NUM_WIDTH) ))" ) export PARAM_READ_TX_FC_ENABLE ?= 1 @@ -78,6 +80,8 @@ ifeq ($(SIM), icarus) COMPILE_ARGS += -P $(TOPLEVEL).TX_SEQ_NUM_WIDTH=$(PARAM_TX_SEQ_NUM_WIDTH) COMPILE_ARGS += -P $(TOPLEVEL).TX_SEQ_NUM_ENABLE=$(PARAM_TX_SEQ_NUM_ENABLE) COMPILE_ARGS += -P $(TOPLEVEL).PCIE_TAG_COUNT=$(PARAM_PCIE_TAG_COUNT) + COMPILE_ARGS += -P $(TOPLEVEL).IMM_ENABLE=$(PARAM_IMM_ENABLE) + COMPILE_ARGS += -P $(TOPLEVEL).IMM_WIDTH=$(PARAM_IMM_WIDTH) COMPILE_ARGS += -P $(TOPLEVEL).READ_OP_TABLE_SIZE=$(PARAM_READ_OP_TABLE_SIZE) COMPILE_ARGS += -P $(TOPLEVEL).READ_TX_LIMIT=$(PARAM_READ_TX_LIMIT) COMPILE_ARGS += -P $(TOPLEVEL).READ_TX_FC_ENABLE=$(PARAM_READ_TX_FC_ENABLE) @@ -104,6 +108,8 @@ else ifeq ($(SIM), verilator) COMPILE_ARGS += -GTX_SEQ_NUM_WIDTH=$(PARAM_TX_SEQ_NUM_WIDTH) COMPILE_ARGS += -GTX_SEQ_NUM_ENABLE=$(PARAM_TX_SEQ_NUM_ENABLE) 
COMPILE_ARGS += -GPCIE_TAG_COUNT=$(PARAM_PCIE_TAG_COUNT) + COMPILE_ARGS += -GIMM_ENABLE=$(PARAM_IMM_ENABLE) + COMPILE_ARGS += -GIMM_WIDTH=$(PARAM_IMM_WIDTH) COMPILE_ARGS += -GREAD_OP_TABLE_SIZE=$(PARAM_READ_OP_TABLE_SIZE) COMPILE_ARGS += -GREAD_TX_LIMIT=$(PARAM_READ_TX_LIMIT) COMPILE_ARGS += -GREAD_TX_FC_ENABLE=$(PARAM_READ_TX_FC_ENABLE) diff --git a/example/common/tb/example_core_pcie/test_example_core_pcie.py b/example/common/tb/example_core_pcie/test_example_core_pcie.py index 5ec428178..c93eb062a 100644 --- a/example/common/tb/example_core_pcie/test_example_core_pcie.py +++ b/example/common/tb/example_core_pcie/test_example_core_pcie.py @@ -199,6 +199,26 @@ async def run_test(dut): assert mem[0:1024] == mem[0x1000:0x1000+1024] + tb.log.info("Test immediate write") + + # write pcie write descriptor + await dev_pf0_bar0.write_dword(0x000200, (mem_base+0x1000) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x000204, (mem_base+0x1000 >> 32) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x000208, 0x44332211) + await dev_pf0_bar0.write_dword(0x000210, 0x4) + await dev_pf0_bar0.write_dword(0x000214, 0x100AA) + + await Timer(2000, 'ns') + + # read status + val = await dev_pf0_bar0.read_dword(0x000218) + tb.log.info("Status: 0x%x", val) + assert val == 0x800100AA + + tb.log.info("%s", mem.hexdump_str(0x1000, 64)) + + assert mem[0x1000:0x1000+4] == b'\x11\x22\x33\x44' + tb.log.info("Test DMA block operations") # write packet data @@ -337,6 +357,8 @@ def test_example_core_pcie(request, pcie_data_width): parameters['TX_SEQ_NUM_WIDTH'] = 6 parameters['TX_SEQ_NUM_ENABLE'] = 1 parameters['PCIE_TAG_COUNT'] = 256 + parameters['IMM_ENABLE'] = 1 + parameters['IMM_WIDTH'] = 32 parameters['READ_OP_TABLE_SIZE'] = parameters['PCIE_TAG_COUNT'] parameters['READ_TX_LIMIT'] = 2**parameters['TX_SEQ_NUM_WIDTH'] parameters['READ_TX_FC_ENABLE'] = 1 diff --git a/example/common/tb/example_core_pcie_s10/Makefile b/example/common/tb/example_core_pcie_s10/Makefile index b34339fd9..8475a641a 
100644 --- a/example/common/tb/example_core_pcie_s10/Makefile +++ b/example/common/tb/example_core_pcie_s10/Makefile @@ -61,6 +61,8 @@ export PARAM_TX_SEQ_NUM_WIDTH ?= 6 export PARAM_TX_SEQ_NUM_ENABLE ?= 1 export PARAM_L_TILE ?= 0 export PARAM_PCIE_TAG_COUNT ?= 256 +export PARAM_IMM_ENABLE ?= 1 +export PARAM_IMM_WIDTH ?= 32 export PARAM_READ_OP_TABLE_SIZE ?= $(PARAM_PCIE_TAG_COUNT) export PARAM_READ_TX_LIMIT ?= $(shell echo "$$(( 1 << $(PARAM_TX_SEQ_NUM_WIDTH) ))" ) export PARAM_READ_TX_FC_ENABLE ?= 1 @@ -80,6 +82,8 @@ ifeq ($(SIM), icarus) COMPILE_ARGS += -P $(TOPLEVEL).TX_SEQ_NUM_ENABLE=$(PARAM_TX_SEQ_NUM_ENABLE) COMPILE_ARGS += -P $(TOPLEVEL).L_TILE=$(PARAM_L_TILE) COMPILE_ARGS += -P $(TOPLEVEL).PCIE_TAG_COUNT=$(PARAM_PCIE_TAG_COUNT) + COMPILE_ARGS += -P $(TOPLEVEL).IMM_ENABLE=$(PARAM_IMM_ENABLE) + COMPILE_ARGS += -P $(TOPLEVEL).IMM_WIDTH=$(PARAM_IMM_WIDTH) COMPILE_ARGS += -P $(TOPLEVEL).READ_OP_TABLE_SIZE=$(PARAM_READ_OP_TABLE_SIZE) COMPILE_ARGS += -P $(TOPLEVEL).READ_TX_LIMIT=$(PARAM_READ_TX_LIMIT) COMPILE_ARGS += -P $(TOPLEVEL).READ_TX_FC_ENABLE=$(PARAM_READ_TX_FC_ENABLE) @@ -103,6 +107,8 @@ else ifeq ($(SIM), verilator) COMPILE_ARGS += -GTX_SEQ_NUM_ENABLE=$(PARAM_TX_SEQ_NUM_ENABLE) COMPILE_ARGS += -GL_TILE=$(PARAM_L_TILE) COMPILE_ARGS += -GPCIE_TAG_COUNT=$(PARAM_PCIE_TAG_COUNT) + COMPILE_ARGS += -GIMM_ENABLE=$(PARAM_IMM_ENABLE) + COMPILE_ARGS += -GIMM_WIDTH=$(PARAM_IMM_WIDTH) COMPILE_ARGS += -GREAD_OP_TABLE_SIZE=$(PARAM_READ_OP_TABLE_SIZE) COMPILE_ARGS += -GREAD_TX_LIMIT=$(PARAM_READ_TX_LIMIT) COMPILE_ARGS += -GREAD_TX_FC_ENABLE=$(PARAM_READ_TX_FC_ENABLE) diff --git a/example/common/tb/example_core_pcie_s10/test_example_core_pcie_s10.py b/example/common/tb/example_core_pcie_s10/test_example_core_pcie_s10.py index 9680b297e..3095607f0 100644 --- a/example/common/tb/example_core_pcie_s10/test_example_core_pcie_s10.py +++ b/example/common/tb/example_core_pcie_s10/test_example_core_pcie_s10.py @@ -237,6 +237,26 @@ async def run_test(dut): assert mem[0:1024] == 
mem[0x1000:0x1000+1024] + tb.log.info("Test immediate write") + + # write pcie write descriptor + await dev_pf0_bar0.write_dword(0x000200, (mem_base+0x1000) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x000204, (mem_base+0x1000 >> 32) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x000208, 0x44332211) + await dev_pf0_bar0.write_dword(0x000210, 0x4) + await dev_pf0_bar0.write_dword(0x000214, 0x100AA) + + await Timer(2000, 'ns') + + # read status + val = await dev_pf0_bar0.read_dword(0x000218) + tb.log.info("Status: 0x%x", val) + assert val == 0x800100AA + + tb.log.info("%s", mem.hexdump_str(0x1000, 64)) + + assert mem[0x1000:0x1000+4] == b'\x11\x22\x33\x44' + tb.log.info("Test DMA block operations") # write packet data @@ -376,6 +396,8 @@ def test_example_core_pcie_s10(request, data_width, l_tile): parameters['TX_SEQ_NUM_ENABLE'] = 1 parameters['L_TILE'] = l_tile parameters['PCIE_TAG_COUNT'] = 256 + parameters['IMM_ENABLE'] = 1 + parameters['IMM_WIDTH'] = 32 parameters['READ_OP_TABLE_SIZE'] = parameters['PCIE_TAG_COUNT'] parameters['READ_TX_LIMIT'] = 2**parameters['TX_SEQ_NUM_WIDTH'] parameters['READ_TX_FC_ENABLE'] = 1 diff --git a/example/common/tb/example_core_pcie_us/Makefile b/example/common/tb/example_core_pcie_us/Makefile index 1da8e0c6c..53532e23a 100644 --- a/example/common/tb/example_core_pcie_us/Makefile +++ b/example/common/tb/example_core_pcie_us/Makefile @@ -65,6 +65,8 @@ export PARAM_AXIS_PCIE_CC_USER_WIDTH ?= $(if $(filter-out 512,$(PARAM_AXIS_PCIE_ export PARAM_RQ_SEQ_NUM_WIDTH ?= $(if $(filter-out 60,$(PARAM_AXIS_PCIE_RQ_USER_WIDTH)),6,4) export PARAM_RQ_SEQ_NUM_ENABLE ?= 1 export PARAM_PCIE_TAG_COUNT ?= 256 +export PARAM_IMM_ENABLE ?= 1 +export PARAM_IMM_WIDTH ?= 32 export PARAM_READ_OP_TABLE_SIZE ?= $(PARAM_PCIE_TAG_COUNT) export PARAM_READ_TX_LIMIT ?= $(shell echo "$$(( 1 << ($(PARAM_RQ_SEQ_NUM_WIDTH)-1) ))" ) export PARAM_READ_TX_FC_ENABLE ?= 1 @@ -86,6 +88,8 @@ ifeq ($(SIM), icarus) COMPILE_ARGS += -P 
$(TOPLEVEL).RQ_SEQ_NUM_WIDTH=$(PARAM_RQ_SEQ_NUM_WIDTH) COMPILE_ARGS += -P $(TOPLEVEL).RQ_SEQ_NUM_ENABLE=$(PARAM_RQ_SEQ_NUM_ENABLE) COMPILE_ARGS += -P $(TOPLEVEL).PCIE_TAG_COUNT=$(PARAM_PCIE_TAG_COUNT) + COMPILE_ARGS += -P $(TOPLEVEL).IMM_ENABLE=$(PARAM_IMM_ENABLE) + COMPILE_ARGS += -P $(TOPLEVEL).IMM_WIDTH=$(PARAM_IMM_WIDTH) COMPILE_ARGS += -P $(TOPLEVEL).READ_OP_TABLE_SIZE=$(PARAM_READ_OP_TABLE_SIZE) COMPILE_ARGS += -P $(TOPLEVEL).READ_TX_LIMIT=$(PARAM_READ_TX_LIMIT) COMPILE_ARGS += -P $(TOPLEVEL).READ_TX_FC_ENABLE=$(PARAM_READ_TX_FC_ENABLE) @@ -111,6 +115,8 @@ else ifeq ($(SIM), verilator) COMPILE_ARGS += -GRQ_SEQ_NUM_WIDTH=$(PARAM_RQ_SEQ_NUM_WIDTH) COMPILE_ARGS += -GRQ_SEQ_NUM_ENABLE=$(PARAM_RQ_SEQ_NUM_ENABLE) COMPILE_ARGS += -GPCIE_TAG_COUNT=$(PARAM_PCIE_TAG_COUNT) + COMPILE_ARGS += -GIMM_ENABLE=$(PARAM_IMM_ENABLE) + COMPILE_ARGS += -GIMM_WIDTH=$(PARAM_IMM_WIDTH) COMPILE_ARGS += -GREAD_OP_TABLE_SIZE=$(PARAM_READ_OP_TABLE_SIZE) COMPILE_ARGS += -GREAD_TX_LIMIT=$(PARAM_READ_TX_LIMIT) COMPILE_ARGS += -GREAD_TX_FC_ENABLE=$(PARAM_READ_TX_FC_ENABLE) diff --git a/example/common/tb/example_core_pcie_us/test_example_core_pcie_us.py b/example/common/tb/example_core_pcie_us/test_example_core_pcie_us.py index de0ec35bf..28cb17f26 100644 --- a/example/common/tb/example_core_pcie_us/test_example_core_pcie_us.py +++ b/example/common/tb/example_core_pcie_us/test_example_core_pcie_us.py @@ -331,6 +331,26 @@ async def run_test(dut): assert mem[0:1024] == mem[0x1000:0x1000+1024] + tb.log.info("Test immediate write") + + # write pcie write descriptor + await dev_pf0_bar0.write_dword(0x000200, (mem_base+0x1000) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x000204, (mem_base+0x1000 >> 32) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x000208, 0x44332211) + await dev_pf0_bar0.write_dword(0x000210, 0x4) + await dev_pf0_bar0.write_dword(0x000214, 0x100AA) + + await Timer(2000, 'ns') + + # read status + val = await dev_pf0_bar0.read_dword(0x000218) + tb.log.info("Status: 0x%x", val) 
+ assert val == 0x800100AA + + tb.log.info("%s", mem.hexdump_str(0x1000, 64)) + + assert mem[0x1000:0x1000+4] == b'\x11\x22\x33\x44' + tb.log.info("Test DMA block operations") # write packet data @@ -473,6 +493,8 @@ def test_example_core_pcie_us(request, axis_pcie_data_width): parameters['RQ_SEQ_NUM_WIDTH'] = 4 if parameters['AXIS_PCIE_RQ_USER_WIDTH'] == 60 else 6 parameters['RQ_SEQ_NUM_ENABLE'] = 1 parameters['PCIE_TAG_COUNT'] = 256 + parameters['IMM_ENABLE'] = 1 + parameters['IMM_WIDTH'] = 32 parameters['READ_OP_TABLE_SIZE'] = parameters['PCIE_TAG_COUNT'] parameters['READ_TX_LIMIT'] = 2**(parameters['RQ_SEQ_NUM_WIDTH']-1) parameters['READ_TX_FC_ENABLE'] = 1 From e7a83364d084205ad0b0cd5c7e1c2a563a13394e Mon Sep 17 00:00:00 2001 From: Alex Forencich Date: Mon, 4 Apr 2022 15:05:21 -0700 Subject: [PATCH 14/21] Update testbenches --- .../fpga/tb/fpga_core/test_fpga_core.py | 106 ++++++++++++++++++ .../fpga/tb/fpga_core/test_fpga_core.py | 20 ++++ .../AU200/fpga/tb/fpga_core/test_fpga_core.py | 20 ++++ .../AU250/fpga/tb/fpga_core/test_fpga_core.py | 20 ++++ .../AU280/fpga/tb/fpga_core/test_fpga_core.py | 20 ++++ .../AU50/fpga/tb/fpga_core/test_fpga_core.py | 20 ++++ .../fpga/tb/fpga_core/test_fpga_core.py | 20 ++++ .../fpga/tb/fpga_core/test_fpga_core.py | 20 ++++ .../fpga/tb/fpga_core/test_fpga_core.py | 106 ++++++++++++++++++ .../fpga/tb/fpga_core/test_fpga_core.py | 20 ++++ .../fpga/tb/fpga_core/test_fpga_core.py | 20 ++++ .../fpga/tb/fpga_core/test_fpga_core.py | 20 ++++ .../fpga/tb/fpga_core/test_fpga_core.py | 20 ++++ .../fb2CG/fpga/tb/fpga_core/test_fpga_core.py | 20 ++++ 14 files changed, 452 insertions(+) diff --git a/example/520N_MX/fpga/tb/fpga_core/test_fpga_core.py b/example/520N_MX/fpga/tb/fpga_core/test_fpga_core.py index 87888d60c..82f15683a 100644 --- a/example/520N_MX/fpga/tb/fpga_core/test_fpga_core.py +++ b/example/520N_MX/fpga/tb/fpga_core/test_fpga_core.py @@ -234,6 +234,112 @@ async def run_test(dut): assert mem[0:1024] == mem[0x1000:0x1000+1024] 
+ tb.log.info("Test immediate write") + + # write pcie write descriptor + await dev_pf0_bar0.write_dword(0x000200, (mem_base+0x1000) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x000204, (mem_base+0x1000 >> 32) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x000208, 0x44332211) + await dev_pf0_bar0.write_dword(0x000210, 0x4) + await dev_pf0_bar0.write_dword(0x000214, 0x100AA) + + await Timer(2000, 'ns') + + # read status + val = await dev_pf0_bar0.read_dword(0x000218) + tb.log.info("Status: 0x%x", val) + assert val == 0x800100AA + + tb.log.info("%s", mem.hexdump_str(0x1000, 64)) + + assert mem[0x1000:0x1000+4] == b'\x11\x22\x33\x44' + + tb.log.info("Test DMA block operations") + + # write packet data + mem[0:1024] = bytearray([x % 256 for x in range(1024)]) + + # enable DMA + await dev_pf0_bar0.write_dword(0x000000, 1) + # disable interrupts + await dev_pf0_bar0.write_dword(0x000008, 0) + + # configure operation (read) + # DMA base address + await dev_pf0_bar0.write_dword(0x001080, (mem_base+0x0000) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x001084, (mem_base+0x0000 >> 32) & 0xffffffff) + # DMA offset address + await dev_pf0_bar0.write_dword(0x001088, 0) + await dev_pf0_bar0.write_dword(0x00108c, 0) + # DMA offset mask + await dev_pf0_bar0.write_dword(0x001090, 0x000003ff) + await dev_pf0_bar0.write_dword(0x001094, 0) + # DMA stride + await dev_pf0_bar0.write_dword(0x001098, 256) + await dev_pf0_bar0.write_dword(0x00109c, 0) + # RAM base address + await dev_pf0_bar0.write_dword(0x0010c0, 0) + await dev_pf0_bar0.write_dword(0x0010c4, 0) + # RAM offset address + await dev_pf0_bar0.write_dword(0x0010c8, 0) + await dev_pf0_bar0.write_dword(0x0010cc, 0) + # RAM offset mask + await dev_pf0_bar0.write_dword(0x0010d0, 0x000003ff) + await dev_pf0_bar0.write_dword(0x0010d4, 0) + # RAM stride + await dev_pf0_bar0.write_dword(0x0010d8, 256) + await dev_pf0_bar0.write_dword(0x0010dc, 0) + # clear cycle count + await dev_pf0_bar0.write_dword(0x001008, 0) + await 
dev_pf0_bar0.write_dword(0x00100c, 0) + # block length + await dev_pf0_bar0.write_dword(0x001010, 256) + # block count + await dev_pf0_bar0.write_dword(0x001018, 32) + await dev_pf0_bar0.write_dword(0x00101c, 0) + # start + await dev_pf0_bar0.write_dword(0x001000, 1) + + await Timer(2000, 'ns') + + # configure operation (write) + # DMA base address + await dev_pf0_bar0.write_dword(0x001180, (mem_base+0x0000) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x001184, (mem_base+0x0000 >> 32) & 0xffffffff) + # DMA offset address + await dev_pf0_bar0.write_dword(0x001188, 0) + await dev_pf0_bar0.write_dword(0x00118c, 0) + # DMA offset mask + await dev_pf0_bar0.write_dword(0x001190, 0x000003ff) + await dev_pf0_bar0.write_dword(0x001194, 0) + # DMA stride + await dev_pf0_bar0.write_dword(0x001198, 256) + await dev_pf0_bar0.write_dword(0x00119c, 0) + # RAM base address + await dev_pf0_bar0.write_dword(0x0011c0, 0) + await dev_pf0_bar0.write_dword(0x0011c4, 0) + # RAM offset address + await dev_pf0_bar0.write_dword(0x0011c8, 0) + await dev_pf0_bar0.write_dword(0x0011cc, 0) + # RAM offset mask + await dev_pf0_bar0.write_dword(0x0011d0, 0x000003ff) + await dev_pf0_bar0.write_dword(0x0011d4, 0) + # RAM stride + await dev_pf0_bar0.write_dword(0x0011d8, 256) + await dev_pf0_bar0.write_dword(0x0011dc, 0) + # clear cycle count + await dev_pf0_bar0.write_dword(0x001108, 0) + await dev_pf0_bar0.write_dword(0x00110c, 0) + # block length + await dev_pf0_bar0.write_dword(0x001110, 256) + # block count + await dev_pf0_bar0.write_dword(0x001118, 32) + await dev_pf0_bar0.write_dword(0x00111c, 0) + # start + await dev_pf0_bar0.write_dword(0x001100, 1) + + await Timer(2000, 'ns') + await RisingEdge(dut.clk) await RisingEdge(dut.clk) diff --git a/example/ADM_PCIE_9V3/fpga/tb/fpga_core/test_fpga_core.py b/example/ADM_PCIE_9V3/fpga/tb/fpga_core/test_fpga_core.py index f2c657c91..ab78dbb3c 100644 --- a/example/ADM_PCIE_9V3/fpga/tb/fpga_core/test_fpga_core.py +++ 
b/example/ADM_PCIE_9V3/fpga/tb/fpga_core/test_fpga_core.py @@ -310,6 +310,26 @@ async def run_test(dut): assert mem[0:1024] == mem[0x1000:0x1000+1024] + tb.log.info("Test immediate write") + + # write pcie write descriptor + await dev_pf0_bar0.write_dword(0x000200, (mem_base+0x1000) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x000204, (mem_base+0x1000 >> 32) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x000208, 0x44332211) + await dev_pf0_bar0.write_dword(0x000210, 0x4) + await dev_pf0_bar0.write_dword(0x000214, 0x100AA) + + await Timer(2000, 'ns') + + # read status + val = await dev_pf0_bar0.read_dword(0x000218) + tb.log.info("Status: 0x%x", val) + assert val == 0x800100AA + + tb.log.info("%s", mem.hexdump_str(0x1000, 64)) + + assert mem[0x1000:0x1000+4] == b'\x11\x22\x33\x44' + tb.log.info("Test DMA block operations") # write packet data diff --git a/example/AU200/fpga/tb/fpga_core/test_fpga_core.py b/example/AU200/fpga/tb/fpga_core/test_fpga_core.py index 429f0383f..5add2329d 100644 --- a/example/AU200/fpga/tb/fpga_core/test_fpga_core.py +++ b/example/AU200/fpga/tb/fpga_core/test_fpga_core.py @@ -312,6 +312,26 @@ async def run_test(dut): assert mem[0:1024] == mem[0x1000:0x1000+1024] + tb.log.info("Test immediate write") + + # write pcie write descriptor + await dev_pf0_bar0.write_dword(0x000200, (mem_base+0x1000) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x000204, (mem_base+0x1000 >> 32) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x000208, 0x44332211) + await dev_pf0_bar0.write_dword(0x000210, 0x4) + await dev_pf0_bar0.write_dword(0x000214, 0x100AA) + + await Timer(2000, 'ns') + + # read status + val = await dev_pf0_bar0.read_dword(0x000218) + tb.log.info("Status: 0x%x", val) + assert val == 0x800100AA + + tb.log.info("%s", mem.hexdump_str(0x1000, 64)) + + assert mem[0x1000:0x1000+4] == b'\x11\x22\x33\x44' + tb.log.info("Test DMA block operations") # write packet data diff --git a/example/AU250/fpga/tb/fpga_core/test_fpga_core.py 
b/example/AU250/fpga/tb/fpga_core/test_fpga_core.py index 429f0383f..5add2329d 100644 --- a/example/AU250/fpga/tb/fpga_core/test_fpga_core.py +++ b/example/AU250/fpga/tb/fpga_core/test_fpga_core.py @@ -312,6 +312,26 @@ async def run_test(dut): assert mem[0:1024] == mem[0x1000:0x1000+1024] + tb.log.info("Test immediate write") + + # write pcie write descriptor + await dev_pf0_bar0.write_dword(0x000200, (mem_base+0x1000) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x000204, (mem_base+0x1000 >> 32) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x000208, 0x44332211) + await dev_pf0_bar0.write_dword(0x000210, 0x4) + await dev_pf0_bar0.write_dword(0x000214, 0x100AA) + + await Timer(2000, 'ns') + + # read status + val = await dev_pf0_bar0.read_dword(0x000218) + tb.log.info("Status: 0x%x", val) + assert val == 0x800100AA + + tb.log.info("%s", mem.hexdump_str(0x1000, 64)) + + assert mem[0x1000:0x1000+4] == b'\x11\x22\x33\x44' + tb.log.info("Test DMA block operations") # write packet data diff --git a/example/AU280/fpga/tb/fpga_core/test_fpga_core.py b/example/AU280/fpga/tb/fpga_core/test_fpga_core.py index f2c657c91..ab78dbb3c 100644 --- a/example/AU280/fpga/tb/fpga_core/test_fpga_core.py +++ b/example/AU280/fpga/tb/fpga_core/test_fpga_core.py @@ -310,6 +310,26 @@ async def run_test(dut): assert mem[0:1024] == mem[0x1000:0x1000+1024] + tb.log.info("Test immediate write") + + # write pcie write descriptor + await dev_pf0_bar0.write_dword(0x000200, (mem_base+0x1000) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x000204, (mem_base+0x1000 >> 32) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x000208, 0x44332211) + await dev_pf0_bar0.write_dword(0x000210, 0x4) + await dev_pf0_bar0.write_dword(0x000214, 0x100AA) + + await Timer(2000, 'ns') + + # read status + val = await dev_pf0_bar0.read_dword(0x000218) + tb.log.info("Status: 0x%x", val) + assert val == 0x800100AA + + tb.log.info("%s", mem.hexdump_str(0x1000, 64)) + + assert mem[0x1000:0x1000+4] == b'\x11\x22\x33\x44' + 
tb.log.info("Test DMA block operations") # write packet data diff --git a/example/AU50/fpga/tb/fpga_core/test_fpga_core.py b/example/AU50/fpga/tb/fpga_core/test_fpga_core.py index f2c657c91..ab78dbb3c 100644 --- a/example/AU50/fpga/tb/fpga_core/test_fpga_core.py +++ b/example/AU50/fpga/tb/fpga_core/test_fpga_core.py @@ -310,6 +310,26 @@ async def run_test(dut): assert mem[0:1024] == mem[0x1000:0x1000+1024] + tb.log.info("Test immediate write") + + # write pcie write descriptor + await dev_pf0_bar0.write_dword(0x000200, (mem_base+0x1000) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x000204, (mem_base+0x1000 >> 32) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x000208, 0x44332211) + await dev_pf0_bar0.write_dword(0x000210, 0x4) + await dev_pf0_bar0.write_dword(0x000214, 0x100AA) + + await Timer(2000, 'ns') + + # read status + val = await dev_pf0_bar0.read_dword(0x000218) + tb.log.info("Status: 0x%x", val) + assert val == 0x800100AA + + tb.log.info("%s", mem.hexdump_str(0x1000, 64)) + + assert mem[0x1000:0x1000+4] == b'\x11\x22\x33\x44' + tb.log.info("Test DMA block operations") # write packet data diff --git a/example/ExaNIC_X10/fpga/tb/fpga_core/test_fpga_core.py b/example/ExaNIC_X10/fpga/tb/fpga_core/test_fpga_core.py index f1d721c63..f3759cb1d 100644 --- a/example/ExaNIC_X10/fpga/tb/fpga_core/test_fpga_core.py +++ b/example/ExaNIC_X10/fpga/tb/fpga_core/test_fpga_core.py @@ -306,6 +306,26 @@ async def run_test(dut): assert mem[0:1024] == mem[0x1000:0x1000+1024] + tb.log.info("Test immediate write") + + # write pcie write descriptor + await dev_pf0_bar0.write_dword(0x000200, (mem_base+0x1000) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x000204, (mem_base+0x1000 >> 32) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x000208, 0x44332211) + await dev_pf0_bar0.write_dword(0x000210, 0x4) + await dev_pf0_bar0.write_dword(0x000214, 0x100AA) + + await Timer(2000, 'ns') + + # read status + val = await dev_pf0_bar0.read_dword(0x000218) + tb.log.info("Status: 0x%x", 
val) + assert val == 0x800100AA + + tb.log.info("%s", mem.hexdump_str(0x1000, 64)) + + assert mem[0x1000:0x1000+4] == b'\x11\x22\x33\x44' + tb.log.info("Test DMA block operations") # write packet data diff --git a/example/ExaNIC_X25/fpga/tb/fpga_core/test_fpga_core.py b/example/ExaNIC_X25/fpga/tb/fpga_core/test_fpga_core.py index a4258a9a8..ea327aef7 100644 --- a/example/ExaNIC_X25/fpga/tb/fpga_core/test_fpga_core.py +++ b/example/ExaNIC_X25/fpga/tb/fpga_core/test_fpga_core.py @@ -310,6 +310,26 @@ async def run_test(dut): assert mem[0:1024] == mem[0x1000:0x1000+1024] + tb.log.info("Test immediate write") + + # write pcie write descriptor + await dev_pf0_bar0.write_dword(0x000200, (mem_base+0x1000) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x000204, (mem_base+0x1000 >> 32) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x000208, 0x44332211) + await dev_pf0_bar0.write_dword(0x000210, 0x4) + await dev_pf0_bar0.write_dword(0x000214, 0x100AA) + + await Timer(2000, 'ns') + + # read status + val = await dev_pf0_bar0.read_dword(0x000218) + tb.log.info("Status: 0x%x", val) + assert val == 0x800100AA + + tb.log.info("%s", mem.hexdump_str(0x1000, 64)) + + assert mem[0x1000:0x1000+4] == b'\x11\x22\x33\x44' + tb.log.info("Test DMA block operations") # write packet data diff --git a/example/S10MX_DK/fpga/tb/fpga_core/test_fpga_core.py b/example/S10MX_DK/fpga/tb/fpga_core/test_fpga_core.py index 87888d60c..82f15683a 100644 --- a/example/S10MX_DK/fpga/tb/fpga_core/test_fpga_core.py +++ b/example/S10MX_DK/fpga/tb/fpga_core/test_fpga_core.py @@ -234,6 +234,112 @@ async def run_test(dut): assert mem[0:1024] == mem[0x1000:0x1000+1024] + tb.log.info("Test immediate write") + + # write pcie write descriptor + await dev_pf0_bar0.write_dword(0x000200, (mem_base+0x1000) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x000204, (mem_base+0x1000 >> 32) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x000208, 0x44332211) + await dev_pf0_bar0.write_dword(0x000210, 0x4) + await 
dev_pf0_bar0.write_dword(0x000214, 0x100AA) + + await Timer(2000, 'ns') + + # read status + val = await dev_pf0_bar0.read_dword(0x000218) + tb.log.info("Status: 0x%x", val) + assert val == 0x800100AA + + tb.log.info("%s", mem.hexdump_str(0x1000, 64)) + + assert mem[0x1000:0x1000+4] == b'\x11\x22\x33\x44' + + tb.log.info("Test DMA block operations") + + # write packet data + mem[0:1024] = bytearray([x % 256 for x in range(1024)]) + + # enable DMA + await dev_pf0_bar0.write_dword(0x000000, 1) + # disable interrupts + await dev_pf0_bar0.write_dword(0x000008, 0) + + # configure operation (read) + # DMA base address + await dev_pf0_bar0.write_dword(0x001080, (mem_base+0x0000) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x001084, (mem_base+0x0000 >> 32) & 0xffffffff) + # DMA offset address + await dev_pf0_bar0.write_dword(0x001088, 0) + await dev_pf0_bar0.write_dword(0x00108c, 0) + # DMA offset mask + await dev_pf0_bar0.write_dword(0x001090, 0x000003ff) + await dev_pf0_bar0.write_dword(0x001094, 0) + # DMA stride + await dev_pf0_bar0.write_dword(0x001098, 256) + await dev_pf0_bar0.write_dword(0x00109c, 0) + # RAM base address + await dev_pf0_bar0.write_dword(0x0010c0, 0) + await dev_pf0_bar0.write_dword(0x0010c4, 0) + # RAM offset address + await dev_pf0_bar0.write_dword(0x0010c8, 0) + await dev_pf0_bar0.write_dword(0x0010cc, 0) + # RAM offset mask + await dev_pf0_bar0.write_dword(0x0010d0, 0x000003ff) + await dev_pf0_bar0.write_dword(0x0010d4, 0) + # RAM stride + await dev_pf0_bar0.write_dword(0x0010d8, 256) + await dev_pf0_bar0.write_dword(0x0010dc, 0) + # clear cycle count + await dev_pf0_bar0.write_dword(0x001008, 0) + await dev_pf0_bar0.write_dword(0x00100c, 0) + # block length + await dev_pf0_bar0.write_dword(0x001010, 256) + # block count + await dev_pf0_bar0.write_dword(0x001018, 32) + await dev_pf0_bar0.write_dword(0x00101c, 0) + # start + await dev_pf0_bar0.write_dword(0x001000, 1) + + await Timer(2000, 'ns') + + # configure operation (write) + # DMA base 
address + await dev_pf0_bar0.write_dword(0x001180, (mem_base+0x0000) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x001184, (mem_base+0x0000 >> 32) & 0xffffffff) + # DMA offset address + await dev_pf0_bar0.write_dword(0x001188, 0) + await dev_pf0_bar0.write_dword(0x00118c, 0) + # DMA offset mask + await dev_pf0_bar0.write_dword(0x001190, 0x000003ff) + await dev_pf0_bar0.write_dword(0x001194, 0) + # DMA stride + await dev_pf0_bar0.write_dword(0x001198, 256) + await dev_pf0_bar0.write_dword(0x00119c, 0) + # RAM base address + await dev_pf0_bar0.write_dword(0x0011c0, 0) + await dev_pf0_bar0.write_dword(0x0011c4, 0) + # RAM offset address + await dev_pf0_bar0.write_dword(0x0011c8, 0) + await dev_pf0_bar0.write_dword(0x0011cc, 0) + # RAM offset mask + await dev_pf0_bar0.write_dword(0x0011d0, 0x000003ff) + await dev_pf0_bar0.write_dword(0x0011d4, 0) + # RAM stride + await dev_pf0_bar0.write_dword(0x0011d8, 256) + await dev_pf0_bar0.write_dword(0x0011dc, 0) + # clear cycle count + await dev_pf0_bar0.write_dword(0x001108, 0) + await dev_pf0_bar0.write_dword(0x00110c, 0) + # block length + await dev_pf0_bar0.write_dword(0x001110, 256) + # block count + await dev_pf0_bar0.write_dword(0x001118, 32) + await dev_pf0_bar0.write_dword(0x00111c, 0) + # start + await dev_pf0_bar0.write_dword(0x001100, 1) + + await Timer(2000, 'ns') + await RisingEdge(dut.clk) await RisingEdge(dut.clk) diff --git a/example/VCU108/fpga/tb/fpga_core/test_fpga_core.py b/example/VCU108/fpga/tb/fpga_core/test_fpga_core.py index 742d4efed..92afb32b3 100644 --- a/example/VCU108/fpga/tb/fpga_core/test_fpga_core.py +++ b/example/VCU108/fpga/tb/fpga_core/test_fpga_core.py @@ -311,6 +311,26 @@ async def run_test(dut): assert mem[0:1024] == mem[0x1000:0x1000+1024] + tb.log.info("Test immediate write") + + # write pcie write descriptor + await dev_pf0_bar0.write_dword(0x000200, (mem_base+0x1000) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x000204, (mem_base+0x1000 >> 32) & 0xffffffff) + await 
dev_pf0_bar0.write_dword(0x000208, 0x44332211) + await dev_pf0_bar0.write_dword(0x000210, 0x4) + await dev_pf0_bar0.write_dword(0x000214, 0x100AA) + + await Timer(2000, 'ns') + + # read status + val = await dev_pf0_bar0.read_dword(0x000218) + tb.log.info("Status: 0x%x", val) + assert val == 0x800100AA + + tb.log.info("%s", mem.hexdump_str(0x1000, 64)) + + assert mem[0x1000:0x1000+4] == b'\x11\x22\x33\x44' + tb.log.info("Test DMA block operations") # write packet data diff --git a/example/VCU118/fpga/tb/fpga_core/test_fpga_core.py b/example/VCU118/fpga/tb/fpga_core/test_fpga_core.py index f526584c4..1f6d410c9 100644 --- a/example/VCU118/fpga/tb/fpga_core/test_fpga_core.py +++ b/example/VCU118/fpga/tb/fpga_core/test_fpga_core.py @@ -317,6 +317,26 @@ async def run_test(dut): assert mem[0:1024] == mem[0x1000:0x1000+1024] + tb.log.info("Test immediate write") + + # write pcie write descriptor + await dev_pf0_bar0.write_dword(0x000200, (mem_base+0x1000) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x000204, (mem_base+0x1000 >> 32) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x000208, 0x44332211) + await dev_pf0_bar0.write_dword(0x000210, 0x4) + await dev_pf0_bar0.write_dword(0x000214, 0x100AA) + + await Timer(2000, 'ns') + + # read status + val = await dev_pf0_bar0.read_dword(0x000218) + tb.log.info("Status: 0x%x", val) + assert val == 0x800100AA + + tb.log.info("%s", mem.hexdump_str(0x1000, 64)) + + assert mem[0x1000:0x1000+4] == b'\x11\x22\x33\x44' + tb.log.info("Test DMA block operations") # write packet data diff --git a/example/VCU1525/fpga/tb/fpga_core/test_fpga_core.py b/example/VCU1525/fpga/tb/fpga_core/test_fpga_core.py index 429f0383f..5add2329d 100644 --- a/example/VCU1525/fpga/tb/fpga_core/test_fpga_core.py +++ b/example/VCU1525/fpga/tb/fpga_core/test_fpga_core.py @@ -312,6 +312,26 @@ async def run_test(dut): assert mem[0:1024] == mem[0x1000:0x1000+1024] + tb.log.info("Test immediate write") + + # write pcie write descriptor + await 
dev_pf0_bar0.write_dword(0x000200, (mem_base+0x1000) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x000204, (mem_base+0x1000 >> 32) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x000208, 0x44332211) + await dev_pf0_bar0.write_dword(0x000210, 0x4) + await dev_pf0_bar0.write_dword(0x000214, 0x100AA) + + await Timer(2000, 'ns') + + # read status + val = await dev_pf0_bar0.read_dword(0x000218) + tb.log.info("Status: 0x%x", val) + assert val == 0x800100AA + + tb.log.info("%s", mem.hexdump_str(0x1000, 64)) + + assert mem[0x1000:0x1000+4] == b'\x11\x22\x33\x44' + tb.log.info("Test DMA block operations") # write packet data diff --git a/example/ZCU106/fpga/tb/fpga_core/test_fpga_core.py b/example/ZCU106/fpga/tb/fpga_core/test_fpga_core.py index aee89ee1d..02871d3c8 100644 --- a/example/ZCU106/fpga/tb/fpga_core/test_fpga_core.py +++ b/example/ZCU106/fpga/tb/fpga_core/test_fpga_core.py @@ -317,6 +317,26 @@ async def run_test(dut): assert mem[0:1024] == mem[0x1000:0x1000+1024] + tb.log.info("Test immediate write") + + # write pcie write descriptor + await dev_pf0_bar0.write_dword(0x000200, (mem_base+0x1000) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x000204, (mem_base+0x1000 >> 32) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x000208, 0x44332211) + await dev_pf0_bar0.write_dword(0x000210, 0x4) + await dev_pf0_bar0.write_dword(0x000214, 0x100AA) + + await Timer(2000, 'ns') + + # read status + val = await dev_pf0_bar0.read_dword(0x000218) + tb.log.info("Status: 0x%x", val) + assert val == 0x800100AA + + tb.log.info("%s", mem.hexdump_str(0x1000, 64)) + + assert mem[0x1000:0x1000+4] == b'\x11\x22\x33\x44' + tb.log.info("Test DMA block operations") # write packet data diff --git a/example/fb2CG/fpga/tb/fpga_core/test_fpga_core.py b/example/fb2CG/fpga/tb/fpga_core/test_fpga_core.py index 6c5b428d1..c316e8d3b 100644 --- a/example/fb2CG/fpga/tb/fpga_core/test_fpga_core.py +++ b/example/fb2CG/fpga/tb/fpga_core/test_fpga_core.py @@ -312,6 +312,26 @@ async def run_test(dut): 
assert mem[0:1024] == mem[0x1000:0x1000+1024] + tb.log.info("Test immediate write") + + # write pcie write descriptor + await dev_pf0_bar0.write_dword(0x000200, (mem_base+0x1000) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x000204, (mem_base+0x1000 >> 32) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x000208, 0x44332211) + await dev_pf0_bar0.write_dword(0x000210, 0x4) + await dev_pf0_bar0.write_dword(0x000214, 0x100AA) + + await Timer(2000, 'ns') + + # read status + val = await dev_pf0_bar0.read_dword(0x000218) + tb.log.info("Status: 0x%x", val) + assert val == 0x800100AA + + tb.log.info("%s", mem.hexdump_str(0x1000, 64)) + + assert mem[0x1000:0x1000+4] == b'\x11\x22\x33\x44' + tb.log.info("Test DMA block operations") # write packet data From 43719a9f739bc8bf5809be8f0ab6f35afc4cb330 Mon Sep 17 00:00:00 2001 From: Alex Forencich Date: Mon, 4 Apr 2022 15:05:46 -0700 Subject: [PATCH 15/21] Cleanup --- rtl/pcie_axi_master_rd.v | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/rtl/pcie_axi_master_rd.v b/rtl/pcie_axi_master_rd.v index bf5df02d7..feb4f56ff 100644 --- a/rtl/pcie_axi_master_rd.v +++ b/rtl/pcie_axi_master_rd.v @@ -55,8 +55,8 @@ module pcie_axi_master_rd # parameter TLP_FORCE_64_BIT_ADDR = 0 ) ( - input wire clk, - input wire rst, + input wire clk, + input wire rst, /* * TLP input (request) @@ -443,7 +443,7 @@ always @* begin // report correctable error status_error_cor_next = 1'b1; - // // UR completion + // UR completion tlp_cmd_status_next = CPL_STATUS_UR; tlp_cmd_byte_len_next = 12'd0; tlp_cmd_dword_len_next = 10'd0; From 32b4f2cb1f6ce3caebfdf1bd444954f10f9d5437 Mon Sep 17 00:00:00 2001 From: Alex Forencich Date: Mon, 4 Apr 2022 15:21:25 -0700 Subject: [PATCH 16/21] Improve block operation tests --- .../fpga/tb/fpga_core/test_fpga_core.py | 58 +++++++++++++------ .../AU200/fpga/tb/fpga_core/test_fpga_core.py | 58 +++++++++++++------ .../AU250/fpga/tb/fpga_core/test_fpga_core.py | 58 +++++++++++++------ 
.../AU280/fpga/tb/fpga_core/test_fpga_core.py | 58 +++++++++++++------ .../AU50/fpga/tb/fpga_core/test_fpga_core.py | 58 +++++++++++++------ .../fpga/tb/fpga_core/test_fpga_core.py | 58 +++++++++++++------ .../fpga/tb/fpga_core/test_fpga_core.py | 58 +++++++++++++------ .../fpga/tb/fpga_core/test_fpga_core.py | 58 +++++++++++++------ .../fpga/tb/fpga_core/test_fpga_core.py | 58 +++++++++++++------ .../fpga/tb/fpga_core/test_fpga_core.py | 58 +++++++++++++------ .../fpga/tb/fpga_core/test_fpga_core.py | 58 +++++++++++++------ .../test_example_core_pcie.py | 58 +++++++++++++------ .../test_example_core_pcie_s10.py | 58 +++++++++++++------ .../test_example_core_pcie_us.py | 58 +++++++++++++------ .../fb2CG/fpga/tb/fpga_core/test_fpga_core.py | 58 +++++++++++++------ 15 files changed, 585 insertions(+), 285 deletions(-) diff --git a/example/ADM_PCIE_9V3/fpga/tb/fpga_core/test_fpga_core.py b/example/ADM_PCIE_9V3/fpga/tb/fpga_core/test_fpga_core.py index ab78dbb3c..1118761b0 100644 --- a/example/ADM_PCIE_9V3/fpga/tb/fpga_core/test_fpga_core.py +++ b/example/ADM_PCIE_9V3/fpga/tb/fpga_core/test_fpga_core.py @@ -332,8 +332,16 @@ async def run_test(dut): tb.log.info("Test DMA block operations") + region_len = 0x2000 + src_offset = 0x0000 + dest_offset = 0x4000 + + block_size = 256 + block_stride = block_size + block_count = 32 + # write packet data - mem[0:1024] = bytearray([x % 256 for x in range(1024)]) + mem[src_offset:src_offset+region_len] = bytearray([x % 256 for x in range(region_len)]) # enable DMA await dev_pf0_bar0.write_dword(0x000000, 1) @@ -342,16 +350,16 @@ async def run_test(dut): # configure operation (read) # DMA base address - await dev_pf0_bar0.write_dword(0x001080, (mem_base+0x0000) & 0xffffffff) - await dev_pf0_bar0.write_dword(0x001084, (mem_base+0x0000 >> 32) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x001080, (mem_base+src_offset) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x001084, (mem_base+src_offset >> 32) & 0xffffffff) # DMA offset 
address await dev_pf0_bar0.write_dword(0x001088, 0) await dev_pf0_bar0.write_dword(0x00108c, 0) # DMA offset mask - await dev_pf0_bar0.write_dword(0x001090, 0x000003ff) + await dev_pf0_bar0.write_dword(0x001090, region_len-1) await dev_pf0_bar0.write_dword(0x001094, 0) # DMA stride - await dev_pf0_bar0.write_dword(0x001098, 256) + await dev_pf0_bar0.write_dword(0x001098, block_stride) await dev_pf0_bar0.write_dword(0x00109c, 0) # RAM base address await dev_pf0_bar0.write_dword(0x0010c0, 0) @@ -360,36 +368,40 @@ async def run_test(dut): await dev_pf0_bar0.write_dword(0x0010c8, 0) await dev_pf0_bar0.write_dword(0x0010cc, 0) # RAM offset mask - await dev_pf0_bar0.write_dword(0x0010d0, 0x000003ff) + await dev_pf0_bar0.write_dword(0x0010d0, region_len-1) await dev_pf0_bar0.write_dword(0x0010d4, 0) # RAM stride - await dev_pf0_bar0.write_dword(0x0010d8, 256) + await dev_pf0_bar0.write_dword(0x0010d8, block_stride) await dev_pf0_bar0.write_dword(0x0010dc, 0) # clear cycle count await dev_pf0_bar0.write_dword(0x001008, 0) await dev_pf0_bar0.write_dword(0x00100c, 0) # block length - await dev_pf0_bar0.write_dword(0x001010, 256) + await dev_pf0_bar0.write_dword(0x001010, block_size) # block count - await dev_pf0_bar0.write_dword(0x001018, 32) + await dev_pf0_bar0.write_dword(0x001018, block_count) await dev_pf0_bar0.write_dword(0x00101c, 0) # start await dev_pf0_bar0.write_dword(0x001000, 1) - await Timer(2000, 'ns') + for k in range(10): + cnt = await dev_pf0_bar0.read_dword(0x001018) + await Timer(1000, 'ns') + if cnt == 0: + break # configure operation (write) # DMA base address - await dev_pf0_bar0.write_dword(0x001180, (mem_base+0x0000) & 0xffffffff) - await dev_pf0_bar0.write_dword(0x001184, (mem_base+0x0000 >> 32) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x001180, (mem_base+dest_offset) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x001184, (mem_base+dest_offset >> 32) & 0xffffffff) # DMA offset address await dev_pf0_bar0.write_dword(0x001188, 0) await 
dev_pf0_bar0.write_dword(0x00118c, 0) # DMA offset mask - await dev_pf0_bar0.write_dword(0x001190, 0x000003ff) + await dev_pf0_bar0.write_dword(0x001190, region_len-1) await dev_pf0_bar0.write_dword(0x001194, 0) # DMA stride - await dev_pf0_bar0.write_dword(0x001198, 256) + await dev_pf0_bar0.write_dword(0x001198, block_stride) await dev_pf0_bar0.write_dword(0x00119c, 0) # RAM base address await dev_pf0_bar0.write_dword(0x0011c0, 0) @@ -398,23 +410,31 @@ async def run_test(dut): await dev_pf0_bar0.write_dword(0x0011c8, 0) await dev_pf0_bar0.write_dword(0x0011cc, 0) # RAM offset mask - await dev_pf0_bar0.write_dword(0x0011d0, 0x000003ff) + await dev_pf0_bar0.write_dword(0x0011d0, region_len-1) await dev_pf0_bar0.write_dword(0x0011d4, 0) # RAM stride - await dev_pf0_bar0.write_dword(0x0011d8, 256) + await dev_pf0_bar0.write_dword(0x0011d8, block_stride) await dev_pf0_bar0.write_dword(0x0011dc, 0) # clear cycle count await dev_pf0_bar0.write_dword(0x001108, 0) await dev_pf0_bar0.write_dword(0x00110c, 0) # block length - await dev_pf0_bar0.write_dword(0x001110, 256) + await dev_pf0_bar0.write_dword(0x001110, block_size) # block count - await dev_pf0_bar0.write_dword(0x001118, 32) + await dev_pf0_bar0.write_dword(0x001118, block_count) await dev_pf0_bar0.write_dword(0x00111c, 0) # start await dev_pf0_bar0.write_dword(0x001100, 1) - await Timer(2000, 'ns') + for k in range(10): + cnt = await dev_pf0_bar0.read_dword(0x001118) + await Timer(1000, 'ns') + if cnt == 0: + break + + tb.log.info("%s", mem.hexdump_str(dest_offset, region_len)) + + assert mem[src_offset:src_offset+region_len] == mem[dest_offset:dest_offset+region_len] await RisingEdge(dut.clk) await RisingEdge(dut.clk) diff --git a/example/AU200/fpga/tb/fpga_core/test_fpga_core.py b/example/AU200/fpga/tb/fpga_core/test_fpga_core.py index 5add2329d..2e288aeb6 100644 --- a/example/AU200/fpga/tb/fpga_core/test_fpga_core.py +++ b/example/AU200/fpga/tb/fpga_core/test_fpga_core.py @@ -334,8 +334,16 @@ async def 
run_test(dut): tb.log.info("Test DMA block operations") + region_len = 0x2000 + src_offset = 0x0000 + dest_offset = 0x4000 + + block_size = 256 + block_stride = block_size + block_count = 32 + # write packet data - mem[0:1024] = bytearray([x % 256 for x in range(1024)]) + mem[src_offset:src_offset+region_len] = bytearray([x % 256 for x in range(region_len)]) # enable DMA await dev_pf0_bar0.write_dword(0x000000, 1) @@ -344,16 +352,16 @@ async def run_test(dut): # configure operation (read) # DMA base address - await dev_pf0_bar0.write_dword(0x001080, (mem_base+0x0000) & 0xffffffff) - await dev_pf0_bar0.write_dword(0x001084, (mem_base+0x0000 >> 32) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x001080, (mem_base+src_offset) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x001084, (mem_base+src_offset >> 32) & 0xffffffff) # DMA offset address await dev_pf0_bar0.write_dword(0x001088, 0) await dev_pf0_bar0.write_dword(0x00108c, 0) # DMA offset mask - await dev_pf0_bar0.write_dword(0x001090, 0x000003ff) + await dev_pf0_bar0.write_dword(0x001090, region_len-1) await dev_pf0_bar0.write_dword(0x001094, 0) # DMA stride - await dev_pf0_bar0.write_dword(0x001098, 256) + await dev_pf0_bar0.write_dword(0x001098, block_stride) await dev_pf0_bar0.write_dword(0x00109c, 0) # RAM base address await dev_pf0_bar0.write_dword(0x0010c0, 0) @@ -362,36 +370,40 @@ async def run_test(dut): await dev_pf0_bar0.write_dword(0x0010c8, 0) await dev_pf0_bar0.write_dword(0x0010cc, 0) # RAM offset mask - await dev_pf0_bar0.write_dword(0x0010d0, 0x000003ff) + await dev_pf0_bar0.write_dword(0x0010d0, region_len-1) await dev_pf0_bar0.write_dword(0x0010d4, 0) # RAM stride - await dev_pf0_bar0.write_dword(0x0010d8, 256) + await dev_pf0_bar0.write_dword(0x0010d8, block_stride) await dev_pf0_bar0.write_dword(0x0010dc, 0) # clear cycle count await dev_pf0_bar0.write_dword(0x001008, 0) await dev_pf0_bar0.write_dword(0x00100c, 0) # block length - await dev_pf0_bar0.write_dword(0x001010, 256) + await 
dev_pf0_bar0.write_dword(0x001010, block_size) # block count - await dev_pf0_bar0.write_dword(0x001018, 32) + await dev_pf0_bar0.write_dword(0x001018, block_count) await dev_pf0_bar0.write_dword(0x00101c, 0) # start await dev_pf0_bar0.write_dword(0x001000, 1) - await Timer(2000, 'ns') + for k in range(10): + cnt = await dev_pf0_bar0.read_dword(0x001018) + await Timer(1000, 'ns') + if cnt == 0: + break # configure operation (write) # DMA base address - await dev_pf0_bar0.write_dword(0x001180, (mem_base+0x0000) & 0xffffffff) - await dev_pf0_bar0.write_dword(0x001184, (mem_base+0x0000 >> 32) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x001180, (mem_base+dest_offset) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x001184, (mem_base+dest_offset >> 32) & 0xffffffff) # DMA offset address await dev_pf0_bar0.write_dword(0x001188, 0) await dev_pf0_bar0.write_dword(0x00118c, 0) # DMA offset mask - await dev_pf0_bar0.write_dword(0x001190, 0x000003ff) + await dev_pf0_bar0.write_dword(0x001190, region_len-1) await dev_pf0_bar0.write_dword(0x001194, 0) # DMA stride - await dev_pf0_bar0.write_dword(0x001198, 256) + await dev_pf0_bar0.write_dword(0x001198, block_stride) await dev_pf0_bar0.write_dword(0x00119c, 0) # RAM base address await dev_pf0_bar0.write_dword(0x0011c0, 0) @@ -400,23 +412,31 @@ async def run_test(dut): await dev_pf0_bar0.write_dword(0x0011c8, 0) await dev_pf0_bar0.write_dword(0x0011cc, 0) # RAM offset mask - await dev_pf0_bar0.write_dword(0x0011d0, 0x000003ff) + await dev_pf0_bar0.write_dword(0x0011d0, region_len-1) await dev_pf0_bar0.write_dword(0x0011d4, 0) # RAM stride - await dev_pf0_bar0.write_dword(0x0011d8, 256) + await dev_pf0_bar0.write_dword(0x0011d8, block_stride) await dev_pf0_bar0.write_dword(0x0011dc, 0) # clear cycle count await dev_pf0_bar0.write_dword(0x001108, 0) await dev_pf0_bar0.write_dword(0x00110c, 0) # block length - await dev_pf0_bar0.write_dword(0x001110, 256) + await dev_pf0_bar0.write_dword(0x001110, block_size) # block count - 
await dev_pf0_bar0.write_dword(0x001118, 32) + await dev_pf0_bar0.write_dword(0x001118, block_count) await dev_pf0_bar0.write_dword(0x00111c, 0) # start await dev_pf0_bar0.write_dword(0x001100, 1) - await Timer(2000, 'ns') + for k in range(10): + cnt = await dev_pf0_bar0.read_dword(0x001118) + await Timer(1000, 'ns') + if cnt == 0: + break + + tb.log.info("%s", mem.hexdump_str(dest_offset, region_len)) + + assert mem[src_offset:src_offset+region_len] == mem[dest_offset:dest_offset+region_len] await RisingEdge(dut.clk) await RisingEdge(dut.clk) diff --git a/example/AU250/fpga/tb/fpga_core/test_fpga_core.py b/example/AU250/fpga/tb/fpga_core/test_fpga_core.py index 5add2329d..2e288aeb6 100644 --- a/example/AU250/fpga/tb/fpga_core/test_fpga_core.py +++ b/example/AU250/fpga/tb/fpga_core/test_fpga_core.py @@ -334,8 +334,16 @@ async def run_test(dut): tb.log.info("Test DMA block operations") + region_len = 0x2000 + src_offset = 0x0000 + dest_offset = 0x4000 + + block_size = 256 + block_stride = block_size + block_count = 32 + # write packet data - mem[0:1024] = bytearray([x % 256 for x in range(1024)]) + mem[src_offset:src_offset+region_len] = bytearray([x % 256 for x in range(region_len)]) # enable DMA await dev_pf0_bar0.write_dword(0x000000, 1) @@ -344,16 +352,16 @@ async def run_test(dut): # configure operation (read) # DMA base address - await dev_pf0_bar0.write_dword(0x001080, (mem_base+0x0000) & 0xffffffff) - await dev_pf0_bar0.write_dword(0x001084, (mem_base+0x0000 >> 32) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x001080, (mem_base+src_offset) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x001084, (mem_base+src_offset >> 32) & 0xffffffff) # DMA offset address await dev_pf0_bar0.write_dword(0x001088, 0) await dev_pf0_bar0.write_dword(0x00108c, 0) # DMA offset mask - await dev_pf0_bar0.write_dword(0x001090, 0x000003ff) + await dev_pf0_bar0.write_dword(0x001090, region_len-1) await dev_pf0_bar0.write_dword(0x001094, 0) # DMA stride - await 
dev_pf0_bar0.write_dword(0x001098, 256) + await dev_pf0_bar0.write_dword(0x001098, block_stride) await dev_pf0_bar0.write_dword(0x00109c, 0) # RAM base address await dev_pf0_bar0.write_dword(0x0010c0, 0) @@ -362,36 +370,40 @@ async def run_test(dut): await dev_pf0_bar0.write_dword(0x0010c8, 0) await dev_pf0_bar0.write_dword(0x0010cc, 0) # RAM offset mask - await dev_pf0_bar0.write_dword(0x0010d0, 0x000003ff) + await dev_pf0_bar0.write_dword(0x0010d0, region_len-1) await dev_pf0_bar0.write_dword(0x0010d4, 0) # RAM stride - await dev_pf0_bar0.write_dword(0x0010d8, 256) + await dev_pf0_bar0.write_dword(0x0010d8, block_stride) await dev_pf0_bar0.write_dword(0x0010dc, 0) # clear cycle count await dev_pf0_bar0.write_dword(0x001008, 0) await dev_pf0_bar0.write_dword(0x00100c, 0) # block length - await dev_pf0_bar0.write_dword(0x001010, 256) + await dev_pf0_bar0.write_dword(0x001010, block_size) # block count - await dev_pf0_bar0.write_dword(0x001018, 32) + await dev_pf0_bar0.write_dword(0x001018, block_count) await dev_pf0_bar0.write_dword(0x00101c, 0) # start await dev_pf0_bar0.write_dword(0x001000, 1) - await Timer(2000, 'ns') + for k in range(10): + cnt = await dev_pf0_bar0.read_dword(0x001018) + await Timer(1000, 'ns') + if cnt == 0: + break # configure operation (write) # DMA base address - await dev_pf0_bar0.write_dword(0x001180, (mem_base+0x0000) & 0xffffffff) - await dev_pf0_bar0.write_dword(0x001184, (mem_base+0x0000 >> 32) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x001180, (mem_base+dest_offset) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x001184, (mem_base+dest_offset >> 32) & 0xffffffff) # DMA offset address await dev_pf0_bar0.write_dword(0x001188, 0) await dev_pf0_bar0.write_dword(0x00118c, 0) # DMA offset mask - await dev_pf0_bar0.write_dword(0x001190, 0x000003ff) + await dev_pf0_bar0.write_dword(0x001190, region_len-1) await dev_pf0_bar0.write_dword(0x001194, 0) # DMA stride - await dev_pf0_bar0.write_dword(0x001198, 256) + await 
dev_pf0_bar0.write_dword(0x001198, block_stride) await dev_pf0_bar0.write_dword(0x00119c, 0) # RAM base address await dev_pf0_bar0.write_dword(0x0011c0, 0) @@ -400,23 +412,31 @@ async def run_test(dut): await dev_pf0_bar0.write_dword(0x0011c8, 0) await dev_pf0_bar0.write_dword(0x0011cc, 0) # RAM offset mask - await dev_pf0_bar0.write_dword(0x0011d0, 0x000003ff) + await dev_pf0_bar0.write_dword(0x0011d0, region_len-1) await dev_pf0_bar0.write_dword(0x0011d4, 0) # RAM stride - await dev_pf0_bar0.write_dword(0x0011d8, 256) + await dev_pf0_bar0.write_dword(0x0011d8, block_stride) await dev_pf0_bar0.write_dword(0x0011dc, 0) # clear cycle count await dev_pf0_bar0.write_dword(0x001108, 0) await dev_pf0_bar0.write_dword(0x00110c, 0) # block length - await dev_pf0_bar0.write_dword(0x001110, 256) + await dev_pf0_bar0.write_dword(0x001110, block_size) # block count - await dev_pf0_bar0.write_dword(0x001118, 32) + await dev_pf0_bar0.write_dword(0x001118, block_count) await dev_pf0_bar0.write_dword(0x00111c, 0) # start await dev_pf0_bar0.write_dword(0x001100, 1) - await Timer(2000, 'ns') + for k in range(10): + cnt = await dev_pf0_bar0.read_dword(0x001118) + await Timer(1000, 'ns') + if cnt == 0: + break + + tb.log.info("%s", mem.hexdump_str(dest_offset, region_len)) + + assert mem[src_offset:src_offset+region_len] == mem[dest_offset:dest_offset+region_len] await RisingEdge(dut.clk) await RisingEdge(dut.clk) diff --git a/example/AU280/fpga/tb/fpga_core/test_fpga_core.py b/example/AU280/fpga/tb/fpga_core/test_fpga_core.py index ab78dbb3c..1118761b0 100644 --- a/example/AU280/fpga/tb/fpga_core/test_fpga_core.py +++ b/example/AU280/fpga/tb/fpga_core/test_fpga_core.py @@ -332,8 +332,16 @@ async def run_test(dut): tb.log.info("Test DMA block operations") + region_len = 0x2000 + src_offset = 0x0000 + dest_offset = 0x4000 + + block_size = 256 + block_stride = block_size + block_count = 32 + # write packet data - mem[0:1024] = bytearray([x % 256 for x in range(1024)]) + 
mem[src_offset:src_offset+region_len] = bytearray([x % 256 for x in range(region_len)]) # enable DMA await dev_pf0_bar0.write_dword(0x000000, 1) @@ -342,16 +350,16 @@ async def run_test(dut): # configure operation (read) # DMA base address - await dev_pf0_bar0.write_dword(0x001080, (mem_base+0x0000) & 0xffffffff) - await dev_pf0_bar0.write_dword(0x001084, (mem_base+0x0000 >> 32) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x001080, (mem_base+src_offset) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x001084, (mem_base+src_offset >> 32) & 0xffffffff) # DMA offset address await dev_pf0_bar0.write_dword(0x001088, 0) await dev_pf0_bar0.write_dword(0x00108c, 0) # DMA offset mask - await dev_pf0_bar0.write_dword(0x001090, 0x000003ff) + await dev_pf0_bar0.write_dword(0x001090, region_len-1) await dev_pf0_bar0.write_dword(0x001094, 0) # DMA stride - await dev_pf0_bar0.write_dword(0x001098, 256) + await dev_pf0_bar0.write_dword(0x001098, block_stride) await dev_pf0_bar0.write_dword(0x00109c, 0) # RAM base address await dev_pf0_bar0.write_dword(0x0010c0, 0) @@ -360,36 +368,40 @@ async def run_test(dut): await dev_pf0_bar0.write_dword(0x0010c8, 0) await dev_pf0_bar0.write_dword(0x0010cc, 0) # RAM offset mask - await dev_pf0_bar0.write_dword(0x0010d0, 0x000003ff) + await dev_pf0_bar0.write_dword(0x0010d0, region_len-1) await dev_pf0_bar0.write_dword(0x0010d4, 0) # RAM stride - await dev_pf0_bar0.write_dword(0x0010d8, 256) + await dev_pf0_bar0.write_dword(0x0010d8, block_stride) await dev_pf0_bar0.write_dword(0x0010dc, 0) # clear cycle count await dev_pf0_bar0.write_dword(0x001008, 0) await dev_pf0_bar0.write_dword(0x00100c, 0) # block length - await dev_pf0_bar0.write_dword(0x001010, 256) + await dev_pf0_bar0.write_dword(0x001010, block_size) # block count - await dev_pf0_bar0.write_dword(0x001018, 32) + await dev_pf0_bar0.write_dword(0x001018, block_count) await dev_pf0_bar0.write_dword(0x00101c, 0) # start await dev_pf0_bar0.write_dword(0x001000, 1) - await Timer(2000, 
'ns') + for k in range(10): + cnt = await dev_pf0_bar0.read_dword(0x001018) + await Timer(1000, 'ns') + if cnt == 0: + break # configure operation (write) # DMA base address - await dev_pf0_bar0.write_dword(0x001180, (mem_base+0x0000) & 0xffffffff) - await dev_pf0_bar0.write_dword(0x001184, (mem_base+0x0000 >> 32) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x001180, (mem_base+dest_offset) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x001184, (mem_base+dest_offset >> 32) & 0xffffffff) # DMA offset address await dev_pf0_bar0.write_dword(0x001188, 0) await dev_pf0_bar0.write_dword(0x00118c, 0) # DMA offset mask - await dev_pf0_bar0.write_dword(0x001190, 0x000003ff) + await dev_pf0_bar0.write_dword(0x001190, region_len-1) await dev_pf0_bar0.write_dword(0x001194, 0) # DMA stride - await dev_pf0_bar0.write_dword(0x001198, 256) + await dev_pf0_bar0.write_dword(0x001198, block_stride) await dev_pf0_bar0.write_dword(0x00119c, 0) # RAM base address await dev_pf0_bar0.write_dword(0x0011c0, 0) @@ -398,23 +410,31 @@ async def run_test(dut): await dev_pf0_bar0.write_dword(0x0011c8, 0) await dev_pf0_bar0.write_dword(0x0011cc, 0) # RAM offset mask - await dev_pf0_bar0.write_dword(0x0011d0, 0x000003ff) + await dev_pf0_bar0.write_dword(0x0011d0, region_len-1) await dev_pf0_bar0.write_dword(0x0011d4, 0) # RAM stride - await dev_pf0_bar0.write_dword(0x0011d8, 256) + await dev_pf0_bar0.write_dword(0x0011d8, block_stride) await dev_pf0_bar0.write_dword(0x0011dc, 0) # clear cycle count await dev_pf0_bar0.write_dword(0x001108, 0) await dev_pf0_bar0.write_dword(0x00110c, 0) # block length - await dev_pf0_bar0.write_dword(0x001110, 256) + await dev_pf0_bar0.write_dword(0x001110, block_size) # block count - await dev_pf0_bar0.write_dword(0x001118, 32) + await dev_pf0_bar0.write_dword(0x001118, block_count) await dev_pf0_bar0.write_dword(0x00111c, 0) # start await dev_pf0_bar0.write_dword(0x001100, 1) - await Timer(2000, 'ns') + for k in range(10): + cnt = await 
dev_pf0_bar0.read_dword(0x001118) + await Timer(1000, 'ns') + if cnt == 0: + break + + tb.log.info("%s", mem.hexdump_str(dest_offset, region_len)) + + assert mem[src_offset:src_offset+region_len] == mem[dest_offset:dest_offset+region_len] await RisingEdge(dut.clk) await RisingEdge(dut.clk) diff --git a/example/AU50/fpga/tb/fpga_core/test_fpga_core.py b/example/AU50/fpga/tb/fpga_core/test_fpga_core.py index ab78dbb3c..1118761b0 100644 --- a/example/AU50/fpga/tb/fpga_core/test_fpga_core.py +++ b/example/AU50/fpga/tb/fpga_core/test_fpga_core.py @@ -332,8 +332,16 @@ async def run_test(dut): tb.log.info("Test DMA block operations") + region_len = 0x2000 + src_offset = 0x0000 + dest_offset = 0x4000 + + block_size = 256 + block_stride = block_size + block_count = 32 + # write packet data - mem[0:1024] = bytearray([x % 256 for x in range(1024)]) + mem[src_offset:src_offset+region_len] = bytearray([x % 256 for x in range(region_len)]) # enable DMA await dev_pf0_bar0.write_dword(0x000000, 1) @@ -342,16 +350,16 @@ async def run_test(dut): # configure operation (read) # DMA base address - await dev_pf0_bar0.write_dword(0x001080, (mem_base+0x0000) & 0xffffffff) - await dev_pf0_bar0.write_dword(0x001084, (mem_base+0x0000 >> 32) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x001080, (mem_base+src_offset) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x001084, (mem_base+src_offset >> 32) & 0xffffffff) # DMA offset address await dev_pf0_bar0.write_dword(0x001088, 0) await dev_pf0_bar0.write_dword(0x00108c, 0) # DMA offset mask - await dev_pf0_bar0.write_dword(0x001090, 0x000003ff) + await dev_pf0_bar0.write_dword(0x001090, region_len-1) await dev_pf0_bar0.write_dword(0x001094, 0) # DMA stride - await dev_pf0_bar0.write_dword(0x001098, 256) + await dev_pf0_bar0.write_dword(0x001098, block_stride) await dev_pf0_bar0.write_dword(0x00109c, 0) # RAM base address await dev_pf0_bar0.write_dword(0x0010c0, 0) @@ -360,36 +368,40 @@ async def run_test(dut): await 
dev_pf0_bar0.write_dword(0x0010c8, 0) await dev_pf0_bar0.write_dword(0x0010cc, 0) # RAM offset mask - await dev_pf0_bar0.write_dword(0x0010d0, 0x000003ff) + await dev_pf0_bar0.write_dword(0x0010d0, region_len-1) await dev_pf0_bar0.write_dword(0x0010d4, 0) # RAM stride - await dev_pf0_bar0.write_dword(0x0010d8, 256) + await dev_pf0_bar0.write_dword(0x0010d8, block_stride) await dev_pf0_bar0.write_dword(0x0010dc, 0) # clear cycle count await dev_pf0_bar0.write_dword(0x001008, 0) await dev_pf0_bar0.write_dword(0x00100c, 0) # block length - await dev_pf0_bar0.write_dword(0x001010, 256) + await dev_pf0_bar0.write_dword(0x001010, block_size) # block count - await dev_pf0_bar0.write_dword(0x001018, 32) + await dev_pf0_bar0.write_dword(0x001018, block_count) await dev_pf0_bar0.write_dword(0x00101c, 0) # start await dev_pf0_bar0.write_dword(0x001000, 1) - await Timer(2000, 'ns') + for k in range(10): + cnt = await dev_pf0_bar0.read_dword(0x001018) + await Timer(1000, 'ns') + if cnt == 0: + break # configure operation (write) # DMA base address - await dev_pf0_bar0.write_dword(0x001180, (mem_base+0x0000) & 0xffffffff) - await dev_pf0_bar0.write_dword(0x001184, (mem_base+0x0000 >> 32) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x001180, (mem_base+dest_offset) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x001184, (mem_base+dest_offset >> 32) & 0xffffffff) # DMA offset address await dev_pf0_bar0.write_dword(0x001188, 0) await dev_pf0_bar0.write_dword(0x00118c, 0) # DMA offset mask - await dev_pf0_bar0.write_dword(0x001190, 0x000003ff) + await dev_pf0_bar0.write_dword(0x001190, region_len-1) await dev_pf0_bar0.write_dword(0x001194, 0) # DMA stride - await dev_pf0_bar0.write_dword(0x001198, 256) + await dev_pf0_bar0.write_dword(0x001198, block_stride) await dev_pf0_bar0.write_dword(0x00119c, 0) # RAM base address await dev_pf0_bar0.write_dword(0x0011c0, 0) @@ -398,23 +410,31 @@ async def run_test(dut): await dev_pf0_bar0.write_dword(0x0011c8, 0) await 
dev_pf0_bar0.write_dword(0x0011cc, 0) # RAM offset mask - await dev_pf0_bar0.write_dword(0x0011d0, 0x000003ff) + await dev_pf0_bar0.write_dword(0x0011d0, region_len-1) await dev_pf0_bar0.write_dword(0x0011d4, 0) # RAM stride - await dev_pf0_bar0.write_dword(0x0011d8, 256) + await dev_pf0_bar0.write_dword(0x0011d8, block_stride) await dev_pf0_bar0.write_dword(0x0011dc, 0) # clear cycle count await dev_pf0_bar0.write_dword(0x001108, 0) await dev_pf0_bar0.write_dword(0x00110c, 0) # block length - await dev_pf0_bar0.write_dword(0x001110, 256) + await dev_pf0_bar0.write_dword(0x001110, block_size) # block count - await dev_pf0_bar0.write_dword(0x001118, 32) + await dev_pf0_bar0.write_dword(0x001118, block_count) await dev_pf0_bar0.write_dword(0x00111c, 0) # start await dev_pf0_bar0.write_dword(0x001100, 1) - await Timer(2000, 'ns') + for k in range(10): + cnt = await dev_pf0_bar0.read_dword(0x001118) + await Timer(1000, 'ns') + if cnt == 0: + break + + tb.log.info("%s", mem.hexdump_str(dest_offset, region_len)) + + assert mem[src_offset:src_offset+region_len] == mem[dest_offset:dest_offset+region_len] await RisingEdge(dut.clk) await RisingEdge(dut.clk) diff --git a/example/ExaNIC_X10/fpga/tb/fpga_core/test_fpga_core.py b/example/ExaNIC_X10/fpga/tb/fpga_core/test_fpga_core.py index f3759cb1d..389b950d5 100644 --- a/example/ExaNIC_X10/fpga/tb/fpga_core/test_fpga_core.py +++ b/example/ExaNIC_X10/fpga/tb/fpga_core/test_fpga_core.py @@ -328,8 +328,16 @@ async def run_test(dut): tb.log.info("Test DMA block operations") + region_len = 0x2000 + src_offset = 0x0000 + dest_offset = 0x4000 + + block_size = 256 + block_stride = block_size + block_count = 32 + # write packet data - mem[0:1024] = bytearray([x % 256 for x in range(1024)]) + mem[src_offset:src_offset+region_len] = bytearray([x % 256 for x in range(region_len)]) # enable DMA await dev_pf0_bar0.write_dword(0x000000, 1) @@ -338,16 +346,16 @@ async def run_test(dut): # configure operation (read) # DMA base address - await 
dev_pf0_bar0.write_dword(0x001080, (mem_base+0x0000) & 0xffffffff) - await dev_pf0_bar0.write_dword(0x001084, (mem_base+0x0000 >> 32) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x001080, (mem_base+src_offset) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x001084, (mem_base+src_offset >> 32) & 0xffffffff) # DMA offset address await dev_pf0_bar0.write_dword(0x001088, 0) await dev_pf0_bar0.write_dword(0x00108c, 0) # DMA offset mask - await dev_pf0_bar0.write_dword(0x001090, 0x000003ff) + await dev_pf0_bar0.write_dword(0x001090, region_len-1) await dev_pf0_bar0.write_dword(0x001094, 0) # DMA stride - await dev_pf0_bar0.write_dword(0x001098, 256) + await dev_pf0_bar0.write_dword(0x001098, block_stride) await dev_pf0_bar0.write_dword(0x00109c, 0) # RAM base address await dev_pf0_bar0.write_dword(0x0010c0, 0) @@ -356,36 +364,40 @@ async def run_test(dut): await dev_pf0_bar0.write_dword(0x0010c8, 0) await dev_pf0_bar0.write_dword(0x0010cc, 0) # RAM offset mask - await dev_pf0_bar0.write_dword(0x0010d0, 0x000003ff) + await dev_pf0_bar0.write_dword(0x0010d0, region_len-1) await dev_pf0_bar0.write_dword(0x0010d4, 0) # RAM stride - await dev_pf0_bar0.write_dword(0x0010d8, 256) + await dev_pf0_bar0.write_dword(0x0010d8, block_stride) await dev_pf0_bar0.write_dword(0x0010dc, 0) # clear cycle count await dev_pf0_bar0.write_dword(0x001008, 0) await dev_pf0_bar0.write_dword(0x00100c, 0) # block length - await dev_pf0_bar0.write_dword(0x001010, 256) + await dev_pf0_bar0.write_dword(0x001010, block_size) # block count - await dev_pf0_bar0.write_dword(0x001018, 32) + await dev_pf0_bar0.write_dword(0x001018, block_count) await dev_pf0_bar0.write_dword(0x00101c, 0) # start await dev_pf0_bar0.write_dword(0x001000, 1) - await Timer(2000, 'ns') + for k in range(10): + cnt = await dev_pf0_bar0.read_dword(0x001018) + await Timer(1000, 'ns') + if cnt == 0: + break # configure operation (write) # DMA base address - await dev_pf0_bar0.write_dword(0x001180, (mem_base+0x0000) & 0xffffffff) 
- await dev_pf0_bar0.write_dword(0x001184, (mem_base+0x0000 >> 32) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x001180, (mem_base+dest_offset) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x001184, (mem_base+dest_offset >> 32) & 0xffffffff) # DMA offset address await dev_pf0_bar0.write_dword(0x001188, 0) await dev_pf0_bar0.write_dword(0x00118c, 0) # DMA offset mask - await dev_pf0_bar0.write_dword(0x001190, 0x000003ff) + await dev_pf0_bar0.write_dword(0x001190, region_len-1) await dev_pf0_bar0.write_dword(0x001194, 0) # DMA stride - await dev_pf0_bar0.write_dword(0x001198, 256) + await dev_pf0_bar0.write_dword(0x001198, block_stride) await dev_pf0_bar0.write_dword(0x00119c, 0) # RAM base address await dev_pf0_bar0.write_dword(0x0011c0, 0) @@ -394,23 +406,31 @@ async def run_test(dut): await dev_pf0_bar0.write_dword(0x0011c8, 0) await dev_pf0_bar0.write_dword(0x0011cc, 0) # RAM offset mask - await dev_pf0_bar0.write_dword(0x0011d0, 0x000003ff) + await dev_pf0_bar0.write_dword(0x0011d0, region_len-1) await dev_pf0_bar0.write_dword(0x0011d4, 0) # RAM stride - await dev_pf0_bar0.write_dword(0x0011d8, 256) + await dev_pf0_bar0.write_dword(0x0011d8, block_stride) await dev_pf0_bar0.write_dword(0x0011dc, 0) # clear cycle count await dev_pf0_bar0.write_dword(0x001108, 0) await dev_pf0_bar0.write_dword(0x00110c, 0) # block length - await dev_pf0_bar0.write_dword(0x001110, 256) + await dev_pf0_bar0.write_dword(0x001110, block_size) # block count - await dev_pf0_bar0.write_dword(0x001118, 32) + await dev_pf0_bar0.write_dword(0x001118, block_count) await dev_pf0_bar0.write_dword(0x00111c, 0) # start await dev_pf0_bar0.write_dword(0x001100, 1) - await Timer(2000, 'ns') + for k in range(10): + cnt = await dev_pf0_bar0.read_dword(0x001118) + await Timer(1000, 'ns') + if cnt == 0: + break + + tb.log.info("%s", mem.hexdump_str(dest_offset, region_len)) + + assert mem[src_offset:src_offset+region_len] == mem[dest_offset:dest_offset+region_len] await RisingEdge(dut.clk) await 
RisingEdge(dut.clk) diff --git a/example/ExaNIC_X25/fpga/tb/fpga_core/test_fpga_core.py b/example/ExaNIC_X25/fpga/tb/fpga_core/test_fpga_core.py index ea327aef7..a68b46639 100644 --- a/example/ExaNIC_X25/fpga/tb/fpga_core/test_fpga_core.py +++ b/example/ExaNIC_X25/fpga/tb/fpga_core/test_fpga_core.py @@ -332,8 +332,16 @@ async def run_test(dut): tb.log.info("Test DMA block operations") + region_len = 0x2000 + src_offset = 0x0000 + dest_offset = 0x4000 + + block_size = 256 + block_stride = block_size + block_count = 32 + # write packet data - mem[0:1024] = bytearray([x % 256 for x in range(1024)]) + mem[src_offset:src_offset+region_len] = bytearray([x % 256 for x in range(region_len)]) # enable DMA await dev_pf0_bar0.write_dword(0x000000, 1) @@ -342,16 +350,16 @@ async def run_test(dut): # configure operation (read) # DMA base address - await dev_pf0_bar0.write_dword(0x001080, (mem_base+0x0000) & 0xffffffff) - await dev_pf0_bar0.write_dword(0x001084, (mem_base+0x0000 >> 32) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x001080, (mem_base+src_offset) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x001084, (mem_base+src_offset >> 32) & 0xffffffff) # DMA offset address await dev_pf0_bar0.write_dword(0x001088, 0) await dev_pf0_bar0.write_dword(0x00108c, 0) # DMA offset mask - await dev_pf0_bar0.write_dword(0x001090, 0x000003ff) + await dev_pf0_bar0.write_dword(0x001090, region_len-1) await dev_pf0_bar0.write_dword(0x001094, 0) # DMA stride - await dev_pf0_bar0.write_dword(0x001098, 256) + await dev_pf0_bar0.write_dword(0x001098, block_stride) await dev_pf0_bar0.write_dword(0x00109c, 0) # RAM base address await dev_pf0_bar0.write_dword(0x0010c0, 0) @@ -360,36 +368,40 @@ async def run_test(dut): await dev_pf0_bar0.write_dword(0x0010c8, 0) await dev_pf0_bar0.write_dword(0x0010cc, 0) # RAM offset mask - await dev_pf0_bar0.write_dword(0x0010d0, 0x000003ff) + await dev_pf0_bar0.write_dword(0x0010d0, region_len-1) await dev_pf0_bar0.write_dword(0x0010d4, 0) # RAM stride - 
await dev_pf0_bar0.write_dword(0x0010d8, 256) + await dev_pf0_bar0.write_dword(0x0010d8, block_stride) await dev_pf0_bar0.write_dword(0x0010dc, 0) # clear cycle count await dev_pf0_bar0.write_dword(0x001008, 0) await dev_pf0_bar0.write_dword(0x00100c, 0) # block length - await dev_pf0_bar0.write_dword(0x001010, 256) + await dev_pf0_bar0.write_dword(0x001010, block_size) # block count - await dev_pf0_bar0.write_dword(0x001018, 32) + await dev_pf0_bar0.write_dword(0x001018, block_count) await dev_pf0_bar0.write_dword(0x00101c, 0) # start await dev_pf0_bar0.write_dword(0x001000, 1) - await Timer(2000, 'ns') + for k in range(10): + cnt = await dev_pf0_bar0.read_dword(0x001018) + await Timer(1000, 'ns') + if cnt == 0: + break # configure operation (write) # DMA base address - await dev_pf0_bar0.write_dword(0x001180, (mem_base+0x0000) & 0xffffffff) - await dev_pf0_bar0.write_dword(0x001184, (mem_base+0x0000 >> 32) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x001180, (mem_base+dest_offset) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x001184, (mem_base+dest_offset >> 32) & 0xffffffff) # DMA offset address await dev_pf0_bar0.write_dword(0x001188, 0) await dev_pf0_bar0.write_dword(0x00118c, 0) # DMA offset mask - await dev_pf0_bar0.write_dword(0x001190, 0x000003ff) + await dev_pf0_bar0.write_dword(0x001190, region_len-1) await dev_pf0_bar0.write_dword(0x001194, 0) # DMA stride - await dev_pf0_bar0.write_dword(0x001198, 256) + await dev_pf0_bar0.write_dword(0x001198, block_stride) await dev_pf0_bar0.write_dword(0x00119c, 0) # RAM base address await dev_pf0_bar0.write_dword(0x0011c0, 0) @@ -398,23 +410,31 @@ async def run_test(dut): await dev_pf0_bar0.write_dword(0x0011c8, 0) await dev_pf0_bar0.write_dword(0x0011cc, 0) # RAM offset mask - await dev_pf0_bar0.write_dword(0x0011d0, 0x000003ff) + await dev_pf0_bar0.write_dword(0x0011d0, region_len-1) await dev_pf0_bar0.write_dword(0x0011d4, 0) # RAM stride - await dev_pf0_bar0.write_dword(0x0011d8, 256) + await 
dev_pf0_bar0.write_dword(0x0011d8, block_stride) await dev_pf0_bar0.write_dword(0x0011dc, 0) # clear cycle count await dev_pf0_bar0.write_dword(0x001108, 0) await dev_pf0_bar0.write_dword(0x00110c, 0) # block length - await dev_pf0_bar0.write_dword(0x001110, 256) + await dev_pf0_bar0.write_dword(0x001110, block_size) # block count - await dev_pf0_bar0.write_dword(0x001118, 32) + await dev_pf0_bar0.write_dword(0x001118, block_count) await dev_pf0_bar0.write_dword(0x00111c, 0) # start await dev_pf0_bar0.write_dword(0x001100, 1) - await Timer(2000, 'ns') + for k in range(10): + cnt = await dev_pf0_bar0.read_dword(0x001118) + await Timer(1000, 'ns') + if cnt == 0: + break + + tb.log.info("%s", mem.hexdump_str(dest_offset, region_len)) + + assert mem[src_offset:src_offset+region_len] == mem[dest_offset:dest_offset+region_len] await RisingEdge(dut.clk) await RisingEdge(dut.clk) diff --git a/example/VCU108/fpga/tb/fpga_core/test_fpga_core.py b/example/VCU108/fpga/tb/fpga_core/test_fpga_core.py index 92afb32b3..82a45c251 100644 --- a/example/VCU108/fpga/tb/fpga_core/test_fpga_core.py +++ b/example/VCU108/fpga/tb/fpga_core/test_fpga_core.py @@ -333,8 +333,16 @@ async def run_test(dut): tb.log.info("Test DMA block operations") + region_len = 0x2000 + src_offset = 0x0000 + dest_offset = 0x4000 + + block_size = 256 + block_stride = block_size + block_count = 32 + # write packet data - mem[0:1024] = bytearray([x % 256 for x in range(1024)]) + mem[src_offset:src_offset+region_len] = bytearray([x % 256 for x in range(region_len)]) # enable DMA await dev_pf0_bar0.write_dword(0x000000, 1) @@ -343,16 +351,16 @@ async def run_test(dut): # configure operation (read) # DMA base address - await dev_pf0_bar0.write_dword(0x001080, (mem_base+0x0000) & 0xffffffff) - await dev_pf0_bar0.write_dword(0x001084, (mem_base+0x0000 >> 32) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x001080, (mem_base+src_offset) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x001084, (mem_base+src_offset >> 32) 
& 0xffffffff) # DMA offset address await dev_pf0_bar0.write_dword(0x001088, 0) await dev_pf0_bar0.write_dword(0x00108c, 0) # DMA offset mask - await dev_pf0_bar0.write_dword(0x001090, 0x000003ff) + await dev_pf0_bar0.write_dword(0x001090, region_len-1) await dev_pf0_bar0.write_dword(0x001094, 0) # DMA stride - await dev_pf0_bar0.write_dword(0x001098, 256) + await dev_pf0_bar0.write_dword(0x001098, block_stride) await dev_pf0_bar0.write_dword(0x00109c, 0) # RAM base address await dev_pf0_bar0.write_dword(0x0010c0, 0) @@ -361,36 +369,40 @@ async def run_test(dut): await dev_pf0_bar0.write_dword(0x0010c8, 0) await dev_pf0_bar0.write_dword(0x0010cc, 0) # RAM offset mask - await dev_pf0_bar0.write_dword(0x0010d0, 0x000003ff) + await dev_pf0_bar0.write_dword(0x0010d0, region_len-1) await dev_pf0_bar0.write_dword(0x0010d4, 0) # RAM stride - await dev_pf0_bar0.write_dword(0x0010d8, 256) + await dev_pf0_bar0.write_dword(0x0010d8, block_stride) await dev_pf0_bar0.write_dword(0x0010dc, 0) # clear cycle count await dev_pf0_bar0.write_dword(0x001008, 0) await dev_pf0_bar0.write_dword(0x00100c, 0) # block length - await dev_pf0_bar0.write_dword(0x001010, 256) + await dev_pf0_bar0.write_dword(0x001010, block_size) # block count - await dev_pf0_bar0.write_dword(0x001018, 32) + await dev_pf0_bar0.write_dword(0x001018, block_count) await dev_pf0_bar0.write_dword(0x00101c, 0) # start await dev_pf0_bar0.write_dword(0x001000, 1) - await Timer(2000, 'ns') + for k in range(10): + cnt = await dev_pf0_bar0.read_dword(0x001018) + await Timer(1000, 'ns') + if cnt == 0: + break # configure operation (write) # DMA base address - await dev_pf0_bar0.write_dword(0x001180, (mem_base+0x0000) & 0xffffffff) - await dev_pf0_bar0.write_dword(0x001184, (mem_base+0x0000 >> 32) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x001180, (mem_base+dest_offset) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x001184, (mem_base+dest_offset >> 32) & 0xffffffff) # DMA offset address await 
dev_pf0_bar0.write_dword(0x001188, 0) await dev_pf0_bar0.write_dword(0x00118c, 0) # DMA offset mask - await dev_pf0_bar0.write_dword(0x001190, 0x000003ff) + await dev_pf0_bar0.write_dword(0x001190, region_len-1) await dev_pf0_bar0.write_dword(0x001194, 0) # DMA stride - await dev_pf0_bar0.write_dword(0x001198, 256) + await dev_pf0_bar0.write_dword(0x001198, block_stride) await dev_pf0_bar0.write_dword(0x00119c, 0) # RAM base address await dev_pf0_bar0.write_dword(0x0011c0, 0) @@ -399,23 +411,31 @@ async def run_test(dut): await dev_pf0_bar0.write_dword(0x0011c8, 0) await dev_pf0_bar0.write_dword(0x0011cc, 0) # RAM offset mask - await dev_pf0_bar0.write_dword(0x0011d0, 0x000003ff) + await dev_pf0_bar0.write_dword(0x0011d0, region_len-1) await dev_pf0_bar0.write_dword(0x0011d4, 0) # RAM stride - await dev_pf0_bar0.write_dword(0x0011d8, 256) + await dev_pf0_bar0.write_dword(0x0011d8, block_stride) await dev_pf0_bar0.write_dword(0x0011dc, 0) # clear cycle count await dev_pf0_bar0.write_dword(0x001108, 0) await dev_pf0_bar0.write_dword(0x00110c, 0) # block length - await dev_pf0_bar0.write_dword(0x001110, 256) + await dev_pf0_bar0.write_dword(0x001110, block_size) # block count - await dev_pf0_bar0.write_dword(0x001118, 32) + await dev_pf0_bar0.write_dword(0x001118, block_count) await dev_pf0_bar0.write_dword(0x00111c, 0) # start await dev_pf0_bar0.write_dword(0x001100, 1) - await Timer(2000, 'ns') + for k in range(10): + cnt = await dev_pf0_bar0.read_dword(0x001118) + await Timer(1000, 'ns') + if cnt == 0: + break + + tb.log.info("%s", mem.hexdump_str(dest_offset, region_len)) + + assert mem[src_offset:src_offset+region_len] == mem[dest_offset:dest_offset+region_len] await RisingEdge(dut.clk) await RisingEdge(dut.clk) diff --git a/example/VCU118/fpga/tb/fpga_core/test_fpga_core.py b/example/VCU118/fpga/tb/fpga_core/test_fpga_core.py index 1f6d410c9..cf9c3ca63 100644 --- a/example/VCU118/fpga/tb/fpga_core/test_fpga_core.py +++ 
b/example/VCU118/fpga/tb/fpga_core/test_fpga_core.py @@ -339,8 +339,16 @@ async def run_test(dut): tb.log.info("Test DMA block operations") + region_len = 0x2000 + src_offset = 0x0000 + dest_offset = 0x4000 + + block_size = 256 + block_stride = block_size + block_count = 32 + # write packet data - mem[0:1024] = bytearray([x % 256 for x in range(1024)]) + mem[src_offset:src_offset+region_len] = bytearray([x % 256 for x in range(region_len)]) # enable DMA await dev_pf0_bar0.write_dword(0x000000, 1) @@ -349,16 +357,16 @@ async def run_test(dut): # configure operation (read) # DMA base address - await dev_pf0_bar0.write_dword(0x001080, (mem_base+0x0000) & 0xffffffff) - await dev_pf0_bar0.write_dword(0x001084, (mem_base+0x0000 >> 32) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x001080, (mem_base+src_offset) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x001084, (mem_base+src_offset >> 32) & 0xffffffff) # DMA offset address await dev_pf0_bar0.write_dword(0x001088, 0) await dev_pf0_bar0.write_dword(0x00108c, 0) # DMA offset mask - await dev_pf0_bar0.write_dword(0x001090, 0x000003ff) + await dev_pf0_bar0.write_dword(0x001090, region_len-1) await dev_pf0_bar0.write_dword(0x001094, 0) # DMA stride - await dev_pf0_bar0.write_dword(0x001098, 256) + await dev_pf0_bar0.write_dword(0x001098, block_stride) await dev_pf0_bar0.write_dword(0x00109c, 0) # RAM base address await dev_pf0_bar0.write_dword(0x0010c0, 0) @@ -367,36 +375,40 @@ async def run_test(dut): await dev_pf0_bar0.write_dword(0x0010c8, 0) await dev_pf0_bar0.write_dword(0x0010cc, 0) # RAM offset mask - await dev_pf0_bar0.write_dword(0x0010d0, 0x000003ff) + await dev_pf0_bar0.write_dword(0x0010d0, region_len-1) await dev_pf0_bar0.write_dword(0x0010d4, 0) # RAM stride - await dev_pf0_bar0.write_dword(0x0010d8, 256) + await dev_pf0_bar0.write_dword(0x0010d8, block_stride) await dev_pf0_bar0.write_dword(0x0010dc, 0) # clear cycle count await dev_pf0_bar0.write_dword(0x001008, 0) await 
dev_pf0_bar0.write_dword(0x00100c, 0) # block length - await dev_pf0_bar0.write_dword(0x001010, 256) + await dev_pf0_bar0.write_dword(0x001010, block_size) # block count - await dev_pf0_bar0.write_dword(0x001018, 32) + await dev_pf0_bar0.write_dword(0x001018, block_count) await dev_pf0_bar0.write_dword(0x00101c, 0) # start await dev_pf0_bar0.write_dword(0x001000, 1) - await Timer(2000, 'ns') + for k in range(10): + cnt = await dev_pf0_bar0.read_dword(0x001018) + await Timer(1000, 'ns') + if cnt == 0: + break # configure operation (write) # DMA base address - await dev_pf0_bar0.write_dword(0x001180, (mem_base+0x0000) & 0xffffffff) - await dev_pf0_bar0.write_dword(0x001184, (mem_base+0x0000 >> 32) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x001180, (mem_base+dest_offset) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x001184, (mem_base+dest_offset >> 32) & 0xffffffff) # DMA offset address await dev_pf0_bar0.write_dword(0x001188, 0) await dev_pf0_bar0.write_dword(0x00118c, 0) # DMA offset mask - await dev_pf0_bar0.write_dword(0x001190, 0x000003ff) + await dev_pf0_bar0.write_dword(0x001190, region_len-1) await dev_pf0_bar0.write_dword(0x001194, 0) # DMA stride - await dev_pf0_bar0.write_dword(0x001198, 256) + await dev_pf0_bar0.write_dword(0x001198, block_stride) await dev_pf0_bar0.write_dword(0x00119c, 0) # RAM base address await dev_pf0_bar0.write_dword(0x0011c0, 0) @@ -405,23 +417,31 @@ async def run_test(dut): await dev_pf0_bar0.write_dword(0x0011c8, 0) await dev_pf0_bar0.write_dword(0x0011cc, 0) # RAM offset mask - await dev_pf0_bar0.write_dword(0x0011d0, 0x000003ff) + await dev_pf0_bar0.write_dword(0x0011d0, region_len-1) await dev_pf0_bar0.write_dword(0x0011d4, 0) # RAM stride - await dev_pf0_bar0.write_dword(0x0011d8, 256) + await dev_pf0_bar0.write_dword(0x0011d8, block_stride) await dev_pf0_bar0.write_dword(0x0011dc, 0) # clear cycle count await dev_pf0_bar0.write_dword(0x001108, 0) await dev_pf0_bar0.write_dword(0x00110c, 0) # block length - await 
dev_pf0_bar0.write_dword(0x001110, 256) + await dev_pf0_bar0.write_dword(0x001110, block_size) # block count - await dev_pf0_bar0.write_dword(0x001118, 32) + await dev_pf0_bar0.write_dword(0x001118, block_count) await dev_pf0_bar0.write_dword(0x00111c, 0) # start await dev_pf0_bar0.write_dword(0x001100, 1) - await Timer(2000, 'ns') + for k in range(10): + cnt = await dev_pf0_bar0.read_dword(0x001118) + await Timer(1000, 'ns') + if cnt == 0: + break + + tb.log.info("%s", mem.hexdump_str(dest_offset, region_len)) + + assert mem[src_offset:src_offset+region_len] == mem[dest_offset:dest_offset+region_len] await RisingEdge(dut.clk) await RisingEdge(dut.clk) diff --git a/example/VCU1525/fpga/tb/fpga_core/test_fpga_core.py b/example/VCU1525/fpga/tb/fpga_core/test_fpga_core.py index 5add2329d..2e288aeb6 100644 --- a/example/VCU1525/fpga/tb/fpga_core/test_fpga_core.py +++ b/example/VCU1525/fpga/tb/fpga_core/test_fpga_core.py @@ -334,8 +334,16 @@ async def run_test(dut): tb.log.info("Test DMA block operations") + region_len = 0x2000 + src_offset = 0x0000 + dest_offset = 0x4000 + + block_size = 256 + block_stride = block_size + block_count = 32 + # write packet data - mem[0:1024] = bytearray([x % 256 for x in range(1024)]) + mem[src_offset:src_offset+region_len] = bytearray([x % 256 for x in range(region_len)]) # enable DMA await dev_pf0_bar0.write_dword(0x000000, 1) @@ -344,16 +352,16 @@ async def run_test(dut): # configure operation (read) # DMA base address - await dev_pf0_bar0.write_dword(0x001080, (mem_base+0x0000) & 0xffffffff) - await dev_pf0_bar0.write_dword(0x001084, (mem_base+0x0000 >> 32) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x001080, (mem_base+src_offset) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x001084, (mem_base+src_offset >> 32) & 0xffffffff) # DMA offset address await dev_pf0_bar0.write_dword(0x001088, 0) await dev_pf0_bar0.write_dword(0x00108c, 0) # DMA offset mask - await dev_pf0_bar0.write_dword(0x001090, 0x000003ff) + await 
dev_pf0_bar0.write_dword(0x001090, region_len-1) await dev_pf0_bar0.write_dword(0x001094, 0) # DMA stride - await dev_pf0_bar0.write_dword(0x001098, 256) + await dev_pf0_bar0.write_dword(0x001098, block_stride) await dev_pf0_bar0.write_dword(0x00109c, 0) # RAM base address await dev_pf0_bar0.write_dword(0x0010c0, 0) @@ -362,36 +370,40 @@ async def run_test(dut): await dev_pf0_bar0.write_dword(0x0010c8, 0) await dev_pf0_bar0.write_dword(0x0010cc, 0) # RAM offset mask - await dev_pf0_bar0.write_dword(0x0010d0, 0x000003ff) + await dev_pf0_bar0.write_dword(0x0010d0, region_len-1) await dev_pf0_bar0.write_dword(0x0010d4, 0) # RAM stride - await dev_pf0_bar0.write_dword(0x0010d8, 256) + await dev_pf0_bar0.write_dword(0x0010d8, block_stride) await dev_pf0_bar0.write_dword(0x0010dc, 0) # clear cycle count await dev_pf0_bar0.write_dword(0x001008, 0) await dev_pf0_bar0.write_dword(0x00100c, 0) # block length - await dev_pf0_bar0.write_dword(0x001010, 256) + await dev_pf0_bar0.write_dword(0x001010, block_size) # block count - await dev_pf0_bar0.write_dword(0x001018, 32) + await dev_pf0_bar0.write_dword(0x001018, block_count) await dev_pf0_bar0.write_dword(0x00101c, 0) # start await dev_pf0_bar0.write_dword(0x001000, 1) - await Timer(2000, 'ns') + for k in range(10): + cnt = await dev_pf0_bar0.read_dword(0x001018) + await Timer(1000, 'ns') + if cnt == 0: + break # configure operation (write) # DMA base address - await dev_pf0_bar0.write_dword(0x001180, (mem_base+0x0000) & 0xffffffff) - await dev_pf0_bar0.write_dword(0x001184, (mem_base+0x0000 >> 32) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x001180, (mem_base+dest_offset) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x001184, (mem_base+dest_offset >> 32) & 0xffffffff) # DMA offset address await dev_pf0_bar0.write_dword(0x001188, 0) await dev_pf0_bar0.write_dword(0x00118c, 0) # DMA offset mask - await dev_pf0_bar0.write_dword(0x001190, 0x000003ff) + await dev_pf0_bar0.write_dword(0x001190, region_len-1) await 
dev_pf0_bar0.write_dword(0x001194, 0) # DMA stride - await dev_pf0_bar0.write_dword(0x001198, 256) + await dev_pf0_bar0.write_dword(0x001198, block_stride) await dev_pf0_bar0.write_dword(0x00119c, 0) # RAM base address await dev_pf0_bar0.write_dword(0x0011c0, 0) @@ -400,23 +412,31 @@ async def run_test(dut): await dev_pf0_bar0.write_dword(0x0011c8, 0) await dev_pf0_bar0.write_dword(0x0011cc, 0) # RAM offset mask - await dev_pf0_bar0.write_dword(0x0011d0, 0x000003ff) + await dev_pf0_bar0.write_dword(0x0011d0, region_len-1) await dev_pf0_bar0.write_dword(0x0011d4, 0) # RAM stride - await dev_pf0_bar0.write_dword(0x0011d8, 256) + await dev_pf0_bar0.write_dword(0x0011d8, block_stride) await dev_pf0_bar0.write_dword(0x0011dc, 0) # clear cycle count await dev_pf0_bar0.write_dword(0x001108, 0) await dev_pf0_bar0.write_dword(0x00110c, 0) # block length - await dev_pf0_bar0.write_dword(0x001110, 256) + await dev_pf0_bar0.write_dword(0x001110, block_size) # block count - await dev_pf0_bar0.write_dword(0x001118, 32) + await dev_pf0_bar0.write_dword(0x001118, block_count) await dev_pf0_bar0.write_dword(0x00111c, 0) # start await dev_pf0_bar0.write_dword(0x001100, 1) - await Timer(2000, 'ns') + for k in range(10): + cnt = await dev_pf0_bar0.read_dword(0x001118) + await Timer(1000, 'ns') + if cnt == 0: + break + + tb.log.info("%s", mem.hexdump_str(dest_offset, region_len)) + + assert mem[src_offset:src_offset+region_len] == mem[dest_offset:dest_offset+region_len] await RisingEdge(dut.clk) await RisingEdge(dut.clk) diff --git a/example/ZCU106/fpga/tb/fpga_core/test_fpga_core.py b/example/ZCU106/fpga/tb/fpga_core/test_fpga_core.py index 02871d3c8..a0a7c8d2d 100644 --- a/example/ZCU106/fpga/tb/fpga_core/test_fpga_core.py +++ b/example/ZCU106/fpga/tb/fpga_core/test_fpga_core.py @@ -339,8 +339,16 @@ async def run_test(dut): tb.log.info("Test DMA block operations") + region_len = 0x2000 + src_offset = 0x0000 + dest_offset = 0x4000 + + block_size = 256 + block_stride = block_size + 
block_count = 32 + # write packet data - mem[0:1024] = bytearray([x % 256 for x in range(1024)]) + mem[src_offset:src_offset+region_len] = bytearray([x % 256 for x in range(region_len)]) # enable DMA await dev_pf0_bar0.write_dword(0x000000, 1) @@ -349,16 +357,16 @@ async def run_test(dut): # configure operation (read) # DMA base address - await dev_pf0_bar0.write_dword(0x001080, (mem_base+0x0000) & 0xffffffff) - await dev_pf0_bar0.write_dword(0x001084, (mem_base+0x0000 >> 32) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x001080, (mem_base+src_offset) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x001084, (mem_base+src_offset >> 32) & 0xffffffff) # DMA offset address await dev_pf0_bar0.write_dword(0x001088, 0) await dev_pf0_bar0.write_dword(0x00108c, 0) # DMA offset mask - await dev_pf0_bar0.write_dword(0x001090, 0x000003ff) + await dev_pf0_bar0.write_dword(0x001090, region_len-1) await dev_pf0_bar0.write_dword(0x001094, 0) # DMA stride - await dev_pf0_bar0.write_dword(0x001098, 256) + await dev_pf0_bar0.write_dword(0x001098, block_stride) await dev_pf0_bar0.write_dword(0x00109c, 0) # RAM base address await dev_pf0_bar0.write_dword(0x0010c0, 0) @@ -367,36 +375,40 @@ async def run_test(dut): await dev_pf0_bar0.write_dword(0x0010c8, 0) await dev_pf0_bar0.write_dword(0x0010cc, 0) # RAM offset mask - await dev_pf0_bar0.write_dword(0x0010d0, 0x000003ff) + await dev_pf0_bar0.write_dword(0x0010d0, region_len-1) await dev_pf0_bar0.write_dword(0x0010d4, 0) # RAM stride - await dev_pf0_bar0.write_dword(0x0010d8, 256) + await dev_pf0_bar0.write_dword(0x0010d8, block_stride) await dev_pf0_bar0.write_dword(0x0010dc, 0) # clear cycle count await dev_pf0_bar0.write_dword(0x001008, 0) await dev_pf0_bar0.write_dword(0x00100c, 0) # block length - await dev_pf0_bar0.write_dword(0x001010, 256) + await dev_pf0_bar0.write_dword(0x001010, block_size) # block count - await dev_pf0_bar0.write_dword(0x001018, 32) + await dev_pf0_bar0.write_dword(0x001018, block_count) await 
dev_pf0_bar0.write_dword(0x00101c, 0) # start await dev_pf0_bar0.write_dword(0x001000, 1) - await Timer(2000, 'ns') + for k in range(10): + cnt = await dev_pf0_bar0.read_dword(0x001018) + await Timer(1000, 'ns') + if cnt == 0: + break # configure operation (write) # DMA base address - await dev_pf0_bar0.write_dword(0x001180, (mem_base+0x0000) & 0xffffffff) - await dev_pf0_bar0.write_dword(0x001184, (mem_base+0x0000 >> 32) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x001180, (mem_base+dest_offset) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x001184, (mem_base+dest_offset >> 32) & 0xffffffff) # DMA offset address await dev_pf0_bar0.write_dword(0x001188, 0) await dev_pf0_bar0.write_dword(0x00118c, 0) # DMA offset mask - await dev_pf0_bar0.write_dword(0x001190, 0x000003ff) + await dev_pf0_bar0.write_dword(0x001190, region_len-1) await dev_pf0_bar0.write_dword(0x001194, 0) # DMA stride - await dev_pf0_bar0.write_dword(0x001198, 256) + await dev_pf0_bar0.write_dword(0x001198, block_stride) await dev_pf0_bar0.write_dword(0x00119c, 0) # RAM base address await dev_pf0_bar0.write_dword(0x0011c0, 0) @@ -405,23 +417,31 @@ async def run_test(dut): await dev_pf0_bar0.write_dword(0x0011c8, 0) await dev_pf0_bar0.write_dword(0x0011cc, 0) # RAM offset mask - await dev_pf0_bar0.write_dword(0x0011d0, 0x000003ff) + await dev_pf0_bar0.write_dword(0x0011d0, region_len-1) await dev_pf0_bar0.write_dword(0x0011d4, 0) # RAM stride - await dev_pf0_bar0.write_dword(0x0011d8, 256) + await dev_pf0_bar0.write_dword(0x0011d8, block_stride) await dev_pf0_bar0.write_dword(0x0011dc, 0) # clear cycle count await dev_pf0_bar0.write_dword(0x001108, 0) await dev_pf0_bar0.write_dword(0x00110c, 0) # block length - await dev_pf0_bar0.write_dword(0x001110, 256) + await dev_pf0_bar0.write_dword(0x001110, block_size) # block count - await dev_pf0_bar0.write_dword(0x001118, 32) + await dev_pf0_bar0.write_dword(0x001118, block_count) await dev_pf0_bar0.write_dword(0x00111c, 0) # start await 
dev_pf0_bar0.write_dword(0x001100, 1) - await Timer(2000, 'ns') + for k in range(10): + cnt = await dev_pf0_bar0.read_dword(0x001118) + await Timer(1000, 'ns') + if cnt == 0: + break + + tb.log.info("%s", mem.hexdump_str(dest_offset, region_len)) + + assert mem[src_offset:src_offset+region_len] == mem[dest_offset:dest_offset+region_len] await RisingEdge(dut.clk) await RisingEdge(dut.clk) diff --git a/example/common/tb/example_core_pcie/test_example_core_pcie.py b/example/common/tb/example_core_pcie/test_example_core_pcie.py index c93eb062a..b00cb0007 100644 --- a/example/common/tb/example_core_pcie/test_example_core_pcie.py +++ b/example/common/tb/example_core_pcie/test_example_core_pcie.py @@ -221,8 +221,16 @@ async def run_test(dut): tb.log.info("Test DMA block operations") + region_len = 0x2000 + src_offset = 0x0000 + dest_offset = 0x4000 + + block_size = 256 + block_stride = block_size + block_count = 32 + # write packet data - mem[0:1024] = bytearray([x % 256 for x in range(1024)]) + mem[src_offset:src_offset+region_len] = bytearray([x % 256 for x in range(region_len)]) # enable DMA await dev_pf0_bar0.write_dword(0x000000, 1) @@ -231,16 +239,16 @@ async def run_test(dut): # configure operation (read) # DMA base address - await dev_pf0_bar0.write_dword(0x001080, (mem_base+0x0000) & 0xffffffff) - await dev_pf0_bar0.write_dword(0x001084, (mem_base+0x0000 >> 32) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x001080, (mem_base+src_offset) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x001084, (mem_base+src_offset >> 32) & 0xffffffff) # DMA offset address await dev_pf0_bar0.write_dword(0x001088, 0) await dev_pf0_bar0.write_dword(0x00108c, 0) # DMA offset mask - await dev_pf0_bar0.write_dword(0x001090, 0x000003ff) + await dev_pf0_bar0.write_dword(0x001090, region_len-1) await dev_pf0_bar0.write_dword(0x001094, 0) # DMA stride - await dev_pf0_bar0.write_dword(0x001098, 256) + await dev_pf0_bar0.write_dword(0x001098, block_stride) await 
dev_pf0_bar0.write_dword(0x00109c, 0) # RAM base address await dev_pf0_bar0.write_dword(0x0010c0, 0) @@ -249,36 +257,40 @@ async def run_test(dut): await dev_pf0_bar0.write_dword(0x0010c8, 0) await dev_pf0_bar0.write_dword(0x0010cc, 0) # RAM offset mask - await dev_pf0_bar0.write_dword(0x0010d0, 0x000003ff) + await dev_pf0_bar0.write_dword(0x0010d0, region_len-1) await dev_pf0_bar0.write_dword(0x0010d4, 0) # RAM stride - await dev_pf0_bar0.write_dword(0x0010d8, 256) + await dev_pf0_bar0.write_dword(0x0010d8, block_stride) await dev_pf0_bar0.write_dword(0x0010dc, 0) # clear cycle count await dev_pf0_bar0.write_dword(0x001008, 0) await dev_pf0_bar0.write_dword(0x00100c, 0) # block length - await dev_pf0_bar0.write_dword(0x001010, 256) + await dev_pf0_bar0.write_dword(0x001010, block_size) # block count - await dev_pf0_bar0.write_dword(0x001018, 32) + await dev_pf0_bar0.write_dword(0x001018, block_count) await dev_pf0_bar0.write_dword(0x00101c, 0) # start await dev_pf0_bar0.write_dword(0x001000, 1) - await Timer(2000, 'ns') + for k in range(10): + cnt = await dev_pf0_bar0.read_dword(0x001018) + await Timer(1000, 'ns') + if cnt == 0: + break # configure operation (write) # DMA base address - await dev_pf0_bar0.write_dword(0x001180, (mem_base+0x0000) & 0xffffffff) - await dev_pf0_bar0.write_dword(0x001184, (mem_base+0x0000 >> 32) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x001180, (mem_base+dest_offset) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x001184, (mem_base+dest_offset >> 32) & 0xffffffff) # DMA offset address await dev_pf0_bar0.write_dword(0x001188, 0) await dev_pf0_bar0.write_dword(0x00118c, 0) # DMA offset mask - await dev_pf0_bar0.write_dword(0x001190, 0x000003ff) + await dev_pf0_bar0.write_dword(0x001190, region_len-1) await dev_pf0_bar0.write_dword(0x001194, 0) # DMA stride - await dev_pf0_bar0.write_dword(0x001198, 256) + await dev_pf0_bar0.write_dword(0x001198, block_stride) await dev_pf0_bar0.write_dword(0x00119c, 0) # RAM base address await 
dev_pf0_bar0.write_dword(0x0011c0, 0) @@ -287,23 +299,31 @@ async def run_test(dut): await dev_pf0_bar0.write_dword(0x0011c8, 0) await dev_pf0_bar0.write_dword(0x0011cc, 0) # RAM offset mask - await dev_pf0_bar0.write_dword(0x0011d0, 0x000003ff) + await dev_pf0_bar0.write_dword(0x0011d0, region_len-1) await dev_pf0_bar0.write_dword(0x0011d4, 0) # RAM stride - await dev_pf0_bar0.write_dword(0x0011d8, 256) + await dev_pf0_bar0.write_dword(0x0011d8, block_stride) await dev_pf0_bar0.write_dword(0x0011dc, 0) # clear cycle count await dev_pf0_bar0.write_dword(0x001108, 0) await dev_pf0_bar0.write_dword(0x00110c, 0) # block length - await dev_pf0_bar0.write_dword(0x001110, 256) + await dev_pf0_bar0.write_dword(0x001110, block_size) # block count - await dev_pf0_bar0.write_dword(0x001118, 32) + await dev_pf0_bar0.write_dword(0x001118, block_count) await dev_pf0_bar0.write_dword(0x00111c, 0) # start await dev_pf0_bar0.write_dword(0x001100, 1) - await Timer(2000, 'ns') + for k in range(10): + cnt = await dev_pf0_bar0.read_dword(0x001118) + await Timer(1000, 'ns') + if cnt == 0: + break + + tb.log.info("%s", mem.hexdump_str(dest_offset, region_len)) + + assert mem[src_offset:src_offset+region_len] == mem[dest_offset:dest_offset+region_len] await RisingEdge(dut.clk) await RisingEdge(dut.clk) diff --git a/example/common/tb/example_core_pcie_s10/test_example_core_pcie_s10.py b/example/common/tb/example_core_pcie_s10/test_example_core_pcie_s10.py index 3095607f0..e13458f3c 100644 --- a/example/common/tb/example_core_pcie_s10/test_example_core_pcie_s10.py +++ b/example/common/tb/example_core_pcie_s10/test_example_core_pcie_s10.py @@ -259,8 +259,16 @@ async def run_test(dut): tb.log.info("Test DMA block operations") + region_len = 0x2000 + src_offset = 0x0000 + dest_offset = 0x4000 + + block_size = 256 + block_stride = block_size + block_count = 32 + # write packet data - mem[0:1024] = bytearray([x % 256 for x in range(1024)]) + mem[src_offset:src_offset+region_len] = bytearray([x 
% 256 for x in range(region_len)]) # enable DMA await dev_pf0_bar0.write_dword(0x000000, 1) @@ -269,16 +277,16 @@ async def run_test(dut): # configure operation (read) # DMA base address - await dev_pf0_bar0.write_dword(0x001080, (mem_base+0x0000) & 0xffffffff) - await dev_pf0_bar0.write_dword(0x001084, (mem_base+0x0000 >> 32) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x001080, (mem_base+src_offset) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x001084, (mem_base+src_offset >> 32) & 0xffffffff) # DMA offset address await dev_pf0_bar0.write_dword(0x001088, 0) await dev_pf0_bar0.write_dword(0x00108c, 0) # DMA offset mask - await dev_pf0_bar0.write_dword(0x001090, 0x000003ff) + await dev_pf0_bar0.write_dword(0x001090, region_len-1) await dev_pf0_bar0.write_dword(0x001094, 0) # DMA stride - await dev_pf0_bar0.write_dword(0x001098, 256) + await dev_pf0_bar0.write_dword(0x001098, block_stride) await dev_pf0_bar0.write_dword(0x00109c, 0) # RAM base address await dev_pf0_bar0.write_dword(0x0010c0, 0) @@ -287,36 +295,40 @@ async def run_test(dut): await dev_pf0_bar0.write_dword(0x0010c8, 0) await dev_pf0_bar0.write_dword(0x0010cc, 0) # RAM offset mask - await dev_pf0_bar0.write_dword(0x0010d0, 0x000003ff) + await dev_pf0_bar0.write_dword(0x0010d0, region_len-1) await dev_pf0_bar0.write_dword(0x0010d4, 0) # RAM stride - await dev_pf0_bar0.write_dword(0x0010d8, 256) + await dev_pf0_bar0.write_dword(0x0010d8, block_stride) await dev_pf0_bar0.write_dword(0x0010dc, 0) # clear cycle count await dev_pf0_bar0.write_dword(0x001008, 0) await dev_pf0_bar0.write_dword(0x00100c, 0) # block length - await dev_pf0_bar0.write_dword(0x001010, 256) + await dev_pf0_bar0.write_dword(0x001010, block_size) # block count - await dev_pf0_bar0.write_dword(0x001018, 32) + await dev_pf0_bar0.write_dword(0x001018, block_count) await dev_pf0_bar0.write_dword(0x00101c, 0) # start await dev_pf0_bar0.write_dword(0x001000, 1) - await Timer(2000, 'ns') + for k in range(10): + cnt = await 
dev_pf0_bar0.read_dword(0x001018) + await Timer(1000, 'ns') + if cnt == 0: + break # configure operation (write) # DMA base address - await dev_pf0_bar0.write_dword(0x001180, (mem_base+0x0000) & 0xffffffff) - await dev_pf0_bar0.write_dword(0x001184, (mem_base+0x0000 >> 32) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x001180, (mem_base+dest_offset) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x001184, (mem_base+dest_offset >> 32) & 0xffffffff) # DMA offset address await dev_pf0_bar0.write_dword(0x001188, 0) await dev_pf0_bar0.write_dword(0x00118c, 0) # DMA offset mask - await dev_pf0_bar0.write_dword(0x001190, 0x000003ff) + await dev_pf0_bar0.write_dword(0x001190, region_len-1) await dev_pf0_bar0.write_dword(0x001194, 0) # DMA stride - await dev_pf0_bar0.write_dword(0x001198, 256) + await dev_pf0_bar0.write_dword(0x001198, block_stride) await dev_pf0_bar0.write_dword(0x00119c, 0) # RAM base address await dev_pf0_bar0.write_dword(0x0011c0, 0) @@ -325,23 +337,31 @@ async def run_test(dut): await dev_pf0_bar0.write_dword(0x0011c8, 0) await dev_pf0_bar0.write_dword(0x0011cc, 0) # RAM offset mask - await dev_pf0_bar0.write_dword(0x0011d0, 0x000003ff) + await dev_pf0_bar0.write_dword(0x0011d0, region_len-1) await dev_pf0_bar0.write_dword(0x0011d4, 0) # RAM stride - await dev_pf0_bar0.write_dword(0x0011d8, 256) + await dev_pf0_bar0.write_dword(0x0011d8, block_stride) await dev_pf0_bar0.write_dword(0x0011dc, 0) # clear cycle count await dev_pf0_bar0.write_dword(0x001108, 0) await dev_pf0_bar0.write_dword(0x00110c, 0) # block length - await dev_pf0_bar0.write_dword(0x001110, 256) + await dev_pf0_bar0.write_dword(0x001110, block_size) # block count - await dev_pf0_bar0.write_dword(0x001118, 32) + await dev_pf0_bar0.write_dword(0x001118, block_count) await dev_pf0_bar0.write_dword(0x00111c, 0) # start await dev_pf0_bar0.write_dword(0x001100, 1) - await Timer(2000, 'ns') + for k in range(10): + cnt = await dev_pf0_bar0.read_dword(0x001118) + await Timer(1000, 'ns') + if 
cnt == 0: + break + + tb.log.info("%s", mem.hexdump_str(dest_offset, region_len)) + + assert mem[src_offset:src_offset+region_len] == mem[dest_offset:dest_offset+region_len] await RisingEdge(dut.clk) await RisingEdge(dut.clk) diff --git a/example/common/tb/example_core_pcie_us/test_example_core_pcie_us.py b/example/common/tb/example_core_pcie_us/test_example_core_pcie_us.py index 28cb17f26..a3c4eb243 100644 --- a/example/common/tb/example_core_pcie_us/test_example_core_pcie_us.py +++ b/example/common/tb/example_core_pcie_us/test_example_core_pcie_us.py @@ -353,8 +353,16 @@ async def run_test(dut): tb.log.info("Test DMA block operations") + region_len = 0x2000 + src_offset = 0x0000 + dest_offset = 0x4000 + + block_size = 256 + block_stride = block_size + block_count = 32 + # write packet data - mem[0:1024] = bytearray([x % 256 for x in range(1024)]) + mem[src_offset:src_offset+region_len] = bytearray([x % 256 for x in range(region_len)]) # enable DMA await dev_pf0_bar0.write_dword(0x000000, 1) @@ -363,16 +371,16 @@ async def run_test(dut): # configure operation (read) # DMA base address - await dev_pf0_bar0.write_dword(0x001080, (mem_base+0x0000) & 0xffffffff) - await dev_pf0_bar0.write_dword(0x001084, (mem_base+0x0000 >> 32) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x001080, (mem_base+src_offset) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x001084, (mem_base+src_offset >> 32) & 0xffffffff) # DMA offset address await dev_pf0_bar0.write_dword(0x001088, 0) await dev_pf0_bar0.write_dword(0x00108c, 0) # DMA offset mask - await dev_pf0_bar0.write_dword(0x001090, 0x000003ff) + await dev_pf0_bar0.write_dword(0x001090, region_len-1) await dev_pf0_bar0.write_dword(0x001094, 0) # DMA stride - await dev_pf0_bar0.write_dword(0x001098, 256) + await dev_pf0_bar0.write_dword(0x001098, block_stride) await dev_pf0_bar0.write_dword(0x00109c, 0) # RAM base address await dev_pf0_bar0.write_dword(0x0010c0, 0) @@ -381,36 +389,40 @@ async def run_test(dut): await 
dev_pf0_bar0.write_dword(0x0010c8, 0) await dev_pf0_bar0.write_dword(0x0010cc, 0) # RAM offset mask - await dev_pf0_bar0.write_dword(0x0010d0, 0x000003ff) + await dev_pf0_bar0.write_dword(0x0010d0, region_len-1) await dev_pf0_bar0.write_dword(0x0010d4, 0) # RAM stride - await dev_pf0_bar0.write_dword(0x0010d8, 256) + await dev_pf0_bar0.write_dword(0x0010d8, block_stride) await dev_pf0_bar0.write_dword(0x0010dc, 0) # clear cycle count await dev_pf0_bar0.write_dword(0x001008, 0) await dev_pf0_bar0.write_dword(0x00100c, 0) # block length - await dev_pf0_bar0.write_dword(0x001010, 256) + await dev_pf0_bar0.write_dword(0x001010, block_size) # block count - await dev_pf0_bar0.write_dword(0x001018, 32) + await dev_pf0_bar0.write_dword(0x001018, block_count) await dev_pf0_bar0.write_dword(0x00101c, 0) # start await dev_pf0_bar0.write_dword(0x001000, 1) - await Timer(2000, 'ns') + for k in range(10): + cnt = await dev_pf0_bar0.read_dword(0x001018) + await Timer(1000, 'ns') + if cnt == 0: + break # configure operation (write) # DMA base address - await dev_pf0_bar0.write_dword(0x001180, (mem_base+0x0000) & 0xffffffff) - await dev_pf0_bar0.write_dword(0x001184, (mem_base+0x0000 >> 32) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x001180, (mem_base+dest_offset) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x001184, (mem_base+dest_offset >> 32) & 0xffffffff) # DMA offset address await dev_pf0_bar0.write_dword(0x001188, 0) await dev_pf0_bar0.write_dword(0x00118c, 0) # DMA offset mask - await dev_pf0_bar0.write_dword(0x001190, 0x000003ff) + await dev_pf0_bar0.write_dword(0x001190, region_len-1) await dev_pf0_bar0.write_dword(0x001194, 0) # DMA stride - await dev_pf0_bar0.write_dword(0x001198, 256) + await dev_pf0_bar0.write_dword(0x001198, block_stride) await dev_pf0_bar0.write_dword(0x00119c, 0) # RAM base address await dev_pf0_bar0.write_dword(0x0011c0, 0) @@ -419,23 +431,31 @@ async def run_test(dut): await dev_pf0_bar0.write_dword(0x0011c8, 0) await 
dev_pf0_bar0.write_dword(0x0011cc, 0) # RAM offset mask - await dev_pf0_bar0.write_dword(0x0011d0, 0x000003ff) + await dev_pf0_bar0.write_dword(0x0011d0, region_len-1) await dev_pf0_bar0.write_dword(0x0011d4, 0) # RAM stride - await dev_pf0_bar0.write_dword(0x0011d8, 256) + await dev_pf0_bar0.write_dword(0x0011d8, block_stride) await dev_pf0_bar0.write_dword(0x0011dc, 0) # clear cycle count await dev_pf0_bar0.write_dword(0x001108, 0) await dev_pf0_bar0.write_dword(0x00110c, 0) # block length - await dev_pf0_bar0.write_dword(0x001110, 256) + await dev_pf0_bar0.write_dword(0x001110, block_size) # block count - await dev_pf0_bar0.write_dword(0x001118, 32) + await dev_pf0_bar0.write_dword(0x001118, block_count) await dev_pf0_bar0.write_dword(0x00111c, 0) # start await dev_pf0_bar0.write_dword(0x001100, 1) - await Timer(2000, 'ns') + for k in range(10): + cnt = await dev_pf0_bar0.read_dword(0x001118) + await Timer(1000, 'ns') + if cnt == 0: + break + + tb.log.info("%s", mem.hexdump_str(dest_offset, region_len)) + + assert mem[src_offset:src_offset+region_len] == mem[dest_offset:dest_offset+region_len] await RisingEdge(dut.clk) await RisingEdge(dut.clk) diff --git a/example/fb2CG/fpga/tb/fpga_core/test_fpga_core.py b/example/fb2CG/fpga/tb/fpga_core/test_fpga_core.py index c316e8d3b..17d445eaa 100644 --- a/example/fb2CG/fpga/tb/fpga_core/test_fpga_core.py +++ b/example/fb2CG/fpga/tb/fpga_core/test_fpga_core.py @@ -334,8 +334,16 @@ async def run_test(dut): tb.log.info("Test DMA block operations") + region_len = 0x2000 + src_offset = 0x0000 + dest_offset = 0x4000 + + block_size = 256 + block_stride = block_size + block_count = 32 + # write packet data - mem[0:1024] = bytearray([x % 256 for x in range(1024)]) + mem[src_offset:src_offset+region_len] = bytearray([x % 256 for x in range(region_len)]) # enable DMA await dev_pf0_bar0.write_dword(0x000000, 1) @@ -344,16 +352,16 @@ async def run_test(dut): # configure operation (read) # DMA base address - await 
dev_pf0_bar0.write_dword(0x001080, (mem_base+0x0000) & 0xffffffff) - await dev_pf0_bar0.write_dword(0x001084, (mem_base+0x0000 >> 32) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x001080, (mem_base+src_offset) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x001084, (mem_base+src_offset >> 32) & 0xffffffff) # DMA offset address await dev_pf0_bar0.write_dword(0x001088, 0) await dev_pf0_bar0.write_dword(0x00108c, 0) # DMA offset mask - await dev_pf0_bar0.write_dword(0x001090, 0x000003ff) + await dev_pf0_bar0.write_dword(0x001090, region_len-1) await dev_pf0_bar0.write_dword(0x001094, 0) # DMA stride - await dev_pf0_bar0.write_dword(0x001098, 256) + await dev_pf0_bar0.write_dword(0x001098, block_stride) await dev_pf0_bar0.write_dword(0x00109c, 0) # RAM base address await dev_pf0_bar0.write_dword(0x0010c0, 0) @@ -362,36 +370,40 @@ async def run_test(dut): await dev_pf0_bar0.write_dword(0x0010c8, 0) await dev_pf0_bar0.write_dword(0x0010cc, 0) # RAM offset mask - await dev_pf0_bar0.write_dword(0x0010d0, 0x000003ff) + await dev_pf0_bar0.write_dword(0x0010d0, region_len-1) await dev_pf0_bar0.write_dword(0x0010d4, 0) # RAM stride - await dev_pf0_bar0.write_dword(0x0010d8, 256) + await dev_pf0_bar0.write_dword(0x0010d8, block_stride) await dev_pf0_bar0.write_dword(0x0010dc, 0) # clear cycle count await dev_pf0_bar0.write_dword(0x001008, 0) await dev_pf0_bar0.write_dword(0x00100c, 0) # block length - await dev_pf0_bar0.write_dword(0x001010, 256) + await dev_pf0_bar0.write_dword(0x001010, block_size) # block count - await dev_pf0_bar0.write_dword(0x001018, 32) + await dev_pf0_bar0.write_dword(0x001018, block_count) await dev_pf0_bar0.write_dword(0x00101c, 0) # start await dev_pf0_bar0.write_dword(0x001000, 1) - await Timer(2000, 'ns') + for k in range(10): + cnt = await dev_pf0_bar0.read_dword(0x001018) + await Timer(1000, 'ns') + if cnt == 0: + break # configure operation (write) # DMA base address - await dev_pf0_bar0.write_dword(0x001180, (mem_base+0x0000) & 0xffffffff) 
- await dev_pf0_bar0.write_dword(0x001184, (mem_base+0x0000 >> 32) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x001180, (mem_base+dest_offset) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x001184, (mem_base+dest_offset >> 32) & 0xffffffff) # DMA offset address await dev_pf0_bar0.write_dword(0x001188, 0) await dev_pf0_bar0.write_dword(0x00118c, 0) # DMA offset mask - await dev_pf0_bar0.write_dword(0x001190, 0x000003ff) + await dev_pf0_bar0.write_dword(0x001190, region_len-1) await dev_pf0_bar0.write_dword(0x001194, 0) # DMA stride - await dev_pf0_bar0.write_dword(0x001198, 256) + await dev_pf0_bar0.write_dword(0x001198, block_stride) await dev_pf0_bar0.write_dword(0x00119c, 0) # RAM base address await dev_pf0_bar0.write_dword(0x0011c0, 0) @@ -400,23 +412,31 @@ async def run_test(dut): await dev_pf0_bar0.write_dword(0x0011c8, 0) await dev_pf0_bar0.write_dword(0x0011cc, 0) # RAM offset mask - await dev_pf0_bar0.write_dword(0x0011d0, 0x000003ff) + await dev_pf0_bar0.write_dword(0x0011d0, region_len-1) await dev_pf0_bar0.write_dword(0x0011d4, 0) # RAM stride - await dev_pf0_bar0.write_dword(0x0011d8, 256) + await dev_pf0_bar0.write_dword(0x0011d8, block_stride) await dev_pf0_bar0.write_dword(0x0011dc, 0) # clear cycle count await dev_pf0_bar0.write_dword(0x001108, 0) await dev_pf0_bar0.write_dword(0x00110c, 0) # block length - await dev_pf0_bar0.write_dword(0x001110, 256) + await dev_pf0_bar0.write_dword(0x001110, block_size) # block count - await dev_pf0_bar0.write_dword(0x001118, 32) + await dev_pf0_bar0.write_dword(0x001118, block_count) await dev_pf0_bar0.write_dword(0x00111c, 0) # start await dev_pf0_bar0.write_dword(0x001100, 1) - await Timer(2000, 'ns') + for k in range(10): + cnt = await dev_pf0_bar0.read_dword(0x001118) + await Timer(1000, 'ns') + if cnt == 0: + break + + tb.log.info("%s", mem.hexdump_str(dest_offset, region_len)) + + assert mem[src_offset:src_offset+region_len] == mem[dest_offset:dest_offset+region_len] await RisingEdge(dut.clk) await 
RisingEdge(dut.clk) From 89db2a29b7443c7f0185e29f231c4331c8a34314 Mon Sep 17 00:00:00 2001 From: Alex Forencich Date: Wed, 6 Apr 2022 13:23:46 -0700 Subject: [PATCH 17/21] When EXTEND_RAM_SEL is not set, do not modify ram_sel --- rtl/dma_if_desc_mux.v | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rtl/dma_if_desc_mux.v b/rtl/dma_if_desc_mux.v index f3fa59071..33c21d8d4 100644 --- a/rtl/dma_if_desc_mux.v +++ b/rtl/dma_if_desc_mux.v @@ -190,7 +190,7 @@ always @* begin // pass through selected packet data m_axis_desc_dma_addr_int = current_s_desc_dma_addr; m_axis_desc_ram_sel_int = current_s_desc_ram_sel; - if (PORTS > 1) begin + if (EXTEND_RAM_SEL && PORTS > 1) begin m_axis_desc_ram_sel_int[M_RAM_SEL_WIDTH-1:M_RAM_SEL_WIDTH-CL_PORTS] = grant_encoded; end m_axis_desc_ram_addr_int = current_s_desc_ram_addr; From 984aefe5085908eb8858f4e0fa4f029e738bf3ed Mon Sep 17 00:00:00 2001 From: Alex Forencich Date: Wed, 6 Apr 2022 13:24:05 -0700 Subject: [PATCH 18/21] Fix tag indexing --- rtl/dma_if_desc_mux.v | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/rtl/dma_if_desc_mux.v b/rtl/dma_if_desc_mux.v index 33c21d8d4..92efbd0fd 100644 --- a/rtl/dma_if_desc_mux.v +++ b/rtl/dma_if_desc_mux.v @@ -197,7 +197,10 @@ always @* begin m_axis_desc_imm_int = current_s_desc_imm; m_axis_desc_imm_en_int = current_s_desc_imm_en; m_axis_desc_len_int = current_s_desc_len; - m_axis_desc_tag_int = {grant_encoded, current_s_desc_tag}; + m_axis_desc_tag_int = current_s_desc_tag; + if (PORTS > 1) begin + m_axis_desc_tag_int[M_TAG_WIDTH-1:M_TAG_WIDTH-CL_PORTS] = grant_encoded; + end m_axis_desc_valid_int = current_s_desc_valid && m_axis_desc_ready_int_reg && grant_valid; end From ffc0a70c400a8265214081197f5cb1ab3484c732 Mon Sep 17 00:00:00 2001 From: Alex Forencich Date: Tue, 19 Apr 2022 23:18:50 -0700 Subject: [PATCH 19/21] Update scripts to use setpci built-in bit masking Signed-off-by: Alex Forencich --- scripts/pcie_disable_fatal_err.sh | 13 ++++++------- 
scripts/pcie_ext_tag.sh | 17 ++++------------- scripts/pcie_hot_reset.sh | 10 ++++------ scripts/pcie_set_speed.sh | 10 +--------- 4 files changed, 15 insertions(+), 35 deletions(-) diff --git a/scripts/pcie_disable_fatal_err.sh b/scripts/pcie_disable_fatal_err.sh index 968cc71de..c344464b0 100755 --- a/scripts/pcie_disable_fatal_err.sh +++ b/scripts/pcie_disable_fatal_err.sh @@ -25,17 +25,16 @@ fi echo "Disabling fatal error reporting on port $port..." -cmd=$(setpci -s $port COMMAND) - -echo "Command:" $cmd +echo "Command:" $(setpci -s $port COMMAND) # clear SERR bit in command register -setpci -s $port COMMAND=$(printf "%04x" $((0x$cmd & ~0x0100))) +setpci -s $port COMMAND=0000:0100 -ctrl=$(setpci -s $port CAP_EXP+8.w) +echo "Command:" $(setpci -s $port COMMAND) -echo "Device control:" $ctrl +echo "Device control:" $(setpci -s $port CAP_EXP+8.w) # clear fatal error reporting enable bit in device control register -setpci -s $port CAP_EXP+8.w=$(printf "%04x" $((0x$ctrl & ~0x0004))) +setpci -s $port CAP_EXP+8.w=0000:0004 +echo "Device control:" $(setpci -s $port CAP_EXP+8.w) diff --git a/scripts/pcie_ext_tag.sh b/scripts/pcie_ext_tag.sh index 1fcf39e2a..391a07bf6 100755 --- a/scripts/pcie_ext_tag.sh +++ b/scripts/pcie_ext_tag.sh @@ -22,23 +22,14 @@ if [ ! -e "/sys/bus/pci/devices/$dev" ]; then exit 1 fi -ctrl=$(setpci -s $dev CAP_EXP+8.w) +echo "Device control:" $(setpci -s $dev CAP_EXP+8.w) if (($en > 0)); then - echo "Enabling ext tag on $dev..." - - echo "Device control:" $ctrl - - setpci -s $dev CAP_EXP+8.w=$(printf "%04x" $((0x$ctrl | 0x0100))) - + setpci -s $dev CAP_EXP+8.w=0100:0100 else - echo "Disabling ext tag on $dev..." 
- - echo "Device control:" $ctrl - - setpci -s $dev CAP_EXP+8.w=$(printf "%04x" $((0x$ctrl & ~0x0100))) - + setpci -s $dev CAP_EXP+8.w=0000:0100 fi +echo "Device control:" $(setpci -s $dev CAP_EXP+8.w) diff --git a/scripts/pcie_hot_reset.sh b/scripts/pcie_hot_reset.sh index 18c178f83..0a98da3f5 100755 --- a/scripts/pcie_hot_reset.sh +++ b/scripts/pcie_hot_reset.sh @@ -29,13 +29,11 @@ echo 1 > "/sys/bus/pci/devices/$dev/remove" echo "Performing hot reset of port $port..." -bc=$(setpci -s $port BRIDGE_CONTROL) +echo "Bridge control:" $(setpci -s $port BRIDGE_CONTROL) -echo "Bridge control:" $bc - -setpci -s $port BRIDGE_CONTROL=$(printf "%04x" $((0x$bc | 0x40))) -sleep 0.01 -setpci -s $port BRIDGE_CONTROL=$bc +setpci -s $port BRIDGE_CONTROL=40:40 +sleep 0.5 +setpci -s $port BRIDGE_CONTROL=00:40 sleep 0.5 echo "Rescanning bus..." diff --git a/scripts/pcie_set_speed.sh b/scripts/pcie_set_speed.sh index 5e0ef4b27..ab56d2a06 100755 --- a/scripts/pcie_set_speed.sh +++ b/scripts/pcie_set_speed.sh @@ -60,15 +60,7 @@ setpci -s $dev CAP_EXP+30.L=$lc2n echo "Triggering link retraining..." 
-lc=$(setpci -s $dev CAP_EXP+10.L) - -echo "Original link control:" $lc - -lcn=$(printf "%08x" $((0x$lc | 0x20))) - -echo "New link control:" $lcn - -setpci -s $dev CAP_EXP+10.L=$lcn +setpci -s $dev CAP_EXP+10.L=20:20 sleep 0.1 From e4b1df0ddb344bd78cbd58f351df864c73c9aa76 Mon Sep 17 00:00:00 2001 From: Alex Forencich Date: Wed, 20 Apr 2022 00:43:21 -0700 Subject: [PATCH 20/21] Fix immediate enable register implementation in example design Signed-off-by: Alex Forencich --- example/520N_MX/fpga/tb/fpga_core/test_fpga_core.py | 4 ++-- .../ADM_PCIE_9V3/fpga/tb/fpga_core/test_fpga_core.py | 4 ++-- example/AU200/fpga/tb/fpga_core/test_fpga_core.py | 4 ++-- example/AU250/fpga/tb/fpga_core/test_fpga_core.py | 4 ++-- example/AU280/fpga/tb/fpga_core/test_fpga_core.py | 4 ++-- example/AU50/fpga/tb/fpga_core/test_fpga_core.py | 4 ++-- example/ExaNIC_X10/fpga/tb/fpga_core/test_fpga_core.py | 4 ++-- example/ExaNIC_X25/fpga/tb/fpga_core/test_fpga_core.py | 4 ++-- example/S10MX_DK/fpga/tb/fpga_core/test_fpga_core.py | 4 ++-- example/VCU108/fpga/tb/fpga_core/test_fpga_core.py | 4 ++-- example/VCU118/fpga/tb/fpga_core/test_fpga_core.py | 4 ++-- example/VCU1525/fpga/tb/fpga_core/test_fpga_core.py | 4 ++-- example/ZCU106/fpga/tb/fpga_core/test_fpga_core.py | 4 ++-- example/common/driver/example/example_driver.c | 2 +- example/common/rtl/example_core.v | 10 ++++++---- .../tb/example_core_pcie/test_example_core_pcie.py | 4 ++-- .../test_example_core_pcie_s10.py | 4 ++-- .../example_core_pcie_us/test_example_core_pcie_us.py | 4 ++-- example/fb2CG/fpga/tb/fpga_core/test_fpga_core.py | 4 ++-- 19 files changed, 41 insertions(+), 39 deletions(-) diff --git a/example/520N_MX/fpga/tb/fpga_core/test_fpga_core.py b/example/520N_MX/fpga/tb/fpga_core/test_fpga_core.py index 82f15683a..e8159b340 100644 --- a/example/520N_MX/fpga/tb/fpga_core/test_fpga_core.py +++ b/example/520N_MX/fpga/tb/fpga_core/test_fpga_core.py @@ -241,14 +241,14 @@ async def run_test(dut): await 
dev_pf0_bar0.write_dword(0x000204, (mem_base+0x1000 >> 32) & 0xffffffff) await dev_pf0_bar0.write_dword(0x000208, 0x44332211) await dev_pf0_bar0.write_dword(0x000210, 0x4) - await dev_pf0_bar0.write_dword(0x000214, 0x100AA) + await dev_pf0_bar0.write_dword(0x000214, 0x800000AA) await Timer(2000, 'ns') # read status val = await dev_pf0_bar0.read_dword(0x000218) tb.log.info("Status: 0x%x", val) - assert val == 0x800100AA + assert val == 0x800000AA tb.log.info("%s", mem.hexdump_str(0x1000, 64)) diff --git a/example/ADM_PCIE_9V3/fpga/tb/fpga_core/test_fpga_core.py b/example/ADM_PCIE_9V3/fpga/tb/fpga_core/test_fpga_core.py index 1118761b0..574c73478 100644 --- a/example/ADM_PCIE_9V3/fpga/tb/fpga_core/test_fpga_core.py +++ b/example/ADM_PCIE_9V3/fpga/tb/fpga_core/test_fpga_core.py @@ -317,14 +317,14 @@ async def run_test(dut): await dev_pf0_bar0.write_dword(0x000204, (mem_base+0x1000 >> 32) & 0xffffffff) await dev_pf0_bar0.write_dword(0x000208, 0x44332211) await dev_pf0_bar0.write_dword(0x000210, 0x4) - await dev_pf0_bar0.write_dword(0x000214, 0x100AA) + await dev_pf0_bar0.write_dword(0x000214, 0x800000AA) await Timer(2000, 'ns') # read status val = await dev_pf0_bar0.read_dword(0x000218) tb.log.info("Status: 0x%x", val) - assert val == 0x800100AA + assert val == 0x800000AA tb.log.info("%s", mem.hexdump_str(0x1000, 64)) diff --git a/example/AU200/fpga/tb/fpga_core/test_fpga_core.py b/example/AU200/fpga/tb/fpga_core/test_fpga_core.py index 2e288aeb6..570e0da8c 100644 --- a/example/AU200/fpga/tb/fpga_core/test_fpga_core.py +++ b/example/AU200/fpga/tb/fpga_core/test_fpga_core.py @@ -319,14 +319,14 @@ async def run_test(dut): await dev_pf0_bar0.write_dword(0x000204, (mem_base+0x1000 >> 32) & 0xffffffff) await dev_pf0_bar0.write_dword(0x000208, 0x44332211) await dev_pf0_bar0.write_dword(0x000210, 0x4) - await dev_pf0_bar0.write_dword(0x000214, 0x100AA) + await dev_pf0_bar0.write_dword(0x000214, 0x800000AA) await Timer(2000, 'ns') # read status val = await 
dev_pf0_bar0.read_dword(0x000218) tb.log.info("Status: 0x%x", val) - assert val == 0x800100AA + assert val == 0x800000AA tb.log.info("%s", mem.hexdump_str(0x1000, 64)) diff --git a/example/AU250/fpga/tb/fpga_core/test_fpga_core.py b/example/AU250/fpga/tb/fpga_core/test_fpga_core.py index 2e288aeb6..570e0da8c 100644 --- a/example/AU250/fpga/tb/fpga_core/test_fpga_core.py +++ b/example/AU250/fpga/tb/fpga_core/test_fpga_core.py @@ -319,14 +319,14 @@ async def run_test(dut): await dev_pf0_bar0.write_dword(0x000204, (mem_base+0x1000 >> 32) & 0xffffffff) await dev_pf0_bar0.write_dword(0x000208, 0x44332211) await dev_pf0_bar0.write_dword(0x000210, 0x4) - await dev_pf0_bar0.write_dword(0x000214, 0x100AA) + await dev_pf0_bar0.write_dword(0x000214, 0x800000AA) await Timer(2000, 'ns') # read status val = await dev_pf0_bar0.read_dword(0x000218) tb.log.info("Status: 0x%x", val) - assert val == 0x800100AA + assert val == 0x800000AA tb.log.info("%s", mem.hexdump_str(0x1000, 64)) diff --git a/example/AU280/fpga/tb/fpga_core/test_fpga_core.py b/example/AU280/fpga/tb/fpga_core/test_fpga_core.py index 1118761b0..574c73478 100644 --- a/example/AU280/fpga/tb/fpga_core/test_fpga_core.py +++ b/example/AU280/fpga/tb/fpga_core/test_fpga_core.py @@ -317,14 +317,14 @@ async def run_test(dut): await dev_pf0_bar0.write_dword(0x000204, (mem_base+0x1000 >> 32) & 0xffffffff) await dev_pf0_bar0.write_dword(0x000208, 0x44332211) await dev_pf0_bar0.write_dword(0x000210, 0x4) - await dev_pf0_bar0.write_dword(0x000214, 0x100AA) + await dev_pf0_bar0.write_dword(0x000214, 0x800000AA) await Timer(2000, 'ns') # read status val = await dev_pf0_bar0.read_dword(0x000218) tb.log.info("Status: 0x%x", val) - assert val == 0x800100AA + assert val == 0x800000AA tb.log.info("%s", mem.hexdump_str(0x1000, 64)) diff --git a/example/AU50/fpga/tb/fpga_core/test_fpga_core.py b/example/AU50/fpga/tb/fpga_core/test_fpga_core.py index 1118761b0..574c73478 100644 --- a/example/AU50/fpga/tb/fpga_core/test_fpga_core.py +++ 
b/example/AU50/fpga/tb/fpga_core/test_fpga_core.py @@ -317,14 +317,14 @@ async def run_test(dut): await dev_pf0_bar0.write_dword(0x000204, (mem_base+0x1000 >> 32) & 0xffffffff) await dev_pf0_bar0.write_dword(0x000208, 0x44332211) await dev_pf0_bar0.write_dword(0x000210, 0x4) - await dev_pf0_bar0.write_dword(0x000214, 0x100AA) + await dev_pf0_bar0.write_dword(0x000214, 0x800000AA) await Timer(2000, 'ns') # read status val = await dev_pf0_bar0.read_dword(0x000218) tb.log.info("Status: 0x%x", val) - assert val == 0x800100AA + assert val == 0x800000AA tb.log.info("%s", mem.hexdump_str(0x1000, 64)) diff --git a/example/ExaNIC_X10/fpga/tb/fpga_core/test_fpga_core.py b/example/ExaNIC_X10/fpga/tb/fpga_core/test_fpga_core.py index 389b950d5..6e489c7e6 100644 --- a/example/ExaNIC_X10/fpga/tb/fpga_core/test_fpga_core.py +++ b/example/ExaNIC_X10/fpga/tb/fpga_core/test_fpga_core.py @@ -313,14 +313,14 @@ async def run_test(dut): await dev_pf0_bar0.write_dword(0x000204, (mem_base+0x1000 >> 32) & 0xffffffff) await dev_pf0_bar0.write_dword(0x000208, 0x44332211) await dev_pf0_bar0.write_dword(0x000210, 0x4) - await dev_pf0_bar0.write_dword(0x000214, 0x100AA) + await dev_pf0_bar0.write_dword(0x000214, 0x800000AA) await Timer(2000, 'ns') # read status val = await dev_pf0_bar0.read_dword(0x000218) tb.log.info("Status: 0x%x", val) - assert val == 0x800100AA + assert val == 0x800000AA tb.log.info("%s", mem.hexdump_str(0x1000, 64)) diff --git a/example/ExaNIC_X25/fpga/tb/fpga_core/test_fpga_core.py b/example/ExaNIC_X25/fpga/tb/fpga_core/test_fpga_core.py index a68b46639..37b721492 100644 --- a/example/ExaNIC_X25/fpga/tb/fpga_core/test_fpga_core.py +++ b/example/ExaNIC_X25/fpga/tb/fpga_core/test_fpga_core.py @@ -317,14 +317,14 @@ async def run_test(dut): await dev_pf0_bar0.write_dword(0x000204, (mem_base+0x1000 >> 32) & 0xffffffff) await dev_pf0_bar0.write_dword(0x000208, 0x44332211) await dev_pf0_bar0.write_dword(0x000210, 0x4) - await dev_pf0_bar0.write_dword(0x000214, 0x100AA) + await 
dev_pf0_bar0.write_dword(0x000214, 0x800000AA) await Timer(2000, 'ns') # read status val = await dev_pf0_bar0.read_dword(0x000218) tb.log.info("Status: 0x%x", val) - assert val == 0x800100AA + assert val == 0x800000AA tb.log.info("%s", mem.hexdump_str(0x1000, 64)) diff --git a/example/S10MX_DK/fpga/tb/fpga_core/test_fpga_core.py b/example/S10MX_DK/fpga/tb/fpga_core/test_fpga_core.py index 82f15683a..e8159b340 100644 --- a/example/S10MX_DK/fpga/tb/fpga_core/test_fpga_core.py +++ b/example/S10MX_DK/fpga/tb/fpga_core/test_fpga_core.py @@ -241,14 +241,14 @@ async def run_test(dut): await dev_pf0_bar0.write_dword(0x000204, (mem_base+0x1000 >> 32) & 0xffffffff) await dev_pf0_bar0.write_dword(0x000208, 0x44332211) await dev_pf0_bar0.write_dword(0x000210, 0x4) - await dev_pf0_bar0.write_dword(0x000214, 0x100AA) + await dev_pf0_bar0.write_dword(0x000214, 0x800000AA) await Timer(2000, 'ns') # read status val = await dev_pf0_bar0.read_dword(0x000218) tb.log.info("Status: 0x%x", val) - assert val == 0x800100AA + assert val == 0x800000AA tb.log.info("%s", mem.hexdump_str(0x1000, 64)) diff --git a/example/VCU108/fpga/tb/fpga_core/test_fpga_core.py b/example/VCU108/fpga/tb/fpga_core/test_fpga_core.py index 82a45c251..d09695bb2 100644 --- a/example/VCU108/fpga/tb/fpga_core/test_fpga_core.py +++ b/example/VCU108/fpga/tb/fpga_core/test_fpga_core.py @@ -318,14 +318,14 @@ async def run_test(dut): await dev_pf0_bar0.write_dword(0x000204, (mem_base+0x1000 >> 32) & 0xffffffff) await dev_pf0_bar0.write_dword(0x000208, 0x44332211) await dev_pf0_bar0.write_dword(0x000210, 0x4) - await dev_pf0_bar0.write_dword(0x000214, 0x100AA) + await dev_pf0_bar0.write_dword(0x000214, 0x800000AA) await Timer(2000, 'ns') # read status val = await dev_pf0_bar0.read_dword(0x000218) tb.log.info("Status: 0x%x", val) - assert val == 0x800100AA + assert val == 0x800000AA tb.log.info("%s", mem.hexdump_str(0x1000, 64)) diff --git a/example/VCU118/fpga/tb/fpga_core/test_fpga_core.py 
b/example/VCU118/fpga/tb/fpga_core/test_fpga_core.py index cf9c3ca63..cdd35ec04 100644 --- a/example/VCU118/fpga/tb/fpga_core/test_fpga_core.py +++ b/example/VCU118/fpga/tb/fpga_core/test_fpga_core.py @@ -324,14 +324,14 @@ async def run_test(dut): await dev_pf0_bar0.write_dword(0x000204, (mem_base+0x1000 >> 32) & 0xffffffff) await dev_pf0_bar0.write_dword(0x000208, 0x44332211) await dev_pf0_bar0.write_dword(0x000210, 0x4) - await dev_pf0_bar0.write_dword(0x000214, 0x100AA) + await dev_pf0_bar0.write_dword(0x000214, 0x800000AA) await Timer(2000, 'ns') # read status val = await dev_pf0_bar0.read_dword(0x000218) tb.log.info("Status: 0x%x", val) - assert val == 0x800100AA + assert val == 0x800000AA tb.log.info("%s", mem.hexdump_str(0x1000, 64)) diff --git a/example/VCU1525/fpga/tb/fpga_core/test_fpga_core.py b/example/VCU1525/fpga/tb/fpga_core/test_fpga_core.py index 2e288aeb6..570e0da8c 100644 --- a/example/VCU1525/fpga/tb/fpga_core/test_fpga_core.py +++ b/example/VCU1525/fpga/tb/fpga_core/test_fpga_core.py @@ -319,14 +319,14 @@ async def run_test(dut): await dev_pf0_bar0.write_dword(0x000204, (mem_base+0x1000 >> 32) & 0xffffffff) await dev_pf0_bar0.write_dword(0x000208, 0x44332211) await dev_pf0_bar0.write_dword(0x000210, 0x4) - await dev_pf0_bar0.write_dword(0x000214, 0x100AA) + await dev_pf0_bar0.write_dword(0x000214, 0x800000AA) await Timer(2000, 'ns') # read status val = await dev_pf0_bar0.read_dword(0x000218) tb.log.info("Status: 0x%x", val) - assert val == 0x800100AA + assert val == 0x800000AA tb.log.info("%s", mem.hexdump_str(0x1000, 64)) diff --git a/example/ZCU106/fpga/tb/fpga_core/test_fpga_core.py b/example/ZCU106/fpga/tb/fpga_core/test_fpga_core.py index a0a7c8d2d..d771dd452 100644 --- a/example/ZCU106/fpga/tb/fpga_core/test_fpga_core.py +++ b/example/ZCU106/fpga/tb/fpga_core/test_fpga_core.py @@ -324,14 +324,14 @@ async def run_test(dut): await dev_pf0_bar0.write_dword(0x000204, (mem_base+0x1000 >> 32) & 0xffffffff) await 
dev_pf0_bar0.write_dword(0x000208, 0x44332211) await dev_pf0_bar0.write_dword(0x000210, 0x4) - await dev_pf0_bar0.write_dword(0x000214, 0x100AA) + await dev_pf0_bar0.write_dword(0x000214, 0x800000AA) await Timer(2000, 'ns') # read status val = await dev_pf0_bar0.read_dword(0x000218) tb.log.info("Status: 0x%x", val) - assert val == 0x800100AA + assert val == 0x800000AA tb.log.info("%s", mem.hexdump_str(0x1000, 64)) diff --git a/example/common/driver/example/example_driver.c b/example/common/driver/example/example_driver.c index 8f8a193ec..9483ba272 100644 --- a/example/common/driver/example/example_driver.c +++ b/example/common/driver/example/example_driver.c @@ -240,7 +240,7 @@ static int edev_probe(struct pci_dev *pdev, const struct pci_device_id *ent) iowrite32(0x44332211, edev->bar[0] + 0x000208); iowrite32(0, edev->bar[0] + 0x00020C); iowrite32(0x4, edev->bar[0] + 0x000210); - iowrite32(0x100AA, edev->bar[0] + 0x000214); + iowrite32(0x800000AA, edev->bar[0] + 0x000214); msleep(1); diff --git a/example/common/rtl/example_core.v b/example/common/rtl/example_core.v index 538945fb5..820976a55 100644 --- a/example/common/rtl/example_core.v +++ b/example/common/rtl/example_core.v @@ -372,8 +372,8 @@ always @* begin 16'h0208: dma_write_desc_ram_addr_imm_next = s_axil_ctrl_wdata; 16'h0210: dma_write_desc_len_next = s_axil_ctrl_wdata; 16'h0214: begin - dma_write_desc_tag_next = s_axil_ctrl_wdata[15:0]; - dma_write_desc_imm_en_next = s_axil_ctrl_wdata[16]; + dma_write_desc_tag_next = s_axil_ctrl_wdata[23:0]; + dma_write_desc_imm_en_next = s_axil_ctrl_wdata[31]; dma_write_desc_valid_next = 1'b1; end // block read @@ -458,10 +458,12 @@ always @* begin 16'h0208: axil_ctrl_rdata_next = dma_write_desc_ram_addr_imm_reg; 16'h020c: axil_ctrl_rdata_next = dma_write_desc_ram_addr_imm_reg >> 32; 16'h0210: axil_ctrl_rdata_next = dma_write_desc_len_reg; - 16'h0214: axil_ctrl_rdata_next = dma_write_desc_tag_reg; + 16'h0214: begin + axil_ctrl_rdata_next[23:0] = dma_write_desc_tag_reg; 
+ axil_ctrl_rdata_next[31] = dma_write_desc_imm_en_reg; + end 16'h0218: begin axil_ctrl_rdata_next[15:0] = dma_write_desc_status_tag_reg; - axil_ctrl_rdata_next[16] = dma_write_desc_imm_en_reg; axil_ctrl_rdata_next[27:24] = dma_write_desc_status_error_reg; axil_ctrl_rdata_next[31] = dma_write_desc_status_valid_reg; dma_write_desc_status_valid_next = 1'b0; diff --git a/example/common/tb/example_core_pcie/test_example_core_pcie.py b/example/common/tb/example_core_pcie/test_example_core_pcie.py index b00cb0007..eeb82fa13 100644 --- a/example/common/tb/example_core_pcie/test_example_core_pcie.py +++ b/example/common/tb/example_core_pcie/test_example_core_pcie.py @@ -206,14 +206,14 @@ async def run_test(dut): await dev_pf0_bar0.write_dword(0x000204, (mem_base+0x1000 >> 32) & 0xffffffff) await dev_pf0_bar0.write_dword(0x000208, 0x44332211) await dev_pf0_bar0.write_dword(0x000210, 0x4) - await dev_pf0_bar0.write_dword(0x000214, 0x100AA) + await dev_pf0_bar0.write_dword(0x000214, 0x800000AA) await Timer(2000, 'ns') # read status val = await dev_pf0_bar0.read_dword(0x000218) tb.log.info("Status: 0x%x", val) - assert val == 0x800100AA + assert val == 0x800000AA tb.log.info("%s", mem.hexdump_str(0x1000, 64)) diff --git a/example/common/tb/example_core_pcie_s10/test_example_core_pcie_s10.py b/example/common/tb/example_core_pcie_s10/test_example_core_pcie_s10.py index e13458f3c..7032adaad 100644 --- a/example/common/tb/example_core_pcie_s10/test_example_core_pcie_s10.py +++ b/example/common/tb/example_core_pcie_s10/test_example_core_pcie_s10.py @@ -244,14 +244,14 @@ async def run_test(dut): await dev_pf0_bar0.write_dword(0x000204, (mem_base+0x1000 >> 32) & 0xffffffff) await dev_pf0_bar0.write_dword(0x000208, 0x44332211) await dev_pf0_bar0.write_dword(0x000210, 0x4) - await dev_pf0_bar0.write_dword(0x000214, 0x100AA) + await dev_pf0_bar0.write_dword(0x000214, 0x800000AA) await Timer(2000, 'ns') # read status val = await dev_pf0_bar0.read_dword(0x000218) tb.log.info("Status: 
0x%x", val) - assert val == 0x800100AA + assert val == 0x800000AA tb.log.info("%s", mem.hexdump_str(0x1000, 64)) diff --git a/example/common/tb/example_core_pcie_us/test_example_core_pcie_us.py b/example/common/tb/example_core_pcie_us/test_example_core_pcie_us.py index a3c4eb243..d4a3c851f 100644 --- a/example/common/tb/example_core_pcie_us/test_example_core_pcie_us.py +++ b/example/common/tb/example_core_pcie_us/test_example_core_pcie_us.py @@ -338,14 +338,14 @@ async def run_test(dut): await dev_pf0_bar0.write_dword(0x000204, (mem_base+0x1000 >> 32) & 0xffffffff) await dev_pf0_bar0.write_dword(0x000208, 0x44332211) await dev_pf0_bar0.write_dword(0x000210, 0x4) - await dev_pf0_bar0.write_dword(0x000214, 0x100AA) + await dev_pf0_bar0.write_dword(0x000214, 0x800000AA) await Timer(2000, 'ns') # read status val = await dev_pf0_bar0.read_dword(0x000218) tb.log.info("Status: 0x%x", val) - assert val == 0x800100AA + assert val == 0x800000AA tb.log.info("%s", mem.hexdump_str(0x1000, 64)) diff --git a/example/fb2CG/fpga/tb/fpga_core/test_fpga_core.py b/example/fb2CG/fpga/tb/fpga_core/test_fpga_core.py index 17d445eaa..efcb15428 100644 --- a/example/fb2CG/fpga/tb/fpga_core/test_fpga_core.py +++ b/example/fb2CG/fpga/tb/fpga_core/test_fpga_core.py @@ -319,14 +319,14 @@ async def run_test(dut): await dev_pf0_bar0.write_dword(0x000204, (mem_base+0x1000 >> 32) & 0xffffffff) await dev_pf0_bar0.write_dword(0x000208, 0x44332211) await dev_pf0_bar0.write_dword(0x000210, 0x4) - await dev_pf0_bar0.write_dword(0x000214, 0x100AA) + await dev_pf0_bar0.write_dword(0x000214, 0x800000AA) await Timer(2000, 'ns') # read status val = await dev_pf0_bar0.read_dword(0x000218) tb.log.info("Status: 0x%x", val) - assert val == 0x800100AA + assert val == 0x800000AA tb.log.info("%s", mem.hexdump_str(0x1000, 64)) From 0b815522b0b015d5d1cf2b91a5047980c70de7a5 Mon Sep 17 00:00:00 2001 From: Alex Forencich Date: Wed, 20 Apr 2022 00:43:55 -0700 Subject: [PATCH 21/21] Sync example design testbenches 
Signed-off-by: Alex Forencich --- .../fpga/tb/fpga_core/test_fpga_core.py | 58 +++++++++++++------ .../fpga/tb/fpga_core/test_fpga_core.py | 2 + .../AU200/fpga/tb/fpga_core/test_fpga_core.py | 2 + .../AU250/fpga/tb/fpga_core/test_fpga_core.py | 2 + .../AU280/fpga/tb/fpga_core/test_fpga_core.py | 2 + .../AU50/fpga/tb/fpga_core/test_fpga_core.py | 2 + .../fpga/tb/fpga_core/test_fpga_core.py | 2 + .../fpga/tb/fpga_core/test_fpga_core.py | 58 +++++++++++++------ .../fpga/tb/fpga_core/test_fpga_core.py | 2 + .../fpga/tb/fpga_core/test_fpga_core.py | 2 + .../fpga/tb/fpga_core/test_fpga_core.py | 2 + .../fpga/tb/fpga_core/test_fpga_core.py | 2 + 12 files changed, 98 insertions(+), 38 deletions(-) diff --git a/example/520N_MX/fpga/tb/fpga_core/test_fpga_core.py b/example/520N_MX/fpga/tb/fpga_core/test_fpga_core.py index e8159b340..f33ab5e4e 100644 --- a/example/520N_MX/fpga/tb/fpga_core/test_fpga_core.py +++ b/example/520N_MX/fpga/tb/fpga_core/test_fpga_core.py @@ -256,8 +256,16 @@ async def run_test(dut): tb.log.info("Test DMA block operations") + region_len = 0x2000 + src_offset = 0x0000 + dest_offset = 0x4000 + + block_size = 256 + block_stride = block_size + block_count = 32 + # write packet data - mem[0:1024] = bytearray([x % 256 for x in range(1024)]) + mem[src_offset:src_offset+region_len] = bytearray([x % 256 for x in range(region_len)]) # enable DMA await dev_pf0_bar0.write_dword(0x000000, 1) @@ -266,16 +274,16 @@ async def run_test(dut): # configure operation (read) # DMA base address - await dev_pf0_bar0.write_dword(0x001080, (mem_base+0x0000) & 0xffffffff) - await dev_pf0_bar0.write_dword(0x001084, (mem_base+0x0000 >> 32) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x001080, (mem_base+src_offset) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x001084, (mem_base+src_offset >> 32) & 0xffffffff) # DMA offset address await dev_pf0_bar0.write_dword(0x001088, 0) await dev_pf0_bar0.write_dword(0x00108c, 0) # DMA offset mask - await 
dev_pf0_bar0.write_dword(0x001090, 0x000003ff) + await dev_pf0_bar0.write_dword(0x001090, region_len-1) await dev_pf0_bar0.write_dword(0x001094, 0) # DMA stride - await dev_pf0_bar0.write_dword(0x001098, 256) + await dev_pf0_bar0.write_dword(0x001098, block_stride) await dev_pf0_bar0.write_dword(0x00109c, 0) # RAM base address await dev_pf0_bar0.write_dword(0x0010c0, 0) @@ -284,36 +292,40 @@ async def run_test(dut): await dev_pf0_bar0.write_dword(0x0010c8, 0) await dev_pf0_bar0.write_dword(0x0010cc, 0) # RAM offset mask - await dev_pf0_bar0.write_dword(0x0010d0, 0x000003ff) + await dev_pf0_bar0.write_dword(0x0010d0, region_len-1) await dev_pf0_bar0.write_dword(0x0010d4, 0) # RAM stride - await dev_pf0_bar0.write_dword(0x0010d8, 256) + await dev_pf0_bar0.write_dword(0x0010d8, block_stride) await dev_pf0_bar0.write_dword(0x0010dc, 0) # clear cycle count await dev_pf0_bar0.write_dword(0x001008, 0) await dev_pf0_bar0.write_dword(0x00100c, 0) # block length - await dev_pf0_bar0.write_dword(0x001010, 256) + await dev_pf0_bar0.write_dword(0x001010, block_size) # block count - await dev_pf0_bar0.write_dword(0x001018, 32) + await dev_pf0_bar0.write_dword(0x001018, block_count) await dev_pf0_bar0.write_dword(0x00101c, 0) # start await dev_pf0_bar0.write_dword(0x001000, 1) - await Timer(2000, 'ns') + for k in range(10): + cnt = await dev_pf0_bar0.read_dword(0x001018) + await Timer(1000, 'ns') + if cnt == 0: + break # configure operation (write) # DMA base address - await dev_pf0_bar0.write_dword(0x001180, (mem_base+0x0000) & 0xffffffff) - await dev_pf0_bar0.write_dword(0x001184, (mem_base+0x0000 >> 32) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x001180, (mem_base+dest_offset) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x001184, (mem_base+dest_offset >> 32) & 0xffffffff) # DMA offset address await dev_pf0_bar0.write_dword(0x001188, 0) await dev_pf0_bar0.write_dword(0x00118c, 0) # DMA offset mask - await dev_pf0_bar0.write_dword(0x001190, 0x000003ff) + await 
dev_pf0_bar0.write_dword(0x001190, region_len-1) await dev_pf0_bar0.write_dword(0x001194, 0) # DMA stride - await dev_pf0_bar0.write_dword(0x001198, 256) + await dev_pf0_bar0.write_dword(0x001198, block_stride) await dev_pf0_bar0.write_dword(0x00119c, 0) # RAM base address await dev_pf0_bar0.write_dword(0x0011c0, 0) @@ -322,23 +334,31 @@ async def run_test(dut): await dev_pf0_bar0.write_dword(0x0011c8, 0) await dev_pf0_bar0.write_dword(0x0011cc, 0) # RAM offset mask - await dev_pf0_bar0.write_dword(0x0011d0, 0x000003ff) + await dev_pf0_bar0.write_dword(0x0011d0, region_len-1) await dev_pf0_bar0.write_dword(0x0011d4, 0) # RAM stride - await dev_pf0_bar0.write_dword(0x0011d8, 256) + await dev_pf0_bar0.write_dword(0x0011d8, block_stride) await dev_pf0_bar0.write_dword(0x0011dc, 0) # clear cycle count await dev_pf0_bar0.write_dword(0x001108, 0) await dev_pf0_bar0.write_dword(0x00110c, 0) # block length - await dev_pf0_bar0.write_dword(0x001110, 256) + await dev_pf0_bar0.write_dword(0x001110, block_size) # block count - await dev_pf0_bar0.write_dword(0x001118, 32) + await dev_pf0_bar0.write_dword(0x001118, block_count) await dev_pf0_bar0.write_dword(0x00111c, 0) # start await dev_pf0_bar0.write_dword(0x001100, 1) - await Timer(2000, 'ns') + for k in range(10): + cnt = await dev_pf0_bar0.read_dword(0x001118) + await Timer(1000, 'ns') + if cnt == 0: + break + + tb.log.info("%s", mem.hexdump_str(dest_offset, region_len)) + + assert mem[src_offset:src_offset+region_len] == mem[dest_offset:dest_offset+region_len] await RisingEdge(dut.clk) await RisingEdge(dut.clk) diff --git a/example/ADM_PCIE_9V3/fpga/tb/fpga_core/test_fpga_core.py b/example/ADM_PCIE_9V3/fpga/tb/fpga_core/test_fpga_core.py index 574c73478..efcb15428 100644 --- a/example/ADM_PCIE_9V3/fpga/tb/fpga_core/test_fpga_core.py +++ b/example/ADM_PCIE_9V3/fpga/tb/fpga_core/test_fpga_core.py @@ -277,6 +277,8 @@ async def run_test(dut): # enable DMA await dev_pf0_bar0.write_dword(0x000000, 1) + # enable interrupts + 
await dev_pf0_bar0.write_dword(0x000008, 0x3) # write pcie read descriptor await dev_pf0_bar0.write_dword(0x000100, (mem_base+0x0000) & 0xffffffff) diff --git a/example/AU200/fpga/tb/fpga_core/test_fpga_core.py b/example/AU200/fpga/tb/fpga_core/test_fpga_core.py index 570e0da8c..1698e0298 100644 --- a/example/AU200/fpga/tb/fpga_core/test_fpga_core.py +++ b/example/AU200/fpga/tb/fpga_core/test_fpga_core.py @@ -279,6 +279,8 @@ async def run_test(dut): # enable DMA await dev_pf0_bar0.write_dword(0x000000, 1) + # enable interrupts + await dev_pf0_bar0.write_dword(0x000008, 0x3) # write pcie read descriptor await dev_pf0_bar0.write_dword(0x000100, (mem_base+0x0000) & 0xffffffff) diff --git a/example/AU250/fpga/tb/fpga_core/test_fpga_core.py b/example/AU250/fpga/tb/fpga_core/test_fpga_core.py index 570e0da8c..1698e0298 100644 --- a/example/AU250/fpga/tb/fpga_core/test_fpga_core.py +++ b/example/AU250/fpga/tb/fpga_core/test_fpga_core.py @@ -279,6 +279,8 @@ async def run_test(dut): # enable DMA await dev_pf0_bar0.write_dword(0x000000, 1) + # enable interrupts + await dev_pf0_bar0.write_dword(0x000008, 0x3) # write pcie read descriptor await dev_pf0_bar0.write_dword(0x000100, (mem_base+0x0000) & 0xffffffff) diff --git a/example/AU280/fpga/tb/fpga_core/test_fpga_core.py b/example/AU280/fpga/tb/fpga_core/test_fpga_core.py index 574c73478..efcb15428 100644 --- a/example/AU280/fpga/tb/fpga_core/test_fpga_core.py +++ b/example/AU280/fpga/tb/fpga_core/test_fpga_core.py @@ -277,6 +277,8 @@ async def run_test(dut): # enable DMA await dev_pf0_bar0.write_dword(0x000000, 1) + # enable interrupts + await dev_pf0_bar0.write_dword(0x000008, 0x3) # write pcie read descriptor await dev_pf0_bar0.write_dword(0x000100, (mem_base+0x0000) & 0xffffffff) diff --git a/example/AU50/fpga/tb/fpga_core/test_fpga_core.py b/example/AU50/fpga/tb/fpga_core/test_fpga_core.py index 574c73478..efcb15428 100644 --- a/example/AU50/fpga/tb/fpga_core/test_fpga_core.py +++ 
b/example/AU50/fpga/tb/fpga_core/test_fpga_core.py @@ -277,6 +277,8 @@ async def run_test(dut): # enable DMA await dev_pf0_bar0.write_dword(0x000000, 1) + # enable interrupts + await dev_pf0_bar0.write_dword(0x000008, 0x3) # write pcie read descriptor await dev_pf0_bar0.write_dword(0x000100, (mem_base+0x0000) & 0xffffffff) diff --git a/example/ExaNIC_X25/fpga/tb/fpga_core/test_fpga_core.py b/example/ExaNIC_X25/fpga/tb/fpga_core/test_fpga_core.py index 37b721492..052f4883e 100644 --- a/example/ExaNIC_X25/fpga/tb/fpga_core/test_fpga_core.py +++ b/example/ExaNIC_X25/fpga/tb/fpga_core/test_fpga_core.py @@ -277,6 +277,8 @@ async def run_test(dut): # enable DMA await dev_pf0_bar0.write_dword(0x000000, 1) + # enable interrupts + await dev_pf0_bar0.write_dword(0x000008, 0x3) # write pcie read descriptor await dev_pf0_bar0.write_dword(0x000100, (mem_base+0x0000) & 0xffffffff) diff --git a/example/S10MX_DK/fpga/tb/fpga_core/test_fpga_core.py b/example/S10MX_DK/fpga/tb/fpga_core/test_fpga_core.py index e8159b340..f33ab5e4e 100644 --- a/example/S10MX_DK/fpga/tb/fpga_core/test_fpga_core.py +++ b/example/S10MX_DK/fpga/tb/fpga_core/test_fpga_core.py @@ -256,8 +256,16 @@ async def run_test(dut): tb.log.info("Test DMA block operations") + region_len = 0x2000 + src_offset = 0x0000 + dest_offset = 0x4000 + + block_size = 256 + block_stride = block_size + block_count = 32 + # write packet data - mem[0:1024] = bytearray([x % 256 for x in range(1024)]) + mem[src_offset:src_offset+region_len] = bytearray([x % 256 for x in range(region_len)]) # enable DMA await dev_pf0_bar0.write_dword(0x000000, 1) @@ -266,16 +274,16 @@ async def run_test(dut): # configure operation (read) # DMA base address - await dev_pf0_bar0.write_dword(0x001080, (mem_base+0x0000) & 0xffffffff) - await dev_pf0_bar0.write_dword(0x001084, (mem_base+0x0000 >> 32) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x001080, (mem_base+src_offset) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x001084, (mem_base+src_offset >> 
32) & 0xffffffff) # DMA offset address await dev_pf0_bar0.write_dword(0x001088, 0) await dev_pf0_bar0.write_dword(0x00108c, 0) # DMA offset mask - await dev_pf0_bar0.write_dword(0x001090, 0x000003ff) + await dev_pf0_bar0.write_dword(0x001090, region_len-1) await dev_pf0_bar0.write_dword(0x001094, 0) # DMA stride - await dev_pf0_bar0.write_dword(0x001098, 256) + await dev_pf0_bar0.write_dword(0x001098, block_stride) await dev_pf0_bar0.write_dword(0x00109c, 0) # RAM base address await dev_pf0_bar0.write_dword(0x0010c0, 0) @@ -284,36 +292,40 @@ async def run_test(dut): await dev_pf0_bar0.write_dword(0x0010c8, 0) await dev_pf0_bar0.write_dword(0x0010cc, 0) # RAM offset mask - await dev_pf0_bar0.write_dword(0x0010d0, 0x000003ff) + await dev_pf0_bar0.write_dword(0x0010d0, region_len-1) await dev_pf0_bar0.write_dword(0x0010d4, 0) # RAM stride - await dev_pf0_bar0.write_dword(0x0010d8, 256) + await dev_pf0_bar0.write_dword(0x0010d8, block_stride) await dev_pf0_bar0.write_dword(0x0010dc, 0) # clear cycle count await dev_pf0_bar0.write_dword(0x001008, 0) await dev_pf0_bar0.write_dword(0x00100c, 0) # block length - await dev_pf0_bar0.write_dword(0x001010, 256) + await dev_pf0_bar0.write_dword(0x001010, block_size) # block count - await dev_pf0_bar0.write_dword(0x001018, 32) + await dev_pf0_bar0.write_dword(0x001018, block_count) await dev_pf0_bar0.write_dword(0x00101c, 0) # start await dev_pf0_bar0.write_dword(0x001000, 1) - await Timer(2000, 'ns') + for k in range(10): + cnt = await dev_pf0_bar0.read_dword(0x001018) + await Timer(1000, 'ns') + if cnt == 0: + break # configure operation (write) # DMA base address - await dev_pf0_bar0.write_dword(0x001180, (mem_base+0x0000) & 0xffffffff) - await dev_pf0_bar0.write_dword(0x001184, (mem_base+0x0000 >> 32) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x001180, (mem_base+dest_offset) & 0xffffffff) + await dev_pf0_bar0.write_dword(0x001184, (mem_base+dest_offset >> 32) & 0xffffffff) # DMA offset address await 
dev_pf0_bar0.write_dword(0x001188, 0) await dev_pf0_bar0.write_dword(0x00118c, 0) # DMA offset mask - await dev_pf0_bar0.write_dword(0x001190, 0x000003ff) + await dev_pf0_bar0.write_dword(0x001190, region_len-1) await dev_pf0_bar0.write_dword(0x001194, 0) # DMA stride - await dev_pf0_bar0.write_dword(0x001198, 256) + await dev_pf0_bar0.write_dword(0x001198, block_stride) await dev_pf0_bar0.write_dword(0x00119c, 0) # RAM base address await dev_pf0_bar0.write_dword(0x0011c0, 0) @@ -322,23 +334,31 @@ async def run_test(dut): await dev_pf0_bar0.write_dword(0x0011c8, 0) await dev_pf0_bar0.write_dword(0x0011cc, 0) # RAM offset mask - await dev_pf0_bar0.write_dword(0x0011d0, 0x000003ff) + await dev_pf0_bar0.write_dword(0x0011d0, region_len-1) await dev_pf0_bar0.write_dword(0x0011d4, 0) # RAM stride - await dev_pf0_bar0.write_dword(0x0011d8, 256) + await dev_pf0_bar0.write_dword(0x0011d8, block_stride) await dev_pf0_bar0.write_dword(0x0011dc, 0) # clear cycle count await dev_pf0_bar0.write_dword(0x001108, 0) await dev_pf0_bar0.write_dword(0x00110c, 0) # block length - await dev_pf0_bar0.write_dword(0x001110, 256) + await dev_pf0_bar0.write_dword(0x001110, block_size) # block count - await dev_pf0_bar0.write_dword(0x001118, 32) + await dev_pf0_bar0.write_dword(0x001118, block_count) await dev_pf0_bar0.write_dword(0x00111c, 0) # start await dev_pf0_bar0.write_dword(0x001100, 1) - await Timer(2000, 'ns') + for k in range(10): + cnt = await dev_pf0_bar0.read_dword(0x001118) + await Timer(1000, 'ns') + if cnt == 0: + break + + tb.log.info("%s", mem.hexdump_str(dest_offset, region_len)) + + assert mem[src_offset:src_offset+region_len] == mem[dest_offset:dest_offset+region_len] await RisingEdge(dut.clk) await RisingEdge(dut.clk) diff --git a/example/VCU108/fpga/tb/fpga_core/test_fpga_core.py b/example/VCU108/fpga/tb/fpga_core/test_fpga_core.py index d09695bb2..1e38e1b50 100644 --- a/example/VCU108/fpga/tb/fpga_core/test_fpga_core.py +++ 
b/example/VCU108/fpga/tb/fpga_core/test_fpga_core.py @@ -278,6 +278,8 @@ async def run_test(dut): # enable DMA await dev_pf0_bar0.write_dword(0x000000, 1) + # enable interrupts + await dev_pf0_bar0.write_dword(0x000008, 0x3) # write pcie read descriptor await dev_pf0_bar0.write_dword(0x000100, (mem_base+0x0000) & 0xffffffff) diff --git a/example/VCU118/fpga/tb/fpga_core/test_fpga_core.py b/example/VCU118/fpga/tb/fpga_core/test_fpga_core.py index cdd35ec04..f105535f1 100644 --- a/example/VCU118/fpga/tb/fpga_core/test_fpga_core.py +++ b/example/VCU118/fpga/tb/fpga_core/test_fpga_core.py @@ -284,6 +284,8 @@ async def run_test(dut): # enable DMA await dev_pf0_bar0.write_dword(0x000000, 1) + # enable interrupts + await dev_pf0_bar0.write_dword(0x000008, 0x3) # write pcie read descriptor await dev_pf0_bar0.write_dword(0x000100, (mem_base+0x0000) & 0xffffffff) diff --git a/example/VCU1525/fpga/tb/fpga_core/test_fpga_core.py b/example/VCU1525/fpga/tb/fpga_core/test_fpga_core.py index 570e0da8c..1698e0298 100644 --- a/example/VCU1525/fpga/tb/fpga_core/test_fpga_core.py +++ b/example/VCU1525/fpga/tb/fpga_core/test_fpga_core.py @@ -279,6 +279,8 @@ async def run_test(dut): # enable DMA await dev_pf0_bar0.write_dword(0x000000, 1) + # enable interrupts + await dev_pf0_bar0.write_dword(0x000008, 0x3) # write pcie read descriptor await dev_pf0_bar0.write_dword(0x000100, (mem_base+0x0000) & 0xffffffff) diff --git a/example/ZCU106/fpga/tb/fpga_core/test_fpga_core.py b/example/ZCU106/fpga/tb/fpga_core/test_fpga_core.py index d771dd452..d13d19f73 100644 --- a/example/ZCU106/fpga/tb/fpga_core/test_fpga_core.py +++ b/example/ZCU106/fpga/tb/fpga_core/test_fpga_core.py @@ -284,6 +284,8 @@ async def run_test(dut): # enable DMA await dev_pf0_bar0.write_dword(0x000000, 1) + # enable interrupts + await dev_pf0_bar0.write_dword(0x000008, 0x3) # write pcie read descriptor await dev_pf0_bar0.write_dword(0x000100, (mem_base+0x0000) & 0xffffffff)