From 3e03b20bc78368958933703a41e8acf613169050 Mon Sep 17 00:00:00 2001 From: Alex Forencich Date: Sat, 24 Jul 2021 01:13:25 -0700 Subject: [PATCH 1/3] Properly handle zero-length PCIe read and write operations --- rtl/pcie_us_axi_master_rd.v | 2 +- rtl/pcie_us_axil_master.v | 2 +- tb/pcie_us_axi_master/test_pcie_us_axi_master.py | 4 ++-- .../test_pcie_us_axi_master_rd.py | 2 +- .../test_pcie_us_axi_master_wr.py | 2 +- tb/pcie_us_axil_master/test_pcie_us_axil_master.py | 12 +++++++++--- 6 files changed, 15 insertions(+), 9 deletions(-) diff --git a/rtl/pcie_us_axi_master_rd.v b/rtl/pcie_us_axi_master_rd.v index 046bcb804..a2d1f6387 100644 --- a/rtl/pcie_us_axi_master_rd.v +++ b/rtl/pcie_us_axi_master_rd.v @@ -306,7 +306,7 @@ assign status_error_uncor = status_error_uncor_reg; always @* begin casez (first_be_next) - 4'b0000: single_dword_len = 3'd0; + 4'b0000: single_dword_len = 3'd1; 4'b0001: single_dword_len = 3'd1; 4'b0010: single_dword_len = 3'd1; 4'b0100: single_dword_len = 3'd1; diff --git a/rtl/pcie_us_axil_master.v b/rtl/pcie_us_axil_master.v index df543f3b6..ae3b5b28d 100644 --- a/rtl/pcie_us_axil_master.v +++ b/rtl/pcie_us_axil_master.v @@ -272,7 +272,7 @@ always @* begin endcase m_axis_cc_tdata_int[9:8] = at_reg; casez (first_be_reg) - 4'b0000: m_axis_cc_tdata_int[28:16] = 13'd0; // Byte count + 4'b0000: m_axis_cc_tdata_int[28:16] = 13'd1; // Byte count 4'b0001: m_axis_cc_tdata_int[28:16] = 13'd1; // Byte count 4'b0010: m_axis_cc_tdata_int[28:16] = 13'd1; // Byte count 4'b0100: m_axis_cc_tdata_int[28:16] = 13'd1; // Byte count diff --git a/tb/pcie_us_axi_master/test_pcie_us_axi_master.py b/tb/pcie_us_axi_master/test_pcie_us_axi_master.py index 7058e5c44..b8a0a84e4 100644 --- a/tb/pcie_us_axi_master/test_pcie_us_axi_master.py +++ b/tb/pcie_us_axi_master/test_pcie_us_axi_master.py @@ -157,7 +157,7 @@ async def run_test_write(dut, idle_inserter=None, backpressure_inserter=None): dev_bar0 = tb.rc.tree[0][0].bar_addr[0] - for length in list(range(1, byte_lanes*2))+[1024]: + for length in list(range(0, byte_lanes*2))+[1024]: for pcie_offset in range(byte_lanes): tb.log.info("length %d, pcie_offset %d", length, pcie_offset) pcie_addr = pcie_offset+0x1000 @@ -196,7 +196,7 @@ async def run_test_read(dut, idle_inserter=None, backpressure_inserter=None): dev_bar0 = tb.rc.tree[0][0].bar_addr[0] - for length in list(range(1, byte_lanes*2))+[1024]: + for length in list(range(0, byte_lanes*2))+[1024]: for pcie_offset in range(byte_lanes): tb.log.info("length %d, pcie_offset %d", length, pcie_offset) pcie_addr = pcie_offset+0x1000 diff --git a/tb/pcie_us_axi_master_rd/test_pcie_us_axi_master_rd.py b/tb/pcie_us_axi_master_rd/test_pcie_us_axi_master_rd.py index f995d2d68..84834c079 100644 --- a/tb/pcie_us_axi_master_rd/test_pcie_us_axi_master_rd.py +++ b/tb/pcie_us_axi_master_rd/test_pcie_us_axi_master_rd.py @@ -154,7 +154,7 @@ async def run_test_read(dut, idle_inserter=None, backpressure_inserter=None): dev_bar0 = tb.rc.tree[0][0].bar_addr[0] - for length in list(range(1, byte_lanes*2))+[1024]: + for length in list(range(0, byte_lanes*2))+[1024]: for pcie_offset in list(range(byte_lanes))+list(range(4096-byte_lanes, 4096)): tb.log.info("length %d, pcie_offset %d", length, pcie_offset) pcie_addr = pcie_offset+0x1000 diff --git a/tb/pcie_us_axi_master_wr/test_pcie_us_axi_master_wr.py b/tb/pcie_us_axi_master_wr/test_pcie_us_axi_master_wr.py index 629394588..88d689275 100644 --- a/tb/pcie_us_axi_master_wr/test_pcie_us_axi_master_wr.py +++ b/tb/pcie_us_axi_master_wr/test_pcie_us_axi_master_wr.py @@ -139,7 +139,7 @@ async def run_test_write(dut, idle_inserter=None, backpressure_inserter=None): dev_bar0 = tb.rc.tree[0][0].bar_addr[0] - for length in list(range(1, byte_lanes*2))+[1024]: + for length in list(range(0, byte_lanes*2))+[1024]: for pcie_offset in list(range(byte_lanes))+list(range(4096-byte_lanes, 4096)): tb.log.info("length %d, pcie_offset %d", length, pcie_offset) pcie_addr = pcie_offset+0x1000 diff --git a/tb/pcie_us_axil_master/test_pcie_us_axil_master.py b/tb/pcie_us_axil_master/test_pcie_us_axil_master.py index a87e3ad3c..5def3e5be 100644 --- a/tb/pcie_us_axil_master/test_pcie_us_axil_master.py +++ b/tb/pcie_us_axil_master/test_pcie_us_axil_master.py @@ -154,7 +154,7 @@ async def run_test_write(dut, idle_inserter=None, backpressure_inserter=None): dev_bar0 = tb.rc.tree[0][0].bar_addr[0] dev_bar1 = tb.rc.tree[0][0].bar_addr[1] - for length in range(1, 5): + for length in range(0, 5): for pcie_offset in range(4-length+1): tb.log.info("length %d, pcie_offset %d", length, pcie_offset) pcie_addr = pcie_offset+0x1000 @@ -192,7 +192,7 @@ async def run_test_read(dut, idle_inserter=None, backpressure_inserter=None): dev_bar0 = tb.rc.tree[0][0].bar_addr[0] dev_bar1 = tb.rc.tree[0][0].bar_addr[1] - for length in range(1, 5): + for length in range(0, 5): for pcie_offset in range(4-length+1): tb.log.info("length %d, pcie_offset %d", length, pcie_offset) pcie_addr = pcie_offset+0x1000 @@ -355,7 +355,13 @@ def cycle_pause(): if cocotb.SIM_NAME: - for test in [run_test_write, run_test_read, run_test_io_write, run_test_io_read, run_test_bad_ops]: + for test in [ + run_test_write, + run_test_read, + run_test_io_write, + run_test_io_read, + run_test_bad_ops + ]: factory = TestFactory(test) factory.add_option("idle_inserter", [None, cycle_pause]) From 59c026b1b86c8de4d387f914f573472aa5b70b21 Mon Sep 17 00:00:00 2001 From: Alex Forencich Date: Sat, 24 Jul 2021 02:02:30 -0700 Subject: [PATCH 2/3] Fix parameters --- tb/dma_if_pcie_us/Makefile | 4 ++-- tb/dma_if_pcie_us_rd/Makefile | 4 ++-- tb/dma_if_pcie_us_wr/Makefile | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tb/dma_if_pcie_us/Makefile b/tb/dma_if_pcie_us/Makefile index 5e10028c5..135879c96 100644 --- a/tb/dma_if_pcie_us/Makefile +++ b/tb/dma_if_pcie_us/Makefile @@ -94,8 +94,8 @@ else ifeq ($(SIM), verilator) COMPILE_ARGS += -GAXIS_PCIE_KEEP_WIDTH=$(PARAM_AXIS_PCIE_KEEP_WIDTH) COMPILE_ARGS += -GAXIS_PCIE_RQ_USER_WIDTH=$(PARAM_AXIS_PCIE_RQ_USER_WIDTH) COMPILE_ARGS += -GAXIS_PCIE_RC_USER_WIDTH=$(PARAM_AXIS_PCIE_RC_USER_WIDTH) - COMPILE_ARGS += -GAXIS_RQ_SEQ_NUM_WIDTH=$(PARAM_RQ_SEQ_NUM_WIDTH) - COMPILE_ARGS += -GAXIS_RQ_SEQ_NUM_ENABLE=$(PARAM_RQ_SEQ_NUM_ENABLE) + COMPILE_ARGS += -GRQ_SEQ_NUM_WIDTH=$(PARAM_RQ_SEQ_NUM_WIDTH) + COMPILE_ARGS += -GRQ_SEQ_NUM_ENABLE=$(PARAM_RQ_SEQ_NUM_ENABLE) COMPILE_ARGS += -GSEG_COUNT=$(PARAM_SEG_COUNT) COMPILE_ARGS += -GSEG_DATA_WIDTH=$(PARAM_SEG_DATA_WIDTH) COMPILE_ARGS += -GSEG_ADDR_WIDTH=$(PARAM_SEG_ADDR_WIDTH) diff --git a/tb/dma_if_pcie_us_rd/Makefile b/tb/dma_if_pcie_us_rd/Makefile index a96433cd3..863d9fa48 100644 --- a/tb/dma_if_pcie_us_rd/Makefile +++ b/tb/dma_if_pcie_us_rd/Makefile @@ -86,8 +86,8 @@ else ifeq ($(SIM), verilator) COMPILE_ARGS += -GAXIS_PCIE_KEEP_WIDTH=$(PARAM_AXIS_PCIE_KEEP_WIDTH) COMPILE_ARGS += -GAXIS_PCIE_RQ_USER_WIDTH=$(PARAM_AXIS_PCIE_RQ_USER_WIDTH) COMPILE_ARGS += -GAXIS_PCIE_RC_USER_WIDTH=$(PARAM_AXIS_PCIE_RC_USER_WIDTH) - COMPILE_ARGS += -GAXIS_RQ_SEQ_NUM_WIDTH=$(PARAM_RQ_SEQ_NUM_WIDTH) - COMPILE_ARGS += -GAXIS_RQ_SEQ_NUM_ENABLE=$(PARAM_RQ_SEQ_NUM_ENABLE) + COMPILE_ARGS += -GRQ_SEQ_NUM_WIDTH=$(PARAM_RQ_SEQ_NUM_WIDTH) + COMPILE_ARGS += -GRQ_SEQ_NUM_ENABLE=$(PARAM_RQ_SEQ_NUM_ENABLE) COMPILE_ARGS += -GSEG_COUNT=$(PARAM_SEG_COUNT) COMPILE_ARGS += -GSEG_DATA_WIDTH=$(PARAM_SEG_DATA_WIDTH) COMPILE_ARGS += -GSEG_ADDR_WIDTH=$(PARAM_SEG_ADDR_WIDTH) diff --git a/tb/dma_if_pcie_us_wr/Makefile b/tb/dma_if_pcie_us_wr/Makefile index 89be4df9b..37a923e19 100644 --- a/tb/dma_if_pcie_us_wr/Makefile +++ b/tb/dma_if_pcie_us_wr/Makefile @@ -81,8 +81,8 @@ else ifeq ($(SIM), verilator) COMPILE_ARGS += -GAXIS_PCIE_DATA_WIDTH=$(PARAM_AXIS_PCIE_DATA_WIDTH) COMPILE_ARGS += -GAXIS_PCIE_KEEP_WIDTH=$(PARAM_AXIS_PCIE_KEEP_WIDTH) COMPILE_ARGS += -GAXIS_PCIE_RQ_USER_WIDTH=$(PARAM_AXIS_PCIE_RQ_USER_WIDTH) - COMPILE_ARGS += -GAXIS_RQ_SEQ_NUM_WIDTH=$(PARAM_RQ_SEQ_NUM_WIDTH) - COMPILE_ARGS += -GAXIS_RQ_SEQ_NUM_ENABLE=$(PARAM_RQ_SEQ_NUM_ENABLE) + COMPILE_ARGS += -GRQ_SEQ_NUM_WIDTH=$(PARAM_RQ_SEQ_NUM_WIDTH) + COMPILE_ARGS += -GRQ_SEQ_NUM_ENABLE=$(PARAM_RQ_SEQ_NUM_ENABLE) COMPILE_ARGS += -GSEG_COUNT=$(PARAM_SEG_COUNT) COMPILE_ARGS += -GSEG_DATA_WIDTH=$(PARAM_SEG_DATA_WIDTH) COMPILE_ARGS += -GSEG_ADDR_WIDTH=$(PARAM_SEG_ADDR_WIDTH) From dad637bd00bf3a691eb2b8c0a461c4e3212a8779 Mon Sep 17 00:00:00 2001 From: Alex Forencich Date: Sun, 25 Jul 2021 01:36:40 -0700 Subject: [PATCH 3/3] Properly handle zero-length DMA operations --- rtl/dma_if_pcie_us_rd.v | 67 ++++++++++++++----- rtl/dma_if_pcie_us_wr.v | 31 ++++++++- rtl/pcie_us_axi_dma_rd.v | 39 +++++++++-- rtl/pcie_us_axi_dma_wr.v | 30 +++++++-- tb/dma_if_pcie_us/test_dma_if_pcie_us.py | 4 +- .../test_dma_if_pcie_us_rd.py | 2 +- .../test_dma_if_pcie_us_wr.py | 2 +- tb/pcie_us_axi_dma/test_pcie_us_axi_dma.py | 4 +- .../test_pcie_us_axi_dma_rd.py | 2 +- .../test_pcie_us_axi_dma_wr.py | 2 +- 10 files changed, 145 insertions(+), 38 deletions(-) diff --git a/rtl/dma_if_pcie_us_rd.v b/rtl/dma_if_pcie_us_rd.v index 287d7de49..4d7a64572 100644 --- a/rtl/dma_if_pcie_us_rd.v +++ b/rtl/dma_if_pcie_us_rd.v @@ -334,6 +334,7 @@ reg [RAM_SEL_WIDTH-1:0] req_ram_sel_reg = {RAM_SEL_WIDTH{1'b0}}, req_ram_sel_nex reg [RAM_ADDR_WIDTH-1:0] req_ram_addr_reg = {RAM_ADDR_WIDTH{1'b0}}, req_ram_addr_next; reg [LEN_WIDTH-1:0] req_op_count_reg = {LEN_WIDTH{1'b0}}, req_op_count_next; reg [12:0] req_tlp_count_reg = 13'd0, req_tlp_count_next; +reg req_zero_len_reg = 1'b0, req_zero_len_next; reg [OP_TAG_WIDTH-1:0] req_op_tag_reg = {OP_TAG_WIDTH{1'b0}}, req_op_tag_next; reg [PCIE_TAG_WIDTH-1:0] req_pcie_tag_reg = {PCIE_TAG_WIDTH{1'b0}}, req_pcie_tag_next; reg req_pcie_tag_valid_reg = 1'b0, req_pcie_tag_valid_next; @@ -346,6 +347,7 @@ reg [RAM_ADDR_WIDTH-1:0] addr_reg = {RAM_ADDR_WIDTH{1'b0}}, addr_next; reg [RAM_ADDR_WIDTH-1:0] addr_delay_reg = {RAM_ADDR_WIDTH{1'b0}}, addr_delay_next; reg [9:0] op_dword_count_reg = 10'd0, op_dword_count_next; reg [12:0] op_count_reg = 13'd0, op_count_next; +reg zero_len_reg = 1'b0, zero_len_next; reg [SEG_COUNT-1:0] ram_mask_reg = {SEG_COUNT{1'b0}}, ram_mask_next; reg [SEG_COUNT-1:0] ram_mask_0_reg = {SEG_COUNT{1'b0}}, ram_mask_0_next; reg [SEG_COUNT-1:0] ram_mask_1_reg = {SEG_COUNT{1'b0}}, ram_mask_1_next; @@ -434,6 +436,7 @@ reg [PCIE_TAG_WIDTH-1:0] pcie_tag_table_start_ptr_reg = 0, pcie_tag_table_start_ reg [RAM_SEL_WIDTH-1:0] pcie_tag_table_start_ram_sel_reg = 0, pcie_tag_table_start_ram_sel_next; reg [RAM_ADDR_WIDTH-1:0] pcie_tag_table_start_ram_addr_reg = 0, pcie_tag_table_start_ram_addr_next; reg [OP_TAG_WIDTH-1:0] pcie_tag_table_start_op_tag_reg = 0, pcie_tag_table_start_op_tag_next; +reg pcie_tag_table_start_zero_len_reg = 1'b0, pcie_tag_table_start_zero_len_next; reg pcie_tag_table_start_en_reg = 1'b0, pcie_tag_table_start_en_next; reg [PCIE_TAG_WIDTH-1:0] pcie_tag_table_finish_ptr; reg pcie_tag_table_finish_en; @@ -441,6 +444,7 @@ reg pcie_tag_table_finish_en; reg [RAM_SEL_WIDTH-1:0] pcie_tag_table_ram_sel[(2**PCIE_TAG_WIDTH)-1:0]; reg [RAM_ADDR_WIDTH-1:0] pcie_tag_table_ram_addr[(2**PCIE_TAG_WIDTH)-1:0]; reg [OP_TAG_WIDTH-1:0] pcie_tag_table_op_tag[(2**PCIE_TAG_WIDTH)-1:0]; +reg pcie_tag_table_zero_len[(2**PCIE_TAG_WIDTH)-1:0]; reg pcie_tag_table_active_a[(2**PCIE_TAG_WIDTH)-1:0]; reg pcie_tag_table_active_b[(2**PCIE_TAG_WIDTH)-1:0]; @@ -495,6 +499,7 @@ initial begin pcie_tag_table_ram_sel[i] = 0; pcie_tag_table_ram_addr[i] = 0; pcie_tag_table_op_tag[i] = 0; + pcie_tag_table_zero_len[i] = 0; pcie_tag_table_active_a[i] = 0; pcie_tag_table_active_b[i] = 0; end @@ -510,6 +515,7 @@ always @* begin req_ram_addr_next = req_ram_addr_reg; req_op_count_next = req_op_count_reg; req_tlp_count_next = req_tlp_count_reg; + req_zero_len_next = req_zero_len_reg; req_op_tag_next = req_op_tag_reg; req_pcie_tag_next = req_pcie_tag_reg; req_pcie_tag_valid_next = req_pcie_tag_valid_reg; @@ -571,6 +577,7 @@ always @* begin pcie_tag_table_start_ram_sel_next = req_ram_sel_reg; pcie_tag_table_start_ram_addr_next = req_ram_addr_reg + req_tlp_count_next; pcie_tag_table_start_op_tag_next = req_op_tag_reg; + pcie_tag_table_start_zero_len_next = req_zero_len_reg; pcie_tag_table_start_en_next = 1'b0; first_be = 4'b1111 << req_pcie_addr_reg[1:0]; @@ -591,9 +598,9 @@ always @* begin tlp_header_data[127] = 1'b0; // force ECRC if (AXIS_PCIE_DATA_WIDTH == 512) begin - tlp_tuser[3:0] = dword_count == 1 ? first_be & last_be : first_be; // first BE 0 + tlp_tuser[3:0] = req_zero_len_reg ? 4'b0000 : (dword_count == 1 ? first_be & last_be : first_be); // first BE 0 tlp_tuser[7:4] = 4'd0; // first BE 1 - tlp_tuser[11:8] = dword_count == 1 ? 4'b0000 : last_be; // last BE 0 + tlp_tuser[11:8] = req_zero_len_reg ? 4'b0000 : (dword_count == 1 ? 4'b0000 : last_be); // last BE 0 tlp_tuser[15:12] = 4'd0; // last BE 1 tlp_tuser[19:16] = 3'd0; // addr_offset tlp_tuser[21:20] = 2'b01; // is_sop @@ -611,8 +618,8 @@ always @* begin tlp_tuser[72:67] = 6'd0; // seq_num1 tlp_tuser[136:73] = 64'd0; // parity end else begin - tlp_tuser[3:0] = dword_count == 1 ? first_be & last_be : first_be; // first BE - tlp_tuser[7:4] = dword_count == 1 ? 4'b0000 : last_be; // last BE + tlp_tuser[3:0] = req_zero_len_reg ? 4'b0000 : (dword_count == 1 ? first_be & last_be : first_be); // first BE + tlp_tuser[7:4] = req_zero_len_reg ? 4'b0000 : (dword_count == 1 ? 4'b0000 : last_be); // last BE tlp_tuser[10:8] = 3'd0; // addr_offset tlp_tuser[11] = 1'b0; // discontinue tlp_tuser[12] = 1'b0; // tph_present @@ -656,7 +663,14 @@ always @* begin req_ram_sel_next = s_axis_read_desc_ram_sel; req_pcie_addr_next = s_axis_read_desc_pcie_addr; req_ram_addr_next = s_axis_read_desc_ram_addr; - req_op_count_next = s_axis_read_desc_len; + if (s_axis_read_desc_len == 0) begin + // zero-length operation + req_op_count_next = 1; + req_zero_len_next = 1'b1; + end else begin + req_op_count_next = s_axis_read_desc_len; + req_zero_len_next = 1'b0; + end req_op_tag_next = op_tag_fifo_mem[op_tag_fifo_rd_ptr_reg[OP_TAG_WIDTH-1:0]]; op_table_start_ptr = op_tag_fifo_mem[op_tag_fifo_rd_ptr_reg[OP_TAG_WIDTH-1:0]]; op_table_start_tag = s_axis_read_desc_tag; @@ -682,6 +696,7 @@ always @* begin pcie_tag_table_start_ram_sel_next = req_ram_sel_reg; pcie_tag_table_start_ram_addr_next = req_ram_addr_reg + req_tlp_count_next; pcie_tag_table_start_op_tag_next = req_op_tag_reg; + pcie_tag_table_start_zero_len_next = req_zero_len_reg; pcie_tag_table_start_en_next = 1'b1; op_table_read_start_ptr = req_op_tag_reg; @@ -720,6 +735,7 @@ always @* begin pcie_tag_table_start_ram_sel_next = req_ram_sel_reg; pcie_tag_table_start_ram_addr_next = req_ram_addr_reg + req_tlp_count_next; pcie_tag_table_start_op_tag_next = req_op_tag_reg; + pcie_tag_table_start_zero_len_next = req_zero_len_reg; pcie_tag_table_start_en_next = 1'b1; op_table_read_start_ptr = req_op_tag_reg; @@ -771,6 +787,7 @@ always @* begin addr_next = addr_reg; addr_delay_next = addr_delay_reg; op_count_next = op_count_reg; + zero_len_next = zero_len_reg; ram_mask_next = ram_mask_reg; ram_mask_0_next = ram_mask_0_reg; ram_mask_1_next = ram_mask_1_reg; @@ -851,6 +868,7 @@ always @* begin ram_sel_next = pcie_tag_table_ram_sel[pcie_tag_next]; addr_next = pcie_tag_table_ram_addr[pcie_tag_next] - byte_count_next; + zero_len_next = pcie_tag_table_zero_len[pcie_tag_next]; offset_next = addr_next[OFFSET_WIDTH-1:0] - (12+lower_addr_next[1:0]); @@ -915,12 +933,19 @@ always @* begin if (pcie_tag_table_active_b[pcie_tag_next] != pcie_tag_table_active_a[pcie_tag_next] && error_code_next == RC_ERROR_NORMAL_TERMINATION) begin // no error - rc_tdata_int_next = s_axis_rc_tdata; - rc_tvalid_int_next = 1'b1; - status_fifo_mask_next = 1'b1; - status_fifo_finish_next = 1'b0; - status_fifo_we_next = 1'b1; + if (zero_len_next) begin + status_fifo_mask_next = 1'b0; + status_fifo_finish_next = 1'b0; + status_fifo_we_next = 1'b1; + end else begin + rc_tdata_int_next = s_axis_rc_tdata; + rc_tvalid_int_next = 1'b1; + + status_fifo_mask_next = 1'b1; + status_fifo_finish_next = 1'b0; + status_fifo_we_next = 1'b1; + end if (last_cycle) begin if (final_cpl_next) begin @@ -1049,6 +1074,7 @@ always @* begin ram_sel_next = pcie_tag_table_ram_sel[pcie_tag_next]; addr_next = pcie_tag_table_ram_addr[pcie_tag_next] - byte_count_reg; + zero_len_next = pcie_tag_table_zero_len[pcie_tag_next]; offset_next = addr_next[OFFSET_WIDTH-1:0] - (4+lower_addr_reg[1:0]); @@ -1083,12 +1109,19 @@ always @* begin if (pcie_tag_table_active_b[pcie_tag_next] != pcie_tag_table_active_a[pcie_tag_next] && error_code_reg == RC_ERROR_NORMAL_TERMINATION) begin // no error - rc_tdata_int_next = s_axis_rc_tdata; - rc_tvalid_int_next = 1'b1; - status_fifo_mask_next = 1'b1; - status_fifo_finish_next = 1'b0; - status_fifo_we_next = 1'b1; + if (zero_len_next) begin + status_fifo_mask_next = 1'b0; + status_fifo_finish_next = 1'b0; + status_fifo_we_next = 1'b1; + end else begin + rc_tdata_int_next = s_axis_rc_tdata; + rc_tvalid_int_next = 1'b1; + + status_fifo_mask_next = 1'b1; + status_fifo_finish_next = 1'b0; + status_fifo_we_next = 1'b1; + end if (last_cycle) begin if (final_cpl_next) begin @@ -1349,6 +1382,7 @@ always @(posedge clk) begin req_ram_addr_reg <= req_ram_addr_next; req_op_count_reg <= req_op_count_next; req_tlp_count_reg <= req_tlp_count_next; + req_zero_len_reg <= req_zero_len_next; req_op_tag_reg <= req_op_tag_next; req_pcie_tag_reg <= req_pcie_tag_next; req_pcie_tag_valid_reg <= req_pcie_tag_valid_next; @@ -1360,6 +1394,7 @@ always @(posedge clk) begin addr_reg <= addr_next; addr_delay_reg <= addr_delay_next; op_count_reg <= op_count_next; + zero_len_reg <= zero_len_next; ram_mask_reg <= ram_mask_next; ram_mask_0_reg <= ram_mask_0_next; ram_mask_1_reg <= ram_mask_1_next; @@ -1427,6 +1462,7 @@ always @(posedge clk) begin pcie_tag_table_start_ram_sel_reg <= pcie_tag_table_start_ram_sel_next; pcie_tag_table_start_ram_addr_reg <= pcie_tag_table_start_ram_addr_next; pcie_tag_table_start_op_tag_reg <= pcie_tag_table_start_op_tag_next; + pcie_tag_table_start_zero_len_reg <= pcie_tag_table_start_zero_len_next; pcie_tag_table_start_en_reg <= pcie_tag_table_start_en_next; if (init_pcie_tag_reg) begin @@ -1435,6 +1471,7 @@ always @(posedge clk) begin pcie_tag_table_ram_sel[pcie_tag_table_start_ptr_reg] <= pcie_tag_table_start_ram_sel_reg; pcie_tag_table_ram_addr[pcie_tag_table_start_ptr_reg] <= pcie_tag_table_start_ram_addr_reg; pcie_tag_table_op_tag[pcie_tag_table_start_ptr_reg] <= pcie_tag_table_start_op_tag_reg; + pcie_tag_table_zero_len[pcie_tag_table_start_ptr_reg] <= pcie_tag_table_start_zero_len_reg; pcie_tag_table_active_a[pcie_tag_table_start_ptr_reg] <= !pcie_tag_table_active_b[pcie_tag_table_start_ptr_reg]; end diff --git a/rtl/dma_if_pcie_us_wr.v b/rtl/dma_if_pcie_us_wr.v index 244577f61..708b97ce6 100644 --- a/rtl/dma_if_pcie_us_wr.v +++ b/rtl/dma_if_pcie_us_wr.v @@ -302,6 +302,7 @@ reg [RAM_ADDR_WIDTH-1:0] ram_addr_reg = {RAM_ADDR_WIDTH{1'b0}}, ram_addr_next; reg [LEN_WIDTH-1:0] op_count_reg = {LEN_WIDTH{1'b0}}, op_count_next; reg [LEN_WIDTH-1:0] tr_count_reg = {LEN_WIDTH{1'b0}}, tr_count_next; reg [12:0] tlp_count_reg = 13'd0, tlp_count_next; +reg zero_len_reg = 1'b0, zero_len_next; reg [TAG_WIDTH-1:0] tag_reg = {TAG_WIDTH{1'b0}}, tag_next; reg [PCIE_ADDR_WIDTH-1:0] read_pcie_addr_reg = {PCIE_ADDR_WIDTH{1'b0}}, read_pcie_addr_next; @@ -320,6 +321,7 @@ reg [RAM_OFFSET_WIDTH-1:0] end_offset_reg = {RAM_OFFSET_WIDTH{1'b0}}, end_offset reg [PCIE_ADDR_WIDTH-1:0] tlp_addr_reg = {PCIE_ADDR_WIDTH{1'b0}}, tlp_addr_next; reg [11:0] tlp_len_reg = 12'd0, tlp_len_next; +reg tlp_zero_len_reg = 1'b0, tlp_zero_len_next; reg [RAM_OFFSET_WIDTH-1:0] offset_reg = {RAM_OFFSET_WIDTH{1'b0}}, offset_next; reg [9:0] dword_count_reg = 10'd0, dword_count_next; reg [SEG_COUNT-1:0] ram_mask_reg = {SEG_COUNT{1'b0}}, ram_mask_next; @@ -407,6 +409,7 @@ assign ram_rd_resp_ready = ram_rd_resp_ready_cmb; reg [OP_TAG_WIDTH+1-1:0] op_table_start_ptr_reg = 0; reg [PCIE_ADDR_WIDTH-1:0] op_table_start_pcie_addr; reg [11:0] op_table_start_len; +reg op_table_start_zero_len; reg [9:0] op_table_start_dword_len; reg [CYCLE_COUNT_WIDTH-1:0] op_table_start_cycle_count; reg [RAM_OFFSET_WIDTH-1:0] op_table_start_offset; @@ -424,6 +427,7 @@ reg [2**OP_TAG_WIDTH-1:0] op_table_active = 0; reg [2**OP_TAG_WIDTH-1:0] op_table_tx_done = 0; reg [PCIE_ADDR_WIDTH-1:0] op_table_pcie_addr[2**OP_TAG_WIDTH-1:0]; reg [11:0] op_table_len[2**OP_TAG_WIDTH-1:0]; +reg op_table_zero_len[2**OP_TAG_WIDTH-1:0]; reg [9:0] op_table_dword_len[2**OP_TAG_WIDTH-1:0]; reg [CYCLE_COUNT_WIDTH-1:0] op_table_cycle_count[2**OP_TAG_WIDTH-1:0]; reg [RAM_OFFSET_WIDTH-1:0] op_table_offset[2**OP_TAG_WIDTH-1:0]; @@ -436,6 +440,7 @@ initial begin for (i = 0; i < 2**OP_TAG_WIDTH; i = i + 1) begin op_table_pcie_addr[i] = 0; op_table_len[i] = 0; + op_table_zero_len[i] = 0; op_table_dword_len[i] = 0; op_table_cycle_count[i] = 0; op_table_offset[i] = 0; @@ -455,6 +460,7 @@ always @* begin op_count_next = op_count_reg; tr_count_next = tr_count_reg; tlp_count_next = tlp_count_reg; + zero_len_next = zero_len_reg; tag_next = tag_reg; read_cmd_pcie_addr_next = read_cmd_pcie_addr_reg; @@ -467,6 +473,7 @@ always @* begin op_table_start_pcie_addr = pcie_addr_reg; op_table_start_len = tlp_count_reg; + op_table_start_zero_len = zero_len_reg; op_table_start_dword_len = (tlp_count_reg + pcie_addr_reg[1:0] + 3) >> 2; op_table_start_cycle_count = 0; if (AXIS_PCIE_DATA_WIDTH >= 256) begin @@ -487,7 +494,14 @@ always @* begin pcie_addr_next = s_axis_write_desc_pcie_addr; ram_sel_next = s_axis_write_desc_ram_sel; ram_addr_next = s_axis_write_desc_ram_addr; - op_count_next = s_axis_write_desc_len; + if (s_axis_write_desc_len == 0) begin + // zero-length operation + op_count_next = 1; + zero_len_next = 1'b1; + end else begin + op_count_next = s_axis_write_desc_len; + zero_len_next = 1'b0; + end tag_next = s_axis_write_desc_tag; // TLP size computation @@ -540,6 +554,7 @@ always @* begin op_table_start_pcie_addr = pcie_addr_reg; op_table_start_len = tlp_count_reg; + op_table_start_zero_len = zero_len_reg; op_table_start_dword_len = (tlp_count_reg + pcie_addr_reg[1:0] + 3) >> 2; if (AXIS_PCIE_DATA_WIDTH >= 256) begin op_table_start_offset = 16+pcie_addr_reg[1:0]-ram_addr_reg[RAM_OFFSET_WIDTH-1:0]; @@ -756,6 +771,7 @@ always @* begin tlp_addr_next = tlp_addr_reg; tlp_len_next = tlp_len_reg; + tlp_zero_len_next = tlp_zero_len_reg; dword_count_next = dword_count_reg; offset_next = offset_reg; ram_mask_next = ram_mask_reg; @@ -983,6 +999,7 @@ always @* begin tlp_addr_next = op_table_pcie_addr[op_table_tx_start_ptr_reg[OP_TAG_WIDTH-1:0]]; tlp_len_next = op_table_len[op_table_tx_start_ptr_reg[OP_TAG_WIDTH-1:0]]; + tlp_zero_len_next = op_table_zero_len[op_table_tx_start_ptr_reg[OP_TAG_WIDTH-1:0]]; dword_count_next = op_table_dword_len[op_table_tx_start_ptr_reg[OP_TAG_WIDTH-1:0]]; offset_next = op_table_offset[op_table_tx_start_ptr_reg[OP_TAG_WIDTH-1:0]]; cycle_count_next = op_table_cycle_count[op_table_tx_start_ptr_reg[OP_TAG_WIDTH-1:0]]; @@ -1000,8 +1017,13 @@ always @* begin if (!tlp_header_valid_next) begin tlp_header_data_next = tlp_header_data; - tlp_first_be_next = dword_count_reg == 1 ? first_be & last_be : first_be; - tlp_last_be_next = dword_count_reg == 1 ? 4'b0000 : last_be; + if (tlp_zero_len_reg) begin + tlp_first_be_next = 4'b0000; + tlp_last_be_next = 4'b0000; + end else begin + tlp_first_be_next = dword_count_reg == 1 ? first_be & last_be : first_be; + tlp_last_be_next = dword_count_reg == 1 ? 4'b0000 : last_be; + end tlp_seq_num_next = op_table_tx_finish_ptr_reg[OP_TAG_WIDTH-1:0] & SEQ_NUM_MASK; end @@ -1157,6 +1179,7 @@ always @(posedge clk) begin op_count_reg <= op_count_next; tr_count_reg <= tr_count_next; tlp_count_reg <= tlp_count_next; + zero_len_reg <= zero_len_next; tag_reg <= tag_next; read_pcie_addr_reg <= read_pcie_addr_next; @@ -1175,6 +1198,7 @@ always @(posedge clk) begin tlp_addr_reg <= tlp_addr_next; tlp_len_reg <= tlp_len_next; + tlp_zero_len_reg <= tlp_zero_len_next; dword_count_reg <= dword_count_next; offset_reg <= offset_next; ram_mask_reg <= ram_mask_next; @@ -1243,6 +1267,7 @@ always @(posedge clk) begin op_table_tx_done[op_table_start_ptr_reg[OP_TAG_WIDTH-1:0]] <= 1'b0; op_table_pcie_addr[op_table_start_ptr_reg[OP_TAG_WIDTH-1:0]] <= op_table_start_pcie_addr; op_table_len[op_table_start_ptr_reg[OP_TAG_WIDTH-1:0]] <= op_table_start_len; + op_table_zero_len[op_table_start_ptr_reg[OP_TAG_WIDTH-1:0]] <= op_table_start_zero_len; op_table_dword_len[op_table_start_ptr_reg[OP_TAG_WIDTH-1:0]] <= op_table_start_dword_len; op_table_cycle_count[op_table_start_ptr_reg[OP_TAG_WIDTH-1:0]] <= op_table_start_cycle_count; op_table_offset[op_table_start_ptr_reg[OP_TAG_WIDTH-1:0]] <= op_table_start_offset; diff --git a/rtl/pcie_us_axi_dma_rd.v b/rtl/pcie_us_axi_dma_rd.v index 791fba3ac..b3e99d38c 100644 --- a/rtl/pcie_us_axi_dma_rd.v +++ b/rtl/pcie_us_axi_dma_rd.v @@ -339,6 +339,7 @@ reg [PCIE_ADDR_WIDTH-1:0] req_pcie_addr_reg = {PCIE_ADDR_WIDTH{1'b0}}, req_pcie_ reg [AXI_ADDR_WIDTH-1:0] req_axi_addr_reg = {AXI_ADDR_WIDTH{1'b0}}, req_axi_addr_next; reg [LEN_WIDTH-1:0] req_op_count_reg = {LEN_WIDTH{1'b0}}, req_op_count_next; reg [12:0] req_tlp_count_reg = 13'd0, req_tlp_count_next; +reg req_zero_len_reg = 1'b0, req_zero_len_next; reg [OP_TAG_WIDTH-1:0] req_op_tag_reg = {OP_TAG_WIDTH{1'b0}}, req_op_tag_next; reg [PCIE_TAG_WIDTH-1:0] req_pcie_tag_reg = {PCIE_TAG_WIDTH{1'b0}}, req_pcie_tag_next; reg req_pcie_tag_valid_reg = 1'b0, req_pcie_tag_valid_next; @@ -350,6 +351,7 @@ reg [AXI_ADDR_WIDTH-1:0] axi_addr_reg = {AXI_ADDR_WIDTH{1'b0}}, axi_addr_next; reg [9:0] op_dword_count_reg = 10'd0, op_dword_count_next; reg [12:0] op_count_reg = 13'd0, op_count_next; reg [12:0] tr_count_reg = 13'd0, tr_count_next; +reg zero_len_reg = 1'b0, zero_len_next; reg [CYCLE_COUNT_WIDTH-1:0] input_cycle_count_reg = {CYCLE_COUNT_WIDTH{1'b0}}, input_cycle_count_next; reg [CYCLE_COUNT_WIDTH-1:0] output_cycle_count_reg = {CYCLE_COUNT_WIDTH{1'b0}}, output_cycle_count_next; reg input_active_reg = 1'b0, input_active_next; @@ -435,12 +437,14 @@ assign status_error_uncor = status_error_uncor_reg; reg [PCIE_TAG_WIDTH-1:0] pcie_tag_table_start_ptr_reg = 0, pcie_tag_table_start_ptr_next; reg [AXI_ADDR_WIDTH-1:0] pcie_tag_table_start_axi_addr_reg = 0, pcie_tag_table_start_axi_addr_next; reg [OP_TAG_WIDTH-1:0] pcie_tag_table_start_op_tag_reg = 0, pcie_tag_table_start_op_tag_next; +reg pcie_tag_table_start_zero_len_reg = 1'b0, pcie_tag_table_start_zero_len_next; reg pcie_tag_table_start_en_reg = 1'b0, pcie_tag_table_start_en_next; reg [PCIE_TAG_WIDTH-1:0] pcie_tag_table_finish_ptr; reg pcie_tag_table_finish_en; reg [AXI_ADDR_WIDTH-1:0] pcie_tag_table_axi_addr[(2**PCIE_TAG_WIDTH)-1:0]; reg [OP_TAG_WIDTH-1:0] pcie_tag_table_op_tag[(2**PCIE_TAG_WIDTH)-1:0]; +reg pcie_tag_table_zero_len[(2**PCIE_TAG_WIDTH)-1:0]; reg pcie_tag_table_active_a[(2**PCIE_TAG_WIDTH)-1:0]; reg pcie_tag_table_active_b[(2**PCIE_TAG_WIDTH)-1:0]; @@ -510,6 +514,7 @@ initial begin for (i = 0; i < 2**PCIE_TAG_WIDTH; i = i + 1) begin pcie_tag_table_axi_addr[i] = 0; pcie_tag_table_op_tag[i] = 0; + pcie_tag_table_zero_len[i] = 0; pcie_tag_table_active_a[i] = 0; pcie_tag_table_active_b[i] = 0; end @@ -524,6 +529,7 @@ always @* begin req_axi_addr_next = req_axi_addr_reg; req_op_count_next = req_op_count_reg; req_tlp_count_next = req_tlp_count_reg; + req_zero_len_next = req_zero_len_reg; req_op_tag_next = req_op_tag_reg; req_pcie_tag_next = req_pcie_tag_reg; req_pcie_tag_valid_next = req_pcie_tag_valid_reg; @@ -584,6 +590,7 @@ always @* begin pcie_tag_table_start_ptr_next = req_pcie_tag_reg; pcie_tag_table_start_axi_addr_next = req_axi_addr_reg + req_tlp_count_next; pcie_tag_table_start_op_tag_next = req_op_tag_reg; + pcie_tag_table_start_zero_len_next = req_zero_len_reg; pcie_tag_table_start_en_next = 1'b0; first_be = 4'b1111 << req_pcie_addr_reg[1:0]; @@ -604,9 +611,9 @@ always @* begin tlp_header_data[127] = 1'b0; // force ECRC if (AXIS_PCIE_DATA_WIDTH == 512) begin - tlp_tuser[3:0] = dword_count == 1 ? first_be & last_be : first_be; // first BE 0 + tlp_tuser[3:0] = req_zero_len_reg ? 4'b0000 : (dword_count == 1 ? first_be & last_be : first_be); // first BE 0 tlp_tuser[7:4] = 4'd0; // first BE 1 - tlp_tuser[11:8] = dword_count == 1 ? 4'b0000 : last_be; // last BE 0 + tlp_tuser[11:8] = req_zero_len_reg ? 4'b0000 : (dword_count == 1 ? 4'b0000 : last_be); // last BE 0 tlp_tuser[15:12] = 4'd0; // last BE 1 tlp_tuser[19:16] = 3'd0; // addr_offset tlp_tuser[21:20] = 2'b01; // is_sop @@ -624,8 +631,8 @@ always @* begin tlp_tuser[72:67] = 6'd0; // seq_num1 tlp_tuser[136:73] = 64'd0; // parity end else begin - tlp_tuser[3:0] = dword_count == 1 ? first_be & last_be : first_be; // first BE - tlp_tuser[7:4] = dword_count == 1 ? 4'b0000 : last_be; // last BE + tlp_tuser[3:0] = req_zero_len_reg ? 4'b0000 : (dword_count == 1 ? first_be & last_be : first_be); // first BE + tlp_tuser[7:4] = req_zero_len_reg ? 4'b0000 : (dword_count == 1 ? 4'b0000 : last_be); // last BE tlp_tuser[10:8] = 3'd0; // addr_offset tlp_tuser[11] = 1'b0; // discontinue tlp_tuser[12] = 1'b0; // tph_present @@ -668,7 +675,14 @@ always @* begin s_axis_read_desc_ready_next = 1'b0; req_pcie_addr_next = s_axis_read_desc_pcie_addr; req_axi_addr_next = s_axis_read_desc_axi_addr; - req_op_count_next = s_axis_read_desc_len; + if (s_axis_read_desc_len == 0) begin + // zero-length operation + req_op_count_next = 1; + req_zero_len_next = 1'b1; + end else begin + req_op_count_next = s_axis_read_desc_len; + req_zero_len_next = 1'b0; + end req_op_tag_next = op_tag_fifo_mem[op_tag_fifo_rd_ptr_reg[OP_TAG_WIDTH-1:0]]; op_table_start_ptr = op_tag_fifo_mem[op_tag_fifo_rd_ptr_reg[OP_TAG_WIDTH-1:0]]; op_table_start_tag = s_axis_read_desc_tag; @@ -694,6 +708,7 @@ always @* begin pcie_tag_table_start_ptr_next = req_pcie_tag_reg; pcie_tag_table_start_axi_addr_next = req_axi_addr_reg + req_tlp_count_next; pcie_tag_table_start_op_tag_next = req_op_tag_reg; + pcie_tag_table_start_zero_len_next = req_zero_len_reg; pcie_tag_table_start_en_next = 1'b1; op_table_read_start_ptr = req_op_tag_reg; @@ -731,6 +746,7 @@ always @* begin pcie_tag_table_start_ptr_next = req_pcie_tag_reg; pcie_tag_table_start_axi_addr_next = req_axi_addr_reg + req_tlp_count_next; pcie_tag_table_start_op_tag_next = req_op_tag_reg; + pcie_tag_table_start_zero_len_next = req_zero_len_reg; pcie_tag_table_start_en_next = 1'b1; op_table_read_start_ptr = req_op_tag_reg; @@ -781,6 +797,7 @@ always @* begin axi_addr_next = axi_addr_reg; op_count_next = op_count_reg; tr_count_next = tr_count_reg; + zero_len_next = zero_len_reg; op_dword_count_next = op_dword_count_reg; input_cycle_count_next = input_cycle_count_reg; output_cycle_count_next = output_cycle_count_reg; @@ -859,6 +876,7 @@ always @* begin end axi_addr_next = pcie_tag_table_axi_addr[pcie_tag_next] - byte_count_next; + zero_len_next = pcie_tag_table_zero_len[pcie_tag_next]; offset_next = axi_addr_next[OFFSET_WIDTH-1:0] - (12+lower_addr_next[1:0]); bubble_cycle_next = axi_addr_next[OFFSET_WIDTH-1:0] < 12+lower_addr_next[1:0]; @@ -987,6 +1005,7 @@ always @* begin //s_axis_rc_tdata[30:28]; // attr axi_addr_next = pcie_tag_table_axi_addr[pcie_tag_next] - byte_count_reg; + zero_len_next = pcie_tag_table_zero_len[pcie_tag_next]; offset_next = axi_addr_next[OFFSET_WIDTH-1:0] - (4+lower_addr_reg[1:0]); bubble_cycle_next = axi_addr_next[OFFSET_WIDTH-1:0] < 4+lower_addr_reg[1:0]; @@ -1125,7 +1144,9 @@ always @* begin end else begin m_axi_wdata_int = shift_axis_tdata; end - if (first_cycle_reg) begin + if (zero_len_reg) begin + m_axi_wstrb_int = {AXI_STRB_WIDTH{1'b0}}; + end else if (first_cycle_reg) begin m_axi_wstrb_int = {AXI_STRB_WIDTH{1'b1}} << first_cycle_offset_reg; end else begin m_axi_wstrb_int = {AXI_STRB_WIDTH{1'b1}}; @@ -1139,7 +1160,7 @@ always @* begin last_cycle_next = output_cycle_count_next == 0; if (last_cycle_reg) begin - if (last_cycle_offset_reg != 0 && op_count_reg == 0) begin + if (last_cycle_offset_reg != 0 && op_count_reg == 0 && !zero_len_reg) begin m_axi_wstrb_int = m_axi_wstrb_int & {AXI_STRB_WIDTH{1'b1}} >> (AXI_STRB_WIDTH-last_cycle_offset_reg); end m_axi_wlast_int = 1'b1; @@ -1342,6 +1363,7 @@ always @(posedge clk) begin req_axi_addr_reg <= req_axi_addr_next; req_op_count_reg <= req_op_count_next; req_tlp_count_reg <= req_tlp_count_next; + req_zero_len_reg <= req_zero_len_next; req_op_tag_reg <= req_op_tag_next; req_pcie_tag_reg <= req_pcie_tag_next; req_pcie_tag_valid_reg <= req_pcie_tag_valid_next; @@ -1352,6 +1374,7 @@ always @(posedge clk) begin axi_addr_reg <= axi_addr_next; op_count_reg <= op_count_next; tr_count_reg <= tr_count_next; + zero_len_reg <= zero_len_next; op_dword_count_reg <= op_dword_count_next; input_cycle_count_reg <= input_cycle_count_next; output_cycle_count_reg <= output_cycle_count_next; @@ -1408,6 +1431,7 @@ always @(posedge clk) begin pcie_tag_table_start_ptr_reg <= pcie_tag_table_start_ptr_next; pcie_tag_table_start_axi_addr_reg <= pcie_tag_table_start_axi_addr_next; pcie_tag_table_start_op_tag_reg <= pcie_tag_table_start_op_tag_next; + pcie_tag_table_start_zero_len_reg <= pcie_tag_table_start_zero_len_next; pcie_tag_table_start_en_reg <= pcie_tag_table_start_en_next; if (init_pcie_tag_reg) begin @@ -1415,6 +1439,7 @@ always @(posedge clk) begin end else if (pcie_tag_table_start_en_reg) begin pcie_tag_table_axi_addr[pcie_tag_table_start_ptr_reg] <= pcie_tag_table_start_axi_addr_reg; pcie_tag_table_op_tag[pcie_tag_table_start_ptr_reg] <= pcie_tag_table_start_op_tag_reg; + pcie_tag_table_zero_len[pcie_tag_table_start_ptr_reg] <= pcie_tag_table_start_zero_len_reg; pcie_tag_table_active_a[pcie_tag_table_start_ptr_reg] <= !pcie_tag_table_active_b[pcie_tag_table_start_ptr_reg]; end diff --git a/rtl/pcie_us_axi_dma_wr.v b/rtl/pcie_us_axi_dma_wr.v index ea666b7c3..3bd8bd98f 100644 --- a/rtl/pcie_us_axi_dma_wr.v +++ b/rtl/pcie_us_axi_dma_wr.v @@ -290,9 +290,11 @@ reg [AXI_ADDR_WIDTH-1:0] axi_addr_reg = {AXI_ADDR_WIDTH{1'b0}}, axi_addr_next; reg [LEN_WIDTH-1:0] op_count_reg = {LEN_WIDTH{1'b0}}, op_count_next; reg [LEN_WIDTH-1:0] tr_count_reg = {LEN_WIDTH{1'b0}}, tr_count_next; reg [12:0] tlp_count_reg = 13'd0, tlp_count_next; +reg zero_len_reg = 1'b0, zero_len_next; reg [PCIE_ADDR_WIDTH-1:0] tlp_addr_reg = {PCIE_ADDR_WIDTH{1'b0}}, tlp_addr_next; reg [11:0] tlp_len_reg = 12'd0, tlp_len_next; +reg tlp_zero_len_reg = 1'b0, tlp_zero_len_next; reg [OFFSET_WIDTH-1:0] offset_reg = {OFFSET_WIDTH{1'b0}}, offset_next; reg [9:0] dword_count_reg = 10'd0, dword_count_next; reg [CYCLE_COUNT_WIDTH-1:0] input_cycle_count_reg = {CYCLE_COUNT_WIDTH{1'b0}}, input_cycle_count_next; @@ -370,6 +372,7 @@ assign m_axi_rready = m_axi_rready_reg; reg [OP_TAG_WIDTH+1-1:0] op_table_start_ptr_reg = 0; reg [PCIE_ADDR_WIDTH-1:0] op_table_start_pcie_addr; reg [11:0] op_table_start_len; +reg op_table_start_zero_len; reg [9:0] op_table_start_dword_len; reg [CYCLE_COUNT_WIDTH-1:0] op_table_start_input_cycle_count; reg [CYCLE_COUNT_WIDTH-1:0] op_table_start_output_cycle_count; @@ -389,6 +392,7 @@ reg [2**OP_TAG_WIDTH-1:0] op_table_active = 0; reg [2**OP_TAG_WIDTH-1:0] op_table_tx_done = 0; reg [PCIE_ADDR_WIDTH-1:0] op_table_pcie_addr[2**OP_TAG_WIDTH-1:0]; reg [11:0] op_table_len[2**OP_TAG_WIDTH-1:0]; +reg op_table_zero_len[2**OP_TAG_WIDTH-1:0]; reg [9:0] op_table_dword_len[2**OP_TAG_WIDTH-1:0]; reg [CYCLE_COUNT_WIDTH-1:0] op_table_input_cycle_count[2**OP_TAG_WIDTH-1:0]; reg [CYCLE_COUNT_WIDTH-1:0] op_table_output_cycle_count[2**OP_TAG_WIDTH-1:0]; @@ -403,6 +407,7 @@ initial begin for (i = 0; i < 2**OP_TAG_WIDTH; i = i + 1) begin op_table_pcie_addr[i] = 0; op_table_len[i] = 0; + op_table_zero_len[i] = 0; op_table_dword_len[i] = 0; op_table_input_cycle_count[i] = 0; op_table_output_cycle_count[i] = 0; @@ -427,6 +432,7 @@ always @* begin op_count_next = op_count_reg; tr_count_next = tr_count_reg; tlp_count_next = tlp_count_reg; + zero_len_next = zero_len_reg; tlp_cmd_tag_next = tlp_cmd_tag_reg; tlp_cmd_last_next = tlp_cmd_last_reg; @@ -511,6 +517,7 @@ always @* begin op_table_start_pcie_addr = pcie_addr_reg; op_table_start_len = tlp_count; + op_table_start_zero_len = zero_len_reg; op_table_start_dword_len = dword_count; op_table_start_input_cycle_count = (tlp_count + axi_addr_reg[OFFSET_WIDTH-1:0] - 1) >> AXI_BURST_SIZE; if (AXIS_PCIE_DATA_WIDTH >= 256) begin @@ -537,7 +544,14 @@ always @* begin pcie_addr_next = s_axis_write_desc_pcie_addr; axi_addr_next = s_axis_write_desc_axi_addr; - op_count_next = s_axis_write_desc_len; + if (s_axis_write_desc_len == 0) begin + // zero-length operation + op_count_next = 1; + zero_len_next = 1'b1; + end else begin + op_count_next = s_axis_write_desc_len; + zero_len_next = 1'b0; + end if (s_axis_write_desc_ready & s_axis_write_desc_valid) begin s_axis_write_desc_ready_next = 1'b0; @@ -553,6 +567,7 @@ always @* begin op_table_start_pcie_addr = pcie_addr_reg; op_table_start_len = tlp_count; + op_table_start_zero_len = zero_len_reg; op_table_start_dword_len = dword_count; op_table_start_input_cycle_count = (tlp_count + axi_addr_reg[OFFSET_WIDTH-1:0] - 1) >> AXI_BURST_SIZE; if (AXIS_PCIE_DATA_WIDTH >= 256) begin @@ -627,6 +642,7 @@ always @* begin tlp_addr_next = tlp_addr_reg; tlp_len_next = tlp_len_reg; + tlp_zero_len_next = tlp_zero_len_reg; dword_count_next = dword_count_reg; offset_next = offset_reg; input_cycle_count_next = input_cycle_count_reg; @@ -655,9 +671,9 @@ always @* begin tlp_header_data[127] = 1'b0; // force ECRC if (AXIS_PCIE_DATA_WIDTH == 512) begin - tlp_tuser[3:0] = dword_count_reg == 1 ? first_be & last_be : first_be; // first BE 0 + tlp_tuser[3:0] = tlp_zero_len_reg ? 4'b0000 : (dword_count_reg == 1 ? first_be & last_be : first_be); // first BE 0 tlp_tuser[7:4] = 4'd0; // first BE 1 - tlp_tuser[11:8] = dword_count_reg == 1 ? 4'b0000 : last_be; // last BE 0 + tlp_tuser[11:8] = tlp_zero_len_reg ? 4'b0000 : (dword_count_reg == 1 ? 4'b0000 : last_be); // last BE 0 tlp_tuser[15:12] = 4'd0; // last BE 1 tlp_tuser[19:16] = 3'd0; // addr_offset tlp_tuser[21:20] = 2'b01; // is_sop @@ -675,8 +691,8 @@ always @* begin tlp_tuser[72:67] = 6'd0; // seq_num1 tlp_tuser[136:73] = 64'd0; // parity end else begin - tlp_tuser[3:0] = dword_count_reg == 1 ? first_be & last_be : first_be; // first BE - tlp_tuser[7:4] = dword_count_reg == 1 ? 4'b0000 : last_be; // last BE + tlp_tuser[3:0] = tlp_zero_len_reg ? 4'b0000 : (dword_count_reg == 1 ? first_be & last_be : first_be); // first BE + tlp_tuser[7:4] = tlp_zero_len_reg ? 4'b0000 : (dword_count_reg == 1 ? 4'b0000 : last_be); // last BE tlp_tuser[10:8] = 3'd0; // addr_offset tlp_tuser[11] = 1'b0; // discontinue tlp_tuser[12] = 1'b0; // tph_present @@ -733,6 +749,7 @@ always @* begin tlp_addr_next = op_table_pcie_addr[op_table_tx_start_ptr_reg[OP_TAG_WIDTH-1:0]]; tlp_len_next = op_table_len[op_table_tx_start_ptr_reg[OP_TAG_WIDTH-1:0]]; + tlp_zero_len_next = op_table_zero_len[op_table_tx_start_ptr_reg[OP_TAG_WIDTH-1:0]]; dword_count_next = op_table_dword_len[op_table_tx_start_ptr_reg[OP_TAG_WIDTH-1:0]]; offset_next = op_table_offset[op_table_tx_start_ptr_reg[OP_TAG_WIDTH-1:0]]; input_cycle_count_next = op_table_input_cycle_count[op_table_tx_start_ptr_reg[OP_TAG_WIDTH-1:0]]; @@ -1010,9 +1027,11 @@ always @(posedge clk) begin op_count_reg <= op_count_next; tr_count_reg <= tr_count_next; tlp_count_reg <= tlp_count_next; + zero_len_reg <= zero_len_next; tlp_addr_reg <= tlp_addr_next; tlp_len_reg <= tlp_len_next; + tlp_zero_len_reg <= tlp_zero_len_next; dword_count_reg <= dword_count_next; offset_reg <= offset_next; input_cycle_count_reg <= input_cycle_count_next; @@ -1062,6 +1081,7 @@ always @(posedge clk) begin op_table_tx_done[op_table_start_ptr_reg[OP_TAG_WIDTH-1:0]] <= 1'b0; op_table_pcie_addr[op_table_start_ptr_reg[OP_TAG_WIDTH-1:0]] <= op_table_start_pcie_addr; op_table_len[op_table_start_ptr_reg[OP_TAG_WIDTH-1:0]] <= op_table_start_len; + op_table_zero_len[op_table_start_ptr_reg[OP_TAG_WIDTH-1:0]] <= op_table_start_zero_len; op_table_dword_len[op_table_start_ptr_reg[OP_TAG_WIDTH-1:0]] <= op_table_start_dword_len; op_table_input_cycle_count[op_table_start_ptr_reg[OP_TAG_WIDTH-1:0]] <= op_table_start_input_cycle_count; op_table_output_cycle_count[op_table_start_ptr_reg[OP_TAG_WIDTH-1:0]] <= op_table_start_output_cycle_count; diff --git a/tb/dma_if_pcie_us/test_dma_if_pcie_us.py b/tb/dma_if_pcie_us/test_dma_if_pcie_us.py index a1b5687bf..bd9da8857 100644 --- a/tb/dma_if_pcie_us/test_dma_if_pcie_us.py +++ b/tb/dma_if_pcie_us/test_dma_if_pcie_us.py @@ -183,7 +183,7 @@ async def run_test_write(dut, idle_inserter=None, backpressure_inserter=None): tb.dut.write_enable <= 1 - for length in list(range(1, byte_lanes+3))+list(range(128-4, 128+4))+[1024]: + for length in list(range(0, byte_lanes+3))+list(range(128-4, 128+4))+[1024]: for pcie_offset in list(range(4))+list(range(4096-4, 4096)): for ram_offset in range(1): tb.log.info("length %d, pcie_offset %d, ram_offset %d", length, pcie_offset, ram_offset) @@ -239,7 +239,7 @@ async def run_test_read(dut, idle_inserter=None, backpressure_inserter=None): tb.dut.read_enable <= 1 - for length in list(range(1, byte_lanes+3))+list(range(128-4, 128+4))+[1024]: + for length in list(range(0, byte_lanes+3))+list(range(128-4, 128+4))+[1024]: for pcie_offset in list(range(4))+list(range(4096-4, 4096)): for ram_offset in range(1): tb.log.info("length %d, pcie_offset %d, ram_offset %d", length, pcie_offset, ram_offset) diff --git a/tb/dma_if_pcie_us_rd/test_dma_if_pcie_us_rd.py b/tb/dma_if_pcie_us_rd/test_dma_if_pcie_us_rd.py index 8049f8f00..7da7fd2ac 100644 --- a/tb/dma_if_pcie_us_rd/test_dma_if_pcie_us_rd.py +++ b/tb/dma_if_pcie_us_rd/test_dma_if_pcie_us_rd.py @@ -179,7 +179,7 @@ async def run_test_read(dut, idle_inserter=None, backpressure_inserter=None): tb.dut.enable <= 1 - for length in list(range(1, byte_lanes+3))+list(range(128-4, 128+4))+[1024]: + for length in list(range(0, byte_lanes+3))+list(range(128-4, 128+4))+[1024]: for pcie_offset in pcie_offsets: for ram_offset in range(byte_lanes+1): tb.log.info("length %d, pcie_offset %d, ram_offset %d", length, pcie_offset, ram_offset) diff --git a/tb/dma_if_pcie_us_wr/test_dma_if_pcie_us_wr.py b/tb/dma_if_pcie_us_wr/test_dma_if_pcie_us_wr.py index c7f396784..edcfd07c4 100644 --- a/tb/dma_if_pcie_us_wr/test_dma_if_pcie_us_wr.py +++ b/tb/dma_if_pcie_us_wr/test_dma_if_pcie_us_wr.py @@ -167,7 +167,7 @@ async def run_test_write(dut, idle_inserter=None, backpressure_inserter=None): tb.dut.enable <= 1 - for length in list(range(1, byte_lanes+3))+list(range(128-4, 128+4))+[1024]: + for length in list(range(0, byte_lanes+3))+list(range(128-4, 128+4))+[1024]: for pcie_offset in pcie_offsets: for ram_offset in range(byte_lanes+1): tb.log.info("length %d, pcie_offset %d, ram_offset %d", length, pcie_offset, ram_offset) diff --git a/tb/pcie_us_axi_dma/test_pcie_us_axi_dma.py b/tb/pcie_us_axi_dma/test_pcie_us_axi_dma.py index deab06b67..cb237ff40 100644 --- a/tb/pcie_us_axi_dma/test_pcie_us_axi_dma.py +++ b/tb/pcie_us_axi_dma/test_pcie_us_axi_dma.py @@ -175,7 +175,7 @@ async def run_test_write(dut, idle_inserter=None, backpressure_inserter=None): tb.dut.write_enable <= 1 - for length in list(range(1, byte_lanes+3))+list(range(128-4, 128+4))+[1024]: + for length in list(range(0, byte_lanes+3))+list(range(128-4, 128+4))+[1024]: for pcie_offset in list(range(4))+list(range(4096-4, 4096)): for axi_offset in range(1): tb.log.info("length %d, pcie_offset %d, axi_offset %d", length, pcie_offset, axi_offset) @@ -230,7 +230,7 @@ async def run_test_read(dut, idle_inserter=None, backpressure_inserter=None): tb.dut.read_enable <= 1 - for length in list(range(1, byte_lanes+3))+list(range(128-4, 128+4))+[1024]: + for length in list(range(0, byte_lanes+3))+list(range(128-4, 128+4))+[1024]: for pcie_offset in list(range(4))+list(range(4096-4, 4096)): for axi_offset in range(1): tb.log.info("length %d, pcie_offset %d, axi_offset %d", length, pcie_offset, axi_offset) diff --git a/tb/pcie_us_axi_dma_rd/test_pcie_us_axi_dma_rd.py b/tb/pcie_us_axi_dma_rd/test_pcie_us_axi_dma_rd.py index 264e4f38f..d19c806dd 100644 --- a/tb/pcie_us_axi_dma_rd/test_pcie_us_axi_dma_rd.py +++ b/tb/pcie_us_axi_dma_rd/test_pcie_us_axi_dma_rd.py @@ -170,7 +170,7 @@ async def run_test_read(dut, idle_inserter=None, backpressure_inserter=None): tb.dut.enable <= 1 - for length in list(range(1, byte_lanes+3))+list(range(128-4, 128+4))+[1024]: + for length in list(range(0, byte_lanes+3))+list(range(128-4, 128+4))+[1024]: for pcie_offset in pcie_offsets: for axi_offset in list(range(byte_lanes+1))+list(range(4096-byte_lanes, 4096)): tb.log.info("length %d, pcie_offset %d, axi_offset %d", length, pcie_offset, axi_offset) diff --git a/tb/pcie_us_axi_dma_wr/test_pcie_us_axi_dma_wr.py b/tb/pcie_us_axi_dma_wr/test_pcie_us_axi_dma_wr.py index b8e4e80a5..7f8e5c867 100644 --- a/tb/pcie_us_axi_dma_wr/test_pcie_us_axi_dma_wr.py +++ b/tb/pcie_us_axi_dma_wr/test_pcie_us_axi_dma_wr.py @@ -156,7 +156,7 @@ async def run_test_write(dut, idle_inserter=None, backpressure_inserter=None): tb.dut.enable <= 1 - for length in list(range(1, byte_lanes+3))+list(range(128-4, 128+4))+[1024]: + for length in list(range(0, byte_lanes+3))+list(range(128-4, 128+4))+[1024]: for pcie_offset in pcie_offsets: for axi_offset in list(range(byte_lanes+1))+list(range(4096-byte_lanes, 4096)): tb.log.info("length %d, pcie_offset %d, axi_offset %d", length, pcie_offset, axi_offset)