From 30379cd8a35e8012fb50371a9f56a2b90e79c414 Mon Sep 17 00:00:00 2001 From: Alex Forencich Date: Thu, 6 Apr 2023 20:43:13 -0700 Subject: [PATCH] Add phase tag to events and completions to avoid queue pointer reads Signed-off-by: Alex Forencich --- docs/source/rb/cqm_event.rst | 6 +- docs/source/rb/cqm_rx.rst | 6 +- docs/source/rb/cqm_tx.rst | 6 +- docs/source/rb/index.rst | 10 +- docs/source/rb/qm_rx.rst | 6 +- docs/source/rb/qm_tx.rst | 6 +- fpga/common/rtl/cpl_write.v | 216 +++++++++--------------------- fpga/common/rtl/mqnic_interface.v | 17 ++- fpga/common/tb/mqnic.py | 74 +++++----- modules/mqnic/mqnic.h | 4 - modules/mqnic/mqnic_cq.c | 15 +-- modules/mqnic/mqnic_eq.c | 30 ++--- modules/mqnic/mqnic_hw.h | 19 ++- modules/mqnic/mqnic_rx.c | 16 ++- modules/mqnic/mqnic_tx.c | 14 +- 15 files changed, 187 insertions(+), 258 deletions(-) diff --git a/docs/source/rb/cqm_event.rst b/docs/source/rb/cqm_event.rst index d298d24c1..db5a4827e 100644 --- a/docs/source/rb/cqm_event.rst +++ b/docs/source/rb/cqm_event.rst @@ -4,7 +4,7 @@ Event queue manager register block ================================== -The event queue manager register block has a header with type 0x0000C010, version 0x00000100, and indicates the location of the event queue manager registers and number of event queues. +The event queue manager register block has a header with type 0x0000C010, version 0x00000200, and indicates the location of the event queue manager registers and number of event queues. .. table:: @@ -13,7 +13,7 @@ The event queue manager register block has a header with type 0x0000C010, versio ======== ============= ====== ====== ====== ====== ============= RBB+0x00 Type Vendor ID Type RO 0x0000C010 -------- ------------- -------------- -------------- ------------- - RBB+0x04 Version Major Minor Patch Meta RO 0x00000100 + RBB+0x04 Version Major Minor Patch Meta RO 0x00000200 -------- ------------- ------ ------ ------ ------ ------------- RBB+0x08 Next pointer Pointer to next register block RO - -------- ------------- ------------------------------ ------------- @@ -82,5 +82,5 @@ Each queue has several associated control registers, detailed in this table: --------- -------------- ------ ------ -------------- ------------- Base+0x10 Head pointer Head pointer RW - --------- -------------- -------------- -------------- ------------- - Base+0x14 Tail pointer Tail pointer RW - + Base+0x18 Tail pointer Tail pointer RW - ========= ============== ============== ============== ============= diff --git a/docs/source/rb/cqm_rx.rst b/docs/source/rb/cqm_rx.rst index 3a2bbdc82..8e63c6f95 100644 --- a/docs/source/rb/cqm_rx.rst +++ b/docs/source/rb/cqm_rx.rst @@ -4,7 +4,7 @@ Receive completion queue manager register block ================================================ -The receive completion queue manager register block has a header with type 0x0000C031, version 0x00000100, and indicates the location of the receive completion queue manager registers and number of completion queues. +The receive completion queue manager register block has a header with type 0x0000C031, version 0x00000200, and indicates the location of the receive completion queue manager registers and number of completion queues. .. table:: @@ -13,7 +13,7 @@ The receive completion queue manager register block has a header with type 0x000 ======== ============= ====== ====== ====== ====== ============= RBB+0x00 Type Vendor ID Type RO 0x0000C031 -------- ------------- -------------- -------------- ------------- - RBB+0x04 Version Major Minor Patch Meta RO 0x00000100 + RBB+0x04 Version Major Minor Patch Meta RO 0x00000200 -------- ------------- ------ ------ ------ ------ ------------- RBB+0x08 Next pointer Pointer to next register block RO - -------- ------------- ------------------------------ ------------- @@ -82,5 +82,5 @@ Each queue has several associated control registers, detailed in this table: --------- -------------- ------ ------ -------------- ------------- Base+0x10 Head pointer Head pointer RW - --------- -------------- -------------- -------------- ------------- - Base+0x14 Tail pointer Tail pointer RW - + Base+0x18 Tail pointer Tail pointer RW - ========= ============== ============== ============== ============= diff --git a/docs/source/rb/cqm_tx.rst b/docs/source/rb/cqm_tx.rst index 611cec69a..32c0bc752 100644 --- a/docs/source/rb/cqm_tx.rst +++ b/docs/source/rb/cqm_tx.rst @@ -4,7 +4,7 @@ Transmit completion queue manager register block ================================================ -The transmit completion queue manager register block has a header with type 0x0000C030, version 0x00000100, and indicates the location of the transmit completion queue manager registers and number of completion queues. +The transmit completion queue manager register block has a header with type 0x0000C030, version 0x00000200, and indicates the location of the transmit completion queue manager registers and number of completion queues. .. table:: @@ -13,7 +13,7 @@ The transmit completion queue manager register block has a header with type 0x00 ======== ============= ====== ====== ====== ====== ============= RBB+0x00 Type Vendor ID Type RO 0x0000C030 -------- ------------- -------------- -------------- ------------- - RBB+0x04 Version Major Minor Patch Meta RO 0x00000100 + RBB+0x04 Version Major Minor Patch Meta RO 0x00000200 -------- ------------- ------ ------ ------ ------ ------------- RBB+0x08 Next pointer Pointer to next register block RO - -------- ------------- ------------------------------ ------------- @@ -82,5 +82,5 @@ Each queue has several associated control registers, detailed in this table: --------- -------------- ------ ------ -------------- ------------- Base+0x10 Head pointer Head pointer RW - --------- -------------- -------------- -------------- ------------- - Base+0x14 Tail pointer Tail pointer RW - + Base+0x18 Tail pointer Tail pointer RW - ========= ============== ============== ============== ============= diff --git a/docs/source/rb/index.rst b/docs/source/rb/index.rst index 78b57d0c3..4acd88d31 100644 --- a/docs/source/rb/index.rst +++ b/docs/source/rb/index.rst @@ -71,11 +71,11 @@ The NIC register space is constructed from a linked list of register blocks. Ea 0x0000C006 0x00000100 stats 0x0000C007 0x00000100 IRQ config 0x0000C008 0x00000100 :ref:`rb_clk_info` - 0x0000C010 0x00000100 :ref:`rb_cqm_event` - 0x0000C020 0x00000100 :ref:`rb_qm_tx` - 0x0000C021 0x00000100 :ref:`rb_qm_rx` - 0x0000C030 0x00000100 :ref:`rb_cqm_tx` - 0x0000C031 0x00000100 :ref:`rb_cqm_rx` + 0x0000C010 0x00000200 :ref:`rb_cqm_event` + 0x0000C020 0x00000200 :ref:`rb_qm_tx` + 0x0000C021 0x00000200 :ref:`rb_qm_rx` + 0x0000C030 0x00000200 :ref:`rb_cqm_tx` + 0x0000C031 0x00000200 :ref:`rb_cqm_rx` 0x0000C040 0x00000100 :ref:`rb_sched_rr` 0x0000C050 0x00000100 :ref:`rb_sched_ctrl_tdma` 0x0000C060 0x00000100 :ref:`rb_tdma_sch` diff --git a/docs/source/rb/qm_rx.rst b/docs/source/rb/qm_rx.rst index d43cbe7ca..db2153b66 100644 --- a/docs/source/rb/qm_rx.rst +++ b/docs/source/rb/qm_rx.rst @@ -4,7 +4,7 @@ Receive queue manager register block ===================================== -The receive queue manager register block has a header with type 0x0000C021, version 0x00000100, and indicates the location of the receive queue manager registers and number of queues. +The receive queue manager register block has a header with type 0x0000C021, version 0x00000200, and indicates the location of the receive queue manager registers and number of queues. .. table:: @@ -13,7 +13,7 @@ The receive queue manager register block has a header with type 0x0000C021, vers ======== ============= ====== ====== ====== ====== ============= RBB+0x00 Type Vendor ID Type RO 0x0000C021 -------- ------------- -------------- -------------- ------------- - RBB+0x04 Version Major Minor Patch Meta RO 0x00000100 + RBB+0x04 Version Major Minor Patch Meta RO 0x00000200 -------- ------------- ------ ------ ------ ------ ------------- RBB+0x08 Next pointer Pointer to next register block RO - -------- ------------- ------------------------------ ------------- @@ -82,5 +82,5 @@ Each queue has several associated control registers, detailed in this table: --------- -------------- -------------- -------------- ------------- Base+0x10 Head pointer Head pointer RW - --------- -------------- -------------- -------------- ------------- - Base+0x14 Tail pointer Tail pointer RW - + Base+0x18 Tail pointer Tail pointer RW - ========= ============== ============== ============== ============= diff --git a/docs/source/rb/qm_tx.rst b/docs/source/rb/qm_tx.rst index 9c7e87316..08c8b964e 100644 --- a/docs/source/rb/qm_tx.rst +++ b/docs/source/rb/qm_tx.rst @@ -4,7 +4,7 @@ Transmit queue manager register block ===================================== -The transmit queue manager register block has a header with type 0x0000C020, version 0x00000100, and indicates the location of the transmit queue manager registers and number of queues. +The transmit queue manager register block has a header with type 0x0000C020, version 0x00000200, and indicates the location of the transmit queue manager registers and number of queues. .. table:: @@ -13,7 +13,7 @@ The transmit queue manager register block has a header with type 0x0000C020, ver ======== ============= ====== ====== ====== ====== ============= RBB+0x00 Type Vendor ID Type RO 0x0000C020 -------- ------------- -------------- -------------- ------------- - RBB+0x04 Version Major Minor Patch Meta RO 0x00000100 + RBB+0x04 Version Major Minor Patch Meta RO 0x00000200 -------- ------------- ------ ------ ------ ------ ------------- RBB+0x08 Next pointer Pointer to next register block RO - -------- ------------- ------------------------------ ------------- @@ -82,5 +82,5 @@ Each queue has several associated control registers, detailed in this table: --------- -------------- -------------- -------------- ------------- Base+0x10 Head pointer Head pointer RW - --------- -------------- -------------- -------------- ------------- - Base+0x14 Tail pointer Tail pointer RW - + Base+0x18 Tail pointer Tail pointer RW - ========= ============== ============== ============== ============= diff --git a/fpga/common/rtl/cpl_write.v b/fpga/common/rtl/cpl_write.v index 9591a34df..75c33d9e0 100644 --- a/fpga/common/rtl/cpl_write.v +++ b/fpga/common/rtl/cpl_write.v @@ -110,6 +110,7 @@ module cpl_write # /* * Completion enqueue response input */ + input wire [PORTS-1:0] s_axis_cpl_enqueue_resp_phase, input wire [PORTS*DMA_ADDR_WIDTH-1:0] s_axis_cpl_enqueue_resp_addr, input wire [PORTS*QUEUE_REQ_TAG_WIDTH-1:0] s_axis_cpl_enqueue_resp_tag, input wire [PORTS*QUEUE_OP_TAG_WIDTH-1:0] s_axis_cpl_enqueue_resp_op_tag, @@ -174,6 +175,16 @@ initial begin $error("Error: Queue request tag width insufficient for descriptor table size (instance %m)"); $finish; end + + if (SEG_ADDR_WIDTH < CL_DESC_TABLE_SIZE) begin + $error("Error: SEG_ADDR_WIDTH not sufficient for requested size (min %d for table size %d) (instance %m)", CL_DESC_TABLE_SIZE, DESC_TABLE_SIZE); + $finish; + end + + if (SEG_COUNT*SEG_DATA_WIDTH < CPL_SIZE*8) begin + $error("Error: DMA RAM width insufficient for completion size (instance %m)"); + $finish; + end end reg s_axis_req_ready_reg = 1'b0, s_axis_req_ready_next; @@ -206,15 +217,21 @@ reg [CL_PORTS-1:0] desc_table_sel[DESC_TABLE_SIZE-1:0]; (* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *) reg [REQ_TAG_WIDTH-1:0] desc_table_tag[DESC_TABLE_SIZE-1:0]; (* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *) +reg [CPL_SIZE*8-1:0] desc_table_data[DESC_TABLE_SIZE-1:0]; +(* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *) +reg desc_table_phase[DESC_TABLE_SIZE-1:0]; +(* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *) reg [QUEUE_OP_TAG_WIDTH-1:0] desc_table_queue_op_tag[DESC_TABLE_SIZE-1:0]; reg [CL_DESC_TABLE_SIZE+1-1:0] desc_table_start_ptr_reg = 0; reg [CL_PORTS-1:0] desc_table_start_sel; reg [REQ_TAG_WIDTH-1:0] desc_table_start_tag; +reg [CPL_SIZE*8-1:0] desc_table_start_data; reg [QUEUE_INDEX_WIDTH-1:0] desc_table_start_cpl_queue; reg [QUEUE_OP_TAG_WIDTH-1:0] desc_table_start_queue_op_tag; reg desc_table_start_en; reg [CL_DESC_TABLE_SIZE-1:0] desc_table_enqueue_ptr; +reg desc_table_enqueue_phase; reg [QUEUE_OP_TAG_WIDTH-1:0] desc_table_enqueue_queue_op_tag; reg desc_table_enqueue_invalid; reg desc_table_enqueue_en; @@ -223,20 +240,6 @@ reg desc_table_cpl_write_done_en; reg [CL_DESC_TABLE_SIZE+1-1:0] desc_table_finish_ptr_reg = 0; reg desc_table_finish_en; -reg [RAM_ADDR_WIDTH-1:0] dma_write_desc_ram_addr_reg = {RAM_ADDR_WIDTH{1'b0}}, dma_write_desc_ram_addr_next; -reg [7:0] dma_write_desc_len_reg = 8'd0, dma_write_desc_len_next; -reg [CL_DESC_TABLE_SIZE-1:0] dma_write_desc_tag_reg = {CL_DESC_TABLE_SIZE{1'b0}}, dma_write_desc_tag_next; -reg dma_write_desc_user_reg = 1'b0, dma_write_desc_user_next; -reg dma_write_desc_valid_reg = 1'b0, dma_write_desc_valid_next; -wire dma_write_desc_ready; - -wire [CL_DESC_TABLE_SIZE-1:0] dma_write_desc_status_tag; -wire dma_write_desc_status_valid; - -reg [CPL_SIZE*8-1:0] cpl_data_reg = 0, cpl_data_next; -reg cpl_data_valid_reg = 1'b0, cpl_data_valid_next; -wire cpl_data_ready; - assign s_axis_req_ready = s_axis_req_ready_reg; assign m_axis_req_status_tag = m_axis_req_status_tag_reg; @@ -273,115 +276,53 @@ op_table_start_enc_inst ( .output_unencoded() ); -wire [SEG_COUNT*SEG_BE_WIDTH-1:0] dma_ram_wr_cmd_be_int; -wire [SEG_COUNT*SEG_ADDR_WIDTH-1:0] dma_ram_wr_cmd_addr_int; -wire [SEG_COUNT*SEG_DATA_WIDTH-1:0] dma_ram_wr_cmd_data_int; -wire [SEG_COUNT-1:0] dma_ram_wr_cmd_valid_int; -wire [SEG_COUNT-1:0] dma_ram_wr_cmd_ready_int; -wire [SEG_COUNT-1:0] dma_ram_wr_done_int; +generate -dma_psdpram #( - .SIZE(DESC_TABLE_SIZE*SEG_COUNT*SEG_BE_WIDTH), - .SEG_COUNT(SEG_COUNT), - .SEG_DATA_WIDTH(SEG_DATA_WIDTH), - .SEG_ADDR_WIDTH(SEG_ADDR_WIDTH), - .SEG_BE_WIDTH(SEG_BE_WIDTH), - .PIPELINE(RAM_PIPELINE) -) -dma_psdpram_inst ( - .clk(clk), - .rst(rst), + genvar n; - /* - * Write port - */ - .wr_cmd_be(dma_ram_wr_cmd_be_int), - .wr_cmd_addr(dma_ram_wr_cmd_addr_int), - .wr_cmd_data(dma_ram_wr_cmd_data_int), - .wr_cmd_valid(dma_ram_wr_cmd_valid_int), - .wr_cmd_ready(dma_ram_wr_cmd_ready_int), - .wr_done(dma_ram_wr_done_int), + for (n = 0; n < SEG_COUNT; n = n + 1) begin - /* - * Read port - */ - .rd_cmd_addr(dma_ram_rd_cmd_addr), - .rd_cmd_valid(dma_ram_rd_cmd_valid), - .rd_cmd_ready(dma_ram_rd_cmd_ready), - .rd_resp_data(dma_ram_rd_resp_data), - .rd_resp_valid(dma_ram_rd_resp_valid), - .rd_resp_ready(dma_ram_rd_resp_ready) -); + reg [RAM_PIPELINE-1:0] dma_ram_rd_resp_valid_pipe_reg = 0; + reg [SEG_DATA_WIDTH-1:0] dma_ram_rd_resp_data_pipe_reg[RAM_PIPELINE-1:0]; -dma_client_axis_sink #( - .SEG_COUNT(SEG_COUNT), - .SEG_DATA_WIDTH(SEG_DATA_WIDTH), - .SEG_ADDR_WIDTH(SEG_ADDR_WIDTH), - .SEG_BE_WIDTH(SEG_BE_WIDTH), - .RAM_ADDR_WIDTH(RAM_ADDR_WIDTH), - .AXIS_DATA_WIDTH(CPL_SIZE*8), - .AXIS_KEEP_ENABLE(CPL_SIZE > 1), - .AXIS_KEEP_WIDTH(CPL_SIZE), - .AXIS_LAST_ENABLE(1), - .AXIS_ID_ENABLE(0), - .AXIS_DEST_ENABLE(0), - .AXIS_USER_ENABLE(1), - .AXIS_USER_WIDTH(1), - .LEN_WIDTH(8), - .TAG_WIDTH(CL_DESC_TABLE_SIZE) -) -dma_client_axis_sink_inst ( - .clk(clk), - .rst(rst), + integer i, j; - /* - * DMA write descriptor input - */ - .s_axis_write_desc_ram_addr(dma_write_desc_ram_addr_reg), - .s_axis_write_desc_len(dma_write_desc_len_reg), - .s_axis_write_desc_tag(dma_write_desc_tag_reg), - .s_axis_write_desc_valid(dma_write_desc_valid_reg), - .s_axis_write_desc_ready(dma_write_desc_ready), + initial begin + for (i = 0; i < RAM_PIPELINE; i = i + 1) begin + dma_ram_rd_resp_data_pipe_reg[i] = 0; + end + end - /* - * DMA write descriptor status output - */ - .m_axis_write_desc_status_len(), - .m_axis_write_desc_status_tag(dma_write_desc_status_tag), - .m_axis_write_desc_status_id(), - .m_axis_write_desc_status_dest(), - .m_axis_write_desc_status_user(), - .m_axis_write_desc_status_error(), - .m_axis_write_desc_status_valid(dma_write_desc_status_valid), + always @(posedge clk) begin + if (dma_ram_rd_resp_ready[n]) begin + dma_ram_rd_resp_valid_pipe_reg[RAM_PIPELINE-1] <= 1'b0; + end - /* - * AXI stream write data input - */ - .s_axis_write_data_tdata(cpl_data_reg), - .s_axis_write_data_tkeep({CPL_SIZE{1'b1}}), - .s_axis_write_data_tvalid(cpl_data_valid_reg), - .s_axis_write_data_tready(cpl_data_ready), - .s_axis_write_data_tlast(1'b1), - .s_axis_write_data_tid(0), - .s_axis_write_data_tdest(0), - .s_axis_write_data_tuser(1'b0), + for (j = RAM_PIPELINE-1; j > 0; j = j - 1) begin + if (dma_ram_rd_resp_ready[n] || ((~dma_ram_rd_resp_valid_pipe_reg) >> j)) begin + dma_ram_rd_resp_valid_pipe_reg[j] <= dma_ram_rd_resp_valid_pipe_reg[j-1]; + dma_ram_rd_resp_data_pipe_reg[j] <= dma_ram_rd_resp_data_pipe_reg[j-1]; + dma_ram_rd_resp_valid_pipe_reg[j-1] <= 1'b0; + end + end - /* - * RAM interface - */ - .ram_wr_cmd_be(dma_ram_wr_cmd_be_int), - .ram_wr_cmd_addr(dma_ram_wr_cmd_addr_int), - .ram_wr_cmd_data(dma_ram_wr_cmd_data_int), - .ram_wr_cmd_valid(dma_ram_wr_cmd_valid_int), - .ram_wr_cmd_ready(dma_ram_wr_cmd_ready_int), - .ram_wr_done(dma_ram_wr_done_int), + if (dma_ram_rd_cmd_valid[n] && dma_ram_rd_cmd_ready[n]) begin + dma_ram_rd_resp_valid_pipe_reg[0] <= 1'b1; + dma_ram_rd_resp_data_pipe_reg[0] <= {desc_table_phase[dma_ram_rd_cmd_addr[SEG_ADDR_WIDTH*n +: CL_DESC_TABLE_SIZE]], desc_table_data[dma_ram_rd_cmd_addr[SEG_ADDR_WIDTH*n +: CL_DESC_TABLE_SIZE]][CPL_SIZE*8-1-1:0]} >> SEG_DATA_WIDTH*n; + end - /* - * Configuration - */ - .enable(1'b1), - .abort(1'b0) -); + if (rst) begin + dma_ram_rd_resp_valid_pipe_reg <= 0; + end + end + + assign dma_ram_rd_cmd_ready[n] = dma_ram_rd_resp_ready[n] || ~dma_ram_rd_resp_valid_pipe_reg; + + assign dma_ram_rd_resp_valid[n] = dma_ram_rd_resp_valid_pipe_reg[RAM_PIPELINE-1]; + assign dma_ram_rd_resp_data[SEG_DATA_WIDTH*n +: SEG_DATA_WIDTH] = dma_ram_rd_resp_data_pipe_reg[RAM_PIPELINE-1]; + end + +endgenerate always @* begin s_axis_req_ready_next = 1'b0; @@ -406,19 +347,12 @@ always @* begin m_axis_dma_write_desc_tag_next = m_axis_dma_write_desc_tag_reg; m_axis_dma_write_desc_valid_next = m_axis_dma_write_desc_valid_reg && !m_axis_dma_write_desc_ready; - dma_write_desc_ram_addr_next = dma_write_desc_ram_addr_reg; - dma_write_desc_len_next = dma_write_desc_len_reg; - dma_write_desc_tag_next = dma_write_desc_tag_reg; - dma_write_desc_user_next = dma_write_desc_user_reg; - dma_write_desc_valid_next = dma_write_desc_valid_reg && !dma_write_desc_ready; - - cpl_data_next = cpl_data_reg; - cpl_data_valid_next = cpl_data_valid_reg && !cpl_data_ready; - desc_table_start_sel = s_axis_req_sel; desc_table_start_tag = s_axis_req_tag; + desc_table_start_data = s_axis_req_data; desc_table_start_en = 1'b0; desc_table_enqueue_ptr = s_axis_cpl_enqueue_resp_tag[enqueue_resp_enc*QUEUE_REQ_TAG_WIDTH +: QUEUE_REQ_TAG_WIDTH] & DESC_PTR_MASK; + desc_table_enqueue_phase = s_axis_cpl_enqueue_resp_phase[enqueue_resp_enc]; desc_table_enqueue_queue_op_tag = s_axis_cpl_enqueue_resp_op_tag[enqueue_resp_enc*QUEUE_OP_TAG_WIDTH +: QUEUE_OP_TAG_WIDTH]; desc_table_enqueue_invalid = 1'b0; desc_table_enqueue_en = 1'b0; @@ -428,35 +362,20 @@ always @* begin // queue query // wait for descriptor request - s_axis_req_ready_next = enable && !desc_table_active[desc_table_start_ptr_reg & DESC_PTR_MASK] && ($unsigned(desc_table_start_ptr_reg - desc_table_finish_ptr_reg) < DESC_TABLE_SIZE) && (!m_axis_cpl_enqueue_req_valid || (m_axis_cpl_enqueue_req_valid & m_axis_cpl_enqueue_req_ready)) && (!dma_write_desc_valid_reg) && (!cpl_data_valid_reg); + s_axis_req_ready_next = enable && !desc_table_active[desc_table_start_ptr_reg & DESC_PTR_MASK] && ($unsigned(desc_table_start_ptr_reg - desc_table_finish_ptr_reg) < DESC_TABLE_SIZE) && (!m_axis_cpl_enqueue_req_valid || (m_axis_cpl_enqueue_req_valid & m_axis_cpl_enqueue_req_ready)); if (s_axis_req_ready && s_axis_req_valid) begin s_axis_req_ready_next = 1'b0; // store in descriptor table desc_table_start_sel = s_axis_req_sel; desc_table_start_tag = s_axis_req_tag; + desc_table_start_data = s_axis_req_data; desc_table_start_en = 1'b1; // initiate queue query m_axis_cpl_enqueue_req_queue_next = s_axis_req_queue; m_axis_cpl_enqueue_req_tag_next = desc_table_start_ptr_reg & DESC_PTR_MASK; m_axis_cpl_enqueue_req_valid_next = 1 << s_axis_req_sel; - - // initiate completion write to DMA RAM - cpl_data_next = s_axis_req_data; - cpl_data_valid_next = 1'b1; - - dma_write_desc_ram_addr_next = (desc_table_start_ptr_reg & DESC_PTR_MASK) << 5; - dma_write_desc_len_next = CPL_SIZE; - dma_write_desc_tag_next = (desc_table_start_ptr_reg & DESC_PTR_MASK); - dma_write_desc_valid_next = 1'b1; - end - - // finish completion write to DMA RAM - if (dma_write_desc_status_valid) begin - // update entry in descriptor table - // desc_table_cpl_write_done_ptr = s_axis_dma_write_desc_status_tag & DESC_PTR_MASK; - // desc_table_cpl_write_done_en = 1'b1; end // start completion write @@ -466,6 +385,7 @@ always @* begin // update entry in descriptor table desc_table_enqueue_ptr = s_axis_cpl_enqueue_resp_tag[enqueue_resp_enc*QUEUE_REQ_TAG_WIDTH +: QUEUE_REQ_TAG_WIDTH] & DESC_PTR_MASK; + desc_table_enqueue_phase = s_axis_cpl_enqueue_resp_phase[enqueue_resp_enc]; desc_table_enqueue_queue_op_tag = s_axis_cpl_enqueue_resp_op_tag[enqueue_resp_enc*QUEUE_OP_TAG_WIDTH +: QUEUE_OP_TAG_WIDTH]; desc_table_enqueue_invalid = 1'b0; desc_table_enqueue_en = 1'b1; @@ -478,7 +398,7 @@ always @* begin // initiate completion write m_axis_dma_write_desc_dma_addr_next = s_axis_cpl_enqueue_resp_addr[enqueue_resp_enc*DMA_ADDR_WIDTH +: DMA_ADDR_WIDTH]; - m_axis_dma_write_desc_ram_addr_next = (s_axis_cpl_enqueue_resp_tag[enqueue_resp_enc*QUEUE_REQ_TAG_WIDTH +: QUEUE_REQ_TAG_WIDTH] & DESC_PTR_MASK) << 5; + m_axis_dma_write_desc_ram_addr_next = (s_axis_cpl_enqueue_resp_tag[enqueue_resp_enc*QUEUE_REQ_TAG_WIDTH +: QUEUE_REQ_TAG_WIDTH] & DESC_PTR_MASK) << $clog2(SEG_COUNT*SEG_BE_WIDTH); m_axis_dma_write_desc_len_next = CPL_SIZE; m_axis_dma_write_desc_tag_next = (s_axis_cpl_enqueue_resp_tag[enqueue_resp_enc*QUEUE_REQ_TAG_WIDTH +: QUEUE_REQ_TAG_WIDTH] & DESC_PTR_MASK); @@ -542,25 +462,18 @@ always @(posedge clk) begin m_axis_dma_write_desc_tag_reg <= m_axis_dma_write_desc_tag_next; m_axis_dma_write_desc_valid_reg <= m_axis_dma_write_desc_valid_next; - dma_write_desc_ram_addr_reg <= dma_write_desc_ram_addr_next; - dma_write_desc_len_reg <= dma_write_desc_len_next; - dma_write_desc_tag_reg <= dma_write_desc_tag_next; - dma_write_desc_user_reg <= dma_write_desc_user_next; - dma_write_desc_valid_reg <= dma_write_desc_valid_next; - - cpl_data_reg <= cpl_data_next; - cpl_data_valid_reg <= cpl_data_valid_next; - if (desc_table_start_en) begin desc_table_active[desc_table_start_ptr_reg & DESC_PTR_MASK] <= 1'b1; desc_table_invalid[desc_table_start_ptr_reg & DESC_PTR_MASK] <= 1'b0; desc_table_cpl_write_done[desc_table_start_ptr_reg & DESC_PTR_MASK] <= 1'b0; desc_table_sel[desc_table_start_ptr_reg & DESC_PTR_MASK] <= desc_table_start_sel; desc_table_tag[desc_table_start_ptr_reg & DESC_PTR_MASK] <= desc_table_start_tag; + desc_table_data[desc_table_start_ptr_reg & DESC_PTR_MASK] <= desc_table_start_data; desc_table_start_ptr_reg <= desc_table_start_ptr_reg + 1; end if (desc_table_enqueue_en) begin + desc_table_phase[desc_table_enqueue_ptr & DESC_PTR_MASK] <= desc_table_enqueue_phase; desc_table_queue_op_tag[desc_table_enqueue_ptr & DESC_PTR_MASK] <= desc_table_enqueue_queue_op_tag; desc_table_invalid[desc_table_enqueue_ptr & DESC_PTR_MASK] <= desc_table_enqueue_invalid; end @@ -582,9 +495,6 @@ always @(posedge clk) begin m_axis_cpl_enqueue_commit_valid_reg <= 1'b0; m_axis_dma_write_desc_valid_reg <= 1'b0; - dma_write_desc_valid_reg <= 1'b0; - cpl_data_valid_reg <= 1'b0; - desc_table_active <= 0; desc_table_invalid <= 0; diff --git a/fpga/common/rtl/mqnic_interface.v b/fpga/common/rtl/mqnic_interface.v index 3550e8a1c..7441e7b83 100644 --- a/fpga/common/rtl/mqnic_interface.v +++ b/fpga/common/rtl/mqnic_interface.v @@ -711,6 +711,7 @@ wire [CPL_QUEUE_REQ_TAG_WIDTH-1:0] event_enqueue_req_tag; wire event_enqueue_req_valid; wire event_enqueue_req_ready; +wire event_enqueue_resp_phase; wire [DMA_ADDR_WIDTH-1:0] event_enqueue_resp_addr; wire [CPL_QUEUE_REQ_TAG_WIDTH-1:0] event_enqueue_resp_tag; wire [QUEUE_OP_TAG_WIDTH-1:0] event_enqueue_resp_op_tag; @@ -752,6 +753,7 @@ wire [CPL_QUEUE_REQ_TAG_WIDTH-1:0] tx_cpl_enqueue_req_tag; wire tx_cpl_enqueue_req_valid; wire tx_cpl_enqueue_req_ready; +wire tx_cpl_enqueue_resp_phase; wire [DMA_ADDR_WIDTH-1:0] tx_cpl_enqueue_resp_addr; wire [CPL_QUEUE_REQ_TAG_WIDTH-1:0] tx_cpl_enqueue_resp_tag; wire [QUEUE_OP_TAG_WIDTH-1:0] tx_cpl_enqueue_resp_op_tag; @@ -790,6 +792,7 @@ wire [CPL_QUEUE_REQ_TAG_WIDTH-1:0] rx_cpl_enqueue_req_tag; wire rx_cpl_enqueue_req_valid; wire rx_cpl_enqueue_req_ready; +wire rx_cpl_enqueue_resp_phase; wire [DMA_ADDR_WIDTH-1:0] rx_cpl_enqueue_resp_addr; wire [CPL_QUEUE_REQ_TAG_WIDTH-1:0] rx_cpl_enqueue_resp_tag; wire [QUEUE_OP_TAG_WIDTH-1:0] rx_cpl_enqueue_resp_op_tag; @@ -1159,35 +1162,35 @@ always @(posedge clk) begin RBB+8'h2C: ctrl_reg_rd_data_reg <= rx_mtu_reg; // IF ctrl: RX MTU // Queue manager (Event) RBB+8'h40: ctrl_reg_rd_data_reg <= 32'h0000C010; // Event QM: Type - RBB+8'h44: ctrl_reg_rd_data_reg <= 32'h00000100; // Event QM: Version + RBB+8'h44: ctrl_reg_rd_data_reg <= 32'h00000200; // Event QM: Version RBB+8'h48: ctrl_reg_rd_data_reg <= RB_BASE_ADDR+8'h60; // Event QM: Next header RBB+8'h4C: ctrl_reg_rd_data_reg <= AXIL_EQM_BASE_ADDR; // Event QM: Offset RBB+8'h50: ctrl_reg_rd_data_reg <= 2**EVENT_QUEUE_INDEX_WIDTH; // Event QM: Count RBB+8'h54: ctrl_reg_rd_data_reg <= 32; // Event QM: Stride // Queue manager (TX) RBB+8'h60: ctrl_reg_rd_data_reg <= 32'h0000C020; // TX QM: Type - RBB+8'h64: ctrl_reg_rd_data_reg <= 32'h00000100; // TX QM: Version + RBB+8'h64: ctrl_reg_rd_data_reg <= 32'h00000200; // TX QM: Version RBB+8'h68: ctrl_reg_rd_data_reg <= RB_BASE_ADDR+8'h80; // TX QM: Next header RBB+8'h6C: ctrl_reg_rd_data_reg <= AXIL_TX_QM_BASE_ADDR; // TX QM: Offset RBB+8'h70: ctrl_reg_rd_data_reg <= 2**TX_QUEUE_INDEX_WIDTH; // TX QM: Count RBB+8'h74: ctrl_reg_rd_data_reg <= 32; // TX QM: Stride // Queue manager (TX CPL) RBB+8'h80: ctrl_reg_rd_data_reg <= 32'h0000C030; // TX CPL QM: Type - RBB+8'h84: ctrl_reg_rd_data_reg <= 32'h00000100; // TX CPL QM: Version + RBB+8'h84: ctrl_reg_rd_data_reg <= 32'h00000200; // TX CPL QM: Version RBB+8'h88: ctrl_reg_rd_data_reg <= RB_BASE_ADDR+8'hA0; // TX CPL QM: Next header RBB+8'h8C: ctrl_reg_rd_data_reg <= AXIL_TX_CQM_BASE_ADDR; // TX CPL QM: Offset RBB+8'h90: ctrl_reg_rd_data_reg <= 2**TX_CPL_QUEUE_INDEX_WIDTH; // TX CPL QM: Count RBB+8'h94: ctrl_reg_rd_data_reg <= 32; // TX CPL QM: Stride // Queue manager (RX) RBB+8'hA0: ctrl_reg_rd_data_reg <= 32'h0000C021; // RX QM: Type - RBB+8'hA4: ctrl_reg_rd_data_reg <= 32'h00000100; // RX QM: Version + RBB+8'hA4: ctrl_reg_rd_data_reg <= 32'h00000200; // RX QM: Version RBB+8'hA8: ctrl_reg_rd_data_reg <= RB_BASE_ADDR+8'hC0; // RX QM: Next header RBB+8'hAC: ctrl_reg_rd_data_reg <= AXIL_RX_QM_BASE_ADDR; // RX QM: Offset RBB+8'hB0: ctrl_reg_rd_data_reg <= 2**RX_QUEUE_INDEX_WIDTH; // RX QM: Count RBB+8'hB4: ctrl_reg_rd_data_reg <= 32; // RX QM: Stride // Queue manager (RX CPL) RBB+8'hC0: ctrl_reg_rd_data_reg <= 32'h0000C031; // RX CPL QM: Type - RBB+8'hC4: ctrl_reg_rd_data_reg <= 32'h00000100; // RX CPL QM: Version + RBB+8'hC4: ctrl_reg_rd_data_reg <= 32'h00000200; // RX CPL QM: Version RBB+8'hC8: ctrl_reg_rd_data_reg <= RX_RB_BASE_ADDR; // RX CPL QM: Next header RBB+8'hCC: ctrl_reg_rd_data_reg <= AXIL_RX_CQM_BASE_ADDR; // RX CPL QM: Offset RBB+8'hD0: ctrl_reg_rd_data_reg <= 2**RX_CPL_QUEUE_INDEX_WIDTH; // RX CPL QM: Count @@ -1296,6 +1299,7 @@ event_queue_manager_inst ( */ .m_axis_enqueue_resp_queue(), .m_axis_enqueue_resp_ptr(), + .m_axis_enqueue_resp_phase(event_enqueue_resp_phase), .m_axis_enqueue_resp_addr(event_enqueue_resp_addr), .m_axis_enqueue_resp_event(), .m_axis_enqueue_resp_tag(event_enqueue_resp_tag), @@ -1466,6 +1470,7 @@ tx_cpl_queue_manager_inst ( */ .m_axis_enqueue_resp_queue(), .m_axis_enqueue_resp_ptr(), + .m_axis_enqueue_resp_phase(tx_cpl_enqueue_resp_phase), .m_axis_enqueue_resp_addr(tx_cpl_enqueue_resp_addr), .m_axis_enqueue_resp_event(), .m_axis_enqueue_resp_tag(tx_cpl_enqueue_resp_tag), @@ -1636,6 +1641,7 @@ rx_cpl_queue_manager_inst ( */ .m_axis_enqueue_resp_queue(), .m_axis_enqueue_resp_ptr(), + .m_axis_enqueue_resp_phase(rx_cpl_enqueue_resp_phase), .m_axis_enqueue_resp_addr(rx_cpl_enqueue_resp_addr), .m_axis_enqueue_resp_event(), .m_axis_enqueue_resp_tag(rx_cpl_enqueue_resp_tag), @@ -1996,6 +2002,7 @@ cpl_write_inst ( /* * Completion enqueue response input */ + .s_axis_cpl_enqueue_resp_phase({event_enqueue_resp_phase, rx_cpl_enqueue_resp_phase, tx_cpl_enqueue_resp_phase}), .s_axis_cpl_enqueue_resp_addr({event_enqueue_resp_addr, rx_cpl_enqueue_resp_addr, tx_cpl_enqueue_resp_addr}), .s_axis_cpl_enqueue_resp_tag({event_enqueue_resp_tag, rx_cpl_enqueue_resp_tag, tx_cpl_enqueue_resp_tag}), .s_axis_cpl_enqueue_resp_op_tag({event_enqueue_resp_op_tag, rx_cpl_enqueue_resp_op_tag, tx_cpl_enqueue_resp_op_tag}), diff --git a/fpga/common/tb/mqnic.py b/fpga/common/tb/mqnic.py index 0f8be1b57..5eb57fd4c 100644 --- a/fpga/common/tb/mqnic.py +++ b/fpga/common/tb/mqnic.py @@ -189,31 +189,31 @@ MQNIC_RB_RX_QUEUE_MAP_CH_REG_RSS_MASK = 0x04 MQNIC_RB_RX_QUEUE_MAP_CH_REG_APP_MASK = 0x08 MQNIC_RB_EVENT_QM_TYPE = 0x0000C010 -MQNIC_RB_EVENT_QM_VER = 0x00000100 +MQNIC_RB_EVENT_QM_VER = 0x00000200 MQNIC_RB_EVENT_QM_REG_OFFSET = 0x0C MQNIC_RB_EVENT_QM_REG_COUNT = 0x10 MQNIC_RB_EVENT_QM_REG_STRIDE = 0x14 MQNIC_RB_TX_QM_TYPE = 0x0000C020 -MQNIC_RB_TX_QM_VER = 0x00000100 +MQNIC_RB_TX_QM_VER = 0x00000200 MQNIC_RB_TX_QM_REG_OFFSET = 0x0C MQNIC_RB_TX_QM_REG_COUNT = 0x10 MQNIC_RB_TX_QM_REG_STRIDE = 0x14 MQNIC_RB_TX_CQM_TYPE = 0x0000C030 -MQNIC_RB_TX_CQM_VER = 0x00000100 +MQNIC_RB_TX_CQM_VER = 0x00000200 MQNIC_RB_TX_CQM_REG_OFFSET = 0x0C MQNIC_RB_TX_CQM_REG_COUNT = 0x10 MQNIC_RB_TX_CQM_REG_STRIDE = 0x14 MQNIC_RB_RX_QM_TYPE = 0x0000C021 -MQNIC_RB_RX_QM_VER = 0x00000100 +MQNIC_RB_RX_QM_VER = 0x00000200 MQNIC_RB_RX_QM_REG_OFFSET = 0x0C MQNIC_RB_RX_QM_REG_COUNT = 0x10 MQNIC_RB_RX_QM_REG_STRIDE = 0x14 MQNIC_RB_RX_CQM_TYPE = 0x0000C031 -MQNIC_RB_RX_CQM_VER = 0x00000100 +MQNIC_RB_RX_CQM_VER = 0x00000200 MQNIC_RB_RX_CQM_REG_OFFSET = 0x0C MQNIC_RB_RX_CQM_REG_COUNT = 0x10 MQNIC_RB_RX_CQM_REG_STRIDE = 0x14 @@ -454,6 +454,11 @@ class EqRing: self.irq = irq + self.head_ptr = 0 + self.tail_ptr = 0 + + self.buf[0:self.buf_size] = b'\x00'*self.buf_size + await self.hw_regs.write_dword(MQNIC_EVENT_QUEUE_ACTIVE_LOG_SIZE_REG, 0) # active, log size await self.hw_regs.write_dword(MQNIC_EVENT_QUEUE_BASE_ADDR_REG, self.buf_dma & 0xffffffff) # base address await self.hw_regs.write_dword(MQNIC_EVENT_QUEUE_BASE_ADDR_REG+4, self.buf_dma >> 32) # base address @@ -472,12 +477,6 @@ class EqRing: self.active = False - def empty(self): - return self.head_ptr == self.tail_ptr - - def full(self): - return self.head_ptr - self.tail_ptr >= self.size - async def read_head_ptr(self): val = await self.hw_regs.read_dword(MQNIC_EVENT_QUEUE_HEAD_PTR_REG) self.head_ptr += (val - self.head_ptr) & self.hw_ptr_mask @@ -497,17 +496,17 @@ class EqRing: self.log.info("Process event queue") - await self.read_head_ptr() - eq_tail_ptr = self.tail_ptr eq_index = eq_tail_ptr & self.size_mask - self.log.info("%d events in queue", self.head_ptr - eq_tail_ptr) + while True: + event_data = struct.unpack_from("> 32) # base address @@ -619,12 +623,6 @@ class CqRing: self.active = False - def empty(self): - return self.head_ptr == self.tail_ptr - - def full(self): - return self.head_ptr - self.tail_ptr >= self.size - async def read_head_ptr(self): val = await self.hw_regs.read_dword(MQNIC_CPL_QUEUE_HEAD_PTR_REG) self.head_ptr += (val - self.head_ptr) & self.hw_ptr_mask @@ -725,6 +723,9 @@ class TxRing: self.cq.src_ring = self self.cq.handler = TxRing.process_tx_cq + self.head_ptr = 0 + self.tail_ptr = 0 + await self.hw_regs.write_dword(MQNIC_QUEUE_ACTIVE_LOG_SIZE_REG, 0) # active, log size await self.hw_regs.write_dword(MQNIC_QUEUE_BASE_ADDR_REG, self.buf_dma & 0xffffffff) # base address await self.hw_regs.write_dword(MQNIC_QUEUE_BASE_ADDR_REG+4, self.buf_dma >> 32) # base address @@ -782,16 +783,18 @@ class TxRing: return # process completion queue - await cq.read_head_ptr() - cq_tail_ptr = cq.tail_ptr cq_index = cq_tail_ptr & cq.size_mask - while (cq.head_ptr != cq_tail_ptr): - cpl_data = struct.unpack_from("> 32) # base address @@ -988,16 +994,18 @@ class RxRing: return # process completion queue - await cq.read_head_ptr() - cq_tail_ptr = cq.tail_ptr cq_index = cq_tail_ptr & cq.size_mask - while (cq.head_ptr != cq_tail_ptr): - cpl_data = struct.unpack_from("eq_ring = eq_ring; ring->eq_index = eq_ring->index; + ring->head_ptr = 0; + ring->tail_ptr = 0; + + memset(ring->buf, 1, ring->buf_size); + // deactivate queue iowrite32(0, ring->hw_addr + MQNIC_CPL_QUEUE_ACTIVE_LOG_SIZE_REG); // set base address @@ -161,16 +166,6 @@ void mqnic_deactivate_cq_ring(struct mqnic_cq_ring *ring) ring->active = 0; } -bool mqnic_is_cq_ring_empty(const struct mqnic_cq_ring *ring) -{ - return ring->head_ptr == ring->tail_ptr; -} - -bool mqnic_is_cq_ring_full(const struct mqnic_cq_ring *ring) -{ - return ring->head_ptr - ring->tail_ptr >= ring->size; -} - void mqnic_cq_read_head_ptr(struct mqnic_cq_ring *ring) { ring->head_ptr += (ioread32(ring->hw_head_ptr) - ring->head_ptr) & ring->hw_ptr_mask; diff --git a/modules/mqnic/mqnic_eq.c b/modules/mqnic/mqnic_eq.c index 1f5127b5e..bec464583 100644 --- a/modules/mqnic/mqnic_eq.c +++ b/modules/mqnic/mqnic_eq.c @@ -149,6 +149,11 @@ int mqnic_activate_eq_ring(struct mqnic_eq_ring *ring, struct mqnic_irq *irq) ring->irq = irq; ring->irq_index = irq->index; + ring->head_ptr = 0; + ring->tail_ptr = 0; + + memset(ring->buf, 1, ring->buf_size); + // deactivate queue iowrite32(0, ring->hw_addr + MQNIC_EVENT_QUEUE_ACTIVE_LOG_SIZE_REG); // set base address @@ -157,10 +162,8 @@ int mqnic_activate_eq_ring(struct mqnic_eq_ring *ring, struct mqnic_irq *irq) // set interrupt index iowrite32(ring->irq_index, ring->hw_addr + MQNIC_EVENT_QUEUE_INTERRUPT_INDEX_REG); // set pointers - iowrite32(ring->head_ptr & ring->hw_ptr_mask, - ring->hw_addr + MQNIC_EVENT_QUEUE_HEAD_PTR_REG); - iowrite32(ring->tail_ptr & ring->hw_ptr_mask, - ring->hw_addr + MQNIC_EVENT_QUEUE_TAIL_PTR_REG); + iowrite32(ring->head_ptr & ring->hw_ptr_mask, ring->hw_addr + MQNIC_EVENT_QUEUE_HEAD_PTR_REG); + iowrite32(ring->tail_ptr & ring->hw_ptr_mask, ring->hw_addr + MQNIC_EVENT_QUEUE_TAIL_PTR_REG); // set size and activate queue iowrite32(ilog2(ring->size) | MQNIC_EVENT_QUEUE_ACTIVE_MASK, ring->hw_addr + MQNIC_EVENT_QUEUE_ACTIVE_LOG_SIZE_REG); @@ -188,16 +191,6 @@ void mqnic_deactivate_eq_ring(struct mqnic_eq_ring *ring) ring->active = 0; } -bool mqnic_is_eq_ring_empty(const struct mqnic_eq_ring *ring) -{ - return ring->head_ptr == ring->tail_ptr; -} - -bool mqnic_is_eq_ring_full(const struct mqnic_eq_ring *ring) -{ - return ring->head_ptr - ring->tail_ptr >= ring->size; -} - void mqnic_eq_read_head_ptr(struct mqnic_eq_ring *ring) { ring->head_ptr += (ioread32(ring->hw_head_ptr) - ring->head_ptr) & ring->hw_ptr_mask; @@ -227,14 +220,17 @@ void mqnic_process_eq(struct mqnic_eq_ring *eq_ring) int done = 0; // read head pointer from NIC - mqnic_eq_read_head_ptr(eq_ring); - eq_tail_ptr = eq_ring->tail_ptr; eq_index = eq_tail_ptr & eq_ring->size_mask; - while (eq_ring->head_ptr != eq_tail_ptr) { + while (1) { event = (struct mqnic_event *)(eq_ring->buf + eq_index * eq_ring->stride); + if (!!(event->phase & cpu_to_le32(0x80000000)) == !!(eq_tail_ptr & eq_ring->size)) + break; + + dma_rmb(); + if (event->type == MQNIC_EVENT_TYPE_TX_CPL) { // transmit completion event if (unlikely(le16_to_cpu(event->source) > interface->tx_cpl_queue_count)) { diff --git a/modules/mqnic/mqnic_hw.h b/modules/mqnic/mqnic_hw.h index 84e573af4..397ff4a4d 100644 --- a/modules/mqnic/mqnic_hw.h +++ b/modules/mqnic/mqnic_hw.h @@ -218,31 +218,31 @@ #define MQNIC_RB_RX_QUEUE_MAP_CH_REG_APP_MASK 0x08 #define MQNIC_RB_EVENT_QM_TYPE 0x0000C010 -#define MQNIC_RB_EVENT_QM_VER 0x00000100 +#define MQNIC_RB_EVENT_QM_VER 0x00000200 #define MQNIC_RB_EVENT_QM_REG_OFFSET 0x0C #define MQNIC_RB_EVENT_QM_REG_COUNT 0x10 #define MQNIC_RB_EVENT_QM_REG_STRIDE 0x14 #define MQNIC_RB_TX_QM_TYPE 0x0000C020 -#define MQNIC_RB_TX_QM_VER 0x00000100 +#define MQNIC_RB_TX_QM_VER 0x00000200 #define MQNIC_RB_TX_QM_REG_OFFSET 0x0C #define MQNIC_RB_TX_QM_REG_COUNT 0x10 #define MQNIC_RB_TX_QM_REG_STRIDE 0x14 #define MQNIC_RB_TX_CQM_TYPE 0x0000C030 -#define MQNIC_RB_TX_CQM_VER 0x00000100 +#define MQNIC_RB_TX_CQM_VER 0x00000200 #define MQNIC_RB_TX_CQM_REG_OFFSET 0x0C #define MQNIC_RB_TX_CQM_REG_COUNT 0x10 #define MQNIC_RB_TX_CQM_REG_STRIDE 0x14 #define MQNIC_RB_RX_QM_TYPE 0x0000C021 -#define MQNIC_RB_RX_QM_VER 0x00000100 +#define MQNIC_RB_RX_QM_VER 0x00000200 #define MQNIC_RB_RX_QM_REG_OFFSET 0x0C #define MQNIC_RB_RX_QM_REG_COUNT 0x10 #define MQNIC_RB_RX_QM_REG_STRIDE 0x14 #define MQNIC_RB_RX_CQM_TYPE 0x0000C031 -#define MQNIC_RB_RX_CQM_VER 0x00000100 +#define MQNIC_RB_RX_CQM_VER 0x00000200 #define MQNIC_RB_RX_CQM_REG_OFFSET 0x0C #define MQNIC_RB_RX_CQM_REG_COUNT 0x10 #define MQNIC_RB_RX_CQM_REG_STRIDE 0x14 @@ -361,12 +361,19 @@ struct mqnic_cpl { __u8 rsvd2; __u8 rsvd3; __le32 rsvd4; - __le32 rsvd5; + __le32 phase; }; struct mqnic_event { __le16 type; __le16 source; + __le32 rsvd0; + __le32 rsvd1; + __le32 rsvd2; + __le32 rsvd3; + __le32 rsvd4; + __le32 rsvd5; + __le32 phase; }; #endif /* MQNIC_HW_H */ diff --git a/modules/mqnic/mqnic_rx.c b/modules/mqnic/mqnic_rx.c index 667880e03..6b97b2f30 100644 --- a/modules/mqnic/mqnic_rx.c +++ b/modules/mqnic/mqnic_rx.c @@ -157,6 +157,9 @@ int mqnic_activate_rx_ring(struct mqnic_ring *ring, struct mqnic_priv *priv, cq_ring->src_ring = ring; cq_ring->handler = mqnic_rx_irq; + ring->head_ptr = 0; + ring->tail_ptr = 0; + // deactivate queue iowrite32(0, ring->hw_addr + MQNIC_QUEUE_ACTIVE_LOG_SIZE_REG); // set base address @@ -328,16 +331,17 @@ int mqnic_process_rx_cq(struct mqnic_cq_ring *cq_ring, int napi_budget) return done; // process completion queue - // read head pointer from NIC - mqnic_cq_read_head_ptr(cq_ring); - cq_tail_ptr = cq_ring->tail_ptr; cq_index = cq_tail_ptr & cq_ring->size_mask; - mb(); // is a barrier here necessary? If so, what kind? - - while (cq_ring->head_ptr != cq_tail_ptr && done < budget) { + while (done < budget) { cpl = (struct mqnic_cpl *)(cq_ring->buf + cq_index * cq_ring->stride); + + if (!!(cpl->phase & cpu_to_le32(0x80000000)) == !!(cq_tail_ptr & cq_ring->size)) + break; + + dma_rmb(); + ring_index = le16_to_cpu(cpl->index) & rx_ring->size_mask; rx_info = &rx_ring->rx_info[ring_index]; page = rx_info->page; diff --git a/modules/mqnic/mqnic_tx.c b/modules/mqnic/mqnic_tx.c index 170dfdad5..ebbcc8b27 100644 --- a/modules/mqnic/mqnic_tx.c +++ b/modules/mqnic/mqnic_tx.c @@ -159,6 +159,9 @@ int mqnic_activate_tx_ring(struct mqnic_ring *ring, struct mqnic_priv *priv, cq_ring->src_ring = ring; cq_ring->handler = mqnic_tx_irq; + ring->head_ptr = 0; + ring->tail_ptr = 0; + // deactivate queue iowrite32(0, ring->hw_addr + MQNIC_QUEUE_ACTIVE_LOG_SIZE_REG); // set base address @@ -275,14 +278,17 @@ int mqnic_process_tx_cq(struct mqnic_cq_ring *cq_ring, int napi_budget) netdev_txq_bql_complete_prefetchw(tx_ring->tx_queue); // process completion queue - // read head pointer from NIC - mqnic_cq_read_head_ptr(cq_ring); - cq_tail_ptr = cq_ring->tail_ptr; cq_index = cq_tail_ptr & cq_ring->size_mask; - while (cq_ring->head_ptr != cq_tail_ptr && done < budget) { + while (done < budget) { cpl = (struct mqnic_cpl *)(cq_ring->buf + cq_index * cq_ring->stride); + + if (!!(cpl->phase & cpu_to_le32(0x80000000)) == !!(cq_tail_ptr & cq_ring->size)) + break; + + dma_rmb(); + ring_index = le16_to_cpu(cpl->index) & tx_ring->size_mask; tx_info = &tx_ring->tx_info[ring_index];