diff --git a/example/common/driver/example/example_driver.c b/example/common/driver/example/example_driver.c
index 3cd3a6c97..69aa05a9c 100644
--- a/example/common/driver/example/example_driver.c
+++ b/example/common/driver/example/example_driver.c
@@ -167,9 +167,14 @@ static void dma_block_read_bench(struct example_dev *edev,
 		dma_addr_t dma_addr, u64 size, u64 stride, u64 count)
 {
 	u64 cycles;
+	u32 rd_req;
+	u32 rd_cpl;
 
 	udelay(5);
 
+	rd_req = ioread32(edev->bar[0] + 0x000020);
+	rd_cpl = ioread32(edev->bar[0] + 0x000024);
+
 	dma_block_read(edev, dma_addr, 0, 0x3fff, stride,
 			0, 0, 0x3fff, stride, size, count);
 
@@ -177,17 +182,23 @@ static void dma_block_read_bench(struct example_dev *edev,
 
 	udelay(5);
 
-	dev_info(edev->dev, "read %lld blocks of %lld bytes (stride %lld) in %lld ns: %lld Mbps",
-			count, size, stride, cycles * 4, size * count * 8 * 1000 / (cycles * 4));
+	rd_req = ioread32(edev->bar[0] + 0x000020) - rd_req;
+	rd_cpl = ioread32(edev->bar[0] + 0x000024) - rd_cpl;
+
+	dev_info(edev->dev, "read %llu blocks of %llu bytes (stride %llu) in %llu ns (%u req %u cpl): %llu Mbps",
+			count, size, stride, cycles * 4, rd_req, rd_cpl, size * count * 8 * 1000 / (cycles * 4));
 }
 
 static void dma_block_write_bench(struct example_dev *edev,
 		dma_addr_t dma_addr, u64 size, u64 stride, u64 count)
 {
 	u64 cycles;
+	u32 wr_req;
 
 	udelay(5);
 
+	wr_req = ioread32(edev->bar[0] + 0x000028);
+
 	dma_block_write(edev, dma_addr, 0, 0x3fff, stride,
 			0, 0, 0x3fff, stride, size, count);
 
@@ -195,8 +206,10 @@ static void dma_block_write_bench(struct example_dev *edev,
 
 	udelay(5);
 
-	dev_info(edev->dev, "wrote %lld blocks of %lld bytes (stride %lld) in %lld ns: %lld Mbps",
-			count, size, stride, cycles * 4, size * count * 8 * 1000 / (cycles * 4));
+	wr_req = ioread32(edev->bar[0] + 0x000028) - wr_req;
+
+	dev_info(edev->dev, "wrote %llu blocks of %llu bytes (stride %llu) in %llu ns (%u req): %llu Mbps",
+			count, size, stride, cycles * 4, wr_req, size * count * 8 * 1000 / (cycles * 4));
 }
 
 static irqreturn_t edev_intr(int irq, void *data)
diff --git a/example/common/rtl/example_core.v b/example/common/rtl/example_core.v
index 038e37925..6729cbaf0 100644
--- a/example/common/rtl/example_core.v
+++ b/example/common/rtl/example_core.v
@@ -159,7 +159,10 @@ module example_core #
      */
     output wire dma_enable,
     input wire dma_rd_busy,
-    input wire dma_wr_busy
+    input wire dma_wr_busy,
+    input wire dma_rd_req,
+    input wire dma_rd_cpl,
+    input wire dma_wr_req
 );
 
 localparam RAM_ADDR_IMM_WIDTH = (DMA_IMM_ENABLE && (DMA_IMM_WIDTH > RAM_ADDR_WIDTH)) ? DMA_IMM_WIDTH : RAM_ADDR_WIDTH;
@@ -210,6 +213,9 @@ reg axil_ctrl_rvalid_reg = 1'b0, axil_ctrl_rvalid_next;
 reg [63:0] cycle_count_reg = 0;
 reg [15:0] dma_read_active_count_reg = 0;
 reg [15:0] dma_write_active_count_reg = 0;
+reg [31:0] dma_rd_req_count_reg = 0;
+reg [31:0] dma_rd_cpl_count_reg = 0;
+reg [31:0] dma_wr_req_count_reg = 0;
 
 reg [DMA_ADDR_WIDTH-1:0] dma_read_desc_dma_addr_reg = 0, dma_read_desc_dma_addr_next;
 reg [RAM_ADDR_WIDTH-1:0] dma_read_desc_ram_addr_reg = 0, dma_read_desc_ram_addr_next;
@@ -455,8 +461,11 @@ always @* begin
             end
             16'h0010: axil_ctrl_rdata_next = cycle_count_reg;
             16'h0014: axil_ctrl_rdata_next = cycle_count_reg >> 32;
-            16'h0020: axil_ctrl_rdata_next = dma_read_active_count_reg;
-            16'h0028: axil_ctrl_rdata_next = dma_write_active_count_reg;
+            16'h0018: axil_ctrl_rdata_next = dma_read_active_count_reg;
+            16'h001c: axil_ctrl_rdata_next = dma_write_active_count_reg;
+            16'h0020: axil_ctrl_rdata_next = dma_rd_req_count_reg;
+            16'h0024: axil_ctrl_rdata_next = dma_rd_cpl_count_reg;
+            16'h0028: axil_ctrl_rdata_next = dma_wr_req_count_reg;
             // single read
             16'h0100: axil_ctrl_rdata_next = dma_read_desc_dma_addr_reg;
             16'h0104: axil_ctrl_rdata_next = dma_read_desc_dma_addr_reg >> 32;
@@ -626,6 +635,10 @@ always @(posedge clk) begin
         + (m_axis_dma_write_desc_valid && m_axis_dma_write_desc_ready)
         - s_axis_dma_write_desc_status_valid;
 
+    dma_rd_req_count_reg <= dma_rd_req_count_reg + dma_rd_req;
+    dma_rd_cpl_count_reg <= dma_rd_cpl_count_reg + dma_rd_cpl;
+    dma_wr_req_count_reg <= dma_wr_req_count_reg + dma_wr_req;
+
     dma_read_desc_dma_addr_reg <= dma_read_desc_dma_addr_next;
     dma_read_desc_ram_addr_reg <= dma_read_desc_ram_addr_next;
     dma_read_desc_len_reg <= dma_read_desc_len_next;
@@ -690,6 +703,9 @@ always @(posedge clk) begin
         cycle_count_reg <= 0;
         dma_read_active_count_reg <= 0;
         dma_write_active_count_reg <= 0;
+        dma_rd_req_count_reg <= 0;
+        dma_rd_cpl_count_reg <= 0;
+        dma_wr_req_count_reg <= 0;
 
         dma_read_desc_valid_reg <= 1'b0;
         dma_read_desc_status_valid_reg <= 1'b0;
diff --git a/example/common/rtl/example_core_pcie.v b/example/common/rtl/example_core_pcie.v
index 83b24d419..764e85bd5 100644
--- a/example/common/rtl/example_core_pcie.v
+++ b/example/common/rtl/example_core_pcie.v
@@ -1121,7 +1121,10 @@ core_inst (
      */
     .dma_enable(dma_enable),
     .dma_rd_busy(dma_rd_busy),
-    .dma_wr_busy(dma_wr_busy)
+    .dma_wr_busy(dma_wr_busy),
+    .dma_rd_req(tx_rd_req_tlp_valid && tx_rd_req_tlp_sop && tx_rd_req_tlp_ready),
+    .dma_rd_cpl(rx_cpl_tlp_valid && rx_cpl_tlp_sop && rx_cpl_tlp_ready),
+    .dma_wr_req(tx_wr_req_tlp_valid && tx_wr_req_tlp_sop && tx_wr_req_tlp_ready)
 );
 
 endmodule