From 731bb7f38a8d81ce5b074c6d5fac4e40d098a139 Mon Sep 17 00:00:00 2001 From: Alex Forencich Date: Mon, 12 Jun 2023 23:53:53 -0700 Subject: [PATCH 01/20] Add RCB to debug info Signed-off-by: Alex Forencich --- example/common/driver/example/example_driver.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/example/common/driver/example/example_driver.c b/example/common/driver/example/example_driver.c index 17d863ad7..8dcd3b40c 100644 --- a/example/common/driver/example/example_driver.c +++ b/example/common/driver/example/example_driver.c @@ -227,16 +227,20 @@ static int edev_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (pdev->pcie_cap) { u16 devctl; u32 lnkcap; + u16 lnkctl; u16 lnksta; pci_read_config_word(pdev, pdev->pcie_cap + PCI_EXP_DEVCTL, &devctl); pci_read_config_dword(pdev, pdev->pcie_cap + PCI_EXP_LNKCAP, &lnkcap); + pci_read_config_word(pdev, pdev->pcie_cap + PCI_EXP_LNKCTL, &lnkctl); pci_read_config_word(pdev, pdev->pcie_cap + PCI_EXP_LNKSTA, &lnksta); dev_info(dev, " Max payload size: %d bytes", 128 << ((devctl & PCI_EXP_DEVCTL_PAYLOAD) >> 5)); dev_info(dev, " Max read request size: %d bytes", 128 << ((devctl & PCI_EXP_DEVCTL_READRQ) >> 12)); + dev_info(dev, " Read completion boundary: %d bytes", + lnkctl & PCI_EXP_LNKCTL_RCB ? 
128 : 64); dev_info(dev, " Link capability: gen %d x%d", lnkcap & PCI_EXP_LNKCAP_SLS, (lnkcap & PCI_EXP_LNKCAP_MLW) >> 4); dev_info(dev, " Link status: gen %d x%d", From b91076f6d356e622da45b7056bd822eb164f75d9 Mon Sep 17 00:00:00 2001 From: Alex Forencich Date: Tue, 13 Jun 2023 11:28:20 -0700 Subject: [PATCH 02/20] Fix AXIS_PCIE_RQ_USER_WIDTH parameter for US+ devices Signed-off-by: Alex Forencich --- example/ADM_PCIE_9V3/fpga/rtl/fpga.v | 2 +- example/AU200/fpga/rtl/fpga.v | 2 +- example/AU250/fpga/rtl/fpga.v | 2 +- example/AU280/fpga/rtl/fpga.v | 2 +- example/AU50/fpga/rtl/fpga.v | 2 +- example/ExaNIC_X25/fpga/rtl/fpga.v | 2 +- example/VCU118/fpga/rtl/fpga.v | 2 +- example/VCU1525/fpga/rtl/fpga.v | 2 +- example/ZCU106/fpga/rtl/fpga.v | 2 +- example/fb2CG/fpga/rtl/fpga.v | 2 +- 10 files changed, 10 insertions(+), 10 deletions(-) diff --git a/example/ADM_PCIE_9V3/fpga/rtl/fpga.v b/example/ADM_PCIE_9V3/fpga/rtl/fpga.v index d8c59a846..a4bcc4141 100644 --- a/example/ADM_PCIE_9V3/fpga/rtl/fpga.v +++ b/example/ADM_PCIE_9V3/fpga/rtl/fpga.v @@ -54,7 +54,7 @@ module fpga ( parameter AXIS_PCIE_DATA_WIDTH = 512; parameter AXIS_PCIE_KEEP_WIDTH = (AXIS_PCIE_DATA_WIDTH/32); parameter AXIS_PCIE_RC_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 75 : 161; -parameter AXIS_PCIE_RQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 60 : 137; +parameter AXIS_PCIE_RQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 62 : 137; parameter AXIS_PCIE_CQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 85 : 183; parameter AXIS_PCIE_CC_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 33 : 81; parameter RC_STRADDLE = AXIS_PCIE_DATA_WIDTH >= 256; diff --git a/example/AU200/fpga/rtl/fpga.v b/example/AU200/fpga/rtl/fpga.v index 426b56c8c..6d267f5cb 100644 --- a/example/AU200/fpga/rtl/fpga.v +++ b/example/AU200/fpga/rtl/fpga.v @@ -53,7 +53,7 @@ module fpga ( parameter AXIS_PCIE_DATA_WIDTH = 512; parameter AXIS_PCIE_KEEP_WIDTH = (AXIS_PCIE_DATA_WIDTH/32); parameter AXIS_PCIE_RC_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 
75 : 161; -parameter AXIS_PCIE_RQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 60 : 137; +parameter AXIS_PCIE_RQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 62 : 137; parameter AXIS_PCIE_CQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 85 : 183; parameter AXIS_PCIE_CC_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 33 : 81; parameter RC_STRADDLE = AXIS_PCIE_DATA_WIDTH >= 256; diff --git a/example/AU250/fpga/rtl/fpga.v b/example/AU250/fpga/rtl/fpga.v index 426b56c8c..6d267f5cb 100644 --- a/example/AU250/fpga/rtl/fpga.v +++ b/example/AU250/fpga/rtl/fpga.v @@ -53,7 +53,7 @@ module fpga ( parameter AXIS_PCIE_DATA_WIDTH = 512; parameter AXIS_PCIE_KEEP_WIDTH = (AXIS_PCIE_DATA_WIDTH/32); parameter AXIS_PCIE_RC_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 75 : 161; -parameter AXIS_PCIE_RQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 60 : 137; +parameter AXIS_PCIE_RQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 62 : 137; parameter AXIS_PCIE_CQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 85 : 183; parameter AXIS_PCIE_CC_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 33 : 81; parameter RC_STRADDLE = AXIS_PCIE_DATA_WIDTH >= 256; diff --git a/example/AU280/fpga/rtl/fpga.v b/example/AU280/fpga/rtl/fpga.v index 52ae618c2..1d9cc0c53 100644 --- a/example/AU280/fpga/rtl/fpga.v +++ b/example/AU280/fpga/rtl/fpga.v @@ -52,7 +52,7 @@ module fpga ( parameter AXIS_PCIE_DATA_WIDTH = 512; parameter AXIS_PCIE_KEEP_WIDTH = (AXIS_PCIE_DATA_WIDTH/32); parameter AXIS_PCIE_RC_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 75 : 161; -parameter AXIS_PCIE_RQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 60 : 137; +parameter AXIS_PCIE_RQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 62 : 137; parameter AXIS_PCIE_CQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 85 : 183; parameter AXIS_PCIE_CC_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 
33 : 81; parameter RC_STRADDLE = AXIS_PCIE_DATA_WIDTH >= 256; diff --git a/example/AU50/fpga/rtl/fpga.v b/example/AU50/fpga/rtl/fpga.v index f02aa7984..a45ddfb5c 100644 --- a/example/AU50/fpga/rtl/fpga.v +++ b/example/AU50/fpga/rtl/fpga.v @@ -55,7 +55,7 @@ module fpga ( parameter AXIS_PCIE_DATA_WIDTH = 512; parameter AXIS_PCIE_KEEP_WIDTH = (AXIS_PCIE_DATA_WIDTH/32); parameter AXIS_PCIE_RC_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 75 : 161; -parameter AXIS_PCIE_RQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 60 : 137; +parameter AXIS_PCIE_RQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 62 : 137; parameter AXIS_PCIE_CQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 85 : 183; parameter AXIS_PCIE_CC_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 33 : 81; parameter RC_STRADDLE = AXIS_PCIE_DATA_WIDTH >= 256; diff --git a/example/ExaNIC_X25/fpga/rtl/fpga.v b/example/ExaNIC_X25/fpga/rtl/fpga.v index 30463f830..c59f38be4 100644 --- a/example/ExaNIC_X25/fpga/rtl/fpga.v +++ b/example/ExaNIC_X25/fpga/rtl/fpga.v @@ -54,7 +54,7 @@ module fpga ( parameter AXIS_PCIE_DATA_WIDTH = 256; parameter AXIS_PCIE_KEEP_WIDTH = (AXIS_PCIE_DATA_WIDTH/32); parameter AXIS_PCIE_RC_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 75 : 161; -parameter AXIS_PCIE_RQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 60 : 137; +parameter AXIS_PCIE_RQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 62 : 137; parameter AXIS_PCIE_CQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 85 : 183; parameter AXIS_PCIE_CC_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 33 : 81; parameter RC_STRADDLE = AXIS_PCIE_DATA_WIDTH >= 256; diff --git a/example/VCU118/fpga/rtl/fpga.v b/example/VCU118/fpga/rtl/fpga.v index eceb81788..86a746ce9 100644 --- a/example/VCU118/fpga/rtl/fpga.v +++ b/example/VCU118/fpga/rtl/fpga.v @@ -58,7 +58,7 @@ module fpga ( parameter AXIS_PCIE_DATA_WIDTH = 512; parameter AXIS_PCIE_KEEP_WIDTH = (AXIS_PCIE_DATA_WIDTH/32); parameter AXIS_PCIE_RC_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 
75 : 161; -parameter AXIS_PCIE_RQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 60 : 137; +parameter AXIS_PCIE_RQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 62 : 137; parameter AXIS_PCIE_CQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 85 : 183; parameter AXIS_PCIE_CC_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 33 : 81; parameter RC_STRADDLE = AXIS_PCIE_DATA_WIDTH >= 256; diff --git a/example/VCU1525/fpga/rtl/fpga.v b/example/VCU1525/fpga/rtl/fpga.v index 426b56c8c..6d267f5cb 100644 --- a/example/VCU1525/fpga/rtl/fpga.v +++ b/example/VCU1525/fpga/rtl/fpga.v @@ -53,7 +53,7 @@ module fpga ( parameter AXIS_PCIE_DATA_WIDTH = 512; parameter AXIS_PCIE_KEEP_WIDTH = (AXIS_PCIE_DATA_WIDTH/32); parameter AXIS_PCIE_RC_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 75 : 161; -parameter AXIS_PCIE_RQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 60 : 137; +parameter AXIS_PCIE_RQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 62 : 137; parameter AXIS_PCIE_CQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 85 : 183; parameter AXIS_PCIE_CC_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 33 : 81; parameter RC_STRADDLE = AXIS_PCIE_DATA_WIDTH >= 256; diff --git a/example/ZCU106/fpga/rtl/fpga.v b/example/ZCU106/fpga/rtl/fpga.v index 6e9f9e77d..038c1125c 100644 --- a/example/ZCU106/fpga/rtl/fpga.v +++ b/example/ZCU106/fpga/rtl/fpga.v @@ -58,7 +58,7 @@ module fpga ( parameter AXIS_PCIE_DATA_WIDTH = 128; parameter AXIS_PCIE_KEEP_WIDTH = (AXIS_PCIE_DATA_WIDTH/32); parameter AXIS_PCIE_RC_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 75 : 161; -parameter AXIS_PCIE_RQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 60 : 137; +parameter AXIS_PCIE_RQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 62 : 137; parameter AXIS_PCIE_CQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 85 : 183; parameter AXIS_PCIE_CC_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 
33 : 81; parameter RC_STRADDLE = AXIS_PCIE_DATA_WIDTH >= 256; diff --git a/example/fb2CG/fpga/rtl/fpga.v b/example/fb2CG/fpga/rtl/fpga.v index 78db5d5e9..04ed89ded 100644 --- a/example/fb2CG/fpga/rtl/fpga.v +++ b/example/fb2CG/fpga/rtl/fpga.v @@ -56,7 +56,7 @@ module fpga ( parameter AXIS_PCIE_DATA_WIDTH = 512; parameter AXIS_PCIE_KEEP_WIDTH = (AXIS_PCIE_DATA_WIDTH/32); parameter AXIS_PCIE_RC_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 75 : 161; -parameter AXIS_PCIE_RQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 60 : 137; +parameter AXIS_PCIE_RQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 62 : 137; parameter AXIS_PCIE_CQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 85 : 183; parameter AXIS_PCIE_CC_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 33 : 81; parameter RC_STRADDLE = AXIS_PCIE_DATA_WIDTH >= 256; From bf51c8b7bb0900a295e5d612c34d77f59eb61d42 Mon Sep 17 00:00:00 2001 From: Alex Forencich Date: Tue, 13 Jun 2023 15:38:59 -0700 Subject: [PATCH 03/20] Connect DMA engine busy status outputs Signed-off-by: Alex Forencich --- .../common/driver/example/example_driver.c | 10 ++++++++ example/common/rtl/example_core.v | 13 +++++++++- example/common/rtl/example_core_pcie.v | 22 +++++++++++++---- .../test_example_core_pcie.py | 24 +++++++++++++++---- .../test_example_core_pcie_ptile.py | 24 +++++++++++++++---- .../test_example_core_pcie_s10.py | 24 +++++++++++++++---- .../test_example_core_pcie_us.py | 24 +++++++++++++++---- 7 files changed, 118 insertions(+), 23 deletions(-) diff --git a/example/common/driver/example/example_driver.c b/example/common/driver/example/example_driver.c index 8dcd3b40c..3cd3a6c97 100644 --- a/example/common/driver/example/example_driver.c +++ b/example/common/driver/example/example_driver.c @@ -103,6 +103,8 @@ static void dma_block_read(struct example_dev *edev, if ((ioread32(edev->bar[0] + 0x001000) & 1) != 0) dev_warn(edev->dev, "%s: operation timed out", __func__); + if ((ioread32(edev->bar[0] + 0x000000) & 0x300) != 0) + dev_warn(edev->dev, "%s: DMA 
engine busy", __func__); } static void dma_block_write(struct example_dev *edev, @@ -157,6 +159,8 @@ static void dma_block_write(struct example_dev *edev, if ((ioread32(edev->bar[0] + 0x001100) & 1) != 0) dev_warn(edev->dev, "%s: operation timed out", __func__); + if ((ioread32(edev->bar[0] + 0x000000) & 0x300) != 0) + dev_warn(edev->dev, "%s: DMA engine busy", __func__); } static void dma_block_read_bench(struct example_dev *edev, @@ -365,6 +369,7 @@ static int edev_probe(struct pci_dev *pdev, const struct pci_device_id *ent) msleep(1); dev_info(dev, "Read status"); + dev_info(dev, "%08x", ioread32(edev->bar[0] + 0x000000)); dev_info(dev, "%08x", ioread32(edev->bar[0] + 0x000118)); dev_info(dev, "start copy to host"); @@ -378,6 +383,7 @@ static int edev_probe(struct pci_dev *pdev, const struct pci_device_id *ent) msleep(1); dev_info(dev, "Read status"); + dev_info(dev, "%08x", ioread32(edev->bar[0] + 0x000000)); dev_info(dev, "%08x", ioread32(edev->bar[0] + 0x000218)); dev_info(dev, "read test data"); @@ -402,6 +408,7 @@ static int edev_probe(struct pci_dev *pdev, const struct pci_device_id *ent) msleep(1); dev_info(dev, "Read status"); + dev_info(dev, "%08x", ioread32(edev->bar[0] + 0x000000)); dev_info(dev, "%08x", ioread32(edev->bar[0] + 0x000218)); dev_info(dev, "read data"); @@ -436,6 +443,9 @@ static int edev_probe(struct pci_dev *pdev, const struct pci_device_id *ent) } } + dev_info(dev, "Read status"); + dev_info(dev, "%08x", ioread32(edev->bar[0] + 0x000000)); + // probe complete return 0; diff --git a/example/common/rtl/example_core.v b/example/common/rtl/example_core.v index e0ae1578c..038e37925 100644 --- a/example/common/rtl/example_core.v +++ b/example/common/rtl/example_core.v @@ -152,7 +152,14 @@ module example_core # */ output wire [IRQ_INDEX_WIDTH-1:0] irq_index, output wire irq_valid, - input wire irq_ready + input wire irq_ready, + + /* + * Control and status + */ + output wire dma_enable, + input wire dma_rd_busy, + input wire dma_wr_busy ); 
localparam RAM_ADDR_IMM_WIDTH = (DMA_IMM_ENABLE && (DMA_IMM_WIDTH > RAM_ADDR_WIDTH)) ? DMA_IMM_WIDTH : RAM_ADDR_WIDTH; @@ -284,6 +291,8 @@ assign m_axis_dma_write_desc_valid = dma_write_desc_valid_reg; assign irq_index = 0; assign irq_valid = irq_valid_reg; +assign dma_enable = dma_enable_reg; + always @* begin axil_ctrl_awready_next = 1'b0; axil_ctrl_wready_next = 1'b0; @@ -437,6 +446,8 @@ always @* begin // control 16'h0000: begin axil_ctrl_rdata_next[0] = dma_enable_reg; + axil_ctrl_rdata_next[8] = dma_wr_busy; + axil_ctrl_rdata_next[9] = dma_rd_busy; end 16'h0008: begin axil_ctrl_rdata_next[0] = dma_rd_int_en_reg; diff --git a/example/common/rtl/example_core_pcie.v b/example/common/rtl/example_core_pcie.v index db710450c..83b24d419 100644 --- a/example/common/rtl/example_core_pcie.v +++ b/example/common/rtl/example_core_pcie.v @@ -345,6 +345,11 @@ wire [IRQ_INDEX_WIDTH-1:0] irq_index; wire irq_valid; wire irq_ready; +// Control and status +wire dma_enable; +wire dma_rd_busy; +wire dma_wr_busy; + pcie_tlp_demux_bar #( .PORTS(3), .TLP_DATA_WIDTH(TLP_DATA_WIDTH), @@ -900,8 +905,8 @@ dma_if_pcie_inst ( /* * Configuration */ - .read_enable(1'b1), - .write_enable(1'b1), + .read_enable(dma_enable), + .write_enable(dma_enable), .ext_tag_enable(ext_tag_enable), .rcb_128b(rcb_128b), .requester_id({bus_num, 5'd0, 3'd0}), @@ -911,8 +916,8 @@ dma_if_pcie_inst ( /* * Status */ - .status_rd_busy(), - .status_wr_busy(), + .status_rd_busy(dma_rd_busy), + .status_wr_busy(dma_wr_busy), .status_error_cor(status_error_cor_int[3]), .status_error_uncor(status_error_uncor_int[3]) ); @@ -1109,7 +1114,14 @@ core_inst ( */ .irq_index(irq_index), .irq_valid(irq_valid), - .irq_ready(irq_ready) + .irq_ready(irq_ready), + + /* + * Control and status + */ + .dma_enable(dma_enable), + .dma_rd_busy(dma_rd_busy), + .dma_wr_busy(dma_wr_busy) ); endmodule diff --git a/example/common/tb/example_core_pcie/test_example_core_pcie.py b/example/common/tb/example_core_pcie/test_example_core_pcie.py index 
70f1b9813..2d4634c54 100644 --- a/example/common/tb/example_core_pcie/test_example_core_pcie.py +++ b/example/common/tb/example_core_pcie/test_example_core_pcie.py @@ -224,6 +224,8 @@ async def run_test(dut): await Timer(2000, 'ns') # read status + status = await dev_pf0_bar0.read_dword(0x000000) + tb.log.info("DMA Status: 0x%x", status) val = await dev_pf0_bar0.read_dword(0x000118) tb.log.info("Status: 0x%x", val) assert val == 0x800000AA @@ -238,6 +240,8 @@ async def run_test(dut): await Timer(2000, 'ns') # read status + status = await dev_pf0_bar0.read_dword(0x000000) + tb.log.info("DMA Status: 0x%x", status) val = await dev_pf0_bar0.read_dword(0x000218) tb.log.info("Status: 0x%x", val) assert val == 0x80000055 @@ -258,6 +262,8 @@ async def run_test(dut): await Timer(2000, 'ns') # read status + status = await dev_pf0_bar0.read_dword(0x000000) + tb.log.info("DMA Status: 0x%x", status) val = await dev_pf0_bar0.read_dword(0x000218) tb.log.info("Status: 0x%x", val) assert val == 0x800000AA @@ -321,11 +327,15 @@ async def run_test(dut): await dev_pf0_bar0.write_dword(0x001000, 1) for k in range(10): - cnt = await dev_pf0_bar0.read_dword(0x001018) await Timer(1000, 'ns') - if cnt == 0: + run = await dev_pf0_bar0.read_dword(0x001000) + if run == 0: break + # read status + status = await dev_pf0_bar0.read_dword(0x000000) + tb.log.info("DMA Status: 0x%x", status) + # configure operation (write) # DMA base address await dev_pf0_bar0.write_dword(0x001180, (mem_base+dest_offset) & 0xffffffff) @@ -363,11 +373,17 @@ async def run_test(dut): await dev_pf0_bar0.write_dword(0x001100, 1) for k in range(10): - cnt = await dev_pf0_bar0.read_dword(0x001118) await Timer(1000, 'ns') - if cnt == 0: + run = await dev_pf0_bar0.read_dword(0x001100) + if run == 0: break + # read status + status = await dev_pf0_bar0.read_dword(0x000000) + tb.log.info("DMA Status: 0x%x", status) + + assert status & 0x300 == 0 + tb.log.info("%s", mem.hexdump_str(dest_offset, region_len)) assert 
mem[src_offset:src_offset+region_len] == mem[dest_offset:dest_offset+region_len] diff --git a/example/common/tb/example_core_pcie_ptile/test_example_core_pcie_ptile.py b/example/common/tb/example_core_pcie_ptile/test_example_core_pcie_ptile.py index b82024cc6..fddffe5ee 100644 --- a/example/common/tb/example_core_pcie_ptile/test_example_core_pcie_ptile.py +++ b/example/common/tb/example_core_pcie_ptile/test_example_core_pcie_ptile.py @@ -309,6 +309,8 @@ async def run_test(dut): await Timer(2000, 'ns') # read status + status = await dev_pf0_bar0.read_dword(0x000000) + tb.log.info("DMA Status: 0x%x", status) val = await dev_pf0_bar0.read_dword(0x000118) tb.log.info("Status: 0x%x", val) assert val == 0x800000AA @@ -323,6 +325,8 @@ async def run_test(dut): await Timer(2000, 'ns') # read status + status = await dev_pf0_bar0.read_dword(0x000000) + tb.log.info("DMA Status: 0x%x", status) val = await dev_pf0_bar0.read_dword(0x000218) tb.log.info("Status: 0x%x", val) assert val == 0x80000055 @@ -343,6 +347,8 @@ async def run_test(dut): await Timer(2000, 'ns') # read status + status = await dev_pf0_bar0.read_dword(0x000000) + tb.log.info("DMA Status: 0x%x", status) val = await dev_pf0_bar0.read_dword(0x000218) tb.log.info("Status: 0x%x", val) assert val == 0x800000AA @@ -406,11 +412,15 @@ async def run_test(dut): await dev_pf0_bar0.write_dword(0x001000, 1) for k in range(10): - cnt = await dev_pf0_bar0.read_dword(0x001018) await Timer(1000, 'ns') - if cnt == 0: + run = await dev_pf0_bar0.read_dword(0x001000) + if run == 0: break + # read status + status = await dev_pf0_bar0.read_dword(0x000000) + tb.log.info("DMA Status: 0x%x", status) + # configure operation (write) # DMA base address await dev_pf0_bar0.write_dword(0x001180, (mem_base+dest_offset) & 0xffffffff) @@ -448,11 +458,17 @@ async def run_test(dut): await dev_pf0_bar0.write_dword(0x001100, 1) for k in range(10): - cnt = await dev_pf0_bar0.read_dword(0x001118) await Timer(1000, 'ns') - if cnt == 0: + run = await 
dev_pf0_bar0.read_dword(0x001100) + if run == 0: break + # read status + status = await dev_pf0_bar0.read_dword(0x000000) + tb.log.info("DMA Status: 0x%x", status) + + assert status & 0x300 == 0 + tb.log.info("%s", mem.hexdump_str(dest_offset, region_len)) assert mem[src_offset:src_offset+region_len] == mem[dest_offset:dest_offset+region_len] diff --git a/example/common/tb/example_core_pcie_s10/test_example_core_pcie_s10.py b/example/common/tb/example_core_pcie_s10/test_example_core_pcie_s10.py index b74f4a58e..59929ddbf 100644 --- a/example/common/tb/example_core_pcie_s10/test_example_core_pcie_s10.py +++ b/example/common/tb/example_core_pcie_s10/test_example_core_pcie_s10.py @@ -257,6 +257,8 @@ async def run_test(dut): await Timer(2000, 'ns') # read status + status = await dev_pf0_bar0.read_dword(0x000000) + tb.log.info("DMA Status: 0x%x", status) val = await dev_pf0_bar0.read_dword(0x000118) tb.log.info("Status: 0x%x", val) assert val == 0x800000AA @@ -271,6 +273,8 @@ async def run_test(dut): await Timer(2000, 'ns') # read status + status = await dev_pf0_bar0.read_dword(0x000000) + tb.log.info("DMA Status: 0x%x", status) val = await dev_pf0_bar0.read_dword(0x000218) tb.log.info("Status: 0x%x", val) assert val == 0x80000055 @@ -291,6 +295,8 @@ async def run_test(dut): await Timer(2000, 'ns') # read status + status = await dev_pf0_bar0.read_dword(0x000000) + tb.log.info("DMA Status: 0x%x", status) val = await dev_pf0_bar0.read_dword(0x000218) tb.log.info("Status: 0x%x", val) assert val == 0x800000AA @@ -354,11 +360,15 @@ async def run_test(dut): await dev_pf0_bar0.write_dword(0x001000, 1) for k in range(10): - cnt = await dev_pf0_bar0.read_dword(0x001018) await Timer(1000, 'ns') - if cnt == 0: + run = await dev_pf0_bar0.read_dword(0x001000) + if run == 0: break + # read status + status = await dev_pf0_bar0.read_dword(0x000000) + tb.log.info("DMA Status: 0x%x", status) + # configure operation (write) # DMA base address await dev_pf0_bar0.write_dword(0x001180, 
(mem_base+dest_offset) & 0xffffffff) @@ -396,11 +406,17 @@ async def run_test(dut): await dev_pf0_bar0.write_dword(0x001100, 1) for k in range(10): - cnt = await dev_pf0_bar0.read_dword(0x001118) await Timer(1000, 'ns') - if cnt == 0: + run = await dev_pf0_bar0.read_dword(0x001100) + if run == 0: break + # read status + status = await dev_pf0_bar0.read_dword(0x000000) + tb.log.info("DMA Status: 0x%x", status) + + assert status & 0x300 == 0 + tb.log.info("%s", mem.hexdump_str(dest_offset, region_len)) assert mem[src_offset:src_offset+region_len] == mem[dest_offset:dest_offset+region_len] diff --git a/example/common/tb/example_core_pcie_us/test_example_core_pcie_us.py b/example/common/tb/example_core_pcie_us/test_example_core_pcie_us.py index e728be19c..a57d9993e 100644 --- a/example/common/tb/example_core_pcie_us/test_example_core_pcie_us.py +++ b/example/common/tb/example_core_pcie_us/test_example_core_pcie_us.py @@ -350,6 +350,8 @@ async def run_test(dut): await Timer(2000, 'ns') # read status + status = await dev_pf0_bar0.read_dword(0x000000) + tb.log.info("DMA Status: 0x%x", status) val = await dev_pf0_bar0.read_dword(0x000118) tb.log.info("Status: 0x%x", val) assert val == 0x800000AA @@ -364,6 +366,8 @@ async def run_test(dut): await Timer(2000, 'ns') # read status + status = await dev_pf0_bar0.read_dword(0x000000) + tb.log.info("DMA Status: 0x%x", status) val = await dev_pf0_bar0.read_dword(0x000218) tb.log.info("Status: 0x%x", val) assert val == 0x80000055 @@ -384,6 +388,8 @@ async def run_test(dut): await Timer(2000, 'ns') # read status + status = await dev_pf0_bar0.read_dword(0x000000) + tb.log.info("DMA Status: 0x%x", status) val = await dev_pf0_bar0.read_dword(0x000218) tb.log.info("Status: 0x%x", val) assert val == 0x800000AA @@ -447,11 +453,15 @@ async def run_test(dut): await dev_pf0_bar0.write_dword(0x001000, 1) for k in range(10): - cnt = await dev_pf0_bar0.read_dword(0x001018) await Timer(1000, 'ns') - if cnt == 0: + run = await 
dev_pf0_bar0.read_dword(0x001000) + if run == 0: break + # read status + status = await dev_pf0_bar0.read_dword(0x000000) + tb.log.info("DMA Status: 0x%x", status) + # configure operation (write) # DMA base address await dev_pf0_bar0.write_dword(0x001180, (mem_base+dest_offset) & 0xffffffff) @@ -489,12 +499,16 @@ async def run_test(dut): await dev_pf0_bar0.write_dword(0x001100, 1) for k in range(10): - cnt = await dev_pf0_bar0.read_dword(0x001118) await Timer(1000, 'ns') - if cnt == 0: + run = await dev_pf0_bar0.read_dword(0x001100) + if run == 0: break - await Timer(2000, 'ns') + # read status + status = await dev_pf0_bar0.read_dword(0x000000) + tb.log.info("DMA Status: 0x%x", status) + + assert status & 0x300 == 0 tb.log.info("%s", mem.hexdump_str(dest_offset, region_len)) From 9536554c5ab33fd3f7f6431ee3ea40839e2bd71a Mon Sep 17 00:00:00 2001 From: Alex Forencich Date: Tue, 13 Jun 2023 15:41:10 -0700 Subject: [PATCH 04/20] Add request and completion counters Signed-off-by: Alex Forencich --- .../common/driver/example/example_driver.c | 21 ++++++++++++++---- example/common/rtl/example_core.v | 22 ++++++++++++++++--- example/common/rtl/example_core_pcie.v | 5 ++++- 3 files changed, 40 insertions(+), 8 deletions(-) diff --git a/example/common/driver/example/example_driver.c b/example/common/driver/example/example_driver.c index 3cd3a6c97..69aa05a9c 100644 --- a/example/common/driver/example/example_driver.c +++ b/example/common/driver/example/example_driver.c @@ -167,9 +167,14 @@ static void dma_block_read_bench(struct example_dev *edev, dma_addr_t dma_addr, u64 size, u64 stride, u64 count) { u64 cycles; + u32 rd_req; + u32 rd_cpl; udelay(5); + rd_req = ioread32(edev->bar[0] + 0x000020); + rd_cpl = ioread32(edev->bar[0] + 0x000024); + dma_block_read(edev, dma_addr, 0, 0x3fff, stride, 0, 0, 0x3fff, stride, size, count); @@ -177,17 +182,23 @@ static void dma_block_read_bench(struct example_dev *edev, udelay(5); - dev_info(edev->dev, "read %lld blocks of %lld bytes 
(stride %lld) in %lld ns: %lld Mbps", - count, size, stride, cycles * 4, size * count * 8 * 1000 / (cycles * 4)); + rd_req = ioread32(edev->bar[0] + 0x000020) - rd_req; + rd_cpl = ioread32(edev->bar[0] + 0x000024) - rd_cpl; + + dev_info(edev->dev, "read %lld blocks of %lld bytes (stride %lld) in %lld ns (%d req %d cpl): %lld Mbps", + count, size, stride, cycles * 4, rd_req, rd_cpl, size * count * 8 * 1000 / (cycles * 4)); } static void dma_block_write_bench(struct example_dev *edev, dma_addr_t dma_addr, u64 size, u64 stride, u64 count) { u64 cycles; + u32 wr_req; udelay(5); + wr_req = ioread32(edev->bar[0] + 0x000028); + dma_block_write(edev, dma_addr, 0, 0x3fff, stride, 0, 0, 0x3fff, stride, size, count); @@ -195,8 +206,10 @@ static void dma_block_write_bench(struct example_dev *edev, udelay(5); - dev_info(edev->dev, "wrote %lld blocks of %lld bytes (stride %lld) in %lld ns: %lld Mbps", - count, size, stride, cycles * 4, size * count * 8 * 1000 / (cycles * 4)); + wr_req = ioread32(edev->bar[0] + 0x000028) - wr_req; + + dev_info(edev->dev, "wrote %lld blocks of %lld bytes (stride %lld) in %lld ns (%d req): %lld Mbps", + count, size, stride, cycles * 4, wr_req, size * count * 8 * 1000 / (cycles * 4)); } static irqreturn_t edev_intr(int irq, void *data) diff --git a/example/common/rtl/example_core.v b/example/common/rtl/example_core.v index 038e37925..6729cbaf0 100644 --- a/example/common/rtl/example_core.v +++ b/example/common/rtl/example_core.v @@ -159,7 +159,10 @@ module example_core # */ output wire dma_enable, input wire dma_rd_busy, - input wire dma_wr_busy + input wire dma_wr_busy, + input wire dma_rd_req, + input wire dma_rd_cpl, + input wire dma_wr_req ); localparam RAM_ADDR_IMM_WIDTH = (DMA_IMM_ENABLE && (DMA_IMM_WIDTH > RAM_ADDR_WIDTH)) ? 
DMA_IMM_WIDTH : RAM_ADDR_WIDTH; @@ -210,6 +213,9 @@ reg axil_ctrl_rvalid_reg = 1'b0, axil_ctrl_rvalid_next; reg [63:0] cycle_count_reg = 0; reg [15:0] dma_read_active_count_reg = 0; reg [15:0] dma_write_active_count_reg = 0; +reg [31:0] dma_rd_req_count_reg = 0; +reg [31:0] dma_rd_cpl_count_reg = 0; +reg [31:0] dma_wr_req_count_reg = 0; reg [DMA_ADDR_WIDTH-1:0] dma_read_desc_dma_addr_reg = 0, dma_read_desc_dma_addr_next; reg [RAM_ADDR_WIDTH-1:0] dma_read_desc_ram_addr_reg = 0, dma_read_desc_ram_addr_next; @@ -455,8 +461,11 @@ always @* begin end 16'h0010: axil_ctrl_rdata_next = cycle_count_reg; 16'h0014: axil_ctrl_rdata_next = cycle_count_reg >> 32; - 16'h0020: axil_ctrl_rdata_next = dma_read_active_count_reg; - 16'h0028: axil_ctrl_rdata_next = dma_write_active_count_reg; + 16'h0018: axil_ctrl_rdata_next = dma_read_active_count_reg; + 16'h001c: axil_ctrl_rdata_next = dma_write_active_count_reg; + 16'h0020: axil_ctrl_rdata_next = dma_rd_req_count_reg; + 16'h0024: axil_ctrl_rdata_next = dma_rd_cpl_count_reg; + 16'h0028: axil_ctrl_rdata_next = dma_wr_req_count_reg; // single read 16'h0100: axil_ctrl_rdata_next = dma_read_desc_dma_addr_reg; 16'h0104: axil_ctrl_rdata_next = dma_read_desc_dma_addr_reg >> 32; @@ -626,6 +635,10 @@ always @(posedge clk) begin + (m_axis_dma_write_desc_valid && m_axis_dma_write_desc_ready) - s_axis_dma_write_desc_status_valid; + dma_rd_req_count_reg <= dma_rd_req_count_reg + dma_rd_req; + dma_rd_cpl_count_reg <= dma_rd_cpl_count_reg + dma_rd_cpl; + dma_wr_req_count_reg <= dma_wr_req_count_reg + dma_wr_req; + dma_read_desc_dma_addr_reg <= dma_read_desc_dma_addr_next; dma_read_desc_ram_addr_reg <= dma_read_desc_ram_addr_next; dma_read_desc_len_reg <= dma_read_desc_len_next; @@ -690,6 +703,9 @@ always @(posedge clk) begin cycle_count_reg <= 0; dma_read_active_count_reg <= 0; dma_write_active_count_reg <= 0; + dma_rd_req_count_reg <= 0; + dma_rd_cpl_count_reg <= 0; + dma_wr_req_count_reg <= 0; dma_read_desc_valid_reg <= 1'b0; 
dma_read_desc_status_valid_reg <= 1'b0; diff --git a/example/common/rtl/example_core_pcie.v b/example/common/rtl/example_core_pcie.v index 83b24d419..764e85bd5 100644 --- a/example/common/rtl/example_core_pcie.v +++ b/example/common/rtl/example_core_pcie.v @@ -1121,7 +1121,10 @@ core_inst ( */ .dma_enable(dma_enable), .dma_rd_busy(dma_rd_busy), - .dma_wr_busy(dma_wr_busy) + .dma_wr_busy(dma_wr_busy), + .dma_rd_req(tx_rd_req_tlp_valid && tx_rd_req_tlp_sop && tx_rd_req_tlp_ready), + .dma_rd_cpl(rx_cpl_tlp_valid && rx_cpl_tlp_sop && rx_cpl_tlp_ready), + .dma_wr_req(tx_wr_req_tlp_valid && tx_wr_req_tlp_sop && tx_wr_req_tlp_ready) ); endmodule From ca655ca9fb9963630567dd62d63e17dbf7832894 Mon Sep 17 00:00:00 2001 From: Alex Forencich Date: Fri, 16 Jun 2023 16:55:42 -0700 Subject: [PATCH 05/20] Update example designs based on results of buffer size tests Signed-off-by: Alex Forencich --- example/ADM_PCIE_9V3/fpga/rtl/fpga_core.v | 7 +++---- example/AU200/fpga/rtl/fpga_core.v | 7 +++---- example/AU250/fpga/rtl/fpga_core.v | 7 +++---- example/AU280/fpga/rtl/fpga_core.v | 7 +++---- example/AU50/fpga/rtl/fpga_core.v | 7 +++---- example/ExaNIC_X10/fpga/rtl/fpga_core.v | 5 ++--- example/ExaNIC_X25/fpga/rtl/fpga_core.v | 7 +++---- example/VCU108/fpga/rtl/fpga_core.v | 5 ++--- example/VCU118/fpga/rtl/fpga_core.v | 7 +++---- example/VCU1525/fpga/rtl/fpga_core.v | 7 +++---- example/ZCU106/fpga/rtl/fpga_core.v | 7 +++---- example/common/rtl/example_core_pcie_us.v | 4 ++-- example/common/tb/example_core_pcie_us/Makefile | 4 ++-- .../tb/example_core_pcie_us/test_example_core_pcie_us.py | 4 ++-- example/fb2CG/fpga/rtl/fpga_core.v | 7 +++---- 15 files changed, 40 insertions(+), 52 deletions(-) diff --git a/example/ADM_PCIE_9V3/fpga/rtl/fpga_core.v b/example/ADM_PCIE_9V3/fpga/rtl/fpga_core.v index 78f28f9a0..fb1c82576 100644 --- a/example/ADM_PCIE_9V3/fpga/rtl/fpga_core.v +++ b/example/ADM_PCIE_9V3/fpga/rtl/fpga_core.v @@ -159,8 +159,8 @@ example_core_pcie_us #( 
.PCIE_TAG_COUNT(PCIE_TAG_COUNT), .READ_OP_TABLE_SIZE(PCIE_TAG_COUNT), .READ_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)), - .READ_CPLH_FC_LIMIT(128), - .READ_CPLD_FC_LIMIT(2048), + .READ_CPLH_FC_LIMIT(256), + .READ_CPLD_FC_LIMIT(2048-256), .WRITE_OP_TABLE_SIZE(2**(RQ_SEQ_NUM_WIDTH-1)), .WRITE_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)), .BAR0_APERTURE(BAR0_APERTURE), @@ -263,8 +263,7 @@ example_core_pcie_us_inst ( */ .cfg_max_read_req(cfg_max_read_req), .cfg_max_payload(cfg_max_payload), - // .cfg_rcb_status(cfg_rcb_status), - .cfg_rcb_status(1'b1), // force RCB 128 due to insufficient CPLH limit in US+ PCIe HIP + .cfg_rcb_status(cfg_rcb_status), /* * Status diff --git a/example/AU200/fpga/rtl/fpga_core.v b/example/AU200/fpga/rtl/fpga_core.v index 39d8ab260..68701b8c8 100644 --- a/example/AU200/fpga/rtl/fpga_core.v +++ b/example/AU200/fpga/rtl/fpga_core.v @@ -156,8 +156,8 @@ example_core_pcie_us #( .PCIE_TAG_COUNT(PCIE_TAG_COUNT), .READ_OP_TABLE_SIZE(PCIE_TAG_COUNT), .READ_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)), - .READ_CPLH_FC_LIMIT(128), - .READ_CPLD_FC_LIMIT(2048), + .READ_CPLH_FC_LIMIT(256), + .READ_CPLD_FC_LIMIT(2048-256), .WRITE_OP_TABLE_SIZE(2**(RQ_SEQ_NUM_WIDTH-1)), .WRITE_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)), .BAR0_APERTURE(BAR0_APERTURE), @@ -260,8 +260,7 @@ example_core_pcie_us_inst ( */ .cfg_max_read_req(cfg_max_read_req), .cfg_max_payload(cfg_max_payload), - // .cfg_rcb_status(cfg_rcb_status), - .cfg_rcb_status(1'b1), // force RCB 128 due to insufficient CPLH limit in US+ PCIe HIP + .cfg_rcb_status(cfg_rcb_status), /* * Status diff --git a/example/AU250/fpga/rtl/fpga_core.v b/example/AU250/fpga/rtl/fpga_core.v index 39d8ab260..68701b8c8 100644 --- a/example/AU250/fpga/rtl/fpga_core.v +++ b/example/AU250/fpga/rtl/fpga_core.v @@ -156,8 +156,8 @@ example_core_pcie_us #( .PCIE_TAG_COUNT(PCIE_TAG_COUNT), .READ_OP_TABLE_SIZE(PCIE_TAG_COUNT), .READ_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)), - .READ_CPLH_FC_LIMIT(128), - .READ_CPLD_FC_LIMIT(2048), + .READ_CPLH_FC_LIMIT(256), + 
.READ_CPLD_FC_LIMIT(2048-256), .WRITE_OP_TABLE_SIZE(2**(RQ_SEQ_NUM_WIDTH-1)), .WRITE_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)), .BAR0_APERTURE(BAR0_APERTURE), @@ -260,8 +260,7 @@ example_core_pcie_us_inst ( */ .cfg_max_read_req(cfg_max_read_req), .cfg_max_payload(cfg_max_payload), - // .cfg_rcb_status(cfg_rcb_status), - .cfg_rcb_status(1'b1), // force RCB 128 due to insufficient CPLH limit in US+ PCIe HIP + .cfg_rcb_status(cfg_rcb_status), /* * Status diff --git a/example/AU280/fpga/rtl/fpga_core.v b/example/AU280/fpga/rtl/fpga_core.v index 644ee09fd..ed244292b 100644 --- a/example/AU280/fpga/rtl/fpga_core.v +++ b/example/AU280/fpga/rtl/fpga_core.v @@ -148,8 +148,8 @@ example_core_pcie_us #( .PCIE_TAG_COUNT(PCIE_TAG_COUNT), .READ_OP_TABLE_SIZE(PCIE_TAG_COUNT), .READ_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)), - .READ_CPLH_FC_LIMIT(128), - .READ_CPLD_FC_LIMIT(2048), + .READ_CPLH_FC_LIMIT(256), + .READ_CPLD_FC_LIMIT(2048-256), .WRITE_OP_TABLE_SIZE(2**(RQ_SEQ_NUM_WIDTH-1)), .WRITE_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)), .BAR0_APERTURE(BAR0_APERTURE), @@ -252,8 +252,7 @@ example_core_pcie_us_inst ( */ .cfg_max_read_req(cfg_max_read_req), .cfg_max_payload(cfg_max_payload), - // .cfg_rcb_status(cfg_rcb_status), - .cfg_rcb_status(1'b1), // force RCB 128 due to insufficient CPLH limit in US+ PCIe HIP + .cfg_rcb_status(cfg_rcb_status), /* * Status diff --git a/example/AU50/fpga/rtl/fpga_core.v b/example/AU50/fpga/rtl/fpga_core.v index 1fff150c5..75875b0b6 100644 --- a/example/AU50/fpga/rtl/fpga_core.v +++ b/example/AU50/fpga/rtl/fpga_core.v @@ -159,8 +159,8 @@ example_core_pcie_us #( .PCIE_TAG_COUNT(PCIE_TAG_COUNT), .READ_OP_TABLE_SIZE(PCIE_TAG_COUNT), .READ_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)), - .READ_CPLH_FC_LIMIT(128), - .READ_CPLD_FC_LIMIT(2048), + .READ_CPLH_FC_LIMIT(256), + .READ_CPLD_FC_LIMIT(2048-256), .WRITE_OP_TABLE_SIZE(2**(RQ_SEQ_NUM_WIDTH-1)), .WRITE_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)), .BAR0_APERTURE(BAR0_APERTURE), @@ -263,8 +263,7 @@ example_core_pcie_us_inst ( */ 
.cfg_max_read_req(cfg_max_read_req), .cfg_max_payload(cfg_max_payload), - // .cfg_rcb_status(cfg_rcb_status), - .cfg_rcb_status(1'b1), // force RCB 128 due to insufficient CPLH limit in US+ PCIe HIP + .cfg_rcb_status(cfg_rcb_status), /* * Status diff --git a/example/ExaNIC_X10/fpga/rtl/fpga_core.v b/example/ExaNIC_X10/fpga/rtl/fpga_core.v index d33065d87..c0078c3b7 100644 --- a/example/ExaNIC_X10/fpga/rtl/fpga_core.v +++ b/example/ExaNIC_X10/fpga/rtl/fpga_core.v @@ -155,7 +155,7 @@ example_core_pcie_us #( .READ_OP_TABLE_SIZE(PCIE_TAG_COUNT), .READ_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)), .READ_CPLH_FC_LIMIT(64), - .READ_CPLD_FC_LIMIT(992), + .READ_CPLD_FC_LIMIT(1024-64), .WRITE_OP_TABLE_SIZE(2**(RQ_SEQ_NUM_WIDTH-1)), .WRITE_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)), .BAR0_APERTURE(BAR0_APERTURE), @@ -258,8 +258,7 @@ example_core_pcie_us_inst ( */ .cfg_max_read_req(cfg_max_read_req), .cfg_max_payload(cfg_max_payload), - // .cfg_rcb_status(cfg_rcb_status), - .cfg_rcb_status(1'b1), // force RCB 128 due to insufficient CPLH limit in US PCIe HIP + .cfg_rcb_status(cfg_rcb_status), /* * Status diff --git a/example/ExaNIC_X25/fpga/rtl/fpga_core.v b/example/ExaNIC_X25/fpga/rtl/fpga_core.v index 08a1bef83..7617c4390 100644 --- a/example/ExaNIC_X25/fpga/rtl/fpga_core.v +++ b/example/ExaNIC_X25/fpga/rtl/fpga_core.v @@ -159,8 +159,8 @@ example_core_pcie_us #( .PCIE_TAG_COUNT(PCIE_TAG_COUNT), .READ_OP_TABLE_SIZE(PCIE_TAG_COUNT), .READ_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)), - .READ_CPLH_FC_LIMIT(128), - .READ_CPLD_FC_LIMIT(2048), + .READ_CPLH_FC_LIMIT(256), + .READ_CPLD_FC_LIMIT(2048-256), .WRITE_OP_TABLE_SIZE(2**(RQ_SEQ_NUM_WIDTH-1)), .WRITE_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)), .BAR0_APERTURE(BAR0_APERTURE), @@ -263,8 +263,7 @@ example_core_pcie_us_inst ( */ .cfg_max_read_req(cfg_max_read_req), .cfg_max_payload(cfg_max_payload), - // .cfg_rcb_status(cfg_rcb_status), - .cfg_rcb_status(1'b1), // force RCB 128 due to insufficient CPLH limit in US+ PCIe HIP + .cfg_rcb_status(cfg_rcb_status), /* * 
Status diff --git a/example/VCU108/fpga/rtl/fpga_core.v b/example/VCU108/fpga/rtl/fpga_core.v index 510ea1bba..a6b213963 100644 --- a/example/VCU108/fpga/rtl/fpga_core.v +++ b/example/VCU108/fpga/rtl/fpga_core.v @@ -157,7 +157,7 @@ example_core_pcie_us #( .READ_OP_TABLE_SIZE(PCIE_TAG_COUNT), .READ_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)), .READ_CPLH_FC_LIMIT(64), - .READ_CPLD_FC_LIMIT(992), + .READ_CPLD_FC_LIMIT(1024-64), .WRITE_OP_TABLE_SIZE(2**(RQ_SEQ_NUM_WIDTH-1)), .WRITE_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)), .BAR0_APERTURE(BAR0_APERTURE), @@ -260,8 +260,7 @@ example_core_pcie_us_inst ( */ .cfg_max_read_req(cfg_max_read_req), .cfg_max_payload(cfg_max_payload), - // .cfg_rcb_status(cfg_rcb_status), - .cfg_rcb_status(1'b1), // force RCB 128 due to insufficient CPLH limit in US PCIe HIP + .cfg_rcb_status(cfg_rcb_status), /* * Status diff --git a/example/VCU118/fpga/rtl/fpga_core.v b/example/VCU118/fpga/rtl/fpga_core.v index 573ce020f..10d0c665e 100644 --- a/example/VCU118/fpga/rtl/fpga_core.v +++ b/example/VCU118/fpga/rtl/fpga_core.v @@ -161,8 +161,8 @@ example_core_pcie_us #( .PCIE_TAG_COUNT(PCIE_TAG_COUNT), .READ_OP_TABLE_SIZE(PCIE_TAG_COUNT), .READ_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)), - .READ_CPLH_FC_LIMIT(128), - .READ_CPLD_FC_LIMIT(2048), + .READ_CPLH_FC_LIMIT(256), + .READ_CPLD_FC_LIMIT(2048-256), .WRITE_OP_TABLE_SIZE(2**(RQ_SEQ_NUM_WIDTH-1)), .WRITE_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)), .BAR0_APERTURE(BAR0_APERTURE), @@ -265,8 +265,7 @@ example_core_pcie_us_inst ( */ .cfg_max_read_req(cfg_max_read_req), .cfg_max_payload(cfg_max_payload), - // .cfg_rcb_status(cfg_rcb_status), - .cfg_rcb_status(1'b1), // force RCB 128 due to insufficient CPLH limit in US+ PCIe HIP + .cfg_rcb_status(cfg_rcb_status), /* * Status diff --git a/example/VCU1525/fpga/rtl/fpga_core.v b/example/VCU1525/fpga/rtl/fpga_core.v index 39d8ab260..68701b8c8 100644 --- a/example/VCU1525/fpga/rtl/fpga_core.v +++ b/example/VCU1525/fpga/rtl/fpga_core.v @@ -156,8 +156,8 @@ example_core_pcie_us #( 
.PCIE_TAG_COUNT(PCIE_TAG_COUNT), .READ_OP_TABLE_SIZE(PCIE_TAG_COUNT), .READ_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)), - .READ_CPLH_FC_LIMIT(128), - .READ_CPLD_FC_LIMIT(2048), + .READ_CPLH_FC_LIMIT(256), + .READ_CPLD_FC_LIMIT(2048-256), .WRITE_OP_TABLE_SIZE(2**(RQ_SEQ_NUM_WIDTH-1)), .WRITE_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)), .BAR0_APERTURE(BAR0_APERTURE), @@ -260,8 +260,7 @@ example_core_pcie_us_inst ( */ .cfg_max_read_req(cfg_max_read_req), .cfg_max_payload(cfg_max_payload), - // .cfg_rcb_status(cfg_rcb_status), - .cfg_rcb_status(1'b1), // force RCB 128 due to insufficient CPLH limit in US+ PCIe HIP + .cfg_rcb_status(cfg_rcb_status), /* * Status diff --git a/example/ZCU106/fpga/rtl/fpga_core.v b/example/ZCU106/fpga/rtl/fpga_core.v index b3879c240..e7581c496 100644 --- a/example/ZCU106/fpga/rtl/fpga_core.v +++ b/example/ZCU106/fpga/rtl/fpga_core.v @@ -161,8 +161,8 @@ example_core_pcie_us #( .PCIE_TAG_COUNT(PCIE_TAG_COUNT), .READ_OP_TABLE_SIZE(PCIE_TAG_COUNT), .READ_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)), - .READ_CPLH_FC_LIMIT(128), - .READ_CPLD_FC_LIMIT(2048), + .READ_CPLH_FC_LIMIT(256), + .READ_CPLD_FC_LIMIT(2048-256), .WRITE_OP_TABLE_SIZE(2**(RQ_SEQ_NUM_WIDTH-1)), .WRITE_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)), .BAR0_APERTURE(BAR0_APERTURE), @@ -265,8 +265,7 @@ example_core_pcie_us_inst ( */ .cfg_max_read_req(cfg_max_read_req), .cfg_max_payload(cfg_max_payload), - // .cfg_rcb_status(cfg_rcb_status), - .cfg_rcb_status(1'b1), // force RCB 128 due to insufficient CPLH limit in US+ PCIe HIP + .cfg_rcb_status(cfg_rcb_status), /* * Status diff --git a/example/common/rtl/example_core_pcie_us.v b/example/common/rtl/example_core_pcie_us.v index c8fe2cfcf..e79602735 100644 --- a/example/common/rtl/example_core_pcie_us.v +++ b/example/common/rtl/example_core_pcie_us.v @@ -68,9 +68,9 @@ module example_core_pcie_us # // In-flight transmit limit (read) parameter READ_TX_LIMIT = 2**(RQ_SEQ_NUM_WIDTH-1), // Completion header flow control credit limit (read) - parameter READ_CPLH_FC_LIMIT = 
AXIS_PCIE_RQ_USER_WIDTH == 60 ? 64 : 128, + parameter READ_CPLH_FC_LIMIT = AXIS_PCIE_RQ_USER_WIDTH == 60 ? 64 : 256, // Completion data flow control credit limit (read) - parameter READ_CPLD_FC_LIMIT = AXIS_PCIE_RQ_USER_WIDTH == 60 ? 992 : 2048, + parameter READ_CPLD_FC_LIMIT = AXIS_PCIE_RQ_USER_WIDTH == 60 ? 1024-64 : 2048-256, // Operation table size (write) parameter WRITE_OP_TABLE_SIZE = 2**(RQ_SEQ_NUM_WIDTH-1), // In-flight transmit limit (write) diff --git a/example/common/tb/example_core_pcie_us/Makefile b/example/common/tb/example_core_pcie_us/Makefile index 4f6ced3e1..fe6923cb2 100644 --- a/example/common/tb/example_core_pcie_us/Makefile +++ b/example/common/tb/example_core_pcie_us/Makefile @@ -74,8 +74,8 @@ export PARAM_IMM_ENABLE := 1 export PARAM_IMM_WIDTH := 32 export PARAM_READ_OP_TABLE_SIZE := $(PARAM_PCIE_TAG_COUNT) export PARAM_READ_TX_LIMIT := $(shell echo "$$(( 1 << ($(PARAM_RQ_SEQ_NUM_WIDTH)-1) ))" ) -export PARAM_READ_CPLH_FC_LIMIT := $(if $(filter-out 60,$(PARAM_AXIS_PCIE_RQ_USER_WIDTH)),64,128) -export PARAM_READ_CPLD_FC_LIMIT := $(if $(filter-out 60,$(PARAM_AXIS_PCIE_RQ_USER_WIDTH)),992,2048) +export PARAM_READ_CPLH_FC_LIMIT := $(if $(filter-out 60,$(PARAM_AXIS_PCIE_RQ_USER_WIDTH)),256,64) +export PARAM_READ_CPLD_FC_LIMIT := $(if $(filter-out 60,$(PARAM_AXIS_PCIE_RQ_USER_WIDTH)),1792,960) export PARAM_WRITE_OP_TABLE_SIZE := $(shell echo "$$(( 1 << ($(PARAM_RQ_SEQ_NUM_WIDTH)-1) ))" ) export PARAM_WRITE_TX_LIMIT := $(shell echo "$$(( 1 << ($(PARAM_RQ_SEQ_NUM_WIDTH)-1) ))" ) export PARAM_BAR0_APERTURE := 24 diff --git a/example/common/tb/example_core_pcie_us/test_example_core_pcie_us.py b/example/common/tb/example_core_pcie_us/test_example_core_pcie_us.py index a57d9993e..b55dbfad0 100644 --- a/example/common/tb/example_core_pcie_us/test_example_core_pcie_us.py +++ b/example/common/tb/example_core_pcie_us/test_example_core_pcie_us.py @@ -580,8 +580,8 @@ def test_example_core_pcie_us(request, axis_pcie_data_width, straddle): 
parameters['IMM_WIDTH'] = 32 parameters['READ_OP_TABLE_SIZE'] = parameters['PCIE_TAG_COUNT'] parameters['READ_TX_LIMIT'] = 2**(parameters['RQ_SEQ_NUM_WIDTH']-1) - parameters['READ_CPLH_FC_LIMIT'] = 64 if parameters['AXIS_PCIE_RQ_USER_WIDTH'] == 60 else 128 - parameters['READ_CPLD_FC_LIMIT'] = 992 if parameters['AXIS_PCIE_RQ_USER_WIDTH'] == 60 else 2048 + parameters['READ_CPLH_FC_LIMIT'] = 64 if parameters['AXIS_PCIE_RQ_USER_WIDTH'] == 60 else 256 + parameters['READ_CPLD_FC_LIMIT'] = 1024-64 if parameters['AXIS_PCIE_RQ_USER_WIDTH'] == 60 else 2048-256 parameters['WRITE_OP_TABLE_SIZE'] = 2**(parameters['RQ_SEQ_NUM_WIDTH']-1) parameters['WRITE_TX_LIMIT'] = 2**(parameters['RQ_SEQ_NUM_WIDTH']-1) parameters['BAR0_APERTURE'] = 24 diff --git a/example/fb2CG/fpga/rtl/fpga_core.v b/example/fb2CG/fpga/rtl/fpga_core.v index 10f534459..aa479324a 100644 --- a/example/fb2CG/fpga/rtl/fpga_core.v +++ b/example/fb2CG/fpga/rtl/fpga_core.v @@ -161,8 +161,8 @@ example_core_pcie_us #( .PCIE_TAG_COUNT(PCIE_TAG_COUNT), .READ_OP_TABLE_SIZE(PCIE_TAG_COUNT), .READ_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)), - .READ_CPLH_FC_LIMIT(128), - .READ_CPLD_FC_LIMIT(2048), + .READ_CPLH_FC_LIMIT(256), + .READ_CPLD_FC_LIMIT(2048-256), .WRITE_OP_TABLE_SIZE(2**(RQ_SEQ_NUM_WIDTH-1)), .WRITE_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)), .BAR0_APERTURE(BAR0_APERTURE), @@ -265,8 +265,7 @@ example_core_pcie_us_inst ( */ .cfg_max_read_req(cfg_max_read_req), .cfg_max_payload(cfg_max_payload), - // .cfg_rcb_status(cfg_rcb_status), - .cfg_rcb_status(1'b1), // force RCB 128 due to insufficient CPLH limit in US+ PCIe HIP + .cfg_rcb_status(cfg_rcb_status), /* * Status From 1b2140a8495d79cd6462435f0ed55378691c74f2 Mon Sep 17 00:00:00 2001 From: Alex Forencich Date: Mon, 19 Jun 2023 13:13:52 -0700 Subject: [PATCH 06/20] Add RX completion stall feature to example design for testing completion buffer Signed-off-by: Alex Forencich --- example/common/rtl/example_core.v | 22 +++++++++++++++++++- example/common/rtl/example_core_pcie.v | 10 
+++++++-- example/common/rtl/example_core_pcie_ptile.v | 15 +++++++++++-- example/common/rtl/example_core_pcie_s10.v | 15 +++++++++++-- example/common/rtl/example_core_pcie_us.v | 19 ++++++++++++++--- 5 files changed, 71 insertions(+), 10 deletions(-) diff --git a/example/common/rtl/example_core.v b/example/common/rtl/example_core.v index 6729cbaf0..8ac66add8 100644 --- a/example/common/rtl/example_core.v +++ b/example/common/rtl/example_core.v @@ -162,7 +162,8 @@ module example_core # input wire dma_wr_busy, input wire dma_rd_req, input wire dma_rd_cpl, - input wire dma_wr_req + input wire dma_wr_req, + output wire rx_cpl_stall ); localparam RAM_ADDR_IMM_WIDTH = (DMA_IMM_ENABLE && (DMA_IMM_WIDTH > RAM_ADDR_WIDTH)) ? DMA_IMM_WIDTH : RAM_ADDR_WIDTH; @@ -243,6 +244,9 @@ reg dma_rd_int_en_reg = 0, dma_rd_int_en_next; reg dma_wr_int_en_reg = 0, dma_wr_int_en_next; reg irq_valid_reg = 1'b0, irq_valid_next; +reg rx_cpl_stall_reg = 1'b0, rx_cpl_stall_next; +reg [23:0] rx_cpl_stall_count_reg = 0, rx_cpl_stall_count_next; + reg dma_read_block_run_reg = 1'b0, dma_read_block_run_next; reg [DMA_LEN_WIDTH-1:0] dma_read_block_len_reg = 0, dma_read_block_len_next; reg [31:0] dma_read_block_count_reg = 0, dma_read_block_count_next; @@ -298,6 +302,7 @@ assign irq_index = 0; assign irq_valid = irq_valid_reg; assign dma_enable = dma_enable_reg; +assign rx_cpl_stall = rx_cpl_stall_reg; always @* begin axil_ctrl_awready_next = 1'b0; @@ -337,6 +342,9 @@ always @* begin irq_valid_next = irq_valid_reg && !irq_ready; + rx_cpl_stall_next = 1'b0; + rx_cpl_stall_count_next = rx_cpl_stall_count_reg; + dma_read_block_run_next = dma_read_block_run_reg; dma_read_block_len_next = dma_read_block_len_reg; dma_read_block_count_next = dma_read_block_count_reg; @@ -363,6 +371,11 @@ always @* begin dma_write_block_ram_offset_mask_next = dma_write_block_ram_offset_mask_reg; dma_write_block_ram_stride_next = dma_write_block_ram_stride_reg; + if (rx_cpl_stall_count_reg) begin + rx_cpl_stall_count_next = 
rx_cpl_stall_count_reg - 1; + rx_cpl_stall_next = 1'b1; + end + if (s_axil_ctrl_awvalid && s_axil_ctrl_wvalid && !axil_ctrl_bvalid_reg) begin // write operation axil_ctrl_awready_next = 1'b1; @@ -379,6 +392,7 @@ always @* begin dma_rd_int_en_next = s_axil_ctrl_wdata[0]; dma_wr_int_en_next = s_axil_ctrl_wdata[1]; end + 16'h0040: rx_cpl_stall_count_next = s_axil_ctrl_wdata; // single read 16'h0100: dma_read_desc_dma_addr_next[31:0] = s_axil_ctrl_wdata; 16'h0104: dma_read_desc_dma_addr_next[63:32] = s_axil_ctrl_wdata; @@ -466,6 +480,7 @@ always @* begin 16'h0020: axil_ctrl_rdata_next = dma_rd_req_count_reg; 16'h0024: axil_ctrl_rdata_next = dma_rd_cpl_count_reg; 16'h0028: axil_ctrl_rdata_next = dma_wr_req_count_reg; + 16'h0040: axil_ctrl_rdata_next = rx_cpl_stall_count_reg; // single read 16'h0100: axil_ctrl_rdata_next = dma_read_desc_dma_addr_reg; 16'h0104: axil_ctrl_rdata_next = dma_read_desc_dma_addr_reg >> 32; @@ -667,6 +682,9 @@ always @(posedge clk) begin irq_valid_reg <= irq_valid_next; + rx_cpl_stall_reg <= rx_cpl_stall_next; + rx_cpl_stall_count_reg <= rx_cpl_stall_count_next; + dma_read_block_run_reg <= dma_read_block_run_next; dma_read_block_len_reg <= dma_read_block_len_next; dma_read_block_count_reg <= dma_read_block_count_next; @@ -715,6 +733,8 @@ always @(posedge clk) begin dma_rd_int_en_reg <= 1'b0; dma_wr_int_en_reg <= 1'b0; irq_valid_reg <= 1'b0; + rx_cpl_stall_reg <= 1'b0; + rx_cpl_stall_count_reg <= 0; dma_read_block_run_reg <= 1'b0; dma_write_block_run_reg <= 1'b0; end diff --git a/example/common/rtl/example_core_pcie.v b/example/common/rtl/example_core_pcie.v index 764e85bd5..5d93ac847 100644 --- a/example/common/rtl/example_core_pcie.v +++ b/example/common/rtl/example_core_pcie.v @@ -172,7 +172,12 @@ module example_core_pcie # * Status */ output wire status_error_cor, - output wire status_error_uncor + output wire status_error_uncor, + + /* + * Control and status + */ + output wire rx_cpl_stall ); parameter AXIL_CTRL_DATA_WIDTH = 32; @@ -1124,7 
+1129,8 @@ core_inst ( .dma_wr_busy(dma_wr_busy), .dma_rd_req(tx_rd_req_tlp_valid && tx_rd_req_tlp_sop && tx_rd_req_tlp_ready), .dma_rd_cpl(rx_cpl_tlp_valid && rx_cpl_tlp_sop && rx_cpl_tlp_ready), - .dma_wr_req(tx_wr_req_tlp_valid && tx_wr_req_tlp_sop && tx_wr_req_tlp_ready) + .dma_wr_req(tx_wr_req_tlp_valid && tx_wr_req_tlp_sop && tx_wr_req_tlp_ready), + .rx_cpl_stall(rx_cpl_stall) ); endmodule diff --git a/example/common/rtl/example_core_pcie_ptile.v b/example/common/rtl/example_core_pcie_ptile.v index 877b2d351..232beab8e 100644 --- a/example/common/rtl/example_core_pcie_ptile.v +++ b/example/common/rtl/example_core_pcie_ptile.v @@ -200,6 +200,12 @@ wire [2:0] max_payload_size; wire msix_enable; wire msix_mask; +wire rx_cpl_stall; + +wire rx_st_ready_int; + +assign rx_st_ready = rx_st_ready_int & !rx_cpl_stall; + pcie_ptile_if #( .SEG_COUNT(SEG_COUNT), .SEG_DATA_WIDTH(SEG_DATA_WIDTH), @@ -226,7 +232,7 @@ pcie_ptile_if_inst ( .rx_st_sop(rx_st_sop), .rx_st_eop(rx_st_eop), .rx_st_valid(rx_st_valid), - .rx_st_ready(rx_st_ready), + .rx_st_ready(rx_st_ready_int), .rx_st_hdr(rx_st_hdr), .rx_st_tlp_prfx(rx_st_tlp_prfx), .rx_st_vf_active(rx_st_vf_active), @@ -488,7 +494,12 @@ core_pcie_inst ( * Status */ .status_error_cor(), - .status_error_uncor() + .status_error_uncor(), + + /* + * Control and status + */ + .rx_cpl_stall(rx_cpl_stall) ); endmodule diff --git a/example/common/rtl/example_core_pcie_s10.v b/example/common/rtl/example_core_pcie_s10.v index c51ec3ce1..1ff6a0a4b 100644 --- a/example/common/rtl/example_core_pcie_s10.v +++ b/example/common/rtl/example_core_pcie_s10.v @@ -194,6 +194,12 @@ wire [2:0] max_payload_size; wire msix_enable; wire msix_mask; +wire rx_cpl_stall; + +wire rx_st_ready_int; + +assign rx_st_ready = rx_st_ready_int & !rx_cpl_stall; + pcie_s10_if #( .SEG_COUNT(SEG_COUNT), .SEG_DATA_WIDTH(SEG_DATA_WIDTH), @@ -222,7 +228,7 @@ pcie_s10_if_inst ( .rx_st_sop(rx_st_sop), .rx_st_eop(rx_st_eop), .rx_st_valid(rx_st_valid), - .rx_st_ready(rx_st_ready), 
+ .rx_st_ready(rx_st_ready_int), .rx_st_vf_active(rx_st_vf_active), .rx_st_func_num(rx_st_func_num), .rx_st_vf_num(rx_st_vf_num), @@ -495,7 +501,12 @@ core_pcie_inst ( * Status */ .status_error_cor(), - .status_error_uncor() + .status_error_uncor(), + + /* + * Control and status + */ + .rx_cpl_stall(rx_cpl_stall) ); endmodule diff --git a/example/common/rtl/example_core_pcie_us.v b/example/common/rtl/example_core_pcie_us.v index e79602735..9ecdb948c 100644 --- a/example/common/rtl/example_core_pcie_us.v +++ b/example/common/rtl/example_core_pcie_us.v @@ -259,6 +259,14 @@ wire ext_tag_enable; wire msix_enable; wire msix_mask; +wire rx_cpl_stall; + +wire s_axis_rc_tvalid_int; +wire s_axis_rc_tready_int; + +assign s_axis_rc_tvalid_int = s_axis_rc_tvalid & ~rx_cpl_stall; +assign s_axis_rc_tready = s_axis_rc_tready_int & ~rx_cpl_stall; + pcie_us_if #( .AXIS_PCIE_DATA_WIDTH(AXIS_PCIE_DATA_WIDTH), .AXIS_PCIE_KEEP_WIDTH(AXIS_PCIE_KEEP_WIDTH), @@ -295,8 +303,8 @@ pcie_us_if_inst ( */ .s_axis_rc_tdata(s_axis_rc_tdata), .s_axis_rc_tkeep(s_axis_rc_tkeep), - .s_axis_rc_tvalid(s_axis_rc_tvalid), - .s_axis_rc_tready(s_axis_rc_tready), + .s_axis_rc_tvalid(s_axis_rc_tvalid_int), + .s_axis_rc_tready(s_axis_rc_tready_int), .s_axis_rc_tlast(s_axis_rc_tlast), .s_axis_rc_tuser(s_axis_rc_tuser), @@ -624,7 +632,12 @@ core_pcie_inst ( * Status */ .status_error_cor(status_error_cor), - .status_error_uncor(status_error_uncor) + .status_error_uncor(status_error_uncor), + + /* + * Control and status + */ + .rx_cpl_stall(rx_cpl_stall) ); endmodule From 23595150dd6152c229ccf1a8bdd5d207127737f3 Mon Sep 17 00:00:00 2001 From: Alex Forencich Date: Wed, 21 Jun 2023 02:30:38 -0700 Subject: [PATCH 07/20] Fix TLP mux pause Signed-off-by: Alex Forencich --- rtl/pcie_tlp_fifo_mux.v | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rtl/pcie_tlp_fifo_mux.v b/rtl/pcie_tlp_fifo_mux.v index f347d1d32..847b2325c 100644 --- a/rtl/pcie_tlp_fifo_mux.v +++ b/rtl/pcie_tlp_fifo_mux.v @@ -347,7 
+347,7 @@ always @* begin // compute mux settings for (port = 0; port < PORTS; port = port + 1) begin - port_seg_valid[port] = pause[port] ? 0 : {2{fifo_ctrl_tlp_valid[port]}} >> fifo_ctrl_seg_offset[port]; + port_seg_valid[port] = {2{fifo_ctrl_tlp_valid[port]}} >> fifo_ctrl_seg_offset[port]; port_seg_eop[port] = {2{fifo_ctrl_tlp_eop[port]}} >> fifo_ctrl_seg_offset[port]; end @@ -383,7 +383,7 @@ always @* begin port_cyc = cur_port; seg_offset_cyc = port_seg_offset_cyc[cur_port]; seg_count_cyc = port_seg_count_cyc[cur_port]; - if (port_seg_valid[cur_port][0]) begin + if (!pause[cur_port] && port_seg_valid[cur_port][0]) begin // set frame frame_cyc = 1; sel_tlp_seq_valid_cyc[OUT_TLP_SEG_COUNT*cur_port+seg] = 1'b1; From e59f5a03bd06918b2250104bf5fb765289f90c82 Mon Sep 17 00:00:00 2001 From: Alex Forencich Date: Wed, 21 Jun 2023 16:26:40 -0700 Subject: [PATCH 08/20] Update example designs based on results of buffer size tests Signed-off-by: Alex Forencich --- example/common/rtl/example_core_pcie_s10.v | 2 +- example/common/tb/example_core_pcie_s10/Makefile | 2 +- .../tb/example_core_pcie_s10/test_example_core_pcie_s10.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/example/common/rtl/example_core_pcie_s10.v b/example/common/rtl/example_core_pcie_s10.v index 1ff6a0a4b..8ceadc3d7 100644 --- a/example/common/rtl/example_core_pcie_s10.v +++ b/example/common/rtl/example_core_pcie_s10.v @@ -58,7 +58,7 @@ module example_core_pcie_s10 # // Completion header flow control credit limit (read) parameter READ_CPLH_FC_LIMIT = 770, // Completion data flow control credit limit (read) - parameter READ_CPLD_FC_LIMIT = 2500, + parameter READ_CPLD_FC_LIMIT = 2400, // Operation table size (write) parameter WRITE_OP_TABLE_SIZE = 2**TX_SEQ_NUM_WIDTH, // In-flight transmit limit (write) diff --git a/example/common/tb/example_core_pcie_s10/Makefile b/example/common/tb/example_core_pcie_s10/Makefile index 6b629949c..f554059e4 100644 --- 
a/example/common/tb/example_core_pcie_s10/Makefile +++ b/example/common/tb/example_core_pcie_s10/Makefile @@ -69,7 +69,7 @@ export PARAM_IMM_WIDTH := 32 export PARAM_READ_OP_TABLE_SIZE := $(PARAM_PCIE_TAG_COUNT) export PARAM_READ_TX_LIMIT := $(shell echo "$$(( 1 << $(PARAM_TX_SEQ_NUM_WIDTH) ))" ) export PARAM_READ_CPLH_FC_LIMIT := 770 -export PARAM_READ_CPLD_FC_LIMIT := 2500 +export PARAM_READ_CPLD_FC_LIMIT := 2400 export PARAM_WRITE_OP_TABLE_SIZE := $(shell echo "$$(( 1 << $(PARAM_TX_SEQ_NUM_WIDTH) ))" ) export PARAM_WRITE_TX_LIMIT := $(shell echo "$$(( 1 << $(PARAM_TX_SEQ_NUM_WIDTH) ))" ) export PARAM_BAR0_APERTURE := 24 diff --git a/example/common/tb/example_core_pcie_s10/test_example_core_pcie_s10.py b/example/common/tb/example_core_pcie_s10/test_example_core_pcie_s10.py index 59929ddbf..3fb3de96e 100644 --- a/example/common/tb/example_core_pcie_s10/test_example_core_pcie_s10.py +++ b/example/common/tb/example_core_pcie_s10/test_example_core_pcie_s10.py @@ -482,7 +482,7 @@ def test_example_core_pcie_s10(request, data_width, l_tile): parameters['READ_OP_TABLE_SIZE'] = parameters['PCIE_TAG_COUNT'] parameters['READ_TX_LIMIT'] = 2**parameters['TX_SEQ_NUM_WIDTH'] parameters['READ_CPLH_FC_LIMIT'] = 770 - parameters['READ_CPLD_FC_LIMIT'] = 2500 + parameters['READ_CPLD_FC_LIMIT'] = 2400 parameters['WRITE_OP_TABLE_SIZE'] = 2**parameters['TX_SEQ_NUM_WIDTH'] parameters['WRITE_TX_LIMIT'] = 2**parameters['TX_SEQ_NUM_WIDTH'] parameters['BAR0_APERTURE'] = 24 From 0a53e7c990cfbfe0be1f6637a87e06cddf0270fe Mon Sep 17 00:00:00 2001 From: Alex Forencich Date: Thu, 22 Jun 2023 16:45:00 -0700 Subject: [PATCH 09/20] Improve completion credit count tracking Signed-off-by: Alex Forencich --- rtl/dma_if_pcie_rd.v | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) diff --git a/rtl/dma_if_pcie_rd.v b/rtl/dma_if_pcie_rd.v index a238b0414..3e0801e7f 100644 --- a/rtl/dma_if_pcie_rd.v +++ b/rtl/dma_if_pcie_rd.v @@ -422,13 +422,13 @@ reg [OP_TAG_WIDTH+1-1:0] 
active_op_count_reg = 0; reg inc_active_op; reg dec_active_op; -reg [CL_CPLH_FC_LIMIT+1-1:0] active_cplh_fc_count_reg = 0; -reg active_cplh_fc_av_reg = 1'b1; +reg [CL_CPLH_FC_LIMIT+1-1:0] active_cplh_fc_count_reg = 0, active_cplh_fc_count_next; +reg active_cplh_fc_av_reg = 1'b1, active_cplh_fc_av_next; reg [6:0] inc_active_cplh_fc_count; reg [6:0] dec_active_cplh_fc_count; -reg [CL_CPLD_FC_LIMIT+1-1:0] active_cpld_fc_count_reg = 0; -reg active_cpld_fc_av_reg = 1'b1; +reg [CL_CPLD_FC_LIMIT+1-1:0] active_cpld_fc_count_reg = 0, active_cpld_fc_count_next; +reg active_cpld_fc_av_reg = 1'b1, active_cpld_fc_av_next; reg [8:0] inc_active_cpld_fc_count; reg [8:0] dec_active_cpld_fc_count; @@ -1382,6 +1382,12 @@ always @* begin end active_tx_count_av_next = active_tx_count_next < TX_LIMIT; + + active_cplh_fc_count_next <= active_cplh_fc_count_reg + inc_active_cplh_fc_count - dec_active_cplh_fc_count; + active_cplh_fc_av_next <= !CPLH_FC_LIMIT || active_cplh_fc_count_next < CPLH_FC_LIMIT; + + active_cpld_fc_count_next <= active_cpld_fc_count_reg + inc_active_cpld_fc_count - dec_active_cpld_fc_count; + active_cpld_fc_av_next <= !CPLD_FC_LIMIT || active_cpld_fc_count_next < CPLD_FC_LIMIT; end always @(posedge clk) begin @@ -1501,11 +1507,11 @@ always @(posedge clk) begin active_tag_count_reg <= active_tag_count_reg + inc_active_tag - dec_active_tag; active_op_count_reg <= active_op_count_reg + inc_active_op - dec_active_op; - active_cplh_fc_count_reg <= active_cplh_fc_count_reg + inc_active_cplh_fc_count - dec_active_cplh_fc_count; - active_cplh_fc_av_reg <= !CPLH_FC_LIMIT || active_cplh_fc_count_reg < CPLH_FC_LIMIT; + active_cplh_fc_count_reg <= active_cplh_fc_count_next; + active_cplh_fc_av_reg <= active_cplh_fc_av_next; - active_cpld_fc_count_reg <= active_cpld_fc_count_reg + inc_active_cpld_fc_count - dec_active_cpld_fc_count; - active_cpld_fc_av_reg <= !CPLD_FC_LIMIT || active_cpld_fc_count_reg < CPLD_FC_LIMIT; + active_cpld_fc_count_reg <= active_cpld_fc_count_next; + 
active_cpld_fc_av_reg <= active_cpld_fc_av_next; pcie_tag_table_start_ptr_reg <= pcie_tag_table_start_ptr_next; pcie_tag_table_start_ram_sel_reg <= pcie_tag_table_start_ram_sel_next; From 0db9fdd2b965b93eeab91c400b387cfda52cbd3a Mon Sep 17 00:00:00 2001 From: Alex Forencich Date: Thu, 22 Jun 2023 16:47:00 -0700 Subject: [PATCH 10/20] Test S10 example design with 2 segments by default Signed-off-by: Alex Forencich --- example/common/tb/example_core_pcie_s10/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/example/common/tb/example_core_pcie_s10/Makefile b/example/common/tb/example_core_pcie_s10/Makefile index f554059e4..fbb5c4899 100644 --- a/example/common/tb/example_core_pcie_s10/Makefile +++ b/example/common/tb/example_core_pcie_s10/Makefile @@ -57,7 +57,7 @@ VERILOG_SOURCES += ../../../../rtl/priority_encoder.v VERILOG_SOURCES += ../../../../rtl/pulse_merge.v # module parameters -export PARAM_SEG_COUNT := 1 +export PARAM_SEG_COUNT := 2 export PARAM_SEG_DATA_WIDTH := 256 export PARAM_SEG_EMPTY_WIDTH := $(shell python -c "print((($(PARAM_SEG_DATA_WIDTH)//32)-1).bit_length())" ) export PARAM_TX_SEQ_NUM_WIDTH := 6 From 145e150ba4aeebff0c7ac78ff6fe644a15be8875 Mon Sep 17 00:00:00 2001 From: Alex Forencich Date: Thu, 22 Jun 2023 16:49:53 -0700 Subject: [PATCH 11/20] Reorganize example design testbenches, run benchmark in testbench Signed-off-by: Alex Forencich --- .../test_example_core_pcie_ptile.py | 254 ++++++++++-------- .../test_example_core_pcie_s10.py | 254 ++++++++++-------- .../test_example_core_pcie_us.py | 254 ++++++++++-------- 3 files changed, 447 insertions(+), 315 deletions(-) diff --git a/example/common/tb/example_core_pcie_ptile/test_example_core_pcie_ptile.py b/example/common/tb/example_core_pcie_ptile/test_example_core_pcie_ptile.py index fddffe5ee..b4dc9e2fb 100644 --- a/example/common/tb/example_core_pcie_ptile/test_example_core_pcie_ptile.py +++ b/example/common/tb/example_core_pcie_ptile/test_example_core_pcie_ptile.py 
@@ -258,6 +258,136 @@ class TB(object): await self.rc.enumerate() +async def dma_block_read_bench(tb, dev, addr, mask, size, stride, count): + dev_pf0_bar0 = dev.bar_window[0] + + rd_req = await dev_pf0_bar0.read_dword(0x000020) + rd_cpl = await dev_pf0_bar0.read_dword(0x000024) + + # configure operation (read) + # DMA base address + await dev_pf0_bar0.write_dword(0x001080, addr & 0xffffffff) + await dev_pf0_bar0.write_dword(0x001084, (addr >> 32) & 0xffffffff) + # DMA offset address + await dev_pf0_bar0.write_dword(0x001088, 0) + await dev_pf0_bar0.write_dword(0x00108c, 0) + # DMA offset mask + await dev_pf0_bar0.write_dword(0x001090, mask) + await dev_pf0_bar0.write_dword(0x001094, 0) + # DMA stride + await dev_pf0_bar0.write_dword(0x001098, stride) + await dev_pf0_bar0.write_dword(0x00109c, 0) + # RAM base address + await dev_pf0_bar0.write_dword(0x0010c0, 0) + await dev_pf0_bar0.write_dword(0x0010c4, 0) + # RAM offset address + await dev_pf0_bar0.write_dword(0x0010c8, 0) + await dev_pf0_bar0.write_dword(0x0010cc, 0) + # RAM offset mask + await dev_pf0_bar0.write_dword(0x0010d0, mask) + await dev_pf0_bar0.write_dword(0x0010d4, 0) + # RAM stride + await dev_pf0_bar0.write_dword(0x0010d8, stride) + await dev_pf0_bar0.write_dword(0x0010dc, 0) + # clear cycle count + await dev_pf0_bar0.write_dword(0x001008, 0) + await dev_pf0_bar0.write_dword(0x00100c, 0) + # block length + await dev_pf0_bar0.write_dword(0x001010, size) + # block count + await dev_pf0_bar0.write_dword(0x001018, count) + await dev_pf0_bar0.write_dword(0x00101c, 0) + + # start + await dev_pf0_bar0.write_dword(0x001000, 1) + + for k in range(100): + await Timer(1000, 'ns') + run = await dev_pf0_bar0.read_dword(0x001000) + status = await dev_pf0_bar0.read_dword(0x000000) + if run == 0 and status & 0x300 == 0: + break + + if run != 0: + tb.log.warning("Operation timed out") + if status & 0x300 != 0: + tb.log.warning("DMA engine busy") + + cycles = await dev_pf0_bar0.read_dword(0x001008) + + rd_req = 
await dev_pf0_bar0.read_dword(0x000020) - rd_req + rd_cpl = await dev_pf0_bar0.read_dword(0x000024) - rd_cpl + + tb.log.info("read %d blocks of %d bytes (total %d B, stride %d) in %d ns (%d req %d cpl) %d Mbps", + count, size, count*size, stride, cycles*4, rd_req, rd_cpl, size * count * 8 * 1000 / (cycles * 4)) + + assert status & 0x300 == 0 + + +async def dma_block_write_bench(tb, dev, addr, mask, size, stride, count): + dev_pf0_bar0 = dev.bar_window[0] + + wr_req = await dev_pf0_bar0.read_dword(0x000028) + + # configure operation (write) + # DMA base address + await dev_pf0_bar0.write_dword(0x001180, addr & 0xffffffff) + await dev_pf0_bar0.write_dword(0x001184, (addr >> 32) & 0xffffffff) + # DMA offset address + await dev_pf0_bar0.write_dword(0x001188, 0) + await dev_pf0_bar0.write_dword(0x00118c, 0) + # DMA offset mask + await dev_pf0_bar0.write_dword(0x001190, mask) + await dev_pf0_bar0.write_dword(0x001194, 0) + # DMA stride + await dev_pf0_bar0.write_dword(0x001198, stride) + await dev_pf0_bar0.write_dword(0x00119c, 0) + # RAM base address + await dev_pf0_bar0.write_dword(0x0011c0, 0) + await dev_pf0_bar0.write_dword(0x0011c4, 0) + # RAM offset address + await dev_pf0_bar0.write_dword(0x0011c8, 0) + await dev_pf0_bar0.write_dword(0x0011cc, 0) + # RAM offset mask + await dev_pf0_bar0.write_dword(0x0011d0, mask) + await dev_pf0_bar0.write_dword(0x0011d4, 0) + # RAM stride + await dev_pf0_bar0.write_dword(0x0011d8, stride) + await dev_pf0_bar0.write_dword(0x0011dc, 0) + # clear cycle count + await dev_pf0_bar0.write_dword(0x001108, 0) + await dev_pf0_bar0.write_dword(0x00110c, 0) + # block length + await dev_pf0_bar0.write_dword(0x001110, size) + # block count + await dev_pf0_bar0.write_dword(0x001118, count) + await dev_pf0_bar0.write_dword(0x00111c, 0) + + # start + await dev_pf0_bar0.write_dword(0x001100, 1) + + for k in range(100): + await Timer(1000, 'ns') + run = await dev_pf0_bar0.read_dword(0x001100) + status = await dev_pf0_bar0.read_dword(0x000000) + if
run == 0 and status & 0x300 == 0: + break + + if run != 0: + tb.log.warning("Operation timed out") + if status & 0x300 != 0: + tb.log.warning("DMA engine busy") + + cycles = await dev_pf0_bar0.read_dword(0x001108) + + wr_req = await dev_pf0_bar0.read_dword(0x000028) - wr_req + + tb.log.info("wrote %d blocks of %d bytes (total %d B, stride %d) in %d ns (%d req) %d Mbps", + count, size, count*size, stride, cycles*4, wr_req, size * count * 8 * 1000 / (cycles * 4)) + + assert status & 0x300 == 0 + + @cocotb.test() async def run_test(dut): @@ -359,120 +489,34 @@ async def run_test(dut): tb.log.info("Test DMA block operations") + # disable interrupts + await dev_pf0_bar0.write_dword(0x000008, 0) + region_len = 0x2000 src_offset = 0x0000 dest_offset = 0x4000 - block_size = 256 - block_stride = block_size - block_count = 32 - - # write packet data - mem[src_offset:src_offset+region_len] = bytearray([x % 256 for x in range(region_len)]) - - # enable DMA - await dev_pf0_bar0.write_dword(0x000000, 1) - # disable interrupts - await dev_pf0_bar0.write_dword(0x000008, 0) - - # configure operation (read) - # DMA base address - await dev_pf0_bar0.write_dword(0x001080, (mem_base+src_offset) & 0xffffffff) - await dev_pf0_bar0.write_dword(0x001084, (mem_base+src_offset >> 32) & 0xffffffff) - # DMA offset address - await dev_pf0_bar0.write_dword(0x001088, 0) - await dev_pf0_bar0.write_dword(0x00108c, 0) - # DMA offset mask - await dev_pf0_bar0.write_dword(0x001090, region_len-1) - await dev_pf0_bar0.write_dword(0x001094, 0) - # DMA stride - await dev_pf0_bar0.write_dword(0x001098, block_stride) - await dev_pf0_bar0.write_dword(0x00109c, 0) - # RAM base address - await dev_pf0_bar0.write_dword(0x0010c0, 0) - await dev_pf0_bar0.write_dword(0x0010c4, 0) - # RAM offset address - await dev_pf0_bar0.write_dword(0x0010c8, 0) - await dev_pf0_bar0.write_dword(0x0010cc, 0) - # RAM offset mask - await dev_pf0_bar0.write_dword(0x0010d0, region_len-1) - await dev_pf0_bar0.write_dword(0x0010d4, 0) 
- # RAM stride - await dev_pf0_bar0.write_dword(0x0010d8, block_stride) - await dev_pf0_bar0.write_dword(0x0010dc, 0) - # clear cycle count - await dev_pf0_bar0.write_dword(0x001008, 0) - await dev_pf0_bar0.write_dword(0x00100c, 0) - # block length - await dev_pf0_bar0.write_dword(0x001010, block_size) - # block count - await dev_pf0_bar0.write_dword(0x001018, block_count) - await dev_pf0_bar0.write_dword(0x00101c, 0) - # start - await dev_pf0_bar0.write_dword(0x001000, 1) - - for k in range(10): - await Timer(1000, 'ns') - run = await dev_pf0_bar0.read_dword(0x001000) - if run == 0: - break - - # read status - status = await dev_pf0_bar0.read_dword(0x000000) - tb.log.info("DMA Status: 0x%x", status) - - # configure operation (write) - # DMA base address - await dev_pf0_bar0.write_dword(0x001180, (mem_base+dest_offset) & 0xffffffff) - await dev_pf0_bar0.write_dword(0x001184, (mem_base+dest_offset >> 32) & 0xffffffff) - # DMA offset address - await dev_pf0_bar0.write_dword(0x001188, 0) - await dev_pf0_bar0.write_dword(0x00118c, 0) - # DMA offset mask - await dev_pf0_bar0.write_dword(0x001190, region_len-1) - await dev_pf0_bar0.write_dword(0x001194, 0) - # DMA stride - await dev_pf0_bar0.write_dword(0x001198, block_stride) - await dev_pf0_bar0.write_dword(0x00119c, 0) - # RAM base address - await dev_pf0_bar0.write_dword(0x0011c0, 0) - await dev_pf0_bar0.write_dword(0x0011c4, 0) - # RAM offset address - await dev_pf0_bar0.write_dword(0x0011c8, 0) - await dev_pf0_bar0.write_dword(0x0011cc, 0) - # RAM offset mask - await dev_pf0_bar0.write_dword(0x0011d0, region_len-1) - await dev_pf0_bar0.write_dword(0x0011d4, 0) - # RAM stride - await dev_pf0_bar0.write_dword(0x0011d8, block_stride) - await dev_pf0_bar0.write_dword(0x0011dc, 0) - # clear cycle count - await dev_pf0_bar0.write_dword(0x001108, 0) - await dev_pf0_bar0.write_dword(0x00110c, 0) - # block length - await dev_pf0_bar0.write_dword(0x001110, block_size) - # block count - await 
dev_pf0_bar0.write_dword(0x001118, block_count) - await dev_pf0_bar0.write_dword(0x00111c, 0) - # start - await dev_pf0_bar0.write_dword(0x001100, 1) - - for k in range(10): - await Timer(1000, 'ns') - run = await dev_pf0_bar0.read_dword(0x001100) - if run == 0: - break - - # read status - status = await dev_pf0_bar0.read_dword(0x000000) - tb.log.info("DMA Status: 0x%x", status) - - assert status & 0x300 == 0 + await dma_block_read_bench(tb, dev, mem_base+src_offset, region_len-1, 256, 256, 32) + await dma_block_write_bench(tb, dev, mem_base+dest_offset, region_len-1, 256, 256, 32) tb.log.info("%s", mem.hexdump_str(dest_offset, region_len)) assert mem[src_offset:src_offset+region_len] == mem[dest_offset:dest_offset+region_len] + tb.log.info("Perform block reads") + + count = 100 + for size in [2**x for x in range(14)]: + stride = size + await dma_block_read_bench(tb, dev, mem_base, region_len-1, size, stride, count) + + tb.log.info("Perform block writes") + + count = 100 + for size in [2**x for x in range(14)]: + stride = size + await dma_block_write_bench(tb, dev, mem_base, region_len-1, size, stride, count) + await RisingEdge(dut.clk) await RisingEdge(dut.clk) diff --git a/example/common/tb/example_core_pcie_s10/test_example_core_pcie_s10.py b/example/common/tb/example_core_pcie_s10/test_example_core_pcie_s10.py index 3fb3de96e..9e43266e5 100644 --- a/example/common/tb/example_core_pcie_s10/test_example_core_pcie_s10.py +++ b/example/common/tb/example_core_pcie_s10/test_example_core_pcie_s10.py @@ -206,6 +206,136 @@ class TB(object): await self.rc.enumerate() +async def dma_block_read_bench(tb, dev, addr, mask, size, stride, count): + dev_pf0_bar0 = dev.bar_window[0] + + rd_req = await dev_pf0_bar0.read_dword(0x000020) + rd_cpl = await dev_pf0_bar0.read_dword(0x000024) + + # configure operation (read) + # DMA base address + await dev_pf0_bar0.write_dword(0x001080, addr & 0xffffffff) + await dev_pf0_bar0.write_dword(0x001084, (addr >> 32) & 0xffffffff) + # DMA 
offset address + await dev_pf0_bar0.write_dword(0x001088, 0) + await dev_pf0_bar0.write_dword(0x00108c, 0) + # DMA offset mask + await dev_pf0_bar0.write_dword(0x001090, mask) + await dev_pf0_bar0.write_dword(0x001094, 0) + # DMA stride + await dev_pf0_bar0.write_dword(0x001098, stride) + await dev_pf0_bar0.write_dword(0x00109c, 0) + # RAM base address + await dev_pf0_bar0.write_dword(0x0010c0, 0) + await dev_pf0_bar0.write_dword(0x0010c4, 0) + # RAM offset address + await dev_pf0_bar0.write_dword(0x0010c8, 0) + await dev_pf0_bar0.write_dword(0x0010cc, 0) + # RAM offset mask + await dev_pf0_bar0.write_dword(0x0010d0, mask) + await dev_pf0_bar0.write_dword(0x0010d4, 0) + # RAM stride + await dev_pf0_bar0.write_dword(0x0010d8, stride) + await dev_pf0_bar0.write_dword(0x0010dc, 0) + # clear cycle count + await dev_pf0_bar0.write_dword(0x001008, 0) + await dev_pf0_bar0.write_dword(0x00100c, 0) + # block length + await dev_pf0_bar0.write_dword(0x001010, size) + # block count + await dev_pf0_bar0.write_dword(0x001018, count) + await dev_pf0_bar0.write_dword(0x00101c, 0) + + # start + await dev_pf0_bar0.write_dword(0x001000, 1) + + for k in range(100): + await Timer(1000, 'ns') + run = await dev_pf0_bar0.read_dword(0x001000) + status = await dev_pf0_bar0.read_dword(0x000000) + if run == 0 and status & 0x300 == 0: + break + + if run != 0: + tb.log.warning("Operation timed out") + if status & 0x300 != 0: + tb.log.warning("DMA engine busy") + + cycles = await dev_pf0_bar0.read_dword(0x001008) + + rd_req = await dev_pf0_bar0.read_dword(0x000020) - rd_req + rd_cpl = await dev_pf0_bar0.read_dword(0x000024) - rd_cpl + + tb.log.info("read %d blocks of %d bytes (total %d B, stride %d) in %d ns (%d req %d cpl) %d Mbps", + count, size, count*size, stride, cycles*4, rd_req, rd_cpl, size * count * 8 * 1000 / (cycles * 4)) + + assert status & 0x300 == 0 + + +async def dma_block_write_bench(tb, dev, addr, mask, size, stride, count): + dev_pf0_bar0 = dev.bar_window[0] + + wr_req = await 
dev_pf0_bar0.read_dword(0x000028) + + # configure operation (write) + # DMA base address + await dev_pf0_bar0.write_dword(0x001180, addr & 0xffffffff) + await dev_pf0_bar0.write_dword(0x001184, (addr >> 32) & 0xffffffff) + # DMA offset address + await dev_pf0_bar0.write_dword(0x001188, 0) + await dev_pf0_bar0.write_dword(0x00118c, 0) + # DMA offset mask + await dev_pf0_bar0.write_dword(0x001190, mask) + await dev_pf0_bar0.write_dword(0x001194, 0) + # DMA stride + await dev_pf0_bar0.write_dword(0x001198, stride) + await dev_pf0_bar0.write_dword(0x00119c, 0) + # RAM base address + await dev_pf0_bar0.write_dword(0x0011c0, 0) + await dev_pf0_bar0.write_dword(0x0011c4, 0) + # RAM offset address + await dev_pf0_bar0.write_dword(0x0011c8, 0) + await dev_pf0_bar0.write_dword(0x0011cc, 0) + # RAM offset mask + await dev_pf0_bar0.write_dword(0x0011d0, mask) + await dev_pf0_bar0.write_dword(0x0011d4, 0) + # RAM stride + await dev_pf0_bar0.write_dword(0x0011d8, stride) + await dev_pf0_bar0.write_dword(0x0011dc, 0) + # clear cycle count + await dev_pf0_bar0.write_dword(0x001108, 0) + await dev_pf0_bar0.write_dword(0x00110c, 0) + # block length + await dev_pf0_bar0.write_dword(0x001110, size) + # block count + await dev_pf0_bar0.write_dword(0x001118, count) + await dev_pf0_bar0.write_dword(0x00111c, 0) + + # start + await dev_pf0_bar0.write_dword(0x001100, 1) + + for k in range(100): + await Timer(1000, 'ns') + run = await dev_pf0_bar0.read_dword(0x001100) + status = await dev_pf0_bar0.read_dword(0x000000) + if run == 0 and status & 0x300 == 0: + break + + if run != 0: + tb.log.warning("Operation timed out") + if status & 0x300 != 0: + tb.log.warning("DMA engine busy") + + cycles = await dev_pf0_bar0.read_dword(0x001108) + + wr_req = await dev_pf0_bar0.read_dword(0x000028) - wr_req + + tb.log.info("wrote %d blocks of %d bytes (total %d B, stride %d) in %d ns (%d req) %d Mbps", + count, size, count*size, stride, cycles*4, wr_req, size * count * 8 * 1000 / (cycles * 4)) + + assert
status & 0x300 == 0 + + @cocotb.test() async def run_test(dut): @@ -307,120 +437,34 @@ async def run_test(dut): tb.log.info("Test DMA block operations") + # disable interrupts + await dev_pf0_bar0.write_dword(0x000008, 0) + region_len = 0x2000 src_offset = 0x0000 dest_offset = 0x4000 - block_size = 256 - block_stride = block_size - block_count = 32 - - # write packet data - mem[src_offset:src_offset+region_len] = bytearray([x % 256 for x in range(region_len)]) - - # enable DMA - await dev_pf0_bar0.write_dword(0x000000, 1) - # disable interrupts - await dev_pf0_bar0.write_dword(0x000008, 0) - - # configure operation (read) - # DMA base address - await dev_pf0_bar0.write_dword(0x001080, (mem_base+src_offset) & 0xffffffff) - await dev_pf0_bar0.write_dword(0x001084, (mem_base+src_offset >> 32) & 0xffffffff) - # DMA offset address - await dev_pf0_bar0.write_dword(0x001088, 0) - await dev_pf0_bar0.write_dword(0x00108c, 0) - # DMA offset mask - await dev_pf0_bar0.write_dword(0x001090, region_len-1) - await dev_pf0_bar0.write_dword(0x001094, 0) - # DMA stride - await dev_pf0_bar0.write_dword(0x001098, block_stride) - await dev_pf0_bar0.write_dword(0x00109c, 0) - # RAM base address - await dev_pf0_bar0.write_dword(0x0010c0, 0) - await dev_pf0_bar0.write_dword(0x0010c4, 0) - # RAM offset address - await dev_pf0_bar0.write_dword(0x0010c8, 0) - await dev_pf0_bar0.write_dword(0x0010cc, 0) - # RAM offset mask - await dev_pf0_bar0.write_dword(0x0010d0, region_len-1) - await dev_pf0_bar0.write_dword(0x0010d4, 0) - # RAM stride - await dev_pf0_bar0.write_dword(0x0010d8, block_stride) - await dev_pf0_bar0.write_dword(0x0010dc, 0) - # clear cycle count - await dev_pf0_bar0.write_dword(0x001008, 0) - await dev_pf0_bar0.write_dword(0x00100c, 0) - # block length - await dev_pf0_bar0.write_dword(0x001010, block_size) - # block count - await dev_pf0_bar0.write_dword(0x001018, block_count) - await dev_pf0_bar0.write_dword(0x00101c, 0) - # start - await dev_pf0_bar0.write_dword(0x001000, 1) 
- - for k in range(10): - await Timer(1000, 'ns') - run = await dev_pf0_bar0.read_dword(0x001000) - if run == 0: - break - - # read status - status = await dev_pf0_bar0.read_dword(0x000000) - tb.log.info("DMA Status: 0x%x", status) - - # configure operation (write) - # DMA base address - await dev_pf0_bar0.write_dword(0x001180, (mem_base+dest_offset) & 0xffffffff) - await dev_pf0_bar0.write_dword(0x001184, (mem_base+dest_offset >> 32) & 0xffffffff) - # DMA offset address - await dev_pf0_bar0.write_dword(0x001188, 0) - await dev_pf0_bar0.write_dword(0x00118c, 0) - # DMA offset mask - await dev_pf0_bar0.write_dword(0x001190, region_len-1) - await dev_pf0_bar0.write_dword(0x001194, 0) - # DMA stride - await dev_pf0_bar0.write_dword(0x001198, block_stride) - await dev_pf0_bar0.write_dword(0x00119c, 0) - # RAM base address - await dev_pf0_bar0.write_dword(0x0011c0, 0) - await dev_pf0_bar0.write_dword(0x0011c4, 0) - # RAM offset address - await dev_pf0_bar0.write_dword(0x0011c8, 0) - await dev_pf0_bar0.write_dword(0x0011cc, 0) - # RAM offset mask - await dev_pf0_bar0.write_dword(0x0011d0, region_len-1) - await dev_pf0_bar0.write_dword(0x0011d4, 0) - # RAM stride - await dev_pf0_bar0.write_dword(0x0011d8, block_stride) - await dev_pf0_bar0.write_dword(0x0011dc, 0) - # clear cycle count - await dev_pf0_bar0.write_dword(0x001108, 0) - await dev_pf0_bar0.write_dword(0x00110c, 0) - # block length - await dev_pf0_bar0.write_dword(0x001110, block_size) - # block count - await dev_pf0_bar0.write_dword(0x001118, block_count) - await dev_pf0_bar0.write_dword(0x00111c, 0) - # start - await dev_pf0_bar0.write_dword(0x001100, 1) - - for k in range(10): - await Timer(1000, 'ns') - run = await dev_pf0_bar0.read_dword(0x001100) - if run == 0: - break - - # read status - status = await dev_pf0_bar0.read_dword(0x000000) - tb.log.info("DMA Status: 0x%x", status) - - assert status & 0x300 == 0 + await dma_block_read_bench(tb, dev, mem_base+src_offset, region_len-1, 256, 256, 32) + await 
dma_block_write_bench(tb, dev, mem_base+dest_offset, region_len-1, 256, 256, 32) tb.log.info("%s", mem.hexdump_str(dest_offset, region_len)) assert mem[src_offset:src_offset+region_len] == mem[dest_offset:dest_offset+region_len] + tb.log.info("Perform block reads") + + count = 100 + for size in [2**x for x in range(14)]: + stride = size + await dma_block_read_bench(tb, dev, mem_base, region_len-1, size, stride, count) + + tb.log.info("Perform block writes") + + count = 100 + for size in [2**x for x in range(14)]: + stride = size + await dma_block_write_bench(tb, dev, mem_base, region_len-1, size, stride, count) + await RisingEdge(dut.clk) await RisingEdge(dut.clk) diff --git a/example/common/tb/example_core_pcie_us/test_example_core_pcie_us.py b/example/common/tb/example_core_pcie_us/test_example_core_pcie_us.py index b55dbfad0..4f89a5d86 100644 --- a/example/common/tb/example_core_pcie_us/test_example_core_pcie_us.py +++ b/example/common/tb/example_core_pcie_us/test_example_core_pcie_us.py @@ -299,6 +299,136 @@ class TB(object): await self.rc.enumerate() +async def dma_block_read_bench(tb, dev, addr, mask, size, stride, count): + dev_pf0_bar0 = dev.bar_window[0] + + rd_req = await dev_pf0_bar0.read_dword(0x000020) + rd_cpl = await dev_pf0_bar0.read_dword(0x000024) + + # configure operation (read) + # DMA base address + await dev_pf0_bar0.write_dword(0x001080, addr & 0xffffffff) + await dev_pf0_bar0.write_dword(0x001084, (addr >> 32) & 0xffffffff) + # DMA offset address + await dev_pf0_bar0.write_dword(0x001088, 0) + await dev_pf0_bar0.write_dword(0x00108c, 0) + # DMA offset mask + await dev_pf0_bar0.write_dword(0x001090, mask) + await dev_pf0_bar0.write_dword(0x001094, 0) + # DMA stride + await dev_pf0_bar0.write_dword(0x001098, stride) + await dev_pf0_bar0.write_dword(0x00109c, 0) + # RAM base address + await dev_pf0_bar0.write_dword(0x0010c0, 0) + await dev_pf0_bar0.write_dword(0x0010c4, 0) + # RAM offset address + await dev_pf0_bar0.write_dword(0x0010c8, 0) + 
await dev_pf0_bar0.write_dword(0x0010cc, 0) + # RAM offset mask + await dev_pf0_bar0.write_dword(0x0010d0, mask) + await dev_pf0_bar0.write_dword(0x0010d4, 0) + # RAM stride + await dev_pf0_bar0.write_dword(0x0010d8, stride) + await dev_pf0_bar0.write_dword(0x0010dc, 0) + # clear cycle count + await dev_pf0_bar0.write_dword(0x001008, 0) + await dev_pf0_bar0.write_dword(0x00100c, 0) + # block length + await dev_pf0_bar0.write_dword(0x001010, size) + # block count + await dev_pf0_bar0.write_dword(0x001018, count) + await dev_pf0_bar0.write_dword(0x00101c, 0) + + # start + await dev_pf0_bar0.write_dword(0x001000, 1) + + for k in range(100): + await Timer(1000, 'ns') + run = await dev_pf0_bar0.read_dword(0x001000) + status = await dev_pf0_bar0.read_dword(0x000000) + if run == 0 and status & 0x300 == 0: + break + + if run != 0: + tb.log.warning("Operation timed out") + if status & 0x300 != 0: + tb.log.warning("DMA engine busy") + + cycles = await dev_pf0_bar0.read_dword(0x001008) + + rd_req = await dev_pf0_bar0.read_dword(0x000020) - rd_req + rd_cpl = await dev_pf0_bar0.read_dword(0x000024) - rd_cpl + + tb.log.info("read %d blocks of %d bytes (total %d B, stride %d) in %d ns (%d req %d cpl) %d Mbps", + count, size, count*size, stride, cycles*4, rd_req, rd_cpl, size * count * 8 * 1000 / (cycles * 4)) + + assert status & 0x300 == 0 + + +async def dma_block_write_bench(tb, dev, addr, mask, size, stride, count): + dev_pf0_bar0 = dev.bar_window[0] + + wr_req = await dev_pf0_bar0.read_dword(0x000028) + + # configure operation (write) + # DMA base address + await dev_pf0_bar0.write_dword(0x001180, addr & 0xffffffff) + await dev_pf0_bar0.write_dword(0x001184, (addr >> 32) & 0xffffffff) + # DMA offset address + await dev_pf0_bar0.write_dword(0x001188, 0) + await dev_pf0_bar0.write_dword(0x00118c, 0) + # DMA offset mask + await dev_pf0_bar0.write_dword(0x001190, mask) + await dev_pf0_bar0.write_dword(0x001194, 0) + # DMA stride + await dev_pf0_bar0.write_dword(0x001198, stride) +
await dev_pf0_bar0.write_dword(0x00119c, 0) + # RAM base address + await dev_pf0_bar0.write_dword(0x0011c0, 0) + await dev_pf0_bar0.write_dword(0x0011c4, 0) + # RAM offset address + await dev_pf0_bar0.write_dword(0x0011c8, 0) + await dev_pf0_bar0.write_dword(0x0011cc, 0) + # RAM offset mask + await dev_pf0_bar0.write_dword(0x0011d0, mask) + await dev_pf0_bar0.write_dword(0x0011d4, 0) + # RAM stride + await dev_pf0_bar0.write_dword(0x0011d8, stride) + await dev_pf0_bar0.write_dword(0x0011dc, 0) + # clear cycle count + await dev_pf0_bar0.write_dword(0x001108, 0) + await dev_pf0_bar0.write_dword(0x00110c, 0) + # block length + await dev_pf0_bar0.write_dword(0x001110, size) + # block count + await dev_pf0_bar0.write_dword(0x001118, count) + await dev_pf0_bar0.write_dword(0x00111c, 0) + + # start + await dev_pf0_bar0.write_dword(0x001100, 1) + + for k in range(100): + await Timer(1000, 'ns') + run = await dev_pf0_bar0.read_dword(0x001100) + status = await dev_pf0_bar0.read_dword(0x000000) + if run == 0 and status & 0x300 == 0: + break + + if run != 0: + tb.log.warning("Operation timed out") + if status & 0x300 != 0: + tb.log.warning("DMA engine busy") + + cycles = await dev_pf0_bar0.read_dword(0x001108) + + wr_req = await dev_pf0_bar0.read_dword(0x000028) - wr_req + + tb.log.info("wrote %d blocks of %d bytes (total %d B, stride %d) in %d ns (%d req) %d Mbps", + count, size, count*size, stride, cycles*4, wr_req, size * count * 8 * 1000 / (cycles * 4)) + + assert status & 0x300 == 0 + + @cocotb.test() async def run_test(dut): @@ -400,120 +530,34 @@ async def run_test(dut): tb.log.info("Test DMA block operations") + # disable interrupts + await dev_pf0_bar0.write_dword(0x000008, 0) + region_len = 0x2000 src_offset = 0x0000 dest_offset = 0x4000 - block_size = 256 - block_stride = block_size - block_count = 32 - - # write packet data - mem[src_offset:src_offset+region_len] = bytearray([x % 256 for x in range(region_len)]) - - # enable DMA - await 
dev_pf0_bar0.write_dword(0x000000, 1) - # disable interrupts - await dev_pf0_bar0.write_dword(0x000008, 0) - - # configure operation (read) - # DMA base address - await dev_pf0_bar0.write_dword(0x001080, (mem_base+src_offset) & 0xffffffff) - await dev_pf0_bar0.write_dword(0x001084, (mem_base+src_offset >> 32) & 0xffffffff) - # DMA offset address - await dev_pf0_bar0.write_dword(0x001088, 0) - await dev_pf0_bar0.write_dword(0x00108c, 0) - # DMA offset mask - await dev_pf0_bar0.write_dword(0x001090, region_len-1) - await dev_pf0_bar0.write_dword(0x001094, 0) - # DMA stride - await dev_pf0_bar0.write_dword(0x001098, block_stride) - await dev_pf0_bar0.write_dword(0x00109c, 0) - # RAM base address - await dev_pf0_bar0.write_dword(0x0010c0, 0) - await dev_pf0_bar0.write_dword(0x0010c4, 0) - # RAM offset address - await dev_pf0_bar0.write_dword(0x0010c8, 0) - await dev_pf0_bar0.write_dword(0x0010cc, 0) - # RAM offset mask - await dev_pf0_bar0.write_dword(0x0010d0, region_len-1) - await dev_pf0_bar0.write_dword(0x0010d4, 0) - # RAM stride - await dev_pf0_bar0.write_dword(0x0010d8, block_stride) - await dev_pf0_bar0.write_dword(0x0010dc, 0) - # clear cycle count - await dev_pf0_bar0.write_dword(0x001008, 0) - await dev_pf0_bar0.write_dword(0x00100c, 0) - # block length - await dev_pf0_bar0.write_dword(0x001010, block_size) - # block count - await dev_pf0_bar0.write_dword(0x001018, block_count) - await dev_pf0_bar0.write_dword(0x00101c, 0) - # start - await dev_pf0_bar0.write_dword(0x001000, 1) - - for k in range(10): - await Timer(1000, 'ns') - run = await dev_pf0_bar0.read_dword(0x001000) - if run == 0: - break - - # read status - status = await dev_pf0_bar0.read_dword(0x000000) - tb.log.info("DMA Status: 0x%x", status) - - # configure operation (write) - # DMA base address - await dev_pf0_bar0.write_dword(0x001180, (mem_base+dest_offset) & 0xffffffff) - await dev_pf0_bar0.write_dword(0x001184, (mem_base+dest_offset >> 32) & 0xffffffff) - # DMA offset address - await 
dev_pf0_bar0.write_dword(0x001188, 0) - await dev_pf0_bar0.write_dword(0x00118c, 0) - # DMA offset mask - await dev_pf0_bar0.write_dword(0x001190, region_len-1) - await dev_pf0_bar0.write_dword(0x001194, 0) - # DMA stride - await dev_pf0_bar0.write_dword(0x001198, block_stride) - await dev_pf0_bar0.write_dword(0x00119c, 0) - # RAM base address - await dev_pf0_bar0.write_dword(0x0011c0, 0) - await dev_pf0_bar0.write_dword(0x0011c4, 0) - # RAM offset address - await dev_pf0_bar0.write_dword(0x0011c8, 0) - await dev_pf0_bar0.write_dword(0x0011cc, 0) - # RAM offset mask - await dev_pf0_bar0.write_dword(0x0011d0, region_len-1) - await dev_pf0_bar0.write_dword(0x0011d4, 0) - # RAM stride - await dev_pf0_bar0.write_dword(0x0011d8, block_stride) - await dev_pf0_bar0.write_dword(0x0011dc, 0) - # clear cycle count - await dev_pf0_bar0.write_dword(0x001108, 0) - await dev_pf0_bar0.write_dword(0x00110c, 0) - # block length - await dev_pf0_bar0.write_dword(0x001110, block_size) - # block count - await dev_pf0_bar0.write_dword(0x001118, block_count) - await dev_pf0_bar0.write_dword(0x00111c, 0) - # start - await dev_pf0_bar0.write_dword(0x001100, 1) - - for k in range(10): - await Timer(1000, 'ns') - run = await dev_pf0_bar0.read_dword(0x001100) - if run == 0: - break - - # read status - status = await dev_pf0_bar0.read_dword(0x000000) - tb.log.info("DMA Status: 0x%x", status) - - assert status & 0x300 == 0 + await dma_block_read_bench(tb, dev, mem_base+src_offset, region_len-1, 256, 256, 32) + await dma_block_write_bench(tb, dev, mem_base+dest_offset, region_len-1, 256, 256, 32) tb.log.info("%s", mem.hexdump_str(dest_offset, region_len)) assert mem[src_offset:src_offset+region_len] == mem[dest_offset:dest_offset+region_len] + tb.log.info("Perform block reads") + + count = 100 + for size in [2**x for x in range(14)]: + stride = size + await dma_block_read_bench(tb, dev, mem_base, region_len-1, size, stride, count) + + tb.log.info("Perform block writes") + + count = 100 + for 
size in [2**x for x in range(14)]: + stride = size + await dma_block_write_bench(tb, dev, mem_base, region_len-1, size, stride, count) + await RisingEdge(dut.clk) await RisingEdge(dut.clk) From 95a735c226147e6396f91cbbb4d8469f3fb90b50 Mon Sep 17 00:00:00 2001 From: Alex Forencich Date: Thu, 22 Jun 2023 16:50:39 -0700 Subject: [PATCH 12/20] Add completion buffer test to example design testbenches Signed-off-by: Alex Forencich --- .../test_example_core_pcie_ptile.py | 103 ++++++++++++++++++ .../test_example_core_pcie_s10.py | 103 ++++++++++++++++++ .../test_example_core_pcie_us.py | 103 ++++++++++++++++++ 3 files changed, 309 insertions(+) diff --git a/example/common/tb/example_core_pcie_ptile/test_example_core_pcie_ptile.py b/example/common/tb/example_core_pcie_ptile/test_example_core_pcie_ptile.py index b4dc9e2fb..6668724fe 100644 --- a/example/common/tb/example_core_pcie_ptile/test_example_core_pcie_ptile.py +++ b/example/common/tb/example_core_pcie_ptile/test_example_core_pcie_ptile.py @@ -388,6 +388,81 @@ async def dma_block_write_bench(tb, dev, addr, mask, size, stride, count): assert status & 0x300 == 0 +async def dma_cpl_buf_test(tb, dev, addr, mask, size, stride, count, stall): + dev_pf0_bar0 = dev.bar_window[0] + + rd_req = await dev_pf0_bar0.read_dword(0x000020) + rd_cpl = await dev_pf0_bar0.read_dword(0x000024) + + # configure operation (read) + # DMA base address + await dev_pf0_bar0.write_dword(0x001080, addr & 0xffffffff) + await dev_pf0_bar0.write_dword(0x001084, (addr >> 32) & 0xffffffff) + # DMA offset address + await dev_pf0_bar0.write_dword(0x001088, 0) + await dev_pf0_bar0.write_dword(0x00108c, 0) + # DMA offset mask + await dev_pf0_bar0.write_dword(0x001090, mask) + await dev_pf0_bar0.write_dword(0x001094, 0) + # DMA stride + await dev_pf0_bar0.write_dword(0x001098, stride) + await dev_pf0_bar0.write_dword(0x00109c, 0) + # RAM base address + await dev_pf0_bar0.write_dword(0x0010c0, 0) + await dev_pf0_bar0.write_dword(0x0010c4, 0) + # RAM offset 
address + await dev_pf0_bar0.write_dword(0x0010c8, 0) + await dev_pf0_bar0.write_dword(0x0010cc, 0) + # RAM offset mask + await dev_pf0_bar0.write_dword(0x0010d0, mask) + await dev_pf0_bar0.write_dword(0x0010d4, 0) + # RAM stride + await dev_pf0_bar0.write_dword(0x0010d8, stride) + await dev_pf0_bar0.write_dword(0x0010dc, 0) + # clear cycle count + await dev_pf0_bar0.write_dword(0x001008, 0) + await dev_pf0_bar0.write_dword(0x00100c, 0) + # block length + await dev_pf0_bar0.write_dword(0x001010, size) + # block count + await dev_pf0_bar0.write_dword(0x001018, count) + await dev_pf0_bar0.write_dword(0x00101c, 0) + + if stall: + # stall RX + await dev_pf0_bar0.write_dword(0x000040, stall) + + # start + await dev_pf0_bar0.write_dword(0x001000, 1) + + # wait for stall + if stall: + for k in range(stall): + await RisingEdge(tb.dut.clk) + + for k in range(100): + await Timer(1000, 'ns') + run = await dev_pf0_bar0.read_dword(0x001000) + status = await dev_pf0_bar0.read_dword(0x000000) + if run == 0 and status & 0x300 == 0: + break + + if run != 0: + tb.log.warning("Operation timed out") + if status & 0x300 != 0: + tb.log.warning("DMA engine busy") + + cycles = await dev_pf0_bar0.read_dword(0x001008) + + rd_req = await dev_pf0_bar0.read_dword(0x000020) - rd_req + rd_cpl = await dev_pf0_bar0.read_dword(0x000024) - rd_cpl + + tb.log.info("read %d x %d B (total %d B %d CPLD, stride %d) in %d ns (%d req %d cpl) %d Mbps", + count, size, count*size, count*((size+15)//16), stride, cycles*4, rd_req, rd_cpl, size * count * 8 * 1000 / (cycles * 4)) + + assert status & 0x300 == 0 + + @cocotb.test() async def run_test(dut): @@ -503,6 +578,34 @@ async def run_test(dut): assert mem[src_offset:src_offset+region_len] == mem[dest_offset:dest_offset+region_len] + tb.log.info("Test RX completion buffer (CPLH, 8)") + + size = 8 + stride = size + for count in range(32, 256+1, 8): + await dma_cpl_buf_test(tb, dev, mem_base, region_len-1, size, stride, count, 2000) + + tb.log.info("Test RX 
completion buffer (CPLH, 8+64)") + + size = 8+64 + stride = 0 + for count in range(8, 256+1, 8): + await dma_cpl_buf_test(tb, dev, mem_base, region_len-1, size, stride, count, 2000) + + tb.log.info("Test RX completion buffer (CPLH, 8+128+8)") + + size = 8+128+8 + stride = 0 + for count in range(8, 256+1, 8): + await dma_cpl_buf_test(tb, dev, mem_base, region_len-1, size, stride, count, 2000) + + tb.log.info("Test RX completion buffer (CPLD)") + + size = 512 + stride = size + for count in range(8, 256+1, 8): + await dma_cpl_buf_test(tb, dev, mem_base, region_len-1, size, stride, count, 4000) + tb.log.info("Perform block reads") count = 100 diff --git a/example/common/tb/example_core_pcie_s10/test_example_core_pcie_s10.py b/example/common/tb/example_core_pcie_s10/test_example_core_pcie_s10.py index 9e43266e5..92fa6e5d9 100644 --- a/example/common/tb/example_core_pcie_s10/test_example_core_pcie_s10.py +++ b/example/common/tb/example_core_pcie_s10/test_example_core_pcie_s10.py @@ -336,6 +336,81 @@ async def dma_block_write_bench(tb, dev, addr, mask, size, stride, count): assert status & 0x300 == 0 +async def dma_cpl_buf_test(tb, dev, addr, mask, size, stride, count, stall): + dev_pf0_bar0 = dev.bar_window[0] + + rd_req = await dev_pf0_bar0.read_dword(0x000020) + rd_cpl = await dev_pf0_bar0.read_dword(0x000024) + + # configure operation (read) + # DMA base address + await dev_pf0_bar0.write_dword(0x001080, addr & 0xffffffff) + await dev_pf0_bar0.write_dword(0x001084, (addr >> 32) & 0xffffffff) + # DMA offset address + await dev_pf0_bar0.write_dword(0x001088, 0) + await dev_pf0_bar0.write_dword(0x00108c, 0) + # DMA offset mask + await dev_pf0_bar0.write_dword(0x001090, mask) + await dev_pf0_bar0.write_dword(0x001094, 0) + # DMA stride + await dev_pf0_bar0.write_dword(0x001098, stride) + await dev_pf0_bar0.write_dword(0x00109c, 0) + # RAM base address + await dev_pf0_bar0.write_dword(0x0010c0, 0) + await dev_pf0_bar0.write_dword(0x0010c4, 0) + # RAM offset address + await 
dev_pf0_bar0.write_dword(0x0010c8, 0) + await dev_pf0_bar0.write_dword(0x0010cc, 0) + # RAM offset mask + await dev_pf0_bar0.write_dword(0x0010d0, mask) + await dev_pf0_bar0.write_dword(0x0010d4, 0) + # RAM stride + await dev_pf0_bar0.write_dword(0x0010d8, stride) + await dev_pf0_bar0.write_dword(0x0010dc, 0) + # clear cycle count + await dev_pf0_bar0.write_dword(0x001008, 0) + await dev_pf0_bar0.write_dword(0x00100c, 0) + # block length + await dev_pf0_bar0.write_dword(0x001010, size) + # block count + await dev_pf0_bar0.write_dword(0x001018, count) + await dev_pf0_bar0.write_dword(0x00101c, 0) + + if stall: + # stall RX + await dev_pf0_bar0.write_dword(0x000040, stall) + + # start + await dev_pf0_bar0.write_dword(0x001000, 1) + + # wait for stall + if stall: + for k in range(stall): + await RisingEdge(tb.dut.clk) + + for k in range(100): + await Timer(1000, 'ns') + run = await dev_pf0_bar0.read_dword(0x001000) + status = await dev_pf0_bar0.read_dword(0x000000) + if run == 0 and status & 0x300 == 0: + break + + if run != 0: + tb.log.warning("Operation timed out") + if status & 0x300 != 0: + tb.log.warning("DMA engine busy") + + cycles = await dev_pf0_bar0.read_dword(0x001008) + + rd_req = await dev_pf0_bar0.read_dword(0x000020) - rd_req + rd_cpl = await dev_pf0_bar0.read_dword(0x000024) - rd_cpl + + tb.log.info("read %d x %d B (total %d B %d CPLD, stride %d) in %d ns (%d req %d cpl) %d Mbps", + count, size, count*size, count*((size+15)//16), stride, cycles*4, rd_req, rd_cpl, size * count * 8 * 1000 / (cycles * 4)) + + assert status & 0x300 == 0 + + @cocotb.test() async def run_test(dut): @@ -451,6 +526,34 @@ async def run_test(dut): assert mem[src_offset:src_offset+region_len] == mem[dest_offset:dest_offset+region_len] + tb.log.info("Test RX completion buffer (CPLH, 8)") + + size = 8 + stride = size + for count in range(32, 256+1, 8): + await dma_cpl_buf_test(tb, dev, mem_base, region_len-1, size, stride, count, 2000) + + tb.log.info("Test RX completion buffer 
(CPLH, 8+64)") + + size = 8+64 + stride = 0 + for count in range(8, 256+1, 8): + await dma_cpl_buf_test(tb, dev, mem_base, region_len-1, size, stride, count, 2000) + + tb.log.info("Test RX completion buffer (CPLH, 8+128+8)") + + size = 8+128+8 + stride = 0 + for count in range(8, 256+1, 8): + await dma_cpl_buf_test(tb, dev, mem_base, region_len-1, size, stride, count, 2000) + + tb.log.info("Test RX completion buffer (CPLD)") + + size = 512 + stride = size + for count in range(8, 256+1, 8): + await dma_cpl_buf_test(tb, dev, mem_base, region_len-1, size, stride, count, 4000) + tb.log.info("Perform block reads") count = 100 diff --git a/example/common/tb/example_core_pcie_us/test_example_core_pcie_us.py b/example/common/tb/example_core_pcie_us/test_example_core_pcie_us.py index 4f89a5d86..832d58508 100644 --- a/example/common/tb/example_core_pcie_us/test_example_core_pcie_us.py +++ b/example/common/tb/example_core_pcie_us/test_example_core_pcie_us.py @@ -429,6 +429,81 @@ async def dma_block_write_bench(tb, dev, addr, mask, size, stride, count): assert status & 0x300 == 0 +async def dma_cpl_buf_test(tb, dev, addr, mask, size, stride, count, stall): + dev_pf0_bar0 = dev.bar_window[0] + + rd_req = await dev_pf0_bar0.read_dword(0x000020) + rd_cpl = await dev_pf0_bar0.read_dword(0x000024) + + # configure operation (read) + # DMA base address + await dev_pf0_bar0.write_dword(0x001080, addr & 0xffffffff) + await dev_pf0_bar0.write_dword(0x001084, (addr >> 32) & 0xffffffff) + # DMA offset address + await dev_pf0_bar0.write_dword(0x001088, 0) + await dev_pf0_bar0.write_dword(0x00108c, 0) + # DMA offset mask + await dev_pf0_bar0.write_dword(0x001090, mask) + await dev_pf0_bar0.write_dword(0x001094, 0) + # DMA stride + await dev_pf0_bar0.write_dword(0x001098, stride) + await dev_pf0_bar0.write_dword(0x00109c, 0) + # RAM base address + await dev_pf0_bar0.write_dword(0x0010c0, 0) + await dev_pf0_bar0.write_dword(0x0010c4, 0) + # RAM offset address + await 
dev_pf0_bar0.write_dword(0x0010c8, 0) + await dev_pf0_bar0.write_dword(0x0010cc, 0) + # RAM offset mask + await dev_pf0_bar0.write_dword(0x0010d0, mask) + await dev_pf0_bar0.write_dword(0x0010d4, 0) + # RAM stride + await dev_pf0_bar0.write_dword(0x0010d8, stride) + await dev_pf0_bar0.write_dword(0x0010dc, 0) + # clear cycle count + await dev_pf0_bar0.write_dword(0x001008, 0) + await dev_pf0_bar0.write_dword(0x00100c, 0) + # block length + await dev_pf0_bar0.write_dword(0x001010, size) + # block count + await dev_pf0_bar0.write_dword(0x001018, count) + await dev_pf0_bar0.write_dword(0x00101c, 0) + + if stall: + # stall RX + await dev_pf0_bar0.write_dword(0x000040, stall) + + # start + await dev_pf0_bar0.write_dword(0x001000, 1) + + # wait for stall + if stall: + for k in range(stall): + await RisingEdge(tb.dut.clk) + + for k in range(100): + await Timer(1000, 'ns') + run = await dev_pf0_bar0.read_dword(0x001000) + status = await dev_pf0_bar0.read_dword(0x000000) + if run == 0 and status & 0x300 == 0: + break + + if run != 0: + tb.log.warning("Operation timed out") + if status & 0x300 != 0: + tb.log.warning("DMA engine busy") + + cycles = await dev_pf0_bar0.read_dword(0x001008) + + rd_req = await dev_pf0_bar0.read_dword(0x000020) - rd_req + rd_cpl = await dev_pf0_bar0.read_dword(0x000024) - rd_cpl + + tb.log.info("read %d x %d B (total %d B %d CPLD, stride %d) in %d ns (%d req %d cpl) %d Mbps", + count, size, count*size, count*((size+15)//16), stride, cycles*4, rd_req, rd_cpl, size * count * 8 * 1000 / (cycles * 4)) + + assert status & 0x300 == 0 + + @cocotb.test() async def run_test(dut): @@ -544,6 +619,34 @@ async def run_test(dut): assert mem[src_offset:src_offset+region_len] == mem[dest_offset:dest_offset+region_len] + tb.log.info("Test RX completion buffer (CPLH, 8)") + + size = 8 + stride = size + for count in range(32, 256+1, 8): + await dma_cpl_buf_test(tb, dev, mem_base, region_len-1, size, stride, count, 2000) + + tb.log.info("Test RX completion buffer 
(CPLH, 8+64)") + + size = 8+64 + stride = 0 + for count in range(8, 256+1, 8): + await dma_cpl_buf_test(tb, dev, mem_base, region_len-1, size, stride, count, 2000) + + tb.log.info("Test RX completion buffer (CPLH, 8+128+8)") + + size = 8+128+8 + stride = 0 + for count in range(8, 256+1, 8): + await dma_cpl_buf_test(tb, dev, mem_base, region_len-1, size, stride, count, 2000) + + tb.log.info("Test RX completion buffer (CPLD)") + + size = 512 + stride = size + for count in range(8, 256+1, 8): + await dma_cpl_buf_test(tb, dev, mem_base, region_len-1, size, stride, count, 4000) + tb.log.info("Perform block reads") count = 100 From aba315c9fce05bef98ce18e5913037c72c089ffe Mon Sep 17 00:00:00 2001 From: Alex Forencich Date: Thu, 22 Jun 2023 16:51:08 -0700 Subject: [PATCH 13/20] Add completion buffer tests to example driver Signed-off-by: Alex Forencich --- .../common/driver/example/example_driver.c | 141 +++++++++++++++++- 1 file changed, 135 insertions(+), 6 deletions(-) diff --git a/example/common/driver/example/example_driver.c b/example/common/driver/example/example_driver.c index 69aa05a9c..6996bdeaf 100644 --- a/example/common/driver/example/example_driver.c +++ b/example/common/driver/example/example_driver.c @@ -185,8 +185,8 @@ static void dma_block_read_bench(struct example_dev *edev, rd_req = ioread32(edev->bar[0] + 0x000020) - rd_req; rd_cpl = ioread32(edev->bar[0] + 0x000024) - rd_cpl; - dev_info(edev->dev, "read %lld blocks of %lld bytes (stride %lld) in %lld ns (%d req %d cpl): %lld Mbps", - count, size, stride, cycles * 4, rd_req, rd_cpl, size * count * 8 * 1000 / (cycles * 4)); + dev_info(edev->dev, "read %lld blocks of %lld bytes (total %lld B, stride %lld) in %lld ns (%d req %d cpl): %lld Mbps", + count, size, count*size, stride, cycles * 4, rd_req, rd_cpl, size * count * 8 * 1000 / (cycles * 4)); } static void dma_block_write_bench(struct example_dev *edev, @@ -208,8 +208,81 @@ static void dma_block_write_bench(struct example_dev *edev, wr_req = 
ioread32(edev->bar[0] + 0x000028) - wr_req; - dev_info(edev->dev, "wrote %lld blocks of %lld bytes (stride %lld) in %lld ns (%d req): %lld Mbps", - count, size, stride, cycles * 4, wr_req, size * count * 8 * 1000 / (cycles * 4)); + dev_info(edev->dev, "wrote %lld blocks of %lld bytes (total %lld B, stride %lld) in %lld ns (%d req): %lld Mbps", + count, size, count*size, stride, cycles * 4, wr_req, size * count * 8 * 1000 / (cycles * 4)); +} + +static void dma_cpl_buf_test(struct example_dev *edev, dma_addr_t dma_addr, + u64 size, u64 stride, u64 count, int stall) +{ + unsigned long t; + u64 cycles; + u32 rd_req; + u32 rd_cpl; + + rd_req = ioread32(edev->bar[0] + 0x000020); + rd_cpl = ioread32(edev->bar[0] + 0x000024); + + // DMA base address + iowrite32(dma_addr & 0xffffffff, edev->bar[0] + 0x001080); + iowrite32((dma_addr >> 32) & 0xffffffff, edev->bar[0] + 0x001084); + // DMA offset address + iowrite32(0, edev->bar[0] + 0x001088); + iowrite32(0, edev->bar[0] + 0x00108c); + // DMA offset mask + iowrite32(0x3fff, edev->bar[0] + 0x001090); + iowrite32(0, edev->bar[0] + 0x001094); + // DMA stride + iowrite32(stride & 0xffffffff, edev->bar[0] + 0x001098); + iowrite32((stride >> 32) & 0xffffffff, edev->bar[0] + 0x00109c); + // RAM base address + iowrite32(0, edev->bar[0] + 0x0010c0); + iowrite32(0, edev->bar[0] + 0x0010c4); + // RAM offset address + iowrite32(0, edev->bar[0] + 0x0010c8); + iowrite32(0, edev->bar[0] + 0x0010cc); + // RAM offset mask + iowrite32(0x3fff, edev->bar[0] + 0x0010d0); + iowrite32(0, edev->bar[0] + 0x0010d4); + // RAM stride + iowrite32(stride & 0xffffffff, edev->bar[0] + 0x0010d8); + iowrite32((stride >> 32) & 0xffffffff, edev->bar[0] + 0x0010dc); + // clear cycle count + iowrite32(0, edev->bar[0] + 0x001008); + iowrite32(0, edev->bar[0] + 0x00100c); + // block length + iowrite32(size, edev->bar[0] + 0x001010); + // block count + iowrite32(count, edev->bar[0] + 0x001018); + + if (stall) + iowrite32(stall, edev->bar[0] + 0x000040); + + // start 
+ iowrite32(1, edev->bar[0] + 0x001000); + + if (stall) + msleep(10); + + // wait for transfer to complete + t = jiffies + msecs_to_jiffies(20000); + while (time_before(jiffies, t)) { + if ((ioread32(edev->bar[0] + 0x001000) & 1) == 0) + break; + } + + if ((ioread32(edev->bar[0] + 0x001000) & 1) != 0) + dev_warn(edev->dev, "%s: operation timed out", __func__); + if ((ioread32(edev->bar[0] + 0x000000) & 0x300) != 0) + dev_warn(edev->dev, "%s: DMA engine busy", __func__); + + cycles = ioread32(edev->bar[0] + 0x001008); + + rd_req = ioread32(edev->bar[0] + 0x000020) - rd_req; + rd_cpl = ioread32(edev->bar[0] + 0x000024) - rd_cpl; + + dev_info(edev->dev, "read %lld x %lld B (total %lld B %lld CPLD, stride %lld) in %lld ns (%d req %d cpl): %lld Mbps", + count, size, count*size, count*((size+15) / 16), stride, cycles * 4, rd_req, rd_cpl, size * count * 8 * 1000 / (cycles * 4)); } static irqreturn_t edev_intr(int irq, void *data) @@ -431,31 +504,87 @@ static int edev_probe(struct pci_dev *pdev, const struct pci_device_id *ent) if (!mismatch) { u64 size; u64 stride; + u64 count; dev_info(dev, "disable interrupts"); iowrite32(0x0, edev->bar[0] + 0x000008); + dev_info(dev, "test RX completion buffer (CPLH, 8)"); + + size = 8; + stride = size; + for (count = 32; count <= 256; count += 8) { + dma_cpl_buf_test(edev, + edev->dma_region_addr + 0x0000, + size, stride, count, 100000); + if ((ioread32(edev->bar[0] + 0x000000) & 0x300) != 0) + goto out; + } + + dev_info(dev, "test RX completion buffer (CPLH, unaligned 8+64)"); + + size = 8+64; + stride = 0; + for (count = 8; count <= 256; count += 8) { + dma_cpl_buf_test(edev, + edev->dma_region_addr + 128 - 8, + size, stride, count, 400000); + if ((ioread32(edev->bar[0] + 0x000000) & 0x300) != 0) + goto out; + } + + dev_info(dev, "test RX completion buffer (CPLH, unaligned 8+128+8)"); + + size = 8+128+8; + stride = 0; + for (count = 8; count <= 256; count += 8) { + dma_cpl_buf_test(edev, + edev->dma_region_addr + 128 - 8, + size, 
stride, count, 100000); + if ((ioread32(edev->bar[0] + 0x000000) & 0x300) != 0) + goto out; + } + + dev_info(dev, "test RX completion buffer (CPLD)"); + + size = 512; + stride = size; + for (count = 8; count <= 256; count += 8) { + dma_cpl_buf_test(edev, + edev->dma_region_addr + 0x0000, + size, stride, count, 100000); + if ((ioread32(edev->bar[0] + 0x000000) & 0x300) != 0) + goto out; + } + dev_info(dev, "perform block reads (dma_alloc_coherent)"); + count = 10000; for (size = 1; size <= 8192; size *= 2) { for (stride = size; stride <= max(size, 256llu); stride *= 2) { dma_block_read_bench(edev, edev->dma_region_addr + 0x0000, - size, stride, 10000); + size, stride, count); + if ((ioread32(edev->bar[0] + 0x000000) & 0x300) != 0) + goto out; } } dev_info(dev, "perform block writes (dma_alloc_coherent)"); + count = 10000; for (size = 1; size <= 8192; size *= 2) { for (stride = size; stride <= max(size, 256llu); stride *= 2) { dma_block_write_bench(edev, edev->dma_region_addr + 0x0000, - size, stride, 10000); + size, stride, count); + if ((ioread32(edev->bar[0] + 0x000000) & 0x300) != 0) + goto out; } } } +out: dev_info(dev, "Read status"); dev_info(dev, "%08x", ioread32(edev->bar[0] + 0x000000)); From 84eef7b90c114a85d05913f6cff89ce52926eb50 Mon Sep 17 00:00:00 2001 From: Alex Forencich Date: Thu, 22 Jun 2023 17:54:01 -0700 Subject: [PATCH 14/20] Remove extraneous parameters from pcie_msix testbench Signed-off-by: Alex Forencich --- tb/pcie_msix/Makefile | 3 --- tb/pcie_msix/test_pcie_msix.py | 6 +----- 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/tb/pcie_msix/Makefile b/tb/pcie_msix/Makefile index d94fab9c5..f3b4d1d86 100644 --- a/tb/pcie_msix/Makefile +++ b/tb/pcie_msix/Makefile @@ -36,10 +36,7 @@ export PARAM_IRQ_INDEX_WIDTH := 11 export PARAM_AXIL_DATA_WIDTH := 32 export PARAM_AXIL_ADDR_WIDTH := $(shell expr $(PARAM_IRQ_INDEX_WIDTH) + 5 ) export PARAM_AXIL_STRB_WIDTH := $(shell expr $(PARAM_AXIL_DATA_WIDTH) / 8 ) -export PARAM_TLP_DATA_WIDTH := 
64 -export PARAM_TLP_STRB_WIDTH := $(shell expr $(PARAM_TLP_DATA_WIDTH) / 32 ) export PARAM_TLP_HDR_WIDTH := 128 -export PARAM_TLP_SEG_COUNT := 1 export PARAM_TLP_FORCE_64_BIT_ADDR := 0 ifeq ($(SIM), icarus) diff --git a/tb/pcie_msix/test_pcie_msix.py b/tb/pcie_msix/test_pcie_msix.py index c1c054e28..2115dec60 100644 --- a/tb/pcie_msix/test_pcie_msix.py +++ b/tb/pcie_msix/test_pcie_msix.py @@ -319,8 +319,7 @@ rtl_dir = os.path.abspath(os.path.join(tests_dir, '..', '..', 'rtl')) @pytest.mark.parametrize("axil_data_width", [32, 64]) -@pytest.mark.parametrize("pcie_data_width", [64, 128]) -def test_pcie_msix(request, pcie_data_width, axil_data_width): +def test_pcie_msix(request, axil_data_width): dut = "pcie_msix" module = os.path.splitext(os.path.basename(__file__))[0] toplevel = dut @@ -335,10 +334,7 @@ def test_pcie_msix(request, pcie_data_width, axil_data_width): parameters['AXIL_DATA_WIDTH'] = axil_data_width parameters['AXIL_ADDR_WIDTH'] = parameters['IRQ_INDEX_WIDTH']+5 parameters['AXIL_STRB_WIDTH'] = (axil_data_width // 8) - parameters['TLP_DATA_WIDTH'] = pcie_data_width - parameters['TLP_STRB_WIDTH'] = pcie_data_width // 32 parameters['TLP_HDR_WIDTH'] = 128 - parameters['TLP_SEG_COUNT'] = 1 parameters['TLP_FORCE_64_BIT_ADDR'] = 0 extra_env = {f'PARAM_{k}': str(v) for k, v in parameters.items()} From 2306e515223729819f1105099b46c66de5c3f7f8 Mon Sep 17 00:00:00 2001 From: Alex Forencich Date: Thu, 22 Jun 2023 18:08:44 -0700 Subject: [PATCH 15/20] Example design parameter clean-up Signed-off-by: Alex Forencich --- .../fpga_axi/tb/fpga_core/Makefile | 1 - .../fpga_axi/tb/fpga_core/test_fpga_core.py | 1 - example/AU200/fpga_axi/tb/fpga_core/Makefile | 1 - .../fpga_axi/tb/fpga_core/test_fpga_core.py | 1 - example/AU250/fpga_axi/tb/fpga_core/Makefile | 1 - .../fpga_axi/tb/fpga_core/test_fpga_core.py | 1 - example/AU280/fpga_axi/tb/fpga_core/Makefile | 1 - .../fpga_axi/tb/fpga_core/test_fpga_core.py | 1 - example/AU50/fpga_axi/tb/fpga_core/Makefile | 1 - 
.../fpga_axi/tb/fpga_core/test_fpga_core.py | 1 - .../ExaNIC_X10/fpga_axi/tb/fpga_core/Makefile | 1 - .../fpga_axi/tb/fpga_core/test_fpga_core.py | 1 - .../ExaNIC_X25/fpga_axi/tb/fpga_core/Makefile | 1 - .../fpga_axi/tb/fpga_core/test_fpga_core.py | 1 - example/VCU108/fpga_axi/rtl/fpga.v | 59 ++++---- example/VCU108/fpga_axi/rtl/fpga_core.v | 128 +++++++++--------- .../fpga_axi/tb/fpga_core/test_fpga_core.py | 1 - example/VCU118/fpga_axi/tb/fpga_core/Makefile | 1 - .../fpga_axi/tb/fpga_core/test_fpga_core.py | 1 - .../VCU1525/fpga_axi/tb/fpga_core/Makefile | 1 - .../fpga_axi/tb/fpga_core/test_fpga_core.py | 1 - example/ZCU106/fpga_axi/tb/fpga_core/Makefile | 1 - .../fpga_axi/tb/fpga_core/test_fpga_core.py | 1 - example/fb2CG/fpga_axi/tb/fpga_core/Makefile | 1 - .../fpga_axi/tb/fpga_core/test_fpga_core.py | 1 - 25 files changed, 100 insertions(+), 110 deletions(-) diff --git a/example/ADM_PCIE_9V3/fpga_axi/tb/fpga_core/Makefile b/example/ADM_PCIE_9V3/fpga_axi/tb/fpga_core/Makefile index e730af529..b09d0d3bf 100644 --- a/example/ADM_PCIE_9V3/fpga_axi/tb/fpga_core/Makefile +++ b/example/ADM_PCIE_9V3/fpga_axi/tb/fpga_core/Makefile @@ -54,7 +54,6 @@ export PARAM_AXIS_PCIE_RQ_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_ export PARAM_AXIS_PCIE_RC_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),75,161) export PARAM_AXIS_PCIE_CQ_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),88,183) export PARAM_AXIS_PCIE_CC_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),33,81) -export PARAM_RQ_SEQ_NUM_WIDTH := 6 ifeq ($(SIM), icarus) PLUSARGS += -fst diff --git a/example/ADM_PCIE_9V3/fpga_axi/tb/fpga_core/test_fpga_core.py b/example/ADM_PCIE_9V3/fpga_axi/tb/fpga_core/test_fpga_core.py index 80b4057a8..ee8e602bc 100644 --- a/example/ADM_PCIE_9V3/fpga_axi/tb/fpga_core/test_fpga_core.py +++ b/example/ADM_PCIE_9V3/fpga_axi/tb/fpga_core/test_fpga_core.py @@ -396,7 +396,6 @@ def test_fpga_core(request): 
parameters['AXIS_PCIE_RC_USER_WIDTH'] = 75 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 161 parameters['AXIS_PCIE_CQ_USER_WIDTH'] = 88 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 183 parameters['AXIS_PCIE_CC_USER_WIDTH'] = 33 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 81 - parameters['RQ_SEQ_NUM_WIDTH'] = 6 extra_env = {f'PARAM_{k}': str(v) for k, v in parameters.items()} diff --git a/example/AU200/fpga_axi/tb/fpga_core/Makefile b/example/AU200/fpga_axi/tb/fpga_core/Makefile index e730af529..b09d0d3bf 100644 --- a/example/AU200/fpga_axi/tb/fpga_core/Makefile +++ b/example/AU200/fpga_axi/tb/fpga_core/Makefile @@ -54,7 +54,6 @@ export PARAM_AXIS_PCIE_RQ_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_ export PARAM_AXIS_PCIE_RC_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),75,161) export PARAM_AXIS_PCIE_CQ_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),88,183) export PARAM_AXIS_PCIE_CC_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),33,81) -export PARAM_RQ_SEQ_NUM_WIDTH := 6 ifeq ($(SIM), icarus) PLUSARGS += -fst diff --git a/example/AU200/fpga_axi/tb/fpga_core/test_fpga_core.py b/example/AU200/fpga_axi/tb/fpga_core/test_fpga_core.py index f662d6cf6..8e67c0332 100644 --- a/example/AU200/fpga_axi/tb/fpga_core/test_fpga_core.py +++ b/example/AU200/fpga_axi/tb/fpga_core/test_fpga_core.py @@ -398,7 +398,6 @@ def test_fpga_core(request): parameters['AXIS_PCIE_RC_USER_WIDTH'] = 75 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 161 parameters['AXIS_PCIE_CQ_USER_WIDTH'] = 88 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 183 parameters['AXIS_PCIE_CC_USER_WIDTH'] = 33 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 81 - parameters['RQ_SEQ_NUM_WIDTH'] = 6 extra_env = {f'PARAM_{k}': str(v) for k, v in parameters.items()} diff --git a/example/AU250/fpga_axi/tb/fpga_core/Makefile b/example/AU250/fpga_axi/tb/fpga_core/Makefile index e730af529..b09d0d3bf 100644 --- 
a/example/AU250/fpga_axi/tb/fpga_core/Makefile +++ b/example/AU250/fpga_axi/tb/fpga_core/Makefile @@ -54,7 +54,6 @@ export PARAM_AXIS_PCIE_RQ_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_ export PARAM_AXIS_PCIE_RC_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),75,161) export PARAM_AXIS_PCIE_CQ_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),88,183) export PARAM_AXIS_PCIE_CC_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),33,81) -export PARAM_RQ_SEQ_NUM_WIDTH := 6 ifeq ($(SIM), icarus) PLUSARGS += -fst diff --git a/example/AU250/fpga_axi/tb/fpga_core/test_fpga_core.py b/example/AU250/fpga_axi/tb/fpga_core/test_fpga_core.py index f662d6cf6..8e67c0332 100644 --- a/example/AU250/fpga_axi/tb/fpga_core/test_fpga_core.py +++ b/example/AU250/fpga_axi/tb/fpga_core/test_fpga_core.py @@ -398,7 +398,6 @@ def test_fpga_core(request): parameters['AXIS_PCIE_RC_USER_WIDTH'] = 75 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 161 parameters['AXIS_PCIE_CQ_USER_WIDTH'] = 88 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 183 parameters['AXIS_PCIE_CC_USER_WIDTH'] = 33 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 81 - parameters['RQ_SEQ_NUM_WIDTH'] = 6 extra_env = {f'PARAM_{k}': str(v) for k, v in parameters.items()} diff --git a/example/AU280/fpga_axi/tb/fpga_core/Makefile b/example/AU280/fpga_axi/tb/fpga_core/Makefile index e730af529..b09d0d3bf 100644 --- a/example/AU280/fpga_axi/tb/fpga_core/Makefile +++ b/example/AU280/fpga_axi/tb/fpga_core/Makefile @@ -54,7 +54,6 @@ export PARAM_AXIS_PCIE_RQ_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_ export PARAM_AXIS_PCIE_RC_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),75,161) export PARAM_AXIS_PCIE_CQ_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),88,183) export PARAM_AXIS_PCIE_CC_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),33,81) -export PARAM_RQ_SEQ_NUM_WIDTH := 6 ifeq ($(SIM), icarus) PLUSARGS 
+= -fst diff --git a/example/AU280/fpga_axi/tb/fpga_core/test_fpga_core.py b/example/AU280/fpga_axi/tb/fpga_core/test_fpga_core.py index 80b4057a8..ee8e602bc 100644 --- a/example/AU280/fpga_axi/tb/fpga_core/test_fpga_core.py +++ b/example/AU280/fpga_axi/tb/fpga_core/test_fpga_core.py @@ -396,7 +396,6 @@ def test_fpga_core(request): parameters['AXIS_PCIE_RC_USER_WIDTH'] = 75 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 161 parameters['AXIS_PCIE_CQ_USER_WIDTH'] = 88 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 183 parameters['AXIS_PCIE_CC_USER_WIDTH'] = 33 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 81 - parameters['RQ_SEQ_NUM_WIDTH'] = 6 extra_env = {f'PARAM_{k}': str(v) for k, v in parameters.items()} diff --git a/example/AU50/fpga_axi/tb/fpga_core/Makefile b/example/AU50/fpga_axi/tb/fpga_core/Makefile index e730af529..b09d0d3bf 100644 --- a/example/AU50/fpga_axi/tb/fpga_core/Makefile +++ b/example/AU50/fpga_axi/tb/fpga_core/Makefile @@ -54,7 +54,6 @@ export PARAM_AXIS_PCIE_RQ_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_ export PARAM_AXIS_PCIE_RC_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),75,161) export PARAM_AXIS_PCIE_CQ_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),88,183) export PARAM_AXIS_PCIE_CC_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),33,81) -export PARAM_RQ_SEQ_NUM_WIDTH := 6 ifeq ($(SIM), icarus) PLUSARGS += -fst diff --git a/example/AU50/fpga_axi/tb/fpga_core/test_fpga_core.py b/example/AU50/fpga_axi/tb/fpga_core/test_fpga_core.py index 80b4057a8..ee8e602bc 100644 --- a/example/AU50/fpga_axi/tb/fpga_core/test_fpga_core.py +++ b/example/AU50/fpga_axi/tb/fpga_core/test_fpga_core.py @@ -396,7 +396,6 @@ def test_fpga_core(request): parameters['AXIS_PCIE_RC_USER_WIDTH'] = 75 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 161 parameters['AXIS_PCIE_CQ_USER_WIDTH'] = 88 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 183 parameters['AXIS_PCIE_CC_USER_WIDTH'] = 33 if 
parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 81 - parameters['RQ_SEQ_NUM_WIDTH'] = 6 extra_env = {f'PARAM_{k}': str(v) for k, v in parameters.items()} diff --git a/example/ExaNIC_X10/fpga_axi/tb/fpga_core/Makefile b/example/ExaNIC_X10/fpga_axi/tb/fpga_core/Makefile index 83abb0568..ebdba6e8f 100644 --- a/example/ExaNIC_X10/fpga_axi/tb/fpga_core/Makefile +++ b/example/ExaNIC_X10/fpga_axi/tb/fpga_core/Makefile @@ -54,7 +54,6 @@ export PARAM_AXIS_PCIE_RQ_USER_WIDTH := 60 export PARAM_AXIS_PCIE_RC_USER_WIDTH := 75 export PARAM_AXIS_PCIE_CQ_USER_WIDTH := 85 export PARAM_AXIS_PCIE_CC_USER_WIDTH := 33 -export PARAM_RQ_SEQ_NUM_WIDTH := 4 ifeq ($(SIM), icarus) PLUSARGS += -fst diff --git a/example/ExaNIC_X10/fpga_axi/tb/fpga_core/test_fpga_core.py b/example/ExaNIC_X10/fpga_axi/tb/fpga_core/test_fpga_core.py index 942cda074..8a8449e2c 100644 --- a/example/ExaNIC_X10/fpga_axi/tb/fpga_core/test_fpga_core.py +++ b/example/ExaNIC_X10/fpga_axi/tb/fpga_core/test_fpga_core.py @@ -370,7 +370,6 @@ def test_fpga_core(request): parameters['AXIS_PCIE_RC_USER_WIDTH'] = 75 parameters['AXIS_PCIE_CQ_USER_WIDTH'] = 85 parameters['AXIS_PCIE_CC_USER_WIDTH'] = 33 - parameters['RQ_SEQ_NUM_WIDTH'] = 4 extra_env = {f'PARAM_{k}': str(v) for k, v in parameters.items()} diff --git a/example/ExaNIC_X25/fpga_axi/tb/fpga_core/Makefile b/example/ExaNIC_X25/fpga_axi/tb/fpga_core/Makefile index 8df1050af..09f0cec0e 100644 --- a/example/ExaNIC_X25/fpga_axi/tb/fpga_core/Makefile +++ b/example/ExaNIC_X25/fpga_axi/tb/fpga_core/Makefile @@ -54,7 +54,6 @@ export PARAM_AXIS_PCIE_RQ_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_ export PARAM_AXIS_PCIE_RC_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),75,161) export PARAM_AXIS_PCIE_CQ_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),88,183) export PARAM_AXIS_PCIE_CC_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),33,81) -export PARAM_RQ_SEQ_NUM_WIDTH := 6 ifeq ($(SIM), icarus) PLUSARGS += -fst diff 
--git a/example/ExaNIC_X25/fpga_axi/tb/fpga_core/test_fpga_core.py b/example/ExaNIC_X25/fpga_axi/tb/fpga_core/test_fpga_core.py index 0d0d0c5b1..9fbce2122 100644 --- a/example/ExaNIC_X25/fpga_axi/tb/fpga_core/test_fpga_core.py +++ b/example/ExaNIC_X25/fpga_axi/tb/fpga_core/test_fpga_core.py @@ -396,7 +396,6 @@ def test_fpga_core(request): parameters['AXIS_PCIE_RC_USER_WIDTH'] = 75 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 161 parameters['AXIS_PCIE_CQ_USER_WIDTH'] = 88 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 183 parameters['AXIS_PCIE_CC_USER_WIDTH'] = 33 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 81 - parameters['RQ_SEQ_NUM_WIDTH'] = 6 extra_env = {f'PARAM_{k}': str(v) for k, v in parameters.items()} diff --git a/example/VCU108/fpga_axi/rtl/fpga.v b/example/VCU108/fpga_axi/rtl/fpga.v index 93ddb1620..125e8a0b6 100644 --- a/example/VCU108/fpga_axi/rtl/fpga.v +++ b/example/VCU108/fpga_axi/rtl/fpga.v @@ -57,6 +57,10 @@ module fpga ( parameter AXIS_PCIE_DATA_WIDTH = 256; parameter AXIS_PCIE_KEEP_WIDTH = (AXIS_PCIE_DATA_WIDTH/32); +parameter AXIS_PCIE_RC_USER_WIDTH = 75; +parameter AXIS_PCIE_RQ_USER_WIDTH = 60; +parameter AXIS_PCIE_CQ_USER_WIDTH = 85; +parameter AXIS_PCIE_CC_USER_WIDTH = 33; // Clock and reset wire pcie_user_clk; @@ -107,33 +111,33 @@ ibufds_gte3_pcie_mgt_refclk_inst ( .ODIV2 (pcie_sys_clk) ); -wire [AXIS_PCIE_DATA_WIDTH-1:0] axis_rq_tdata; -wire [AXIS_PCIE_KEEP_WIDTH-1:0] axis_rq_tkeep; -wire axis_rq_tlast; -wire axis_rq_tready; -wire [59:0] axis_rq_tuser; -wire axis_rq_tvalid; +wire [AXIS_PCIE_DATA_WIDTH-1:0] axis_rq_tdata; +wire [AXIS_PCIE_KEEP_WIDTH-1:0] axis_rq_tkeep; +wire axis_rq_tlast; +wire axis_rq_tready; +wire [AXIS_PCIE_RQ_USER_WIDTH-1:0] axis_rq_tuser; +wire axis_rq_tvalid; -wire [AXIS_PCIE_DATA_WIDTH-1:0] axis_rc_tdata; -wire [AXIS_PCIE_KEEP_WIDTH-1:0] axis_rc_tkeep; -wire axis_rc_tlast; -wire axis_rc_tready; -wire [74:0] axis_rc_tuser; -wire axis_rc_tvalid; +wire [AXIS_PCIE_DATA_WIDTH-1:0] axis_rc_tdata; +wire 
[AXIS_PCIE_KEEP_WIDTH-1:0] axis_rc_tkeep; +wire axis_rc_tlast; +wire axis_rc_tready; +wire [AXIS_PCIE_RC_USER_WIDTH-1:0] axis_rc_tuser; +wire axis_rc_tvalid; -wire [AXIS_PCIE_DATA_WIDTH-1:0] axis_cq_tdata; -wire [AXIS_PCIE_KEEP_WIDTH-1:0] axis_cq_tkeep; -wire axis_cq_tlast; -wire axis_cq_tready; -wire [84:0] axis_cq_tuser; -wire axis_cq_tvalid; +wire [AXIS_PCIE_DATA_WIDTH-1:0] axis_cq_tdata; +wire [AXIS_PCIE_KEEP_WIDTH-1:0] axis_cq_tkeep; +wire axis_cq_tlast; +wire axis_cq_tready; +wire [AXIS_PCIE_CQ_USER_WIDTH-1:0] axis_cq_tuser; +wire axis_cq_tvalid; -wire [AXIS_PCIE_DATA_WIDTH-1:0] axis_cc_tdata; -wire [AXIS_PCIE_KEEP_WIDTH-1:0] axis_cc_tkeep; -wire axis_cc_tlast; -wire axis_cc_tready; -wire [32:0] axis_cc_tuser; -wire axis_cc_tvalid; +wire [AXIS_PCIE_DATA_WIDTH-1:0] axis_cc_tdata; +wire [AXIS_PCIE_KEEP_WIDTH-1:0] axis_cc_tkeep; +wire axis_cc_tlast; +wire axis_cc_tready; +wire [AXIS_PCIE_CC_USER_WIDTH-1:0] axis_cc_tuser; +wire axis_cc_tvalid; // ila_0 rq_ila ( // .clk(pcie_user_clk), @@ -357,7 +361,12 @@ pcie3_ultrascale_inst ( ); fpga_core #( - .AXIS_PCIE_DATA_WIDTH(AXIS_PCIE_DATA_WIDTH) + .AXIS_PCIE_DATA_WIDTH(AXIS_PCIE_DATA_WIDTH), + .AXIS_PCIE_KEEP_WIDTH(AXIS_PCIE_KEEP_WIDTH), + .AXIS_PCIE_RC_USER_WIDTH(AXIS_PCIE_RC_USER_WIDTH), + .AXIS_PCIE_RQ_USER_WIDTH(AXIS_PCIE_RQ_USER_WIDTH), + .AXIS_PCIE_CQ_USER_WIDTH(AXIS_PCIE_CQ_USER_WIDTH), + .AXIS_PCIE_CC_USER_WIDTH(AXIS_PCIE_CC_USER_WIDTH) ) core_inst ( /* diff --git a/example/VCU108/fpga_axi/rtl/fpga_core.v b/example/VCU108/fpga_axi/rtl/fpga_core.v index c5819b9d5..742db0bc5 100644 --- a/example/VCU108/fpga_axi/rtl/fpga_core.v +++ b/example/VCU108/fpga_axi/rtl/fpga_core.v @@ -34,89 +34,93 @@ THE SOFTWARE. 
module fpga_core # ( parameter AXIS_PCIE_DATA_WIDTH = 256, - parameter AXIS_PCIE_KEEP_WIDTH = (AXIS_PCIE_DATA_WIDTH/32) + parameter AXIS_PCIE_KEEP_WIDTH = (AXIS_PCIE_DATA_WIDTH/32), + parameter AXIS_PCIE_RC_USER_WIDTH = 75, + parameter AXIS_PCIE_RQ_USER_WIDTH = 60, + parameter AXIS_PCIE_CQ_USER_WIDTH = 85, + parameter AXIS_PCIE_CC_USER_WIDTH = 33 ) ( /* * Clock: 250 MHz * Synchronous reset */ - input wire clk, - input wire rst, + input wire clk, + input wire rst, /* * GPIO */ - input wire btnu, - input wire btnl, - input wire btnd, - input wire btnr, - input wire btnc, - input wire [3:0] sw, - output wire [7:0] led, + input wire btnu, + input wire btnl, + input wire btnd, + input wire btnr, + input wire btnc, + input wire [3:0] sw, + output wire [7:0] led, /* * PCIe */ - output wire [AXIS_PCIE_DATA_WIDTH-1:0] m_axis_rq_tdata, - output wire [AXIS_PCIE_KEEP_WIDTH-1:0] m_axis_rq_tkeep, - output wire m_axis_rq_tlast, - input wire m_axis_rq_tready, - output wire [59:0] m_axis_rq_tuser, - output wire m_axis_rq_tvalid, + output wire [AXIS_PCIE_DATA_WIDTH-1:0] m_axis_rq_tdata, + output wire [AXIS_PCIE_KEEP_WIDTH-1:0] m_axis_rq_tkeep, + output wire m_axis_rq_tlast, + input wire m_axis_rq_tready, + output wire [AXIS_PCIE_RQ_USER_WIDTH-1:0] m_axis_rq_tuser, + output wire m_axis_rq_tvalid, - input wire [AXIS_PCIE_DATA_WIDTH-1:0] s_axis_rc_tdata, - input wire [AXIS_PCIE_KEEP_WIDTH-1:0] s_axis_rc_tkeep, - input wire s_axis_rc_tlast, - output wire s_axis_rc_tready, - input wire [74:0] s_axis_rc_tuser, - input wire s_axis_rc_tvalid, + input wire [AXIS_PCIE_DATA_WIDTH-1:0] s_axis_rc_tdata, + input wire [AXIS_PCIE_KEEP_WIDTH-1:0] s_axis_rc_tkeep, + input wire s_axis_rc_tlast, + output wire s_axis_rc_tready, + input wire [AXIS_PCIE_RC_USER_WIDTH-1:0] s_axis_rc_tuser, + input wire s_axis_rc_tvalid, - input wire [AXIS_PCIE_DATA_WIDTH-1:0] s_axis_cq_tdata, - input wire [AXIS_PCIE_KEEP_WIDTH-1:0] s_axis_cq_tkeep, - input wire s_axis_cq_tlast, - output wire s_axis_cq_tready, - input wire 
[84:0] s_axis_cq_tuser, - input wire s_axis_cq_tvalid, + input wire [AXIS_PCIE_DATA_WIDTH-1:0] s_axis_cq_tdata, + input wire [AXIS_PCIE_KEEP_WIDTH-1:0] s_axis_cq_tkeep, + input wire s_axis_cq_tlast, + output wire s_axis_cq_tready, + input wire [AXIS_PCIE_CQ_USER_WIDTH-1:0] s_axis_cq_tuser, + input wire s_axis_cq_tvalid, - output wire [AXIS_PCIE_DATA_WIDTH-1:0] m_axis_cc_tdata, - output wire [AXIS_PCIE_KEEP_WIDTH-1:0] m_axis_cc_tkeep, - output wire m_axis_cc_tlast, - input wire m_axis_cc_tready, - output wire [32:0] m_axis_cc_tuser, - output wire m_axis_cc_tvalid, + output wire [AXIS_PCIE_DATA_WIDTH-1:0] m_axis_cc_tdata, + output wire [AXIS_PCIE_KEEP_WIDTH-1:0] m_axis_cc_tkeep, + output wire m_axis_cc_tlast, + input wire m_axis_cc_tready, + output wire [AXIS_PCIE_CC_USER_WIDTH-1:0] m_axis_cc_tuser, + output wire m_axis_cc_tvalid, - input wire [2:0] cfg_max_payload, - input wire [2:0] cfg_max_read_req, + input wire [2:0] cfg_max_payload, + input wire [2:0] cfg_max_read_req, - output wire [18:0] cfg_mgmt_addr, - output wire cfg_mgmt_write, - output wire [31:0] cfg_mgmt_write_data, - output wire [3:0] cfg_mgmt_byte_enable, - output wire cfg_mgmt_read, - input wire [31:0] cfg_mgmt_read_data, - input wire cfg_mgmt_read_write_done, + output wire [18:0] cfg_mgmt_addr, + output wire cfg_mgmt_write, + output wire [31:0] cfg_mgmt_write_data, + output wire [3:0] cfg_mgmt_byte_enable, + output wire cfg_mgmt_read, + input wire [31:0] cfg_mgmt_read_data, + input wire cfg_mgmt_read_write_done, - input wire [3:0] cfg_interrupt_msi_enable, - input wire [7:0] cfg_interrupt_msi_vf_enable, - input wire [11:0] cfg_interrupt_msi_mmenable, - input wire cfg_interrupt_msi_mask_update, - input wire [31:0] cfg_interrupt_msi_data, - output wire [3:0] cfg_interrupt_msi_select, - output wire [31:0] cfg_interrupt_msi_int, - output wire [31:0] cfg_interrupt_msi_pending_status, - output wire cfg_interrupt_msi_pending_status_data_enable, - output wire [3:0] 
cfg_interrupt_msi_pending_status_function_num, - input wire cfg_interrupt_msi_sent, - input wire cfg_interrupt_msi_fail, - output wire [2:0] cfg_interrupt_msi_attr, - output wire cfg_interrupt_msi_tph_present, - output wire [1:0] cfg_interrupt_msi_tph_type, - output wire [8:0] cfg_interrupt_msi_tph_st_tag, - output wire [3:0] cfg_interrupt_msi_function_number, + input wire [3:0] cfg_interrupt_msi_enable, + input wire [7:0] cfg_interrupt_msi_vf_enable, + input wire [11:0] cfg_interrupt_msi_mmenable, + input wire cfg_interrupt_msi_mask_update, + input wire [31:0] cfg_interrupt_msi_data, + output wire [3:0] cfg_interrupt_msi_select, + output wire [31:0] cfg_interrupt_msi_int, + output wire [31:0] cfg_interrupt_msi_pending_status, + output wire cfg_interrupt_msi_pending_status_data_enable, + output wire [3:0] cfg_interrupt_msi_pending_status_function_num, + input wire cfg_interrupt_msi_sent, + input wire cfg_interrupt_msi_fail, + output wire [2:0] cfg_interrupt_msi_attr, + output wire cfg_interrupt_msi_tph_present, + output wire [1:0] cfg_interrupt_msi_tph_type, + output wire [8:0] cfg_interrupt_msi_tph_st_tag, + output wire [3:0] cfg_interrupt_msi_function_number, - output wire status_error_cor, - output wire status_error_uncor + output wire status_error_cor, + output wire status_error_uncor ); parameter PCIE_ADDR_WIDTH = 64; diff --git a/example/VCU108/fpga_axi/tb/fpga_core/test_fpga_core.py b/example/VCU108/fpga_axi/tb/fpga_core/test_fpga_core.py index b292bc974..562568c0b 100644 --- a/example/VCU108/fpga_axi/tb/fpga_core/test_fpga_core.py +++ b/example/VCU108/fpga_axi/tb/fpga_core/test_fpga_core.py @@ -377,7 +377,6 @@ def test_fpga_core(request): parameters['AXIS_PCIE_RC_USER_WIDTH'] = 75 parameters['AXIS_PCIE_CQ_USER_WIDTH'] = 85 parameters['AXIS_PCIE_CC_USER_WIDTH'] = 33 - parameters['RQ_SEQ_NUM_WIDTH'] = 4 extra_env = {f'PARAM_{k}': str(v) for k, v in parameters.items()} diff --git a/example/VCU118/fpga_axi/tb/fpga_core/Makefile 
b/example/VCU118/fpga_axi/tb/fpga_core/Makefile index e730af529..b09d0d3bf 100644 --- a/example/VCU118/fpga_axi/tb/fpga_core/Makefile +++ b/example/VCU118/fpga_axi/tb/fpga_core/Makefile @@ -54,7 +54,6 @@ export PARAM_AXIS_PCIE_RQ_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_ export PARAM_AXIS_PCIE_RC_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),75,161) export PARAM_AXIS_PCIE_CQ_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),88,183) export PARAM_AXIS_PCIE_CC_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),33,81) -export PARAM_RQ_SEQ_NUM_WIDTH := 6 ifeq ($(SIM), icarus) PLUSARGS += -fst diff --git a/example/VCU118/fpga_axi/tb/fpga_core/test_fpga_core.py b/example/VCU118/fpga_axi/tb/fpga_core/test_fpga_core.py index 74ef95548..a2de697b6 100644 --- a/example/VCU118/fpga_axi/tb/fpga_core/test_fpga_core.py +++ b/example/VCU118/fpga_axi/tb/fpga_core/test_fpga_core.py @@ -403,7 +403,6 @@ def test_fpga_core(request): parameters['AXIS_PCIE_RC_USER_WIDTH'] = 75 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 161 parameters['AXIS_PCIE_CQ_USER_WIDTH'] = 88 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 183 parameters['AXIS_PCIE_CC_USER_WIDTH'] = 33 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 81 - parameters['RQ_SEQ_NUM_WIDTH'] = 6 extra_env = {f'PARAM_{k}': str(v) for k, v in parameters.items()} diff --git a/example/VCU1525/fpga_axi/tb/fpga_core/Makefile b/example/VCU1525/fpga_axi/tb/fpga_core/Makefile index e730af529..b09d0d3bf 100644 --- a/example/VCU1525/fpga_axi/tb/fpga_core/Makefile +++ b/example/VCU1525/fpga_axi/tb/fpga_core/Makefile @@ -54,7 +54,6 @@ export PARAM_AXIS_PCIE_RQ_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_ export PARAM_AXIS_PCIE_RC_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),75,161) export PARAM_AXIS_PCIE_CQ_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),88,183) export PARAM_AXIS_PCIE_CC_USER_WIDTH := $(if $(filter-out 
512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),33,81) -export PARAM_RQ_SEQ_NUM_WIDTH := 6 ifeq ($(SIM), icarus) PLUSARGS += -fst diff --git a/example/VCU1525/fpga_axi/tb/fpga_core/test_fpga_core.py b/example/VCU1525/fpga_axi/tb/fpga_core/test_fpga_core.py index f662d6cf6..8e67c0332 100644 --- a/example/VCU1525/fpga_axi/tb/fpga_core/test_fpga_core.py +++ b/example/VCU1525/fpga_axi/tb/fpga_core/test_fpga_core.py @@ -398,7 +398,6 @@ def test_fpga_core(request): parameters['AXIS_PCIE_RC_USER_WIDTH'] = 75 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 161 parameters['AXIS_PCIE_CQ_USER_WIDTH'] = 88 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 183 parameters['AXIS_PCIE_CC_USER_WIDTH'] = 33 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 81 - parameters['RQ_SEQ_NUM_WIDTH'] = 6 extra_env = {f'PARAM_{k}': str(v) for k, v in parameters.items()} diff --git a/example/ZCU106/fpga_axi/tb/fpga_core/Makefile b/example/ZCU106/fpga_axi/tb/fpga_core/Makefile index 6d78bf572..55ed40bff 100644 --- a/example/ZCU106/fpga_axi/tb/fpga_core/Makefile +++ b/example/ZCU106/fpga_axi/tb/fpga_core/Makefile @@ -54,7 +54,6 @@ export PARAM_AXIS_PCIE_RQ_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_ export PARAM_AXIS_PCIE_RC_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),75,161) export PARAM_AXIS_PCIE_CQ_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),88,183) export PARAM_AXIS_PCIE_CC_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),33,81) -export PARAM_RQ_SEQ_NUM_WIDTH := 6 ifeq ($(SIM), icarus) PLUSARGS += -fst diff --git a/example/ZCU106/fpga_axi/tb/fpga_core/test_fpga_core.py b/example/ZCU106/fpga_axi/tb/fpga_core/test_fpga_core.py index e27299f62..3e1712a8e 100644 --- a/example/ZCU106/fpga_axi/tb/fpga_core/test_fpga_core.py +++ b/example/ZCU106/fpga_axi/tb/fpga_core/test_fpga_core.py @@ -403,7 +403,6 @@ def test_fpga_core(request): parameters['AXIS_PCIE_RC_USER_WIDTH'] = 75 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 161 
parameters['AXIS_PCIE_CQ_USER_WIDTH'] = 88 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 183 parameters['AXIS_PCIE_CC_USER_WIDTH'] = 33 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 81 - parameters['RQ_SEQ_NUM_WIDTH'] = 6 extra_env = {f'PARAM_{k}': str(v) for k, v in parameters.items()} diff --git a/example/fb2CG/fpga_axi/tb/fpga_core/Makefile b/example/fb2CG/fpga_axi/tb/fpga_core/Makefile index e730af529..b09d0d3bf 100644 --- a/example/fb2CG/fpga_axi/tb/fpga_core/Makefile +++ b/example/fb2CG/fpga_axi/tb/fpga_core/Makefile @@ -54,7 +54,6 @@ export PARAM_AXIS_PCIE_RQ_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_ export PARAM_AXIS_PCIE_RC_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),75,161) export PARAM_AXIS_PCIE_CQ_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),88,183) export PARAM_AXIS_PCIE_CC_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),33,81) -export PARAM_RQ_SEQ_NUM_WIDTH := 6 ifeq ($(SIM), icarus) PLUSARGS += -fst diff --git a/example/fb2CG/fpga_axi/tb/fpga_core/test_fpga_core.py b/example/fb2CG/fpga_axi/tb/fpga_core/test_fpga_core.py index 80b4057a8..ee8e602bc 100644 --- a/example/fb2CG/fpga_axi/tb/fpga_core/test_fpga_core.py +++ b/example/fb2CG/fpga_axi/tb/fpga_core/test_fpga_core.py @@ -396,7 +396,6 @@ def test_fpga_core(request): parameters['AXIS_PCIE_RC_USER_WIDTH'] = 75 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 161 parameters['AXIS_PCIE_CQ_USER_WIDTH'] = 88 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 183 parameters['AXIS_PCIE_CC_USER_WIDTH'] = 33 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 81 - parameters['RQ_SEQ_NUM_WIDTH'] = 6 extra_env = {f'PARAM_{k}': str(v) for k, v in parameters.items()} From 4798f2162d5aa1511b5ecd27f7de9422916863b0 Mon Sep 17 00:00:00 2001 From: Alex Forencich Date: Thu, 22 Jun 2023 18:14:34 -0700 Subject: [PATCH 16/20] Remove extraneous parameters from pcie_us_axi_dma_wr testbench Signed-off-by: Alex Forencich --- 
tb/pcie_us_axi_dma_wr/test_pcie_us_axi_dma_wr.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/tb/pcie_us_axi_dma_wr/test_pcie_us_axi_dma_wr.py b/tb/pcie_us_axi_dma_wr/test_pcie_us_axi_dma_wr.py index 4d6c9e7d4..472e0e51f 100644 --- a/tb/pcie_us_axi_dma_wr/test_pcie_us_axi_dma_wr.py +++ b/tb/pcie_us_axi_dma_wr/test_pcie_us_axi_dma_wr.py @@ -270,9 +270,6 @@ def test_pcie_us_axi_dma_wr(request, axis_pcie_data_width, pcie_offset): parameters['AXI_ID_WIDTH'] = 8 parameters['AXI_MAX_BURST_LEN'] = 256 parameters['PCIE_ADDR_WIDTH'] = 64 - parameters['PCIE_TAG_COUNT'] = 64 if parameters['AXIS_PCIE_RQ_USER_WIDTH'] == 60 else 256 - parameters['PCIE_TAG_WIDTH'] = (parameters['PCIE_TAG_COUNT']-1).bit_length() - parameters['PCIE_EXT_TAG_ENABLE'] = int(parameters['PCIE_TAG_COUNT'] > 32) parameters['LEN_WIDTH'] = 20 parameters['TAG_WIDTH'] = 8 parameters['OP_TABLE_SIZE'] = 2**(parameters['RQ_SEQ_NUM_WIDTH']-1) From 2f881e154aaf3c6a75d93ba78262bd5437f32f2d Mon Sep 17 00:00:00 2001 From: Alex Forencich Date: Thu, 22 Jun 2023 19:16:09 -0700 Subject: [PATCH 17/20] Adjust testbench timeouts Signed-off-by: Alex Forencich --- .../example_core_pcie_ptile/test_example_core_pcie_ptile.py | 4 ++-- .../tb/example_core_pcie_s10/test_example_core_pcie_s10.py | 4 ++-- .../tb/example_core_pcie_us/test_example_core_pcie_us.py | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/example/common/tb/example_core_pcie_ptile/test_example_core_pcie_ptile.py b/example/common/tb/example_core_pcie_ptile/test_example_core_pcie_ptile.py index 6668724fe..5fe252662 100644 --- a/example/common/tb/example_core_pcie_ptile/test_example_core_pcie_ptile.py +++ b/example/common/tb/example_core_pcie_ptile/test_example_core_pcie_ptile.py @@ -301,7 +301,7 @@ async def dma_block_read_bench(tb, dev, addr, mask, size, stride, count): # start await dev_pf0_bar0.write_dword(0x001000, 1) - for k in range(100): + for k in range(1000): await Timer(1000, 'ns') run = await dev_pf0_bar0.read_dword(0x001000) 
status = await dev_pf0_bar0.read_dword(0x000000) @@ -366,7 +366,7 @@ async def dma_block_write_bench(tb, dev, addr, mask, size, stride, count): # start await dev_pf0_bar0.write_dword(0x001100, 1) - for k in range(100): + for k in range(1000): await Timer(1000, 'ns') run = await dev_pf0_bar0.read_dword(0x001100) status = await dev_pf0_bar0.read_dword(0x000000) diff --git a/example/common/tb/example_core_pcie_s10/test_example_core_pcie_s10.py b/example/common/tb/example_core_pcie_s10/test_example_core_pcie_s10.py index 92fa6e5d9..42ecf1924 100644 --- a/example/common/tb/example_core_pcie_s10/test_example_core_pcie_s10.py +++ b/example/common/tb/example_core_pcie_s10/test_example_core_pcie_s10.py @@ -249,7 +249,7 @@ async def dma_block_read_bench(tb, dev, addr, mask, size, stride, count): # start await dev_pf0_bar0.write_dword(0x001000, 1) - for k in range(100): + for k in range(1000): await Timer(1000, 'ns') run = await dev_pf0_bar0.read_dword(0x001000) status = await dev_pf0_bar0.read_dword(0x000000) @@ -314,7 +314,7 @@ async def dma_block_write_bench(tb, dev, addr, mask, size, stride, count): # start await dev_pf0_bar0.write_dword(0x001100, 1) - for k in range(100): + for k in range(1000): await Timer(1000, 'ns') run = await dev_pf0_bar0.read_dword(0x001100) status = await dev_pf0_bar0.read_dword(0x000000) diff --git a/example/common/tb/example_core_pcie_us/test_example_core_pcie_us.py b/example/common/tb/example_core_pcie_us/test_example_core_pcie_us.py index 832d58508..68775d8e1 100644 --- a/example/common/tb/example_core_pcie_us/test_example_core_pcie_us.py +++ b/example/common/tb/example_core_pcie_us/test_example_core_pcie_us.py @@ -342,7 +342,7 @@ async def dma_block_read_bench(tb, dev, addr, mask, size, stride, count): # start await dev_pf0_bar0.write_dword(0x001000, 1) - for k in range(100): + for k in range(1000): await Timer(1000, 'ns') run = await dev_pf0_bar0.read_dword(0x001000) status = await dev_pf0_bar0.read_dword(0x000000) @@ -407,7 +407,7 @@ async 
def dma_block_write_bench(tb, dev, addr, mask, size, stride, count): # start await dev_pf0_bar0.write_dword(0x001100, 1) - for k in range(100): + for k in range(1000): await Timer(1000, 'ns') run = await dev_pf0_bar0.read_dword(0x001100) status = await dev_pf0_bar0.read_dword(0x000000) From d730369671fae10c8a7f639ad806586872f5c948 Mon Sep 17 00:00:00 2001 From: Alex Forencich Date: Fri, 23 Jun 2023 02:39:22 -0700 Subject: [PATCH 18/20] Use correct offsets in testbench Signed-off-by: Alex Forencich --- .../example_core_pcie_ptile/test_example_core_pcie_ptile.py | 4 ++-- .../tb/example_core_pcie_s10/test_example_core_pcie_s10.py | 4 ++-- .../tb/example_core_pcie_us/test_example_core_pcie_us.py | 4 ++-- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/example/common/tb/example_core_pcie_ptile/test_example_core_pcie_ptile.py b/example/common/tb/example_core_pcie_ptile/test_example_core_pcie_ptile.py index 5fe252662..b32fb3a70 100644 --- a/example/common/tb/example_core_pcie_ptile/test_example_core_pcie_ptile.py +++ b/example/common/tb/example_core_pcie_ptile/test_example_core_pcie_ptile.py @@ -590,14 +590,14 @@ async def run_test(dut): size = 8+64 stride = 0 for count in range(8, 256+1, 8): - await dma_cpl_buf_test(tb, dev, mem_base, region_len-1, size, stride, count, 2000) + await dma_cpl_buf_test(tb, dev, mem_base+128-8, region_len-1, size, stride, count, 2000) tb.log.info("Test RX completion buffer (CPLH, 8+128+8)") size = 8+128+8 stride = 0 for count in range(8, 256+1, 8): - await dma_cpl_buf_test(tb, dev, mem_base, region_len-1, size, stride, count, 2000) + await dma_cpl_buf_test(tb, dev, mem_base+128-8, region_len-1, size, stride, count, 2000) tb.log.info("Test RX completion buffer (CPLD)") diff --git a/example/common/tb/example_core_pcie_s10/test_example_core_pcie_s10.py b/example/common/tb/example_core_pcie_s10/test_example_core_pcie_s10.py index 42ecf1924..eb5a2385b 100644 --- a/example/common/tb/example_core_pcie_s10/test_example_core_pcie_s10.py 
+++ b/example/common/tb/example_core_pcie_s10/test_example_core_pcie_s10.py @@ -538,14 +538,14 @@ async def run_test(dut): size = 8+64 stride = 0 for count in range(8, 256+1, 8): - await dma_cpl_buf_test(tb, dev, mem_base, region_len-1, size, stride, count, 2000) + await dma_cpl_buf_test(tb, dev, mem_base+128-8, region_len-1, size, stride, count, 2000) tb.log.info("Test RX completion buffer (CPLH, 8+128+8)") size = 8+128+8 stride = 0 for count in range(8, 256+1, 8): - await dma_cpl_buf_test(tb, dev, mem_base, region_len-1, size, stride, count, 2000) + await dma_cpl_buf_test(tb, dev, mem_base+128-8, region_len-1, size, stride, count, 2000) tb.log.info("Test RX completion buffer (CPLD)") diff --git a/example/common/tb/example_core_pcie_us/test_example_core_pcie_us.py b/example/common/tb/example_core_pcie_us/test_example_core_pcie_us.py index 68775d8e1..248c279e4 100644 --- a/example/common/tb/example_core_pcie_us/test_example_core_pcie_us.py +++ b/example/common/tb/example_core_pcie_us/test_example_core_pcie_us.py @@ -631,14 +631,14 @@ async def run_test(dut): size = 8+64 stride = 0 for count in range(8, 256+1, 8): - await dma_cpl_buf_test(tb, dev, mem_base, region_len-1, size, stride, count, 2000) + await dma_cpl_buf_test(tb, dev, mem_base+128-8, region_len-1, size, stride, count, 2000) tb.log.info("Test RX completion buffer (CPLH, 8+128+8)") size = 8+128+8 stride = 0 for count in range(8, 256+1, 8): - await dma_cpl_buf_test(tb, dev, mem_base, region_len-1, size, stride, count, 2000) + await dma_cpl_buf_test(tb, dev, mem_base+128-8, region_len-1, size, stride, count, 2000) tb.log.info("Test RX completion buffer (CPLD)") From fe7d8e229d91a70225f1aae061ff639fe9217427 Mon Sep 17 00:00:00 2001 From: Alex Forencich Date: Fri, 23 Jun 2023 22:37:30 -0700 Subject: [PATCH 19/20] Update cocotbext-pcie Signed-off-by: Alex Forencich --- tox.ini | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tox.ini b/tox.ini index 897b5a325..234ac0741 100644 --- a/tox.ini +++ 
b/tox.ini @@ -18,7 +18,7 @@ deps = cocotb-bus == 0.2.1 cocotb-test == 0.2.4 cocotbext-axi == 0.1.24 - cocotbext-pcie == 0.2.12 + cocotbext-pcie == 0.2.14 jinja2 == 3.1.2 commands = From 75126f133318b31f226ae13ebc46a40eb52cf3ac Mon Sep 17 00:00:00 2001 From: Alex Forencich Date: Fri, 23 Jun 2023 22:38:06 -0700 Subject: [PATCH 20/20] Configure RC model to split on all RCB boundaries during RX completion buffer tests Signed-off-by: Alex Forencich --- .../example_core_pcie_ptile/test_example_core_pcie_ptile.py | 4 ++++ .../tb/example_core_pcie_s10/test_example_core_pcie_s10.py | 4 ++++ .../tb/example_core_pcie_us/test_example_core_pcie_us.py | 4 ++++ 3 files changed, 12 insertions(+) diff --git a/example/common/tb/example_core_pcie_ptile/test_example_core_pcie_ptile.py b/example/common/tb/example_core_pcie_ptile/test_example_core_pcie_ptile.py index b32fb3a70..42cce8133 100644 --- a/example/common/tb/example_core_pcie_ptile/test_example_core_pcie_ptile.py +++ b/example/common/tb/example_core_pcie_ptile/test_example_core_pcie_ptile.py @@ -580,6 +580,8 @@ async def run_test(dut): tb.log.info("Test RX completion buffer (CPLH, 8)") + tb.rc.split_on_all_rcb = True + size = 8 stride = size for count in range(32, 256+1, 8): @@ -599,6 +601,8 @@ async def run_test(dut): for count in range(8, 256+1, 8): await dma_cpl_buf_test(tb, dev, mem_base+128-8, region_len-1, size, stride, count, 2000) + tb.rc.split_on_all_rcb = False + tb.log.info("Test RX completion buffer (CPLD)") size = 512 diff --git a/example/common/tb/example_core_pcie_s10/test_example_core_pcie_s10.py b/example/common/tb/example_core_pcie_s10/test_example_core_pcie_s10.py index eb5a2385b..d6cfac5d0 100644 --- a/example/common/tb/example_core_pcie_s10/test_example_core_pcie_s10.py +++ b/example/common/tb/example_core_pcie_s10/test_example_core_pcie_s10.py @@ -528,6 +528,8 @@ async def run_test(dut): tb.log.info("Test RX completion buffer (CPLH, 8)") + tb.rc.split_on_all_rcb = True + size = 8 stride = size for count 
in range(32, 256+1, 8): @@ -547,6 +549,8 @@ async def run_test(dut): for count in range(8, 256+1, 8): await dma_cpl_buf_test(tb, dev, mem_base+128-8, region_len-1, size, stride, count, 2000) + tb.rc.split_on_all_rcb = False + tb.log.info("Test RX completion buffer (CPLD)") size = 512 diff --git a/example/common/tb/example_core_pcie_us/test_example_core_pcie_us.py b/example/common/tb/example_core_pcie_us/test_example_core_pcie_us.py index 248c279e4..b20144336 100644 --- a/example/common/tb/example_core_pcie_us/test_example_core_pcie_us.py +++ b/example/common/tb/example_core_pcie_us/test_example_core_pcie_us.py @@ -621,6 +621,8 @@ async def run_test(dut): tb.log.info("Test RX completion buffer (CPLH, 8)") + tb.rc.split_on_all_rcb = True + size = 8 stride = size for count in range(32, 256+1, 8): @@ -640,6 +642,8 @@ async def run_test(dut): for count in range(8, 256+1, 8): await dma_cpl_buf_test(tb, dev, mem_base+128-8, region_len-1, size, stride, count, 2000) + tb.rc.split_on_all_rcb = False + tb.log.info("Test RX completion buffer (CPLD)") size = 512