From d45857fb98655411dfe682992d12cf7c0b0de4a5 Mon Sep 17 00:00:00 2001 From: Alex Forencich Date: Thu, 21 Apr 2022 14:19:43 -0700 Subject: [PATCH] fpga/app/dma_bench: Add DMA benchmark application Signed-off-by: Alex Forencich --- fpga/app/dma_bench/lib | 1 + fpga/app/dma_bench/modules/mqnic | 1 + .../modules/mqnic_app_dma_bench/Makefile | 26 + .../modules/mqnic_app_dma_bench/main.c | 569 ++++++++ fpga/app/dma_bench/rtl/common | 1 + .../dma_bench/rtl/mqnic_app_block_dma_bench.v | 1158 +++++++++++++++++ .../dma_bench/tb/mqnic_core_pcie_us/Makefile | 439 +++++++ .../dma_bench/tb/mqnic_core_pcie_us/mqnic.py | 1 + .../test_mqnic_core_pcie_us.py | 967 ++++++++++++++ 9 files changed, 3163 insertions(+) create mode 120000 fpga/app/dma_bench/lib create mode 120000 fpga/app/dma_bench/modules/mqnic create mode 100644 fpga/app/dma_bench/modules/mqnic_app_dma_bench/Makefile create mode 100644 fpga/app/dma_bench/modules/mqnic_app_dma_bench/main.c create mode 120000 fpga/app/dma_bench/rtl/common create mode 100644 fpga/app/dma_bench/rtl/mqnic_app_block_dma_bench.v create mode 100644 fpga/app/dma_bench/tb/mqnic_core_pcie_us/Makefile create mode 120000 fpga/app/dma_bench/tb/mqnic_core_pcie_us/mqnic.py create mode 100644 fpga/app/dma_bench/tb/mqnic_core_pcie_us/test_mqnic_core_pcie_us.py diff --git a/fpga/app/dma_bench/lib b/fpga/app/dma_bench/lib new file mode 120000 index 000000000..bc1a1ee04 --- /dev/null +++ b/fpga/app/dma_bench/lib @@ -0,0 +1 @@ +../../lib/ \ No newline at end of file diff --git a/fpga/app/dma_bench/modules/mqnic b/fpga/app/dma_bench/modules/mqnic new file mode 120000 index 000000000..9a59d2b76 --- /dev/null +++ b/fpga/app/dma_bench/modules/mqnic @@ -0,0 +1 @@ +../../../../modules/mqnic/ \ No newline at end of file diff --git a/fpga/app/dma_bench/modules/mqnic_app_dma_bench/Makefile b/fpga/app/dma_bench/modules/mqnic_app_dma_bench/Makefile new file mode 100644 index 000000000..cb2cbdc70 --- /dev/null +++ b/fpga/app/dma_bench/modules/mqnic_app_dma_bench/Makefile @@ -0,0 +1,26 @@ +ifneq ($(KERNELRELEASE),) + +ccflags-y += -I$(src)/../mqnic/ + +# object files to build +obj-m += mqnic_app_dma_bench.o +mqnic_app_dma_bench-y += main.o + +else + +ifneq ($(KERNEL_SRC),) +# alternatively to variable KDIR accept variable KERNEL_SRC as used in +# PetaLinux/Yocto for example +KDIR ?= $(KERNEL_SRC) +endif + +KDIR ?= /lib/modules/$(shell uname -r)/build + +all: modules + +help modules modules_install clean: + $(MAKE) -C $(KDIR) M=$(shell pwd) $@ + +install: modules_install + +endif diff --git a/fpga/app/dma_bench/modules/mqnic_app_dma_bench/main.c b/fpga/app/dma_bench/modules/mqnic_app_dma_bench/main.c new file mode 100644 index 000000000..db087232b --- /dev/null +++ b/fpga/app/dma_bench/modules/mqnic_app_dma_bench/main.c @@ -0,0 +1,569 @@ +// SPDX-License-Identifier: BSD-2-Clause-Views +/* + * Copyright 2022, The Regents of the University of California. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following + * disclaimer in the documentation and/or other materials provided + * with the distribution. 
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * The views and conclusions contained in the software and documentation
+ * are those of the authors and should not be interpreted as representing
+ * official policies, either expressed or implied, of The Regents of the
+ * University of California.
+ */
+
+#include "mqnic.h"
+#include <linux/module.h>
+
+MODULE_DESCRIPTION("mqnic DMA benchmark application driver");
+MODULE_AUTHOR("Alex Forencich");
+MODULE_LICENSE("Dual BSD/GPL");
+MODULE_VERSION("0.1");
+
+struct mqnic_app_dma_bench {
+	struct device *dev;
+	struct mqnic_dev *mdev;
+	struct mqnic_adev *adev;
+
+	struct device *nic_dev;
+
+	void __iomem *nic_hw_addr;
+	void __iomem *app_hw_addr;
+	void __iomem *ram_hw_addr;
+
+	// DMA buffer
+	size_t dma_region_len;
+	void *dma_region;
+	dma_addr_t dma_region_addr;
+};
+
+const char *dma_bench_stats_names[] = {
+	// PCIe stats
+	"pcie_rx_tlp_mem_rd", // index 0
+	"pcie_rx_tlp_mem_wr", // index 1
+	"pcie_rx_tlp_io", // index 2
+	"pcie_rx_tlp_cfg", // index 3
+	"pcie_rx_tlp_msg", // index 4
+	"pcie_rx_tlp_cpl", // index 5
+	"pcie_rx_tlp_cpl_ur", // index 6
+	"pcie_rx_tlp_cpl_ca", // index 7
+	"pcie_rx_tlp_atomic", // index 8
+	"pcie_rx_tlp_ep", // index 9
+	"pcie_rx_tlp_hdr_dw", // index 10
+	"pcie_rx_tlp_req_dw", // index 11
+	"pcie_rx_tlp_payload_dw", // index 12
+	"pcie_rx_tlp_cpl_dw", // index 13
+	"", // index 14
+	"", // index 15
+	"pcie_tx_tlp_mem_rd", // index 16
+	"pcie_tx_tlp_mem_wr", // index 17
+	"pcie_tx_tlp_io", // index 18
+	"pcie_tx_tlp_cfg", // index 19
+	"pcie_tx_tlp_msg", // index 20
+	"pcie_tx_tlp_cpl", // index 21
+	"pcie_tx_tlp_cpl_ur", // index 22
+	"pcie_tx_tlp_cpl_ca", // index 23
+	"pcie_tx_tlp_atomic", // index 24
+	"pcie_tx_tlp_ep", // index 25
+	"pcie_tx_tlp_hdr_dw", // index 26
+	"pcie_tx_tlp_req_dw", // index 27
+	"pcie_tx_tlp_payload_dw", // index 28
+	"pcie_tx_tlp_cpl_dw", // index 29
+	"", // index 30
+	"", // index 31
+
+	// DMA statistics
+	"dma_rd_op_count", // index 0
+	"dma_rd_op_bytes", // index 1
+	"dma_rd_op_latency", // index 2
+	"dma_rd_op_error", // index 3
+	"dma_rd_req_count", // index 4
+	"dma_rd_req_latency", // index 5
+	"dma_rd_req_timeout", // index 6
+	"dma_rd_op_table_full", // index 7
+	"dma_rd_no_tags", // index 8
+	"dma_rd_tx_no_credit", // index 9
+	"dma_rd_tx_limit", // index 10
+	"dma_rd_tx_stall", // index 11
+	"", // index 12
+	"", // index 13
+	"", // index 14
+	"", // index 15
+	"dma_wr_op_count", // index 16
+	"dma_wr_op_bytes", // index 17
+	"dma_wr_op_latency", // index 18
+	"dma_wr_op_error", // index 19
+	"dma_wr_req_count", // index 20
+	"dma_wr_req_latency", // index 21
+	"", // index 22
+	"dma_wr_op_table_full", // index 23
+	"", // index 24
+	"dma_wr_tx_no_credit", // index 25
+	"dma_wr_tx_limit", // index 26
+	"dma_wr_tx_stall", // index 27
+ "", // index 28 + "", // index 29 + "", // index 30 + "", // index 31 + 0 +}; + +static u64 read_stat_counter(struct mqnic_app_dma_bench *app, int index) +{ + u64 val; + + val = (u64) ioread32(app->nic_hw_addr + 0x010000 + index * 8 + 0); + val |= (u64) ioread32(app->nic_hw_addr + 0x010000 + index * 8 + 4) << 32; + return val; +} + +static void print_counters(struct mqnic_app_dma_bench *app) +{ + struct device *dev = app->dev; + + int index = 0; + u64 val; + + while (dma_bench_stats_names[index]) { + if (strlen(dma_bench_stats_names[index]) > 0) { + val = read_stat_counter(app, index); + dev_info(dev, "%s: %lld", dma_bench_stats_names[index], val); + } + index++; + } +} + +static void dma_read(struct mqnic_app_dma_bench *app, + dma_addr_t dma_addr, size_t ram_addr, size_t len) +{ + int tag = 0; + int new_tag = 0; + unsigned long t; + + tag = ioread32(app->app_hw_addr + 0x000118); // dummy read + tag = (ioread32(app->app_hw_addr + 0x000118) & 0x7f) + 1; + iowrite32(dma_addr & 0xffffffff, app->app_hw_addr + 0x000100); + iowrite32((dma_addr >> 32) & 0xffffffff, app->app_hw_addr + 0x000104); + iowrite32(ram_addr, app->app_hw_addr + 0x000108); + iowrite32(0, app->app_hw_addr + 0x00010C); + iowrite32(len, app->app_hw_addr + 0x000110); + iowrite32(tag, app->app_hw_addr + 0x000114); + + // wait for transfer to complete + t = jiffies + msecs_to_jiffies(200); + while (time_before(jiffies, t)) { + new_tag = (ioread32(app->app_hw_addr + 0x000118) & 0xff); + if (new_tag == tag) + break; + } + + if (tag != new_tag) + dev_warn(app->dev, "%s: tag %d (expected %d)", __func__, new_tag, tag); +} + +static void dma_write(struct mqnic_app_dma_bench *app, + dma_addr_t dma_addr, size_t ram_addr, size_t len) +{ + int tag = 0; + int new_tag = 0; + unsigned long t; + + tag = ioread32(app->app_hw_addr + 0x000218); // dummy read + tag = (ioread32(app->app_hw_addr + 0x000218) & 0x7f) + 1; + iowrite32(dma_addr & 0xffffffff, app->app_hw_addr + 0x000200); + iowrite32((dma_addr >> 32) & 0xffffffff, app->app_hw_addr + 0x000204); + iowrite32(ram_addr, app->app_hw_addr + 0x000208); + iowrite32(0, app->app_hw_addr + 0x00020C); + iowrite32(len, app->app_hw_addr + 0x000210); + iowrite32(tag, app->app_hw_addr + 0x000214); + + // wait for transfer to complete + t = jiffies + msecs_to_jiffies(200); + while (time_before(jiffies, t)) { + new_tag = (ioread32(app->app_hw_addr + 0x000218) & 0xff); + if (new_tag == tag) + break; + } + + if (tag != new_tag) + dev_warn(app->dev, "%s: tag %d (expected %d)", __func__, new_tag, tag); +} + +static void dma_block_read(struct mqnic_app_dma_bench *app, + dma_addr_t dma_addr, size_t dma_offset, + size_t dma_offset_mask, size_t dma_stride, + size_t ram_addr, size_t ram_offset, + size_t ram_offset_mask, size_t ram_stride, + size_t block_len, size_t block_count) +{ + unsigned long t; + + // DMA base address + iowrite32(dma_addr & 0xffffffff, app->app_hw_addr + 0x001080); + iowrite32((dma_addr >> 32) & 0xffffffff, app->app_hw_addr + 0x001084); + // DMA offset address + iowrite32(dma_offset & 0xffffffff, app->app_hw_addr + 0x001088); + iowrite32((dma_offset >> 32) & 0xffffffff, app->app_hw_addr + 0x00108c); + // DMA offset mask + iowrite32(dma_offset_mask & 0xffffffff, app->app_hw_addr + 0x001090); + iowrite32((dma_offset_mask >> 32) & 0xffffffff, app->app_hw_addr + 0x001094); + // DMA stride + iowrite32(dma_stride & 0xffffffff, app->app_hw_addr + 0x001098); + iowrite32((dma_stride >> 32) & 0xffffffff, app->app_hw_addr + 0x00109c); + // RAM base address + iowrite32(ram_addr & 0xffffffff, 
app->app_hw_addr + 0x0010c0); + iowrite32((ram_addr >> 32) & 0xffffffff, app->app_hw_addr + 0x0010c4); + // RAM offset address + iowrite32(ram_offset & 0xffffffff, app->app_hw_addr + 0x0010c8); + iowrite32((ram_offset >> 32) & 0xffffffff, app->app_hw_addr + 0x0010cc); + // RAM offset mask + iowrite32(ram_offset_mask & 0xffffffff, app->app_hw_addr + 0x0010d0); + iowrite32((ram_offset_mask >> 32) & 0xffffffff, app->app_hw_addr + 0x0010d4); + // RAM stride + iowrite32(ram_stride & 0xffffffff, app->app_hw_addr + 0x0010d8); + iowrite32((ram_stride >> 32) & 0xffffffff, app->app_hw_addr + 0x0010dc); + // clear cycle count + iowrite32(0, app->app_hw_addr + 0x001008); + iowrite32(0, app->app_hw_addr + 0x00100c); + // block length + iowrite32(block_len, app->app_hw_addr + 0x001010); + // block count + iowrite32(block_count, app->app_hw_addr + 0x001018); + // start + iowrite32(1, app->app_hw_addr + 0x001000); + + // wait for transfer to complete + t = jiffies + msecs_to_jiffies(20000); + while (time_before(jiffies, t)) { + if ((ioread32(app->app_hw_addr + 0x001000) & 1) == 0) + break; + } + + if ((ioread32(app->app_hw_addr + 0x001000) & 1) != 0) + dev_warn(app->dev, "%s: operation timed out", __func__); +} + +static void dma_block_write(struct mqnic_app_dma_bench *app, + dma_addr_t dma_addr, size_t dma_offset, + size_t dma_offset_mask, size_t dma_stride, + size_t ram_addr, size_t ram_offset, + size_t ram_offset_mask, size_t ram_stride, + size_t block_len, size_t block_count) +{ + unsigned long t; + + // DMA base address + iowrite32(dma_addr & 0xffffffff, app->app_hw_addr + 0x001180); + iowrite32((dma_addr >> 32) & 0xffffffff, app->app_hw_addr + 0x001184); + // DMA offset address + iowrite32(dma_offset & 0xffffffff, app->app_hw_addr + 0x001188); + iowrite32((dma_offset >> 32) & 0xffffffff, app->app_hw_addr + 0x00118c); + // DMA offset mask + iowrite32(dma_offset_mask & 0xffffffff, app->app_hw_addr + 0x001190); + iowrite32((dma_offset_mask >> 32) & 0xffffffff, app->app_hw_addr + 0x001194); + // DMA stride + iowrite32(dma_stride & 0xffffffff, app->app_hw_addr + 0x001198); + iowrite32((dma_stride >> 32) & 0xffffffff, app->app_hw_addr + 0x00119c); + // RAM base address + iowrite32(ram_addr & 0xffffffff, app->app_hw_addr + 0x0011c0); + iowrite32((ram_addr >> 32) & 0xffffffff, app->app_hw_addr + 0x0011c4); + // RAM offset address + iowrite32(ram_offset & 0xffffffff, app->app_hw_addr + 0x0011c8); + iowrite32((ram_offset >> 32) & 0xffffffff, app->app_hw_addr + 0x0011cc); + // RAM offset mask + iowrite32(ram_offset_mask & 0xffffffff, app->app_hw_addr + 0x0011d0); + iowrite32((ram_offset_mask >> 32) & 0xffffffff, app->app_hw_addr + 0x0011d4); + // RAM stride + iowrite32(ram_stride & 0xffffffff, app->app_hw_addr + 0x0011d8); + iowrite32((ram_stride >> 32) & 0xffffffff, app->app_hw_addr + 0x0011dc); + // clear cycle count + iowrite32(0, app->app_hw_addr + 0x001108); + iowrite32(0, app->app_hw_addr + 0x00110c); + // block length + iowrite32(block_len, app->app_hw_addr + 0x001110); + // block count + iowrite32(block_count, app->app_hw_addr + 0x001118); + // start + iowrite32(1, app->app_hw_addr + 0x001100); + + // wait for transfer to complete + t = jiffies + msecs_to_jiffies(20000); + while (time_before(jiffies, t)) { + if ((ioread32(app->app_hw_addr + 0x001100) & 1) == 0) + break; + } + + if ((ioread32(app->app_hw_addr + 0x001100) & 1) != 0) + dev_warn(app->dev, "%s: operation timed out", __func__); +} + +static void dma_block_read_bench(struct mqnic_app_dma_bench *app, + dma_addr_t dma_addr, u64 size, u64 
stride, u64 count) +{ + u64 cycles; + u64 op_count; + u64 op_latency; + u64 req_count; + u64 req_latency; + + udelay(5); + + op_count = read_stat_counter(app, 32); + op_latency = read_stat_counter(app, 34); + req_count = read_stat_counter(app, 36); + req_latency = read_stat_counter(app, 37); + + dma_block_read(app, dma_addr, 0, 0x3fff, stride, + 0, 0, 0x3fff, stride, size, count); + + cycles = ioread32(app->app_hw_addr + 0x001008); + + udelay(5); + + op_count = read_stat_counter(app, 32) - op_count; + op_latency = read_stat_counter(app, 34) - op_latency; + req_count = read_stat_counter(app, 36) - req_count; + req_latency = read_stat_counter(app, 37) - req_latency; + + dev_info(app->dev, "read %lld blocks of %lld bytes (stride %lld) in %lld ns (%lld ns/op, %lld req, %lld ns/req): %lld Mbps", + count, size, stride, cycles * 4, (op_latency * 4) / op_count, req_count, + (req_latency * 4) / req_count, size * count * 8 * 1000 / (cycles * 4)); +} + +static void dma_block_write_bench(struct mqnic_app_dma_bench *app, + dma_addr_t dma_addr, u64 size, u64 stride, u64 count) +{ + u64 cycles; + u64 op_count; + u64 op_latency; + u64 req_count; + u64 req_latency; + + udelay(5); + + op_count = read_stat_counter(app, 48); + op_latency = read_stat_counter(app, 50); + req_count = read_stat_counter(app, 52); + req_latency = read_stat_counter(app, 53); + + dma_block_write(app, dma_addr, 0, 0x3fff, stride, + 0, 0, 0x3fff, stride, size, count); + + cycles = ioread32(app->app_hw_addr + 0x001108); + + udelay(5); + + op_count = read_stat_counter(app, 48) - op_count; + op_latency = read_stat_counter(app, 50) - op_latency; + req_count = read_stat_counter(app, 52) - req_count; + req_latency = read_stat_counter(app, 53) - req_latency; + + dev_info(app->dev, "wrote %lld blocks of %lld bytes (stride %lld) in %lld ns (%lld ns/op, %lld req, %lld ns/req): %lld Mbps", + count, size, stride, cycles * 4, (op_latency * 4) / op_count, req_count, + (req_latency * 4) / req_count, size * count * 8 * 1000 / (cycles * 4)); +} + +static int mqnic_app_dma_bench_probe(struct auxiliary_device *adev, + const struct auxiliary_device_id *id) +{ + struct mqnic_app_dma_bench *app; + struct mqnic_dev *mdev = container_of(adev, struct mqnic_adev, adev)->mdev; + struct device *dev = &adev->dev; + + int mismatch = 0; + int k; + + dev_info(dev, "%s() called", __func__); + + if (!mdev->hw_addr || !mdev->app_hw_addr) { + dev_err(dev, "Error: required region not present"); + return -EIO; + } + + app = devm_kzalloc(dev, sizeof(*app), GFP_KERNEL); + if (!app) + return -ENOMEM; + + app->dev = dev; + app->mdev = mdev; + dev_set_drvdata(&adev->dev, app); + + app->nic_dev = mdev->dev; + app->nic_hw_addr = mdev->hw_addr; + app->app_hw_addr = mdev->app_hw_addr; + app->ram_hw_addr = mdev->ram_hw_addr; + + // Allocate DMA buffer + app->dma_region_len = 16 * 1024; + app->dma_region = dma_alloc_coherent(app->nic_dev, app->dma_region_len, + &app->dma_region_addr, GFP_KERNEL | __GFP_ZERO); + if (!app->dma_region) + return -ENOMEM; + + dev_info(dev, "Allocated DMA region virt %p, phys %p", + app->dma_region, (void *)app->dma_region_addr); + + // Dump counters + dev_info(dev, "Statistics counters"); + print_counters(app); + + // DMA test + dev_info(dev, "Write test data"); + for (k = 0; k < 256; k++) + ((char *)app->dma_region)[k] = k; + + dev_info(dev, "Read test data"); + print_hex_dump(KERN_INFO, "", DUMP_PREFIX_NONE, 16, 1, + app->dma_region, 256, true); + + dev_info(dev, "Start copy to card"); + dma_read(app, app->dma_region_addr + 0x0000, 0x100, 0x100); + + 
dev_info(dev, "Start copy to host");
+	dma_write(app, app->dma_region_addr + 0x0200, 0x100, 0x100);
+
+	dev_info(dev, "read test data");
+	print_hex_dump(KERN_INFO, "", DUMP_PREFIX_NONE, 16, 1,
+			app->dma_region + 0x0200, 256, true);
+
+	if (memcmp(app->dma_region + 0x0000, app->dma_region + 0x0200, 256) == 0) {
+		dev_info(dev, "test data matches");
+	} else {
+		dev_warn(dev, "test data mismatch");
+		mismatch = 1;
+	}
+
+	if (!mismatch) {
+		u64 size;
+		u64 stride;
+		struct page *page;
+		dma_addr_t dma_addr;
+
+		dev_info(dev, "perform block reads (dma_alloc_coherent)");
+
+		for (size = 1; size <= 8192; size *= 2) {
+			for (stride = size; stride <= max(size, 256llu); stride *= 2) {
+				dma_block_read_bench(app, app->dma_region_addr + 0x0000,
+						size, stride, 10000);
+			}
+		}
+
+		dev_info(dev, "perform block writes (dma_alloc_coherent)");
+
+		for (size = 1; size <= 8192; size *= 2) {
+			for (stride = size; stride <= max(size, 256llu); stride *= 2) {
+				dma_block_write_bench(app, app->dma_region_addr + 0x0000,
+						size, stride, 10000);
+			}
+		}
+
+		page = alloc_pages_node(NUMA_NO_NODE, GFP_ATOMIC | __GFP_NOWARN |
+				__GFP_COMP | __GFP_MEMALLOC, 2);
+
+		if (page) {
+			dma_addr = dma_map_page(app->nic_dev, page, 0, 4096 * (1 << 2), DMA_TO_DEVICE);
+
+			if (!dma_mapping_error(app->nic_dev, dma_addr)) {
+				dev_info(dev, "perform block reads (alloc_pages_node)");
+
+				for (size = 1; size <= 8192; size *= 2) {
+					for (stride = size; stride <= max(size, 256llu); stride *= 2) {
+						dma_block_read_bench(app, dma_addr + 0x0000,
+								size, stride, 10000);
+					}
+				}
+
+				dma_unmap_page(app->nic_dev, dma_addr, 4096 * (1 << 2), DMA_TO_DEVICE);
+			} else {
+				dev_warn(dev, "DMA mapping error");
+			}
+
+			dma_addr = dma_map_page(app->nic_dev, page, 0, 4096 * (1 << 2), DMA_FROM_DEVICE);
+
+			if (!dma_mapping_error(app->nic_dev, dma_addr)) {
+				dev_info(dev, "perform block writes (alloc_pages_node)");
+
+				for (size = 1; size <= 8192; size *= 2) {
+					for (stride = size; stride <= max(size, 256llu); stride *= 2) {
+						dma_block_write_bench(app, dma_addr + 0x0000,
+								size, stride, 10000);
+					}
+				}
+
+				dma_unmap_page(app->nic_dev, dma_addr, 4096 * (1 << 2), DMA_FROM_DEVICE);
+			} else {
+				dev_warn(dev, "DMA mapping error");
+			}
+		}
+
+		if (page)
+			__free_pages(page, 2);
+		else
+			dev_warn(dev, "failed to allocate memory");
+	}
+
+	// Dump counters
+	dev_info(dev, "Statistics counters");
+	print_counters(app);
+
+	return 0;
+}
+
+static void mqnic_app_dma_bench_remove(struct auxiliary_device *adev)
+{
+	struct mqnic_app_dma_bench *app = dev_get_drvdata(&adev->dev);
+	struct device *dev = app->dev;
+
+	dev_info(dev, "%s() called", __func__);
+
+	dma_free_coherent(app->nic_dev, app->dma_region_len, app->dma_region,
+			app->dma_region_addr);
+}
+
+static const struct auxiliary_device_id mqnic_app_dma_bench_id_table[] = {
+	{ .name = "mqnic.app_12348001" },
+	{},
+};
+
+MODULE_DEVICE_TABLE(auxiliary, mqnic_app_dma_bench_id_table);
+
+static struct auxiliary_driver mqnic_app_dma_bench_driver = {
+	.name = "mqnic_app_dma_bench",
+	.probe = mqnic_app_dma_bench_probe,
+	.remove = mqnic_app_dma_bench_remove,
+	.id_table = mqnic_app_dma_bench_id_table,
+};
+
+static int __init mqnic_app_dma_bench_init(void)
+{
+	return auxiliary_driver_register(&mqnic_app_dma_bench_driver);
+}
+
+static void __exit mqnic_app_dma_bench_exit(void)
+{
+	auxiliary_driver_unregister(&mqnic_app_dma_bench_driver);
+}
+
+module_init(mqnic_app_dma_bench_init);
+module_exit(mqnic_app_dma_bench_exit);
diff --git a/fpga/app/dma_bench/rtl/common b/fpga/app/dma_bench/rtl/common
new
file mode 120000 index 000000000..e60fada70 --- /dev/null +++ b/fpga/app/dma_bench/rtl/common @@ -0,0 +1 @@ +../../../common/rtl/ \ No newline at end of file diff --git a/fpga/app/dma_bench/rtl/mqnic_app_block_dma_bench.v b/fpga/app/dma_bench/rtl/mqnic_app_block_dma_bench.v new file mode 100644 index 000000000..e93b16ebf --- /dev/null +++ b/fpga/app/dma_bench/rtl/mqnic_app_block_dma_bench.v @@ -0,0 +1,1158 @@ +/* + +Copyright 2021, The Regents of the University of California. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS OF THE UNIVERSITY OF CALIFORNIA ''AS +IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OF THE UNIVERSITY OF CALIFORNIA OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT +OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING +IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY +OF SUCH DAMAGE. + +The views and conclusions contained in the software and documentation are those +of the authors and should not be interpreted as representing official policies, +either expressed or implied, of The Regents of the University of California. 
+ +*/ + +// Language: Verilog 2001 + +`resetall +`timescale 1ns / 1ps +`default_nettype none + +/* + * Application block (DMA benchmark application) + */ +module mqnic_app_block # +( + // Structural configuration + parameter IF_COUNT = 1, + parameter PORTS_PER_IF = 1, + parameter SCHED_PER_IF = PORTS_PER_IF, + + parameter PORT_COUNT = IF_COUNT*PORTS_PER_IF, + + // PTP configuration + parameter PTP_TS_WIDTH = 96, + parameter PTP_TAG_WIDTH = 16, + parameter PTP_PERIOD_NS_WIDTH = 4, + parameter PTP_OFFSET_NS_WIDTH = 32, + parameter PTP_FNS_WIDTH = 32, + parameter PTP_PERIOD_NS = 4'd4, + parameter PTP_PERIOD_FNS = 32'd0, + parameter PTP_USE_SAMPLE_CLOCK = 0, + parameter PTP_PORT_CDC_PIPELINE = 0, + parameter PTP_PEROUT_ENABLE = 0, + parameter PTP_PEROUT_COUNT = 1, + parameter PTP_TS_ENABLE = 1, + + // Application configuration + parameter APP_ID = 32'h12348001, + parameter APP_CTRL_ENABLE = 1, + parameter APP_DMA_ENABLE = 1, + parameter APP_AXIS_DIRECT_ENABLE = 1, + parameter APP_AXIS_SYNC_ENABLE = 1, + parameter APP_AXIS_IF_ENABLE = 1, + parameter APP_STAT_ENABLE = 1, + parameter APP_GPIO_IN_WIDTH = 32, + parameter APP_GPIO_OUT_WIDTH = 32, + + // DMA interface configuration + parameter DMA_ADDR_WIDTH = 64, + parameter DMA_IMM_ENABLE = 0, + parameter DMA_IMM_WIDTH = 32, + parameter DMA_LEN_WIDTH = 16, + parameter DMA_TAG_WIDTH = 16, + parameter RAM_SEL_WIDTH = 4, + parameter RAM_ADDR_WIDTH = 16, + parameter RAM_SEG_COUNT = 2, + parameter RAM_SEG_DATA_WIDTH = 256*2/RAM_SEG_COUNT, + parameter RAM_SEG_BE_WIDTH = RAM_SEG_DATA_WIDTH/8, + parameter RAM_SEG_ADDR_WIDTH = RAM_ADDR_WIDTH-$clog2(RAM_SEG_COUNT*RAM_SEG_BE_WIDTH), + parameter RAM_PIPELINE = 2, + + // AXI lite interface (application control from host) + parameter AXIL_APP_CTRL_DATA_WIDTH = 32, + parameter AXIL_APP_CTRL_ADDR_WIDTH = 16, + parameter AXIL_APP_CTRL_STRB_WIDTH = (AXIL_APP_CTRL_DATA_WIDTH/8), + + // AXI lite interface (control to NIC) + parameter AXIL_CTRL_DATA_WIDTH = 32, + parameter AXIL_CTRL_ADDR_WIDTH = 16, + parameter AXIL_CTRL_STRB_WIDTH = (AXIL_CTRL_DATA_WIDTH/8), + + // Ethernet interface configuration (direct, async) + parameter AXIS_DATA_WIDTH = 512, + parameter AXIS_KEEP_WIDTH = AXIS_DATA_WIDTH/8, + parameter AXIS_TX_USER_WIDTH = (PTP_TS_ENABLE ? PTP_TAG_WIDTH : 0) + 1, + parameter AXIS_RX_USER_WIDTH = (PTP_TS_ENABLE ? PTP_TS_WIDTH : 0) + 1, + parameter AXIS_RX_USE_READY = 0, + + // Ethernet interface configuration (direct, sync) + parameter AXIS_SYNC_DATA_WIDTH = AXIS_DATA_WIDTH, + parameter AXIS_SYNC_KEEP_WIDTH = AXIS_SYNC_DATA_WIDTH/8, + parameter AXIS_SYNC_TX_USER_WIDTH = AXIS_TX_USER_WIDTH, + parameter AXIS_SYNC_RX_USER_WIDTH = AXIS_RX_USER_WIDTH, + + // Ethernet interface configuration (interface) + parameter AXIS_IF_DATA_WIDTH = AXIS_SYNC_DATA_WIDTH*2**$clog2(PORTS_PER_IF), + parameter AXIS_IF_KEEP_WIDTH = AXIS_IF_DATA_WIDTH/8, + parameter AXIS_IF_TX_ID_WIDTH = 12, + parameter AXIS_IF_RX_ID_WIDTH = PORTS_PER_IF > 1 ? 
$clog2(PORTS_PER_IF) : 1, + parameter AXIS_IF_TX_DEST_WIDTH = $clog2(PORTS_PER_IF)+4, + parameter AXIS_IF_RX_DEST_WIDTH = 8, + parameter AXIS_IF_TX_USER_WIDTH = AXIS_SYNC_TX_USER_WIDTH, + parameter AXIS_IF_RX_USER_WIDTH = AXIS_SYNC_RX_USER_WIDTH, + + // Statistics counter subsystem + parameter STAT_ENABLE = 1, + parameter STAT_INC_WIDTH = 24, + parameter STAT_ID_WIDTH = 12 +) +( + input wire clk, + input wire rst, + + /* + * AXI-Lite slave interface (control from host) + */ + input wire [AXIL_APP_CTRL_ADDR_WIDTH-1:0] s_axil_app_ctrl_awaddr, + input wire [2:0] s_axil_app_ctrl_awprot, + input wire s_axil_app_ctrl_awvalid, + output wire s_axil_app_ctrl_awready, + input wire [AXIL_APP_CTRL_DATA_WIDTH-1:0] s_axil_app_ctrl_wdata, + input wire [AXIL_APP_CTRL_STRB_WIDTH-1:0] s_axil_app_ctrl_wstrb, + input wire s_axil_app_ctrl_wvalid, + output wire s_axil_app_ctrl_wready, + output wire [1:0] s_axil_app_ctrl_bresp, + output wire s_axil_app_ctrl_bvalid, + input wire s_axil_app_ctrl_bready, + input wire [AXIL_APP_CTRL_ADDR_WIDTH-1:0] s_axil_app_ctrl_araddr, + input wire [2:0] s_axil_app_ctrl_arprot, + input wire s_axil_app_ctrl_arvalid, + output wire s_axil_app_ctrl_arready, + output wire [AXIL_APP_CTRL_DATA_WIDTH-1:0] s_axil_app_ctrl_rdata, + output wire [1:0] s_axil_app_ctrl_rresp, + output wire s_axil_app_ctrl_rvalid, + input wire s_axil_app_ctrl_rready, + + /* + * AXI-Lite master interface (control to NIC) + */ + output wire [AXIL_CTRL_ADDR_WIDTH-1:0] m_axil_ctrl_awaddr, + output wire [2:0] m_axil_ctrl_awprot, + output wire m_axil_ctrl_awvalid, + input wire m_axil_ctrl_awready, + output wire [AXIL_CTRL_DATA_WIDTH-1:0] m_axil_ctrl_wdata, + output wire [AXIL_CTRL_STRB_WIDTH-1:0] m_axil_ctrl_wstrb, + output wire m_axil_ctrl_wvalid, + input wire m_axil_ctrl_wready, + input wire [1:0] m_axil_ctrl_bresp, + input wire m_axil_ctrl_bvalid, + output wire m_axil_ctrl_bready, + output wire [AXIL_CTRL_ADDR_WIDTH-1:0] m_axil_ctrl_araddr, + output wire [2:0] m_axil_ctrl_arprot, + output wire m_axil_ctrl_arvalid, + input wire m_axil_ctrl_arready, + input wire [AXIL_CTRL_DATA_WIDTH-1:0] m_axil_ctrl_rdata, + input wire [1:0] m_axil_ctrl_rresp, + input wire m_axil_ctrl_rvalid, + output wire m_axil_ctrl_rready, + + /* + * DMA read descriptor output (control) + */ + output wire [DMA_ADDR_WIDTH-1:0] m_axis_ctrl_dma_read_desc_dma_addr, + output wire [RAM_SEL_WIDTH-1:0] m_axis_ctrl_dma_read_desc_ram_sel, + output wire [RAM_ADDR_WIDTH-1:0] m_axis_ctrl_dma_read_desc_ram_addr, + output wire [DMA_LEN_WIDTH-1:0] m_axis_ctrl_dma_read_desc_len, + output wire [DMA_TAG_WIDTH-1:0] m_axis_ctrl_dma_read_desc_tag, + output wire m_axis_ctrl_dma_read_desc_valid, + input wire m_axis_ctrl_dma_read_desc_ready, + + /* + * DMA read descriptor status input (control) + */ + input wire [DMA_TAG_WIDTH-1:0] s_axis_ctrl_dma_read_desc_status_tag, + input wire [3:0] s_axis_ctrl_dma_read_desc_status_error, + input wire s_axis_ctrl_dma_read_desc_status_valid, + + /* + * DMA write descriptor output (control) + */ + output wire [DMA_ADDR_WIDTH-1:0] m_axis_ctrl_dma_write_desc_dma_addr, + output wire [RAM_SEL_WIDTH-1:0] m_axis_ctrl_dma_write_desc_ram_sel, + output wire [RAM_ADDR_WIDTH-1:0] m_axis_ctrl_dma_write_desc_ram_addr, + output wire [DMA_IMM_WIDTH-1:0] m_axis_ctrl_dma_write_desc_imm, + output wire m_axis_ctrl_dma_write_desc_imm_en, + output wire [DMA_LEN_WIDTH-1:0] m_axis_ctrl_dma_write_desc_len, + output wire [DMA_TAG_WIDTH-1:0] m_axis_ctrl_dma_write_desc_tag, + output wire m_axis_ctrl_dma_write_desc_valid, + input wire 
m_axis_ctrl_dma_write_desc_ready, + + /* + * DMA write descriptor status input (control) + */ + input wire [DMA_TAG_WIDTH-1:0] s_axis_ctrl_dma_write_desc_status_tag, + input wire [3:0] s_axis_ctrl_dma_write_desc_status_error, + input wire s_axis_ctrl_dma_write_desc_status_valid, + + /* + * DMA read descriptor output (data) + */ + output wire [DMA_ADDR_WIDTH-1:0] m_axis_data_dma_read_desc_dma_addr, + output wire [RAM_SEL_WIDTH-1:0] m_axis_data_dma_read_desc_ram_sel, + output wire [RAM_ADDR_WIDTH-1:0] m_axis_data_dma_read_desc_ram_addr, + output wire [DMA_LEN_WIDTH-1:0] m_axis_data_dma_read_desc_len, + output wire [DMA_TAG_WIDTH-1:0] m_axis_data_dma_read_desc_tag, + output wire m_axis_data_dma_read_desc_valid, + input wire m_axis_data_dma_read_desc_ready, + + /* + * DMA read descriptor status input (data) + */ + input wire [DMA_TAG_WIDTH-1:0] s_axis_data_dma_read_desc_status_tag, + input wire [3:0] s_axis_data_dma_read_desc_status_error, + input wire s_axis_data_dma_read_desc_status_valid, + + /* + * DMA write descriptor output (data) + */ + output wire [DMA_ADDR_WIDTH-1:0] m_axis_data_dma_write_desc_dma_addr, + output wire [RAM_SEL_WIDTH-1:0] m_axis_data_dma_write_desc_ram_sel, + output wire [RAM_ADDR_WIDTH-1:0] m_axis_data_dma_write_desc_ram_addr, + output wire [DMA_IMM_WIDTH-1:0] m_axis_data_dma_write_desc_imm, + output wire m_axis_data_dma_write_desc_imm_en, + output wire [DMA_LEN_WIDTH-1:0] m_axis_data_dma_write_desc_len, + output wire [DMA_TAG_WIDTH-1:0] m_axis_data_dma_write_desc_tag, + output wire m_axis_data_dma_write_desc_valid, + input wire m_axis_data_dma_write_desc_ready, + + /* + * DMA write descriptor status input (data) + */ + input wire [DMA_TAG_WIDTH-1:0] s_axis_data_dma_write_desc_status_tag, + input wire [3:0] s_axis_data_dma_write_desc_status_error, + input wire s_axis_data_dma_write_desc_status_valid, + + /* + * DMA RAM interface (control) + */ + input wire [RAM_SEG_COUNT*RAM_SEL_WIDTH-1:0] ctrl_dma_ram_wr_cmd_sel, + input wire [RAM_SEG_COUNT*RAM_SEG_BE_WIDTH-1:0] ctrl_dma_ram_wr_cmd_be, + input wire [RAM_SEG_COUNT*RAM_SEG_ADDR_WIDTH-1:0] ctrl_dma_ram_wr_cmd_addr, + input wire [RAM_SEG_COUNT*RAM_SEG_DATA_WIDTH-1:0] ctrl_dma_ram_wr_cmd_data, + input wire [RAM_SEG_COUNT-1:0] ctrl_dma_ram_wr_cmd_valid, + output wire [RAM_SEG_COUNT-1:0] ctrl_dma_ram_wr_cmd_ready, + output wire [RAM_SEG_COUNT-1:0] ctrl_dma_ram_wr_done, + input wire [RAM_SEG_COUNT*RAM_SEL_WIDTH-1:0] ctrl_dma_ram_rd_cmd_sel, + input wire [RAM_SEG_COUNT*RAM_SEG_ADDR_WIDTH-1:0] ctrl_dma_ram_rd_cmd_addr, + input wire [RAM_SEG_COUNT-1:0] ctrl_dma_ram_rd_cmd_valid, + output wire [RAM_SEG_COUNT-1:0] ctrl_dma_ram_rd_cmd_ready, + output wire [RAM_SEG_COUNT*RAM_SEG_DATA_WIDTH-1:0] ctrl_dma_ram_rd_resp_data, + output wire [RAM_SEG_COUNT-1:0] ctrl_dma_ram_rd_resp_valid, + input wire [RAM_SEG_COUNT-1:0] ctrl_dma_ram_rd_resp_ready, + + /* + * DMA RAM interface (data) + */ + input wire [RAM_SEG_COUNT*RAM_SEL_WIDTH-1:0] data_dma_ram_wr_cmd_sel, + input wire [RAM_SEG_COUNT*RAM_SEG_BE_WIDTH-1:0] data_dma_ram_wr_cmd_be, + input wire [RAM_SEG_COUNT*RAM_SEG_ADDR_WIDTH-1:0] data_dma_ram_wr_cmd_addr, + input wire [RAM_SEG_COUNT*RAM_SEG_DATA_WIDTH-1:0] data_dma_ram_wr_cmd_data, + input wire [RAM_SEG_COUNT-1:0] data_dma_ram_wr_cmd_valid, + output wire [RAM_SEG_COUNT-1:0] data_dma_ram_wr_cmd_ready, + output wire [RAM_SEG_COUNT-1:0] data_dma_ram_wr_done, + input wire [RAM_SEG_COUNT*RAM_SEL_WIDTH-1:0] data_dma_ram_rd_cmd_sel, + input wire [RAM_SEG_COUNT*RAM_SEG_ADDR_WIDTH-1:0] data_dma_ram_rd_cmd_addr, + input wire [RAM_SEG_COUNT-1:0] 
data_dma_ram_rd_cmd_valid, + output wire [RAM_SEG_COUNT-1:0] data_dma_ram_rd_cmd_ready, + output wire [RAM_SEG_COUNT*RAM_SEG_DATA_WIDTH-1:0] data_dma_ram_rd_resp_data, + output wire [RAM_SEG_COUNT-1:0] data_dma_ram_rd_resp_valid, + input wire [RAM_SEG_COUNT-1:0] data_dma_ram_rd_resp_ready, + + /* + * PTP clock + */ + input wire ptp_sample_clk, + input wire ptp_pps, + input wire [PTP_TS_WIDTH-1:0] ptp_ts_96, + input wire ptp_ts_step, + input wire [PTP_PEROUT_COUNT-1:0] ptp_perout_locked, + input wire [PTP_PEROUT_COUNT-1:0] ptp_perout_error, + input wire [PTP_PEROUT_COUNT-1:0] ptp_perout_pulse, + + /* + * Ethernet (direct MAC interface - lowest latency raw traffic) + */ + input wire [PORT_COUNT-1:0] direct_tx_clk, + input wire [PORT_COUNT-1:0] direct_tx_rst, + + input wire [PORT_COUNT*AXIS_DATA_WIDTH-1:0] s_axis_direct_tx_tdata, + input wire [PORT_COUNT*AXIS_KEEP_WIDTH-1:0] s_axis_direct_tx_tkeep, + input wire [PORT_COUNT-1:0] s_axis_direct_tx_tvalid, + output wire [PORT_COUNT-1:0] s_axis_direct_tx_tready, + input wire [PORT_COUNT-1:0] s_axis_direct_tx_tlast, + input wire [PORT_COUNT*AXIS_TX_USER_WIDTH-1:0] s_axis_direct_tx_tuser, + + output wire [PORT_COUNT*AXIS_DATA_WIDTH-1:0] m_axis_direct_tx_tdata, + output wire [PORT_COUNT*AXIS_KEEP_WIDTH-1:0] m_axis_direct_tx_tkeep, + output wire [PORT_COUNT-1:0] m_axis_direct_tx_tvalid, + input wire [PORT_COUNT-1:0] m_axis_direct_tx_tready, + output wire [PORT_COUNT-1:0] m_axis_direct_tx_tlast, + output wire [PORT_COUNT*AXIS_TX_USER_WIDTH-1:0] m_axis_direct_tx_tuser, + + input wire [PORT_COUNT*PTP_TS_WIDTH-1:0] s_axis_direct_tx_ptp_ts, + input wire [PORT_COUNT*PTP_TAG_WIDTH-1:0] s_axis_direct_tx_ptp_ts_tag, + input wire [PORT_COUNT-1:0] s_axis_direct_tx_ptp_ts_valid, + output wire [PORT_COUNT-1:0] s_axis_direct_tx_ptp_ts_ready, + + output wire [PORT_COUNT*PTP_TS_WIDTH-1:0] m_axis_direct_tx_ptp_ts, + output wire [PORT_COUNT*PTP_TAG_WIDTH-1:0] m_axis_direct_tx_ptp_ts_tag, + output wire [PORT_COUNT-1:0] m_axis_direct_tx_ptp_ts_valid, + input wire [PORT_COUNT-1:0] m_axis_direct_tx_ptp_ts_ready, + + input wire [PORT_COUNT-1:0] direct_rx_clk, + input wire [PORT_COUNT-1:0] direct_rx_rst, + + input wire [PORT_COUNT*AXIS_DATA_WIDTH-1:0] s_axis_direct_rx_tdata, + input wire [PORT_COUNT*AXIS_KEEP_WIDTH-1:0] s_axis_direct_rx_tkeep, + input wire [PORT_COUNT-1:0] s_axis_direct_rx_tvalid, + output wire [PORT_COUNT-1:0] s_axis_direct_rx_tready, + input wire [PORT_COUNT-1:0] s_axis_direct_rx_tlast, + input wire [PORT_COUNT*AXIS_RX_USER_WIDTH-1:0] s_axis_direct_rx_tuser, + + output wire [PORT_COUNT*AXIS_DATA_WIDTH-1:0] m_axis_direct_rx_tdata, + output wire [PORT_COUNT*AXIS_KEEP_WIDTH-1:0] m_axis_direct_rx_tkeep, + output wire [PORT_COUNT-1:0] m_axis_direct_rx_tvalid, + input wire [PORT_COUNT-1:0] m_axis_direct_rx_tready, + output wire [PORT_COUNT-1:0] m_axis_direct_rx_tlast, + output wire [PORT_COUNT*AXIS_RX_USER_WIDTH-1:0] m_axis_direct_rx_tuser, + + /* + * Ethernet (synchronous MAC interface - low latency raw traffic) + */ + input wire [PORT_COUNT*AXIS_SYNC_DATA_WIDTH-1:0] s_axis_sync_tx_tdata, + input wire [PORT_COUNT*AXIS_SYNC_KEEP_WIDTH-1:0] s_axis_sync_tx_tkeep, + input wire [PORT_COUNT-1:0] s_axis_sync_tx_tvalid, + output wire [PORT_COUNT-1:0] s_axis_sync_tx_tready, + input wire [PORT_COUNT-1:0] s_axis_sync_tx_tlast, + input wire [PORT_COUNT*AXIS_SYNC_TX_USER_WIDTH-1:0] s_axis_sync_tx_tuser, + + output wire [PORT_COUNT*AXIS_SYNC_DATA_WIDTH-1:0] m_axis_sync_tx_tdata, + output wire [PORT_COUNT*AXIS_SYNC_KEEP_WIDTH-1:0] m_axis_sync_tx_tkeep, + output wire 
[PORT_COUNT-1:0] m_axis_sync_tx_tvalid, + input wire [PORT_COUNT-1:0] m_axis_sync_tx_tready, + output wire [PORT_COUNT-1:0] m_axis_sync_tx_tlast, + output wire [PORT_COUNT*AXIS_SYNC_TX_USER_WIDTH-1:0] m_axis_sync_tx_tuser, + + input wire [PORT_COUNT*PTP_TS_WIDTH-1:0] s_axis_sync_tx_ptp_ts, + input wire [PORT_COUNT*PTP_TAG_WIDTH-1:0] s_axis_sync_tx_ptp_ts_tag, + input wire [PORT_COUNT-1:0] s_axis_sync_tx_ptp_ts_valid, + output wire [PORT_COUNT-1:0] s_axis_sync_tx_ptp_ts_ready, + + output wire [PORT_COUNT*PTP_TS_WIDTH-1:0] m_axis_sync_tx_ptp_ts, + output wire [PORT_COUNT*PTP_TAG_WIDTH-1:0] m_axis_sync_tx_ptp_ts_tag, + output wire [PORT_COUNT-1:0] m_axis_sync_tx_ptp_ts_valid, + input wire [PORT_COUNT-1:0] m_axis_sync_tx_ptp_ts_ready, + + input wire [PORT_COUNT*AXIS_SYNC_DATA_WIDTH-1:0] s_axis_sync_rx_tdata, + input wire [PORT_COUNT*AXIS_SYNC_KEEP_WIDTH-1:0] s_axis_sync_rx_tkeep, + input wire [PORT_COUNT-1:0] s_axis_sync_rx_tvalid, + output wire [PORT_COUNT-1:0] s_axis_sync_rx_tready, + input wire [PORT_COUNT-1:0] s_axis_sync_rx_tlast, + input wire [PORT_COUNT*AXIS_SYNC_RX_USER_WIDTH-1:0] s_axis_sync_rx_tuser, + + output wire [PORT_COUNT*AXIS_SYNC_DATA_WIDTH-1:0] m_axis_sync_rx_tdata, + output wire [PORT_COUNT*AXIS_SYNC_KEEP_WIDTH-1:0] m_axis_sync_rx_tkeep, + output wire [PORT_COUNT-1:0] m_axis_sync_rx_tvalid, + input wire [PORT_COUNT-1:0] m_axis_sync_rx_tready, + output wire [PORT_COUNT-1:0] m_axis_sync_rx_tlast, + output wire [PORT_COUNT*AXIS_SYNC_RX_USER_WIDTH-1:0] m_axis_sync_rx_tuser, + + /* + * Ethernet (internal at interface module) + */ + input wire [IF_COUNT*AXIS_IF_DATA_WIDTH-1:0] s_axis_if_tx_tdata, + input wire [IF_COUNT*AXIS_IF_KEEP_WIDTH-1:0] s_axis_if_tx_tkeep, + input wire [IF_COUNT-1:0] s_axis_if_tx_tvalid, + output wire [IF_COUNT-1:0] s_axis_if_tx_tready, + input wire [IF_COUNT-1:0] s_axis_if_tx_tlast, + input wire [IF_COUNT*AXIS_IF_TX_ID_WIDTH-1:0] s_axis_if_tx_tid, + input wire [IF_COUNT*AXIS_IF_TX_DEST_WIDTH-1:0] s_axis_if_tx_tdest, + input wire [IF_COUNT*AXIS_IF_TX_USER_WIDTH-1:0] s_axis_if_tx_tuser, + + output wire [IF_COUNT*AXIS_IF_DATA_WIDTH-1:0] m_axis_if_tx_tdata, + output wire [IF_COUNT*AXIS_IF_KEEP_WIDTH-1:0] m_axis_if_tx_tkeep, + output wire [IF_COUNT-1:0] m_axis_if_tx_tvalid, + input wire [IF_COUNT-1:0] m_axis_if_tx_tready, + output wire [IF_COUNT-1:0] m_axis_if_tx_tlast, + output wire [IF_COUNT*AXIS_IF_TX_ID_WIDTH-1:0] m_axis_if_tx_tid, + output wire [IF_COUNT*AXIS_IF_TX_DEST_WIDTH-1:0] m_axis_if_tx_tdest, + output wire [IF_COUNT*AXIS_IF_TX_USER_WIDTH-1:0] m_axis_if_tx_tuser, + + input wire [IF_COUNT*PTP_TS_WIDTH-1:0] s_axis_if_tx_ptp_ts, + input wire [IF_COUNT*PTP_TAG_WIDTH-1:0] s_axis_if_tx_ptp_ts_tag, + input wire [IF_COUNT-1:0] s_axis_if_tx_ptp_ts_valid, + output wire [IF_COUNT-1:0] s_axis_if_tx_ptp_ts_ready, + + output wire [IF_COUNT*PTP_TS_WIDTH-1:0] m_axis_if_tx_ptp_ts, + output wire [IF_COUNT*PTP_TAG_WIDTH-1:0] m_axis_if_tx_ptp_ts_tag, + output wire [IF_COUNT-1:0] m_axis_if_tx_ptp_ts_valid, + input wire [IF_COUNT-1:0] m_axis_if_tx_ptp_ts_ready, + + input wire [IF_COUNT*AXIS_IF_DATA_WIDTH-1:0] s_axis_if_rx_tdata, + input wire [IF_COUNT*AXIS_IF_KEEP_WIDTH-1:0] s_axis_if_rx_tkeep, + input wire [IF_COUNT-1:0] s_axis_if_rx_tvalid, + output wire [IF_COUNT-1:0] s_axis_if_rx_tready, + input wire [IF_COUNT-1:0] s_axis_if_rx_tlast, + input wire [IF_COUNT*AXIS_IF_RX_ID_WIDTH-1:0] s_axis_if_rx_tid, + input wire [IF_COUNT*AXIS_IF_RX_DEST_WIDTH-1:0] s_axis_if_rx_tdest, + input wire [IF_COUNT*AXIS_IF_RX_USER_WIDTH-1:0] s_axis_if_rx_tuser, + + output wire 
[IF_COUNT*AXIS_IF_DATA_WIDTH-1:0] m_axis_if_rx_tdata, + output wire [IF_COUNT*AXIS_IF_KEEP_WIDTH-1:0] m_axis_if_rx_tkeep, + output wire [IF_COUNT-1:0] m_axis_if_rx_tvalid, + input wire [IF_COUNT-1:0] m_axis_if_rx_tready, + output wire [IF_COUNT-1:0] m_axis_if_rx_tlast, + output wire [IF_COUNT*AXIS_IF_RX_ID_WIDTH-1:0] m_axis_if_rx_tid, + output wire [IF_COUNT*AXIS_IF_RX_DEST_WIDTH-1:0] m_axis_if_rx_tdest, + output wire [IF_COUNT*AXIS_IF_RX_USER_WIDTH-1:0] m_axis_if_rx_tuser, + + /* + * Statistics increment output + */ + output wire [STAT_INC_WIDTH-1:0] m_axis_stat_tdata, + output wire [STAT_ID_WIDTH-1:0] m_axis_stat_tid, + output wire m_axis_stat_tvalid, + input wire m_axis_stat_tready, + + /* + * GPIO + */ + input wire [APP_GPIO_IN_WIDTH-1:0] gpio_in, + output wire [APP_GPIO_OUT_WIDTH-1:0] gpio_out, + + /* + * JTAG + */ + input wire jtag_tdi, + output wire jtag_tdo, + input wire jtag_tms, + input wire jtag_tck +); + +// check configuration +initial begin + if (APP_ID != 32'h12348001) begin + $error("Error: Invalid APP_ID (expected 32'h12348001, got 32'h%x) (instance %m)", APP_ID); + $finish; + end + + if (!APP_DMA_ENABLE) begin + $error("Error: APP_DMA_ENABLE required (instance %m)"); + $finish; + end +end + +localparam RAM_ADDR_IMM_WIDTH = (DMA_IMM_ENABLE && (DMA_IMM_WIDTH > RAM_ADDR_WIDTH)) ? DMA_IMM_WIDTH : RAM_ADDR_WIDTH; + +/* + * AXI-Lite master interface (control to NIC) + */ +assign m_axil_ctrl_awaddr = 0; +assign m_axil_ctrl_awprot = 0; +assign m_axil_ctrl_awvalid = 1'b0; +assign m_axil_ctrl_wdata = 0; +assign m_axil_ctrl_wstrb = 0; +assign m_axil_ctrl_wvalid = 1'b0; +assign m_axil_ctrl_bready = 1'b1; +assign m_axil_ctrl_araddr = 0; +assign m_axil_ctrl_arprot = 0; +assign m_axil_ctrl_arvalid = 1'b0; +assign m_axil_ctrl_rready = 1'b1; + +/* + * Ethernet (direct MAC interface - lowest latency raw traffic) + */ +assign m_axis_direct_tx_tdata = s_axis_direct_tx_tdata; +assign m_axis_direct_tx_tkeep = s_axis_direct_tx_tkeep; +assign m_axis_direct_tx_tvalid = s_axis_direct_tx_tvalid; +assign s_axis_direct_tx_tready = m_axis_direct_tx_tready; +assign m_axis_direct_tx_tlast = s_axis_direct_tx_tlast; +assign m_axis_direct_tx_tuser = s_axis_direct_tx_tuser; + +assign m_axis_direct_tx_ptp_ts = s_axis_direct_tx_ptp_ts; +assign m_axis_direct_tx_ptp_ts_tag = s_axis_direct_tx_ptp_ts_tag; +assign m_axis_direct_tx_ptp_ts_valid = s_axis_direct_tx_ptp_ts_valid; +assign s_axis_direct_tx_ptp_ts_ready = m_axis_direct_tx_ptp_ts_ready; + +assign m_axis_direct_rx_tdata = s_axis_direct_rx_tdata; +assign m_axis_direct_rx_tkeep = s_axis_direct_rx_tkeep; +assign m_axis_direct_rx_tvalid = s_axis_direct_rx_tvalid; +assign s_axis_direct_rx_tready = m_axis_direct_rx_tready; +assign m_axis_direct_rx_tlast = s_axis_direct_rx_tlast; +assign m_axis_direct_rx_tuser = s_axis_direct_rx_tuser; + +/* + * Ethernet (synchronous MAC interface - low latency raw traffic) + */ +assign m_axis_sync_tx_tdata = s_axis_sync_tx_tdata; +assign m_axis_sync_tx_tkeep = s_axis_sync_tx_tkeep; +assign m_axis_sync_tx_tvalid = s_axis_sync_tx_tvalid; +assign s_axis_sync_tx_tready = m_axis_sync_tx_tready; +assign m_axis_sync_tx_tlast = s_axis_sync_tx_tlast; +assign m_axis_sync_tx_tuser = s_axis_sync_tx_tuser; + +assign m_axis_sync_tx_ptp_ts = s_axis_sync_tx_ptp_ts; +assign m_axis_sync_tx_ptp_ts_tag = s_axis_sync_tx_ptp_ts_tag; +assign m_axis_sync_tx_ptp_ts_valid = s_axis_sync_tx_ptp_ts_valid; +assign s_axis_sync_tx_ptp_ts_ready = m_axis_sync_tx_ptp_ts_ready; + +assign m_axis_sync_rx_tdata = s_axis_sync_rx_tdata; +assign m_axis_sync_rx_tkeep = 
s_axis_sync_rx_tkeep; +assign m_axis_sync_rx_tvalid = s_axis_sync_rx_tvalid; +assign s_axis_sync_rx_tready = m_axis_sync_rx_tready; +assign m_axis_sync_rx_tlast = s_axis_sync_rx_tlast; +assign m_axis_sync_rx_tuser = s_axis_sync_rx_tuser; + +/* + * Ethernet (internal at interface module) + */ +assign m_axis_if_tx_tdata = s_axis_if_tx_tdata; +assign m_axis_if_tx_tkeep = s_axis_if_tx_tkeep; +assign m_axis_if_tx_tvalid = s_axis_if_tx_tvalid; +assign s_axis_if_tx_tready = m_axis_if_tx_tready; +assign m_axis_if_tx_tlast = s_axis_if_tx_tlast; +assign m_axis_if_tx_tid = s_axis_if_tx_tid; +assign m_axis_if_tx_tdest = s_axis_if_tx_tdest; +assign m_axis_if_tx_tuser = s_axis_if_tx_tuser; + +assign m_axis_if_tx_ptp_ts = s_axis_if_tx_ptp_ts; +assign m_axis_if_tx_ptp_ts_tag = s_axis_if_tx_ptp_ts_tag; +assign m_axis_if_tx_ptp_ts_valid = s_axis_if_tx_ptp_ts_valid; +assign s_axis_if_tx_ptp_ts_ready = m_axis_if_tx_ptp_ts_ready; + +assign m_axis_if_rx_tdata = s_axis_if_rx_tdata; +assign m_axis_if_rx_tkeep = s_axis_if_rx_tkeep; +assign m_axis_if_rx_tvalid = s_axis_if_rx_tvalid; +assign s_axis_if_rx_tready = m_axis_if_rx_tready; +assign m_axis_if_rx_tlast = s_axis_if_rx_tlast; +assign m_axis_if_rx_tid = s_axis_if_rx_tid; +assign m_axis_if_rx_tdest = s_axis_if_rx_tdest; +assign m_axis_if_rx_tuser = s_axis_if_rx_tuser; + +/* + * DMA interface (control) + */ +assign m_axis_ctrl_dma_read_desc_dma_addr = 0; +assign m_axis_ctrl_dma_read_desc_ram_sel = 0; +assign m_axis_ctrl_dma_read_desc_ram_addr = 0; +assign m_axis_ctrl_dma_read_desc_len = 0; +assign m_axis_ctrl_dma_read_desc_tag = 0; +assign m_axis_ctrl_dma_read_desc_valid = 1'b0; +assign m_axis_ctrl_dma_write_desc_dma_addr = 0; +assign m_axis_ctrl_dma_write_desc_ram_sel = 0; +assign m_axis_ctrl_dma_write_desc_ram_addr = 0; +assign m_axis_ctrl_dma_write_desc_imm = 0; +assign m_axis_ctrl_dma_write_desc_imm_en = 0; +assign m_axis_ctrl_dma_write_desc_len = 0; +assign m_axis_ctrl_dma_write_desc_tag = 0; +assign m_axis_ctrl_dma_write_desc_valid = 1'b0; + +assign ctrl_dma_ram_wr_cmd_ready = 1'b1; +assign ctrl_dma_ram_wr_done = ctrl_dma_ram_wr_cmd_valid; +assign ctrl_dma_ram_rd_cmd_ready = ctrl_dma_ram_rd_resp_ready; +assign ctrl_dma_ram_rd_resp_data = 0; +assign ctrl_dma_ram_rd_resp_valid = ctrl_dma_ram_rd_cmd_valid; + +/* + * Statistics increment output + */ +assign m_axis_stat_tdata = 0; +assign m_axis_stat_tid = 0; +assign m_axis_stat_tvalid = 1'b0; + +/* + * GPIO + */ +assign gpio_out = 0; + +/* + * JTAG + */ +assign jtag_tdo = jtag_tdi; + + +// AXI lite connections +wire [AXIL_APP_CTRL_ADDR_WIDTH-1:0] axil_csr_awaddr; +wire [2:0] axil_csr_awprot; +wire axil_csr_awvalid; +wire axil_csr_awready; +wire [AXIL_APP_CTRL_DATA_WIDTH-1:0] axil_csr_wdata; +wire [AXIL_APP_CTRL_STRB_WIDTH-1:0] axil_csr_wstrb; +wire axil_csr_wvalid; +wire axil_csr_wready; +wire [1:0] axil_csr_bresp; +wire axil_csr_bvalid; +wire axil_csr_bready; +wire [AXIL_APP_CTRL_ADDR_WIDTH-1:0] axil_csr_araddr; +wire [2:0] axil_csr_arprot; +wire axil_csr_arvalid; +wire axil_csr_arready; +wire [AXIL_APP_CTRL_DATA_WIDTH-1:0] axil_csr_rdata; +wire [1:0] axil_csr_rresp; +wire axil_csr_rvalid; +wire axil_csr_rready; + +assign axil_csr_awaddr = s_axil_app_ctrl_awaddr; +assign axil_csr_awprot = s_axil_app_ctrl_awprot; +assign axil_csr_awvalid = s_axil_app_ctrl_awvalid; +assign s_axil_app_ctrl_awready = axil_csr_awready; +assign axil_csr_wdata = s_axil_app_ctrl_wdata; +assign axil_csr_wstrb = s_axil_app_ctrl_wstrb; +assign axil_csr_wvalid = s_axil_app_ctrl_wvalid; +assign s_axil_app_ctrl_wready = axil_csr_wready; 
+assign s_axil_app_ctrl_bresp = axil_csr_bresp; +assign s_axil_app_ctrl_bvalid = axil_csr_bvalid; +assign axil_csr_bready = s_axil_app_ctrl_bready; +assign axil_csr_araddr = s_axil_app_ctrl_araddr; +assign axil_csr_arprot = s_axil_app_ctrl_arprot; +assign axil_csr_arvalid = s_axil_app_ctrl_arvalid; +assign s_axil_app_ctrl_arready = axil_csr_arready; +assign s_axil_app_ctrl_rdata = axil_csr_rdata; +assign s_axil_app_ctrl_rresp = axil_csr_rresp; +assign s_axil_app_ctrl_rvalid = axil_csr_rvalid; +assign axil_csr_rready = s_axil_app_ctrl_rready; + +// control registers +reg axil_csr_awready_reg = 1'b0, axil_csr_awready_next; +reg axil_csr_wready_reg = 1'b0, axil_csr_wready_next; +reg [1:0] axil_csr_bresp_reg = 2'b00, axil_csr_bresp_next; +reg axil_csr_bvalid_reg = 1'b0, axil_csr_bvalid_next; +reg axil_csr_arready_reg = 1'b0, axil_csr_arready_next; +reg [AXIL_APP_CTRL_DATA_WIDTH-1:0] axil_csr_rdata_reg = 0, axil_csr_rdata_next; +reg [1:0] axil_csr_rresp_reg = 2'b00, axil_csr_rresp_next; +reg axil_csr_rvalid_reg = 1'b0, axil_csr_rvalid_next; + +reg [63:0] cycle_count_reg = 0; +reg [15:0] dma_read_active_count_reg = 0; +reg [15:0] dma_write_active_count_reg = 0; + +reg [DMA_ADDR_WIDTH-1:0] dma_read_desc_dma_addr_reg = 0, dma_read_desc_dma_addr_next; +reg [RAM_ADDR_WIDTH-1:0] dma_read_desc_ram_addr_reg = 0, dma_read_desc_ram_addr_next; +reg [DMA_LEN_WIDTH-1:0] dma_read_desc_len_reg = 0, dma_read_desc_len_next; +reg [DMA_TAG_WIDTH-1:0] dma_read_desc_tag_reg = 0, dma_read_desc_tag_next; +reg dma_read_desc_valid_reg = 0, dma_read_desc_valid_next; + +reg [DMA_TAG_WIDTH-1:0] dma_read_desc_status_tag_reg = 0, dma_read_desc_status_tag_next; +reg [3:0] dma_read_desc_status_error_reg = 0, dma_read_desc_status_error_next; +reg dma_read_desc_status_valid_reg = 0, dma_read_desc_status_valid_next; + +reg [DMA_ADDR_WIDTH-1:0] dma_write_desc_dma_addr_reg = 0, dma_write_desc_dma_addr_next; +reg [RAM_ADDR_IMM_WIDTH-1:0] dma_write_desc_ram_addr_imm_reg = 0, dma_write_desc_ram_addr_imm_next; +reg dma_write_desc_imm_en_reg = 0, dma_write_desc_imm_en_next; +reg [DMA_LEN_WIDTH-1:0] dma_write_desc_len_reg = 0, dma_write_desc_len_next; +reg [DMA_TAG_WIDTH-1:0] dma_write_desc_tag_reg = 0, dma_write_desc_tag_next; +reg dma_write_desc_valid_reg = 0, dma_write_desc_valid_next; + +reg [DMA_TAG_WIDTH-1:0] dma_write_desc_status_tag_reg = 0, dma_write_desc_status_tag_next; +reg [3:0] dma_write_desc_status_error_reg = 0, dma_write_desc_status_error_next; +reg dma_write_desc_status_valid_reg = 0, dma_write_desc_status_valid_next; + +reg dma_rd_int_en_reg = 0, dma_rd_int_en_next; +reg dma_wr_int_en_reg = 0, dma_wr_int_en_next; + +reg dma_read_block_run_reg = 1'b0, dma_read_block_run_next; +reg [DMA_LEN_WIDTH-1:0] dma_read_block_len_reg = 0, dma_read_block_len_next; +reg [31:0] dma_read_block_count_reg = 0, dma_read_block_count_next; +reg [63:0] dma_read_block_cycle_count_reg = 0, dma_read_block_cycle_count_next; +reg [DMA_ADDR_WIDTH-1:0] dma_read_block_dma_base_addr_reg = 0, dma_read_block_dma_base_addr_next; +reg [DMA_ADDR_WIDTH-1:0] dma_read_block_dma_offset_reg = 0, dma_read_block_dma_offset_next; +reg [DMA_ADDR_WIDTH-1:0] dma_read_block_dma_offset_mask_reg = 0, dma_read_block_dma_offset_mask_next; +reg [DMA_ADDR_WIDTH-1:0] dma_read_block_dma_stride_reg = 0, dma_read_block_dma_stride_next; +reg [RAM_ADDR_WIDTH-1:0] dma_read_block_ram_base_addr_reg = 0, dma_read_block_ram_base_addr_next; +reg [RAM_ADDR_WIDTH-1:0] dma_read_block_ram_offset_reg = 0, dma_read_block_ram_offset_next; +reg [RAM_ADDR_WIDTH-1:0] 
dma_read_block_ram_offset_mask_reg = 0, dma_read_block_ram_offset_mask_next; +reg [RAM_ADDR_WIDTH-1:0] dma_read_block_ram_stride_reg = 0, dma_read_block_ram_stride_next; + +reg dma_write_block_run_reg = 1'b0, dma_write_block_run_next; +reg [DMA_LEN_WIDTH-1:0] dma_write_block_len_reg = 0, dma_write_block_len_next; +reg [31:0] dma_write_block_count_reg = 0, dma_write_block_count_next; +reg [63:0] dma_write_block_cycle_count_reg = 0, dma_write_block_cycle_count_next; +reg [DMA_ADDR_WIDTH-1:0] dma_write_block_dma_base_addr_reg = 0, dma_write_block_dma_base_addr_next; +reg [DMA_ADDR_WIDTH-1:0] dma_write_block_dma_offset_reg = 0, dma_write_block_dma_offset_next; +reg [DMA_ADDR_WIDTH-1:0] dma_write_block_dma_offset_mask_reg = 0, dma_write_block_dma_offset_mask_next; +reg [DMA_ADDR_WIDTH-1:0] dma_write_block_dma_stride_reg = 0, dma_write_block_dma_stride_next; +reg [RAM_ADDR_WIDTH-1:0] dma_write_block_ram_base_addr_reg = 0, dma_write_block_ram_base_addr_next; +reg [RAM_ADDR_WIDTH-1:0] dma_write_block_ram_offset_reg = 0, dma_write_block_ram_offset_next; +reg [RAM_ADDR_WIDTH-1:0] dma_write_block_ram_offset_mask_reg = 0, dma_write_block_ram_offset_mask_next; +reg [RAM_ADDR_WIDTH-1:0] dma_write_block_ram_stride_reg = 0, dma_write_block_ram_stride_next; + +assign axil_csr_awready = axil_csr_awready_reg; +assign axil_csr_wready = axil_csr_wready_reg; +assign axil_csr_bresp = axil_csr_bresp_reg; +assign axil_csr_bvalid = axil_csr_bvalid_reg; +assign axil_csr_arready = axil_csr_arready_reg; +assign axil_csr_rdata = axil_csr_rdata_reg; +assign axil_csr_rresp = axil_csr_rresp_reg; +assign axil_csr_rvalid = axil_csr_rvalid_reg; + +assign m_axis_data_dma_read_desc_dma_addr = dma_read_desc_dma_addr_reg; +assign m_axis_data_dma_read_desc_ram_sel = 0; +assign m_axis_data_dma_read_desc_ram_addr = dma_read_desc_ram_addr_reg; +assign m_axis_data_dma_read_desc_len = dma_read_desc_len_reg; +assign m_axis_data_dma_read_desc_tag = dma_read_desc_tag_reg; +assign m_axis_data_dma_read_desc_valid = dma_read_desc_valid_reg; + +assign m_axis_data_dma_write_desc_dma_addr = dma_write_desc_dma_addr_reg; +assign m_axis_data_dma_write_desc_ram_sel = 0; +assign m_axis_data_dma_write_desc_ram_addr = dma_write_desc_ram_addr_imm_reg; +assign m_axis_data_dma_write_desc_imm = dma_write_desc_ram_addr_imm_reg; +assign m_axis_data_dma_write_desc_imm_en = dma_write_desc_imm_en_reg; +assign m_axis_data_dma_write_desc_len = dma_write_desc_len_reg; +assign m_axis_data_dma_write_desc_tag = dma_write_desc_tag_reg; +assign m_axis_data_dma_write_desc_valid = dma_write_desc_valid_reg; + +always @* begin + axil_csr_awready_next = 1'b0; + axil_csr_wready_next = 1'b0; + axil_csr_bresp_next = 2'b00; + axil_csr_bvalid_next = axil_csr_bvalid_reg && !axil_csr_bready; + axil_csr_arready_next = 1'b0; + axil_csr_rdata_next = axil_csr_rdata_reg; + axil_csr_rresp_next = 2'b00; + axil_csr_rvalid_next = axil_csr_rvalid_reg && !axil_csr_rready; + + dma_read_desc_dma_addr_next = dma_read_desc_dma_addr_reg; + dma_read_desc_ram_addr_next = dma_read_desc_ram_addr_reg; + dma_read_desc_len_next = dma_read_desc_len_reg; + dma_read_desc_tag_next = dma_read_desc_tag_reg; + dma_read_desc_valid_next = dma_read_desc_valid_reg && !m_axis_data_dma_read_desc_ready; + + dma_read_desc_status_tag_next = dma_read_desc_status_tag_reg; + dma_read_desc_status_error_next = dma_read_desc_status_error_reg; + dma_read_desc_status_valid_next = dma_read_desc_status_valid_reg; + + dma_write_desc_dma_addr_next = dma_write_desc_dma_addr_reg; + dma_write_desc_ram_addr_imm_next = 
dma_write_desc_ram_addr_imm_reg; + dma_write_desc_imm_en_next = dma_write_desc_imm_en_reg; + dma_write_desc_len_next = dma_write_desc_len_reg; + dma_write_desc_tag_next = dma_write_desc_tag_reg; + dma_write_desc_valid_next = dma_write_desc_valid_reg && !m_axis_data_dma_write_desc_ready; + + dma_write_desc_status_tag_next = dma_write_desc_status_tag_reg; + dma_write_desc_status_error_next = dma_write_desc_status_error_reg; + dma_write_desc_status_valid_next = dma_write_desc_status_valid_reg; + + dma_rd_int_en_next = dma_rd_int_en_reg; + dma_wr_int_en_next = dma_wr_int_en_reg; + + dma_read_block_run_next = dma_read_block_run_reg; + dma_read_block_len_next = dma_read_block_len_reg; + dma_read_block_count_next = dma_read_block_count_reg; + dma_read_block_cycle_count_next = dma_read_block_cycle_count_reg; + dma_read_block_dma_base_addr_next = dma_read_block_dma_base_addr_reg; + dma_read_block_dma_offset_next = dma_read_block_dma_offset_reg; + dma_read_block_dma_offset_mask_next = dma_read_block_dma_offset_mask_reg; + dma_read_block_dma_stride_next = dma_read_block_dma_stride_reg; + dma_read_block_ram_base_addr_next = dma_read_block_ram_base_addr_reg; + dma_read_block_ram_offset_next = dma_read_block_ram_offset_reg; + dma_read_block_ram_offset_mask_next = dma_read_block_ram_offset_mask_reg; + dma_read_block_ram_stride_next = dma_read_block_ram_stride_reg; + + dma_write_block_run_next = dma_write_block_run_reg; + dma_write_block_len_next = dma_write_block_len_reg; + dma_write_block_count_next = dma_write_block_count_reg; + dma_write_block_cycle_count_next = dma_write_block_cycle_count_reg; + dma_write_block_dma_base_addr_next = dma_write_block_dma_base_addr_reg; + dma_write_block_dma_offset_next = dma_write_block_dma_offset_reg; + dma_write_block_dma_offset_mask_next = dma_write_block_dma_offset_mask_reg; + dma_write_block_dma_stride_next = dma_write_block_dma_stride_reg; + dma_write_block_ram_base_addr_next = dma_write_block_ram_base_addr_reg; + dma_write_block_ram_offset_next = dma_write_block_ram_offset_reg; + dma_write_block_ram_offset_mask_next = dma_write_block_ram_offset_mask_reg; + dma_write_block_ram_stride_next = dma_write_block_ram_stride_reg; + + if (axil_csr_awvalid && axil_csr_wvalid && !axil_csr_bvalid_reg) begin + // write operation + axil_csr_awready_next = 1'b1; + axil_csr_wready_next = 1'b1; + axil_csr_bresp_next = 2'b00; + axil_csr_bvalid_next = 1'b1; + + case ({axil_csr_awaddr[15:2], 2'b00}) + // control + 16'h0000: begin + end + 16'h0008: begin + dma_rd_int_en_next = axil_csr_wdata[0]; + dma_wr_int_en_next = axil_csr_wdata[1]; + end + // single read + 16'h0100: dma_read_desc_dma_addr_next[31:0] = axil_csr_wdata; + 16'h0104: dma_read_desc_dma_addr_next[63:32] = axil_csr_wdata; + 16'h0108: dma_read_desc_ram_addr_next = axil_csr_wdata; + 16'h0110: dma_read_desc_len_next = axil_csr_wdata; + 16'h0114: begin + dma_read_desc_tag_next = axil_csr_wdata; + dma_read_desc_valid_next = 1'b1; + end + // single write + 16'h0200: dma_write_desc_dma_addr_next[31:0] = axil_csr_wdata; + 16'h0204: dma_write_desc_dma_addr_next[63:32] = axil_csr_wdata; + 16'h0208: dma_write_desc_ram_addr_imm_next = axil_csr_wdata; + 16'h0210: dma_write_desc_len_next = axil_csr_wdata; + 16'h0214: begin + dma_write_desc_tag_next = axil_csr_wdata[23:0]; + dma_write_desc_imm_en_next = axil_csr_wdata[31]; + dma_write_desc_valid_next = 1'b1; + end + // block read + 16'h1000: begin + dma_read_block_run_next = axil_csr_wdata[0]; + end + 16'h1008: dma_read_block_cycle_count_next[31:0] = axil_csr_wdata; + 16'h100c: 
dma_read_block_cycle_count_next[63:32] = axil_csr_wdata; + 16'h1010: dma_read_block_len_next = axil_csr_wdata; + 16'h1018: dma_read_block_count_next[31:0] = axil_csr_wdata; + 16'h1080: dma_read_block_dma_base_addr_next[31:0] = axil_csr_wdata; + 16'h1084: dma_read_block_dma_base_addr_next[63:32] = axil_csr_wdata; + 16'h1088: dma_read_block_dma_offset_next[31:0] = axil_csr_wdata; + 16'h108c: dma_read_block_dma_offset_next[63:32] = axil_csr_wdata; + 16'h1090: dma_read_block_dma_offset_mask_next[31:0] = axil_csr_wdata; + 16'h1094: dma_read_block_dma_offset_mask_next[63:32] = axil_csr_wdata; + 16'h1098: dma_read_block_dma_stride_next[31:0] = axil_csr_wdata; + 16'h109c: dma_read_block_dma_stride_next[63:32] = axil_csr_wdata; + 16'h10c0: dma_read_block_ram_base_addr_next = axil_csr_wdata; + 16'h10c8: dma_read_block_ram_offset_next = axil_csr_wdata; + 16'h10d0: dma_read_block_ram_offset_mask_next = axil_csr_wdata; + 16'h10d8: dma_read_block_ram_stride_next = axil_csr_wdata; + // block write + 16'h1100: begin + dma_write_block_run_next = axil_csr_wdata[0]; + end + 16'h1108: dma_write_block_cycle_count_next[31:0] = axil_csr_wdata; + 16'h110c: dma_write_block_cycle_count_next[63:32] = axil_csr_wdata; + 16'h1110: dma_write_block_len_next = axil_csr_wdata; + 16'h1118: dma_write_block_count_next[31:0] = axil_csr_wdata; + 16'h1180: dma_write_block_dma_base_addr_next[31:0] = axil_csr_wdata; + 16'h1184: dma_write_block_dma_base_addr_next[63:32] = axil_csr_wdata; + 16'h1188: dma_write_block_dma_offset_next[31:0] = axil_csr_wdata; + 16'h118c: dma_write_block_dma_offset_next[63:32] = axil_csr_wdata; + 16'h1190: dma_write_block_dma_offset_mask_next[31:0] = axil_csr_wdata; + 16'h1194: dma_write_block_dma_offset_mask_next[63:32] = axil_csr_wdata; + 16'h1198: dma_write_block_dma_stride_next[31:0] = axil_csr_wdata; + 16'h119c: dma_write_block_dma_stride_next[63:32] = axil_csr_wdata; + 16'h11c0: dma_write_block_ram_base_addr_next = axil_csr_wdata; + 16'h11c8: dma_write_block_ram_offset_next = axil_csr_wdata; + 16'h11d0: dma_write_block_ram_offset_mask_next = axil_csr_wdata; + 16'h11d8: dma_write_block_ram_stride_next = axil_csr_wdata; + endcase + end + + if (axil_csr_arvalid && !axil_csr_rvalid_reg) begin + // read operation + axil_csr_arready_next = 1'b1; + axil_csr_rresp_next = 2'b00; + axil_csr_rvalid_next = 1'b1; + axil_csr_rdata_next = 32'd0; + + case ({axil_csr_araddr[15:2], 2'b00}) + // control + 16'h0000: begin + end + 16'h0008: begin + axil_csr_rdata_next[0] = dma_rd_int_en_reg; + axil_csr_rdata_next[1] = dma_wr_int_en_reg; + end + 16'h0010: axil_csr_rdata_next = cycle_count_reg; + 16'h0014: axil_csr_rdata_next = cycle_count_reg >> 32; + 16'h0020: axil_csr_rdata_next = dma_read_active_count_reg; + 16'h0028: axil_csr_rdata_next = dma_write_active_count_reg; + // single read + 16'h0100: axil_csr_rdata_next = dma_read_desc_dma_addr_reg; + 16'h0104: axil_csr_rdata_next = dma_read_desc_dma_addr_reg >> 32; + 16'h0108: axil_csr_rdata_next = dma_read_desc_ram_addr_reg; + 16'h010c: axil_csr_rdata_next = dma_read_desc_ram_addr_reg >> 32; + 16'h0110: axil_csr_rdata_next = dma_read_desc_len_reg; + 16'h0114: axil_csr_rdata_next = dma_read_desc_tag_reg; + 16'h0118: begin + axil_csr_rdata_next[15:0] = dma_read_desc_status_tag_reg; + axil_csr_rdata_next[27:24] = dma_read_desc_status_error_reg; + axil_csr_rdata_next[31] = dma_read_desc_status_valid_reg; + dma_read_desc_status_valid_next = 1'b0; + end + // single write + 16'h0200: axil_csr_rdata_next = dma_write_desc_dma_addr_reg; + 16'h0204: axil_csr_rdata_next = 
dma_write_desc_dma_addr_reg >> 32; + 16'h0208: axil_csr_rdata_next = dma_write_desc_ram_addr_imm_reg; + 16'h020c: axil_csr_rdata_next = dma_write_desc_ram_addr_imm_reg >> 32; + 16'h0210: axil_csr_rdata_next = dma_write_desc_len_reg; + 16'h0214: begin + axil_csr_rdata_next[23:0] = dma_write_desc_tag_reg; + axil_csr_rdata_next[31] = dma_write_desc_imm_en_reg; + end + 16'h0218: begin + axil_csr_rdata_next[15:0] = dma_write_desc_status_tag_reg; + axil_csr_rdata_next[27:24] = dma_write_desc_status_error_reg; + axil_csr_rdata_next[31] = dma_write_desc_status_valid_reg; + dma_write_desc_status_valid_next = 1'b0; + end + // block read + 16'h1000: begin + axil_csr_rdata_next[0] = dma_read_block_run_reg; + end + 16'h1008: axil_csr_rdata_next = dma_read_block_cycle_count_reg; + 16'h100c: axil_csr_rdata_next = dma_read_block_cycle_count_reg >> 32; + 16'h1010: axil_csr_rdata_next = dma_read_block_len_reg; + 16'h1018: axil_csr_rdata_next = dma_read_block_count_reg; + 16'h101c: axil_csr_rdata_next = dma_read_block_count_reg >> 32; + 16'h1080: axil_csr_rdata_next = dma_read_block_dma_base_addr_reg; + 16'h1084: axil_csr_rdata_next = dma_read_block_dma_base_addr_reg >> 32; + 16'h1088: axil_csr_rdata_next = dma_read_block_dma_offset_reg; + 16'h108c: axil_csr_rdata_next = dma_read_block_dma_offset_reg >> 32; + 16'h1090: axil_csr_rdata_next = dma_read_block_dma_offset_mask_reg; + 16'h1094: axil_csr_rdata_next = dma_read_block_dma_offset_mask_reg >> 32; + 16'h1098: axil_csr_rdata_next = dma_read_block_dma_stride_reg; + 16'h109c: axil_csr_rdata_next = dma_read_block_dma_stride_reg >> 32; + 16'h10c0: axil_csr_rdata_next = dma_read_block_ram_base_addr_reg; + 16'h10c4: axil_csr_rdata_next = dma_read_block_ram_base_addr_reg >> 32; + 16'h10c8: axil_csr_rdata_next = dma_read_block_ram_offset_reg; + 16'h10cc: axil_csr_rdata_next = dma_read_block_ram_offset_reg >> 32; + 16'h10d0: axil_csr_rdata_next = dma_read_block_ram_offset_mask_reg; + 16'h10d4: axil_csr_rdata_next = dma_read_block_ram_offset_mask_reg >> 32; + 16'h10d8: axil_csr_rdata_next = dma_read_block_ram_stride_reg; + 16'h10dc: axil_csr_rdata_next = dma_read_block_ram_stride_reg >> 32; + // block write + 16'h1100: begin + axil_csr_rdata_next[0] = dma_write_block_run_reg; + end + 16'h1108: axil_csr_rdata_next = dma_write_block_cycle_count_reg; + 16'h110c: axil_csr_rdata_next = dma_write_block_cycle_count_reg >> 32; + 16'h1110: axil_csr_rdata_next = dma_write_block_len_reg; + 16'h1118: axil_csr_rdata_next = dma_write_block_count_reg; + 16'h111c: axil_csr_rdata_next = dma_write_block_count_reg >> 32; + 16'h1180: axil_csr_rdata_next = dma_write_block_dma_base_addr_reg; + 16'h1184: axil_csr_rdata_next = dma_write_block_dma_base_addr_reg >> 32; + 16'h1188: axil_csr_rdata_next = dma_write_block_dma_offset_reg; + 16'h118c: axil_csr_rdata_next = dma_write_block_dma_offset_reg >> 32; + 16'h1190: axil_csr_rdata_next = dma_write_block_dma_offset_mask_reg; + 16'h1194: axil_csr_rdata_next = dma_write_block_dma_offset_mask_reg >> 32; + 16'h1198: axil_csr_rdata_next = dma_write_block_dma_stride_reg; + 16'h119c: axil_csr_rdata_next = dma_write_block_dma_stride_reg >> 32; + 16'h11c0: axil_csr_rdata_next = dma_write_block_ram_base_addr_reg; + 16'h11c4: axil_csr_rdata_next = dma_write_block_ram_base_addr_reg >> 32; + 16'h11c8: axil_csr_rdata_next = dma_write_block_ram_offset_reg; + 16'h11cc: axil_csr_rdata_next = dma_write_block_ram_offset_reg >> 32; + 16'h11d0: axil_csr_rdata_next = dma_write_block_ram_offset_mask_reg; + 16'h11d4: axil_csr_rdata_next = 
dma_write_block_ram_offset_mask_reg >> 32; + 16'h11d8: axil_csr_rdata_next = dma_write_block_ram_stride_reg; + 16'h11dc: axil_csr_rdata_next = dma_write_block_ram_stride_reg >> 32; + endcase + end + + // store read response + if (s_axis_data_dma_read_desc_status_valid) begin + dma_read_desc_status_tag_next = s_axis_data_dma_read_desc_status_tag; + dma_read_desc_status_error_next = s_axis_data_dma_read_desc_status_error; + dma_read_desc_status_valid_next = s_axis_data_dma_read_desc_status_valid; + end + + // store write response + if (s_axis_data_dma_write_desc_status_valid) begin + dma_write_desc_status_tag_next = s_axis_data_dma_write_desc_status_tag; + dma_write_desc_status_error_next = s_axis_data_dma_write_desc_status_error; + dma_write_desc_status_valid_next = s_axis_data_dma_write_desc_status_valid; + end + + // block read + if (dma_read_block_run_reg) begin + dma_read_block_cycle_count_next = dma_read_block_cycle_count_reg + 1; + + if (dma_read_block_count_reg == 0) begin + if (dma_read_active_count_reg == 0) begin + dma_read_block_run_next = 1'b0; + end + end else begin + if (!dma_read_desc_valid_reg || m_axis_data_dma_read_desc_ready) begin + dma_read_block_dma_offset_next = dma_read_block_dma_offset_reg + dma_read_block_dma_stride_reg; + dma_read_desc_dma_addr_next = dma_read_block_dma_base_addr_reg + (dma_read_block_dma_offset_reg & dma_read_block_dma_offset_mask_reg); + dma_read_block_ram_offset_next = dma_read_block_ram_offset_reg + dma_read_block_ram_stride_reg; + dma_read_desc_ram_addr_next = dma_read_block_ram_base_addr_reg + (dma_read_block_ram_offset_reg & dma_read_block_ram_offset_mask_reg); + dma_read_desc_len_next = dma_read_block_len_reg; + dma_read_block_count_next = dma_read_block_count_reg - 1; + dma_read_desc_tag_next = dma_read_block_count_reg; + dma_read_desc_valid_next = 1'b1; + end + end + end + + // block write + if (dma_write_block_run_reg) begin + dma_write_block_cycle_count_next = dma_write_block_cycle_count_reg + 1; + + if (dma_write_block_count_reg == 0) begin + if (dma_write_active_count_reg == 0) begin + dma_write_block_run_next = 1'b0; + end + end else begin + if (!dma_write_desc_valid_reg || m_axis_data_dma_write_desc_ready) begin + dma_write_block_dma_offset_next = dma_write_block_dma_offset_reg + dma_write_block_dma_stride_reg; + dma_write_desc_dma_addr_next = dma_write_block_dma_base_addr_reg + (dma_write_block_dma_offset_reg & dma_write_block_dma_offset_mask_reg); + dma_write_block_ram_offset_next = dma_write_block_ram_offset_reg + dma_write_block_ram_stride_reg; + dma_write_desc_ram_addr_imm_next = dma_write_block_ram_base_addr_reg + (dma_write_block_ram_offset_reg & dma_write_block_ram_offset_mask_reg); + dma_write_desc_imm_en_next = 1'b0; + dma_write_desc_len_next = dma_write_block_len_reg; + dma_write_block_count_next = dma_write_block_count_reg - 1; + dma_write_desc_tag_next = dma_write_block_count_reg; + dma_write_desc_valid_next = 1'b1; + end + end + end +end + +always @(posedge clk) begin + axil_csr_awready_reg <= axil_csr_awready_next; + axil_csr_wready_reg <= axil_csr_wready_next; + axil_csr_bresp_reg <= axil_csr_bresp_next; + axil_csr_bvalid_reg <= axil_csr_bvalid_next; + axil_csr_arready_reg <= axil_csr_arready_next; + axil_csr_rdata_reg <= axil_csr_rdata_next; + axil_csr_rresp_reg <= axil_csr_rresp_next; + axil_csr_rvalid_reg <= axil_csr_rvalid_next; + + cycle_count_reg <= cycle_count_reg + 1; + + dma_read_active_count_reg <= dma_read_active_count_reg + + (m_axis_data_dma_read_desc_valid && m_axis_data_dma_read_desc_ready) + - 
s_axis_data_dma_read_desc_status_valid; + dma_write_active_count_reg <= dma_write_active_count_reg + + (m_axis_data_dma_write_desc_valid && m_axis_data_dma_write_desc_ready) + - s_axis_data_dma_write_desc_status_valid; + + dma_read_desc_dma_addr_reg <= dma_read_desc_dma_addr_next; + dma_read_desc_ram_addr_reg <= dma_read_desc_ram_addr_next; + dma_read_desc_len_reg <= dma_read_desc_len_next; + dma_read_desc_tag_reg <= dma_read_desc_tag_next; + dma_read_desc_valid_reg <= dma_read_desc_valid_next; + + dma_read_desc_status_tag_reg <= dma_read_desc_status_tag_next; + dma_read_desc_status_error_reg <= dma_read_desc_status_error_next; + dma_read_desc_status_valid_reg <= dma_read_desc_status_valid_next; + + dma_write_desc_dma_addr_reg <= dma_write_desc_dma_addr_next; + dma_write_desc_ram_addr_imm_reg <= dma_write_desc_ram_addr_imm_next; + dma_write_desc_imm_en_reg <= dma_write_desc_imm_en_next; + dma_write_desc_len_reg <= dma_write_desc_len_next; + dma_write_desc_tag_reg <= dma_write_desc_tag_next; + dma_write_desc_valid_reg <= dma_write_desc_valid_next; + + dma_write_desc_status_tag_reg <= dma_write_desc_status_tag_next; + dma_write_desc_status_error_reg <= dma_write_desc_status_error_next; + dma_write_desc_status_valid_reg <= dma_write_desc_status_valid_next; + + dma_rd_int_en_reg <= dma_rd_int_en_next; + dma_wr_int_en_reg <= dma_wr_int_en_next; + + dma_read_block_run_reg <= dma_read_block_run_next; + dma_read_block_len_reg <= dma_read_block_len_next; + dma_read_block_count_reg <= dma_read_block_count_next; + dma_read_block_cycle_count_reg <= dma_read_block_cycle_count_next; + dma_read_block_dma_base_addr_reg <= dma_read_block_dma_base_addr_next; + dma_read_block_dma_offset_reg <= dma_read_block_dma_offset_next; + dma_read_block_dma_offset_mask_reg <= dma_read_block_dma_offset_mask_next; + dma_read_block_dma_stride_reg <= dma_read_block_dma_stride_next; + dma_read_block_ram_base_addr_reg <= dma_read_block_ram_base_addr_next; + dma_read_block_ram_offset_reg <= dma_read_block_ram_offset_next; + dma_read_block_ram_offset_mask_reg <= dma_read_block_ram_offset_mask_next; + dma_read_block_ram_stride_reg <= dma_read_block_ram_stride_next; + + dma_write_block_run_reg <= dma_write_block_run_next; + dma_write_block_len_reg <= dma_write_block_len_next; + dma_write_block_count_reg <= dma_write_block_count_next; + dma_write_block_cycle_count_reg <= dma_write_block_cycle_count_next; + dma_write_block_dma_base_addr_reg <= dma_write_block_dma_base_addr_next; + dma_write_block_dma_offset_reg <= dma_write_block_dma_offset_next; + dma_write_block_dma_offset_mask_reg <= dma_write_block_dma_offset_mask_next; + dma_write_block_dma_stride_reg <= dma_write_block_dma_stride_next; + dma_write_block_ram_base_addr_reg <= dma_write_block_ram_base_addr_next; + dma_write_block_ram_offset_reg <= dma_write_block_ram_offset_next; + dma_write_block_ram_offset_mask_reg <= dma_write_block_ram_offset_mask_next; + dma_write_block_ram_stride_reg <= dma_write_block_ram_stride_next; + + if (rst) begin + axil_csr_awready_reg <= 1'b0; + axil_csr_wready_reg <= 1'b0; + axil_csr_bvalid_reg <= 1'b0; + axil_csr_arready_reg <= 1'b0; + axil_csr_rvalid_reg <= 1'b0; + + cycle_count_reg <= 0; + dma_read_active_count_reg <= 0; + dma_write_active_count_reg <= 0; + + dma_read_desc_valid_reg <= 1'b0; + dma_read_desc_status_valid_reg <= 1'b0; + dma_write_desc_valid_reg <= 1'b0; + dma_write_desc_status_valid_reg <= 1'b0; + dma_rd_int_en_reg <= 1'b0; + dma_wr_int_en_reg <= 1'b0; + dma_read_block_run_reg <= 1'b0; + dma_write_block_run_reg <= 1'b0; + end 
+end + +dma_psdpram #( + .SIZE(16384), + .SEG_COUNT(RAM_SEG_COUNT), + .SEG_DATA_WIDTH(RAM_SEG_DATA_WIDTH), + .SEG_ADDR_WIDTH(RAM_SEG_ADDR_WIDTH), + .SEG_BE_WIDTH(RAM_SEG_BE_WIDTH), + .PIPELINE(2) +) +dma_ram_inst ( + .clk(clk), + .rst(rst), + + /* + * Write port + */ + .wr_cmd_be(data_dma_ram_wr_cmd_be), + .wr_cmd_addr(data_dma_ram_wr_cmd_addr), + .wr_cmd_data(data_dma_ram_wr_cmd_data), + .wr_cmd_valid(data_dma_ram_wr_cmd_valid), + .wr_cmd_ready(data_dma_ram_wr_cmd_ready), + .wr_done(data_dma_ram_wr_done), + + /* + * Read port + */ + .rd_cmd_addr(data_dma_ram_rd_cmd_addr), + .rd_cmd_valid(data_dma_ram_rd_cmd_valid), + .rd_cmd_ready(data_dma_ram_rd_cmd_ready), + .rd_resp_data(data_dma_ram_rd_resp_data), + .rd_resp_valid(data_dma_ram_rd_resp_valid), + .rd_resp_ready(data_dma_ram_rd_resp_ready) +); + +endmodule + +`resetall diff --git a/fpga/app/dma_bench/tb/mqnic_core_pcie_us/Makefile b/fpga/app/dma_bench/tb/mqnic_core_pcie_us/Makefile new file mode 100644 index 000000000..b511f36c7 --- /dev/null +++ b/fpga/app/dma_bench/tb/mqnic_core_pcie_us/Makefile @@ -0,0 +1,439 @@ +# Copyright 2021, The Regents of the University of California. +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are met: +# +# 1. Redistributions of source code must retain the above copyright notice, +# this list of conditions and the following disclaimer. +# +# 2. Redistributions in binary form must reproduce the above copyright notice, +# this list of conditions and the following disclaimer in the documentation +# and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY THE REGENTS OF THE UNIVERSITY OF CALIFORNIA ''AS +# IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +# DISCLAIMED. IN NO EVENT SHALL THE REGENTS OF THE UNIVERSITY OF CALIFORNIA OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT +# OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING +# IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY +# OF SUCH DAMAGE. +# +# The views and conclusions contained in the software and documentation are those +# of the authors and should not be interpreted as representing official policies, +# either expressed or implied, of The Regents of the University of California. 
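+
+# Cocotb testbench makefile for the DMA benchmark application block.
+# A minimal usage sketch (assuming Icarus Verilog plus the cocotb, cocotb-test,
+# cocotbext-axi, cocotbext-eth, and cocotbext-pcie packages are installed):
+#
+#   make                 # compile VERILOG_SOURCES and run test_mqnic_core_pcie_us.py
+#   make WAVES=1         # also dump an FST waveform of the toplevel
+#   make SIM=verilator   # build and run under Verilator instead of Icarus
+#
+# The exported PARAM_* variables below map to module parameters and can be
+# overridden on the command line, e.g. make PARAM_AXIS_PCIE_DATA_WIDTH=256.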
+ +TOPLEVEL_LANG = verilog + +SIM ?= icarus +WAVES ?= 0 + +COCOTB_HDL_TIMEUNIT = 1ns +COCOTB_HDL_TIMEPRECISION = 1ps + +DUT = mqnic_core_pcie_us +TOPLEVEL = $(DUT) +MODULE = test_$(DUT) +VERILOG_SOURCES += ../../rtl/common/$(DUT).v +VERILOG_SOURCES += ../../rtl/common/mqnic_core_pcie.v +VERILOG_SOURCES += ../../rtl/common/mqnic_core.v +VERILOG_SOURCES += ../../rtl/common/mqnic_interface.v +VERILOG_SOURCES += ../../rtl/common/mqnic_interface_tx.v +VERILOG_SOURCES += ../../rtl/common/mqnic_interface_rx.v +VERILOG_SOURCES += ../../rtl/common/mqnic_egress.v +VERILOG_SOURCES += ../../rtl/common/mqnic_ingress.v +VERILOG_SOURCES += ../../rtl/common/mqnic_l2_egress.v +VERILOG_SOURCES += ../../rtl/common/mqnic_l2_ingress.v +VERILOG_SOURCES += ../../rtl/common/mqnic_ptp.v +VERILOG_SOURCES += ../../rtl/common/mqnic_ptp_clock.v +VERILOG_SOURCES += ../../rtl/common/mqnic_ptp_perout.v +VERILOG_SOURCES += ../../rtl/common/cpl_write.v +VERILOG_SOURCES += ../../rtl/common/cpl_op_mux.v +VERILOG_SOURCES += ../../rtl/common/desc_fetch.v +VERILOG_SOURCES += ../../rtl/common/desc_op_mux.v +VERILOG_SOURCES += ../../rtl/common/event_mux.v +VERILOG_SOURCES += ../../rtl/common/queue_manager.v +VERILOG_SOURCES += ../../rtl/common/cpl_queue_manager.v +VERILOG_SOURCES += ../../rtl/common/tx_fifo.v +VERILOG_SOURCES += ../../rtl/common/rx_fifo.v +VERILOG_SOURCES += ../../rtl/common/tx_req_mux.v +VERILOG_SOURCES += ../../rtl/common/tx_engine.v +VERILOG_SOURCES += ../../rtl/common/rx_engine.v +VERILOG_SOURCES += ../../rtl/common/tx_checksum.v +VERILOG_SOURCES += ../../rtl/common/rx_hash.v +VERILOG_SOURCES += ../../rtl/common/rx_checksum.v +VERILOG_SOURCES += ../../rtl/common/stats_counter.v +VERILOG_SOURCES += ../../rtl/common/stats_collect.v +VERILOG_SOURCES += ../../rtl/common/stats_pcie_if.v +VERILOG_SOURCES += ../../rtl/common/stats_pcie_tlp.v +VERILOG_SOURCES += ../../rtl/common/stats_dma_if_pcie.v +VERILOG_SOURCES += ../../rtl/common/stats_dma_latency.v +VERILOG_SOURCES += ../../rtl/common/mqnic_tx_scheduler_block_rr.v +VERILOG_SOURCES += ../../rtl/common/tx_scheduler_rr.v +VERILOG_SOURCES += ../../rtl/mqnic_app_block_dma_bench.v +VERILOG_SOURCES += ../../lib/eth/rtl/ptp_clock.v +VERILOG_SOURCES += ../../lib/eth/rtl/ptp_clock_cdc.v +VERILOG_SOURCES += ../../lib/eth/rtl/ptp_perout.v +VERILOG_SOURCES += ../../lib/eth/rtl/ptp_ts_extract.v +VERILOG_SOURCES += ../../lib/axi/rtl/axil_crossbar.v +VERILOG_SOURCES += ../../lib/axi/rtl/axil_crossbar_addr.v +VERILOG_SOURCES += ../../lib/axi/rtl/axil_crossbar_rd.v +VERILOG_SOURCES += ../../lib/axi/rtl/axil_crossbar_wr.v +VERILOG_SOURCES += ../../lib/axi/rtl/axil_ram.v +VERILOG_SOURCES += ../../lib/axi/rtl/axil_reg_if.v +VERILOG_SOURCES += ../../lib/axi/rtl/axil_reg_if_rd.v +VERILOG_SOURCES += ../../lib/axi/rtl/axil_reg_if_wr.v +VERILOG_SOURCES += ../../lib/axi/rtl/axil_register_rd.v +VERILOG_SOURCES += ../../lib/axi/rtl/axil_register_wr.v +VERILOG_SOURCES += ../../lib/axi/rtl/arbiter.v +VERILOG_SOURCES += ../../lib/axi/rtl/priority_encoder.v +VERILOG_SOURCES += ../../lib/axis/rtl/axis_adapter.v +VERILOG_SOURCES += ../../lib/axis/rtl/axis_arb_mux.v +VERILOG_SOURCES += ../../lib/axis/rtl/axis_async_fifo.v +VERILOG_SOURCES += ../../lib/axis/rtl/axis_async_fifo_adapter.v +VERILOG_SOURCES += ../../lib/axis/rtl/axis_demux.v +VERILOG_SOURCES += ../../lib/axis/rtl/axis_fifo.v +VERILOG_SOURCES += ../../lib/axis/rtl/axis_fifo_adapter.v +VERILOG_SOURCES += ../../lib/axis/rtl/axis_pipeline_fifo.v +VERILOG_SOURCES += ../../lib/axis/rtl/axis_register.v +VERILOG_SOURCES += 
../../lib/pcie/rtl/pcie_axil_master.v +VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_tlp_demux.v +VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_tlp_demux_bar.v +VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_tlp_mux.v +VERILOG_SOURCES += ../../lib/pcie/rtl/dma_if_pcie.v +VERILOG_SOURCES += ../../lib/pcie/rtl/dma_if_pcie_rd.v +VERILOG_SOURCES += ../../lib/pcie/rtl/dma_if_pcie_wr.v +VERILOG_SOURCES += ../../lib/pcie/rtl/dma_if_mux.v +VERILOG_SOURCES += ../../lib/pcie/rtl/dma_if_mux_rd.v +VERILOG_SOURCES += ../../lib/pcie/rtl/dma_if_mux_wr.v +VERILOG_SOURCES += ../../lib/pcie/rtl/dma_if_desc_mux.v +VERILOG_SOURCES += ../../lib/pcie/rtl/dma_ram_demux_rd.v +VERILOG_SOURCES += ../../lib/pcie/rtl/dma_ram_demux_wr.v +VERILOG_SOURCES += ../../lib/pcie/rtl/dma_psdpram.v +VERILOG_SOURCES += ../../lib/pcie/rtl/dma_client_axis_sink.v +VERILOG_SOURCES += ../../lib/pcie/rtl/dma_client_axis_source.v +VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_us_if.v +VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_us_if_rc.v +VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_us_if_rq.v +VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_us_if_cc.v +VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_us_if_cq.v +VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_us_cfg.v +VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_us_msi.v +VERILOG_SOURCES += ../../lib/pcie/rtl/pulse_merge.v + +# module parameters + +# Structural configuration +export PARAM_IF_COUNT ?= 1 +export PARAM_PORTS_PER_IF ?= 1 +export PARAM_SCHED_PER_IF ?= $(PARAM_PORTS_PER_IF) + +# PTP configuration +export PARAM_PTP_CLOCK_PIPELINE ?= 0 +export PARAM_PTP_USE_SAMPLE_CLOCK ?= 0 +export PARAM_PTP_SEPARATE_RX_CLOCK ?= 0 +export PARAM_PTP_PORT_CDC_PIPELINE ?= 0 +export PARAM_PTP_PEROUT_ENABLE ?= 0 +export PARAM_PTP_PEROUT_COUNT ?= 1 + +# Queue manager configuration (interface) +export PARAM_EVENT_QUEUE_OP_TABLE_SIZE ?= 32 +export PARAM_TX_QUEUE_OP_TABLE_SIZE ?= 32 +export PARAM_RX_QUEUE_OP_TABLE_SIZE ?= 32 +export PARAM_TX_CPL_QUEUE_OP_TABLE_SIZE ?= $(PARAM_TX_QUEUE_OP_TABLE_SIZE) +export PARAM_RX_CPL_QUEUE_OP_TABLE_SIZE ?= $(PARAM_RX_QUEUE_OP_TABLE_SIZE) +export PARAM_TX_QUEUE_INDEX_WIDTH ?= 13 +export PARAM_RX_QUEUE_INDEX_WIDTH ?= 8 +export PARAM_TX_CPL_QUEUE_INDEX_WIDTH ?= $(PARAM_TX_QUEUE_INDEX_WIDTH) +export PARAM_RX_CPL_QUEUE_INDEX_WIDTH ?= $(PARAM_RX_QUEUE_INDEX_WIDTH) +export PARAM_EVENT_QUEUE_PIPELINE ?= 3 +export PARAM_TX_QUEUE_PIPELINE ?= $(shell python -c "print(3 + max($(PARAM_TX_QUEUE_INDEX_WIDTH)-12, 0))") +export PARAM_RX_QUEUE_PIPELINE ?= $(shell python -c "print(3 + max($(PARAM_RX_QUEUE_INDEX_WIDTH)-12, 0))") +export PARAM_TX_CPL_QUEUE_PIPELINE ?= $(PARAM_TX_QUEUE_PIPELINE) +export PARAM_RX_CPL_QUEUE_PIPELINE ?= $(PARAM_RX_QUEUE_PIPELINE) + +# TX and RX engine configuration (port) +export PARAM_TX_DESC_TABLE_SIZE ?= 32 +export PARAM_RX_DESC_TABLE_SIZE ?= 32 + +# Scheduler configuration (port) +export PARAM_TX_SCHEDULER_OP_TABLE_SIZE ?= $(PARAM_TX_DESC_TABLE_SIZE) +export PARAM_TX_SCHEDULER_PIPELINE ?= $(PARAM_TX_QUEUE_PIPELINE) +export PARAM_TDMA_INDEX_WIDTH ?= 6 + +# Timestamping configuration (port) +export PARAM_PTP_TS_ENABLE ?= 1 +export PARAM_TX_PTP_TS_FIFO_DEPTH ?= 32 +export PARAM_RX_PTP_TS_FIFO_DEPTH ?= 32 + +# Interface configuration (port) +export PARAM_TX_CHECKSUM_ENABLE ?= 1 +export PARAM_RX_RSS_ENABLE ?= 1 +export PARAM_RX_HASH_ENABLE ?= 1 +export PARAM_RX_CHECKSUM_ENABLE ?= 1 +export PARAM_TX_FIFO_DEPTH ?= 32768 +export PARAM_RX_FIFO_DEPTH ?= 131072 +export PARAM_MAX_TX_SIZE ?= 9214 +export PARAM_MAX_RX_SIZE ?= 9214 +export PARAM_TX_RAM_SIZE ?= 131072 +export PARAM_RX_RAM_SIZE ?= 
131072 + +# Application block configuration +export PARAM_APP_ID ?= $(shell echo $$((16#12348001)) ) +export PARAM_APP_ENABLE ?= 1 +export PARAM_APP_CTRL_ENABLE ?= 0 +export PARAM_APP_DMA_ENABLE ?= 1 +export PARAM_APP_AXIS_DIRECT_ENABLE ?= 0 +export PARAM_APP_AXIS_SYNC_ENABLE ?= 0 +export PARAM_APP_AXIS_IF_ENABLE ?= 0 +export PARAM_APP_STAT_ENABLE ?= 0 + +# DMA interface configuration +export PARAM_DMA_IMM_ENABLE ?= 1 +export PARAM_DMA_IMM_WIDTH ?= 32 +export PARAM_DMA_LEN_WIDTH ?= 16 +export PARAM_DMA_TAG_WIDTH ?= 16 +export PARAM_RAM_ADDR_WIDTH ?= $(shell python -c "print((max($(PARAM_TX_RAM_SIZE), $(PARAM_RX_RAM_SIZE))-1).bit_length())") +export PARAM_RAM_PIPELINE ?= 2 + +# PCIe interface configuration +export PARAM_AXIS_PCIE_DATA_WIDTH ?= 512 +export PARAM_PF_COUNT ?= 1 +export PARAM_VF_COUNT ?= 0 +export PARAM_PCIE_TAG_COUNT ?= 64 +export PARAM_PCIE_DMA_READ_OP_TABLE_SIZE ?= $(PARAM_PCIE_TAG_COUNT) +export PARAM_PCIE_DMA_READ_TX_LIMIT ?= 16 +export PARAM_PCIE_DMA_READ_TX_FC_ENABLE ?= 1 +export PARAM_PCIE_DMA_WRITE_OP_TABLE_SIZE ?= 16 +export PARAM_PCIE_DMA_WRITE_TX_LIMIT ?= 3 +export PARAM_PCIE_DMA_WRITE_TX_FC_ENABLE ?= 1 +export PARAM_MSI_COUNT ?= 32 + +# AXI lite interface configuration (control) +export PARAM_AXIL_CTRL_DATA_WIDTH ?= 32 +export PARAM_AXIL_CTRL_ADDR_WIDTH ?= 24 +export PARAM_AXIL_CSR_PASSTHROUGH_ENABLE ?= 0 + +# AXI lite interface configuration (application control) +export PARAM_AXIL_APP_CTRL_DATA_WIDTH ?= $(PARAM_AXIL_CTRL_DATA_WIDTH) +export PARAM_AXIL_APP_CTRL_ADDR_WIDTH ?= 24 + +# Ethernet interface configuration +export PARAM_AXIS_ETH_DATA_WIDTH ?= 512 +export PARAM_AXIS_ETH_SYNC_DATA_WIDTH ?= $(PARAM_AXIS_ETH_DATA_WIDTH) +export PARAM_AXIS_ETH_RX_USE_READY ?= 0 +export PARAM_AXIS_ETH_TX_PIPELINE ?= 0 +export PARAM_AXIS_ETH_TX_FIFO_PIPELINE ?= 2 +export PARAM_AXIS_ETH_TX_TS_PIPELINE ?= 0 +export PARAM_AXIS_ETH_RX_PIPELINE ?= 0 +export PARAM_AXIS_ETH_RX_FIFO_PIPELINE ?= 2 + +# Statistics counter subsystem +export PARAM_STAT_ENABLE ?= 1 +export PARAM_STAT_DMA_ENABLE ?= 1 +export PARAM_STAT_PCIE_ENABLE ?= 1 +export PARAM_STAT_INC_WIDTH ?= 24 +export PARAM_STAT_ID_WIDTH ?= 12 + +ifeq ($(SIM), icarus) + PLUSARGS += -fst + + COMPILE_ARGS += -P $(TOPLEVEL).IF_COUNT=$(PARAM_IF_COUNT) + COMPILE_ARGS += -P $(TOPLEVEL).PORTS_PER_IF=$(PARAM_PORTS_PER_IF) + COMPILE_ARGS += -P $(TOPLEVEL).SCHED_PER_IF=$(PARAM_SCHED_PER_IF) + COMPILE_ARGS += -P $(TOPLEVEL).PTP_CLOCK_PIPELINE=$(PARAM_PTP_CLOCK_PIPELINE) + COMPILE_ARGS += -P $(TOPLEVEL).PTP_USE_SAMPLE_CLOCK=$(PARAM_PTP_USE_SAMPLE_CLOCK) + COMPILE_ARGS += -P $(TOPLEVEL).PTP_SEPARATE_RX_CLOCK=$(PARAM_PTP_SEPARATE_RX_CLOCK) + COMPILE_ARGS += -P $(TOPLEVEL).PTP_PORT_CDC_PIPELINE=$(PARAM_PTP_PORT_CDC_PIPELINE) + COMPILE_ARGS += -P $(TOPLEVEL).PTP_PEROUT_ENABLE=$(PARAM_PTP_PEROUT_ENABLE) + COMPILE_ARGS += -P $(TOPLEVEL).PTP_PEROUT_COUNT=$(PARAM_PTP_PEROUT_COUNT) + COMPILE_ARGS += -P $(TOPLEVEL).EVENT_QUEUE_OP_TABLE_SIZE=$(PARAM_EVENT_QUEUE_OP_TABLE_SIZE) + COMPILE_ARGS += -P $(TOPLEVEL).TX_QUEUE_OP_TABLE_SIZE=$(PARAM_TX_QUEUE_OP_TABLE_SIZE) + COMPILE_ARGS += -P $(TOPLEVEL).RX_QUEUE_OP_TABLE_SIZE=$(PARAM_RX_QUEUE_OP_TABLE_SIZE) + COMPILE_ARGS += -P $(TOPLEVEL).TX_CPL_QUEUE_OP_TABLE_SIZE=$(PARAM_TX_CPL_QUEUE_OP_TABLE_SIZE) + COMPILE_ARGS += -P $(TOPLEVEL).RX_CPL_QUEUE_OP_TABLE_SIZE=$(PARAM_RX_CPL_QUEUE_OP_TABLE_SIZE) + COMPILE_ARGS += -P $(TOPLEVEL).TX_QUEUE_INDEX_WIDTH=$(PARAM_TX_QUEUE_INDEX_WIDTH) + COMPILE_ARGS += -P $(TOPLEVEL).RX_QUEUE_INDEX_WIDTH=$(PARAM_RX_QUEUE_INDEX_WIDTH) + COMPILE_ARGS += -P 
$(TOPLEVEL).TX_CPL_QUEUE_INDEX_WIDTH=$(PARAM_TX_CPL_QUEUE_INDEX_WIDTH) + COMPILE_ARGS += -P $(TOPLEVEL).RX_CPL_QUEUE_INDEX_WIDTH=$(PARAM_RX_CPL_QUEUE_INDEX_WIDTH) + COMPILE_ARGS += -P $(TOPLEVEL).EVENT_QUEUE_PIPELINE=$(PARAM_EVENT_QUEUE_PIPELINE) + COMPILE_ARGS += -P $(TOPLEVEL).TX_QUEUE_PIPELINE=$(PARAM_TX_QUEUE_PIPELINE) + COMPILE_ARGS += -P $(TOPLEVEL).RX_QUEUE_PIPELINE=$(PARAM_RX_QUEUE_PIPELINE) + COMPILE_ARGS += -P $(TOPLEVEL).TX_CPL_QUEUE_PIPELINE=$(PARAM_TX_CPL_QUEUE_PIPELINE) + COMPILE_ARGS += -P $(TOPLEVEL).RX_CPL_QUEUE_PIPELINE=$(PARAM_RX_CPL_QUEUE_PIPELINE) + COMPILE_ARGS += -P $(TOPLEVEL).TX_DESC_TABLE_SIZE=$(PARAM_TX_DESC_TABLE_SIZE) + COMPILE_ARGS += -P $(TOPLEVEL).RX_DESC_TABLE_SIZE=$(PARAM_RX_DESC_TABLE_SIZE) + COMPILE_ARGS += -P $(TOPLEVEL).TX_SCHEDULER_OP_TABLE_SIZE=$(PARAM_TX_SCHEDULER_OP_TABLE_SIZE) + COMPILE_ARGS += -P $(TOPLEVEL).TX_SCHEDULER_PIPELINE=$(PARAM_TX_SCHEDULER_PIPELINE) + COMPILE_ARGS += -P $(TOPLEVEL).TDMA_INDEX_WIDTH=$(PARAM_TDMA_INDEX_WIDTH) + COMPILE_ARGS += -P $(TOPLEVEL).PTP_TS_ENABLE=$(PARAM_PTP_TS_ENABLE) + COMPILE_ARGS += -P $(TOPLEVEL).TX_PTP_TS_FIFO_DEPTH=$(PARAM_TX_PTP_TS_FIFO_DEPTH) + COMPILE_ARGS += -P $(TOPLEVEL).RX_PTP_TS_FIFO_DEPTH=$(PARAM_RX_PTP_TS_FIFO_DEPTH) + COMPILE_ARGS += -P $(TOPLEVEL).TX_CHECKSUM_ENABLE=$(PARAM_TX_CHECKSUM_ENABLE) + COMPILE_ARGS += -P $(TOPLEVEL).RX_RSS_ENABLE=$(PARAM_RX_RSS_ENABLE) + COMPILE_ARGS += -P $(TOPLEVEL).RX_HASH_ENABLE=$(PARAM_RX_HASH_ENABLE) + COMPILE_ARGS += -P $(TOPLEVEL).RX_CHECKSUM_ENABLE=$(PARAM_RX_CHECKSUM_ENABLE) + COMPILE_ARGS += -P $(TOPLEVEL).TX_FIFO_DEPTH=$(PARAM_TX_FIFO_DEPTH) + COMPILE_ARGS += -P $(TOPLEVEL).RX_FIFO_DEPTH=$(PARAM_RX_FIFO_DEPTH) + COMPILE_ARGS += -P $(TOPLEVEL).MAX_TX_SIZE=$(PARAM_MAX_TX_SIZE) + COMPILE_ARGS += -P $(TOPLEVEL).MAX_RX_SIZE=$(PARAM_MAX_RX_SIZE) + COMPILE_ARGS += -P $(TOPLEVEL).TX_RAM_SIZE=$(PARAM_TX_RAM_SIZE) + COMPILE_ARGS += -P $(TOPLEVEL).RX_RAM_SIZE=$(PARAM_RX_RAM_SIZE) + COMPILE_ARGS += -P $(TOPLEVEL).APP_ID=$(PARAM_APP_ID) + COMPILE_ARGS += -P $(TOPLEVEL).APP_ENABLE=$(PARAM_APP_ENABLE) + COMPILE_ARGS += -P $(TOPLEVEL).APP_CTRL_ENABLE=$(PARAM_APP_CTRL_ENABLE) + COMPILE_ARGS += -P $(TOPLEVEL).APP_DMA_ENABLE=$(PARAM_APP_DMA_ENABLE) + COMPILE_ARGS += -P $(TOPLEVEL).APP_AXIS_DIRECT_ENABLE=$(PARAM_APP_AXIS_DIRECT_ENABLE) + COMPILE_ARGS += -P $(TOPLEVEL).APP_AXIS_SYNC_ENABLE=$(PARAM_APP_AXIS_SYNC_ENABLE) + COMPILE_ARGS += -P $(TOPLEVEL).APP_AXIS_IF_ENABLE=$(PARAM_APP_AXIS_IF_ENABLE) + COMPILE_ARGS += -P $(TOPLEVEL).APP_STAT_ENABLE=$(PARAM_APP_STAT_ENABLE) + COMPILE_ARGS += -P $(TOPLEVEL).DMA_IMM_ENABLE=$(PARAM_DMA_IMM_ENABLE) + COMPILE_ARGS += -P $(TOPLEVEL).DMA_IMM_WIDTH=$(PARAM_DMA_IMM_WIDTH) + COMPILE_ARGS += -P $(TOPLEVEL).DMA_LEN_WIDTH=$(PARAM_DMA_LEN_WIDTH) + COMPILE_ARGS += -P $(TOPLEVEL).DMA_TAG_WIDTH=$(PARAM_DMA_TAG_WIDTH) + COMPILE_ARGS += -P $(TOPLEVEL).RAM_ADDR_WIDTH=$(PARAM_RAM_ADDR_WIDTH) + COMPILE_ARGS += -P $(TOPLEVEL).RAM_PIPELINE=$(PARAM_RAM_PIPELINE) + COMPILE_ARGS += -P $(TOPLEVEL).AXIS_PCIE_DATA_WIDTH=$(PARAM_AXIS_PCIE_DATA_WIDTH) + COMPILE_ARGS += -P $(TOPLEVEL).PF_COUNT=$(PARAM_PF_COUNT) + COMPILE_ARGS += -P $(TOPLEVEL).VF_COUNT=$(PARAM_VF_COUNT) + COMPILE_ARGS += -P $(TOPLEVEL).PCIE_TAG_COUNT=$(PARAM_PCIE_TAG_COUNT) + COMPILE_ARGS += -P $(TOPLEVEL).PCIE_DMA_READ_OP_TABLE_SIZE=$(PARAM_PCIE_DMA_READ_OP_TABLE_SIZE) + COMPILE_ARGS += -P $(TOPLEVEL).PCIE_DMA_READ_TX_LIMIT=$(PARAM_PCIE_DMA_READ_TX_LIMIT) + COMPILE_ARGS += -P $(TOPLEVEL).PCIE_DMA_READ_TX_FC_ENABLE=$(PARAM_PCIE_DMA_READ_TX_FC_ENABLE) + COMPILE_ARGS += -P 
$(TOPLEVEL).PCIE_DMA_WRITE_OP_TABLE_SIZE=$(PARAM_PCIE_DMA_WRITE_OP_TABLE_SIZE) + COMPILE_ARGS += -P $(TOPLEVEL).PCIE_DMA_WRITE_TX_LIMIT=$(PARAM_PCIE_DMA_WRITE_TX_LIMIT) + COMPILE_ARGS += -P $(TOPLEVEL).PCIE_DMA_WRITE_TX_FC_ENABLE=$(PARAM_PCIE_DMA_WRITE_TX_FC_ENABLE) + COMPILE_ARGS += -P $(TOPLEVEL).MSI_COUNT=$(PARAM_MSI_COUNT) + COMPILE_ARGS += -P $(TOPLEVEL).AXIL_CTRL_DATA_WIDTH=$(PARAM_AXIL_CTRL_DATA_WIDTH) + COMPILE_ARGS += -P $(TOPLEVEL).AXIL_CTRL_ADDR_WIDTH=$(PARAM_AXIL_CTRL_ADDR_WIDTH) + COMPILE_ARGS += -P $(TOPLEVEL).AXIL_CSR_PASSTHROUGH_ENABLE=$(PARAM_AXIL_CSR_PASSTHROUGH_ENABLE) + COMPILE_ARGS += -P $(TOPLEVEL).AXIL_APP_CTRL_DATA_WIDTH=$(PARAM_AXIL_APP_CTRL_DATA_WIDTH) + COMPILE_ARGS += -P $(TOPLEVEL).AXIL_APP_CTRL_ADDR_WIDTH=$(PARAM_AXIL_APP_CTRL_ADDR_WIDTH) + COMPILE_ARGS += -P $(TOPLEVEL).AXIS_ETH_DATA_WIDTH=$(PARAM_AXIS_ETH_DATA_WIDTH) + COMPILE_ARGS += -P $(TOPLEVEL).AXIS_ETH_SYNC_DATA_WIDTH=$(PARAM_AXIS_ETH_SYNC_DATA_WIDTH) + COMPILE_ARGS += -P $(TOPLEVEL).AXIS_ETH_RX_USE_READY=$(PARAM_AXIS_ETH_RX_USE_READY) + COMPILE_ARGS += -P $(TOPLEVEL).AXIS_ETH_TX_PIPELINE=$(PARAM_AXIS_ETH_TX_PIPELINE) + COMPILE_ARGS += -P $(TOPLEVEL).AXIS_ETH_TX_FIFO_PIPELINE=$(PARAM_AXIS_ETH_TX_FIFO_PIPELINE) + COMPILE_ARGS += -P $(TOPLEVEL).AXIS_ETH_TX_TS_PIPELINE=$(PARAM_AXIS_ETH_TX_TS_PIPELINE) + COMPILE_ARGS += -P $(TOPLEVEL).AXIS_ETH_RX_PIPELINE=$(PARAM_AXIS_ETH_RX_PIPELINE) + COMPILE_ARGS += -P $(TOPLEVEL).AXIS_ETH_RX_FIFO_PIPELINE=$(PARAM_AXIS_ETH_RX_FIFO_PIPELINE) + COMPILE_ARGS += -P $(TOPLEVEL).STAT_ENABLE=$(PARAM_STAT_ENABLE) + COMPILE_ARGS += -P $(TOPLEVEL).STAT_DMA_ENABLE=$(PARAM_STAT_DMA_ENABLE) + COMPILE_ARGS += -P $(TOPLEVEL).STAT_PCIE_ENABLE=$(PARAM_STAT_PCIE_ENABLE) + COMPILE_ARGS += -P $(TOPLEVEL).STAT_INC_WIDTH=$(PARAM_STAT_INC_WIDTH) + COMPILE_ARGS += -P $(TOPLEVEL).STAT_ID_WIDTH=$(PARAM_STAT_ID_WIDTH) + + ifeq ($(WAVES), 1) + VERILOG_SOURCES += iverilog_dump.v + COMPILE_ARGS += -s iverilog_dump + endif +else ifeq ($(SIM), verilator) + COMPILE_ARGS += -Wno-SELRANGE -Wno-WIDTH + + COMPILE_ARGS += -GIF_COUNT=$(PARAM_IF_COUNT) + COMPILE_ARGS += -GPORTS_PER_IF=$(PARAM_PORTS_PER_IF) + COMPILE_ARGS += -GSCHED_PER_IF=$(PARAM_SCHED_PER_IF) + COMPILE_ARGS += -GPTP_CLOCK_PIPELINE=$(PARAM_PTP_CLOCK_PIPELINE) + COMPILE_ARGS += -GPTP_USE_SAMPLE_CLOCK=$(PARAM_PTP_USE_SAMPLE_CLOCK) + COMPILE_ARGS += -GPTP_SEPARATE_RX_CLOCK=$(PARAM_PTP_SEPARATE_RX_CLOCK) + COMPILE_ARGS += -GPTP_PORT_CDC_PIPELINE=$(PARAM_PTP_PORT_CDC_PIPELINE) + COMPILE_ARGS += -GPTP_PEROUT_ENABLE=$(PARAM_PTP_PEROUT_ENABLE) + COMPILE_ARGS += -GPTP_PEROUT_COUNT=$(PARAM_PTP_PEROUT_COUNT) + COMPILE_ARGS += -GEVENT_QUEUE_OP_TABLE_SIZE=$(PARAM_EVENT_QUEUE_OP_TABLE_SIZE) + COMPILE_ARGS += -GTX_QUEUE_OP_TABLE_SIZE=$(PARAM_TX_QUEUE_OP_TABLE_SIZE) + COMPILE_ARGS += -GRX_QUEUE_OP_TABLE_SIZE=$(PARAM_RX_QUEUE_OP_TABLE_SIZE) + COMPILE_ARGS += -GTX_CPL_QUEUE_OP_TABLE_SIZE=$(PARAM_TX_CPL_QUEUE_OP_TABLE_SIZE) + COMPILE_ARGS += -GRX_CPL_QUEUE_OP_TABLE_SIZE=$(PARAM_RX_CPL_QUEUE_OP_TABLE_SIZE) + COMPILE_ARGS += -GTX_QUEUE_INDEX_WIDTH=$(PARAM_TX_QUEUE_INDEX_WIDTH) + COMPILE_ARGS += -GRX_QUEUE_INDEX_WIDTH=$(PARAM_RX_QUEUE_INDEX_WIDTH) + COMPILE_ARGS += -GTX_CPL_QUEUE_INDEX_WIDTH=$(PARAM_TX_CPL_QUEUE_INDEX_WIDTH) + COMPILE_ARGS += -GRX_CPL_QUEUE_INDEX_WIDTH=$(PARAM_RX_CPL_QUEUE_INDEX_WIDTH) + COMPILE_ARGS += -GEVENT_QUEUE_PIPELINE=$(PARAM_EVENT_QUEUE_PIPELINE) + COMPILE_ARGS += -GTX_QUEUE_PIPELINE=$(PARAM_TX_QUEUE_PIPELINE) + COMPILE_ARGS += -GRX_QUEUE_PIPELINE=$(PARAM_RX_QUEUE_PIPELINE) + COMPILE_ARGS += 
-GTX_CPL_QUEUE_PIPELINE=$(PARAM_TX_CPL_QUEUE_PIPELINE) + COMPILE_ARGS += -GRX_CPL_QUEUE_PIPELINE=$(PARAM_RX_CPL_QUEUE_PIPELINE) + COMPILE_ARGS += -GTX_DESC_TABLE_SIZE=$(PARAM_TX_DESC_TABLE_SIZE) + COMPILE_ARGS += -GRX_DESC_TABLE_SIZE=$(PARAM_RX_DESC_TABLE_SIZE) + COMPILE_ARGS += -GTX_SCHEDULER_OP_TABLE_SIZE=$(PARAM_TX_SCHEDULER_OP_TABLE_SIZE) + COMPILE_ARGS += -GTX_SCHEDULER_PIPELINE=$(PARAM_TX_SCHEDULER_PIPELINE) + COMPILE_ARGS += -GTDMA_INDEX_WIDTH=$(PARAM_TDMA_INDEX_WIDTH) + COMPILE_ARGS += -GPTP_TS_ENABLE=$(PARAM_PTP_TS_ENABLE) + COMPILE_ARGS += -GTX_PTP_TS_FIFO_DEPTH=$(PARAM_TX_PTP_TS_FIFO_DEPTH) + COMPILE_ARGS += -GRX_PTP_TS_FIFO_DEPTH=$(PARAM_RX_PTP_TS_FIFO_DEPTH) + COMPILE_ARGS += -GTX_CHECKSUM_ENABLE=$(PARAM_TX_CHECKSUM_ENABLE) + COMPILE_ARGS += -GRX_RSS_ENABLE=$(PARAM_RX_RSS_ENABLE) + COMPILE_ARGS += -GRX_HASH_ENABLE=$(PARAM_RX_HASH_ENABLE) + COMPILE_ARGS += -GRX_CHECKSUM_ENABLE=$(PARAM_RX_CHECKSUM_ENABLE) + COMPILE_ARGS += -GTX_FIFO_DEPTH=$(PARAM_TX_FIFO_DEPTH) + COMPILE_ARGS += -GRX_FIFO_DEPTH=$(PARAM_RX_FIFO_DEPTH) + COMPILE_ARGS += -GMAX_TX_SIZE=$(PARAM_MAX_TX_SIZE) + COMPILE_ARGS += -GMAX_RX_SIZE=$(PARAM_MAX_RX_SIZE) + COMPILE_ARGS += -GTX_RAM_SIZE=$(PARAM_TX_RAM_SIZE) + COMPILE_ARGS += -GRX_RAM_SIZE=$(PARAM_RX_RAM_SIZE) + COMPILE_ARGS += -GAPP_ID=$(PARAM_APP_ID) + COMPILE_ARGS += -GAPP_ENABLE=$(PARAM_APP_ENABLE) + COMPILE_ARGS += -GAPP_CTRL_ENABLE=$(PARAM_APP_CTRL_ENABLE) + COMPILE_ARGS += -GAPP_DMA_ENABLE=$(PARAM_APP_DMA_ENABLE) + COMPILE_ARGS += -GAPP_AXIS_DIRECT_ENABLE=$(PARAM_APP_AXIS_DIRECT_ENABLE) + COMPILE_ARGS += -GAPP_AXIS_SYNC_ENABLE=$(PARAM_APP_AXIS_SYNC_ENABLE) + COMPILE_ARGS += -GAPP_AXIS_IF_ENABLE=$(PARAM_APP_AXIS_IF_ENABLE) + COMPILE_ARGS += -GAPP_STAT_ENABLE=$(PARAM_APP_STAT_ENABLE) + COMPILE_ARGS += -GDMA_IMM_ENABLE=$(PARAM_DMA_IMM_ENABLE) + COMPILE_ARGS += -GDMA_IMM_WIDTH=$(PARAM_DMA_IMM_WIDTH) + COMPILE_ARGS += -GDMA_LEN_WIDTH=$(PARAM_DMA_LEN_WIDTH) + COMPILE_ARGS += -GDMA_TAG_WIDTH=$(PARAM_DMA_TAG_WIDTH) + COMPILE_ARGS += -GRAM_ADDR_WIDTH=$(PARAM_RAM_ADDR_WIDTH) + COMPILE_ARGS += -GRAM_PIPELINE=$(PARAM_RAM_PIPELINE) + COMPILE_ARGS += -GAXIS_PCIE_DATA_WIDTH=$(PARAM_AXIS_PCIE_DATA_WIDTH) + COMPILE_ARGS += -GPF_COUNT=$(PARAM_PF_COUNT) + COMPILE_ARGS += -GVF_COUNT=$(PARAM_VF_COUNT) + COMPILE_ARGS += -GPCIE_TAG_COUNT=$(PARAM_PCIE_TAG_COUNT) + COMPILE_ARGS += -GPCIE_DMA_READ_OP_TABLE_SIZE=$(PARAM_PCIE_DMA_READ_OP_TABLE_SIZE) + COMPILE_ARGS += -GPCIE_DMA_READ_TX_LIMIT=$(PARAM_PCIE_DMA_READ_TX_LIMIT) + COMPILE_ARGS += -GPCIE_DMA_READ_TX_FC_ENABLE=$(PARAM_PCIE_DMA_READ_TX_FC_ENABLE) + COMPILE_ARGS += -GPCIE_DMA_WRITE_OP_TABLE_SIZE=$(PARAM_PCIE_DMA_WRITE_OP_TABLE_SIZE) + COMPILE_ARGS += -GPCIE_DMA_WRITE_TX_LIMIT=$(PARAM_PCIE_DMA_WRITE_TX_LIMIT) + COMPILE_ARGS += -GPCIE_DMA_WRITE_TX_FC_ENABLE=$(PARAM_PCIE_DMA_WRITE_TX_FC_ENABLE) + COMPILE_ARGS += -GMSI_COUNT=$(PARAM_MSI_COUNT) + COMPILE_ARGS += -GAXIL_CTRL_DATA_WIDTH=$(PARAM_AXIL_CTRL_DATA_WIDTH) + COMPILE_ARGS += -GAXIL_CTRL_ADDR_WIDTH=$(PARAM_AXIL_CTRL_ADDR_WIDTH) + COMPILE_ARGS += -GAXIL_CSR_PASSTHROUGH_ENABLE=$(PARAM_AXIL_CSR_PASSTHROUGH_ENABLE) + COMPILE_ARGS += -GAXIL_APP_CTRL_DATA_WIDTH=$(PARAM_AXIL_APP_CTRL_DATA_WIDTH) + COMPILE_ARGS += -GAXIL_APP_CTRL_ADDR_WIDTH=$(PARAM_AXIL_APP_CTRL_ADDR_WIDTH) + COMPILE_ARGS += -GAXIS_ETH_DATA_WIDTH=$(PARAM_AXIS_ETH_DATA_WIDTH) + COMPILE_ARGS += -GAXIS_ETH_SYNC_DATA_WIDTH=$(PARAM_AXIS_ETH_SYNC_DATA_WIDTH) + COMPILE_ARGS += -GAXIS_ETH_RX_USE_READY=$(PARAM_AXIS_ETH_RX_USE_READY) + COMPILE_ARGS += -GAXIS_ETH_TX_PIPELINE=$(PARAM_AXIS_ETH_TX_PIPELINE) + COMPILE_ARGS += 
-GAXIS_ETH_TX_FIFO_PIPELINE=$(PARAM_AXIS_ETH_TX_FIFO_PIPELINE) + COMPILE_ARGS += -GAXIS_ETH_TX_TS_PIPELINE=$(PARAM_AXIS_ETH_TX_TS_PIPELINE) + COMPILE_ARGS += -GAXIS_ETH_RX_PIPELINE=$(PARAM_AXIS_ETH_RX_PIPELINE) + COMPILE_ARGS += -GAXIS_ETH_RX_FIFO_PIPELINE=$(PARAM_AXIS_ETH_RX_FIFO_PIPELINE) + COMPILE_ARGS += -GSTAT_ENABLE=$(PARAM_STAT_ENABLE) + COMPILE_ARGS += -GSTAT_DMA_ENABLE=$(PARAM_STAT_DMA_ENABLE) + COMPILE_ARGS += -GSTAT_PCIE_ENABLE=$(PARAM_STAT_PCIE_ENABLE) + COMPILE_ARGS += -GSTAT_INC_WIDTH=$(PARAM_STAT_INC_WIDTH) + COMPILE_ARGS += -GSTAT_ID_WIDTH=$(PARAM_STAT_ID_WIDTH) + + ifeq ($(WAVES), 1) + COMPILE_ARGS += --trace-fst + endif +endif + +include $(shell cocotb-config --makefiles)/Makefile.sim + +iverilog_dump.v: + echo 'module iverilog_dump();' > $@ + echo 'initial begin' >> $@ + echo ' $$dumpfile("$(TOPLEVEL).fst");' >> $@ + echo ' $$dumpvars(0, $(TOPLEVEL));' >> $@ + echo 'end' >> $@ + echo 'endmodule' >> $@ + +clean:: + @rm -rf iverilog_dump.v + @rm -rf dump.fst $(TOPLEVEL).fst diff --git a/fpga/app/dma_bench/tb/mqnic_core_pcie_us/mqnic.py b/fpga/app/dma_bench/tb/mqnic_core_pcie_us/mqnic.py new file mode 120000 index 000000000..f2c96aec4 --- /dev/null +++ b/fpga/app/dma_bench/tb/mqnic_core_pcie_us/mqnic.py @@ -0,0 +1 @@ +../../../../common/tb/mqnic.py \ No newline at end of file diff --git a/fpga/app/dma_bench/tb/mqnic_core_pcie_us/test_mqnic_core_pcie_us.py b/fpga/app/dma_bench/tb/mqnic_core_pcie_us/test_mqnic_core_pcie_us.py new file mode 100644 index 000000000..85a8a86d5 --- /dev/null +++ b/fpga/app/dma_bench/tb/mqnic_core_pcie_us/test_mqnic_core_pcie_us.py @@ -0,0 +1,967 @@ +""" + +Copyright 2021, The Regents of the University of California. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS OF THE UNIVERSITY OF CALIFORNIA ''AS +IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OF THE UNIVERSITY OF CALIFORNIA OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT +OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING +IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY +OF SUCH DAMAGE. + +The views and conclusions contained in the software and documentation are those +of the authors and should not be interpreted as representing official policies, +either expressed or implied, of The Regents of the University of California. 
+ +""" + +import logging +import os +import sys + +import scapy.utils +from scapy.layers.l2 import Ether +from scapy.layers.inet import IP, UDP + +import cocotb_test.simulator +import pytest + +import cocotb +from cocotb.log import SimLog +from cocotb.clock import Clock +from cocotb.triggers import RisingEdge, FallingEdge, Timer + +from cocotbext.axi import AxiStreamBus +from cocotbext.eth import EthMac +from cocotbext.pcie.core import RootComplex +from cocotbext.pcie.xilinx.us import UltraScalePlusPcieDevice + +try: + import mqnic +except ImportError: + # attempt import from current directory + sys.path.insert(0, os.path.join(os.path.dirname(__file__))) + try: + import mqnic + finally: + del sys.path[0] + + +class TB(object): + def __init__(self, dut): + self.dut = dut + + self.log = SimLog("cocotb.tb") + self.log.setLevel(logging.DEBUG) + + # PCIe + self.rc = RootComplex() + + self.rc.max_payload_size = 0x1 # 256 bytes + self.rc.max_read_request_size = 0x2 # 512 bytes + + self.dev = UltraScalePlusPcieDevice( + # configuration options + pcie_generation=3, + # pcie_link_width=16, + user_clk_frequency=250e6, + alignment="dword", + cq_cc_straddle=False, + rq_rc_straddle=False, + rc_4tlp_straddle=False, + enable_pf1=False, + enable_client_tag=True, + enable_extended_tag=True, + enable_parity=False, + enable_rx_msg_interface=False, + enable_sriov=False, + enable_extended_configuration=False, + + enable_pf0_msi=True, + enable_pf1_msi=False, + + # signals + # Clock and Reset Interface + user_clk=dut.clk, + user_reset=dut.rst, + # user_lnk_up + # sys_clk + # sys_clk_gt + # sys_reset + # phy_rdy_out + + # Requester reQuest Interface + rq_bus=AxiStreamBus.from_prefix(dut, "m_axis_rq"), + pcie_rq_seq_num0=dut.s_axis_rq_seq_num_0, + pcie_rq_seq_num_vld0=dut.s_axis_rq_seq_num_valid_0, + pcie_rq_seq_num1=dut.s_axis_rq_seq_num_1, + pcie_rq_seq_num_vld1=dut.s_axis_rq_seq_num_valid_1, + # pcie_rq_tag0 + # pcie_rq_tag1 + # pcie_rq_tag_av + # pcie_rq_tag_vld0 + # pcie_rq_tag_vld1 + + # Requester Completion Interface + rc_bus=AxiStreamBus.from_prefix(dut, "s_axis_rc"), + + # Completer reQuest Interface + cq_bus=AxiStreamBus.from_prefix(dut, "s_axis_cq"), + # pcie_cq_np_req + # pcie_cq_np_req_count + + # Completer Completion Interface + cc_bus=AxiStreamBus.from_prefix(dut, "m_axis_cc"), + + # Transmit Flow Control Interface + # pcie_tfc_nph_av=dut.pcie_tfc_nph_av, + # pcie_tfc_npd_av=dut.pcie_tfc_npd_av, + + # Configuration Management Interface + cfg_mgmt_addr=dut.cfg_mgmt_addr, + cfg_mgmt_function_number=dut.cfg_mgmt_function_number, + cfg_mgmt_write=dut.cfg_mgmt_write, + cfg_mgmt_write_data=dut.cfg_mgmt_write_data, + cfg_mgmt_byte_enable=dut.cfg_mgmt_byte_enable, + cfg_mgmt_read=dut.cfg_mgmt_read, + cfg_mgmt_read_data=dut.cfg_mgmt_read_data, + cfg_mgmt_read_write_done=dut.cfg_mgmt_read_write_done, + # cfg_mgmt_debug_access + + # Configuration Status Interface + # cfg_phy_link_down + # cfg_phy_link_status + # cfg_negotiated_width + # cfg_current_speed + cfg_max_payload=dut.cfg_max_payload, + cfg_max_read_req=dut.cfg_max_read_req, + # cfg_function_status + # cfg_vf_status + # cfg_function_power_state + # cfg_vf_power_state + # cfg_link_power_state + # cfg_err_cor_out + # cfg_err_nonfatal_out + # cfg_err_fatal_out + # cfg_local_error_out + # cfg_local_error_valid + # cfg_rx_pm_state + # cfg_tx_pm_state + # cfg_ltssm_state + # cfg_rcb_status + # cfg_obff_enable + # cfg_pl_status_change + # cfg_tph_requester_enable + # cfg_tph_st_mode + # cfg_vf_tph_requester_enable + # cfg_vf_tph_st_mode + + # Configuration 
Received Message Interface + # cfg_msg_received + # cfg_msg_received_data + # cfg_msg_received_type + + # Configuration Transmit Message Interface + # cfg_msg_transmit + # cfg_msg_transmit_type + # cfg_msg_transmit_data + # cfg_msg_transmit_done + + # Configuration Flow Control Interface + cfg_fc_ph=dut.cfg_fc_ph, + cfg_fc_pd=dut.cfg_fc_pd, + cfg_fc_nph=dut.cfg_fc_nph, + cfg_fc_npd=dut.cfg_fc_npd, + cfg_fc_cplh=dut.cfg_fc_cplh, + cfg_fc_cpld=dut.cfg_fc_cpld, + cfg_fc_sel=dut.cfg_fc_sel, + + # Configuration Control Interface + # cfg_hot_reset_in + # cfg_hot_reset_out + # cfg_config_space_enable + # cfg_dsn + # cfg_bus_number + # cfg_ds_port_number + # cfg_ds_bus_number + # cfg_ds_device_number + # cfg_ds_function_number + # cfg_power_state_change_ack + # cfg_power_state_change_interrupt + cfg_err_cor_in=dut.status_error_cor, + cfg_err_uncor_in=dut.status_error_uncor, + # cfg_flr_in_process + # cfg_flr_done + # cfg_vf_flr_in_process + # cfg_vf_flr_func_num + # cfg_vf_flr_done + # cfg_pm_aspm_l1_entry_reject + # cfg_pm_aspm_tx_l0s_entry_disable + # cfg_req_pm_transition_l23_ready + # cfg_link_training_enable + + # Configuration Interrupt Controller Interface + # cfg_interrupt_int + # cfg_interrupt_sent + # cfg_interrupt_pending + cfg_interrupt_msi_enable=dut.cfg_interrupt_msi_enable, + cfg_interrupt_msi_mmenable=dut.cfg_interrupt_msi_mmenable, + cfg_interrupt_msi_mask_update=dut.cfg_interrupt_msi_mask_update, + cfg_interrupt_msi_data=dut.cfg_interrupt_msi_data, + # cfg_interrupt_msi_select=dut.cfg_interrupt_msi_select, + cfg_interrupt_msi_int=dut.cfg_interrupt_msi_int, + cfg_interrupt_msi_pending_status=dut.cfg_interrupt_msi_pending_status, + cfg_interrupt_msi_pending_status_data_enable=dut.cfg_interrupt_msi_pending_status_data_enable, + # cfg_interrupt_msi_pending_status_function_num=dut.cfg_interrupt_msi_pending_status_function_num, + cfg_interrupt_msi_sent=dut.cfg_interrupt_msi_sent, + cfg_interrupt_msi_fail=dut.cfg_interrupt_msi_fail, + # cfg_interrupt_msix_enable + # cfg_interrupt_msix_mask + # cfg_interrupt_msix_vf_enable + # cfg_interrupt_msix_vf_mask + # cfg_interrupt_msix_address + # cfg_interrupt_msix_data + # cfg_interrupt_msix_int + # cfg_interrupt_msix_vec_pending + # cfg_interrupt_msix_vec_pending_status + cfg_interrupt_msi_attr=dut.cfg_interrupt_msi_attr, + cfg_interrupt_msi_tph_present=dut.cfg_interrupt_msi_tph_present, + cfg_interrupt_msi_tph_type=dut.cfg_interrupt_msi_tph_type, + # cfg_interrupt_msi_tph_st_tag=dut.cfg_interrupt_msi_tph_st_tag, + # cfg_interrupt_msi_function_number=dut.cfg_interrupt_msi_function_number, + + # Configuration Extend Interface + # cfg_ext_read_received + # cfg_ext_write_received + # cfg_ext_register_number + # cfg_ext_function_number + # cfg_ext_write_data + # cfg_ext_write_byte_enable + # cfg_ext_read_data + # cfg_ext_read_data_valid + ) + + # self.dev.log.setLevel(logging.DEBUG) + + self.rc.make_port().connect(self.dev) + + self.driver = mqnic.Driver() + + self.dev.functions[0].msi_cap.msi_multiple_message_capable = 5 + + self.dev.functions[0].configure_bar(0, 2**len(dut.core_pcie_inst.axil_ctrl_araddr), ext=True, prefetch=True) + if hasattr(dut.core_pcie_inst, 'pcie_app_ctrl'): + self.dev.functions[0].configure_bar(2, 2**len(dut.core_pcie_inst.axil_app_ctrl_araddr), ext=True, prefetch=True) + + # Ethernet + self.port_mac = [] + + eth_int_if_width = len(dut.core_pcie_inst.core_inst.iface[0].port[0].rx_async_fifo_inst.m_axis_tdata) + eth_clock_period = 6.4 + eth_speed = 10e9 + + if eth_int_if_width == 64: + # 10G + eth_clock_period = 6.4 + 
eth_speed = 10e9 + elif eth_int_if_width == 128: + # 25G + eth_clock_period = 2.56 + eth_speed = 25e9 + elif eth_int_if_width == 512: + # 100G + eth_clock_period = 3.102 + eth_speed = 100e9 + + for iface in dut.core_pcie_inst.core_inst.iface: + for port in iface.port: + cocotb.start_soon(Clock(port.port_rx_clk, eth_clock_period, units="ns").start()) + cocotb.start_soon(Clock(port.port_tx_clk, eth_clock_period, units="ns").start()) + + port.port_rx_rst.setimmediatevalue(0) + port.port_tx_rst.setimmediatevalue(0) + + mac = EthMac( + tx_clk=port.port_tx_clk, + tx_rst=port.port_tx_rst, + tx_bus=AxiStreamBus.from_prefix(port, "axis_tx"), + tx_ptp_time=port.ptp.tx_ptp_cdc_inst.output_ts, + tx_ptp_ts=port.ptp.axis_tx_ptp_ts, + tx_ptp_ts_tag=port.ptp.axis_tx_ptp_ts_tag, + tx_ptp_ts_valid=port.ptp.axis_tx_ptp_ts_valid, + rx_clk=port.port_rx_clk, + rx_rst=port.port_rx_rst, + rx_bus=AxiStreamBus.from_prefix(port, "axis_rx"), + rx_ptp_time=port.ptp.rx_ptp_cdc_inst.output_ts, + ifg=12, speed=eth_speed + ) + + self.port_mac.append(mac) + + dut.ctrl_reg_wr_wait.setimmediatevalue(0) + dut.ctrl_reg_wr_ack.setimmediatevalue(0) + dut.ctrl_reg_rd_data.setimmediatevalue(0) + dut.ctrl_reg_rd_wait.setimmediatevalue(0) + dut.ctrl_reg_rd_ack.setimmediatevalue(0) + + dut.ptp_sample_clk.setimmediatevalue(0) + + dut.s_axis_stat_tdata.setimmediatevalue(0) + dut.s_axis_stat_tid.setimmediatevalue(0) + dut.s_axis_stat_tvalid.setimmediatevalue(0) + + self.loopback_enable = False + cocotb.start_soon(self._run_loopback()) + + async def init(self): + + for mac in self.port_mac: + mac.rx.reset.setimmediatevalue(0) + mac.tx.reset.setimmediatevalue(0) + + await RisingEdge(self.dut.clk) + await RisingEdge(self.dut.clk) + + for mac in self.port_mac: + mac.rx.reset.setimmediatevalue(1) + mac.tx.reset.setimmediatevalue(1) + + await FallingEdge(self.dut.rst) + await Timer(100, 'ns') + + await RisingEdge(self.dut.clk) + await RisingEdge(self.dut.clk) + + for mac in self.port_mac: + mac.rx.reset.setimmediatevalue(0) + mac.tx.reset.setimmediatevalue(0) + + await self.rc.enumerate(enable_bus_mastering=True, configure_msi=True) + + async def _run_loopback(self): + while True: + await RisingEdge(self.dut.clk) + + if self.loopback_enable: + for mac in self.port_mac: + if not mac.tx.empty(): + await mac.rx.send(await mac.tx.recv()) + + +@cocotb.test() +async def run_test_nic(dut): + + tb = TB(dut) + + await tb.init() + + tb.log.info("Init driver") + await tb.driver.init_pcie_dev(tb.rc, tb.dev.functions[0].pcie_id) + for interface in tb.driver.interfaces: + await interface.open() + + # enable queues + tb.log.info("Enable queues") + for interface in tb.driver.interfaces: + await interface.sched_blocks[0].schedulers[0].rb.write_dword(mqnic.MQNIC_RB_SCHED_RR_REG_CTRL, 0x00000001) + for k in range(interface.tx_queue_count): + await interface.sched_blocks[0].schedulers[0].hw_regs.write_dword(4*k, 0x00000003) + + # wait for all writes to complete + await tb.driver.hw_regs.read_dword(0) + tb.log.info("Init complete") + + tb.log.info("Send and receive single packet") + + for interface in tb.driver.interfaces: + data = bytearray([x % 256 for x in range(1024)]) + + await interface.start_xmit(data, 0) + + pkt = await tb.port_mac[interface.index*interface.port_count].tx.recv() + tb.log.info("Packet: %s", pkt) + + await tb.port_mac[interface.index*interface.port_count].rx.send(pkt) + + pkt = await interface.recv() + + tb.log.info("Packet: %s", pkt) + assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff + + tb.log.info("RX and TX 
checksum tests") + + payload = bytes([x % 256 for x in range(256)]) + eth = Ether(src='5A:51:52:53:54:55', dst='DA:D1:D2:D3:D4:D5') + ip = IP(src='192.168.1.100', dst='192.168.1.101') + udp = UDP(sport=1, dport=2) + test_pkt = eth / ip / udp / payload + + test_pkt2 = test_pkt.copy() + test_pkt2[UDP].chksum = scapy.utils.checksum(bytes(test_pkt2[UDP])) + + await tb.driver.interfaces[0].start_xmit(test_pkt2.build(), 0, 34, 6) + + pkt = await tb.port_mac[0].tx.recv() + tb.log.info("Packet: %s", pkt) + + await tb.port_mac[0].rx.send(pkt) + + pkt = await tb.driver.interfaces[0].recv() + + tb.log.info("Packet: %s", pkt) + assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff + assert Ether(pkt.data).build() == test_pkt.build() + + tb.log.info("Multiple small packets") + + count = 64 + + pkts = [bytearray([(x+k) % 256 for x in range(60)]) for k in range(count)] + + tb.loopback_enable = True + + for p in pkts: + await tb.driver.interfaces[0].start_xmit(p, 0) + + for k in range(count): + pkt = await tb.driver.interfaces[0].recv() + + tb.log.info("Packet: %s", pkt) + assert pkt.data == pkts[k] + assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff + + tb.loopback_enable = False + + tb.log.info("Multiple large packets") + + count = 64 + + pkts = [bytearray([(x+k) % 256 for x in range(1514)]) for k in range(count)] + + tb.loopback_enable = True + + for p in pkts: + await tb.driver.interfaces[0].start_xmit(p, 0) + + for k in range(count): + pkt = await tb.driver.interfaces[0].recv() + + tb.log.info("Packet: %s", pkt) + assert pkt.data == pkts[k] + assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff + + tb.loopback_enable = False + + tb.log.info("Jumbo frames") + + count = 64 + + pkts = [bytearray([(x+k) % 256 for x in range(9014)]) for k in range(count)] + + tb.loopback_enable = True + + for p in pkts: + await tb.driver.interfaces[0].start_xmit(p, 0) + + for k in range(count): + pkt = await tb.driver.interfaces[0].recv() + + tb.log.info("Packet: %s", pkt) + assert pkt.data == pkts[k] + assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff + + tb.loopback_enable = False + + if len(tb.driver.interfaces) > 1: + tb.log.info("All interfaces") + + count = 64 + + pkts = [bytearray([(x+k) % 256 for x in range(1514)]) for k in range(count)] + + tb.loopback_enable = True + + for k, p in enumerate(pkts): + await tb.driver.interfaces[k % len(tb.driver.interfaces)].start_xmit(p, 0) + + for k in range(count): + pkt = await tb.driver.interfaces[k % len(tb.driver.interfaces)].recv() + + tb.log.info("Packet: %s", pkt) + assert pkt.data == pkts[k] + assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff + + tb.loopback_enable = False + + if len(tb.driver.interfaces[0].sched_blocks) > 1: + tb.log.info("All interface 0 scheduler blocks") + + for block in tb.driver.interfaces[0].sched_blocks: + await block.schedulers[0].rb.write_dword(mqnic.MQNIC_RB_SCHED_RR_REG_CTRL, 0x00000001) + for k in range(block.interface.tx_queue_count): + if k % len(tb.driver.interfaces[0].sched_blocks) == block.index: + await block.schedulers[0].hw_regs.write_dword(4*k, 0x00000003) + else: + await block.schedulers[0].hw_regs.write_dword(4*k, 0x00000000) + + count = 64 + + pkts = [bytearray([(x+k) % 256 for x in range(1514)]) for k in range(count)] + + tb.loopback_enable = True + + for k, p in enumerate(pkts): + await tb.driver.interfaces[0].start_xmit(p, k % len(tb.driver.interfaces[0].sched_blocks)) + + for k in 
range(count): + pkt = await tb.driver.interfaces[0].recv() + + tb.log.info("Packet: %s", pkt) + # assert pkt.data == pkts[k] + assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff + + tb.loopback_enable = False + + for block in tb.driver.interfaces[0].sched_blocks[1:]: + await block.schedulers[0].rb.write_dword(mqnic.MQNIC_RB_SCHED_RR_REG_CTRL, 0x00000000) + + mem = tb.rc.mem_pool.alloc_region(16*1024*1024) + mem_base = mem.get_absolute_address(0) + + tb.log.info("Test DMA") + + # write packet data + mem[0:1024] = bytearray([x % 256 for x in range(1024)]) + + # write pcie read descriptor + await tb.driver.app_hw_regs.write_dword(0x000100, (mem_base+0x0000) & 0xffffffff) + await tb.driver.app_hw_regs.write_dword(0x000104, (mem_base+0x0000 >> 32) & 0xffffffff) + await tb.driver.app_hw_regs.write_dword(0x000108, 0x100) + await tb.driver.app_hw_regs.write_dword(0x000110, 0x400) + await tb.driver.app_hw_regs.write_dword(0x000114, 0xAA) + + await Timer(2000, 'ns') + + # read status + val = await tb.driver.app_hw_regs.read_dword(0x000118) + tb.log.info("Status: 0x%x", val) + assert val == 0x800000AA + + # write pcie write descriptor + await tb.driver.app_hw_regs.write_dword(0x000200, (mem_base+0x1000) & 0xffffffff) + await tb.driver.app_hw_regs.write_dword(0x000204, (mem_base+0x1000 >> 32) & 0xffffffff) + await tb.driver.app_hw_regs.write_dword(0x000208, 0x100) + await tb.driver.app_hw_regs.write_dword(0x000210, 0x400) + await tb.driver.app_hw_regs.write_dword(0x000214, 0x55) + + await Timer(2000, 'ns') + + # read status + val = await tb.driver.app_hw_regs.read_dword(0x000218) + tb.log.info("Status: 0x%x", val) + assert val == 0x80000055 + + tb.log.info("%s", mem.hexdump_str(0x1000, 64)) + + assert mem[0:1024] == mem[0x1000:0x1000+1024] + + tb.log.info("Test immediate write") + + # write pcie write descriptor + await tb.driver.app_hw_regs.write_dword(0x000200, (mem_base+0x1000) & 0xffffffff) + await tb.driver.app_hw_regs.write_dword(0x000204, (mem_base+0x1000 >> 32) & 0xffffffff) + await tb.driver.app_hw_regs.write_dword(0x000208, 0x44332211) + await tb.driver.app_hw_regs.write_dword(0x000210, 0x4) + await tb.driver.app_hw_regs.write_dword(0x000214, 0x800000AA) + + await Timer(2000, 'ns') + + # read status + val = await tb.driver.app_hw_regs.read_dword(0x000218) + tb.log.info("Status: 0x%x", val) + assert val == 0x800000AA + + tb.log.info("%s", mem.hexdump_str(0x1000, 64)) + + assert mem[0x1000:0x1000+4] == b'\x11\x22\x33\x44' + + tb.log.info("Test DMA block operations") + + region_len = 0x2000 + src_offset = 0x0000 + dest_offset = 0x4000 + + block_size = 256 + block_stride = block_size + block_count = 32 + + # write packet data + mem[src_offset:src_offset+region_len] = bytearray([x % 256 for x in range(region_len)]) + + # enable DMA + await tb.driver.app_hw_regs.write_dword(0x000000, 1) + # disable interrupts + await tb.driver.app_hw_regs.write_dword(0x000008, 0) + + # configure operation (read) + # DMA base address + await tb.driver.app_hw_regs.write_dword(0x001080, (mem_base+src_offset) & 0xffffffff) + await tb.driver.app_hw_regs.write_dword(0x001084, (mem_base+src_offset >> 32) & 0xffffffff) + # DMA offset address + await tb.driver.app_hw_regs.write_dword(0x001088, 0) + await tb.driver.app_hw_regs.write_dword(0x00108c, 0) + # DMA offset mask + await tb.driver.app_hw_regs.write_dword(0x001090, region_len-1) + await tb.driver.app_hw_regs.write_dword(0x001094, 0) + # DMA stride + await tb.driver.app_hw_regs.write_dword(0x001098, block_stride) + await 
tb.driver.app_hw_regs.write_dword(0x00109c, 0) + # RAM base address + await tb.driver.app_hw_regs.write_dword(0x0010c0, 0) + await tb.driver.app_hw_regs.write_dword(0x0010c4, 0) + # RAM offset address + await tb.driver.app_hw_regs.write_dword(0x0010c8, 0) + await tb.driver.app_hw_regs.write_dword(0x0010cc, 0) + # RAM offset mask + await tb.driver.app_hw_regs.write_dword(0x0010d0, region_len-1) + await tb.driver.app_hw_regs.write_dword(0x0010d4, 0) + # RAM stride + await tb.driver.app_hw_regs.write_dword(0x0010d8, block_stride) + await tb.driver.app_hw_regs.write_dword(0x0010dc, 0) + # clear cycle count + await tb.driver.app_hw_regs.write_dword(0x001008, 0) + await tb.driver.app_hw_regs.write_dword(0x00100c, 0) + # block length + await tb.driver.app_hw_regs.write_dword(0x001010, block_size) + # block count + await tb.driver.app_hw_regs.write_dword(0x001018, block_count) + await tb.driver.app_hw_regs.write_dword(0x00101c, 0) + # start + await tb.driver.app_hw_regs.write_dword(0x001000, 1) + + for k in range(10): + cnt = await tb.driver.app_hw_regs.read_dword(0x001018) + await Timer(1000, 'ns') + if cnt == 0: + break + + # configure operation (write) + # DMA base address + await tb.driver.app_hw_regs.write_dword(0x001180, (mem_base+dest_offset) & 0xffffffff) + await tb.driver.app_hw_regs.write_dword(0x001184, (mem_base+dest_offset >> 32) & 0xffffffff) + # DMA offset address + await tb.driver.app_hw_regs.write_dword(0x001188, 0) + await tb.driver.app_hw_regs.write_dword(0x00118c, 0) + # DMA offset mask + await tb.driver.app_hw_regs.write_dword(0x001190, region_len-1) + await tb.driver.app_hw_regs.write_dword(0x001194, 0) + # DMA stride + await tb.driver.app_hw_regs.write_dword(0x001198, block_stride) + await tb.driver.app_hw_regs.write_dword(0x00119c, 0) + # RAM base address + await tb.driver.app_hw_regs.write_dword(0x0011c0, 0) + await tb.driver.app_hw_regs.write_dword(0x0011c4, 0) + # RAM offset address + await tb.driver.app_hw_regs.write_dword(0x0011c8, 0) + await tb.driver.app_hw_regs.write_dword(0x0011cc, 0) + # RAM offset mask + await tb.driver.app_hw_regs.write_dword(0x0011d0, region_len-1) + await tb.driver.app_hw_regs.write_dword(0x0011d4, 0) + # RAM stride + await tb.driver.app_hw_regs.write_dword(0x0011d8, block_stride) + await tb.driver.app_hw_regs.write_dword(0x0011dc, 0) + # clear cycle count + await tb.driver.app_hw_regs.write_dword(0x001108, 0) + await tb.driver.app_hw_regs.write_dword(0x00110c, 0) + # block length + await tb.driver.app_hw_regs.write_dword(0x001110, block_size) + # block count + await tb.driver.app_hw_regs.write_dword(0x001118, block_count) + await tb.driver.app_hw_regs.write_dword(0x00111c, 0) + # start + await tb.driver.app_hw_regs.write_dword(0x001100, 1) + + for k in range(10): + cnt = await tb.driver.app_hw_regs.read_dword(0x001118) + await Timer(1000, 'ns') + if cnt == 0: + break + + tb.log.info("%s", mem.hexdump_str(dest_offset, region_len)) + + assert mem[src_offset:src_offset+region_len] == mem[dest_offset:dest_offset+region_len] + + tb.log.info("Read statistics counters") + + await Timer(2000, 'ns') + + lst = [] + + for k in range(64): + lst.append(await tb.driver.hw_regs.read_dword(0x010000+k*8)) + + print(lst) + + await RisingEdge(dut.clk) + await RisingEdge(dut.clk) + + +# cocotb-test + +tests_dir = os.path.dirname(__file__) +rtl_dir = os.path.abspath(os.path.join(tests_dir, '..', '..', 'rtl')) +lib_dir = os.path.abspath(os.path.join(rtl_dir, '..', 'lib')) +axi_rtl_dir = os.path.abspath(os.path.join(lib_dir, 'axi', 'rtl')) +axis_rtl_dir = 
os.path.abspath(os.path.join(lib_dir, 'axis', 'rtl')) +eth_rtl_dir = os.path.abspath(os.path.join(lib_dir, 'eth', 'rtl')) +pcie_rtl_dir = os.path.abspath(os.path.join(lib_dir, 'pcie', 'rtl')) + + +@pytest.mark.parametrize(("if_count", "ports_per_if", "axis_pcie_data_width", + "axis_eth_data_width", "axis_eth_sync_data_width"), [ + (1, 1, 256, 64, 64), + (2, 1, 256, 64, 64), + (1, 2, 256, 64, 64), + (1, 1, 256, 64, 128), + (1, 1, 512, 64, 64), + (1, 1, 512, 64, 128), + (1, 1, 512, 512, 512), + ]) +def test_mqnic_core_pcie_us(request, if_count, ports_per_if, axis_pcie_data_width, + axis_eth_data_width, axis_eth_sync_data_width): + dut = "mqnic_core_pcie_us" + module = os.path.splitext(os.path.basename(__file__))[0] + toplevel = dut + + verilog_sources = [ + os.path.join(rtl_dir, "common", f"{dut}.v"), + os.path.join(rtl_dir, "common", "mqnic_core.v"), + os.path.join(rtl_dir, "common", "mqnic_core_pcie.v"), + os.path.join(rtl_dir, "common", "mqnic_interface.v"), + os.path.join(rtl_dir, "common", "mqnic_interface_tx.v"), + os.path.join(rtl_dir, "common", "mqnic_interface_rx.v"), + os.path.join(rtl_dir, "common", "mqnic_egress.v"), + os.path.join(rtl_dir, "common", "mqnic_ingress.v"), + os.path.join(rtl_dir, "common", "mqnic_l2_egress.v"), + os.path.join(rtl_dir, "common", "mqnic_l2_ingress.v"), + os.path.join(rtl_dir, "common", "mqnic_ptp.v"), + os.path.join(rtl_dir, "common", "mqnic_ptp_clock.v"), + os.path.join(rtl_dir, "common", "mqnic_ptp_perout.v"), + os.path.join(rtl_dir, "common", "cpl_write.v"), + os.path.join(rtl_dir, "common", "cpl_op_mux.v"), + os.path.join(rtl_dir, "common", "desc_fetch.v"), + os.path.join(rtl_dir, "common", "desc_op_mux.v"), + os.path.join(rtl_dir, "common", "event_mux.v"), + os.path.join(rtl_dir, "common", "queue_manager.v"), + os.path.join(rtl_dir, "common", "cpl_queue_manager.v"), + os.path.join(rtl_dir, "common", "tx_fifo.v"), + os.path.join(rtl_dir, "common", "rx_fifo.v"), + os.path.join(rtl_dir, "common", "tx_req_mux.v"), + os.path.join(rtl_dir, "common", "tx_engine.v"), + os.path.join(rtl_dir, "common", "rx_engine.v"), + os.path.join(rtl_dir, "common", "tx_checksum.v"), + os.path.join(rtl_dir, "common", "rx_hash.v"), + os.path.join(rtl_dir, "common", "rx_checksum.v"), + os.path.join(rtl_dir, "common", "stats_counter.v"), + os.path.join(rtl_dir, "common", "stats_collect.v"), + os.path.join(rtl_dir, "common", "stats_pcie_if.v"), + os.path.join(rtl_dir, "common", "stats_pcie_tlp.v"), + os.path.join(rtl_dir, "common", "stats_dma_if_pcie.v"), + os.path.join(rtl_dir, "common", "stats_dma_latency.v"), + os.path.join(rtl_dir, "common", "mqnic_tx_scheduler_block_rr.v"), + os.path.join(rtl_dir, "common", "tx_scheduler_rr.v"), + os.path.join(rtl_dir, "mqnic_app_block_dma_bench.v"), + os.path.join(eth_rtl_dir, "ptp_clock.v"), + os.path.join(eth_rtl_dir, "ptp_clock_cdc.v"), + os.path.join(eth_rtl_dir, "ptp_perout.v"), + os.path.join(eth_rtl_dir, "ptp_ts_extract.v"), + os.path.join(axi_rtl_dir, "axil_crossbar.v"), + os.path.join(axi_rtl_dir, "axil_crossbar_addr.v"), + os.path.join(axi_rtl_dir, "axil_crossbar_rd.v"), + os.path.join(axi_rtl_dir, "axil_crossbar_wr.v"), + os.path.join(axi_rtl_dir, "axil_reg_if.v"), + os.path.join(axi_rtl_dir, "axil_reg_if_rd.v"), + os.path.join(axi_rtl_dir, "axil_reg_if_wr.v"), + os.path.join(axi_rtl_dir, "axil_register_rd.v"), + os.path.join(axi_rtl_dir, "axil_register_wr.v"), + os.path.join(axi_rtl_dir, "arbiter.v"), + os.path.join(axi_rtl_dir, "priority_encoder.v"), + os.path.join(axis_rtl_dir, "axis_adapter.v"), + 
os.path.join(axis_rtl_dir, "axis_arb_mux.v"), + os.path.join(axis_rtl_dir, "axis_async_fifo.v"), + os.path.join(axis_rtl_dir, "axis_async_fifo_adapter.v"), + os.path.join(axis_rtl_dir, "axis_demux.v"), + os.path.join(axis_rtl_dir, "axis_fifo.v"), + os.path.join(axis_rtl_dir, "axis_fifo_adapter.v"), + os.path.join(axis_rtl_dir, "axis_pipeline_fifo.v"), + os.path.join(axis_rtl_dir, "axis_register.v"), + os.path.join(pcie_rtl_dir, "pcie_axil_master.v"), + os.path.join(pcie_rtl_dir, "pcie_tlp_demux.v"), + os.path.join(pcie_rtl_dir, "pcie_tlp_demux_bar.v"), + os.path.join(pcie_rtl_dir, "pcie_tlp_mux.v"), + os.path.join(pcie_rtl_dir, "dma_if_pcie.v"), + os.path.join(pcie_rtl_dir, "dma_if_pcie_rd.v"), + os.path.join(pcie_rtl_dir, "dma_if_pcie_wr.v"), + os.path.join(pcie_rtl_dir, "dma_if_mux.v"), + os.path.join(pcie_rtl_dir, "dma_if_mux_rd.v"), + os.path.join(pcie_rtl_dir, "dma_if_mux_wr.v"), + os.path.join(pcie_rtl_dir, "dma_if_desc_mux.v"), + os.path.join(pcie_rtl_dir, "dma_ram_demux_rd.v"), + os.path.join(pcie_rtl_dir, "dma_ram_demux_wr.v"), + os.path.join(pcie_rtl_dir, "dma_psdpram.v"), + os.path.join(pcie_rtl_dir, "dma_client_axis_sink.v"), + os.path.join(pcie_rtl_dir, "dma_client_axis_source.v"), + os.path.join(pcie_rtl_dir, "pcie_us_if.v"), + os.path.join(pcie_rtl_dir, "pcie_us_if_rc.v"), + os.path.join(pcie_rtl_dir, "pcie_us_if_rq.v"), + os.path.join(pcie_rtl_dir, "pcie_us_if_cc.v"), + os.path.join(pcie_rtl_dir, "pcie_us_if_cq.v"), + os.path.join(pcie_rtl_dir, "pcie_us_cfg.v"), + os.path.join(pcie_rtl_dir, "pcie_us_msi.v"), + os.path.join(pcie_rtl_dir, "pulse_merge.v"), + ] + + parameters = {} + + # Structural configuration + parameters['IF_COUNT'] = if_count + parameters['PORTS_PER_IF'] = ports_per_if + parameters['SCHED_PER_IF'] = ports_per_if + + # PTP configuration + parameters['PTP_CLOCK_PIPELINE'] = 0 + parameters['PTP_USE_SAMPLE_CLOCK'] = 0 + parameters['PTP_SEPARATE_RX_CLOCK'] = 0 + parameters['PTP_PORT_CDC_PIPELINE'] = 0 + parameters['PTP_PEROUT_ENABLE'] = 0 + parameters['PTP_PEROUT_COUNT'] = 1 + + # Queue manager configuration (interface) + parameters['EVENT_QUEUE_OP_TABLE_SIZE'] = 32 + parameters['TX_QUEUE_OP_TABLE_SIZE'] = 32 + parameters['RX_QUEUE_OP_TABLE_SIZE'] = 32 + parameters['TX_CPL_QUEUE_OP_TABLE_SIZE'] = parameters['TX_QUEUE_OP_TABLE_SIZE'] + parameters['RX_CPL_QUEUE_OP_TABLE_SIZE'] = parameters['RX_QUEUE_OP_TABLE_SIZE'] + parameters['TX_QUEUE_INDEX_WIDTH'] = 13 + parameters['RX_QUEUE_INDEX_WIDTH'] = 8 + parameters['TX_CPL_QUEUE_INDEX_WIDTH'] = parameters['TX_QUEUE_INDEX_WIDTH'] + parameters['RX_CPL_QUEUE_INDEX_WIDTH'] = parameters['RX_QUEUE_INDEX_WIDTH'] + parameters['EVENT_QUEUE_PIPELINE'] = 3 + parameters['TX_QUEUE_PIPELINE'] = 3 + max(parameters['TX_QUEUE_INDEX_WIDTH']-12, 0) + parameters['RX_QUEUE_PIPELINE'] = 3 + max(parameters['RX_QUEUE_INDEX_WIDTH']-12, 0) + parameters['TX_CPL_QUEUE_PIPELINE'] = parameters['TX_QUEUE_PIPELINE'] + parameters['RX_CPL_QUEUE_PIPELINE'] = parameters['RX_QUEUE_PIPELINE'] + + # TX and RX engine configuration (port) + parameters['TX_DESC_TABLE_SIZE'] = 32 + parameters['RX_DESC_TABLE_SIZE'] = 32 + + # Scheduler configuration (port) + parameters['TX_SCHEDULER_OP_TABLE_SIZE'] = parameters['TX_DESC_TABLE_SIZE'] + parameters['TX_SCHEDULER_PIPELINE'] = parameters['TX_QUEUE_PIPELINE'] + parameters['TDMA_INDEX_WIDTH'] = 6 + + # Timestamping configuration (port) + parameters['PTP_TS_ENABLE'] = 1 + parameters['TX_PTP_TS_FIFO_DEPTH'] = 32 + parameters['RX_PTP_TS_FIFO_DEPTH'] = 32 + + # Interface configuration (port) + 
parameters['TX_CHECKSUM_ENABLE'] = 1 + parameters['RX_RSS_ENABLE'] = 1 + parameters['RX_HASH_ENABLE'] = 1 + parameters['RX_CHECKSUM_ENABLE'] = 1 + parameters['TX_FIFO_DEPTH'] = 32768 + parameters['RX_FIFO_DEPTH'] = 131072 + parameters['MAX_TX_SIZE'] = 9214 + parameters['MAX_RX_SIZE'] = 9214 + parameters['TX_RAM_SIZE'] = 131072 + parameters['RX_RAM_SIZE'] = 131072 + + # Application block configuration + parameters['APP_ID'] = 0x12348001 + parameters['APP_ENABLE'] = 1 + parameters['APP_CTRL_ENABLE'] = 0 + parameters['APP_DMA_ENABLE'] = 1 + parameters['APP_AXIS_DIRECT_ENABLE'] = 0 + parameters['APP_AXIS_SYNC_ENABLE'] = 0 + parameters['APP_AXIS_IF_ENABLE'] = 0 + parameters['APP_STAT_ENABLE'] = 0 + + # DMA interface configuration + parameters['DMA_IMM_ENABLE'] = 1 + parameters['DMA_IMM_WIDTH'] = 32 + parameters['DMA_LEN_WIDTH'] = 16 + parameters['DMA_TAG_WIDTH'] = 16 + parameters['RAM_ADDR_WIDTH'] = (max(parameters['TX_RAM_SIZE'], parameters['RX_RAM_SIZE'])-1).bit_length() + parameters['RAM_PIPELINE'] = 2 + + # PCIe interface configuration + parameters['AXIS_PCIE_DATA_WIDTH'] = axis_pcie_data_width + parameters['PF_COUNT'] = 1 + parameters['VF_COUNT'] = 0 + parameters['PCIE_TAG_COUNT'] = 64 + parameters['PCIE_DMA_READ_OP_TABLE_SIZE'] = parameters['PCIE_TAG_COUNT'] + parameters['PCIE_DMA_READ_TX_LIMIT'] = 16 + parameters['PCIE_DMA_READ_TX_FC_ENABLE'] = 1 + parameters['PCIE_DMA_WRITE_OP_TABLE_SIZE'] = 16 + parameters['PCIE_DMA_WRITE_TX_LIMIT'] = 3 + parameters['PCIE_DMA_WRITE_TX_FC_ENABLE'] = 1 + parameters['MSI_COUNT'] = 32 + + # AXI lite interface configuration (control) + parameters['AXIL_CTRL_DATA_WIDTH'] = 32 + parameters['AXIL_CTRL_ADDR_WIDTH'] = 24 + parameters['AXIL_CSR_PASSTHROUGH_ENABLE'] = 0 + + # AXI lite interface configuration (application control) + parameters['AXIL_APP_CTRL_DATA_WIDTH'] = parameters['AXIL_CTRL_DATA_WIDTH'] + parameters['AXIL_APP_CTRL_ADDR_WIDTH'] = 24 + + # Ethernet interface configuration + parameters['AXIS_ETH_DATA_WIDTH'] = axis_eth_data_width + parameters['AXIS_ETH_SYNC_DATA_WIDTH'] = axis_eth_sync_data_width + parameters['AXIS_ETH_RX_USE_READY'] = 0 + parameters['AXIS_ETH_TX_PIPELINE'] = 0 + parameters['AXIS_ETH_TX_FIFO_PIPELINE'] = 2 + parameters['AXIS_ETH_TX_TS_PIPELINE'] = 0 + parameters['AXIS_ETH_RX_PIPELINE'] = 0 + parameters['AXIS_ETH_RX_FIFO_PIPELINE'] = 2 + + # Statistics counter subsystem + parameters['STAT_ENABLE'] = 1 + parameters['STAT_DMA_ENABLE'] = 1 + parameters['STAT_PCIE_ENABLE'] = 1 + parameters['STAT_INC_WIDTH'] = 24 + parameters['STAT_ID_WIDTH'] = 12 + + extra_env = {f'PARAM_{k}': str(v) for k, v in parameters.items()} + + sim_build = os.path.join(tests_dir, "sim_build", + request.node.name.replace('[', '-').replace(']', '')) + + cocotb_test.simulator.run( + python_search=[tests_dir], + verilog_sources=verilog_sources, + toplevel=toplevel, + module=module, + parameters=parameters, + sim_build=sim_build, + extra_env=extra_env, + )
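A usage sketch, not part of the diff itself (assumes cocotb, cocotb-test, the cocotbext simulation libraries, scapy, and a supported simulator such as Icarus Verilog are installed, and that the commands are run from the testbench directory containing test_mqnic_core_pcie_us.py):

    # run the full parameter sweep defined by @pytest.mark.parametrize
    pytest test_mqnic_core_pcie_us.py

    # or run a single configuration, selected by its pytest node id
    pytest "test_mqnic_core_pcie_us.py::test_mqnic_core_pcie_us[1-1-256-64-64]"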