
fpga/app/dma_bench: Add DMA benchmark application

Author: Alex Forencich <alex@alexforencich.com>
Date: 2022-04-21 14:19:43 -07:00
Signed-off-by: Alex Forencich <alex@alexforencich.com>
parent 6044b75fa3
commit d45857fb98
9 changed files with 3163 additions and 0 deletions

fpga/app/dma_bench/lib Symbolic link

@@ -0,0 +1 @@
../../lib/


@@ -0,0 +1 @@
../../../../modules/mqnic/


@@ -0,0 +1,26 @@
ifneq ($(KERNELRELEASE),)
ccflags-y += -I$(src)/../mqnic/
# object files to build
obj-m += mqnic_app_dma_bench.o
mqnic_app_dma_bench-y += main.o
else
ifneq ($(KERNEL_SRC),)
# alternatively to variable KDIR accept variable KERNEL_SRC as used in
# PetaLinux/Yocto for example
KDIR ?= $(KERNEL_SRC)
endif
KDIR ?= /lib/modules/$(shell uname -r)/build
all: modules
help modules modules_install clean:
$(MAKE) -C $(KDIR) M=$(shell pwd) $@
install: modules_install
endif
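
This is a standard Kbuild external-module makefile: with kernel headers installed it can typically be driven with plain make (KDIR defaults to the running kernel's build tree), or with make KERNEL_SRC=<path> modules under PetaLinux/Yocto, followed by make modules_install; the exact invocation depends on the build environment.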


@@ -0,0 +1,569 @@
// SPDX-License-Identifier: BSD-2-Clause-Views
/*
* Copyright 2022, The Regents of the University of California.
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
*
* 2. Redistributions in binary form must reproduce the above
* copyright notice, this list of conditions and the following
* disclaimer in the documentation and/or other materials provided
* with the distribution.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
* OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
* The views and conclusions contained in the software and documentation
* are those of the authors and should not be interpreted as representing
* official policies, either expressed or implied, of The Regents of the
* University of California.
*/
#include "mqnic.h"
#include <linux/module.h>
MODULE_DESCRIPTION("mqnic DMA benchmark application driver");
MODULE_AUTHOR("Alex Forencich");
MODULE_LICENSE("Dual BSD/GPL");
MODULE_VERSION("0.1");
struct mqnic_app_dma_bench {
struct device *dev;
struct mqnic_dev *mdev;
struct mqnic_adev *adev;
struct device *nic_dev;
void __iomem *nic_hw_addr;
void __iomem *app_hw_addr;
void __iomem *ram_hw_addr;
// DMA buffer
size_t dma_region_len;
void *dma_region;
dma_addr_t dma_region_addr;
};
const char *dma_bench_stats_names[] = {
// PCIe stats
"pcie_rx_tlp_mem_rd", // index 0
"pcie_rx_tlp_mem_wr", // index 1
"pcie_rx_tlp_io", // index 2
"pcie_rx_tlp_cfg", // index 3
"pcie_rx_tlp_msg", // index 4
"pcie_rx_tlp_cpl", // index 5
"pcie_rx_tlp_cpl_ur", // index 6
"pcie_rx_tlp_cpl_ca", // index 7
"pcie_rx_tlp_atomic", // index 8
"pcie_rx_tlp_ep", // index 9
"pcie_rx_tlp_hdr_dw", // index 10
"pcie_rx_tlp_req_dw", // index 11
"pcie_rx_tlp_payload_dw", // index 12
"pcie_rx_tlp_cpl_dw", // index 13
"", // index 14
"", // index 15
"pcie_tx_tlp_mem_rd", // index 16
"pcie_tx_tlp_mem_wr", // index 17
"pcie_tx_tlp_io", // index 18
"pcie_tx_tlp_cfg", // index 19
"pcie_tx_tlp_msg", // index 20
"pcie_tx_tlp_cpl", // index 21
"pcie_tx_tlp_cpl_ur", // index 22
"pcie_tx_tlp_cpl_ca", // index 23
"pcie_tx_tlp_atomic", // index 24
"pcie_tx_tlp_ep", // index 25
"pcie_tx_tlp_hdr_dw", // index 26
"pcie_tx_tlp_req_dw", // index 27
"pcie_tx_tlp_payload_dw", // index 28
"pcie_tx_tlp_cpl_dw", // index 29
"", // index 30
"", // index 31
// DMA statistics
"dma_rd_op_count", // index 0
"dma_rd_op_bytes", // index 1
"dma_rd_op_latency", // index 2
"dma_rd_op_error", // index 3
"dma_rd_req_count", // index 4
"dma_rd_req_latency", // index 5
"dma_rd_req_timeout", // index 6
"dma_rd_op_table_full", // index 7
"dma_rd_no_tags", // index 8
"dma_rd_tx_no_credit", // index 9
"dma_rd_tx_limit", // index 10
"dma_rd_tx_stall", // index 11
"", // index 12
"", // index 13
"", // index 14
"", // index 15
"dma_wr_op_count", // index 16
"dma_wr_op_bytes", // index 17
"dma_wr_op_latency", // index 18
"dma_wr_op_error", // index 19
"dma_wr_req_count", // index 20
"dma_wr_req_latency", // index 21
"", // index 22
"dma_wr_op_table_full", // index 23
"", // index 24
"dma_wr_tx_no_credit", // index 25
"dma_wr_tx_limit", // index 26
"dma_wr_tx_stall", // index 27
"", // index 28
"", // index 29
"", // index 30
"", // index 31
0
};
static u64 read_stat_counter(struct mqnic_app_dma_bench *app, int index)
{
u64 val;
// counters are 64 bits wide, packed at 8-byte intervals starting at
// offset 0x010000 in the NIC control BAR; combine the two 32-bit halves
val = (u64) ioread32(app->nic_hw_addr + 0x010000 + index * 8 + 0);
val |= (u64) ioread32(app->nic_hw_addr + 0x010000 + index * 8 + 4) << 32;
return val;
}
static void print_counters(struct mqnic_app_dma_bench *app)
{
struct device *dev = app->dev;
int index = 0;
u64 val;
while (dma_bench_stats_names[index]) {
if (strlen(dma_bench_stats_names[index]) > 0) {
val = read_stat_counter(app, index);
dev_info(dev, "%s: %lld", dma_bench_stats_names[index], val);
}
index++;
}
}
static void dma_read(struct mqnic_app_dma_bench *app,
dma_addr_t dma_addr, size_t ram_addr, size_t len)
{
int tag = 0;
int new_tag = 0;
unsigned long t;
tag = ioread32(app->app_hw_addr + 0x000118); // dummy read
tag = (ioread32(app->app_hw_addr + 0x000118) & 0x7f) + 1;
iowrite32(dma_addr & 0xffffffff, app->app_hw_addr + 0x000100);
iowrite32((dma_addr >> 32) & 0xffffffff, app->app_hw_addr + 0x000104);
iowrite32(ram_addr, app->app_hw_addr + 0x000108);
iowrite32(0, app->app_hw_addr + 0x00010C);
iowrite32(len, app->app_hw_addr + 0x000110);
iowrite32(tag, app->app_hw_addr + 0x000114);
// wait for transfer to complete
t = jiffies + msecs_to_jiffies(200);
while (time_before(jiffies, t)) {
new_tag = (ioread32(app->app_hw_addr + 0x000118) & 0xff);
if (new_tag == tag)
break;
}
if (tag != new_tag)
dev_warn(app->dev, "%s: tag %d (expected %d)", __func__, new_tag, tag);
}
static void dma_write(struct mqnic_app_dma_bench *app,
dma_addr_t dma_addr, size_t ram_addr, size_t len)
{
int tag = 0;
int new_tag = 0;
unsigned long t;
tag = ioread32(app->app_hw_addr + 0x000218); // dummy read
tag = (ioread32(app->app_hw_addr + 0x000218) & 0x7f) + 1;
iowrite32(dma_addr & 0xffffffff, app->app_hw_addr + 0x000200);
iowrite32((dma_addr >> 32) & 0xffffffff, app->app_hw_addr + 0x000204);
iowrite32(ram_addr, app->app_hw_addr + 0x000208);
iowrite32(0, app->app_hw_addr + 0x00020C);
iowrite32(len, app->app_hw_addr + 0x000210);
iowrite32(tag, app->app_hw_addr + 0x000214);
// wait for transfer to complete
t = jiffies + msecs_to_jiffies(200);
while (time_before(jiffies, t)) {
new_tag = (ioread32(app->app_hw_addr + 0x000218) & 0xff);
if (new_tag == tag)
break;
}
if (tag != new_tag)
dev_warn(app->dev, "%s: tag %d (expected %d)", __func__, new_tag, tag);
}
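
The read and write descriptor register blocks at 0x000100 and 0x000200 evidently share one layout. As a minimal sketch covering only the offsets exercised above (the struct and field names are hypothetical, not part of the driver):

// Hypothetical layout, inferred from the iowrite32()/ioread32() offsets
// used in dma_read() and dma_write() above.
struct dma_bench_desc_regs {
	u32 dma_addr_l; // 0x00: host (DMA) address, low 32 bits
	u32 dma_addr_h; // 0x04: host (DMA) address, high 32 bits
	u32 ram_addr;   // 0x08: on-card RAM address
	u32 ram_sel;    // 0x0c: written as zero above
	u32 len;        // 0x10: transfer length in bytes
	u32 tag;        // 0x14: writing the tag starts the transfer
	u32 status;     // 0x18: completed tag readback (low byte)
};

Writing the tag register kicks off the transfer; completion is detected by polling the status register until it echoes the tag back.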
static void dma_block_read(struct mqnic_app_dma_bench *app,
dma_addr_t dma_addr, size_t dma_offset,
size_t dma_offset_mask, size_t dma_stride,
size_t ram_addr, size_t ram_offset,
size_t ram_offset_mask, size_t ram_stride,
size_t block_len, size_t block_count)
{
unsigned long t;
// DMA base address
iowrite32(dma_addr & 0xffffffff, app->app_hw_addr + 0x001080);
iowrite32((dma_addr >> 32) & 0xffffffff, app->app_hw_addr + 0x001084);
// DMA offset address
iowrite32(dma_offset & 0xffffffff, app->app_hw_addr + 0x001088);
iowrite32((dma_offset >> 32) & 0xffffffff, app->app_hw_addr + 0x00108c);
// DMA offset mask
iowrite32(dma_offset_mask & 0xffffffff, app->app_hw_addr + 0x001090);
iowrite32((dma_offset_mask >> 32) & 0xffffffff, app->app_hw_addr + 0x001094);
// DMA stride
iowrite32(dma_stride & 0xffffffff, app->app_hw_addr + 0x001098);
iowrite32((dma_stride >> 32) & 0xffffffff, app->app_hw_addr + 0x00109c);
// RAM base address
iowrite32(ram_addr & 0xffffffff, app->app_hw_addr + 0x0010c0);
iowrite32((ram_addr >> 32) & 0xffffffff, app->app_hw_addr + 0x0010c4);
// RAM offset address
iowrite32(ram_offset & 0xffffffff, app->app_hw_addr + 0x0010c8);
iowrite32((ram_offset >> 32) & 0xffffffff, app->app_hw_addr + 0x0010cc);
// RAM offset mask
iowrite32(ram_offset_mask & 0xffffffff, app->app_hw_addr + 0x0010d0);
iowrite32((ram_offset_mask >> 32) & 0xffffffff, app->app_hw_addr + 0x0010d4);
// RAM stride
iowrite32(ram_stride & 0xffffffff, app->app_hw_addr + 0x0010d8);
iowrite32((ram_stride >> 32) & 0xffffffff, app->app_hw_addr + 0x0010dc);
// clear cycle count
iowrite32(0, app->app_hw_addr + 0x001008);
iowrite32(0, app->app_hw_addr + 0x00100c);
// block length
iowrite32(block_len, app->app_hw_addr + 0x001010);
// block count
iowrite32(block_count, app->app_hw_addr + 0x001018);
// start
iowrite32(1, app->app_hw_addr + 0x001000);
// wait for transfer to complete
t = jiffies + msecs_to_jiffies(20000);
while (time_before(jiffies, t)) {
if ((ioread32(app->app_hw_addr + 0x001000) & 1) == 0)
break;
}
if ((ioread32(app->app_hw_addr + 0x001000) & 1) != 0)
dev_warn(app->dev, "%s: operation timed out", __func__);
}
static void dma_block_write(struct mqnic_app_dma_bench *app,
dma_addr_t dma_addr, size_t dma_offset,
size_t dma_offset_mask, size_t dma_stride,
size_t ram_addr, size_t ram_offset,
size_t ram_offset_mask, size_t ram_stride,
size_t block_len, size_t block_count)
{
unsigned long t;
// DMA base address
iowrite32(dma_addr & 0xffffffff, app->app_hw_addr + 0x001180);
iowrite32((dma_addr >> 32) & 0xffffffff, app->app_hw_addr + 0x001184);
// DMA offset address
iowrite32(dma_offset & 0xffffffff, app->app_hw_addr + 0x001188);
iowrite32((dma_offset >> 32) & 0xffffffff, app->app_hw_addr + 0x00118c);
// DMA offset mask
iowrite32(dma_offset_mask & 0xffffffff, app->app_hw_addr + 0x001190);
iowrite32((dma_offset_mask >> 32) & 0xffffffff, app->app_hw_addr + 0x001194);
// DMA stride
iowrite32(dma_stride & 0xffffffff, app->app_hw_addr + 0x001198);
iowrite32((dma_stride >> 32) & 0xffffffff, app->app_hw_addr + 0x00119c);
// RAM base address
iowrite32(ram_addr & 0xffffffff, app->app_hw_addr + 0x0011c0);
iowrite32((ram_addr >> 32) & 0xffffffff, app->app_hw_addr + 0x0011c4);
// RAM offset address
iowrite32(ram_offset & 0xffffffff, app->app_hw_addr + 0x0011c8);
iowrite32((ram_offset >> 32) & 0xffffffff, app->app_hw_addr + 0x0011cc);
// RAM offset mask
iowrite32(ram_offset_mask & 0xffffffff, app->app_hw_addr + 0x0011d0);
iowrite32((ram_offset_mask >> 32) & 0xffffffff, app->app_hw_addr + 0x0011d4);
// RAM stride
iowrite32(ram_stride & 0xffffffff, app->app_hw_addr + 0x0011d8);
iowrite32((ram_stride >> 32) & 0xffffffff, app->app_hw_addr + 0x0011dc);
// clear cycle count
iowrite32(0, app->app_hw_addr + 0x001108);
iowrite32(0, app->app_hw_addr + 0x00110c);
// block length
iowrite32(block_len, app->app_hw_addr + 0x001110);
// block count
iowrite32(block_count, app->app_hw_addr + 0x001118);
// start
iowrite32(1, app->app_hw_addr + 0x001100);
// wait for transfer to complete
t = jiffies + msecs_to_jiffies(20000);
while (time_before(jiffies, t)) {
if ((ioread32(app->app_hw_addr + 0x001100) & 1) == 0)
break;
}
if ((ioread32(app->app_hw_addr + 0x001100) & 1) != 0)
dev_warn(app->dev, "%s: operation timed out", __func__);
}
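
Both block engines appear to address memory the same way: each block goes to base + (offset & offset_mask), with offset advanced by stride after every block, so the mask bounds the region being swept while the stride sets the spacing between consecutive blocks. This reading is inferred from the register names and from the benchmarks below, which program equal size and stride against a 0x3fff mask.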
static void dma_block_read_bench(struct mqnic_app_dma_bench *app,
dma_addr_t dma_addr, u64 size, u64 stride, u64 count)
{
u64 cycles;
u64 op_count;
u64 op_latency;
u64 req_count;
u64 req_latency;
udelay(5);
op_count = read_stat_counter(app, 32);
op_latency = read_stat_counter(app, 34);
req_count = read_stat_counter(app, 36);
req_latency = read_stat_counter(app, 37);
dma_block_read(app, dma_addr, 0, 0x3fff, stride,
0, 0, 0x3fff, stride, size, count);
cycles = ioread32(app->app_hw_addr + 0x001008);
udelay(5);
op_count = read_stat_counter(app, 32) - op_count;
op_latency = read_stat_counter(app, 34) - op_latency;
req_count = read_stat_counter(app, 36) - req_count;
req_latency = read_stat_counter(app, 37) - req_latency;
dev_info(app->dev, "read %lld blocks of %lld bytes (stride %lld) in %lld ns (%lld ns/op, %lld req, %lld ns/req): %lld Mbps",
count, size, stride, cycles * 4, (op_latency * 4) / op_count, req_count,
(req_latency * 4) / req_count, size * count * 8 * 1000 / (cycles * 4));
}
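
The throughput arithmetic in the message above follows from the cycle counter: the "* 4" scaling implies a 4 ns (250 MHz) cycle time, so Mbps = bytes * 8 * 1000 / (cycles * 4). The same computation as a standalone helper (a sketch under that clock assumption, not part of the driver):

// Hypothetical helper; assumes the 4 ns cycle implied by the "* 4" above.
static u64 dma_bench_mbps(u64 bytes, u64 cycles)
{
	u64 ns = cycles * 4;          // elapsed time in nanoseconds
	return bytes * 8 * 1000 / ns; // bits/ns is Gbps; x1000 gives Mbps
}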
static void dma_block_write_bench(struct mqnic_app_dma_bench *app,
dma_addr_t dma_addr, u64 size, u64 stride, u64 count)
{
u64 cycles;
u64 op_count;
u64 op_latency;
u64 req_count;
u64 req_latency;
udelay(5);
op_count = read_stat_counter(app, 48);
op_latency = read_stat_counter(app, 50);
req_count = read_stat_counter(app, 52);
req_latency = read_stat_counter(app, 53);
dma_block_write(app, dma_addr, 0, 0x3fff, stride,
0, 0, 0x3fff, stride, size, count);
cycles = ioread32(app->app_hw_addr + 0x001108);
udelay(5);
op_count = read_stat_counter(app, 48) - op_count;
op_latency = read_stat_counter(app, 50) - op_latency;
req_count = read_stat_counter(app, 52) - req_count;
req_latency = read_stat_counter(app, 53) - req_latency;
dev_info(app->dev, "wrote %lld blocks of %lld bytes (stride %lld) in %lld ns (%lld ns/op, %lld req, %lld ns/req): %lld Mbps",
count, size, stride, cycles * 4, (op_latency * 4) / op_count, req_count,
(req_latency * 4) / req_count, size * count * 8 * 1000 / (cycles * 4));
}
static int mqnic_app_dma_bench_probe(struct auxiliary_device *adev,
const struct auxiliary_device_id *id)
{
struct mqnic_app_dma_bench *app;
struct mqnic_dev *mdev = container_of(adev, struct mqnic_adev, adev)->mdev;
struct device *dev = &adev->dev;
int mismatch = 0;
int k;
dev_info(dev, "%s() called", __func__);
if (!mdev->hw_addr || !mdev->app_hw_addr) {
dev_err(dev, "Error: required region not present");
return -EIO;
}
app = devm_kzalloc(dev, sizeof(*app), GFP_KERNEL);
if (!app)
return -ENOMEM;
app->dev = dev;
app->mdev = mdev;
dev_set_drvdata(&adev->dev, app);
app->nic_dev = mdev->dev;
app->nic_hw_addr = mdev->hw_addr;
app->app_hw_addr = mdev->app_hw_addr;
app->ram_hw_addr = mdev->ram_hw_addr;
// Allocate DMA buffer
app->dma_region_len = 16 * 1024;
app->dma_region = dma_alloc_coherent(app->nic_dev, app->dma_region_len,
&app->dma_region_addr, GFP_KERNEL | __GFP_ZERO);
if (!app->dma_region)
return -ENOMEM;
dev_info(dev, "Allocated DMA region virt %p, phys %p",
app->dma_region, (void *)app->dma_region_addr);
// Dump counters
dev_info(dev, "Statistics counters");
print_counters(app);
// DMA test
dev_info(dev, "Write test data");
for (k = 0; k < 256; k++)
((char *)app->dma_region)[k] = k;
dev_info(dev, "Read test data");
print_hex_dump(KERN_INFO, "", DUMP_PREFIX_NONE, 16, 1,
app->dma_region, 256, true);
dev_info(dev, "Start copy to card");
dma_read(app, app->dma_region_addr + 0x0000, 0x100, 0x100);
dev_info(dev, "Start copy to host");
dma_write(app, app->dma_region_addr + 0x0200, 0x100, 0x100);
dev_info(dev, "read test data");
print_hex_dump(KERN_INFO, "", DUMP_PREFIX_NONE, 16, 1,
app->dma_region + 0x0200, 256, true);
if (memcmp(app->dma_region + 0x0000, app->dma_region + 0x0200, 256) == 0) {
dev_info(dev, "test data matches");
} else {
dev_warn(dev, "test data mismatch");
mismatch = 1;
}
if (!mismatch) {
u64 size;
u64 stride;
struct page *page;
dma_addr_t dma_addr;
dev_info(dev, "perform block reads (dma_alloc_coherent)");
for (size = 1; size <= 8192; size *= 2) {
for (stride = size; stride <= max(size, 256llu); stride *= 2) {
dma_block_read_bench(app, app->dma_region_addr + 0x0000,
size, stride, 10000);
}
}
dev_info(dev, "perform block writes (dma_alloc_coherent)");
for (size = 1; size <= 8192; size *= 2) {
for (stride = size; stride <= max(size, 256llu); stride *= 2) {
dma_block_write_bench(app, app->dma_region_addr + 0x0000,
size, stride, 10000);
}
}
page = alloc_pages_node(NUMA_NO_NODE, GFP_ATOMIC | __GFP_NOWARN |
__GFP_COMP | __GFP_MEMALLOC, 2);
if (page) {
dma_addr = dma_map_page(app->nic_dev, page, 0, 4096 * (1 << 2), PCI_DMA_TODEVICE);
if (!dma_mapping_error(app->nic_dev, dma_addr)) {
dev_info(dev, "perform block reads (alloc_pages_node)");
for (size = 1; size <= 8192; size *= 2) {
for (stride = size; stride <= max(size, 256llu); stride *= 2) {
dma_block_read_bench(app, dma_addr + 0x0000,
size, stride, 10000);
}
}
dma_unmap_page(app->nic_dev, dma_addr, 4096 * (1 << 2), PCI_DMA_TODEVICE);
} else {
dev_warn(dev, "DMA mapping error");
}
dma_addr = dma_map_page(app->nic_dev, page, 0, 4096 * (1 << 2), PCI_DMA_FROMDEVICE);
if (!dma_mapping_error(app->nic_dev, dma_addr)) {
dev_info(dev, "perform block writes (alloc_pages_node)");
for (size = 1; size <= 8192; size *= 2) {
for (stride = size; stride <= max(size, 256llu); stride *= 2) {
dma_block_write_bench(app, dma_addr + 0x0000,
size, stride, 10000);
}
}
dma_unmap_page(app->nic_dev, dma_addr, 4096 * (1 << 2), PCI_DMA_FROMDEVICE);
} else {
dev_warn(dev, "DMA mapping error");
}
}
if (page)
__free_pages(page, 2);
else
dev_warn(dev, "failed to allocate memory");
}
// Dump counters
dev_info(dev, "Statistics counters");
print_counters(app);
return 0;
}
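
Note that the probe benchmarks two kinds of host memory: the dma_alloc_coherent() region and streaming-mapped pages from alloc_pages_node(). Depending on the platform's caching and IOMMU configuration these can perform differently, so running both gives a rough sense of that gap.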
static void mqnic_app_dma_bench_remove(struct auxiliary_device *adev)
{
struct mqnic_app_dma_bench *app = dev_get_drvdata(&adev->dev);
struct device *dev = app->dev;
dev_info(dev, "%s() called", __func__);
dma_free_coherent(app->nic_dev, app->dma_region_len, app->dma_region,
app->dma_region_addr);
}
static const struct auxiliary_device_id mqnic_app_dma_bench_id_table[] = {
{ .name = "mqnic.app_12348001" },
{},
};
MODULE_DEVICE_TABLE(auxiliary, mqnic_app_dma_bench_id_table);
static struct auxiliary_driver mqnic_app_dma_bench_driver = {
.name = "mqnic_app_dma_bench",
.probe = mqnic_app_dma_bench_probe,
.remove = mqnic_app_dma_bench_remove,
.id_table = mqnic_app_dma_bench_id_table,
};
static int __init mqnic_app_dma_bench_init(void)
{
return auxiliary_driver_register(&mqnic_app_dma_bench_driver);
}
static void __exit mqnic_app_dma_bench_exit(void)
{
auxiliary_driver_unregister(&mqnic_app_dma_bench_driver);
}
module_init(mqnic_app_dma_bench_init);
module_exit(mqnic_app_dma_bench_exit);


@@ -0,0 +1 @@
../../../common/rtl/

File diff suppressed because it is too large


@@ -0,0 +1,439 @@
# Copyright 2021, The Regents of the University of California.
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice,
# this list of conditions and the following disclaimer.
#
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE REGENTS OF THE UNIVERSITY OF CALIFORNIA ''AS
# IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE REGENTS OF THE UNIVERSITY OF CALIFORNIA OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
# OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
# IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
# OF SUCH DAMAGE.
#
# The views and conclusions contained in the software and documentation are those
# of the authors and should not be interpreted as representing official policies,
# either expressed or implied, of The Regents of the University of California.
TOPLEVEL_LANG = verilog
SIM ?= icarus
WAVES ?= 0
COCOTB_HDL_TIMEUNIT = 1ns
COCOTB_HDL_TIMEPRECISION = 1ps
DUT = mqnic_core_pcie_us
TOPLEVEL = $(DUT)
MODULE = test_$(DUT)
VERILOG_SOURCES += ../../rtl/common/$(DUT).v
VERILOG_SOURCES += ../../rtl/common/mqnic_core_pcie.v
VERILOG_SOURCES += ../../rtl/common/mqnic_core.v
VERILOG_SOURCES += ../../rtl/common/mqnic_interface.v
VERILOG_SOURCES += ../../rtl/common/mqnic_interface_tx.v
VERILOG_SOURCES += ../../rtl/common/mqnic_interface_rx.v
VERILOG_SOURCES += ../../rtl/common/mqnic_egress.v
VERILOG_SOURCES += ../../rtl/common/mqnic_ingress.v
VERILOG_SOURCES += ../../rtl/common/mqnic_l2_egress.v
VERILOG_SOURCES += ../../rtl/common/mqnic_l2_ingress.v
VERILOG_SOURCES += ../../rtl/common/mqnic_ptp.v
VERILOG_SOURCES += ../../rtl/common/mqnic_ptp_clock.v
VERILOG_SOURCES += ../../rtl/common/mqnic_ptp_perout.v
VERILOG_SOURCES += ../../rtl/common/cpl_write.v
VERILOG_SOURCES += ../../rtl/common/cpl_op_mux.v
VERILOG_SOURCES += ../../rtl/common/desc_fetch.v
VERILOG_SOURCES += ../../rtl/common/desc_op_mux.v
VERILOG_SOURCES += ../../rtl/common/event_mux.v
VERILOG_SOURCES += ../../rtl/common/queue_manager.v
VERILOG_SOURCES += ../../rtl/common/cpl_queue_manager.v
VERILOG_SOURCES += ../../rtl/common/tx_fifo.v
VERILOG_SOURCES += ../../rtl/common/rx_fifo.v
VERILOG_SOURCES += ../../rtl/common/tx_req_mux.v
VERILOG_SOURCES += ../../rtl/common/tx_engine.v
VERILOG_SOURCES += ../../rtl/common/rx_engine.v
VERILOG_SOURCES += ../../rtl/common/tx_checksum.v
VERILOG_SOURCES += ../../rtl/common/rx_hash.v
VERILOG_SOURCES += ../../rtl/common/rx_checksum.v
VERILOG_SOURCES += ../../rtl/common/stats_counter.v
VERILOG_SOURCES += ../../rtl/common/stats_collect.v
VERILOG_SOURCES += ../../rtl/common/stats_pcie_if.v
VERILOG_SOURCES += ../../rtl/common/stats_pcie_tlp.v
VERILOG_SOURCES += ../../rtl/common/stats_dma_if_pcie.v
VERILOG_SOURCES += ../../rtl/common/stats_dma_latency.v
VERILOG_SOURCES += ../../rtl/common/mqnic_tx_scheduler_block_rr.v
VERILOG_SOURCES += ../../rtl/common/tx_scheduler_rr.v
VERILOG_SOURCES += ../../rtl/mqnic_app_block_dma_bench.v
VERILOG_SOURCES += ../../lib/eth/rtl/ptp_clock.v
VERILOG_SOURCES += ../../lib/eth/rtl/ptp_clock_cdc.v
VERILOG_SOURCES += ../../lib/eth/rtl/ptp_perout.v
VERILOG_SOURCES += ../../lib/eth/rtl/ptp_ts_extract.v
VERILOG_SOURCES += ../../lib/axi/rtl/axil_crossbar.v
VERILOG_SOURCES += ../../lib/axi/rtl/axil_crossbar_addr.v
VERILOG_SOURCES += ../../lib/axi/rtl/axil_crossbar_rd.v
VERILOG_SOURCES += ../../lib/axi/rtl/axil_crossbar_wr.v
VERILOG_SOURCES += ../../lib/axi/rtl/axil_ram.v
VERILOG_SOURCES += ../../lib/axi/rtl/axil_reg_if.v
VERILOG_SOURCES += ../../lib/axi/rtl/axil_reg_if_rd.v
VERILOG_SOURCES += ../../lib/axi/rtl/axil_reg_if_wr.v
VERILOG_SOURCES += ../../lib/axi/rtl/axil_register_rd.v
VERILOG_SOURCES += ../../lib/axi/rtl/axil_register_wr.v
VERILOG_SOURCES += ../../lib/axi/rtl/arbiter.v
VERILOG_SOURCES += ../../lib/axi/rtl/priority_encoder.v
VERILOG_SOURCES += ../../lib/axis/rtl/axis_adapter.v
VERILOG_SOURCES += ../../lib/axis/rtl/axis_arb_mux.v
VERILOG_SOURCES += ../../lib/axis/rtl/axis_async_fifo.v
VERILOG_SOURCES += ../../lib/axis/rtl/axis_async_fifo_adapter.v
VERILOG_SOURCES += ../../lib/axis/rtl/axis_demux.v
VERILOG_SOURCES += ../../lib/axis/rtl/axis_fifo.v
VERILOG_SOURCES += ../../lib/axis/rtl/axis_fifo_adapter.v
VERILOG_SOURCES += ../../lib/axis/rtl/axis_pipeline_fifo.v
VERILOG_SOURCES += ../../lib/axis/rtl/axis_register.v
VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_axil_master.v
VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_tlp_demux.v
VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_tlp_demux_bar.v
VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_tlp_mux.v
VERILOG_SOURCES += ../../lib/pcie/rtl/dma_if_pcie.v
VERILOG_SOURCES += ../../lib/pcie/rtl/dma_if_pcie_rd.v
VERILOG_SOURCES += ../../lib/pcie/rtl/dma_if_pcie_wr.v
VERILOG_SOURCES += ../../lib/pcie/rtl/dma_if_mux.v
VERILOG_SOURCES += ../../lib/pcie/rtl/dma_if_mux_rd.v
VERILOG_SOURCES += ../../lib/pcie/rtl/dma_if_mux_wr.v
VERILOG_SOURCES += ../../lib/pcie/rtl/dma_if_desc_mux.v
VERILOG_SOURCES += ../../lib/pcie/rtl/dma_ram_demux_rd.v
VERILOG_SOURCES += ../../lib/pcie/rtl/dma_ram_demux_wr.v
VERILOG_SOURCES += ../../lib/pcie/rtl/dma_psdpram.v
VERILOG_SOURCES += ../../lib/pcie/rtl/dma_client_axis_sink.v
VERILOG_SOURCES += ../../lib/pcie/rtl/dma_client_axis_source.v
VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_us_if.v
VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_us_if_rc.v
VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_us_if_rq.v
VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_us_if_cc.v
VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_us_if_cq.v
VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_us_cfg.v
VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_us_msi.v
VERILOG_SOURCES += ../../lib/pcie/rtl/pulse_merge.v
# module parameters
# Structural configuration
export PARAM_IF_COUNT ?= 1
export PARAM_PORTS_PER_IF ?= 1
export PARAM_SCHED_PER_IF ?= $(PARAM_PORTS_PER_IF)
# PTP configuration
export PARAM_PTP_CLOCK_PIPELINE ?= 0
export PARAM_PTP_USE_SAMPLE_CLOCK ?= 0
export PARAM_PTP_SEPARATE_RX_CLOCK ?= 0
export PARAM_PTP_PORT_CDC_PIPELINE ?= 0
export PARAM_PTP_PEROUT_ENABLE ?= 0
export PARAM_PTP_PEROUT_COUNT ?= 1
# Queue manager configuration (interface)
export PARAM_EVENT_QUEUE_OP_TABLE_SIZE ?= 32
export PARAM_TX_QUEUE_OP_TABLE_SIZE ?= 32
export PARAM_RX_QUEUE_OP_TABLE_SIZE ?= 32
export PARAM_TX_CPL_QUEUE_OP_TABLE_SIZE ?= $(PARAM_TX_QUEUE_OP_TABLE_SIZE)
export PARAM_RX_CPL_QUEUE_OP_TABLE_SIZE ?= $(PARAM_RX_QUEUE_OP_TABLE_SIZE)
export PARAM_TX_QUEUE_INDEX_WIDTH ?= 13
export PARAM_RX_QUEUE_INDEX_WIDTH ?= 8
export PARAM_TX_CPL_QUEUE_INDEX_WIDTH ?= $(PARAM_TX_QUEUE_INDEX_WIDTH)
export PARAM_RX_CPL_QUEUE_INDEX_WIDTH ?= $(PARAM_RX_QUEUE_INDEX_WIDTH)
export PARAM_EVENT_QUEUE_PIPELINE ?= 3
export PARAM_TX_QUEUE_PIPELINE ?= $(shell python -c "print(3 + max($(PARAM_TX_QUEUE_INDEX_WIDTH)-12, 0))")
export PARAM_RX_QUEUE_PIPELINE ?= $(shell python -c "print(3 + max($(PARAM_RX_QUEUE_INDEX_WIDTH)-12, 0))")
export PARAM_TX_CPL_QUEUE_PIPELINE ?= $(PARAM_TX_QUEUE_PIPELINE)
export PARAM_RX_CPL_QUEUE_PIPELINE ?= $(PARAM_RX_QUEUE_PIPELINE)
# TX and RX engine configuration (port)
export PARAM_TX_DESC_TABLE_SIZE ?= 32
export PARAM_RX_DESC_TABLE_SIZE ?= 32
# Scheduler configuration (port)
export PARAM_TX_SCHEDULER_OP_TABLE_SIZE ?= $(PARAM_TX_DESC_TABLE_SIZE)
export PARAM_TX_SCHEDULER_PIPELINE ?= $(PARAM_TX_QUEUE_PIPELINE)
export PARAM_TDMA_INDEX_WIDTH ?= 6
# Timestamping configuration (port)
export PARAM_PTP_TS_ENABLE ?= 1
export PARAM_TX_PTP_TS_FIFO_DEPTH ?= 32
export PARAM_RX_PTP_TS_FIFO_DEPTH ?= 32
# Interface configuration (port)
export PARAM_TX_CHECKSUM_ENABLE ?= 1
export PARAM_RX_RSS_ENABLE ?= 1
export PARAM_RX_HASH_ENABLE ?= 1
export PARAM_RX_CHECKSUM_ENABLE ?= 1
export PARAM_TX_FIFO_DEPTH ?= 32768
export PARAM_RX_FIFO_DEPTH ?= 131072
export PARAM_MAX_TX_SIZE ?= 9214
export PARAM_MAX_RX_SIZE ?= 9214
export PARAM_TX_RAM_SIZE ?= 131072
export PARAM_RX_RAM_SIZE ?= 131072
# Application block configuration
export PARAM_APP_ID ?= $(shell echo $$((16#12348001)) )
export PARAM_APP_ENABLE ?= 1
export PARAM_APP_CTRL_ENABLE ?= 0
export PARAM_APP_DMA_ENABLE ?= 1
export PARAM_APP_AXIS_DIRECT_ENABLE ?= 0
export PARAM_APP_AXIS_SYNC_ENABLE ?= 0
export PARAM_APP_AXIS_IF_ENABLE ?= 0
export PARAM_APP_STAT_ENABLE ?= 0
# DMA interface configuration
export PARAM_DMA_IMM_ENABLE ?= 1
export PARAM_DMA_IMM_WIDTH ?= 32
export PARAM_DMA_LEN_WIDTH ?= 16
export PARAM_DMA_TAG_WIDTH ?= 16
export PARAM_RAM_ADDR_WIDTH ?= $(shell python -c "print((max($(PARAM_TX_RAM_SIZE), $(PARAM_RX_RAM_SIZE))-1).bit_length())")
export PARAM_RAM_PIPELINE ?= 2
# PCIe interface configuration
export PARAM_AXIS_PCIE_DATA_WIDTH ?= 512
export PARAM_PF_COUNT ?= 1
export PARAM_VF_COUNT ?= 0
export PARAM_PCIE_TAG_COUNT ?= 64
export PARAM_PCIE_DMA_READ_OP_TABLE_SIZE ?= $(PARAM_PCIE_TAG_COUNT)
export PARAM_PCIE_DMA_READ_TX_LIMIT ?= 16
export PARAM_PCIE_DMA_READ_TX_FC_ENABLE ?= 1
export PARAM_PCIE_DMA_WRITE_OP_TABLE_SIZE ?= 16
export PARAM_PCIE_DMA_WRITE_TX_LIMIT ?= 3
export PARAM_PCIE_DMA_WRITE_TX_FC_ENABLE ?= 1
export PARAM_MSI_COUNT ?= 32
# AXI lite interface configuration (control)
export PARAM_AXIL_CTRL_DATA_WIDTH ?= 32
export PARAM_AXIL_CTRL_ADDR_WIDTH ?= 24
export PARAM_AXIL_CSR_PASSTHROUGH_ENABLE ?= 0
# AXI lite interface configuration (application control)
export PARAM_AXIL_APP_CTRL_DATA_WIDTH ?= $(PARAM_AXIL_CTRL_DATA_WIDTH)
export PARAM_AXIL_APP_CTRL_ADDR_WIDTH ?= 24
# Ethernet interface configuration
export PARAM_AXIS_ETH_DATA_WIDTH ?= 512
export PARAM_AXIS_ETH_SYNC_DATA_WIDTH ?= $(PARAM_AXIS_ETH_DATA_WIDTH)
export PARAM_AXIS_ETH_RX_USE_READY ?= 0
export PARAM_AXIS_ETH_TX_PIPELINE ?= 0
export PARAM_AXIS_ETH_TX_FIFO_PIPELINE ?= 2
export PARAM_AXIS_ETH_TX_TS_PIPELINE ?= 0
export PARAM_AXIS_ETH_RX_PIPELINE ?= 0
export PARAM_AXIS_ETH_RX_FIFO_PIPELINE ?= 2
# Statistics counter subsystem
export PARAM_STAT_ENABLE ?= 1
export PARAM_STAT_DMA_ENABLE ?= 1
export PARAM_STAT_PCIE_ENABLE ?= 1
export PARAM_STAT_INC_WIDTH ?= 24
export PARAM_STAT_ID_WIDTH ?= 12
ifeq ($(SIM), icarus)
PLUSARGS += -fst
COMPILE_ARGS += -P $(TOPLEVEL).IF_COUNT=$(PARAM_IF_COUNT)
COMPILE_ARGS += -P $(TOPLEVEL).PORTS_PER_IF=$(PARAM_PORTS_PER_IF)
COMPILE_ARGS += -P $(TOPLEVEL).SCHED_PER_IF=$(PARAM_SCHED_PER_IF)
COMPILE_ARGS += -P $(TOPLEVEL).PTP_CLOCK_PIPELINE=$(PARAM_PTP_CLOCK_PIPELINE)
COMPILE_ARGS += -P $(TOPLEVEL).PTP_USE_SAMPLE_CLOCK=$(PARAM_PTP_USE_SAMPLE_CLOCK)
COMPILE_ARGS += -P $(TOPLEVEL).PTP_SEPARATE_RX_CLOCK=$(PARAM_PTP_SEPARATE_RX_CLOCK)
COMPILE_ARGS += -P $(TOPLEVEL).PTP_PORT_CDC_PIPELINE=$(PARAM_PTP_PORT_CDC_PIPELINE)
COMPILE_ARGS += -P $(TOPLEVEL).PTP_PEROUT_ENABLE=$(PARAM_PTP_PEROUT_ENABLE)
COMPILE_ARGS += -P $(TOPLEVEL).PTP_PEROUT_COUNT=$(PARAM_PTP_PEROUT_COUNT)
COMPILE_ARGS += -P $(TOPLEVEL).EVENT_QUEUE_OP_TABLE_SIZE=$(PARAM_EVENT_QUEUE_OP_TABLE_SIZE)
COMPILE_ARGS += -P $(TOPLEVEL).TX_QUEUE_OP_TABLE_SIZE=$(PARAM_TX_QUEUE_OP_TABLE_SIZE)
COMPILE_ARGS += -P $(TOPLEVEL).RX_QUEUE_OP_TABLE_SIZE=$(PARAM_RX_QUEUE_OP_TABLE_SIZE)
COMPILE_ARGS += -P $(TOPLEVEL).TX_CPL_QUEUE_OP_TABLE_SIZE=$(PARAM_TX_CPL_QUEUE_OP_TABLE_SIZE)
COMPILE_ARGS += -P $(TOPLEVEL).RX_CPL_QUEUE_OP_TABLE_SIZE=$(PARAM_RX_CPL_QUEUE_OP_TABLE_SIZE)
COMPILE_ARGS += -P $(TOPLEVEL).TX_QUEUE_INDEX_WIDTH=$(PARAM_TX_QUEUE_INDEX_WIDTH)
COMPILE_ARGS += -P $(TOPLEVEL).RX_QUEUE_INDEX_WIDTH=$(PARAM_RX_QUEUE_INDEX_WIDTH)
COMPILE_ARGS += -P $(TOPLEVEL).TX_CPL_QUEUE_INDEX_WIDTH=$(PARAM_TX_CPL_QUEUE_INDEX_WIDTH)
COMPILE_ARGS += -P $(TOPLEVEL).RX_CPL_QUEUE_INDEX_WIDTH=$(PARAM_RX_CPL_QUEUE_INDEX_WIDTH)
COMPILE_ARGS += -P $(TOPLEVEL).EVENT_QUEUE_PIPELINE=$(PARAM_EVENT_QUEUE_PIPELINE)
COMPILE_ARGS += -P $(TOPLEVEL).TX_QUEUE_PIPELINE=$(PARAM_TX_QUEUE_PIPELINE)
COMPILE_ARGS += -P $(TOPLEVEL).RX_QUEUE_PIPELINE=$(PARAM_RX_QUEUE_PIPELINE)
COMPILE_ARGS += -P $(TOPLEVEL).TX_CPL_QUEUE_PIPELINE=$(PARAM_TX_CPL_QUEUE_PIPELINE)
COMPILE_ARGS += -P $(TOPLEVEL).RX_CPL_QUEUE_PIPELINE=$(PARAM_RX_CPL_QUEUE_PIPELINE)
COMPILE_ARGS += -P $(TOPLEVEL).TX_DESC_TABLE_SIZE=$(PARAM_TX_DESC_TABLE_SIZE)
COMPILE_ARGS += -P $(TOPLEVEL).RX_DESC_TABLE_SIZE=$(PARAM_RX_DESC_TABLE_SIZE)
COMPILE_ARGS += -P $(TOPLEVEL).TX_SCHEDULER_OP_TABLE_SIZE=$(PARAM_TX_SCHEDULER_OP_TABLE_SIZE)
COMPILE_ARGS += -P $(TOPLEVEL).TX_SCHEDULER_PIPELINE=$(PARAM_TX_SCHEDULER_PIPELINE)
COMPILE_ARGS += -P $(TOPLEVEL).TDMA_INDEX_WIDTH=$(PARAM_TDMA_INDEX_WIDTH)
COMPILE_ARGS += -P $(TOPLEVEL).PTP_TS_ENABLE=$(PARAM_PTP_TS_ENABLE)
COMPILE_ARGS += -P $(TOPLEVEL).TX_PTP_TS_FIFO_DEPTH=$(PARAM_TX_PTP_TS_FIFO_DEPTH)
COMPILE_ARGS += -P $(TOPLEVEL).RX_PTP_TS_FIFO_DEPTH=$(PARAM_RX_PTP_TS_FIFO_DEPTH)
COMPILE_ARGS += -P $(TOPLEVEL).TX_CHECKSUM_ENABLE=$(PARAM_TX_CHECKSUM_ENABLE)
COMPILE_ARGS += -P $(TOPLEVEL).RX_RSS_ENABLE=$(PARAM_RX_RSS_ENABLE)
COMPILE_ARGS += -P $(TOPLEVEL).RX_HASH_ENABLE=$(PARAM_RX_HASH_ENABLE)
COMPILE_ARGS += -P $(TOPLEVEL).RX_CHECKSUM_ENABLE=$(PARAM_RX_CHECKSUM_ENABLE)
COMPILE_ARGS += -P $(TOPLEVEL).TX_FIFO_DEPTH=$(PARAM_TX_FIFO_DEPTH)
COMPILE_ARGS += -P $(TOPLEVEL).RX_FIFO_DEPTH=$(PARAM_RX_FIFO_DEPTH)
COMPILE_ARGS += -P $(TOPLEVEL).MAX_TX_SIZE=$(PARAM_MAX_TX_SIZE)
COMPILE_ARGS += -P $(TOPLEVEL).MAX_RX_SIZE=$(PARAM_MAX_RX_SIZE)
COMPILE_ARGS += -P $(TOPLEVEL).TX_RAM_SIZE=$(PARAM_TX_RAM_SIZE)
COMPILE_ARGS += -P $(TOPLEVEL).RX_RAM_SIZE=$(PARAM_RX_RAM_SIZE)
COMPILE_ARGS += -P $(TOPLEVEL).APP_ID=$(PARAM_APP_ID)
COMPILE_ARGS += -P $(TOPLEVEL).APP_ENABLE=$(PARAM_APP_ENABLE)
COMPILE_ARGS += -P $(TOPLEVEL).APP_CTRL_ENABLE=$(PARAM_APP_CTRL_ENABLE)
COMPILE_ARGS += -P $(TOPLEVEL).APP_DMA_ENABLE=$(PARAM_APP_DMA_ENABLE)
COMPILE_ARGS += -P $(TOPLEVEL).APP_AXIS_DIRECT_ENABLE=$(PARAM_APP_AXIS_DIRECT_ENABLE)
COMPILE_ARGS += -P $(TOPLEVEL).APP_AXIS_SYNC_ENABLE=$(PARAM_APP_AXIS_SYNC_ENABLE)
COMPILE_ARGS += -P $(TOPLEVEL).APP_AXIS_IF_ENABLE=$(PARAM_APP_AXIS_IF_ENABLE)
COMPILE_ARGS += -P $(TOPLEVEL).APP_STAT_ENABLE=$(PARAM_APP_STAT_ENABLE)
COMPILE_ARGS += -P $(TOPLEVEL).DMA_IMM_ENABLE=$(PARAM_DMA_IMM_ENABLE)
COMPILE_ARGS += -P $(TOPLEVEL).DMA_IMM_WIDTH=$(PARAM_DMA_IMM_WIDTH)
COMPILE_ARGS += -P $(TOPLEVEL).DMA_LEN_WIDTH=$(PARAM_DMA_LEN_WIDTH)
COMPILE_ARGS += -P $(TOPLEVEL).DMA_TAG_WIDTH=$(PARAM_DMA_TAG_WIDTH)
COMPILE_ARGS += -P $(TOPLEVEL).RAM_ADDR_WIDTH=$(PARAM_RAM_ADDR_WIDTH)
COMPILE_ARGS += -P $(TOPLEVEL).RAM_PIPELINE=$(PARAM_RAM_PIPELINE)
COMPILE_ARGS += -P $(TOPLEVEL).AXIS_PCIE_DATA_WIDTH=$(PARAM_AXIS_PCIE_DATA_WIDTH)
COMPILE_ARGS += -P $(TOPLEVEL).PF_COUNT=$(PARAM_PF_COUNT)
COMPILE_ARGS += -P $(TOPLEVEL).VF_COUNT=$(PARAM_VF_COUNT)
COMPILE_ARGS += -P $(TOPLEVEL).PCIE_TAG_COUNT=$(PARAM_PCIE_TAG_COUNT)
COMPILE_ARGS += -P $(TOPLEVEL).PCIE_DMA_READ_OP_TABLE_SIZE=$(PARAM_PCIE_DMA_READ_OP_TABLE_SIZE)
COMPILE_ARGS += -P $(TOPLEVEL).PCIE_DMA_READ_TX_LIMIT=$(PARAM_PCIE_DMA_READ_TX_LIMIT)
COMPILE_ARGS += -P $(TOPLEVEL).PCIE_DMA_READ_TX_FC_ENABLE=$(PARAM_PCIE_DMA_READ_TX_FC_ENABLE)
COMPILE_ARGS += -P $(TOPLEVEL).PCIE_DMA_WRITE_OP_TABLE_SIZE=$(PARAM_PCIE_DMA_WRITE_OP_TABLE_SIZE)
COMPILE_ARGS += -P $(TOPLEVEL).PCIE_DMA_WRITE_TX_LIMIT=$(PARAM_PCIE_DMA_WRITE_TX_LIMIT)
COMPILE_ARGS += -P $(TOPLEVEL).PCIE_DMA_WRITE_TX_FC_ENABLE=$(PARAM_PCIE_DMA_WRITE_TX_FC_ENABLE)
COMPILE_ARGS += -P $(TOPLEVEL).MSI_COUNT=$(PARAM_MSI_COUNT)
COMPILE_ARGS += -P $(TOPLEVEL).AXIL_CTRL_DATA_WIDTH=$(PARAM_AXIL_CTRL_DATA_WIDTH)
COMPILE_ARGS += -P $(TOPLEVEL).AXIL_CTRL_ADDR_WIDTH=$(PARAM_AXIL_CTRL_ADDR_WIDTH)
COMPILE_ARGS += -P $(TOPLEVEL).AXIL_CSR_PASSTHROUGH_ENABLE=$(PARAM_AXIL_CSR_PASSTHROUGH_ENABLE)
COMPILE_ARGS += -P $(TOPLEVEL).AXIL_APP_CTRL_DATA_WIDTH=$(PARAM_AXIL_APP_CTRL_DATA_WIDTH)
COMPILE_ARGS += -P $(TOPLEVEL).AXIL_APP_CTRL_ADDR_WIDTH=$(PARAM_AXIL_APP_CTRL_ADDR_WIDTH)
COMPILE_ARGS += -P $(TOPLEVEL).AXIS_ETH_DATA_WIDTH=$(PARAM_AXIS_ETH_DATA_WIDTH)
COMPILE_ARGS += -P $(TOPLEVEL).AXIS_ETH_SYNC_DATA_WIDTH=$(PARAM_AXIS_ETH_SYNC_DATA_WIDTH)
COMPILE_ARGS += -P $(TOPLEVEL).AXIS_ETH_RX_USE_READY=$(PARAM_AXIS_ETH_RX_USE_READY)
COMPILE_ARGS += -P $(TOPLEVEL).AXIS_ETH_TX_PIPELINE=$(PARAM_AXIS_ETH_TX_PIPELINE)
COMPILE_ARGS += -P $(TOPLEVEL).AXIS_ETH_TX_FIFO_PIPELINE=$(PARAM_AXIS_ETH_TX_FIFO_PIPELINE)
COMPILE_ARGS += -P $(TOPLEVEL).AXIS_ETH_TX_TS_PIPELINE=$(PARAM_AXIS_ETH_TX_TS_PIPELINE)
COMPILE_ARGS += -P $(TOPLEVEL).AXIS_ETH_RX_PIPELINE=$(PARAM_AXIS_ETH_RX_PIPELINE)
COMPILE_ARGS += -P $(TOPLEVEL).AXIS_ETH_RX_FIFO_PIPELINE=$(PARAM_AXIS_ETH_RX_FIFO_PIPELINE)
COMPILE_ARGS += -P $(TOPLEVEL).STAT_ENABLE=$(PARAM_STAT_ENABLE)
COMPILE_ARGS += -P $(TOPLEVEL).STAT_DMA_ENABLE=$(PARAM_STAT_DMA_ENABLE)
COMPILE_ARGS += -P $(TOPLEVEL).STAT_PCIE_ENABLE=$(PARAM_STAT_PCIE_ENABLE)
COMPILE_ARGS += -P $(TOPLEVEL).STAT_INC_WIDTH=$(PARAM_STAT_INC_WIDTH)
COMPILE_ARGS += -P $(TOPLEVEL).STAT_ID_WIDTH=$(PARAM_STAT_ID_WIDTH)
ifeq ($(WAVES), 1)
VERILOG_SOURCES += iverilog_dump.v
COMPILE_ARGS += -s iverilog_dump
endif
else ifeq ($(SIM), verilator)
COMPILE_ARGS += -Wno-SELRANGE -Wno-WIDTH
COMPILE_ARGS += -GIF_COUNT=$(PARAM_IF_COUNT)
COMPILE_ARGS += -GPORTS_PER_IF=$(PARAM_PORTS_PER_IF)
COMPILE_ARGS += -GSCHED_PER_IF=$(PARAM_SCHED_PER_IF)
COMPILE_ARGS += -GPTP_CLOCK_PIPELINE=$(PARAM_PTP_CLOCK_PIPELINE)
COMPILE_ARGS += -GPTP_USE_SAMPLE_CLOCK=$(PARAM_PTP_USE_SAMPLE_CLOCK)
COMPILE_ARGS += -GPTP_SEPARATE_RX_CLOCK=$(PARAM_PTP_SEPARATE_RX_CLOCK)
COMPILE_ARGS += -GPTP_PORT_CDC_PIPELINE=$(PARAM_PTP_PORT_CDC_PIPELINE)
COMPILE_ARGS += -GPTP_PEROUT_ENABLE=$(PARAM_PTP_PEROUT_ENABLE)
COMPILE_ARGS += -GPTP_PEROUT_COUNT=$(PARAM_PTP_PEROUT_COUNT)
COMPILE_ARGS += -GEVENT_QUEUE_OP_TABLE_SIZE=$(PARAM_EVENT_QUEUE_OP_TABLE_SIZE)
COMPILE_ARGS += -GTX_QUEUE_OP_TABLE_SIZE=$(PARAM_TX_QUEUE_OP_TABLE_SIZE)
COMPILE_ARGS += -GRX_QUEUE_OP_TABLE_SIZE=$(PARAM_RX_QUEUE_OP_TABLE_SIZE)
COMPILE_ARGS += -GTX_CPL_QUEUE_OP_TABLE_SIZE=$(PARAM_TX_CPL_QUEUE_OP_TABLE_SIZE)
COMPILE_ARGS += -GRX_CPL_QUEUE_OP_TABLE_SIZE=$(PARAM_RX_CPL_QUEUE_OP_TABLE_SIZE)
COMPILE_ARGS += -GTX_QUEUE_INDEX_WIDTH=$(PARAM_TX_QUEUE_INDEX_WIDTH)
COMPILE_ARGS += -GRX_QUEUE_INDEX_WIDTH=$(PARAM_RX_QUEUE_INDEX_WIDTH)
COMPILE_ARGS += -GTX_CPL_QUEUE_INDEX_WIDTH=$(PARAM_TX_CPL_QUEUE_INDEX_WIDTH)
COMPILE_ARGS += -GRX_CPL_QUEUE_INDEX_WIDTH=$(PARAM_RX_CPL_QUEUE_INDEX_WIDTH)
COMPILE_ARGS += -GEVENT_QUEUE_PIPELINE=$(PARAM_EVENT_QUEUE_PIPELINE)
COMPILE_ARGS += -GTX_QUEUE_PIPELINE=$(PARAM_TX_QUEUE_PIPELINE)
COMPILE_ARGS += -GRX_QUEUE_PIPELINE=$(PARAM_RX_QUEUE_PIPELINE)
COMPILE_ARGS += -GTX_CPL_QUEUE_PIPELINE=$(PARAM_TX_CPL_QUEUE_PIPELINE)
COMPILE_ARGS += -GRX_CPL_QUEUE_PIPELINE=$(PARAM_RX_CPL_QUEUE_PIPELINE)
COMPILE_ARGS += -GTX_DESC_TABLE_SIZE=$(PARAM_TX_DESC_TABLE_SIZE)
COMPILE_ARGS += -GRX_DESC_TABLE_SIZE=$(PARAM_RX_DESC_TABLE_SIZE)
COMPILE_ARGS += -GTX_SCHEDULER_OP_TABLE_SIZE=$(PARAM_TX_SCHEDULER_OP_TABLE_SIZE)
COMPILE_ARGS += -GTX_SCHEDULER_PIPELINE=$(PARAM_TX_SCHEDULER_PIPELINE)
COMPILE_ARGS += -GTDMA_INDEX_WIDTH=$(PARAM_TDMA_INDEX_WIDTH)
COMPILE_ARGS += -GPTP_TS_ENABLE=$(PARAM_PTP_TS_ENABLE)
COMPILE_ARGS += -GTX_PTP_TS_FIFO_DEPTH=$(PARAM_TX_PTP_TS_FIFO_DEPTH)
COMPILE_ARGS += -GRX_PTP_TS_FIFO_DEPTH=$(PARAM_RX_PTP_TS_FIFO_DEPTH)
COMPILE_ARGS += -GTX_CHECKSUM_ENABLE=$(PARAM_TX_CHECKSUM_ENABLE)
COMPILE_ARGS += -GRX_RSS_ENABLE=$(PARAM_RX_RSS_ENABLE)
COMPILE_ARGS += -GRX_HASH_ENABLE=$(PARAM_RX_HASH_ENABLE)
COMPILE_ARGS += -GRX_CHECKSUM_ENABLE=$(PARAM_RX_CHECKSUM_ENABLE)
COMPILE_ARGS += -GTX_FIFO_DEPTH=$(PARAM_TX_FIFO_DEPTH)
COMPILE_ARGS += -GRX_FIFO_DEPTH=$(PARAM_RX_FIFO_DEPTH)
COMPILE_ARGS += -GMAX_TX_SIZE=$(PARAM_MAX_TX_SIZE)
COMPILE_ARGS += -GMAX_RX_SIZE=$(PARAM_MAX_RX_SIZE)
COMPILE_ARGS += -GTX_RAM_SIZE=$(PARAM_TX_RAM_SIZE)
COMPILE_ARGS += -GRX_RAM_SIZE=$(PARAM_RX_RAM_SIZE)
COMPILE_ARGS += -GAPP_ID=$(PARAM_APP_ID)
COMPILE_ARGS += -GAPP_ENABLE=$(PARAM_APP_ENABLE)
COMPILE_ARGS += -GAPP_CTRL_ENABLE=$(PARAM_APP_CTRL_ENABLE)
COMPILE_ARGS += -GAPP_DMA_ENABLE=$(PARAM_APP_DMA_ENABLE)
COMPILE_ARGS += -GAPP_AXIS_DIRECT_ENABLE=$(PARAM_APP_AXIS_DIRECT_ENABLE)
COMPILE_ARGS += -GAPP_AXIS_SYNC_ENABLE=$(PARAM_APP_AXIS_SYNC_ENABLE)
COMPILE_ARGS += -GAPP_AXIS_IF_ENABLE=$(PARAM_APP_AXIS_IF_ENABLE)
COMPILE_ARGS += -GAPP_STAT_ENABLE=$(PARAM_APP_STAT_ENABLE)
COMPILE_ARGS += -GDMA_IMM_ENABLE=$(PARAM_DMA_IMM_ENABLE)
COMPILE_ARGS += -GDMA_IMM_WIDTH=$(PARAM_DMA_IMM_WIDTH)
COMPILE_ARGS += -GDMA_LEN_WIDTH=$(PARAM_DMA_LEN_WIDTH)
COMPILE_ARGS += -GDMA_TAG_WIDTH=$(PARAM_DMA_TAG_WIDTH)
COMPILE_ARGS += -GRAM_ADDR_WIDTH=$(PARAM_RAM_ADDR_WIDTH)
COMPILE_ARGS += -GRAM_PIPELINE=$(PARAM_RAM_PIPELINE)
COMPILE_ARGS += -GAXIS_PCIE_DATA_WIDTH=$(PARAM_AXIS_PCIE_DATA_WIDTH)
COMPILE_ARGS += -GPF_COUNT=$(PARAM_PF_COUNT)
COMPILE_ARGS += -GVF_COUNT=$(PARAM_VF_COUNT)
COMPILE_ARGS += -GPCIE_TAG_COUNT=$(PARAM_PCIE_TAG_COUNT)
COMPILE_ARGS += -GPCIE_DMA_READ_OP_TABLE_SIZE=$(PARAM_PCIE_DMA_READ_OP_TABLE_SIZE)
COMPILE_ARGS += -GPCIE_DMA_READ_TX_LIMIT=$(PARAM_PCIE_DMA_READ_TX_LIMIT)
COMPILE_ARGS += -GPCIE_DMA_READ_TX_FC_ENABLE=$(PARAM_PCIE_DMA_READ_TX_FC_ENABLE)
COMPILE_ARGS += -GPCIE_DMA_WRITE_OP_TABLE_SIZE=$(PARAM_PCIE_DMA_WRITE_OP_TABLE_SIZE)
COMPILE_ARGS += -GPCIE_DMA_WRITE_TX_LIMIT=$(PARAM_PCIE_DMA_WRITE_TX_LIMIT)
COMPILE_ARGS += -GPCIE_DMA_WRITE_TX_FC_ENABLE=$(PARAM_PCIE_DMA_WRITE_TX_FC_ENABLE)
COMPILE_ARGS += -GMSI_COUNT=$(PARAM_MSI_COUNT)
COMPILE_ARGS += -GAXIL_CTRL_DATA_WIDTH=$(PARAM_AXIL_CTRL_DATA_WIDTH)
COMPILE_ARGS += -GAXIL_CTRL_ADDR_WIDTH=$(PARAM_AXIL_CTRL_ADDR_WIDTH)
COMPILE_ARGS += -GAXIL_CSR_PASSTHROUGH_ENABLE=$(PARAM_AXIL_CSR_PASSTHROUGH_ENABLE)
COMPILE_ARGS += -GAXIL_APP_CTRL_DATA_WIDTH=$(PARAM_AXIL_APP_CTRL_DATA_WIDTH)
COMPILE_ARGS += -GAXIL_APP_CTRL_ADDR_WIDTH=$(PARAM_AXIL_APP_CTRL_ADDR_WIDTH)
COMPILE_ARGS += -GAXIS_ETH_DATA_WIDTH=$(PARAM_AXIS_ETH_DATA_WIDTH)
COMPILE_ARGS += -GAXIS_ETH_SYNC_DATA_WIDTH=$(PARAM_AXIS_ETH_SYNC_DATA_WIDTH)
COMPILE_ARGS += -GAXIS_ETH_RX_USE_READY=$(PARAM_AXIS_ETH_RX_USE_READY)
COMPILE_ARGS += -GAXIS_ETH_TX_PIPELINE=$(PARAM_AXIS_ETH_TX_PIPELINE)
COMPILE_ARGS += -GAXIS_ETH_TX_FIFO_PIPELINE=$(PARAM_AXIS_ETH_TX_FIFO_PIPELINE)
COMPILE_ARGS += -GAXIS_ETH_TX_TS_PIPELINE=$(PARAM_AXIS_ETH_TX_TS_PIPELINE)
COMPILE_ARGS += -GAXIS_ETH_RX_PIPELINE=$(PARAM_AXIS_ETH_RX_PIPELINE)
COMPILE_ARGS += -GAXIS_ETH_RX_FIFO_PIPELINE=$(PARAM_AXIS_ETH_RX_FIFO_PIPELINE)
COMPILE_ARGS += -GSTAT_ENABLE=$(PARAM_STAT_ENABLE)
COMPILE_ARGS += -GSTAT_DMA_ENABLE=$(PARAM_STAT_DMA_ENABLE)
COMPILE_ARGS += -GSTAT_PCIE_ENABLE=$(PARAM_STAT_PCIE_ENABLE)
COMPILE_ARGS += -GSTAT_INC_WIDTH=$(PARAM_STAT_INC_WIDTH)
COMPILE_ARGS += -GSTAT_ID_WIDTH=$(PARAM_STAT_ID_WIDTH)
ifeq ($(WAVES), 1)
COMPILE_ARGS += --trace-fst
endif
endif
include $(shell cocotb-config --makefiles)/Makefile.sim
iverilog_dump.v:
echo 'module iverilog_dump();' > $@
echo 'initial begin' >> $@
echo ' $$dumpfile("$(TOPLEVEL).fst");' >> $@
echo ' $$dumpvars(0, $(TOPLEVEL));' >> $@
echo 'end' >> $@
echo 'endmodule' >> $@
clean::
@rm -rf iverilog_dump.v
@rm -rf dump.fst $(TOPLEVEL).fst
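
With cocotb, cocotb-test, and the cocotbext-axi/eth/pcie packages available, the bench is normally launched by running make in this directory; the SIM and WAVES variables declared at the top select the simulator and wave dumping (for example, make SIM=verilator WAVES=1).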


@@ -0,0 +1 @@
../../../../common/tb/mqnic.py


@@ -0,0 +1,967 @@
"""
Copyright 2021, The Regents of the University of California.
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
1. Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE REGENTS OF THE UNIVERSITY OF CALIFORNIA ''AS
IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE REGENTS OF THE UNIVERSITY OF CALIFORNIA OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT
OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING
IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY
OF SUCH DAMAGE.
The views and conclusions contained in the software and documentation are those
of the authors and should not be interpreted as representing official policies,
either expressed or implied, of The Regents of the University of California.
"""
import logging
import os
import sys
import scapy.utils
from scapy.layers.l2 import Ether
from scapy.layers.inet import IP, UDP
import cocotb_test.simulator
import pytest
import cocotb
from cocotb.log import SimLog
from cocotb.clock import Clock
from cocotb.triggers import RisingEdge, FallingEdge, Timer
from cocotbext.axi import AxiStreamBus
from cocotbext.eth import EthMac
from cocotbext.pcie.core import RootComplex
from cocotbext.pcie.xilinx.us import UltraScalePlusPcieDevice
try:
import mqnic
except ImportError:
# attempt import from current directory
sys.path.insert(0, os.path.join(os.path.dirname(__file__)))
try:
import mqnic
finally:
del sys.path[0]
class TB(object):
def __init__(self, dut):
self.dut = dut
self.log = SimLog("cocotb.tb")
self.log.setLevel(logging.DEBUG)
# PCIe
self.rc = RootComplex()
self.rc.max_payload_size = 0x1 # 256 bytes
self.rc.max_read_request_size = 0x2 # 512 bytes
self.dev = UltraScalePlusPcieDevice(
# configuration options
pcie_generation=3,
# pcie_link_width=16,
user_clk_frequency=250e6,
alignment="dword",
cq_cc_straddle=False,
rq_rc_straddle=False,
rc_4tlp_straddle=False,
enable_pf1=False,
enable_client_tag=True,
enable_extended_tag=True,
enable_parity=False,
enable_rx_msg_interface=False,
enable_sriov=False,
enable_extended_configuration=False,
enable_pf0_msi=True,
enable_pf1_msi=False,
# signals
# Clock and Reset Interface
user_clk=dut.clk,
user_reset=dut.rst,
# user_lnk_up
# sys_clk
# sys_clk_gt
# sys_reset
# phy_rdy_out
# Requester reQuest Interface
rq_bus=AxiStreamBus.from_prefix(dut, "m_axis_rq"),
pcie_rq_seq_num0=dut.s_axis_rq_seq_num_0,
pcie_rq_seq_num_vld0=dut.s_axis_rq_seq_num_valid_0,
pcie_rq_seq_num1=dut.s_axis_rq_seq_num_1,
pcie_rq_seq_num_vld1=dut.s_axis_rq_seq_num_valid_1,
# pcie_rq_tag0
# pcie_rq_tag1
# pcie_rq_tag_av
# pcie_rq_tag_vld0
# pcie_rq_tag_vld1
# Requester Completion Interface
rc_bus=AxiStreamBus.from_prefix(dut, "s_axis_rc"),
# Completer reQuest Interface
cq_bus=AxiStreamBus.from_prefix(dut, "s_axis_cq"),
# pcie_cq_np_req
# pcie_cq_np_req_count
# Completer Completion Interface
cc_bus=AxiStreamBus.from_prefix(dut, "m_axis_cc"),
# Transmit Flow Control Interface
# pcie_tfc_nph_av=dut.pcie_tfc_nph_av,
# pcie_tfc_npd_av=dut.pcie_tfc_npd_av,
# Configuration Management Interface
cfg_mgmt_addr=dut.cfg_mgmt_addr,
cfg_mgmt_function_number=dut.cfg_mgmt_function_number,
cfg_mgmt_write=dut.cfg_mgmt_write,
cfg_mgmt_write_data=dut.cfg_mgmt_write_data,
cfg_mgmt_byte_enable=dut.cfg_mgmt_byte_enable,
cfg_mgmt_read=dut.cfg_mgmt_read,
cfg_mgmt_read_data=dut.cfg_mgmt_read_data,
cfg_mgmt_read_write_done=dut.cfg_mgmt_read_write_done,
# cfg_mgmt_debug_access
# Configuration Status Interface
# cfg_phy_link_down
# cfg_phy_link_status
# cfg_negotiated_width
# cfg_current_speed
cfg_max_payload=dut.cfg_max_payload,
cfg_max_read_req=dut.cfg_max_read_req,
# cfg_function_status
# cfg_vf_status
# cfg_function_power_state
# cfg_vf_power_state
# cfg_link_power_state
# cfg_err_cor_out
# cfg_err_nonfatal_out
# cfg_err_fatal_out
# cfg_local_error_out
# cfg_local_error_valid
# cfg_rx_pm_state
# cfg_tx_pm_state
# cfg_ltssm_state
# cfg_rcb_status
# cfg_obff_enable
# cfg_pl_status_change
# cfg_tph_requester_enable
# cfg_tph_st_mode
# cfg_vf_tph_requester_enable
# cfg_vf_tph_st_mode
# Configuration Received Message Interface
# cfg_msg_received
# cfg_msg_received_data
# cfg_msg_received_type
# Configuration Transmit Message Interface
# cfg_msg_transmit
# cfg_msg_transmit_type
# cfg_msg_transmit_data
# cfg_msg_transmit_done
# Configuration Flow Control Interface
cfg_fc_ph=dut.cfg_fc_ph,
cfg_fc_pd=dut.cfg_fc_pd,
cfg_fc_nph=dut.cfg_fc_nph,
cfg_fc_npd=dut.cfg_fc_npd,
cfg_fc_cplh=dut.cfg_fc_cplh,
cfg_fc_cpld=dut.cfg_fc_cpld,
cfg_fc_sel=dut.cfg_fc_sel,
# Configuration Control Interface
# cfg_hot_reset_in
# cfg_hot_reset_out
# cfg_config_space_enable
# cfg_dsn
# cfg_bus_number
# cfg_ds_port_number
# cfg_ds_bus_number
# cfg_ds_device_number
# cfg_ds_function_number
# cfg_power_state_change_ack
# cfg_power_state_change_interrupt
cfg_err_cor_in=dut.status_error_cor,
cfg_err_uncor_in=dut.status_error_uncor,
# cfg_flr_in_process
# cfg_flr_done
# cfg_vf_flr_in_process
# cfg_vf_flr_func_num
# cfg_vf_flr_done
# cfg_pm_aspm_l1_entry_reject
# cfg_pm_aspm_tx_l0s_entry_disable
# cfg_req_pm_transition_l23_ready
# cfg_link_training_enable
# Configuration Interrupt Controller Interface
# cfg_interrupt_int
# cfg_interrupt_sent
# cfg_interrupt_pending
cfg_interrupt_msi_enable=dut.cfg_interrupt_msi_enable,
cfg_interrupt_msi_mmenable=dut.cfg_interrupt_msi_mmenable,
cfg_interrupt_msi_mask_update=dut.cfg_interrupt_msi_mask_update,
cfg_interrupt_msi_data=dut.cfg_interrupt_msi_data,
# cfg_interrupt_msi_select=dut.cfg_interrupt_msi_select,
cfg_interrupt_msi_int=dut.cfg_interrupt_msi_int,
cfg_interrupt_msi_pending_status=dut.cfg_interrupt_msi_pending_status,
cfg_interrupt_msi_pending_status_data_enable=dut.cfg_interrupt_msi_pending_status_data_enable,
# cfg_interrupt_msi_pending_status_function_num=dut.cfg_interrupt_msi_pending_status_function_num,
cfg_interrupt_msi_sent=dut.cfg_interrupt_msi_sent,
cfg_interrupt_msi_fail=dut.cfg_interrupt_msi_fail,
# cfg_interrupt_msix_enable
# cfg_interrupt_msix_mask
# cfg_interrupt_msix_vf_enable
# cfg_interrupt_msix_vf_mask
# cfg_interrupt_msix_address
# cfg_interrupt_msix_data
# cfg_interrupt_msix_int
# cfg_interrupt_msix_vec_pending
# cfg_interrupt_msix_vec_pending_status
cfg_interrupt_msi_attr=dut.cfg_interrupt_msi_attr,
cfg_interrupt_msi_tph_present=dut.cfg_interrupt_msi_tph_present,
cfg_interrupt_msi_tph_type=dut.cfg_interrupt_msi_tph_type,
# cfg_interrupt_msi_tph_st_tag=dut.cfg_interrupt_msi_tph_st_tag,
# cfg_interrupt_msi_function_number=dut.cfg_interrupt_msi_function_number,
# Configuration Extend Interface
# cfg_ext_read_received
# cfg_ext_write_received
# cfg_ext_register_number
# cfg_ext_function_number
# cfg_ext_write_data
# cfg_ext_write_byte_enable
# cfg_ext_read_data
# cfg_ext_read_data_valid
)
# self.dev.log.setLevel(logging.DEBUG)
self.rc.make_port().connect(self.dev)
self.driver = mqnic.Driver()
self.dev.functions[0].msi_cap.msi_multiple_message_capable = 5
self.dev.functions[0].configure_bar(0, 2**len(dut.core_pcie_inst.axil_ctrl_araddr), ext=True, prefetch=True)
if hasattr(dut.core_pcie_inst, 'pcie_app_ctrl'):
self.dev.functions[0].configure_bar(2, 2**len(dut.core_pcie_inst.axil_app_ctrl_araddr), ext=True, prefetch=True)
# Ethernet
self.port_mac = []
eth_int_if_width = len(dut.core_pcie_inst.core_inst.iface[0].port[0].rx_async_fifo_inst.m_axis_tdata)
eth_clock_period = 6.4
eth_speed = 10e9
if eth_int_if_width == 64:
# 10G
eth_clock_period = 6.4
eth_speed = 10e9
elif eth_int_if_width == 128:
# 25G
eth_clock_period = 2.56
eth_speed = 25e9
elif eth_int_if_width == 512:
# 100G
eth_clock_period = 3.102
eth_speed = 100e9
for iface in dut.core_pcie_inst.core_inst.iface:
for port in iface.port:
cocotb.start_soon(Clock(port.port_rx_clk, eth_clock_period, units="ns").start())
cocotb.start_soon(Clock(port.port_tx_clk, eth_clock_period, units="ns").start())
port.port_rx_rst.setimmediatevalue(0)
port.port_tx_rst.setimmediatevalue(0)
mac = EthMac(
tx_clk=port.port_tx_clk,
tx_rst=port.port_tx_rst,
tx_bus=AxiStreamBus.from_prefix(port, "axis_tx"),
tx_ptp_time=port.ptp.tx_ptp_cdc_inst.output_ts,
tx_ptp_ts=port.ptp.axis_tx_ptp_ts,
tx_ptp_ts_tag=port.ptp.axis_tx_ptp_ts_tag,
tx_ptp_ts_valid=port.ptp.axis_tx_ptp_ts_valid,
rx_clk=port.port_rx_clk,
rx_rst=port.port_rx_rst,
rx_bus=AxiStreamBus.from_prefix(port, "axis_rx"),
rx_ptp_time=port.ptp.rx_ptp_cdc_inst.output_ts,
ifg=12, speed=eth_speed
)
self.port_mac.append(mac)
dut.ctrl_reg_wr_wait.setimmediatevalue(0)
dut.ctrl_reg_wr_ack.setimmediatevalue(0)
dut.ctrl_reg_rd_data.setimmediatevalue(0)
dut.ctrl_reg_rd_wait.setimmediatevalue(0)
dut.ctrl_reg_rd_ack.setimmediatevalue(0)
dut.ptp_sample_clk.setimmediatevalue(0)
dut.s_axis_stat_tdata.setimmediatevalue(0)
dut.s_axis_stat_tid.setimmediatevalue(0)
dut.s_axis_stat_tvalid.setimmediatevalue(0)
self.loopback_enable = False
cocotb.start_soon(self._run_loopback())
async def init(self):
for mac in self.port_mac:
mac.rx.reset.setimmediatevalue(0)
mac.tx.reset.setimmediatevalue(0)
await RisingEdge(self.dut.clk)
await RisingEdge(self.dut.clk)
for mac in self.port_mac:
mac.rx.reset.setimmediatevalue(1)
mac.tx.reset.setimmediatevalue(1)
await FallingEdge(self.dut.rst)
await Timer(100, 'ns')
await RisingEdge(self.dut.clk)
await RisingEdge(self.dut.clk)
for mac in self.port_mac:
mac.rx.reset.setimmediatevalue(0)
mac.tx.reset.setimmediatevalue(0)
await self.rc.enumerate(enable_bus_mastering=True, configure_msi=True)
async def _run_loopback(self):
while True:
await RisingEdge(self.dut.clk)
if self.loopback_enable:
for mac in self.port_mac:
if not mac.tx.empty():
await mac.rx.send(await mac.tx.recv())
@cocotb.test()
async def run_test_nic(dut):
tb = TB(dut)
await tb.init()
tb.log.info("Init driver")
await tb.driver.init_pcie_dev(tb.rc, tb.dev.functions[0].pcie_id)
for interface in tb.driver.interfaces:
await interface.open()
# enable queues
tb.log.info("Enable queues")
for interface in tb.driver.interfaces:
await interface.sched_blocks[0].schedulers[0].rb.write_dword(mqnic.MQNIC_RB_SCHED_RR_REG_CTRL, 0x00000001)
for k in range(interface.tx_queue_count):
await interface.sched_blocks[0].schedulers[0].hw_regs.write_dword(4*k, 0x00000003)
# wait for all writes to complete
await tb.driver.hw_regs.read_dword(0)
tb.log.info("Init complete")
tb.log.info("Send and receive single packet")
for interface in tb.driver.interfaces:
data = bytearray([x % 256 for x in range(1024)])
await interface.start_xmit(data, 0)
pkt = await tb.port_mac[interface.index*interface.port_count].tx.recv()
tb.log.info("Packet: %s", pkt)
await tb.port_mac[interface.index*interface.port_count].rx.send(pkt)
pkt = await interface.recv()
tb.log.info("Packet: %s", pkt)
assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff
tb.log.info("RX and TX checksum tests")
payload = bytes([x % 256 for x in range(256)])
eth = Ether(src='5A:51:52:53:54:55', dst='DA:D1:D2:D3:D4:D5')
ip = IP(src='192.168.1.100', dst='192.168.1.101')
udp = UDP(sport=1, dport=2)
test_pkt = eth / ip / udp / payload
test_pkt2 = test_pkt.copy()
test_pkt2[UDP].chksum = scapy.utils.checksum(bytes(test_pkt2[UDP]))
await tb.driver.interfaces[0].start_xmit(test_pkt2.build(), 0, 34, 6)
pkt = await tb.port_mac[0].tx.recv()
tb.log.info("Packet: %s", pkt)
await tb.port_mac[0].rx.send(pkt)
pkt = await tb.driver.interfaces[0].recv()
tb.log.info("Packet: %s", pkt)
assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff
assert Ether(pkt.data).build() == test_pkt.build()
tb.log.info("Multiple small packets")
count = 64
pkts = [bytearray([(x+k) % 256 for x in range(60)]) for k in range(count)]
tb.loopback_enable = True
for p in pkts:
await tb.driver.interfaces[0].start_xmit(p, 0)
for k in range(count):
pkt = await tb.driver.interfaces[0].recv()
tb.log.info("Packet: %s", pkt)
assert pkt.data == pkts[k]
assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff
tb.loopback_enable = False
tb.log.info("Multiple large packets")
count = 64
pkts = [bytearray([(x+k) % 256 for x in range(1514)]) for k in range(count)]
tb.loopback_enable = True
for p in pkts:
await tb.driver.interfaces[0].start_xmit(p, 0)
for k in range(count):
pkt = await tb.driver.interfaces[0].recv()
tb.log.info("Packet: %s", pkt)
assert pkt.data == pkts[k]
assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff
tb.loopback_enable = False
tb.log.info("Jumbo frames")
count = 64
pkts = [bytearray([(x+k) % 256 for x in range(9014)]) for k in range(count)]
tb.loopback_enable = True
for p in pkts:
await tb.driver.interfaces[0].start_xmit(p, 0)
for k in range(count):
pkt = await tb.driver.interfaces[0].recv()
tb.log.info("Packet: %s", pkt)
assert pkt.data == pkts[k]
assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff
tb.loopback_enable = False
if len(tb.driver.interfaces) > 1:
tb.log.info("All interfaces")
count = 64
pkts = [bytearray([(x+k) % 256 for x in range(1514)]) for k in range(count)]
tb.loopback_enable = True
for k, p in enumerate(pkts):
await tb.driver.interfaces[k % len(tb.driver.interfaces)].start_xmit(p, 0)
for k in range(count):
pkt = await tb.driver.interfaces[k % len(tb.driver.interfaces)].recv()
tb.log.info("Packet: %s", pkt)
assert pkt.data == pkts[k]
assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff
tb.loopback_enable = False
if len(tb.driver.interfaces[0].sched_blocks) > 1:
tb.log.info("All interface 0 scheduler blocks")
for block in tb.driver.interfaces[0].sched_blocks:
await block.schedulers[0].rb.write_dword(mqnic.MQNIC_RB_SCHED_RR_REG_CTRL, 0x00000001)
for k in range(block.interface.tx_queue_count):
if k % len(tb.driver.interfaces[0].sched_blocks) == block.index:
await block.schedulers[0].hw_regs.write_dword(4*k, 0x00000003)
else:
await block.schedulers[0].hw_regs.write_dword(4*k, 0x00000000)
count = 64
pkts = [bytearray([(x+k) % 256 for x in range(1514)]) for k in range(count)]
tb.loopback_enable = True
for k, p in enumerate(pkts):
await tb.driver.interfaces[0].start_xmit(p, k % len(tb.driver.interfaces[0].sched_blocks))
for k in range(count):
pkt = await tb.driver.interfaces[0].recv()
tb.log.info("Packet: %s", pkt)
# assert pkt.data == pkts[k]
assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff
tb.loopback_enable = False
for block in tb.driver.interfaces[0].sched_blocks[1:]:
await block.schedulers[0].rb.write_dword(mqnic.MQNIC_RB_SCHED_RR_REG_CTRL, 0x00000000)
mem = tb.rc.mem_pool.alloc_region(16*1024*1024)
mem_base = mem.get_absolute_address(0)
tb.log.info("Test DMA")
# write test data into host memory
mem[0:1024] = bytearray([x % 256 for x in range(1024)])
# write pcie read descriptor
await tb.driver.app_hw_regs.write_dword(0x000100, (mem_base+0x0000) & 0xffffffff)
await tb.driver.app_hw_regs.write_dword(0x000104, (mem_base+0x0000 >> 32) & 0xffffffff)
await tb.driver.app_hw_regs.write_dword(0x000108, 0x100)
await tb.driver.app_hw_regs.write_dword(0x000110, 0x400)
await tb.driver.app_hw_regs.write_dword(0x000114, 0xAA)
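# A plausible reading of the five writes above, based on the values used:
# 0x100/0x104 carry the host buffer address (low/high dwords), 0x108 the
# on-chip RAM address (0x100), 0x110 the transfer length (0x400 bytes), and
# 0x114 an 8-bit tag (0xAA). This layout is inferred from the test, not
# taken from a register map.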
await Timer(2000, 'ns')
# read status
val = await tb.driver.app_hw_regs.read_dword(0x000118)
tb.log.info("Status: 0x%x", val)
assert val == 0x800000AA
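# The status word appears to pack a completion flag into bit 31 and echo the
# tag in the low byte, hence 0x800000AA for tag 0xAA (and 0x80000055 for the
# write descriptor below).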
# write pcie write descriptor
await tb.driver.app_hw_regs.write_dword(0x000200, (mem_base+0x1000) & 0xffffffff)
await tb.driver.app_hw_regs.write_dword(0x000204, (mem_base+0x1000 >> 32) & 0xffffffff)
await tb.driver.app_hw_regs.write_dword(0x000208, 0x100)
await tb.driver.app_hw_regs.write_dword(0x000210, 0x400)
await tb.driver.app_hw_regs.write_dword(0x000214, 0x55)
await Timer(2000, 'ns')
# read status
val = await tb.driver.app_hw_regs.read_dword(0x000218)
tb.log.info("Status: 0x%x", val)
assert val == 0x80000055
tb.log.info("%s", mem.hexdump_str(0x1000, 64))
assert mem[0:1024] == mem[0x1000:0x1000+1024]
tb.log.info("Test immediate write")
# write pcie write descriptor
await tb.driver.app_hw_regs.write_dword(0x000200, (mem_base+0x1000) & 0xffffffff)
await tb.driver.app_hw_regs.write_dword(0x000204, (mem_base+0x1000 >> 32) & 0xffffffff)
await tb.driver.app_hw_regs.write_dword(0x000208, 0x44332211)
await tb.driver.app_hw_regs.write_dword(0x000210, 0x4)
await tb.driver.app_hw_regs.write_dword(0x000214, 0x800000AA)
await Timer(2000, 'ns')
# read status
val = await tb.driver.app_hw_regs.read_dword(0x000218)
tb.log.info("Status: 0x%x", val)
assert val == 0x800000AA
tb.log.info("%s", mem.hexdump_str(0x1000, 64))
assert mem[0x1000:0x1000+4] == b'\x11\x22\x33\x44'
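# Setting bit 31 of the tag/control word (0x800000AA instead of a bare tag)
# appears to select immediate mode: the register at 0x208, which otherwise
# carries the RAM address, is reused for the immediate data (0x44332211),
# and the value lands in host memory least significant byte first, hence
# b'\x11\x22\x33\x44'. Again, inferred from the observed register usage.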
tb.log.info("Test DMA block operations")
region_len = 0x2000
src_offset = 0x0000
dest_offset = 0x4000
block_size = 256
block_stride = block_size
block_count = 32
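# The block-transfer engine below is programmed with base/offset/mask/stride
# register sets on both the DMA (host) side and the RAM side. A plausible
# reading is that block n is transferred at
#   base + ((offset + n*stride) & mask)
# so with mask = region_len-1 the engine wraps within an 8 kB window. This
# formula is an assumption based on the register names, not documentation.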
# write test data into host memory
mem[src_offset:src_offset+region_len] = bytearray([x % 256 for x in range(region_len)])
# enable DMA
await tb.driver.app_hw_regs.write_dword(0x000000, 1)
# disable interrupts
await tb.driver.app_hw_regs.write_dword(0x000008, 0)
# configure operation (read)
# DMA base address
await tb.driver.app_hw_regs.write_dword(0x001080, (mem_base+src_offset) & 0xffffffff)
await tb.driver.app_hw_regs.write_dword(0x001084, (mem_base+src_offset >> 32) & 0xffffffff)
# DMA offset address
await tb.driver.app_hw_regs.write_dword(0x001088, 0)
await tb.driver.app_hw_regs.write_dword(0x00108c, 0)
# DMA offset mask
await tb.driver.app_hw_regs.write_dword(0x001090, region_len-1)
await tb.driver.app_hw_regs.write_dword(0x001094, 0)
# DMA stride
await tb.driver.app_hw_regs.write_dword(0x001098, block_stride)
await tb.driver.app_hw_regs.write_dword(0x00109c, 0)
# RAM base address
await tb.driver.app_hw_regs.write_dword(0x0010c0, 0)
await tb.driver.app_hw_regs.write_dword(0x0010c4, 0)
# RAM offset address
await tb.driver.app_hw_regs.write_dword(0x0010c8, 0)
await tb.driver.app_hw_regs.write_dword(0x0010cc, 0)
# RAM offset mask
await tb.driver.app_hw_regs.write_dword(0x0010d0, region_len-1)
await tb.driver.app_hw_regs.write_dword(0x0010d4, 0)
# RAM stride
await tb.driver.app_hw_regs.write_dword(0x0010d8, block_stride)
await tb.driver.app_hw_regs.write_dword(0x0010dc, 0)
# clear cycle count
await tb.driver.app_hw_regs.write_dword(0x001008, 0)
await tb.driver.app_hw_regs.write_dword(0x00100c, 0)
# block length
await tb.driver.app_hw_regs.write_dword(0x001010, block_size)
# block count
await tb.driver.app_hw_regs.write_dword(0x001018, block_count)
await tb.driver.app_hw_regs.write_dword(0x00101c, 0)
# start
await tb.driver.app_hw_regs.write_dword(0x001000, 1)
for k in range(10):
    cnt = await tb.driver.app_hw_regs.read_dword(0x001018)
    await Timer(1000, 'ns')
    if cnt == 0:
        break
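# The block count register (0x001018) appears to count down as blocks
# complete, so the loop above polls it (up to 10 times, 1 us apart) until it
# reaches zero. The same pattern is repeated for the write engine below.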
# configure operation (write)
# DMA base address
await tb.driver.app_hw_regs.write_dword(0x001180, (mem_base+dest_offset) & 0xffffffff)
await tb.driver.app_hw_regs.write_dword(0x001184, (mem_base+dest_offset >> 32) & 0xffffffff)
# DMA offset address
await tb.driver.app_hw_regs.write_dword(0x001188, 0)
await tb.driver.app_hw_regs.write_dword(0x00118c, 0)
# DMA offset mask
await tb.driver.app_hw_regs.write_dword(0x001190, region_len-1)
await tb.driver.app_hw_regs.write_dword(0x001194, 0)
# DMA stride
await tb.driver.app_hw_regs.write_dword(0x001198, block_stride)
await tb.driver.app_hw_regs.write_dword(0x00119c, 0)
# RAM base address
await tb.driver.app_hw_regs.write_dword(0x0011c0, 0)
await tb.driver.app_hw_regs.write_dword(0x0011c4, 0)
# RAM offset address
await tb.driver.app_hw_regs.write_dword(0x0011c8, 0)
await tb.driver.app_hw_regs.write_dword(0x0011cc, 0)
# RAM offset mask
await tb.driver.app_hw_regs.write_dword(0x0011d0, region_len-1)
await tb.driver.app_hw_regs.write_dword(0x0011d4, 0)
# RAM stride
await tb.driver.app_hw_regs.write_dword(0x0011d8, block_stride)
await tb.driver.app_hw_regs.write_dword(0x0011dc, 0)
# clear cycle count
await tb.driver.app_hw_regs.write_dword(0x001108, 0)
await tb.driver.app_hw_regs.write_dword(0x00110c, 0)
# block length
await tb.driver.app_hw_regs.write_dword(0x001110, block_size)
# block count
await tb.driver.app_hw_regs.write_dword(0x001118, block_count)
await tb.driver.app_hw_regs.write_dword(0x00111c, 0)
# start
await tb.driver.app_hw_regs.write_dword(0x001100, 1)
for k in range(10):
    cnt = await tb.driver.app_hw_regs.read_dword(0x001118)
    await Timer(1000, 'ns')
    if cnt == 0:
        break
tb.log.info("%s", mem.hexdump_str(dest_offset, region_len))
assert mem[src_offset:src_offset+region_len] == mem[dest_offset:dest_offset+region_len]
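# Round trip verified: the read engine pulled the 8 kB source region from
# host memory into the on-chip RAM and the write engine pushed it back out
# to dest_offset, so the two host regions must now match. The cycle counters
# cleared at 0x001008/0x001108 could presumably be read back at this point
# to estimate throughput.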
tb.log.info("Read statistics counters")
await Timer(2000, 'ns')
lst = []
for k in range(64):
    lst.append(await tb.driver.hw_regs.read_dword(0x010000+k*8))
print(lst)
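# The 64 dwords read above come from the statistics counter region at
# 0x010000, one counter per 8-byte slot (only the low dword of each is read
# here); they presumably correspond to the PCIe/DMA statistics counters
# exposed by the core when STAT_ENABLE is set.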
await RisingEdge(dut.clk)
await RisingEdge(dut.clk)
# cocotb-test
tests_dir = os.path.dirname(__file__)
rtl_dir = os.path.abspath(os.path.join(tests_dir, '..', '..', 'rtl'))
lib_dir = os.path.abspath(os.path.join(rtl_dir, '..', 'lib'))
axi_rtl_dir = os.path.abspath(os.path.join(lib_dir, 'axi', 'rtl'))
axis_rtl_dir = os.path.abspath(os.path.join(lib_dir, 'axis', 'rtl'))
eth_rtl_dir = os.path.abspath(os.path.join(lib_dir, 'eth', 'rtl'))
pcie_rtl_dir = os.path.abspath(os.path.join(lib_dir, 'pcie', 'rtl'))
@pytest.mark.parametrize(("if_count", "ports_per_if", "axis_pcie_data_width",
        "axis_eth_data_width", "axis_eth_sync_data_width"), [
    (1, 1, 256, 64, 64),
    (2, 1, 256, 64, 64),
    (1, 2, 256, 64, 64),
    (1, 1, 256, 64, 128),
    (1, 1, 512, 64, 64),
    (1, 1, 512, 64, 128),
    (1, 1, 512, 512, 512),
])
def test_mqnic_core_pcie_us(request, if_count, ports_per_if, axis_pcie_data_width,
axis_eth_data_width, axis_eth_sync_data_width):
    dut = "mqnic_core_pcie_us"
    module = os.path.splitext(os.path.basename(__file__))[0]
    toplevel = dut
    verilog_sources = [
        os.path.join(rtl_dir, "common", f"{dut}.v"),
        os.path.join(rtl_dir, "common", "mqnic_core.v"),
        os.path.join(rtl_dir, "common", "mqnic_core_pcie.v"),
        os.path.join(rtl_dir, "common", "mqnic_interface.v"),
        os.path.join(rtl_dir, "common", "mqnic_interface_tx.v"),
        os.path.join(rtl_dir, "common", "mqnic_interface_rx.v"),
        os.path.join(rtl_dir, "common", "mqnic_egress.v"),
        os.path.join(rtl_dir, "common", "mqnic_ingress.v"),
        os.path.join(rtl_dir, "common", "mqnic_l2_egress.v"),
        os.path.join(rtl_dir, "common", "mqnic_l2_ingress.v"),
        os.path.join(rtl_dir, "common", "mqnic_ptp.v"),
        os.path.join(rtl_dir, "common", "mqnic_ptp_clock.v"),
        os.path.join(rtl_dir, "common", "mqnic_ptp_perout.v"),
        os.path.join(rtl_dir, "common", "cpl_write.v"),
        os.path.join(rtl_dir, "common", "cpl_op_mux.v"),
        os.path.join(rtl_dir, "common", "desc_fetch.v"),
        os.path.join(rtl_dir, "common", "desc_op_mux.v"),
        os.path.join(rtl_dir, "common", "event_mux.v"),
        os.path.join(rtl_dir, "common", "queue_manager.v"),
        os.path.join(rtl_dir, "common", "cpl_queue_manager.v"),
        os.path.join(rtl_dir, "common", "tx_fifo.v"),
        os.path.join(rtl_dir, "common", "rx_fifo.v"),
        os.path.join(rtl_dir, "common", "tx_req_mux.v"),
        os.path.join(rtl_dir, "common", "tx_engine.v"),
        os.path.join(rtl_dir, "common", "rx_engine.v"),
        os.path.join(rtl_dir, "common", "tx_checksum.v"),
        os.path.join(rtl_dir, "common", "rx_hash.v"),
        os.path.join(rtl_dir, "common", "rx_checksum.v"),
        os.path.join(rtl_dir, "common", "stats_counter.v"),
        os.path.join(rtl_dir, "common", "stats_collect.v"),
        os.path.join(rtl_dir, "common", "stats_pcie_if.v"),
        os.path.join(rtl_dir, "common", "stats_pcie_tlp.v"),
        os.path.join(rtl_dir, "common", "stats_dma_if_pcie.v"),
        os.path.join(rtl_dir, "common", "stats_dma_latency.v"),
        os.path.join(rtl_dir, "common", "mqnic_tx_scheduler_block_rr.v"),
        os.path.join(rtl_dir, "common", "tx_scheduler_rr.v"),
        os.path.join(rtl_dir, "mqnic_app_block_dma_bench.v"),
        os.path.join(eth_rtl_dir, "ptp_clock.v"),
        os.path.join(eth_rtl_dir, "ptp_clock_cdc.v"),
        os.path.join(eth_rtl_dir, "ptp_perout.v"),
        os.path.join(eth_rtl_dir, "ptp_ts_extract.v"),
        os.path.join(axi_rtl_dir, "axil_crossbar.v"),
        os.path.join(axi_rtl_dir, "axil_crossbar_addr.v"),
        os.path.join(axi_rtl_dir, "axil_crossbar_rd.v"),
        os.path.join(axi_rtl_dir, "axil_crossbar_wr.v"),
        os.path.join(axi_rtl_dir, "axil_reg_if.v"),
        os.path.join(axi_rtl_dir, "axil_reg_if_rd.v"),
        os.path.join(axi_rtl_dir, "axil_reg_if_wr.v"),
        os.path.join(axi_rtl_dir, "axil_register_rd.v"),
        os.path.join(axi_rtl_dir, "axil_register_wr.v"),
        os.path.join(axi_rtl_dir, "arbiter.v"),
        os.path.join(axi_rtl_dir, "priority_encoder.v"),
        os.path.join(axis_rtl_dir, "axis_adapter.v"),
        os.path.join(axis_rtl_dir, "axis_arb_mux.v"),
        os.path.join(axis_rtl_dir, "axis_async_fifo.v"),
        os.path.join(axis_rtl_dir, "axis_async_fifo_adapter.v"),
        os.path.join(axis_rtl_dir, "axis_demux.v"),
        os.path.join(axis_rtl_dir, "axis_fifo.v"),
        os.path.join(axis_rtl_dir, "axis_fifo_adapter.v"),
        os.path.join(axis_rtl_dir, "axis_pipeline_fifo.v"),
        os.path.join(axis_rtl_dir, "axis_register.v"),
        os.path.join(pcie_rtl_dir, "pcie_axil_master.v"),
        os.path.join(pcie_rtl_dir, "pcie_tlp_demux.v"),
        os.path.join(pcie_rtl_dir, "pcie_tlp_demux_bar.v"),
        os.path.join(pcie_rtl_dir, "pcie_tlp_mux.v"),
        os.path.join(pcie_rtl_dir, "dma_if_pcie.v"),
        os.path.join(pcie_rtl_dir, "dma_if_pcie_rd.v"),
        os.path.join(pcie_rtl_dir, "dma_if_pcie_wr.v"),
        os.path.join(pcie_rtl_dir, "dma_if_mux.v"),
        os.path.join(pcie_rtl_dir, "dma_if_mux_rd.v"),
        os.path.join(pcie_rtl_dir, "dma_if_mux_wr.v"),
        os.path.join(pcie_rtl_dir, "dma_if_desc_mux.v"),
        os.path.join(pcie_rtl_dir, "dma_ram_demux_rd.v"),
        os.path.join(pcie_rtl_dir, "dma_ram_demux_wr.v"),
        os.path.join(pcie_rtl_dir, "dma_psdpram.v"),
        os.path.join(pcie_rtl_dir, "dma_client_axis_sink.v"),
        os.path.join(pcie_rtl_dir, "dma_client_axis_source.v"),
        os.path.join(pcie_rtl_dir, "pcie_us_if.v"),
        os.path.join(pcie_rtl_dir, "pcie_us_if_rc.v"),
        os.path.join(pcie_rtl_dir, "pcie_us_if_rq.v"),
        os.path.join(pcie_rtl_dir, "pcie_us_if_cc.v"),
        os.path.join(pcie_rtl_dir, "pcie_us_if_cq.v"),
        os.path.join(pcie_rtl_dir, "pcie_us_cfg.v"),
        os.path.join(pcie_rtl_dir, "pcie_us_msi.v"),
        os.path.join(pcie_rtl_dir, "pulse_merge.v"),
    ]
    parameters = {}

    # Structural configuration
    parameters['IF_COUNT'] = if_count
    parameters['PORTS_PER_IF'] = ports_per_if
    parameters['SCHED_PER_IF'] = ports_per_if

    # PTP configuration
    parameters['PTP_CLOCK_PIPELINE'] = 0
    parameters['PTP_USE_SAMPLE_CLOCK'] = 0
    parameters['PTP_SEPARATE_RX_CLOCK'] = 0
    parameters['PTP_PORT_CDC_PIPELINE'] = 0
    parameters['PTP_PEROUT_ENABLE'] = 0
    parameters['PTP_PEROUT_COUNT'] = 1

    # Queue manager configuration (interface)
    parameters['EVENT_QUEUE_OP_TABLE_SIZE'] = 32
    parameters['TX_QUEUE_OP_TABLE_SIZE'] = 32
    parameters['RX_QUEUE_OP_TABLE_SIZE'] = 32
    parameters['TX_CPL_QUEUE_OP_TABLE_SIZE'] = parameters['TX_QUEUE_OP_TABLE_SIZE']
    parameters['RX_CPL_QUEUE_OP_TABLE_SIZE'] = parameters['RX_QUEUE_OP_TABLE_SIZE']
    parameters['TX_QUEUE_INDEX_WIDTH'] = 13
    parameters['RX_QUEUE_INDEX_WIDTH'] = 8
    parameters['TX_CPL_QUEUE_INDEX_WIDTH'] = parameters['TX_QUEUE_INDEX_WIDTH']
    parameters['RX_CPL_QUEUE_INDEX_WIDTH'] = parameters['RX_QUEUE_INDEX_WIDTH']
    parameters['EVENT_QUEUE_PIPELINE'] = 3
    parameters['TX_QUEUE_PIPELINE'] = 3 + max(parameters['TX_QUEUE_INDEX_WIDTH']-12, 0)
    parameters['RX_QUEUE_PIPELINE'] = 3 + max(parameters['RX_QUEUE_INDEX_WIDTH']-12, 0)
    parameters['TX_CPL_QUEUE_PIPELINE'] = parameters['TX_QUEUE_PIPELINE']
    parameters['RX_CPL_QUEUE_PIPELINE'] = parameters['RX_QUEUE_PIPELINE']

    # TX and RX engine configuration (port)
    parameters['TX_DESC_TABLE_SIZE'] = 32
    parameters['RX_DESC_TABLE_SIZE'] = 32

    # Scheduler configuration (port)
    parameters['TX_SCHEDULER_OP_TABLE_SIZE'] = parameters['TX_DESC_TABLE_SIZE']
    parameters['TX_SCHEDULER_PIPELINE'] = parameters['TX_QUEUE_PIPELINE']
    parameters['TDMA_INDEX_WIDTH'] = 6

    # Timestamping configuration (port)
    parameters['PTP_TS_ENABLE'] = 1
    parameters['TX_PTP_TS_FIFO_DEPTH'] = 32
    parameters['RX_PTP_TS_FIFO_DEPTH'] = 32

    # Interface configuration (port)
    parameters['TX_CHECKSUM_ENABLE'] = 1
    parameters['RX_RSS_ENABLE'] = 1
    parameters['RX_HASH_ENABLE'] = 1
    parameters['RX_CHECKSUM_ENABLE'] = 1
    parameters['TX_FIFO_DEPTH'] = 32768
    parameters['RX_FIFO_DEPTH'] = 131072
    parameters['MAX_TX_SIZE'] = 9214
    parameters['MAX_RX_SIZE'] = 9214
    parameters['TX_RAM_SIZE'] = 131072
    parameters['RX_RAM_SIZE'] = 131072

    # Application block configuration
    parameters['APP_ID'] = 0x12348001
    parameters['APP_ENABLE'] = 1
    parameters['APP_CTRL_ENABLE'] = 0
    parameters['APP_DMA_ENABLE'] = 1
    parameters['APP_AXIS_DIRECT_ENABLE'] = 0
    parameters['APP_AXIS_SYNC_ENABLE'] = 0
    parameters['APP_AXIS_IF_ENABLE'] = 0
    parameters['APP_STAT_ENABLE'] = 0

    # DMA interface configuration
    parameters['DMA_IMM_ENABLE'] = 1
    parameters['DMA_IMM_WIDTH'] = 32
    parameters['DMA_LEN_WIDTH'] = 16
    parameters['DMA_TAG_WIDTH'] = 16
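    # RAM_ADDR_WIDTH is sized to address the larger of the TX and RX packet
    # RAMs: (131072-1).bit_length() == 17, so the 128 kB RAMs configured
    # above get a 17-bit address.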
    parameters['RAM_ADDR_WIDTH'] = (max(parameters['TX_RAM_SIZE'], parameters['RX_RAM_SIZE'])-1).bit_length()
    parameters['RAM_PIPELINE'] = 2

    # PCIe interface configuration
    parameters['AXIS_PCIE_DATA_WIDTH'] = axis_pcie_data_width
    parameters['PF_COUNT'] = 1
    parameters['VF_COUNT'] = 0
    parameters['PCIE_TAG_COUNT'] = 64
    parameters['PCIE_DMA_READ_OP_TABLE_SIZE'] = parameters['PCIE_TAG_COUNT']
    parameters['PCIE_DMA_READ_TX_LIMIT'] = 16
    parameters['PCIE_DMA_READ_TX_FC_ENABLE'] = 1
    parameters['PCIE_DMA_WRITE_OP_TABLE_SIZE'] = 16
    parameters['PCIE_DMA_WRITE_TX_LIMIT'] = 3
    parameters['PCIE_DMA_WRITE_TX_FC_ENABLE'] = 1
    parameters['MSI_COUNT'] = 32

    # AXI lite interface configuration (control)
    parameters['AXIL_CTRL_DATA_WIDTH'] = 32
    parameters['AXIL_CTRL_ADDR_WIDTH'] = 24
    parameters['AXIL_CSR_PASSTHROUGH_ENABLE'] = 0

    # AXI lite interface configuration (application control)
    parameters['AXIL_APP_CTRL_DATA_WIDTH'] = parameters['AXIL_CTRL_DATA_WIDTH']
    parameters['AXIL_APP_CTRL_ADDR_WIDTH'] = 24

    # Ethernet interface configuration
    parameters['AXIS_ETH_DATA_WIDTH'] = axis_eth_data_width
    parameters['AXIS_ETH_SYNC_DATA_WIDTH'] = axis_eth_sync_data_width
    parameters['AXIS_ETH_RX_USE_READY'] = 0
    parameters['AXIS_ETH_TX_PIPELINE'] = 0
    parameters['AXIS_ETH_TX_FIFO_PIPELINE'] = 2
    parameters['AXIS_ETH_TX_TS_PIPELINE'] = 0
    parameters['AXIS_ETH_RX_PIPELINE'] = 0
    parameters['AXIS_ETH_RX_FIFO_PIPELINE'] = 2

    # Statistics counter subsystem
    parameters['STAT_ENABLE'] = 1
    parameters['STAT_DMA_ENABLE'] = 1
    parameters['STAT_PCIE_ENABLE'] = 1
    parameters['STAT_INC_WIDTH'] = 24
    parameters['STAT_ID_WIDTH'] = 12
    extra_env = {f'PARAM_{k}': str(v) for k, v in parameters.items()}

    sim_build = os.path.join(tests_dir, "sim_build",
        request.node.name.replace('[', '-').replace(']', ''))

    cocotb_test.simulator.run(
        python_search=[tests_dir],
        verilog_sources=verilog_sources,
        toplevel=toplevel,
        module=module,
        parameters=parameters,
        sim_build=sim_build,
        extra_env=extra_env,
    )
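# A single parametrized configuration can be run by pytest node id; assuming
# this file is named test_mqnic_core_pcie_us.py and pytest's default id
# generation from the parameter tuple, e.g.:
#   pytest 'test_mqnic_core_pcie_us.py::test_mqnic_core_pcie_us[1-1-256-64-64]'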