From bee170319903ceef66cae6a4ba6c9cabe9d8d8f3 Mon Sep 17 00:00:00 2001 From: Alex Forencich Date: Wed, 30 Nov 2022 23:26:05 -0800 Subject: [PATCH] fpga/app/dma_bench: Refactor DMA benchmark application, use register blocks Signed-off-by: Alex Forencich --- .../modules/mqnic_app_dma_bench/main.c | 179 +++-- fpga/app/dma_bench/rtl/dma_bench.v | 654 ++++++++++++++++ .../dma_bench/rtl/mqnic_app_block_dma_bench.v | 695 ++++-------------- .../dma_bench/tb/mqnic_core_pcie_us/Makefile | 1 + .../test_mqnic_core_pcie_us.py | 138 ++-- .../fpga_100g/fpga_app_dma_bench/Makefile | 1 + 6 files changed, 991 insertions(+), 677 deletions(-) create mode 100644 fpga/app/dma_bench/rtl/dma_bench.v diff --git a/fpga/app/dma_bench/modules/mqnic_app_dma_bench/main.c b/fpga/app/dma_bench/modules/mqnic_app_dma_bench/main.c index 6f7b25044..5b7a884b7 100644 --- a/fpga/app/dma_bench/modules/mqnic_app_dma_bench/main.c +++ b/fpga/app/dma_bench/modules/mqnic_app_dma_bench/main.c @@ -52,6 +52,9 @@ struct mqnic_app_dma_bench { void __iomem *app_hw_addr; void __iomem *ram_hw_addr; + struct mqnic_reg_block *rb_list; + struct mqnic_reg_block *dma_bench_rb; + // DMA buffer size_t dma_region_len; void *dma_region; @@ -152,19 +155,19 @@ static void dma_read(struct mqnic_app_dma_bench *app, int new_tag = 0; unsigned long t; - tag = ioread32(app->app_hw_addr + 0x000118); // dummy read - tag = (ioread32(app->app_hw_addr + 0x000118) & 0x7f) + 1; - iowrite32(dma_addr & 0xffffffff, app->app_hw_addr + 0x000100); - iowrite32((dma_addr >> 32) & 0xffffffff, app->app_hw_addr + 0x000104); - iowrite32(ram_addr, app->app_hw_addr + 0x000108); - iowrite32(0, app->app_hw_addr + 0x00010C); - iowrite32(len, app->app_hw_addr + 0x000110); - iowrite32(tag, app->app_hw_addr + 0x000114); + tag = ioread32(app->dma_bench_rb->regs + 0x118); // dummy read + tag = (ioread32(app->dma_bench_rb->regs + 0x118) & 0x7f) + 1; + iowrite32(dma_addr & 0xffffffff, app->dma_bench_rb->regs + 0x100); + iowrite32((dma_addr >> 32) & 0xffffffff, app->dma_bench_rb->regs + 0x104); + iowrite32(ram_addr, app->dma_bench_rb->regs + 0x108); + iowrite32(0, app->dma_bench_rb->regs + 0x10C); + iowrite32(len, app->dma_bench_rb->regs + 0x110); + iowrite32(tag, app->dma_bench_rb->regs + 0x114); // wait for transfer to complete t = jiffies + msecs_to_jiffies(200); while (time_before(jiffies, t)) { - new_tag = (ioread32(app->app_hw_addr + 0x000118) & 0xff); + new_tag = (ioread32(app->dma_bench_rb->regs + 0x118) & 0xff); if (new_tag == tag) break; } @@ -180,19 +183,19 @@ static void dma_write(struct mqnic_app_dma_bench *app, int new_tag = 0; unsigned long t; - tag = ioread32(app->app_hw_addr + 0x000218); // dummy read - tag = (ioread32(app->app_hw_addr + 0x000218) & 0x7f) + 1; - iowrite32(dma_addr & 0xffffffff, app->app_hw_addr + 0x000200); - iowrite32((dma_addr >> 32) & 0xffffffff, app->app_hw_addr + 0x000204); - iowrite32(ram_addr, app->app_hw_addr + 0x000208); - iowrite32(0, app->app_hw_addr + 0x00020C); - iowrite32(len, app->app_hw_addr + 0x000210); - iowrite32(tag, app->app_hw_addr + 0x000214); + tag = ioread32(app->dma_bench_rb->regs + 0x218); // dummy read + tag = (ioread32(app->dma_bench_rb->regs + 0x218) & 0x7f) + 1; + iowrite32(dma_addr & 0xffffffff, app->dma_bench_rb->regs + 0x200); + iowrite32((dma_addr >> 32) & 0xffffffff, app->dma_bench_rb->regs + 0x204); + iowrite32(ram_addr, app->dma_bench_rb->regs + 0x208); + iowrite32(0, app->dma_bench_rb->regs + 0x20C); + iowrite32(len, app->dma_bench_rb->regs + 0x210); + iowrite32(tag, app->dma_bench_rb->regs + 0x214); // wait for transfer to complete t = jiffies + msecs_to_jiffies(200); while (time_before(jiffies, t)) { - new_tag = (ioread32(app->app_hw_addr + 0x000218) & 0xff); + new_tag = (ioread32(app->dma_bench_rb->regs + 0x218) & 0xff); if (new_tag == tag) break; } @@ -211,47 +214,47 @@ static void dma_block_read(struct mqnic_app_dma_bench *app, unsigned long t; // DMA base address - iowrite32(dma_addr & 0xffffffff, app->app_hw_addr + 0x001080); - iowrite32((dma_addr >> 32) & 0xffffffff, app->app_hw_addr + 0x001084); + iowrite32(dma_addr & 0xffffffff, app->dma_bench_rb->regs + 0x380); + iowrite32((dma_addr >> 32) & 0xffffffff, app->dma_bench_rb->regs + 0x384); // DMA offset address - iowrite32(dma_offset & 0xffffffff, app->app_hw_addr + 0x001088); - iowrite32((dma_offset >> 32) & 0xffffffff, app->app_hw_addr + 0x00108c); + iowrite32(dma_offset & 0xffffffff, app->dma_bench_rb->regs + 0x388); + iowrite32((dma_offset >> 32) & 0xffffffff, app->dma_bench_rb->regs + 0x38c); // DMA offset mask - iowrite32(dma_offset_mask & 0xffffffff, app->app_hw_addr + 0x001090); - iowrite32((dma_offset_mask >> 32) & 0xffffffff, app->app_hw_addr + 0x001094); + iowrite32(dma_offset_mask & 0xffffffff, app->dma_bench_rb->regs + 0x390); + iowrite32((dma_offset_mask >> 32) & 0xffffffff, app->dma_bench_rb->regs + 0x394); // DMA stride - iowrite32(dma_stride & 0xffffffff, app->app_hw_addr + 0x001098); - iowrite32((dma_stride >> 32) & 0xffffffff, app->app_hw_addr + 0x00109c); + iowrite32(dma_stride & 0xffffffff, app->dma_bench_rb->regs + 0x398); + iowrite32((dma_stride >> 32) & 0xffffffff, app->dma_bench_rb->regs + 0x39c); // RAM base address - iowrite32(ram_addr & 0xffffffff, app->app_hw_addr + 0x0010c0); - iowrite32((ram_addr >> 32) & 0xffffffff, app->app_hw_addr + 0x0010c4); + iowrite32(ram_addr & 0xffffffff, app->dma_bench_rb->regs + 0x3c0); + iowrite32((ram_addr >> 32) & 0xffffffff, app->dma_bench_rb->regs + 0x3c4); // RAM offset address - iowrite32(ram_offset & 0xffffffff, app->app_hw_addr + 0x0010c8); - iowrite32((ram_offset >> 32) & 0xffffffff, app->app_hw_addr + 0x0010cc); + iowrite32(ram_offset & 0xffffffff, app->dma_bench_rb->regs + 0x3c8); + iowrite32((ram_offset >> 32) & 0xffffffff, app->dma_bench_rb->regs + 0x3cc); // RAM offset mask - iowrite32(ram_offset_mask & 0xffffffff, app->app_hw_addr + 0x0010d0); - iowrite32((ram_offset_mask >> 32) & 0xffffffff, app->app_hw_addr + 0x0010d4); + iowrite32(ram_offset_mask & 0xffffffff, app->dma_bench_rb->regs + 0x3d0); + iowrite32((ram_offset_mask >> 32) & 0xffffffff, app->dma_bench_rb->regs + 0x3d4); // RAM stride - iowrite32(ram_stride & 0xffffffff, app->app_hw_addr + 0x0010d8); - iowrite32((ram_stride >> 32) & 0xffffffff, app->app_hw_addr + 0x0010dc); + iowrite32(ram_stride & 0xffffffff, app->dma_bench_rb->regs + 0x3d8); + iowrite32((ram_stride >> 32) & 0xffffffff, app->dma_bench_rb->regs + 0x3dc); // clear cycle count - iowrite32(0, app->app_hw_addr + 0x001008); - iowrite32(0, app->app_hw_addr + 0x00100c); + iowrite32(0, app->dma_bench_rb->regs + 0x308); + iowrite32(0, app->dma_bench_rb->regs + 0x30c); // block length - iowrite32(block_len, app->app_hw_addr + 0x001010); + iowrite32(block_len, app->dma_bench_rb->regs + 0x310); // block count - iowrite32(block_count, app->app_hw_addr + 0x001018); + iowrite32(block_count, app->dma_bench_rb->regs + 0x318); // start - iowrite32(1, app->app_hw_addr + 0x001000); + iowrite32(1, app->dma_bench_rb->regs + 0x300); // wait for transfer to complete t = jiffies + msecs_to_jiffies(20000); while (time_before(jiffies, t)) { - if ((ioread32(app->app_hw_addr + 0x001000) & 1) == 0) + if ((ioread32(app->dma_bench_rb->regs + 0x300) & 1) == 0) break; } - if ((ioread32(app->app_hw_addr + 0x001000) & 1) != 0) + if ((ioread32(app->dma_bench_rb->regs + 0x300) & 1) != 0) dev_warn(app->dev, "%s: operation timed out", __func__); } @@ -265,47 +268,47 @@ static void dma_block_write(struct mqnic_app_dma_bench *app, unsigned long t; // DMA base address - iowrite32(dma_addr & 0xffffffff, app->app_hw_addr + 0x001180); - iowrite32((dma_addr >> 32) & 0xffffffff, app->app_hw_addr + 0x001184); + iowrite32(dma_addr & 0xffffffff, app->dma_bench_rb->regs + 0x480); + iowrite32((dma_addr >> 32) & 0xffffffff, app->dma_bench_rb->regs + 0x484); // DMA offset address - iowrite32(dma_offset & 0xffffffff, app->app_hw_addr + 0x001188); - iowrite32((dma_offset >> 32) & 0xffffffff, app->app_hw_addr + 0x00118c); + iowrite32(dma_offset & 0xffffffff, app->dma_bench_rb->regs + 0x488); + iowrite32((dma_offset >> 32) & 0xffffffff, app->dma_bench_rb->regs + 0x48c); // DMA offset mask - iowrite32(dma_offset_mask & 0xffffffff, app->app_hw_addr + 0x001190); - iowrite32((dma_offset_mask >> 32) & 0xffffffff, app->app_hw_addr + 0x001194); + iowrite32(dma_offset_mask & 0xffffffff, app->dma_bench_rb->regs + 0x490); + iowrite32((dma_offset_mask >> 32) & 0xffffffff, app->dma_bench_rb->regs + 0x494); // DMA stride - iowrite32(dma_stride & 0xffffffff, app->app_hw_addr + 0x001198); - iowrite32((dma_stride >> 32) & 0xffffffff, app->app_hw_addr + 0x00119c); + iowrite32(dma_stride & 0xffffffff, app->dma_bench_rb->regs + 0x498); + iowrite32((dma_stride >> 32) & 0xffffffff, app->dma_bench_rb->regs + 0x49c); // RAM base address - iowrite32(ram_addr & 0xffffffff, app->app_hw_addr + 0x0011c0); - iowrite32((ram_addr >> 32) & 0xffffffff, app->app_hw_addr + 0x0011c4); + iowrite32(ram_addr & 0xffffffff, app->dma_bench_rb->regs + 0x4c0); + iowrite32((ram_addr >> 32) & 0xffffffff, app->dma_bench_rb->regs + 0x4c4); // RAM offset address - iowrite32(ram_offset & 0xffffffff, app->app_hw_addr + 0x0011c8); - iowrite32((ram_offset >> 32) & 0xffffffff, app->app_hw_addr + 0x0011cc); + iowrite32(ram_offset & 0xffffffff, app->dma_bench_rb->regs + 0x4c8); + iowrite32((ram_offset >> 32) & 0xffffffff, app->dma_bench_rb->regs + 0x4cc); // RAM offset mask - iowrite32(ram_offset_mask & 0xffffffff, app->app_hw_addr + 0x0011d0); - iowrite32((ram_offset_mask >> 32) & 0xffffffff, app->app_hw_addr + 0x0011d4); + iowrite32(ram_offset_mask & 0xffffffff, app->dma_bench_rb->regs + 0x4d0); + iowrite32((ram_offset_mask >> 32) & 0xffffffff, app->dma_bench_rb->regs + 0x4d4); // RAM stride - iowrite32(ram_stride & 0xffffffff, app->app_hw_addr + 0x0011d8); - iowrite32((ram_stride >> 32) & 0xffffffff, app->app_hw_addr + 0x0011dc); + iowrite32(ram_stride & 0xffffffff, app->dma_bench_rb->regs + 0x4d8); + iowrite32((ram_stride >> 32) & 0xffffffff, app->dma_bench_rb->regs + 0x4dc); // clear cycle count - iowrite32(0, app->app_hw_addr + 0x001108); - iowrite32(0, app->app_hw_addr + 0x00110c); + iowrite32(0, app->dma_bench_rb->regs + 0x408); + iowrite32(0, app->dma_bench_rb->regs + 0x40c); // block length - iowrite32(block_len, app->app_hw_addr + 0x001110); + iowrite32(block_len, app->dma_bench_rb->regs + 0x410); // block count - iowrite32(block_count, app->app_hw_addr + 0x001118); + iowrite32(block_count, app->dma_bench_rb->regs + 0x418); // start - iowrite32(1, app->app_hw_addr + 0x001100); + iowrite32(1, app->dma_bench_rb->regs + 0x400); // wait for transfer to complete t = jiffies + msecs_to_jiffies(20000); while (time_before(jiffies, t)) { - if ((ioread32(app->app_hw_addr + 0x001100) & 1) == 0) + if ((ioread32(app->dma_bench_rb->regs + 0x400) & 1) == 0) break; } - if ((ioread32(app->app_hw_addr + 0x001100) & 1) != 0) + if ((ioread32(app->dma_bench_rb->regs + 0x400) & 1) != 0) dev_warn(app->dev, "%s: operation timed out", __func__); } @@ -328,7 +331,7 @@ static void dma_block_read_bench(struct mqnic_app_dma_bench *app, dma_block_read(app, dma_addr, 0, 0x3fff, stride, 0, 0, 0x3fff, stride, size, count); - time = mqnic_core_clk_cycles_to_ns(app->mdev, ioread32(app->app_hw_addr + 0x001008)); + time = mqnic_core_clk_cycles_to_ns(app->mdev, ioread32(app->dma_bench_rb->regs + 0x308)); udelay(5); @@ -361,7 +364,7 @@ static void dma_block_write_bench(struct mqnic_app_dma_bench *app, dma_block_write(app, dma_addr, 0, 0x3fff, stride, 0, 0, 0x3fff, stride, size, count); - time = mqnic_core_clk_cycles_to_ns(app->mdev, ioread32(app->app_hw_addr + 0x001108)); + time = mqnic_core_clk_cycles_to_ns(app->mdev, ioread32(app->dma_bench_rb->regs + 0x408)); udelay(5); @@ -375,12 +378,16 @@ static void dma_block_write_bench(struct mqnic_app_dma_bench *app, req_latency / req_count, size * count * 8 * 1000 / time); } +static void mqnic_app_dma_bench_remove(struct auxiliary_device *adev); + static int mqnic_app_dma_bench_probe(struct auxiliary_device *adev, const struct auxiliary_device_id *id) { + int ret = 0; struct mqnic_app_dma_bench *app; struct mqnic_dev *mdev = container_of(adev, struct mqnic_adev, adev)->mdev; struct device *dev = &adev->dev; + struct mqnic_reg_block *rb; int mismatch = 0; int k; @@ -405,12 +412,33 @@ static int mqnic_app_dma_bench_probe(struct auxiliary_device *adev, app->app_hw_addr = mdev->app_hw_addr; app->ram_hw_addr = mdev->ram_hw_addr; + app->rb_list = mqnic_enumerate_reg_block_list(mdev->app_hw_addr, 0, mdev->app_hw_regs_size); + if (!app->rb_list) { + dev_err(dev, "Failed to enumerate blocks"); + return -EIO; + } + + dev_info(dev, "Application register blocks:"); + for (rb = app->rb_list; rb->regs; rb++) + dev_info(dev, " type 0x%08x (v %d.%d.%d.%d)", rb->type, rb->version >> 24, + (rb->version >> 16) & 0xff, (rb->version >> 8) & 0xff, rb->version & 0xff); + + app->dma_bench_rb = mqnic_find_reg_block(app->rb_list, 0x12348101, 0x00000100, 0); + + if (!app->dma_bench_rb) { + ret = -EIO; + dev_err(dev, "Error: DMA bench register block not found"); + goto fail_rb_init; + } + // Allocate DMA buffer app->dma_region_len = 16 * 1024; app->dma_region = dma_alloc_coherent(app->nic_dev, app->dma_region_len, &app->dma_region_addr, GFP_KERNEL | __GFP_ZERO); - if (!app->dma_region) - return -ENOMEM; + if (!app->dma_region) { + ret = -ENOMEM; + goto fail_dma_alloc; + } dev_info(dev, "Allocated DMA region virt %p, phys %p", app->dma_region, (void *)app->dma_region_addr); @@ -519,7 +547,12 @@ static int mqnic_app_dma_bench_probe(struct auxiliary_device *adev, print_counters(app); return 0; -} + +fail_dma_alloc: +fail_rb_init: + mqnic_app_dma_bench_remove(adev); + return ret; +} static void mqnic_app_dma_bench_remove(struct auxiliary_device *adev) { @@ -528,8 +561,12 @@ static void mqnic_app_dma_bench_remove(struct auxiliary_device *adev) dev_info(dev, "%s() called", __func__); - dma_free_coherent(app->nic_dev, app->dma_region_len, app->dma_region, - app->dma_region_addr); + if (app->dma_region) + dma_free_coherent(app->nic_dev, app->dma_region_len, app->dma_region, + app->dma_region_addr); + + if (app->rb_list) + mqnic_free_reg_block_list(app->rb_list); } static const struct auxiliary_device_id mqnic_app_dma_bench_id_table[] = { diff --git a/fpga/app/dma_bench/rtl/dma_bench.v b/fpga/app/dma_bench/rtl/dma_bench.v new file mode 100644 index 000000000..48f05a177 --- /dev/null +++ b/fpga/app/dma_bench/rtl/dma_bench.v @@ -0,0 +1,654 @@ +/* + +Copyright 2021, The Regents of the University of California. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS OF THE UNIVERSITY OF CALIFORNIA ''AS +IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OF THE UNIVERSITY OF CALIFORNIA OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT +OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING +IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY +OF SUCH DAMAGE. + +The views and conclusions contained in the software and documentation are those +of the authors and should not be interpreted as representing official policies, +either expressed or implied, of The Regents of the University of California. + +*/ + +// Language: Verilog 2001 + +`resetall +`timescale 1ns / 1ps +`default_nettype none + +/* + * DMA benchmark module + */ +module dma_bench # +( + // DMA interface configuration + parameter DMA_ADDR_WIDTH = 64, + parameter DMA_IMM_ENABLE = 0, + parameter DMA_IMM_WIDTH = 32, + parameter DMA_LEN_WIDTH = 16, + parameter DMA_TAG_WIDTH = 16, + parameter RAM_SEL_WIDTH = 4, + parameter RAM_ADDR_WIDTH = 16, + parameter RAM_SEG_COUNT = 2, + parameter RAM_SEG_DATA_WIDTH = 256*2/RAM_SEG_COUNT, + parameter RAM_SEG_BE_WIDTH = RAM_SEG_DATA_WIDTH/8, + parameter RAM_SEG_ADDR_WIDTH = RAM_ADDR_WIDTH-$clog2(RAM_SEG_COUNT*RAM_SEG_BE_WIDTH), + parameter RAM_PIPELINE = 2, + + // Register interface + parameter REG_ADDR_WIDTH = 7, + parameter REG_DATA_WIDTH = 32, + parameter REG_STRB_WIDTH = (REG_DATA_WIDTH/8), + parameter RB_BASE_ADDR = 0, + parameter RB_NEXT_PTR = 0 +) +( + input wire clk, + input wire rst, + + /* + * Register interface + */ + input wire [REG_ADDR_WIDTH-1:0] reg_wr_addr, + input wire [REG_DATA_WIDTH-1:0] reg_wr_data, + input wire [REG_STRB_WIDTH-1:0] reg_wr_strb, + input wire reg_wr_en, + output wire reg_wr_wait, + output wire reg_wr_ack, + input wire [REG_ADDR_WIDTH-1:0] reg_rd_addr, + input wire reg_rd_en, + output wire [REG_DATA_WIDTH-1:0] reg_rd_data, + output wire reg_rd_wait, + output wire reg_rd_ack, + + /* + * DMA read descriptor output + */ + output wire [DMA_ADDR_WIDTH-1:0] m_axis_dma_read_desc_dma_addr, + output wire [RAM_SEL_WIDTH-1:0] m_axis_dma_read_desc_ram_sel, + output wire [RAM_ADDR_WIDTH-1:0] m_axis_dma_read_desc_ram_addr, + output wire [DMA_LEN_WIDTH-1:0] m_axis_dma_read_desc_len, + output wire [DMA_TAG_WIDTH-1:0] m_axis_dma_read_desc_tag, + output wire m_axis_dma_read_desc_valid, + input wire m_axis_dma_read_desc_ready, + + /* + * DMA read descriptor status input + */ + input wire [DMA_TAG_WIDTH-1:0] s_axis_dma_read_desc_status_tag, + input wire [3:0] s_axis_dma_read_desc_status_error, + input wire s_axis_dma_read_desc_status_valid, + + /* + * DMA write descriptor output + */ + output wire [DMA_ADDR_WIDTH-1:0] m_axis_dma_write_desc_dma_addr, + output wire [RAM_SEL_WIDTH-1:0] m_axis_dma_write_desc_ram_sel, + output wire [RAM_ADDR_WIDTH-1:0] m_axis_dma_write_desc_ram_addr, + output wire [DMA_IMM_WIDTH-1:0] m_axis_dma_write_desc_imm, + output wire m_axis_dma_write_desc_imm_en, + output wire [DMA_LEN_WIDTH-1:0] m_axis_dma_write_desc_len, + output wire [DMA_TAG_WIDTH-1:0] m_axis_dma_write_desc_tag, + output wire m_axis_dma_write_desc_valid, + input wire m_axis_dma_write_desc_ready, + + /* + * DMA write descriptor status input + */ + input wire [DMA_TAG_WIDTH-1:0] s_axis_dma_write_desc_status_tag, + input wire [3:0] s_axis_dma_write_desc_status_error, + input wire s_axis_dma_write_desc_status_valid, + + /* + * DMA RAM interface + */ + input wire [RAM_SEG_COUNT*RAM_SEL_WIDTH-1:0] dma_ram_wr_cmd_sel, + input wire [RAM_SEG_COUNT*RAM_SEG_BE_WIDTH-1:0] dma_ram_wr_cmd_be, + input wire [RAM_SEG_COUNT*RAM_SEG_ADDR_WIDTH-1:0] dma_ram_wr_cmd_addr, + input wire [RAM_SEG_COUNT*RAM_SEG_DATA_WIDTH-1:0] dma_ram_wr_cmd_data, + input wire [RAM_SEG_COUNT-1:0] dma_ram_wr_cmd_valid, + output wire [RAM_SEG_COUNT-1:0] dma_ram_wr_cmd_ready, + output wire [RAM_SEG_COUNT-1:0] dma_ram_wr_done, + input wire [RAM_SEG_COUNT*RAM_SEL_WIDTH-1:0] dma_ram_rd_cmd_sel, + input wire [RAM_SEG_COUNT*RAM_SEG_ADDR_WIDTH-1:0] dma_ram_rd_cmd_addr, + input wire [RAM_SEG_COUNT-1:0] dma_ram_rd_cmd_valid, + output wire [RAM_SEG_COUNT-1:0] dma_ram_rd_cmd_ready, + output wire [RAM_SEG_COUNT*RAM_SEG_DATA_WIDTH-1:0] dma_ram_rd_resp_data, + output wire [RAM_SEG_COUNT-1:0] dma_ram_rd_resp_valid, + input wire [RAM_SEG_COUNT-1:0] dma_ram_rd_resp_ready +); + +localparam RAM_ADDR_IMM_WIDTH = (DMA_IMM_ENABLE && (DMA_IMM_WIDTH > RAM_ADDR_WIDTH)) ? DMA_IMM_WIDTH : RAM_ADDR_WIDTH; + +localparam RBB = RB_BASE_ADDR & {REG_ADDR_WIDTH{1'b1}}; + +// check configuration +initial begin + if (REG_DATA_WIDTH != 32) begin + $error("Error: Register interface width must be 32 (instance %m)"); + $finish; + end + + if (REG_STRB_WIDTH * 8 != REG_DATA_WIDTH) begin + $error("Error: Register interface requires byte (8-bit) granularity (instance %m)"); + $finish; + end + + if (REG_ADDR_WIDTH < 12) begin + $error("Error: Register address width too narrow (instance %m)"); + $finish; + end + + if (RB_NEXT_PTR && RB_NEXT_PTR >= RB_BASE_ADDR && RB_NEXT_PTR < RB_BASE_ADDR + 13'h1000) begin + $error("Error: RB_NEXT_PTR overlaps block (instance %m)"); + $finish; + end +end + +// control registers +reg reg_wr_ack_reg = 1'b0, reg_wr_ack_next; +reg [REG_DATA_WIDTH-1:0] reg_rd_data_reg = 0, reg_rd_data_next; +reg reg_rd_ack_reg = 1'b0, reg_rd_ack_next; + +reg [63:0] cycle_count_reg = 0; +reg [15:0] dma_read_active_count_reg = 0; +reg [15:0] dma_write_active_count_reg = 0; + +reg [DMA_ADDR_WIDTH-1:0] dma_read_desc_dma_addr_reg = 0, dma_read_desc_dma_addr_next; +reg [RAM_ADDR_WIDTH-1:0] dma_read_desc_ram_addr_reg = 0, dma_read_desc_ram_addr_next; +reg [DMA_LEN_WIDTH-1:0] dma_read_desc_len_reg = 0, dma_read_desc_len_next; +reg [DMA_TAG_WIDTH-1:0] dma_read_desc_tag_reg = 0, dma_read_desc_tag_next; +reg dma_read_desc_valid_reg = 0, dma_read_desc_valid_next; + +reg [DMA_TAG_WIDTH-1:0] dma_read_desc_status_tag_reg = 0, dma_read_desc_status_tag_next; +reg [3:0] dma_read_desc_status_error_reg = 0, dma_read_desc_status_error_next; +reg dma_read_desc_status_valid_reg = 0, dma_read_desc_status_valid_next; + +reg [DMA_ADDR_WIDTH-1:0] dma_write_desc_dma_addr_reg = 0, dma_write_desc_dma_addr_next; +reg [RAM_ADDR_IMM_WIDTH-1:0] dma_write_desc_ram_addr_imm_reg = 0, dma_write_desc_ram_addr_imm_next; +reg dma_write_desc_imm_en_reg = 0, dma_write_desc_imm_en_next; +reg [DMA_LEN_WIDTH-1:0] dma_write_desc_len_reg = 0, dma_write_desc_len_next; +reg [DMA_TAG_WIDTH-1:0] dma_write_desc_tag_reg = 0, dma_write_desc_tag_next; +reg dma_write_desc_valid_reg = 0, dma_write_desc_valid_next; + +reg [DMA_TAG_WIDTH-1:0] dma_write_desc_status_tag_reg = 0, dma_write_desc_status_tag_next; +reg [3:0] dma_write_desc_status_error_reg = 0, dma_write_desc_status_error_next; +reg dma_write_desc_status_valid_reg = 0, dma_write_desc_status_valid_next; + +reg dma_rd_int_en_reg = 0, dma_rd_int_en_next; +reg dma_wr_int_en_reg = 0, dma_wr_int_en_next; + +reg dma_read_block_run_reg = 1'b0, dma_read_block_run_next; +reg [DMA_LEN_WIDTH-1:0] dma_read_block_len_reg = 0, dma_read_block_len_next; +reg [31:0] dma_read_block_count_reg = 0, dma_read_block_count_next; +reg [63:0] dma_read_block_cycle_count_reg = 0, dma_read_block_cycle_count_next; +reg [DMA_ADDR_WIDTH-1:0] dma_read_block_dma_base_addr_reg = 0, dma_read_block_dma_base_addr_next; +reg [DMA_ADDR_WIDTH-1:0] dma_read_block_dma_offset_reg = 0, dma_read_block_dma_offset_next; +reg [DMA_ADDR_WIDTH-1:0] dma_read_block_dma_offset_mask_reg = 0, dma_read_block_dma_offset_mask_next; +reg [DMA_ADDR_WIDTH-1:0] dma_read_block_dma_stride_reg = 0, dma_read_block_dma_stride_next; +reg [RAM_ADDR_WIDTH-1:0] dma_read_block_ram_base_addr_reg = 0, dma_read_block_ram_base_addr_next; +reg [RAM_ADDR_WIDTH-1:0] dma_read_block_ram_offset_reg = 0, dma_read_block_ram_offset_next; +reg [RAM_ADDR_WIDTH-1:0] dma_read_block_ram_offset_mask_reg = 0, dma_read_block_ram_offset_mask_next; +reg [RAM_ADDR_WIDTH-1:0] dma_read_block_ram_stride_reg = 0, dma_read_block_ram_stride_next; + +reg dma_write_block_run_reg = 1'b0, dma_write_block_run_next; +reg [DMA_LEN_WIDTH-1:0] dma_write_block_len_reg = 0, dma_write_block_len_next; +reg [31:0] dma_write_block_count_reg = 0, dma_write_block_count_next; +reg [63:0] dma_write_block_cycle_count_reg = 0, dma_write_block_cycle_count_next; +reg [DMA_ADDR_WIDTH-1:0] dma_write_block_dma_base_addr_reg = 0, dma_write_block_dma_base_addr_next; +reg [DMA_ADDR_WIDTH-1:0] dma_write_block_dma_offset_reg = 0, dma_write_block_dma_offset_next; +reg [DMA_ADDR_WIDTH-1:0] dma_write_block_dma_offset_mask_reg = 0, dma_write_block_dma_offset_mask_next; +reg [DMA_ADDR_WIDTH-1:0] dma_write_block_dma_stride_reg = 0, dma_write_block_dma_stride_next; +reg [RAM_ADDR_WIDTH-1:0] dma_write_block_ram_base_addr_reg = 0, dma_write_block_ram_base_addr_next; +reg [RAM_ADDR_WIDTH-1:0] dma_write_block_ram_offset_reg = 0, dma_write_block_ram_offset_next; +reg [RAM_ADDR_WIDTH-1:0] dma_write_block_ram_offset_mask_reg = 0, dma_write_block_ram_offset_mask_next; +reg [RAM_ADDR_WIDTH-1:0] dma_write_block_ram_stride_reg = 0, dma_write_block_ram_stride_next; + +assign reg_wr_wait = 1'b0; +assign reg_wr_ack = reg_wr_ack_reg; +assign reg_rd_data = reg_rd_data_reg; +assign reg_rd_wait = 1'b0; +assign reg_rd_ack = reg_rd_ack_reg; + +assign m_axis_dma_read_desc_dma_addr = dma_read_desc_dma_addr_reg; +assign m_axis_dma_read_desc_ram_sel = 0; +assign m_axis_dma_read_desc_ram_addr = dma_read_desc_ram_addr_reg; +assign m_axis_dma_read_desc_len = dma_read_desc_len_reg; +assign m_axis_dma_read_desc_tag = dma_read_desc_tag_reg; +assign m_axis_dma_read_desc_valid = dma_read_desc_valid_reg; + +assign m_axis_dma_write_desc_dma_addr = dma_write_desc_dma_addr_reg; +assign m_axis_dma_write_desc_ram_sel = 0; +assign m_axis_dma_write_desc_ram_addr = dma_write_desc_ram_addr_imm_reg; +assign m_axis_dma_write_desc_imm = dma_write_desc_ram_addr_imm_reg; +assign m_axis_dma_write_desc_imm_en = dma_write_desc_imm_en_reg; +assign m_axis_dma_write_desc_len = dma_write_desc_len_reg; +assign m_axis_dma_write_desc_tag = dma_write_desc_tag_reg; +assign m_axis_dma_write_desc_valid = dma_write_desc_valid_reg; + +always @* begin + reg_wr_ack_next = 1'b0; + reg_rd_data_next = 0; + reg_rd_ack_next = 1'b0; + + dma_read_desc_dma_addr_next = dma_read_desc_dma_addr_reg; + dma_read_desc_ram_addr_next = dma_read_desc_ram_addr_reg; + dma_read_desc_len_next = dma_read_desc_len_reg; + dma_read_desc_tag_next = dma_read_desc_tag_reg; + dma_read_desc_valid_next = dma_read_desc_valid_reg && !m_axis_dma_read_desc_ready; + + dma_read_desc_status_tag_next = dma_read_desc_status_tag_reg; + dma_read_desc_status_error_next = dma_read_desc_status_error_reg; + dma_read_desc_status_valid_next = dma_read_desc_status_valid_reg; + + dma_write_desc_dma_addr_next = dma_write_desc_dma_addr_reg; + dma_write_desc_ram_addr_imm_next = dma_write_desc_ram_addr_imm_reg; + dma_write_desc_imm_en_next = dma_write_desc_imm_en_reg; + dma_write_desc_len_next = dma_write_desc_len_reg; + dma_write_desc_tag_next = dma_write_desc_tag_reg; + dma_write_desc_valid_next = dma_write_desc_valid_reg && !m_axis_dma_write_desc_ready; + + dma_write_desc_status_tag_next = dma_write_desc_status_tag_reg; + dma_write_desc_status_error_next = dma_write_desc_status_error_reg; + dma_write_desc_status_valid_next = dma_write_desc_status_valid_reg; + + dma_rd_int_en_next = dma_rd_int_en_reg; + dma_wr_int_en_next = dma_wr_int_en_reg; + + dma_read_block_run_next = dma_read_block_run_reg; + dma_read_block_len_next = dma_read_block_len_reg; + dma_read_block_count_next = dma_read_block_count_reg; + dma_read_block_cycle_count_next = dma_read_block_cycle_count_reg; + dma_read_block_dma_base_addr_next = dma_read_block_dma_base_addr_reg; + dma_read_block_dma_offset_next = dma_read_block_dma_offset_reg; + dma_read_block_dma_offset_mask_next = dma_read_block_dma_offset_mask_reg; + dma_read_block_dma_stride_next = dma_read_block_dma_stride_reg; + dma_read_block_ram_base_addr_next = dma_read_block_ram_base_addr_reg; + dma_read_block_ram_offset_next = dma_read_block_ram_offset_reg; + dma_read_block_ram_offset_mask_next = dma_read_block_ram_offset_mask_reg; + dma_read_block_ram_stride_next = dma_read_block_ram_stride_reg; + + dma_write_block_run_next = dma_write_block_run_reg; + dma_write_block_len_next = dma_write_block_len_reg; + dma_write_block_count_next = dma_write_block_count_reg; + dma_write_block_cycle_count_next = dma_write_block_cycle_count_reg; + dma_write_block_dma_base_addr_next = dma_write_block_dma_base_addr_reg; + dma_write_block_dma_offset_next = dma_write_block_dma_offset_reg; + dma_write_block_dma_offset_mask_next = dma_write_block_dma_offset_mask_reg; + dma_write_block_dma_stride_next = dma_write_block_dma_stride_reg; + dma_write_block_ram_base_addr_next = dma_write_block_ram_base_addr_reg; + dma_write_block_ram_offset_next = dma_write_block_ram_offset_reg; + dma_write_block_ram_offset_mask_next = dma_write_block_ram_offset_mask_reg; + dma_write_block_ram_stride_next = dma_write_block_ram_stride_reg; + + if (reg_wr_en && !reg_wr_ack_reg) begin + // write operation + reg_wr_ack_next = 1'b1; + case ({reg_wr_addr >> 2, 2'b00}) + // control + RBB+12'h00c: begin + dma_rd_int_en_next = reg_wr_data[0]; + dma_wr_int_en_next = reg_wr_data[1]; + end + // single read + RBB+12'h100: dma_read_desc_dma_addr_next[31:0] = reg_wr_data; + RBB+12'h104: dma_read_desc_dma_addr_next[63:32] = reg_wr_data; + RBB+12'h108: dma_read_desc_ram_addr_next = reg_wr_data; + RBB+12'h110: dma_read_desc_len_next = reg_wr_data; + RBB+12'h114: begin + dma_read_desc_tag_next = reg_wr_data; + dma_read_desc_valid_next = 1'b1; + end + // single write + RBB+12'h200: dma_write_desc_dma_addr_next[31:0] = reg_wr_data; + RBB+12'h204: dma_write_desc_dma_addr_next[63:32] = reg_wr_data; + RBB+12'h208: dma_write_desc_ram_addr_imm_next = reg_wr_data; + RBB+12'h210: dma_write_desc_len_next = reg_wr_data; + RBB+12'h214: begin + dma_write_desc_tag_next = reg_wr_data[23:0]; + dma_write_desc_imm_en_next = reg_wr_data[31]; + dma_write_desc_valid_next = 1'b1; + end + // block read + RBB+12'h300: begin + dma_read_block_run_next = reg_wr_data[0]; + end + RBB+12'h308: dma_read_block_cycle_count_next[31:0] = reg_wr_data; + RBB+12'h30c: dma_read_block_cycle_count_next[63:32] = reg_wr_data; + RBB+12'h310: dma_read_block_len_next = reg_wr_data; + RBB+12'h318: dma_read_block_count_next[31:0] = reg_wr_data; + RBB+12'h380: dma_read_block_dma_base_addr_next[31:0] = reg_wr_data; + RBB+12'h384: dma_read_block_dma_base_addr_next[63:32] = reg_wr_data; + RBB+12'h388: dma_read_block_dma_offset_next[31:0] = reg_wr_data; + RBB+12'h38c: dma_read_block_dma_offset_next[63:32] = reg_wr_data; + RBB+12'h390: dma_read_block_dma_offset_mask_next[31:0] = reg_wr_data; + RBB+12'h394: dma_read_block_dma_offset_mask_next[63:32] = reg_wr_data; + RBB+12'h398: dma_read_block_dma_stride_next[31:0] = reg_wr_data; + RBB+12'h39c: dma_read_block_dma_stride_next[63:32] = reg_wr_data; + RBB+12'h3c0: dma_read_block_ram_base_addr_next = reg_wr_data; + RBB+12'h3c8: dma_read_block_ram_offset_next = reg_wr_data; + RBB+12'h3d0: dma_read_block_ram_offset_mask_next = reg_wr_data; + RBB+12'h3d8: dma_read_block_ram_stride_next = reg_wr_data; + // block write + RBB+12'h400: begin + dma_write_block_run_next = reg_wr_data[0]; + end + RBB+12'h408: dma_write_block_cycle_count_next[31:0] = reg_wr_data; + RBB+12'h40c: dma_write_block_cycle_count_next[63:32] = reg_wr_data; + RBB+12'h410: dma_write_block_len_next = reg_wr_data; + RBB+12'h418: dma_write_block_count_next[31:0] = reg_wr_data; + RBB+12'h480: dma_write_block_dma_base_addr_next[31:0] = reg_wr_data; + RBB+12'h484: dma_write_block_dma_base_addr_next[63:32] = reg_wr_data; + RBB+12'h488: dma_write_block_dma_offset_next[31:0] = reg_wr_data; + RBB+12'h48c: dma_write_block_dma_offset_next[63:32] = reg_wr_data; + RBB+12'h490: dma_write_block_dma_offset_mask_next[31:0] = reg_wr_data; + RBB+12'h494: dma_write_block_dma_offset_mask_next[63:32] = reg_wr_data; + RBB+12'h498: dma_write_block_dma_stride_next[31:0] = reg_wr_data; + RBB+12'h49c: dma_write_block_dma_stride_next[63:32] = reg_wr_data; + RBB+12'h4c0: dma_write_block_ram_base_addr_next = reg_wr_data; + RBB+12'h4c8: dma_write_block_ram_offset_next = reg_wr_data; + RBB+12'h4d0: dma_write_block_ram_offset_mask_next = reg_wr_data; + RBB+12'h4d8: dma_write_block_ram_stride_next = reg_wr_data; + default: reg_wr_ack_next = 1'b0; + endcase + end + + if (reg_rd_en && !reg_rd_ack_reg) begin + // read operation + reg_rd_ack_next = 1'b1; + case ({reg_rd_addr >> 2, 2'b00}) + RBB+12'h000: reg_rd_data_next = 32'h12348101; // Type + RBB+12'h004: reg_rd_data_next = 32'h00000100; // Version + RBB+12'h008: reg_rd_data_next = RB_NEXT_PTR; // Next header + // control + RBB+12'h00c: begin + reg_rd_data_next[0] = dma_rd_int_en_reg; + reg_rd_data_next[1] = dma_wr_int_en_reg; + end + RBB+12'h010: reg_rd_data_next = cycle_count_reg; + RBB+12'h014: reg_rd_data_next = cycle_count_reg >> 32; + RBB+12'h020: reg_rd_data_next = dma_read_active_count_reg; + RBB+12'h028: reg_rd_data_next = dma_write_active_count_reg; + // single read + RBB+12'h100: reg_rd_data_next = dma_read_desc_dma_addr_reg; + RBB+12'h104: reg_rd_data_next = dma_read_desc_dma_addr_reg >> 32; + RBB+12'h108: reg_rd_data_next = dma_read_desc_ram_addr_reg; + RBB+12'h10c: reg_rd_data_next = dma_read_desc_ram_addr_reg >> 32; + RBB+12'h110: reg_rd_data_next = dma_read_desc_len_reg; + RBB+12'h114: reg_rd_data_next = dma_read_desc_tag_reg; + RBB+12'h118: begin + reg_rd_data_next[15:0] = dma_read_desc_status_tag_reg; + reg_rd_data_next[27:24] = dma_read_desc_status_error_reg; + reg_rd_data_next[31] = dma_read_desc_status_valid_reg; + dma_read_desc_status_valid_next = 1'b0; + end + // single write + RBB+12'h200: reg_rd_data_next = dma_write_desc_dma_addr_reg; + RBB+12'h204: reg_rd_data_next = dma_write_desc_dma_addr_reg >> 32; + RBB+12'h208: reg_rd_data_next = dma_write_desc_ram_addr_imm_reg; + RBB+12'h20c: reg_rd_data_next = dma_write_desc_ram_addr_imm_reg >> 32; + RBB+12'h210: reg_rd_data_next = dma_write_desc_len_reg; + RBB+12'h214: begin + reg_rd_data_next[23:0] = dma_write_desc_tag_reg; + reg_rd_data_next[31] = dma_write_desc_imm_en_reg; + end + RBB+12'h218: begin + reg_rd_data_next[15:0] = dma_write_desc_status_tag_reg; + reg_rd_data_next[27:24] = dma_write_desc_status_error_reg; + reg_rd_data_next[31] = dma_write_desc_status_valid_reg; + dma_write_desc_status_valid_next = 1'b0; + end + // block read + RBB+12'h300: begin + reg_rd_data_next[0] = dma_read_block_run_reg; + end + RBB+12'h308: reg_rd_data_next = dma_read_block_cycle_count_reg; + RBB+12'h30c: reg_rd_data_next = dma_read_block_cycle_count_reg >> 32; + RBB+12'h310: reg_rd_data_next = dma_read_block_len_reg; + RBB+12'h318: reg_rd_data_next = dma_read_block_count_reg; + RBB+12'h31c: reg_rd_data_next = dma_read_block_count_reg >> 32; + RBB+12'h380: reg_rd_data_next = dma_read_block_dma_base_addr_reg; + RBB+12'h384: reg_rd_data_next = dma_read_block_dma_base_addr_reg >> 32; + RBB+12'h388: reg_rd_data_next = dma_read_block_dma_offset_reg; + RBB+12'h38c: reg_rd_data_next = dma_read_block_dma_offset_reg >> 32; + RBB+12'h390: reg_rd_data_next = dma_read_block_dma_offset_mask_reg; + RBB+12'h394: reg_rd_data_next = dma_read_block_dma_offset_mask_reg >> 32; + RBB+12'h398: reg_rd_data_next = dma_read_block_dma_stride_reg; + RBB+12'h39c: reg_rd_data_next = dma_read_block_dma_stride_reg >> 32; + RBB+12'h3c0: reg_rd_data_next = dma_read_block_ram_base_addr_reg; + RBB+12'h3c4: reg_rd_data_next = dma_read_block_ram_base_addr_reg >> 32; + RBB+12'h3c8: reg_rd_data_next = dma_read_block_ram_offset_reg; + RBB+12'h3cc: reg_rd_data_next = dma_read_block_ram_offset_reg >> 32; + RBB+12'h3d0: reg_rd_data_next = dma_read_block_ram_offset_mask_reg; + RBB+12'h3d4: reg_rd_data_next = dma_read_block_ram_offset_mask_reg >> 32; + RBB+12'h3d8: reg_rd_data_next = dma_read_block_ram_stride_reg; + RBB+12'h3dc: reg_rd_data_next = dma_read_block_ram_stride_reg >> 32; + // block write + RBB+12'h400: begin + reg_rd_data_next[0] = dma_write_block_run_reg; + end + RBB+12'h408: reg_rd_data_next = dma_write_block_cycle_count_reg; + RBB+12'h40c: reg_rd_data_next = dma_write_block_cycle_count_reg >> 32; + RBB+12'h410: reg_rd_data_next = dma_write_block_len_reg; + RBB+12'h418: reg_rd_data_next = dma_write_block_count_reg; + RBB+12'h41c: reg_rd_data_next = dma_write_block_count_reg >> 32; + RBB+12'h480: reg_rd_data_next = dma_write_block_dma_base_addr_reg; + RBB+12'h484: reg_rd_data_next = dma_write_block_dma_base_addr_reg >> 32; + RBB+12'h488: reg_rd_data_next = dma_write_block_dma_offset_reg; + RBB+12'h48c: reg_rd_data_next = dma_write_block_dma_offset_reg >> 32; + RBB+12'h490: reg_rd_data_next = dma_write_block_dma_offset_mask_reg; + RBB+12'h494: reg_rd_data_next = dma_write_block_dma_offset_mask_reg >> 32; + RBB+12'h498: reg_rd_data_next = dma_write_block_dma_stride_reg; + RBB+12'h49c: reg_rd_data_next = dma_write_block_dma_stride_reg >> 32; + RBB+12'h4c0: reg_rd_data_next = dma_write_block_ram_base_addr_reg; + RBB+12'h4c4: reg_rd_data_next = dma_write_block_ram_base_addr_reg >> 32; + RBB+12'h4c8: reg_rd_data_next = dma_write_block_ram_offset_reg; + RBB+12'h4cc: reg_rd_data_next = dma_write_block_ram_offset_reg >> 32; + RBB+12'h4d0: reg_rd_data_next = dma_write_block_ram_offset_mask_reg; + RBB+12'h4d4: reg_rd_data_next = dma_write_block_ram_offset_mask_reg >> 32; + RBB+12'h4d8: reg_rd_data_next = dma_write_block_ram_stride_reg; + RBB+12'h4dc: reg_rd_data_next = dma_write_block_ram_stride_reg >> 32; + default: reg_rd_ack_next = 1'b0; + endcase + end + + // store read response + if (s_axis_dma_read_desc_status_valid) begin + dma_read_desc_status_tag_next = s_axis_dma_read_desc_status_tag; + dma_read_desc_status_error_next = s_axis_dma_read_desc_status_error; + dma_read_desc_status_valid_next = s_axis_dma_read_desc_status_valid; + end + + // store write response + if (s_axis_dma_write_desc_status_valid) begin + dma_write_desc_status_tag_next = s_axis_dma_write_desc_status_tag; + dma_write_desc_status_error_next = s_axis_dma_write_desc_status_error; + dma_write_desc_status_valid_next = s_axis_dma_write_desc_status_valid; + end + + // block read + if (dma_read_block_run_reg) begin + dma_read_block_cycle_count_next = dma_read_block_cycle_count_reg + 1; + + if (dma_read_block_count_reg == 0) begin + if (dma_read_active_count_reg == 0) begin + dma_read_block_run_next = 1'b0; + end + end else begin + if (!dma_read_desc_valid_reg || m_axis_dma_read_desc_ready) begin + dma_read_block_dma_offset_next = dma_read_block_dma_offset_reg + dma_read_block_dma_stride_reg; + dma_read_desc_dma_addr_next = dma_read_block_dma_base_addr_reg + (dma_read_block_dma_offset_reg & dma_read_block_dma_offset_mask_reg); + dma_read_block_ram_offset_next = dma_read_block_ram_offset_reg + dma_read_block_ram_stride_reg; + dma_read_desc_ram_addr_next = dma_read_block_ram_base_addr_reg + (dma_read_block_ram_offset_reg & dma_read_block_ram_offset_mask_reg); + dma_read_desc_len_next = dma_read_block_len_reg; + dma_read_block_count_next = dma_read_block_count_reg - 1; + dma_read_desc_tag_next = dma_read_block_count_reg; + dma_read_desc_valid_next = 1'b1; + end + end + end + + // block write + if (dma_write_block_run_reg) begin + dma_write_block_cycle_count_next = dma_write_block_cycle_count_reg + 1; + + if (dma_write_block_count_reg == 0) begin + if (dma_write_active_count_reg == 0) begin + dma_write_block_run_next = 1'b0; + end + end else begin + if (!dma_write_desc_valid_reg || m_axis_dma_write_desc_ready) begin + dma_write_block_dma_offset_next = dma_write_block_dma_offset_reg + dma_write_block_dma_stride_reg; + dma_write_desc_dma_addr_next = dma_write_block_dma_base_addr_reg + (dma_write_block_dma_offset_reg & dma_write_block_dma_offset_mask_reg); + dma_write_block_ram_offset_next = dma_write_block_ram_offset_reg + dma_write_block_ram_stride_reg; + dma_write_desc_ram_addr_imm_next = dma_write_block_ram_base_addr_reg + (dma_write_block_ram_offset_reg & dma_write_block_ram_offset_mask_reg); + dma_write_desc_imm_en_next = 1'b0; + dma_write_desc_len_next = dma_write_block_len_reg; + dma_write_block_count_next = dma_write_block_count_reg - 1; + dma_write_desc_tag_next = dma_write_block_count_reg; + dma_write_desc_valid_next = 1'b1; + end + end + end +end + +always @(posedge clk) begin + reg_wr_ack_reg <= reg_wr_ack_next; + reg_rd_data_reg <= reg_rd_data_next; + reg_rd_ack_reg <= reg_rd_ack_next; + + cycle_count_reg <= cycle_count_reg + 1; + + dma_read_active_count_reg <= dma_read_active_count_reg + + (m_axis_dma_read_desc_valid && m_axis_dma_read_desc_ready) + - s_axis_dma_read_desc_status_valid; + dma_write_active_count_reg <= dma_write_active_count_reg + + (m_axis_dma_write_desc_valid && m_axis_dma_write_desc_ready) + - s_axis_dma_write_desc_status_valid; + + dma_read_desc_dma_addr_reg <= dma_read_desc_dma_addr_next; + dma_read_desc_ram_addr_reg <= dma_read_desc_ram_addr_next; + dma_read_desc_len_reg <= dma_read_desc_len_next; + dma_read_desc_tag_reg <= dma_read_desc_tag_next; + dma_read_desc_valid_reg <= dma_read_desc_valid_next; + + dma_read_desc_status_tag_reg <= dma_read_desc_status_tag_next; + dma_read_desc_status_error_reg <= dma_read_desc_status_error_next; + dma_read_desc_status_valid_reg <= dma_read_desc_status_valid_next; + + dma_write_desc_dma_addr_reg <= dma_write_desc_dma_addr_next; + dma_write_desc_ram_addr_imm_reg <= dma_write_desc_ram_addr_imm_next; + dma_write_desc_imm_en_reg <= dma_write_desc_imm_en_next; + dma_write_desc_len_reg <= dma_write_desc_len_next; + dma_write_desc_tag_reg <= dma_write_desc_tag_next; + dma_write_desc_valid_reg <= dma_write_desc_valid_next; + + dma_write_desc_status_tag_reg <= dma_write_desc_status_tag_next; + dma_write_desc_status_error_reg <= dma_write_desc_status_error_next; + dma_write_desc_status_valid_reg <= dma_write_desc_status_valid_next; + + dma_rd_int_en_reg <= dma_rd_int_en_next; + dma_wr_int_en_reg <= dma_wr_int_en_next; + + dma_read_block_run_reg <= dma_read_block_run_next; + dma_read_block_len_reg <= dma_read_block_len_next; + dma_read_block_count_reg <= dma_read_block_count_next; + dma_read_block_cycle_count_reg <= dma_read_block_cycle_count_next; + dma_read_block_dma_base_addr_reg <= dma_read_block_dma_base_addr_next; + dma_read_block_dma_offset_reg <= dma_read_block_dma_offset_next; + dma_read_block_dma_offset_mask_reg <= dma_read_block_dma_offset_mask_next; + dma_read_block_dma_stride_reg <= dma_read_block_dma_stride_next; + dma_read_block_ram_base_addr_reg <= dma_read_block_ram_base_addr_next; + dma_read_block_ram_offset_reg <= dma_read_block_ram_offset_next; + dma_read_block_ram_offset_mask_reg <= dma_read_block_ram_offset_mask_next; + dma_read_block_ram_stride_reg <= dma_read_block_ram_stride_next; + + dma_write_block_run_reg <= dma_write_block_run_next; + dma_write_block_len_reg <= dma_write_block_len_next; + dma_write_block_count_reg <= dma_write_block_count_next; + dma_write_block_cycle_count_reg <= dma_write_block_cycle_count_next; + dma_write_block_dma_base_addr_reg <= dma_write_block_dma_base_addr_next; + dma_write_block_dma_offset_reg <= dma_write_block_dma_offset_next; + dma_write_block_dma_offset_mask_reg <= dma_write_block_dma_offset_mask_next; + dma_write_block_dma_stride_reg <= dma_write_block_dma_stride_next; + dma_write_block_ram_base_addr_reg <= dma_write_block_ram_base_addr_next; + dma_write_block_ram_offset_reg <= dma_write_block_ram_offset_next; + dma_write_block_ram_offset_mask_reg <= dma_write_block_ram_offset_mask_next; + dma_write_block_ram_stride_reg <= dma_write_block_ram_stride_next; + + if (rst) begin + reg_wr_ack_reg <= 1'b0; + reg_rd_ack_reg <= 1'b0; + + cycle_count_reg <= 0; + dma_read_active_count_reg <= 0; + dma_write_active_count_reg <= 0; + + dma_read_desc_valid_reg <= 1'b0; + dma_read_desc_status_valid_reg <= 1'b0; + dma_write_desc_valid_reg <= 1'b0; + dma_write_desc_status_valid_reg <= 1'b0; + dma_rd_int_en_reg <= 1'b0; + dma_wr_int_en_reg <= 1'b0; + dma_read_block_run_reg <= 1'b0; + dma_write_block_run_reg <= 1'b0; + end +end + +dma_psdpram #( + .SIZE(16384), + .SEG_COUNT(RAM_SEG_COUNT), + .SEG_DATA_WIDTH(RAM_SEG_DATA_WIDTH), + .SEG_ADDR_WIDTH(RAM_SEG_ADDR_WIDTH), + .SEG_BE_WIDTH(RAM_SEG_BE_WIDTH), + .PIPELINE(2) +) +dma_ram_inst ( + .clk(clk), + .rst(rst), + + /* + * Write port + */ + .wr_cmd_be(dma_ram_wr_cmd_be), + .wr_cmd_addr(dma_ram_wr_cmd_addr), + .wr_cmd_data(dma_ram_wr_cmd_data), + .wr_cmd_valid(dma_ram_wr_cmd_valid), + .wr_cmd_ready(dma_ram_wr_cmd_ready), + .wr_done(dma_ram_wr_done), + + /* + * Read port + */ + .rd_cmd_addr(dma_ram_rd_cmd_addr), + .rd_cmd_valid(dma_ram_rd_cmd_valid), + .rd_cmd_ready(dma_ram_rd_cmd_ready), + .rd_resp_data(dma_ram_rd_resp_data), + .rd_resp_valid(dma_ram_rd_resp_valid), + .rd_resp_ready(dma_ram_rd_resp_ready) +); + +endmodule + +`resetall diff --git a/fpga/app/dma_bench/rtl/mqnic_app_block_dma_bench.v b/fpga/app/dma_bench/rtl/mqnic_app_block_dma_bench.v index 9e8a94e7d..0af4f55f9 100644 --- a/fpga/app/dma_bench/rtl/mqnic_app_block_dma_bench.v +++ b/fpga/app/dma_bench/rtl/mqnic_app_block_dma_bench.v @@ -616,6 +616,15 @@ module mqnic_app_block # input wire jtag_tck ); +localparam REG_ADDR_WIDTH = AXIL_APP_CTRL_ADDR_WIDTH; +localparam REG_DATA_WIDTH = AXIL_APP_CTRL_DATA_WIDTH; +localparam REG_STRB_WIDTH = AXIL_APP_CTRL_STRB_WIDTH; + +localparam RB_BASE_ADDR = 0; +localparam RBB = RB_BASE_ADDR & {AXIL_APP_CTRL_ADDR_WIDTH{1'b1}}; + +localparam DMA_BENCH_RB_BASE_ADDR = RB_BASE_ADDR; + // check configuration initial begin if (APP_ID != 32'h12348001) begin @@ -824,556 +833,164 @@ assign gpio_out = 0; assign jtag_tdo = jtag_tdi; -// AXI lite connections -wire [AXIL_APP_CTRL_ADDR_WIDTH-1:0] axil_csr_awaddr; -wire [2:0] axil_csr_awprot; -wire axil_csr_awvalid; -wire axil_csr_awready; -wire [AXIL_APP_CTRL_DATA_WIDTH-1:0] axil_csr_wdata; -wire [AXIL_APP_CTRL_STRB_WIDTH-1:0] axil_csr_wstrb; -wire axil_csr_wvalid; -wire axil_csr_wready; -wire [1:0] axil_csr_bresp; -wire axil_csr_bvalid; -wire axil_csr_bready; -wire [AXIL_APP_CTRL_ADDR_WIDTH-1:0] axil_csr_araddr; -wire [2:0] axil_csr_arprot; -wire axil_csr_arvalid; -wire axil_csr_arready; -wire [AXIL_APP_CTRL_DATA_WIDTH-1:0] axil_csr_rdata; -wire [1:0] axil_csr_rresp; -wire axil_csr_rvalid; -wire axil_csr_rready; - -assign axil_csr_awaddr = s_axil_app_ctrl_awaddr; -assign axil_csr_awprot = s_axil_app_ctrl_awprot; -assign axil_csr_awvalid = s_axil_app_ctrl_awvalid; -assign s_axil_app_ctrl_awready = axil_csr_awready; -assign axil_csr_wdata = s_axil_app_ctrl_wdata; -assign axil_csr_wstrb = s_axil_app_ctrl_wstrb; -assign axil_csr_wvalid = s_axil_app_ctrl_wvalid; -assign s_axil_app_ctrl_wready = axil_csr_wready; -assign s_axil_app_ctrl_bresp = axil_csr_bresp; -assign s_axil_app_ctrl_bvalid = axil_csr_bvalid; -assign axil_csr_bready = s_axil_app_ctrl_bready; -assign axil_csr_araddr = s_axil_app_ctrl_araddr; -assign axil_csr_arprot = s_axil_app_ctrl_arprot; -assign axil_csr_arvalid = s_axil_app_ctrl_arvalid; -assign s_axil_app_ctrl_arready = axil_csr_arready; -assign s_axil_app_ctrl_rdata = axil_csr_rdata; -assign s_axil_app_ctrl_rresp = axil_csr_rresp; -assign s_axil_app_ctrl_rvalid = axil_csr_rvalid; -assign axil_csr_rready = s_axil_app_ctrl_rready; - // control registers -reg axil_csr_awready_reg = 1'b0, axil_csr_awready_next; -reg axil_csr_wready_reg = 1'b0, axil_csr_wready_next; -reg [1:0] axil_csr_bresp_reg = 2'b00, axil_csr_bresp_next; -reg axil_csr_bvalid_reg = 1'b0, axil_csr_bvalid_next; -reg axil_csr_arready_reg = 1'b0, axil_csr_arready_next; -reg [AXIL_APP_CTRL_DATA_WIDTH-1:0] axil_csr_rdata_reg = 0, axil_csr_rdata_next; -reg [1:0] axil_csr_rresp_reg = 2'b00, axil_csr_rresp_next; -reg axil_csr_rvalid_reg = 1'b0, axil_csr_rvalid_next; +wire [REG_ADDR_WIDTH-1:0] ctrl_reg_wr_addr; +wire [REG_DATA_WIDTH-1:0] ctrl_reg_wr_data; +wire [REG_STRB_WIDTH-1:0] ctrl_reg_wr_strb; +wire ctrl_reg_wr_en; +wire ctrl_reg_wr_wait; +wire ctrl_reg_wr_ack; +wire [REG_ADDR_WIDTH-1:0] ctrl_reg_rd_addr; +wire ctrl_reg_rd_en; +wire [REG_DATA_WIDTH-1:0] ctrl_reg_rd_data; +wire ctrl_reg_rd_wait; +wire ctrl_reg_rd_ack; -reg [63:0] cycle_count_reg = 0; -reg [15:0] dma_read_active_count_reg = 0; -reg [15:0] dma_write_active_count_reg = 0; - -reg [DMA_ADDR_WIDTH-1:0] dma_read_desc_dma_addr_reg = 0, dma_read_desc_dma_addr_next; -reg [RAM_ADDR_WIDTH-1:0] dma_read_desc_ram_addr_reg = 0, dma_read_desc_ram_addr_next; -reg [DMA_LEN_WIDTH-1:0] dma_read_desc_len_reg = 0, dma_read_desc_len_next; -reg [DMA_TAG_WIDTH-1:0] dma_read_desc_tag_reg = 0, dma_read_desc_tag_next; -reg dma_read_desc_valid_reg = 0, dma_read_desc_valid_next; - -reg [DMA_TAG_WIDTH-1:0] dma_read_desc_status_tag_reg = 0, dma_read_desc_status_tag_next; -reg [3:0] dma_read_desc_status_error_reg = 0, dma_read_desc_status_error_next; -reg dma_read_desc_status_valid_reg = 0, dma_read_desc_status_valid_next; - -reg [DMA_ADDR_WIDTH-1:0] dma_write_desc_dma_addr_reg = 0, dma_write_desc_dma_addr_next; -reg [RAM_ADDR_IMM_WIDTH-1:0] dma_write_desc_ram_addr_imm_reg = 0, dma_write_desc_ram_addr_imm_next; -reg dma_write_desc_imm_en_reg = 0, dma_write_desc_imm_en_next; -reg [DMA_LEN_WIDTH-1:0] dma_write_desc_len_reg = 0, dma_write_desc_len_next; -reg [DMA_TAG_WIDTH-1:0] dma_write_desc_tag_reg = 0, dma_write_desc_tag_next; -reg dma_write_desc_valid_reg = 0, dma_write_desc_valid_next; - -reg [DMA_TAG_WIDTH-1:0] dma_write_desc_status_tag_reg = 0, dma_write_desc_status_tag_next; -reg [3:0] dma_write_desc_status_error_reg = 0, dma_write_desc_status_error_next; -reg dma_write_desc_status_valid_reg = 0, dma_write_desc_status_valid_next; - -reg dma_rd_int_en_reg = 0, dma_rd_int_en_next; -reg dma_wr_int_en_reg = 0, dma_wr_int_en_next; - -reg dma_read_block_run_reg = 1'b0, dma_read_block_run_next; -reg [DMA_LEN_WIDTH-1:0] dma_read_block_len_reg = 0, dma_read_block_len_next; -reg [31:0] dma_read_block_count_reg = 0, dma_read_block_count_next; -reg [63:0] dma_read_block_cycle_count_reg = 0, dma_read_block_cycle_count_next; -reg [DMA_ADDR_WIDTH-1:0] dma_read_block_dma_base_addr_reg = 0, dma_read_block_dma_base_addr_next; -reg [DMA_ADDR_WIDTH-1:0] dma_read_block_dma_offset_reg = 0, dma_read_block_dma_offset_next; -reg [DMA_ADDR_WIDTH-1:0] dma_read_block_dma_offset_mask_reg = 0, dma_read_block_dma_offset_mask_next; -reg [DMA_ADDR_WIDTH-1:0] dma_read_block_dma_stride_reg = 0, dma_read_block_dma_stride_next; -reg [RAM_ADDR_WIDTH-1:0] dma_read_block_ram_base_addr_reg = 0, dma_read_block_ram_base_addr_next; -reg [RAM_ADDR_WIDTH-1:0] dma_read_block_ram_offset_reg = 0, dma_read_block_ram_offset_next; -reg [RAM_ADDR_WIDTH-1:0] dma_read_block_ram_offset_mask_reg = 0, dma_read_block_ram_offset_mask_next; -reg [RAM_ADDR_WIDTH-1:0] dma_read_block_ram_stride_reg = 0, dma_read_block_ram_stride_next; - -reg dma_write_block_run_reg = 1'b0, dma_write_block_run_next; -reg [DMA_LEN_WIDTH-1:0] dma_write_block_len_reg = 0, dma_write_block_len_next; -reg [31:0] dma_write_block_count_reg = 0, dma_write_block_count_next; -reg [63:0] dma_write_block_cycle_count_reg = 0, dma_write_block_cycle_count_next; -reg [DMA_ADDR_WIDTH-1:0] dma_write_block_dma_base_addr_reg = 0, dma_write_block_dma_base_addr_next; -reg [DMA_ADDR_WIDTH-1:0] dma_write_block_dma_offset_reg = 0, dma_write_block_dma_offset_next; -reg [DMA_ADDR_WIDTH-1:0] dma_write_block_dma_offset_mask_reg = 0, dma_write_block_dma_offset_mask_next; -reg [DMA_ADDR_WIDTH-1:0] dma_write_block_dma_stride_reg = 0, dma_write_block_dma_stride_next; -reg [RAM_ADDR_WIDTH-1:0] dma_write_block_ram_base_addr_reg = 0, dma_write_block_ram_base_addr_next; -reg [RAM_ADDR_WIDTH-1:0] dma_write_block_ram_offset_reg = 0, dma_write_block_ram_offset_next; -reg [RAM_ADDR_WIDTH-1:0] dma_write_block_ram_offset_mask_reg = 0, dma_write_block_ram_offset_mask_next; -reg [RAM_ADDR_WIDTH-1:0] dma_write_block_ram_stride_reg = 0, dma_write_block_ram_stride_next; - -assign axil_csr_awready = axil_csr_awready_reg; -assign axil_csr_wready = axil_csr_wready_reg; -assign axil_csr_bresp = axil_csr_bresp_reg; -assign axil_csr_bvalid = axil_csr_bvalid_reg; -assign axil_csr_arready = axil_csr_arready_reg; -assign axil_csr_rdata = axil_csr_rdata_reg; -assign axil_csr_rresp = axil_csr_rresp_reg; -assign axil_csr_rvalid = axil_csr_rvalid_reg; - -assign m_axis_data_dma_read_desc_dma_addr = dma_read_desc_dma_addr_reg; -assign m_axis_data_dma_read_desc_ram_sel = 0; -assign m_axis_data_dma_read_desc_ram_addr = dma_read_desc_ram_addr_reg; -assign m_axis_data_dma_read_desc_len = dma_read_desc_len_reg; -assign m_axis_data_dma_read_desc_tag = dma_read_desc_tag_reg; -assign m_axis_data_dma_read_desc_valid = dma_read_desc_valid_reg; - -assign m_axis_data_dma_write_desc_dma_addr = dma_write_desc_dma_addr_reg; -assign m_axis_data_dma_write_desc_ram_sel = 0; -assign m_axis_data_dma_write_desc_ram_addr = dma_write_desc_ram_addr_imm_reg; -assign m_axis_data_dma_write_desc_imm = dma_write_desc_ram_addr_imm_reg; -assign m_axis_data_dma_write_desc_imm_en = dma_write_desc_imm_en_reg; -assign m_axis_data_dma_write_desc_len = dma_write_desc_len_reg; -assign m_axis_data_dma_write_desc_tag = dma_write_desc_tag_reg; -assign m_axis_data_dma_write_desc_valid = dma_write_desc_valid_reg; - -always @* begin - axil_csr_awready_next = 1'b0; - axil_csr_wready_next = 1'b0; - axil_csr_bresp_next = 2'b00; - axil_csr_bvalid_next = axil_csr_bvalid_reg && !axil_csr_bready; - axil_csr_arready_next = 1'b0; - axil_csr_rdata_next = axil_csr_rdata_reg; - axil_csr_rresp_next = 2'b00; - axil_csr_rvalid_next = axil_csr_rvalid_reg && !axil_csr_rready; - - dma_read_desc_dma_addr_next = dma_read_desc_dma_addr_reg; - dma_read_desc_ram_addr_next = dma_read_desc_ram_addr_reg; - dma_read_desc_len_next = dma_read_desc_len_reg; - dma_read_desc_tag_next = dma_read_desc_tag_reg; - dma_read_desc_valid_next = dma_read_desc_valid_reg && !m_axis_data_dma_read_desc_ready; - - dma_read_desc_status_tag_next = dma_read_desc_status_tag_reg; - dma_read_desc_status_error_next = dma_read_desc_status_error_reg; - dma_read_desc_status_valid_next = dma_read_desc_status_valid_reg; - - dma_write_desc_dma_addr_next = dma_write_desc_dma_addr_reg; - dma_write_desc_ram_addr_imm_next = dma_write_desc_ram_addr_imm_reg; - dma_write_desc_imm_en_next = dma_write_desc_imm_en_reg; - dma_write_desc_len_next = dma_write_desc_len_reg; - dma_write_desc_tag_next = dma_write_desc_tag_reg; - dma_write_desc_valid_next = dma_write_desc_valid_reg && !m_axis_data_dma_write_desc_ready; - - dma_write_desc_status_tag_next = dma_write_desc_status_tag_reg; - dma_write_desc_status_error_next = dma_write_desc_status_error_reg; - dma_write_desc_status_valid_next = dma_write_desc_status_valid_reg; - - dma_rd_int_en_next = dma_rd_int_en_reg; - dma_wr_int_en_next = dma_wr_int_en_reg; - - dma_read_block_run_next = dma_read_block_run_reg; - dma_read_block_len_next = dma_read_block_len_reg; - dma_read_block_count_next = dma_read_block_count_reg; - dma_read_block_cycle_count_next = dma_read_block_cycle_count_reg; - dma_read_block_dma_base_addr_next = dma_read_block_dma_base_addr_reg; - dma_read_block_dma_offset_next = dma_read_block_dma_offset_reg; - dma_read_block_dma_offset_mask_next = dma_read_block_dma_offset_mask_reg; - dma_read_block_dma_stride_next = dma_read_block_dma_stride_reg; - dma_read_block_ram_base_addr_next = dma_read_block_ram_base_addr_reg; - dma_read_block_ram_offset_next = dma_read_block_ram_offset_reg; - dma_read_block_ram_offset_mask_next = dma_read_block_ram_offset_mask_reg; - dma_read_block_ram_stride_next = dma_read_block_ram_stride_reg; - - dma_write_block_run_next = dma_write_block_run_reg; - dma_write_block_len_next = dma_write_block_len_reg; - dma_write_block_count_next = dma_write_block_count_reg; - dma_write_block_cycle_count_next = dma_write_block_cycle_count_reg; - dma_write_block_dma_base_addr_next = dma_write_block_dma_base_addr_reg; - dma_write_block_dma_offset_next = dma_write_block_dma_offset_reg; - dma_write_block_dma_offset_mask_next = dma_write_block_dma_offset_mask_reg; - dma_write_block_dma_stride_next = dma_write_block_dma_stride_reg; - dma_write_block_ram_base_addr_next = dma_write_block_ram_base_addr_reg; - dma_write_block_ram_offset_next = dma_write_block_ram_offset_reg; - dma_write_block_ram_offset_mask_next = dma_write_block_ram_offset_mask_reg; - dma_write_block_ram_stride_next = dma_write_block_ram_stride_reg; - - if (axil_csr_awvalid && axil_csr_wvalid && !axil_csr_bvalid_reg) begin - // write operation - axil_csr_awready_next = 1'b1; - axil_csr_wready_next = 1'b1; - axil_csr_bresp_next = 2'b00; - axil_csr_bvalid_next = 1'b1; - - case ({axil_csr_awaddr[15:2], 2'b00}) - // control - 16'h0000: begin - end - 16'h0008: begin - dma_rd_int_en_next = axil_csr_wdata[0]; - dma_wr_int_en_next = axil_csr_wdata[1]; - end - // single read - 16'h0100: dma_read_desc_dma_addr_next[31:0] = axil_csr_wdata; - 16'h0104: dma_read_desc_dma_addr_next[63:32] = axil_csr_wdata; - 16'h0108: dma_read_desc_ram_addr_next = axil_csr_wdata; - 16'h0110: dma_read_desc_len_next = axil_csr_wdata; - 16'h0114: begin - dma_read_desc_tag_next = axil_csr_wdata; - dma_read_desc_valid_next = 1'b1; - end - // single write - 16'h0200: dma_write_desc_dma_addr_next[31:0] = axil_csr_wdata; - 16'h0204: dma_write_desc_dma_addr_next[63:32] = axil_csr_wdata; - 16'h0208: dma_write_desc_ram_addr_imm_next = axil_csr_wdata; - 16'h0210: dma_write_desc_len_next = axil_csr_wdata; - 16'h0214: begin - dma_write_desc_tag_next = axil_csr_wdata[23:0]; - dma_write_desc_imm_en_next = axil_csr_wdata[31]; - dma_write_desc_valid_next = 1'b1; - end - // block read - 16'h1000: begin - dma_read_block_run_next = axil_csr_wdata[0]; - end - 16'h1008: dma_read_block_cycle_count_next[31:0] = axil_csr_wdata; - 16'h100c: dma_read_block_cycle_count_next[63:32] = axil_csr_wdata; - 16'h1010: dma_read_block_len_next = axil_csr_wdata; - 16'h1018: dma_read_block_count_next[31:0] = axil_csr_wdata; - 16'h1080: dma_read_block_dma_base_addr_next[31:0] = axil_csr_wdata; - 16'h1084: dma_read_block_dma_base_addr_next[63:32] = axil_csr_wdata; - 16'h1088: dma_read_block_dma_offset_next[31:0] = axil_csr_wdata; - 16'h108c: dma_read_block_dma_offset_next[63:32] = axil_csr_wdata; - 16'h1090: dma_read_block_dma_offset_mask_next[31:0] = axil_csr_wdata; - 16'h1094: dma_read_block_dma_offset_mask_next[63:32] = axil_csr_wdata; - 16'h1098: dma_read_block_dma_stride_next[31:0] = axil_csr_wdata; - 16'h109c: dma_read_block_dma_stride_next[63:32] = axil_csr_wdata; - 16'h10c0: dma_read_block_ram_base_addr_next = axil_csr_wdata; - 16'h10c8: dma_read_block_ram_offset_next = axil_csr_wdata; - 16'h10d0: dma_read_block_ram_offset_mask_next = axil_csr_wdata; - 16'h10d8: dma_read_block_ram_stride_next = axil_csr_wdata; - // block write - 16'h1100: begin - dma_write_block_run_next = axil_csr_wdata[0]; - end - 16'h1108: dma_write_block_cycle_count_next[31:0] = axil_csr_wdata; - 16'h110c: dma_write_block_cycle_count_next[63:32] = axil_csr_wdata; - 16'h1110: dma_write_block_len_next = axil_csr_wdata; - 16'h1118: dma_write_block_count_next[31:0] = axil_csr_wdata; - 16'h1180: dma_write_block_dma_base_addr_next[31:0] = axil_csr_wdata; - 16'h1184: dma_write_block_dma_base_addr_next[63:32] = axil_csr_wdata; - 16'h1188: dma_write_block_dma_offset_next[31:0] = axil_csr_wdata; - 16'h118c: dma_write_block_dma_offset_next[63:32] = axil_csr_wdata; - 16'h1190: dma_write_block_dma_offset_mask_next[31:0] = axil_csr_wdata; - 16'h1194: dma_write_block_dma_offset_mask_next[63:32] = axil_csr_wdata; - 16'h1198: dma_write_block_dma_stride_next[31:0] = axil_csr_wdata; - 16'h119c: dma_write_block_dma_stride_next[63:32] = axil_csr_wdata; - 16'h11c0: dma_write_block_ram_base_addr_next = axil_csr_wdata; - 16'h11c8: dma_write_block_ram_offset_next = axil_csr_wdata; - 16'h11d0: dma_write_block_ram_offset_mask_next = axil_csr_wdata; - 16'h11d8: dma_write_block_ram_stride_next = axil_csr_wdata; - endcase - end - - if (axil_csr_arvalid && !axil_csr_rvalid_reg) begin - // read operation - axil_csr_arready_next = 1'b1; - axil_csr_rresp_next = 2'b00; - axil_csr_rvalid_next = 1'b1; - axil_csr_rdata_next = 32'd0; - - case ({axil_csr_araddr[15:2], 2'b00}) - // control - 16'h0000: begin - end - 16'h0008: begin - axil_csr_rdata_next[0] = dma_rd_int_en_reg; - axil_csr_rdata_next[1] = dma_wr_int_en_reg; - end - 16'h0010: axil_csr_rdata_next = cycle_count_reg; - 16'h0014: axil_csr_rdata_next = cycle_count_reg >> 32; - 16'h0020: axil_csr_rdata_next = dma_read_active_count_reg; - 16'h0028: axil_csr_rdata_next = dma_write_active_count_reg; - // single read - 16'h0100: axil_csr_rdata_next = dma_read_desc_dma_addr_reg; - 16'h0104: axil_csr_rdata_next = dma_read_desc_dma_addr_reg >> 32; - 16'h0108: axil_csr_rdata_next = dma_read_desc_ram_addr_reg; - 16'h010c: axil_csr_rdata_next = dma_read_desc_ram_addr_reg >> 32; - 16'h0110: axil_csr_rdata_next = dma_read_desc_len_reg; - 16'h0114: axil_csr_rdata_next = dma_read_desc_tag_reg; - 16'h0118: begin - axil_csr_rdata_next[15:0] = dma_read_desc_status_tag_reg; - axil_csr_rdata_next[27:24] = dma_read_desc_status_error_reg; - axil_csr_rdata_next[31] = dma_read_desc_status_valid_reg; - dma_read_desc_status_valid_next = 1'b0; - end - // single write - 16'h0200: axil_csr_rdata_next = dma_write_desc_dma_addr_reg; - 16'h0204: axil_csr_rdata_next = dma_write_desc_dma_addr_reg >> 32; - 16'h0208: axil_csr_rdata_next = dma_write_desc_ram_addr_imm_reg; - 16'h020c: axil_csr_rdata_next = dma_write_desc_ram_addr_imm_reg >> 32; - 16'h0210: axil_csr_rdata_next = dma_write_desc_len_reg; - 16'h0214: begin - axil_csr_rdata_next[23:0] = dma_write_desc_tag_reg; - axil_csr_rdata_next[31] = dma_write_desc_imm_en_reg; - end - 16'h0218: begin - axil_csr_rdata_next[15:0] = dma_write_desc_status_tag_reg; - axil_csr_rdata_next[27:24] = dma_write_desc_status_error_reg; - axil_csr_rdata_next[31] = dma_write_desc_status_valid_reg; - dma_write_desc_status_valid_next = 1'b0; - end - // block read - 16'h1000: begin - axil_csr_rdata_next[0] = dma_read_block_run_reg; - end - 16'h1008: axil_csr_rdata_next = dma_read_block_cycle_count_reg; - 16'h100c: axil_csr_rdata_next = dma_read_block_cycle_count_reg >> 32; - 16'h1010: axil_csr_rdata_next = dma_read_block_len_reg; - 16'h1018: axil_csr_rdata_next = dma_read_block_count_reg; - 16'h101c: axil_csr_rdata_next = dma_read_block_count_reg >> 32; - 16'h1080: axil_csr_rdata_next = dma_read_block_dma_base_addr_reg; - 16'h1084: axil_csr_rdata_next = dma_read_block_dma_base_addr_reg >> 32; - 16'h1088: axil_csr_rdata_next = dma_read_block_dma_offset_reg; - 16'h108c: axil_csr_rdata_next = dma_read_block_dma_offset_reg >> 32; - 16'h1090: axil_csr_rdata_next = dma_read_block_dma_offset_mask_reg; - 16'h1094: axil_csr_rdata_next = dma_read_block_dma_offset_mask_reg >> 32; - 16'h1098: axil_csr_rdata_next = dma_read_block_dma_stride_reg; - 16'h109c: axil_csr_rdata_next = dma_read_block_dma_stride_reg >> 32; - 16'h10c0: axil_csr_rdata_next = dma_read_block_ram_base_addr_reg; - 16'h10c4: axil_csr_rdata_next = dma_read_block_ram_base_addr_reg >> 32; - 16'h10c8: axil_csr_rdata_next = dma_read_block_ram_offset_reg; - 16'h10cc: axil_csr_rdata_next = dma_read_block_ram_offset_reg >> 32; - 16'h10d0: axil_csr_rdata_next = dma_read_block_ram_offset_mask_reg; - 16'h10d4: axil_csr_rdata_next = dma_read_block_ram_offset_mask_reg >> 32; - 16'h10d8: axil_csr_rdata_next = dma_read_block_ram_stride_reg; - 16'h10dc: axil_csr_rdata_next = dma_read_block_ram_stride_reg >> 32; - // block write - 16'h1100: begin - axil_csr_rdata_next[0] = dma_write_block_run_reg; - end - 16'h1108: axil_csr_rdata_next = dma_write_block_cycle_count_reg; - 16'h110c: axil_csr_rdata_next = dma_write_block_cycle_count_reg >> 32; - 16'h1110: axil_csr_rdata_next = dma_write_block_len_reg; - 16'h1118: axil_csr_rdata_next = dma_write_block_count_reg; - 16'h111c: axil_csr_rdata_next = dma_write_block_count_reg >> 32; - 16'h1180: axil_csr_rdata_next = dma_write_block_dma_base_addr_reg; - 16'h1184: axil_csr_rdata_next = dma_write_block_dma_base_addr_reg >> 32; - 16'h1188: axil_csr_rdata_next = dma_write_block_dma_offset_reg; - 16'h118c: axil_csr_rdata_next = dma_write_block_dma_offset_reg >> 32; - 16'h1190: axil_csr_rdata_next = dma_write_block_dma_offset_mask_reg; - 16'h1194: axil_csr_rdata_next = dma_write_block_dma_offset_mask_reg >> 32; - 16'h1198: axil_csr_rdata_next = dma_write_block_dma_stride_reg; - 16'h119c: axil_csr_rdata_next = dma_write_block_dma_stride_reg >> 32; - 16'h11c0: axil_csr_rdata_next = dma_write_block_ram_base_addr_reg; - 16'h11c4: axil_csr_rdata_next = dma_write_block_ram_base_addr_reg >> 32; - 16'h11c8: axil_csr_rdata_next = dma_write_block_ram_offset_reg; - 16'h11cc: axil_csr_rdata_next = dma_write_block_ram_offset_reg >> 32; - 16'h11d0: axil_csr_rdata_next = dma_write_block_ram_offset_mask_reg; - 16'h11d4: axil_csr_rdata_next = dma_write_block_ram_offset_mask_reg >> 32; - 16'h11d8: axil_csr_rdata_next = dma_write_block_ram_stride_reg; - 16'h11dc: axil_csr_rdata_next = dma_write_block_ram_stride_reg >> 32; - endcase - end - - // store read response - if (s_axis_data_dma_read_desc_status_valid) begin - dma_read_desc_status_tag_next = s_axis_data_dma_read_desc_status_tag; - dma_read_desc_status_error_next = s_axis_data_dma_read_desc_status_error; - dma_read_desc_status_valid_next = s_axis_data_dma_read_desc_status_valid; - end - - // store write response - if (s_axis_data_dma_write_desc_status_valid) begin - dma_write_desc_status_tag_next = s_axis_data_dma_write_desc_status_tag; - dma_write_desc_status_error_next = s_axis_data_dma_write_desc_status_error; - dma_write_desc_status_valid_next = s_axis_data_dma_write_desc_status_valid; - end - - // block read - if (dma_read_block_run_reg) begin - dma_read_block_cycle_count_next = dma_read_block_cycle_count_reg + 1; - - if (dma_read_block_count_reg == 0) begin - if (dma_read_active_count_reg == 0) begin - dma_read_block_run_next = 1'b0; - end - end else begin - if (!dma_read_desc_valid_reg || m_axis_data_dma_read_desc_ready) begin - dma_read_block_dma_offset_next = dma_read_block_dma_offset_reg + dma_read_block_dma_stride_reg; - dma_read_desc_dma_addr_next = dma_read_block_dma_base_addr_reg + (dma_read_block_dma_offset_reg & dma_read_block_dma_offset_mask_reg); - dma_read_block_ram_offset_next = dma_read_block_ram_offset_reg + dma_read_block_ram_stride_reg; - dma_read_desc_ram_addr_next = dma_read_block_ram_base_addr_reg + (dma_read_block_ram_offset_reg & dma_read_block_ram_offset_mask_reg); - dma_read_desc_len_next = dma_read_block_len_reg; - dma_read_block_count_next = dma_read_block_count_reg - 1; - dma_read_desc_tag_next = dma_read_block_count_reg; - dma_read_desc_valid_next = 1'b1; - end - end - end - - // block write - if (dma_write_block_run_reg) begin - dma_write_block_cycle_count_next = dma_write_block_cycle_count_reg + 1; - - if (dma_write_block_count_reg == 0) begin - if (dma_write_active_count_reg == 0) begin - dma_write_block_run_next = 1'b0; - end - end else begin - if (!dma_write_desc_valid_reg || m_axis_data_dma_write_desc_ready) begin - dma_write_block_dma_offset_next = dma_write_block_dma_offset_reg + dma_write_block_dma_stride_reg; - dma_write_desc_dma_addr_next = dma_write_block_dma_base_addr_reg + (dma_write_block_dma_offset_reg & dma_write_block_dma_offset_mask_reg); - dma_write_block_ram_offset_next = dma_write_block_ram_offset_reg + dma_write_block_ram_stride_reg; - dma_write_desc_ram_addr_imm_next = dma_write_block_ram_base_addr_reg + (dma_write_block_ram_offset_reg & dma_write_block_ram_offset_mask_reg); - dma_write_desc_imm_en_next = 1'b0; - dma_write_desc_len_next = dma_write_block_len_reg; - dma_write_block_count_next = dma_write_block_count_reg - 1; - dma_write_desc_tag_next = dma_write_block_count_reg; - dma_write_desc_valid_next = 1'b1; - end - end - end -end - -always @(posedge clk) begin - axil_csr_awready_reg <= axil_csr_awready_next; - axil_csr_wready_reg <= axil_csr_wready_next; - axil_csr_bresp_reg <= axil_csr_bresp_next; - axil_csr_bvalid_reg <= axil_csr_bvalid_next; - axil_csr_arready_reg <= axil_csr_arready_next; - axil_csr_rdata_reg <= axil_csr_rdata_next; - axil_csr_rresp_reg <= axil_csr_rresp_next; - axil_csr_rvalid_reg <= axil_csr_rvalid_next; - - cycle_count_reg <= cycle_count_reg + 1; - - dma_read_active_count_reg <= dma_read_active_count_reg - + (m_axis_data_dma_read_desc_valid && m_axis_data_dma_read_desc_ready) - - s_axis_data_dma_read_desc_status_valid; - dma_write_active_count_reg <= dma_write_active_count_reg - + (m_axis_data_dma_write_desc_valid && m_axis_data_dma_write_desc_ready) - - s_axis_data_dma_write_desc_status_valid; - - dma_read_desc_dma_addr_reg <= dma_read_desc_dma_addr_next; - dma_read_desc_ram_addr_reg <= dma_read_desc_ram_addr_next; - dma_read_desc_len_reg <= dma_read_desc_len_next; - dma_read_desc_tag_reg <= dma_read_desc_tag_next; - dma_read_desc_valid_reg <= dma_read_desc_valid_next; - - dma_read_desc_status_tag_reg <= dma_read_desc_status_tag_next; - dma_read_desc_status_error_reg <= dma_read_desc_status_error_next; - dma_read_desc_status_valid_reg <= dma_read_desc_status_valid_next; - - dma_write_desc_dma_addr_reg <= dma_write_desc_dma_addr_next; - dma_write_desc_ram_addr_imm_reg <= dma_write_desc_ram_addr_imm_next; - dma_write_desc_imm_en_reg <= dma_write_desc_imm_en_next; - dma_write_desc_len_reg <= dma_write_desc_len_next; - dma_write_desc_tag_reg <= dma_write_desc_tag_next; - dma_write_desc_valid_reg <= dma_write_desc_valid_next; - - dma_write_desc_status_tag_reg <= dma_write_desc_status_tag_next; - dma_write_desc_status_error_reg <= dma_write_desc_status_error_next; - dma_write_desc_status_valid_reg <= dma_write_desc_status_valid_next; - - dma_rd_int_en_reg <= dma_rd_int_en_next; - dma_wr_int_en_reg <= dma_wr_int_en_next; - - dma_read_block_run_reg <= dma_read_block_run_next; - dma_read_block_len_reg <= dma_read_block_len_next; - dma_read_block_count_reg <= dma_read_block_count_next; - dma_read_block_cycle_count_reg <= dma_read_block_cycle_count_next; - dma_read_block_dma_base_addr_reg <= dma_read_block_dma_base_addr_next; - dma_read_block_dma_offset_reg <= dma_read_block_dma_offset_next; - dma_read_block_dma_offset_mask_reg <= dma_read_block_dma_offset_mask_next; - dma_read_block_dma_stride_reg <= dma_read_block_dma_stride_next; - dma_read_block_ram_base_addr_reg <= dma_read_block_ram_base_addr_next; - dma_read_block_ram_offset_reg <= dma_read_block_ram_offset_next; - dma_read_block_ram_offset_mask_reg <= dma_read_block_ram_offset_mask_next; - dma_read_block_ram_stride_reg <= dma_read_block_ram_stride_next; - - dma_write_block_run_reg <= dma_write_block_run_next; - dma_write_block_len_reg <= dma_write_block_len_next; - dma_write_block_count_reg <= dma_write_block_count_next; - dma_write_block_cycle_count_reg <= dma_write_block_cycle_count_next; - dma_write_block_dma_base_addr_reg <= dma_write_block_dma_base_addr_next; - dma_write_block_dma_offset_reg <= dma_write_block_dma_offset_next; - dma_write_block_dma_offset_mask_reg <= dma_write_block_dma_offset_mask_next; - dma_write_block_dma_stride_reg <= dma_write_block_dma_stride_next; - dma_write_block_ram_base_addr_reg <= dma_write_block_ram_base_addr_next; - dma_write_block_ram_offset_reg <= dma_write_block_ram_offset_next; - dma_write_block_ram_offset_mask_reg <= dma_write_block_ram_offset_mask_next; - dma_write_block_ram_stride_reg <= dma_write_block_ram_stride_next; - - if (rst) begin - axil_csr_awready_reg <= 1'b0; - axil_csr_wready_reg <= 1'b0; - axil_csr_bvalid_reg <= 1'b0; - axil_csr_arready_reg <= 1'b0; - axil_csr_rvalid_reg <= 1'b0; - - cycle_count_reg <= 0; - dma_read_active_count_reg <= 0; - dma_write_active_count_reg <= 0; - - dma_read_desc_valid_reg <= 1'b0; - dma_read_desc_status_valid_reg <= 1'b0; - dma_write_desc_valid_reg <= 1'b0; - dma_write_desc_status_valid_reg <= 1'b0; - dma_rd_int_en_reg <= 1'b0; - dma_wr_int_en_reg <= 1'b0; - dma_read_block_run_reg <= 1'b0; - dma_write_block_run_reg <= 1'b0; - end -end - -dma_psdpram #( - .SIZE(16384), - .SEG_COUNT(RAM_SEG_COUNT), - .SEG_DATA_WIDTH(RAM_SEG_DATA_WIDTH), - .SEG_ADDR_WIDTH(RAM_SEG_ADDR_WIDTH), - .SEG_BE_WIDTH(RAM_SEG_BE_WIDTH), - .PIPELINE(2) +axil_reg_if #( + .DATA_WIDTH(REG_DATA_WIDTH), + .ADDR_WIDTH(REG_ADDR_WIDTH), + .STRB_WIDTH(REG_STRB_WIDTH), + .TIMEOUT(4) ) -dma_ram_inst ( +axil_reg_if_inst ( .clk(clk), .rst(rst), /* - * Write port + * AXI-Lite slave interface */ - .wr_cmd_be(data_dma_ram_wr_cmd_be), - .wr_cmd_addr(data_dma_ram_wr_cmd_addr), - .wr_cmd_data(data_dma_ram_wr_cmd_data), - .wr_cmd_valid(data_dma_ram_wr_cmd_valid), - .wr_cmd_ready(data_dma_ram_wr_cmd_ready), - .wr_done(data_dma_ram_wr_done), + .s_axil_awaddr(s_axil_app_ctrl_awaddr), + .s_axil_awprot(s_axil_app_ctrl_awprot), + .s_axil_awvalid(s_axil_app_ctrl_awvalid), + .s_axil_awready(s_axil_app_ctrl_awready), + .s_axil_wdata(s_axil_app_ctrl_wdata), + .s_axil_wstrb(s_axil_app_ctrl_wstrb), + .s_axil_wvalid(s_axil_app_ctrl_wvalid), + .s_axil_wready(s_axil_app_ctrl_wready), + .s_axil_bresp(s_axil_app_ctrl_bresp), + .s_axil_bvalid(s_axil_app_ctrl_bvalid), + .s_axil_bready(s_axil_app_ctrl_bready), + .s_axil_araddr(s_axil_app_ctrl_araddr), + .s_axil_arprot(s_axil_app_ctrl_arprot), + .s_axil_arvalid(s_axil_app_ctrl_arvalid), + .s_axil_arready(s_axil_app_ctrl_arready), + .s_axil_rdata(s_axil_app_ctrl_rdata), + .s_axil_rresp(s_axil_app_ctrl_rresp), + .s_axil_rvalid(s_axil_app_ctrl_rvalid), + .s_axil_rready(s_axil_app_ctrl_rready), /* - * Read port + * Register interface */ - .rd_cmd_addr(data_dma_ram_rd_cmd_addr), - .rd_cmd_valid(data_dma_ram_rd_cmd_valid), - .rd_cmd_ready(data_dma_ram_rd_cmd_ready), - .rd_resp_data(data_dma_ram_rd_resp_data), - .rd_resp_valid(data_dma_ram_rd_resp_valid), - .rd_resp_ready(data_dma_ram_rd_resp_ready) + .reg_wr_addr(ctrl_reg_wr_addr), + .reg_wr_data(ctrl_reg_wr_data), + .reg_wr_strb(ctrl_reg_wr_strb), + .reg_wr_en(ctrl_reg_wr_en), + .reg_wr_wait(ctrl_reg_wr_wait), + .reg_wr_ack(ctrl_reg_wr_ack), + .reg_rd_addr(ctrl_reg_rd_addr), + .reg_rd_en(ctrl_reg_rd_en), + .reg_rd_data(ctrl_reg_rd_data), + .reg_rd_wait(ctrl_reg_rd_wait), + .reg_rd_ack(ctrl_reg_rd_ack) +); + +dma_bench #( + // DMA interface configuration + .DMA_ADDR_WIDTH(DMA_ADDR_WIDTH), + .DMA_IMM_ENABLE(DMA_IMM_ENABLE), + .DMA_IMM_WIDTH(DMA_IMM_WIDTH), + .DMA_LEN_WIDTH(DMA_LEN_WIDTH), + .DMA_TAG_WIDTH(DMA_TAG_WIDTH), + .RAM_SEL_WIDTH(RAM_SEL_WIDTH), + .RAM_ADDR_WIDTH(RAM_ADDR_WIDTH), + .RAM_SEG_COUNT(RAM_SEG_COUNT), + .RAM_SEG_DATA_WIDTH(RAM_SEG_DATA_WIDTH), + .RAM_SEG_BE_WIDTH(RAM_SEG_BE_WIDTH), + .RAM_SEG_ADDR_WIDTH(RAM_SEG_ADDR_WIDTH), + .RAM_PIPELINE(RAM_PIPELINE), + + // Register interface + .REG_ADDR_WIDTH(REG_ADDR_WIDTH), + .REG_DATA_WIDTH(REG_DATA_WIDTH), + .REG_STRB_WIDTH(REG_STRB_WIDTH), + .RB_BASE_ADDR(DMA_BENCH_RB_BASE_ADDR), + .RB_NEXT_PTR(0) +) +dma_bench_inst ( + .clk(clk), + .rst(rst), + + /* + * Register interface + */ + .reg_wr_addr(ctrl_reg_wr_addr), + .reg_wr_data(ctrl_reg_wr_data), + .reg_wr_strb(ctrl_reg_wr_strb), + .reg_wr_en(ctrl_reg_wr_en), + .reg_wr_wait(ctrl_reg_wr_wait), + .reg_wr_ack(ctrl_reg_wr_ack), + .reg_rd_addr(ctrl_reg_rd_addr), + .reg_rd_en(ctrl_reg_rd_en), + .reg_rd_data(ctrl_reg_rd_data), + .reg_rd_wait(ctrl_reg_rd_wait), + .reg_rd_ack(ctrl_reg_rd_ack), + + /* + * DMA read descriptor output + */ + .m_axis_dma_read_desc_dma_addr(m_axis_data_dma_read_desc_dma_addr), + .m_axis_dma_read_desc_ram_sel(m_axis_data_dma_read_desc_ram_sel), + .m_axis_dma_read_desc_ram_addr(m_axis_data_dma_read_desc_ram_addr), + .m_axis_dma_read_desc_len(m_axis_data_dma_read_desc_len), + .m_axis_dma_read_desc_tag(m_axis_data_dma_read_desc_tag), + .m_axis_dma_read_desc_valid(m_axis_data_dma_read_desc_valid), + .m_axis_dma_read_desc_ready(m_axis_data_dma_read_desc_ready), + + /* + * DMA read descriptor status input + */ + .s_axis_dma_read_desc_status_tag(s_axis_data_dma_read_desc_status_tag), + .s_axis_dma_read_desc_status_error(s_axis_data_dma_read_desc_status_error), + .s_axis_dma_read_desc_status_valid(s_axis_data_dma_read_desc_status_valid), + + /* + * DMA write descriptor output + */ + .m_axis_dma_write_desc_dma_addr(m_axis_data_dma_write_desc_dma_addr), + .m_axis_dma_write_desc_ram_sel(m_axis_data_dma_write_desc_ram_sel), + .m_axis_dma_write_desc_ram_addr(m_axis_data_dma_write_desc_ram_addr), + .m_axis_dma_write_desc_imm(m_axis_data_dma_write_desc_imm), + .m_axis_dma_write_desc_imm_en(m_axis_data_dma_write_desc_imm_en), + .m_axis_dma_write_desc_len(m_axis_data_dma_write_desc_len), + .m_axis_dma_write_desc_tag(m_axis_data_dma_write_desc_tag), + .m_axis_dma_write_desc_valid(m_axis_data_dma_write_desc_valid), + .m_axis_dma_write_desc_ready(m_axis_data_dma_write_desc_ready), + + /* + * DMA write descriptor status input + */ + .s_axis_dma_write_desc_status_tag(s_axis_data_dma_write_desc_status_tag), + .s_axis_dma_write_desc_status_error(s_axis_data_dma_write_desc_status_error), + .s_axis_dma_write_desc_status_valid(s_axis_data_dma_write_desc_status_valid), + + /* + * DMA RAM interface + */ + .dma_ram_wr_cmd_sel(data_dma_ram_wr_cmd_sel), + .dma_ram_wr_cmd_be(data_dma_ram_wr_cmd_be), + .dma_ram_wr_cmd_addr(data_dma_ram_wr_cmd_addr), + .dma_ram_wr_cmd_data(data_dma_ram_wr_cmd_data), + .dma_ram_wr_cmd_valid(data_dma_ram_wr_cmd_valid), + .dma_ram_wr_cmd_ready(data_dma_ram_wr_cmd_ready), + .dma_ram_wr_done(data_dma_ram_wr_done), + .dma_ram_rd_cmd_sel(data_dma_ram_rd_cmd_sel), + .dma_ram_rd_cmd_addr(data_dma_ram_rd_cmd_addr), + .dma_ram_rd_cmd_valid(data_dma_ram_rd_cmd_valid), + .dma_ram_rd_cmd_ready(data_dma_ram_rd_cmd_ready), + .dma_ram_rd_resp_data(data_dma_ram_rd_resp_data), + .dma_ram_rd_resp_valid(data_dma_ram_rd_resp_valid), + .dma_ram_rd_resp_ready(data_dma_ram_rd_resp_ready) ); endmodule diff --git a/fpga/app/dma_bench/tb/mqnic_core_pcie_us/Makefile b/fpga/app/dma_bench/tb/mqnic_core_pcie_us/Makefile index 63c053ef3..a304deb8f 100644 --- a/fpga/app/dma_bench/tb/mqnic_core_pcie_us/Makefile +++ b/fpga/app/dma_bench/tb/mqnic_core_pcie_us/Makefile @@ -80,6 +80,7 @@ VERILOG_SOURCES += ../../rtl/common/stats_dma_latency.v VERILOG_SOURCES += ../../rtl/common/mqnic_tx_scheduler_block_rr.v VERILOG_SOURCES += ../../rtl/common/tx_scheduler_rr.v VERILOG_SOURCES += ../../rtl/mqnic_app_block_dma_bench.v +VERILOG_SOURCES += ../../rtl/dma_bench.v VERILOG_SOURCES += ../../lib/eth/rtl/ptp_clock.v VERILOG_SOURCES += ../../lib/eth/rtl/ptp_clock_cdc.v VERILOG_SOURCES += ../../lib/eth/rtl/ptp_perout.v diff --git a/fpga/app/dma_bench/tb/mqnic_core_pcie_us/test_mqnic_core_pcie_us.py b/fpga/app/dma_bench/tb/mqnic_core_pcie_us/test_mqnic_core_pcie_us.py index 353d442df..30d259b86 100644 --- a/fpga/app/dma_bench/tb/mqnic_core_pcie_us/test_mqnic_core_pcie_us.py +++ b/fpga/app/dma_bench/tb/mqnic_core_pcie_us/test_mqnic_core_pcie_us.py @@ -648,6 +648,11 @@ async def run_test_nic(dut): await block.schedulers[0].rb.write_dword(mqnic.MQNIC_RB_SCHED_RR_REG_CTRL, 0x00000000) await tb.driver.interfaces[0].set_rx_queue_map_offset(block.index, 0) + app_reg_blocks = mqnic.RegBlockList() + await app_reg_blocks.enumerate_reg_blocks(tb.driver.app_hw_regs) + + dma_bench_rb = app_reg_blocks.find(0x12348101, 0x00000100) + mem = tb.rc.mem_pool.alloc_region(16*1024*1024) mem_base = mem.get_absolute_address(0) @@ -657,30 +662,30 @@ async def run_test_nic(dut): mem[0:1024] = bytearray([x % 256 for x in range(1024)]) # write pcie read descriptor - await tb.driver.app_hw_regs.write_dword(0x000100, (mem_base+0x0000) & 0xffffffff) - await tb.driver.app_hw_regs.write_dword(0x000104, (mem_base+0x0000 >> 32) & 0xffffffff) - await tb.driver.app_hw_regs.write_dword(0x000108, 0x100) - await tb.driver.app_hw_regs.write_dword(0x000110, 0x400) - await tb.driver.app_hw_regs.write_dword(0x000114, 0xAA) + await dma_bench_rb.write_dword(0x100, (mem_base+0x0000) & 0xffffffff) + await dma_bench_rb.write_dword(0x104, (mem_base+0x0000 >> 32) & 0xffffffff) + await dma_bench_rb.write_dword(0x108, 0x100) + await dma_bench_rb.write_dword(0x110, 0x400) + await dma_bench_rb.write_dword(0x114, 0xAA) await Timer(2000, 'ns') # read status - val = await tb.driver.app_hw_regs.read_dword(0x000118) + val = await dma_bench_rb.read_dword(0x000118) tb.log.info("Status: 0x%x", val) assert val == 0x800000AA # write pcie write descriptor - await tb.driver.app_hw_regs.write_dword(0x000200, (mem_base+0x1000) & 0xffffffff) - await tb.driver.app_hw_regs.write_dword(0x000204, (mem_base+0x1000 >> 32) & 0xffffffff) - await tb.driver.app_hw_regs.write_dword(0x000208, 0x100) - await tb.driver.app_hw_regs.write_dword(0x000210, 0x400) - await tb.driver.app_hw_regs.write_dword(0x000214, 0x55) + await dma_bench_rb.write_dword(0x200, (mem_base+0x1000) & 0xffffffff) + await dma_bench_rb.write_dword(0x204, (mem_base+0x1000 >> 32) & 0xffffffff) + await dma_bench_rb.write_dword(0x208, 0x100) + await dma_bench_rb.write_dword(0x210, 0x400) + await dma_bench_rb.write_dword(0x214, 0x55) await Timer(2000, 'ns') # read status - val = await tb.driver.app_hw_regs.read_dword(0x000218) + val = await dma_bench_rb.read_dword(0x000218) tb.log.info("Status: 0x%x", val) assert val == 0x80000055 @@ -691,16 +696,16 @@ async def run_test_nic(dut): tb.log.info("Test immediate write") # write pcie write descriptor - await tb.driver.app_hw_regs.write_dword(0x000200, (mem_base+0x1000) & 0xffffffff) - await tb.driver.app_hw_regs.write_dword(0x000204, (mem_base+0x1000 >> 32) & 0xffffffff) - await tb.driver.app_hw_regs.write_dword(0x000208, 0x44332211) - await tb.driver.app_hw_regs.write_dword(0x000210, 0x4) - await tb.driver.app_hw_regs.write_dword(0x000214, 0x800000AA) + await dma_bench_rb.write_dword(0x200, (mem_base+0x1000) & 0xffffffff) + await dma_bench_rb.write_dword(0x204, (mem_base+0x1000 >> 32) & 0xffffffff) + await dma_bench_rb.write_dword(0x208, 0x44332211) + await dma_bench_rb.write_dword(0x210, 0x4) + await dma_bench_rb.write_dword(0x214, 0x800000AA) await Timer(2000, 'ns') # read status - val = await tb.driver.app_hw_regs.read_dword(0x000218) + val = await dma_bench_rb.read_dword(0x000218) tb.log.info("Status: 0x%x", val) assert val == 0x800000AA @@ -721,91 +726,89 @@ async def run_test_nic(dut): # write packet data mem[src_offset:src_offset+region_len] = bytearray([x % 256 for x in range(region_len)]) - # enable DMA - await tb.driver.app_hw_regs.write_dword(0x000000, 1) # disable interrupts - await tb.driver.app_hw_regs.write_dword(0x000008, 0) + await dma_bench_rb.write_dword(0x00C, 0) # configure operation (read) # DMA base address - await tb.driver.app_hw_regs.write_dword(0x001080, (mem_base+src_offset) & 0xffffffff) - await tb.driver.app_hw_regs.write_dword(0x001084, (mem_base+src_offset >> 32) & 0xffffffff) + await dma_bench_rb.write_dword(0x380, (mem_base+src_offset) & 0xffffffff) + await dma_bench_rb.write_dword(0x384, (mem_base+src_offset >> 32) & 0xffffffff) # DMA offset address - await tb.driver.app_hw_regs.write_dword(0x001088, 0) - await tb.driver.app_hw_regs.write_dword(0x00108c, 0) + await dma_bench_rb.write_dword(0x388, 0) + await dma_bench_rb.write_dword(0x38c, 0) # DMA offset mask - await tb.driver.app_hw_regs.write_dword(0x001090, region_len-1) - await tb.driver.app_hw_regs.write_dword(0x001094, 0) + await dma_bench_rb.write_dword(0x390, region_len-1) + await dma_bench_rb.write_dword(0x394, 0) # DMA stride - await tb.driver.app_hw_regs.write_dword(0x001098, block_stride) - await tb.driver.app_hw_regs.write_dword(0x00109c, 0) + await dma_bench_rb.write_dword(0x398, block_stride) + await dma_bench_rb.write_dword(0x39c, 0) # RAM base address - await tb.driver.app_hw_regs.write_dword(0x0010c0, 0) - await tb.driver.app_hw_regs.write_dword(0x0010c4, 0) + await dma_bench_rb.write_dword(0x3c0, 0) + await dma_bench_rb.write_dword(0x3c4, 0) # RAM offset address - await tb.driver.app_hw_regs.write_dword(0x0010c8, 0) - await tb.driver.app_hw_regs.write_dword(0x0010cc, 0) + await dma_bench_rb.write_dword(0x3c8, 0) + await dma_bench_rb.write_dword(0x3cc, 0) # RAM offset mask - await tb.driver.app_hw_regs.write_dword(0x0010d0, region_len-1) - await tb.driver.app_hw_regs.write_dword(0x0010d4, 0) + await dma_bench_rb.write_dword(0x3d0, region_len-1) + await dma_bench_rb.write_dword(0x3d4, 0) # RAM stride - await tb.driver.app_hw_regs.write_dword(0x0010d8, block_stride) - await tb.driver.app_hw_regs.write_dword(0x0010dc, 0) + await dma_bench_rb.write_dword(0x3d8, block_stride) + await dma_bench_rb.write_dword(0x3dc, 0) # clear cycle count - await tb.driver.app_hw_regs.write_dword(0x001008, 0) - await tb.driver.app_hw_regs.write_dword(0x00100c, 0) + await dma_bench_rb.write_dword(0x308, 0) + await dma_bench_rb.write_dword(0x30c, 0) # block length - await tb.driver.app_hw_regs.write_dword(0x001010, block_size) + await dma_bench_rb.write_dword(0x310, block_size) # block count - await tb.driver.app_hw_regs.write_dword(0x001018, block_count) - await tb.driver.app_hw_regs.write_dword(0x00101c, 0) + await dma_bench_rb.write_dword(0x318, block_count) + await dma_bench_rb.write_dword(0x31c, 0) # start - await tb.driver.app_hw_regs.write_dword(0x001000, 1) + await dma_bench_rb.write_dword(0x300, 1) for k in range(10): - cnt = await tb.driver.app_hw_regs.read_dword(0x001018) + cnt = await dma_bench_rb.read_dword(0x318) await Timer(1000, 'ns') if cnt == 0: break # configure operation (write) # DMA base address - await tb.driver.app_hw_regs.write_dword(0x001180, (mem_base+dest_offset) & 0xffffffff) - await tb.driver.app_hw_regs.write_dword(0x001184, (mem_base+dest_offset >> 32) & 0xffffffff) + await dma_bench_rb.write_dword(0x480, (mem_base+dest_offset) & 0xffffffff) + await dma_bench_rb.write_dword(0x484, (mem_base+dest_offset >> 32) & 0xffffffff) # DMA offset address - await tb.driver.app_hw_regs.write_dword(0x001188, 0) - await tb.driver.app_hw_regs.write_dword(0x00118c, 0) + await dma_bench_rb.write_dword(0x488, 0) + await dma_bench_rb.write_dword(0x48c, 0) # DMA offset mask - await tb.driver.app_hw_regs.write_dword(0x001190, region_len-1) - await tb.driver.app_hw_regs.write_dword(0x001194, 0) + await dma_bench_rb.write_dword(0x490, region_len-1) + await dma_bench_rb.write_dword(0x494, 0) # DMA stride - await tb.driver.app_hw_regs.write_dword(0x001198, block_stride) - await tb.driver.app_hw_regs.write_dword(0x00119c, 0) + await dma_bench_rb.write_dword(0x498, block_stride) + await dma_bench_rb.write_dword(0x49c, 0) # RAM base address - await tb.driver.app_hw_regs.write_dword(0x0011c0, 0) - await tb.driver.app_hw_regs.write_dword(0x0011c4, 0) + await dma_bench_rb.write_dword(0x4c0, 0) + await dma_bench_rb.write_dword(0x4c4, 0) # RAM offset address - await tb.driver.app_hw_regs.write_dword(0x0011c8, 0) - await tb.driver.app_hw_regs.write_dword(0x0011cc, 0) + await dma_bench_rb.write_dword(0x4c8, 0) + await dma_bench_rb.write_dword(0x4cc, 0) # RAM offset mask - await tb.driver.app_hw_regs.write_dword(0x0011d0, region_len-1) - await tb.driver.app_hw_regs.write_dword(0x0011d4, 0) + await dma_bench_rb.write_dword(0x4d0, region_len-1) + await dma_bench_rb.write_dword(0x4d4, 0) # RAM stride - await tb.driver.app_hw_regs.write_dword(0x0011d8, block_stride) - await tb.driver.app_hw_regs.write_dword(0x0011dc, 0) + await dma_bench_rb.write_dword(0x4d8, block_stride) + await dma_bench_rb.write_dword(0x4dc, 0) # clear cycle count - await tb.driver.app_hw_regs.write_dword(0x001108, 0) - await tb.driver.app_hw_regs.write_dword(0x00110c, 0) + await dma_bench_rb.write_dword(0x408, 0) + await dma_bench_rb.write_dword(0x40c, 0) # block length - await tb.driver.app_hw_regs.write_dword(0x001110, block_size) + await dma_bench_rb.write_dword(0x410, block_size) # block count - await tb.driver.app_hw_regs.write_dword(0x001118, block_count) - await tb.driver.app_hw_regs.write_dword(0x00111c, 0) + await dma_bench_rb.write_dword(0x418, block_count) + await dma_bench_rb.write_dword(0x41c, 0) # start - await tb.driver.app_hw_regs.write_dword(0x001100, 1) + await dma_bench_rb.write_dword(0x400, 1) for k in range(10): - cnt = await tb.driver.app_hw_regs.read_dword(0x001118) + cnt = await dma_bench_rb.read_dword(0x418) await Timer(1000, 'ns') if cnt == 0: break @@ -900,6 +903,7 @@ def test_mqnic_core_pcie_us(request, if_count, ports_per_if, axis_pcie_data_widt os.path.join(rtl_dir, "common", "mqnic_tx_scheduler_block_rr.v"), os.path.join(rtl_dir, "common", "tx_scheduler_rr.v"), os.path.join(rtl_dir, "mqnic_app_block_dma_bench.v"), + os.path.join(rtl_dir, "dma_bench.v"), os.path.join(eth_rtl_dir, "ptp_clock.v"), os.path.join(eth_rtl_dir, "ptp_clock_cdc.v"), os.path.join(eth_rtl_dir, "ptp_perout.v"), diff --git a/fpga/mqnic/fb2CG/fpga_100g/fpga_app_dma_bench/Makefile b/fpga/mqnic/fb2CG/fpga_100g/fpga_app_dma_bench/Makefile index b6743c4bb..8ae182cfa 100644 --- a/fpga/mqnic/fb2CG/fpga_100g/fpga_app_dma_bench/Makefile +++ b/fpga/mqnic/fb2CG/fpga_100g/fpga_app_dma_bench/Makefile @@ -58,6 +58,7 @@ SYN_FILES += rtl/common/tx_scheduler_rr.v SYN_FILES += rtl/common/cmac_pad.v SYN_FILES += rtl/common/mac_ts_insert.v SYN_FILES += app/dma_bench/rtl/mqnic_app_block_dma_bench.v +SYN_FILES += app/dma_bench/rtl/dma_bench.v SYN_FILES += lib/eth/rtl/ptp_clock.v SYN_FILES += lib/eth/rtl/ptp_clock_cdc.v SYN_FILES += lib/eth/rtl/ptp_perout.v