From 2bd83502760e7fb7816c5249fe75073f08477ac4 Mon Sep 17 00:00:00 2001 From: Alex Forencich Date: Sat, 23 Apr 2022 00:12:22 -0700 Subject: [PATCH] Add RX queue mapping module Signed-off-by: Alex Forencich --- docs/source/rb/if_ctrl.rst | 16 +- docs/source/rb/index.rst | 3 +- .../dma_bench/tb/mqnic_core_pcie_us/Makefile | 1 + .../test_mqnic_core_pcie_us.py | 64 +++++ .../template/tb/mqnic_core_pcie_us/Makefile | 1 + .../test_mqnic_core_pcie_us.py | 64 +++++ fpga/common/rtl/mqnic_ingress.v | 271 +++++++----------- fpga/common/rtl/mqnic_interface.v | 51 +++- fpga/common/rtl/mqnic_interface_rx.v | 165 +++++++---- fpga/common/rtl/mqnic_rx_queue_map.v | 232 +++++++++++++++ fpga/common/rtl/rx_engine.v | 168 +++++++---- fpga/common/tb/mqnic.py | 45 ++- fpga/common/tb/mqnic_core_axi/Makefile | 1 + .../tb/mqnic_core_axi/test_mqnic_core_axi.py | 64 +++++ fpga/common/tb/mqnic_core_pcie_s10/Makefile | 1 + .../test_mqnic_core_pcie_s10.py | 64 +++++ fpga/common/tb/mqnic_core_pcie_us/Makefile | 1 + .../test_mqnic_core_pcie_us.py | 64 +++++ .../tb/mqnic_core_pcie_us_tdma/Makefile | 1 + .../test_mqnic_core_pcie_us.py | 64 +++++ .../ADM_PCIE_9V3/fpga_100g/fpga/Makefile | 1 + .../ADM_PCIE_9V3/fpga_100g/fpga_tdma/Makefile | 1 + .../fpga_100g/tb/fpga_core/Makefile | 1 + .../fpga_100g/tb/fpga_core/test_fpga_core.py | 56 ++++ .../mqnic/ADM_PCIE_9V3/fpga_25g/fpga/Makefile | 1 + .../ADM_PCIE_9V3/fpga_25g/fpga_10g/Makefile | 1 + .../ADM_PCIE_9V3/fpga_25g/fpga_tdma/Makefile | 1 + .../fpga_25g/tb/fpga_core/Makefile | 1 + .../fpga_25g/tb/fpga_core/test_fpga_core.py | 56 ++++ fpga/mqnic/AU200/fpga_100g/fpga/Makefile | 1 + .../AU200/fpga_100g/tb/fpga_core/Makefile | 1 + .../fpga_100g/tb/fpga_core/test_fpga_core.py | 56 ++++ fpga/mqnic/AU200/fpga_25g/fpga/Makefile | 1 + fpga/mqnic/AU200/fpga_25g/fpga_10g/Makefile | 1 + .../AU200/fpga_25g/tb/fpga_core/Makefile | 1 + .../fpga_25g/tb/fpga_core/test_fpga_core.py | 56 ++++ fpga/mqnic/AU250/fpga_100g/fpga/Makefile | 1 + .../AU250/fpga_100g/tb/fpga_core/Makefile | 1 + .../fpga_100g/tb/fpga_core/test_fpga_core.py | 56 ++++ fpga/mqnic/AU250/fpga_25g/fpga/Makefile | 1 + fpga/mqnic/AU250/fpga_25g/fpga_10g/Makefile | 1 + .../AU250/fpga_25g/tb/fpga_core/Makefile | 1 + .../fpga_25g/tb/fpga_core/test_fpga_core.py | 56 ++++ fpga/mqnic/AU280/fpga_100g/fpga/Makefile | 1 + .../AU280/fpga_100g/tb/fpga_core/Makefile | 1 + .../fpga_100g/tb/fpga_core/test_fpga_core.py | 56 ++++ fpga/mqnic/AU280/fpga_25g/fpga/Makefile | 1 + fpga/mqnic/AU280/fpga_25g/fpga_10g/Makefile | 1 + .../AU280/fpga_25g/tb/fpga_core/Makefile | 1 + .../fpga_25g/tb/fpga_core/test_fpga_core.py | 56 ++++ fpga/mqnic/AU50/fpga_100g/fpga/Makefile | 1 + .../AU50/fpga_100g/tb/fpga_core/Makefile | 1 + .../fpga_100g/tb/fpga_core/test_fpga_core.py | 56 ++++ fpga/mqnic/AU50/fpga_25g/fpga/Makefile | 1 + fpga/mqnic/AU50/fpga_25g/fpga_10g/Makefile | 1 + .../mqnic/AU50/fpga_25g/tb/fpga_core/Makefile | 1 + .../fpga_25g/tb/fpga_core/test_fpga_core.py | 56 ++++ .../fpga/fpga_ku040/Makefile | 1 + .../fpga/fpga_ku060/Makefile | 1 + .../fpga/tb/fpga_core/Makefile | 1 + .../fpga/tb/fpga_core/test_fpga_core.py | 56 ++++ fpga/mqnic/ExaNIC_X10/fpga/fpga/Makefile | 1 + .../ExaNIC_X10/fpga/tb/fpga_core/Makefile | 1 + .../fpga/tb/fpga_core/test_fpga_core.py | 56 ++++ fpga/mqnic/ExaNIC_X25/fpga_25g/fpga/Makefile | 1 + .../ExaNIC_X25/fpga_25g/fpga_10g/Makefile | 1 + .../ExaNIC_X25/fpga_25g/tb/fpga_core/Makefile | 1 + .../fpga_25g/tb/fpga_core/test_fpga_core.py | 56 ++++ fpga/mqnic/NetFPGA_SUME/fpga/fpga/Makefile | 1 + .../NetFPGA_SUME/fpga/tb/fpga_core/Makefile | 1 + .../fpga/tb/fpga_core/test_fpga_core.py | 56 ++++ .../S10MX_DK/fpga_10g/fpga_1sm21b/Makefile | 1 + .../S10MX_DK/fpga_10g/fpga_1sm21c/Makefile | 1 + .../S10MX_DK/fpga_10g/tb/fpga_core/Makefile | 1 + .../fpga_10g/tb/fpga_core/test_fpga_core.py | 56 ++++ fpga/mqnic/VCU108/fpga_10g/fpga/Makefile | 1 + .../VCU108/fpga_10g/tb/fpga_core/Makefile | 1 + .../fpga_10g/tb/fpga_core/test_fpga_core.py | 56 ++++ fpga/mqnic/VCU118/fpga_100g/fpga/Makefile | 1 + .../VCU118/fpga_100g/tb/fpga_core/Makefile | 1 + .../fpga_100g/tb/fpga_core/test_fpga_core.py | 56 ++++ fpga/mqnic/VCU118/fpga_25g/fpga/Makefile | 1 + fpga/mqnic/VCU118/fpga_25g/fpga_10g/Makefile | 1 + .../VCU118/fpga_25g/tb/fpga_core/Makefile | 1 + .../fpga_25g/tb/fpga_core/test_fpga_core.py | 56 ++++ fpga/mqnic/VCU1525/fpga_100g/fpga/Makefile | 1 + .../VCU1525/fpga_100g/tb/fpga_core/Makefile | 1 + .../fpga_100g/tb/fpga_core/test_fpga_core.py | 56 ++++ fpga/mqnic/VCU1525/fpga_25g/fpga/Makefile | 1 + fpga/mqnic/VCU1525/fpga_25g/fpga_10g/Makefile | 1 + .../VCU1525/fpga_25g/tb/fpga_core/Makefile | 1 + .../fpga_25g/tb/fpga_core/test_fpga_core.py | 56 ++++ fpga/mqnic/XUPP3R/fpga_100g/fpga/Makefile | 1 + .../XUPP3R/fpga_100g/tb/fpga_core/Makefile | 1 + .../fpga_100g/tb/fpga_core/test_fpga_core.py | 56 ++++ fpga/mqnic/XUPP3R/fpga_25g/fpga/Makefile | 1 + fpga/mqnic/XUPP3R/fpga_25g/fpga_10g/Makefile | 1 + .../XUPP3R/fpga_25g/tb/fpga_core/Makefile | 1 + .../fpga_25g/tb/fpga_core/test_fpga_core.py | 56 ++++ fpga/mqnic/ZCU106/fpga_pcie/fpga/Makefile | 1 + .../ZCU106/fpga_pcie/tb/fpga_core/Makefile | 1 + .../fpga_pcie/tb/fpga_core/test_fpga_core.py | 56 ++++ fpga/mqnic/ZCU106/fpga_zynqmp/fpga/Makefile | 1 + .../ZCU106/fpga_zynqmp/tb/fpga_core/Makefile | 1 + .../tb/fpga_core/test_fpga_core.py | 56 ++++ fpga/mqnic/fb2CG/fpga_100g/fpga/Makefile | 1 + .../fpga_100g/fpga_app_dma_bench/Makefile | 1 + .../fpga_100g/fpga_app_template/Makefile | 1 + fpga/mqnic/fb2CG/fpga_100g/fpga_tdma/Makefile | 1 + .../fb2CG/fpga_100g/tb/fpga_core/Makefile | 1 + .../fpga_100g/tb/fpga_core/test_fpga_core.py | 56 ++++ fpga/mqnic/fb2CG/fpga_25g/fpga/Makefile | 1 + fpga/mqnic/fb2CG/fpga_25g/fpga_10g/Makefile | 1 + fpga/mqnic/fb2CG/fpga_25g/fpga_tdma/Makefile | 1 + .../fb2CG/fpga_25g/tb/fpga_core/Makefile | 1 + .../fpga_25g/tb/fpga_core/test_fpga_core.py | 56 ++++ lib/mqnic/mqnic.h | 6 + lib/mqnic/mqnic_if.c | 36 +++ modules/mqnic/mqnic.h | 9 +- modules/mqnic/mqnic_hw.h | 12 +- modules/mqnic/mqnic_if.c | 60 +++- modules/mqnic/mqnic_netdev.c | 2 +- utils/mqnic-dump.c | 12 +- 123 files changed, 2674 insertions(+), 330 deletions(-) create mode 100644 fpga/common/rtl/mqnic_rx_queue_map.v diff --git a/docs/source/rb/if_ctrl.rst b/docs/source/rb/if_ctrl.rst index 347780689..bad2486e4 100644 --- a/docs/source/rb/if_ctrl.rst +++ b/docs/source/rb/if_ctrl.rst @@ -4,7 +4,7 @@ Interface control register block ================================ -The interface control register block has a header with type 0x0000C001, version 0x00000300, and contains several interface-level control registers. +The interface control register block has a header with type 0x0000C001, version 0x00000400, and contains several interface-level control registers. .. table:: @@ -34,8 +34,6 @@ The interface control register block has a header with type 0x0000C001, version RBB+0x28 TX MTU TX MTU RW - -------- ------------- ------------------------------ ------------- RBB+0x2C RX MTU RX MTU RW - - -------- ------------- ------------------------------ ------------- - RBB+0x30 RSS mask RSS mask RW 0x00000000 ======== ============= ============================== ============= See :ref:`rb_overview` for definitions of the standard register block header fields. @@ -137,15 +135,3 @@ See :ref:`rb_overview` for definitions of the standard register block header fie ======== ====== ====== ====== ====== ============= RBB+0x2C RX MTU RW - ======== ============================== ============= - -.. object:: RSS mask - - The RSS mask field controls which receive queues will be selected by the computed RSS flow hash. - - .. table:: - - ======== ====== ====== ====== ====== ============= - Address 31..24 23..16 15..8 7..0 Reset value - ======== ====== ====== ====== ====== ============= - RBB+0x30 RSS mask RW 0x00000000 - ======== ============================== ============= diff --git a/docs/source/rb/index.rst b/docs/source/rb/index.rst index 162d71930..995c125af 100644 --- a/docs/source/rb/index.rst +++ b/docs/source/rb/index.rst @@ -63,7 +63,7 @@ The NIC register space is constructed from a linked list of register blocks. Ea 0x00000000 \- :ref:`rb_null` 0xFFFFFFFF 0x00000100 :ref:`rb_fw_id` 0x0000C000 0x00000100 :ref:`rb_if` - 0x0000C001 0x00000300 :ref:`rb_if_ctrl` + 0x0000C001 0x00000400 :ref:`rb_if_ctrl` 0x0000C002 0x00000200 port 0x0000C003 0x00000100 :ref:`rb_sched_block` 0x0000C004 0x00000200 application @@ -78,6 +78,7 @@ The NIC register space is constructed from a linked list of register blocks. Ea 0x0000C060 0x00000100 :ref:`rb_tdma_sch` 0x0000C080 0x00000100 :ref:`rb_phc` 0x0000C081 0x00000100 :ref:`rb_phc_perout` + 0x0000C090 0x00000100 RX queue map 0x0000C100 0x00000100 :ref:`rb_gpio` 0x0000C110 0x00000100 :ref:`rb_i2c` 0x0000C120 0x00000200 :ref:`rb_flash_spi` diff --git a/fpga/app/dma_bench/tb/mqnic_core_pcie_us/Makefile b/fpga/app/dma_bench/tb/mqnic_core_pcie_us/Makefile index b511f36c7..924e0411b 100644 --- a/fpga/app/dma_bench/tb/mqnic_core_pcie_us/Makefile +++ b/fpga/app/dma_bench/tb/mqnic_core_pcie_us/Makefile @@ -48,6 +48,7 @@ VERILOG_SOURCES += ../../rtl/common/mqnic_egress.v VERILOG_SOURCES += ../../rtl/common/mqnic_ingress.v VERILOG_SOURCES += ../../rtl/common/mqnic_l2_egress.v VERILOG_SOURCES += ../../rtl/common/mqnic_l2_ingress.v +VERILOG_SOURCES += ../../rtl/common/mqnic_rx_queue_map.v VERILOG_SOURCES += ../../rtl/common/mqnic_ptp.v VERILOG_SOURCES += ../../rtl/common/mqnic_ptp_clock.v VERILOG_SOURCES += ../../rtl/common/mqnic_ptp_perout.v diff --git a/fpga/app/dma_bench/tb/mqnic_core_pcie_us/test_mqnic_core_pcie_us.py b/fpga/app/dma_bench/tb/mqnic_core_pcie_us/test_mqnic_core_pcie_us.py index 85a8a86d5..ec8491753 100644 --- a/fpga/app/dma_bench/tb/mqnic_core_pcie_us/test_mqnic_core_pcie_us.py +++ b/fpga/app/dma_bench/tb/mqnic_core_pcie_us/test_mqnic_core_pcie_us.py @@ -428,6 +428,61 @@ async def run_test_nic(dut): assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff assert Ether(pkt.data).build() == test_pkt.build() + tb.log.info("Queue mapping offset test") + + data = bytearray([x % 256 for x in range(1024)]) + + tb.loopback_enable = True + + for k in range(4): + await tb.driver.interfaces[0].set_rx_queue_map_offset(0, k) + + await tb.driver.interfaces[0].start_xmit(data, 0) + + pkt = await tb.driver.interfaces[0].recv() + + tb.log.info("Packet: %s", pkt) + assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff + assert pkt.queue == k + + tb.loopback_enable = False + + await tb.driver.interfaces[0].set_rx_queue_map_offset(0, 0) + + tb.log.info("Queue mapping RSS mask test") + + await tb.driver.interfaces[0].set_rx_queue_map_rss_mask(0, 0x00000003) + + tb.loopback_enable = True + + queues = set() + + for k in range(64): + payload = bytes([x % 256 for x in range(256)]) + eth = Ether(src='5A:51:52:53:54:55', dst='DA:D1:D2:D3:D4:D5') + ip = IP(src='192.168.1.100', dst='192.168.1.101') + udp = UDP(sport=1, dport=k+0) + test_pkt = eth / ip / udp / payload + + test_pkt2 = test_pkt.copy() + test_pkt2[UDP].chksum = scapy.utils.checksum(bytes(test_pkt2[UDP])) + + await tb.driver.interfaces[0].start_xmit(test_pkt2.build(), 0, 34, 6) + + for k in range(64): + pkt = await tb.driver.interfaces[0].recv() + + tb.log.info("Packet: %s", pkt) + assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff + + queues.add(pkt.queue) + + assert len(queues) == 4 + + tb.loopback_enable = False + + await tb.driver.interfaces[0].set_rx_queue_map_rss_mask(0, 0) + tb.log.info("Multiple small packets") count = 64 @@ -514,6 +569,7 @@ async def run_test_nic(dut): for block in tb.driver.interfaces[0].sched_blocks: await block.schedulers[0].rb.write_dword(mqnic.MQNIC_RB_SCHED_RR_REG_CTRL, 0x00000001) + await tb.driver.interfaces[0].set_rx_queue_map_offset(block.index, block.index) for k in range(block.interface.tx_queue_count): if k % len(tb.driver.interfaces[0].sched_blocks) == block.index: await block.schedulers[0].hw_regs.write_dword(4*k, 0x00000003) @@ -526,6 +582,8 @@ async def run_test_nic(dut): tb.loopback_enable = True + queues = set() + for k, p in enumerate(pkts): await tb.driver.interfaces[0].start_xmit(p, k % len(tb.driver.interfaces[0].sched_blocks)) @@ -536,10 +594,15 @@ async def run_test_nic(dut): # assert pkt.data == pkts[k] assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff + queues.add(pkt.queue) + + assert len(queues) == len(tb.driver.interfaces[0].sched_blocks) + tb.loopback_enable = False for block in tb.driver.interfaces[0].sched_blocks[1:]: await block.schedulers[0].rb.write_dword(mqnic.MQNIC_RB_SCHED_RR_REG_CTRL, 0x00000000) + await tb.driver.interfaces[0].set_rx_queue_map_offset(block.index, 0) mem = tb.rc.mem_pool.alloc_region(16*1024*1024) mem_base = mem.get_absolute_address(0) @@ -760,6 +823,7 @@ def test_mqnic_core_pcie_us(request, if_count, ports_per_if, axis_pcie_data_widt os.path.join(rtl_dir, "common", "mqnic_ingress.v"), os.path.join(rtl_dir, "common", "mqnic_l2_egress.v"), os.path.join(rtl_dir, "common", "mqnic_l2_ingress.v"), + os.path.join(rtl_dir, "common", "mqnic_rx_queue_map.v"), os.path.join(rtl_dir, "common", "mqnic_ptp.v"), os.path.join(rtl_dir, "common", "mqnic_ptp_clock.v"), os.path.join(rtl_dir, "common", "mqnic_ptp_perout.v"), diff --git a/fpga/app/template/tb/mqnic_core_pcie_us/Makefile b/fpga/app/template/tb/mqnic_core_pcie_us/Makefile index 88809e51b..3d735c038 100644 --- a/fpga/app/template/tb/mqnic_core_pcie_us/Makefile +++ b/fpga/app/template/tb/mqnic_core_pcie_us/Makefile @@ -48,6 +48,7 @@ VERILOG_SOURCES += ../../rtl/common/mqnic_egress.v VERILOG_SOURCES += ../../rtl/common/mqnic_ingress.v VERILOG_SOURCES += ../../rtl/common/mqnic_l2_egress.v VERILOG_SOURCES += ../../rtl/common/mqnic_l2_ingress.v +VERILOG_SOURCES += ../../rtl/common/mqnic_rx_queue_map.v VERILOG_SOURCES += ../../rtl/common/mqnic_ptp.v VERILOG_SOURCES += ../../rtl/common/mqnic_ptp_clock.v VERILOG_SOURCES += ../../rtl/common/mqnic_ptp_perout.v diff --git a/fpga/app/template/tb/mqnic_core_pcie_us/test_mqnic_core_pcie_us.py b/fpga/app/template/tb/mqnic_core_pcie_us/test_mqnic_core_pcie_us.py index 99609ae40..571fedd27 100644 --- a/fpga/app/template/tb/mqnic_core_pcie_us/test_mqnic_core_pcie_us.py +++ b/fpga/app/template/tb/mqnic_core_pcie_us/test_mqnic_core_pcie_us.py @@ -428,6 +428,61 @@ async def run_test_nic(dut): assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff assert Ether(pkt.data).build() == test_pkt.build() + tb.log.info("Queue mapping offset test") + + data = bytearray([x % 256 for x in range(1024)]) + + tb.loopback_enable = True + + for k in range(4): + await tb.driver.interfaces[0].set_rx_queue_map_offset(0, k) + + await tb.driver.interfaces[0].start_xmit(data, 0) + + pkt = await tb.driver.interfaces[0].recv() + + tb.log.info("Packet: %s", pkt) + assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff + assert pkt.queue == k + + tb.loopback_enable = False + + await tb.driver.interfaces[0].set_rx_queue_map_offset(0, 0) + + tb.log.info("Queue mapping RSS mask test") + + await tb.driver.interfaces[0].set_rx_queue_map_rss_mask(0, 0x00000003) + + tb.loopback_enable = True + + queues = set() + + for k in range(64): + payload = bytes([x % 256 for x in range(256)]) + eth = Ether(src='5A:51:52:53:54:55', dst='DA:D1:D2:D3:D4:D5') + ip = IP(src='192.168.1.100', dst='192.168.1.101') + udp = UDP(sport=1, dport=k+0) + test_pkt = eth / ip / udp / payload + + test_pkt2 = test_pkt.copy() + test_pkt2[UDP].chksum = scapy.utils.checksum(bytes(test_pkt2[UDP])) + + await tb.driver.interfaces[0].start_xmit(test_pkt2.build(), 0, 34, 6) + + for k in range(64): + pkt = await tb.driver.interfaces[0].recv() + + tb.log.info("Packet: %s", pkt) + assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff + + queues.add(pkt.queue) + + assert len(queues) == 4 + + tb.loopback_enable = False + + await tb.driver.interfaces[0].set_rx_queue_map_rss_mask(0, 0) + tb.log.info("Multiple small packets") count = 64 @@ -514,6 +569,7 @@ async def run_test_nic(dut): for block in tb.driver.interfaces[0].sched_blocks: await block.schedulers[0].rb.write_dword(mqnic.MQNIC_RB_SCHED_RR_REG_CTRL, 0x00000001) + await tb.driver.interfaces[0].set_rx_queue_map_offset(block.index, block.index) for k in range(block.interface.tx_queue_count): if k % len(tb.driver.interfaces[0].sched_blocks) == block.index: await block.schedulers[0].hw_regs.write_dword(4*k, 0x00000003) @@ -526,6 +582,8 @@ async def run_test_nic(dut): tb.loopback_enable = True + queues = set() + for k, p in enumerate(pkts): await tb.driver.interfaces[0].start_xmit(p, k % len(tb.driver.interfaces[0].sched_blocks)) @@ -536,10 +594,15 @@ async def run_test_nic(dut): # assert pkt.data == pkts[k] assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff + queues.add(pkt.queue) + + assert len(queues) == len(tb.driver.interfaces[0].sched_blocks) + tb.loopback_enable = False for block in tb.driver.interfaces[0].sched_blocks[1:]: await block.schedulers[0].rb.write_dword(mqnic.MQNIC_RB_SCHED_RR_REG_CTRL, 0x00000000) + await tb.driver.interfaces[0].set_rx_queue_map_offset(block.index, 0) tb.log.info("Read statistics counters") @@ -600,6 +663,7 @@ def test_mqnic_core_pcie_us(request, if_count, ports_per_if, axis_pcie_data_widt os.path.join(rtl_dir, "common", "mqnic_ingress.v"), os.path.join(rtl_dir, "common", "mqnic_l2_egress.v"), os.path.join(rtl_dir, "common", "mqnic_l2_ingress.v"), + os.path.join(rtl_dir, "common", "mqnic_rx_queue_map.v"), os.path.join(rtl_dir, "common", "mqnic_ptp.v"), os.path.join(rtl_dir, "common", "mqnic_ptp_clock.v"), os.path.join(rtl_dir, "common", "mqnic_ptp_perout.v"), diff --git a/fpga/common/rtl/mqnic_ingress.v b/fpga/common/rtl/mqnic_ingress.v index a80c2c7e5..9a6832943 100644 --- a/fpga/common/rtl/mqnic_ingress.v +++ b/fpga/common/rtl/mqnic_ingress.v @@ -61,7 +61,9 @@ module mqnic_ingress # // AXI stream tdest signal width parameter AXIS_DEST_WIDTH = 8, // AXI stream tuser signal width - parameter AXIS_USER_WIDTH = 1, + parameter S_AXIS_USER_WIDTH = 1, + // AXI stream tuser signal width + parameter M_AXIS_USER_WIDTH = S_AXIS_USER_WIDTH, // Max receive packet size parameter MAX_RX_SIZE = 2048 ) @@ -79,7 +81,7 @@ module mqnic_ingress # input wire s_axis_tlast, input wire [AXIS_ID_WIDTH-1:0] s_axis_tid, input wire [AXIS_DEST_WIDTH-1:0] s_axis_tdest, - input wire [AXIS_USER_WIDTH-1:0] s_axis_tuser, + input wire [S_AXIS_USER_WIDTH-1:0] s_axis_tuser, /* * Receive data output @@ -91,45 +93,35 @@ module mqnic_ingress # output wire m_axis_tlast, output wire [AXIS_ID_WIDTH-1:0] m_axis_tid, output wire [AXIS_DEST_WIDTH-1:0] m_axis_tdest, - output wire [AXIS_USER_WIDTH-1:0] m_axis_tuser, - - /* - * RX command output - */ - output wire [RX_QUEUE_INDEX_WIDTH-1:0] rx_req_queue, - output wire [REQ_TAG_WIDTH-1:0] rx_req_tag, - output wire rx_req_valid, - input wire rx_req_ready, - - /* - * RX hash output - */ - output wire [31:0] rx_hash, - output wire [3:0] rx_hash_type, - output wire rx_hash_valid, - input wire rx_hash_ready, + output wire [M_AXIS_USER_WIDTH-1:0] m_axis_tuser, /* * RX checksum output */ output wire [15:0] rx_csum, output wire rx_csum_valid, - input wire rx_csum_ready, - - /* - * Configuration - */ - input wire [31:0] rss_mask + input wire rx_csum_ready ); +localparam RX_HASH_WIDTH = 32; +localparam RX_HASH_TYPE_WIDTH = 4; + +localparam TUSER_HASH_OFFSET = S_AXIS_USER_WIDTH; +localparam TUSER_HASH_TYPE_OFFSET = TUSER_HASH_OFFSET + (RX_HASH_ENABLE ? RX_HASH_WIDTH : 0); +localparam INT_TUSER_WIDTH = TUSER_HASH_TYPE_OFFSET + (RX_HASH_ENABLE ? RX_HASH_TYPE_WIDTH : 0); + generate -wire [31:0] rx_hash_int; -wire [3:0] rx_hash_type_int; -wire rx_hash_valid_int; +wire [31:0] rx_fifo_hash; +wire [3:0] rx_fifo_hash_type; +wire rx_fifo_hash_ready; +wire rx_fifo_hash_valid; if (RX_HASH_ENABLE) begin + wire [31:0] rx_hash_int; + wire [3:0] rx_hash_type_int; + wire rx_hash_valid_int; rx_hash #( .DATA_WIDTH(AXIS_DATA_WIDTH) @@ -172,10 +164,10 @@ if (RX_HASH_ENABLE) begin .s_axis_tuser(0), // AXI output - .m_axis_tdata({rx_hash_type, rx_hash}), + .m_axis_tdata({rx_fifo_hash_type, rx_fifo_hash}), .m_axis_tkeep(), - .m_axis_tvalid(rx_hash_valid), - .m_axis_tready(rx_hash_ready), + .m_axis_tvalid(rx_fifo_hash_valid), + .m_axis_tready(rx_fifo_hash_ready), .m_axis_tlast(), .m_axis_tid(), .m_axis_tdest(), @@ -189,137 +181,9 @@ if (RX_HASH_ENABLE) begin end else begin - assign rx_hash = 32'd0; - assign rx_hash_type = 4'd0; - assign rx_hash_valid = 1'b0; - -end - -if (RX_RSS_ENABLE && RX_HASH_ENABLE) begin - - axis_fifo #( - .DEPTH(AXIS_KEEP_WIDTH*32), - .DATA_WIDTH(AXIS_DATA_WIDTH), - .KEEP_ENABLE(AXIS_KEEP_WIDTH > 1), - .KEEP_WIDTH(AXIS_KEEP_WIDTH), - .LAST_ENABLE(1), - .ID_ENABLE(1), - .ID_WIDTH(AXIS_ID_WIDTH), - .DEST_ENABLE(1), - .DEST_WIDTH(AXIS_DEST_WIDTH), - .USER_ENABLE(1), - .USER_WIDTH(AXIS_USER_WIDTH), - .FRAME_FIFO(0) - ) - rx_hash_data_fifo ( - .clk(clk), - .rst(rst), - - // AXI input - .s_axis_tdata(s_axis_tdata), - .s_axis_tkeep(s_axis_tkeep), - .s_axis_tvalid(s_axis_tvalid), - .s_axis_tready(s_axis_tready), - .s_axis_tlast(s_axis_tlast), - .s_axis_tid(s_axis_tid), - .s_axis_tdest(s_axis_tdest), - .s_axis_tuser(s_axis_tuser), - - // AXI output - .m_axis_tdata(m_axis_tdata), - .m_axis_tkeep(m_axis_tkeep), - .m_axis_tvalid(m_axis_tvalid), - .m_axis_tready(m_axis_tready), - .m_axis_tlast(m_axis_tlast), - .m_axis_tid(m_axis_tid), - .m_axis_tdest(m_axis_tdest), - .m_axis_tuser(m_axis_tuser), - - // Status - .status_overflow(), - .status_bad_frame(), - .status_good_frame() - ); - - // Generate RX requests (RSS) - assign rx_req_tag = 0; - - axis_fifo #( - .DEPTH(32), - .DATA_WIDTH(RX_QUEUE_INDEX_WIDTH), - .KEEP_ENABLE(0), - .LAST_ENABLE(0), - .ID_ENABLE(0), - .DEST_ENABLE(0), - .USER_ENABLE(0), - .FRAME_FIFO(0) - ) - rx_req_fifo ( - .clk(clk), - .rst(rst), - - // AXI input - .s_axis_tdata(rx_hash_int & rss_mask), - .s_axis_tkeep(0), - .s_axis_tvalid(rx_hash_valid_int), - .s_axis_tready(), - .s_axis_tlast(0), - .s_axis_tid(0), - .s_axis_tdest(0), - .s_axis_tuser(0), - - // AXI output - .m_axis_tdata(rx_req_queue), - .m_axis_tkeep(), - .m_axis_tvalid(rx_req_valid), - .m_axis_tready(rx_req_ready), - .m_axis_tlast(), - .m_axis_tid(), - .m_axis_tdest(), - .m_axis_tuser(), - - // Status - .status_overflow(), - .status_bad_frame(), - .status_good_frame() - ); - -end else begin - - assign m_axis_tdata = s_axis_tdata; - assign m_axis_tkeep = s_axis_tkeep; - assign m_axis_tvalid = s_axis_tvalid; - assign s_axis_tready = m_axis_tready; - assign m_axis_tlast = s_axis_tlast; - assign m_axis_tid = s_axis_tid; - assign m_axis_tdest = s_axis_tdest; - assign m_axis_tuser = s_axis_tuser; - - // Generate RX requests (no RSS) - reg rx_frame_reg = 1'b0; - reg rx_req_valid_reg = 1'b0; - - assign rx_req_queue = 0; - assign rx_req_tag = 0; - assign rx_req_valid = s_axis_tvalid && !rx_frame_reg; - - always @(posedge clk) begin - if (rx_req_ready) begin - rx_req_valid_reg <= 1'b0; - end - - if (s_axis_tready && s_axis_tvalid) begin - if (!rx_frame_reg) begin - rx_req_valid_reg <= 1'b1; - end - rx_frame_reg <= !s_axis_tlast; - end - - if (rst) begin - rx_frame_reg <= 1'b0; - rx_req_valid_reg <= 1'b0; - end - end + assign rx_fifo_hash = 32'd0; + assign rx_fifo_hash_type = 4'd0; + assign rx_fifo_hash_valid = 1'b0; end @@ -389,6 +253,89 @@ end else begin end +if (RX_HASH_ENABLE) begin + + wire [AXIS_DATA_WIDTH-1:0] fifo_axis_tdata; + wire [AXIS_KEEP_WIDTH-1:0] fifo_axis_tkeep; + wire fifo_axis_tvalid; + wire fifo_axis_tready; + wire fifo_axis_tlast; + wire [AXIS_ID_WIDTH-1:0] fifo_axis_tid; + wire [AXIS_DEST_WIDTH-1:0] fifo_axis_tdest; + wire [S_AXIS_USER_WIDTH-1:0] fifo_axis_tuser; + + axis_fifo #( + .DEPTH(AXIS_KEEP_WIDTH*32), + .DATA_WIDTH(AXIS_DATA_WIDTH), + .KEEP_ENABLE(AXIS_KEEP_WIDTH > 1), + .KEEP_WIDTH(AXIS_KEEP_WIDTH), + .LAST_ENABLE(1), + .ID_ENABLE(1), + .ID_WIDTH(AXIS_ID_WIDTH), + .DEST_ENABLE(1), + .DEST_WIDTH(AXIS_DEST_WIDTH), + .USER_ENABLE(1), + .USER_WIDTH(S_AXIS_USER_WIDTH), + .FRAME_FIFO(0) + ) + rx_hash_data_fifo ( + .clk(clk), + .rst(rst), + + // AXI input + .s_axis_tdata(s_axis_tdata), + .s_axis_tkeep(s_axis_tkeep), + .s_axis_tvalid(s_axis_tvalid), + .s_axis_tready(s_axis_tready), + .s_axis_tlast(s_axis_tlast), + .s_axis_tid(s_axis_tid), + .s_axis_tdest(s_axis_tdest), + .s_axis_tuser(s_axis_tuser), + + // AXI output + .m_axis_tdata(fifo_axis_tdata), + .m_axis_tkeep(fifo_axis_tkeep), + .m_axis_tvalid(fifo_axis_tvalid), + .m_axis_tready(fifo_axis_tready), + .m_axis_tlast(fifo_axis_tlast), + .m_axis_tid(fifo_axis_tid), + .m_axis_tdest(fifo_axis_tdest), + .m_axis_tuser(fifo_axis_tuser), + + // Status + .status_overflow(), + .status_bad_frame(), + .status_good_frame() + ); + + wire sideband_valid = rx_fifo_hash_valid; + + assign rx_fifo_hash_ready = fifo_axis_tready && fifo_axis_tvalid && fifo_axis_tlast; + + assign fifo_axis_tready = m_axis_tready && sideband_valid; + + assign m_axis_tdata = fifo_axis_tdata; + assign m_axis_tkeep = fifo_axis_tkeep; + assign m_axis_tvalid = fifo_axis_tvalid && sideband_valid; + assign m_axis_tlast = fifo_axis_tlast; + assign m_axis_tid = fifo_axis_tid; + assign m_axis_tdest = fifo_axis_tdest; + assign m_axis_tuser = {rx_fifo_hash_type, rx_fifo_hash, fifo_axis_tuser}; + +end else begin + + // bypass + assign m_axis_tdata = s_axis_tdata; + assign m_axis_tkeep = s_axis_tkeep; + assign m_axis_tvalid = s_axis_tvalid; + assign s_axis_tready = m_axis_tready; + assign m_axis_tlast = s_axis_tlast; + assign m_axis_tid = s_axis_tid; + assign m_axis_tdest = s_axis_tdest; + assign m_axis_tuser = s_axis_tuser; + +end + endgenerate endmodule diff --git a/fpga/common/rtl/mqnic_interface.v b/fpga/common/rtl/mqnic_interface.v index 630e6e444..f87d5ea3f 100644 --- a/fpga/common/rtl/mqnic_interface.v +++ b/fpga/common/rtl/mqnic_interface.v @@ -426,6 +426,8 @@ parameter AXIL_SCHED_BASE_ADDR = AXIL_RX_CQM_BASE_ADDR + 2**AXIL_RX_CQM_ADDR_WID localparam RB_BASE_ADDR = AXIL_CTRL_BASE_ADDR; localparam RBB = RB_BASE_ADDR & {AXIL_CTRL_ADDR_WIDTH{1'b1}}; +localparam RX_RB_BASE_ADDR = RB_BASE_ADDR + 16'h100; + localparam SCHED_RB_BASE_ADDR = RB_BASE_ADDR + 16'h1000; localparam SCHED_RB_STRIDE = 16'h1000; @@ -899,6 +901,12 @@ reg ctrl_reg_wr_ack_reg = 1'b0; reg [AXIL_DATA_WIDTH-1:0] ctrl_reg_rd_data_reg = {AXIL_DATA_WIDTH{1'b0}}; reg ctrl_reg_rd_ack_reg = 1'b0; +wire if_rx_ctrl_reg_wr_wait; +wire if_rx_ctrl_reg_wr_ack; +wire [AXIL_DATA_WIDTH-1:0] if_rx_ctrl_reg_rd_data; +wire if_rx_ctrl_reg_rd_wait; +wire if_rx_ctrl_reg_rd_ack; + wire port_ctrl_reg_wr_wait[PORTS-1:0]; wire port_ctrl_reg_wr_ack[PORTS-1:0]; wire [AXIL_DATA_WIDTH-1:0] port_ctrl_reg_rd_data[PORTS-1:0]; @@ -920,11 +928,11 @@ assign ctrl_reg_rd_ack = ctrl_reg_rd_ack_cmb; integer k; always @* begin - ctrl_reg_wr_wait_cmb = 1'b0; - ctrl_reg_wr_ack_cmb = ctrl_reg_wr_ack_reg; - ctrl_reg_rd_data_cmb = ctrl_reg_rd_data_reg; - ctrl_reg_rd_wait_cmb = 1'b0; - ctrl_reg_rd_ack_cmb = ctrl_reg_rd_ack_reg; + ctrl_reg_wr_wait_cmb = if_rx_ctrl_reg_wr_wait; + ctrl_reg_wr_ack_cmb = ctrl_reg_wr_ack_reg | if_rx_ctrl_reg_wr_ack; + ctrl_reg_rd_data_cmb = ctrl_reg_rd_data_reg | if_rx_ctrl_reg_rd_data; + ctrl_reg_rd_wait_cmb = if_rx_ctrl_reg_rd_wait; + ctrl_reg_rd_ack_cmb = ctrl_reg_rd_ack_reg | if_rx_ctrl_reg_rd_ack; for (k = 0; k < PORTS; k = k + 1) begin ctrl_reg_wr_wait_cmb = ctrl_reg_wr_wait_cmb | port_ctrl_reg_wr_wait[k]; @@ -938,8 +946,6 @@ end reg [DMA_CLIENT_LEN_WIDTH-1:0] tx_mtu_reg = MAX_TX_SIZE; reg [DMA_CLIENT_LEN_WIDTH-1:0] rx_mtu_reg = MAX_RX_SIZE; -reg [RX_QUEUE_INDEX_WIDTH-1:0] rss_mask_reg = 0; - always @(posedge clk) begin ctrl_reg_wr_ack_reg <= 1'b0; ctrl_reg_rd_data_reg <= {AXIL_DATA_WIDTH{1'b0}}; @@ -952,7 +958,6 @@ always @(posedge clk) begin // Interface control RBB+8'h28: tx_mtu_reg <= ctrl_reg_wr_data; // IF ctrl: TX MTU RBB+8'h2C: rx_mtu_reg <= ctrl_reg_wr_data; // IF ctrl: RX MTU - RBB+8'h30: rss_mask_reg <= ctrl_reg_wr_data; // IF ctrl: RSS mask default: ctrl_reg_wr_ack_reg <= 1'b0; endcase end @@ -963,7 +968,7 @@ always @(posedge clk) begin case ({ctrl_reg_rd_addr >> 2, 2'b00}) // Interface control RBB+8'h00: ctrl_reg_rd_data_reg <= 32'h0000C001; // IF ctrl: Type - RBB+8'h04: ctrl_reg_rd_data_reg <= 32'h00000300; // IF ctrl: Version + RBB+8'h04: ctrl_reg_rd_data_reg <= 32'h00000400; // IF ctrl: Version RBB+8'h08: ctrl_reg_rd_data_reg <= RB_BASE_ADDR+8'h40; // IF ctrl: Next header RBB+8'h0C: begin // IF ctrl: features @@ -979,7 +984,6 @@ always @(posedge clk) begin RBB+8'h24: ctrl_reg_rd_data_reg <= MAX_RX_SIZE; // IF ctrl: Max RX MTU RBB+8'h28: ctrl_reg_rd_data_reg <= tx_mtu_reg; // IF ctrl: TX MTU RBB+8'h2C: ctrl_reg_rd_data_reg <= rx_mtu_reg; // IF ctrl: RX MTU - RBB+8'h30: ctrl_reg_rd_data_reg <= rss_mask_reg; // IF ctrl: RSS mask // Queue manager (Event) RBB+8'h40: ctrl_reg_rd_data_reg <= 32'h0000C010; // Event QM: Type RBB+8'h44: ctrl_reg_rd_data_reg <= 32'h00000100; // Event QM: Version @@ -1011,7 +1015,7 @@ always @(posedge clk) begin // Queue manager (RX CPL) RBB+8'hC0: ctrl_reg_rd_data_reg <= 32'h0000C031; // RX CPL QM: Type RBB+8'hC4: ctrl_reg_rd_data_reg <= 32'h00000100; // RX CPL QM: Version - RBB+8'hC8: ctrl_reg_rd_data_reg <= SCHED_RB_BASE_ADDR; // RX CPL QM: Next header + RBB+8'hC8: ctrl_reg_rd_data_reg <= RX_RB_BASE_ADDR; // RX CPL QM: Next header RBB+8'hCC: ctrl_reg_rd_data_reg <= AXIL_RX_CQM_BASE_ADDR; // RX CPL QM: Offset RBB+8'hD0: ctrl_reg_rd_data_reg <= 2**RX_CPL_QUEUE_INDEX_WIDTH; // RX CPL QM: Count RBB+8'hD4: ctrl_reg_rd_data_reg <= 32; // RX CPL QM: Stride @@ -1025,8 +1029,6 @@ always @(posedge clk) begin tx_mtu_reg <= MAX_TX_SIZE; rx_mtu_reg <= MAX_RX_SIZE; - - rss_mask_reg <= 0; end end @@ -2364,6 +2366,11 @@ assign m_axis_data_dma_read_desc_ram_sel = 0; mqnic_interface_rx #( .PORTS(PORTS), + .REG_ADDR_WIDTH(AXIL_CTRL_ADDR_WIDTH), + .REG_DATA_WIDTH(AXIL_DATA_WIDTH), + .REG_STRB_WIDTH(AXIL_STRB_WIDTH), + .RB_BASE_ADDR(RX_RB_BASE_ADDR), + .RB_NEXT_PTR(SCHED_RB_BASE_ADDR), .DMA_ADDR_WIDTH(DMA_ADDR_WIDTH), .DMA_LEN_WIDTH(DMA_LEN_WIDTH), .DMA_TAG_WIDTH(DMA_TAG_WIDTH), @@ -2409,6 +2416,21 @@ interface_rx_inst ( .clk(clk), .rst(rst), + /* + * Control register interface + */ + .ctrl_reg_wr_addr(ctrl_reg_wr_addr), + .ctrl_reg_wr_data(ctrl_reg_wr_data), + .ctrl_reg_wr_strb(ctrl_reg_wr_strb), + .ctrl_reg_wr_en(ctrl_reg_wr_en), + .ctrl_reg_wr_wait(if_rx_ctrl_reg_wr_wait), + .ctrl_reg_wr_ack(if_rx_ctrl_reg_wr_ack), + .ctrl_reg_rd_addr(ctrl_reg_rd_addr), + .ctrl_reg_rd_en(ctrl_reg_rd_en), + .ctrl_reg_rd_data(if_rx_ctrl_reg_rd_data), + .ctrl_reg_rd_wait(if_rx_ctrl_reg_rd_wait), + .ctrl_reg_rd_ack(if_rx_ctrl_reg_rd_ack), + /* * Descriptor request output */ @@ -2504,8 +2526,7 @@ interface_rx_inst ( /* * Configuration */ - .mtu(rx_mtu_reg), - .rss_mask(rss_mask_reg) + .mtu(rx_mtu_reg) ); assign m_axis_data_dma_write_desc_ram_sel = 0; diff --git a/fpga/common/rtl/mqnic_interface_rx.v b/fpga/common/rtl/mqnic_interface_rx.v index 24db2b2ee..507e2c452 100644 --- a/fpga/common/rtl/mqnic_interface_rx.v +++ b/fpga/common/rtl/mqnic_interface_rx.v @@ -44,6 +44,16 @@ module mqnic_interface_rx # ( // Number of ports parameter PORTS = 1, + // Control register interface address width + parameter REG_ADDR_WIDTH = 7, + // Control register interface data width + parameter REG_DATA_WIDTH = 32, + // Control register interface byte enable width + parameter REG_STRB_WIDTH = (REG_DATA_WIDTH/8), + // Register block base address + parameter RB_BASE_ADDR = 0, + // Register block next block address + parameter RB_NEXT_PTR = 0, // DMA address width parameter DMA_ADDR_WIDTH = 64, // DMA length field width @@ -129,6 +139,21 @@ module mqnic_interface_rx # input wire clk, input wire rst, + /* + * Control register interface + */ + input wire [REG_ADDR_WIDTH-1:0] ctrl_reg_wr_addr, + input wire [REG_DATA_WIDTH-1:0] ctrl_reg_wr_data, + input wire [REG_STRB_WIDTH-1:0] ctrl_reg_wr_strb, + input wire ctrl_reg_wr_en, + output wire ctrl_reg_wr_wait, + output wire ctrl_reg_wr_ack, + input wire [REG_ADDR_WIDTH-1:0] ctrl_reg_rd_addr, + input wire ctrl_reg_rd_en, + output wire [REG_DATA_WIDTH-1:0] ctrl_reg_rd_data, + output wire ctrl_reg_rd_wait, + output wire ctrl_reg_rd_ack, + /* * Descriptor request output */ @@ -224,8 +249,7 @@ module mqnic_interface_rx # /* * Configuration */ - input wire [DMA_CLIENT_LEN_WIDTH-1:0] mtu, - input wire [31:0] rss_mask + input wire [DMA_CLIENT_LEN_WIDTH-1:0] mtu ); parameter DMA_CLIENT_TAG_WIDTH = $clog2(RX_DESC_TABLE_SIZE); @@ -233,6 +257,13 @@ parameter DMA_CLIENT_LEN_WIDTH = DMA_LEN_WIDTH; parameter REQ_TAG_WIDTH = $clog2(RX_DESC_TABLE_SIZE); +localparam RX_HASH_WIDTH = 32; +localparam RX_HASH_TYPE_WIDTH = 4; + +localparam TUSER_HASH_OFFSET = AXIS_RX_USER_WIDTH; +localparam TUSER_HASH_TYPE_OFFSET = TUSER_HASH_OFFSET + (RX_HASH_ENABLE ? RX_HASH_WIDTH : 0); +localparam INT_AXIS_RX_USER_WIDTH = TUSER_HASH_TYPE_OFFSET + (RX_HASH_ENABLE ? RX_HASH_TYPE_WIDTH : 0); + wire [AXIS_DESC_DATA_WIDTH-1:0] rx_fifo_desc_tdata; wire [AXIS_DESC_KEEP_WIDTH-1:0] rx_fifo_desc_tkeep; wire rx_fifo_desc_tvalid; @@ -283,16 +314,6 @@ rx_desc_fifo ( .status_good_frame() ); -wire [RX_QUEUE_INDEX_WIDTH-1:0] rx_req_queue; -wire [REQ_TAG_WIDTH-1:0] rx_req_tag; -wire rx_req_valid; -wire rx_req_ready; - -wire [31:0] rx_hash; -wire [3:0] rx_hash_type; -wire rx_hash_valid; -wire rx_hash_ready; - wire [15:0] rx_csum; wire rx_csum_valid; wire rx_csum_ready; @@ -303,16 +324,50 @@ wire [DMA_CLIENT_TAG_WIDTH-1:0] dma_rx_desc_tag; wire dma_rx_desc_valid; wire dma_rx_desc_ready; -wire [DMA_CLIENT_LEN_WIDTH-1:0] dma_rx_desc_status_len; -wire [DMA_CLIENT_TAG_WIDTH-1:0] dma_rx_desc_status_tag; -wire [AXIS_RX_ID_WIDTH-1:0] dma_rx_desc_status_id; -wire [AXIS_RX_DEST_WIDTH-1:0] dma_rx_desc_status_dest; -wire [AXIS_RX_USER_WIDTH-1:0] dma_rx_desc_status_user; -wire [3:0] dma_rx_desc_status_error; -wire dma_rx_desc_status_valid; +wire [DMA_CLIENT_LEN_WIDTH-1:0] dma_rx_desc_status_len; +wire [DMA_CLIENT_TAG_WIDTH-1:0] dma_rx_desc_status_tag; +wire [AXIS_RX_ID_WIDTH-1:0] dma_rx_desc_status_id; +wire [AXIS_RX_DEST_WIDTH-1:0] dma_rx_desc_status_dest; +wire [INT_AXIS_RX_USER_WIDTH-1:0] dma_rx_desc_status_user; +wire [3:0] dma_rx_desc_status_error; +wire dma_rx_desc_status_valid; + +// Generate RX requests +reg rx_frame_reg = 1'b0; +reg [5:0] rx_req_cnt_reg = 0; + +wire rx_req_valid = rx_req_cnt_reg != 0; +wire rx_req_ready; + +always @(posedge clk) begin + if (rx_req_valid && rx_req_ready) begin + rx_req_cnt_reg <= rx_req_cnt_reg - 1; + end + + if (rx_axis_tready && rx_axis_tvalid) begin + if (!rx_frame_reg) begin + if (rx_req_valid && rx_req_ready) begin + rx_req_cnt_reg <= rx_req_cnt_reg; + end else begin + rx_req_cnt_reg <= rx_req_cnt_reg + 1; + end + end + rx_frame_reg <= !rx_axis_tlast; + end + + if (rst) begin + rx_frame_reg <= 1'b0; + rx_req_cnt_reg <= 0; + end +end rx_engine #( .PORTS(PORTS), + .REG_ADDR_WIDTH(REG_ADDR_WIDTH), + .REG_DATA_WIDTH(REG_DATA_WIDTH), + .REG_STRB_WIDTH(REG_STRB_WIDTH), + .RB_BASE_ADDR(RB_BASE_ADDR), + .RB_NEXT_PTR(RB_NEXT_PTR), .RAM_ADDR_WIDTH(RAM_ADDR_WIDTH), .DMA_ADDR_WIDTH(DMA_ADDR_WIDTH), .DMA_LEN_WIDTH(DMA_LEN_WIDTH), @@ -343,17 +398,31 @@ rx_engine #( .RX_CHECKSUM_ENABLE(RX_CHECKSUM_ENABLE), .AXIS_RX_ID_WIDTH(AXIS_RX_ID_WIDTH), .AXIS_RX_DEST_WIDTH(AXIS_RX_DEST_WIDTH), - .AXIS_RX_USER_WIDTH(AXIS_RX_USER_WIDTH) + .AXIS_RX_USER_WIDTH(INT_AXIS_RX_USER_WIDTH) ) rx_engine_inst ( .clk(clk), .rst(rst), + /* + * Control register interface + */ + .ctrl_reg_wr_addr(ctrl_reg_wr_addr), + .ctrl_reg_wr_data(ctrl_reg_wr_data), + .ctrl_reg_wr_strb(ctrl_reg_wr_strb), + .ctrl_reg_wr_en(ctrl_reg_wr_en), + .ctrl_reg_wr_wait(ctrl_reg_wr_wait), + .ctrl_reg_wr_ack(ctrl_reg_wr_ack), + .ctrl_reg_rd_addr(ctrl_reg_rd_addr), + .ctrl_reg_rd_en(ctrl_reg_rd_en), + .ctrl_reg_rd_data(ctrl_reg_rd_data), + .ctrl_reg_rd_wait(ctrl_reg_rd_wait), + .ctrl_reg_rd_ack(ctrl_reg_rd_ack), + /* * Receive request input (queue index) */ - .s_axis_rx_req_queue(rx_req_queue), - .s_axis_rx_req_tag(rx_req_tag), + .s_axis_rx_req_tag(0), .s_axis_rx_req_valid(rx_req_valid), .s_axis_rx_req_ready(rx_req_ready), @@ -448,14 +517,6 @@ rx_engine_inst ( .s_axis_rx_desc_status_error(dma_rx_desc_status_error), .s_axis_rx_desc_status_valid(dma_rx_desc_status_valid), - /* - * Receive hash input - */ - .s_axis_rx_hash(rx_hash), - .s_axis_rx_hash_type(rx_hash_type), - .s_axis_rx_hash_valid(rx_hash_valid), - .s_axis_rx_hash_ready(rx_hash_ready), - /* * Receive checksum input */ @@ -510,14 +571,14 @@ dma_psdpram_rx_inst ( .rd_resp_ready(dma_ram_rd_resp_ready) ); -wire [AXIS_DATA_WIDTH-1:0] rx_axis_tdata_int; -wire [AXIS_KEEP_WIDTH-1:0] rx_axis_tkeep_int; -wire rx_axis_tvalid_int; -wire rx_axis_tready_int; -wire rx_axis_tlast_int; -wire [AXIS_RX_ID_WIDTH-1:0] rx_axis_tid_int; -wire [AXIS_RX_DEST_WIDTH-1:0] rx_axis_tdest_int; -wire [AXIS_RX_USER_WIDTH-1:0] rx_axis_tuser_int; +wire [AXIS_DATA_WIDTH-1:0] rx_axis_tdata_int; +wire [AXIS_KEEP_WIDTH-1:0] rx_axis_tkeep_int; +wire rx_axis_tvalid_int; +wire rx_axis_tready_int; +wire rx_axis_tlast_int; +wire [AXIS_RX_ID_WIDTH-1:0] rx_axis_tid_int; +wire [AXIS_RX_DEST_WIDTH-1:0] rx_axis_tdest_int; +wire [INT_AXIS_RX_USER_WIDTH-1:0] rx_axis_tuser_int; mqnic_ingress #( .REQ_TAG_WIDTH(REQ_TAG_WIDTH), @@ -529,7 +590,8 @@ mqnic_ingress #( .AXIS_KEEP_WIDTH(AXIS_KEEP_WIDTH), .AXIS_ID_WIDTH(AXIS_RX_ID_WIDTH), .AXIS_DEST_WIDTH(AXIS_RX_DEST_WIDTH), - .AXIS_USER_WIDTH(AXIS_RX_USER_WIDTH), + .S_AXIS_USER_WIDTH(AXIS_RX_USER_WIDTH), + .M_AXIS_USER_WIDTH(INT_AXIS_RX_USER_WIDTH), .MAX_RX_SIZE(MAX_RX_SIZE) ) ingress_inst ( @@ -560,33 +622,12 @@ ingress_inst ( .m_axis_tdest(rx_axis_tdest_int), .m_axis_tuser(rx_axis_tuser_int), - /* - * RX command output - */ - .rx_req_queue(rx_req_queue), - .rx_req_tag(rx_req_tag), - .rx_req_valid(rx_req_valid), - .rx_req_ready(rx_req_ready), - - /* - * RX hash output - */ - .rx_hash(rx_hash), - .rx_hash_type(rx_hash_type), - .rx_hash_valid(rx_hash_valid), - .rx_hash_ready(rx_hash_ready), - /* * RX checksum output */ .rx_csum(rx_csum), .rx_csum_valid(rx_csum_valid), - .rx_csum_ready(rx_csum_ready), - - /* - * Configuration - */ - .rss_mask(rss_mask) + .rx_csum_ready(rx_csum_ready) ); dma_client_axis_sink #( @@ -604,7 +645,7 @@ dma_client_axis_sink #( .AXIS_DEST_ENABLE(1), .AXIS_DEST_WIDTH(AXIS_RX_DEST_WIDTH), .AXIS_USER_ENABLE(1), - .AXIS_USER_WIDTH(AXIS_RX_USER_WIDTH), + .AXIS_USER_WIDTH(INT_AXIS_RX_USER_WIDTH), .LEN_WIDTH(DMA_CLIENT_LEN_WIDTH), .TAG_WIDTH(DMA_CLIENT_TAG_WIDTH) ) diff --git a/fpga/common/rtl/mqnic_rx_queue_map.v b/fpga/common/rtl/mqnic_rx_queue_map.v new file mode 100644 index 000000000..08990558b --- /dev/null +++ b/fpga/common/rtl/mqnic_rx_queue_map.v @@ -0,0 +1,232 @@ +/* + +Copyright 2022, The Regents of the University of California. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + 1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + +THIS SOFTWARE IS PROVIDED BY THE REGENTS OF THE UNIVERSITY OF CALIFORNIA ''AS +IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE +DISCLAIMED. IN NO EVENT SHALL THE REGENTS OF THE UNIVERSITY OF CALIFORNIA OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT +OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING +IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY +OF SUCH DAMAGE. + +The views and conclusions contained in the software and documentation are those +of the authors and should not be interpreted as representing official policies, +either expressed or implied, of The Regents of the University of California. + +*/ + +// Language: Verilog 2001 + +`resetall +`timescale 1ns / 1ps +`default_nettype none + +/* + * RX queue port mapping + */ +module mqnic_rx_queue_map # +( + // Number of ports + parameter PORTS = 1, + // Queue index width + parameter QUEUE_INDEX_WIDTH = 10, + // AXI stream tid signal width (source port) + parameter ID_WIDTH = $clog2(PORTS), + // AXI stream tdest signal width (from application) + parameter DEST_WIDTH = QUEUE_INDEX_WIDTH, + // Flow hash width + parameter HASH_WIDTH = 32, + // Tag width + parameter TAG_WIDTH = 8, + // Control register interface address width + parameter REG_ADDR_WIDTH = $clog2(16 + PORTS*16), + // Control register interface data width + parameter REG_DATA_WIDTH = 32, + // Control register interface byte enable width + parameter REG_STRB_WIDTH = (REG_DATA_WIDTH/8), + // Register block base address + parameter RB_BASE_ADDR = 0, + // Register block next block address + parameter RB_NEXT_PTR = 0 +) +( + input wire clk, + input wire rst, + + /* + * Register interface + */ + input wire [REG_ADDR_WIDTH-1:0] reg_wr_addr, + input wire [REG_DATA_WIDTH-1:0] reg_wr_data, + input wire [REG_STRB_WIDTH-1:0] reg_wr_strb, + input wire reg_wr_en, + output wire reg_wr_wait, + output wire reg_wr_ack, + input wire [REG_ADDR_WIDTH-1:0] reg_rd_addr, + input wire reg_rd_en, + output wire [REG_DATA_WIDTH-1:0] reg_rd_data, + output wire reg_rd_wait, + output wire reg_rd_ack, + + /* + * Request input + */ + input wire [ID_WIDTH-1:0] req_id, + input wire [DEST_WIDTH-1:0] req_dest, + input wire [HASH_WIDTH-1:0] req_hash, + input wire [TAG_WIDTH-1:0] req_tag, + input wire req_valid, + + /* + * Response output + */ + output wire [QUEUE_INDEX_WIDTH-1:0] resp_queue, + output wire [TAG_WIDTH-1:0] resp_tag, + output wire resp_valid +); + +localparam RBB = RB_BASE_ADDR & {REG_ADDR_WIDTH{1'b1}}; + +// check configuration +initial begin + if (REG_DATA_WIDTH != 32) begin + $error("Error: Register interface width must be 32 (instance %m)"); + $finish; + end + + if (REG_STRB_WIDTH * 8 != REG_DATA_WIDTH) begin + $error("Error: Register interface requires byte (8-bit) granularity (instance %m)"); + $finish; + end + + if (REG_ADDR_WIDTH < $clog2(16 + PORTS*16)) begin + $error("Error: Register address width too narrow (instance %m)"); + $finish; + end + + if (RB_NEXT_PTR >= RB_BASE_ADDR && RB_NEXT_PTR < RB_BASE_ADDR + 16 + PORTS*16) begin + $error("Error: RB_NEXT_PTR overlaps block (instance %m)"); + $finish; + end +end + +// control registers +reg reg_wr_ack_reg = 1'b0; +reg [REG_DATA_WIDTH-1:0] reg_rd_data_reg = 0; +reg reg_rd_ack_reg = 1'b0; + +reg [QUEUE_INDEX_WIDTH-1:0] offset_reg[PORTS-1:0]; +reg [QUEUE_INDEX_WIDTH-1:0] hash_mask_reg[PORTS-1:0]; +reg [QUEUE_INDEX_WIDTH-1:0] app_mask_reg[PORTS-1:0]; + +reg [QUEUE_INDEX_WIDTH-1:0] resp_queue_reg = 0; +reg [TAG_WIDTH-1:0] resp_tag_reg = 0; +reg resp_valid_reg = 1'b0; + +assign reg_wr_wait = 1'b0; +assign reg_wr_ack = reg_wr_ack_reg; +assign reg_rd_data = reg_rd_data_reg; +assign reg_rd_wait = 1'b0; +assign reg_rd_ack = reg_rd_ack_reg; + +assign resp_queue = resp_queue_reg; +assign resp_tag = resp_tag_reg; +assign resp_valid = resp_valid_reg; + +integer k; + +initial begin + for (k = 0; k < PORTS; k = k + 1) begin + offset_reg[k] = 0; + hash_mask_reg[k] = 0; + app_mask_reg[k] = 0; + end +end + +always @(posedge clk) begin + reg_wr_ack_reg <= 1'b0; + reg_rd_data_reg <= 0; + reg_rd_ack_reg <= 1'b0; + + if (reg_wr_en && !reg_wr_ack_reg) begin + // write operation + reg_wr_ack_reg <= 1'b0; + for (k = 0; k < PORTS; k = k + 1) begin + if ({reg_wr_addr >> 2, 2'b00} == RBB+7'h10 + k*16) begin + offset_reg[k] <= reg_wr_data; + reg_wr_ack_reg <= 1'b1; + end + if ({reg_wr_addr >> 2, 2'b00} == RBB+7'h14 + k*16) begin + hash_mask_reg[k] <= reg_wr_data; + reg_wr_ack_reg <= 1'b1; + end + if ({reg_wr_addr >> 2, 2'b00} == RBB+7'h18 + k*16) begin + app_mask_reg[k] <= reg_wr_data; + reg_wr_ack_reg <= 1'b1; + end + end + end + + if (reg_rd_en && !reg_rd_ack_reg) begin + // read operation + reg_rd_ack_reg <= 1'b1; + case ({reg_rd_addr >> 2, 2'b00}) + RBB+7'h00: reg_rd_data_reg <= 32'h0000C090; // Type + RBB+7'h04: reg_rd_data_reg <= 32'h00000100; // Version + RBB+7'h08: reg_rd_data_reg <= RB_NEXT_PTR; // Next header + RBB+7'h0C: reg_rd_data_reg <= PORTS; // Port count + default: reg_rd_ack_reg <= 1'b0; + endcase + for (k = 0; k < PORTS; k = k + 1) begin + if ({reg_rd_addr >> 2, 2'b00} == RBB+7'h10 + k*16) begin + reg_rd_data_reg <= offset_reg[k]; + reg_rd_ack_reg <= 1'b1; + end + if ({reg_rd_addr >> 2, 2'b00} == RBB+7'h14 + k*16) begin + reg_rd_data_reg <= hash_mask_reg[k]; + reg_rd_ack_reg <= 1'b1; + end + if ({reg_rd_addr >> 2, 2'b00} == RBB+7'h18 + k*16) begin + reg_rd_data_reg <= app_mask_reg[k]; + reg_rd_ack_reg <= 1'b1; + end + end + end + + resp_queue_reg <= (req_dest & app_mask_reg[req_id]) + (req_hash & hash_mask_reg[req_id]) + offset_reg[req_id]; + resp_tag_reg <= req_tag; + resp_valid_reg <= req_valid; + + if (rst) begin + reg_wr_ack_reg <= 1'b0; + reg_rd_ack_reg <= 1'b0; + + for (k = 0; k < PORTS; k = k + 1) begin + offset_reg[k] <= 0; + hash_mask_reg[k] <= 0; + app_mask_reg[k] <= 0; + end + + resp_valid_reg <= 1'b0; + end +end + +endmodule + +`resetall diff --git a/fpga/common/rtl/rx_engine.v b/fpga/common/rtl/rx_engine.v index 8ef5078f3..fa1130269 100644 --- a/fpga/common/rtl/rx_engine.v +++ b/fpga/common/rtl/rx_engine.v @@ -44,6 +44,16 @@ module rx_engine # ( // Number of ports parameter PORTS = 1, + // Control register interface address width + parameter REG_ADDR_WIDTH = 7, + // Control register interface data width + parameter REG_DATA_WIDTH = 32, + // Control register interface byte enable width + parameter REG_STRB_WIDTH = (REG_DATA_WIDTH/8), + // Register block base address + parameter RB_BASE_ADDR = 0, + // Register block next block address + parameter RB_NEXT_PTR = 0, // DMA RAM address width parameter RAM_ADDR_WIDTH = 16, // DMA address width @@ -111,10 +121,24 @@ module rx_engine # input wire clk, input wire rst, + /* + * Control register interface + */ + input wire [REG_ADDR_WIDTH-1:0] ctrl_reg_wr_addr, + input wire [REG_DATA_WIDTH-1:0] ctrl_reg_wr_data, + input wire [REG_STRB_WIDTH-1:0] ctrl_reg_wr_strb, + input wire ctrl_reg_wr_en, + output wire ctrl_reg_wr_wait, + output wire ctrl_reg_wr_ack, + input wire [REG_ADDR_WIDTH-1:0] ctrl_reg_rd_addr, + input wire ctrl_reg_rd_en, + output wire [REG_DATA_WIDTH-1:0] ctrl_reg_rd_data, + output wire ctrl_reg_rd_wait, + output wire ctrl_reg_rd_ack, + /* * Receive request input (queue index) */ - input wire [QUEUE_INDEX_WIDTH-1:0] s_axis_rx_req_queue, input wire [REQ_TAG_WIDTH-1:0] s_axis_rx_req_tag, input wire s_axis_rx_req_valid, output wire s_axis_rx_req_ready, @@ -210,14 +234,6 @@ module rx_engine # input wire [3:0] s_axis_rx_desc_status_error, input wire s_axis_rx_desc_status_valid, - /* - * Receive hash input - */ - input wire [31:0] s_axis_rx_hash, - input wire [3:0] s_axis_rx_hash_type, - input wire s_axis_rx_hash_valid, - output wire s_axis_rx_hash_ready, - /* * Receive checksum input */ @@ -243,6 +259,14 @@ parameter RX_BUFFER_PTR_MASK_UPPER = RX_BUFFER_PTR_MASK & ~RX_BUFFER_PTR_MASK_LO parameter CL_MAX_DESC_REQ = $clog2(MAX_DESC_REQ); +localparam RX_HASH_WIDTH = 32; +localparam RX_HASH_TYPE_WIDTH = 4; + +localparam TUSER_PTP_TS_OFFSET = 1; +localparam TUSER_HASH_OFFSET = TUSER_PTP_TS_OFFSET + (PTP_TS_ENABLE ? PTP_TS_WIDTH : 0); +localparam TUSER_HASH_TYPE_OFFSET = TUSER_HASH_OFFSET + (RX_HASH_ENABLE ? RX_HASH_WIDTH : 0); +localparam INT_TUSER_WIDTH = TUSER_HASH_TYPE_OFFSET + (RX_HASH_ENABLE ? RX_HASH_TYPE_WIDTH : 0); + // bus width assertions initial begin if (DMA_TAG_WIDTH < CL_DESC_TABLE_SIZE) begin @@ -355,7 +379,6 @@ reg [DESC_TABLE_DMA_OP_COUNT_WIDTH-1:0] desc_table_write_count_start[DESC_TABLE_ reg [DESC_TABLE_DMA_OP_COUNT_WIDTH-1:0] desc_table_write_count_finish[DESC_TABLE_SIZE-1:0]; reg [CL_DESC_TABLE_SIZE+1-1:0] desc_table_start_ptr_reg = 0; -reg [QUEUE_INDEX_WIDTH-1:0] desc_table_start_queue; reg [REQ_TAG_WIDTH-1:0] desc_table_start_tag; reg [CL_RX_BUFFER_SIZE+1-1:0] desc_table_start_buf_ptr; reg desc_table_start_en; @@ -363,7 +386,12 @@ reg [CL_DESC_TABLE_SIZE-1:0] desc_table_rx_finish_ptr; reg [DMA_CLIENT_LEN_WIDTH-1:0] desc_table_rx_finish_len; reg [AXIS_RX_ID_WIDTH-1:0] desc_table_rx_finish_id; reg [PTP_TS_WIDTH-1:0] desc_table_rx_finish_ptp_ts; +reg [31:0] desc_table_rx_finish_hash; +reg [3:0] desc_table_rx_finish_hash_type; reg desc_table_rx_finish_en; +reg [CL_DESC_TABLE_SIZE-1:0] desc_table_store_queue_ptr; +reg [QUEUE_INDEX_WIDTH-1:0] desc_table_store_queue; +reg desc_table_store_queue_en; reg [CL_DESC_TABLE_SIZE+1-1:0] desc_table_dequeue_start_ptr_reg = 0; reg desc_table_dequeue_start_en; reg [CL_DESC_TABLE_SIZE-1:0] desc_table_dequeue_ptr; @@ -376,10 +404,6 @@ reg [DMA_CLIENT_LEN_WIDTH-1:0] desc_table_desc_fetched_len; reg desc_table_desc_fetched_en; reg [CL_DESC_TABLE_SIZE-1:0] desc_table_data_written_ptr; reg desc_table_data_written_en; -reg [CL_DESC_TABLE_SIZE+1-1:0] desc_table_store_hash_ptr_reg = 0; -reg [31:0] desc_table_store_hash; -reg [3:0] desc_table_store_hash_type; -reg desc_table_store_hash_en; reg [CL_DESC_TABLE_SIZE+1-1:0] desc_table_store_csum_ptr_reg = 0; reg [15:0] desc_table_store_csum; reg desc_table_store_csum_en; @@ -424,8 +448,6 @@ assign m_axis_rx_desc_len = m_axis_rx_desc_len_reg; assign m_axis_rx_desc_tag = m_axis_rx_desc_tag_reg; assign m_axis_rx_desc_valid = m_axis_rx_desc_valid_reg; -assign s_axis_rx_hash_ready = s_axis_rx_hash_ready_reg; - assign s_axis_rx_csum_ready = s_axis_rx_csum_ready_reg; // reg [15:0] stall_cnt = 0; @@ -463,6 +485,59 @@ assign s_axis_rx_csum_ready = s_axis_rx_csum_ready_reg; // .probe5(0) // ); +wire [QUEUE_INDEX_WIDTH-1:0] queue_map_resp_queue; +wire [CL_DESC_TABLE_SIZE+1-1:0] queue_map_resp_tag; +wire queue_map_resp_valid; + +mqnic_rx_queue_map #( + .PORTS(PORTS), + .QUEUE_INDEX_WIDTH(QUEUE_INDEX_WIDTH), + .ID_WIDTH(AXIS_RX_ID_WIDTH), + .DEST_WIDTH(AXIS_RX_DEST_WIDTH), + .HASH_WIDTH(RX_HASH_WIDTH), + .TAG_WIDTH(CL_DESC_TABLE_SIZE+1), + .REG_ADDR_WIDTH(REG_ADDR_WIDTH), + .REG_DATA_WIDTH(REG_DATA_WIDTH), + .REG_STRB_WIDTH(REG_STRB_WIDTH), + .RB_BASE_ADDR(RB_BASE_ADDR), + .RB_NEXT_PTR(RB_NEXT_PTR) +) +mqnic_rx_queue_map_inst ( + .clk(clk), + .rst(rst), + + /* + * Register interface + */ + .reg_wr_addr(ctrl_reg_wr_addr), + .reg_wr_data(ctrl_reg_wr_data), + .reg_wr_strb(ctrl_reg_wr_strb), + .reg_wr_en(ctrl_reg_wr_en), + .reg_wr_wait(ctrl_reg_wr_wait), + .reg_wr_ack(ctrl_reg_wr_ack), + .reg_rd_addr(ctrl_reg_rd_addr), + .reg_rd_en(ctrl_reg_rd_en), + .reg_rd_data(ctrl_reg_rd_data), + .reg_rd_wait(ctrl_reg_rd_wait), + .reg_rd_ack(ctrl_reg_rd_ack), + + /* + * Request input + */ + .req_id(s_axis_rx_desc_status_id), + .req_dest(s_axis_rx_desc_status_dest), + .req_hash(s_axis_rx_desc_status_user >> TUSER_HASH_OFFSET), + .req_tag(s_axis_rx_desc_status_tag), + .req_valid(s_axis_rx_desc_status_valid), + + /* + * Response output + */ + .resp_queue(queue_map_resp_queue), + .resp_tag(queue_map_resp_tag), + .resp_valid(queue_map_resp_valid) +); + integer i; initial begin @@ -530,14 +605,18 @@ always @* begin dec_active_desc_req_2 = 1'b0; desc_table_start_tag = s_axis_rx_req_tag; - desc_table_start_queue = s_axis_rx_req_queue; desc_table_start_buf_ptr = buf_wr_ptr_reg; desc_table_start_en = 1'b0; desc_table_rx_finish_ptr = s_axis_rx_desc_status_tag; desc_table_rx_finish_len = s_axis_rx_desc_status_len; desc_table_rx_finish_id = s_axis_rx_desc_status_id; - desc_table_rx_finish_ptp_ts = s_axis_rx_desc_status_user >> 1; + desc_table_rx_finish_ptp_ts = s_axis_rx_desc_status_user >> TUSER_PTP_TS_OFFSET; + desc_table_rx_finish_hash = s_axis_rx_desc_status_user >> TUSER_HASH_OFFSET; + desc_table_rx_finish_hash_type = s_axis_rx_desc_status_user >> TUSER_HASH_TYPE_OFFSET; desc_table_rx_finish_en = 1'b0; + desc_table_store_queue_ptr = queue_map_resp_tag; + desc_table_store_queue = queue_map_resp_queue; + desc_table_store_queue_en = 1'b0; desc_table_dequeue_start_en = 1'b0; desc_table_dequeue_ptr = s_axis_desc_req_status_tag; desc_table_dequeue_queue_ptr = s_axis_desc_req_status_ptr; @@ -549,9 +628,6 @@ always @* begin desc_table_desc_fetched_en = 1'b0; desc_table_data_written_ptr = s_axis_dma_write_desc_status_tag & DESC_PTR_MASK; desc_table_data_written_en = 1'b0; - desc_table_store_hash = s_axis_rx_hash; - desc_table_store_hash_type = s_axis_rx_hash_type; - desc_table_store_hash_en = 1'b0; desc_table_store_csum = s_axis_rx_csum; desc_table_store_csum_en = 1'b0; desc_table_cpl_enqueue_start_en = 1'b0; @@ -573,7 +649,6 @@ always @* begin // store in descriptor table desc_table_start_tag = s_axis_rx_req_tag; - desc_table_start_queue = s_axis_rx_req_queue; desc_table_start_buf_ptr = buf_wr_ptr_reg; desc_table_start_en = 1'b1; @@ -592,17 +667,25 @@ always @* begin end // receive done - // wait for receive completion + // wait for DMA completion if (s_axis_rx_desc_status_valid) begin // update entry in descriptor table desc_table_rx_finish_ptr = s_axis_rx_desc_status_tag; desc_table_rx_finish_len = s_axis_rx_desc_status_len; desc_table_rx_finish_id = s_axis_rx_desc_status_id; - // desc_table_rx_finish_queue = s_axis_rx_desc_status_dest; - desc_table_rx_finish_ptp_ts = s_axis_rx_desc_status_user >> 1; + desc_table_rx_finish_ptp_ts = s_axis_rx_desc_status_user >> TUSER_PTP_TS_OFFSET; + desc_table_rx_finish_hash = s_axis_rx_desc_status_user >> TUSER_HASH_OFFSET; + desc_table_rx_finish_hash_type = s_axis_rx_desc_status_user >> TUSER_HASH_TYPE_OFFSET; desc_table_rx_finish_en = 1'b1; end + // store queue + if (queue_map_resp_valid) begin + desc_table_store_queue_ptr = queue_map_resp_tag; + desc_table_store_queue = queue_map_resp_queue; + desc_table_store_queue_en = 1'b1; + end + // descriptor fetch if (desc_table_active[desc_table_dequeue_start_ptr_reg & DESC_PTR_MASK] && desc_table_dequeue_start_ptr_reg != desc_table_start_ptr_reg) begin if (desc_table_rx_done[desc_table_dequeue_start_ptr_reg & DESC_PTR_MASK] && !m_axis_desc_req_valid && active_desc_req_count_reg < MAX_DESC_REQ) begin @@ -708,24 +791,6 @@ always @* begin desc_table_write_finish_en = 1'b1; end - // store RX hash - if (desc_table_active[desc_table_store_hash_ptr_reg & DESC_PTR_MASK] && desc_table_store_hash_ptr_reg != desc_table_start_ptr_reg && RX_HASH_ENABLE) begin - s_axis_rx_hash_ready_next = 1'b1; - if (desc_table_invalid[desc_table_store_hash_ptr_reg & DESC_PTR_MASK]) begin - // invalid entry; skip - desc_table_store_hash_en = 1'b1; - - s_axis_rx_hash_ready_next = 1'b0; - end else if (s_axis_rx_hash_ready && s_axis_rx_hash_valid) begin - // update entry in descriptor table - desc_table_store_hash = s_axis_rx_hash; - desc_table_store_hash_type = s_axis_rx_hash_type; - desc_table_store_hash_en = 1'b1; - - s_axis_rx_hash_ready_next = 1'b0; - end - end - // store RX checksum if (desc_table_active[desc_table_store_csum_ptr_reg & DESC_PTR_MASK] && desc_table_store_csum_ptr_reg != desc_table_start_ptr_reg && RX_CHECKSUM_ENABLE) begin s_axis_rx_csum_ready_next = 1'b1; @@ -747,7 +812,6 @@ always @* begin if (desc_table_active[desc_table_cpl_enqueue_start_ptr_reg & DESC_PTR_MASK] && desc_table_cpl_enqueue_start_ptr_reg != desc_table_start_ptr_reg && desc_table_cpl_enqueue_start_ptr_reg != desc_table_dequeue_start_ptr_reg && - (desc_table_cpl_enqueue_start_ptr_reg != desc_table_store_hash_ptr_reg || !RX_HASH_ENABLE) && (desc_table_cpl_enqueue_start_ptr_reg != desc_table_store_csum_ptr_reg || !RX_CHECKSUM_ENABLE)) begin if (desc_table_invalid[desc_table_cpl_enqueue_start_ptr_reg & DESC_PTR_MASK]) begin // invalid entry; skip @@ -876,7 +940,6 @@ always @(posedge clk) begin desc_table_desc_fetched[desc_table_start_ptr_reg & DESC_PTR_MASK] <= 1'b0; desc_table_data_written[desc_table_start_ptr_reg & DESC_PTR_MASK] <= 1'b0; desc_table_cpl_write_done[desc_table_start_ptr_reg & DESC_PTR_MASK] <= 1'b0; - desc_table_queue[desc_table_start_ptr_reg & DESC_PTR_MASK] <= desc_table_start_queue; desc_table_tag[desc_table_start_ptr_reg & DESC_PTR_MASK] <= desc_table_start_tag; desc_table_buf_ptr[desc_table_start_ptr_reg & DESC_PTR_MASK] <= desc_table_start_buf_ptr; desc_table_start_ptr_reg <= desc_table_start_ptr_reg + 1; @@ -886,7 +949,13 @@ always @(posedge clk) begin desc_table_dma_len[desc_table_rx_finish_ptr & DESC_PTR_MASK] <= desc_table_rx_finish_len; desc_table_id[desc_table_rx_finish_ptr & DESC_PTR_MASK] <= desc_table_rx_finish_id; desc_table_ptp_ts[desc_table_rx_finish_ptr & DESC_PTR_MASK] <= desc_table_rx_finish_ptp_ts; - desc_table_rx_done[desc_table_rx_finish_ptr & DESC_PTR_MASK] <= 1'b1; + desc_table_hash[desc_table_rx_finish_ptr & DESC_PTR_MASK] <= desc_table_rx_finish_hash; + desc_table_hash_type[desc_table_rx_finish_ptr & DESC_PTR_MASK] <= desc_table_rx_finish_hash_type; + end + + if (desc_table_store_queue_en) begin + desc_table_queue[desc_table_store_queue_ptr & DESC_PTR_MASK] <= desc_table_store_queue; + desc_table_rx_done[desc_table_store_queue_ptr & DESC_PTR_MASK] <= 1'b1; end if (desc_table_dequeue_start_en) begin @@ -910,12 +979,6 @@ always @(posedge clk) begin desc_table_data_written[desc_table_data_written_ptr & DESC_PTR_MASK] <= 1'b1; end - if (desc_table_store_hash_en) begin - desc_table_hash[desc_table_store_hash_ptr_reg & DESC_PTR_MASK] <= desc_table_store_hash; - desc_table_hash_type[desc_table_store_hash_ptr_reg & DESC_PTR_MASK] <= desc_table_store_hash_type; - desc_table_store_hash_ptr_reg <= desc_table_store_hash_ptr_reg + 1; - end - if (desc_table_store_csum_en) begin desc_table_csum[desc_table_store_csum_ptr_reg & DESC_PTR_MASK] <= desc_table_store_csum; desc_table_store_csum_ptr_reg <= desc_table_store_csum_ptr_reg + 1; @@ -980,7 +1043,6 @@ always @(posedge clk) begin desc_table_start_ptr_reg <= 0; desc_table_dequeue_start_ptr_reg <= 0; - desc_table_store_hash_ptr_reg <= 0; desc_table_store_csum_ptr_reg <= 0; desc_table_cpl_enqueue_start_ptr_reg <= 0; desc_table_finish_ptr_reg <= 0; diff --git a/fpga/common/tb/mqnic.py b/fpga/common/tb/mqnic.py index 2eda240ae..69bab3640 100644 --- a/fpga/common/tb/mqnic.py +++ b/fpga/common/tb/mqnic.py @@ -153,7 +153,7 @@ MQNIC_RB_IF_REG_STRIDE = 0x14 MQNIC_RB_IF_REG_CSR_OFFSET = 0x18 MQNIC_RB_IF_CTRL_TYPE = 0x0000C001 -MQNIC_RB_IF_CTRL_VER = 0x00000300 +MQNIC_RB_IF_CTRL_VER = 0x00000400 MQNIC_RB_IF_CTRL_REG_FEATURES = 0x0C MQNIC_RB_IF_CTRL_REG_PORT_COUNT = 0x10 MQNIC_RB_IF_CTRL_REG_SCHED_COUNT = 0x14 @@ -161,7 +161,6 @@ MQNIC_RB_IF_CTRL_REG_MAX_TX_MTU = 0x20 MQNIC_RB_IF_CTRL_REG_MAX_RX_MTU = 0x24 MQNIC_RB_IF_CTRL_REG_TX_MTU = 0x28 MQNIC_RB_IF_CTRL_REG_RX_MTU = 0x2C -MQNIC_RB_IF_CTRL_REG_RSS_MASK = 0x30 MQNIC_IF_FEATURE_RSS = (1 << 0) MQNIC_IF_FEATURE_PTP_TS = (1 << 4) @@ -169,6 +168,15 @@ MQNIC_IF_FEATURE_TX_CSUM = (1 << 8) MQNIC_IF_FEATURE_RX_CSUM = (1 << 9) MQNIC_IF_FEATURE_RX_HASH = (1 << 10) +MQNIC_RB_RX_QUEUE_MAP_TYPE = 0x0000C090 +MQNIC_RB_RX_QUEUE_MAP_VER = 0x00000100 +MQNIC_RB_RX_QUEUE_MAP_REG_PORTS = 0x0C +MQNIC_RB_RX_QUEUE_MAP_CH_OFFSET = 0x10 +MQNIC_RB_RX_QUEUE_MAP_CH_STRIDE = 0x10 +MQNIC_RB_RX_QUEUE_MAP_CH_REG_OFFSET = 0x00 +MQNIC_RB_RX_QUEUE_MAP_CH_REG_RSS_MASK = 0x04 +MQNIC_RB_RX_QUEUE_MAP_CH_REG_APP_MASK = 0x08 + MQNIC_RB_EVENT_QM_TYPE = 0x0000C010 MQNIC_RB_EVENT_QM_VER = 0x00000100 MQNIC_RB_EVENT_QM_REG_OFFSET = 0x0C @@ -808,6 +816,7 @@ class Interface: self.tx_cpl_queue_rb = None self.rx_queue_rb = None self.rx_cpl_queue_rb = None + self.rx_queue_map_rb = None self.if_features = None @@ -929,6 +938,13 @@ class Interface: self.rx_cpl_queue_count = min(self.rx_cpl_queue_count, MQNIC_MAX_RX_CPL_RINGS) + self.rx_queue_map_rb = self.reg_blocks.find(MQNIC_RB_RX_QUEUE_MAP_TYPE, MQNIC_RB_RX_QUEUE_MAP_VER) + + for k in range(self.port_count): + await self.set_rx_queue_map_offset(k, 0) + await self.set_rx_queue_map_rss_mask(k, 0) + await self.set_rx_queue_map_app_mask(k, 0) + self.event_queues = [] self.tx_queues = [] @@ -1225,8 +1241,29 @@ class Interface: await self.if_ctrl_rb.write_dword(MQNIC_RB_IF_CTRL_REG_TX_MTU, mtu) await self.if_ctrl_rb.write_dword(MQNIC_RB_IF_CTRL_REG_RX_MTU, mtu) - async def set_rss_mask(self, mask): - await self.if_ctrl_rb.write_dword(MQNIC_RB_IF_CTRL_REG_RSS_MASK, mask) + async def get_rx_queue_map_offset(self, port): + return await self.rx_queue_map_rb.read_dword(MQNIC_RB_RX_QUEUE_MAP_CH_OFFSET + + MQNIC_RB_RX_QUEUE_MAP_CH_STRIDE*port + MQNIC_RB_RX_QUEUE_MAP_CH_REG_OFFSET) + + async def set_rx_queue_map_offset(self, port, val): + await self.rx_queue_map_rb.write_dword(MQNIC_RB_RX_QUEUE_MAP_CH_OFFSET + + MQNIC_RB_RX_QUEUE_MAP_CH_STRIDE*port + MQNIC_RB_RX_QUEUE_MAP_CH_REG_OFFSET, val) + + async def get_rx_queue_map_rss_mask(self, port): + return await self.rx_queue_map_rb.read_dword(MQNIC_RB_RX_QUEUE_MAP_CH_OFFSET + + MQNIC_RB_RX_QUEUE_MAP_CH_STRIDE*port + MQNIC_RB_RX_QUEUE_MAP_CH_REG_RSS_MASK) + + async def set_rx_queue_map_rss_mask(self, port, val): + await self.rx_queue_map_rb.write_dword(MQNIC_RB_RX_QUEUE_MAP_CH_OFFSET + + MQNIC_RB_RX_QUEUE_MAP_CH_STRIDE*port + MQNIC_RB_RX_QUEUE_MAP_CH_REG_RSS_MASK, val) + + async def get_rx_queue_map_app_mask(self, port): + return await self.rx_queue_map_rb.read_dword(MQNIC_RB_RX_QUEUE_MAP_CH_OFFSET + + MQNIC_RB_RX_QUEUE_MAP_CH_STRIDE*port + MQNIC_RB_RX_QUEUE_MAP_CH_REG_APP_MASK) + + async def set_rx_queue_map_app_mask(self, port, val): + await self.rx_queue_map_rb.write_dword(MQNIC_RB_RX_QUEUE_MAP_CH_OFFSET + + MQNIC_RB_RX_QUEUE_MAP_CH_STRIDE*port + MQNIC_RB_RX_QUEUE_MAP_CH_REG_APP_MASK, val) async def recv(self): if not self.pkt_rx_queue: diff --git a/fpga/common/tb/mqnic_core_axi/Makefile b/fpga/common/tb/mqnic_core_axi/Makefile index e38df27cf..c1de9c695 100644 --- a/fpga/common/tb/mqnic_core_axi/Makefile +++ b/fpga/common/tb/mqnic_core_axi/Makefile @@ -47,6 +47,7 @@ VERILOG_SOURCES += ../../rtl/mqnic_egress.v VERILOG_SOURCES += ../../rtl/mqnic_ingress.v VERILOG_SOURCES += ../../rtl/mqnic_l2_egress.v VERILOG_SOURCES += ../../rtl/mqnic_l2_ingress.v +VERILOG_SOURCES += ../../rtl/mqnic_rx_queue_map.v VERILOG_SOURCES += ../../rtl/mqnic_ptp.v VERILOG_SOURCES += ../../rtl/mqnic_ptp_clock.v VERILOG_SOURCES += ../../rtl/mqnic_ptp_perout.v diff --git a/fpga/common/tb/mqnic_core_axi/test_mqnic_core_axi.py b/fpga/common/tb/mqnic_core_axi/test_mqnic_core_axi.py index 07c58df0a..650784c32 100644 --- a/fpga/common/tb/mqnic_core_axi/test_mqnic_core_axi.py +++ b/fpga/common/tb/mqnic_core_axi/test_mqnic_core_axi.py @@ -242,6 +242,61 @@ async def run_test_nic(dut): assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff assert Ether(pkt.data).build() == test_pkt.build() + tb.log.info("Queue mapping offset test") + + data = bytearray([x % 256 for x in range(1024)]) + + tb.loopback_enable = True + + for k in range(4): + await tb.driver.interfaces[0].set_rx_queue_map_offset(0, k) + + await tb.driver.interfaces[0].start_xmit(data, 0) + + pkt = await tb.driver.interfaces[0].recv() + + tb.log.info("Packet: %s", pkt) + assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff + assert pkt.queue == k + + tb.loopback_enable = False + + await tb.driver.interfaces[0].set_rx_queue_map_offset(0, 0) + + tb.log.info("Queue mapping RSS mask test") + + await tb.driver.interfaces[0].set_rx_queue_map_rss_mask(0, 0x00000003) + + tb.loopback_enable = True + + queues = set() + + for k in range(64): + payload = bytes([x % 256 for x in range(256)]) + eth = Ether(src='5A:51:52:53:54:55', dst='DA:D1:D2:D3:D4:D5') + ip = IP(src='192.168.1.100', dst='192.168.1.101') + udp = UDP(sport=1, dport=k+0) + test_pkt = eth / ip / udp / payload + + test_pkt2 = test_pkt.copy() + test_pkt2[UDP].chksum = scapy.utils.checksum(bytes(test_pkt2[UDP])) + + await tb.driver.interfaces[0].start_xmit(test_pkt2.build(), 0, 34, 6) + + for k in range(64): + pkt = await tb.driver.interfaces[0].recv() + + tb.log.info("Packet: %s", pkt) + assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff + + queues.add(pkt.queue) + + assert len(queues) == 4 + + tb.loopback_enable = False + + await tb.driver.interfaces[0].set_rx_queue_map_rss_mask(0, 0) + tb.log.info("Multiple small packets") count = 64 @@ -328,6 +383,7 @@ async def run_test_nic(dut): for block in tb.driver.interfaces[0].sched_blocks: await block.schedulers[0].rb.write_dword(mqnic.MQNIC_RB_SCHED_RR_REG_CTRL, 0x00000001) + await tb.driver.interfaces[0].set_rx_queue_map_offset(block.index, block.index) for k in range(block.interface.tx_queue_count): if k % len(tb.driver.interfaces[0].sched_blocks) == block.index: await block.schedulers[0].hw_regs.write_dword(4*k, 0x00000003) @@ -340,6 +396,8 @@ async def run_test_nic(dut): tb.loopback_enable = True + queues = set() + for k, p in enumerate(pkts): await tb.driver.interfaces[0].start_xmit(p, k % len(tb.driver.interfaces[0].sched_blocks)) @@ -350,10 +408,15 @@ async def run_test_nic(dut): # assert pkt.data == pkts[k] assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff + queues.add(pkt.queue) + + assert len(queues) == len(tb.driver.interfaces[0].sched_blocks) + tb.loopback_enable = False for block in tb.driver.interfaces[0].sched_blocks[1:]: await block.schedulers[0].rb.write_dword(mqnic.MQNIC_RB_SCHED_RR_REG_CTRL, 0x00000000) + await tb.driver.interfaces[0].set_rx_queue_map_offset(block.index, 0) tb.log.info("Read statistics counters") @@ -404,6 +467,7 @@ def test_mqnic_core_pcie_axi(request, if_count, ports_per_if, axi_data_width, os.path.join(rtl_dir, "mqnic_ingress.v"), os.path.join(rtl_dir, "mqnic_l2_egress.v"), os.path.join(rtl_dir, "mqnic_l2_ingress.v"), + os.path.join(rtl_dir, "mqnic_rx_queue_map.v"), os.path.join(rtl_dir, "mqnic_ptp.v"), os.path.join(rtl_dir, "mqnic_ptp_clock.v"), os.path.join(rtl_dir, "mqnic_ptp_perout.v"), diff --git a/fpga/common/tb/mqnic_core_pcie_s10/Makefile b/fpga/common/tb/mqnic_core_pcie_s10/Makefile index 11f3438cc..815f3ca87 100644 --- a/fpga/common/tb/mqnic_core_pcie_s10/Makefile +++ b/fpga/common/tb/mqnic_core_pcie_s10/Makefile @@ -48,6 +48,7 @@ VERILOG_SOURCES += ../../rtl/mqnic_egress.v VERILOG_SOURCES += ../../rtl/mqnic_ingress.v VERILOG_SOURCES += ../../rtl/mqnic_l2_egress.v VERILOG_SOURCES += ../../rtl/mqnic_l2_ingress.v +VERILOG_SOURCES += ../../rtl/mqnic_rx_queue_map.v VERILOG_SOURCES += ../../rtl/mqnic_ptp.v VERILOG_SOURCES += ../../rtl/mqnic_ptp_clock.v VERILOG_SOURCES += ../../rtl/mqnic_ptp_perout.v diff --git a/fpga/common/tb/mqnic_core_pcie_s10/test_mqnic_core_pcie_s10.py b/fpga/common/tb/mqnic_core_pcie_s10/test_mqnic_core_pcie_s10.py index 8b7696000..906b13fd0 100644 --- a/fpga/common/tb/mqnic_core_pcie_s10/test_mqnic_core_pcie_s10.py +++ b/fpga/common/tb/mqnic_core_pcie_s10/test_mqnic_core_pcie_s10.py @@ -353,6 +353,61 @@ async def run_test_nic(dut): assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff assert Ether(pkt.data).build() == test_pkt.build() + tb.log.info("Queue mapping offset test") + + data = bytearray([x % 256 for x in range(1024)]) + + tb.loopback_enable = True + + for k in range(4): + await tb.driver.interfaces[0].set_rx_queue_map_offset(0, k) + + await tb.driver.interfaces[0].start_xmit(data, 0) + + pkt = await tb.driver.interfaces[0].recv() + + tb.log.info("Packet: %s", pkt) + assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff + assert pkt.queue == k + + tb.loopback_enable = False + + await tb.driver.interfaces[0].set_rx_queue_map_offset(0, 0) + + tb.log.info("Queue mapping RSS mask test") + + await tb.driver.interfaces[0].set_rx_queue_map_rss_mask(0, 0x00000003) + + tb.loopback_enable = True + + queues = set() + + for k in range(64): + payload = bytes([x % 256 for x in range(256)]) + eth = Ether(src='5A:51:52:53:54:55', dst='DA:D1:D2:D3:D4:D5') + ip = IP(src='192.168.1.100', dst='192.168.1.101') + udp = UDP(sport=1, dport=k+0) + test_pkt = eth / ip / udp / payload + + test_pkt2 = test_pkt.copy() + test_pkt2[UDP].chksum = scapy.utils.checksum(bytes(test_pkt2[UDP])) + + await tb.driver.interfaces[0].start_xmit(test_pkt2.build(), 0, 34, 6) + + for k in range(64): + pkt = await tb.driver.interfaces[0].recv() + + tb.log.info("Packet: %s", pkt) + assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff + + queues.add(pkt.queue) + + assert len(queues) == 4 + + tb.loopback_enable = False + + await tb.driver.interfaces[0].set_rx_queue_map_rss_mask(0, 0) + tb.log.info("Multiple small packets") count = 64 @@ -439,6 +494,7 @@ async def run_test_nic(dut): for block in tb.driver.interfaces[0].sched_blocks: await block.schedulers[0].rb.write_dword(mqnic.MQNIC_RB_SCHED_RR_REG_CTRL, 0x00000001) + await tb.driver.interfaces[0].set_rx_queue_map_offset(block.index, block.index) for k in range(block.interface.tx_queue_count): if k % len(tb.driver.interfaces[0].sched_blocks) == block.index: await block.schedulers[0].hw_regs.write_dword(4*k, 0x00000003) @@ -451,6 +507,8 @@ async def run_test_nic(dut): tb.loopback_enable = True + queues = set() + for k, p in enumerate(pkts): await tb.driver.interfaces[0].start_xmit(p, k % len(tb.driver.interfaces[0].sched_blocks)) @@ -461,10 +519,15 @@ async def run_test_nic(dut): # assert pkt.data == pkts[k] assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff + queues.add(pkt.queue) + + assert len(queues) == len(tb.driver.interfaces[0].sched_blocks) + tb.loopback_enable = False for block in tb.driver.interfaces[0].sched_blocks[1:]: await block.schedulers[0].rb.write_dword(mqnic.MQNIC_RB_SCHED_RR_REG_CTRL, 0x00000000) + await tb.driver.interfaces[0].set_rx_queue_map_offset(block.index, 0) tb.log.info("Read statistics counters") @@ -519,6 +582,7 @@ def test_mqnic_core_pcie_s10(request, if_count, ports_per_if, pcie_data_width, os.path.join(rtl_dir, "mqnic_ingress.v"), os.path.join(rtl_dir, "mqnic_l2_egress.v"), os.path.join(rtl_dir, "mqnic_l2_ingress.v"), + os.path.join(rtl_dir, "mqnic_rx_queue_map.v"), os.path.join(rtl_dir, "mqnic_ptp.v"), os.path.join(rtl_dir, "mqnic_ptp_clock.v"), os.path.join(rtl_dir, "mqnic_ptp_perout.v"), diff --git a/fpga/common/tb/mqnic_core_pcie_us/Makefile b/fpga/common/tb/mqnic_core_pcie_us/Makefile index 210f2648b..c9d8b838b 100644 --- a/fpga/common/tb/mqnic_core_pcie_us/Makefile +++ b/fpga/common/tb/mqnic_core_pcie_us/Makefile @@ -48,6 +48,7 @@ VERILOG_SOURCES += ../../rtl/mqnic_egress.v VERILOG_SOURCES += ../../rtl/mqnic_ingress.v VERILOG_SOURCES += ../../rtl/mqnic_l2_egress.v VERILOG_SOURCES += ../../rtl/mqnic_l2_ingress.v +VERILOG_SOURCES += ../../rtl/mqnic_rx_queue_map.v VERILOG_SOURCES += ../../rtl/mqnic_ptp.v VERILOG_SOURCES += ../../rtl/mqnic_ptp_clock.v VERILOG_SOURCES += ../../rtl/mqnic_ptp_perout.v diff --git a/fpga/common/tb/mqnic_core_pcie_us/test_mqnic_core_pcie_us.py b/fpga/common/tb/mqnic_core_pcie_us/test_mqnic_core_pcie_us.py index 6f33e0e89..f83eb6add 100644 --- a/fpga/common/tb/mqnic_core_pcie_us/test_mqnic_core_pcie_us.py +++ b/fpga/common/tb/mqnic_core_pcie_us/test_mqnic_core_pcie_us.py @@ -428,6 +428,61 @@ async def run_test_nic(dut): assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff assert Ether(pkt.data).build() == test_pkt.build() + tb.log.info("Queue mapping offset test") + + data = bytearray([x % 256 for x in range(1024)]) + + tb.loopback_enable = True + + for k in range(4): + await tb.driver.interfaces[0].set_rx_queue_map_offset(0, k) + + await tb.driver.interfaces[0].start_xmit(data, 0) + + pkt = await tb.driver.interfaces[0].recv() + + tb.log.info("Packet: %s", pkt) + assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff + assert pkt.queue == k + + tb.loopback_enable = False + + await tb.driver.interfaces[0].set_rx_queue_map_offset(0, 0) + + tb.log.info("Queue mapping RSS mask test") + + await tb.driver.interfaces[0].set_rx_queue_map_rss_mask(0, 0x00000003) + + tb.loopback_enable = True + + queues = set() + + for k in range(64): + payload = bytes([x % 256 for x in range(256)]) + eth = Ether(src='5A:51:52:53:54:55', dst='DA:D1:D2:D3:D4:D5') + ip = IP(src='192.168.1.100', dst='192.168.1.101') + udp = UDP(sport=1, dport=k+0) + test_pkt = eth / ip / udp / payload + + test_pkt2 = test_pkt.copy() + test_pkt2[UDP].chksum = scapy.utils.checksum(bytes(test_pkt2[UDP])) + + await tb.driver.interfaces[0].start_xmit(test_pkt2.build(), 0, 34, 6) + + for k in range(64): + pkt = await tb.driver.interfaces[0].recv() + + tb.log.info("Packet: %s", pkt) + assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff + + queues.add(pkt.queue) + + assert len(queues) == 4 + + tb.loopback_enable = False + + await tb.driver.interfaces[0].set_rx_queue_map_rss_mask(0, 0) + tb.log.info("Multiple small packets") count = 64 @@ -514,6 +569,7 @@ async def run_test_nic(dut): for block in tb.driver.interfaces[0].sched_blocks: await block.schedulers[0].rb.write_dword(mqnic.MQNIC_RB_SCHED_RR_REG_CTRL, 0x00000001) + await tb.driver.interfaces[0].set_rx_queue_map_offset(block.index, block.index) for k in range(block.interface.tx_queue_count): if k % len(tb.driver.interfaces[0].sched_blocks) == block.index: await block.schedulers[0].hw_regs.write_dword(4*k, 0x00000003) @@ -526,6 +582,8 @@ async def run_test_nic(dut): tb.loopback_enable = True + queues = set() + for k, p in enumerate(pkts): await tb.driver.interfaces[0].start_xmit(p, k % len(tb.driver.interfaces[0].sched_blocks)) @@ -536,10 +594,15 @@ async def run_test_nic(dut): # assert pkt.data == pkts[k] assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff + queues.add(pkt.queue) + + assert len(queues) == len(tb.driver.interfaces[0].sched_blocks) + tb.loopback_enable = False for block in tb.driver.interfaces[0].sched_blocks[1:]: await block.schedulers[0].rb.write_dword(mqnic.MQNIC_RB_SCHED_RR_REG_CTRL, 0x00000000) + await tb.driver.interfaces[0].set_rx_queue_map_offset(block.index, 0) tb.log.info("Read statistics counters") @@ -594,6 +657,7 @@ def test_mqnic_core_pcie_us(request, if_count, ports_per_if, axis_pcie_data_widt os.path.join(rtl_dir, "mqnic_ingress.v"), os.path.join(rtl_dir, "mqnic_l2_egress.v"), os.path.join(rtl_dir, "mqnic_l2_ingress.v"), + os.path.join(rtl_dir, "mqnic_rx_queue_map.v"), os.path.join(rtl_dir, "mqnic_ptp.v"), os.path.join(rtl_dir, "mqnic_ptp_clock.v"), os.path.join(rtl_dir, "mqnic_ptp_perout.v"), diff --git a/fpga/common/tb/mqnic_core_pcie_us_tdma/Makefile b/fpga/common/tb/mqnic_core_pcie_us_tdma/Makefile index b8eaad7b1..826d84095 100644 --- a/fpga/common/tb/mqnic_core_pcie_us_tdma/Makefile +++ b/fpga/common/tb/mqnic_core_pcie_us_tdma/Makefile @@ -48,6 +48,7 @@ VERILOG_SOURCES += ../../rtl/mqnic_egress.v VERILOG_SOURCES += ../../rtl/mqnic_ingress.v VERILOG_SOURCES += ../../rtl/mqnic_l2_egress.v VERILOG_SOURCES += ../../rtl/mqnic_l2_ingress.v +VERILOG_SOURCES += ../../rtl/mqnic_rx_queue_map.v VERILOG_SOURCES += ../../rtl/mqnic_ptp.v VERILOG_SOURCES += ../../rtl/mqnic_ptp_clock.v VERILOG_SOURCES += ../../rtl/mqnic_ptp_perout.v diff --git a/fpga/common/tb/mqnic_core_pcie_us_tdma/test_mqnic_core_pcie_us.py b/fpga/common/tb/mqnic_core_pcie_us_tdma/test_mqnic_core_pcie_us.py index 1a0be2aff..dc1ad2b9c 100644 --- a/fpga/common/tb/mqnic_core_pcie_us_tdma/test_mqnic_core_pcie_us.py +++ b/fpga/common/tb/mqnic_core_pcie_us_tdma/test_mqnic_core_pcie_us.py @@ -428,6 +428,61 @@ async def run_test_nic(dut): assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff assert Ether(pkt.data).build() == test_pkt.build() + tb.log.info("Queue mapping offset test") + + data = bytearray([x % 256 for x in range(1024)]) + + tb.loopback_enable = True + + for k in range(4): + await tb.driver.interfaces[0].set_rx_queue_map_offset(0, k) + + await tb.driver.interfaces[0].start_xmit(data, 0) + + pkt = await tb.driver.interfaces[0].recv() + + tb.log.info("Packet: %s", pkt) + assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff + assert pkt.queue == k + + tb.loopback_enable = False + + await tb.driver.interfaces[0].set_rx_queue_map_offset(0, 0) + + tb.log.info("Queue mapping RSS mask test") + + await tb.driver.interfaces[0].set_rx_queue_map_rss_mask(0, 0x00000003) + + tb.loopback_enable = True + + queues = set() + + for k in range(64): + payload = bytes([x % 256 for x in range(256)]) + eth = Ether(src='5A:51:52:53:54:55', dst='DA:D1:D2:D3:D4:D5') + ip = IP(src='192.168.1.100', dst='192.168.1.101') + udp = UDP(sport=1, dport=k+0) + test_pkt = eth / ip / udp / payload + + test_pkt2 = test_pkt.copy() + test_pkt2[UDP].chksum = scapy.utils.checksum(bytes(test_pkt2[UDP])) + + await tb.driver.interfaces[0].start_xmit(test_pkt2.build(), 0, 34, 6) + + for k in range(64): + pkt = await tb.driver.interfaces[0].recv() + + tb.log.info("Packet: %s", pkt) + assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff + + queues.add(pkt.queue) + + assert len(queues) == 4 + + tb.loopback_enable = False + + await tb.driver.interfaces[0].set_rx_queue_map_rss_mask(0, 0) + tb.log.info("Multiple small packets") count = 64 @@ -514,6 +569,7 @@ async def run_test_nic(dut): for block in tb.driver.interfaces[0].sched_blocks: await block.schedulers[0].rb.write_dword(mqnic.MQNIC_RB_SCHED_RR_REG_CTRL, 0x00000001) + await tb.driver.interfaces[0].set_rx_queue_map_offset(block.index, block.index) for k in range(block.interface.tx_queue_count): if k % len(tb.driver.interfaces[0].sched_blocks) == block.index: await block.schedulers[0].hw_regs.write_dword(4*k, 0x00000003) @@ -526,6 +582,8 @@ async def run_test_nic(dut): tb.loopback_enable = True + queues = set() + for k, p in enumerate(pkts): await tb.driver.interfaces[0].start_xmit(p, k % len(tb.driver.interfaces[0].sched_blocks)) @@ -536,10 +594,15 @@ async def run_test_nic(dut): # assert pkt.data == pkts[k] assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff + queues.add(pkt.queue) + + assert len(queues) == len(tb.driver.interfaces[0].sched_blocks) + tb.loopback_enable = False for block in tb.driver.interfaces[0].sched_blocks[1:]: await block.schedulers[0].rb.write_dword(mqnic.MQNIC_RB_SCHED_RR_REG_CTRL, 0x00000000) + await tb.driver.interfaces[0].set_rx_queue_map_offset(block.index, 0) await Timer(1000, 'ns') @@ -647,6 +710,7 @@ def test_mqnic_core_pcie_us(request, if_count, ports_per_if, axis_pcie_data_widt os.path.join(rtl_dir, "mqnic_ingress.v"), os.path.join(rtl_dir, "mqnic_l2_egress.v"), os.path.join(rtl_dir, "mqnic_l2_ingress.v"), + os.path.join(rtl_dir, "mqnic_rx_queue_map.v"), os.path.join(rtl_dir, "mqnic_ptp.v"), os.path.join(rtl_dir, "mqnic_ptp_clock.v"), os.path.join(rtl_dir, "mqnic_ptp_perout.v"), diff --git a/fpga/mqnic/ADM_PCIE_9V3/fpga_100g/fpga/Makefile b/fpga/mqnic/ADM_PCIE_9V3/fpga_100g/fpga/Makefile index 92af97d86..b34cc4d05 100644 --- a/fpga/mqnic/ADM_PCIE_9V3/fpga_100g/fpga/Makefile +++ b/fpga/mqnic/ADM_PCIE_9V3/fpga_100g/fpga/Makefile @@ -19,6 +19,7 @@ SYN_FILES += rtl/common/mqnic_egress.v SYN_FILES += rtl/common/mqnic_ingress.v SYN_FILES += rtl/common/mqnic_l2_egress.v SYN_FILES += rtl/common/mqnic_l2_ingress.v +SYN_FILES += rtl/common/mqnic_rx_queue_map.v SYN_FILES += rtl/common/mqnic_ptp.v SYN_FILES += rtl/common/mqnic_ptp_clock.v SYN_FILES += rtl/common/mqnic_ptp_perout.v diff --git a/fpga/mqnic/ADM_PCIE_9V3/fpga_100g/fpga_tdma/Makefile b/fpga/mqnic/ADM_PCIE_9V3/fpga_100g/fpga_tdma/Makefile index 924981026..552ddc2ae 100644 --- a/fpga/mqnic/ADM_PCIE_9V3/fpga_100g/fpga_tdma/Makefile +++ b/fpga/mqnic/ADM_PCIE_9V3/fpga_100g/fpga_tdma/Makefile @@ -19,6 +19,7 @@ SYN_FILES += rtl/common/mqnic_egress.v SYN_FILES += rtl/common/mqnic_ingress.v SYN_FILES += rtl/common/mqnic_l2_egress.v SYN_FILES += rtl/common/mqnic_l2_ingress.v +SYN_FILES += rtl/common/mqnic_rx_queue_map.v SYN_FILES += rtl/common/mqnic_ptp.v SYN_FILES += rtl/common/mqnic_ptp_clock.v SYN_FILES += rtl/common/mqnic_ptp_perout.v diff --git a/fpga/mqnic/ADM_PCIE_9V3/fpga_100g/tb/fpga_core/Makefile b/fpga/mqnic/ADM_PCIE_9V3/fpga_100g/tb/fpga_core/Makefile index 17679d545..09369484f 100644 --- a/fpga/mqnic/ADM_PCIE_9V3/fpga_100g/tb/fpga_core/Makefile +++ b/fpga/mqnic/ADM_PCIE_9V3/fpga_100g/tb/fpga_core/Makefile @@ -49,6 +49,7 @@ VERILOG_SOURCES += ../../rtl/common/mqnic_egress.v VERILOG_SOURCES += ../../rtl/common/mqnic_ingress.v VERILOG_SOURCES += ../../rtl/common/mqnic_l2_egress.v VERILOG_SOURCES += ../../rtl/common/mqnic_l2_ingress.v +VERILOG_SOURCES += ../../rtl/common/mqnic_rx_queue_map.v VERILOG_SOURCES += ../../rtl/common/mqnic_ptp.v VERILOG_SOURCES += ../../rtl/common/mqnic_ptp_clock.v VERILOG_SOURCES += ../../rtl/common/mqnic_ptp_perout.v diff --git a/fpga/mqnic/ADM_PCIE_9V3/fpga_100g/tb/fpga_core/test_fpga_core.py b/fpga/mqnic/ADM_PCIE_9V3/fpga_100g/tb/fpga_core/test_fpga_core.py index 8dd22d3db..5a3777820 100644 --- a/fpga/mqnic/ADM_PCIE_9V3/fpga_100g/tb/fpga_core/test_fpga_core.py +++ b/fpga/mqnic/ADM_PCIE_9V3/fpga_100g/tb/fpga_core/test_fpga_core.py @@ -436,6 +436,61 @@ async def run_test_nic(dut): assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff assert Ether(pkt.data).build() == test_pkt.build() + tb.log.info("Queue mapping offset test") + + data = bytearray([x % 256 for x in range(1024)]) + + tb.loopback_enable = True + + for k in range(4): + await tb.driver.interfaces[0].set_rx_queue_map_offset(0, k) + + await tb.driver.interfaces[0].start_xmit(data, 0) + + pkt = await tb.driver.interfaces[0].recv() + + tb.log.info("Packet: %s", pkt) + assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff + assert pkt.queue == k + + tb.loopback_enable = False + + await tb.driver.interfaces[0].set_rx_queue_map_offset(0, 0) + + tb.log.info("Queue mapping RSS mask test") + + await tb.driver.interfaces[0].set_rx_queue_map_rss_mask(0, 0x00000003) + + tb.loopback_enable = True + + queues = set() + + for k in range(64): + payload = bytes([x % 256 for x in range(256)]) + eth = Ether(src='5A:51:52:53:54:55', dst='DA:D1:D2:D3:D4:D5') + ip = IP(src='192.168.1.100', dst='192.168.1.101') + udp = UDP(sport=1, dport=k+0) + test_pkt = eth / ip / udp / payload + + test_pkt2 = test_pkt.copy() + test_pkt2[UDP].chksum = scapy.utils.checksum(bytes(test_pkt2[UDP])) + + await tb.driver.interfaces[0].start_xmit(test_pkt2.build(), 0, 34, 6) + + for k in range(64): + pkt = await tb.driver.interfaces[0].recv() + + tb.log.info("Packet: %s", pkt) + assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff + + queues.add(pkt.queue) + + assert len(queues) == 4 + + tb.loopback_enable = False + + await tb.driver.interfaces[0].set_rx_queue_map_rss_mask(0, 0) + tb.log.info("Multiple small packets") count = 64 @@ -529,6 +584,7 @@ def test_fpga_core(request): os.path.join(rtl_dir, "common", "mqnic_ingress.v"), os.path.join(rtl_dir, "common", "mqnic_l2_egress.v"), os.path.join(rtl_dir, "common", "mqnic_l2_ingress.v"), + os.path.join(rtl_dir, "common", "mqnic_rx_queue_map.v"), os.path.join(rtl_dir, "common", "mqnic_ptp.v"), os.path.join(rtl_dir, "common", "mqnic_ptp_clock.v"), os.path.join(rtl_dir, "common", "mqnic_ptp_perout.v"), diff --git a/fpga/mqnic/ADM_PCIE_9V3/fpga_25g/fpga/Makefile b/fpga/mqnic/ADM_PCIE_9V3/fpga_25g/fpga/Makefile index 0f846c936..823332246 100644 --- a/fpga/mqnic/ADM_PCIE_9V3/fpga_25g/fpga/Makefile +++ b/fpga/mqnic/ADM_PCIE_9V3/fpga_25g/fpga/Makefile @@ -19,6 +19,7 @@ SYN_FILES += rtl/common/mqnic_egress.v SYN_FILES += rtl/common/mqnic_ingress.v SYN_FILES += rtl/common/mqnic_l2_egress.v SYN_FILES += rtl/common/mqnic_l2_ingress.v +SYN_FILES += rtl/common/mqnic_rx_queue_map.v SYN_FILES += rtl/common/mqnic_ptp.v SYN_FILES += rtl/common/mqnic_ptp_clock.v SYN_FILES += rtl/common/mqnic_ptp_perout.v diff --git a/fpga/mqnic/ADM_PCIE_9V3/fpga_25g/fpga_10g/Makefile b/fpga/mqnic/ADM_PCIE_9V3/fpga_25g/fpga_10g/Makefile index 0f846c936..823332246 100644 --- a/fpga/mqnic/ADM_PCIE_9V3/fpga_25g/fpga_10g/Makefile +++ b/fpga/mqnic/ADM_PCIE_9V3/fpga_25g/fpga_10g/Makefile @@ -19,6 +19,7 @@ SYN_FILES += rtl/common/mqnic_egress.v SYN_FILES += rtl/common/mqnic_ingress.v SYN_FILES += rtl/common/mqnic_l2_egress.v SYN_FILES += rtl/common/mqnic_l2_ingress.v +SYN_FILES += rtl/common/mqnic_rx_queue_map.v SYN_FILES += rtl/common/mqnic_ptp.v SYN_FILES += rtl/common/mqnic_ptp_clock.v SYN_FILES += rtl/common/mqnic_ptp_perout.v diff --git a/fpga/mqnic/ADM_PCIE_9V3/fpga_25g/fpga_tdma/Makefile b/fpga/mqnic/ADM_PCIE_9V3/fpga_25g/fpga_tdma/Makefile index bcc5503fa..2b1eeac42 100644 --- a/fpga/mqnic/ADM_PCIE_9V3/fpga_25g/fpga_tdma/Makefile +++ b/fpga/mqnic/ADM_PCIE_9V3/fpga_25g/fpga_tdma/Makefile @@ -19,6 +19,7 @@ SYN_FILES += rtl/common/mqnic_egress.v SYN_FILES += rtl/common/mqnic_ingress.v SYN_FILES += rtl/common/mqnic_l2_egress.v SYN_FILES += rtl/common/mqnic_l2_ingress.v +SYN_FILES += rtl/common/mqnic_rx_queue_map.v SYN_FILES += rtl/common/mqnic_ptp.v SYN_FILES += rtl/common/mqnic_ptp_clock.v SYN_FILES += rtl/common/mqnic_ptp_perout.v diff --git a/fpga/mqnic/ADM_PCIE_9V3/fpga_25g/tb/fpga_core/Makefile b/fpga/mqnic/ADM_PCIE_9V3/fpga_25g/tb/fpga_core/Makefile index adc49db3a..a3a44b89b 100644 --- a/fpga/mqnic/ADM_PCIE_9V3/fpga_25g/tb/fpga_core/Makefile +++ b/fpga/mqnic/ADM_PCIE_9V3/fpga_25g/tb/fpga_core/Makefile @@ -49,6 +49,7 @@ VERILOG_SOURCES += ../../rtl/common/mqnic_egress.v VERILOG_SOURCES += ../../rtl/common/mqnic_ingress.v VERILOG_SOURCES += ../../rtl/common/mqnic_l2_egress.v VERILOG_SOURCES += ../../rtl/common/mqnic_l2_ingress.v +VERILOG_SOURCES += ../../rtl/common/mqnic_rx_queue_map.v VERILOG_SOURCES += ../../rtl/common/mqnic_ptp.v VERILOG_SOURCES += ../../rtl/common/mqnic_ptp_clock.v VERILOG_SOURCES += ../../rtl/common/mqnic_ptp_perout.v diff --git a/fpga/mqnic/ADM_PCIE_9V3/fpga_25g/tb/fpga_core/test_fpga_core.py b/fpga/mqnic/ADM_PCIE_9V3/fpga_25g/tb/fpga_core/test_fpga_core.py index aae97c531..7eca308a4 100644 --- a/fpga/mqnic/ADM_PCIE_9V3/fpga_25g/tb/fpga_core/test_fpga_core.py +++ b/fpga/mqnic/ADM_PCIE_9V3/fpga_25g/tb/fpga_core/test_fpga_core.py @@ -508,6 +508,61 @@ async def run_test_nic(dut): assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff assert Ether(pkt.data).build() == test_pkt.build() + tb.log.info("Queue mapping offset test") + + data = bytearray([x % 256 for x in range(1024)]) + + tb.loopback_enable = True + + for k in range(4): + await tb.driver.interfaces[0].set_rx_queue_map_offset(0, k) + + await tb.driver.interfaces[0].start_xmit(data, 0) + + pkt = await tb.driver.interfaces[0].recv() + + tb.log.info("Packet: %s", pkt) + assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff + assert pkt.queue == k + + tb.loopback_enable = False + + await tb.driver.interfaces[0].set_rx_queue_map_offset(0, 0) + + tb.log.info("Queue mapping RSS mask test") + + await tb.driver.interfaces[0].set_rx_queue_map_rss_mask(0, 0x00000003) + + tb.loopback_enable = True + + queues = set() + + for k in range(64): + payload = bytes([x % 256 for x in range(256)]) + eth = Ether(src='5A:51:52:53:54:55', dst='DA:D1:D2:D3:D4:D5') + ip = IP(src='192.168.1.100', dst='192.168.1.101') + udp = UDP(sport=1, dport=k+0) + test_pkt = eth / ip / udp / payload + + test_pkt2 = test_pkt.copy() + test_pkt2[UDP].chksum = scapy.utils.checksum(bytes(test_pkt2[UDP])) + + await tb.driver.interfaces[0].start_xmit(test_pkt2.build(), 0, 34, 6) + + for k in range(64): + pkt = await tb.driver.interfaces[0].recv() + + tb.log.info("Packet: %s", pkt) + assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff + + queues.add(pkt.queue) + + assert len(queues) == 4 + + tb.loopback_enable = False + + await tb.driver.interfaces[0].set_rx_queue_map_rss_mask(0, 0) + tb.log.info("Multiple small packets") count = 64 @@ -581,6 +636,7 @@ def test_fpga_core(request): os.path.join(rtl_dir, "common", "mqnic_ingress.v"), os.path.join(rtl_dir, "common", "mqnic_l2_egress.v"), os.path.join(rtl_dir, "common", "mqnic_l2_ingress.v"), + os.path.join(rtl_dir, "common", "mqnic_rx_queue_map.v"), os.path.join(rtl_dir, "common", "mqnic_ptp.v"), os.path.join(rtl_dir, "common", "mqnic_ptp_clock.v"), os.path.join(rtl_dir, "common", "mqnic_ptp_perout.v"), diff --git a/fpga/mqnic/AU200/fpga_100g/fpga/Makefile b/fpga/mqnic/AU200/fpga_100g/fpga/Makefile index fd0516265..f26177900 100644 --- a/fpga/mqnic/AU200/fpga_100g/fpga/Makefile +++ b/fpga/mqnic/AU200/fpga_100g/fpga/Makefile @@ -19,6 +19,7 @@ SYN_FILES += rtl/common/mqnic_egress.v SYN_FILES += rtl/common/mqnic_ingress.v SYN_FILES += rtl/common/mqnic_l2_egress.v SYN_FILES += rtl/common/mqnic_l2_ingress.v +SYN_FILES += rtl/common/mqnic_rx_queue_map.v SYN_FILES += rtl/common/mqnic_ptp.v SYN_FILES += rtl/common/mqnic_ptp_clock.v SYN_FILES += rtl/common/mqnic_ptp_perout.v diff --git a/fpga/mqnic/AU200/fpga_100g/tb/fpga_core/Makefile b/fpga/mqnic/AU200/fpga_100g/tb/fpga_core/Makefile index a050cea0e..e05ee2c28 100644 --- a/fpga/mqnic/AU200/fpga_100g/tb/fpga_core/Makefile +++ b/fpga/mqnic/AU200/fpga_100g/tb/fpga_core/Makefile @@ -49,6 +49,7 @@ VERILOG_SOURCES += ../../rtl/common/mqnic_egress.v VERILOG_SOURCES += ../../rtl/common/mqnic_ingress.v VERILOG_SOURCES += ../../rtl/common/mqnic_l2_egress.v VERILOG_SOURCES += ../../rtl/common/mqnic_l2_ingress.v +VERILOG_SOURCES += ../../rtl/common/mqnic_rx_queue_map.v VERILOG_SOURCES += ../../rtl/common/mqnic_ptp.v VERILOG_SOURCES += ../../rtl/common/mqnic_ptp_clock.v VERILOG_SOURCES += ../../rtl/common/mqnic_ptp_perout.v diff --git a/fpga/mqnic/AU200/fpga_100g/tb/fpga_core/test_fpga_core.py b/fpga/mqnic/AU200/fpga_100g/tb/fpga_core/test_fpga_core.py index f75309a54..86f85caa0 100644 --- a/fpga/mqnic/AU200/fpga_100g/tb/fpga_core/test_fpga_core.py +++ b/fpga/mqnic/AU200/fpga_100g/tb/fpga_core/test_fpga_core.py @@ -436,6 +436,61 @@ async def run_test_nic(dut): assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff assert Ether(pkt.data).build() == test_pkt.build() + tb.log.info("Queue mapping offset test") + + data = bytearray([x % 256 for x in range(1024)]) + + tb.loopback_enable = True + + for k in range(4): + await tb.driver.interfaces[0].set_rx_queue_map_offset(0, k) + + await tb.driver.interfaces[0].start_xmit(data, 0) + + pkt = await tb.driver.interfaces[0].recv() + + tb.log.info("Packet: %s", pkt) + assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff + assert pkt.queue == k + + tb.loopback_enable = False + + await tb.driver.interfaces[0].set_rx_queue_map_offset(0, 0) + + tb.log.info("Queue mapping RSS mask test") + + await tb.driver.interfaces[0].set_rx_queue_map_rss_mask(0, 0x00000003) + + tb.loopback_enable = True + + queues = set() + + for k in range(64): + payload = bytes([x % 256 for x in range(256)]) + eth = Ether(src='5A:51:52:53:54:55', dst='DA:D1:D2:D3:D4:D5') + ip = IP(src='192.168.1.100', dst='192.168.1.101') + udp = UDP(sport=1, dport=k+0) + test_pkt = eth / ip / udp / payload + + test_pkt2 = test_pkt.copy() + test_pkt2[UDP].chksum = scapy.utils.checksum(bytes(test_pkt2[UDP])) + + await tb.driver.interfaces[0].start_xmit(test_pkt2.build(), 0, 34, 6) + + for k in range(64): + pkt = await tb.driver.interfaces[0].recv() + + tb.log.info("Packet: %s", pkt) + assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff + + queues.add(pkt.queue) + + assert len(queues) == 4 + + tb.loopback_enable = False + + await tb.driver.interfaces[0].set_rx_queue_map_rss_mask(0, 0) + tb.log.info("Multiple small packets") count = 64 @@ -529,6 +584,7 @@ def test_fpga_core(request): os.path.join(rtl_dir, "common", "mqnic_ingress.v"), os.path.join(rtl_dir, "common", "mqnic_l2_egress.v"), os.path.join(rtl_dir, "common", "mqnic_l2_ingress.v"), + os.path.join(rtl_dir, "common", "mqnic_rx_queue_map.v"), os.path.join(rtl_dir, "common", "mqnic_ptp.v"), os.path.join(rtl_dir, "common", "mqnic_ptp_clock.v"), os.path.join(rtl_dir, "common", "mqnic_ptp_perout.v"), diff --git a/fpga/mqnic/AU200/fpga_25g/fpga/Makefile b/fpga/mqnic/AU200/fpga_25g/fpga/Makefile index 034ab80d6..3c1d3c416 100644 --- a/fpga/mqnic/AU200/fpga_25g/fpga/Makefile +++ b/fpga/mqnic/AU200/fpga_25g/fpga/Makefile @@ -19,6 +19,7 @@ SYN_FILES += rtl/common/mqnic_egress.v SYN_FILES += rtl/common/mqnic_ingress.v SYN_FILES += rtl/common/mqnic_l2_egress.v SYN_FILES += rtl/common/mqnic_l2_ingress.v +SYN_FILES += rtl/common/mqnic_rx_queue_map.v SYN_FILES += rtl/common/mqnic_ptp.v SYN_FILES += rtl/common/mqnic_ptp_clock.v SYN_FILES += rtl/common/mqnic_ptp_perout.v diff --git a/fpga/mqnic/AU200/fpga_25g/fpga_10g/Makefile b/fpga/mqnic/AU200/fpga_25g/fpga_10g/Makefile index 034ab80d6..3c1d3c416 100644 --- a/fpga/mqnic/AU200/fpga_25g/fpga_10g/Makefile +++ b/fpga/mqnic/AU200/fpga_25g/fpga_10g/Makefile @@ -19,6 +19,7 @@ SYN_FILES += rtl/common/mqnic_egress.v SYN_FILES += rtl/common/mqnic_ingress.v SYN_FILES += rtl/common/mqnic_l2_egress.v SYN_FILES += rtl/common/mqnic_l2_ingress.v +SYN_FILES += rtl/common/mqnic_rx_queue_map.v SYN_FILES += rtl/common/mqnic_ptp.v SYN_FILES += rtl/common/mqnic_ptp_clock.v SYN_FILES += rtl/common/mqnic_ptp_perout.v diff --git a/fpga/mqnic/AU200/fpga_25g/tb/fpga_core/Makefile b/fpga/mqnic/AU200/fpga_25g/tb/fpga_core/Makefile index b28ec4948..c6f43edd5 100644 --- a/fpga/mqnic/AU200/fpga_25g/tb/fpga_core/Makefile +++ b/fpga/mqnic/AU200/fpga_25g/tb/fpga_core/Makefile @@ -49,6 +49,7 @@ VERILOG_SOURCES += ../../rtl/common/mqnic_egress.v VERILOG_SOURCES += ../../rtl/common/mqnic_ingress.v VERILOG_SOURCES += ../../rtl/common/mqnic_l2_egress.v VERILOG_SOURCES += ../../rtl/common/mqnic_l2_ingress.v +VERILOG_SOURCES += ../../rtl/common/mqnic_rx_queue_map.v VERILOG_SOURCES += ../../rtl/common/mqnic_ptp.v VERILOG_SOURCES += ../../rtl/common/mqnic_ptp_clock.v VERILOG_SOURCES += ../../rtl/common/mqnic_ptp_perout.v diff --git a/fpga/mqnic/AU200/fpga_25g/tb/fpga_core/test_fpga_core.py b/fpga/mqnic/AU200/fpga_25g/tb/fpga_core/test_fpga_core.py index 2bcfed2d5..0192026f1 100644 --- a/fpga/mqnic/AU200/fpga_25g/tb/fpga_core/test_fpga_core.py +++ b/fpga/mqnic/AU200/fpga_25g/tb/fpga_core/test_fpga_core.py @@ -508,6 +508,61 @@ async def run_test_nic(dut): assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff assert Ether(pkt.data).build() == test_pkt.build() + tb.log.info("Queue mapping offset test") + + data = bytearray([x % 256 for x in range(1024)]) + + tb.loopback_enable = True + + for k in range(4): + await tb.driver.interfaces[0].set_rx_queue_map_offset(0, k) + + await tb.driver.interfaces[0].start_xmit(data, 0) + + pkt = await tb.driver.interfaces[0].recv() + + tb.log.info("Packet: %s", pkt) + assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff + assert pkt.queue == k + + tb.loopback_enable = False + + await tb.driver.interfaces[0].set_rx_queue_map_offset(0, 0) + + tb.log.info("Queue mapping RSS mask test") + + await tb.driver.interfaces[0].set_rx_queue_map_rss_mask(0, 0x00000003) + + tb.loopback_enable = True + + queues = set() + + for k in range(64): + payload = bytes([x % 256 for x in range(256)]) + eth = Ether(src='5A:51:52:53:54:55', dst='DA:D1:D2:D3:D4:D5') + ip = IP(src='192.168.1.100', dst='192.168.1.101') + udp = UDP(sport=1, dport=k+0) + test_pkt = eth / ip / udp / payload + + test_pkt2 = test_pkt.copy() + test_pkt2[UDP].chksum = scapy.utils.checksum(bytes(test_pkt2[UDP])) + + await tb.driver.interfaces[0].start_xmit(test_pkt2.build(), 0, 34, 6) + + for k in range(64): + pkt = await tb.driver.interfaces[0].recv() + + tb.log.info("Packet: %s", pkt) + assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff + + queues.add(pkt.queue) + + assert len(queues) == 4 + + tb.loopback_enable = False + + await tb.driver.interfaces[0].set_rx_queue_map_rss_mask(0, 0) + tb.log.info("Multiple small packets") count = 64 @@ -581,6 +636,7 @@ def test_fpga_core(request): os.path.join(rtl_dir, "common", "mqnic_ingress.v"), os.path.join(rtl_dir, "common", "mqnic_l2_egress.v"), os.path.join(rtl_dir, "common", "mqnic_l2_ingress.v"), + os.path.join(rtl_dir, "common", "mqnic_rx_queue_map.v"), os.path.join(rtl_dir, "common", "mqnic_ptp.v"), os.path.join(rtl_dir, "common", "mqnic_ptp_clock.v"), os.path.join(rtl_dir, "common", "mqnic_ptp_perout.v"), diff --git a/fpga/mqnic/AU250/fpga_100g/fpga/Makefile b/fpga/mqnic/AU250/fpga_100g/fpga/Makefile index 7fc858493..220d4db2f 100644 --- a/fpga/mqnic/AU250/fpga_100g/fpga/Makefile +++ b/fpga/mqnic/AU250/fpga_100g/fpga/Makefile @@ -19,6 +19,7 @@ SYN_FILES += rtl/common/mqnic_egress.v SYN_FILES += rtl/common/mqnic_ingress.v SYN_FILES += rtl/common/mqnic_l2_egress.v SYN_FILES += rtl/common/mqnic_l2_ingress.v +SYN_FILES += rtl/common/mqnic_rx_queue_map.v SYN_FILES += rtl/common/mqnic_ptp.v SYN_FILES += rtl/common/mqnic_ptp_clock.v SYN_FILES += rtl/common/mqnic_ptp_perout.v diff --git a/fpga/mqnic/AU250/fpga_100g/tb/fpga_core/Makefile b/fpga/mqnic/AU250/fpga_100g/tb/fpga_core/Makefile index a050cea0e..e05ee2c28 100644 --- a/fpga/mqnic/AU250/fpga_100g/tb/fpga_core/Makefile +++ b/fpga/mqnic/AU250/fpga_100g/tb/fpga_core/Makefile @@ -49,6 +49,7 @@ VERILOG_SOURCES += ../../rtl/common/mqnic_egress.v VERILOG_SOURCES += ../../rtl/common/mqnic_ingress.v VERILOG_SOURCES += ../../rtl/common/mqnic_l2_egress.v VERILOG_SOURCES += ../../rtl/common/mqnic_l2_ingress.v +VERILOG_SOURCES += ../../rtl/common/mqnic_rx_queue_map.v VERILOG_SOURCES += ../../rtl/common/mqnic_ptp.v VERILOG_SOURCES += ../../rtl/common/mqnic_ptp_clock.v VERILOG_SOURCES += ../../rtl/common/mqnic_ptp_perout.v diff --git a/fpga/mqnic/AU250/fpga_100g/tb/fpga_core/test_fpga_core.py b/fpga/mqnic/AU250/fpga_100g/tb/fpga_core/test_fpga_core.py index f75309a54..86f85caa0 100644 --- a/fpga/mqnic/AU250/fpga_100g/tb/fpga_core/test_fpga_core.py +++ b/fpga/mqnic/AU250/fpga_100g/tb/fpga_core/test_fpga_core.py @@ -436,6 +436,61 @@ async def run_test_nic(dut): assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff assert Ether(pkt.data).build() == test_pkt.build() + tb.log.info("Queue mapping offset test") + + data = bytearray([x % 256 for x in range(1024)]) + + tb.loopback_enable = True + + for k in range(4): + await tb.driver.interfaces[0].set_rx_queue_map_offset(0, k) + + await tb.driver.interfaces[0].start_xmit(data, 0) + + pkt = await tb.driver.interfaces[0].recv() + + tb.log.info("Packet: %s", pkt) + assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff + assert pkt.queue == k + + tb.loopback_enable = False + + await tb.driver.interfaces[0].set_rx_queue_map_offset(0, 0) + + tb.log.info("Queue mapping RSS mask test") + + await tb.driver.interfaces[0].set_rx_queue_map_rss_mask(0, 0x00000003) + + tb.loopback_enable = True + + queues = set() + + for k in range(64): + payload = bytes([x % 256 for x in range(256)]) + eth = Ether(src='5A:51:52:53:54:55', dst='DA:D1:D2:D3:D4:D5') + ip = IP(src='192.168.1.100', dst='192.168.1.101') + udp = UDP(sport=1, dport=k+0) + test_pkt = eth / ip / udp / payload + + test_pkt2 = test_pkt.copy() + test_pkt2[UDP].chksum = scapy.utils.checksum(bytes(test_pkt2[UDP])) + + await tb.driver.interfaces[0].start_xmit(test_pkt2.build(), 0, 34, 6) + + for k in range(64): + pkt = await tb.driver.interfaces[0].recv() + + tb.log.info("Packet: %s", pkt) + assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff + + queues.add(pkt.queue) + + assert len(queues) == 4 + + tb.loopback_enable = False + + await tb.driver.interfaces[0].set_rx_queue_map_rss_mask(0, 0) + tb.log.info("Multiple small packets") count = 64 @@ -529,6 +584,7 @@ def test_fpga_core(request): os.path.join(rtl_dir, "common", "mqnic_ingress.v"), os.path.join(rtl_dir, "common", "mqnic_l2_egress.v"), os.path.join(rtl_dir, "common", "mqnic_l2_ingress.v"), + os.path.join(rtl_dir, "common", "mqnic_rx_queue_map.v"), os.path.join(rtl_dir, "common", "mqnic_ptp.v"), os.path.join(rtl_dir, "common", "mqnic_ptp_clock.v"), os.path.join(rtl_dir, "common", "mqnic_ptp_perout.v"), diff --git a/fpga/mqnic/AU250/fpga_25g/fpga/Makefile b/fpga/mqnic/AU250/fpga_25g/fpga/Makefile index 92e50460a..ccc57ebb6 100644 --- a/fpga/mqnic/AU250/fpga_25g/fpga/Makefile +++ b/fpga/mqnic/AU250/fpga_25g/fpga/Makefile @@ -19,6 +19,7 @@ SYN_FILES += rtl/common/mqnic_egress.v SYN_FILES += rtl/common/mqnic_ingress.v SYN_FILES += rtl/common/mqnic_l2_egress.v SYN_FILES += rtl/common/mqnic_l2_ingress.v +SYN_FILES += rtl/common/mqnic_rx_queue_map.v SYN_FILES += rtl/common/mqnic_ptp.v SYN_FILES += rtl/common/mqnic_ptp_clock.v SYN_FILES += rtl/common/mqnic_ptp_perout.v diff --git a/fpga/mqnic/AU250/fpga_25g/fpga_10g/Makefile b/fpga/mqnic/AU250/fpga_25g/fpga_10g/Makefile index 92e50460a..ccc57ebb6 100644 --- a/fpga/mqnic/AU250/fpga_25g/fpga_10g/Makefile +++ b/fpga/mqnic/AU250/fpga_25g/fpga_10g/Makefile @@ -19,6 +19,7 @@ SYN_FILES += rtl/common/mqnic_egress.v SYN_FILES += rtl/common/mqnic_ingress.v SYN_FILES += rtl/common/mqnic_l2_egress.v SYN_FILES += rtl/common/mqnic_l2_ingress.v +SYN_FILES += rtl/common/mqnic_rx_queue_map.v SYN_FILES += rtl/common/mqnic_ptp.v SYN_FILES += rtl/common/mqnic_ptp_clock.v SYN_FILES += rtl/common/mqnic_ptp_perout.v diff --git a/fpga/mqnic/AU250/fpga_25g/tb/fpga_core/Makefile b/fpga/mqnic/AU250/fpga_25g/tb/fpga_core/Makefile index b28ec4948..c6f43edd5 100644 --- a/fpga/mqnic/AU250/fpga_25g/tb/fpga_core/Makefile +++ b/fpga/mqnic/AU250/fpga_25g/tb/fpga_core/Makefile @@ -49,6 +49,7 @@ VERILOG_SOURCES += ../../rtl/common/mqnic_egress.v VERILOG_SOURCES += ../../rtl/common/mqnic_ingress.v VERILOG_SOURCES += ../../rtl/common/mqnic_l2_egress.v VERILOG_SOURCES += ../../rtl/common/mqnic_l2_ingress.v +VERILOG_SOURCES += ../../rtl/common/mqnic_rx_queue_map.v VERILOG_SOURCES += ../../rtl/common/mqnic_ptp.v VERILOG_SOURCES += ../../rtl/common/mqnic_ptp_clock.v VERILOG_SOURCES += ../../rtl/common/mqnic_ptp_perout.v diff --git a/fpga/mqnic/AU250/fpga_25g/tb/fpga_core/test_fpga_core.py b/fpga/mqnic/AU250/fpga_25g/tb/fpga_core/test_fpga_core.py index 2bcfed2d5..0192026f1 100644 --- a/fpga/mqnic/AU250/fpga_25g/tb/fpga_core/test_fpga_core.py +++ b/fpga/mqnic/AU250/fpga_25g/tb/fpga_core/test_fpga_core.py @@ -508,6 +508,61 @@ async def run_test_nic(dut): assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff assert Ether(pkt.data).build() == test_pkt.build() + tb.log.info("Queue mapping offset test") + + data = bytearray([x % 256 for x in range(1024)]) + + tb.loopback_enable = True + + for k in range(4): + await tb.driver.interfaces[0].set_rx_queue_map_offset(0, k) + + await tb.driver.interfaces[0].start_xmit(data, 0) + + pkt = await tb.driver.interfaces[0].recv() + + tb.log.info("Packet: %s", pkt) + assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff + assert pkt.queue == k + + tb.loopback_enable = False + + await tb.driver.interfaces[0].set_rx_queue_map_offset(0, 0) + + tb.log.info("Queue mapping RSS mask test") + + await tb.driver.interfaces[0].set_rx_queue_map_rss_mask(0, 0x00000003) + + tb.loopback_enable = True + + queues = set() + + for k in range(64): + payload = bytes([x % 256 for x in range(256)]) + eth = Ether(src='5A:51:52:53:54:55', dst='DA:D1:D2:D3:D4:D5') + ip = IP(src='192.168.1.100', dst='192.168.1.101') + udp = UDP(sport=1, dport=k+0) + test_pkt = eth / ip / udp / payload + + test_pkt2 = test_pkt.copy() + test_pkt2[UDP].chksum = scapy.utils.checksum(bytes(test_pkt2[UDP])) + + await tb.driver.interfaces[0].start_xmit(test_pkt2.build(), 0, 34, 6) + + for k in range(64): + pkt = await tb.driver.interfaces[0].recv() + + tb.log.info("Packet: %s", pkt) + assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff + + queues.add(pkt.queue) + + assert len(queues) == 4 + + tb.loopback_enable = False + + await tb.driver.interfaces[0].set_rx_queue_map_rss_mask(0, 0) + tb.log.info("Multiple small packets") count = 64 @@ -581,6 +636,7 @@ def test_fpga_core(request): os.path.join(rtl_dir, "common", "mqnic_ingress.v"), os.path.join(rtl_dir, "common", "mqnic_l2_egress.v"), os.path.join(rtl_dir, "common", "mqnic_l2_ingress.v"), + os.path.join(rtl_dir, "common", "mqnic_rx_queue_map.v"), os.path.join(rtl_dir, "common", "mqnic_ptp.v"), os.path.join(rtl_dir, "common", "mqnic_ptp_clock.v"), os.path.join(rtl_dir, "common", "mqnic_ptp_perout.v"), diff --git a/fpga/mqnic/AU280/fpga_100g/fpga/Makefile b/fpga/mqnic/AU280/fpga_100g/fpga/Makefile index 2f6e506f3..7cb766a41 100644 --- a/fpga/mqnic/AU280/fpga_100g/fpga/Makefile +++ b/fpga/mqnic/AU280/fpga_100g/fpga/Makefile @@ -18,6 +18,7 @@ SYN_FILES += rtl/common/mqnic_egress.v SYN_FILES += rtl/common/mqnic_ingress.v SYN_FILES += rtl/common/mqnic_l2_egress.v SYN_FILES += rtl/common/mqnic_l2_ingress.v +SYN_FILES += rtl/common/mqnic_rx_queue_map.v SYN_FILES += rtl/common/mqnic_ptp.v SYN_FILES += rtl/common/mqnic_ptp_clock.v SYN_FILES += rtl/common/mqnic_ptp_perout.v diff --git a/fpga/mqnic/AU280/fpga_100g/tb/fpga_core/Makefile b/fpga/mqnic/AU280/fpga_100g/tb/fpga_core/Makefile index 9ead163c3..5c08d93a7 100644 --- a/fpga/mqnic/AU280/fpga_100g/tb/fpga_core/Makefile +++ b/fpga/mqnic/AU280/fpga_100g/tb/fpga_core/Makefile @@ -49,6 +49,7 @@ VERILOG_SOURCES += ../../rtl/common/mqnic_egress.v VERILOG_SOURCES += ../../rtl/common/mqnic_ingress.v VERILOG_SOURCES += ../../rtl/common/mqnic_l2_egress.v VERILOG_SOURCES += ../../rtl/common/mqnic_l2_ingress.v +VERILOG_SOURCES += ../../rtl/common/mqnic_rx_queue_map.v VERILOG_SOURCES += ../../rtl/common/mqnic_ptp.v VERILOG_SOURCES += ../../rtl/common/mqnic_ptp_clock.v VERILOG_SOURCES += ../../rtl/common/mqnic_ptp_perout.v diff --git a/fpga/mqnic/AU280/fpga_100g/tb/fpga_core/test_fpga_core.py b/fpga/mqnic/AU280/fpga_100g/tb/fpga_core/test_fpga_core.py index 03396e688..6a0d0cb56 100644 --- a/fpga/mqnic/AU280/fpga_100g/tb/fpga_core/test_fpga_core.py +++ b/fpga/mqnic/AU280/fpga_100g/tb/fpga_core/test_fpga_core.py @@ -425,6 +425,61 @@ async def run_test_nic(dut): assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff assert Ether(pkt.data).build() == test_pkt.build() + tb.log.info("Queue mapping offset test") + + data = bytearray([x % 256 for x in range(1024)]) + + tb.loopback_enable = True + + for k in range(4): + await tb.driver.interfaces[0].set_rx_queue_map_offset(0, k) + + await tb.driver.interfaces[0].start_xmit(data, 0) + + pkt = await tb.driver.interfaces[0].recv() + + tb.log.info("Packet: %s", pkt) + assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff + assert pkt.queue == k + + tb.loopback_enable = False + + await tb.driver.interfaces[0].set_rx_queue_map_offset(0, 0) + + tb.log.info("Queue mapping RSS mask test") + + await tb.driver.interfaces[0].set_rx_queue_map_rss_mask(0, 0x00000003) + + tb.loopback_enable = True + + queues = set() + + for k in range(64): + payload = bytes([x % 256 for x in range(256)]) + eth = Ether(src='5A:51:52:53:54:55', dst='DA:D1:D2:D3:D4:D5') + ip = IP(src='192.168.1.100', dst='192.168.1.101') + udp = UDP(sport=1, dport=k+0) + test_pkt = eth / ip / udp / payload + + test_pkt2 = test_pkt.copy() + test_pkt2[UDP].chksum = scapy.utils.checksum(bytes(test_pkt2[UDP])) + + await tb.driver.interfaces[0].start_xmit(test_pkt2.build(), 0, 34, 6) + + for k in range(64): + pkt = await tb.driver.interfaces[0].recv() + + tb.log.info("Packet: %s", pkt) + assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff + + queues.add(pkt.queue) + + assert len(queues) == 4 + + tb.loopback_enable = False + + await tb.driver.interfaces[0].set_rx_queue_map_rss_mask(0, 0) + tb.log.info("Multiple small packets") count = 64 @@ -518,6 +573,7 @@ def test_fpga_core(request): os.path.join(rtl_dir, "common", "mqnic_ingress.v"), os.path.join(rtl_dir, "common", "mqnic_l2_egress.v"), os.path.join(rtl_dir, "common", "mqnic_l2_ingress.v"), + os.path.join(rtl_dir, "common", "mqnic_rx_queue_map.v"), os.path.join(rtl_dir, "common", "mqnic_ptp.v"), os.path.join(rtl_dir, "common", "mqnic_ptp_clock.v"), os.path.join(rtl_dir, "common", "mqnic_ptp_perout.v"), diff --git a/fpga/mqnic/AU280/fpga_25g/fpga/Makefile b/fpga/mqnic/AU280/fpga_25g/fpga/Makefile index 477672bb5..9bebde348 100644 --- a/fpga/mqnic/AU280/fpga_25g/fpga/Makefile +++ b/fpga/mqnic/AU280/fpga_25g/fpga/Makefile @@ -18,6 +18,7 @@ SYN_FILES += rtl/common/mqnic_egress.v SYN_FILES += rtl/common/mqnic_ingress.v SYN_FILES += rtl/common/mqnic_l2_egress.v SYN_FILES += rtl/common/mqnic_l2_ingress.v +SYN_FILES += rtl/common/mqnic_rx_queue_map.v SYN_FILES += rtl/common/mqnic_ptp.v SYN_FILES += rtl/common/mqnic_ptp_clock.v SYN_FILES += rtl/common/mqnic_ptp_perout.v diff --git a/fpga/mqnic/AU280/fpga_25g/fpga_10g/Makefile b/fpga/mqnic/AU280/fpga_25g/fpga_10g/Makefile index 477672bb5..9bebde348 100644 --- a/fpga/mqnic/AU280/fpga_25g/fpga_10g/Makefile +++ b/fpga/mqnic/AU280/fpga_25g/fpga_10g/Makefile @@ -18,6 +18,7 @@ SYN_FILES += rtl/common/mqnic_egress.v SYN_FILES += rtl/common/mqnic_ingress.v SYN_FILES += rtl/common/mqnic_l2_egress.v SYN_FILES += rtl/common/mqnic_l2_ingress.v +SYN_FILES += rtl/common/mqnic_rx_queue_map.v SYN_FILES += rtl/common/mqnic_ptp.v SYN_FILES += rtl/common/mqnic_ptp_clock.v SYN_FILES += rtl/common/mqnic_ptp_perout.v diff --git a/fpga/mqnic/AU280/fpga_25g/tb/fpga_core/Makefile b/fpga/mqnic/AU280/fpga_25g/tb/fpga_core/Makefile index baac1b5e1..9bab90d5b 100644 --- a/fpga/mqnic/AU280/fpga_25g/tb/fpga_core/Makefile +++ b/fpga/mqnic/AU280/fpga_25g/tb/fpga_core/Makefile @@ -49,6 +49,7 @@ VERILOG_SOURCES += ../../rtl/common/mqnic_egress.v VERILOG_SOURCES += ../../rtl/common/mqnic_ingress.v VERILOG_SOURCES += ../../rtl/common/mqnic_l2_egress.v VERILOG_SOURCES += ../../rtl/common/mqnic_l2_ingress.v +VERILOG_SOURCES += ../../rtl/common/mqnic_rx_queue_map.v VERILOG_SOURCES += ../../rtl/common/mqnic_ptp.v VERILOG_SOURCES += ../../rtl/common/mqnic_ptp_clock.v VERILOG_SOURCES += ../../rtl/common/mqnic_ptp_perout.v diff --git a/fpga/mqnic/AU280/fpga_25g/tb/fpga_core/test_fpga_core.py b/fpga/mqnic/AU280/fpga_25g/tb/fpga_core/test_fpga_core.py index 7805994b8..f998ae609 100644 --- a/fpga/mqnic/AU280/fpga_25g/tb/fpga_core/test_fpga_core.py +++ b/fpga/mqnic/AU280/fpga_25g/tb/fpga_core/test_fpga_core.py @@ -497,6 +497,61 @@ async def run_test_nic(dut): assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff assert Ether(pkt.data).build() == test_pkt.build() + tb.log.info("Queue mapping offset test") + + data = bytearray([x % 256 for x in range(1024)]) + + tb.loopback_enable = True + + for k in range(4): + await tb.driver.interfaces[0].set_rx_queue_map_offset(0, k) + + await tb.driver.interfaces[0].start_xmit(data, 0) + + pkt = await tb.driver.interfaces[0].recv() + + tb.log.info("Packet: %s", pkt) + assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff + assert pkt.queue == k + + tb.loopback_enable = False + + await tb.driver.interfaces[0].set_rx_queue_map_offset(0, 0) + + tb.log.info("Queue mapping RSS mask test") + + await tb.driver.interfaces[0].set_rx_queue_map_rss_mask(0, 0x00000003) + + tb.loopback_enable = True + + queues = set() + + for k in range(64): + payload = bytes([x % 256 for x in range(256)]) + eth = Ether(src='5A:51:52:53:54:55', dst='DA:D1:D2:D3:D4:D5') + ip = IP(src='192.168.1.100', dst='192.168.1.101') + udp = UDP(sport=1, dport=k+0) + test_pkt = eth / ip / udp / payload + + test_pkt2 = test_pkt.copy() + test_pkt2[UDP].chksum = scapy.utils.checksum(bytes(test_pkt2[UDP])) + + await tb.driver.interfaces[0].start_xmit(test_pkt2.build(), 0, 34, 6) + + for k in range(64): + pkt = await tb.driver.interfaces[0].recv() + + tb.log.info("Packet: %s", pkt) + assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff + + queues.add(pkt.queue) + + assert len(queues) == 4 + + tb.loopback_enable = False + + await tb.driver.interfaces[0].set_rx_queue_map_rss_mask(0, 0) + tb.log.info("Multiple small packets") count = 64 @@ -570,6 +625,7 @@ def test_fpga_core(request): os.path.join(rtl_dir, "common", "mqnic_ingress.v"), os.path.join(rtl_dir, "common", "mqnic_l2_egress.v"), os.path.join(rtl_dir, "common", "mqnic_l2_ingress.v"), + os.path.join(rtl_dir, "common", "mqnic_rx_queue_map.v"), os.path.join(rtl_dir, "common", "mqnic_ptp.v"), os.path.join(rtl_dir, "common", "mqnic_ptp_clock.v"), os.path.join(rtl_dir, "common", "mqnic_ptp_perout.v"), diff --git a/fpga/mqnic/AU50/fpga_100g/fpga/Makefile b/fpga/mqnic/AU50/fpga_100g/fpga/Makefile index 7a1ecfc86..d303bac19 100644 --- a/fpga/mqnic/AU50/fpga_100g/fpga/Makefile +++ b/fpga/mqnic/AU50/fpga_100g/fpga/Makefile @@ -18,6 +18,7 @@ SYN_FILES += rtl/common/mqnic_egress.v SYN_FILES += rtl/common/mqnic_ingress.v SYN_FILES += rtl/common/mqnic_l2_egress.v SYN_FILES += rtl/common/mqnic_l2_ingress.v +SYN_FILES += rtl/common/mqnic_rx_queue_map.v SYN_FILES += rtl/common/mqnic_ptp.v SYN_FILES += rtl/common/mqnic_ptp_clock.v SYN_FILES += rtl/common/mqnic_ptp_perout.v diff --git a/fpga/mqnic/AU50/fpga_100g/tb/fpga_core/Makefile b/fpga/mqnic/AU50/fpga_100g/tb/fpga_core/Makefile index 79b42f997..86fbf1217 100644 --- a/fpga/mqnic/AU50/fpga_100g/tb/fpga_core/Makefile +++ b/fpga/mqnic/AU50/fpga_100g/tb/fpga_core/Makefile @@ -49,6 +49,7 @@ VERILOG_SOURCES += ../../rtl/common/mqnic_egress.v VERILOG_SOURCES += ../../rtl/common/mqnic_ingress.v VERILOG_SOURCES += ../../rtl/common/mqnic_l2_egress.v VERILOG_SOURCES += ../../rtl/common/mqnic_l2_ingress.v +VERILOG_SOURCES += ../../rtl/common/mqnic_rx_queue_map.v VERILOG_SOURCES += ../../rtl/common/mqnic_ptp.v VERILOG_SOURCES += ../../rtl/common/mqnic_ptp_clock.v VERILOG_SOURCES += ../../rtl/common/mqnic_ptp_perout.v diff --git a/fpga/mqnic/AU50/fpga_100g/tb/fpga_core/test_fpga_core.py b/fpga/mqnic/AU50/fpga_100g/tb/fpga_core/test_fpga_core.py index a9e09d14f..f266a5271 100644 --- a/fpga/mqnic/AU50/fpga_100g/tb/fpga_core/test_fpga_core.py +++ b/fpga/mqnic/AU50/fpga_100g/tb/fpga_core/test_fpga_core.py @@ -386,6 +386,61 @@ async def run_test_nic(dut): assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff assert Ether(pkt.data).build() == test_pkt.build() + tb.log.info("Queue mapping offset test") + + data = bytearray([x % 256 for x in range(1024)]) + + tb.loopback_enable = True + + for k in range(4): + await tb.driver.interfaces[0].set_rx_queue_map_offset(0, k) + + await tb.driver.interfaces[0].start_xmit(data, 0) + + pkt = await tb.driver.interfaces[0].recv() + + tb.log.info("Packet: %s", pkt) + assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff + assert pkt.queue == k + + tb.loopback_enable = False + + await tb.driver.interfaces[0].set_rx_queue_map_offset(0, 0) + + tb.log.info("Queue mapping RSS mask test") + + await tb.driver.interfaces[0].set_rx_queue_map_rss_mask(0, 0x00000003) + + tb.loopback_enable = True + + queues = set() + + for k in range(64): + payload = bytes([x % 256 for x in range(256)]) + eth = Ether(src='5A:51:52:53:54:55', dst='DA:D1:D2:D3:D4:D5') + ip = IP(src='192.168.1.100', dst='192.168.1.101') + udp = UDP(sport=1, dport=k+0) + test_pkt = eth / ip / udp / payload + + test_pkt2 = test_pkt.copy() + test_pkt2[UDP].chksum = scapy.utils.checksum(bytes(test_pkt2[UDP])) + + await tb.driver.interfaces[0].start_xmit(test_pkt2.build(), 0, 34, 6) + + for k in range(64): + pkt = await tb.driver.interfaces[0].recv() + + tb.log.info("Packet: %s", pkt) + assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff + + queues.add(pkt.queue) + + assert len(queues) == 4 + + tb.loopback_enable = False + + await tb.driver.interfaces[0].set_rx_queue_map_rss_mask(0, 0) + tb.log.info("Multiple small packets") count = 64 @@ -479,6 +534,7 @@ def test_fpga_core(request): os.path.join(rtl_dir, "common", "mqnic_ingress.v"), os.path.join(rtl_dir, "common", "mqnic_l2_egress.v"), os.path.join(rtl_dir, "common", "mqnic_l2_ingress.v"), + os.path.join(rtl_dir, "common", "mqnic_rx_queue_map.v"), os.path.join(rtl_dir, "common", "mqnic_ptp.v"), os.path.join(rtl_dir, "common", "mqnic_ptp_clock.v"), os.path.join(rtl_dir, "common", "mqnic_ptp_perout.v"), diff --git a/fpga/mqnic/AU50/fpga_25g/fpga/Makefile b/fpga/mqnic/AU50/fpga_25g/fpga/Makefile index 9bdba52d4..4371aedd4 100644 --- a/fpga/mqnic/AU50/fpga_25g/fpga/Makefile +++ b/fpga/mqnic/AU50/fpga_25g/fpga/Makefile @@ -18,6 +18,7 @@ SYN_FILES += rtl/common/mqnic_egress.v SYN_FILES += rtl/common/mqnic_ingress.v SYN_FILES += rtl/common/mqnic_l2_egress.v SYN_FILES += rtl/common/mqnic_l2_ingress.v +SYN_FILES += rtl/common/mqnic_rx_queue_map.v SYN_FILES += rtl/common/mqnic_ptp.v SYN_FILES += rtl/common/mqnic_ptp_clock.v SYN_FILES += rtl/common/mqnic_ptp_perout.v diff --git a/fpga/mqnic/AU50/fpga_25g/fpga_10g/Makefile b/fpga/mqnic/AU50/fpga_25g/fpga_10g/Makefile index 9bdba52d4..4371aedd4 100644 --- a/fpga/mqnic/AU50/fpga_25g/fpga_10g/Makefile +++ b/fpga/mqnic/AU50/fpga_25g/fpga_10g/Makefile @@ -18,6 +18,7 @@ SYN_FILES += rtl/common/mqnic_egress.v SYN_FILES += rtl/common/mqnic_ingress.v SYN_FILES += rtl/common/mqnic_l2_egress.v SYN_FILES += rtl/common/mqnic_l2_ingress.v +SYN_FILES += rtl/common/mqnic_rx_queue_map.v SYN_FILES += rtl/common/mqnic_ptp.v SYN_FILES += rtl/common/mqnic_ptp_clock.v SYN_FILES += rtl/common/mqnic_ptp_perout.v diff --git a/fpga/mqnic/AU50/fpga_25g/tb/fpga_core/Makefile b/fpga/mqnic/AU50/fpga_25g/tb/fpga_core/Makefile index 065764139..ab22fa1eb 100644 --- a/fpga/mqnic/AU50/fpga_25g/tb/fpga_core/Makefile +++ b/fpga/mqnic/AU50/fpga_25g/tb/fpga_core/Makefile @@ -49,6 +49,7 @@ VERILOG_SOURCES += ../../rtl/common/mqnic_egress.v VERILOG_SOURCES += ../../rtl/common/mqnic_ingress.v VERILOG_SOURCES += ../../rtl/common/mqnic_l2_egress.v VERILOG_SOURCES += ../../rtl/common/mqnic_l2_ingress.v +VERILOG_SOURCES += ../../rtl/common/mqnic_rx_queue_map.v VERILOG_SOURCES += ../../rtl/common/mqnic_ptp.v VERILOG_SOURCES += ../../rtl/common/mqnic_ptp_clock.v VERILOG_SOURCES += ../../rtl/common/mqnic_ptp_perout.v diff --git a/fpga/mqnic/AU50/fpga_25g/tb/fpga_core/test_fpga_core.py b/fpga/mqnic/AU50/fpga_25g/tb/fpga_core/test_fpga_core.py index ae2936441..aa29314ff 100644 --- a/fpga/mqnic/AU50/fpga_25g/tb/fpga_core/test_fpga_core.py +++ b/fpga/mqnic/AU50/fpga_25g/tb/fpga_core/test_fpga_core.py @@ -422,6 +422,61 @@ async def run_test_nic(dut): assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff assert Ether(pkt.data).build() == test_pkt.build() + tb.log.info("Queue mapping offset test") + + data = bytearray([x % 256 for x in range(1024)]) + + tb.loopback_enable = True + + for k in range(4): + await tb.driver.interfaces[0].set_rx_queue_map_offset(0, k) + + await tb.driver.interfaces[0].start_xmit(data, 0) + + pkt = await tb.driver.interfaces[0].recv() + + tb.log.info("Packet: %s", pkt) + assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff + assert pkt.queue == k + + tb.loopback_enable = False + + await tb.driver.interfaces[0].set_rx_queue_map_offset(0, 0) + + tb.log.info("Queue mapping RSS mask test") + + await tb.driver.interfaces[0].set_rx_queue_map_rss_mask(0, 0x00000003) + + tb.loopback_enable = True + + queues = set() + + for k in range(64): + payload = bytes([x % 256 for x in range(256)]) + eth = Ether(src='5A:51:52:53:54:55', dst='DA:D1:D2:D3:D4:D5') + ip = IP(src='192.168.1.100', dst='192.168.1.101') + udp = UDP(sport=1, dport=k+0) + test_pkt = eth / ip / udp / payload + + test_pkt2 = test_pkt.copy() + test_pkt2[UDP].chksum = scapy.utils.checksum(bytes(test_pkt2[UDP])) + + await tb.driver.interfaces[0].start_xmit(test_pkt2.build(), 0, 34, 6) + + for k in range(64): + pkt = await tb.driver.interfaces[0].recv() + + tb.log.info("Packet: %s", pkt) + assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff + + queues.add(pkt.queue) + + assert len(queues) == 4 + + tb.loopback_enable = False + + await tb.driver.interfaces[0].set_rx_queue_map_rss_mask(0, 0) + tb.log.info("Multiple small packets") count = 64 @@ -495,6 +550,7 @@ def test_fpga_core(request): os.path.join(rtl_dir, "common", "mqnic_ingress.v"), os.path.join(rtl_dir, "common", "mqnic_l2_egress.v"), os.path.join(rtl_dir, "common", "mqnic_l2_ingress.v"), + os.path.join(rtl_dir, "common", "mqnic_rx_queue_map.v"), os.path.join(rtl_dir, "common", "mqnic_ptp.v"), os.path.join(rtl_dir, "common", "mqnic_ptp_clock.v"), os.path.join(rtl_dir, "common", "mqnic_ptp_perout.v"), diff --git a/fpga/mqnic/DNPCIe_40G_KU_LL_2QSFP/fpga/fpga_ku040/Makefile b/fpga/mqnic/DNPCIe_40G_KU_LL_2QSFP/fpga/fpga_ku040/Makefile index bb774691b..4286f59e5 100644 --- a/fpga/mqnic/DNPCIe_40G_KU_LL_2QSFP/fpga/fpga_ku040/Makefile +++ b/fpga/mqnic/DNPCIe_40G_KU_LL_2QSFP/fpga/fpga_ku040/Makefile @@ -18,6 +18,7 @@ SYN_FILES += rtl/common/mqnic_egress.v SYN_FILES += rtl/common/mqnic_ingress.v SYN_FILES += rtl/common/mqnic_l2_egress.v SYN_FILES += rtl/common/mqnic_l2_ingress.v +SYN_FILES += rtl/common/mqnic_rx_queue_map.v SYN_FILES += rtl/common/mqnic_ptp.v SYN_FILES += rtl/common/mqnic_ptp_clock.v SYN_FILES += rtl/common/mqnic_ptp_perout.v diff --git a/fpga/mqnic/DNPCIe_40G_KU_LL_2QSFP/fpga/fpga_ku060/Makefile b/fpga/mqnic/DNPCIe_40G_KU_LL_2QSFP/fpga/fpga_ku060/Makefile index 81083155e..74e2c97ee 100644 --- a/fpga/mqnic/DNPCIe_40G_KU_LL_2QSFP/fpga/fpga_ku060/Makefile +++ b/fpga/mqnic/DNPCIe_40G_KU_LL_2QSFP/fpga/fpga_ku060/Makefile @@ -18,6 +18,7 @@ SYN_FILES += rtl/common/mqnic_egress.v SYN_FILES += rtl/common/mqnic_ingress.v SYN_FILES += rtl/common/mqnic_l2_egress.v SYN_FILES += rtl/common/mqnic_l2_ingress.v +SYN_FILES += rtl/common/mqnic_rx_queue_map.v SYN_FILES += rtl/common/mqnic_ptp.v SYN_FILES += rtl/common/mqnic_ptp_clock.v SYN_FILES += rtl/common/mqnic_ptp_perout.v diff --git a/fpga/mqnic/DNPCIe_40G_KU_LL_2QSFP/fpga/tb/fpga_core/Makefile b/fpga/mqnic/DNPCIe_40G_KU_LL_2QSFP/fpga/tb/fpga_core/Makefile index 3ac4eba50..2d8c49b43 100644 --- a/fpga/mqnic/DNPCIe_40G_KU_LL_2QSFP/fpga/tb/fpga_core/Makefile +++ b/fpga/mqnic/DNPCIe_40G_KU_LL_2QSFP/fpga/tb/fpga_core/Makefile @@ -49,6 +49,7 @@ VERILOG_SOURCES += ../../rtl/common/mqnic_egress.v VERILOG_SOURCES += ../../rtl/common/mqnic_ingress.v VERILOG_SOURCES += ../../rtl/common/mqnic_l2_egress.v VERILOG_SOURCES += ../../rtl/common/mqnic_l2_ingress.v +VERILOG_SOURCES += ../../rtl/common/mqnic_rx_queue_map.v VERILOG_SOURCES += ../../rtl/common/mqnic_ptp.v VERILOG_SOURCES += ../../rtl/common/mqnic_ptp_clock.v VERILOG_SOURCES += ../../rtl/common/mqnic_ptp_perout.v diff --git a/fpga/mqnic/DNPCIe_40G_KU_LL_2QSFP/fpga/tb/fpga_core/test_fpga_core.py b/fpga/mqnic/DNPCIe_40G_KU_LL_2QSFP/fpga/tb/fpga_core/test_fpga_core.py index e88f7e38f..f184f4d15 100644 --- a/fpga/mqnic/DNPCIe_40G_KU_LL_2QSFP/fpga/tb/fpga_core/test_fpga_core.py +++ b/fpga/mqnic/DNPCIe_40G_KU_LL_2QSFP/fpga/tb/fpga_core/test_fpga_core.py @@ -492,6 +492,61 @@ async def run_test_nic(dut): assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff assert Ether(pkt.data).build() == test_pkt.build() + tb.log.info("Queue mapping offset test") + + data = bytearray([x % 256 for x in range(1024)]) + + tb.loopback_enable = True + + for k in range(4): + await tb.driver.interfaces[0].set_rx_queue_map_offset(0, k) + + await tb.driver.interfaces[0].start_xmit(data, 0) + + pkt = await tb.driver.interfaces[0].recv() + + tb.log.info("Packet: %s", pkt) + assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff + assert pkt.queue == k + + tb.loopback_enable = False + + await tb.driver.interfaces[0].set_rx_queue_map_offset(0, 0) + + tb.log.info("Queue mapping RSS mask test") + + await tb.driver.interfaces[0].set_rx_queue_map_rss_mask(0, 0x00000003) + + tb.loopback_enable = True + + queues = set() + + for k in range(64): + payload = bytes([x % 256 for x in range(256)]) + eth = Ether(src='5A:51:52:53:54:55', dst='DA:D1:D2:D3:D4:D5') + ip = IP(src='192.168.1.100', dst='192.168.1.101') + udp = UDP(sport=1, dport=k+0) + test_pkt = eth / ip / udp / payload + + test_pkt2 = test_pkt.copy() + test_pkt2[UDP].chksum = scapy.utils.checksum(bytes(test_pkt2[UDP])) + + await tb.driver.interfaces[0].start_xmit(test_pkt2.build(), 0, 34, 6) + + for k in range(64): + pkt = await tb.driver.interfaces[0].recv() + + tb.log.info("Packet: %s", pkt) + assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff + + queues.add(pkt.queue) + + assert len(queues) == 4 + + tb.loopback_enable = False + + await tb.driver.interfaces[0].set_rx_queue_map_rss_mask(0, 0) + tb.log.info("Multiple small packets") count = 64 @@ -565,6 +620,7 @@ def test_fpga_core(request): os.path.join(rtl_dir, "common", "mqnic_ingress.v"), os.path.join(rtl_dir, "common", "mqnic_l2_egress.v"), os.path.join(rtl_dir, "common", "mqnic_l2_ingress.v"), + os.path.join(rtl_dir, "common", "mqnic_rx_queue_map.v"), os.path.join(rtl_dir, "common", "mqnic_ptp.v"), os.path.join(rtl_dir, "common", "mqnic_ptp_clock.v"), os.path.join(rtl_dir, "common", "mqnic_ptp_perout.v"), diff --git a/fpga/mqnic/ExaNIC_X10/fpga/fpga/Makefile b/fpga/mqnic/ExaNIC_X10/fpga/fpga/Makefile index a2b042daa..01362f529 100644 --- a/fpga/mqnic/ExaNIC_X10/fpga/fpga/Makefile +++ b/fpga/mqnic/ExaNIC_X10/fpga/fpga/Makefile @@ -18,6 +18,7 @@ SYN_FILES += rtl/common/mqnic_egress.v SYN_FILES += rtl/common/mqnic_ingress.v SYN_FILES += rtl/common/mqnic_l2_egress.v SYN_FILES += rtl/common/mqnic_l2_ingress.v +SYN_FILES += rtl/common/mqnic_rx_queue_map.v SYN_FILES += rtl/common/mqnic_ptp.v SYN_FILES += rtl/common/mqnic_ptp_clock.v SYN_FILES += rtl/common/mqnic_ptp_perout.v diff --git a/fpga/mqnic/ExaNIC_X10/fpga/tb/fpga_core/Makefile b/fpga/mqnic/ExaNIC_X10/fpga/tb/fpga_core/Makefile index 3ac4eba50..2d8c49b43 100644 --- a/fpga/mqnic/ExaNIC_X10/fpga/tb/fpga_core/Makefile +++ b/fpga/mqnic/ExaNIC_X10/fpga/tb/fpga_core/Makefile @@ -49,6 +49,7 @@ VERILOG_SOURCES += ../../rtl/common/mqnic_egress.v VERILOG_SOURCES += ../../rtl/common/mqnic_ingress.v VERILOG_SOURCES += ../../rtl/common/mqnic_l2_egress.v VERILOG_SOURCES += ../../rtl/common/mqnic_l2_ingress.v +VERILOG_SOURCES += ../../rtl/common/mqnic_rx_queue_map.v VERILOG_SOURCES += ../../rtl/common/mqnic_ptp.v VERILOG_SOURCES += ../../rtl/common/mqnic_ptp_clock.v VERILOG_SOURCES += ../../rtl/common/mqnic_ptp_perout.v diff --git a/fpga/mqnic/ExaNIC_X10/fpga/tb/fpga_core/test_fpga_core.py b/fpga/mqnic/ExaNIC_X10/fpga/tb/fpga_core/test_fpga_core.py index f505b15bd..0901f0c44 100644 --- a/fpga/mqnic/ExaNIC_X10/fpga/tb/fpga_core/test_fpga_core.py +++ b/fpga/mqnic/ExaNIC_X10/fpga/tb/fpga_core/test_fpga_core.py @@ -410,6 +410,61 @@ async def run_test_nic(dut): assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff assert Ether(pkt.data).build() == test_pkt.build() + tb.log.info("Queue mapping offset test") + + data = bytearray([x % 256 for x in range(1024)]) + + tb.loopback_enable = True + + for k in range(4): + await tb.driver.interfaces[0].set_rx_queue_map_offset(0, k) + + await tb.driver.interfaces[0].start_xmit(data, 0) + + pkt = await tb.driver.interfaces[0].recv() + + tb.log.info("Packet: %s", pkt) + assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff + assert pkt.queue == k + + tb.loopback_enable = False + + await tb.driver.interfaces[0].set_rx_queue_map_offset(0, 0) + + tb.log.info("Queue mapping RSS mask test") + + await tb.driver.interfaces[0].set_rx_queue_map_rss_mask(0, 0x00000003) + + tb.loopback_enable = True + + queues = set() + + for k in range(64): + payload = bytes([x % 256 for x in range(256)]) + eth = Ether(src='5A:51:52:53:54:55', dst='DA:D1:D2:D3:D4:D5') + ip = IP(src='192.168.1.100', dst='192.168.1.101') + udp = UDP(sport=1, dport=k+0) + test_pkt = eth / ip / udp / payload + + test_pkt2 = test_pkt.copy() + test_pkt2[UDP].chksum = scapy.utils.checksum(bytes(test_pkt2[UDP])) + + await tb.driver.interfaces[0].start_xmit(test_pkt2.build(), 0, 34, 6) + + for k in range(64): + pkt = await tb.driver.interfaces[0].recv() + + tb.log.info("Packet: %s", pkt) + assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff + + queues.add(pkt.queue) + + assert len(queues) == 4 + + tb.loopback_enable = False + + await tb.driver.interfaces[0].set_rx_queue_map_rss_mask(0, 0) + tb.log.info("Multiple small packets") count = 64 @@ -483,6 +538,7 @@ def test_fpga_core(request): os.path.join(rtl_dir, "common", "mqnic_ingress.v"), os.path.join(rtl_dir, "common", "mqnic_l2_egress.v"), os.path.join(rtl_dir, "common", "mqnic_l2_ingress.v"), + os.path.join(rtl_dir, "common", "mqnic_rx_queue_map.v"), os.path.join(rtl_dir, "common", "mqnic_ptp.v"), os.path.join(rtl_dir, "common", "mqnic_ptp_clock.v"), os.path.join(rtl_dir, "common", "mqnic_ptp_perout.v"), diff --git a/fpga/mqnic/ExaNIC_X25/fpga_25g/fpga/Makefile b/fpga/mqnic/ExaNIC_X25/fpga_25g/fpga/Makefile index 36eea3c27..da0aec431 100644 --- a/fpga/mqnic/ExaNIC_X25/fpga_25g/fpga/Makefile +++ b/fpga/mqnic/ExaNIC_X25/fpga_25g/fpga/Makefile @@ -18,6 +18,7 @@ SYN_FILES += rtl/common/mqnic_egress.v SYN_FILES += rtl/common/mqnic_ingress.v SYN_FILES += rtl/common/mqnic_l2_egress.v SYN_FILES += rtl/common/mqnic_l2_ingress.v +SYN_FILES += rtl/common/mqnic_rx_queue_map.v SYN_FILES += rtl/common/mqnic_ptp.v SYN_FILES += rtl/common/mqnic_ptp_clock.v SYN_FILES += rtl/common/mqnic_ptp_perout.v diff --git a/fpga/mqnic/ExaNIC_X25/fpga_25g/fpga_10g/Makefile b/fpga/mqnic/ExaNIC_X25/fpga_25g/fpga_10g/Makefile index 36eea3c27..da0aec431 100644 --- a/fpga/mqnic/ExaNIC_X25/fpga_25g/fpga_10g/Makefile +++ b/fpga/mqnic/ExaNIC_X25/fpga_25g/fpga_10g/Makefile @@ -18,6 +18,7 @@ SYN_FILES += rtl/common/mqnic_egress.v SYN_FILES += rtl/common/mqnic_ingress.v SYN_FILES += rtl/common/mqnic_l2_egress.v SYN_FILES += rtl/common/mqnic_l2_ingress.v +SYN_FILES += rtl/common/mqnic_rx_queue_map.v SYN_FILES += rtl/common/mqnic_ptp.v SYN_FILES += rtl/common/mqnic_ptp_clock.v SYN_FILES += rtl/common/mqnic_ptp_perout.v diff --git a/fpga/mqnic/ExaNIC_X25/fpga_25g/tb/fpga_core/Makefile b/fpga/mqnic/ExaNIC_X25/fpga_25g/tb/fpga_core/Makefile index be61c41ea..4b837ca5b 100644 --- a/fpga/mqnic/ExaNIC_X25/fpga_25g/tb/fpga_core/Makefile +++ b/fpga/mqnic/ExaNIC_X25/fpga_25g/tb/fpga_core/Makefile @@ -49,6 +49,7 @@ VERILOG_SOURCES += ../../rtl/common/mqnic_egress.v VERILOG_SOURCES += ../../rtl/common/mqnic_ingress.v VERILOG_SOURCES += ../../rtl/common/mqnic_l2_egress.v VERILOG_SOURCES += ../../rtl/common/mqnic_l2_ingress.v +VERILOG_SOURCES += ../../rtl/common/mqnic_rx_queue_map.v VERILOG_SOURCES += ../../rtl/common/mqnic_ptp.v VERILOG_SOURCES += ../../rtl/common/mqnic_ptp_clock.v VERILOG_SOURCES += ../../rtl/common/mqnic_ptp_perout.v diff --git a/fpga/mqnic/ExaNIC_X25/fpga_25g/tb/fpga_core/test_fpga_core.py b/fpga/mqnic/ExaNIC_X25/fpga_25g/tb/fpga_core/test_fpga_core.py index 5d4dc9add..7c58c63a1 100644 --- a/fpga/mqnic/ExaNIC_X25/fpga_25g/tb/fpga_core/test_fpga_core.py +++ b/fpga/mqnic/ExaNIC_X25/fpga_25g/tb/fpga_core/test_fpga_core.py @@ -419,6 +419,61 @@ async def run_test_nic(dut): assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff assert Ether(pkt.data).build() == test_pkt.build() + tb.log.info("Queue mapping offset test") + + data = bytearray([x % 256 for x in range(1024)]) + + tb.loopback_enable = True + + for k in range(4): + await tb.driver.interfaces[0].set_rx_queue_map_offset(0, k) + + await tb.driver.interfaces[0].start_xmit(data, 0) + + pkt = await tb.driver.interfaces[0].recv() + + tb.log.info("Packet: %s", pkt) + assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff + assert pkt.queue == k + + tb.loopback_enable = False + + await tb.driver.interfaces[0].set_rx_queue_map_offset(0, 0) + + tb.log.info("Queue mapping RSS mask test") + + await tb.driver.interfaces[0].set_rx_queue_map_rss_mask(0, 0x00000003) + + tb.loopback_enable = True + + queues = set() + + for k in range(64): + payload = bytes([x % 256 for x in range(256)]) + eth = Ether(src='5A:51:52:53:54:55', dst='DA:D1:D2:D3:D4:D5') + ip = IP(src='192.168.1.100', dst='192.168.1.101') + udp = UDP(sport=1, dport=k+0) + test_pkt = eth / ip / udp / payload + + test_pkt2 = test_pkt.copy() + test_pkt2[UDP].chksum = scapy.utils.checksum(bytes(test_pkt2[UDP])) + + await tb.driver.interfaces[0].start_xmit(test_pkt2.build(), 0, 34, 6) + + for k in range(64): + pkt = await tb.driver.interfaces[0].recv() + + tb.log.info("Packet: %s", pkt) + assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff + + queues.add(pkt.queue) + + assert len(queues) == 4 + + tb.loopback_enable = False + + await tb.driver.interfaces[0].set_rx_queue_map_rss_mask(0, 0) + tb.log.info("Multiple small packets") count = 64 @@ -492,6 +547,7 @@ def test_fpga_core(request): os.path.join(rtl_dir, "common", "mqnic_ingress.v"), os.path.join(rtl_dir, "common", "mqnic_l2_egress.v"), os.path.join(rtl_dir, "common", "mqnic_l2_ingress.v"), + os.path.join(rtl_dir, "common", "mqnic_rx_queue_map.v"), os.path.join(rtl_dir, "common", "mqnic_ptp.v"), os.path.join(rtl_dir, "common", "mqnic_ptp_clock.v"), os.path.join(rtl_dir, "common", "mqnic_ptp_perout.v"), diff --git a/fpga/mqnic/NetFPGA_SUME/fpga/fpga/Makefile b/fpga/mqnic/NetFPGA_SUME/fpga/fpga/Makefile index e265652c6..73406ff12 100644 --- a/fpga/mqnic/NetFPGA_SUME/fpga/fpga/Makefile +++ b/fpga/mqnic/NetFPGA_SUME/fpga/fpga/Makefile @@ -21,6 +21,7 @@ SYN_FILES += rtl/common/mqnic_egress.v SYN_FILES += rtl/common/mqnic_ingress.v SYN_FILES += rtl/common/mqnic_l2_egress.v SYN_FILES += rtl/common/mqnic_l2_ingress.v +SYN_FILES += rtl/common/mqnic_rx_queue_map.v SYN_FILES += rtl/common/mqnic_ptp.v SYN_FILES += rtl/common/mqnic_ptp_clock.v SYN_FILES += rtl/common/mqnic_ptp_perout.v diff --git a/fpga/mqnic/NetFPGA_SUME/fpga/tb/fpga_core/Makefile b/fpga/mqnic/NetFPGA_SUME/fpga/tb/fpga_core/Makefile index a0530acad..cbcdfa0df 100644 --- a/fpga/mqnic/NetFPGA_SUME/fpga/tb/fpga_core/Makefile +++ b/fpga/mqnic/NetFPGA_SUME/fpga/tb/fpga_core/Makefile @@ -49,6 +49,7 @@ VERILOG_SOURCES += ../../rtl/common/mqnic_egress.v VERILOG_SOURCES += ../../rtl/common/mqnic_ingress.v VERILOG_SOURCES += ../../rtl/common/mqnic_l2_egress.v VERILOG_SOURCES += ../../rtl/common/mqnic_l2_ingress.v +VERILOG_SOURCES += ../../rtl/common/mqnic_rx_queue_map.v VERILOG_SOURCES += ../../rtl/common/mqnic_ptp.v VERILOG_SOURCES += ../../rtl/common/mqnic_ptp_clock.v VERILOG_SOURCES += ../../rtl/common/mqnic_ptp_perout.v diff --git a/fpga/mqnic/NetFPGA_SUME/fpga/tb/fpga_core/test_fpga_core.py b/fpga/mqnic/NetFPGA_SUME/fpga/tb/fpga_core/test_fpga_core.py index 040bbbd4d..aab69e4f7 100644 --- a/fpga/mqnic/NetFPGA_SUME/fpga/tb/fpga_core/test_fpga_core.py +++ b/fpga/mqnic/NetFPGA_SUME/fpga/tb/fpga_core/test_fpga_core.py @@ -420,6 +420,61 @@ async def run_test_nic(dut): assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff assert Ether(pkt.data).build() == test_pkt.build() + tb.log.info("Queue mapping offset test") + + data = bytearray([x % 256 for x in range(1024)]) + + tb.loopback_enable = True + + for k in range(4): + await tb.driver.interfaces[0].set_rx_queue_map_offset(0, k) + + await tb.driver.interfaces[0].start_xmit(data, 0) + + pkt = await tb.driver.interfaces[0].recv() + + tb.log.info("Packet: %s", pkt) + assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff + assert pkt.queue == k + + tb.loopback_enable = False + + await tb.driver.interfaces[0].set_rx_queue_map_offset(0, 0) + + tb.log.info("Queue mapping RSS mask test") + + await tb.driver.interfaces[0].set_rx_queue_map_rss_mask(0, 0x00000003) + + tb.loopback_enable = True + + queues = set() + + for k in range(64): + payload = bytes([x % 256 for x in range(256)]) + eth = Ether(src='5A:51:52:53:54:55', dst='DA:D1:D2:D3:D4:D5') + ip = IP(src='192.168.1.100', dst='192.168.1.101') + udp = UDP(sport=1, dport=k+0) + test_pkt = eth / ip / udp / payload + + test_pkt2 = test_pkt.copy() + test_pkt2[UDP].chksum = scapy.utils.checksum(bytes(test_pkt2[UDP])) + + await tb.driver.interfaces[0].start_xmit(test_pkt2.build(), 0, 34, 6) + + for k in range(64): + pkt = await tb.driver.interfaces[0].recv() + + tb.log.info("Packet: %s", pkt) + assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff + + queues.add(pkt.queue) + + assert len(queues) == 4 + + tb.loopback_enable = False + + await tb.driver.interfaces[0].set_rx_queue_map_rss_mask(0, 0) + tb.log.info("Multiple small packets") count = 64 @@ -493,6 +548,7 @@ def test_fpga_core(request): os.path.join(rtl_dir, "common", "mqnic_ingress.v"), os.path.join(rtl_dir, "common", "mqnic_l2_egress.v"), os.path.join(rtl_dir, "common", "mqnic_l2_ingress.v"), + os.path.join(rtl_dir, "common", "mqnic_rx_queue_map.v"), os.path.join(rtl_dir, "common", "mqnic_ptp.v"), os.path.join(rtl_dir, "common", "mqnic_ptp_clock.v"), os.path.join(rtl_dir, "common", "mqnic_ptp_perout.v"), diff --git a/fpga/mqnic/S10MX_DK/fpga_10g/fpga_1sm21b/Makefile b/fpga/mqnic/S10MX_DK/fpga_10g/fpga_1sm21b/Makefile index f1fb61c82..7678d4141 100644 --- a/fpga/mqnic/S10MX_DK/fpga_10g/fpga_1sm21b/Makefile +++ b/fpga/mqnic/S10MX_DK/fpga_10g/fpga_1sm21b/Makefile @@ -20,6 +20,7 @@ SYN_FILES += rtl/common/mqnic_egress.v SYN_FILES += rtl/common/mqnic_ingress.v SYN_FILES += rtl/common/mqnic_l2_egress.v SYN_FILES += rtl/common/mqnic_l2_ingress.v +SYN_FILES += rtl/common/mqnic_rx_queue_map.v SYN_FILES += rtl/common/mqnic_ptp.v SYN_FILES += rtl/common/mqnic_ptp_clock.v SYN_FILES += rtl/common/mqnic_ptp_perout.v diff --git a/fpga/mqnic/S10MX_DK/fpga_10g/fpga_1sm21c/Makefile b/fpga/mqnic/S10MX_DK/fpga_10g/fpga_1sm21c/Makefile index 86f1ab1aa..741cc0366 100644 --- a/fpga/mqnic/S10MX_DK/fpga_10g/fpga_1sm21c/Makefile +++ b/fpga/mqnic/S10MX_DK/fpga_10g/fpga_1sm21c/Makefile @@ -20,6 +20,7 @@ SYN_FILES += rtl/common/mqnic_egress.v SYN_FILES += rtl/common/mqnic_ingress.v SYN_FILES += rtl/common/mqnic_l2_egress.v SYN_FILES += rtl/common/mqnic_l2_ingress.v +SYN_FILES += rtl/common/mqnic_rx_queue_map.v SYN_FILES += rtl/common/mqnic_ptp.v SYN_FILES += rtl/common/mqnic_ptp_clock.v SYN_FILES += rtl/common/mqnic_ptp_perout.v diff --git a/fpga/mqnic/S10MX_DK/fpga_10g/tb/fpga_core/Makefile b/fpga/mqnic/S10MX_DK/fpga_10g/tb/fpga_core/Makefile index 9323c5b84..ff25ebd58 100644 --- a/fpga/mqnic/S10MX_DK/fpga_10g/tb/fpga_core/Makefile +++ b/fpga/mqnic/S10MX_DK/fpga_10g/tb/fpga_core/Makefile @@ -49,6 +49,7 @@ VERILOG_SOURCES += ../../rtl/common/mqnic_egress.v VERILOG_SOURCES += ../../rtl/common/mqnic_ingress.v VERILOG_SOURCES += ../../rtl/common/mqnic_l2_egress.v VERILOG_SOURCES += ../../rtl/common/mqnic_l2_ingress.v +VERILOG_SOURCES += ../../rtl/common/mqnic_rx_queue_map.v VERILOG_SOURCES += ../../rtl/common/mqnic_ptp.v VERILOG_SOURCES += ../../rtl/common/mqnic_ptp_clock.v VERILOG_SOURCES += ../../rtl/common/mqnic_ptp_perout.v diff --git a/fpga/mqnic/S10MX_DK/fpga_10g/tb/fpga_core/test_fpga_core.py b/fpga/mqnic/S10MX_DK/fpga_10g/tb/fpga_core/test_fpga_core.py index d8b6cb96f..7bfe1130d 100644 --- a/fpga/mqnic/S10MX_DK/fpga_10g/tb/fpga_core/test_fpga_core.py +++ b/fpga/mqnic/S10MX_DK/fpga_10g/tb/fpga_core/test_fpga_core.py @@ -417,6 +417,61 @@ async def run_test_nic(dut): assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff assert Ether(pkt.data).build() == test_pkt.build() + tb.log.info("Queue mapping offset test") + + data = bytearray([x % 256 for x in range(1024)]) + + tb.loopback_enable = True + + for k in range(4): + await tb.driver.interfaces[0].set_rx_queue_map_offset(0, k) + + await tb.driver.interfaces[0].start_xmit(data, 0) + + pkt = await tb.driver.interfaces[0].recv() + + tb.log.info("Packet: %s", pkt) + assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff + assert pkt.queue == k + + tb.loopback_enable = False + + await tb.driver.interfaces[0].set_rx_queue_map_offset(0, 0) + + tb.log.info("Queue mapping RSS mask test") + + await tb.driver.interfaces[0].set_rx_queue_map_rss_mask(0, 0x00000003) + + tb.loopback_enable = True + + queues = set() + + for k in range(64): + payload = bytes([x % 256 for x in range(256)]) + eth = Ether(src='5A:51:52:53:54:55', dst='DA:D1:D2:D3:D4:D5') + ip = IP(src='192.168.1.100', dst='192.168.1.101') + udp = UDP(sport=1, dport=k+0) + test_pkt = eth / ip / udp / payload + + test_pkt2 = test_pkt.copy() + test_pkt2[UDP].chksum = scapy.utils.checksum(bytes(test_pkt2[UDP])) + + await tb.driver.interfaces[0].start_xmit(test_pkt2.build(), 0, 34, 6) + + for k in range(64): + pkt = await tb.driver.interfaces[0].recv() + + tb.log.info("Packet: %s", pkt) + assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff + + queues.add(pkt.queue) + + assert len(queues) == 4 + + tb.loopback_enable = False + + await tb.driver.interfaces[0].set_rx_queue_map_rss_mask(0, 0) + tb.log.info("Multiple small packets") count = 64 @@ -490,6 +545,7 @@ def test_fpga_core(request): os.path.join(rtl_dir, "common", "mqnic_ingress.v"), os.path.join(rtl_dir, "common", "mqnic_l2_egress.v"), os.path.join(rtl_dir, "common", "mqnic_l2_ingress.v"), + os.path.join(rtl_dir, "common", "mqnic_rx_queue_map.v"), os.path.join(rtl_dir, "common", "mqnic_ptp.v"), os.path.join(rtl_dir, "common", "mqnic_ptp_clock.v"), os.path.join(rtl_dir, "common", "mqnic_ptp_perout.v"), diff --git a/fpga/mqnic/VCU108/fpga_10g/fpga/Makefile b/fpga/mqnic/VCU108/fpga_10g/fpga/Makefile index 49308a090..25d098518 100644 --- a/fpga/mqnic/VCU108/fpga_10g/fpga/Makefile +++ b/fpga/mqnic/VCU108/fpga_10g/fpga/Makefile @@ -19,6 +19,7 @@ SYN_FILES += rtl/common/mqnic_egress.v SYN_FILES += rtl/common/mqnic_ingress.v SYN_FILES += rtl/common/mqnic_l2_egress.v SYN_FILES += rtl/common/mqnic_l2_ingress.v +SYN_FILES += rtl/common/mqnic_rx_queue_map.v SYN_FILES += rtl/common/mqnic_ptp.v SYN_FILES += rtl/common/mqnic_ptp_clock.v SYN_FILES += rtl/common/mqnic_ptp_perout.v diff --git a/fpga/mqnic/VCU108/fpga_10g/tb/fpga_core/Makefile b/fpga/mqnic/VCU108/fpga_10g/tb/fpga_core/Makefile index 670d6620a..ce4ba4be3 100644 --- a/fpga/mqnic/VCU108/fpga_10g/tb/fpga_core/Makefile +++ b/fpga/mqnic/VCU108/fpga_10g/tb/fpga_core/Makefile @@ -49,6 +49,7 @@ VERILOG_SOURCES += ../../rtl/common/mqnic_egress.v VERILOG_SOURCES += ../../rtl/common/mqnic_ingress.v VERILOG_SOURCES += ../../rtl/common/mqnic_l2_egress.v VERILOG_SOURCES += ../../rtl/common/mqnic_l2_ingress.v +VERILOG_SOURCES += ../../rtl/common/mqnic_rx_queue_map.v VERILOG_SOURCES += ../../rtl/common/mqnic_ptp.v VERILOG_SOURCES += ../../rtl/common/mqnic_ptp_clock.v VERILOG_SOURCES += ../../rtl/common/mqnic_ptp_perout.v diff --git a/fpga/mqnic/VCU108/fpga_10g/tb/fpga_core/test_fpga_core.py b/fpga/mqnic/VCU108/fpga_10g/tb/fpga_core/test_fpga_core.py index 4f2f25191..6ba1bb382 100644 --- a/fpga/mqnic/VCU108/fpga_10g/tb/fpga_core/test_fpga_core.py +++ b/fpga/mqnic/VCU108/fpga_10g/tb/fpga_core/test_fpga_core.py @@ -429,6 +429,61 @@ async def run_test_nic(dut): assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff assert Ether(pkt.data).build() == test_pkt.build() + tb.log.info("Queue mapping offset test") + + data = bytearray([x % 256 for x in range(1024)]) + + tb.loopback_enable = True + + for k in range(4): + await tb.driver.interfaces[0].set_rx_queue_map_offset(0, k) + + await tb.driver.interfaces[0].start_xmit(data, 0) + + pkt = await tb.driver.interfaces[0].recv() + + tb.log.info("Packet: %s", pkt) + assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff + assert pkt.queue == k + + tb.loopback_enable = False + + await tb.driver.interfaces[0].set_rx_queue_map_offset(0, 0) + + tb.log.info("Queue mapping RSS mask test") + + await tb.driver.interfaces[0].set_rx_queue_map_rss_mask(0, 0x00000003) + + tb.loopback_enable = True + + queues = set() + + for k in range(64): + payload = bytes([x % 256 for x in range(256)]) + eth = Ether(src='5A:51:52:53:54:55', dst='DA:D1:D2:D3:D4:D5') + ip = IP(src='192.168.1.100', dst='192.168.1.101') + udp = UDP(sport=1, dport=k+0) + test_pkt = eth / ip / udp / payload + + test_pkt2 = test_pkt.copy() + test_pkt2[UDP].chksum = scapy.utils.checksum(bytes(test_pkt2[UDP])) + + await tb.driver.interfaces[0].start_xmit(test_pkt2.build(), 0, 34, 6) + + for k in range(64): + pkt = await tb.driver.interfaces[0].recv() + + tb.log.info("Packet: %s", pkt) + assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff + + queues.add(pkt.queue) + + assert len(queues) == 4 + + tb.loopback_enable = False + + await tb.driver.interfaces[0].set_rx_queue_map_rss_mask(0, 0) + tb.log.info("Multiple small packets") count = 64 @@ -502,6 +557,7 @@ def test_fpga_core(request): os.path.join(rtl_dir, "common", "mqnic_ingress.v"), os.path.join(rtl_dir, "common", "mqnic_l2_egress.v"), os.path.join(rtl_dir, "common", "mqnic_l2_ingress.v"), + os.path.join(rtl_dir, "common", "mqnic_rx_queue_map.v"), os.path.join(rtl_dir, "common", "mqnic_ptp.v"), os.path.join(rtl_dir, "common", "mqnic_ptp_clock.v"), os.path.join(rtl_dir, "common", "mqnic_ptp_perout.v"), diff --git a/fpga/mqnic/VCU118/fpga_100g/fpga/Makefile b/fpga/mqnic/VCU118/fpga_100g/fpga/Makefile index 1c54b0503..2a2c1fbf8 100644 --- a/fpga/mqnic/VCU118/fpga_100g/fpga/Makefile +++ b/fpga/mqnic/VCU118/fpga_100g/fpga/Makefile @@ -19,6 +19,7 @@ SYN_FILES += rtl/common/mqnic_egress.v SYN_FILES += rtl/common/mqnic_ingress.v SYN_FILES += rtl/common/mqnic_l2_egress.v SYN_FILES += rtl/common/mqnic_l2_ingress.v +SYN_FILES += rtl/common/mqnic_rx_queue_map.v SYN_FILES += rtl/common/mqnic_ptp.v SYN_FILES += rtl/common/mqnic_ptp_clock.v SYN_FILES += rtl/common/mqnic_ptp_perout.v diff --git a/fpga/mqnic/VCU118/fpga_100g/tb/fpga_core/Makefile b/fpga/mqnic/VCU118/fpga_100g/tb/fpga_core/Makefile index e5dc3e054..7719841e5 100644 --- a/fpga/mqnic/VCU118/fpga_100g/tb/fpga_core/Makefile +++ b/fpga/mqnic/VCU118/fpga_100g/tb/fpga_core/Makefile @@ -49,6 +49,7 @@ VERILOG_SOURCES += ../../rtl/common/mqnic_egress.v VERILOG_SOURCES += ../../rtl/common/mqnic_ingress.v VERILOG_SOURCES += ../../rtl/common/mqnic_l2_egress.v VERILOG_SOURCES += ../../rtl/common/mqnic_l2_ingress.v +VERILOG_SOURCES += ../../rtl/common/mqnic_rx_queue_map.v VERILOG_SOURCES += ../../rtl/common/mqnic_ptp.v VERILOG_SOURCES += ../../rtl/common/mqnic_ptp_clock.v VERILOG_SOURCES += ../../rtl/common/mqnic_ptp_perout.v diff --git a/fpga/mqnic/VCU118/fpga_100g/tb/fpga_core/test_fpga_core.py b/fpga/mqnic/VCU118/fpga_100g/tb/fpga_core/test_fpga_core.py index 4f65b773d..d329a0d74 100644 --- a/fpga/mqnic/VCU118/fpga_100g/tb/fpga_core/test_fpga_core.py +++ b/fpga/mqnic/VCU118/fpga_100g/tb/fpga_core/test_fpga_core.py @@ -440,6 +440,61 @@ async def run_test_nic(dut): assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff assert Ether(pkt.data).build() == test_pkt.build() + tb.log.info("Queue mapping offset test") + + data = bytearray([x % 256 for x in range(1024)]) + + tb.loopback_enable = True + + for k in range(4): + await tb.driver.interfaces[0].set_rx_queue_map_offset(0, k) + + await tb.driver.interfaces[0].start_xmit(data, 0) + + pkt = await tb.driver.interfaces[0].recv() + + tb.log.info("Packet: %s", pkt) + assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff + assert pkt.queue == k + + tb.loopback_enable = False + + await tb.driver.interfaces[0].set_rx_queue_map_offset(0, 0) + + tb.log.info("Queue mapping RSS mask test") + + await tb.driver.interfaces[0].set_rx_queue_map_rss_mask(0, 0x00000003) + + tb.loopback_enable = True + + queues = set() + + for k in range(64): + payload = bytes([x % 256 for x in range(256)]) + eth = Ether(src='5A:51:52:53:54:55', dst='DA:D1:D2:D3:D4:D5') + ip = IP(src='192.168.1.100', dst='192.168.1.101') + udp = UDP(sport=1, dport=k+0) + test_pkt = eth / ip / udp / payload + + test_pkt2 = test_pkt.copy() + test_pkt2[UDP].chksum = scapy.utils.checksum(bytes(test_pkt2[UDP])) + + await tb.driver.interfaces[0].start_xmit(test_pkt2.build(), 0, 34, 6) + + for k in range(64): + pkt = await tb.driver.interfaces[0].recv() + + tb.log.info("Packet: %s", pkt) + assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff + + queues.add(pkt.queue) + + assert len(queues) == 4 + + tb.loopback_enable = False + + await tb.driver.interfaces[0].set_rx_queue_map_rss_mask(0, 0) + tb.log.info("Multiple small packets") count = 64 @@ -533,6 +588,7 @@ def test_fpga_core(request): os.path.join(rtl_dir, "common", "mqnic_ingress.v"), os.path.join(rtl_dir, "common", "mqnic_l2_egress.v"), os.path.join(rtl_dir, "common", "mqnic_l2_ingress.v"), + os.path.join(rtl_dir, "common", "mqnic_rx_queue_map.v"), os.path.join(rtl_dir, "common", "mqnic_ptp.v"), os.path.join(rtl_dir, "common", "mqnic_ptp_clock.v"), os.path.join(rtl_dir, "common", "mqnic_ptp_perout.v"), diff --git a/fpga/mqnic/VCU118/fpga_25g/fpga/Makefile b/fpga/mqnic/VCU118/fpga_25g/fpga/Makefile index d32c523ac..dc1d5f0a4 100644 --- a/fpga/mqnic/VCU118/fpga_25g/fpga/Makefile +++ b/fpga/mqnic/VCU118/fpga_25g/fpga/Makefile @@ -19,6 +19,7 @@ SYN_FILES += rtl/common/mqnic_egress.v SYN_FILES += rtl/common/mqnic_ingress.v SYN_FILES += rtl/common/mqnic_l2_egress.v SYN_FILES += rtl/common/mqnic_l2_ingress.v +SYN_FILES += rtl/common/mqnic_rx_queue_map.v SYN_FILES += rtl/common/mqnic_ptp.v SYN_FILES += rtl/common/mqnic_ptp_clock.v SYN_FILES += rtl/common/mqnic_ptp_perout.v diff --git a/fpga/mqnic/VCU118/fpga_25g/fpga_10g/Makefile b/fpga/mqnic/VCU118/fpga_25g/fpga_10g/Makefile index d32c523ac..dc1d5f0a4 100644 --- a/fpga/mqnic/VCU118/fpga_25g/fpga_10g/Makefile +++ b/fpga/mqnic/VCU118/fpga_25g/fpga_10g/Makefile @@ -19,6 +19,7 @@ SYN_FILES += rtl/common/mqnic_egress.v SYN_FILES += rtl/common/mqnic_ingress.v SYN_FILES += rtl/common/mqnic_l2_egress.v SYN_FILES += rtl/common/mqnic_l2_ingress.v +SYN_FILES += rtl/common/mqnic_rx_queue_map.v SYN_FILES += rtl/common/mqnic_ptp.v SYN_FILES += rtl/common/mqnic_ptp_clock.v SYN_FILES += rtl/common/mqnic_ptp_perout.v diff --git a/fpga/mqnic/VCU118/fpga_25g/tb/fpga_core/Makefile b/fpga/mqnic/VCU118/fpga_25g/tb/fpga_core/Makefile index 9ec70b7c7..a28ca5940 100644 --- a/fpga/mqnic/VCU118/fpga_25g/tb/fpga_core/Makefile +++ b/fpga/mqnic/VCU118/fpga_25g/tb/fpga_core/Makefile @@ -49,6 +49,7 @@ VERILOG_SOURCES += ../../rtl/common/mqnic_egress.v VERILOG_SOURCES += ../../rtl/common/mqnic_ingress.v VERILOG_SOURCES += ../../rtl/common/mqnic_l2_egress.v VERILOG_SOURCES += ../../rtl/common/mqnic_l2_ingress.v +VERILOG_SOURCES += ../../rtl/common/mqnic_rx_queue_map.v VERILOG_SOURCES += ../../rtl/common/mqnic_ptp.v VERILOG_SOURCES += ../../rtl/common/mqnic_ptp_clock.v VERILOG_SOURCES += ../../rtl/common/mqnic_ptp_perout.v diff --git a/fpga/mqnic/VCU118/fpga_25g/tb/fpga_core/test_fpga_core.py b/fpga/mqnic/VCU118/fpga_25g/tb/fpga_core/test_fpga_core.py index 19b2844df..053a393ce 100644 --- a/fpga/mqnic/VCU118/fpga_25g/tb/fpga_core/test_fpga_core.py +++ b/fpga/mqnic/VCU118/fpga_25g/tb/fpga_core/test_fpga_core.py @@ -512,6 +512,61 @@ async def run_test_nic(dut): assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff assert Ether(pkt.data).build() == test_pkt.build() + tb.log.info("Queue mapping offset test") + + data = bytearray([x % 256 for x in range(1024)]) + + tb.loopback_enable = True + + for k in range(4): + await tb.driver.interfaces[0].set_rx_queue_map_offset(0, k) + + await tb.driver.interfaces[0].start_xmit(data, 0) + + pkt = await tb.driver.interfaces[0].recv() + + tb.log.info("Packet: %s", pkt) + assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff + assert pkt.queue == k + + tb.loopback_enable = False + + await tb.driver.interfaces[0].set_rx_queue_map_offset(0, 0) + + tb.log.info("Queue mapping RSS mask test") + + await tb.driver.interfaces[0].set_rx_queue_map_rss_mask(0, 0x00000003) + + tb.loopback_enable = True + + queues = set() + + for k in range(64): + payload = bytes([x % 256 for x in range(256)]) + eth = Ether(src='5A:51:52:53:54:55', dst='DA:D1:D2:D3:D4:D5') + ip = IP(src='192.168.1.100', dst='192.168.1.101') + udp = UDP(sport=1, dport=k+0) + test_pkt = eth / ip / udp / payload + + test_pkt2 = test_pkt.copy() + test_pkt2[UDP].chksum = scapy.utils.checksum(bytes(test_pkt2[UDP])) + + await tb.driver.interfaces[0].start_xmit(test_pkt2.build(), 0, 34, 6) + + for k in range(64): + pkt = await tb.driver.interfaces[0].recv() + + tb.log.info("Packet: %s", pkt) + assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff + + queues.add(pkt.queue) + + assert len(queues) == 4 + + tb.loopback_enable = False + + await tb.driver.interfaces[0].set_rx_queue_map_rss_mask(0, 0) + tb.log.info("Multiple small packets") count = 64 @@ -585,6 +640,7 @@ def test_fpga_core(request): os.path.join(rtl_dir, "common", "mqnic_ingress.v"), os.path.join(rtl_dir, "common", "mqnic_l2_egress.v"), os.path.join(rtl_dir, "common", "mqnic_l2_ingress.v"), + os.path.join(rtl_dir, "common", "mqnic_rx_queue_map.v"), os.path.join(rtl_dir, "common", "mqnic_ptp.v"), os.path.join(rtl_dir, "common", "mqnic_ptp_clock.v"), os.path.join(rtl_dir, "common", "mqnic_ptp_perout.v"), diff --git a/fpga/mqnic/VCU1525/fpga_100g/fpga/Makefile b/fpga/mqnic/VCU1525/fpga_100g/fpga/Makefile index eba4ec17f..d1a8ca9cb 100644 --- a/fpga/mqnic/VCU1525/fpga_100g/fpga/Makefile +++ b/fpga/mqnic/VCU1525/fpga_100g/fpga/Makefile @@ -19,6 +19,7 @@ SYN_FILES += rtl/common/mqnic_egress.v SYN_FILES += rtl/common/mqnic_ingress.v SYN_FILES += rtl/common/mqnic_l2_egress.v SYN_FILES += rtl/common/mqnic_l2_ingress.v +SYN_FILES += rtl/common/mqnic_rx_queue_map.v SYN_FILES += rtl/common/mqnic_ptp.v SYN_FILES += rtl/common/mqnic_ptp_clock.v SYN_FILES += rtl/common/mqnic_ptp_perout.v diff --git a/fpga/mqnic/VCU1525/fpga_100g/tb/fpga_core/Makefile b/fpga/mqnic/VCU1525/fpga_100g/tb/fpga_core/Makefile index a050cea0e..e05ee2c28 100644 --- a/fpga/mqnic/VCU1525/fpga_100g/tb/fpga_core/Makefile +++ b/fpga/mqnic/VCU1525/fpga_100g/tb/fpga_core/Makefile @@ -49,6 +49,7 @@ VERILOG_SOURCES += ../../rtl/common/mqnic_egress.v VERILOG_SOURCES += ../../rtl/common/mqnic_ingress.v VERILOG_SOURCES += ../../rtl/common/mqnic_l2_egress.v VERILOG_SOURCES += ../../rtl/common/mqnic_l2_ingress.v +VERILOG_SOURCES += ../../rtl/common/mqnic_rx_queue_map.v VERILOG_SOURCES += ../../rtl/common/mqnic_ptp.v VERILOG_SOURCES += ../../rtl/common/mqnic_ptp_clock.v VERILOG_SOURCES += ../../rtl/common/mqnic_ptp_perout.v diff --git a/fpga/mqnic/VCU1525/fpga_100g/tb/fpga_core/test_fpga_core.py b/fpga/mqnic/VCU1525/fpga_100g/tb/fpga_core/test_fpga_core.py index f5eb07d0b..84681bc74 100644 --- a/fpga/mqnic/VCU1525/fpga_100g/tb/fpga_core/test_fpga_core.py +++ b/fpga/mqnic/VCU1525/fpga_100g/tb/fpga_core/test_fpga_core.py @@ -434,6 +434,61 @@ async def run_test_nic(dut): assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff assert Ether(pkt.data).build() == test_pkt.build() + tb.log.info("Queue mapping offset test") + + data = bytearray([x % 256 for x in range(1024)]) + + tb.loopback_enable = True + + for k in range(4): + await tb.driver.interfaces[0].set_rx_queue_map_offset(0, k) + + await tb.driver.interfaces[0].start_xmit(data, 0) + + pkt = await tb.driver.interfaces[0].recv() + + tb.log.info("Packet: %s", pkt) + assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff + assert pkt.queue == k + + tb.loopback_enable = False + + await tb.driver.interfaces[0].set_rx_queue_map_offset(0, 0) + + tb.log.info("Queue mapping RSS mask test") + + await tb.driver.interfaces[0].set_rx_queue_map_rss_mask(0, 0x00000003) + + tb.loopback_enable = True + + queues = set() + + for k in range(64): + payload = bytes([x % 256 for x in range(256)]) + eth = Ether(src='5A:51:52:53:54:55', dst='DA:D1:D2:D3:D4:D5') + ip = IP(src='192.168.1.100', dst='192.168.1.101') + udp = UDP(sport=1, dport=k+0) + test_pkt = eth / ip / udp / payload + + test_pkt2 = test_pkt.copy() + test_pkt2[UDP].chksum = scapy.utils.checksum(bytes(test_pkt2[UDP])) + + await tb.driver.interfaces[0].start_xmit(test_pkt2.build(), 0, 34, 6) + + for k in range(64): + pkt = await tb.driver.interfaces[0].recv() + + tb.log.info("Packet: %s", pkt) + assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff + + queues.add(pkt.queue) + + assert len(queues) == 4 + + tb.loopback_enable = False + + await tb.driver.interfaces[0].set_rx_queue_map_rss_mask(0, 0) + tb.log.info("Multiple small packets") count = 64 @@ -527,6 +582,7 @@ def test_fpga_core(request): os.path.join(rtl_dir, "common", "mqnic_ingress.v"), os.path.join(rtl_dir, "common", "mqnic_l2_egress.v"), os.path.join(rtl_dir, "common", "mqnic_l2_ingress.v"), + os.path.join(rtl_dir, "common", "mqnic_rx_queue_map.v"), os.path.join(rtl_dir, "common", "mqnic_ptp.v"), os.path.join(rtl_dir, "common", "mqnic_ptp_clock.v"), os.path.join(rtl_dir, "common", "mqnic_ptp_perout.v"), diff --git a/fpga/mqnic/VCU1525/fpga_25g/fpga/Makefile b/fpga/mqnic/VCU1525/fpga_25g/fpga/Makefile index f12c89e95..ed4f16234 100644 --- a/fpga/mqnic/VCU1525/fpga_25g/fpga/Makefile +++ b/fpga/mqnic/VCU1525/fpga_25g/fpga/Makefile @@ -19,6 +19,7 @@ SYN_FILES += rtl/common/mqnic_egress.v SYN_FILES += rtl/common/mqnic_ingress.v SYN_FILES += rtl/common/mqnic_l2_egress.v SYN_FILES += rtl/common/mqnic_l2_ingress.v +SYN_FILES += rtl/common/mqnic_rx_queue_map.v SYN_FILES += rtl/common/mqnic_ptp.v SYN_FILES += rtl/common/mqnic_ptp_clock.v SYN_FILES += rtl/common/mqnic_ptp_perout.v diff --git a/fpga/mqnic/VCU1525/fpga_25g/fpga_10g/Makefile b/fpga/mqnic/VCU1525/fpga_25g/fpga_10g/Makefile index f12c89e95..ed4f16234 100644 --- a/fpga/mqnic/VCU1525/fpga_25g/fpga_10g/Makefile +++ b/fpga/mqnic/VCU1525/fpga_25g/fpga_10g/Makefile @@ -19,6 +19,7 @@ SYN_FILES += rtl/common/mqnic_egress.v SYN_FILES += rtl/common/mqnic_ingress.v SYN_FILES += rtl/common/mqnic_l2_egress.v SYN_FILES += rtl/common/mqnic_l2_ingress.v +SYN_FILES += rtl/common/mqnic_rx_queue_map.v SYN_FILES += rtl/common/mqnic_ptp.v SYN_FILES += rtl/common/mqnic_ptp_clock.v SYN_FILES += rtl/common/mqnic_ptp_perout.v diff --git a/fpga/mqnic/VCU1525/fpga_25g/tb/fpga_core/Makefile b/fpga/mqnic/VCU1525/fpga_25g/tb/fpga_core/Makefile index b28ec4948..c6f43edd5 100644 --- a/fpga/mqnic/VCU1525/fpga_25g/tb/fpga_core/Makefile +++ b/fpga/mqnic/VCU1525/fpga_25g/tb/fpga_core/Makefile @@ -49,6 +49,7 @@ VERILOG_SOURCES += ../../rtl/common/mqnic_egress.v VERILOG_SOURCES += ../../rtl/common/mqnic_ingress.v VERILOG_SOURCES += ../../rtl/common/mqnic_l2_egress.v VERILOG_SOURCES += ../../rtl/common/mqnic_l2_ingress.v +VERILOG_SOURCES += ../../rtl/common/mqnic_rx_queue_map.v VERILOG_SOURCES += ../../rtl/common/mqnic_ptp.v VERILOG_SOURCES += ../../rtl/common/mqnic_ptp_clock.v VERILOG_SOURCES += ../../rtl/common/mqnic_ptp_perout.v diff --git a/fpga/mqnic/VCU1525/fpga_25g/tb/fpga_core/test_fpga_core.py b/fpga/mqnic/VCU1525/fpga_25g/tb/fpga_core/test_fpga_core.py index 11833def9..e2f4984e1 100644 --- a/fpga/mqnic/VCU1525/fpga_25g/tb/fpga_core/test_fpga_core.py +++ b/fpga/mqnic/VCU1525/fpga_25g/tb/fpga_core/test_fpga_core.py @@ -506,6 +506,61 @@ async def run_test_nic(dut): assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff assert Ether(pkt.data).build() == test_pkt.build() + tb.log.info("Queue mapping offset test") + + data = bytearray([x % 256 for x in range(1024)]) + + tb.loopback_enable = True + + for k in range(4): + await tb.driver.interfaces[0].set_rx_queue_map_offset(0, k) + + await tb.driver.interfaces[0].start_xmit(data, 0) + + pkt = await tb.driver.interfaces[0].recv() + + tb.log.info("Packet: %s", pkt) + assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff + assert pkt.queue == k + + tb.loopback_enable = False + + await tb.driver.interfaces[0].set_rx_queue_map_offset(0, 0) + + tb.log.info("Queue mapping RSS mask test") + + await tb.driver.interfaces[0].set_rx_queue_map_rss_mask(0, 0x00000003) + + tb.loopback_enable = True + + queues = set() + + for k in range(64): + payload = bytes([x % 256 for x in range(256)]) + eth = Ether(src='5A:51:52:53:54:55', dst='DA:D1:D2:D3:D4:D5') + ip = IP(src='192.168.1.100', dst='192.168.1.101') + udp = UDP(sport=1, dport=k+0) + test_pkt = eth / ip / udp / payload + + test_pkt2 = test_pkt.copy() + test_pkt2[UDP].chksum = scapy.utils.checksum(bytes(test_pkt2[UDP])) + + await tb.driver.interfaces[0].start_xmit(test_pkt2.build(), 0, 34, 6) + + for k in range(64): + pkt = await tb.driver.interfaces[0].recv() + + tb.log.info("Packet: %s", pkt) + assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff + + queues.add(pkt.queue) + + assert len(queues) == 4 + + tb.loopback_enable = False + + await tb.driver.interfaces[0].set_rx_queue_map_rss_mask(0, 0) + tb.log.info("Multiple small packets") count = 64 @@ -579,6 +634,7 @@ def test_fpga_core(request): os.path.join(rtl_dir, "common", "mqnic_ingress.v"), os.path.join(rtl_dir, "common", "mqnic_l2_egress.v"), os.path.join(rtl_dir, "common", "mqnic_l2_ingress.v"), + os.path.join(rtl_dir, "common", "mqnic_rx_queue_map.v"), os.path.join(rtl_dir, "common", "mqnic_ptp.v"), os.path.join(rtl_dir, "common", "mqnic_ptp_clock.v"), os.path.join(rtl_dir, "common", "mqnic_ptp_perout.v"), diff --git a/fpga/mqnic/XUPP3R/fpga_100g/fpga/Makefile b/fpga/mqnic/XUPP3R/fpga_100g/fpga/Makefile index 4c63545ec..67c2e9034 100644 --- a/fpga/mqnic/XUPP3R/fpga_100g/fpga/Makefile +++ b/fpga/mqnic/XUPP3R/fpga_100g/fpga/Makefile @@ -18,6 +18,7 @@ SYN_FILES += rtl/common/mqnic_egress.v SYN_FILES += rtl/common/mqnic_ingress.v SYN_FILES += rtl/common/mqnic_l2_egress.v SYN_FILES += rtl/common/mqnic_l2_ingress.v +SYN_FILES += rtl/common/mqnic_rx_queue_map.v SYN_FILES += rtl/common/mqnic_ptp.v SYN_FILES += rtl/common/mqnic_ptp_clock.v SYN_FILES += rtl/common/mqnic_ptp_perout.v diff --git a/fpga/mqnic/XUPP3R/fpga_100g/tb/fpga_core/Makefile b/fpga/mqnic/XUPP3R/fpga_100g/tb/fpga_core/Makefile index ab6553a1d..4b34fce83 100644 --- a/fpga/mqnic/XUPP3R/fpga_100g/tb/fpga_core/Makefile +++ b/fpga/mqnic/XUPP3R/fpga_100g/tb/fpga_core/Makefile @@ -49,6 +49,7 @@ VERILOG_SOURCES += ../../rtl/common/mqnic_egress.v VERILOG_SOURCES += ../../rtl/common/mqnic_ingress.v VERILOG_SOURCES += ../../rtl/common/mqnic_l2_egress.v VERILOG_SOURCES += ../../rtl/common/mqnic_l2_ingress.v +VERILOG_SOURCES += ../../rtl/common/mqnic_rx_queue_map.v VERILOG_SOURCES += ../../rtl/common/mqnic_ptp.v VERILOG_SOURCES += ../../rtl/common/mqnic_ptp_clock.v VERILOG_SOURCES += ../../rtl/common/mqnic_ptp_perout.v diff --git a/fpga/mqnic/XUPP3R/fpga_100g/tb/fpga_core/test_fpga_core.py b/fpga/mqnic/XUPP3R/fpga_100g/tb/fpga_core/test_fpga_core.py index 4401169c9..c7b0a92ae 100644 --- a/fpga/mqnic/XUPP3R/fpga_100g/tb/fpga_core/test_fpga_core.py +++ b/fpga/mqnic/XUPP3R/fpga_100g/tb/fpga_core/test_fpga_core.py @@ -502,6 +502,61 @@ async def run_test_nic(dut): assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff assert Ether(pkt.data).build() == test_pkt.build() + tb.log.info("Queue mapping offset test") + + data = bytearray([x % 256 for x in range(1024)]) + + tb.loopback_enable = True + + for k in range(4): + await tb.driver.interfaces[0].set_rx_queue_map_offset(0, k) + + await tb.driver.interfaces[0].start_xmit(data, 0) + + pkt = await tb.driver.interfaces[0].recv() + + tb.log.info("Packet: %s", pkt) + assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff + assert pkt.queue == k + + tb.loopback_enable = False + + await tb.driver.interfaces[0].set_rx_queue_map_offset(0, 0) + + tb.log.info("Queue mapping RSS mask test") + + await tb.driver.interfaces[0].set_rx_queue_map_rss_mask(0, 0x00000003) + + tb.loopback_enable = True + + queues = set() + + for k in range(64): + payload = bytes([x % 256 for x in range(256)]) + eth = Ether(src='5A:51:52:53:54:55', dst='DA:D1:D2:D3:D4:D5') + ip = IP(src='192.168.1.100', dst='192.168.1.101') + udp = UDP(sport=1, dport=k+0) + test_pkt = eth / ip / udp / payload + + test_pkt2 = test_pkt.copy() + test_pkt2[UDP].chksum = scapy.utils.checksum(bytes(test_pkt2[UDP])) + + await tb.driver.interfaces[0].start_xmit(test_pkt2.build(), 0, 34, 6) + + for k in range(64): + pkt = await tb.driver.interfaces[0].recv() + + tb.log.info("Packet: %s", pkt) + assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff + + queues.add(pkt.queue) + + assert len(queues) == 4 + + tb.loopback_enable = False + + await tb.driver.interfaces[0].set_rx_queue_map_rss_mask(0, 0) + tb.log.info("Multiple small packets") count = 64 @@ -595,6 +650,7 @@ def test_fpga_core(request): os.path.join(rtl_dir, "common", "mqnic_ingress.v"), os.path.join(rtl_dir, "common", "mqnic_l2_egress.v"), os.path.join(rtl_dir, "common", "mqnic_l2_ingress.v"), + os.path.join(rtl_dir, "common", "mqnic_rx_queue_map.v"), os.path.join(rtl_dir, "common", "mqnic_ptp.v"), os.path.join(rtl_dir, "common", "mqnic_ptp_clock.v"), os.path.join(rtl_dir, "common", "mqnic_ptp_perout.v"), diff --git a/fpga/mqnic/XUPP3R/fpga_25g/fpga/Makefile b/fpga/mqnic/XUPP3R/fpga_25g/fpga/Makefile index b01c32d05..ae82be852 100644 --- a/fpga/mqnic/XUPP3R/fpga_25g/fpga/Makefile +++ b/fpga/mqnic/XUPP3R/fpga_25g/fpga/Makefile @@ -18,6 +18,7 @@ SYN_FILES += rtl/common/mqnic_egress.v SYN_FILES += rtl/common/mqnic_ingress.v SYN_FILES += rtl/common/mqnic_l2_egress.v SYN_FILES += rtl/common/mqnic_l2_ingress.v +SYN_FILES += rtl/common/mqnic_rx_queue_map.v SYN_FILES += rtl/common/mqnic_ptp.v SYN_FILES += rtl/common/mqnic_ptp_clock.v SYN_FILES += rtl/common/mqnic_ptp_perout.v diff --git a/fpga/mqnic/XUPP3R/fpga_25g/fpga_10g/Makefile b/fpga/mqnic/XUPP3R/fpga_25g/fpga_10g/Makefile index b01c32d05..ae82be852 100644 --- a/fpga/mqnic/XUPP3R/fpga_25g/fpga_10g/Makefile +++ b/fpga/mqnic/XUPP3R/fpga_25g/fpga_10g/Makefile @@ -18,6 +18,7 @@ SYN_FILES += rtl/common/mqnic_egress.v SYN_FILES += rtl/common/mqnic_ingress.v SYN_FILES += rtl/common/mqnic_l2_egress.v SYN_FILES += rtl/common/mqnic_l2_ingress.v +SYN_FILES += rtl/common/mqnic_rx_queue_map.v SYN_FILES += rtl/common/mqnic_ptp.v SYN_FILES += rtl/common/mqnic_ptp_clock.v SYN_FILES += rtl/common/mqnic_ptp_perout.v diff --git a/fpga/mqnic/XUPP3R/fpga_25g/tb/fpga_core/Makefile b/fpga/mqnic/XUPP3R/fpga_25g/tb/fpga_core/Makefile index eaf72193e..61679bf9f 100644 --- a/fpga/mqnic/XUPP3R/fpga_25g/tb/fpga_core/Makefile +++ b/fpga/mqnic/XUPP3R/fpga_25g/tb/fpga_core/Makefile @@ -49,6 +49,7 @@ VERILOG_SOURCES += ../../rtl/common/mqnic_egress.v VERILOG_SOURCES += ../../rtl/common/mqnic_ingress.v VERILOG_SOURCES += ../../rtl/common/mqnic_l2_egress.v VERILOG_SOURCES += ../../rtl/common/mqnic_l2_ingress.v +VERILOG_SOURCES += ../../rtl/common/mqnic_rx_queue_map.v VERILOG_SOURCES += ../../rtl/common/mqnic_ptp.v VERILOG_SOURCES += ../../rtl/common/mqnic_ptp_clock.v VERILOG_SOURCES += ../../rtl/common/mqnic_ptp_perout.v diff --git a/fpga/mqnic/XUPP3R/fpga_25g/tb/fpga_core/test_fpga_core.py b/fpga/mqnic/XUPP3R/fpga_25g/tb/fpga_core/test_fpga_core.py index 8cda92ad8..b48016483 100644 --- a/fpga/mqnic/XUPP3R/fpga_25g/tb/fpga_core/test_fpga_core.py +++ b/fpga/mqnic/XUPP3R/fpga_25g/tb/fpga_core/test_fpga_core.py @@ -646,6 +646,61 @@ async def run_test_nic(dut): assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff assert Ether(pkt.data).build() == test_pkt.build() + tb.log.info("Queue mapping offset test") + + data = bytearray([x % 256 for x in range(1024)]) + + tb.loopback_enable = True + + for k in range(4): + await tb.driver.interfaces[0].set_rx_queue_map_offset(0, k) + + await tb.driver.interfaces[0].start_xmit(data, 0) + + pkt = await tb.driver.interfaces[0].recv() + + tb.log.info("Packet: %s", pkt) + assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff + assert pkt.queue == k + + tb.loopback_enable = False + + await tb.driver.interfaces[0].set_rx_queue_map_offset(0, 0) + + tb.log.info("Queue mapping RSS mask test") + + await tb.driver.interfaces[0].set_rx_queue_map_rss_mask(0, 0x00000003) + + tb.loopback_enable = True + + queues = set() + + for k in range(64): + payload = bytes([x % 256 for x in range(256)]) + eth = Ether(src='5A:51:52:53:54:55', dst='DA:D1:D2:D3:D4:D5') + ip = IP(src='192.168.1.100', dst='192.168.1.101') + udp = UDP(sport=1, dport=k+0) + test_pkt = eth / ip / udp / payload + + test_pkt2 = test_pkt.copy() + test_pkt2[UDP].chksum = scapy.utils.checksum(bytes(test_pkt2[UDP])) + + await tb.driver.interfaces[0].start_xmit(test_pkt2.build(), 0, 34, 6) + + for k in range(64): + pkt = await tb.driver.interfaces[0].recv() + + tb.log.info("Packet: %s", pkt) + assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff + + queues.add(pkt.queue) + + assert len(queues) == 4 + + tb.loopback_enable = False + + await tb.driver.interfaces[0].set_rx_queue_map_rss_mask(0, 0) + tb.log.info("Multiple small packets") count = 64 @@ -719,6 +774,7 @@ def test_fpga_core(request): os.path.join(rtl_dir, "common", "mqnic_ingress.v"), os.path.join(rtl_dir, "common", "mqnic_l2_egress.v"), os.path.join(rtl_dir, "common", "mqnic_l2_ingress.v"), + os.path.join(rtl_dir, "common", "mqnic_rx_queue_map.v"), os.path.join(rtl_dir, "common", "mqnic_ptp.v"), os.path.join(rtl_dir, "common", "mqnic_ptp_clock.v"), os.path.join(rtl_dir, "common", "mqnic_ptp_perout.v"), diff --git a/fpga/mqnic/ZCU106/fpga_pcie/fpga/Makefile b/fpga/mqnic/ZCU106/fpga_pcie/fpga/Makefile index 2d7e022d2..3070a592c 100644 --- a/fpga/mqnic/ZCU106/fpga_pcie/fpga/Makefile +++ b/fpga/mqnic/ZCU106/fpga_pcie/fpga/Makefile @@ -19,6 +19,7 @@ SYN_FILES += rtl/common/mqnic_egress.v SYN_FILES += rtl/common/mqnic_ingress.v SYN_FILES += rtl/common/mqnic_l2_egress.v SYN_FILES += rtl/common/mqnic_l2_ingress.v +SYN_FILES += rtl/common/mqnic_rx_queue_map.v SYN_FILES += rtl/common/mqnic_ptp.v SYN_FILES += rtl/common/mqnic_ptp_clock.v SYN_FILES += rtl/common/mqnic_ptp_perout.v diff --git a/fpga/mqnic/ZCU106/fpga_pcie/tb/fpga_core/Makefile b/fpga/mqnic/ZCU106/fpga_pcie/tb/fpga_core/Makefile index cdb9fc509..3e1fcd683 100644 --- a/fpga/mqnic/ZCU106/fpga_pcie/tb/fpga_core/Makefile +++ b/fpga/mqnic/ZCU106/fpga_pcie/tb/fpga_core/Makefile @@ -49,6 +49,7 @@ VERILOG_SOURCES += ../../rtl/common/mqnic_egress.v VERILOG_SOURCES += ../../rtl/common/mqnic_ingress.v VERILOG_SOURCES += ../../rtl/common/mqnic_l2_egress.v VERILOG_SOURCES += ../../rtl/common/mqnic_l2_ingress.v +VERILOG_SOURCES += ../../rtl/common/mqnic_rx_queue_map.v VERILOG_SOURCES += ../../rtl/common/mqnic_ptp.v VERILOG_SOURCES += ../../rtl/common/mqnic_ptp_clock.v VERILOG_SOURCES += ../../rtl/common/mqnic_ptp_perout.v diff --git a/fpga/mqnic/ZCU106/fpga_pcie/tb/fpga_core/test_fpga_core.py b/fpga/mqnic/ZCU106/fpga_pcie/tb/fpga_core/test_fpga_core.py index e351f76cc..ee7e9b10c 100644 --- a/fpga/mqnic/ZCU106/fpga_pcie/tb/fpga_core/test_fpga_core.py +++ b/fpga/mqnic/ZCU106/fpga_pcie/tb/fpga_core/test_fpga_core.py @@ -413,6 +413,61 @@ async def run_test_nic(dut): assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff assert Ether(pkt.data).build() == test_pkt.build() + tb.log.info("Queue mapping offset test") + + data = bytearray([x % 256 for x in range(1024)]) + + tb.loopback_enable = True + + for k in range(4): + await tb.driver.interfaces[0].set_rx_queue_map_offset(0, k) + + await tb.driver.interfaces[0].start_xmit(data, 0) + + pkt = await tb.driver.interfaces[0].recv() + + tb.log.info("Packet: %s", pkt) + assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff + assert pkt.queue == k + + tb.loopback_enable = False + + await tb.driver.interfaces[0].set_rx_queue_map_offset(0, 0) + + tb.log.info("Queue mapping RSS mask test") + + await tb.driver.interfaces[0].set_rx_queue_map_rss_mask(0, 0x00000003) + + tb.loopback_enable = True + + queues = set() + + for k in range(64): + payload = bytes([x % 256 for x in range(256)]) + eth = Ether(src='5A:51:52:53:54:55', dst='DA:D1:D2:D3:D4:D5') + ip = IP(src='192.168.1.100', dst='192.168.1.101') + udp = UDP(sport=1, dport=k+0) + test_pkt = eth / ip / udp / payload + + test_pkt2 = test_pkt.copy() + test_pkt2[UDP].chksum = scapy.utils.checksum(bytes(test_pkt2[UDP])) + + await tb.driver.interfaces[0].start_xmit(test_pkt2.build(), 0, 34, 6) + + for k in range(64): + pkt = await tb.driver.interfaces[0].recv() + + tb.log.info("Packet: %s", pkt) + assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff + + queues.add(pkt.queue) + + assert len(queues) == 4 + + tb.loopback_enable = False + + await tb.driver.interfaces[0].set_rx_queue_map_rss_mask(0, 0) + tb.log.info("Multiple small packets") count = 64 @@ -486,6 +541,7 @@ def test_fpga_core(request): os.path.join(rtl_dir, "common", "mqnic_ingress.v"), os.path.join(rtl_dir, "common", "mqnic_l2_egress.v"), os.path.join(rtl_dir, "common", "mqnic_l2_ingress.v"), + os.path.join(rtl_dir, "common", "mqnic_rx_queue_map.v"), os.path.join(rtl_dir, "common", "mqnic_ptp.v"), os.path.join(rtl_dir, "common", "mqnic_ptp_clock.v"), os.path.join(rtl_dir, "common", "mqnic_ptp_perout.v"), diff --git a/fpga/mqnic/ZCU106/fpga_zynqmp/fpga/Makefile b/fpga/mqnic/ZCU106/fpga_zynqmp/fpga/Makefile index 2bc35d2be..b5c2ba3a7 100644 --- a/fpga/mqnic/ZCU106/fpga_zynqmp/fpga/Makefile +++ b/fpga/mqnic/ZCU106/fpga_zynqmp/fpga/Makefile @@ -17,6 +17,7 @@ SYN_FILES += rtl/common/mqnic_egress.v SYN_FILES += rtl/common/mqnic_ingress.v SYN_FILES += rtl/common/mqnic_l2_egress.v SYN_FILES += rtl/common/mqnic_l2_ingress.v +SYN_FILES += rtl/common/mqnic_rx_queue_map.v SYN_FILES += rtl/common/mqnic_ptp.v SYN_FILES += rtl/common/mqnic_ptp_clock.v SYN_FILES += rtl/common/mqnic_ptp_perout.v diff --git a/fpga/mqnic/ZCU106/fpga_zynqmp/tb/fpga_core/Makefile b/fpga/mqnic/ZCU106/fpga_zynqmp/tb/fpga_core/Makefile index 1debd617a..f31f965fa 100644 --- a/fpga/mqnic/ZCU106/fpga_zynqmp/tb/fpga_core/Makefile +++ b/fpga/mqnic/ZCU106/fpga_zynqmp/tb/fpga_core/Makefile @@ -48,6 +48,7 @@ VERILOG_SOURCES += ../../rtl/common/mqnic_egress.v VERILOG_SOURCES += ../../rtl/common/mqnic_ingress.v VERILOG_SOURCES += ../../rtl/common/mqnic_l2_egress.v VERILOG_SOURCES += ../../rtl/common/mqnic_l2_ingress.v +VERILOG_SOURCES += ../../rtl/common/mqnic_rx_queue_map.v VERILOG_SOURCES += ../../rtl/common/mqnic_ptp.v VERILOG_SOURCES += ../../rtl/common/mqnic_ptp_clock.v VERILOG_SOURCES += ../../rtl/common/mqnic_ptp_perout.v diff --git a/fpga/mqnic/ZCU106/fpga_zynqmp/tb/fpga_core/test_fpga_core.py b/fpga/mqnic/ZCU106/fpga_zynqmp/tb/fpga_core/test_fpga_core.py index 58b28e46e..556150f6d 100644 --- a/fpga/mqnic/ZCU106/fpga_zynqmp/tb/fpga_core/test_fpga_core.py +++ b/fpga/mqnic/ZCU106/fpga_zynqmp/tb/fpga_core/test_fpga_core.py @@ -223,6 +223,61 @@ async def run_test_nic(dut): assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff assert Ether(pkt.data).build() == test_pkt.build() + tb.log.info("Queue mapping offset test") + + data = bytearray([x % 256 for x in range(1024)]) + + tb.loopback_enable = True + + for k in range(4): + await tb.driver.interfaces[0].set_rx_queue_map_offset(0, k) + + await tb.driver.interfaces[0].start_xmit(data, 0) + + pkt = await tb.driver.interfaces[0].recv() + + tb.log.info("Packet: %s", pkt) + assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff + assert pkt.queue == k + + tb.loopback_enable = False + + await tb.driver.interfaces[0].set_rx_queue_map_offset(0, 0) + + tb.log.info("Queue mapping RSS mask test") + + await tb.driver.interfaces[0].set_rx_queue_map_rss_mask(0, 0x00000003) + + tb.loopback_enable = True + + queues = set() + + for k in range(64): + payload = bytes([x % 256 for x in range(256)]) + eth = Ether(src='5A:51:52:53:54:55', dst='DA:D1:D2:D3:D4:D5') + ip = IP(src='192.168.1.100', dst='192.168.1.101') + udp = UDP(sport=1, dport=k+0) + test_pkt = eth / ip / udp / payload + + test_pkt2 = test_pkt.copy() + test_pkt2[UDP].chksum = scapy.utils.checksum(bytes(test_pkt2[UDP])) + + await tb.driver.interfaces[0].start_xmit(test_pkt2.build(), 0, 34, 6) + + for k in range(64): + pkt = await tb.driver.interfaces[0].recv() + + tb.log.info("Packet: %s", pkt) + assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff + + queues.add(pkt.queue) + + assert len(queues) == 4 + + tb.loopback_enable = False + + await tb.driver.interfaces[0].set_rx_queue_map_rss_mask(0, 0) + tb.log.info("Multiple small packets") count = 64 @@ -295,6 +350,7 @@ def test_fpga_core(request): os.path.join(rtl_dir, "common", "mqnic_ingress.v"), os.path.join(rtl_dir, "common", "mqnic_l2_egress.v"), os.path.join(rtl_dir, "common", "mqnic_l2_ingress.v"), + os.path.join(rtl_dir, "common", "mqnic_rx_queue_map.v"), os.path.join(rtl_dir, "common", "mqnic_ptp.v"), os.path.join(rtl_dir, "common", "mqnic_ptp_clock.v"), os.path.join(rtl_dir, "common", "mqnic_ptp_perout.v"), diff --git a/fpga/mqnic/fb2CG/fpga_100g/fpga/Makefile b/fpga/mqnic/fb2CG/fpga_100g/fpga/Makefile index 2b851f9b5..352b156a7 100644 --- a/fpga/mqnic/fb2CG/fpga_100g/fpga/Makefile +++ b/fpga/mqnic/fb2CG/fpga_100g/fpga/Makefile @@ -20,6 +20,7 @@ SYN_FILES += rtl/common/mqnic_egress.v SYN_FILES += rtl/common/mqnic_ingress.v SYN_FILES += rtl/common/mqnic_l2_egress.v SYN_FILES += rtl/common/mqnic_l2_ingress.v +SYN_FILES += rtl/common/mqnic_rx_queue_map.v SYN_FILES += rtl/common/mqnic_ptp.v SYN_FILES += rtl/common/mqnic_ptp_clock.v SYN_FILES += rtl/common/mqnic_ptp_perout.v diff --git a/fpga/mqnic/fb2CG/fpga_100g/fpga_app_dma_bench/Makefile b/fpga/mqnic/fb2CG/fpga_100g/fpga_app_dma_bench/Makefile index 6ed937e5e..f64e6dda6 100644 --- a/fpga/mqnic/fb2CG/fpga_100g/fpga_app_dma_bench/Makefile +++ b/fpga/mqnic/fb2CG/fpga_100g/fpga_app_dma_bench/Makefile @@ -20,6 +20,7 @@ SYN_FILES += rtl/common/mqnic_egress.v SYN_FILES += rtl/common/mqnic_ingress.v SYN_FILES += rtl/common/mqnic_l2_egress.v SYN_FILES += rtl/common/mqnic_l2_ingress.v +SYN_FILES += rtl/common/mqnic_rx_queue_map.v SYN_FILES += rtl/common/mqnic_ptp.v SYN_FILES += rtl/common/mqnic_ptp_clock.v SYN_FILES += rtl/common/mqnic_ptp_perout.v diff --git a/fpga/mqnic/fb2CG/fpga_100g/fpga_app_template/Makefile b/fpga/mqnic/fb2CG/fpga_100g/fpga_app_template/Makefile index 01fad4904..eb5e6f8a8 100644 --- a/fpga/mqnic/fb2CG/fpga_100g/fpga_app_template/Makefile +++ b/fpga/mqnic/fb2CG/fpga_100g/fpga_app_template/Makefile @@ -20,6 +20,7 @@ SYN_FILES += rtl/common/mqnic_egress.v SYN_FILES += rtl/common/mqnic_ingress.v SYN_FILES += rtl/common/mqnic_l2_egress.v SYN_FILES += rtl/common/mqnic_l2_ingress.v +SYN_FILES += rtl/common/mqnic_rx_queue_map.v SYN_FILES += rtl/common/mqnic_ptp.v SYN_FILES += rtl/common/mqnic_ptp_clock.v SYN_FILES += rtl/common/mqnic_ptp_perout.v diff --git a/fpga/mqnic/fb2CG/fpga_100g/fpga_tdma/Makefile b/fpga/mqnic/fb2CG/fpga_100g/fpga_tdma/Makefile index 643fd951e..cf0be2fae 100644 --- a/fpga/mqnic/fb2CG/fpga_100g/fpga_tdma/Makefile +++ b/fpga/mqnic/fb2CG/fpga_100g/fpga_tdma/Makefile @@ -20,6 +20,7 @@ SYN_FILES += rtl/common/mqnic_egress.v SYN_FILES += rtl/common/mqnic_ingress.v SYN_FILES += rtl/common/mqnic_l2_egress.v SYN_FILES += rtl/common/mqnic_l2_ingress.v +SYN_FILES += rtl/common/mqnic_rx_queue_map.v SYN_FILES += rtl/common/mqnic_ptp.v SYN_FILES += rtl/common/mqnic_ptp_clock.v SYN_FILES += rtl/common/mqnic_ptp_perout.v diff --git a/fpga/mqnic/fb2CG/fpga_100g/tb/fpga_core/Makefile b/fpga/mqnic/fb2CG/fpga_100g/tb/fpga_core/Makefile index e5c96a1fa..edc90e31f 100644 --- a/fpga/mqnic/fb2CG/fpga_100g/tb/fpga_core/Makefile +++ b/fpga/mqnic/fb2CG/fpga_100g/tb/fpga_core/Makefile @@ -50,6 +50,7 @@ VERILOG_SOURCES += ../../rtl/common/mqnic_egress.v VERILOG_SOURCES += ../../rtl/common/mqnic_ingress.v VERILOG_SOURCES += ../../rtl/common/mqnic_l2_egress.v VERILOG_SOURCES += ../../rtl/common/mqnic_l2_ingress.v +VERILOG_SOURCES += ../../rtl/common/mqnic_rx_queue_map.v VERILOG_SOURCES += ../../rtl/common/mqnic_ptp.v VERILOG_SOURCES += ../../rtl/common/mqnic_ptp_clock.v VERILOG_SOURCES += ../../rtl/common/mqnic_ptp_perout.v diff --git a/fpga/mqnic/fb2CG/fpga_100g/tb/fpga_core/test_fpga_core.py b/fpga/mqnic/fb2CG/fpga_100g/tb/fpga_core/test_fpga_core.py index 469b9f50e..a65a0b574 100644 --- a/fpga/mqnic/fb2CG/fpga_100g/tb/fpga_core/test_fpga_core.py +++ b/fpga/mqnic/fb2CG/fpga_100g/tb/fpga_core/test_fpga_core.py @@ -438,6 +438,61 @@ async def run_test_nic(dut): assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff assert Ether(pkt.data).build() == test_pkt.build() + tb.log.info("Queue mapping offset test") + + data = bytearray([x % 256 for x in range(1024)]) + + tb.loopback_enable = True + + for k in range(4): + await tb.driver.interfaces[0].set_rx_queue_map_offset(0, k) + + await tb.driver.interfaces[0].start_xmit(data, 0) + + pkt = await tb.driver.interfaces[0].recv() + + tb.log.info("Packet: %s", pkt) + assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff + assert pkt.queue == k + + tb.loopback_enable = False + + await tb.driver.interfaces[0].set_rx_queue_map_offset(0, 0) + + tb.log.info("Queue mapping RSS mask test") + + await tb.driver.interfaces[0].set_rx_queue_map_rss_mask(0, 0x00000003) + + tb.loopback_enable = True + + queues = set() + + for k in range(64): + payload = bytes([x % 256 for x in range(256)]) + eth = Ether(src='5A:51:52:53:54:55', dst='DA:D1:D2:D3:D4:D5') + ip = IP(src='192.168.1.100', dst='192.168.1.101') + udp = UDP(sport=1, dport=k+0) + test_pkt = eth / ip / udp / payload + + test_pkt2 = test_pkt.copy() + test_pkt2[UDP].chksum = scapy.utils.checksum(bytes(test_pkt2[UDP])) + + await tb.driver.interfaces[0].start_xmit(test_pkt2.build(), 0, 34, 6) + + for k in range(64): + pkt = await tb.driver.interfaces[0].recv() + + tb.log.info("Packet: %s", pkt) + assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff + + queues.add(pkt.queue) + + assert len(queues) == 4 + + tb.loopback_enable = False + + await tb.driver.interfaces[0].set_rx_queue_map_rss_mask(0, 0) + tb.log.info("Multiple small packets") count = 64 @@ -532,6 +587,7 @@ def test_fpga_core(request): os.path.join(rtl_dir, "common", "mqnic_ingress.v"), os.path.join(rtl_dir, "common", "mqnic_l2_egress.v"), os.path.join(rtl_dir, "common", "mqnic_l2_ingress.v"), + os.path.join(rtl_dir, "common", "mqnic_rx_queue_map.v"), os.path.join(rtl_dir, "common", "mqnic_ptp.v"), os.path.join(rtl_dir, "common", "mqnic_ptp_clock.v"), os.path.join(rtl_dir, "common", "mqnic_ptp_perout.v"), diff --git a/fpga/mqnic/fb2CG/fpga_25g/fpga/Makefile b/fpga/mqnic/fb2CG/fpga_25g/fpga/Makefile index 55e0f6a74..a01a3a798 100644 --- a/fpga/mqnic/fb2CG/fpga_25g/fpga/Makefile +++ b/fpga/mqnic/fb2CG/fpga_25g/fpga/Makefile @@ -20,6 +20,7 @@ SYN_FILES += rtl/common/mqnic_egress.v SYN_FILES += rtl/common/mqnic_ingress.v SYN_FILES += rtl/common/mqnic_l2_egress.v SYN_FILES += rtl/common/mqnic_l2_ingress.v +SYN_FILES += rtl/common/mqnic_rx_queue_map.v SYN_FILES += rtl/common/mqnic_ptp.v SYN_FILES += rtl/common/mqnic_ptp_clock.v SYN_FILES += rtl/common/mqnic_ptp_perout.v diff --git a/fpga/mqnic/fb2CG/fpga_25g/fpga_10g/Makefile b/fpga/mqnic/fb2CG/fpga_25g/fpga_10g/Makefile index 55e0f6a74..a01a3a798 100644 --- a/fpga/mqnic/fb2CG/fpga_25g/fpga_10g/Makefile +++ b/fpga/mqnic/fb2CG/fpga_25g/fpga_10g/Makefile @@ -20,6 +20,7 @@ SYN_FILES += rtl/common/mqnic_egress.v SYN_FILES += rtl/common/mqnic_ingress.v SYN_FILES += rtl/common/mqnic_l2_egress.v SYN_FILES += rtl/common/mqnic_l2_ingress.v +SYN_FILES += rtl/common/mqnic_rx_queue_map.v SYN_FILES += rtl/common/mqnic_ptp.v SYN_FILES += rtl/common/mqnic_ptp_clock.v SYN_FILES += rtl/common/mqnic_ptp_perout.v diff --git a/fpga/mqnic/fb2CG/fpga_25g/fpga_tdma/Makefile b/fpga/mqnic/fb2CG/fpga_25g/fpga_tdma/Makefile index ee7709d7b..f2d1cbf18 100644 --- a/fpga/mqnic/fb2CG/fpga_25g/fpga_tdma/Makefile +++ b/fpga/mqnic/fb2CG/fpga_25g/fpga_tdma/Makefile @@ -20,6 +20,7 @@ SYN_FILES += rtl/common/mqnic_egress.v SYN_FILES += rtl/common/mqnic_ingress.v SYN_FILES += rtl/common/mqnic_l2_egress.v SYN_FILES += rtl/common/mqnic_l2_ingress.v +SYN_FILES += rtl/common/mqnic_rx_queue_map.v SYN_FILES += rtl/common/mqnic_ptp.v SYN_FILES += rtl/common/mqnic_ptp_clock.v SYN_FILES += rtl/common/mqnic_ptp_perout.v diff --git a/fpga/mqnic/fb2CG/fpga_25g/tb/fpga_core/Makefile b/fpga/mqnic/fb2CG/fpga_25g/tb/fpga_core/Makefile index 11cee53d4..6a13b341c 100644 --- a/fpga/mqnic/fb2CG/fpga_25g/tb/fpga_core/Makefile +++ b/fpga/mqnic/fb2CG/fpga_25g/tb/fpga_core/Makefile @@ -50,6 +50,7 @@ VERILOG_SOURCES += ../../rtl/common/mqnic_egress.v VERILOG_SOURCES += ../../rtl/common/mqnic_ingress.v VERILOG_SOURCES += ../../rtl/common/mqnic_l2_egress.v VERILOG_SOURCES += ../../rtl/common/mqnic_l2_ingress.v +VERILOG_SOURCES += ../../rtl/common/mqnic_rx_queue_map.v VERILOG_SOURCES += ../../rtl/common/mqnic_ptp.v VERILOG_SOURCES += ../../rtl/common/mqnic_ptp_clock.v VERILOG_SOURCES += ../../rtl/common/mqnic_ptp_perout.v diff --git a/fpga/mqnic/fb2CG/fpga_25g/tb/fpga_core/test_fpga_core.py b/fpga/mqnic/fb2CG/fpga_25g/tb/fpga_core/test_fpga_core.py index 8ba896185..d16933435 100644 --- a/fpga/mqnic/fb2CG/fpga_25g/tb/fpga_core/test_fpga_core.py +++ b/fpga/mqnic/fb2CG/fpga_25g/tb/fpga_core/test_fpga_core.py @@ -510,6 +510,61 @@ async def run_test_nic(dut): assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff assert Ether(pkt.data).build() == test_pkt.build() + tb.log.info("Queue mapping offset test") + + data = bytearray([x % 256 for x in range(1024)]) + + tb.loopback_enable = True + + for k in range(4): + await tb.driver.interfaces[0].set_rx_queue_map_offset(0, k) + + await tb.driver.interfaces[0].start_xmit(data, 0) + + pkt = await tb.driver.interfaces[0].recv() + + tb.log.info("Packet: %s", pkt) + assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff + assert pkt.queue == k + + tb.loopback_enable = False + + await tb.driver.interfaces[0].set_rx_queue_map_offset(0, 0) + + tb.log.info("Queue mapping RSS mask test") + + await tb.driver.interfaces[0].set_rx_queue_map_rss_mask(0, 0x00000003) + + tb.loopback_enable = True + + queues = set() + + for k in range(64): + payload = bytes([x % 256 for x in range(256)]) + eth = Ether(src='5A:51:52:53:54:55', dst='DA:D1:D2:D3:D4:D5') + ip = IP(src='192.168.1.100', dst='192.168.1.101') + udp = UDP(sport=1, dport=k+0) + test_pkt = eth / ip / udp / payload + + test_pkt2 = test_pkt.copy() + test_pkt2[UDP].chksum = scapy.utils.checksum(bytes(test_pkt2[UDP])) + + await tb.driver.interfaces[0].start_xmit(test_pkt2.build(), 0, 34, 6) + + for k in range(64): + pkt = await tb.driver.interfaces[0].recv() + + tb.log.info("Packet: %s", pkt) + assert pkt.rx_checksum == ~scapy.utils.checksum(bytes(pkt.data[14:])) & 0xffff + + queues.add(pkt.queue) + + assert len(queues) == 4 + + tb.loopback_enable = False + + await tb.driver.interfaces[0].set_rx_queue_map_rss_mask(0, 0) + tb.log.info("Multiple small packets") count = 64 @@ -584,6 +639,7 @@ def test_fpga_core(request): os.path.join(rtl_dir, "common", "mqnic_ingress.v"), os.path.join(rtl_dir, "common", "mqnic_l2_egress.v"), os.path.join(rtl_dir, "common", "mqnic_l2_ingress.v"), + os.path.join(rtl_dir, "common", "mqnic_rx_queue_map.v"), os.path.join(rtl_dir, "common", "mqnic_ptp.v"), os.path.join(rtl_dir, "common", "mqnic_ptp_clock.v"), os.path.join(rtl_dir, "common", "mqnic_ptp_perout.v"), diff --git a/lib/mqnic/mqnic.h b/lib/mqnic/mqnic.h index 05fa692f1..509ac01d8 100644 --- a/lib/mqnic/mqnic.h +++ b/lib/mqnic/mqnic.h @@ -94,6 +94,7 @@ struct mqnic_if { struct reg_block *tx_cpl_queue_rb; struct reg_block *rx_queue_rb; struct reg_block *rx_cpl_queue_rb; + struct reg_block *rx_queue_map_rb; uint32_t if_features; @@ -174,6 +175,11 @@ void mqnic_print_fw_id(struct mqnic *dev); // mqnic_if.c struct mqnic_if *mqnic_if_open(struct mqnic *dev, int index, volatile uint8_t *regs); void mqnic_if_close(struct mqnic_if *interface); +uint32_t mqnic_interface_get_tx_mtu(struct mqnic_if *interface); +uint32_t mqnic_interface_get_rx_mtu(struct mqnic_if *interface); +uint32_t mqnic_interface_get_rx_queue_map_offset(struct mqnic_if *interface, int port); +uint32_t mqnic_interface_get_rx_queue_map_rss_mask(struct mqnic_if *interface, int port); +uint32_t mqnic_interface_get_rx_queue_map_app_mask(struct mqnic_if *interface, int port); // mqnic_sched_block.c struct mqnic_sched_block *mqnic_sched_block_open(struct mqnic_if *interface, int index, struct reg_block *block_rb); diff --git a/lib/mqnic/mqnic_if.c b/lib/mqnic/mqnic_if.c index 76b31d20b..53f415b78 100644 --- a/lib/mqnic/mqnic_if.c +++ b/lib/mqnic/mqnic_if.c @@ -155,6 +155,14 @@ struct mqnic_if *mqnic_if_open(struct mqnic *dev, int index, volatile uint8_t *r if (interface->rx_cpl_queue_count > MQNIC_MAX_RX_CPL_RINGS) interface->rx_cpl_queue_count = MQNIC_MAX_RX_CPL_RINGS; + interface->rx_queue_map_rb = find_reg_block(interface->rb_list, MQNIC_RB_RX_QUEUE_MAP_TYPE, MQNIC_RB_RX_QUEUE_MAP_VER, 0); + + if (!interface->rx_queue_map_rb) + { + fprintf(stderr, "Error: RX queue map block not found\n"); + goto fail; + } + for (int k = 0; k < interface->sched_block_count; k++) { struct reg_block *sched_block_rb = find_reg_block(interface->rb_list, MQNIC_RB_SCHED_BLOCK_TYPE, MQNIC_RB_SCHED_BLOCK_VER, k); @@ -197,3 +205,31 @@ void mqnic_if_close(struct mqnic_if *interface) free(interface); } + +uint32_t mqnic_interface_get_tx_mtu(struct mqnic_if *interface) +{ + return mqnic_reg_read32(interface->if_ctrl_rb->regs, MQNIC_RB_IF_CTRL_REG_TX_MTU); +} + +uint32_t mqnic_interface_get_rx_mtu(struct mqnic_if *interface) +{ + return mqnic_reg_read32(interface->if_ctrl_rb->regs, MQNIC_RB_IF_CTRL_REG_RX_MTU); +} + +uint32_t mqnic_interface_get_rx_queue_map_offset(struct mqnic_if *interface, int port) +{ + return mqnic_reg_read32(interface->rx_queue_map_rb->regs, MQNIC_RB_RX_QUEUE_MAP_CH_OFFSET + + MQNIC_RB_RX_QUEUE_MAP_CH_STRIDE*port + MQNIC_RB_RX_QUEUE_MAP_CH_REG_OFFSET); +} + +uint32_t mqnic_interface_get_rx_queue_map_rss_mask(struct mqnic_if *interface, int port) +{ + return mqnic_reg_read32(interface->rx_queue_map_rb->regs, MQNIC_RB_RX_QUEUE_MAP_CH_OFFSET + + MQNIC_RB_RX_QUEUE_MAP_CH_STRIDE*port + MQNIC_RB_RX_QUEUE_MAP_CH_REG_RSS_MASK); +} + +uint32_t mqnic_interface_get_rx_queue_map_app_mask(struct mqnic_if *interface, int port) +{ + return mqnic_reg_read32(interface->rx_queue_map_rb->regs, MQNIC_RB_RX_QUEUE_MAP_CH_OFFSET + + MQNIC_RB_RX_QUEUE_MAP_CH_STRIDE*port + MQNIC_RB_RX_QUEUE_MAP_CH_REG_APP_MASK); +} diff --git a/modules/mqnic/mqnic.h b/modules/mqnic/mqnic.h index 50bbebb09..bf9738d67 100644 --- a/modules/mqnic/mqnic.h +++ b/modules/mqnic/mqnic.h @@ -365,6 +365,7 @@ struct mqnic_if { struct reg_block *tx_cpl_queue_rb; struct reg_block *rx_queue_rb; struct reg_block *rx_cpl_queue_rb; + struct reg_block *rx_queue_map_rb; int index; @@ -477,12 +478,16 @@ extern const struct file_operations mqnic_fops; int mqnic_create_interface(struct mqnic_dev *mdev, struct mqnic_if **interface_ptr, int index, u8 __iomem *hw_addr); void mqnic_destroy_interface(struct mqnic_if **interface_ptr); -u32 mqnic_interface_get_rss_mask(struct mqnic_if *interface); -void mqnic_interface_set_rss_mask(struct mqnic_if *interface, u32 rss_mask); u32 mqnic_interface_get_tx_mtu(struct mqnic_if *interface); void mqnic_interface_set_tx_mtu(struct mqnic_if *interface, u32 mtu); u32 mqnic_interface_get_rx_mtu(struct mqnic_if *interface); void mqnic_interface_set_rx_mtu(struct mqnic_if *interface, u32 mtu); +u32 mqnic_interface_get_rx_queue_map_offset(struct mqnic_if *interface, int port); +void mqnic_interface_set_rx_queue_map_offset(struct mqnic_if *interface, int port, u32 val); +u32 mqnic_interface_get_rx_queue_map_rss_mask(struct mqnic_if *interface, int port); +void mqnic_interface_set_rx_queue_map_rss_mask(struct mqnic_if *interface, int port, u32 val); +u32 mqnic_interface_get_rx_queue_map_app_mask(struct mqnic_if *interface, int port); +void mqnic_interface_set_rx_queue_map_app_mask(struct mqnic_if *interface, int port, u32 val); // mqnic_netdev.c void mqnic_update_stats(struct net_device *ndev); diff --git a/modules/mqnic/mqnic_hw.h b/modules/mqnic/mqnic_hw.h index 85e90dd66..e789608b4 100644 --- a/modules/mqnic/mqnic_hw.h +++ b/modules/mqnic/mqnic_hw.h @@ -187,7 +187,7 @@ #define MQNIC_RB_IF_REG_CSR_OFFSET 0x18 #define MQNIC_RB_IF_CTRL_TYPE 0x0000C001 -#define MQNIC_RB_IF_CTRL_VER 0x00000300 +#define MQNIC_RB_IF_CTRL_VER 0x00000400 #define MQNIC_RB_IF_CTRL_REG_FEATURES 0x0C #define MQNIC_RB_IF_CTRL_REG_PORT_COUNT 0x10 #define MQNIC_RB_IF_CTRL_REG_SCHED_COUNT 0x14 @@ -195,7 +195,6 @@ #define MQNIC_RB_IF_CTRL_REG_MAX_RX_MTU 0x24 #define MQNIC_RB_IF_CTRL_REG_TX_MTU 0x28 #define MQNIC_RB_IF_CTRL_REG_RX_MTU 0x2C -#define MQNIC_RB_IF_CTRL_REG_RSS_MASK 0x30 #define MQNIC_IF_FEATURE_RSS (1 << 0) #define MQNIC_IF_FEATURE_PTP_TS (1 << 4) @@ -203,6 +202,15 @@ #define MQNIC_IF_FEATURE_RX_CSUM (1 << 9) #define MQNIC_IF_FEATURE_RX_HASH (1 << 10) +#define MQNIC_RB_RX_QUEUE_MAP_TYPE 0x0000C090 +#define MQNIC_RB_RX_QUEUE_MAP_VER 0x00000100 +#define MQNIC_RB_RX_QUEUE_MAP_REG_PORTS 0x0C +#define MQNIC_RB_RX_QUEUE_MAP_CH_OFFSET 0x10 +#define MQNIC_RB_RX_QUEUE_MAP_CH_STRIDE 0x10 +#define MQNIC_RB_RX_QUEUE_MAP_CH_REG_OFFSET 0x00 +#define MQNIC_RB_RX_QUEUE_MAP_CH_REG_RSS_MASK 0x04 +#define MQNIC_RB_RX_QUEUE_MAP_CH_REG_APP_MASK 0x08 + #define MQNIC_RB_EVENT_QM_TYPE 0x0000C010 #define MQNIC_RB_EVENT_QM_VER 0x00000100 #define MQNIC_RB_EVENT_QM_REG_OFFSET 0x0C diff --git a/modules/mqnic/mqnic_if.c b/modules/mqnic/mqnic_if.c index 197f46965..1734696eb 100644 --- a/modules/mqnic/mqnic_if.c +++ b/modules/mqnic/mqnic_if.c @@ -183,6 +183,20 @@ int mqnic_create_interface(struct mqnic_dev *mdev, struct mqnic_if **interface_p interface->rx_cpl_queue_count = min_t(u32, interface->rx_cpl_queue_count, MQNIC_MAX_RX_CPL_RINGS); + interface->rx_queue_map_rb = find_reg_block(interface->rb_list, MQNIC_RB_RX_QUEUE_MAP_TYPE, MQNIC_RB_RX_QUEUE_MAP_VER, 0); + + if (!interface->rx_queue_map_rb) { + ret = -EIO; + dev_err(dev, "RX queue map block not found"); + goto fail; + } + + for (k = 0; k < interface->port_count; k++) { + mqnic_interface_set_rx_queue_map_offset(interface, k, 0); + mqnic_interface_set_rx_queue_map_rss_mask(interface, k, 0); + mqnic_interface_set_rx_queue_map_app_mask(interface, k, 0); + } + // determine desc block size iowrite32(0xf << 8, hw_addr + interface->tx_queue_offset + MQNIC_QUEUE_ACTIVE_LOG_SIZE_REG); interface->max_desc_block_size = 1 << ((ioread32(hw_addr + interface->tx_queue_offset + MQNIC_QUEUE_ACTIVE_LOG_SIZE_REG) >> 8) & 0xf); @@ -315,16 +329,6 @@ void mqnic_destroy_interface(struct mqnic_if **interface_ptr) kfree(interface); } -u32 mqnic_interface_get_rss_mask(struct mqnic_if *interface) -{ - return ioread32(interface->if_ctrl_rb->regs + MQNIC_RB_IF_CTRL_REG_RSS_MASK); -} - -void mqnic_interface_set_rss_mask(struct mqnic_if *interface, u32 rss_mask) -{ - iowrite32(rss_mask, interface->if_ctrl_rb->regs + MQNIC_RB_IF_CTRL_REG_RSS_MASK); -} - u32 mqnic_interface_get_tx_mtu(struct mqnic_if *interface) { return ioread32(interface->if_ctrl_rb->regs + MQNIC_RB_IF_CTRL_REG_TX_MTU); @@ -344,3 +348,39 @@ void mqnic_interface_set_rx_mtu(struct mqnic_if *interface, u32 mtu) { iowrite32(mtu, interface->if_ctrl_rb->regs + MQNIC_RB_IF_CTRL_REG_RX_MTU); } + +u32 mqnic_interface_get_rx_queue_map_offset(struct mqnic_if *interface, int port) +{ + return ioread32(interface->rx_queue_map_rb->regs + MQNIC_RB_RX_QUEUE_MAP_CH_OFFSET + + MQNIC_RB_RX_QUEUE_MAP_CH_STRIDE*port + MQNIC_RB_RX_QUEUE_MAP_CH_REG_OFFSET); +} + +void mqnic_interface_set_rx_queue_map_offset(struct mqnic_if *interface, int port, u32 val) +{ + iowrite32(val, interface->rx_queue_map_rb->regs + MQNIC_RB_RX_QUEUE_MAP_CH_OFFSET + + MQNIC_RB_RX_QUEUE_MAP_CH_STRIDE*port + MQNIC_RB_RX_QUEUE_MAP_CH_REG_OFFSET); +} + +u32 mqnic_interface_get_rx_queue_map_rss_mask(struct mqnic_if *interface, int port) +{ + return ioread32(interface->rx_queue_map_rb->regs + MQNIC_RB_RX_QUEUE_MAP_CH_OFFSET + + MQNIC_RB_RX_QUEUE_MAP_CH_STRIDE*port + MQNIC_RB_RX_QUEUE_MAP_CH_REG_RSS_MASK); +} + +void mqnic_interface_set_rx_queue_map_rss_mask(struct mqnic_if *interface, int port, u32 val) +{ + iowrite32(val, interface->rx_queue_map_rb->regs + MQNIC_RB_RX_QUEUE_MAP_CH_OFFSET + + MQNIC_RB_RX_QUEUE_MAP_CH_STRIDE*port + MQNIC_RB_RX_QUEUE_MAP_CH_REG_RSS_MASK); +} + +u32 mqnic_interface_get_rx_queue_map_app_mask(struct mqnic_if *interface, int port) +{ + return ioread32(interface->rx_queue_map_rb->regs + MQNIC_RB_RX_QUEUE_MAP_CH_OFFSET + + MQNIC_RB_RX_QUEUE_MAP_CH_STRIDE*port + MQNIC_RB_RX_QUEUE_MAP_CH_REG_APP_MASK); +} + +void mqnic_interface_set_rx_queue_map_app_mask(struct mqnic_if *interface, int port, u32 val) +{ + iowrite32(val, interface->rx_queue_map_rb->regs + MQNIC_RB_RX_QUEUE_MAP_CH_OFFSET + + MQNIC_RB_RX_QUEUE_MAP_CH_STRIDE*port + MQNIC_RB_RX_QUEUE_MAP_CH_REG_APP_MASK); +} diff --git a/modules/mqnic/mqnic_netdev.c b/modules/mqnic/mqnic_netdev.c index d8460cf0c..b7e72688c 100644 --- a/modules/mqnic/mqnic_netdev.c +++ b/modules/mqnic/mqnic_netdev.c @@ -89,7 +89,7 @@ static int mqnic_start_port(struct net_device *ndev) mqnic_interface_set_rx_mtu(priv->interface, ndev->mtu + ETH_HLEN); // configure RSS - mqnic_interface_set_rss_mask(priv->interface, 0xffffffff); + mqnic_interface_set_rx_queue_map_rss_mask(priv->interface, 0, rounddown_pow_of_two(priv->rx_queue_count)-1); // enable first scheduler mqnic_activate_sched_block(priv->sched_block[0]); diff --git a/utils/mqnic-dump.c b/utils/mqnic-dump.c index 41429e530..c7459c8a8 100644 --- a/utils/mqnic-dump.c +++ b/utils/mqnic-dump.c @@ -183,9 +183,8 @@ int main(int argc, char *argv[]) printf("Scheduler block count: %d\n", dev_interface->sched_block_count); printf("Max TX MTU: %d\n", dev_interface->max_tx_mtu); printf("Max RX MTU: %d\n", dev_interface->max_rx_mtu); - printf("TX MTU: %d\n", mqnic_reg_read32(dev_interface->if_ctrl_rb->regs, MQNIC_RB_IF_CTRL_REG_TX_MTU)); - printf("RX MTU: %d\n", mqnic_reg_read32(dev_interface->if_ctrl_rb->regs, MQNIC_RB_IF_CTRL_REG_RX_MTU)); - printf("RSS mask: 0x%08x\n", mqnic_reg_read32(dev_interface->if_ctrl_rb->regs, MQNIC_RB_IF_CTRL_REG_RSS_MASK)); + printf("TX MTU: %d\n", mqnic_interface_get_tx_mtu(dev_interface)); + printf("RX MTU: %d\n", mqnic_interface_get_rx_mtu(dev_interface)); printf("Event queue offset: 0x%08x\n", dev_interface->event_queue_offset); printf("Event queue count: %d\n", dev_interface->event_queue_count); @@ -207,6 +206,13 @@ int main(int argc, char *argv[]) printf("RX completion queue count: %d\n", dev_interface->rx_cpl_queue_count); printf("RX completion queue stride: 0x%08x\n", dev_interface->rx_cpl_queue_stride); + for (int k = 0; k < dev_interface->port_count; k++) + { + printf("Port %d RX queue map offset: %d\n", k, mqnic_interface_get_rx_queue_map_offset(dev_interface, k)); + printf("Port %d RX queue map RSS mask: 0x%08x\n", k, mqnic_interface_get_rx_queue_map_rss_mask(dev_interface, k)); + printf("Port %d RX queue map app mask: 0x%08x\n", k, mqnic_interface_get_rx_queue_map_app_mask(dev_interface, k)); + } + if (port < 0 || port >= dev_interface->port_count) { fprintf(stderr, "Port out of range\n");