diff --git a/example/common/tb/example_core_pcie_ptile/test_example_core_pcie_ptile.py b/example/common/tb/example_core_pcie_ptile/test_example_core_pcie_ptile.py
index b4dc9e2fb..6668724fe 100644
--- a/example/common/tb/example_core_pcie_ptile/test_example_core_pcie_ptile.py
+++ b/example/common/tb/example_core_pcie_ptile/test_example_core_pcie_ptile.py
@@ -388,6 +388,81 @@ async def dma_block_write_bench(tb, dev, addr, mask, size, stride, count):
 
     assert status & 0x300 == 0
 
 
+async def dma_cpl_buf_test(tb, dev, addr, mask, size, stride, count, stall):
+    dev_pf0_bar0 = dev.bar_window[0]
+
+    rd_req = await dev_pf0_bar0.read_dword(0x000020)
+    rd_cpl = await dev_pf0_bar0.read_dword(0x000024)
+
+    # configure operation (read)
+    # DMA base address
+    await dev_pf0_bar0.write_dword(0x001080, addr & 0xffffffff)
+    await dev_pf0_bar0.write_dword(0x001084, (addr >> 32) & 0xffffffff)
+    # DMA offset address
+    await dev_pf0_bar0.write_dword(0x001088, 0)
+    await dev_pf0_bar0.write_dword(0x00108c, 0)
+    # DMA offset mask
+    await dev_pf0_bar0.write_dword(0x001090, mask)
+    await dev_pf0_bar0.write_dword(0x001094, 0)
+    # DMA stride
+    await dev_pf0_bar0.write_dword(0x001098, stride)
+    await dev_pf0_bar0.write_dword(0x00109c, 0)
+    # RAM base address
+    await dev_pf0_bar0.write_dword(0x0010c0, 0)
+    await dev_pf0_bar0.write_dword(0x0010c4, 0)
+    # RAM offset address
+    await dev_pf0_bar0.write_dword(0x0010c8, 0)
+    await dev_pf0_bar0.write_dword(0x0010cc, 0)
+    # RAM offset mask
+    await dev_pf0_bar0.write_dword(0x0010d0, mask)
+    await dev_pf0_bar0.write_dword(0x0010d4, 0)
+    # RAM stride
+    await dev_pf0_bar0.write_dword(0x0010d8, stride)
+    await dev_pf0_bar0.write_dword(0x0010dc, 0)
+    # clear cycle count
+    await dev_pf0_bar0.write_dword(0x001008, 0)
+    await dev_pf0_bar0.write_dword(0x00100c, 0)
+    # block length
+    await dev_pf0_bar0.write_dword(0x001010, size)
+    # block count
+    await dev_pf0_bar0.write_dword(0x001018, count)
+    await dev_pf0_bar0.write_dword(0x00101c, 0)
+
+    if stall:
+        # stall RX
+        await dev_pf0_bar0.write_dword(0x000040, stall)
+
+    # start
+    await dev_pf0_bar0.write_dword(0x001000, 1)
+
+    # wait for stall
+    if stall:
+        for k in range(stall):
+            await RisingEdge(tb.dut.clk)
+
+    for k in range(100):
+        await Timer(1000, 'ns')
+        run = await dev_pf0_bar0.read_dword(0x001000)
+        status = await dev_pf0_bar0.read_dword(0x000000)
+        if run == 0 and status & 0x300 == 0:
+            break
+
+    if run != 0:
+        tb.log.warning("Operation timed out")
+    if status & 0x300 != 0:
+        tb.log.warning("DMA engine busy")
+
+    cycles = await dev_pf0_bar0.read_dword(0x001008)
+
+    rd_req = await dev_pf0_bar0.read_dword(0x000020) - rd_req
+    rd_cpl = await dev_pf0_bar0.read_dword(0x000024) - rd_cpl
+
+    tb.log.info("read %d x %d B (total %d B %d CPLD, stride %d) in %d ns (%d req %d cpl) %d Mbps",
+            count, size, count*size, count*((size+15)//16), stride, cycles*4, rd_req, rd_cpl, size * count * 8 * 1000 / (cycles * 4))
+
+    assert status & 0x300 == 0
+
+
 @cocotb.test()
 async def run_test(dut):
@@ -503,6 +578,34 @@ async def run_test(dut):
 
     assert mem[src_offset:src_offset+region_len] == mem[dest_offset:dest_offset+region_len]
 
+    tb.log.info("Test RX completion buffer (CPLH, 8)")
+
+    size = 8
+    stride = size
+    for count in range(32, 256+1, 8):
+        await dma_cpl_buf_test(tb, dev, mem_base, region_len-1, size, stride, count, 2000)
+
+    tb.log.info("Test RX completion buffer (CPLH, 8+64)")
+
+    size = 8+64
+    stride = 0
+    for count in range(8, 256+1, 8):
+        await dma_cpl_buf_test(tb, dev, mem_base, region_len-1, size, stride, count, 2000)
+
+    tb.log.info("Test RX completion buffer (CPLH, 8+128+8)")
+
+    size = 8+128+8
+    stride = 0
+    for count in range(8, 256+1, 8):
+        await dma_cpl_buf_test(tb, dev, mem_base, region_len-1, size, stride, count, 2000)
+
+    tb.log.info("Test RX completion buffer (CPLD)")
+
+    size = 512
+    stride = size
+    for count in range(8, 256+1, 8):
+        await dma_cpl_buf_test(tb, dev, mem_base, region_len-1, size, stride, count, 4000)
+
     tb.log.info("Perform block reads")
 
     count = 100
diff --git a/example/common/tb/example_core_pcie_s10/test_example_core_pcie_s10.py b/example/common/tb/example_core_pcie_s10/test_example_core_pcie_s10.py
index 9e43266e5..92fa6e5d9 100644
--- a/example/common/tb/example_core_pcie_s10/test_example_core_pcie_s10.py
+++ b/example/common/tb/example_core_pcie_s10/test_example_core_pcie_s10.py
@@ -336,6 +336,81 @@ async def dma_block_write_bench(tb, dev, addr, mask, size, stride, count):
 
     assert status & 0x300 == 0
 
 
+async def dma_cpl_buf_test(tb, dev, addr, mask, size, stride, count, stall):
+    dev_pf0_bar0 = dev.bar_window[0]
+
+    rd_req = await dev_pf0_bar0.read_dword(0x000020)
+    rd_cpl = await dev_pf0_bar0.read_dword(0x000024)
+
+    # configure operation (read)
+    # DMA base address
+    await dev_pf0_bar0.write_dword(0x001080, addr & 0xffffffff)
+    await dev_pf0_bar0.write_dword(0x001084, (addr >> 32) & 0xffffffff)
+    # DMA offset address
+    await dev_pf0_bar0.write_dword(0x001088, 0)
+    await dev_pf0_bar0.write_dword(0x00108c, 0)
+    # DMA offset mask
+    await dev_pf0_bar0.write_dword(0x001090, mask)
+    await dev_pf0_bar0.write_dword(0x001094, 0)
+    # DMA stride
+    await dev_pf0_bar0.write_dword(0x001098, stride)
+    await dev_pf0_bar0.write_dword(0x00109c, 0)
+    # RAM base address
+    await dev_pf0_bar0.write_dword(0x0010c0, 0)
+    await dev_pf0_bar0.write_dword(0x0010c4, 0)
+    # RAM offset address
+    await dev_pf0_bar0.write_dword(0x0010c8, 0)
+    await dev_pf0_bar0.write_dword(0x0010cc, 0)
+    # RAM offset mask
+    await dev_pf0_bar0.write_dword(0x0010d0, mask)
+    await dev_pf0_bar0.write_dword(0x0010d4, 0)
+    # RAM stride
+    await dev_pf0_bar0.write_dword(0x0010d8, stride)
+    await dev_pf0_bar0.write_dword(0x0010dc, 0)
+    # clear cycle count
+    await dev_pf0_bar0.write_dword(0x001008, 0)
+    await dev_pf0_bar0.write_dword(0x00100c, 0)
+    # block length
+    await dev_pf0_bar0.write_dword(0x001010, size)
+    # block count
+    await dev_pf0_bar0.write_dword(0x001018, count)
+    await dev_pf0_bar0.write_dword(0x00101c, 0)
+
+    if stall:
+        # stall RX
+        await dev_pf0_bar0.write_dword(0x000040, stall)
+
+    # start
+    await dev_pf0_bar0.write_dword(0x001000, 1)
+
+    # wait for stall
+    if stall:
+        for k in range(stall):
+            await RisingEdge(tb.dut.clk)
+
+    for k in range(100):
+        await Timer(1000, 'ns')
+        run = await dev_pf0_bar0.read_dword(0x001000)
+        status = await dev_pf0_bar0.read_dword(0x000000)
+        if run == 0 and status & 0x300 == 0:
+            break
+
+    if run != 0:
+        tb.log.warning("Operation timed out")
+    if status & 0x300 != 0:
+        tb.log.warning("DMA engine busy")
+
+    cycles = await dev_pf0_bar0.read_dword(0x001008)
+
+    rd_req = await dev_pf0_bar0.read_dword(0x000020) - rd_req
+    rd_cpl = await dev_pf0_bar0.read_dword(0x000024) - rd_cpl
+
+    tb.log.info("read %d x %d B (total %d B %d CPLD, stride %d) in %d ns (%d req %d cpl) %d Mbps",
+            count, size, count*size, count*((size+15)//16), stride, cycles*4, rd_req, rd_cpl, size * count * 8 * 1000 / (cycles * 4))
+
+    assert status & 0x300 == 0
+
+
 @cocotb.test()
 async def run_test(dut):
@@ -451,6 +526,34 @@ async def run_test(dut):
 
     assert mem[src_offset:src_offset+region_len] == mem[dest_offset:dest_offset+region_len]
 
+    tb.log.info("Test RX completion buffer (CPLH, 8)")
+
+    size = 8
+    stride = size
+    for count in range(32, 256+1, 8):
+        await dma_cpl_buf_test(tb, dev, mem_base, region_len-1, size, stride, count, 2000)
+
+    tb.log.info("Test RX completion buffer (CPLH, 8+64)")
+
+    size = 8+64
+    stride = 0
+    for count in range(8, 256+1, 8):
+        await dma_cpl_buf_test(tb, dev, mem_base, region_len-1, size, stride, count, 2000)
+
+    tb.log.info("Test RX completion buffer (CPLH, 8+128+8)")
+
+    size = 8+128+8
+    stride = 0
+    for count in range(8, 256+1, 8):
+        await dma_cpl_buf_test(tb, dev, mem_base, region_len-1, size, stride, count, 2000)
+
+    tb.log.info("Test RX completion buffer (CPLD)")
+
+    size = 512
+    stride = size
+    for count in range(8, 256+1, 8):
+        await dma_cpl_buf_test(tb, dev, mem_base, region_len-1, size, stride, count, 4000)
+
     tb.log.info("Perform block reads")
 
     count = 100
diff --git a/example/common/tb/example_core_pcie_us/test_example_core_pcie_us.py b/example/common/tb/example_core_pcie_us/test_example_core_pcie_us.py
index 4f89a5d86..832d58508 100644
--- a/example/common/tb/example_core_pcie_us/test_example_core_pcie_us.py
+++ b/example/common/tb/example_core_pcie_us/test_example_core_pcie_us.py
@@ -429,6 +429,81 @@ async def dma_block_write_bench(tb, dev, addr, mask, size, stride, count):
 
     assert status & 0x300 == 0
 
 
+async def dma_cpl_buf_test(tb, dev, addr, mask, size, stride, count, stall):
+    dev_pf0_bar0 = dev.bar_window[0]
+
+    rd_req = await dev_pf0_bar0.read_dword(0x000020)
+    rd_cpl = await dev_pf0_bar0.read_dword(0x000024)
+
+    # configure operation (read)
+    # DMA base address
+    await dev_pf0_bar0.write_dword(0x001080, addr & 0xffffffff)
+    await dev_pf0_bar0.write_dword(0x001084, (addr >> 32) & 0xffffffff)
+    # DMA offset address
+    await dev_pf0_bar0.write_dword(0x001088, 0)
+    await dev_pf0_bar0.write_dword(0x00108c, 0)
+    # DMA offset mask
+    await dev_pf0_bar0.write_dword(0x001090, mask)
+    await dev_pf0_bar0.write_dword(0x001094, 0)
+    # DMA stride
+    await dev_pf0_bar0.write_dword(0x001098, stride)
+    await dev_pf0_bar0.write_dword(0x00109c, 0)
+    # RAM base address
+    await dev_pf0_bar0.write_dword(0x0010c0, 0)
+    await dev_pf0_bar0.write_dword(0x0010c4, 0)
+    # RAM offset address
+    await dev_pf0_bar0.write_dword(0x0010c8, 0)
+    await dev_pf0_bar0.write_dword(0x0010cc, 0)
+    # RAM offset mask
+    await dev_pf0_bar0.write_dword(0x0010d0, mask)
+    await dev_pf0_bar0.write_dword(0x0010d4, 0)
+    # RAM stride
+    await dev_pf0_bar0.write_dword(0x0010d8, stride)
+    await dev_pf0_bar0.write_dword(0x0010dc, 0)
+    # clear cycle count
+    await dev_pf0_bar0.write_dword(0x001008, 0)
+    await dev_pf0_bar0.write_dword(0x00100c, 0)
+    # block length
+    await dev_pf0_bar0.write_dword(0x001010, size)
+    # block count
+    await dev_pf0_bar0.write_dword(0x001018, count)
+    await dev_pf0_bar0.write_dword(0x00101c, 0)
+
+    if stall:
+        # stall RX
+        await dev_pf0_bar0.write_dword(0x000040, stall)
+
+    # start
+    await dev_pf0_bar0.write_dword(0x001000, 1)
+
+    # wait for stall
+    if stall:
+        for k in range(stall):
+            await RisingEdge(tb.dut.clk)
+
+    for k in range(100):
+        await Timer(1000, 'ns')
+        run = await dev_pf0_bar0.read_dword(0x001000)
+        status = await dev_pf0_bar0.read_dword(0x000000)
+        if run == 0 and status & 0x300 == 0:
+            break
+
+    if run != 0:
+        tb.log.warning("Operation timed out")
+    if status & 0x300 != 0:
+        tb.log.warning("DMA engine busy")
+
+    cycles = await dev_pf0_bar0.read_dword(0x001008)
+
+    rd_req = await dev_pf0_bar0.read_dword(0x000020) - rd_req
+    rd_cpl = await dev_pf0_bar0.read_dword(0x000024) - rd_cpl
+
+    tb.log.info("read %d x %d B (total %d B %d CPLD, stride %d) in %d ns (%d req %d cpl) %d Mbps",
+            count, size, count*size, count*((size+15)//16), stride, cycles*4, rd_req, rd_cpl, size * count * 8 * 1000 / (cycles * 4))
+
+    assert status & 0x300 == 0
+
+
 @cocotb.test()
 async def run_test(dut):
@@ -544,6 +619,34 @@ async def run_test(dut):
 
     assert mem[src_offset:src_offset+region_len] == mem[dest_offset:dest_offset+region_len]
 
+    tb.log.info("Test RX completion buffer (CPLH, 8)")
+
+    size = 8
+    stride = size
+    for count in range(32, 256+1, 8):
+        await dma_cpl_buf_test(tb, dev, mem_base, region_len-1, size, stride, count, 2000)
+
+    tb.log.info("Test RX completion buffer (CPLH, 8+64)")
+
+    size = 8+64
+    stride = 0
+    for count in range(8, 256+1, 8):
+        await dma_cpl_buf_test(tb, dev, mem_base, region_len-1, size, stride, count, 2000)
+
+    tb.log.info("Test RX completion buffer (CPLH, 8+128+8)")
+
+    size = 8+128+8
+    stride = 0
+    for count in range(8, 256+1, 8):
+        await dma_cpl_buf_test(tb, dev, mem_base, region_len-1, size, stride, count, 2000)
+
+    tb.log.info("Test RX completion buffer (CPLD)")
+
+    size = 512
+    stride = size
+    for count in range(8, 256+1, 8):
+        await dma_cpl_buf_test(tb, dev, mem_base, region_len-1, size, stride, count, 4000)
+
     tb.log.info("Perform block reads")
 
     count = 100