diff --git a/example/ADM_PCIE_9V3/fpga/fpga/Makefile b/example/ADM_PCIE_9V3/fpga/fpga/Makefile index a056e3ea0..92aa8cc4d 100644 --- a/example/ADM_PCIE_9V3/fpga/fpga/Makefile +++ b/example/ADM_PCIE_9V3/fpga/fpga/Makefile @@ -28,6 +28,8 @@ SYN_FILES += lib/pcie/rtl/pcie_axi_master_wr.v SYN_FILES += lib/pcie/rtl/pcie_tlp_demux_bar.v SYN_FILES += lib/pcie/rtl/pcie_tlp_demux.v SYN_FILES += lib/pcie/rtl/pcie_tlp_mux.v +SYN_FILES += lib/pcie/rtl/pcie_tlp_fifo.v +SYN_FILES += lib/pcie/rtl/pcie_tlp_fifo_raw.v SYN_FILES += lib/pcie/rtl/dma_if_pcie.v SYN_FILES += lib/pcie/rtl/dma_if_pcie_rd.v SYN_FILES += lib/pcie/rtl/dma_if_pcie_wr.v diff --git a/example/ADM_PCIE_9V3/fpga/tb/fpga_core/Makefile b/example/ADM_PCIE_9V3/fpga/tb/fpga_core/Makefile index 7c85b6e54..f574543cc 100644 --- a/example/ADM_PCIE_9V3/fpga/tb/fpga_core/Makefile +++ b/example/ADM_PCIE_9V3/fpga/tb/fpga_core/Makefile @@ -48,6 +48,8 @@ VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_axi_master_wr.v VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_tlp_demux_bar.v VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_tlp_demux.v VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_tlp_mux.v +VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_tlp_fifo.v +VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_tlp_fifo_raw.v VERILOG_SOURCES += ../../lib/pcie/rtl/dma_if_pcie.v VERILOG_SOURCES += ../../lib/pcie/rtl/dma_if_pcie_rd.v VERILOG_SOURCES += ../../lib/pcie/rtl/dma_if_pcie_wr.v diff --git a/example/ADM_PCIE_9V3/fpga/tb/fpga_core/test_fpga_core.py b/example/ADM_PCIE_9V3/fpga/tb/fpga_core/test_fpga_core.py index 55545a4ee..64609e5f0 100644 --- a/example/ADM_PCIE_9V3/fpga/tb/fpga_core/test_fpga_core.py +++ b/example/ADM_PCIE_9V3/fpga/tb/fpga_core/test_fpga_core.py @@ -515,6 +515,8 @@ def test_fpga_core(request): os.path.join(pcie_rtl_dir, "pcie_tlp_demux_bar.v"), os.path.join(pcie_rtl_dir, "pcie_tlp_demux.v"), os.path.join(pcie_rtl_dir, "pcie_tlp_mux.v"), + os.path.join(pcie_rtl_dir, "pcie_tlp_fifo.v"), + os.path.join(pcie_rtl_dir, "pcie_tlp_fifo_raw.v"), 
os.path.join(pcie_rtl_dir, "dma_if_pcie.v"), os.path.join(pcie_rtl_dir, "dma_if_pcie_rd.v"), os.path.join(pcie_rtl_dir, "dma_if_pcie_wr.v"), diff --git a/example/AU200/fpga/fpga/Makefile b/example/AU200/fpga/fpga/Makefile index 9e4dba59c..10f719ffb 100644 --- a/example/AU200/fpga/fpga/Makefile +++ b/example/AU200/fpga/fpga/Makefile @@ -28,6 +28,8 @@ SYN_FILES += lib/pcie/rtl/pcie_axi_master_wr.v SYN_FILES += lib/pcie/rtl/pcie_tlp_demux_bar.v SYN_FILES += lib/pcie/rtl/pcie_tlp_demux.v SYN_FILES += lib/pcie/rtl/pcie_tlp_mux.v +SYN_FILES += lib/pcie/rtl/pcie_tlp_fifo.v +SYN_FILES += lib/pcie/rtl/pcie_tlp_fifo_raw.v SYN_FILES += lib/pcie/rtl/dma_if_pcie.v SYN_FILES += lib/pcie/rtl/dma_if_pcie_rd.v SYN_FILES += lib/pcie/rtl/dma_if_pcie_wr.v diff --git a/example/AU200/fpga/tb/fpga_core/Makefile b/example/AU200/fpga/tb/fpga_core/Makefile index 7c85b6e54..f574543cc 100644 --- a/example/AU200/fpga/tb/fpga_core/Makefile +++ b/example/AU200/fpga/tb/fpga_core/Makefile @@ -48,6 +48,8 @@ VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_axi_master_wr.v VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_tlp_demux_bar.v VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_tlp_demux.v VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_tlp_mux.v +VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_tlp_fifo.v +VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_tlp_fifo_raw.v VERILOG_SOURCES += ../../lib/pcie/rtl/dma_if_pcie.v VERILOG_SOURCES += ../../lib/pcie/rtl/dma_if_pcie_rd.v VERILOG_SOURCES += ../../lib/pcie/rtl/dma_if_pcie_wr.v diff --git a/example/AU200/fpga/tb/fpga_core/test_fpga_core.py b/example/AU200/fpga/tb/fpga_core/test_fpga_core.py index 466a5652c..441d72b2f 100644 --- a/example/AU200/fpga/tb/fpga_core/test_fpga_core.py +++ b/example/AU200/fpga/tb/fpga_core/test_fpga_core.py @@ -517,6 +517,8 @@ def test_fpga_core(request): os.path.join(pcie_rtl_dir, "pcie_tlp_demux_bar.v"), os.path.join(pcie_rtl_dir, "pcie_tlp_demux.v"), os.path.join(pcie_rtl_dir, "pcie_tlp_mux.v"), + os.path.join(pcie_rtl_dir, "pcie_tlp_fifo.v"), + 
os.path.join(pcie_rtl_dir, "pcie_tlp_fifo_raw.v"), os.path.join(pcie_rtl_dir, "dma_if_pcie.v"), os.path.join(pcie_rtl_dir, "dma_if_pcie_rd.v"), os.path.join(pcie_rtl_dir, "dma_if_pcie_wr.v"), diff --git a/example/AU250/fpga/fpga/Makefile b/example/AU250/fpga/fpga/Makefile index 2dced7d09..c906b57a9 100644 --- a/example/AU250/fpga/fpga/Makefile +++ b/example/AU250/fpga/fpga/Makefile @@ -28,6 +28,8 @@ SYN_FILES += lib/pcie/rtl/pcie_axi_master_wr.v SYN_FILES += lib/pcie/rtl/pcie_tlp_demux_bar.v SYN_FILES += lib/pcie/rtl/pcie_tlp_demux.v SYN_FILES += lib/pcie/rtl/pcie_tlp_mux.v +SYN_FILES += lib/pcie/rtl/pcie_tlp_fifo.v +SYN_FILES += lib/pcie/rtl/pcie_tlp_fifo_raw.v SYN_FILES += lib/pcie/rtl/dma_if_pcie.v SYN_FILES += lib/pcie/rtl/dma_if_pcie_rd.v SYN_FILES += lib/pcie/rtl/dma_if_pcie_wr.v diff --git a/example/AU250/fpga/tb/fpga_core/Makefile b/example/AU250/fpga/tb/fpga_core/Makefile index 7c85b6e54..f574543cc 100644 --- a/example/AU250/fpga/tb/fpga_core/Makefile +++ b/example/AU250/fpga/tb/fpga_core/Makefile @@ -48,6 +48,8 @@ VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_axi_master_wr.v VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_tlp_demux_bar.v VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_tlp_demux.v VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_tlp_mux.v +VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_tlp_fifo.v +VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_tlp_fifo_raw.v VERILOG_SOURCES += ../../lib/pcie/rtl/dma_if_pcie.v VERILOG_SOURCES += ../../lib/pcie/rtl/dma_if_pcie_rd.v VERILOG_SOURCES += ../../lib/pcie/rtl/dma_if_pcie_wr.v diff --git a/example/AU250/fpga/tb/fpga_core/test_fpga_core.py b/example/AU250/fpga/tb/fpga_core/test_fpga_core.py index 466a5652c..441d72b2f 100644 --- a/example/AU250/fpga/tb/fpga_core/test_fpga_core.py +++ b/example/AU250/fpga/tb/fpga_core/test_fpga_core.py @@ -517,6 +517,8 @@ def test_fpga_core(request): os.path.join(pcie_rtl_dir, "pcie_tlp_demux_bar.v"), os.path.join(pcie_rtl_dir, "pcie_tlp_demux.v"), os.path.join(pcie_rtl_dir, "pcie_tlp_mux.v"), + 
os.path.join(pcie_rtl_dir, "pcie_tlp_fifo.v"), + os.path.join(pcie_rtl_dir, "pcie_tlp_fifo_raw.v"), os.path.join(pcie_rtl_dir, "dma_if_pcie.v"), os.path.join(pcie_rtl_dir, "dma_if_pcie_rd.v"), os.path.join(pcie_rtl_dir, "dma_if_pcie_wr.v"), diff --git a/example/AU280/fpga/fpga/Makefile b/example/AU280/fpga/fpga/Makefile index e3ff6807c..f39b517f3 100644 --- a/example/AU280/fpga/fpga/Makefile +++ b/example/AU280/fpga/fpga/Makefile @@ -27,6 +27,8 @@ SYN_FILES += lib/pcie/rtl/pcie_axi_master_wr.v SYN_FILES += lib/pcie/rtl/pcie_tlp_demux_bar.v SYN_FILES += lib/pcie/rtl/pcie_tlp_demux.v SYN_FILES += lib/pcie/rtl/pcie_tlp_mux.v +SYN_FILES += lib/pcie/rtl/pcie_tlp_fifo.v +SYN_FILES += lib/pcie/rtl/pcie_tlp_fifo_raw.v SYN_FILES += lib/pcie/rtl/dma_if_pcie.v SYN_FILES += lib/pcie/rtl/dma_if_pcie_rd.v SYN_FILES += lib/pcie/rtl/dma_if_pcie_wr.v diff --git a/example/AU280/fpga/tb/fpga_core/Makefile b/example/AU280/fpga/tb/fpga_core/Makefile index 7c85b6e54..f574543cc 100644 --- a/example/AU280/fpga/tb/fpga_core/Makefile +++ b/example/AU280/fpga/tb/fpga_core/Makefile @@ -48,6 +48,8 @@ VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_axi_master_wr.v VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_tlp_demux_bar.v VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_tlp_demux.v VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_tlp_mux.v +VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_tlp_fifo.v +VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_tlp_fifo_raw.v VERILOG_SOURCES += ../../lib/pcie/rtl/dma_if_pcie.v VERILOG_SOURCES += ../../lib/pcie/rtl/dma_if_pcie_rd.v VERILOG_SOURCES += ../../lib/pcie/rtl/dma_if_pcie_wr.v diff --git a/example/AU280/fpga/tb/fpga_core/test_fpga_core.py b/example/AU280/fpga/tb/fpga_core/test_fpga_core.py index 55545a4ee..64609e5f0 100644 --- a/example/AU280/fpga/tb/fpga_core/test_fpga_core.py +++ b/example/AU280/fpga/tb/fpga_core/test_fpga_core.py @@ -515,6 +515,8 @@ def test_fpga_core(request): os.path.join(pcie_rtl_dir, "pcie_tlp_demux_bar.v"), os.path.join(pcie_rtl_dir, 
"pcie_tlp_demux.v"), os.path.join(pcie_rtl_dir, "pcie_tlp_mux.v"), + os.path.join(pcie_rtl_dir, "pcie_tlp_fifo.v"), + os.path.join(pcie_rtl_dir, "pcie_tlp_fifo_raw.v"), os.path.join(pcie_rtl_dir, "dma_if_pcie.v"), os.path.join(pcie_rtl_dir, "dma_if_pcie_rd.v"), os.path.join(pcie_rtl_dir, "dma_if_pcie_wr.v"), diff --git a/example/AU50/fpga/fpga/Makefile b/example/AU50/fpga/fpga/Makefile index 4534c3289..335efaa28 100644 --- a/example/AU50/fpga/fpga/Makefile +++ b/example/AU50/fpga/fpga/Makefile @@ -27,6 +27,8 @@ SYN_FILES += lib/pcie/rtl/pcie_axi_master_wr.v SYN_FILES += lib/pcie/rtl/pcie_tlp_demux_bar.v SYN_FILES += lib/pcie/rtl/pcie_tlp_demux.v SYN_FILES += lib/pcie/rtl/pcie_tlp_mux.v +SYN_FILES += lib/pcie/rtl/pcie_tlp_fifo.v +SYN_FILES += lib/pcie/rtl/pcie_tlp_fifo_raw.v SYN_FILES += lib/pcie/rtl/dma_if_pcie.v SYN_FILES += lib/pcie/rtl/dma_if_pcie_rd.v SYN_FILES += lib/pcie/rtl/dma_if_pcie_wr.v diff --git a/example/AU50/fpga/tb/fpga_core/Makefile b/example/AU50/fpga/tb/fpga_core/Makefile index 7c85b6e54..f574543cc 100644 --- a/example/AU50/fpga/tb/fpga_core/Makefile +++ b/example/AU50/fpga/tb/fpga_core/Makefile @@ -48,6 +48,8 @@ VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_axi_master_wr.v VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_tlp_demux_bar.v VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_tlp_demux.v VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_tlp_mux.v +VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_tlp_fifo.v +VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_tlp_fifo_raw.v VERILOG_SOURCES += ../../lib/pcie/rtl/dma_if_pcie.v VERILOG_SOURCES += ../../lib/pcie/rtl/dma_if_pcie_rd.v VERILOG_SOURCES += ../../lib/pcie/rtl/dma_if_pcie_wr.v diff --git a/example/AU50/fpga/tb/fpga_core/test_fpga_core.py b/example/AU50/fpga/tb/fpga_core/test_fpga_core.py index 55545a4ee..64609e5f0 100644 --- a/example/AU50/fpga/tb/fpga_core/test_fpga_core.py +++ b/example/AU50/fpga/tb/fpga_core/test_fpga_core.py @@ -515,6 +515,8 @@ def test_fpga_core(request): os.path.join(pcie_rtl_dir, 
"pcie_tlp_demux_bar.v"), os.path.join(pcie_rtl_dir, "pcie_tlp_demux.v"), os.path.join(pcie_rtl_dir, "pcie_tlp_mux.v"), + os.path.join(pcie_rtl_dir, "pcie_tlp_fifo.v"), + os.path.join(pcie_rtl_dir, "pcie_tlp_fifo_raw.v"), os.path.join(pcie_rtl_dir, "dma_if_pcie.v"), os.path.join(pcie_rtl_dir, "dma_if_pcie_rd.v"), os.path.join(pcie_rtl_dir, "dma_if_pcie_wr.v"), diff --git a/example/ExaNIC_X10/fpga/fpga/Makefile b/example/ExaNIC_X10/fpga/fpga/Makefile index ee821b8fb..9ed732915 100644 --- a/example/ExaNIC_X10/fpga/fpga/Makefile +++ b/example/ExaNIC_X10/fpga/fpga/Makefile @@ -27,6 +27,8 @@ SYN_FILES += lib/pcie/rtl/pcie_axi_master_wr.v SYN_FILES += lib/pcie/rtl/pcie_tlp_demux_bar.v SYN_FILES += lib/pcie/rtl/pcie_tlp_demux.v SYN_FILES += lib/pcie/rtl/pcie_tlp_mux.v +SYN_FILES += lib/pcie/rtl/pcie_tlp_fifo.v +SYN_FILES += lib/pcie/rtl/pcie_tlp_fifo_raw.v SYN_FILES += lib/pcie/rtl/dma_if_pcie.v SYN_FILES += lib/pcie/rtl/dma_if_pcie_rd.v SYN_FILES += lib/pcie/rtl/dma_if_pcie_wr.v diff --git a/example/ExaNIC_X10/fpga/tb/fpga_core/Makefile b/example/ExaNIC_X10/fpga/tb/fpga_core/Makefile index 2cd4c6b21..ed02b0273 100644 --- a/example/ExaNIC_X10/fpga/tb/fpga_core/Makefile +++ b/example/ExaNIC_X10/fpga/tb/fpga_core/Makefile @@ -48,6 +48,8 @@ VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_axi_master_wr.v VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_tlp_demux_bar.v VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_tlp_demux.v VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_tlp_mux.v +VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_tlp_fifo.v +VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_tlp_fifo_raw.v VERILOG_SOURCES += ../../lib/pcie/rtl/dma_if_pcie.v VERILOG_SOURCES += ../../lib/pcie/rtl/dma_if_pcie_rd.v VERILOG_SOURCES += ../../lib/pcie/rtl/dma_if_pcie_wr.v diff --git a/example/ExaNIC_X10/fpga/tb/fpga_core/test_fpga_core.py b/example/ExaNIC_X10/fpga/tb/fpga_core/test_fpga_core.py index c90e23488..473c5bf79 100644 --- a/example/ExaNIC_X10/fpga/tb/fpga_core/test_fpga_core.py +++ 
b/example/ExaNIC_X10/fpga/tb/fpga_core/test_fpga_core.py @@ -489,6 +489,8 @@ def test_fpga_core(request): os.path.join(pcie_rtl_dir, "pcie_tlp_demux_bar.v"), os.path.join(pcie_rtl_dir, "pcie_tlp_demux.v"), os.path.join(pcie_rtl_dir, "pcie_tlp_mux.v"), + os.path.join(pcie_rtl_dir, "pcie_tlp_fifo.v"), + os.path.join(pcie_rtl_dir, "pcie_tlp_fifo_raw.v"), os.path.join(pcie_rtl_dir, "dma_if_pcie.v"), os.path.join(pcie_rtl_dir, "dma_if_pcie_rd.v"), os.path.join(pcie_rtl_dir, "dma_if_pcie_wr.v"), diff --git a/example/ExaNIC_X25/fpga/fpga/Makefile b/example/ExaNIC_X25/fpga/fpga/Makefile index 557d12385..2ea4ba6e7 100644 --- a/example/ExaNIC_X25/fpga/fpga/Makefile +++ b/example/ExaNIC_X25/fpga/fpga/Makefile @@ -28,6 +28,8 @@ SYN_FILES += lib/pcie/rtl/pcie_axi_master_wr.v SYN_FILES += lib/pcie/rtl/pcie_tlp_demux_bar.v SYN_FILES += lib/pcie/rtl/pcie_tlp_demux.v SYN_FILES += lib/pcie/rtl/pcie_tlp_mux.v +SYN_FILES += lib/pcie/rtl/pcie_tlp_fifo.v +SYN_FILES += lib/pcie/rtl/pcie_tlp_fifo_raw.v SYN_FILES += lib/pcie/rtl/dma_if_pcie.v SYN_FILES += lib/pcie/rtl/dma_if_pcie_rd.v SYN_FILES += lib/pcie/rtl/dma_if_pcie_wr.v diff --git a/example/ExaNIC_X25/fpga/tb/fpga_core/Makefile b/example/ExaNIC_X25/fpga/tb/fpga_core/Makefile index 5f08ec54c..1c32eb9f8 100644 --- a/example/ExaNIC_X25/fpga/tb/fpga_core/Makefile +++ b/example/ExaNIC_X25/fpga/tb/fpga_core/Makefile @@ -48,6 +48,8 @@ VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_axi_master_wr.v VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_tlp_demux_bar.v VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_tlp_demux.v VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_tlp_mux.v +VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_tlp_fifo.v +VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_tlp_fifo_raw.v VERILOG_SOURCES += ../../lib/pcie/rtl/dma_if_pcie.v VERILOG_SOURCES += ../../lib/pcie/rtl/dma_if_pcie_rd.v VERILOG_SOURCES += ../../lib/pcie/rtl/dma_if_pcie_wr.v diff --git a/example/ExaNIC_X25/fpga/tb/fpga_core/test_fpga_core.py 
b/example/ExaNIC_X25/fpga/tb/fpga_core/test_fpga_core.py index cbc4e68e3..02906b5c2 100644 --- a/example/ExaNIC_X25/fpga/tb/fpga_core/test_fpga_core.py +++ b/example/ExaNIC_X25/fpga/tb/fpga_core/test_fpga_core.py @@ -515,6 +515,8 @@ def test_fpga_core(request): os.path.join(pcie_rtl_dir, "pcie_tlp_demux_bar.v"), os.path.join(pcie_rtl_dir, "pcie_tlp_demux.v"), os.path.join(pcie_rtl_dir, "pcie_tlp_mux.v"), + os.path.join(pcie_rtl_dir, "pcie_tlp_fifo.v"), + os.path.join(pcie_rtl_dir, "pcie_tlp_fifo_raw.v"), os.path.join(pcie_rtl_dir, "dma_if_pcie.v"), os.path.join(pcie_rtl_dir, "dma_if_pcie_rd.v"), os.path.join(pcie_rtl_dir, "dma_if_pcie_wr.v"), diff --git a/example/VCU108/fpga/fpga/Makefile b/example/VCU108/fpga/fpga/Makefile index 44724d530..f7ced6923 100644 --- a/example/VCU108/fpga/fpga/Makefile +++ b/example/VCU108/fpga/fpga/Makefile @@ -28,6 +28,8 @@ SYN_FILES += lib/pcie/rtl/pcie_axi_master_wr.v SYN_FILES += lib/pcie/rtl/pcie_tlp_demux_bar.v SYN_FILES += lib/pcie/rtl/pcie_tlp_demux.v SYN_FILES += lib/pcie/rtl/pcie_tlp_mux.v +SYN_FILES += lib/pcie/rtl/pcie_tlp_fifo.v +SYN_FILES += lib/pcie/rtl/pcie_tlp_fifo_raw.v SYN_FILES += lib/pcie/rtl/dma_if_pcie.v SYN_FILES += lib/pcie/rtl/dma_if_pcie_rd.v SYN_FILES += lib/pcie/rtl/dma_if_pcie_wr.v diff --git a/example/VCU108/fpga/tb/fpga_core/Makefile b/example/VCU108/fpga/tb/fpga_core/Makefile index 2cd4c6b21..ed02b0273 100644 --- a/example/VCU108/fpga/tb/fpga_core/Makefile +++ b/example/VCU108/fpga/tb/fpga_core/Makefile @@ -48,6 +48,8 @@ VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_axi_master_wr.v VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_tlp_demux_bar.v VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_tlp_demux.v VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_tlp_mux.v +VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_tlp_fifo.v +VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_tlp_fifo_raw.v VERILOG_SOURCES += ../../lib/pcie/rtl/dma_if_pcie.v VERILOG_SOURCES += ../../lib/pcie/rtl/dma_if_pcie_rd.v VERILOG_SOURCES += 
../../lib/pcie/rtl/dma_if_pcie_wr.v diff --git a/example/VCU108/fpga/tb/fpga_core/test_fpga_core.py b/example/VCU108/fpga/tb/fpga_core/test_fpga_core.py index 8bb337027..ed8d99b9f 100644 --- a/example/VCU108/fpga/tb/fpga_core/test_fpga_core.py +++ b/example/VCU108/fpga/tb/fpga_core/test_fpga_core.py @@ -496,6 +496,8 @@ def test_fpga_core(request): os.path.join(pcie_rtl_dir, "pcie_tlp_demux_bar.v"), os.path.join(pcie_rtl_dir, "pcie_tlp_demux.v"), os.path.join(pcie_rtl_dir, "pcie_tlp_mux.v"), + os.path.join(pcie_rtl_dir, "pcie_tlp_fifo.v"), + os.path.join(pcie_rtl_dir, "pcie_tlp_fifo_raw.v"), os.path.join(pcie_rtl_dir, "dma_if_pcie.v"), os.path.join(pcie_rtl_dir, "dma_if_pcie_rd.v"), os.path.join(pcie_rtl_dir, "dma_if_pcie_wr.v"), diff --git a/example/VCU118/fpga/fpga/Makefile b/example/VCU118/fpga/fpga/Makefile index 66c56cfe8..d8e032deb 100644 --- a/example/VCU118/fpga/fpga/Makefile +++ b/example/VCU118/fpga/fpga/Makefile @@ -28,6 +28,8 @@ SYN_FILES += lib/pcie/rtl/pcie_axi_master_wr.v SYN_FILES += lib/pcie/rtl/pcie_tlp_demux_bar.v SYN_FILES += lib/pcie/rtl/pcie_tlp_demux.v SYN_FILES += lib/pcie/rtl/pcie_tlp_mux.v +SYN_FILES += lib/pcie/rtl/pcie_tlp_fifo.v +SYN_FILES += lib/pcie/rtl/pcie_tlp_fifo_raw.v SYN_FILES += lib/pcie/rtl/dma_if_pcie.v SYN_FILES += lib/pcie/rtl/dma_if_pcie_rd.v SYN_FILES += lib/pcie/rtl/dma_if_pcie_wr.v diff --git a/example/VCU118/fpga/tb/fpga_core/Makefile b/example/VCU118/fpga/tb/fpga_core/Makefile index 7c85b6e54..f574543cc 100644 --- a/example/VCU118/fpga/tb/fpga_core/Makefile +++ b/example/VCU118/fpga/tb/fpga_core/Makefile @@ -48,6 +48,8 @@ VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_axi_master_wr.v VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_tlp_demux_bar.v VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_tlp_demux.v VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_tlp_mux.v +VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_tlp_fifo.v +VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_tlp_fifo_raw.v VERILOG_SOURCES += ../../lib/pcie/rtl/dma_if_pcie.v 
VERILOG_SOURCES += ../../lib/pcie/rtl/dma_if_pcie_rd.v VERILOG_SOURCES += ../../lib/pcie/rtl/dma_if_pcie_wr.v diff --git a/example/VCU118/fpga/tb/fpga_core/test_fpga_core.py b/example/VCU118/fpga/tb/fpga_core/test_fpga_core.py index 76148a5a5..f0b91d7bf 100644 --- a/example/VCU118/fpga/tb/fpga_core/test_fpga_core.py +++ b/example/VCU118/fpga/tb/fpga_core/test_fpga_core.py @@ -522,6 +522,8 @@ def test_fpga_core(request): os.path.join(pcie_rtl_dir, "pcie_tlp_demux_bar.v"), os.path.join(pcie_rtl_dir, "pcie_tlp_demux.v"), os.path.join(pcie_rtl_dir, "pcie_tlp_mux.v"), + os.path.join(pcie_rtl_dir, "pcie_tlp_fifo.v"), + os.path.join(pcie_rtl_dir, "pcie_tlp_fifo_raw.v"), os.path.join(pcie_rtl_dir, "dma_if_pcie.v"), os.path.join(pcie_rtl_dir, "dma_if_pcie_rd.v"), os.path.join(pcie_rtl_dir, "dma_if_pcie_wr.v"), diff --git a/example/VCU1525/fpga/fpga/Makefile b/example/VCU1525/fpga/fpga/Makefile index 270dd5c62..33fe133b4 100644 --- a/example/VCU1525/fpga/fpga/Makefile +++ b/example/VCU1525/fpga/fpga/Makefile @@ -28,6 +28,8 @@ SYN_FILES += lib/pcie/rtl/pcie_axi_master_wr.v SYN_FILES += lib/pcie/rtl/pcie_tlp_demux_bar.v SYN_FILES += lib/pcie/rtl/pcie_tlp_demux.v SYN_FILES += lib/pcie/rtl/pcie_tlp_mux.v +SYN_FILES += lib/pcie/rtl/pcie_tlp_fifo.v +SYN_FILES += lib/pcie/rtl/pcie_tlp_fifo_raw.v SYN_FILES += lib/pcie/rtl/dma_if_pcie.v SYN_FILES += lib/pcie/rtl/dma_if_pcie_rd.v SYN_FILES += lib/pcie/rtl/dma_if_pcie_wr.v diff --git a/example/VCU1525/fpga/tb/fpga_core/Makefile b/example/VCU1525/fpga/tb/fpga_core/Makefile index 7c85b6e54..f574543cc 100644 --- a/example/VCU1525/fpga/tb/fpga_core/Makefile +++ b/example/VCU1525/fpga/tb/fpga_core/Makefile @@ -48,6 +48,8 @@ VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_axi_master_wr.v VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_tlp_demux_bar.v VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_tlp_demux.v VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_tlp_mux.v +VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_tlp_fifo.v +VERILOG_SOURCES += 
../../lib/pcie/rtl/pcie_tlp_fifo_raw.v VERILOG_SOURCES += ../../lib/pcie/rtl/dma_if_pcie.v VERILOG_SOURCES += ../../lib/pcie/rtl/dma_if_pcie_rd.v VERILOG_SOURCES += ../../lib/pcie/rtl/dma_if_pcie_wr.v diff --git a/example/VCU1525/fpga/tb/fpga_core/test_fpga_core.py b/example/VCU1525/fpga/tb/fpga_core/test_fpga_core.py index 466a5652c..441d72b2f 100644 --- a/example/VCU1525/fpga/tb/fpga_core/test_fpga_core.py +++ b/example/VCU1525/fpga/tb/fpga_core/test_fpga_core.py @@ -517,6 +517,8 @@ def test_fpga_core(request): os.path.join(pcie_rtl_dir, "pcie_tlp_demux_bar.v"), os.path.join(pcie_rtl_dir, "pcie_tlp_demux.v"), os.path.join(pcie_rtl_dir, "pcie_tlp_mux.v"), + os.path.join(pcie_rtl_dir, "pcie_tlp_fifo.v"), + os.path.join(pcie_rtl_dir, "pcie_tlp_fifo_raw.v"), os.path.join(pcie_rtl_dir, "dma_if_pcie.v"), os.path.join(pcie_rtl_dir, "dma_if_pcie_rd.v"), os.path.join(pcie_rtl_dir, "dma_if_pcie_wr.v"), diff --git a/example/ZCU106/fpga/fpga/Makefile b/example/ZCU106/fpga/fpga/Makefile index 1ff1c78e3..f2c415c83 100644 --- a/example/ZCU106/fpga/fpga/Makefile +++ b/example/ZCU106/fpga/fpga/Makefile @@ -28,6 +28,8 @@ SYN_FILES += lib/pcie/rtl/pcie_axi_master_wr.v SYN_FILES += lib/pcie/rtl/pcie_tlp_demux_bar.v SYN_FILES += lib/pcie/rtl/pcie_tlp_demux.v SYN_FILES += lib/pcie/rtl/pcie_tlp_mux.v +SYN_FILES += lib/pcie/rtl/pcie_tlp_fifo.v +SYN_FILES += lib/pcie/rtl/pcie_tlp_fifo_raw.v SYN_FILES += lib/pcie/rtl/dma_if_pcie.v SYN_FILES += lib/pcie/rtl/dma_if_pcie_rd.v SYN_FILES += lib/pcie/rtl/dma_if_pcie_wr.v diff --git a/example/ZCU106/fpga/tb/fpga_core/Makefile b/example/ZCU106/fpga/tb/fpga_core/Makefile index 98bf8cb63..7b9ddc987 100644 --- a/example/ZCU106/fpga/tb/fpga_core/Makefile +++ b/example/ZCU106/fpga/tb/fpga_core/Makefile @@ -48,6 +48,8 @@ VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_axi_master_wr.v VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_tlp_demux_bar.v VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_tlp_demux.v VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_tlp_mux.v 
+VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_tlp_fifo.v +VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_tlp_fifo_raw.v VERILOG_SOURCES += ../../lib/pcie/rtl/dma_if_pcie.v VERILOG_SOURCES += ../../lib/pcie/rtl/dma_if_pcie_rd.v VERILOG_SOURCES += ../../lib/pcie/rtl/dma_if_pcie_wr.v diff --git a/example/ZCU106/fpga/tb/fpga_core/test_fpga_core.py b/example/ZCU106/fpga/tb/fpga_core/test_fpga_core.py index 48a5802be..1a79a2660 100644 --- a/example/ZCU106/fpga/tb/fpga_core/test_fpga_core.py +++ b/example/ZCU106/fpga/tb/fpga_core/test_fpga_core.py @@ -522,6 +522,8 @@ def test_fpga_core(request): os.path.join(pcie_rtl_dir, "pcie_tlp_demux_bar.v"), os.path.join(pcie_rtl_dir, "pcie_tlp_demux.v"), os.path.join(pcie_rtl_dir, "pcie_tlp_mux.v"), + os.path.join(pcie_rtl_dir, "pcie_tlp_fifo.v"), + os.path.join(pcie_rtl_dir, "pcie_tlp_fifo_raw.v"), os.path.join(pcie_rtl_dir, "dma_if_pcie.v"), os.path.join(pcie_rtl_dir, "dma_if_pcie_rd.v"), os.path.join(pcie_rtl_dir, "dma_if_pcie_wr.v"), diff --git a/example/common/tb/example_core_pcie_us/Makefile b/example/common/tb/example_core_pcie_us/Makefile index 53532e23a..37bd67a89 100644 --- a/example/common/tb/example_core_pcie_us/Makefile +++ b/example/common/tb/example_core_pcie_us/Makefile @@ -47,6 +47,8 @@ VERILOG_SOURCES += ../../../../rtl/pcie_axi_master_wr.v VERILOG_SOURCES += ../../../../rtl/pcie_tlp_demux_bar.v VERILOG_SOURCES += ../../../../rtl/pcie_tlp_demux.v VERILOG_SOURCES += ../../../../rtl/pcie_tlp_mux.v +VERILOG_SOURCES += ../../../../rtl/pcie_tlp_fifo.v +VERILOG_SOURCES += ../../../../rtl/pcie_tlp_fifo_raw.v VERILOG_SOURCES += ../../../../rtl/dma_if_pcie.v VERILOG_SOURCES += ../../../../rtl/dma_if_pcie_rd.v VERILOG_SOURCES += ../../../../rtl/dma_if_pcie_wr.v diff --git a/example/common/tb/example_core_pcie_us/test_example_core_pcie_us.py b/example/common/tb/example_core_pcie_us/test_example_core_pcie_us.py index a986610a9..9005c5105 100644 --- a/example/common/tb/example_core_pcie_us/test_example_core_pcie_us.py +++ 
b/example/common/tb/example_core_pcie_us/test_example_core_pcie_us.py @@ -532,6 +532,8 @@ def test_example_core_pcie_us(request, axis_pcie_data_width): os.path.join(pcie_rtl_dir, "pcie_tlp_demux_bar.v"), os.path.join(pcie_rtl_dir, "pcie_tlp_demux.v"), os.path.join(pcie_rtl_dir, "pcie_tlp_mux.v"), + os.path.join(pcie_rtl_dir, "pcie_tlp_fifo.v"), + os.path.join(pcie_rtl_dir, "pcie_tlp_fifo_raw.v"), os.path.join(pcie_rtl_dir, "dma_if_pcie.v"), os.path.join(pcie_rtl_dir, "dma_if_pcie_rd.v"), os.path.join(pcie_rtl_dir, "dma_if_pcie_wr.v"), diff --git a/example/fb2CG/fpga/fpga/Makefile b/example/fb2CG/fpga/fpga/Makefile index 5502084f4..6cd242e5a 100644 --- a/example/fb2CG/fpga/fpga/Makefile +++ b/example/fb2CG/fpga/fpga/Makefile @@ -28,6 +28,8 @@ SYN_FILES += lib/pcie/rtl/pcie_axi_master_wr.v SYN_FILES += lib/pcie/rtl/pcie_tlp_demux_bar.v SYN_FILES += lib/pcie/rtl/pcie_tlp_demux.v SYN_FILES += lib/pcie/rtl/pcie_tlp_mux.v +SYN_FILES += lib/pcie/rtl/pcie_tlp_fifo.v +SYN_FILES += lib/pcie/rtl/pcie_tlp_fifo_raw.v SYN_FILES += lib/pcie/rtl/dma_if_pcie.v SYN_FILES += lib/pcie/rtl/dma_if_pcie_rd.v SYN_FILES += lib/pcie/rtl/dma_if_pcie_wr.v diff --git a/example/fb2CG/fpga/tb/fpga_core/Makefile b/example/fb2CG/fpga/tb/fpga_core/Makefile index 7c85b6e54..f574543cc 100644 --- a/example/fb2CG/fpga/tb/fpga_core/Makefile +++ b/example/fb2CG/fpga/tb/fpga_core/Makefile @@ -48,6 +48,8 @@ VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_axi_master_wr.v VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_tlp_demux_bar.v VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_tlp_demux.v VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_tlp_mux.v +VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_tlp_fifo.v +VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_tlp_fifo_raw.v VERILOG_SOURCES += ../../lib/pcie/rtl/dma_if_pcie.v VERILOG_SOURCES += ../../lib/pcie/rtl/dma_if_pcie_rd.v VERILOG_SOURCES += ../../lib/pcie/rtl/dma_if_pcie_wr.v diff --git a/example/fb2CG/fpga/tb/fpga_core/test_fpga_core.py 
b/example/fb2CG/fpga/tb/fpga_core/test_fpga_core.py index 55545a4ee..64609e5f0 100644 --- a/example/fb2CG/fpga/tb/fpga_core/test_fpga_core.py +++ b/example/fb2CG/fpga/tb/fpga_core/test_fpga_core.py @@ -515,6 +515,8 @@ def test_fpga_core(request): os.path.join(pcie_rtl_dir, "pcie_tlp_demux_bar.v"), os.path.join(pcie_rtl_dir, "pcie_tlp_demux.v"), os.path.join(pcie_rtl_dir, "pcie_tlp_mux.v"), + os.path.join(pcie_rtl_dir, "pcie_tlp_fifo.v"), + os.path.join(pcie_rtl_dir, "pcie_tlp_fifo_raw.v"), os.path.join(pcie_rtl_dir, "dma_if_pcie.v"), os.path.join(pcie_rtl_dir, "dma_if_pcie_rd.v"), os.path.join(pcie_rtl_dir, "dma_if_pcie_wr.v"), diff --git a/rtl/pcie_us_if.v b/rtl/pcie_us_if.v index 1dd668335..62c429ef8 100644 --- a/rtl/pcie_us_if.v +++ b/rtl/pcie_us_if.v @@ -1,6 +1,6 @@ /* -Copyright (c) 2021 Alex Forencich +Copyright (c) 2021-2022 Alex Forencich Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -45,6 +45,14 @@ module pcie_us_if # parameter AXIS_PCIE_CQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 85 : 183, // PCIe AXI stream CC tuser signal width parameter AXIS_PCIE_CC_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 33 : 81, + // RC interface TLP straddling + parameter RC_STRADDLE = AXIS_PCIE_DATA_WIDTH >= 256, + // RQ interface TLP straddling + parameter RQ_STRADDLE = AXIS_PCIE_DATA_WIDTH >= 512, + // CQ interface TLP straddling + parameter CQ_STRADDLE = AXIS_PCIE_DATA_WIDTH >= 512, + // CC interface TLP straddling + parameter CC_STRADDLE = AXIS_PCIE_DATA_WIDTH >= 512, // RQ sequence number width parameter RQ_SEQ_NUM_WIDTH = AXIS_PCIE_RQ_USER_WIDTH == 60 ? 
4 : 6, // TLP data width @@ -295,6 +303,7 @@ pcie_us_if_rc #( .AXIS_PCIE_DATA_WIDTH(AXIS_PCIE_DATA_WIDTH), .AXIS_PCIE_KEEP_WIDTH(AXIS_PCIE_KEEP_WIDTH), .AXIS_PCIE_RC_USER_WIDTH(AXIS_PCIE_RC_USER_WIDTH), + .RC_STRADDLE(RC_STRADDLE), .TLP_DATA_WIDTH(TLP_DATA_WIDTH), .TLP_STRB_WIDTH(TLP_STRB_WIDTH), .TLP_HDR_WIDTH(TLP_HDR_WIDTH), @@ -332,6 +341,7 @@ pcie_us_if_rq #( .AXIS_PCIE_DATA_WIDTH(AXIS_PCIE_DATA_WIDTH), .AXIS_PCIE_KEEP_WIDTH(AXIS_PCIE_KEEP_WIDTH), .AXIS_PCIE_RQ_USER_WIDTH(AXIS_PCIE_RQ_USER_WIDTH), + .RQ_STRADDLE(RQ_STRADDLE), .RQ_SEQ_NUM_WIDTH(RQ_SEQ_NUM_WIDTH), .TLP_DATA_WIDTH(TLP_DATA_WIDTH), .TLP_STRB_WIDTH(TLP_STRB_WIDTH), @@ -402,6 +412,7 @@ pcie_us_if_cq #( .AXIS_PCIE_DATA_WIDTH(AXIS_PCIE_DATA_WIDTH), .AXIS_PCIE_KEEP_WIDTH(AXIS_PCIE_KEEP_WIDTH), .AXIS_PCIE_CQ_USER_WIDTH(AXIS_PCIE_CQ_USER_WIDTH), + .CQ_STRADDLE(CQ_STRADDLE), .TLP_DATA_WIDTH(TLP_DATA_WIDTH), .TLP_STRB_WIDTH(TLP_STRB_WIDTH), .TLP_HDR_WIDTH(TLP_HDR_WIDTH), @@ -440,6 +451,7 @@ pcie_us_if_cc #( .AXIS_PCIE_DATA_WIDTH(AXIS_PCIE_DATA_WIDTH), .AXIS_PCIE_KEEP_WIDTH(AXIS_PCIE_KEEP_WIDTH), .AXIS_PCIE_CC_USER_WIDTH(AXIS_PCIE_CC_USER_WIDTH), + .CC_STRADDLE(CC_STRADDLE), .TLP_DATA_WIDTH(TLP_DATA_WIDTH), .TLP_STRB_WIDTH(TLP_STRB_WIDTH), .TLP_HDR_WIDTH(TLP_HDR_WIDTH), diff --git a/rtl/pcie_us_if_cc.v b/rtl/pcie_us_if_cc.v index fbd5f30dd..3810de446 100644 --- a/rtl/pcie_us_if_cc.v +++ b/rtl/pcie_us_if_cc.v @@ -1,6 +1,6 @@ /* -Copyright (c) 2021 Alex Forencich +Copyright (c) 2021-2022 Alex Forencich Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -39,6 +39,8 @@ module pcie_us_if_cc # parameter AXIS_PCIE_KEEP_WIDTH = (AXIS_PCIE_DATA_WIDTH/32), // PCIe AXI stream CC tuser signal width parameter AXIS_PCIE_CC_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 
33 : 81, + // CC interface TLP straddling + parameter CC_STRADDLE = AXIS_PCIE_DATA_WIDTH >= 512, // TLP data width parameter TLP_DATA_WIDTH = AXIS_PCIE_DATA_WIDTH, // TLP strobe width @@ -77,7 +79,11 @@ module pcie_us_if_cc # parameter TLP_DATA_WIDTH_BYTES = TLP_DATA_WIDTH/8; parameter TLP_DATA_WIDTH_DWORDS = TLP_DATA_WIDTH/32; -parameter OUTPUT_FIFO_ADDR_WIDTH = 5; +parameter INT_TLP_SEG_COUNT = (CC_STRADDLE && AXIS_PCIE_DATA_WIDTH >= 512) ? 2 : 1; +parameter INT_TLP_SEG_DATA_WIDTH = TLP_DATA_WIDTH / INT_TLP_SEG_COUNT; +parameter INT_TLP_SEG_STRB_WIDTH = TLP_STRB_WIDTH / INT_TLP_SEG_COUNT; + +parameter SEG_SEL_WIDTH = $clog2(INT_TLP_SEG_COUNT); // bus width assertions initial begin @@ -103,11 +109,6 @@ initial begin end end - if (TLP_SEG_COUNT != 1) begin - $error("Error: TLP segment count must be 1 (instance %m)"); - $finish; - end - if (TLP_DATA_WIDTH != AXIS_PCIE_DATA_WIDTH) begin $error("Error: Interface widths must match (instance %m)"); $finish; @@ -132,284 +133,42 @@ localparam [2:0] CPL_STATUS_CRS = 3'b010, // configuration request retry status CPL_STATUS_CA = 3'b100; // completer abort -reg tx_cpl_tlp_ready_cmb; +reg frame_reg = 1'b0, frame_next, frame_cyc; +reg tlp_hdr1_reg = 1'b0, tlp_hdr1_next, tlp_hdr1_cyc; +reg tlp_hdr2_reg = 1'b0, tlp_hdr2_next, tlp_hdr2_cyc; +reg tlp_split1_reg = 1'b0, tlp_split1_next, tlp_split1_cyc; +reg tlp_split2_reg = 1'b0, tlp_split2_next, tlp_split2_cyc; +reg [SEG_SEL_WIDTH-1:0] seg_offset_cyc; +reg [SEG_SEL_WIDTH+1-1:0] seg_count_cyc; +reg valid, sop, eop; +reg frame, abort; -assign tx_cpl_tlp_ready = tx_cpl_tlp_ready_cmb; +reg [INT_TLP_SEG_COUNT-1:0] out_sel, out_sel_cyc; +reg [INT_TLP_SEG_COUNT-1:0] out_sop; +reg [INT_TLP_SEG_COUNT-1:0] out_eop; +reg [INT_TLP_SEG_COUNT-1:0] out_tlp_hdr1; +reg [INT_TLP_SEG_COUNT-1:0] out_tlp_hdr2; +reg [INT_TLP_SEG_COUNT-1:0] out_tlp_split1; +reg [INT_TLP_SEG_COUNT-1:0] out_tlp_split2; +reg [SEG_SEL_WIDTH+1-1:0] out_sel_seg[0:INT_TLP_SEG_COUNT-1]; -// process outgoing TLPs -localparam 
[1:0] - TLP_OUTPUT_STATE_IDLE = 2'd0, - TLP_OUTPUT_STATE_HEADER = 2'd1, - TLP_OUTPUT_STATE_PAYLOAD = 2'd2; +reg [TLP_DATA_WIDTH-1:0] out_tlp_data; +reg [TLP_STRB_WIDTH-1:0] out_tlp_strb; +reg [INT_TLP_SEG_COUNT-1:0] out_tlp_valid; +reg [INT_TLP_SEG_COUNT-1:0] out_tlp_sop; +reg [INT_TLP_SEG_COUNT-1:0] out_tlp_eop; +reg [95:0] out_shift_tlp_data_reg = 0, out_shift_tlp_data_next; +reg [2:0] out_shift_tlp_strb_reg = 0, out_shift_tlp_strb_next; -reg [1:0] tlp_output_state_reg = TLP_OUTPUT_STATE_IDLE, tlp_output_state_next; +reg [127:0] seg_tlp_hdr; +reg [95:0] seg_cc_hdr; +reg [INT_TLP_SEG_COUNT*3-1:0] eop_index; -reg [TLP_DATA_WIDTH-1:0] out_tlp_data_reg = 0, out_tlp_data_next; -reg [TLP_STRB_WIDTH-1:0] out_tlp_strb_reg = 0, out_tlp_strb_next; -reg [TLP_SEG_COUNT-1:0] out_tlp_eop_reg = 0, out_tlp_eop_next; - -reg [2:0] tx_cpl_tlp_hdr_fmt; -reg [4:0] tx_cpl_tlp_hdr_type; -reg [2:0] tx_cpl_tlp_hdr_tc; -reg tx_cpl_tlp_hdr_ln; -reg tx_cpl_tlp_hdr_th; -reg tx_cpl_tlp_hdr_td; -reg tx_cpl_tlp_hdr_ep; -reg [2:0] tx_cpl_tlp_hdr_attr; -reg [1:0] tx_cpl_tlp_hdr_at; -reg [9:0] tx_cpl_tlp_hdr_length; -reg [15:0] tx_cpl_tlp_hdr_completer_id; -reg [2:0] tx_cpl_tlp_hdr_cpl_status; -reg tx_cpl_tlp_hdr_bcm; -reg [11:0] tx_cpl_tlp_hdr_byte_count; -reg [15:0] tx_cpl_tlp_hdr_requester_id; -reg [9:0] tx_cpl_tlp_hdr_tag; -reg [6:0] tx_cpl_tlp_hdr_lower_addr; - -reg [95:0] tlp_header_data; -reg [AXIS_PCIE_CC_USER_WIDTH-1:0] tlp_tuser; - -reg [AXIS_PCIE_DATA_WIDTH-1:0] m_axis_cc_tdata_int = 0; -reg [AXIS_PCIE_KEEP_WIDTH-1:0] m_axis_cc_tkeep_int = 0; -reg m_axis_cc_tvalid_int = 0; -wire m_axis_cc_tready_int; -reg m_axis_cc_tlast_int = 0; -reg [AXIS_PCIE_CC_USER_WIDTH-1:0] m_axis_cc_tuser_int = 0; - -always @* begin - tlp_output_state_next = TLP_OUTPUT_STATE_IDLE; - - out_tlp_data_next = out_tlp_data_reg; - out_tlp_strb_next = out_tlp_strb_reg; - out_tlp_eop_next = out_tlp_eop_reg; - - tx_cpl_tlp_ready_cmb = 1'b0; - - // TLP header parsing - // DW 0 - tx_cpl_tlp_hdr_fmt = 
tx_cpl_tlp_hdr[127:125]; // fmt - tx_cpl_tlp_hdr_type = tx_cpl_tlp_hdr[124:120]; // type - tx_cpl_tlp_hdr_tag[9] = tx_cpl_tlp_hdr[119]; // T9 - tx_cpl_tlp_hdr_tc = tx_cpl_tlp_hdr[118:116]; // TC - tx_cpl_tlp_hdr_tag[8] = tx_cpl_tlp_hdr[115]; // T8 - tx_cpl_tlp_hdr_attr[2] = tx_cpl_tlp_hdr[114]; // attr - tx_cpl_tlp_hdr_ln = tx_cpl_tlp_hdr[113]; // LN - tx_cpl_tlp_hdr_th = tx_cpl_tlp_hdr[112]; // TH - tx_cpl_tlp_hdr_td = tx_cpl_tlp_hdr[111]; // TD - tx_cpl_tlp_hdr_ep = tx_cpl_tlp_hdr[110]; // EP - tx_cpl_tlp_hdr_attr[1:0] = tx_cpl_tlp_hdr[109:108]; // attr - tx_cpl_tlp_hdr_at = tx_cpl_tlp_hdr[107:106]; // AT - tx_cpl_tlp_hdr_length = tx_cpl_tlp_hdr[105:96]; // length - // DW 1 - tx_cpl_tlp_hdr_completer_id = tx_cpl_tlp_hdr[95:80]; // completer ID - tx_cpl_tlp_hdr_cpl_status = tx_cpl_tlp_hdr[79:77]; // completion status - tx_cpl_tlp_hdr_bcm = tx_cpl_tlp_hdr[76]; // BCM - tx_cpl_tlp_hdr_byte_count = tx_cpl_tlp_hdr[75:64]; // byte count - // DW 2 - tx_cpl_tlp_hdr_requester_id = tx_cpl_tlp_hdr[63:48]; // requester ID - tx_cpl_tlp_hdr_tag[7:0] = tx_cpl_tlp_hdr[47:40]; // tag - tx_cpl_tlp_hdr_lower_addr = tx_cpl_tlp_hdr[38:32]; // lower address - - tlp_header_data[6:0] = tx_cpl_tlp_hdr_lower_addr; // lower address - tlp_header_data[7] = 1'b0; - tlp_header_data[9:8] = tx_cpl_tlp_hdr_at; // AT - tlp_header_data[15:10] = 6'd0; - tlp_header_data[28:16] = tx_cpl_tlp_hdr_byte_count; // Byte count - tlp_header_data[29] = 1'b0; // locked read completion - tlp_header_data[31:30] = 2'd0; - tlp_header_data[42:32] = tx_cpl_tlp_hdr_length; // DWORD count - tlp_header_data[45:43] = tx_cpl_tlp_hdr_cpl_status; // completion status - tlp_header_data[46] = tx_cpl_tlp_hdr_ep; // poisoned - tlp_header_data[47] = 1'b0; - tlp_header_data[63:48] = tx_cpl_tlp_hdr_requester_id; // requester ID - tlp_header_data[71:64] = tx_cpl_tlp_hdr_tag; // tag - tlp_header_data[87:72] = tx_cpl_tlp_hdr_completer_id; // completer ID - tlp_header_data[88] = 1'b0; // completer ID enable - tlp_header_data[91:89] = 
tx_cpl_tlp_hdr_tc; // TC - tlp_header_data[94:92] = tx_cpl_tlp_hdr_attr; // attr - tlp_header_data[95] = 1'b0; // force ECRC - - if (AXIS_PCIE_DATA_WIDTH == 512) begin - tlp_tuser[1:0] = 2'b01; // is_sop - tlp_tuser[3:2] = 2'd0; // is_sop0_ptr - tlp_tuser[5:4] = 2'd0; // is_sop1_ptr - tlp_tuser[7:6] = 2'b01; // is_eop - tlp_tuser[11:8] = 4'd3; // is_eop0_ptr - tlp_tuser[15:12] = 4'd0; // is_eop1_ptr - tlp_tuser[16] = 1'b0; // discontinue - tlp_tuser[80:17] = 64'd0; // parity - end else begin - tlp_tuser[0] = 1'b0; // discontinue - tlp_tuser[32:1] = 32'd0; // parity - end - - // TLP output - m_axis_cc_tdata_int = 0; - m_axis_cc_tkeep_int = 0; - m_axis_cc_tvalid_int = 1'b0; - m_axis_cc_tlast_int = 1'b0; - m_axis_cc_tuser_int = 0; - - // combine header and payload, merge in read request TLPs - case (tlp_output_state_reg) - TLP_OUTPUT_STATE_IDLE: begin - // idle state - - if (tx_cpl_tlp_valid && m_axis_cc_tready_int) begin - if (AXIS_PCIE_DATA_WIDTH == 64) begin - // 64 bit interface, send first half of header - m_axis_cc_tdata_int = tlp_header_data[63:0]; - m_axis_cc_tkeep_int = 2'b11; - m_axis_cc_tvalid_int = 1'b1; - m_axis_cc_tlast_int = 1'b0; - m_axis_cc_tuser_int = tlp_tuser; - - tlp_output_state_next = TLP_OUTPUT_STATE_HEADER; - end else begin - // wider interface, send header and start of payload - m_axis_cc_tdata_int = {tx_cpl_tlp_data, tlp_header_data}; - m_axis_cc_tkeep_int = {tx_cpl_tlp_strb, 3'b111}; - m_axis_cc_tvalid_int = 1'b1; - m_axis_cc_tlast_int = 1'b0; - m_axis_cc_tuser_int = tlp_tuser; - - tx_cpl_tlp_ready_cmb = 1'b1; - - out_tlp_data_next = tx_cpl_tlp_data; - out_tlp_strb_next = tx_cpl_tlp_strb; - out_tlp_eop_next = tx_cpl_tlp_eop; - - if (tx_cpl_tlp_eop && ((tx_cpl_tlp_strb >> (TLP_DATA_WIDTH_DWORDS-3)) == 0)) begin - m_axis_cc_tlast_int = 1'b1; - tlp_output_state_next = TLP_OUTPUT_STATE_IDLE; - end else begin - tlp_output_state_next = TLP_OUTPUT_STATE_PAYLOAD; - end - end - end else begin - tlp_output_state_next = TLP_OUTPUT_STATE_IDLE; - end - 
end - TLP_OUTPUT_STATE_HEADER: begin - // second cycle of header (64 bit interface width only) - if (AXIS_PCIE_DATA_WIDTH == 64) begin - m_axis_cc_tdata_int = {tx_cpl_tlp_data, tlp_header_data[95:64]}; - m_axis_cc_tkeep_int = {tx_cpl_tlp_strb, 1'b1}; - m_axis_cc_tvalid_int = 1'b1; - m_axis_cc_tlast_int = 1'b0; - m_axis_cc_tuser_int = tlp_tuser; - - tx_cpl_tlp_ready_cmb = 1'b1; - - out_tlp_data_next = tx_cpl_tlp_data; - out_tlp_strb_next = tx_cpl_tlp_strb; - out_tlp_eop_next = tx_cpl_tlp_eop; - - if (tx_cpl_tlp_eop && ((tx_cpl_tlp_strb >> (TLP_DATA_WIDTH_DWORDS-1)) == 0)) begin - m_axis_cc_tlast_int = 1'b1; - tlp_output_state_next = TLP_OUTPUT_STATE_IDLE; - end else begin - tlp_output_state_next = TLP_OUTPUT_STATE_PAYLOAD; - end - end - end - TLP_OUTPUT_STATE_PAYLOAD: begin - // transfer payload - if (AXIS_PCIE_DATA_WIDTH >= 128) begin - m_axis_cc_tdata_int = {tx_cpl_tlp_data, out_tlp_data_reg[TLP_DATA_WIDTH-1:TLP_DATA_WIDTH-96]}; - if (tx_cpl_tlp_valid && !out_tlp_eop_reg) begin - m_axis_cc_tkeep_int = {tx_cpl_tlp_strb, out_tlp_strb_reg[TLP_STRB_WIDTH-1:TLP_DATA_WIDTH_DWORDS-3]}; - end else begin - m_axis_cc_tkeep_int = out_tlp_strb_reg[TLP_STRB_WIDTH-1:TLP_DATA_WIDTH_DWORDS-3]; - end - m_axis_cc_tlast_int = 1'b0; - m_axis_cc_tuser_int = tlp_tuser; - - if ((tx_cpl_tlp_valid || out_tlp_eop_reg) && m_axis_cc_tready_int) begin - m_axis_cc_tvalid_int = 1'b1; - tx_cpl_tlp_ready_cmb = !out_tlp_eop_reg; - - out_tlp_data_next = tx_cpl_tlp_data; - out_tlp_strb_next = tx_cpl_tlp_strb; - out_tlp_eop_next = tx_cpl_tlp_eop; - - if (out_tlp_eop_reg || (tx_cpl_tlp_eop && ((tx_cpl_tlp_strb >> (TLP_DATA_WIDTH_DWORDS-3)) == 0))) begin - m_axis_cc_tlast_int = 1'b1; - tlp_output_state_next = TLP_OUTPUT_STATE_IDLE; - end else begin - tlp_output_state_next = TLP_OUTPUT_STATE_PAYLOAD; - end - end else begin - tlp_output_state_next = TLP_OUTPUT_STATE_PAYLOAD; - end - end else begin - m_axis_cc_tdata_int = {tx_cpl_tlp_data, out_tlp_data_reg[TLP_DATA_WIDTH-1:TLP_DATA_WIDTH-32]}; - if 
(tx_cpl_tlp_valid && !out_tlp_eop_reg) begin - m_axis_cc_tkeep_int = {tx_cpl_tlp_strb, out_tlp_strb_reg[TLP_STRB_WIDTH-1:TLP_DATA_WIDTH_DWORDS-1]}; - end else begin - m_axis_cc_tkeep_int = out_tlp_strb_reg[TLP_STRB_WIDTH-1:TLP_DATA_WIDTH_DWORDS-1]; - end - m_axis_cc_tlast_int = 1'b0; - m_axis_cc_tuser_int = tlp_tuser; - - if ((tx_cpl_tlp_valid || out_tlp_eop_reg) && m_axis_cc_tready_int) begin - m_axis_cc_tvalid_int = 1'b1; - tx_cpl_tlp_ready_cmb = !out_tlp_eop_reg; - - out_tlp_data_next = tx_cpl_tlp_data; - out_tlp_strb_next = tx_cpl_tlp_strb; - out_tlp_eop_next = tx_cpl_tlp_eop; - - if (out_tlp_eop_reg || (tx_cpl_tlp_eop && ((tx_cpl_tlp_strb >> (TLP_DATA_WIDTH_DWORDS-1)) == 0))) begin - m_axis_cc_tlast_int = 1'b1; - tlp_output_state_next = TLP_OUTPUT_STATE_IDLE; - end else begin - tlp_output_state_next = TLP_OUTPUT_STATE_PAYLOAD; - end - end else begin - tlp_output_state_next = TLP_OUTPUT_STATE_PAYLOAD; - end - end - end - endcase -end - -always @(posedge clk) begin - tlp_output_state_reg <= tlp_output_state_next; - - out_tlp_data_reg <= out_tlp_data_next; - out_tlp_strb_reg <= out_tlp_strb_next; - out_tlp_eop_reg <= out_tlp_eop_next; - - if (rst) begin - tlp_output_state_reg <= TLP_OUTPUT_STATE_IDLE; - end -end - -// output datapath logic (PCIe TLP) -reg [AXIS_PCIE_DATA_WIDTH-1:0] m_axis_cc_tdata_reg = {AXIS_PCIE_DATA_WIDTH{1'b0}}; -reg [AXIS_PCIE_KEEP_WIDTH-1:0] m_axis_cc_tkeep_reg = {AXIS_PCIE_KEEP_WIDTH{1'b0}}; -reg m_axis_cc_tvalid_reg = 1'b0, m_axis_cc_tvalid_next; -reg m_axis_cc_tlast_reg = 1'b0; -reg [AXIS_PCIE_CC_USER_WIDTH-1:0] m_axis_cc_tuser_reg = {AXIS_PCIE_CC_USER_WIDTH{1'b0}}; - -reg [OUTPUT_FIFO_ADDR_WIDTH+1-1:0] out_fifo_wr_ptr_reg = 0; -reg [OUTPUT_FIFO_ADDR_WIDTH+1-1:0] out_fifo_rd_ptr_reg = 0; -reg out_fifo_half_full_reg = 1'b0; - -wire out_fifo_full = out_fifo_wr_ptr_reg == (out_fifo_rd_ptr_reg ^ {1'b1, {OUTPUT_FIFO_ADDR_WIDTH{1'b0}}}); -wire out_fifo_empty = out_fifo_wr_ptr_reg == out_fifo_rd_ptr_reg; - -(* ram_style = "distributed" *) -reg 
[AXIS_PCIE_DATA_WIDTH-1:0] out_fifo_tdata[2**OUTPUT_FIFO_ADDR_WIDTH-1:0]; -(* ram_style = "distributed" *) -reg [AXIS_PCIE_KEEP_WIDTH-1:0] out_fifo_tkeep[2**OUTPUT_FIFO_ADDR_WIDTH-1:0]; -(* ram_style = "distributed" *) -reg out_fifo_tlast[2**OUTPUT_FIFO_ADDR_WIDTH-1:0]; -(* ram_style = "distributed" *) -reg [AXIS_PCIE_CC_USER_WIDTH-1:0] out_fifo_tuser[2**OUTPUT_FIFO_ADDR_WIDTH-1:0]; - -assign m_axis_cc_tready_int = !out_fifo_half_full_reg; +reg [AXIS_PCIE_DATA_WIDTH-1:0] m_axis_cc_tdata_reg = 0, m_axis_cc_tdata_next; +reg [AXIS_PCIE_KEEP_WIDTH-1:0] m_axis_cc_tkeep_reg = 0, m_axis_cc_tkeep_next; +reg m_axis_cc_tvalid_reg = 1'b0, m_axis_cc_tvalid_next; +reg m_axis_cc_tlast_reg = 1'b0, m_axis_cc_tlast_next; +reg [AXIS_PCIE_CC_USER_WIDTH-1:0] m_axis_cc_tuser_reg = 0, m_axis_cc_tuser_next; assign m_axis_cc_tdata = m_axis_cc_tdata_reg; assign m_axis_cc_tkeep = m_axis_cc_tkeep_reg; @@ -417,32 +176,382 @@ assign m_axis_cc_tvalid = m_axis_cc_tvalid_reg; assign m_axis_cc_tlast = m_axis_cc_tlast_reg; assign m_axis_cc_tuser = m_axis_cc_tuser_reg; +wire [TLP_DATA_WIDTH-1:0] fifo_tlp_data; +wire [TLP_STRB_WIDTH-1:0] fifo_tlp_strb; +wire [INT_TLP_SEG_COUNT*TLP_HDR_WIDTH-1:0] fifo_tlp_hdr; +wire [INT_TLP_SEG_COUNT-1:0] fifo_tlp_valid; +wire [INT_TLP_SEG_COUNT-1:0] fifo_tlp_sop; +wire [INT_TLP_SEG_COUNT-1:0] fifo_tlp_eop; +wire [SEG_SEL_WIDTH-1:0] fifo_seg_offset; +wire [SEG_SEL_WIDTH+1-1:0] fifo_seg_count; +reg fifo_read_en; +reg [SEG_SEL_WIDTH+1-1:0] fifo_read_seg_count; + +// completions +pcie_tlp_fifo_raw #( + .DEPTH((1024/4)*2), + .TLP_DATA_WIDTH(TLP_DATA_WIDTH), + .TLP_STRB_WIDTH(TLP_STRB_WIDTH), + .TLP_HDR_WIDTH(TLP_HDR_WIDTH), + .SEQ_NUM_WIDTH(1), + .IN_TLP_SEG_COUNT(TLP_SEG_COUNT), + .OUT_TLP_SEG_COUNT(INT_TLP_SEG_COUNT), + .CTRL_OUT_EN(0) +) +cpl_fifo_inst ( + .clk(clk), + .rst(rst), + + /* + * TLP input + */ + .in_tlp_data(tx_cpl_tlp_data), + .in_tlp_strb(tx_cpl_tlp_strb), + .in_tlp_hdr(tx_cpl_tlp_hdr), + .in_tlp_seq(0), + .in_tlp_bar_id(0), + .in_tlp_func_num(0), + 
.in_tlp_error(0), + .in_tlp_valid(tx_cpl_tlp_valid), + .in_tlp_sop(tx_cpl_tlp_sop), + .in_tlp_eop(tx_cpl_tlp_eop), + .in_tlp_ready(tx_cpl_tlp_ready), + + /* + * TLP output + */ + .out_tlp_data(fifo_tlp_data), + .out_tlp_strb(fifo_tlp_strb), + .out_tlp_hdr(fifo_tlp_hdr), + .out_tlp_seq(), + .out_tlp_bar_id(), + .out_tlp_func_num(), + .out_tlp_error(), + .out_tlp_valid(fifo_tlp_valid), + .out_tlp_sop(fifo_tlp_sop), + .out_tlp_eop(fifo_tlp_eop), + .out_seg_offset(fifo_seg_offset), + .out_seg_count(fifo_seg_count), + .out_read_en(fifo_read_en), + .out_read_seg_count(fifo_read_seg_count), + + .out_ctrl_tlp_strb(), + .out_ctrl_tlp_hdr(), + .out_ctrl_tlp_valid(), + .out_ctrl_tlp_sop(), + .out_ctrl_tlp_eop(), + .out_ctrl_seg_offset(), + .out_ctrl_seg_count(), + .out_ctrl_read_en(0), + .out_ctrl_read_seg_count(0), + + /* + * Status + */ + .half_full(), + .watermark() +); + +integer seg, cur_seg, lane; + +always @* begin + frame_next = frame_reg; + tlp_hdr1_next = tlp_hdr1_reg; + tlp_hdr2_next = tlp_hdr2_reg; + tlp_split1_next = tlp_split1_reg; + tlp_split2_next = tlp_split2_reg; + + m_axis_cc_tdata_next = m_axis_cc_tdata_reg; + m_axis_cc_tkeep_next = m_axis_cc_tkeep_reg; + m_axis_cc_tvalid_next = m_axis_cc_tvalid_reg && !m_axis_cc_tready; + m_axis_cc_tlast_next = m_axis_cc_tlast_reg; + m_axis_cc_tuser_next = m_axis_cc_tuser_reg; + + fifo_read_en = 0; + + frame_cyc = frame_reg; + tlp_hdr1_cyc = tlp_hdr1_reg; + tlp_hdr2_cyc = tlp_hdr2_reg; + tlp_split1_cyc = tlp_split1_reg; + tlp_split2_cyc = tlp_split2_reg; + seg_offset_cyc = fifo_seg_offset; + seg_count_cyc = 0; + valid = 0; + eop = 0; + frame = frame_cyc; + abort = 0; + + fifo_read_seg_count = 0; + + out_sel = 0; + out_sel_cyc = 0; + out_sop = 0; + out_eop = 0; + out_tlp_hdr1 = 0; + out_tlp_hdr2 = 0; + out_tlp_split1 = 0; + out_tlp_split2 = 0; + for (seg = 0; seg < INT_TLP_SEG_COUNT; seg = seg + 1) begin + out_sel_seg[seg] = 0; + end + + out_shift_tlp_data_next = out_shift_tlp_data_reg; + out_shift_tlp_strb_next = 
out_shift_tlp_strb_reg; + + // compute mux settings + for (seg = 0; seg < INT_TLP_SEG_COUNT; seg = seg + 1) begin + if (!frame_cyc && !abort) begin + tlp_hdr1_cyc = 1'b1; + tlp_hdr2_cyc = 1'b0; + tlp_split1_cyc = 1'b0; + tlp_split2_cyc = 1'b0; + if (fifo_tlp_valid[seg_offset_cyc]) begin + frame_cyc = 1'b1; + end + end + + // route segment + valid = fifo_tlp_valid[seg_offset_cyc]; + sop = fifo_tlp_sop[seg_offset_cyc]; + eop = fifo_tlp_eop[seg_offset_cyc]; + frame = frame_cyc; + + out_sel_cyc[seg] = 1'b1; + out_sop[seg] = tlp_hdr1_cyc; + out_sel_seg[seg] = seg_offset_cyc; + + out_tlp_hdr1[seg] = tlp_hdr1_cyc; + out_tlp_hdr2[seg] = tlp_hdr2_cyc; + + if (AXIS_PCIE_DATA_WIDTH == 64 && tlp_hdr1_cyc) begin + // output header + tlp_hdr1_cyc = 1'b0; + tlp_hdr2_cyc = 1'b1; + end else if (eop && fifo_tlp_strb[seg_offset_cyc*INT_TLP_SEG_STRB_WIDTH +: INT_TLP_SEG_STRB_WIDTH] >> (INT_TLP_SEG_STRB_WIDTH-(AXIS_PCIE_DATA_WIDTH == 64 ? 1 : 3))) begin + // extra cycle + tlp_hdr1_cyc = 1'b0; + tlp_hdr2_cyc = 1'b0; + if (tlp_split1_cyc) begin + frame_cyc = 0; + out_eop[seg] = 1'b1; + tlp_split1_cyc = 1'b0; + tlp_split2_cyc = 1'b1; + seg_offset_cyc = seg_offset_cyc + 1; + seg_count_cyc = seg_count_cyc + 1; + end else begin + tlp_split1_cyc = 1'b1; + end + end else begin + tlp_hdr1_cyc = 1'b0; + tlp_hdr2_cyc = 1'b0; + if (eop) begin + // end of packet + frame_cyc = 0; + out_eop[seg] = 1'b1; + end + seg_offset_cyc = seg_offset_cyc + 1; + seg_count_cyc = seg_count_cyc + 1; + end + + out_tlp_split1[seg] = tlp_split1_cyc; + out_tlp_split2[seg] = tlp_split2_cyc; + + if (frame && !abort) begin + if (valid) begin + if (eop || seg == INT_TLP_SEG_COUNT-1) begin + // end of packet or end of cycle, commit + fifo_read_seg_count = seg_count_cyc; + if (!m_axis_cc_tvalid || m_axis_cc_tready) begin + frame_next = frame_cyc; + tlp_hdr1_next = tlp_hdr1_cyc; + tlp_hdr2_next = tlp_hdr2_cyc; + tlp_split1_next = tlp_split1_cyc; + tlp_split2_next = tlp_split2_cyc; + out_sel = out_sel_cyc; + fifo_read_en = 
seg_count_cyc != 0; + end + end + end else begin + // input has stalled, wait + abort = 1; + end + end + end + + out_tlp_data = 0; + out_tlp_strb = 0; + out_tlp_valid = 0; + out_tlp_sop = 0; + out_tlp_eop = 0; + + for (seg = 0; seg < INT_TLP_SEG_COUNT; seg = seg + 1) begin + // remap header + seg_tlp_hdr = fifo_tlp_hdr[out_sel_seg[seg]*TLP_HDR_WIDTH +: TLP_HDR_WIDTH]; + seg_cc_hdr[6:0] = seg_tlp_hdr[38:32]; // lower address + seg_cc_hdr[7] = 1'b0; + seg_cc_hdr[9:8] = seg_tlp_hdr[107:106]; // AT + seg_cc_hdr[15:10] = 6'd0; + seg_cc_hdr[28:16] = seg_tlp_hdr[75:64]; // Byte count + seg_cc_hdr[29] = 1'b0; // locked read completion + seg_cc_hdr[31:30] = 2'd0; + seg_cc_hdr[42:32] = seg_tlp_hdr[105:96]; // DWORD count + seg_cc_hdr[45:43] = seg_tlp_hdr[79:77]; // completion status + seg_cc_hdr[46] = seg_tlp_hdr[110]; // poisoned + seg_cc_hdr[47] = 1'b0; + seg_cc_hdr[63:48] = seg_tlp_hdr[63:48]; // requester ID + seg_cc_hdr[71:64] = seg_tlp_hdr[47:40]; // tag + seg_cc_hdr[87:72] = seg_tlp_hdr[95:80]; // completer ID + seg_cc_hdr[88] = 1'b0; // completer ID enable + seg_cc_hdr[91:89] = seg_tlp_hdr[118:116]; // TC + seg_cc_hdr[94:92] = {seg_tlp_hdr[114], seg_tlp_hdr[109:108]}; // attr + seg_cc_hdr[95] = 1'b0; // force ECRC + + // mux for output segments + if (AXIS_PCIE_DATA_WIDTH == 64) begin + out_tlp_data[seg*INT_TLP_SEG_DATA_WIDTH +: INT_TLP_SEG_DATA_WIDTH] = out_shift_tlp_data_next; + out_tlp_strb[seg*INT_TLP_SEG_STRB_WIDTH +: INT_TLP_SEG_STRB_WIDTH] = out_shift_tlp_strb_next; + out_tlp_data[seg*INT_TLP_SEG_DATA_WIDTH+32 +: INT_TLP_SEG_DATA_WIDTH-32] = fifo_tlp_data[out_sel_seg[seg]*INT_TLP_SEG_DATA_WIDTH +: INT_TLP_SEG_DATA_WIDTH-32]; + if (!out_tlp_split2[seg]) begin + out_tlp_strb[seg*INT_TLP_SEG_STRB_WIDTH+1 +: INT_TLP_SEG_STRB_WIDTH-1] = fifo_tlp_strb[out_sel_seg[seg]*INT_TLP_SEG_STRB_WIDTH +: INT_TLP_SEG_STRB_WIDTH-1]; + end + + if (out_tlp_hdr1[seg]) begin + out_tlp_data[seg*INT_TLP_SEG_DATA_WIDTH +: INT_TLP_SEG_DATA_WIDTH] = seg_cc_hdr[63:0]; + 
out_tlp_strb[seg*INT_TLP_SEG_STRB_WIDTH +: INT_TLP_SEG_STRB_WIDTH] = 2'b11; + end else if (out_tlp_hdr2[seg]) begin + out_tlp_data[seg*INT_TLP_SEG_DATA_WIDTH +: 32] = seg_cc_hdr[95:64]; + out_tlp_strb[seg*INT_TLP_SEG_STRB_WIDTH +: 1] = 1'b1; + end + + out_tlp_valid[seg] = out_sel[seg]; + out_tlp_sop[seg] = out_sop[seg]; + out_tlp_eop[seg] = out_eop[seg]; + + if (out_sel[seg]) begin + out_shift_tlp_data_next = fifo_tlp_data[(out_sel_seg[seg]+1)*INT_TLP_SEG_DATA_WIDTH-32 +: 32]; + out_shift_tlp_strb_next = fifo_tlp_strb[(out_sel_seg[seg]+1)*INT_TLP_SEG_STRB_WIDTH-1 +: 1]; + end + end else begin + out_tlp_data[seg*INT_TLP_SEG_DATA_WIDTH +: INT_TLP_SEG_DATA_WIDTH] = out_shift_tlp_data_next; + out_tlp_strb[seg*INT_TLP_SEG_STRB_WIDTH +: INT_TLP_SEG_STRB_WIDTH] = out_shift_tlp_strb_next; + out_tlp_data[seg*INT_TLP_SEG_DATA_WIDTH+96 +: INT_TLP_SEG_DATA_WIDTH-96] = fifo_tlp_data[out_sel_seg[seg]*INT_TLP_SEG_DATA_WIDTH +: INT_TLP_SEG_DATA_WIDTH-96]; + if (!out_tlp_split2[seg]) begin + out_tlp_strb[seg*INT_TLP_SEG_STRB_WIDTH+3 +: INT_TLP_SEG_STRB_WIDTH-3] = fifo_tlp_strb[out_sel_seg[seg]*INT_TLP_SEG_STRB_WIDTH +: INT_TLP_SEG_STRB_WIDTH-3]; + end + + if (out_tlp_hdr1[seg]) begin + out_tlp_data[seg*INT_TLP_SEG_DATA_WIDTH +: 96] = seg_cc_hdr; + out_tlp_strb[seg*INT_TLP_SEG_STRB_WIDTH +: 3] = 3'b111; + end + + out_tlp_valid[seg] = out_sel[seg]; + out_tlp_sop[seg] = out_sop[seg]; + out_tlp_eop[seg] = out_eop[seg]; + + if (out_sel[seg]) begin + out_shift_tlp_data_next = fifo_tlp_data[(out_sel_seg[seg]+1)*INT_TLP_SEG_DATA_WIDTH-96 +: 96]; + out_shift_tlp_strb_next = fifo_tlp_strb[(out_sel_seg[seg]+1)*INT_TLP_SEG_STRB_WIDTH-3 +: 3]; + end + end + end + + if (!m_axis_cc_tvalid || m_axis_cc_tready) begin + // remap header and sideband + m_axis_cc_tdata_next = out_tlp_data; + m_axis_cc_tkeep_next = 0; + m_axis_cc_tvalid_next = out_tlp_valid != 0; + m_axis_cc_tlast_next = !(CC_STRADDLE && AXIS_PCIE_DATA_WIDTH == 512) && (out_tlp_valid & out_tlp_eop); + m_axis_cc_tuser_next = 0; + + for 
(seg = 0; seg < INT_TLP_SEG_COUNT; seg = seg + 1) begin + if (out_tlp_valid[seg]) begin + m_axis_cc_tkeep_next[seg*INT_TLP_SEG_STRB_WIDTH +: INT_TLP_SEG_STRB_WIDTH] = out_tlp_strb[seg*INT_TLP_SEG_STRB_WIDTH +: INT_TLP_SEG_STRB_WIDTH]; + end + + eop_index[seg*3 +: 3] = 0; + for (lane = 0; lane < INT_TLP_SEG_STRB_WIDTH; lane = lane + 1) begin + if (out_tlp_strb[seg*INT_TLP_SEG_STRB_WIDTH+lane]) begin + eop_index[seg*3 +: 3] = lane; + end + end + end + + if (AXIS_PCIE_DATA_WIDTH == 512) begin + case (out_tlp_valid & out_tlp_sop) + 2'b00: begin + m_axis_cc_tuser_next[1:0] = 2'b00; // is_sop + m_axis_cc_tuser_next[3:2] = 2'd0; // is_sop0_ptr + m_axis_cc_tuser_next[5:4] = 2'd0; // is_sop1_ptr + end + 2'b01: begin + m_axis_cc_tuser_next[1:0] = 2'b01; // is_sop + m_axis_cc_tuser_next[3:2] = 2'd0; // is_sop0_ptr + m_axis_cc_tuser_next[5:4] = 2'd0; // is_sop1_ptr + end + 2'b10: begin + m_axis_cc_tuser_next[1:0] = 2'b01; // is_sop + m_axis_cc_tuser_next[3:2] = 2'd2; // is_sop0_ptr + m_axis_cc_tuser_next[5:4] = 2'd0; // is_sop1_ptr + end + 2'b11: begin + m_axis_cc_tuser_next[1:0] = 2'b11; // is_sop + m_axis_cc_tuser_next[3:2] = 2'd0; // is_sop0_ptr + m_axis_cc_tuser_next[5:4] = 2'd2; // is_sop1_ptr + end + endcase + case (out_tlp_valid & out_tlp_eop) + 2'b00: begin + m_axis_cc_tuser_next[7:6] = 2'b00; // is_eop + m_axis_cc_tuser_next[11:8] = 4'd0; // is_eop0_ptr + m_axis_cc_tuser_next[15:12] = 4'd0; // is_eop1_ptr + end + 2'b01: begin + m_axis_cc_tuser_next[7:6] = 2'b01; // is_eop + m_axis_cc_tuser_next[11:8] = eop_index[0*3 +: 3]; // is_eop0_ptr + m_axis_cc_tuser_next[15:12] = 4'd0; // is_eop1_ptr + end + 2'b10: begin + m_axis_cc_tuser_next[7:6] = 2'b01; // is_eop + m_axis_cc_tuser_next[11:8] = 4'd8+eop_index[1*3 +: 3]; // is_eop0_ptr + m_axis_cc_tuser_next[15:12] = 4'd0; // is_eop1_ptr + end + 2'b11: begin + m_axis_cc_tuser_next[7:6] = 2'b11; // is_eop + m_axis_cc_tuser_next[11:8] = eop_index[0*3 +: 3]; // is_eop0_ptr + m_axis_cc_tuser_next[15:12] = 4'd8+eop_index[1*3 +: 3]; 
// is_eop1_ptr + end + endcase + m_axis_cc_tuser_next[16] = 1'b0; // discontinue + m_axis_cc_tuser_next[80:17] = 64'd0; // parity + end else begin + m_axis_cc_tuser_next[1] = 1'b0; // discontinue + m_axis_cc_tuser_next[32:1] = 32'd0; // parity + end + end +end + +integer i; + always @(posedge clk) begin - m_axis_cc_tvalid_reg <= m_axis_cc_tvalid_reg && !m_axis_cc_tready; + frame_reg <= frame_next; + tlp_hdr1_reg <= tlp_hdr1_next; + tlp_hdr2_reg <= tlp_hdr2_next; + tlp_split1_reg <= tlp_split1_next; + tlp_split2_reg <= tlp_split2_next; - out_fifo_half_full_reg <= $unsigned(out_fifo_wr_ptr_reg - out_fifo_rd_ptr_reg) >= 2**(OUTPUT_FIFO_ADDR_WIDTH-1); + out_shift_tlp_data_reg <= out_shift_tlp_data_next; + out_shift_tlp_strb_reg <= out_shift_tlp_strb_next; - if (!out_fifo_full && m_axis_cc_tvalid_int) begin - out_fifo_tdata[out_fifo_wr_ptr_reg[OUTPUT_FIFO_ADDR_WIDTH-1:0]] <= m_axis_cc_tdata_int; - out_fifo_tkeep[out_fifo_wr_ptr_reg[OUTPUT_FIFO_ADDR_WIDTH-1:0]] <= m_axis_cc_tkeep_int; - out_fifo_tlast[out_fifo_wr_ptr_reg[OUTPUT_FIFO_ADDR_WIDTH-1:0]] <= m_axis_cc_tlast_int; - out_fifo_tuser[out_fifo_wr_ptr_reg[OUTPUT_FIFO_ADDR_WIDTH-1:0]] <= m_axis_cc_tuser_int; - out_fifo_wr_ptr_reg <= out_fifo_wr_ptr_reg + 1; - end - - if (!out_fifo_empty && (!m_axis_cc_tvalid_reg || m_axis_cc_tready)) begin - m_axis_cc_tdata_reg <= out_fifo_tdata[out_fifo_rd_ptr_reg[OUTPUT_FIFO_ADDR_WIDTH-1:0]]; - m_axis_cc_tkeep_reg <= out_fifo_tkeep[out_fifo_rd_ptr_reg[OUTPUT_FIFO_ADDR_WIDTH-1:0]]; - m_axis_cc_tvalid_reg <= 1'b1; - m_axis_cc_tlast_reg <= out_fifo_tlast[out_fifo_rd_ptr_reg[OUTPUT_FIFO_ADDR_WIDTH-1:0]]; - m_axis_cc_tuser_reg <= out_fifo_tuser[out_fifo_rd_ptr_reg[OUTPUT_FIFO_ADDR_WIDTH-1:0]]; - out_fifo_rd_ptr_reg <= out_fifo_rd_ptr_reg + 1; - end + m_axis_cc_tdata_reg <= m_axis_cc_tdata_next; + m_axis_cc_tkeep_reg <= m_axis_cc_tkeep_next; + m_axis_cc_tvalid_reg <= m_axis_cc_tvalid_next; + m_axis_cc_tlast_reg <= m_axis_cc_tlast_next; + m_axis_cc_tuser_reg <= m_axis_cc_tuser_next; if 
(rst) begin - out_fifo_wr_ptr_reg <= 0; - out_fifo_rd_ptr_reg <= 0; - m_axis_cc_tvalid_reg <= 1'b0; + frame_reg <= 1'b0; + + m_axis_cc_tvalid_reg <= 0; end end diff --git a/rtl/pcie_us_if_cq.v b/rtl/pcie_us_if_cq.v index 64b315fdc..291046be6 100644 --- a/rtl/pcie_us_if_cq.v +++ b/rtl/pcie_us_if_cq.v @@ -1,6 +1,6 @@ /* -Copyright (c) 2021 Alex Forencich +Copyright (c) 2021-2022 Alex Forencich Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -39,6 +39,8 @@ module pcie_us_if_cq # parameter AXIS_PCIE_KEEP_WIDTH = (AXIS_PCIE_DATA_WIDTH/32), // PCIe AXI stream CQ tuser signal width parameter AXIS_PCIE_CQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 85 : 183, + // CQ interface TLP straddling + parameter CQ_STRADDLE = AXIS_PCIE_DATA_WIDTH >= 512, // TLP data width parameter TLP_DATA_WIDTH = AXIS_PCIE_DATA_WIDTH, // TLP strobe width @@ -79,7 +81,9 @@ module pcie_us_if_cq # parameter TLP_DATA_WIDTH_BYTES = TLP_DATA_WIDTH/8; parameter TLP_DATA_WIDTH_DWORDS = TLP_DATA_WIDTH/32; -parameter OUTPUT_FIFO_ADDR_WIDTH = 5; +parameter INT_TLP_SEG_COUNT = (CQ_STRADDLE && AXIS_PCIE_DATA_WIDTH >= 512) ? 
2 : 1; +parameter INT_TLP_SEG_DATA_WIDTH = TLP_DATA_WIDTH / INT_TLP_SEG_COUNT; +parameter INT_TLP_SEG_STRB_WIDTH = TLP_STRB_WIDTH / INT_TLP_SEG_COUNT; // bus width assertions initial begin @@ -105,11 +109,6 @@ initial begin end end - if (TLP_SEG_COUNT != 1) begin - $error("Error: TLP segment count must be 1 (instance %m)"); - $finish; - end - if (TLP_DATA_WIDTH != AXIS_PCIE_DATA_WIDTH) begin $error("Error: Interface widths must match (instance %m)"); $finish; @@ -147,286 +146,349 @@ localparam [3:0] reg [TLP_DATA_WIDTH-1:0] rx_req_tlp_data_reg = 0, rx_req_tlp_data_next; reg [TLP_STRB_WIDTH-1:0] rx_req_tlp_strb_reg = 0, rx_req_tlp_strb_next; -reg [TLP_SEG_COUNT*TLP_HDR_WIDTH-1:0] rx_req_tlp_hdr_reg = 0, rx_req_tlp_hdr_next; -reg [TLP_SEG_COUNT*3-1:0] rx_req_tlp_bar_id_reg = 0, rx_req_tlp_bar_id_next; -reg [TLP_SEG_COUNT*7-1:0] rx_req_tlp_func_num_reg = 0, rx_req_tlp_func_num_next; -reg [TLP_SEG_COUNT-1:0] rx_req_tlp_valid_reg = 0, rx_req_tlp_valid_next; -reg [TLP_SEG_COUNT-1:0] rx_req_tlp_sop_reg = 0, rx_req_tlp_sop_next; -reg [TLP_SEG_COUNT-1:0] rx_req_tlp_eop_reg = 0, rx_req_tlp_eop_next; +reg [INT_TLP_SEG_COUNT*TLP_HDR_WIDTH-1:0] rx_req_tlp_hdr_reg = 0, rx_req_tlp_hdr_next; +reg [INT_TLP_SEG_COUNT*3-1:0] rx_req_tlp_bar_id_reg = 0, rx_req_tlp_bar_id_next; +reg [INT_TLP_SEG_COUNT*8-1:0] rx_req_tlp_func_num_reg = 0, rx_req_tlp_func_num_next; +reg [INT_TLP_SEG_COUNT-1:0] rx_req_tlp_valid_reg = 0, rx_req_tlp_valid_next; +reg [INT_TLP_SEG_COUNT-1:0] rx_req_tlp_sop_reg = 0, rx_req_tlp_sop_next; +reg [INT_TLP_SEG_COUNT-1:0] rx_req_tlp_eop_reg = 0, rx_req_tlp_eop_next; +reg tlp_frame_reg = 0, tlp_frame_next; -assign rx_req_tlp_data = rx_req_tlp_data_reg; -assign rx_req_tlp_strb = rx_req_tlp_strb_reg; -assign rx_req_tlp_hdr = rx_req_tlp_hdr_reg; -assign rx_req_tlp_bar_id = rx_req_tlp_bar_id_reg; -assign rx_req_tlp_func_num = rx_req_tlp_func_num_reg; -assign rx_req_tlp_valid = rx_req_tlp_valid_reg; -assign rx_req_tlp_sop = rx_req_tlp_sop_reg; -assign rx_req_tlp_eop = 
rx_req_tlp_eop_reg; - -localparam [1:0] - TLP_INPUT_STATE_IDLE = 2'd0, - TLP_INPUT_STATE_HEADER = 2'd1, - TLP_INPUT_STATE_PAYLOAD = 2'd2; - -reg [1:0] tlp_input_state_reg = TLP_INPUT_STATE_IDLE, tlp_input_state_next; - -reg s_axis_cq_tready_cmb; +wire fifo_tlp_ready; reg tlp_input_frame_reg = 1'b0, tlp_input_frame_next; -reg [AXIS_PCIE_DATA_WIDTH-1:0] cq_tdata_int_reg = {AXIS_PCIE_DATA_WIDTH{1'b0}}, cq_tdata_int_next; -reg [AXIS_PCIE_KEEP_WIDTH-1:0] cq_tkeep_int_reg = {AXIS_PCIE_KEEP_WIDTH{1'b0}}, cq_tkeep_int_next; -reg cq_tvalid_int_reg = 1'b0, cq_tvalid_int_next; -reg cq_tlast_int_reg = 1'b0, cq_tlast_int_next; -reg [AXIS_PCIE_CQ_USER_WIDTH-1:0] cq_tuser_int_reg = {AXIS_PCIE_CQ_USER_WIDTH{1'b0}}, cq_tuser_int_next; +reg [TLP_DATA_WIDTH-1:0] cq_data; +reg [TLP_STRB_WIDTH-1:0] cq_strb; +reg [INT_TLP_SEG_COUNT*8-1:0] cq_hdr_be; +reg [INT_TLP_SEG_COUNT-1:0] cq_valid; +reg [TLP_STRB_WIDTH-1:0] cq_strb_sop; +reg [TLP_STRB_WIDTH-1:0] cq_strb_eop; +reg [INT_TLP_SEG_COUNT-1:0] cq_sop; +reg [INT_TLP_SEG_COUNT-1:0] cq_eop; +reg cq_frame_reg = 1'b0, cq_frame_next; -wire [AXIS_PCIE_DATA_WIDTH*2-1:0] cq_tdata = {s_axis_cq_tdata, cq_tdata_int_reg}; -wire [AXIS_PCIE_KEEP_WIDTH*2-1:0] cq_tkeep = {s_axis_cq_tkeep, cq_tkeep_int_reg}; +reg [TLP_DATA_WIDTH-1:0] cq_data_int_reg = 0, cq_data_int_next; +reg [TLP_STRB_WIDTH-1:0] cq_strb_int_reg = 0, cq_strb_int_next; +reg [INT_TLP_SEG_COUNT*8-1:0] cq_hdr_be_int_reg = 0, cq_hdr_be_int_next; +reg [INT_TLP_SEG_COUNT-1:0] cq_valid_int_reg = 0, cq_valid_int_next; +reg [TLP_STRB_WIDTH-1:0] cq_strb_eop_int_reg = 0, cq_strb_eop_int_next; +reg [INT_TLP_SEG_COUNT-1:0] cq_sop_int_reg = 0, cq_sop_int_next; +reg [INT_TLP_SEG_COUNT-1:0] cq_eop_int_reg = 0, cq_eop_int_next; -reg [127:0] tlp_hdr; -reg [2:0] tlp_bar_id; -reg [7:0] tlp_func_num; +wire [TLP_DATA_WIDTH*2-1:0] cq_data_full = {cq_data, cq_data_int_reg}; +wire [TLP_STRB_WIDTH*2-1:0] cq_strb_full = {cq_strb, cq_strb_int_reg}; +wire [INT_TLP_SEG_COUNT*8*2-1:0] cq_hdr_be_full = {cq_hdr_be, 
cq_hdr_be_int_reg}; +wire [INT_TLP_SEG_COUNT*2-1:0] cq_valid_full = {cq_valid, cq_valid_int_reg}; +wire [TLP_STRB_WIDTH*2-1:0] cq_strb_eop_full = {cq_strb_eop, cq_strb_eop_int_reg}; +wire [INT_TLP_SEG_COUNT*2-1:0] cq_sop_full = {cq_sop, cq_sop_int_reg}; +wire [INT_TLP_SEG_COUNT*2-1:0] cq_eop_full = {cq_eop, cq_eop_int_reg}; -assign s_axis_cq_tready = s_axis_cq_tready_cmb; +reg [INT_TLP_SEG_COUNT*128-1:0] tlp_hdr; +reg [INT_TLP_SEG_COUNT*3-1:0] tlp_bar_id; +reg [INT_TLP_SEG_COUNT*8-1:0] tlp_func_num; + +assign s_axis_cq_tready = fifo_tlp_ready; + +pcie_tlp_fifo #( + .DEPTH((1024/4)*2), + .TLP_DATA_WIDTH(TLP_DATA_WIDTH), + .TLP_STRB_WIDTH(TLP_STRB_WIDTH), + .TLP_HDR_WIDTH(TLP_HDR_WIDTH), + .SEQ_NUM_WIDTH(1), + .IN_TLP_SEG_COUNT(INT_TLP_SEG_COUNT), + .OUT_TLP_SEG_COUNT(TLP_SEG_COUNT) +) +pcie_tlp_fifo_inst ( + .clk(clk), + .rst(rst), + + /* + * TLP input + */ + .in_tlp_data(rx_req_tlp_data_reg), + .in_tlp_strb(rx_req_tlp_strb_reg), + .in_tlp_hdr(rx_req_tlp_hdr_reg), + .in_tlp_seq(0), + .in_tlp_bar_id(rx_req_tlp_bar_id_reg), + .in_tlp_func_num(rx_req_tlp_func_num_reg), + .in_tlp_error(0), + .in_tlp_valid(rx_req_tlp_valid_reg), + .in_tlp_sop(rx_req_tlp_sop_reg), + .in_tlp_eop(rx_req_tlp_eop_reg), + .in_tlp_ready(fifo_tlp_ready), + + /* + * TLP output + */ + .out_tlp_data(rx_req_tlp_data), + .out_tlp_strb(rx_req_tlp_strb), + .out_tlp_hdr(rx_req_tlp_hdr), + .out_tlp_seq(), + .out_tlp_bar_id(rx_req_tlp_bar_id), + .out_tlp_func_num(rx_req_tlp_func_num), + .out_tlp_error(), + .out_tlp_valid(rx_req_tlp_valid), + .out_tlp_sop(rx_req_tlp_sop), + .out_tlp_eop(rx_req_tlp_eop), + .out_tlp_ready(rx_req_tlp_ready), + + /* + * Status + */ + .half_full(), + .watermark() +); + +integer seg, lane; +reg valid; always @* begin - tlp_input_state_next = TLP_INPUT_STATE_IDLE; - rx_req_tlp_data_next = rx_req_tlp_data_reg; rx_req_tlp_strb_next = rx_req_tlp_strb_reg; rx_req_tlp_hdr_next = rx_req_tlp_hdr_reg; rx_req_tlp_bar_id_next = rx_req_tlp_bar_id_reg; rx_req_tlp_func_num_next = 
rx_req_tlp_func_num_reg; - rx_req_tlp_valid_next = rx_req_tlp_valid_reg && !rx_req_tlp_ready; + rx_req_tlp_valid_next = fifo_tlp_ready ? 0 : rx_req_tlp_valid_reg; rx_req_tlp_sop_next = rx_req_tlp_sop_reg; rx_req_tlp_eop_next = rx_req_tlp_eop_reg; + tlp_frame_next = tlp_frame_reg; - s_axis_cq_tready_cmb = rx_req_tlp_ready; + cq_frame_next = cq_frame_reg; - tlp_input_frame_next = tlp_input_frame_reg; + cq_data_int_next = cq_data_int_reg; + cq_strb_int_next = cq_strb_int_reg; + cq_hdr_be_int_next = cq_hdr_be_int_reg; + cq_valid_int_next = cq_valid_int_reg; + cq_strb_eop_int_next = cq_strb_eop_int_reg; + cq_sop_int_next = cq_sop_int_reg; + cq_eop_int_next = cq_eop_int_reg; - cq_tdata_int_next = cq_tdata_int_reg; - cq_tkeep_int_next = cq_tkeep_int_reg; - cq_tvalid_int_next = cq_tvalid_int_reg; - cq_tlast_int_next = cq_tlast_int_reg; - cq_tuser_int_next = cq_tuser_int_reg; + // decode framing + if (CQ_STRADDLE && AXIS_PCIE_DATA_WIDTH >= 512) begin + cq_data = s_axis_cq_tdata; + cq_strb = 0; + cq_hdr_be = {s_axis_cq_tuser[15:12], s_axis_cq_tuser[7:4], s_axis_cq_tuser[11:8], s_axis_cq_tuser[3:0]}; + cq_valid = 0; + cq_strb_sop = 0; + cq_strb_eop = 0; + cq_sop = 0; + cq_eop = 0; + for (seg = 0; seg < INT_TLP_SEG_COUNT; seg = seg + 1) begin + if (s_axis_cq_tuser[80+seg]) begin + cq_strb_sop[s_axis_cq_tuser[82+seg*2 +: 2]*4] = 1'b1; + end + if (s_axis_cq_tuser[86+seg]) begin + cq_strb_eop[s_axis_cq_tuser[88+seg*4 +: 4]] = 1'b1; + end + end + valid = 1; + for (lane = 0; lane < TLP_STRB_WIDTH; lane = lane + 1) begin + if (cq_strb_sop[lane]) begin + valid = 1; + cq_sop[lane/INT_TLP_SEG_STRB_WIDTH] = 1'b1; + end + if (valid) begin + cq_strb[lane] = 1'b1; + cq_valid[lane/INT_TLP_SEG_STRB_WIDTH] = s_axis_cq_tvalid; + end + if (cq_strb_eop[lane]) begin + valid = 0; + cq_eop[lane/INT_TLP_SEG_STRB_WIDTH] = 1'b1; + end + end + end else begin + cq_data = s_axis_cq_tdata; + cq_strb = s_axis_cq_tvalid ? 
s_axis_cq_tkeep : 0; + if (AXIS_PCIE_DATA_WIDTH >= 512) begin + cq_hdr_be = {s_axis_cq_tuser[11:8], s_axis_cq_tuser[3:0]}; + end else begin + cq_hdr_be = s_axis_cq_tuser[7:0]; + end + cq_valid = s_axis_cq_tvalid; + cq_sop = !cq_frame_reg; + cq_eop = s_axis_cq_tlast; + cq_strb_sop = cq_sop; + cq_strb_eop = 0; + for (lane = 0; lane < TLP_STRB_WIDTH; lane = lane + 1) begin + if (cq_strb[lane]) begin + cq_strb_eop = (cq_eop) << lane; + end + end + if (s_axis_cq_tready && s_axis_cq_tvalid) begin + cq_frame_next = !s_axis_cq_tlast; + end + end + + for (seg = 0; seg < INT_TLP_SEG_COUNT; seg = seg + 1) begin + // parse header + // DW 0 + case (cq_data_full[INT_TLP_SEG_DATA_WIDTH*seg+75 +: 4]) + REQ_MEM_READ: begin + tlp_hdr[128*seg+125 +: 3] = TLP_FMT_4DW; // fmt + tlp_hdr[128*seg+120 +: 5] = {5'b00000}; // type + end + REQ_MEM_WRITE: begin + tlp_hdr[128*seg+125 +: 3] = TLP_FMT_4DW_DATA; // fmt + tlp_hdr[128*seg+120 +: 5] = {5'b00000}; // type + end + REQ_IO_READ: begin + tlp_hdr[128*seg+125 +: 3] = TLP_FMT_4DW; // fmt + tlp_hdr[128*seg+120 +: 5] = {5'b00010}; // type + end + REQ_IO_WRITE: begin + tlp_hdr[128*seg+125 +: 3] = TLP_FMT_4DW_DATA; // fmt + tlp_hdr[128*seg+120 +: 5] = {5'b00010}; // type + end + REQ_MEM_FETCH_ADD: begin + tlp_hdr[128*seg+125 +: 3] = TLP_FMT_4DW_DATA; // fmt + tlp_hdr[128*seg+120 +: 5] = {5'b01100}; // type + end + REQ_MEM_SWAP: begin + tlp_hdr[128*seg+125 +: 3] = TLP_FMT_4DW_DATA; // fmt + tlp_hdr[128*seg+120 +: 5] = {5'b01101}; // type + end + REQ_MEM_CAS: begin + tlp_hdr[128*seg+125 +: 3] = TLP_FMT_4DW_DATA; // fmt + tlp_hdr[128*seg+120 +: 5] = {5'b01110}; // type + end + REQ_MEM_READ_LOCKED: begin + tlp_hdr[128*seg+125 +: 3] = TLP_FMT_4DW; // fmt + tlp_hdr[128*seg+120 +: 5] = {5'b00001}; // type + end + REQ_MSG: begin + if (cq_data_full[INT_TLP_SEG_DATA_WIDTH*seg+64 +: 11]) begin + tlp_hdr[128*seg+125 +: 3] = TLP_FMT_4DW_DATA; // fmt + end else begin + tlp_hdr[128*seg+125 +: 3] = TLP_FMT_4DW; // fmt + end + tlp_hdr[128*seg+120 +: 5] = {2'b10, 
cq_data_full[INT_TLP_SEG_DATA_WIDTH*seg+112 +: 3]}; // type + end + REQ_MSG_VENDOR: begin + if (cq_data_full[INT_TLP_SEG_DATA_WIDTH*seg+64 +: 11]) begin + tlp_hdr[128*seg+125 +: 3] = TLP_FMT_4DW_DATA; // fmt + end else begin + tlp_hdr[128*seg+125 +: 3] = TLP_FMT_4DW; // fmt + end + tlp_hdr[128*seg+120 +: 5] = {2'b10, cq_data_full[INT_TLP_SEG_DATA_WIDTH*seg+112 +: 3]}; // type + end + REQ_MSG_ATS: begin + if (cq_data_full[INT_TLP_SEG_DATA_WIDTH*seg+64 +: 11]) begin + tlp_hdr[128*seg+125 +: 3] = TLP_FMT_4DW_DATA; // fmt + end else begin + tlp_hdr[128*seg+125 +: 3] = TLP_FMT_4DW; // fmt + end + tlp_hdr[128*seg+120 +: 5] = {2'b10, cq_data_full[INT_TLP_SEG_DATA_WIDTH*seg+112 +: 3]}; // type + end + default: begin + tlp_hdr[128*seg+125 +: 3] = TLP_FMT_4DW; // fmt + tlp_hdr[128*seg+120 +: 5] = {5'b00000}; // type + end + endcase + tlp_hdr[128*seg+119] = 1'b0; // T9 + tlp_hdr[128*seg+116 +: 3] = cq_data_full[INT_TLP_SEG_DATA_WIDTH*seg+121 +: 3]; // TC + tlp_hdr[128*seg+115] = 1'b0; // T8 + tlp_hdr[128*seg+114] = cq_data_full[INT_TLP_SEG_DATA_WIDTH*seg+126]; // attr + tlp_hdr[128*seg+113] = 1'b0; // LN + tlp_hdr[128*seg+112] = 1'b0; // TH + tlp_hdr[128*seg+111] = 1'b0; // TD + tlp_hdr[128*seg+110] = 1'b0; // EP + tlp_hdr[128*seg+108 +: 2] = cq_data_full[INT_TLP_SEG_DATA_WIDTH*seg+124 +: 2]; // attr + tlp_hdr[128*seg+106 +: 2] = cq_data_full[INT_TLP_SEG_DATA_WIDTH*seg+0 +: 2]; // AT + tlp_hdr[128*seg+96 +: 10] = cq_data_full[INT_TLP_SEG_DATA_WIDTH*seg+64 +: 11]; // length + // DW 1 + tlp_hdr[128*seg+80 +: 16] = cq_data_full[INT_TLP_SEG_DATA_WIDTH*seg+80 +: 16]; // requester ID + tlp_hdr[128*seg+72 +: 8] = cq_data_full[INT_TLP_SEG_DATA_WIDTH*seg+96 +: 8]; // tag + tlp_hdr[128*seg+68 +: 4] = cq_hdr_be_full[8*seg+4 +: 4]; // last BE + tlp_hdr[128*seg+64 +: 4] = cq_hdr_be_full[8*seg+0 +: 4]; // first BE + // DW 2+3 + tlp_hdr[128*seg+2 +: 62] = cq_data_full[INT_TLP_SEG_DATA_WIDTH*seg+2 +: 62]; // address + tlp_hdr[128*seg+0 +: 2] = 2'b00; // PH + + tlp_bar_id[3*seg +: 3] = 
cq_data_full[INT_TLP_SEG_DATA_WIDTH*seg+112 +: 3]; + tlp_func_num[8*seg +: 8] = cq_data_full[INT_TLP_SEG_DATA_WIDTH*seg+104 +: 8]; + end + + if (fifo_tlp_ready) begin + rx_req_tlp_strb_next = 0; + rx_req_tlp_valid_next = 0; + rx_req_tlp_sop_next = 0; + rx_req_tlp_eop_next = 0; + if (TLP_DATA_WIDTH == 64) begin + if (cq_valid_full[0]) begin + rx_req_tlp_data_next = cq_data_full >> 64; + rx_req_tlp_strb_next = cq_strb_full >> 2; + if (cq_sop_full[0]) begin + tlp_frame_next = 1'b0; + rx_req_tlp_hdr_next = tlp_hdr; + rx_req_tlp_bar_id_next = tlp_bar_id; + rx_req_tlp_func_num_next = tlp_func_num; + if (cq_eop_full[0]) begin + cq_valid_int_next[0] = 1'b0; + end else if (cq_valid_full[1]) begin + cq_valid_int_next[0] = 1'b0; + if (cq_eop_full[1]) begin + rx_req_tlp_strb_next = 0; + rx_req_tlp_valid_next = 1'b1; + rx_req_tlp_sop_next = 1'b1; + rx_req_tlp_eop_next = 1'b1; + end + end + end else begin + rx_req_tlp_sop_next = !tlp_frame_reg; + rx_req_tlp_eop_next = 1'b0; + if (cq_eop_full[0]) begin + cq_valid_int_next[0] = 1'b0; + end else if (cq_valid_full[1]) begin + rx_req_tlp_valid_next = 1'b1; + cq_valid_int_next[0] = 1'b0; + tlp_frame_next = 1'b1; + rx_req_tlp_eop_next = cq_eop_full[1]; + end + end + end + end else begin + for (seg = 0; seg < INT_TLP_SEG_COUNT; seg = seg + 1) begin + if (cq_valid_full[seg]) begin + rx_req_tlp_data_next[INT_TLP_SEG_DATA_WIDTH*seg +: INT_TLP_SEG_DATA_WIDTH] = cq_data_full >> (128 + INT_TLP_SEG_DATA_WIDTH*seg); + if (cq_sop_full[seg]) begin + rx_req_tlp_hdr_next[TLP_HDR_WIDTH*seg +: TLP_HDR_WIDTH] = tlp_hdr[128*seg +: 128]; + rx_req_tlp_bar_id_next[3*seg +: 3] = tlp_bar_id[3*seg +: 3]; + rx_req_tlp_func_num_next[8*seg +: 8] = tlp_func_num[8*seg +: 8]; + end + rx_req_tlp_sop_next[seg] = cq_sop_full[seg]; + if (cq_eop_full[seg]) begin + rx_req_tlp_strb_next[INT_TLP_SEG_STRB_WIDTH*seg +: INT_TLP_SEG_STRB_WIDTH] = cq_strb_full[INT_TLP_SEG_STRB_WIDTH*seg +: INT_TLP_SEG_STRB_WIDTH] >> 4; + if (cq_sop_full[seg] || 
cq_strb_eop_full[INT_TLP_SEG_STRB_WIDTH*seg +: INT_TLP_SEG_STRB_WIDTH] >> 4) begin + rx_req_tlp_eop_next[seg] = 1'b1; + rx_req_tlp_valid_next[seg] = 1'b1; + end + cq_valid_int_next[seg] = 1'b0; + end else begin + rx_req_tlp_strb_next[INT_TLP_SEG_STRB_WIDTH*seg +: INT_TLP_SEG_STRB_WIDTH] = cq_strb_full >> (4 + INT_TLP_SEG_STRB_WIDTH*seg); + if (cq_valid_full[seg+1]) begin + rx_req_tlp_eop_next[seg] = cq_strb_eop_full[INT_TLP_SEG_STRB_WIDTH*(seg+1) +: 4] != 0; + rx_req_tlp_valid_next[seg] = 1'b1; + cq_valid_int_next[seg] = 1'b0; + end + end + end + end + end + end if (s_axis_cq_tready && s_axis_cq_tvalid) begin - cq_tdata_int_next = s_axis_cq_tdata; - cq_tkeep_int_next = s_axis_cq_tkeep; - cq_tvalid_int_next = s_axis_cq_tvalid; - cq_tlast_int_next = s_axis_cq_tlast; - cq_tuser_int_next = s_axis_cq_tuser; + cq_data_int_next = cq_data; + cq_strb_int_next = cq_strb; + cq_hdr_be_int_next = cq_hdr_be; + cq_valid_int_next = cq_valid; + cq_strb_eop_int_next = cq_strb_eop; + cq_sop_int_next = cq_sop; + cq_eop_int_next = cq_eop; end - - // parse header - // DW 0 - case (cq_tdata[78:75]) - REQ_MEM_READ: begin - tlp_hdr[127:125] = TLP_FMT_4DW; // fmt - tlp_hdr[124:120] = {5'b00000}; // type - end - REQ_MEM_WRITE: begin - tlp_hdr[127:125] = TLP_FMT_4DW_DATA; // fmt - tlp_hdr[124:120] = {5'b00000}; // type - end - REQ_IO_READ: begin - tlp_hdr[127:125] = TLP_FMT_4DW; // fmt - tlp_hdr[124:120] = {5'b00010}; // type - end - REQ_IO_WRITE: begin - tlp_hdr[127:125] = TLP_FMT_4DW_DATA; // fmt - tlp_hdr[124:120] = {5'b00010}; // type - end - REQ_MEM_FETCH_ADD: begin - tlp_hdr[127:125] = TLP_FMT_4DW_DATA; // fmt - tlp_hdr[124:120] = {5'b01100}; // type - end - REQ_MEM_SWAP: begin - tlp_hdr[127:125] = TLP_FMT_4DW_DATA; // fmt - tlp_hdr[124:120] = {5'b01101}; // type - end - REQ_MEM_CAS: begin - tlp_hdr[127:125] = TLP_FMT_4DW_DATA; // fmt - tlp_hdr[124:120] = {5'b01110}; // type - end - REQ_MEM_READ_LOCKED: begin - tlp_hdr[127:125] = TLP_FMT_4DW; // fmt - tlp_hdr[124:120] = {5'b00001}; // 
type - end - REQ_MSG: begin - if (cq_tdata[74:64]) begin - tlp_hdr[127:125] = TLP_FMT_4DW_DATA; // fmt - end else begin - tlp_hdr[127:125] = TLP_FMT_4DW; // fmt - end - tlp_hdr[124:120] = {2'b10, cq_tdata[114:112]}; // type - end - REQ_MSG_VENDOR: begin - if (cq_tdata[74:64]) begin - tlp_hdr[127:125] = TLP_FMT_4DW_DATA; // fmt - end else begin - tlp_hdr[127:125] = TLP_FMT_4DW; // fmt - end - tlp_hdr[124:120] = {2'b10, cq_tdata[114:112]}; // type - end - REQ_MSG_ATS: begin - if (cq_tdata[74:64]) begin - tlp_hdr[127:125] = TLP_FMT_4DW_DATA; // fmt - end else begin - tlp_hdr[127:125] = TLP_FMT_4DW; // fmt - end - tlp_hdr[124:120] = {2'b10, cq_tdata[114:112]}; // type - end - default: begin - tlp_hdr[127:125] = TLP_FMT_4DW; // fmt - tlp_hdr[124:120] = {5'b00000}; // type - end - endcase - tlp_hdr[119] = 1'b0; // T9 - tlp_hdr[118:116] = cq_tdata[123:121]; // TC - tlp_hdr[115] = 1'b0; // T8 - tlp_hdr[114] = cq_tdata[126]; // attr - tlp_hdr[113] = 1'b0; // LN - tlp_hdr[112] = 1'b0; // TH - tlp_hdr[111] = 1'b0; // TD - tlp_hdr[110] = 1'b0; // EP - tlp_hdr[109:108] = cq_tdata[125:124]; // attr - tlp_hdr[107:106] = cq_tdata[1:0]; // AT - tlp_hdr[105:96] = cq_tdata[74:64]; // length - // DW 1 - tlp_hdr[95:80] = cq_tdata[95:80]; // requester ID - tlp_hdr[79:72] = cq_tdata[103:96]; // tag - if (AXIS_PCIE_DATA_WIDTH == 512) begin - tlp_hdr[71:68] = cq_tuser_int_reg[11:8]; // last BE - tlp_hdr[67:64] = cq_tuser_int_reg[3:0]; // first BE - end else begin - tlp_hdr[71:68] = cq_tuser_int_reg[7:4]; // last BE - tlp_hdr[67:64] = cq_tuser_int_reg[3:0]; // first BE - end - // DW 2+3 - tlp_hdr[63:2] = cq_tdata[63:2]; // address - tlp_hdr[1:0] = 2'b00; // PH - - tlp_bar_id = cq_tdata[114:112]; - tlp_func_num = cq_tdata[111:104]; - - case (tlp_input_state_reg) - TLP_INPUT_STATE_IDLE: begin - s_axis_cq_tready_cmb = rx_req_tlp_ready; - - if (cq_tvalid_int_reg && rx_req_tlp_ready) begin - - rx_req_tlp_hdr_next = tlp_hdr; - rx_req_tlp_bar_id_next = tlp_bar_id; - rx_req_tlp_func_num_next = 
tlp_func_num; - - if (AXIS_PCIE_DATA_WIDTH > 64) begin - rx_req_tlp_data_next = cq_tdata >> 128; - rx_req_tlp_strb_next = cq_tkeep >> 4; - rx_req_tlp_sop_next = 1'b1; - rx_req_tlp_eop_next = 1'b0; - - tlp_input_frame_next = 1'b1; - - if (cq_tlast_int_reg) begin - rx_req_tlp_valid_next = 1'b1; - rx_req_tlp_strb_next = cq_tkeep_int_reg >> 4; - rx_req_tlp_eop_next = 1'b1; - cq_tvalid_int_next = s_axis_cq_tready && s_axis_cq_tvalid; - tlp_input_frame_next = 1'b0; - tlp_input_state_next = TLP_INPUT_STATE_IDLE; - end else if (s_axis_cq_tready && s_axis_cq_tvalid) begin - if (s_axis_cq_tlast && s_axis_cq_tkeep >> 4 == 0) begin - rx_req_tlp_valid_next = 1'b1; - rx_req_tlp_eop_next = 1'b1; - cq_tvalid_int_next = 1'b0; - tlp_input_frame_next = 1'b0; - tlp_input_state_next = TLP_INPUT_STATE_IDLE; - end else begin - rx_req_tlp_valid_next = 1'b1; - tlp_input_state_next = TLP_INPUT_STATE_PAYLOAD; - end - end else begin - tlp_input_state_next = TLP_INPUT_STATE_IDLE; - end - end else begin - rx_req_tlp_data_next = 0; - rx_req_tlp_strb_next = 0; - rx_req_tlp_sop_next = 1'b1; - rx_req_tlp_eop_next = 1'b0; - - if (cq_tlast_int_reg) begin - cq_tvalid_int_next = s_axis_cq_tready && s_axis_cq_tvalid; - tlp_input_frame_next = 1'b0; - tlp_input_state_next = TLP_INPUT_STATE_IDLE; - end else if (s_axis_cq_tready && s_axis_cq_tvalid) begin - if (s_axis_cq_tlast) begin - rx_req_tlp_valid_next = 1'b1; - rx_req_tlp_strb_next = 0; - rx_req_tlp_eop_next = 1'b1; - cq_tvalid_int_next = 1'b0; - tlp_input_frame_next = 1'b0; - tlp_input_state_next = TLP_INPUT_STATE_IDLE; - end else begin - tlp_input_state_next = TLP_INPUT_STATE_PAYLOAD; - end - end else begin - tlp_input_state_next = TLP_INPUT_STATE_IDLE; - end - end - end else begin - tlp_input_state_next = TLP_INPUT_STATE_IDLE; - end - end - TLP_INPUT_STATE_PAYLOAD: begin - s_axis_cq_tready_cmb = rx_req_tlp_ready; - - if (cq_tvalid_int_reg && rx_req_tlp_ready) begin - - if (AXIS_PCIE_DATA_WIDTH > 128) begin - rx_req_tlp_data_next = cq_tdata >> 128; 
- rx_req_tlp_strb_next = cq_tkeep >> 4; - rx_req_tlp_sop_next = 1'b0; - end else begin - rx_req_tlp_data_next = s_axis_cq_tdata; - rx_req_tlp_strb_next = s_axis_cq_tkeep; - rx_req_tlp_sop_next = !tlp_input_frame_reg; - end - rx_req_tlp_eop_next = 1'b0; - - if (cq_tlast_int_reg) begin - rx_req_tlp_valid_next = 1'b1; - rx_req_tlp_strb_next = cq_tkeep_int_reg >> 4; - rx_req_tlp_eop_next = 1'b1; - cq_tvalid_int_next = s_axis_cq_tready && s_axis_cq_tvalid; - tlp_input_frame_next = 1'b0; - tlp_input_state_next = TLP_INPUT_STATE_IDLE; - end else if (s_axis_cq_tready && s_axis_cq_tvalid) begin - if (s_axis_cq_tlast && s_axis_cq_tkeep >> 4 == 0) begin - rx_req_tlp_valid_next = 1'b1; - rx_req_tlp_eop_next = 1'b1; - cq_tvalid_int_next = 1'b0; - tlp_input_frame_next = 1'b0; - tlp_input_state_next = TLP_INPUT_STATE_IDLE; - end else begin - rx_req_tlp_valid_next = 1'b1; - tlp_input_frame_next = 1'b1; - tlp_input_state_next = TLP_INPUT_STATE_PAYLOAD; - end - end else begin - tlp_input_state_next = TLP_INPUT_STATE_PAYLOAD; - end - end else begin - tlp_input_state_next = TLP_INPUT_STATE_PAYLOAD; - end - end - endcase end always @(posedge clk) begin - tlp_input_state_reg <= tlp_input_state_next; - rx_req_tlp_data_reg <= rx_req_tlp_data_next; rx_req_tlp_strb_reg <= rx_req_tlp_strb_next; rx_req_tlp_hdr_reg <= rx_req_tlp_hdr_next; @@ -435,21 +497,23 @@ always @(posedge clk) begin rx_req_tlp_valid_reg <= rx_req_tlp_valid_next; rx_req_tlp_sop_reg <= rx_req_tlp_sop_next; rx_req_tlp_eop_reg <= rx_req_tlp_eop_next; + tlp_frame_reg <= tlp_frame_next; - tlp_input_frame_reg <= tlp_input_frame_next; + cq_frame_reg <= cq_frame_next; - cq_tdata_int_reg <= cq_tdata_int_next; - cq_tkeep_int_reg <= cq_tkeep_int_next; - cq_tvalid_int_reg <= cq_tvalid_int_next; - cq_tlast_int_reg <= cq_tlast_int_next; - cq_tuser_int_reg <= cq_tuser_int_next; + cq_data_int_reg <= cq_data_int_next; + cq_strb_int_reg <= cq_strb_int_next; + cq_hdr_be_int_reg <= cq_hdr_be_int_next; + cq_valid_int_reg <= cq_valid_int_next; 
+ cq_strb_eop_int_reg <= cq_strb_eop_int_next; + cq_sop_int_reg <= cq_sop_int_next; + cq_eop_int_reg <= cq_eop_int_next; if (rst) begin - tlp_input_state_reg <= TLP_INPUT_STATE_IDLE; - rx_req_tlp_valid_reg <= 0; - cq_tvalid_int_reg <= 1'b0; + cq_frame_reg <= 1'b0; + cq_valid_int_reg <= 0; end end diff --git a/rtl/pcie_us_if_rc.v b/rtl/pcie_us_if_rc.v index 0d3f724b2..8e8cfbb32 100644 --- a/rtl/pcie_us_if_rc.v +++ b/rtl/pcie_us_if_rc.v @@ -1,6 +1,6 @@ /* -Copyright (c) 2021 Alex Forencich +Copyright (c) 2021-2022 Alex Forencich Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -39,6 +39,8 @@ module pcie_us_if_rc # parameter AXIS_PCIE_KEEP_WIDTH = (AXIS_PCIE_DATA_WIDTH/32), // PCIe AXI stream RC tuser signal width parameter AXIS_PCIE_RC_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 75 : 161, + // RC interface TLP straddling + parameter RC_STRADDLE = AXIS_PCIE_DATA_WIDTH >= 256, // TLP data width parameter TLP_DATA_WIDTH = AXIS_PCIE_DATA_WIDTH, // TLP strobe width @@ -75,10 +77,9 @@ module pcie_us_if_rc # input wire rx_cpl_tlp_ready ); -parameter TLP_DATA_WIDTH_BYTES = TLP_DATA_WIDTH/8; -parameter TLP_DATA_WIDTH_DWORDS = TLP_DATA_WIDTH/32; - -parameter OUTPUT_FIFO_ADDR_WIDTH = 5; +parameter INT_TLP_SEG_COUNT = (RC_STRADDLE && AXIS_PCIE_DATA_WIDTH >= 256) ? (AXIS_PCIE_DATA_WIDTH == 512 ? 
4 : 2) : 1; +parameter INT_TLP_SEG_DATA_WIDTH = TLP_DATA_WIDTH / INT_TLP_SEG_COUNT; +parameter INT_TLP_SEG_STRB_WIDTH = TLP_STRB_WIDTH / INT_TLP_SEG_COUNT; // bus width assertions initial begin @@ -104,11 +105,6 @@ initial begin end end - if (TLP_SEG_COUNT != 1) begin - $error("Error: TLP segment count must be 1 (instance %m)"); - $finish; - end - if (TLP_DATA_WIDTH != AXIS_PCIE_DATA_WIDTH) begin $error("Error: Interface widths must match (instance %m)"); $finish; @@ -157,237 +153,299 @@ localparam [3:0] reg [TLP_DATA_WIDTH-1:0] rx_cpl_tlp_data_reg = 0, rx_cpl_tlp_data_next; reg [TLP_STRB_WIDTH-1:0] rx_cpl_tlp_strb_reg = 0, rx_cpl_tlp_strb_next; -reg [TLP_SEG_COUNT*TLP_HDR_WIDTH-1:0] rx_cpl_tlp_hdr_reg = 0, rx_cpl_tlp_hdr_next; -reg [TLP_SEG_COUNT*4-1:0] rx_cpl_tlp_error_reg = 0, rx_cpl_tlp_error_next; -reg [TLP_SEG_COUNT-1:0] rx_cpl_tlp_valid_reg = 0, rx_cpl_tlp_valid_next; -reg [TLP_SEG_COUNT-1:0] rx_cpl_tlp_sop_reg = 0, rx_cpl_tlp_sop_next; -reg [TLP_SEG_COUNT-1:0] rx_cpl_tlp_eop_reg = 0, rx_cpl_tlp_eop_next; +reg [INT_TLP_SEG_COUNT*TLP_HDR_WIDTH-1:0] rx_cpl_tlp_hdr_reg = 0, rx_cpl_tlp_hdr_next; +reg [INT_TLP_SEG_COUNT*4-1:0] rx_cpl_tlp_error_reg = 0, rx_cpl_tlp_error_next; +reg [INT_TLP_SEG_COUNT-1:0] rx_cpl_tlp_valid_reg = 0, rx_cpl_tlp_valid_next; +reg [INT_TLP_SEG_COUNT-1:0] rx_cpl_tlp_sop_reg = 0, rx_cpl_tlp_sop_next; +reg [INT_TLP_SEG_COUNT-1:0] rx_cpl_tlp_eop_reg = 0, rx_cpl_tlp_eop_next; +reg tlp_frame_reg = 0, tlp_frame_next; -assign rx_cpl_tlp_data = rx_cpl_tlp_data_reg; -assign rx_cpl_tlp_strb = rx_cpl_tlp_strb_reg; -assign rx_cpl_tlp_hdr = rx_cpl_tlp_hdr_reg; -assign rx_cpl_tlp_error = rx_cpl_tlp_error_reg; -assign rx_cpl_tlp_valid = rx_cpl_tlp_valid_reg; -assign rx_cpl_tlp_sop = rx_cpl_tlp_sop_reg; -assign rx_cpl_tlp_eop = rx_cpl_tlp_eop_reg; - -localparam [1:0] - TLP_INPUT_STATE_IDLE = 2'd0, - TLP_INPUT_STATE_HEADER = 2'd1, - TLP_INPUT_STATE_PAYLOAD = 2'd2; - -reg [1:0] tlp_input_state_reg = TLP_INPUT_STATE_IDLE, tlp_input_state_next; - -reg 
s_axis_rc_tready_cmb; +wire fifo_tlp_ready; reg tlp_input_frame_reg = 1'b0, tlp_input_frame_next; -reg [AXIS_PCIE_DATA_WIDTH-1:0] rc_tdata_int_reg = {AXIS_PCIE_DATA_WIDTH{1'b0}}, rc_tdata_int_next; -reg [AXIS_PCIE_KEEP_WIDTH-1:0] rc_tkeep_int_reg = {AXIS_PCIE_KEEP_WIDTH{1'b0}}, rc_tkeep_int_next; -reg rc_tvalid_int_reg = 1'b0, rc_tvalid_int_next; -reg rc_tlast_int_reg = 1'b0, rc_tlast_int_next; +reg [TLP_DATA_WIDTH-1:0] rc_data; +reg [TLP_STRB_WIDTH-1:0] rc_strb; +reg [INT_TLP_SEG_COUNT-1:0] rc_valid; +reg [TLP_STRB_WIDTH-1:0] rc_strb_sop; +reg [TLP_STRB_WIDTH-1:0] rc_strb_eop; +reg [INT_TLP_SEG_COUNT-1:0] rc_sop; +reg [INT_TLP_SEG_COUNT-1:0] rc_eop; +reg rc_frame_reg = 1'b0, rc_frame_next; -wire [AXIS_PCIE_DATA_WIDTH*2-1:0] rc_tdata = {s_axis_rc_tdata, rc_tdata_int_reg}; -wire [AXIS_PCIE_KEEP_WIDTH*2-1:0] rc_tkeep = {s_axis_rc_tkeep, rc_tkeep_int_reg}; +reg [TLP_DATA_WIDTH-1:0] rc_data_int_reg = 0, rc_data_int_next; +reg [TLP_STRB_WIDTH-1:0] rc_strb_int_reg = 0, rc_strb_int_next; +reg [INT_TLP_SEG_COUNT-1:0] rc_valid_int_reg = 0, rc_valid_int_next; +reg [TLP_STRB_WIDTH-1:0] rc_strb_eop_int_reg = 0, rc_strb_eop_int_next; +reg [INT_TLP_SEG_COUNT-1:0] rc_sop_int_reg = 0, rc_sop_int_next; +reg [INT_TLP_SEG_COUNT-1:0] rc_eop_int_reg = 0, rc_eop_int_next; -reg [127:0] tlp_hdr; -reg [3:0] tlp_error; +wire [TLP_DATA_WIDTH*2-1:0] rc_data_full = {rc_data, rc_data_int_reg}; +wire [TLP_STRB_WIDTH*2-1:0] rc_strb_full = {rc_strb, rc_strb_int_reg}; +wire [INT_TLP_SEG_COUNT*2-1:0] rc_valid_full = {rc_valid, rc_valid_int_reg}; +wire [TLP_STRB_WIDTH*2-1:0] rc_strb_eop_full = {rc_strb_eop, rc_strb_eop_int_reg}; +wire [INT_TLP_SEG_COUNT*2-1:0] rc_sop_full = {rc_sop, rc_sop_int_reg}; +wire [INT_TLP_SEG_COUNT*2-1:0] rc_eop_full = {rc_eop, rc_eop_int_reg}; -assign s_axis_rc_tready = s_axis_rc_tready_cmb; +reg [INT_TLP_SEG_COUNT*128-1:0] tlp_hdr; +reg [INT_TLP_SEG_COUNT*4-1:0] tlp_error; + +assign s_axis_rc_tready = fifo_tlp_ready; + +pcie_tlp_fifo #( + .DEPTH((1024/4)*2), + 
.TLP_DATA_WIDTH(TLP_DATA_WIDTH), + .TLP_STRB_WIDTH(TLP_STRB_WIDTH), + .TLP_HDR_WIDTH(TLP_HDR_WIDTH), + .SEQ_NUM_WIDTH(1), + .IN_TLP_SEG_COUNT(INT_TLP_SEG_COUNT), + .OUT_TLP_SEG_COUNT(TLP_SEG_COUNT) +) +pcie_tlp_fifo_inst ( + .clk(clk), + .rst(rst), + + /* + * TLP input + */ + .in_tlp_data(rx_cpl_tlp_data_reg), + .in_tlp_strb(rx_cpl_tlp_strb_reg), + .in_tlp_hdr(rx_cpl_tlp_hdr_reg), + .in_tlp_seq(0), + .in_tlp_bar_id(0), + .in_tlp_func_num(0), + .in_tlp_error(rx_cpl_tlp_error_reg), + .in_tlp_valid(rx_cpl_tlp_valid_reg), + .in_tlp_sop(rx_cpl_tlp_sop_reg), + .in_tlp_eop(rx_cpl_tlp_eop_reg), + .in_tlp_ready(fifo_tlp_ready), + + /* + * TLP output + */ + .out_tlp_data(rx_cpl_tlp_data), + .out_tlp_strb(rx_cpl_tlp_strb), + .out_tlp_hdr(rx_cpl_tlp_hdr), + .out_tlp_seq(), + .out_tlp_bar_id(), + .out_tlp_func_num(), + .out_tlp_error(rx_cpl_tlp_error), + .out_tlp_valid(rx_cpl_tlp_valid), + .out_tlp_sop(rx_cpl_tlp_sop), + .out_tlp_eop(rx_cpl_tlp_eop), + .out_tlp_ready(rx_cpl_tlp_ready), + + /* + * Status + */ + .half_full(), + .watermark() +); + +integer seg, lane; +reg valid; always @* begin - tlp_input_state_next = TLP_INPUT_STATE_IDLE; - rx_cpl_tlp_data_next = rx_cpl_tlp_data_reg; rx_cpl_tlp_strb_next = rx_cpl_tlp_strb_reg; rx_cpl_tlp_hdr_next = rx_cpl_tlp_hdr_reg; rx_cpl_tlp_error_next = rx_cpl_tlp_error_reg; - rx_cpl_tlp_valid_next = rx_cpl_tlp_valid_reg && !rx_cpl_tlp_ready; + rx_cpl_tlp_valid_next = fifo_tlp_ready ? 
0 : rx_cpl_tlp_valid_reg; rx_cpl_tlp_sop_next = rx_cpl_tlp_sop_reg; rx_cpl_tlp_eop_next = rx_cpl_tlp_eop_reg; + tlp_frame_next = tlp_frame_reg; - s_axis_rc_tready_cmb = rx_cpl_tlp_ready; + rc_frame_next = rc_frame_reg; - tlp_input_frame_next = tlp_input_frame_reg; + rc_data_int_next = rc_data_int_reg; + rc_strb_int_next = rc_strb_int_reg; + rc_valid_int_next = rc_valid_int_reg; + rc_strb_eop_int_next = rc_strb_eop_int_reg; + rc_sop_int_next = rc_sop_int_reg; + rc_eop_int_next = rc_eop_int_reg; - rc_tdata_int_next = rc_tdata_int_reg; - rc_tkeep_int_next = rc_tkeep_int_reg; - rc_tvalid_int_next = rc_tvalid_int_reg; - rc_tlast_int_next = rc_tlast_int_reg; + // decode framing + if (RC_STRADDLE && AXIS_PCIE_DATA_WIDTH >= 256) begin + rc_data = s_axis_rc_tdata; + rc_strb = 0; + rc_valid = 0; + rc_strb_sop = 0; + rc_strb_eop = 0; + rc_sop = 0; + rc_eop = 0; + if (AXIS_PCIE_DATA_WIDTH == 256) begin + for (seg = 0; seg < INT_TLP_SEG_COUNT; seg = seg + 1) begin + if (s_axis_rc_tuser[32+seg]) begin + rc_strb_sop[seg*4] = 1'b1; + end + if (s_axis_rc_tuser[34+seg*4]) begin + rc_strb_eop[s_axis_rc_tuser[35+seg*4 +: 3]] = 1'b1; + end + end + end else if (AXIS_PCIE_DATA_WIDTH == 512) begin + for (seg = 0; seg < INT_TLP_SEG_COUNT; seg = seg + 1) begin + if (s_axis_rc_tuser[64+seg]) begin + rc_strb_sop[s_axis_rc_tuser[68+seg*2 +: 2]*4] = 1'b1; + end + if (s_axis_rc_tuser[76+seg]) begin + rc_strb_eop[s_axis_rc_tuser[80+seg*4 +: 4]] = 1'b1; + end + end + end + valid = 1; + for (lane = 0; lane < TLP_STRB_WIDTH; lane = lane + 1) begin + if (rc_strb_sop[lane]) begin + valid = 1; + rc_sop[lane/INT_TLP_SEG_STRB_WIDTH] = 1'b1; + end + if (valid) begin + rc_strb[lane] = 1'b1; + rc_valid[lane/INT_TLP_SEG_STRB_WIDTH] = s_axis_rc_tvalid; + end + if (rc_strb_eop[lane]) begin + valid = 0; + rc_eop[lane/INT_TLP_SEG_STRB_WIDTH] = 1'b1; + end + end + end else begin + rc_data = s_axis_rc_tdata; + rc_strb = s_axis_rc_tvalid ? 
s_axis_rc_tkeep : 0; + rc_valid = s_axis_rc_tvalid; + rc_sop = !rc_frame_reg; + rc_eop = s_axis_rc_tlast; + rc_strb_sop = rc_sop; + rc_strb_eop = 0; + for (lane = 0; lane < TLP_STRB_WIDTH; lane = lane + 1) begin + if (rc_strb[lane]) begin + rc_strb_eop = (rc_eop) << lane; + end + end + if (s_axis_rc_tready && s_axis_rc_tvalid) begin + rc_frame_next = !s_axis_rc_tlast; + end + end + + for (seg = 0; seg < INT_TLP_SEG_COUNT; seg = seg + 1) begin + // parse header + // DW 0 + if (rc_data_full[INT_TLP_SEG_DATA_WIDTH*seg+32 +: 11] != 0) begin + tlp_hdr[128*seg+125 +: 3] = TLP_FMT_3DW_DATA; // fmt - 3DW with data + end else begin + tlp_hdr[128*seg+125 +: 3] = TLP_FMT_3DW; // fmt - 3DW without data + end + tlp_hdr[128*seg+120 +: 5] = {4'b0101, rc_data_full[INT_TLP_SEG_DATA_WIDTH*seg+29]}; // type - completion + tlp_hdr[128*seg+119] = 1'b0; // T9 + tlp_hdr[128*seg+116 +: 3] = rc_data_full[INT_TLP_SEG_DATA_WIDTH*seg+89 +: 3]; // TC + tlp_hdr[128*seg+115] = 1'b0; // T8 + tlp_hdr[128*seg+114] = rc_data_full[INT_TLP_SEG_DATA_WIDTH*seg+94]; // attr + tlp_hdr[128*seg+113] = 1'b0; // LN + tlp_hdr[128*seg+112] = 1'b0; // TH + tlp_hdr[128*seg+111] = 1'b0; // TD + tlp_hdr[128*seg+110] = rc_data_full[INT_TLP_SEG_DATA_WIDTH*seg+46]; // EP + tlp_hdr[128*seg+108 +: 2] = rc_data_full[INT_TLP_SEG_DATA_WIDTH*seg+92 +: 2]; // attr + tlp_hdr[128*seg+106 +: 2] = 2'b00; // AT + tlp_hdr[128*seg+96 +: 10] = rc_data_full[INT_TLP_SEG_DATA_WIDTH*seg+32 +: 11]; // length + // DW 1 + tlp_hdr[128*seg+80 +: 16] = rc_data_full[INT_TLP_SEG_DATA_WIDTH*seg+72 +: 16]; // completer ID + tlp_hdr[128*seg+77 +: 3] = rc_data_full[INT_TLP_SEG_DATA_WIDTH*seg+43 +: 3]; // completion status + tlp_hdr[128*seg+76] = 1'b0; // BCM + tlp_hdr[128*seg+64 +: 12] = rc_data_full[INT_TLP_SEG_DATA_WIDTH*seg+16 +: 13]; // byte count + // DW 2 + tlp_hdr[128*seg+48 +: 16] = rc_data_full[INT_TLP_SEG_DATA_WIDTH*seg+48 +: 16]; // requester ID + tlp_hdr[128*seg+40 +: 8] = rc_data_full[INT_TLP_SEG_DATA_WIDTH*seg+64 +: 8]; // tag + 
tlp_hdr[128*seg+39] = 1'b0; + tlp_hdr[128*seg+32 +: 7] = rc_data_full[INT_TLP_SEG_DATA_WIDTH*seg+0 +: 7]; // lower address + // DW 3 + tlp_hdr[128*seg+0 +: 32] = 32'd0; + + // error code + case (rc_data_full[INT_TLP_SEG_DATA_WIDTH*seg+12 +: 4]) + RC_ERROR_NORMAL_TERMINATION: tlp_error[4*seg +: 4] = PCIE_ERROR_NONE; + RC_ERROR_POISONED: tlp_error[4*seg +: 4] = PCIE_ERROR_POISONED; + RC_ERROR_BAD_STATUS: tlp_error[4*seg +: 4] = PCIE_ERROR_BAD_STATUS; + RC_ERROR_INVALID_LENGTH: tlp_error[4*seg +: 4] = PCIE_ERROR_INVALID_LEN; + RC_ERROR_MISMATCH: tlp_error[4*seg +: 4] = PCIE_ERROR_MISMATCH; + RC_ERROR_INVALID_ADDRESS: tlp_error[4*seg +: 4] = PCIE_ERROR_INVALID_ADDR; + RC_ERROR_INVALID_TAG: tlp_error[4*seg +: 4] = PCIE_ERROR_INVALID_TAG; + RC_ERROR_FLR: tlp_error[4*seg +: 4] = PCIE_ERROR_FLR; + RC_ERROR_TIMEOUT: tlp_error[4*seg +: 4] = PCIE_ERROR_TIMEOUT; + default: tlp_error[4*seg +: 4] = PCIE_ERROR_NONE; + endcase + end + + if (fifo_tlp_ready) begin + rx_cpl_tlp_strb_next = 0; + rx_cpl_tlp_valid_next = 0; + rx_cpl_tlp_sop_next = 0; + rx_cpl_tlp_eop_next = 0; + if (TLP_DATA_WIDTH == 64) begin + if (rc_valid_full[0]) begin + rx_cpl_tlp_data_next = rc_data_full >> 32; + rx_cpl_tlp_strb_next = rc_strb_full >> 1; + if (rc_sop_full[0]) begin + tlp_frame_next = 1'b0; + rx_cpl_tlp_hdr_next = tlp_hdr; + rx_cpl_tlp_error_next = tlp_error; + if (rc_eop_full[0]) begin + rc_valid_int_next[0] = 1'b0; + end else if (rc_valid_full[1]) begin + rc_valid_int_next[0] = 1'b0; + end + end else begin + rx_cpl_tlp_sop_next = !tlp_frame_reg; + rx_cpl_tlp_eop_next = 1'b0; + if (rc_eop_full[0]) begin + rx_cpl_tlp_strb_next = rc_strb_full[1]; + rx_cpl_tlp_valid_next = 1'b1; + rc_valid_int_next[0] = 1'b0; + rx_cpl_tlp_eop_next = 1'b1; + end else if (rc_valid_full[1]) begin + rx_cpl_tlp_valid_next = 1'b1; + rc_valid_int_next[0] = 1'b0; + tlp_frame_next = 1'b1; + end + end + end + end else begin + for (seg = 0; seg < INT_TLP_SEG_COUNT; seg = seg + 1) begin + if (rc_valid_full[seg]) begin + 
rx_cpl_tlp_data_next[INT_TLP_SEG_DATA_WIDTH*seg +: INT_TLP_SEG_DATA_WIDTH] = rc_data_full >> (96 + INT_TLP_SEG_DATA_WIDTH*seg); + if (rc_sop_full[seg]) begin + rx_cpl_tlp_hdr_next[TLP_HDR_WIDTH*seg +: TLP_HDR_WIDTH] = tlp_hdr[128*seg +: 128]; + rx_cpl_tlp_error_next[4*seg +: 4] = tlp_error[4*seg +: 4]; + end + rx_cpl_tlp_sop_next[seg] = rc_sop_full[seg]; + if (rc_eop_full[seg]) begin + rx_cpl_tlp_strb_next[INT_TLP_SEG_STRB_WIDTH*seg +: INT_TLP_SEG_STRB_WIDTH] = rc_strb_full[INT_TLP_SEG_STRB_WIDTH*seg +: INT_TLP_SEG_STRB_WIDTH] >> 3; + if (rc_sop_full[seg] || rc_strb_eop_full[INT_TLP_SEG_STRB_WIDTH*seg +: INT_TLP_SEG_STRB_WIDTH] >> 3) begin + rx_cpl_tlp_eop_next[seg] = 1'b1; + rx_cpl_tlp_valid_next[seg] = 1'b1; + end + rc_valid_int_next[seg] = 1'b0; + end else begin + rx_cpl_tlp_strb_next[INT_TLP_SEG_STRB_WIDTH*seg +: INT_TLP_SEG_STRB_WIDTH] = rc_strb_full >> (3 + INT_TLP_SEG_STRB_WIDTH*seg); + if (rc_valid_full[seg+1]) begin + rx_cpl_tlp_eop_next[seg] = (rc_strb_eop_full[INT_TLP_SEG_STRB_WIDTH*(seg+1) +: INT_TLP_SEG_STRB_WIDTH] & 3'h7) != 0; + rx_cpl_tlp_valid_next[seg] = 1'b1; + rc_valid_int_next[seg] = 1'b0; + end + end + end + end + end + end if (s_axis_rc_tready && s_axis_rc_tvalid) begin - rc_tdata_int_next = s_axis_rc_tdata; - rc_tkeep_int_next = s_axis_rc_tkeep; - rc_tvalid_int_next = s_axis_rc_tvalid; - rc_tlast_int_next = s_axis_rc_tlast; + rc_data_int_next = rc_data; + rc_strb_int_next = rc_strb; + rc_valid_int_next = rc_valid; + rc_strb_eop_int_next = rc_strb_eop; + rc_sop_int_next = rc_sop; + rc_eop_int_next = rc_eop; end - - // parse header - // DW 0 - if (rc_tdata[42:32] != 0) begin - tlp_hdr[127:125] = TLP_FMT_3DW_DATA; // fmt - 3DW with data - end else begin - tlp_hdr[127:125] = TLP_FMT_3DW; // fmt - 3DW without data - end - tlp_hdr[124:120] = {4'b0101, rc_tdata[29]}; // type - completion - tlp_hdr[119] = 1'b0; // T9 - tlp_hdr[118:116] = rc_tdata[91:89]; // TC - tlp_hdr[115] = 1'b0; // T8 - tlp_hdr[114] = rc_tdata[94]; // attr - tlp_hdr[113] = 1'b0; 
// LN - tlp_hdr[112] = 1'b0; // TH - tlp_hdr[111] = 1'b0; // TD - tlp_hdr[110] = rc_tdata[46]; // EP - tlp_hdr[109:108] = rc_tdata[93:92]; // attr - tlp_hdr[107:106] = 2'b00; // AT - tlp_hdr[105:96] = rc_tdata[42:32]; // length - // DW 1 - tlp_hdr[95:80] = rc_tdata[87:72]; // completer ID - tlp_hdr[79:77] = rc_tdata[45:43]; // completion status - tlp_hdr[76] = 1'b0; // BCM - tlp_hdr[75:64] = rc_tdata[28:16]; // byte count - // DW 2 - tlp_hdr[63:48] = rc_tdata[63:48]; // requester ID - tlp_hdr[47:40] = rc_tdata[71:64]; // tag - tlp_hdr[39] = 1'b0; - tlp_hdr[38:32] = rc_tdata[6:0]; // lower address - // DW 3 - tlp_hdr[31:0] = 32'd0; - - // error code - case (rc_tdata[15:12]) - RC_ERROR_NORMAL_TERMINATION: tlp_error = PCIE_ERROR_NONE; - RC_ERROR_POISONED: tlp_error = PCIE_ERROR_POISONED; - RC_ERROR_BAD_STATUS: tlp_error = PCIE_ERROR_BAD_STATUS; - RC_ERROR_INVALID_LENGTH: tlp_error = PCIE_ERROR_INVALID_LEN; - RC_ERROR_MISMATCH: tlp_error = PCIE_ERROR_MISMATCH; - RC_ERROR_INVALID_ADDRESS: tlp_error = PCIE_ERROR_INVALID_ADDR; - RC_ERROR_INVALID_TAG: tlp_error = PCIE_ERROR_INVALID_TAG; - RC_ERROR_FLR: tlp_error = PCIE_ERROR_FLR; - RC_ERROR_TIMEOUT: tlp_error = PCIE_ERROR_TIMEOUT; - default: tlp_error = PCIE_ERROR_NONE; - endcase - - case (tlp_input_state_reg) - TLP_INPUT_STATE_IDLE: begin - s_axis_rc_tready_cmb = rx_cpl_tlp_ready; - - if (rc_tvalid_int_reg && rx_cpl_tlp_ready) begin - - rx_cpl_tlp_hdr_next = tlp_hdr; - rx_cpl_tlp_error_next = tlp_error; - - if (AXIS_PCIE_DATA_WIDTH > 64) begin - rx_cpl_tlp_data_next = rc_tdata >> 96; - rx_cpl_tlp_strb_next = rc_tkeep >> 3; - rx_cpl_tlp_sop_next = 1'b1; - rx_cpl_tlp_eop_next = 1'b0; - - tlp_input_frame_next = 1'b1; - - if (rc_tlast_int_reg) begin - rx_cpl_tlp_valid_next = 1'b1; - rx_cpl_tlp_strb_next = rc_tkeep_int_reg >> 3; - rx_cpl_tlp_eop_next = 1'b1; - rc_tvalid_int_next = s_axis_rc_tready && s_axis_rc_tvalid; - tlp_input_frame_next = 1'b0; - tlp_input_state_next = TLP_INPUT_STATE_IDLE; - end else if (s_axis_rc_tready 
&& s_axis_rc_tvalid) begin - if (s_axis_rc_tlast && s_axis_rc_tkeep >> 3 == 0) begin - rx_cpl_tlp_valid_next = 1'b1; - rx_cpl_tlp_eop_next = 1'b1; - rc_tvalid_int_next = 1'b0; - tlp_input_frame_next = 1'b0; - tlp_input_state_next = TLP_INPUT_STATE_IDLE; - end else begin - rx_cpl_tlp_valid_next = 1'b1; - tlp_input_frame_next = 1'b1; - tlp_input_state_next = TLP_INPUT_STATE_PAYLOAD; - end - end else begin - tlp_input_state_next = TLP_INPUT_STATE_IDLE; - end - end else begin - rx_cpl_tlp_data_next = rc_tdata >> 96; - rx_cpl_tlp_strb_next = rc_tkeep >> 3; - rx_cpl_tlp_sop_next = 1'b1; - rx_cpl_tlp_eop_next = 1'b0; - - if (rc_tlast_int_reg) begin - rc_tvalid_int_next = s_axis_rc_tready && s_axis_rc_tvalid; - tlp_input_frame_next = 1'b0; - tlp_input_state_next = TLP_INPUT_STATE_IDLE; - end else if (s_axis_rc_tready && s_axis_rc_tvalid) begin - if (s_axis_rc_tlast) begin - rx_cpl_tlp_valid_next = 1'b1; - rx_cpl_tlp_strb_next = s_axis_rc_tkeep >> 1; - rx_cpl_tlp_eop_next = 1'b1; - rc_tvalid_int_next = 1'b0; - tlp_input_frame_next = 1'b0; - tlp_input_state_next = TLP_INPUT_STATE_IDLE; - end else begin - tlp_input_state_next = TLP_INPUT_STATE_PAYLOAD; - end - end else begin - tlp_input_state_next = TLP_INPUT_STATE_IDLE; - end - end - end else begin - tlp_input_state_next = TLP_INPUT_STATE_IDLE; - end - end - TLP_INPUT_STATE_PAYLOAD: begin - s_axis_rc_tready_cmb = rx_cpl_tlp_ready; - - if (rc_tvalid_int_reg && rx_cpl_tlp_ready) begin - - if (AXIS_PCIE_DATA_WIDTH > 64) begin - rx_cpl_tlp_data_next = rc_tdata >> 96; - rx_cpl_tlp_strb_next = rc_tkeep >> 3; - rx_cpl_tlp_sop_next = 1'b0; - end else begin - rx_cpl_tlp_data_next = rc_tdata >> 32; - rx_cpl_tlp_strb_next = rc_tkeep >> 1; - rx_cpl_tlp_sop_next = !tlp_input_frame_reg; - end - rx_cpl_tlp_eop_next = 1'b0; - - if (rc_tlast_int_reg) begin - rx_cpl_tlp_valid_next = 1'b1; - if (AXIS_PCIE_DATA_WIDTH > 64) begin - rx_cpl_tlp_strb_next = rc_tkeep_int_reg >> 3; - end else begin - rx_cpl_tlp_strb_next = rc_tkeep_int_reg >> 1; - 
end - rx_cpl_tlp_eop_next = 1'b1; - rc_tvalid_int_next = s_axis_rc_tready && s_axis_rc_tvalid; - tlp_input_frame_next = 1'b0; - tlp_input_state_next = TLP_INPUT_STATE_IDLE; - end else if (s_axis_rc_tready && s_axis_rc_tvalid) begin - if (s_axis_rc_tlast && (s_axis_rc_tkeep >> (AXIS_PCIE_DATA_WIDTH > 64 ? 3 : 1)) == 0) begin - rx_cpl_tlp_valid_next = 1'b1; - rx_cpl_tlp_eop_next = 1'b1; - rc_tvalid_int_next = 1'b0; - tlp_input_frame_next = 1'b0; - tlp_input_state_next = TLP_INPUT_STATE_IDLE; - end else begin - rx_cpl_tlp_valid_next = 1'b1; - tlp_input_frame_next = 1'b1; - tlp_input_state_next = TLP_INPUT_STATE_PAYLOAD; - end - end else begin - tlp_input_state_next = TLP_INPUT_STATE_PAYLOAD; - end - end else begin - tlp_input_state_next = TLP_INPUT_STATE_PAYLOAD; - end - end - endcase end always @(posedge clk) begin - tlp_input_state_reg <= tlp_input_state_next; - rx_cpl_tlp_data_reg <= rx_cpl_tlp_data_next; rx_cpl_tlp_strb_reg <= rx_cpl_tlp_strb_next; rx_cpl_tlp_hdr_reg <= rx_cpl_tlp_hdr_next; @@ -395,20 +453,22 @@ always @(posedge clk) begin rx_cpl_tlp_valid_reg <= rx_cpl_tlp_valid_next; rx_cpl_tlp_sop_reg <= rx_cpl_tlp_sop_next; rx_cpl_tlp_eop_reg <= rx_cpl_tlp_eop_next; + tlp_frame_reg <= tlp_frame_next; - tlp_input_frame_reg <= tlp_input_frame_next; + rc_frame_reg <= rc_frame_next; - rc_tdata_int_reg <= rc_tdata_int_next; - rc_tkeep_int_reg <= rc_tkeep_int_next; - rc_tvalid_int_reg <= rc_tvalid_int_next; - rc_tlast_int_reg <= rc_tlast_int_next; + rc_data_int_reg <= rc_data_int_next; + rc_strb_int_reg <= rc_strb_int_next; + rc_valid_int_reg <= rc_valid_int_next; + rc_strb_eop_int_reg <= rc_strb_eop_int_next; + rc_sop_int_reg <= rc_sop_int_next; + rc_eop_int_reg <= rc_eop_int_next; if (rst) begin - tlp_input_state_reg <= TLP_INPUT_STATE_IDLE; - rx_cpl_tlp_valid_reg <= 0; - rc_tvalid_int_reg <= 1'b0; + rc_frame_reg <= 1'b0; + rc_valid_int_reg <= 0; end end diff --git a/rtl/pcie_us_if_rq.v b/rtl/pcie_us_if_rq.v index f12dd7e4f..406e61c06 100644 --- 
a/rtl/pcie_us_if_rq.v +++ b/rtl/pcie_us_if_rq.v @@ -1,6 +1,6 @@ /* -Copyright (c) 2021 Alex Forencich +Copyright (c) 2021-2022 Alex Forencich Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -39,6 +39,8 @@ module pcie_us_if_rq # parameter AXIS_PCIE_KEEP_WIDTH = (AXIS_PCIE_DATA_WIDTH/32), // PCIe AXI stream RQ tuser signal width parameter AXIS_PCIE_RQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 60 : 137, + // RQ interface TLP straddling + parameter RQ_STRADDLE = AXIS_PCIE_DATA_WIDTH >= 512, // RQ sequence number width parameter RQ_SEQ_NUM_WIDTH = AXIS_PCIE_RQ_USER_WIDTH == 60 ? 4 : 6, // TLP data width @@ -114,11 +116,18 @@ module pcie_us_if_rq # parameter TLP_DATA_WIDTH_BYTES = TLP_DATA_WIDTH/8; parameter TLP_DATA_WIDTH_DWORDS = TLP_DATA_WIDTH/32; +parameter INT_TLP_SEG_COUNT = (RQ_STRADDLE && AXIS_PCIE_DATA_WIDTH >= 512) ? 2 : 1; +parameter INT_TLP_SEG_DATA_WIDTH = TLP_DATA_WIDTH / INT_TLP_SEG_COUNT; +parameter INT_TLP_SEG_STRB_WIDTH = TLP_STRB_WIDTH / INT_TLP_SEG_COUNT; + +parameter SEG_SEL_WIDTH = $clog2(INT_TLP_SEG_COUNT); + +parameter PORTS = 2; +parameter CL_PORTS = $clog2(PORTS); + parameter SEQ_NUM_MASK = {RQ_SEQ_NUM_WIDTH-1{1'b1}}; parameter SEQ_NUM_FLAG = {1'b1, {RQ_SEQ_NUM_WIDTH-1{1'b0}}}; -parameter OUTPUT_FIFO_ADDR_WIDTH = 5; - // bus width assertions initial begin if (AXIS_PCIE_DATA_WIDTH != 64 && AXIS_PCIE_DATA_WIDTH != 128 && AXIS_PCIE_DATA_WIDTH != 256 && AXIS_PCIE_DATA_WIDTH != 512) begin @@ -165,11 +174,6 @@ initial begin end end - if (TLP_SEG_COUNT != 1) begin - $error("Error: TLP segment count must be 1 (instance %m)"); - $finish; - end - if (TLP_DATA_WIDTH != AXIS_PCIE_DATA_WIDTH) begin $error("Error: Interface widths must match (instance %m)"); $finish; @@ -203,413 +207,667 @@ localparam [3:0] REQ_MSG_VENDOR = 4'b1101, REQ_MSG_ATS = 4'b1110; -reg tx_rd_req_tlp_ready_cmb; +reg frame_reg = 1'b0, frame_next, frame_cyc; +reg tlp_hdr1_reg = 
1'b0, tlp_hdr1_next, tlp_hdr1_cyc; +reg tlp_hdr2_reg = 1'b0, tlp_hdr2_next, tlp_hdr2_cyc; +reg tlp_split1_reg = 1'b0, tlp_split1_next, tlp_split1_cyc; +reg tlp_split2_reg = 1'b0, tlp_split2_next, tlp_split2_cyc; +reg [CL_PORTS-1:0] port_reg = 0, port_next, port_cyc; +reg [SEG_SEL_WIDTH-1:0] seg_offset_cyc; +reg [SEG_SEL_WIDTH+1-1:0] seg_count_cyc; +reg valid, sop, eop; +reg frame, abort; +reg [SEG_SEL_WIDTH-1:0] port_seg_offset_cyc[0:PORTS-1]; +reg [SEG_SEL_WIDTH+1-1:0] port_seg_count_cyc[0:PORTS-1]; -wire [TLP_SEG_COUNT*RQ_SEQ_NUM_WIDTH-1:0] tx_rd_req_tlp_seq_int = {1'b1, tx_rd_req_tlp_seq}; +reg [INT_TLP_SEG_COUNT-1:0] port_seg_valid[0:PORTS-1]; +reg [INT_TLP_SEG_COUNT-1:0] port_seg_sop[0:PORTS-1]; +reg [INT_TLP_SEG_COUNT-1:0] port_seg_eop[0:PORTS-1]; +reg [INT_TLP_SEG_COUNT-1:0] port_seg_extra[0:PORTS-1]; -reg tx_wr_req_tlp_ready_cmb; +reg [INT_TLP_SEG_COUNT-1:0] out_sel, out_sel_cyc; +reg [INT_TLP_SEG_COUNT-1:0] out_sop; +reg [INT_TLP_SEG_COUNT-1:0] out_eop; +reg [INT_TLP_SEG_COUNT-1:0] out_tlp_hdr1; +reg [INT_TLP_SEG_COUNT-1:0] out_tlp_hdr2; +reg [INT_TLP_SEG_COUNT-1:0] out_tlp_split1; +reg [INT_TLP_SEG_COUNT-1:0] out_tlp_split2; +reg [CL_PORTS-1:0] out_sel_port[0:INT_TLP_SEG_COUNT-1]; +reg [SEG_SEL_WIDTH+1-1:0] out_sel_seg[0:INT_TLP_SEG_COUNT-1]; -wire [TLP_SEG_COUNT*RQ_SEQ_NUM_WIDTH-1:0] tx_wr_req_tlp_seq_int = {1'b0, tx_wr_req_tlp_seq}; +reg [TLP_DATA_WIDTH-1:0] out_tlp_data; +reg [TLP_STRB_WIDTH-1:0] out_tlp_strb; +reg [INT_TLP_SEG_COUNT*8-1:0] out_tlp_be; +reg [INT_TLP_SEG_COUNT*RQ_SEQ_NUM_WIDTH-1:0] out_tlp_seq; +reg [INT_TLP_SEG_COUNT-1:0] out_tlp_valid; +reg [INT_TLP_SEG_COUNT-1:0] out_tlp_sop; +reg [INT_TLP_SEG_COUNT-1:0] out_tlp_eop; +reg [127:0] out_shift_tlp_data_reg = 0, out_shift_tlp_data_next; +reg [3:0] out_shift_tlp_strb_reg = 0, out_shift_tlp_strb_next; -assign tx_rd_req_tlp_ready = tx_rd_req_tlp_ready_cmb; +reg [127:0] seg_tlp_hdr; +reg [127:0] seg_rc_hdr; +reg [INT_TLP_SEG_COUNT*3-1:0] eop_index; -assign tx_wr_req_tlp_ready = 
tx_wr_req_tlp_ready_cmb; +reg [AXIS_PCIE_DATA_WIDTH-1:0] m_axis_rq_tdata_reg = 0, m_axis_rq_tdata_next; +reg [AXIS_PCIE_KEEP_WIDTH-1:0] m_axis_rq_tkeep_reg = 0, m_axis_rq_tkeep_next; +reg m_axis_rq_tvalid_reg = 1'b0, m_axis_rq_tvalid_next; +reg m_axis_rq_tlast_reg = 1'b0, m_axis_rq_tlast_next; +reg [AXIS_PCIE_RQ_USER_WIDTH-1:0] m_axis_rq_tuser_reg = 0, m_axis_rq_tuser_next; generate assign m_axis_rd_req_tx_seq_num[TX_SEQ_NUM_WIDTH*0 +: TX_SEQ_NUM_WIDTH] = s_axis_rq_seq_num_0; assign m_axis_rd_req_tx_seq_num_valid[0] = s_axis_rq_seq_num_valid_0 && ((s_axis_rq_seq_num_0 & SEQ_NUM_FLAG) != 0); - -if (TX_SEQ_NUM_COUNT > 1) begin - assign m_axis_rd_req_tx_seq_num[TX_SEQ_NUM_WIDTH*1 +: TX_SEQ_NUM_WIDTH] = s_axis_rq_seq_num_1; - assign m_axis_rd_req_tx_seq_num_valid[1] = s_axis_rq_seq_num_valid_1 && ((s_axis_rq_seq_num_1 & SEQ_NUM_FLAG) != 0); -end - assign m_axis_wr_req_tx_seq_num[TX_SEQ_NUM_WIDTH*0 +: TX_SEQ_NUM_WIDTH] = s_axis_rq_seq_num_0; assign m_axis_wr_req_tx_seq_num_valid[0] = s_axis_rq_seq_num_valid_0 && ((s_axis_rq_seq_num_0 & SEQ_NUM_FLAG) == 0); if (TX_SEQ_NUM_COUNT > 1) begin + assign m_axis_rd_req_tx_seq_num[TX_SEQ_NUM_WIDTH*1 +: TX_SEQ_NUM_WIDTH] = s_axis_rq_seq_num_1; + assign m_axis_rd_req_tx_seq_num_valid[1] = s_axis_rq_seq_num_valid_1 && ((s_axis_rq_seq_num_1 & SEQ_NUM_FLAG) != 0); assign m_axis_wr_req_tx_seq_num[TX_SEQ_NUM_WIDTH*1 +: TX_SEQ_NUM_WIDTH] = s_axis_rq_seq_num_1; assign m_axis_wr_req_tx_seq_num_valid[1] = s_axis_rq_seq_num_valid_1 && ((s_axis_rq_seq_num_1 & SEQ_NUM_FLAG) == 0); end endgenerate -localparam [1:0] - TLP_OUTPUT_STATE_IDLE = 2'd0, - TLP_OUTPUT_STATE_RD_HEADER = 2'd1, - TLP_OUTPUT_STATE_WR_HEADER = 2'd2, - TLP_OUTPUT_STATE_WR_PAYLOAD = 2'd3; - -reg [1:0] tlp_output_state_reg = TLP_OUTPUT_STATE_IDLE, tlp_output_state_next; - -reg [TLP_DATA_WIDTH-1:0] out_tlp_data_reg = 0, out_tlp_data_next; -reg [TLP_STRB_WIDTH-1:0] out_tlp_strb_reg = 0, out_tlp_strb_next; -reg [TLP_SEG_COUNT-1:0] out_tlp_eop_reg = 0, out_tlp_eop_next; - -reg 
[127:0] tlp_header_data_rd; -reg [AXIS_PCIE_RQ_USER_WIDTH-1:0] tlp_tuser_rd; -reg [127:0] tlp_header_data_wr; -reg [AXIS_PCIE_RQ_USER_WIDTH-1:0] tlp_tuser_wr; - -reg [AXIS_PCIE_DATA_WIDTH-1:0] m_axis_rq_tdata_int = 0; -reg [AXIS_PCIE_KEEP_WIDTH-1:0] m_axis_rq_tkeep_int = 0; -reg m_axis_rq_tvalid_int = 0; -wire m_axis_rq_tready_int; -reg m_axis_rq_tlast_int = 0; -reg [AXIS_PCIE_RQ_USER_WIDTH-1:0] m_axis_rq_tuser_int = 0; - -always @* begin - tlp_output_state_next = TLP_OUTPUT_STATE_IDLE; - - out_tlp_data_next = out_tlp_data_reg; - out_tlp_strb_next = out_tlp_strb_reg; - out_tlp_eop_next = out_tlp_eop_reg; - - tx_rd_req_tlp_ready_cmb = 1'b0; - tx_wr_req_tlp_ready_cmb = 1'b0; - - // TLP header and sideband data - tlp_header_data_rd[1:0] = tx_rd_req_tlp_hdr[107:106]; // address type - tlp_header_data_rd[63:2] = tx_rd_req_tlp_hdr[63:2]; // address - tlp_header_data_rd[74:64] = (tx_rd_req_tlp_hdr[105:96] != 0) ? tx_rd_req_tlp_hdr[105:96] : 11'd1024; // DWORD count - if (tx_rd_req_tlp_hdr[124:120] == 5'h02) begin - tlp_header_data_rd[78:75] = REQ_IO_READ; // request type - IO read - end else begin - tlp_header_data_rd[78:75] = REQ_MEM_READ; // request type - memory read - end - tlp_header_data_rd[79] = tx_rd_req_tlp_hdr[110]; // poisoned request - tlp_header_data_rd[95:80] = tx_rd_req_tlp_hdr[95:80]; // requester ID - tlp_header_data_rd[103:96] = tx_rd_req_tlp_hdr[79:72]; // tag - tlp_header_data_rd[119:104] = 16'd0; // completer ID - tlp_header_data_rd[120] = 1'b0; // requester ID enable - tlp_header_data_rd[123:121] = tx_rd_req_tlp_hdr[118:116]; // traffic class - tlp_header_data_rd[126:124] = {tx_rd_req_tlp_hdr[114], tx_rd_req_tlp_hdr[109:108]}; // attr - tlp_header_data_rd[127] = 1'b0; // force ECRC - - if (AXIS_PCIE_DATA_WIDTH == 512) begin - tlp_tuser_rd[3:0] = tx_rd_req_tlp_hdr[67:64]; // first BE 0 - tlp_tuser_rd[7:4] = 4'd0; // first BE 1 - tlp_tuser_rd[11:8] = tx_rd_req_tlp_hdr[71:68]; // last BE 0 - tlp_tuser_rd[15:12] = 4'd0; // last BE 1 - tlp_tuser_rd[19:16] 
= 3'd0; // addr_offset - tlp_tuser_rd[21:20] = 2'b01; // is_sop - tlp_tuser_rd[23:22] = 2'd0; // is_sop0_ptr - tlp_tuser_rd[25:24] = 2'd0; // is_sop1_ptr - tlp_tuser_rd[27:26] = 2'b01; // is_eop - tlp_tuser_rd[31:28] = 4'd3; // is_eop0_ptr - tlp_tuser_rd[35:32] = 4'd0; // is_eop1_ptr - tlp_tuser_rd[36] = 1'b0; // discontinue - tlp_tuser_rd[38:37] = 2'b00; // tph_present - tlp_tuser_rd[42:39] = 4'b0000; // tph_type - tlp_tuser_rd[44:43] = 2'b00; // tph_indirect_tag_en - tlp_tuser_rd[60:45] = 16'd0; // tph_st_tag - tlp_tuser_rd[66:61] = tx_rd_req_tlp_seq_int; // seq_num0 - tlp_tuser_rd[72:67] = 6'd0; // seq_num1 - tlp_tuser_rd[136:73] = 64'd0; // parity - end else begin - tlp_tuser_rd[3:0] = tx_rd_req_tlp_hdr[67:64]; // first BE - tlp_tuser_rd[7:4] = tx_rd_req_tlp_hdr[71:68]; // last BE - tlp_tuser_rd[10:8] = 3'd0; // addr_offset - tlp_tuser_rd[11] = 1'b0; // discontinue - tlp_tuser_rd[12] = 1'b0; // tph_present - tlp_tuser_rd[14:13] = 2'b00; // tph_type - tlp_tuser_rd[15] = 1'b0; // tph_indirect_tag_en - tlp_tuser_rd[23:16] = 8'd0; // tph_st_tag - tlp_tuser_rd[27:24] = tx_rd_req_tlp_seq_int; // seq_num - tlp_tuser_rd[59:28] = 32'd0; // parity - if (AXIS_PCIE_RQ_USER_WIDTH == 62) begin - tlp_tuser_rd[61:60] = tx_rd_req_tlp_seq_int >> 4; // seq_num - end - end - - tlp_header_data_wr[1:0] = tx_wr_req_tlp_hdr[107:106]; // address type - tlp_header_data_wr[63:2] = tx_wr_req_tlp_hdr[63:2]; // address - tlp_header_data_wr[74:64] = (tx_wr_req_tlp_hdr[105:96] != 0) ? 
tx_wr_req_tlp_hdr[105:96] : 11'd1024; // DWORD count - if (tx_wr_req_tlp_hdr[124:120] == 5'h02) begin - tlp_header_data_wr[78:75] = REQ_IO_WRITE; // request type - IO write - end else begin - tlp_header_data_wr[78:75] = REQ_MEM_WRITE; // request type - memory write - end - tlp_header_data_wr[79] = tx_wr_req_tlp_hdr[110]; // poisoned request - tlp_header_data_wr[95:80] = tx_wr_req_tlp_hdr[95:80]; // requester ID - tlp_header_data_wr[103:96] = tx_wr_req_tlp_hdr[79:72]; // tag - tlp_header_data_wr[119:104] = 16'd0; // completer ID - tlp_header_data_wr[120] = 1'b0; // requester ID enable - tlp_header_data_wr[123:121] = tx_wr_req_tlp_hdr[118:116]; // traffic class - tlp_header_data_wr[126:124] = {tx_wr_req_tlp_hdr[114], tx_wr_req_tlp_hdr[109:108]}; // attr - tlp_header_data_wr[127] = 1'b0; // force ECRC - - if (AXIS_PCIE_DATA_WIDTH == 512) begin - tlp_tuser_wr[3:0] = tx_wr_req_tlp_hdr[67:64]; // first BE 0 - tlp_tuser_wr[7:4] = 4'd0; // first BE 1 - tlp_tuser_wr[11:8] = tx_wr_req_tlp_hdr[71:68]; // last BE 0 - tlp_tuser_wr[15:12] = 4'd0; // last BE 1 - tlp_tuser_wr[19:16] = 3'd0; // addr_offset - tlp_tuser_wr[21:20] = 2'b01; // is_sop - tlp_tuser_wr[23:22] = 2'd0; // is_sop0_ptr - tlp_tuser_wr[25:24] = 2'd0; // is_sop1_ptr - tlp_tuser_wr[27:26] = 2'b01; // is_eop - tlp_tuser_wr[31:28] = 4'd3; // is_eop0_ptr - tlp_tuser_wr[35:32] = 4'd0; // is_eop1_ptr - tlp_tuser_wr[36] = 1'b0; // discontinue - tlp_tuser_wr[38:37] = 2'b00; // tph_present - tlp_tuser_wr[42:39] = 4'b0000; // tph_type - tlp_tuser_wr[44:43] = 2'b00; // tph_indirect_tag_en - tlp_tuser_wr[60:45] = 16'd0; // tph_st_tag - tlp_tuser_wr[66:61] = tx_wr_req_tlp_seq_int; // seq_num0 - tlp_tuser_wr[72:67] = 6'd0; // seq_num1 - tlp_tuser_wr[136:73] = 64'd0; // parity - end else begin - tlp_tuser_wr[3:0] = tx_wr_req_tlp_hdr[67:64]; // first BE - tlp_tuser_wr[7:4] = tx_wr_req_tlp_hdr[71:68]; // last BE - tlp_tuser_wr[10:8] = 3'd0; // addr_offset - tlp_tuser_wr[11] = 1'b0; // discontinue - tlp_tuser_wr[12] = 1'b0; // 
tph_present - tlp_tuser_wr[14:13] = 2'b00; // tph_type - tlp_tuser_wr[15] = 1'b0; // tph_indirect_tag_en - tlp_tuser_wr[23:16] = 8'd0; // tph_st_tag - tlp_tuser_wr[27:24] = tx_wr_req_tlp_seq_int; // seq_num - tlp_tuser_wr[59:28] = 32'd0; // parity - if (AXIS_PCIE_RQ_USER_WIDTH == 62) begin - tlp_tuser_wr[61:60] = tx_wr_req_tlp_seq_int >> 4; // seq_num - end - end - - // TLP output - m_axis_rq_tdata_int = 0; - m_axis_rq_tkeep_int = 0; - m_axis_rq_tvalid_int = 1'b0; - m_axis_rq_tlast_int = 1'b0; - m_axis_rq_tuser_int = 0; - - // combine header and payload, merge in read request TLPs - case (tlp_output_state_reg) - TLP_OUTPUT_STATE_IDLE: begin - // idle state - - if (tx_rd_req_tlp_valid && m_axis_rq_tready_int) begin - if (AXIS_PCIE_DATA_WIDTH == 64) begin - // 64 bit interface, send first half of header (read request) - m_axis_rq_tdata_int = tlp_header_data_rd[63:0]; - m_axis_rq_tkeep_int = 2'b11; - m_axis_rq_tvalid_int = 1'b1; - m_axis_rq_tlast_int = 1'b0; - m_axis_rq_tuser_int = tlp_tuser_rd; - - tlp_output_state_next = TLP_OUTPUT_STATE_RD_HEADER; - end else begin - // wider interface, send complete header (read request) - m_axis_rq_tdata_int = tlp_header_data_rd; - m_axis_rq_tkeep_int = 4'b1111; - m_axis_rq_tvalid_int = 1'b1; - m_axis_rq_tlast_int = 1'b1; - m_axis_rq_tuser_int = tlp_tuser_rd; - - tx_rd_req_tlp_ready_cmb = 1'b1; - tlp_output_state_next = TLP_OUTPUT_STATE_IDLE; - end - end else if (tx_wr_req_tlp_valid && m_axis_rq_tready_int) begin - if (AXIS_PCIE_DATA_WIDTH == 64) begin - // 64 bit interface, send first half of header (write request) - m_axis_rq_tdata_int = tlp_header_data_wr[63:0]; - m_axis_rq_tkeep_int = 2'b11; - m_axis_rq_tvalid_int = 1'b1; - m_axis_rq_tlast_int = 1'b0; - m_axis_rq_tuser_int = tlp_tuser_wr; - - tlp_output_state_next = TLP_OUTPUT_STATE_WR_HEADER; - end else if (AXIS_PCIE_DATA_WIDTH == 128) begin - // 128 bit interface, send complete header (write request) - m_axis_rq_tdata_int = tlp_header_data_wr; - m_axis_rq_tkeep_int = 
4'b1111; - m_axis_rq_tvalid_int = 1'b1; - m_axis_rq_tlast_int = 1'b0; - m_axis_rq_tuser_int = tlp_tuser_wr; - - tlp_output_state_next = TLP_OUTPUT_STATE_WR_PAYLOAD; - end else begin - // wider interface, send header and start of payload (write request) - m_axis_rq_tdata_int = {tx_wr_req_tlp_data, tlp_header_data_wr}; - m_axis_rq_tkeep_int = {tx_wr_req_tlp_strb, 4'b1111}; - m_axis_rq_tvalid_int = 1'b1; - m_axis_rq_tlast_int = 1'b0; - m_axis_rq_tuser_int = tlp_tuser_wr; - - tx_wr_req_tlp_ready_cmb = 1'b1; - - out_tlp_data_next = tx_wr_req_tlp_data; - out_tlp_strb_next = tx_wr_req_tlp_strb; - out_tlp_eop_next = tx_wr_req_tlp_eop; - - if (tx_wr_req_tlp_eop && ((tx_wr_req_tlp_strb >> (TLP_DATA_WIDTH_DWORDS-4)) == 0)) begin - m_axis_rq_tlast_int = 1'b1; - tlp_output_state_next = TLP_OUTPUT_STATE_IDLE; - end else begin - tlp_output_state_next = TLP_OUTPUT_STATE_WR_PAYLOAD; - end - end - end else begin - tlp_output_state_next = TLP_OUTPUT_STATE_IDLE; - end - end - TLP_OUTPUT_STATE_RD_HEADER: begin - // second cycle of header (read request) (64 bit interface width only) - if (AXIS_PCIE_DATA_WIDTH == 64) begin - m_axis_rq_tdata_int = tlp_header_data_rd[127:64]; - m_axis_rq_tkeep_int = 2'b11; - m_axis_rq_tlast_int = 1'b1; - m_axis_rq_tuser_int = tlp_tuser_rd; - - if (tx_rd_req_tlp_valid && m_axis_rq_tready_int) begin - m_axis_rq_tvalid_int = 1'b1; - - tx_rd_req_tlp_ready_cmb = 1'b1; - tlp_output_state_next = TLP_OUTPUT_STATE_IDLE; - end else begin - tlp_output_state_next = TLP_OUTPUT_STATE_RD_HEADER; - end - end - end - TLP_OUTPUT_STATE_WR_HEADER: begin - // second cycle of header (write request) (64 bit interface width only) - if (AXIS_PCIE_DATA_WIDTH == 64) begin - m_axis_rq_tdata_int = tlp_header_data_wr[127:64]; - m_axis_rq_tkeep_int = 2'b11; - m_axis_rq_tlast_int = 1'b0; - m_axis_rq_tuser_int = tlp_tuser_wr; - - if (tx_wr_req_tlp_valid && m_axis_rq_tready_int) begin - m_axis_rq_tvalid_int = 1'b1; - - tlp_output_state_next = TLP_OUTPUT_STATE_WR_PAYLOAD; - end else begin - 
tlp_output_state_next = TLP_OUTPUT_STATE_WR_HEADER; - end - end - end - TLP_OUTPUT_STATE_WR_PAYLOAD: begin - // transfer payload (write request) - if (AXIS_PCIE_DATA_WIDTH >= 256) begin - m_axis_rq_tdata_int = {tx_wr_req_tlp_data, out_tlp_data_reg[TLP_DATA_WIDTH-1:TLP_DATA_WIDTH-128]}; - if (tx_wr_req_tlp_valid && !out_tlp_eop_reg) begin - m_axis_rq_tkeep_int = {tx_wr_req_tlp_strb, out_tlp_strb_reg[TLP_STRB_WIDTH-1:TLP_DATA_WIDTH_DWORDS-4]}; - end else begin - m_axis_rq_tkeep_int = out_tlp_strb_reg[TLP_STRB_WIDTH-1:TLP_DATA_WIDTH_DWORDS-4]; - end - m_axis_rq_tlast_int = 1'b0; - m_axis_rq_tuser_int = tlp_tuser_wr; - - if ((tx_wr_req_tlp_valid || out_tlp_eop_reg) && m_axis_rq_tready_int) begin - m_axis_rq_tvalid_int = 1'b1; - tx_wr_req_tlp_ready_cmb = !out_tlp_eop_reg; - - out_tlp_data_next = tx_wr_req_tlp_data; - out_tlp_strb_next = tx_wr_req_tlp_strb; - out_tlp_eop_next = tx_wr_req_tlp_eop; - - if (out_tlp_eop_reg || (tx_wr_req_tlp_eop && ((tx_wr_req_tlp_strb >> (TLP_DATA_WIDTH_DWORDS-4)) == 0))) begin - m_axis_rq_tlast_int = 1'b1; - tlp_output_state_next = TLP_OUTPUT_STATE_IDLE; - end else begin - tlp_output_state_next = TLP_OUTPUT_STATE_WR_PAYLOAD; - end - end else begin - tlp_output_state_next = TLP_OUTPUT_STATE_WR_PAYLOAD; - end - end else begin - m_axis_rq_tdata_int = tx_wr_req_tlp_data; - m_axis_rq_tkeep_int = tx_wr_req_tlp_strb; - m_axis_rq_tlast_int = 1'b0; - m_axis_rq_tuser_int = tlp_tuser_wr; - - if (tx_wr_req_tlp_valid && m_axis_rq_tready_int) begin - m_axis_rq_tvalid_int = 1'b1; - tx_wr_req_tlp_ready_cmb = 1'b1; - - if (tx_wr_req_tlp_eop) begin - m_axis_rq_tlast_int = 1'b1; - tlp_output_state_next = TLP_OUTPUT_STATE_IDLE; - end else begin - tlp_output_state_next = TLP_OUTPUT_STATE_WR_PAYLOAD; - end - end else begin - tlp_output_state_next = TLP_OUTPUT_STATE_WR_PAYLOAD; - end - end - end - endcase -end - -always @(posedge clk) begin - tlp_output_state_reg <= tlp_output_state_next; - - out_tlp_data_reg <= out_tlp_data_next; - out_tlp_strb_reg <= 
out_tlp_strb_next; - out_tlp_eop_reg <= out_tlp_eop_next; - - if (rst) begin - tlp_output_state_reg <= TLP_OUTPUT_STATE_IDLE; - end -end - -// output datapath logic (PCIe TLP) -reg [AXIS_PCIE_DATA_WIDTH-1:0] m_axis_rq_tdata_reg = {AXIS_PCIE_DATA_WIDTH{1'b0}}; -reg [AXIS_PCIE_KEEP_WIDTH-1:0] m_axis_rq_tkeep_reg = {AXIS_PCIE_KEEP_WIDTH{1'b0}}; -reg m_axis_rq_tvalid_reg = 1'b0, m_axis_rq_tvalid_next; -reg m_axis_rq_tlast_reg = 1'b0; -reg [AXIS_PCIE_RQ_USER_WIDTH-1:0] m_axis_rq_tuser_reg = {AXIS_PCIE_RQ_USER_WIDTH{1'b0}}; - -reg [OUTPUT_FIFO_ADDR_WIDTH+1-1:0] out_fifo_wr_ptr_reg = 0; -reg [OUTPUT_FIFO_ADDR_WIDTH+1-1:0] out_fifo_rd_ptr_reg = 0; -reg out_fifo_half_full_reg = 1'b0; - -wire out_fifo_full = out_fifo_wr_ptr_reg == (out_fifo_rd_ptr_reg ^ {1'b1, {OUTPUT_FIFO_ADDR_WIDTH{1'b0}}}); -wire out_fifo_empty = out_fifo_wr_ptr_reg == out_fifo_rd_ptr_reg; - -(* ram_style = "distributed" *) -reg [AXIS_PCIE_DATA_WIDTH-1:0] out_fifo_tdata[2**OUTPUT_FIFO_ADDR_WIDTH-1:0]; -(* ram_style = "distributed" *) -reg [AXIS_PCIE_KEEP_WIDTH-1:0] out_fifo_tkeep[2**OUTPUT_FIFO_ADDR_WIDTH-1:0]; -(* ram_style = "distributed" *) -reg out_fifo_tlast[2**OUTPUT_FIFO_ADDR_WIDTH-1:0]; -(* ram_style = "distributed" *) -reg [AXIS_PCIE_RQ_USER_WIDTH-1:0] out_fifo_tuser[2**OUTPUT_FIFO_ADDR_WIDTH-1:0]; - -assign m_axis_rq_tready_int = !out_fifo_half_full_reg; - assign m_axis_rq_tdata = m_axis_rq_tdata_reg; assign m_axis_rq_tkeep = m_axis_rq_tkeep_reg; assign m_axis_rq_tvalid = m_axis_rq_tvalid_reg; assign m_axis_rq_tlast = m_axis_rq_tlast_reg; assign m_axis_rq_tuser = m_axis_rq_tuser_reg; +wire [TLP_DATA_WIDTH-1:0] fifo_tlp_data[0:PORTS-1]; +wire [TLP_STRB_WIDTH-1:0] fifo_tlp_strb[0:PORTS-1]; +wire [INT_TLP_SEG_COUNT*TLP_HDR_WIDTH-1:0] fifo_tlp_hdr[0:PORTS-1]; +wire [INT_TLP_SEG_COUNT*TX_SEQ_NUM_WIDTH-1:0] fifo_tlp_seq[0:PORTS-1]; +wire [INT_TLP_SEG_COUNT-1:0] fifo_tlp_valid[0:PORTS-1]; +wire [INT_TLP_SEG_COUNT-1:0] fifo_tlp_sop[0:PORTS-1]; +wire [INT_TLP_SEG_COUNT-1:0] fifo_tlp_eop[0:PORTS-1]; +wire 
[SEG_SEL_WIDTH-1:0] fifo_seg_offset[0:PORTS-1]; +wire [SEG_SEL_WIDTH+1-1:0] fifo_seg_count[0:PORTS-1]; +reg [PORTS-1:0] fifo_read_en; +reg [SEG_SEL_WIDTH+1-1:0] fifo_read_seg_count[0:PORTS-1]; + +reg [INT_TLP_SEG_COUNT-1:0] fifo_tlp_extra[0:PORTS-1]; + +// read requests +pcie_tlp_fifo_raw #( + .DEPTH((1024/4)*2), + .TLP_DATA_WIDTH(TLP_DATA_WIDTH), + .TLP_STRB_WIDTH(TLP_STRB_WIDTH), + .TLP_HDR_WIDTH(TLP_HDR_WIDTH), + .SEQ_NUM_WIDTH(TX_SEQ_NUM_WIDTH), + .IN_TLP_SEG_COUNT(TLP_SEG_COUNT), + .OUT_TLP_SEG_COUNT(INT_TLP_SEG_COUNT), + .CTRL_OUT_EN(0) +) +rd_req_fifo_inst ( + .clk(clk), + .rst(rst), + + /* + * TLP input + */ + .in_tlp_data(0), + .in_tlp_strb(0), + .in_tlp_hdr(tx_rd_req_tlp_hdr), + .in_tlp_seq(tx_rd_req_tlp_seq), + .in_tlp_bar_id(0), + .in_tlp_func_num(0), + .in_tlp_error(0), + .in_tlp_valid(tx_rd_req_tlp_valid), + .in_tlp_sop(1'b1), + .in_tlp_eop(1'b1), + .in_tlp_ready(tx_rd_req_tlp_ready), + + /* + * TLP output + */ + .out_tlp_data(), + .out_tlp_strb(), + .out_tlp_hdr(fifo_tlp_hdr[0]), + .out_tlp_seq(fifo_tlp_seq[0]), + .out_tlp_bar_id(), + .out_tlp_func_num(), + .out_tlp_error(), + .out_tlp_valid(fifo_tlp_valid[0]), + .out_tlp_sop(fifo_tlp_sop[0]), + .out_tlp_eop(fifo_tlp_eop[0]), + .out_seg_offset(fifo_seg_offset[0]), + .out_seg_count(fifo_seg_count[0]), + .out_read_en(fifo_read_en[0]), + .out_read_seg_count(fifo_read_seg_count[0]), + + .out_ctrl_tlp_strb(), + .out_ctrl_tlp_hdr(), + .out_ctrl_tlp_valid(), + .out_ctrl_tlp_sop(), + .out_ctrl_tlp_eop(), + .out_ctrl_seg_offset(), + .out_ctrl_seg_count(), + .out_ctrl_read_en(0), + .out_ctrl_read_seg_count(0), + + /* + * Status + */ + .half_full(), + .watermark() +); + +assign fifo_tlp_data[0] = 0; +assign fifo_tlp_strb[0] = 0; + +// write requests +pcie_tlp_fifo_raw #( + .DEPTH((1024/4)*2), + .TLP_DATA_WIDTH(TLP_DATA_WIDTH), + .TLP_STRB_WIDTH(TLP_STRB_WIDTH), + .TLP_HDR_WIDTH(TLP_HDR_WIDTH), + .SEQ_NUM_WIDTH(TX_SEQ_NUM_WIDTH), + .IN_TLP_SEG_COUNT(TLP_SEG_COUNT), + .OUT_TLP_SEG_COUNT(INT_TLP_SEG_COUNT), + 
.CTRL_OUT_EN(0) +) +wr_req_fifo_inst ( + .clk(clk), + .rst(rst), + + /* + * TLP input + */ + .in_tlp_data(tx_wr_req_tlp_data), + .in_tlp_strb(tx_wr_req_tlp_strb), + .in_tlp_hdr(tx_wr_req_tlp_hdr), + .in_tlp_seq(tx_wr_req_tlp_seq), + .in_tlp_bar_id(0), + .in_tlp_func_num(0), + .in_tlp_error(0), + .in_tlp_valid(tx_wr_req_tlp_valid), + .in_tlp_sop(tx_wr_req_tlp_sop), + .in_tlp_eop(tx_wr_req_tlp_eop), + .in_tlp_ready(tx_wr_req_tlp_ready), + + /* + * TLP output + */ + .out_tlp_data(fifo_tlp_data[1]), + .out_tlp_strb(fifo_tlp_strb[1]), + .out_tlp_hdr(fifo_tlp_hdr[1]), + .out_tlp_seq(fifo_tlp_seq[1]), + .out_tlp_bar_id(), + .out_tlp_func_num(), + .out_tlp_error(), + .out_tlp_valid(fifo_tlp_valid[1]), + .out_tlp_sop(fifo_tlp_sop[1]), + .out_tlp_eop(fifo_tlp_eop[1]), + .out_seg_offset(fifo_seg_offset[1]), + .out_seg_count(fifo_seg_count[1]), + .out_read_en(fifo_read_en[1]), + .out_read_seg_count(fifo_read_seg_count[1]), + + .out_ctrl_tlp_strb(), + .out_ctrl_tlp_hdr(), + .out_ctrl_tlp_valid(), + .out_ctrl_tlp_sop(), + .out_ctrl_tlp_eop(), + .out_ctrl_seg_offset(), + .out_ctrl_seg_count(), + .out_ctrl_read_en(0), + .out_ctrl_read_seg_count(0), + + /* + * Status + */ + .half_full(), + .watermark() +); + +integer port, cur_port, seg, cur_seg, lane; + +always @* begin + frame_next = frame_reg; + tlp_hdr1_next = tlp_hdr1_reg; + tlp_hdr2_next = tlp_hdr2_reg; + tlp_split1_next = tlp_split1_reg; + tlp_split2_next = tlp_split2_reg; + port_next = port_reg; + + m_axis_rq_tdata_next = m_axis_rq_tdata_reg; + m_axis_rq_tkeep_next = m_axis_rq_tkeep_reg; + m_axis_rq_tvalid_next = m_axis_rq_tvalid_reg && !m_axis_rq_tready; + m_axis_rq_tlast_next = m_axis_rq_tlast_reg; + m_axis_rq_tuser_next = m_axis_rq_tuser_reg; + + fifo_read_en = 0; + + frame_cyc = frame_reg; + tlp_hdr1_cyc = tlp_hdr1_reg; + tlp_hdr2_cyc = tlp_hdr2_reg; + tlp_split1_cyc = tlp_split1_reg; + tlp_split2_cyc = tlp_split2_reg; + port_cyc = port_reg; + seg_offset_cyc = fifo_seg_offset[port_reg]; + seg_count_cyc = 0; + valid = 0; 
+ eop = 0; + frame = frame_cyc; + abort = 0; + + eop_index = 0; + + for (port = 0; port < PORTS; port = port + 1) begin + port_seg_offset_cyc[port] = fifo_seg_offset[port]; + port_seg_count_cyc[port] = 0; + fifo_read_seg_count[port] = 0; + end + + out_sel = 0; + out_sel_cyc = 0; + out_sop = 0; + out_eop = 0; + out_tlp_hdr1 = 0; + out_tlp_hdr2 = 0; + out_tlp_split1 = 0; + out_tlp_split2 = 0; + for (seg = 0; seg < INT_TLP_SEG_COUNT; seg = seg + 1) begin + out_sel_port[seg] = 0; + out_sel_seg[seg] = 0; + end + + out_shift_tlp_data_next = out_shift_tlp_data_reg; + out_shift_tlp_strb_next = out_shift_tlp_strb_reg; + + // pre-compute + for (port = 0; port < PORTS; port = port + 1) begin + for (seg = 0; seg < INT_TLP_SEG_COUNT; seg = seg + 1) begin + fifo_tlp_extra[port][seg] = fifo_tlp_eop[port][seg] && fifo_tlp_strb[port][seg*INT_TLP_SEG_STRB_WIDTH +: INT_TLP_SEG_STRB_WIDTH] >> (INT_TLP_SEG_STRB_WIDTH-4); + end + end + + // compute mux settings + for (port = 0; port < PORTS; port = port + 1) begin + port_seg_valid[port] = {2{fifo_tlp_valid[port]}} >> fifo_seg_offset[port]; + port_seg_sop[port] = {2{fifo_tlp_sop[port]}} >> fifo_seg_offset[port]; + port_seg_eop[port] = {2{fifo_tlp_eop[port]}} >> fifo_seg_offset[port]; + port_seg_extra[port] = {2{fifo_tlp_extra[port]}} >> fifo_seg_offset[port]; + end + + for (seg = 0; seg < INT_TLP_SEG_COUNT; seg = seg + 1) begin + // select port + if (!frame_cyc && !abort) begin + // priority arb - start from high priority end + cur_port = 0; + tlp_hdr1_cyc = 1'b1; + tlp_hdr2_cyc = 1'b0; + tlp_split1_cyc = 1'b0; + tlp_split2_cyc = 1'b0; + for (port = 0; port < PORTS; port = port + 1) begin + if (port_seg_valid[cur_port][0] && !frame_cyc) begin + // select port, set frame + frame_cyc = 1'b1; + port_cyc = cur_port; + seg_offset_cyc = port_seg_offset_cyc[cur_port]; + seg_count_cyc = port_seg_count_cyc[cur_port]; + end + // next port + if (cur_port < PORTS-1) begin + cur_port = cur_port + 1; + end else begin + cur_port = 0; + end + end + end 
+ + // route segment + valid = port_seg_valid[port_cyc][0]; + sop = port_seg_sop[port_cyc][0]; + eop = port_seg_eop[port_cyc][0]; + frame = frame_cyc; + + out_sel_cyc[seg] = 1'b1; + out_sop[seg] = tlp_hdr1_cyc; + out_sel_port[seg] = port_cyc; + out_sel_seg[seg] = seg_offset_cyc; + + out_tlp_hdr1[seg] = tlp_hdr1_cyc; + out_tlp_hdr2[seg] = tlp_hdr2_cyc; + + if (AXIS_PCIE_DATA_WIDTH == 64 && tlp_hdr1_cyc) begin + // output header (first cycle) + tlp_hdr1_cyc = 1'b0; + tlp_hdr2_cyc = 1'b1; + end else if ((AXIS_PCIE_DATA_WIDTH == 128 && tlp_hdr1_cyc) || (AXIS_PCIE_DATA_WIDTH == 64 && tlp_hdr2_cyc)) begin + // output header (last cycle) + tlp_hdr1_cyc = 1'b0; + tlp_hdr2_cyc = 1'b0; + if (eop && fifo_tlp_strb[port_cyc][seg_offset_cyc*INT_TLP_SEG_STRB_WIDTH +: INT_TLP_SEG_STRB_WIDTH] == 0) begin + // no payload + frame_cyc = 0; + out_eop[seg] = 1'b1; + seg_offset_cyc = seg_offset_cyc + 1; + seg_count_cyc = seg_count_cyc + 1; + port_seg_valid[port_cyc] = port_seg_valid[port_cyc] >> 1; + port_seg_sop[port_cyc] = port_seg_sop[port_cyc] >> 1; + port_seg_eop[port_cyc] = port_seg_eop[port_cyc] >> 1; + port_seg_extra[port_cyc] = port_seg_extra[port_cyc] >> 1; + end + end else if (AXIS_PCIE_DATA_WIDTH > 128 && port_seg_extra[port_cyc][0]) begin + tlp_hdr1_cyc = 1'b0; + tlp_hdr2_cyc = 1'b0; + // extra cycle + if (tlp_split1_cyc) begin + frame_cyc = 0; + out_eop[seg] = 1'b1; + tlp_split1_cyc = 1'b0; + tlp_split2_cyc = 1'b1; + seg_offset_cyc = seg_offset_cyc + 1; + seg_count_cyc = seg_count_cyc + 1; + port_seg_valid[port_cyc] = port_seg_valid[port_cyc] >> 1; + port_seg_sop[port_cyc] = port_seg_sop[port_cyc] >> 1; + port_seg_eop[port_cyc] = port_seg_eop[port_cyc] >> 1; + port_seg_extra[port_cyc] = port_seg_extra[port_cyc] >> 1; + end else begin + tlp_split1_cyc = 1'b1; + end + end else begin + tlp_hdr1_cyc = 1'b0; + tlp_hdr2_cyc = 1'b0; + if (eop) begin + // end of packet + frame_cyc = 0; + out_eop[seg] = 1'b1; + end + seg_offset_cyc = seg_offset_cyc + 1; + seg_count_cyc = 
seg_count_cyc + 1; + port_seg_valid[port_cyc] = port_seg_valid[port_cyc] >> 1; + port_seg_sop[port_cyc] = port_seg_sop[port_cyc] >> 1; + port_seg_eop[port_cyc] = port_seg_eop[port_cyc] >> 1; + port_seg_extra[port_cyc] = port_seg_extra[port_cyc] >> 1; + end + tlp_hdr1_cyc = 1'b0; + + out_tlp_split1[seg] = tlp_split1_cyc; + out_tlp_split2[seg] = tlp_split2_cyc; + + if (frame && !abort) begin + if (valid) begin + if (eop || seg == INT_TLP_SEG_COUNT-1) begin + // end of packet or end of cycle, commit + port_seg_offset_cyc[port_cyc] = seg_offset_cyc; + port_seg_count_cyc[port_cyc] = seg_count_cyc; + fifo_read_seg_count[port_cyc] = seg_count_cyc; + if (!m_axis_rq_tvalid || m_axis_rq_tready) begin + frame_next = frame_cyc; + tlp_hdr1_next = tlp_hdr1_cyc; + tlp_hdr2_next = tlp_hdr2_cyc; + tlp_split1_next = tlp_split1_cyc; + tlp_split2_next = tlp_split2_cyc; + out_sel = out_sel_cyc; + port_next = port_cyc; + fifo_read_en[port_cyc] = seg_count_cyc != 0; + end + end + end else begin + // input has stalled, wait + abort = 1; + end + end + end + + // remap + out_tlp_data = 0; + out_tlp_strb = 0; + out_tlp_be = 0; + out_tlp_seq = 0; + out_tlp_valid = 0; + out_tlp_sop = 0; + out_tlp_eop = 0; + + for (seg = 0; seg < INT_TLP_SEG_COUNT; seg = seg + 1) begin + // remap header + seg_tlp_hdr = fifo_tlp_hdr[out_sel_port[seg]][out_sel_seg[seg]*TLP_HDR_WIDTH +: TLP_HDR_WIDTH]; + seg_rc_hdr[1:0] = seg_tlp_hdr[107:106]; // address type + seg_rc_hdr[63:2] = seg_tlp_hdr[63:2]; // address + seg_rc_hdr[74:64] = seg_tlp_hdr[105:96]; // DWORD count + casez (seg_tlp_hdr[127:120]) + 8'b00z_00000: seg_rc_hdr[78:75] = REQ_MEM_READ; + 8'b00z_00001: seg_rc_hdr[78:75] = REQ_MEM_READ_LOCKED; + 8'b01z_00000: seg_rc_hdr[78:75] = REQ_MEM_WRITE; + 8'b00z_00010: seg_rc_hdr[78:75] = REQ_IO_READ; + 8'b01z_00010: seg_rc_hdr[78:75] = REQ_IO_WRITE; + 8'b000_00100: seg_rc_hdr[78:75] = REQ_CFG_READ_0; + 8'b010_00100: seg_rc_hdr[78:75] = REQ_CFG_WRITE_0; + 8'b000_00101: seg_rc_hdr[78:75] = REQ_CFG_READ_1; + 
8'b010_00101: seg_rc_hdr[78:75] = REQ_CFG_WRITE_1; + 8'b01z_01100: seg_rc_hdr[78:75] = REQ_MEM_FETCH_ADD; + 8'b01z_01101: seg_rc_hdr[78:75] = REQ_MEM_SWAP; + 8'b01z_01110: seg_rc_hdr[78:75] = REQ_MEM_CAS; + default: seg_rc_hdr[78:75] = REQ_MEM_WRITE; + endcase + seg_rc_hdr[79] = seg_tlp_hdr[110]; // poisoned request + seg_rc_hdr[95:80] = seg_tlp_hdr[95:80]; // requester ID + seg_rc_hdr[103:96] = seg_tlp_hdr[79:72]; // tag + seg_rc_hdr[119:104] = 16'd0; // completer ID + seg_rc_hdr[120] = 1'b0; // requester ID enable + seg_rc_hdr[123:121] = seg_tlp_hdr[118:116]; // traffic class + seg_rc_hdr[126:124] = {seg_tlp_hdr[114], seg_tlp_hdr[109:108]}; // attr + seg_rc_hdr[127] = 1'b0; // force ECRC + + // mux for output segments + out_tlp_be[seg*8+0 +: 4] = seg_tlp_hdr[67:64]; // first BE + out_tlp_be[seg*8+4 +: 4] = seg_tlp_hdr[71:68]; // last BE + + if (AXIS_PCIE_DATA_WIDTH <= 128) begin + out_tlp_data[seg*INT_TLP_SEG_DATA_WIDTH +: INT_TLP_SEG_DATA_WIDTH] = fifo_tlp_data[out_sel_port[seg]][out_sel_seg[seg]*INT_TLP_SEG_DATA_WIDTH +: INT_TLP_SEG_DATA_WIDTH]; + out_tlp_strb[seg*INT_TLP_SEG_STRB_WIDTH +: INT_TLP_SEG_STRB_WIDTH] = fifo_tlp_strb[out_sel_port[seg]][out_sel_seg[seg]*INT_TLP_SEG_STRB_WIDTH +: INT_TLP_SEG_STRB_WIDTH]; + + if (AXIS_PCIE_DATA_WIDTH == 64) begin + if (out_tlp_hdr1[seg]) begin + out_tlp_data[seg*INT_TLP_SEG_DATA_WIDTH +: INT_TLP_SEG_DATA_WIDTH] = seg_rc_hdr[63:0]; + out_tlp_strb[seg*INT_TLP_SEG_STRB_WIDTH +: INT_TLP_SEG_STRB_WIDTH] = 2'b11; + end else if (out_tlp_hdr2[seg]) begin + out_tlp_data[seg*INT_TLP_SEG_DATA_WIDTH +: INT_TLP_SEG_DATA_WIDTH] = seg_rc_hdr[127:64]; + out_tlp_strb[seg*INT_TLP_SEG_STRB_WIDTH +: INT_TLP_SEG_STRB_WIDTH] = 2'b11; + end + end else begin + if (out_tlp_hdr1[seg]) begin + out_tlp_data[seg*INT_TLP_SEG_DATA_WIDTH +: INT_TLP_SEG_DATA_WIDTH] = seg_rc_hdr; + out_tlp_strb[seg*INT_TLP_SEG_STRB_WIDTH +: INT_TLP_SEG_STRB_WIDTH] = 4'b1111; + end + end + + out_tlp_valid[seg] = out_sel[seg]; + out_tlp_sop[seg] = out_sop[seg]; + 
out_tlp_eop[seg] = out_eop[seg]; + + out_tlp_seq[seg*RQ_SEQ_NUM_WIDTH +: RQ_SEQ_NUM_WIDTH] = fifo_tlp_seq[out_sel_port[seg]][out_sel_seg[seg]*TX_SEQ_NUM_WIDTH +: TX_SEQ_NUM_WIDTH] | (out_sel_port[seg] ? 0 : SEQ_NUM_FLAG); + end else begin + out_tlp_data[seg*INT_TLP_SEG_DATA_WIDTH +: INT_TLP_SEG_DATA_WIDTH] = out_shift_tlp_data_next; + out_tlp_strb[seg*INT_TLP_SEG_STRB_WIDTH +: INT_TLP_SEG_STRB_WIDTH] = out_shift_tlp_strb_next; + out_tlp_data[seg*INT_TLP_SEG_DATA_WIDTH+128 +: INT_TLP_SEG_DATA_WIDTH-128] = fifo_tlp_data[out_sel_port[seg]][out_sel_seg[seg]*INT_TLP_SEG_DATA_WIDTH +: INT_TLP_SEG_DATA_WIDTH-128]; + if (!out_tlp_split2[seg]) begin + out_tlp_strb[seg*INT_TLP_SEG_STRB_WIDTH+4 +: INT_TLP_SEG_STRB_WIDTH-4] = fifo_tlp_strb[out_sel_port[seg]][out_sel_seg[seg]*INT_TLP_SEG_STRB_WIDTH +: INT_TLP_SEG_STRB_WIDTH-4]; + end + + if (out_tlp_hdr1[seg]) begin + out_tlp_data[seg*INT_TLP_SEG_DATA_WIDTH +: 128] = seg_rc_hdr; + out_tlp_strb[seg*INT_TLP_SEG_STRB_WIDTH +: 4] = 4'b1111; + end + + out_tlp_valid[seg] = out_sel[seg]; + out_tlp_sop[seg] = out_sop[seg]; + out_tlp_eop[seg] = out_eop[seg]; + + out_tlp_seq[seg*RQ_SEQ_NUM_WIDTH +: RQ_SEQ_NUM_WIDTH] = fifo_tlp_seq[out_sel_port[seg]][out_sel_seg[seg]*TX_SEQ_NUM_WIDTH +: TX_SEQ_NUM_WIDTH] | (out_sel_port[seg] ? 
0 : SEQ_NUM_FLAG); + + if (out_sel[seg]) begin + out_shift_tlp_data_next = fifo_tlp_data[out_sel_port[seg]][(out_sel_seg[seg]+1)*INT_TLP_SEG_DATA_WIDTH-128 +: 128]; + out_shift_tlp_strb_next = fifo_tlp_strb[out_sel_port[seg]][(out_sel_seg[seg]+1)*INT_TLP_SEG_STRB_WIDTH-4 +: 4]; + end + end + + end + + if (!m_axis_rq_tvalid || m_axis_rq_tready) begin + // remap header and sideband + m_axis_rq_tdata_next = out_tlp_data; + m_axis_rq_tkeep_next = 0; + m_axis_rq_tvalid_next = out_tlp_valid != 0; + m_axis_rq_tlast_next = !(RQ_STRADDLE && AXIS_PCIE_DATA_WIDTH == 512) && (out_tlp_valid & out_tlp_eop); + m_axis_rq_tuser_next = 0; + + for (seg = 0; seg < INT_TLP_SEG_COUNT; seg = seg + 1) begin + if (out_tlp_valid[seg]) begin + m_axis_rq_tkeep_next[seg*INT_TLP_SEG_STRB_WIDTH +: INT_TLP_SEG_STRB_WIDTH] = out_tlp_strb[seg*INT_TLP_SEG_STRB_WIDTH +: INT_TLP_SEG_STRB_WIDTH]; + end + + eop_index[seg*3 +: 3] = 0; + for (lane = 0; lane < INT_TLP_SEG_STRB_WIDTH; lane = lane + 1) begin + if (out_tlp_strb[seg*INT_TLP_SEG_STRB_WIDTH+lane]) begin + eop_index[seg*3 +: 3] = lane; + end + end + end + + if (AXIS_PCIE_DATA_WIDTH == 512) begin + if (INT_TLP_SEG_COUNT == 1) begin + m_axis_rq_tuser_next[3:0] = out_tlp_be[0*8+0 +: 4]; // first BE 0 + m_axis_rq_tuser_next[11:8] = out_tlp_be[0*8+4 +: 4]; // last BE 0 + m_axis_rq_tuser_next[7:4] = 0; // first BE 1 + m_axis_rq_tuser_next[15:12] = 0; // last BE 1 + end else begin + case (out_tlp_valid & out_tlp_sop) + 2'b00: begin + m_axis_rq_tuser_next[3:0] = out_tlp_be[0*8+0 +: 4]; // first BE 0 + m_axis_rq_tuser_next[11:8] = out_tlp_be[0*8+4 +: 4]; // last BE 0 + end + 2'b01: begin + m_axis_rq_tuser_next[3:0] = out_tlp_be[0*8+0 +: 4]; // first BE 0 + m_axis_rq_tuser_next[11:8] = out_tlp_be[0*8+4 +: 4]; // last BE 0 + end + 2'b10: begin + m_axis_rq_tuser_next[3:0] = out_tlp_be[1*8+0 +: 4]; // first BE 0 + m_axis_rq_tuser_next[11:8] = out_tlp_be[1*8+4 +: 4]; // last BE 0 + end + 2'b11: begin + m_axis_rq_tuser_next[3:0] = out_tlp_be[0*8+0 +: 4]; // 
first BE 0 + m_axis_rq_tuser_next[11:8] = out_tlp_be[0*8+4 +: 4]; // last BE 0 + end + endcase + m_axis_rq_tuser_next[7:4] = out_tlp_be[1*8+0 +: 4]; // first BE 1 + m_axis_rq_tuser_next[15:12] = out_tlp_be[1*8+4 +: 4]; // last BE 1 + end + m_axis_rq_tuser_next[19:16] = 3'd0; // addr_offset + if (INT_TLP_SEG_COUNT > 1) begin + case (out_tlp_valid & out_tlp_sop) + 2'b00: begin + m_axis_rq_tuser_next[21:20] = 2'b00; // is_sop + m_axis_rq_tuser_next[23:22] = 2'd0; // is_sop0_ptr + end + 2'b01: begin + m_axis_rq_tuser_next[21:20] = 2'b01; // is_sop + m_axis_rq_tuser_next[23:22] = 2'd0; // is_sop0_ptr + end + 2'b10: begin + m_axis_rq_tuser_next[21:20] = 2'b01; // is_sop + m_axis_rq_tuser_next[23:22] = 2'd2; // is_sop0_ptr + end + 2'b11: begin + m_axis_rq_tuser_next[21:20] = 2'b11; // is_sop + m_axis_rq_tuser_next[23:22] = 2'd0; // is_sop0_ptr + end + endcase + m_axis_rq_tuser_next[25:24] = 2'd2; // is_sop1_ptr + case (out_tlp_valid & out_tlp_eop) + 2'b00: begin + m_axis_rq_tuser_next[27:26] = 2'b00; // is_eop + m_axis_rq_tuser_next[31:28] = eop_index[0*3 +: 3]; // is_eop0_ptr + end + 2'b01: begin + m_axis_rq_tuser_next[27:26] = 2'b01; // is_eop + m_axis_rq_tuser_next[31:28] = eop_index[0*3 +: 3]; // is_eop0_ptr + end + 2'b10: begin + m_axis_rq_tuser_next[27:26] = 2'b01; // is_eop + m_axis_rq_tuser_next[31:28] = 4'd8+eop_index[1*3 +: 3]; // is_eop0_ptr + end + 2'b11: begin + m_axis_rq_tuser_next[27:26] = 2'b11; // is_eop + m_axis_rq_tuser_next[31:28] = eop_index[0*3 +: 3]; // is_eop0_ptr + end + endcase + m_axis_rq_tuser_next[35:32] = 4'd8+eop_index[1*3 +: 3]; // is_eop1_ptr + end + m_axis_rq_tuser_next[36] = 1'b0; // discontinue + m_axis_rq_tuser_next[38:37] = 2'b00; // tph_present + m_axis_rq_tuser_next[42:39] = 4'b0000; // tph_type + m_axis_rq_tuser_next[44:43] = 2'b00; // tph_indirect_tag_en + m_axis_rq_tuser_next[60:45] = 16'd0; // tph_st_tag + if (INT_TLP_SEG_COUNT == 1) begin + m_axis_rq_tuser_next[66:61] = out_tlp_seq[0*RQ_SEQ_NUM_WIDTH +: RQ_SEQ_NUM_WIDTH]; // 
seq_num0 + m_axis_rq_tuser_next[72:67] = 0; // seq_num1 + end else begin + case (out_tlp_valid & out_tlp_sop) + 2'b00: begin + m_axis_rq_tuser_next[66:61] = out_tlp_seq[0*RQ_SEQ_NUM_WIDTH +: RQ_SEQ_NUM_WIDTH]; // seq_num0 + end + 2'b01: begin + m_axis_rq_tuser_next[66:61] = out_tlp_seq[0*RQ_SEQ_NUM_WIDTH +: RQ_SEQ_NUM_WIDTH]; // seq_num0 + end + 2'b10: begin + m_axis_rq_tuser_next[66:61] = out_tlp_seq[1*RQ_SEQ_NUM_WIDTH +: RQ_SEQ_NUM_WIDTH]; // seq_num0 + end + 2'b11: begin + m_axis_rq_tuser_next[66:61] = out_tlp_seq[0*RQ_SEQ_NUM_WIDTH +: RQ_SEQ_NUM_WIDTH]; // seq_num0 + end + endcase + m_axis_rq_tuser_next[72:67] = out_tlp_seq[1*RQ_SEQ_NUM_WIDTH +: RQ_SEQ_NUM_WIDTH]; // seq_num1 + end + m_axis_rq_tuser_next[136:73] = 64'd0; // parity + end else begin + m_axis_rq_tuser_next[3:0] = out_tlp_be[0*8+0 +: 4]; // first BE + m_axis_rq_tuser_next[7:4] = out_tlp_be[0*8+4 +: 4]; // last BE + m_axis_rq_tuser_next[10:8] = 3'd0; // addr_offset + m_axis_rq_tuser_next[11] = 1'b0; // discontinue + m_axis_rq_tuser_next[12] = 1'b0; // tph_present + m_axis_rq_tuser_next[14:13] = 2'b00; // tph_type + m_axis_rq_tuser_next[15] = 1'b0; // tph_indirect_tag_en + m_axis_rq_tuser_next[23:16] = 8'd0; // tph_st_tag + m_axis_rq_tuser_next[27:24] = out_tlp_seq[0*RQ_SEQ_NUM_WIDTH +: RQ_SEQ_NUM_WIDTH]; // seq_num + m_axis_rq_tuser_next[59:28] = 32'd0; // parity + if (AXIS_PCIE_RQ_USER_WIDTH == 62) begin + m_axis_rq_tuser_next[61:60] = out_tlp_seq[0*RQ_SEQ_NUM_WIDTH +: RQ_SEQ_NUM_WIDTH] >> 4; // seq_num + end + end + end +end + +integer i; + always @(posedge clk) begin - m_axis_rq_tvalid_reg <= m_axis_rq_tvalid_reg && !m_axis_rq_tready; + frame_reg <= frame_next; + tlp_hdr1_reg <= tlp_hdr1_next; + tlp_hdr2_reg <= tlp_hdr2_next; + tlp_split1_reg <= tlp_split1_next; + tlp_split2_reg <= tlp_split2_next; + port_reg <= port_next; - out_fifo_half_full_reg <= $unsigned(out_fifo_wr_ptr_reg - out_fifo_rd_ptr_reg) >= 2**(OUTPUT_FIFO_ADDR_WIDTH-1); + out_shift_tlp_data_reg <= out_shift_tlp_data_next; + 
out_shift_tlp_strb_reg <= out_shift_tlp_strb_next; - if (!out_fifo_full && m_axis_rq_tvalid_int) begin - out_fifo_tdata[out_fifo_wr_ptr_reg[OUTPUT_FIFO_ADDR_WIDTH-1:0]] <= m_axis_rq_tdata_int; - out_fifo_tkeep[out_fifo_wr_ptr_reg[OUTPUT_FIFO_ADDR_WIDTH-1:0]] <= m_axis_rq_tkeep_int; - out_fifo_tlast[out_fifo_wr_ptr_reg[OUTPUT_FIFO_ADDR_WIDTH-1:0]] <= m_axis_rq_tlast_int; - out_fifo_tuser[out_fifo_wr_ptr_reg[OUTPUT_FIFO_ADDR_WIDTH-1:0]] <= m_axis_rq_tuser_int; - out_fifo_wr_ptr_reg <= out_fifo_wr_ptr_reg + 1; - end - - if (!out_fifo_empty && (!m_axis_rq_tvalid_reg || m_axis_rq_tready)) begin - m_axis_rq_tdata_reg <= out_fifo_tdata[out_fifo_rd_ptr_reg[OUTPUT_FIFO_ADDR_WIDTH-1:0]]; - m_axis_rq_tkeep_reg <= out_fifo_tkeep[out_fifo_rd_ptr_reg[OUTPUT_FIFO_ADDR_WIDTH-1:0]]; - m_axis_rq_tvalid_reg <= 1'b1; - m_axis_rq_tlast_reg <= out_fifo_tlast[out_fifo_rd_ptr_reg[OUTPUT_FIFO_ADDR_WIDTH-1:0]]; - m_axis_rq_tuser_reg <= out_fifo_tuser[out_fifo_rd_ptr_reg[OUTPUT_FIFO_ADDR_WIDTH-1:0]]; - out_fifo_rd_ptr_reg <= out_fifo_rd_ptr_reg + 1; - end + m_axis_rq_tdata_reg <= m_axis_rq_tdata_next; + m_axis_rq_tkeep_reg <= m_axis_rq_tkeep_next; + m_axis_rq_tvalid_reg <= m_axis_rq_tvalid_next; + m_axis_rq_tlast_reg <= m_axis_rq_tlast_next; + m_axis_rq_tuser_reg <= m_axis_rq_tuser_next; if (rst) begin - out_fifo_wr_ptr_reg <= 0; - out_fifo_rd_ptr_reg <= 0; - m_axis_rq_tvalid_reg <= 1'b0; + frame_reg <= 1'b0; + port_reg <= 0; + + m_axis_rq_tvalid_reg <= 0; end end diff --git a/tb/pcie_us_if/Makefile b/tb/pcie_us_if/Makefile index 774a8621a..ed0b7dc26 100644 --- a/tb/pcie_us_if/Makefile +++ b/tb/pcie_us_if/Makefile @@ -36,6 +36,8 @@ VERILOG_SOURCES += ../../rtl/$(DUT)_cc.v VERILOG_SOURCES += ../../rtl/$(DUT)_cq.v VERILOG_SOURCES += ../../rtl/pcie_us_cfg.v VERILOG_SOURCES += ../../rtl/pcie_us_msi.v +VERILOG_SOURCES += ../../rtl/pcie_tlp_fifo.v +VERILOG_SOURCES += ../../rtl/pcie_tlp_fifo_raw.v VERILOG_SOURCES += ../../rtl/arbiter.v VERILOG_SOURCES += ../../rtl/priority_encoder.v @@ -46,6 +48,10 
@@ export PARAM_AXIS_PCIE_RQ_USER_WIDTH ?= $(if $(filter-out 512,$(PARAM_AXIS_PCIE_ export PARAM_AXIS_PCIE_RC_USER_WIDTH ?= $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),75,161) export PARAM_AXIS_PCIE_CQ_USER_WIDTH ?= $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),88,183) export PARAM_AXIS_PCIE_CC_USER_WIDTH ?= $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),33,81) +export PARAM_RC_STRADDLE ?= $(if $(filter-out 256 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),0,1) +export PARAM_RQ_STRADDLE ?= $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),0,1) +export PARAM_CQ_STRADDLE ?= $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),0,1) +export PARAM_CC_STRADDLE ?= $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),0,1) export PARAM_RQ_SEQ_NUM_WIDTH ?= $(if $(filter-out 60,$(PARAM_AXIS_PCIE_RQ_USER_WIDTH)),6,4) export PARAM_TLP_DATA_WIDTH ?= $(PARAM_AXIS_PCIE_DATA_WIDTH) export PARAM_TLP_STRB_WIDTH ?= $(shell expr $(PARAM_TLP_DATA_WIDTH) / 32 ) @@ -72,6 +78,10 @@ ifeq ($(SIM), icarus) COMPILE_ARGS += -P $(TOPLEVEL).AXIS_PCIE_RC_USER_WIDTH=$(PARAM_AXIS_PCIE_RC_USER_WIDTH) COMPILE_ARGS += -P $(TOPLEVEL).AXIS_PCIE_CQ_USER_WIDTH=$(PARAM_AXIS_PCIE_CQ_USER_WIDTH) COMPILE_ARGS += -P $(TOPLEVEL).AXIS_PCIE_CC_USER_WIDTH=$(PARAM_AXIS_PCIE_CC_USER_WIDTH) + COMPILE_ARGS += -P $(TOPLEVEL).RC_STRADDLE=$(PARAM_RC_STRADDLE) + COMPILE_ARGS += -P $(TOPLEVEL).RQ_STRADDLE=$(PARAM_RQ_STRADDLE) + COMPILE_ARGS += -P $(TOPLEVEL).CQ_STRADDLE=$(PARAM_CQ_STRADDLE) + COMPILE_ARGS += -P $(TOPLEVEL).CC_STRADDLE=$(PARAM_CC_STRADDLE) COMPILE_ARGS += -P $(TOPLEVEL).RQ_SEQ_NUM_WIDTH=$(PARAM_RQ_SEQ_NUM_WIDTH) COMPILE_ARGS += -P $(TOPLEVEL).TLP_DATA_WIDTH=$(PARAM_TLP_DATA_WIDTH) COMPILE_ARGS += -P $(TOPLEVEL).TLP_STRB_WIDTH=$(PARAM_TLP_STRB_WIDTH) @@ -102,6 +112,10 @@ else ifeq ($(SIM), verilator) COMPILE_ARGS += -GAXIS_PCIE_RC_USER_WIDTH=$(PARAM_AXIS_PCIE_RC_USER_WIDTH) COMPILE_ARGS += -GAXIS_PCIE_CQ_USER_WIDTH=$(PARAM_AXIS_PCIE_CQ_USER_WIDTH) COMPILE_ARGS += 
-GAXIS_PCIE_CC_USER_WIDTH=$(PARAM_AXIS_PCIE_CC_USER_WIDTH) + COMPILE_ARGS += -GRC_STRADDLE=$(PARAM_RC_STRADDLE) + COMPILE_ARGS += -GRQ_STRADDLE=$(PARAM_RQ_STRADDLE) + COMPILE_ARGS += -GCQ_STRADDLE=$(PARAM_CQ_STRADDLE) + COMPILE_ARGS += -GCC_STRADDLE=$(PARAM_CC_STRADDLE) COMPILE_ARGS += -GRQ_SEQ_NUM_WIDTH=$(PARAM_RQ_SEQ_NUM_WIDTH) COMPILE_ARGS += -GTLP_DATA_WIDTH=$(PARAM_TLP_DATA_WIDTH) COMPILE_ARGS += -GTLP_STRB_WIDTH=$(PARAM_TLP_STRB_WIDTH) diff --git a/tb/pcie_us_if/test_pcie_us_if.py b/tb/pcie_us_if/test_pcie_us_if.py index 718c174d3..1a9c588ea 100644 --- a/tb/pcie_us_if/test_pcie_us_if.py +++ b/tb/pcie_us_if/test_pcie_us_if.py @@ -66,11 +66,11 @@ class TB(object): # pcie_link_width=2, # user_clk_frequency=250e6, alignment="dword", - cq_straddle=False, - cc_straddle=False, - rq_straddle=False, - rc_straddle=False, - rc_4tlp_straddle=False, + cq_straddle=len(dut.pcie_us_if_cq_inst.rx_req_tlp_valid_reg) > 1, + cc_straddle=len(dut.pcie_us_if_cc_inst.out_tlp_valid) > 1, + rq_straddle=len(dut.pcie_us_if_rq_inst.out_tlp_valid) > 1, + rc_straddle=len(dut.pcie_us_if_rc_inst.rx_cpl_tlp_valid_reg) > 1, + rc_4tlp_straddle=len(dut.pcie_us_if_rc_inst.rx_cpl_tlp_valid_reg) > 2, pf_count=1, max_payload_size=1024, enable_client_tag=True, @@ -427,6 +427,8 @@ async def run_test_dma(dut, idle_inserter=None, backpressure_inserter=None): # wait for write to complete while not tb.test_dev.tx_wr_req_tlp_source.empty() or tb.test_dev.tx_wr_req_tlp_source.active: await RisingEdge(dut.clk) + await Timer(100, 'ns') + await tb.test_dev.dma_mem_read(addr, 1, timeout=5000, timeout_unit='ns') await tb.test_dev.dma_mem_read(addr, 1, timeout=5000, timeout_unit='ns') assert mem[offset:offset+length] == test_data @@ -592,8 +594,9 @@ tests_dir = os.path.dirname(__file__) rtl_dir = os.path.abspath(os.path.join(tests_dir, '..', '..', 'rtl')) -@pytest.mark.parametrize("axis_pcie_data_width", [64, 128, 256, 512]) -def test_pcie_us_if(request, axis_pcie_data_width): 
+@pytest.mark.parametrize(("axis_pcie_data_width", "straddle"), + [(64, False), (128, False), (256, False), (256, True), (512, False), (512, True)]) +def test_pcie_us_if(request, axis_pcie_data_width, straddle): dut = "pcie_us_if" module = os.path.splitext(os.path.basename(__file__))[0] toplevel = dut @@ -606,6 +609,8 @@ def test_pcie_us_if(request, axis_pcie_data_width): os.path.join(rtl_dir, f"{dut}_cq.v"), os.path.join(rtl_dir, "pcie_us_cfg.v"), os.path.join(rtl_dir, "pcie_us_msi.v"), + os.path.join(rtl_dir, "pcie_tlp_fifo.v"), + os.path.join(rtl_dir, "pcie_tlp_fifo_raw.v"), os.path.join(rtl_dir, "arbiter.v"), os.path.join(rtl_dir, "priority_encoder.v"), ] @@ -618,8 +623,12 @@ def test_pcie_us_if(request, axis_pcie_data_width): parameters['AXIS_PCIE_RC_USER_WIDTH'] = 75 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 161 parameters['AXIS_PCIE_CQ_USER_WIDTH'] = 88 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 183 parameters['AXIS_PCIE_CC_USER_WIDTH'] = 33 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 81 + parameters['RC_STRADDLE'] = int(parameters['AXIS_PCIE_DATA_WIDTH'] >= 256 and straddle) + parameters['RQ_STRADDLE'] = int(parameters['AXIS_PCIE_DATA_WIDTH'] >= 512 and straddle) + parameters['CQ_STRADDLE'] = int(parameters['AXIS_PCIE_DATA_WIDTH'] >= 512 and straddle) + parameters['CC_STRADDLE'] = int(parameters['AXIS_PCIE_DATA_WIDTH'] >= 512 and straddle) parameters['RQ_SEQ_NUM_WIDTH'] = 4 if parameters['AXIS_PCIE_RQ_USER_WIDTH'] == 60 else 6 - parameters['TLP_DATA_WIDTH'] = axis_pcie_data_width + parameters['TLP_DATA_WIDTH'] = parameters['AXIS_PCIE_DATA_WIDTH'] parameters['TLP_STRB_WIDTH'] = parameters['TLP_DATA_WIDTH'] // 32 parameters['TLP_HDR_WIDTH'] = 128 parameters['TLP_SEG_COUNT'] = 1 diff --git a/tb/pcie_us_if_cc/Makefile b/tb/pcie_us_if_cc/Makefile index 07382553d..9f28f5594 100644 --- a/tb/pcie_us_if_cc/Makefile +++ b/tb/pcie_us_if_cc/Makefile @@ -30,11 +30,14 @@ DUT = pcie_us_if_cc TOPLEVEL = $(DUT) MODULE = test_$(DUT) VERILOG_SOURCES += 
../../rtl/$(DUT).v +VERILOG_SOURCES += ../../rtl/pcie_tlp_fifo.v +VERILOG_SOURCES += ../../rtl/pcie_tlp_fifo_raw.v # module parameters export PARAM_AXIS_PCIE_DATA_WIDTH ?= 64 export PARAM_AXIS_PCIE_KEEP_WIDTH ?= $(shell expr $(PARAM_AXIS_PCIE_DATA_WIDTH) / 32 ) export PARAM_AXIS_PCIE_CC_USER_WIDTH ?= $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),33,81) +export PARAM_CC_STRADDLE ?= $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),0,1) export PARAM_TLP_DATA_WIDTH ?= $(PARAM_AXIS_PCIE_DATA_WIDTH) export PARAM_TLP_STRB_WIDTH ?= $(shell expr $(PARAM_TLP_DATA_WIDTH) / 32 ) export PARAM_TLP_HDR_WIDTH ?= 128 @@ -46,6 +49,7 @@ ifeq ($(SIM), icarus) COMPILE_ARGS += -P $(TOPLEVEL).AXIS_PCIE_DATA_WIDTH=$(PARAM_AXIS_PCIE_DATA_WIDTH) COMPILE_ARGS += -P $(TOPLEVEL).AXIS_PCIE_KEEP_WIDTH=$(PARAM_AXIS_PCIE_KEEP_WIDTH) COMPILE_ARGS += -P $(TOPLEVEL).AXIS_PCIE_CC_USER_WIDTH=$(PARAM_AXIS_PCIE_CC_USER_WIDTH) + COMPILE_ARGS += -P $(TOPLEVEL).CC_STRADDLE=$(PARAM_CC_STRADDLE) COMPILE_ARGS += -P $(TOPLEVEL).TLP_DATA_WIDTH=$(PARAM_TLP_DATA_WIDTH) COMPILE_ARGS += -P $(TOPLEVEL).TLP_STRB_WIDTH=$(PARAM_TLP_STRB_WIDTH) COMPILE_ARGS += -P $(TOPLEVEL).TLP_HDR_WIDTH=$(PARAM_TLP_HDR_WIDTH) @@ -61,6 +65,7 @@ else ifeq ($(SIM), verilator) COMPILE_ARGS += -GAXIS_PCIE_DATA_WIDTH=$(PARAM_AXIS_PCIE_DATA_WIDTH) COMPILE_ARGS += -GAXIS_PCIE_KEEP_WIDTH=$(PARAM_AXIS_PCIE_KEEP_WIDTH) COMPILE_ARGS += -GAXIS_PCIE_CC_USER_WIDTH=$(PARAM_AXIS_PCIE_CC_USER_WIDTH) + COMPILE_ARGS += -GCC_STRADDLE=$(PARAM_CC_STRADDLE) COMPILE_ARGS += -GTLP_DATA_WIDTH=$(PARAM_TLP_DATA_WIDTH) COMPILE_ARGS += -GTLP_STRB_WIDTH=$(PARAM_TLP_STRB_WIDTH) COMPILE_ARGS += -GTLP_HDR_WIDTH=$(PARAM_TLP_HDR_WIDTH) diff --git a/tb/pcie_us_if_cc/test_pcie_us_if_cc.py b/tb/pcie_us_if_cc/test_pcie_us_if_cc.py index 7d8d78b43..a947ee099 100644 --- a/tb/pcie_us_if_cc/test_pcie_us_if_cc.py +++ b/tb/pcie_us_if_cc/test_pcie_us_if_cc.py @@ -63,7 +63,7 @@ class TB(object): cocotb.start_soon(Clock(dut.clk, 4, units="ns").start()) self.source =
PcieIfSource(PcieIfTxBus.from_prefix(dut, "tx_cpl_tlp"), dut.clk, dut.rst) - self.sink = CcSink(AxiStreamBus.from_prefix(dut, "m_axis_cc"), dut.clk, dut.rst) + self.sink = CcSink(AxiStreamBus.from_prefix(dut, "m_axis_cc"), dut.clk, dut.rst, segments=len(dut.out_tlp_valid)) def set_idle_generator(self, generator=None): if generator: @@ -222,14 +222,17 @@ tests_dir = os.path.dirname(__file__) rtl_dir = os.path.abspath(os.path.join(tests_dir, '..', '..', 'rtl')) -@pytest.mark.parametrize("axis_pcie_data_width", [64, 128, 256, 512]) -def test_pcie_us_if_cc(request, axis_pcie_data_width): +@pytest.mark.parametrize(("axis_pcie_data_width", "straddle"), + [(64, False), (128, False), (256, False), (512, False), (512, True)]) +def test_pcie_us_if_cc(request, axis_pcie_data_width, straddle): dut = "pcie_us_if_cc" module = os.path.splitext(os.path.basename(__file__))[0] toplevel = dut verilog_sources = [ os.path.join(rtl_dir, f"{dut}.v"), + os.path.join(rtl_dir, "pcie_tlp_fifo.v"), + os.path.join(rtl_dir, "pcie_tlp_fifo_raw.v"), ] parameters = {} @@ -237,7 +240,8 @@ def test_pcie_us_if_cc(request, axis_pcie_data_width): parameters['AXIS_PCIE_DATA_WIDTH'] = axis_pcie_data_width parameters['AXIS_PCIE_KEEP_WIDTH'] = parameters['AXIS_PCIE_DATA_WIDTH'] // 32 parameters['AXIS_PCIE_CC_USER_WIDTH'] = 33 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 81 - parameters['TLP_DATA_WIDTH'] = axis_pcie_data_width + parameters['CC_STRADDLE'] = int(parameters['AXIS_PCIE_DATA_WIDTH'] >= 512 and straddle) + parameters['TLP_DATA_WIDTH'] = parameters['AXIS_PCIE_DATA_WIDTH'] parameters['TLP_STRB_WIDTH'] = parameters['TLP_DATA_WIDTH'] // 32 parameters['TLP_HDR_WIDTH'] = 128 parameters['TLP_SEG_COUNT'] = 1 diff --git a/tb/pcie_us_if_cq/Makefile b/tb/pcie_us_if_cq/Makefile index 8a8810750..65fc1f66f 100644 --- a/tb/pcie_us_if_cq/Makefile +++ b/tb/pcie_us_if_cq/Makefile @@ -30,11 +30,14 @@ DUT = pcie_us_if_cq TOPLEVEL = $(DUT) MODULE = test_$(DUT) VERILOG_SOURCES += ../../rtl/$(DUT).v 
+VERILOG_SOURCES += ../../rtl/pcie_tlp_fifo.v +VERILOG_SOURCES += ../../rtl/pcie_tlp_fifo_raw.v # module parameters export PARAM_AXIS_PCIE_DATA_WIDTH ?= 64 export PARAM_AXIS_PCIE_KEEP_WIDTH ?= $(shell expr $(PARAM_AXIS_PCIE_DATA_WIDTH) / 32 ) export PARAM_AXIS_PCIE_CQ_USER_WIDTH ?= $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),88,183) +export PARAM_CQ_STRADDLE ?= $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),0,1) export PARAM_TLP_DATA_WIDTH ?= $(PARAM_AXIS_PCIE_DATA_WIDTH) export PARAM_TLP_STRB_WIDTH ?= $(shell expr $(PARAM_TLP_DATA_WIDTH) / 32 ) export PARAM_TLP_HDR_WIDTH ?= 128 @@ -46,6 +49,7 @@ ifeq ($(SIM), icarus) COMPILE_ARGS += -P $(TOPLEVEL).AXIS_PCIE_DATA_WIDTH=$(PARAM_AXIS_PCIE_DATA_WIDTH) COMPILE_ARGS += -P $(TOPLEVEL).AXIS_PCIE_KEEP_WIDTH=$(PARAM_AXIS_PCIE_KEEP_WIDTH) COMPILE_ARGS += -P $(TOPLEVEL).AXIS_PCIE_CQ_USER_WIDTH=$(PARAM_AXIS_PCIE_CQ_USER_WIDTH) + COMPILE_ARGS += -P $(TOPLEVEL).CQ_STRADDLE=$(PARAM_CQ_STRADDLE) COMPILE_ARGS += -P $(TOPLEVEL).TLP_DATA_WIDTH=$(PARAM_TLP_DATA_WIDTH) COMPILE_ARGS += -P $(TOPLEVEL).TLP_STRB_WIDTH=$(PARAM_TLP_STRB_WIDTH) COMPILE_ARGS += -P $(TOPLEVEL).TLP_HDR_WIDTH=$(PARAM_TLP_HDR_WIDTH) @@ -61,6 +65,7 @@ else ifeq ($(SIM), verilator) COMPILE_ARGS += -GAXIS_PCIE_DATA_WIDTH=$(PARAM_AXIS_PCIE_DATA_WIDTH) COMPILE_ARGS += -GAXIS_PCIE_KEEP_WIDTH=$(PARAM_AXIS_PCIE_KEEP_WIDTH) COMPILE_ARGS += -GAXIS_PCIE_CQ_USER_WIDTH=$(PARAM_AXIS_PCIE_CQ_USER_WIDTH) + COMPILE_ARGS += -GCQ_STRADDLE=$(PARAM_CQ_STRADDLE) COMPILE_ARGS += -GTLP_DATA_WIDTH=$(PARAM_TLP_DATA_WIDTH) COMPILE_ARGS += -GTLP_STRB_WIDTH=$(PARAM_TLP_STRB_WIDTH) COMPILE_ARGS += -GTLP_HDR_WIDTH=$(PARAM_TLP_HDR_WIDTH) diff --git a/tb/pcie_us_if_cq/test_pcie_us_if_cq.py b/tb/pcie_us_if_cq/test_pcie_us_if_cq.py index e763fc014..e057e407b 100644 --- a/tb/pcie_us_if_cq/test_pcie_us_if_cq.py +++ b/tb/pcie_us_if_cq/test_pcie_us_if_cq.py @@ -62,7 +62,7 @@ class TB(object): cocotb.start_soon(Clock(dut.clk, 4, units="ns").start()) - self.source =
CqSource(AxiStreamBus.from_prefix(dut, "s_axis_cq"), dut.clk, dut.rst) + self.source = CqSource(AxiStreamBus.from_prefix(dut, "s_axis_cq"), dut.clk, dut.rst, segments=len(dut.rx_req_tlp_valid_reg)) self.sink = PcieIfSink(PcieIfRxBus.from_prefix(dut, "rx_req_tlp"), dut.clk, dut.rst) def set_idle_generator(self, generator=None): @@ -211,14 +211,17 @@ tests_dir = os.path.dirname(__file__) rtl_dir = os.path.abspath(os.path.join(tests_dir, '..', '..', 'rtl')) -@pytest.mark.parametrize("axis_pcie_data_width", [64, 128, 256, 512]) -def test_pcie_us_if_cq(request, axis_pcie_data_width): +@pytest.mark.parametrize(("axis_pcie_data_width", "straddle"), + [(64, False), (128, False), (256, False), (512, False), (512, True)]) +def test_pcie_us_if_cq(request, axis_pcie_data_width, straddle): dut = "pcie_us_if_cq" module = os.path.splitext(os.path.basename(__file__))[0] toplevel = dut verilog_sources = [ os.path.join(rtl_dir, f"{dut}.v"), + os.path.join(rtl_dir, "pcie_tlp_fifo.v"), + os.path.join(rtl_dir, "pcie_tlp_fifo_raw.v"), ] parameters = {} @@ -226,7 +229,8 @@ def test_pcie_us_if_cq(request, axis_pcie_data_width): parameters['AXIS_PCIE_DATA_WIDTH'] = axis_pcie_data_width parameters['AXIS_PCIE_KEEP_WIDTH'] = parameters['AXIS_PCIE_DATA_WIDTH'] // 32 parameters['AXIS_PCIE_CQ_USER_WIDTH'] = 88 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 183 - parameters['TLP_DATA_WIDTH'] = axis_pcie_data_width + parameters['CQ_STRADDLE'] = int(parameters['AXIS_PCIE_DATA_WIDTH'] >= 512 and straddle) + parameters['TLP_DATA_WIDTH'] = parameters['AXIS_PCIE_DATA_WIDTH'] parameters['TLP_STRB_WIDTH'] = parameters['TLP_DATA_WIDTH'] // 32 parameters['TLP_HDR_WIDTH'] = 128 parameters['TLP_SEG_COUNT'] = 1 diff --git a/tb/pcie_us_if_rc/Makefile b/tb/pcie_us_if_rc/Makefile index 60a8b6720..08d4a3d3b 100644 --- a/tb/pcie_us_if_rc/Makefile +++ b/tb/pcie_us_if_rc/Makefile @@ -30,11 +30,14 @@ DUT = pcie_us_if_rc TOPLEVEL = $(DUT) MODULE = test_$(DUT) VERILOG_SOURCES += ../../rtl/$(DUT).v +VERILOG_SOURCES += 
../../rtl/pcie_tlp_fifo.v +VERILOG_SOURCES += ../../rtl/pcie_tlp_fifo_raw.v # module parameters export PARAM_AXIS_PCIE_DATA_WIDTH ?= 64 export PARAM_AXIS_PCIE_KEEP_WIDTH ?= $(shell expr $(PARAM_AXIS_PCIE_DATA_WIDTH) / 32 ) export PARAM_AXIS_PCIE_RC_USER_WIDTH ?= $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),75,161) +export PARAM_RC_STRADDLE ?= $(if $(filter-out 256 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),0,1) export PARAM_TLP_DATA_WIDTH ?= $(PARAM_AXIS_PCIE_DATA_WIDTH) export PARAM_TLP_STRB_WIDTH ?= $(shell expr $(PARAM_TLP_DATA_WIDTH) / 32 ) export PARAM_TLP_HDR_WIDTH ?= 128 @@ -46,6 +49,7 @@ ifeq ($(SIM), icarus) COMPILE_ARGS += -P $(TOPLEVEL).AXIS_PCIE_DATA_WIDTH=$(PARAM_AXIS_PCIE_DATA_WIDTH) COMPILE_ARGS += -P $(TOPLEVEL).AXIS_PCIE_KEEP_WIDTH=$(PARAM_AXIS_PCIE_KEEP_WIDTH) COMPILE_ARGS += -P $(TOPLEVEL).AXIS_PCIE_RC_USER_WIDTH=$(PARAM_AXIS_PCIE_RC_USER_WIDTH) + COMPILE_ARGS += -P $(TOPLEVEL).RC_STRADDLE=$(PARAM_RC_STRADDLE) COMPILE_ARGS += -P $(TOPLEVEL).TLP_DATA_WIDTH=$(PARAM_TLP_DATA_WIDTH) COMPILE_ARGS += -P $(TOPLEVEL).TLP_STRB_WIDTH=$(PARAM_TLP_STRB_WIDTH) COMPILE_ARGS += -P $(TOPLEVEL).TLP_HDR_WIDTH=$(PARAM_TLP_HDR_WIDTH) @@ -61,6 +65,7 @@ else ifeq ($(SIM), verilator) COMPILE_ARGS += -GAXIS_PCIE_DATA_WIDTH=$(PARAM_AXIS_PCIE_DATA_WIDTH) COMPILE_ARGS += -GAXIS_PCIE_KEEP_WIDTH=$(PARAM_AXIS_PCIE_KEEP_WIDTH) COMPILE_ARGS += -GAXIS_PCIE_RC_USER_WIDTH=$(PARAM_AXIS_PCIE_RC_USER_WIDTH) + COMPILE_ARGS += -GRC_STRADDLE=$(PARAM_RC_STRADDLE) COMPILE_ARGS += -GTLP_DATA_WIDTH=$(PARAM_TLP_DATA_WIDTH) COMPILE_ARGS += -GTLP_STRB_WIDTH=$(PARAM_TLP_STRB_WIDTH) COMPILE_ARGS += -GTLP_HDR_WIDTH=$(PARAM_TLP_HDR_WIDTH) diff --git a/tb/pcie_us_if_rc/test_pcie_us_if_rc.py b/tb/pcie_us_if_rc/test_pcie_us_if_rc.py index 22a106b4a..526dc8b7c 100644 --- a/tb/pcie_us_if_rc/test_pcie_us_if_rc.py +++ b/tb/pcie_us_if_rc/test_pcie_us_if_rc.py @@ -62,7 +62,7 @@ class TB(object): cocotb.start_soon(Clock(dut.clk, 4, units="ns").start()) - self.source = RcSource(AxiStreamBus.from_prefix(dut,
"s_axis_rc"), dut.clk, dut.rst) + self.source = RcSource(AxiStreamBus.from_prefix(dut, "s_axis_rc"), dut.clk, dut.rst, segments=len(dut.rx_cpl_tlp_valid_reg)) self.sink = PcieIfSink(PcieIfRxBus.from_prefix(dut, "rx_cpl_tlp"), dut.clk, dut.rst) def set_idle_generator(self, generator=None): @@ -220,14 +220,17 @@ tests_dir = os.path.dirname(__file__) rtl_dir = os.path.abspath(os.path.join(tests_dir, '..', '..', 'rtl')) -@pytest.mark.parametrize("axis_pcie_data_width", [64, 128, 256, 512]) -def test_pcie_us_if_rc(request, axis_pcie_data_width): +@pytest.mark.parametrize(("axis_pcie_data_width", "straddle"), + [(64, False), (128, False), (256, False), (256, True), (512, False), (512, True)]) +def test_pcie_us_if_rc(request, axis_pcie_data_width, straddle): dut = "pcie_us_if_rc" module = os.path.splitext(os.path.basename(__file__))[0] toplevel = dut verilog_sources = [ os.path.join(rtl_dir, f"{dut}.v"), + os.path.join(rtl_dir, "pcie_tlp_fifo.v"), + os.path.join(rtl_dir, "pcie_tlp_fifo_raw.v"), ] parameters = {} @@ -235,7 +238,8 @@ def test_pcie_us_if_rc(request, axis_pcie_data_width): parameters['AXIS_PCIE_DATA_WIDTH'] = axis_pcie_data_width parameters['AXIS_PCIE_KEEP_WIDTH'] = parameters['AXIS_PCIE_DATA_WIDTH'] // 32 parameters['AXIS_PCIE_RC_USER_WIDTH'] = 75 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 161 - parameters['TLP_DATA_WIDTH'] = axis_pcie_data_width + parameters['RC_STRADDLE'] = int(parameters['AXIS_PCIE_DATA_WIDTH'] >= 256 and straddle) + parameters['TLP_DATA_WIDTH'] = parameters['AXIS_PCIE_DATA_WIDTH'] parameters['TLP_STRB_WIDTH'] = parameters['TLP_DATA_WIDTH'] // 32 parameters['TLP_HDR_WIDTH'] = 128 parameters['TLP_SEG_COUNT'] = 1 diff --git a/tb/pcie_us_if_rq/Makefile b/tb/pcie_us_if_rq/Makefile index 276c22ecf..b06d15425 100644 --- a/tb/pcie_us_if_rq/Makefile +++ b/tb/pcie_us_if_rq/Makefile @@ -30,13 +30,14 @@ DUT = pcie_us_if_rq TOPLEVEL = $(DUT) MODULE = test_$(DUT) VERILOG_SOURCES += ../../rtl/$(DUT).v -VERILOG_SOURCES += ../../rtl/arbiter.v 
-VERILOG_SOURCES += ../../rtl/priority_encoder.v +VERILOG_SOURCES += ../../rtl/pcie_tlp_fifo.v +VERILOG_SOURCES += ../../rtl/pcie_tlp_fifo_raw.v # module parameters export PARAM_AXIS_PCIE_DATA_WIDTH ?= 64 export PARAM_AXIS_PCIE_KEEP_WIDTH ?= $(shell expr $(PARAM_AXIS_PCIE_DATA_WIDTH) / 32 ) export PARAM_AXIS_PCIE_RQ_USER_WIDTH ?= $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),62,137) +export PARAM_RQ_STRADDLE ?= $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),0,1) export PARAM_RQ_SEQ_NUM_WIDTH ?= $(if $(filter-out 60,$(PARAM_AXIS_PCIE_RQ_USER_WIDTH)),6,4) export PARAM_TLP_DATA_WIDTH ?= $(PARAM_AXIS_PCIE_DATA_WIDTH) export PARAM_TLP_STRB_WIDTH ?= $(shell expr $(PARAM_TLP_DATA_WIDTH) / 32 ) @@ -51,6 +52,7 @@ ifeq ($(SIM), icarus) COMPILE_ARGS += -P $(TOPLEVEL).AXIS_PCIE_DATA_WIDTH=$(PARAM_AXIS_PCIE_DATA_WIDTH) COMPILE_ARGS += -P $(TOPLEVEL).AXIS_PCIE_KEEP_WIDTH=$(PARAM_AXIS_PCIE_KEEP_WIDTH) COMPILE_ARGS += -P $(TOPLEVEL).AXIS_PCIE_RQ_USER_WIDTH=$(PARAM_AXIS_PCIE_RQ_USER_WIDTH) + COMPILE_ARGS += -P $(TOPLEVEL).RQ_STRADDLE=$(PARAM_RQ_STRADDLE) COMPILE_ARGS += -P $(TOPLEVEL).RQ_SEQ_NUM_WIDTH=$(PARAM_RQ_SEQ_NUM_WIDTH) COMPILE_ARGS += -P $(TOPLEVEL).TLP_DATA_WIDTH=$(PARAM_TLP_DATA_WIDTH) COMPILE_ARGS += -P $(TOPLEVEL).TLP_STRB_WIDTH=$(PARAM_TLP_STRB_WIDTH) @@ -69,6 +71,7 @@ else ifeq ($(SIM), verilator) COMPILE_ARGS += -GAXIS_PCIE_DATA_WIDTH=$(PARAM_AXIS_PCIE_DATA_WIDTH) COMPILE_ARGS += -GAXIS_PCIE_KEEP_WIDTH=$(PARAM_AXIS_PCIE_KEEP_WIDTH) COMPILE_ARGS += -GAXIS_PCIE_RQ_USER_WIDTH=$(PARAM_AXIS_PCIE_RQ_USER_WIDTH) + COMPILE_ARGS += -GRQ_STRADDLE=$(PARAM_RQ_STRADDLE) COMPILE_ARGS += -GRQ_SEQ_NUM_WIDTH=$(PARAM_RQ_SEQ_NUM_WIDTH) COMPILE_ARGS += -GTLP_DATA_WIDTH=$(PARAM_TLP_DATA_WIDTH) COMPILE_ARGS += -GTLP_STRB_WIDTH=$(PARAM_TLP_STRB_WIDTH) diff --git a/tb/pcie_us_if_rq/test_pcie_us_if_rq.py b/tb/pcie_us_if_rq/test_pcie_us_if_rq.py index de773007c..b83226978 100644 --- a/tb/pcie_us_if_rq/test_pcie_us_if_rq.py +++ b/tb/pcie_us_if_rq/test_pcie_us_if_rq.py @@ -64,7 
+64,12 @@ class TB(object): self.rd_req_source = PcieIfSource(PcieIfTxBus.from_prefix(dut, "tx_rd_req_tlp"), dut.clk, dut.rst) self.wr_req_source = PcieIfSource(PcieIfTxBus.from_prefix(dut, "tx_wr_req_tlp"), dut.clk, dut.rst) - self.sink = RqSink(AxiStreamBus.from_prefix(dut, "m_axis_rq"), dut.clk, dut.rst) + self.sink = RqSink(AxiStreamBus.from_prefix(dut, "m_axis_rq"), dut.clk, dut.rst, segments=len(dut.out_tlp_valid)) + + dut.s_axis_rq_seq_num_0.setimmediatevalue(0) + dut.s_axis_rq_seq_num_valid_0.setimmediatevalue(0) + dut.s_axis_rq_seq_num_1.setimmediatevalue(0) + dut.s_axis_rq_seq_num_valid_1.setimmediatevalue(0) def set_idle_generator(self, generator=None): if generator: @@ -231,16 +236,17 @@ tests_dir = os.path.dirname(__file__) rtl_dir = os.path.abspath(os.path.join(tests_dir, '..', '..', 'rtl')) -@pytest.mark.parametrize("axis_pcie_data_width", [64, 128, 256, 512]) -def test_pcie_us_if_rq(request, axis_pcie_data_width): +@pytest.mark.parametrize(("axis_pcie_data_width", "straddle"), + [(64, False), (128, False), (256, False), (512, False), (512, True)]) +def test_pcie_us_if_rq(request, axis_pcie_data_width, straddle): dut = "pcie_us_if_rq" module = os.path.splitext(os.path.basename(__file__))[0] toplevel = dut verilog_sources = [ os.path.join(rtl_dir, f"{dut}.v"), - os.path.join(rtl_dir, "arbiter.v"), - os.path.join(rtl_dir, "priority_encoder.v"), + os.path.join(rtl_dir, "pcie_tlp_fifo.v"), + os.path.join(rtl_dir, "pcie_tlp_fifo_raw.v"), ] parameters = {} @@ -248,8 +254,9 @@ def test_pcie_us_if_rq(request, axis_pcie_data_width): parameters['AXIS_PCIE_DATA_WIDTH'] = axis_pcie_data_width parameters['AXIS_PCIE_KEEP_WIDTH'] = parameters['AXIS_PCIE_DATA_WIDTH'] // 32 parameters['AXIS_PCIE_RQ_USER_WIDTH'] = 62 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 137 + parameters['RQ_STRADDLE'] = int(parameters['AXIS_PCIE_DATA_WIDTH'] >= 512 and straddle) parameters['RQ_SEQ_NUM_WIDTH'] = 4 if parameters['AXIS_PCIE_RQ_USER_WIDTH'] == 60 else 6 - 
parameters['TLP_DATA_WIDTH'] = axis_pcie_data_width + parameters['TLP_DATA_WIDTH'] = parameters['AXIS_PCIE_DATA_WIDTH'] parameters['TLP_STRB_WIDTH'] = parameters['TLP_DATA_WIDTH'] // 32 parameters['TLP_HDR_WIDTH'] = 128 parameters['TLP_SEG_COUNT'] = 1