diff --git a/fpga/lib/pcie/example/ADM_PCIE_9V3/fpga/rtl/fpga.v b/fpga/lib/pcie/example/ADM_PCIE_9V3/fpga/rtl/fpga.v index 914eee7cf..d8c59a846 100644 --- a/fpga/lib/pcie/example/ADM_PCIE_9V3/fpga/rtl/fpga.v +++ b/fpga/lib/pcie/example/ADM_PCIE_9V3/fpga/rtl/fpga.v @@ -143,6 +143,7 @@ wire pcie_rq_seq_num_vld1; wire [2:0] cfg_max_payload; wire [2:0] cfg_max_read_req; +wire [3:0] cfg_rcb_status; wire [9:0] cfg_mgmt_addr; wire [7:0] cfg_mgmt_function_number; @@ -261,7 +262,7 @@ pcie4_uscale_plus_inst ( .cfg_ltssm_state(), .cfg_rx_pm_state(), .cfg_tx_pm_state(), - .cfg_rcb_status(), + .cfg_rcb_status(cfg_rcb_status), .cfg_obff_enable(), .cfg_pl_status_change(), .cfg_tph_requester_enable(), @@ -407,7 +408,8 @@ core_inst ( .cfg_max_payload(cfg_max_payload), .cfg_max_read_req(cfg_max_read_req), - + .cfg_rcb_status(cfg_rcb_status), + .cfg_mgmt_addr(cfg_mgmt_addr), .cfg_mgmt_function_number(cfg_mgmt_function_number), .cfg_mgmt_write(cfg_mgmt_write), diff --git a/fpga/lib/pcie/example/ADM_PCIE_9V3/fpga/rtl/fpga_core.v b/fpga/lib/pcie/example/ADM_PCIE_9V3/fpga/rtl/fpga_core.v index 6cd16fc5b..78f28f9a0 100644 --- a/fpga/lib/pcie/example/ADM_PCIE_9V3/fpga/rtl/fpga_core.v +++ b/fpga/lib/pcie/example/ADM_PCIE_9V3/fpga/rtl/fpga_core.v @@ -103,6 +103,7 @@ module fpga_core # input wire [2:0] cfg_max_payload, input wire [2:0] cfg_max_read_req, + input wire [3:0] cfg_rcb_status, output wire [9:0] cfg_mgmt_addr, output wire [7:0] cfg_mgmt_function_number, @@ -158,6 +159,8 @@ example_core_pcie_us #( .PCIE_TAG_COUNT(PCIE_TAG_COUNT), .READ_OP_TABLE_SIZE(PCIE_TAG_COUNT), .READ_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)), + .READ_CPLH_FC_LIMIT(128), + .READ_CPLD_FC_LIMIT(2048), .WRITE_OP_TABLE_SIZE(2**(RQ_SEQ_NUM_WIDTH-1)), .WRITE_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)), .BAR0_APERTURE(BAR0_APERTURE), @@ -260,6 +263,8 @@ example_core_pcie_us_inst ( */ .cfg_max_read_req(cfg_max_read_req), .cfg_max_payload(cfg_max_payload), + // .cfg_rcb_status(cfg_rcb_status), + .cfg_rcb_status(1'b1), // force RCB 128 due 
to insufficient CPLH limit in US+ PCIe HIP /* * Status diff --git a/fpga/lib/pcie/example/ADM_PCIE_9V3/fpga/tb/fpga_core/test_fpga_core.py b/fpga/lib/pcie/example/ADM_PCIE_9V3/fpga/tb/fpga_core/test_fpga_core.py index f274d47a1..56b91ea37 100644 --- a/fpga/lib/pcie/example/ADM_PCIE_9V3/fpga/tb/fpga_core/test_fpga_core.py +++ b/fpga/lib/pcie/example/ADM_PCIE_9V3/fpga/tb/fpga_core/test_fpga_core.py @@ -167,7 +167,7 @@ class TB(object): # cfg_rx_pm_state # cfg_tx_pm_state # cfg_ltssm_state - # cfg_rcb_status + cfg_rcb_status=dut.cfg_rcb_status, # cfg_obff_enable # cfg_pl_status_change # cfg_tph_requester_enable diff --git a/fpga/lib/pcie/example/AU200/fpga/rtl/fpga.v b/fpga/lib/pcie/example/AU200/fpga/rtl/fpga.v index 69a96f6ad..426b56c8c 100644 --- a/fpga/lib/pcie/example/AU200/fpga/rtl/fpga.v +++ b/fpga/lib/pcie/example/AU200/fpga/rtl/fpga.v @@ -158,6 +158,7 @@ wire pcie_rq_seq_num_vld1; wire [2:0] cfg_max_payload; wire [2:0] cfg_max_read_req; +wire [3:0] cfg_rcb_status; wire [9:0] cfg_mgmt_addr; wire [7:0] cfg_mgmt_function_number; @@ -276,7 +277,7 @@ pcie4_uscale_plus_inst ( .cfg_ltssm_state(), .cfg_rx_pm_state(), .cfg_tx_pm_state(), - .cfg_rcb_status(), + .cfg_rcb_status(cfg_rcb_status), .cfg_obff_enable(), .cfg_pl_status_change(), .cfg_tph_requester_enable(), @@ -421,7 +422,8 @@ core_inst ( .cfg_max_payload(cfg_max_payload), .cfg_max_read_req(cfg_max_read_req), - + .cfg_rcb_status(cfg_rcb_status), + .cfg_mgmt_addr(cfg_mgmt_addr), .cfg_mgmt_function_number(cfg_mgmt_function_number), .cfg_mgmt_write(cfg_mgmt_write), diff --git a/fpga/lib/pcie/example/AU200/fpga/rtl/fpga_core.v b/fpga/lib/pcie/example/AU200/fpga/rtl/fpga_core.v index 595baa235..39d8ab260 100644 --- a/fpga/lib/pcie/example/AU200/fpga/rtl/fpga_core.v +++ b/fpga/lib/pcie/example/AU200/fpga/rtl/fpga_core.v @@ -102,6 +102,7 @@ module fpga_core # input wire [2:0] cfg_max_payload, input wire [2:0] cfg_max_read_req, + input wire [3:0] cfg_rcb_status, output wire [9:0] cfg_mgmt_addr, output wire [7:0] 
cfg_mgmt_function_number, @@ -155,6 +156,8 @@ example_core_pcie_us #( .PCIE_TAG_COUNT(PCIE_TAG_COUNT), .READ_OP_TABLE_SIZE(PCIE_TAG_COUNT), .READ_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)), + .READ_CPLH_FC_LIMIT(128), + .READ_CPLD_FC_LIMIT(2048), .WRITE_OP_TABLE_SIZE(2**(RQ_SEQ_NUM_WIDTH-1)), .WRITE_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)), .BAR0_APERTURE(BAR0_APERTURE), @@ -257,6 +260,8 @@ example_core_pcie_us_inst ( */ .cfg_max_read_req(cfg_max_read_req), .cfg_max_payload(cfg_max_payload), + // .cfg_rcb_status(cfg_rcb_status), + .cfg_rcb_status(1'b1), // force RCB 128 due to insufficient CPLH limit in US+ PCIe HIP /* * Status diff --git a/fpga/lib/pcie/example/AU200/fpga/tb/fpga_core/test_fpga_core.py b/fpga/lib/pcie/example/AU200/fpga/tb/fpga_core/test_fpga_core.py index 6b5cd01c2..5508fb415 100644 --- a/fpga/lib/pcie/example/AU200/fpga/tb/fpga_core/test_fpga_core.py +++ b/fpga/lib/pcie/example/AU200/fpga/tb/fpga_core/test_fpga_core.py @@ -167,7 +167,7 @@ class TB(object): # cfg_rx_pm_state # cfg_tx_pm_state # cfg_ltssm_state - # cfg_rcb_status + cfg_rcb_status=dut.cfg_rcb_status, # cfg_obff_enable # cfg_pl_status_change # cfg_tph_requester_enable diff --git a/fpga/lib/pcie/example/AU250/fpga/rtl/fpga.v b/fpga/lib/pcie/example/AU250/fpga/rtl/fpga.v index 69a96f6ad..426b56c8c 100644 --- a/fpga/lib/pcie/example/AU250/fpga/rtl/fpga.v +++ b/fpga/lib/pcie/example/AU250/fpga/rtl/fpga.v @@ -158,6 +158,7 @@ wire pcie_rq_seq_num_vld1; wire [2:0] cfg_max_payload; wire [2:0] cfg_max_read_req; +wire [3:0] cfg_rcb_status; wire [9:0] cfg_mgmt_addr; wire [7:0] cfg_mgmt_function_number; @@ -276,7 +277,7 @@ pcie4_uscale_plus_inst ( .cfg_ltssm_state(), .cfg_rx_pm_state(), .cfg_tx_pm_state(), - .cfg_rcb_status(), + .cfg_rcb_status(cfg_rcb_status), .cfg_obff_enable(), .cfg_pl_status_change(), .cfg_tph_requester_enable(), @@ -421,7 +422,8 @@ core_inst ( .cfg_max_payload(cfg_max_payload), .cfg_max_read_req(cfg_max_read_req), - + .cfg_rcb_status(cfg_rcb_status), + .cfg_mgmt_addr(cfg_mgmt_addr), 
.cfg_mgmt_function_number(cfg_mgmt_function_number), .cfg_mgmt_write(cfg_mgmt_write), diff --git a/fpga/lib/pcie/example/AU250/fpga/rtl/fpga_core.v b/fpga/lib/pcie/example/AU250/fpga/rtl/fpga_core.v index 595baa235..39d8ab260 100644 --- a/fpga/lib/pcie/example/AU250/fpga/rtl/fpga_core.v +++ b/fpga/lib/pcie/example/AU250/fpga/rtl/fpga_core.v @@ -102,6 +102,7 @@ module fpga_core # input wire [2:0] cfg_max_payload, input wire [2:0] cfg_max_read_req, + input wire [3:0] cfg_rcb_status, output wire [9:0] cfg_mgmt_addr, output wire [7:0] cfg_mgmt_function_number, @@ -155,6 +156,8 @@ example_core_pcie_us #( .PCIE_TAG_COUNT(PCIE_TAG_COUNT), .READ_OP_TABLE_SIZE(PCIE_TAG_COUNT), .READ_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)), + .READ_CPLH_FC_LIMIT(128), + .READ_CPLD_FC_LIMIT(2048), .WRITE_OP_TABLE_SIZE(2**(RQ_SEQ_NUM_WIDTH-1)), .WRITE_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)), .BAR0_APERTURE(BAR0_APERTURE), @@ -257,6 +260,8 @@ example_core_pcie_us_inst ( */ .cfg_max_read_req(cfg_max_read_req), .cfg_max_payload(cfg_max_payload), + // .cfg_rcb_status(cfg_rcb_status), + .cfg_rcb_status(1'b1), // force RCB 128 due to insufficient CPLH limit in US+ PCIe HIP /* * Status diff --git a/fpga/lib/pcie/example/AU250/fpga/tb/fpga_core/test_fpga_core.py b/fpga/lib/pcie/example/AU250/fpga/tb/fpga_core/test_fpga_core.py index 6b5cd01c2..5508fb415 100644 --- a/fpga/lib/pcie/example/AU250/fpga/tb/fpga_core/test_fpga_core.py +++ b/fpga/lib/pcie/example/AU250/fpga/tb/fpga_core/test_fpga_core.py @@ -167,7 +167,7 @@ class TB(object): # cfg_rx_pm_state # cfg_tx_pm_state # cfg_ltssm_state - # cfg_rcb_status + cfg_rcb_status=dut.cfg_rcb_status, # cfg_obff_enable # cfg_pl_status_change # cfg_tph_requester_enable diff --git a/fpga/lib/pcie/example/AU280/fpga/rtl/fpga.v b/fpga/lib/pcie/example/AU280/fpga/rtl/fpga.v index 40e697dfd..52ae618c2 100644 --- a/fpga/lib/pcie/example/AU280/fpga/rtl/fpga.v +++ b/fpga/lib/pcie/example/AU280/fpga/rtl/fpga.v @@ -145,6 +145,7 @@ wire pcie_rq_seq_num_vld1; wire [2:0] 
cfg_max_payload; wire [2:0] cfg_max_read_req; +wire [3:0] cfg_rcb_status; wire [9:0] cfg_mgmt_addr; wire [7:0] cfg_mgmt_function_number; @@ -263,7 +264,7 @@ pcie4c_uscale_plus_inst ( .cfg_ltssm_state(), .cfg_rx_pm_state(), .cfg_tx_pm_state(), - .cfg_rcb_status(), + .cfg_rcb_status(cfg_rcb_status), .cfg_obff_enable(), .cfg_pl_status_change(), .cfg_tph_requester_enable(), @@ -403,7 +404,8 @@ core_inst ( .cfg_max_payload(cfg_max_payload), .cfg_max_read_req(cfg_max_read_req), - + .cfg_rcb_status(cfg_rcb_status), + .cfg_mgmt_addr(cfg_mgmt_addr), .cfg_mgmt_function_number(cfg_mgmt_function_number), .cfg_mgmt_write(cfg_mgmt_write), diff --git a/fpga/lib/pcie/example/AU280/fpga/rtl/fpga_core.v b/fpga/lib/pcie/example/AU280/fpga/rtl/fpga_core.v index e432d9f12..644ee09fd 100644 --- a/fpga/lib/pcie/example/AU280/fpga/rtl/fpga_core.v +++ b/fpga/lib/pcie/example/AU280/fpga/rtl/fpga_core.v @@ -96,6 +96,7 @@ module fpga_core # input wire [2:0] cfg_max_payload, input wire [2:0] cfg_max_read_req, + input wire [3:0] cfg_rcb_status, output wire [9:0] cfg_mgmt_addr, output wire [7:0] cfg_mgmt_function_number, @@ -147,6 +148,8 @@ example_core_pcie_us #( .PCIE_TAG_COUNT(PCIE_TAG_COUNT), .READ_OP_TABLE_SIZE(PCIE_TAG_COUNT), .READ_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)), + .READ_CPLH_FC_LIMIT(128), + .READ_CPLD_FC_LIMIT(2048), .WRITE_OP_TABLE_SIZE(2**(RQ_SEQ_NUM_WIDTH-1)), .WRITE_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)), .BAR0_APERTURE(BAR0_APERTURE), @@ -249,6 +252,8 @@ example_core_pcie_us_inst ( */ .cfg_max_read_req(cfg_max_read_req), .cfg_max_payload(cfg_max_payload), + // .cfg_rcb_status(cfg_rcb_status), + .cfg_rcb_status(1'b1), // force RCB 128 due to insufficient CPLH limit in US+ PCIe HIP /* * Status diff --git a/fpga/lib/pcie/example/AU280/fpga/tb/fpga_core/test_fpga_core.py b/fpga/lib/pcie/example/AU280/fpga/tb/fpga_core/test_fpga_core.py index f274d47a1..56b91ea37 100644 --- a/fpga/lib/pcie/example/AU280/fpga/tb/fpga_core/test_fpga_core.py +++ 
b/fpga/lib/pcie/example/AU280/fpga/tb/fpga_core/test_fpga_core.py @@ -167,7 +167,7 @@ class TB(object): # cfg_rx_pm_state # cfg_tx_pm_state # cfg_ltssm_state - # cfg_rcb_status + cfg_rcb_status=dut.cfg_rcb_status, # cfg_obff_enable # cfg_pl_status_change # cfg_tph_requester_enable diff --git a/fpga/lib/pcie/example/AU50/fpga/rtl/fpga.v b/fpga/lib/pcie/example/AU50/fpga/rtl/fpga.v index 0c2bfc3b5..f02aa7984 100644 --- a/fpga/lib/pcie/example/AU50/fpga/rtl/fpga.v +++ b/fpga/lib/pcie/example/AU50/fpga/rtl/fpga.v @@ -148,6 +148,7 @@ wire pcie_rq_seq_num_vld1; wire [2:0] cfg_max_payload; wire [2:0] cfg_max_read_req; +wire [3:0] cfg_rcb_status; wire [9:0] cfg_mgmt_addr; wire [7:0] cfg_mgmt_function_number; @@ -266,7 +267,7 @@ pcie4c_uscale_plus_inst ( .cfg_ltssm_state(), .cfg_rx_pm_state(), .cfg_tx_pm_state(), - .cfg_rcb_status(), + .cfg_rcb_status(cfg_rcb_status), .cfg_obff_enable(), .cfg_pl_status_change(), .cfg_tph_requester_enable(), @@ -412,7 +413,8 @@ core_inst ( .cfg_max_payload(cfg_max_payload), .cfg_max_read_req(cfg_max_read_req), - + .cfg_rcb_status(cfg_rcb_status), + .cfg_mgmt_addr(cfg_mgmt_addr), .cfg_mgmt_function_number(cfg_mgmt_function_number), .cfg_mgmt_write(cfg_mgmt_write), diff --git a/fpga/lib/pcie/example/AU50/fpga/rtl/fpga_core.v b/fpga/lib/pcie/example/AU50/fpga/rtl/fpga_core.v index 0a98d7397..1fff150c5 100644 --- a/fpga/lib/pcie/example/AU50/fpga/rtl/fpga_core.v +++ b/fpga/lib/pcie/example/AU50/fpga/rtl/fpga_core.v @@ -103,6 +103,7 @@ module fpga_core # input wire [2:0] cfg_max_payload, input wire [2:0] cfg_max_read_req, + input wire [3:0] cfg_rcb_status, output wire [9:0] cfg_mgmt_addr, output wire [7:0] cfg_mgmt_function_number, @@ -158,6 +159,8 @@ example_core_pcie_us #( .PCIE_TAG_COUNT(PCIE_TAG_COUNT), .READ_OP_TABLE_SIZE(PCIE_TAG_COUNT), .READ_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)), + .READ_CPLH_FC_LIMIT(128), + .READ_CPLD_FC_LIMIT(2048), .WRITE_OP_TABLE_SIZE(2**(RQ_SEQ_NUM_WIDTH-1)), .WRITE_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)), 
.BAR0_APERTURE(BAR0_APERTURE), @@ -260,6 +263,8 @@ example_core_pcie_us_inst ( */ .cfg_max_read_req(cfg_max_read_req), .cfg_max_payload(cfg_max_payload), + // .cfg_rcb_status(cfg_rcb_status), + .cfg_rcb_status(1'b1), // force RCB 128 due to insufficient CPLH limit in US+ PCIe HIP /* * Status diff --git a/fpga/lib/pcie/example/AU50/fpga/tb/fpga_core/test_fpga_core.py b/fpga/lib/pcie/example/AU50/fpga/tb/fpga_core/test_fpga_core.py index f274d47a1..56b91ea37 100644 --- a/fpga/lib/pcie/example/AU50/fpga/tb/fpga_core/test_fpga_core.py +++ b/fpga/lib/pcie/example/AU50/fpga/tb/fpga_core/test_fpga_core.py @@ -167,7 +167,7 @@ class TB(object): # cfg_rx_pm_state # cfg_tx_pm_state # cfg_ltssm_state - # cfg_rcb_status + cfg_rcb_status=dut.cfg_rcb_status, # cfg_obff_enable # cfg_pl_status_change # cfg_tph_requester_enable diff --git a/fpga/lib/pcie/example/ExaNIC_X10/fpga/rtl/fpga.v b/fpga/lib/pcie/example/ExaNIC_X10/fpga/rtl/fpga.v index 17054ed41..3ac20e3eb 100644 --- a/fpga/lib/pcie/example/ExaNIC_X10/fpga/rtl/fpga.v +++ b/fpga/lib/pcie/example/ExaNIC_X10/fpga/rtl/fpga.v @@ -141,6 +141,7 @@ wire pcie_rq_seq_num_vld; wire [2:0] cfg_max_payload; wire [2:0] cfg_max_read_req; +wire [3:0] cfg_rcb_status; wire [18:0] cfg_mgmt_addr; wire cfg_mgmt_write; @@ -249,7 +250,7 @@ pcie3_ultrascale_inst ( .cfg_local_error(), .cfg_ltr_enable(), .cfg_ltssm_state(), - .cfg_rcb_status(), + .cfg_rcb_status(cfg_rcb_status), .cfg_dpa_substate_change(), .cfg_obff_enable(), .cfg_pl_status_change(), @@ -401,6 +402,7 @@ core_inst ( .cfg_max_payload(cfg_max_payload), .cfg_max_read_req(cfg_max_read_req), + .cfg_rcb_status(cfg_rcb_status), .cfg_mgmt_addr(cfg_mgmt_addr), .cfg_mgmt_write(cfg_mgmt_write), diff --git a/fpga/lib/pcie/example/ExaNIC_X10/fpga/rtl/fpga_core.v b/fpga/lib/pcie/example/ExaNIC_X10/fpga/rtl/fpga_core.v index ae035c0da..d33065d87 100644 --- a/fpga/lib/pcie/example/ExaNIC_X10/fpga/rtl/fpga_core.v +++ b/fpga/lib/pcie/example/ExaNIC_X10/fpga/rtl/fpga_core.v @@ -101,6 +101,7 @@ module 
fpga_core # input wire [2:0] cfg_max_payload, input wire [2:0] cfg_max_read_req, + input wire [3:0] cfg_rcb_status, output wire [18:0] cfg_mgmt_addr, output wire cfg_mgmt_write, @@ -153,6 +154,8 @@ example_core_pcie_us #( .PCIE_TAG_COUNT(PCIE_TAG_COUNT), .READ_OP_TABLE_SIZE(PCIE_TAG_COUNT), .READ_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)), + .READ_CPLH_FC_LIMIT(64), + .READ_CPLD_FC_LIMIT(992), .WRITE_OP_TABLE_SIZE(2**(RQ_SEQ_NUM_WIDTH-1)), .WRITE_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)), .BAR0_APERTURE(BAR0_APERTURE), @@ -255,6 +258,8 @@ example_core_pcie_us_inst ( */ .cfg_max_read_req(cfg_max_read_req), .cfg_max_payload(cfg_max_payload), + // .cfg_rcb_status(cfg_rcb_status), + .cfg_rcb_status(1'b1), // force RCB 128 due to insufficient CPLH limit in US PCIe HIP /* * Status diff --git a/fpga/lib/pcie/example/ExaNIC_X10/fpga/tb/fpga_core/test_fpga_core.py b/fpga/lib/pcie/example/ExaNIC_X10/fpga/tb/fpga_core/test_fpga_core.py index 6003b0e6c..8ce276b39 100644 --- a/fpga/lib/pcie/example/ExaNIC_X10/fpga/tb/fpga_core/test_fpga_core.py +++ b/fpga/lib/pcie/example/ExaNIC_X10/fpga/tb/fpga_core/test_fpga_core.py @@ -142,7 +142,7 @@ class TB(object): # cfg_rx_pm_state # cfg_tx_pm_state # cfg_ltssm_state - # cfg_rcb_status + cfg_rcb_status=dut.cfg_rcb_status, # cfg_obff_enable # cfg_pl_status_change # cfg_tph_requester_enable diff --git a/fpga/lib/pcie/example/ExaNIC_X25/fpga/rtl/fpga.v b/fpga/lib/pcie/example/ExaNIC_X25/fpga/rtl/fpga.v index 3f4c9895e..30463f830 100644 --- a/fpga/lib/pcie/example/ExaNIC_X25/fpga/rtl/fpga.v +++ b/fpga/lib/pcie/example/ExaNIC_X25/fpga/rtl/fpga.v @@ -143,6 +143,7 @@ wire pcie_rq_seq_num_vld1; wire [2:0] cfg_max_payload; wire [2:0] cfg_max_read_req; +wire [3:0] cfg_rcb_status; wire [9:0] cfg_mgmt_addr; wire [7:0] cfg_mgmt_function_number; @@ -261,7 +262,7 @@ pcie4_uscale_plus_inst ( .cfg_ltssm_state(), .cfg_rx_pm_state(), .cfg_tx_pm_state(), - .cfg_rcb_status(), + .cfg_rcb_status(cfg_rcb_status), .cfg_obff_enable(), .cfg_pl_status_change(), 
.cfg_tph_requester_enable(), @@ -407,7 +408,8 @@ core_inst ( .cfg_max_payload(cfg_max_payload), .cfg_max_read_req(cfg_max_read_req), - + .cfg_rcb_status(cfg_rcb_status), + .cfg_mgmt_addr(cfg_mgmt_addr), .cfg_mgmt_function_number(cfg_mgmt_function_number), .cfg_mgmt_write(cfg_mgmt_write), diff --git a/fpga/lib/pcie/example/ExaNIC_X25/fpga/rtl/fpga_core.v b/fpga/lib/pcie/example/ExaNIC_X25/fpga/rtl/fpga_core.v index d610ab3c5..08a1bef83 100644 --- a/fpga/lib/pcie/example/ExaNIC_X25/fpga/rtl/fpga_core.v +++ b/fpga/lib/pcie/example/ExaNIC_X25/fpga/rtl/fpga_core.v @@ -103,6 +103,7 @@ module fpga_core # input wire [2:0] cfg_max_payload, input wire [2:0] cfg_max_read_req, + input wire [3:0] cfg_rcb_status, output wire [9:0] cfg_mgmt_addr, output wire [7:0] cfg_mgmt_function_number, @@ -158,6 +159,8 @@ example_core_pcie_us #( .PCIE_TAG_COUNT(PCIE_TAG_COUNT), .READ_OP_TABLE_SIZE(PCIE_TAG_COUNT), .READ_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)), + .READ_CPLH_FC_LIMIT(128), + .READ_CPLD_FC_LIMIT(2048), .WRITE_OP_TABLE_SIZE(2**(RQ_SEQ_NUM_WIDTH-1)), .WRITE_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)), .BAR0_APERTURE(BAR0_APERTURE), @@ -260,6 +263,8 @@ example_core_pcie_us_inst ( */ .cfg_max_read_req(cfg_max_read_req), .cfg_max_payload(cfg_max_payload), + // .cfg_rcb_status(cfg_rcb_status), + .cfg_rcb_status(1'b1), // force RCB 128 due to insufficient CPLH limit in US+ PCIe HIP /* * Status diff --git a/fpga/lib/pcie/example/ExaNIC_X25/fpga/tb/fpga_core/test_fpga_core.py b/fpga/lib/pcie/example/ExaNIC_X25/fpga/tb/fpga_core/test_fpga_core.py index 82ab34e67..429c5adb1 100644 --- a/fpga/lib/pcie/example/ExaNIC_X25/fpga/tb/fpga_core/test_fpga_core.py +++ b/fpga/lib/pcie/example/ExaNIC_X25/fpga/tb/fpga_core/test_fpga_core.py @@ -167,7 +167,7 @@ class TB(object): # cfg_rx_pm_state # cfg_tx_pm_state # cfg_ltssm_state - # cfg_rcb_status + cfg_rcb_status=dut.cfg_rcb_status, # cfg_obff_enable # cfg_pl_status_change # cfg_tph_requester_enable diff --git a/fpga/lib/pcie/example/VCU108/fpga/rtl/fpga.v 
b/fpga/lib/pcie/example/VCU108/fpga/rtl/fpga.v index 4a25894ea..311bd933a 100644 --- a/fpga/lib/pcie/example/VCU108/fpga/rtl/fpga.v +++ b/fpga/lib/pcie/example/VCU108/fpga/rtl/fpga.v @@ -176,6 +176,7 @@ wire pcie_rq_seq_num_vld; wire [2:0] cfg_max_payload; wire [2:0] cfg_max_read_req; +wire [3:0] cfg_rcb_status; wire [18:0] cfg_mgmt_addr; wire cfg_mgmt_write; @@ -284,7 +285,7 @@ pcie3_ultrascale_inst ( .cfg_local_error(), .cfg_ltr_enable(), .cfg_ltssm_state(), - .cfg_rcb_status(), + .cfg_rcb_status(cfg_rcb_status), .cfg_dpa_substate_change(), .cfg_obff_enable(), .cfg_pl_status_change(), @@ -440,6 +441,7 @@ core_inst ( .cfg_max_payload(cfg_max_payload), .cfg_max_read_req(cfg_max_read_req), + .cfg_rcb_status(cfg_rcb_status), .cfg_mgmt_addr(cfg_mgmt_addr), .cfg_mgmt_write(cfg_mgmt_write), diff --git a/fpga/lib/pcie/example/VCU108/fpga/rtl/fpga_core.v b/fpga/lib/pcie/example/VCU108/fpga/rtl/fpga_core.v index 0ff250309..510ea1bba 100644 --- a/fpga/lib/pcie/example/VCU108/fpga/rtl/fpga_core.v +++ b/fpga/lib/pcie/example/VCU108/fpga/rtl/fpga_core.v @@ -105,6 +105,7 @@ module fpga_core # input wire [2:0] cfg_max_payload, input wire [2:0] cfg_max_read_req, + input wire [3:0] cfg_rcb_status, output wire [18:0] cfg_mgmt_addr, output wire cfg_mgmt_write, @@ -155,6 +156,8 @@ example_core_pcie_us #( .PCIE_TAG_COUNT(PCIE_TAG_COUNT), .READ_OP_TABLE_SIZE(PCIE_TAG_COUNT), .READ_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)), + .READ_CPLH_FC_LIMIT(64), + .READ_CPLD_FC_LIMIT(992), .WRITE_OP_TABLE_SIZE(2**(RQ_SEQ_NUM_WIDTH-1)), .WRITE_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)), .BAR0_APERTURE(BAR0_APERTURE), @@ -257,6 +260,8 @@ example_core_pcie_us_inst ( */ .cfg_max_read_req(cfg_max_read_req), .cfg_max_payload(cfg_max_payload), + // .cfg_rcb_status(cfg_rcb_status), + .cfg_rcb_status(1'b1), // force RCB 128 due to insufficient CPLH limit in US PCIe HIP /* * Status diff --git a/fpga/lib/pcie/example/VCU108/fpga/tb/fpga_core/test_fpga_core.py b/fpga/lib/pcie/example/VCU108/fpga/tb/fpga_core/test_fpga_core.py 
index 38bff005c..623ba07af 100644 --- a/fpga/lib/pcie/example/VCU108/fpga/tb/fpga_core/test_fpga_core.py +++ b/fpga/lib/pcie/example/VCU108/fpga/tb/fpga_core/test_fpga_core.py @@ -142,7 +142,7 @@ class TB(object): # cfg_rx_pm_state # cfg_tx_pm_state # cfg_ltssm_state - # cfg_rcb_status + cfg_rcb_status=dut.cfg_rcb_status, # cfg_obff_enable # cfg_pl_status_change # cfg_tph_requester_enable diff --git a/fpga/lib/pcie/example/VCU118/fpga/rtl/fpga.v b/fpga/lib/pcie/example/VCU118/fpga/rtl/fpga.v index f9699592c..eceb81788 100644 --- a/fpga/lib/pcie/example/VCU118/fpga/rtl/fpga.v +++ b/fpga/lib/pcie/example/VCU118/fpga/rtl/fpga.v @@ -178,6 +178,7 @@ wire pcie_rq_seq_num_vld1; wire [2:0] cfg_max_payload; wire [2:0] cfg_max_read_req; +wire [3:0] cfg_rcb_status; wire [9:0] cfg_mgmt_addr; wire [7:0] cfg_mgmt_function_number; @@ -296,7 +297,7 @@ pcie4_uscale_plus_inst ( .cfg_ltssm_state(), .cfg_rx_pm_state(), .cfg_tx_pm_state(), - .cfg_rcb_status(), + .cfg_rcb_status(cfg_rcb_status), .cfg_obff_enable(), .cfg_pl_status_change(), .cfg_tph_requester_enable(), @@ -446,7 +447,8 @@ core_inst ( .cfg_max_payload(cfg_max_payload), .cfg_max_read_req(cfg_max_read_req), - + .cfg_rcb_status(cfg_rcb_status), + .cfg_mgmt_addr(cfg_mgmt_addr), .cfg_mgmt_function_number(cfg_mgmt_function_number), .cfg_mgmt_write(cfg_mgmt_write), diff --git a/fpga/lib/pcie/example/VCU118/fpga/rtl/fpga_core.v b/fpga/lib/pcie/example/VCU118/fpga/rtl/fpga_core.v index eea8f0820..573ce020f 100644 --- a/fpga/lib/pcie/example/VCU118/fpga/rtl/fpga_core.v +++ b/fpga/lib/pcie/example/VCU118/fpga/rtl/fpga_core.v @@ -107,6 +107,7 @@ module fpga_core # input wire [2:0] cfg_max_payload, input wire [2:0] cfg_max_read_req, + input wire [3:0] cfg_rcb_status, output wire [9:0] cfg_mgmt_addr, output wire [7:0] cfg_mgmt_function_number, @@ -160,6 +161,8 @@ example_core_pcie_us #( .PCIE_TAG_COUNT(PCIE_TAG_COUNT), .READ_OP_TABLE_SIZE(PCIE_TAG_COUNT), .READ_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)), + .READ_CPLH_FC_LIMIT(128), + 
.READ_CPLD_FC_LIMIT(2048), .WRITE_OP_TABLE_SIZE(2**(RQ_SEQ_NUM_WIDTH-1)), .WRITE_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)), .BAR0_APERTURE(BAR0_APERTURE), @@ -262,6 +265,8 @@ example_core_pcie_us_inst ( */ .cfg_max_read_req(cfg_max_read_req), .cfg_max_payload(cfg_max_payload), + // .cfg_rcb_status(cfg_rcb_status), + .cfg_rcb_status(1'b1), // force RCB 128 due to insufficient CPLH limit in US+ PCIe HIP /* * Status diff --git a/fpga/lib/pcie/example/VCU118/fpga/tb/fpga_core/test_fpga_core.py b/fpga/lib/pcie/example/VCU118/fpga/tb/fpga_core/test_fpga_core.py index c021857dd..35d3440fc 100644 --- a/fpga/lib/pcie/example/VCU118/fpga/tb/fpga_core/test_fpga_core.py +++ b/fpga/lib/pcie/example/VCU118/fpga/tb/fpga_core/test_fpga_core.py @@ -167,7 +167,7 @@ class TB(object): # cfg_rx_pm_state # cfg_tx_pm_state # cfg_ltssm_state - # cfg_rcb_status + cfg_rcb_status=dut.cfg_rcb_status, # cfg_obff_enable # cfg_pl_status_change # cfg_tph_requester_enable diff --git a/fpga/lib/pcie/example/VCU1525/fpga/rtl/fpga.v b/fpga/lib/pcie/example/VCU1525/fpga/rtl/fpga.v index 69a96f6ad..426b56c8c 100644 --- a/fpga/lib/pcie/example/VCU1525/fpga/rtl/fpga.v +++ b/fpga/lib/pcie/example/VCU1525/fpga/rtl/fpga.v @@ -158,6 +158,7 @@ wire pcie_rq_seq_num_vld1; wire [2:0] cfg_max_payload; wire [2:0] cfg_max_read_req; +wire [3:0] cfg_rcb_status; wire [9:0] cfg_mgmt_addr; wire [7:0] cfg_mgmt_function_number; @@ -276,7 +277,7 @@ pcie4_uscale_plus_inst ( .cfg_ltssm_state(), .cfg_rx_pm_state(), .cfg_tx_pm_state(), - .cfg_rcb_status(), + .cfg_rcb_status(cfg_rcb_status), .cfg_obff_enable(), .cfg_pl_status_change(), .cfg_tph_requester_enable(), @@ -421,7 +422,8 @@ core_inst ( .cfg_max_payload(cfg_max_payload), .cfg_max_read_req(cfg_max_read_req), - + .cfg_rcb_status(cfg_rcb_status), + .cfg_mgmt_addr(cfg_mgmt_addr), .cfg_mgmt_function_number(cfg_mgmt_function_number), .cfg_mgmt_write(cfg_mgmt_write), diff --git a/fpga/lib/pcie/example/VCU1525/fpga/rtl/fpga_core.v b/fpga/lib/pcie/example/VCU1525/fpga/rtl/fpga_core.v 
index 595baa235..39d8ab260 100644 --- a/fpga/lib/pcie/example/VCU1525/fpga/rtl/fpga_core.v +++ b/fpga/lib/pcie/example/VCU1525/fpga/rtl/fpga_core.v @@ -102,6 +102,7 @@ module fpga_core # input wire [2:0] cfg_max_payload, input wire [2:0] cfg_max_read_req, + input wire [3:0] cfg_rcb_status, output wire [9:0] cfg_mgmt_addr, output wire [7:0] cfg_mgmt_function_number, @@ -155,6 +156,8 @@ example_core_pcie_us #( .PCIE_TAG_COUNT(PCIE_TAG_COUNT), .READ_OP_TABLE_SIZE(PCIE_TAG_COUNT), .READ_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)), + .READ_CPLH_FC_LIMIT(128), + .READ_CPLD_FC_LIMIT(2048), .WRITE_OP_TABLE_SIZE(2**(RQ_SEQ_NUM_WIDTH-1)), .WRITE_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)), .BAR0_APERTURE(BAR0_APERTURE), @@ -257,6 +260,8 @@ example_core_pcie_us_inst ( */ .cfg_max_read_req(cfg_max_read_req), .cfg_max_payload(cfg_max_payload), + // .cfg_rcb_status(cfg_rcb_status), + .cfg_rcb_status(1'b1), // force RCB 128 due to insufficient CPLH limit in US+ PCIe HIP /* * Status diff --git a/fpga/lib/pcie/example/VCU1525/fpga/tb/fpga_core/test_fpga_core.py b/fpga/lib/pcie/example/VCU1525/fpga/tb/fpga_core/test_fpga_core.py index 6b5cd01c2..5508fb415 100644 --- a/fpga/lib/pcie/example/VCU1525/fpga/tb/fpga_core/test_fpga_core.py +++ b/fpga/lib/pcie/example/VCU1525/fpga/tb/fpga_core/test_fpga_core.py @@ -167,7 +167,7 @@ class TB(object): # cfg_rx_pm_state # cfg_tx_pm_state # cfg_ltssm_state - # cfg_rcb_status + cfg_rcb_status=dut.cfg_rcb_status, # cfg_obff_enable # cfg_pl_status_change # cfg_tph_requester_enable diff --git a/fpga/lib/pcie/example/ZCU106/fpga/rtl/fpga.v b/fpga/lib/pcie/example/ZCU106/fpga/rtl/fpga.v index da08eb111..6e9f9e77d 100644 --- a/fpga/lib/pcie/example/ZCU106/fpga/rtl/fpga.v +++ b/fpga/lib/pcie/example/ZCU106/fpga/rtl/fpga.v @@ -178,6 +178,7 @@ wire pcie_rq_seq_num_vld1; wire [2:0] cfg_max_payload; wire [2:0] cfg_max_read_req; +wire [3:0] cfg_rcb_status; wire [9:0] cfg_mgmt_addr; wire [7:0] cfg_mgmt_function_number; @@ -296,7 +297,7 @@ pcie4_uscale_plus_inst ( 
.cfg_ltssm_state(), .cfg_rx_pm_state(), .cfg_tx_pm_state(), - .cfg_rcb_status(), + .cfg_rcb_status(cfg_rcb_status), .cfg_obff_enable(), .cfg_pl_status_change(), .cfg_tph_requester_enable(), @@ -446,7 +447,8 @@ core_inst ( .cfg_max_payload(cfg_max_payload), .cfg_max_read_req(cfg_max_read_req), - + .cfg_rcb_status(cfg_rcb_status), + .cfg_mgmt_addr(cfg_mgmt_addr), .cfg_mgmt_function_number(cfg_mgmt_function_number), .cfg_mgmt_write(cfg_mgmt_write), diff --git a/fpga/lib/pcie/example/ZCU106/fpga/rtl/fpga_core.v b/fpga/lib/pcie/example/ZCU106/fpga/rtl/fpga_core.v index ab2acae02..b3879c240 100644 --- a/fpga/lib/pcie/example/ZCU106/fpga/rtl/fpga_core.v +++ b/fpga/lib/pcie/example/ZCU106/fpga/rtl/fpga_core.v @@ -107,6 +107,7 @@ module fpga_core # input wire [2:0] cfg_max_payload, input wire [2:0] cfg_max_read_req, + input wire [3:0] cfg_rcb_status, output wire [9:0] cfg_mgmt_addr, output wire [7:0] cfg_mgmt_function_number, @@ -160,6 +161,8 @@ example_core_pcie_us #( .PCIE_TAG_COUNT(PCIE_TAG_COUNT), .READ_OP_TABLE_SIZE(PCIE_TAG_COUNT), .READ_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)), + .READ_CPLH_FC_LIMIT(128), + .READ_CPLD_FC_LIMIT(2048), .WRITE_OP_TABLE_SIZE(2**(RQ_SEQ_NUM_WIDTH-1)), .WRITE_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)), .BAR0_APERTURE(BAR0_APERTURE), @@ -262,6 +265,8 @@ example_core_pcie_us_inst ( */ .cfg_max_read_req(cfg_max_read_req), .cfg_max_payload(cfg_max_payload), + // .cfg_rcb_status(cfg_rcb_status), + .cfg_rcb_status(1'b1), // force RCB 128 due to insufficient CPLH limit in US+ PCIe HIP /* * Status diff --git a/fpga/lib/pcie/example/ZCU106/fpga/tb/fpga_core/test_fpga_core.py b/fpga/lib/pcie/example/ZCU106/fpga/tb/fpga_core/test_fpga_core.py index 8f9c83a56..e471e8229 100644 --- a/fpga/lib/pcie/example/ZCU106/fpga/tb/fpga_core/test_fpga_core.py +++ b/fpga/lib/pcie/example/ZCU106/fpga/tb/fpga_core/test_fpga_core.py @@ -167,7 +167,7 @@ class TB(object): # cfg_rx_pm_state # cfg_tx_pm_state # cfg_ltssm_state - # cfg_rcb_status + cfg_rcb_status=dut.cfg_rcb_status, # 
cfg_obff_enable # cfg_pl_status_change # cfg_tph_requester_enable diff --git a/fpga/lib/pcie/example/common/rtl/example_core_pcie.v b/fpga/lib/pcie/example/common/rtl/example_core_pcie.v index 3d0805a0c..db710450c 100644 --- a/fpga/lib/pcie/example/common/rtl/example_core_pcie.v +++ b/fpga/lib/pcie/example/common/rtl/example_core_pcie.v @@ -57,6 +57,10 @@ module example_core_pcie # parameter READ_OP_TABLE_SIZE = PCIE_TAG_COUNT, // In-flight transmit limit (read) parameter READ_TX_LIMIT = 2**TX_SEQ_NUM_WIDTH, + // Completion header flow control credit limit (read) + parameter READ_CPLH_FC_LIMIT = 0, + // Completion data flow control credit limit (read) + parameter READ_CPLD_FC_LIMIT = READ_CPLH_FC_LIMIT*4, // Operation table size (write) parameter WRITE_OP_TABLE_SIZE = 2**TX_SEQ_NUM_WIDTH, // In-flight transmit limit (write) @@ -158,6 +162,7 @@ module example_core_pcie # */ input wire [7:0] bus_num, input wire ext_tag_enable, + input wire rcb_128b, input wire [2:0] max_read_request_size, input wire [2:0] max_payload_size, input wire msix_enable, @@ -784,6 +789,8 @@ dma_if_pcie #( .TAG_WIDTH(DMA_TAG_WIDTH), .READ_OP_TABLE_SIZE(READ_OP_TABLE_SIZE), .READ_TX_LIMIT(READ_TX_LIMIT), + .READ_CPLH_FC_LIMIT(READ_CPLH_FC_LIMIT), + .READ_CPLD_FC_LIMIT(READ_CPLD_FC_LIMIT), .WRITE_OP_TABLE_SIZE(WRITE_OP_TABLE_SIZE), .WRITE_TX_LIMIT(WRITE_TX_LIMIT), .TLP_FORCE_64_BIT_ADDR(TLP_FORCE_64_BIT_ADDR), @@ -896,6 +903,7 @@ dma_if_pcie_inst ( .read_enable(1'b1), .write_enable(1'b1), .ext_tag_enable(ext_tag_enable), + .rcb_128b(rcb_128b), .requester_id({bus_num, 5'd0, 3'd0}), .max_read_request_size(max_read_request_size), .max_payload_size(max_payload_size), @@ -903,6 +911,8 @@ dma_if_pcie_inst ( /* * Status */ + .status_rd_busy(), + .status_wr_busy(), .status_error_cor(status_error_cor_int[3]), .status_error_uncor(status_error_uncor_int[3]) ); diff --git a/fpga/lib/pcie/example/common/rtl/example_core_pcie_ptile.v b/fpga/lib/pcie/example/common/rtl/example_core_pcie_ptile.v index 
bbd6671c6..877b2d351 100644 --- a/fpga/lib/pcie/example/common/rtl/example_core_pcie_ptile.v +++ b/fpga/lib/pcie/example/common/rtl/example_core_pcie_ptile.v @@ -57,6 +57,10 @@ module example_core_pcie_ptile # parameter READ_OP_TABLE_SIZE = PCIE_TAG_COUNT, // In-flight transmit limit (read) parameter READ_TX_LIMIT = 2**TX_SEQ_NUM_WIDTH, + // Completion header flow control credit limit (read) + parameter READ_CPLH_FC_LIMIT = 1144, + // Completion data flow control credit limit (read) + parameter READ_CPLD_FC_LIMIT = 2888, // Operation table size (write) parameter WRITE_OP_TABLE_SIZE = 2**TX_SEQ_NUM_WIDTH, // In-flight transmit limit (write) @@ -189,6 +193,7 @@ wire pcie_tx_msix_wr_req_tlp_eop; wire pcie_tx_msix_wr_req_tlp_ready; wire ext_tag_enable; +wire rcb_128b; wire [7:0] bus_num; wire [2:0] max_read_request_size; wire [2:0] max_payload_size; @@ -356,6 +361,7 @@ pcie_ptile_if_inst ( * Configuration outputs */ .ext_tag_enable(ext_tag_enable), + .rcb_128b(rcb_128b), .bus_num(bus_num), .max_read_request_size(max_read_request_size), .max_payload_size(max_payload_size), @@ -376,6 +382,8 @@ example_core_pcie #( .PCIE_TAG_COUNT(PCIE_TAG_COUNT), .READ_OP_TABLE_SIZE(READ_OP_TABLE_SIZE), .READ_TX_LIMIT(READ_TX_LIMIT), + .READ_CPLH_FC_LIMIT(READ_CPLH_FC_LIMIT), + .READ_CPLD_FC_LIMIT(READ_CPLD_FC_LIMIT), .WRITE_OP_TABLE_SIZE(WRITE_OP_TABLE_SIZE), .WRITE_TX_LIMIT(WRITE_TX_LIMIT), .TLP_FORCE_64_BIT_ADDR(0), @@ -470,6 +478,7 @@ core_pcie_inst ( */ .bus_num(bus_num), .ext_tag_enable(ext_tag_enable), + .rcb_128b(rcb_128b), .max_read_request_size(max_read_request_size), .max_payload_size(max_payload_size), .msix_enable(msix_enable), diff --git a/fpga/lib/pcie/example/common/rtl/example_core_pcie_s10.v b/fpga/lib/pcie/example/common/rtl/example_core_pcie_s10.v index c539ab89b..c51ec3ce1 100644 --- a/fpga/lib/pcie/example/common/rtl/example_core_pcie_s10.v +++ b/fpga/lib/pcie/example/common/rtl/example_core_pcie_s10.v @@ -55,6 +55,10 @@ module example_core_pcie_s10 # parameter 
READ_OP_TABLE_SIZE = PCIE_TAG_COUNT, // In-flight transmit limit (read) parameter READ_TX_LIMIT = 2**TX_SEQ_NUM_WIDTH, + // Completion header flow control credit limit (read) + parameter READ_CPLH_FC_LIMIT = 770, + // Completion data flow control credit limit (read) + parameter READ_CPLD_FC_LIMIT = 2500, // Operation table size (write) parameter WRITE_OP_TABLE_SIZE = 2**TX_SEQ_NUM_WIDTH, // In-flight transmit limit (write) @@ -183,6 +187,7 @@ wire pcie_tx_msix_wr_req_tlp_eop; wire pcie_tx_msix_wr_req_tlp_ready; wire ext_tag_enable; +wire rcb_128b; wire [7:0] bus_num; wire [2:0] max_read_request_size; wire [2:0] max_payload_size; @@ -358,6 +363,7 @@ pcie_s10_if_inst ( * Configuration outputs */ .ext_tag_enable(ext_tag_enable), + .rcb_128b(rcb_128b), .bus_num(bus_num), .max_read_request_size(max_read_request_size), .max_payload_size(max_payload_size), @@ -383,6 +389,8 @@ example_core_pcie #( .PCIE_TAG_COUNT(PCIE_TAG_COUNT), .READ_OP_TABLE_SIZE(READ_OP_TABLE_SIZE), .READ_TX_LIMIT(READ_TX_LIMIT), + .READ_CPLH_FC_LIMIT(READ_CPLH_FC_LIMIT), + .READ_CPLD_FC_LIMIT(READ_CPLD_FC_LIMIT), .WRITE_OP_TABLE_SIZE(WRITE_OP_TABLE_SIZE), .WRITE_TX_LIMIT(WRITE_TX_LIMIT), .TLP_FORCE_64_BIT_ADDR(0), @@ -477,6 +485,7 @@ core_pcie_inst ( */ .bus_num(bus_num), .ext_tag_enable(ext_tag_enable), + .rcb_128b(rcb_128b), .max_read_request_size(max_read_request_size), .max_payload_size(max_payload_size), .msix_enable(msix_enable), diff --git a/fpga/lib/pcie/example/common/rtl/example_core_pcie_us.v b/fpga/lib/pcie/example/common/rtl/example_core_pcie_us.v index 7db3108c4..c8fe2cfcf 100644 --- a/fpga/lib/pcie/example/common/rtl/example_core_pcie_us.v +++ b/fpga/lib/pcie/example/common/rtl/example_core_pcie_us.v @@ -67,6 +67,10 @@ module example_core_pcie_us # parameter READ_OP_TABLE_SIZE = PCIE_TAG_COUNT, // In-flight transmit limit (read) parameter READ_TX_LIMIT = 2**(RQ_SEQ_NUM_WIDTH-1), + // Completion header flow control credit limit (read) + parameter READ_CPLH_FC_LIMIT = 
AXIS_PCIE_RQ_USER_WIDTH == 60 ? 64 : 128, + // Completion data flow control credit limit (read) + parameter READ_CPLD_FC_LIMIT = AXIS_PCIE_RQ_USER_WIDTH == 60 ? 992 : 2048, // Operation table size (write) parameter WRITE_OP_TABLE_SIZE = 2**(RQ_SEQ_NUM_WIDTH-1), // In-flight transmit limit (write) @@ -147,6 +151,7 @@ module example_core_pcie_us # */ input wire [2:0] cfg_max_read_req, input wire [2:0] cfg_max_payload, + input wire [3:0] cfg_rcb_status, /* * Configuration flow control interface @@ -513,6 +518,8 @@ example_core_pcie #( .PCIE_TAG_COUNT(PCIE_TAG_COUNT), .READ_OP_TABLE_SIZE(READ_OP_TABLE_SIZE), .READ_TX_LIMIT(READ_TX_LIMIT), + .READ_CPLH_FC_LIMIT(READ_CPLH_FC_LIMIT), + .READ_CPLD_FC_LIMIT(READ_CPLD_FC_LIMIT), .WRITE_OP_TABLE_SIZE(WRITE_OP_TABLE_SIZE), .WRITE_TX_LIMIT(WRITE_TX_LIMIT), .TLP_FORCE_64_BIT_ADDR(1), @@ -607,6 +614,7 @@ core_pcie_inst ( */ .bus_num(8'd0), .ext_tag_enable(ext_tag_enable), + .rcb_128b(cfg_rcb_status[0]), .max_read_request_size(cfg_max_read_req), .max_payload_size(cfg_max_payload), .msix_enable(msix_enable), diff --git a/fpga/lib/pcie/example/common/tb/example_core_pcie/Makefile b/fpga/lib/pcie/example/common/tb/example_core_pcie/Makefile index 2c6986bac..590b9f67f 100644 --- a/fpga/lib/pcie/example/common/tb/example_core_pcie/Makefile +++ b/fpga/lib/pcie/example/common/tb/example_core_pcie/Makefile @@ -60,6 +60,8 @@ export PARAM_IMM_ENABLE := 1 export PARAM_IMM_WIDTH := 32 export PARAM_READ_OP_TABLE_SIZE := $(PARAM_PCIE_TAG_COUNT) export PARAM_READ_TX_LIMIT := $(shell echo "$$(( 1 << $(PARAM_TX_SEQ_NUM_WIDTH) ))" ) +export PARAM_READ_CPLH_FC_LIMIT := 0 +export PARAM_READ_CPLD_FC_LIMIT := $(shell expr $(PARAM_READ_CPLH_FC_LIMIT) \* 4 ) export PARAM_WRITE_OP_TABLE_SIZE := $(shell echo "$$(( 1 << $(PARAM_TX_SEQ_NUM_WIDTH) ))" ) export PARAM_WRITE_TX_LIMIT := $(shell echo "$$(( 1 << $(PARAM_TX_SEQ_NUM_WIDTH) ))" ) export PARAM_TLP_FORCE_64_BIT_ADDR := 0 diff --git 
a/fpga/lib/pcie/example/common/tb/example_core_pcie/test_example_core_pcie.py b/fpga/lib/pcie/example/common/tb/example_core_pcie/test_example_core_pcie.py index 31c47a4da..70f1b9813 100644 --- a/fpga/lib/pcie/example/common/tb/example_core_pcie/test_example_core_pcie.py +++ b/fpga/lib/pcie/example/common/tb/example_core_pcie/test_example_core_pcie.py @@ -121,6 +121,7 @@ class TB(object): cfg_max_payload=dut.max_payload_size, cfg_max_read_req=dut.max_read_request_size, cfg_ext_tag_enable=dut.ext_tag_enable, + cfg_rcb=dut.rcb_128b, ) self.dev.log.setLevel(logging.DEBUG) @@ -422,6 +423,8 @@ def test_example_core_pcie(request, pcie_data_width): parameters['IMM_WIDTH'] = 32 parameters['READ_OP_TABLE_SIZE'] = parameters['PCIE_TAG_COUNT'] parameters['READ_TX_LIMIT'] = 2**parameters['TX_SEQ_NUM_WIDTH'] + parameters['READ_CPLH_FC_LIMIT'] = 0 + parameters['READ_CPLD_FC_LIMIT'] = parameters['READ_CPLH_FC_LIMIT']*4 parameters['WRITE_OP_TABLE_SIZE'] = 2**parameters['TX_SEQ_NUM_WIDTH'] parameters['WRITE_TX_LIMIT'] = 2**parameters['TX_SEQ_NUM_WIDTH'] parameters['TLP_FORCE_64_BIT_ADDR'] = 0 diff --git a/fpga/lib/pcie/example/common/tb/example_core_pcie_ptile/Makefile b/fpga/lib/pcie/example/common/tb/example_core_pcie_ptile/Makefile index 21d5fe0c3..dbd01170c 100644 --- a/fpga/lib/pcie/example/common/tb/example_core_pcie_ptile/Makefile +++ b/fpga/lib/pcie/example/common/tb/example_core_pcie_ptile/Makefile @@ -70,6 +70,8 @@ export PARAM_IMM_ENABLE := 1 export PARAM_IMM_WIDTH := 32 export PARAM_READ_OP_TABLE_SIZE := $(PARAM_PCIE_TAG_COUNT) export PARAM_READ_TX_LIMIT := $(shell echo "$$(( 1 << $(PARAM_TX_SEQ_NUM_WIDTH) ))" ) +export PARAM_READ_CPLH_FC_LIMIT := 1144 +export PARAM_READ_CPLD_FC_LIMIT := 2888 export PARAM_WRITE_OP_TABLE_SIZE := $(shell echo "$$(( 1 << $(PARAM_TX_SEQ_NUM_WIDTH) ))" ) export PARAM_WRITE_TX_LIMIT := $(shell echo "$$(( 1 << $(PARAM_TX_SEQ_NUM_WIDTH) ))" ) export PARAM_BAR0_APERTURE := 24 diff --git 
a/fpga/lib/pcie/example/common/tb/example_core_pcie_ptile/test_example_core_pcie_ptile.py b/fpga/lib/pcie/example/common/tb/example_core_pcie_ptile/test_example_core_pcie_ptile.py index f49a83f10..b82024cc6 100644 --- a/fpga/lib/pcie/example/common/tb/example_core_pcie_ptile/test_example_core_pcie_ptile.py +++ b/fpga/lib/pcie/example/common/tb/example_core_pcie_ptile/test_example_core_pcie_ptile.py @@ -518,6 +518,8 @@ def test_example_core_pcie_ptile(request, data_width): parameters['IMM_WIDTH'] = 32 parameters['READ_OP_TABLE_SIZE'] = parameters['PCIE_TAG_COUNT'] parameters['READ_TX_LIMIT'] = 2**parameters['TX_SEQ_NUM_WIDTH'] + parameters['READ_CPLH_FC_LIMIT'] = 1144 + parameters['READ_CPLD_FC_LIMIT'] = 2888 parameters['WRITE_OP_TABLE_SIZE'] = 2**parameters['TX_SEQ_NUM_WIDTH'] parameters['WRITE_TX_LIMIT'] = 2**parameters['TX_SEQ_NUM_WIDTH'] parameters['BAR0_APERTURE'] = 24 diff --git a/fpga/lib/pcie/example/common/tb/example_core_pcie_s10/Makefile b/fpga/lib/pcie/example/common/tb/example_core_pcie_s10/Makefile index 82abc6534..6b629949c 100644 --- a/fpga/lib/pcie/example/common/tb/example_core_pcie_s10/Makefile +++ b/fpga/lib/pcie/example/common/tb/example_core_pcie_s10/Makefile @@ -68,6 +68,8 @@ export PARAM_IMM_ENABLE := 1 export PARAM_IMM_WIDTH := 32 export PARAM_READ_OP_TABLE_SIZE := $(PARAM_PCIE_TAG_COUNT) export PARAM_READ_TX_LIMIT := $(shell echo "$$(( 1 << $(PARAM_TX_SEQ_NUM_WIDTH) ))" ) +export PARAM_READ_CPLH_FC_LIMIT := 770 +export PARAM_READ_CPLD_FC_LIMIT := 2500 export PARAM_WRITE_OP_TABLE_SIZE := $(shell echo "$$(( 1 << $(PARAM_TX_SEQ_NUM_WIDTH) ))" ) export PARAM_WRITE_TX_LIMIT := $(shell echo "$$(( 1 << $(PARAM_TX_SEQ_NUM_WIDTH) ))" ) export PARAM_BAR0_APERTURE := 24 diff --git a/fpga/lib/pcie/example/common/tb/example_core_pcie_s10/test_example_core_pcie_s10.py b/fpga/lib/pcie/example/common/tb/example_core_pcie_s10/test_example_core_pcie_s10.py index dd87411d4..b74f4a58e 100644 --- 
a/fpga/lib/pcie/example/common/tb/example_core_pcie_s10/test_example_core_pcie_s10.py +++ b/fpga/lib/pcie/example/common/tb/example_core_pcie_s10/test_example_core_pcie_s10.py @@ -465,6 +465,8 @@ def test_example_core_pcie_s10(request, data_width, l_tile): parameters['IMM_WIDTH'] = 32 parameters['READ_OP_TABLE_SIZE'] = parameters['PCIE_TAG_COUNT'] parameters['READ_TX_LIMIT'] = 2**parameters['TX_SEQ_NUM_WIDTH'] + parameters['READ_CPLH_FC_LIMIT'] = 770 + parameters['READ_CPLD_FC_LIMIT'] = 2500 parameters['WRITE_OP_TABLE_SIZE'] = 2**parameters['TX_SEQ_NUM_WIDTH'] parameters['WRITE_TX_LIMIT'] = 2**parameters['TX_SEQ_NUM_WIDTH'] parameters['BAR0_APERTURE'] = 24 diff --git a/fpga/lib/pcie/example/common/tb/example_core_pcie_us/Makefile b/fpga/lib/pcie/example/common/tb/example_core_pcie_us/Makefile index fe26130c3..4f6ced3e1 100644 --- a/fpga/lib/pcie/example/common/tb/example_core_pcie_us/Makefile +++ b/fpga/lib/pcie/example/common/tb/example_core_pcie_us/Makefile @@ -74,6 +74,8 @@ export PARAM_IMM_ENABLE := 1 export PARAM_IMM_WIDTH := 32 export PARAM_READ_OP_TABLE_SIZE := $(PARAM_PCIE_TAG_COUNT) export PARAM_READ_TX_LIMIT := $(shell echo "$$(( 1 << ($(PARAM_RQ_SEQ_NUM_WIDTH)-1) ))" ) +export PARAM_READ_CPLH_FC_LIMIT := $(if $(filter-out 60,$(PARAM_AXIS_PCIE_RQ_USER_WIDTH)),128,64) +export PARAM_READ_CPLD_FC_LIMIT := $(if $(filter-out 60,$(PARAM_AXIS_PCIE_RQ_USER_WIDTH)),2048,992) export PARAM_WRITE_OP_TABLE_SIZE := $(shell echo "$$(( 1 << ($(PARAM_RQ_SEQ_NUM_WIDTH)-1) ))" ) export PARAM_WRITE_TX_LIMIT := $(shell echo "$$(( 1 << ($(PARAM_RQ_SEQ_NUM_WIDTH)-1) ))" ) export PARAM_BAR0_APERTURE := 24 diff --git a/fpga/lib/pcie/example/common/tb/example_core_pcie_us/test_example_core_pcie_us.py b/fpga/lib/pcie/example/common/tb/example_core_pcie_us/test_example_core_pcie_us.py index 8ff747a35..e728be19c 100644 --- a/fpga/lib/pcie/example/common/tb/example_core_pcie_us/test_example_core_pcie_us.py +++
b/fpga/lib/pcie/example/common/tb/example_core_pcie_us/test_example_core_pcie_us.py @@ -168,7 +168,7 @@ class TB(object): # cfg_rx_pm_state # cfg_tx_pm_state # cfg_ltssm_state - # cfg_rcb_status + cfg_rcb_status=dut.cfg_rcb_status, # cfg_obff_enable # cfg_pl_status_change # cfg_tph_requester_enable @@ -566,6 +566,8 @@ def test_example_core_pcie_us(request, axis_pcie_data_width, straddle): parameters['IMM_WIDTH'] = 32 parameters['READ_OP_TABLE_SIZE'] = parameters['PCIE_TAG_COUNT'] parameters['READ_TX_LIMIT'] = 2**(parameters['RQ_SEQ_NUM_WIDTH']-1) + parameters['READ_CPLH_FC_LIMIT'] = 64 if parameters['AXIS_PCIE_RQ_USER_WIDTH'] == 60 else 128 + parameters['READ_CPLD_FC_LIMIT'] = 992 if parameters['AXIS_PCIE_RQ_USER_WIDTH'] == 60 else 2048 parameters['WRITE_OP_TABLE_SIZE'] = 2**(parameters['RQ_SEQ_NUM_WIDTH']-1) parameters['WRITE_TX_LIMIT'] = 2**(parameters['RQ_SEQ_NUM_WIDTH']-1) parameters['BAR0_APERTURE'] = 24 diff --git a/fpga/lib/pcie/example/fb2CG/fpga/rtl/fpga.v b/fpga/lib/pcie/example/fb2CG/fpga/rtl/fpga.v index f2e16408c..78db5d5e9 100644 --- a/fpga/lib/pcie/example/fb2CG/fpga/rtl/fpga.v +++ b/fpga/lib/pcie/example/fb2CG/fpga/rtl/fpga.v @@ -145,6 +145,7 @@ wire pcie_rq_seq_num_vld1; wire [2:0] cfg_max_payload; wire [2:0] cfg_max_read_req; +wire [3:0] cfg_rcb_status; wire [9:0] cfg_mgmt_addr; wire [7:0] cfg_mgmt_function_number; @@ -263,7 +264,7 @@ pcie4_uscale_plus_inst ( .cfg_ltssm_state(), .cfg_rx_pm_state(), .cfg_tx_pm_state(), - .cfg_rcb_status(), + .cfg_rcb_status(cfg_rcb_status), .cfg_obff_enable(), .cfg_pl_status_change(), .cfg_tph_requester_enable(), @@ -448,7 +449,8 @@ core_inst ( .cfg_max_payload(cfg_max_payload), .cfg_max_read_req(cfg_max_read_req), - + .cfg_rcb_status(cfg_rcb_status), + .cfg_mgmt_addr(cfg_mgmt_addr), .cfg_mgmt_function_number(cfg_mgmt_function_number), .cfg_mgmt_write(cfg_mgmt_write), diff --git a/fpga/lib/pcie/example/fb2CG/fpga/rtl/fpga_core.v b/fpga/lib/pcie/example/fb2CG/fpga/rtl/fpga_core.v index 71b284bf0..10f534459 100644 
--- a/fpga/lib/pcie/example/fb2CG/fpga/rtl/fpga_core.v +++ b/fpga/lib/pcie/example/fb2CG/fpga/rtl/fpga_core.v @@ -104,6 +104,7 @@ module fpga_core # input wire [2:0] cfg_max_payload, input wire [2:0] cfg_max_read_req, + input wire [3:0] cfg_rcb_status, output wire [9:0] cfg_mgmt_addr, output wire [7:0] cfg_mgmt_function_number, @@ -160,6 +161,8 @@ example_core_pcie_us #( .PCIE_TAG_COUNT(PCIE_TAG_COUNT), .READ_OP_TABLE_SIZE(PCIE_TAG_COUNT), .READ_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)), + .READ_CPLH_FC_LIMIT(128), + .READ_CPLD_FC_LIMIT(2048), .WRITE_OP_TABLE_SIZE(2**(RQ_SEQ_NUM_WIDTH-1)), .WRITE_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)), .BAR0_APERTURE(BAR0_APERTURE), @@ -262,6 +265,8 @@ example_core_pcie_us_inst ( */ .cfg_max_read_req(cfg_max_read_req), .cfg_max_payload(cfg_max_payload), + // .cfg_rcb_status(cfg_rcb_status), + .cfg_rcb_status(1'b1), // force RCB 128 due to insufficient CPLH limit in US+ PCIe HIP /* * Status diff --git a/fpga/lib/pcie/example/fb2CG/fpga/tb/fpga_core/test_fpga_core.py b/fpga/lib/pcie/example/fb2CG/fpga/tb/fpga_core/test_fpga_core.py index f274d47a1..56b91ea37 100644 --- a/fpga/lib/pcie/example/fb2CG/fpga/tb/fpga_core/test_fpga_core.py +++ b/fpga/lib/pcie/example/fb2CG/fpga/tb/fpga_core/test_fpga_core.py @@ -167,7 +167,7 @@ class TB(object): # cfg_rx_pm_state # cfg_tx_pm_state # cfg_ltssm_state - # cfg_rcb_status + cfg_rcb_status=dut.cfg_rcb_status, # cfg_obff_enable # cfg_pl_status_change # cfg_tph_requester_enable diff --git a/fpga/lib/pcie/rtl/dma_if_axi.v b/fpga/lib/pcie/rtl/dma_if_axi.v index 582635ca1..4465485af 100644 --- a/fpga/lib/pcie/rtl/dma_if_axi.v +++ b/fpga/lib/pcie/rtl/dma_if_axi.v @@ -177,6 +177,12 @@ module dma_if_axi # input wire read_enable, input wire write_enable, + /* + * Status + */ + output wire status_rd_busy, + output wire status_wr_busy, + /* * Statistics */ @@ -285,6 +291,11 @@ dma_if_axi_rd_inst ( */ .enable(read_enable), + /* + * Status + */ + .status_busy(status_rd_busy), + /* * Statistics */ @@ -386,6 +397,11 @@ 
dma_if_axi_wr_inst ( */ .enable(write_enable), + /* + * Status + */ + .status_busy(status_wr_busy), + /* * Statistics */ diff --git a/fpga/lib/pcie/rtl/dma_if_axi_rd.v b/fpga/lib/pcie/rtl/dma_if_axi_rd.v index bdeb4b73d..b80e52328 100644 --- a/fpga/lib/pcie/rtl/dma_if_axi_rd.v +++ b/fpga/lib/pcie/rtl/dma_if_axi_rd.v @@ -122,6 +122,11 @@ module dma_if_axi_rd # */ input wire enable, + /* + * Status + */ + output wire status_busy, + /* * Statistics */ @@ -291,6 +296,10 @@ reg status_fifo_rd_finish_reg = 1'b0, status_fifo_rd_finish_next; reg [3:0] status_fifo_rd_error_reg = 4'd0, status_fifo_rd_error_next; reg status_fifo_rd_valid_reg = 1'b0, status_fifo_rd_valid_next; +reg [OP_TAG_WIDTH+1-1:0] active_op_count_reg = 0; +reg inc_active_op; +reg dec_active_op; + reg [AXI_DATA_WIDTH-1:0] m_axi_rdata_int_reg = {AXI_DATA_WIDTH{1'b0}}, m_axi_rdata_int_next; reg m_axi_rvalid_int_reg = 1'b0, m_axi_rvalid_int_next; @@ -306,6 +315,8 @@ reg [TAG_WIDTH-1:0] m_axis_read_desc_status_tag_reg = {TAG_WIDTH{1'b0}}, m_axis_ reg [3:0] m_axis_read_desc_status_error_reg = 4'd0, m_axis_read_desc_status_error_next; reg m_axis_read_desc_status_valid_reg = 1'b0, m_axis_read_desc_status_valid_next; +reg status_busy_reg = 1'b0; + reg [OP_TAG_WIDTH-1:0] stat_rd_op_start_tag_reg = 0, stat_rd_op_start_tag_next; reg [LEN_WIDTH-1:0] stat_rd_op_start_len_reg = 0, stat_rd_op_start_len_next; reg stat_rd_op_start_valid_reg = 1'b0, stat_rd_op_start_valid_next; @@ -349,6 +360,8 @@ assign m_axis_read_desc_status_tag = m_axis_read_desc_status_tag_reg; assign m_axis_read_desc_status_error = m_axis_read_desc_status_error_reg; assign m_axis_read_desc_status_valid = m_axis_read_desc_status_valid_reg; +assign status_busy = status_busy_reg; + assign stat_rd_op_start_tag = stat_rd_op_start_tag_reg; assign stat_rd_op_start_len = stat_rd_op_start_len_reg; assign stat_rd_op_start_valid = stat_rd_op_start_valid_reg; @@ -467,6 +480,8 @@ always @* begin op_table_start_last = 0; op_table_start_en = 1'b0; + inc_active_op = 
1'b0; + // segmentation and request generation case (req_state_reg) REQ_STATE_IDLE: begin @@ -532,6 +547,7 @@ always @* begin op_table_start_cycle_count = (req_tr_count_next + (req_axi_addr_reg & OFFSET_MASK) - 1) >> AXI_BURST_SIZE; op_table_start_last = req_op_count_reg == req_tr_count_next; op_table_start_en = 1'b1; + inc_active_op = 1'b1; stat_rd_req_start_tag_next = op_table_start_ptr_reg[OP_TAG_WIDTH-1:0]; stat_rd_req_start_len_next = req_zero_len_reg ? 0 : req_tr_count_reg; @@ -836,6 +852,7 @@ always @* begin // commit operations in-order op_table_finish_en = 1'b0; + dec_active_op = 1'b0; if (m_axis_read_desc_status_valid_reg) begin m_axis_read_desc_status_error_next = DMA_ERROR_NONE; @@ -852,6 +869,7 @@ always @* begin if (op_table_active[op_table_finish_ptr_reg[OP_TAG_WIDTH-1:0]] && op_table_write_complete[op_table_finish_ptr_reg[OP_TAG_WIDTH-1:0]] && op_table_finish_ptr_reg != op_table_start_ptr_reg) begin op_table_finish_en = 1'b1; + dec_active_op = 1'b1; if (op_table_error_a[op_table_finish_ptr_reg[OP_TAG_WIDTH-1:0]] != op_table_error_b[op_table_finish_ptr_reg[OP_TAG_WIDTH-1:0]]) begin m_axis_read_desc_status_error_next = op_table_error_code[op_table_finish_ptr_reg[OP_TAG_WIDTH-1:0]]; @@ -910,6 +928,8 @@ always @(posedge clk) begin m_axis_read_desc_status_error_reg <= m_axis_read_desc_status_error_next; m_axis_read_desc_status_valid_reg <= m_axis_read_desc_status_valid_next; + status_busy_reg <= active_op_count_reg != 0; + stat_rd_op_start_tag_reg <= stat_rd_op_start_tag_next; stat_rd_op_start_len_reg <= stat_rd_op_start_len_next; stat_rd_op_start_valid_reg <= stat_rd_op_start_valid_next; @@ -947,6 +967,8 @@ always @(posedge clk) begin status_fifo_half_full_reg <= $unsigned(status_fifo_wr_ptr_reg - status_fifo_rd_ptr_reg) >= 2**(STATUS_FIFO_ADDR_WIDTH-1); + active_op_count_reg <= active_op_count_reg + inc_active_op - dec_active_op; + if (op_table_start_en) begin op_table_start_ptr_reg <= op_table_start_ptr_reg + 1; 
op_table_active[op_table_start_ptr_reg[OP_TAG_WIDTH-1:0]] <= 1'b1; @@ -995,6 +1017,8 @@ always @(posedge clk) begin m_axis_read_desc_status_error_reg = 4'd0; m_axis_read_desc_status_valid_reg <= 1'b0; + status_busy_reg <= 1'b0; + stat_rd_op_start_tag_reg <= 0; stat_rd_op_start_valid_reg <= 1'b0; stat_rd_op_finish_tag_reg <= 0; @@ -1009,6 +1033,8 @@ always @(posedge clk) begin status_fifo_we_reg <= 1'b0; status_fifo_rd_valid_reg <= 1'b0; + active_op_count_reg <= 0; + op_table_start_ptr_reg <= 0; op_table_read_complete_ptr_reg <= 0; op_table_finish_ptr_reg <= 0; diff --git a/fpga/lib/pcie/rtl/dma_if_axi_wr.v b/fpga/lib/pcie/rtl/dma_if_axi_wr.v index f50cc2ea5..35e5c786a 100644 --- a/fpga/lib/pcie/rtl/dma_if_axi_wr.v +++ b/fpga/lib/pcie/rtl/dma_if_axi_wr.v @@ -131,6 +131,11 @@ module dma_if_axi_wr # */ input wire enable, + /* + * Status + */ + output wire status_busy, + /* * Statistics */ @@ -323,6 +328,10 @@ reg [RAM_SEG_COUNT-1:0] mask_fifo_wr_mask; wire mask_fifo_empty = mask_fifo_wr_ptr_reg == mask_fifo_rd_ptr_reg; wire mask_fifo_full = mask_fifo_wr_ptr_reg == (mask_fifo_rd_ptr_reg ^ (1 << MASK_FIFO_ADDR_WIDTH)); +reg [OP_TAG_WIDTH+1-1:0] active_op_count_reg = 0; +reg inc_active_op; +reg dec_active_op; + reg [AXI_ID_WIDTH-1:0] m_axi_awid_reg = {AXI_ID_WIDTH{1'b0}}, m_axi_awid_next; reg [AXI_ADDR_WIDTH-1:0] m_axi_awaddr_reg = {AXI_ADDR_WIDTH{1'b0}}, m_axi_awaddr_next; reg [7:0] m_axi_awlen_reg = 8'd0, m_axi_awlen_next; @@ -340,6 +349,8 @@ reg [RAM_SEG_COUNT*RAM_SEG_ADDR_WIDTH-1:0] ram_rd_cmd_addr_reg = 0, ram_rd_cmd_a reg [RAM_SEG_COUNT-1:0] ram_rd_cmd_valid_reg = 0, ram_rd_cmd_valid_next; reg [RAM_SEG_COUNT-1:0] ram_rd_resp_ready_cmb; +reg status_busy_reg = 1'b0; + reg [OP_TAG_WIDTH-1:0] stat_wr_op_start_tag_reg = 0, stat_wr_op_start_tag_next; reg [LEN_WIDTH-1:0] stat_wr_op_start_len_reg = 0, stat_wr_op_start_len_next; reg stat_wr_op_start_valid_reg = 1'b0, stat_wr_op_start_valid_next; @@ -384,6 +395,8 @@ assign ram_rd_cmd_addr = ram_rd_cmd_addr_reg; assign 
ram_rd_cmd_valid = ram_rd_cmd_valid_reg; assign ram_rd_resp_ready = ram_rd_resp_ready_cmb; +assign status_busy = status_busy_reg; + assign stat_wr_op_start_tag = stat_wr_op_start_tag_reg; assign stat_wr_op_start_len = stat_wr_op_start_len_reg; assign stat_wr_op_start_valid = stat_wr_op_start_valid_reg; @@ -506,6 +519,8 @@ always @* begin op_table_start_last = 0; op_table_start_en = 1'b0; + inc_active_op = 1'b0; + // TLP segmentation case (req_state_reg) REQ_STATE_IDLE: begin @@ -590,6 +605,7 @@ always @* begin op_table_start_tag = tag_reg; op_table_start_last = op_count_reg == tr_word_count_next; op_table_start_en = 1'b1; + inc_active_op = 1'b1; stat_wr_req_start_tag_next = op_table_start_ptr_reg[OP_TAG_WIDTH-1:0]; stat_wr_req_start_len_next = zero_len_reg ? 0 : tr_word_count_next; @@ -955,9 +971,11 @@ always @* begin // commit operations in-order op_table_finish_en = 1'b0; + dec_active_op = 1'b0; if (op_table_active[op_table_finish_ptr_reg[OP_TAG_WIDTH-1:0]] && op_table_write_complete[op_table_finish_ptr_reg[OP_TAG_WIDTH-1:0]] && op_table_finish_ptr_reg != op_table_tx_finish_ptr_reg) begin op_table_finish_en = 1'b1; + dec_active_op = 1'b1; if (op_table_error_code[op_table_finish_ptr_reg[OP_TAG_WIDTH-1:0]] != DMA_ERROR_NONE) begin m_axis_write_desc_status_error_next = op_table_error_code[op_table_finish_ptr_reg[OP_TAG_WIDTH-1:0]]; @@ -975,12 +993,14 @@ always @* begin end else begin // accept write completions op_table_finish_en = 1'b0; + dec_active_op = 1'b0; stat_wr_req_finish_tag_next = op_table_finish_ptr_reg[OP_TAG_WIDTH-1:0]; m_axi_bready_next = 1'b1; if (m_axi_bready && m_axi_bvalid) begin op_table_finish_en = 1'b1; + dec_active_op = 1'b1; stat_wr_req_finish_valid_next = 1'b1; if (m_axi_bresp == AXI_RESP_SLVERR) begin @@ -1066,6 +1086,8 @@ always @(posedge clk) begin m_axis_write_desc_status_error_reg <= m_axis_write_desc_status_error_next; m_axis_write_desc_status_valid_reg <= m_axis_write_desc_status_valid_next; + status_busy_reg <= active_op_count_reg != 
0; + stat_wr_op_start_tag_reg <= stat_wr_op_start_tag_next; stat_wr_op_start_len_reg <= stat_wr_op_start_len_next; stat_wr_op_start_valid_reg <= stat_wr_op_start_valid_next; @@ -1085,6 +1107,8 @@ always @(posedge clk) begin ram_rd_cmd_addr_reg <= ram_rd_cmd_addr_next; ram_rd_cmd_valid_reg <= ram_rd_cmd_valid_next; + active_op_count_reg <= active_op_count_reg + inc_active_op - dec_active_op; + if (mask_fifo_we) begin mask_fifo_mask[mask_fifo_wr_ptr_reg[MASK_FIFO_ADDR_WIDTH-1:0]] <= mask_fifo_wr_mask; mask_fifo_wr_ptr_reg <= mask_fifo_wr_ptr_reg + 1; @@ -1140,6 +1164,8 @@ always @(posedge clk) begin m_axis_write_desc_status_error_reg <= 4'd0; m_axis_write_desc_status_valid_reg <= 1'b0; + status_busy_reg <= 1'b0; + stat_wr_op_start_tag_reg <= 0; stat_wr_op_start_valid_reg <= 1'b0; stat_wr_op_finish_tag_reg <= 0; @@ -1151,6 +1177,8 @@ always @(posedge clk) begin ram_rd_cmd_valid_reg <= {RAM_SEG_COUNT{1'b0}}; + active_op_count_reg <= 0; + mask_fifo_wr_ptr_reg <= 0; mask_fifo_rd_ptr_reg <= 0; diff --git a/fpga/lib/pcie/rtl/dma_if_pcie.v b/fpga/lib/pcie/rtl/dma_if_pcie.v index e319f3897..0108714b8 100644 --- a/fpga/lib/pcie/rtl/dma_if_pcie.v +++ b/fpga/lib/pcie/rtl/dma_if_pcie.v @@ -75,6 +75,10 @@ module dma_if_pcie # parameter READ_OP_TABLE_SIZE = PCIE_TAG_COUNT, // In-flight transmit limit (read) parameter READ_TX_LIMIT = 2**TX_SEQ_NUM_WIDTH, + // Completion header flow control credit limit (read) + parameter READ_CPLH_FC_LIMIT = 0, + // Completion data flow control credit limit (read) + parameter READ_CPLD_FC_LIMIT = READ_CPLH_FC_LIMIT*4, // Operation table size (write) parameter WRITE_OP_TABLE_SIZE = 2**TX_SEQ_NUM_WIDTH, // In-flight transmit limit (write) @@ -191,6 +195,7 @@ module dma_if_pcie # input wire read_enable, input wire write_enable, input wire ext_tag_enable, + input wire rcb_128b, input wire [15:0] requester_id, input wire [2:0] max_read_request_size, input wire [2:0] max_payload_size, @@ -198,6 +203,8 @@ module dma_if_pcie # /* * Status */ + output wire 
status_rd_busy, + output wire status_wr_busy, output wire status_error_cor, output wire status_error_uncor, @@ -257,6 +264,8 @@ dma_if_pcie_rd #( .TAG_WIDTH(TAG_WIDTH), .OP_TABLE_SIZE(READ_OP_TABLE_SIZE), .TX_LIMIT(READ_TX_LIMIT), + .CPLH_FC_LIMIT(READ_CPLH_FC_LIMIT), + .CPLD_FC_LIMIT(READ_CPLD_FC_LIMIT), .TLP_FORCE_64_BIT_ADDR(TLP_FORCE_64_BIT_ADDR), .CHECK_BUS_NUMBER(CHECK_BUS_NUMBER) ) @@ -325,12 +334,14 @@ dma_if_pcie_rd_inst ( */ .enable(read_enable), .ext_tag_enable(ext_tag_enable), + .rcb_128b(rcb_128b), .requester_id(requester_id), .max_read_request_size(max_read_request_size), /* * Status */ + .status_busy(status_rd_busy), .status_error_cor(status_error_cor), .status_error_uncor(status_error_uncor), @@ -439,6 +450,11 @@ dma_if_pcie_wr_inst ( .requester_id(requester_id), .max_payload_size(max_payload_size), + /* + * Status + */ + .status_busy(status_wr_busy), + /* * Statistics */ diff --git a/fpga/lib/pcie/rtl/dma_if_pcie_rd.v b/fpga/lib/pcie/rtl/dma_if_pcie_rd.v index 18dde339c..a238b0414 100644 --- a/fpga/lib/pcie/rtl/dma_if_pcie_rd.v +++ b/fpga/lib/pcie/rtl/dma_if_pcie_rd.v @@ -69,6 +69,10 @@ module dma_if_pcie_rd # parameter OP_TABLE_SIZE = PCIE_TAG_COUNT, // In-flight transmit limit parameter TX_LIMIT = 2**TX_SEQ_NUM_WIDTH, + // Completion header flow control credit limit + parameter CPLH_FC_LIMIT = 0, + // Completion data flow control credit limit + parameter CPLD_FC_LIMIT = CPLH_FC_LIMIT*4, // Force 64 bit address parameter TLP_FORCE_64_BIT_ADDR = 0, // Requester ID mash @@ -139,12 +143,14 @@ module dma_if_pcie_rd # */ input wire enable, input wire ext_tag_enable, + input wire rcb_128b, input wire [15:0] requester_id, input wire [2:0] max_read_request_size, /* * Status */ + output wire status_busy, output wire status_error_cor, output wire status_error_uncor, @@ -191,6 +197,9 @@ parameter OP_TABLE_READ_COUNT_WIDTH = PCIE_TAG_WIDTH+1; parameter TX_COUNT_WIDTH = $clog2(TX_LIMIT+1); +parameter CL_CPLH_FC_LIMIT = $clog2(CPLH_FC_LIMIT); +parameter 
CL_CPLD_FC_LIMIT = $clog2(CPLD_FC_LIMIT); + parameter STATUS_FIFO_ADDR_WIDTH = 5; parameter OUTPUT_FIFO_ADDR_WIDTH = 5; @@ -297,9 +306,12 @@ reg [1:0] tlp_state_reg = TLP_STATE_IDLE, tlp_state_next; // datapath control signals reg last_cycle; -reg [3:0] first_be; -reg [3:0] last_be; -reg [10:0] dword_count; +reg [3:0] req_first_be; +reg [3:0] req_last_be; +reg [12:0] req_tlp_count; +reg [10:0] req_dword_count; +reg [6:0] req_cplh_fc_count; +reg [8:0] req_cpld_fc_count; reg req_last_tlp; reg [PCIE_ADDR_WIDTH-1:0] req_pcie_addr; @@ -312,7 +324,6 @@ reg [PCIE_ADDR_WIDTH-1:0] req_pcie_addr_reg = {PCIE_ADDR_WIDTH{1'b0}}, req_pcie_ reg [RAM_SEL_WIDTH-1:0] req_ram_sel_reg = {RAM_SEL_WIDTH{1'b0}}, req_ram_sel_next; reg [RAM_ADDR_WIDTH-1:0] req_ram_addr_reg = {RAM_ADDR_WIDTH{1'b0}}, req_ram_addr_next; reg [LEN_WIDTH-1:0] req_op_count_reg = {LEN_WIDTH{1'b0}}, req_op_count_next; -reg [12:0] req_tlp_count_reg = 13'd0, req_tlp_count_next; reg req_zero_len_reg = 1'b0, req_zero_len_next; reg [OP_TAG_WIDTH-1:0] req_op_tag_reg = {OP_TAG_WIDTH{1'b0}}, req_op_tag_next; reg req_op_tag_valid_reg = 1'b0, req_op_tag_valid_next; @@ -370,6 +381,7 @@ reg [6:0] rx_cpl_tlp_hdr_lower_addr; reg [127:0] tlp_hdr; reg [10:0] max_read_request_size_dw_reg = 11'd0; +reg rcb_128b_reg = 1'b0; reg [STATUS_FIFO_ADDR_WIDTH+1-1:0] status_fifo_wr_ptr_reg = 0; reg [STATUS_FIFO_ADDR_WIDTH+1-1:0] status_fifo_rd_ptr_reg = 0; @@ -390,7 +402,7 @@ reg status_fifo_mask_reg = 1'b0, status_fifo_mask_next; reg status_fifo_finish_reg = 1'b0, status_fifo_finish_next; reg [3:0] status_fifo_error_reg = 4'd0, status_fifo_error_next; reg status_fifo_wr_en_reg = 1'b0, status_fifo_wr_en_next; -reg status_fifo_half_full_reg = 1'b0; +reg status_fifo_full_reg = 1'b0; reg status_fifo_rd_en; reg [OP_TAG_WIDTH-1:0] status_fifo_rd_op_tag_reg = 0; reg [RAM_SEG_COUNT-1:0] status_fifo_rd_mask_reg = 0; @@ -402,6 +414,24 @@ reg [TX_COUNT_WIDTH-1:0] active_tx_count_reg = {TX_COUNT_WIDTH{1'b0}}, active_tx reg active_tx_count_av_reg = 1'b1, 
active_tx_count_av_next; reg inc_active_tx; +reg [PCIE_TAG_WIDTH+1-1:0] active_tag_count_reg = 0; +reg inc_active_tag; +reg dec_active_tag; + +reg [OP_TAG_WIDTH+1-1:0] active_op_count_reg = 0; +reg inc_active_op; +reg dec_active_op; + +reg [CL_CPLH_FC_LIMIT+1-1:0] active_cplh_fc_count_reg = 0; +reg active_cplh_fc_av_reg = 1'b1; +reg [6:0] inc_active_cplh_fc_count; +reg [6:0] dec_active_cplh_fc_count; + +reg [CL_CPLD_FC_LIMIT+1-1:0] active_cpld_fc_count_reg = 0; +reg active_cpld_fc_av_reg = 1'b1; +reg [8:0] inc_active_cpld_fc_count; +reg [8:0] dec_active_cpld_fc_count; + reg rx_cpl_tlp_ready_reg = 1'b0, rx_cpl_tlp_ready_next; reg [TLP_SEG_COUNT*TLP_HDR_WIDTH-1:0] tx_rd_req_tlp_hdr_reg = 0, tx_rd_req_tlp_hdr_next; @@ -413,6 +443,7 @@ reg [TAG_WIDTH-1:0] m_axis_read_desc_status_tag_reg = {TAG_WIDTH{1'b0}}, m_axis_ reg [3:0] m_axis_read_desc_status_error_reg = 4'd0, m_axis_read_desc_status_error_next; reg m_axis_read_desc_status_valid_reg = 1'b0, m_axis_read_desc_status_valid_next; +reg status_busy_reg = 1'b0; reg status_error_cor_reg = 1'b0, status_error_cor_next; reg status_error_uncor_reg = 1'b0, status_error_uncor_next; @@ -459,6 +490,7 @@ assign m_axis_read_desc_status_tag = m_axis_read_desc_status_tag_reg; assign m_axis_read_desc_status_error = m_axis_read_desc_status_error_reg; assign m_axis_read_desc_status_valid = m_axis_read_desc_status_valid_reg; +assign status_busy = status_busy_reg; assign status_error_cor = status_error_cor_reg; assign status_error_uncor = status_error_uncor_reg; @@ -485,6 +517,8 @@ reg [PCIE_TAG_WIDTH-1:0] pcie_tag_table_start_ptr_reg = 0, pcie_tag_table_start_ reg [RAM_SEL_WIDTH-1:0] pcie_tag_table_start_ram_sel_reg = 0, pcie_tag_table_start_ram_sel_next; reg [RAM_ADDR_WIDTH-1:0] pcie_tag_table_start_ram_addr_reg = 0, pcie_tag_table_start_ram_addr_next; reg [OP_TAG_WIDTH-1:0] pcie_tag_table_start_op_tag_reg = 0, pcie_tag_table_start_op_tag_next; +reg [6:0] pcie_tag_table_start_cplh_fc_reg = 0, pcie_tag_table_start_cplh_fc_next; +reg 
[8:0] pcie_tag_table_start_cpld_fc_reg = 0, pcie_tag_table_start_cpld_fc_next; reg pcie_tag_table_start_zero_len_reg = 1'b0, pcie_tag_table_start_zero_len_next; reg pcie_tag_table_start_en_reg = 1'b0, pcie_tag_table_start_en_next; reg [PCIE_TAG_WIDTH-1:0] pcie_tag_table_finish_ptr; @@ -497,6 +531,10 @@ reg [RAM_ADDR_WIDTH-1:0] pcie_tag_table_ram_addr[(2**PCIE_TAG_WIDTH)-1:0]; (* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *) reg [OP_TAG_WIDTH-1:0] pcie_tag_table_op_tag[(2**PCIE_TAG_WIDTH)-1:0]; (* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *) +reg [6:0] pcie_tag_table_cplh_fc[(2**PCIE_TAG_WIDTH)-1:0]; +(* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *) +reg [8:0] pcie_tag_table_cpld_fc[(2**PCIE_TAG_WIDTH)-1:0]; +(* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *) reg pcie_tag_table_zero_len[(2**PCIE_TAG_WIDTH)-1:0]; (* ram_style = "distributed", ramstyle = "no_rw_check, mlab" *) reg pcie_tag_table_active_a[(2**PCIE_TAG_WIDTH)-1:0]; @@ -601,7 +639,6 @@ always @* begin req_ram_sel_next = req_ram_sel_reg; req_ram_addr_next = req_ram_addr_reg; req_op_count_next = req_op_count_reg; - req_tlp_count_next = req_tlp_count_reg; req_zero_len_next = req_zero_len_reg; req_op_tag_next = req_op_tag_reg; req_op_tag_valid_next = req_op_tag_valid_reg; @@ -609,6 +646,10 @@ always @* begin req_pcie_tag_valid_next = req_pcie_tag_valid_reg; inc_active_tx = 1'b0; + inc_active_tag = 1'b0; + inc_active_op = 1'b0; + inc_active_cplh_fc_count = 0; + inc_active_cpld_fc_count = 0; op_table_start_ptr = req_op_tag_reg; op_table_start_tag = s_axis_read_desc_tag; @@ -623,16 +664,27 @@ always @* begin // packet smaller than max read request size if (((req_pcie_addr_reg & 12'hfff) + (req_op_count_reg & 12'hfff)) >> 12 != 0 || req_op_count_reg >> 12 != 0) begin // crosses 4k boundary, split on 4K boundary - req_tlp_count_next = 13'h1000 - req_pcie_addr_reg[11:0]; - dword_count = 11'h400 - req_pcie_addr_reg[11:2]; + req_tlp_count = 13'h1000 - 
req_pcie_addr_reg[11:0]; + req_dword_count = 11'h400 - req_pcie_addr_reg[11:2]; + req_cpld_fc_count = 9'h100 - req_pcie_addr_reg[11:4]; + if (rcb_128b_reg) begin + req_cplh_fc_count = 6'h20 - req_pcie_addr_reg[11:7]; + end else begin + req_cplh_fc_count = 7'h40 - req_pcie_addr_reg[11:6]; + end req_last_tlp = (((req_pcie_addr_reg & 12'hfff) + (req_op_count_reg & 12'hfff)) & 12'hfff) == 0 && req_op_count_reg >> 12 == 0; - // optimized req_pcie_addr = req_pcie_addr_reg + req_tlp_count_next req_pcie_addr[PCIE_ADDR_WIDTH-1:12] = req_pcie_addr_reg[PCIE_ADDR_WIDTH-1:12]+1; req_pcie_addr[11:0] = 12'd0; end else begin // does not cross 4k boundary, send one TLP - req_tlp_count_next = req_op_count_reg; - dword_count = (req_op_count_reg + req_pcie_addr_reg[1:0] + 3) >> 2; + req_tlp_count = req_op_count_reg; + req_dword_count = (req_op_count_reg + req_pcie_addr_reg[1:0] + 3) >> 2; + req_cpld_fc_count = (req_op_count_reg + req_pcie_addr_reg[1:0] + 15) >> 4; + if (rcb_128b_reg) begin + req_cplh_fc_count = (req_pcie_addr_reg[6:0]+req_op_count_reg+127) >> 7; + end else begin + req_cplh_fc_count = (req_pcie_addr_reg[5:0]+req_op_count_reg+63) >> 6; + end req_last_tlp = 1'b1; // always last TLP, so next address is irrelevant req_pcie_addr[PCIE_ADDR_WIDTH-1:12] = req_pcie_addr_reg[PCIE_ADDR_WIDTH-1:12]; @@ -642,32 +694,54 @@ always @* begin // packet larger than max read request size if (((req_pcie_addr_reg & 12'hfff) + {max_read_request_size_dw_reg, 2'b00}) >> 12 != 0) begin // crosses 4k boundary, split on 4K boundary - req_tlp_count_next = 13'h1000 - req_pcie_addr_reg[11:0]; - dword_count = 11'h400 - req_pcie_addr_reg[11:2]; + req_tlp_count = 13'h1000 - req_pcie_addr_reg[11:0]; + req_dword_count = 11'h400 - req_pcie_addr_reg[11:2]; + req_cpld_fc_count = 9'h100 - req_pcie_addr_reg[11:4]; + if (rcb_128b_reg) begin + req_cplh_fc_count = 6'h20 - req_pcie_addr_reg[11:7]; + end else begin + req_cplh_fc_count = 7'h40 - req_pcie_addr_reg[11:6]; + end req_last_tlp = 1'b0; - // optimized 
req_pcie_addr = req_pcie_addr_reg + req_tlp_count_next req_pcie_addr[PCIE_ADDR_WIDTH-1:12] = req_pcie_addr_reg[PCIE_ADDR_WIDTH-1:12]+1; req_pcie_addr[11:0] = 12'd0; end else begin // does not cross 4k boundary, split on 128-byte read completion boundary - req_tlp_count_next = {max_read_request_size_dw_reg, 2'b00} - req_pcie_addr_reg[6:0]; - dword_count = max_read_request_size_dw_reg - req_pcie_addr_reg[6:2]; + req_tlp_count = {max_read_request_size_dw_reg, 2'b00} - req_pcie_addr_reg[6:0]; + req_dword_count = max_read_request_size_dw_reg - req_pcie_addr_reg[6:2]; + req_cpld_fc_count = max_read_request_size_dw_reg[10:2] - req_pcie_addr_reg[6:4]; + if (rcb_128b_reg) begin + req_cplh_fc_count = max_read_request_size_dw_reg[10:5]; + end else begin + req_cplh_fc_count = max_read_request_size_dw_reg[10:4] - req_pcie_addr_reg[6]; + end req_last_tlp = 1'b0; - // optimized req_pcie_addr = req_pcie_addr_reg + req_tlp_count_next req_pcie_addr[PCIE_ADDR_WIDTH-1:12] = req_pcie_addr_reg[PCIE_ADDR_WIDTH-1:12]; req_pcie_addr[11:0] = {{req_pcie_addr_reg[11:7], 5'd0} + max_read_request_size_dw_reg, 2'b00}; end end + // un-optimized TLP size computations (for reference) + // req_dword_count = (req_tlp_count + req_pcie_addr_reg[1:0] + 3) >> 2 + // req_cpld_fc_count = (req_dword_count + 3) >> 2 + // if (rcb_128b_reg) begin + // req_cplh_fc_count = (req_pcie_addr_reg[6:0]+req_tlp_count+127) >> 7 + // end else begin + // req_cplh_fc_count = (req_pcie_addr_reg[5:0]+req_tlp_count+63) >> 6 + // end + // req_pcie_addr = req_pcie_addr_reg + req_tlp_count + pcie_tag_table_start_ptr_next = req_pcie_tag_reg; pcie_tag_table_start_ram_sel_next = req_ram_sel_reg; - pcie_tag_table_start_ram_addr_next = req_ram_addr_reg + req_tlp_count_next; + pcie_tag_table_start_ram_addr_next = req_ram_addr_reg + req_tlp_count; pcie_tag_table_start_op_tag_next = req_op_tag_reg; + pcie_tag_table_start_cplh_fc_next = req_cplh_fc_count; + pcie_tag_table_start_cpld_fc_next = req_cpld_fc_count;
pcie_tag_table_start_zero_len_next = req_zero_len_reg; pcie_tag_table_start_en_next = 1'b0; - first_be = 4'b1111 << req_pcie_addr_reg[1:0]; - last_be = 4'b1111 >> (3 - ((req_pcie_addr_reg[1:0] + req_tlp_count_next[1:0] - 1) & 3)); + req_first_be = 4'b1111 << req_pcie_addr_reg[1:0]; + req_last_be = 4'b1111 >> (3 - ((req_pcie_addr_reg[1:0] + req_tlp_count[1:0] - 1) & 3)); tx_rd_req_tlp_hdr_next = tx_rd_req_tlp_hdr_reg; tx_rd_req_tlp_valid_next = tx_rd_req_tlp_valid_reg && !tx_rd_req_tlp_ready; @@ -690,12 +764,12 @@ always @* begin tlp_hdr[110] = 1'b0; // EP tlp_hdr[109:108] = 2'b00; // attr tlp_hdr[107:106] = 3'b000; // AT - tlp_hdr[105:96] = dword_count; // length + tlp_hdr[105:96] = req_dword_count; // length // DW 1 tlp_hdr[95:80] = requester_id; // requester ID tlp_hdr[79:72] = req_pcie_tag_reg; // tag - tlp_hdr[71:68] = req_zero_len_reg ? 4'b0000 : (dword_count == 1 ? 4'b0000 : last_be); // last BE - tlp_hdr[67:64] = req_zero_len_reg ? 4'b0000 : (dword_count == 1 ? first_be & last_be : first_be); // first BE + tlp_hdr[71:68] = req_zero_len_reg ? 4'b0000 : (req_dword_count == 1 ? 4'b0000 : req_last_be); // last BE + tlp_hdr[67:64] = req_zero_len_reg ? 4'b0000 : (req_dword_count == 1 ? 
req_first_be & req_last_be : req_first_be); // first BE if (((req_pcie_addr_reg[PCIE_ADDR_WIDTH-1:2] >> 30) != 0) || TLP_FORCE_64_BIT_ADDR) begin // DW 2+3 tlp_hdr[63:2] = req_pcie_addr_reg[PCIE_ADDR_WIDTH-1:2]; // address @@ -729,6 +803,7 @@ always @* begin op_table_start_ptr = req_op_tag_reg; op_table_start_tag = s_axis_read_desc_tag; op_table_start_en = 1'b1; + inc_active_op = 1'b1; stat_rd_op_start_tag_next = req_op_tag_reg; stat_rd_op_start_len_next = s_axis_read_desc_len; stat_rd_op_start_valid_next = 1'b1; @@ -742,21 +817,26 @@ always @* begin tx_rd_req_tlp_hdr_next = tlp_hdr; end - if ((!tx_rd_req_tlp_valid_reg || tx_rd_req_tlp_ready) && req_pcie_tag_valid_reg && (!TX_SEQ_NUM_ENABLE || active_tx_count_av_reg)) begin + if ((!tx_rd_req_tlp_valid_reg || tx_rd_req_tlp_ready) && req_pcie_tag_valid_reg && (!TX_SEQ_NUM_ENABLE || active_tx_count_av_reg) && active_cplh_fc_av_reg && active_cpld_fc_av_reg) begin tx_rd_req_tlp_valid_next = 1'b1; inc_active_tx = 1'b1; req_pcie_addr_next = req_pcie_addr; - req_ram_addr_next = req_ram_addr_reg + req_tlp_count_next; - req_op_count_next = req_op_count_reg - req_tlp_count_next; + req_ram_addr_next = req_ram_addr_reg + req_tlp_count; + req_op_count_next = req_op_count_reg - req_tlp_count; pcie_tag_table_start_ptr_next = req_pcie_tag_reg; pcie_tag_table_start_ram_sel_next = req_ram_sel_reg; - pcie_tag_table_start_ram_addr_next = req_ram_addr_reg + req_tlp_count_next; + pcie_tag_table_start_ram_addr_next = req_ram_addr_reg + req_tlp_count; pcie_tag_table_start_op_tag_next = req_op_tag_reg; + pcie_tag_table_start_cplh_fc_next = req_cplh_fc_count; + pcie_tag_table_start_cpld_fc_next = req_cpld_fc_count; pcie_tag_table_start_zero_len_next = req_zero_len_reg; pcie_tag_table_start_en_next = 1'b1; + inc_active_tag = 1'b1; + inc_active_cplh_fc_count = req_cplh_fc_count; + inc_active_cpld_fc_count = req_cpld_fc_count; op_table_read_start_ptr = req_op_tag_reg; op_table_read_start_commit = req_last_tlp; @@ -765,7 +845,7 @@ always @* 
begin req_pcie_tag_valid_next = 1'b0; stat_rd_req_start_tag_next = req_pcie_tag_reg; - stat_rd_req_start_len_next = req_zero_len_reg ? 0 : req_tlp_count_next; + stat_rd_req_start_len_next = req_zero_len_reg ? 0 : req_tlp_count; stat_rd_req_start_valid_next = 1'b1; if (!req_last_tlp) begin @@ -852,6 +932,11 @@ always @* begin out_done_ack = {RAM_SEG_COUNT{1'b0}}; + dec_active_tag = 1'b0; + dec_active_op = 1'b0; + dec_active_cplh_fc_count = 0; + dec_active_cpld_fc_count = 0; + // Write generation ram_wr_cmd_sel_pipe_next = {RAM_SEG_COUNT{ram_sel_reg}}; if (!ram_wrap_reg) begin @@ -910,7 +995,7 @@ always @* begin case (tlp_state_reg) TLP_STATE_IDLE: begin // idle state, wait for completion - rx_cpl_tlp_ready_next = init_done_reg && &ram_wr_cmd_ready_int && !status_fifo_half_full_reg; + rx_cpl_tlp_ready_next = init_done_reg && &ram_wr_cmd_ready_int && !status_fifo_full_reg; if (rx_cpl_tlp_ready && rx_cpl_tlp_valid && rx_cpl_tlp_sop) begin op_dword_count_next = rx_cpl_tlp_hdr_length; @@ -1097,7 +1182,7 @@ always @* begin end TLP_STATE_WRITE: begin // write state - generate write operations - rx_cpl_tlp_ready_next = init_done_reg && &ram_wr_cmd_ready_int && !status_fifo_half_full_reg; + rx_cpl_tlp_ready_next = init_done_reg && &ram_wr_cmd_ready_int && !status_fifo_full_reg; if (rx_cpl_tlp_ready && rx_cpl_tlp_valid) begin tlp_data_int_next = rx_cpl_tlp_data; @@ -1167,7 +1252,7 @@ always @* begin if (rx_cpl_tlp_ready && rx_cpl_tlp_valid) begin if (rx_cpl_tlp_eop) begin - rx_cpl_tlp_ready_next = init_done_reg && &ram_wr_cmd_ready_int && !status_fifo_half_full_reg; + rx_cpl_tlp_ready_next = init_done_reg && &ram_wr_cmd_ready_int && !status_fifo_full_reg; tlp_state_next = TLP_STATE_IDLE; end else begin tlp_state_next = TLP_STATE_WAIT_END; @@ -1188,7 +1273,7 @@ always @* begin if (init_pcie_tag_reg) begin // initialize FIFO pcie_tag_fifo_wr_tag = init_count_reg; - if (pcie_tag_fifo_wr_tag < PCIE_TAG_COUNT_1) begin + if (pcie_tag_fifo_wr_tag < PCIE_TAG_COUNT_1 || 
!PCIE_TAG_COUNT_2) begin pcie_tag_fifo_1_we = 1'b1; end else if (pcie_tag_fifo_wr_tag) begin pcie_tag_fifo_2_we = 1'b1; @@ -1196,9 +1281,12 @@ always @* begin end else if (finish_tag_reg) begin pcie_tag_table_finish_ptr = pcie_tag_reg; pcie_tag_table_finish_en = 1'b1; + dec_active_tag = 1'b1; + dec_active_cplh_fc_count = pcie_tag_table_cplh_fc[pcie_tag_reg]; + dec_active_cpld_fc_count = pcie_tag_table_cpld_fc[pcie_tag_reg]; pcie_tag_fifo_wr_tag = pcie_tag_reg; - if (pcie_tag_fifo_wr_tag < PCIE_TAG_COUNT_1) begin + if (pcie_tag_fifo_wr_tag < PCIE_TAG_COUNT_1 || !PCIE_TAG_COUNT_2) begin pcie_tag_fifo_1_we = 1'b1; end else begin pcie_tag_fifo_2_we = 1'b1; @@ -1259,6 +1347,7 @@ always @* begin if (op_table_read_commit[op_table_read_finish_ptr] && (op_table_read_count_start[op_table_read_finish_ptr] == op_table_read_count_finish[op_table_read_finish_ptr])) begin op_tag_fifo_we = 1'b1; + dec_active_op = 1'b1; stat_rd_op_finish_valid_next = 1'b1; m_axis_read_desc_status_valid_next = 1'b1; end @@ -1309,7 +1398,6 @@ always @(posedge clk) begin req_ram_sel_reg <= req_ram_sel_next; req_ram_addr_reg <= req_ram_addr_next; req_op_count_reg <= req_op_count_next; - req_tlp_count_reg <= req_tlp_count_next; req_zero_len_reg <= req_zero_len_next; req_op_tag_reg <= req_op_tag_next; req_op_tag_valid_reg <= req_op_tag_valid_next; @@ -1357,6 +1445,7 @@ always @(posedge clk) begin m_axis_read_desc_status_error_reg <= m_axis_read_desc_status_error_next; m_axis_read_desc_status_valid_reg <= m_axis_read_desc_status_valid_next; + status_busy_reg <= active_op_count_reg != 0 || active_tx_count_reg != 0; status_error_cor_reg <= status_error_cor_next; status_error_uncor_reg <= status_error_uncor_next; @@ -1379,6 +1468,7 @@ always @(posedge clk) begin stat_rd_tx_stall_reg <= stat_rd_tx_stall_next; max_read_request_size_dw_reg <= 11'd32 << (max_read_request_size > 5 ? 
5 : max_read_request_size); + rcb_128b_reg <= rcb_128b; if (status_fifo_wr_en) begin status_fifo_op_tag[status_fifo_wr_ptr_reg[STATUS_FIFO_ADDR_WIDTH-1:0]] <= status_fifo_wr_op_tag; @@ -1403,15 +1493,26 @@ always @(posedge clk) begin status_fifo_rd_valid_reg <= status_fifo_rd_valid_next; - status_fifo_half_full_reg <= $unsigned(status_fifo_wr_ptr_reg - status_fifo_rd_ptr_reg) >= 2**(STATUS_FIFO_ADDR_WIDTH-1); + status_fifo_full_reg <= $unsigned(status_fifo_wr_ptr_reg - status_fifo_rd_ptr_reg) >= 2**STATUS_FIFO_ADDR_WIDTH-4; active_tx_count_reg <= active_tx_count_next; active_tx_count_av_reg <= active_tx_count_av_next; + active_tag_count_reg <= active_tag_count_reg + inc_active_tag - dec_active_tag; + active_op_count_reg <= active_op_count_reg + inc_active_op - dec_active_op; + + active_cplh_fc_count_reg <= active_cplh_fc_count_reg + inc_active_cplh_fc_count - dec_active_cplh_fc_count; + active_cplh_fc_av_reg <= !CPLH_FC_LIMIT || active_cplh_fc_count_reg < CPLH_FC_LIMIT; + + active_cpld_fc_count_reg <= active_cpld_fc_count_reg + inc_active_cpld_fc_count - dec_active_cpld_fc_count; + active_cpld_fc_av_reg <= !CPLD_FC_LIMIT || active_cpld_fc_count_reg < CPLD_FC_LIMIT; + pcie_tag_table_start_ptr_reg <= pcie_tag_table_start_ptr_next; pcie_tag_table_start_ram_sel_reg <= pcie_tag_table_start_ram_sel_next; pcie_tag_table_start_ram_addr_reg <= pcie_tag_table_start_ram_addr_next; pcie_tag_table_start_op_tag_reg <= pcie_tag_table_start_op_tag_next; + pcie_tag_table_start_cplh_fc_reg <= pcie_tag_table_start_cplh_fc_next; + pcie_tag_table_start_cpld_fc_reg <= pcie_tag_table_start_cpld_fc_next; pcie_tag_table_start_zero_len_reg <= pcie_tag_table_start_zero_len_next; pcie_tag_table_start_en_reg <= pcie_tag_table_start_en_next; @@ -1421,6 +1522,8 @@ always @(posedge clk) begin pcie_tag_table_ram_sel[pcie_tag_table_start_ptr_reg] <= pcie_tag_table_start_ram_sel_reg; pcie_tag_table_ram_addr[pcie_tag_table_start_ptr_reg] <= pcie_tag_table_start_ram_addr_reg; 
pcie_tag_table_op_tag[pcie_tag_table_start_ptr_reg] <= pcie_tag_table_start_op_tag_reg; + pcie_tag_table_cplh_fc[pcie_tag_table_start_ptr_reg] <= pcie_tag_table_start_cplh_fc_reg; + pcie_tag_table_cpld_fc[pcie_tag_table_start_ptr_reg] <= pcie_tag_table_start_cpld_fc_reg; pcie_tag_table_zero_len[pcie_tag_table_start_ptr_reg] <= pcie_tag_table_start_zero_len_reg; pcie_tag_table_active_a[pcie_tag_table_start_ptr_reg] <= !pcie_tag_table_active_b[pcie_tag_table_start_ptr_reg]; end @@ -1436,11 +1539,13 @@ always @(posedge clk) begin pcie_tag_fifo_1_wr_ptr_reg <= pcie_tag_fifo_1_wr_ptr_reg + 1; end pcie_tag_fifo_1_rd_ptr_reg <= pcie_tag_fifo_1_rd_ptr_next; - if (pcie_tag_fifo_2_we) begin - pcie_tag_fifo_2_mem[pcie_tag_fifo_2_wr_ptr_reg[PCIE_TAG_WIDTH_2-1:0]] <= pcie_tag_fifo_wr_tag; - pcie_tag_fifo_2_wr_ptr_reg <= pcie_tag_fifo_2_wr_ptr_reg + 1; + if (PCIE_TAG_COUNT_2) begin + if (pcie_tag_fifo_2_we) begin + pcie_tag_fifo_2_mem[pcie_tag_fifo_2_wr_ptr_reg[PCIE_TAG_WIDTH_2-1:0]] <= pcie_tag_fifo_wr_tag; + pcie_tag_fifo_2_wr_ptr_reg <= pcie_tag_fifo_2_wr_ptr_reg + 1; + end + pcie_tag_fifo_2_rd_ptr_reg <= pcie_tag_fifo_2_rd_ptr_next; end - pcie_tag_fifo_2_rd_ptr_reg <= pcie_tag_fifo_2_rd_ptr_next; if (init_op_tag_reg) begin op_table_read_init_a[init_count_reg] <= 1'b0; @@ -1509,6 +1614,7 @@ always @(posedge clk) begin m_axis_read_desc_status_valid_reg <= 1'b0; + status_busy_reg <= 1'b0; status_error_cor_reg <= 1'b0; status_error_uncor_reg <= 1'b0; @@ -1530,6 +1636,15 @@ always @(posedge clk) begin active_tx_count_reg <= {TX_COUNT_WIDTH{1'b0}}; active_tx_count_av_reg <= 1'b1; + active_tag_count_reg <= 0; + active_op_count_reg <= 0; + + active_cplh_fc_count_reg <= 0; + active_cplh_fc_av_reg <= 1'b1; + + active_cpld_fc_count_reg <= 0; + active_cpld_fc_av_reg <= 1'b1; + pcie_tag_table_start_en_reg <= 1'b0; pcie_tag_fifo_1_wr_ptr_reg <= 0; diff --git a/fpga/lib/pcie/rtl/dma_if_pcie_us_rd.v b/fpga/lib/pcie/rtl/dma_if_pcie_us_rd.v index f48b84fcd..4b9929b4d 100644 --- 
a/fpga/lib/pcie/rtl/dma_if_pcie_us_rd.v +++ b/fpga/lib/pcie/rtl/dma_if_pcie_us_rd.v @@ -334,9 +334,10 @@ reg [1:0] tlp_state_reg = TLP_STATE_IDLE, tlp_state_next; // datapath control signals reg last_cycle; -reg [3:0] first_be; -reg [3:0] last_be; -reg [10:0] dword_count; +reg [3:0] req_first_be; +reg [3:0] req_last_be; +reg [12:0] req_tlp_count; +reg [10:0] req_dword_count; reg req_last_tlp; reg [PCIE_ADDR_WIDTH-1:0] req_pcie_addr; @@ -349,7 +350,6 @@ reg [PCIE_ADDR_WIDTH-1:0] req_pcie_addr_reg = {PCIE_ADDR_WIDTH{1'b0}}, req_pcie_ reg [RAM_SEL_WIDTH-1:0] req_ram_sel_reg = {RAM_SEL_WIDTH{1'b0}}, req_ram_sel_next; reg [RAM_ADDR_WIDTH-1:0] req_ram_addr_reg = {RAM_ADDR_WIDTH{1'b0}}, req_ram_addr_next; reg [LEN_WIDTH-1:0] req_op_count_reg = {LEN_WIDTH{1'b0}}, req_op_count_next; -reg [12:0] req_tlp_count_reg = 13'd0, req_tlp_count_next; reg req_zero_len_reg = 1'b0, req_zero_len_next; reg [OP_TAG_WIDTH-1:0] req_op_tag_reg = {OP_TAG_WIDTH{1'b0}}, req_op_tag_next; reg req_op_tag_valid_reg = 1'b0, req_op_tag_valid_next; @@ -409,7 +409,7 @@ reg status_fifo_mask_reg = 1'b0, status_fifo_mask_next; reg status_fifo_finish_reg = 1'b0, status_fifo_finish_next; reg [3:0] status_fifo_error_reg = 4'd0, status_fifo_error_next; reg status_fifo_wr_en_reg = 1'b0, status_fifo_wr_en_next; -reg status_fifo_half_full_reg = 1'b0; +reg status_fifo_full_reg = 1'b0; reg status_fifo_rd_en; reg [OP_TAG_WIDTH-1:0] status_fifo_rd_op_tag_reg = 0; reg [SEG_COUNT-1:0] status_fifo_rd_mask_reg = 0; @@ -570,7 +570,6 @@ always @* begin req_ram_sel_next = req_ram_sel_reg; req_ram_addr_next = req_ram_addr_reg; req_op_count_next = req_op_count_reg; - req_tlp_count_next = req_tlp_count_reg; req_zero_len_next = req_zero_len_reg; req_op_tag_next = req_op_tag_reg; req_op_tag_valid_next = req_op_tag_valid_reg; @@ -592,16 +591,16 @@ always @* begin // packet smaller than max read request size if (((req_pcie_addr_reg & 12'hfff) + (req_op_count_reg & 12'hfff)) >> 12 != 0 || req_op_count_reg >> 12 != 0) begin // 
crosses 4k boundary, split on 4K boundary - req_tlp_count_next = 13'h1000 - req_pcie_addr_reg[11:0]; - dword_count = 11'h400 - req_pcie_addr_reg[11:2]; + req_tlp_count = 13'h1000 - req_pcie_addr_reg[11:0]; + req_dword_count = 11'h400 - req_pcie_addr_reg[11:2]; req_last_tlp = (((req_pcie_addr_reg & 12'hfff) + (req_op_count_reg & 12'hfff)) & 12'hfff) == 0 && req_op_count_reg >> 12 == 0; - // optimized req_pcie_addr = req_pcie_addr_reg + req_tlp_count_next + // optimized req_pcie_addr = req_pcie_addr_reg + req_tlp_count req_pcie_addr[PCIE_ADDR_WIDTH-1:12] = req_pcie_addr_reg[PCIE_ADDR_WIDTH-1:12]+1; req_pcie_addr[11:0] = 12'd0; end else begin // does not cross 4k boundary, send one TLP - req_tlp_count_next = req_op_count_reg; - dword_count = (req_op_count_reg + req_pcie_addr_reg[1:0] + 3) >> 2; + req_tlp_count = req_op_count_reg; + req_dword_count = (req_op_count_reg + req_pcie_addr_reg[1:0] + 3) >> 2; req_last_tlp = 1'b1; // always last TLP, so next address is irrelevant req_pcie_addr[PCIE_ADDR_WIDTH-1:12] = req_pcie_addr_reg[PCIE_ADDR_WIDTH-1:12]; @@ -611,18 +610,18 @@ always @* begin // packet larger than max read request size if (((req_pcie_addr_reg & 12'hfff) + {max_read_request_size_dw_reg, 2'b00}) >> 12 != 0) begin // crosses 4k boundary, split on 4K boundary - req_tlp_count_next = 13'h1000 - req_pcie_addr_reg[11:0]; - dword_count = 11'h400 - req_pcie_addr_reg[11:2]; + req_tlp_count = 13'h1000 - req_pcie_addr_reg[11:0]; + req_dword_count = 11'h400 - req_pcie_addr_reg[11:2]; req_last_tlp = 1'b0; - // optimized req_pcie_addr = req_pcie_addr_reg + req_tlp_count_next + // optimized req_pcie_addr = req_pcie_addr_reg + req_tlp_count req_pcie_addr[PCIE_ADDR_WIDTH-1:12] = req_pcie_addr_reg[PCIE_ADDR_WIDTH-1:12]+1; req_pcie_addr[11:0] = 12'd0; end else begin // does not cross 4k boundary, split on 128-byte read completion boundary - req_tlp_count_next = {max_read_request_size_dw_reg, 2'b00} - req_pcie_addr_reg[6:0]; - dword_count = max_read_request_size_dw_reg - 
req_pcie_addr_reg[6:2]; + req_tlp_count = {max_read_request_size_dw_reg, 2'b00} - req_pcie_addr_reg[6:0]; + req_dword_count = max_read_request_size_dw_reg - req_pcie_addr_reg[6:2]; req_last_tlp = 1'b0; - // optimized req_pcie_addr = req_pcie_addr_reg + req_tlp_count_next + // optimized req_pcie_addr = req_pcie_addr_reg + req_tlp_count req_pcie_addr[PCIE_ADDR_WIDTH-1:12] = req_pcie_addr_reg[PCIE_ADDR_WIDTH-1:12]; req_pcie_addr[11:0] = {{req_pcie_addr_reg[11:7], 5'd0} + max_read_request_size_dw_reg, 2'b00}; end @@ -630,18 +629,18 @@ always @* begin pcie_tag_table_start_ptr_next = req_pcie_tag_reg; pcie_tag_table_start_ram_sel_next = req_ram_sel_reg; - pcie_tag_table_start_ram_addr_next = req_ram_addr_reg + req_tlp_count_next; + pcie_tag_table_start_ram_addr_next = req_ram_addr_reg + req_tlp_count; pcie_tag_table_start_op_tag_next = req_op_tag_reg; pcie_tag_table_start_zero_len_next = req_zero_len_reg; pcie_tag_table_start_en_next = 1'b0; - first_be = 4'b1111 << req_pcie_addr_reg[1:0]; - last_be = 4'b1111 >> (3 - ((req_pcie_addr_reg[1:0] + req_tlp_count_next[1:0] - 1) & 3)); + req_first_be = 4'b1111 << req_pcie_addr_reg[1:0]; + req_last_be = 4'b1111 >> (3 - ((req_pcie_addr_reg[1:0] + req_tlp_count[1:0] - 1) & 3)); // TLP header and sideband data tlp_header_data[1:0] = 2'b0; // address type tlp_header_data[63:2] = req_pcie_addr_reg[PCIE_ADDR_WIDTH-1:2]; // address - tlp_header_data[74:64] = dword_count; // DWORD count + tlp_header_data[74:64] = req_dword_count; // DWORD count tlp_header_data[78:75] = REQ_MEM_READ; // request type - memory read tlp_header_data[79] = 1'b0; // poisoned request tlp_header_data[95:80] = requester_id; @@ -653,9 +652,9 @@ always @* begin tlp_header_data[127] = 1'b0; // force ECRC if (AXIS_PCIE_DATA_WIDTH == 512) begin - tlp_tuser[3:0] = req_zero_len_reg ? 4'b0000 : (dword_count == 1 ? first_be & last_be : first_be); // first BE 0 + tlp_tuser[3:0] = req_zero_len_reg ? 4'b0000 : (req_dword_count == 1 ? 
req_first_be & req_last_be : req_first_be); // first BE 0 tlp_tuser[7:4] = 4'd0; // first BE 1 - tlp_tuser[11:8] = req_zero_len_reg ? 4'b0000 : (dword_count == 1 ? 4'b0000 : last_be); // last BE 0 + tlp_tuser[11:8] = req_zero_len_reg ? 4'b0000 : (req_dword_count == 1 ? 4'b0000 : req_last_be); // last BE 0 tlp_tuser[15:12] = 4'd0; // last BE 1 tlp_tuser[19:16] = 3'd0; // addr_offset tlp_tuser[21:20] = 2'b01; // is_sop @@ -673,8 +672,8 @@ always @* begin tlp_tuser[72:67] = 6'd0; // seq_num1 tlp_tuser[136:73] = 64'd0; // parity end else begin - tlp_tuser[3:0] = req_zero_len_reg ? 4'b0000 : (dword_count == 1 ? first_be & last_be : first_be); // first BE - tlp_tuser[7:4] = req_zero_len_reg ? 4'b0000 : (dword_count == 1 ? 4'b0000 : last_be); // last BE + tlp_tuser[3:0] = req_zero_len_reg ? 4'b0000 : (req_dword_count == 1 ? req_first_be & req_last_be : req_first_be); // first BE + tlp_tuser[7:4] = req_zero_len_reg ? 4'b0000 : (req_dword_count == 1 ? 4'b0000 : req_last_be); // last BE tlp_tuser[10:8] = 3'd0; // addr_offset tlp_tuser[11] = 1'b0; // discontinue tlp_tuser[12] = 1'b0; // tph_present @@ -742,12 +741,12 @@ always @* begin if (AXIS_PCIE_DATA_WIDTH > 64) begin req_pcie_addr_next = req_pcie_addr; - req_ram_addr_next = req_ram_addr_reg + req_tlp_count_next; - req_op_count_next = req_op_count_reg - req_tlp_count_next; + req_ram_addr_next = req_ram_addr_reg + req_tlp_count; + req_op_count_next = req_op_count_reg - req_tlp_count; pcie_tag_table_start_ptr_next = req_pcie_tag_reg; pcie_tag_table_start_ram_sel_next = req_ram_sel_reg; - pcie_tag_table_start_ram_addr_next = req_ram_addr_reg + req_tlp_count_next; + pcie_tag_table_start_ram_addr_next = req_ram_addr_reg + req_tlp_count; pcie_tag_table_start_op_tag_next = req_op_tag_reg; pcie_tag_table_start_zero_len_next = req_zero_len_reg; pcie_tag_table_start_en_next = 1'b1; @@ -780,14 +779,14 @@ always @* begin if (m_axis_rq_tready_int_reg && req_pcie_tag_valid_reg) begin req_pcie_addr_next = req_pcie_addr; - 
req_ram_addr_next = req_ram_addr_reg + req_tlp_count_next; - req_op_count_next = req_op_count_reg - req_tlp_count_next; + req_ram_addr_next = req_ram_addr_reg + req_tlp_count; + req_op_count_next = req_op_count_reg - req_tlp_count; m_axis_rq_tvalid_int = 1'b1; pcie_tag_table_start_ptr_next = req_pcie_tag_reg; pcie_tag_table_start_ram_sel_next = req_ram_sel_reg; - pcie_tag_table_start_ram_addr_next = req_ram_addr_reg + req_tlp_count_next; + pcie_tag_table_start_ram_addr_next = req_ram_addr_reg + req_tlp_count; pcie_tag_table_start_op_tag_next = req_op_tag_reg; pcie_tag_table_start_zero_len_next = req_zero_len_reg; pcie_tag_table_start_en_next = 1'b1; @@ -906,7 +905,7 @@ always @* begin TLP_STATE_IDLE: begin // idle state, wait for completion if (AXIS_PCIE_DATA_WIDTH > 64) begin - s_axis_rc_tready_next = init_done_reg && &ram_wr_cmd_ready_int && !status_fifo_half_full_reg; + s_axis_rc_tready_next = init_done_reg && &ram_wr_cmd_ready_int && !status_fifo_full_reg; if (s_axis_rc_tready && s_axis_rc_tvalid) begin // header fields @@ -1132,7 +1131,7 @@ always @* begin s_axis_rc_tready_next = init_done_reg; tlp_state_next = TLP_STATE_IDLE; end else begin - s_axis_rc_tready_next = init_done_reg && &ram_wr_cmd_ready_int && !status_fifo_half_full_reg; + s_axis_rc_tready_next = init_done_reg && &ram_wr_cmd_ready_int && !status_fifo_full_reg; tlp_state_next = TLP_STATE_HEADER; end end else begin @@ -1143,7 +1142,7 @@ always @* begin end TLP_STATE_HEADER: begin // header state; process header (64 bit interface only) - s_axis_rc_tready_next = init_done_reg && &ram_wr_cmd_ready_int && !status_fifo_half_full_reg; + s_axis_rc_tready_next = init_done_reg && &ram_wr_cmd_ready_int && !status_fifo_full_reg; if (s_axis_rc_tready && s_axis_rc_tvalid) begin pcie_tag_next = s_axis_rc_tdata[7:0]; // tag @@ -1285,7 +1284,7 @@ always @* begin end TLP_STATE_WRITE: begin // write state - generate write operations - s_axis_rc_tready_next = init_done_reg && &ram_wr_cmd_ready_int && 
!status_fifo_half_full_reg; + s_axis_rc_tready_next = init_done_reg && &ram_wr_cmd_ready_int && !status_fifo_full_reg; if (s_axis_rc_tready && s_axis_rc_tvalid) begin rc_tdata_int_next = s_axis_rc_tdata; @@ -1350,7 +1349,7 @@ always @* begin if (s_axis_rc_tready & s_axis_rc_tvalid) begin if (s_axis_rc_tlast) begin if (AXIS_PCIE_DATA_WIDTH > 64) begin - s_axis_rc_tready_next = init_done_reg && &ram_wr_cmd_ready_int && !status_fifo_half_full_reg; + s_axis_rc_tready_next = init_done_reg && &ram_wr_cmd_ready_int && !status_fifo_full_reg; end else begin s_axis_rc_tready_next = init_done_reg; end @@ -1374,7 +1373,7 @@ always @* begin if (init_pcie_tag_reg) begin // initialize FIFO pcie_tag_fifo_wr_tag = init_count_reg; - if (pcie_tag_fifo_wr_tag < PCIE_TAG_COUNT_1) begin + if (pcie_tag_fifo_wr_tag < PCIE_TAG_COUNT_1 || !PCIE_TAG_COUNT_2) begin pcie_tag_fifo_1_we = 1'b1; end else if (pcie_tag_fifo_wr_tag) begin pcie_tag_fifo_2_we = 1'b1; @@ -1384,7 +1383,7 @@ always @* begin pcie_tag_table_finish_en = 1'b1; pcie_tag_fifo_wr_tag = pcie_tag_reg; - if (pcie_tag_fifo_wr_tag < PCIE_TAG_COUNT_1) begin + if (pcie_tag_fifo_wr_tag < PCIE_TAG_COUNT_1 || !PCIE_TAG_COUNT_2) begin pcie_tag_fifo_1_we = 1'b1; end else begin pcie_tag_fifo_2_we = 1'b1; @@ -1470,7 +1469,6 @@ always @(posedge clk) begin req_ram_sel_reg <= req_ram_sel_next; req_ram_addr_reg <= req_ram_addr_next; req_op_count_reg <= req_op_count_next; - req_tlp_count_reg <= req_tlp_count_next; req_zero_len_reg <= req_zero_len_next; req_op_tag_reg <= req_op_tag_next; req_op_tag_valid_reg <= req_op_tag_valid_next; @@ -1538,7 +1536,7 @@ always @(posedge clk) begin status_fifo_rd_valid_reg <= status_fifo_rd_valid_next; - status_fifo_half_full_reg <= $unsigned(status_fifo_wr_ptr_reg - status_fifo_rd_ptr_reg) >= 2**(STATUS_FIFO_ADDR_WIDTH-1); + status_fifo_full_reg <= $unsigned(status_fifo_wr_ptr_reg - status_fifo_rd_ptr_reg) >= 2**STATUS_FIFO_ADDR_WIDTH-4; if (active_tx_count_reg < TX_LIMIT && inc_active_tx && 
!s_axis_rq_seq_num_valid_0 && !s_axis_rq_seq_num_valid_1) begin // inc by 1 @@ -1584,11 +1582,13 @@ always @(posedge clk) begin pcie_tag_fifo_1_wr_ptr_reg <= pcie_tag_fifo_1_wr_ptr_reg + 1; end pcie_tag_fifo_1_rd_ptr_reg <= pcie_tag_fifo_1_rd_ptr_next; - if (pcie_tag_fifo_2_we) begin - pcie_tag_fifo_2_mem[pcie_tag_fifo_2_wr_ptr_reg[PCIE_TAG_WIDTH_2-1:0]] <= pcie_tag_fifo_wr_tag; - pcie_tag_fifo_2_wr_ptr_reg <= pcie_tag_fifo_2_wr_ptr_reg + 1; + if (PCIE_TAG_COUNT_2) begin + if (pcie_tag_fifo_2_we) begin + pcie_tag_fifo_2_mem[pcie_tag_fifo_2_wr_ptr_reg[PCIE_TAG_WIDTH_2-1:0]] <= pcie_tag_fifo_wr_tag; + pcie_tag_fifo_2_wr_ptr_reg <= pcie_tag_fifo_2_wr_ptr_reg + 1; + end + pcie_tag_fifo_2_rd_ptr_reg <= pcie_tag_fifo_2_rd_ptr_next; end - pcie_tag_fifo_2_rd_ptr_reg <= pcie_tag_fifo_2_rd_ptr_next; if (init_op_tag_reg) begin op_table_read_init_a[init_count_reg] <= 1'b0; diff --git a/fpga/lib/pcie/rtl/dma_if_pcie_wr.v b/fpga/lib/pcie/rtl/dma_if_pcie_wr.v index 6cd2cbbc2..3041c9807 100644 --- a/fpga/lib/pcie/rtl/dma_if_pcie_wr.v +++ b/fpga/lib/pcie/rtl/dma_if_pcie_wr.v @@ -136,6 +136,11 @@ module dma_if_pcie_wr # input wire [15:0] requester_id, input wire [2:0] max_payload_size, + /* + * Status + */ + output wire status_busy, + /* * Statistics */ @@ -327,6 +332,10 @@ reg [TX_COUNT_WIDTH-1:0] active_tx_count_reg = {TX_COUNT_WIDTH{1'b0}}, active_tx reg active_tx_count_av_reg = 1'b1, active_tx_count_av_next; reg inc_active_tx; +reg [OP_TAG_WIDTH+1-1:0] active_op_count_reg = 0; +reg inc_active_op; +reg dec_active_op; + reg [TLP_DATA_WIDTH-1:0] tx_wr_req_tlp_data_reg = 0, tx_wr_req_tlp_data_next; reg [TLP_STRB_WIDTH-1:0] tx_wr_req_tlp_strb_reg = 0, tx_wr_req_tlp_strb_next; reg [TLP_SEG_COUNT*TLP_HDR_WIDTH-1:0] tx_wr_req_tlp_hdr_reg = 0, tx_wr_req_tlp_hdr_next; @@ -345,6 +354,8 @@ reg [RAM_SEG_COUNT*RAM_SEG_ADDR_WIDTH-1:0] ram_rd_cmd_addr_reg = 0, ram_rd_cmd_a reg [RAM_SEG_COUNT-1:0] ram_rd_cmd_valid_reg = 0, ram_rd_cmd_valid_next; reg [RAM_SEG_COUNT-1:0] ram_rd_resp_ready_cmb; 
+reg status_busy_reg = 1'b0; + reg [OP_TAG_WIDTH-1:0] stat_wr_op_start_tag_reg = 0, stat_wr_op_start_tag_next; reg [LEN_WIDTH-1:0] stat_wr_op_start_len_reg = 0, stat_wr_op_start_len_next; reg stat_wr_op_start_valid_reg = 1'b0, stat_wr_op_start_valid_next; @@ -378,6 +389,8 @@ assign ram_rd_cmd_addr = ram_rd_cmd_addr_reg; assign ram_rd_cmd_valid = ram_rd_cmd_valid_reg; assign ram_rd_resp_ready = ram_rd_resp_ready_cmb; +assign status_busy = status_busy_reg; + assign stat_wr_op_start_tag = stat_wr_op_start_tag_reg; assign stat_wr_op_start_len = stat_wr_op_start_len_reg; assign stat_wr_op_start_valid = stat_wr_op_start_valid_reg; @@ -501,6 +514,8 @@ always @* begin op_table_start_last = op_count_reg == tlp_count_reg; op_table_start_en = 1'b0; + inc_active_op = 1'b0; + // TLP segmentation case (req_state_reg) REQ_STATE_IDLE: begin @@ -587,6 +602,7 @@ always @* begin op_table_start_tag = tag_reg; op_table_start_en = 1'b1; + inc_active_op = 1'b1; stat_wr_req_start_tag_next = op_table_start_ptr_reg; stat_wr_req_start_len_next = zero_len_reg ? 
0 : tlp_count_reg; @@ -814,6 +830,7 @@ always @* begin op_table_tx_finish_en = 1'b0; inc_active_tx = 1'b0; + dec_active_op = 1'b0; tx_wr_req_tlp_data_next = tx_wr_req_tlp_data_reg; tx_wr_req_tlp_strb_next = tx_wr_req_tlp_strb_reg; @@ -965,6 +982,7 @@ always @* begin if (op_table_active[op_table_finish_ptr_reg[OP_TAG_WIDTH-1:0]] && (!TX_SEQ_NUM_ENABLE || op_table_tx_done[op_table_finish_ptr_reg[OP_TAG_WIDTH-1:0]]) && op_table_finish_ptr_reg != op_table_tx_finish_ptr_reg) begin op_table_finish_en = 1'b1; + dec_active_op = 1'b1; stat_wr_req_finish_valid_next = 1'b1; @@ -1072,6 +1090,8 @@ always @(posedge clk) begin ram_rd_cmd_addr_reg <= ram_rd_cmd_addr_next; ram_rd_cmd_valid_reg <= ram_rd_cmd_valid_next; + status_busy_reg <= active_op_count_reg != 0 || active_tx_count_reg != 0; + stat_wr_op_start_tag_reg <= stat_wr_op_start_tag_next; stat_wr_op_start_len_reg <= stat_wr_op_start_len_next; stat_wr_op_start_valid_reg <= stat_wr_op_start_valid_next; @@ -1091,6 +1111,8 @@ always @(posedge clk) begin active_tx_count_reg <= active_tx_count_next; active_tx_count_av_reg <= active_tx_count_av_next; + active_op_count_reg <= active_op_count_reg + inc_active_op - dec_active_op; + if (mask_fifo_we) begin mask_fifo_mask[mask_fifo_wr_ptr_reg[MASK_FIFO_ADDR_WIDTH-1:0]] <= mask_fifo_wr_mask; mask_fifo_wr_ptr_reg <= mask_fifo_wr_ptr_reg + 1; @@ -1162,6 +1184,8 @@ always @(posedge clk) begin active_tx_count_reg <= {TX_COUNT_WIDTH{1'b0}}; active_tx_count_av_reg <= 1'b1; + active_op_count_reg <= 0; + mask_fifo_wr_ptr_reg <= 0; mask_fifo_rd_ptr_reg <= 0; diff --git a/fpga/lib/pcie/rtl/dma_psdpram.v b/fpga/lib/pcie/rtl/dma_psdpram.v index 2f054f8a1..4b0f864ac 100644 --- a/fpga/lib/pcie/rtl/dma_psdpram.v +++ b/fpga/lib/pcie/rtl/dma_psdpram.v @@ -117,8 +117,8 @@ generate for (i = 0; i < SEG_BE_WIDTH; i = i + 1) begin if (wr_cmd_valid[n] && wr_cmd_be[n*SEG_BE_WIDTH+i]) begin mem_reg[wr_cmd_addr[SEG_ADDR_WIDTH*n +: INT_ADDR_WIDTH]][i*8 +: 8] <= wr_cmd_data[SEG_DATA_WIDTH*n+i*8 +: 8]; - 
wr_done_reg <= 1'b1; end + wr_done_reg <= wr_cmd_valid[n]; end if (rst) begin diff --git a/fpga/lib/pcie/rtl/dma_psdpram_async.v b/fpga/lib/pcie/rtl/dma_psdpram_async.v index 08fdf5c68..e3e85be07 100644 --- a/fpga/lib/pcie/rtl/dma_psdpram_async.v +++ b/fpga/lib/pcie/rtl/dma_psdpram_async.v @@ -118,8 +118,8 @@ generate for (i = 0; i < SEG_BE_WIDTH; i = i + 1) begin if (wr_cmd_valid[n] && wr_cmd_be[n*SEG_BE_WIDTH+i]) begin mem_reg[wr_cmd_addr[SEG_ADDR_WIDTH*n +: INT_ADDR_WIDTH]][i*8 +: 8] <= wr_cmd_data[SEG_DATA_WIDTH*n+i*8 +: 8]; - wr_done_reg <= 1'b1; end + wr_done_reg <= wr_cmd_valid[n]; end if (rst_wr) begin diff --git a/fpga/lib/pcie/rtl/dma_ram_demux_wr.v b/fpga/lib/pcie/rtl/dma_ram_demux_wr.v index 4f4750005..2738548b8 100644 --- a/fpga/lib/pcie/rtl/dma_ram_demux_wr.v +++ b/fpga/lib/pcie/rtl/dma_ram_demux_wr.v @@ -268,7 +268,8 @@ for (n = 0; n < SEG_COUNT; n = n + 1) begin // RAM write done mux wire [PORTS-1:0] seg_ram_wr_done; - wire [PORTS-1:0] seg_ram_wr_done_sel; + wire [PORTS-1:0] seg_ram_wr_done_out; + wire [PORTS-1:0] seg_ram_wr_done_ack; wire seg_ctrl_wr_done; for (p = 0; p < PORTS; p = p + 1) begin @@ -277,25 +278,19 @@ for (n = 0; n < SEG_COUNT; n = n + 1) begin assign ctrl_wr_done[n] = seg_ctrl_wr_done; - wire [CL_PORTS-1:0] select_resp = fifo_sel[fifo_rd_ptr_reg[FIFO_ADDR_WIDTH-1:0]]; - for (p = 0; p < PORTS; p = p + 1) begin reg [FIFO_ADDR_WIDTH+1-1:0] done_count_reg = 0; reg done_reg = 1'b0; - assign seg_ram_wr_done_sel[p] = done_reg; + assign seg_ram_wr_done_out[p] = done_reg; always @(posedge clk) begin - if (select_resp == p && (done_count_reg != 0 || seg_ram_wr_done[p])) begin - done_reg <= 1'b1; - if (!seg_ram_wr_done[p]) begin - done_count_reg <= done_count_reg - 1; - end - end else begin - done_reg <= 1'b0; - if (seg_ram_wr_done[p]) begin - done_count_reg <= done_count_reg + 1; - end + if (done_count_reg < 2**FIFO_ADDR_WIDTH && seg_ram_wr_done[p] && !seg_ram_wr_done_ack[p]) begin + done_count_reg <= done_count_reg + 1; + done_reg <= 1; + 
end else if (done_count_reg > 0 && !seg_ram_wr_done[p] && seg_ram_wr_done_ack[p]) begin + done_count_reg <= done_count_reg - 1; + done_reg <= done_count_reg > 1; end if (rst) begin @@ -305,15 +300,25 @@ for (n = 0; n < SEG_COUNT; n = n + 1) begin end end - assign seg_ctrl_wr_done = seg_ram_wr_done_sel != 0; + reg [CL_PORTS-1:0] select_resp_reg = 0; + reg select_resp_valid_reg = 0; + + assign seg_ram_wr_done_ack = seg_ram_wr_done_out & (select_resp_valid_reg ? (1 << select_resp_reg) : 0); + assign seg_ctrl_wr_done = |seg_ram_wr_done_ack; always @(posedge clk) begin - if (seg_ctrl_wr_done && !fifo_empty) begin - fifo_rd_ptr_reg <= fifo_rd_ptr_reg + 1; + if (!select_resp_valid_reg || seg_ctrl_wr_done) begin + select_resp_valid_reg <= 1'b0; + if (!fifo_empty) begin + select_resp_reg <= fifo_sel[fifo_rd_ptr_reg[FIFO_ADDR_WIDTH-1:0]]; + fifo_rd_ptr_reg = fifo_rd_ptr_reg + 1; + select_resp_valid_reg <= 1'b1; + end end if (rst) begin fifo_rd_ptr_reg <= 0; + select_resp_valid_reg <= 1'b0; end end diff --git a/fpga/lib/pcie/rtl/pcie_ptile_if.v b/fpga/lib/pcie/rtl/pcie_ptile_if.v index 7bfda2416..87a2a039a 100644 --- a/fpga/lib/pcie/rtl/pcie_ptile_if.v +++ b/fpga/lib/pcie/rtl/pcie_ptile_if.v @@ -210,6 +210,7 @@ module pcie_ptile_if # * Configuration outputs */ output wire [F_COUNT-1:0] ext_tag_enable, + output wire [F_COUNT-1:0] rcb_128b, output wire [7:0] bus_num, output wire [F_COUNT*3-1:0] max_read_request_size, output wire [F_COUNT*3-1:0] max_payload_size, @@ -416,7 +417,7 @@ pcie_ptile_cfg_inst ( .cfg_device_num(), .cfg_bus_num(bus_num), .cfg_pm_no_soft_rst(), - .cfg_rcb_ctrl(), + .cfg_rcb_ctrl(rcb_128b), .cfg_irq_disable(), .cfg_pcie_cap_irq_msg_num(), .cfg_sys_pwr_ctrl(), diff --git a/fpga/lib/pcie/rtl/pcie_s10_if.v b/fpga/lib/pcie/rtl/pcie_s10_if.v index 78ea7b9b4..be6f4cdc6 100644 --- a/fpga/lib/pcie/rtl/pcie_s10_if.v +++ b/fpga/lib/pcie/rtl/pcie_s10_if.v @@ -218,6 +218,7 @@ module pcie_s10_if # * Configuration outputs */ output wire [F_COUNT-1:0] ext_tag_enable, + 
output wire [F_COUNT-1:0] rcb_128b, output wire [7:0] bus_num, output wire [F_COUNT*3-1:0] max_read_request_size, output wire [F_COUNT*3-1:0] max_payload_size, @@ -432,7 +433,7 @@ pcie_s10_cfg_inst ( .cfg_device_num(), .cfg_bus_num(bus_num), .cfg_pm_no_soft_rst(), - .cfg_rcb_ctrl(), + .cfg_rcb_ctrl(rcb_128b), .cfg_irq_disable(), .cfg_pcie_cap_irq_msg_num(), .cfg_sys_pwr_ctrl(), diff --git a/fpga/lib/pcie/rtl/pcie_us_axi_dma_rd.v b/fpga/lib/pcie/rtl/pcie_us_axi_dma_rd.v index 1650bff6f..482cc1f59 100644 --- a/fpga/lib/pcie/rtl/pcie_us_axi_dma_rd.v +++ b/fpga/lib/pcie/rtl/pcie_us_axi_dma_rd.v @@ -342,9 +342,10 @@ reg [2:0] tlp_state_reg = TLP_STATE_IDLE, tlp_state_next; // datapath control signals reg transfer_in_save; -reg [3:0] first_be; -reg [3:0] last_be; -reg [10:0] dword_count; +reg [3:0] req_first_be; +reg [3:0] req_last_be; +reg [12:0] req_tlp_count; +reg [10:0] req_dword_count; reg req_last_tlp; reg [PCIE_ADDR_WIDTH-1:0] req_pcie_addr; @@ -356,7 +357,6 @@ reg init_op_tag_reg = 1'b1; reg [PCIE_ADDR_WIDTH-1:0] req_pcie_addr_reg = {PCIE_ADDR_WIDTH{1'b0}}, req_pcie_addr_next; reg [AXI_ADDR_WIDTH-1:0] req_axi_addr_reg = {AXI_ADDR_WIDTH{1'b0}}, req_axi_addr_next; reg [LEN_WIDTH-1:0] req_op_count_reg = {LEN_WIDTH{1'b0}}, req_op_count_next; -reg [12:0] req_tlp_count_reg = 13'd0, req_tlp_count_next; reg req_zero_len_reg = 1'b0, req_zero_len_next; reg [OP_TAG_WIDTH-1:0] req_op_tag_reg = {OP_TAG_WIDTH{1'b0}}, req_op_tag_next; reg req_op_tag_valid_reg = 1'b0, req_op_tag_valid_next; @@ -413,7 +413,7 @@ reg status_fifo_skip_reg = 1'b0, status_fifo_skip_next; reg status_fifo_finish_reg = 1'b0, status_fifo_finish_next; reg [3:0] status_fifo_error_reg = 4'd0, status_fifo_error_next; reg status_fifo_we_reg = 1'b0, status_fifo_we_next; -reg status_fifo_half_full_reg = 1'b0; +reg status_fifo_full_reg = 1'b0; reg [OP_TAG_WIDTH-1:0] status_fifo_rd_op_tag_reg = 0, status_fifo_rd_op_tag_next; reg status_fifo_rd_skip_reg = 1'b0, status_fifo_rd_skip_next; reg 
status_fifo_rd_finish_reg = 1'b0, status_fifo_rd_finish_next; @@ -587,7 +587,6 @@ always @* begin req_pcie_addr_next = req_pcie_addr_reg; req_axi_addr_next = req_axi_addr_reg; req_op_count_next = req_op_count_reg; - req_tlp_count_next = req_tlp_count_reg; req_zero_len_next = req_zero_len_reg; req_op_tag_next = req_op_tag_reg; req_op_tag_valid_next = req_op_tag_valid_reg; @@ -609,16 +608,16 @@ always @* begin // packet smaller than max read request size if (((req_pcie_addr_reg & 12'hfff) + (req_op_count_reg & 12'hfff)) >> 12 != 0 || req_op_count_reg >> 12 != 0) begin // crosses 4k boundary, split on 4K boundary - req_tlp_count_next = 13'h1000 - req_pcie_addr_reg[11:0]; - dword_count = 11'h400 - req_pcie_addr_reg[11:2]; + req_tlp_count = 13'h1000 - req_pcie_addr_reg[11:0]; + req_dword_count = 11'h400 - req_pcie_addr_reg[11:2]; req_last_tlp = (((req_pcie_addr_reg & 12'hfff) + (req_op_count_reg & 12'hfff)) & 12'hfff) == 0 && req_op_count_reg >> 12 == 0; - // optimized req_pcie_addr = req_pcie_addr_reg + req_tlp_count_next + // optimized req_pcie_addr = req_pcie_addr_reg + req_tlp_count req_pcie_addr[PCIE_ADDR_WIDTH-1:12] = req_pcie_addr_reg[PCIE_ADDR_WIDTH-1:12]+1; req_pcie_addr[11:0] = 12'd0; end else begin // does not cross 4k boundary, send one TLP - req_tlp_count_next = req_op_count_reg; - dword_count = (req_op_count_reg + req_pcie_addr_reg[1:0] + 3) >> 2; + req_tlp_count = req_op_count_reg; + req_dword_count = (req_op_count_reg + req_pcie_addr_reg[1:0] + 3) >> 2; req_last_tlp = 1'b1; // always last TLP, so next address is irrelevant req_pcie_addr[PCIE_ADDR_WIDTH-1:12] = req_pcie_addr_reg[PCIE_ADDR_WIDTH-1:12]; @@ -628,36 +627,36 @@ always @* begin // packet larger than max read request size if (((req_pcie_addr_reg & 12'hfff) + {max_read_request_size_dw_reg, 2'b00}) >> 12 != 0) begin // crosses 4k boundary, split on 4K boundary - req_tlp_count_next = 13'h1000 - req_pcie_addr_reg[11:0]; - dword_count = 11'h400 - req_pcie_addr_reg[11:2]; + req_tlp_count = 13'h1000 - 
req_pcie_addr_reg[11:0]; + req_dword_count = 11'h400 - req_pcie_addr_reg[11:2]; req_last_tlp = 1'b0; - // optimized req_pcie_addr = req_pcie_addr_reg + req_tlp_count_next + // optimized req_pcie_addr = req_pcie_addr_reg + req_tlp_count req_pcie_addr[PCIE_ADDR_WIDTH-1:12] = req_pcie_addr_reg[PCIE_ADDR_WIDTH-1:12]+1; req_pcie_addr[11:0] = 12'd0; end else begin // does not cross 4k boundary, split on 128-byte read completion boundary - req_tlp_count_next = {max_read_request_size_dw_reg, 2'b00} - req_pcie_addr_reg[6:0]; - dword_count = max_read_request_size_dw_reg - req_pcie_addr_reg[6:2]; + req_tlp_count = {max_read_request_size_dw_reg, 2'b00} - req_pcie_addr_reg[6:0]; + req_dword_count = max_read_request_size_dw_reg - req_pcie_addr_reg[6:2]; req_last_tlp = 1'b0; - // optimized req_pcie_addr = req_pcie_addr_reg + req_tlp_count_next + // optimized req_pcie_addr = req_pcie_addr_reg + req_tlp_count req_pcie_addr[PCIE_ADDR_WIDTH-1:12] = req_pcie_addr_reg[PCIE_ADDR_WIDTH-1:12]; req_pcie_addr[11:0] = {{req_pcie_addr_reg[11:7], 5'd0} + max_read_request_size_dw_reg, 2'b00}; end end pcie_tag_table_start_ptr_next = req_pcie_tag_reg; - pcie_tag_table_start_axi_addr_next = req_axi_addr_reg + req_tlp_count_next; + pcie_tag_table_start_axi_addr_next = req_axi_addr_reg + req_tlp_count; pcie_tag_table_start_op_tag_next = req_op_tag_reg; pcie_tag_table_start_zero_len_next = req_zero_len_reg; pcie_tag_table_start_en_next = 1'b0; - first_be = 4'b1111 << req_pcie_addr_reg[1:0]; - last_be = 4'b1111 >> (3 - ((req_pcie_addr_reg[1:0] + req_tlp_count_next[1:0] - 1) & 3)); + req_first_be = 4'b1111 << req_pcie_addr_reg[1:0]; + req_last_be = 4'b1111 >> (3 - ((req_pcie_addr_reg[1:0] + req_tlp_count[1:0] - 1) & 3)); // TLP header and sideband data tlp_header_data[1:0] = 2'b0; // address type tlp_header_data[63:2] = req_pcie_addr_reg[PCIE_ADDR_WIDTH-1:2]; // address - tlp_header_data[74:64] = dword_count; // DWORD count + tlp_header_data[74:64] = req_dword_count; // DWORD count 
tlp_header_data[78:75] = REQ_MEM_READ; // request type - memory read tlp_header_data[79] = 1'b0; // poisoned request tlp_header_data[95:80] = requester_id; @@ -669,9 +668,9 @@ always @* begin tlp_header_data[127] = 1'b0; // force ECRC if (AXIS_PCIE_DATA_WIDTH == 512) begin - tlp_tuser[3:0] = req_zero_len_reg ? 4'b0000 : (dword_count == 1 ? first_be & last_be : first_be); // first BE 0 + tlp_tuser[3:0] = req_zero_len_reg ? 4'b0000 : (req_dword_count == 1 ? req_first_be & req_last_be : req_first_be); // first BE 0 tlp_tuser[7:4] = 4'd0; // first BE 1 - tlp_tuser[11:8] = req_zero_len_reg ? 4'b0000 : (dword_count == 1 ? 4'b0000 : last_be); // last BE 0 + tlp_tuser[11:8] = req_zero_len_reg ? 4'b0000 : (req_dword_count == 1 ? 4'b0000 : req_last_be); // last BE 0 tlp_tuser[15:12] = 4'd0; // last BE 1 tlp_tuser[19:16] = 3'd0; // addr_offset tlp_tuser[21:20] = 2'b01; // is_sop @@ -689,8 +688,8 @@ always @* begin tlp_tuser[72:67] = 6'd0; // seq_num1 tlp_tuser[136:73] = 64'd0; // parity end else begin - tlp_tuser[3:0] = req_zero_len_reg ? 4'b0000 : (dword_count == 1 ? first_be & last_be : first_be); // first BE - tlp_tuser[7:4] = req_zero_len_reg ? 4'b0000 : (dword_count == 1 ? 4'b0000 : last_be); // last BE + tlp_tuser[3:0] = req_zero_len_reg ? 4'b0000 : (req_dword_count == 1 ? req_first_be & req_last_be : req_first_be); // first BE + tlp_tuser[7:4] = req_zero_len_reg ? 4'b0000 : (req_dword_count == 1 ? 
4'b0000 : req_last_be); // last BE tlp_tuser[10:8] = 3'd0; // addr_offset tlp_tuser[11] = 1'b0; // discontinue tlp_tuser[12] = 1'b0; // tph_present @@ -758,11 +757,11 @@ always @* begin if (AXIS_PCIE_DATA_WIDTH > 64) begin req_pcie_addr_next = req_pcie_addr; - req_axi_addr_next = req_axi_addr_reg + req_tlp_count_next; - req_op_count_next = req_op_count_reg - req_tlp_count_next; + req_axi_addr_next = req_axi_addr_reg + req_tlp_count; + req_op_count_next = req_op_count_reg - req_tlp_count; pcie_tag_table_start_ptr_next = req_pcie_tag_reg; - pcie_tag_table_start_axi_addr_next = req_axi_addr_reg + req_tlp_count_next; + pcie_tag_table_start_axi_addr_next = req_axi_addr_reg + req_tlp_count; pcie_tag_table_start_op_tag_next = req_op_tag_reg; pcie_tag_table_start_zero_len_next = req_zero_len_reg; pcie_tag_table_start_en_next = 1'b1; @@ -795,13 +794,13 @@ always @* begin if (m_axis_rq_tready_int_reg && req_pcie_tag_valid_reg) begin req_pcie_addr_next = req_pcie_addr; - req_axi_addr_next = req_axi_addr_reg + req_tlp_count_next; - req_op_count_next = req_op_count_reg - req_tlp_count_next; + req_axi_addr_next = req_axi_addr_reg + req_tlp_count; + req_op_count_next = req_op_count_reg - req_tlp_count; m_axis_rq_tvalid_int = 1'b1; pcie_tag_table_start_ptr_next = req_pcie_tag_reg; - pcie_tag_table_start_axi_addr_next = req_axi_addr_reg + req_tlp_count_next; + pcie_tag_table_start_axi_addr_next = req_axi_addr_reg + req_tlp_count; pcie_tag_table_start_op_tag_next = req_op_tag_reg; pcie_tag_table_start_zero_len_next = req_zero_len_reg; pcie_tag_table_start_en_next = 1'b1; @@ -907,7 +906,7 @@ always @* begin if (AXIS_PCIE_DATA_WIDTH > 64) begin s_axis_rc_tready_next = 1'b0; - if (init_done_reg && s_axis_rc_tvalid && !status_fifo_half_full_reg) begin + if (init_done_reg && s_axis_rc_tvalid && !status_fifo_full_reg) begin // header fields lower_addr_next = s_axis_rc_tdata[11:0]; // lower address error_code_next = s_axis_rc_tdata[15:12]; // error code @@ -1036,7 +1035,7 @@ always @* 
begin tlp_state_next = TLP_STATE_IDLE; end end else begin - s_axis_rc_tready_next = init_done_reg && !status_fifo_half_full_reg; + s_axis_rc_tready_next = init_done_reg && !status_fifo_full_reg; if (s_axis_rc_tready && s_axis_rc_tvalid) begin // header fields @@ -1070,14 +1069,14 @@ always @* begin end if (s_axis_rc_tlast) begin - s_axis_rc_tready_next = init_done_reg && !status_fifo_half_full_reg; + s_axis_rc_tready_next = init_done_reg && !status_fifo_full_reg; tlp_state_next = TLP_STATE_IDLE; end else begin s_axis_rc_tready_next = 1'b0; tlp_state_next = TLP_STATE_HEADER; end end else begin - s_axis_rc_tready_next = init_done_reg && !status_fifo_half_full_reg; + s_axis_rc_tready_next = init_done_reg && !status_fifo_full_reg; tlp_state_next = TLP_STATE_IDLE; end end @@ -1341,7 +1340,7 @@ always @* begin if (AXIS_PCIE_DATA_WIDTH > 64) begin s_axis_rc_tready_next = 1'b0; end else begin - s_axis_rc_tready_next = init_done_reg && !status_fifo_half_full_reg; + s_axis_rc_tready_next = init_done_reg && !status_fifo_full_reg; end tlp_state_next = TLP_STATE_IDLE; end @@ -1358,7 +1357,7 @@ always @* begin if (AXIS_PCIE_DATA_WIDTH > 64) begin s_axis_rc_tready_next = 1'b0; end else begin - s_axis_rc_tready_next = init_done_reg && !status_fifo_half_full_reg; + s_axis_rc_tready_next = init_done_reg && !status_fifo_full_reg; end tlp_state_next = TLP_STATE_IDLE; end else begin @@ -1380,7 +1379,7 @@ always @* begin if (init_pcie_tag_reg) begin // initialize FIFO pcie_tag_fifo_wr_tag = init_count_reg; - if (pcie_tag_fifo_wr_tag < PCIE_TAG_COUNT_1) begin + if (pcie_tag_fifo_wr_tag < PCIE_TAG_COUNT_1 || !PCIE_TAG_COUNT_2) begin pcie_tag_fifo_1_we = 1'b1; end else if (pcie_tag_fifo_wr_tag) begin pcie_tag_fifo_2_we = 1'b1; @@ -1390,7 +1389,7 @@ always @* begin pcie_tag_table_finish_en = 1'b1; pcie_tag_fifo_wr_tag = pcie_tag_reg; - if (pcie_tag_fifo_wr_tag < PCIE_TAG_COUNT_1) begin + if (pcie_tag_fifo_wr_tag < PCIE_TAG_COUNT_1 || !PCIE_TAG_COUNT_2) begin pcie_tag_fifo_1_we = 1'b1; end 
else begin pcie_tag_fifo_2_we = 1'b1; @@ -1503,7 +1502,6 @@ always @(posedge clk) begin req_pcie_addr_reg <= req_pcie_addr_next; req_axi_addr_reg <= req_axi_addr_next; req_op_count_reg <= req_op_count_next; - req_tlp_count_reg <= req_tlp_count_next; req_zero_len_reg <= req_zero_len_next; req_op_tag_reg <= req_op_tag_next; req_op_tag_valid_reg <= req_op_tag_valid_next; @@ -1571,7 +1569,7 @@ always @(posedge clk) begin status_fifo_rd_error_reg <= status_fifo_rd_error_next; status_fifo_rd_valid_reg <= status_fifo_rd_valid_next; - status_fifo_half_full_reg <= $unsigned(status_fifo_wr_ptr_reg - status_fifo_rd_ptr_reg) >= 2**(STATUS_FIFO_ADDR_WIDTH-1); + status_fifo_full_reg <= $unsigned(status_fifo_wr_ptr_reg - status_fifo_rd_ptr_reg) >= 2**STATUS_FIFO_ADDR_WIDTH-4; if (inc_active_tx && !s_axis_rq_seq_num_valid_0 && !s_axis_rq_seq_num_valid_1) begin // inc by 1 @@ -1619,11 +1617,13 @@ always @(posedge clk) begin pcie_tag_fifo_1_wr_ptr_reg <= pcie_tag_fifo_1_wr_ptr_reg + 1; end pcie_tag_fifo_1_rd_ptr_reg <= pcie_tag_fifo_1_rd_ptr_next; - if (pcie_tag_fifo_2_we) begin - pcie_tag_fifo_2_mem[pcie_tag_fifo_2_wr_ptr_reg[PCIE_TAG_WIDTH_2-1:0]] <= pcie_tag_fifo_wr_tag; - pcie_tag_fifo_2_wr_ptr_reg <= pcie_tag_fifo_2_wr_ptr_reg + 1; + if (PCIE_TAG_COUNT_2) begin + if (pcie_tag_fifo_2_we) begin + pcie_tag_fifo_2_mem[pcie_tag_fifo_2_wr_ptr_reg[PCIE_TAG_WIDTH_2-1:0]] <= pcie_tag_fifo_wr_tag; + pcie_tag_fifo_2_wr_ptr_reg <= pcie_tag_fifo_2_wr_ptr_reg + 1; + end + pcie_tag_fifo_2_rd_ptr_reg <= pcie_tag_fifo_2_rd_ptr_next; end - pcie_tag_fifo_2_rd_ptr_reg <= pcie_tag_fifo_2_rd_ptr_next; if (init_op_tag_reg) begin op_table_read_init_a[init_count_reg] <= 1'b0; diff --git a/fpga/lib/pcie/rtl/pcie_us_if_rq.v b/fpga/lib/pcie/rtl/pcie_us_if_rq.v index 82f12fa16..0baced421 100644 --- a/fpga/lib/pcie/rtl/pcie_us_if_rq.v +++ b/fpga/lib/pcie/rtl/pcie_us_if_rq.v @@ -869,8 +869,8 @@ end always @(posedge clk) begin max_payload_size_fc_reg <= 9'd8 << (max_payload_size > 5 ? 
5 : max_payload_size); - have_p_credit_reg <= (tx_fc_ph_av > 4) && (tx_fc_pd_av > (max_payload_size_fc_reg << 1)); - have_np_credit_reg <= tx_fc_nph_av > 4; + have_p_credit_reg <= (tx_fc_ph_av > 8) && (tx_fc_pd_av > (max_payload_size_fc_reg << 1)); + have_np_credit_reg <= tx_fc_nph_av > 8; frame_reg <= frame_next; tlp_hdr1_reg <= tlp_hdr1_next; diff --git a/fpga/lib/pcie/tb/dma_if_pcie_rd/Makefile b/fpga/lib/pcie/tb/dma_if_pcie_rd/Makefile index aaad83d4a..8fb618f02 100644 --- a/fpga/lib/pcie/tb/dma_if_pcie_rd/Makefile +++ b/fpga/lib/pcie/tb/dma_if_pcie_rd/Makefile @@ -50,6 +50,8 @@ export PARAM_LEN_WIDTH := 20 export PARAM_TAG_WIDTH := 8 export PARAM_OP_TABLE_SIZE := $(PARAM_PCIE_TAG_COUNT) export PARAM_TX_LIMIT := $(shell echo "$$(( 1 << ($(PARAM_TX_SEQ_NUM_WIDTH)-1) ))" ) +export PARAM_CPLH_FC_LIMIT := 512 +export PARAM_CPLD_FC_LIMIT := $(shell expr $(PARAM_CPLH_FC_LIMIT) \* 4 ) export PARAM_TLP_FORCE_64_BIT_ADDR := 0 export PARAM_CHECK_BUS_NUMBER := 1 diff --git a/fpga/lib/pcie/tb/dma_if_pcie_rd/test_dma_if_pcie_rd.py b/fpga/lib/pcie/tb/dma_if_pcie_rd/test_dma_if_pcie_rd.py index 0deebe999..b139ff068 100644 --- a/fpga/lib/pcie/tb/dma_if_pcie_rd/test_dma_if_pcie_rd.py +++ b/fpga/lib/pcie/tb/dma_if_pcie_rd/test_dma_if_pcie_rd.py @@ -85,6 +85,7 @@ class TB(object): cfg_max_read_req=dut.max_read_request_size, cfg_ext_tag_enable=dut.ext_tag_enable, + cfg_rcb=dut.rcb_128b, ) self.dev.log.setLevel(logging.DEBUG) @@ -330,6 +331,8 @@ def test_dma_if_pcie_rd(request, pcie_data_width, pcie_offset): parameters['TAG_WIDTH'] = 8 parameters['OP_TABLE_SIZE'] = parameters['PCIE_TAG_COUNT'] parameters['TX_LIMIT'] = 2**(parameters['TX_SEQ_NUM_WIDTH']-1) + parameters['CPLH_FC_LIMIT'] = 512 + parameters['CPLD_FC_LIMIT'] = parameters['CPLH_FC_LIMIT']*4 parameters['TLP_FORCE_64_BIT_ADDR'] = 0 parameters['CHECK_BUS_NUMBER'] = 0 diff --git a/fpga/lib/pcie/tb/dma_psdp_ram.py b/fpga/lib/pcie/tb/dma_psdp_ram.py index c0199cf95..e589063a9 100644 --- a/fpga/lib/pcie/tb/dma_psdp_ram.py +++ 
b/fpga/lib/pcie/tb/dma_psdp_ram.py @@ -1,6 +1,6 @@ """ -Copyright (c) 2020 Alex Forencich +Copyright (c) 2020-2023 Alex Forencich Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal @@ -23,12 +23,70 @@ THE SOFTWARE. """ import logging +from typing import NamedTuple import cocotb -from cocotb.triggers import RisingEdge +from cocotb.queue import Queue +from cocotb.triggers import Event, RisingEdge from cocotb_bus.bus import Bus from cocotbext.axi.memory import Memory +from cocotbext.axi import Region + + +# master write helper objects +class WriteCmd(NamedTuple): + address: int + data: bytes + event: Event + + +class SegWriteData: + def __int__(self): + self.addr = 0 + self.data = 0 + self.be = 0 + + +class WriteRespCmd(NamedTuple): + address: int + length: int + segments: int + first_seg: int + event: Event + + +class WriteResp(NamedTuple): + address: int + length: int + + +# master read helper objects +class ReadCmd(NamedTuple): + address: int + length: int + event: Event + + +class SegReadCmd: + def __int__(self): + self.addr = 0 + + +class ReadRespCmd(NamedTuple): + address: int + length: int + segments: int + first_seg: int + event: Event + + +class ReadResp(NamedTuple): + address: int + data: bytes + + def __bytes__(self): + return self.data class BaseBus(Bus): @@ -80,6 +138,511 @@ class PsdpRamBus: return cls(write, read) +class PsdpRamMasterWrite(Region): + + def __init__(self, bus, clock, reset=None, **kwargs): + self.bus = bus + self.clock = clock + self.reset = reset + if bus._name: + self.log = logging.getLogger(f"cocotb.{bus._entity._name}.{bus._name}") + else: + self.log = logging.getLogger(f"cocotb.{bus._entity._name}") + + self.log.info("Parallel Simple Dual Port RAM master model (write)") + self.log.info("Copyright (c) 2020 Alex Forencich") + + self.pause = False + self._pause_generator = None + self._pause_cr = None + + self.in_flight_operations = 
0 + self._idle = Event() + self._idle.set() + + self.width = len(self.bus.wr_cmd_data) + self.byte_size = 8 + self.byte_lanes = len(self.bus.wr_cmd_be) + + self.seg_count = len(self.bus.wr_cmd_valid) + self.seg_data_width = self.width // self.seg_count + self.seg_byte_lanes = self.seg_data_width // self.byte_size + self.seg_addr_width = len(self.bus.wr_cmd_addr) // self.seg_count + self.seg_be_width = self.seg_data_width // self.byte_size + + self.seg_data_mask = 2**self.seg_data_width-1 + self.seg_addr_mask = 2**self.seg_addr_width-1 + self.seg_be_mask = 2**self.seg_be_width-1 + + self.address_width = self.seg_addr_width + (self.seg_byte_lanes*self.seg_count-1).bit_length() + + self.write_command_queue = Queue() + self.write_command_queue.queue_occupancy_limit = 2 + self.current_write_command = None + + self.seg_write_queue = [Queue() for x in range(self.seg_count)] + self.seg_write_resp_queue = [Queue() for x in range(self.seg_count)] + + self.int_write_resp_command_queue = Queue() + self.current_write_resp_command = None + + super().__init__(2**self.address_width, **kwargs) + + self.log.info("Parallel Simple Dual Port RAM master model configuration:") + self.log.info(" Address width: %d bits", self.address_width) + self.log.info(" Segment count: %d", self.seg_count) + self.log.info(" Segment addr width: %d bits", self.seg_addr_width) + self.log.info(" Segment data width: %d bits (%d bytes)", self.seg_data_width, self.seg_byte_lanes) + self.log.info(" Total data width: %d bits (%d bytes)", self.width, self.byte_lanes) + + assert self.seg_be_width*self.seg_count == len(self.bus.wr_cmd_be) + + self.bus.wr_cmd_valid.setimmediatevalue(0) + + cocotb.start_soon(self._process_write()) + cocotb.start_soon(self._process_write_resp()) + cocotb.start_soon(self._run()) + + def set_pause_generator(self, generator=None): + if self._pause_cr is not None: + self._pause_cr.kill() + self._pause_cr = None + + self._pause_generator = generator + + if self._pause_generator is not 
None: + self._pause_cr = cocotb.start_soon(self._run_pause()) + + def clear_pause_generator(self): + self.set_pause_generator(None) + + def idle(self): + return not self.in_flight_operations + + async def wait(self): + while not self.idle(): + await self._idle.wait() + + async def write(self, address, data): + if address < 0 or address >= 2**self.address_width: + raise ValueError("Address out of range") + + if isinstance(data, int): + raise ValueError("Expected bytes or bytearray for data") + + if address+len(data) > 2**self.address_width: + raise ValueError("Requested transfer overruns end of address space") + + event = Event() + data = bytes(data) + + self.in_flight_operations += 1 + self._idle.clear() + + await self.write_command_queue.put(WriteCmd(address, data, event)) + await event.wait() + return event.data + + async def _process_write(self): + while True: + cmd = await self.write_command_queue.get() + self.current_write_command = cmd + + seg_start_offset = cmd.address % self.seg_byte_lanes + seg_end_offset = ((cmd.address + len(cmd.data) - 1) % self.seg_byte_lanes) + 1 + + seg_be_start = (self.seg_be_mask << seg_start_offset) & self.seg_be_mask + seg_be_end = self.seg_be_mask >> (self.seg_byte_lanes - seg_end_offset) + + first_seg = (cmd.address // self.seg_byte_lanes) % self.seg_count + segments = (len(cmd.data) + (cmd.address % self.seg_byte_lanes) + self.seg_byte_lanes-1) // self.seg_byte_lanes + + resp_cmd = WriteRespCmd(cmd.address, len(cmd.data), segments, first_seg, cmd.event) + await self.int_write_resp_command_queue.put(resp_cmd) + + offset = 0 + + if self.log.isEnabledFor(logging.INFO): + self.log.info("Write start addr: 0x%08x data: %s", + cmd.address, ' '.join((f'{c:02x}' for c in cmd.data))) + + seg = first_seg + for k in range(segments): + start = 0 + stop = self.seg_byte_lanes + be = self.seg_be_mask + + if k == 0: + start = seg_start_offset + be &= seg_be_start + if k == segments-1: + stop = seg_end_offset + be &= seg_be_end + + val = 0 + 
for j in range(start, stop): + val |= cmd.data[offset] << j*8 + offset += 1 + + op = SegWriteData() + op.addr = (cmd.address + k*self.seg_byte_lanes) // self.byte_lanes + op.data = val + op.be = be + + await self.seg_write_queue[seg].put(op) + + seg = (seg + 1) % self.seg_count + + self.current_write_command = None + + async def _process_write_resp(self): + while True: + cmd = await self.int_write_resp_command_queue.get() + self.current_write_resp_command = cmd + + seg = cmd.first_seg + for k in range(cmd.segments): + await self.seg_write_resp_queue[seg].get() + + seg = (seg + 1) % self.seg_count + + if self.log.isEnabledFor(logging.INFO): + self.log.info("Write complete addr: 0x%08x length: %d", cmd.address, cmd.length) + + write_resp = WriteResp(cmd.address, cmd.length) + + cmd.event.set(write_resp) + + self.current_write_resp_command = None + + self.in_flight_operations -= 1 + + if self.in_flight_operations == 0: + self._idle.set() + + async def _run(self): + cmd_valid = 0 + cmd_addr = 0 + cmd_data = 0 + cmd_be = 0 + + clock_edge_event = RisingEdge(self.clock) + + while True: + await clock_edge_event + + cmd_ready_sample = self.bus.wr_cmd_ready.value + done_sample = self.bus.wr_done.value + + if self.reset is not None and self.reset.value: + self.bus.wr_cmd_valid.setimmediatevalue(0) + continue + + # process segments + for seg in range(self.seg_count): + seg_mask = 1 << seg + + if (cmd_ready_sample & seg_mask) or not (cmd_valid & seg_mask): + if not self.seg_write_queue[seg].empty() and not self.pause: + op = await self.seg_write_queue[seg].get() + cmd_addr &= ~(self.seg_addr_mask << self.seg_addr_width*seg) + cmd_addr |= ((op.addr & self.seg_addr_mask) << self.seg_addr_width*seg) + cmd_data &= ~(self.seg_data_mask << self.seg_data_width*seg) + cmd_data |= ((op.data & self.seg_data_mask) << self.seg_data_width*seg) + cmd_be &= ~(self.seg_be_mask << self.seg_be_width*seg) + cmd_be |= ((op.be & self.seg_be_mask) << self.seg_be_width*seg) + cmd_valid |= seg_mask + 
+ if self.log.isEnabledFor(logging.INFO): + self.log.info("Write word seg: %d addr: 0x%08x be 0x%02x data %s", + seg, op.addr, op.be, ' '.join((f'{c:02x}' for c in op.data.to_bytes(self.seg_byte_lanes, 'little')))) + else: + cmd_valid &= ~seg_mask + + if done_sample & seg_mask: + await self.seg_write_resp_queue[seg].put(None) + + self.bus.wr_cmd_valid.value = cmd_valid + self.bus.wr_cmd_addr.value = cmd_addr + self.bus.wr_cmd_data.value = cmd_data + self.bus.wr_cmd_be.value = cmd_be + + async def _run_pause(self): + clock_edge_event = RisingEdge(self.clock) + + for val in self._pause_generator: + self.pause = val + await clock_edge_event + + +class PsdpRamMasterRead(Region): + + def __init__(self, bus, clock, reset=None, **kwargs): + self.bus = bus + self.clock = clock + self.reset = reset + if bus._name: + self.log = logging.getLogger(f"cocotb.{bus._entity._name}.{bus._name}") + else: + self.log = logging.getLogger(f"cocotb.{bus._entity._name}") + + self.log.info("Parallel Simple Dual Port RAM master model (read)") + self.log.info("Copyright (c) 2020 Alex Forencich") + + self.pause = False + self._pause_generator = None + self._pause_cr = None + + self.in_flight_operations = 0 + self._idle = Event() + self._idle.set() + + self.width = len(self.bus.rd_resp_data) + self.byte_size = 8 + self.byte_lanes = self.width // self.byte_size + + self.seg_count = len(self.bus.rd_cmd_valid) + self.seg_data_width = self.width // self.seg_count + self.seg_byte_lanes = self.seg_data_width // self.byte_size + self.seg_addr_width = len(self.bus.rd_cmd_addr) // self.seg_count + + self.seg_data_mask = 2**self.seg_data_width-1 + self.seg_addr_mask = 2**self.seg_addr_width-1 + + self.address_width = self.seg_addr_width + (self.seg_byte_lanes*self.seg_count-1).bit_length() + + self.read_command_queue = Queue() + self.read_command_queue.queue_occupancy_limit = 2 + self.current_read_command = None + + self.seg_read_queue = [Queue() for x in range(self.seg_count)] + self.seg_read_resp_queue 
= [Queue() for x in range(self.seg_count)] + + self.int_read_resp_command_queue = Queue() + self.current_read_resp_command = None + + super().__init__(2**self.address_width, **kwargs) + + self.log.info("Parallel Simple Dual Port RAM master model configuration:") + self.log.info(" Address width: %d bits", self.address_width) + self.log.info(" Segment count: %d", self.seg_count) + self.log.info(" Segment addr width: %d bits", self.seg_addr_width) + self.log.info(" Segment data width: %d bits (%d bytes)", self.seg_data_width, self.seg_byte_lanes) + self.log.info(" Total data width: %d bits (%d bytes)", self.width, self.byte_lanes) + + self.bus.rd_cmd_valid.setimmediatevalue(0) + self.bus.rd_resp_ready.setimmediatevalue(0) + + cocotb.start_soon(self._process_read()) + cocotb.start_soon(self._process_read_resp()) + cocotb.start_soon(self._run()) + + def set_pause_generator(self, generator=None): + if self._pause_cr is not None: + self._pause_cr.kill() + self._pause_cr = None + + self._pause_generator = generator + + if self._pause_generator is not None: + self._pause_cr = cocotb.start_soon(self._run_pause()) + + def clear_pause_generator(self): + self.set_pause_generator(None) + + def idle(self): + return not self.in_flight_operations + + async def wait(self): + while not self.idle(): + await self._idle.wait() + + async def read(self, address, length): + if address < 0 or address >= 2**self.address_width: + raise ValueError("Address out of range") + + if length < 0: + raise ValueError("Read length must be positive") + + if address+length > 2**self.address_width: + raise ValueError("Requested transfer overruns end of address space") + + event = Event() + + self.in_flight_operations += 1 + self._idle.clear() + + await self.read_command_queue.put(ReadCmd(address, length, event)) + + await event.wait() + return event.data + + async def _process_read(self): + while True: + cmd = await self.read_command_queue.get() + self.current_read_command = cmd + + first_seg = 
(cmd.address // self.seg_byte_lanes) % self.seg_count + segments = (cmd.length + (cmd.address % self.seg_byte_lanes) + self.seg_byte_lanes-1) // self.seg_byte_lanes + + resp_cmd = ReadRespCmd(cmd.address, cmd.length, segments, first_seg, cmd.event) + await self.int_read_resp_command_queue.put(resp_cmd) + + if self.log.isEnabledFor(logging.INFO): + self.log.info("Read start addr: 0x%08x length: %d", cmd.address, cmd.length) + + seg = first_seg + for k in range(segments): + op = SegReadCmd() + op.addr = (cmd.address + k*self.seg_byte_lanes) // self.byte_lanes + + await self.seg_read_queue[seg].put(op) + + seg = (seg + 1) % self.seg_count + + self.current_read_command = None + + async def _process_read_resp(self): + while True: + cmd = await self.int_read_resp_command_queue.get() + self.current_read_resp_command = cmd + + seg_start_offset = cmd.address % self.seg_byte_lanes + seg_end_offset = ((cmd.address + cmd.length - 1) % self.seg_byte_lanes) + 1 + + data = bytearray() + + seg = cmd.first_seg + for k in range(cmd.segments): + seg_data = await self.seg_read_resp_queue[seg].get() + + start = 0 + stop = self.seg_byte_lanes + + if k == 0: + start = seg_start_offset + if k == cmd.segments-1: + stop = seg_end_offset + + for j in range(start, stop): + data.extend(bytearray([(seg_data >> j*8) & 0xff])) + + seg = (seg + 1) % self.seg_count + + if self.log.isEnabledFor(logging.INFO): + self.log.info("Read complete addr: 0x%08x data: %s", + cmd.address, ' '.join((f'{c:02x}' for c in data))) + + read_resp = ReadResp(cmd.address, bytes(data)) + + cmd.event.set(read_resp) + + self.current_read_resp_command = None + + self.in_flight_operations -= 1 + + if self.in_flight_operations == 0: + self._idle.set() + + async def _run(self): + cmd_valid = 0 + cmd_addr = 0 + resp_ready = 0 + + clock_edge_event = RisingEdge(self.clock) + + while True: + await clock_edge_event + + cmd_ready_sample = self.bus.rd_cmd_ready.value + resp_valid_sample = self.bus.rd_resp_valid.value + + if 
resp_valid_sample: + resp_data_sample = self.bus.rd_resp_data.value + + if self.reset is not None and self.reset.value: + self.bus.rd_cmd_valid.setimmediatevalue(0) + self.bus.rd_resp_ready.setimmediatevalue(0) + cmd_valid = 0 + resp_ready = 0 + continue + + # process segments + for seg in range(self.seg_count): + seg_mask = 1 << seg + + if (cmd_ready_sample & seg_mask) or not (cmd_valid & seg_mask): + if not self.seg_read_queue[seg].empty() and not self.pause: + op = await self.seg_read_queue[seg].get() + cmd_addr &= ~(self.seg_addr_mask << self.seg_addr_width*seg) + cmd_addr |= ((op.addr & self.seg_addr_mask) << self.seg_addr_width*seg) + cmd_valid |= seg_mask + + if self.log.isEnabledFor(logging.INFO): + self.log.info("Read word seg: %d addr: 0x%08x", seg, op.addr) + else: + cmd_valid &= ~seg_mask + + if resp_ready & resp_valid_sample & (1 << seg): + seg_data = (resp_data_sample >> self.seg_data_width*seg) & self.seg_data_mask + + await self.seg_read_resp_queue[seg].put(seg_data) + + resp_ready = 2**self.seg_count-1 + + if self.pause: + resp_ready = 0 + + self.bus.rd_cmd_valid.value = cmd_valid + self.bus.rd_cmd_addr.value = cmd_addr + + self.bus.rd_resp_ready.value = resp_ready + + async def _run_pause(self): + clock_edge_event = RisingEdge(self.clock) + + for val in self._pause_generator: + self.pause = val + await clock_edge_event + + +class PsdpRamMaster(Region): + def __init__(self, bus, clock, reset=None, **kwargs): + self.write_if = None + self.read_if = None + + self.write_if = PsdpRamMasterWrite(bus.write, clock, reset) + self.read_if = PsdpRamMasterRead(bus.read, clock, reset) + + super().__init__(max(self.write_if.size, self.read_if.size), **kwargs) + + def init_read(self, address, length, event=None): + return self.read_if.init_read(address, length, event) + + def init_write(self, address, data, event=None): + return self.write_if.init_write(address, data, event) + + def idle(self): + return (not self.read_if or self.read_if.idle()) and (not 
self.write_if or self.write_if.idle()) + + async def wait(self): + while not self.idle(): + await self.write_if.wait() + await self.read_if.wait() + + async def wait_read(self): + await self.read_if.wait() + + async def wait_write(self): + await self.write_if.wait() + + async def read(self, address, length): + return await self.read_if.read(address, length) + + async def write(self, address, data): + return await self.write_if.write(address, data) + + class PsdpRamWrite(Memory): def __init__(self, bus, clock, reset=None, size=1024, mem=None, *args, **kwargs): @@ -116,7 +679,7 @@ class PsdpRamWrite(Memory): self.log.info(" Segment count: %d", self.seg_count) self.log.info(" Segment addr width: %d bits", self.seg_addr_width) self.log.info(" Segment data width: %d bits (%d bytes)", self.seg_data_width, self.seg_byte_lanes) - self.log.info(" Total data width: %d bits (%d bytes)", self.width, self.width // self.byte_size) + self.log.info(" Total data width: %d bits (%d bytes)", self.width, self.byte_lanes) assert self.seg_be_width*self.seg_count == len(self.bus.wr_cmd_be) @@ -169,15 +732,30 @@ class PsdpRamWrite(Memory): addr = (seg_addr*self.seg_count+seg)*self.seg_byte_lanes - self.mem.seek(addr % self.size) + # generate operation list + offset = 0 + start_offset = None + write_ops = [] data = seg_data.to_bytes(self.seg_byte_lanes, 'little') - for i in range(self.seg_byte_lanes): + for i in range(self.byte_lanes): if seg_be & (1 << i): - self.mem.write(data[i:i+1]) + if start_offset is None: + start_offset = offset else: - self.mem.seek(1, 1) + if start_offset is not None and offset != start_offset: + write_ops.append((addr+start_offset, data[start_offset:offset])) + start_offset = None + + offset += 1 + + if start_offset is not None and offset != start_offset: + write_ops.append((addr+start_offset, data[start_offset:offset])) + + # perform writes + for addr, data in write_ops: + self.write(addr, data) wr_done |= 1 << seg @@ -234,7 +812,7 @@ class PsdpRamRead(Memory): 
self.log.info(" Segment count: %d", self.seg_count) self.log.info(" Segment addr width: %d bits", self.seg_addr_width) self.log.info(" Segment data width: %d bits (%d bytes)", self.seg_data_width, self.seg_byte_lanes) - self.log.info(" Total data width: %d bits (%d bytes)", self.width, self.width // self.byte_size) + self.log.info(" Total data width: %d bits (%d bytes)", self.width, self.byte_lanes) self.bus.rd_cmd_ready.setimmediatevalue(0) self.bus.rd_resp_valid.setimmediatevalue(0) @@ -303,9 +881,7 @@ class PsdpRamRead(Memory): addr = (seg_addr*self.seg_count+seg)*self.seg_byte_lanes - self.mem.seek(addr % self.size) - - data = self.mem.read(self.seg_byte_lanes) + data = self.read(addr % self.size, self.seg_byte_lanes) pipeline[seg][0] = int.from_bytes(data, 'little') self.log.info("Read word seg: %d addr: 0x%08x data %s", diff --git a/fpga/lib/pcie/tb/dma_psdpram/Makefile b/fpga/lib/pcie/tb/dma_psdpram/Makefile new file mode 100644 index 000000000..a9292be17 --- /dev/null +++ b/fpga/lib/pcie/tb/dma_psdpram/Makefile @@ -0,0 +1,73 @@ +# Copyright (c) 2023 Alex Forencich +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. + +TOPLEVEL_LANG = verilog + +SIM ?= icarus +WAVES ?= 0 + +COCOTB_HDL_TIMEUNIT = 1ns +COCOTB_HDL_TIMEPRECISION = 1ps + +DUT = dma_psdpram +TOPLEVEL = $(DUT) +MODULE = test_$(DUT) +VERILOG_SOURCES += ../../rtl/$(DUT).v + +# module parameters +export PARAM_SIZE := 65536 +export PARAM_SEG_COUNT := 2 +export PARAM_SEG_DATA_WIDTH := 32 +export PARAM_SEG_BE_WIDTH := $(shell expr $(PARAM_SEG_DATA_WIDTH) / 8 ) +export PARAM_SEG_ADDR_WIDTH := $(shell python -c "print(($(PARAM_SIZE)//($(PARAM_SEG_COUNT)*$(PARAM_SEG_BE_WIDTH))-1).bit_length())"), +export PARAM_PIPELINE := 2 + +ifeq ($(SIM), icarus) + PLUSARGS += -fst + + COMPILE_ARGS += $(foreach v,$(filter PARAM_%,$(.VARIABLES)),-P $(TOPLEVEL).$(subst PARAM_,,$(v))=$($(v))) + + ifeq ($(WAVES), 1) + VERILOG_SOURCES += iverilog_dump.v + COMPILE_ARGS += -s iverilog_dump + endif +else ifeq ($(SIM), verilator) + COMPILE_ARGS += -Wno-SELRANGE -Wno-WIDTH + + COMPILE_ARGS += $(foreach v,$(filter PARAM_%,$(.VARIABLES)),-G$(subst PARAM_,,$(v))=$($(v))) + + ifeq ($(WAVES), 1) + COMPILE_ARGS += --trace-fst + endif +endif + +include $(shell cocotb-config --makefiles)/Makefile.sim + +iverilog_dump.v: + echo 'module iverilog_dump();' > $@ + echo 'initial begin' >> $@ + echo ' $$dumpfile("$(TOPLEVEL).fst");' >> $@ + echo ' $$dumpvars(0, $(TOPLEVEL));' >> $@ + echo 'end' >> $@ + echo 'endmodule' >> $@ + +clean:: + @rm -rf iverilog_dump.v + @rm -rf dump.fst $(TOPLEVEL).fst diff --git a/fpga/lib/pcie/tb/dma_psdpram/dma_psdp_ram.py b/fpga/lib/pcie/tb/dma_psdpram/dma_psdp_ram.py new file mode 120000 index 000000000..6613351ee --- /dev/null +++ b/fpga/lib/pcie/tb/dma_psdpram/dma_psdp_ram.py @@ -0,0 +1 @@ +../dma_psdp_ram.py \ No newline at end of file diff --git 
a/fpga/lib/pcie/tb/dma_psdpram/test_dma_psdpram.py b/fpga/lib/pcie/tb/dma_psdpram/test_dma_psdpram.py new file mode 100644 index 000000000..77594a555 --- /dev/null +++ b/fpga/lib/pcie/tb/dma_psdpram/test_dma_psdpram.py @@ -0,0 +1,229 @@ +#!/usr/bin/env python +""" + +Copyright (c) 2023 Alex Forencich + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +""" + +import itertools +import logging +import os +import random +import sys + +import cocotb_test.simulator +import pytest + +import cocotb +from cocotb.clock import Clock +from cocotb.triggers import RisingEdge, Timer +from cocotb.regression import TestFactory + +try: + from dma_psdp_ram import PsdpRamMaster, PsdpRamBus +except ImportError: + # attempt import from current directory + sys.path.insert(0, os.path.join(os.path.dirname(__file__))) + try: + from dma_psdp_ram import PsdpRamMaster, PsdpRamBus + finally: + del sys.path[0] + + +class TB(object): + def __init__(self, dut): + self.dut = dut + + self.log = logging.getLogger("cocotb.tb") + self.log.setLevel(logging.DEBUG) + + cocotb.start_soon(Clock(dut.clk, 10, units="ns").start()) + + # DMA RAM + self.dma_ram_master = PsdpRamMaster(PsdpRamBus.from_entity(dut), dut.clk, dut.rst) + + def set_idle_generator(self, generator=None): + if generator: + self.dma_ram_master.write_if.set_pause_generator(generator()) + self.dma_ram_master.read_if.set_pause_generator(generator()) + + def set_backpressure_generator(self, generator=None): + if generator: + pass + + async def cycle_reset(self): + self.dut.rst.setimmediatevalue(0) + await RisingEdge(self.dut.clk) + await RisingEdge(self.dut.clk) + self.dut.rst.value = 1 + await RisingEdge(self.dut.clk) + await RisingEdge(self.dut.clk) + self.dut.rst.value = 0 + await RisingEdge(self.dut.clk) + await RisingEdge(self.dut.clk) + + +async def run_test_write(dut, data_in=None, idle_inserter=None, backpressure_inserter=None, size=None): + + tb = TB(dut) + + byte_lanes = tb.dma_ram_master.write_if.byte_lanes + + await tb.cycle_reset() + + tb.set_idle_generator(idle_inserter) + tb.set_backpressure_generator(backpressure_inserter) + + for length in list(range(1, byte_lanes*2))+[1024]: + for offset in list(range(byte_lanes, byte_lanes*2))+list(range(4096-byte_lanes, 4096)): + tb.log.info("length %d, offset %d", length, offset) + addr = offset+0x1000 + test_data = bytearray([x % 
256 for x in range(length)]) + + await tb.dma_ram_master.write(addr-4, b'\xaa'*(length+8)) + + await tb.dma_ram_master.write(addr, test_data) + + data = await tb.dma_ram_master.read(addr-1, length+2) + + assert data.data == b'\xaa'+test_data+b'\xaa' + + await RisingEdge(dut.clk) + await RisingEdge(dut.clk) + + +async def run_test_read(dut, data_in=None, idle_inserter=None, backpressure_inserter=None, size=None): + + tb = TB(dut) + + byte_lanes = tb.dma_ram_master.write_if.byte_lanes + + await tb.cycle_reset() + + tb.set_idle_generator(idle_inserter) + tb.set_backpressure_generator(backpressure_inserter) + + for length in list(range(1, byte_lanes*2))+[1024]: + for offset in list(range(byte_lanes, byte_lanes*2))+list(range(4096-byte_lanes, 4096)): + tb.log.info("length %d, offset %d", length, offset) + addr = offset+0x1000 + test_data = bytearray([x % 256 for x in range(length)]) + + await tb.dma_ram_master.write(addr, test_data) + + data = await tb.dma_ram_master.read(addr, length) + + assert data.data == test_data + + await RisingEdge(dut.clk) + await RisingEdge(dut.clk) + + +async def run_stress_test(dut, idle_inserter=None, backpressure_inserter=None): + + tb = TB(dut) + + await tb.cycle_reset() + + tb.set_idle_generator(idle_inserter) + tb.set_backpressure_generator(backpressure_inserter) + + async def worker(master, offset, aperture, count=16): + for k in range(count): + length = random.randint(1, min(512, aperture)) + addr = offset+random.randint(0, aperture-length) + test_data = bytearray([x % 256 for x in range(length)]) + + await Timer(random.randint(1, 100), 'ns') + + await master.write(addr, test_data) + + await Timer(random.randint(1, 100), 'ns') + + data = await master.read(addr, length) + assert data.data == test_data + + workers = [] + + for k in range(16): + workers.append(cocotb.start_soon(worker(tb.dma_ram_master, k*0x1000, 0x1000, count=16))) + + while workers: + await workers.pop(0).join() + + await RisingEdge(dut.clk) + await RisingEdge(dut.clk) 
+ + +def cycle_pause(): + return itertools.cycle([1, 1, 1, 0]) + + +if cocotb.SIM_NAME: + + for test in [run_test_write, run_test_read, run_stress_test]: + + factory = TestFactory(test) + factory.add_option("idle_inserter", [None, cycle_pause]) + factory.add_option("backpressure_inserter", [None, cycle_pause]) + factory.generate_tests() + + +# cocotb-test + +tests_dir = os.path.dirname(__file__) +rtl_dir = os.path.abspath(os.path.join(tests_dir, '..', '..', 'rtl')) + + +@pytest.mark.parametrize("seg_data_width", [32, 64]) +@pytest.mark.parametrize("seg_count", [2, 4]) +def test_dma_psdpram(request, seg_data_width, seg_count): + dut = "dma_psdpram" + module = os.path.splitext(os.path.basename(__file__))[0] + toplevel = dut + + verilog_sources = [ + os.path.join(rtl_dir, f"{dut}.v"), + ] + + parameters = {} + + parameters['SIZE'] = 65536 + parameters['SEG_COUNT'] = seg_count + parameters['SEG_DATA_WIDTH'] = seg_data_width + parameters['SEG_BE_WIDTH'] = parameters['SEG_DATA_WIDTH'] // 8 + parameters['SEG_ADDR_WIDTH'] = (parameters['SIZE']//(parameters['SEG_COUNT']*parameters['SEG_BE_WIDTH'])-1).bit_length() + parameters['PIPELINE'] = 2 + + extra_env = {f'PARAM_{k}': str(v) for k, v in parameters.items()} + + sim_build = os.path.join(tests_dir, "sim_build", + request.node.name.replace('[', '-').replace(']', '')) + + cocotb_test.simulator.run( + python_search=[tests_dir], + verilog_sources=verilog_sources, + toplevel=toplevel, + module=module, + parameters=parameters, + sim_build=sim_build, + extra_env=extra_env, + ) diff --git a/fpga/lib/pcie/tb/dma_psdpram_async/Makefile b/fpga/lib/pcie/tb/dma_psdpram_async/Makefile new file mode 100644 index 000000000..4278e81fc --- /dev/null +++ b/fpga/lib/pcie/tb/dma_psdpram_async/Makefile @@ -0,0 +1,73 @@ +# Copyright (c) 2023 Alex Forencich +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to deal +# in the Software without 
restriction, including without limitation the rights +# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +# copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in +# all copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +# THE SOFTWARE. + +TOPLEVEL_LANG = verilog + +SIM ?= icarus +WAVES ?= 0 + +COCOTB_HDL_TIMEUNIT = 1ns +COCOTB_HDL_TIMEPRECISION = 1ps + +DUT = dma_psdpram_async +TOPLEVEL = $(DUT) +MODULE = test_$(DUT) +VERILOG_SOURCES += ../../rtl/$(DUT).v + +# module parameters +export PARAM_SIZE := 65536 +export PARAM_SEG_COUNT := 2 +export PARAM_SEG_DATA_WIDTH := 32 +export PARAM_SEG_BE_WIDTH := $(shell expr $(PARAM_SEG_DATA_WIDTH) / 8 ) +export PARAM_SEG_ADDR_WIDTH := $(shell python -c "print(($(PARAM_SIZE)//($(PARAM_SEG_COUNT)*$(PARAM_SEG_BE_WIDTH))-1).bit_length())"), +export PARAM_PIPELINE := 2 + +ifeq ($(SIM), icarus) + PLUSARGS += -fst + + COMPILE_ARGS += $(foreach v,$(filter PARAM_%,$(.VARIABLES)),-P $(TOPLEVEL).$(subst PARAM_,,$(v))=$($(v))) + + ifeq ($(WAVES), 1) + VERILOG_SOURCES += iverilog_dump.v + COMPILE_ARGS += -s iverilog_dump + endif +else ifeq ($(SIM), verilator) + COMPILE_ARGS += -Wno-SELRANGE -Wno-WIDTH + + COMPILE_ARGS += $(foreach v,$(filter PARAM_%,$(.VARIABLES)),-G$(subst PARAM_,,$(v))=$($(v))) + + ifeq ($(WAVES), 1) + COMPILE_ARGS += --trace-fst + endif +endif + +include $(shell cocotb-config 
--makefiles)/Makefile.sim + +iverilog_dump.v: + echo 'module iverilog_dump();' > $@ + echo 'initial begin' >> $@ + echo ' $$dumpfile("$(TOPLEVEL).fst");' >> $@ + echo ' $$dumpvars(0, $(TOPLEVEL));' >> $@ + echo 'end' >> $@ + echo 'endmodule' >> $@ + +clean:: + @rm -rf iverilog_dump.v + @rm -rf dump.fst $(TOPLEVEL).fst diff --git a/fpga/lib/pcie/tb/dma_psdpram_async/dma_psdp_ram.py b/fpga/lib/pcie/tb/dma_psdpram_async/dma_psdp_ram.py new file mode 120000 index 000000000..6613351ee --- /dev/null +++ b/fpga/lib/pcie/tb/dma_psdpram_async/dma_psdp_ram.py @@ -0,0 +1 @@ +../dma_psdp_ram.py \ No newline at end of file diff --git a/fpga/lib/pcie/tb/dma_psdpram_async/test_dma_psdpram_async.py b/fpga/lib/pcie/tb/dma_psdpram_async/test_dma_psdpram_async.py new file mode 100644 index 000000000..08885814f --- /dev/null +++ b/fpga/lib/pcie/tb/dma_psdpram_async/test_dma_psdpram_async.py @@ -0,0 +1,234 @@ +#!/usr/bin/env python +""" + +Copyright (c) 2023 Alex Forencich + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +""" + +import itertools +import logging +import os +import random +import sys + +import cocotb_test.simulator +import pytest + +import cocotb +from cocotb.clock import Clock +from cocotb.triggers import RisingEdge, Timer +from cocotb.regression import TestFactory + +try: + from dma_psdp_ram import PsdpRamMasterWrite, PsdpRamMasterRead, PsdpRamWriteBus, PsdpRamReadBus +except ImportError: + # attempt import from current directory + sys.path.insert(0, os.path.join(os.path.dirname(__file__))) + try: + from dma_psdp_ram import PsdpRamMasterWrite, PsdpRamMasterRead, PsdpRamWriteBus, PsdpRamReadBus + finally: + del sys.path[0] + + +class TB(object): + def __init__(self, dut): + self.dut = dut + + self.log = logging.getLogger("cocotb.tb") + self.log.setLevel(logging.DEBUG) + + cocotb.start_soon(Clock(dut.clk_wr, 10, units="ns").start()) + cocotb.start_soon(Clock(dut.clk_rd, 11, units="ns").start()) + + # DMA RAM + self.dma_ram_master_wr = PsdpRamMasterWrite(PsdpRamWriteBus.from_entity(dut), dut.clk_wr, dut.rst_wr) + self.dma_ram_master_rd = PsdpRamMasterRead(PsdpRamReadBus.from_entity(dut), dut.clk_rd, dut.rst_rd) + + def set_idle_generator(self, generator=None): + if generator: + self.dma_ram_master_wr.set_pause_generator(generator()) + self.dma_ram_master_rd.set_pause_generator(generator()) + + def set_backpressure_generator(self, generator=None): + if generator: + pass + + async def cycle_reset(self): + self.dut.rst_wr.setimmediatevalue(0) + self.dut.rst_rd.setimmediatevalue(0) + await RisingEdge(self.dut.clk_wr) + await RisingEdge(self.dut.clk_wr) + self.dut.rst_wr.value = 1 + self.dut.rst_rd.value = 1 + await RisingEdge(self.dut.clk_wr) + await RisingEdge(self.dut.clk_wr) + self.dut.rst_wr.value = 0 + 
self.dut.rst_rd.value = 0 + await RisingEdge(self.dut.clk_wr) + await RisingEdge(self.dut.clk_wr) + + +async def run_test_write(dut, data_in=None, idle_inserter=None, backpressure_inserter=None, size=None): + + tb = TB(dut) + + byte_lanes = tb.dma_ram_master_wr.byte_lanes + + await tb.cycle_reset() + + tb.set_idle_generator(idle_inserter) + tb.set_backpressure_generator(backpressure_inserter) + + for length in list(range(1, byte_lanes*2))+[1024]: + for offset in list(range(byte_lanes, byte_lanes*2))+list(range(4096-byte_lanes, 4096)): + tb.log.info("length %d, offset %d", length, offset) + addr = offset+0x1000 + test_data = bytearray([x % 256 for x in range(length)]) + + await tb.dma_ram_master_wr.write(addr-4, b'\xaa'*(length+8)) + + await tb.dma_ram_master_wr.write(addr, test_data) + + data = await tb.dma_ram_master_rd.read(addr-1, length+2) + + assert data.data == b'\xaa'+test_data+b'\xaa' + + await RisingEdge(dut.clk_wr) + await RisingEdge(dut.clk_wr) + + +async def run_test_read(dut, data_in=None, idle_inserter=None, backpressure_inserter=None, size=None): + + tb = TB(dut) + + byte_lanes = tb.dma_ram_master_wr.byte_lanes + + await tb.cycle_reset() + + tb.set_idle_generator(idle_inserter) + tb.set_backpressure_generator(backpressure_inserter) + + for length in list(range(1, byte_lanes*2))+[1024]: + for offset in list(range(byte_lanes, byte_lanes*2))+list(range(4096-byte_lanes, 4096)): + tb.log.info("length %d, offset %d", length, offset) + addr = offset+0x1000 + test_data = bytearray([x % 256 for x in range(length)]) + + await tb.dma_ram_master_wr.write(addr, test_data) + + data = await tb.dma_ram_master_rd.read(addr, length) + + assert data.data == test_data + + await RisingEdge(dut.clk_wr) + await RisingEdge(dut.clk_wr) + + +async def run_stress_test(dut, idle_inserter=None, backpressure_inserter=None): + + tb = TB(dut) + + await tb.cycle_reset() + + tb.set_idle_generator(idle_inserter) + tb.set_backpressure_generator(backpressure_inserter) + + async def 
worker(master_wr, master_rd, offset, aperture, count=16): + for k in range(count): + length = random.randint(1, min(512, aperture)) + addr = offset+random.randint(0, aperture-length) + test_data = bytearray([x % 256 for x in range(length)]) + + await Timer(random.randint(1, 100), 'ns') + + await master_wr.write(addr, test_data) + + await Timer(random.randint(1, 100), 'ns') + + data = await master_rd.read(addr, length) + assert data.data == test_data + + workers = [] + + for k in range(16): + workers.append(cocotb.start_soon(worker(tb.dma_ram_master_wr, tb.dma_ram_master_rd, k*0x1000, 0x1000, count=16))) + + while workers: + await workers.pop(0).join() + + await RisingEdge(dut.clk_wr) + await RisingEdge(dut.clk_wr) + + +def cycle_pause(): + return itertools.cycle([1, 1, 1, 0]) + + +if cocotb.SIM_NAME: + + for test in [run_test_write, run_test_read, run_stress_test]: + + factory = TestFactory(test) + factory.add_option("idle_inserter", [None, cycle_pause]) + factory.add_option("backpressure_inserter", [None, cycle_pause]) + factory.generate_tests() + + +# cocotb-test + +tests_dir = os.path.dirname(__file__) +rtl_dir = os.path.abspath(os.path.join(tests_dir, '..', '..', 'rtl')) + + +@pytest.mark.parametrize("seg_data_width", [32, 64]) +@pytest.mark.parametrize("seg_count", [2, 4]) +def test_dma_psdpram_async(request, seg_data_width, seg_count): + dut = "dma_psdpram_async" + module = os.path.splitext(os.path.basename(__file__))[0] + toplevel = dut + + verilog_sources = [ + os.path.join(rtl_dir, f"{dut}.v"), + ] + + parameters = {} + + parameters['SIZE'] = 65536 + parameters['SEG_COUNT'] = seg_count + parameters['SEG_DATA_WIDTH'] = seg_data_width + parameters['SEG_BE_WIDTH'] = parameters['SEG_DATA_WIDTH'] // 8 + parameters['SEG_ADDR_WIDTH'] = (parameters['SIZE']//(parameters['SEG_COUNT']*parameters['SEG_BE_WIDTH'])-1).bit_length() + parameters['PIPELINE'] = 2 + + extra_env = {f'PARAM_{k}': str(v) for k, v in parameters.items()} + + sim_build = os.path.join(tests_dir, 
"sim_build", + request.node.name.replace('[', '-').replace(']', '')) + + cocotb_test.simulator.run( + python_search=[tests_dir], + verilog_sources=verilog_sources, + toplevel=toplevel, + module=module, + parameters=parameters, + sim_build=sim_build, + extra_env=extra_env, + ) diff --git a/fpga/lib/pcie/tb/pcie_if.py b/fpga/lib/pcie/tb/pcie_if.py index 7a6010a54..07a0b3a12 100644 --- a/fpga/lib/pcie/tb/pcie_if.py +++ b/fpga/lib/pcie/tb/pcie_if.py @@ -789,6 +789,7 @@ class PcieIfDevice(Device): cfg_max_payload=None, cfg_max_read_req=None, cfg_ext_tag_enable=None, + cfg_rcb=None, # Flow control tx_fc_ph_av=None, @@ -921,6 +922,7 @@ class PcieIfDevice(Device): self.cfg_max_payload = init_signal(cfg_max_payload, 3, 0) self.cfg_max_read_req = init_signal(cfg_max_read_req, 3, 0) self.cfg_ext_tag_enable = init_signal(cfg_ext_tag_enable, 1, 0) + self.cfg_rcb = init_signal(cfg_rcb, 1, 0) # Flow control self.tx_fc_ph_av = init_signal(tx_fc_ph_av, 8, 0) @@ -1261,6 +1263,8 @@ class PcieIfDevice(Device): self.cfg_max_read_req.value = self.functions[0].pcie_cap.max_read_request_size if self.cfg_ext_tag_enable is not None: self.cfg_ext_tag_enable.value = self.functions[0].pcie_cap.extended_tag_field_enable + if self.cfg_rcb is not None: + self.cfg_rcb.value = self.functions[0].pcie_cap.read_completion_boundary async def _run_fc_logic(self): clock_edge_event = RisingEdge(self.clk) diff --git a/fpga/lib/pcie/tox.ini b/fpga/lib/pcie/tox.ini index d82b140e5..897b5a325 100644 --- a/fpga/lib/pcie/tox.ini +++ b/fpga/lib/pcie/tox.ini @@ -17,7 +17,7 @@ deps = cocotb == 1.7.2 cocotb-bus == 0.2.1 cocotb-test == 0.2.4 - cocotbext-axi == 0.1.20 + cocotbext-axi == 0.1.24 cocotbext-pcie == 0.2.12 jinja2 == 3.1.2