mirror of
https://github.com/corundum/corundum.git
synced 2025-01-16 08:12:53 +08:00
merged changes in pcie
This commit is contained in:
commit
045b0c1c68
@ -54,7 +54,7 @@ module fpga (
|
||||
parameter AXIS_PCIE_DATA_WIDTH = 512;
|
||||
parameter AXIS_PCIE_KEEP_WIDTH = (AXIS_PCIE_DATA_WIDTH/32);
|
||||
parameter AXIS_PCIE_RC_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 75 : 161;
|
||||
parameter AXIS_PCIE_RQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 60 : 137;
|
||||
parameter AXIS_PCIE_RQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 62 : 137;
|
||||
parameter AXIS_PCIE_CQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 85 : 183;
|
||||
parameter AXIS_PCIE_CC_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 33 : 81;
|
||||
parameter RC_STRADDLE = AXIS_PCIE_DATA_WIDTH >= 256;
|
||||
|
@ -159,8 +159,8 @@ example_core_pcie_us #(
|
||||
.PCIE_TAG_COUNT(PCIE_TAG_COUNT),
|
||||
.READ_OP_TABLE_SIZE(PCIE_TAG_COUNT),
|
||||
.READ_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)),
|
||||
.READ_CPLH_FC_LIMIT(128),
|
||||
.READ_CPLD_FC_LIMIT(2048),
|
||||
.READ_CPLH_FC_LIMIT(256),
|
||||
.READ_CPLD_FC_LIMIT(2048-256),
|
||||
.WRITE_OP_TABLE_SIZE(2**(RQ_SEQ_NUM_WIDTH-1)),
|
||||
.WRITE_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)),
|
||||
.BAR0_APERTURE(BAR0_APERTURE),
|
||||
@ -263,8 +263,7 @@ example_core_pcie_us_inst (
|
||||
*/
|
||||
.cfg_max_read_req(cfg_max_read_req),
|
||||
.cfg_max_payload(cfg_max_payload),
|
||||
// .cfg_rcb_status(cfg_rcb_status),
|
||||
.cfg_rcb_status(1'b1), // force RCB 128 due to insufficient CPLH limit in US+ PCIe HIP
|
||||
.cfg_rcb_status(cfg_rcb_status),
|
||||
|
||||
/*
|
||||
* Status
|
||||
|
@ -54,7 +54,6 @@ export PARAM_AXIS_PCIE_RQ_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_
|
||||
export PARAM_AXIS_PCIE_RC_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),75,161)
|
||||
export PARAM_AXIS_PCIE_CQ_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),88,183)
|
||||
export PARAM_AXIS_PCIE_CC_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),33,81)
|
||||
export PARAM_RQ_SEQ_NUM_WIDTH := 6
|
||||
|
||||
ifeq ($(SIM), icarus)
|
||||
PLUSARGS += -fst
|
||||
|
@ -396,7 +396,6 @@ def test_fpga_core(request):
|
||||
parameters['AXIS_PCIE_RC_USER_WIDTH'] = 75 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 161
|
||||
parameters['AXIS_PCIE_CQ_USER_WIDTH'] = 88 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 183
|
||||
parameters['AXIS_PCIE_CC_USER_WIDTH'] = 33 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 81
|
||||
parameters['RQ_SEQ_NUM_WIDTH'] = 6
|
||||
|
||||
extra_env = {f'PARAM_{k}': str(v) for k, v in parameters.items()}
|
||||
|
||||
|
@ -53,7 +53,7 @@ module fpga (
|
||||
parameter AXIS_PCIE_DATA_WIDTH = 512;
|
||||
parameter AXIS_PCIE_KEEP_WIDTH = (AXIS_PCIE_DATA_WIDTH/32);
|
||||
parameter AXIS_PCIE_RC_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 75 : 161;
|
||||
parameter AXIS_PCIE_RQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 60 : 137;
|
||||
parameter AXIS_PCIE_RQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 62 : 137;
|
||||
parameter AXIS_PCIE_CQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 85 : 183;
|
||||
parameter AXIS_PCIE_CC_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 33 : 81;
|
||||
parameter RC_STRADDLE = AXIS_PCIE_DATA_WIDTH >= 256;
|
||||
|
@ -156,8 +156,8 @@ example_core_pcie_us #(
|
||||
.PCIE_TAG_COUNT(PCIE_TAG_COUNT),
|
||||
.READ_OP_TABLE_SIZE(PCIE_TAG_COUNT),
|
||||
.READ_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)),
|
||||
.READ_CPLH_FC_LIMIT(128),
|
||||
.READ_CPLD_FC_LIMIT(2048),
|
||||
.READ_CPLH_FC_LIMIT(256),
|
||||
.READ_CPLD_FC_LIMIT(2048-256),
|
||||
.WRITE_OP_TABLE_SIZE(2**(RQ_SEQ_NUM_WIDTH-1)),
|
||||
.WRITE_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)),
|
||||
.BAR0_APERTURE(BAR0_APERTURE),
|
||||
@ -260,8 +260,7 @@ example_core_pcie_us_inst (
|
||||
*/
|
||||
.cfg_max_read_req(cfg_max_read_req),
|
||||
.cfg_max_payload(cfg_max_payload),
|
||||
// .cfg_rcb_status(cfg_rcb_status),
|
||||
.cfg_rcb_status(1'b1), // force RCB 128 due to insufficient CPLH limit in US+ PCIe HIP
|
||||
.cfg_rcb_status(cfg_rcb_status),
|
||||
|
||||
/*
|
||||
* Status
|
||||
|
@ -54,7 +54,6 @@ export PARAM_AXIS_PCIE_RQ_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_
|
||||
export PARAM_AXIS_PCIE_RC_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),75,161)
|
||||
export PARAM_AXIS_PCIE_CQ_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),88,183)
|
||||
export PARAM_AXIS_PCIE_CC_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),33,81)
|
||||
export PARAM_RQ_SEQ_NUM_WIDTH := 6
|
||||
|
||||
ifeq ($(SIM), icarus)
|
||||
PLUSARGS += -fst
|
||||
|
@ -398,7 +398,6 @@ def test_fpga_core(request):
|
||||
parameters['AXIS_PCIE_RC_USER_WIDTH'] = 75 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 161
|
||||
parameters['AXIS_PCIE_CQ_USER_WIDTH'] = 88 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 183
|
||||
parameters['AXIS_PCIE_CC_USER_WIDTH'] = 33 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 81
|
||||
parameters['RQ_SEQ_NUM_WIDTH'] = 6
|
||||
|
||||
extra_env = {f'PARAM_{k}': str(v) for k, v in parameters.items()}
|
||||
|
||||
|
@ -53,7 +53,7 @@ module fpga (
|
||||
parameter AXIS_PCIE_DATA_WIDTH = 512;
|
||||
parameter AXIS_PCIE_KEEP_WIDTH = (AXIS_PCIE_DATA_WIDTH/32);
|
||||
parameter AXIS_PCIE_RC_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 75 : 161;
|
||||
parameter AXIS_PCIE_RQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 60 : 137;
|
||||
parameter AXIS_PCIE_RQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 62 : 137;
|
||||
parameter AXIS_PCIE_CQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 85 : 183;
|
||||
parameter AXIS_PCIE_CC_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 33 : 81;
|
||||
parameter RC_STRADDLE = AXIS_PCIE_DATA_WIDTH >= 256;
|
||||
|
@ -156,8 +156,8 @@ example_core_pcie_us #(
|
||||
.PCIE_TAG_COUNT(PCIE_TAG_COUNT),
|
||||
.READ_OP_TABLE_SIZE(PCIE_TAG_COUNT),
|
||||
.READ_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)),
|
||||
.READ_CPLH_FC_LIMIT(128),
|
||||
.READ_CPLD_FC_LIMIT(2048),
|
||||
.READ_CPLH_FC_LIMIT(256),
|
||||
.READ_CPLD_FC_LIMIT(2048-256),
|
||||
.WRITE_OP_TABLE_SIZE(2**(RQ_SEQ_NUM_WIDTH-1)),
|
||||
.WRITE_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)),
|
||||
.BAR0_APERTURE(BAR0_APERTURE),
|
||||
@ -260,8 +260,7 @@ example_core_pcie_us_inst (
|
||||
*/
|
||||
.cfg_max_read_req(cfg_max_read_req),
|
||||
.cfg_max_payload(cfg_max_payload),
|
||||
// .cfg_rcb_status(cfg_rcb_status),
|
||||
.cfg_rcb_status(1'b1), // force RCB 128 due to insufficient CPLH limit in US+ PCIe HIP
|
||||
.cfg_rcb_status(cfg_rcb_status),
|
||||
|
||||
/*
|
||||
* Status
|
||||
|
@ -54,7 +54,6 @@ export PARAM_AXIS_PCIE_RQ_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_
|
||||
export PARAM_AXIS_PCIE_RC_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),75,161)
|
||||
export PARAM_AXIS_PCIE_CQ_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),88,183)
|
||||
export PARAM_AXIS_PCIE_CC_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),33,81)
|
||||
export PARAM_RQ_SEQ_NUM_WIDTH := 6
|
||||
|
||||
ifeq ($(SIM), icarus)
|
||||
PLUSARGS += -fst
|
||||
|
@ -398,7 +398,6 @@ def test_fpga_core(request):
|
||||
parameters['AXIS_PCIE_RC_USER_WIDTH'] = 75 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 161
|
||||
parameters['AXIS_PCIE_CQ_USER_WIDTH'] = 88 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 183
|
||||
parameters['AXIS_PCIE_CC_USER_WIDTH'] = 33 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 81
|
||||
parameters['RQ_SEQ_NUM_WIDTH'] = 6
|
||||
|
||||
extra_env = {f'PARAM_{k}': str(v) for k, v in parameters.items()}
|
||||
|
||||
|
@ -52,7 +52,7 @@ module fpga (
|
||||
parameter AXIS_PCIE_DATA_WIDTH = 512;
|
||||
parameter AXIS_PCIE_KEEP_WIDTH = (AXIS_PCIE_DATA_WIDTH/32);
|
||||
parameter AXIS_PCIE_RC_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 75 : 161;
|
||||
parameter AXIS_PCIE_RQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 60 : 137;
|
||||
parameter AXIS_PCIE_RQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 62 : 137;
|
||||
parameter AXIS_PCIE_CQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 85 : 183;
|
||||
parameter AXIS_PCIE_CC_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 33 : 81;
|
||||
parameter RC_STRADDLE = AXIS_PCIE_DATA_WIDTH >= 256;
|
||||
|
@ -148,8 +148,8 @@ example_core_pcie_us #(
|
||||
.PCIE_TAG_COUNT(PCIE_TAG_COUNT),
|
||||
.READ_OP_TABLE_SIZE(PCIE_TAG_COUNT),
|
||||
.READ_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)),
|
||||
.READ_CPLH_FC_LIMIT(128),
|
||||
.READ_CPLD_FC_LIMIT(2048),
|
||||
.READ_CPLH_FC_LIMIT(256),
|
||||
.READ_CPLD_FC_LIMIT(2048-256),
|
||||
.WRITE_OP_TABLE_SIZE(2**(RQ_SEQ_NUM_WIDTH-1)),
|
||||
.WRITE_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)),
|
||||
.BAR0_APERTURE(BAR0_APERTURE),
|
||||
@ -252,8 +252,7 @@ example_core_pcie_us_inst (
|
||||
*/
|
||||
.cfg_max_read_req(cfg_max_read_req),
|
||||
.cfg_max_payload(cfg_max_payload),
|
||||
// .cfg_rcb_status(cfg_rcb_status),
|
||||
.cfg_rcb_status(1'b1), // force RCB 128 due to insufficient CPLH limit in US+ PCIe HIP
|
||||
.cfg_rcb_status(cfg_rcb_status),
|
||||
|
||||
/*
|
||||
* Status
|
||||
|
@ -54,7 +54,6 @@ export PARAM_AXIS_PCIE_RQ_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_
|
||||
export PARAM_AXIS_PCIE_RC_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),75,161)
|
||||
export PARAM_AXIS_PCIE_CQ_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),88,183)
|
||||
export PARAM_AXIS_PCIE_CC_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),33,81)
|
||||
export PARAM_RQ_SEQ_NUM_WIDTH := 6
|
||||
|
||||
ifeq ($(SIM), icarus)
|
||||
PLUSARGS += -fst
|
||||
|
@ -396,7 +396,6 @@ def test_fpga_core(request):
|
||||
parameters['AXIS_PCIE_RC_USER_WIDTH'] = 75 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 161
|
||||
parameters['AXIS_PCIE_CQ_USER_WIDTH'] = 88 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 183
|
||||
parameters['AXIS_PCIE_CC_USER_WIDTH'] = 33 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 81
|
||||
parameters['RQ_SEQ_NUM_WIDTH'] = 6
|
||||
|
||||
extra_env = {f'PARAM_{k}': str(v) for k, v in parameters.items()}
|
||||
|
||||
|
@ -55,7 +55,7 @@ module fpga (
|
||||
parameter AXIS_PCIE_DATA_WIDTH = 512;
|
||||
parameter AXIS_PCIE_KEEP_WIDTH = (AXIS_PCIE_DATA_WIDTH/32);
|
||||
parameter AXIS_PCIE_RC_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 75 : 161;
|
||||
parameter AXIS_PCIE_RQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 60 : 137;
|
||||
parameter AXIS_PCIE_RQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 62 : 137;
|
||||
parameter AXIS_PCIE_CQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 85 : 183;
|
||||
parameter AXIS_PCIE_CC_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 33 : 81;
|
||||
parameter RC_STRADDLE = AXIS_PCIE_DATA_WIDTH >= 256;
|
||||
|
@ -159,8 +159,8 @@ example_core_pcie_us #(
|
||||
.PCIE_TAG_COUNT(PCIE_TAG_COUNT),
|
||||
.READ_OP_TABLE_SIZE(PCIE_TAG_COUNT),
|
||||
.READ_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)),
|
||||
.READ_CPLH_FC_LIMIT(128),
|
||||
.READ_CPLD_FC_LIMIT(2048),
|
||||
.READ_CPLH_FC_LIMIT(256),
|
||||
.READ_CPLD_FC_LIMIT(2048-256),
|
||||
.WRITE_OP_TABLE_SIZE(2**(RQ_SEQ_NUM_WIDTH-1)),
|
||||
.WRITE_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)),
|
||||
.BAR0_APERTURE(BAR0_APERTURE),
|
||||
@ -263,8 +263,7 @@ example_core_pcie_us_inst (
|
||||
*/
|
||||
.cfg_max_read_req(cfg_max_read_req),
|
||||
.cfg_max_payload(cfg_max_payload),
|
||||
// .cfg_rcb_status(cfg_rcb_status),
|
||||
.cfg_rcb_status(1'b1), // force RCB 128 due to insufficient CPLH limit in US+ PCIe HIP
|
||||
.cfg_rcb_status(cfg_rcb_status),
|
||||
|
||||
/*
|
||||
* Status
|
||||
|
@ -54,7 +54,6 @@ export PARAM_AXIS_PCIE_RQ_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_
|
||||
export PARAM_AXIS_PCIE_RC_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),75,161)
|
||||
export PARAM_AXIS_PCIE_CQ_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),88,183)
|
||||
export PARAM_AXIS_PCIE_CC_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),33,81)
|
||||
export PARAM_RQ_SEQ_NUM_WIDTH := 6
|
||||
|
||||
ifeq ($(SIM), icarus)
|
||||
PLUSARGS += -fst
|
||||
|
@ -396,7 +396,6 @@ def test_fpga_core(request):
|
||||
parameters['AXIS_PCIE_RC_USER_WIDTH'] = 75 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 161
|
||||
parameters['AXIS_PCIE_CQ_USER_WIDTH'] = 88 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 183
|
||||
parameters['AXIS_PCIE_CC_USER_WIDTH'] = 33 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 81
|
||||
parameters['RQ_SEQ_NUM_WIDTH'] = 6
|
||||
|
||||
extra_env = {f'PARAM_{k}': str(v) for k, v in parameters.items()}
|
||||
|
||||
|
@ -155,7 +155,7 @@ example_core_pcie_us #(
|
||||
.READ_OP_TABLE_SIZE(PCIE_TAG_COUNT),
|
||||
.READ_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)),
|
||||
.READ_CPLH_FC_LIMIT(64),
|
||||
.READ_CPLD_FC_LIMIT(992),
|
||||
.READ_CPLD_FC_LIMIT(1024-64),
|
||||
.WRITE_OP_TABLE_SIZE(2**(RQ_SEQ_NUM_WIDTH-1)),
|
||||
.WRITE_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)),
|
||||
.BAR0_APERTURE(BAR0_APERTURE),
|
||||
@ -258,8 +258,7 @@ example_core_pcie_us_inst (
|
||||
*/
|
||||
.cfg_max_read_req(cfg_max_read_req),
|
||||
.cfg_max_payload(cfg_max_payload),
|
||||
// .cfg_rcb_status(cfg_rcb_status),
|
||||
.cfg_rcb_status(1'b1), // force RCB 128 due to insufficient CPLH limit in US PCIe HIP
|
||||
.cfg_rcb_status(cfg_rcb_status),
|
||||
|
||||
/*
|
||||
* Status
|
||||
|
@ -54,7 +54,6 @@ export PARAM_AXIS_PCIE_RQ_USER_WIDTH := 60
|
||||
export PARAM_AXIS_PCIE_RC_USER_WIDTH := 75
|
||||
export PARAM_AXIS_PCIE_CQ_USER_WIDTH := 85
|
||||
export PARAM_AXIS_PCIE_CC_USER_WIDTH := 33
|
||||
export PARAM_RQ_SEQ_NUM_WIDTH := 4
|
||||
|
||||
ifeq ($(SIM), icarus)
|
||||
PLUSARGS += -fst
|
||||
|
@ -370,7 +370,6 @@ def test_fpga_core(request):
|
||||
parameters['AXIS_PCIE_RC_USER_WIDTH'] = 75
|
||||
parameters['AXIS_PCIE_CQ_USER_WIDTH'] = 85
|
||||
parameters['AXIS_PCIE_CC_USER_WIDTH'] = 33
|
||||
parameters['RQ_SEQ_NUM_WIDTH'] = 4
|
||||
|
||||
extra_env = {f'PARAM_{k}': str(v) for k, v in parameters.items()}
|
||||
|
||||
|
@ -54,7 +54,7 @@ module fpga (
|
||||
parameter AXIS_PCIE_DATA_WIDTH = 256;
|
||||
parameter AXIS_PCIE_KEEP_WIDTH = (AXIS_PCIE_DATA_WIDTH/32);
|
||||
parameter AXIS_PCIE_RC_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 75 : 161;
|
||||
parameter AXIS_PCIE_RQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 60 : 137;
|
||||
parameter AXIS_PCIE_RQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 62 : 137;
|
||||
parameter AXIS_PCIE_CQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 85 : 183;
|
||||
parameter AXIS_PCIE_CC_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 33 : 81;
|
||||
parameter RC_STRADDLE = AXIS_PCIE_DATA_WIDTH >= 256;
|
||||
|
@ -159,8 +159,8 @@ example_core_pcie_us #(
|
||||
.PCIE_TAG_COUNT(PCIE_TAG_COUNT),
|
||||
.READ_OP_TABLE_SIZE(PCIE_TAG_COUNT),
|
||||
.READ_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)),
|
||||
.READ_CPLH_FC_LIMIT(128),
|
||||
.READ_CPLD_FC_LIMIT(2048),
|
||||
.READ_CPLH_FC_LIMIT(256),
|
||||
.READ_CPLD_FC_LIMIT(2048-256),
|
||||
.WRITE_OP_TABLE_SIZE(2**(RQ_SEQ_NUM_WIDTH-1)),
|
||||
.WRITE_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)),
|
||||
.BAR0_APERTURE(BAR0_APERTURE),
|
||||
@ -263,8 +263,7 @@ example_core_pcie_us_inst (
|
||||
*/
|
||||
.cfg_max_read_req(cfg_max_read_req),
|
||||
.cfg_max_payload(cfg_max_payload),
|
||||
// .cfg_rcb_status(cfg_rcb_status),
|
||||
.cfg_rcb_status(1'b1), // force RCB 128 due to insufficient CPLH limit in US+ PCIe HIP
|
||||
.cfg_rcb_status(cfg_rcb_status),
|
||||
|
||||
/*
|
||||
* Status
|
||||
|
@ -54,7 +54,6 @@ export PARAM_AXIS_PCIE_RQ_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_
|
||||
export PARAM_AXIS_PCIE_RC_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),75,161)
|
||||
export PARAM_AXIS_PCIE_CQ_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),88,183)
|
||||
export PARAM_AXIS_PCIE_CC_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),33,81)
|
||||
export PARAM_RQ_SEQ_NUM_WIDTH := 6
|
||||
|
||||
ifeq ($(SIM), icarus)
|
||||
PLUSARGS += -fst
|
||||
|
@ -396,7 +396,6 @@ def test_fpga_core(request):
|
||||
parameters['AXIS_PCIE_RC_USER_WIDTH'] = 75 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 161
|
||||
parameters['AXIS_PCIE_CQ_USER_WIDTH'] = 88 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 183
|
||||
parameters['AXIS_PCIE_CC_USER_WIDTH'] = 33 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 81
|
||||
parameters['RQ_SEQ_NUM_WIDTH'] = 6
|
||||
|
||||
extra_env = {f'PARAM_{k}': str(v) for k, v in parameters.items()}
|
||||
|
||||
|
@ -157,7 +157,7 @@ example_core_pcie_us #(
|
||||
.READ_OP_TABLE_SIZE(PCIE_TAG_COUNT),
|
||||
.READ_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)),
|
||||
.READ_CPLH_FC_LIMIT(64),
|
||||
.READ_CPLD_FC_LIMIT(992),
|
||||
.READ_CPLD_FC_LIMIT(1024-64),
|
||||
.WRITE_OP_TABLE_SIZE(2**(RQ_SEQ_NUM_WIDTH-1)),
|
||||
.WRITE_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)),
|
||||
.BAR0_APERTURE(BAR0_APERTURE),
|
||||
@ -260,8 +260,7 @@ example_core_pcie_us_inst (
|
||||
*/
|
||||
.cfg_max_read_req(cfg_max_read_req),
|
||||
.cfg_max_payload(cfg_max_payload),
|
||||
// .cfg_rcb_status(cfg_rcb_status),
|
||||
.cfg_rcb_status(1'b1), // force RCB 128 due to insufficient CPLH limit in US PCIe HIP
|
||||
.cfg_rcb_status(cfg_rcb_status),
|
||||
|
||||
/*
|
||||
* Status
|
||||
|
@ -57,6 +57,10 @@ module fpga (
|
||||
|
||||
parameter AXIS_PCIE_DATA_WIDTH = 256;
|
||||
parameter AXIS_PCIE_KEEP_WIDTH = (AXIS_PCIE_DATA_WIDTH/32);
|
||||
parameter AXIS_PCIE_RC_USER_WIDTH = 75;
|
||||
parameter AXIS_PCIE_RQ_USER_WIDTH = 60;
|
||||
parameter AXIS_PCIE_CQ_USER_WIDTH = 85;
|
||||
parameter AXIS_PCIE_CC_USER_WIDTH = 33;
|
||||
|
||||
// Clock and reset
|
||||
wire pcie_user_clk;
|
||||
@ -107,33 +111,33 @@ ibufds_gte3_pcie_mgt_refclk_inst (
|
||||
.ODIV2 (pcie_sys_clk)
|
||||
);
|
||||
|
||||
wire [AXIS_PCIE_DATA_WIDTH-1:0] axis_rq_tdata;
|
||||
wire [AXIS_PCIE_KEEP_WIDTH-1:0] axis_rq_tkeep;
|
||||
wire axis_rq_tlast;
|
||||
wire axis_rq_tready;
|
||||
wire [59:0] axis_rq_tuser;
|
||||
wire axis_rq_tvalid;
|
||||
wire [AXIS_PCIE_DATA_WIDTH-1:0] axis_rq_tdata;
|
||||
wire [AXIS_PCIE_KEEP_WIDTH-1:0] axis_rq_tkeep;
|
||||
wire axis_rq_tlast;
|
||||
wire axis_rq_tready;
|
||||
wire [AXIS_PCIE_RQ_USER_WIDTH-1:0] axis_rq_tuser;
|
||||
wire axis_rq_tvalid;
|
||||
|
||||
wire [AXIS_PCIE_DATA_WIDTH-1:0] axis_rc_tdata;
|
||||
wire [AXIS_PCIE_KEEP_WIDTH-1:0] axis_rc_tkeep;
|
||||
wire axis_rc_tlast;
|
||||
wire axis_rc_tready;
|
||||
wire [74:0] axis_rc_tuser;
|
||||
wire axis_rc_tvalid;
|
||||
wire [AXIS_PCIE_DATA_WIDTH-1:0] axis_rc_tdata;
|
||||
wire [AXIS_PCIE_KEEP_WIDTH-1:0] axis_rc_tkeep;
|
||||
wire axis_rc_tlast;
|
||||
wire axis_rc_tready;
|
||||
wire [AXIS_PCIE_RC_USER_WIDTH-1:0] axis_rc_tuser;
|
||||
wire axis_rc_tvalid;
|
||||
|
||||
wire [AXIS_PCIE_DATA_WIDTH-1:0] axis_cq_tdata;
|
||||
wire [AXIS_PCIE_KEEP_WIDTH-1:0] axis_cq_tkeep;
|
||||
wire axis_cq_tlast;
|
||||
wire axis_cq_tready;
|
||||
wire [84:0] axis_cq_tuser;
|
||||
wire axis_cq_tvalid;
|
||||
wire [AXIS_PCIE_DATA_WIDTH-1:0] axis_cq_tdata;
|
||||
wire [AXIS_PCIE_KEEP_WIDTH-1:0] axis_cq_tkeep;
|
||||
wire axis_cq_tlast;
|
||||
wire axis_cq_tready;
|
||||
wire [AXIS_PCIE_CQ_USER_WIDTH-1:0] axis_cq_tuser;
|
||||
wire axis_cq_tvalid;
|
||||
|
||||
wire [AXIS_PCIE_DATA_WIDTH-1:0] axis_cc_tdata;
|
||||
wire [AXIS_PCIE_KEEP_WIDTH-1:0] axis_cc_tkeep;
|
||||
wire axis_cc_tlast;
|
||||
wire axis_cc_tready;
|
||||
wire [32:0] axis_cc_tuser;
|
||||
wire axis_cc_tvalid;
|
||||
wire [AXIS_PCIE_DATA_WIDTH-1:0] axis_cc_tdata;
|
||||
wire [AXIS_PCIE_KEEP_WIDTH-1:0] axis_cc_tkeep;
|
||||
wire axis_cc_tlast;
|
||||
wire axis_cc_tready;
|
||||
wire [AXIS_PCIE_CC_USER_WIDTH-1:0] axis_cc_tuser;
|
||||
wire axis_cc_tvalid;
|
||||
|
||||
// ila_0 rq_ila (
|
||||
// .clk(pcie_user_clk),
|
||||
@ -357,7 +361,12 @@ pcie3_ultrascale_inst (
|
||||
);
|
||||
|
||||
fpga_core #(
|
||||
.AXIS_PCIE_DATA_WIDTH(AXIS_PCIE_DATA_WIDTH)
|
||||
.AXIS_PCIE_DATA_WIDTH(AXIS_PCIE_DATA_WIDTH),
|
||||
.AXIS_PCIE_KEEP_WIDTH(AXIS_PCIE_KEEP_WIDTH),
|
||||
.AXIS_PCIE_RC_USER_WIDTH(AXIS_PCIE_RC_USER_WIDTH),
|
||||
.AXIS_PCIE_RQ_USER_WIDTH(AXIS_PCIE_RQ_USER_WIDTH),
|
||||
.AXIS_PCIE_CQ_USER_WIDTH(AXIS_PCIE_CQ_USER_WIDTH),
|
||||
.AXIS_PCIE_CC_USER_WIDTH(AXIS_PCIE_CC_USER_WIDTH)
|
||||
)
|
||||
core_inst (
|
||||
/*
|
||||
|
@ -34,89 +34,93 @@ THE SOFTWARE.
|
||||
module fpga_core #
|
||||
(
|
||||
parameter AXIS_PCIE_DATA_WIDTH = 256,
|
||||
parameter AXIS_PCIE_KEEP_WIDTH = (AXIS_PCIE_DATA_WIDTH/32)
|
||||
parameter AXIS_PCIE_KEEP_WIDTH = (AXIS_PCIE_DATA_WIDTH/32),
|
||||
parameter AXIS_PCIE_RC_USER_WIDTH = 75,
|
||||
parameter AXIS_PCIE_RQ_USER_WIDTH = 60,
|
||||
parameter AXIS_PCIE_CQ_USER_WIDTH = 85,
|
||||
parameter AXIS_PCIE_CC_USER_WIDTH = 33
|
||||
)
|
||||
(
|
||||
/*
|
||||
* Clock: 250 MHz
|
||||
* Synchronous reset
|
||||
*/
|
||||
input wire clk,
|
||||
input wire rst,
|
||||
input wire clk,
|
||||
input wire rst,
|
||||
|
||||
/*
|
||||
* GPIO
|
||||
*/
|
||||
input wire btnu,
|
||||
input wire btnl,
|
||||
input wire btnd,
|
||||
input wire btnr,
|
||||
input wire btnc,
|
||||
input wire [3:0] sw,
|
||||
output wire [7:0] led,
|
||||
input wire btnu,
|
||||
input wire btnl,
|
||||
input wire btnd,
|
||||
input wire btnr,
|
||||
input wire btnc,
|
||||
input wire [3:0] sw,
|
||||
output wire [7:0] led,
|
||||
|
||||
/*
|
||||
* PCIe
|
||||
*/
|
||||
output wire [AXIS_PCIE_DATA_WIDTH-1:0] m_axis_rq_tdata,
|
||||
output wire [AXIS_PCIE_KEEP_WIDTH-1:0] m_axis_rq_tkeep,
|
||||
output wire m_axis_rq_tlast,
|
||||
input wire m_axis_rq_tready,
|
||||
output wire [59:0] m_axis_rq_tuser,
|
||||
output wire m_axis_rq_tvalid,
|
||||
output wire [AXIS_PCIE_DATA_WIDTH-1:0] m_axis_rq_tdata,
|
||||
output wire [AXIS_PCIE_KEEP_WIDTH-1:0] m_axis_rq_tkeep,
|
||||
output wire m_axis_rq_tlast,
|
||||
input wire m_axis_rq_tready,
|
||||
output wire [AXIS_PCIE_RQ_USER_WIDTH-1:0] m_axis_rq_tuser,
|
||||
output wire m_axis_rq_tvalid,
|
||||
|
||||
input wire [AXIS_PCIE_DATA_WIDTH-1:0] s_axis_rc_tdata,
|
||||
input wire [AXIS_PCIE_KEEP_WIDTH-1:0] s_axis_rc_tkeep,
|
||||
input wire s_axis_rc_tlast,
|
||||
output wire s_axis_rc_tready,
|
||||
input wire [74:0] s_axis_rc_tuser,
|
||||
input wire s_axis_rc_tvalid,
|
||||
input wire [AXIS_PCIE_DATA_WIDTH-1:0] s_axis_rc_tdata,
|
||||
input wire [AXIS_PCIE_KEEP_WIDTH-1:0] s_axis_rc_tkeep,
|
||||
input wire s_axis_rc_tlast,
|
||||
output wire s_axis_rc_tready,
|
||||
input wire [AXIS_PCIE_RC_USER_WIDTH-1:0] s_axis_rc_tuser,
|
||||
input wire s_axis_rc_tvalid,
|
||||
|
||||
input wire [AXIS_PCIE_DATA_WIDTH-1:0] s_axis_cq_tdata,
|
||||
input wire [AXIS_PCIE_KEEP_WIDTH-1:0] s_axis_cq_tkeep,
|
||||
input wire s_axis_cq_tlast,
|
||||
output wire s_axis_cq_tready,
|
||||
input wire [84:0] s_axis_cq_tuser,
|
||||
input wire s_axis_cq_tvalid,
|
||||
input wire [AXIS_PCIE_DATA_WIDTH-1:0] s_axis_cq_tdata,
|
||||
input wire [AXIS_PCIE_KEEP_WIDTH-1:0] s_axis_cq_tkeep,
|
||||
input wire s_axis_cq_tlast,
|
||||
output wire s_axis_cq_tready,
|
||||
input wire [AXIS_PCIE_CQ_USER_WIDTH-1:0] s_axis_cq_tuser,
|
||||
input wire s_axis_cq_tvalid,
|
||||
|
||||
output wire [AXIS_PCIE_DATA_WIDTH-1:0] m_axis_cc_tdata,
|
||||
output wire [AXIS_PCIE_KEEP_WIDTH-1:0] m_axis_cc_tkeep,
|
||||
output wire m_axis_cc_tlast,
|
||||
input wire m_axis_cc_tready,
|
||||
output wire [32:0] m_axis_cc_tuser,
|
||||
output wire m_axis_cc_tvalid,
|
||||
output wire [AXIS_PCIE_DATA_WIDTH-1:0] m_axis_cc_tdata,
|
||||
output wire [AXIS_PCIE_KEEP_WIDTH-1:0] m_axis_cc_tkeep,
|
||||
output wire m_axis_cc_tlast,
|
||||
input wire m_axis_cc_tready,
|
||||
output wire [AXIS_PCIE_CC_USER_WIDTH-1:0] m_axis_cc_tuser,
|
||||
output wire m_axis_cc_tvalid,
|
||||
|
||||
input wire [2:0] cfg_max_payload,
|
||||
input wire [2:0] cfg_max_read_req,
|
||||
input wire [2:0] cfg_max_payload,
|
||||
input wire [2:0] cfg_max_read_req,
|
||||
|
||||
output wire [18:0] cfg_mgmt_addr,
|
||||
output wire cfg_mgmt_write,
|
||||
output wire [31:0] cfg_mgmt_write_data,
|
||||
output wire [3:0] cfg_mgmt_byte_enable,
|
||||
output wire cfg_mgmt_read,
|
||||
input wire [31:0] cfg_mgmt_read_data,
|
||||
input wire cfg_mgmt_read_write_done,
|
||||
output wire [18:0] cfg_mgmt_addr,
|
||||
output wire cfg_mgmt_write,
|
||||
output wire [31:0] cfg_mgmt_write_data,
|
||||
output wire [3:0] cfg_mgmt_byte_enable,
|
||||
output wire cfg_mgmt_read,
|
||||
input wire [31:0] cfg_mgmt_read_data,
|
||||
input wire cfg_mgmt_read_write_done,
|
||||
|
||||
input wire [3:0] cfg_interrupt_msi_enable,
|
||||
input wire [7:0] cfg_interrupt_msi_vf_enable,
|
||||
input wire [11:0] cfg_interrupt_msi_mmenable,
|
||||
input wire cfg_interrupt_msi_mask_update,
|
||||
input wire [31:0] cfg_interrupt_msi_data,
|
||||
output wire [3:0] cfg_interrupt_msi_select,
|
||||
output wire [31:0] cfg_interrupt_msi_int,
|
||||
output wire [31:0] cfg_interrupt_msi_pending_status,
|
||||
output wire cfg_interrupt_msi_pending_status_data_enable,
|
||||
output wire [3:0] cfg_interrupt_msi_pending_status_function_num,
|
||||
input wire cfg_interrupt_msi_sent,
|
||||
input wire cfg_interrupt_msi_fail,
|
||||
output wire [2:0] cfg_interrupt_msi_attr,
|
||||
output wire cfg_interrupt_msi_tph_present,
|
||||
output wire [1:0] cfg_interrupt_msi_tph_type,
|
||||
output wire [8:0] cfg_interrupt_msi_tph_st_tag,
|
||||
output wire [3:0] cfg_interrupt_msi_function_number,
|
||||
input wire [3:0] cfg_interrupt_msi_enable,
|
||||
input wire [7:0] cfg_interrupt_msi_vf_enable,
|
||||
input wire [11:0] cfg_interrupt_msi_mmenable,
|
||||
input wire cfg_interrupt_msi_mask_update,
|
||||
input wire [31:0] cfg_interrupt_msi_data,
|
||||
output wire [3:0] cfg_interrupt_msi_select,
|
||||
output wire [31:0] cfg_interrupt_msi_int,
|
||||
output wire [31:0] cfg_interrupt_msi_pending_status,
|
||||
output wire cfg_interrupt_msi_pending_status_data_enable,
|
||||
output wire [3:0] cfg_interrupt_msi_pending_status_function_num,
|
||||
input wire cfg_interrupt_msi_sent,
|
||||
input wire cfg_interrupt_msi_fail,
|
||||
output wire [2:0] cfg_interrupt_msi_attr,
|
||||
output wire cfg_interrupt_msi_tph_present,
|
||||
output wire [1:0] cfg_interrupt_msi_tph_type,
|
||||
output wire [8:0] cfg_interrupt_msi_tph_st_tag,
|
||||
output wire [3:0] cfg_interrupt_msi_function_number,
|
||||
|
||||
output wire status_error_cor,
|
||||
output wire status_error_uncor
|
||||
output wire status_error_cor,
|
||||
output wire status_error_uncor
|
||||
);
|
||||
|
||||
parameter PCIE_ADDR_WIDTH = 64;
|
||||
|
@ -377,7 +377,6 @@ def test_fpga_core(request):
|
||||
parameters['AXIS_PCIE_RC_USER_WIDTH'] = 75
|
||||
parameters['AXIS_PCIE_CQ_USER_WIDTH'] = 85
|
||||
parameters['AXIS_PCIE_CC_USER_WIDTH'] = 33
|
||||
parameters['RQ_SEQ_NUM_WIDTH'] = 4
|
||||
|
||||
extra_env = {f'PARAM_{k}': str(v) for k, v in parameters.items()}
|
||||
|
||||
|
@ -58,7 +58,7 @@ module fpga (
|
||||
parameter AXIS_PCIE_DATA_WIDTH = 512;
|
||||
parameter AXIS_PCIE_KEEP_WIDTH = (AXIS_PCIE_DATA_WIDTH/32);
|
||||
parameter AXIS_PCIE_RC_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 75 : 161;
|
||||
parameter AXIS_PCIE_RQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 60 : 137;
|
||||
parameter AXIS_PCIE_RQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 62 : 137;
|
||||
parameter AXIS_PCIE_CQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 85 : 183;
|
||||
parameter AXIS_PCIE_CC_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 33 : 81;
|
||||
parameter RC_STRADDLE = AXIS_PCIE_DATA_WIDTH >= 256;
|
||||
|
@ -161,8 +161,8 @@ example_core_pcie_us #(
|
||||
.PCIE_TAG_COUNT(PCIE_TAG_COUNT),
|
||||
.READ_OP_TABLE_SIZE(PCIE_TAG_COUNT),
|
||||
.READ_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)),
|
||||
.READ_CPLH_FC_LIMIT(128),
|
||||
.READ_CPLD_FC_LIMIT(2048),
|
||||
.READ_CPLH_FC_LIMIT(256),
|
||||
.READ_CPLD_FC_LIMIT(2048-256),
|
||||
.WRITE_OP_TABLE_SIZE(2**(RQ_SEQ_NUM_WIDTH-1)),
|
||||
.WRITE_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)),
|
||||
.BAR0_APERTURE(BAR0_APERTURE),
|
||||
@ -265,8 +265,7 @@ example_core_pcie_us_inst (
|
||||
*/
|
||||
.cfg_max_read_req(cfg_max_read_req),
|
||||
.cfg_max_payload(cfg_max_payload),
|
||||
// .cfg_rcb_status(cfg_rcb_status),
|
||||
.cfg_rcb_status(1'b1), // force RCB 128 due to insufficient CPLH limit in US+ PCIe HIP
|
||||
.cfg_rcb_status(cfg_rcb_status),
|
||||
|
||||
/*
|
||||
* Status
|
||||
|
@ -54,7 +54,6 @@ export PARAM_AXIS_PCIE_RQ_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_
|
||||
export PARAM_AXIS_PCIE_RC_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),75,161)
|
||||
export PARAM_AXIS_PCIE_CQ_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),88,183)
|
||||
export PARAM_AXIS_PCIE_CC_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),33,81)
|
||||
export PARAM_RQ_SEQ_NUM_WIDTH := 6
|
||||
|
||||
ifeq ($(SIM), icarus)
|
||||
PLUSARGS += -fst
|
||||
|
@ -403,7 +403,6 @@ def test_fpga_core(request):
|
||||
parameters['AXIS_PCIE_RC_USER_WIDTH'] = 75 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 161
|
||||
parameters['AXIS_PCIE_CQ_USER_WIDTH'] = 88 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 183
|
||||
parameters['AXIS_PCIE_CC_USER_WIDTH'] = 33 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 81
|
||||
parameters['RQ_SEQ_NUM_WIDTH'] = 6
|
||||
|
||||
extra_env = {f'PARAM_{k}': str(v) for k, v in parameters.items()}
|
||||
|
||||
|
@ -53,7 +53,7 @@ module fpga (
|
||||
parameter AXIS_PCIE_DATA_WIDTH = 512;
|
||||
parameter AXIS_PCIE_KEEP_WIDTH = (AXIS_PCIE_DATA_WIDTH/32);
|
||||
parameter AXIS_PCIE_RC_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 75 : 161;
|
||||
parameter AXIS_PCIE_RQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 60 : 137;
|
||||
parameter AXIS_PCIE_RQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 62 : 137;
|
||||
parameter AXIS_PCIE_CQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 85 : 183;
|
||||
parameter AXIS_PCIE_CC_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 33 : 81;
|
||||
parameter RC_STRADDLE = AXIS_PCIE_DATA_WIDTH >= 256;
|
||||
|
@ -156,8 +156,8 @@ example_core_pcie_us #(
|
||||
.PCIE_TAG_COUNT(PCIE_TAG_COUNT),
|
||||
.READ_OP_TABLE_SIZE(PCIE_TAG_COUNT),
|
||||
.READ_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)),
|
||||
.READ_CPLH_FC_LIMIT(128),
|
||||
.READ_CPLD_FC_LIMIT(2048),
|
||||
.READ_CPLH_FC_LIMIT(256),
|
||||
.READ_CPLD_FC_LIMIT(2048-256),
|
||||
.WRITE_OP_TABLE_SIZE(2**(RQ_SEQ_NUM_WIDTH-1)),
|
||||
.WRITE_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)),
|
||||
.BAR0_APERTURE(BAR0_APERTURE),
|
||||
@ -260,8 +260,7 @@ example_core_pcie_us_inst (
|
||||
*/
|
||||
.cfg_max_read_req(cfg_max_read_req),
|
||||
.cfg_max_payload(cfg_max_payload),
|
||||
// .cfg_rcb_status(cfg_rcb_status),
|
||||
.cfg_rcb_status(1'b1), // force RCB 128 due to insufficient CPLH limit in US+ PCIe HIP
|
||||
.cfg_rcb_status(cfg_rcb_status),
|
||||
|
||||
/*
|
||||
* Status
|
||||
|
@ -54,7 +54,6 @@ export PARAM_AXIS_PCIE_RQ_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_
|
||||
export PARAM_AXIS_PCIE_RC_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),75,161)
|
||||
export PARAM_AXIS_PCIE_CQ_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),88,183)
|
||||
export PARAM_AXIS_PCIE_CC_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),33,81)
|
||||
export PARAM_RQ_SEQ_NUM_WIDTH := 6
|
||||
|
||||
ifeq ($(SIM), icarus)
|
||||
PLUSARGS += -fst
|
||||
|
@ -398,7 +398,6 @@ def test_fpga_core(request):
|
||||
parameters['AXIS_PCIE_RC_USER_WIDTH'] = 75 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 161
|
||||
parameters['AXIS_PCIE_CQ_USER_WIDTH'] = 88 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 183
|
||||
parameters['AXIS_PCIE_CC_USER_WIDTH'] = 33 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 81
|
||||
parameters['RQ_SEQ_NUM_WIDTH'] = 6
|
||||
|
||||
extra_env = {f'PARAM_{k}': str(v) for k, v in parameters.items()}
|
||||
|
||||
|
@ -58,7 +58,7 @@ module fpga (
|
||||
parameter AXIS_PCIE_DATA_WIDTH = 128;
|
||||
parameter AXIS_PCIE_KEEP_WIDTH = (AXIS_PCIE_DATA_WIDTH/32);
|
||||
parameter AXIS_PCIE_RC_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 75 : 161;
|
||||
parameter AXIS_PCIE_RQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 60 : 137;
|
||||
parameter AXIS_PCIE_RQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 62 : 137;
|
||||
parameter AXIS_PCIE_CQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 85 : 183;
|
||||
parameter AXIS_PCIE_CC_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 33 : 81;
|
||||
parameter RC_STRADDLE = AXIS_PCIE_DATA_WIDTH >= 256;
|
||||
|
@ -161,8 +161,8 @@ example_core_pcie_us #(
|
||||
.PCIE_TAG_COUNT(PCIE_TAG_COUNT),
|
||||
.READ_OP_TABLE_SIZE(PCIE_TAG_COUNT),
|
||||
.READ_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)),
|
||||
.READ_CPLH_FC_LIMIT(128),
|
||||
.READ_CPLD_FC_LIMIT(2048),
|
||||
.READ_CPLH_FC_LIMIT(256),
|
||||
.READ_CPLD_FC_LIMIT(2048-256),
|
||||
.WRITE_OP_TABLE_SIZE(2**(RQ_SEQ_NUM_WIDTH-1)),
|
||||
.WRITE_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)),
|
||||
.BAR0_APERTURE(BAR0_APERTURE),
|
||||
@ -265,8 +265,7 @@ example_core_pcie_us_inst (
|
||||
*/
|
||||
.cfg_max_read_req(cfg_max_read_req),
|
||||
.cfg_max_payload(cfg_max_payload),
|
||||
// .cfg_rcb_status(cfg_rcb_status),
|
||||
.cfg_rcb_status(1'b1), // force RCB 128 due to insufficient CPLH limit in US+ PCIe HIP
|
||||
.cfg_rcb_status(cfg_rcb_status),
|
||||
|
||||
/*
|
||||
* Status
|
||||
|
@ -54,7 +54,6 @@ export PARAM_AXIS_PCIE_RQ_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_
|
||||
export PARAM_AXIS_PCIE_RC_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),75,161)
|
||||
export PARAM_AXIS_PCIE_CQ_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),88,183)
|
||||
export PARAM_AXIS_PCIE_CC_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),33,81)
|
||||
export PARAM_RQ_SEQ_NUM_WIDTH := 6
|
||||
|
||||
ifeq ($(SIM), icarus)
|
||||
PLUSARGS += -fst
|
||||
|
@ -403,7 +403,6 @@ def test_fpga_core(request):
|
||||
parameters['AXIS_PCIE_RC_USER_WIDTH'] = 75 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 161
|
||||
parameters['AXIS_PCIE_CQ_USER_WIDTH'] = 88 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 183
|
||||
parameters['AXIS_PCIE_CC_USER_WIDTH'] = 33 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 81
|
||||
parameters['RQ_SEQ_NUM_WIDTH'] = 6
|
||||
|
||||
extra_env = {f'PARAM_{k}': str(v) for k, v in parameters.items()}
|
||||
|
||||
|
@ -103,6 +103,8 @@ static void dma_block_read(struct example_dev *edev,
|
||||
|
||||
if ((ioread32(edev->bar[0] + 0x001000) & 1) != 0)
|
||||
dev_warn(edev->dev, "%s: operation timed out", __func__);
|
||||
if ((ioread32(edev->bar[0] + 0x000000) & 0x300) != 0)
|
||||
dev_warn(edev->dev, "%s: DMA engine busy", __func__);
|
||||
}
|
||||
|
||||
static void dma_block_write(struct example_dev *edev,
|
||||
@ -157,15 +159,22 @@ static void dma_block_write(struct example_dev *edev,
|
||||
|
||||
if ((ioread32(edev->bar[0] + 0x001100) & 1) != 0)
|
||||
dev_warn(edev->dev, "%s: operation timed out", __func__);
|
||||
if ((ioread32(edev->bar[0] + 0x000000) & 0x300) != 0)
|
||||
dev_warn(edev->dev, "%s: DMA engine busy", __func__);
|
||||
}
|
||||
|
||||
static void dma_block_read_bench(struct example_dev *edev,
|
||||
dma_addr_t dma_addr, u64 size, u64 stride, u64 count)
|
||||
{
|
||||
u64 cycles;
|
||||
u32 rd_req;
|
||||
u32 rd_cpl;
|
||||
|
||||
udelay(5);
|
||||
|
||||
rd_req = ioread32(edev->bar[0] + 0x000020);
|
||||
rd_cpl = ioread32(edev->bar[0] + 0x000024);
|
||||
|
||||
dma_block_read(edev, dma_addr, 0, 0x3fff, stride,
|
||||
0, 0, 0x3fff, stride, size, count);
|
||||
|
||||
@ -173,17 +182,23 @@ static void dma_block_read_bench(struct example_dev *edev,
|
||||
|
||||
udelay(5);
|
||||
|
||||
dev_info(edev->dev, "read %lld blocks of %lld bytes (stride %lld) in %lld ns: %lld Mbps",
|
||||
count, size, stride, cycles * 4, size * count * 8 * 1000 / (cycles * 4));
|
||||
rd_req = ioread32(edev->bar[0] + 0x000020) - rd_req;
|
||||
rd_cpl = ioread32(edev->bar[0] + 0x000024) - rd_cpl;
|
||||
|
||||
dev_info(edev->dev, "read %lld blocks of %lld bytes (total %lld B, stride %lld) in %lld ns (%d req %d cpl): %lld Mbps",
|
||||
count, size, count*size, stride, cycles * 4, rd_req, rd_cpl, size * count * 8 * 1000 / (cycles * 4));
|
||||
}
|
||||
|
||||
static void dma_block_write_bench(struct example_dev *edev,
|
||||
dma_addr_t dma_addr, u64 size, u64 stride, u64 count)
|
||||
{
|
||||
u64 cycles;
|
||||
u32 wr_req;
|
||||
|
||||
udelay(5);
|
||||
|
||||
wr_req = ioread32(edev->bar[0] + 0x000028);
|
||||
|
||||
dma_block_write(edev, dma_addr, 0, 0x3fff, stride,
|
||||
0, 0, 0x3fff, stride, size, count);
|
||||
|
||||
@ -191,8 +206,83 @@ static void dma_block_write_bench(struct example_dev *edev,
|
||||
|
||||
udelay(5);
|
||||
|
||||
dev_info(edev->dev, "wrote %lld blocks of %lld bytes (stride %lld) in %lld ns: %lld Mbps",
|
||||
count, size, stride, cycles * 4, size * count * 8 * 1000 / (cycles * 4));
|
||||
wr_req = ioread32(edev->bar[0] + 0x000028) - wr_req;
|
||||
|
||||
dev_info(edev->dev, "wrote %lld blocks of %lld bytes (total %lld B, stride %lld) in %lld ns (%d req): %lld Mbps",
|
||||
count, size, count*size, stride, cycles * 4, wr_req, size * count * 8 * 1000 / (cycles * 4));
|
||||
}
|
||||
|
||||
static void dma_cpl_buf_test(struct example_dev *edev, dma_addr_t dma_addr,
|
||||
u64 size, u64 stride, u64 count, int stall)
|
||||
{
|
||||
unsigned long t;
|
||||
u64 cycles;
|
||||
u32 rd_req;
|
||||
u32 rd_cpl;
|
||||
|
||||
rd_req = ioread32(edev->bar[0] + 0x000020);
|
||||
rd_cpl = ioread32(edev->bar[0] + 0x000024);
|
||||
|
||||
// DMA base address
|
||||
iowrite32(dma_addr & 0xffffffff, edev->bar[0] + 0x001080);
|
||||
iowrite32((dma_addr >> 32) & 0xffffffff, edev->bar[0] + 0x001084);
|
||||
// DMA offset address
|
||||
iowrite32(0, edev->bar[0] + 0x001088);
|
||||
iowrite32(0, edev->bar[0] + 0x00108c);
|
||||
// DMA offset mask
|
||||
iowrite32(0x3fff, edev->bar[0] + 0x001090);
|
||||
iowrite32(0, edev->bar[0] + 0x001094);
|
||||
// DMA stride
|
||||
iowrite32(stride & 0xffffffff, edev->bar[0] + 0x001098);
|
||||
iowrite32((stride >> 32) & 0xffffffff, edev->bar[0] + 0x00109c);
|
||||
// RAM base address
|
||||
iowrite32(0, edev->bar[0] + 0x0010c0);
|
||||
iowrite32(0, edev->bar[0] + 0x0010c4);
|
||||
// RAM offset address
|
||||
iowrite32(0, edev->bar[0] + 0x0010c8);
|
||||
iowrite32(0, edev->bar[0] + 0x0010cc);
|
||||
// RAM offset mask
|
||||
iowrite32(0x3fff, edev->bar[0] + 0x0010d0);
|
||||
iowrite32(0, edev->bar[0] + 0x0010d4);
|
||||
// RAM stride
|
||||
iowrite32(stride & 0xffffffff, edev->bar[0] + 0x0010d8);
|
||||
iowrite32((stride >> 32) & 0xffffffff, edev->bar[0] + 0x0010dc);
|
||||
// clear cycle count
|
||||
iowrite32(0, edev->bar[0] + 0x001008);
|
||||
iowrite32(0, edev->bar[0] + 0x00100c);
|
||||
// block length
|
||||
iowrite32(size, edev->bar[0] + 0x001010);
|
||||
// block count
|
||||
iowrite32(count, edev->bar[0] + 0x001018);
|
||||
|
||||
if (stall)
|
||||
iowrite32(stall, edev->bar[0] + 0x000040);
|
||||
|
||||
// start
|
||||
iowrite32(1, edev->bar[0] + 0x001000);
|
||||
|
||||
if (stall)
|
||||
msleep(10);
|
||||
|
||||
// wait for transfer to complete
|
||||
t = jiffies + msecs_to_jiffies(20000);
|
||||
while (time_before(jiffies, t)) {
|
||||
if ((ioread32(edev->bar[0] + 0x001000) & 1) == 0)
|
||||
break;
|
||||
}
|
||||
|
||||
if ((ioread32(edev->bar[0] + 0x001000) & 1) != 0)
|
||||
dev_warn(edev->dev, "%s: operation timed out", __func__);
|
||||
if ((ioread32(edev->bar[0] + 0x000000) & 0x300) != 0)
|
||||
dev_warn(edev->dev, "%s: DMA engine busy", __func__);
|
||||
|
||||
cycles = ioread32(edev->bar[0] + 0x001008);
|
||||
|
||||
rd_req = ioread32(edev->bar[0] + 0x000020) - rd_req;
|
||||
rd_cpl = ioread32(edev->bar[0] + 0x000024) - rd_cpl;
|
||||
|
||||
dev_info(edev->dev, "read %lld x %lld B (total %lld B %lld CPLD, stride %lld) in %lld ns (%d req %d cpl): %lld Mbps",
|
||||
count, size, count*size, count*((size+15) / 16), stride, cycles * 4, rd_req, rd_cpl, size * count * 8 * 1000 / (cycles * 4));
|
||||
}
|
||||
|
||||
static irqreturn_t edev_intr(int irq, void *data)
|
||||
@ -227,16 +317,20 @@ static int edev_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
|
||||
if (pdev->pcie_cap) {
|
||||
u16 devctl;
|
||||
u32 lnkcap;
|
||||
u16 lnkctl;
|
||||
u16 lnksta;
|
||||
|
||||
pci_read_config_word(pdev, pdev->pcie_cap + PCI_EXP_DEVCTL, &devctl);
|
||||
pci_read_config_dword(pdev, pdev->pcie_cap + PCI_EXP_LNKCAP, &lnkcap);
|
||||
pci_read_config_word(pdev, pdev->pcie_cap + PCI_EXP_LNKCTL, &lnkctl);
|
||||
pci_read_config_word(pdev, pdev->pcie_cap + PCI_EXP_LNKSTA, &lnksta);
|
||||
|
||||
dev_info(dev, " Max payload size: %d bytes",
|
||||
128 << ((devctl & PCI_EXP_DEVCTL_PAYLOAD) >> 5));
|
||||
dev_info(dev, " Max read request size: %d bytes",
|
||||
128 << ((devctl & PCI_EXP_DEVCTL_READRQ) >> 12));
|
||||
dev_info(dev, " Read completion boundary: %d bytes",
|
||||
lnkctl & PCI_EXP_LNKCTL_RCB ? 128 : 64);
|
||||
dev_info(dev, " Link capability: gen %d x%d",
|
||||
lnkcap & PCI_EXP_LNKCAP_SLS, (lnkcap & PCI_EXP_LNKCAP_MLW) >> 4);
|
||||
dev_info(dev, " Link status: gen %d x%d",
|
||||
@ -361,6 +455,7 @@ static int edev_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
|
||||
msleep(1);
|
||||
|
||||
dev_info(dev, "Read status");
|
||||
dev_info(dev, "%08x", ioread32(edev->bar[0] + 0x000000));
|
||||
dev_info(dev, "%08x", ioread32(edev->bar[0] + 0x000118));
|
||||
|
||||
dev_info(dev, "start copy to host");
|
||||
@ -374,6 +469,7 @@ static int edev_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
|
||||
msleep(1);
|
||||
|
||||
dev_info(dev, "Read status");
|
||||
dev_info(dev, "%08x", ioread32(edev->bar[0] + 0x000000));
|
||||
dev_info(dev, "%08x", ioread32(edev->bar[0] + 0x000218));
|
||||
|
||||
dev_info(dev, "read test data");
|
||||
@ -398,6 +494,7 @@ static int edev_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
|
||||
msleep(1);
|
||||
|
||||
dev_info(dev, "Read status");
|
||||
dev_info(dev, "%08x", ioread32(edev->bar[0] + 0x000000));
|
||||
dev_info(dev, "%08x", ioread32(edev->bar[0] + 0x000218));
|
||||
|
||||
dev_info(dev, "read data");
|
||||
@ -407,31 +504,90 @@ static int edev_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
|
||||
if (!mismatch) {
|
||||
u64 size;
|
||||
u64 stride;
|
||||
u64 count;
|
||||
|
||||
dev_info(dev, "disable interrupts");
|
||||
iowrite32(0x0, edev->bar[0] + 0x000008);
|
||||
|
||||
dev_info(dev, "test RX completion buffer (CPLH, 8)");
|
||||
|
||||
size = 8;
|
||||
stride = size;
|
||||
for (count = 32; count <= 256; count += 8) {
|
||||
dma_cpl_buf_test(edev,
|
||||
edev->dma_region_addr + 0x0000,
|
||||
size, stride, count, 100000);
|
||||
if ((ioread32(edev->bar[0] + 0x000000) & 0x300) != 0)
|
||||
goto out;
|
||||
}
|
||||
|
||||
dev_info(dev, "test RX completion buffer (CPLH, unaligned 8+64)");
|
||||
|
||||
size = 8+64;
|
||||
stride = 0;
|
||||
for (count = 8; count <= 256; count += 8) {
|
||||
dma_cpl_buf_test(edev,
|
||||
edev->dma_region_addr + 128 - 8,
|
||||
size, stride, count, 400000);
|
||||
if ((ioread32(edev->bar[0] + 0x000000) & 0x300) != 0)
|
||||
goto out;
|
||||
}
|
||||
|
||||
dev_info(dev, "test RX completion buffer (CPLH, unaligned 8+128+8)");
|
||||
|
||||
size = 8+128+8;
|
||||
stride = 0;
|
||||
for (count = 8; count <= 256; count += 8) {
|
||||
dma_cpl_buf_test(edev,
|
||||
edev->dma_region_addr + 128 - 8,
|
||||
size, stride, count, 100000);
|
||||
if ((ioread32(edev->bar[0] + 0x000000) & 0x300) != 0)
|
||||
goto out;
|
||||
}
|
||||
|
||||
dev_info(dev, "test RX completion buffer (CPLD)");
|
||||
|
||||
size = 512;
|
||||
stride = size;
|
||||
for (count = 8; count <= 256; count += 8) {
|
||||
dma_cpl_buf_test(edev,
|
||||
edev->dma_region_addr + 0x0000,
|
||||
size, stride, count, 100000);
|
||||
if ((ioread32(edev->bar[0] + 0x000000) & 0x300) != 0)
|
||||
goto out;
|
||||
}
|
||||
|
||||
dev_info(dev, "perform block reads (dma_alloc_coherent)");
|
||||
|
||||
count = 10000;
|
||||
for (size = 1; size <= 8192; size *= 2) {
|
||||
for (stride = size; stride <= max(size, 256llu); stride *= 2) {
|
||||
dma_block_read_bench(edev,
|
||||
edev->dma_region_addr + 0x0000,
|
||||
size, stride, 10000);
|
||||
size, stride, count);
|
||||
if ((ioread32(edev->bar[0] + 0x000000) & 0x300) != 0)
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
dev_info(dev, "perform block writes (dma_alloc_coherent)");
|
||||
|
||||
count = 10000;
|
||||
for (size = 1; size <= 8192; size *= 2) {
|
||||
for (stride = size; stride <= max(size, 256llu); stride *= 2) {
|
||||
dma_block_write_bench(edev,
|
||||
edev->dma_region_addr + 0x0000,
|
||||
size, stride, 10000);
|
||||
size, stride, count);
|
||||
if ((ioread32(edev->bar[0] + 0x000000) & 0x300) != 0)
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
out:
|
||||
dev_info(dev, "Read status");
|
||||
dev_info(dev, "%08x", ioread32(edev->bar[0] + 0x000000));
|
||||
|
||||
// probe complete
|
||||
return 0;
|
||||
|
||||
|
@ -152,7 +152,18 @@ module example_core #
|
||||
*/
|
||||
output wire [IRQ_INDEX_WIDTH-1:0] irq_index,
|
||||
output wire irq_valid,
|
||||
input wire irq_ready
|
||||
input wire irq_ready,
|
||||
|
||||
/*
|
||||
* Control and status
|
||||
*/
|
||||
output wire dma_enable,
|
||||
input wire dma_rd_busy,
|
||||
input wire dma_wr_busy,
|
||||
input wire dma_rd_req,
|
||||
input wire dma_rd_cpl,
|
||||
input wire dma_wr_req,
|
||||
output wire rx_cpl_stall
|
||||
);
|
||||
|
||||
localparam RAM_ADDR_IMM_WIDTH = (DMA_IMM_ENABLE && (DMA_IMM_WIDTH > RAM_ADDR_WIDTH)) ? DMA_IMM_WIDTH : RAM_ADDR_WIDTH;
|
||||
@ -203,6 +214,9 @@ reg axil_ctrl_rvalid_reg = 1'b0, axil_ctrl_rvalid_next;
|
||||
reg [63:0] cycle_count_reg = 0;
|
||||
reg [15:0] dma_read_active_count_reg = 0;
|
||||
reg [15:0] dma_write_active_count_reg = 0;
|
||||
reg [31:0] dma_rd_req_count_reg = 0;
|
||||
reg [31:0] dma_rd_cpl_count_reg = 0;
|
||||
reg [31:0] dma_wr_req_count_reg = 0;
|
||||
|
||||
reg [DMA_ADDR_WIDTH-1:0] dma_read_desc_dma_addr_reg = 0, dma_read_desc_dma_addr_next;
|
||||
reg [RAM_ADDR_WIDTH-1:0] dma_read_desc_ram_addr_reg = 0, dma_read_desc_ram_addr_next;
|
||||
@ -230,6 +244,9 @@ reg dma_rd_int_en_reg = 0, dma_rd_int_en_next;
|
||||
reg dma_wr_int_en_reg = 0, dma_wr_int_en_next;
|
||||
reg irq_valid_reg = 1'b0, irq_valid_next;
|
||||
|
||||
reg rx_cpl_stall_reg = 1'b0, rx_cpl_stall_next;
|
||||
reg [23:0] rx_cpl_stall_count_reg = 0, rx_cpl_stall_count_next;
|
||||
|
||||
reg dma_read_block_run_reg = 1'b0, dma_read_block_run_next;
|
||||
reg [DMA_LEN_WIDTH-1:0] dma_read_block_len_reg = 0, dma_read_block_len_next;
|
||||
reg [31:0] dma_read_block_count_reg = 0, dma_read_block_count_next;
|
||||
@ -284,6 +301,9 @@ assign m_axis_dma_write_desc_valid = dma_write_desc_valid_reg;
|
||||
assign irq_index = 0;
|
||||
assign irq_valid = irq_valid_reg;
|
||||
|
||||
assign dma_enable = dma_enable_reg;
|
||||
assign rx_cpl_stall = rx_cpl_stall_reg;
|
||||
|
||||
always @* begin
|
||||
axil_ctrl_awready_next = 1'b0;
|
||||
axil_ctrl_wready_next = 1'b0;
|
||||
@ -322,6 +342,9 @@ always @* begin
|
||||
|
||||
irq_valid_next = irq_valid_reg && !irq_ready;
|
||||
|
||||
rx_cpl_stall_next = 1'b0;
|
||||
rx_cpl_stall_count_next = rx_cpl_stall_count_reg;
|
||||
|
||||
dma_read_block_run_next = dma_read_block_run_reg;
|
||||
dma_read_block_len_next = dma_read_block_len_reg;
|
||||
dma_read_block_count_next = dma_read_block_count_reg;
|
||||
@ -348,6 +371,11 @@ always @* begin
|
||||
dma_write_block_ram_offset_mask_next = dma_write_block_ram_offset_mask_reg;
|
||||
dma_write_block_ram_stride_next = dma_write_block_ram_stride_reg;
|
||||
|
||||
if (rx_cpl_stall_count_reg) begin
|
||||
rx_cpl_stall_count_next = rx_cpl_stall_count_reg - 1;
|
||||
rx_cpl_stall_next = 1'b1;
|
||||
end
|
||||
|
||||
if (s_axil_ctrl_awvalid && s_axil_ctrl_wvalid && !axil_ctrl_bvalid_reg) begin
|
||||
// write operation
|
||||
axil_ctrl_awready_next = 1'b1;
|
||||
@ -364,6 +392,7 @@ always @* begin
|
||||
dma_rd_int_en_next = s_axil_ctrl_wdata[0];
|
||||
dma_wr_int_en_next = s_axil_ctrl_wdata[1];
|
||||
end
|
||||
16'h0040: rx_cpl_stall_count_next = s_axil_ctrl_wdata;
|
||||
// single read
|
||||
16'h0100: dma_read_desc_dma_addr_next[31:0] = s_axil_ctrl_wdata;
|
||||
16'h0104: dma_read_desc_dma_addr_next[63:32] = s_axil_ctrl_wdata;
|
||||
@ -437,6 +466,8 @@ always @* begin
|
||||
// control
|
||||
16'h0000: begin
|
||||
axil_ctrl_rdata_next[0] = dma_enable_reg;
|
||||
axil_ctrl_rdata_next[8] = dma_wr_busy;
|
||||
axil_ctrl_rdata_next[9] = dma_rd_busy;
|
||||
end
|
||||
16'h0008: begin
|
||||
axil_ctrl_rdata_next[0] = dma_rd_int_en_reg;
|
||||
@ -444,8 +475,12 @@ always @* begin
|
||||
end
|
||||
16'h0010: axil_ctrl_rdata_next = cycle_count_reg;
|
||||
16'h0014: axil_ctrl_rdata_next = cycle_count_reg >> 32;
|
||||
16'h0020: axil_ctrl_rdata_next = dma_read_active_count_reg;
|
||||
16'h0028: axil_ctrl_rdata_next = dma_write_active_count_reg;
|
||||
16'h0018: axil_ctrl_rdata_next = dma_read_active_count_reg;
|
||||
16'h001c: axil_ctrl_rdata_next = dma_write_active_count_reg;
|
||||
16'h0020: axil_ctrl_rdata_next = dma_rd_req_count_reg;
|
||||
16'h0024: axil_ctrl_rdata_next = dma_rd_cpl_count_reg;
|
||||
16'h0028: axil_ctrl_rdata_next = dma_wr_req_count_reg;
|
||||
16'h0040: axil_ctrl_rdata_next = rx_cpl_stall_count_reg;
|
||||
// single read
|
||||
16'h0100: axil_ctrl_rdata_next = dma_read_desc_dma_addr_reg;
|
||||
16'h0104: axil_ctrl_rdata_next = dma_read_desc_dma_addr_reg >> 32;
|
||||
@ -615,6 +650,10 @@ always @(posedge clk) begin
|
||||
+ (m_axis_dma_write_desc_valid && m_axis_dma_write_desc_ready)
|
||||
- s_axis_dma_write_desc_status_valid;
|
||||
|
||||
dma_rd_req_count_reg <= dma_rd_req_count_reg + dma_rd_req;
|
||||
dma_rd_cpl_count_reg <= dma_rd_cpl_count_reg + dma_rd_cpl;
|
||||
dma_wr_req_count_reg <= dma_wr_req_count_reg + dma_wr_req;
|
||||
|
||||
dma_read_desc_dma_addr_reg <= dma_read_desc_dma_addr_next;
|
||||
dma_read_desc_ram_addr_reg <= dma_read_desc_ram_addr_next;
|
||||
dma_read_desc_len_reg <= dma_read_desc_len_next;
|
||||
@ -643,6 +682,9 @@ always @(posedge clk) begin
|
||||
|
||||
irq_valid_reg <= irq_valid_next;
|
||||
|
||||
rx_cpl_stall_reg <= rx_cpl_stall_next;
|
||||
rx_cpl_stall_count_reg <= rx_cpl_stall_count_next;
|
||||
|
||||
dma_read_block_run_reg <= dma_read_block_run_next;
|
||||
dma_read_block_len_reg <= dma_read_block_len_next;
|
||||
dma_read_block_count_reg <= dma_read_block_count_next;
|
||||
@ -679,6 +721,9 @@ always @(posedge clk) begin
|
||||
cycle_count_reg <= 0;
|
||||
dma_read_active_count_reg <= 0;
|
||||
dma_write_active_count_reg <= 0;
|
||||
dma_rd_req_count_reg <= 0;
|
||||
dma_rd_cpl_count_reg <= 0;
|
||||
dma_wr_req_count_reg <= 0;
|
||||
|
||||
dma_read_desc_valid_reg <= 1'b0;
|
||||
dma_read_desc_status_valid_reg <= 1'b0;
|
||||
@ -688,6 +733,8 @@ always @(posedge clk) begin
|
||||
dma_rd_int_en_reg <= 1'b0;
|
||||
dma_wr_int_en_reg <= 1'b0;
|
||||
irq_valid_reg <= 1'b0;
|
||||
rx_cpl_stall_reg <= 1'b0;
|
||||
rx_cpl_stall_count_reg <= 0;
|
||||
dma_read_block_run_reg <= 1'b0;
|
||||
dma_write_block_run_reg <= 1'b0;
|
||||
end
|
||||
|
@ -172,7 +172,12 @@ module example_core_pcie #
|
||||
* Status
|
||||
*/
|
||||
output wire status_error_cor,
|
||||
output wire status_error_uncor
|
||||
output wire status_error_uncor,
|
||||
|
||||
/*
|
||||
* Control and status
|
||||
*/
|
||||
output wire rx_cpl_stall
|
||||
);
|
||||
|
||||
parameter AXIL_CTRL_DATA_WIDTH = 32;
|
||||
@ -345,6 +350,11 @@ wire [IRQ_INDEX_WIDTH-1:0] irq_index;
|
||||
wire irq_valid;
|
||||
wire irq_ready;
|
||||
|
||||
// Control and status
|
||||
wire dma_enable;
|
||||
wire dma_rd_busy;
|
||||
wire dma_wr_busy;
|
||||
|
||||
pcie_tlp_demux_bar #(
|
||||
.PORTS(3),
|
||||
.TLP_DATA_WIDTH(TLP_DATA_WIDTH),
|
||||
@ -900,8 +910,8 @@ dma_if_pcie_inst (
|
||||
/*
|
||||
* Configuration
|
||||
*/
|
||||
.read_enable(1'b1),
|
||||
.write_enable(1'b1),
|
||||
.read_enable(dma_enable),
|
||||
.write_enable(dma_enable),
|
||||
.ext_tag_enable(ext_tag_enable),
|
||||
.rcb_128b(rcb_128b),
|
||||
.requester_id({bus_num, 5'd0, 3'd0}),
|
||||
@ -911,8 +921,8 @@ dma_if_pcie_inst (
|
||||
/*
|
||||
* Status
|
||||
*/
|
||||
.status_rd_busy(),
|
||||
.status_wr_busy(),
|
||||
.status_rd_busy(dma_rd_busy),
|
||||
.status_wr_busy(dma_wr_busy),
|
||||
.status_error_cor(status_error_cor_int[3]),
|
||||
.status_error_uncor(status_error_uncor_int[3])
|
||||
);
|
||||
@ -1109,7 +1119,18 @@ core_inst (
|
||||
*/
|
||||
.irq_index(irq_index),
|
||||
.irq_valid(irq_valid),
|
||||
.irq_ready(irq_ready)
|
||||
.irq_ready(irq_ready),
|
||||
|
||||
/*
|
||||
* Control and status
|
||||
*/
|
||||
.dma_enable(dma_enable),
|
||||
.dma_rd_busy(dma_rd_busy),
|
||||
.dma_wr_busy(dma_wr_busy),
|
||||
.dma_rd_req(tx_rd_req_tlp_valid && tx_rd_req_tlp_sop && tx_rd_req_tlp_ready),
|
||||
.dma_rd_cpl(rx_cpl_tlp_valid && rx_cpl_tlp_sop && rx_cpl_tlp_ready),
|
||||
.dma_wr_req(tx_wr_req_tlp_valid && tx_wr_req_tlp_sop && tx_wr_req_tlp_ready),
|
||||
.rx_cpl_stall(rx_cpl_stall)
|
||||
);
|
||||
|
||||
endmodule
|
||||
|
@ -200,6 +200,12 @@ wire [2:0] max_payload_size;
|
||||
wire msix_enable;
|
||||
wire msix_mask;
|
||||
|
||||
wire rx_cpl_stall;
|
||||
|
||||
wire rx_st_ready_int;
|
||||
|
||||
assign rx_st_ready = rx_st_ready_int & !rx_cpl_stall;
|
||||
|
||||
pcie_ptile_if #(
|
||||
.SEG_COUNT(SEG_COUNT),
|
||||
.SEG_DATA_WIDTH(SEG_DATA_WIDTH),
|
||||
@ -226,7 +232,7 @@ pcie_ptile_if_inst (
|
||||
.rx_st_sop(rx_st_sop),
|
||||
.rx_st_eop(rx_st_eop),
|
||||
.rx_st_valid(rx_st_valid),
|
||||
.rx_st_ready(rx_st_ready),
|
||||
.rx_st_ready(rx_st_ready_int),
|
||||
.rx_st_hdr(rx_st_hdr),
|
||||
.rx_st_tlp_prfx(rx_st_tlp_prfx),
|
||||
.rx_st_vf_active(rx_st_vf_active),
|
||||
@ -488,7 +494,12 @@ core_pcie_inst (
|
||||
* Status
|
||||
*/
|
||||
.status_error_cor(),
|
||||
.status_error_uncor()
|
||||
.status_error_uncor(),
|
||||
|
||||
/*
|
||||
* Control and status
|
||||
*/
|
||||
.rx_cpl_stall(rx_cpl_stall)
|
||||
);
|
||||
|
||||
endmodule
|
||||
|
@ -58,7 +58,7 @@ module example_core_pcie_s10 #
|
||||
// Completion header flow control credit limit (read)
|
||||
parameter READ_CPLH_FC_LIMIT = 770,
|
||||
// Completion data flow control credit limit (read)
|
||||
parameter READ_CPLD_FC_LIMIT = 2500,
|
||||
parameter READ_CPLD_FC_LIMIT = 2400,
|
||||
// Operation table size (write)
|
||||
parameter WRITE_OP_TABLE_SIZE = 2**TX_SEQ_NUM_WIDTH,
|
||||
// In-flight transmit limit (write)
|
||||
@ -194,6 +194,12 @@ wire [2:0] max_payload_size;
|
||||
wire msix_enable;
|
||||
wire msix_mask;
|
||||
|
||||
wire rx_cpl_stall;
|
||||
|
||||
wire rx_st_ready_int;
|
||||
|
||||
assign rx_st_ready = rx_st_ready_int & !rx_cpl_stall;
|
||||
|
||||
pcie_s10_if #(
|
||||
.SEG_COUNT(SEG_COUNT),
|
||||
.SEG_DATA_WIDTH(SEG_DATA_WIDTH),
|
||||
@ -222,7 +228,7 @@ pcie_s10_if_inst (
|
||||
.rx_st_sop(rx_st_sop),
|
||||
.rx_st_eop(rx_st_eop),
|
||||
.rx_st_valid(rx_st_valid),
|
||||
.rx_st_ready(rx_st_ready),
|
||||
.rx_st_ready(rx_st_ready_int),
|
||||
.rx_st_vf_active(rx_st_vf_active),
|
||||
.rx_st_func_num(rx_st_func_num),
|
||||
.rx_st_vf_num(rx_st_vf_num),
|
||||
@ -495,7 +501,12 @@ core_pcie_inst (
|
||||
* Status
|
||||
*/
|
||||
.status_error_cor(),
|
||||
.status_error_uncor()
|
||||
.status_error_uncor(),
|
||||
|
||||
/*
|
||||
* Control and status
|
||||
*/
|
||||
.rx_cpl_stall(rx_cpl_stall)
|
||||
);
|
||||
|
||||
endmodule
|
||||
|
@ -68,9 +68,9 @@ module example_core_pcie_us #
|
||||
// In-flight transmit limit (read)
|
||||
parameter READ_TX_LIMIT = 2**(RQ_SEQ_NUM_WIDTH-1),
|
||||
// Completion header flow control credit limit (read)
|
||||
parameter READ_CPLH_FC_LIMIT = AXIS_PCIE_RQ_USER_WIDTH == 60 ? 64 : 128,
|
||||
parameter READ_CPLH_FC_LIMIT = AXIS_PCIE_RQ_USER_WIDTH == 60 ? 64 : 256,
|
||||
// Completion data flow control credit limit (read)
|
||||
parameter READ_CPLD_FC_LIMIT = AXIS_PCIE_RQ_USER_WIDTH == 60 ? 992 : 2048,
|
||||
parameter READ_CPLD_FC_LIMIT = AXIS_PCIE_RQ_USER_WIDTH == 60 ? 1024-64 : 2048-256,
|
||||
// Operation table size (write)
|
||||
parameter WRITE_OP_TABLE_SIZE = 2**(RQ_SEQ_NUM_WIDTH-1),
|
||||
// In-flight transmit limit (write)
|
||||
@ -259,6 +259,14 @@ wire ext_tag_enable;
|
||||
wire msix_enable;
|
||||
wire msix_mask;
|
||||
|
||||
wire rx_cpl_stall;
|
||||
|
||||
wire s_axis_rc_tvalid_int;
|
||||
wire s_axis_rc_tready_int;
|
||||
|
||||
assign s_axis_rc_tvalid_int = s_axis_rc_tvalid & ~rx_cpl_stall;
|
||||
assign s_axis_rc_tready = s_axis_rc_tready_int & ~rx_cpl_stall;
|
||||
|
||||
pcie_us_if #(
|
||||
.AXIS_PCIE_DATA_WIDTH(AXIS_PCIE_DATA_WIDTH),
|
||||
.AXIS_PCIE_KEEP_WIDTH(AXIS_PCIE_KEEP_WIDTH),
|
||||
@ -295,8 +303,8 @@ pcie_us_if_inst (
|
||||
*/
|
||||
.s_axis_rc_tdata(s_axis_rc_tdata),
|
||||
.s_axis_rc_tkeep(s_axis_rc_tkeep),
|
||||
.s_axis_rc_tvalid(s_axis_rc_tvalid),
|
||||
.s_axis_rc_tready(s_axis_rc_tready),
|
||||
.s_axis_rc_tvalid(s_axis_rc_tvalid_int),
|
||||
.s_axis_rc_tready(s_axis_rc_tready_int),
|
||||
.s_axis_rc_tlast(s_axis_rc_tlast),
|
||||
.s_axis_rc_tuser(s_axis_rc_tuser),
|
||||
|
||||
@ -624,7 +632,12 @@ core_pcie_inst (
|
||||
* Status
|
||||
*/
|
||||
.status_error_cor(status_error_cor),
|
||||
.status_error_uncor(status_error_uncor)
|
||||
.status_error_uncor(status_error_uncor),
|
||||
|
||||
/*
|
||||
* Control and status
|
||||
*/
|
||||
.rx_cpl_stall(rx_cpl_stall)
|
||||
);
|
||||
|
||||
endmodule
|
||||
|
@ -224,6 +224,8 @@ async def run_test(dut):
|
||||
await Timer(2000, 'ns')
|
||||
|
||||
# read status
|
||||
status = await dev_pf0_bar0.read_dword(0x000000)
|
||||
tb.log.info("DMA Status: 0x%x", status)
|
||||
val = await dev_pf0_bar0.read_dword(0x000118)
|
||||
tb.log.info("Status: 0x%x", val)
|
||||
assert val == 0x800000AA
|
||||
@ -238,6 +240,8 @@ async def run_test(dut):
|
||||
await Timer(2000, 'ns')
|
||||
|
||||
# read status
|
||||
status = await dev_pf0_bar0.read_dword(0x000000)
|
||||
tb.log.info("DMA Status: 0x%x", status)
|
||||
val = await dev_pf0_bar0.read_dword(0x000218)
|
||||
tb.log.info("Status: 0x%x", val)
|
||||
assert val == 0x80000055
|
||||
@ -258,6 +262,8 @@ async def run_test(dut):
|
||||
await Timer(2000, 'ns')
|
||||
|
||||
# read status
|
||||
status = await dev_pf0_bar0.read_dword(0x000000)
|
||||
tb.log.info("DMA Status: 0x%x", status)
|
||||
val = await dev_pf0_bar0.read_dword(0x000218)
|
||||
tb.log.info("Status: 0x%x", val)
|
||||
assert val == 0x800000AA
|
||||
@ -321,11 +327,15 @@ async def run_test(dut):
|
||||
await dev_pf0_bar0.write_dword(0x001000, 1)
|
||||
|
||||
for k in range(10):
|
||||
cnt = await dev_pf0_bar0.read_dword(0x001018)
|
||||
await Timer(1000, 'ns')
|
||||
if cnt == 0:
|
||||
run = await dev_pf0_bar0.read_dword(0x001000)
|
||||
if run == 0:
|
||||
break
|
||||
|
||||
# read status
|
||||
status = await dev_pf0_bar0.read_dword(0x000000)
|
||||
tb.log.info("DMA Status: 0x%x", status)
|
||||
|
||||
# configure operation (write)
|
||||
# DMA base address
|
||||
await dev_pf0_bar0.write_dword(0x001180, (mem_base+dest_offset) & 0xffffffff)
|
||||
@ -363,11 +373,17 @@ async def run_test(dut):
|
||||
await dev_pf0_bar0.write_dword(0x001100, 1)
|
||||
|
||||
for k in range(10):
|
||||
cnt = await dev_pf0_bar0.read_dword(0x001118)
|
||||
await Timer(1000, 'ns')
|
||||
if cnt == 0:
|
||||
run = await dev_pf0_bar0.read_dword(0x001100)
|
||||
if run == 0:
|
||||
break
|
||||
|
||||
# read status
|
||||
status = await dev_pf0_bar0.read_dword(0x000000)
|
||||
tb.log.info("DMA Status: 0x%x", status)
|
||||
|
||||
assert status & 0x300 == 0
|
||||
|
||||
tb.log.info("%s", mem.hexdump_str(dest_offset, region_len))
|
||||
|
||||
assert mem[src_offset:src_offset+region_len] == mem[dest_offset:dest_offset+region_len]
|
||||
|
@ -258,6 +258,211 @@ class TB(object):
|
||||
await self.rc.enumerate()
|
||||
|
||||
|
||||
async def dma_block_read_bench(tb, dev, addr, mask, size, stride, count):
|
||||
dev_pf0_bar0 = dev.bar_window[0]
|
||||
|
||||
rd_req = await dev_pf0_bar0.read_dword(0x000020)
|
||||
rd_cpl = await dev_pf0_bar0.read_dword(0x000024)
|
||||
|
||||
# configure operation (read)
|
||||
# DMA base address
|
||||
await dev_pf0_bar0.write_dword(0x001080, addr & 0xffffffff)
|
||||
await dev_pf0_bar0.write_dword(0x001084, (addr >> 32) & 0xffffffff)
|
||||
# DMA offset address
|
||||
await dev_pf0_bar0.write_dword(0x001088, 0)
|
||||
await dev_pf0_bar0.write_dword(0x00108c, 0)
|
||||
# DMA offset mask
|
||||
await dev_pf0_bar0.write_dword(0x001090, mask)
|
||||
await dev_pf0_bar0.write_dword(0x001094, 0)
|
||||
# DMA stride
|
||||
await dev_pf0_bar0.write_dword(0x001098, stride)
|
||||
await dev_pf0_bar0.write_dword(0x00109c, 0)
|
||||
# RAM base address
|
||||
await dev_pf0_bar0.write_dword(0x0010c0, 0)
|
||||
await dev_pf0_bar0.write_dword(0x0010c4, 0)
|
||||
# RAM offset address
|
||||
await dev_pf0_bar0.write_dword(0x0010c8, 0)
|
||||
await dev_pf0_bar0.write_dword(0x0010cc, 0)
|
||||
# RAM offset mask
|
||||
await dev_pf0_bar0.write_dword(0x0010d0, mask)
|
||||
await dev_pf0_bar0.write_dword(0x0010d4, 0)
|
||||
# RAM stride
|
||||
await dev_pf0_bar0.write_dword(0x0010d8, stride)
|
||||
await dev_pf0_bar0.write_dword(0x0010dc, 0)
|
||||
# clear cycle count
|
||||
await dev_pf0_bar0.write_dword(0x001008, 0)
|
||||
await dev_pf0_bar0.write_dword(0x00100c, 0)
|
||||
# block length
|
||||
await dev_pf0_bar0.write_dword(0x001010, size)
|
||||
# block count
|
||||
await dev_pf0_bar0.write_dword(0x001018, count)
|
||||
await dev_pf0_bar0.write_dword(0x00101c, 0)
|
||||
|
||||
# start
|
||||
await dev_pf0_bar0.write_dword(0x001000, 1)
|
||||
|
||||
for k in range(1000):
|
||||
await Timer(1000, 'ns')
|
||||
run = await dev_pf0_bar0.read_dword(0x001000)
|
||||
status = await dev_pf0_bar0.read_dword(0x000000)
|
||||
if run == 0 and status & 0x300 == 0:
|
||||
break
|
||||
|
||||
if run != 0:
|
||||
tb.log.warning("Operation timed out")
|
||||
if status & 0x300 != 0:
|
||||
tb.log.warning("DMA engine busy")
|
||||
|
||||
cycles = await dev_pf0_bar0.read_dword(0x001008)
|
||||
|
||||
rd_req = await dev_pf0_bar0.read_dword(0x000020) - rd_req
|
||||
rd_cpl = await dev_pf0_bar0.read_dword(0x000024) - rd_cpl
|
||||
|
||||
tb.log.info("read %d blocks of %d bytes (total %d B, stride %d) in %d ns (%d req %d cpl) %d Mbps",
|
||||
count, size, count*size, stride, cycles*4, rd_req, rd_cpl, size * count * 8 * 1000 / (cycles * 4))
|
||||
|
||||
assert status & 0x300 == 0
|
||||
|
||||
|
||||
async def dma_block_write_bench(tb, dev, addr, mask, size, stride, count):
|
||||
dev_pf0_bar0 = dev.bar_window[0]
|
||||
|
||||
wr_req = await dev_pf0_bar0.read_dword(0x000028)
|
||||
|
||||
# configure operation (read)
|
||||
# DMA base address
|
||||
await dev_pf0_bar0.write_dword(0x001180, addr & 0xffffffff)
|
||||
await dev_pf0_bar0.write_dword(0x001184, (addr >> 32) & 0xffffffff)
|
||||
# DMA offset address
|
||||
await dev_pf0_bar0.write_dword(0x001188, 0)
|
||||
await dev_pf0_bar0.write_dword(0x00118c, 0)
|
||||
# DMA offset mask
|
||||
await dev_pf0_bar0.write_dword(0x001190, mask)
|
||||
await dev_pf0_bar0.write_dword(0x001194, 0)
|
||||
# DMA stride
|
||||
await dev_pf0_bar0.write_dword(0x001198, stride)
|
||||
await dev_pf0_bar0.write_dword(0x00119c, 0)
|
||||
# RAM base address
|
||||
await dev_pf0_bar0.write_dword(0x0011c0, 0)
|
||||
await dev_pf0_bar0.write_dword(0x0011c4, 0)
|
||||
# RAM offset address
|
||||
await dev_pf0_bar0.write_dword(0x0011c8, 0)
|
||||
await dev_pf0_bar0.write_dword(0x0011cc, 0)
|
||||
# RAM offset mask
|
||||
await dev_pf0_bar0.write_dword(0x0011d0, mask)
|
||||
await dev_pf0_bar0.write_dword(0x0011d4, 0)
|
||||
# RAM stride
|
||||
await dev_pf0_bar0.write_dword(0x0011d8, stride)
|
||||
await dev_pf0_bar0.write_dword(0x0011dc, 0)
|
||||
# clear cycle count
|
||||
await dev_pf0_bar0.write_dword(0x001108, 0)
|
||||
await dev_pf0_bar0.write_dword(0x00110c, 0)
|
||||
# block length
|
||||
await dev_pf0_bar0.write_dword(0x001110, size)
|
||||
# block count
|
||||
await dev_pf0_bar0.write_dword(0x001118, count)
|
||||
await dev_pf0_bar0.write_dword(0x00111c, 0)
|
||||
|
||||
# start
|
||||
await dev_pf0_bar0.write_dword(0x001100, 1)
|
||||
|
||||
for k in range(1000):
|
||||
await Timer(1000, 'ns')
|
||||
run = await dev_pf0_bar0.read_dword(0x001100)
|
||||
status = await dev_pf0_bar0.read_dword(0x000000)
|
||||
if run == 0 and status & 0x300 == 0:
|
||||
break
|
||||
|
||||
if run != 0:
|
||||
tb.log.warning("Operation timed out")
|
||||
if status & 0x300 != 0:
|
||||
tb.log.warning("DMA engine busy")
|
||||
|
||||
cycles = await dev_pf0_bar0.read_dword(0x001108)
|
||||
|
||||
wr_req = await dev_pf0_bar0.read_dword(0x000028) - wr_req
|
||||
|
||||
tb.log.info("wrote %d blocks of %d bytes (total %d B, stride %d) in %d ns (%d req) %d Mbps",
|
||||
count, size, count*size, stride, cycles*4, wr_req, size * count * 8 * 1000 / (cycles * 4))
|
||||
|
||||
assert status & 0x300 == 0
|
||||
|
||||
|
||||
async def dma_cpl_buf_test(tb, dev, addr, mask, size, stride, count, stall):
    """Exercise the RX completion buffer with back-pressured DMA reads.

    Programs the block-read engine (registers 0x0010xx), optionally stalls
    the RX datapath so read completions accumulate in the completion
    buffer, starts the engine, polls for completion, and logs throughput
    along with the read request / completion counter deltas.

    Args:
        tb: testbench object; provides ``log`` and ``dut.clk``.
        dev: PCIe device; register access goes through BAR0 window.
        addr: DMA base address in host memory.
        mask: DMA/RAM offset mask (region length - 1).
        size: block length in bytes.
        stride: address increment between blocks.
        count: number of blocks to read.
        stall: if nonzero, written to register 0x000040 to stall RX,
            and also the number of clock cycles to wait after starting.

    Raises:
        AssertionError: if DMA engine status bits 8-9 are still set
            after the run (engine busy / error).
    """
    dev_pf0_bar0 = dev.bar_window[0]

    # snapshot request/completion counters so deltas can be reported
    rd_req = await dev_pf0_bar0.read_dword(0x000020)
    rd_cpl = await dev_pf0_bar0.read_dword(0x000024)

    # configure operation (read)
    # DMA base address
    await dev_pf0_bar0.write_dword(0x001080, addr & 0xffffffff)
    await dev_pf0_bar0.write_dword(0x001084, (addr >> 32) & 0xffffffff)
    # DMA offset address
    await dev_pf0_bar0.write_dword(0x001088, 0)
    await dev_pf0_bar0.write_dword(0x00108c, 0)
    # DMA offset mask
    await dev_pf0_bar0.write_dword(0x001090, mask)
    await dev_pf0_bar0.write_dword(0x001094, 0)
    # DMA stride
    await dev_pf0_bar0.write_dword(0x001098, stride)
    await dev_pf0_bar0.write_dword(0x00109c, 0)
    # RAM base address
    await dev_pf0_bar0.write_dword(0x0010c0, 0)
    await dev_pf0_bar0.write_dword(0x0010c4, 0)
    # RAM offset address
    await dev_pf0_bar0.write_dword(0x0010c8, 0)
    await dev_pf0_bar0.write_dword(0x0010cc, 0)
    # RAM offset mask
    await dev_pf0_bar0.write_dword(0x0010d0, mask)
    await dev_pf0_bar0.write_dword(0x0010d4, 0)
    # RAM stride
    await dev_pf0_bar0.write_dword(0x0010d8, stride)
    await dev_pf0_bar0.write_dword(0x0010dc, 0)
    # clear cycle count
    await dev_pf0_bar0.write_dword(0x001008, 0)
    await dev_pf0_bar0.write_dword(0x00100c, 0)
    # block length
    await dev_pf0_bar0.write_dword(0x001010, size)
    # block count
    await dev_pf0_bar0.write_dword(0x001018, count)
    await dev_pf0_bar0.write_dword(0x00101c, 0)

    if stall:
        # stall RX
        await dev_pf0_bar0.write_dword(0x000040, stall)

    # start
    await dev_pf0_bar0.write_dword(0x001000, 1)

    # wait for stall
    if stall:
        for k in range(stall):
            await RisingEdge(tb.dut.clk)

    # poll until the run bit clears and the DMA engines go idle
    for k in range(100):
        await Timer(1000, 'ns')
        run = await dev_pf0_bar0.read_dword(0x001000)
        status = await dev_pf0_bar0.read_dword(0x000000)
        if run == 0 and status & 0x300 == 0:
            break

    if run != 0:
        tb.log.warning("Operation timed out")
    if status & 0x300 != 0:
        tb.log.warning("DMA engine busy")

    # cycle counter; 4 ns per cycle in the throughput computation below
    cycles = await dev_pf0_bar0.read_dword(0x001008)

    rd_req = await dev_pf0_bar0.read_dword(0x000020) - rd_req
    rd_cpl = await dev_pf0_bar0.read_dword(0x000024) - rd_cpl

    # count*((size+15)//16) is the expected CPLD credit count (16 B per credit)
    tb.log.info("read %d x %d B (total %d B %d CPLD, stride %d) in %d ns (%d req %d cpl) %d Mbps",
            count, size, count*size, count*((size+15)//16), stride, cycles*4, rd_req, rd_cpl, size * count * 8 * 1000 / (cycles * 4))

    assert status & 0x300 == 0
|
||||
|
||||
|
||||
@cocotb.test()
|
||||
async def run_test(dut):
|
||||
|
||||
@ -309,6 +514,8 @@ async def run_test(dut):
|
||||
await Timer(2000, 'ns')
|
||||
|
||||
# read status
|
||||
status = await dev_pf0_bar0.read_dword(0x000000)
|
||||
tb.log.info("DMA Status: 0x%x", status)
|
||||
val = await dev_pf0_bar0.read_dword(0x000118)
|
||||
tb.log.info("Status: 0x%x", val)
|
||||
assert val == 0x800000AA
|
||||
@ -323,6 +530,8 @@ async def run_test(dut):
|
||||
await Timer(2000, 'ns')
|
||||
|
||||
# read status
|
||||
status = await dev_pf0_bar0.read_dword(0x000000)
|
||||
tb.log.info("DMA Status: 0x%x", status)
|
||||
val = await dev_pf0_bar0.read_dword(0x000218)
|
||||
tb.log.info("Status: 0x%x", val)
|
||||
assert val == 0x80000055
|
||||
@ -343,6 +552,8 @@ async def run_test(dut):
|
||||
await Timer(2000, 'ns')
|
||||
|
||||
# read status
|
||||
status = await dev_pf0_bar0.read_dword(0x000000)
|
||||
tb.log.info("DMA Status: 0x%x", status)
|
||||
val = await dev_pf0_bar0.read_dword(0x000218)
|
||||
tb.log.info("Status: 0x%x", val)
|
||||
assert val == 0x800000AA
|
||||
@ -353,110 +564,66 @@ async def run_test(dut):
|
||||
|
||||
tb.log.info("Test DMA block operations")
|
||||
|
||||
# disable interrupts
|
||||
await dev_pf0_bar0.write_dword(0x000008, 0)
|
||||
|
||||
region_len = 0x2000
|
||||
src_offset = 0x0000
|
||||
dest_offset = 0x4000
|
||||
|
||||
block_size = 256
|
||||
block_stride = block_size
|
||||
block_count = 32
|
||||
|
||||
# write packet data
|
||||
mem[src_offset:src_offset+region_len] = bytearray([x % 256 for x in range(region_len)])
|
||||
|
||||
# enable DMA
|
||||
await dev_pf0_bar0.write_dword(0x000000, 1)
|
||||
# disable interrupts
|
||||
await dev_pf0_bar0.write_dword(0x000008, 0)
|
||||
|
||||
# configure operation (read)
|
||||
# DMA base address
|
||||
await dev_pf0_bar0.write_dword(0x001080, (mem_base+src_offset) & 0xffffffff)
|
||||
await dev_pf0_bar0.write_dword(0x001084, (mem_base+src_offset >> 32) & 0xffffffff)
|
||||
# DMA offset address
|
||||
await dev_pf0_bar0.write_dword(0x001088, 0)
|
||||
await dev_pf0_bar0.write_dword(0x00108c, 0)
|
||||
# DMA offset mask
|
||||
await dev_pf0_bar0.write_dword(0x001090, region_len-1)
|
||||
await dev_pf0_bar0.write_dword(0x001094, 0)
|
||||
# DMA stride
|
||||
await dev_pf0_bar0.write_dword(0x001098, block_stride)
|
||||
await dev_pf0_bar0.write_dword(0x00109c, 0)
|
||||
# RAM base address
|
||||
await dev_pf0_bar0.write_dword(0x0010c0, 0)
|
||||
await dev_pf0_bar0.write_dword(0x0010c4, 0)
|
||||
# RAM offset address
|
||||
await dev_pf0_bar0.write_dword(0x0010c8, 0)
|
||||
await dev_pf0_bar0.write_dword(0x0010cc, 0)
|
||||
# RAM offset mask
|
||||
await dev_pf0_bar0.write_dword(0x0010d0, region_len-1)
|
||||
await dev_pf0_bar0.write_dword(0x0010d4, 0)
|
||||
# RAM stride
|
||||
await dev_pf0_bar0.write_dword(0x0010d8, block_stride)
|
||||
await dev_pf0_bar0.write_dword(0x0010dc, 0)
|
||||
# clear cycle count
|
||||
await dev_pf0_bar0.write_dword(0x001008, 0)
|
||||
await dev_pf0_bar0.write_dword(0x00100c, 0)
|
||||
# block length
|
||||
await dev_pf0_bar0.write_dword(0x001010, block_size)
|
||||
# block count
|
||||
await dev_pf0_bar0.write_dword(0x001018, block_count)
|
||||
await dev_pf0_bar0.write_dword(0x00101c, 0)
|
||||
# start
|
||||
await dev_pf0_bar0.write_dword(0x001000, 1)
|
||||
|
||||
for k in range(10):
|
||||
cnt = await dev_pf0_bar0.read_dword(0x001018)
|
||||
await Timer(1000, 'ns')
|
||||
if cnt == 0:
|
||||
break
|
||||
|
||||
# configure operation (write)
|
||||
# DMA base address
|
||||
await dev_pf0_bar0.write_dword(0x001180, (mem_base+dest_offset) & 0xffffffff)
|
||||
await dev_pf0_bar0.write_dword(0x001184, (mem_base+dest_offset >> 32) & 0xffffffff)
|
||||
# DMA offset address
|
||||
await dev_pf0_bar0.write_dword(0x001188, 0)
|
||||
await dev_pf0_bar0.write_dword(0x00118c, 0)
|
||||
# DMA offset mask
|
||||
await dev_pf0_bar0.write_dword(0x001190, region_len-1)
|
||||
await dev_pf0_bar0.write_dword(0x001194, 0)
|
||||
# DMA stride
|
||||
await dev_pf0_bar0.write_dword(0x001198, block_stride)
|
||||
await dev_pf0_bar0.write_dword(0x00119c, 0)
|
||||
# RAM base address
|
||||
await dev_pf0_bar0.write_dword(0x0011c0, 0)
|
||||
await dev_pf0_bar0.write_dword(0x0011c4, 0)
|
||||
# RAM offset address
|
||||
await dev_pf0_bar0.write_dword(0x0011c8, 0)
|
||||
await dev_pf0_bar0.write_dword(0x0011cc, 0)
|
||||
# RAM offset mask
|
||||
await dev_pf0_bar0.write_dword(0x0011d0, region_len-1)
|
||||
await dev_pf0_bar0.write_dword(0x0011d4, 0)
|
||||
# RAM stride
|
||||
await dev_pf0_bar0.write_dword(0x0011d8, block_stride)
|
||||
await dev_pf0_bar0.write_dword(0x0011dc, 0)
|
||||
# clear cycle count
|
||||
await dev_pf0_bar0.write_dword(0x001108, 0)
|
||||
await dev_pf0_bar0.write_dword(0x00110c, 0)
|
||||
# block length
|
||||
await dev_pf0_bar0.write_dword(0x001110, block_size)
|
||||
# block count
|
||||
await dev_pf0_bar0.write_dword(0x001118, block_count)
|
||||
await dev_pf0_bar0.write_dword(0x00111c, 0)
|
||||
# start
|
||||
await dev_pf0_bar0.write_dword(0x001100, 1)
|
||||
|
||||
for k in range(10):
|
||||
cnt = await dev_pf0_bar0.read_dword(0x001118)
|
||||
await Timer(1000, 'ns')
|
||||
if cnt == 0:
|
||||
break
|
||||
await dma_block_read_bench(tb, dev, mem_base+src_offset, region_len-1, 256, 256, 32)
|
||||
await dma_block_write_bench(tb, dev, mem_base+dest_offset, region_len-1, 256, 256, 32)
|
||||
|
||||
tb.log.info("%s", mem.hexdump_str(dest_offset, region_len))
|
||||
|
||||
assert mem[src_offset:src_offset+region_len] == mem[dest_offset:dest_offset+region_len]
|
||||
|
||||
tb.log.info("Test RX completion buffer (CPLH, 8)")
|
||||
|
||||
tb.rc.split_on_all_rcb = True
|
||||
|
||||
size = 8
|
||||
stride = size
|
||||
for count in range(32, 256+1, 8):
|
||||
await dma_cpl_buf_test(tb, dev, mem_base, region_len-1, size, stride, count, 2000)
|
||||
|
||||
tb.log.info("Test RX completion buffer (CPLH, 8+64)")
|
||||
|
||||
size = 8+64
|
||||
stride = 0
|
||||
for count in range(8, 256+1, 8):
|
||||
await dma_cpl_buf_test(tb, dev, mem_base+128-8, region_len-1, size, stride, count, 2000)
|
||||
|
||||
tb.log.info("Test RX completion buffer (CPLH, 8+128+8)")
|
||||
|
||||
size = 8+128+8
|
||||
stride = 0
|
||||
for count in range(8, 256+1, 8):
|
||||
await dma_cpl_buf_test(tb, dev, mem_base+128-8, region_len-1, size, stride, count, 2000)
|
||||
|
||||
tb.rc.split_on_all_rcb = False
|
||||
|
||||
tb.log.info("Test RX completion buffer (CPLD)")
|
||||
|
||||
size = 512
|
||||
stride = size
|
||||
for count in range(8, 256+1, 8):
|
||||
await dma_cpl_buf_test(tb, dev, mem_base, region_len-1, size, stride, count, 4000)
|
||||
|
||||
tb.log.info("Perform block reads")
|
||||
|
||||
count = 100
|
||||
for size in [2**x for x in range(14)]:
|
||||
stride = size
|
||||
await dma_block_read_bench(tb, dev, mem_base, region_len-1, size, stride, count)
|
||||
|
||||
tb.log.info("Perform block writes")
|
||||
|
||||
count = 100
|
||||
for size in [2**x for x in range(14)]:
|
||||
stride = size
|
||||
await dma_block_write_bench(tb, dev, mem_base, region_len-1, size, stride, count)
|
||||
|
||||
await RisingEdge(dut.clk)
|
||||
await RisingEdge(dut.clk)
|
||||
|
||||
|
@ -57,7 +57,7 @@ VERILOG_SOURCES += ../../../../rtl/priority_encoder.v
|
||||
VERILOG_SOURCES += ../../../../rtl/pulse_merge.v
|
||||
|
||||
# module parameters
|
||||
export PARAM_SEG_COUNT := 1
|
||||
export PARAM_SEG_COUNT := 2
|
||||
export PARAM_SEG_DATA_WIDTH := 256
|
||||
export PARAM_SEG_EMPTY_WIDTH := $(shell python -c "print((($(PARAM_SEG_DATA_WIDTH)//32)-1).bit_length())" )
|
||||
export PARAM_TX_SEQ_NUM_WIDTH := 6
|
||||
@ -69,7 +69,7 @@ export PARAM_IMM_WIDTH := 32
|
||||
export PARAM_READ_OP_TABLE_SIZE := $(PARAM_PCIE_TAG_COUNT)
|
||||
export PARAM_READ_TX_LIMIT := $(shell echo "$$(( 1 << $(PARAM_TX_SEQ_NUM_WIDTH) ))" )
|
||||
export PARAM_READ_CPLH_FC_LIMIT := 770
|
||||
export PARAM_READ_CPLD_FC_LIMIT := 2500
|
||||
export PARAM_READ_CPLD_FC_LIMIT := 2400
|
||||
export PARAM_WRITE_OP_TABLE_SIZE := $(shell echo "$$(( 1 << $(PARAM_TX_SEQ_NUM_WIDTH) ))" )
|
||||
export PARAM_WRITE_TX_LIMIT := $(shell echo "$$(( 1 << $(PARAM_TX_SEQ_NUM_WIDTH) ))" )
|
||||
export PARAM_BAR0_APERTURE := 24
|
||||
|
@ -206,6 +206,211 @@ class TB(object):
|
||||
await self.rc.enumerate()
|
||||
|
||||
|
||||
async def dma_block_read_bench(tb, dev, addr, mask, size, stride, count):
    """Benchmark DMA block reads (host memory to device RAM).

    Programs the block-read engine (registers 0x0010xx), starts it,
    polls for completion, then logs throughput computed from the device
    cycle counter (4 ns per cycle) plus the read request / completion
    counter deltas.

    Args:
        tb: testbench object; provides ``log``.
        dev: PCIe device; register access goes through BAR0 window.
        addr: DMA base address in host memory.
        mask: DMA/RAM offset mask (region length - 1).
        size: block length in bytes.
        stride: address increment between blocks.
        count: number of blocks to read.

    Raises:
        AssertionError: if DMA engine status bits 8-9 are still set
            after the run (engine busy / error).
    """
    dev_pf0_bar0 = dev.bar_window[0]

    # snapshot request/completion counters so deltas can be reported
    rd_req = await dev_pf0_bar0.read_dword(0x000020)
    rd_cpl = await dev_pf0_bar0.read_dword(0x000024)

    # configure operation (read)
    # DMA base address
    await dev_pf0_bar0.write_dword(0x001080, addr & 0xffffffff)
    await dev_pf0_bar0.write_dword(0x001084, (addr >> 32) & 0xffffffff)
    # DMA offset address
    await dev_pf0_bar0.write_dword(0x001088, 0)
    await dev_pf0_bar0.write_dword(0x00108c, 0)
    # DMA offset mask
    await dev_pf0_bar0.write_dword(0x001090, mask)
    await dev_pf0_bar0.write_dword(0x001094, 0)
    # DMA stride
    await dev_pf0_bar0.write_dword(0x001098, stride)
    await dev_pf0_bar0.write_dword(0x00109c, 0)
    # RAM base address
    await dev_pf0_bar0.write_dword(0x0010c0, 0)
    await dev_pf0_bar0.write_dword(0x0010c4, 0)
    # RAM offset address
    await dev_pf0_bar0.write_dword(0x0010c8, 0)
    await dev_pf0_bar0.write_dword(0x0010cc, 0)
    # RAM offset mask
    await dev_pf0_bar0.write_dword(0x0010d0, mask)
    await dev_pf0_bar0.write_dword(0x0010d4, 0)
    # RAM stride
    await dev_pf0_bar0.write_dword(0x0010d8, stride)
    await dev_pf0_bar0.write_dword(0x0010dc, 0)
    # clear cycle count
    await dev_pf0_bar0.write_dword(0x001008, 0)
    await dev_pf0_bar0.write_dword(0x00100c, 0)
    # block length
    await dev_pf0_bar0.write_dword(0x001010, size)
    # block count
    await dev_pf0_bar0.write_dword(0x001018, count)
    await dev_pf0_bar0.write_dword(0x00101c, 0)

    # start
    await dev_pf0_bar0.write_dword(0x001000, 1)

    # poll until the run bit clears and the DMA engines go idle
    for k in range(1000):
        await Timer(1000, 'ns')
        run = await dev_pf0_bar0.read_dword(0x001000)
        status = await dev_pf0_bar0.read_dword(0x000000)
        if run == 0 and status & 0x300 == 0:
            break

    if run != 0:
        tb.log.warning("Operation timed out")
    if status & 0x300 != 0:
        tb.log.warning("DMA engine busy")

    # cycle counter; 4 ns per cycle in the throughput computation below
    cycles = await dev_pf0_bar0.read_dword(0x001008)

    rd_req = await dev_pf0_bar0.read_dword(0x000020) - rd_req
    rd_cpl = await dev_pf0_bar0.read_dword(0x000024) - rd_cpl

    tb.log.info("read %d blocks of %d bytes (total %d B, stride %d) in %d ns (%d req %d cpl) %d Mbps",
            count, size, count*size, stride, cycles*4, rd_req, rd_cpl, size * count * 8 * 1000 / (cycles * 4))

    assert status & 0x300 == 0
|
||||
|
||||
|
||||
async def dma_block_write_bench(tb, dev, addr, mask, size, stride, count):
    """Benchmark DMA block writes (device RAM to host memory).

    Programs the block-write engine (registers 0x0011xx), starts it,
    polls for completion, then logs throughput computed from the device
    cycle counter (4 ns per cycle) plus the write request counter delta.

    Args:
        tb: testbench object; provides ``log``.
        dev: PCIe device; register access goes through BAR0 window.
        addr: DMA base address in host memory.
        mask: DMA/RAM offset mask (region length - 1).
        size: block length in bytes.
        stride: address increment between blocks.
        count: number of blocks to write.

    Raises:
        AssertionError: if DMA engine status bits 8-9 are still set
            after the run (engine busy / error).
    """
    dev_pf0_bar0 = dev.bar_window[0]

    # snapshot request counter so the delta can be reported
    wr_req = await dev_pf0_bar0.read_dword(0x000028)

    # configure operation (write)
    # DMA base address
    await dev_pf0_bar0.write_dword(0x001180, addr & 0xffffffff)
    await dev_pf0_bar0.write_dword(0x001184, (addr >> 32) & 0xffffffff)
    # DMA offset address
    await dev_pf0_bar0.write_dword(0x001188, 0)
    await dev_pf0_bar0.write_dword(0x00118c, 0)
    # DMA offset mask
    await dev_pf0_bar0.write_dword(0x001190, mask)
    await dev_pf0_bar0.write_dword(0x001194, 0)
    # DMA stride
    await dev_pf0_bar0.write_dword(0x001198, stride)
    await dev_pf0_bar0.write_dword(0x00119c, 0)
    # RAM base address
    await dev_pf0_bar0.write_dword(0x0011c0, 0)
    await dev_pf0_bar0.write_dword(0x0011c4, 0)
    # RAM offset address
    await dev_pf0_bar0.write_dword(0x0011c8, 0)
    await dev_pf0_bar0.write_dword(0x0011cc, 0)
    # RAM offset mask
    await dev_pf0_bar0.write_dword(0x0011d0, mask)
    await dev_pf0_bar0.write_dword(0x0011d4, 0)
    # RAM stride
    await dev_pf0_bar0.write_dword(0x0011d8, stride)
    await dev_pf0_bar0.write_dword(0x0011dc, 0)
    # clear cycle count
    await dev_pf0_bar0.write_dword(0x001108, 0)
    await dev_pf0_bar0.write_dword(0x00110c, 0)
    # block length
    await dev_pf0_bar0.write_dword(0x001110, size)
    # block count
    await dev_pf0_bar0.write_dword(0x001118, count)
    await dev_pf0_bar0.write_dword(0x00111c, 0)

    # start
    await dev_pf0_bar0.write_dword(0x001100, 1)

    # poll until the run bit clears and the DMA engines go idle
    for k in range(1000):
        await Timer(1000, 'ns')
        run = await dev_pf0_bar0.read_dword(0x001100)
        status = await dev_pf0_bar0.read_dword(0x000000)
        if run == 0 and status & 0x300 == 0:
            break

    if run != 0:
        tb.log.warning("Operation timed out")
    if status & 0x300 != 0:
        tb.log.warning("DMA engine busy")

    # cycle counter; 4 ns per cycle in the throughput computation below
    cycles = await dev_pf0_bar0.read_dword(0x001108)

    wr_req = await dev_pf0_bar0.read_dword(0x000028) - wr_req

    tb.log.info("wrote %d blocks of %d bytes (total %d B, stride %d) in %d ns (%d req) %d Mbps",
            count, size, count*size, stride, cycles*4, wr_req, size * count * 8 * 1000 / (cycles * 4))

    assert status & 0x300 == 0
|
||||
|
||||
|
||||
async def dma_cpl_buf_test(tb, dev, addr, mask, size, stride, count, stall):
    """Exercise the RX completion buffer with back-pressured DMA reads.

    Programs the block-read engine (registers 0x0010xx), optionally stalls
    the RX datapath so read completions accumulate in the completion
    buffer, starts the engine, polls for completion, and logs throughput
    along with the read request / completion counter deltas.

    Args:
        tb: testbench object; provides ``log`` and ``dut.clk``.
        dev: PCIe device; register access goes through BAR0 window.
        addr: DMA base address in host memory.
        mask: DMA/RAM offset mask (region length - 1).
        size: block length in bytes.
        stride: address increment between blocks.
        count: number of blocks to read.
        stall: if nonzero, written to register 0x000040 to stall RX,
            and also the number of clock cycles to wait after starting.

    Raises:
        AssertionError: if DMA engine status bits 8-9 are still set
            after the run (engine busy / error).
    """
    dev_pf0_bar0 = dev.bar_window[0]

    # snapshot request/completion counters so deltas can be reported
    rd_req = await dev_pf0_bar0.read_dword(0x000020)
    rd_cpl = await dev_pf0_bar0.read_dword(0x000024)

    # configure operation (read)
    # DMA base address
    await dev_pf0_bar0.write_dword(0x001080, addr & 0xffffffff)
    await dev_pf0_bar0.write_dword(0x001084, (addr >> 32) & 0xffffffff)
    # DMA offset address
    await dev_pf0_bar0.write_dword(0x001088, 0)
    await dev_pf0_bar0.write_dword(0x00108c, 0)
    # DMA offset mask
    await dev_pf0_bar0.write_dword(0x001090, mask)
    await dev_pf0_bar0.write_dword(0x001094, 0)
    # DMA stride
    await dev_pf0_bar0.write_dword(0x001098, stride)
    await dev_pf0_bar0.write_dword(0x00109c, 0)
    # RAM base address
    await dev_pf0_bar0.write_dword(0x0010c0, 0)
    await dev_pf0_bar0.write_dword(0x0010c4, 0)
    # RAM offset address
    await dev_pf0_bar0.write_dword(0x0010c8, 0)
    await dev_pf0_bar0.write_dword(0x0010cc, 0)
    # RAM offset mask
    await dev_pf0_bar0.write_dword(0x0010d0, mask)
    await dev_pf0_bar0.write_dword(0x0010d4, 0)
    # RAM stride
    await dev_pf0_bar0.write_dword(0x0010d8, stride)
    await dev_pf0_bar0.write_dword(0x0010dc, 0)
    # clear cycle count
    await dev_pf0_bar0.write_dword(0x001008, 0)
    await dev_pf0_bar0.write_dword(0x00100c, 0)
    # block length
    await dev_pf0_bar0.write_dword(0x001010, size)
    # block count
    await dev_pf0_bar0.write_dword(0x001018, count)
    await dev_pf0_bar0.write_dword(0x00101c, 0)

    if stall:
        # stall RX
        await dev_pf0_bar0.write_dword(0x000040, stall)

    # start
    await dev_pf0_bar0.write_dword(0x001000, 1)

    # wait for stall
    if stall:
        for k in range(stall):
            await RisingEdge(tb.dut.clk)

    # poll until the run bit clears and the DMA engines go idle
    for k in range(100):
        await Timer(1000, 'ns')
        run = await dev_pf0_bar0.read_dword(0x001000)
        status = await dev_pf0_bar0.read_dword(0x000000)
        if run == 0 and status & 0x300 == 0:
            break

    if run != 0:
        tb.log.warning("Operation timed out")
    if status & 0x300 != 0:
        tb.log.warning("DMA engine busy")

    # cycle counter; 4 ns per cycle in the throughput computation below
    cycles = await dev_pf0_bar0.read_dword(0x001008)

    rd_req = await dev_pf0_bar0.read_dword(0x000020) - rd_req
    rd_cpl = await dev_pf0_bar0.read_dword(0x000024) - rd_cpl

    # count*((size+15)//16) is the expected CPLD credit count (16 B per credit)
    tb.log.info("read %d x %d B (total %d B %d CPLD, stride %d) in %d ns (%d req %d cpl) %d Mbps",
            count, size, count*size, count*((size+15)//16), stride, cycles*4, rd_req, rd_cpl, size * count * 8 * 1000 / (cycles * 4))

    assert status & 0x300 == 0
|
||||
|
||||
|
||||
@cocotb.test()
|
||||
async def run_test(dut):
|
||||
|
||||
@ -257,6 +462,8 @@ async def run_test(dut):
|
||||
await Timer(2000, 'ns')
|
||||
|
||||
# read status
|
||||
status = await dev_pf0_bar0.read_dword(0x000000)
|
||||
tb.log.info("DMA Status: 0x%x", status)
|
||||
val = await dev_pf0_bar0.read_dword(0x000118)
|
||||
tb.log.info("Status: 0x%x", val)
|
||||
assert val == 0x800000AA
|
||||
@ -271,6 +478,8 @@ async def run_test(dut):
|
||||
await Timer(2000, 'ns')
|
||||
|
||||
# read status
|
||||
status = await dev_pf0_bar0.read_dword(0x000000)
|
||||
tb.log.info("DMA Status: 0x%x", status)
|
||||
val = await dev_pf0_bar0.read_dword(0x000218)
|
||||
tb.log.info("Status: 0x%x", val)
|
||||
assert val == 0x80000055
|
||||
@ -291,6 +500,8 @@ async def run_test(dut):
|
||||
await Timer(2000, 'ns')
|
||||
|
||||
# read status
|
||||
status = await dev_pf0_bar0.read_dword(0x000000)
|
||||
tb.log.info("DMA Status: 0x%x", status)
|
||||
val = await dev_pf0_bar0.read_dword(0x000218)
|
||||
tb.log.info("Status: 0x%x", val)
|
||||
assert val == 0x800000AA
|
||||
@ -301,110 +512,66 @@ async def run_test(dut):
|
||||
|
||||
tb.log.info("Test DMA block operations")
|
||||
|
||||
# disable interrupts
|
||||
await dev_pf0_bar0.write_dword(0x000008, 0)
|
||||
|
||||
region_len = 0x2000
|
||||
src_offset = 0x0000
|
||||
dest_offset = 0x4000
|
||||
|
||||
block_size = 256
|
||||
block_stride = block_size
|
||||
block_count = 32
|
||||
|
||||
# write packet data
|
||||
mem[src_offset:src_offset+region_len] = bytearray([x % 256 for x in range(region_len)])
|
||||
|
||||
# enable DMA
|
||||
await dev_pf0_bar0.write_dword(0x000000, 1)
|
||||
# disable interrupts
|
||||
await dev_pf0_bar0.write_dword(0x000008, 0)
|
||||
|
||||
# configure operation (read)
|
||||
# DMA base address
|
||||
await dev_pf0_bar0.write_dword(0x001080, (mem_base+src_offset) & 0xffffffff)
|
||||
await dev_pf0_bar0.write_dword(0x001084, (mem_base+src_offset >> 32) & 0xffffffff)
|
||||
# DMA offset address
|
||||
await dev_pf0_bar0.write_dword(0x001088, 0)
|
||||
await dev_pf0_bar0.write_dword(0x00108c, 0)
|
||||
# DMA offset mask
|
||||
await dev_pf0_bar0.write_dword(0x001090, region_len-1)
|
||||
await dev_pf0_bar0.write_dword(0x001094, 0)
|
||||
# DMA stride
|
||||
await dev_pf0_bar0.write_dword(0x001098, block_stride)
|
||||
await dev_pf0_bar0.write_dword(0x00109c, 0)
|
||||
# RAM base address
|
||||
await dev_pf0_bar0.write_dword(0x0010c0, 0)
|
||||
await dev_pf0_bar0.write_dword(0x0010c4, 0)
|
||||
# RAM offset address
|
||||
await dev_pf0_bar0.write_dword(0x0010c8, 0)
|
||||
await dev_pf0_bar0.write_dword(0x0010cc, 0)
|
||||
# RAM offset mask
|
||||
await dev_pf0_bar0.write_dword(0x0010d0, region_len-1)
|
||||
await dev_pf0_bar0.write_dword(0x0010d4, 0)
|
||||
# RAM stride
|
||||
await dev_pf0_bar0.write_dword(0x0010d8, block_stride)
|
||||
await dev_pf0_bar0.write_dword(0x0010dc, 0)
|
||||
# clear cycle count
|
||||
await dev_pf0_bar0.write_dword(0x001008, 0)
|
||||
await dev_pf0_bar0.write_dword(0x00100c, 0)
|
||||
# block length
|
||||
await dev_pf0_bar0.write_dword(0x001010, block_size)
|
||||
# block count
|
||||
await dev_pf0_bar0.write_dword(0x001018, block_count)
|
||||
await dev_pf0_bar0.write_dword(0x00101c, 0)
|
||||
# start
|
||||
await dev_pf0_bar0.write_dword(0x001000, 1)
|
||||
|
||||
for k in range(10):
|
||||
cnt = await dev_pf0_bar0.read_dword(0x001018)
|
||||
await Timer(1000, 'ns')
|
||||
if cnt == 0:
|
||||
break
|
||||
|
||||
# configure operation (write)
|
||||
# DMA base address
|
||||
await dev_pf0_bar0.write_dword(0x001180, (mem_base+dest_offset) & 0xffffffff)
|
||||
await dev_pf0_bar0.write_dword(0x001184, (mem_base+dest_offset >> 32) & 0xffffffff)
|
||||
# DMA offset address
|
||||
await dev_pf0_bar0.write_dword(0x001188, 0)
|
||||
await dev_pf0_bar0.write_dword(0x00118c, 0)
|
||||
# DMA offset mask
|
||||
await dev_pf0_bar0.write_dword(0x001190, region_len-1)
|
||||
await dev_pf0_bar0.write_dword(0x001194, 0)
|
||||
# DMA stride
|
||||
await dev_pf0_bar0.write_dword(0x001198, block_stride)
|
||||
await dev_pf0_bar0.write_dword(0x00119c, 0)
|
||||
# RAM base address
|
||||
await dev_pf0_bar0.write_dword(0x0011c0, 0)
|
||||
await dev_pf0_bar0.write_dword(0x0011c4, 0)
|
||||
# RAM offset address
|
||||
await dev_pf0_bar0.write_dword(0x0011c8, 0)
|
||||
await dev_pf0_bar0.write_dword(0x0011cc, 0)
|
||||
# RAM offset mask
|
||||
await dev_pf0_bar0.write_dword(0x0011d0, region_len-1)
|
||||
await dev_pf0_bar0.write_dword(0x0011d4, 0)
|
||||
# RAM stride
|
||||
await dev_pf0_bar0.write_dword(0x0011d8, block_stride)
|
||||
await dev_pf0_bar0.write_dword(0x0011dc, 0)
|
||||
# clear cycle count
|
||||
await dev_pf0_bar0.write_dword(0x001108, 0)
|
||||
await dev_pf0_bar0.write_dword(0x00110c, 0)
|
||||
# block length
|
||||
await dev_pf0_bar0.write_dword(0x001110, block_size)
|
||||
# block count
|
||||
await dev_pf0_bar0.write_dword(0x001118, block_count)
|
||||
await dev_pf0_bar0.write_dword(0x00111c, 0)
|
||||
# start
|
||||
await dev_pf0_bar0.write_dword(0x001100, 1)
|
||||
|
||||
for k in range(10):
|
||||
cnt = await dev_pf0_bar0.read_dword(0x001118)
|
||||
await Timer(1000, 'ns')
|
||||
if cnt == 0:
|
||||
break
|
||||
await dma_block_read_bench(tb, dev, mem_base+src_offset, region_len-1, 256, 256, 32)
|
||||
await dma_block_write_bench(tb, dev, mem_base+dest_offset, region_len-1, 256, 256, 32)
|
||||
|
||||
tb.log.info("%s", mem.hexdump_str(dest_offset, region_len))
|
||||
|
||||
assert mem[src_offset:src_offset+region_len] == mem[dest_offset:dest_offset+region_len]
|
||||
|
||||
tb.log.info("Test RX completion buffer (CPLH, 8)")
|
||||
|
||||
tb.rc.split_on_all_rcb = True
|
||||
|
||||
size = 8
|
||||
stride = size
|
||||
for count in range(32, 256+1, 8):
|
||||
await dma_cpl_buf_test(tb, dev, mem_base, region_len-1, size, stride, count, 2000)
|
||||
|
||||
tb.log.info("Test RX completion buffer (CPLH, 8+64)")
|
||||
|
||||
size = 8+64
|
||||
stride = 0
|
||||
for count in range(8, 256+1, 8):
|
||||
await dma_cpl_buf_test(tb, dev, mem_base+128-8, region_len-1, size, stride, count, 2000)
|
||||
|
||||
tb.log.info("Test RX completion buffer (CPLH, 8+128+8)")
|
||||
|
||||
size = 8+128+8
|
||||
stride = 0
|
||||
for count in range(8, 256+1, 8):
|
||||
await dma_cpl_buf_test(tb, dev, mem_base+128-8, region_len-1, size, stride, count, 2000)
|
||||
|
||||
tb.rc.split_on_all_rcb = False
|
||||
|
||||
tb.log.info("Test RX completion buffer (CPLD)")
|
||||
|
||||
size = 512
|
||||
stride = size
|
||||
for count in range(8, 256+1, 8):
|
||||
await dma_cpl_buf_test(tb, dev, mem_base, region_len-1, size, stride, count, 4000)
|
||||
|
||||
tb.log.info("Perform block reads")
|
||||
|
||||
count = 100
|
||||
for size in [2**x for x in range(14)]:
|
||||
stride = size
|
||||
await dma_block_read_bench(tb, dev, mem_base, region_len-1, size, stride, count)
|
||||
|
||||
tb.log.info("Perform block writes")
|
||||
|
||||
count = 100
|
||||
for size in [2**x for x in range(14)]:
|
||||
stride = size
|
||||
await dma_block_write_bench(tb, dev, mem_base, region_len-1, size, stride, count)
|
||||
|
||||
await RisingEdge(dut.clk)
|
||||
await RisingEdge(dut.clk)
|
||||
|
||||
@ -466,7 +633,7 @@ def test_example_core_pcie_s10(request, data_width, l_tile):
|
||||
parameters['READ_OP_TABLE_SIZE'] = parameters['PCIE_TAG_COUNT']
|
||||
parameters['READ_TX_LIMIT'] = 2**parameters['TX_SEQ_NUM_WIDTH']
|
||||
parameters['READ_CPLH_FC_LIMIT'] = 770
|
||||
parameters['READ_CPLD_FC_LIMIT'] = 2500
|
||||
parameters['READ_CPLD_FC_LIMIT'] = 2400
|
||||
parameters['WRITE_OP_TABLE_SIZE'] = 2**parameters['TX_SEQ_NUM_WIDTH']
|
||||
parameters['WRITE_TX_LIMIT'] = 2**parameters['TX_SEQ_NUM_WIDTH']
|
||||
parameters['BAR0_APERTURE'] = 24
|
||||
|
@ -74,8 +74,8 @@ export PARAM_IMM_ENABLE := 1
|
||||
export PARAM_IMM_WIDTH := 32
|
||||
export PARAM_READ_OP_TABLE_SIZE := $(PARAM_PCIE_TAG_COUNT)
|
||||
export PARAM_READ_TX_LIMIT := $(shell echo "$$(( 1 << ($(PARAM_RQ_SEQ_NUM_WIDTH)-1) ))" )
|
||||
export PARAM_READ_CPLH_FC_LIMIT := $(if $(filter-out 60,$(PARAM_AXIS_PCIE_RQ_USER_WIDTH)),64,128)
|
||||
export PARAM_READ_CPLD_FC_LIMIT := $(if $(filter-out 60,$(PARAM_AXIS_PCIE_RQ_USER_WIDTH)),992,2048)
|
||||
export PARAM_READ_CPLH_FC_LIMIT := $(if $(filter-out 60,$(PARAM_AXIS_PCIE_RQ_USER_WIDTH)),256,64)
|
||||
export PARAM_READ_CPLD_FC_LIMIT := $(if $(filter-out 60,$(PARAM_AXIS_PCIE_RQ_USER_WIDTH)),1792,960)
|
||||
export PARAM_WRITE_OP_TABLE_SIZE := $(shell echo "$$(( 1 << ($(PARAM_RQ_SEQ_NUM_WIDTH)-1) ))" )
|
||||
export PARAM_WRITE_TX_LIMIT := $(shell echo "$$(( 1 << ($(PARAM_RQ_SEQ_NUM_WIDTH)-1) ))" )
|
||||
export PARAM_BAR0_APERTURE := 24
|
||||
|
@ -299,6 +299,211 @@ class TB(object):
|
||||
await self.rc.enumerate()
|
||||
|
||||
|
||||
async def dma_block_read_bench(tb, dev, addr, mask, size, stride, count):
    """Benchmark DMA block reads (host memory to device RAM).

    Programs the block-read engine (registers 0x0010xx), starts it,
    polls for completion, then logs throughput computed from the device
    cycle counter (4 ns per cycle) plus the read request / completion
    counter deltas.

    Args:
        tb: testbench object; provides ``log``.
        dev: PCIe device; register access goes through BAR0 window.
        addr: DMA base address in host memory.
        mask: DMA/RAM offset mask (region length - 1).
        size: block length in bytes.
        stride: address increment between blocks.
        count: number of blocks to read.

    Raises:
        AssertionError: if DMA engine status bits 8-9 are still set
            after the run (engine busy / error).
    """
    dev_pf0_bar0 = dev.bar_window[0]

    # snapshot request/completion counters so deltas can be reported
    rd_req = await dev_pf0_bar0.read_dword(0x000020)
    rd_cpl = await dev_pf0_bar0.read_dword(0x000024)

    # configure operation (read)
    # DMA base address
    await dev_pf0_bar0.write_dword(0x001080, addr & 0xffffffff)
    await dev_pf0_bar0.write_dword(0x001084, (addr >> 32) & 0xffffffff)
    # DMA offset address
    await dev_pf0_bar0.write_dword(0x001088, 0)
    await dev_pf0_bar0.write_dword(0x00108c, 0)
    # DMA offset mask
    await dev_pf0_bar0.write_dword(0x001090, mask)
    await dev_pf0_bar0.write_dword(0x001094, 0)
    # DMA stride
    await dev_pf0_bar0.write_dword(0x001098, stride)
    await dev_pf0_bar0.write_dword(0x00109c, 0)
    # RAM base address
    await dev_pf0_bar0.write_dword(0x0010c0, 0)
    await dev_pf0_bar0.write_dword(0x0010c4, 0)
    # RAM offset address
    await dev_pf0_bar0.write_dword(0x0010c8, 0)
    await dev_pf0_bar0.write_dword(0x0010cc, 0)
    # RAM offset mask
    await dev_pf0_bar0.write_dword(0x0010d0, mask)
    await dev_pf0_bar0.write_dword(0x0010d4, 0)
    # RAM stride
    await dev_pf0_bar0.write_dword(0x0010d8, stride)
    await dev_pf0_bar0.write_dword(0x0010dc, 0)
    # clear cycle count
    await dev_pf0_bar0.write_dword(0x001008, 0)
    await dev_pf0_bar0.write_dword(0x00100c, 0)
    # block length
    await dev_pf0_bar0.write_dword(0x001010, size)
    # block count
    await dev_pf0_bar0.write_dword(0x001018, count)
    await dev_pf0_bar0.write_dword(0x00101c, 0)

    # start
    await dev_pf0_bar0.write_dword(0x001000, 1)

    # poll until the run bit clears and the DMA engines go idle
    for k in range(1000):
        await Timer(1000, 'ns')
        run = await dev_pf0_bar0.read_dword(0x001000)
        status = await dev_pf0_bar0.read_dword(0x000000)
        if run == 0 and status & 0x300 == 0:
            break

    if run != 0:
        tb.log.warning("Operation timed out")
    if status & 0x300 != 0:
        tb.log.warning("DMA engine busy")

    # cycle counter; 4 ns per cycle in the throughput computation below
    cycles = await dev_pf0_bar0.read_dword(0x001008)

    rd_req = await dev_pf0_bar0.read_dword(0x000020) - rd_req
    rd_cpl = await dev_pf0_bar0.read_dword(0x000024) - rd_cpl

    tb.log.info("read %d blocks of %d bytes (total %d B, stride %d) in %d ns (%d req %d cpl) %d Mbps",
            count, size, count*size, stride, cycles*4, rd_req, rd_cpl, size * count * 8 * 1000 / (cycles * 4))

    assert status & 0x300 == 0
|
||||
|
||||
|
||||
async def dma_block_write_bench(tb, dev, addr, mask, size, stride, count):
    """Benchmark DMA block writes (device RAM to host memory).

    Programs the block-write engine (registers 0x0011xx), starts it,
    polls for completion, then logs throughput computed from the device
    cycle counter (4 ns per cycle) plus the write request counter delta.

    Args:
        tb: testbench object; provides ``log``.
        dev: PCIe device; register access goes through BAR0 window.
        addr: DMA base address in host memory.
        mask: DMA/RAM offset mask (region length - 1).
        size: block length in bytes.
        stride: address increment between blocks.
        count: number of blocks to write.

    Raises:
        AssertionError: if DMA engine status bits 8-9 are still set
            after the run (engine busy / error).
    """
    dev_pf0_bar0 = dev.bar_window[0]

    # snapshot request counter so the delta can be reported
    wr_req = await dev_pf0_bar0.read_dword(0x000028)

    # configure operation (write)
    # DMA base address
    await dev_pf0_bar0.write_dword(0x001180, addr & 0xffffffff)
    await dev_pf0_bar0.write_dword(0x001184, (addr >> 32) & 0xffffffff)
    # DMA offset address
    await dev_pf0_bar0.write_dword(0x001188, 0)
    await dev_pf0_bar0.write_dword(0x00118c, 0)
    # DMA offset mask
    await dev_pf0_bar0.write_dword(0x001190, mask)
    await dev_pf0_bar0.write_dword(0x001194, 0)
    # DMA stride
    await dev_pf0_bar0.write_dword(0x001198, stride)
    await dev_pf0_bar0.write_dword(0x00119c, 0)
    # RAM base address
    await dev_pf0_bar0.write_dword(0x0011c0, 0)
    await dev_pf0_bar0.write_dword(0x0011c4, 0)
    # RAM offset address
    await dev_pf0_bar0.write_dword(0x0011c8, 0)
    await dev_pf0_bar0.write_dword(0x0011cc, 0)
    # RAM offset mask
    await dev_pf0_bar0.write_dword(0x0011d0, mask)
    await dev_pf0_bar0.write_dword(0x0011d4, 0)
    # RAM stride
    await dev_pf0_bar0.write_dword(0x0011d8, stride)
    await dev_pf0_bar0.write_dword(0x0011dc, 0)
    # clear cycle count
    await dev_pf0_bar0.write_dword(0x001108, 0)
    await dev_pf0_bar0.write_dword(0x00110c, 0)
    # block length
    await dev_pf0_bar0.write_dword(0x001110, size)
    # block count
    await dev_pf0_bar0.write_dword(0x001118, count)
    await dev_pf0_bar0.write_dword(0x00111c, 0)

    # start
    await dev_pf0_bar0.write_dword(0x001100, 1)

    # poll until the run bit clears and the DMA engines go idle
    for k in range(1000):
        await Timer(1000, 'ns')
        run = await dev_pf0_bar0.read_dword(0x001100)
        status = await dev_pf0_bar0.read_dword(0x000000)
        if run == 0 and status & 0x300 == 0:
            break

    if run != 0:
        tb.log.warning("Operation timed out")
    if status & 0x300 != 0:
        tb.log.warning("DMA engine busy")

    # cycle counter; 4 ns per cycle in the throughput computation below
    cycles = await dev_pf0_bar0.read_dword(0x001108)

    wr_req = await dev_pf0_bar0.read_dword(0x000028) - wr_req

    tb.log.info("wrote %d blocks of %d bytes (total %d B, stride %d) in %d ns (%d req) %d Mbps",
            count, size, count*size, stride, cycles*4, wr_req, size * count * 8 * 1000 / (cycles * 4))

    assert status & 0x300 == 0
|
||||
|
||||
|
||||
async def dma_cpl_buf_test(tb, dev, addr, mask, size, stride, count, stall):
|
||||
dev_pf0_bar0 = dev.bar_window[0]
|
||||
|
||||
rd_req = await dev_pf0_bar0.read_dword(0x000020)
|
||||
rd_cpl = await dev_pf0_bar0.read_dword(0x000024)
|
||||
|
||||
# configure operation (read)
|
||||
# DMA base address
|
||||
await dev_pf0_bar0.write_dword(0x001080, addr & 0xffffffff)
|
||||
await dev_pf0_bar0.write_dword(0x001084, (addr >> 32) & 0xffffffff)
|
||||
# DMA offset address
|
||||
await dev_pf0_bar0.write_dword(0x001088, 0)
|
||||
await dev_pf0_bar0.write_dword(0x00108c, 0)
|
||||
# DMA offset mask
|
||||
await dev_pf0_bar0.write_dword(0x001090, mask)
|
||||
await dev_pf0_bar0.write_dword(0x001094, 0)
|
||||
# DMA stride
|
||||
await dev_pf0_bar0.write_dword(0x001098, stride)
|
||||
await dev_pf0_bar0.write_dword(0x00109c, 0)
|
||||
# RAM base address
|
||||
await dev_pf0_bar0.write_dword(0x0010c0, 0)
|
||||
await dev_pf0_bar0.write_dword(0x0010c4, 0)
|
||||
# RAM offset address
|
||||
await dev_pf0_bar0.write_dword(0x0010c8, 0)
|
||||
await dev_pf0_bar0.write_dword(0x0010cc, 0)
|
||||
# RAM offset mask
|
||||
await dev_pf0_bar0.write_dword(0x0010d0, mask)
|
||||
await dev_pf0_bar0.write_dword(0x0010d4, 0)
|
||||
# RAM stride
|
||||
await dev_pf0_bar0.write_dword(0x0010d8, stride)
|
||||
await dev_pf0_bar0.write_dword(0x0010dc, 0)
|
||||
# clear cycle count
|
||||
await dev_pf0_bar0.write_dword(0x001008, 0)
|
||||
await dev_pf0_bar0.write_dword(0x00100c, 0)
|
||||
# block length
|
||||
await dev_pf0_bar0.write_dword(0x001010, size)
|
||||
# block count
|
||||
await dev_pf0_bar0.write_dword(0x001018, count)
|
||||
await dev_pf0_bar0.write_dword(0x00101c, 0)
|
||||
|
||||
if stall:
|
||||
# stall RX
|
||||
await dev_pf0_bar0.write_dword(0x000040, stall)
|
||||
|
||||
# start
|
||||
await dev_pf0_bar0.write_dword(0x001000, 1)
|
||||
|
||||
# wait for stall
|
||||
if stall:
|
||||
for k in range(stall):
|
||||
await RisingEdge(tb.dut.clk)
|
||||
|
||||
for k in range(100):
|
||||
await Timer(1000, 'ns')
|
||||
run = await dev_pf0_bar0.read_dword(0x001000)
|
||||
status = await dev_pf0_bar0.read_dword(0x000000)
|
||||
if run == 0 and status & 0x300 == 0:
|
||||
break
|
||||
|
||||
if run != 0:
|
||||
tb.log.warning("Operation timed out")
|
||||
if status & 0x300 != 0:
|
||||
tb.log.warning("DMA engine busy")
|
||||
|
||||
cycles = await dev_pf0_bar0.read_dword(0x001008)
|
||||
|
||||
rd_req = await dev_pf0_bar0.read_dword(0x000020) - rd_req
|
||||
rd_cpl = await dev_pf0_bar0.read_dword(0x000024) - rd_cpl
|
||||
|
||||
tb.log.info("read %d x %d B (total %d B %d CPLD, stride %d) in %d ns (%d req %d cpl) %d Mbps",
|
||||
count, size, count*size, count*((size+15)//16), stride, cycles*4, rd_req, rd_cpl, size * count * 8 * 1000 / (cycles * 4))
|
||||
|
||||
assert status & 0x300 == 0
|
||||
|
||||
|
||||
@cocotb.test()
|
||||
async def run_test(dut):
|
||||
|
||||
@ -350,6 +555,8 @@ async def run_test(dut):
|
||||
await Timer(2000, 'ns')
|
||||
|
||||
# read status
|
||||
status = await dev_pf0_bar0.read_dword(0x000000)
|
||||
tb.log.info("DMA Status: 0x%x", status)
|
||||
val = await dev_pf0_bar0.read_dword(0x000118)
|
||||
tb.log.info("Status: 0x%x", val)
|
||||
assert val == 0x800000AA
|
||||
@ -364,6 +571,8 @@ async def run_test(dut):
|
||||
await Timer(2000, 'ns')
|
||||
|
||||
# read status
|
||||
status = await dev_pf0_bar0.read_dword(0x000000)
|
||||
tb.log.info("DMA Status: 0x%x", status)
|
||||
val = await dev_pf0_bar0.read_dword(0x000218)
|
||||
tb.log.info("Status: 0x%x", val)
|
||||
assert val == 0x80000055
|
||||
@ -384,6 +593,8 @@ async def run_test(dut):
|
||||
await Timer(2000, 'ns')
|
||||
|
||||
# read status
|
||||
status = await dev_pf0_bar0.read_dword(0x000000)
|
||||
tb.log.info("DMA Status: 0x%x", status)
|
||||
val = await dev_pf0_bar0.read_dword(0x000218)
|
||||
tb.log.info("Status: 0x%x", val)
|
||||
assert val == 0x800000AA
|
||||
@ -394,112 +605,66 @@ async def run_test(dut):
|
||||
|
||||
tb.log.info("Test DMA block operations")
|
||||
|
||||
# disable interrupts
|
||||
await dev_pf0_bar0.write_dword(0x000008, 0)
|
||||
|
||||
region_len = 0x2000
|
||||
src_offset = 0x0000
|
||||
dest_offset = 0x4000
|
||||
|
||||
block_size = 256
|
||||
block_stride = block_size
|
||||
block_count = 32
|
||||
|
||||
# write packet data
|
||||
mem[src_offset:src_offset+region_len] = bytearray([x % 256 for x in range(region_len)])
|
||||
|
||||
# enable DMA
|
||||
await dev_pf0_bar0.write_dword(0x000000, 1)
|
||||
# disable interrupts
|
||||
await dev_pf0_bar0.write_dword(0x000008, 0)
|
||||
|
||||
# configure operation (read)
|
||||
# DMA base address
|
||||
await dev_pf0_bar0.write_dword(0x001080, (mem_base+src_offset) & 0xffffffff)
|
||||
await dev_pf0_bar0.write_dword(0x001084, (mem_base+src_offset >> 32) & 0xffffffff)
|
||||
# DMA offset address
|
||||
await dev_pf0_bar0.write_dword(0x001088, 0)
|
||||
await dev_pf0_bar0.write_dword(0x00108c, 0)
|
||||
# DMA offset mask
|
||||
await dev_pf0_bar0.write_dword(0x001090, region_len-1)
|
||||
await dev_pf0_bar0.write_dword(0x001094, 0)
|
||||
# DMA stride
|
||||
await dev_pf0_bar0.write_dword(0x001098, block_stride)
|
||||
await dev_pf0_bar0.write_dword(0x00109c, 0)
|
||||
# RAM base address
|
||||
await dev_pf0_bar0.write_dword(0x0010c0, 0)
|
||||
await dev_pf0_bar0.write_dword(0x0010c4, 0)
|
||||
# RAM offset address
|
||||
await dev_pf0_bar0.write_dword(0x0010c8, 0)
|
||||
await dev_pf0_bar0.write_dword(0x0010cc, 0)
|
||||
# RAM offset mask
|
||||
await dev_pf0_bar0.write_dword(0x0010d0, region_len-1)
|
||||
await dev_pf0_bar0.write_dword(0x0010d4, 0)
|
||||
# RAM stride
|
||||
await dev_pf0_bar0.write_dword(0x0010d8, block_stride)
|
||||
await dev_pf0_bar0.write_dword(0x0010dc, 0)
|
||||
# clear cycle count
|
||||
await dev_pf0_bar0.write_dword(0x001008, 0)
|
||||
await dev_pf0_bar0.write_dword(0x00100c, 0)
|
||||
# block length
|
||||
await dev_pf0_bar0.write_dword(0x001010, block_size)
|
||||
# block count
|
||||
await dev_pf0_bar0.write_dword(0x001018, block_count)
|
||||
await dev_pf0_bar0.write_dword(0x00101c, 0)
|
||||
# start
|
||||
await dev_pf0_bar0.write_dword(0x001000, 1)
|
||||
|
||||
for k in range(10):
|
||||
cnt = await dev_pf0_bar0.read_dword(0x001018)
|
||||
await Timer(1000, 'ns')
|
||||
if cnt == 0:
|
||||
break
|
||||
|
||||
# configure operation (write)
|
||||
# DMA base address
|
||||
await dev_pf0_bar0.write_dword(0x001180, (mem_base+dest_offset) & 0xffffffff)
|
||||
await dev_pf0_bar0.write_dword(0x001184, (mem_base+dest_offset >> 32) & 0xffffffff)
|
||||
# DMA offset address
|
||||
await dev_pf0_bar0.write_dword(0x001188, 0)
|
||||
await dev_pf0_bar0.write_dword(0x00118c, 0)
|
||||
# DMA offset mask
|
||||
await dev_pf0_bar0.write_dword(0x001190, region_len-1)
|
||||
await dev_pf0_bar0.write_dword(0x001194, 0)
|
||||
# DMA stride
|
||||
await dev_pf0_bar0.write_dword(0x001198, block_stride)
|
||||
await dev_pf0_bar0.write_dword(0x00119c, 0)
|
||||
# RAM base address
|
||||
await dev_pf0_bar0.write_dword(0x0011c0, 0)
|
||||
await dev_pf0_bar0.write_dword(0x0011c4, 0)
|
||||
# RAM offset address
|
||||
await dev_pf0_bar0.write_dword(0x0011c8, 0)
|
||||
await dev_pf0_bar0.write_dword(0x0011cc, 0)
|
||||
# RAM offset mask
|
||||
await dev_pf0_bar0.write_dword(0x0011d0, region_len-1)
|
||||
await dev_pf0_bar0.write_dword(0x0011d4, 0)
|
||||
# RAM stride
|
||||
await dev_pf0_bar0.write_dword(0x0011d8, block_stride)
|
||||
await dev_pf0_bar0.write_dword(0x0011dc, 0)
|
||||
# clear cycle count
|
||||
await dev_pf0_bar0.write_dword(0x001108, 0)
|
||||
await dev_pf0_bar0.write_dword(0x00110c, 0)
|
||||
# block length
|
||||
await dev_pf0_bar0.write_dword(0x001110, block_size)
|
||||
# block count
|
||||
await dev_pf0_bar0.write_dword(0x001118, block_count)
|
||||
await dev_pf0_bar0.write_dword(0x00111c, 0)
|
||||
# start
|
||||
await dev_pf0_bar0.write_dword(0x001100, 1)
|
||||
|
||||
for k in range(10):
|
||||
cnt = await dev_pf0_bar0.read_dword(0x001118)
|
||||
await Timer(1000, 'ns')
|
||||
if cnt == 0:
|
||||
break
|
||||
|
||||
await Timer(2000, 'ns')
|
||||
await dma_block_read_bench(tb, dev, mem_base+src_offset, region_len-1, 256, 256, 32)
|
||||
await dma_block_write_bench(tb, dev, mem_base+dest_offset, region_len-1, 256, 256, 32)
|
||||
|
||||
tb.log.info("%s", mem.hexdump_str(dest_offset, region_len))
|
||||
|
||||
assert mem[src_offset:src_offset+region_len] == mem[dest_offset:dest_offset+region_len]
|
||||
|
||||
tb.log.info("Test RX completion buffer (CPLH, 8)")
|
||||
|
||||
tb.rc.split_on_all_rcb = True
|
||||
|
||||
size = 8
|
||||
stride = size
|
||||
for count in range(32, 256+1, 8):
|
||||
await dma_cpl_buf_test(tb, dev, mem_base, region_len-1, size, stride, count, 2000)
|
||||
|
||||
tb.log.info("Test RX completion buffer (CPLH, 8+64)")
|
||||
|
||||
size = 8+64
|
||||
stride = 0
|
||||
for count in range(8, 256+1, 8):
|
||||
await dma_cpl_buf_test(tb, dev, mem_base+128-8, region_len-1, size, stride, count, 2000)
|
||||
|
||||
tb.log.info("Test RX completion buffer (CPLH, 8+128+8)")
|
||||
|
||||
size = 8+128+8
|
||||
stride = 0
|
||||
for count in range(8, 256+1, 8):
|
||||
await dma_cpl_buf_test(tb, dev, mem_base+128-8, region_len-1, size, stride, count, 2000)
|
||||
|
||||
tb.rc.split_on_all_rcb = False
|
||||
|
||||
tb.log.info("Test RX completion buffer (CPLD)")
|
||||
|
||||
size = 512
|
||||
stride = size
|
||||
for count in range(8, 256+1, 8):
|
||||
await dma_cpl_buf_test(tb, dev, mem_base, region_len-1, size, stride, count, 4000)
|
||||
|
||||
tb.log.info("Perform block reads")
|
||||
|
||||
count = 100
|
||||
for size in [2**x for x in range(14)]:
|
||||
stride = size
|
||||
await dma_block_read_bench(tb, dev, mem_base, region_len-1, size, stride, count)
|
||||
|
||||
tb.log.info("Perform block writes")
|
||||
|
||||
count = 100
|
||||
for size in [2**x for x in range(14)]:
|
||||
stride = size
|
||||
await dma_block_write_bench(tb, dev, mem_base, region_len-1, size, stride, count)
|
||||
|
||||
await RisingEdge(dut.clk)
|
||||
await RisingEdge(dut.clk)
|
||||
|
||||
@ -566,8 +731,8 @@ def test_example_core_pcie_us(request, axis_pcie_data_width, straddle):
|
||||
parameters['IMM_WIDTH'] = 32
|
||||
parameters['READ_OP_TABLE_SIZE'] = parameters['PCIE_TAG_COUNT']
|
||||
parameters['READ_TX_LIMIT'] = 2**(parameters['RQ_SEQ_NUM_WIDTH']-1)
|
||||
parameters['READ_CPLH_FC_LIMIT'] = 64 if parameters['AXIS_PCIE_RQ_USER_WIDTH'] == 60 else 128
|
||||
parameters['READ_CPLD_FC_LIMIT'] = 992 if parameters['AXIS_PCIE_RQ_USER_WIDTH'] == 60 else 2048
|
||||
parameters['READ_CPLH_FC_LIMIT'] = 64 if parameters['AXIS_PCIE_RQ_USER_WIDTH'] == 60 else 256
|
||||
parameters['READ_CPLD_FC_LIMIT'] = 1024-64 if parameters['AXIS_PCIE_RQ_USER_WIDTH'] == 60 else 2048-256
|
||||
parameters['WRITE_OP_TABLE_SIZE'] = 2**(parameters['RQ_SEQ_NUM_WIDTH']-1)
|
||||
parameters['WRITE_TX_LIMIT'] = 2**(parameters['RQ_SEQ_NUM_WIDTH']-1)
|
||||
parameters['BAR0_APERTURE'] = 24
|
||||
|
@ -56,7 +56,7 @@ module fpga (
|
||||
parameter AXIS_PCIE_DATA_WIDTH = 512;
|
||||
parameter AXIS_PCIE_KEEP_WIDTH = (AXIS_PCIE_DATA_WIDTH/32);
|
||||
parameter AXIS_PCIE_RC_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 75 : 161;
|
||||
parameter AXIS_PCIE_RQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 60 : 137;
|
||||
parameter AXIS_PCIE_RQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 62 : 137;
|
||||
parameter AXIS_PCIE_CQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 85 : 183;
|
||||
parameter AXIS_PCIE_CC_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 33 : 81;
|
||||
parameter RC_STRADDLE = AXIS_PCIE_DATA_WIDTH >= 256;
|
||||
|
@ -161,8 +161,8 @@ example_core_pcie_us #(
|
||||
.PCIE_TAG_COUNT(PCIE_TAG_COUNT),
|
||||
.READ_OP_TABLE_SIZE(PCIE_TAG_COUNT),
|
||||
.READ_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)),
|
||||
.READ_CPLH_FC_LIMIT(128),
|
||||
.READ_CPLD_FC_LIMIT(2048),
|
||||
.READ_CPLH_FC_LIMIT(256),
|
||||
.READ_CPLD_FC_LIMIT(2048-256),
|
||||
.WRITE_OP_TABLE_SIZE(2**(RQ_SEQ_NUM_WIDTH-1)),
|
||||
.WRITE_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)),
|
||||
.BAR0_APERTURE(BAR0_APERTURE),
|
||||
@ -265,8 +265,7 @@ example_core_pcie_us_inst (
|
||||
*/
|
||||
.cfg_max_read_req(cfg_max_read_req),
|
||||
.cfg_max_payload(cfg_max_payload),
|
||||
// .cfg_rcb_status(cfg_rcb_status),
|
||||
.cfg_rcb_status(1'b1), // force RCB 128 due to insufficient CPLH limit in US+ PCIe HIP
|
||||
.cfg_rcb_status(cfg_rcb_status),
|
||||
|
||||
/*
|
||||
* Status
|
||||
|
@ -54,7 +54,6 @@ export PARAM_AXIS_PCIE_RQ_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_
|
||||
export PARAM_AXIS_PCIE_RC_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),75,161)
|
||||
export PARAM_AXIS_PCIE_CQ_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),88,183)
|
||||
export PARAM_AXIS_PCIE_CC_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),33,81)
|
||||
export PARAM_RQ_SEQ_NUM_WIDTH := 6
|
||||
|
||||
ifeq ($(SIM), icarus)
|
||||
PLUSARGS += -fst
|
||||
|
@ -396,7 +396,6 @@ def test_fpga_core(request):
|
||||
parameters['AXIS_PCIE_RC_USER_WIDTH'] = 75 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 161
|
||||
parameters['AXIS_PCIE_CQ_USER_WIDTH'] = 88 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 183
|
||||
parameters['AXIS_PCIE_CC_USER_WIDTH'] = 33 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 81
|
||||
parameters['RQ_SEQ_NUM_WIDTH'] = 6
|
||||
|
||||
extra_env = {f'PARAM_{k}': str(v) for k, v in parameters.items()}
|
||||
|
||||
|
@ -422,13 +422,13 @@ reg [OP_TAG_WIDTH+1-1:0] active_op_count_reg = 0;
|
||||
reg inc_active_op;
|
||||
reg dec_active_op;
|
||||
|
||||
reg [CL_CPLH_FC_LIMIT+1-1:0] active_cplh_fc_count_reg = 0;
|
||||
reg active_cplh_fc_av_reg = 1'b1;
|
||||
reg [CL_CPLH_FC_LIMIT+1-1:0] active_cplh_fc_count_reg = 0, active_cplh_fc_count_next;
|
||||
reg active_cplh_fc_av_reg = 1'b1, active_cplh_fc_av_next;
|
||||
reg [6:0] inc_active_cplh_fc_count;
|
||||
reg [6:0] dec_active_cplh_fc_count;
|
||||
|
||||
reg [CL_CPLD_FC_LIMIT+1-1:0] active_cpld_fc_count_reg = 0;
|
||||
reg active_cpld_fc_av_reg = 1'b1;
|
||||
reg [CL_CPLD_FC_LIMIT+1-1:0] active_cpld_fc_count_reg = 0, active_cpld_fc_count_next;
|
||||
reg active_cpld_fc_av_reg = 1'b1, active_cpld_fc_av_next;
|
||||
reg [8:0] inc_active_cpld_fc_count;
|
||||
reg [8:0] dec_active_cpld_fc_count;
|
||||
|
||||
@ -1382,6 +1382,12 @@ always @* begin
|
||||
end
|
||||
|
||||
active_tx_count_av_next = active_tx_count_next < TX_LIMIT;
|
||||
|
||||
active_cplh_fc_count_next <= active_cplh_fc_count_reg + inc_active_cplh_fc_count - dec_active_cplh_fc_count;
|
||||
active_cplh_fc_av_next <= !CPLH_FC_LIMIT || active_cplh_fc_count_next < CPLH_FC_LIMIT;
|
||||
|
||||
active_cpld_fc_count_next <= active_cpld_fc_count_reg + inc_active_cpld_fc_count - dec_active_cpld_fc_count;
|
||||
active_cpld_fc_av_next <= !CPLD_FC_LIMIT || active_cpld_fc_count_next < CPLD_FC_LIMIT;
|
||||
end
|
||||
|
||||
always @(posedge clk) begin
|
||||
@ -1501,11 +1507,11 @@ always @(posedge clk) begin
|
||||
active_tag_count_reg <= active_tag_count_reg + inc_active_tag - dec_active_tag;
|
||||
active_op_count_reg <= active_op_count_reg + inc_active_op - dec_active_op;
|
||||
|
||||
active_cplh_fc_count_reg <= active_cplh_fc_count_reg + inc_active_cplh_fc_count - dec_active_cplh_fc_count;
|
||||
active_cplh_fc_av_reg <= !CPLH_FC_LIMIT || active_cplh_fc_count_reg < CPLH_FC_LIMIT;
|
||||
active_cplh_fc_count_reg <= active_cplh_fc_count_next;
|
||||
active_cplh_fc_av_reg <= active_cplh_fc_av_next;
|
||||
|
||||
active_cpld_fc_count_reg <= active_cpld_fc_count_reg + inc_active_cpld_fc_count - dec_active_cpld_fc_count;
|
||||
active_cpld_fc_av_reg <= !CPLD_FC_LIMIT || active_cpld_fc_count_reg < CPLD_FC_LIMIT;
|
||||
active_cpld_fc_count_reg <= active_cpld_fc_count_next;
|
||||
active_cpld_fc_av_reg <= active_cpld_fc_av_next;
|
||||
|
||||
pcie_tag_table_start_ptr_reg <= pcie_tag_table_start_ptr_next;
|
||||
pcie_tag_table_start_ram_sel_reg <= pcie_tag_table_start_ram_sel_next;
|
||||
|
@ -347,7 +347,7 @@ always @* begin
|
||||
|
||||
// compute mux settings
|
||||
for (port = 0; port < PORTS; port = port + 1) begin
|
||||
port_seg_valid[port] = pause[port] ? 0 : {2{fifo_ctrl_tlp_valid[port]}} >> fifo_ctrl_seg_offset[port];
|
||||
port_seg_valid[port] = {2{fifo_ctrl_tlp_valid[port]}} >> fifo_ctrl_seg_offset[port];
|
||||
port_seg_eop[port] = {2{fifo_ctrl_tlp_eop[port]}} >> fifo_ctrl_seg_offset[port];
|
||||
end
|
||||
|
||||
@ -383,7 +383,7 @@ always @* begin
|
||||
port_cyc = cur_port;
|
||||
seg_offset_cyc = port_seg_offset_cyc[cur_port];
|
||||
seg_count_cyc = port_seg_count_cyc[cur_port];
|
||||
if (port_seg_valid[cur_port][0]) begin
|
||||
if (!pause[cur_port] && port_seg_valid[cur_port][0]) begin
|
||||
// set frame
|
||||
frame_cyc = 1;
|
||||
sel_tlp_seq_valid_cyc[OUT_TLP_SEG_COUNT*cur_port+seg] = 1'b1;
|
||||
|
@ -36,10 +36,7 @@ export PARAM_IRQ_INDEX_WIDTH := 11
|
||||
export PARAM_AXIL_DATA_WIDTH := 32
|
||||
export PARAM_AXIL_ADDR_WIDTH := $(shell expr $(PARAM_IRQ_INDEX_WIDTH) + 5 )
|
||||
export PARAM_AXIL_STRB_WIDTH := $(shell expr $(PARAM_AXIL_DATA_WIDTH) / 8 )
|
||||
export PARAM_TLP_DATA_WIDTH := 64
|
||||
export PARAM_TLP_STRB_WIDTH := $(shell expr $(PARAM_TLP_DATA_WIDTH) / 32 )
|
||||
export PARAM_TLP_HDR_WIDTH := 128
|
||||
export PARAM_TLP_SEG_COUNT := 1
|
||||
export PARAM_TLP_FORCE_64_BIT_ADDR := 0
|
||||
|
||||
ifeq ($(SIM), icarus)
|
||||
|
@ -319,8 +319,7 @@ rtl_dir = os.path.abspath(os.path.join(tests_dir, '..', '..', 'rtl'))
|
||||
|
||||
|
||||
@pytest.mark.parametrize("axil_data_width", [32, 64])
|
||||
@pytest.mark.parametrize("pcie_data_width", [64, 128])
|
||||
def test_pcie_msix(request, pcie_data_width, axil_data_width):
|
||||
def test_pcie_msix(request, axil_data_width):
|
||||
dut = "pcie_msix"
|
||||
module = os.path.splitext(os.path.basename(__file__))[0]
|
||||
toplevel = dut
|
||||
@ -335,10 +334,7 @@ def test_pcie_msix(request, pcie_data_width, axil_data_width):
|
||||
parameters['AXIL_DATA_WIDTH'] = axil_data_width
|
||||
parameters['AXIL_ADDR_WIDTH'] = parameters['IRQ_INDEX_WIDTH']+5
|
||||
parameters['AXIL_STRB_WIDTH'] = (axil_data_width // 8)
|
||||
parameters['TLP_DATA_WIDTH'] = pcie_data_width
|
||||
parameters['TLP_STRB_WIDTH'] = pcie_data_width // 32
|
||||
parameters['TLP_HDR_WIDTH'] = 128
|
||||
parameters['TLP_SEG_COUNT'] = 1
|
||||
parameters['TLP_FORCE_64_BIT_ADDR'] = 0
|
||||
|
||||
extra_env = {f'PARAM_{k}': str(v) for k, v in parameters.items()}
|
||||
|
@ -270,9 +270,6 @@ def test_pcie_us_axi_dma_wr(request, axis_pcie_data_width, pcie_offset):
|
||||
parameters['AXI_ID_WIDTH'] = 8
|
||||
parameters['AXI_MAX_BURST_LEN'] = 256
|
||||
parameters['PCIE_ADDR_WIDTH'] = 64
|
||||
parameters['PCIE_TAG_COUNT'] = 64 if parameters['AXIS_PCIE_RQ_USER_WIDTH'] == 60 else 256
|
||||
parameters['PCIE_TAG_WIDTH'] = (parameters['PCIE_TAG_COUNT']-1).bit_length()
|
||||
parameters['PCIE_EXT_TAG_ENABLE'] = int(parameters['PCIE_TAG_COUNT'] > 32)
|
||||
parameters['LEN_WIDTH'] = 20
|
||||
parameters['TAG_WIDTH'] = 8
|
||||
parameters['OP_TABLE_SIZE'] = 2**(parameters['RQ_SEQ_NUM_WIDTH']-1)
|
||||
|
@ -18,7 +18,7 @@ deps =
|
||||
cocotb-bus == 0.2.1
|
||||
cocotb-test == 0.2.4
|
||||
cocotbext-axi == 0.1.24
|
||||
cocotbext-pcie == 0.2.12
|
||||
cocotbext-pcie == 0.2.14
|
||||
jinja2 == 3.1.2
|
||||
|
||||
commands =
|
||||
|
Loading…
x
Reference in New Issue
Block a user