Mirror of https://github.com/corundum/corundum.git

merged changes in pcie

Commit 045b0c1c68
Alex Forencich, 2023-06-23 22:49:05 -07:00
65 changed files with 1259 additions and 511 deletions

View File

@@ -54,7 +54,7 @@ module fpga (
 parameter AXIS_PCIE_DATA_WIDTH = 512;
 parameter AXIS_PCIE_KEEP_WIDTH = (AXIS_PCIE_DATA_WIDTH/32);
 parameter AXIS_PCIE_RC_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 75 : 161;
-parameter AXIS_PCIE_RQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 60 : 137;
+parameter AXIS_PCIE_RQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 62 : 137;
 parameter AXIS_PCIE_CQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 85 : 183;
 parameter AXIS_PCIE_CC_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 33 : 81;
 parameter RC_STRADDLE = AXIS_PCIE_DATA_WIDTH >= 256;

View File

@@ -159,8 +159,8 @@ example_core_pcie_us #(
 .PCIE_TAG_COUNT(PCIE_TAG_COUNT),
 .READ_OP_TABLE_SIZE(PCIE_TAG_COUNT),
 .READ_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)),
-.READ_CPLH_FC_LIMIT(128),
-.READ_CPLD_FC_LIMIT(2048),
+.READ_CPLH_FC_LIMIT(256),
+.READ_CPLD_FC_LIMIT(2048-256),
 .WRITE_OP_TABLE_SIZE(2**(RQ_SEQ_NUM_WIDTH-1)),
 .WRITE_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)),
 .BAR0_APERTURE(BAR0_APERTURE),
@@ -263,8 +263,7 @@ example_core_pcie_us_inst (
 */
 .cfg_max_read_req(cfg_max_read_req),
 .cfg_max_payload(cfg_max_payload),
-// .cfg_rcb_status(cfg_rcb_status),
-.cfg_rcb_status(1'b1), // force RCB 128 due to insufficient CPLH limit in US+ PCIe HIP
+.cfg_rcb_status(cfg_rcb_status),
 /*
 * Status

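A hedged illustration (not part of this commit): the hunks above raise READ_CPLH_FC_LIMIT from 128 to 256, rebalance READ_CPLD_FC_LIMIT to 2048-256, and stop forcing cfg_rcb_status to 1'b1. These limits follow standard PCIe completion-credit accounting: a read completion consumes one CPLH credit plus one CPLD credit per 16 bytes of payload, and a completer may split a read at every Read Completion Boundary (RCB), so a smaller RCB burns CPLH credits faster for the same amount of data. The snippet below only sketches that arithmetic; the helper name and the worst-case one-split-per-RCB assumption are illustrative and not taken from the repository.

```c
/* Illustrative only: worst-case completion credits for a single read request,
 * assuming the completer splits at every Read Completion Boundary (RCB) and
 * ignoring unaligned starting addresses.  One CPLH credit per completion,
 * one CPLD credit per 16 bytes of payload. */
#include <stdio.h>

static unsigned int div_round_up(unsigned int a, unsigned int b)
{
	return (a + b - 1) / b;
}

int main(void)
{
	unsigned int read_bytes = 512;

	/* A 512-byte read: 8 CPLH with RCB 64, 4 CPLH with RCB 128, 32 CPLD either way. */
	printf("RCB 64:  %u CPLH, %u CPLD\n",
	       div_round_up(read_bytes, 64), div_round_up(read_bytes, 16));
	printf("RCB 128: %u CPLH, %u CPLD\n",
	       div_round_up(read_bytes, 128), div_round_up(read_bytes, 16));
	return 0;
}
```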
View File

@@ -54,7 +54,6 @@ export PARAM_AXIS_PCIE_RQ_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_
 export PARAM_AXIS_PCIE_RC_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),75,161)
 export PARAM_AXIS_PCIE_CQ_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),88,183)
 export PARAM_AXIS_PCIE_CC_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),33,81)
-export PARAM_RQ_SEQ_NUM_WIDTH := 6
 ifeq ($(SIM), icarus)
 PLUSARGS += -fst

View File

@@ -396,7 +396,6 @@ def test_fpga_core(request):
 parameters['AXIS_PCIE_RC_USER_WIDTH'] = 75 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 161
 parameters['AXIS_PCIE_CQ_USER_WIDTH'] = 88 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 183
 parameters['AXIS_PCIE_CC_USER_WIDTH'] = 33 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 81
-parameters['RQ_SEQ_NUM_WIDTH'] = 6
 extra_env = {f'PARAM_{k}': str(v) for k, v in parameters.items()}

View File

@@ -53,7 +53,7 @@ module fpga (
 parameter AXIS_PCIE_DATA_WIDTH = 512;
 parameter AXIS_PCIE_KEEP_WIDTH = (AXIS_PCIE_DATA_WIDTH/32);
 parameter AXIS_PCIE_RC_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 75 : 161;
-parameter AXIS_PCIE_RQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 60 : 137;
+parameter AXIS_PCIE_RQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 62 : 137;
 parameter AXIS_PCIE_CQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 85 : 183;
 parameter AXIS_PCIE_CC_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 33 : 81;
 parameter RC_STRADDLE = AXIS_PCIE_DATA_WIDTH >= 256;

View File

@@ -156,8 +156,8 @@ example_core_pcie_us #(
 .PCIE_TAG_COUNT(PCIE_TAG_COUNT),
 .READ_OP_TABLE_SIZE(PCIE_TAG_COUNT),
 .READ_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)),
-.READ_CPLH_FC_LIMIT(128),
-.READ_CPLD_FC_LIMIT(2048),
+.READ_CPLH_FC_LIMIT(256),
+.READ_CPLD_FC_LIMIT(2048-256),
 .WRITE_OP_TABLE_SIZE(2**(RQ_SEQ_NUM_WIDTH-1)),
 .WRITE_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)),
 .BAR0_APERTURE(BAR0_APERTURE),
@@ -260,8 +260,7 @@ example_core_pcie_us_inst (
 */
 .cfg_max_read_req(cfg_max_read_req),
 .cfg_max_payload(cfg_max_payload),
-// .cfg_rcb_status(cfg_rcb_status),
-.cfg_rcb_status(1'b1), // force RCB 128 due to insufficient CPLH limit in US+ PCIe HIP
+.cfg_rcb_status(cfg_rcb_status),
 /*
 * Status

View File

@@ -54,7 +54,6 @@ export PARAM_AXIS_PCIE_RQ_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_
 export PARAM_AXIS_PCIE_RC_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),75,161)
 export PARAM_AXIS_PCIE_CQ_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),88,183)
 export PARAM_AXIS_PCIE_CC_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),33,81)
-export PARAM_RQ_SEQ_NUM_WIDTH := 6
 ifeq ($(SIM), icarus)
 PLUSARGS += -fst

View File

@@ -398,7 +398,6 @@ def test_fpga_core(request):
 parameters['AXIS_PCIE_RC_USER_WIDTH'] = 75 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 161
 parameters['AXIS_PCIE_CQ_USER_WIDTH'] = 88 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 183
 parameters['AXIS_PCIE_CC_USER_WIDTH'] = 33 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 81
-parameters['RQ_SEQ_NUM_WIDTH'] = 6
 extra_env = {f'PARAM_{k}': str(v) for k, v in parameters.items()}

View File

@@ -53,7 +53,7 @@ module fpga (
 parameter AXIS_PCIE_DATA_WIDTH = 512;
 parameter AXIS_PCIE_KEEP_WIDTH = (AXIS_PCIE_DATA_WIDTH/32);
 parameter AXIS_PCIE_RC_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 75 : 161;
-parameter AXIS_PCIE_RQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 60 : 137;
+parameter AXIS_PCIE_RQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 62 : 137;
 parameter AXIS_PCIE_CQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 85 : 183;
 parameter AXIS_PCIE_CC_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 33 : 81;
 parameter RC_STRADDLE = AXIS_PCIE_DATA_WIDTH >= 256;

View File

@@ -156,8 +156,8 @@ example_core_pcie_us #(
 .PCIE_TAG_COUNT(PCIE_TAG_COUNT),
 .READ_OP_TABLE_SIZE(PCIE_TAG_COUNT),
 .READ_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)),
-.READ_CPLH_FC_LIMIT(128),
-.READ_CPLD_FC_LIMIT(2048),
+.READ_CPLH_FC_LIMIT(256),
+.READ_CPLD_FC_LIMIT(2048-256),
 .WRITE_OP_TABLE_SIZE(2**(RQ_SEQ_NUM_WIDTH-1)),
 .WRITE_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)),
 .BAR0_APERTURE(BAR0_APERTURE),
@@ -260,8 +260,7 @@ example_core_pcie_us_inst (
 */
 .cfg_max_read_req(cfg_max_read_req),
 .cfg_max_payload(cfg_max_payload),
-// .cfg_rcb_status(cfg_rcb_status),
-.cfg_rcb_status(1'b1), // force RCB 128 due to insufficient CPLH limit in US+ PCIe HIP
+.cfg_rcb_status(cfg_rcb_status),
 /*
 * Status

View File

@@ -54,7 +54,6 @@ export PARAM_AXIS_PCIE_RQ_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_
 export PARAM_AXIS_PCIE_RC_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),75,161)
 export PARAM_AXIS_PCIE_CQ_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),88,183)
 export PARAM_AXIS_PCIE_CC_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),33,81)
-export PARAM_RQ_SEQ_NUM_WIDTH := 6
 ifeq ($(SIM), icarus)
 PLUSARGS += -fst

View File

@@ -398,7 +398,6 @@ def test_fpga_core(request):
 parameters['AXIS_PCIE_RC_USER_WIDTH'] = 75 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 161
 parameters['AXIS_PCIE_CQ_USER_WIDTH'] = 88 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 183
 parameters['AXIS_PCIE_CC_USER_WIDTH'] = 33 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 81
-parameters['RQ_SEQ_NUM_WIDTH'] = 6
 extra_env = {f'PARAM_{k}': str(v) for k, v in parameters.items()}

View File

@@ -52,7 +52,7 @@ module fpga (
 parameter AXIS_PCIE_DATA_WIDTH = 512;
 parameter AXIS_PCIE_KEEP_WIDTH = (AXIS_PCIE_DATA_WIDTH/32);
 parameter AXIS_PCIE_RC_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 75 : 161;
-parameter AXIS_PCIE_RQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 60 : 137;
+parameter AXIS_PCIE_RQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 62 : 137;
 parameter AXIS_PCIE_CQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 85 : 183;
 parameter AXIS_PCIE_CC_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 33 : 81;
 parameter RC_STRADDLE = AXIS_PCIE_DATA_WIDTH >= 256;

View File

@@ -148,8 +148,8 @@ example_core_pcie_us #(
 .PCIE_TAG_COUNT(PCIE_TAG_COUNT),
 .READ_OP_TABLE_SIZE(PCIE_TAG_COUNT),
 .READ_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)),
-.READ_CPLH_FC_LIMIT(128),
-.READ_CPLD_FC_LIMIT(2048),
+.READ_CPLH_FC_LIMIT(256),
+.READ_CPLD_FC_LIMIT(2048-256),
 .WRITE_OP_TABLE_SIZE(2**(RQ_SEQ_NUM_WIDTH-1)),
 .WRITE_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)),
 .BAR0_APERTURE(BAR0_APERTURE),
@@ -252,8 +252,7 @@ example_core_pcie_us_inst (
 */
 .cfg_max_read_req(cfg_max_read_req),
 .cfg_max_payload(cfg_max_payload),
-// .cfg_rcb_status(cfg_rcb_status),
-.cfg_rcb_status(1'b1), // force RCB 128 due to insufficient CPLH limit in US+ PCIe HIP
+.cfg_rcb_status(cfg_rcb_status),
 /*
 * Status

View File

@@ -54,7 +54,6 @@ export PARAM_AXIS_PCIE_RQ_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_
 export PARAM_AXIS_PCIE_RC_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),75,161)
 export PARAM_AXIS_PCIE_CQ_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),88,183)
 export PARAM_AXIS_PCIE_CC_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),33,81)
-export PARAM_RQ_SEQ_NUM_WIDTH := 6
 ifeq ($(SIM), icarus)
 PLUSARGS += -fst

View File

@@ -396,7 +396,6 @@ def test_fpga_core(request):
 parameters['AXIS_PCIE_RC_USER_WIDTH'] = 75 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 161
 parameters['AXIS_PCIE_CQ_USER_WIDTH'] = 88 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 183
 parameters['AXIS_PCIE_CC_USER_WIDTH'] = 33 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 81
-parameters['RQ_SEQ_NUM_WIDTH'] = 6
 extra_env = {f'PARAM_{k}': str(v) for k, v in parameters.items()}

View File

@@ -55,7 +55,7 @@ module fpga (
 parameter AXIS_PCIE_DATA_WIDTH = 512;
 parameter AXIS_PCIE_KEEP_WIDTH = (AXIS_PCIE_DATA_WIDTH/32);
 parameter AXIS_PCIE_RC_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 75 : 161;
-parameter AXIS_PCIE_RQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 60 : 137;
+parameter AXIS_PCIE_RQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 62 : 137;
 parameter AXIS_PCIE_CQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 85 : 183;
 parameter AXIS_PCIE_CC_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 33 : 81;
 parameter RC_STRADDLE = AXIS_PCIE_DATA_WIDTH >= 256;

View File

@@ -159,8 +159,8 @@ example_core_pcie_us #(
 .PCIE_TAG_COUNT(PCIE_TAG_COUNT),
 .READ_OP_TABLE_SIZE(PCIE_TAG_COUNT),
 .READ_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)),
-.READ_CPLH_FC_LIMIT(128),
-.READ_CPLD_FC_LIMIT(2048),
+.READ_CPLH_FC_LIMIT(256),
+.READ_CPLD_FC_LIMIT(2048-256),
 .WRITE_OP_TABLE_SIZE(2**(RQ_SEQ_NUM_WIDTH-1)),
 .WRITE_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)),
 .BAR0_APERTURE(BAR0_APERTURE),
@@ -263,8 +263,7 @@ example_core_pcie_us_inst (
 */
 .cfg_max_read_req(cfg_max_read_req),
 .cfg_max_payload(cfg_max_payload),
-// .cfg_rcb_status(cfg_rcb_status),
-.cfg_rcb_status(1'b1), // force RCB 128 due to insufficient CPLH limit in US+ PCIe HIP
+.cfg_rcb_status(cfg_rcb_status),
 /*
 * Status

View File

@@ -54,7 +54,6 @@ export PARAM_AXIS_PCIE_RQ_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_
 export PARAM_AXIS_PCIE_RC_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),75,161)
 export PARAM_AXIS_PCIE_CQ_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),88,183)
 export PARAM_AXIS_PCIE_CC_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),33,81)
-export PARAM_RQ_SEQ_NUM_WIDTH := 6
 ifeq ($(SIM), icarus)
 PLUSARGS += -fst

View File

@@ -396,7 +396,6 @@ def test_fpga_core(request):
 parameters['AXIS_PCIE_RC_USER_WIDTH'] = 75 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 161
 parameters['AXIS_PCIE_CQ_USER_WIDTH'] = 88 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 183
 parameters['AXIS_PCIE_CC_USER_WIDTH'] = 33 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 81
-parameters['RQ_SEQ_NUM_WIDTH'] = 6
 extra_env = {f'PARAM_{k}': str(v) for k, v in parameters.items()}

View File

@@ -155,7 +155,7 @@ example_core_pcie_us #(
 .READ_OP_TABLE_SIZE(PCIE_TAG_COUNT),
 .READ_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)),
 .READ_CPLH_FC_LIMIT(64),
-.READ_CPLD_FC_LIMIT(992),
+.READ_CPLD_FC_LIMIT(1024-64),
 .WRITE_OP_TABLE_SIZE(2**(RQ_SEQ_NUM_WIDTH-1)),
 .WRITE_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)),
 .BAR0_APERTURE(BAR0_APERTURE),
@@ -258,8 +258,7 @@ example_core_pcie_us_inst (
 */
 .cfg_max_read_req(cfg_max_read_req),
 .cfg_max_payload(cfg_max_payload),
-// .cfg_rcb_status(cfg_rcb_status),
-.cfg_rcb_status(1'b1), // force RCB 128 due to insufficient CPLH limit in US PCIe HIP
+.cfg_rcb_status(cfg_rcb_status),
 /*
 * Status

View File

@@ -54,7 +54,6 @@ export PARAM_AXIS_PCIE_RQ_USER_WIDTH := 60
 export PARAM_AXIS_PCIE_RC_USER_WIDTH := 75
 export PARAM_AXIS_PCIE_CQ_USER_WIDTH := 85
 export PARAM_AXIS_PCIE_CC_USER_WIDTH := 33
-export PARAM_RQ_SEQ_NUM_WIDTH := 4
 ifeq ($(SIM), icarus)
 PLUSARGS += -fst

View File

@@ -370,7 +370,6 @@ def test_fpga_core(request):
 parameters['AXIS_PCIE_RC_USER_WIDTH'] = 75
 parameters['AXIS_PCIE_CQ_USER_WIDTH'] = 85
 parameters['AXIS_PCIE_CC_USER_WIDTH'] = 33
-parameters['RQ_SEQ_NUM_WIDTH'] = 4
 extra_env = {f'PARAM_{k}': str(v) for k, v in parameters.items()}

View File

@@ -54,7 +54,7 @@ module fpga (
 parameter AXIS_PCIE_DATA_WIDTH = 256;
 parameter AXIS_PCIE_KEEP_WIDTH = (AXIS_PCIE_DATA_WIDTH/32);
 parameter AXIS_PCIE_RC_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 75 : 161;
-parameter AXIS_PCIE_RQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 60 : 137;
+parameter AXIS_PCIE_RQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 62 : 137;
 parameter AXIS_PCIE_CQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 85 : 183;
 parameter AXIS_PCIE_CC_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 33 : 81;
 parameter RC_STRADDLE = AXIS_PCIE_DATA_WIDTH >= 256;

View File

@@ -159,8 +159,8 @@ example_core_pcie_us #(
 .PCIE_TAG_COUNT(PCIE_TAG_COUNT),
 .READ_OP_TABLE_SIZE(PCIE_TAG_COUNT),
 .READ_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)),
-.READ_CPLH_FC_LIMIT(128),
-.READ_CPLD_FC_LIMIT(2048),
+.READ_CPLH_FC_LIMIT(256),
+.READ_CPLD_FC_LIMIT(2048-256),
 .WRITE_OP_TABLE_SIZE(2**(RQ_SEQ_NUM_WIDTH-1)),
 .WRITE_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)),
 .BAR0_APERTURE(BAR0_APERTURE),
@@ -263,8 +263,7 @@ example_core_pcie_us_inst (
 */
 .cfg_max_read_req(cfg_max_read_req),
 .cfg_max_payload(cfg_max_payload),
-// .cfg_rcb_status(cfg_rcb_status),
-.cfg_rcb_status(1'b1), // force RCB 128 due to insufficient CPLH limit in US+ PCIe HIP
+.cfg_rcb_status(cfg_rcb_status),
 /*
 * Status

View File

@@ -54,7 +54,6 @@ export PARAM_AXIS_PCIE_RQ_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_
 export PARAM_AXIS_PCIE_RC_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),75,161)
 export PARAM_AXIS_PCIE_CQ_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),88,183)
 export PARAM_AXIS_PCIE_CC_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),33,81)
-export PARAM_RQ_SEQ_NUM_WIDTH := 6
 ifeq ($(SIM), icarus)
 PLUSARGS += -fst

View File

@@ -396,7 +396,6 @@ def test_fpga_core(request):
 parameters['AXIS_PCIE_RC_USER_WIDTH'] = 75 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 161
 parameters['AXIS_PCIE_CQ_USER_WIDTH'] = 88 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 183
 parameters['AXIS_PCIE_CC_USER_WIDTH'] = 33 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 81
-parameters['RQ_SEQ_NUM_WIDTH'] = 6
 extra_env = {f'PARAM_{k}': str(v) for k, v in parameters.items()}

View File

@@ -157,7 +157,7 @@ example_core_pcie_us #(
 .READ_OP_TABLE_SIZE(PCIE_TAG_COUNT),
 .READ_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)),
 .READ_CPLH_FC_LIMIT(64),
-.READ_CPLD_FC_LIMIT(992),
+.READ_CPLD_FC_LIMIT(1024-64),
 .WRITE_OP_TABLE_SIZE(2**(RQ_SEQ_NUM_WIDTH-1)),
 .WRITE_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)),
 .BAR0_APERTURE(BAR0_APERTURE),
@@ -260,8 +260,7 @@ example_core_pcie_us_inst (
 */
 .cfg_max_read_req(cfg_max_read_req),
 .cfg_max_payload(cfg_max_payload),
-// .cfg_rcb_status(cfg_rcb_status),
-.cfg_rcb_status(1'b1), // force RCB 128 due to insufficient CPLH limit in US PCIe HIP
+.cfg_rcb_status(cfg_rcb_status),
 /*
 * Status

View File

@@ -57,6 +57,10 @@ module fpga (
 parameter AXIS_PCIE_DATA_WIDTH = 256;
 parameter AXIS_PCIE_KEEP_WIDTH = (AXIS_PCIE_DATA_WIDTH/32);
+parameter AXIS_PCIE_RC_USER_WIDTH = 75;
+parameter AXIS_PCIE_RQ_USER_WIDTH = 60;
+parameter AXIS_PCIE_CQ_USER_WIDTH = 85;
+parameter AXIS_PCIE_CC_USER_WIDTH = 33;
 // Clock and reset
 wire pcie_user_clk;
@@ -107,33 +111,33 @@ ibufds_gte3_pcie_mgt_refclk_inst (
 .ODIV2 (pcie_sys_clk)
 );
 wire [AXIS_PCIE_DATA_WIDTH-1:0] axis_rq_tdata;
 wire [AXIS_PCIE_KEEP_WIDTH-1:0] axis_rq_tkeep;
 wire axis_rq_tlast;
 wire axis_rq_tready;
-wire [59:0] axis_rq_tuser;
+wire [AXIS_PCIE_RQ_USER_WIDTH-1:0] axis_rq_tuser;
 wire axis_rq_tvalid;
 wire [AXIS_PCIE_DATA_WIDTH-1:0] axis_rc_tdata;
 wire [AXIS_PCIE_KEEP_WIDTH-1:0] axis_rc_tkeep;
 wire axis_rc_tlast;
 wire axis_rc_tready;
-wire [74:0] axis_rc_tuser;
+wire [AXIS_PCIE_RC_USER_WIDTH-1:0] axis_rc_tuser;
 wire axis_rc_tvalid;
 wire [AXIS_PCIE_DATA_WIDTH-1:0] axis_cq_tdata;
 wire [AXIS_PCIE_KEEP_WIDTH-1:0] axis_cq_tkeep;
 wire axis_cq_tlast;
 wire axis_cq_tready;
-wire [84:0] axis_cq_tuser;
+wire [AXIS_PCIE_CQ_USER_WIDTH-1:0] axis_cq_tuser;
 wire axis_cq_tvalid;
 wire [AXIS_PCIE_DATA_WIDTH-1:0] axis_cc_tdata;
 wire [AXIS_PCIE_KEEP_WIDTH-1:0] axis_cc_tkeep;
 wire axis_cc_tlast;
 wire axis_cc_tready;
-wire [32:0] axis_cc_tuser;
+wire [AXIS_PCIE_CC_USER_WIDTH-1:0] axis_cc_tuser;
 wire axis_cc_tvalid;
 // ila_0 rq_ila (
 // .clk(pcie_user_clk),
@@ -357,7 +361,12 @@ pcie3_ultrascale_inst (
 );
 fpga_core #(
-.AXIS_PCIE_DATA_WIDTH(AXIS_PCIE_DATA_WIDTH)
+.AXIS_PCIE_DATA_WIDTH(AXIS_PCIE_DATA_WIDTH),
+.AXIS_PCIE_KEEP_WIDTH(AXIS_PCIE_KEEP_WIDTH),
+.AXIS_PCIE_RC_USER_WIDTH(AXIS_PCIE_RC_USER_WIDTH),
+.AXIS_PCIE_RQ_USER_WIDTH(AXIS_PCIE_RQ_USER_WIDTH),
+.AXIS_PCIE_CQ_USER_WIDTH(AXIS_PCIE_CQ_USER_WIDTH),
+.AXIS_PCIE_CC_USER_WIDTH(AXIS_PCIE_CC_USER_WIDTH)
 )
 core_inst (
 /*

View File

@@ -34,89 +34,93 @@ THE SOFTWARE.
 module fpga_core #
 (
 parameter AXIS_PCIE_DATA_WIDTH = 256,
-parameter AXIS_PCIE_KEEP_WIDTH = (AXIS_PCIE_DATA_WIDTH/32)
+parameter AXIS_PCIE_KEEP_WIDTH = (AXIS_PCIE_DATA_WIDTH/32),
+parameter AXIS_PCIE_RC_USER_WIDTH = 75,
+parameter AXIS_PCIE_RQ_USER_WIDTH = 60,
+parameter AXIS_PCIE_CQ_USER_WIDTH = 85,
+parameter AXIS_PCIE_CC_USER_WIDTH = 33
 )
 (
 /*
 * Clock: 250 MHz
 * Synchronous reset
 */
 input wire clk,
 input wire rst,
 /*
 * GPIO
 */
 input wire btnu,
 input wire btnl,
 input wire btnd,
 input wire btnr,
 input wire btnc,
 input wire [3:0] sw,
 output wire [7:0] led,
 /*
 * PCIe
 */
 output wire [AXIS_PCIE_DATA_WIDTH-1:0] m_axis_rq_tdata,
 output wire [AXIS_PCIE_KEEP_WIDTH-1:0] m_axis_rq_tkeep,
 output wire m_axis_rq_tlast,
 input wire m_axis_rq_tready,
-output wire [59:0] m_axis_rq_tuser,
+output wire [AXIS_PCIE_RQ_USER_WIDTH-1:0] m_axis_rq_tuser,
 output wire m_axis_rq_tvalid,
 input wire [AXIS_PCIE_DATA_WIDTH-1:0] s_axis_rc_tdata,
 input wire [AXIS_PCIE_KEEP_WIDTH-1:0] s_axis_rc_tkeep,
 input wire s_axis_rc_tlast,
 output wire s_axis_rc_tready,
-input wire [74:0] s_axis_rc_tuser,
+input wire [AXIS_PCIE_RC_USER_WIDTH-1:0] s_axis_rc_tuser,
 input wire s_axis_rc_tvalid,
 input wire [AXIS_PCIE_DATA_WIDTH-1:0] s_axis_cq_tdata,
 input wire [AXIS_PCIE_KEEP_WIDTH-1:0] s_axis_cq_tkeep,
 input wire s_axis_cq_tlast,
 output wire s_axis_cq_tready,
-input wire [84:0] s_axis_cq_tuser,
+input wire [AXIS_PCIE_CQ_USER_WIDTH-1:0] s_axis_cq_tuser,
 input wire s_axis_cq_tvalid,
 output wire [AXIS_PCIE_DATA_WIDTH-1:0] m_axis_cc_tdata,
 output wire [AXIS_PCIE_KEEP_WIDTH-1:0] m_axis_cc_tkeep,
 output wire m_axis_cc_tlast,
 input wire m_axis_cc_tready,
-output wire [32:0] m_axis_cc_tuser,
+output wire [AXIS_PCIE_CC_USER_WIDTH-1:0] m_axis_cc_tuser,
 output wire m_axis_cc_tvalid,
 input wire [2:0] cfg_max_payload,
 input wire [2:0] cfg_max_read_req,
 output wire [18:0] cfg_mgmt_addr,
 output wire cfg_mgmt_write,
 output wire [31:0] cfg_mgmt_write_data,
 output wire [3:0] cfg_mgmt_byte_enable,
 output wire cfg_mgmt_read,
 input wire [31:0] cfg_mgmt_read_data,
 input wire cfg_mgmt_read_write_done,
 input wire [3:0] cfg_interrupt_msi_enable,
 input wire [7:0] cfg_interrupt_msi_vf_enable,
 input wire [11:0] cfg_interrupt_msi_mmenable,
 input wire cfg_interrupt_msi_mask_update,
 input wire [31:0] cfg_interrupt_msi_data,
 output wire [3:0] cfg_interrupt_msi_select,
 output wire [31:0] cfg_interrupt_msi_int,
 output wire [31:0] cfg_interrupt_msi_pending_status,
 output wire cfg_interrupt_msi_pending_status_data_enable,
 output wire [3:0] cfg_interrupt_msi_pending_status_function_num,
 input wire cfg_interrupt_msi_sent,
 input wire cfg_interrupt_msi_fail,
 output wire [2:0] cfg_interrupt_msi_attr,
 output wire cfg_interrupt_msi_tph_present,
 output wire [1:0] cfg_interrupt_msi_tph_type,
 output wire [8:0] cfg_interrupt_msi_tph_st_tag,
 output wire [3:0] cfg_interrupt_msi_function_number,
 output wire status_error_cor,
 output wire status_error_uncor
 );
 parameter PCIE_ADDR_WIDTH = 64;

View File

@@ -377,7 +377,6 @@ def test_fpga_core(request):
 parameters['AXIS_PCIE_RC_USER_WIDTH'] = 75
 parameters['AXIS_PCIE_CQ_USER_WIDTH'] = 85
 parameters['AXIS_PCIE_CC_USER_WIDTH'] = 33
-parameters['RQ_SEQ_NUM_WIDTH'] = 4
 extra_env = {f'PARAM_{k}': str(v) for k, v in parameters.items()}

View File

@@ -58,7 +58,7 @@ module fpga (
 parameter AXIS_PCIE_DATA_WIDTH = 512;
 parameter AXIS_PCIE_KEEP_WIDTH = (AXIS_PCIE_DATA_WIDTH/32);
 parameter AXIS_PCIE_RC_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 75 : 161;
-parameter AXIS_PCIE_RQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 60 : 137;
+parameter AXIS_PCIE_RQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 62 : 137;
 parameter AXIS_PCIE_CQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 85 : 183;
 parameter AXIS_PCIE_CC_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 33 : 81;
 parameter RC_STRADDLE = AXIS_PCIE_DATA_WIDTH >= 256;

View File

@@ -161,8 +161,8 @@ example_core_pcie_us #(
 .PCIE_TAG_COUNT(PCIE_TAG_COUNT),
 .READ_OP_TABLE_SIZE(PCIE_TAG_COUNT),
 .READ_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)),
-.READ_CPLH_FC_LIMIT(128),
-.READ_CPLD_FC_LIMIT(2048),
+.READ_CPLH_FC_LIMIT(256),
+.READ_CPLD_FC_LIMIT(2048-256),
 .WRITE_OP_TABLE_SIZE(2**(RQ_SEQ_NUM_WIDTH-1)),
 .WRITE_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)),
 .BAR0_APERTURE(BAR0_APERTURE),
@@ -265,8 +265,7 @@ example_core_pcie_us_inst (
 */
 .cfg_max_read_req(cfg_max_read_req),
 .cfg_max_payload(cfg_max_payload),
-// .cfg_rcb_status(cfg_rcb_status),
-.cfg_rcb_status(1'b1), // force RCB 128 due to insufficient CPLH limit in US+ PCIe HIP
+.cfg_rcb_status(cfg_rcb_status),
 /*
 * Status

View File

@@ -54,7 +54,6 @@ export PARAM_AXIS_PCIE_RQ_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_
 export PARAM_AXIS_PCIE_RC_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),75,161)
 export PARAM_AXIS_PCIE_CQ_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),88,183)
 export PARAM_AXIS_PCIE_CC_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),33,81)
-export PARAM_RQ_SEQ_NUM_WIDTH := 6
 ifeq ($(SIM), icarus)
 PLUSARGS += -fst

View File

@@ -403,7 +403,6 @@ def test_fpga_core(request):
 parameters['AXIS_PCIE_RC_USER_WIDTH'] = 75 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 161
 parameters['AXIS_PCIE_CQ_USER_WIDTH'] = 88 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 183
 parameters['AXIS_PCIE_CC_USER_WIDTH'] = 33 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 81
-parameters['RQ_SEQ_NUM_WIDTH'] = 6
 extra_env = {f'PARAM_{k}': str(v) for k, v in parameters.items()}

View File

@@ -53,7 +53,7 @@ module fpga (
 parameter AXIS_PCIE_DATA_WIDTH = 512;
 parameter AXIS_PCIE_KEEP_WIDTH = (AXIS_PCIE_DATA_WIDTH/32);
 parameter AXIS_PCIE_RC_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 75 : 161;
-parameter AXIS_PCIE_RQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 60 : 137;
+parameter AXIS_PCIE_RQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 62 : 137;
 parameter AXIS_PCIE_CQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 85 : 183;
 parameter AXIS_PCIE_CC_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 33 : 81;
 parameter RC_STRADDLE = AXIS_PCIE_DATA_WIDTH >= 256;

View File

@@ -156,8 +156,8 @@ example_core_pcie_us #(
 .PCIE_TAG_COUNT(PCIE_TAG_COUNT),
 .READ_OP_TABLE_SIZE(PCIE_TAG_COUNT),
 .READ_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)),
-.READ_CPLH_FC_LIMIT(128),
-.READ_CPLD_FC_LIMIT(2048),
+.READ_CPLH_FC_LIMIT(256),
+.READ_CPLD_FC_LIMIT(2048-256),
 .WRITE_OP_TABLE_SIZE(2**(RQ_SEQ_NUM_WIDTH-1)),
 .WRITE_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)),
 .BAR0_APERTURE(BAR0_APERTURE),
@@ -260,8 +260,7 @@ example_core_pcie_us_inst (
 */
 .cfg_max_read_req(cfg_max_read_req),
 .cfg_max_payload(cfg_max_payload),
-// .cfg_rcb_status(cfg_rcb_status),
-.cfg_rcb_status(1'b1), // force RCB 128 due to insufficient CPLH limit in US+ PCIe HIP
+.cfg_rcb_status(cfg_rcb_status),
 /*
 * Status

View File

@@ -54,7 +54,6 @@ export PARAM_AXIS_PCIE_RQ_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_
 export PARAM_AXIS_PCIE_RC_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),75,161)
 export PARAM_AXIS_PCIE_CQ_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),88,183)
 export PARAM_AXIS_PCIE_CC_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),33,81)
-export PARAM_RQ_SEQ_NUM_WIDTH := 6
 ifeq ($(SIM), icarus)
 PLUSARGS += -fst

View File

@@ -398,7 +398,6 @@ def test_fpga_core(request):
 parameters['AXIS_PCIE_RC_USER_WIDTH'] = 75 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 161
 parameters['AXIS_PCIE_CQ_USER_WIDTH'] = 88 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 183
 parameters['AXIS_PCIE_CC_USER_WIDTH'] = 33 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 81
-parameters['RQ_SEQ_NUM_WIDTH'] = 6
 extra_env = {f'PARAM_{k}': str(v) for k, v in parameters.items()}

View File

@@ -58,7 +58,7 @@ module fpga (
 parameter AXIS_PCIE_DATA_WIDTH = 128;
 parameter AXIS_PCIE_KEEP_WIDTH = (AXIS_PCIE_DATA_WIDTH/32);
 parameter AXIS_PCIE_RC_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 75 : 161;
-parameter AXIS_PCIE_RQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 60 : 137;
+parameter AXIS_PCIE_RQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 62 : 137;
 parameter AXIS_PCIE_CQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 85 : 183;
 parameter AXIS_PCIE_CC_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 33 : 81;
 parameter RC_STRADDLE = AXIS_PCIE_DATA_WIDTH >= 256;

View File

@@ -161,8 +161,8 @@ example_core_pcie_us #(
 .PCIE_TAG_COUNT(PCIE_TAG_COUNT),
 .READ_OP_TABLE_SIZE(PCIE_TAG_COUNT),
 .READ_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)),
-.READ_CPLH_FC_LIMIT(128),
-.READ_CPLD_FC_LIMIT(2048),
+.READ_CPLH_FC_LIMIT(256),
+.READ_CPLD_FC_LIMIT(2048-256),
 .WRITE_OP_TABLE_SIZE(2**(RQ_SEQ_NUM_WIDTH-1)),
 .WRITE_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)),
 .BAR0_APERTURE(BAR0_APERTURE),
@@ -265,8 +265,7 @@ example_core_pcie_us_inst (
 */
 .cfg_max_read_req(cfg_max_read_req),
 .cfg_max_payload(cfg_max_payload),
-// .cfg_rcb_status(cfg_rcb_status),
-.cfg_rcb_status(1'b1), // force RCB 128 due to insufficient CPLH limit in US+ PCIe HIP
+.cfg_rcb_status(cfg_rcb_status),
 /*
 * Status

View File

@@ -54,7 +54,6 @@ export PARAM_AXIS_PCIE_RQ_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_
 export PARAM_AXIS_PCIE_RC_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),75,161)
 export PARAM_AXIS_PCIE_CQ_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),88,183)
 export PARAM_AXIS_PCIE_CC_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),33,81)
-export PARAM_RQ_SEQ_NUM_WIDTH := 6
 ifeq ($(SIM), icarus)
 PLUSARGS += -fst

View File

@@ -403,7 +403,6 @@ def test_fpga_core(request):
 parameters['AXIS_PCIE_RC_USER_WIDTH'] = 75 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 161
 parameters['AXIS_PCIE_CQ_USER_WIDTH'] = 88 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 183
 parameters['AXIS_PCIE_CC_USER_WIDTH'] = 33 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 81
-parameters['RQ_SEQ_NUM_WIDTH'] = 6
 extra_env = {f'PARAM_{k}': str(v) for k, v in parameters.items()}

View File

@@ -103,6 +103,8 @@ static void dma_block_read(struct example_dev *edev,
 if ((ioread32(edev->bar[0] + 0x001000) & 1) != 0)
 dev_warn(edev->dev, "%s: operation timed out", __func__);
+if ((ioread32(edev->bar[0] + 0x000000) & 0x300) != 0)
+dev_warn(edev->dev, "%s: DMA engine busy", __func__);
 }
 static void dma_block_write(struct example_dev *edev,
@@ -157,15 +159,22 @@ static void dma_block_write(struct example_dev *edev,
 if ((ioread32(edev->bar[0] + 0x001100) & 1) != 0)
 dev_warn(edev->dev, "%s: operation timed out", __func__);
+if ((ioread32(edev->bar[0] + 0x000000) & 0x300) != 0)
+dev_warn(edev->dev, "%s: DMA engine busy", __func__);
 }
 static void dma_block_read_bench(struct example_dev *edev,
 dma_addr_t dma_addr, u64 size, u64 stride, u64 count)
 {
 u64 cycles;
+u32 rd_req;
+u32 rd_cpl;
 udelay(5);
+rd_req = ioread32(edev->bar[0] + 0x000020);
+rd_cpl = ioread32(edev->bar[0] + 0x000024);
 dma_block_read(edev, dma_addr, 0, 0x3fff, stride,
 0, 0, 0x3fff, stride, size, count);
@@ -173,17 +182,23 @@ static void dma_block_read_bench(struct example_dev *edev,
 udelay(5);
-dev_info(edev->dev, "read %lld blocks of %lld bytes (stride %lld) in %lld ns: %lld Mbps",
-count, size, stride, cycles * 4, size * count * 8 * 1000 / (cycles * 4));
+rd_req = ioread32(edev->bar[0] + 0x000020) - rd_req;
+rd_cpl = ioread32(edev->bar[0] + 0x000024) - rd_cpl;
+dev_info(edev->dev, "read %lld blocks of %lld bytes (total %lld B, stride %lld) in %lld ns (%d req %d cpl): %lld Mbps",
+count, size, count*size, stride, cycles * 4, rd_req, rd_cpl, size * count * 8 * 1000 / (cycles * 4));
 }
 static void dma_block_write_bench(struct example_dev *edev,
 dma_addr_t dma_addr, u64 size, u64 stride, u64 count)
 {
 u64 cycles;
+u32 wr_req;
 udelay(5);
+wr_req = ioread32(edev->bar[0] + 0x000028);
 dma_block_write(edev, dma_addr, 0, 0x3fff, stride,
 0, 0, 0x3fff, stride, size, count);
@@ -191,8 +206,83 @@ static void dma_block_write_bench(struct example_dev *edev,
 udelay(5);
-dev_info(edev->dev, "wrote %lld blocks of %lld bytes (stride %lld) in %lld ns: %lld Mbps",
-count, size, stride, cycles * 4, size * count * 8 * 1000 / (cycles * 4));
+wr_req = ioread32(edev->bar[0] + 0x000028) - wr_req;
+dev_info(edev->dev, "wrote %lld blocks of %lld bytes (total %lld B, stride %lld) in %lld ns (%d req): %lld Mbps",
+count, size, count*size, stride, cycles * 4, wr_req, size * count * 8 * 1000 / (cycles * 4));
+}
+static void dma_cpl_buf_test(struct example_dev *edev, dma_addr_t dma_addr,
+u64 size, u64 stride, u64 count, int stall)
+{
+unsigned long t;
+u64 cycles;
+u32 rd_req;
+u32 rd_cpl;
+rd_req = ioread32(edev->bar[0] + 0x000020);
+rd_cpl = ioread32(edev->bar[0] + 0x000024);
+// DMA base address
+iowrite32(dma_addr & 0xffffffff, edev->bar[0] + 0x001080);
+iowrite32((dma_addr >> 32) & 0xffffffff, edev->bar[0] + 0x001084);
+// DMA offset address
+iowrite32(0, edev->bar[0] + 0x001088);
+iowrite32(0, edev->bar[0] + 0x00108c);
+// DMA offset mask
+iowrite32(0x3fff, edev->bar[0] + 0x001090);
+iowrite32(0, edev->bar[0] + 0x001094);
+// DMA stride
+iowrite32(stride & 0xffffffff, edev->bar[0] + 0x001098);
+iowrite32((stride >> 32) & 0xffffffff, edev->bar[0] + 0x00109c);
+// RAM base address
+iowrite32(0, edev->bar[0] + 0x0010c0);
+iowrite32(0, edev->bar[0] + 0x0010c4);
+// RAM offset address
+iowrite32(0, edev->bar[0] + 0x0010c8);
+iowrite32(0, edev->bar[0] + 0x0010cc);
+// RAM offset mask
+iowrite32(0x3fff, edev->bar[0] + 0x0010d0);
+iowrite32(0, edev->bar[0] + 0x0010d4);
+// RAM stride
+iowrite32(stride & 0xffffffff, edev->bar[0] + 0x0010d8);
+iowrite32((stride >> 32) & 0xffffffff, edev->bar[0] + 0x0010dc);
+// clear cycle count
+iowrite32(0, edev->bar[0] + 0x001008);
+iowrite32(0, edev->bar[0] + 0x00100c);
+// block length
+iowrite32(size, edev->bar[0] + 0x001010);
+// block count
+iowrite32(count, edev->bar[0] + 0x001018);
+if (stall)
+iowrite32(stall, edev->bar[0] + 0x000040);
+// start
+iowrite32(1, edev->bar[0] + 0x001000);
+if (stall)
+msleep(10);
+// wait for transfer to complete
+t = jiffies + msecs_to_jiffies(20000);
+while (time_before(jiffies, t)) {
+if ((ioread32(edev->bar[0] + 0x001000) & 1) == 0)
+break;
+}
+if ((ioread32(edev->bar[0] + 0x001000) & 1) != 0)
+dev_warn(edev->dev, "%s: operation timed out", __func__);
+if ((ioread32(edev->bar[0] + 0x000000) & 0x300) != 0)
+dev_warn(edev->dev, "%s: DMA engine busy", __func__);
+cycles = ioread32(edev->bar[0] + 0x001008);
+rd_req = ioread32(edev->bar[0] + 0x000020) - rd_req;
+rd_cpl = ioread32(edev->bar[0] + 0x000024) - rd_cpl;
+dev_info(edev->dev, "read %lld x %lld B (total %lld B %lld CPLD, stride %lld) in %lld ns (%d req %d cpl): %lld Mbps",
+count, size, count*size, count*((size+15) / 16), stride, cycles * 4, rd_req, rd_cpl, size * count * 8 * 1000 / (cycles * 4));
 }
 static irqreturn_t edev_intr(int irq, void *data)
@@ -227,16 +317,20 @@ static int edev_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 if (pdev->pcie_cap) {
 u16 devctl;
 u32 lnkcap;
+u16 lnkctl;
 u16 lnksta;
 pci_read_config_word(pdev, pdev->pcie_cap + PCI_EXP_DEVCTL, &devctl);
 pci_read_config_dword(pdev, pdev->pcie_cap + PCI_EXP_LNKCAP, &lnkcap);
+pci_read_config_word(pdev, pdev->pcie_cap + PCI_EXP_LNKCTL, &lnkctl);
 pci_read_config_word(pdev, pdev->pcie_cap + PCI_EXP_LNKSTA, &lnksta);
 dev_info(dev, " Max payload size: %d bytes",
 128 << ((devctl & PCI_EXP_DEVCTL_PAYLOAD) >> 5));
 dev_info(dev, " Max read request size: %d bytes",
 128 << ((devctl & PCI_EXP_DEVCTL_READRQ) >> 12));
+dev_info(dev, " Read completion boundary: %d bytes",
+lnkctl & PCI_EXP_LNKCTL_RCB ? 128 : 64);
 dev_info(dev, " Link capability: gen %d x%d",
 lnkcap & PCI_EXP_LNKCAP_SLS, (lnkcap & PCI_EXP_LNKCAP_MLW) >> 4);
 dev_info(dev, " Link status: gen %d x%d",
@@ -361,6 +455,7 @@ static int edev_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 msleep(1);
 dev_info(dev, "Read status");
+dev_info(dev, "%08x", ioread32(edev->bar[0] + 0x000000));
 dev_info(dev, "%08x", ioread32(edev->bar[0] + 0x000118));
 dev_info(dev, "start copy to host");
@@ -374,6 +469,7 @@ static int edev_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 msleep(1);
 dev_info(dev, "Read status");
+dev_info(dev, "%08x", ioread32(edev->bar[0] + 0x000000));
 dev_info(dev, "%08x", ioread32(edev->bar[0] + 0x000218));
 dev_info(dev, "read test data");
@@ -398,6 +494,7 @@ static int edev_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 msleep(1);
 dev_info(dev, "Read status");
+dev_info(dev, "%08x", ioread32(edev->bar[0] + 0x000000));
 dev_info(dev, "%08x", ioread32(edev->bar[0] + 0x000218));
 dev_info(dev, "read data");
@@ -407,31 +504,90 @@ static int edev_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 if (!mismatch) {
 u64 size;
 u64 stride;
+u64 count;
 dev_info(dev, "disable interrupts");
 iowrite32(0x0, edev->bar[0] + 0x000008);
+dev_info(dev, "test RX completion buffer (CPLH, 8)");
+size = 8;
+stride = size;
+for (count = 32; count <= 256; count += 8) {
+dma_cpl_buf_test(edev,
+edev->dma_region_addr + 0x0000,
+size, stride, count, 100000);
+if ((ioread32(edev->bar[0] + 0x000000) & 0x300) != 0)
+goto out;
+}
+dev_info(dev, "test RX completion buffer (CPLH, unaligned 8+64)");
+size = 8+64;
+stride = 0;
+for (count = 8; count <= 256; count += 8) {
+dma_cpl_buf_test(edev,
+edev->dma_region_addr + 128 - 8,
+size, stride, count, 400000);
+if ((ioread32(edev->bar[0] + 0x000000) & 0x300) != 0)
+goto out;
+}
+dev_info(dev, "test RX completion buffer (CPLH, unaligned 8+128+8)");
+size = 8+128+8;
+stride = 0;
+for (count = 8; count <= 256; count += 8) {
+dma_cpl_buf_test(edev,
+edev->dma_region_addr + 128 - 8,
+size, stride, count, 100000);
+if ((ioread32(edev->bar[0] + 0x000000) & 0x300) != 0)
+goto out;
+}
+dev_info(dev, "test RX completion buffer (CPLD)");
+size = 512;
+stride = size;
+for (count = 8; count <= 256; count += 8) {
+dma_cpl_buf_test(edev,
+edev->dma_region_addr + 0x0000,
+size, stride, count, 100000);
+if ((ioread32(edev->bar[0] + 0x000000) & 0x300) != 0)
+goto out;
+}
 dev_info(dev, "perform block reads (dma_alloc_coherent)");
+count = 10000;
 for (size = 1; size <= 8192; size *= 2) {
 for (stride = size; stride <= max(size, 256llu); stride *= 2) {
 dma_block_read_bench(edev,
 edev->dma_region_addr + 0x0000,
-size, stride, 10000);
+size, stride, count);
+if ((ioread32(edev->bar[0] + 0x000000) & 0x300) != 0)
+goto out;
 }
 }
 dev_info(dev, "perform block writes (dma_alloc_coherent)");
+count = 10000;
 for (size = 1; size <= 8192; size *= 2) {
 for (stride = size; stride <= max(size, 256llu); stride *= 2) {
 dma_block_write_bench(edev,
 edev->dma_region_addr + 0x0000,
-size, stride, 10000);
+size, stride, count);
+if ((ioread32(edev->bar[0] + 0x000000) & 0x300) != 0)
+goto out;
 }
 }
 }
+out:
+dev_info(dev, "Read status");
+dev_info(dev, "%08x", ioread32(edev->bar[0] + 0x000000));
 // probe complete
 return 0;

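A hedged aside (not part of this commit): the driver changes above repeatedly test (ioread32(edev->bar[0] + 0x000000) & 0x300) to detect a stuck DMA engine. The mask 0x300 selects bits 8 and 9 of the control/status register, which the example_core.v change later in this commit drives with dma_wr_busy and dma_rd_busy. The fragment below only puts names on those magic numbers; the macros and the helper do not exist in the driver and rely on the driver's existing includes (ioread32, struct example_dev, bool).

```c
/* Illustrative names only; offset and bit positions follow the register
 * read logic added to example_core.v in this commit. */
#define EXAMPLE_REG_CTRL          0x000000
#define EXAMPLE_CTRL_DMA_ENABLE   (1u << 0)   /* bit 0: DMA enable */
#define EXAMPLE_CTRL_DMA_WR_BUSY  (1u << 8)   /* bit 8: DMA write engine busy */
#define EXAMPLE_CTRL_DMA_RD_BUSY  (1u << 9)   /* bit 9: DMA read engine busy */

/* Equivalent to the driver's "(ioread32(...) & 0x300) != 0" busy check. */
static inline bool edev_dma_busy(struct example_dev *edev)
{
	return (ioread32(edev->bar[0] + EXAMPLE_REG_CTRL) &
		(EXAMPLE_CTRL_DMA_WR_BUSY | EXAMPLE_CTRL_DMA_RD_BUSY)) != 0;
}
```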
View File

@ -152,7 +152,18 @@ module example_core #
*/ */
output wire [IRQ_INDEX_WIDTH-1:0] irq_index, output wire [IRQ_INDEX_WIDTH-1:0] irq_index,
output wire irq_valid, output wire irq_valid,
input wire irq_ready input wire irq_ready,
/*
* Control and status
*/
output wire dma_enable,
input wire dma_rd_busy,
input wire dma_wr_busy,
input wire dma_rd_req,
input wire dma_rd_cpl,
input wire dma_wr_req,
output wire rx_cpl_stall
); );
localparam RAM_ADDR_IMM_WIDTH = (DMA_IMM_ENABLE && (DMA_IMM_WIDTH > RAM_ADDR_WIDTH)) ? DMA_IMM_WIDTH : RAM_ADDR_WIDTH; localparam RAM_ADDR_IMM_WIDTH = (DMA_IMM_ENABLE && (DMA_IMM_WIDTH > RAM_ADDR_WIDTH)) ? DMA_IMM_WIDTH : RAM_ADDR_WIDTH;
@ -203,6 +214,9 @@ reg axil_ctrl_rvalid_reg = 1'b0, axil_ctrl_rvalid_next;
reg [63:0] cycle_count_reg = 0; reg [63:0] cycle_count_reg = 0;
reg [15:0] dma_read_active_count_reg = 0; reg [15:0] dma_read_active_count_reg = 0;
reg [15:0] dma_write_active_count_reg = 0; reg [15:0] dma_write_active_count_reg = 0;
reg [31:0] dma_rd_req_count_reg = 0;
reg [31:0] dma_rd_cpl_count_reg = 0;
reg [31:0] dma_wr_req_count_reg = 0;
reg [DMA_ADDR_WIDTH-1:0] dma_read_desc_dma_addr_reg = 0, dma_read_desc_dma_addr_next; reg [DMA_ADDR_WIDTH-1:0] dma_read_desc_dma_addr_reg = 0, dma_read_desc_dma_addr_next;
reg [RAM_ADDR_WIDTH-1:0] dma_read_desc_ram_addr_reg = 0, dma_read_desc_ram_addr_next; reg [RAM_ADDR_WIDTH-1:0] dma_read_desc_ram_addr_reg = 0, dma_read_desc_ram_addr_next;
@ -230,6 +244,9 @@ reg dma_rd_int_en_reg = 0, dma_rd_int_en_next;
reg dma_wr_int_en_reg = 0, dma_wr_int_en_next; reg dma_wr_int_en_reg = 0, dma_wr_int_en_next;
reg irq_valid_reg = 1'b0, irq_valid_next; reg irq_valid_reg = 1'b0, irq_valid_next;
reg rx_cpl_stall_reg = 1'b0, rx_cpl_stall_next;
reg [23:0] rx_cpl_stall_count_reg = 0, rx_cpl_stall_count_next;
reg dma_read_block_run_reg = 1'b0, dma_read_block_run_next; reg dma_read_block_run_reg = 1'b0, dma_read_block_run_next;
reg [DMA_LEN_WIDTH-1:0] dma_read_block_len_reg = 0, dma_read_block_len_next; reg [DMA_LEN_WIDTH-1:0] dma_read_block_len_reg = 0, dma_read_block_len_next;
reg [31:0] dma_read_block_count_reg = 0, dma_read_block_count_next; reg [31:0] dma_read_block_count_reg = 0, dma_read_block_count_next;
@ -284,6 +301,9 @@ assign m_axis_dma_write_desc_valid = dma_write_desc_valid_reg;
assign irq_index = 0; assign irq_index = 0;
assign irq_valid = irq_valid_reg; assign irq_valid = irq_valid_reg;
assign dma_enable = dma_enable_reg;
assign rx_cpl_stall = rx_cpl_stall_reg;
always @* begin always @* begin
axil_ctrl_awready_next = 1'b0; axil_ctrl_awready_next = 1'b0;
axil_ctrl_wready_next = 1'b0; axil_ctrl_wready_next = 1'b0;
@ -322,6 +342,9 @@ always @* begin
irq_valid_next = irq_valid_reg && !irq_ready; irq_valid_next = irq_valid_reg && !irq_ready;
rx_cpl_stall_next = 1'b0;
rx_cpl_stall_count_next = rx_cpl_stall_count_reg;
dma_read_block_run_next = dma_read_block_run_reg; dma_read_block_run_next = dma_read_block_run_reg;
dma_read_block_len_next = dma_read_block_len_reg; dma_read_block_len_next = dma_read_block_len_reg;
dma_read_block_count_next = dma_read_block_count_reg; dma_read_block_count_next = dma_read_block_count_reg;
@ -348,6 +371,11 @@ always @* begin
dma_write_block_ram_offset_mask_next = dma_write_block_ram_offset_mask_reg; dma_write_block_ram_offset_mask_next = dma_write_block_ram_offset_mask_reg;
dma_write_block_ram_stride_next = dma_write_block_ram_stride_reg; dma_write_block_ram_stride_next = dma_write_block_ram_stride_reg;
if (rx_cpl_stall_count_reg) begin
rx_cpl_stall_count_next = rx_cpl_stall_count_reg - 1;
rx_cpl_stall_next = 1'b1;
end
if (s_axil_ctrl_awvalid && s_axil_ctrl_wvalid && !axil_ctrl_bvalid_reg) begin if (s_axil_ctrl_awvalid && s_axil_ctrl_wvalid && !axil_ctrl_bvalid_reg) begin
// write operation // write operation
axil_ctrl_awready_next = 1'b1; axil_ctrl_awready_next = 1'b1;
@ -364,6 +392,7 @@ always @* begin
dma_rd_int_en_next = s_axil_ctrl_wdata[0]; dma_rd_int_en_next = s_axil_ctrl_wdata[0];
dma_wr_int_en_next = s_axil_ctrl_wdata[1]; dma_wr_int_en_next = s_axil_ctrl_wdata[1];
end end
16'h0040: rx_cpl_stall_count_next = s_axil_ctrl_wdata;
// single read // single read
16'h0100: dma_read_desc_dma_addr_next[31:0] = s_axil_ctrl_wdata; 16'h0100: dma_read_desc_dma_addr_next[31:0] = s_axil_ctrl_wdata;
16'h0104: dma_read_desc_dma_addr_next[63:32] = s_axil_ctrl_wdata; 16'h0104: dma_read_desc_dma_addr_next[63:32] = s_axil_ctrl_wdata;
@ -437,6 +466,8 @@ always @* begin
// control // control
16'h0000: begin 16'h0000: begin
axil_ctrl_rdata_next[0] = dma_enable_reg; axil_ctrl_rdata_next[0] = dma_enable_reg;
axil_ctrl_rdata_next[8] = dma_wr_busy;
axil_ctrl_rdata_next[9] = dma_rd_busy;
end end
16'h0008: begin 16'h0008: begin
axil_ctrl_rdata_next[0] = dma_rd_int_en_reg; axil_ctrl_rdata_next[0] = dma_rd_int_en_reg;
@ -444,8 +475,12 @@ always @* begin
end end
16'h0010: axil_ctrl_rdata_next = cycle_count_reg; 16'h0010: axil_ctrl_rdata_next = cycle_count_reg;
16'h0014: axil_ctrl_rdata_next = cycle_count_reg >> 32; 16'h0014: axil_ctrl_rdata_next = cycle_count_reg >> 32;
16'h0020: axil_ctrl_rdata_next = dma_read_active_count_reg; 16'h0018: axil_ctrl_rdata_next = dma_read_active_count_reg;
16'h0028: axil_ctrl_rdata_next = dma_write_active_count_reg; 16'h001c: axil_ctrl_rdata_next = dma_write_active_count_reg;
16'h0020: axil_ctrl_rdata_next = dma_rd_req_count_reg;
16'h0024: axil_ctrl_rdata_next = dma_rd_cpl_count_reg;
16'h0028: axil_ctrl_rdata_next = dma_wr_req_count_reg;
16'h0040: axil_ctrl_rdata_next = rx_cpl_stall_count_reg;
// single read // single read
16'h0100: axil_ctrl_rdata_next = dma_read_desc_dma_addr_reg; 16'h0100: axil_ctrl_rdata_next = dma_read_desc_dma_addr_reg;
16'h0104: axil_ctrl_rdata_next = dma_read_desc_dma_addr_reg >> 32; 16'h0104: axil_ctrl_rdata_next = dma_read_desc_dma_addr_reg >> 32;
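For reference, the BAR0 control/status offsets touched in this change, as decoded by the read mux above (a hand-written summary for orientation, not generated from the RTL):

CSR = {
    0x0000: "control/status: bit 0 dma_enable, bit 8 dma_wr_busy, bit 9 dma_rd_busy",
    0x0008: "interrupt enable: bit 0 dma_rd_int_en, bit 1 dma_wr_int_en",
    0x0010: "cycle count [31:0]",
    0x0014: "cycle count [63:32]",
    0x0018: "DMA read active count",
    0x001c: "DMA write active count",
    0x0020: "DMA read request count",
    0x0024: "DMA read completion count",
    0x0028: "DMA write request count",
    0x0040: "RX completion stall count",
}
BUSY_MASK = 0x300  # bits 8 and 9; the driver and testbenches poll status & 0x300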
@ -615,6 +650,10 @@ always @(posedge clk) begin
+ (m_axis_dma_write_desc_valid && m_axis_dma_write_desc_ready) + (m_axis_dma_write_desc_valid && m_axis_dma_write_desc_ready)
- s_axis_dma_write_desc_status_valid; - s_axis_dma_write_desc_status_valid;
dma_rd_req_count_reg <= dma_rd_req_count_reg + dma_rd_req;
dma_rd_cpl_count_reg <= dma_rd_cpl_count_reg + dma_rd_cpl;
dma_wr_req_count_reg <= dma_wr_req_count_reg + dma_wr_req;
dma_read_desc_dma_addr_reg <= dma_read_desc_dma_addr_next; dma_read_desc_dma_addr_reg <= dma_read_desc_dma_addr_next;
dma_read_desc_ram_addr_reg <= dma_read_desc_ram_addr_next; dma_read_desc_ram_addr_reg <= dma_read_desc_ram_addr_next;
dma_read_desc_len_reg <= dma_read_desc_len_next; dma_read_desc_len_reg <= dma_read_desc_len_next;
@ -643,6 +682,9 @@ always @(posedge clk) begin
irq_valid_reg <= irq_valid_next; irq_valid_reg <= irq_valid_next;
rx_cpl_stall_reg <= rx_cpl_stall_next;
rx_cpl_stall_count_reg <= rx_cpl_stall_count_next;
dma_read_block_run_reg <= dma_read_block_run_next; dma_read_block_run_reg <= dma_read_block_run_next;
dma_read_block_len_reg <= dma_read_block_len_next; dma_read_block_len_reg <= dma_read_block_len_next;
dma_read_block_count_reg <= dma_read_block_count_next; dma_read_block_count_reg <= dma_read_block_count_next;
@ -679,6 +721,9 @@ always @(posedge clk) begin
cycle_count_reg <= 0; cycle_count_reg <= 0;
dma_read_active_count_reg <= 0; dma_read_active_count_reg <= 0;
dma_write_active_count_reg <= 0; dma_write_active_count_reg <= 0;
dma_rd_req_count_reg <= 0;
dma_rd_cpl_count_reg <= 0;
dma_wr_req_count_reg <= 0;
dma_read_desc_valid_reg <= 1'b0; dma_read_desc_valid_reg <= 1'b0;
dma_read_desc_status_valid_reg <= 1'b0; dma_read_desc_status_valid_reg <= 1'b0;
@ -688,6 +733,8 @@ always @(posedge clk) begin
dma_rd_int_en_reg <= 1'b0; dma_rd_int_en_reg <= 1'b0;
dma_wr_int_en_reg <= 1'b0; dma_wr_int_en_reg <= 1'b0;
irq_valid_reg <= 1'b0; irq_valid_reg <= 1'b0;
rx_cpl_stall_reg <= 1'b0;
rx_cpl_stall_count_reg <= 0;
dma_read_block_run_reg <= 1'b0; dma_read_block_run_reg <= 1'b0;
dma_write_block_run_reg <= 1'b0; dma_write_block_run_reg <= 1'b0;
end end
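The stall counter added above gives software a way to hold off completion delivery: writing N to offset 0x0040 loads a down-counter, rx_cpl_stall stays asserted until it reaches zero, and the PCIe wrappers use it to gate the receive stream so completions pile up in the hard IP's buffer for roughly N cycles. A minimal model of the counter, ignoring the one-cycle register delay in the RTL (sketch):

def rx_cpl_stall_model(write_value, cycles):
    """Per-cycle stall output after writing write_value to offset 0x0040."""
    count = write_value
    stall = []
    for _ in range(cycles):
        if count:
            stall.append(True)
            count -= 1
        else:
            stall.append(False)
    return stall

assert sum(rx_cpl_stall_model(2000, 3000)) == 2000  # 2000 is the value the new tests use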

View File

@ -172,7 +172,12 @@ module example_core_pcie #
* Status * Status
*/ */
output wire status_error_cor, output wire status_error_cor,
output wire status_error_uncor output wire status_error_uncor,
/*
* Control and status
*/
output wire rx_cpl_stall
); );
parameter AXIL_CTRL_DATA_WIDTH = 32; parameter AXIL_CTRL_DATA_WIDTH = 32;
@ -345,6 +350,11 @@ wire [IRQ_INDEX_WIDTH-1:0] irq_index;
wire irq_valid; wire irq_valid;
wire irq_ready; wire irq_ready;
// Control and status
wire dma_enable;
wire dma_rd_busy;
wire dma_wr_busy;
pcie_tlp_demux_bar #( pcie_tlp_demux_bar #(
.PORTS(3), .PORTS(3),
.TLP_DATA_WIDTH(TLP_DATA_WIDTH), .TLP_DATA_WIDTH(TLP_DATA_WIDTH),
@ -900,8 +910,8 @@ dma_if_pcie_inst (
/* /*
* Configuration * Configuration
*/ */
.read_enable(1'b1), .read_enable(dma_enable),
.write_enable(1'b1), .write_enable(dma_enable),
.ext_tag_enable(ext_tag_enable), .ext_tag_enable(ext_tag_enable),
.rcb_128b(rcb_128b), .rcb_128b(rcb_128b),
.requester_id({bus_num, 5'd0, 3'd0}), .requester_id({bus_num, 5'd0, 3'd0}),
@ -911,8 +921,8 @@ dma_if_pcie_inst (
/* /*
* Status * Status
*/ */
.status_rd_busy(), .status_rd_busy(dma_rd_busy),
.status_wr_busy(), .status_wr_busy(dma_wr_busy),
.status_error_cor(status_error_cor_int[3]), .status_error_cor(status_error_cor_int[3]),
.status_error_uncor(status_error_uncor_int[3]) .status_error_uncor(status_error_uncor_int[3])
); );
@ -1109,7 +1119,18 @@ core_inst (
*/ */
.irq_index(irq_index), .irq_index(irq_index),
.irq_valid(irq_valid), .irq_valid(irq_valid),
.irq_ready(irq_ready) .irq_ready(irq_ready),
/*
* Control and status
*/
.dma_enable(dma_enable),
.dma_rd_busy(dma_rd_busy),
.dma_wr_busy(dma_wr_busy),
.dma_rd_req(tx_rd_req_tlp_valid && tx_rd_req_tlp_sop && tx_rd_req_tlp_ready),
.dma_rd_cpl(rx_cpl_tlp_valid && rx_cpl_tlp_sop && rx_cpl_tlp_ready),
.dma_wr_req(tx_wr_req_tlp_valid && tx_wr_req_tlp_sop && tx_wr_req_tlp_ready),
.rx_cpl_stall(rx_cpl_stall)
); );
endmodule endmodule
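The three count inputs wired into the core above pulse once per TLP, qualified on the start-of-packet beat, so the 0x0020/0x0024/0x0028 counters track whole requests and completions rather than data beats; dma_rd_cpl can legitimately exceed dma_rd_req when the root complex splits a read into several completions. The pulse condition, spelled out (sketch):

def tlp_start_pulse(valid, sop, ready):
    # one count per TLP: only the beat that is valid, accepted and marks start-of-packet
    return int(valid and sop and ready)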

View File

@ -200,6 +200,12 @@ wire [2:0] max_payload_size;
wire msix_enable; wire msix_enable;
wire msix_mask; wire msix_mask;
wire rx_cpl_stall;
wire rx_st_ready_int;
assign rx_st_ready = rx_st_ready_int & !rx_cpl_stall;
pcie_ptile_if #( pcie_ptile_if #(
.SEG_COUNT(SEG_COUNT), .SEG_COUNT(SEG_COUNT),
.SEG_DATA_WIDTH(SEG_DATA_WIDTH), .SEG_DATA_WIDTH(SEG_DATA_WIDTH),
@ -226,7 +232,7 @@ pcie_ptile_if_inst (
.rx_st_sop(rx_st_sop), .rx_st_sop(rx_st_sop),
.rx_st_eop(rx_st_eop), .rx_st_eop(rx_st_eop),
.rx_st_valid(rx_st_valid), .rx_st_valid(rx_st_valid),
.rx_st_ready(rx_st_ready), .rx_st_ready(rx_st_ready_int),
.rx_st_hdr(rx_st_hdr), .rx_st_hdr(rx_st_hdr),
.rx_st_tlp_prfx(rx_st_tlp_prfx), .rx_st_tlp_prfx(rx_st_tlp_prfx),
.rx_st_vf_active(rx_st_vf_active), .rx_st_vf_active(rx_st_vf_active),
@ -488,7 +494,12 @@ core_pcie_inst (
* Status * Status
*/ */
.status_error_cor(), .status_error_cor(),
.status_error_uncor() .status_error_uncor(),
/*
* Control and status
*/
.rx_cpl_stall(rx_cpl_stall)
); );
endmodule endmodule

View File

@ -58,7 +58,7 @@ module example_core_pcie_s10 #
// Completion header flow control credit limit (read) // Completion header flow control credit limit (read)
parameter READ_CPLH_FC_LIMIT = 770, parameter READ_CPLH_FC_LIMIT = 770,
// Completion data flow control credit limit (read) // Completion data flow control credit limit (read)
parameter READ_CPLD_FC_LIMIT = 2500, parameter READ_CPLD_FC_LIMIT = 2400,
// Operation table size (write) // Operation table size (write)
parameter WRITE_OP_TABLE_SIZE = 2**TX_SEQ_NUM_WIDTH, parameter WRITE_OP_TABLE_SIZE = 2**TX_SEQ_NUM_WIDTH,
// In-flight transmit limit (write) // In-flight transmit limit (write)
@ -194,6 +194,12 @@ wire [2:0] max_payload_size;
wire msix_enable; wire msix_enable;
wire msix_mask; wire msix_mask;
wire rx_cpl_stall;
wire rx_st_ready_int;
assign rx_st_ready = rx_st_ready_int & !rx_cpl_stall;
pcie_s10_if #( pcie_s10_if #(
.SEG_COUNT(SEG_COUNT), .SEG_COUNT(SEG_COUNT),
.SEG_DATA_WIDTH(SEG_DATA_WIDTH), .SEG_DATA_WIDTH(SEG_DATA_WIDTH),
@ -222,7 +228,7 @@ pcie_s10_if_inst (
.rx_st_sop(rx_st_sop), .rx_st_sop(rx_st_sop),
.rx_st_eop(rx_st_eop), .rx_st_eop(rx_st_eop),
.rx_st_valid(rx_st_valid), .rx_st_valid(rx_st_valid),
.rx_st_ready(rx_st_ready), .rx_st_ready(rx_st_ready_int),
.rx_st_vf_active(rx_st_vf_active), .rx_st_vf_active(rx_st_vf_active),
.rx_st_func_num(rx_st_func_num), .rx_st_func_num(rx_st_func_num),
.rx_st_vf_num(rx_st_vf_num), .rx_st_vf_num(rx_st_vf_num),
@ -495,7 +501,12 @@ core_pcie_inst (
* Status * Status
*/ */
.status_error_cor(), .status_error_cor(),
.status_error_uncor() .status_error_uncor(),
/*
* Control and status
*/
.rx_cpl_stall(rx_cpl_stall)
); );
endmodule endmodule

View File

@ -68,9 +68,9 @@ module example_core_pcie_us #
// In-flight transmit limit (read) // In-flight transmit limit (read)
parameter READ_TX_LIMIT = 2**(RQ_SEQ_NUM_WIDTH-1), parameter READ_TX_LIMIT = 2**(RQ_SEQ_NUM_WIDTH-1),
// Completion header flow control credit limit (read) // Completion header flow control credit limit (read)
parameter READ_CPLH_FC_LIMIT = AXIS_PCIE_RQ_USER_WIDTH == 60 ? 64 : 128, parameter READ_CPLH_FC_LIMIT = AXIS_PCIE_RQ_USER_WIDTH == 60 ? 64 : 256,
// Completion data flow control credit limit (read) // Completion data flow control credit limit (read)
parameter READ_CPLD_FC_LIMIT = AXIS_PCIE_RQ_USER_WIDTH == 60 ? 992 : 2048, parameter READ_CPLD_FC_LIMIT = AXIS_PCIE_RQ_USER_WIDTH == 60 ? 1024-64 : 2048-256,
// Operation table size (write) // Operation table size (write)
parameter WRITE_OP_TABLE_SIZE = 2**(RQ_SEQ_NUM_WIDTH-1), parameter WRITE_OP_TABLE_SIZE = 2**(RQ_SEQ_NUM_WIDTH-1),
// In-flight transmit limit (write) // In-flight transmit limit (write)
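The new limits budget the hard IP's receive completion buffer in PCIe flow-control credits: one CPLH credit per completion header and one CPLD credit per 16 bytes of completion payload, and a single read may come back as one completion per read-completion-boundary (RCB) segment it touches. A rough helper for the worst-case credits one read consumes (a sketch assuming a 128-byte RCB and a root complex that splits at every boundary; the 256/1792 and 64/960 pairs above are the defaults this change picks, not values computed here):

def read_cpl_credits(addr, length, rcb=128):
    """Worst-case (CPLH, CPLD) credits consumed by one memory read request."""
    cplh = 0
    cpld = 0
    offset = addr % rcb
    remaining = length
    while remaining > 0:
        seg = min(remaining, rcb - offset)  # completion ends at the next RCB boundary
        cplh += 1                           # one header credit per completion
        cpld += (seg + 15) // 16            # one data credit per 16 B of payload
        remaining -= seg
        offset = 0
    return cplh, cpld

print(read_cpl_credits(128 - 8, 512))       # 512 B read starting 8 B before an RCB: (5, 33)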
@ -259,6 +259,14 @@ wire ext_tag_enable;
wire msix_enable; wire msix_enable;
wire msix_mask; wire msix_mask;
wire rx_cpl_stall;
wire s_axis_rc_tvalid_int;
wire s_axis_rc_tready_int;
assign s_axis_rc_tvalid_int = s_axis_rc_tvalid & ~rx_cpl_stall;
assign s_axis_rc_tready = s_axis_rc_tready_int & ~rx_cpl_stall;
pcie_us_if #( pcie_us_if #(
.AXIS_PCIE_DATA_WIDTH(AXIS_PCIE_DATA_WIDTH), .AXIS_PCIE_DATA_WIDTH(AXIS_PCIE_DATA_WIDTH),
.AXIS_PCIE_KEEP_WIDTH(AXIS_PCIE_KEEP_WIDTH), .AXIS_PCIE_KEEP_WIDTH(AXIS_PCIE_KEEP_WIDTH),
@ -295,8 +303,8 @@ pcie_us_if_inst (
*/ */
.s_axis_rc_tdata(s_axis_rc_tdata), .s_axis_rc_tdata(s_axis_rc_tdata),
.s_axis_rc_tkeep(s_axis_rc_tkeep), .s_axis_rc_tkeep(s_axis_rc_tkeep),
.s_axis_rc_tvalid(s_axis_rc_tvalid), .s_axis_rc_tvalid(s_axis_rc_tvalid_int),
.s_axis_rc_tready(s_axis_rc_tready), .s_axis_rc_tready(s_axis_rc_tready_int),
.s_axis_rc_tlast(s_axis_rc_tlast), .s_axis_rc_tlast(s_axis_rc_tlast),
.s_axis_rc_tuser(s_axis_rc_tuser), .s_axis_rc_tuser(s_axis_rc_tuser),
@ -624,7 +632,12 @@ core_pcie_inst (
* Status * Status
*/ */
.status_error_cor(status_error_cor), .status_error_cor(status_error_cor),
.status_error_uncor(status_error_uncor) .status_error_uncor(status_error_uncor),
/*
* Control and status
*/
.rx_cpl_stall(rx_cpl_stall)
); );
endmodule endmodule

View File

@ -224,6 +224,8 @@ async def run_test(dut):
await Timer(2000, 'ns') await Timer(2000, 'ns')
# read status # read status
status = await dev_pf0_bar0.read_dword(0x000000)
tb.log.info("DMA Status: 0x%x", status)
val = await dev_pf0_bar0.read_dword(0x000118) val = await dev_pf0_bar0.read_dword(0x000118)
tb.log.info("Status: 0x%x", val) tb.log.info("Status: 0x%x", val)
assert val == 0x800000AA assert val == 0x800000AA
@ -238,6 +240,8 @@ async def run_test(dut):
await Timer(2000, 'ns') await Timer(2000, 'ns')
# read status # read status
status = await dev_pf0_bar0.read_dword(0x000000)
tb.log.info("DMA Status: 0x%x", status)
val = await dev_pf0_bar0.read_dword(0x000218) val = await dev_pf0_bar0.read_dword(0x000218)
tb.log.info("Status: 0x%x", val) tb.log.info("Status: 0x%x", val)
assert val == 0x80000055 assert val == 0x80000055
@ -258,6 +262,8 @@ async def run_test(dut):
await Timer(2000, 'ns') await Timer(2000, 'ns')
# read status # read status
status = await dev_pf0_bar0.read_dword(0x000000)
tb.log.info("DMA Status: 0x%x", status)
val = await dev_pf0_bar0.read_dword(0x000218) val = await dev_pf0_bar0.read_dword(0x000218)
tb.log.info("Status: 0x%x", val) tb.log.info("Status: 0x%x", val)
assert val == 0x800000AA assert val == 0x800000AA
@ -321,11 +327,15 @@ async def run_test(dut):
await dev_pf0_bar0.write_dword(0x001000, 1) await dev_pf0_bar0.write_dword(0x001000, 1)
for k in range(10): for k in range(10):
cnt = await dev_pf0_bar0.read_dword(0x001018)
await Timer(1000, 'ns') await Timer(1000, 'ns')
if cnt == 0: run = await dev_pf0_bar0.read_dword(0x001000)
if run == 0:
break break
# read status
status = await dev_pf0_bar0.read_dword(0x000000)
tb.log.info("DMA Status: 0x%x", status)
# configure operation (write) # configure operation (write)
# DMA base address # DMA base address
await dev_pf0_bar0.write_dword(0x001180, (mem_base+dest_offset) & 0xffffffff) await dev_pf0_bar0.write_dword(0x001180, (mem_base+dest_offset) & 0xffffffff)
@ -363,11 +373,17 @@ async def run_test(dut):
await dev_pf0_bar0.write_dword(0x001100, 1) await dev_pf0_bar0.write_dword(0x001100, 1)
for k in range(10): for k in range(10):
cnt = await dev_pf0_bar0.read_dword(0x001118)
await Timer(1000, 'ns') await Timer(1000, 'ns')
if cnt == 0: run = await dev_pf0_bar0.read_dword(0x001100)
if run == 0:
break break
# read status
status = await dev_pf0_bar0.read_dword(0x000000)
tb.log.info("DMA Status: 0x%x", status)
assert status & 0x300 == 0
tb.log.info("%s", mem.hexdump_str(dest_offset, region_len)) tb.log.info("%s", mem.hexdump_str(dest_offset, region_len))
assert mem[src_offset:src_offset+region_len] == mem[dest_offset:dest_offset+region_len] assert mem[src_offset:src_offset+region_len] == mem[dest_offset:dest_offset+region_len]

View File

@ -258,6 +258,211 @@ class TB(object):
await self.rc.enumerate() await self.rc.enumerate()
async def dma_block_read_bench(tb, dev, addr, mask, size, stride, count):
dev_pf0_bar0 = dev.bar_window[0]
rd_req = await dev_pf0_bar0.read_dword(0x000020)
rd_cpl = await dev_pf0_bar0.read_dword(0x000024)
# configure operation (read)
# DMA base address
await dev_pf0_bar0.write_dword(0x001080, addr & 0xffffffff)
await dev_pf0_bar0.write_dword(0x001084, (addr >> 32) & 0xffffffff)
# DMA offset address
await dev_pf0_bar0.write_dword(0x001088, 0)
await dev_pf0_bar0.write_dword(0x00108c, 0)
# DMA offset mask
await dev_pf0_bar0.write_dword(0x001090, mask)
await dev_pf0_bar0.write_dword(0x001094, 0)
# DMA stride
await dev_pf0_bar0.write_dword(0x001098, stride)
await dev_pf0_bar0.write_dword(0x00109c, 0)
# RAM base address
await dev_pf0_bar0.write_dword(0x0010c0, 0)
await dev_pf0_bar0.write_dword(0x0010c4, 0)
# RAM offset address
await dev_pf0_bar0.write_dword(0x0010c8, 0)
await dev_pf0_bar0.write_dword(0x0010cc, 0)
# RAM offset mask
await dev_pf0_bar0.write_dword(0x0010d0, mask)
await dev_pf0_bar0.write_dword(0x0010d4, 0)
# RAM stride
await dev_pf0_bar0.write_dword(0x0010d8, stride)
await dev_pf0_bar0.write_dword(0x0010dc, 0)
# clear cycle count
await dev_pf0_bar0.write_dword(0x001008, 0)
await dev_pf0_bar0.write_dword(0x00100c, 0)
# block length
await dev_pf0_bar0.write_dword(0x001010, size)
# block count
await dev_pf0_bar0.write_dword(0x001018, count)
await dev_pf0_bar0.write_dword(0x00101c, 0)
# start
await dev_pf0_bar0.write_dword(0x001000, 1)
for k in range(1000):
await Timer(1000, 'ns')
run = await dev_pf0_bar0.read_dword(0x001000)
status = await dev_pf0_bar0.read_dword(0x000000)
if run == 0 and status & 0x300 == 0:
break
if run != 0:
tb.log.warning("Operation timed out")
if status & 0x300 != 0:
tb.log.warning("DMA engine busy")
cycles = await dev_pf0_bar0.read_dword(0x001008)
rd_req = await dev_pf0_bar0.read_dword(0x000020) - rd_req
rd_cpl = await dev_pf0_bar0.read_dword(0x000024) - rd_cpl
tb.log.info("read %d blocks of %d bytes (total %d B, stride %d) in %d ns (%d req %d cpl) %d Mbps",
count, size, count*size, stride, cycles*4, rd_req, rd_cpl, size * count * 8 * 1000 / (cycles * 4))
assert status & 0x300 == 0
async def dma_block_write_bench(tb, dev, addr, mask, size, stride, count):
dev_pf0_bar0 = dev.bar_window[0]
wr_req = await dev_pf0_bar0.read_dword(0x000028)
# configure operation (write)
# DMA base address
await dev_pf0_bar0.write_dword(0x001180, addr & 0xffffffff)
await dev_pf0_bar0.write_dword(0x001184, (addr >> 32) & 0xffffffff)
# DMA offset address
await dev_pf0_bar0.write_dword(0x001188, 0)
await dev_pf0_bar0.write_dword(0x00118c, 0)
# DMA offset mask
await dev_pf0_bar0.write_dword(0x001190, mask)
await dev_pf0_bar0.write_dword(0x001194, 0)
# DMA stride
await dev_pf0_bar0.write_dword(0x001198, stride)
await dev_pf0_bar0.write_dword(0x00119c, 0)
# RAM base address
await dev_pf0_bar0.write_dword(0x0011c0, 0)
await dev_pf0_bar0.write_dword(0x0011c4, 0)
# RAM offset address
await dev_pf0_bar0.write_dword(0x0011c8, 0)
await dev_pf0_bar0.write_dword(0x0011cc, 0)
# RAM offset mask
await dev_pf0_bar0.write_dword(0x0011d0, mask)
await dev_pf0_bar0.write_dword(0x0011d4, 0)
# RAM stride
await dev_pf0_bar0.write_dword(0x0011d8, stride)
await dev_pf0_bar0.write_dword(0x0011dc, 0)
# clear cycle count
await dev_pf0_bar0.write_dword(0x001108, 0)
await dev_pf0_bar0.write_dword(0x00110c, 0)
# block length
await dev_pf0_bar0.write_dword(0x001110, size)
# block count
await dev_pf0_bar0.write_dword(0x001118, count)
await dev_pf0_bar0.write_dword(0x00111c, 0)
# start
await dev_pf0_bar0.write_dword(0x001100, 1)
for k in range(1000):
await Timer(1000, 'ns')
run = await dev_pf0_bar0.read_dword(0x001100)
status = await dev_pf0_bar0.read_dword(0x000000)
if run == 0 and status & 0x300 == 0:
break
if run != 0:
tb.log.warning("Operation timed out")
if status & 0x300 != 0:
tb.log.warning("DMA engine busy")
cycles = await dev_pf0_bar0.read_dword(0x001108)
wr_req = await dev_pf0_bar0.read_dword(0x000028) - wr_req
tb.log.info("wrote %d blocks of %d bytes (total %d B, stride %d) in %d ns (%d req) %d Mbps",
count, size, count*size, stride, cycles*4, wr_req, size * count * 8 * 1000 / (cycles * 4))
assert status & 0x300 == 0
async def dma_cpl_buf_test(tb, dev, addr, mask, size, stride, count, stall):
dev_pf0_bar0 = dev.bar_window[0]
rd_req = await dev_pf0_bar0.read_dword(0x000020)
rd_cpl = await dev_pf0_bar0.read_dword(0x000024)
# configure operation (read)
# DMA base address
await dev_pf0_bar0.write_dword(0x001080, addr & 0xffffffff)
await dev_pf0_bar0.write_dword(0x001084, (addr >> 32) & 0xffffffff)
# DMA offset address
await dev_pf0_bar0.write_dword(0x001088, 0)
await dev_pf0_bar0.write_dword(0x00108c, 0)
# DMA offset mask
await dev_pf0_bar0.write_dword(0x001090, mask)
await dev_pf0_bar0.write_dword(0x001094, 0)
# DMA stride
await dev_pf0_bar0.write_dword(0x001098, stride)
await dev_pf0_bar0.write_dword(0x00109c, 0)
# RAM base address
await dev_pf0_bar0.write_dword(0x0010c0, 0)
await dev_pf0_bar0.write_dword(0x0010c4, 0)
# RAM offset address
await dev_pf0_bar0.write_dword(0x0010c8, 0)
await dev_pf0_bar0.write_dword(0x0010cc, 0)
# RAM offset mask
await dev_pf0_bar0.write_dword(0x0010d0, mask)
await dev_pf0_bar0.write_dword(0x0010d4, 0)
# RAM stride
await dev_pf0_bar0.write_dword(0x0010d8, stride)
await dev_pf0_bar0.write_dword(0x0010dc, 0)
# clear cycle count
await dev_pf0_bar0.write_dword(0x001008, 0)
await dev_pf0_bar0.write_dword(0x00100c, 0)
# block length
await dev_pf0_bar0.write_dword(0x001010, size)
# block count
await dev_pf0_bar0.write_dword(0x001018, count)
await dev_pf0_bar0.write_dword(0x00101c, 0)
if stall:
# stall RX
await dev_pf0_bar0.write_dword(0x000040, stall)
# start
await dev_pf0_bar0.write_dword(0x001000, 1)
# wait for stall
if stall:
for k in range(stall):
await RisingEdge(tb.dut.clk)
for k in range(100):
await Timer(1000, 'ns')
run = await dev_pf0_bar0.read_dword(0x001000)
status = await dev_pf0_bar0.read_dword(0x000000)
if run == 0 and status & 0x300 == 0:
break
if run != 0:
tb.log.warning("Operation timed out")
if status & 0x300 != 0:
tb.log.warning("DMA engine busy")
cycles = await dev_pf0_bar0.read_dword(0x001008)
rd_req = await dev_pf0_bar0.read_dword(0x000020) - rd_req
rd_cpl = await dev_pf0_bar0.read_dword(0x000024) - rd_cpl
tb.log.info("read %d x %d B (total %d B %d CPLD, stride %d) in %d ns (%d req %d cpl) %d Mbps",
count, size, count*size, count*((size+15)//16), stride, cycles*4, rd_req, rd_cpl, size * count * 8 * 1000 / (cycles * 4))
assert status & 0x300 == 0
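The throughput figure logged by these helpers is simply bits per microsecond, since the cycle counter ticks every 4 ns; a quick worked example with made-up numbers (not taken from a real run):

size, count, cycles = 256, 32, 820              # bytes per block, blocks, 4 ns cycles
mbps = size * count * 8 * 1000 / (cycles * 4)   # 65536 bits over 3280 ns
print(round(mbps))                              # 19980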
@cocotb.test() @cocotb.test()
async def run_test(dut): async def run_test(dut):
@ -309,6 +514,8 @@ async def run_test(dut):
await Timer(2000, 'ns') await Timer(2000, 'ns')
# read status # read status
status = await dev_pf0_bar0.read_dword(0x000000)
tb.log.info("DMA Status: 0x%x", status)
val = await dev_pf0_bar0.read_dword(0x000118) val = await dev_pf0_bar0.read_dword(0x000118)
tb.log.info("Status: 0x%x", val) tb.log.info("Status: 0x%x", val)
assert val == 0x800000AA assert val == 0x800000AA
@ -323,6 +530,8 @@ async def run_test(dut):
await Timer(2000, 'ns') await Timer(2000, 'ns')
# read status # read status
status = await dev_pf0_bar0.read_dword(0x000000)
tb.log.info("DMA Status: 0x%x", status)
val = await dev_pf0_bar0.read_dword(0x000218) val = await dev_pf0_bar0.read_dword(0x000218)
tb.log.info("Status: 0x%x", val) tb.log.info("Status: 0x%x", val)
assert val == 0x80000055 assert val == 0x80000055
@ -343,6 +552,8 @@ async def run_test(dut):
await Timer(2000, 'ns') await Timer(2000, 'ns')
# read status # read status
status = await dev_pf0_bar0.read_dword(0x000000)
tb.log.info("DMA Status: 0x%x", status)
val = await dev_pf0_bar0.read_dword(0x000218) val = await dev_pf0_bar0.read_dword(0x000218)
tb.log.info("Status: 0x%x", val) tb.log.info("Status: 0x%x", val)
assert val == 0x800000AA assert val == 0x800000AA
@ -353,110 +564,66 @@ async def run_test(dut):
tb.log.info("Test DMA block operations") tb.log.info("Test DMA block operations")
# disable interrupts
await dev_pf0_bar0.write_dword(0x000008, 0)
region_len = 0x2000 region_len = 0x2000
src_offset = 0x0000 src_offset = 0x0000
dest_offset = 0x4000 dest_offset = 0x4000
block_size = 256 await dma_block_read_bench(tb, dev, mem_base+src_offset, region_len-1, 256, 256, 32)
block_stride = block_size await dma_block_write_bench(tb, dev, mem_base+dest_offset, region_len-1, 256, 256, 32)
block_count = 32
# write packet data
mem[src_offset:src_offset+region_len] = bytearray([x % 256 for x in range(region_len)])
# enable DMA
await dev_pf0_bar0.write_dword(0x000000, 1)
# disable interrupts
await dev_pf0_bar0.write_dword(0x000008, 0)
# configure operation (read)
# DMA base address
await dev_pf0_bar0.write_dword(0x001080, (mem_base+src_offset) & 0xffffffff)
await dev_pf0_bar0.write_dword(0x001084, (mem_base+src_offset >> 32) & 0xffffffff)
# DMA offset address
await dev_pf0_bar0.write_dword(0x001088, 0)
await dev_pf0_bar0.write_dword(0x00108c, 0)
# DMA offset mask
await dev_pf0_bar0.write_dword(0x001090, region_len-1)
await dev_pf0_bar0.write_dword(0x001094, 0)
# DMA stride
await dev_pf0_bar0.write_dword(0x001098, block_stride)
await dev_pf0_bar0.write_dword(0x00109c, 0)
# RAM base address
await dev_pf0_bar0.write_dword(0x0010c0, 0)
await dev_pf0_bar0.write_dword(0x0010c4, 0)
# RAM offset address
await dev_pf0_bar0.write_dword(0x0010c8, 0)
await dev_pf0_bar0.write_dword(0x0010cc, 0)
# RAM offset mask
await dev_pf0_bar0.write_dword(0x0010d0, region_len-1)
await dev_pf0_bar0.write_dword(0x0010d4, 0)
# RAM stride
await dev_pf0_bar0.write_dword(0x0010d8, block_stride)
await dev_pf0_bar0.write_dword(0x0010dc, 0)
# clear cycle count
await dev_pf0_bar0.write_dword(0x001008, 0)
await dev_pf0_bar0.write_dword(0x00100c, 0)
# block length
await dev_pf0_bar0.write_dword(0x001010, block_size)
# block count
await dev_pf0_bar0.write_dword(0x001018, block_count)
await dev_pf0_bar0.write_dword(0x00101c, 0)
# start
await dev_pf0_bar0.write_dword(0x001000, 1)
for k in range(10):
cnt = await dev_pf0_bar0.read_dword(0x001018)
await Timer(1000, 'ns')
if cnt == 0:
break
# configure operation (write)
# DMA base address
await dev_pf0_bar0.write_dword(0x001180, (mem_base+dest_offset) & 0xffffffff)
await dev_pf0_bar0.write_dword(0x001184, (mem_base+dest_offset >> 32) & 0xffffffff)
# DMA offset address
await dev_pf0_bar0.write_dword(0x001188, 0)
await dev_pf0_bar0.write_dword(0x00118c, 0)
# DMA offset mask
await dev_pf0_bar0.write_dword(0x001190, region_len-1)
await dev_pf0_bar0.write_dword(0x001194, 0)
# DMA stride
await dev_pf0_bar0.write_dword(0x001198, block_stride)
await dev_pf0_bar0.write_dword(0x00119c, 0)
# RAM base address
await dev_pf0_bar0.write_dword(0x0011c0, 0)
await dev_pf0_bar0.write_dword(0x0011c4, 0)
# RAM offset address
await dev_pf0_bar0.write_dword(0x0011c8, 0)
await dev_pf0_bar0.write_dword(0x0011cc, 0)
# RAM offset mask
await dev_pf0_bar0.write_dword(0x0011d0, region_len-1)
await dev_pf0_bar0.write_dword(0x0011d4, 0)
# RAM stride
await dev_pf0_bar0.write_dword(0x0011d8, block_stride)
await dev_pf0_bar0.write_dword(0x0011dc, 0)
# clear cycle count
await dev_pf0_bar0.write_dword(0x001108, 0)
await dev_pf0_bar0.write_dword(0x00110c, 0)
# block length
await dev_pf0_bar0.write_dword(0x001110, block_size)
# block count
await dev_pf0_bar0.write_dword(0x001118, block_count)
await dev_pf0_bar0.write_dword(0x00111c, 0)
# start
await dev_pf0_bar0.write_dword(0x001100, 1)
for k in range(10):
cnt = await dev_pf0_bar0.read_dword(0x001118)
await Timer(1000, 'ns')
if cnt == 0:
break
tb.log.info("%s", mem.hexdump_str(dest_offset, region_len)) tb.log.info("%s", mem.hexdump_str(dest_offset, region_len))
assert mem[src_offset:src_offset+region_len] == mem[dest_offset:dest_offset+region_len] assert mem[src_offset:src_offset+region_len] == mem[dest_offset:dest_offset+region_len]
tb.log.info("Test RX completion buffer (CPLH, 8)")
tb.rc.split_on_all_rcb = True
size = 8
stride = size
for count in range(32, 256+1, 8):
await dma_cpl_buf_test(tb, dev, mem_base, region_len-1, size, stride, count, 2000)
tb.log.info("Test RX completion buffer (CPLH, 8+64)")
size = 8+64
stride = 0
for count in range(8, 256+1, 8):
await dma_cpl_buf_test(tb, dev, mem_base+128-8, region_len-1, size, stride, count, 2000)
tb.log.info("Test RX completion buffer (CPLH, 8+128+8)")
size = 8+128+8
stride = 0
for count in range(8, 256+1, 8):
await dma_cpl_buf_test(tb, dev, mem_base+128-8, region_len-1, size, stride, count, 2000)
tb.rc.split_on_all_rcb = False
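These three patterns are sized so that, with the root complex forced to split at every 128-byte RCB, each read costs one, two or three completion headers; sweeping count up to 256 then makes the DMA engine throttle its requests so the completions parked in the hard IP during the stall never exceed the CPLH budget. A quick check of the completions per read (sketch, assuming a 128-byte RCB):

for size, start in [(8, 0), (8 + 64, 128 - 8), (8 + 128 + 8, 128 - 8)]:
    completions = (start % 128 + size + 127) // 128
    print(size, "B from offset", start, "->", completions, "completion(s)")
# prints 1, 2 and 3 completions respectively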
tb.log.info("Test RX completion buffer (CPLD)")
size = 512
stride = size
for count in range(8, 256+1, 8):
await dma_cpl_buf_test(tb, dev, mem_base, region_len-1, size, stride, count, 4000)
tb.log.info("Perform block reads")
count = 100
for size in [2**x for x in range(14)]:
stride = size
await dma_block_read_bench(tb, dev, mem_base, region_len-1, size, stride, count)
tb.log.info("Perform block writes")
count = 100
for size in [2**x for x in range(14)]:
stride = size
await dma_block_write_bench(tb, dev, mem_base, region_len-1, size, stride, count)
await RisingEdge(dut.clk) await RisingEdge(dut.clk)
await RisingEdge(dut.clk) await RisingEdge(dut.clk)

View File

@ -57,7 +57,7 @@ VERILOG_SOURCES += ../../../../rtl/priority_encoder.v
VERILOG_SOURCES += ../../../../rtl/pulse_merge.v VERILOG_SOURCES += ../../../../rtl/pulse_merge.v
# module parameters # module parameters
export PARAM_SEG_COUNT := 1 export PARAM_SEG_COUNT := 2
export PARAM_SEG_DATA_WIDTH := 256 export PARAM_SEG_DATA_WIDTH := 256
export PARAM_SEG_EMPTY_WIDTH := $(shell python -c "print((($(PARAM_SEG_DATA_WIDTH)//32)-1).bit_length())" ) export PARAM_SEG_EMPTY_WIDTH := $(shell python -c "print((($(PARAM_SEG_DATA_WIDTH)//32)-1).bit_length())" )
export PARAM_TX_SEQ_NUM_WIDTH := 6 export PARAM_TX_SEQ_NUM_WIDTH := 6
@ -69,7 +69,7 @@ export PARAM_IMM_WIDTH := 32
export PARAM_READ_OP_TABLE_SIZE := $(PARAM_PCIE_TAG_COUNT) export PARAM_READ_OP_TABLE_SIZE := $(PARAM_PCIE_TAG_COUNT)
export PARAM_READ_TX_LIMIT := $(shell echo "$$(( 1 << $(PARAM_TX_SEQ_NUM_WIDTH) ))" ) export PARAM_READ_TX_LIMIT := $(shell echo "$$(( 1 << $(PARAM_TX_SEQ_NUM_WIDTH) ))" )
export PARAM_READ_CPLH_FC_LIMIT := 770 export PARAM_READ_CPLH_FC_LIMIT := 770
export PARAM_READ_CPLD_FC_LIMIT := 2500 export PARAM_READ_CPLD_FC_LIMIT := 2400
export PARAM_WRITE_OP_TABLE_SIZE := $(shell echo "$$(( 1 << $(PARAM_TX_SEQ_NUM_WIDTH) ))" ) export PARAM_WRITE_OP_TABLE_SIZE := $(shell echo "$$(( 1 << $(PARAM_TX_SEQ_NUM_WIDTH) ))" )
export PARAM_WRITE_TX_LIMIT := $(shell echo "$$(( 1 << $(PARAM_TX_SEQ_NUM_WIDTH) ))" ) export PARAM_WRITE_TX_LIMIT := $(shell echo "$$(( 1 << $(PARAM_TX_SEQ_NUM_WIDTH) ))" )
export PARAM_BAR0_APERTURE := 24 export PARAM_BAR0_APERTURE := 24

View File

@ -206,6 +206,211 @@ class TB(object):
await self.rc.enumerate() await self.rc.enumerate()
async def dma_block_read_bench(tb, dev, addr, mask, size, stride, count):
dev_pf0_bar0 = dev.bar_window[0]
rd_req = await dev_pf0_bar0.read_dword(0x000020)
rd_cpl = await dev_pf0_bar0.read_dword(0x000024)
# configure operation (read)
# DMA base address
await dev_pf0_bar0.write_dword(0x001080, addr & 0xffffffff)
await dev_pf0_bar0.write_dword(0x001084, (addr >> 32) & 0xffffffff)
# DMA offset address
await dev_pf0_bar0.write_dword(0x001088, 0)
await dev_pf0_bar0.write_dword(0x00108c, 0)
# DMA offset mask
await dev_pf0_bar0.write_dword(0x001090, mask)
await dev_pf0_bar0.write_dword(0x001094, 0)
# DMA stride
await dev_pf0_bar0.write_dword(0x001098, stride)
await dev_pf0_bar0.write_dword(0x00109c, 0)
# RAM base address
await dev_pf0_bar0.write_dword(0x0010c0, 0)
await dev_pf0_bar0.write_dword(0x0010c4, 0)
# RAM offset address
await dev_pf0_bar0.write_dword(0x0010c8, 0)
await dev_pf0_bar0.write_dword(0x0010cc, 0)
# RAM offset mask
await dev_pf0_bar0.write_dword(0x0010d0, mask)
await dev_pf0_bar0.write_dword(0x0010d4, 0)
# RAM stride
await dev_pf0_bar0.write_dword(0x0010d8, stride)
await dev_pf0_bar0.write_dword(0x0010dc, 0)
# clear cycle count
await dev_pf0_bar0.write_dword(0x001008, 0)
await dev_pf0_bar0.write_dword(0x00100c, 0)
# block length
await dev_pf0_bar0.write_dword(0x001010, size)
# block count
await dev_pf0_bar0.write_dword(0x001018, count)
await dev_pf0_bar0.write_dword(0x00101c, 0)
# start
await dev_pf0_bar0.write_dword(0x001000, 1)
for k in range(1000):
await Timer(1000, 'ns')
run = await dev_pf0_bar0.read_dword(0x001000)
status = await dev_pf0_bar0.read_dword(0x000000)
if run == 0 and status & 0x300 == 0:
break
if run != 0:
tb.log.warning("Operation timed out")
if status & 0x300 != 0:
tb.log.warning("DMA engine busy")
cycles = await dev_pf0_bar0.read_dword(0x001008)
rd_req = await dev_pf0_bar0.read_dword(0x000020) - rd_req
rd_cpl = await dev_pf0_bar0.read_dword(0x000024) - rd_cpl
tb.log.info("read %d blocks of %d bytes (total %d B, stride %d) in %d ns (%d req %d cpl) %d Mbps",
count, size, count*size, stride, cycles*4, rd_req, rd_cpl, size * count * 8 * 1000 / (cycles * 4))
assert status & 0x300 == 0
async def dma_block_write_bench(tb, dev, addr, mask, size, stride, count):
dev_pf0_bar0 = dev.bar_window[0]
wr_req = await dev_pf0_bar0.read_dword(0x000028)
# configure operation (write)
# DMA base address
await dev_pf0_bar0.write_dword(0x001180, addr & 0xffffffff)
await dev_pf0_bar0.write_dword(0x001184, (addr >> 32) & 0xffffffff)
# DMA offset address
await dev_pf0_bar0.write_dword(0x001188, 0)
await dev_pf0_bar0.write_dword(0x00118c, 0)
# DMA offset mask
await dev_pf0_bar0.write_dword(0x001190, mask)
await dev_pf0_bar0.write_dword(0x001194, 0)
# DMA stride
await dev_pf0_bar0.write_dword(0x001198, stride)
await dev_pf0_bar0.write_dword(0x00119c, 0)
# RAM base address
await dev_pf0_bar0.write_dword(0x0011c0, 0)
await dev_pf0_bar0.write_dword(0x0011c4, 0)
# RAM offset address
await dev_pf0_bar0.write_dword(0x0011c8, 0)
await dev_pf0_bar0.write_dword(0x0011cc, 0)
# RAM offset mask
await dev_pf0_bar0.write_dword(0x0011d0, mask)
await dev_pf0_bar0.write_dword(0x0011d4, 0)
# RAM stride
await dev_pf0_bar0.write_dword(0x0011d8, stride)
await dev_pf0_bar0.write_dword(0x0011dc, 0)
# clear cycle count
await dev_pf0_bar0.write_dword(0x001108, 0)
await dev_pf0_bar0.write_dword(0x00110c, 0)
# block length
await dev_pf0_bar0.write_dword(0x001110, size)
# block count
await dev_pf0_bar0.write_dword(0x001118, count)
await dev_pf0_bar0.write_dword(0x00111c, 0)
# start
await dev_pf0_bar0.write_dword(0x001100, 1)
for k in range(1000):
await Timer(1000, 'ns')
run = await dev_pf0_bar0.read_dword(0x001100)
status = await dev_pf0_bar0.read_dword(0x000000)
if run == 0 and status & 0x300 == 0:
break
if run != 0:
tb.log.warning("Operation timed out")
if status & 0x300 != 0:
tb.log.warning("DMA engine busy")
cycles = await dev_pf0_bar0.read_dword(0x001108)
wr_req = await dev_pf0_bar0.read_dword(0x000028) - wr_req
tb.log.info("wrote %d blocks of %d bytes (total %d B, stride %d) in %d ns (%d req) %d Mbps",
count, size, count*size, stride, cycles*4, wr_req, size * count * 8 * 1000 / (cycles * 4))
assert status & 0x300 == 0
async def dma_cpl_buf_test(tb, dev, addr, mask, size, stride, count, stall):
dev_pf0_bar0 = dev.bar_window[0]
rd_req = await dev_pf0_bar0.read_dword(0x000020)
rd_cpl = await dev_pf0_bar0.read_dword(0x000024)
# configure operation (read)
# DMA base address
await dev_pf0_bar0.write_dword(0x001080, addr & 0xffffffff)
await dev_pf0_bar0.write_dword(0x001084, (addr >> 32) & 0xffffffff)
# DMA offset address
await dev_pf0_bar0.write_dword(0x001088, 0)
await dev_pf0_bar0.write_dword(0x00108c, 0)
# DMA offset mask
await dev_pf0_bar0.write_dword(0x001090, mask)
await dev_pf0_bar0.write_dword(0x001094, 0)
# DMA stride
await dev_pf0_bar0.write_dword(0x001098, stride)
await dev_pf0_bar0.write_dword(0x00109c, 0)
# RAM base address
await dev_pf0_bar0.write_dword(0x0010c0, 0)
await dev_pf0_bar0.write_dword(0x0010c4, 0)
# RAM offset address
await dev_pf0_bar0.write_dword(0x0010c8, 0)
await dev_pf0_bar0.write_dword(0x0010cc, 0)
# RAM offset mask
await dev_pf0_bar0.write_dword(0x0010d0, mask)
await dev_pf0_bar0.write_dword(0x0010d4, 0)
# RAM stride
await dev_pf0_bar0.write_dword(0x0010d8, stride)
await dev_pf0_bar0.write_dword(0x0010dc, 0)
# clear cycle count
await dev_pf0_bar0.write_dword(0x001008, 0)
await dev_pf0_bar0.write_dword(0x00100c, 0)
# block length
await dev_pf0_bar0.write_dword(0x001010, size)
# block count
await dev_pf0_bar0.write_dword(0x001018, count)
await dev_pf0_bar0.write_dword(0x00101c, 0)
if stall:
# stall RX
await dev_pf0_bar0.write_dword(0x000040, stall)
# start
await dev_pf0_bar0.write_dword(0x001000, 1)
# wait for stall
if stall:
for k in range(stall):
await RisingEdge(tb.dut.clk)
for k in range(100):
await Timer(1000, 'ns')
run = await dev_pf0_bar0.read_dword(0x001000)
status = await dev_pf0_bar0.read_dword(0x000000)
if run == 0 and status & 0x300 == 0:
break
if run != 0:
tb.log.warning("Operation timed out")
if status & 0x300 != 0:
tb.log.warning("DMA engine busy")
cycles = await dev_pf0_bar0.read_dword(0x001008)
rd_req = await dev_pf0_bar0.read_dword(0x000020) - rd_req
rd_cpl = await dev_pf0_bar0.read_dword(0x000024) - rd_cpl
tb.log.info("read %d x %d B (total %d B %d CPLD, stride %d) in %d ns (%d req %d cpl) %d Mbps",
count, size, count*size, count*((size+15)//16), stride, cycles*4, rd_req, rd_cpl, size * count * 8 * 1000 / (cycles * 4))
assert status & 0x300 == 0
@cocotb.test() @cocotb.test()
async def run_test(dut): async def run_test(dut):
@ -257,6 +462,8 @@ async def run_test(dut):
await Timer(2000, 'ns') await Timer(2000, 'ns')
# read status # read status
status = await dev_pf0_bar0.read_dword(0x000000)
tb.log.info("DMA Status: 0x%x", status)
val = await dev_pf0_bar0.read_dword(0x000118) val = await dev_pf0_bar0.read_dword(0x000118)
tb.log.info("Status: 0x%x", val) tb.log.info("Status: 0x%x", val)
assert val == 0x800000AA assert val == 0x800000AA
@ -271,6 +478,8 @@ async def run_test(dut):
await Timer(2000, 'ns') await Timer(2000, 'ns')
# read status # read status
status = await dev_pf0_bar0.read_dword(0x000000)
tb.log.info("DMA Status: 0x%x", status)
val = await dev_pf0_bar0.read_dword(0x000218) val = await dev_pf0_bar0.read_dword(0x000218)
tb.log.info("Status: 0x%x", val) tb.log.info("Status: 0x%x", val)
assert val == 0x80000055 assert val == 0x80000055
@ -291,6 +500,8 @@ async def run_test(dut):
await Timer(2000, 'ns') await Timer(2000, 'ns')
# read status # read status
status = await dev_pf0_bar0.read_dword(0x000000)
tb.log.info("DMA Status: 0x%x", status)
val = await dev_pf0_bar0.read_dword(0x000218) val = await dev_pf0_bar0.read_dword(0x000218)
tb.log.info("Status: 0x%x", val) tb.log.info("Status: 0x%x", val)
assert val == 0x800000AA assert val == 0x800000AA
@ -301,110 +512,66 @@ async def run_test(dut):
tb.log.info("Test DMA block operations") tb.log.info("Test DMA block operations")
# disable interrupts
await dev_pf0_bar0.write_dword(0x000008, 0)
region_len = 0x2000 region_len = 0x2000
src_offset = 0x0000 src_offset = 0x0000
dest_offset = 0x4000 dest_offset = 0x4000
block_size = 256 await dma_block_read_bench(tb, dev, mem_base+src_offset, region_len-1, 256, 256, 32)
block_stride = block_size await dma_block_write_bench(tb, dev, mem_base+dest_offset, region_len-1, 256, 256, 32)
block_count = 32
# write packet data
mem[src_offset:src_offset+region_len] = bytearray([x % 256 for x in range(region_len)])
# enable DMA
await dev_pf0_bar0.write_dword(0x000000, 1)
# disable interrupts
await dev_pf0_bar0.write_dword(0x000008, 0)
# configure operation (read)
# DMA base address
await dev_pf0_bar0.write_dword(0x001080, (mem_base+src_offset) & 0xffffffff)
await dev_pf0_bar0.write_dword(0x001084, (mem_base+src_offset >> 32) & 0xffffffff)
# DMA offset address
await dev_pf0_bar0.write_dword(0x001088, 0)
await dev_pf0_bar0.write_dword(0x00108c, 0)
# DMA offset mask
await dev_pf0_bar0.write_dword(0x001090, region_len-1)
await dev_pf0_bar0.write_dword(0x001094, 0)
# DMA stride
await dev_pf0_bar0.write_dword(0x001098, block_stride)
await dev_pf0_bar0.write_dword(0x00109c, 0)
# RAM base address
await dev_pf0_bar0.write_dword(0x0010c0, 0)
await dev_pf0_bar0.write_dword(0x0010c4, 0)
# RAM offset address
await dev_pf0_bar0.write_dword(0x0010c8, 0)
await dev_pf0_bar0.write_dword(0x0010cc, 0)
# RAM offset mask
await dev_pf0_bar0.write_dword(0x0010d0, region_len-1)
await dev_pf0_bar0.write_dword(0x0010d4, 0)
# RAM stride
await dev_pf0_bar0.write_dword(0x0010d8, block_stride)
await dev_pf0_bar0.write_dword(0x0010dc, 0)
# clear cycle count
await dev_pf0_bar0.write_dword(0x001008, 0)
await dev_pf0_bar0.write_dword(0x00100c, 0)
# block length
await dev_pf0_bar0.write_dword(0x001010, block_size)
# block count
await dev_pf0_bar0.write_dword(0x001018, block_count)
await dev_pf0_bar0.write_dword(0x00101c, 0)
# start
await dev_pf0_bar0.write_dword(0x001000, 1)
for k in range(10):
cnt = await dev_pf0_bar0.read_dword(0x001018)
await Timer(1000, 'ns')
if cnt == 0:
break
# configure operation (write)
# DMA base address
await dev_pf0_bar0.write_dword(0x001180, (mem_base+dest_offset) & 0xffffffff)
await dev_pf0_bar0.write_dword(0x001184, (mem_base+dest_offset >> 32) & 0xffffffff)
# DMA offset address
await dev_pf0_bar0.write_dword(0x001188, 0)
await dev_pf0_bar0.write_dword(0x00118c, 0)
# DMA offset mask
await dev_pf0_bar0.write_dword(0x001190, region_len-1)
await dev_pf0_bar0.write_dword(0x001194, 0)
# DMA stride
await dev_pf0_bar0.write_dword(0x001198, block_stride)
await dev_pf0_bar0.write_dword(0x00119c, 0)
# RAM base address
await dev_pf0_bar0.write_dword(0x0011c0, 0)
await dev_pf0_bar0.write_dword(0x0011c4, 0)
# RAM offset address
await dev_pf0_bar0.write_dword(0x0011c8, 0)
await dev_pf0_bar0.write_dword(0x0011cc, 0)
# RAM offset mask
await dev_pf0_bar0.write_dword(0x0011d0, region_len-1)
await dev_pf0_bar0.write_dword(0x0011d4, 0)
# RAM stride
await dev_pf0_bar0.write_dword(0x0011d8, block_stride)
await dev_pf0_bar0.write_dword(0x0011dc, 0)
# clear cycle count
await dev_pf0_bar0.write_dword(0x001108, 0)
await dev_pf0_bar0.write_dword(0x00110c, 0)
# block length
await dev_pf0_bar0.write_dword(0x001110, block_size)
# block count
await dev_pf0_bar0.write_dword(0x001118, block_count)
await dev_pf0_bar0.write_dword(0x00111c, 0)
# start
await dev_pf0_bar0.write_dword(0x001100, 1)
for k in range(10):
cnt = await dev_pf0_bar0.read_dword(0x001118)
await Timer(1000, 'ns')
if cnt == 0:
break
tb.log.info("%s", mem.hexdump_str(dest_offset, region_len)) tb.log.info("%s", mem.hexdump_str(dest_offset, region_len))
assert mem[src_offset:src_offset+region_len] == mem[dest_offset:dest_offset+region_len] assert mem[src_offset:src_offset+region_len] == mem[dest_offset:dest_offset+region_len]
tb.log.info("Test RX completion buffer (CPLH, 8)")
tb.rc.split_on_all_rcb = True
size = 8
stride = size
for count in range(32, 256+1, 8):
await dma_cpl_buf_test(tb, dev, mem_base, region_len-1, size, stride, count, 2000)
tb.log.info("Test RX completion buffer (CPLH, 8+64)")
size = 8+64
stride = 0
for count in range(8, 256+1, 8):
await dma_cpl_buf_test(tb, dev, mem_base+128-8, region_len-1, size, stride, count, 2000)
tb.log.info("Test RX completion buffer (CPLH, 8+128+8)")
size = 8+128+8
stride = 0
for count in range(8, 256+1, 8):
await dma_cpl_buf_test(tb, dev, mem_base+128-8, region_len-1, size, stride, count, 2000)
tb.rc.split_on_all_rcb = False
tb.log.info("Test RX completion buffer (CPLD)")
size = 512
stride = size
for count in range(8, 256+1, 8):
await dma_cpl_buf_test(tb, dev, mem_base, region_len-1, size, stride, count, 4000)
tb.log.info("Perform block reads")
count = 100
for size in [2**x for x in range(14)]:
stride = size
await dma_block_read_bench(tb, dev, mem_base, region_len-1, size, stride, count)
tb.log.info("Perform block writes")
count = 100
for size in [2**x for x in range(14)]:
stride = size
await dma_block_write_bench(tb, dev, mem_base, region_len-1, size, stride, count)
await RisingEdge(dut.clk) await RisingEdge(dut.clk)
await RisingEdge(dut.clk) await RisingEdge(dut.clk)
@ -466,7 +633,7 @@ def test_example_core_pcie_s10(request, data_width, l_tile):
parameters['READ_OP_TABLE_SIZE'] = parameters['PCIE_TAG_COUNT'] parameters['READ_OP_TABLE_SIZE'] = parameters['PCIE_TAG_COUNT']
parameters['READ_TX_LIMIT'] = 2**parameters['TX_SEQ_NUM_WIDTH'] parameters['READ_TX_LIMIT'] = 2**parameters['TX_SEQ_NUM_WIDTH']
parameters['READ_CPLH_FC_LIMIT'] = 770 parameters['READ_CPLH_FC_LIMIT'] = 770
parameters['READ_CPLD_FC_LIMIT'] = 2500 parameters['READ_CPLD_FC_LIMIT'] = 2400
parameters['WRITE_OP_TABLE_SIZE'] = 2**parameters['TX_SEQ_NUM_WIDTH'] parameters['WRITE_OP_TABLE_SIZE'] = 2**parameters['TX_SEQ_NUM_WIDTH']
parameters['WRITE_TX_LIMIT'] = 2**parameters['TX_SEQ_NUM_WIDTH'] parameters['WRITE_TX_LIMIT'] = 2**parameters['TX_SEQ_NUM_WIDTH']
parameters['BAR0_APERTURE'] = 24 parameters['BAR0_APERTURE'] = 24

View File

@ -74,8 +74,8 @@ export PARAM_IMM_ENABLE := 1
export PARAM_IMM_WIDTH := 32 export PARAM_IMM_WIDTH := 32
export PARAM_READ_OP_TABLE_SIZE := $(PARAM_PCIE_TAG_COUNT) export PARAM_READ_OP_TABLE_SIZE := $(PARAM_PCIE_TAG_COUNT)
export PARAM_READ_TX_LIMIT := $(shell echo "$$(( 1 << ($(PARAM_RQ_SEQ_NUM_WIDTH)-1) ))" ) export PARAM_READ_TX_LIMIT := $(shell echo "$$(( 1 << ($(PARAM_RQ_SEQ_NUM_WIDTH)-1) ))" )
export PARAM_READ_CPLH_FC_LIMIT := $(if $(filter-out 60,$(PARAM_AXIS_PCIE_RQ_USER_WIDTH)),64,128) export PARAM_READ_CPLH_FC_LIMIT := $(if $(filter-out 60,$(PARAM_AXIS_PCIE_RQ_USER_WIDTH)),256,64)
export PARAM_READ_CPLD_FC_LIMIT := $(if $(filter-out 60,$(PARAM_AXIS_PCIE_RQ_USER_WIDTH)),992,2048) export PARAM_READ_CPLD_FC_LIMIT := $(if $(filter-out 60,$(PARAM_AXIS_PCIE_RQ_USER_WIDTH)),1792,960)
export PARAM_WRITE_OP_TABLE_SIZE := $(shell echo "$$(( 1 << ($(PARAM_RQ_SEQ_NUM_WIDTH)-1) ))" ) export PARAM_WRITE_OP_TABLE_SIZE := $(shell echo "$$(( 1 << ($(PARAM_RQ_SEQ_NUM_WIDTH)-1) ))" )
export PARAM_WRITE_TX_LIMIT := $(shell echo "$$(( 1 << ($(PARAM_RQ_SEQ_NUM_WIDTH)-1) ))" ) export PARAM_WRITE_TX_LIMIT := $(shell echo "$$(( 1 << ($(PARAM_RQ_SEQ_NUM_WIDTH)-1) ))" )
export PARAM_BAR0_APERTURE := 24 export PARAM_BAR0_APERTURE := 24
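Note the sense of the Make conditional: $(if $(filter-out 60,$(W)),A,B) yields A for any width other than 60 and B only when the width is exactly 60, so the first value in each pair is the non-60 setting. The same selection written out in Python for clarity (a sketch, not part of the build):

def resolve(rq_user_width, not_60_value, is_60_value):
    # mirrors $(if $(filter-out 60,$(PARAM_AXIS_PCIE_RQ_USER_WIDTH)),A,B)
    return not_60_value if str(rq_user_width) != "60" else is_60_value

assert resolve(137, 256, 64) == 256   # CPLH limit when the RQ tuser width is not 60
assert resolve(60, 1792, 960) == 960  # CPLD limit when the RQ tuser width is 60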

View File

@ -299,6 +299,211 @@ class TB(object):
await self.rc.enumerate() await self.rc.enumerate()
async def dma_block_read_bench(tb, dev, addr, mask, size, stride, count):
dev_pf0_bar0 = dev.bar_window[0]
rd_req = await dev_pf0_bar0.read_dword(0x000020)
rd_cpl = await dev_pf0_bar0.read_dword(0x000024)
# configure operation (read)
# DMA base address
await dev_pf0_bar0.write_dword(0x001080, addr & 0xffffffff)
await dev_pf0_bar0.write_dword(0x001084, (addr >> 32) & 0xffffffff)
# DMA offset address
await dev_pf0_bar0.write_dword(0x001088, 0)
await dev_pf0_bar0.write_dword(0x00108c, 0)
# DMA offset mask
await dev_pf0_bar0.write_dword(0x001090, mask)
await dev_pf0_bar0.write_dword(0x001094, 0)
# DMA stride
await dev_pf0_bar0.write_dword(0x001098, stride)
await dev_pf0_bar0.write_dword(0x00109c, 0)
# RAM base address
await dev_pf0_bar0.write_dword(0x0010c0, 0)
await dev_pf0_bar0.write_dword(0x0010c4, 0)
# RAM offset address
await dev_pf0_bar0.write_dword(0x0010c8, 0)
await dev_pf0_bar0.write_dword(0x0010cc, 0)
# RAM offset mask
await dev_pf0_bar0.write_dword(0x0010d0, mask)
await dev_pf0_bar0.write_dword(0x0010d4, 0)
# RAM stride
await dev_pf0_bar0.write_dword(0x0010d8, stride)
await dev_pf0_bar0.write_dword(0x0010dc, 0)
# clear cycle count
await dev_pf0_bar0.write_dword(0x001008, 0)
await dev_pf0_bar0.write_dword(0x00100c, 0)
# block length
await dev_pf0_bar0.write_dword(0x001010, size)
# block count
await dev_pf0_bar0.write_dword(0x001018, count)
await dev_pf0_bar0.write_dword(0x00101c, 0)
# start
await dev_pf0_bar0.write_dword(0x001000, 1)
for k in range(1000):
await Timer(1000, 'ns')
run = await dev_pf0_bar0.read_dword(0x001000)
status = await dev_pf0_bar0.read_dword(0x000000)
if run == 0 and status & 0x300 == 0:
break
if run != 0:
tb.log.warning("Operation timed out")
if status & 0x300 != 0:
tb.log.warning("DMA engine busy")
cycles = await dev_pf0_bar0.read_dword(0x001008)
rd_req = await dev_pf0_bar0.read_dword(0x000020) - rd_req
rd_cpl = await dev_pf0_bar0.read_dword(0x000024) - rd_cpl
tb.log.info("read %d blocks of %d bytes (total %d B, stride %d) in %d ns (%d req %d cpl) %d Mbps",
count, size, count*size, stride, cycles*4, rd_req, rd_cpl, size * count * 8 * 1000 / (cycles * 4))
assert status & 0x300 == 0
async def dma_block_write_bench(tb, dev, addr, mask, size, stride, count):
dev_pf0_bar0 = dev.bar_window[0]
wr_req = await dev_pf0_bar0.read_dword(0x000028)
# configure operation (write)
# DMA base address
await dev_pf0_bar0.write_dword(0x001180, addr & 0xffffffff)
await dev_pf0_bar0.write_dword(0x001184, (addr >> 32) & 0xffffffff)
# DMA offset address
await dev_pf0_bar0.write_dword(0x001188, 0)
await dev_pf0_bar0.write_dword(0x00118c, 0)
# DMA offset mask
await dev_pf0_bar0.write_dword(0x001190, mask)
await dev_pf0_bar0.write_dword(0x001194, 0)
# DMA stride
await dev_pf0_bar0.write_dword(0x001198, stride)
await dev_pf0_bar0.write_dword(0x00119c, 0)
# RAM base address
await dev_pf0_bar0.write_dword(0x0011c0, 0)
await dev_pf0_bar0.write_dword(0x0011c4, 0)
# RAM offset address
await dev_pf0_bar0.write_dword(0x0011c8, 0)
await dev_pf0_bar0.write_dword(0x0011cc, 0)
# RAM offset mask
await dev_pf0_bar0.write_dword(0x0011d0, mask)
await dev_pf0_bar0.write_dword(0x0011d4, 0)
# RAM stride
await dev_pf0_bar0.write_dword(0x0011d8, stride)
await dev_pf0_bar0.write_dword(0x0011dc, 0)
# clear cycle count
await dev_pf0_bar0.write_dword(0x001108, 0)
await dev_pf0_bar0.write_dword(0x00110c, 0)
# block length
await dev_pf0_bar0.write_dword(0x001110, size)
# block count
await dev_pf0_bar0.write_dword(0x001118, count)
await dev_pf0_bar0.write_dword(0x00111c, 0)
# start
await dev_pf0_bar0.write_dword(0x001100, 1)
for k in range(1000):
await Timer(1000, 'ns')
run = await dev_pf0_bar0.read_dword(0x001100)
status = await dev_pf0_bar0.read_dword(0x000000)
if run == 0 and status & 0x300 == 0:
break
if run != 0:
tb.log.warning("Operation timed out")
if status & 0x300 != 0:
tb.log.warning("DMA engine busy")
cycles = await dev_pf0_bar0.read_dword(0x001108)
wr_req = await dev_pf0_bar0.read_dword(0x000028) - wr_req
tb.log.info("wrote %d blocks of %d bytes (total %d B, stride %d) in %d ns (%d req) %d Mbps",
count, size, count*size, stride, cycles*4, wr_req, size * count * 8 * 1000 / (cycles * 4))
assert status & 0x300 == 0
async def dma_cpl_buf_test(tb, dev, addr, mask, size, stride, count, stall):
dev_pf0_bar0 = dev.bar_window[0]
rd_req = await dev_pf0_bar0.read_dword(0x000020)
rd_cpl = await dev_pf0_bar0.read_dword(0x000024)
# configure operation (read)
# DMA base address
await dev_pf0_bar0.write_dword(0x001080, addr & 0xffffffff)
await dev_pf0_bar0.write_dword(0x001084, (addr >> 32) & 0xffffffff)
# DMA offset address
await dev_pf0_bar0.write_dword(0x001088, 0)
await dev_pf0_bar0.write_dword(0x00108c, 0)
# DMA offset mask
await dev_pf0_bar0.write_dword(0x001090, mask)
await dev_pf0_bar0.write_dword(0x001094, 0)
# DMA stride
await dev_pf0_bar0.write_dword(0x001098, stride)
await dev_pf0_bar0.write_dword(0x00109c, 0)
# RAM base address
await dev_pf0_bar0.write_dword(0x0010c0, 0)
await dev_pf0_bar0.write_dword(0x0010c4, 0)
# RAM offset address
await dev_pf0_bar0.write_dword(0x0010c8, 0)
await dev_pf0_bar0.write_dword(0x0010cc, 0)
# RAM offset mask
await dev_pf0_bar0.write_dword(0x0010d0, mask)
await dev_pf0_bar0.write_dword(0x0010d4, 0)
# RAM stride
await dev_pf0_bar0.write_dword(0x0010d8, stride)
await dev_pf0_bar0.write_dword(0x0010dc, 0)
# clear cycle count
await dev_pf0_bar0.write_dword(0x001008, 0)
await dev_pf0_bar0.write_dword(0x00100c, 0)
# block length
await dev_pf0_bar0.write_dword(0x001010, size)
# block count
await dev_pf0_bar0.write_dword(0x001018, count)
await dev_pf0_bar0.write_dword(0x00101c, 0)
if stall:
# stall RX
await dev_pf0_bar0.write_dword(0x000040, stall)
# start
await dev_pf0_bar0.write_dword(0x001000, 1)
# wait for stall
if stall:
for k in range(stall):
await RisingEdge(tb.dut.clk)
for k in range(100):
await Timer(1000, 'ns')
run = await dev_pf0_bar0.read_dword(0x001000)
status = await dev_pf0_bar0.read_dword(0x000000)
if run == 0 and status & 0x300 == 0:
break
if run != 0:
tb.log.warning("Operation timed out")
if status & 0x300 != 0:
tb.log.warning("DMA engine busy")
cycles = await dev_pf0_bar0.read_dword(0x001008)
rd_req = await dev_pf0_bar0.read_dword(0x000020) - rd_req
rd_cpl = await dev_pf0_bar0.read_dword(0x000024) - rd_cpl
tb.log.info("read %d x %d B (total %d B %d CPLD, stride %d) in %d ns (%d req %d cpl) %d Mbps",
count, size, count*size, count*((size+15)//16), stride, cycles*4, rd_req, rd_cpl, size * count * 8 * 1000 / (cycles * 4))
assert status & 0x300 == 0
@cocotb.test() @cocotb.test()
async def run_test(dut): async def run_test(dut):
@ -350,6 +555,8 @@ async def run_test(dut):
await Timer(2000, 'ns') await Timer(2000, 'ns')
# read status # read status
status = await dev_pf0_bar0.read_dword(0x000000)
tb.log.info("DMA Status: 0x%x", status)
val = await dev_pf0_bar0.read_dword(0x000118) val = await dev_pf0_bar0.read_dword(0x000118)
tb.log.info("Status: 0x%x", val) tb.log.info("Status: 0x%x", val)
assert val == 0x800000AA assert val == 0x800000AA
@ -364,6 +571,8 @@ async def run_test(dut):
await Timer(2000, 'ns') await Timer(2000, 'ns')
# read status # read status
status = await dev_pf0_bar0.read_dword(0x000000)
tb.log.info("DMA Status: 0x%x", status)
val = await dev_pf0_bar0.read_dword(0x000218) val = await dev_pf0_bar0.read_dword(0x000218)
tb.log.info("Status: 0x%x", val) tb.log.info("Status: 0x%x", val)
assert val == 0x80000055 assert val == 0x80000055
@ -384,6 +593,8 @@ async def run_test(dut):
await Timer(2000, 'ns') await Timer(2000, 'ns')
# read status # read status
status = await dev_pf0_bar0.read_dword(0x000000)
tb.log.info("DMA Status: 0x%x", status)
val = await dev_pf0_bar0.read_dword(0x000218) val = await dev_pf0_bar0.read_dword(0x000218)
tb.log.info("Status: 0x%x", val) tb.log.info("Status: 0x%x", val)
assert val == 0x800000AA assert val == 0x800000AA
@ -394,112 +605,66 @@ async def run_test(dut):
tb.log.info("Test DMA block operations") tb.log.info("Test DMA block operations")
# disable interrupts
await dev_pf0_bar0.write_dword(0x000008, 0)
region_len = 0x2000 region_len = 0x2000
src_offset = 0x0000 src_offset = 0x0000
dest_offset = 0x4000 dest_offset = 0x4000
block_size = 256 await dma_block_read_bench(tb, dev, mem_base+src_offset, region_len-1, 256, 256, 32)
block_stride = block_size await dma_block_write_bench(tb, dev, mem_base+dest_offset, region_len-1, 256, 256, 32)
block_count = 32
# write packet data
mem[src_offset:src_offset+region_len] = bytearray([x % 256 for x in range(region_len)])
# enable DMA
await dev_pf0_bar0.write_dword(0x000000, 1)
# disable interrupts
await dev_pf0_bar0.write_dword(0x000008, 0)
# configure operation (read)
# DMA base address
await dev_pf0_bar0.write_dword(0x001080, (mem_base+src_offset) & 0xffffffff)
await dev_pf0_bar0.write_dword(0x001084, (mem_base+src_offset >> 32) & 0xffffffff)
# DMA offset address
await dev_pf0_bar0.write_dword(0x001088, 0)
await dev_pf0_bar0.write_dword(0x00108c, 0)
# DMA offset mask
await dev_pf0_bar0.write_dword(0x001090, region_len-1)
await dev_pf0_bar0.write_dword(0x001094, 0)
# DMA stride
await dev_pf0_bar0.write_dword(0x001098, block_stride)
await dev_pf0_bar0.write_dword(0x00109c, 0)
# RAM base address
await dev_pf0_bar0.write_dword(0x0010c0, 0)
await dev_pf0_bar0.write_dword(0x0010c4, 0)
# RAM offset address
await dev_pf0_bar0.write_dword(0x0010c8, 0)
await dev_pf0_bar0.write_dword(0x0010cc, 0)
# RAM offset mask
await dev_pf0_bar0.write_dword(0x0010d0, region_len-1)
await dev_pf0_bar0.write_dword(0x0010d4, 0)
# RAM stride
await dev_pf0_bar0.write_dword(0x0010d8, block_stride)
await dev_pf0_bar0.write_dword(0x0010dc, 0)
# clear cycle count
await dev_pf0_bar0.write_dword(0x001008, 0)
await dev_pf0_bar0.write_dword(0x00100c, 0)
# block length
await dev_pf0_bar0.write_dword(0x001010, block_size)
# block count
await dev_pf0_bar0.write_dword(0x001018, block_count)
await dev_pf0_bar0.write_dword(0x00101c, 0)
# start
await dev_pf0_bar0.write_dword(0x001000, 1)
for k in range(10):
cnt = await dev_pf0_bar0.read_dword(0x001018)
await Timer(1000, 'ns')
if cnt == 0:
break
# configure operation (write)
# DMA base address
await dev_pf0_bar0.write_dword(0x001180, (mem_base+dest_offset) & 0xffffffff)
await dev_pf0_bar0.write_dword(0x001184, (mem_base+dest_offset >> 32) & 0xffffffff)
# DMA offset address
await dev_pf0_bar0.write_dword(0x001188, 0)
await dev_pf0_bar0.write_dword(0x00118c, 0)
# DMA offset mask
await dev_pf0_bar0.write_dword(0x001190, region_len-1)
await dev_pf0_bar0.write_dword(0x001194, 0)
# DMA stride
await dev_pf0_bar0.write_dword(0x001198, block_stride)
await dev_pf0_bar0.write_dword(0x00119c, 0)
# RAM base address
await dev_pf0_bar0.write_dword(0x0011c0, 0)
await dev_pf0_bar0.write_dword(0x0011c4, 0)
# RAM offset address
await dev_pf0_bar0.write_dword(0x0011c8, 0)
await dev_pf0_bar0.write_dword(0x0011cc, 0)
# RAM offset mask
await dev_pf0_bar0.write_dword(0x0011d0, region_len-1)
await dev_pf0_bar0.write_dword(0x0011d4, 0)
# RAM stride
await dev_pf0_bar0.write_dword(0x0011d8, block_stride)
await dev_pf0_bar0.write_dword(0x0011dc, 0)
# clear cycle count
await dev_pf0_bar0.write_dword(0x001108, 0)
await dev_pf0_bar0.write_dword(0x00110c, 0)
# block length
await dev_pf0_bar0.write_dword(0x001110, block_size)
# block count
await dev_pf0_bar0.write_dword(0x001118, block_count)
await dev_pf0_bar0.write_dword(0x00111c, 0)
# start
await dev_pf0_bar0.write_dword(0x001100, 1)
for k in range(10):
cnt = await dev_pf0_bar0.read_dword(0x001118)
await Timer(1000, 'ns')
if cnt == 0:
break
await Timer(2000, 'ns')
tb.log.info("%s", mem.hexdump_str(dest_offset, region_len)) tb.log.info("%s", mem.hexdump_str(dest_offset, region_len))
assert mem[src_offset:src_offset+region_len] == mem[dest_offset:dest_offset+region_len] assert mem[src_offset:src_offset+region_len] == mem[dest_offset:dest_offset+region_len]
tb.log.info("Test RX completion buffer (CPLH, 8)")
tb.rc.split_on_all_rcb = True
size = 8
stride = size
for count in range(32, 256+1, 8):
await dma_cpl_buf_test(tb, dev, mem_base, region_len-1, size, stride, count, 2000)
tb.log.info("Test RX completion buffer (CPLH, 8+64)")
size = 8+64
stride = 0
for count in range(8, 256+1, 8):
await dma_cpl_buf_test(tb, dev, mem_base+128-8, region_len-1, size, stride, count, 2000)
tb.log.info("Test RX completion buffer (CPLH, 8+128+8)")
size = 8+128+8
stride = 0
for count in range(8, 256+1, 8):
await dma_cpl_buf_test(tb, dev, mem_base+128-8, region_len-1, size, stride, count, 2000)
tb.rc.split_on_all_rcb = False
tb.log.info("Test RX completion buffer (CPLD)")
size = 512
stride = size
for count in range(8, 256+1, 8):
await dma_cpl_buf_test(tb, dev, mem_base, region_len-1, size, stride, count, 4000)
tb.log.info("Perform block reads")
count = 100
for size in [2**x for x in range(14)]:
stride = size
await dma_block_read_bench(tb, dev, mem_base, region_len-1, size, stride, count)
tb.log.info("Perform block writes")
count = 100
for size in [2**x for x in range(14)]:
stride = size
await dma_block_write_bench(tb, dev, mem_base, region_len-1, size, stride, count)
await RisingEdge(dut.clk) await RisingEdge(dut.clk)
await RisingEdge(dut.clk) await RisingEdge(dut.clk)
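The register-programming sequence removed above is evidently what the new dma_block_read_bench and dma_block_write_bench helpers (presumably defined earlier in this file, outside this hunk) now encapsulate. As an illustration only, since the helpers' definitions are not shown here and the function and parameter names below are made up, a read-direction helper built from the removed register map might look like this sketch; it takes the BAR window object directly instead of the dev handle used by the actual calls:

from cocotb.triggers import Timer

async def dma_block_read_bench_sketch(bar, dma_base, offset_mask,
        block_stride, block_size, block_count):
    # Hypothetical re-creation of the removed read benchmark; the high
    # dwords, which the original writes as 0, are omitted for brevity.
    await bar.write_dword(0x001080, dma_base & 0xffffffff)          # DMA base address (low)
    await bar.write_dword(0x001084, (dma_base >> 32) & 0xffffffff)  # DMA base address (high)
    await bar.write_dword(0x001088, 0)                              # DMA offset address
    await bar.write_dword(0x001090, offset_mask)                    # DMA offset mask
    await bar.write_dword(0x001098, block_stride)                   # DMA stride
    await bar.write_dword(0x0010c0, 0)                              # RAM base address
    await bar.write_dword(0x0010c8, 0)                              # RAM offset address
    await bar.write_dword(0x0010d0, offset_mask)                    # RAM offset mask
    await bar.write_dword(0x0010d8, block_stride)                   # RAM stride
    await bar.write_dword(0x001008, 0)                              # clear cycle count
    await bar.write_dword(0x001010, block_size)                     # block length
    await bar.write_dword(0x001018, block_count)                    # block count
    await bar.write_dword(0x001000, 1)                              # start
    for _ in range(10):                                             # poll remaining block count
        cnt = await bar.read_dword(0x001018)
        await Timer(1000, 'ns')
        if cnt == 0:
            break

The write-direction helper would run the same sequence against the 0x0011xx register block.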
@@ -566,8 +731,8 @@ def test_example_core_pcie_us(request, axis_pcie_data_width, straddle):
parameters['IMM_WIDTH'] = 32 parameters['IMM_WIDTH'] = 32
parameters['READ_OP_TABLE_SIZE'] = parameters['PCIE_TAG_COUNT'] parameters['READ_OP_TABLE_SIZE'] = parameters['PCIE_TAG_COUNT']
parameters['READ_TX_LIMIT'] = 2**(parameters['RQ_SEQ_NUM_WIDTH']-1) parameters['READ_TX_LIMIT'] = 2**(parameters['RQ_SEQ_NUM_WIDTH']-1)
parameters['READ_CPLH_FC_LIMIT'] = 64 if parameters['AXIS_PCIE_RQ_USER_WIDTH'] == 60 else 128 parameters['READ_CPLH_FC_LIMIT'] = 64 if parameters['AXIS_PCIE_RQ_USER_WIDTH'] == 60 else 256
parameters['READ_CPLD_FC_LIMIT'] = 992 if parameters['AXIS_PCIE_RQ_USER_WIDTH'] == 60 else 2048 parameters['READ_CPLD_FC_LIMIT'] = 1024-64 if parameters['AXIS_PCIE_RQ_USER_WIDTH'] == 60 else 2048-256
parameters['WRITE_OP_TABLE_SIZE'] = 2**(parameters['RQ_SEQ_NUM_WIDTH']-1) parameters['WRITE_OP_TABLE_SIZE'] = 2**(parameters['RQ_SEQ_NUM_WIDTH']-1)
parameters['WRITE_TX_LIMIT'] = 2**(parameters['RQ_SEQ_NUM_WIDTH']-1) parameters['WRITE_TX_LIMIT'] = 2**(parameters['RQ_SEQ_NUM_WIDTH']-1)
parameters['BAR0_APERTURE'] = 24 parameters['BAR0_APERTURE'] = 24
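The completion-buffer tests added above choose read sizes (8, 8+64, and 8+128+8 bytes) and a start address just below a read completion boundary so that each request splits into a known number of completion TLPs, and the READ_CPLH_FC_LIMIT / READ_CPLD_FC_LIMIT values set here are the credit budgets those tests exercise. As a rough aid only, not part of the testbench, the worst-case credit cost of a single read can be estimated from the standard PCIe flow-control units, one CplH credit per completion TLP and one CplD credit per 16 bytes of completion payload, assuming the completer may split at every read completion boundary (RCB):

def completion_credits(addr, length, rcb=128):
    # Illustrative helper (not part of the DUT or testbench): worst-case
    # CplH/CplD credits for one read, split at every RCB boundary.
    if length == 0:
        return 0, 0
    cplh = (addr + length - 1) // rcb - addr // rcb + 1   # one header per RCB block touched
    cpld = 0
    pos, remaining = addr, length
    while remaining:
        chunk = min(remaining, rcb - pos % rcb)
        cpld += -(-chunk // 16)   # each completion's payload rounds up to whole 16-byte credits
        pos += chunk
        remaining -= chunk
    return cplh, cpld

# the "8+128+8" case: 144 bytes starting 8 bytes below an RCB boundary
print(completion_credits(128 - 8, 8 + 128 + 8, rcb=128))   # -> (3, 10)

The read engine throttles new requests once the credits pinned by outstanding reads would reach these configured limits; the tests above sweep the request count up to and past the point where the budgets bind, presumably to exercise that throttling.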

View File

@@ -56,7 +56,7 @@ module fpga (
parameter AXIS_PCIE_DATA_WIDTH = 512; parameter AXIS_PCIE_DATA_WIDTH = 512;
parameter AXIS_PCIE_KEEP_WIDTH = (AXIS_PCIE_DATA_WIDTH/32); parameter AXIS_PCIE_KEEP_WIDTH = (AXIS_PCIE_DATA_WIDTH/32);
parameter AXIS_PCIE_RC_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 75 : 161; parameter AXIS_PCIE_RC_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 75 : 161;
parameter AXIS_PCIE_RQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 60 : 137; parameter AXIS_PCIE_RQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 62 : 137;
parameter AXIS_PCIE_CQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 85 : 183; parameter AXIS_PCIE_CQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 85 : 183;
parameter AXIS_PCIE_CC_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 33 : 81; parameter AXIS_PCIE_CC_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 33 : 81;
parameter RC_STRADDLE = AXIS_PCIE_DATA_WIDTH >= 256; parameter RC_STRADDLE = AXIS_PCIE_DATA_WIDTH >= 256;

View File

@@ -161,8 +161,8 @@ example_core_pcie_us #(
.PCIE_TAG_COUNT(PCIE_TAG_COUNT), .PCIE_TAG_COUNT(PCIE_TAG_COUNT),
.READ_OP_TABLE_SIZE(PCIE_TAG_COUNT), .READ_OP_TABLE_SIZE(PCIE_TAG_COUNT),
.READ_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)), .READ_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)),
.READ_CPLH_FC_LIMIT(128), .READ_CPLH_FC_LIMIT(256),
.READ_CPLD_FC_LIMIT(2048), .READ_CPLD_FC_LIMIT(2048-256),
.WRITE_OP_TABLE_SIZE(2**(RQ_SEQ_NUM_WIDTH-1)), .WRITE_OP_TABLE_SIZE(2**(RQ_SEQ_NUM_WIDTH-1)),
.WRITE_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)), .WRITE_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)),
.BAR0_APERTURE(BAR0_APERTURE), .BAR0_APERTURE(BAR0_APERTURE),
@@ -265,8 +265,7 @@ example_core_pcie_us_inst (
*/ */
.cfg_max_read_req(cfg_max_read_req), .cfg_max_read_req(cfg_max_read_req),
.cfg_max_payload(cfg_max_payload), .cfg_max_payload(cfg_max_payload),
// .cfg_rcb_status(cfg_rcb_status), .cfg_rcb_status(cfg_rcb_status),
.cfg_rcb_status(1'b1), // force RCB 128 due to insufficient CPLH limit in US+ PCIe HIP
/* /*
* Status * Status
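For context on the cfg_rcb_status change above: the removed line forced RCB to 128 bytes as a workaround (see its comment), and this commit wires the value reported by the hard IP back through, presumably because the read engine now budgets completion-header credits explicitly. The effect of the RCB on worst-case header-credit usage is simple arithmetic (illustrative values only; 512 bytes is just an example max read request size):

# Worst case (start address not RCB-aligned), a read of max_read_req bytes
# can split into max_read_req // rcb + 1 completions, each costing one CplH credit.
max_read_req = 512   # bytes, example value
for rcb in (64, 128):
    print(rcb, max_read_req // rcb + 1)   # 64 -> 9, 128 -> 5

Halving the RCB roughly doubles the header credits a single large read can pin, which is why allowing RCB 64 only makes sense once the CplH budget is tracked and raised, as done elsewhere in this commit.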

View File

@@ -54,7 +54,6 @@ export PARAM_AXIS_PCIE_RQ_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_
export PARAM_AXIS_PCIE_RC_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),75,161) export PARAM_AXIS_PCIE_RC_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),75,161)
export PARAM_AXIS_PCIE_CQ_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),88,183) export PARAM_AXIS_PCIE_CQ_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),88,183)
export PARAM_AXIS_PCIE_CC_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),33,81) export PARAM_AXIS_PCIE_CC_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),33,81)
export PARAM_RQ_SEQ_NUM_WIDTH := 6
ifeq ($(SIM), icarus) ifeq ($(SIM), icarus)
PLUSARGS += -fst PLUSARGS += -fst

View File

@@ -396,7 +396,6 @@ def test_fpga_core(request):
parameters['AXIS_PCIE_RC_USER_WIDTH'] = 75 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 161 parameters['AXIS_PCIE_RC_USER_WIDTH'] = 75 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 161
parameters['AXIS_PCIE_CQ_USER_WIDTH'] = 88 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 183 parameters['AXIS_PCIE_CQ_USER_WIDTH'] = 88 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 183
parameters['AXIS_PCIE_CC_USER_WIDTH'] = 33 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 81 parameters['AXIS_PCIE_CC_USER_WIDTH'] = 33 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 81
parameters['RQ_SEQ_NUM_WIDTH'] = 6
extra_env = {f'PARAM_{k}': str(v) for k, v in parameters.items()} extra_env = {f'PARAM_{k}': str(v) for k, v in parameters.items()}

View File

@@ -422,13 +422,13 @@ reg [OP_TAG_WIDTH+1-1:0] active_op_count_reg = 0;
reg inc_active_op; reg inc_active_op;
reg dec_active_op; reg dec_active_op;
reg [CL_CPLH_FC_LIMIT+1-1:0] active_cplh_fc_count_reg = 0; reg [CL_CPLH_FC_LIMIT+1-1:0] active_cplh_fc_count_reg = 0, active_cplh_fc_count_next;
reg active_cplh_fc_av_reg = 1'b1; reg active_cplh_fc_av_reg = 1'b1, active_cplh_fc_av_next;
reg [6:0] inc_active_cplh_fc_count; reg [6:0] inc_active_cplh_fc_count;
reg [6:0] dec_active_cplh_fc_count; reg [6:0] dec_active_cplh_fc_count;
reg [CL_CPLD_FC_LIMIT+1-1:0] active_cpld_fc_count_reg = 0; reg [CL_CPLD_FC_LIMIT+1-1:0] active_cpld_fc_count_reg = 0, active_cpld_fc_count_next;
reg active_cpld_fc_av_reg = 1'b1; reg active_cpld_fc_av_reg = 1'b1, active_cpld_fc_av_next;
reg [8:0] inc_active_cpld_fc_count; reg [8:0] inc_active_cpld_fc_count;
reg [8:0] dec_active_cpld_fc_count; reg [8:0] dec_active_cpld_fc_count;
@@ -1382,6 +1382,12 @@ always @* begin
end end
active_tx_count_av_next = active_tx_count_next < TX_LIMIT; active_tx_count_av_next = active_tx_count_next < TX_LIMIT;
active_cplh_fc_count_next <= active_cplh_fc_count_reg + inc_active_cplh_fc_count - dec_active_cplh_fc_count;
active_cplh_fc_av_next <= !CPLH_FC_LIMIT || active_cplh_fc_count_next < CPLH_FC_LIMIT;
active_cpld_fc_count_next <= active_cpld_fc_count_reg + inc_active_cpld_fc_count - dec_active_cpld_fc_count;
active_cpld_fc_av_next <= !CPLD_FC_LIMIT || active_cpld_fc_count_next < CPLD_FC_LIMIT;
end end
always @(posedge clk) begin always @(posedge clk) begin
@@ -1501,11 +1507,11 @@ always @(posedge clk) begin
active_tag_count_reg <= active_tag_count_reg + inc_active_tag - dec_active_tag; active_tag_count_reg <= active_tag_count_reg + inc_active_tag - dec_active_tag;
active_op_count_reg <= active_op_count_reg + inc_active_op - dec_active_op; active_op_count_reg <= active_op_count_reg + inc_active_op - dec_active_op;
active_cplh_fc_count_reg <= active_cplh_fc_count_reg + inc_active_cplh_fc_count - dec_active_cplh_fc_count; active_cplh_fc_count_reg <= active_cplh_fc_count_next;
active_cplh_fc_av_reg <= !CPLH_FC_LIMIT || active_cplh_fc_count_reg < CPLH_FC_LIMIT; active_cplh_fc_av_reg <= active_cplh_fc_av_next;
active_cpld_fc_count_reg <= active_cpld_fc_count_reg + inc_active_cpld_fc_count - dec_active_cpld_fc_count; active_cpld_fc_count_reg <= active_cpld_fc_count_next;
active_cpld_fc_av_reg <= !CPLD_FC_LIMIT || active_cpld_fc_count_reg < CPLD_FC_LIMIT; active_cpld_fc_av_reg <= active_cpld_fc_av_next;
pcie_tag_table_start_ptr_reg <= pcie_tag_table_start_ptr_next; pcie_tag_table_start_ptr_reg <= pcie_tag_table_start_ptr_next;
pcie_tag_table_start_ram_sel_reg <= pcie_tag_table_start_ram_sel_next; pcie_tag_table_start_ram_sel_reg <= pcie_tag_table_start_ram_sel_next;

View File

@@ -347,7 +347,7 @@ always @* begin
// compute mux settings // compute mux settings
for (port = 0; port < PORTS; port = port + 1) begin for (port = 0; port < PORTS; port = port + 1) begin
port_seg_valid[port] = pause[port] ? 0 : {2{fifo_ctrl_tlp_valid[port]}} >> fifo_ctrl_seg_offset[port]; port_seg_valid[port] = {2{fifo_ctrl_tlp_valid[port]}} >> fifo_ctrl_seg_offset[port];
port_seg_eop[port] = {2{fifo_ctrl_tlp_eop[port]}} >> fifo_ctrl_seg_offset[port]; port_seg_eop[port] = {2{fifo_ctrl_tlp_eop[port]}} >> fifo_ctrl_seg_offset[port];
end end
@@ -383,7 +383,7 @@ always @* begin
port_cyc = cur_port; port_cyc = cur_port;
seg_offset_cyc = port_seg_offset_cyc[cur_port]; seg_offset_cyc = port_seg_offset_cyc[cur_port];
seg_count_cyc = port_seg_count_cyc[cur_port]; seg_count_cyc = port_seg_count_cyc[cur_port];
if (port_seg_valid[cur_port][0]) begin if (!pause[cur_port] && port_seg_valid[cur_port][0]) begin
// set frame // set frame
frame_cyc = 1; frame_cyc = 1;
sel_tlp_seq_valid_cyc[OUT_TLP_SEG_COUNT*cur_port+seg] = 1'b1; sel_tlp_seq_valid_cyc[OUT_TLP_SEG_COUNT*cur_port+seg] = 1'b1;

View File

@@ -36,10 +36,7 @@ export PARAM_IRQ_INDEX_WIDTH := 11
export PARAM_AXIL_DATA_WIDTH := 32 export PARAM_AXIL_DATA_WIDTH := 32
export PARAM_AXIL_ADDR_WIDTH := $(shell expr $(PARAM_IRQ_INDEX_WIDTH) + 5 ) export PARAM_AXIL_ADDR_WIDTH := $(shell expr $(PARAM_IRQ_INDEX_WIDTH) + 5 )
export PARAM_AXIL_STRB_WIDTH := $(shell expr $(PARAM_AXIL_DATA_WIDTH) / 8 ) export PARAM_AXIL_STRB_WIDTH := $(shell expr $(PARAM_AXIL_DATA_WIDTH) / 8 )
export PARAM_TLP_DATA_WIDTH := 64
export PARAM_TLP_STRB_WIDTH := $(shell expr $(PARAM_TLP_DATA_WIDTH) / 32 )
export PARAM_TLP_HDR_WIDTH := 128 export PARAM_TLP_HDR_WIDTH := 128
export PARAM_TLP_SEG_COUNT := 1
export PARAM_TLP_FORCE_64_BIT_ADDR := 0 export PARAM_TLP_FORCE_64_BIT_ADDR := 0
ifeq ($(SIM), icarus) ifeq ($(SIM), icarus)

View File

@@ -319,8 +319,7 @@ rtl_dir = os.path.abspath(os.path.join(tests_dir, '..', '..', 'rtl'))
@pytest.mark.parametrize("axil_data_width", [32, 64]) @pytest.mark.parametrize("axil_data_width", [32, 64])
@pytest.mark.parametrize("pcie_data_width", [64, 128]) def test_pcie_msix(request, axil_data_width):
def test_pcie_msix(request, pcie_data_width, axil_data_width):
dut = "pcie_msix" dut = "pcie_msix"
module = os.path.splitext(os.path.basename(__file__))[0] module = os.path.splitext(os.path.basename(__file__))[0]
toplevel = dut toplevel = dut
@@ -335,10 +334,7 @@ def test_pcie_msix(request, pcie_data_width, axil_data_width):
parameters['AXIL_DATA_WIDTH'] = axil_data_width parameters['AXIL_DATA_WIDTH'] = axil_data_width
parameters['AXIL_ADDR_WIDTH'] = parameters['IRQ_INDEX_WIDTH']+5 parameters['AXIL_ADDR_WIDTH'] = parameters['IRQ_INDEX_WIDTH']+5
parameters['AXIL_STRB_WIDTH'] = (axil_data_width // 8) parameters['AXIL_STRB_WIDTH'] = (axil_data_width // 8)
parameters['TLP_DATA_WIDTH'] = pcie_data_width
parameters['TLP_STRB_WIDTH'] = pcie_data_width // 32
parameters['TLP_HDR_WIDTH'] = 128 parameters['TLP_HDR_WIDTH'] = 128
parameters['TLP_SEG_COUNT'] = 1
parameters['TLP_FORCE_64_BIT_ADDR'] = 0 parameters['TLP_FORCE_64_BIT_ADDR'] = 0
extra_env = {f'PARAM_{k}': str(v) for k, v in parameters.items()} extra_env = {f'PARAM_{k}': str(v) for k, v in parameters.items()}

View File

@@ -270,9 +270,6 @@ def test_pcie_us_axi_dma_wr(request, axis_pcie_data_width, pcie_offset):
parameters['AXI_ID_WIDTH'] = 8 parameters['AXI_ID_WIDTH'] = 8
parameters['AXI_MAX_BURST_LEN'] = 256 parameters['AXI_MAX_BURST_LEN'] = 256
parameters['PCIE_ADDR_WIDTH'] = 64 parameters['PCIE_ADDR_WIDTH'] = 64
parameters['PCIE_TAG_COUNT'] = 64 if parameters['AXIS_PCIE_RQ_USER_WIDTH'] == 60 else 256
parameters['PCIE_TAG_WIDTH'] = (parameters['PCIE_TAG_COUNT']-1).bit_length()
parameters['PCIE_EXT_TAG_ENABLE'] = int(parameters['PCIE_TAG_COUNT'] > 32)
parameters['LEN_WIDTH'] = 20 parameters['LEN_WIDTH'] = 20
parameters['TAG_WIDTH'] = 8 parameters['TAG_WIDTH'] = 8
parameters['OP_TABLE_SIZE'] = 2**(parameters['RQ_SEQ_NUM_WIDTH']-1) parameters['OP_TABLE_SIZE'] = 2**(parameters['RQ_SEQ_NUM_WIDTH']-1)

View File

@@ -18,7 +18,7 @@ deps =
cocotb-bus == 0.2.1 cocotb-bus == 0.2.1
cocotb-test == 0.2.4 cocotb-test == 0.2.4
cocotbext-axi == 0.1.24 cocotbext-axi == 0.1.24
cocotbext-pcie == 0.2.12 cocotbext-pcie == 0.2.14
jinja2 == 3.1.2 jinja2 == 3.1.2
commands = commands =