1
0
mirror of https://github.com/corundum/corundum.git synced 2025-01-16 08:12:53 +08:00

merged changes in pcie

This commit is contained in:
Alex Forencich 2023-06-23 22:49:05 -07:00
commit 045b0c1c68
65 changed files with 1259 additions and 511 deletions

View File

@ -54,7 +54,7 @@ module fpga (
parameter AXIS_PCIE_DATA_WIDTH = 512;
parameter AXIS_PCIE_KEEP_WIDTH = (AXIS_PCIE_DATA_WIDTH/32);
parameter AXIS_PCIE_RC_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 75 : 161;
parameter AXIS_PCIE_RQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 60 : 137;
parameter AXIS_PCIE_RQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 62 : 137;
parameter AXIS_PCIE_CQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 85 : 183;
parameter AXIS_PCIE_CC_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 33 : 81;
parameter RC_STRADDLE = AXIS_PCIE_DATA_WIDTH >= 256;

View File

@ -159,8 +159,8 @@ example_core_pcie_us #(
.PCIE_TAG_COUNT(PCIE_TAG_COUNT),
.READ_OP_TABLE_SIZE(PCIE_TAG_COUNT),
.READ_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)),
.READ_CPLH_FC_LIMIT(128),
.READ_CPLD_FC_LIMIT(2048),
.READ_CPLH_FC_LIMIT(256),
.READ_CPLD_FC_LIMIT(2048-256),
.WRITE_OP_TABLE_SIZE(2**(RQ_SEQ_NUM_WIDTH-1)),
.WRITE_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)),
.BAR0_APERTURE(BAR0_APERTURE),
@ -263,8 +263,7 @@ example_core_pcie_us_inst (
*/
.cfg_max_read_req(cfg_max_read_req),
.cfg_max_payload(cfg_max_payload),
// .cfg_rcb_status(cfg_rcb_status),
.cfg_rcb_status(1'b1), // force RCB 128 due to insufficient CPLH limit in US+ PCIe HIP
.cfg_rcb_status(cfg_rcb_status),
/*
* Status

View File

@ -54,7 +54,6 @@ export PARAM_AXIS_PCIE_RQ_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_
export PARAM_AXIS_PCIE_RC_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),75,161)
export PARAM_AXIS_PCIE_CQ_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),88,183)
export PARAM_AXIS_PCIE_CC_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),33,81)
export PARAM_RQ_SEQ_NUM_WIDTH := 6
ifeq ($(SIM), icarus)
PLUSARGS += -fst

View File

@ -396,7 +396,6 @@ def test_fpga_core(request):
parameters['AXIS_PCIE_RC_USER_WIDTH'] = 75 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 161
parameters['AXIS_PCIE_CQ_USER_WIDTH'] = 88 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 183
parameters['AXIS_PCIE_CC_USER_WIDTH'] = 33 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 81
parameters['RQ_SEQ_NUM_WIDTH'] = 6
extra_env = {f'PARAM_{k}': str(v) for k, v in parameters.items()}

View File

@ -53,7 +53,7 @@ module fpga (
parameter AXIS_PCIE_DATA_WIDTH = 512;
parameter AXIS_PCIE_KEEP_WIDTH = (AXIS_PCIE_DATA_WIDTH/32);
parameter AXIS_PCIE_RC_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 75 : 161;
parameter AXIS_PCIE_RQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 60 : 137;
parameter AXIS_PCIE_RQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 62 : 137;
parameter AXIS_PCIE_CQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 85 : 183;
parameter AXIS_PCIE_CC_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 33 : 81;
parameter RC_STRADDLE = AXIS_PCIE_DATA_WIDTH >= 256;

View File

@ -156,8 +156,8 @@ example_core_pcie_us #(
.PCIE_TAG_COUNT(PCIE_TAG_COUNT),
.READ_OP_TABLE_SIZE(PCIE_TAG_COUNT),
.READ_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)),
.READ_CPLH_FC_LIMIT(128),
.READ_CPLD_FC_LIMIT(2048),
.READ_CPLH_FC_LIMIT(256),
.READ_CPLD_FC_LIMIT(2048-256),
.WRITE_OP_TABLE_SIZE(2**(RQ_SEQ_NUM_WIDTH-1)),
.WRITE_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)),
.BAR0_APERTURE(BAR0_APERTURE),
@ -260,8 +260,7 @@ example_core_pcie_us_inst (
*/
.cfg_max_read_req(cfg_max_read_req),
.cfg_max_payload(cfg_max_payload),
// .cfg_rcb_status(cfg_rcb_status),
.cfg_rcb_status(1'b1), // force RCB 128 due to insufficient CPLH limit in US+ PCIe HIP
.cfg_rcb_status(cfg_rcb_status),
/*
* Status

View File

@ -54,7 +54,6 @@ export PARAM_AXIS_PCIE_RQ_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_
export PARAM_AXIS_PCIE_RC_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),75,161)
export PARAM_AXIS_PCIE_CQ_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),88,183)
export PARAM_AXIS_PCIE_CC_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),33,81)
export PARAM_RQ_SEQ_NUM_WIDTH := 6
ifeq ($(SIM), icarus)
PLUSARGS += -fst

View File

@ -398,7 +398,6 @@ def test_fpga_core(request):
parameters['AXIS_PCIE_RC_USER_WIDTH'] = 75 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 161
parameters['AXIS_PCIE_CQ_USER_WIDTH'] = 88 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 183
parameters['AXIS_PCIE_CC_USER_WIDTH'] = 33 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 81
parameters['RQ_SEQ_NUM_WIDTH'] = 6
extra_env = {f'PARAM_{k}': str(v) for k, v in parameters.items()}

View File

@ -53,7 +53,7 @@ module fpga (
parameter AXIS_PCIE_DATA_WIDTH = 512;
parameter AXIS_PCIE_KEEP_WIDTH = (AXIS_PCIE_DATA_WIDTH/32);
parameter AXIS_PCIE_RC_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 75 : 161;
parameter AXIS_PCIE_RQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 60 : 137;
parameter AXIS_PCIE_RQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 62 : 137;
parameter AXIS_PCIE_CQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 85 : 183;
parameter AXIS_PCIE_CC_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 33 : 81;
parameter RC_STRADDLE = AXIS_PCIE_DATA_WIDTH >= 256;

View File

@ -156,8 +156,8 @@ example_core_pcie_us #(
.PCIE_TAG_COUNT(PCIE_TAG_COUNT),
.READ_OP_TABLE_SIZE(PCIE_TAG_COUNT),
.READ_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)),
.READ_CPLH_FC_LIMIT(128),
.READ_CPLD_FC_LIMIT(2048),
.READ_CPLH_FC_LIMIT(256),
.READ_CPLD_FC_LIMIT(2048-256),
.WRITE_OP_TABLE_SIZE(2**(RQ_SEQ_NUM_WIDTH-1)),
.WRITE_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)),
.BAR0_APERTURE(BAR0_APERTURE),
@ -260,8 +260,7 @@ example_core_pcie_us_inst (
*/
.cfg_max_read_req(cfg_max_read_req),
.cfg_max_payload(cfg_max_payload),
// .cfg_rcb_status(cfg_rcb_status),
.cfg_rcb_status(1'b1), // force RCB 128 due to insufficient CPLH limit in US+ PCIe HIP
.cfg_rcb_status(cfg_rcb_status),
/*
* Status

View File

@ -54,7 +54,6 @@ export PARAM_AXIS_PCIE_RQ_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_
export PARAM_AXIS_PCIE_RC_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),75,161)
export PARAM_AXIS_PCIE_CQ_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),88,183)
export PARAM_AXIS_PCIE_CC_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),33,81)
export PARAM_RQ_SEQ_NUM_WIDTH := 6
ifeq ($(SIM), icarus)
PLUSARGS += -fst

View File

@ -398,7 +398,6 @@ def test_fpga_core(request):
parameters['AXIS_PCIE_RC_USER_WIDTH'] = 75 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 161
parameters['AXIS_PCIE_CQ_USER_WIDTH'] = 88 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 183
parameters['AXIS_PCIE_CC_USER_WIDTH'] = 33 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 81
parameters['RQ_SEQ_NUM_WIDTH'] = 6
extra_env = {f'PARAM_{k}': str(v) for k, v in parameters.items()}

View File

@ -52,7 +52,7 @@ module fpga (
parameter AXIS_PCIE_DATA_WIDTH = 512;
parameter AXIS_PCIE_KEEP_WIDTH = (AXIS_PCIE_DATA_WIDTH/32);
parameter AXIS_PCIE_RC_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 75 : 161;
parameter AXIS_PCIE_RQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 60 : 137;
parameter AXIS_PCIE_RQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 62 : 137;
parameter AXIS_PCIE_CQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 85 : 183;
parameter AXIS_PCIE_CC_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 33 : 81;
parameter RC_STRADDLE = AXIS_PCIE_DATA_WIDTH >= 256;

View File

@ -148,8 +148,8 @@ example_core_pcie_us #(
.PCIE_TAG_COUNT(PCIE_TAG_COUNT),
.READ_OP_TABLE_SIZE(PCIE_TAG_COUNT),
.READ_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)),
.READ_CPLH_FC_LIMIT(128),
.READ_CPLD_FC_LIMIT(2048),
.READ_CPLH_FC_LIMIT(256),
.READ_CPLD_FC_LIMIT(2048-256),
.WRITE_OP_TABLE_SIZE(2**(RQ_SEQ_NUM_WIDTH-1)),
.WRITE_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)),
.BAR0_APERTURE(BAR0_APERTURE),
@ -252,8 +252,7 @@ example_core_pcie_us_inst (
*/
.cfg_max_read_req(cfg_max_read_req),
.cfg_max_payload(cfg_max_payload),
// .cfg_rcb_status(cfg_rcb_status),
.cfg_rcb_status(1'b1), // force RCB 128 due to insufficient CPLH limit in US+ PCIe HIP
.cfg_rcb_status(cfg_rcb_status),
/*
* Status

View File

@ -54,7 +54,6 @@ export PARAM_AXIS_PCIE_RQ_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_
export PARAM_AXIS_PCIE_RC_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),75,161)
export PARAM_AXIS_PCIE_CQ_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),88,183)
export PARAM_AXIS_PCIE_CC_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),33,81)
export PARAM_RQ_SEQ_NUM_WIDTH := 6
ifeq ($(SIM), icarus)
PLUSARGS += -fst

View File

@ -396,7 +396,6 @@ def test_fpga_core(request):
parameters['AXIS_PCIE_RC_USER_WIDTH'] = 75 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 161
parameters['AXIS_PCIE_CQ_USER_WIDTH'] = 88 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 183
parameters['AXIS_PCIE_CC_USER_WIDTH'] = 33 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 81
parameters['RQ_SEQ_NUM_WIDTH'] = 6
extra_env = {f'PARAM_{k}': str(v) for k, v in parameters.items()}

View File

@ -55,7 +55,7 @@ module fpga (
parameter AXIS_PCIE_DATA_WIDTH = 512;
parameter AXIS_PCIE_KEEP_WIDTH = (AXIS_PCIE_DATA_WIDTH/32);
parameter AXIS_PCIE_RC_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 75 : 161;
parameter AXIS_PCIE_RQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 60 : 137;
parameter AXIS_PCIE_RQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 62 : 137;
parameter AXIS_PCIE_CQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 85 : 183;
parameter AXIS_PCIE_CC_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 33 : 81;
parameter RC_STRADDLE = AXIS_PCIE_DATA_WIDTH >= 256;

View File

@ -159,8 +159,8 @@ example_core_pcie_us #(
.PCIE_TAG_COUNT(PCIE_TAG_COUNT),
.READ_OP_TABLE_SIZE(PCIE_TAG_COUNT),
.READ_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)),
.READ_CPLH_FC_LIMIT(128),
.READ_CPLD_FC_LIMIT(2048),
.READ_CPLH_FC_LIMIT(256),
.READ_CPLD_FC_LIMIT(2048-256),
.WRITE_OP_TABLE_SIZE(2**(RQ_SEQ_NUM_WIDTH-1)),
.WRITE_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)),
.BAR0_APERTURE(BAR0_APERTURE),
@ -263,8 +263,7 @@ example_core_pcie_us_inst (
*/
.cfg_max_read_req(cfg_max_read_req),
.cfg_max_payload(cfg_max_payload),
// .cfg_rcb_status(cfg_rcb_status),
.cfg_rcb_status(1'b1), // force RCB 128 due to insufficient CPLH limit in US+ PCIe HIP
.cfg_rcb_status(cfg_rcb_status),
/*
* Status

View File

@ -54,7 +54,6 @@ export PARAM_AXIS_PCIE_RQ_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_
export PARAM_AXIS_PCIE_RC_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),75,161)
export PARAM_AXIS_PCIE_CQ_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),88,183)
export PARAM_AXIS_PCIE_CC_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),33,81)
export PARAM_RQ_SEQ_NUM_WIDTH := 6
ifeq ($(SIM), icarus)
PLUSARGS += -fst

View File

@ -396,7 +396,6 @@ def test_fpga_core(request):
parameters['AXIS_PCIE_RC_USER_WIDTH'] = 75 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 161
parameters['AXIS_PCIE_CQ_USER_WIDTH'] = 88 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 183
parameters['AXIS_PCIE_CC_USER_WIDTH'] = 33 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 81
parameters['RQ_SEQ_NUM_WIDTH'] = 6
extra_env = {f'PARAM_{k}': str(v) for k, v in parameters.items()}

View File

@ -155,7 +155,7 @@ example_core_pcie_us #(
.READ_OP_TABLE_SIZE(PCIE_TAG_COUNT),
.READ_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)),
.READ_CPLH_FC_LIMIT(64),
.READ_CPLD_FC_LIMIT(992),
.READ_CPLD_FC_LIMIT(1024-64),
.WRITE_OP_TABLE_SIZE(2**(RQ_SEQ_NUM_WIDTH-1)),
.WRITE_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)),
.BAR0_APERTURE(BAR0_APERTURE),
@ -258,8 +258,7 @@ example_core_pcie_us_inst (
*/
.cfg_max_read_req(cfg_max_read_req),
.cfg_max_payload(cfg_max_payload),
// .cfg_rcb_status(cfg_rcb_status),
.cfg_rcb_status(1'b1), // force RCB 128 due to insufficient CPLH limit in US PCIe HIP
.cfg_rcb_status(cfg_rcb_status),
/*
* Status

View File

@ -54,7 +54,6 @@ export PARAM_AXIS_PCIE_RQ_USER_WIDTH := 60
export PARAM_AXIS_PCIE_RC_USER_WIDTH := 75
export PARAM_AXIS_PCIE_CQ_USER_WIDTH := 85
export PARAM_AXIS_PCIE_CC_USER_WIDTH := 33
export PARAM_RQ_SEQ_NUM_WIDTH := 4
ifeq ($(SIM), icarus)
PLUSARGS += -fst

View File

@ -370,7 +370,6 @@ def test_fpga_core(request):
parameters['AXIS_PCIE_RC_USER_WIDTH'] = 75
parameters['AXIS_PCIE_CQ_USER_WIDTH'] = 85
parameters['AXIS_PCIE_CC_USER_WIDTH'] = 33
parameters['RQ_SEQ_NUM_WIDTH'] = 4
extra_env = {f'PARAM_{k}': str(v) for k, v in parameters.items()}

View File

@ -54,7 +54,7 @@ module fpga (
parameter AXIS_PCIE_DATA_WIDTH = 256;
parameter AXIS_PCIE_KEEP_WIDTH = (AXIS_PCIE_DATA_WIDTH/32);
parameter AXIS_PCIE_RC_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 75 : 161;
parameter AXIS_PCIE_RQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 60 : 137;
parameter AXIS_PCIE_RQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 62 : 137;
parameter AXIS_PCIE_CQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 85 : 183;
parameter AXIS_PCIE_CC_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 33 : 81;
parameter RC_STRADDLE = AXIS_PCIE_DATA_WIDTH >= 256;

View File

@ -159,8 +159,8 @@ example_core_pcie_us #(
.PCIE_TAG_COUNT(PCIE_TAG_COUNT),
.READ_OP_TABLE_SIZE(PCIE_TAG_COUNT),
.READ_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)),
.READ_CPLH_FC_LIMIT(128),
.READ_CPLD_FC_LIMIT(2048),
.READ_CPLH_FC_LIMIT(256),
.READ_CPLD_FC_LIMIT(2048-256),
.WRITE_OP_TABLE_SIZE(2**(RQ_SEQ_NUM_WIDTH-1)),
.WRITE_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)),
.BAR0_APERTURE(BAR0_APERTURE),
@ -263,8 +263,7 @@ example_core_pcie_us_inst (
*/
.cfg_max_read_req(cfg_max_read_req),
.cfg_max_payload(cfg_max_payload),
// .cfg_rcb_status(cfg_rcb_status),
.cfg_rcb_status(1'b1), // force RCB 128 due to insufficient CPLH limit in US+ PCIe HIP
.cfg_rcb_status(cfg_rcb_status),
/*
* Status

View File

@ -54,7 +54,6 @@ export PARAM_AXIS_PCIE_RQ_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_
export PARAM_AXIS_PCIE_RC_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),75,161)
export PARAM_AXIS_PCIE_CQ_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),88,183)
export PARAM_AXIS_PCIE_CC_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),33,81)
export PARAM_RQ_SEQ_NUM_WIDTH := 6
ifeq ($(SIM), icarus)
PLUSARGS += -fst

View File

@ -396,7 +396,6 @@ def test_fpga_core(request):
parameters['AXIS_PCIE_RC_USER_WIDTH'] = 75 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 161
parameters['AXIS_PCIE_CQ_USER_WIDTH'] = 88 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 183
parameters['AXIS_PCIE_CC_USER_WIDTH'] = 33 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 81
parameters['RQ_SEQ_NUM_WIDTH'] = 6
extra_env = {f'PARAM_{k}': str(v) for k, v in parameters.items()}

View File

@ -157,7 +157,7 @@ example_core_pcie_us #(
.READ_OP_TABLE_SIZE(PCIE_TAG_COUNT),
.READ_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)),
.READ_CPLH_FC_LIMIT(64),
.READ_CPLD_FC_LIMIT(992),
.READ_CPLD_FC_LIMIT(1024-64),
.WRITE_OP_TABLE_SIZE(2**(RQ_SEQ_NUM_WIDTH-1)),
.WRITE_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)),
.BAR0_APERTURE(BAR0_APERTURE),
@ -260,8 +260,7 @@ example_core_pcie_us_inst (
*/
.cfg_max_read_req(cfg_max_read_req),
.cfg_max_payload(cfg_max_payload),
// .cfg_rcb_status(cfg_rcb_status),
.cfg_rcb_status(1'b1), // force RCB 128 due to insufficient CPLH limit in US PCIe HIP
.cfg_rcb_status(cfg_rcb_status),
/*
* Status

View File

@ -57,6 +57,10 @@ module fpga (
parameter AXIS_PCIE_DATA_WIDTH = 256;
parameter AXIS_PCIE_KEEP_WIDTH = (AXIS_PCIE_DATA_WIDTH/32);
parameter AXIS_PCIE_RC_USER_WIDTH = 75;
parameter AXIS_PCIE_RQ_USER_WIDTH = 60;
parameter AXIS_PCIE_CQ_USER_WIDTH = 85;
parameter AXIS_PCIE_CC_USER_WIDTH = 33;
// Clock and reset
wire pcie_user_clk;
@ -107,33 +111,33 @@ ibufds_gte3_pcie_mgt_refclk_inst (
.ODIV2 (pcie_sys_clk)
);
wire [AXIS_PCIE_DATA_WIDTH-1:0] axis_rq_tdata;
wire [AXIS_PCIE_KEEP_WIDTH-1:0] axis_rq_tkeep;
wire axis_rq_tlast;
wire axis_rq_tready;
wire [59:0] axis_rq_tuser;
wire axis_rq_tvalid;
wire [AXIS_PCIE_DATA_WIDTH-1:0] axis_rq_tdata;
wire [AXIS_PCIE_KEEP_WIDTH-1:0] axis_rq_tkeep;
wire axis_rq_tlast;
wire axis_rq_tready;
wire [AXIS_PCIE_RQ_USER_WIDTH-1:0] axis_rq_tuser;
wire axis_rq_tvalid;
wire [AXIS_PCIE_DATA_WIDTH-1:0] axis_rc_tdata;
wire [AXIS_PCIE_KEEP_WIDTH-1:0] axis_rc_tkeep;
wire axis_rc_tlast;
wire axis_rc_tready;
wire [74:0] axis_rc_tuser;
wire axis_rc_tvalid;
wire [AXIS_PCIE_DATA_WIDTH-1:0] axis_rc_tdata;
wire [AXIS_PCIE_KEEP_WIDTH-1:0] axis_rc_tkeep;
wire axis_rc_tlast;
wire axis_rc_tready;
wire [AXIS_PCIE_RC_USER_WIDTH-1:0] axis_rc_tuser;
wire axis_rc_tvalid;
wire [AXIS_PCIE_DATA_WIDTH-1:0] axis_cq_tdata;
wire [AXIS_PCIE_KEEP_WIDTH-1:0] axis_cq_tkeep;
wire axis_cq_tlast;
wire axis_cq_tready;
wire [84:0] axis_cq_tuser;
wire axis_cq_tvalid;
wire [AXIS_PCIE_DATA_WIDTH-1:0] axis_cq_tdata;
wire [AXIS_PCIE_KEEP_WIDTH-1:0] axis_cq_tkeep;
wire axis_cq_tlast;
wire axis_cq_tready;
wire [AXIS_PCIE_CQ_USER_WIDTH-1:0] axis_cq_tuser;
wire axis_cq_tvalid;
wire [AXIS_PCIE_DATA_WIDTH-1:0] axis_cc_tdata;
wire [AXIS_PCIE_KEEP_WIDTH-1:0] axis_cc_tkeep;
wire axis_cc_tlast;
wire axis_cc_tready;
wire [32:0] axis_cc_tuser;
wire axis_cc_tvalid;
wire [AXIS_PCIE_DATA_WIDTH-1:0] axis_cc_tdata;
wire [AXIS_PCIE_KEEP_WIDTH-1:0] axis_cc_tkeep;
wire axis_cc_tlast;
wire axis_cc_tready;
wire [AXIS_PCIE_CC_USER_WIDTH-1:0] axis_cc_tuser;
wire axis_cc_tvalid;
// ila_0 rq_ila (
// .clk(pcie_user_clk),
@ -357,7 +361,12 @@ pcie3_ultrascale_inst (
);
fpga_core #(
.AXIS_PCIE_DATA_WIDTH(AXIS_PCIE_DATA_WIDTH)
.AXIS_PCIE_DATA_WIDTH(AXIS_PCIE_DATA_WIDTH),
.AXIS_PCIE_KEEP_WIDTH(AXIS_PCIE_KEEP_WIDTH),
.AXIS_PCIE_RC_USER_WIDTH(AXIS_PCIE_RC_USER_WIDTH),
.AXIS_PCIE_RQ_USER_WIDTH(AXIS_PCIE_RQ_USER_WIDTH),
.AXIS_PCIE_CQ_USER_WIDTH(AXIS_PCIE_CQ_USER_WIDTH),
.AXIS_PCIE_CC_USER_WIDTH(AXIS_PCIE_CC_USER_WIDTH)
)
core_inst (
/*

View File

@ -34,89 +34,93 @@ THE SOFTWARE.
module fpga_core #
(
parameter AXIS_PCIE_DATA_WIDTH = 256,
parameter AXIS_PCIE_KEEP_WIDTH = (AXIS_PCIE_DATA_WIDTH/32)
parameter AXIS_PCIE_KEEP_WIDTH = (AXIS_PCIE_DATA_WIDTH/32),
parameter AXIS_PCIE_RC_USER_WIDTH = 75,
parameter AXIS_PCIE_RQ_USER_WIDTH = 60,
parameter AXIS_PCIE_CQ_USER_WIDTH = 85,
parameter AXIS_PCIE_CC_USER_WIDTH = 33
)
(
/*
* Clock: 250 MHz
* Synchronous reset
*/
input wire clk,
input wire rst,
input wire clk,
input wire rst,
/*
* GPIO
*/
input wire btnu,
input wire btnl,
input wire btnd,
input wire btnr,
input wire btnc,
input wire [3:0] sw,
output wire [7:0] led,
input wire btnu,
input wire btnl,
input wire btnd,
input wire btnr,
input wire btnc,
input wire [3:0] sw,
output wire [7:0] led,
/*
* PCIe
*/
output wire [AXIS_PCIE_DATA_WIDTH-1:0] m_axis_rq_tdata,
output wire [AXIS_PCIE_KEEP_WIDTH-1:0] m_axis_rq_tkeep,
output wire m_axis_rq_tlast,
input wire m_axis_rq_tready,
output wire [59:0] m_axis_rq_tuser,
output wire m_axis_rq_tvalid,
output wire [AXIS_PCIE_DATA_WIDTH-1:0] m_axis_rq_tdata,
output wire [AXIS_PCIE_KEEP_WIDTH-1:0] m_axis_rq_tkeep,
output wire m_axis_rq_tlast,
input wire m_axis_rq_tready,
output wire [AXIS_PCIE_RQ_USER_WIDTH-1:0] m_axis_rq_tuser,
output wire m_axis_rq_tvalid,
input wire [AXIS_PCIE_DATA_WIDTH-1:0] s_axis_rc_tdata,
input wire [AXIS_PCIE_KEEP_WIDTH-1:0] s_axis_rc_tkeep,
input wire s_axis_rc_tlast,
output wire s_axis_rc_tready,
input wire [74:0] s_axis_rc_tuser,
input wire s_axis_rc_tvalid,
input wire [AXIS_PCIE_DATA_WIDTH-1:0] s_axis_rc_tdata,
input wire [AXIS_PCIE_KEEP_WIDTH-1:0] s_axis_rc_tkeep,
input wire s_axis_rc_tlast,
output wire s_axis_rc_tready,
input wire [AXIS_PCIE_RC_USER_WIDTH-1:0] s_axis_rc_tuser,
input wire s_axis_rc_tvalid,
input wire [AXIS_PCIE_DATA_WIDTH-1:0] s_axis_cq_tdata,
input wire [AXIS_PCIE_KEEP_WIDTH-1:0] s_axis_cq_tkeep,
input wire s_axis_cq_tlast,
output wire s_axis_cq_tready,
input wire [84:0] s_axis_cq_tuser,
input wire s_axis_cq_tvalid,
input wire [AXIS_PCIE_DATA_WIDTH-1:0] s_axis_cq_tdata,
input wire [AXIS_PCIE_KEEP_WIDTH-1:0] s_axis_cq_tkeep,
input wire s_axis_cq_tlast,
output wire s_axis_cq_tready,
input wire [AXIS_PCIE_CQ_USER_WIDTH-1:0] s_axis_cq_tuser,
input wire s_axis_cq_tvalid,
output wire [AXIS_PCIE_DATA_WIDTH-1:0] m_axis_cc_tdata,
output wire [AXIS_PCIE_KEEP_WIDTH-1:0] m_axis_cc_tkeep,
output wire m_axis_cc_tlast,
input wire m_axis_cc_tready,
output wire [32:0] m_axis_cc_tuser,
output wire m_axis_cc_tvalid,
output wire [AXIS_PCIE_DATA_WIDTH-1:0] m_axis_cc_tdata,
output wire [AXIS_PCIE_KEEP_WIDTH-1:0] m_axis_cc_tkeep,
output wire m_axis_cc_tlast,
input wire m_axis_cc_tready,
output wire [AXIS_PCIE_CC_USER_WIDTH-1:0] m_axis_cc_tuser,
output wire m_axis_cc_tvalid,
input wire [2:0] cfg_max_payload,
input wire [2:0] cfg_max_read_req,
input wire [2:0] cfg_max_payload,
input wire [2:0] cfg_max_read_req,
output wire [18:0] cfg_mgmt_addr,
output wire cfg_mgmt_write,
output wire [31:0] cfg_mgmt_write_data,
output wire [3:0] cfg_mgmt_byte_enable,
output wire cfg_mgmt_read,
input wire [31:0] cfg_mgmt_read_data,
input wire cfg_mgmt_read_write_done,
output wire [18:0] cfg_mgmt_addr,
output wire cfg_mgmt_write,
output wire [31:0] cfg_mgmt_write_data,
output wire [3:0] cfg_mgmt_byte_enable,
output wire cfg_mgmt_read,
input wire [31:0] cfg_mgmt_read_data,
input wire cfg_mgmt_read_write_done,
input wire [3:0] cfg_interrupt_msi_enable,
input wire [7:0] cfg_interrupt_msi_vf_enable,
input wire [11:0] cfg_interrupt_msi_mmenable,
input wire cfg_interrupt_msi_mask_update,
input wire [31:0] cfg_interrupt_msi_data,
output wire [3:0] cfg_interrupt_msi_select,
output wire [31:0] cfg_interrupt_msi_int,
output wire [31:0] cfg_interrupt_msi_pending_status,
output wire cfg_interrupt_msi_pending_status_data_enable,
output wire [3:0] cfg_interrupt_msi_pending_status_function_num,
input wire cfg_interrupt_msi_sent,
input wire cfg_interrupt_msi_fail,
output wire [2:0] cfg_interrupt_msi_attr,
output wire cfg_interrupt_msi_tph_present,
output wire [1:0] cfg_interrupt_msi_tph_type,
output wire [8:0] cfg_interrupt_msi_tph_st_tag,
output wire [3:0] cfg_interrupt_msi_function_number,
input wire [3:0] cfg_interrupt_msi_enable,
input wire [7:0] cfg_interrupt_msi_vf_enable,
input wire [11:0] cfg_interrupt_msi_mmenable,
input wire cfg_interrupt_msi_mask_update,
input wire [31:0] cfg_interrupt_msi_data,
output wire [3:0] cfg_interrupt_msi_select,
output wire [31:0] cfg_interrupt_msi_int,
output wire [31:0] cfg_interrupt_msi_pending_status,
output wire cfg_interrupt_msi_pending_status_data_enable,
output wire [3:0] cfg_interrupt_msi_pending_status_function_num,
input wire cfg_interrupt_msi_sent,
input wire cfg_interrupt_msi_fail,
output wire [2:0] cfg_interrupt_msi_attr,
output wire cfg_interrupt_msi_tph_present,
output wire [1:0] cfg_interrupt_msi_tph_type,
output wire [8:0] cfg_interrupt_msi_tph_st_tag,
output wire [3:0] cfg_interrupt_msi_function_number,
output wire status_error_cor,
output wire status_error_uncor
output wire status_error_cor,
output wire status_error_uncor
);
parameter PCIE_ADDR_WIDTH = 64;

View File

@ -377,7 +377,6 @@ def test_fpga_core(request):
parameters['AXIS_PCIE_RC_USER_WIDTH'] = 75
parameters['AXIS_PCIE_CQ_USER_WIDTH'] = 85
parameters['AXIS_PCIE_CC_USER_WIDTH'] = 33
parameters['RQ_SEQ_NUM_WIDTH'] = 4
extra_env = {f'PARAM_{k}': str(v) for k, v in parameters.items()}

View File

@ -58,7 +58,7 @@ module fpga (
parameter AXIS_PCIE_DATA_WIDTH = 512;
parameter AXIS_PCIE_KEEP_WIDTH = (AXIS_PCIE_DATA_WIDTH/32);
parameter AXIS_PCIE_RC_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 75 : 161;
parameter AXIS_PCIE_RQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 60 : 137;
parameter AXIS_PCIE_RQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 62 : 137;
parameter AXIS_PCIE_CQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 85 : 183;
parameter AXIS_PCIE_CC_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 33 : 81;
parameter RC_STRADDLE = AXIS_PCIE_DATA_WIDTH >= 256;

View File

@ -161,8 +161,8 @@ example_core_pcie_us #(
.PCIE_TAG_COUNT(PCIE_TAG_COUNT),
.READ_OP_TABLE_SIZE(PCIE_TAG_COUNT),
.READ_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)),
.READ_CPLH_FC_LIMIT(128),
.READ_CPLD_FC_LIMIT(2048),
.READ_CPLH_FC_LIMIT(256),
.READ_CPLD_FC_LIMIT(2048-256),
.WRITE_OP_TABLE_SIZE(2**(RQ_SEQ_NUM_WIDTH-1)),
.WRITE_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)),
.BAR0_APERTURE(BAR0_APERTURE),
@ -265,8 +265,7 @@ example_core_pcie_us_inst (
*/
.cfg_max_read_req(cfg_max_read_req),
.cfg_max_payload(cfg_max_payload),
// .cfg_rcb_status(cfg_rcb_status),
.cfg_rcb_status(1'b1), // force RCB 128 due to insufficient CPLH limit in US+ PCIe HIP
.cfg_rcb_status(cfg_rcb_status),
/*
* Status

View File

@ -54,7 +54,6 @@ export PARAM_AXIS_PCIE_RQ_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_
export PARAM_AXIS_PCIE_RC_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),75,161)
export PARAM_AXIS_PCIE_CQ_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),88,183)
export PARAM_AXIS_PCIE_CC_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),33,81)
export PARAM_RQ_SEQ_NUM_WIDTH := 6
ifeq ($(SIM), icarus)
PLUSARGS += -fst

View File

@ -403,7 +403,6 @@ def test_fpga_core(request):
parameters['AXIS_PCIE_RC_USER_WIDTH'] = 75 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 161
parameters['AXIS_PCIE_CQ_USER_WIDTH'] = 88 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 183
parameters['AXIS_PCIE_CC_USER_WIDTH'] = 33 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 81
parameters['RQ_SEQ_NUM_WIDTH'] = 6
extra_env = {f'PARAM_{k}': str(v) for k, v in parameters.items()}

View File

@ -53,7 +53,7 @@ module fpga (
parameter AXIS_PCIE_DATA_WIDTH = 512;
parameter AXIS_PCIE_KEEP_WIDTH = (AXIS_PCIE_DATA_WIDTH/32);
parameter AXIS_PCIE_RC_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 75 : 161;
parameter AXIS_PCIE_RQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 60 : 137;
parameter AXIS_PCIE_RQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 62 : 137;
parameter AXIS_PCIE_CQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 85 : 183;
parameter AXIS_PCIE_CC_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 33 : 81;
parameter RC_STRADDLE = AXIS_PCIE_DATA_WIDTH >= 256;

View File

@ -156,8 +156,8 @@ example_core_pcie_us #(
.PCIE_TAG_COUNT(PCIE_TAG_COUNT),
.READ_OP_TABLE_SIZE(PCIE_TAG_COUNT),
.READ_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)),
.READ_CPLH_FC_LIMIT(128),
.READ_CPLD_FC_LIMIT(2048),
.READ_CPLH_FC_LIMIT(256),
.READ_CPLD_FC_LIMIT(2048-256),
.WRITE_OP_TABLE_SIZE(2**(RQ_SEQ_NUM_WIDTH-1)),
.WRITE_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)),
.BAR0_APERTURE(BAR0_APERTURE),
@ -260,8 +260,7 @@ example_core_pcie_us_inst (
*/
.cfg_max_read_req(cfg_max_read_req),
.cfg_max_payload(cfg_max_payload),
// .cfg_rcb_status(cfg_rcb_status),
.cfg_rcb_status(1'b1), // force RCB 128 due to insufficient CPLH limit in US+ PCIe HIP
.cfg_rcb_status(cfg_rcb_status),
/*
* Status

View File

@ -54,7 +54,6 @@ export PARAM_AXIS_PCIE_RQ_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_
export PARAM_AXIS_PCIE_RC_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),75,161)
export PARAM_AXIS_PCIE_CQ_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),88,183)
export PARAM_AXIS_PCIE_CC_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),33,81)
export PARAM_RQ_SEQ_NUM_WIDTH := 6
ifeq ($(SIM), icarus)
PLUSARGS += -fst

View File

@ -398,7 +398,6 @@ def test_fpga_core(request):
parameters['AXIS_PCIE_RC_USER_WIDTH'] = 75 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 161
parameters['AXIS_PCIE_CQ_USER_WIDTH'] = 88 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 183
parameters['AXIS_PCIE_CC_USER_WIDTH'] = 33 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 81
parameters['RQ_SEQ_NUM_WIDTH'] = 6
extra_env = {f'PARAM_{k}': str(v) for k, v in parameters.items()}

View File

@ -58,7 +58,7 @@ module fpga (
parameter AXIS_PCIE_DATA_WIDTH = 128;
parameter AXIS_PCIE_KEEP_WIDTH = (AXIS_PCIE_DATA_WIDTH/32);
parameter AXIS_PCIE_RC_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 75 : 161;
parameter AXIS_PCIE_RQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 60 : 137;
parameter AXIS_PCIE_RQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 62 : 137;
parameter AXIS_PCIE_CQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 85 : 183;
parameter AXIS_PCIE_CC_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 33 : 81;
parameter RC_STRADDLE = AXIS_PCIE_DATA_WIDTH >= 256;

View File

@ -161,8 +161,8 @@ example_core_pcie_us #(
.PCIE_TAG_COUNT(PCIE_TAG_COUNT),
.READ_OP_TABLE_SIZE(PCIE_TAG_COUNT),
.READ_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)),
.READ_CPLH_FC_LIMIT(128),
.READ_CPLD_FC_LIMIT(2048),
.READ_CPLH_FC_LIMIT(256),
.READ_CPLD_FC_LIMIT(2048-256),
.WRITE_OP_TABLE_SIZE(2**(RQ_SEQ_NUM_WIDTH-1)),
.WRITE_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)),
.BAR0_APERTURE(BAR0_APERTURE),
@ -265,8 +265,7 @@ example_core_pcie_us_inst (
*/
.cfg_max_read_req(cfg_max_read_req),
.cfg_max_payload(cfg_max_payload),
// .cfg_rcb_status(cfg_rcb_status),
.cfg_rcb_status(1'b1), // force RCB 128 due to insufficient CPLH limit in US+ PCIe HIP
.cfg_rcb_status(cfg_rcb_status),
/*
* Status

View File

@ -54,7 +54,6 @@ export PARAM_AXIS_PCIE_RQ_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_
export PARAM_AXIS_PCIE_RC_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),75,161)
export PARAM_AXIS_PCIE_CQ_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),88,183)
export PARAM_AXIS_PCIE_CC_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),33,81)
export PARAM_RQ_SEQ_NUM_WIDTH := 6
ifeq ($(SIM), icarus)
PLUSARGS += -fst

View File

@ -403,7 +403,6 @@ def test_fpga_core(request):
parameters['AXIS_PCIE_RC_USER_WIDTH'] = 75 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 161
parameters['AXIS_PCIE_CQ_USER_WIDTH'] = 88 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 183
parameters['AXIS_PCIE_CC_USER_WIDTH'] = 33 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 81
parameters['RQ_SEQ_NUM_WIDTH'] = 6
extra_env = {f'PARAM_{k}': str(v) for k, v in parameters.items()}

View File

@ -103,6 +103,8 @@ static void dma_block_read(struct example_dev *edev,
if ((ioread32(edev->bar[0] + 0x001000) & 1) != 0)
dev_warn(edev->dev, "%s: operation timed out", __func__);
if ((ioread32(edev->bar[0] + 0x000000) & 0x300) != 0)
dev_warn(edev->dev, "%s: DMA engine busy", __func__);
}
static void dma_block_write(struct example_dev *edev,
@ -157,15 +159,22 @@ static void dma_block_write(struct example_dev *edev,
if ((ioread32(edev->bar[0] + 0x001100) & 1) != 0)
dev_warn(edev->dev, "%s: operation timed out", __func__);
if ((ioread32(edev->bar[0] + 0x000000) & 0x300) != 0)
dev_warn(edev->dev, "%s: DMA engine busy", __func__);
}
static void dma_block_read_bench(struct example_dev *edev,
dma_addr_t dma_addr, u64 size, u64 stride, u64 count)
{
u64 cycles;
u32 rd_req;
u32 rd_cpl;
udelay(5);
rd_req = ioread32(edev->bar[0] + 0x000020);
rd_cpl = ioread32(edev->bar[0] + 0x000024);
dma_block_read(edev, dma_addr, 0, 0x3fff, stride,
0, 0, 0x3fff, stride, size, count);
@ -173,17 +182,23 @@ static void dma_block_read_bench(struct example_dev *edev,
udelay(5);
dev_info(edev->dev, "read %lld blocks of %lld bytes (stride %lld) in %lld ns: %lld Mbps",
count, size, stride, cycles * 4, size * count * 8 * 1000 / (cycles * 4));
rd_req = ioread32(edev->bar[0] + 0x000020) - rd_req;
rd_cpl = ioread32(edev->bar[0] + 0x000024) - rd_cpl;
dev_info(edev->dev, "read %lld blocks of %lld bytes (total %lld B, stride %lld) in %lld ns (%d req %d cpl): %lld Mbps",
count, size, count*size, stride, cycles * 4, rd_req, rd_cpl, size * count * 8 * 1000 / (cycles * 4));
}
static void dma_block_write_bench(struct example_dev *edev,
dma_addr_t dma_addr, u64 size, u64 stride, u64 count)
{
u64 cycles;
u32 wr_req;
udelay(5);
wr_req = ioread32(edev->bar[0] + 0x000028);
dma_block_write(edev, dma_addr, 0, 0x3fff, stride,
0, 0, 0x3fff, stride, size, count);
@ -191,8 +206,83 @@ static void dma_block_write_bench(struct example_dev *edev,
udelay(5);
dev_info(edev->dev, "wrote %lld blocks of %lld bytes (stride %lld) in %lld ns: %lld Mbps",
count, size, stride, cycles * 4, size * count * 8 * 1000 / (cycles * 4));
wr_req = ioread32(edev->bar[0] + 0x000028) - wr_req;
dev_info(edev->dev, "wrote %lld blocks of %lld bytes (total %lld B, stride %lld) in %lld ns (%d req): %lld Mbps",
count, size, count*size, stride, cycles * 4, wr_req, size * count * 8 * 1000 / (cycles * 4));
}
static void dma_cpl_buf_test(struct example_dev *edev, dma_addr_t dma_addr,
u64 size, u64 stride, u64 count, int stall)
{
unsigned long t;
u64 cycles;
u32 rd_req;
u32 rd_cpl;
rd_req = ioread32(edev->bar[0] + 0x000020);
rd_cpl = ioread32(edev->bar[0] + 0x000024);
// DMA base address
iowrite32(dma_addr & 0xffffffff, edev->bar[0] + 0x001080);
iowrite32((dma_addr >> 32) & 0xffffffff, edev->bar[0] + 0x001084);
// DMA offset address
iowrite32(0, edev->bar[0] + 0x001088);
iowrite32(0, edev->bar[0] + 0x00108c);
// DMA offset mask
iowrite32(0x3fff, edev->bar[0] + 0x001090);
iowrite32(0, edev->bar[0] + 0x001094);
// DMA stride
iowrite32(stride & 0xffffffff, edev->bar[0] + 0x001098);
iowrite32((stride >> 32) & 0xffffffff, edev->bar[0] + 0x00109c);
// RAM base address
iowrite32(0, edev->bar[0] + 0x0010c0);
iowrite32(0, edev->bar[0] + 0x0010c4);
// RAM offset address
iowrite32(0, edev->bar[0] + 0x0010c8);
iowrite32(0, edev->bar[0] + 0x0010cc);
// RAM offset mask
iowrite32(0x3fff, edev->bar[0] + 0x0010d0);
iowrite32(0, edev->bar[0] + 0x0010d4);
// RAM stride
iowrite32(stride & 0xffffffff, edev->bar[0] + 0x0010d8);
iowrite32((stride >> 32) & 0xffffffff, edev->bar[0] + 0x0010dc);
// clear cycle count
iowrite32(0, edev->bar[0] + 0x001008);
iowrite32(0, edev->bar[0] + 0x00100c);
// block length
iowrite32(size, edev->bar[0] + 0x001010);
// block count
iowrite32(count, edev->bar[0] + 0x001018);
if (stall)
iowrite32(stall, edev->bar[0] + 0x000040);
// start
iowrite32(1, edev->bar[0] + 0x001000);
if (stall)
msleep(10);
// wait for transfer to complete
t = jiffies + msecs_to_jiffies(20000);
while (time_before(jiffies, t)) {
if ((ioread32(edev->bar[0] + 0x001000) & 1) == 0)
break;
}
if ((ioread32(edev->bar[0] + 0x001000) & 1) != 0)
dev_warn(edev->dev, "%s: operation timed out", __func__);
if ((ioread32(edev->bar[0] + 0x000000) & 0x300) != 0)
dev_warn(edev->dev, "%s: DMA engine busy", __func__);
cycles = ioread32(edev->bar[0] + 0x001008);
rd_req = ioread32(edev->bar[0] + 0x000020) - rd_req;
rd_cpl = ioread32(edev->bar[0] + 0x000024) - rd_cpl;
dev_info(edev->dev, "read %lld x %lld B (total %lld B %lld CPLD, stride %lld) in %lld ns (%d req %d cpl): %lld Mbps",
count, size, count*size, count*((size+15) / 16), stride, cycles * 4, rd_req, rd_cpl, size * count * 8 * 1000 / (cycles * 4));
}
static irqreturn_t edev_intr(int irq, void *data)
@ -227,16 +317,20 @@ static int edev_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
if (pdev->pcie_cap) {
u16 devctl;
u32 lnkcap;
u16 lnkctl;
u16 lnksta;
pci_read_config_word(pdev, pdev->pcie_cap + PCI_EXP_DEVCTL, &devctl);
pci_read_config_dword(pdev, pdev->pcie_cap + PCI_EXP_LNKCAP, &lnkcap);
pci_read_config_word(pdev, pdev->pcie_cap + PCI_EXP_LNKCTL, &lnkctl);
pci_read_config_word(pdev, pdev->pcie_cap + PCI_EXP_LNKSTA, &lnksta);
dev_info(dev, " Max payload size: %d bytes",
128 << ((devctl & PCI_EXP_DEVCTL_PAYLOAD) >> 5));
dev_info(dev, " Max read request size: %d bytes",
128 << ((devctl & PCI_EXP_DEVCTL_READRQ) >> 12));
dev_info(dev, " Read completion boundary: %d bytes",
lnkctl & PCI_EXP_LNKCTL_RCB ? 128 : 64);
dev_info(dev, " Link capability: gen %d x%d",
lnkcap & PCI_EXP_LNKCAP_SLS, (lnkcap & PCI_EXP_LNKCAP_MLW) >> 4);
dev_info(dev, " Link status: gen %d x%d",
@ -361,6 +455,7 @@ static int edev_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
msleep(1);
dev_info(dev, "Read status");
dev_info(dev, "%08x", ioread32(edev->bar[0] + 0x000000));
dev_info(dev, "%08x", ioread32(edev->bar[0] + 0x000118));
dev_info(dev, "start copy to host");
@ -374,6 +469,7 @@ static int edev_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
msleep(1);
dev_info(dev, "Read status");
dev_info(dev, "%08x", ioread32(edev->bar[0] + 0x000000));
dev_info(dev, "%08x", ioread32(edev->bar[0] + 0x000218));
dev_info(dev, "read test data");
@ -398,6 +494,7 @@ static int edev_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
msleep(1);
dev_info(dev, "Read status");
dev_info(dev, "%08x", ioread32(edev->bar[0] + 0x000000));
dev_info(dev, "%08x", ioread32(edev->bar[0] + 0x000218));
dev_info(dev, "read data");
@ -407,31 +504,90 @@ static int edev_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
if (!mismatch) {
u64 size;
u64 stride;
u64 count;
dev_info(dev, "disable interrupts");
iowrite32(0x0, edev->bar[0] + 0x000008);
dev_info(dev, "test RX completion buffer (CPLH, 8)");
size = 8;
stride = size;
for (count = 32; count <= 256; count += 8) {
dma_cpl_buf_test(edev,
edev->dma_region_addr + 0x0000,
size, stride, count, 100000);
if ((ioread32(edev->bar[0] + 0x000000) & 0x300) != 0)
goto out;
}
dev_info(dev, "test RX completion buffer (CPLH, unaligned 8+64)");
size = 8+64;
stride = 0;
for (count = 8; count <= 256; count += 8) {
dma_cpl_buf_test(edev,
edev->dma_region_addr + 128 - 8,
size, stride, count, 400000);
if ((ioread32(edev->bar[0] + 0x000000) & 0x300) != 0)
goto out;
}
dev_info(dev, "test RX completion buffer (CPLH, unaligned 8+128+8)");
size = 8+128+8;
stride = 0;
for (count = 8; count <= 256; count += 8) {
dma_cpl_buf_test(edev,
edev->dma_region_addr + 128 - 8,
size, stride, count, 100000);
if ((ioread32(edev->bar[0] + 0x000000) & 0x300) != 0)
goto out;
}
dev_info(dev, "test RX completion buffer (CPLD)");
size = 512;
stride = size;
for (count = 8; count <= 256; count += 8) {
dma_cpl_buf_test(edev,
edev->dma_region_addr + 0x0000,
size, stride, count, 100000);
if ((ioread32(edev->bar[0] + 0x000000) & 0x300) != 0)
goto out;
}
dev_info(dev, "perform block reads (dma_alloc_coherent)");
count = 10000;
for (size = 1; size <= 8192; size *= 2) {
for (stride = size; stride <= max(size, 256llu); stride *= 2) {
dma_block_read_bench(edev,
edev->dma_region_addr + 0x0000,
size, stride, 10000);
size, stride, count);
if ((ioread32(edev->bar[0] + 0x000000) & 0x300) != 0)
goto out;
}
}
dev_info(dev, "perform block writes (dma_alloc_coherent)");
count = 10000;
for (size = 1; size <= 8192; size *= 2) {
for (stride = size; stride <= max(size, 256llu); stride *= 2) {
dma_block_write_bench(edev,
edev->dma_region_addr + 0x0000,
size, stride, 10000);
size, stride, count);
if ((ioread32(edev->bar[0] + 0x000000) & 0x300) != 0)
goto out;
}
}
}
out:
dev_info(dev, "Read status");
dev_info(dev, "%08x", ioread32(edev->bar[0] + 0x000000));
// probe complete
return 0;

View File

@ -152,7 +152,18 @@ module example_core #
*/
output wire [IRQ_INDEX_WIDTH-1:0] irq_index,
output wire irq_valid,
input wire irq_ready
input wire irq_ready,
/*
* Control and status
*/
output wire dma_enable,
input wire dma_rd_busy,
input wire dma_wr_busy,
input wire dma_rd_req,
input wire dma_rd_cpl,
input wire dma_wr_req,
output wire rx_cpl_stall
);
localparam RAM_ADDR_IMM_WIDTH = (DMA_IMM_ENABLE && (DMA_IMM_WIDTH > RAM_ADDR_WIDTH)) ? DMA_IMM_WIDTH : RAM_ADDR_WIDTH;
@ -203,6 +214,9 @@ reg axil_ctrl_rvalid_reg = 1'b0, axil_ctrl_rvalid_next;
reg [63:0] cycle_count_reg = 0;
reg [15:0] dma_read_active_count_reg = 0;
reg [15:0] dma_write_active_count_reg = 0;
reg [31:0] dma_rd_req_count_reg = 0;
reg [31:0] dma_rd_cpl_count_reg = 0;
reg [31:0] dma_wr_req_count_reg = 0;
reg [DMA_ADDR_WIDTH-1:0] dma_read_desc_dma_addr_reg = 0, dma_read_desc_dma_addr_next;
reg [RAM_ADDR_WIDTH-1:0] dma_read_desc_ram_addr_reg = 0, dma_read_desc_ram_addr_next;
@ -230,6 +244,9 @@ reg dma_rd_int_en_reg = 0, dma_rd_int_en_next;
reg dma_wr_int_en_reg = 0, dma_wr_int_en_next;
reg irq_valid_reg = 1'b0, irq_valid_next;
reg rx_cpl_stall_reg = 1'b0, rx_cpl_stall_next;
reg [23:0] rx_cpl_stall_count_reg = 0, rx_cpl_stall_count_next;
reg dma_read_block_run_reg = 1'b0, dma_read_block_run_next;
reg [DMA_LEN_WIDTH-1:0] dma_read_block_len_reg = 0, dma_read_block_len_next;
reg [31:0] dma_read_block_count_reg = 0, dma_read_block_count_next;
@ -284,6 +301,9 @@ assign m_axis_dma_write_desc_valid = dma_write_desc_valid_reg;
assign irq_index = 0;
assign irq_valid = irq_valid_reg;
assign dma_enable = dma_enable_reg;
assign rx_cpl_stall = rx_cpl_stall_reg;
always @* begin
axil_ctrl_awready_next = 1'b0;
axil_ctrl_wready_next = 1'b0;
@ -322,6 +342,9 @@ always @* begin
irq_valid_next = irq_valid_reg && !irq_ready;
rx_cpl_stall_next = 1'b0;
rx_cpl_stall_count_next = rx_cpl_stall_count_reg;
dma_read_block_run_next = dma_read_block_run_reg;
dma_read_block_len_next = dma_read_block_len_reg;
dma_read_block_count_next = dma_read_block_count_reg;
@ -348,6 +371,11 @@ always @* begin
dma_write_block_ram_offset_mask_next = dma_write_block_ram_offset_mask_reg;
dma_write_block_ram_stride_next = dma_write_block_ram_stride_reg;
if (rx_cpl_stall_count_reg) begin
rx_cpl_stall_count_next = rx_cpl_stall_count_reg - 1;
rx_cpl_stall_next = 1'b1;
end
if (s_axil_ctrl_awvalid && s_axil_ctrl_wvalid && !axil_ctrl_bvalid_reg) begin
// write operation
axil_ctrl_awready_next = 1'b1;
@ -364,6 +392,7 @@ always @* begin
dma_rd_int_en_next = s_axil_ctrl_wdata[0];
dma_wr_int_en_next = s_axil_ctrl_wdata[1];
end
16'h0040: rx_cpl_stall_count_next = s_axil_ctrl_wdata;
// single read
16'h0100: dma_read_desc_dma_addr_next[31:0] = s_axil_ctrl_wdata;
16'h0104: dma_read_desc_dma_addr_next[63:32] = s_axil_ctrl_wdata;
@ -437,6 +466,8 @@ always @* begin
// control
16'h0000: begin
axil_ctrl_rdata_next[0] = dma_enable_reg;
axil_ctrl_rdata_next[8] = dma_wr_busy;
axil_ctrl_rdata_next[9] = dma_rd_busy;
end
16'h0008: begin
axil_ctrl_rdata_next[0] = dma_rd_int_en_reg;
@ -444,8 +475,12 @@ always @* begin
end
16'h0010: axil_ctrl_rdata_next = cycle_count_reg;
16'h0014: axil_ctrl_rdata_next = cycle_count_reg >> 32;
16'h0020: axil_ctrl_rdata_next = dma_read_active_count_reg;
16'h0028: axil_ctrl_rdata_next = dma_write_active_count_reg;
16'h0018: axil_ctrl_rdata_next = dma_read_active_count_reg;
16'h001c: axil_ctrl_rdata_next = dma_write_active_count_reg;
16'h0020: axil_ctrl_rdata_next = dma_rd_req_count_reg;
16'h0024: axil_ctrl_rdata_next = dma_rd_cpl_count_reg;
16'h0028: axil_ctrl_rdata_next = dma_wr_req_count_reg;
16'h0040: axil_ctrl_rdata_next = rx_cpl_stall_count_reg;
// single read
16'h0100: axil_ctrl_rdata_next = dma_read_desc_dma_addr_reg;
16'h0104: axil_ctrl_rdata_next = dma_read_desc_dma_addr_reg >> 32;
@ -615,6 +650,10 @@ always @(posedge clk) begin
+ (m_axis_dma_write_desc_valid && m_axis_dma_write_desc_ready)
- s_axis_dma_write_desc_status_valid;
dma_rd_req_count_reg <= dma_rd_req_count_reg + dma_rd_req;
dma_rd_cpl_count_reg <= dma_rd_cpl_count_reg + dma_rd_cpl;
dma_wr_req_count_reg <= dma_wr_req_count_reg + dma_wr_req;
dma_read_desc_dma_addr_reg <= dma_read_desc_dma_addr_next;
dma_read_desc_ram_addr_reg <= dma_read_desc_ram_addr_next;
dma_read_desc_len_reg <= dma_read_desc_len_next;
@ -643,6 +682,9 @@ always @(posedge clk) begin
irq_valid_reg <= irq_valid_next;
rx_cpl_stall_reg <= rx_cpl_stall_next;
rx_cpl_stall_count_reg <= rx_cpl_stall_count_next;
dma_read_block_run_reg <= dma_read_block_run_next;
dma_read_block_len_reg <= dma_read_block_len_next;
dma_read_block_count_reg <= dma_read_block_count_next;
@ -679,6 +721,9 @@ always @(posedge clk) begin
cycle_count_reg <= 0;
dma_read_active_count_reg <= 0;
dma_write_active_count_reg <= 0;
dma_rd_req_count_reg <= 0;
dma_rd_cpl_count_reg <= 0;
dma_wr_req_count_reg <= 0;
dma_read_desc_valid_reg <= 1'b0;
dma_read_desc_status_valid_reg <= 1'b0;
@ -688,6 +733,8 @@ always @(posedge clk) begin
dma_rd_int_en_reg <= 1'b0;
dma_wr_int_en_reg <= 1'b0;
irq_valid_reg <= 1'b0;
rx_cpl_stall_reg <= 1'b0;
rx_cpl_stall_count_reg <= 0;
dma_read_block_run_reg <= 1'b0;
dma_write_block_run_reg <= 1'b0;
end

View File

@ -172,7 +172,12 @@ module example_core_pcie #
* Status
*/
output wire status_error_cor,
output wire status_error_uncor
output wire status_error_uncor,
/*
* Control and status
*/
output wire rx_cpl_stall
);
parameter AXIL_CTRL_DATA_WIDTH = 32;
@ -345,6 +350,11 @@ wire [IRQ_INDEX_WIDTH-1:0] irq_index;
wire irq_valid;
wire irq_ready;
// Control and status
wire dma_enable;
wire dma_rd_busy;
wire dma_wr_busy;
pcie_tlp_demux_bar #(
.PORTS(3),
.TLP_DATA_WIDTH(TLP_DATA_WIDTH),
@ -900,8 +910,8 @@ dma_if_pcie_inst (
/*
* Configuration
*/
.read_enable(1'b1),
.write_enable(1'b1),
.read_enable(dma_enable),
.write_enable(dma_enable),
.ext_tag_enable(ext_tag_enable),
.rcb_128b(rcb_128b),
.requester_id({bus_num, 5'd0, 3'd0}),
@ -911,8 +921,8 @@ dma_if_pcie_inst (
/*
* Status
*/
.status_rd_busy(),
.status_wr_busy(),
.status_rd_busy(dma_rd_busy),
.status_wr_busy(dma_wr_busy),
.status_error_cor(status_error_cor_int[3]),
.status_error_uncor(status_error_uncor_int[3])
);
@ -1109,7 +1119,18 @@ core_inst (
*/
.irq_index(irq_index),
.irq_valid(irq_valid),
.irq_ready(irq_ready)
.irq_ready(irq_ready),
/*
* Control and status
*/
.dma_enable(dma_enable),
.dma_rd_busy(dma_rd_busy),
.dma_wr_busy(dma_wr_busy),
.dma_rd_req(tx_rd_req_tlp_valid && tx_rd_req_tlp_sop && tx_rd_req_tlp_ready),
.dma_rd_cpl(rx_cpl_tlp_valid && rx_cpl_tlp_sop && rx_cpl_tlp_ready),
.dma_wr_req(tx_wr_req_tlp_valid && tx_wr_req_tlp_sop && tx_wr_req_tlp_ready),
.rx_cpl_stall(rx_cpl_stall)
);
endmodule

View File

@ -200,6 +200,12 @@ wire [2:0] max_payload_size;
wire msix_enable;
wire msix_mask;
wire rx_cpl_stall;
wire rx_st_ready_int;
assign rx_st_ready = rx_st_ready_int & !rx_cpl_stall;
pcie_ptile_if #(
.SEG_COUNT(SEG_COUNT),
.SEG_DATA_WIDTH(SEG_DATA_WIDTH),
@ -226,7 +232,7 @@ pcie_ptile_if_inst (
.rx_st_sop(rx_st_sop),
.rx_st_eop(rx_st_eop),
.rx_st_valid(rx_st_valid),
.rx_st_ready(rx_st_ready),
.rx_st_ready(rx_st_ready_int),
.rx_st_hdr(rx_st_hdr),
.rx_st_tlp_prfx(rx_st_tlp_prfx),
.rx_st_vf_active(rx_st_vf_active),
@ -488,7 +494,12 @@ core_pcie_inst (
* Status
*/
.status_error_cor(),
.status_error_uncor()
.status_error_uncor(),
/*
* Control and status
*/
.rx_cpl_stall(rx_cpl_stall)
);
endmodule

View File

@ -58,7 +58,7 @@ module example_core_pcie_s10 #
// Completion header flow control credit limit (read)
parameter READ_CPLH_FC_LIMIT = 770,
// Completion data flow control credit limit (read)
parameter READ_CPLD_FC_LIMIT = 2500,
parameter READ_CPLD_FC_LIMIT = 2400,
// Operation table size (write)
parameter WRITE_OP_TABLE_SIZE = 2**TX_SEQ_NUM_WIDTH,
// In-flight transmit limit (write)
@ -194,6 +194,12 @@ wire [2:0] max_payload_size;
wire msix_enable;
wire msix_mask;
wire rx_cpl_stall;
wire rx_st_ready_int;
assign rx_st_ready = rx_st_ready_int & !rx_cpl_stall;
pcie_s10_if #(
.SEG_COUNT(SEG_COUNT),
.SEG_DATA_WIDTH(SEG_DATA_WIDTH),
@ -222,7 +228,7 @@ pcie_s10_if_inst (
.rx_st_sop(rx_st_sop),
.rx_st_eop(rx_st_eop),
.rx_st_valid(rx_st_valid),
.rx_st_ready(rx_st_ready),
.rx_st_ready(rx_st_ready_int),
.rx_st_vf_active(rx_st_vf_active),
.rx_st_func_num(rx_st_func_num),
.rx_st_vf_num(rx_st_vf_num),
@ -495,7 +501,12 @@ core_pcie_inst (
* Status
*/
.status_error_cor(),
.status_error_uncor()
.status_error_uncor(),
/*
* Control and status
*/
.rx_cpl_stall(rx_cpl_stall)
);
endmodule

View File

@ -68,9 +68,9 @@ module example_core_pcie_us #
// In-flight transmit limit (read)
parameter READ_TX_LIMIT = 2**(RQ_SEQ_NUM_WIDTH-1),
// Completion header flow control credit limit (read)
parameter READ_CPLH_FC_LIMIT = AXIS_PCIE_RQ_USER_WIDTH == 60 ? 64 : 128,
parameter READ_CPLH_FC_LIMIT = AXIS_PCIE_RQ_USER_WIDTH == 60 ? 64 : 256,
// Completion data flow control credit limit (read)
parameter READ_CPLD_FC_LIMIT = AXIS_PCIE_RQ_USER_WIDTH == 60 ? 992 : 2048,
parameter READ_CPLD_FC_LIMIT = AXIS_PCIE_RQ_USER_WIDTH == 60 ? 1024-64 : 2048-256,
// Operation table size (write)
parameter WRITE_OP_TABLE_SIZE = 2**(RQ_SEQ_NUM_WIDTH-1),
// In-flight transmit limit (write)
@ -259,6 +259,14 @@ wire ext_tag_enable;
wire msix_enable;
wire msix_mask;
wire rx_cpl_stall;
wire s_axis_rc_tvalid_int;
wire s_axis_rc_tready_int;
assign s_axis_rc_tvalid_int = s_axis_rc_tvalid & ~rx_cpl_stall;
assign s_axis_rc_tready = s_axis_rc_tready_int & ~rx_cpl_stall;
pcie_us_if #(
.AXIS_PCIE_DATA_WIDTH(AXIS_PCIE_DATA_WIDTH),
.AXIS_PCIE_KEEP_WIDTH(AXIS_PCIE_KEEP_WIDTH),
@ -295,8 +303,8 @@ pcie_us_if_inst (
*/
.s_axis_rc_tdata(s_axis_rc_tdata),
.s_axis_rc_tkeep(s_axis_rc_tkeep),
.s_axis_rc_tvalid(s_axis_rc_tvalid),
.s_axis_rc_tready(s_axis_rc_tready),
.s_axis_rc_tvalid(s_axis_rc_tvalid_int),
.s_axis_rc_tready(s_axis_rc_tready_int),
.s_axis_rc_tlast(s_axis_rc_tlast),
.s_axis_rc_tuser(s_axis_rc_tuser),
@ -624,7 +632,12 @@ core_pcie_inst (
* Status
*/
.status_error_cor(status_error_cor),
.status_error_uncor(status_error_uncor)
.status_error_uncor(status_error_uncor),
/*
* Control and status
*/
.rx_cpl_stall(rx_cpl_stall)
);
endmodule

View File

@ -224,6 +224,8 @@ async def run_test(dut):
await Timer(2000, 'ns')
# read status
status = await dev_pf0_bar0.read_dword(0x000000)
tb.log.info("DMA Status: 0x%x", status)
val = await dev_pf0_bar0.read_dword(0x000118)
tb.log.info("Status: 0x%x", val)
assert val == 0x800000AA
@ -238,6 +240,8 @@ async def run_test(dut):
await Timer(2000, 'ns')
# read status
status = await dev_pf0_bar0.read_dword(0x000000)
tb.log.info("DMA Status: 0x%x", status)
val = await dev_pf0_bar0.read_dword(0x000218)
tb.log.info("Status: 0x%x", val)
assert val == 0x80000055
@ -258,6 +262,8 @@ async def run_test(dut):
await Timer(2000, 'ns')
# read status
status = await dev_pf0_bar0.read_dword(0x000000)
tb.log.info("DMA Status: 0x%x", status)
val = await dev_pf0_bar0.read_dword(0x000218)
tb.log.info("Status: 0x%x", val)
assert val == 0x800000AA
@ -321,11 +327,15 @@ async def run_test(dut):
await dev_pf0_bar0.write_dword(0x001000, 1)
for k in range(10):
cnt = await dev_pf0_bar0.read_dword(0x001018)
await Timer(1000, 'ns')
if cnt == 0:
run = await dev_pf0_bar0.read_dword(0x001000)
if run == 0:
break
# read status
status = await dev_pf0_bar0.read_dword(0x000000)
tb.log.info("DMA Status: 0x%x", status)
# configure operation (write)
# DMA base address
await dev_pf0_bar0.write_dword(0x001180, (mem_base+dest_offset) & 0xffffffff)
@ -363,11 +373,17 @@ async def run_test(dut):
await dev_pf0_bar0.write_dword(0x001100, 1)
for k in range(10):
cnt = await dev_pf0_bar0.read_dword(0x001118)
await Timer(1000, 'ns')
if cnt == 0:
run = await dev_pf0_bar0.read_dword(0x001100)
if run == 0:
break
# read status
status = await dev_pf0_bar0.read_dword(0x000000)
tb.log.info("DMA Status: 0x%x", status)
assert status & 0x300 == 0
tb.log.info("%s", mem.hexdump_str(dest_offset, region_len))
assert mem[src_offset:src_offset+region_len] == mem[dest_offset:dest_offset+region_len]

View File

@ -258,6 +258,211 @@ class TB(object):
await self.rc.enumerate()
async def dma_block_read_bench(tb, dev, addr, mask, size, stride, count):
dev_pf0_bar0 = dev.bar_window[0]
rd_req = await dev_pf0_bar0.read_dword(0x000020)
rd_cpl = await dev_pf0_bar0.read_dword(0x000024)
# configure operation (read)
# DMA base address
await dev_pf0_bar0.write_dword(0x001080, addr & 0xffffffff)
await dev_pf0_bar0.write_dword(0x001084, (addr >> 32) & 0xffffffff)
# DMA offset address
await dev_pf0_bar0.write_dword(0x001088, 0)
await dev_pf0_bar0.write_dword(0x00108c, 0)
# DMA offset mask
await dev_pf0_bar0.write_dword(0x001090, mask)
await dev_pf0_bar0.write_dword(0x001094, 0)
# DMA stride
await dev_pf0_bar0.write_dword(0x001098, stride)
await dev_pf0_bar0.write_dword(0x00109c, 0)
# RAM base address
await dev_pf0_bar0.write_dword(0x0010c0, 0)
await dev_pf0_bar0.write_dword(0x0010c4, 0)
# RAM offset address
await dev_pf0_bar0.write_dword(0x0010c8, 0)
await dev_pf0_bar0.write_dword(0x0010cc, 0)
# RAM offset mask
await dev_pf0_bar0.write_dword(0x0010d0, mask)
await dev_pf0_bar0.write_dword(0x0010d4, 0)
# RAM stride
await dev_pf0_bar0.write_dword(0x0010d8, stride)
await dev_pf0_bar0.write_dword(0x0010dc, 0)
# clear cycle count
await dev_pf0_bar0.write_dword(0x001008, 0)
await dev_pf0_bar0.write_dword(0x00100c, 0)
# block length
await dev_pf0_bar0.write_dword(0x001010, size)
# block count
await dev_pf0_bar0.write_dword(0x001018, count)
await dev_pf0_bar0.write_dword(0x00101c, 0)
# start
await dev_pf0_bar0.write_dword(0x001000, 1)
for k in range(1000):
await Timer(1000, 'ns')
run = await dev_pf0_bar0.read_dword(0x001000)
status = await dev_pf0_bar0.read_dword(0x000000)
if run == 0 and status & 0x300 == 0:
break
if run != 0:
tb.log.warning("Operation timed out")
if status & 0x300 != 0:
tb.log.warning("DMA engine busy")
cycles = await dev_pf0_bar0.read_dword(0x001008)
rd_req = await dev_pf0_bar0.read_dword(0x000020) - rd_req
rd_cpl = await dev_pf0_bar0.read_dword(0x000024) - rd_cpl
tb.log.info("read %d blocks of %d bytes (total %d B, stride %d) in %d ns (%d req %d cpl) %d Mbps",
count, size, count*size, stride, cycles*4, rd_req, rd_cpl, size * count * 8 * 1000 / (cycles * 4))
assert status & 0x300 == 0
async def dma_block_write_bench(tb, dev, addr, mask, size, stride, count):
dev_pf0_bar0 = dev.bar_window[0]
wr_req = await dev_pf0_bar0.read_dword(0x000028)
# configure operation (read)
# DMA base address
await dev_pf0_bar0.write_dword(0x001180, addr & 0xffffffff)
await dev_pf0_bar0.write_dword(0x001184, (addr >> 32) & 0xffffffff)
# DMA offset address
await dev_pf0_bar0.write_dword(0x001188, 0)
await dev_pf0_bar0.write_dword(0x00118c, 0)
# DMA offset mask
await dev_pf0_bar0.write_dword(0x001190, mask)
await dev_pf0_bar0.write_dword(0x001194, 0)
# DMA stride
await dev_pf0_bar0.write_dword(0x001198, stride)
await dev_pf0_bar0.write_dword(0x00119c, 0)
# RAM base address
await dev_pf0_bar0.write_dword(0x0011c0, 0)
await dev_pf0_bar0.write_dword(0x0011c4, 0)
# RAM offset address
await dev_pf0_bar0.write_dword(0x0011c8, 0)
await dev_pf0_bar0.write_dword(0x0011cc, 0)
# RAM offset mask
await dev_pf0_bar0.write_dword(0x0011d0, mask)
await dev_pf0_bar0.write_dword(0x0011d4, 0)
# RAM stride
await dev_pf0_bar0.write_dword(0x0011d8, stride)
await dev_pf0_bar0.write_dword(0x0011dc, 0)
# clear cycle count
await dev_pf0_bar0.write_dword(0x001108, 0)
await dev_pf0_bar0.write_dword(0x00110c, 0)
# block length
await dev_pf0_bar0.write_dword(0x001110, size)
# block count
await dev_pf0_bar0.write_dword(0x001118, count)
await dev_pf0_bar0.write_dword(0x00111c, 0)
# start
await dev_pf0_bar0.write_dword(0x001100, 1)
for k in range(1000):
await Timer(1000, 'ns')
run = await dev_pf0_bar0.read_dword(0x001100)
status = await dev_pf0_bar0.read_dword(0x000000)
if run == 0 and status & 0x300 == 0:
break
if run != 0:
tb.log.warning("Operation timed out")
if status & 0x300 != 0:
tb.log.warning("DMA engine busy")
cycles = await dev_pf0_bar0.read_dword(0x001108)
wr_req = await dev_pf0_bar0.read_dword(0x000028) - wr_req
tb.log.info("wrote %d blocks of %d bytes (total %d B, stride %d) in %d ns (%d req) %d Mbps",
count, size, count*size, stride, cycles*4, wr_req, size * count * 8 * 1000 / (cycles * 4))
assert status & 0x300 == 0
async def dma_cpl_buf_test(tb, dev, addr, mask, size, stride, count, stall):
dev_pf0_bar0 = dev.bar_window[0]
rd_req = await dev_pf0_bar0.read_dword(0x000020)
rd_cpl = await dev_pf0_bar0.read_dword(0x000024)
# configure operation (read)
# DMA base address
await dev_pf0_bar0.write_dword(0x001080, addr & 0xffffffff)
await dev_pf0_bar0.write_dword(0x001084, (addr >> 32) & 0xffffffff)
# DMA offset address
await dev_pf0_bar0.write_dword(0x001088, 0)
await dev_pf0_bar0.write_dword(0x00108c, 0)
# DMA offset mask
await dev_pf0_bar0.write_dword(0x001090, mask)
await dev_pf0_bar0.write_dword(0x001094, 0)
# DMA stride
await dev_pf0_bar0.write_dword(0x001098, stride)
await dev_pf0_bar0.write_dword(0x00109c, 0)
# RAM base address
await dev_pf0_bar0.write_dword(0x0010c0, 0)
await dev_pf0_bar0.write_dword(0x0010c4, 0)
# RAM offset address
await dev_pf0_bar0.write_dword(0x0010c8, 0)
await dev_pf0_bar0.write_dword(0x0010cc, 0)
# RAM offset mask
await dev_pf0_bar0.write_dword(0x0010d0, mask)
await dev_pf0_bar0.write_dword(0x0010d4, 0)
# RAM stride
await dev_pf0_bar0.write_dword(0x0010d8, stride)
await dev_pf0_bar0.write_dword(0x0010dc, 0)
# clear cycle count
await dev_pf0_bar0.write_dword(0x001008, 0)
await dev_pf0_bar0.write_dword(0x00100c, 0)
# block length
await dev_pf0_bar0.write_dword(0x001010, size)
# block count
await dev_pf0_bar0.write_dword(0x001018, count)
await dev_pf0_bar0.write_dword(0x00101c, 0)
if stall:
# stall RX
await dev_pf0_bar0.write_dword(0x000040, stall)
# start
await dev_pf0_bar0.write_dword(0x001000, 1)
# wait for stall
if stall:
for k in range(stall):
await RisingEdge(tb.dut.clk)
for k in range(100):
await Timer(1000, 'ns')
run = await dev_pf0_bar0.read_dword(0x001000)
status = await dev_pf0_bar0.read_dword(0x000000)
if run == 0 and status & 0x300 == 0:
break
if run != 0:
tb.log.warning("Operation timed out")
if status & 0x300 != 0:
tb.log.warning("DMA engine busy")
cycles = await dev_pf0_bar0.read_dword(0x001008)
rd_req = await dev_pf0_bar0.read_dword(0x000020) - rd_req
rd_cpl = await dev_pf0_bar0.read_dword(0x000024) - rd_cpl
tb.log.info("read %d x %d B (total %d B %d CPLD, stride %d) in %d ns (%d req %d cpl) %d Mbps",
count, size, count*size, count*((size+15)//16), stride, cycles*4, rd_req, rd_cpl, size * count * 8 * 1000 / (cycles * 4))
assert status & 0x300 == 0
@cocotb.test()
async def run_test(dut):
@ -309,6 +514,8 @@ async def run_test(dut):
await Timer(2000, 'ns')
# read status
status = await dev_pf0_bar0.read_dword(0x000000)
tb.log.info("DMA Status: 0x%x", status)
val = await dev_pf0_bar0.read_dword(0x000118)
tb.log.info("Status: 0x%x", val)
assert val == 0x800000AA
@ -323,6 +530,8 @@ async def run_test(dut):
await Timer(2000, 'ns')
# read status
status = await dev_pf0_bar0.read_dword(0x000000)
tb.log.info("DMA Status: 0x%x", status)
val = await dev_pf0_bar0.read_dword(0x000218)
tb.log.info("Status: 0x%x", val)
assert val == 0x80000055
@ -343,6 +552,8 @@ async def run_test(dut):
await Timer(2000, 'ns')
# read status
status = await dev_pf0_bar0.read_dword(0x000000)
tb.log.info("DMA Status: 0x%x", status)
val = await dev_pf0_bar0.read_dword(0x000218)
tb.log.info("Status: 0x%x", val)
assert val == 0x800000AA
@ -353,110 +564,66 @@ async def run_test(dut):
tb.log.info("Test DMA block operations")
# disable interrupts
await dev_pf0_bar0.write_dword(0x000008, 0)
region_len = 0x2000
src_offset = 0x0000
dest_offset = 0x4000
block_size = 256
block_stride = block_size
block_count = 32
# write packet data
mem[src_offset:src_offset+region_len] = bytearray([x % 256 for x in range(region_len)])
# enable DMA
await dev_pf0_bar0.write_dword(0x000000, 1)
# disable interrupts
await dev_pf0_bar0.write_dword(0x000008, 0)
# configure operation (read)
# DMA base address
await dev_pf0_bar0.write_dword(0x001080, (mem_base+src_offset) & 0xffffffff)
await dev_pf0_bar0.write_dword(0x001084, (mem_base+src_offset >> 32) & 0xffffffff)
# DMA offset address
await dev_pf0_bar0.write_dword(0x001088, 0)
await dev_pf0_bar0.write_dword(0x00108c, 0)
# DMA offset mask
await dev_pf0_bar0.write_dword(0x001090, region_len-1)
await dev_pf0_bar0.write_dword(0x001094, 0)
# DMA stride
await dev_pf0_bar0.write_dword(0x001098, block_stride)
await dev_pf0_bar0.write_dword(0x00109c, 0)
# RAM base address
await dev_pf0_bar0.write_dword(0x0010c0, 0)
await dev_pf0_bar0.write_dword(0x0010c4, 0)
# RAM offset address
await dev_pf0_bar0.write_dword(0x0010c8, 0)
await dev_pf0_bar0.write_dword(0x0010cc, 0)
# RAM offset mask
await dev_pf0_bar0.write_dword(0x0010d0, region_len-1)
await dev_pf0_bar0.write_dword(0x0010d4, 0)
# RAM stride
await dev_pf0_bar0.write_dword(0x0010d8, block_stride)
await dev_pf0_bar0.write_dword(0x0010dc, 0)
# clear cycle count
await dev_pf0_bar0.write_dword(0x001008, 0)
await dev_pf0_bar0.write_dword(0x00100c, 0)
# block length
await dev_pf0_bar0.write_dword(0x001010, block_size)
# block count
await dev_pf0_bar0.write_dword(0x001018, block_count)
await dev_pf0_bar0.write_dword(0x00101c, 0)
# start
await dev_pf0_bar0.write_dword(0x001000, 1)
for k in range(10):
cnt = await dev_pf0_bar0.read_dword(0x001018)
await Timer(1000, 'ns')
if cnt == 0:
break
# configure operation (write)
# DMA base address
await dev_pf0_bar0.write_dword(0x001180, (mem_base+dest_offset) & 0xffffffff)
await dev_pf0_bar0.write_dword(0x001184, (mem_base+dest_offset >> 32) & 0xffffffff)
# DMA offset address
await dev_pf0_bar0.write_dword(0x001188, 0)
await dev_pf0_bar0.write_dword(0x00118c, 0)
# DMA offset mask
await dev_pf0_bar0.write_dword(0x001190, region_len-1)
await dev_pf0_bar0.write_dword(0x001194, 0)
# DMA stride
await dev_pf0_bar0.write_dword(0x001198, block_stride)
await dev_pf0_bar0.write_dword(0x00119c, 0)
# RAM base address
await dev_pf0_bar0.write_dword(0x0011c0, 0)
await dev_pf0_bar0.write_dword(0x0011c4, 0)
# RAM offset address
await dev_pf0_bar0.write_dword(0x0011c8, 0)
await dev_pf0_bar0.write_dword(0x0011cc, 0)
# RAM offset mask
await dev_pf0_bar0.write_dword(0x0011d0, region_len-1)
await dev_pf0_bar0.write_dword(0x0011d4, 0)
# RAM stride
await dev_pf0_bar0.write_dword(0x0011d8, block_stride)
await dev_pf0_bar0.write_dword(0x0011dc, 0)
# clear cycle count
await dev_pf0_bar0.write_dword(0x001108, 0)
await dev_pf0_bar0.write_dword(0x00110c, 0)
# block length
await dev_pf0_bar0.write_dword(0x001110, block_size)
# block count
await dev_pf0_bar0.write_dword(0x001118, block_count)
await dev_pf0_bar0.write_dword(0x00111c, 0)
# start
await dev_pf0_bar0.write_dword(0x001100, 1)
for k in range(10):
cnt = await dev_pf0_bar0.read_dword(0x001118)
await Timer(1000, 'ns')
if cnt == 0:
break
await dma_block_read_bench(tb, dev, mem_base+src_offset, region_len-1, 256, 256, 32)
await dma_block_write_bench(tb, dev, mem_base+dest_offset, region_len-1, 256, 256, 32)
tb.log.info("%s", mem.hexdump_str(dest_offset, region_len))
assert mem[src_offset:src_offset+region_len] == mem[dest_offset:dest_offset+region_len]
tb.log.info("Test RX completion buffer (CPLH, 8)")
tb.rc.split_on_all_rcb = True
size = 8
stride = size
for count in range(32, 256+1, 8):
await dma_cpl_buf_test(tb, dev, mem_base, region_len-1, size, stride, count, 2000)
tb.log.info("Test RX completion buffer (CPLH, 8+64)")
size = 8+64
stride = 0
for count in range(8, 256+1, 8):
await dma_cpl_buf_test(tb, dev, mem_base+128-8, region_len-1, size, stride, count, 2000)
tb.log.info("Test RX completion buffer (CPLH, 8+128+8)")
size = 8+128+8
stride = 0
for count in range(8, 256+1, 8):
await dma_cpl_buf_test(tb, dev, mem_base+128-8, region_len-1, size, stride, count, 2000)
tb.rc.split_on_all_rcb = False
tb.log.info("Test RX completion buffer (CPLD)")
size = 512
stride = size
for count in range(8, 256+1, 8):
await dma_cpl_buf_test(tb, dev, mem_base, region_len-1, size, stride, count, 4000)
tb.log.info("Perform block reads")
count = 100
for size in [2**x for x in range(14)]:
stride = size
await dma_block_read_bench(tb, dev, mem_base, region_len-1, size, stride, count)
tb.log.info("Perform block writes")
count = 100
for size in [2**x for x in range(14)]:
stride = size
await dma_block_write_bench(tb, dev, mem_base, region_len-1, size, stride, count)
await RisingEdge(dut.clk)
await RisingEdge(dut.clk)

View File

@ -57,7 +57,7 @@ VERILOG_SOURCES += ../../../../rtl/priority_encoder.v
VERILOG_SOURCES += ../../../../rtl/pulse_merge.v
# module parameters
export PARAM_SEG_COUNT := 1
export PARAM_SEG_COUNT := 2
export PARAM_SEG_DATA_WIDTH := 256
export PARAM_SEG_EMPTY_WIDTH := $(shell python -c "print((($(PARAM_SEG_DATA_WIDTH)//32)-1).bit_length())" )
export PARAM_TX_SEQ_NUM_WIDTH := 6
@ -69,7 +69,7 @@ export PARAM_IMM_WIDTH := 32
export PARAM_READ_OP_TABLE_SIZE := $(PARAM_PCIE_TAG_COUNT)
export PARAM_READ_TX_LIMIT := $(shell echo "$$(( 1 << $(PARAM_TX_SEQ_NUM_WIDTH) ))" )
export PARAM_READ_CPLH_FC_LIMIT := 770
export PARAM_READ_CPLD_FC_LIMIT := 2500
export PARAM_READ_CPLD_FC_LIMIT := 2400
export PARAM_WRITE_OP_TABLE_SIZE := $(shell echo "$$(( 1 << $(PARAM_TX_SEQ_NUM_WIDTH) ))" )
export PARAM_WRITE_TX_LIMIT := $(shell echo "$$(( 1 << $(PARAM_TX_SEQ_NUM_WIDTH) ))" )
export PARAM_BAR0_APERTURE := 24

View File

@ -206,6 +206,211 @@ class TB(object):
await self.rc.enumerate()
async def dma_block_read_bench(tb, dev, addr, mask, size, stride, count):
"""Run a timed DMA block-read benchmark and log the measured throughput.

Programs the read-engine registers in BAR0 (DMA base address, offset
mask, stride, block length, block count), starts the operation, then
polls the run register (0x1000) and the status register (0x0000) until
the engine finishes or the poll loop times out.  Read-request (0x20)
and read-completion (0x24) counters are sampled before and after so the
log line reports per-run deltas.

Parameters:
    tb: testbench object; only tb.log is used here.
    dev: PCIe device under test; dev.bar_window[0] is the BAR0 window.
    addr: DMA base address in host memory.
    mask: offset mask applied on both the DMA and RAM sides
        (presumably region length minus one -- confirm with callers).
    size: block length in bytes.
    stride: address increment between successive blocks.
    count: number of blocks to read.

Raises AssertionError if status bits [9:8] (DMA engine busy, per the
warning below) are still set when the poll loop exits.
"""
dev_pf0_bar0 = dev.bar_window[0]
# snapshot request/completion counters so per-run deltas can be reported
rd_req = await dev_pf0_bar0.read_dword(0x000020)
rd_cpl = await dev_pf0_bar0.read_dword(0x000024)
# configure operation (read)
# DMA base address
await dev_pf0_bar0.write_dword(0x001080, addr & 0xffffffff)
await dev_pf0_bar0.write_dword(0x001084, (addr >> 32) & 0xffffffff)
# DMA offset address
await dev_pf0_bar0.write_dword(0x001088, 0)
await dev_pf0_bar0.write_dword(0x00108c, 0)
# DMA offset mask
await dev_pf0_bar0.write_dword(0x001090, mask)
await dev_pf0_bar0.write_dword(0x001094, 0)
# DMA stride
await dev_pf0_bar0.write_dword(0x001098, stride)
await dev_pf0_bar0.write_dword(0x00109c, 0)
# RAM base address
await dev_pf0_bar0.write_dword(0x0010c0, 0)
await dev_pf0_bar0.write_dword(0x0010c4, 0)
# RAM offset address
await dev_pf0_bar0.write_dword(0x0010c8, 0)
await dev_pf0_bar0.write_dword(0x0010cc, 0)
# RAM offset mask
await dev_pf0_bar0.write_dword(0x0010d0, mask)
await dev_pf0_bar0.write_dword(0x0010d4, 0)
# RAM stride
await dev_pf0_bar0.write_dword(0x0010d8, stride)
await dev_pf0_bar0.write_dword(0x0010dc, 0)
# clear cycle count
await dev_pf0_bar0.write_dword(0x001008, 0)
await dev_pf0_bar0.write_dword(0x00100c, 0)
# block length
await dev_pf0_bar0.write_dword(0x001010, size)
# block count
await dev_pf0_bar0.write_dword(0x001018, count)
await dev_pf0_bar0.write_dword(0x00101c, 0)
# start
await dev_pf0_bar0.write_dword(0x001000, 1)
# poll for completion: up to 1000 x 1 us of simulation time
for k in range(1000):
await Timer(1000, 'ns')
run = await dev_pf0_bar0.read_dword(0x001000)
status = await dev_pf0_bar0.read_dword(0x000000)
if run == 0 and status & 0x300 == 0:
break
if run != 0:
tb.log.warning("Operation timed out")
if status & 0x300 != 0:
tb.log.warning("DMA engine busy")
# cycle counter at 0x1008; cycles*4 below assumes a 4 ns clock period
# (250 MHz) -- TODO confirm against the DUT clock
cycles = await dev_pf0_bar0.read_dword(0x001008)
rd_req = await dev_pf0_bar0.read_dword(0x000020) - rd_req
rd_cpl = await dev_pf0_bar0.read_dword(0x000024) - rd_cpl
tb.log.info("read %d blocks of %d bytes (total %d B, stride %d) in %d ns (%d req %d cpl) %d Mbps",
count, size, count*size, stride, cycles*4, rd_req, rd_cpl, size * count * 8 * 1000 / (cycles * 4))
assert status & 0x300 == 0
async def dma_block_write_bench(tb, dev, addr, mask, size, stride, count):
"""Run a timed DMA block-write benchmark and log the measured throughput.

Programs the write-engine registers in BAR0 (DMA base address, offset
mask, stride, block length, block count), starts the operation, then
polls the run register (0x1100) and the status register (0x0000) until
the engine finishes or the poll loop times out.  The write-request
counter (0x28) is sampled before and after so the log line reports a
per-run delta.

Parameters:
    tb: testbench object; only tb.log is used here.
    dev: PCIe device under test; dev.bar_window[0] is the BAR0 window.
    addr: DMA base address in host memory.
    mask: offset mask applied on both the DMA and RAM sides
        (presumably region length minus one -- confirm with callers).
    size: block length in bytes.
    stride: address increment between successive blocks.
    count: number of blocks to write.

Raises AssertionError if status bits [9:8] (DMA engine busy, per the
warning below) are still set when the poll loop exits.
"""
dev_pf0_bar0 = dev.bar_window[0]
# snapshot request counter so a per-run delta can be reported
wr_req = await dev_pf0_bar0.read_dword(0x000028)
# configure operation (write)
# DMA base address
await dev_pf0_bar0.write_dword(0x001180, addr & 0xffffffff)
await dev_pf0_bar0.write_dword(0x001184, (addr >> 32) & 0xffffffff)
# DMA offset address
await dev_pf0_bar0.write_dword(0x001188, 0)
await dev_pf0_bar0.write_dword(0x00118c, 0)
# DMA offset mask
await dev_pf0_bar0.write_dword(0x001190, mask)
await dev_pf0_bar0.write_dword(0x001194, 0)
# DMA stride
await dev_pf0_bar0.write_dword(0x001198, stride)
await dev_pf0_bar0.write_dword(0x00119c, 0)
# RAM base address
await dev_pf0_bar0.write_dword(0x0011c0, 0)
await dev_pf0_bar0.write_dword(0x0011c4, 0)
# RAM offset address
await dev_pf0_bar0.write_dword(0x0011c8, 0)
await dev_pf0_bar0.write_dword(0x0011cc, 0)
# RAM offset mask
await dev_pf0_bar0.write_dword(0x0011d0, mask)
await dev_pf0_bar0.write_dword(0x0011d4, 0)
# RAM stride
await dev_pf0_bar0.write_dword(0x0011d8, stride)
await dev_pf0_bar0.write_dword(0x0011dc, 0)
# clear cycle count
await dev_pf0_bar0.write_dword(0x001108, 0)
await dev_pf0_bar0.write_dword(0x00110c, 0)
# block length
await dev_pf0_bar0.write_dword(0x001110, size)
# block count
await dev_pf0_bar0.write_dword(0x001118, count)
await dev_pf0_bar0.write_dword(0x00111c, 0)
# start
await dev_pf0_bar0.write_dword(0x001100, 1)
# poll for completion: up to 1000 x 1 us of simulation time
for k in range(1000):
await Timer(1000, 'ns')
run = await dev_pf0_bar0.read_dword(0x001100)
status = await dev_pf0_bar0.read_dword(0x000000)
if run == 0 and status & 0x300 == 0:
break
if run != 0:
tb.log.warning("Operation timed out")
if status & 0x300 != 0:
tb.log.warning("DMA engine busy")
# cycle counter at 0x1108; cycles*4 below assumes a 4 ns clock period
# (250 MHz) -- TODO confirm against the DUT clock
cycles = await dev_pf0_bar0.read_dword(0x001108)
wr_req = await dev_pf0_bar0.read_dword(0x000028) - wr_req
tb.log.info("wrote %d blocks of %d bytes (total %d B, stride %d) in %d ns (%d req) %d Mbps",
count, size, count*size, stride, cycles*4, wr_req, size * count * 8 * 1000 / (cycles * 4))
assert status & 0x300 == 0
async def dma_cpl_buf_test(tb, dev, addr, mask, size, stride, count, stall):
"""Exercise the RX completion buffer by issuing DMA reads while RX is stalled.

Programs the read-engine registers in BAR0 exactly like
dma_block_read_bench, but optionally writes a stall value to register
0x40 ("stall RX" per the comment below) before starting, then waits
that many DUT clock cycles so completions accumulate in the RX buffer
before being drained.  Read-request (0x20) and read-completion (0x24)
counters are sampled before and after; the log line also reports the
CPLD credit count computed as count*((size+15)//16) (16 B per CPLD
credit).

Parameters:
    tb: testbench object; tb.log and tb.dut.clk are used.
    dev: PCIe device under test; dev.bar_window[0] is the BAR0 window.
    addr: DMA base address in host memory.
    mask: offset mask applied on both the DMA and RAM sides.
    size: block length in bytes.
    stride: address increment between successive blocks (0 rereads the
        same address).
    count: number of blocks to read.
    stall: RX stall duration in clock cycles; 0/falsy disables the stall.

Raises AssertionError if status bits [9:8] (DMA engine busy) are still
set when the poll loop exits.
"""
dev_pf0_bar0 = dev.bar_window[0]
# snapshot request/completion counters so per-run deltas can be reported
rd_req = await dev_pf0_bar0.read_dword(0x000020)
rd_cpl = await dev_pf0_bar0.read_dword(0x000024)
# configure operation (read)
# DMA base address
await dev_pf0_bar0.write_dword(0x001080, addr & 0xffffffff)
await dev_pf0_bar0.write_dword(0x001084, (addr >> 32) & 0xffffffff)
# DMA offset address
await dev_pf0_bar0.write_dword(0x001088, 0)
await dev_pf0_bar0.write_dword(0x00108c, 0)
# DMA offset mask
await dev_pf0_bar0.write_dword(0x001090, mask)
await dev_pf0_bar0.write_dword(0x001094, 0)
# DMA stride
await dev_pf0_bar0.write_dword(0x001098, stride)
await dev_pf0_bar0.write_dword(0x00109c, 0)
# RAM base address
await dev_pf0_bar0.write_dword(0x0010c0, 0)
await dev_pf0_bar0.write_dword(0x0010c4, 0)
# RAM offset address
await dev_pf0_bar0.write_dword(0x0010c8, 0)
await dev_pf0_bar0.write_dword(0x0010cc, 0)
# RAM offset mask
await dev_pf0_bar0.write_dword(0x0010d0, mask)
await dev_pf0_bar0.write_dword(0x0010d4, 0)
# RAM stride
await dev_pf0_bar0.write_dword(0x0010d8, stride)
await dev_pf0_bar0.write_dword(0x0010dc, 0)
# clear cycle count
await dev_pf0_bar0.write_dword(0x001008, 0)
await dev_pf0_bar0.write_dword(0x00100c, 0)
# block length
await dev_pf0_bar0.write_dword(0x001010, size)
# block count
await dev_pf0_bar0.write_dword(0x001018, count)
await dev_pf0_bar0.write_dword(0x00101c, 0)
if stall:
# stall RX
await dev_pf0_bar0.write_dword(0x000040, stall)
# start
await dev_pf0_bar0.write_dword(0x001000, 1)
# wait for stall
if stall:
for k in range(stall):
await RisingEdge(tb.dut.clk)
# poll for completion: up to 100 x 1 us of simulation time
for k in range(100):
await Timer(1000, 'ns')
run = await dev_pf0_bar0.read_dword(0x001000)
status = await dev_pf0_bar0.read_dword(0x000000)
if run == 0 and status & 0x300 == 0:
break
if run != 0:
tb.log.warning("Operation timed out")
if status & 0x300 != 0:
tb.log.warning("DMA engine busy")
# cycle counter at 0x1008; cycles*4 below assumes a 4 ns clock period
# (250 MHz) -- TODO confirm against the DUT clock
cycles = await dev_pf0_bar0.read_dword(0x001008)
rd_req = await dev_pf0_bar0.read_dword(0x000020) - rd_req
rd_cpl = await dev_pf0_bar0.read_dword(0x000024) - rd_cpl
tb.log.info("read %d x %d B (total %d B %d CPLD, stride %d) in %d ns (%d req %d cpl) %d Mbps",
count, size, count*size, count*((size+15)//16), stride, cycles*4, rd_req, rd_cpl, size * count * 8 * 1000 / (cycles * 4))
assert status & 0x300 == 0
@cocotb.test()
async def run_test(dut):
@ -257,6 +462,8 @@ async def run_test(dut):
await Timer(2000, 'ns')
# read status
status = await dev_pf0_bar0.read_dword(0x000000)
tb.log.info("DMA Status: 0x%x", status)
val = await dev_pf0_bar0.read_dword(0x000118)
tb.log.info("Status: 0x%x", val)
assert val == 0x800000AA
@ -271,6 +478,8 @@ async def run_test(dut):
await Timer(2000, 'ns')
# read status
status = await dev_pf0_bar0.read_dword(0x000000)
tb.log.info("DMA Status: 0x%x", status)
val = await dev_pf0_bar0.read_dword(0x000218)
tb.log.info("Status: 0x%x", val)
assert val == 0x80000055
@ -291,6 +500,8 @@ async def run_test(dut):
await Timer(2000, 'ns')
# read status
status = await dev_pf0_bar0.read_dword(0x000000)
tb.log.info("DMA Status: 0x%x", status)
val = await dev_pf0_bar0.read_dword(0x000218)
tb.log.info("Status: 0x%x", val)
assert val == 0x800000AA
@ -301,110 +512,66 @@ async def run_test(dut):
tb.log.info("Test DMA block operations")
# disable interrupts
await dev_pf0_bar0.write_dword(0x000008, 0)
region_len = 0x2000
src_offset = 0x0000
dest_offset = 0x4000
block_size = 256
block_stride = block_size
block_count = 32
# write packet data
mem[src_offset:src_offset+region_len] = bytearray([x % 256 for x in range(region_len)])
# enable DMA
await dev_pf0_bar0.write_dword(0x000000, 1)
# disable interrupts
await dev_pf0_bar0.write_dword(0x000008, 0)
# configure operation (read)
# DMA base address
await dev_pf0_bar0.write_dword(0x001080, (mem_base+src_offset) & 0xffffffff)
await dev_pf0_bar0.write_dword(0x001084, (mem_base+src_offset >> 32) & 0xffffffff)
# DMA offset address
await dev_pf0_bar0.write_dword(0x001088, 0)
await dev_pf0_bar0.write_dword(0x00108c, 0)
# DMA offset mask
await dev_pf0_bar0.write_dword(0x001090, region_len-1)
await dev_pf0_bar0.write_dword(0x001094, 0)
# DMA stride
await dev_pf0_bar0.write_dword(0x001098, block_stride)
await dev_pf0_bar0.write_dword(0x00109c, 0)
# RAM base address
await dev_pf0_bar0.write_dword(0x0010c0, 0)
await dev_pf0_bar0.write_dword(0x0010c4, 0)
# RAM offset address
await dev_pf0_bar0.write_dword(0x0010c8, 0)
await dev_pf0_bar0.write_dword(0x0010cc, 0)
# RAM offset mask
await dev_pf0_bar0.write_dword(0x0010d0, region_len-1)
await dev_pf0_bar0.write_dword(0x0010d4, 0)
# RAM stride
await dev_pf0_bar0.write_dword(0x0010d8, block_stride)
await dev_pf0_bar0.write_dword(0x0010dc, 0)
# clear cycle count
await dev_pf0_bar0.write_dword(0x001008, 0)
await dev_pf0_bar0.write_dword(0x00100c, 0)
# block length
await dev_pf0_bar0.write_dword(0x001010, block_size)
# block count
await dev_pf0_bar0.write_dword(0x001018, block_count)
await dev_pf0_bar0.write_dword(0x00101c, 0)
# start
await dev_pf0_bar0.write_dword(0x001000, 1)
for k in range(10):
cnt = await dev_pf0_bar0.read_dword(0x001018)
await Timer(1000, 'ns')
if cnt == 0:
break
# configure operation (write)
# DMA base address
await dev_pf0_bar0.write_dword(0x001180, (mem_base+dest_offset) & 0xffffffff)
await dev_pf0_bar0.write_dword(0x001184, (mem_base+dest_offset >> 32) & 0xffffffff)
# DMA offset address
await dev_pf0_bar0.write_dword(0x001188, 0)
await dev_pf0_bar0.write_dword(0x00118c, 0)
# DMA offset mask
await dev_pf0_bar0.write_dword(0x001190, region_len-1)
await dev_pf0_bar0.write_dword(0x001194, 0)
# DMA stride
await dev_pf0_bar0.write_dword(0x001198, block_stride)
await dev_pf0_bar0.write_dword(0x00119c, 0)
# RAM base address
await dev_pf0_bar0.write_dword(0x0011c0, 0)
await dev_pf0_bar0.write_dword(0x0011c4, 0)
# RAM offset address
await dev_pf0_bar0.write_dword(0x0011c8, 0)
await dev_pf0_bar0.write_dword(0x0011cc, 0)
# RAM offset mask
await dev_pf0_bar0.write_dword(0x0011d0, region_len-1)
await dev_pf0_bar0.write_dword(0x0011d4, 0)
# RAM stride
await dev_pf0_bar0.write_dword(0x0011d8, block_stride)
await dev_pf0_bar0.write_dword(0x0011dc, 0)
# clear cycle count
await dev_pf0_bar0.write_dword(0x001108, 0)
await dev_pf0_bar0.write_dword(0x00110c, 0)
# block length
await dev_pf0_bar0.write_dword(0x001110, block_size)
# block count
await dev_pf0_bar0.write_dword(0x001118, block_count)
await dev_pf0_bar0.write_dword(0x00111c, 0)
# start
await dev_pf0_bar0.write_dword(0x001100, 1)
for k in range(10):
cnt = await dev_pf0_bar0.read_dword(0x001118)
await Timer(1000, 'ns')
if cnt == 0:
break
await dma_block_read_bench(tb, dev, mem_base+src_offset, region_len-1, 256, 256, 32)
await dma_block_write_bench(tb, dev, mem_base+dest_offset, region_len-1, 256, 256, 32)
tb.log.info("%s", mem.hexdump_str(dest_offset, region_len))
assert mem[src_offset:src_offset+region_len] == mem[dest_offset:dest_offset+region_len]
tb.log.info("Test RX completion buffer (CPLH, 8)")
tb.rc.split_on_all_rcb = True
size = 8
stride = size
for count in range(32, 256+1, 8):
await dma_cpl_buf_test(tb, dev, mem_base, region_len-1, size, stride, count, 2000)
tb.log.info("Test RX completion buffer (CPLH, 8+64)")
size = 8+64
stride = 0
for count in range(8, 256+1, 8):
await dma_cpl_buf_test(tb, dev, mem_base+128-8, region_len-1, size, stride, count, 2000)
tb.log.info("Test RX completion buffer (CPLH, 8+128+8)")
size = 8+128+8
stride = 0
for count in range(8, 256+1, 8):
await dma_cpl_buf_test(tb, dev, mem_base+128-8, region_len-1, size, stride, count, 2000)
tb.rc.split_on_all_rcb = False
tb.log.info("Test RX completion buffer (CPLD)")
size = 512
stride = size
for count in range(8, 256+1, 8):
await dma_cpl_buf_test(tb, dev, mem_base, region_len-1, size, stride, count, 4000)
tb.log.info("Perform block reads")
count = 100
for size in [2**x for x in range(14)]:
stride = size
await dma_block_read_bench(tb, dev, mem_base, region_len-1, size, stride, count)
tb.log.info("Perform block writes")
count = 100
for size in [2**x for x in range(14)]:
stride = size
await dma_block_write_bench(tb, dev, mem_base, region_len-1, size, stride, count)
await RisingEdge(dut.clk)
await RisingEdge(dut.clk)
@ -466,7 +633,7 @@ def test_example_core_pcie_s10(request, data_width, l_tile):
parameters['READ_OP_TABLE_SIZE'] = parameters['PCIE_TAG_COUNT']
parameters['READ_TX_LIMIT'] = 2**parameters['TX_SEQ_NUM_WIDTH']
parameters['READ_CPLH_FC_LIMIT'] = 770
parameters['READ_CPLD_FC_LIMIT'] = 2500
parameters['READ_CPLD_FC_LIMIT'] = 2400
parameters['WRITE_OP_TABLE_SIZE'] = 2**parameters['TX_SEQ_NUM_WIDTH']
parameters['WRITE_TX_LIMIT'] = 2**parameters['TX_SEQ_NUM_WIDTH']
parameters['BAR0_APERTURE'] = 24

View File

@ -74,8 +74,8 @@ export PARAM_IMM_ENABLE := 1
export PARAM_IMM_WIDTH := 32
export PARAM_READ_OP_TABLE_SIZE := $(PARAM_PCIE_TAG_COUNT)
export PARAM_READ_TX_LIMIT := $(shell echo "$$(( 1 << ($(PARAM_RQ_SEQ_NUM_WIDTH)-1) ))" )
export PARAM_READ_CPLH_FC_LIMIT := $(if $(filter-out 60,$(PARAM_AXIS_PCIE_RQ_USER_WIDTH)),64,128)
export PARAM_READ_CPLD_FC_LIMIT := $(if $(filter-out 60,$(PARAM_AXIS_PCIE_RQ_USER_WIDTH)),992,2048)
export PARAM_READ_CPLH_FC_LIMIT := $(if $(filter-out 60,$(PARAM_AXIS_PCIE_RQ_USER_WIDTH)),256,64)
export PARAM_READ_CPLD_FC_LIMIT := $(if $(filter-out 60,$(PARAM_AXIS_PCIE_RQ_USER_WIDTH)),1792,960)
export PARAM_WRITE_OP_TABLE_SIZE := $(shell echo "$$(( 1 << ($(PARAM_RQ_SEQ_NUM_WIDTH)-1) ))" )
export PARAM_WRITE_TX_LIMIT := $(shell echo "$$(( 1 << ($(PARAM_RQ_SEQ_NUM_WIDTH)-1) ))" )
export PARAM_BAR0_APERTURE := 24

View File

@ -299,6 +299,211 @@ class TB(object):
await self.rc.enumerate()
async def dma_block_read_bench(tb, dev, addr, mask, size, stride, count):
"""Run a timed DMA block-read benchmark and log the measured throughput.

Programs the read-engine registers in BAR0 (DMA base address, offset
mask, stride, block length, block count), starts the operation, then
polls the run register (0x1000) and the status register (0x0000) until
the engine finishes or the poll loop times out.  Read-request (0x20)
and read-completion (0x24) counters are sampled before and after so the
log line reports per-run deltas.

Parameters:
    tb: testbench object; only tb.log is used here.
    dev: PCIe device under test; dev.bar_window[0] is the BAR0 window.
    addr: DMA base address in host memory.
    mask: offset mask applied on both the DMA and RAM sides
        (presumably region length minus one -- confirm with callers).
    size: block length in bytes.
    stride: address increment between successive blocks.
    count: number of blocks to read.

Raises AssertionError if status bits [9:8] (DMA engine busy, per the
warning below) are still set when the poll loop exits.
"""
dev_pf0_bar0 = dev.bar_window[0]
# snapshot request/completion counters so per-run deltas can be reported
rd_req = await dev_pf0_bar0.read_dword(0x000020)
rd_cpl = await dev_pf0_bar0.read_dword(0x000024)
# configure operation (read)
# DMA base address
await dev_pf0_bar0.write_dword(0x001080, addr & 0xffffffff)
await dev_pf0_bar0.write_dword(0x001084, (addr >> 32) & 0xffffffff)
# DMA offset address
await dev_pf0_bar0.write_dword(0x001088, 0)
await dev_pf0_bar0.write_dword(0x00108c, 0)
# DMA offset mask
await dev_pf0_bar0.write_dword(0x001090, mask)
await dev_pf0_bar0.write_dword(0x001094, 0)
# DMA stride
await dev_pf0_bar0.write_dword(0x001098, stride)
await dev_pf0_bar0.write_dword(0x00109c, 0)
# RAM base address
await dev_pf0_bar0.write_dword(0x0010c0, 0)
await dev_pf0_bar0.write_dword(0x0010c4, 0)
# RAM offset address
await dev_pf0_bar0.write_dword(0x0010c8, 0)
await dev_pf0_bar0.write_dword(0x0010cc, 0)
# RAM offset mask
await dev_pf0_bar0.write_dword(0x0010d0, mask)
await dev_pf0_bar0.write_dword(0x0010d4, 0)
# RAM stride
await dev_pf0_bar0.write_dword(0x0010d8, stride)
await dev_pf0_bar0.write_dword(0x0010dc, 0)
# clear cycle count
await dev_pf0_bar0.write_dword(0x001008, 0)
await dev_pf0_bar0.write_dword(0x00100c, 0)
# block length
await dev_pf0_bar0.write_dword(0x001010, size)
# block count
await dev_pf0_bar0.write_dword(0x001018, count)
await dev_pf0_bar0.write_dword(0x00101c, 0)
# start
await dev_pf0_bar0.write_dword(0x001000, 1)
# poll for completion: up to 1000 x 1 us of simulation time
for k in range(1000):
await Timer(1000, 'ns')
run = await dev_pf0_bar0.read_dword(0x001000)
status = await dev_pf0_bar0.read_dword(0x000000)
if run == 0 and status & 0x300 == 0:
break
if run != 0:
tb.log.warning("Operation timed out")
if status & 0x300 != 0:
tb.log.warning("DMA engine busy")
# cycle counter at 0x1008; cycles*4 below assumes a 4 ns clock period
# (250 MHz) -- TODO confirm against the DUT clock
cycles = await dev_pf0_bar0.read_dword(0x001008)
rd_req = await dev_pf0_bar0.read_dword(0x000020) - rd_req
rd_cpl = await dev_pf0_bar0.read_dword(0x000024) - rd_cpl
tb.log.info("read %d blocks of %d bytes (total %d B, stride %d) in %d ns (%d req %d cpl) %d Mbps",
count, size, count*size, stride, cycles*4, rd_req, rd_cpl, size * count * 8 * 1000 / (cycles * 4))
assert status & 0x300 == 0
async def dma_block_write_bench(tb, dev, addr, mask, size, stride, count):
"""Run a timed DMA block-write benchmark and log the measured throughput.

Programs the write-engine registers in BAR0 (DMA base address, offset
mask, stride, block length, block count), starts the operation, then
polls the run register (0x1100) and the status register (0x0000) until
the engine finishes or the poll loop times out.  The write-request
counter (0x28) is sampled before and after so the log line reports a
per-run delta.

Parameters:
    tb: testbench object; only tb.log is used here.
    dev: PCIe device under test; dev.bar_window[0] is the BAR0 window.
    addr: DMA base address in host memory.
    mask: offset mask applied on both the DMA and RAM sides
        (presumably region length minus one -- confirm with callers).
    size: block length in bytes.
    stride: address increment between successive blocks.
    count: number of blocks to write.

Raises AssertionError if status bits [9:8] (DMA engine busy, per the
warning below) are still set when the poll loop exits.
"""
dev_pf0_bar0 = dev.bar_window[0]
# snapshot request counter so a per-run delta can be reported
wr_req = await dev_pf0_bar0.read_dword(0x000028)
# configure operation (write)
# DMA base address
await dev_pf0_bar0.write_dword(0x001180, addr & 0xffffffff)
await dev_pf0_bar0.write_dword(0x001184, (addr >> 32) & 0xffffffff)
# DMA offset address
await dev_pf0_bar0.write_dword(0x001188, 0)
await dev_pf0_bar0.write_dword(0x00118c, 0)
# DMA offset mask
await dev_pf0_bar0.write_dword(0x001190, mask)
await dev_pf0_bar0.write_dword(0x001194, 0)
# DMA stride
await dev_pf0_bar0.write_dword(0x001198, stride)
await dev_pf0_bar0.write_dword(0x00119c, 0)
# RAM base address
await dev_pf0_bar0.write_dword(0x0011c0, 0)
await dev_pf0_bar0.write_dword(0x0011c4, 0)
# RAM offset address
await dev_pf0_bar0.write_dword(0x0011c8, 0)
await dev_pf0_bar0.write_dword(0x0011cc, 0)
# RAM offset mask
await dev_pf0_bar0.write_dword(0x0011d0, mask)
await dev_pf0_bar0.write_dword(0x0011d4, 0)
# RAM stride
await dev_pf0_bar0.write_dword(0x0011d8, stride)
await dev_pf0_bar0.write_dword(0x0011dc, 0)
# clear cycle count
await dev_pf0_bar0.write_dword(0x001108, 0)
await dev_pf0_bar0.write_dword(0x00110c, 0)
# block length
await dev_pf0_bar0.write_dword(0x001110, size)
# block count
await dev_pf0_bar0.write_dword(0x001118, count)
await dev_pf0_bar0.write_dword(0x00111c, 0)
# start
await dev_pf0_bar0.write_dword(0x001100, 1)
# poll for completion: up to 1000 x 1 us of simulation time
for k in range(1000):
await Timer(1000, 'ns')
run = await dev_pf0_bar0.read_dword(0x001100)
status = await dev_pf0_bar0.read_dword(0x000000)
if run == 0 and status & 0x300 == 0:
break
if run != 0:
tb.log.warning("Operation timed out")
if status & 0x300 != 0:
tb.log.warning("DMA engine busy")
# cycle counter at 0x1108; cycles*4 below assumes a 4 ns clock period
# (250 MHz) -- TODO confirm against the DUT clock
cycles = await dev_pf0_bar0.read_dword(0x001108)
wr_req = await dev_pf0_bar0.read_dword(0x000028) - wr_req
tb.log.info("wrote %d blocks of %d bytes (total %d B, stride %d) in %d ns (%d req) %d Mbps",
count, size, count*size, stride, cycles*4, wr_req, size * count * 8 * 1000 / (cycles * 4))
assert status & 0x300 == 0
async def dma_cpl_buf_test(tb, dev, addr, mask, size, stride, count, stall):
"""Exercise the RX completion buffer by issuing DMA reads while RX is stalled.

Programs the read-engine registers in BAR0 exactly like
dma_block_read_bench, but optionally writes a stall value to register
0x40 ("stall RX" per the comment below) before starting, then waits
that many DUT clock cycles so completions accumulate in the RX buffer
before being drained.  Read-request (0x20) and read-completion (0x24)
counters are sampled before and after; the log line also reports the
CPLD credit count computed as count*((size+15)//16) (16 B per CPLD
credit).

Parameters:
    tb: testbench object; tb.log and tb.dut.clk are used.
    dev: PCIe device under test; dev.bar_window[0] is the BAR0 window.
    addr: DMA base address in host memory.
    mask: offset mask applied on both the DMA and RAM sides.
    size: block length in bytes.
    stride: address increment between successive blocks (0 rereads the
        same address).
    count: number of blocks to read.
    stall: RX stall duration in clock cycles; 0/falsy disables the stall.

Raises AssertionError if status bits [9:8] (DMA engine busy) are still
set when the poll loop exits.
"""
dev_pf0_bar0 = dev.bar_window[0]
# snapshot request/completion counters so per-run deltas can be reported
rd_req = await dev_pf0_bar0.read_dword(0x000020)
rd_cpl = await dev_pf0_bar0.read_dword(0x000024)
# configure operation (read)
# DMA base address
await dev_pf0_bar0.write_dword(0x001080, addr & 0xffffffff)
await dev_pf0_bar0.write_dword(0x001084, (addr >> 32) & 0xffffffff)
# DMA offset address
await dev_pf0_bar0.write_dword(0x001088, 0)
await dev_pf0_bar0.write_dword(0x00108c, 0)
# DMA offset mask
await dev_pf0_bar0.write_dword(0x001090, mask)
await dev_pf0_bar0.write_dword(0x001094, 0)
# DMA stride
await dev_pf0_bar0.write_dword(0x001098, stride)
await dev_pf0_bar0.write_dword(0x00109c, 0)
# RAM base address
await dev_pf0_bar0.write_dword(0x0010c0, 0)
await dev_pf0_bar0.write_dword(0x0010c4, 0)
# RAM offset address
await dev_pf0_bar0.write_dword(0x0010c8, 0)
await dev_pf0_bar0.write_dword(0x0010cc, 0)
# RAM offset mask
await dev_pf0_bar0.write_dword(0x0010d0, mask)
await dev_pf0_bar0.write_dword(0x0010d4, 0)
# RAM stride
await dev_pf0_bar0.write_dword(0x0010d8, stride)
await dev_pf0_bar0.write_dword(0x0010dc, 0)
# clear cycle count
await dev_pf0_bar0.write_dword(0x001008, 0)
await dev_pf0_bar0.write_dword(0x00100c, 0)
# block length
await dev_pf0_bar0.write_dword(0x001010, size)
# block count
await dev_pf0_bar0.write_dword(0x001018, count)
await dev_pf0_bar0.write_dword(0x00101c, 0)
if stall:
# stall RX
await dev_pf0_bar0.write_dword(0x000040, stall)
# start
await dev_pf0_bar0.write_dword(0x001000, 1)
# wait for stall
if stall:
for k in range(stall):
await RisingEdge(tb.dut.clk)
# poll for completion: up to 100 x 1 us of simulation time
for k in range(100):
await Timer(1000, 'ns')
run = await dev_pf0_bar0.read_dword(0x001000)
status = await dev_pf0_bar0.read_dword(0x000000)
if run == 0 and status & 0x300 == 0:
break
if run != 0:
tb.log.warning("Operation timed out")
if status & 0x300 != 0:
tb.log.warning("DMA engine busy")
# cycle counter at 0x1008; cycles*4 below assumes a 4 ns clock period
# (250 MHz) -- TODO confirm against the DUT clock
cycles = await dev_pf0_bar0.read_dword(0x001008)
rd_req = await dev_pf0_bar0.read_dword(0x000020) - rd_req
rd_cpl = await dev_pf0_bar0.read_dword(0x000024) - rd_cpl
tb.log.info("read %d x %d B (total %d B %d CPLD, stride %d) in %d ns (%d req %d cpl) %d Mbps",
count, size, count*size, count*((size+15)//16), stride, cycles*4, rd_req, rd_cpl, size * count * 8 * 1000 / (cycles * 4))
assert status & 0x300 == 0
@cocotb.test()
async def run_test(dut):
@ -350,6 +555,8 @@ async def run_test(dut):
await Timer(2000, 'ns')
# read status
status = await dev_pf0_bar0.read_dword(0x000000)
tb.log.info("DMA Status: 0x%x", status)
val = await dev_pf0_bar0.read_dword(0x000118)
tb.log.info("Status: 0x%x", val)
assert val == 0x800000AA
@ -364,6 +571,8 @@ async def run_test(dut):
await Timer(2000, 'ns')
# read status
status = await dev_pf0_bar0.read_dword(0x000000)
tb.log.info("DMA Status: 0x%x", status)
val = await dev_pf0_bar0.read_dword(0x000218)
tb.log.info("Status: 0x%x", val)
assert val == 0x80000055
@ -384,6 +593,8 @@ async def run_test(dut):
await Timer(2000, 'ns')
# read status
status = await dev_pf0_bar0.read_dword(0x000000)
tb.log.info("DMA Status: 0x%x", status)
val = await dev_pf0_bar0.read_dword(0x000218)
tb.log.info("Status: 0x%x", val)
assert val == 0x800000AA
@ -394,112 +605,66 @@ async def run_test(dut):
tb.log.info("Test DMA block operations")
# disable interrupts
await dev_pf0_bar0.write_dword(0x000008, 0)
region_len = 0x2000
src_offset = 0x0000
dest_offset = 0x4000
block_size = 256
block_stride = block_size
block_count = 32
# write packet data
mem[src_offset:src_offset+region_len] = bytearray([x % 256 for x in range(region_len)])
# enable DMA
await dev_pf0_bar0.write_dword(0x000000, 1)
# disable interrupts
await dev_pf0_bar0.write_dword(0x000008, 0)
# configure operation (read)
# DMA base address
await dev_pf0_bar0.write_dword(0x001080, (mem_base+src_offset) & 0xffffffff)
await dev_pf0_bar0.write_dword(0x001084, (mem_base+src_offset >> 32) & 0xffffffff)
# DMA offset address
await dev_pf0_bar0.write_dword(0x001088, 0)
await dev_pf0_bar0.write_dword(0x00108c, 0)
# DMA offset mask
await dev_pf0_bar0.write_dword(0x001090, region_len-1)
await dev_pf0_bar0.write_dword(0x001094, 0)
# DMA stride
await dev_pf0_bar0.write_dword(0x001098, block_stride)
await dev_pf0_bar0.write_dword(0x00109c, 0)
# RAM base address
await dev_pf0_bar0.write_dword(0x0010c0, 0)
await dev_pf0_bar0.write_dword(0x0010c4, 0)
# RAM offset address
await dev_pf0_bar0.write_dword(0x0010c8, 0)
await dev_pf0_bar0.write_dword(0x0010cc, 0)
# RAM offset mask
await dev_pf0_bar0.write_dword(0x0010d0, region_len-1)
await dev_pf0_bar0.write_dword(0x0010d4, 0)
# RAM stride
await dev_pf0_bar0.write_dword(0x0010d8, block_stride)
await dev_pf0_bar0.write_dword(0x0010dc, 0)
# clear cycle count
await dev_pf0_bar0.write_dword(0x001008, 0)
await dev_pf0_bar0.write_dword(0x00100c, 0)
# block length
await dev_pf0_bar0.write_dword(0x001010, block_size)
# block count
await dev_pf0_bar0.write_dword(0x001018, block_count)
await dev_pf0_bar0.write_dword(0x00101c, 0)
# start
await dev_pf0_bar0.write_dword(0x001000, 1)
for k in range(10):
cnt = await dev_pf0_bar0.read_dword(0x001018)
await Timer(1000, 'ns')
if cnt == 0:
break
# configure operation (write)
# DMA base address
await dev_pf0_bar0.write_dword(0x001180, (mem_base+dest_offset) & 0xffffffff)
await dev_pf0_bar0.write_dword(0x001184, (mem_base+dest_offset >> 32) & 0xffffffff)
# DMA offset address
await dev_pf0_bar0.write_dword(0x001188, 0)
await dev_pf0_bar0.write_dword(0x00118c, 0)
# DMA offset mask
await dev_pf0_bar0.write_dword(0x001190, region_len-1)
await dev_pf0_bar0.write_dword(0x001194, 0)
# DMA stride
await dev_pf0_bar0.write_dword(0x001198, block_stride)
await dev_pf0_bar0.write_dword(0x00119c, 0)
# RAM base address
await dev_pf0_bar0.write_dword(0x0011c0, 0)
await dev_pf0_bar0.write_dword(0x0011c4, 0)
# RAM offset address
await dev_pf0_bar0.write_dword(0x0011c8, 0)
await dev_pf0_bar0.write_dword(0x0011cc, 0)
# RAM offset mask
await dev_pf0_bar0.write_dword(0x0011d0, region_len-1)
await dev_pf0_bar0.write_dword(0x0011d4, 0)
# RAM stride
await dev_pf0_bar0.write_dword(0x0011d8, block_stride)
await dev_pf0_bar0.write_dword(0x0011dc, 0)
# clear cycle count
await dev_pf0_bar0.write_dword(0x001108, 0)
await dev_pf0_bar0.write_dword(0x00110c, 0)
# block length
await dev_pf0_bar0.write_dword(0x001110, block_size)
# block count
await dev_pf0_bar0.write_dword(0x001118, block_count)
await dev_pf0_bar0.write_dword(0x00111c, 0)
# start
await dev_pf0_bar0.write_dword(0x001100, 1)
for k in range(10):
cnt = await dev_pf0_bar0.read_dword(0x001118)
await Timer(1000, 'ns')
if cnt == 0:
break
await Timer(2000, 'ns')
await dma_block_read_bench(tb, dev, mem_base+src_offset, region_len-1, 256, 256, 32)
await dma_block_write_bench(tb, dev, mem_base+dest_offset, region_len-1, 256, 256, 32)
tb.log.info("%s", mem.hexdump_str(dest_offset, region_len))
assert mem[src_offset:src_offset+region_len] == mem[dest_offset:dest_offset+region_len]
tb.log.info("Test RX completion buffer (CPLH, 8)")
tb.rc.split_on_all_rcb = True
size = 8
stride = size
for count in range(32, 256+1, 8):
await dma_cpl_buf_test(tb, dev, mem_base, region_len-1, size, stride, count, 2000)
tb.log.info("Test RX completion buffer (CPLH, 8+64)")
size = 8+64
stride = 0
for count in range(8, 256+1, 8):
await dma_cpl_buf_test(tb, dev, mem_base+128-8, region_len-1, size, stride, count, 2000)
tb.log.info("Test RX completion buffer (CPLH, 8+128+8)")
size = 8+128+8
stride = 0
for count in range(8, 256+1, 8):
await dma_cpl_buf_test(tb, dev, mem_base+128-8, region_len-1, size, stride, count, 2000)
tb.rc.split_on_all_rcb = False
tb.log.info("Test RX completion buffer (CPLD)")
size = 512
stride = size
for count in range(8, 256+1, 8):
await dma_cpl_buf_test(tb, dev, mem_base, region_len-1, size, stride, count, 4000)
tb.log.info("Perform block reads")
count = 100
for size in [2**x for x in range(14)]:
stride = size
await dma_block_read_bench(tb, dev, mem_base, region_len-1, size, stride, count)
tb.log.info("Perform block writes")
count = 100
for size in [2**x for x in range(14)]:
stride = size
await dma_block_write_bench(tb, dev, mem_base, region_len-1, size, stride, count)
await RisingEdge(dut.clk)
await RisingEdge(dut.clk)
@ -566,8 +731,8 @@ def test_example_core_pcie_us(request, axis_pcie_data_width, straddle):
parameters['IMM_WIDTH'] = 32
parameters['READ_OP_TABLE_SIZE'] = parameters['PCIE_TAG_COUNT']
parameters['READ_TX_LIMIT'] = 2**(parameters['RQ_SEQ_NUM_WIDTH']-1)
parameters['READ_CPLH_FC_LIMIT'] = 64 if parameters['AXIS_PCIE_RQ_USER_WIDTH'] == 60 else 128
parameters['READ_CPLD_FC_LIMIT'] = 992 if parameters['AXIS_PCIE_RQ_USER_WIDTH'] == 60 else 2048
parameters['READ_CPLH_FC_LIMIT'] = 64 if parameters['AXIS_PCIE_RQ_USER_WIDTH'] == 60 else 256
parameters['READ_CPLD_FC_LIMIT'] = 1024-64 if parameters['AXIS_PCIE_RQ_USER_WIDTH'] == 60 else 2048-256
parameters['WRITE_OP_TABLE_SIZE'] = 2**(parameters['RQ_SEQ_NUM_WIDTH']-1)
parameters['WRITE_TX_LIMIT'] = 2**(parameters['RQ_SEQ_NUM_WIDTH']-1)
parameters['BAR0_APERTURE'] = 24

View File

@ -56,7 +56,7 @@ module fpga (
parameter AXIS_PCIE_DATA_WIDTH = 512;
parameter AXIS_PCIE_KEEP_WIDTH = (AXIS_PCIE_DATA_WIDTH/32);
parameter AXIS_PCIE_RC_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 75 : 161;
parameter AXIS_PCIE_RQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 60 : 137;
parameter AXIS_PCIE_RQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 62 : 137;
parameter AXIS_PCIE_CQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 85 : 183;
parameter AXIS_PCIE_CC_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 33 : 81;
parameter RC_STRADDLE = AXIS_PCIE_DATA_WIDTH >= 256;

View File

@ -161,8 +161,8 @@ example_core_pcie_us #(
.PCIE_TAG_COUNT(PCIE_TAG_COUNT),
.READ_OP_TABLE_SIZE(PCIE_TAG_COUNT),
.READ_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)),
.READ_CPLH_FC_LIMIT(128),
.READ_CPLD_FC_LIMIT(2048),
.READ_CPLH_FC_LIMIT(256),
.READ_CPLD_FC_LIMIT(2048-256),
.WRITE_OP_TABLE_SIZE(2**(RQ_SEQ_NUM_WIDTH-1)),
.WRITE_TX_LIMIT(2**(RQ_SEQ_NUM_WIDTH-1)),
.BAR0_APERTURE(BAR0_APERTURE),
@ -265,8 +265,7 @@ example_core_pcie_us_inst (
*/
.cfg_max_read_req(cfg_max_read_req),
.cfg_max_payload(cfg_max_payload),
// .cfg_rcb_status(cfg_rcb_status),
.cfg_rcb_status(1'b1), // force RCB 128 due to insufficient CPLH limit in US+ PCIe HIP
.cfg_rcb_status(cfg_rcb_status),
/*
* Status

View File

@ -54,7 +54,6 @@ export PARAM_AXIS_PCIE_RQ_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_
export PARAM_AXIS_PCIE_RC_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),75,161)
export PARAM_AXIS_PCIE_CQ_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),88,183)
export PARAM_AXIS_PCIE_CC_USER_WIDTH := $(if $(filter-out 512,$(PARAM_AXIS_PCIE_DATA_WIDTH)),33,81)
export PARAM_RQ_SEQ_NUM_WIDTH := 6
ifeq ($(SIM), icarus)
PLUSARGS += -fst

View File

@ -396,7 +396,6 @@ def test_fpga_core(request):
parameters['AXIS_PCIE_RC_USER_WIDTH'] = 75 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 161
parameters['AXIS_PCIE_CQ_USER_WIDTH'] = 88 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 183
parameters['AXIS_PCIE_CC_USER_WIDTH'] = 33 if parameters['AXIS_PCIE_DATA_WIDTH'] < 512 else 81
parameters['RQ_SEQ_NUM_WIDTH'] = 6
extra_env = {f'PARAM_{k}': str(v) for k, v in parameters.items()}

View File

@ -422,13 +422,13 @@ reg [OP_TAG_WIDTH+1-1:0] active_op_count_reg = 0;
reg inc_active_op;
reg dec_active_op;
reg [CL_CPLH_FC_LIMIT+1-1:0] active_cplh_fc_count_reg = 0;
reg active_cplh_fc_av_reg = 1'b1;
reg [CL_CPLH_FC_LIMIT+1-1:0] active_cplh_fc_count_reg = 0, active_cplh_fc_count_next;
reg active_cplh_fc_av_reg = 1'b1, active_cplh_fc_av_next;
reg [6:0] inc_active_cplh_fc_count;
reg [6:0] dec_active_cplh_fc_count;
reg [CL_CPLD_FC_LIMIT+1-1:0] active_cpld_fc_count_reg = 0;
reg active_cpld_fc_av_reg = 1'b1;
reg [CL_CPLD_FC_LIMIT+1-1:0] active_cpld_fc_count_reg = 0, active_cpld_fc_count_next;
reg active_cpld_fc_av_reg = 1'b1, active_cpld_fc_av_next;
reg [8:0] inc_active_cpld_fc_count;
reg [8:0] dec_active_cpld_fc_count;
@ -1382,6 +1382,12 @@ always @* begin
end
active_tx_count_av_next = active_tx_count_next < TX_LIMIT;
active_cplh_fc_count_next <= active_cplh_fc_count_reg + inc_active_cplh_fc_count - dec_active_cplh_fc_count;
active_cplh_fc_av_next <= !CPLH_FC_LIMIT || active_cplh_fc_count_next < CPLH_FC_LIMIT;
active_cpld_fc_count_next <= active_cpld_fc_count_reg + inc_active_cpld_fc_count - dec_active_cpld_fc_count;
active_cpld_fc_av_next <= !CPLD_FC_LIMIT || active_cpld_fc_count_next < CPLD_FC_LIMIT;
end
always @(posedge clk) begin
@ -1501,11 +1507,11 @@ always @(posedge clk) begin
active_tag_count_reg <= active_tag_count_reg + inc_active_tag - dec_active_tag;
active_op_count_reg <= active_op_count_reg + inc_active_op - dec_active_op;
active_cplh_fc_count_reg <= active_cplh_fc_count_reg + inc_active_cplh_fc_count - dec_active_cplh_fc_count;
active_cplh_fc_av_reg <= !CPLH_FC_LIMIT || active_cplh_fc_count_reg < CPLH_FC_LIMIT;
active_cplh_fc_count_reg <= active_cplh_fc_count_next;
active_cplh_fc_av_reg <= active_cplh_fc_av_next;
active_cpld_fc_count_reg <= active_cpld_fc_count_reg + inc_active_cpld_fc_count - dec_active_cpld_fc_count;
active_cpld_fc_av_reg <= !CPLD_FC_LIMIT || active_cpld_fc_count_reg < CPLD_FC_LIMIT;
active_cpld_fc_count_reg <= active_cpld_fc_count_next;
active_cpld_fc_av_reg <= active_cpld_fc_av_next;
pcie_tag_table_start_ptr_reg <= pcie_tag_table_start_ptr_next;
pcie_tag_table_start_ram_sel_reg <= pcie_tag_table_start_ram_sel_next;

View File

@ -347,7 +347,7 @@ always @* begin
// compute mux settings
for (port = 0; port < PORTS; port = port + 1) begin
port_seg_valid[port] = pause[port] ? 0 : {2{fifo_ctrl_tlp_valid[port]}} >> fifo_ctrl_seg_offset[port];
port_seg_valid[port] = {2{fifo_ctrl_tlp_valid[port]}} >> fifo_ctrl_seg_offset[port];
port_seg_eop[port] = {2{fifo_ctrl_tlp_eop[port]}} >> fifo_ctrl_seg_offset[port];
end
@ -383,7 +383,7 @@ always @* begin
port_cyc = cur_port;
seg_offset_cyc = port_seg_offset_cyc[cur_port];
seg_count_cyc = port_seg_count_cyc[cur_port];
if (port_seg_valid[cur_port][0]) begin
if (!pause[cur_port] && port_seg_valid[cur_port][0]) begin
// set frame
frame_cyc = 1;
sel_tlp_seq_valid_cyc[OUT_TLP_SEG_COUNT*cur_port+seg] = 1'b1;

View File

@ -36,10 +36,7 @@ export PARAM_IRQ_INDEX_WIDTH := 11
export PARAM_AXIL_DATA_WIDTH := 32
export PARAM_AXIL_ADDR_WIDTH := $(shell expr $(PARAM_IRQ_INDEX_WIDTH) + 5 )
export PARAM_AXIL_STRB_WIDTH := $(shell expr $(PARAM_AXIL_DATA_WIDTH) / 8 )
export PARAM_TLP_DATA_WIDTH := 64
export PARAM_TLP_STRB_WIDTH := $(shell expr $(PARAM_TLP_DATA_WIDTH) / 32 )
export PARAM_TLP_HDR_WIDTH := 128
export PARAM_TLP_SEG_COUNT := 1
export PARAM_TLP_FORCE_64_BIT_ADDR := 0
ifeq ($(SIM), icarus)

View File

@ -319,8 +319,7 @@ rtl_dir = os.path.abspath(os.path.join(tests_dir, '..', '..', 'rtl'))
@pytest.mark.parametrize("axil_data_width", [32, 64])
@pytest.mark.parametrize("pcie_data_width", [64, 128])
def test_pcie_msix(request, pcie_data_width, axil_data_width):
def test_pcie_msix(request, axil_data_width):
dut = "pcie_msix"
module = os.path.splitext(os.path.basename(__file__))[0]
toplevel = dut
@ -335,10 +334,7 @@ def test_pcie_msix(request, pcie_data_width, axil_data_width):
parameters['AXIL_DATA_WIDTH'] = axil_data_width
parameters['AXIL_ADDR_WIDTH'] = parameters['IRQ_INDEX_WIDTH']+5
parameters['AXIL_STRB_WIDTH'] = (axil_data_width // 8)
parameters['TLP_DATA_WIDTH'] = pcie_data_width
parameters['TLP_STRB_WIDTH'] = pcie_data_width // 32
parameters['TLP_HDR_WIDTH'] = 128
parameters['TLP_SEG_COUNT'] = 1
parameters['TLP_FORCE_64_BIT_ADDR'] = 0
extra_env = {f'PARAM_{k}': str(v) for k, v in parameters.items()}

View File

@ -270,9 +270,6 @@ def test_pcie_us_axi_dma_wr(request, axis_pcie_data_width, pcie_offset):
parameters['AXI_ID_WIDTH'] = 8
parameters['AXI_MAX_BURST_LEN'] = 256
parameters['PCIE_ADDR_WIDTH'] = 64
parameters['PCIE_TAG_COUNT'] = 64 if parameters['AXIS_PCIE_RQ_USER_WIDTH'] == 60 else 256
parameters['PCIE_TAG_WIDTH'] = (parameters['PCIE_TAG_COUNT']-1).bit_length()
parameters['PCIE_EXT_TAG_ENABLE'] = int(parameters['PCIE_TAG_COUNT'] > 32)
parameters['LEN_WIDTH'] = 20
parameters['TAG_WIDTH'] = 8
parameters['OP_TABLE_SIZE'] = 2**(parameters['RQ_SEQ_NUM_WIDTH']-1)

View File

@ -18,7 +18,7 @@ deps =
cocotb-bus == 0.2.1
cocotb-test == 0.2.4
cocotbext-axi == 0.1.24
cocotbext-pcie == 0.2.12
cocotbext-pcie == 0.2.14
jinja2 == 3.1.2
commands =