From b1b82a3f2bebf461968e67778804d6dbf6e154a3 Mon Sep 17 00:00:00 2001 From: Alex Forencich Date: Fri, 29 Jul 2022 17:16:05 -0700 Subject: [PATCH 1/7] Add pause inputs to TLP mux modules Signed-off-by: Alex Forencich --- example/common/rtl/example_core_pcie.v | 5 +++++ rtl/pcie_ptile_if_tx.v | 5 +++++ rtl/pcie_s10_if_tx.v | 5 +++++ rtl/pcie_tlp_fifo_mux.v | 7 ++++++- rtl/pcie_tlp_fifo_mux_wrap.py | 12 ++++++++++++ rtl/pcie_tlp_mux.v | 7 ++++++- rtl/pcie_tlp_mux_wrap.py | 12 ++++++++++++ tb/pcie_tlp_fifo_mux/test_pcie_tlp_fifo_mux.py | 3 +++ tb/pcie_tlp_mux/test_pcie_tlp_mux.py | 3 +++ 9 files changed, 57 insertions(+), 2 deletions(-) diff --git a/example/common/rtl/example_core_pcie.v b/example/common/rtl/example_core_pcie.v index 718996bc0..8f4360ce4 100644 --- a/example/common/rtl/example_core_pcie.v +++ b/example/common/rtl/example_core_pcie.v @@ -450,6 +450,11 @@ pcie_tlp_mux_inst ( .out_tlp_eop(tx_cpl_tlp_eop), .out_tlp_ready(tx_cpl_tlp_ready), + /* + * Control + */ + .pause(0), + /* * Status */ diff --git a/rtl/pcie_ptile_if_tx.v b/rtl/pcie_ptile_if_tx.v index 48bce3761..a0b8d0320 100644 --- a/rtl/pcie_ptile_if_tx.v +++ b/rtl/pcie_ptile_if_tx.v @@ -297,6 +297,11 @@ pcie_tlp_fifo_mux_inst ( .out_tlp_eop(mux_out_tlp_eop), .out_tlp_ready(mux_out_tlp_ready_cmb), + /* + * Control + */ + .pause(0), + /* * Status */ diff --git a/rtl/pcie_s10_if_tx.v b/rtl/pcie_s10_if_tx.v index d57d42bea..7057a8694 100644 --- a/rtl/pcie_s10_if_tx.v +++ b/rtl/pcie_s10_if_tx.v @@ -320,6 +320,11 @@ pcie_tlp_fifo_mux_inst ( .out_tlp_eop(mux_out_tlp_eop), .out_tlp_ready(mux_out_tlp_ready), + /* + * Control + */ + .pause(0), + /* * Status */ diff --git a/rtl/pcie_tlp_fifo_mux.v b/rtl/pcie_tlp_fifo_mux.v index 36558ec23..fd7583344 100644 --- a/rtl/pcie_tlp_fifo_mux.v +++ b/rtl/pcie_tlp_fifo_mux.v @@ -90,6 +90,11 @@ module pcie_tlp_fifo_mux # output wire [OUT_TLP_SEG_COUNT-1:0] out_tlp_eop, input wire out_tlp_ready, + /* + * Control + */ + input wire [PORTS-1:0] pause, + /* * Status */ @@ 
-336,7 +341,7 @@ always @* begin end end for (port = 0; port < PORTS; port = port + 1) begin - if (port_seg_valid[cur_port][0] && !frame_cyc) begin + if (port_seg_valid[cur_port][0] && !pause[cur_port] && !frame_cyc) begin // select port, set frame frame_cyc = 1; port_cyc = cur_port; diff --git a/rtl/pcie_tlp_fifo_mux_wrap.py b/rtl/pcie_tlp_fifo_mux_wrap.py index 09c9512c7..45d1e23fc 100755 --- a/rtl/pcie_tlp_fifo_mux_wrap.py +++ b/rtl/pcie_tlp_fifo_mux_wrap.py @@ -126,6 +126,13 @@ module {{name}} # output wire [OUT_TLP_SEG_COUNT-1:0] out_tlp_eop, input wire out_tlp_ready, + /* + * Control + */ +{%- for p in range(n) %} + input wire in{{'%02d'%p}}_pause, +{%- endfor %} + /* * Status */ @@ -184,6 +191,11 @@ pcie_tlp_fifo_mux_inst ( .out_tlp_eop(out_tlp_eop), .out_tlp_ready(out_tlp_ready), + /* + * Control + */ + .pause({ {% for p in range(n-1,-1,-1) %}in{{'%02d'%p}}_pause{% if not loop.last %}, {% endif %}{% endfor %} }), + /* * Status */ diff --git a/rtl/pcie_tlp_mux.v b/rtl/pcie_tlp_mux.v index d6562c3c7..ce10120f1 100644 --- a/rtl/pcie_tlp_mux.v +++ b/rtl/pcie_tlp_mux.v @@ -84,6 +84,11 @@ module pcie_tlp_mux # output wire [TLP_SEG_COUNT-1:0] out_tlp_eop, input wire out_tlp_ready, + /* + * Control + */ + input wire [PORTS-1:0] pause, + /* * Status */ @@ -267,7 +272,7 @@ always @* begin end end for (port = 0; port < PORTS; port = port + 1) begin - if (port_seg_valid[cur_port][0] && !frame_cyc) begin + if (port_seg_valid[cur_port][0] && !pause[cur_port] && !frame_cyc) begin // select port, set frame frame_cyc = 1; port_cyc = cur_port; diff --git a/rtl/pcie_tlp_mux_wrap.py b/rtl/pcie_tlp_mux_wrap.py index 1f4d5c7fa..fd0a650e0 100755 --- a/rtl/pcie_tlp_mux_wrap.py +++ b/rtl/pcie_tlp_mux_wrap.py @@ -120,6 +120,13 @@ module {{name}} # output wire [TLP_SEG_COUNT-1:0] out_tlp_eop, input wire out_tlp_ready, + /* + * Control + */ +{%- for p in range(n) %} + input wire in{{'%02d'%p}}_pause, +{%- endfor %} + /* * Status */ @@ -173,6 +180,11 @@ pcie_tlp_mux_inst ( 
.out_tlp_eop(out_tlp_eop), .out_tlp_ready(out_tlp_ready), + /* + * Control + */ + .pause({ {% for p in range(n-1,-1,-1) %}in{{'%02d'%p}}_pause{% if not loop.last %}, {% endif %}{% endfor %} }), + /* * Status */ diff --git a/tb/pcie_tlp_fifo_mux/test_pcie_tlp_fifo_mux.py b/tb/pcie_tlp_fifo_mux/test_pcie_tlp_fifo_mux.py index 2005bfd75..ff91b453e 100644 --- a/tb/pcie_tlp_fifo_mux/test_pcie_tlp_fifo_mux.py +++ b/tb/pcie_tlp_fifo_mux/test_pcie_tlp_fifo_mux.py @@ -66,6 +66,9 @@ class TB(object): self.source = [PcieIfSource(PcieIfBus.from_prefix(dut, f"in{k:02d}_tlp"), dut.clk, dut.rst) for k in range(ports)] self.sink = PcieIfSink(PcieIfBus.from_prefix(dut, "out_tlp"), dut.clk, dut.rst) + for k in range(ports): + getattr(dut, f"in{k:02d}_pause").setimmediatevalue(0) + def set_idle_generator(self, generator=None): if generator: for source in self.source: diff --git a/tb/pcie_tlp_mux/test_pcie_tlp_mux.py b/tb/pcie_tlp_mux/test_pcie_tlp_mux.py index 194c3d677..d04d0a6a3 100644 --- a/tb/pcie_tlp_mux/test_pcie_tlp_mux.py +++ b/tb/pcie_tlp_mux/test_pcie_tlp_mux.py @@ -66,6 +66,9 @@ class TB(object): self.source = [PcieIfSource(PcieIfBus.from_prefix(dut, f"in{k:02d}_tlp"), dut.clk, dut.rst) for k in range(ports)] self.sink = PcieIfSink(PcieIfBus.from_prefix(dut, "out_tlp"), dut.clk, dut.rst) + for k in range(ports): + getattr(dut, f"in{k:02d}_pause").setimmediatevalue(0) + def set_idle_generator(self, generator=None): if generator: for source in self.source: From 1dfdd8b0e3f7cbeed054cb1c1aac176df37181e8 Mon Sep 17 00:00:00 2001 From: Alex Forencich Date: Sun, 31 Jul 2022 17:24:03 -0700 Subject: [PATCH 2/7] Timing optimization Signed-off-by: Alex Forencich --- rtl/pcie_tlp_fifo_mux.v | 11 +++++++---- rtl/pcie_tlp_mux.v | 11 +++++++---- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/rtl/pcie_tlp_fifo_mux.v b/rtl/pcie_tlp_fifo_mux.v index fd7583344..3c7d63cfc 100644 --- a/rtl/pcie_tlp_fifo_mux.v +++ b/rtl/pcie_tlp_fifo_mux.v @@ -341,13 +341,16 @@ always @* begin 
end end for (port = 0; port < PORTS; port = port + 1) begin - if (port_seg_valid[cur_port][0] && !pause[cur_port] && !frame_cyc) begin - // select port, set frame - frame_cyc = 1; + if (!frame_cyc) begin + // select port port_cyc = cur_port; seg_offset_cyc = port_seg_offset_cyc[cur_port]; seg_count_cyc = port_seg_count_cyc[cur_port]; - sel_tlp_seq_valid_cyc[OUT_TLP_SEG_COUNT*cur_port+seg] = 1'b1; + if (port_seg_valid[cur_port][0] && !pause[cur_port]) begin + // set frame + frame_cyc = 1; + sel_tlp_seq_valid_cyc[OUT_TLP_SEG_COUNT*cur_port+seg] = 1'b1; + end end // next port if (ARB_LSB_HIGH_PRIORITY) begin diff --git a/rtl/pcie_tlp_mux.v b/rtl/pcie_tlp_mux.v index ce10120f1..c111e8b82 100644 --- a/rtl/pcie_tlp_mux.v +++ b/rtl/pcie_tlp_mux.v @@ -272,12 +272,15 @@ always @* begin end end for (port = 0; port < PORTS; port = port + 1) begin - if (port_seg_valid[cur_port][0] && !pause[cur_port] && !frame_cyc) begin - // select port, set frame - frame_cyc = 1; + if (!frame_cyc) begin + // select port port_cyc = cur_port; seg_offset_cyc = port_seg_offset_next[cur_port]; - sel_tlp_seq_valid_cyc[TLP_SEG_COUNT*cur_port+seg] = 1'b1; + if (port_seg_valid[cur_port][0] && !pause[cur_port]) begin + // set frame + frame_cyc = 1; + sel_tlp_seq_valid_cyc[TLP_SEG_COUNT*cur_port+seg] = 1'b1; + end end // next port if (ARB_LSB_HIGH_PRIORITY) begin From ad5a322ee179a4060a607d54dd93487f8903772e Mon Sep 17 00:00:00 2001 From: Alex Forencich Date: Sun, 31 Jul 2022 17:24:43 -0700 Subject: [PATCH 3/7] Add PCIe flow control credit count module Signed-off-by: Alex Forencich --- rtl/pcie_tlp_fc_count.v | 187 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 187 insertions(+) create mode 100644 rtl/pcie_tlp_fc_count.v diff --git a/rtl/pcie_tlp_fc_count.v b/rtl/pcie_tlp_fc_count.v new file mode 100644 index 000000000..565610e15 --- /dev/null +++ b/rtl/pcie_tlp_fc_count.v @@ -0,0 +1,187 @@ +/* + +Copyright (c) 2022 Alex Forencich + +Permission is hereby granted, free of charge, to any person 
obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +*/ + +// Language: Verilog 2001 + +`resetall +`timescale 1ns / 1ps +`default_nettype none + +/* + * PCIe TLP flow control credit counter + */ +module pcie_tlp_fc_count # +( + // TLP header width + parameter TLP_HDR_WIDTH = 128, + // TLP segment count + parameter TLP_SEG_COUNT = 1 +) +( + input wire clk, + input wire rst, + + /* + * TLP monitor + */ + input wire [TLP_SEG_COUNT*TLP_HDR_WIDTH-1:0] tlp_hdr, + input wire [TLP_SEG_COUNT-1:0] tlp_valid, + input wire [TLP_SEG_COUNT-1:0] tlp_sop, + input wire tlp_ready, + + /* + * Flow control count output + */ + output wire [3:0] out_fc_ph, + output wire [8:0] out_fc_pd, + output wire [3:0] out_fc_nph, + output wire [8:0] out_fc_npd, + output wire [3:0] out_fc_cplh, + output wire [8:0] out_fc_cpld +); + +// check configuration +initial begin + if (TLP_HDR_WIDTH != 128) begin + $error("Error: TLP segment header width must be 128 (instance %m)"); + $finish; + end +end + +localparam [1:0] + FC_TYPE_P = 2'b00, + FC_TYPE_NP = 2'b01, + FC_TYPE_CPL = 2'b10; + 
+function [1:0] tlp_fc_type; + input [7:0] fmt_type; + case (fmt_type) + 8'b000_00000: tlp_fc_type = FC_TYPE_NP; // MEM_READ + 8'b001_00000: tlp_fc_type = FC_TYPE_NP; // MEM_READ_64 + 8'b000_00001: tlp_fc_type = FC_TYPE_NP; // MEM_READ_LOCKED + 8'b001_00001: tlp_fc_type = FC_TYPE_NP; // MEM_READ_LOCKED_64 + 8'b010_00000: tlp_fc_type = FC_TYPE_P; // MEM_WRITE + 8'b011_00000: tlp_fc_type = FC_TYPE_P; // MEM_WRITE_64 + 8'b000_00010: tlp_fc_type = FC_TYPE_NP; // IO_READ + 8'b010_00010: tlp_fc_type = FC_TYPE_NP; // IO_WRITE + 8'b000_00100: tlp_fc_type = FC_TYPE_NP; // CFG_READ_0 + 8'b010_00100: tlp_fc_type = FC_TYPE_NP; // CFG_WRITE_0 + 8'b000_00101: tlp_fc_type = FC_TYPE_NP; // CFG_READ_1 + 8'b010_00101: tlp_fc_type = FC_TYPE_NP; // CFG_WRITE_1 + 8'b001_10000: tlp_fc_type = FC_TYPE_P; // MSG_TO_RC + 8'b001_10001: tlp_fc_type = FC_TYPE_P; // MSG_ADDR + 8'b001_10010: tlp_fc_type = FC_TYPE_P; // MSG_ID + 8'b001_10011: tlp_fc_type = FC_TYPE_P; // MSG_BCAST + 8'b001_10100: tlp_fc_type = FC_TYPE_P; // MSG_LOCAL + 8'b001_10101: tlp_fc_type = FC_TYPE_P; // MSG_GATHER + 8'b011_10000: tlp_fc_type = FC_TYPE_P; // MSG_DATA_TO_RC + 8'b011_10001: tlp_fc_type = FC_TYPE_P; // MSG_DATA_ADDR + 8'b011_10010: tlp_fc_type = FC_TYPE_P; // MSG_DATA_ID + 8'b011_10011: tlp_fc_type = FC_TYPE_P; // MSG_DATA_BCAST + 8'b011_10100: tlp_fc_type = FC_TYPE_P; // MSG_DATA_LOCAL + 8'b011_10101: tlp_fc_type = FC_TYPE_P; // MSG_DATA_GATHER + 8'b000_01010: tlp_fc_type = FC_TYPE_CPL; // CPL + 8'b010_01010: tlp_fc_type = FC_TYPE_CPL; // CPL_DATA + 8'b000_01011: tlp_fc_type = FC_TYPE_CPL; // CPL_LOCKED + 8'b010_01011: tlp_fc_type = FC_TYPE_CPL; // CPL_LOCKED_DATA + 8'b010_01100: tlp_fc_type = FC_TYPE_NP; // FETCH_ADD + 8'b011_01100: tlp_fc_type = FC_TYPE_NP; // FETCH_ADD_64 + 8'b010_01101: tlp_fc_type = FC_TYPE_NP; // SWAP + 8'b011_01101: tlp_fc_type = FC_TYPE_NP; // SWAP_64 + 8'b010_01110: tlp_fc_type = FC_TYPE_NP; // CAS + 8'b011_01110: tlp_fc_type = FC_TYPE_NP; // CAS_64 + default: tlp_fc_type = 2'bxx; + 
endcase +endfunction + +reg [1:0] seg_fc_type; +reg [11:0] seg_fc_d; + +reg [3:0] fc_ph_reg = 0, fc_ph_next; +reg [8:0] fc_pd_reg = 0, fc_pd_next; +reg [3:0] fc_nph_reg = 0, fc_nph_next; +reg [8:0] fc_npd_reg = 0, fc_npd_next; +reg [3:0] fc_cplh_reg = 0, fc_cplh_next; +reg [8:0] fc_cpld_reg = 0, fc_cpld_next; + +assign out_fc_ph = fc_ph_reg; +assign out_fc_pd = fc_pd_reg; +assign out_fc_nph = fc_nph_reg; +assign out_fc_npd = fc_npd_reg; +assign out_fc_cplh = fc_cplh_reg; +assign out_fc_cpld = fc_cpld_reg; + +integer seg; + +always @* begin + fc_ph_next = 0; + fc_pd_next = 0; + fc_nph_next = 0; + fc_npd_next = 0; + fc_cplh_next = 0; + fc_cpld_next = 0; + + for (seg = 0; seg < TLP_SEG_COUNT; seg = seg + 1) begin + seg_fc_type = tlp_fc_type(tlp_hdr[seg*TLP_HDR_WIDTH+120 +: 8]); + seg_fc_d = 0; + if (tlp_hdr[seg*TLP_HDR_WIDTH+126]) begin + seg_fc_d = ({tlp_hdr[seg*TLP_HDR_WIDTH+96 +: 9] == 0, tlp_hdr[seg*TLP_HDR_WIDTH+96 +: 9]}+3) >> 2; + end + + if (tlp_sop[seg] && tlp_valid[seg] && tlp_ready) begin + if (seg_fc_type == FC_TYPE_P) begin + fc_ph_next = fc_ph_next + 1; + fc_pd_next = fc_pd_next + seg_fc_d; + end else if (seg_fc_type == FC_TYPE_NP) begin + fc_nph_next = fc_nph_next + 1; + fc_npd_next = fc_npd_next + seg_fc_d; + end else if (seg_fc_type == FC_TYPE_CPL) begin + fc_cplh_next = fc_cplh_next + 1; + fc_cpld_next = fc_cpld_next + seg_fc_d; + end + end + end +end + +always @(posedge clk) begin + fc_ph_reg <= fc_ph_next; + fc_pd_reg <= fc_pd_next; + fc_nph_reg <= fc_nph_next; + fc_npd_reg <= fc_npd_next; + fc_cplh_reg <= fc_cplh_next; + fc_cpld_reg <= fc_cpld_next; + + if (rst) begin + fc_ph_reg <= 0; + fc_pd_reg <= 0; + fc_nph_reg <= 0; + fc_npd_reg <= 0; + fc_cplh_reg <= 0; + fc_cpld_reg <= 0; + end +end + +endmodule + +`resetall From 9c434687a8c3d77ca693df84981634213fce0d3c Mon Sep 17 00:00:00 2001 From: Alex Forencich Date: Sun, 31 Jul 2022 17:35:07 -0700 Subject: [PATCH 4/7] Add flow control credit counter to TLP FIFO MUX module Signed-off-by: Alex Forencich 
--- example/520N_MX/fpga/fpga/Makefile | 1 + example/520N_MX/fpga/tb/fpga_core/Makefile | 1 + .../fpga/tb/fpga_core/test_fpga_core.py | 1 + example/DE10_Agilex/fpga/fpga_24AR0/Makefile | 1 + example/DE10_Agilex/fpga/fpga_24B/Makefile | 1 + .../DE10_Agilex/fpga/tb/fpga_core/Makefile | 1 + .../fpga/tb/fpga_core/test_fpga_core.py | 1 + example/S10DX_DK/fpga/fpga/Makefile | 1 + example/S10DX_DK/fpga/tb/fpga_core/Makefile | 1 + .../fpga/tb/fpga_core/test_fpga_core.py | 1 + example/S10MX_DK/fpga/fpga_1sm21b/Makefile | 1 + example/S10MX_DK/fpga/fpga_1sm21c/Makefile | 1 + example/S10MX_DK/fpga/tb/fpga_core/Makefile | 1 + .../fpga/tb/fpga_core/test_fpga_core.py | 1 + .../tb/example_core_pcie_ptile/Makefile | 1 + .../test_example_core_pcie_ptile.py | 1 + .../common/tb/example_core_pcie_s10/Makefile | 1 + .../test_example_core_pcie_s10.py | 1 + rtl/pcie_tlp_fifo_mux.v | 37 +++++++++++++++++++ tb/pcie_ptile_if/Makefile | 1 + tb/pcie_ptile_if/test_pcie_ptile_if.py | 1 + tb/pcie_ptile_if_tx/Makefile | 1 + tb/pcie_ptile_if_tx/test_pcie_ptile_if_tx.py | 1 + tb/pcie_s10_if/Makefile | 1 + tb/pcie_s10_if/test_pcie_s10_if.py | 1 + tb/pcie_s10_if_tx/Makefile | 1 + tb/pcie_s10_if_tx/test_pcie_s10_if_tx.py | 1 + tb/pcie_tlp_fifo_mux/Makefile | 1 + .../test_pcie_tlp_fifo_mux.py | 1 + 29 files changed, 65 insertions(+) diff --git a/example/520N_MX/fpga/fpga/Makefile b/example/520N_MX/fpga/fpga/Makefile index 932493404..439e95286 100644 --- a/example/520N_MX/fpga/fpga/Makefile +++ b/example/520N_MX/fpga/fpga/Makefile @@ -24,6 +24,7 @@ SYN_FILES += lib/pcie/rtl/pcie_axi_master_wr.v SYN_FILES += lib/pcie/rtl/pcie_tlp_demux_bar.v SYN_FILES += lib/pcie/rtl/pcie_tlp_demux.v SYN_FILES += lib/pcie/rtl/pcie_tlp_mux.v +SYN_FILES += lib/pcie/rtl/pcie_tlp_fc_count.v SYN_FILES += lib/pcie/rtl/pcie_tlp_fifo.v SYN_FILES += lib/pcie/rtl/pcie_tlp_fifo_raw.v SYN_FILES += lib/pcie/rtl/pcie_tlp_fifo_mux.v diff --git a/example/520N_MX/fpga/tb/fpga_core/Makefile b/example/520N_MX/fpga/tb/fpga_core/Makefile 
index c2d2fd018..bc24999c8 100644 --- a/example/520N_MX/fpga/tb/fpga_core/Makefile +++ b/example/520N_MX/fpga/tb/fpga_core/Makefile @@ -45,6 +45,7 @@ VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_axi_master_wr.v VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_tlp_demux_bar.v VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_tlp_demux.v VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_tlp_mux.v +VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_tlp_fc_count.v VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_tlp_fifo.v VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_tlp_fifo_raw.v VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_tlp_fifo_mux.v diff --git a/example/520N_MX/fpga/tb/fpga_core/test_fpga_core.py b/example/520N_MX/fpga/tb/fpga_core/test_fpga_core.py index 1947cf502..905f4924e 100644 --- a/example/520N_MX/fpga/tb/fpga_core/test_fpga_core.py +++ b/example/520N_MX/fpga/tb/fpga_core/test_fpga_core.py @@ -436,6 +436,7 @@ def test_fpga_core(request): os.path.join(pcie_rtl_dir, "pcie_tlp_demux_bar.v"), os.path.join(pcie_rtl_dir, "pcie_tlp_demux.v"), os.path.join(pcie_rtl_dir, "pcie_tlp_mux.v"), + os.path.join(pcie_rtl_dir, "pcie_tlp_fc_count.v"), os.path.join(pcie_rtl_dir, "pcie_tlp_fifo.v"), os.path.join(pcie_rtl_dir, "pcie_tlp_fifo_raw.v"), os.path.join(pcie_rtl_dir, "pcie_tlp_fifo_mux.v"), diff --git a/example/DE10_Agilex/fpga/fpga_24AR0/Makefile b/example/DE10_Agilex/fpga/fpga_24AR0/Makefile index a4d8ed336..f9f21c479 100644 --- a/example/DE10_Agilex/fpga/fpga_24AR0/Makefile +++ b/example/DE10_Agilex/fpga/fpga_24AR0/Makefile @@ -24,6 +24,7 @@ SYN_FILES += lib/pcie/rtl/pcie_axi_master_wr.v SYN_FILES += lib/pcie/rtl/pcie_tlp_demux_bar.v SYN_FILES += lib/pcie/rtl/pcie_tlp_demux.v SYN_FILES += lib/pcie/rtl/pcie_tlp_mux.v +SYN_FILES += lib/pcie/rtl/pcie_tlp_fc_count.v SYN_FILES += lib/pcie/rtl/pcie_tlp_fifo.v SYN_FILES += lib/pcie/rtl/pcie_tlp_fifo_raw.v SYN_FILES += lib/pcie/rtl/pcie_tlp_fifo_mux.v diff --git a/example/DE10_Agilex/fpga/fpga_24B/Makefile b/example/DE10_Agilex/fpga/fpga_24B/Makefile index 
5883e11a6..a909d1af8 100644 --- a/example/DE10_Agilex/fpga/fpga_24B/Makefile +++ b/example/DE10_Agilex/fpga/fpga_24B/Makefile @@ -24,6 +24,7 @@ SYN_FILES += lib/pcie/rtl/pcie_axi_master_wr.v SYN_FILES += lib/pcie/rtl/pcie_tlp_demux_bar.v SYN_FILES += lib/pcie/rtl/pcie_tlp_demux.v SYN_FILES += lib/pcie/rtl/pcie_tlp_mux.v +SYN_FILES += lib/pcie/rtl/pcie_tlp_fc_count.v SYN_FILES += lib/pcie/rtl/pcie_tlp_fifo.v SYN_FILES += lib/pcie/rtl/pcie_tlp_fifo_raw.v SYN_FILES += lib/pcie/rtl/pcie_tlp_fifo_mux.v diff --git a/example/DE10_Agilex/fpga/tb/fpga_core/Makefile b/example/DE10_Agilex/fpga/tb/fpga_core/Makefile index 5a48b7077..50cb10b02 100644 --- a/example/DE10_Agilex/fpga/tb/fpga_core/Makefile +++ b/example/DE10_Agilex/fpga/tb/fpga_core/Makefile @@ -45,6 +45,7 @@ VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_axi_master_wr.v VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_tlp_demux_bar.v VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_tlp_demux.v VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_tlp_mux.v +VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_tlp_fc_count.v VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_tlp_fifo.v VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_tlp_fifo_raw.v VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_tlp_fifo_mux.v diff --git a/example/DE10_Agilex/fpga/tb/fpga_core/test_fpga_core.py b/example/DE10_Agilex/fpga/tb/fpga_core/test_fpga_core.py index 364bee7a8..6e3372f7b 100644 --- a/example/DE10_Agilex/fpga/tb/fpga_core/test_fpga_core.py +++ b/example/DE10_Agilex/fpga/tb/fpga_core/test_fpga_core.py @@ -490,6 +490,7 @@ def test_fpga_core(request): os.path.join(pcie_rtl_dir, "pcie_tlp_demux_bar.v"), os.path.join(pcie_rtl_dir, "pcie_tlp_demux.v"), os.path.join(pcie_rtl_dir, "pcie_tlp_mux.v"), + os.path.join(pcie_rtl_dir, "pcie_tlp_fc_count.v"), os.path.join(pcie_rtl_dir, "pcie_tlp_fifo.v"), os.path.join(pcie_rtl_dir, "pcie_tlp_fifo_raw.v"), os.path.join(pcie_rtl_dir, "pcie_tlp_fifo_mux.v"), diff --git a/example/S10DX_DK/fpga/fpga/Makefile b/example/S10DX_DK/fpga/fpga/Makefile index 
a1a189e71..0f66ae08d 100644 --- a/example/S10DX_DK/fpga/fpga/Makefile +++ b/example/S10DX_DK/fpga/fpga/Makefile @@ -24,6 +24,7 @@ SYN_FILES += lib/pcie/rtl/pcie_axi_master_wr.v SYN_FILES += lib/pcie/rtl/pcie_tlp_demux_bar.v SYN_FILES += lib/pcie/rtl/pcie_tlp_demux.v SYN_FILES += lib/pcie/rtl/pcie_tlp_mux.v +SYN_FILES += lib/pcie/rtl/pcie_tlp_fc_count.v SYN_FILES += lib/pcie/rtl/pcie_tlp_fifo.v SYN_FILES += lib/pcie/rtl/pcie_tlp_fifo_raw.v SYN_FILES += lib/pcie/rtl/pcie_tlp_fifo_mux.v diff --git a/example/S10DX_DK/fpga/tb/fpga_core/Makefile b/example/S10DX_DK/fpga/tb/fpga_core/Makefile index 5a48b7077..50cb10b02 100644 --- a/example/S10DX_DK/fpga/tb/fpga_core/Makefile +++ b/example/S10DX_DK/fpga/tb/fpga_core/Makefile @@ -45,6 +45,7 @@ VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_axi_master_wr.v VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_tlp_demux_bar.v VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_tlp_demux.v VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_tlp_mux.v +VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_tlp_fc_count.v VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_tlp_fifo.v VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_tlp_fifo_raw.v VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_tlp_fifo_mux.v diff --git a/example/S10DX_DK/fpga/tb/fpga_core/test_fpga_core.py b/example/S10DX_DK/fpga/tb/fpga_core/test_fpga_core.py index 364bee7a8..6e3372f7b 100644 --- a/example/S10DX_DK/fpga/tb/fpga_core/test_fpga_core.py +++ b/example/S10DX_DK/fpga/tb/fpga_core/test_fpga_core.py @@ -490,6 +490,7 @@ def test_fpga_core(request): os.path.join(pcie_rtl_dir, "pcie_tlp_demux_bar.v"), os.path.join(pcie_rtl_dir, "pcie_tlp_demux.v"), os.path.join(pcie_rtl_dir, "pcie_tlp_mux.v"), + os.path.join(pcie_rtl_dir, "pcie_tlp_fc_count.v"), os.path.join(pcie_rtl_dir, "pcie_tlp_fifo.v"), os.path.join(pcie_rtl_dir, "pcie_tlp_fifo_raw.v"), os.path.join(pcie_rtl_dir, "pcie_tlp_fifo_mux.v"), diff --git a/example/S10MX_DK/fpga/fpga_1sm21b/Makefile b/example/S10MX_DK/fpga/fpga_1sm21b/Makefile index 415d50ea0..11a0e21fa 
100644 --- a/example/S10MX_DK/fpga/fpga_1sm21b/Makefile +++ b/example/S10MX_DK/fpga/fpga_1sm21b/Makefile @@ -24,6 +24,7 @@ SYN_FILES += lib/pcie/rtl/pcie_axi_master_wr.v SYN_FILES += lib/pcie/rtl/pcie_tlp_demux_bar.v SYN_FILES += lib/pcie/rtl/pcie_tlp_demux.v SYN_FILES += lib/pcie/rtl/pcie_tlp_mux.v +SYN_FILES += lib/pcie/rtl/pcie_tlp_fc_count.v SYN_FILES += lib/pcie/rtl/pcie_tlp_fifo.v SYN_FILES += lib/pcie/rtl/pcie_tlp_fifo_raw.v SYN_FILES += lib/pcie/rtl/pcie_tlp_fifo_mux.v diff --git a/example/S10MX_DK/fpga/fpga_1sm21c/Makefile b/example/S10MX_DK/fpga/fpga_1sm21c/Makefile index d4ac883ad..1666dd64f 100644 --- a/example/S10MX_DK/fpga/fpga_1sm21c/Makefile +++ b/example/S10MX_DK/fpga/fpga_1sm21c/Makefile @@ -24,6 +24,7 @@ SYN_FILES += lib/pcie/rtl/pcie_axi_master_wr.v SYN_FILES += lib/pcie/rtl/pcie_tlp_demux_bar.v SYN_FILES += lib/pcie/rtl/pcie_tlp_demux.v SYN_FILES += lib/pcie/rtl/pcie_tlp_mux.v +SYN_FILES += lib/pcie/rtl/pcie_tlp_fc_count.v SYN_FILES += lib/pcie/rtl/pcie_tlp_fifo.v SYN_FILES += lib/pcie/rtl/pcie_tlp_fifo_raw.v SYN_FILES += lib/pcie/rtl/pcie_tlp_fifo_mux.v diff --git a/example/S10MX_DK/fpga/tb/fpga_core/Makefile b/example/S10MX_DK/fpga/tb/fpga_core/Makefile index c2d2fd018..bc24999c8 100644 --- a/example/S10MX_DK/fpga/tb/fpga_core/Makefile +++ b/example/S10MX_DK/fpga/tb/fpga_core/Makefile @@ -45,6 +45,7 @@ VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_axi_master_wr.v VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_tlp_demux_bar.v VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_tlp_demux.v VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_tlp_mux.v +VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_tlp_fc_count.v VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_tlp_fifo.v VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_tlp_fifo_raw.v VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_tlp_fifo_mux.v diff --git a/example/S10MX_DK/fpga/tb/fpga_core/test_fpga_core.py b/example/S10MX_DK/fpga/tb/fpga_core/test_fpga_core.py index 1947cf502..905f4924e 100644 --- 
a/example/S10MX_DK/fpga/tb/fpga_core/test_fpga_core.py +++ b/example/S10MX_DK/fpga/tb/fpga_core/test_fpga_core.py @@ -436,6 +436,7 @@ def test_fpga_core(request): os.path.join(pcie_rtl_dir, "pcie_tlp_demux_bar.v"), os.path.join(pcie_rtl_dir, "pcie_tlp_demux.v"), os.path.join(pcie_rtl_dir, "pcie_tlp_mux.v"), + os.path.join(pcie_rtl_dir, "pcie_tlp_fc_count.v"), os.path.join(pcie_rtl_dir, "pcie_tlp_fifo.v"), os.path.join(pcie_rtl_dir, "pcie_tlp_fifo_raw.v"), os.path.join(pcie_rtl_dir, "pcie_tlp_fifo_mux.v"), diff --git a/example/common/tb/example_core_pcie_ptile/Makefile b/example/common/tb/example_core_pcie_ptile/Makefile index 56d620676..b3d15d6ef 100644 --- a/example/common/tb/example_core_pcie_ptile/Makefile +++ b/example/common/tb/example_core_pcie_ptile/Makefile @@ -44,6 +44,7 @@ VERILOG_SOURCES += ../../../../rtl/pcie_axi_master_wr.v VERILOG_SOURCES += ../../../../rtl/pcie_tlp_demux_bar.v VERILOG_SOURCES += ../../../../rtl/pcie_tlp_demux.v VERILOG_SOURCES += ../../../../rtl/pcie_tlp_mux.v +VERILOG_SOURCES += ../../../../rtl/pcie_tlp_fc_count.v VERILOG_SOURCES += ../../../../rtl/pcie_tlp_fifo.v VERILOG_SOURCES += ../../../../rtl/pcie_tlp_fifo_raw.v VERILOG_SOURCES += ../../../../rtl/pcie_tlp_fifo_mux.v diff --git a/example/common/tb/example_core_pcie_ptile/test_example_core_pcie_ptile.py b/example/common/tb/example_core_pcie_ptile/test_example_core_pcie_ptile.py index 2383f1032..54ba2f4f0 100644 --- a/example/common/tb/example_core_pcie_ptile/test_example_core_pcie_ptile.py +++ b/example/common/tb/example_core_pcie_ptile/test_example_core_pcie_ptile.py @@ -490,6 +490,7 @@ def test_example_core_pcie_ptile(request, data_width): os.path.join(pcie_rtl_dir, "pcie_tlp_demux_bar.v"), os.path.join(pcie_rtl_dir, "pcie_tlp_demux.v"), os.path.join(pcie_rtl_dir, "pcie_tlp_mux.v"), + os.path.join(pcie_rtl_dir, "pcie_tlp_fc_count.v"), os.path.join(pcie_rtl_dir, "pcie_tlp_fifo.v"), os.path.join(pcie_rtl_dir, "pcie_tlp_fifo_raw.v"), os.path.join(pcie_rtl_dir, 
"pcie_tlp_fifo_mux.v"), diff --git a/example/common/tb/example_core_pcie_s10/Makefile b/example/common/tb/example_core_pcie_s10/Makefile index 28b9411d0..d5a110155 100644 --- a/example/common/tb/example_core_pcie_s10/Makefile +++ b/example/common/tb/example_core_pcie_s10/Makefile @@ -44,6 +44,7 @@ VERILOG_SOURCES += ../../../../rtl/pcie_axi_master_wr.v VERILOG_SOURCES += ../../../../rtl/pcie_tlp_demux_bar.v VERILOG_SOURCES += ../../../../rtl/pcie_tlp_demux.v VERILOG_SOURCES += ../../../../rtl/pcie_tlp_mux.v +VERILOG_SOURCES += ../../../../rtl/pcie_tlp_fc_count.v VERILOG_SOURCES += ../../../../rtl/pcie_tlp_fifo.v VERILOG_SOURCES += ../../../../rtl/pcie_tlp_fifo_raw.v VERILOG_SOURCES += ../../../../rtl/pcie_tlp_fifo_mux.v diff --git a/example/common/tb/example_core_pcie_s10/test_example_core_pcie_s10.py b/example/common/tb/example_core_pcie_s10/test_example_core_pcie_s10.py index 297573b56..eea7355d7 100644 --- a/example/common/tb/example_core_pcie_s10/test_example_core_pcie_s10.py +++ b/example/common/tb/example_core_pcie_s10/test_example_core_pcie_s10.py @@ -439,6 +439,7 @@ def test_example_core_pcie_s10(request, data_width, l_tile): os.path.join(pcie_rtl_dir, "pcie_tlp_demux_bar.v"), os.path.join(pcie_rtl_dir, "pcie_tlp_demux.v"), os.path.join(pcie_rtl_dir, "pcie_tlp_mux.v"), + os.path.join(pcie_rtl_dir, "pcie_tlp_fc_count.v"), os.path.join(pcie_rtl_dir, "pcie_tlp_fifo.v"), os.path.join(pcie_rtl_dir, "pcie_tlp_fifo_raw.v"), os.path.join(pcie_rtl_dir, "pcie_tlp_fifo_mux.v"), diff --git a/rtl/pcie_tlp_fifo_mux.v b/rtl/pcie_tlp_fifo_mux.v index 3c7d63cfc..b92065584 100644 --- a/rtl/pcie_tlp_fifo_mux.v +++ b/rtl/pcie_tlp_fifo_mux.v @@ -90,6 +90,16 @@ module pcie_tlp_fifo_mux # output wire [OUT_TLP_SEG_COUNT-1:0] out_tlp_eop, input wire out_tlp_ready, + /* + * Flow control count output + */ + output wire [3:0] out_fc_ph, + output wire [8:0] out_fc_pd, + output wire [3:0] out_fc_nph, + output wire [8:0] out_fc_npd, + output wire [3:0] out_fc_cplh, + output wire [8:0] 
out_fc_cpld, + /* * Control */ @@ -260,6 +270,33 @@ endgenerate assign sel_tlp_seq = {PORTS{out_sel_tlp_seq_reg}}; assign sel_tlp_seq_valid = out_sel_tlp_seq_valid_reg; +pcie_tlp_fc_count #( + .TLP_HDR_WIDTH(TLP_HDR_WIDTH), + .TLP_SEG_COUNT(OUT_TLP_SEG_COUNT) +) +fc_count_inst ( + .clk(clk), + .rst(rst), + + /* + * TLP monitor + */ + .tlp_hdr(out_tlp_hdr_int), + .tlp_valid(out_tlp_valid_int), + .tlp_sop(out_tlp_sop_int), + .tlp_ready(1'b1), + + /* + * Flow control count output + */ + .out_fc_ph(out_fc_ph), + .out_fc_pd(out_fc_pd), + .out_fc_nph(out_fc_nph), + .out_fc_npd(out_fc_npd), + .out_fc_cplh(out_fc_cplh), + .out_fc_cpld(out_fc_cpld) +); + integer port, cur_port, seg, cur_seg; always @* begin diff --git a/tb/pcie_ptile_if/Makefile b/tb/pcie_ptile_if/Makefile index f0a462187..324d3d844 100644 --- a/tb/pcie_ptile_if/Makefile +++ b/tb/pcie_ptile_if/Makefile @@ -34,6 +34,7 @@ VERILOG_SOURCES += ../../rtl/$(DUT)_rx.v VERILOG_SOURCES += ../../rtl/$(DUT)_tx.v VERILOG_SOURCES += ../../rtl/pcie_ptile_cfg.v VERILOG_SOURCES += ../../rtl/pcie_tlp_demux.v +VERILOG_SOURCES += ../../rtl/pcie_tlp_fc_count.v VERILOG_SOURCES += ../../rtl/pcie_tlp_fifo.v VERILOG_SOURCES += ../../rtl/pcie_tlp_fifo_raw.v VERILOG_SOURCES += ../../rtl/pcie_tlp_fifo_mux.v diff --git a/tb/pcie_ptile_if/test_pcie_ptile_if.py b/tb/pcie_ptile_if/test_pcie_ptile_if.py index 7e88960d1..68a4ad0be 100644 --- a/tb/pcie_ptile_if/test_pcie_ptile_if.py +++ b/tb/pcie_ptile_if/test_pcie_ptile_if.py @@ -614,6 +614,7 @@ def test_pcie_ptile_if(request, data_width): os.path.join(rtl_dir, f"{dut}_tx.v"), os.path.join(rtl_dir, "pcie_ptile_cfg.v"), os.path.join(rtl_dir, "pcie_tlp_demux.v"), + os.path.join(rtl_dir, "pcie_tlp_fc_count.v"), os.path.join(rtl_dir, "pcie_tlp_fifo.v"), os.path.join(rtl_dir, "pcie_tlp_fifo_raw.v"), os.path.join(rtl_dir, "pcie_tlp_fifo_mux.v"), diff --git a/tb/pcie_ptile_if_tx/Makefile b/tb/pcie_ptile_if_tx/Makefile index 12cdf2b73..160856e0c 100644 --- a/tb/pcie_ptile_if_tx/Makefile +++ 
b/tb/pcie_ptile_if_tx/Makefile @@ -30,6 +30,7 @@ DUT = pcie_ptile_if_tx TOPLEVEL = $(DUT) MODULE = test_$(DUT) VERILOG_SOURCES += ../../rtl/$(DUT).v +VERILOG_SOURCES += ../../rtl/pcie_tlp_fc_count.v VERILOG_SOURCES += ../../rtl/pcie_tlp_fifo_raw.v VERILOG_SOURCES += ../../rtl/pcie_tlp_fifo_mux.v diff --git a/tb/pcie_ptile_if_tx/test_pcie_ptile_if_tx.py b/tb/pcie_ptile_if_tx/test_pcie_ptile_if_tx.py index 73bc70d2f..763a7b9d4 100644 --- a/tb/pcie_ptile_if_tx/test_pcie_ptile_if_tx.py +++ b/tb/pcie_ptile_if_tx/test_pcie_ptile_if_tx.py @@ -368,6 +368,7 @@ def test_pcie_ptile_if_tx(request, data_width): verilog_sources = [ os.path.join(rtl_dir, f"{dut}.v"), + os.path.join(rtl_dir, "pcie_tlp_fc_count.v"), os.path.join(rtl_dir, "pcie_tlp_fifo_raw.v"), os.path.join(rtl_dir, "pcie_tlp_fifo_mux.v"), ] diff --git a/tb/pcie_s10_if/Makefile b/tb/pcie_s10_if/Makefile index e45ac9490..afca88272 100644 --- a/tb/pcie_s10_if/Makefile +++ b/tb/pcie_s10_if/Makefile @@ -35,6 +35,7 @@ VERILOG_SOURCES += ../../rtl/$(DUT)_tx.v VERILOG_SOURCES += ../../rtl/pcie_s10_cfg.v VERILOG_SOURCES += ../../rtl/pcie_s10_msi.v VERILOG_SOURCES += ../../rtl/pcie_tlp_demux.v +VERILOG_SOURCES += ../../rtl/pcie_tlp_fc_count.v VERILOG_SOURCES += ../../rtl/pcie_tlp_fifo.v VERILOG_SOURCES += ../../rtl/pcie_tlp_fifo_raw.v VERILOG_SOURCES += ../../rtl/pcie_tlp_fifo_mux.v diff --git a/tb/pcie_s10_if/test_pcie_s10_if.py b/tb/pcie_s10_if/test_pcie_s10_if.py index 94e759ccf..e04271a75 100644 --- a/tb/pcie_s10_if/test_pcie_s10_if.py +++ b/tb/pcie_s10_if/test_pcie_s10_if.py @@ -566,6 +566,7 @@ def test_pcie_s10_if(request, data_width, l_tile): os.path.join(rtl_dir, "pcie_s10_cfg.v"), os.path.join(rtl_dir, "pcie_s10_msi.v"), os.path.join(rtl_dir, "pcie_tlp_demux.v"), + os.path.join(rtl_dir, "pcie_tlp_fc_count.v"), os.path.join(rtl_dir, "pcie_tlp_fifo.v"), os.path.join(rtl_dir, "pcie_tlp_fifo_raw.v"), os.path.join(rtl_dir, "pcie_tlp_fifo_mux.v"), diff --git a/tb/pcie_s10_if_tx/Makefile b/tb/pcie_s10_if_tx/Makefile index 
45592edaf..267c3b04c 100644 --- a/tb/pcie_s10_if_tx/Makefile +++ b/tb/pcie_s10_if_tx/Makefile @@ -30,6 +30,7 @@ DUT = pcie_s10_if_tx TOPLEVEL = $(DUT) MODULE = test_$(DUT) VERILOG_SOURCES += ../../rtl/$(DUT).v +VERILOG_SOURCES += ../../rtl/pcie_tlp_fc_count.v VERILOG_SOURCES += ../../rtl/pcie_tlp_fifo_raw.v VERILOG_SOURCES += ../../rtl/pcie_tlp_fifo_mux.v diff --git a/tb/pcie_s10_if_tx/test_pcie_s10_if_tx.py b/tb/pcie_s10_if_tx/test_pcie_s10_if_tx.py index 83c607482..7d6f03efd 100644 --- a/tb/pcie_s10_if_tx/test_pcie_s10_if_tx.py +++ b/tb/pcie_s10_if_tx/test_pcie_s10_if_tx.py @@ -364,6 +364,7 @@ def test_pcie_s10_if_tx(request, data_width): verilog_sources = [ os.path.join(rtl_dir, f"{dut}.v"), + os.path.join(rtl_dir, "pcie_tlp_fc_count.v"), os.path.join(rtl_dir, "pcie_tlp_fifo_raw.v"), os.path.join(rtl_dir, "pcie_tlp_fifo_mux.v"), ] diff --git a/tb/pcie_tlp_fifo_mux/Makefile b/tb/pcie_tlp_fifo_mux/Makefile index 18eb7c93a..b12bbdca2 100644 --- a/tb/pcie_tlp_fifo_mux/Makefile +++ b/tb/pcie_tlp_fifo_mux/Makefile @@ -34,6 +34,7 @@ TOPLEVEL = $(WRAPPER) MODULE = test_$(DUT) VERILOG_SOURCES += $(WRAPPER).v VERILOG_SOURCES += ../../rtl/$(DUT).v +VERILOG_SOURCES += ../../rtl/pcie_tlp_fc_count.v VERILOG_SOURCES += ../../rtl/pcie_tlp_fifo_raw.v # module parameters diff --git a/tb/pcie_tlp_fifo_mux/test_pcie_tlp_fifo_mux.py b/tb/pcie_tlp_fifo_mux/test_pcie_tlp_fifo_mux.py index ff91b453e..254e73502 100644 --- a/tb/pcie_tlp_fifo_mux/test_pcie_tlp_fifo_mux.py +++ b/tb/pcie_tlp_fifo_mux/test_pcie_tlp_fifo_mux.py @@ -255,6 +255,7 @@ def test_pcie_tlp_fifo_mux(request, pcie_data_width, tlp_seg_count, ports, round verilog_sources = [ wrapper_file, os.path.join(rtl_dir, f"{dut}.v"), + os.path.join(rtl_dir, "pcie_tlp_fc_count.v"), os.path.join(rtl_dir, "pcie_tlp_fifo_raw.v"), ] From 7f0bd00170f13bf3d3c9a36af98bb108b23b61d0 Mon Sep 17 00:00:00 2001 From: Alex Forencich Date: Mon, 1 Aug 2022 13:19:01 -0700 Subject: [PATCH 5/7] Implement flow control for Stratix 10 shim Signed-off-by: 
Alex Forencich --- .../test_example_core_pcie_s10.py | 2 +- rtl/pcie_s10_if.v | 38 +++- rtl/pcie_s10_if_tx.v | 174 +++++++++++++++++- tb/pcie_s10_if_tx/test_pcie_s10_if_tx.py | 13 ++ 4 files changed, 216 insertions(+), 11 deletions(-) diff --git a/example/common/tb/example_core_pcie_s10/test_example_core_pcie_s10.py b/example/common/tb/example_core_pcie_s10/test_example_core_pcie_s10.py index eea7355d7..e12d45fc3 100644 --- a/example/common/tb/example_core_pcie_s10/test_example_core_pcie_s10.py +++ b/example/common/tb/example_core_pcie_s10/test_example_core_pcie_s10.py @@ -51,7 +51,7 @@ class TB(object): pcie_generation=3, # pcie_link_width=2, # pld_clk_frequency=250e6, - l_tile=False, + l_tile=dut.L_TILE.value, pf_count=1, max_payload_size=1024, enable_extended_tag=True, diff --git a/rtl/pcie_s10_if.v b/rtl/pcie_s10_if.v index 976d644f4..78ea7b9b4 100644 --- a/rtl/pcie_s10_if.v +++ b/rtl/pcie_s10_if.v @@ -310,6 +310,20 @@ pcie_s10_if_tx_inst ( .tx_st_ready(tx_st_ready), .tx_st_err(tx_st_err), + /* + * H-Tile/L-Tile TX flow control + */ + .tx_ph_cdts(tx_ph_cdts), + .tx_pd_cdts(tx_pd_cdts), + .tx_nph_cdts(tx_nph_cdts), + .tx_npd_cdts(L_TILE ? tx_npd_cdts : 12'h800), + .tx_cplh_cdts(tx_cplh_cdts), + .tx_cpld_cdts(L_TILE ? 
tx_cpld_cdts : 12'h800), + .tx_hdr_cdts_consumed(tx_hdr_cdts_consumed), + .tx_data_cdts_consumed(tx_data_cdts_consumed), + .tx_cdts_type(tx_cdts_type), + .tx_cdts_data_value(tx_cdts_data_value), + /* * TLP input (read request from DMA) */ @@ -364,7 +378,22 @@ pcie_s10_if_tx_inst ( .tx_msi_wr_req_tlp_valid(tx_msi_wr_req_tlp_valid), .tx_msi_wr_req_tlp_sop(tx_msi_wr_req_tlp_sop), .tx_msi_wr_req_tlp_eop(tx_msi_wr_req_tlp_eop), - .tx_msi_wr_req_tlp_ready(tx_msi_wr_req_tlp_ready) + .tx_msi_wr_req_tlp_ready(tx_msi_wr_req_tlp_ready), + + /* + * Flow control + */ + .tx_fc_ph_av(tx_fc_ph_av), + .tx_fc_pd_av(tx_fc_pd_av), + .tx_fc_nph_av(tx_fc_nph_av), + .tx_fc_npd_av(tx_fc_npd_av), + .tx_fc_cplh_av(tx_fc_cplh_av), + .tx_fc_cpld_av(tx_fc_cpld_av), + + /* + * Configuration + */ + .max_payload_size(max_payload_size) ); pcie_s10_cfg #( @@ -436,13 +465,6 @@ pcie_s10_cfg_inst ( .cfg_aer_uncor_err_severity() ); -assign tx_fc_ph_av = tx_ph_cdts; -assign tx_fc_pd_av = tx_pd_cdts; -assign tx_fc_nph_av = tx_nph_cdts; -assign tx_fc_npd_av = L_TILE ? tx_npd_cdts : 0; -assign tx_fc_cplh_av = tx_cplh_cdts; -assign tx_fc_cpld_av = L_TILE ? 
tx_cpld_cdts : 0; - generate if (MSI_ENABLE) begin diff --git a/rtl/pcie_s10_if_tx.v b/rtl/pcie_s10_if_tx.v index 7057a8694..1b8ffa280 100644 --- a/rtl/pcie_s10_if_tx.v +++ b/rtl/pcie_s10_if_tx.v @@ -62,6 +62,20 @@ module pcie_s10_if_tx # input wire tx_st_ready, output wire [SEG_COUNT-1:0] tx_st_err, + /* + * H-Tile/L-Tile TX flow control + */ + input wire [7:0] tx_ph_cdts, + input wire [11:0] tx_pd_cdts, + input wire [7:0] tx_nph_cdts, + input wire [11:0] tx_npd_cdts, + input wire [7:0] tx_cplh_cdts, + input wire [11:0] tx_cpld_cdts, + input wire [SEG_COUNT-1:0] tx_hdr_cdts_consumed, + input wire [SEG_COUNT-1:0] tx_data_cdts_consumed, + input wire [SEG_COUNT*2-1:0] tx_cdts_type, + input wire [SEG_COUNT*1-1:0] tx_cdts_data_value, + /* * TLP input (read request from DMA) */ @@ -116,7 +130,22 @@ module pcie_s10_if_tx # input wire tx_msi_wr_req_tlp_valid, input wire tx_msi_wr_req_tlp_sop, input wire tx_msi_wr_req_tlp_eop, - output wire tx_msi_wr_req_tlp_ready + output wire tx_msi_wr_req_tlp_ready, + + /* + * Flow control + */ + output wire [7:0] tx_fc_ph_av, + output wire [11:0] tx_fc_pd_av, + output wire [7:0] tx_fc_nph_av, + output wire [11:0] tx_fc_npd_av, + output wire [7:0] tx_fc_cplh_av, + output wire [11:0] tx_fc_cpld_av, + + /* + * Configuration + */ + input wire [2:0] max_payload_size ); parameter SEG_STRB_WIDTH = SEG_DATA_WIDTH/32; @@ -153,6 +182,7 @@ reg tlp_hdr_4dw_reg = 1'b0, tlp_hdr_4dw_next, tlp_hdr_4dw_cyc; reg tlp_hdr_cyc; reg tlp_split1_reg = 1'b0, tlp_split1_next, tlp_split1_cyc; reg tlp_split2_reg = 1'b0, tlp_split2_next, tlp_split2_cyc; +reg [INT_TLP_SEG_COUNT-1:0] seg_cons_reg = 0, seg_cons_next, seg_cons_cyc; reg [SEG_SEL_WIDTH-1:0] seg_offset_cyc; reg [SEG_SEL_WIDTH+1-1:0] seg_count_cyc; reg valid, eop; @@ -199,6 +229,8 @@ wire [INT_TLP_SEG_COUNT-1:0] mux_out_tlp_sop; wire [INT_TLP_SEG_COUNT-1:0] mux_out_tlp_eop; wire mux_out_tlp_ready; +wire [PORTS-1:0] mux_pause; + wire [PORTS*INT_TLP_SEG_COUNT*TX_SEQ_NUM_WIDTH-1:0] mux_out_sel_tlp_seq; wire 
[PORTS*INT_TLP_SEG_COUNT-1:0] mux_out_sel_tlp_seq_valid; @@ -227,6 +259,32 @@ reg [INT_TLP_SEG_COUNT-1:0] fifo_ctrl_tlp_hdr_4dw; reg [INT_TLP_SEG_COUNT-1:0] fifo_ctrl_tlp_extra_3dw; reg [INT_TLP_SEG_COUNT-1:0] fifo_ctrl_tlp_extra_4dw; +wire [3:0] mux_tx_fc_ph, out_tx_fc_ph; +wire [8:0] mux_tx_fc_pd, out_tx_fc_pd; +wire [3:0] mux_tx_fc_nph, out_tx_fc_nph; +wire [8:0] mux_tx_fc_npd, out_tx_fc_npd; +wire [3:0] mux_tx_fc_cplh, out_tx_fc_cplh; +wire [8:0] mux_tx_fc_cpld, out_tx_fc_cpld; + +reg [7:0] int_tx_fc_ph_reg = 0; +reg [11:0] int_tx_fc_pd_reg = 0; +reg [7:0] int_tx_fc_nph_reg = 0; +reg [11:0] int_tx_fc_npd_reg = 0; +reg [7:0] int_tx_fc_cplh_reg = 0; +reg [11:0] int_tx_fc_cpld_reg = 0; + +reg [7:0] adj_tx_fc_ph_reg = 0; +reg [11:0] adj_tx_fc_pd_reg = 0; +reg [7:0] adj_tx_fc_nph_reg = 0; +reg [11:0] adj_tx_fc_npd_reg = 0; +reg [7:0] adj_tx_fc_cplh_reg = 0; +reg [11:0] adj_tx_fc_cpld_reg = 0; + +reg [8:0] max_payload_size_fc_reg = 9'd0; +reg have_p_credit_reg = 1'b0; +reg have_np_credit_reg = 1'b0; +reg have_cpl_credit_reg = 1'b0; + assign mux_in_tlp_data[TLP_DATA_WIDTH*0 +: TLP_DATA_WIDTH] = tx_msi_wr_req_tlp_data; assign mux_in_tlp_strb[TLP_STRB_WIDTH*0 +: TLP_STRB_WIDTH] = tx_msi_wr_req_tlp_strb; assign mux_in_tlp_hdr[TLP_SEG_COUNT*TLP_HDR_WIDTH*0 +: TLP_SEG_COUNT*TLP_HDR_WIDTH] = tx_msi_wr_req_tlp_hdr; @@ -236,6 +294,8 @@ assign mux_in_tlp_sop[TLP_SEG_COUNT*0 +: TLP_SEG_COUNT] = tx_msi_wr_req_tlp_sop; assign mux_in_tlp_eop[TLP_SEG_COUNT*0 +: TLP_SEG_COUNT] = tx_msi_wr_req_tlp_eop; assign tx_msi_wr_req_tlp_ready = mux_in_tlp_ready[0 +: 1]; +assign mux_pause[0] = !have_p_credit_reg; + assign mux_in_tlp_data[TLP_DATA_WIDTH*1 +: TLP_DATA_WIDTH] = tx_cpl_tlp_data; assign mux_in_tlp_strb[TLP_STRB_WIDTH*1 +: TLP_STRB_WIDTH] = tx_cpl_tlp_strb; assign mux_in_tlp_hdr[TLP_SEG_COUNT*TLP_HDR_WIDTH*1 +: TLP_SEG_COUNT*TLP_HDR_WIDTH] = tx_cpl_tlp_hdr; @@ -245,6 +305,8 @@ assign mux_in_tlp_sop[TLP_SEG_COUNT*1 +: TLP_SEG_COUNT] = tx_cpl_tlp_sop; assign 
mux_in_tlp_eop[TLP_SEG_COUNT*1 +: TLP_SEG_COUNT] = tx_cpl_tlp_eop; assign tx_cpl_tlp_ready = mux_in_tlp_ready[1 +: 1]; +assign mux_pause[1] = !have_cpl_credit_reg; + assign mux_in_tlp_data[TLP_DATA_WIDTH*2 +: TLP_DATA_WIDTH] = 0; assign mux_in_tlp_strb[TLP_STRB_WIDTH*2 +: TLP_STRB_WIDTH] = 0; assign mux_in_tlp_hdr[TLP_SEG_COUNT*TLP_HDR_WIDTH*2 +: TLP_SEG_COUNT*TLP_HDR_WIDTH] = tx_rd_req_tlp_hdr; @@ -254,6 +316,8 @@ assign mux_in_tlp_sop[TLP_SEG_COUNT*2 +: TLP_SEG_COUNT] = {TLP_SEG_COUNT{1'b1}}; assign mux_in_tlp_eop[TLP_SEG_COUNT*2 +: TLP_SEG_COUNT] = {TLP_SEG_COUNT{1'b1}}; assign tx_rd_req_tlp_ready = mux_in_tlp_ready[2 +: 1]; +assign mux_pause[2] = !have_np_credit_reg; + assign mux_in_tlp_data[TLP_DATA_WIDTH*3 +: TLP_DATA_WIDTH] = tx_wr_req_tlp_data; assign mux_in_tlp_strb[TLP_STRB_WIDTH*3 +: TLP_STRB_WIDTH] = tx_wr_req_tlp_strb; assign mux_in_tlp_hdr[TLP_SEG_COUNT*TLP_HDR_WIDTH*3 +: TLP_SEG_COUNT*TLP_HDR_WIDTH] = tx_wr_req_tlp_hdr; @@ -263,6 +327,8 @@ assign mux_in_tlp_sop[TLP_SEG_COUNT*3 +: TLP_SEG_COUNT] = tx_wr_req_tlp_sop; assign mux_in_tlp_eop[TLP_SEG_COUNT*3 +: TLP_SEG_COUNT] = tx_wr_req_tlp_eop; assign tx_wr_req_tlp_ready = mux_in_tlp_ready[3 +: 1]; +assign mux_pause[3] = !have_p_credit_reg; + assign m_axis_rd_req_tx_seq_num = mux_out_sel_tlp_seq[INT_TLP_SEG_COUNT*TX_SEQ_NUM_WIDTH*2 +: INT_TLP_SEG_COUNT*TX_SEQ_NUM_WIDTH]; assign m_axis_rd_req_tx_seq_num_valid = mux_out_sel_tlp_seq_valid[INT_TLP_SEG_COUNT*2 +: INT_TLP_SEG_COUNT]; assign m_axis_wr_req_tx_seq_num = mux_out_sel_tlp_seq[INT_TLP_SEG_COUNT*TX_SEQ_NUM_WIDTH*3 +: INT_TLP_SEG_COUNT*TX_SEQ_NUM_WIDTH]; @@ -274,6 +340,13 @@ assign tx_st_eop = tx_st_eop_reg; assign tx_st_valid = tx_st_valid_reg; assign tx_st_err = 0; +assign tx_fc_ph_av = adj_tx_fc_ph_reg; +assign tx_fc_pd_av = adj_tx_fc_pd_reg; +assign tx_fc_nph_av = adj_tx_fc_nph_reg; +assign tx_fc_npd_av = adj_tx_fc_npd_reg; +assign tx_fc_cplh_av = adj_tx_fc_cplh_reg; +assign tx_fc_cpld_av = adj_tx_fc_cpld_reg; + pcie_tlp_fifo_mux #( .PORTS(PORTS), 
.TLP_DATA_WIDTH(TLP_DATA_WIDTH), @@ -320,10 +393,20 @@ pcie_tlp_fifo_mux_inst ( .out_tlp_eop(mux_out_tlp_eop), .out_tlp_ready(mux_out_tlp_ready), + /* + * Flow control count output + */ + .out_fc_ph(mux_tx_fc_ph), + .out_fc_pd(mux_tx_fc_pd), + .out_fc_nph(mux_tx_fc_nph), + .out_fc_npd(mux_tx_fc_npd), + .out_fc_cplh(mux_tx_fc_cplh), + .out_fc_cpld(mux_tx_fc_cpld), + /* * Control */ - .pause(0), + .pause(mux_pause), /* * Status @@ -398,6 +481,53 @@ pcie_tlp_fifo_inst ( .watermark() ); +reg [INT_TLP_SEG_COUNT*TLP_HDR_WIDTH-1:0] fc_delay_fifo_hdr_mem[31:0]; +reg [INT_TLP_SEG_COUNT-1:0] fc_delay_fifo_valid_mem[31:0]; +reg [4:0] fc_delay_fifo_wr_ptr_reg = 0; +reg [4:0] fc_delay_fifo_rd_ptr_reg = 0; + +always @(posedge clk) begin + fc_delay_fifo_hdr_mem[fc_delay_fifo_wr_ptr_reg] <= fifo_tlp_hdr; + fc_delay_fifo_valid_mem[fc_delay_fifo_wr_ptr_reg] <= seg_cons_reg & fifo_tlp_sop; + + fc_delay_fifo_wr_ptr_reg <= fc_delay_fifo_wr_ptr_reg + 1; + if (fc_delay_fifo_wr_ptr_reg - fc_delay_fifo_rd_ptr_reg >= 23) begin + fc_delay_fifo_rd_ptr_reg <= fc_delay_fifo_rd_ptr_reg + 1; + end + + if (rst) begin + fc_delay_fifo_wr_ptr_reg <= 0; + fc_delay_fifo_rd_ptr_reg <= 0; + end +end + +pcie_tlp_fc_count #( + .TLP_HDR_WIDTH(TLP_HDR_WIDTH), + .TLP_SEG_COUNT(INT_TLP_SEG_COUNT) +) +fc_count_inst ( + .clk(clk), + .rst(rst), + + /* + * TLP monitor + */ + .tlp_hdr(fc_delay_fifo_hdr_mem[fc_delay_fifo_rd_ptr_reg]), + .tlp_valid(fc_delay_fifo_valid_mem[fc_delay_fifo_rd_ptr_reg]), + .tlp_sop({INT_TLP_SEG_COUNT{1'b1}}), + .tlp_ready(1'b1), + + /* + * Flow control count output + */ + .out_fc_ph(out_tx_fc_ph), + .out_fc_pd(out_tx_fc_pd), + .out_fc_nph(out_tx_fc_nph), + .out_fc_npd(out_tx_fc_npd), + .out_fc_cplh(out_tx_fc_cplh), + .out_fc_cpld(out_tx_fc_cpld) +); + integer seg, cur_seg; always @* begin @@ -421,6 +551,8 @@ always @* begin tlp_hdr_cyc = 1'b0; tlp_split1_cyc = tlp_split1_reg; tlp_split2_cyc = tlp_split2_reg; + seg_cons_cyc = 0; + seg_cons_next = 0; seg_offset_cyc = fifo_ctrl_seg_offset; 
seg_count_cyc = 0; valid = 0; @@ -486,6 +618,7 @@ always @* begin out_eop_next[seg] = 1'b1; tlp_split1_cyc = 1'b0; tlp_split2_cyc = 1'b1; + seg_cons_cyc[seg_offset_cyc] = 1'b1; seg_offset_cyc = seg_offset_cyc + 1; seg_count_cyc = seg_count_cyc + 1; port_seg_valid = port_seg_valid >> 1; @@ -503,6 +636,7 @@ always @* begin frame_cyc = 0; out_eop_next[seg] = 1'b1; end + seg_cons_cyc[seg_offset_cyc] = 1'b1; seg_offset_cyc = seg_offset_cyc + 1; seg_count_cyc = seg_count_cyc + 1; port_seg_valid = port_seg_valid >> 1; @@ -529,6 +663,7 @@ always @* begin out_sel_next = out_sel_cyc; fifo_ctrl_read_en = seg_count_cyc != 0; fifo_read_en_next = seg_count_cyc != 0; + seg_cons_next = seg_cons_cyc; end end end else begin @@ -572,6 +707,7 @@ always @(posedge clk) begin tlp_hdr_4dw_reg <= tlp_hdr_4dw_next; tlp_split1_reg <= tlp_split1_next; tlp_split2_reg <= tlp_split2_next; + seg_cons_reg <= seg_cons_next; out_sel_reg <= out_sel_next; out_sop_reg <= out_sop_next; @@ -596,6 +732,26 @@ always @(posedge clk) begin tx_st_ready_delay_reg <= {tx_st_ready_delay_reg, tx_st_ready}; + // flow control + int_tx_fc_ph_reg <= int_tx_fc_ph_reg + mux_tx_fc_ph - out_tx_fc_ph; + int_tx_fc_pd_reg <= int_tx_fc_pd_reg + mux_tx_fc_pd - out_tx_fc_pd; + int_tx_fc_nph_reg <= int_tx_fc_nph_reg + mux_tx_fc_nph - out_tx_fc_nph; + int_tx_fc_npd_reg <= int_tx_fc_npd_reg + mux_tx_fc_npd - out_tx_fc_npd; + int_tx_fc_cplh_reg <= int_tx_fc_cplh_reg + mux_tx_fc_cplh - out_tx_fc_cplh; + int_tx_fc_cpld_reg <= int_tx_fc_cpld_reg + mux_tx_fc_cpld - out_tx_fc_cpld; + + adj_tx_fc_ph_reg <= tx_ph_cdts > int_tx_fc_ph_reg ? tx_ph_cdts - int_tx_fc_ph_reg : 0; + adj_tx_fc_pd_reg <= tx_pd_cdts > int_tx_fc_pd_reg ? tx_pd_cdts - int_tx_fc_pd_reg : 0; + adj_tx_fc_nph_reg <= tx_nph_cdts > int_tx_fc_nph_reg ? tx_nph_cdts - int_tx_fc_nph_reg : 0; + adj_tx_fc_npd_reg <= tx_npd_cdts > int_tx_fc_npd_reg ? tx_npd_cdts - int_tx_fc_npd_reg : 0; + adj_tx_fc_cplh_reg <= tx_cplh_cdts > int_tx_fc_cplh_reg ? 
tx_cplh_cdts - int_tx_fc_cplh_reg : 0; + adj_tx_fc_cpld_reg <= tx_cpld_cdts > int_tx_fc_cpld_reg ? tx_cpld_cdts - int_tx_fc_cpld_reg : 0; + + max_payload_size_fc_reg <= 9'd8 << (max_payload_size > 5 ? 5 : max_payload_size); + have_p_credit_reg <= (adj_tx_fc_ph_reg > 4) && (adj_tx_fc_pd_reg > (max_payload_size_fc_reg << 1)); + have_np_credit_reg <= adj_tx_fc_nph_reg > 4; + have_cpl_credit_reg <= (adj_tx_fc_cplh_reg > 4) && (adj_tx_fc_cpld_reg > (max_payload_size_fc_reg << 1)); + if (rst) begin frame_reg <= 1'b0; @@ -605,6 +761,20 @@ always @(posedge clk) begin tx_st_valid_reg <= 0; tx_st_ready_delay_reg <= 0; + + int_tx_fc_ph_reg <= 0; + int_tx_fc_pd_reg <= 0; + int_tx_fc_nph_reg <= 0; + int_tx_fc_npd_reg <= 0; + int_tx_fc_cplh_reg <= 0; + int_tx_fc_cpld_reg <= 0; + + adj_tx_fc_ph_reg <= 0; + adj_tx_fc_pd_reg <= 0; + adj_tx_fc_nph_reg <= 0; + adj_tx_fc_npd_reg <= 0; + adj_tx_fc_cplh_reg <= 0; + adj_tx_fc_cpld_reg <= 0; end end diff --git a/tb/pcie_s10_if_tx/test_pcie_s10_if_tx.py b/tb/pcie_s10_if_tx/test_pcie_s10_if_tx.py index 7d6f03efd..2ca782ba5 100644 --- a/tb/pcie_s10_if_tx/test_pcie_s10_if_tx.py +++ b/tb/pcie_s10_if_tx/test_pcie_s10_if_tx.py @@ -67,6 +67,19 @@ class TB(object): self.sink = S10PcieSink(S10TxBus.from_prefix(dut, "tx_st"), dut.clk, dut.rst) self.sink.ready_latency = 3 + dut.tx_ph_cdts.setimmediatevalue(0x80) + dut.tx_pd_cdts.setimmediatevalue(0x800) + dut.tx_nph_cdts.setimmediatevalue(0x80) + dut.tx_npd_cdts.setimmediatevalue(0x800) + dut.tx_cplh_cdts.setimmediatevalue(0x80) + dut.tx_cpld_cdts.setimmediatevalue(0x800) + dut.tx_hdr_cdts_consumed.setimmediatevalue(0) + dut.tx_data_cdts_consumed.setimmediatevalue(0) + dut.tx_cdts_type.setimmediatevalue(0) + dut.tx_cdts_data_value.setimmediatevalue(0) + + dut.max_payload_size.setimmediatevalue(0) + def set_idle_generator(self, generator=None): if generator: self.rd_req_source.set_pause_generator(generator()) From 53ee26f3eca420c67fc65f05997f93eedbbdec57 Mon Sep 17 00:00:00 2001 From: Alex Forencich 
Date: Mon, 1 Aug 2022 13:25:51 -0700 Subject: [PATCH 6/7] Use latest version of cocotbext-pcie Signed-off-by: Alex Forencich --- tb/pcie_if.py | 36 ++++++++++++++++++------------------ tox.ini | 2 +- 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/tb/pcie_if.py b/tb/pcie_if.py index 5bebe5697..2a3da424a 100644 --- a/tb/pcie_if.py +++ b/tb/pcie_if.py @@ -1255,43 +1255,43 @@ class PcieIfDevice(Device): await RisingEdge(self.clk) if self.tx_fc_ph_av is not None: - self.tx_fc_ph_av.value = self.upstream_port.fc_state[0].ph.tx_credits_available + self.tx_fc_ph_av.value = self.upstream_port.fc_state[0].ph.tx_credits_available & 0xff if self.tx_fc_pd_av is not None: - self.tx_fc_pd_av.value = self.upstream_port.fc_state[0].pd.tx_credits_available + self.tx_fc_pd_av.value = self.upstream_port.fc_state[0].pd.tx_credits_available & 0xfff if self.tx_fc_nph_av is not None: - self.tx_fc_nph_av.value = self.upstream_port.fc_state[0].nph.tx_credits_available + self.tx_fc_nph_av.value = self.upstream_port.fc_state[0].nph.tx_credits_available & 0xff if self.tx_fc_npd_av is not None: - self.tx_fc_npd_av.value = self.upstream_port.fc_state[0].npd.tx_credits_available + self.tx_fc_npd_av.value = self.upstream_port.fc_state[0].npd.tx_credits_available & 0xfff if self.tx_fc_cplh_av is not None: - self.tx_fc_cplh_av.value = self.upstream_port.fc_state[0].cplh.tx_credits_available + self.tx_fc_cplh_av.value = self.upstream_port.fc_state[0].cplh.tx_credits_available & 0xff if self.tx_fc_cpld_av is not None: - self.tx_fc_cpld_av.value = self.upstream_port.fc_state[0].cpld.tx_credits_available + self.tx_fc_cpld_av.value = self.upstream_port.fc_state[0].cpld.tx_credits_available & 0xfff if self.tx_fc_ph_lim is not None: - self.tx_fc_ph_lim.value = self.upstream_port.fc_state[0].ph.tx_credit_limit + self.tx_fc_ph_lim.value = self.upstream_port.fc_state[0].ph.tx_credit_limit & 0xff if self.tx_fc_pd_lim is not None: - self.tx_fc_pd_lim.value = 
self.upstream_port.fc_state[0].pd.tx_credit_limit + self.tx_fc_pd_lim.value = self.upstream_port.fc_state[0].pd.tx_credit_limit & 0xfff if self.tx_fc_nph_lim is not None: - self.tx_fc_nph_lim.value = self.upstream_port.fc_state[0].nph.tx_credit_limit + self.tx_fc_nph_lim.value = self.upstream_port.fc_state[0].nph.tx_credit_limit & 0xff if self.tx_fc_npd_lim is not None: - self.tx_fc_npd_lim.value = self.upstream_port.fc_state[0].npd.tx_credit_limit + self.tx_fc_npd_lim.value = self.upstream_port.fc_state[0].npd.tx_credit_limit & 0xfff if self.tx_fc_cplh_lim is not None: - self.tx_fc_cplh_lim.value = self.upstream_port.fc_state[0].cplh.tx_credit_limit + self.tx_fc_cplh_lim.value = self.upstream_port.fc_state[0].cplh.tx_credit_limit & 0xff if self.tx_fc_cpld_lim is not None: - self.tx_fc_cpld_lim.value = self.upstream_port.fc_state[0].cpld.tx_credit_limit + self.tx_fc_cpld_lim.value = self.upstream_port.fc_state[0].cpld.tx_credit_limit & 0xfff if self.tx_fc_ph_cons is not None: - self.tx_fc_ph_cons.value = self.upstream_port.fc_state[0].ph.tx_credits_consumed + self.tx_fc_ph_cons.value = self.upstream_port.fc_state[0].ph.tx_credits_consumed & 0xff if self.tx_fc_pd_cons is not None: - self.tx_fc_pd_cons.value = self.upstream_port.fc_state[0].pd.tx_credits_consumed + self.tx_fc_pd_cons.value = self.upstream_port.fc_state[0].pd.tx_credits_consumed & 0xfff if self.tx_fc_nph_cons is not None: - self.tx_fc_nph_cons.value = self.upstream_port.fc_state[0].nph.tx_credits_consumed + self.tx_fc_nph_cons.value = self.upstream_port.fc_state[0].nph.tx_credits_consumed & 0xff if self.tx_fc_npd_cons is not None: - self.tx_fc_npd_cons.value = self.upstream_port.fc_state[0].npd.tx_credits_consumed + self.tx_fc_npd_cons.value = self.upstream_port.fc_state[0].npd.tx_credits_consumed & 0xfff if self.tx_fc_cplh_cons is not None: - self.tx_fc_cplh_cons.value = self.upstream_port.fc_state[0].cplh.tx_credits_consumed + self.tx_fc_cplh_cons.value = 
self.upstream_port.fc_state[0].cplh.tx_credits_consumed & 0xff if self.tx_fc_cpld_cons is not None: - self.tx_fc_cpld_cons.value = self.upstream_port.fc_state[0].cpld.tx_credits_consumed + self.tx_fc_cpld_cons.value = self.upstream_port.fc_state[0].cpld.tx_credits_consumed & 0xfff class PcieIfTestDevice: diff --git a/tox.ini b/tox.ini index 3b9cea967..d6acdf3c8 100644 --- a/tox.ini +++ b/tox.ini @@ -17,7 +17,7 @@ deps = cocotb == 1.6.1 cocotb-test == 0.2.1 cocotbext-axi == 0.1.16 - cocotbext-pcie == 0.2.8 + cocotbext-pcie == 0.2.10 jinja2 == 3.0.3 commands = From 3f3be1e14d81bbed3bf9f13ab8983bcc4b1d0495 Mon Sep 17 00:00:00 2001 From: Alex Forencich Date: Tue, 2 Aug 2022 22:57:27 -0700 Subject: [PATCH 7/7] Implement flow control for P-Tile Signed-off-by: Alex Forencich --- example/DE10_Agilex/fpga/fpga_24AR0/Makefile | 1 + example/DE10_Agilex/fpga/fpga_24B/Makefile | 1 + .../DE10_Agilex/fpga/tb/fpga_core/Makefile | 1 + .../fpga/tb/fpga_core/test_fpga_core.py | 1 + example/S10DX_DK/fpga/fpga/Makefile | 1 + example/S10DX_DK/fpga/tb/fpga_core/Makefile | 1 + .../fpga/tb/fpga_core/test_fpga_core.py | 1 + .../tb/example_core_pcie_ptile/Makefile | 1 + .../test_example_core_pcie_ptile.py | 1 + rtl/pcie_ptile_fc_counter.v | 82 ++++++++++ rtl/pcie_ptile_if.v | 31 ++-- rtl/pcie_ptile_if_tx.v | 140 +++++++++++++++++- tb/pcie_ptile_if/Makefile | 1 + tb/pcie_ptile_if/test_pcie_ptile_if.py | 1 + tb/pcie_ptile_if_tx/Makefile | 1 + tb/pcie_ptile_if_tx/test_pcie_ptile_if_tx.py | 83 +++++++++++ 16 files changed, 337 insertions(+), 11 deletions(-) create mode 100644 rtl/pcie_ptile_fc_counter.v diff --git a/example/DE10_Agilex/fpga/fpga_24AR0/Makefile b/example/DE10_Agilex/fpga/fpga_24AR0/Makefile index f9f21c479..98b5ed42f 100644 --- a/example/DE10_Agilex/fpga/fpga_24AR0/Makefile +++ b/example/DE10_Agilex/fpga/fpga_24AR0/Makefile @@ -17,6 +17,7 @@ SYN_FILES += lib/pcie/rtl/pcie_ptile_if.v SYN_FILES += lib/pcie/rtl/pcie_ptile_if_rx.v SYN_FILES += lib/pcie/rtl/pcie_ptile_if_tx.v 
SYN_FILES += lib/pcie/rtl/pcie_ptile_cfg.v +SYN_FILES += lib/pcie/rtl/pcie_ptile_fc_counter.v SYN_FILES += lib/pcie/rtl/pcie_axil_master.v SYN_FILES += lib/pcie/rtl/pcie_axi_master.v SYN_FILES += lib/pcie/rtl/pcie_axi_master_rd.v diff --git a/example/DE10_Agilex/fpga/fpga_24B/Makefile b/example/DE10_Agilex/fpga/fpga_24B/Makefile index a909d1af8..4c4723ff1 100644 --- a/example/DE10_Agilex/fpga/fpga_24B/Makefile +++ b/example/DE10_Agilex/fpga/fpga_24B/Makefile @@ -17,6 +17,7 @@ SYN_FILES += lib/pcie/rtl/pcie_ptile_if.v SYN_FILES += lib/pcie/rtl/pcie_ptile_if_rx.v SYN_FILES += lib/pcie/rtl/pcie_ptile_if_tx.v SYN_FILES += lib/pcie/rtl/pcie_ptile_cfg.v +SYN_FILES += lib/pcie/rtl/pcie_ptile_fc_counter.v SYN_FILES += lib/pcie/rtl/pcie_axil_master.v SYN_FILES += lib/pcie/rtl/pcie_axi_master.v SYN_FILES += lib/pcie/rtl/pcie_axi_master_rd.v diff --git a/example/DE10_Agilex/fpga/tb/fpga_core/Makefile b/example/DE10_Agilex/fpga/tb/fpga_core/Makefile index 50cb10b02..75db1b738 100644 --- a/example/DE10_Agilex/fpga/tb/fpga_core/Makefile +++ b/example/DE10_Agilex/fpga/tb/fpga_core/Makefile @@ -38,6 +38,7 @@ VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_ptile_if.v VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_ptile_if_rx.v VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_ptile_if_tx.v VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_ptile_cfg.v +VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_ptile_fc_counter.v VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_axil_master.v VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_axi_master.v VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_axi_master_rd.v diff --git a/example/DE10_Agilex/fpga/tb/fpga_core/test_fpga_core.py b/example/DE10_Agilex/fpga/tb/fpga_core/test_fpga_core.py index 6e3372f7b..50fb2d9cf 100644 --- a/example/DE10_Agilex/fpga/tb/fpga_core/test_fpga_core.py +++ b/example/DE10_Agilex/fpga/tb/fpga_core/test_fpga_core.py @@ -483,6 +483,7 @@ def test_fpga_core(request): os.path.join(pcie_rtl_dir, "pcie_ptile_if_rx.v"), os.path.join(pcie_rtl_dir, 
"pcie_ptile_if_tx.v"), os.path.join(pcie_rtl_dir, "pcie_ptile_cfg.v"), + os.path.join(pcie_rtl_dir, "pcie_ptile_fc_counter.v"), os.path.join(pcie_rtl_dir, "pcie_axil_master.v"), os.path.join(pcie_rtl_dir, "pcie_axi_master.v"), os.path.join(pcie_rtl_dir, "pcie_axi_master_rd.v"), diff --git a/example/S10DX_DK/fpga/fpga/Makefile b/example/S10DX_DK/fpga/fpga/Makefile index 0f66ae08d..093c1f618 100644 --- a/example/S10DX_DK/fpga/fpga/Makefile +++ b/example/S10DX_DK/fpga/fpga/Makefile @@ -17,6 +17,7 @@ SYN_FILES += lib/pcie/rtl/pcie_ptile_if.v SYN_FILES += lib/pcie/rtl/pcie_ptile_if_rx.v SYN_FILES += lib/pcie/rtl/pcie_ptile_if_tx.v SYN_FILES += lib/pcie/rtl/pcie_ptile_cfg.v +SYN_FILES += lib/pcie/rtl/pcie_ptile_fc_counter.v SYN_FILES += lib/pcie/rtl/pcie_axil_master.v SYN_FILES += lib/pcie/rtl/pcie_axi_master.v SYN_FILES += lib/pcie/rtl/pcie_axi_master_rd.v diff --git a/example/S10DX_DK/fpga/tb/fpga_core/Makefile b/example/S10DX_DK/fpga/tb/fpga_core/Makefile index 50cb10b02..75db1b738 100644 --- a/example/S10DX_DK/fpga/tb/fpga_core/Makefile +++ b/example/S10DX_DK/fpga/tb/fpga_core/Makefile @@ -38,6 +38,7 @@ VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_ptile_if.v VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_ptile_if_rx.v VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_ptile_if_tx.v VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_ptile_cfg.v +VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_ptile_fc_counter.v VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_axil_master.v VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_axi_master.v VERILOG_SOURCES += ../../lib/pcie/rtl/pcie_axi_master_rd.v diff --git a/example/S10DX_DK/fpga/tb/fpga_core/test_fpga_core.py b/example/S10DX_DK/fpga/tb/fpga_core/test_fpga_core.py index 6e3372f7b..50fb2d9cf 100644 --- a/example/S10DX_DK/fpga/tb/fpga_core/test_fpga_core.py +++ b/example/S10DX_DK/fpga/tb/fpga_core/test_fpga_core.py @@ -483,6 +483,7 @@ def test_fpga_core(request): os.path.join(pcie_rtl_dir, "pcie_ptile_if_rx.v"), os.path.join(pcie_rtl_dir, "pcie_ptile_if_tx.v"), 
os.path.join(pcie_rtl_dir, "pcie_ptile_cfg.v"), + os.path.join(pcie_rtl_dir, "pcie_ptile_fc_counter.v"), os.path.join(pcie_rtl_dir, "pcie_axil_master.v"), os.path.join(pcie_rtl_dir, "pcie_axi_master.v"), os.path.join(pcie_rtl_dir, "pcie_axi_master_rd.v"), diff --git a/example/common/tb/example_core_pcie_ptile/Makefile b/example/common/tb/example_core_pcie_ptile/Makefile index b3d15d6ef..6b7aa2574 100644 --- a/example/common/tb/example_core_pcie_ptile/Makefile +++ b/example/common/tb/example_core_pcie_ptile/Makefile @@ -37,6 +37,7 @@ VERILOG_SOURCES += ../../../../rtl/pcie_ptile_if.v VERILOG_SOURCES += ../../../../rtl/pcie_ptile_if_rx.v VERILOG_SOURCES += ../../../../rtl/pcie_ptile_if_tx.v VERILOG_SOURCES += ../../../../rtl/pcie_ptile_cfg.v +VERILOG_SOURCES += ../../../../rtl/pcie_ptile_fc_counter.v VERILOG_SOURCES += ../../../../rtl/pcie_axil_master.v VERILOG_SOURCES += ../../../../rtl/pcie_axi_master.v VERILOG_SOURCES += ../../../../rtl/pcie_axi_master_rd.v diff --git a/example/common/tb/example_core_pcie_ptile/test_example_core_pcie_ptile.py b/example/common/tb/example_core_pcie_ptile/test_example_core_pcie_ptile.py index 54ba2f4f0..2e6d6e21c 100644 --- a/example/common/tb/example_core_pcie_ptile/test_example_core_pcie_ptile.py +++ b/example/common/tb/example_core_pcie_ptile/test_example_core_pcie_ptile.py @@ -483,6 +483,7 @@ def test_example_core_pcie_ptile(request, data_width): os.path.join(pcie_rtl_dir, "pcie_ptile_if_rx.v"), os.path.join(pcie_rtl_dir, "pcie_ptile_if_tx.v"), os.path.join(pcie_rtl_dir, "pcie_ptile_cfg.v"), + os.path.join(pcie_rtl_dir, "pcie_ptile_fc_counter.v"), os.path.join(pcie_rtl_dir, "pcie_axil_master.v"), os.path.join(pcie_rtl_dir, "pcie_axi_master.v"), os.path.join(pcie_rtl_dir, "pcie_axi_master_rd.v"), diff --git a/rtl/pcie_ptile_fc_counter.v b/rtl/pcie_ptile_fc_counter.v new file mode 100644 index 000000000..b219e32f3 --- /dev/null +++ b/rtl/pcie_ptile_fc_counter.v @@ -0,0 +1,82 @@ +/* + +Copyright (c) 2022 Alex Forencich + +Permission 
is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE.
+ +*/ + +// Language: Verilog 2001 + +`resetall +`timescale 1ns / 1ps +`default_nettype none + +/* + * P-Tile PCIe flow control counter + */ +module ptile_tx_fc_counter #( + parameter WIDTH = 16, + parameter INDEX = 0 +) +( + input wire clk, + input wire rst, + + input wire [WIDTH-1:0] tx_cdts_limit, + input wire [2:0] tx_cdts_limit_tdm_idx, + input wire [WIDTH-1:0] fc_dec, + output wire [WIDTH-1:0] fc_av +); + +reg [WIDTH-1:0] fc_cap_reg = 0; +reg [WIDTH-1:0] fc_limit_reg = 0; +reg [WIDTH-1:0] fc_inc_reg = 0; +reg [WIDTH-1:0] fc_av_reg = 0; + +assign fc_av = fc_av_reg; + +always @(posedge clk) begin + if (tx_cdts_limit_tdm_idx == INDEX) begin + if (!fc_cap_reg) begin + fc_cap_reg <= tx_cdts_limit; + end + fc_inc_reg <= tx_cdts_limit - fc_limit_reg; + fc_limit_reg <= tx_cdts_limit; + end + + if ($signed({1'b0, fc_av_reg}) - $signed({1'b0, fc_dec}) + $signed({1'b0, fc_inc_reg}) < 0) begin + fc_av_reg <= 0; + end else if ($signed({1'b0, fc_av_reg}) - $signed({1'b0, fc_dec}) + $signed({1'b0, fc_inc_reg}) > fc_cap_reg) begin + fc_av_reg <= fc_cap_reg; + end else begin + fc_av_reg <= $signed({1'b0, fc_av_reg}) - $signed({1'b0, fc_dec}) + $signed({1'b0, fc_inc_reg}); + end + + if (rst) begin + fc_cap_reg <= 0; + fc_limit_reg <= 0; + fc_inc_reg <= 0; + fc_av_reg <= 0; + end +end + +endmodule + +`resetall diff --git a/rtl/pcie_ptile_if.v b/rtl/pcie_ptile_if.v index 1e1f1cd5f..7bfda2416 100644 --- a/rtl/pcie_ptile_if.v +++ b/rtl/pcie_ptile_if.v @@ -217,14 +217,6 @@ module pcie_ptile_if # output wire [F_COUNT-1:0] msix_mask ); -// TODO -assign tx_fc_ph_av = 0; -assign tx_fc_pd_av = 0; -assign tx_fc_nph_av = 0; -assign tx_fc_npd_av = 0; -assign tx_fc_cplh_av = 0; -assign tx_fc_cpld_av = 0; - pcie_ptile_if_rx #( .SEG_COUNT(SEG_COUNT), .SEG_DATA_WIDTH(SEG_DATA_WIDTH), @@ -311,6 +303,12 @@ pcie_ptile_if_tx_inst ( .tx_st_hdr(tx_st_hdr), .tx_st_tlp_prfx(tx_st_tlp_prfx), + /* + * P-Tile TX flow control + */ + .tx_cdts_limit(tx_cdts_limit), + 
.tx_cdts_limit_tdm_idx(tx_cdts_limit_tdm_idx), + /* * TLP input (read request from DMA) */ @@ -365,7 +363,22 @@ pcie_ptile_if_tx_inst ( .tx_msi_wr_req_tlp_valid(tx_msi_wr_req_tlp_valid), .tx_msi_wr_req_tlp_sop(tx_msi_wr_req_tlp_sop), .tx_msi_wr_req_tlp_eop(tx_msi_wr_req_tlp_eop), - .tx_msi_wr_req_tlp_ready(tx_msi_wr_req_tlp_ready) + .tx_msi_wr_req_tlp_ready(tx_msi_wr_req_tlp_ready), + + /* + * Flow control + */ + .tx_fc_ph_av(tx_fc_ph_av), + .tx_fc_pd_av(tx_fc_pd_av), + .tx_fc_nph_av(tx_fc_nph_av), + .tx_fc_npd_av(tx_fc_npd_av), + .tx_fc_cplh_av(tx_fc_cplh_av), + .tx_fc_cpld_av(tx_fc_cpld_av), + + /* + * Configuration + */ + .max_payload_size(max_payload_size) ); pcie_ptile_cfg #( diff --git a/rtl/pcie_ptile_if_tx.v b/rtl/pcie_ptile_if_tx.v index a0b8d0320..34d5ffe5d 100644 --- a/rtl/pcie_ptile_if_tx.v +++ b/rtl/pcie_ptile_if_tx.v @@ -68,6 +68,12 @@ module pcie_ptile_if_tx # output wire [SEG_COUNT*SEG_HDR_WIDTH-1:0] tx_st_hdr, output wire [SEG_COUNT*SEG_PRFX_WIDTH-1:0] tx_st_tlp_prfx, + /* + * P-Tile TX flow control + */ + input wire [15:0] tx_cdts_limit, + input wire [2:0] tx_cdts_limit_tdm_idx, + /* * TLP input (read request from DMA) */ @@ -122,7 +128,22 @@ module pcie_ptile_if_tx # input wire tx_msi_wr_req_tlp_valid, input wire tx_msi_wr_req_tlp_sop, input wire tx_msi_wr_req_tlp_eop, - output wire tx_msi_wr_req_tlp_ready + output wire tx_msi_wr_req_tlp_ready, + + /* + * Flow control + */ + output wire [11:0] tx_fc_ph_av, + output wire [15:0] tx_fc_pd_av, + output wire [11:0] tx_fc_nph_av, + output wire [15:0] tx_fc_npd_av, + output wire [11:0] tx_fc_cplh_av, + output wire [15:0] tx_fc_cpld_av, + + /* + * Configuration + */ + input wire [2:0] max_payload_size ); parameter SEG_STRB_WIDTH = SEG_DATA_WIDTH/32; @@ -199,9 +220,23 @@ wire [INT_TLP_SEG_COUNT-1:0] mux_out_tlp_sop; wire [INT_TLP_SEG_COUNT-1:0] mux_out_tlp_eop; reg mux_out_tlp_ready_cmb; +wire [PORTS-1:0] mux_pause; + wire [PORTS*INT_TLP_SEG_COUNT*TX_SEQ_NUM_WIDTH-1:0] mux_out_sel_tlp_seq; wire 
[PORTS*INT_TLP_SEG_COUNT-1:0] mux_out_sel_tlp_seq_valid; +wire [3:0] mux_tx_fc_ph; +wire [8:0] mux_tx_fc_pd; +wire [3:0] mux_tx_fc_nph; +wire [8:0] mux_tx_fc_npd; +wire [3:0] mux_tx_fc_cplh; +wire [8:0] mux_tx_fc_cpld; + +reg [8:0] max_payload_size_fc_reg = 9'd0; +reg have_p_credit_reg = 1'b0; +reg have_np_credit_reg = 1'b0; +reg have_cpl_credit_reg = 1'b0; + assign mux_in_tlp_data[TLP_DATA_WIDTH*0 +: TLP_DATA_WIDTH] = tx_msi_wr_req_tlp_data; assign mux_in_tlp_strb[TLP_STRB_WIDTH*0 +: TLP_STRB_WIDTH] = tx_msi_wr_req_tlp_strb; assign mux_in_tlp_hdr[TLP_SEG_COUNT*TLP_HDR_WIDTH*0 +: TLP_SEG_COUNT*TLP_HDR_WIDTH] = tx_msi_wr_req_tlp_hdr; @@ -211,6 +246,8 @@ assign mux_in_tlp_sop[TLP_SEG_COUNT*0 +: TLP_SEG_COUNT] = tx_msi_wr_req_tlp_sop; assign mux_in_tlp_eop[TLP_SEG_COUNT*0 +: TLP_SEG_COUNT] = tx_msi_wr_req_tlp_eop; assign tx_msi_wr_req_tlp_ready = mux_in_tlp_ready[0 +: 1]; +assign mux_pause[0] = !have_p_credit_reg; + assign mux_in_tlp_data[TLP_DATA_WIDTH*1 +: TLP_DATA_WIDTH] = tx_cpl_tlp_data; assign mux_in_tlp_strb[TLP_STRB_WIDTH*1 +: TLP_STRB_WIDTH] = tx_cpl_tlp_strb; assign mux_in_tlp_hdr[TLP_SEG_COUNT*TLP_HDR_WIDTH*1 +: TLP_SEG_COUNT*TLP_HDR_WIDTH] = tx_cpl_tlp_hdr; @@ -220,6 +257,8 @@ assign mux_in_tlp_sop[TLP_SEG_COUNT*1 +: TLP_SEG_COUNT] = tx_cpl_tlp_sop; assign mux_in_tlp_eop[TLP_SEG_COUNT*1 +: TLP_SEG_COUNT] = tx_cpl_tlp_eop; assign tx_cpl_tlp_ready = mux_in_tlp_ready[1 +: 1]; +assign mux_pause[1] = !have_cpl_credit_reg; + assign mux_in_tlp_data[TLP_DATA_WIDTH*2 +: TLP_DATA_WIDTH] = 0; assign mux_in_tlp_strb[TLP_STRB_WIDTH*2 +: TLP_STRB_WIDTH] = 0; assign mux_in_tlp_hdr[TLP_SEG_COUNT*TLP_HDR_WIDTH*2 +: TLP_SEG_COUNT*TLP_HDR_WIDTH] = tx_rd_req_tlp_hdr; @@ -229,6 +268,8 @@ assign mux_in_tlp_sop[TLP_SEG_COUNT*2 +: TLP_SEG_COUNT] = {TLP_SEG_COUNT{1'b1}}; assign mux_in_tlp_eop[TLP_SEG_COUNT*2 +: TLP_SEG_COUNT] = {TLP_SEG_COUNT{1'b1}}; assign tx_rd_req_tlp_ready = mux_in_tlp_ready[2 +: 1]; +assign mux_pause[2] = !have_np_credit_reg; + assign 
mux_in_tlp_data[TLP_DATA_WIDTH*3 +: TLP_DATA_WIDTH] = tx_wr_req_tlp_data; assign mux_in_tlp_strb[TLP_STRB_WIDTH*3 +: TLP_STRB_WIDTH] = tx_wr_req_tlp_strb; assign mux_in_tlp_hdr[TLP_SEG_COUNT*TLP_HDR_WIDTH*3 +: TLP_SEG_COUNT*TLP_HDR_WIDTH] = tx_wr_req_tlp_hdr; @@ -238,6 +279,8 @@ assign mux_in_tlp_sop[TLP_SEG_COUNT*3 +: TLP_SEG_COUNT] = tx_wr_req_tlp_sop; assign mux_in_tlp_eop[TLP_SEG_COUNT*3 +: TLP_SEG_COUNT] = tx_wr_req_tlp_eop; assign tx_wr_req_tlp_ready = mux_in_tlp_ready[3 +: 1]; +assign mux_pause[3] = !have_p_credit_reg; + assign m_axis_rd_req_tx_seq_num = mux_out_sel_tlp_seq[INT_TLP_SEG_COUNT*TX_SEQ_NUM_WIDTH*2 +: INT_TLP_SEG_COUNT*TX_SEQ_NUM_WIDTH]; assign m_axis_rd_req_tx_seq_num_valid = mux_out_sel_tlp_seq_valid[INT_TLP_SEG_COUNT*2 +: INT_TLP_SEG_COUNT]; assign m_axis_wr_req_tx_seq_num = mux_out_sel_tlp_seq[INT_TLP_SEG_COUNT*TX_SEQ_NUM_WIDTH*3 +: INT_TLP_SEG_COUNT*TX_SEQ_NUM_WIDTH]; @@ -297,10 +340,20 @@ pcie_tlp_fifo_mux_inst ( .out_tlp_eop(mux_out_tlp_eop), .out_tlp_ready(mux_out_tlp_ready_cmb), + /* + * Flow control count output + */ + .out_fc_ph(mux_tx_fc_ph), + .out_fc_pd(mux_tx_fc_pd), + .out_fc_nph(mux_tx_fc_nph), + .out_fc_npd(mux_tx_fc_npd), + .out_fc_cplh(mux_tx_fc_cplh), + .out_fc_cpld(mux_tx_fc_cpld), + /* * Control */ - .pause(0), + .pause(mux_pause), /* * Status @@ -311,6 +364,84 @@ pcie_tlp_fifo_mux_inst ( .fifo_watermark() ); +ptile_tx_fc_counter #( + .WIDTH(12), + .INDEX(0) +) +fc_counter_ph ( + .clk(clk), + .rst(rst), + .tx_cdts_limit(tx_cdts_limit), + .tx_cdts_limit_tdm_idx(tx_cdts_limit_tdm_idx), + .fc_dec(mux_tx_fc_ph), + .fc_av(tx_fc_ph_av) +); + +ptile_tx_fc_counter #( + .WIDTH(12), + .INDEX(1) +) +fc_counter_nph ( + .clk(clk), + .rst(rst), + .tx_cdts_limit(tx_cdts_limit), + .tx_cdts_limit_tdm_idx(tx_cdts_limit_tdm_idx), + .fc_dec(mux_tx_fc_nph), + .fc_av(tx_fc_nph_av) +); + +ptile_tx_fc_counter #( + .WIDTH(12), + .INDEX(2) +) +fc_counter_cplh ( + .clk(clk), + .rst(rst), + .tx_cdts_limit(tx_cdts_limit), + 
.tx_cdts_limit_tdm_idx(tx_cdts_limit_tdm_idx), + .fc_dec(mux_tx_fc_cplh), + .fc_av(tx_fc_cplh_av) +); + +ptile_tx_fc_counter #( + .WIDTH(16), + .INDEX(4) +) +fc_counter_pd ( + .clk(clk), + .rst(rst), + .tx_cdts_limit(tx_cdts_limit), + .tx_cdts_limit_tdm_idx(tx_cdts_limit_tdm_idx), + .fc_dec(mux_tx_fc_pd), + .fc_av(tx_fc_pd_av) +); + +ptile_tx_fc_counter #( + .WIDTH(16), + .INDEX(5) +) +fc_counter_npd ( + .clk(clk), + .rst(rst), + .tx_cdts_limit(tx_cdts_limit), + .tx_cdts_limit_tdm_idx(tx_cdts_limit_tdm_idx), + .fc_dec(mux_tx_fc_npd), + .fc_av(tx_fc_npd_av) +); + +ptile_tx_fc_counter #( + .WIDTH(16), + .INDEX(6) +) +fc_counter_cpld ( + .clk(clk), + .rst(rst), + .tx_cdts_limit(tx_cdts_limit), + .tx_cdts_limit_tdm_idx(tx_cdts_limit_tdm_idx), + .fc_dec(mux_tx_fc_cpld), + .fc_av(tx_fc_cpld_av) +); + always @* begin mux_out_tlp_ready_cmb = 1'b0; @@ -340,6 +471,11 @@ always @(posedge clk) begin tx_st_ready_delay_reg <= {tx_st_ready_delay_reg, tx_st_ready}; + max_payload_size_fc_reg <= 9'd8 << (max_payload_size > 5 ? 
5 : max_payload_size); + have_p_credit_reg <= (tx_fc_ph_av > 4) && (tx_fc_pd_av > (max_payload_size_fc_reg << 1)); + have_np_credit_reg <= tx_fc_nph_av > 4; + have_cpl_credit_reg <= (tx_fc_cplh_av > 4) && (tx_fc_cpld_av > (max_payload_size_fc_reg << 1)); + if (rst) begin tx_st_valid_reg <= 0; tx_st_ready_delay_reg <= 0; diff --git a/tb/pcie_ptile_if/Makefile b/tb/pcie_ptile_if/Makefile index 324d3d844..7a51817e7 100644 --- a/tb/pcie_ptile_if/Makefile +++ b/tb/pcie_ptile_if/Makefile @@ -33,6 +33,7 @@ VERILOG_SOURCES += ../../rtl/$(DUT).v VERILOG_SOURCES += ../../rtl/$(DUT)_rx.v VERILOG_SOURCES += ../../rtl/$(DUT)_tx.v VERILOG_SOURCES += ../../rtl/pcie_ptile_cfg.v +VERILOG_SOURCES += ../../rtl/pcie_ptile_fc_counter.v VERILOG_SOURCES += ../../rtl/pcie_tlp_demux.v VERILOG_SOURCES += ../../rtl/pcie_tlp_fc_count.v VERILOG_SOURCES += ../../rtl/pcie_tlp_fifo.v diff --git a/tb/pcie_ptile_if/test_pcie_ptile_if.py b/tb/pcie_ptile_if/test_pcie_ptile_if.py index 68a4ad0be..fc26bd36c 100644 --- a/tb/pcie_ptile_if/test_pcie_ptile_if.py +++ b/tb/pcie_ptile_if/test_pcie_ptile_if.py @@ -613,6 +613,7 @@ def test_pcie_ptile_if(request, data_width): os.path.join(rtl_dir, f"{dut}_rx.v"), os.path.join(rtl_dir, f"{dut}_tx.v"), os.path.join(rtl_dir, "pcie_ptile_cfg.v"), + os.path.join(rtl_dir, "pcie_ptile_fc_counter.v"), os.path.join(rtl_dir, "pcie_tlp_demux.v"), os.path.join(rtl_dir, "pcie_tlp_fc_count.v"), os.path.join(rtl_dir, "pcie_tlp_fifo.v"), diff --git a/tb/pcie_ptile_if_tx/Makefile b/tb/pcie_ptile_if_tx/Makefile index 160856e0c..0e087847a 100644 --- a/tb/pcie_ptile_if_tx/Makefile +++ b/tb/pcie_ptile_if_tx/Makefile @@ -30,6 +30,7 @@ DUT = pcie_ptile_if_tx TOPLEVEL = $(DUT) MODULE = test_$(DUT) VERILOG_SOURCES += ../../rtl/$(DUT).v +VERILOG_SOURCES += ../../rtl/pcie_ptile_fc_counter.v VERILOG_SOURCES += ../../rtl/pcie_tlp_fc_count.v VERILOG_SOURCES += ../../rtl/pcie_tlp_fifo_raw.v VERILOG_SOURCES += ../../rtl/pcie_tlp_fifo_mux.v diff --git 
a/tb/pcie_ptile_if_tx/test_pcie_ptile_if_tx.py b/tb/pcie_ptile_if_tx/test_pcie_ptile_if_tx.py
index 763a7b9d4..a8c8ab106 100644
--- a/tb/pcie_ptile_if_tx/test_pcie_ptile_if_tx.py
+++ b/tb/pcie_ptile_if_tx/test_pcie_ptile_if_tx.py
@@ -67,6 +67,20 @@ class TB(object):
         self.sink = PTilePcieSink(PTileTxBus.from_prefix(dut, "tx_st"), dut.clk, dut.rst)
         self.sink.ready_latency = 3
 
+        dut.tx_cdts_limit.setimmediatevalue(0)  # credit-limit inputs start at 0 until run_fc_logic drives them
+        dut.tx_cdts_limit_tdm_idx.setimmediatevalue(0)
+
+        dut.max_payload_size.setimmediatevalue(0)  # held at 0 for the whole test; nothing in this hunk changes it
+
+        self.tx_fc_ph_limit = 0x080  # starting limits: 0x080 for the three header counters ...
+        self.tx_fc_pd_limit = 0x0800  # ... and 0x0800 for the three data counters
+        self.tx_fc_nph_limit = 0x080
+        self.tx_fc_npd_limit = 0x0800
+        self.tx_fc_cplh_limit = 0x080
+        self.tx_fc_cpld_limit = 0x0800
+
+        cocotb.start_soon(self.run_fc_logic())  # background coroutine that keeps presenting the limits above to the DUT
+
     def set_idle_generator(self, generator=None):
         if generator:
             self.rd_req_source.set_pause_generator(generator())
@@ -89,6 +103,34 @@ class TB(object):
         await RisingEdge(self.dut.clk)
         await RisingEdge(self.dut.clk)
 
+    async def run_fc_logic(self):  # never returns: cycles the six tx_fc_*_limit values onto tx_cdts_limit, one per rising clock edge
+        clock_edge_event = RisingEdge(self.dut.clk)  # trigger built once and re-awaited after every slot
+
+        while True:  # slot order is 0, 1, 2, 4, 5, 6 - index 3 is never driven
+            self.dut.tx_cdts_limit.value = self.tx_fc_ph_limit & 0xfff  # header limits are masked to 12 bits
+            self.dut.tx_cdts_limit_tdm_idx.value = 0  # slot 0: tx_fc_ph_limit
+            await clock_edge_event
+
+            self.dut.tx_cdts_limit.value = self.tx_fc_nph_limit & 0xfff
+            self.dut.tx_cdts_limit_tdm_idx.value = 1  # slot 1: tx_fc_nph_limit
+            await clock_edge_event
+
+            self.dut.tx_cdts_limit.value = self.tx_fc_cplh_limit & 0xfff
+            self.dut.tx_cdts_limit_tdm_idx.value = 2  # slot 2: tx_fc_cplh_limit
+            await clock_edge_event
+
+            self.dut.tx_cdts_limit.value = self.tx_fc_pd_limit & 0xffff  # data limits are masked to 16 bits
+            self.dut.tx_cdts_limit_tdm_idx.value = 4  # slot 4: tx_fc_pd_limit
+            await clock_edge_event
+
+            self.dut.tx_cdts_limit.value = self.tx_fc_npd_limit & 0xffff
+            self.dut.tx_cdts_limit_tdm_idx.value = 5  # slot 5: tx_fc_npd_limit
+            await clock_edge_event
+
+            self.dut.tx_cdts_limit.value = self.tx_fc_cpld_limit & 0xffff
+            self.dut.tx_cdts_limit_tdm_idx.value = 6  # slot 6: tx_fc_cpld_limit
+            await clock_edge_event
+
 
 async def run_test_req(dut, payload_lengths=None, payload_data=None, idle_inserter=None, backpressure_inserter=None):
 
@@ -135,6 +177,16 @@ async def
run_test_req(dut, payload_lengths=None, payload_data=None, idle_insert assert test_tlp == rx_tlp + if rx_tlp.is_posted(): + tb.tx_fc_ph_limit += 1 + tb.tx_fc_pd_limit += rx_tlp.get_data_credits() + if rx_tlp.is_nonposted(): + tb.tx_fc_nph_limit += 1 + tb.tx_fc_npd_limit += rx_tlp.get_data_credits() + if rx_tlp.is_completion(): + tb.tx_fc_cplh_limit += 1 + tb.tx_fc_cpld_limit += rx_tlp.get_data_credits() + assert tb.sink.empty() await RisingEdge(dut.clk) @@ -187,6 +239,16 @@ async def run_test_cpl(dut, payload_lengths=None, payload_data=None, idle_insert assert test_tlp == rx_tlp + if rx_tlp.is_posted(): + tb.tx_fc_ph_limit += 1 + tb.tx_fc_pd_limit += rx_tlp.get_data_credits() + if rx_tlp.is_nonposted(): + tb.tx_fc_nph_limit += 1 + tb.tx_fc_npd_limit += rx_tlp.get_data_credits() + if rx_tlp.is_completion(): + tb.tx_fc_cplh_limit += 1 + tb.tx_fc_cpld_limit += rx_tlp.get_data_credits() + assert tb.sink.empty() await RisingEdge(dut.clk) @@ -230,6 +292,16 @@ async def run_test_msi(dut, idle_inserter=None, backpressure_inserter=None): assert test_tlp == rx_tlp + if rx_tlp.is_posted(): + tb.tx_fc_ph_limit += 1 + tb.tx_fc_pd_limit += rx_tlp.get_data_credits() + if rx_tlp.is_nonposted(): + tb.tx_fc_nph_limit += 1 + tb.tx_fc_npd_limit += rx_tlp.get_data_credits() + if rx_tlp.is_completion(): + tb.tx_fc_cplh_limit += 1 + tb.tx_fc_cpld_limit += rx_tlp.get_data_credits() + assert tb.sink.empty() await RisingEdge(dut.clk) @@ -303,6 +375,16 @@ async def run_stress_test(dut, idle_inserter=None, backpressure_inserter=None): elif rx_tlp.fmt_type in (TlpType.CPL, TlpType.CPL_DATA): rx_cpl_tlps.append(rx_tlp) + if rx_tlp.is_posted(): + tb.tx_fc_ph_limit += 1 + tb.tx_fc_pd_limit += rx_tlp.get_data_credits() + if rx_tlp.is_nonposted(): + tb.tx_fc_nph_limit += 1 + tb.tx_fc_npd_limit += rx_tlp.get_data_credits() + if rx_tlp.is_completion(): + tb.tx_fc_cplh_limit += 1 + tb.tx_fc_cpld_limit += rx_tlp.get_data_credits() + for test_tlp in test_wr_tlps: assert test_tlp == rx_wr_tlps.pop(0) @@ 
-368,6 +450,7 @@ def test_pcie_ptile_if_tx(request, data_width): verilog_sources = [ os.path.join(rtl_dir, f"{dut}.v"), + os.path.join(rtl_dir, "pcie_ptile_fc_counter.v"), os.path.join(rtl_dir, "pcie_tlp_fc_count.v"), os.path.join(rtl_dir, "pcie_tlp_fifo_raw.v"), os.path.join(rtl_dir, "pcie_tlp_fifo_mux.v"),