From 4b261150d273144ab3dbd0a2988b6face9b94720 Mon Sep 17 00:00:00 2001 From: Alex Forencich Date: Sun, 15 May 2022 17:57:02 -0700 Subject: [PATCH 1/5] Update axis_arb_mux Signed-off-by: Alex Forencich --- rtl/axis_arb_mux.v | 146 +++++++++++++++++++++++++++++++-------------- 1 file changed, 101 insertions(+), 45 deletions(-) diff --git a/rtl/axis_arb_mux.v b/rtl/axis_arb_mux.v index 99f0a8016..f7e39f3ba 100644 --- a/rtl/axis_arb_mux.v +++ b/rtl/axis_arb_mux.v @@ -40,11 +40,13 @@ module axis_arb_mux # // Propagate tkeep signal parameter KEEP_ENABLE = (DATA_WIDTH>8), // tkeep signal width (words per cycle) - parameter KEEP_WIDTH = (DATA_WIDTH/8), + parameter KEEP_WIDTH = ((DATA_WIDTH+7)/8), // Propagate tid signal parameter ID_ENABLE = 0, - // tid signal width - parameter ID_WIDTH = 8, + // input tid signal width + parameter S_ID_WIDTH = 8, + // output tid signal width + parameter M_ID_WIDTH = S_ID_WIDTH+$clog2(S_COUNT), // Propagate tdest signal parameter DEST_ENABLE = 0, // tdest signal width @@ -53,6 +55,10 @@ module axis_arb_mux # parameter USER_ENABLE = 1, // tuser signal width parameter USER_WIDTH = 1, + // Propagate tlast signal + parameter LAST_ENABLE = 1, + // Update tid with routing information + parameter UPDATE_TID = 0, // select round robin arbitration parameter ARB_TYPE_ROUND_ROBIN = 0, // LSB priority selection @@ -65,58 +71,84 @@ module axis_arb_mux # /* * AXI Stream inputs */ - input wire [S_COUNT*DATA_WIDTH-1:0] s_axis_tdata, - input wire [S_COUNT*KEEP_WIDTH-1:0] s_axis_tkeep, - input wire [S_COUNT-1:0] s_axis_tvalid, - output wire [S_COUNT-1:0] s_axis_tready, - input wire [S_COUNT-1:0] s_axis_tlast, - input wire [S_COUNT*ID_WIDTH-1:0] s_axis_tid, - input wire [S_COUNT*DEST_WIDTH-1:0] s_axis_tdest, - input wire [S_COUNT*USER_WIDTH-1:0] s_axis_tuser, + input wire [S_COUNT*DATA_WIDTH-1:0] s_axis_tdata, + input wire [S_COUNT*KEEP_WIDTH-1:0] s_axis_tkeep, + input wire [S_COUNT-1:0] s_axis_tvalid, + output wire [S_COUNT-1:0] s_axis_tready, + input wire [S_COUNT-1:0] s_axis_tlast, + input wire [S_COUNT*S_ID_WIDTH-1:0] s_axis_tid, + input wire [S_COUNT*DEST_WIDTH-1:0] s_axis_tdest, + input wire [S_COUNT*USER_WIDTH-1:0] s_axis_tuser, /* * AXI Stream output */ - output wire [DATA_WIDTH-1:0] m_axis_tdata, - output wire [KEEP_WIDTH-1:0] m_axis_tkeep, - output wire m_axis_tvalid, - input wire m_axis_tready, - output wire m_axis_tlast, - output wire [ID_WIDTH-1:0] m_axis_tid, - output wire [DEST_WIDTH-1:0] m_axis_tdest, - output wire [USER_WIDTH-1:0] m_axis_tuser + output wire [DATA_WIDTH-1:0] m_axis_tdata, + output wire [KEEP_WIDTH-1:0] m_axis_tkeep, + output wire m_axis_tvalid, + input wire m_axis_tready, + output wire m_axis_tlast, + output wire [M_ID_WIDTH-1:0] m_axis_tid, + output wire [DEST_WIDTH-1:0] m_axis_tdest, + output wire [USER_WIDTH-1:0] m_axis_tuser ); parameter CL_S_COUNT = $clog2(S_COUNT); +parameter S_ID_WIDTH_INT = S_ID_WIDTH > 0 ? S_ID_WIDTH : 1; + +// check configuration +initial begin + if (UPDATE_TID) begin + if (!ID_ENABLE) begin + $error("Error: UPDATE_TID set requires ID_ENABLE set (instance %m)"); + $finish; + end + + if (M_ID_WIDTH < CL_S_COUNT) begin + $error("Error: M_ID_WIDTH too small for port count (instance %m)"); + $finish; + end + end +end + wire [S_COUNT-1:0] request; wire [S_COUNT-1:0] acknowledge; wire [S_COUNT-1:0] grant; wire grant_valid; wire [CL_S_COUNT-1:0] grant_encoded; +// input registers to pipeline arbitration delay +reg [S_COUNT*DATA_WIDTH-1:0] s_axis_tdata_reg = 0; +reg [S_COUNT*KEEP_WIDTH-1:0] s_axis_tkeep_reg = 0; +reg [S_COUNT-1:0] s_axis_tvalid_reg = 0; +reg [S_COUNT-1:0] s_axis_tlast_reg = 0; +reg [S_COUNT*S_ID_WIDTH-1:0] s_axis_tid_reg = 0; +reg [S_COUNT*DEST_WIDTH-1:0] s_axis_tdest_reg = 0; +reg [S_COUNT*USER_WIDTH-1:0] s_axis_tuser_reg = 0; + // internal datapath reg [DATA_WIDTH-1:0] m_axis_tdata_int; reg [KEEP_WIDTH-1:0] m_axis_tkeep_int; reg m_axis_tvalid_int; reg m_axis_tready_int_reg = 1'b0; reg m_axis_tlast_int; -reg [ID_WIDTH-1:0] m_axis_tid_int; +reg [M_ID_WIDTH-1:0] m_axis_tid_int; reg [DEST_WIDTH-1:0] m_axis_tdest_int; reg [USER_WIDTH-1:0] m_axis_tuser_int; wire m_axis_tready_int_early; -assign s_axis_tready = (m_axis_tready_int_reg && grant_valid) << grant_encoded; +assign s_axis_tready = ~s_axis_tvalid_reg | ({S_COUNT{m_axis_tready_int_reg}} & grant); // mux for incoming packet -wire [DATA_WIDTH-1:0] current_s_tdata = s_axis_tdata[grant_encoded*DATA_WIDTH +: DATA_WIDTH]; -wire [KEEP_WIDTH-1:0] current_s_tkeep = s_axis_tkeep[grant_encoded*KEEP_WIDTH +: KEEP_WIDTH]; -wire current_s_tvalid = s_axis_tvalid[grant_encoded]; +wire [DATA_WIDTH-1:0] current_s_tdata = s_axis_tdata_reg[grant_encoded*DATA_WIDTH +: DATA_WIDTH]; +wire [KEEP_WIDTH-1:0] current_s_tkeep = s_axis_tkeep_reg[grant_encoded*KEEP_WIDTH +: KEEP_WIDTH]; +wire current_s_tvalid = s_axis_tvalid_reg[grant_encoded]; wire current_s_tready = s_axis_tready[grant_encoded]; -wire current_s_tlast = s_axis_tlast[grant_encoded]; -wire [ID_WIDTH-1:0] current_s_tid = s_axis_tid[grant_encoded*ID_WIDTH +: ID_WIDTH]; -wire [DEST_WIDTH-1:0] current_s_tdest = s_axis_tdest[grant_encoded*DEST_WIDTH +: DEST_WIDTH]; -wire [USER_WIDTH-1:0] current_s_tuser = s_axis_tuser[grant_encoded*USER_WIDTH +: USER_WIDTH]; +wire current_s_tlast = s_axis_tlast_reg[grant_encoded]; +wire [S_ID_WIDTH-1:0] current_s_tid = s_axis_tid_reg[grant_encoded*S_ID_WIDTH +: S_ID_WIDTH_INT]; +wire [DEST_WIDTH-1:0] current_s_tdest = s_axis_tdest_reg[grant_encoded*DEST_WIDTH +: DEST_WIDTH]; +wire [USER_WIDTH-1:0] current_s_tuser = s_axis_tuser_reg[grant_encoded*USER_WIDTH +: USER_WIDTH]; // arbiter instance arbiter #( @@ -136,8 +168,8 @@ arb_inst ( .grant_encoded(grant_encoded) ); -assign request = s_axis_tvalid & ~grant; -assign acknowledge = grant & s_axis_tvalid & s_axis_tready & s_axis_tlast; +assign request = (s_axis_tvalid_reg & ~grant) | (s_axis_tvalid & grant); +assign acknowledge = grant & s_axis_tvalid_reg & {S_COUNT{m_axis_tready_int_reg}} & (LAST_ENABLE ? s_axis_tlast_reg : {S_COUNT{1'b1}}); always @* begin // pass through selected packet data @@ -146,16 +178,40 @@ always @* begin m_axis_tvalid_int = current_s_tvalid && m_axis_tready_int_reg && grant_valid; m_axis_tlast_int = current_s_tlast; m_axis_tid_int = current_s_tid; + if (UPDATE_TID && S_COUNT > 1) begin + m_axis_tid_int[M_ID_WIDTH-1:M_ID_WIDTH-CL_S_COUNT] = grant_encoded; + end m_axis_tdest_int = current_s_tdest; m_axis_tuser_int = current_s_tuser; end +integer i; + +always @(posedge clk) begin + // register inputs + for (i = 0; i < S_COUNT; i = i + 1) begin + if (s_axis_tready[i]) begin + s_axis_tdata_reg[i*DATA_WIDTH +: DATA_WIDTH] <= s_axis_tdata[i*DATA_WIDTH +: DATA_WIDTH]; + s_axis_tkeep_reg[i*KEEP_WIDTH +: KEEP_WIDTH] <= s_axis_tkeep[i*KEEP_WIDTH +: KEEP_WIDTH]; + s_axis_tvalid_reg[i] <= s_axis_tvalid[i]; + s_axis_tlast_reg[i] <= s_axis_tlast[i]; + s_axis_tid_reg[i*S_ID_WIDTH +: S_ID_WIDTH_INT] <= s_axis_tid[i*S_ID_WIDTH +: S_ID_WIDTH_INT]; + s_axis_tdest_reg[i*DEST_WIDTH +: DEST_WIDTH] <= s_axis_tdest[i*DEST_WIDTH +: DEST_WIDTH]; + s_axis_tuser_reg[i*USER_WIDTH +: USER_WIDTH] <= s_axis_tuser[i*USER_WIDTH +: USER_WIDTH]; + end + end + + if (rst) begin + s_axis_tvalid_reg <= 0; + end +end + // output datapath logic reg [DATA_WIDTH-1:0] m_axis_tdata_reg = {DATA_WIDTH{1'b0}}; reg [KEEP_WIDTH-1:0] m_axis_tkeep_reg = {KEEP_WIDTH{1'b0}}; reg m_axis_tvalid_reg = 1'b0, m_axis_tvalid_next; reg m_axis_tlast_reg = 1'b0; -reg [ID_WIDTH-1:0] m_axis_tid_reg = {ID_WIDTH{1'b0}}; +reg [M_ID_WIDTH-1:0] m_axis_tid_reg = {M_ID_WIDTH{1'b0}}; reg [DEST_WIDTH-1:0] m_axis_tdest_reg = {DEST_WIDTH{1'b0}}; reg [USER_WIDTH-1:0] m_axis_tuser_reg = {USER_WIDTH{1'b0}}; @@ -163,7 +219,7 @@ reg [DATA_WIDTH-1:0] temp_m_axis_tdata_reg = {DATA_WIDTH{1'b0}}; reg [KEEP_WIDTH-1:0] temp_m_axis_tkeep_reg = {KEEP_WIDTH{1'b0}}; reg temp_m_axis_tvalid_reg = 1'b0, temp_m_axis_tvalid_next; reg temp_m_axis_tlast_reg = 1'b0; -reg [ID_WIDTH-1:0] temp_m_axis_tid_reg = {ID_WIDTH{1'b0}}; +reg [M_ID_WIDTH-1:0] temp_m_axis_tid_reg = {M_ID_WIDTH{1'b0}}; reg [DEST_WIDTH-1:0] temp_m_axis_tdest_reg = {DEST_WIDTH{1'b0}}; reg [USER_WIDTH-1:0] temp_m_axis_tuser_reg = {USER_WIDTH{1'b0}}; @@ -175,13 +231,13 @@ reg store_axis_temp_to_output; assign m_axis_tdata = m_axis_tdata_reg; assign m_axis_tkeep = KEEP_ENABLE ? m_axis_tkeep_reg : {KEEP_WIDTH{1'b1}}; assign m_axis_tvalid = m_axis_tvalid_reg; -assign m_axis_tlast = m_axis_tlast_reg; -assign m_axis_tid = ID_ENABLE ? m_axis_tid_reg : {ID_WIDTH{1'b0}}; +assign m_axis_tlast = LAST_ENABLE ? m_axis_tlast_reg : 1'b1; +assign m_axis_tid = ID_ENABLE ? m_axis_tid_reg : {M_ID_WIDTH{1'b0}}; assign m_axis_tdest = DEST_ENABLE ? m_axis_tdest_reg : {DEST_WIDTH{1'b0}}; assign m_axis_tuser = USER_ENABLE ? m_axis_tuser_reg : {USER_WIDTH{1'b0}}; -// enable ready input next cycle if output is ready or the temp reg will not be filled on the next cycle (output reg empty or no input) -assign m_axis_tready_int_early = m_axis_tready || (!temp_m_axis_tvalid_reg && (!m_axis_tvalid_reg || !m_axis_tvalid_int)); +// enable ready input next cycle if output is ready or if both output registers are empty +assign m_axis_tready_int_early = m_axis_tready || (!temp_m_axis_tvalid_reg && !m_axis_tvalid_reg); always @* begin // transfer sink ready state to source @@ -212,15 +268,9 @@ always @* begin end always @(posedge clk) begin - if (rst) begin - m_axis_tvalid_reg <= 1'b0; - m_axis_tready_int_reg <= 1'b0; - temp_m_axis_tvalid_reg <= 1'b0; - end else begin - m_axis_tvalid_reg <= m_axis_tvalid_next; - m_axis_tready_int_reg <= m_axis_tready_int_early; - temp_m_axis_tvalid_reg <= temp_m_axis_tvalid_next; - end + m_axis_tvalid_reg <= m_axis_tvalid_next; + m_axis_tready_int_reg <= m_axis_tready_int_early; + temp_m_axis_tvalid_reg <= temp_m_axis_tvalid_next; // datapath if (store_axis_int_to_output) begin @@ -247,6 +297,12 @@ always @(posedge clk) begin temp_m_axis_tdest_reg <= m_axis_tdest_int; temp_m_axis_tuser_reg <= m_axis_tuser_int; end + + if (rst) begin + m_axis_tvalid_reg <= 1'b0; + m_axis_tready_int_reg <= 1'b0; + temp_m_axis_tvalid_reg <= 1'b0; + end end endmodule From 8cdb780ee33dca2bdb81ace956e3ac2d7c669bd1 Mon Sep 17 00:00:00 2001 From: Alex Forencich Date: Sun, 15 May 2022 17:57:26 -0700 Subject: [PATCH 2/5] Rewrite resets Signed-off-by: Alex Forencich --- rtl/dma_if_desc_mux.v | 18 +++++++++--------- rtl/dma_ram_demux_rd.v | 18 +++++++++--------- rtl/dma_ram_demux_wr.v | 18 +++++++++--------- rtl/pcie_axi_dma_desc_mux.v | 18 +++++++++--------- rtl/pcie_axi_master_rd.v | 18 +++++++++--------- rtl/pcie_tlp_demux.v | 18 +++++++++--------- rtl/pcie_tlp_mux.v | 18 +++++++++--------- rtl/pcie_us_axi_dma_rd.v | 18 +++++++++--------- rtl/pcie_us_axi_dma_wr.v | 18 +++++++++--------- rtl/pcie_us_axi_master_rd.v | 18 +++++++++--------- rtl/pcie_us_axi_master_wr.v | 18 +++++++++--------- rtl/pcie_us_axil_master.v | 18 +++++++++--------- rtl/pcie_us_axis_cq_demux.v | 18 +++++++++--------- rtl/pcie_us_axis_rc_demux.v | 18 +++++++++--------- 14 files changed, 126 insertions(+), 126 deletions(-) diff --git a/rtl/dma_if_desc_mux.v b/rtl/dma_if_desc_mux.v index 92efbd0fd..6bd8345cd 100644 --- a/rtl/dma_if_desc_mux.v +++ b/rtl/dma_if_desc_mux.v @@ -269,15 +269,9 @@ always @* begin end always @(posedge clk) begin - if (rst) begin - m_axis_desc_valid_reg <= 1'b0; - m_axis_desc_ready_int_reg <= 1'b0; - temp_m_axis_desc_valid_reg <= 1'b0; - end else begin - m_axis_desc_valid_reg <= m_axis_desc_valid_next; - m_axis_desc_ready_int_reg <= m_axis_desc_ready_int_early; - temp_m_axis_desc_valid_reg <= temp_m_axis_desc_valid_next; - end + m_axis_desc_valid_reg <= m_axis_desc_valid_next; + m_axis_desc_ready_int_reg <= m_axis_desc_ready_int_early; + temp_m_axis_desc_valid_reg <= temp_m_axis_desc_valid_next; // datapath if (store_axis_int_to_output) begin @@ -307,6 +301,12 @@ always @(posedge clk) begin temp_m_axis_desc_len_reg <= m_axis_desc_len_int; temp_m_axis_desc_tag_reg <= m_axis_desc_tag_int; end + + if (rst) begin + m_axis_desc_valid_reg <= 1'b0; + m_axis_desc_ready_int_reg <= 1'b0; + temp_m_axis_desc_valid_reg <= 1'b0; + end end // descriptor status demux diff --git a/rtl/dma_ram_demux_rd.v b/rtl/dma_ram_demux_rd.v index 815c3f175..21e22d327 100644 --- a/rtl/dma_ram_demux_rd.v +++ b/rtl/dma_ram_demux_rd.v @@ -217,15 +217,9 @@ for (n = 0; n < SEG_COUNT; n = n + 1) begin end always @(posedge clk) begin - if (rst) begin - seg_ram_rd_cmd_valid_reg <= {PORTS{1'b0}}; - seg_ram_rd_cmd_ready_int_reg <= 1'b0; - temp_seg_ram_rd_cmd_valid_reg <= {PORTS{1'b0}}; - end else begin - seg_ram_rd_cmd_valid_reg <= seg_ram_rd_cmd_valid_next; - seg_ram_rd_cmd_ready_int_reg <= seg_ram_rd_cmd_ready_int_early; - temp_seg_ram_rd_cmd_valid_reg <= temp_seg_ram_rd_cmd_valid_next; - end + seg_ram_rd_cmd_valid_reg <= seg_ram_rd_cmd_valid_next; + seg_ram_rd_cmd_ready_int_reg <= seg_ram_rd_cmd_ready_int_early; + temp_seg_ram_rd_cmd_valid_reg <= temp_seg_ram_rd_cmd_valid_next; // datapath if (store_axis_resp_int_to_output) begin @@ -240,6 +234,12 @@ for (n = 0; n < SEG_COUNT; n = n + 1) begin temp_seg_ram_rd_cmd_sel_reg <= seg_ram_rd_cmd_sel_int; temp_seg_ram_rd_cmd_addr_reg <= seg_ram_rd_cmd_addr_int; end + + if (rst) begin + seg_ram_rd_cmd_valid_reg <= {PORTS{1'b0}}; + seg_ram_rd_cmd_ready_int_reg <= 1'b0; + temp_seg_ram_rd_cmd_valid_reg <= {PORTS{1'b0}}; + end end // RAM read response mux diff --git a/rtl/dma_ram_demux_wr.v b/rtl/dma_ram_demux_wr.v index 07a33c3d3..23fc9056a 100644 --- a/rtl/dma_ram_demux_wr.v +++ b/rtl/dma_ram_demux_wr.v @@ -234,15 +234,9 @@ for (n = 0; n < SEG_COUNT; n = n + 1) begin end always @(posedge clk) begin - if (rst) begin - seg_ram_wr_cmd_valid_reg <= {PORTS{1'b0}}; - seg_ram_wr_cmd_ready_int_reg <= 1'b0; - temp_seg_ram_wr_cmd_valid_reg <= {PORTS{1'b0}}; - end else begin - seg_ram_wr_cmd_valid_reg <= seg_ram_wr_cmd_valid_next; - seg_ram_wr_cmd_ready_int_reg <= seg_ram_wr_cmd_ready_int_early; - temp_seg_ram_wr_cmd_valid_reg <= temp_seg_ram_wr_cmd_valid_next; - end + seg_ram_wr_cmd_valid_reg <= seg_ram_wr_cmd_valid_next; + seg_ram_wr_cmd_ready_int_reg <= seg_ram_wr_cmd_ready_int_early; + temp_seg_ram_wr_cmd_valid_reg <= temp_seg_ram_wr_cmd_valid_next; // datapath if (store_axis_resp_int_to_output) begin @@ -263,6 +257,12 @@ for (n = 0; n < SEG_COUNT; n = n + 1) begin temp_seg_ram_wr_cmd_addr_reg <= seg_ram_wr_cmd_addr_int; temp_seg_ram_wr_cmd_data_reg <= seg_ram_wr_cmd_data_int; end + + if (rst) begin + seg_ram_wr_cmd_valid_reg <= {PORTS{1'b0}}; + seg_ram_wr_cmd_ready_int_reg <= 1'b0; + temp_seg_ram_wr_cmd_valid_reg <= {PORTS{1'b0}}; + end end // RAM write done mux diff --git a/rtl/pcie_axi_dma_desc_mux.v b/rtl/pcie_axi_dma_desc_mux.v index 0a120ddd2..6a79c40cb 100644 --- a/rtl/pcie_axi_dma_desc_mux.v +++ b/rtl/pcie_axi_dma_desc_mux.v @@ -215,15 +215,9 @@ always @* begin end always @(posedge clk) begin - if (rst) begin - m_axis_desc_valid_reg <= 1'b0; - m_axis_desc_ready_int_reg <= 1'b0; - temp_m_axis_desc_valid_reg <= 1'b0; - end else begin - m_axis_desc_valid_reg <= m_axis_desc_valid_next; - m_axis_desc_ready_int_reg <= m_axis_desc_ready_int_early; - temp_m_axis_desc_valid_reg <= temp_m_axis_desc_valid_next; - end + m_axis_desc_valid_reg <= m_axis_desc_valid_next; + m_axis_desc_ready_int_reg <= m_axis_desc_ready_int_early; + temp_m_axis_desc_valid_reg <= temp_m_axis_desc_valid_next; // datapath if (store_axis_int_to_output) begin @@ -244,6 +238,12 @@ always @(posedge clk) begin temp_m_axis_desc_len_reg <= m_axis_desc_len_int; temp_m_axis_desc_tag_reg <= m_axis_desc_tag_int; end + + if (rst) begin + m_axis_desc_valid_reg <= 1'b0; + m_axis_desc_ready_int_reg <= 1'b0; + temp_m_axis_desc_valid_reg <= 1'b0; + end end // descriptor status demux diff --git a/rtl/pcie_axi_master_rd.v b/rtl/pcie_axi_master_rd.v index feb4f56ff..19f7cfd8e 100644 --- a/rtl/pcie_axi_master_rd.v +++ b/rtl/pcie_axi_master_rd.v @@ -945,15 +945,9 @@ always @* begin end always @(posedge clk) begin - if (rst) begin - tx_cpl_tlp_valid_reg <= 1'b0; - tx_cpl_tlp_ready_int_reg <= 1'b0; - temp_tx_cpl_tlp_valid_reg <= 1'b0; - end else begin - tx_cpl_tlp_valid_reg <= tx_cpl_tlp_valid_next; - tx_cpl_tlp_ready_int_reg <= tx_cpl_tlp_ready_int_early; - temp_tx_cpl_tlp_valid_reg <= temp_tx_cpl_tlp_valid_next; - end + tx_cpl_tlp_valid_reg <= tx_cpl_tlp_valid_next; + tx_cpl_tlp_ready_int_reg <= tx_cpl_tlp_ready_int_early; + temp_tx_cpl_tlp_valid_reg <= temp_tx_cpl_tlp_valid_next; // datapath if (store_axis_int_to_output) begin @@ -977,6 +971,12 @@ always @(posedge clk) begin temp_tx_cpl_tlp_sop_reg <= tx_cpl_tlp_sop_int; temp_tx_cpl_tlp_eop_reg <= tx_cpl_tlp_eop_int; end + + if (rst) begin + tx_cpl_tlp_valid_reg <= 1'b0; + tx_cpl_tlp_ready_int_reg <= 1'b0; + temp_tx_cpl_tlp_valid_reg <= 1'b0; + end end endmodule diff --git a/rtl/pcie_tlp_demux.v b/rtl/pcie_tlp_demux.v index 341287d88..8c4377ac1 100644 --- a/rtl/pcie_tlp_demux.v +++ b/rtl/pcie_tlp_demux.v @@ -280,15 +280,9 @@ always @* begin end always @(posedge clk) begin - if (rst) begin - out_tlp_valid_reg <= {PORTS{1'b0}}; - out_tlp_ready_int_reg <= 1'b0; - temp_out_tlp_valid_reg <= 1'b0; - end else begin - out_tlp_valid_reg <= out_tlp_valid_next; - out_tlp_ready_int_reg <= out_tlp_ready_int_early; - temp_out_tlp_valid_reg <= temp_out_tlp_valid_next; - end + out_tlp_valid_reg <= out_tlp_valid_next; + out_tlp_ready_int_reg <= out_tlp_ready_int_early; + temp_out_tlp_valid_reg <= temp_out_tlp_valid_next; // datapath if (store_int_to_output) begin @@ -321,6 +315,12 @@ always @(posedge clk) begin temp_out_tlp_sop_reg <= out_tlp_sop_int; temp_out_tlp_eop_reg <= out_tlp_eop_int; end + + if (rst) begin + out_tlp_valid_reg <= {PORTS{1'b0}}; + out_tlp_ready_int_reg <= 1'b0; + temp_out_tlp_valid_reg <= 1'b0; + end end endmodule diff --git a/rtl/pcie_tlp_mux.v b/rtl/pcie_tlp_mux.v index 4f6bf6a87..d076227e4 100644 --- a/rtl/pcie_tlp_mux.v +++ b/rtl/pcie_tlp_mux.v @@ -236,15 +236,9 @@ always @* begin end always @(posedge clk) begin - if (rst) begin - out_tlp_valid_reg <= 1'b0; - out_tlp_ready_int_reg <= 1'b0; - temp_out_tlp_valid_reg <= 1'b0; - end else begin - out_tlp_valid_reg <= out_tlp_valid_next; - out_tlp_ready_int_reg <= out_tlp_ready_int_early; - temp_out_tlp_valid_reg <= temp_out_tlp_valid_next; - end + out_tlp_valid_reg <= out_tlp_valid_next; + out_tlp_ready_int_reg <= out_tlp_ready_int_early; + temp_out_tlp_valid_reg <= temp_out_tlp_valid_next; // datapath if (store_axis_int_to_output) begin @@ -277,6 +271,12 @@ always @(posedge clk) begin temp_out_tlp_sop_reg <= out_tlp_sop_int; temp_out_tlp_eop_reg <= out_tlp_eop_int; end + + if (rst) begin + out_tlp_valid_reg <= 1'b0; + out_tlp_ready_int_reg <= 1'b0; + temp_out_tlp_valid_reg <= 1'b0; + end end endmodule diff --git a/rtl/pcie_us_axi_dma_rd.v b/rtl/pcie_us_axi_dma_rd.v index 5752fa44a..7142976b0 100644 --- a/rtl/pcie_us_axi_dma_rd.v +++ b/rtl/pcie_us_axi_dma_rd.v @@ -1853,15 +1853,9 @@ always @* begin end always @(posedge clk) begin - if (rst) begin - m_axi_wvalid_reg <= 1'b0; - m_axi_wready_int_reg <= 1'b0; - temp_m_axi_wvalid_reg <= 1'b0; - end else begin - m_axi_wvalid_reg <= m_axi_wvalid_next; - m_axi_wready_int_reg <= m_axi_wready_int_early; - temp_m_axi_wvalid_reg <= temp_m_axi_wvalid_next; - end + m_axi_wvalid_reg <= m_axi_wvalid_next; + m_axi_wready_int_reg <= m_axi_wready_int_early; + temp_m_axi_wvalid_reg <= temp_m_axi_wvalid_next; // datapath if (store_axi_w_int_to_output) begin @@ -1879,6 +1873,12 @@ always @(posedge clk) begin temp_m_axi_wstrb_reg <= m_axi_wstrb_int; temp_m_axi_wlast_reg <= m_axi_wlast_int; end + + if (rst) begin + m_axi_wvalid_reg <= 1'b0; + m_axi_wready_int_reg <= 1'b0; + temp_m_axi_wvalid_reg <= 1'b0; + end end endmodule diff --git a/rtl/pcie_us_axi_dma_wr.v b/rtl/pcie_us_axi_dma_wr.v index 1796b6387..b43b317ad 100644 --- a/rtl/pcie_us_axi_dma_wr.v +++ b/rtl/pcie_us_axi_dma_wr.v @@ -1253,15 +1253,9 @@ always @* begin end always @(posedge clk) begin - if (rst) begin - m_axis_rq_tvalid_reg <= 1'b0; - m_axis_rq_tready_int_reg <= 1'b0; - temp_m_axis_rq_tvalid_reg <= 1'b0; - end else begin - m_axis_rq_tvalid_reg <= m_axis_rq_tvalid_next; - m_axis_rq_tready_int_reg <= m_axis_rq_tready_int_early; - temp_m_axis_rq_tvalid_reg <= temp_m_axis_rq_tvalid_next; - end + m_axis_rq_tvalid_reg <= m_axis_rq_tvalid_next; + m_axis_rq_tready_int_reg <= m_axis_rq_tready_int_early; + temp_m_axis_rq_tvalid_reg <= temp_m_axis_rq_tvalid_next; // datapath if (store_axis_rq_int_to_output) begin @@ -1282,6 +1276,12 @@ always @(posedge clk) begin temp_m_axis_rq_tlast_reg <= m_axis_rq_tlast_int; temp_m_axis_rq_tuser_reg <= m_axis_rq_tuser_int; end + + if (rst) begin + m_axis_rq_tvalid_reg <= 1'b0; + m_axis_rq_tready_int_reg <= 1'b0; + temp_m_axis_rq_tvalid_reg <= 1'b0; + end end endmodule diff --git a/rtl/pcie_us_axi_master_rd.v b/rtl/pcie_us_axi_master_rd.v index 5d722452d..6ba9ae57d 100644 --- a/rtl/pcie_us_axi_master_rd.v +++ b/rtl/pcie_us_axi_master_rd.v @@ -1156,15 +1156,9 @@ always @* begin end always @(posedge clk) begin - if (rst) begin - m_axis_cc_tvalid_reg <= 1'b0; - m_axis_cc_tready_int_reg <= 1'b0; - temp_m_axis_cc_tvalid_reg <= 1'b0; - end else begin - m_axis_cc_tvalid_reg <= m_axis_cc_tvalid_next; - m_axis_cc_tready_int_reg <= m_axis_cc_tready_int_early; - temp_m_axis_cc_tvalid_reg <= temp_m_axis_cc_tvalid_next; - end + m_axis_cc_tvalid_reg <= m_axis_cc_tvalid_next; + m_axis_cc_tready_int_reg <= m_axis_cc_tready_int_early; + temp_m_axis_cc_tvalid_reg <= temp_m_axis_cc_tvalid_next; // datapath if (store_axis_cc_int_to_output) begin @@ -1185,6 +1179,12 @@ always @(posedge clk) begin temp_m_axis_cc_tlast_reg <= m_axis_cc_tlast_int; temp_m_axis_cc_tuser_reg <= m_axis_cc_tuser_int; end + + if (rst) begin + m_axis_cc_tvalid_reg <= 1'b0; + m_axis_cc_tready_int_reg <= 1'b0; + temp_m_axis_cc_tvalid_reg <= 1'b0; + end end endmodule diff --git a/rtl/pcie_us_axi_master_wr.v b/rtl/pcie_us_axi_master_wr.v index a79423ff4..7128a8ab5 100644 --- a/rtl/pcie_us_axi_master_wr.v +++ b/rtl/pcie_us_axi_master_wr.v @@ -647,15 +647,9 @@ always @* begin end always @(posedge clk) begin - if (rst) begin - m_axi_wvalid_reg <= 1'b0; - m_axi_wready_int_reg <= 1'b0; - temp_m_axi_wvalid_reg <= 1'b0; - end else begin - m_axi_wvalid_reg <= m_axi_wvalid_next; - m_axi_wready_int_reg <= m_axi_wready_int_early; - temp_m_axi_wvalid_reg <= temp_m_axi_wvalid_next; - end + m_axi_wvalid_reg <= m_axi_wvalid_next; + m_axi_wready_int_reg <= m_axi_wready_int_early; + temp_m_axi_wvalid_reg <= temp_m_axi_wvalid_next; // datapath if (store_axi_w_int_to_output) begin @@ -673,6 +667,12 @@ always @(posedge clk) begin temp_m_axi_wstrb_reg <= m_axi_wstrb_int; temp_m_axi_wlast_reg <= m_axi_wlast_int; end + + if (rst) begin + m_axi_wvalid_reg <= 1'b0; + m_axi_wready_int_reg <= 1'b0; + temp_m_axi_wvalid_reg <= 1'b0; + end end endmodule diff --git a/rtl/pcie_us_axil_master.v b/rtl/pcie_us_axil_master.v index f13f26933..af6259c53 100644 --- a/rtl/pcie_us_axil_master.v +++ b/rtl/pcie_us_axil_master.v @@ -897,15 +897,9 @@ always @* begin end always @(posedge clk) begin - if (rst) begin - m_axis_cc_tvalid_reg <= 1'b0; - m_axis_cc_tready_int_reg <= 1'b0; - temp_m_axis_cc_tvalid_reg <= 1'b0; - end else begin - m_axis_cc_tvalid_reg <= m_axis_cc_tvalid_next; - m_axis_cc_tready_int_reg <= m_axis_cc_tready_int_early; - temp_m_axis_cc_tvalid_reg <= temp_m_axis_cc_tvalid_next; - end + m_axis_cc_tvalid_reg <= m_axis_cc_tvalid_next; + m_axis_cc_tready_int_reg <= m_axis_cc_tready_int_early; + temp_m_axis_cc_tvalid_reg <= temp_m_axis_cc_tvalid_next; // datapath if (store_axis_int_to_output) begin @@ -926,6 +920,12 @@ always @(posedge clk) begin temp_m_axis_cc_tlast_reg <= m_axis_cc_tlast_int; temp_m_axis_cc_tuser_reg <= m_axis_cc_tuser_int; end + + if (rst) begin + m_axis_cc_tvalid_reg <= 1'b0; + m_axis_cc_tready_int_reg <= 1'b0; + temp_m_axis_cc_tvalid_reg <= 1'b0; + end end endmodule diff --git a/rtl/pcie_us_axis_cq_demux.v b/rtl/pcie_us_axis_cq_demux.v index 6b7ce7ee5..d8c4dbe07 100644 --- a/rtl/pcie_us_axis_cq_demux.v +++ b/rtl/pcie_us_axis_cq_demux.v @@ -283,15 +283,9 @@ always @* begin end always @(posedge clk) begin - if (rst) begin - m_axis_cq_tvalid_reg <= {M_COUNT{1'b0}}; - m_axis_cq_tready_int_reg <= 1'b0; - temp_m_axis_cq_tvalid_reg <= 1'b0; - end else begin - m_axis_cq_tvalid_reg <= m_axis_cq_tvalid_next; - m_axis_cq_tready_int_reg <= m_axis_cq_tready_int_early; - temp_m_axis_cq_tvalid_reg <= temp_m_axis_cq_tvalid_next; - end + m_axis_cq_tvalid_reg <= m_axis_cq_tvalid_next; + m_axis_cq_tready_int_reg <= m_axis_cq_tready_int_early; + temp_m_axis_cq_tvalid_reg <= temp_m_axis_cq_tvalid_next; // datapath if (store_axis_int_to_output) begin @@ -312,6 +306,12 @@ always @(posedge clk) begin temp_m_axis_cq_tlast_reg <= m_axis_cq_tlast_int; temp_m_axis_cq_tuser_reg <= m_axis_cq_tuser_int; end + + if (rst) begin + m_axis_cq_tvalid_reg <= {M_COUNT{1'b0}}; + m_axis_cq_tready_int_reg <= 1'b0; + temp_m_axis_cq_tvalid_reg <= 1'b0; + end end endmodule diff --git a/rtl/pcie_us_axis_rc_demux.v b/rtl/pcie_us_axis_rc_demux.v index 688594739..341e4e0be 100644 --- a/rtl/pcie_us_axis_rc_demux.v +++ b/rtl/pcie_us_axis_rc_demux.v @@ -231,15 +231,9 @@ always @* begin end always @(posedge clk) begin - if (rst) begin - m_axis_rc_tvalid_reg <= {M_COUNT{1'b0}}; - m_axis_rc_tready_int_reg <= 1'b0; - temp_m_axis_rc_tvalid_reg <= 1'b0; - end else begin - m_axis_rc_tvalid_reg <= m_axis_rc_tvalid_next; - m_axis_rc_tready_int_reg <= m_axis_rc_tready_int_early; - temp_m_axis_rc_tvalid_reg <= temp_m_axis_rc_tvalid_next; - end + m_axis_rc_tvalid_reg <= m_axis_rc_tvalid_next; + m_axis_rc_tready_int_reg <= m_axis_rc_tready_int_early; + temp_m_axis_rc_tvalid_reg <= temp_m_axis_rc_tvalid_next; // datapath if (store_axis_int_to_output) begin @@ -260,6 +254,12 @@ always @(posedge clk) begin temp_m_axis_rc_tlast_reg <= m_axis_rc_tlast_int; temp_m_axis_rc_tuser_reg <= m_axis_rc_tuser_int; end + + if (rst) begin + m_axis_rc_tvalid_reg <= {M_COUNT{1'b0}}; + m_axis_rc_tready_int_reg <= 1'b0; + temp_m_axis_rc_tvalid_reg <= 1'b0; + end end endmodule From ae1f4a9a22920446f840142e0607ae5910267c43 Mon Sep 17 00:00:00 2001 From: Alex Forencich Date: Sun, 15 May 2022 19:25:30 -0700 Subject: [PATCH 3/5] Rewrite early ready condition Signed-off-by: Alex Forencich --- rtl/dma_if_desc_mux.v | 4 ++-- rtl/dma_ram_demux_rd.v | 4 ++-- rtl/dma_ram_demux_wr.v | 4 ++-- rtl/pcie_axi_dma_desc_mux.v | 4 ++-- rtl/pcie_axi_master_rd.v | 4 ++-- rtl/pcie_tlp_demux.v | 4 ++-- rtl/pcie_tlp_mux.v | 4 ++-- rtl/pcie_us_axi_dma_rd.v | 4 ++-- rtl/pcie_us_axi_dma_wr.v | 4 ++-- rtl/pcie_us_axi_master_rd.v | 4 ++-- rtl/pcie_us_axi_master_wr.v | 4 ++-- rtl/pcie_us_axil_master.v | 4 ++-- rtl/pcie_us_axis_cq_demux.v | 4 ++-- rtl/pcie_us_axis_rc_demux.v | 4 ++-- 14 files changed, 28 insertions(+), 28 deletions(-) diff --git a/rtl/dma_if_desc_mux.v b/rtl/dma_if_desc_mux.v index 6bd8345cd..47892121b 100644 --- a/rtl/dma_if_desc_mux.v +++ b/rtl/dma_if_desc_mux.v @@ -237,8 +237,8 @@ assign m_axis_desc_len = m_axis_desc_len_reg; assign m_axis_desc_tag = m_axis_desc_tag_reg; assign m_axis_desc_valid = m_axis_desc_valid_reg; -// enable ready input next cycle if output is ready or the temp reg will not be filled on the next cycle (output reg empty or no input) -assign m_axis_desc_ready_int_early = m_axis_desc_ready || (!temp_m_axis_desc_valid_reg && (!m_axis_desc_valid_reg || !m_axis_desc_valid_int)); +// enable ready input next cycle if output is ready or if both output registers are empty +assign m_axis_desc_ready_int_early = m_axis_desc_ready || (!temp_m_axis_desc_valid_reg && !m_axis_desc_valid_reg); always @* begin // transfer sink ready state to source diff --git a/rtl/dma_ram_demux_rd.v b/rtl/dma_ram_demux_rd.v index 21e22d327..ef4da47f1 100644 --- a/rtl/dma_ram_demux_rd.v +++ b/rtl/dma_ram_demux_rd.v @@ -185,8 +185,8 @@ for (n = 0; n < SEG_COUNT; n = n + 1) begin assign seg_ram_rd_cmd_addr = {PORTS{seg_ram_rd_cmd_addr_reg}}; assign seg_ram_rd_cmd_valid = seg_ram_rd_cmd_valid_reg; - // enable ready input next cycle if output is ready or the temp reg will not be filled on the next cycle (output reg empty or no input) - assign seg_ram_rd_cmd_ready_int_early = (seg_ram_rd_cmd_ready & seg_ram_rd_cmd_valid_reg) || (!temp_seg_ram_rd_cmd_valid_reg && (!seg_ram_rd_cmd_valid_reg || !seg_ram_rd_cmd_valid_int)); + // enable ready input next cycle if output is ready or if both output registers are empty + assign seg_ram_rd_cmd_ready_int_early = (seg_ram_rd_cmd_ready & seg_ram_rd_cmd_valid) || (!temp_seg_ram_rd_cmd_valid_reg && !seg_ram_rd_cmd_valid_reg); always @* begin // transfer sink ready state to source diff --git a/rtl/dma_ram_demux_wr.v b/rtl/dma_ram_demux_wr.v index 23fc9056a..4f4750005 100644 --- a/rtl/dma_ram_demux_wr.v +++ b/rtl/dma_ram_demux_wr.v @@ -202,8 +202,8 @@ for (n = 0; n < SEG_COUNT; n = n + 1) begin assign seg_ram_wr_cmd_data = {PORTS{seg_ram_wr_cmd_data_reg}}; assign seg_ram_wr_cmd_valid = seg_ram_wr_cmd_valid_reg; - // enable ready input next cycle if output is ready or the temp reg will not be filled on the next cycle (output reg empty or no input) - assign seg_ram_wr_cmd_ready_int_early = (seg_ram_wr_cmd_ready & seg_ram_wr_cmd_valid_reg) || (!temp_seg_ram_wr_cmd_valid_reg && (!seg_ram_wr_cmd_valid_reg || !seg_ram_wr_cmd_valid_int)); + // enable ready input next cycle if output is ready or if both output registers are empty + assign seg_ram_wr_cmd_ready_int_early = (seg_ram_wr_cmd_ready & seg_ram_wr_cmd_valid) || (!temp_seg_ram_wr_cmd_valid_reg && !seg_ram_wr_cmd_valid_reg); always @* begin // transfer sink ready state to source diff --git a/rtl/pcie_axi_dma_desc_mux.v b/rtl/pcie_axi_dma_desc_mux.v index 6a79c40cb..fa0a3cea7 100644 --- a/rtl/pcie_axi_dma_desc_mux.v +++ b/rtl/pcie_axi_dma_desc_mux.v @@ -183,8 +183,8 @@ assign m_axis_desc_len = m_axis_desc_len_reg; assign m_axis_desc_tag = m_axis_desc_tag_reg; assign m_axis_desc_valid = m_axis_desc_valid_reg; -// enable ready input next cycle if output is ready or the temp reg will not be filled on the next cycle (output reg empty or no input) -assign m_axis_desc_ready_int_early = m_axis_desc_ready || (!temp_m_axis_desc_valid_reg && (!m_axis_desc_valid_reg || !m_axis_desc_valid_int)); +// enable ready input next cycle if output is ready or if both output registers are empty +assign m_axis_desc_ready_int_early = m_axis_desc_ready || (!temp_m_axis_desc_valid_reg && !m_axis_desc_valid_reg); always @* begin // transfer sink ready state to source diff --git a/rtl/pcie_axi_master_rd.v b/rtl/pcie_axi_master_rd.v index 19f7cfd8e..e29ad1355 100644 --- a/rtl/pcie_axi_master_rd.v +++ b/rtl/pcie_axi_master_rd.v @@ -913,8 +913,8 @@ assign tx_cpl_tlp_valid = tx_cpl_tlp_valid_reg; assign tx_cpl_tlp_sop = tx_cpl_tlp_sop_reg; assign tx_cpl_tlp_eop = tx_cpl_tlp_eop_reg; -// enable ready input next cycle if output is ready or the temp reg will not be filled on the next cycle (output reg empty or no input) -assign tx_cpl_tlp_ready_int_early = tx_cpl_tlp_ready || (!temp_tx_cpl_tlp_valid_reg && (!tx_cpl_tlp_valid_reg || !tx_cpl_tlp_valid_int)); +// enable ready input next cycle if output is ready or if both output registers are empty +assign tx_cpl_tlp_ready_int_early = tx_cpl_tlp_ready || (!temp_tx_cpl_tlp_valid_reg && !tx_cpl_tlp_valid_reg); always @* begin // transfer sink ready state to source diff --git a/rtl/pcie_tlp_demux.v b/rtl/pcie_tlp_demux.v index 8c4377ac1..2bdb03bf8 100644 --- a/rtl/pcie_tlp_demux.v +++ b/rtl/pcie_tlp_demux.v @@ -248,8 +248,8 @@ assign out_tlp_valid = out_tlp_valid_reg; assign out_tlp_sop = {PORTS{out_tlp_sop_reg}}; assign out_tlp_eop = {PORTS{out_tlp_eop_reg}}; -// enable ready input next cycle if output is ready or the temp reg will not be filled on the next cycle (output reg empty or no input) -assign out_tlp_ready_int_early = (out_tlp_ready & out_tlp_valid) || (!temp_out_tlp_valid_reg && (!out_tlp_valid || !out_tlp_valid_int)); +// enable ready input next cycle if output is ready or if both output registers are empty +assign out_tlp_ready_int_early = (out_tlp_ready & out_tlp_valid) || (!temp_out_tlp_valid_reg && !out_tlp_valid_reg); always @* begin // transfer sink ready state to source diff --git a/rtl/pcie_tlp_mux.v b/rtl/pcie_tlp_mux.v index d076227e4..e1311106a 100644 --- a/rtl/pcie_tlp_mux.v +++ b/rtl/pcie_tlp_mux.v @@ -204,8 +204,8 @@ assign out_tlp_valid = out_tlp_valid_reg; assign out_tlp_sop = out_tlp_sop_reg; assign out_tlp_eop = out_tlp_eop_reg; -// enable ready input next cycle if output is ready or the temp reg will not be filled on the next cycle (output reg empty or no input) -assign out_tlp_ready_int_early = out_tlp_ready || (!temp_out_tlp_valid_reg && (!out_tlp_valid_reg || !out_tlp_valid_int)); +// enable ready input next cycle if output is ready or if both output registers are empty +assign out_tlp_ready_int_early = out_tlp_ready || (!temp_out_tlp_valid_reg && !out_tlp_valid_reg); always @* begin // transfer sink ready state to source diff --git a/rtl/pcie_us_axi_dma_rd.v b/rtl/pcie_us_axi_dma_rd.v index 7142976b0..1650bff6f 100644 --- a/rtl/pcie_us_axi_dma_rd.v +++ b/rtl/pcie_us_axi_dma_rd.v @@ -1821,8 +1821,8 @@ assign m_axi_wstrb = m_axi_wstrb_reg; assign m_axi_wvalid = m_axi_wvalid_reg; assign m_axi_wlast = m_axi_wlast_reg; -// enable ready input next cycle if output is ready or the temp reg will not be filled on the next cycle (output reg empty or no input) -assign m_axi_wready_int_early = m_axi_wready || (!temp_m_axi_wvalid_reg && (!m_axi_wvalid_reg || !m_axi_wvalid_int)); +// enable ready input next cycle if output is ready or if both output registers are empty +assign m_axi_wready_int_early = m_axi_wready || (!temp_m_axi_wvalid_reg && !m_axi_wvalid_reg); always @* begin // transfer sink ready state to source diff --git a/rtl/pcie_us_axi_dma_wr.v b/rtl/pcie_us_axi_dma_wr.v index b43b317ad..b1f9ba887 100644 --- a/rtl/pcie_us_axi_dma_wr.v +++ b/rtl/pcie_us_axi_dma_wr.v @@ -1221,8 +1221,8 @@ assign m_axis_rq_tvalid = m_axis_rq_tvalid_reg; assign m_axis_rq_tlast = m_axis_rq_tlast_reg; assign m_axis_rq_tuser = m_axis_rq_tuser_reg; -// enable ready input next cycle if output is ready or the temp reg will not be filled on the next cycle (output reg empty or no input) -assign m_axis_rq_tready_int_early = m_axis_rq_tready || (!temp_m_axis_rq_tvalid_reg && (!m_axis_rq_tvalid_reg || !m_axis_rq_tvalid_int)); +// enable ready input next cycle if output is ready or if both output registers are empty +assign m_axis_rq_tready_int_early = m_axis_rq_tready || (!temp_m_axis_rq_tvalid_reg && !m_axis_rq_tvalid_reg); always @* begin // transfer sink ready state to source diff --git a/rtl/pcie_us_axi_master_rd.v b/rtl/pcie_us_axi_master_rd.v index 6ba9ae57d..d9db8bee3 100644 --- a/rtl/pcie_us_axi_master_rd.v +++ b/rtl/pcie_us_axi_master_rd.v @@ -1124,8 +1124,8 @@ assign m_axis_cc_tvalid = m_axis_cc_tvalid_reg; assign m_axis_cc_tlast = m_axis_cc_tlast_reg; assign m_axis_cc_tuser = m_axis_cc_tuser_reg; -// enable ready input next cycle if output is ready or the temp reg will not be filled on the next cycle (output reg empty or no input) -assign m_axis_cc_tready_int_early = m_axis_cc_tready || (!temp_m_axis_cc_tvalid_reg && (!m_axis_cc_tvalid_reg || !m_axis_cc_tvalid_int)); +// enable ready input next cycle if output is ready or if both output registers are empty +assign m_axis_cc_tready_int_early = m_axis_cc_tready || (!temp_m_axis_cc_tvalid_reg && !m_axis_cc_tvalid_reg); always @* begin // transfer sink ready state to source diff --git a/rtl/pcie_us_axi_master_wr.v b/rtl/pcie_us_axi_master_wr.v index 7128a8ab5..b0d06d82a 100644 --- a/rtl/pcie_us_axi_master_wr.v +++ b/rtl/pcie_us_axi_master_wr.v @@ -615,8 +615,8 @@ assign m_axi_wstrb = m_axi_wstrb_reg; assign m_axi_wvalid = m_axi_wvalid_reg; assign m_axi_wlast = m_axi_wlast_reg; -// enable ready input next cycle if output is ready or the temp reg will not be filled on the next cycle (output reg empty or no input) -assign m_axi_wready_int_early = m_axi_wready || (!temp_m_axi_wvalid_reg && (!m_axi_wvalid_reg || !m_axi_wvalid_int)); +// enable ready input next cycle if output is ready or if both output registers are empty +assign m_axi_wready_int_early = m_axi_wready || (!temp_m_axi_wvalid_reg && !m_axi_wvalid_reg); always @* begin // transfer sink ready state to source diff --git a/rtl/pcie_us_axil_master.v b/rtl/pcie_us_axil_master.v index af6259c53..bdb32abd1 100644 --- a/rtl/pcie_us_axil_master.v +++ b/rtl/pcie_us_axil_master.v @@ -865,8 +865,8 @@ assign m_axis_cc_tvalid = m_axis_cc_tvalid_reg; assign m_axis_cc_tlast = m_axis_cc_tlast_reg; assign m_axis_cc_tuser = m_axis_cc_tuser_reg; -// enable ready input next cycle if output is ready or the temp reg will not be filled on the next cycle (output reg empty or no input) -assign m_axis_cc_tready_int_early = m_axis_cc_tready || (!temp_m_axis_cc_tvalid_reg && (!m_axis_cc_tvalid_reg || !m_axis_cc_tvalid_int)); +// enable ready input next cycle if output is ready or if both output registers are empty +assign m_axis_cc_tready_int_early = m_axis_cc_tready || (!temp_m_axis_cc_tvalid_reg && !m_axis_cc_tvalid_reg); always @* begin // transfer sink ready state to source diff --git a/rtl/pcie_us_axis_cq_demux.v b/rtl/pcie_us_axis_cq_demux.v index d8c4dbe07..8754ab435 100644 --- a/rtl/pcie_us_axis_cq_demux.v +++ b/rtl/pcie_us_axis_cq_demux.v @@ -251,8 +251,8 @@ assign m_axis_cq_tvalid = m_axis_cq_tvalid_reg; assign m_axis_cq_tlast = {M_COUNT{m_axis_cq_tlast_reg}}; assign m_axis_cq_tuser = {M_COUNT{m_axis_cq_tuser_reg}}; -// enable ready input next cycle if output is ready or the temp reg will not be filled on the next cycle (output reg empty or no input) -assign m_axis_cq_tready_int_early = (m_axis_cq_tready & m_axis_cq_tvalid) || (!temp_m_axis_cq_tvalid_reg && (!m_axis_cq_tvalid || !m_axis_cq_tvalid_int)); +// enable ready input next cycle if output is ready or if both output registers are empty +assign m_axis_cq_tready_int_early = (m_axis_cq_tready & m_axis_cq_tvalid) || (!temp_m_axis_cq_tvalid_reg && !m_axis_cq_tvalid_reg); always @* begin // transfer sink ready state to source diff --git a/rtl/pcie_us_axis_rc_demux.v b/rtl/pcie_us_axis_rc_demux.v index 341e4e0be..f8bd459ac 100644 --- a/rtl/pcie_us_axis_rc_demux.v +++ b/rtl/pcie_us_axis_rc_demux.v @@ -199,8 +199,8 @@ assign m_axis_rc_tvalid = m_axis_rc_tvalid_reg; assign m_axis_rc_tlast = {M_COUNT{m_axis_rc_tlast_reg}}; assign m_axis_rc_tuser = {M_COUNT{m_axis_rc_tuser_reg}}; -// enable ready input next cycle if output is ready or the temp reg will not be filled on the next cycle (output reg empty or no input) -assign m_axis_rc_tready_int_early = (m_axis_rc_tready & m_axis_rc_tvalid) || (!temp_m_axis_rc_tvalid_reg && (!m_axis_rc_tvalid || !m_axis_rc_tvalid_int)); +// enable ready input next cycle if output is ready or if both output registers are empty +assign m_axis_rc_tready_int_early = (m_axis_rc_tready & m_axis_rc_tvalid) || (!temp_m_axis_rc_tvalid_reg && !m_axis_rc_tvalid_reg); always @* begin // transfer sink ready state to source From 234c318ea1a20141fbc858130299e669997ce672 Mon Sep 17 00:00:00 2001 From: Alex Forencich Date: Sun, 15 May 2022 19:25:55 -0700 Subject: [PATCH 4/5] Pipeline arbitration delay in muxes Signed-off-by: Alex Forencich --- rtl/dma_if_desc_mux.v | 54 ++++++++++++++++++++++++++------- rtl/pcie_axi_dma_desc_mux.v | 42 +++++++++++++++++++++----- rtl/pcie_tlp_mux.v | 60 +++++++++++++++++++++++++++++-------- 3 files changed, 124 insertions(+), 32 deletions(-) diff --git a/rtl/dma_if_desc_mux.v b/rtl/dma_if_desc_mux.v index 47892121b..14d9bc429 100644 --- a/rtl/dma_if_desc_mux.v +++ b/rtl/dma_if_desc_mux.v @@ -140,6 +140,16 @@ wire [PORTS-1:0] grant; wire grant_valid; wire [CL_PORTS-1:0] grant_encoded; +// input registers to pipeline arbitration delay +reg [PORTS*DMA_ADDR_WIDTH-1:0] s_axis_desc_dma_addr_reg = 0; +reg [PORTS*S_RAM_SEL_WIDTH-1:0] s_axis_desc_ram_sel_reg = 0; +reg [PORTS*RAM_ADDR_WIDTH-1:0] s_axis_desc_ram_addr_reg = 0; +reg [PORTS*IMM_WIDTH-1:0] s_axis_desc_imm_reg = 0; +reg [PORTS-1:0] s_axis_desc_imm_en_reg = 0; +reg [PORTS*LEN_WIDTH-1:0] s_axis_desc_len_reg = 0; +reg [PORTS*S_TAG_WIDTH-1:0] s_axis_desc_tag_reg = 0; +reg [PORTS-1:0] s_axis_desc_valid_reg = 0; + // internal datapath reg [DMA_ADDR_WIDTH-1:0] m_axis_desc_dma_addr_int; reg [M_RAM_SEL_WIDTH-1:0] m_axis_desc_ram_sel_int; @@ -152,17 +162,17 @@ reg m_axis_desc_valid_int; reg m_axis_desc_ready_int_reg = 1'b0; wire m_axis_desc_ready_int_early; -assign s_axis_desc_ready = (m_axis_desc_ready_int_reg && grant_valid) << grant_encoded; +assign s_axis_desc_ready = ~s_axis_desc_valid_reg | ({PORTS{m_axis_desc_ready_int_reg}} & grant); // mux for incoming packet -wire [DMA_ADDR_WIDTH-1:0] current_s_desc_dma_addr = s_axis_desc_dma_addr[grant_encoded*DMA_ADDR_WIDTH +: DMA_ADDR_WIDTH]; -wire [S_RAM_SEL_WIDTH-1:0] current_s_desc_ram_sel = s_axis_desc_ram_sel[grant_encoded*S_RAM_SEL_WIDTH +: S_RAM_SEL_WIDTH_INT]; -wire [RAM_ADDR_WIDTH-1:0] current_s_desc_ram_addr = s_axis_desc_ram_addr[grant_encoded*RAM_ADDR_WIDTH +: RAM_ADDR_WIDTH]; -wire [IMM_WIDTH-1:0] current_s_desc_imm = s_axis_desc_imm[grant_encoded*IMM_WIDTH +: IMM_WIDTH]; -wire current_s_desc_imm_en = s_axis_desc_imm_en[grant_encoded]; -wire [LEN_WIDTH-1:0] current_s_desc_len = s_axis_desc_len[grant_encoded*LEN_WIDTH +: LEN_WIDTH]; -wire [S_TAG_WIDTH-1:0] current_s_desc_tag = s_axis_desc_tag[grant_encoded*S_TAG_WIDTH +: S_TAG_WIDTH]; -wire current_s_desc_valid = s_axis_desc_valid[grant_encoded]; +wire [DMA_ADDR_WIDTH-1:0] current_s_desc_dma_addr = s_axis_desc_dma_addr_reg[grant_encoded*DMA_ADDR_WIDTH +: DMA_ADDR_WIDTH]; +wire [S_RAM_SEL_WIDTH-1:0] current_s_desc_ram_sel = s_axis_desc_ram_sel_reg[grant_encoded*S_RAM_SEL_WIDTH +: S_RAM_SEL_WIDTH_INT]; +wire [RAM_ADDR_WIDTH-1:0] current_s_desc_ram_addr = s_axis_desc_ram_addr_reg[grant_encoded*RAM_ADDR_WIDTH +: RAM_ADDR_WIDTH]; +wire [IMM_WIDTH-1:0] current_s_desc_imm = s_axis_desc_imm_reg[grant_encoded*IMM_WIDTH +: IMM_WIDTH]; +wire current_s_desc_imm_en = s_axis_desc_imm_en_reg[grant_encoded]; +wire [LEN_WIDTH-1:0] current_s_desc_len = s_axis_desc_len_reg[grant_encoded*LEN_WIDTH +: LEN_WIDTH]; +wire [S_TAG_WIDTH-1:0] current_s_desc_tag = s_axis_desc_tag_reg[grant_encoded*S_TAG_WIDTH +: S_TAG_WIDTH]; +wire current_s_desc_valid = s_axis_desc_valid_reg[grant_encoded]; wire current_s_desc_ready = s_axis_desc_ready[grant_encoded]; // arbiter instance @@ -183,8 +193,8 @@ arb_inst ( .grant_encoded(grant_encoded) ); -assign request = s_axis_desc_valid & ~grant; -assign acknowledge = grant & s_axis_desc_valid & s_axis_desc_ready; +assign request = (s_axis_desc_valid_reg & ~grant) | (s_axis_desc_valid & grant); +assign acknowledge = grant & s_axis_desc_valid_reg & {PORTS{m_axis_desc_ready_int_reg}}; always @* begin // pass through selected packet data @@ -204,6 +214,28 @@ always @* begin m_axis_desc_valid_int = current_s_desc_valid && m_axis_desc_ready_int_reg && grant_valid; end +integer i; + +always @(posedge clk) begin + // register inputs + for (i = 0; i < PORTS; i = i + 1) begin + if (s_axis_desc_ready[i]) begin + s_axis_desc_dma_addr_reg[i*DMA_ADDR_WIDTH +: DMA_ADDR_WIDTH] <= s_axis_desc_dma_addr[i*DMA_ADDR_WIDTH +: DMA_ADDR_WIDTH]; + s_axis_desc_ram_sel_reg[i*S_RAM_SEL_WIDTH +: S_RAM_SEL_WIDTH_INT] <= s_axis_desc_ram_sel[i*S_RAM_SEL_WIDTH +: S_RAM_SEL_WIDTH_INT]; + s_axis_desc_ram_addr_reg[i*RAM_ADDR_WIDTH +: RAM_ADDR_WIDTH] <= s_axis_desc_ram_addr[i*RAM_ADDR_WIDTH +: RAM_ADDR_WIDTH]; + s_axis_desc_imm_reg[i*IMM_WIDTH +: IMM_WIDTH] <= s_axis_desc_imm[i*IMM_WIDTH +: IMM_WIDTH]; + s_axis_desc_imm_en_reg[i] <= s_axis_desc_imm_en[i]; + s_axis_desc_len_reg[i*LEN_WIDTH +: LEN_WIDTH] <= s_axis_desc_len[i*LEN_WIDTH +: LEN_WIDTH]; + s_axis_desc_tag_reg[i*S_TAG_WIDTH +: S_TAG_WIDTH] <= s_axis_desc_tag[i*S_TAG_WIDTH +: S_TAG_WIDTH]; + s_axis_desc_valid_reg[i] <= s_axis_desc_valid[i]; + end + end + + if (rst) begin + s_axis_desc_valid_reg <= 0; + end +end + // output datapath logic reg [DMA_ADDR_WIDTH-1:0] m_axis_desc_dma_addr_reg = {DMA_ADDR_WIDTH{1'b0}}; reg [M_RAM_SEL_WIDTH-1:0] m_axis_desc_ram_sel_reg = {M_RAM_SEL_WIDTH{1'b0}}; diff --git a/rtl/pcie_axi_dma_desc_mux.v b/rtl/pcie_axi_dma_desc_mux.v index fa0a3cea7..69c805056 100644 --- a/rtl/pcie_axi_dma_desc_mux.v +++ b/rtl/pcie_axi_dma_desc_mux.v @@ -107,6 +107,13 @@ wire [PORTS-1:0] grant; wire grant_valid; wire [CL_PORTS-1:0] grant_encoded; +// input registers to pipeline arbitration delay +reg [PORTS*PCIE_ADDR_WIDTH-1:0] s_axis_desc_pcie_addr_reg = 0; +reg [PORTS*AXI_ADDR_WIDTH-1:0] s_axis_desc_axi_addr_reg = 0; +reg [PORTS*LEN_WIDTH-1:0] s_axis_desc_len_reg = 0; +reg [PORTS*S_TAG_WIDTH-1:0] s_axis_desc_tag_reg = 0; +reg [PORTS-1:0] s_axis_desc_valid_reg = 0; + // internal datapath reg [PCIE_ADDR_WIDTH-1:0] m_axis_desc_pcie_addr_int; reg [AXI_ADDR_WIDTH-1:0] m_axis_desc_axi_addr_int; @@ -116,14 +123,14 @@ reg m_axis_desc_valid_int; reg m_axis_desc_ready_int_reg = 1'b0; wire m_axis_desc_ready_int_early; -assign s_axis_desc_ready = (m_axis_desc_ready_int_reg && grant_valid) << grant_encoded; +assign s_axis_desc_ready = ~s_axis_desc_valid_reg | ({PORTS{m_axis_desc_ready_int_reg}} & grant); // mux for incoming packet -wire [PCIE_ADDR_WIDTH-1:0] current_s_desc_pcie_addr = s_axis_desc_pcie_addr[grant_encoded*PCIE_ADDR_WIDTH +: PCIE_ADDR_WIDTH]; -wire [AXI_ADDR_WIDTH-1:0] current_s_desc_axi_addr = s_axis_desc_axi_addr[grant_encoded*AXI_ADDR_WIDTH +: AXI_ADDR_WIDTH]; -wire [LEN_WIDTH-1:0] current_s_desc_len = s_axis_desc_len[grant_encoded*LEN_WIDTH +: LEN_WIDTH]; -wire [S_TAG_WIDTH-1:0] current_s_desc_tag = s_axis_desc_tag[grant_encoded*S_TAG_WIDTH +: S_TAG_WIDTH]; -wire current_s_desc_valid = s_axis_desc_valid[grant_encoded]; +wire [PCIE_ADDR_WIDTH-1:0] current_s_desc_pcie_addr = s_axis_desc_pcie_addr_reg[grant_encoded*PCIE_ADDR_WIDTH +: PCIE_ADDR_WIDTH]; +wire [AXI_ADDR_WIDTH-1:0] current_s_desc_axi_addr = s_axis_desc_axi_addr_reg[grant_encoded*AXI_ADDR_WIDTH +: AXI_ADDR_WIDTH]; +wire [LEN_WIDTH-1:0] current_s_desc_len = s_axis_desc_len_reg[grant_encoded*LEN_WIDTH +: LEN_WIDTH]; +wire [S_TAG_WIDTH-1:0] current_s_desc_tag = s_axis_desc_tag_reg[grant_encoded*S_TAG_WIDTH +: S_TAG_WIDTH]; +wire current_s_desc_valid = s_axis_desc_valid_reg[grant_encoded]; wire current_s_desc_ready = s_axis_desc_ready[grant_encoded]; // arbiter instance @@ -144,8 +151,8 @@ arb_inst ( .grant_encoded(grant_encoded) ); -assign request = s_axis_desc_valid & ~grant; -assign acknowledge = grant & s_axis_desc_valid & s_axis_desc_ready; +assign request = (s_axis_desc_valid_reg & ~grant) | (s_axis_desc_valid & grant); +assign acknowledge = grant & s_axis_desc_valid_reg & {PORTS{m_axis_desc_ready_int_reg}}; always @* begin // pass through selected packet data @@ -159,6 +166,25 @@ always @* begin m_axis_desc_valid_int = current_s_desc_valid && m_axis_desc_ready_int_reg && grant_valid; end +integer i; + +always @(posedge clk) begin + // register inputs + for (i = 0; i < PORTS; i = i + 1) begin + if (s_axis_desc_ready[i]) begin + s_axis_desc_pcie_addr_reg[i*PCIE_ADDR_WIDTH +: PCIE_ADDR_WIDTH] <= s_axis_desc_pcie_addr[i*PCIE_ADDR_WIDTH +: PCIE_ADDR_WIDTH]; + s_axis_desc_axi_addr_reg[i*AXI_ADDR_WIDTH +: AXI_ADDR_WIDTH] <= s_axis_desc_axi_addr[i*AXI_ADDR_WIDTH +: AXI_ADDR_WIDTH]; + s_axis_desc_len_reg[i*LEN_WIDTH +: LEN_WIDTH] <= s_axis_desc_len[i*LEN_WIDTH +: LEN_WIDTH]; + s_axis_desc_tag_reg[i*S_TAG_WIDTH +: S_TAG_WIDTH] <= s_axis_desc_tag[i*S_TAG_WIDTH +: S_TAG_WIDTH]; + s_axis_desc_valid_reg[i] <= s_axis_desc_valid[i]; + end + end + + if (rst) begin + s_axis_desc_valid_reg <= 0; + end +end + // output datapath logic reg [PCIE_ADDR_WIDTH-1:0] m_axis_desc_pcie_addr_reg = {PCIE_ADDR_WIDTH{1'b0}}; reg [AXI_ADDR_WIDTH-1:0] m_axis_desc_axi_addr_reg = {AXI_ADDR_WIDTH{1'b0}}; diff --git a/rtl/pcie_tlp_mux.v b/rtl/pcie_tlp_mux.v index e1311106a..5b12cc969 100644 --- a/rtl/pcie_tlp_mux.v +++ b/rtl/pcie_tlp_mux.v @@ -107,6 +107,17 @@ wire [PORTS-1:0] grant; wire grant_valid; wire [CL_PORTS-1:0] grant_encoded; +// input registers to pipeline arbitration delay +reg [PORTS*TLP_SEG_COUNT*TLP_SEG_DATA_WIDTH-1:0] in_tlp_data_reg = 0; +reg [PORTS*TLP_SEG_COUNT*TLP_SEG_STRB_WIDTH-1:0] in_tlp_strb_reg = 0; +reg [PORTS*TLP_SEG_COUNT*TLP_SEG_HDR_WIDTH-1:0] in_tlp_hdr_reg = 0; +reg [PORTS*TLP_SEG_COUNT*3-1:0] in_tlp_bar_id_reg = 0; +reg [PORTS*TLP_SEG_COUNT*8-1:0] in_tlp_func_num_reg = 0; +reg [PORTS*TLP_SEG_COUNT*4-1:0] in_tlp_error_reg = 0; +reg [PORTS*TLP_SEG_COUNT-1:0] in_tlp_valid_reg = 0; +reg [PORTS*TLP_SEG_COUNT-1:0] in_tlp_sop_reg = 0; +reg [PORTS*TLP_SEG_COUNT-1:0] in_tlp_eop_reg = 0; + // internal datapath reg [TLP_SEG_COUNT*TLP_SEG_DATA_WIDTH-1:0] out_tlp_data_int; reg [TLP_SEG_COUNT*TLP_SEG_STRB_WIDTH-1:0] out_tlp_strb_int; @@ -120,18 +131,18 @@ reg [TLP_SEG_COUNT-1:0] out_tlp_eop_int; reg out_tlp_ready_int_reg = 1'b0; wire out_tlp_ready_int_early; -assign in_tlp_ready = (out_tlp_ready_int_reg && grant_valid) << grant_encoded; +assign in_tlp_ready = ~in_tlp_valid_reg | ({PORTS{out_tlp_ready_int_reg}} & grant); // mux for incoming packet -wire [TLP_SEG_COUNT*TLP_SEG_DATA_WIDTH-1:0] current_in_tlp_data = in_tlp_data[grant_encoded*TLP_SEG_COUNT*TLP_SEG_DATA_WIDTH +: TLP_SEG_COUNT*TLP_SEG_DATA_WIDTH]; -wire [TLP_SEG_COUNT*TLP_SEG_STRB_WIDTH-1:0] current_in_tlp_strb = in_tlp_strb[grant_encoded*TLP_SEG_COUNT*TLP_SEG_STRB_WIDTH +: TLP_SEG_COUNT*TLP_SEG_STRB_WIDTH]; -wire [TLP_SEG_COUNT*TLP_SEG_HDR_WIDTH-1:0] current_in_tlp_hdr = in_tlp_hdr[grant_encoded*TLP_SEG_COUNT*TLP_SEG_HDR_WIDTH +: TLP_SEG_COUNT*TLP_SEG_HDR_WIDTH]; -wire [TLP_SEG_COUNT*3-1:0] current_in_tlp_bar_id = in_tlp_bar_id[grant_encoded*TLP_SEG_COUNT*3 +: TLP_SEG_COUNT*3]; -wire [TLP_SEG_COUNT*8-1:0] current_in_tlp_func_num = in_tlp_func_num[grant_encoded*TLP_SEG_COUNT*8 +: TLP_SEG_COUNT*8]; -wire [TLP_SEG_COUNT*4-1:0] current_in_tlp_error = in_tlp_error[grant_encoded*TLP_SEG_COUNT*4 +: TLP_SEG_COUNT*4]; -wire [TLP_SEG_COUNT-1:0] current_in_tlp_valid = in_tlp_valid[grant_encoded*TLP_SEG_COUNT +: TLP_SEG_COUNT]; -wire [TLP_SEG_COUNT-1:0] current_in_tlp_sop = in_tlp_sop[grant_encoded*TLP_SEG_COUNT +: TLP_SEG_COUNT]; -wire [TLP_SEG_COUNT-1:0] current_in_tlp_eop = in_tlp_eop[grant_encoded*TLP_SEG_COUNT +: TLP_SEG_COUNT]; +wire [TLP_SEG_COUNT*TLP_SEG_DATA_WIDTH-1:0] current_in_tlp_data = in_tlp_data_reg[grant_encoded*TLP_SEG_COUNT*TLP_SEG_DATA_WIDTH +: TLP_SEG_COUNT*TLP_SEG_DATA_WIDTH]; +wire [TLP_SEG_COUNT*TLP_SEG_STRB_WIDTH-1:0] current_in_tlp_strb = in_tlp_strb_reg[grant_encoded*TLP_SEG_COUNT*TLP_SEG_STRB_WIDTH +: TLP_SEG_COUNT*TLP_SEG_STRB_WIDTH]; +wire [TLP_SEG_COUNT*TLP_SEG_HDR_WIDTH-1:0] current_in_tlp_hdr = in_tlp_hdr_reg[grant_encoded*TLP_SEG_COUNT*TLP_SEG_HDR_WIDTH +: TLP_SEG_COUNT*TLP_SEG_HDR_WIDTH]; +wire [TLP_SEG_COUNT*3-1:0] current_in_tlp_bar_id = in_tlp_bar_id_reg[grant_encoded*TLP_SEG_COUNT*3 +: TLP_SEG_COUNT*3]; +wire [TLP_SEG_COUNT*8-1:0] current_in_tlp_func_num = in_tlp_func_num_reg[grant_encoded*TLP_SEG_COUNT*8 +: TLP_SEG_COUNT*8]; +wire [TLP_SEG_COUNT*4-1:0] current_in_tlp_error = in_tlp_error_reg[grant_encoded*TLP_SEG_COUNT*4 +: TLP_SEG_COUNT*4]; +wire [TLP_SEG_COUNT-1:0] current_in_tlp_valid = in_tlp_valid_reg[grant_encoded*TLP_SEG_COUNT +: TLP_SEG_COUNT]; +wire [TLP_SEG_COUNT-1:0] current_in_tlp_sop = in_tlp_sop_reg[grant_encoded*TLP_SEG_COUNT +: TLP_SEG_COUNT]; +wire [TLP_SEG_COUNT-1:0] current_in_tlp_eop = in_tlp_eop_reg[grant_encoded*TLP_SEG_COUNT +: TLP_SEG_COUNT]; wire current_in_tlp_ready = in_tlp_ready[grant_encoded]; // arbiter instance @@ -152,8 +163,8 @@ arb_inst ( .grant_encoded(grant_encoded) ); -assign request = in_tlp_valid & ~grant; -assign acknowledge = grant & in_tlp_valid & in_tlp_ready & in_tlp_eop; +assign request = (in_tlp_valid_reg & ~grant) | (in_tlp_valid & grant); +assign acknowledge = grant & in_tlp_valid_reg & {PORTS{out_tlp_ready_int_reg}} & in_tlp_eop_reg; always @* begin // pass through selected packet data @@ -163,11 +174,34 @@ always @* begin out_tlp_bar_id_int = current_in_tlp_bar_id; out_tlp_func_num_int = current_in_tlp_func_num; out_tlp_error_int = current_in_tlp_error; - out_tlp_valid_int = out_tlp_ready_int_reg && grant_valid ? current_in_tlp_valid : 0; + out_tlp_valid_int = current_in_tlp_valid && out_tlp_ready_int_reg && grant_valid; out_tlp_sop_int = current_in_tlp_sop; out_tlp_eop_int = current_in_tlp_eop; end +integer i; + +always @(posedge clk) begin + // register inputs + for (i = 0; i < PORTS; i = i + 1) begin + if (in_tlp_ready[i]) begin + in_tlp_data_reg[i*TLP_SEG_COUNT*TLP_SEG_DATA_WIDTH +: TLP_SEG_COUNT*TLP_SEG_DATA_WIDTH] <= in_tlp_data[i*TLP_SEG_COUNT*TLP_SEG_DATA_WIDTH +: TLP_SEG_COUNT*TLP_SEG_DATA_WIDTH]; + in_tlp_strb_reg[i*TLP_SEG_COUNT*TLP_SEG_STRB_WIDTH +: TLP_SEG_COUNT*TLP_SEG_STRB_WIDTH] <= in_tlp_strb[i*TLP_SEG_COUNT*TLP_SEG_STRB_WIDTH +: TLP_SEG_COUNT*TLP_SEG_STRB_WIDTH]; + in_tlp_hdr_reg[i*TLP_SEG_COUNT*TLP_SEG_HDR_WIDTH +: TLP_SEG_COUNT*TLP_SEG_HDR_WIDTH] <= in_tlp_hdr[i*TLP_SEG_COUNT*TLP_SEG_HDR_WIDTH +: TLP_SEG_COUNT*TLP_SEG_HDR_WIDTH]; + in_tlp_bar_id_reg[i*TLP_SEG_COUNT*3 +: TLP_SEG_COUNT*3] <= in_tlp_bar_id[i*TLP_SEG_COUNT*3 +: TLP_SEG_COUNT*3]; + in_tlp_func_num_reg[i*TLP_SEG_COUNT*8 +: TLP_SEG_COUNT*8] <= in_tlp_func_num[i*TLP_SEG_COUNT*8 +: TLP_SEG_COUNT*8]; + in_tlp_error_reg[i*TLP_SEG_COUNT*4 +: TLP_SEG_COUNT*4] <= in_tlp_error[i*TLP_SEG_COUNT*4 +: TLP_SEG_COUNT*4]; + in_tlp_valid_reg[i*TLP_SEG_COUNT +: TLP_SEG_COUNT] <= in_tlp_valid[i*TLP_SEG_COUNT +: TLP_SEG_COUNT]; + in_tlp_sop_reg[i*TLP_SEG_COUNT +: TLP_SEG_COUNT] <= in_tlp_sop[i*TLP_SEG_COUNT +: TLP_SEG_COUNT]; + in_tlp_eop_reg[i*TLP_SEG_COUNT +: TLP_SEG_COUNT] <= in_tlp_eop[i*TLP_SEG_COUNT +: TLP_SEG_COUNT]; + end + end + + if (rst) begin + in_tlp_valid_reg <= 0; + end +end + // output datapath logic reg [TLP_SEG_COUNT*TLP_SEG_DATA_WIDTH-1:0] out_tlp_data_reg = 0; reg [TLP_SEG_COUNT*TLP_SEG_STRB_WIDTH-1:0] out_tlp_strb_reg = 0; From d685b0b125cbbe07711ff2058f672eb5861ad4f0 Mon Sep 17 00:00:00 2001 From: Alex Forencich Date: Sun, 15 May 2022 19:26:10 -0700 Subject: [PATCH 5/5] Avoid width mismatch warning Signed-off-by: Alex Forencich --- rtl/dma_if_mux_rd.v | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/rtl/dma_if_mux_rd.v b/rtl/dma_if_mux_rd.v index de1c753a1..8adec48f9 100644 --- a/rtl/dma_if_mux_rd.v +++ b/rtl/dma_if_mux_rd.v @@ -171,8 +171,8 @@ dma_if_desc_mux_inst ( .s_axis_desc_dma_addr(s_axis_read_desc_dma_addr), .s_axis_desc_ram_sel(s_axis_read_desc_ram_sel), .s_axis_desc_ram_addr(s_axis_read_desc_ram_addr), - .s_axis_desc_imm(32'd0), - .s_axis_desc_imm_en(1'b0), + .s_axis_desc_imm({PORTS{32'd0}}), + .s_axis_desc_imm_en({PORTS{1'b0}}), .s_axis_desc_len(s_axis_read_desc_len), .s_axis_desc_tag(s_axis_read_desc_tag), .s_axis_desc_valid(s_axis_read_desc_valid),