1
0
mirror of https://github.com/corundum/corundum.git synced 2025-01-16 08:12:53 +08:00

Pipeline arbitration delay in muxes

Signed-off-by: Alex Forencich <alex@alexforencich.com>
This commit is contained in:
Alex Forencich 2022-05-15 19:25:55 -07:00
parent ae1f4a9a22
commit 234c318ea1
3 changed files with 124 additions and 32 deletions

View File

@ -140,6 +140,16 @@ wire [PORTS-1:0] grant;
wire grant_valid;
wire [CL_PORTS-1:0] grant_encoded;
// input registers to pipeline arbitration delay
reg [PORTS*DMA_ADDR_WIDTH-1:0] s_axis_desc_dma_addr_reg = 0;
reg [PORTS*S_RAM_SEL_WIDTH-1:0] s_axis_desc_ram_sel_reg = 0;
reg [PORTS*RAM_ADDR_WIDTH-1:0] s_axis_desc_ram_addr_reg = 0;
reg [PORTS*IMM_WIDTH-1:0] s_axis_desc_imm_reg = 0;
reg [PORTS-1:0] s_axis_desc_imm_en_reg = 0;
reg [PORTS*LEN_WIDTH-1:0] s_axis_desc_len_reg = 0;
reg [PORTS*S_TAG_WIDTH-1:0] s_axis_desc_tag_reg = 0;
reg [PORTS-1:0] s_axis_desc_valid_reg = 0;
// internal datapath
reg [DMA_ADDR_WIDTH-1:0] m_axis_desc_dma_addr_int;
reg [M_RAM_SEL_WIDTH-1:0] m_axis_desc_ram_sel_int;
@ -152,17 +162,17 @@ reg m_axis_desc_valid_int;
reg m_axis_desc_ready_int_reg = 1'b0;
wire m_axis_desc_ready_int_early;
assign s_axis_desc_ready = (m_axis_desc_ready_int_reg && grant_valid) << grant_encoded;
// Per-port ready: a port is ready when its input register holds no valid
// entry, or when it is the granted port and m_axis_desc_ready_int_reg is set.
assign s_axis_desc_ready = ~s_axis_desc_valid_reg | ({PORTS{m_axis_desc_ready_int_reg}} & grant);
// mux for incoming packet
wire [DMA_ADDR_WIDTH-1:0] current_s_desc_dma_addr = s_axis_desc_dma_addr[grant_encoded*DMA_ADDR_WIDTH +: DMA_ADDR_WIDTH];
wire [S_RAM_SEL_WIDTH-1:0] current_s_desc_ram_sel = s_axis_desc_ram_sel[grant_encoded*S_RAM_SEL_WIDTH +: S_RAM_SEL_WIDTH_INT];
wire [RAM_ADDR_WIDTH-1:0] current_s_desc_ram_addr = s_axis_desc_ram_addr[grant_encoded*RAM_ADDR_WIDTH +: RAM_ADDR_WIDTH];
wire [IMM_WIDTH-1:0] current_s_desc_imm = s_axis_desc_imm[grant_encoded*IMM_WIDTH +: IMM_WIDTH];
wire current_s_desc_imm_en = s_axis_desc_imm_en[grant_encoded];
wire [LEN_WIDTH-1:0] current_s_desc_len = s_axis_desc_len[grant_encoded*LEN_WIDTH +: LEN_WIDTH];
wire [S_TAG_WIDTH-1:0] current_s_desc_tag = s_axis_desc_tag[grant_encoded*S_TAG_WIDTH +: S_TAG_WIDTH];
wire current_s_desc_valid = s_axis_desc_valid[grant_encoded];
wire [DMA_ADDR_WIDTH-1:0] current_s_desc_dma_addr = s_axis_desc_dma_addr_reg[grant_encoded*DMA_ADDR_WIDTH +: DMA_ADDR_WIDTH];
wire [S_RAM_SEL_WIDTH-1:0] current_s_desc_ram_sel = s_axis_desc_ram_sel_reg[grant_encoded*S_RAM_SEL_WIDTH +: S_RAM_SEL_WIDTH_INT];
wire [RAM_ADDR_WIDTH-1:0] current_s_desc_ram_addr = s_axis_desc_ram_addr_reg[grant_encoded*RAM_ADDR_WIDTH +: RAM_ADDR_WIDTH];
wire [IMM_WIDTH-1:0] current_s_desc_imm = s_axis_desc_imm_reg[grant_encoded*IMM_WIDTH +: IMM_WIDTH];
wire current_s_desc_imm_en = s_axis_desc_imm_en_reg[grant_encoded];
wire [LEN_WIDTH-1:0] current_s_desc_len = s_axis_desc_len_reg[grant_encoded*LEN_WIDTH +: LEN_WIDTH];
wire [S_TAG_WIDTH-1:0] current_s_desc_tag = s_axis_desc_tag_reg[grant_encoded*S_TAG_WIDTH +: S_TAG_WIDTH];
wire current_s_desc_valid = s_axis_desc_valid_reg[grant_encoded];
wire current_s_desc_ready = s_axis_desc_ready[grant_encoded];
// arbiter instance
@ -183,8 +193,8 @@ arb_inst (
.grant_encoded(grant_encoded)
);
assign request = s_axis_desc_valid & ~grant;
assign acknowledge = grant & s_axis_desc_valid & s_axis_desc_ready;
// request: a port requests while its registered valid is set and it does not
// hold the grant, or while it holds the grant and its raw input valid is set.
assign request = (s_axis_desc_valid_reg & ~grant) | (s_axis_desc_valid & grant);
// acknowledge: raised for the granted port when its registered valid is set
// and m_axis_desc_ready_int_reg is high.
assign acknowledge = grant & s_axis_desc_valid_reg & {PORTS{m_axis_desc_ready_int_reg}};
always @* begin
// pass through selected packet data
@ -204,6 +214,28 @@ always @* begin
m_axis_desc_valid_int = current_s_desc_valid && m_axis_desc_ready_int_reg && grant_valid;
end
// Loop index for the per-port input register update below.
integer i;
// Input register stage: keeps one registered copy of every port's descriptor
// fields and valid flag, updated on the rising edge of clk.
always @(posedge clk) begin
// register inputs
for (i = 0; i < PORTS; i = i + 1) begin
// A port's registers are loaded only while its s_axis_desc_ready bit is set;
// otherwise they hold their current contents.
if (s_axis_desc_ready[i]) begin
s_axis_desc_dma_addr_reg[i*DMA_ADDR_WIDTH +: DMA_ADDR_WIDTH] <= s_axis_desc_dma_addr[i*DMA_ADDR_WIDTH +: DMA_ADDR_WIDTH];
// Stride is S_RAM_SEL_WIDTH but the slice width is S_RAM_SEL_WIDTH_INT.
// NOTE(review): presumably the two match whenever S_RAM_SEL_WIDTH > 0 -
// confirm against the parameter definitions.
s_axis_desc_ram_sel_reg[i*S_RAM_SEL_WIDTH +: S_RAM_SEL_WIDTH_INT] <= s_axis_desc_ram_sel[i*S_RAM_SEL_WIDTH +: S_RAM_SEL_WIDTH_INT];
s_axis_desc_ram_addr_reg[i*RAM_ADDR_WIDTH +: RAM_ADDR_WIDTH] <= s_axis_desc_ram_addr[i*RAM_ADDR_WIDTH +: RAM_ADDR_WIDTH];
s_axis_desc_imm_reg[i*IMM_WIDTH +: IMM_WIDTH] <= s_axis_desc_imm[i*IMM_WIDTH +: IMM_WIDTH];
s_axis_desc_imm_en_reg[i] <= s_axis_desc_imm_en[i];
s_axis_desc_len_reg[i*LEN_WIDTH +: LEN_WIDTH] <= s_axis_desc_len[i*LEN_WIDTH +: LEN_WIDTH];
s_axis_desc_tag_reg[i*S_TAG_WIDTH +: S_TAG_WIDTH] <= s_axis_desc_tag[i*S_TAG_WIDTH +: S_TAG_WIDTH];
// valid is captured together with the payload, so a load while the input
// valid is low clears this port's registered valid
s_axis_desc_valid_reg[i] <= s_axis_desc_valid[i];
end
end
// Reset is evaluated after the loop, so its non-blocking assignment to the
// valid flags overrides any load above; the payload registers are not reset.
if (rst) begin
s_axis_desc_valid_reg <= 0;
end
end
// output datapath logic
reg [DMA_ADDR_WIDTH-1:0] m_axis_desc_dma_addr_reg = {DMA_ADDR_WIDTH{1'b0}};
reg [M_RAM_SEL_WIDTH-1:0] m_axis_desc_ram_sel_reg = {M_RAM_SEL_WIDTH{1'b0}};

View File

@ -107,6 +107,13 @@ wire [PORTS-1:0] grant;
wire grant_valid;
wire [CL_PORTS-1:0] grant_encoded;
// input registers to pipeline arbitration delay
reg [PORTS*PCIE_ADDR_WIDTH-1:0] s_axis_desc_pcie_addr_reg = 0;
reg [PORTS*AXI_ADDR_WIDTH-1:0] s_axis_desc_axi_addr_reg = 0;
reg [PORTS*LEN_WIDTH-1:0] s_axis_desc_len_reg = 0;
reg [PORTS*S_TAG_WIDTH-1:0] s_axis_desc_tag_reg = 0;
reg [PORTS-1:0] s_axis_desc_valid_reg = 0;
// internal datapath
reg [PCIE_ADDR_WIDTH-1:0] m_axis_desc_pcie_addr_int;
reg [AXI_ADDR_WIDTH-1:0] m_axis_desc_axi_addr_int;
@ -116,14 +123,14 @@ reg m_axis_desc_valid_int;
reg m_axis_desc_ready_int_reg = 1'b0;
wire m_axis_desc_ready_int_early;
assign s_axis_desc_ready = (m_axis_desc_ready_int_reg && grant_valid) << grant_encoded;
// Per-port ready: a port is ready when its input register holds no valid
// entry, or when it is the granted port and m_axis_desc_ready_int_reg is set.
assign s_axis_desc_ready = ~s_axis_desc_valid_reg | ({PORTS{m_axis_desc_ready_int_reg}} & grant);
// mux for incoming packet
wire [PCIE_ADDR_WIDTH-1:0] current_s_desc_pcie_addr = s_axis_desc_pcie_addr[grant_encoded*PCIE_ADDR_WIDTH +: PCIE_ADDR_WIDTH];
wire [AXI_ADDR_WIDTH-1:0] current_s_desc_axi_addr = s_axis_desc_axi_addr[grant_encoded*AXI_ADDR_WIDTH +: AXI_ADDR_WIDTH];
wire [LEN_WIDTH-1:0] current_s_desc_len = s_axis_desc_len[grant_encoded*LEN_WIDTH +: LEN_WIDTH];
wire [S_TAG_WIDTH-1:0] current_s_desc_tag = s_axis_desc_tag[grant_encoded*S_TAG_WIDTH +: S_TAG_WIDTH];
wire current_s_desc_valid = s_axis_desc_valid[grant_encoded];
wire [PCIE_ADDR_WIDTH-1:0] current_s_desc_pcie_addr = s_axis_desc_pcie_addr_reg[grant_encoded*PCIE_ADDR_WIDTH +: PCIE_ADDR_WIDTH];
wire [AXI_ADDR_WIDTH-1:0] current_s_desc_axi_addr = s_axis_desc_axi_addr_reg[grant_encoded*AXI_ADDR_WIDTH +: AXI_ADDR_WIDTH];
wire [LEN_WIDTH-1:0] current_s_desc_len = s_axis_desc_len_reg[grant_encoded*LEN_WIDTH +: LEN_WIDTH];
wire [S_TAG_WIDTH-1:0] current_s_desc_tag = s_axis_desc_tag_reg[grant_encoded*S_TAG_WIDTH +: S_TAG_WIDTH];
wire current_s_desc_valid = s_axis_desc_valid_reg[grant_encoded];
wire current_s_desc_ready = s_axis_desc_ready[grant_encoded];
// arbiter instance
@ -144,8 +151,8 @@ arb_inst (
.grant_encoded(grant_encoded)
);
assign request = s_axis_desc_valid & ~grant;
assign acknowledge = grant & s_axis_desc_valid & s_axis_desc_ready;
// request: a port requests while its registered valid is set and it does not
// hold the grant, or while it holds the grant and its raw input valid is set.
assign request = (s_axis_desc_valid_reg & ~grant) | (s_axis_desc_valid & grant);
// acknowledge: raised for the granted port when its registered valid is set
// and m_axis_desc_ready_int_reg is high.
assign acknowledge = grant & s_axis_desc_valid_reg & {PORTS{m_axis_desc_ready_int_reg}};
always @* begin
// pass through selected packet data
@ -159,6 +166,25 @@ always @* begin
m_axis_desc_valid_int = current_s_desc_valid && m_axis_desc_ready_int_reg && grant_valid;
end
// Loop index for the per-port input register update below.
integer i;
// Input register stage: keeps one registered copy of every port's descriptor
// fields and valid flag, updated on the rising edge of clk.
always @(posedge clk) begin
// register inputs
for (i = 0; i < PORTS; i = i + 1) begin
// A port's registers are loaded only while its s_axis_desc_ready bit is set;
// otherwise they hold their current contents.
if (s_axis_desc_ready[i]) begin
s_axis_desc_pcie_addr_reg[i*PCIE_ADDR_WIDTH +: PCIE_ADDR_WIDTH] <= s_axis_desc_pcie_addr[i*PCIE_ADDR_WIDTH +: PCIE_ADDR_WIDTH];
s_axis_desc_axi_addr_reg[i*AXI_ADDR_WIDTH +: AXI_ADDR_WIDTH] <= s_axis_desc_axi_addr[i*AXI_ADDR_WIDTH +: AXI_ADDR_WIDTH];
s_axis_desc_len_reg[i*LEN_WIDTH +: LEN_WIDTH] <= s_axis_desc_len[i*LEN_WIDTH +: LEN_WIDTH];
s_axis_desc_tag_reg[i*S_TAG_WIDTH +: S_TAG_WIDTH] <= s_axis_desc_tag[i*S_TAG_WIDTH +: S_TAG_WIDTH];
// valid is captured together with the payload, so a load while the input
// valid is low clears this port's registered valid
s_axis_desc_valid_reg[i] <= s_axis_desc_valid[i];
end
end
// Reset is evaluated after the loop, so its non-blocking assignment to the
// valid flags overrides any load above; the payload registers are not reset.
if (rst) begin
s_axis_desc_valid_reg <= 0;
end
end
// output datapath logic
reg [PCIE_ADDR_WIDTH-1:0] m_axis_desc_pcie_addr_reg = {PCIE_ADDR_WIDTH{1'b0}};
reg [AXI_ADDR_WIDTH-1:0] m_axis_desc_axi_addr_reg = {AXI_ADDR_WIDTH{1'b0}};

View File

@ -107,6 +107,17 @@ wire [PORTS-1:0] grant;
wire grant_valid;
wire [CL_PORTS-1:0] grant_encoded;
// input registers to pipeline arbitration delay
reg [PORTS*TLP_SEG_COUNT*TLP_SEG_DATA_WIDTH-1:0] in_tlp_data_reg = 0;
reg [PORTS*TLP_SEG_COUNT*TLP_SEG_STRB_WIDTH-1:0] in_tlp_strb_reg = 0;
reg [PORTS*TLP_SEG_COUNT*TLP_SEG_HDR_WIDTH-1:0] in_tlp_hdr_reg = 0;
reg [PORTS*TLP_SEG_COUNT*3-1:0] in_tlp_bar_id_reg = 0;
reg [PORTS*TLP_SEG_COUNT*8-1:0] in_tlp_func_num_reg = 0;
reg [PORTS*TLP_SEG_COUNT*4-1:0] in_tlp_error_reg = 0;
reg [PORTS*TLP_SEG_COUNT-1:0] in_tlp_valid_reg = 0;
reg [PORTS*TLP_SEG_COUNT-1:0] in_tlp_sop_reg = 0;
reg [PORTS*TLP_SEG_COUNT-1:0] in_tlp_eop_reg = 0;
// internal datapath
reg [TLP_SEG_COUNT*TLP_SEG_DATA_WIDTH-1:0] out_tlp_data_int;
reg [TLP_SEG_COUNT*TLP_SEG_STRB_WIDTH-1:0] out_tlp_strb_int;
@ -120,18 +131,18 @@ reg [TLP_SEG_COUNT-1:0] out_tlp_eop_int;
reg out_tlp_ready_int_reg = 1'b0;
wire out_tlp_ready_int_early;
assign in_tlp_ready = (out_tlp_ready_int_reg && grant_valid) << grant_encoded;
// Ready towards the inputs: set where the registered valid is clear, or for
// the granted port when out_tlp_ready_int_reg is set.
// NOTE(review): in_tlp_valid_reg is PORTS*TLP_SEG_COUNT bits wide while the
// replicated ready term is PORTS bits wide, so the bits only line up per port
// when TLP_SEG_COUNT is 1 - confirm that this module enforces that.
assign in_tlp_ready = ~in_tlp_valid_reg | ({PORTS{out_tlp_ready_int_reg}} & grant);
// mux for incoming packet
wire [TLP_SEG_COUNT*TLP_SEG_DATA_WIDTH-1:0] current_in_tlp_data = in_tlp_data[grant_encoded*TLP_SEG_COUNT*TLP_SEG_DATA_WIDTH +: TLP_SEG_COUNT*TLP_SEG_DATA_WIDTH];
wire [TLP_SEG_COUNT*TLP_SEG_STRB_WIDTH-1:0] current_in_tlp_strb = in_tlp_strb[grant_encoded*TLP_SEG_COUNT*TLP_SEG_STRB_WIDTH +: TLP_SEG_COUNT*TLP_SEG_STRB_WIDTH];
wire [TLP_SEG_COUNT*TLP_SEG_HDR_WIDTH-1:0] current_in_tlp_hdr = in_tlp_hdr[grant_encoded*TLP_SEG_COUNT*TLP_SEG_HDR_WIDTH +: TLP_SEG_COUNT*TLP_SEG_HDR_WIDTH];
wire [TLP_SEG_COUNT*3-1:0] current_in_tlp_bar_id = in_tlp_bar_id[grant_encoded*TLP_SEG_COUNT*3 +: TLP_SEG_COUNT*3];
wire [TLP_SEG_COUNT*8-1:0] current_in_tlp_func_num = in_tlp_func_num[grant_encoded*TLP_SEG_COUNT*8 +: TLP_SEG_COUNT*8];
wire [TLP_SEG_COUNT*4-1:0] current_in_tlp_error = in_tlp_error[grant_encoded*TLP_SEG_COUNT*4 +: TLP_SEG_COUNT*4];
wire [TLP_SEG_COUNT-1:0] current_in_tlp_valid = in_tlp_valid[grant_encoded*TLP_SEG_COUNT +: TLP_SEG_COUNT];
wire [TLP_SEG_COUNT-1:0] current_in_tlp_sop = in_tlp_sop[grant_encoded*TLP_SEG_COUNT +: TLP_SEG_COUNT];
wire [TLP_SEG_COUNT-1:0] current_in_tlp_eop = in_tlp_eop[grant_encoded*TLP_SEG_COUNT +: TLP_SEG_COUNT];
wire [TLP_SEG_COUNT*TLP_SEG_DATA_WIDTH-1:0] current_in_tlp_data = in_tlp_data_reg[grant_encoded*TLP_SEG_COUNT*TLP_SEG_DATA_WIDTH +: TLP_SEG_COUNT*TLP_SEG_DATA_WIDTH];
wire [TLP_SEG_COUNT*TLP_SEG_STRB_WIDTH-1:0] current_in_tlp_strb = in_tlp_strb_reg[grant_encoded*TLP_SEG_COUNT*TLP_SEG_STRB_WIDTH +: TLP_SEG_COUNT*TLP_SEG_STRB_WIDTH];
wire [TLP_SEG_COUNT*TLP_SEG_HDR_WIDTH-1:0] current_in_tlp_hdr = in_tlp_hdr_reg[grant_encoded*TLP_SEG_COUNT*TLP_SEG_HDR_WIDTH +: TLP_SEG_COUNT*TLP_SEG_HDR_WIDTH];
wire [TLP_SEG_COUNT*3-1:0] current_in_tlp_bar_id = in_tlp_bar_id_reg[grant_encoded*TLP_SEG_COUNT*3 +: TLP_SEG_COUNT*3];
wire [TLP_SEG_COUNT*8-1:0] current_in_tlp_func_num = in_tlp_func_num_reg[grant_encoded*TLP_SEG_COUNT*8 +: TLP_SEG_COUNT*8];
wire [TLP_SEG_COUNT*4-1:0] current_in_tlp_error = in_tlp_error_reg[grant_encoded*TLP_SEG_COUNT*4 +: TLP_SEG_COUNT*4];
wire [TLP_SEG_COUNT-1:0] current_in_tlp_valid = in_tlp_valid_reg[grant_encoded*TLP_SEG_COUNT +: TLP_SEG_COUNT];
wire [TLP_SEG_COUNT-1:0] current_in_tlp_sop = in_tlp_sop_reg[grant_encoded*TLP_SEG_COUNT +: TLP_SEG_COUNT];
wire [TLP_SEG_COUNT-1:0] current_in_tlp_eop = in_tlp_eop_reg[grant_encoded*TLP_SEG_COUNT +: TLP_SEG_COUNT];
wire current_in_tlp_ready = in_tlp_ready[grant_encoded];
// arbiter instance
@ -152,8 +163,8 @@ arb_inst (
.grant_encoded(grant_encoded)
);
assign request = in_tlp_valid & ~grant;
assign acknowledge = grant & in_tlp_valid & in_tlp_ready & in_tlp_eop;
// request: asserted while the registered valid is set and the port does not
// hold the grant, or while it holds the grant and the raw input valid is set.
// NOTE(review): the valid/eop vectors are PORTS*TLP_SEG_COUNT bits wide but
// grant is PORTS bits wide; the bitwise terms below match port-for-port only
// when TLP_SEG_COUNT is 1 - confirm.
assign request = (in_tlp_valid_reg & ~grant) | (in_tlp_valid & grant);
// acknowledge: raised for the granted port when its registered valid and
// registered eop are both set and out_tlp_ready_int_reg is high.
assign acknowledge = grant & in_tlp_valid_reg & {PORTS{out_tlp_ready_int_reg}} & in_tlp_eop_reg;
always @* begin
// pass through selected packet data
@ -163,11 +174,34 @@ always @* begin
out_tlp_bar_id_int = current_in_tlp_bar_id;
out_tlp_func_num_int = current_in_tlp_func_num;
out_tlp_error_int = current_in_tlp_error;
out_tlp_valid_int = out_tlp_ready_int_reg && grant_valid ? current_in_tlp_valid : 0;
out_tlp_valid_int = current_in_tlp_valid && out_tlp_ready_int_reg && grant_valid;
out_tlp_sop_int = current_in_tlp_sop;
out_tlp_eop_int = current_in_tlp_eop;
end
// Loop index for the per-port input register update below.
integer i;
// Input register stage: keeps one registered copy of every port's TLP
// segment fields and flags, updated on the rising edge of clk.
always @(posedge clk) begin
// register inputs
for (i = 0; i < PORTS; i = i + 1) begin
// A port's registers are loaded only while its in_tlp_ready bit is set;
// otherwise they hold their current contents.
if (in_tlp_ready[i]) begin
in_tlp_data_reg[i*TLP_SEG_COUNT*TLP_SEG_DATA_WIDTH +: TLP_SEG_COUNT*TLP_SEG_DATA_WIDTH] <= in_tlp_data[i*TLP_SEG_COUNT*TLP_SEG_DATA_WIDTH +: TLP_SEG_COUNT*TLP_SEG_DATA_WIDTH];
in_tlp_strb_reg[i*TLP_SEG_COUNT*TLP_SEG_STRB_WIDTH +: TLP_SEG_COUNT*TLP_SEG_STRB_WIDTH] <= in_tlp_strb[i*TLP_SEG_COUNT*TLP_SEG_STRB_WIDTH +: TLP_SEG_COUNT*TLP_SEG_STRB_WIDTH];
in_tlp_hdr_reg[i*TLP_SEG_COUNT*TLP_SEG_HDR_WIDTH +: TLP_SEG_COUNT*TLP_SEG_HDR_WIDTH] <= in_tlp_hdr[i*TLP_SEG_COUNT*TLP_SEG_HDR_WIDTH +: TLP_SEG_COUNT*TLP_SEG_HDR_WIDTH];
in_tlp_bar_id_reg[i*TLP_SEG_COUNT*3 +: TLP_SEG_COUNT*3] <= in_tlp_bar_id[i*TLP_SEG_COUNT*3 +: TLP_SEG_COUNT*3];
in_tlp_func_num_reg[i*TLP_SEG_COUNT*8 +: TLP_SEG_COUNT*8] <= in_tlp_func_num[i*TLP_SEG_COUNT*8 +: TLP_SEG_COUNT*8];
in_tlp_error_reg[i*TLP_SEG_COUNT*4 +: TLP_SEG_COUNT*4] <= in_tlp_error[i*TLP_SEG_COUNT*4 +: TLP_SEG_COUNT*4];
// valid/sop/eop are captured together with the payload, so a load while the
// input valid is low clears this port's registered valid
in_tlp_valid_reg[i*TLP_SEG_COUNT +: TLP_SEG_COUNT] <= in_tlp_valid[i*TLP_SEG_COUNT +: TLP_SEG_COUNT];
in_tlp_sop_reg[i*TLP_SEG_COUNT +: TLP_SEG_COUNT] <= in_tlp_sop[i*TLP_SEG_COUNT +: TLP_SEG_COUNT];
in_tlp_eop_reg[i*TLP_SEG_COUNT +: TLP_SEG_COUNT] <= in_tlp_eop[i*TLP_SEG_COUNT +: TLP_SEG_COUNT];
end
end
// Reset is evaluated after the loop, so its non-blocking assignment to the
// valid flags overrides any load above; the other registers are not reset.
if (rst) begin
in_tlp_valid_reg <= 0;
end
end
// output datapath logic
reg [TLP_SEG_COUNT*TLP_SEG_DATA_WIDTH-1:0] out_tlp_data_reg = 0;
reg [TLP_SEG_COUNT*TLP_SEG_STRB_WIDTH-1:0] out_tlp_strb_reg = 0;