diff --git a/rtl/pcie_tlp_fifo_mux.v b/rtl/pcie_tlp_fifo_mux.v new file mode 100644 index 000000000..36558ec23 --- /dev/null +++ b/rtl/pcie_tlp_fifo_mux.v @@ -0,0 +1,558 @@ +/* + +Copyright (c) 2022 Alex Forencich + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +*/ + +// Language: Verilog 2001 + +`resetall +`timescale 1ns / 1ps +`default_nettype none + +/* + * PCIe TLP multiplexer with input FIFOs + */ +module pcie_tlp_fifo_mux # +( + // Input count + parameter PORTS = 2, + // TLP data width + parameter TLP_DATA_WIDTH = 256, + // TLP strobe width + parameter TLP_STRB_WIDTH = TLP_DATA_WIDTH/32, + // TLP header width + parameter TLP_HDR_WIDTH = 128, + // Sequence number width + parameter SEQ_NUM_WIDTH = 6, + // TLP segment count (input) + parameter IN_TLP_SEG_COUNT = 1, + // TLP segment count (output) + parameter OUT_TLP_SEG_COUNT = IN_TLP_SEG_COUNT, + // select round robin arbitration + parameter ARB_TYPE_ROUND_ROBIN = 0, + // LSB priority selection + parameter ARB_LSB_HIGH_PRIORITY = 1, + // FIFO depth + parameter FIFO_DEPTH = 2048, + // FIFO watermark level + parameter FIFO_WATERMARK = FIFO_DEPTH/2 +) +( + input wire clk, + input wire rst, + + /* + * TLP input + */ + input wire [PORTS*TLP_DATA_WIDTH-1:0] in_tlp_data, + input wire [PORTS*TLP_STRB_WIDTH-1:0] in_tlp_strb, + input wire [PORTS*IN_TLP_SEG_COUNT*TLP_HDR_WIDTH-1:0] in_tlp_hdr, + input wire [PORTS*IN_TLP_SEG_COUNT*SEQ_NUM_WIDTH-1:0] in_tlp_seq, + input wire [PORTS*IN_TLP_SEG_COUNT*3-1:0] in_tlp_bar_id, + input wire [PORTS*IN_TLP_SEG_COUNT*8-1:0] in_tlp_func_num, + input wire [PORTS*IN_TLP_SEG_COUNT*4-1:0] in_tlp_error, + input wire [PORTS*IN_TLP_SEG_COUNT-1:0] in_tlp_valid, + input wire [PORTS*IN_TLP_SEG_COUNT-1:0] in_tlp_sop, + input wire [PORTS*IN_TLP_SEG_COUNT-1:0] in_tlp_eop, + output wire [PORTS-1:0] in_tlp_ready, + + /* + * TLP output + */ + output wire [TLP_DATA_WIDTH-1:0] out_tlp_data, + output wire [TLP_STRB_WIDTH-1:0] out_tlp_strb, + output wire [OUT_TLP_SEG_COUNT*TLP_HDR_WIDTH-1:0] out_tlp_hdr, + output wire [OUT_TLP_SEG_COUNT*SEQ_NUM_WIDTH-1:0] out_tlp_seq, + output wire [OUT_TLP_SEG_COUNT*3-1:0] out_tlp_bar_id, + output wire [OUT_TLP_SEG_COUNT*8-1:0] out_tlp_func_num, + output wire [OUT_TLP_SEG_COUNT*4-1:0] out_tlp_error, + output wire 
[OUT_TLP_SEG_COUNT-1:0] out_tlp_valid, + output wire [OUT_TLP_SEG_COUNT-1:0] out_tlp_sop, + output wire [OUT_TLP_SEG_COUNT-1:0] out_tlp_eop, + input wire out_tlp_ready, + + /* + * Status + */ + output wire [PORTS*OUT_TLP_SEG_COUNT*SEQ_NUM_WIDTH-1:0] sel_tlp_seq, + output wire [PORTS*OUT_TLP_SEG_COUNT-1:0] sel_tlp_seq_valid, + output wire [PORTS-1:0] fifo_half_full, + output wire [PORTS-1:0] fifo_watermark +); + +parameter CL_PORTS = $clog2(PORTS); + +parameter TLP_SEG_DATA_WIDTH = TLP_DATA_WIDTH / OUT_TLP_SEG_COUNT; +parameter TLP_SEG_STRB_WIDTH = TLP_STRB_WIDTH / OUT_TLP_SEG_COUNT; + +parameter SEG_SEL_WIDTH = $clog2(OUT_TLP_SEG_COUNT); + +parameter OUTPUT_FIFO_ADDR_WIDTH = 5; + +// check configuration +initial begin + if (TLP_HDR_WIDTH != 128) begin + $error("Error: TLP segment header width must be 128 (instance %m)"); + $finish; + end + + if (TLP_STRB_WIDTH*32 != TLP_DATA_WIDTH) begin + $error("Error: PCIe interface requires dword (32-bit) granularity (instance %m)"); + $finish; + end +end + +reg frame_reg = 1'b0, frame_next, frame_cyc; +reg [CL_PORTS-1:0] port_reg = 0, port_next, port_cyc; +reg [SEG_SEL_WIDTH-1:0] seg_offset_cyc; +reg [SEG_SEL_WIDTH+1-1:0] seg_count_cyc; +reg valid, eop; +reg frame, abort; +reg [SEG_SEL_WIDTH-1:0] port_seg_offset_cyc[0:PORTS-1]; +reg [SEG_SEL_WIDTH+1-1:0] port_seg_count_cyc[0:PORTS-1]; + +reg [OUT_TLP_SEG_COUNT-1:0] port_seg_valid[0:PORTS-1]; +reg [OUT_TLP_SEG_COUNT-1:0] port_seg_eop[0:PORTS-1]; + +reg [OUT_TLP_SEG_COUNT-1:0] out_sel_reg = 0, out_sel_next, out_sel_cyc; +reg [CL_PORTS-1:0] out_sel_port_reg[0:OUT_TLP_SEG_COUNT-1], out_sel_port_next[0:OUT_TLP_SEG_COUNT-1]; +reg [SEG_SEL_WIDTH+1-1:0] out_sel_seg_reg[0:OUT_TLP_SEG_COUNT-1], out_sel_seg_next[0:OUT_TLP_SEG_COUNT-1]; + +reg [PORTS*OUT_TLP_SEG_COUNT-1:0] sel_tlp_seq_valid_reg = 0, sel_tlp_seq_valid_next, sel_tlp_seq_valid_cyc; +reg [OUT_TLP_SEG_COUNT*SEQ_NUM_WIDTH-1:0] out_sel_tlp_seq_reg = 0, out_sel_tlp_seq_next; +reg [PORTS*OUT_TLP_SEG_COUNT-1:0] 
out_sel_tlp_seq_valid_reg = 0, out_sel_tlp_seq_valid_next; + +// internal datapath +reg [TLP_DATA_WIDTH-1:0] out_tlp_data_int; +reg [TLP_STRB_WIDTH-1:0] out_tlp_strb_int; +reg [OUT_TLP_SEG_COUNT*TLP_HDR_WIDTH-1:0] out_tlp_hdr_int; +reg [OUT_TLP_SEG_COUNT*SEQ_NUM_WIDTH-1:0] out_tlp_seq_int; +reg [OUT_TLP_SEG_COUNT*3-1:0] out_tlp_bar_id_int; +reg [OUT_TLP_SEG_COUNT*8-1:0] out_tlp_func_num_int; +reg [OUT_TLP_SEG_COUNT*4-1:0] out_tlp_error_int; +reg [OUT_TLP_SEG_COUNT-1:0] out_tlp_valid_int; +reg [OUT_TLP_SEG_COUNT-1:0] out_tlp_sop_int; +reg [OUT_TLP_SEG_COUNT-1:0] out_tlp_eop_int; +wire out_tlp_ready_int; + +wire [TLP_DATA_WIDTH-1:0] fifo_tlp_data[0:PORTS-1]; +wire [TLP_STRB_WIDTH-1:0] fifo_tlp_strb[0:PORTS-1]; +wire [OUT_TLP_SEG_COUNT*TLP_HDR_WIDTH-1:0] fifo_tlp_hdr[0:PORTS-1]; +wire [OUT_TLP_SEG_COUNT*SEQ_NUM_WIDTH-1:0] fifo_tlp_seq[0:PORTS-1]; +wire [OUT_TLP_SEG_COUNT*3-1:0] fifo_tlp_bar_id[0:PORTS-1]; +wire [OUT_TLP_SEG_COUNT*8-1:0] fifo_tlp_func_num[0:PORTS-1]; +wire [OUT_TLP_SEG_COUNT*4-1:0] fifo_tlp_error[0:PORTS-1]; +wire [OUT_TLP_SEG_COUNT-1:0] fifo_tlp_valid[0:PORTS-1]; +wire [OUT_TLP_SEG_COUNT-1:0] fifo_tlp_sop[0:PORTS-1]; +wire [OUT_TLP_SEG_COUNT-1:0] fifo_tlp_eop[0:PORTS-1]; +wire [SEG_SEL_WIDTH-1:0] fifo_seg_offset[0:PORTS-1]; +wire [SEG_SEL_WIDTH+1-1:0] fifo_seg_count[0:PORTS-1]; +reg [PORTS-1:0] fifo_read_en_reg = 0, fifo_read_en_next; +reg [SEG_SEL_WIDTH+1-1:0] fifo_read_seg_count_reg[0:PORTS-1], fifo_read_seg_count_next[0:PORTS-1]; + +wire [OUT_TLP_SEG_COUNT-1:0] fifo_ctrl_tlp_valid[0:PORTS-1]; +wire [OUT_TLP_SEG_COUNT-1:0] fifo_ctrl_tlp_sop[0:PORTS-1]; +wire [OUT_TLP_SEG_COUNT-1:0] fifo_ctrl_tlp_eop[0:PORTS-1]; +wire [SEG_SEL_WIDTH-1:0] fifo_ctrl_seg_offset[0:PORTS-1]; +wire [SEG_SEL_WIDTH+1-1:0] fifo_ctrl_seg_count[0:PORTS-1]; +reg [PORTS-1:0] fifo_ctrl_read_en; +reg [SEG_SEL_WIDTH+1-1:0] fifo_ctrl_read_seg_count[0:PORTS-1]; + +generate + +genvar n; + +for (n = 0; n < PORTS; n = n + 1) begin + + pcie_tlp_fifo_raw #( + .DEPTH(FIFO_DEPTH), + 
.TLP_DATA_WIDTH(TLP_DATA_WIDTH), + .TLP_STRB_WIDTH(TLP_STRB_WIDTH), + .TLP_HDR_WIDTH(TLP_HDR_WIDTH), + .SEQ_NUM_WIDTH(SEQ_NUM_WIDTH), + .IN_TLP_SEG_COUNT(IN_TLP_SEG_COUNT), + .OUT_TLP_SEG_COUNT(OUT_TLP_SEG_COUNT), + .WATERMARK(FIFO_WATERMARK), + .CTRL_OUT_EN(1) + ) + pcie_tlp_fifo_inst ( + .clk(clk), + .rst(rst), + + /* + * TLP input + */ + .in_tlp_data(in_tlp_data[TLP_DATA_WIDTH*n +: TLP_DATA_WIDTH]), + .in_tlp_strb(in_tlp_strb[TLP_STRB_WIDTH*n +: TLP_STRB_WIDTH]), + .in_tlp_hdr(in_tlp_hdr[IN_TLP_SEG_COUNT*TLP_HDR_WIDTH*n +: IN_TLP_SEG_COUNT*TLP_HDR_WIDTH]), + .in_tlp_seq(in_tlp_seq[IN_TLP_SEG_COUNT*SEQ_NUM_WIDTH*n +: IN_TLP_SEG_COUNT*SEQ_NUM_WIDTH]), + .in_tlp_bar_id(in_tlp_bar_id[IN_TLP_SEG_COUNT*3*n +: IN_TLP_SEG_COUNT*3]), + .in_tlp_func_num(in_tlp_func_num[IN_TLP_SEG_COUNT*8*n +: IN_TLP_SEG_COUNT*8]), + .in_tlp_error(in_tlp_error[IN_TLP_SEG_COUNT*4*n +: IN_TLP_SEG_COUNT*4]), + .in_tlp_valid(in_tlp_valid[IN_TLP_SEG_COUNT*n +: IN_TLP_SEG_COUNT]), + .in_tlp_sop(in_tlp_sop[IN_TLP_SEG_COUNT*n +: IN_TLP_SEG_COUNT]), + .in_tlp_eop(in_tlp_eop[IN_TLP_SEG_COUNT*n +: IN_TLP_SEG_COUNT]), + .in_tlp_ready(in_tlp_ready[n +: 1]), + + /* + * TLP output + */ + .out_tlp_data(fifo_tlp_data[n]), + .out_tlp_strb(fifo_tlp_strb[n]), + .out_tlp_hdr(fifo_tlp_hdr[n]), + .out_tlp_seq(fifo_tlp_seq[n]), + .out_tlp_bar_id(fifo_tlp_bar_id[n]), + .out_tlp_func_num(fifo_tlp_func_num[n]), + .out_tlp_error(fifo_tlp_error[n]), + .out_tlp_valid(fifo_tlp_valid[n]), + .out_tlp_sop(fifo_tlp_sop[n]), + .out_tlp_eop(fifo_tlp_eop[n]), + .out_seg_offset(fifo_seg_offset[n]), + .out_seg_count(fifo_seg_count[n]), + .out_read_en(fifo_read_en_reg[n]), + .out_read_seg_count(fifo_read_seg_count_reg[n]), + + .out_ctrl_tlp_strb(), + .out_ctrl_tlp_hdr(), + .out_ctrl_tlp_valid(fifo_ctrl_tlp_valid[n]), + .out_ctrl_tlp_sop(fifo_ctrl_tlp_sop[n]), + .out_ctrl_tlp_eop(fifo_ctrl_tlp_eop[n]), + .out_ctrl_seg_offset(fifo_ctrl_seg_offset[n]), + .out_ctrl_seg_count(fifo_ctrl_seg_count[n]), + 
.out_ctrl_read_en(fifo_ctrl_read_en[n]), + .out_ctrl_read_seg_count(fifo_ctrl_read_seg_count[n]), + + /* + * Status + */ + .half_full(fifo_half_full[n +: 1]), + .watermark(fifo_watermark[n +: 1]) + ); + +end + +endgenerate + +assign sel_tlp_seq = {PORTS{out_sel_tlp_seq_reg}}; +assign sel_tlp_seq_valid = out_sel_tlp_seq_valid_reg; + +integer port, cur_port, seg, cur_seg; + +always @* begin + frame_next = frame_reg; + port_next = port_reg; + + out_tlp_data_int = 0; + out_tlp_strb_int = 0; + out_tlp_hdr_int = 0; + out_tlp_seq_int = 0; + out_tlp_bar_id_int = 0; + out_tlp_func_num_int = 0; + out_tlp_error_int = 0; + out_tlp_valid_int = 0; + out_tlp_sop_int = 0; + out_tlp_eop_int = 0; + + fifo_read_en_next = 0; + fifo_ctrl_read_en = 0; + + frame_cyc = frame_reg; + port_cyc = port_reg; + seg_offset_cyc = fifo_ctrl_seg_offset[port_reg]; + seg_count_cyc = 0; + valid = 0; + eop = 0; + frame = frame_cyc; + abort = 0; + + for (port = 0; port < PORTS; port = port + 1) begin + port_seg_offset_cyc[port] = fifo_ctrl_seg_offset[port]; + port_seg_count_cyc[port] = 0; + fifo_ctrl_read_seg_count[port] = 0; + fifo_read_seg_count_next[port] = 0; + end + + out_sel_next = 0; + out_sel_cyc = 0; + for (seg = 0; seg < OUT_TLP_SEG_COUNT; seg = seg + 1) begin + out_sel_port_next[seg] = 0; + out_sel_seg_next[seg] = 0; + end + + sel_tlp_seq_valid_next = 0; + sel_tlp_seq_valid_cyc = 0; + out_sel_tlp_seq_next = 0; + out_sel_tlp_seq_valid_next = 0; + + // compute mux settings + for (port = 0; port < PORTS; port = port + 1) begin + port_seg_valid[port] = {2{fifo_ctrl_tlp_valid[port]}} >> fifo_ctrl_seg_offset[port]; + port_seg_eop[port] = {2{fifo_ctrl_tlp_eop[port]}} >> fifo_ctrl_seg_offset[port]; + end + + for (seg = 0; seg < OUT_TLP_SEG_COUNT; seg = seg + 1) begin + // select port + if (!frame_cyc && !abort) begin + if (ARB_TYPE_ROUND_ROBIN) begin + // round robin arb - start checking after previously-selected port + if (ARB_LSB_HIGH_PRIORITY) begin + if (port_cyc < PORTS-1) begin + cur_port = 
port_cyc + 1; + end else begin + cur_port = 0; + end + end else begin + if (port_cyc > 0) begin + cur_port = port_cyc - 1; + end else begin + cur_port = PORTS-1; + end + end + end else begin + // priority arb - start from high priority end + if (ARB_LSB_HIGH_PRIORITY) begin + cur_port = 0; + end else begin + cur_port = PORTS-1; + end + end + for (port = 0; port < PORTS; port = port + 1) begin + if (port_seg_valid[cur_port][0] && !frame_cyc) begin + // select port, set frame + frame_cyc = 1; + port_cyc = cur_port; + seg_offset_cyc = port_seg_offset_cyc[cur_port]; + seg_count_cyc = port_seg_count_cyc[cur_port]; + sel_tlp_seq_valid_cyc[OUT_TLP_SEG_COUNT*cur_port+seg] = 1'b1; + end + // next port + if (ARB_LSB_HIGH_PRIORITY) begin + if (cur_port < PORTS-1) begin + cur_port = cur_port + 1; + end else begin + cur_port = 0; + end + end else begin + if (cur_port > 0) begin + cur_port = cur_port - 1; + end else begin + cur_port = PORTS-1; + end + end + end + end + + // route segment + valid = port_seg_valid[port_cyc][0]; + eop = port_seg_eop[port_cyc][0]; + frame = frame_cyc; + + out_sel_cyc[seg] = 1'b1; + out_sel_port_next[seg] = port_cyc; + out_sel_seg_next[seg] = seg_offset_cyc; + if (eop) begin + // end of packet, clear frame + frame_cyc = 0; + end + seg_offset_cyc = seg_offset_cyc + 1; + seg_count_cyc = seg_count_cyc + 1; + port_seg_offset_cyc[port_cyc] = seg_offset_cyc; + port_seg_count_cyc[port_cyc] = seg_count_cyc; + port_seg_valid[port_cyc] = port_seg_valid[port_cyc] >> 1; + port_seg_eop[port_cyc] = port_seg_eop[port_cyc] >> 1; + + if (frame && !abort) begin + if (valid) begin + if (eop || seg == OUT_TLP_SEG_COUNT-1) begin + // end of packet or end of cycle, commit + fifo_read_seg_count_next[port_cyc] = seg_count_cyc; + fifo_ctrl_read_seg_count[port_cyc] = seg_count_cyc; + if (out_tlp_ready_int) begin + frame_next = frame_cyc; + port_next = port_cyc; + out_sel_next = out_sel_cyc; + fifo_read_en_next[port_cyc] = 1'b1; + fifo_ctrl_read_en[port_cyc] = 1'b1; + 
sel_tlp_seq_valid_next = sel_tlp_seq_valid_cyc; + end + end + end else begin + // input has stalled, wait + abort = 1; + end + end + end + + // mux for output segments + for (seg = 0; seg < OUT_TLP_SEG_COUNT; seg = seg + 1) begin + out_tlp_data_int[seg*TLP_SEG_DATA_WIDTH +: TLP_SEG_DATA_WIDTH] = fifo_tlp_data[out_sel_port_reg[seg]][out_sel_seg_reg[seg]*TLP_SEG_DATA_WIDTH +: TLP_SEG_DATA_WIDTH]; + out_tlp_strb_int[seg*TLP_SEG_STRB_WIDTH +: TLP_SEG_STRB_WIDTH] = fifo_tlp_strb[out_sel_port_reg[seg]][out_sel_seg_reg[seg]*TLP_SEG_STRB_WIDTH +: TLP_SEG_STRB_WIDTH]; + out_tlp_hdr_int[seg*TLP_HDR_WIDTH +: TLP_HDR_WIDTH] = fifo_tlp_hdr[out_sel_port_reg[seg]][out_sel_seg_reg[seg]*TLP_HDR_WIDTH +: TLP_HDR_WIDTH]; + out_tlp_seq_int[seg*SEQ_NUM_WIDTH +: SEQ_NUM_WIDTH] = fifo_tlp_seq[out_sel_port_reg[seg]][out_sel_seg_reg[seg]*SEQ_NUM_WIDTH +: SEQ_NUM_WIDTH]; + out_tlp_bar_id_int[seg*3 +: 3] = fifo_tlp_bar_id[out_sel_port_reg[seg]][out_sel_seg_reg[seg]*3 +: 3]; + out_tlp_func_num_int[seg*8 +: 8] = fifo_tlp_func_num[out_sel_port_reg[seg]][out_sel_seg_reg[seg]*8 +: 8]; + out_tlp_error_int[seg*4 +: 4] = fifo_tlp_error[out_sel_port_reg[seg]][out_sel_seg_reg[seg]*4 +: 4]; + if (out_sel_reg[seg]) begin + out_tlp_valid_int[seg +: 1] = fifo_tlp_valid[out_sel_port_reg[seg]][out_sel_seg_reg[seg] +: 1]; + end + out_tlp_sop_int[seg +: 1] = fifo_tlp_sop[out_sel_port_reg[seg]][out_sel_seg_reg[seg] +: 1]; + out_tlp_eop_int[seg +: 1] = fifo_tlp_eop[out_sel_port_reg[seg]][out_sel_seg_reg[seg] +: 1]; + + out_sel_tlp_seq_next[seg*SEQ_NUM_WIDTH +: SEQ_NUM_WIDTH] = fifo_tlp_seq[out_sel_port_reg[seg]][out_sel_seg_reg[seg]*SEQ_NUM_WIDTH +: SEQ_NUM_WIDTH]; + end + out_sel_tlp_seq_valid_next = sel_tlp_seq_valid_reg; +end + +integer i; + +always @(posedge clk) begin + frame_reg <= frame_next; + port_reg <= port_next; + + out_sel_reg <= out_sel_next; + for (i = 0; i < OUT_TLP_SEG_COUNT; i = i + 1) begin + out_sel_port_reg[i] <= out_sel_port_next[i]; + out_sel_seg_reg[i] <= out_sel_seg_next[i]; + end + + 
fifo_read_en_reg <= fifo_read_en_next; + for (i = 0; i < PORTS; i = i + 1) begin + fifo_read_seg_count_reg[i] <= fifo_read_seg_count_next[i]; + end + + sel_tlp_seq_valid_reg <= sel_tlp_seq_valid_next; + out_sel_tlp_seq_reg <= out_sel_tlp_seq_next; + out_sel_tlp_seq_valid_reg <= out_sel_tlp_seq_valid_next; + + if (rst) begin + frame_reg <= 1'b0; + port_reg <= 0; + + out_sel_reg <= 0; + + fifo_read_en_reg <= 0; + + sel_tlp_seq_valid_reg <= 0; + out_sel_tlp_seq_valid_reg <= 0; + end +end + +// output datapath logic +reg [TLP_DATA_WIDTH-1:0] out_tlp_data_reg = 0; +reg [TLP_STRB_WIDTH-1:0] out_tlp_strb_reg = 0; +reg [OUT_TLP_SEG_COUNT*TLP_HDR_WIDTH-1:0] out_tlp_hdr_reg = 0; +reg [OUT_TLP_SEG_COUNT*SEQ_NUM_WIDTH-1:0] out_tlp_seq_reg = 0; +reg [OUT_TLP_SEG_COUNT*3-1:0] out_tlp_bar_id_reg = 0; +reg [OUT_TLP_SEG_COUNT*8-1:0] out_tlp_func_num_reg = 0; +reg [OUT_TLP_SEG_COUNT*4-1:0] out_tlp_error_reg = 0; +reg [OUT_TLP_SEG_COUNT-1:0] out_tlp_valid_reg = 0; +reg [OUT_TLP_SEG_COUNT-1:0] out_tlp_sop_reg = 0; +reg [OUT_TLP_SEG_COUNT-1:0] out_tlp_eop_reg = 0; + +reg [OUTPUT_FIFO_ADDR_WIDTH+1-1:0] out_fifo_wr_ptr_reg = 0; +reg [OUTPUT_FIFO_ADDR_WIDTH+1-1:0] out_fifo_rd_ptr_reg = 0; +reg out_fifo_half_full_reg = 1'b0; + +wire out_fifo_full = out_fifo_wr_ptr_reg == (out_fifo_rd_ptr_reg ^ {1'b1, {OUTPUT_FIFO_ADDR_WIDTH{1'b0}}}); +wire out_fifo_empty = out_fifo_wr_ptr_reg == out_fifo_rd_ptr_reg; + +(* ramstyle = "no_rw_check, mlab" *) +reg [TLP_DATA_WIDTH-1:0] out_fifo_out_tlp_data[2**OUTPUT_FIFO_ADDR_WIDTH-1:0]; +(* ramstyle = "no_rw_check, mlab" *) +reg [TLP_STRB_WIDTH-1:0] out_fifo_out_tlp_strb[2**OUTPUT_FIFO_ADDR_WIDTH-1:0]; +(* ramstyle = "no_rw_check, mlab" *) +reg [OUT_TLP_SEG_COUNT*TLP_HDR_WIDTH-1:0] out_fifo_out_tlp_hdr[2**OUTPUT_FIFO_ADDR_WIDTH-1:0]; +(* ramstyle = "no_rw_check, mlab" *) +reg [OUT_TLP_SEG_COUNT*SEQ_NUM_WIDTH-1:0] out_fifo_out_tlp_seq[2**OUTPUT_FIFO_ADDR_WIDTH-1:0]; +(* ramstyle = "no_rw_check, mlab" *) +reg [OUT_TLP_SEG_COUNT*3-1:0] 
out_fifo_out_tlp_bar_id[2**OUTPUT_FIFO_ADDR_WIDTH-1:0]; +(* ramstyle = "no_rw_check, mlab" *) +reg [OUT_TLP_SEG_COUNT*8-1:0] out_fifo_out_tlp_func_num[2**OUTPUT_FIFO_ADDR_WIDTH-1:0]; +(* ramstyle = "no_rw_check, mlab" *) +reg [OUT_TLP_SEG_COUNT*4-1:0] out_fifo_out_tlp_error[2**OUTPUT_FIFO_ADDR_WIDTH-1:0]; +(* ramstyle = "no_rw_check, mlab" *) +reg [OUT_TLP_SEG_COUNT-1:0] out_fifo_out_tlp_valid[2**OUTPUT_FIFO_ADDR_WIDTH-1:0]; +(* ramstyle = "no_rw_check, mlab" *) +reg [OUT_TLP_SEG_COUNT-1:0] out_fifo_out_tlp_sop[2**OUTPUT_FIFO_ADDR_WIDTH-1:0]; +(* ramstyle = "no_rw_check, mlab" *) +reg [OUT_TLP_SEG_COUNT-1:0] out_fifo_out_tlp_eop[2**OUTPUT_FIFO_ADDR_WIDTH-1:0]; + +assign out_tlp_ready_int = !out_fifo_half_full_reg; + +assign out_tlp_data = out_tlp_data_reg; +assign out_tlp_strb = out_tlp_strb_reg; +assign out_tlp_hdr = out_tlp_hdr_reg; +assign out_tlp_seq = out_tlp_seq_reg; +assign out_tlp_bar_id = out_tlp_bar_id_reg; +assign out_tlp_func_num = out_tlp_func_num_reg; +assign out_tlp_error = out_tlp_error_reg; +assign out_tlp_valid = out_tlp_valid_reg; +assign out_tlp_sop = out_tlp_sop_reg; +assign out_tlp_eop = out_tlp_eop_reg; + +always @(posedge clk) begin + out_tlp_valid_reg <= out_tlp_ready ? 
0 : out_tlp_valid_reg; + + out_fifo_half_full_reg <= $unsigned(out_fifo_wr_ptr_reg - out_fifo_rd_ptr_reg) >= 2**(OUTPUT_FIFO_ADDR_WIDTH-1); + + if (!out_fifo_full && out_tlp_valid_int) begin + out_fifo_out_tlp_data[out_fifo_wr_ptr_reg[OUTPUT_FIFO_ADDR_WIDTH-1:0]] <= out_tlp_data_int; + out_fifo_out_tlp_strb[out_fifo_wr_ptr_reg[OUTPUT_FIFO_ADDR_WIDTH-1:0]] <= out_tlp_strb_int; + out_fifo_out_tlp_hdr[out_fifo_wr_ptr_reg[OUTPUT_FIFO_ADDR_WIDTH-1:0]] <= out_tlp_hdr_int; + out_fifo_out_tlp_seq[out_fifo_wr_ptr_reg[OUTPUT_FIFO_ADDR_WIDTH-1:0]] <= out_tlp_seq_int; + out_fifo_out_tlp_bar_id[out_fifo_wr_ptr_reg[OUTPUT_FIFO_ADDR_WIDTH-1:0]] <= out_tlp_bar_id_int; + out_fifo_out_tlp_func_num[out_fifo_wr_ptr_reg[OUTPUT_FIFO_ADDR_WIDTH-1:0]] <= out_tlp_func_num_int; + out_fifo_out_tlp_error[out_fifo_wr_ptr_reg[OUTPUT_FIFO_ADDR_WIDTH-1:0]] <= out_tlp_error_int; + out_fifo_out_tlp_valid[out_fifo_wr_ptr_reg[OUTPUT_FIFO_ADDR_WIDTH-1:0]] <= out_tlp_valid_int; + out_fifo_out_tlp_sop[out_fifo_wr_ptr_reg[OUTPUT_FIFO_ADDR_WIDTH-1:0]] <= out_tlp_sop_int; + out_fifo_out_tlp_eop[out_fifo_wr_ptr_reg[OUTPUT_FIFO_ADDR_WIDTH-1:0]] <= out_tlp_eop_int; + out_fifo_wr_ptr_reg <= out_fifo_wr_ptr_reg + 1; + end + + if (!out_fifo_empty && (!out_tlp_valid_reg || out_tlp_ready)) begin + out_tlp_data_reg <= out_fifo_out_tlp_data[out_fifo_rd_ptr_reg[OUTPUT_FIFO_ADDR_WIDTH-1:0]]; + out_tlp_strb_reg <= out_fifo_out_tlp_strb[out_fifo_rd_ptr_reg[OUTPUT_FIFO_ADDR_WIDTH-1:0]]; + out_tlp_hdr_reg <= out_fifo_out_tlp_hdr[out_fifo_rd_ptr_reg[OUTPUT_FIFO_ADDR_WIDTH-1:0]]; + out_tlp_seq_reg <= out_fifo_out_tlp_seq[out_fifo_rd_ptr_reg[OUTPUT_FIFO_ADDR_WIDTH-1:0]]; + out_tlp_bar_id_reg <= out_fifo_out_tlp_bar_id[out_fifo_rd_ptr_reg[OUTPUT_FIFO_ADDR_WIDTH-1:0]]; + out_tlp_func_num_reg <= out_fifo_out_tlp_func_num[out_fifo_rd_ptr_reg[OUTPUT_FIFO_ADDR_WIDTH-1:0]]; + out_tlp_error_reg <= out_fifo_out_tlp_error[out_fifo_rd_ptr_reg[OUTPUT_FIFO_ADDR_WIDTH-1:0]]; + if (OUT_TLP_SEG_COUNT == 1) begin + out_tlp_valid_reg <= 1'b1; 
#!/usr/bin/env python
"""
Generates a PCIe TLP mux with input FIFOs wrapper with the specified number of ports
"""

import argparse
import sys


# Jinja2 source of the generated Verilog wrapper.  Context variables:
#   n    - number of input ports (one inNN_* port group is emitted per port)
#   name - name of the generated module
# Port groups are concatenated highest index first so that in00_* ends up in
# the least significant slice of the packed pcie_tlp_fifo_mux buses.
_WRAPPER_TEMPLATE = """/*

Copyright (c) 2022 Alex Forencich

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

*/

// Language: Verilog 2001

`resetall
`timescale 1ns / 1ps
`default_nettype none

/*
 * PCIe TLP {{n}} port mux with input FIFOs (wrapper)
 */
module {{name}} #
(
    // TLP data width
    parameter TLP_DATA_WIDTH = 256,
    // TLP strobe width
    parameter TLP_STRB_WIDTH = TLP_DATA_WIDTH/32,
    // TLP header width
    parameter TLP_HDR_WIDTH = 128,
    // Sequence number width
    parameter SEQ_NUM_WIDTH = 6,
    // TLP segment count (input)
    parameter IN_TLP_SEG_COUNT = 1,
    // TLP segment count (output)
    parameter OUT_TLP_SEG_COUNT = IN_TLP_SEG_COUNT,
    // select round robin arbitration
    parameter ARB_TYPE_ROUND_ROBIN = 0,
    // LSB priority selection
    parameter ARB_LSB_HIGH_PRIORITY = 1,
    // FIFO depth
    parameter FIFO_DEPTH = 2048,
    // FIFO watermark level
    parameter FIFO_WATERMARK = FIFO_DEPTH/2
)
(
    input  wire                                          clk,
    input  wire                                          rst,

    /*
     * TLP inputs
     */
{%- for p in range(n) %}
    input  wire [TLP_DATA_WIDTH-1:0]                     in{{'%02d'%p}}_tlp_data,
    input  wire [TLP_STRB_WIDTH-1:0]                     in{{'%02d'%p}}_tlp_strb,
    input  wire [IN_TLP_SEG_COUNT*TLP_HDR_WIDTH-1:0]     in{{'%02d'%p}}_tlp_hdr,
    input  wire [IN_TLP_SEG_COUNT*SEQ_NUM_WIDTH-1:0]     in{{'%02d'%p}}_tlp_seq,
    input  wire [IN_TLP_SEG_COUNT*3-1:0]                 in{{'%02d'%p}}_tlp_bar_id,
    input  wire [IN_TLP_SEG_COUNT*8-1:0]                 in{{'%02d'%p}}_tlp_func_num,
    input  wire [IN_TLP_SEG_COUNT*4-1:0]                 in{{'%02d'%p}}_tlp_error,
    input  wire [IN_TLP_SEG_COUNT-1:0]                   in{{'%02d'%p}}_tlp_valid,
    input  wire [IN_TLP_SEG_COUNT-1:0]                   in{{'%02d'%p}}_tlp_sop,
    input  wire [IN_TLP_SEG_COUNT-1:0]                   in{{'%02d'%p}}_tlp_eop,
    output wire                                          in{{'%02d'%p}}_tlp_ready,
{% endfor %}
    /*
     * TLP output
     */
    output wire [TLP_DATA_WIDTH-1:0]                     out_tlp_data,
    output wire [TLP_STRB_WIDTH-1:0]                     out_tlp_strb,
    output wire [OUT_TLP_SEG_COUNT*TLP_HDR_WIDTH-1:0]    out_tlp_hdr,
    output wire [OUT_TLP_SEG_COUNT*SEQ_NUM_WIDTH-1:0]    out_tlp_seq,
    output wire [OUT_TLP_SEG_COUNT*3-1:0]                out_tlp_bar_id,
    output wire [OUT_TLP_SEG_COUNT*8-1:0]                out_tlp_func_num,
    output wire [OUT_TLP_SEG_COUNT*4-1:0]                out_tlp_error,
    output wire [OUT_TLP_SEG_COUNT-1:0]                  out_tlp_valid,
    output wire [OUT_TLP_SEG_COUNT-1:0]                  out_tlp_sop,
    output wire [OUT_TLP_SEG_COUNT-1:0]                  out_tlp_eop,
    input  wire                                          out_tlp_ready,

    /*
     * Status
     */
{%- for p in range(n) %}
    output wire [OUT_TLP_SEG_COUNT*SEQ_NUM_WIDTH-1:0]    in{{'%02d'%p}}_sel_tlp_seq,
    output wire [OUT_TLP_SEG_COUNT-1:0]                  in{{'%02d'%p}}_sel_tlp_seq_valid,
    output wire                                          in{{'%02d'%p}}_fifo_half_full,
    output wire                                          in{{'%02d'%p}}_fifo_watermark{% if not loop.last %},{% endif %}
{%- endfor %}
);

pcie_tlp_fifo_mux #(
    .PORTS({{n}}),
    .TLP_DATA_WIDTH(TLP_DATA_WIDTH),
    .TLP_STRB_WIDTH(TLP_STRB_WIDTH),
    .TLP_HDR_WIDTH(TLP_HDR_WIDTH),
    .SEQ_NUM_WIDTH(SEQ_NUM_WIDTH),
    .IN_TLP_SEG_COUNT(IN_TLP_SEG_COUNT),
    .OUT_TLP_SEG_COUNT(OUT_TLP_SEG_COUNT),
    .ARB_TYPE_ROUND_ROBIN(ARB_TYPE_ROUND_ROBIN),
    .ARB_LSB_HIGH_PRIORITY(ARB_LSB_HIGH_PRIORITY),
    .FIFO_DEPTH(FIFO_DEPTH),
    .FIFO_WATERMARK(FIFO_WATERMARK)
)
pcie_tlp_fifo_mux_inst (
    .clk(clk),
    .rst(rst),

    /*
     * TLP input
     */
    .in_tlp_data({ {% for p in range(n-1,-1,-1) %}in{{'%02d'%p}}_tlp_data{% if not loop.last %}, {% endif %}{% endfor %} }),
    .in_tlp_strb({ {% for p in range(n-1,-1,-1) %}in{{'%02d'%p}}_tlp_strb{% if not loop.last %}, {% endif %}{% endfor %} }),
    .in_tlp_hdr({ {% for p in range(n-1,-1,-1) %}in{{'%02d'%p}}_tlp_hdr{% if not loop.last %}, {% endif %}{% endfor %} }),
    .in_tlp_seq({ {% for p in range(n-1,-1,-1) %}in{{'%02d'%p}}_tlp_seq{% if not loop.last %}, {% endif %}{% endfor %} }),
    .in_tlp_bar_id({ {% for p in range(n-1,-1,-1) %}in{{'%02d'%p}}_tlp_bar_id{% if not loop.last %}, {% endif %}{% endfor %} }),
    .in_tlp_func_num({ {% for p in range(n-1,-1,-1) %}in{{'%02d'%p}}_tlp_func_num{% if not loop.last %}, {% endif %}{% endfor %} }),
    .in_tlp_error({ {% for p in range(n-1,-1,-1) %}in{{'%02d'%p}}_tlp_error{% if not loop.last %}, {% endif %}{% endfor %} }),
    .in_tlp_valid({ {% for p in range(n-1,-1,-1) %}in{{'%02d'%p}}_tlp_valid{% if not loop.last %}, {% endif %}{% endfor %} }),
    .in_tlp_sop({ {% for p in range(n-1,-1,-1) %}in{{'%02d'%p}}_tlp_sop{% if not loop.last %}, {% endif %}{% endfor %} }),
    .in_tlp_eop({ {% for p in range(n-1,-1,-1) %}in{{'%02d'%p}}_tlp_eop{% if not loop.last %}, {% endif %}{% endfor %} }),
    .in_tlp_ready({ {% for p in range(n-1,-1,-1) %}in{{'%02d'%p}}_tlp_ready{% if not loop.last %}, {% endif %}{% endfor %} }),

    /*
     * TLP output
     */
    .out_tlp_data(out_tlp_data),
    .out_tlp_strb(out_tlp_strb),
    .out_tlp_hdr(out_tlp_hdr),
    .out_tlp_seq(out_tlp_seq),
    .out_tlp_bar_id(out_tlp_bar_id),
    .out_tlp_func_num(out_tlp_func_num),
    .out_tlp_error(out_tlp_error),
    .out_tlp_valid(out_tlp_valid),
    .out_tlp_sop(out_tlp_sop),
    .out_tlp_eop(out_tlp_eop),
    .out_tlp_ready(out_tlp_ready),

    /*
     * Status
     */
    .sel_tlp_seq({ {% for p in range(n-1,-1,-1) %}in{{'%02d'%p}}_sel_tlp_seq{% if not loop.last %}, {% endif %}{% endfor %} }),
    .sel_tlp_seq_valid({ {% for p in range(n-1,-1,-1) %}in{{'%02d'%p}}_sel_tlp_seq_valid{% if not loop.last %}, {% endif %}{% endfor %} }),
    .fifo_half_full({ {% for p in range(n-1,-1,-1) %}in{{'%02d'%p}}_fifo_half_full{% if not loop.last %}, {% endif %}{% endfor %} }),
    .fifo_watermark({ {% for p in range(n-1,-1,-1) %}in{{'%02d'%p}}_fifo_watermark{% if not loop.last %}, {% endif %}{% endfor %} })
);

endmodule

`resetall

"""


def main():
    """Parse the command line and generate the wrapper.

    Exits with status 1 if the output file cannot be written, and with
    argparse's usage error (status 2) if the port count is not positive.
    """
    parser = argparse.ArgumentParser(description=__doc__.strip())
    parser.add_argument('-p', '--ports', type=int, default=4, help="number of ports")
    parser.add_argument('-n', '--name', type=str, help="module name")
    parser.add_argument('-o', '--output', type=str, help="output file name")

    args = parser.parse_args()

    # Reject bad port counts here so the user gets a usage message instead of
    # a traceback from generate().
    if args.ports < 1:
        parser.error("number of ports must be at least 1")

    try:
        generate(**vars(args))
    except OSError as ex:
        # sys.exit rather than the site-provided exit() builtin, which is not
        # available when the interpreter runs without the site module.
        print(ex, file=sys.stderr)
        sys.exit(1)


def generate(ports=4, name=None, output=None):
    """Render the wrapper module for *ports* inputs and write it to *output*.

    Args:
        ports: number of input ports; must be at least 1.
        name: name of the generated module; defaults to
            ``pcie_tlp_fifo_mux_wrap_<ports>``.
        output: path of the file to write; defaults to ``<name>.v``.

    Raises:
        ValueError: if *ports* is less than 1 (the template would otherwise
            emit empty concatenations and a dangling comma, i.e. invalid
            Verilog).
        OSError: if the output file cannot be written.
    """
    if ports < 1:
        raise ValueError("ports must be at least 1, got {0!r}".format(ports))

    # Imported after validation so that --help and argument checking do not
    # require the template engine to be installed.
    from jinja2 import Template

    if name is None:
        name = f"pcie_tlp_fifo_mux_wrap_{ports}"

    if output is None:
        output = name + ".v"

    print(f"Generating {ports} port PCIe TLP mux with input FIFOs wrapper {name}...")

    rendered = Template(_WRAPPER_TEMPLATE).render(n=ports, name=name)

    print(f"Writing file '{output}'...")

    # The context manager flushes and closes the file on exit.
    with open(output, 'w') as f:
        f.write(rendered)

    print("Done")


if __name__ == "__main__":
    main()
# cocotb simulation Makefile for the pcie_tlp_fifo_mux testbench.
# The design under test is a generated N-port wrapper around the mux; the
# wrapper is produced on demand by ../../rtl/pcie_tlp_fifo_mux_wrap.py.

TOPLEVEL_LANG = verilog

# Simulator selection (icarus and verilator are handled below) and waveform
# dumping switch; both may be overridden from the command line or environment.
SIM ?= icarus
WAVES ?= 0

COCOTB_HDL_TIMEUNIT = 1ns
COCOTB_HDL_TIMEPRECISION = 1ps

# Number of mux input ports; selects which wrapper is generated and simulated.
export PORTS ?= 4

DUT = pcie_tlp_fifo_mux
WRAPPER = $(DUT)_wrap_$(PORTS)
TOPLEVEL = $(WRAPPER)
MODULE = test_$(DUT)
VERILOG_SOURCES += $(WRAPPER).v
VERILOG_SOURCES += ../../rtl/$(DUT).v
VERILOG_SOURCES += ../../rtl/pcie_tlp_fifo_raw.v

# module parameters
# Exported so child processes can see them (presumably read by the Python
# test -- confirm against test_pcie_tlp_fifo_mux.py); also forwarded to the
# simulator as top-level parameter overrides below.
export PARAM_TLP_DATA_WIDTH ?= 64
export PARAM_TLP_STRB_WIDTH ?= $(shell expr $(PARAM_TLP_DATA_WIDTH) / 32 )
export PARAM_TLP_HDR_WIDTH ?= 128
export PARAM_SEQ_NUM_WIDTH ?= 6
export PARAM_IN_TLP_SEG_COUNT ?= 1
export PARAM_OUT_TLP_SEG_COUNT ?= $(PARAM_IN_TLP_SEG_COUNT)
export PARAM_ARB_TYPE_ROUND_ROBIN ?= 0
export PARAM_ARB_LSB_HIGH_PRIORITY ?= 1
export PARAM_FIFO_DEPTH ?= 4096
export PARAM_FIFO_WATERMARK ?= $(shell expr $(PARAM_FIFO_DEPTH) / 2 )

ifeq ($(SIM), icarus)
    PLUSARGS += -fst

    # Icarus: override parameters of the top-level module with -P <path>=<value>.
    COMPILE_ARGS += -P $(TOPLEVEL).TLP_DATA_WIDTH=$(PARAM_TLP_DATA_WIDTH)
    COMPILE_ARGS += -P $(TOPLEVEL).TLP_STRB_WIDTH=$(PARAM_TLP_STRB_WIDTH)
    COMPILE_ARGS += -P $(TOPLEVEL).TLP_HDR_WIDTH=$(PARAM_TLP_HDR_WIDTH)
    COMPILE_ARGS += -P $(TOPLEVEL).SEQ_NUM_WIDTH=$(PARAM_SEQ_NUM_WIDTH)
    COMPILE_ARGS += -P $(TOPLEVEL).IN_TLP_SEG_COUNT=$(PARAM_IN_TLP_SEG_COUNT)
    COMPILE_ARGS += -P $(TOPLEVEL).OUT_TLP_SEG_COUNT=$(PARAM_OUT_TLP_SEG_COUNT)
    COMPILE_ARGS += -P $(TOPLEVEL).ARB_TYPE_ROUND_ROBIN=$(PARAM_ARB_TYPE_ROUND_ROBIN)
    COMPILE_ARGS += -P $(TOPLEVEL).ARB_LSB_HIGH_PRIORITY=$(PARAM_ARB_LSB_HIGH_PRIORITY)
    COMPILE_ARGS += -P $(TOPLEVEL).FIFO_DEPTH=$(PARAM_FIFO_DEPTH)
    COMPILE_ARGS += -P $(TOPLEVEL).FIFO_WATERMARK=$(PARAM_FIFO_WATERMARK)

    # Waveforms: add the generated dump module as an extra simulation root.
    ifeq ($(WAVES), 1)
        VERILOG_SOURCES += iverilog_dump.v
        COMPILE_ARGS += -s iverilog_dump
    endif
else ifeq ($(SIM), verilator)
    # Silence Verilator's SELRANGE and WIDTH warnings for this design.
    COMPILE_ARGS += -Wno-SELRANGE -Wno-WIDTH

    # Verilator: override top-level parameters with -G<name>=<value>.
    COMPILE_ARGS += -GTLP_DATA_WIDTH=$(PARAM_TLP_DATA_WIDTH)
    COMPILE_ARGS += -GTLP_STRB_WIDTH=$(PARAM_TLP_STRB_WIDTH)
    COMPILE_ARGS += -GTLP_HDR_WIDTH=$(PARAM_TLP_HDR_WIDTH)
    COMPILE_ARGS += -GSEQ_NUM_WIDTH=$(PARAM_SEQ_NUM_WIDTH)
    COMPILE_ARGS += -GIN_TLP_SEG_COUNT=$(PARAM_IN_TLP_SEG_COUNT)
    COMPILE_ARGS += -GOUT_TLP_SEG_COUNT=$(PARAM_OUT_TLP_SEG_COUNT)
    COMPILE_ARGS += -GARB_TYPE_ROUND_ROBIN=$(PARAM_ARB_TYPE_ROUND_ROBIN)
    COMPILE_ARGS += -GARB_LSB_HIGH_PRIORITY=$(PARAM_ARB_LSB_HIGH_PRIORITY)
    COMPILE_ARGS += -GFIFO_DEPTH=$(PARAM_FIFO_DEPTH)
    COMPILE_ARGS += -GFIFO_WATERMARK=$(PARAM_FIFO_WATERMARK)

    ifeq ($(WAVES), 1)
        COMPILE_ARGS += --trace-fst
    endif
endif

# Pull in cocotb's simulator rules; must come after the variables above.
include $(shell cocotb-config --makefiles)/Makefile.sim

# Generate the PORTS-wide wrapper by running the generator script directly
# (the script must be executable).  With no -n/-o given the script writes
# pcie_tlp_fifo_mux_wrap_<PORTS>.v into the current directory, which matches
# $(WRAPPER).v.
$(WRAPPER).v: ../../rtl/$(DUT)_wrap.py
	$< -p $(PORTS)

# Emit a tiny helper module that dumps the whole top level to an FST file;
# only compiled in when SIM=icarus and WAVES=1.
iverilog_dump.v:
	echo 'module iverilog_dump();' > $@
	echo 'initial begin' >> $@
	echo ' $$dumpfile("$(TOPLEVEL).fst");' >> $@
	echo ' $$dumpvars(0, $(TOPLEVEL));' >> $@
	echo 'end' >> $@
	echo 'endmodule' >> $@

# Double-colon rule: adds these steps to the clean target without replacing
# other clean:: rules (presumably the one from cocotb's Makefile.sim -- confirm).
clean::
	@rm -rf iverilog_dump.v
	@rm -rf dump.fst $(TOPLEVEL).fst
	@rm -rf *_wrap_*.v
+all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. + +""" + +import itertools +import logging +import os +import random +import subprocess +import sys + +import cocotb_test.simulator +import pytest + +import cocotb +from cocotb.clock import Clock +from cocotb.triggers import RisingEdge +from cocotb.regression import TestFactory + +from cocotbext.pcie.core.tlp import Tlp, TlpType + + +try: + from pcie_if import PcieIfSource, PcieIfSink, PcieIfBus, PcieIfFrame +except ImportError: + # attempt import from current directory + sys.path.insert(0, os.path.join(os.path.dirname(__file__))) + try: + from pcie_if import PcieIfSource, PcieIfSink, PcieIfBus, PcieIfFrame + finally: + del sys.path[0] + + +class TB(object): + def __init__(self, dut): + self.dut = dut + + ports = len(dut.pcie_tlp_fifo_mux_inst.in_tlp_ready) + + self.log = logging.getLogger("cocotb.tb") + self.log.setLevel(logging.DEBUG) + + cocotb.start_soon(Clock(dut.clk, 4, units="ns").start()) + + self.source = [PcieIfSource(PcieIfBus.from_prefix(dut, f"in{k:02d}_tlp"), dut.clk, dut.rst) for k in range(ports)] + self.sink = PcieIfSink(PcieIfBus.from_prefix(dut, "out_tlp"), dut.clk, dut.rst) + + def set_idle_generator(self, generator=None): + if generator: + for source in self.source: + source.set_pause_generator(generator()) + + def set_backpressure_generator(self, generator=None): + if generator: + self.sink.set_pause_generator(generator()) + + async def cycle_reset(self): + self.dut.rst.setimmediatevalue(0) + await RisingEdge(self.dut.clk) + await 
RisingEdge(self.dut.clk) + self.dut.rst.value = 1 + await RisingEdge(self.dut.clk) + await RisingEdge(self.dut.clk) + self.dut.rst.value = 0 + await RisingEdge(self.dut.clk) + await RisingEdge(self.dut.clk) + + +async def run_test(dut, payload_lengths=None, payload_data=None, idle_inserter=None, backpressure_inserter=None, port=0): + + tb = TB(dut) + + seg_count = len(tb.sink.bus.valid) + seq_count = 2**(len(tb.sink.bus.seq) // seg_count) + + cur_seq = 1 + + await tb.cycle_reset() + + tb.set_idle_generator(idle_inserter) + tb.set_backpressure_generator(backpressure_inserter) + + test_tlps = [] + test_frames = [] + + for test_data in [payload_data(x) for x in payload_lengths()]: + test_tlp = Tlp() + + if len(test_data): + test_tlp.fmt_type = TlpType.MEM_WRITE + test_tlp.set_addr_be_data(cur_seq*4, test_data) + test_tlp.requester_id = port + else: + test_tlp.fmt_type = TlpType.MEM_READ + test_tlp.set_addr_be(cur_seq*4, 4) + test_tlp.requester_id = port + + test_frame = PcieIfFrame.from_tlp(test_tlp) + test_frame.seq = cur_seq + test_frame.func_num = port + + test_tlps.append(test_tlp) + test_frames.append(test_frame) + await tb.source[port].send(test_frame) + + cur_seq = (cur_seq + 1) % seq_count + + for test_tlp in test_tlps: + rx_frame = await tb.sink.recv() + + rx_tlp = rx_frame.to_tlp() + + assert rx_tlp == test_tlp + + assert tb.sink.empty() + + await RisingEdge(dut.clk) + await RisingEdge(dut.clk) + + +async def run_stress_test(dut, idle_inserter=None, backpressure_inserter=None): + + tb = TB(dut) + + seg_count = len(tb.sink.bus.valid) + seq_count = 2**(len(tb.sink.bus.seq) // seg_count) + + cur_seq = 1 + + await tb.cycle_reset() + + tb.set_idle_generator(idle_inserter) + tb.set_backpressure_generator(backpressure_inserter) + + test_tlps = [list() for x in tb.source] + + for p in range(len(tb.source)): + for k in range(128): + length = random.randint(1, 512) + test_tlp = Tlp() + test_tlp.fmt_type = random.choice([TlpType.MEM_WRITE, TlpType.MEM_READ]) + if 
test_tlp.fmt_type == TlpType.MEM_WRITE: + test_data = bytearray(itertools.islice(itertools.cycle(range(256)), length)) + test_tlp.set_addr_be_data(cur_seq*4, test_data) + test_tlp.requester_id = p + elif test_tlp.fmt_type == TlpType.MEM_READ: + test_tlp.set_addr_be(cur_seq*4, length) + test_tlp.tag = cur_seq + test_tlp.requester_id = p + + test_frame = PcieIfFrame.from_tlp(test_tlp) + test_frame.seq = cur_seq + test_frame.func_num = p + + test_tlps[p].append(test_tlp) + await tb.source[p].send(test_frame) + + cur_seq = (cur_seq + 1) % seq_count + + while any(test_tlps): + rx_frame = await tb.sink.recv() + + rx_tlp = rx_frame.to_tlp() + + test_tlp = test_tlps[rx_frame.func_num].pop(0) + + assert test_frame is not None + + assert rx_tlp == test_tlp + + assert tb.sink.empty() + + await RisingEdge(dut.clk) + await RisingEdge(dut.clk) + + +def cycle_pause(): + return itertools.cycle([1, 1, 1, 0]) + + +def size_list(): + return list(range(0, 512+1, 4))+[4]*64 + + +def incrementing_payload(length): + return bytearray(itertools.islice(itertools.cycle(range(256)), length)) + + +if cocotb.SIM_NAME: + + ports = len(cocotb.top.pcie_tlp_fifo_mux_inst.in_tlp_ready) + + factory = TestFactory(run_test) + factory.add_option("payload_lengths", [size_list]) + factory.add_option("payload_data", [incrementing_payload]) + factory.add_option("idle_inserter", [None, cycle_pause]) + factory.add_option("backpressure_inserter", [None, cycle_pause]) + factory.add_option("port", list(range(ports))) + factory.generate_tests() + + factory = TestFactory(run_stress_test) + factory.add_option("idle_inserter", [None, cycle_pause]) + factory.add_option("backpressure_inserter", [None, cycle_pause]) + factory.generate_tests() + + +# cocotb-test + +tests_dir = os.path.dirname(__file__) +rtl_dir = os.path.abspath(os.path.join(tests_dir, '..', '..', 'rtl')) + + +@pytest.mark.parametrize("round_robin", [0, 1]) +@pytest.mark.parametrize(("pcie_data_width", "tlp_seg_count"), + [(64, 1), (128, 1), (256, 1), 
(256, 2), (512, 1), (512, 2), (512, 4)]) +@pytest.mark.parametrize("ports", [1, 4]) +def test_pcie_tlp_fifo_mux(request, pcie_data_width, tlp_seg_count, ports, round_robin): + dut = "pcie_tlp_fifo_mux" + wrapper = f"{dut}_wrap_{ports}" + module = os.path.splitext(os.path.basename(__file__))[0] + toplevel = wrapper + + # generate wrapper + wrapper_file = os.path.join(tests_dir, f"{wrapper}.v") + if not os.path.exists(wrapper_file): + subprocess.Popen( + [os.path.join(rtl_dir, f"{dut}_wrap.py"), "-p", f"{ports}"], + cwd=tests_dir + ).wait() + + verilog_sources = [ + wrapper_file, + os.path.join(rtl_dir, f"{dut}.v"), + os.path.join(rtl_dir, "pcie_tlp_fifo_raw.v"), + ] + + parameters = {} + + parameters['TLP_DATA_WIDTH'] = pcie_data_width + parameters['TLP_STRB_WIDTH'] = parameters['TLP_DATA_WIDTH'] // 32 + parameters['TLP_HDR_WIDTH'] = 128 + parameters['SEQ_NUM_WIDTH'] = 6 + parameters['IN_TLP_SEG_COUNT'] = tlp_seg_count + parameters['OUT_TLP_SEG_COUNT'] = parameters['IN_TLP_SEG_COUNT'] + parameters['ARB_TYPE_ROUND_ROBIN'] = round_robin + parameters['ARB_LSB_HIGH_PRIORITY'] = 1 + parameters['FIFO_DEPTH'] = 4096 + parameters['FIFO_WATERMARK'] = parameters['FIFO_DEPTH'] // 2 + + extra_env = {f'PARAM_{k}': str(v) for k, v in parameters.items()} + + sim_build = os.path.join(tests_dir, "sim_build", + request.node.name.replace('[', '-').replace(']', '')) + + cocotb_test.simulator.run( + python_search=[tests_dir], + verilog_sources=verilog_sources, + toplevel=toplevel, + module=module, + parameters=parameters, + sim_build=sim_build, + extra_env=extra_env, + )