From d4b009b6d24574aeabb7591e8782094c3dd95a24 Mon Sep 17 00:00:00 2001 From: Alex Forencich Date: Thu, 1 Apr 2021 17:25:57 -0700 Subject: [PATCH] Add PTP support at 100G on VCU1525 --- .../VCU1525/fpga_100g/ip/cmac_usplus_0.tcl | 3 +- .../VCU1525/fpga_100g/ip/cmac_usplus_1.tcl | 3 +- fpga/mqnic/VCU1525/fpga_100g/placement.xdc | 8 +- fpga/mqnic/VCU1525/fpga_100g/rtl/fpga.v | 100 +++++- fpga/mqnic/VCU1525/fpga_100g/rtl/fpga_core.v | 328 ++++++++++++++++-- .../fpga_100g/tb/fpga_core/test_fpga_core.py | 55 ++- 6 files changed, 451 insertions(+), 46 deletions(-) diff --git a/fpga/mqnic/VCU1525/fpga_100g/ip/cmac_usplus_0.tcl b/fpga/mqnic/VCU1525/fpga_100g/ip/cmac_usplus_0.tcl index 4e32cd4bd..f8f668d4b 100644 --- a/fpga/mqnic/VCU1525/fpga_100g/ip/cmac_usplus_0.tcl +++ b/fpga/mqnic/VCU1525/fpga_100g/ip/cmac_usplus_0.tcl @@ -16,5 +16,6 @@ set_property -dict [list \ CONFIG.LANE2_GT_LOC {X1Y49} \ CONFIG.LANE3_GT_LOC {X1Y50} \ CONFIG.LANE4_GT_LOC {X1Y51} \ - CONFIG.ENABLE_PIPELINE_REG {1} + CONFIG.ENABLE_PIPELINE_REG {1} \ + CONFIG.ENABLE_TIME_STAMPING {1} ] [get_ips cmac_usplus_0] diff --git a/fpga/mqnic/VCU1525/fpga_100g/ip/cmac_usplus_1.tcl b/fpga/mqnic/VCU1525/fpga_100g/ip/cmac_usplus_1.tcl index b7f833f16..4b3a8c726 100644 --- a/fpga/mqnic/VCU1525/fpga_100g/ip/cmac_usplus_1.tcl +++ b/fpga/mqnic/VCU1525/fpga_100g/ip/cmac_usplus_1.tcl @@ -16,5 +16,6 @@ set_property -dict [list \ CONFIG.LANE2_GT_LOC {X1Y45} \ CONFIG.LANE3_GT_LOC {X1Y46} \ CONFIG.LANE4_GT_LOC {X1Y47} \ - CONFIG.ENABLE_PIPELINE_REG {1} + CONFIG.ENABLE_PIPELINE_REG {1} \ + CONFIG.ENABLE_TIME_STAMPING {1} ] [get_ips cmac_usplus_1] diff --git a/fpga/mqnic/VCU1525/fpga_100g/placement.xdc b/fpga/mqnic/VCU1525/fpga_100g/placement.xdc index 7514faeea..8dd3be1b8 100644 --- a/fpga/mqnic/VCU1525/fpga_100g/placement.xdc +++ b/fpga/mqnic/VCU1525/fpga_100g/placement.xdc @@ -25,7 +25,11 @@ resize_pblock [get_pblocks pblock_pcie] -add {CLOCKREGION_X4Y5:CLOCKREGION_X5Y8} create_pblock pblock_eth add_cells_to_pblock [get_pblocks pblock_eth] [get_cells -quiet [list qsfp0_cmac_pad_inst]] -add_cells_to_pblock [get_pblocks pblock_eth] [get_cells -quiet [list core_inst/iface[0].mac[0].mac_tx_fifo_inst core_inst/iface[0].mac[0].mac_rx_fifo_inst]] +add_cells_to_pblock [get_pblocks pblock_eth] [get_cells -quiet [list core_inst/iface[0].mac[0].mac_tx_fifo_inst]] +add_cells_to_pblock [get_pblocks pblock_eth] [get_cells -quiet [list core_inst/iface[0].mac[0].mac_rx_fifo_inst]] +add_cells_to_pblock [get_pblocks pblock_eth] [get_cells -quiet [list core_inst/iface[0].mac[0].tx_ptp_ts_fifo]] add_cells_to_pblock [get_pblocks pblock_eth] [get_cells -quiet [list qsfp1_cmac_pad_inst]] -add_cells_to_pblock [get_pblocks pblock_eth] [get_cells -quiet [list core_inst/iface[1].mac[0].mac_tx_fifo_inst core_inst/iface[1].mac[0].mac_rx_fifo_inst]] +add_cells_to_pblock [get_pblocks pblock_eth] [get_cells -quiet [list core_inst/iface[1].mac[0].mac_tx_fifo_inst]] +add_cells_to_pblock [get_pblocks pblock_eth] [get_cells -quiet [list core_inst/iface[1].mac[0].mac_rx_fifo_inst]] +add_cells_to_pblock [get_pblocks pblock_eth] [get_cells -quiet [list core_inst/iface[1].mac[0].tx_ptp_ts_fifo]] resize_pblock [get_pblocks pblock_eth] -add {CLOCKREGION_X0Y10:CLOCKREGION_X0Y14} diff --git a/fpga/mqnic/VCU1525/fpga_100g/rtl/fpga.v b/fpga/mqnic/VCU1525/fpga_100g/rtl/fpga.v index 4c4045291..84549341d 100644 --- a/fpga/mqnic/VCU1525/fpga_100g/rtl/fpga.v +++ b/fpga/mqnic/VCU1525/fpga_100g/rtl/fpga.v @@ -775,6 +775,10 @@ wire qsfp0_mac_tx_axis_tready; wire qsfp0_mac_tx_axis_tlast; wire qsfp0_mac_tx_axis_tuser; +wire [79:0] qsfp0_tx_ptp_time_int; +wire [79:0] qsfp0_tx_ptp_ts_int; +wire qsfp0_tx_ptp_ts_valid_int; + wire qsfp0_rx_clk_int; wire qsfp0_rx_rst_int; @@ -782,7 +786,9 @@ wire [AXIS_ETH_DATA_WIDTH-1:0] qsfp0_rx_axis_tdata_int; wire [AXIS_ETH_KEEP_WIDTH-1:0] qsfp0_rx_axis_tkeep_int; wire qsfp0_rx_axis_tvalid_int; wire qsfp0_rx_axis_tlast_int; -wire qsfp0_rx_axis_tuser_int; +wire [80+1-1:0] qsfp0_rx_axis_tuser_int; + +wire [79:0] qsfp0_rx_ptp_time_int; assign qsfp1_refclk_reset = qsfp_refclk_reset_reg; assign qsfp1_fs = 2'b10; @@ -804,6 +810,10 @@ wire qsfp1_mac_tx_axis_tready; wire qsfp1_mac_tx_axis_tlast; wire qsfp1_mac_tx_axis_tuser; +wire [79:0] qsfp1_tx_ptp_time_int; +wire [79:0] qsfp1_tx_ptp_ts_int; +wire qsfp1_tx_ptp_ts_valid_int; + wire qsfp1_rx_clk_int; wire qsfp1_rx_rst_int; @@ -811,7 +821,9 @@ wire [AXIS_ETH_DATA_WIDTH-1:0] qsfp1_rx_axis_tdata_int; wire [AXIS_ETH_KEEP_WIDTH-1:0] qsfp1_rx_axis_tkeep_int; wire qsfp1_rx_axis_tvalid_int; wire qsfp1_rx_axis_tlast_int; -wire qsfp1_rx_axis_tuser_int; +wire [80+1-1:0] qsfp1_rx_axis_tuser_int; + +wire [79:0] qsfp1_rx_ptp_time_int; wire qsfp0_rx_status; wire qsfp1_rx_status; @@ -872,7 +884,7 @@ qsfp0_cmac_inst ( .rx_axis_tdata(qsfp0_rx_axis_tdata_int), // output [511:0] .rx_axis_tlast(qsfp0_rx_axis_tlast_int), // output .rx_axis_tkeep(qsfp0_rx_axis_tkeep_int), // output [63:0] - .rx_axis_tuser(qsfp0_rx_axis_tuser_int), // output + .rx_axis_tuser(qsfp0_rx_axis_tuser_int[0]), // output .rx_otn_bip8_0(), // output [7:0] .rx_otn_bip8_1(), // output [7:0] @@ -891,6 +903,30 @@ qsfp0_cmac_inst ( .usr_rx_reset(qsfp0_rx_rst_int), // output .gt_rxusrclk2(), // output + .rx_lane_aligner_fill_0(), // output [6:0] + .rx_lane_aligner_fill_1(), // output [6:0] + .rx_lane_aligner_fill_10(), // output [6:0] + .rx_lane_aligner_fill_11(), // output [6:0] + .rx_lane_aligner_fill_12(), // output [6:0] + .rx_lane_aligner_fill_13(), // output [6:0] + .rx_lane_aligner_fill_14(), // output [6:0] + .rx_lane_aligner_fill_15(), // output [6:0] + .rx_lane_aligner_fill_16(), // output [6:0] + .rx_lane_aligner_fill_17(), // output [6:0] + .rx_lane_aligner_fill_18(), // output [6:0] + .rx_lane_aligner_fill_19(), // output [6:0] + .rx_lane_aligner_fill_2(), // output [6:0] + .rx_lane_aligner_fill_3(), // output [6:0] + .rx_lane_aligner_fill_4(), // output [6:0] + .rx_lane_aligner_fill_5(), // output [6:0] + .rx_lane_aligner_fill_6(), // output [6:0] + .rx_lane_aligner_fill_7(), // output [6:0] + .rx_lane_aligner_fill_8(), // output [6:0] + .rx_lane_aligner_fill_9(), // output [6:0] + .rx_ptp_tstamp_out(qsfp0_rx_axis_tuser_int[80:1]), // output [79:0] + .rx_ptp_pcslane_out(), // output [4:0] + .ctl_rx_systemtimerin(qsfp0_rx_ptp_time_int), // input [79:0] + .stat_rx_aligned(), // output .stat_rx_aligned_err(), // output .stat_rx_bad_code(), // output [2:0] @@ -1054,6 +1090,18 @@ qsfp0_cmac_inst ( .stat_rx_rsfec_lane_mapping(), // output [7:0] .stat_rx_rsfec_uncorrected_cw_inc(), // output + .ctl_tx_systemtimerin(qsfp0_tx_ptp_time_int), // input [79:0] + + .stat_tx_ptp_fifo_read_error(), // output + .stat_tx_ptp_fifo_write_error(), // output + + .tx_ptp_tstamp_valid_out(qsfp0_tx_ptp_ts_valid_int), // output + .tx_ptp_pcslane_out(), // output [4:0] + .tx_ptp_tstamp_tag_out(), // output [15:0] + .tx_ptp_tstamp_out(qsfp0_tx_ptp_ts_int), // output [79:0] + .tx_ptp_1588op_in(2'b10), // input [1:0] + .tx_ptp_tag_field_in(16'd0), // input [15:0] + .stat_tx_bad_fcs(), // output .stat_tx_broadcast(), // output .stat_tx_frame_error(), // output @@ -1156,7 +1204,7 @@ qsfp1_cmac_inst ( .rx_axis_tdata(qsfp1_rx_axis_tdata_int), // output [511:0] .rx_axis_tlast(qsfp1_rx_axis_tlast_int), // output .rx_axis_tkeep(qsfp1_rx_axis_tkeep_int), // output [63:0] - .rx_axis_tuser(qsfp1_rx_axis_tuser_int), // output + .rx_axis_tuser(qsfp1_rx_axis_tuser_int[0]), // output .rx_otn_bip8_0(), // output [7:0] .rx_otn_bip8_1(), // output [7:0] @@ -1175,6 +1223,30 @@ qsfp1_cmac_inst ( .usr_rx_reset(qsfp1_rx_rst_int), // output .gt_rxusrclk2(), // output + .rx_lane_aligner_fill_0(), // output [6:0] + .rx_lane_aligner_fill_1(), // output [6:0] + .rx_lane_aligner_fill_10(), // output [6:0] + .rx_lane_aligner_fill_11(), // output [6:0] + .rx_lane_aligner_fill_12(), // output [6:0] + .rx_lane_aligner_fill_13(), // output [6:0] + .rx_lane_aligner_fill_14(), // output [6:0] + .rx_lane_aligner_fill_15(), // output [6:0] + .rx_lane_aligner_fill_16(), // output [6:0] + .rx_lane_aligner_fill_17(), // output [6:0] + .rx_lane_aligner_fill_18(), // output [6:0] + .rx_lane_aligner_fill_19(), // output [6:0] + .rx_lane_aligner_fill_2(), // output [6:0] + .rx_lane_aligner_fill_3(), // output [6:0] + .rx_lane_aligner_fill_4(), // output [6:0] + .rx_lane_aligner_fill_5(), // output [6:0] + .rx_lane_aligner_fill_6(), // output [6:0] + .rx_lane_aligner_fill_7(), // output [6:0] + .rx_lane_aligner_fill_8(), // output [6:0] + .rx_lane_aligner_fill_9(), // output [6:0] + .rx_ptp_tstamp_out(qsfp1_rx_axis_tuser_int[80:1]), // output [79:0] + .rx_ptp_pcslane_out(), // output [4:0] + .ctl_rx_systemtimerin(qsfp1_rx_ptp_time_int), // input [79:0] + .stat_rx_aligned(), // output .stat_rx_aligned_err(), // output .stat_rx_bad_code(), // output [2:0] @@ -1338,6 +1410,18 @@ qsfp1_cmac_inst ( .stat_rx_rsfec_lane_mapping(), // output [7:0] .stat_rx_rsfec_uncorrected_cw_inc(), // output + .ctl_tx_systemtimerin(qsfp1_tx_ptp_time_int), // input [79:0] + + .stat_tx_ptp_fifo_read_error(), // output + .stat_tx_ptp_fifo_write_error(), // output + + .tx_ptp_tstamp_valid_out(qsfp1_tx_ptp_ts_valid_int), // output + .tx_ptp_pcslane_out(), // output [4:0] + .tx_ptp_tstamp_tag_out(), // output [15:0] + .tx_ptp_tstamp_out(qsfp1_tx_ptp_ts_int), // output [79:0] + .tx_ptp_1588op_in(2'b10), // input [1:0] + .tx_ptp_tag_field_in(16'd0), // input [15:0] + .stat_tx_bad_fcs(), // output .stat_tx_broadcast(), // output .stat_tx_frame_error(), // output @@ -1524,6 +1608,9 @@ core_inst ( .qsfp0_tx_axis_tready(qsfp0_tx_axis_tready_int), .qsfp0_tx_axis_tlast(qsfp0_tx_axis_tlast_int), .qsfp0_tx_axis_tuser(qsfp0_tx_axis_tuser_int), + .qsfp0_tx_ptp_time(qsfp0_tx_ptp_time_int), + .qsfp0_tx_ptp_ts(qsfp0_tx_ptp_ts_int), + .qsfp0_tx_ptp_ts_valid(qsfp0_tx_ptp_ts_valid_int), .qsfp0_rx_clk(qsfp0_rx_clk_int), .qsfp0_rx_rst(qsfp0_rx_rst_int), .qsfp0_rx_axis_tdata(qsfp0_rx_axis_tdata_int), @@ -1531,6 +1618,7 @@ core_inst ( .qsfp0_rx_axis_tvalid(qsfp0_rx_axis_tvalid_int), .qsfp0_rx_axis_tlast(qsfp0_rx_axis_tlast_int), .qsfp0_rx_axis_tuser(qsfp0_rx_axis_tuser_int), + .qsfp0_rx_ptp_time(qsfp0_rx_ptp_time_int), .qsfp0_modprsl(qsfp0_modprsl_int), .qsfp0_modsell(qsfp0_modsell), .qsfp0_resetl(qsfp0_resetl), @@ -1545,6 +1633,9 @@ core_inst ( .qsfp1_tx_axis_tready(qsfp1_tx_axis_tready_int), .qsfp1_tx_axis_tlast(qsfp1_tx_axis_tlast_int), .qsfp1_tx_axis_tuser(qsfp1_tx_axis_tuser_int), + .qsfp1_tx_ptp_time(qsfp1_tx_ptp_time_int), + .qsfp1_tx_ptp_ts(qsfp1_tx_ptp_ts_int), + .qsfp1_tx_ptp_ts_valid(qsfp1_tx_ptp_ts_valid_int), .qsfp1_rx_clk(qsfp1_rx_clk_int), .qsfp1_rx_rst(qsfp1_rx_rst_int), .qsfp1_rx_axis_tdata(qsfp1_rx_axis_tdata_int), @@ -1552,6 +1643,7 @@ core_inst ( .qsfp1_rx_axis_tvalid(qsfp1_rx_axis_tvalid_int), .qsfp1_rx_axis_tlast(qsfp1_rx_axis_tlast_int), .qsfp1_rx_axis_tuser(qsfp1_rx_axis_tuser_int), + .qsfp1_rx_ptp_time(qsfp1_rx_ptp_time_int), .qsfp1_modprsl(qsfp1_modprsl_int), .qsfp1_modsell(qsfp1_modsell), .qsfp1_resetl(qsfp1_resetl), diff --git a/fpga/mqnic/VCU1525/fpga_100g/rtl/fpga_core.v b/fpga/mqnic/VCU1525/fpga_100g/rtl/fpga_core.v index 36f2f92da..0ffa5b65b 100644 --- a/fpga/mqnic/VCU1525/fpga_100g/rtl/fpga_core.v +++ b/fpga/mqnic/VCU1525/fpga_100g/rtl/fpga_core.v @@ -168,6 +168,10 @@ module fpga_core # output wire qsfp0_tx_axis_tlast, output wire qsfp0_tx_axis_tuser, + output wire [79:0] qsfp0_tx_ptp_time, + input wire [79:0] qsfp0_tx_ptp_ts, + input wire qsfp0_tx_ptp_ts_valid, + input wire qsfp0_rx_clk, input wire qsfp0_rx_rst, @@ -175,7 +179,9 @@ module fpga_core # input wire [AXIS_ETH_KEEP_WIDTH-1:0] qsfp0_rx_axis_tkeep, input wire qsfp0_rx_axis_tvalid, input wire qsfp0_rx_axis_tlast, - input wire qsfp0_rx_axis_tuser, + input wire [80+1-1:0] qsfp0_rx_axis_tuser, + + output wire [79:0] qsfp0_rx_ptp_time, output wire qsfp0_modsell, output wire qsfp0_resetl, @@ -193,6 +199,10 @@ module fpga_core # output wire qsfp1_tx_axis_tlast, output wire qsfp1_tx_axis_tuser, + output wire [79:0] qsfp1_tx_ptp_time, + input wire [79:0] qsfp1_tx_ptp_ts, + input wire qsfp1_tx_ptp_ts_valid, + input wire qsfp1_rx_clk, input wire qsfp1_rx_rst, @@ -200,7 +210,9 @@ module fpga_core # input wire [AXIS_ETH_KEEP_WIDTH-1:0] qsfp1_rx_axis_tkeep, input wire qsfp1_rx_axis_tvalid, input wire qsfp1_rx_axis_tlast, - input wire qsfp1_rx_axis_tuser, + input wire [80+1-1:0] qsfp1_rx_axis_tuser, + + output wire [79:0] qsfp1_rx_ptp_time, output wire qsfp1_modsell, output wire qsfp1_resetl, @@ -268,9 +280,7 @@ parameter TX_SCHEDULER_PIPELINE = TX_QUEUE_PIPELINE; parameter TDMA_INDEX_WIDTH = 6; // Timstamping parameters (port) -parameter IF_PTP_PERIOD_NS = 6'h3; -parameter IF_PTP_PERIOD_FNS = 16'h1a60; -parameter PTP_TS_ENABLE = 0; +parameter PTP_TS_ENABLE = 1; parameter PTP_TS_WIDTH = 96; parameter TX_PTP_TS_FIFO_DEPTH = 32; parameter RX_PTP_TS_FIFO_DEPTH = 32; @@ -1888,6 +1898,8 @@ wire [PORT_COUNT-1:0] port_tx_axis_tvalid; wire [PORT_COUNT-1:0] port_tx_axis_tready; wire [PORT_COUNT-1:0] port_tx_axis_tlast; wire [PORT_COUNT-1:0] port_tx_axis_tuser; +wire [PORT_COUNT*80-1:0] port_tx_ptp_ts; +wire [PORT_COUNT-1:0] port_tx_ptp_ts_valid; wire [PORT_COUNT-1:0] port_rx_clk; wire [PORT_COUNT-1:0] port_rx_rst; @@ -1895,7 +1907,7 @@ wire [PORT_COUNT*AXIS_ETH_DATA_WIDTH-1:0] port_rx_axis_tdata; wire [PORT_COUNT*AXIS_ETH_KEEP_WIDTH-1:0] port_rx_axis_tkeep; wire [PORT_COUNT-1:0] port_rx_axis_tvalid; wire [PORT_COUNT-1:0] port_rx_axis_tlast; -wire [PORT_COUNT-1:0] port_rx_axis_tuser; +wire [PORT_COUNT*81-1:0] port_rx_axis_tuser; assign led[0] = pps_led_reg; assign led[2:1] = 0; @@ -1914,7 +1926,7 @@ localparam QSFP1_IND = 1; generate genvar m, n; - if (QSFP0_IND >= 0 && QSFP0_IND < PORT_COUNT) begin + if (QSFP0_IND >= 0 && QSFP0_IND < PORT_COUNT) begin : qsfp0 assign port_tx_clk[QSFP0_IND] = qsfp0_tx_clk; assign port_tx_rst[QSFP0_IND] = qsfp0_tx_rst; assign qsfp0_tx_axis_tdata = port_tx_axis_tdata[QSFP0_IND*AXIS_ETH_DATA_WIDTH +: AXIS_ETH_DATA_WIDTH]; @@ -1923,6 +1935,8 @@ generate assign port_tx_axis_tready[QSFP0_IND] = qsfp0_tx_axis_tready; assign qsfp0_tx_axis_tlast = port_tx_axis_tlast[QSFP0_IND]; assign qsfp0_tx_axis_tuser = port_tx_axis_tuser[QSFP0_IND]; + assign port_tx_ptp_ts[QSFP0_IND*80 +: 80] = qsfp0_tx_ptp_ts; + assign port_tx_ptp_ts_valid[QSFP0_IND] = qsfp0_tx_ptp_ts_valid; assign port_rx_clk[QSFP0_IND] = qsfp0_rx_clk; assign port_rx_rst[QSFP0_IND] = qsfp0_rx_rst; @@ -1930,16 +1944,69 @@ generate assign port_rx_axis_tkeep[QSFP0_IND*AXIS_ETH_KEEP_WIDTH +: AXIS_ETH_KEEP_WIDTH] = qsfp0_rx_axis_tkeep; assign port_rx_axis_tvalid[QSFP0_IND] = qsfp0_rx_axis_tvalid; assign port_rx_axis_tlast[QSFP0_IND] = qsfp0_rx_axis_tlast; - assign port_rx_axis_tuser[QSFP0_IND] = qsfp0_rx_axis_tuser; + assign port_rx_axis_tuser[QSFP0_IND*81 +: 81] = qsfp0_rx_axis_tuser; + + if (PTP_TS_ENABLE) begin : ptp + wire [PTP_TS_WIDTH-1:0] tx_ptp_ts_96; + wire [PTP_TS_WIDTH-1:0] rx_ptp_ts_96; + + assign qsfp0_tx_ptp_time = tx_ptp_ts_96[95:16]; + assign qsfp0_rx_ptp_time = rx_ptp_ts_96[95:16]; + + ptp_clock_cdc #( + .TS_WIDTH(96), + .NS_WIDTH(4), + .FNS_WIDTH(16), + .USE_SAMPLE_CLOCK(1'b0) + ) + tx_ptp_cdc ( + .input_clk(clk_250mhz), + .input_rst(rst_250mhz), + .output_clk(qsfp0_tx_clk), + .output_rst(qsfp0_tx_rst), + .sample_clk(clk_250mhz), + .input_ts(ptp_ts_96), + .input_ts_step(ptp_ts_step), + .output_ts(tx_ptp_ts_96), + .output_ts_step(), + .output_pps(), + .locked() + ); + + ptp_clock_cdc #( + .TS_WIDTH(96), + .NS_WIDTH(4), + .FNS_WIDTH(16), + .USE_SAMPLE_CLOCK(1'b0) + ) + rx_ptp_cdc ( + .input_clk(clk_250mhz), + .input_rst(rst_250mhz), + .output_clk(qsfp0_rx_clk), + .output_rst(qsfp0_rx_rst), + .sample_clk(clk_250mhz), + .input_ts(ptp_ts_96), + .input_ts_step(ptp_ts_step), + .output_ts(rx_ptp_ts_96), + .output_ts_step(), + .output_pps(), + .locked() + ); + end else begin + assign qsfp0_tx_ptp_time = 80'd0; + assign qsfp0_rx_ptp_time = 80'd0; + end end else begin assign qsfp0_tx_axis_tdata = {AXIS_ETH_DATA_WIDTH{1'b0}}; assign qsfp0_tx_axis_tkeep = {AXIS_ETH_KEEP_WIDTH{1'b0}}; assign qsfp0_tx_axis_tvalid = 1'b0; assign qsfp0_tx_axis_tlast = 1'b0; assign qsfp0_tx_axis_tuser = 1'b0; + assign qsfp0_tx_ptp_time = 80'd0; + assign qsfp0_rx_ptp_time = 80'd0; end - if (QSFP1_IND >= 0 && QSFP1_IND < PORT_COUNT) begin + if (QSFP1_IND >= 0 && QSFP1_IND < PORT_COUNT) begin : qsfp1 assign port_tx_clk[QSFP1_IND] = qsfp1_tx_clk; assign port_tx_rst[QSFP1_IND] = qsfp1_tx_rst; assign qsfp1_tx_axis_tdata = port_tx_axis_tdata[QSFP1_IND*AXIS_ETH_DATA_WIDTH +: AXIS_ETH_DATA_WIDTH]; @@ -1948,6 +2015,8 @@ generate assign port_tx_axis_tready[QSFP1_IND] = qsfp1_tx_axis_tready; assign qsfp1_tx_axis_tlast = port_tx_axis_tlast[QSFP1_IND]; assign qsfp1_tx_axis_tuser = port_tx_axis_tuser[QSFP1_IND]; + assign port_tx_ptp_ts[QSFP1_IND*80 +: 80] = qsfp1_tx_ptp_ts; + assign port_tx_ptp_ts_valid[QSFP1_IND] = qsfp1_tx_ptp_ts_valid; assign port_rx_clk[QSFP1_IND] = qsfp1_rx_clk; assign port_rx_rst[QSFP1_IND] = qsfp1_rx_rst; @@ -1955,13 +2024,66 @@ generate assign port_rx_axis_tkeep[QSFP1_IND*AXIS_ETH_KEEP_WIDTH +: AXIS_ETH_KEEP_WIDTH] = qsfp1_rx_axis_tkeep; assign port_rx_axis_tvalid[QSFP1_IND] = qsfp1_rx_axis_tvalid; assign port_rx_axis_tlast[QSFP1_IND] = qsfp1_rx_axis_tlast; - assign port_rx_axis_tuser[QSFP1_IND] = qsfp1_rx_axis_tuser; + assign port_rx_axis_tuser[QSFP1_IND*81 +: 81] = qsfp1_rx_axis_tuser; + + if (PTP_TS_ENABLE) begin : ptp + wire [PTP_TS_WIDTH-1:0] tx_ptp_ts_96; + wire [PTP_TS_WIDTH-1:0] rx_ptp_ts_96; + + assign qsfp1_tx_ptp_time = tx_ptp_ts_96[95:16]; + assign qsfp1_rx_ptp_time = rx_ptp_ts_96[95:16]; + + ptp_clock_cdc #( + .TS_WIDTH(96), + .NS_WIDTH(4), + .FNS_WIDTH(16), + .USE_SAMPLE_CLOCK(1'b0) + ) + tx_ptp_cdc ( + .input_clk(clk_250mhz), + .input_rst(rst_250mhz), + .output_clk(qsfp1_tx_clk), + .output_rst(qsfp1_tx_rst), + .sample_clk(clk_250mhz), + .input_ts(ptp_ts_96), + .input_ts_step(ptp_ts_step), + .output_ts(tx_ptp_ts_96), + .output_ts_step(), + .output_pps(), + .locked() + ); + + ptp_clock_cdc #( + .TS_WIDTH(96), + .NS_WIDTH(4), + .FNS_WIDTH(16), + .USE_SAMPLE_CLOCK(1'b0) + ) + rx_ptp_cdc ( + .input_clk(clk_250mhz), + .input_rst(rst_250mhz), + .output_clk(qsfp1_rx_clk), + .output_rst(qsfp1_rx_rst), + .sample_clk(clk_250mhz), + .input_ts(ptp_ts_96), + .input_ts_step(ptp_ts_step), + .output_ts(rx_ptp_ts_96), + .output_ts_step(), + .output_pps(), + .locked() + ); + end else begin + assign qsfp1_tx_ptp_time = 80'd0; + assign qsfp1_rx_ptp_time = 80'd0; + end end else begin assign qsfp1_tx_axis_tdata = {AXIS_ETH_DATA_WIDTH{1'b0}}; assign qsfp1_tx_axis_tkeep = {AXIS_ETH_KEEP_WIDTH{1'b0}}; assign qsfp1_tx_axis_tvalid = 1'b0; assign qsfp1_tx_axis_tlast = 1'b0; assign qsfp1_tx_axis_tuser = 1'b0; + assign qsfp1_tx_ptp_time = 80'd0; + assign qsfp1_rx_ptp_time = 80'd0; end case (IF_COUNT) @@ -1988,6 +2110,7 @@ generate wire [PORTS_PER_IF-1:0] rx_axis_tready; wire [PORTS_PER_IF-1:0] rx_axis_tlast; wire [PORTS_PER_IF-1:0] rx_axis_tuser; + wire [PORTS_PER_IF*81-1:0] rx_axis_tuser_int; wire [PORTS_PER_IF*PTP_TS_WIDTH-1:0] rx_ptp_ts_96; wire [PORTS_PER_IF-1:0] rx_ptp_ts_valid; @@ -2248,6 +2371,171 @@ generate for (m = 0; m < PORTS_PER_IF; m = m + 1) begin : mac + if (PTP_TS_ENABLE) begin + + wire [79:0] tx_ptp_ts_96_pipe; + wire tx_ptp_ts_valid_pipe; + wire tx_ptp_ts_ready_pipe; + + axis_async_fifo #( + .DEPTH(TX_PTP_TS_FIFO_DEPTH), + .DATA_WIDTH(80), + .KEEP_ENABLE(0), + .LAST_ENABLE(0), + .ID_ENABLE(0), + .DEST_ENABLE(0), + .USER_ENABLE(0), + .FRAME_FIFO(0) + ) + tx_ptp_ts_fifo ( + .async_rst(rst_250mhz | port_tx_rst[n*PORTS_PER_IF+m]), + + // AXI input + .s_clk(port_tx_clk[n*PORTS_PER_IF+m]), + .s_axis_tdata(port_tx_ptp_ts[(n*PORTS_PER_IF+m)*80 +: 80]), + .s_axis_tkeep(0), + .s_axis_tvalid(port_tx_ptp_ts_valid[n*PORTS_PER_IF+m]), + .s_axis_tready(), + .s_axis_tlast(0), + .s_axis_tid(0), + .s_axis_tdest(0), + .s_axis_tuser(0), + + // AXI output + .m_clk(clk_250mhz), + .m_axis_tdata(tx_ptp_ts_96_pipe), + .m_axis_tkeep(), + .m_axis_tvalid(tx_ptp_ts_valid_pipe), + .m_axis_tready(tx_ptp_ts_ready_pipe), + .m_axis_tlast(), + .m_axis_tid(), + .m_axis_tdest(), + .m_axis_tuser(), + + // Status + .s_status_overflow(), + .s_status_bad_frame(), + .s_status_good_frame(), + .m_status_overflow(), + .m_status_bad_frame(), + .m_status_good_frame() + ); + + axis_pipeline_register #( + .DATA_WIDTH(80), + .KEEP_ENABLE(0), + .LAST_ENABLE(0), + .ID_ENABLE(0), + .DEST_ENABLE(0), + .USER_ENABLE(0), + .REG_TYPE(2), + .LENGTH(2) + ) + tx_ptp_ts_reg ( + .clk(clk_250mhz), + .rst(rst_250mhz), + // AXI input + .s_axis_tdata(tx_ptp_ts_96_pipe), + .s_axis_tkeep(0), + .s_axis_tvalid(tx_ptp_ts_valid_pipe), + .s_axis_tready(tx_ptp_ts_ready_pipe), + .s_axis_tlast(0), + .s_axis_tid(0), + .s_axis_tdest(0), + .s_axis_tuser(0), + // AXI output + .m_axis_tdata(tx_ptp_ts_96[m*PTP_TS_WIDTH+16 +: 80]), + .m_axis_tkeep(), + .m_axis_tvalid(tx_ptp_ts_valid[m +: 1]), + .m_axis_tready(tx_ptp_ts_ready[m +: 1]), + .m_axis_tlast(), + .m_axis_tid(), + .m_axis_tdest(), + .m_axis_tuser() + ); + + assign tx_ptp_ts_96[m*PTP_TS_WIDTH +: 16] = 16'd0; + + end else begin + + assign tx_ptp_ts_96[m*PTP_TS_WIDTH +: PTP_TS_WIDTH] = {PTP_TS_WIDTH{1'b0}}; + assign tx_ptp_ts_valid = 1'b0; + + end + + if (PTP_TS_ENABLE) begin + + wire [79:0] rx_ts; + wire rx_ts_valid; + + ptp_ts_extract #( + .TS_WIDTH(80), + .TS_OFFSET(1), + .USER_WIDTH(81) + ) + rx_ptp_ts_extract ( + .clk(clk_250mhz), + .rst(rst_250mhz), + + // AXI stream input + .s_axis_tvalid(rx_axis_tvalid[m +: 1] && rx_axis_tready[m +: 1]), + .s_axis_tlast(rx_axis_tlast[m +: 1]), + .s_axis_tuser(rx_axis_tuser_int[m*81 +: 81]), + + // Timestamp output + .m_axis_ts(rx_ts), + .m_axis_ts_valid(rx_ts_valid) + ); + + axis_fifo #( + .DEPTH(RX_PTP_TS_FIFO_DEPTH), + .DATA_WIDTH(80), + .KEEP_ENABLE(0), + .LAST_ENABLE(0), + .ID_ENABLE(0), + .DEST_ENABLE(0), + .USER_ENABLE(0), + .FRAME_FIFO(0) + ) + rx_ptp_ts_fifo ( + .clk(clk_250mhz), + .rst(rst_250mhz), + + // AXI input + .s_axis_tdata(rx_ts), + .s_axis_tkeep(0), + .s_axis_tvalid(rx_ts_valid), + .s_axis_tready(), + .s_axis_tlast(0), + .s_axis_tid(0), + .s_axis_tdest(0), + .s_axis_tuser(0), + + // AXI output + .m_axis_tdata(rx_ptp_ts_96[m*PTP_TS_WIDTH+16 +: 80]), + .m_axis_tkeep(), + .m_axis_tvalid(rx_ptp_ts_valid[m +: 1]), + .m_axis_tready(rx_ptp_ts_ready[m +: 1]), + .m_axis_tlast(), + .m_axis_tid(), + .m_axis_tdest(), + .m_axis_tuser(), + + // Status + .status_overflow(), + .status_bad_frame(), + .status_good_frame() + ); + + assign rx_ptp_ts_96[m*PTP_TS_WIDTH +: 16] = 16'd0; + + end else begin + + assign rx_ptp_ts_96[m*PTP_TS_WIDTH +: PTP_TS_WIDTH] = {PTP_TS_WIDTH{1'b0}}; + assign rx_ptp_ts_valid[m +: 1] = 1'b0; + + end + wire [AXIS_DATA_WIDTH-1:0] tx_axis_tdata_pipe; wire [AXIS_KEEP_WIDTH-1:0] tx_axis_tkeep_pipe; wire tx_axis_tvalid_pipe; @@ -2255,20 +2543,12 @@ generate wire tx_axis_tlast_pipe; wire tx_axis_tuser_pipe; - // wire [PTP_TS_WIDTH-1:0] tx_ptp_ts_96_pipe; - // wire tx_ptp_ts_valid_pipe; - // wire tx_ptp_ts_ready_pipe; - wire [AXIS_DATA_WIDTH-1:0] rx_axis_tdata_pipe; wire [AXIS_KEEP_WIDTH-1:0] rx_axis_tkeep_pipe; wire rx_axis_tvalid_pipe; wire rx_axis_tready_pipe; wire rx_axis_tlast_pipe; - wire rx_axis_tuser_pipe; - - // wire [PTP_TS_WIDTH-1:0] rx_ptp_ts_96_pipe; - // wire rx_ptp_ts_valid_pipe; - // wire rx_ptp_ts_ready_pipe; + wire [80:0] rx_axis_tuser_pipe; axis_pipeline_register #( .DATA_WIDTH(AXIS_DATA_WIDTH), @@ -2313,7 +2593,7 @@ generate .ID_ENABLE(0), .DEST_ENABLE(0), .USER_ENABLE(1), - .USER_WIDTH(1), + .USER_WIDTH(PTP_TS_ENABLE ? 81 : 1), .REG_TYPE(2), .LENGTH(2) ) @@ -2337,9 +2617,11 @@ generate .m_axis_tlast(rx_axis_tlast[m +: 1]), .m_axis_tid(), .m_axis_tdest(), - .m_axis_tuser(rx_axis_tuser[m +: 1]) + .m_axis_tuser(rx_axis_tuser_int[m*81 +: 81]) ); + assign rx_axis_tuser[m +: 1] = rx_axis_tuser_int[m*81 +: 1]; + axis_async_fifo #( .DEPTH(TX_FIFO_DEPTH), .DATA_WIDTH(AXIS_ETH_DATA_WIDTH), @@ -2397,7 +2679,7 @@ generate .ID_ENABLE(0), .DEST_ENABLE(0), .USER_ENABLE(1), - .USER_WIDTH(1), + .USER_WIDTH(PTP_TS_ENABLE ? 81 : 1), .FRAME_FIFO(1), .USER_BAD_FRAME_VALUE(1'b1), .USER_BAD_FRAME_MASK(1'b1), @@ -2416,7 +2698,7 @@ generate .s_axis_tlast(port_rx_axis_tlast[n*PORTS_PER_IF+m]), .s_axis_tid(0), .s_axis_tdest(0), - .s_axis_tuser(port_rx_axis_tuser[n*PORTS_PER_IF+m]), + .s_axis_tuser(port_rx_axis_tuser[(n*PORTS_PER_IF+m)*81 +: 81]), // AXI output .m_clk(clk_250mhz), .m_axis_tdata(rx_axis_tdata_pipe), diff --git a/fpga/mqnic/VCU1525/fpga_100g/tb/fpga_core/test_fpga_core.py b/fpga/mqnic/VCU1525/fpga_100g/tb/fpga_core/test_fpga_core.py index 8c5661c9e..6549e4ce7 100644 --- a/fpga/mqnic/VCU1525/fpga_100g/tb/fpga_core/test_fpga_core.py +++ b/fpga/mqnic/VCU1525/fpga_100g/tb/fpga_core/test_fpga_core.py @@ -46,7 +46,8 @@ from cocotb.log import SimLog from cocotb.clock import Clock from cocotb.triggers import RisingEdge, FallingEdge, Timer -from cocotbext.axi import AxiStreamBus, AxiStreamSource, AxiStreamSink +from cocotbext.axi import AxiStreamBus +from cocotbext.eth import EthMac from cocotbext.pcie.core import RootComplex from cocotbext.pcie.xilinx.us import UltraScalePlusPcieDevice @@ -269,14 +270,38 @@ class TB(object): # Ethernet cocotb.fork(Clock(dut.qsfp0_rx_clk, 3.102, units="ns").start()) - self.qsfp0_source = AxiStreamSource(AxiStreamBus.from_prefix(dut, "qsfp0_rx_axis"), dut.qsfp0_rx_clk, dut.qsfp0_rx_rst) cocotb.fork(Clock(dut.qsfp0_tx_clk, 3.102, units="ns").start()) - self.qsfp0_sink = AxiStreamSink(AxiStreamBus.from_prefix(dut, "qsfp0_tx_axis"), dut.qsfp0_tx_clk, dut.qsfp0_tx_rst) + + self.qsfp0_mac = EthMac( + tx_clk=dut.qsfp0_tx_clk, + tx_rst=dut.qsfp0_tx_rst, + tx_bus=AxiStreamBus.from_prefix(dut, "qsfp0_tx_axis"), + tx_ptp_time=dut.qsfp0_tx_ptp_time, + tx_ptp_ts=dut.qsfp0_tx_ptp_ts, + tx_ptp_ts_valid=dut.qsfp0_tx_ptp_ts_valid, + rx_clk=dut.qsfp0_rx_clk, + rx_rst=dut.qsfp0_rx_rst, + rx_bus=AxiStreamBus.from_prefix(dut, "qsfp0_rx_axis"), + rx_ptp_time=dut.qsfp0_rx_ptp_time, + ifg=12, speed=100e9 + ) cocotb.fork(Clock(dut.qsfp1_rx_clk, 3.102, units="ns").start()) - self.qsfp1_source = AxiStreamSource(AxiStreamBus.from_prefix(dut, "qsfp1_rx_axis"), dut.qsfp1_rx_clk, dut.qsfp1_rx_rst) cocotb.fork(Clock(dut.qsfp1_tx_clk, 3.102, units="ns").start()) - self.qsfp1_sink = AxiStreamSink(AxiStreamBus.from_prefix(dut, "qsfp1_tx_axis"), dut.qsfp1_tx_clk, dut.qsfp1_tx_rst) + + self.qsfp1_mac = EthMac( + tx_clk=dut.qsfp1_tx_clk, + tx_rst=dut.qsfp1_tx_rst, + tx_bus=AxiStreamBus.from_prefix(dut, "qsfp1_tx_axis"), + tx_ptp_time=dut.qsfp1_tx_ptp_time, + tx_ptp_ts=dut.qsfp1_tx_ptp_ts, + tx_ptp_ts_valid=dut.qsfp1_tx_ptp_ts_valid, + rx_clk=dut.qsfp1_rx_clk, + rx_rst=dut.qsfp1_rx_rst, + rx_bus=AxiStreamBus.from_prefix(dut, "qsfp1_rx_axis"), + rx_ptp_time=dut.qsfp1_rx_ptp_time, + ifg=12, speed=100e9 + ) dut.sw.setimmediatevalue(0) @@ -327,10 +352,10 @@ class TB(object): await RisingEdge(self.dut.clk_250mhz) if self.loopback_enable: - if not self.qsfp0_sink.empty(): - await self.qsfp0_source.send(await self.qsfp0_sink.recv()) - if not self.qsfp1_sink.empty(): - await self.qsfp1_source.send(await self.qsfp1_sink.recv()) + if not self.qsfp0_mac.tx.empty(): + await self.qsfp0_mac.rx.send(await self.qsfp0_mac.tx.recv()) + if not self.qsfp1_mac.tx.empty(): + await self.qsfp1_mac.rx.send(await self.qsfp1_mac.tx.recv()) @cocotb.test() @@ -361,10 +386,10 @@ async def run_test_nic(dut): await tb.driver.interfaces[0].start_xmit(data, 0) - pkt = await tb.qsfp0_sink.recv() + pkt = await tb.qsfp0_mac.tx.recv() tb.log.info("Packet: %s", pkt) - await tb.qsfp0_source.send(pkt) + await tb.qsfp0_mac.rx.send(pkt) pkt = await tb.driver.interfaces[0].recv() @@ -373,10 +398,10 @@ async def run_test_nic(dut): # await tb.driver.interfaces[1].start_xmit(data, 0) - # pkt = await tb.qsfp1_0_sink.recv() + # pkt = await tb.qsfp1_mac.tx.recv() # tb.log.info("Packet: %s", pkt) - # await tb.qsfp1_0_source.send(pkt) + # await tb.qsfp1_mac.rx.send(pkt) # pkt = await tb.driver.interfaces[1].recv() @@ -396,10 +421,10 @@ async def run_test_nic(dut): await tb.driver.interfaces[0].start_xmit(test_pkt2.build(), 0, 34, 6) - pkt = await tb.qsfp0_sink.recv() + pkt = await tb.qsfp0_mac.tx.recv() tb.log.info("Packet: %s", pkt) - await tb.qsfp0_source.send(pkt) + await tb.qsfp0_mac.rx.send(pkt) pkt = await tb.driver.interfaces[0].recv()