1
0
mirror of https://github.com/corundum/corundum.git synced 2025-01-16 08:12:53 +08:00
corundum/fpga/common/rtl/tx_checksum.v
Alex Forencich 448fa8eb4c Use SPDX
Signed-off-by: Alex Forencich <alex@alexforencich.com>
2023-06-26 11:44:57 -07:00

547 lines
17 KiB
Verilog

// SPDX-License-Identifier: BSD-2-Clause-Views
/*
* Copyright (c) 2019-2023 The Regents of the University of California
*/
// Language: Verilog 2001
`resetall
`timescale 1ns / 1ps
`default_nettype none
/*
* Transmit checksum offload module
*/
/*
* Per-frame operation, as implemented by the module body:
*  1. One command (enable, start, offset, init) is accepted on the control
*     interface while no frame is being received.
*  2. The frame is then accepted on the AXI stream input and written to a data
*     FIFO while a 16-bit sum with end-around carry is accumulated over the
*     frame bytes from byte offset csum_start to the end of the frame
*     (bytes with tkeep deasserted contribute zero).
*  3. Once the last beat has been summed, the inverted sum is queued; the frame
*     is then read back out of the data FIFO and, if csum_enable was set, the
*     two bytes at csum_offset / csum_offset+1 are overwritten with it.
*/
module tx_checksum #
(
// Width of AXI stream interfaces in bits
parameter DATA_WIDTH = 256,
// AXI stream tkeep signal width (words per cycle)
// Must equal DATA_WIDTH/8; this is checked by an initial block in the module body
parameter KEEP_WIDTH = (DATA_WIDTH/8),
// Propagate tid signal (when 0, m_axis_tid is driven to zero)
parameter ID_ENABLE = 0,
// tid signal width
parameter ID_WIDTH = 8,
// Propagate tdest signal (when 0, m_axis_tdest is driven to zero)
parameter DEST_ENABLE = 0,
// tdest signal width
parameter DEST_WIDTH = 8,
// Propagate tuser signal (when 0, m_axis_tuser is driven to zero)
parameter USER_ENABLE = 1,
// tuser signal width
parameter USER_WIDTH = 1,
// Use checksum init value
// When nonzero, s_axis_cmd_csum_init replaces the running accumulator on the
// first beat of each frame; when 0, s_axis_cmd_csum_init is not used in the sum
parameter USE_INIT_VALUE = 0,
// Depth of data FIFO in words
// (passed unchanged as DEPTH to the data axis_fifo instance)
parameter DATA_FIFO_DEPTH = 4096,
// Depth of checksum FIFO
// (passed unchanged as DEPTH to the checksum axis_fifo instance)
parameter CHECKSUM_FIFO_DEPTH = 64
)
(
input wire clk,
// Synchronous reset, active high (sampled inside the posedge clk blocks)
input wire rst,
/*
* AXI input
* Frame data; s_axis_tready is only asserted after a command has been accepted
*/
input wire [DATA_WIDTH-1:0] s_axis_tdata,
input wire [KEEP_WIDTH-1:0] s_axis_tkeep,
input wire s_axis_tvalid,
output wire s_axis_tready,
input wire s_axis_tlast,
input wire [ID_WIDTH-1:0] s_axis_tid,
input wire [DEST_WIDTH-1:0] s_axis_tdest,
input wire [USER_WIDTH-1:0] s_axis_tuser,
/*
* AXI output
* Frame data with the checksum inserted (registered output with skid buffer)
*/
output wire [DATA_WIDTH-1:0] m_axis_tdata,
output wire [KEEP_WIDTH-1:0] m_axis_tkeep,
output wire m_axis_tvalid,
input wire m_axis_tready,
output wire m_axis_tlast,
output wire [ID_WIDTH-1:0] m_axis_tid,
output wire [DEST_WIDTH-1:0] m_axis_tdest,
output wire [USER_WIDTH-1:0] m_axis_tuser,
/*
* Control
* One command per frame, accepted before the frame's first data beat
*/
// Overwrite the checksum bytes in the outgoing frame when set; when clear the
// frame is forwarded unmodified
input wire s_axis_cmd_csum_enable,
// Byte offset of the first frame byte included in the sum; bit 0 (odd start)
// selects a byte swap of the final result
input wire [7:0] s_axis_cmd_csum_start,
// Byte offset in the frame where the upper checksum byte is written; the lower
// byte is written at the following byte offset
input wire [7:0] s_axis_cmd_csum_offset,
// Initial accumulator value for the frame (only used when USE_INIT_VALUE is nonzero)
input wire [15:0] s_axis_cmd_csum_init,
input wire s_axis_cmd_valid,
// Asserted only while no frame is being received and the checksum FIFO is ready
output wire s_axis_cmd_ready
);
// Number of halving steps needed to reduce DATA_WIDTH/8 byte lanes to one value.
// The adder tree below uses LEVELS-1 pipeline stages (indices 0..LEVELS-2)
// because its first stage already combines four byte lanes per adder.
parameter LEVELS = $clog2(DATA_WIDTH/8);

// parameter assertions (evaluated once at time zero)
initial begin
    if (KEEP_WIDTH * 8 != DATA_WIDTH) begin
        $error("Error: AXI stream interface requires byte (8-bit) granularity (instance %m)");
        $finish;
    end

    // The first adder stage consumes byte lanes in groups of four and all
    // pipeline arrays are declared [LEVELS-2:0]; fewer than four lanes would
    // give an empty first stage and a negative array bound.
    if (DATA_WIDTH < 32) begin
        $error("Error: DATA_WIDTH must be at least 32 bits (instance %m)");
        $finish;
    end

    // Each later stage halves the number of partial sums using integer
    // division, so a lane count that is not a power of two silently drops
    // lanes and never produces a single final sum.
    if (2**LEVELS != DATA_WIDTH/8) begin
        $error("Error: DATA_WIDTH/8 must be a power of two (instance %m)");
        $finish;
    end
end
// High between acceptance of a command and acceptance of the frame's last beat
reg transfer_in_reg = 1'b0;
// Checksum FIFO write side: finished checksum plus the insertion offset and
// enable captured from the command
reg [15:0] csum_in_csum_reg = 0;
reg [7:0] csum_in_offset_reg = 0;
reg csum_in_enable_reg = 1'b0;
reg csum_in_valid_reg = 1'b0;
// Checksum FIFO write-side ready; also used as the advance enable for the
// whole summation pipeline
wire csum_in_ready;
// Checksum FIFO read side
wire [15:0] csum_out_csum;
wire [7:0] csum_out_offset;
wire csum_out_enable;
wire csum_out_valid;
reg csum_out_ready = 1'b0;
// Per-byte-lane include mask applied to the next input beat
reg [KEEP_WIDTH-1:0] mask_reg = 0;
// Set by a command, cleared by the first accepted data beat
reg first_cycle_reg = 1'b0;
// Number of bytes still to be excluded from the sum in later beats
reg [7:0] input_offset_reg = 0;
// Input data with excluded byte lanes forced to zero (combinational)
reg [DATA_WIDTH-1:0] s_axis_tdata_masked;
// Checksum insertion state (output side)
// frame_reg: a checksum entry has been loaded and a frame is being forwarded
reg frame_reg = 1'b0, frame_next;
reg [15:0] csum_reg = 16'd0, csum_next;
// Remaining byte offset to the checksum field, relative to the current beat
reg [7:0] csum_offset_reg = 8'd0, csum_offset_next;
reg csum_enable_reg = 1'b0, csum_enable_next;
// Upper checksum byte was written in the last lane of the previous beat; the
// lower byte goes into lane 0 of the next beat
reg csum_split_reg = 1'b0, csum_split_next;
// Adder tree pipeline, one entry per stage; stage l packs its partial sums
// back to back as (17+l)-bit fields
reg [DATA_WIDTH-1:0] sum_reg[LEVELS-2:0];
// Sideband information travelling alongside each pipeline stage
reg [LEVELS-2:0] sum_valid_reg = 0;
reg [LEVELS-2:0] sum_odd_reg = 0;
reg [LEVELS-2:0] sum_last_reg = 0;
reg [LEVELS-2:0] sum_enable_reg = 0;
reg [7:0] sum_offset_reg[LEVELS-2:0];
reg [15:0] sum_init_reg[LEVELS-2:0];
reg [LEVELS-2:0] sum_init_valid_reg = 0;
// Scratch value for the carry folding (written with blocking assignments)
reg [16+LEVELS-1:0] sum_acc_temp = 0;
// Running folded sum across the beats of the current frame
reg [15:0] sum_acc_reg = 0;
// internal datapath
// (output of the checksum insertion logic, input of the output registers)
reg [DATA_WIDTH-1:0] m_axis_tdata_int;
reg [KEEP_WIDTH-1:0] m_axis_tkeep_int;
reg m_axis_tvalid_int;
reg m_axis_tready_int_reg = 1'b0;
reg m_axis_tlast_int;
reg [ID_WIDTH-1:0] m_axis_tid_int;
reg [DEST_WIDTH-1:0] m_axis_tdest_int;
reg [USER_WIDTH-1:0] m_axis_tuser_int;
wire m_axis_tready_int_early;
// Data FIFO write side
wire [DATA_WIDTH-1:0] data_in_axis_tdata;
wire [KEEP_WIDTH-1:0] data_in_axis_tkeep;
wire data_in_axis_tvalid;
wire data_in_axis_tready;
wire data_in_axis_tlast;
wire [ID_WIDTH-1:0] data_in_axis_tid;
wire [DEST_WIDTH-1:0] data_in_axis_tdest;
wire [USER_WIDTH-1:0] data_in_axis_tuser;
// Data FIFO read side (tready is driven from the checksum insertion logic)
wire [DATA_WIDTH-1:0] data_out_axis_tdata;
wire [KEEP_WIDTH-1:0] data_out_axis_tkeep;
wire data_out_axis_tvalid;
reg data_out_axis_tready;
wire data_out_axis_tlast;
wire [ID_WIDTH-1:0] data_out_axis_tid;
wire [DEST_WIDTH-1:0] data_out_axis_tdest;
wire [USER_WIDTH-1:0] data_out_axis_tuser;
// Input arbitration: a command is taken only while idle, frame data only
// while a transfer is open. Both paths also wait for the checksum FIFO,
// because its ready signal is what advances the summation pipeline.
assign s_axis_cmd_ready = !transfer_in_reg && csum_in_ready;
assign s_axis_tready = transfer_in_reg && csum_in_ready && data_in_axis_tready;

// data FIFO write side: a beat is written under the same conditions that
// allow it to enter the summation pipeline
assign data_in_axis_tvalid = transfer_in_reg && csum_in_ready && s_axis_tvalid;

// payload and sideband signals are forwarded unchanged
assign data_in_axis_tdata = s_axis_tdata;
assign data_in_axis_tkeep = s_axis_tkeep;
assign data_in_axis_tlast = s_axis_tlast;
assign data_in_axis_tid = s_axis_tid;
assign data_in_axis_tdest = s_axis_tdest;
assign data_in_axis_tuser = s_axis_tuser;
// Holds frame data while its checksum is being computed. The read side is not
// enabled until a checksum entry has been loaded for the frame, and that entry
// is only produced after the frame's last beat has been summed, so a complete
// frame has to fit in this FIFO.
// NOTE(review): axis_fifo is defined outside this file; the units of DEPTH and
// the meaning of FRAME_FIFO(0) should be confirmed against that module.
axis_fifo #(
.DEPTH(DATA_FIFO_DEPTH),
.DATA_WIDTH(DATA_WIDTH),
.KEEP_ENABLE(1),
.KEEP_WIDTH(KEEP_WIDTH),
.LAST_ENABLE(1),
.ID_ENABLE(ID_ENABLE),
.ID_WIDTH(ID_WIDTH),
.DEST_ENABLE(DEST_ENABLE),
.DEST_WIDTH(DEST_WIDTH),
.USER_ENABLE(USER_ENABLE),
.USER_WIDTH(USER_WIDTH),
.FRAME_FIFO(0)
)
data_fifo (
.clk(clk),
.rst(rst),
// AXI input
.s_axis_tdata(data_in_axis_tdata),
.s_axis_tkeep(data_in_axis_tkeep),
.s_axis_tvalid(data_in_axis_tvalid),
.s_axis_tready(data_in_axis_tready),
.s_axis_tlast(data_in_axis_tlast),
.s_axis_tid(data_in_axis_tid),
.s_axis_tdest(data_in_axis_tdest),
.s_axis_tuser(data_in_axis_tuser),
// AXI output
.m_axis_tdata(data_out_axis_tdata),
.m_axis_tkeep(data_out_axis_tkeep),
.m_axis_tvalid(data_out_axis_tvalid),
.m_axis_tready(data_out_axis_tready),
.m_axis_tlast(data_out_axis_tlast),
.m_axis_tid(data_out_axis_tid),
.m_axis_tdest(data_out_axis_tdest),
.m_axis_tuser(data_out_axis_tuser),
// Status
// (status outputs are left unconnected)
.status_overflow(),
.status_bad_frame(),
.status_good_frame()
);
// checksum FIFO
// One entry per frame, 25 bits wide: {checksum[15:0], offset[7:0], enable}.
// Its write-side ready (csum_in_ready) also gates the input handshake and every
// stage of the summation pipeline.
// NOTE(review): axis_fifo is defined outside this file; confirm that tying the
// unused sideband inputs to 0 is what it expects when the matching *_ENABLE
// parameters are 0.
axis_fifo #(
.DEPTH(CHECKSUM_FIFO_DEPTH),
.DATA_WIDTH(16+8+1),
.KEEP_ENABLE(0),
.LAST_ENABLE(0),
.ID_ENABLE(0),
.DEST_ENABLE(0),
.USER_ENABLE(0),
.FRAME_FIFO(0)
)
csum_fifo (
.clk(clk),
.rst(rst),
// AXI input
.s_axis_tdata({csum_in_csum_reg, csum_in_offset_reg, csum_in_enable_reg}),
.s_axis_tkeep(0),
.s_axis_tvalid(csum_in_valid_reg),
.s_axis_tready(csum_in_ready),
.s_axis_tlast(0),
.s_axis_tid(0),
.s_axis_tdest(0),
.s_axis_tuser(0),
// AXI output
// (unpacked in the same field order as the write side)
.m_axis_tdata({csum_out_csum, csum_out_offset, csum_out_enable}),
.m_axis_tkeep(),
.m_axis_tvalid(csum_out_valid),
.m_axis_tready(csum_out_ready),
.m_axis_tlast(),
.m_axis_tid(),
.m_axis_tdest(),
.m_axis_tuser(),
// Status
// (status outputs are left unconnected)
.status_overflow(),
.status_bad_frame(),
.status_good_frame()
);
// Zero every byte lane that must not contribute to the sum: lanes whose tkeep
// bit is clear and lanes excluded by the start-offset mask.
integer lane;
always @* begin
    for (lane = 0; lane < KEEP_WIDTH; lane = lane + 1) begin
        // replicate the 1-bit include flag across the byte and AND it in
        s_axis_tdata_masked[lane*8 +: 8] = s_axis_tdata[lane*8 +: 8] & {8{s_axis_tkeep[lane] && mask_reg[lane]}};
    end
end
// Compute checksum
// First stage of the adder tree plus command capture and start-offset masking.
// Later nonblocking assignments in this block override earlier ones to the
// same register, so statement order is significant.
integer i;
always @(posedge clk) begin
// hold the stage's valid flag while the pipeline is stalled, otherwise clear
// it (it is set again below if a new beat is accepted in this cycle)
sum_valid_reg[0] <= sum_valid_reg[0] && !csum_in_ready;
if (s_axis_tvalid && s_axis_tready) begin
// each group of four byte lanes forms two 16-bit words (lower-numbered lane in
// the upper byte) that are added into one 17-bit partial sum
for (i = 0; i < DATA_WIDTH/8/4; i = i + 1) begin
sum_reg[0][i*17 +: 17] <= {s_axis_tdata_masked[(4*i+0)*8 +: 8], s_axis_tdata_masked[(4*i+1)*8 +: 8]} + {s_axis_tdata_masked[(4*i+2)*8 +: 8], s_axis_tdata_masked[(4*i+3)*8 +: 8]};
end
sum_valid_reg[0] <= 1'b1;
sum_last_reg[0] <= s_axis_tlast;
// mark the first beat of the frame so the accumulator can be seeded there
sum_init_valid_reg[0] <= first_cycle_reg;
first_cycle_reg <= 1'b0;
if (s_axis_tlast) begin
// frame complete: go back to waiting for a command
transfer_in_reg <= 1'b0;
end
// prepare the lane mask for the next beat from the bytes still to be skipped
if (input_offset_reg > 0) begin
if (input_offset_reg >= KEEP_WIDTH) begin
// the whole next beat lies before the start offset
mask_reg <= 0;
input_offset_reg <= input_offset_reg - KEEP_WIDTH;
end else begin
// the start offset falls inside the next beat: drop only its lowest lanes
mask_reg <= {KEEP_WIDTH{1'b1}} << input_offset_reg;
input_offset_reg <= 0;
end
end else begin
// past the start offset: every lane is included
mask_reg <= {KEEP_WIDTH{1'b1}};
end
end
if (s_axis_cmd_valid && s_axis_cmd_ready) begin
// command accepted: open the transfer and latch the per-frame parameters
transfer_in_reg <= 1'b1;
// an odd start offset is recorded so the final sum can be byte-swapped
sum_odd_reg[0] <= s_axis_cmd_csum_start[0];
sum_enable_reg[0] <= s_axis_cmd_csum_enable;
sum_offset_reg[0] <= s_axis_cmd_csum_offset;
sum_init_reg[0] <= s_axis_cmd_csum_init;
first_cycle_reg <= 1'b1;
// lane mask for the first beat of the frame
if (s_axis_cmd_csum_start >= KEEP_WIDTH) begin
mask_reg <= 0;
input_offset_reg <= s_axis_cmd_csum_start - KEEP_WIDTH;
end else begin
mask_reg <= {KEEP_WIDTH{1'b1}} << s_axis_cmd_csum_start;
input_offset_reg <= 0;
end
end
if (rst) begin
// only the control flags are reset; the other registers in this block keep
// their values
transfer_in_reg <= 1'b0;
sum_valid_reg[0] <= 1'b0;
end
end
// Remaining adder tree stages (1 .. LEVELS-2). Stage l adds adjacent pairs of
// the (17+l-1)-bit partial sums from stage l-1 into (17+l)-bit results, halving
// their number, and carries the per-frame sideband values along with the data.
generate
genvar l;
for (l = 1; l < LEVELS-1; l = l + 1) begin
always @(posedge clk) begin
// hold valid while stalled, clear otherwise (set again below when the stage
// is loaded in this cycle)
sum_valid_reg[l] <= sum_valid_reg[l] && !csum_in_ready;
// advance only when the previous stage holds data and the pipeline is not stalled
if (sum_valid_reg[l-1] && csum_in_ready) begin
for (i = 0; i < DATA_WIDTH/8/4/2**l; i = i + 1) begin
sum_reg[l][i*(17+l) +: (17+l)] <= sum_reg[l-1][(i*2+0)*(17+l-1) +: (17+l-1)] + sum_reg[l-1][(i*2+1)*(17+l-1) +: (17+l-1)];
end
sum_valid_reg[l] <= 1'b1;
sum_odd_reg[l] <= sum_odd_reg[l-1];
sum_last_reg[l] <= sum_last_reg[l-1];
sum_enable_reg[l] <= sum_enable_reg[l-1];
sum_offset_reg[l] <= sum_offset_reg[l-1];
sum_init_reg[l] <= sum_init_reg[l-1];
sum_init_valid_reg[l] <= sum_init_valid_reg[l-1];
end
if (rst) begin
// only the valid flag is reset
sum_valid_reg[l] <= 1'b0;
end
end
end
endgenerate
// Final summation stage: accumulate the per-beat sum from the last adder tree
// stage across the beats of a frame and, on the frame's last beat, emit the
// inverted result to the checksum FIFO together with the insertion offset and
// enable captured from the command.
always @(posedge clk) begin
    // the FIFO write strobe is a single-cycle pulse
    csum_in_valid_reg <= 1'b0;

    if (sum_valid_reg[LEVELS-2] && csum_in_ready) begin
        // Add this beat's sum to the running value. On the first beat of a
        // frame the command's init value is used instead when USE_INIT_VALUE
        // is set. sum_acc_temp is a scratch variable, hence the blocking
        // assignments.
        sum_acc_temp = sum_reg[LEVELS-2][16+LEVELS-1-1:0] + (sum_init_valid_reg[LEVELS-2] && USE_INIT_VALUE ? sum_init_reg[LEVELS-2] : sum_acc_reg);
        // fold the bits above bit 15 back into the low 16 bits ...
        sum_acc_temp = sum_acc_temp[15:0] + (sum_acc_temp >> 16);
        // ... and fold the single carry that this addition may produce
        sum_acc_temp = sum_acc_temp[15:0] + sum_acc_temp[16];

        if (sum_last_reg[LEVELS-2]) begin
            // End of frame: output the inverted sum. With an odd start offset
            // the summed bytes sat in the opposite halves of the 16-bit words,
            // so the two result bytes are swapped.
            if (sum_odd_reg[LEVELS-2]) begin
                csum_in_csum_reg[7:0] <= ~sum_acc_temp[15:8];
                csum_in_csum_reg[15:8] <= ~sum_acc_temp[7:0];
            end else begin
                csum_in_csum_reg[7:0] <= ~sum_acc_temp[7:0];
                csum_in_csum_reg[15:8] <= ~sum_acc_temp[15:8];
            end
            csum_in_offset_reg <= sum_offset_reg[LEVELS-2];
            csum_in_enable_reg <= sum_enable_reg[LEVELS-2];
            csum_in_valid_reg <= 1'b1;
            // start the next frame from zero
            sum_acc_reg <= 0;
        end else begin
            // more beats to come: keep the folded partial sum
            sum_acc_reg <= sum_acc_temp;
        end
    end

    if (rst) begin
        csum_in_valid_reg <= 1'b0;
        // A reset in the middle of a frame discards the beats still in the
        // pipeline, so the partial sum must be discarded too. Otherwise it
        // would be added into the first frame after reset whenever
        // USE_INIT_VALUE is 0.
        sum_acc_reg <= 0;
    end
end
// Insert checksum
// Combinational next-state and output logic for the read side. While idle
// (frame_reg low) it pops one checksum FIFO entry; while a frame is active it
// forwards data FIFO beats and overwrites the checksum bytes on the way.
always @* begin
// data is only read out of the data FIFO while a frame is active
data_out_axis_tready = m_axis_tready_int_reg && frame_reg;
csum_out_ready = 1'b0;
// default: hold state
frame_next = frame_reg;
csum_next = csum_reg;
csum_offset_next = csum_offset_reg;
csum_enable_next = csum_enable_reg;
csum_split_next = csum_split_reg;
// default: forward the FIFO output unchanged
m_axis_tdata_int = data_out_axis_tdata;
m_axis_tkeep_int = data_out_axis_tkeep;
m_axis_tvalid_int = data_out_axis_tvalid && data_out_axis_tready;
m_axis_tlast_int = data_out_axis_tlast;
m_axis_tid_int = data_out_axis_tid;
m_axis_tdest_int = data_out_axis_tdest;
m_axis_tuser_int = data_out_axis_tuser;
if (frame_reg) begin
if (data_out_axis_tvalid && data_out_axis_tready) begin
if (data_out_axis_tlast) begin
// last beat forwarded: return to idle to fetch the next checksum entry
frame_next = 1'b0;
end
if (csum_enable_reg) begin
if (csum_offset_reg >= KEEP_WIDTH) begin
// checksum field is in a later beat: count this beat's bytes off the offset
csum_offset_next = csum_offset_reg - KEEP_WIDTH;
end else if (csum_split_reg) begin
// other byte of split checksum
m_axis_tdata_int[0 +: 8] = csum_reg[7:0];
csum_enable_next = 1'b0;
end else if (csum_offset_reg == KEEP_WIDTH-1) begin
// split across two cycles
// (upper byte goes in the last lane of this beat)
m_axis_tdata_int[DATA_WIDTH-8 +: 8] = csum_reg[15:8];
csum_split_next = 1'b1;
end else begin
// both bytes fall in this beat: upper byte at the offset, lower byte after it
m_axis_tdata_int[csum_offset_reg*8 +: 8] = csum_reg[15:8];
m_axis_tdata_int[(csum_offset_reg+1)*8 +: 8] = csum_reg[7:0];
csum_enable_next = 1'b0;
end
end
end
end else begin
// idle: continuously load the checksum FIFO output; the loaded values are
// only used once csum_out_valid starts a frame
csum_out_ready = 1'b1;
csum_next = csum_out_csum;
csum_offset_next = csum_out_offset;
csum_enable_next = csum_out_enable;
csum_split_next = 1'b0;
if (csum_out_valid) begin
frame_next = 1'b1;
end
end
end
// State registers for the checksum insertion logic.
always @(posedge clk) begin
    // registers without reset: they are reloaded from the checksum FIFO
    // output whenever the insertion logic is idle
    csum_reg <= csum_next;
    csum_offset_reg <= csum_offset_next;
    csum_split_reg <= csum_split_next;

    // control flags with synchronous reset
    if (rst) begin
        frame_reg <= 1'b0;
        csum_enable_reg <= 1'b0;
    end else begin
        frame_reg <= frame_next;
        csum_enable_reg <= csum_enable_next;
    end
end
// output datapath logic
// Registered output stage with one extra holding register, so the upstream
// ready signal (m_axis_tready_int_reg) can itself be registered.
// main output registers
reg [DATA_WIDTH-1:0] m_axis_tdata_reg = {DATA_WIDTH{1'b0}};
reg [KEEP_WIDTH-1:0] m_axis_tkeep_reg = {KEEP_WIDTH{1'b0}};
reg m_axis_tvalid_reg = 1'b0, m_axis_tvalid_next;
reg m_axis_tlast_reg = 1'b0;
reg [ID_WIDTH-1:0] m_axis_tid_reg = {ID_WIDTH{1'b0}};
reg [DEST_WIDTH-1:0] m_axis_tdest_reg = {DEST_WIDTH{1'b0}};
reg [USER_WIDTH-1:0] m_axis_tuser_reg = {USER_WIDTH{1'b0}};
// holding registers: capture a beat that arrives while the output register is
// occupied and the sink is not ready
reg [DATA_WIDTH-1:0] temp_m_axis_tdata_reg = {DATA_WIDTH{1'b0}};
reg [KEEP_WIDTH-1:0] temp_m_axis_tkeep_reg = {KEEP_WIDTH{1'b0}};
reg temp_m_axis_tvalid_reg = 1'b0, temp_m_axis_tvalid_next;
reg temp_m_axis_tlast_reg = 1'b0;
reg [ID_WIDTH-1:0] temp_m_axis_tid_reg = {ID_WIDTH{1'b0}};
reg [DEST_WIDTH-1:0] temp_m_axis_tdest_reg = {DEST_WIDTH{1'b0}};
reg [USER_WIDTH-1:0] temp_m_axis_tuser_reg = {USER_WIDTH{1'b0}};
// datapath control
// (one-hot style load strobes decoded by the combinational block below)
reg store_axis_int_to_output;
reg store_axis_int_to_temp;
reg store_axis_temp_to_output;
assign m_axis_tdata = m_axis_tdata_reg;
assign m_axis_tkeep = m_axis_tkeep_reg;
assign m_axis_tvalid = m_axis_tvalid_reg;
assign m_axis_tlast = m_axis_tlast_reg;
// sideband outputs are forced to zero when the matching *_ENABLE parameter is 0
assign m_axis_tid = ID_ENABLE ? m_axis_tid_reg : {ID_WIDTH{1'b0}};
assign m_axis_tdest = DEST_ENABLE ? m_axis_tdest_reg : {DEST_WIDTH{1'b0}};
assign m_axis_tuser = USER_ENABLE ? m_axis_tuser_reg : {USER_WIDTH{1'b0}};
// enable ready input next cycle if output is ready or if both output registers are empty
assign m_axis_tready_int_early = m_axis_tready || (!temp_m_axis_tvalid_reg && !m_axis_tvalid_reg);
// Output stage control: decides, for the current cycle, which register (if
// any) is loaded and what the valid flags become on the next clock edge.
always @* begin
// transfer sink ready state to source
// default: hold both valid flags, load nothing
m_axis_tvalid_next = m_axis_tvalid_reg;
temp_m_axis_tvalid_next = temp_m_axis_tvalid_reg;
store_axis_int_to_output = 1'b0;
store_axis_int_to_temp = 1'b0;
store_axis_temp_to_output = 1'b0;
if (m_axis_tready_int_reg) begin
// input is ready
if (m_axis_tready || !m_axis_tvalid_reg) begin
// output is ready or currently not valid, transfer data to output
m_axis_tvalid_next = m_axis_tvalid_int;
store_axis_int_to_output = 1'b1;
end else begin
// output is not ready, store input in temp
m_axis_tvalid_next = m_axis_tvalid_reg;
temp_m_axis_tvalid_next = m_axis_tvalid_int;
store_axis_int_to_temp = 1'b1;
end
end else if (m_axis_tready) begin
// input is not ready, but output is ready
// (drain the holding register into the output register)
m_axis_tvalid_next = temp_m_axis_tvalid_reg;
temp_m_axis_tvalid_next = 1'b0;
store_axis_temp_to_output = 1'b1;
end
end
// Output stage registers.
always @(posedge clk) begin
    // handshake flags (synchronous reset)
    if (rst) begin
        m_axis_tvalid_reg <= 1'b0;
        m_axis_tready_int_reg <= 1'b0;
        temp_m_axis_tvalid_reg <= 1'b0;
    end else begin
        m_axis_tvalid_reg <= m_axis_tvalid_next;
        m_axis_tready_int_reg <= m_axis_tready_int_early;
        temp_m_axis_tvalid_reg <= temp_m_axis_tvalid_next;
    end

    // holding registers: capture the incoming beat when the output register
    // cannot take it (payload registers have no reset)
    if (store_axis_int_to_temp) begin
        temp_m_axis_tdata_reg <= m_axis_tdata_int;
        temp_m_axis_tkeep_reg <= m_axis_tkeep_int;
        temp_m_axis_tlast_reg <= m_axis_tlast_int;
        temp_m_axis_tid_reg <= m_axis_tid_int;
        temp_m_axis_tdest_reg <= m_axis_tdest_int;
        temp_m_axis_tuser_reg <= m_axis_tuser_int;
    end

    // output registers: load from the incoming beat, otherwise from the
    // holding registers when they are being drained
    if (store_axis_int_to_output) begin
        m_axis_tdata_reg <= m_axis_tdata_int;
        m_axis_tkeep_reg <= m_axis_tkeep_int;
        m_axis_tlast_reg <= m_axis_tlast_int;
        m_axis_tid_reg <= m_axis_tid_int;
        m_axis_tdest_reg <= m_axis_tdest_int;
        m_axis_tuser_reg <= m_axis_tuser_int;
    end else if (store_axis_temp_to_output) begin
        m_axis_tdata_reg <= temp_m_axis_tdata_reg;
        m_axis_tkeep_reg <= temp_m_axis_tkeep_reg;
        m_axis_tlast_reg <= temp_m_axis_tlast_reg;
        m_axis_tid_reg <= temp_m_axis_tid_reg;
        m_axis_tdest_reg <= temp_m_axis_tdest_reg;
        m_axis_tuser_reg <= temp_m_axis_tuser_reg;
    end
end
endmodule
`resetall