// Mirror of https://github.com/corundum/corundum.git (synced 2025-01-16 08:12:53 +08:00)
// Verilog source, 681 lines, 28 KiB
/*

Copyright (c) 2018 Alex Forencich

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.

*/

// Language: Verilog 2001

`resetall
`timescale 1ns / 1ps
`default_nettype none

/*
 * Ultrascale PCIe AXI Master (write)
 *
 * Consumes request TLPs from the PCIe completer request (CQ) AXI stream
 * interface and turns memory write requests into AXI write bursts.
 *
 * Behaviour visible in this module:
 *  - Only REQ_MEM_WRITE requests are forwarded.  Any other request type
 *    asserts status_error_uncor for one clock cycle and the remainder of
 *    that TLP is consumed and discarded (STATE_WAIT_END).
 *  - The target address is taken from the descriptor with its two low bits
 *    forced to zero; sub-DWORD granularity is expressed through m_axi_wstrb,
 *    derived from the first/last byte enables carried in s_axis_cq_tuser.
 *  - A request longer than AXI_MAX_BURST_LEN bus beats is split into several
 *    consecutive AXI bursts.  Requests are assumed not to cross a 4 KB
 *    boundary; nothing in this module checks for it.
 *  - m_axi_bready is tied high; m_axi_bid / m_axi_bresp / m_axi_bvalid are
 *    not examined.  s_axis_cq_tkeep is not examined either.
 *  - For 64 bit interfaces the descriptor spans two beats (STATE_IDLE takes
 *    the address, STATE_HEADER takes the DWORD count and request type).  For
 *    wider interfaces the whole descriptor is in the first beat, and for
 *    256/512 bit interfaces that beat also carries payload.
 */
module pcie_us_axi_master_wr #
(
    // Width of PCIe AXI stream interfaces in bits
    parameter AXIS_PCIE_DATA_WIDTH = 256,
    // PCIe AXI stream tkeep signal width (words per cycle)
    parameter AXIS_PCIE_KEEP_WIDTH = (AXIS_PCIE_DATA_WIDTH/32),
    // PCIe AXI stream CQ tuser signal width
    parameter AXIS_PCIE_CQ_USER_WIDTH = AXIS_PCIE_DATA_WIDTH < 512 ? 85 : 183,
    // Width of AXI data bus in bits
    parameter AXI_DATA_WIDTH = AXIS_PCIE_DATA_WIDTH,
    // Width of AXI address bus in bits
    parameter AXI_ADDR_WIDTH = 64,
    // Width of AXI wstrb (width of data bus in words)
    parameter AXI_STRB_WIDTH = (AXI_DATA_WIDTH/8),
    // Width of AXI ID signal
    parameter AXI_ID_WIDTH = 8,
    // Maximum AXI burst length to generate
    parameter AXI_MAX_BURST_LEN = 256
)
(
    input  wire                               clk,
    input  wire                               rst,

    /*
     * AXI input (CQ)
     */
    input  wire [AXIS_PCIE_DATA_WIDTH-1:0]    s_axis_cq_tdata,
    input  wire [AXIS_PCIE_KEEP_WIDTH-1:0]    s_axis_cq_tkeep,
    input  wire                               s_axis_cq_tvalid,
    output wire                               s_axis_cq_tready,
    input  wire                               s_axis_cq_tlast,
    input  wire [AXIS_PCIE_CQ_USER_WIDTH-1:0] s_axis_cq_tuser,

    /*
     * AXI Master output
     */
    output wire [AXI_ID_WIDTH-1:0]            m_axi_awid,
    output wire [AXI_ADDR_WIDTH-1:0]          m_axi_awaddr,
    output wire [7:0]                         m_axi_awlen,
    output wire [2:0]                         m_axi_awsize,
    output wire [1:0]                         m_axi_awburst,
    output wire                               m_axi_awlock,
    output wire [3:0]                         m_axi_awcache,
    output wire [2:0]                         m_axi_awprot,
    output wire                               m_axi_awvalid,
    input  wire                               m_axi_awready,
    output wire [AXI_DATA_WIDTH-1:0]          m_axi_wdata,
    output wire [AXI_STRB_WIDTH-1:0]          m_axi_wstrb,
    output wire                               m_axi_wlast,
    output wire                               m_axi_wvalid,
    input  wire                               m_axi_wready,
    input  wire [AXI_ID_WIDTH-1:0]            m_axi_bid,
    input  wire [1:0]                         m_axi_bresp,
    input  wire                               m_axi_bvalid,
    output wire                               m_axi_bready,

    /*
     * Status
     */
    output wire                               status_error_uncor
);

// Derived constants (not intended to be overridden)
// AXI bus width in bytes
parameter AXI_WORD_WIDTH = AXI_STRB_WIDTH;
// bits per AXI byte lane
parameter AXI_WORD_SIZE = AXI_DATA_WIDTH/AXI_WORD_WIDTH;
// log2 of AXI bus width in bytes (value driven on m_axi_awsize)
parameter AXI_BURST_SIZE = $clog2(AXI_STRB_WIDTH);
// largest burst in bytes
parameter AXI_MAX_BURST_SIZE = AXI_MAX_BURST_LEN*AXI_WORD_WIDTH;

// PCIe stream width in DWORDs and bits per DWORD lane
parameter AXIS_PCIE_WORD_WIDTH = AXIS_PCIE_KEEP_WIDTH;
parameter AXIS_PCIE_WORD_SIZE = AXIS_PCIE_DATA_WIDTH/AXIS_PCIE_WORD_WIDTH;

// number of bits needed to index a DWORD within one bus beat
parameter OFFSET_WIDTH = $clog2(AXIS_PCIE_DATA_WIDTH/32);

// bus width assertions
initial begin
    if (AXIS_PCIE_DATA_WIDTH != 64 && AXIS_PCIE_DATA_WIDTH != 128 && AXIS_PCIE_DATA_WIDTH != 256 && AXIS_PCIE_DATA_WIDTH != 512) begin
        $error("Error: PCIe interface width must be 64, 128, 256, or 512 (instance %m)");
        $finish;
    end

    if (AXIS_PCIE_KEEP_WIDTH * 32 != AXIS_PCIE_DATA_WIDTH) begin
        $error("Error: PCIe interface requires dword (32-bit) granularity (instance %m)");
        $finish;
    end

    if (AXIS_PCIE_DATA_WIDTH == 512) begin
        if (AXIS_PCIE_CQ_USER_WIDTH != 183) begin
            $error("Error: PCIe CQ tuser width must be 183 (instance %m)");
            $finish;
        end
    end else begin
        if (AXIS_PCIE_CQ_USER_WIDTH != 85 && AXIS_PCIE_CQ_USER_WIDTH != 88) begin
            $error("Error: PCIe CQ tuser width must be 85 or 88 (instance %m)");
            $finish;
        end
    end

    if (AXI_DATA_WIDTH != AXIS_PCIE_DATA_WIDTH) begin
        $error("Error: AXI interface width must match PCIe interface width (instance %m)");
        $finish;
    end

    if (AXI_STRB_WIDTH * 8 != AXI_DATA_WIDTH) begin
        $error("Error: AXI interface requires byte (8-bit) granularity (instance %m)");
        $finish;
    end

    if (AXI_MAX_BURST_LEN < 1 || AXI_MAX_BURST_LEN > 256) begin
        $error("Error: AXI_MAX_BURST_LEN must be between 1 and 256 (instance %m)");
        $finish;
    end
end

// request type encodings (descriptor request type field)
localparam [3:0]
    REQ_MEM_READ = 4'b0000,
    REQ_MEM_WRITE = 4'b0001,
    REQ_IO_READ = 4'b0010,
    REQ_IO_WRITE = 4'b0011,
    REQ_MEM_FETCH_ADD = 4'b0100,
    REQ_MEM_SWAP = 4'b0101,
    REQ_MEM_CAS = 4'b0110,
    REQ_MEM_READ_LOCKED = 4'b0111,
    REQ_CFG_READ_0 = 4'b1000,
    REQ_CFG_READ_1 = 4'b1001,
    REQ_CFG_WRITE_0 = 4'b1010,
    REQ_CFG_WRITE_1 = 4'b1011,
    REQ_MSG = 4'b1100,
    REQ_MSG_VENDOR = 4'b1101,
    REQ_MSG_ATS = 4'b1110;

localparam [2:0]
    CPL_STATUS_SC  = 3'b000, // successful completion
    CPL_STATUS_UR  = 3'b001, // unsupported request
    CPL_STATUS_CRS = 3'b010, // configuration request retry status
    CPL_STATUS_CA  = 3'b100; // completer abort

// FSM states; literals sized to match the 2 bit state register
localparam [1:0]
    STATE_IDLE = 2'd0,
    STATE_HEADER = 2'd1,
    STATE_TRANSFER = 2'd2,
    STATE_WAIT_END = 2'd3;

reg [1:0] state_reg = STATE_IDLE, state_next;

// datapath control signals
reg transfer_in_save;

reg [AXI_ADDR_WIDTH-1:0] axi_addr_reg = {AXI_ADDR_WIDTH{1'b0}}, axi_addr_next;
// DWORD counters are 11 bits wide: they are loaded from the 11 bit
// descriptor DWORD count field (tdata[74:64], or tdata[10:0] on 64 bit
// interfaces), so a 1024 DWORD request must not be truncated to zero
reg [10:0] op_dword_count_reg = 11'd0, op_dword_count_next; // DWORDs left in the request after the current burst
reg [10:0] tr_dword_count_reg = 11'd0, tr_dword_count_next; // DWORDs in the current AXI burst
reg [11:0] input_cycle_count_reg = 12'd0, input_cycle_count_next;
reg [11:0] output_cycle_count_reg = 12'd0, output_cycle_count_next;
reg input_active_reg = 1'b0, input_active_next;
reg bubble_cycle_reg = 1'b0, bubble_cycle_next;
reg first_cycle_reg = 1'b0, first_cycle_next;
reg last_cycle_reg = 1'b0, last_cycle_next;

reg [3:0] type_reg = 4'd0, type_next;
reg [3:0] first_be_reg = 4'd0, first_be_next;
reg [3:0] last_be_reg = 4'd0, last_be_next;
reg [OFFSET_WIDTH-1:0] offset_reg = {OFFSET_WIDTH{1'b0}}, offset_next;
reg [OFFSET_WIDTH-1:0] first_cycle_offset_reg = {OFFSET_WIDTH{1'b0}}, first_cycle_offset_next;
reg [OFFSET_WIDTH-1:0] last_cycle_offset_reg = {OFFSET_WIDTH{1'b0}}, last_cycle_offset_next;

reg s_axis_cq_tready_reg = 1'b0, s_axis_cq_tready_next;

reg [AXI_ADDR_WIDTH-1:0] m_axi_awaddr_reg = {AXI_ADDR_WIDTH{1'b0}}, m_axi_awaddr_next;
reg [7:0] m_axi_awlen_reg = 8'd0, m_axi_awlen_next;
reg m_axi_awvalid_reg = 1'b0, m_axi_awvalid_next;

// previous input beat, kept so that output beats can be assembled from two
// consecutive input beats
reg [AXI_DATA_WIDTH-1:0] save_axis_tdata_reg = {AXI_DATA_WIDTH{1'b0}};

// current and saved input beats, realigned by offset_reg DWORDs
wire [AXI_DATA_WIDTH-1:0] shift_axis_tdata = {s_axis_cq_tdata, save_axis_tdata_reg} >> ((AXI_STRB_WIDTH/4-offset_reg)*32);

reg status_error_uncor_reg = 1'b0, status_error_uncor_next;

// internal datapath
reg  [AXI_DATA_WIDTH-1:0] m_axi_wdata_int;
reg  [AXI_STRB_WIDTH-1:0] m_axi_wstrb_int;
reg                       m_axi_wvalid_int;
reg                       m_axi_wready_int_reg = 1'b0;
reg                       m_axi_wlast_int;
wire                      m_axi_wready_int_early;

assign s_axis_cq_tready = s_axis_cq_tready_reg;

// write address channel: fixed ID 0, full-width INCR bursts
assign m_axi_awid = {AXI_ID_WIDTH{1'b0}};
assign m_axi_awaddr = m_axi_awaddr_reg;
assign m_axi_awlen = m_axi_awlen_reg;
assign m_axi_awsize = $clog2(AXI_STRB_WIDTH);
assign m_axi_awburst = 2'b01;
assign m_axi_awlock = 1'b0;
assign m_axi_awcache = 4'b0011;
assign m_axi_awprot = 3'b010;
assign m_axi_awvalid = m_axi_awvalid_reg;

// write responses are always accepted and otherwise ignored
assign m_axi_bready = 1'b1;

assign status_error_uncor = status_error_uncor_reg;

// request parsing and transfer control state machine (combinational part)
always @* begin
    state_next = STATE_IDLE;

    transfer_in_save = 1'b0;

    s_axis_cq_tready_next = 1'b0;

    type_next = type_reg;
    axi_addr_next = axi_addr_reg;
    op_dword_count_next = op_dword_count_reg;
    tr_dword_count_next = tr_dword_count_reg;
    input_cycle_count_next = input_cycle_count_reg;
    output_cycle_count_next = output_cycle_count_reg;
    input_active_next = input_active_reg;
    bubble_cycle_next = bubble_cycle_reg;
    first_cycle_next = first_cycle_reg;
    last_cycle_next = last_cycle_reg;
    first_be_next = first_be_reg;
    last_be_next = last_be_reg;
    offset_next = offset_reg;
    first_cycle_offset_next = first_cycle_offset_reg;
    last_cycle_offset_next = last_cycle_offset_reg;

    m_axi_awaddr_next = m_axi_awaddr_reg;
    m_axi_awlen_next = m_axi_awlen_reg;
    // hold awvalid until the address has been accepted
    m_axi_awvalid_next = m_axi_awvalid_reg && !m_axi_awready;

    m_axi_wdata_int = shift_axis_tdata;
    m_axi_wstrb_int = {AXI_STRB_WIDTH{1'b1}};
    m_axi_wvalid_int = 1'b0;
    m_axi_wlast_int = 1'b0;

    // error flag is a single cycle pulse
    status_error_uncor_next = 1'b0;

    case (state_reg)
        STATE_IDLE: begin
            // idle state, wait for completion request
            if (AXIS_PCIE_DATA_WIDTH > 64) begin
                // complete descriptor arrives in the first beat
                s_axis_cq_tready_next = m_axi_wready_int_early && (!m_axi_awvalid || m_axi_awready);

                if (s_axis_cq_tready && s_axis_cq_tvalid) begin
                    transfer_in_save = 1'b1;

                    // header fields
                    axi_addr_next = {s_axis_cq_tdata[63:2], 2'b00};
                    op_dword_count_next = s_axis_cq_tdata[74:64];
                    type_next = s_axis_cq_tdata[78:75];

                    // tuser fields
                    if (AXIS_PCIE_DATA_WIDTH == 512) begin
                        first_be_next = s_axis_cq_tuser[3:0];
                        last_be_next = s_axis_cq_tuser[11:8];
                    end else begin
                        first_be_next = s_axis_cq_tuser[3:0];
                        last_be_next = s_axis_cq_tuser[7:4];
                    end

                    if (op_dword_count_next == 1) begin
                        // use first_be for both byte enables for single DWORD transfers
                        last_be_next = first_be_next;
                    end

                    if (op_dword_count_next <= AXI_MAX_BURST_SIZE/4) begin
                        // packet smaller than max burst size
                        // assumed to not cross 4k boundary, send one request
                        tr_dword_count_next = op_dword_count_next;
                        m_axi_awlen_next = (tr_dword_count_next + axi_addr_next[OFFSET_WIDTH+2-1:2] - 1) >> (AXI_BURST_SIZE-2);
                    end else begin
                        // packet larger than max burst size
                        // first burst runs up to the max burst size; the
                        // remainder is issued from STATE_TRANSFER
                        tr_dword_count_next = AXI_MAX_BURST_SIZE/4 - axi_addr_next[OFFSET_WIDTH+2-1:2];
                        m_axi_awlen_next = (tr_dword_count_next - 1) >> (AXI_BURST_SIZE-2);
                    end

                    m_axi_awaddr_next = axi_addr_next;

                    // required DWORD shift to place first DWORD from the TLP payload into proper position on AXI interface
                    // bubble cycle required if first TLP payload transfer does not fill first AXI transfer
                    if (AXIS_PCIE_DATA_WIDTH >= 256) begin
                        offset_next = axi_addr_next[OFFSET_WIDTH+2-1:2] - 4;
                        bubble_cycle_next = axi_addr_next[OFFSET_WIDTH+2-1:2] < 4;
                    end else begin
                        offset_next = axi_addr_next[OFFSET_WIDTH+2-1:2];
                        bubble_cycle_next = 1'b0;
                    end
                    first_cycle_offset_next = axi_addr_next[OFFSET_WIDTH+2-1:2];
                    first_cycle_next = 1'b1;

                    // number of bus transfers in TLP, DWORD count plus payload start DWORD offset, divided by bus width in DWORDS
                    if (AXIS_PCIE_DATA_WIDTH >= 256) begin
                        input_cycle_count_next = (tr_dword_count_next + 4 - 1) >> (AXI_BURST_SIZE-2);
                    end else begin
                        input_cycle_count_next = (tr_dword_count_next - 1) >> (AXI_BURST_SIZE-2);
                    end
                    // number of bus transfers to AXI, DWORD count plus DWORD offset, divided by bus width in DWORDS
                    output_cycle_count_next = (tr_dword_count_next + axi_addr_next[OFFSET_WIDTH+2-1:2] - 1) >> (AXI_BURST_SIZE-2);
                    last_cycle_offset_next = axi_addr_next[OFFSET_WIDTH+2-1:2] + tr_dword_count_next;
                    last_cycle_next = output_cycle_count_next == 0;
                    input_active_next = 1'b1;

                    // advance address and remaining count past this burst
                    axi_addr_next = axi_addr_next + (tr_dword_count_next << 2);
                    op_dword_count_next = op_dword_count_next - tr_dword_count_next;

                    if (type_next == REQ_MEM_WRITE) begin
                        // write request
                        m_axi_awvalid_next = 1'b1;
                        if (AXIS_PCIE_DATA_WIDTH >= 256) begin
                            // some data is transferred with header
                            input_active_next = input_cycle_count_next > 0;
                            input_cycle_count_next = input_cycle_count_next - 1;
                            s_axis_cq_tready_next = 1'b0;
                            state_next = STATE_TRANSFER;
                        end else begin
                            s_axis_cq_tready_next = m_axi_wready_int_early;
                            state_next = STATE_TRANSFER;
                        end
                    end else begin
                        // invalid request
                        status_error_uncor_next = 1'b1;
                        if (s_axis_cq_tlast) begin
                            state_next = STATE_IDLE;
                        end else begin
                            s_axis_cq_tready_next = 1'b1;
                            state_next = STATE_WAIT_END;
                        end
                    end
                end else begin
                    state_next = STATE_IDLE;
                end
            end else begin
                // 64 bit interface: first beat only carries the address
                s_axis_cq_tready_next = !m_axi_awvalid || m_axi_awready;

                if (s_axis_cq_tready && s_axis_cq_tvalid) begin
                    // header fields
                    axi_addr_next = {s_axis_cq_tdata[63:2], 2'b00};

                    // tuser fields
                    first_be_next = s_axis_cq_tuser[3:0];
                    last_be_next = s_axis_cq_tuser[7:4];

                    state_next = STATE_HEADER;
                end else begin
                    state_next = STATE_IDLE;
                end
            end
        end
        STATE_HEADER: begin
            // header state, store rest of header (64 bit interface only)
            s_axis_cq_tready_next = m_axi_wready_int_early;

            if (s_axis_cq_tready && s_axis_cq_tvalid) begin
                transfer_in_save = 1'b1;

                // header fields
                op_dword_count_next = s_axis_cq_tdata[10:0];
                type_next = s_axis_cq_tdata[14:11];

                if (op_dword_count_next == 1) begin
                    // use first_be for both byte enables for single DWORD transfers
                    last_be_next = first_be_reg;
                end

                if (op_dword_count_next <= AXI_MAX_BURST_SIZE/4) begin
                    // packet smaller than max burst size (only for 64 bits)
                    // assumed to not cross 4k boundary, send one request
                    tr_dword_count_next = op_dword_count_next;
                end else begin
                    // packet larger than max burst size
                    // first burst runs up to the max burst size; the
                    // remainder is issued from STATE_TRANSFER
                    tr_dword_count_next = AXI_MAX_BURST_SIZE/4 - axi_addr_reg[OFFSET_WIDTH+2-1:2];
                end

                // required DWORD shift to place first DWORD from the TLP payload into proper position on AXI interface
                // bubble cycle required if first TLP payload transfer does not fill first AXI transfer
                offset_next = axi_addr_reg[OFFSET_WIDTH+2-1:2];
                bubble_cycle_next = 1'b0;
                first_cycle_offset_next = axi_addr_reg[OFFSET_WIDTH+2-1:2];
                first_cycle_next = 1'b1;

                // number of bus transfers in TLP, DWORD count plus payload start DWORD offset, divided by bus width in DWORDS
                input_cycle_count_next = (tr_dword_count_next - 1) >> (AXI_BURST_SIZE-2);
                // number of bus transfers to AXI, DWORD count plus DWORD offset, divided by bus width in DWORDS
                output_cycle_count_next = (tr_dword_count_next + axi_addr_reg[OFFSET_WIDTH+2-1:2] - 1) >> (AXI_BURST_SIZE-2);
                last_cycle_offset_next = axi_addr_reg[OFFSET_WIDTH+2-1:2] + tr_dword_count_next;
                last_cycle_next = output_cycle_count_next == 0;
                input_active_next = 1'b1;

                m_axi_awaddr_next = axi_addr_reg;
                m_axi_awlen_next = output_cycle_count_next;

                // advance address and remaining count past this burst
                axi_addr_next = axi_addr_reg + (tr_dword_count_next << 2);
                op_dword_count_next = op_dword_count_next - tr_dword_count_next;

                if (type_next == REQ_MEM_WRITE) begin
                    // write request
                    m_axi_awvalid_next = 1'b1;
                    s_axis_cq_tready_next = m_axi_wready_int_early;
                    state_next = STATE_TRANSFER;
                end else begin
                    // invalid request
                    status_error_uncor_next = 1'b1;
                    if (s_axis_cq_tlast) begin
                        state_next = STATE_IDLE;
                    end else begin
                        s_axis_cq_tready_next = 1'b1;
                        state_next = STATE_WAIT_END;
                    end
                end
            end else begin
                state_next = STATE_HEADER;
            end
        end
        STATE_TRANSFER: begin
            // transfer state, transfer data
            s_axis_cq_tready_next = m_axi_wready_int_early && input_active_reg && !(AXIS_PCIE_DATA_WIDTH >= 256 && first_cycle_reg && !bubble_cycle_reg);

            // produce an output beat when the output stage can take one and
            // either a new input beat is available, no more input is needed,
            // or the first beat can be built from data saved with the header
            if (m_axi_wready_int_reg && ((s_axis_cq_tready && s_axis_cq_tvalid) || !input_active_reg || (AXIS_PCIE_DATA_WIDTH >= 256 && first_cycle_reg && !bubble_cycle_reg))) begin
                transfer_in_save = s_axis_cq_tready && s_axis_cq_tvalid;

                // transfer data
                if (AXIS_PCIE_DATA_WIDTH >= 256 && first_cycle_reg && !bubble_cycle_reg) begin
                    m_axi_wdata_int = {save_axis_tdata_reg, {AXIS_PCIE_DATA_WIDTH{1'b0}}} >> ((AXI_STRB_WIDTH/4-offset_reg)*32);
                    s_axis_cq_tready_next = m_axi_wready_int_early && input_active_reg;
                end else begin
                    m_axi_wdata_int = shift_axis_tdata;
                end

                // generate strb signal
                if (first_cycle_reg) begin
                    m_axi_wstrb_int = {{AXI_STRB_WIDTH-4{1'b1}}, first_be_reg} << (first_cycle_offset_reg*4);
                end else begin
                    m_axi_wstrb_int = {AXI_STRB_WIDTH{1'b1}};
                end

                // update cycle counters
                if (input_active_reg && !(AXIS_PCIE_DATA_WIDTH >= 256 && first_cycle_reg && !bubble_cycle_reg)) begin
                    input_cycle_count_next = input_cycle_count_reg - 1;
                    input_active_next = input_cycle_count_reg > 0;
                end
                output_cycle_count_next = output_cycle_count_reg - 1;
                last_cycle_next = output_cycle_count_next == 0;

                // modify strb signal at end of transfer
                if (last_cycle_reg) begin
                    if (op_dword_count_reg == 0) begin
                        // last burst of the request: apply last_be
                        if (last_cycle_offset_reg > 0) begin
                            m_axi_wstrb_int = m_axi_wstrb_int & {last_be_reg, {AXI_STRB_WIDTH-4{1'b1}}} >> (AXI_STRB_WIDTH-last_cycle_offset_reg*4);
                        end else begin
                            m_axi_wstrb_int = m_axi_wstrb_int & {last_be_reg, {AXI_STRB_WIDTH-4{1'b1}}};
                        end
                    end
                    m_axi_wlast_int = 1'b1;
                end
                m_axi_wvalid_int = 1'b1;
                first_cycle_next = 1'b0;
                if (!last_cycle_reg) begin
                    s_axis_cq_tready_next = m_axi_wready_int_early && input_active_next;
                    state_next = STATE_TRANSFER;
                end else if (op_dword_count_reg > 0) begin
                    // current transfer done, but operation not finished yet
                    if (op_dword_count_reg <= AXI_MAX_BURST_SIZE/4) begin
                        // remainder fits in one burst
                        // assumed to not cross 4k boundary, send one request
                        tr_dword_count_next = op_dword_count_reg;
                        m_axi_awlen_next = (tr_dword_count_next + axi_addr_reg[OFFSET_WIDTH+2-1:2] - 1) >> (AXI_BURST_SIZE-2);
                    end else begin
                        // remainder still larger than max burst size,
                        // issue another maximum length burst
                        tr_dword_count_next = AXI_MAX_BURST_SIZE/4 - axi_addr_reg[OFFSET_WIDTH+2-1:2];
                        m_axi_awlen_next = (tr_dword_count_next - 1) >> (AXI_BURST_SIZE-2);
                    end

                    m_axi_awaddr_next = axi_addr_reg;

                    // keep offset, no bubble cycles, not first cycle
                    bubble_cycle_next = 1'b0;
                    first_cycle_next = 1'b0;

                    // number of bus transfers in TLP, DWORD count minus payload start DWORD offset, divided by bus width in DWORDS
                    input_cycle_count_next = (tr_dword_count_next - offset_reg - 1) >> (AXI_BURST_SIZE-2);
                    // number of bus transfers to AXI, DWORD count plus DWORD offset, divided by bus width in DWORDS
                    output_cycle_count_next = (tr_dword_count_next + axi_addr_reg[OFFSET_WIDTH+2-1:2] - 1) >> (AXI_BURST_SIZE-2);
                    last_cycle_offset_next = axi_addr_reg[OFFSET_WIDTH+2-1:2] + tr_dword_count_next;
                    last_cycle_next = output_cycle_count_next == 0;
                    input_active_next = tr_dword_count_next > offset_reg;

                    axi_addr_next = axi_addr_reg + (tr_dword_count_next << 2);
                    op_dword_count_next = op_dword_count_reg - tr_dword_count_next;

                    m_axi_awvalid_next = 1'b1;
                    s_axis_cq_tready_next = m_axi_wready_int_early && input_active_next;
                    state_next = STATE_TRANSFER;
                end else begin
                    // request complete
                    s_axis_cq_tready_next = m_axi_wready_int_early && (!m_axi_awvalid || m_axi_awready);
                    state_next = STATE_IDLE;
                end
            end else begin
                state_next = STATE_TRANSFER;
            end
        end
        STATE_WAIT_END: begin
            // wait end state, wait for end of TLP
            s_axis_cq_tready_next = 1'b1;

            if (s_axis_cq_tready && s_axis_cq_tvalid) begin
                if (s_axis_cq_tlast) begin
                    if (AXIS_PCIE_DATA_WIDTH > 64) begin
                        s_axis_cq_tready_next = m_axi_wready_int_early && (!m_axi_awvalid || m_axi_awready);
                    end else begin
                        s_axis_cq_tready_next = 1'b1;
                    end
                    state_next = STATE_IDLE;
                end else begin
                    state_next = STATE_WAIT_END;
                end
            end else begin
                state_next = STATE_WAIT_END;
            end
        end
    endcase
end

// state machine registers; only control and handshake registers are reset
always @(posedge clk) begin
    if (rst) begin
        state_reg <= STATE_IDLE;
        s_axis_cq_tready_reg <= 1'b0;

        m_axi_awvalid_reg <= 1'b0;

        status_error_uncor_reg <= 1'b0;
    end else begin
        state_reg <= state_next;
        s_axis_cq_tready_reg <= s_axis_cq_tready_next;

        m_axi_awvalid_reg <= m_axi_awvalid_next;

        status_error_uncor_reg <= status_error_uncor_next;
    end

    axi_addr_reg <= axi_addr_next;
    op_dword_count_reg <= op_dword_count_next;
    tr_dword_count_reg <= tr_dword_count_next;
    input_cycle_count_reg <= input_cycle_count_next;
    output_cycle_count_reg <= output_cycle_count_next;
    input_active_reg <= input_active_next;
    bubble_cycle_reg <= bubble_cycle_next;
    first_cycle_reg <= first_cycle_next;
    last_cycle_reg <= last_cycle_next;

    type_reg <= type_next;
    first_be_reg <= first_be_next;
    last_be_reg <= last_be_next;
    offset_reg <= offset_next;
    first_cycle_offset_reg <= first_cycle_offset_next;
    last_cycle_offset_reg <= last_cycle_offset_next;

    m_axi_awaddr_reg <= m_axi_awaddr_next;
    m_axi_awlen_reg <= m_axi_awlen_next;

    if (transfer_in_save) begin
        save_axis_tdata_reg <= s_axis_cq_tdata;
    end
end

// output datapath logic (AXI write data)
// registered output stage backed by a temporary holding register
reg [AXI_DATA_WIDTH-1:0] m_axi_wdata_reg = {AXI_DATA_WIDTH{1'b0}};
reg [AXI_STRB_WIDTH-1:0] m_axi_wstrb_reg = {AXI_STRB_WIDTH{1'b0}};
reg                      m_axi_wvalid_reg = 1'b0, m_axi_wvalid_next;
reg                      m_axi_wlast_reg = 1'b0;

reg [AXI_DATA_WIDTH-1:0] temp_m_axi_wdata_reg = {AXI_DATA_WIDTH{1'b0}};
reg [AXI_STRB_WIDTH-1:0] temp_m_axi_wstrb_reg = {AXI_STRB_WIDTH{1'b0}};
reg                      temp_m_axi_wvalid_reg = 1'b0, temp_m_axi_wvalid_next;
reg                      temp_m_axi_wlast_reg = 1'b0;

// datapath control
reg store_axi_w_int_to_output;
reg store_axi_w_int_to_temp;
reg store_axi_w_temp_to_output;

assign m_axi_wdata = m_axi_wdata_reg;
assign m_axi_wstrb = m_axi_wstrb_reg;
assign m_axi_wvalid = m_axi_wvalid_reg;
assign m_axi_wlast = m_axi_wlast_reg;

// enable ready input next cycle if output is ready or the temp reg will not be filled on the next cycle (output reg empty or no input)
assign m_axi_wready_int_early = m_axi_wready || (!temp_m_axi_wvalid_reg && (!m_axi_wvalid_reg || !m_axi_wvalid_int));

always @* begin
    // transfer sink ready state to source
    m_axi_wvalid_next = m_axi_wvalid_reg;
    temp_m_axi_wvalid_next = temp_m_axi_wvalid_reg;

    store_axi_w_int_to_output = 1'b0;
    store_axi_w_int_to_temp = 1'b0;
    store_axi_w_temp_to_output = 1'b0;

    if (m_axi_wready_int_reg) begin
        // input is ready
        if (m_axi_wready || !m_axi_wvalid_reg) begin
            // output is ready or currently not valid, transfer data to output
            m_axi_wvalid_next = m_axi_wvalid_int;
            store_axi_w_int_to_output = 1'b1;
        end else begin
            // output is not ready, store input in temp
            temp_m_axi_wvalid_next = m_axi_wvalid_int;
            store_axi_w_int_to_temp = 1'b1;
        end
    end else if (m_axi_wready) begin
        // input is not ready, but output is ready
        m_axi_wvalid_next = temp_m_axi_wvalid_reg;
        temp_m_axi_wvalid_next = 1'b0;
        store_axi_w_temp_to_output = 1'b1;
    end
end

always @(posedge clk) begin
    if (rst) begin
        m_axi_wvalid_reg <= 1'b0;
        m_axi_wready_int_reg <= 1'b0;
        temp_m_axi_wvalid_reg <= 1'b0;
    end else begin
        m_axi_wvalid_reg <= m_axi_wvalid_next;
        m_axi_wready_int_reg <= m_axi_wready_int_early;
        temp_m_axi_wvalid_reg <= temp_m_axi_wvalid_next;
    end

    // datapath
    if (store_axi_w_int_to_output) begin
        m_axi_wdata_reg <= m_axi_wdata_int;
        m_axi_wstrb_reg <= m_axi_wstrb_int;
        m_axi_wlast_reg <= m_axi_wlast_int;
    end else if (store_axi_w_temp_to_output) begin
        m_axi_wdata_reg <= temp_m_axi_wdata_reg;
        m_axi_wstrb_reg <= temp_m_axi_wstrb_reg;
        m_axi_wlast_reg <= temp_m_axi_wlast_reg;
    end

    if (store_axi_w_int_to_temp) begin
        temp_m_axi_wdata_reg <= m_axi_wdata_int;
        temp_m_axi_wstrb_reg <= m_axi_wstrb_int;
        temp_m_axi_wlast_reg <= m_axi_wlast_int;
    end
end

endmodule

`resetall