diff --git a/rtl/axi_cdma.v b/rtl/axi_cdma.v index 0d0c9cc..06dad0f 100644 --- a/rtl/axi_cdma.v +++ b/rtl/axi_cdma.v @@ -129,6 +129,7 @@ parameter ADDR_MASK = {AXI_ADDR_WIDTH{1'b1}} << $clog2(AXI_STRB_WIDTH); parameter CYCLE_COUNT_WIDTH = LEN_WIDTH - AXI_BURST_SIZE + 1; parameter STATUS_FIFO_ADDR_WIDTH = 5; +parameter OUTPUT_FIFO_ADDR_WIDTH = 5; // bus width assertions initial begin @@ -258,8 +259,7 @@ reg [AXI_DATA_WIDTH-1:0] m_axi_wdata_int; reg [AXI_STRB_WIDTH-1:0] m_axi_wstrb_int; reg m_axi_wlast_int; reg m_axi_wvalid_int; -reg m_axi_wready_int_reg = 1'b0; -wire m_axi_wready_int_early; +wire m_axi_wready_int; assign s_axis_desc_ready = s_axis_desc_ready_reg; @@ -530,15 +530,15 @@ always @* begin m_axi_awlen_next = axi_cmd_output_cycle_count_reg; m_axi_awvalid_next = 1'b1; - m_axi_rready_next = m_axi_wready_int_early; + m_axi_rready_next = m_axi_wready_int; axi_state_next = AXI_STATE_WRITE; end end AXI_STATE_WRITE: begin // handle AXI read data - m_axi_rready_next = m_axi_wready_int_early && input_active_reg; + m_axi_rready_next = m_axi_wready_int && input_active_reg; - if (m_axi_wready_int_reg && ((m_axi_rready && m_axi_rvalid) || !input_active_reg)) begin + if ((m_axi_rready && m_axi_rvalid) || !input_active_reg) begin // transfer in AXI read data transfer_in_save = m_axi_rready && m_axi_rvalid; @@ -550,7 +550,7 @@ always @* begin bubble_cycle_next = 1'b0; first_input_cycle_next = 1'b0; - m_axi_rready_next = m_axi_wready_int_early && input_active_next; + m_axi_rready_next = m_axi_wready_int && input_active_next; axi_state_next = AXI_STATE_WRITE; end else begin // update counters @@ -741,80 +741,53 @@ end reg [AXI_DATA_WIDTH-1:0] m_axi_wdata_reg = {AXI_DATA_WIDTH{1'b0}}; reg [AXI_STRB_WIDTH-1:0] m_axi_wstrb_reg = {AXI_STRB_WIDTH{1'b0}}; reg m_axi_wlast_reg = 1'b0; -reg m_axi_wvalid_reg = 1'b0, m_axi_wvalid_next; +reg m_axi_wvalid_reg = 1'b0; -reg [AXI_DATA_WIDTH-1:0] temp_m_axi_wdata_reg = {AXI_DATA_WIDTH{1'b0}}; -reg [AXI_STRB_WIDTH-1:0] temp_m_axi_wstrb_reg = {AXI_STRB_WIDTH{1'b0}}; -reg temp_m_axi_wlast_reg = 1'b0; -reg temp_m_axi_wvalid_reg = 1'b0, temp_m_axi_wvalid_next; +reg [OUTPUT_FIFO_ADDR_WIDTH+1-1:0] out_fifo_wr_ptr_reg = 0; +reg [OUTPUT_FIFO_ADDR_WIDTH+1-1:0] out_fifo_rd_ptr_reg = 0; +reg out_fifo_half_full_reg = 1'b0; -// datapath control -reg store_axi_w_int_to_output; -reg store_axi_w_int_to_temp; -reg store_axi_w_temp_to_output; +wire out_fifo_full = out_fifo_wr_ptr_reg == (out_fifo_rd_ptr_reg ^ {1'b1, {OUTPUT_FIFO_ADDR_WIDTH{1'b0}}}); +wire out_fifo_empty = out_fifo_wr_ptr_reg == out_fifo_rd_ptr_reg; + +(* ram_style = "distributed" *) +reg [AXI_DATA_WIDTH-1:0] out_fifo_wdata[2**OUTPUT_FIFO_ADDR_WIDTH-1:0]; +(* ram_style = "distributed" *) +reg [AXI_STRB_WIDTH-1:0] out_fifo_wstrb[2**OUTPUT_FIFO_ADDR_WIDTH-1:0]; +(* ram_style = "distributed" *) +reg out_fifo_wlast[2**OUTPUT_FIFO_ADDR_WIDTH-1:0]; + +assign m_axi_wready_int = !out_fifo_half_full_reg; assign m_axi_wdata = m_axi_wdata_reg; assign m_axi_wstrb = m_axi_wstrb_reg; assign m_axi_wvalid = m_axi_wvalid_reg; assign m_axi_wlast = m_axi_wlast_reg; -// enable ready input next cycle if output is ready or the temp reg will not be filled on the next cycle (output reg empty or no input) -assign m_axi_wready_int_early = m_axi_wready || (!temp_m_axi_wvalid_reg && (!m_axi_wvalid_reg || !m_axi_wvalid_int)); - -always @* begin - // transfer sink ready state to source - m_axi_wvalid_next = m_axi_wvalid_reg; - temp_m_axi_wvalid_next = temp_m_axi_wvalid_reg; - - store_axi_w_int_to_output = 1'b0; - store_axi_w_int_to_temp = 1'b0; - store_axi_w_temp_to_output = 1'b0; - - if (m_axi_wready_int_reg) begin - // input is ready - if (m_axi_wready || !m_axi_wvalid_reg) begin - // output is ready or currently not valid, transfer data to output - m_axi_wvalid_next = m_axi_wvalid_int; - store_axi_w_int_to_output = 1'b1; - end else begin - // output is not ready, store input in temp - temp_m_axi_wvalid_next = m_axi_wvalid_int; - store_axi_w_int_to_temp = 1'b1; - end - end else if (m_axi_wready) begin - // input is not ready, but output is ready - m_axi_wvalid_next = temp_m_axi_wvalid_reg; - temp_m_axi_wvalid_next = 1'b0; - store_axi_w_temp_to_output = 1'b1; - end -end - always @(posedge clk) begin + m_axi_wvalid_reg <= m_axi_wvalid_reg && !m_axi_wready; + + out_fifo_half_full_reg <= $unsigned(out_fifo_wr_ptr_reg - out_fifo_rd_ptr_reg) >= 2**(OUTPUT_FIFO_ADDR_WIDTH-1); + + if (!out_fifo_full && m_axi_wvalid_int) begin + out_fifo_wdata[out_fifo_wr_ptr_reg[OUTPUT_FIFO_ADDR_WIDTH-1:0]] <= m_axi_wdata_int; + out_fifo_wstrb[out_fifo_wr_ptr_reg[OUTPUT_FIFO_ADDR_WIDTH-1:0]] <= m_axi_wstrb_int; + out_fifo_wlast[out_fifo_wr_ptr_reg[OUTPUT_FIFO_ADDR_WIDTH-1:0]] <= m_axi_wlast_int; + out_fifo_wr_ptr_reg <= out_fifo_wr_ptr_reg + 1; + end + + if (!out_fifo_empty && (!m_axi_wvalid_reg || m_axi_wready)) begin + m_axi_wdata_reg <= out_fifo_wdata[out_fifo_rd_ptr_reg[OUTPUT_FIFO_ADDR_WIDTH-1:0]]; + m_axi_wstrb_reg <= out_fifo_wstrb[out_fifo_rd_ptr_reg[OUTPUT_FIFO_ADDR_WIDTH-1:0]]; + m_axi_wlast_reg <= out_fifo_wlast[out_fifo_rd_ptr_reg[OUTPUT_FIFO_ADDR_WIDTH-1:0]]; + m_axi_wvalid_reg <= 1'b1; + out_fifo_rd_ptr_reg <= out_fifo_rd_ptr_reg + 1; + end + if (rst) begin + out_fifo_wr_ptr_reg <= 0; + out_fifo_rd_ptr_reg <= 0; m_axi_wvalid_reg <= 1'b0; - m_axi_wready_int_reg <= 1'b0; - temp_m_axi_wvalid_reg <= 1'b0; - end else begin - m_axi_wvalid_reg <= m_axi_wvalid_next; - m_axi_wready_int_reg <= m_axi_wready_int_early; - temp_m_axi_wvalid_reg <= temp_m_axi_wvalid_next; - end - - // datapath - if (store_axi_w_int_to_output) begin - m_axi_wdata_reg <= m_axi_wdata_int; - m_axi_wstrb_reg <= m_axi_wstrb_int; - m_axi_wlast_reg <= m_axi_wlast_int; - end else if (store_axi_w_temp_to_output) begin - m_axi_wdata_reg <= temp_m_axi_wdata_reg; - m_axi_wstrb_reg <= temp_m_axi_wstrb_reg; - m_axi_wlast_reg <= temp_m_axi_wlast_reg; - end - - if (store_axi_w_int_to_temp) begin - temp_m_axi_wdata_reg <= m_axi_wdata_int; - temp_m_axi_wstrb_reg <= m_axi_wstrb_int; - temp_m_axi_wlast_reg <= m_axi_wlast_int; end end diff --git a/rtl/axi_dma_rd.v b/rtl/axi_dma_rd.v index 49a978d..57f0543 100644 --- a/rtl/axi_dma_rd.v +++ b/rtl/axi_dma_rd.v @@ -146,6 +146,8 @@ parameter OFFSET_MASK = AXI_STRB_WIDTH > 1 ? {OFFSET_WIDTH{1'b1}} : 0; parameter ADDR_MASK = {AXI_ADDR_WIDTH{1'b1}} << $clog2(AXI_STRB_WIDTH); parameter CYCLE_COUNT_WIDTH = LEN_WIDTH - AXI_BURST_SIZE + 1; +parameter OUTPUT_FIFO_ADDR_WIDTH = 5; + // bus width assertions initial begin if (AXI_WORD_SIZE * AXI_STRB_WIDTH != AXI_DATA_WIDTH) begin @@ -269,12 +271,11 @@ wire [AXI_DATA_WIDTH-1:0] shift_axi_rdata = {m_axi_rdata, save_axi_rdata_reg} >> reg [AXIS_DATA_WIDTH-1:0] m_axis_read_data_tdata_int; reg [AXIS_KEEP_WIDTH-1:0] m_axis_read_data_tkeep_int; reg m_axis_read_data_tvalid_int; -reg m_axis_read_data_tready_int_reg = 1'b0; +wire m_axis_read_data_tready_int; reg m_axis_read_data_tlast_int; reg [AXIS_ID_WIDTH-1:0] m_axis_read_data_tid_int; reg [AXIS_DEST_WIDTH-1:0] m_axis_read_data_tdest_int; reg [AXIS_USER_WIDTH-1:0] m_axis_read_data_tuser_int; -wire m_axis_read_data_tready_int_early; assign s_axis_read_desc_ready = s_axis_read_desc_ready_reg; @@ -471,15 +472,15 @@ always @* begin if (axis_cmd_valid_reg) begin axis_cmd_ready = 1'b1; - m_axi_rready_next = m_axis_read_data_tready_int_early; + m_axi_rready_next = m_axis_read_data_tready_int; axis_state_next = AXIS_STATE_READ; end end AXIS_STATE_READ: begin // handle AXI read data - m_axi_rready_next = m_axis_read_data_tready_int_early && input_active_reg; + m_axi_rready_next = m_axis_read_data_tready_int && input_active_reg; - if (m_axis_read_data_tready_int_reg && ((m_axi_rready && m_axi_rvalid) || !input_active_reg)) begin + if ((m_axi_rready && m_axi_rvalid) || !input_active_reg) begin // transfer in AXI read data transfer_in_save = m_axi_rready && m_axi_rvalid; @@ -491,7 +492,7 @@ always @* begin bubble_cycle_next = 1'b0; first_cycle_next = 1'b0; - m_axi_rready_next = m_axis_read_data_tready_int_early && input_active_next; + m_axi_rready_next = m_axis_read_data_tready_int && input_active_next; axis_state_next = AXIS_STATE_READ; end else begin // update counters @@ -535,7 +536,7 @@ always @* begin axis_state_next = AXIS_STATE_IDLE; end else begin // more cycles in AXI transfer - m_axi_rready_next = m_axis_read_data_tready_int_early && input_active_next; + m_axi_rready_next = m_axis_read_data_tready_int && input_active_next; axis_state_next = AXIS_STATE_READ; end end @@ -615,24 +616,33 @@ end // output datapath logic reg [AXIS_DATA_WIDTH-1:0] m_axis_read_data_tdata_reg = {AXIS_DATA_WIDTH{1'b0}}; reg [AXIS_KEEP_WIDTH-1:0] m_axis_read_data_tkeep_reg = {AXIS_KEEP_WIDTH{1'b0}}; -reg m_axis_read_data_tvalid_reg = 1'b0, m_axis_read_data_tvalid_next; +reg m_axis_read_data_tvalid_reg = 1'b0; reg m_axis_read_data_tlast_reg = 1'b0; reg [AXIS_ID_WIDTH-1:0] m_axis_read_data_tid_reg = {AXIS_ID_WIDTH{1'b0}}; reg [AXIS_DEST_WIDTH-1:0] m_axis_read_data_tdest_reg = {AXIS_DEST_WIDTH{1'b0}}; reg [AXIS_USER_WIDTH-1:0] m_axis_read_data_tuser_reg = {AXIS_USER_WIDTH{1'b0}}; -reg [AXIS_DATA_WIDTH-1:0] temp_m_axis_read_data_tdata_reg = {AXIS_DATA_WIDTH{1'b0}}; -reg [AXIS_KEEP_WIDTH-1:0] temp_m_axis_read_data_tkeep_reg = {AXIS_KEEP_WIDTH{1'b0}}; -reg temp_m_axis_read_data_tvalid_reg = 1'b0, temp_m_axis_read_data_tvalid_next; -reg temp_m_axis_read_data_tlast_reg = 1'b0; -reg [AXIS_ID_WIDTH-1:0] temp_m_axis_read_data_tid_reg = {AXIS_ID_WIDTH{1'b0}}; -reg [AXIS_DEST_WIDTH-1:0] temp_m_axis_read_data_tdest_reg = {AXIS_DEST_WIDTH{1'b0}}; -reg [AXIS_USER_WIDTH-1:0] temp_m_axis_read_data_tuser_reg = {AXIS_USER_WIDTH{1'b0}}; +reg [OUTPUT_FIFO_ADDR_WIDTH+1-1:0] out_fifo_wr_ptr_reg = 0; +reg [OUTPUT_FIFO_ADDR_WIDTH+1-1:0] out_fifo_rd_ptr_reg = 0; +reg out_fifo_half_full_reg = 1'b0; -// datapath control -reg store_axis_int_to_output; -reg store_axis_int_to_temp; -reg store_axis_temp_to_output; +wire out_fifo_full = out_fifo_wr_ptr_reg == (out_fifo_rd_ptr_reg ^ {1'b1, {OUTPUT_FIFO_ADDR_WIDTH{1'b0}}}); +wire out_fifo_empty = out_fifo_wr_ptr_reg == out_fifo_rd_ptr_reg; + +(* ram_style = "distributed" *) +reg [AXIS_DATA_WIDTH-1:0] out_fifo_tdata[2**OUTPUT_FIFO_ADDR_WIDTH-1:0]; +(* ram_style = "distributed" *) +reg [AXIS_KEEP_WIDTH-1:0] out_fifo_tkeep[2**OUTPUT_FIFO_ADDR_WIDTH-1:0]; +(* ram_style = "distributed" *) +reg out_fifo_tlast[2**OUTPUT_FIFO_ADDR_WIDTH-1:0]; +(* ram_style = "distributed" *) +reg [AXIS_ID_WIDTH-1:0] out_fifo_tid[2**OUTPUT_FIFO_ADDR_WIDTH-1:0]; +(* ram_style = "distributed" *) +reg [AXIS_DEST_WIDTH-1:0] out_fifo_tdest[2**OUTPUT_FIFO_ADDR_WIDTH-1:0]; +(* ram_style = "distributed" *) +reg [AXIS_USER_WIDTH-1:0] out_fifo_tuser[2**OUTPUT_FIFO_ADDR_WIDTH-1:0]; + +assign m_axis_read_data_tready_int = !out_fifo_half_full_reg; assign m_axis_read_data_tdata = m_axis_read_data_tdata_reg; assign m_axis_read_data_tkeep = AXIS_KEEP_ENABLE ? m_axis_read_data_tkeep_reg : {AXIS_KEEP_WIDTH{1'b1}}; @@ -642,72 +652,36 @@ assign m_axis_read_data_tid = AXIS_ID_ENABLE ? m_axis_read_data_tid_reg : assign m_axis_read_data_tdest = AXIS_DEST_ENABLE ? m_axis_read_data_tdest_reg : {AXIS_DEST_WIDTH{1'b0}}; assign m_axis_read_data_tuser = AXIS_USER_ENABLE ? m_axis_read_data_tuser_reg : {AXIS_USER_WIDTH{1'b0}}; -// enable ready input next cycle if output is ready or the temp reg will not be filled on the next cycle (output reg empty or no input) -assign m_axis_read_data_tready_int_early = m_axis_read_data_tready || (!temp_m_axis_read_data_tvalid_reg && (!m_axis_read_data_tvalid_reg || !m_axis_read_data_tvalid_int)); - -always @* begin - // transfer sink ready state to source - m_axis_read_data_tvalid_next = m_axis_read_data_tvalid_reg; - temp_m_axis_read_data_tvalid_next = temp_m_axis_read_data_tvalid_reg; - - store_axis_int_to_output = 1'b0; - store_axis_int_to_temp = 1'b0; - store_axis_temp_to_output = 1'b0; - - if (m_axis_read_data_tready_int_reg) begin - // input is ready - if (m_axis_read_data_tready || !m_axis_read_data_tvalid_reg) begin - // output is ready or currently not valid, transfer data to output - m_axis_read_data_tvalid_next = m_axis_read_data_tvalid_int; - store_axis_int_to_output = 1'b1; - end else begin - // output is not ready, store input in temp - temp_m_axis_read_data_tvalid_next = m_axis_read_data_tvalid_int; - store_axis_int_to_temp = 1'b1; - end - end else if (m_axis_read_data_tready) begin - // input is not ready, but output is ready - m_axis_read_data_tvalid_next = temp_m_axis_read_data_tvalid_reg; - temp_m_axis_read_data_tvalid_next = 1'b0; - store_axis_temp_to_output = 1'b1; - end -end - always @(posedge clk) begin + m_axis_read_data_tvalid_reg <= m_axis_read_data_tvalid_reg && !m_axis_read_data_tready; + + out_fifo_half_full_reg <= $unsigned(out_fifo_wr_ptr_reg - out_fifo_rd_ptr_reg) >= 2**(OUTPUT_FIFO_ADDR_WIDTH-1); + + if (!out_fifo_full && m_axis_read_data_tvalid_int) begin + out_fifo_tdata[out_fifo_wr_ptr_reg[OUTPUT_FIFO_ADDR_WIDTH-1:0]] <= m_axis_read_data_tdata_int; + out_fifo_tkeep[out_fifo_wr_ptr_reg[OUTPUT_FIFO_ADDR_WIDTH-1:0]] <= m_axis_read_data_tkeep_int; + out_fifo_tlast[out_fifo_wr_ptr_reg[OUTPUT_FIFO_ADDR_WIDTH-1:0]] <= m_axis_read_data_tlast_int; + out_fifo_tid[out_fifo_wr_ptr_reg[OUTPUT_FIFO_ADDR_WIDTH-1:0]] <= m_axis_read_data_tid_int; + out_fifo_tdest[out_fifo_wr_ptr_reg[OUTPUT_FIFO_ADDR_WIDTH-1:0]] <= m_axis_read_data_tdest_int; + out_fifo_tuser[out_fifo_wr_ptr_reg[OUTPUT_FIFO_ADDR_WIDTH-1:0]] <= m_axis_read_data_tuser_int; + out_fifo_wr_ptr_reg <= out_fifo_wr_ptr_reg + 1; + end + + if (!out_fifo_empty && (!m_axis_read_data_tvalid_reg || m_axis_read_data_tready)) begin + m_axis_read_data_tdata_reg <= out_fifo_tdata[out_fifo_rd_ptr_reg[OUTPUT_FIFO_ADDR_WIDTH-1:0]]; + m_axis_read_data_tkeep_reg <= out_fifo_tkeep[out_fifo_rd_ptr_reg[OUTPUT_FIFO_ADDR_WIDTH-1:0]]; + m_axis_read_data_tvalid_reg <= 1'b1; + m_axis_read_data_tlast_reg <= out_fifo_tlast[out_fifo_rd_ptr_reg[OUTPUT_FIFO_ADDR_WIDTH-1:0]]; + m_axis_read_data_tid_reg <= out_fifo_tid[out_fifo_rd_ptr_reg[OUTPUT_FIFO_ADDR_WIDTH-1:0]]; + m_axis_read_data_tdest_reg <= out_fifo_tdest[out_fifo_rd_ptr_reg[OUTPUT_FIFO_ADDR_WIDTH-1:0]]; + m_axis_read_data_tuser_reg <= out_fifo_tuser[out_fifo_rd_ptr_reg[OUTPUT_FIFO_ADDR_WIDTH-1:0]]; + out_fifo_rd_ptr_reg <= out_fifo_rd_ptr_reg + 1; + end + if (rst) begin + out_fifo_wr_ptr_reg <= 0; + out_fifo_rd_ptr_reg <= 0; m_axis_read_data_tvalid_reg <= 1'b0; - m_axis_read_data_tready_int_reg <= 1'b0; - temp_m_axis_read_data_tvalid_reg <= 1'b0; - end else begin - m_axis_read_data_tvalid_reg <= m_axis_read_data_tvalid_next; - m_axis_read_data_tready_int_reg <= m_axis_read_data_tready_int_early; - temp_m_axis_read_data_tvalid_reg <= temp_m_axis_read_data_tvalid_next; - end - - // datapath - if (store_axis_int_to_output) begin - m_axis_read_data_tdata_reg <= m_axis_read_data_tdata_int; - m_axis_read_data_tkeep_reg <= m_axis_read_data_tkeep_int; - m_axis_read_data_tlast_reg <= m_axis_read_data_tlast_int; - m_axis_read_data_tid_reg <= m_axis_read_data_tid_int; - m_axis_read_data_tdest_reg <= m_axis_read_data_tdest_int; - m_axis_read_data_tuser_reg <= m_axis_read_data_tuser_int; - end else if (store_axis_temp_to_output) begin - m_axis_read_data_tdata_reg <= temp_m_axis_read_data_tdata_reg; - m_axis_read_data_tkeep_reg <= temp_m_axis_read_data_tkeep_reg; - m_axis_read_data_tlast_reg <= temp_m_axis_read_data_tlast_reg; - m_axis_read_data_tid_reg <= temp_m_axis_read_data_tid_reg; - m_axis_read_data_tdest_reg <= temp_m_axis_read_data_tdest_reg; - m_axis_read_data_tuser_reg <= temp_m_axis_read_data_tuser_reg; - end - - if (store_axis_int_to_temp) begin - temp_m_axis_read_data_tdata_reg <= m_axis_read_data_tdata_int; - temp_m_axis_read_data_tkeep_reg <= m_axis_read_data_tkeep_int; - temp_m_axis_read_data_tlast_reg <= m_axis_read_data_tlast_int; - temp_m_axis_read_data_tid_reg <= m_axis_read_data_tid_int; - temp_m_axis_read_data_tdest_reg <= m_axis_read_data_tdest_int; - temp_m_axis_read_data_tuser_reg <= m_axis_read_data_tuser_int; end end diff --git a/rtl/axi_dma_wr.v b/rtl/axi_dma_wr.v index 2946300..55be381 100644 --- a/rtl/axi_dma_wr.v +++ b/rtl/axi_dma_wr.v @@ -152,6 +152,7 @@ parameter ADDR_MASK = {AXI_ADDR_WIDTH{1'b1}} << $clog2(AXI_STRB_WIDTH); parameter CYCLE_COUNT_WIDTH = LEN_WIDTH - AXI_BURST_SIZE + 1; parameter STATUS_FIFO_ADDR_WIDTH = 5; +parameter OUTPUT_FIFO_ADDR_WIDTH = 5; // bus width assertions initial begin @@ -303,8 +304,7 @@ reg [AXI_DATA_WIDTH-1:0] m_axi_wdata_int; reg [AXI_STRB_WIDTH-1:0] m_axi_wstrb_int; reg m_axi_wlast_int; reg m_axi_wvalid_int; -reg m_axi_wready_int_reg = 1'b0; -wire m_axi_wready_int_early; +wire m_axi_wready_int; assign s_axis_write_desc_ready = s_axis_write_desc_ready_reg; @@ -510,7 +510,7 @@ always @* begin addr_next = addr_reg + tr_word_count_next; op_word_count_next = op_word_count_reg - tr_word_count_next; - s_axis_write_data_tready_next = m_axi_wready_int_early && input_active_next; + s_axis_write_data_tready_next = m_axi_wready_int && input_active_next; inc_active = 1'b1; @@ -523,9 +523,9 @@ always @* begin end end STATE_WRITE: begin - s_axis_write_data_tready_next = m_axi_wready_int_early && (last_transfer_reg || input_active_reg) && shift_axis_input_tready; + s_axis_write_data_tready_next = m_axi_wready_int && (last_transfer_reg || input_active_reg) && shift_axis_input_tready; - if (m_axi_wready_int_reg && ((s_axis_write_data_tready && shift_axis_tvalid) || (!input_active_reg && !last_transfer_reg) || !shift_axis_input_tready)) begin + if ((s_axis_write_data_tready && shift_axis_tvalid) || (!input_active_reg && !last_transfer_reg) || !shift_axis_input_tready) begin if (s_axis_write_data_tready && s_axis_write_data_tvalid) begin transfer_in_save = 1'b1; @@ -689,7 +689,7 @@ always @* begin end end end else begin - s_axis_write_data_tready_next = m_axi_wready_int_early && (last_transfer_reg || input_active_next) && shift_axis_input_tready; + s_axis_write_data_tready_next = m_axi_wready_int && (last_transfer_reg || input_active_next) && shift_axis_input_tready; state_next = STATE_WRITE; end end else begin @@ -699,7 +699,7 @@ always @* begin STATE_FINISH_BURST: begin // finish current AXI burst - if (m_axi_wready_int_reg) begin + if (m_axi_wready_int) begin // update counters if (input_active_reg) begin input_cycle_count_next = input_cycle_count_reg - 1; @@ -885,80 +885,53 @@ end reg [AXI_DATA_WIDTH-1:0] m_axi_wdata_reg = {AXI_DATA_WIDTH{1'b0}}; reg [AXI_STRB_WIDTH-1:0] m_axi_wstrb_reg = {AXI_STRB_WIDTH{1'b0}}; reg m_axi_wlast_reg = 1'b0; -reg m_axi_wvalid_reg = 1'b0, m_axi_wvalid_next; +reg m_axi_wvalid_reg = 1'b0; -reg [AXI_DATA_WIDTH-1:0] temp_m_axi_wdata_reg = {AXI_DATA_WIDTH{1'b0}}; -reg [AXI_STRB_WIDTH-1:0] temp_m_axi_wstrb_reg = {AXI_STRB_WIDTH{1'b0}}; -reg temp_m_axi_wlast_reg = 1'b0; -reg temp_m_axi_wvalid_reg = 1'b0, temp_m_axi_wvalid_next; +reg [OUTPUT_FIFO_ADDR_WIDTH+1-1:0] out_fifo_wr_ptr_reg = 0; +reg [OUTPUT_FIFO_ADDR_WIDTH+1-1:0] out_fifo_rd_ptr_reg = 0; +reg out_fifo_half_full_reg = 1'b0; -// datapath control -reg store_axi_w_int_to_output; -reg store_axi_w_int_to_temp; -reg store_axi_w_temp_to_output; +wire out_fifo_full = out_fifo_wr_ptr_reg == (out_fifo_rd_ptr_reg ^ {1'b1, {OUTPUT_FIFO_ADDR_WIDTH{1'b0}}}); +wire out_fifo_empty = out_fifo_wr_ptr_reg == out_fifo_rd_ptr_reg; + +(* ram_style = "distributed" *) +reg [AXI_DATA_WIDTH-1:0] out_fifo_wdata[2**OUTPUT_FIFO_ADDR_WIDTH-1:0]; +(* ram_style = "distributed" *) +reg [AXI_STRB_WIDTH-1:0] out_fifo_wstrb[2**OUTPUT_FIFO_ADDR_WIDTH-1:0]; +(* ram_style = "distributed" *) +reg out_fifo_wlast[2**OUTPUT_FIFO_ADDR_WIDTH-1:0]; + +assign m_axi_wready_int = !out_fifo_half_full_reg; assign m_axi_wdata = m_axi_wdata_reg; assign m_axi_wstrb = m_axi_wstrb_reg; assign m_axi_wvalid = m_axi_wvalid_reg; assign m_axi_wlast = m_axi_wlast_reg; -// enable ready input next cycle if output is ready or the temp reg will not be filled on the next cycle (output reg empty or no input) -assign m_axi_wready_int_early = m_axi_wready || (!temp_m_axi_wvalid_reg && (!m_axi_wvalid_reg || !m_axi_wvalid_int)); - -always @* begin - // transfer sink ready state to source - m_axi_wvalid_next = m_axi_wvalid_reg; - temp_m_axi_wvalid_next = temp_m_axi_wvalid_reg; - - store_axi_w_int_to_output = 1'b0; - store_axi_w_int_to_temp = 1'b0; - store_axi_w_temp_to_output = 1'b0; - - if (m_axi_wready_int_reg) begin - // input is ready - if (m_axi_wready || !m_axi_wvalid_reg) begin - // output is ready or currently not valid, transfer data to output - m_axi_wvalid_next = m_axi_wvalid_int; - store_axi_w_int_to_output = 1'b1; - end else begin - // output is not ready, store input in temp - temp_m_axi_wvalid_next = m_axi_wvalid_int; - store_axi_w_int_to_temp = 1'b1; - end - end else if (m_axi_wready) begin - // input is not ready, but output is ready - m_axi_wvalid_next = temp_m_axi_wvalid_reg; - temp_m_axi_wvalid_next = 1'b0; - store_axi_w_temp_to_output = 1'b1; - end -end - always @(posedge clk) begin + m_axi_wvalid_reg <= m_axi_wvalid_reg && !m_axi_wready; + + out_fifo_half_full_reg <= $unsigned(out_fifo_wr_ptr_reg - out_fifo_rd_ptr_reg) >= 2**(OUTPUT_FIFO_ADDR_WIDTH-1); + + if (!out_fifo_full && m_axi_wvalid_int) begin + out_fifo_wdata[out_fifo_wr_ptr_reg[OUTPUT_FIFO_ADDR_WIDTH-1:0]] <= m_axi_wdata_int; + out_fifo_wstrb[out_fifo_wr_ptr_reg[OUTPUT_FIFO_ADDR_WIDTH-1:0]] <= m_axi_wstrb_int; + out_fifo_wlast[out_fifo_wr_ptr_reg[OUTPUT_FIFO_ADDR_WIDTH-1:0]] <= m_axi_wlast_int; + out_fifo_wr_ptr_reg <= out_fifo_wr_ptr_reg + 1; + end + + if (!out_fifo_empty && (!m_axi_wvalid_reg || m_axi_wready)) begin + m_axi_wdata_reg <= out_fifo_wdata[out_fifo_rd_ptr_reg[OUTPUT_FIFO_ADDR_WIDTH-1:0]]; + m_axi_wstrb_reg <= out_fifo_wstrb[out_fifo_rd_ptr_reg[OUTPUT_FIFO_ADDR_WIDTH-1:0]]; + m_axi_wlast_reg <= out_fifo_wlast[out_fifo_rd_ptr_reg[OUTPUT_FIFO_ADDR_WIDTH-1:0]]; + m_axi_wvalid_reg <= 1'b1; + out_fifo_rd_ptr_reg <= out_fifo_rd_ptr_reg + 1; + end + if (rst) begin + out_fifo_wr_ptr_reg <= 0; + out_fifo_rd_ptr_reg <= 0; m_axi_wvalid_reg <= 1'b0; - m_axi_wready_int_reg <= 1'b0; - temp_m_axi_wvalid_reg <= 1'b0; - end else begin - m_axi_wvalid_reg <= m_axi_wvalid_next; - m_axi_wready_int_reg <= m_axi_wready_int_early; - temp_m_axi_wvalid_reg <= temp_m_axi_wvalid_next; - end - - // datapath - if (store_axi_w_int_to_output) begin - m_axi_wdata_reg <= m_axi_wdata_int; - m_axi_wstrb_reg <= m_axi_wstrb_int; - m_axi_wlast_reg <= m_axi_wlast_int; - end else if (store_axi_w_temp_to_output) begin - m_axi_wdata_reg <= temp_m_axi_wdata_reg; - m_axi_wstrb_reg <= temp_m_axi_wstrb_reg; - m_axi_wlast_reg <= temp_m_axi_wlast_reg; - end - - if (store_axi_w_int_to_temp) begin - temp_m_axi_wdata_reg <= m_axi_wdata_int; - temp_m_axi_wstrb_reg <= m_axi_wstrb_int; - temp_m_axi_wlast_reg <= m_axi_wlast_int; end end