1
0
mirror of https://github.com/corundum/corundum.git synced 2025-01-30 08:32:52 +08:00

fpga/common: Rework stats counter to use pipeline and infer URAM

Signed-off-by: Alex Forencich <alex@alexforencich.com>
This commit is contained in:
Alex Forencich 2022-12-01 17:15:56 -08:00
parent bee1703199
commit 6d4373ec97
3 changed files with 161 additions and 161 deletions

View File

@ -44,7 +44,9 @@ module stats_counter #
// Width of AXI lite address bus in bits
parameter AXIL_ADDR_WIDTH = STAT_ID_WIDTH+$clog2(((AXIL_DATA_WIDTH > STAT_COUNT_WIDTH ? AXIL_DATA_WIDTH : STAT_COUNT_WIDTH)+7)/8),
// Width of AXI lite wstrb (width of data bus in words)
parameter AXIL_STRB_WIDTH = (AXIL_DATA_WIDTH/8)
parameter AXIL_STRB_WIDTH = (AXIL_DATA_WIDTH/8),
// Pipeline length
parameter PIPELINE = 2
)
(
input wire clk,
@ -86,7 +88,7 @@ parameter ID_SHIFT = $clog2(((AXIL_DATA_WIDTH > STAT_COUNT_WIDTH ? AXIL_DATA_WID
parameter WORD_SELECT_SHIFT = $clog2(AXIL_DATA_WIDTH/8);
parameter WORD_SELECT_WIDTH = STAT_COUNT_WIDTH > AXIL_DATA_WIDTH ? $clog2((STAT_COUNT_WIDTH+7)/8) - $clog2(AXIL_DATA_WIDTH/8) : 0;
// bus width assertions
// check configuration
initial begin
if (AXIL_STRB_WIDTH * 8 != AXIL_DATA_WIDTH) begin
$error("Error: AXI lite interface requires byte (8-bit) granularity (instance %m)");
@ -97,45 +99,44 @@ initial begin
$error("Error: AXI lite address width too narrow (instance %m)");
$finish;
end
if (PIPELINE < 2) begin
$error("Error: PIPELINE must be at least 2 (instance %m)");
$finish;
end
end
// Internal state and signal declarations.
// NOTE(review): several names in this span are declared twice (for example
// s_axil_awready_reg and mem_wr_en). The surrounding text contains diff hunk
// headers, so this is presumably the pre-change and post-change declarations
// shown together - confirm against the actual file.

// State encodings used by the case (state_reg) statement in the accumulate logic.
localparam [1:0]
STATE_INIT = 2'd0,
STATE_IDLE = 2'd1,
STATE_READ = 2'd2,
STATE_WRITE = 2'd3;
// init_reg powers up as 1; while it is set, init_ptr_reg is used as the memory
// write address and zero is written to that entry.
reg init_reg = 1'b1, init_next;
reg [STAT_ID_WIDTH-1:0] init_ptr_reg = 0, init_ptr_next;
reg [1:0] state_reg = STATE_INIT, state_next;
// Combinational scratch signals computed by the hazard-detection loop.
reg op_acc_pipe_hazard;
reg stage_active;
// One bit per pipeline stage: bit j set means stage j holds that kind of operation.
reg [PIPELINE-1:0] op_axil_read_pipe_reg = 0, op_axil_read_pipe_next;
reg [PIPELINE-1:0] op_acc_pipe_reg = 0, op_acc_pipe_next;
// Per-stage copies of the memory address, AXI-lite word select and increment value.
reg [STAT_ID_WIDTH-1:0] mem_addr_pipeline_reg[PIPELINE-1:0], mem_addr_pipeline_next[PIPELINE-1:0];
reg [WORD_SELECT_WIDTH-1:0] axil_shift_pipeline_reg[PIPELINE-1:0], axil_shift_pipeline_next[PIPELINE-1:0];
reg [STAT_INC_WIDTH-1:0] stat_inc_pipeline_reg[PIPELINE-1:0], stat_inc_pipeline_next[PIPELINE-1:0];
// Registered interface outputs, each paired with its combinational next-value.
reg s_axis_stat_tready_reg = 1'b0, s_axis_stat_tready_next;
reg s_axil_awready_reg = 1'b0, s_axil_awready_next;
reg s_axil_wready_reg = 1'b0, s_axil_wready_next;
reg s_axil_bvalid_reg = 1'b0, s_axil_bvalid_next;
reg s_axil_arready_reg = 1'b0, s_axil_arready_next;
reg [AXIL_DATA_WIDTH-1:0] s_axil_rdata_reg = {AXIL_DATA_WIDTH{1'b0}}, s_axil_rdata_next;
reg s_axil_rvalid_reg = 1'b0, s_axil_rvalid_next;
// Stat ID and increment captured from the stat input when a beat is accepted.
reg [STAT_ID_WIDTH-1:0] id_reg = {STAT_ID_WIDTH{1'b0}}, id_next;
reg [STAT_INC_WIDTH-1:0] inc_reg = {STAT_INC_WIDTH{1'b0}}, inc_next;
// Pending AXI-lite read flag and the word select captured with it.
reg rd_data_valid_reg = 1'b0, rd_data_valid_next;
reg [WORD_SELECT_WIDTH-1:0] rd_data_shift_reg = 0, rd_data_shift_next;
// NOTE(review): second set of AXI-lite register declarations (same names as above,
// initialised with 0 instead of 1'b0) - presumably the other side of the diff.
reg s_axil_awready_reg = 0, s_axil_awready_next;
reg s_axil_wready_reg = 0, s_axil_wready_next;
reg s_axil_bvalid_reg = 0, s_axil_bvalid_next;
reg s_axil_arready_reg = 0, s_axil_arready_next;
reg [AXIL_DATA_WIDTH-1:0] s_axil_rdata_reg = 0, s_axil_rdata_next;
reg s_axil_rvalid_reg = 0, s_axil_rvalid_next;
// Counter storage: 2**STAT_ID_WIDTH entries of STAT_COUNT_WIDTH bits each.
// Two arrays (mem_reg and mem) are declared here; each is referenced by a
// different set of always blocks in this text.
// NOTE(review): ramstyle is a vendor synthesis attribute; presumably it relaxes
// read-during-write checking on the inferred RAM - confirm against vendor docs.
(* ramstyle = "no_rw_check" *)
reg [STAT_COUNT_WIDTH-1:0] mem_reg[(2**STAT_ID_WIDTH)-1:0];
reg [STAT_COUNT_WIDTH-1:0] mem[2**STAT_ID_WIDTH-1:0];
// Registered read data for the accumulate path and for the AXI-lite read path.
reg [STAT_COUNT_WIDTH-1:0] mem_rd_data_reg = {STAT_COUNT_WIDTH{1'b0}};
reg [STAT_COUNT_WIDTH-1:0] mem_rd_data_axil_reg = {STAT_COUNT_WIDTH{1'b0}};
// Combinational memory port controls driven from the always @* blocks.
reg mem_rd_en;
reg mem_wr_en;
reg [STAT_ID_WIDTH-1:0] mem_rd_addr;
reg [STAT_ID_WIDTH-1:0] mem_wr_addr;
reg [STAT_COUNT_WIDTH-1:0] mem_wr_data;
reg mem_rd_en_axil;
// Read address split into the stat ID (address >> ID_SHIFT) and the word
// select (address >> WORD_SELECT_SHIFT, truncated to WORD_SELECT_WIDTH bits).
wire [STAT_ID_WIDTH-1:0] s_axil_araddr_id = s_axil_araddr >> ID_SHIFT;
wire [WORD_SELECT_WIDTH-1:0] s_axil_araddr_word = s_axil_araddr >> WORD_SELECT_SHIFT;
// NOTE(review): mem_wr_en is declared a second time here.
reg mem_wr_en;
// Memory read data register followed by a delay line indexed 1..PIPELINE-1.
reg [STAT_COUNT_WIDTH-1:0] mem_read_data_reg = 0;
reg [STAT_COUNT_WIDTH-1:0] mem_read_data_pipeline_reg[PIPELINE-1:1];
assign s_axis_stat_tready = s_axis_stat_tready_reg;
@ -148,169 +149,168 @@ assign s_axil_rdata = s_axil_rdata_reg;
// Read response code is tied to 2'b00; read valid comes from its register.
assign s_axil_rresp = 2'b00;
assign s_axil_rvalid = s_axil_rvalid_reg;
// Decode the AXI-lite read address: the stat ID is the address shifted right by
// ID_SHIFT, the word-within-counter select is the address shifted right by
// WORD_SELECT_SHIFT (both truncated to the declared wire widths).
wire [STAT_ID_WIDTH-1:0] s_axil_araddr_id = s_axil_araddr >> ID_SHIFT;
wire [WORD_SELECT_WIDTH-1:0] s_axil_araddr_shift = s_axil_araddr >> WORD_SELECT_SHIFT;
// Loop indices shared by the initial block and the always blocks.
integer i, j;
// Power-up initialisation: zeroes the counter memory and the per-stage
// pipeline registers.
// NOTE(review): both mem_reg[j] and mem[j] are assigned below and the begin/end
// pairs in this span do not balance. This looks like the pre-change and
// post-change lines of a rendered diff shown together - confirm against the
// actual file before editing.
initial begin
// two nested loops for smaller number of iterations per loop
// workaround for synthesizer complaints about large loop counts
// break up loop to work around iteration termination
// Outer loop advances i in steps of 2**(STAT_ID_WIDTH/2); inner loop walks j
// across one such step starting at i.
for (i = 0; i < 2**STAT_ID_WIDTH; i = i + 2**(STAT_ID_WIDTH/2)) begin
for (j = i; j < i + 2**(STAT_ID_WIDTH/2); j = j + 1) begin
mem_reg[j] = 0;
end
mem[j] = 0;
end
end
// Clear every stage of the address, word-select and increment pipelines.
for (i = 0; i < PIPELINE; i = i + 1) begin
mem_addr_pipeline_reg[i] = 0;
axil_shift_pipeline_reg[i] = 0;
stat_inc_pipeline_reg[i] = 0;
end
end
// accumulate
// Combinational next-state logic for the accumulate path.
// NOTE(review): this span assigns both the state-machine signals (state_next,
// id_next, inc_next, mem_rd_en) and the init/pipeline *_next signals. The text
// appears to be a rendered diff, so these may belong to different revisions of
// the block - confirm against the actual file.
always @* begin
// Default next state; every case branch below assigns state_next again.
state_next = STATE_IDLE;
// Hold the init flag and pointer unless changed elsewhere.
init_next = init_reg;
init_ptr_next = init_ptr_reg;
// Appending 1'b0 and assigning to a PIPELINE-bit target shifts each operation
// flag one stage along and clears stage 0.
op_axil_read_pipe_next = {op_axil_read_pipe_reg, 1'b0};
op_acc_pipe_next = {op_acc_pipe_reg, 1'b0};
// Stage 0 of each data pipeline defaults to zero.
mem_addr_pipeline_next[0] = 0;
axil_shift_pipeline_next[0] = 0;
stat_inc_pipeline_next[0] = 0;
// Every later stage takes the registered value of the stage before it.
for (j = 1; j < PIPELINE; j = j + 1) begin
mem_addr_pipeline_next[j] = mem_addr_pipeline_reg[j-1];
axil_shift_pipeline_next[j] = axil_shift_pipeline_reg[j-1];
stat_inc_pipeline_next[j] = stat_inc_pipeline_reg[j-1];
end
// Defaults: not ready, keep the captured ID/increment, no memory access.
s_axis_stat_tready_next = 1'b0;
id_next = id_reg;
inc_next = inc_reg;
mem_rd_en = 1'b0;
mem_wr_en = 1'b0;
// Default write data is the last read value plus the captured increment.
mem_wr_data = mem_rd_data_reg + inc_reg;
case (state_reg)
STATE_INIT: begin
// Write zero with write enable asserted and step id_reg each cycle;
// leave this state once id_reg is all ones.
id_next = id_reg + 1;
mem_wr_en = 1'b1;
mem_wr_data = 0;
if (id_reg == {STAT_ID_WIDTH{1'b1}}) begin
state_next = STATE_IDLE;
end else begin
state_next = STATE_INIT;
end
end
STATE_IDLE: begin
// Offer ready; on a valid/ready handshake capture the increment and ID,
// drop ready for the next cycle and go read the counter.
s_axis_stat_tready_next = 1'b1;
if (s_axis_stat_tvalid && s_axis_stat_tready) begin
inc_next = s_axis_stat_tdata;
id_next = s_axis_stat_tid;
s_axis_stat_tready_next = 1'b0;
state_next = STATE_READ;
end else begin
state_next = STATE_IDLE;
end
end
STATE_READ: begin
// Assert the memory read enable, then move on to the write-back state.
s_axis_stat_tready_next = 1'b1;
mem_rd_en = 1'b1;
state_next = STATE_WRITE;
end
STATE_WRITE: begin
// Write back read value + increment. If another beat is handshaken in the
// same cycle, capture it and go straight back to STATE_READ.
s_axis_stat_tready_next = 1'b1;
mem_wr_en = 1'b1;
mem_wr_data = mem_rd_data_reg + inc_reg;
if (s_axis_stat_tvalid && s_axis_stat_tready) begin
inc_next = s_axis_stat_tdata;
id_next = s_axis_stat_tid;
s_axis_stat_tready_next = 1'b0;
state_next = STATE_READ;
end else begin
state_next = STATE_IDLE;
end
end
endcase
end
// Sequential part of the accumulate state machine: registers the next-state
// values and performs the single memory access addressed by id_reg.
always @(posedge clk) begin
state_reg <= state_next;
s_axis_stat_tready_reg <= s_axis_stat_tready_next;
id_reg <= id_next;
inc_reg <= inc_next;
// Write takes priority over read; both use id_reg as the address.
if (mem_wr_en) begin
mem_reg[id_reg] <= mem_wr_data;
end else if (mem_rd_en) begin
mem_rd_data_reg <= mem_reg[id_reg];
end
// Reset is placed last so its nonblocking assignments override the ones above.
// inc_reg and mem_rd_data_reg are not reset here.
if (rst) begin
state_reg <= STATE_INIT;
s_axis_stat_tready_reg <= 1'b0;
id_reg <= {STAT_ID_WIDTH{1'b0}};
end
end
// register interface
// Combinational logic for the AXI-lite interface, hazard detection, request
// acceptance into pipeline stage 0 and the action taken at the final stage.
// NOTE(review): this span assigns init_*, op_*_pipe_next and *_pipeline_next
// signals whose defaults are set in a different always @* span of this text.
// The text appears to be a rendered diff - confirm block boundaries against
// the actual file.
always @* begin
// Defaults: ready strobes low; bvalid/rvalid stay set until the matching
// ready is seen; read data holds its value.
s_axil_awready_next = 1'b0;
s_axil_wready_next = 1'b0;
s_axil_bvalid_next = s_axil_bvalid_reg && !s_axil_bready;
s_axil_arready_next = 1'b0;
s_axil_rdata_next = s_axil_rdata_reg;
s_axil_rvalid_next = s_axil_rvalid_reg && !s_axil_rready;
// Default memory port values: read address 0; write address/data taken from
// the last pipeline stage with write enable off.
mem_rd_addr = 0;
mem_wr_addr = mem_addr_pipeline_reg[PIPELINE-1];
mem_wr_data = mem_read_data_pipeline_reg[PIPELINE-1] + stat_inc_pipeline_reg[PIPELINE-1];
mem_wr_en = 0;
// Hazard detection: flag a hazard when any stage that holds an operation
// (AXI-lite read or accumulate) carries the same address as the incoming
// stat ID.
op_acc_pipe_hazard = 1'b0;
stage_active = 1'b0;
for (j = 0; j < PIPELINE; j = j + 1) begin
stage_active = op_axil_read_pipe_reg[j] || op_acc_pipe_reg[j];
op_acc_pipe_hazard = op_acc_pipe_hazard || (stage_active && mem_addr_pipeline_reg[j] == s_axis_stat_tid);
end
// discard writes
// A write is accepted when address and data are both valid, the response
// channel is free (or being drained) and the ready strobes are not already
// high; no memory write is performed for it.
if (s_axil_awvalid && s_axil_wvalid && (!s_axil_bvalid || s_axil_bready) && (!s_axil_awready && !s_axil_wready)) begin
s_axil_awready_next = 1'b1;
s_axil_wready_next = 1'b1;
s_axil_bvalid_next = 1'b1;
end
// pipeline stage 0 - accept request
// Priority order: initialisation, then AXI-lite read, then accumulate.
if (init_reg) begin
// Write zero to the entry at init_ptr_reg and advance the pointer; clear
// the init flag once the pointer is all ones.
init_ptr_next = init_ptr_reg + 1;
mem_wr_addr = init_ptr_reg;
mem_wr_data = 0;
mem_wr_en = 1'b1;
if (&init_ptr_reg) begin
init_next = 1'b0;
end
// !op_axil_read_pipe_reg is true only when no bit of the vector is set, i.e.
// no AXI-lite read is currently in any pipeline stage.
end else if (s_axil_arvalid && (!s_axil_rvalid || s_axil_rready) && !op_axil_read_pipe_reg) begin
// AXIL read
op_axil_read_pipe_next[0] = 1'b1;
s_axil_arready_next = 1'b1;
mem_rd_addr = s_axil_araddr_id;
mem_addr_pipeline_next[0] = s_axil_araddr_id;
axil_shift_pipeline_next[0] = s_axil_araddr_shift;
// An accumulate request is taken only while tready is currently low and no
// in-flight operation targets the same stat ID.
end else if (s_axis_stat_tvalid && !s_axis_stat_tready && !op_acc_pipe_hazard) begin
// accumulate
op_acc_pipe_next[0] = 1'b1;
s_axis_stat_tready_next = 1'b1;
stat_inc_pipeline_next[0] = s_axis_stat_tdata;
mem_rd_addr = s_axis_stat_tid;
mem_addr_pipeline_next[0] = s_axis_stat_tid;
end
// read complete, perform operation
// Final stage: an accumulate write-back is checked before an AXI-lite read
// response. These assignments come after the init branch above, so they
// override mem_wr_addr/mem_wr_data when both conditions hold.
if (op_acc_pipe_reg[PIPELINE-1]) begin
// accumulate
mem_wr_addr = mem_addr_pipeline_reg[PIPELINE-1];
mem_wr_data = mem_read_data_pipeline_reg[PIPELINE-1] + stat_inc_pipeline_reg[PIPELINE-1];
mem_wr_en = 1'b1;
end else if (op_axil_read_pipe_reg[PIPELINE-1]) begin
// AXIL read
s_axil_rvalid_next = 1'b1;
// Cleared first, then overwritten by whichever branch below is taken.
s_axil_rdata_next = 0;
if (STAT_COUNT_WIDTH > AXIL_DATA_WIDTH) begin
// Counter wider than the bus: shift right by word-select * bus width so the
// selected word lands in the low bits.
s_axil_rdata_next = mem_read_data_pipeline_reg[PIPELINE-1] >> axil_shift_pipeline_reg[PIPELINE-1]*AXIL_DATA_WIDTH;
end else begin
s_axil_rdata_next = mem_read_data_pipeline_reg[PIPELINE-1];
end
end
end
// Sequential logic: registers control, interface and pipeline state, performs
// the memory write and read, and moves read data along its delay line.
always @(posedge clk) begin
init_reg <= init_next;
init_ptr_reg <= init_ptr_next;
op_axil_read_pipe_reg <= op_axil_read_pipe_next;
op_acc_pipe_reg <= op_acc_pipe_next;
s_axis_stat_tready_reg <= s_axis_stat_tready_next;
s_axil_awready_reg <= s_axil_awready_next;
s_axil_wready_reg <= s_axil_wready_next;
s_axil_bvalid_reg <= s_axil_bvalid_next;
s_axil_arready_reg <= s_axil_arready_next;
s_axil_rdata_reg <= s_axil_rdata_next;
s_axil_rvalid_reg <= s_axil_rvalid_next;
// Advance every stage of the address, word-select and increment pipelines.
for (i = 0; i < PIPELINE; i = i + 1) begin
mem_addr_pipeline_reg[i] <= mem_addr_pipeline_next[i];
axil_shift_pipeline_reg[i] <= axil_shift_pipeline_next[i];
stat_inc_pipeline_reg[i] <= stat_inc_pipeline_next[i];
end
// Memory write port, gated by mem_wr_en.
if (mem_wr_en) begin
mem[mem_wr_addr] <= mem_wr_data;
end
// Memory read port: reads every cycle with no enable, then the data passes
// through mem_read_data_reg and entries 1..PIPELINE-1 of the delay line.
mem_read_data_reg <= mem[mem_rd_addr];
mem_read_data_pipeline_reg[1] <= mem_read_data_reg;
for (i = 2; i < PIPELINE; i = i + 1) begin
mem_read_data_pipeline_reg[i] <= mem_read_data_pipeline_reg[i-1];
end
// Reset is placed last so its nonblocking assignments override the ones above.
// NOTE(review): s_axil_arready_reg and s_axil_rvalid_reg are not assigned in
// this reset branch as shown here; the text is a rendered diff, so confirm
// against the actual file whether they are reset.
if (rst) begin
init_reg <= 1'b1;
init_ptr_reg <= 0;
op_axil_read_pipe_reg <= 0;
op_acc_pipe_reg <= 0;
s_axis_stat_tready_reg <= 1'b0;
s_axil_awready_reg <= 1'b0;
s_axil_wready_reg <= 1'b0;
s_axil_bvalid_reg <= 1'b0;
end
end
// Combinational logic for the AXI-lite read path that uses rd_data_valid_reg
// as a "memory read data pending" flag.
always @* begin
// Defaults: arready low, rvalid held until rready, everything else holds.
s_axil_arready_next = 1'b0;
s_axil_rvalid_next = s_axil_rvalid_reg && !s_axil_rready;
s_axil_rdata_next = s_axil_rdata_reg;
rd_data_valid_next = rd_data_valid_reg;
rd_data_shift_next = rd_data_shift_reg;
mem_rd_en_axil = 1'b0;
// Pending read data is moved to the R channel once that channel is free or
// being drained this cycle.
if (rd_data_valid_reg && (!s_axil_rvalid || s_axil_rready)) begin
s_axil_rvalid_next = 1'b1;
rd_data_valid_next = 1'b0;
if (STAT_COUNT_WIDTH > AXIL_DATA_WIDTH) begin
// Counter wider than the bus: shift right by word-select * bus width so the
// selected word lands in the low bits.
s_axil_rdata_next = mem_rd_data_axil_reg >> rd_data_shift_reg*AXIL_DATA_WIDTH;
end else begin
s_axil_rdata_next = mem_rd_data_axil_reg;
end
end
// Accept a new read address when arready is not already high and either the
// R channel is free/draining or no read data is pending. This check comes
// after the one above, so rd_data_valid_next ends up 1 when both fire.
if (s_axil_arvalid && (!s_axil_rvalid || s_axil_rready || !rd_data_valid_reg) && !s_axil_arready) begin
s_axil_arready_next = 1'b1;
rd_data_valid_next = 1'b1;
rd_data_shift_next = s_axil_araddr_word;
mem_rd_en_axil = 1'b1;
end
end
// Sequential part of the AXI-lite read path: registers the next-state values
// and captures the counter addressed by the read address when enabled.
always @(posedge clk) begin
s_axil_arready_reg <= s_axil_arready_next;
s_axil_rvalid_reg <= s_axil_rvalid_next;
s_axil_rdata_reg <= s_axil_rdata_next;
rd_data_valid_reg <= rd_data_valid_next;
rd_data_shift_reg <= rd_data_shift_next;
// Memory read for AXI-lite, addressed by the stat ID field of the read address.
if (mem_rd_en_axil) begin
mem_rd_data_axil_reg <= mem_reg[s_axil_araddr_id];
end
// Reset is placed last so its nonblocking assignments override the ones above.
// The data and shift registers are not reset here.
if (rst) begin
s_axil_arready_reg <= 1'b0;
s_axil_rvalid_reg <= 1'b0;
rd_data_valid_reg <= 1'b0;
end
end

View File

@ -38,7 +38,7 @@ export PARAM_STAT_COUNT_WIDTH ?= 32
export PARAM_AXIL_DATA_WIDTH ?= 32
export PARAM_AXIL_ADDR_WIDTH ?= $(shell python -c "print($(PARAM_STAT_ID_WIDTH) + (($(PARAM_STAT_COUNT_WIDTH)+7)//8-1).bit_length())")
export PARAM_AXIL_STRB_WIDTH ?= $(shell expr $(PARAM_AXIL_DATA_WIDTH) / 8 )
export PARAM_PIPELINE ?= 0
export PARAM_PIPELINE ?= 2
ifeq ($(SIM), icarus)
PLUSARGS += -fst

View File

@ -211,7 +211,7 @@ def test_stats_counter(request, stat_count_width):
parameters['AXIL_DATA_WIDTH'] = 32
parameters['AXIL_ADDR_WIDTH'] = parameters['STAT_ID_WIDTH'] + ((parameters['STAT_COUNT_WIDTH']+7)//8-1).bit_length()
parameters['AXIL_STRB_WIDTH'] = parameters['AXIL_DATA_WIDTH'] // 8
parameters['PIPELINE'] = 1
parameters['PIPELINE'] = 2
extra_env = {f'PARAM_{k}': str(v) for k, v in parameters.items()}