From 4cd95ad2dccf64486ec8346e5d4c75f5564721c2 Mon Sep 17 00:00:00 2001
From: "Konstantin Pavlov (ms)"
Date: Mon, 19 Jul 2021 01:45:45 +0300
Subject: [PATCH] Added universal block RAM fifo

---
 fifo_single_clock_ram.sv    | 235 ++++++++++++++++++++++++++++++
 fifo_single_clock_ram_tb.sv | 280 ++++++++++++++++++++++++++++++++++++
 2 files changed, 515 insertions(+)
 create mode 100755 fifo_single_clock_ram.sv
 create mode 100755 fifo_single_clock_ram_tb.sv

diff --git a/fifo_single_clock_ram.sv b/fifo_single_clock_ram.sv
new file mode 100755
index 0000000..f47d2b0
--- /dev/null
+++ b/fifo_single_clock_ram.sv
@@ -0,0 +1,235 @@
+//------------------------------------------------------------------------------
+// fifo_single_clock_ram.sv
+// Konstantin Pavlov, pavlovconst@gmail.com
+//------------------------------------------------------------------------------
+
+// INFO ------------------------------------------------------------------------
+// Single-clock FIFO buffer implementation, also known as "queue"
+//
+// This fifo variant should synthesize into block RAM seamlessly, both for
+// Altera and for Xilinx chips. Simulation is also consistent.
+// Use this fifo when you need cross-vendor and sim/synth compatibility.
+//
+// Features:
+// - single clock operation
+// - configurable depth and data width
+// - only "normal" mode is supported here, no FWFT mode
+// - protected against overflow and underflow
+//
+
+
+/* --- INSTANTIATION TEMPLATE BEGIN ---
+
+fifo_single_clock_ram #(
+  .DEPTH( 8 ),
+  .DATA_W( 32 )
+) FF1 (
+  .clk( clk ),
+  .nrst( 1'b1 ),
+
+  .w_req( ),
+  .w_data( ),
+
+  .r_req( ),
+  .r_data( ),
+
+  .cnt( ),
+  .empty( ),
+  .full( ),
+  .fail( )
+);
+
+--- INSTANTIATION TEMPLATE END ---*/
+
+module fifo_single_clock_ram #( parameter
+
+  //FWFT_MODE = "TRUE",       // "TRUE"  - "first word fall-through" mode
+                              // "FALSE" - normal fifo mode
+
+  DEPTH = 8,                  // max elements count == DEPTH, DEPTH MUST be power of 2
+  DEPTH_W = $clog2(DEPTH)+1,  // elements counter width, extra bit to store
+                              // "fifo full" state ( cnt == DEPTH )
+
+  DATA_W = 32                 // data field width
+)(
+
+  input clk,
+  input nrst,                 // inverted reset
+
+  // input port
+  input w_req,
+  input [DATA_W-1:0] w_data,
+
+  // output port
+  input r_req,
+  output [DATA_W-1:0] r_data,
+
+  // helper ports
+  output logic [DEPTH_W-1:0] cnt = '0,
+  output logic empty,
+  output logic full,
+
+  output logic fail           // high while a read is requested on an empty fifo or a write on a full one
+);
+
+
+// RAM address width. Pointers are declared DEPTH_W bits wide, but they wrap at
+// DEPTH-1 (see inc_ptr), so only the ADDR_W low bits ever carry information
+localparam ADDR_W = ( DEPTH > 1 ) ? $clog2(DEPTH) : 1;
+
+// read and write pointers
+logic [DEPTH_W-1:0] w_ptr = '0;
+logic [DEPTH_W-1:0] r_ptr = '0;
+
+// filtered requests
+logic w_req_f;
+assign w_req_f = w_req && ~full;
+
+logic r_req_f;
+assign r_req_f = r_req && ~empty;
+
+
+// port A is write-only (wea tied high), port B is read-only (web tied low);
+// pointers are sliced to ADDR_W bits to match the RAM address port width
+true_dual_port_write_first_2_clock_ram #(
+  .RAM_WIDTH( DATA_W ),
+  .RAM_DEPTH( DEPTH ),
+  .INIT_FILE( "" )
+) data_ram (
+  .clka( clk ),
+  .addra( w_ptr[ADDR_W-1:0] ),
+  .ena( w_req_f ),
+  .wea( 1'b1 ),
+  .dina( w_data[DATA_W-1:0] ),
+  .douta( ),
+
+  .clkb( clk ),
+  .addrb( r_ptr[ADDR_W-1:0] ),
+  .enb( r_req_f ),
+  .web( 1'b0 ),
+  .dinb( '0 ),
+  .doutb( r_data[DATA_W-1:0] )
+);
+
+
+// returns the pointer advanced by one position, wrapping DEPTH-1 -> 0
+function [DEPTH_W-1:0] inc_ptr (
+  input [DEPTH_W-1:0] ptr
+);
+
+  if( ptr[DEPTH_W-1:0] == DEPTH-1 ) begin
+    inc_ptr[DEPTH_W-1:0] = '0;
+  end else begin
+    inc_ptr[DEPTH_W-1:0] = ptr[DEPTH_W-1:0] + 1'b1;
+  end
+endfunction
+
+
+always_ff @(posedge clk) begin
+  if ( ~nrst ) begin
+    w_ptr[DEPTH_W-1:0] <= '0;
+    r_ptr[DEPTH_W-1:0] <= '0;
+
+    cnt[DEPTH_W-1:0] <= '0;
+  end else begin
+
+    if( w_req_f ) begin
+      w_ptr[DEPTH_W-1:0] <= inc_ptr(w_ptr[DEPTH_W-1:0]);
+    end
+
+    if( r_req_f ) begin
+      r_ptr[DEPTH_W-1:0] <= inc_ptr(r_ptr[DEPTH_W-1:0]);
+    end
+
+    // an accepted write together with an accepted read leaves cnt unchanged
+    if( w_req_f && ~r_req_f ) begin
+      cnt[DEPTH_W-1:0] <= cnt[DEPTH_W-1:0] + 1'b1;
+    end else if( ~w_req_f && r_req_f ) begin
+      cnt[DEPTH_W-1:0] <= cnt[DEPTH_W-1:0] - 1'b1;
+    end
+
+  end
+end
+
+always_comb begin
+  empty = ( cnt[DEPTH_W-1:0] == '0 );
+  full = ( cnt[DEPTH_W-1:0] == DEPTH );
+
+  fail = ( empty && r_req ) ||
+         ( full && w_req );
+end
+
+endmodule
+
+
+
+// Two-port RAM. Each port, when enabled, either writes din* into BRAM and
+// mirrors the written word into its output register (we* high), or loads the
+// output register from BRAM[addr*] (we* low)
+module true_dual_port_write_first_2_clock_ram #( parameter
+  RAM_WIDTH = 16,
+  RAM_DEPTH = 8,
+  INIT_FILE = ""
+)(
+  input clka,
+  input [clogb2(RAM_DEPTH-1)-1:0] addra,
+  input ena,
+  input wea,
+  input [RAM_WIDTH-1:0] dina,
+  output [RAM_WIDTH-1:0] douta,
+
+  input clkb,
+  input [clogb2(RAM_DEPTH-1)-1:0] addrb,
+  input enb,
+  input web,
+  input [RAM_WIDTH-1:0] dinb,
+  output [RAM_WIDTH-1:0] doutb
+);
+
+  reg [RAM_WIDTH-1:0] BRAM [RAM_DEPTH-1:0];
+  reg [RAM_WIDTH-1:0] ram_data_a = {RAM_WIDTH{1'b0}};
+  reg [RAM_WIDTH-1:0] ram_data_b = {RAM_WIDTH{1'b0}};
+
+  // either initializes the memory values to a specified file or to all zeros
+  // to match hardware
+  generate
+    if (INIT_FILE != "") begin: use_init_file
+      initial
+        $readmemh(INIT_FILE, BRAM, 0, RAM_DEPTH-1);
+    end else begin: init_bram_to_zero
+      integer ram_index;
+      initial
+        for (ram_index = 0; ram_index < RAM_DEPTH; ram_index = ram_index + 1)
+          BRAM[ram_index] = {RAM_WIDTH{1'b0}};
+    end
+  endgenerate
+
+  always @(posedge clka)
+    if (ena)
+      if (wea) begin
+        BRAM[addra] <= dina;
+        ram_data_a <= dina;
+      end else
+        ram_data_a <= BRAM[addra];
+
+  always @(posedge clkb)
+    if (enb)
+      if (web) begin
+        BRAM[addrb] <= dinb;
+        ram_data_b <= dinb;
+      end else
+        ram_data_b <= BRAM[addrb];
+
+  // no output register
+  assign douta = ram_data_a;
+  assign doutb = ram_data_b;
+
+  // calculates the address width based on specified RAM depth
+  function integer clogb2;
+    input integer depth;
+    for (clogb2=0; depth>0; clogb2=clogb2+1)
+      depth = depth >> 1;
+  endfunction
+
+endmodule
+
diff --git a/fifo_single_clock_ram_tb.sv b/fifo_single_clock_ram_tb.sv
new file mode 100755
index 0000000..a241e49
--- /dev/null
+++ b/fifo_single_clock_ram_tb.sv
@@ -0,0 +1,280 @@
+//------------------------------------------------------------------------------
+// fifo_single_clock_ram_tb.sv
+// Konstantin Pavlov, pavlovconst@gmail.com
+//------------------------------------------------------------------------------
+
+// INFO ------------------------------------------------------------------------
+// testbench for fifo_single_clock_ram.sv module
+//
+
+`timescale 1ns / 1ps
+
+module fifo_single_clock_ram_tb();
+
+logic clk200;
+initial begin
+  #0 clk200 = 1'b0;
+  forever
+    #2.5 clk200 = ~clk200;
+end
+
+// external device "asynchronous" clock
+logic clk33;
+initial begin
+  #0 clk33 = 1'b0;
+  forever
+    #15.151 clk33 = ~clk33;
+end
+
+logic rst;
+initial begin
+  #0 rst = 1'b0;
+  #10.2 rst = 1'b1;
+  #5 rst = 1'b0;
+  //#10000;
+  forever begin
+    #9985 rst = ~rst;
+    #5 rst = ~rst;
+  end
+end
+
+logic nrst;
+assign nrst = ~rst;
+
+logic rst_once;
+initial begin
+  #0 rst_once = 1'b0;
+  #10.2 rst_once = 1'b1;
+  #5 rst_once = 1'b0;
+end
+
+logic nrst_once;
+assign nrst_once = ~rst_once;
+
+logic [31:0] DerivedClocks;
+clk_divider #(
+  .WIDTH( 32 )
+) cd1 (
+  .clk( clk200 ),
+  .nrst( nrst_once ),
+  .ena( 1'b1 ),
+  .out( DerivedClocks[31:0] )
+);
+
+logic [31:0] E_DerivedClocks;
+edge_detect ed1[31:0] (
+  .clk( {32{clk200}} ),
+  .nrst( {32{nrst_once}} ),
+  .in( DerivedClocks[31:0] ),
+  .rising( E_DerivedClocks[31:0] ),
+  .falling( ),
+  .both( )
+);
+
+logic [15:0] RandomNumber1;
+c_rand rng1 (
+  .clk(clk200),
+  .rst(rst_once),
+  .reseed(1'b0),
+  .seed_val(DerivedClocks[31:0]),
+  .out( RandomNumber1[15:0] )
+);
+
+logic start;
+initial begin
+  #0 start = 1'b0;
+  #100 start = 1'b1;
+  #20 start = 1'b0;
+end
+
+// Module under test ==========================================================
+
+// comment or uncomment to test FWFT and normal fifo modes
+//`define TEST_FWFT yes
+
+// comment or uncomment to sweep-test or random test
+`define TEST_SWEEP yes
+
+// comment or uncomment to use bare scfifo or quartus wizard-generated wrappers
+//`define BARE_SCFIFO yes
+
+logic full1, empty1;
+logic full1_d1, empty1_d1;
+
+logic direction1 = 1'b0;
+always_ff @(posedge clk200) begin
+  if( ~nrst ) begin
+    direction1 <= 1'b0;
+  end else begin
+    // sweep logic
+    if( full1_d1 ) begin
+      direction1 <= 1'b1;
+    end else if( empty1_d1 ) begin
+      direction1 <= 1'b0;
+    end
+
+    // these signals allow "erroring" requests testing:
+    // - reads from the empty fifo
+    // - writes to the filled fifo
+    full1_d1 <= full1;
+    empty1_d1 <= empty1;
+  end
+end
+
+logic [3:0] cnt1;
+logic [15:0] data_out1;
+fifo_single_clock_ram #(
+  .DEPTH( 8 ),
+  .DATA_W( 16 )
+) FF1 (
+  .clk( clk200 ),
+  .nrst( nrst_once ),
+
+`ifdef TEST_SWEEP
+  .w_req( ~direction1 && &RandomNumber1[10] ),
+  .w_data( RandomNumber1[15:0] ),
+
+  .r_req( direction1 && &RandomNumber1[10] ),
+  .r_data( data_out1[15:0] ),
+`else
+  .w_req( &RandomNumber1[10:9] ),
+  .w_data( RandomNumber1[15:0] ),
+
+  .r_req( &RandomNumber1[8:7] ),
+  .r_data( data_out1[15:0] ),
+`endif
+
+  .cnt( cnt1[3:0] ),
+  .empty( empty1 ),
+  .full( full1 ),
+  .fail( )
+);
+
+
+
+logic full2, empty2;
+logic full2_d1, empty2_d1;
+
+// NOTE(review): direction2 is not read anywhere below; FF2 requests are driven by direction1, same as FF1
+logic direction2 = 1'b0;
+always_ff @(posedge clk200) begin
+  if( ~nrst ) begin
+    direction2 <= 1'b0;
+  end else begin
+    // sweep logic
+    if( full2_d1 ) begin
+      direction2 <= 1'b1;
+    end else if( empty2_d1 ) begin
+      direction2 <= 1'b0;
+    end
+
+    // these signals allow "erroring" requests testing:
+    // - reads from the empty fifo
+    // - writes to the filled fifo
+    full2_d1 <= full2;
+    empty2_d1 <= empty2;
+  end
+end
+
+//==============================================================================
+
+logic [15:0] data_out2;
+
+  DCFIFO #(
+    .LPM_WIDTH( 16 ),
+    .LPM_NUMWORDS( 8 ),
+    .LPM_WIDTHU( $clog2(8) ), /// CEIL(LOG2(LPM_NUMWORDS)),
+
+  `ifdef TEST_FWFT
+    .LPM_SHOWAHEAD( "ON" ),
+  `else
+    .LPM_SHOWAHEAD( "OFF" ),
+  `endif
+    .UNDERFLOW_CHECKING( "ON" ),
+    .OVERFLOW_CHECKING( "ON" ),
+
+    .ADD_RAM_OUTPUT_REGISTER( "OFF" ),
+    .ENABLE_ECC( "FALSE" ),
+
+    // output delay to the usedw[] outputs
+    .DELAY_RDUSEDW( 1 ), // one clock cycle by default
+    .DELAY_WRUSEDW( 1 ),
+    // Pipe length used for synchronization and metastability resolving
+    // If the rdclk and wrclk are unrelated, most often used values range from 2 to 4
+    // If they are synchronized to one another, 0 might be used
+    .RDSYNC_DELAYPIPE( 3 ), // from the wrclk to the rdclk subsystem
+    .WRSYNC_DELAYPIPE( 3 ), // from the rdclk to the wrclk subsystem
+    .CLOCKS_ARE_SYNCHRONIZED( "TRUE" ), // Are the clocks sufficiently synchronized (or clock multiples of each other with no phase shift)
+                                        // such that the synchronization and pipeline registers may be eliminated
+    .ADD_USEDW_MSB_BIT( "ON" ),
+    .WRITE_ACLR_SYNCH( "OFF" ),
+    .READ_ACLR_SYNCH( "OFF" )
+
+    //.USE_EAB( "ON" ),
+    //.MAXIMIZE_SPEED( 5 ),
+    //.DEVICE_FAMILY( "CYCLONE V" ),
+    //.OPTIMIZE_FOR_SPEED( 5 ),
+    //.CBXI_PARAMETER( "NOTHING" )
+  ) FF2 (
+    .aclr( 1'b0 ),
+
+    .wrclk( clk200 ),
+  `ifdef TEST_SWEEP
+    .wrreq( ~direction1 && &RandomNumber1[10] ),
+    .data( RandomNumber1[15:0] ),
+  `else
+    .wrreq( &RandomNumber1[10:9] ),
+    .data( RandomNumber1[15:0] ),
+  `endif
+    .wrempty( ),
+    .wrfull( ),
+    .wrusedw( ),
+
+    .rdclk( clk200 ),
+  `ifdef TEST_SWEEP
+    .rdreq( direction1 && &RandomNumber1[10] ),
+    .q( data_out2[15:0] ),
+  `else
+    .rdreq( &RandomNumber1[8:7] ),
+    .q( data_out2[15:0] ),
+  `endif
+    .rdempty( empty2 ),
+    .rdfull( full2 ),
+    .rdusedw( ),
+
+    .eccstatus( )
+  );
+
+
+//==============================================================================
+
+logic outputs_equal;
+assign outputs_equal = ( data_out1[15:0] == data_out2[15:0] ) ||
+`ifdef TEST_FWFT
+                       // skipping minor discontinuity
+                       // seems like altera's fifo has some additional buffering???
+                       ( cnt1[3:0] == 1 && data_out1[15:0] != data_out2[15:0] );
+`else
+                       1'b0;
+`endif
+
+// NOTE(review): empty_equal and full_equal are computed but not folded into success below
+logic empty_equal;
+assign empty_equal = ( empty1 == empty2 );
+
+logic full_equal;
+assign full_equal = ( full1 == full2 );
+
+logic success = 1'b1;
+always_ff @(posedge clk200) begin
+  if( ~nrst ) begin
+    success <= 1'b1;
+  end else begin
+    if( ~outputs_equal ) begin
+      success <= 1'b0;
+    end
+  end
+end
+
+
+endmodule