diff --git a/rtl/lfsr.v b/rtl/lfsr.v new file mode 100644 index 00000000..7a92182b --- /dev/null +++ b/rtl/lfsr.v @@ -0,0 +1,346 @@ +/* + +Copyright (c) 2016 Alex Forencich + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in +all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
*/

// Language: Verilog 2001

`timescale 1ns / 1ps

/*
 * Parametrizable combinatorial parallel LFSR/CRC
 */
module lfsr #
(
    // width of LFSR
    parameter LFSR_WIDTH = 31,
    // LFSR polynomial
    parameter LFSR_POLY = 31'h10000001,
    // LFSR configuration: "GALOIS", "FIBONACCI"
    parameter LFSR_CONFIG = "FIBONACCI",
    // bit-reverse input and output
    parameter REVERSE = 0,
    // width of data input
    parameter DATA_WIDTH = 8,
    // width of CRC/LFSR output
    parameter OUTPUT_WIDTH = LFSR_WIDTH,
    // implementation style: "AUTO", "LOOP", "REDUCTION"
    parameter STYLE = "AUTO"
)
(
    input  wire [DATA_WIDTH-1:0]   data_in,
    input  wire [LFSR_WIDTH-1:0]   lfsr_in,
    output wire [OUTPUT_WIDTH-1:0] lfsr_out
);

/*

Fully parametrizable combinatorial parallel LFSR/CRC module.  Implements an unrolled LFSR
next state computation, shifting DATA_WIDTH bits per pass through the module.  Input data
is XORed with LFSR feedback path, tie data_in to zero if this is not required.

Works in two parts: statically computes a set of bit masks, then uses these bit masks to
select bits for XORing to compute the next state.

Ports:

data_in

Data bits to be XORed with the LFSR feedback path (DATA_WIDTH bits)

lfsr_in

LFSR/CRC current state input (LFSR_WIDTH bits)

lfsr_out

LFSR/CRC next state output (OUTPUT_WIDTH bits)

Parameters:

LFSR_WIDTH

Specify width of LFSR/CRC register

LFSR_POLY

Specify the LFSR/CRC polynomial in hex format.  For example, the polynomial

x^32 + x^26 + x^23 + x^22 + x^16 + x^12 + x^11 + x^10 + x^8 + x^7 + x^5 + x^4 + x^2 + x + 1

would be represented as

32'h04c11db7

Note that the largest term (x^32) is suppressed.  This term is generated automatically based
on LFSR_WIDTH.  Only polynomial bits 1 through LFSR_WIDTH-1 are used to place XOR taps.

LFSR_CONFIG

Specify the LFSR configuration, either Fibonacci or Galois.  Fibonacci is generally used
for linear-feedback shift registers (LFSR) for pseudorandom binary sequence (PRBS) generators,
scramblers, and descramblers, while Galois is generally used for cyclic redundancy check
generators and checkers.

Fibonacci style (example for 64b66b scrambler, 0x8000000001)

    ,-----------------------------(+)<------------------------------,
    |                              ^                                |
    |  .----.  .----.       .----. |  .----.       .----.  .----.   |
    `->|  0 |->|  1 |->...->| 38 |-+->| 39 |->...->| 56 |->| 57 |->(+)<-DIN (MSB first)
       '----'  '----'       '----'    '----'       '----'  '----'

Galois style (example for CRC16, 0x8005)

    ,-------------------+---------------------------------+------------,
    |                   |                                 |            |
    |  .----.  .----.   V   .----.  .----.       .----.   V   .----.   |
    `->|  0 |->|  1 |->(+)->|  2 |->|  3 |->...->| 14 |->(+)->| 15 |->(+)<-DIN (MSB first)
       '----'  '----'       '----'  '----'       '----'       '----'

REVERSE

Bit-reverse LFSR input and output.

DATA_WIDTH

Specify width of input data bus.  The module will perform one shift per input data bit,
so if the input data bus is not required tie data_in to zero and set DATA_WIDTH to the
required number of shifts per clock cycle.

OUTPUT_WIDTH

Specify width of output data bus.  Defaults to LFSR_WIDTH.  Mainly useful for extending
the output width for LFSRs.  Ensure that lfsr_out is properly shifted and truncated so
that feeding it back around to lfsr_in produces the expected result.  Note that if
OUTPUT_WIDTH is smaller than LFSR_WIDTH, it may not be possible to get the LFSR to
feed back correctly.

STYLE

Specify implementation style.  Can be "AUTO", "LOOP", or "REDUCTION".  When "AUTO"
is selected, implementation will be "LOOP" or "REDUCTION" based on synthesis translate
directives.  "REDUCTION" and "LOOP" are functionally identical, however they simulate
and synthesize differently.  "REDUCTION" is implemented with a loop over a Verilog
reduction operator.  "LOOP" is implemented as a doubly-nested loop with no reduction
operator.  "REDUCTION" is very fast for simulation in iverilog and synthesizes well in
Quartus but synthesizes poorly in ISE, likely due to large inferred XOR gates causing
problems with the optimizer.  "LOOP" synthesizes well in both ISE and Quartus.  "AUTO"
will default to "REDUCTION" when simulating and "LOOP" for synthesizers that obey
synthesis translate directives.

Settings for common LFSR/CRC implementations:

Name        Configuration           Length  Polynomial          Initial value   Notes
CRC32       Galois, bit-reverse     32      32'h04c11db7        32'hffffffff    Ethernet FCS; invert final output
PRBS6       Fibonacci               6       6'h21               any
PRBS7       Fibonacci               7       7'h41               any
PRBS9       Fibonacci               9       9'h021              any             ITU V.52
PRBS10      Fibonacci               10      10'h081             any             ITU
PRBS11      Fibonacci               11      11'h201             any             ITU O.152
PRBS15      Fibonacci               15      15'h4001            any             ITU O.152
PRBS17      Fibonacci               17      17'h04001           any
PRBS20      Fibonacci               20      20'h00009           any             ITU V.57
PRBS23      Fibonacci               23      23'h040001          any             ITU O.151
PRBS31      Fibonacci               31      31'h10000001        any
64b66b      Fibonacci               58      58'h8000000001      any             10G Ethernet
128b130b    Fibonacci               23      23'h210125          any             PCIe gen 3

*/

// STATE_WIDTH is OUTPUT_WIDTH or LFSR_WIDTH, whichever is larger
// (derived from the parameters above; not meant to be overridden)
parameter STATE_WIDTH = OUTPUT_WIDTH > LFSR_WIDTH ? OUTPUT_WIDTH : LFSR_WIDTH;

// One mask pair per (possibly extended) shift register stage.  Row n selects
// which lfsr_in bits (LFSR_WIDTH wide) and which data_in bits (DATA_WIDTH wide)
// are XORed together to form stage n after DATA_WIDTH shifts.
reg [LFSR_WIDTH-1:0] lfsr_mask_state[STATE_WIDTH-1:0];
reg [DATA_WIDTH-1:0] lfsr_mask_data[STATE_WIDTH-1:0];

// scratch mask words used while computing the tables
reg [LFSR_WIDTH-1:0] state_val = 0;
reg [DATA_WIDTH-1:0] data_val = 0;

// loop indices; shared by the mask computation and the "LOOP" style logic
integer i, j;

initial begin
    // init bit masks
    // stage i starts out depending only on lfsr_in[i]; stages beyond
    // LFSR_WIDTH (output extension) start out empty
    for (i = 0; i < STATE_WIDTH; i = i + 1) begin
        lfsr_mask_state[i] = {LFSR_WIDTH{1'b0}};
        if (i < LFSR_WIDTH) begin
            lfsr_mask_state[i][i] = 1'b1;
        end
        lfsr_mask_data[i] = {DATA_WIDTH{1'b0}};
    end

    // simulate shift register
    if (LFSR_CONFIG == "FIBONACCI") begin
        // Fibonacci configuration
        for (i = DATA_WIDTH-1; i >= 0; i = i - 1) begin
            // determine shift in value
            // current value in last FF, XOR with input data bit (MSB first)
            state_val = lfsr_mask_state[LFSR_WIDTH-1];
            data_val = lfsr_mask_data[LFSR_WIDTH-1];
            data_val = data_val ^ (1 << i);

            // add XOR inputs from correct indices
            // only polynomial bits below LFSR_WIDTH are taps; testing the
            // shifted polynomial avoids relying on the width of the literal 1
            for (j = 1; j < LFSR_WIDTH; j = j + 1) begin
                if ((LFSR_POLY >> j) & 1) begin
                    state_val = lfsr_mask_state[j-1] ^ state_val;
                    data_val = lfsr_mask_data[j-1] ^ data_val;
                end
            end

            // shift
            for (j = STATE_WIDTH-1; j > 0; j = j - 1) begin
                lfsr_mask_state[j] = lfsr_mask_state[j-1];
                lfsr_mask_data[j] = lfsr_mask_data[j-1];
            end
            lfsr_mask_state[0] = state_val;
            lfsr_mask_data[0] = data_val;
        end
    end else if (LFSR_CONFIG == "GALOIS") begin
        // Galois configuration
        for (i = DATA_WIDTH-1; i >= 0; i = i - 1) begin
            // determine shift in value
            // current value in last FF, XOR with input data bit (MSB first)
            state_val = lfsr_mask_state[LFSR_WIDTH-1];
            data_val = lfsr_mask_data[LFSR_WIDTH-1];
            data_val = data_val ^ (1 << i);

            // shift
            for (j = STATE_WIDTH-1; j > 0; j = j - 1) begin
                lfsr_mask_state[j] = lfsr_mask_state[j-1];
                lfsr_mask_data[j] = lfsr_mask_data[j-1];
            end
            lfsr_mask_state[0] = state_val;
            lfsr_mask_data[0] = data_val;

            // add XOR inputs at correct indices
            // only polynomial bits below LFSR_WIDTH are taps
            for (j = 1; j < LFSR_WIDTH; j = j + 1) begin
                if ((LFSR_POLY >> j) & 1) begin
                    lfsr_mask_state[j] = lfsr_mask_state[j] ^ state_val;
                    lfsr_mask_data[j] = lfsr_mask_data[j] ^ data_val;
                end
            end
        end
    end else begin
        $error("Error: unknown configuration setting!");
        $finish;
    end

    // reverse bits if selected
    if (REVERSE) begin
        // reverse order of the output rows (bit-reverses lfsr_out)
        for (i = 0; i < OUTPUT_WIDTH/2; i = i + 1) begin
            state_val = lfsr_mask_state[i];
            data_val = lfsr_mask_data[i];
            lfsr_mask_state[i] = lfsr_mask_state[OUTPUT_WIDTH-i-1];
            lfsr_mask_data[i] = lfsr_mask_data[OUTPUT_WIDTH-i-1];
            lfsr_mask_state[OUTPUT_WIDTH-i-1] = state_val;
            lfsr_mask_data[OUTPUT_WIDTH-i-1] = data_val;
        end
        // reverse bits within each row (bit-reverses lfsr_in and data_in)
        for (i = 0; i < OUTPUT_WIDTH; i = i + 1) begin
            // each state mask word is LFSR_WIDTH bits wide, so it must be
            // mirrored around LFSR_WIDTH; using STATE_WIDTH here would index
            // past the end of the word whenever OUTPUT_WIDTH > LFSR_WIDTH
            state_val = 0;
            for (j = 0; j < LFSR_WIDTH; j = j + 1) begin
                state_val[j] = lfsr_mask_state[i][LFSR_WIDTH-j-1];
            end
            lfsr_mask_state[i] = state_val;

            data_val = 0;
            for (j = 0; j < DATA_WIDTH; j = j + 1) begin
                data_val[j] = lfsr_mask_data[i][DATA_WIDTH-j-1];
            end
            lfsr_mask_data[i] = data_val;
        end
    end

    // for (i = 0; i < OUTPUT_WIDTH; i = i + 1) begin
    //     $display("%b %b", lfsr_mask_state[i], lfsr_mask_data[i]);
    // end
end

// SIMULATION is only defined for tools that ignore synthesis translate directives
// synthesis translate_off
`define SIMULATION
// synthesis translate_on

`ifdef SIMULATION
// "AUTO" style is "REDUCTION" for faster simulation
parameter STYLE_INT = (STYLE == "AUTO") ? "REDUCTION" : STYLE;
`else
// "AUTO" style is "LOOP" for better synthesis result
parameter STYLE_INT = (STYLE == "AUTO") ? "LOOP" : STYLE;
`endif

genvar n;

generate

if (STYLE_INT == "REDUCTION") begin

    // use Verilog reduction operator
    // fast in iverilog
    // significantly larger than generated code with ISE (inferred wide XORs may be tripping up optimizer)
    // slightly smaller than generated code with Quartus
    // --> better for simulation

    for (n = 0; n < OUTPUT_WIDTH; n = n + 1) begin : loop
        assign lfsr_out[n] = ^{(lfsr_in & lfsr_mask_state[n]), (data_in & lfsr_mask_data[n])};
    end

end else if (STYLE_INT == "LOOP") begin

    // use nested loops
    // very slow in iverilog
    // slightly smaller than generated code with ISE
    // same size as generated code with Quartus
    // --> better for synthesis

    reg [OUTPUT_WIDTH-1:0] lfsr_out_reg = 0;

    assign lfsr_out = lfsr_out_reg;

    always @* begin
        for (i = 0; i < OUTPUT_WIDTH; i = i + 1) begin
            lfsr_out_reg[i] = 0;
            // lfsr_in and the state mask words are LFSR_WIDTH bits wide;
            // bounding by LFSR_WIDTH keeps every index in range even when
            // the output is wider than the LFSR
            for (j = 0; j < LFSR_WIDTH; j = j + 1) begin
                if (lfsr_mask_state[i][j]) begin
                    lfsr_out_reg[i] = lfsr_out_reg[i] ^ lfsr_in[j];
                end
            end
            for (j = 0; j < DATA_WIDTH; j = j + 1) begin
                if (lfsr_mask_data[i][j]) begin
                    lfsr_out_reg[i] = lfsr_out_reg[i] ^ data_in[j];
                end
            end
        end
    end

end else begin

    initial begin
        $error("Error: unknown style setting!");
        $finish;
    end

end

endgenerate

endmodule