1
0
mirror of https://github.com/pConst/basic_verilog.git synced 2025-01-14 06:42:54 +08:00

Test project for iterative compilation Quartus projects

This commit is contained in:
Konstantin Pavlov 2021-02-05 16:13:27 +03:00
parent 3619810053
commit 111dbc65c6
12 changed files with 604 additions and 0 deletions

Binary file not shown.

After

Width:  |  Height:  |  Size: 33 KiB

View File

@ -0,0 +1,53 @@
#------------------------------------------------------------------------------
# Makefile for iterative compilation for Intel / Altera Quartus
# Konstantin Pavlov, pavlovconst@gmail.com
#
#
# INFO ------------------------------------------------------------------------
#
# - This is a top-level Makefile
# - It makes a bunch of Quartus project copies which differ in only one variable
# - Then it compiles all projects in parallel and collects FMAX data
#
# - Please define var sweep range below
# - Separate quartus project will be created and compiled for every var value
#
# - This makefile is "make -j"-friendly
#
# Inclusive sweep range. Both ends may be overridden on the command line,
# e.g. "make -j VAR_START=8 VAR_STOP=16"
VAR_START = 5
VAR_STOP = 32

# Simple (:=) assignment so that `seq` is forked exactly once, at parse time,
# instead of on every expansion of VAR / JOBS
VAR := $(shell seq $(VAR_START) $(VAR_STOP))

# One "jobN" target per sweep value N
JOBS := $(addprefix job,$(VAR))
# None of these targets names a file the build produces. The jobN targets are
# listed too: their output lives in ./N, so a stray file called "jobN" must not
# make the job look up to date.
.PHONY: all fmax clean $(JOBS)

# Default target: run the whole sweep and build the FMAX summary
all: fmax
	echo '$@ success'
# jobN: create project copy ./N from ./base, set WIDTH to N and run the
# "stap" target of the copied project Makefile.
# Steps are chained with && so that a failed mkdir/cp/echo stops the job
# instead of launching Quartus on an incomplete project copy.
${JOBS}: job%:
	mkdir -p ./$* && \
	cp ./base/* ./$* && \
	echo "\`define WIDTH $*" > ./$*/define.vh && \
	$(MAKE) -C ./$* stap
# Collect the two lines that follow the Fmax table header of every job's
# timing report into ./fmax.rpt, preceded by the sweep value.
# The loop iterates over $(VAR) in plain POSIX sh syntax, so it does not
# depend on bash being the recipe shell (the C-style "for ((...))" loop is a
# bashism and fails when /bin/sh is dash).
fmax: ${JOBS}
	{ \
	  echo "FMAX summary report for iterative compilation"; \
	  for var in $(VAR); do \
	    echo "$$var"; \
	    grep -A3 '; Fmax ; Restricted Fmax ; Clock Name ; Note ;' \
	      ./$$var/OUTPUT/test.sta.rpt | tail -n2; \
	  done; \
	} > ./fmax.rpt
	echo 'fmax.rpt file done'
# Remove every generated project copy (./N for each sweep value) and the
# summary report. A single rm call replaces the bash-only "for ((...))" loop,
# which also deleted fmax.rpt once per iteration.
clean:
	rm -rfv $(addprefix ./,$(VAR)) ./fmax.rpt

View File

@ -0,0 +1,13 @@
fast_counter_iterative_test project
-----------------------------------
This project shows how to make iterative compilation for Intel / Altera Quartus FPGA
We create a bunch of generated Quartus project copies which differ in only one variable
All projects get compiled in parallel collecting FMAX data
This particular test shows FMAX advantage of using 'fast_counter.sv' module
Launch compilation using "make -j" command

View File

@ -0,0 +1,207 @@
#------------------------------------------------------------------------------
# Makefile for Intel / Altera Quartus
# Konstantin Pavlov, pavlovconst@gmail.com
#
#
# INFO ------------------------------------------------------------------------
#
# - Use this Makefile in linux terminal or on Windows under Cygwin
#
# - Default target ("make" command without any options) is intended to get fast
# compilation and timing analysis. Suitable for general project development
# and debugging
#
# - "make -j" runs timing analysis and *.sof file assembling in parallel. That
# saves you ~20 seconds every time :)
#
# - Specific targets (for example, "make sof") provide you requested results
# assuming that timing analysis is unnecessary
#
# - Check that Quartus and Modelsim directories are in your $PATH. Something like
# echo $PATH | tr : \\n | grep quartus
# export PATH="/cygdrive/c/intelFPGA/17.0/quartus/bin64:$PATH"
# export PATH="/cygdrive/c/intelFPGA/17.0/quartus/bin:$PATH"
# echo $PATH | tr : \\n | grep modelsim
# export PATH="/cygdrive/c/intelFPGA/17.0/modelsim_ase/win32aloem:$PATH"
# All values below use simple (:=) assignment: the $(shell ...) calls run once
# at parse time instead of on every reference.

# Directory make is running in
PROJ_DIR := $(shell pwd)

# Project name = name of the last *.qpf file in this directory, without the
# extension. The regex is escaped and anchored so that only a trailing
# ".qpf" is stripped.
PROJ := $(shell ls -1 *.qpf | tail -n1 | awk '{ sub(/\.qpf$$/,"") } 1' )

#SRCS = $(shell ls -R1 SOURCE/*.{v,sv,vh,sdc,tcl,hex,bin} 2>/dev/null | grep -v ':' )
# NOTE(review): "ls -R1" also prints "dir:" headers for sub-directories;
# confirm SOURCE/ is flat or switch to the filtered variant above
SRCS := $(shell ls -R1 SOURCE/* )

# Project and settings files
QPF := $(PROJ).qpf
QSF := $(PROJ).qsf

# Programming files produced in ./OUTPUT
SOF := ./OUTPUT/$(PROJ).sof
POF := ./OUTPUT/$(PROJ).pof
RBF := ./OUTPUT/$(PROJ).rbf
JAM := ./OUTPUT/$(PROJ).jam

# Optional Tcl hooks (the quotes are part of the value and are seen by the shell)
PRE_SCRIPT := './DEBUG/pre_flow.tcl'
POST_SCRIPT := './DEBUG/post_flow.tcl'

# Reports used as build-stage targets
MAP_REPORT := ./OUTPUT/$(PROJ).map.rpt
FIT_REPORT := ./OUTPUT/$(PROJ).fit.rpt

# Design Space Explorer configuration
DSE_CONFIG := $(PROJ).dse

# Programmer connection used by the prog* targets
TARGET_IP := '192.168.1.1'
TARGET_PORT := 'USB-1'
TARGET_CHIP := '1'
# Every command-style target is declared phony so that a file with the same
# name in the project directory can never make it look up to date
.PHONY: all info clean stp gui \
        map fit asm sta stap sof pof rbf jam \
        prog prog_pof prog_rbf dse sim sim_clean gtkwave

# Default target: timing analysis plus *.sof assembly
all: sta sof
# Print the values this Makefile derived for the project.
# printf is used instead of "echo -e", which is not understood by every
# /bin/sh (dash prints the "-e" literally).
info:
	@printf '\n Project directory: %s\n Project name: %s\n Project sources: %s\n' \
		"$(PROJ_DIR)" "$(PROJ)" "$(SRCS)"
# Open the project file in the Quartus GUI; the tool's stdout is discarded
gui:
	quartus $(QPF) >/dev/null
# Analysis & synthesis. Rebuilt when any source, the project file or the
# settings file is newer than the map report.
#
# The optional pre-flow Tcl script runs as an ordinary recipe command. It must
# not be wrapped in $(shell ...): make would then paste the script's stdout
# back into the recipe and try to execute it as a command.
#
# dont use --effort=fast because it can dramatically increase fitting time
$(MAP_REPORT): $(SRCS) $(QPF) $(QSF)
	if test -f $(PRE_SCRIPT); then \
		quartus_sh -t $(PRE_SCRIPT) compile $(PROJ) $(PROJ); \
	fi
	quartus_map --no_banner \
		--read_settings_files=on \
		--write_settings_files=off \
		--64bit $(PROJ) -c $(PROJ)

# Convenience alias. It has to name the report in ./OUTPUT (MAP_REPORT);
# "$(PROJ).map.rpt" in the project root has no rule and can never be built.
map: $(MAP_REPORT)
# Place & route, started once the map report exists / is newer.
# A quartus_cdb step used to precede the fitter and is kept for reference:
#   quartus_cdb --read_settings_files=on \
#               --write_settings_files=off \
#               --64bit $(PROJ) -c $(PROJ)
# using --io_smart_recompile for secondary fitter launches is tricky
$(FIT_REPORT): $(MAP_REPORT)
	quartus_fit --no_banner \
		--read_settings_files=on --write_settings_files=off \
		--inner_num=1 --one_fit_attempt=on \
		--pack_register=off --effort=fast \
		--64bit $(PROJ) -c $(PROJ)

# Convenience alias for the fitter stage
fit: $(FIT_REPORT)
# Assemble the *.sof programming file from the fitted design
$(SOF): $(FIT_REPORT)
	quartus_asm --no_banner \
		--read_settings_files=off --write_settings_files=off \
		--64bit $(PROJ) -c $(PROJ)

# Convenience alias for the assembler stage
asm: $(SOF)
# Timing analysis of the fitted design.
#
# Post-flow hook, currently disabled. It is kept here as a make-level comment
# in plain shell form: a "#$(shell ...)" line inside a recipe is still
# expanded by make, so the script would run even though it looks commented out.
#   if test -f $(POST_SCRIPT); then quartus_sh -t $(POST_SCRIPT) compile $(PROJ) $(PROJ); fi
sta: $(FIT_REPORT)
	quartus_sta $(PROJ) -c $(PROJ)

# Same analysis with the --parallel and --model=slow options; this is the
# target the iterative top-level Makefile invokes
stap: $(FIT_REPORT)
	quartus_sta --parallel --model=slow $(PROJ) -c $(PROJ)
# Convert the *.sof into the other programming formats. One rule header with
# three targets is three independent rules sharing a recipe;
# $< is the *.sof, $@ the file being produced.
$(POF) $(RBF) $(JAM): $(SOF)
	quartus_cpf --no_banner -c $< $@

# Short aliases for the programming files
sof: $(SOF)
pof: $(POF)
rbf: $(RBF)
jam: $(JAM)
# Program the device over JTAG through the USB-Blaster cable addressed by
# TARGET_IP / TARGET_PORT; TARGET_CHIP is the position passed after "@".

# *.sof, operation string "P"
prog: sof
	quartus_pgm --no_banner -m jtag \
		-c "USB-Blaster on $(TARGET_IP) [$(TARGET_PORT)]" \
		-o "P;$(SOF)@$(TARGET_CHIP)"

# *.pof, operation string "BVP"
prog_pof: pof
	quartus_pgm --no_banner -m jtag \
		-c "USB-Blaster on $(TARGET_IP) [$(TARGET_PORT)]" \
		-o "BVP;$(POF)@$(TARGET_CHIP)"

# *.rbf, operation string "BVP"
prog_rbf: rbf
	quartus_pgm --no_banner -m jtag \
		-c "USB-Blaster on $(TARGET_IP) [$(TARGET_PORT)]" \
		-o "BVP;$(RBF)@$(TARGET_CHIP)"
# Remove everything the Quartus flow generates in the project directory
clean:
# common junk files
	rm -rfv $(PROJ).qws c5_pin_model_dump.txt $(PROJ).ipregen.rpt .qsys_edit/
# compilation databases
	rm -rfv db/ incremental_db/ greybox_tmp/
# output directory
	rm -rfv OUTPUT/
# hard memory controller summaries
	rm -rfv ddr3_hmc_ddr3_0_p0_0_summary.csv ddr3_hmc_ddr3_0_p0_1_summary.csv
# design space explorer files
	rm -rfv dse/ dse1_base.qpf dse1_base.qsf $(PROJ).dse.rpt $(PROJ).archive.rpt
# early power estimator files
	rm -rfv $(PROJ)_early_pwr.csv
# TODO: add project-specific files to remove here
# Run Design Space Explorer with the project's *.dse configuration
# ($< is the DSE_CONFIG prerequisite, $(QPF) expands to $(PROJ).qpf)
dse: $(DSE_CONFIG)
	quartus_dse --no_banner \
		--terminate off \
		--num-parallel-processors 10 \
		--auto-discover-files on \
		--revision $(PROJ) $(QPF) \
		--use-dse-file $<
# Launch ModelSim and let it execute compile.tcl
sim: $(SRCS)
	modelsim -do compile.tcl

# Placeholder target: no recipe yet
sim_clean:
# Icarus Verilog + GTKWave flow: compile, simulate, write a GTKWave settings
# file and open the resulting VCD.
#
# The settings file is written as ".gtkwaverc" in one redirect. The former
# ".\gtkwaverc" spelling only worked because the POSIX shell drops the
# backslash, and it needed seven separate echo/redirect commands.
#
# Place this code into your testbench and add signals you want to dump
# and navigate during simulation:
#   initial begin
#     $dumpfile("iverilog_sim.vcd");
#     $dumpvars( 0, M );
#     #10000 $finish;
#   end
gtkwave: $(SRCS)
# creating VVP file
# NOTE(review): "-s" takes the next word as its argument, so the first entry
# of $(SRCS) is consumed by it - confirm this is intended
	iverilog -Wall -g2012 -o iverilog_sim.vvp -s $(SRCS)
# creating VCD file
	vvp -v iverilog_sim.vvp
# creating settings file for gtkwave on-the-fly
	printf '%s\n' \
		'fontname_waves Verdana 9' \
		'fontname_signals Verdana 9' \
		'fontname_logfile Verdana 9' \
		'splash_disable 1' \
		'use_roundcaps 1' \
		'force_toolbars 1' \
		'left_justify_sigs 1' > .gtkwaverc
# launching gtkwave
# press CTRL+S to save wave config. gtkwave will open it automatically next time
	gtkwave -r .gtkwaverc iverilog_sim.vcd wave.gtkw
# Start quartus_stp on the project file
stp:
	quartus_stp --no_banner $(QPF)

View File

@ -0,0 +1 @@
// Counter width used by main.sv. The top-level iterative Makefile overwrites
// define.vh in every generated project copy with the swept value.
`define WIDTH 5

View File

@ -0,0 +1,109 @@
//------------------------------------------------------------------------------
// fast_counter.sv
// Konstantin Pavlov, pavlovconst@gmail.com
//------------------------------------------------------------------------------
// INFO ------------------------------------------------------------------------
//
// - This is a synthetic fast counter which appears faster than a standard one
// generated from pure Verilog code
//
// - My tests show that it is on average 30MHz faster in direct comparisons for
// counters from 5 to 32 bit widths in Cyclone V
//
// - Use this counter only when counter performance is your last and ultimate
// resort to meet timing. The fast counter is area-inefficient.
//
// - fast_counter_iterative_test project in the repo shows the fast counter's advantage
// https://github.com/pConst/basic_verilog/fast_counter_iterative_test/
//
/* --- INSTANTIATION TEMPLATE BEGIN ---
fast_counter #(
.WIDTH( 14 )
) fc (
.clk( clk ),
.set( ), // highest priority operation, use it like a reset also
.set_val( ),
.dec( ),
.q( ),
.q_is_zero( )
);
--- INSTANTIATION TEMPLATE END ---*/
module fast_counter #( parameter
  WIDTH = 8                      // must be at least 5: the upper WIDTH-4 bits
                                 // are kept in a separate register
)(
  input clk,
  input set,                     // load set_val; takes priority over dec
  input [WIDTH-1:0] set_val,
  input dec,                     // decrement by one
  output [WIDTH-1:0] q,
  output q_is_zero               // high when all bits of q are zero
);

// Six 16-bit rings. For rows 0..3, bit p of row k equals bit k of the
// number p, so position 0 of rows 3..0 spells the low nibble of the count.
// Row 5 is one-hot at position 0 (marks "low nibble is zero").
// Row 4 is one-hot at position 15; it is rotated but never read here.
const logic [5:0][15:0] lsb_bits_init = { 16'b0000000000000001,
                                          16'b1000000000000000,
                                          16'b1111111100000000,
                                          16'b1111000011110000,
                                          16'b1100110011001100,
                                          16'b1010101010101010 };

// upper WIDTH-4 bits of the count, held as an ordinary binary register
logic [WIDTH-4-1:0] msb_bits = '0;

// current state of the six rings
logic [5:0][15:0] lsb_bits = lsb_bits_init;

// flat view of the rings; not read anywhere inside this module
logic [16*6-1:0] lsb_bits_flat;
assign lsb_bits_flat[16*6-1:0] = lsb_bits;

integer row, pos;
always_ff @(posedge clk) begin
  if( set ) begin
    // load: upper bits directly, rings pre-rotated by the low nibble so that
    // position 0 holds the table entry for set_val[3:0]
    msb_bits <= set_val[WIDTH-1:4];
    for( row=0; row<6; row++ ) begin
      for( pos=0; pos<16; pos++ ) begin
        lsb_bits[row][pos] <= lsb_bits_init[row][(set_val[3:0]+pos) % 16];
      end
    end
  end else if( dec ) begin
    // borrow from the upper part when the low nibble is currently zero
    if( lsb_bits[5][0] ) begin
      msb_bits <= msb_bits - 1'b1;
    end
    // rotate every ring by one: position 15 moves to position 0 and
    // every other position p takes the old value of position p-1
    for( row=0; row<6; row++ ) begin
      lsb_bits[row] <= { lsb_bits[row][14:0], lsb_bits[row][15] };
    end
  end
end

// count = upper register followed by position 0 of rings 3..0
assign q = { msb_bits,
             lsb_bits[3][0],
             lsb_bits[2][0],
             lsb_bits[1][0],
             lsb_bits[0][0] };

assign q_is_zero = ~|q;

endmodule

View File

@ -0,0 +1,6 @@
# Identical constraint on both clock ports: period 2.000, rising edge at
# 0.000 and falling edge at 1.000
# (500 MHz if the time unit is ns - TODO confirm no set_time_format override)
create_clock -period 2.000 -waveform { 0.000 1.000 } [get_ports {clk1}]
create_clock -period 2.000 -waveform { 0.000 1.000 } [get_ports {clk2}]
# NOTE(review): no PLL is instantiated in main.sv, so derive_pll_clocks is
# presumably a no-op for this project - confirm
derive_pll_clocks
derive_clock_uncertainty

View File

@ -0,0 +1,73 @@
// Fast counter test project
// Konstantin Pavlov, pavlovconst@gmail.com
`include "define.vh"
// Two counters of the same `WIDTH, each in its own clock domain, so that
// their FMAX can be compared directly:
//   clk1 domain - a standard down-counter written in plain SystemVerilog
//   clk2 domain - the fast_counter module
// nrst1 / nrst2 are active-low resets: a low level forces a load of set_val
// and clears the registered q_is_zero output of that domain.
module main(

  input clk1,
  input nrst1,
  input set1,
  input [`WIDTH-1:0] set_val1,
  input dec1,
  output logic q_is_zero1 = 1'b0,

  input clk2,
  input nrst2,
  input set2,
  input [`WIDTH-1:0] set_val2,
  input dec2,
  output logic q_is_zero2 = 1'b0
);

// ---- standard counter, clk1 domain ------------------------------------------

logic [`WIDTH-1:0] std_cntr = '0;

always_ff @(posedge clk1) begin
  // load on an explicit set or while reset is asserted (nrst1 low); the
  // reset is inverted here so the counter can actually count when nrst1 is high
  if( set1 || ~nrst1 ) begin
    std_cntr[`WIDTH-1:0] <= set_val1[`WIDTH-1:0];
  end else if( dec1 ) begin
    std_cntr[`WIDTH-1:0] <= std_cntr[`WIDTH-1:0] - 1'b1;
  end
end

//registering all outputs
always_ff @(posedge clk1) begin
  if( ~nrst1 ) begin
    q_is_zero1 <= 1'b0;
  end else begin
    q_is_zero1 <= (std_cntr[`WIDTH-1:0] == '0);
  end
end

// ---- fast counter, clk2 domain ----------------------------------------------

logic qz;

fast_counter #(
  .WIDTH( `WIDTH )
) fc (
  .clk( clk2 ),
  .set( set2 || ~nrst2 ),   // same load condition as the standard counter
  .set_val( set_val2 ),
  .dec( dec2 ),
  // no value output
  .q_is_zero( qz )
);

//registering all outputs
// Clocked by clk2: qz and nrst2 belong to the clk2 domain, so the output
// register must sit there too. Otherwise the zero-detect path becomes a
// clk2->clk1 crossing and is left out of the clk2 FMAX figure.
always_ff @(posedge clk2) begin
  if( ~nrst2 ) begin
    q_is_zero2 <= 1'b0;
  end else begin
    q_is_zero2 <= qz;
  end
end

endmodule

View File

@ -0,0 +1,31 @@
# -------------------------------------------------------------------------- #
#
# Copyright (C) 2017 Intel Corporation. All rights reserved.
# Your use of Intel Corporation's design tools, logic functions
# and other software and tools, and its AMPP partner logic
# functions, and any output files from any of the foregoing
# (including device programming or simulation files), and any
# associated documentation or information are expressly subject
# to the terms and conditions of the Intel Program License
# Subscription Agreement, the Intel Quartus Prime License Agreement,
# the Intel MegaCore Function License Agreement, or other
# applicable license agreement, including, without limitation,
# that your use is for the sole purpose of programming logic
# devices manufactured by Intel and sold by Intel or its
# authorized distributors. Please refer to the applicable
# agreement for further details.
#
# -------------------------------------------------------------------------- #
#
# Quartus Prime
# Version 17.0.0 Build 595 04/25/2017 SJ Standard Edition
# Date created = 11:22:30 September 26, 2018
#
# -------------------------------------------------------------------------- #
QUARTUS_VERSION = "17.0"
DATE = "11:22:30 September 26, 2018"
# Revisions
PROJECT_REVISION = "test"

View File

@ -0,0 +1,26 @@
# Target device and the Quartus version the project was created / last opened with
set_global_assignment -name FAMILY "Cyclone V"
set_global_assignment -name DEVICE 5CGXFC4C7F27C8
set_global_assignment -name ORIGINAL_QUARTUS_VERSION 17.0.0
set_global_assignment -name LAST_QUARTUS_VERSION "17.0.0 Lite Edition"
# Top-level entity and output directory (the Makefile expects reports in ./OUTPUT)
set_global_assignment -name TOP_LEVEL_ENTITY main
set_global_assignment -name PROJECT_OUTPUT_DIRECTORY OUTPUT
# Compilation effort settings
set_global_assignment -name NUM_PARALLEL_PROCESSORS ALL
set_global_assignment -name FITTER_EFFORT "STANDARD FIT"
# Settings of the "Top" design partition
set_global_assignment -name PARTITION_NETLIST_TYPE SOURCE -section_id Top
set_global_assignment -name PARTITION_FITTER_PRESERVATION_LEVEL PLACEMENT_AND_ROUTING -section_id Top
set_global_assignment -name PARTITION_COLOR 16764057 -section_id Top
# Design sources and timing constraints
set_global_assignment -name SYSTEMVERILOG_FILE fast_counter.sv
set_global_assignment -name SYSTEMVERILOG_FILE define.vh
set_global_assignment -name SYSTEMVERILOG_FILE main.sv
set_global_assignment -name SDC_FILE main.sdc
# Junction temperature range and power-analysis presets
set_global_assignment -name MIN_CORE_JUNCTION_TEMP 0
set_global_assignment -name MAX_CORE_JUNCTION_TEMP 85
set_global_assignment -name POWER_PRESET_COOLING_SOLUTION "23 MM HEAT SINK WITH 200 LFPM AIRFLOW"
set_global_assignment -name POWER_BOARD_THERMAL_MODEL "NONE (CONSERVATIVE)"
# Assigns the design root ("|") to partition "Top"
set_instance_assignment -name PARTITION_HIERARCHY root_partition -to | -section_id Top

View File

@ -0,0 +1,85 @@
FMAX summary report for iterative compilation
5
; 356.38 MHz ; 356.38 MHz ; clk1 ; ;
; 445.24 MHz ; 445.24 MHz ; clk2 ; ;
6
; 329.71 MHz ; 329.71 MHz ; clk1 ; ;
; 376.51 MHz ; 376.51 MHz ; clk2 ; ;
7
; 322.27 MHz ; 322.27 MHz ; clk1 ; ;
; 412.71 MHz ; 412.71 MHz ; clk2 ; ;
8
; 330.58 MHz ; 330.58 MHz ; clk1 ; ;
; 341.88 MHz ; 341.88 MHz ; clk2 ; ;
9
; 322.48 MHz ; 322.48 MHz ; clk1 ; ;
; 382.12 MHz ; 382.12 MHz ; clk2 ; ;
10
; 288.68 MHz ; 288.68 MHz ; clk1 ; ;
; 353.23 MHz ; 353.23 MHz ; clk2 ; ;
11
; 303.03 MHz ; 303.03 MHz ; clk2 ; ;
; 316.36 MHz ; 316.36 MHz ; clk1 ; ;
12
; 300.48 MHz ; 300.48 MHz ; clk1 ; ;
; 323.62 MHz ; 323.62 MHz ; clk2 ; ;
13
; 276.24 MHz ; 276.24 MHz ; clk1 ; ;
; 281.29 MHz ; 281.29 MHz ; clk2 ; ;
14
; 283.53 MHz ; 283.53 MHz ; clk1 ; ;
; 301.11 MHz ; 301.11 MHz ; clk2 ; ;
15
; 257.33 MHz ; 257.33 MHz ; clk1 ; ;
; 300.93 MHz ; 300.93 MHz ; clk2 ; ;
16
; 268.02 MHz ; 268.02 MHz ; clk1 ; ;
; 282.81 MHz ; 282.81 MHz ; clk2 ; ;
17
; 248.45 MHz ; 248.45 MHz ; clk1 ; ;
; 287.77 MHz ; 287.77 MHz ; clk2 ; ;
18
; 246.97 MHz ; 246.97 MHz ; clk2 ; ;
; 268.1 MHz ; 268.1 MHz ; clk1 ; ;
19
; 254.32 MHz ; 254.32 MHz ; clk1 ; ;
; 279.56 MHz ; 279.56 MHz ; clk2 ; ;
20
; 254.07 MHz ; 254.07 MHz ; clk1 ; ;
; 277.55 MHz ; 277.55 MHz ; clk2 ; ;
21
; 249.07 MHz ; 249.07 MHz ; clk2 ; ;
; 264.27 MHz ; 264.27 MHz ; clk1 ; ;
22
; 242.13 MHz ; 242.13 MHz ; clk1 ; ;
; 260.55 MHz ; 260.55 MHz ; clk2 ; ;
23
; 246.73 MHz ; 246.73 MHz ; clk2 ; ;
; 255.56 MHz ; 255.56 MHz ; clk1 ; ;
24
; 219.88 MHz ; 219.88 MHz ; clk2 ; ;
; 258.33 MHz ; 258.33 MHz ; clk1 ; ;
25
; 257.33 MHz ; 257.33 MHz ; clk1 ; ;
; 266.31 MHz ; 266.31 MHz ; clk2 ; ;
26
; 229.57 MHz ; 229.57 MHz ; clk2 ; ;
; 258.87 MHz ; 258.87 MHz ; clk1 ; ;
27
; 238.83 MHz ; 238.83 MHz ; clk2 ; ;
; 247.65 MHz ; 247.65 MHz ; clk1 ; ;
28
; 236.74 MHz ; 236.74 MHz ; clk2 ; ;
; 259.27 MHz ; 259.27 MHz ; clk1 ; ;
29
; 233.32 MHz ; 233.32 MHz ; clk2 ; ;
; 251.57 MHz ; 251.57 MHz ; clk1 ; ;
30
; 222.62 MHz ; 222.62 MHz ; clk1 ; ;
; 238.04 MHz ; 238.04 MHz ; clk2 ; ;
31
; 229.62 MHz ; 229.62 MHz ; clk1 ; ;
; 229.99 MHz ; 229.99 MHz ; clk2 ; ;
32
; 190.62 MHz ; 190.62 MHz ; clk2 ; ;
; 228.83 MHz ; 228.83 MHz ; clk1 ; ;