mirror of
https://github.com/aolofsson/oh.git
synced 2025-01-17 20:02:53 +08:00
91f8e3db5a
- After finding the bug in the reference model and wasting countless hours going back and forth with FPGA timing optimization and bug tweaks, I realized that the design was fundamentally broken. The decision to use two clock domains (high speed and low speed) was correct from the beginning. The FPGA is dreadfully slow (you definitely don't want to do much logic at 300MHz...), but the handoff between tclk and tclk_div4 was too complicated. The puzzle of having to respond to wait quickly, covering the corner cases, and meeting timing was just too ugly. - The "new" design goes back to the method of using the high speed logic only for doing a "dumb" parallel to serial converter and preparing all the necessary signals in the low speed domain. - This feels A LOT cleaner and it already passes basic tests with the chip reference and the loopback after less than 3 hours of redesign work! - The TX meets timing but there is still some work to do with wait pushback testing.
199 lines
5.0 KiB
Verilog
199 lines
5.0 KiB
Verilog
//#############################################################################
//# etx_io: eLink transmit I/O stage
//#
//# Takes a 64-bit data word and a 4-bit frame nibble prepared in the slow
//# (tx_lclk_div4) domain and serializes them in the fast (tx_lclk_io) domain:
//#   - on "firstedge" the slow-domain word/frame are loaded,
//#   - on every other fast clock the data shifts right by 16 bits and the
//#     frame shifts left by one bit,
//#   - the current 16-bit slice is driven out through eight ODDR cells
//#     (D1 = bits [15:8], D2 = bits [7:0]) and differential output buffers.
//# Because data is consumed LSB-first while the frame is consumed MSB-first,
//# tx_frame_slow[3] accompanies tx_data_slow[15:0], tx_frame_slow[2]
//# accompanies tx_data_slow[31:16], and so on.
//#
//# The forwarded link clock is generated by an ODDR clocked from tx_lclk90.
//# The incoming wr/rd wait (pushback) pins are registered once on
//# tx_lclk_io and then once more on the falling edge of tx_lclk_div4.
//#############################################################################
module etx_io (/*AUTOARG*/
   // Outputs
   txo_lclk_p, txo_lclk_n, txo_frame_p, txo_frame_n, txo_data_p,
   txo_data_n, tx_wr_wait, tx_rd_wait,
   // Inputs
   tx_lclk_io, tx_lclk_div4, tx_lclk90, txi_wr_wait_p, txi_wr_wait_n,
   txi_rd_wait_p, txi_rd_wait_n, tx_data_slow, tx_frame_slow
   );

   parameter IOSTD_ELINK = "LVDS_25";  // IO standard for the wait input buffers
   parameter PW          = 104;        // not referenced in this module; kept
                                       // so existing parameter overrides still
                                       // elaborate
   parameter ETYPE       = 0;          // 0 = parallella
                                       // 1 = ephycard

   //###########
   //# reset, clocks
   //##########
   input        tx_lclk_io;     //fast ODDR
   input        tx_lclk_div4;   //slow clock
   input        tx_lclk90;      //fast 90deg shifted lclk

   //###########
   //# eLink pins
   //###########
   output       txo_lclk_p, txo_lclk_n;        // tx clock output
   output       txo_frame_p, txo_frame_n;      // tx frame signal
   output [7:0] txo_data_p, txo_data_n;        // tx data (dual data rate)
   input        txi_wr_wait_p, txi_wr_wait_n;  // tx write pushback
   input        txi_rd_wait_p, txi_rd_wait_n;  // tx read pushback

   //#############
   //# Fabric interface
   //#############
   input [63:0] tx_data_slow;   //data for burst or transaction
   input [3:0]  tx_frame_slow;  //framing signal
   output       tx_wr_wait;     //write pushback, updated on negedge tx_lclk_div4
   output       tx_rd_wait;     //read pushback, updated on negedge tx_lclk_div4

   //############
   //# REGS
   //############
   reg [63:0]   tx_data;          // serializer shift register (fast domain)
   reg [3:0]    tx_frame;         // frame shift register (fast domain)
   reg          tx_wr_wait_sync;  // wait inputs sampled on tx_lclk_io
   reg          tx_rd_wait_sync;
   reg          tx_wr_wait;
   reg          tx_rd_wait;

   //############
   //# WIRES
   //############
   wire         firstedge;        // load strobe from edgealign; declared
                                  // explicitly so the module also compiles
                                  // under `default_nettype none
   wire [15:0]  tx_data16;        // slice currently presented to the ODDRs
   wire         tx_frame16;       // frame bit for the current slice
   wire         txo_frame_ddr;
   wire         txo_lclk90;
   wire         tx_wr_wait_async;
   wire         tx_rd_wait_async;
   wire [7:0]   txo_data_ddr;

   //#########################################
   //# Synchronization to fast domain
   //#########################################

   //Find the aligned edge
   edgealign edgealign0 (.firstedge (firstedge),
                         .fastclk   (tx_lclk_io),
                         .slowclk   (tx_lclk_div4)
                         );

   //Data shift registers
   always @ (posedge tx_lclk_io)
     if(firstedge) //"load"
       begin
          tx_data[63:0] <= tx_data_slow[63:0]; //changes every 4 cycles
          tx_frame[3:0] <= tx_frame_slow[3:0];
       end
     else //"shift"
       begin
          tx_data[63:0] <= {16'b0,tx_data[63:16]};   // next 16 bits move down
          tx_frame[3:0] <= {tx_frame[2:0],1'b0};     // next frame bit moves up
       end

   assign tx_data16[15:0] = tx_data[15:0];  // data goes out LSB slice first
   assign tx_frame16      = tx_frame[3];    // frame goes out MSB first

   //##############################################
   //# Wait signal synchronization
   //##############################################

   // Stage 1: capture the asynchronous wait pins in the fast domain
   always @ (posedge tx_lclk_io)
     begin
        tx_wr_wait_sync <= tx_wr_wait_async;
        tx_rd_wait_sync <= tx_rd_wait_async;
     end

   // Stage 2: hand over to the slow domain on its falling edge
   always @ (negedge tx_lclk_div4)
     begin
        tx_wr_wait <= tx_wr_wait_sync;
        tx_rd_wait <= tx_rd_wait_sync;
     end

   //############################################
   //# IO DRIVER STUFF
   //############################################

   //DATA
   genvar i;
   generate for(i=0; i<8; i=i+1)
     begin : gen_oddr
        ODDR #(.DDR_CLK_EDGE  ("SAME_EDGE"))
        oddr_data (
                   .Q  (txo_data_ddr[i]),
                   .C  (tx_lclk_io),
                   .CE (1'b1),
                   .D1 (tx_data16[i+8]),
                   .D2 (tx_data16[i]),
                   .R  (1'b0),
                   .S  (1'b0)
                   );
     end
   endgenerate

   //FRAME (same value on both ODDR data inputs)
   ODDR #(.DDR_CLK_EDGE  ("SAME_EDGE"))
   oddr_frame (
               .Q  (txo_frame_ddr),
               .C  (tx_lclk_io),
               .CE (1'b1),
               .D1 (tx_frame16),
               .D2 (tx_frame16),
               .R  (1'b0), //reset
               .S  (1'b0)
               );

   //LCLK (constant 1/0 pattern clocked by the 90deg shifted clock)
   ODDR #(.DDR_CLK_EDGE  ("SAME_EDGE"))
   oddr_lclk (
              .Q  (txo_lclk90),
              .C  (tx_lclk90),
              .CE (1'b1),
              .D1 (1'b1),
              .D2 (1'b0),
              .R  (1'b0),//should be no reason to reset clock, static input
              .S  (1'b0)
              );

   //Buffer drivers
   OBUFDS obufds_data[7:0] (
                            .O   (txo_data_p[7:0]),
                            .OB  (txo_data_n[7:0]),
                            .I   (txo_data_ddr[7:0])
                            );

   OBUFDS obufds_frame ( .O   (txo_frame_p),
                         .OB  (txo_frame_n),
                         .I   (txo_frame_ddr)
                         );

   OBUFDS obufds_lclk ( .O   (txo_lclk_p),
                        .OB  (txo_lclk_n),
                        .I   (txo_lclk90)
                        );

   //Wait inputs
   // NOTE(review): only ETYPE 0 and 1 are handled; any other value leaves
   // tx_wr_wait_async without a driver. Confirm no other ETYPE is intended.
   generate
      if(ETYPE==1)
        begin
           // single-ended write wait: use the _p pin directly
           assign tx_wr_wait_async = txi_wr_wait_p;
        end
      else if (ETYPE==0)
        begin
           IBUFDS
             #(.DIFF_TERM  ("TRUE"),     // Differential termination
               .IOSTANDARD (IOSTD_ELINK))
           ibufds_wrwait
             (.I     (txi_wr_wait_p),
              .IB    (txi_wr_wait_n),
              .O     (tx_wr_wait_async));
        end
   endgenerate

   //TODO: Come up with cleaner defines for this
`ifdef TODO
   IBUFDS
     #(.DIFF_TERM  ("TRUE"),     // Differential termination
       .IOSTANDARD (IOSTD_ELINK))
   ibufds_rdwait
     (.I     (txi_rd_wait_p),
      .IB    (txi_rd_wait_n),
      .O     (tx_rd_wait_async));
`else
   //On Parallella this signal comes in single-ended
   assign tx_rd_wait_async = txi_rd_wait_p;
`endif

endmodule // etx_io
|
|
// Local Variables:
|
|
// verilog-library-directories:("." "../../emesh/hdl" "../../common/hdl")
|
|
// End:
|
|
|