1
0
mirror of https://github.com/aolofsson/oh.git synced 2025-01-17 20:02:53 +08:00
oh/elink/hdl/etx_io.v
Andreas Olofsson 91f8e3db5a Complete redesign of the TX
- After finding the bug in the reference model and wasting countless hours going back and forth with FPGA timing optimization and bug tweaks, I realized that the design was fundamentally broken. The decision to use two clock domains (high speed and low speed) was correct from the beginning. The FPGA is dreadfully slow (you definitely don't want to do much logic at 300MHz...), but the handoff between tclk and tclk_div4 was too complicated. The puzzle of having to respond to wait quickly, covering the corner cases, and meeting timing was just too ugly.
- The "new" design goes back to the method of using the high speed logic only for doing a "dumb" parallel to serial converter and preparing all the necessary signals in the low speed domain.
- This feels A LOT cleaner, and it already passes basic tests with the chip reference and the loopback after less than 3 hours of redesign work!
- The TX meets timing but there is still some work to do with wait pushback testing.
2015-11-24 01:12:07 -05:00

199 lines
5.0 KiB
Verilog

//#############################################################################
//# etx_io: eLink transmitter I/O stage
//#
//# Takes a 64-bit data word and a 4-bit frame pattern prepared in the slow
//# (tx_lclk_div4) domain and serializes them in the fast (tx_lclk_io) domain:
//#   - every fast cycle the low 16 bits of the data shift register are sent
//#     through eight ODDR cells (two bits per pin per fast cycle),
//#   - the frame bit for that cycle is sent through its own ODDR cell,
//#   - a copy of the link clock is generated from tx_lclk90 with an ODDR.
//# The wr/rd wait (pushback) inputs are sampled in the fast domain and then
//# re-registered on the falling edge of the slow clock for the fabric.
//#
//# Parameters:
//#   IOSTD_ELINK - IOSTANDARD passed to the differential input buffers
//#   PW          - not referenced inside this module
//#   ETYPE       - 1: write-wait is taken single-ended from txi_wr_wait_p
//#                 any other value (0 = parallella): write-wait is received
//#                 through a differential IBUFDS
//#############################################################################
module etx_io (/*AUTOARG*/
   // Outputs
   txo_lclk_p, txo_lclk_n, txo_frame_p, txo_frame_n, txo_data_p,
   txo_data_n, tx_wr_wait, tx_rd_wait,
   // Inputs
   tx_lclk_io, tx_lclk_div4, tx_lclk90, txi_wr_wait_p, txi_wr_wait_n,
   txi_rd_wait_p, txi_rd_wait_n, tx_data_slow, tx_frame_slow
   );

   parameter IOSTD_ELINK = "LVDS_25";
   parameter PW          = 104;
   parameter ETYPE       = 0; // 0 = parallella
                              // 1 = ephycard

   //###########
   //# clocks
   //###########
   input        tx_lclk_io;    // fast clock driving the data/frame ODDRs
   input        tx_lclk_div4;  // slow clock (fabric side)
   input        tx_lclk90;     // fast clock, 90 degree shifted, for lclk output

   //###########
   //# eLink pins
   //###########
   output       txo_lclk_p, txo_lclk_n;       // tx clock output
   output       txo_frame_p, txo_frame_n;     // tx frame signal
   output [7:0] txo_data_p, txo_data_n;       // tx data (dual data rate)
   input        txi_wr_wait_p, txi_wr_wait_n; // tx write pushback
   input        txi_rd_wait_p, txi_rd_wait_n; // tx read pushback

   //#############
   //# Fabric interface
   //#############
   input [63:0] tx_data_slow;   // four 16-bit words, loaded once per "firstedge"
   input [3:0]  tx_frame_slow;  // one frame bit per 16-bit word (bit 3 goes first)
   output       tx_wr_wait;     // write pushback, registered on negedge tx_lclk_div4
   output       tx_rd_wait;     // read pushback, registered on negedge tx_lclk_div4

   //############
   //# REGS
   //############
   reg [63:0]   tx_data;          // data shift register (shifts right by 16)
   reg [3:0]    tx_frame;         // frame shift register (shifts left by 1)
   reg          tx_wr_wait_sync;  // wait inputs sampled on tx_lclk_io
   reg          tx_rd_wait_sync;
   reg          tx_wr_wait;
   reg          tx_rd_wait;

   //############
   //# WIRES
   //############
   // Explicit declaration: the original relied on an implicit net for
   // "firstedge", which fails to compile under `default_nettype none.
   wire         firstedge;        // load strobe from edgealign
   wire [15:0]  tx_data16;        // 16-bit word sent during the current fast cycle
   wire         tx_frame16;       // frame bit sent during the current fast cycle
   wire         txo_frame_ddr;
   wire         txo_lclk90;
   wire         tx_wr_wait_async;
   wire         tx_rd_wait_async;
   wire [7:0]   txo_data_ddr;

   //#########################################
   //# Synchronization to fast domain
   //#########################################

   // edgealign is defined elsewhere; presumably it flags the fast-clock
   // cycle that lines up with the slow clock edge -- confirm against its source.
   edgealign edgealign0 (.firstedge (firstedge),
                         .fastclk   (tx_lclk_io),
                         .slowclk   (tx_lclk_div4)
                         );

   // Parallel-load / shift registers.
   // Load:  capture the slow-domain data and frame.
   // Shift: data moves right by one 16-bit word (zero filled from the top),
   //        frame moves left by one bit (zero filled from the bottom).
   // As a result tx_data_slow[15:0] goes out first, paired with
   // tx_frame_slow[3].
   always @ (posedge tx_lclk_io)
     if(firstedge)
       begin
          tx_data[63:0] <= tx_data_slow[63:0];
          tx_frame[3:0] <= tx_frame_slow[3:0];
       end
     else
       begin
          tx_data[63:0] <= {16'b0, tx_data[63:16]};
          tx_frame[3:0] <= {tx_frame[2:0], 1'b0};
       end

   assign tx_data16[15:0] = tx_data[15:0];
   assign tx_frame16      = tx_frame[3];

   //##############################################
   //# Wait signal synchronization
   //##############################################

   // First stage: sample the pad-side wait signals with the fast clock.
   always @ (posedge tx_lclk_io)
     begin
        tx_wr_wait_sync <= tx_wr_wait_async;
        tx_rd_wait_sync <= tx_rd_wait_async;
     end

   // Second stage: hand over to the fabric on the falling slow-clock edge.
   always @ (negedge tx_lclk_div4)
     begin
        tx_wr_wait <= tx_wr_wait_sync;
        tx_rd_wait <= tx_rd_wait_sync;
     end

   //############################################
   //# IO DRIVER STUFF
   //############################################

   // DATA: one ODDR per pin. D1 carries the upper byte bit (tx_data16[i+8])
   // and D2 the lower byte bit (tx_data16[i]); per the Xilinx ODDR primitive
   // D1 is presented on the rising edge and D2 on the falling edge.
   genvar i;
   generate for(i=0; i<8; i=i+1)
     begin : gen_oddr
        ODDR #(.DDR_CLK_EDGE ("SAME_EDGE"))
        oddr_data (
                   .Q  (txo_data_ddr[i]),
                   .C  (tx_lclk_io),
                   .CE (1'b1),
                   .D1 (tx_data16[i+8]),
                   .D2 (tx_data16[i]),
                   .R  (1'b0),
                   .S  (1'b0)
                   );
     end
   endgenerate

   // FRAME: same value on both edges, so frame only changes once per fast cycle.
   ODDR #(.DDR_CLK_EDGE ("SAME_EDGE"))
   oddr_frame (
               .Q  (txo_frame_ddr),
               .C  (tx_lclk_io),
               .CE (1'b1),
               .D1 (tx_frame16),
               .D2 (tx_frame16),
               .R  (1'b0), //reset
               .S  (1'b0)
               );

   // LCLK: constant 1/0 on D1/D2 turns the ODDR into a clock forwarder
   // driven by the 90 degree shifted clock.
   ODDR #(.DDR_CLK_EDGE ("SAME_EDGE"))
   oddr_lclk (
              .Q  (txo_lclk90),
              .C  (tx_lclk90),
              .CE (1'b1),
              .D1 (1'b1),
              .D2 (1'b0),
              .R  (1'b0), //should be no reason to reset clock, static input
              .S  (1'b0)
              );

   // Differential output buffers
   OBUFDS obufds_data[7:0] (
                            .O  (txo_data_p[7:0]),
                            .OB (txo_data_n[7:0]),
                            .I  (txo_data_ddr[7:0])
                            );

   OBUFDS obufds_frame ( .O  (txo_frame_p),
                         .OB (txo_frame_n),
                         .I  (txo_frame_ddr)
                         );

   OBUFDS obufds_lclk  ( .O  (txo_lclk_p),
                         .OB (txo_lclk_n),
                         .I  (txo_lclk90)
                         );

   // Write-wait input.
   // ETYPE==1 takes the signal single-ended from the _p pin. Every other
   // value uses the differential buffer; the original only handled
   // ETYPE==0 here and left tx_wr_wait_async undriven for other values.
   generate
      if(ETYPE==1)
        begin
           assign tx_wr_wait_async = txi_wr_wait_p;
        end
      else
        begin
           IBUFDS
             #(.DIFF_TERM  ("TRUE"),     // Differential termination
               .IOSTANDARD (IOSTD_ELINK))
           ibufds_wrwait
             (.I  (txi_wr_wait_p),
              .IB (txi_wr_wait_n),
              .O  (tx_wr_wait_async));
        end
   endgenerate

   // Read-wait input.
   //TODO: Come up with cleaner defines for this
`ifdef TODO
   IBUFDS
     #(.DIFF_TERM  ("TRUE"),     // Differential termination
       .IOSTANDARD (IOSTD_ELINK))
   ibufds_rdwait
     (.I  (txi_rd_wait_p),
      .IB (txi_rd_wait_n),
      .O  (tx_rd_wait_async));
`else
   //On Parallella this signal comes in single-ended
   assign tx_rd_wait_async = txi_rd_wait_p;
`endif

endmodule // etx_io
// Local Variables:
// verilog-library-directories:("." "../../emesh/hdl" "../../common/hdl")
// End: