
merged changes in pcie

commit 24eae58e6c
Author: Alex Forencich
Date:   2020-03-08 15:25:28 -07:00
5 changed files with 221 additions and 124 deletions

File 1 of 5

@@ -314,11 +314,13 @@ reg finish_tag;
reg [3:0] first_be;
reg [3:0] last_be;
reg [10:0] dword_count;
+reg req_last_tlp;
+reg [PCIE_ADDR_WIDTH-1:0] req_pcie_addr;
reg [PCIE_ADDR_WIDTH-1:0] req_pcie_addr_reg = {PCIE_ADDR_WIDTH{1'b0}}, req_pcie_addr_next;
reg [RAM_ADDR_WIDTH-1:0] req_addr_reg = {RAM_ADDR_WIDTH{1'b0}}, req_addr_next;
reg [LEN_WIDTH-1:0] req_op_count_reg = {LEN_WIDTH{1'b0}}, req_op_count_next;
-reg [LEN_WIDTH-1:0] req_tlp_count_reg = {LEN_WIDTH{1'b0}}, req_tlp_count_next;
+reg [12:0] req_tlp_count_reg = 13'd0, req_tlp_count_next;
reg [11:0] lower_addr_reg = 12'd0, lower_addr_next;
reg [12:0] byte_count_reg = 13'd0, byte_count_next;
@@ -585,10 +587,18 @@ always @* begin
// crosses 4k boundary
req_tlp_count_next = 13'h1000 - req_pcie_addr_reg[11:0];
dword_count = 11'h400 - req_pcie_addr_reg[11:2];
+req_last_tlp = req_pcie_addr_plus_op_count[11:0] == 0;
+// optimized req_pcie_addr = req_pcie_addr_reg + req_tlp_count_next
+req_pcie_addr[PCIE_ADDR_WIDTH-1:12] = req_pcie_addr_reg[PCIE_ADDR_WIDTH-1:12]+1;
+req_pcie_addr[11:0] = 12'd0;
end else begin
// does not cross 4k boundary, send one TLP
req_tlp_count_next = req_op_count_reg;
dword_count = (req_op_count_reg + req_pcie_addr_reg[1:0] + 3) >> 2;
+req_last_tlp = 1'b1;
+// optimized req_pcie_addr = req_pcie_addr_reg + req_tlp_count_next
+req_pcie_addr[PCIE_ADDR_WIDTH-1:12] = req_pcie_addr_reg[PCIE_ADDR_WIDTH-1:12];
+req_pcie_addr[11:0] = req_pcie_addr_reg[11:0] + req_op_count_reg;
end
end else begin
// packet larger than max read request size
@@ -596,10 +606,18 @@ always @* begin
// crosses 4k boundary
req_tlp_count_next = 13'h1000 - req_pcie_addr_reg[11:0];
dword_count = 11'h400 - req_pcie_addr_reg[11:2];
+req_last_tlp = 1'b0;
+// optimized req_pcie_addr = req_pcie_addr_reg + req_tlp_count_next
+req_pcie_addr[PCIE_ADDR_WIDTH-1:12] = req_pcie_addr_reg[PCIE_ADDR_WIDTH-1:12]+1;
+req_pcie_addr[11:0] = 12'd0;
end else begin
// does not cross 4k boundary, send one TLP
req_tlp_count_next = {max_read_request_size_dw_reg, 2'b00} - req_pcie_addr_reg[1:0];
dword_count = max_read_request_size_dw_reg;
+req_last_tlp = 1'b0;
+// optimized req_pcie_addr = req_pcie_addr_reg + req_tlp_count_next
+req_pcie_addr[PCIE_ADDR_WIDTH-1:12] = req_pcie_addr_reg[PCIE_ADDR_WIDTH-1:12];
+req_pcie_addr[11:0] = {req_pcie_addr_reg[11:2] + max_read_request_size_dw_reg, 2'b00};
end
end
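
The pattern added in both branches above computes req_last_tlp and the optimized req_pcie_addr directly from the current address and length registers, so the last-TLP decision and the next request address no longer depend on the subtraction that produces req_op_count_next. Below is a minimal, self-contained sketch of the same splitting rule; the module and port names are hypothetical, and only the 4k-boundary handling is reproduced.

module tlp_split_sketch #(
    parameter ADDR_W = 64
)(
    input  wire [ADDR_W-1:0] addr,      // current PCIe address
    input  wire [12:0]       op_count,  // remaining bytes, already clamped to the max request
    output reg  [12:0]       tlp_count, // bytes carried by this TLP
    output reg               last_tlp,  // no further TLP needed after this one
    output reg  [ADDR_W-1:0] next_addr  // starting address of the following TLP
);

    // 13-bit end position within the current 4k page; bit 12 is the carry into
    // the next page and stands in for the addr[12] != addr_plus_count[12] test
    wire [12:0] end_pos = {1'b0, addr[11:0]} + op_count;

    always @* begin
        if (end_pos[12]) begin
            // reaches or crosses the 4k boundary: truncate at the boundary;
            // this is the last TLP only if it ends exactly on the boundary
            tlp_count = 13'h1000 - addr[11:0];
            last_tlp  = end_pos[11:0] == 12'd0;
            // optimized next address: increment the page, clear the offset
            next_addr = {addr[ADDR_W-1:12] + 1'b1, 12'd0};
        end else begin
            // fits inside the current 4k page: send the remainder in one TLP
            tlp_count = op_count;
            last_tlp  = 1'b1;
            // optimized next address: only the low 12 bits need an adder
            next_addr = {addr[ADDR_W-1:12], addr[11:0] + op_count[11:0]};
        end
    end

endmodule

For example, addr[11:0] = 12'hff8 with op_count = 16 gives tlp_count = 8, last_tlp = 0, and a next_addr at offset 0 of the following 4k page.
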
@@ -647,7 +665,7 @@ always @* begin
inc_active_tx = 1'b1;
if (AXIS_PCIE_DATA_WIDTH > 64) begin
-req_pcie_addr_next = req_pcie_addr_reg + req_tlp_count_next;
+req_pcie_addr_next = req_pcie_addr;
req_addr_next = req_addr_reg + req_tlp_count_next;
req_op_count_next = req_op_count_reg - req_tlp_count_next;
@@ -655,14 +673,14 @@ always @* begin
tlp_cmd_addr_next = req_addr_reg;
tlp_cmd_pcie_tag_next = new_tag;
-tlp_cmd_last_next = req_op_count_next == 0;
+tlp_cmd_last_next = req_last_tlp;
tlp_cmd_valid_next = 1'b1;
op_table_read_start_ptr = tlp_cmd_op_tag_reg;
-op_table_read_start_commit = req_op_count_next == 0;
+op_table_read_start_commit = req_last_tlp;
op_table_read_start_en = 1'b1;
-if (req_op_count_next != 0) begin
+if (!req_last_tlp) begin
req_state_next = REQ_STATE_START;
end else begin
s_axis_read_desc_ready_next = 1'b0;
@@ -677,7 +695,7 @@ always @* begin
end
REQ_STATE_HEADER: begin
if (m_axis_rq_tready_int_reg && !tlp_cmd_valid_reg && new_tag_valid) begin
-req_pcie_addr_next = req_pcie_addr_reg + req_tlp_count_next;
+req_pcie_addr_next = req_pcie_addr;
req_addr_next = req_addr_reg + req_tlp_count_next;
req_op_count_next = req_op_count_reg - req_tlp_count_next;
@@ -698,14 +716,14 @@ always @* begin
tlp_cmd_addr_next = req_addr_reg;
tlp_cmd_pcie_tag_next = new_tag;
-tlp_cmd_last_next = req_op_count_next == 0;
+tlp_cmd_last_next = req_last_tlp;
tlp_cmd_valid_next = 1'b1;
op_table_read_start_ptr = tlp_cmd_op_tag_reg;
-op_table_read_start_commit = req_op_count_next == 0;
+op_table_read_start_commit = req_last_tlp;
op_table_read_start_en = 1'b1;
-if (req_op_count_next != 0) begin
+if (!req_last_tlp) begin
req_state_next = REQ_STATE_START;
end else begin
s_axis_read_desc_ready_next = 1'b0;
@@ -843,20 +861,19 @@ always @* begin
// cycle_byte_count_next = op_count_next;
// end
start_offset_next = addr_next;
-end_offset_next = start_offset_next+cycle_byte_count_next-1;
-ram_wrap_next = {1'b0, start_offset_next}+cycle_byte_count_next > 2**RAM_OFFSET_WIDTH;
+{ram_wrap_next, end_offset_next} = start_offset_next+cycle_byte_count_next-1;
ram_mask_0_next = {SEG_COUNT{1'b1}} << (start_offset_next >> $clog2(SEG_BE_WIDTH));
ram_mask_1_next = {SEG_COUNT{1'b1}} >> (SEG_COUNT-1-(end_offset_next >> $clog2(SEG_BE_WIDTH)));
if (!ram_wrap_next) begin
-ram_mask_next = ram_mask_0_next & ram_mask_1_next;
+ram_mask_0_next = ram_mask_0_next & ram_mask_1_next;
+ram_mask_1_next = 0;
-end else begin
-ram_mask_next = ram_mask_0_next | ram_mask_1_next;
end
+ram_mask_next = ram_mask_0_next | ram_mask_1_next;
addr_delay_next = addr_next;
addr_next = addr_next + cycle_byte_count_next;
op_count_next = op_count_next - cycle_byte_count_next;
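
The rewritten offset logic above folds two wide operations into one: a single (RAM_OFFSET_WIDTH+1)-bit addition whose low bits are the end offset and whose carry out equals the old start+count > 2**RAM_OFFSET_WIDTH comparison; zeroing ram_mask_1_next on the non-wrapping path then lets a single unconditional OR produce ram_mask_next in both cases. A small sketch of the carry trick, with a hypothetical module name and an assumed width:

module ram_wrap_sketch #(
    parameter OFFSET_W = 7   // stand-in for RAM_OFFSET_WIDTH
)(
    input  wire [OFFSET_W-1:0] start_offset,
    input  wire [OFFSET_W:0]   byte_count,  // 1 .. 2**OFFSET_W bytes this cycle
    output reg  [OFFSET_W-1:0] end_offset,  // offset of the last byte, modulo 2**OFFSET_W
    output reg                 wrap         // span wraps past the top of the window
);

    always @* begin
        // one addition: the low bits give the end offset, and the carry out
        // equals the old "start_offset + byte_count > 2**OFFSET_W" comparison
        {wrap, end_offset} = start_offset + byte_count - 1;
    end

endmodule

With OFFSET_W = 7, start_offset = 127 and byte_count = 128 give {wrap, end_offset} = 254, i.e. wrap = 1 and end_offset = 126, matching the old two-step form (byte_count = 0 is excluded, since the -1 would underflow).
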
@@ -979,20 +996,19 @@ always @* begin
cycle_byte_count_next = op_count_next;
end
start_offset_next = addr_next;
-end_offset_next = start_offset_next+cycle_byte_count_next-1;
-ram_wrap_next = {1'b0, start_offset_next}+cycle_byte_count_next > 2**RAM_OFFSET_WIDTH;
+{ram_wrap_next, end_offset_next} = start_offset_next+cycle_byte_count_next-1;
ram_mask_0_next = {SEG_COUNT{1'b1}} << (start_offset_next >> $clog2(SEG_BE_WIDTH));
ram_mask_1_next = {SEG_COUNT{1'b1}} >> (SEG_COUNT-1-(end_offset_next >> $clog2(SEG_BE_WIDTH)));
if (!ram_wrap_next) begin
-ram_mask_next = ram_mask_0_next & ram_mask_1_next;
+ram_mask_0_next = ram_mask_0_next & ram_mask_1_next;
+ram_mask_1_next = 0;
-end else begin
-ram_mask_next = ram_mask_0_next | ram_mask_1_next;
end
+ram_mask_next = ram_mask_0_next | ram_mask_1_next;
addr_delay_next = addr_next;
addr_next = addr_next + cycle_byte_count_next;
op_count_next = op_count_next - cycle_byte_count_next;
@@ -1072,20 +1088,19 @@ always @* begin
cycle_byte_count_next = op_count_next;
end
start_offset_next = addr_next;
-end_offset_next = start_offset_next+cycle_byte_count_next-1;
-ram_wrap_next = {1'b0, start_offset_next}+cycle_byte_count_next > 2**RAM_OFFSET_WIDTH;
+{ram_wrap_next, end_offset_next} = start_offset_next+cycle_byte_count_next-1;
ram_mask_0_next = {SEG_COUNT{1'b1}} << (start_offset_next >> $clog2(SEG_BE_WIDTH));
ram_mask_1_next = {SEG_COUNT{1'b1}} >> (SEG_COUNT-1-(end_offset_next >> $clog2(SEG_BE_WIDTH)));
if (!ram_wrap_next) begin
-ram_mask_next = ram_mask_0_next & ram_mask_1_next;
+ram_mask_0_next = ram_mask_0_next & ram_mask_1_next;
+ram_mask_1_next = 0;
-end else begin
-ram_mask_next = ram_mask_0_next | ram_mask_1_next;
end
+ram_mask_next = ram_mask_0_next | ram_mask_1_next;
addr_delay_next = addr_reg;
addr_next = addr_reg + cycle_byte_count_next;
op_count_next = op_count_reg - cycle_byte_count_next;

File 2 of 5

@@ -293,7 +293,7 @@ reg [RAM_SEL_WIDTH-1:0] ram_sel_reg = {RAM_SEL_WIDTH{1'b0}}, ram_sel_next;
reg [RAM_ADDR_WIDTH-1:0] ram_addr_reg = {RAM_ADDR_WIDTH{1'b0}}, ram_addr_next;
reg [LEN_WIDTH-1:0] op_count_reg = {LEN_WIDTH{1'b0}}, op_count_next;
reg [LEN_WIDTH-1:0] tr_count_reg = {LEN_WIDTH{1'b0}}, tr_count_next;
-reg [LEN_WIDTH-1:0] tlp_count_reg = {LEN_WIDTH{1'b0}}, tlp_count_next;
+reg [12:0] tlp_count_reg = 13'd0, tlp_count_next;
reg [PCIE_ADDR_WIDTH-1:0] read_pcie_addr_reg = {PCIE_ADDR_WIDTH{1'b0}}, read_pcie_addr_next;
reg [RAM_SEL_WIDTH-1:0] read_ram_sel_reg = {RAM_SEL_WIDTH{1'b0}}, read_ram_sel_next;
@@ -531,7 +531,7 @@ always @* begin
end else begin
op_table_start_offset = pcie_addr_reg[1:0]-ram_addr_reg[RAM_OFFSET_WIDTH-1:0];
end
-op_table_start_last = op_count_next == 0;
+op_table_start_last = op_count_reg == tlp_count_next;
op_table_start_tag = tlp_cmd_tag_reg;
op_table_start_en = 1'b1;
@@ -556,7 +556,7 @@ always @* begin
end
end
-if (op_count_next != 0) begin
+if (!op_table_start_last) begin
req_state_next = REQ_STATE_START;
end else begin
s_axis_write_desc_ready_next = !op_table_active[op_table_start_ptr_reg[OP_TAG_WIDTH-1:0]] && ($unsigned(op_table_start_ptr_reg - op_table_finish_ptr_reg) < 2**OP_TAG_WIDTH) && enable;
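
The op_table_start_last change above applies the same idea in the write engine: because tlp_count_next never exceeds op_count_reg, testing whether the remainder will be zero is equivalent to comparing the two operands directly, which keeps the subtractor out of the path that drives op_table_start_last. A hypothetical side-by-side sketch of the equivalence:

module last_flag_sketch #(
    parameter LEN_W = 16
)(
    input  wire [LEN_W-1:0] op_count,   // bytes left before this TLP
    input  wire [LEN_W-1:0] tlp_count,  // bytes taken by this TLP, tlp_count <= op_count
    output wire             last_old,   // old form: subtract, then compare with zero
    output wire             last_new    // new form: compare the operands directly
);

    assign last_old = (op_count - tlp_count) == 0;
    assign last_new = op_count == tlp_count;  // same value, shorter logic path

endmodule
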
@@ -615,20 +615,19 @@ always @* begin
cycle_byte_count_next = read_len_next;
end
start_offset_next = read_ram_addr_next;
-end_offset_next = start_offset_next+cycle_byte_count_next-1;
-ram_wrap_next = {1'b0, start_offset_next}+cycle_byte_count_next > 2**RAM_OFFSET_WIDTH;
+{ram_wrap_next, end_offset_next} = start_offset_next+cycle_byte_count_next-1;
read_ram_mask_0_next = {SEG_COUNT{1'b1}} << (start_offset_next >> $clog2(SEG_BE_WIDTH));
read_ram_mask_1_next = {SEG_COUNT{1'b1}} >> (SEG_COUNT-1-(end_offset_next >> $clog2(SEG_BE_WIDTH)));
if (!ram_wrap_next) begin
-read_ram_mask_next = read_ram_mask_0_next & read_ram_mask_1_next;
+read_ram_mask_0_next = read_ram_mask_0_next & read_ram_mask_1_next;
+read_ram_mask_1_next = 0;
-end else begin
-read_ram_mask_next = read_ram_mask_0_next | read_ram_mask_1_next;
end
+read_ram_mask_next = read_ram_mask_0_next | read_ram_mask_1_next;
if (read_cmd_valid_reg) begin
read_cmd_ready = 1'b1;
read_state_next = READ_STATE_READ;
@@ -669,20 +668,19 @@ always @* begin
cycle_byte_count_next = read_len_next;
end
start_offset_next = read_ram_addr_next;
-end_offset_next = start_offset_next+cycle_byte_count_next-1;
-ram_wrap_next = {1'b0, start_offset_next}+cycle_byte_count_next > 2**RAM_OFFSET_WIDTH;
+{ram_wrap_next, end_offset_next} = start_offset_next+cycle_byte_count_next-1;
read_ram_mask_0_next = {SEG_COUNT{1'b1}} << (start_offset_next >> $clog2(SEG_BE_WIDTH));
read_ram_mask_1_next = {SEG_COUNT{1'b1}} >> (SEG_COUNT-1-(end_offset_next >> $clog2(SEG_BE_WIDTH)));
if (!ram_wrap_next) begin
-read_ram_mask_next = read_ram_mask_0_next & read_ram_mask_1_next;
+read_ram_mask_0_next = read_ram_mask_0_next & read_ram_mask_1_next;
+read_ram_mask_1_next = 0;
-end else begin
-read_ram_mask_next = read_ram_mask_0_next | read_ram_mask_1_next;
end
+read_ram_mask_next = read_ram_mask_0_next | read_ram_mask_1_next;
if (!read_last_cycle_reg) begin
read_state_next = READ_STATE_READ;
end else if (read_cmd_valid_reg) begin
@@ -702,20 +700,19 @@ always @* begin
cycle_byte_count_next = read_len_next;
end
start_offset_next = read_ram_addr_next;
-end_offset_next = start_offset_next+cycle_byte_count_next-1;
-ram_wrap_next = {1'b0, start_offset_next}+cycle_byte_count_next > 2**RAM_OFFSET_WIDTH;
+{ram_wrap_next, end_offset_next} = start_offset_next+cycle_byte_count_next-1;
read_ram_mask_0_next = {SEG_COUNT{1'b1}} << (start_offset_next >> $clog2(SEG_BE_WIDTH));
read_ram_mask_1_next = {SEG_COUNT{1'b1}} >> (SEG_COUNT-1-(end_offset_next >> $clog2(SEG_BE_WIDTH)));
if (!ram_wrap_next) begin
-read_ram_mask_next = read_ram_mask_0_next & read_ram_mask_1_next;
+read_ram_mask_0_next = read_ram_mask_0_next & read_ram_mask_1_next;
+read_ram_mask_1_next = 0;
-end else begin
-read_ram_mask_next = read_ram_mask_0_next | read_ram_mask_1_next;
end
+read_ram_mask_next = read_ram_mask_0_next | read_ram_mask_1_next;
read_cmd_ready = 1'b1;
read_state_next = READ_STATE_READ;

File 3 of 5

@@ -324,11 +324,13 @@ reg finish_tag;
reg [3:0] first_be;
reg [3:0] last_be;
reg [10:0] dword_count;
+reg req_last_tlp;
+reg [PCIE_ADDR_WIDTH-1:0] req_pcie_addr;
reg [PCIE_ADDR_WIDTH-1:0] req_pcie_addr_reg = {PCIE_ADDR_WIDTH{1'b0}}, req_pcie_addr_next;
reg [AXI_ADDR_WIDTH-1:0] req_axi_addr_reg = {AXI_ADDR_WIDTH{1'b0}}, req_axi_addr_next;
reg [LEN_WIDTH-1:0] req_op_count_reg = {LEN_WIDTH{1'b0}}, req_op_count_next;
-reg [LEN_WIDTH-1:0] req_tlp_count_reg = {LEN_WIDTH{1'b0}}, req_tlp_count_next;
+reg [12:0] req_tlp_count_reg = 13'd0, req_tlp_count_next;
reg [11:0] lower_addr_reg = 12'd0, lower_addr_next;
reg [12:0] byte_count_reg = 13'd0, byte_count_next;
@@ -627,10 +629,18 @@ always @* begin
// crosses 4k boundary
req_tlp_count_next = 13'h1000 - req_pcie_addr_reg[11:0];
dword_count = 11'h400 - req_pcie_addr_reg[11:2];
+req_last_tlp = req_pcie_addr_plus_op_count[11:0] == 0;
+// optimized req_pcie_addr = req_pcie_addr_reg + req_tlp_count_next
+req_pcie_addr[PCIE_ADDR_WIDTH-1:12] = req_pcie_addr_reg[PCIE_ADDR_WIDTH-1:12]+1;
+req_pcie_addr[11:0] = 12'd0;
end else begin
// does not cross 4k boundary, send one TLP
req_tlp_count_next = req_op_count_reg;
dword_count = (req_op_count_reg + req_pcie_addr_reg[1:0] + 3) >> 2;
+req_last_tlp = 1'b1;
+// optimized req_pcie_addr = req_pcie_addr_reg + req_tlp_count_next
+req_pcie_addr[PCIE_ADDR_WIDTH-1:12] = req_pcie_addr_reg[PCIE_ADDR_WIDTH-1:12];
+req_pcie_addr[11:0] = req_pcie_addr_reg[11:0] + req_op_count_reg;
end
end else begin
// packet larger than max read request size
@@ -638,10 +648,18 @@ always @* begin
// crosses 4k boundary
req_tlp_count_next = 13'h1000 - req_pcie_addr_reg[11:0];
dword_count = 11'h400 - req_pcie_addr_reg[11:2];
+req_last_tlp = 1'b0;
+// optimized req_pcie_addr = req_pcie_addr_reg + req_tlp_count_next
+req_pcie_addr[PCIE_ADDR_WIDTH-1:12] = req_pcie_addr_reg[PCIE_ADDR_WIDTH-1:12]+1;
+req_pcie_addr[11:0] = 12'd0;
end else begin
// does not cross 4k boundary, send one TLP
req_tlp_count_next = {max_read_request_size_dw_reg, 2'b00}-req_pcie_addr_reg[1:0];
dword_count = max_read_request_size_dw_reg;
+req_last_tlp = 1'b0;
+// optimized req_pcie_addr = req_pcie_addr_reg + req_tlp_count_next
+req_pcie_addr[PCIE_ADDR_WIDTH-1:12] = req_pcie_addr_reg[PCIE_ADDR_WIDTH-1:12];
+req_pcie_addr[11:0] = {req_pcie_addr_reg[11:2] + max_read_request_size_dw_reg, 2'b00};
end
end
@@ -689,7 +707,7 @@ always @* begin
inc_active_tx = 1'b1;
if (AXIS_PCIE_DATA_WIDTH > 64) begin
-req_pcie_addr_next = req_pcie_addr_reg + req_tlp_count_next;
+req_pcie_addr_next = req_pcie_addr;
req_axi_addr_next = req_axi_addr_reg + req_tlp_count_next;
req_op_count_next = req_op_count_reg - req_tlp_count_next;
@@ -697,14 +715,14 @@ always @* begin
tlp_cmd_addr_next = req_axi_addr_reg;
tlp_cmd_pcie_tag_next = new_tag;
-tlp_cmd_last_next = req_op_count_next == 0;
+tlp_cmd_last_next = req_last_tlp;
tlp_cmd_valid_next = 1'b1;
op_table_read_start_ptr = tlp_cmd_op_tag_reg;
-op_table_read_start_commit = req_op_count_next == 0;
+op_table_read_start_commit = req_last_tlp;
op_table_read_start_en = 1'b1;
-if (req_op_count_next != 0) begin
+if (!req_last_tlp) begin
req_state_next = REQ_STATE_START;
end else begin
s_axis_read_desc_ready_next = 1'b0;
@@ -719,7 +737,7 @@ always @* begin
end
REQ_STATE_HEADER: begin
if (m_axis_rq_tready_int_reg && !tlp_cmd_valid_reg && new_tag_valid) begin
-req_pcie_addr_next = req_pcie_addr_reg + req_tlp_count_next;
+req_pcie_addr_next = req_pcie_addr;
req_axi_addr_next = req_axi_addr_reg + req_tlp_count_next;
req_op_count_next = req_op_count_reg - req_tlp_count_next;
@@ -740,14 +758,14 @@ always @* begin
tlp_cmd_addr_next = req_axi_addr_reg;
tlp_cmd_pcie_tag_next = new_tag;
-tlp_cmd_last_next = req_op_count_next == 0;
+tlp_cmd_last_next = req_last_tlp;
tlp_cmd_valid_next = 1'b1;
op_table_read_start_ptr = tlp_cmd_op_tag_reg;
-op_table_read_start_commit = req_op_count_next == 0;
+op_table_read_start_commit = req_last_tlp;
op_table_read_start_en = 1'b1;
-if (req_op_count_next != 0) begin
+if (!req_last_tlp) begin
req_state_next = REQ_STATE_START;
end else begin
s_axis_read_desc_ready_next = 1'b0;

File 4 of 5

@@ -276,11 +276,20 @@ reg [2:0] tlp_state_reg = TLP_STATE_IDLE, tlp_state_next;
// datapath control signals
reg transfer_in_save;
+reg [12:0] tlp_count;
+reg [10:0] dword_count;
+reg last_tlp;
+reg [PCIE_ADDR_WIDTH-1:0] pcie_addr;
+reg [12:0] tr_count;
+reg last_tr;
+reg [AXI_ADDR_WIDTH-1:0] axi_addr;
reg [PCIE_ADDR_WIDTH-1:0] pcie_addr_reg = {PCIE_ADDR_WIDTH{1'b0}}, pcie_addr_next;
reg [AXI_ADDR_WIDTH-1:0] axi_addr_reg = {AXI_ADDR_WIDTH{1'b0}}, axi_addr_next;
reg [LEN_WIDTH-1:0] op_count_reg = {LEN_WIDTH{1'b0}}, op_count_next;
reg [LEN_WIDTH-1:0] tr_count_reg = {LEN_WIDTH{1'b0}}, tr_count_next;
-reg [LEN_WIDTH-1:0] tlp_count_reg = {LEN_WIDTH{1'b0}}, tlp_count_next;
+reg [12:0] tlp_count_reg = 13'd0, tlp_count_next;
reg [PCIE_ADDR_WIDTH-1:0] tlp_addr_reg = {PCIE_ADDR_WIDTH{1'b0}}, tlp_addr_next;
reg [11:0] tlp_len_reg = 12'd0, tlp_len_next;
@@ -427,15 +436,102 @@ always @* begin
tlp_cmd_tag_next = tlp_cmd_tag_reg;
tlp_cmd_last_next = tlp_cmd_last_reg;
+// TLP size computation
+if (op_count_reg <= {max_payload_size_dw_reg, 2'b00}-pcie_addr_reg[1:0]) begin
+// packet smaller than max payload size
+if (pcie_addr_reg[12] != pcie_addr_plus_op_count[12]) begin
+// crosses 4k boundary
+tlp_count = 13'h1000 - pcie_addr_reg[11:0];
+dword_count = 11'h400 - pcie_addr_reg[11:2];
+last_tlp = pcie_addr_plus_op_count[11:0] == 0;
+// optimized pcie_addr = pcie_addr_reg + tlp_count
+pcie_addr[PCIE_ADDR_WIDTH-1:12] = pcie_addr_reg[PCIE_ADDR_WIDTH-1:12]+1;
+pcie_addr[11:0] = 12'd0;
+end else begin
+// does not cross 4k boundary, send one TLP
+tlp_count = op_count_reg;
+dword_count = (op_count_reg + pcie_addr_reg[1:0] + 3) >> 2;
+last_tlp = 1'b1;
+// optimized pcie_addr = pcie_addr_reg + tlp_count
+pcie_addr[PCIE_ADDR_WIDTH-1:12] = pcie_addr_reg[PCIE_ADDR_WIDTH-1:12];
+pcie_addr[11:0] = pcie_addr_reg[11:0] + op_count_reg;
+end
+end else begin
+// packet larger than max payload size
+if (pcie_addr_reg[12] != pcie_addr_plus_max_payload[12]) begin
+// crosses 4k boundary
+tlp_count = 13'h1000 - pcie_addr_reg[11:0];
+dword_count = 11'h400 - pcie_addr_reg[11:2];
+last_tlp = 1'b0;
+// optimized pcie_addr = pcie_addr_reg + tlp_count
+pcie_addr[PCIE_ADDR_WIDTH-1:12] = pcie_addr_reg[PCIE_ADDR_WIDTH-1:12]+1;
+pcie_addr[11:0] = 12'd0;
+end else begin
+// does not cross 4k boundary, send one TLP
+tlp_count = {max_payload_size_dw_reg, 2'b00}-pcie_addr_reg[1:0];
+dword_count = max_payload_size_dw_reg;
+last_tlp = 1'b0;
+// optimized pcie_addr = pcie_addr_reg + tlp_count
+pcie_addr[PCIE_ADDR_WIDTH-1:12] = pcie_addr_reg[PCIE_ADDR_WIDTH-1:12];
+pcie_addr[11:0] = {pcie_addr_reg[11:2] + max_payload_size_dw_reg, 2'b00};
+end
+end
+// AXI transfer size computation
+if (tlp_count_reg <= AXI_MAX_BURST_SIZE-axi_addr_reg[OFFSET_WIDTH-1:0] || AXI_MAX_BURST_SIZE >= 4096) begin
+// packet smaller than max burst size
+if (axi_addr_reg[12] != axi_addr_plus_tlp_count[12]) begin
+// crosses 4k boundary
+tr_count = 13'h1000 - axi_addr_reg[11:0];
+last_tr = axi_addr_plus_tlp_count[11:0] == 0;
+// optimized axi_addr = axi_addr_reg + tr_count
+axi_addr[AXI_ADDR_WIDTH-1:12] = axi_addr_reg[AXI_ADDR_WIDTH-1:12]+1;
+axi_addr[11:0] = 12'd0;
+end else begin
+// does not cross 4k boundary, send one request
+tr_count = tlp_count_reg;
+last_tr = 1'b1;
+// optimized axi_addr = axi_addr_reg + tr_count
+axi_addr[AXI_ADDR_WIDTH-1:12] = axi_addr_reg[AXI_ADDR_WIDTH-1:12];
+axi_addr[11:0] = axi_addr_reg[11:0] + tlp_count_reg;
+end
+end else begin
+// packet larger than max burst size
+if (axi_addr_reg[12] != axi_addr_plus_max_burst[12]) begin
+// crosses 4k boundary
+tr_count = 13'h1000 - axi_addr_reg[11:0];
+last_tr = 1'b0;
+// optimized axi_addr = axi_addr_reg + tr_count
+axi_addr[AXI_ADDR_WIDTH-1:12] = axi_addr_reg[AXI_ADDR_WIDTH-1:12]+1;
+axi_addr[11:0] = 12'd0;
+end else begin
+// does not cross 4k boundary, send one request
+tr_count = AXI_MAX_BURST_SIZE-axi_addr_reg[1:0];
+last_tr = 1'b0;
+// optimized axi_addr = axi_addr_reg + tr_count
+axi_addr[AXI_ADDR_WIDTH-1:12] = axi_addr_reg[AXI_ADDR_WIDTH-1:12];
+axi_addr[11:0] = {axi_addr_reg[11:2], 2'b00} + AXI_MAX_BURST_SIZE;
+end
+end
op_table_start_pcie_addr = pcie_addr_reg;
-op_table_start_len = 0;
-op_table_start_dword_len = 0;
-op_table_start_input_cycle_count = 0;
-op_table_start_output_cycle_count = 0;
-op_table_start_offset = 0;
-op_table_start_bubble_cycle = 0;
+op_table_start_len = tlp_count;
+op_table_start_dword_len = dword_count;
+op_table_start_input_cycle_count = (tlp_count + axi_addr_reg[OFFSET_WIDTH-1:0] - 1) >> AXI_BURST_SIZE;
+if (AXIS_PCIE_DATA_WIDTH >= 256) begin
+op_table_start_output_cycle_count = (tlp_count + 16+pcie_addr_reg[1:0] - 1) >> AXI_BURST_SIZE;
+end else begin
+op_table_start_output_cycle_count = (tlp_count + pcie_addr_reg[1:0] - 1) >> AXI_BURST_SIZE;
+end
+if (AXIS_PCIE_DATA_WIDTH >= 256) begin
+op_table_start_offset = 16+pcie_addr_reg[1:0]-axi_addr_reg[OFFSET_WIDTH-1:0];
+op_table_start_bubble_cycle = axi_addr_reg[OFFSET_WIDTH-1:0] > 16+pcie_addr_reg[1:0];
+end else begin
+op_table_start_offset = pcie_addr_reg[1:0]-axi_addr_reg[OFFSET_WIDTH-1:0];
+op_table_start_bubble_cycle = axi_addr_reg[OFFSET_WIDTH-1:0] > pcie_addr_reg[1:0];
+end
op_table_start_tag = tlp_cmd_tag_reg;
-op_table_start_last = 0;
+op_table_start_last = last_tlp;
op_table_start_en = 1'b0;
// TLP segmentation and AXI read request generation
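
The hunk above is the core of the restructuring in this file: the per-TLP and per-AXI-burst arithmetic moves out of the individual FSM states into always-active combinational blocks, and the states below merely latch the precomputed tlp_count, last_tlp, pcie_addr, tr_count, last_tr, and axi_addr. A compact sketch of that organization, with a hypothetical module and only the plain 4k split rule (no max-payload clamp, no optimized page adders):

module seg_fsm_sketch #(
    parameter ADDR_W = 64,
    parameter LEN_W  = 16
)(
    input  wire              clk,
    input  wire              rst,
    input  wire              desc_valid,
    input  wire [ADDR_W-1:0] desc_addr,
    input  wire [LEN_W-1:0]  desc_len,    // assumed >= 1
    output reg               desc_ready,
    output reg               seg_valid,   // one pulse per emitted segment
    output reg  [ADDR_W-1:0] seg_addr,
    output reg  [12:0]       seg_len
);

    reg [ADDR_W-1:0] addr_reg = {ADDR_W{1'b0}};
    reg [LEN_W-1:0]  count_reg = {LEN_W{1'b0}};
    reg              busy_reg = 1'b0;

    // always-active split of the *current* registers, as in the code above:
    // the FSM below never recomputes this, it only consumes the results
    reg [12:0]  tlp_count;
    reg         last_tlp;
    wire [12:0] to_boundary = 13'h1000 - addr_reg[11:0]; // bytes left in this page

    always @* begin
        if (count_reg <= to_boundary) begin
            // remainder fits in the current 4k page: this is the last segment
            tlp_count = count_reg[12:0];
            last_tlp  = 1'b1;
        end else begin
            // truncate at the 4k boundary, more segments to follow
            tlp_count = to_boundary;
            last_tlp  = 1'b0;
        end
    end

    always @(posedge clk) begin
        seg_valid <= 1'b0;
        if (rst) begin
            busy_reg   <= 1'b0;
            desc_ready <= 1'b1;
        end else if (!busy_reg) begin
            if (desc_ready && desc_valid) begin
                addr_reg   <= desc_addr;
                count_reg  <= desc_len;
                busy_reg   <= 1'b1;
                desc_ready <= 1'b0;
            end
        end else begin
            // issue one segment per cycle from the precomputed split
            seg_valid <= 1'b1;
            seg_addr  <= addr_reg;
            seg_len   <= tlp_count;
            addr_reg  <= addr_reg + tlp_count;
            count_reg <= count_reg - tlp_count;
            if (last_tlp) begin
                busy_reg   <= 1'b0;
                desc_ready <= 1'b1;
            end
        end
    end

endmodule

Hoisting the arithmetic this way trades a small amount of always-on logic for shorter state-dependent paths: each state arm reduces to register loads, which is the effect visible in the shrinking AXI_STATE_START and AXI_STATE_REQ hunks below.
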
@@ -444,11 +540,12 @@ always @* begin
// idle state, wait for incoming descriptor
s_axis_write_desc_ready_next = !op_table_active[op_table_start_ptr_reg[OP_TAG_WIDTH-1:0]] && ($unsigned(op_table_start_ptr_reg - op_table_finish_ptr_reg) < 2**OP_TAG_WIDTH) && enable;
+pcie_addr_next = s_axis_write_desc_pcie_addr;
+axi_addr_next = s_axis_write_desc_axi_addr;
+op_count_next = s_axis_write_desc_len;
if (s_axis_write_desc_ready & s_axis_write_desc_valid) begin
s_axis_write_desc_ready_next = 1'b0;
-pcie_addr_next = s_axis_write_desc_pcie_addr;
-axi_addr_next = s_axis_write_desc_axi_addr;
-op_count_next = s_axis_write_desc_len;
tlp_cmd_tag_next = s_axis_write_desc_tag;
axi_state_next = AXI_STATE_START;
end else begin
@@ -457,51 +554,33 @@ always @* begin
end
AXI_STATE_START: begin
// start state, compute TLP length
+tlp_count_next = tlp_count;
+op_table_start_pcie_addr = pcie_addr_reg;
+op_table_start_len = tlp_count;
+op_table_start_dword_len = dword_count;
+op_table_start_input_cycle_count = (tlp_count + axi_addr_reg[OFFSET_WIDTH-1:0] - 1) >> AXI_BURST_SIZE;
+if (AXIS_PCIE_DATA_WIDTH >= 256) begin
+op_table_start_output_cycle_count = (tlp_count + 16+pcie_addr_reg[1:0] - 1) >> AXI_BURST_SIZE;
+end else begin
+op_table_start_output_cycle_count = (tlp_count + pcie_addr_reg[1:0] - 1) >> AXI_BURST_SIZE;
+end
+if (AXIS_PCIE_DATA_WIDTH >= 256) begin
+op_table_start_offset = 16+pcie_addr_reg[1:0]-axi_addr_reg[OFFSET_WIDTH-1:0];
+op_table_start_bubble_cycle = axi_addr_reg[OFFSET_WIDTH-1:0] > 16+pcie_addr_reg[1:0];
+end else begin
+op_table_start_offset = pcie_addr_reg[1:0]-axi_addr_reg[OFFSET_WIDTH-1:0];
+op_table_start_bubble_cycle = axi_addr_reg[OFFSET_WIDTH-1:0] > pcie_addr_reg[1:0];
+end
+op_table_start_tag = tlp_cmd_tag_reg;
+op_table_start_last = last_tlp;
if (!op_table_active[op_table_start_ptr_reg[OP_TAG_WIDTH-1:0]] && ($unsigned(op_table_start_ptr_reg - op_table_finish_ptr_reg) < 2**OP_TAG_WIDTH)) begin
-if (op_count_reg <= {max_payload_size_dw_reg, 2'b00}-pcie_addr_reg[1:0]) begin
-// packet smaller than max payload size
-if (pcie_addr_reg[12] != pcie_addr_plus_op_count[12]) begin
-// crosses 4k boundary
-tlp_count_next = 13'h1000 - pcie_addr_reg[11:0];
-end else begin
-// does not cross 4k boundary, send one TLP
-tlp_count_next = op_count_reg;
-end
-end else begin
-// packet larger than max payload size
-if (pcie_addr_reg[12] != pcie_addr_plus_max_payload[12]) begin
-// crosses 4k boundary
-tlp_count_next = 13'h1000 - pcie_addr_reg[11:0];
-end else begin
-// does not cross 4k boundary, send one TLP
-tlp_count_next = {max_payload_size_dw_reg, 2'b00}-pcie_addr_reg[1:0];
-end
-end
-op_table_start_input_cycle_count = (tlp_count_next + axi_addr_reg[OFFSET_WIDTH-1:0] - 1) >> AXI_BURST_SIZE;
-if (AXIS_PCIE_DATA_WIDTH >= 256) begin
-op_table_start_output_cycle_count = (tlp_count_next + 16+pcie_addr_reg[1:0] - 1) >> AXI_BURST_SIZE;
-end else begin
-op_table_start_output_cycle_count = (tlp_count_next + pcie_addr_reg[1:0] - 1) >> AXI_BURST_SIZE;
-end
-pcie_addr_next = pcie_addr_reg + tlp_count_next;
+pcie_addr_next = pcie_addr;
op_count_next = op_count_reg - tlp_count_next;
-op_table_start_pcie_addr = pcie_addr_reg;
-op_table_start_len = tlp_count_next;
-op_table_start_dword_len = (tlp_count_next + pcie_addr_reg[1:0] + 3) >> 2;
-if (AXIS_PCIE_DATA_WIDTH >= 256) begin
-op_table_start_offset = 16+pcie_addr_reg[1:0]-axi_addr_reg[OFFSET_WIDTH-1:0];
-op_table_start_bubble_cycle = axi_addr_reg[OFFSET_WIDTH-1:0] > 16+pcie_addr_reg[1:0];
-end else begin
-op_table_start_offset = pcie_addr_reg[1:0]-axi_addr_reg[OFFSET_WIDTH-1:0];
-op_table_start_bubble_cycle = axi_addr_reg[OFFSET_WIDTH-1:0] > pcie_addr_reg[1:0];
-end
-tlp_cmd_last_next = op_count_next == 0;
-op_table_start_last = op_count_next == 0;
+tlp_cmd_last_next = last_tlp;
op_table_start_tag = tlp_cmd_tag_reg;
op_table_start_en = 1'b1;
axi_state_next = AXI_STATE_REQ;
@@ -512,34 +591,16 @@ always @* begin
AXI_STATE_REQ: begin
// request state, generate AXI read requests
if (!m_axi_arvalid) begin
-if (tlp_count_reg <= AXI_MAX_BURST_SIZE-axi_addr_reg[OFFSET_WIDTH-1:0] || AXI_MAX_BURST_SIZE >= 4096) begin
-// packet smaller than max burst size
-if (axi_addr_reg[12] != axi_addr_plus_tlp_count[12]) begin
-// crosses 4k boundary
-tr_count_next = 13'h1000 - axi_addr_reg[11:0];
-end else begin
-// does not cross 4k boundary, send one request
-tr_count_next = tlp_count_reg;
-end
-end else begin
-// packet larger than max burst size
-if (axi_addr_reg[12] != axi_addr_plus_max_burst[12]) begin
-// crosses 4k boundary
-tr_count_next = 13'h1000 - axi_addr_reg[11:0];
-end else begin
-// does not cross 4k boundary, send one request
-tr_count_next = AXI_MAX_BURST_SIZE - axi_addr_reg[OFFSET_WIDTH-1:0];
-end
-end
+tr_count_next = tr_count;
m_axi_araddr_next = axi_addr_reg;
m_axi_arlen_next = (tr_count_next + axi_addr_reg[OFFSET_WIDTH-1:0] - 1) >> AXI_BURST_SIZE;
m_axi_arvalid_next = 1;
-axi_addr_next = axi_addr_reg + tr_count_next;
+axi_addr_next = axi_addr;
tlp_count_next = tlp_count_reg - tr_count_next;
-if (tlp_count_next != 0) begin
+if (!last_tr) begin
axi_state_next = AXI_STATE_REQ;
end else if (!tlp_cmd_last_reg) begin
axi_state_next = AXI_STATE_START;

File 5 of 5

@@ -4126,12 +4126,18 @@ class RootComplex(Switch):
if self.msi_addr is None:
self.msi_addr, _ = self.alloc_region(4, self.msi_region_read, self.msi_region_write)
if not self.tree:
# device tree missing
return False
ti = self.tree.find_dev(dev)
if not ti:
# device not found
return False
if ti.get_capability_offset(MSI_CAP_ID) is None:
# does not support MSI
return False
if ti.msi_addr is not None and ti.msi_data is not None:
# already configured
return True
msg_ctrl = yield from self.capability_read_dword(dev, MSI_CAP_ID, 0)