Skip to content
Snippets Groups Projects
Commit 90e42fcb authored by Daniel van der Schuur's avatar Daniel van der Schuur
Browse files

-Added Second ss_parallel stage (+ FIFOs and 16b->8b->16b rewiring) to top

 level;
-Added Python script to generate SS selection HEX files. Functionally this
 does no reordering, out=in (for 16b mode).
-Verified in sim.
parent 220bca7e
No related branches found
No related tags found
No related merge requests found
...@@ -9,12 +9,24 @@ synth_top_level_entity = ...@@ -9,12 +9,24 @@ synth_top_level_entity =
quartus_copy_files = quartus_copy_files =
src/quartus/sopc_aartfaac_bn_sdo.sopc . src/quartus/sopc_aartfaac_bn_sdo.sopc .
$SVN/Aartfaac/trunk/Firmware/designs/aartfaac_bn_sdo/src/hex/ hex $SVN/Aartfaac/trunk/Firmware/designs/aartfaac_bn_sdo/src/hex/udp_sdo_ss.hex hex
$SVN/Aartfaac/trunk/Firmware/modules/rsp_terminal/src/hex/ hex src/hex/ hex
modelsim_copy_files = modelsim_copy_files =
$SVN/Aartfaac/trunk/Firmware/modules/rsp_terminal/src/hex/ hex $SVN/Aartfaac/trunk/Firmware/designs/aartfaac_bn_sdo/src/hex/subband_dat_0.hex hex
$SVN/Aartfaac/trunk/Firmware/designs/aartfaac_bn_sdo/src/hex/ hex $SVN/Aartfaac/trunk/Firmware/designs/aartfaac_bn_sdo/src/hex/subband_dat_1.hex hex
$SVN/Aartfaac/trunk/Firmware/designs/aartfaac_bn_sdo/src/hex/subband_dat_2.hex hex
$SVN/Aartfaac/trunk/Firmware/designs/aartfaac_bn_sdo/src/hex/subband_dat_3.hex hex
$SVN/Aartfaac/trunk/Firmware/designs/aartfaac_bn_sdo/src/hex/subband_dat_4.hex hex
$SVN/Aartfaac/trunk/Firmware/designs/aartfaac_bn_sdo/src/hex/subband_dat_5.hex hex
$SVN/Aartfaac/trunk/Firmware/designs/aartfaac_bn_sdo/src/hex/subband_dat_6.hex hex
$SVN/Aartfaac/trunk/Firmware/designs/aartfaac_bn_sdo/src/hex/subband_dat_7.hex hex
$SVN/Aartfaac/trunk/Firmware/designs/aartfaac_bn_sdo/src/hex/subband_dat_8.hex hex
$SVN/Aartfaac/trunk/Firmware/designs/aartfaac_bn_sdo/src/hex/subband_dat_9.hex hex
$SVN/Aartfaac/trunk/Firmware/designs/aartfaac_bn_sdo/src/hex/subband_dat_10.hex hex
$SVN/Aartfaac/trunk/Firmware/designs/aartfaac_bn_sdo/src/hex/subband_dat_11.hex hex
$SVN/Aartfaac/trunk/Firmware/designs/aartfaac_bn_sdo/src/hex/udp_sdo_ss.hex hex
src/hex/ hex
synth_files = synth_files =
$HDL_BUILD_DIR/unb1/quartus/aartfaac_bn_sdo/sopc_aartfaac_bn_sdo.vhd $HDL_BUILD_DIR/unb1/quartus/aartfaac_bn_sdo/sopc_aartfaac_bn_sdo.vhd
......
###############################################################################
#
# Copyright (C) 2015
# ASTRON (Netherlands Institute for Radio Astronomy) <http://www.astron.nl/>
# P.O.Box 2, 7990 AA Dwingeloo, The Netherlands
#
# This program is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
###############################################################################
from common import *
from mem_init_file import list_to_hex
import pi_ss_parallel
# Purpose:
# . Generate the HEX files for the ss_parallel instance.
# Description:
# . In 16b mode, no extra reordering takes place: dout=din.
# . The ss_parallel instance requires 3 types of hex files (4 in total):
# . Input reorder stage : ss_parallel_sb_16b_reorder_in.hex
# . Main selection stage : ss_parallel_sb_16b_ss_wide_0.hex
# : ss_parallel_sb_16b_ss_wide_1.hex
# . Output reorder stage : ss_parallel_sb_16b_reorder_out.hex
#
# Remark:
# Stream counts handed to PiSsParallel: inputs, internal instances, outputs.
# NOTE(review): NOT_OUT is presumably a typo for NOF_OUT; the name is kept
# because it is referenced further down in this script.
NOF_IN = NOF_INTERNALS = NOT_OUT = 2

# Frame length in words; the output frame is as long as the input frame.
FRAME_SIZE_IN = FRAME_SIZE_OUT = 864  # 12*72 or 9*96

# --- Input reorder stage ---
# Only 1 bit per word is needed (selects between the 2 inputs), but HEX
# files require byte boundaries. 864 cycles -> 864 words used of the depth.
HEX_REORDER_IN_MEM_WIDTH = 8
HEX_REORDER_IN_MEM_DEPTH = 1024
HEX_REORDER_IN_FILE_NAME = "../hex/ss_parallel_sb_16b_reorder_in.hex"

# --- Main selection stage (ss_wide) ---
# Each of the 2 internal instances holds 864 words (the output block size).
HEX_SS_WIDE_MEM_WIDTH = 10
HEX_SS_WIDE_MEM_DEPTH = 1024
HEX_SS_WIDE_FILE_PREFIX = "../hex/ss_parallel_sb_16b_ss_wide_"

# --- Output reorder stage ---
# Stored byte-wide because HEX files require byte boundaries; 864 words used.
# NOTE(review): the original note said "4 bits to encode the 12 inputs",
# which looks inherited from a 12-stream variant of this script; this
# instance has only 2 outputs -- confirm the real number of bits needed.
HEX_REORDER_OUT_MEM_WIDTH = 8
HEX_REORDER_OUT_MEM_DEPTH = 1024
HEX_REORDER_OUT_FILE_NAME = "../hex/ss_parallel_sb_16b_reorder_out.hex"
# PiSsParallel has to be constructed with a test case and a node IO object.
# This script is not a test case and does no node IO, so placeholder
# instances are created purely to satisfy the constructor.
import test_case
dummy_tc = test_case.Testcase('', '')

import node_io
dummy_io = node_io.NodeIO(dummy_tc.nodeImages, dummy_tc.base_ip)

# The peripheral model from which all selection settings are derived below.
ss = pi_ss_parallel.PiSsParallel(
    dummy_tc,
    dummy_io,
    NOF_IN,
    NOF_INTERNALS,
    NOT_OUT,
    FRAME_SIZE_IN,
    FRAME_SIZE_OUT,
)
# ------------------------------------------------------------------
# Input matrix
# ------------------------------------------------------------------
# The original illustration of the matrix layout was:
#   stream 0)  [ ( 0,0), ( 0,1), .. , ( 0, 70), ( 0,71) ]
#   ..
#   stream 11) [ (11,0), (11,1), .. , (11, 70), (11,71) ]
# NOTE(review): that shows 12 streams of 72 entries, whereas this script
# passes NOF_IN = 2 and FRAME_SIZE_IN = 864 -- the illustration is presumably
# stale; confirm the real layout against PiSsParallel.create_Din().
din = ss.create_Din()

# ------------------------------------------------------------------
# Output matrix
# ------------------------------------------------------------------
# Requested output equals the input (no reordering): dout is the same object.
dout = din

# ------------------------------------------------------------------
# Settings that map din onto dout
# ------------------------------------------------------------------
# Only Rin, Dsel and Rout are used further down in this script.
result, Rin, Dram, Dsel, Rout, Errout = ss.create_settings(din, dout)

# ------------------------------------------------------------------
# Selection buffer contents derived from the settings
# ------------------------------------------------------------------
reorder_in_buf = ss.ssReorderIn.create_selection_buf(Rin)
select_buf = flatten(Dsel)
reorder_out_buf = ss.ssReorderOut.create_selection_buf(Rout)
# ================================
# Generate hex file: input reorder
# ================================
list_to_hex(reorder_in_buf, HEX_REORDER_IN_FILE_NAME, HEX_REORDER_IN_MEM_WIDTH, HEX_REORDER_IN_MEM_DEPTH)

# ===========================
# Generate hex files: ss_wide
# ===========================
# list_to_hex requires integers, so first replace the don't cares (-1) with
# zeroes. The buffer is updated in place.
for index, word in enumerate(select_buf):
    if word == -1:
        select_buf[index] = 0

# select_buf is a flat list meant to be MM written to several instances from
# a certain offset. Here one HEX file per individual instance is wanted, so
# the list is split into NOF_INTERNALS sublists of FRAME_SIZE_OUT words each
# (2 sublists of 864 words with the current constants). Using the constants
# instead of literals keeps the split in step with the PiSsParallel settings.
# zip() with range() deliberately limits the output to NOF_INTERNALS files,
# whatever number of sublists split_list() yields.
for instance, sublist in zip(range(NOF_INTERNALS), split_list(select_buf, FRAME_SIZE_OUT)):
    list_to_hex(sublist, HEX_SS_WIDE_FILE_PREFIX + str(instance) + ".hex", HEX_SS_WIDE_MEM_WIDTH, HEX_SS_WIDE_MEM_DEPTH)

# ==================================
# Generate hex files: output reorder
# ==================================
# The output reorder list is written with HEX_REORDER_OUT_MEM_WIDTH bits per
# word and HEX_REORDER_OUT_MEM_DEPTH words of memory depth.
# NOTE(review): the previous comment spoke of 4-bit words fitting in 32b
# words, which does not match the 8-bit width passed here -- confirm.
list_to_hex(reorder_out_buf, HEX_REORDER_OUT_FILE_NAME, HEX_REORDER_OUT_MEM_WIDTH, HEX_REORDER_OUT_MEM_DEPTH)
...@@ -219,11 +219,20 @@ ARCHITECTURE str OF aartfaac_bn_sdo IS ...@@ -219,11 +219,20 @@ ARCHITECTURE str OF aartfaac_bn_sdo IS
SIGNAL io_rsp_terminal_rsp_snk_in_arr : t_dp_sosi_arr(c_rsp_terminal_nof_lanes-1 DOWNTO 0); SIGNAL io_rsp_terminal_rsp_snk_in_arr : t_dp_sosi_arr(c_rsp_terminal_nof_lanes-1 DOWNTO 0);
SIGNAL io_rsp_terminal_rsp_snk_out_arr : t_dp_siso_arr(c_rsp_terminal_nof_lanes-1 DOWNTO 0); SIGNAL io_rsp_terminal_rsp_snk_out_arr : t_dp_siso_arr(c_rsp_terminal_nof_lanes-1 DOWNTO 0);
SIGNAL io_rsp_terminal_subband_src_out_arr : t_dp_sosi_arr(c_rsp_terminal_nof_lanes-1 DOWNTO 0); SIGNAL io_rsp_terminal_subband_src_out_arr : t_dp_sosi_arr(c_rsp_terminal_nof_lanes-1 DOWNTO 0);
SIGNAL ss_parallel_sp_snk_out_arr : t_dp_siso_arr(c_rsp_terminal_nof_lanes-1 DOWNTO 0) := (OTHERS=>c_dp_siso_rdy); SIGNAL ss_parallel_sp_snk_out_arr : t_dp_siso_arr(c_rsp_terminal_nof_lanes-1 DOWNTO 0) := (OTHERS=>c_dp_siso_rdy);
SIGNAL ss_parallel_sp_snk_in_arr : t_dp_sosi_arr(c_rsp_terminal_nof_lanes-1 DOWNTO 0); SIGNAL ss_parallel_sp_snk_in_arr : t_dp_sosi_arr(c_rsp_terminal_nof_lanes-1 DOWNTO 0);
SIGNAL ss_parallel_sp_src_out_arr : t_dp_sosi_arr(c_nof_offload_streams-1 DOWNTO 0); SIGNAL ss_parallel_sp_src_out_arr : t_dp_sosi_arr(c_nof_offload_streams-1 DOWNTO 0);
SIGNAL dp_fifo_sc_src_out_arr : t_dp_sosi_arr(c_nof_offload_streams-1 DOWNTO 0); SIGNAL ss_parallel_sp_fifo_src_out_arr : t_dp_sosi_arr(c_nof_offload_streams-1 DOWNTO 0);
SIGNAL dp_fifo_sc_src_in_arr : t_dp_siso_arr(c_nof_offload_streams-1 DOWNTO 0); SIGNAL ss_parallel_sp_fifo_src_in_arr : t_dp_siso_arr(c_nof_offload_streams-1 DOWNTO 0);
SIGNAL ss_parallel_sb_snk_out_arr : t_dp_siso_arr(2-1 DOWNTO 0) := (OTHERS=>c_dp_siso_rdy);
SIGNAL ss_parallel_sb_snk_in_arr : t_dp_sosi_arr(2-1 DOWNTO 0);
SIGNAL ss_parallel_sb_src_out_arr : t_dp_sosi_arr(2-1 DOWNTO 0);
SIGNAL ss_parallel_sb_fifo_snk_in_arr : t_dp_sosi_arr(c_nof_offload_streams-1 DOWNTO 0);
SIGNAL ss_parallel_sb_fifo_src_out_arr : t_dp_sosi_arr(c_nof_offload_streams-1 DOWNTO 0);
SIGNAL ss_parallel_sb_fifo_src_in_arr : t_dp_siso_arr(c_nof_offload_streams-1 DOWNTO 0);
SIGNAL aartfaac_bn_sdo_udp_sdo_src_out_arr : t_dp_sosi_arr(c_nof_offload_streams-1 DOWNTO 0); SIGNAL aartfaac_bn_sdo_udp_sdo_src_out_arr : t_dp_sosi_arr(c_nof_offload_streams-1 DOWNTO 0);
SIGNAL aartfaac_bn_sdo_udp_sdo_src_in_arr : t_dp_siso_arr(c_nof_offload_streams-1 DOWNTO 0); SIGNAL aartfaac_bn_sdo_udp_sdo_src_in_arr : t_dp_siso_arr(c_nof_offload_streams-1 DOWNTO 0);
...@@ -435,9 +444,8 @@ BEGIN ...@@ -435,9 +444,8 @@ BEGIN
output_siso_arr => (OTHERS=>c_dp_siso_rdy) output_siso_arr => (OTHERS=>c_dp_siso_rdy)
); );
-- FIFO required as ss_parallel_sp does not have src flow control and dp_offload de-asserts -- FIFO required as ss_parallel_sp does not have src flow control
-- its snk_out.ready. u_dp_fifo_sc_sp : ENTITY dp_lib.dp_fifo_sc
u_dp_fifo_sc : ENTITY dp_lib.dp_fifo_sc
GENERIC MAP ( GENERIC MAP (
g_data_w => c_nof_complex*c_rsp_terminal_subband_dat_w, g_data_w => c_nof_complex*c_rsp_terminal_subband_dat_w,
g_bsn_w => c_dp_stream_bsn_w, g_bsn_w => c_dp_stream_bsn_w,
...@@ -460,8 +468,106 @@ BEGIN ...@@ -460,8 +468,106 @@ BEGIN
snk_out => OPEN, snk_out => OPEN,
snk_in => ss_parallel_sp_src_out_arr(0), snk_in => ss_parallel_sp_src_out_arr(0),
src_in => dp_fifo_sc_src_in_arr(0), src_in => ss_parallel_sp_fifo_src_in_arr(0),
src_out => dp_fifo_sc_src_out_arr(0) src_out => ss_parallel_sp_fifo_src_out_arr(0)
);
-----------------------------------------------------------------------------
-- Subband (SB) reordering for 8b mode:
-- . When in 8b mode, input subbands are no longer grouped together like in 16b mode; they're interleaved.
-- . GPU correlator would then need to de-interleave 8b subbands which would take too many CPU resources.
-- . So we need to group together the 8b subbands as shown as '8b output format' below:
-- 16b input format : [ [(SB0,SP0), .. ,(SB0,SP95)], .. , [(SB8 ,SP0), .. ,(SB8 , SP95)] ] data bits 15..0
-- 8b input format : [ [(SB0,SP0), .. ,(SB0,SP95)], .. , [(SB16,SP0), .. ,(SB16, SP95)] ] data bits 7..0 // interleaved 8b subbands, bad for GPU machine
-- [ [(SB1,SP0), .. ,(SB1,SP95)], .. , [(SB17,SP0), .. ,(SB17, SP95)] ] data bits 15..8 // interleaved 8b subbands, bad for GPU machine
-- 8b output format : [ [(SB0,SP0),..,(SB0,SP94),(SB1,SP0),..,(SB1,SP94)], .. , [(SB16,SP0),..,(SB16,SP94),(SB17,SP0),..,(SB17, SP94)] ] data bits 7..0 // reordered 8b subbands, good for GPU machine
-- [ [(SB0,SP1),..,(SB0,SP95),(SB1,SP1),..,(SB1,SP95)], .. , [(SB16,SP1),..,(SB16,SP95),(SB17,SP1),..,(SB17, SP95)] ] data bits 15..8 // reordered 8b subbands, good for GPU machine
-----------------------------------------------------------------------------
-- Rewire 1*16b to 2*8b
-- Combinatorial process that fans the single FIFO output stream (index 0) out
-- to both inputs of the SB reorder stage:
-- . First the complete sosi record is copied to input 0 and input 1, so both
--   inputs share the same control fields.
-- . Then the low byte of re and im of each input is overridden. Because these
--   assignments come later in the process they take precedence for bits 7..0;
--   the bits above 7 keep the values from the record copy.
-- . Only the ready/xon of input 0 is fed back to the FIFO; the snk_out of
--   input 1 is not used here.
p_connect_sb : PROCESS(ss_parallel_sp_fifo_src_out_arr, ss_parallel_sb_snk_out_arr)
BEGIN
-- ctrl
ss_parallel_sb_snk_in_arr(0) <= ss_parallel_sp_fifo_src_out_arr(0);
ss_parallel_sb_snk_in_arr(1) <= ss_parallel_sp_fifo_src_out_arr(0);
-- flow control in opposite direction
ss_parallel_sp_fifo_src_in_arr(0) <= ss_parallel_sb_snk_out_arr(0);
-- Subband data: even 8b subband indices are located in former 16b real part
-- (high byte of the 16b re becomes the 8b im, low byte becomes the 8b re)
ss_parallel_sb_snk_in_arr(0).im(7 DOWNTO 0) <= ss_parallel_sp_fifo_src_out_arr(0).re(15 DOWNTO 8);
ss_parallel_sb_snk_in_arr(0).re(7 DOWNTO 0) <= ss_parallel_sp_fifo_src_out_arr(0).re( 7 DOWNTO 0);
-- subband data: odd 8b subband indices are located in former 16b imaginary part
-- (high byte of the 16b im becomes the 8b im, low byte becomes the 8b re)
ss_parallel_sb_snk_in_arr(1).im(7 DOWNTO 0) <= ss_parallel_sp_fifo_src_out_arr(0).im(15 DOWNTO 8);
ss_parallel_sb_snk_in_arr(1).re(7 DOWNTO 0) <= ss_parallel_sp_fifo_src_out_arr(0).im( 7 DOWNTO 0);
END PROCESS;
-- Re-order the two 8b subband streams
-- . 2 inputs, 2 internal instances and 2 outputs of 8b DSP data.
-- . The selection memories are initialised from the hex files named below.
--   NOTE(review): the file names carry "16b"; presumably these hold the
--   pass-through (out=in) settings for 16b mode -- confirm.
-- . All three MM RAM ports are tied off (mosi = c_mem_mosi_rst, miso = OPEN),
--   so in this instantiation the memories are not connected to an MM master.
u_ss_parallel_sb : ENTITY ss_lib.ss_parallel
GENERIC MAP(
g_nof_inputs => 2,
g_nof_internals => 2,
g_nof_outputs => 2,
g_dsp_data_w => 8,
g_frame_size_in => c_rsp_terminal_nof_subbands_per_lane * c_rsp_terminal_nof_lanes, -- = 864 (9*96)
g_frame_size_out => c_rsp_terminal_nof_subbands_per_lane * c_rsp_terminal_nof_lanes, -- = 864 (9*96)
g_reorder_in_file_name => "hex/ss_parallel_sb_16b_reorder_in.hex",
-- Prefix only; presumably ss_parallel appends the instance index and ".hex" -- TODO confirm
g_ss_wide_file_prefix => "hex/ss_parallel_sb_16b_ss_wide",
g_reorder_out_file_name => "hex/ss_parallel_sb_16b_reorder_out.hex"
)
PORT MAP (
mm_rst => mm_rst,
mm_clk => mm_clk,
dp_rst => dp_rst,
dp_clk => dp_clk,
-- MM access to the selection RAMs is not used: requests held at reset value, responses left open
ram_ss_reorder_in_mosi => c_mem_mosi_rst,
ram_ss_reorder_in_miso => OPEN,
ram_ss_reorder_out_mosi => c_mem_mosi_rst,
ram_ss_reorder_out_miso => OPEN,
ram_ss_ss_wide_mosi => c_mem_mosi_rst,
ram_ss_ss_wide_miso => OPEN,
input_sosi_arr => ss_parallel_sb_snk_in_arr,
input_siso_arr => ss_parallel_sb_snk_out_arr,
output_sosi_arr => ss_parallel_sb_src_out_arr,
-- Outputs are always told 'ready'; no back pressure is applied to this instance
output_siso_arr => (OTHERS=>c_dp_siso_rdy)
);
-- Rewire 2*8b back to 1*16b
-- Combinatorial process that merges the two 8b output streams of the SB
-- reorder stage into the single 16b stream (index 0) that feeds the FIFO:
-- . The control fields are taken from output 0 only; the control fields of
--   output 1 are not used.
-- . The re/im(15..0) assignments come after the record copy and therefore
--   take precedence for those bits.
p_connect : PROCESS(ss_parallel_sb_src_out_arr)
BEGIN
-- ctrl
ss_parallel_sb_fifo_snk_in_arr(0) <= ss_parallel_sb_src_out_arr(0);
-- Data
-- 16b im = {8b im, 8b re} of output 1; 16b re = {8b im, 8b re} of output 0
ss_parallel_sb_fifo_snk_in_arr(0).im(15 DOWNTO 0) <= ss_parallel_sb_src_out_arr(1).im(7 DOWNTO 0) & ss_parallel_sb_src_out_arr(1).re(7 DOWNTO 0);
ss_parallel_sb_fifo_snk_in_arr(0).re(15 DOWNTO 0) <= ss_parallel_sb_src_out_arr(0).im(7 DOWNTO 0) & ss_parallel_sb_src_out_arr(0).re(7 DOWNTO 0);
END PROCESS;
-- FIFO required as ss_parallel_sb does not have src flow control
-- . Buffers stream 0 coming out of the 2*8b -> 1*16b rewiring; its source
--   side is driven/read via the ss_parallel_sb_fifo_src_* signals.
-- . Complex data is stored together with bsn, empty, channel, error, sync
--   and ctrl (all g_use_* generics are TRUE).
-- . NOTE(review): g_fifo_size = 100 -- confirm this covers the longest
--   period in which the sink de-asserts ready.
u_dp_fifo_sc_sb : ENTITY dp_lib.dp_fifo_sc
GENERIC MAP (
g_data_w => c_nof_complex*c_rsp_terminal_subband_dat_w,
g_bsn_w => c_dp_stream_bsn_w,
g_empty_w => c_dp_stream_empty_w,
g_channel_w => c_dp_stream_channel_w,
g_error_w => c_dp_stream_error_w,
g_use_complex => TRUE,
g_use_bsn => TRUE,
g_use_empty => TRUE,
g_use_channel => TRUE,
g_use_error => TRUE,
g_use_sync => TRUE,
g_use_ctrl => TRUE,
g_fifo_size => 100
)
PORT MAP (
rst => dp_rst,
clk => dp_clk,
-- The FIFO's snk_out is left unconnected: the upstream side gets no flow control from it
snk_out => OPEN,
snk_in => ss_parallel_sb_fifo_snk_in_arr(0),
src_in => ss_parallel_sb_fifo_src_in_arr(0),
src_out => ss_parallel_sb_fifo_src_out_arr(0)
); );
----------------------------------------------------------------------------- -----------------------------------------------------------------------------
...@@ -478,8 +584,8 @@ BEGIN ...@@ -478,8 +584,8 @@ BEGIN
dp_rst => dp_rst, dp_rst => dp_rst,
dp_clk => dp_clk, dp_clk => dp_clk,
snk_in_arr => dp_fifo_sc_src_out_arr, snk_in_arr => ss_parallel_sb_fifo_src_out_arr,
snk_out_arr => dp_fifo_sc_src_in_arr, snk_out_arr => ss_parallel_sb_fifo_src_in_arr,
src_out_arr => aartfaac_bn_sdo_udp_sdo_src_out_arr, src_out_arr => aartfaac_bn_sdo_udp_sdo_src_out_arr,
src_in_arr => aartfaac_bn_sdo_udp_sdo_src_in_arr, src_in_arr => aartfaac_bn_sdo_udp_sdo_src_in_arr,
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment