diff --git a/applications/aartfaac/designs/aartfaac_bn_sdo/hdllib.cfg b/applications/aartfaac/designs/aartfaac_bn_sdo/hdllib.cfg index c2f8f2c1fd3fcb856618029b454dc94492bbc500..f313015ad42c7e841a8000bd0ffc3f8d43c75b64 100644 --- a/applications/aartfaac/designs/aartfaac_bn_sdo/hdllib.cfg +++ b/applications/aartfaac/designs/aartfaac_bn_sdo/hdllib.cfg @@ -9,12 +9,24 @@ synth_top_level_entity = quartus_copy_files = src/quartus/sopc_aartfaac_bn_sdo.sopc . - $SVN/Aartfaac/trunk/Firmware/designs/aartfaac_bn_sdo/src/hex/ hex - $SVN/Aartfaac/trunk/Firmware/modules/rsp_terminal/src/hex/ hex + $SVN/Aartfaac/trunk/Firmware/designs/aartfaac_bn_sdo/src/hex/udp_sdo_ss.hex hex + src/hex/ hex modelsim_copy_files = - $SVN/Aartfaac/trunk/Firmware/modules/rsp_terminal/src/hex/ hex - $SVN/Aartfaac/trunk/Firmware/designs/aartfaac_bn_sdo/src/hex/ hex + $SVN/Aartfaac/trunk/Firmware/designs/aartfaac_bn_sdo/src/hex/subband_dat_0.hex hex + $SVN/Aartfaac/trunk/Firmware/designs/aartfaac_bn_sdo/src/hex/subband_dat_1.hex hex + $SVN/Aartfaac/trunk/Firmware/designs/aartfaac_bn_sdo/src/hex/subband_dat_2.hex hex + $SVN/Aartfaac/trunk/Firmware/designs/aartfaac_bn_sdo/src/hex/subband_dat_3.hex hex + $SVN/Aartfaac/trunk/Firmware/designs/aartfaac_bn_sdo/src/hex/subband_dat_4.hex hex + $SVN/Aartfaac/trunk/Firmware/designs/aartfaac_bn_sdo/src/hex/subband_dat_5.hex hex + $SVN/Aartfaac/trunk/Firmware/designs/aartfaac_bn_sdo/src/hex/subband_dat_6.hex hex + $SVN/Aartfaac/trunk/Firmware/designs/aartfaac_bn_sdo/src/hex/subband_dat_7.hex hex + $SVN/Aartfaac/trunk/Firmware/designs/aartfaac_bn_sdo/src/hex/subband_dat_8.hex hex + $SVN/Aartfaac/trunk/Firmware/designs/aartfaac_bn_sdo/src/hex/subband_dat_9.hex hex + $SVN/Aartfaac/trunk/Firmware/designs/aartfaac_bn_sdo/src/hex/subband_dat_10.hex hex + $SVN/Aartfaac/trunk/Firmware/designs/aartfaac_bn_sdo/src/hex/subband_dat_11.hex hex + $SVN/Aartfaac/trunk/Firmware/designs/aartfaac_bn_sdo/src/hex/udp_sdo_ss.hex hex + src/hex/ hex synth_files = 
$HDL_BUILD_DIR/unb1/quartus/aartfaac_bn_sdo/sopc_aartfaac_bn_sdo.vhd diff --git a/applications/aartfaac/designs/aartfaac_bn_sdo/src/python/gen_hex_files_ss_parallel_sb_16b.py b/applications/aartfaac/designs/aartfaac_bn_sdo/src/python/gen_hex_files_ss_parallel_sb_16b.py new file mode 100644 index 0000000000000000000000000000000000000000..4924c7fc0125a8f1357f5d86ff3937b78ba40948 --- /dev/null +++ b/applications/aartfaac/designs/aartfaac_bn_sdo/src/python/gen_hex_files_ss_parallel_sb_16b.py @@ -0,0 +1,115 @@ +############################################################################### +# +# Copyright (C) 2015 +# ASTRON (Netherlands Institute for Radio Astronomy) <http://www.astron.nl/> +# P.O.Box 2, 7990 AA Dwingeloo, The Netherlands +# +# This program is free software: you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. +# +############################################################################### + +from common import * +from mem_init_file import list_to_hex +import pi_ss_parallel + +# Purpose: +# . Generate the HEX files for the ss_parallel instance. +# Description: +# . In 16b mode, no extra reordering takes place: dout=din. +# . The ss_parallel instance requires 3 types of hex files (4 in total): +# . Input reorder stage : ss_parallel_sb_16b_reorder_in.hex +# . Main selection stage : ss_parallel_sb_16b_ss_wide_0.hex +# : ss_parallel_sb_16b_ss_wide_1.hex +# . 
Output reorder stage : ss_parallel_sb_16b_reorder_out.hex +# +# Remark: + +NOF_IN = 2 +NOF_INTERNALS = 2 +NOT_OUT = 2 +FRAME_SIZE_IN = 864 # 12*72 or 9*96 +FRAME_SIZE_OUT = FRAME_SIZE_IN + +HEX_REORDER_IN_MEM_WIDTH = 8 # Actually 1 bit but HEX files require byte boundaries +HEX_REORDER_IN_MEM_DEPTH = 1024 # 864 cycles * 1 bit to encode the 2 inputs on each word = 864 regs +HEX_REORDER_IN_FILE_NAME = "../hex/ss_parallel_sb_16b_reorder_in.hex" + +HEX_SS_WIDE_MEM_WIDTH = 10 +HEX_SS_WIDE_MEM_DEPTH = 1024 # size = 864 words each (matches output block size) for each of its 2 internal instances +HEX_SS_WIDE_FILE_PREFIX = "../hex/ss_parallel_sb_16b_ss_wide_" + +HEX_REORDER_OUT_MEM_WIDTH = 8 # Actually 4 bits but HEX files require byte boundaries +HEX_REORDER_OUT_MEM_DEPTH = 1024 # 864 cycles * 4 bits to encode the 12 inputs on each word = 864 regs -- NOTE(review): '12 inputs' appears stale, NOF_INTERNALS and NOT_OUT are 2 in this script; confirm the bit width +HEX_REORDER_OUT_FILE_NAME = "../hex/ss_parallel_sb_16b_reorder_out.hex" + +# This is not a TC but we must provide a TC to create the object +import test_case +dummy_tc = test_case.Testcase('','') +# We won't be using IO either. +import node_io +dummy_io = node_io.NodeIO(dummy_tc.nodeImages, dummy_tc.base_ip) + +ss = pi_ss_parallel.PiSsParallel(dummy_tc, dummy_io, NOF_IN, NOF_INTERNALS, NOT_OUT, FRAME_SIZE_IN, FRAME_SIZE_OUT) + +# ======================== +# Create the input matrix: +# ======================== +# stream 0) [ ( 0,0), ( 0,1), .. , ( 0, 70), ( 0,71) ] +# .. +# stream 11) [ (11,0), (11,1), .. 
, (11, 70), (11,71) ] +din = ss.create_Din() + +# ========================= +# Create the output matrix: +# ========================= +dout = din + +# ====================== +# Generate the settings: +# ====================== +[result, Rin, Dram, Dsel, Rout, Errout] = ss.create_settings(din, dout) + +# ===================================================== +# Create the selection buffer values from the settings: +# ===================================================== +reorder_in_buf = ss.ssReorderIn.create_selection_buf(Rin) +select_buf = flatten(Dsel) +reorder_out_buf = ss.ssReorderOut.create_selection_buf(Rout) + +# ================================ +# Generate hex file: input reorder +# ================================ +list_to_hex(reorder_in_buf, HEX_REORDER_IN_FILE_NAME, HEX_REORDER_IN_MEM_WIDTH, HEX_REORDER_IN_MEM_DEPTH) + +# =========================== +# Generate hex files: ss_wide +# =========================== +# First replace the don't cares (-1) with zeroes for list_to_hex (requires integers) +for n,i in enumerate(select_buf): + if i==-1: + select_buf[n]=0 + + +# The created select_buf is a flat list meant to MM write to several instances +# from a certain offset. However, we want to create a HEX file for each +# individual instance, so split the list into 2 (sublist size of 864). +for i, sublist in zip(range(2), split_list(select_buf, 864)): + list_to_hex(sublist, HEX_SS_WIDE_FILE_PREFIX+str(i)+".hex", HEX_SS_WIDE_MEM_WIDTH, HEX_SS_WIDE_MEM_DEPTH) + +# ================================== +# Generate hex files: output reorder +# ================================== +# The output reorder list contains 864 words of 4 bits, so each word easily +# fits within 32b words. 
+list_to_hex(reorder_out_buf, HEX_REORDER_OUT_FILE_NAME, HEX_REORDER_OUT_MEM_WIDTH, HEX_REORDER_OUT_MEM_DEPTH) diff --git a/applications/aartfaac/designs/aartfaac_bn_sdo/src/vhdl/aartfaac_bn_sdo.vhd b/applications/aartfaac/designs/aartfaac_bn_sdo/src/vhdl/aartfaac_bn_sdo.vhd index 1b28651eb13845f688e3cc0606e44fcc481c1f5a..cf57de8133e2b67ef57dcd5c82ee8978c0507406 100644 --- a/applications/aartfaac/designs/aartfaac_bn_sdo/src/vhdl/aartfaac_bn_sdo.vhd +++ b/applications/aartfaac/designs/aartfaac_bn_sdo/src/vhdl/aartfaac_bn_sdo.vhd @@ -219,11 +219,20 @@ ARCHITECTURE str OF aartfaac_bn_sdo IS SIGNAL io_rsp_terminal_rsp_snk_in_arr : t_dp_sosi_arr(c_rsp_terminal_nof_lanes-1 DOWNTO 0); SIGNAL io_rsp_terminal_rsp_snk_out_arr : t_dp_siso_arr(c_rsp_terminal_nof_lanes-1 DOWNTO 0); SIGNAL io_rsp_terminal_subband_src_out_arr : t_dp_sosi_arr(c_rsp_terminal_nof_lanes-1 DOWNTO 0); + SIGNAL ss_parallel_sp_snk_out_arr : t_dp_siso_arr(c_rsp_terminal_nof_lanes-1 DOWNTO 0) := (OTHERS=>c_dp_siso_rdy); SIGNAL ss_parallel_sp_snk_in_arr : t_dp_sosi_arr(c_rsp_terminal_nof_lanes-1 DOWNTO 0); SIGNAL ss_parallel_sp_src_out_arr : t_dp_sosi_arr(c_nof_offload_streams-1 DOWNTO 0); - SIGNAL dp_fifo_sc_src_out_arr : t_dp_sosi_arr(c_nof_offload_streams-1 DOWNTO 0); - SIGNAL dp_fifo_sc_src_in_arr : t_dp_siso_arr(c_nof_offload_streams-1 DOWNTO 0); + SIGNAL ss_parallel_sp_fifo_src_out_arr : t_dp_sosi_arr(c_nof_offload_streams-1 DOWNTO 0); + SIGNAL ss_parallel_sp_fifo_src_in_arr : t_dp_siso_arr(c_nof_offload_streams-1 DOWNTO 0); + + SIGNAL ss_parallel_sb_snk_out_arr : t_dp_siso_arr(2-1 DOWNTO 0) := (OTHERS=>c_dp_siso_rdy); + SIGNAL ss_parallel_sb_snk_in_arr : t_dp_sosi_arr(2-1 DOWNTO 0); + SIGNAL ss_parallel_sb_src_out_arr : t_dp_sosi_arr(2-1 DOWNTO 0); + SIGNAL ss_parallel_sb_fifo_snk_in_arr : t_dp_sosi_arr(c_nof_offload_streams-1 DOWNTO 0); + SIGNAL ss_parallel_sb_fifo_src_out_arr : t_dp_sosi_arr(c_nof_offload_streams-1 DOWNTO 0); + SIGNAL ss_parallel_sb_fifo_src_in_arr : 
t_dp_siso_arr(c_nof_offload_streams-1 DOWNTO 0); + SIGNAL aartfaac_bn_sdo_udp_sdo_src_out_arr : t_dp_sosi_arr(c_nof_offload_streams-1 DOWNTO 0); SIGNAL aartfaac_bn_sdo_udp_sdo_src_in_arr : t_dp_siso_arr(c_nof_offload_streams-1 DOWNTO 0); @@ -417,10 +426,10 @@ BEGIN g_reorder_out_file_name => "hex/ss_parallel_sp_reorder_out.hex" ) PORT MAP ( - mm_rst => mm_rst, - mm_clk => mm_clk, - dp_rst => dp_rst, - dp_clk => dp_clk, + mm_rst => mm_rst, + mm_clk => mm_clk, + dp_rst => dp_rst, + dp_clk => dp_clk, ram_ss_reorder_in_mosi => ram_ss_reorder_in_mosi, ram_ss_reorder_in_miso => ram_ss_reorder_in_miso, @@ -435,9 +444,8 @@ BEGIN output_siso_arr => (OTHERS=>c_dp_siso_rdy) ); - -- FIFO required as ss_parallel_sp does not have src flow control and dp_offload de-asserts - -- its snk_out.ready. - u_dp_fifo_sc : ENTITY dp_lib.dp_fifo_sc + -- FIFO required as ss_parallel_sp does not have src flow control + u_dp_fifo_sc_sp : ENTITY dp_lib.dp_fifo_sc GENERIC MAP ( g_data_w => c_nof_complex*c_rsp_terminal_subband_dat_w, g_bsn_w => c_dp_stream_bsn_w, @@ -460,10 +468,108 @@ BEGIN snk_out => OPEN, snk_in => ss_parallel_sp_src_out_arr(0), - src_in => dp_fifo_sc_src_in_arr(0), - src_out => dp_fifo_sc_src_out_arr(0) + src_in => ss_parallel_sp_fifo_src_in_arr(0), + src_out => ss_parallel_sp_fifo_src_out_arr(0) + ); + + ----------------------------------------------------------------------------- + -- Subband (SB) reordering for 8b mode: + -- . When in 8b mode, input subbands are no longer grouped together like in 16b mode; they're interleaved. + -- . GPU correlator would then need to de-interleave 8b subbands which would take too many CPU resources. + -- . So we need to group together the 8b subbands as shown as '8b output format' below: + -- 16b input format : [ [(SB0,SP0), .. ,(SB0,SP95)], .. , [(SB8 ,SP0), .. ,(SB8 , SP95)] ] data bits 15..0 + -- 8b input format : [ [(SB0,SP0), .. ,(SB0,SP95)], .. , [(SB16,SP0), .. 
,(SB16, SP95)] ] data bits 7..0 // interleaved 8b subbands, bad for GPU machine + -- [ [(SB1,SP0), .. ,(SB1,SP95)], .. , [(SB17,SP0), .. ,(SB17, SP95)] ] data bits 15..8 // interleaved 8b subbands, bad for GPU machine + -- 8b output format : [ [(SB0,SP0),..,(SB0,SP94),(SB1,SP0),..,(SB1,SP94)], .. , [(SB16,SP0),..,(SB16,SP94),(SB17,SP0),..,(SB17, SP94)] ] data bits 7..0 // reordered 8b subbands, good for GPU machine + -- [ [(SB0,SP1),..,(SB0,SP95),(SB1,SP1),..,(SB1,SP95)], .. , [(SB16,SP1),..,(SB16,SP95),(SB17,SP1),..,(SB17, SP95)] ] data bits 15..8 // reordered 8b subbands, good for GPU machine + ----------------------------------------------------------------------------- + + -- Rewire 1*16b to 2*8b + p_connect_sb : PROCESS(ss_parallel_sp_fifo_src_out_arr, ss_parallel_sb_snk_out_arr) + BEGIN + -- ctrl + ss_parallel_sb_snk_in_arr(0) <= ss_parallel_sp_fifo_src_out_arr(0); + ss_parallel_sb_snk_in_arr(1) <= ss_parallel_sp_fifo_src_out_arr(0); + -- flow control in opposite direction + ss_parallel_sp_fifo_src_in_arr(0) <= ss_parallel_sb_snk_out_arr(0); + -- Subband data: even 8b subband indices are located in former 16b real part + ss_parallel_sb_snk_in_arr(0).im(7 DOWNTO 0) <= ss_parallel_sp_fifo_src_out_arr(0).re(15 DOWNTO 8); + ss_parallel_sb_snk_in_arr(0).re(7 DOWNTO 0) <= ss_parallel_sp_fifo_src_out_arr(0).re( 7 DOWNTO 0); + -- subband data: odd 8b subband indices are located in former 16b imaginary part + ss_parallel_sb_snk_in_arr(1).im(7 DOWNTO 0) <= ss_parallel_sp_fifo_src_out_arr(0).im(15 DOWNTO 8); + ss_parallel_sb_snk_in_arr(1).re(7 DOWNTO 0) <= ss_parallel_sp_fifo_src_out_arr(0).im( 7 DOWNTO 0); + END PROCESS; + + -- Re-order the two 8b subband streams + u_ss_parallel_sb : ENTITY ss_lib.ss_parallel + GENERIC MAP( + g_nof_inputs => 2, + g_nof_internals => 2, + g_nof_outputs => 2, + g_dsp_data_w => 8, + g_frame_size_in => c_rsp_terminal_nof_subbands_per_lane * c_rsp_terminal_nof_lanes, -- = 864 (9*96), + g_frame_size_out => c_rsp_terminal_nof_subbands_per_lane 
* c_rsp_terminal_nof_lanes, -- = 864 (9*96) + g_reorder_in_file_name => "hex/ss_parallel_sb_16b_reorder_in.hex", + g_ss_wide_file_prefix => "hex/ss_parallel_sb_16b_ss_wide", + g_reorder_out_file_name => "hex/ss_parallel_sb_16b_reorder_out.hex" + ) + PORT MAP ( + mm_rst => mm_rst, + mm_clk => mm_clk, + dp_rst => dp_rst, + dp_clk => dp_clk, + + ram_ss_reorder_in_mosi => c_mem_mosi_rst, + ram_ss_reorder_in_miso => OPEN, + ram_ss_reorder_out_mosi => c_mem_mosi_rst, + ram_ss_reorder_out_miso => OPEN, + ram_ss_ss_wide_mosi => c_mem_mosi_rst, + ram_ss_ss_wide_miso => OPEN, + + input_sosi_arr => ss_parallel_sb_snk_in_arr, + input_siso_arr => ss_parallel_sb_snk_out_arr, + output_sosi_arr => ss_parallel_sb_src_out_arr, + output_siso_arr => (OTHERS=>c_dp_siso_rdy) ); + + -- Rewire 2*8b back to 1*16b + p_connect : PROCESS(ss_parallel_sb_src_out_arr) + BEGIN + -- ctrl + ss_parallel_sb_fifo_snk_in_arr(0) <= ss_parallel_sb_src_out_arr(0); + -- Data + ss_parallel_sb_fifo_snk_in_arr(0).im(15 DOWNTO 0) <= ss_parallel_sb_src_out_arr(1).im(7 DOWNTO 0) & ss_parallel_sb_src_out_arr(1).re(7 DOWNTO 0); + ss_parallel_sb_fifo_snk_in_arr(0).re(15 DOWNTO 0) <= ss_parallel_sb_src_out_arr(0).im(7 DOWNTO 0) & ss_parallel_sb_src_out_arr(0).re(7 DOWNTO 0); + END PROCESS; + + -- FIFO required as ss_parallel_sb does not have src flow control + u_dp_fifo_sc_sb : ENTITY dp_lib.dp_fifo_sc + GENERIC MAP ( + g_data_w => c_nof_complex*c_rsp_terminal_subband_dat_w, + g_bsn_w => c_dp_stream_bsn_w, + g_empty_w => c_dp_stream_empty_w, + g_channel_w => c_dp_stream_channel_w, + g_error_w => c_dp_stream_error_w, + g_use_complex => TRUE, + g_use_bsn => TRUE, + g_use_empty => TRUE, + g_use_channel => TRUE, + g_use_error => TRUE, + g_use_sync => TRUE, + g_use_ctrl => TRUE, + g_fifo_size => 100 + ) + PORT MAP ( + rst => dp_rst, + clk => dp_clk, + snk_out => OPEN, + snk_in => ss_parallel_sb_fifo_snk_in_arr(0), + + src_in => ss_parallel_sb_fifo_src_in_arr(0), + src_out => ss_parallel_sb_fifo_src_out_arr(0) + ); + 
----------------------------------------------------------------------------- -- Subband offload: -- . Selects 8 out of 9 subbands per block = 8*96=768 32b words @@ -478,8 +584,8 @@ BEGIN dp_rst => dp_rst, dp_clk => dp_clk, - snk_in_arr => dp_fifo_sc_src_out_arr, - snk_out_arr => dp_fifo_sc_src_in_arr, + snk_in_arr => ss_parallel_sb_fifo_src_out_arr, + snk_out_arr => ss_parallel_sb_fifo_src_in_arr, src_out_arr => aartfaac_bn_sdo_udp_sdo_src_out_arr, src_in_arr => aartfaac_bn_sdo_udp_sdo_src_in_arr,