diff --git a/applications/lofar2/libraries/sdp/hdllib.cfg b/applications/lofar2/libraries/sdp/hdllib.cfg index 6e7ad9ac344bfb31412ec917035554d5013d8ca7..8369aee4be263ba8176c1e9f5fd799569f5c09ad 100644 --- a/applications/lofar2/libraries/sdp/hdllib.cfg +++ b/applications/lofar2/libraries/sdp/hdllib.cfg @@ -22,6 +22,8 @@ synth_files = src/vhdl/sdp_beamformer_output.vhd src/vhdl/sdp_statistics_offload.vhd src/vhdl/sdp_crosslets_subband_select.vhd + src/vhdl/sdp_crosslets_remote.vhd + src/vhdl/sdp_crosslets_remote_v2.vhd src/vhdl/node_sdp_adc_input_and_timing.vhd src/vhdl/node_sdp_filterbank.vhd src/vhdl/node_sdp_oversampled_filterbank.vhd @@ -37,12 +39,16 @@ test_bench_files = tb/vhdl/tb_sdp_crosslets_subband_select.vhd tb/vhdl/tb_sdp_beamformer_output.vhd tb/vhdl/tb_tb_sdp_beamformer_output.vhd + tb/vhdl/tb_sdp_beamformer_remote_ring.vhd + tb/vhdl/tb_sdp_crosslets_remote_ring.vhd regression_test_vhdl = tb/vhdl/tb_sdp_info.vhd tb/vhdl/tb_sdp_statistics_offload.vhd tb/vhdl/tb_tb_sdp_statistics_offload.vhd tb/vhdl/tb_sdp_crosslets_subband_select.vhd + tb/vhdl/tb_sdp_crosslets_remote_ring.vhd + tb/vhdl/tb_sdp_beamformer_remote_ring.vhd tb/vhdl/tb_tb_sdp_beamformer_output.vhd [modelsim_project_file] diff --git a/applications/lofar2/libraries/sdp/src/vhdl/node_sdp_correlator.vhd b/applications/lofar2/libraries/sdp/src/vhdl/node_sdp_correlator.vhd index d5c96ea99d864ddf79b199ce24fe9c532dc776bf..fa97a9f7d77b10986605677f326a2135b1a0b8e2 100644 --- a/applications/lofar2/libraries/sdp/src/vhdl/node_sdp_correlator.vhd +++ b/applications/lofar2/libraries/sdp/src/vhdl/node_sdp_correlator.vhd @@ -94,20 +94,8 @@ end node_sdp_correlator; architecture str of node_sdp_correlator is constant c_nof_controllers : positive := 2; - constant c_block_size : natural := c_sdp_N_crosslets_max * c_sdp_S_pn; - constant c_block_size_longwords : natural := ceil_div(c_block_size, 2); -- 32b -> 64b - constant c_data_w : natural := c_sdp_W_crosslet * c_nof_complex; - --- The size for 1 block is 
probably already enough as the number of blocks received --- on the remote input of the mux probably have enough gap time in between. Just --- to be sure to not run into issues in the future, the fifo size is increased to --- buffer the maximum nof blocks per block period. - constant c_mux_fifo_size : natural := 2**ceil_log2(g_P_sq * c_block_size_longwords); --- c_fifo_fill_size should be at least 2 * c_block_size_longwords as dp_repack_data --- repacks from 64bit to 32bit. Chosing 3x to have some room. - constant c_fifo_fill_size : natural := 2**ceil_log2(3 * c_block_size_longwords); - --- crosslet statistics offload + + -- crosslet statistics offload signal ram_st_offload_copi : t_mem_copi := c_mem_copi_rst; signal ram_st_offload_cipo : t_mem_cipo := c_mem_cipo_rst; @@ -117,21 +105,9 @@ architecture str of node_sdp_correlator is signal controller_cipo_arr : t_mem_cipo_arr(0 to c_nof_controllers - 1) := (others => c_mem_cipo_rst); signal quant_sosi_arr : t_dp_sosi_arr(c_sdp_P_pfb - 1 downto 0) := (others => c_dp_sosi_rst); - signal dp_bsn_sync_scheduler_src_out : t_dp_sosi := c_dp_sosi_rst; signal xsel_sosi : t_dp_sosi := c_dp_sosi_rst; - signal xsel_data_sosi : t_dp_sosi := c_dp_sosi_rst; - signal local_sosi : t_dp_sosi := c_dp_sosi_rst; - signal new_interval : std_logic; - signal ring_mux_sosi : t_dp_sosi := c_dp_sosi_rst; - signal ring_mux_siso : t_dp_siso := c_dp_siso_rdy; - signal dp_fifo_fill_sosi : t_dp_sosi := c_dp_sosi_rst; - signal dp_fifo_fill_siso : t_dp_siso := c_dp_siso_rdy; - signal rx_sosi : t_dp_sosi := c_dp_sosi_rst; - signal dispatch_invert_sosi_arr : t_dp_sosi_arr(0 to g_P_sq - 1) := (others => c_dp_sosi_rst); - signal dispatch_sosi_arr : t_dp_sosi_arr(g_P_sq - 1 downto 0) := (others => c_dp_sosi_rst); - signal crosslets_sosi : t_dp_sosi := c_dp_sosi_rst; signal crosslets_copi : t_mem_copi := c_mem_copi_rst; signal crosslets_cipo_arr : t_mem_cipo_arr(g_P_sq - 1 downto 0) := (others => c_mem_cipo_rst); @@ -203,168 +179,33 @@ begin xst_bs_sosi <= 
xsel_sosi; --------------------------------------------------------------- - -- Repack 32b to 64b - --------------------------------------------------------------- - -- repacking xsel re/im to data field. - p_wire_xsel_sosi : process(xsel_sosi) - begin - xsel_data_sosi <= xsel_sosi; - xsel_data_sosi.data( c_sdp_W_crosslet - 1 downto 0) <= xsel_sosi.re(c_sdp_W_crosslet - 1 downto 0); - xsel_data_sosi.data(c_nof_complex * c_sdp_W_crosslet - 1 downto c_sdp_W_crosslet) <= xsel_sosi.im(c_sdp_W_crosslet - 1 downto 0); - end process; - - u_dp_repack_data_local : entity dp_lib.dp_repack_data - generic map ( - g_in_dat_w => c_data_w, - g_in_nof_words => c_longword_w / c_data_w, - g_out_dat_w => c_longword_w, - g_out_nof_words => 1, - g_pipeline_ready => true -- Needed for src_in.ready to snk_out.ready. - ) - port map ( - rst => dp_rst, - clk => dp_clk, - - snk_in => xsel_data_sosi, - src_out => local_sosi - ); - - --------------------------------------------------------------- - -- ring_mux - --------------------------------------------------------------- - u_ring_mux : entity ring_lib.ring_mux - generic map ( - g_bsn_w => c_dp_stream_bsn_w, - g_data_w => c_longword_w, - g_channel_w => c_word_w, - g_use_error => false, - g_fifo_size => array_init(c_mux_fifo_size, 2) - ) - port map ( - dp_clk => dp_clk, - dp_rst => dp_rst, - - remote_sosi => from_ri_sosi, - local_sosi => local_sosi, - mux_sosi => ring_mux_sosi, - mux_siso => ring_mux_siso - ); - - to_ri_sosi <= ring_mux_sosi; - - -- fill fifo to remove gaps - u_dp_fifo_fill_eop : entity dp_lib.dp_fifo_fill_eop - generic map ( - g_data_w => c_longword_w, - g_bsn_w => c_dp_stream_bsn_w, - g_empty_w => c_dp_stream_empty_w, - g_channel_w => c_dp_stream_channel_w, - g_error_w => c_dp_stream_error_w, - g_use_bsn => true, - g_use_empty => true, - g_use_channel => true, - g_use_error => true, - g_use_sync => true, - g_fifo_fill => c_block_size_longwords, - g_fifo_size => c_fifo_fill_size - ) - port map ( - wr_rst => dp_rst, - wr_clk 
=> dp_clk, - rd_rst => dp_rst, - rd_clk => dp_clk, - - snk_out => ring_mux_siso, - snk_in => ring_mux_sosi, - - src_in => dp_fifo_fill_siso, - src_out => dp_fifo_fill_sosi - ); - - --------------------------------------------------------------- - -- Repack 64b to 32b + -- Local and remote crosslets --------------------------------------------------------------- - u_dp_repack_data_rx : entity dp_lib.dp_repack_data + u_sdp_crosslets_remote : entity work.sdp_crosslets_remote_v2 generic map ( - g_in_dat_w => c_longword_w, - g_in_nof_words => 1, - g_out_dat_w => c_data_w, - g_out_nof_words => c_longword_w / c_data_w, - g_pipeline_ready => true -- Needed for src_in.ready to snk_out.ready. + g_P_sq => g_P_sq ) port map ( - rst => dp_rst, - clk => dp_clk, - - snk_in => dp_fifo_fill_sosi, - snk_out => dp_fifo_fill_siso, - src_out => rx_sosi - ); - - --------------------------------------------------------------- - -- dp_demux - --------------------------------------------------------------- - u_dp_demux : entity dp_lib.dp_demux - generic map ( - g_mode => 0, - g_nof_output => g_P_sq, - g_remove_channel_lo => false, - g_sel_ctrl_invert => true -- TRUE when indexed (g_nof_input-1 DOWNTO 0) - ) - port map ( - rst => dp_rst, - clk => dp_clk, - - snk_in => rx_sosi, - src_out_arr => dispatch_invert_sosi_arr - ); - - dispatch_sosi_arr <= func_dp_stream_arr_reverse_range(dispatch_invert_sosi_arr); - - --------------------------------------------------------------- - -- dp_bsn_aligner_v2 - --------------------------------------------------------------- - u_mmp_dp_bsn_align_v2 : entity dp_lib.mmp_dp_bsn_align_v2 - generic map( - -- for dp_bsn_align_v2 - g_nof_streams => g_P_sq, - g_bsn_latency_max => 2, - g_nof_aligners_max => 1, -- 1 for Access scheme 3. 
- g_block_size => c_block_size, - g_data_w => c_data_w, - g_use_mm_output => true, - g_rd_latency => 1, -- Required for st_xst - -- for mms_dp_bsn_monitor_v2 - -- Using c_sdp_N_clk_sync_timeout_xsub as g_nof_clk_per_sync is used for BSN monitor timeout. - g_nof_clk_per_sync => c_sdp_N_clk_sync_timeout_xsub, - g_nof_input_bsn_monitors => g_P_sq, - g_use_bsn_output_monitor => true - ) - port map ( - -- Memory-mapped clock domain - mm_rst => mm_rst, - mm_clk => mm_clk, - - reg_bsn_align_copi => reg_bsn_align_copi, - reg_bsn_align_cipo => reg_bsn_align_cipo, - - reg_input_monitor_copi => reg_bsn_monitor_v2_bsn_align_input_copi, - reg_input_monitor_cipo => reg_bsn_monitor_v2_bsn_align_input_cipo, - - reg_output_monitor_copi => reg_bsn_monitor_v2_bsn_align_output_copi, - reg_output_monitor_cipo => reg_bsn_monitor_v2_bsn_align_output_cipo, - - -- Streaming clock domain - dp_rst => dp_rst, - dp_clk => dp_clk, - - -- Streaming input - in_sosi_arr => dispatch_sosi_arr, - - -- Output via local MM interface in dp_clk domain, when g_use_mm_output = TRUE. 
- mm_sosi => crosslets_sosi, - mm_copi => crosslets_copi, - mm_cipo_arr => crosslets_cipo_arr + dp_clk => dp_clk, + dp_rst => dp_rst, + + xsel_sosi => xsel_sosi, + from_ri_sosi => from_ri_sosi, + to_ri_sosi => to_ri_sosi, + + crosslets_sosi => crosslets_sosi, + crosslets_copi => crosslets_copi, + crosslets_cipo_arr => crosslets_cipo_arr, + + mm_rst => mm_rst, + mm_clk => mm_clk, + + reg_bsn_align_copi => reg_bsn_align_copi, + reg_bsn_align_cipo => reg_bsn_align_cipo, + reg_bsn_monitor_v2_bsn_align_input_copi => reg_bsn_monitor_v2_bsn_align_input_copi, + reg_bsn_monitor_v2_bsn_align_input_cipo => reg_bsn_monitor_v2_bsn_align_input_cipo, + reg_bsn_monitor_v2_bsn_align_output_copi => reg_bsn_monitor_v2_bsn_align_output_copi, + reg_bsn_monitor_v2_bsn_align_output_cipo => reg_bsn_monitor_v2_bsn_align_output_cipo ); --------------------------------------------------------------- @@ -397,8 +238,8 @@ begin --------------------------------------------------------------- -- Connect 2 mm_controllers to the common_mem_mux output controller_copi_arr(0) <= ram_st_xsq_copi; -- MM access via QSYS MM bus - ram_st_xsq_cipo <= controller_cipo_arr(0); controller_copi_arr(1) <= ram_st_offload_copi; -- MM access by UDP offload + ram_st_xsq_cipo <= controller_cipo_arr(0); ram_st_offload_cipo <= controller_cipo_arr(1); u_mem_controller_mux : entity mm_lib.mm_master_mux diff --git a/applications/lofar2/libraries/sdp/src/vhdl/sdp_beamformer_remote.vhd b/applications/lofar2/libraries/sdp/src/vhdl/sdp_beamformer_remote.vhd index 11f74388b5f5cb189b8d17adfe68ea0044199480..00295713564f8764bcbd741e7a212a378b4c1f1b 100644 --- a/applications/lofar2/libraries/sdp/src/vhdl/sdp_beamformer_remote.vhd +++ b/applications/lofar2/libraries/sdp/src/vhdl/sdp_beamformer_remote.vhd @@ -37,11 +37,14 @@ use dp_lib.dp_stream_pkg.all; use work.sdp_pkg.all; entity sdp_beamformer_remote is + generic ( + g_nof_aligners_max : natural := c_sdp_N_pn_max + ); port ( - dp_clk : in std_logic; - dp_rst : in std_logic; + 
dp_clk : in std_logic; + dp_rst : in std_logic; - rn_index : in natural range 0 to c_sdp_N_pn_max - 1 := 0; + rn_index : in natural range 0 to c_sdp_N_pn_max - 1 := 0; local_bf_sosi : in t_dp_sosi; from_ri_sosi : in t_dp_sosi; @@ -65,13 +68,26 @@ end sdp_beamformer_remote; architecture str of sdp_beamformer_remote is constant c_data_w : natural := c_nof_complex * c_sdp_W_beamlet_sum; constant c_block_size : natural := c_sdp_S_sub_bf * c_sdp_N_pol_bf; - constant c_fifo_size : natural := 2**ceil_log2((c_block_size * 9) / 16); -- 9/16 = 36/64, 1 block of 64 bit words rounded to the next power of 2 = 1024. + constant c_fifo_size : natural := 2**ceil_log2((c_block_size * 9) / 16); -- 9/16 = 36/64, 1 block of + -- 64 bit words rounded to the next power of 2 = 1024. + + -- Max 2 blocks latency per node in chain. Use c_bsn_latency_first_node = 1 + -- for first node is possible, because it does not have to align with remote + -- input. By using c_bsn_latency_first_node = 1 the circular buffer size + -- becomes true_log_pow2(1 + g_nof_aligners_max * c_bsn_latency_max + + -- c_bsn_latency_first_node) = true_log_pow2(1 + (16 - 1) * 2 + 1) = 32 + -- blocks, instead of true_log_pow2(1 + 16 * 2) = 64 blocks. + constant c_bsn_latency_max : natural := 2; + constant c_bsn_latency_first_node : natural := 1; + + signal chain_node_index : natural range 0 to c_sdp_N_pn_max - 1 := 0; - signal dispatch_sosi_arr : t_dp_sosi_arr(c_dual - 1 downto 0) := (others => c_dp_sosi_rst); -- 1 for local, 1 for remote. 
+ -- c_sdp_P_sum = 2 streams, 1 for local, 1 for remote + signal dispatch_sosi_arr : t_dp_sosi_arr(c_sdp_P_sum - 1 downto 0) := (others => c_dp_sosi_rst); signal dp_fifo_sosi : t_dp_sosi := c_dp_sosi_rst; signal dp_fifo_siso : t_dp_siso := c_dp_siso_rdy; - signal beamlets_data_sosi_arr : t_dp_sosi_arr(c_dual - 1 downto 0) := (others => c_dp_sosi_rst); - signal beamlets_sosi_arr : t_dp_sosi_arr(c_dual - 1 downto 0) := (others => c_dp_sosi_rst); + signal beamlets_data_sosi_arr : t_dp_sosi_arr(c_sdp_P_sum - 1 downto 0) := (others => c_dp_sosi_rst); + signal beamlets_sosi_arr : t_dp_sosi_arr(c_sdp_P_sum - 1 downto 0) := (others => c_dp_sosi_rst); signal i_bf_sum_sosi : t_dp_sosi := c_dp_sosi_rst; signal bf_sum_data_sosi : t_dp_sosi := c_dp_sosi_rst; begin @@ -79,8 +95,10 @@ begin p_wire_local_bf_sosi : process(local_bf_sosi) begin dispatch_sosi_arr(0) <= local_bf_sosi; - dispatch_sosi_arr(0).data(c_sdp_W_beamlet_sum - 1 downto 0) <= local_bf_sosi.re(c_sdp_W_beamlet_sum - 1 downto 0); - dispatch_sosi_arr(0).data(c_data_w - 1 downto c_sdp_W_beamlet_sum) <= local_bf_sosi.im(c_sdp_W_beamlet_sum - 1 downto 0); + dispatch_sosi_arr(0).data(c_sdp_W_beamlet_sum - 1 downto 0) <= + local_bf_sosi.re(c_sdp_W_beamlet_sum - 1 downto 0); + dispatch_sosi_arr(0).data(c_data_w - 1 downto c_sdp_W_beamlet_sum) <= + local_bf_sosi.im(c_sdp_W_beamlet_sum - 1 downto 0); end process; --------------------------------------------------------------- @@ -126,20 +144,30 @@ begin --------------------------------------------------------------- -- dp_bsn_aligner_v2 --------------------------------------------------------------- + + -- The SDP beamformer starts at ring node 0 and outputs at the last ring + -- node, therefore the chain_node_index = the rn_index. The chain_node_index + -- does not wrap, because it starts at ring node 0. Therefore a design with + -- an SDP beamformer that is defined for g_nof_aligners_max = c_sdp_N_pn_max + -- = 16 will also work in a ring with less nodes. 
+ chain_node_index <= rn_index; + u_mmp_dp_bsn_align_v2 : entity dp_lib.mmp_dp_bsn_align_v2 generic map( -- for dp_bsn_align_v2 - g_nof_streams => c_dual, - g_bsn_latency_max => 2, -- max 2 blocks latency - g_nof_aligners_max => c_sdp_N_pn_max, - g_block_size => c_block_size, - g_data_w => c_data_w, - g_use_mm_output => false, - g_rd_latency => 1, + g_nof_streams => c_sdp_P_sum, + g_bsn_latency_max => c_bsn_latency_max, + g_bsn_latency_first_node => c_bsn_latency_first_node, + g_nof_aligners_max => g_nof_aligners_max, + g_block_size => c_block_size, + g_data_w => c_data_w, + g_use_mm_output => false, + g_rd_latency => 1, -- for mms_dp_bsn_monitor_v2 - g_nof_clk_per_sync => c_sdp_N_clk_sync_timeout, -- Using c_sdp_N_clk_sync_timeout as g_nof_clk_per_sync is used for BSN monitor timeout. - g_nof_input_bsn_monitors => c_dual, - g_use_bsn_output_monitor => true + g_nof_clk_per_sync => c_sdp_N_clk_sync_timeout, -- Using c_sdp_N_clk_sync_timeout as g_nof_clk_per_sync + -- is used for BSN monitor timeout. 
+ g_nof_input_bsn_monitors => c_sdp_P_sum, + g_use_bsn_output_monitor => true ) port map ( -- Memory-mapped clock domain @@ -159,7 +187,7 @@ begin dp_rst => dp_rst, dp_clk => dp_clk, - node_index => rn_index, + chain_node_index => chain_node_index, -- Streaming input in_sosi_arr => dispatch_sosi_arr, @@ -182,7 +210,7 @@ begin --------------------------------------------------------------- u_dp_complex_add : entity dp_lib.dp_complex_add generic map( - g_nof_inputs => c_dual, + g_nof_inputs => c_sdp_P_sum, g_data_w => c_sdp_W_beamlet_sum ) port map( diff --git a/applications/lofar2/libraries/sdp/src/vhdl/sdp_crosslets_remote.vhd b/applications/lofar2/libraries/sdp/src/vhdl/sdp_crosslets_remote.vhd new file mode 100644 index 0000000000000000000000000000000000000000..97c1941ff2278c6f0218008e9a96d170e5b0e439 --- /dev/null +++ b/applications/lofar2/libraries/sdp/src/vhdl/sdp_crosslets_remote.vhd @@ -0,0 +1,267 @@ +------------------------------------------------------------------------------- +-- +-- Copyright 2021 +-- ASTRON (Netherlands Institute for Radio Astronomy) <http://www.astron.nl/> +-- P.O.Box 2, 7990 AA Dwingeloo, The Netherlands +-- +-- Licensed under the Apache License, Version 2.0 (the "License"); +-- you may not use this file except in compliance with the License. +-- You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, software +-- distributed under the License is distributed on an "AS IS" BASIS, +-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-- See the License for the specific language governing permissions and +-- limitations under the License. +-- +------------------------------------------------------------------------------- + +------------------------------------------------------------------------------- +-- +-- Author: R. van der Walle, E. Kooistra +-- Purpose: +-- . 
Implements the functionality for remote crosslets IO and aligning the +-- local and remote crosslets in the node_sdp_correlator of the LOFAR2 +-- SDPFW design. +-- Description: +------------------------------------------------------------------------------- + +library IEEE, common_lib, dp_lib, reorder_lib, st_lib, mm_lib, ring_lib; +use IEEE.std_logic_1164.all; +use common_lib.common_pkg.all; +use common_lib.common_mem_pkg.all; +use common_lib.common_network_layers_pkg.all; +use dp_lib.dp_stream_pkg.all; +use ring_lib.ring_pkg.all; +use work.sdp_pkg.all; + +entity sdp_crosslets_remote is + generic ( + g_P_sq : natural := c_sdp_P_sq + ); + port ( + dp_clk : in std_logic; + dp_rst : in std_logic; + + xsel_sosi : in t_dp_sosi; + from_ri_sosi : in t_dp_sosi := c_dp_sosi_rst; + to_ri_sosi : out t_dp_sosi; + + crosslets_sosi : out t_dp_sosi; + crosslets_copi : in t_mem_copi := c_mem_copi_rst; + crosslets_cipo_arr : out t_mem_cipo_arr(g_P_sq - 1 downto 0); + + mm_rst : in std_logic; + mm_clk : in std_logic; + + reg_bsn_align_copi : in t_mem_copi := c_mem_copi_rst; + reg_bsn_align_cipo : out t_mem_cipo; + reg_bsn_monitor_v2_bsn_align_input_copi : in t_mem_copi := c_mem_copi_rst; + reg_bsn_monitor_v2_bsn_align_input_cipo : out t_mem_cipo; + reg_bsn_monitor_v2_bsn_align_output_copi : in t_mem_copi := c_mem_copi_rst; + reg_bsn_monitor_v2_bsn_align_output_cipo : out t_mem_cipo + ); +end sdp_crosslets_remote; + +architecture str of sdp_crosslets_remote is + constant c_block_size : natural := c_sdp_N_crosslets_max * c_sdp_S_pn; + constant c_block_size_longwords : natural := ceil_div(c_block_size, 2); -- 32b -> 64b + constant c_data_w : natural := c_sdp_W_crosslet * c_nof_complex; + -- The channel field carries the index of time multiplexed crosslet packets + constant c_use_channel : boolean := true; + constant c_channel_w : natural := ceil_log2(g_P_sq); + -- With 32b data repacked in 64b one empty bit is enough. 
For crosslets the number + -- of 32b words is c_block_size, which is even, so empty will always be 0. However do + -- support odd sizes, to be safe. + constant c_use_empty : boolean := true; + constant c_empty_w : natural := 1; + -- The from_ri_sosi only carries correct packets, so error field is not used. + constant c_use_error : boolean := false; + + -- The size for 1 block is probably already enough as the number of blocks received + -- on the remote input of the mux probably have enough gap time in between. Just + -- to be sure to not run into issues in the future, the fifo size is increased to + -- buffer the maximum nof blocks per block period. + constant c_mux_fifo_size : natural := 2**ceil_log2(g_P_sq * c_block_size_longwords); + -- c_fifo_fill_size should be at least 2 * c_block_size_longwords as dp_repack_data + -- repacks from 64bit to 32bit. Choosing 3x to have some room. + constant c_fifo_fill_size : natural := 2**ceil_log2(3 * c_block_size_longwords); + + signal xsel_data_sosi : t_dp_sosi := c_dp_sosi_rst; + signal local_sosi : t_dp_sosi := c_dp_sosi_rst; + + signal ring_mux_sosi : t_dp_sosi := c_dp_sosi_rst; + signal ring_mux_siso : t_dp_siso := c_dp_siso_rdy; + signal dp_fifo_fill_sosi : t_dp_sosi := c_dp_sosi_rst; + signal dp_fifo_fill_siso : t_dp_siso := c_dp_siso_rdy; + signal rx_sosi : t_dp_sosi := c_dp_sosi_rst; + signal dispatch_invert_sosi_arr : t_dp_sosi_arr(0 to g_P_sq - 1) := (others => c_dp_sosi_rst); + signal dispatch_sosi_arr : t_dp_sosi_arr(g_P_sq - 1 downto 0) := (others => c_dp_sosi_rst); +begin + --------------------------------------------------------------- + -- Repack 32b to 64b + --------------------------------------------------------------- + -- repacking xsel re/im to data field. 
+ p_wire_xsel_sosi : process(xsel_sosi) + begin + xsel_data_sosi <= xsel_sosi; + xsel_data_sosi.data( c_sdp_W_crosslet - 1 downto 0) <= xsel_sosi.re(c_sdp_W_crosslet - 1 downto 0); + xsel_data_sosi.data(c_nof_complex * c_sdp_W_crosslet - 1 downto c_sdp_W_crosslet) <= xsel_sosi.im(c_sdp_W_crosslet - 1 downto 0); + end process; + + u_dp_repack_data_local : entity dp_lib.dp_repack_data + generic map ( + g_in_dat_w => c_data_w, + g_in_nof_words => c_longword_w / c_data_w, + g_out_dat_w => c_longword_w, + g_out_nof_words => 1, + g_pipeline_ready => true -- Needed for src_in.ready to snk_out.ready. + ) + port map ( + rst => dp_rst, + clk => dp_clk, + + snk_in => xsel_data_sosi, + src_out => local_sosi + ); + + --------------------------------------------------------------- + -- ring_mux + --------------------------------------------------------------- + u_ring_mux : entity ring_lib.ring_mux + generic map ( + g_bsn_w => c_dp_stream_bsn_w, + g_data_w => c_longword_w, + g_channel_w => c_word_w, + g_use_error => c_use_error, + g_fifo_size => array_init(c_mux_fifo_size, 2) + ) + port map ( + dp_clk => dp_clk, + dp_rst => dp_rst, + + remote_sosi => from_ri_sosi, + local_sosi => local_sosi, + mux_sosi => ring_mux_sosi, + mux_siso => ring_mux_siso + ); + + to_ri_sosi <= ring_mux_sosi; + + -- fill fifo to remove valid gaps that occur due to repack 32b/64b in local_sosi, + -- the from_ri_sosi has no valid gaps during block. 
+ u_dp_fifo_fill_eop : entity dp_lib.dp_fifo_fill_eop + generic map ( + g_data_w => c_longword_w, + g_bsn_w => c_dp_stream_bsn_w, + g_empty_w => c_empty_w, + g_channel_w => c_channel_w, + g_use_bsn => true, + g_use_empty => c_use_empty, + g_use_channel => c_use_channel, + g_use_error => c_use_error, + g_use_sync => true, + g_fifo_fill => c_block_size_longwords, + g_fifo_size => c_fifo_fill_size + ) + port map ( + wr_rst => dp_rst, + wr_clk => dp_clk, + rd_rst => dp_rst, + rd_clk => dp_clk, + + snk_out => ring_mux_siso, + snk_in => ring_mux_sosi, + + src_in => dp_fifo_fill_siso, + src_out => dp_fifo_fill_sosi + ); + + --------------------------------------------------------------- + -- Repack 64b to 32b + --------------------------------------------------------------- + u_dp_repack_data_rx : entity dp_lib.dp_repack_data + generic map ( + g_in_dat_w => c_longword_w, + g_in_nof_words => 1, + g_out_dat_w => c_data_w, + g_out_nof_words => c_longword_w / c_data_w, + g_pipeline_ready => true -- Needed for src_in.ready to snk_out.ready. 
+ ) + port map ( + rst => dp_rst, + clk => dp_clk, + + snk_in => dp_fifo_fill_sosi, + snk_out => dp_fifo_fill_siso, + src_out => rx_sosi + ); + + --------------------------------------------------------------- + -- dp_demux + --------------------------------------------------------------- + u_dp_demux : entity dp_lib.dp_demux + generic map ( + g_mode => 0, + g_nof_output => g_P_sq, + g_remove_channel_lo => false, + g_sel_ctrl_invert => true -- TRUE when indexed (g_nof_input-1 DOWNTO 0) + ) + port map ( + rst => dp_rst, + clk => dp_clk, + + snk_in => rx_sosi, + src_out_arr => dispatch_invert_sosi_arr + ); + + dispatch_sosi_arr <= func_dp_stream_arr_reverse_range(dispatch_invert_sosi_arr); + + --------------------------------------------------------------- + -- dp_bsn_aligner_v2 + --------------------------------------------------------------- + u_mmp_dp_bsn_align_v2 : entity dp_lib.mmp_dp_bsn_align_v2 + generic map( + -- for dp_bsn_align_v2 + g_nof_streams => g_P_sq, + g_bsn_latency_max => 2, + g_nof_aligners_max => 1, -- 1 for Access scheme 3. + g_block_size => c_block_size, + g_data_w => c_data_w, + g_use_mm_output => true, + g_rd_latency => 1, -- Required for st_xst + -- for mms_dp_bsn_monitor_v2 + -- Using c_sdp_N_clk_sync_timeout_xsub as g_nof_clk_per_sync is used for BSN monitor timeout. 
+ g_nof_clk_per_sync => c_sdp_N_clk_sync_timeout_xsub, + g_nof_input_bsn_monitors => g_P_sq, + g_use_bsn_output_monitor => true + ) + port map ( + -- Memory-mapped clock domain + mm_rst => mm_rst, + mm_clk => mm_clk, + + reg_bsn_align_copi => reg_bsn_align_copi, + reg_bsn_align_cipo => reg_bsn_align_cipo, + + reg_input_monitor_copi => reg_bsn_monitor_v2_bsn_align_input_copi, + reg_input_monitor_cipo => reg_bsn_monitor_v2_bsn_align_input_cipo, + + reg_output_monitor_copi => reg_bsn_monitor_v2_bsn_align_output_copi, + reg_output_monitor_cipo => reg_bsn_monitor_v2_bsn_align_output_cipo, + + -- Streaming clock domain + dp_rst => dp_rst, + dp_clk => dp_clk, + + -- Streaming input + in_sosi_arr => dispatch_sosi_arr, + + -- Output via local MM interface in dp_clk domain, when g_use_mm_output = TRUE. + mm_sosi => crosslets_sosi, + mm_copi => crosslets_copi, + mm_cipo_arr => crosslets_cipo_arr + ); +end str; diff --git a/applications/lofar2/libraries/sdp/src/vhdl/sdp_crosslets_remote_v2.vhd b/applications/lofar2/libraries/sdp/src/vhdl/sdp_crosslets_remote_v2.vhd new file mode 100644 index 0000000000000000000000000000000000000000..2f283a380342a2fe65510774a4f33dcc95071cb0 --- /dev/null +++ b/applications/lofar2/libraries/sdp/src/vhdl/sdp_crosslets_remote_v2.vhd @@ -0,0 +1,273 @@ +------------------------------------------------------------------------------- +-- +-- Copyright 2021 +-- ASTRON (Netherlands Institute for Radio Astronomy) <http://www.astron.nl/> +-- P.O.Box 2, 7990 AA Dwingeloo, The Netherlands +-- +-- Licensed under the Apache License, Version 2.0 (the "License"); +-- you may not use this file except in compliance with the License. +-- You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, software +-- distributed under the License is distributed on an "AS IS" BASIS, +-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+-- See the License for the specific language governing permissions and +-- limitations under the License. +-- +------------------------------------------------------------------------------- + +------------------------------------------------------------------------------- +-- +-- Author: R. van der Walle, E. Kooistra +-- Purpose: +-- . Implements the functionality for remote crosslets IO and aligning the +-- local and remote crosslets in the node_sdp_correlator of the LOFAR2 +-- SDPFW design. +-- Description: +-- . Improvement compared to v1 is that in v2 the local crosslets are passed +-- on directly to input 0 of the dp_bsn_align_v2, instead of via the +-- ring_mux and dp_demux. In this way the block period of the reference +-- input 0 remains constant and therefore also of crosslets_sosi.sop. This +-- ensures that there is always constant, and thus enough, time to read the +-- aligned output via crosslets_copi. +------------------------------------------------------------------------------- + +library IEEE, common_lib, dp_lib, reorder_lib, st_lib, mm_lib, ring_lib; +use IEEE.std_logic_1164.all; +use common_lib.common_pkg.all; +use common_lib.common_mem_pkg.all; +use common_lib.common_network_layers_pkg.all; +use dp_lib.dp_stream_pkg.all; +use ring_lib.ring_pkg.all; +use work.sdp_pkg.all; + +entity sdp_crosslets_remote_v2 is + generic ( + g_P_sq : natural := c_sdp_P_sq + ); + port ( + dp_clk : in std_logic; + dp_rst : in std_logic; + + xsel_sosi : in t_dp_sosi; + from_ri_sosi : in t_dp_sosi := c_dp_sosi_rst; + to_ri_sosi : out t_dp_sosi; + + crosslets_sosi : out t_dp_sosi; + crosslets_copi : in t_mem_copi := c_mem_copi_rst; + crosslets_cipo_arr : out t_mem_cipo_arr(g_P_sq - 1 downto 0); + + mm_rst : in std_logic; + mm_clk : in std_logic; + + reg_bsn_align_copi : in t_mem_copi := c_mem_copi_rst; + reg_bsn_align_cipo : out t_mem_cipo; + reg_bsn_monitor_v2_bsn_align_input_copi : in t_mem_copi := c_mem_copi_rst; + reg_bsn_monitor_v2_bsn_align_input_cipo : out 
t_mem_cipo; + reg_bsn_monitor_v2_bsn_align_output_copi : in t_mem_copi := c_mem_copi_rst; + reg_bsn_monitor_v2_bsn_align_output_cipo : out t_mem_cipo + ); +end sdp_crosslets_remote_v2; + +architecture str of sdp_crosslets_remote_v2 is + constant c_block_size : natural := c_sdp_N_crosslets_max * c_sdp_S_pn; + constant c_block_size_longwords : natural := ceil_div(c_block_size, 2); -- 32b -> 64b + constant c_data_w : natural := c_sdp_W_crosslet * c_nof_complex; + -- The channel field carries the index of time multiplexed crosslet packets + constant c_use_channel : boolean := true; + constant c_channel_w : natural := ceil_log2(g_P_sq); + -- With 32b data repacked in 64b one empty bit is enough. For crosslets the number + -- of 32b words is c_block_size, which is even, so empty will always be 0. However do + -- support odd sizes, to be safe. + constant c_use_empty : boolean := true; + constant c_empty_w : natural := 1; + -- The from_ri_sosi only carries correct packets, so error field is not used. + constant c_use_error : boolean := false; + + -- The size for 1 block is probably already enough as the number of blocks received + -- on the remote input of the mux probably have enough gap time in between. Just + -- to be sure to not run into issues in the future, the fifo size is increased to + -- buffer the maximum nof blocks per block period. + constant c_mux_fifo_size : natural := 2**ceil_log2(g_P_sq * c_block_size_longwords); + -- c_repack_fifo_size should be at least c_block_size_longwords / 2, as dp_repack_data + -- unpacks by factor 2 from 64bit to 32bit. Choose 1x to have some room. 
+ constant c_repack_fifo_size : natural := 2**ceil_log2(1 * c_block_size_longwords); + + signal xsel_data_sosi : t_dp_sosi := c_dp_sosi_rst; + signal local_sosi : t_dp_sosi := c_dp_sosi_rst; + signal ring_mux_sosi : t_dp_sosi := c_dp_sosi_rst; + signal ring_mux_siso : t_dp_siso := c_dp_siso_rdy; + signal repack_fifo_sosi : t_dp_sosi := c_dp_sosi_rst; + signal repack_fifo_siso : t_dp_siso := c_dp_siso_rdy; + signal rx_sosi : t_dp_sosi := c_dp_sosi_rst; + signal dispatch_invert_sosi_arr : t_dp_sosi_arr(0 to g_P_sq - 1) := (others => c_dp_sosi_rst); + signal dispatch_sosi_arr : t_dp_sosi_arr(g_P_sq - 1 downto 0) := (others => c_dp_sosi_rst); + signal to_aligner_sosi_arr : t_dp_sosi_arr(g_P_sq - 1 downto 0) := (others => c_dp_sosi_rst); +begin + --------------------------------------------------------------- + -- Repack 32b to 64b + --------------------------------------------------------------- + -- repacking xsel re/im to data field. + p_wire_xsel_sosi : process(xsel_sosi) + begin + xsel_data_sosi <= xsel_sosi; + xsel_data_sosi.data( c_sdp_W_crosslet - 1 downto 0) <= xsel_sosi.re(c_sdp_W_crosslet - 1 downto 0); + xsel_data_sosi.data(c_nof_complex * c_sdp_W_crosslet - 1 downto c_sdp_W_crosslet) <= xsel_sosi.im(c_sdp_W_crosslet - 1 downto 0); + end process; + + u_dp_repack_data_local : entity dp_lib.dp_repack_data + generic map ( + g_in_dat_w => c_data_w, + g_in_nof_words => c_longword_w / c_data_w, + g_out_dat_w => c_longword_w, + g_out_nof_words => 1, + g_pipeline_ready => true -- Needed for src_in.ready to snk_out.ready. 
+ ) + port map ( + rst => dp_rst, + clk => dp_clk, + + snk_in => xsel_data_sosi, + src_out => local_sosi + ); + + --------------------------------------------------------------- + -- ring_mux + --------------------------------------------------------------- + u_ring_mux : entity ring_lib.ring_mux + generic map ( + g_bsn_w => c_dp_stream_bsn_w, + g_data_w => c_longword_w, + g_channel_w => c_word_w, + g_use_error => c_use_error, + g_fifo_size => array_init(c_mux_fifo_size, 2) + ) + port map ( + dp_clk => dp_clk, + dp_rst => dp_rst, + + remote_sosi => from_ri_sosi, + local_sosi => local_sosi, + mux_sosi => ring_mux_sosi, + mux_siso => ring_mux_siso + ); + + to_ri_sosi <= ring_mux_sosi; + + --------------------------------------------------------------- + -- Repack 64b to 32b + --------------------------------------------------------------- + -- FIFO to take backpressure from u_dp_repack_data_rx + u_dp_fifo_sc : entity dp_lib.dp_fifo_sc + generic map ( + g_data_w => c_longword_w, + g_bsn_w => c_dp_stream_bsn_w, + g_empty_w => c_empty_w, + g_channel_w => c_channel_w, + g_use_bsn => true, + g_use_empty => c_use_empty, + g_use_channel => c_use_channel, + g_use_error => c_use_error, + g_use_sync => true, + g_fifo_size => c_repack_fifo_size + ) + port map ( + rst => dp_rst, + clk => dp_clk, + + snk_out => open, + snk_in => from_ri_sosi, + + src_in => repack_fifo_siso, + src_out => repack_fifo_sosi + ); + + u_dp_repack_data_rx : entity dp_lib.dp_repack_data + generic map ( + g_in_dat_w => c_longword_w, + g_in_nof_words => 1, + g_out_dat_w => c_data_w, + g_out_nof_words => c_longword_w / c_data_w, + g_pipeline_ready => true -- Needed for src_in.ready to snk_out.ready. 
+ ) + port map ( + rst => dp_rst, + clk => dp_clk, + + snk_in => repack_fifo_sosi, + snk_out => repack_fifo_siso, + src_out => rx_sosi + ); + + --------------------------------------------------------------- + -- dp_demux + --------------------------------------------------------------- + u_dp_demux : entity dp_lib.dp_demux + generic map ( + g_mode => 0, + g_nof_output => g_P_sq, + g_remove_channel_lo => false, + g_sel_ctrl_invert => true -- TRUE when indexed (g_nof_input-1 DOWNTO 0) + ) + port map ( + rst => dp_rst, + clk => dp_clk, + + snk_in => rx_sosi, + src_out_arr => dispatch_invert_sosi_arr + ); + + dispatch_sosi_arr <= func_dp_stream_arr_reverse_range(dispatch_invert_sosi_arr); + + -- Group local input stream with remote input streams + to_aligner_sosi_arr(g_P_sq - 1 downto 1) <= dispatch_sosi_arr(g_P_sq - 1 downto 1); + to_aligner_sosi_arr(0) <= xsel_data_sosi; + + --------------------------------------------------------------- + -- dp_bsn_aligner_v2 + --------------------------------------------------------------- + u_mmp_dp_bsn_align_v2 : entity dp_lib.mmp_dp_bsn_align_v2 + generic map( + -- for dp_bsn_align_v2 + g_nof_streams => g_P_sq, + g_bsn_latency_max => 2, + g_nof_aligners_max => 1, -- 1 for Access scheme 3. + g_block_size => c_block_size, + g_data_w => c_data_w, + g_use_mm_output => true, + g_rd_latency => 1, -- Required for st_xst + -- for mms_dp_bsn_monitor_v2 + -- Using c_sdp_N_clk_sync_timeout_xsub as g_nof_clk_per_sync is used for BSN monitor timeout. 
+ g_nof_clk_per_sync => c_sdp_N_clk_sync_timeout_xsub, + g_nof_input_bsn_monitors => g_P_sq, + g_use_bsn_output_monitor => true + ) + port map ( + -- Memory-mapped clock domain + mm_rst => mm_rst, + mm_clk => mm_clk, + + reg_bsn_align_copi => reg_bsn_align_copi, + reg_bsn_align_cipo => reg_bsn_align_cipo, + + reg_input_monitor_copi => reg_bsn_monitor_v2_bsn_align_input_copi, + reg_input_monitor_cipo => reg_bsn_monitor_v2_bsn_align_input_cipo, + + reg_output_monitor_copi => reg_bsn_monitor_v2_bsn_align_output_copi, + reg_output_monitor_cipo => reg_bsn_monitor_v2_bsn_align_output_cipo, + + -- Streaming clock domain + dp_rst => dp_rst, + dp_clk => dp_clk, + + -- Streaming input + in_sosi_arr => to_aligner_sosi_arr, + + -- Output via local MM interface in dp_clk domain, when g_use_mm_output = TRUE. + mm_sosi => crosslets_sosi, + mm_copi => crosslets_copi, + mm_cipo_arr => crosslets_cipo_arr + ); +end str; diff --git a/applications/lofar2/libraries/sdp/src/vhdl/sdp_pkg.vhd b/applications/lofar2/libraries/sdp/src/vhdl/sdp_pkg.vhd index 1e11aca4f8f8450f7c75170cf30f1ea9ba2e27e2..8f6701daf221360c81c7ba470e92bbf4ec5e7042 100644 --- a/applications/lofar2/libraries/sdp/src/vhdl/sdp_pkg.vhd +++ b/applications/lofar2/libraries/sdp/src/vhdl/sdp_pkg.vhd @@ -74,12 +74,13 @@ package sdp_pkg is constant c_sdp_N_pn_max : natural := 16; -- max 16 PN per ring = per antenna band constant c_sdp_N_pol : natural := 2; constant c_sdp_N_pol_bf : natural := 2; - constant c_sdp_N_rings_sdp : natural := 1; + constant c_sdp_N_rings_sdp : natural := 1; -- number of QSFP rings in SDP, each has N_lane = 8 lanes constant c_sdp_N_ring_lanes_max : natural := 8; -- = N_lane in doc constant c_sdp_N_sub : natural := 512; constant c_sdp_N_sync_rcu : natural := 1; constant c_sdp_N_taps : natural := 16; - constant c_sdp_P_sq : natural := 9; -- = N_pn / 2 + 1 + constant c_sdp_P_sq : natural := 9; -- = N_pn / 2 + 1 square correlator cells for XST + constant c_sdp_P_sum : natural := 2; -- sums of two in ring 
beamformer adder tree constant c_sdp_Q_fft : natural := 2; constant c_sdp_S_pn : natural := 12; constant c_sdp_S_rcu : natural := 3; @@ -589,8 +590,8 @@ package sdp_pkg is constant c_sdp_reg_stat_hdr_dat_bst_addr_w : natural := ceil_log2(c_sdp_N_beamsets) + c_sdp_reg_stat_hdr_dat_addr_w; constant c_sdp_reg_bsn_monitor_v2_bst_offload_addr_w : natural := ceil_log2(c_sdp_N_beamsets) + c_sdp_reg_bsn_monitor_v2_addr_w; constant c_sdp_reg_bsn_monitor_v2_beamlet_output_addr_w : natural := ceil_log2(c_sdp_N_beamsets) + c_sdp_reg_bsn_monitor_v2_addr_w; - constant c_sdp_reg_bsn_align_v2_bf_addr_w : natural := ceil_log2(c_sdp_N_beamsets) + ceil_log2(c_dual) + c_sdp_reg_bsn_align_v2_addr_w; - constant c_sdp_reg_bsn_monitor_v2_rx_align_bf_addr_w : natural := ceil_log2(c_sdp_N_beamsets) + ceil_log2(c_dual) + c_sdp_reg_bsn_monitor_v2_addr_w; + constant c_sdp_reg_bsn_align_v2_bf_addr_w : natural := ceil_log2(c_sdp_N_beamsets) + ceil_log2(c_sdp_P_sum) + c_sdp_reg_bsn_align_v2_addr_w; + constant c_sdp_reg_bsn_monitor_v2_rx_align_bf_addr_w : natural := ceil_log2(c_sdp_N_beamsets) + ceil_log2(c_sdp_P_sum) + c_sdp_reg_bsn_monitor_v2_addr_w; constant c_sdp_reg_bsn_monitor_v2_aligned_bf_addr_w : natural := ceil_log2(c_sdp_N_beamsets) + c_sdp_reg_bsn_monitor_v2_addr_w; constant c_sdp_reg_ring_lane_info_bf_addr_w : natural := ceil_log2(c_sdp_N_beamsets) + 1; constant c_sdp_reg_bsn_monitor_v2_ring_rx_bf_addr_w : natural := ceil_log2(c_sdp_N_beamsets) + c_sdp_reg_bsn_monitor_v2_addr_w; diff --git a/applications/lofar2/libraries/sdp/src/vhdl/sdp_station.vhd b/applications/lofar2/libraries/sdp/src/vhdl/sdp_station.vhd index 727c59d7774ed8e9207a182450e5452e177994d0..dee188c0b1a2d0c1e24e21be34797d944da75c36 100644 --- a/applications/lofar2/libraries/sdp/src/vhdl/sdp_station.vhd +++ b/applications/lofar2/libraries/sdp/src/vhdl/sdp_station.vhd @@ -419,8 +419,8 @@ architecture str of sdp_station is constant c_addr_w_reg_bdo_destinations : natural := c_sdp_reg_bdo_destinations_info_w_one; constant 
c_addr_w_reg_dp_xonoff : natural := 1; constant c_addr_w_ram_st_bst : natural := ceil_log2(c_sdp_S_sub_bf * c_sdp_N_pol * (c_longword_sz / c_word_sz)); - constant c_addr_w_reg_bsn_align_v2_bf : natural := ceil_log2(c_dual) + c_sdp_reg_bsn_align_v2_addr_w; - constant c_addr_w_reg_bsn_monitor_v2_rx_align_bf : natural := ceil_log2(c_dual) + c_sdp_reg_bsn_monitor_v2_addr_w; + constant c_addr_w_reg_bsn_align_v2_bf : natural := ceil_log2(c_sdp_P_sum) + c_sdp_reg_bsn_align_v2_addr_w; + constant c_addr_w_reg_bsn_monitor_v2_rx_align_bf : natural := ceil_log2(c_sdp_P_sum) + c_sdp_reg_bsn_monitor_v2_addr_w; constant c_addr_w_reg_ring_lane_info_bf : natural := 1; -- Read only sdp_info values diff --git a/applications/lofar2/libraries/sdp/tb/vhdl/tb_sdp_beamformer_remote_ring.vhd b/applications/lofar2/libraries/sdp/tb/vhdl/tb_sdp_beamformer_remote_ring.vhd index 421f4496540b32e8393235abbc0d4a23388d1b55..5cc1a7bc4af02db5ba94db771d8050b1368fda87 100644 --- a/applications/lofar2/libraries/sdp/tb/vhdl/tb_sdp_beamformer_remote_ring.vhd +++ b/applications/lofar2/libraries/sdp/tb/vhdl/tb_sdp_beamformer_remote_ring.vhd @@ -20,50 +20,313 @@ -- -- Author: E. Kooistra -- Purpose: --- . Test bench for multiple sdp_beamformer_output.vhd + ring_lane.vhd in a ring +-- . Test bench for multiple sdp_beamformer_remote.vhd + ring_lane.vhd + +-- tr_10GbE in a ring -- Description: -- . https://support.astron.nl/confluence/display/L2M/L5+SDPFW+Design+Document%3A+Beamformer -- --- This tb is inspired by tb_lofar2_unb2c_sdp_station_bf_ring.vhd, however +-- . This tb is inspired by tb_lofar2_unb2c_sdp_station_bf_ring.vhd, however -- here the purpose is to simulate the memory usage of the circular buffer -- in the bsn_aligner_v2 at each node. -- +-- . Block diagram: +-- * tb can use one instance of tr_10Gbe to model Rx from ring and Tx to ring. 
+-- * Ring lane serial links for ring nodes RN = 0 to c_last_rn: +-- +-- tr_10gbe_ring_serial_tx_arr --> tr_10gbe_ring_serial_rx_arr after c_cable_delay +-- +-- /<-------------------------------------------------------------\ +-- \---> 0 ---> RN - 1 ---> RN ---> RN + 1 ---> c_last_rn --->/ +-- |^ +-- tr_10gbe_ring_serial_rx_arr(RN) || tr_10gbe_ring_serial_tx_arr(RN) +-- v| +-- tr_10GbE +-- |^ +-- tr_10gbe_ring_rx_sosi_arr(RN) || tr_10gbe_ring_tx_sosi_arr(RN) +-- v| +-- ring_lane +-- |^ +-- from_ri_sosi_arr(RN) || to_ri_sosi_arr(RN) +-- v| +-- local_bf_sosi --> sdp_beamformer_remote --> bf_sum_sosi_arr(RN) +-- bf_sum_sosi +-- * BSN monitors: +-- RN +-- |^ +-- ring_lane/ring_rx || ring_lane/ring_tx +-- FPGA_bf_ring_rx_latency_R(RN) || FPGA_bf_ring_tx_latency_R(RN) +-- || +-- dp_bsn_align_v2 P_sum = 2 inputs || +-- FPGA_bf_rx_align_latency_R(RN)(P_sum) || +-- || +-- dp_bsn_align_v2 aligned output || +-- FPGA_bf_aligned_latency_R(RN) v| +-- +-- . BF latency results from SDP-ARTS HW +-- - with 16 ring nodes (GN = 64 is RN = 0) +-- - 2024-03-02T21.16.33_d601da896_lofar2_unb2b_sdp_station_full_wg +-- +-- Node: bf_ring_rx bf_rx_align bf_aligned bf_ring_tx +-- _latency: _latency: _latency: _latency: +-- 64: -1 ( 1 -1 ) 2053 3114 +-- 65: 4898 ( 1 3880 ) 4101 5162 +-- 66: 6949 ( 1 5916 ) 6149 7210 +-- 67: 8998 ( 1 7960 ) 8197 9258 +-- 68: 11048 ( 1 10003 ) 10245 11306 +-- 69: 13093 ( 1 12063 ) 12293 13354 +-- 70: 15135 ( 1 14105 ) 14341 15402 +-- 71: 17174 ( 1 16154 ) 16389 17450 +-- 72: 19261 ( 1 18229 ) 18437 19498 +-- 73: 21288 ( 1 20261 ) 20485 21546 +-- 74: 23319 ( 1 22292 ) 22533 23594 +-- 75: 25367 ( 1 24359 ) 24581 25642 +-- 76: 27448 ( 1 26417 ) 26629 27690 +-- 77: 29471 ( 1 28453 ) 28677 29738 +-- 78: 31512 ( 1 30481 ) 30725 31786 +-- 79: 33567 ( 1 32537 ) 32773 -1 +-- +-- Simulation latency results with this tb +-- +-- # c_cable_delay = 0 * 6.4 ns (c_bsn_latency_first_node = 2) +-- # +-- # Node: bf_ring_rx bf_rx_align bf_aligned bf_ring_tx +-- # _latency: _latency: 
_latency: _latency: +-- # 0: -1 ( 1 0 ) 2053 2075 +-- # 1: 3824 ( 1 3837 ) 4101 4123 +-- # 2: 5876 ( 1 5889 ) 6149 6171 +-- # 3: 7926 ( 1 7939 ) 8197 8219 +-- # 4: 9977 ( 1 9990 ) 10245 10267 +-- # 5: 12029 ( 1 12042 ) 12293 12315 +-- # 6: 14079 ( 1 14092 ) 14341 14363 +-- # 7: 16108 ( 1 16121 ) 16389 16411 +-- # 8: 18159 ( 1 18172 ) 18437 18459 +-- # 9: 20211 ( 1 20224 ) 20485 20507 +-- # 10: 22261 ( 1 22274 ) 22533 22555 +-- # 11: 24312 ( 1 24325 ) 24581 24603 +-- # 12: 26363 ( 1 26376 ) 26629 26651 +-- # 13: 28414 ( 1 28427 ) 28677 28699 +-- # 14: 30465 ( 1 30478 ) 30725 30747 +-- # 15: 32493 ( 1 32506 ) 32773 -1 +-- +-- # c_cable_delay = 30 * 6.4 ns (c_bsn_latency_first_node = 2) +-- # +-- # Node: bf_ring_rx bf_rx_align bf_aligned bf_ring_tx +-- # _latency: _latency: _latency: _latency: +-- # 0: -1 ( 1 0 ) 2053 2075 +-- # 1: 3862 ( 1 3875 ) 4101 4123 +-- # 2: 5914 ( 1 5927 ) 6149 6171 +-- # 3: 7965 ( 1 7978 ) 8197 8219 +-- # 4: 10015 ( 1 10028 ) 10245 10267 +-- # 5: 12067 ( 1 12080 ) 12293 12315 +-- # 6: 14118 ( 1 14131 ) 14341 14363 +-- # 7: 16146 ( 1 16159 ) 16389 16411 +-- # 8: 18197 ( 1 18210 ) 18437 18459 +-- # 9: 20249 ( 1 20262 ) 20485 20507 +-- # 10: 22299 ( 1 22312 ) 22533 22555 +-- # 11: 24350 ( 1 24363 ) 24581 24603 +-- # 12: 26402 ( 1 26415 ) 26629 26651 +-- # 13: 28452 ( 1 28465 ) 28677 28699 +-- # 14: 30503 ( 1 30516 ) 30725 30747 +-- # 15: 32532 ( 1 32545 ) 32773 -1 +-- +-- # c_cable_delay = 30 * 6.4 ns (c_bsn_latency_first_node = 1) +-- # +-- # Node: bf_ring_rx bf_rx_align bf_aligned bf_ring_tx +-- # _latency: _latency: _latency: _latency: +-- # 0: -1 ( 1 0 ) 1029 1051 +-- # 1: 2837 ( 1 2850 ) 3077 3099 +-- # 2: 4888 ( 1 4901 ) 5125 5147 +-- # 3: 6939 ( 1 6952 ) 7173 7195 +-- # 4: 8990 ( 1 9003 ) 9221 9243 +-- # 5: 11041 ( 1 11054 ) 11269 11291 +-- # 6: 13092 ( 1 13105 ) 13317 13339 +-- # 7: 15143 ( 1 15156 ) 15365 15387 +-- # 8: 17172 ( 1 17185 ) 17413 17435 +-- # 9: 19222 ( 1 19235 ) 19461 19483 +-- # 10: 21274 ( 1 21287 ) 21509 21531 +-- # 
11: 23325 ( 1 23338 ) 23557 23579 +-- # 12: 25375 ( 1 25388 ) 25605 25627 +-- # 13: 27427 ( 1 27440 ) 27653 27675 +-- # 14: 29478 ( 1 29491 ) 29701 29723 +-- # 15: 31507 ( 1 31520 ) 31749 -1 +-- +-- - The dp_bsn_align_v2 BSN latency monitor results agree between sim and HW. +-- - The bf_aligned_latency is exactly equal in sim and on HW, because the +-- mmp_dp_bsn_align_v2 uses the ref_sync for the BSN monitor and also to +-- release its BSN aligned output, so the latency only depends on internal +-- FW buffering and latency. +-- . The bf_aligned_latency and bf_ring_tx_latency do not depend on cable +-- delays and are constant when read again in sim or on HW, because they +-- only depend on fixed internal FW buffering and latency. +-- - The ring_lane BSN latency monitor results differ between sim and HW, it +-- is unclear why: +-- . the ring_rx and ring_tx BSN latency monitor results are about one +-- block of 1024 larger on HW. +-- . on the same HW node, the bf_ring_rx_latency is about one block of 1024 +-- larger than the bf_rx_align_latency, even though they are taken at +-- nearly the same place in the ring_rx signal path. +-- . on the same HW node, the bf_ring_tx_latency is about one block of 1024 +-- larger than the bf_aligned_latency, even though they are taken at nearly +-- the same place in the tx signal path. +-- . the ring_rx and ring_tx BSN latency monitor results for XST do not +-- show an offset of one block of 1024. +-- TODO: +-- . Assume the ring_lane latencies are one block of 1024 too high, and +-- assume that the bf_rx_align_latency is correct and reflects the actual +-- packet latency. +-- . The ring_rx and ring_tx both use func_ring_nof_hops_to_source_rn() and +-- hops = sosi.channel to get monitor_sosi, maybe there occurs an offset +-- there. +-- . The ring_rx and ring_tx both use dp_demux.vhd, maybe that causes a +-- shift of one block of 1024 in sosi.sync. 
+-- -- Usage: --- > as 8 +-- > as 3 or more +-- > add wave -position insertpoint sim:/tb_sdp_beamformer_remote_ring/bf_sum_sosi_arr -- > run -a ------------------------------------------------------------------------------- -library IEEE, common_lib, dp_lib, reorder_lib; +library IEEE, common_lib, dp_lib, ring_lib, tr_10GbE_lib, tech_pll_lib; use IEEE.std_logic_1164.all; use common_lib.common_pkg.all; use common_lib.common_mem_pkg.all; use common_lib.tb_common_pkg.all; use common_lib.tb_common_mem_pkg.all; +use common_lib.common_str_pkg.all; use dp_lib.dp_stream_pkg.all; +use ring_lib.ring_pkg.all; +use tech_pll_lib.tech_pll_component_pkg.all; use work.sdp_pkg.all; use work.tb_sdp_pkg.all; entity tb_sdp_beamformer_remote_ring is generic ( - g_nof_rn : natural := 16 -- number of nodes in the ring + g_nof_rn : natural := 4; -- number of nodes in the ring + g_nof_sync : natural := 2 ); end tb_sdp_beamformer_remote_ring; architecture tb of tb_sdp_beamformer_remote_ring is constant c_dp_clk_period : time := 5 ns; -- 200 MHz constant c_mm_clk_period : time := 1 ns; -- fast MM clk to speed up simulation + constant c_sa_clk_period : time := tech_pll_clk_644_period; -- 644MHz + + -- Apply cable delay in tech_pll_clk_156_period units, to remain aligned with tr_10GbE sim model + -- . Choose c_cable_delay = 30 * 6.4 ~= 192 ns ~= 38 dp_clk of 5 ns, to match delay seen on HW + -- . Maximum c_cable_delay <= 186 * 6.4 = 1210 ns ~= 242 dp_clk of 5 ns in simulation with + -- g_nof_rn = 16. For larger c_cable_delay the bf_sum_sosi.data goes wrong. The maximum + -- c_cable_delay depends a little bit on g_nof_rn, for g_nof_rn = 2 the data goes wrong when + -- c_cable_delay >= 190. 
+ constant c_clk_156_period : time := tech_pll_clk_156_period; -- 6.400020 ns ~= 156.25 MHz + constant c_nof_delay : natural := 30; --286; + constant c_cable_delay : time := c_clk_156_period * c_nof_delay; + + -- BF data + constant c_block_period : natural := c_sdp_N_fft; + constant c_block_size : natural := c_sdp_S_sub_bf * c_sdp_N_pol_bf; + constant c_gap_size : natural := c_block_period - c_block_size; + -- choose sync interval somewhat longer than maximum BF ring latency + constant c_nof_blocks_per_sync : natural := largest(10, (g_nof_rn + 1) * 2); + constant c_local_bf_re : integer := 1; + constant c_local_bf_im : integer := 2; + + -- Ring lane packets + constant c_last_rn : natural := g_nof_rn - 1; -- first ring node has index RN = 0 by definition. + constant c_use_cable : std_logic := '1'; -- '0' ring via PCB traces, '1' ring via QSFP cables + constant c_lane_payload_nof_longwords_bf : natural := (c_block_size * 9) / 16; -- beamlet block size repacked + -- from 36b to 64b (9/16 = 36/64), 488 * 2 * 9 / 16 = 549 longwords + constant c_lane_packet_nof_longwords_max : natural := c_lane_payload_nof_longwords_bf + c_ring_dp_hdr_field_size; + -- = 549 + 3 = 552 + constant c_fifo_tx_fill_margin : natural := 10; -- >= c_fifo_fill_margin = 6 that is used in dp_fifo_fill_eop + constant c_fifo_tx_size_ring : natural := true_log_pow2(c_lane_packet_nof_longwords_max + c_fifo_tx_fill_margin); + -- = 552 + 10 = 562 --> 1024 + constant c_fifo_tx_fill_ring : natural := c_fifo_tx_size_ring - c_fifo_tx_fill_margin; + -- = maximum fill level, so rely on eop + constant c_err_bi : natural := 0; + constant c_nof_err_counts : natural := 8; + constant c_bsn_at_sync_check_channel : natural := 1; + constant c_validate_channel : boolean := true; + constant c_validate_channel_mode : string := "="; + constant c_sync_timeout : natural := c_block_period * (c_nof_blocks_per_sync + 1); + + -- Timeout tb if there is no output bf_sum_sosi + constant c_tb_timeout : time := (g_nof_sync + 1) * 
c_sync_timeout * c_dp_clk_period; + + -- Address widths of a single MM instance + constant c_addr_w_reg_ring_lane_info_bf : natural := 1; + + signal mm_init : std_logic := '1'; + signal tb_end : std_logic := '0'; + signal dp_clk : std_logic := '1'; + signal dp_rst : std_logic; + signal mm_clk : std_logic := '1'; + signal mm_rst : std_logic; + signal SA_CLK : std_logic := '1'; + signal tr_ref_clk_312 : std_logic := '0'; + signal tr_ref_clk_156 : std_logic := '0'; + signal tr_ref_rst_156 : std_logic := '0'; - constant c_last_rn : natural := g_nof_rn - 1; -- first ring node has index RN = 0 by definition. + signal stimuli_rst : std_logic; + signal stimuli_end : std_logic; - signal mm_init : std_logic := '1'; - signal tb_end : std_logic := '0'; - signal dp_clk : std_logic := '1'; - signal dp_rst : std_logic; - signal mm_clk : std_logic := '1'; - signal mm_rst : std_logic; + signal stimuli_sosi : t_dp_sosi; + signal local_bf_sosi : t_dp_sosi; + signal bf_bs_sosi : t_dp_sosi; + signal from_ri_sosi_arr : t_dp_sosi_arr(c_last_rn downto 0); + signal to_ri_sosi_arr : t_dp_sosi_arr(c_last_rn downto 0); + signal bf_sum_sosi_arr : t_dp_sosi_arr(c_last_rn downto 0); + signal bf_sum_sosi : t_dp_sosi; - signal rn_index : natural range 0 to c_sdp_N_pn_max - 1 := 0; + -- 10GbE ring + signal tr_10gbe_ring_rx_sosi_arr : t_dp_sosi_arr(c_last_rn downto 0) := (others => c_dp_sosi_rst); + signal tr_10gbe_ring_tx_sosi_arr : t_dp_sosi_arr(c_last_rn downto 0) := (others => c_dp_sosi_rst); + signal tr_10gbe_ring_serial_rx_arr : std_logic_vector(c_last_rn downto 0) := (others => '0'); + signal tr_10gbe_ring_serial_tx_arr : std_logic_vector(c_last_rn downto 0) := (others => '0'); + -- BF ring MM registers + signal reg_ring_lane_info_bf_copi_arr : t_mem_copi_arr(c_last_rn downto 0) := (others => c_mem_copi_rst); + signal reg_ring_lane_info_bf_cipo_arr : t_mem_cipo_arr(c_last_rn downto 0) := (others => c_mem_cipo_rst); + signal reg_ring_lane_info_bf_copi : t_mem_copi := c_mem_copi_rst; + signal 
reg_ring_lane_info_bf_cipo : t_mem_cipo := c_mem_cipo_rst; + signal reg_bsn_monitor_v2_ring_rx_bf_copi_arr : t_mem_copi_arr(c_last_rn downto 0) := (others => c_mem_copi_rst); + signal reg_bsn_monitor_v2_ring_rx_bf_cipo_arr : t_mem_cipo_arr(c_last_rn downto 0) := (others => c_mem_cipo_rst); + signal reg_bsn_monitor_v2_ring_rx_bf_copi : t_mem_copi := c_mem_copi_rst; + signal reg_bsn_monitor_v2_ring_rx_bf_cipo : t_mem_cipo := c_mem_cipo_rst; + signal reg_bsn_monitor_v2_ring_tx_bf_copi_arr : t_mem_copi_arr(c_last_rn downto 0) := (others => c_mem_copi_rst); + signal reg_bsn_monitor_v2_ring_tx_bf_cipo_arr : t_mem_cipo_arr(c_last_rn downto 0) := (others => c_mem_cipo_rst); + signal reg_bsn_monitor_v2_ring_tx_bf_copi : t_mem_copi := c_mem_copi_rst; + signal reg_bsn_monitor_v2_ring_tx_bf_cipo : t_mem_cipo := c_mem_cipo_rst; + signal reg_dp_block_validate_err_bf_copi_arr : t_mem_copi_arr(c_last_rn downto 0) := (others => c_mem_copi_rst); + signal reg_dp_block_validate_err_bf_cipo_arr : t_mem_cipo_arr(c_last_rn downto 0) := (others => c_mem_cipo_rst); + signal reg_dp_block_validate_err_bf_copi : t_mem_copi := c_mem_copi_rst; + signal reg_dp_block_validate_err_bf_cipo : t_mem_cipo := c_mem_cipo_rst; + signal reg_dp_block_validate_bsn_at_sync_bf_copi_arr : t_mem_copi_arr(c_last_rn downto 0) := + (others => c_mem_copi_rst); + signal reg_dp_block_validate_bsn_at_sync_bf_cipo_arr : t_mem_cipo_arr(c_last_rn downto 0) := + (others => c_mem_cipo_rst); + signal reg_dp_block_validate_bsn_at_sync_bf_copi : t_mem_copi := c_mem_copi_rst; + signal reg_dp_block_validate_bsn_at_sync_bf_cipo : t_mem_cipo := c_mem_cipo_rst; + -- BF ring MM points + signal FPGA_bf_ring_nof_transport_hops_R : t_natural_arr(c_last_rn downto 0); + signal FPGA_bf_ring_rx_latency_R : t_integer_arr(c_last_rn downto 0); + signal FPGA_bf_ring_tx_latency_R : t_integer_arr(c_last_rn downto 0); + + -- BSN aligner MM registers + signal reg_bsn_align_v2_bf_copi_arr : t_mem_copi_arr(c_last_rn downto 0) := (others => 
c_mem_copi_rst); + signal reg_bsn_align_v2_bf_cipo_arr : t_mem_cipo_arr(c_last_rn downto 0) := (others => c_mem_cipo_rst); + signal reg_bsn_align_v2_bf_copi : t_mem_copi := c_mem_copi_rst; + signal reg_bsn_align_v2_bf_cipo : t_mem_cipo := c_mem_cipo_rst; + signal reg_bsn_monitor_v2_bf_rx_align_copi_arr : t_mem_copi_arr(c_last_rn downto 0) := (others => c_mem_copi_rst); + signal reg_bsn_monitor_v2_bf_rx_align_cipo_arr : t_mem_cipo_arr(c_last_rn downto 0) := (others => c_mem_cipo_rst); + signal reg_bsn_monitor_v2_bf_rx_align_copi : t_mem_copi := c_mem_copi_rst; + signal reg_bsn_monitor_v2_bf_rx_align_cipo : t_mem_cipo := c_mem_cipo_rst; + signal reg_bsn_monitor_v2_bf_aligned_copi_arr : t_mem_copi_arr(c_last_rn downto 0) := (others => c_mem_copi_rst); + signal reg_bsn_monitor_v2_bf_aligned_cipo_arr : t_mem_cipo_arr(c_last_rn downto 0) := (others => c_mem_cipo_rst); + signal reg_bsn_monitor_v2_bf_aligned_copi : t_mem_copi := c_mem_copi_rst; + signal reg_bsn_monitor_v2_bf_aligned_cipo : t_mem_cipo := c_mem_cipo_rst; + -- BSN aligner Monitor Points + signal FPGA_bf_rx_align_latency_R : t_integer_2arr_2(c_last_rn downto 0); -- c_sdp_P_sum = 2 + signal FPGA_bf_aligned_latency_R : t_integer_arr(c_last_rn downto 0); begin dp_rst <= '1', '0' after c_dp_clk_period * 7; dp_clk <= (not dp_clk) or tb_end after c_dp_clk_period / 2; @@ -71,29 +334,208 @@ begin mm_rst <= '1', '0' after c_mm_clk_period * 7; mm_clk <= (not mm_clk) or tb_end after c_mm_clk_period / 2; + -- Wait for tr_10GbE to be active + stimuli_rst <= '1', '0' after 15 us; + + SA_CLK <= not SA_CLK after c_sa_clk_period / 2; -- Serial Gigabit IO sa clock (644 MHz) + + -- Generate local BF stream, use same for all nodes + u_stimuli : entity dp_lib.dp_stream_stimuli + generic map ( + g_sync_period => c_nof_blocks_per_sync, + g_nof_repeat => c_nof_blocks_per_sync * g_nof_sync, + g_pkt_len => c_block_size, + g_pkt_gap => c_gap_size + ) + port map ( + rst => stimuli_rst, + clk => dp_clk, + -- Generate stimuli + src_out => 
stimuli_sosi, + -- End of stimuli + tb_end => stimuli_end + ); + + -- Use constant beamlet data to ease verification of (intermediate) beamlet sums at each node + p_local_bf_sosi : process(stimuli_sosi) + begin + local_bf_sosi <= stimuli_sosi; + local_bf_sosi.data <= TO_DP_SDATA(0); + local_bf_sosi.re <= TO_DP_DSP_DATA(c_local_bf_re); + local_bf_sosi.im <= TO_DP_DSP_DATA(c_local_bf_im); + local_bf_sosi.channel <= TO_DP_CHANNEL(0); + local_bf_sosi.err <= TO_DP_ERROR(0); + end process; + + bf_bs_sosi <= local_bf_sosi; + bf_sum_sosi <= bf_sum_sosi_arr(c_last_rn); + p_mm : process - variable v_offset : natural; + variable v_span : natural; + variable v_span_node : natural; + variable v_offset : natural; + variable v_transport_nof_hops : natural; begin proc_common_wait_until_low(dp_clk, mm_rst); proc_common_wait_some_cycles(mm_clk, 10); - proc_common_wait_cross_clock_domain_latency(c_mm_clk_period, c_dp_clk_period, c_common_cross_clock_domain_latency * 2); + proc_common_wait_cross_clock_domain_latency(c_mm_clk_period, c_dp_clk_period, + c_common_cross_clock_domain_latency * 2); + --------------------------------------------------------------------------- + -- Setup transport nof hops for RN = 0:15 to [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0] + --------------------------------------------------------------------------- + -- Write FPGA_bf_ring_nof_transport_hops_RW = ring_lane_info.transport_nof_hops + v_span := 2**c_addr_w_reg_ring_lane_info_bf; + for RN in 0 to c_last_rn LOOP + v_offset := 1 + RN * v_span; + v_transport_nof_hops := 1; + if RN = c_last_rn then + v_transport_nof_hops := 0; + end if; + proc_mem_mm_bus_wr(v_offset, v_transport_nof_hops, mm_clk, + reg_ring_lane_info_bf_cipo, reg_ring_lane_info_bf_copi); + end loop; + proc_common_wait_cross_clock_domain_latency(c_mm_clk_period, c_dp_clk_period, + c_common_cross_clock_domain_latency * 2); + -- Readback FPGA_bf_ring_nof_transport_hops_R + for RN in 0 to c_last_rn LOOP + v_offset := 1 + RN * v_span; + 
proc_mem_mm_bus_rd(v_offset, mm_clk, reg_ring_lane_info_bf_cipo, reg_ring_lane_info_bf_copi); + proc_mem_mm_bus_rd_latency(1, mm_clk); + FPGA_bf_ring_nof_transport_hops_R(RN) <= TO_UINT(reg_ring_lane_info_bf_cipo.rddata(c_word_w - 1 downto 0)); + end loop; + + --------------------------------------------------------------------------- + -- Wait until second bf_sum_sosi.sync + --------------------------------------------------------------------------- + proc_common_wait_until_hi_lo(dp_clk, bf_sum_sosi.sync); + proc_common_wait_until_hi_lo(dp_clk, bf_sum_sosi.sync); + + --------------------------------------------------------------------------- + -- Read BSN monitors + --------------------------------------------------------------------------- + v_span := 2**c_sdp_reg_bsn_monitor_v2_addr_w; + -- Read FPGA_bf_ring_rx_latency_R + for RN in 0 to c_last_rn LOOP + v_offset := 6 + RN * v_span; + proc_mem_mm_bus_rd(v_offset, mm_clk, reg_bsn_monitor_v2_ring_rx_bf_cipo, reg_bsn_monitor_v2_ring_rx_bf_copi); + proc_mem_mm_bus_rd_latency(1, mm_clk); + FPGA_bf_ring_rx_latency_R(RN) <= TO_SINT(reg_bsn_monitor_v2_ring_rx_bf_cipo.rddata(c_word_w - 1 downto 0)); + end loop; + -- Read FPGA_bf_rx_align_latency_R, for both c_sdp_P_sum = 2 inputs per RN + v_span_node := true_log_pow2(c_sdp_P_sum) * v_span; + for RN in 0 to c_last_rn LOOP + for P in 0 to c_sdp_P_sum - 1 loop + v_offset := 6 + RN * v_span_node + P * v_span; + proc_mem_mm_bus_rd(v_offset, mm_clk, reg_bsn_monitor_v2_bf_rx_align_cipo, reg_bsn_monitor_v2_bf_rx_align_copi); + proc_mem_mm_bus_rd_latency(1, mm_clk); + FPGA_bf_rx_align_latency_R(RN)(P) <= TO_SINT(reg_bsn_monitor_v2_bf_rx_align_cipo.rddata(c_word_w - 1 downto 0)); + end loop; + end loop; + -- Read FPGA_bf_aligned_latency_R + for RN in 0 to c_last_rn LOOP + v_offset := 6 + RN * v_span; + proc_mem_mm_bus_rd(v_offset, mm_clk, reg_bsn_monitor_v2_bf_aligned_cipo, reg_bsn_monitor_v2_bf_aligned_copi); + proc_mem_mm_bus_rd_latency(1, mm_clk); + 
FPGA_bf_aligned_latency_R(RN) <= TO_SINT(reg_bsn_monitor_v2_bf_aligned_cipo.rddata(c_word_w - 1 downto 0)); + end loop; + -- Read FPGA_bf_ring_tx_latency_R + for RN in 0 to c_last_rn LOOP + v_offset := 6 + RN * v_span; + proc_mem_mm_bus_rd(v_offset, mm_clk, reg_bsn_monitor_v2_ring_tx_bf_cipo, reg_bsn_monitor_v2_ring_tx_bf_copi); + proc_mem_mm_bus_rd_latency(1, mm_clk); + FPGA_bf_ring_tx_latency_R(RN) <= TO_SINT(reg_bsn_monitor_v2_ring_tx_bf_cipo.rddata(c_word_w - 1 downto 0)); + end loop; + + --------------------------------------------------------------------------- + -- Wait until end of simulation + --------------------------------------------------------------------------- mm_init <= '0'; + + proc_common_wait_until_high(dp_clk, stimuli_end); + proc_common_wait_some_cycles(dp_clk, 1000); + + --------------------------------------------------------------------------- + -- Print latency results + --------------------------------------------------------------------------- + print_str("c_cable_delay = " & int_to_str(c_nof_delay) & " * 6.4 ns"); + print_str(""); + print_str("Node: bf_ring_rx bf_rx_align bf_aligned bf_ring_tx"); + print_str(" _latency: _latency: _latency: _latency:"); + for RN in 0 to c_last_rn loop + print_str(int_to_str(RN) & ": " & + int_to_str(FPGA_bf_ring_rx_latency_R(RN)) & " ( " & + int_to_str(FPGA_bf_rx_align_latency_R(RN)(0)) & " " & + int_to_str(FPGA_bf_rx_align_latency_R(RN)(1)) & " ) " & + int_to_str(FPGA_bf_aligned_latency_R(RN)) & " " & + int_to_str(FPGA_bf_ring_tx_latency_R(RN))); + end Loop; + + tb_end <= '1'; wait; end process; + -- End the tb simulation + proc_common_timeout_failure(c_tb_timeout, tb_end); -- ERROR: end simulation if it fails to end in time + proc_common_stop_simulation(tb_end); -- OK: end simulation ------------------------------------------------------------------------------ -- DUT ------------------------------------------------------------------------------ gen_dut : for RN in 0 to c_last_rn generate - -- Ring 
connections between nodes 0:c_last_rn,0 + -- Connect ring wires between the nodes + wire_ring : if RN > 0 generate + tr_10gbe_ring_serial_rx_arr(RN) <= transport tr_10gbe_ring_serial_tx_arr(RN - 1) after c_cable_delay; + end generate; + close_ring : if RN = 0 generate + tr_10gbe_ring_serial_rx_arr(0) <= transport tr_10gbe_ring_serial_tx_arr(c_last_rn) after c_cable_delay; + end generate; + + -- tr_10GbE access at each node, all via front_io QSFP[0] + u_tr_10GbE_ring: entity tr_10GbE_lib.tr_10GbE + generic map ( + g_sim => true, + g_sim_level => 1, + g_nof_macs => 1, + g_direction => "TX_RX", + g_tx_fifo_fill => c_fifo_tx_fill_ring, + g_tx_fifo_size => c_fifo_tx_size_ring + ) + port map ( + -- Transceiver PLL reference clock + tr_ref_clk_644 => SA_CLK, + tr_ref_clk_312 => tr_ref_clk_312, + tr_ref_clk_156 => tr_ref_clk_156, + tr_ref_rst_156 => tr_ref_rst_156, + + -- MM interface + mm_rst => mm_rst, + mm_clk => mm_clk, + + reg_mac_mosi => c_mem_copi_rst, + reg_mac_miso => open, + reg_eth10g_mosi => c_mem_copi_rst, + reg_eth10g_miso => open, + + -- DP interface + dp_rst => dp_rst, + dp_clk => dp_clk, + + src_out_arr => tr_10gbe_ring_rx_sosi_arr(RN downto RN), + snk_in_arr => tr_10gbe_ring_tx_sosi_arr(RN downto RN), + + -- Serial IO + serial_tx_arr => tr_10gbe_ring_serial_tx_arr(RN downto RN), + serial_rx_arr => tr_10gbe_ring_serial_rx_arr(RN downto RN) + ); + + -- Ring lane access at each node u_ring_lane_bf : entity ring_lib.ring_lane generic map ( g_lane_direction => 1, -- transport in positive RN direction. 
g_lane_data_w => c_longword_w, g_lane_packet_length => c_lane_payload_nof_longwords_bf, - g_lane_total_nof_packets_w => c_lane_total_nof_packets_w, + g_lane_total_nof_packets_w => 32, g_use_dp_layer => true, g_nof_rx_monitors => 1, g_nof_tx_monitors => 1, @@ -110,56 +552,159 @@ begin dp_clk => dp_clk, dp_rst => dp_rst, - from_lane_sosi => bf_from_ri_sosi_arr(beamset_id), - to_lane_sosi => bf_to_ri_sosi_arr(beamset_id), - lane_rx_cable_sosi => lane_rx_cable_sosi_arr(1 + beamset_id), - lane_rx_board_sosi => lane_rx_board_sosi_arr(1 + beamset_id), - lane_tx_cable_sosi => lane_tx_cable_sosi_arr(1 + beamset_id), - lane_tx_board_sosi => lane_tx_board_sosi_arr(1 + beamset_id), + from_lane_sosi => from_ri_sosi_arr(RN), + to_lane_sosi => to_ri_sosi_arr(RN), + lane_rx_cable_sosi => tr_10gbe_ring_rx_sosi_arr(RN), + lane_rx_board_sosi => c_dp_sosi_rst, + lane_tx_cable_sosi => tr_10gbe_ring_tx_sosi_arr(RN), + lane_tx_board_sosi => open, bs_sosi => bf_bs_sosi, -- used for bsn and sync - reg_ring_lane_info_copi => reg_ring_lane_info_bf_copi_arr(beamset_id), - reg_ring_lane_info_cipo => reg_ring_lane_info_bf_cipo_arr(beamset_id), - reg_bsn_monitor_v2_ring_rx_copi => reg_bsn_monitor_v2_ring_rx_bf_copi_arr(beamset_id), - reg_bsn_monitor_v2_ring_rx_cipo => reg_bsn_monitor_v2_ring_rx_bf_cipo_arr(beamset_id), - reg_bsn_monitor_v2_ring_tx_copi => reg_bsn_monitor_v2_ring_tx_bf_copi_arr(beamset_id), - reg_bsn_monitor_v2_ring_tx_cipo => reg_bsn_monitor_v2_ring_tx_bf_cipo_arr(beamset_id), - reg_dp_block_validate_err_copi => reg_dp_block_validate_err_bf_copi_arr(beamset_id), - reg_dp_block_validate_err_cipo => reg_dp_block_validate_err_bf_cipo_arr(beamset_id), - reg_dp_block_validate_bsn_at_sync_copi => reg_dp_block_validate_bsn_at_sync_bf_copi_arr(beamset_id), - reg_dp_block_validate_bsn_at_sync_cipo => reg_dp_block_validate_bsn_at_sync_bf_cipo_arr(beamset_id), - - this_rn => this_rn, - N_rn => ring_info.N_rn, - rx_select => ring_info.use_cable_to_previous_rn, - tx_select => 
ring_info.use_cable_to_next_rn + reg_ring_lane_info_copi => reg_ring_lane_info_bf_copi_arr(RN), + reg_ring_lane_info_cipo => reg_ring_lane_info_bf_cipo_arr(RN), + reg_bsn_monitor_v2_ring_rx_copi => reg_bsn_monitor_v2_ring_rx_bf_copi_arr(RN), + reg_bsn_monitor_v2_ring_rx_cipo => reg_bsn_monitor_v2_ring_rx_bf_cipo_arr(RN), + reg_bsn_monitor_v2_ring_tx_copi => reg_bsn_monitor_v2_ring_tx_bf_copi_arr(RN), + reg_bsn_monitor_v2_ring_tx_cipo => reg_bsn_monitor_v2_ring_tx_bf_cipo_arr(RN), + reg_dp_block_validate_err_copi => reg_dp_block_validate_err_bf_copi_arr(RN), + reg_dp_block_validate_err_cipo => reg_dp_block_validate_err_bf_cipo_arr(RN), + reg_dp_block_validate_bsn_at_sync_copi => reg_dp_block_validate_bsn_at_sync_bf_copi_arr(RN), + reg_dp_block_validate_bsn_at_sync_cipo => reg_dp_block_validate_bsn_at_sync_bf_cipo_arr(RN), + + this_rn => to_uvec(RN, c_byte_w), + N_rn => to_uvec(g_nof_rn, c_byte_w), + rx_select => c_use_cable, + tx_select => c_use_cable ); -- Intermediate BF alignment and summation at each node u_sdp_beamformer_remote : entity work.sdp_beamformer_remote + generic map ( + g_nof_aligners_max => g_nof_rn + ) port map ( dp_clk => dp_clk, dp_rst => dp_rst, - rn_index => rn_index, + rn_index => RN, - local_bf_sosi : in t_dp_sosi; - from_ri_sosi : in t_dp_sosi; - to_ri_sosi : out t_dp_sosi; - bf_sum_sosi : out t_dp_sosi; + local_bf_sosi => local_bf_sosi, -- all nodes use same local reference data + from_ri_sosi => from_ri_sosi_arr(RN), + to_ri_sosi => to_ri_sosi_arr(RN), + bf_sum_sosi => bf_sum_sosi_arr(RN), - mm_rst : in std_logic; - mm_clk : in std_logic; + mm_rst => mm_rst, + mm_clk => mm_clk, - reg_bsn_align_copi : in t_mem_copi := c_mem_copi_rst; - reg_bsn_align_cipo : out t_mem_cipo; + reg_bsn_align_copi => reg_bsn_align_v2_bf_copi_arr(RN), + reg_bsn_align_cipo => reg_bsn_align_v2_bf_cipo_arr(RN), + reg_bsn_monitor_v2_bsn_align_input_copi => reg_bsn_monitor_v2_bf_rx_align_copi_arr(RN), + reg_bsn_monitor_v2_bsn_align_input_cipo => 
reg_bsn_monitor_v2_bf_rx_align_cipo_arr(RN), + reg_bsn_monitor_v2_bsn_align_output_copi => reg_bsn_monitor_v2_bf_aligned_copi_arr(RN), + reg_bsn_monitor_v2_bsn_align_output_cipo => reg_bsn_monitor_v2_bf_aligned_cipo_arr(RN) + ); + end generate; -- gen_dut - reg_bsn_monitor_v2_bsn_align_input_copi : in t_mem_copi := c_mem_copi_rst; - reg_bsn_monitor_v2_bsn_align_input_cipo : out t_mem_cipo; + ------------------------------------------------------------------------------ + -- Verify bf_sum_sosi_arr at every node, to check that no packets were lost + ------------------------------------------------------------------------------ + p_verify_bf_sum : process(dp_clk) + begin + for RN in 0 to c_last_rn Loop + if bf_sum_sosi_arr(RN).valid = '1' then + assert TO_SINT(bf_sum_sosi_arr(RN).re) = (RN + 1) * c_local_bf_re report "Wrong BF re sum at node " & int_to_str(RN) severity error; + assert TO_SINT(bf_sum_sosi_arr(RN).im) = (RN + 1) * c_local_bf_im report "Wrong BF im sum at node " & int_to_str(RN) severity error; + end if; + end loop; + end process; - reg_bsn_monitor_v2_bsn_align_output_copi : in t_mem_copi := c_mem_copi_rst; - reg_bsn_monitor_v2_bsn_align_output_cipo : out t_mem_cipo - ); -end generate; -- gen_dut + ------------------------------------------------------------------------------ + -- 10GbE clocks + ------------------------------------------------------------------------------ + u_tech_pll_xgmii_mac_clocks : entity tech_pll_lib.tech_pll_xgmii_mac_clocks + port map ( + refclk_644 => SA_CLK, + rst_in => mm_rst, + clk_156 => tr_ref_clk_156, + clk_312 => tr_ref_clk_312, + rst_156 => tr_ref_rst_156, + rst_312 => open + ); + + ------------------------------------------------------------------------------ + -- MM bus multiplexers + ------------------------------------------------------------------------------ + -- Use common_mem_mux to avoid (vcom-1450) Actual (indexed name) for formal "mm_miso" is not a static signal name. 
+ -- Use downto range for _arr, to match downto range of mosi_arr. + u_mem_mux_reg_ring_lane_info_bf : entity common_lib.common_mem_mux + generic map ( + g_nof_mosi => g_nof_rn, + g_mult_addr_w => c_addr_w_reg_ring_lane_info_bf + ) + port map ( + mosi => reg_ring_lane_info_bf_copi, + miso => reg_ring_lane_info_bf_cipo, + mosi_arr => reg_ring_lane_info_bf_copi_arr, + miso_arr => reg_ring_lane_info_bf_cipo_arr + ); + + u_mem_mux_reg_bsn_monitor_v2_ring_rx_bf : entity common_lib.common_mem_mux + generic map ( + g_nof_mosi => g_nof_rn, + g_mult_addr_w => c_sdp_reg_bsn_monitor_v2_addr_w + ) + port map ( + mosi => reg_bsn_monitor_v2_ring_rx_bf_copi, + miso => reg_bsn_monitor_v2_ring_rx_bf_cipo, + mosi_arr => reg_bsn_monitor_v2_ring_rx_bf_copi_arr, + miso_arr => reg_bsn_monitor_v2_ring_rx_bf_cipo_arr + ); + u_mem_mux_reg_bsn_monitor_v2_ring_tx_bf : entity common_lib.common_mem_mux + generic map ( + g_nof_mosi => g_nof_rn, + g_mult_addr_w => c_sdp_reg_bsn_monitor_v2_addr_w + ) + port map ( + mosi => reg_bsn_monitor_v2_ring_tx_bf_copi, + miso => reg_bsn_monitor_v2_ring_tx_bf_cipo, + mosi_arr => reg_bsn_monitor_v2_ring_tx_bf_copi_arr, + miso_arr => reg_bsn_monitor_v2_ring_tx_bf_cipo_arr + ); + + u_mem_mux_reg_bsn_monitor_v2_bf_rx_align : entity common_lib.common_mem_mux + generic map ( + g_nof_mosi => g_nof_rn, + g_mult_addr_w => c_sdp_reg_bsn_monitor_v2_addr_w + ceil_log2(c_sdp_P_sum) + ) + port map ( + mosi => reg_bsn_monitor_v2_bf_rx_align_copi, + miso => reg_bsn_monitor_v2_bf_rx_align_cipo, + mosi_arr => reg_bsn_monitor_v2_bf_rx_align_copi_arr, + miso_arr => reg_bsn_monitor_v2_bf_rx_align_cipo_arr + ); + + u_mem_mux_reg_bsn_monitor_v2_bf_aligned : entity common_lib.common_mem_mux + generic map ( + g_nof_mosi => g_nof_rn, + g_mult_addr_w => c_sdp_reg_bsn_monitor_v2_addr_w + ) + port map ( + mosi => reg_bsn_monitor_v2_bf_aligned_copi, + miso => reg_bsn_monitor_v2_bf_aligned_cipo, + mosi_arr => reg_bsn_monitor_v2_bf_aligned_copi_arr, + miso_arr => 
reg_bsn_monitor_v2_bf_aligned_cipo_arr + ); + + u_mem_mux_reg_bsn_align_v2_bf : entity common_lib.common_mem_mux + generic map ( + g_nof_mosi => g_nof_rn, + g_mult_addr_w => c_sdp_reg_bsn_align_v2_addr_w + ) + port map ( + mosi => reg_bsn_align_v2_bf_copi, + miso => reg_bsn_align_v2_bf_cipo, + mosi_arr => reg_bsn_align_v2_bf_copi_arr, + miso_arr => reg_bsn_align_v2_bf_cipo_arr + ); end tb; diff --git a/applications/lofar2/libraries/sdp/tb/vhdl/tb_sdp_crosslets_remote_ring.vhd b/applications/lofar2/libraries/sdp/tb/vhdl/tb_sdp_crosslets_remote_ring.vhd new file mode 100644 index 0000000000000000000000000000000000000000..1ec7de0db3d269388ee83389ea985b8da6ebf426 --- /dev/null +++ b/applications/lofar2/libraries/sdp/tb/vhdl/tb_sdp_crosslets_remote_ring.vhd @@ -0,0 +1,938 @@ +------------------------------------------------------------------------------- +-- +-- Copyright 2024 +-- ASTRON (Netherlands Institute for Radio Astronomy) <http://www.astron.nl/> +-- P.O.Box 2, 7990 AA Dwingeloo, The Netherlands +-- +-- Licensed under the Apache License, Version 2.0 (the "License"); +-- you may not use this file except in compliance with the License. +-- You may obtain a copy of the License at +-- +-- http://www.apache.org/licenses/LICENSE-2.0 +-- +-- Unless required by applicable law or agreed to in writing, software +-- distributed under the License is distributed on an "AS IS" BASIS, +-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +-- See the License for the specific language governing permissions and +-- limitations under the License. +-- +------------------------------------------------------------------------------- +-- +-- Author: E. Kooistra +-- Purpose: +-- . Test bench for multiple sdp_crosslets_remote.vhd + ring_lane.vhd + +-- tr_10GbE in a ring +-- Description: +-- . https://support.astron.nl/confluence/display/L2M/L5+SDPFW+Design+Document%3A+Subband+Correlator +-- +-- . 
Block diagram: +-- * tb can use one instance of tr_10GbE to model Rx from ring and Tx to ring. +-- * Ring lane serial links for ring nodes RN = 0 to c_last_rn: +-- +-- tr_10gbe_ring_serial_tx_arr --> tr_10gbe_ring_serial_rx_arr after c_cable_delay +-- +-- /<-------------------------------------------------------------\ +-- \---> 0 ---> RN - 1 ---> RN ---> RN + 1 ---> c_last_rn --->/ +-- |^ +-- tr_10gbe_ring_serial_rx_arr(RN) || tr_10gbe_ring_serial_tx_arr(RN) +-- v| +-- tr_10GbE +-- |^ +-- tr_10gbe_ring_rx_sosi_arr(RN) || tr_10gbe_ring_tx_sosi_arr(RN) +-- v| +-- ring_lane +-- |^ +-- from_ri_sosi_arr(RN) || to_ri_sosi_arr(RN) +-- v| +-- local_crosslets_sosi --> sdp_crosslets_remote --> x_sosi_arr(RN)(P_sq) +-- x_sosi +-- +-- * BSN monitors: +-- RN +-- |^ +-- ring_lane/ring_rx || ring_lane/ring_tx +-- FPGA_xst_ring_rx_latency_R(RN)(RN) || FPGA_xst_ring_tx_latency_R(RN)(RN) +-- || +-- dp_bsn_align_v2 P_sq inputs || +-- FPGA_xst_rx_align_latency_R(RN)(P_sq) || +-- || +-- dp_bsn_align_v2 aligned output || +-- FPGA_xst_aligned_latency_R(RN) v| +-- +-- . 
XST ring latency results from SDP-ARTS HW: +-- - xst_ring_rx_latency (SDP-ARTS HW): +-- node 64: -1 -1 -1 -1 -1 -1 -1 -1 1774 1569 1363 1112 906 677 472 266 +-- node 65: 249 -1 -1 -1 -1 -1 -1 -1 -1 1776 1579 1352 1113 890 692 472 +-- node 66: 466 267 -1 -1 -1 -1 -1 -1 -1 -1 1787 1566 1340 1105 905 685 +-- node 67: 688 487 266 -1 -1 -1 -1 -1 -1 -1 -1 1793 1566 1346 1128 905 +-- node 68: 904 699 493 264 -1 -1 -1 -1 -1 -1 -1 -1 1788 1567 1355 1133 +-- node 69: 1114 913 717 473 252 -1 -1 -1 -1 -1 -1 -1 -1 1776 1576 1357 +-- node 70: 1341 1122 945 681 460 259 -1 -1 -1 -1 -1 -1 -1 -1 1783 1566 +-- node 71: 1551 1348 1156 890 667 471 250 -1 -1 -1 -1 -1 -1 -1 -1 1773 +-- node 72: 1785 1596 1397 1122 894 711 482 277 -1 -1 -1 -1 -1 -1 -1 -1 +-- node 73: -1 1819 1618 1350 1114 936 693 497 254 -1 -1 -1 -1 -1 -1 -1 +-- node 74: -1 -1 1828 1563 1342 1146 901 704 461 260 -1 -1 -1 -1 -1 -1 +-- node 75: -1 -1 -1 1784 1564 1366 1121 920 677 480 257 -1 -1 -1 -1 -1 +-- node 76: -1 -1 -1 -1 1804 1597 1362 1164 913 707 500 273 -1 -1 -1 -1 +-- node 77: -1 -1 -1 -1 -1 1810 1587 1390 1125 924 723 480 261 -1 -1 -1 +-- node 78: -1 -1 -1 -1 -1 -1 1800 1599 1351 1137 938 693 472 253 -1 -1 +-- node 79: -1 -1 -1 -1 -1 -1 -1 1809 1566 1344 1143 899 681 460 259 -1 +-- +-- # FPGA_xst_ring_rx_latency_R (sim: c_nof_delay = 0 with sdp_crosslets_remote_v2.vhd): +-- # 0: -1 -1 -1 -1 -1 -1 -1 -1 1604 1409 1211 1016 818 623 427 230 +-- # 1: 230 -1 -1 -1 -1 -1 -1 -1 -1 1604 1409 1211 1016 818 623 427 +-- # 2: 427 230 -1 -1 -1 -1 -1 -1 -1 -1 1604 1409 1211 1016 818 623 +-- # 3: 623 427 230 -1 -1 -1 -1 -1 -1 -1 -1 1604 1409 1211 1016 818 +-- # 4: 818 623 427 230 -1 -1 -1 -1 -1 -1 -1 -1 1604 1409 1211 1016 +-- # 5: 1016 818 623 427 230 -1 -1 -1 -1 -1 -1 -1 -1 1604 1409 1211 +-- # 6: 1211 1016 818 623 427 230 -1 -1 -1 -1 -1 -1 -1 -1 1604 1409 +-- # 7: 1409 1211 1016 818 623 427 230 -1 -1 -1 -1 -1 -1 -1 -1 1604 +-- # 8: 1604 1409 1211 1016 818 623 427 230 -1 -1 -1 -1 -1 -1 -1 -1 +-- # 9: -1 1604 1409 1211 1016 
818 623 427 230 -1 -1 -1 -1 -1 -1 -1 +-- # 10: -1 -1 1604 1409 1211 1016 818 623 427 230 -1 -1 -1 -1 -1 -1 +-- # 11: -1 -1 -1 1604 1409 1211 1016 818 623 427 230 -1 -1 -1 -1 -1 +-- # 12: -1 -1 -1 -1 1604 1409 1211 1016 818 623 427 230 -1 -1 -1 -1 +-- # 13: -1 -1 -1 -1 -1 1604 1409 1211 1016 818 623 427 230 -1 -1 -1 +-- # 14: -1 -1 -1 -1 -1 -1 1604 1409 1211 1016 818 623 427 230 -1 -1 +-- # 15: -1 -1 -1 -1 -1 -1 -1 1604 1409 1211 1016 818 623 427 230 -1 +-- +-- # FPGA_xst_ring_rx_latency_R (sim: c_nof_delay = 12): +-- # 0: -1 -1 -1 -1 -1 -1 -1 -1 1729 1533 1332 1053 856 638 442 245 +-- # 1: 245 -1 -1 -1 -1 -1 -1 -1 -1 1729 1533 1332 1053 856 638 442 +-- # 2: 442 245 -1 -1 -1 -1 -1 -1 -1 -1 1729 1533 1332 1053 856 638 +-- # 3: 638 442 245 -1 -1 -1 -1 -1 -1 -1 -1 1729 1533 1332 1053 856 +-- # 4: 856 638 442 245 -1 -1 -1 -1 -1 -1 -1 -1 1729 1533 1332 1053 +-- # 5: 1053 856 638 442 245 -1 -1 -1 -1 -1 -1 -1 -1 1729 1533 1332 +-- # 6: 1332 1053 856 638 442 245 -1 -1 -1 -1 -1 -1 -1 -1 1729 1533 +-- # 7: 1533 1332 1053 856 638 442 245 -1 -1 -1 -1 -1 -1 -1 -1 1729 +-- # 8: 1729 1533 1332 1053 856 638 442 245 -1 -1 -1 -1 -1 -1 -1 -1 +-- # 9: -1 1729 1533 1332 1053 856 638 442 245 -1 -1 -1 -1 -1 -1 -1 +-- # 10: -1 -1 1729 1533 1332 1053 856 638 442 245 -1 -1 -1 -1 -1 -1 +-- # 11: -1 -1 -1 1729 1533 1332 1053 856 638 442 245 -1 -1 -1 -1 -1 +-- # 12: -1 -1 -1 -1 1729 1533 1332 1053 856 638 442 245 -1 -1 -1 -1 +-- # 13: -1 -1 -1 -1 -1 1729 1533 1332 1053 856 638 442 245 -1 -1 -1 +-- # 14: -1 -1 -1 -1 -1 -1 1729 1533 1332 1053 856 638 442 245 -1 -1 +-- # 15: -1 -1 -1 -1 -1 -1 -1 1729 1533 1332 1053 856 638 442 245 -1 +-- +-- # FPGA_xst_ring_rx_latency_R (sim: c_nof_delay = 25): +-- # 0: -1 -1 -1 -1 -1 -1 -1 -1 1789 1571 1352 1135 917 698 481 262 +-- # 1: 262 -1 -1 -1 -1 -1 -1 -1 -1 1789 1571 1352 1135 917 698 481 +-- # 2: 481 262 -1 -1 -1 -1 -1 -1 -1 -1 1789 1571 1352 1135 917 698 +-- # 3: 698 481 262 -1 -1 -1 -1 -1 -1 -1 -1 1789 1571 1352 1135 917 +-- # 4: 917 698 481 262 -1 -1 
-1 -1 -1 -1 -1 -1 1789 1571 1352 1135 +-- # 5: 1135 917 698 481 262 -1 -1 -1 -1 -1 -1 -1 -1 1789 1571 1352 +-- # 6: 1352 1135 917 698 481 262 -1 -1 -1 -1 -1 -1 -1 -1 1789 1571 +-- # 7: 1571 1352 1135 917 698 481 262 -1 -1 -1 -1 -1 -1 -1 -1 1789 +-- # 8: 1789 1571 1352 1135 917 698 481 262 -1 -1 -1 -1 -1 -1 -1 -1 +-- # 9: -1 1789 1571 1352 1135 917 698 481 262 -1 -1 -1 -1 -1 -1 -1 +-- # 10: -1 -1 1789 1571 1352 1135 917 698 481 262 -1 -1 -1 -1 -1 -1 +-- # 11: -1 -1 -1 1789 1571 1352 1135 917 698 481 262 -1 -1 -1 -1 -1 +-- # 12: -1 -1 -1 -1 1789 1571 1352 1135 917 698 481 262 -1 -1 -1 -1 +-- # 13: -1 -1 -1 -1 -1 1789 1571 1352 1135 917 698 481 262 -1 -1 -1 +-- # 14: -1 -1 -1 -1 -1 -1 1789 1571 1352 1135 917 698 481 262 -1 -1 +-- # 15: -1 -1 -1 -1 -1 -1 -1 1789 1571 1352 1135 917 698 481 262 -1 +-- +-- - xst_ring_tx_latency (SDP-ARTS HW): +-- node 64: 12 -1 -1 -1 -1 -1 -1 -1 -1 1611 1361 1155 926 698 470 264 +-- node 65: 256 12 -1 -1 -1 -1 -1 -1 -1 -1 1583 1363 1143 920 676 476 +-- node 66: 470 272 12 -1 -1 -1 -1 -1 -1 -1 -1 1575 1353 1131 892 692 +-- node 67: 694 496 274 12 -1 -1 -1 -1 -1 -1 -1 -1 1577 1357 1119 914 +-- node 68: 922 714 486 258 12 -1 -1 -1 -1 -1 -1 -1 -1 1585 1347 1125 +-- node 69: 1145 926 704 482 260 12 -1 -1 -1 -1 -1 -1 -1 -1 1567 1345 +-- node 70: 1371 1147 924 704 484 264 12 -1 -1 -1 -1 -1 -1 -1 -1 1567 +-- node 71: 1579 1359 1139 916 696 476 256 12 -1 -1 -1 -1 -1 -1 -1 -1 +-- node 72: -1 1597 1369 1141 934 706 500 274 12 -1 -1 -1 -1 -1 -1 -1 +-- node 73: -1 -1 1593 1347 1149 930 706 484 262 12 -1 -1 -1 -1 -1 -1 +-- node 74: -1 -1 -1 1571 1373 1151 928 708 488 264 12 -1 -1 -1 -1 -1 +-- node 75: -1 -1 -1 -1 1595 1373 1151 928 708 488 264 12 -1 -1 -1 -1 +-- node 76: -1 -1 -1 -1 -1 1619 1391 1163 934 728 500 294 12 -1 -1 -1 +-- node 77: -1 -1 -1 -1 -1 -1 1611 1369 1145 946 706 508 264 12 -1 -1 +-- node 78: -1 -1 -1 -1 -1 -1 -1 1593 1371 1171 928 732 488 268 12 -1 +-- node 79: -1 -1 -1 -1 -1 -1 -1 -1 1587 1387 1143 948 702 480 262 12 +-- +-- # 
FPGA_xst_ring_tx_latency_R (sim: c_nof_delay = 0 with sdp_crosslets_remote_v2.vhd): +-- # 0: 13 -1 -1 -1 -1 -1 -1 -1 -1 1415 1217 1023 824 629 434 237 +-- # 1: 237 13 -1 -1 -1 -1 -1 -1 -1 -1 1415 1217 1023 824 629 434 +-- # 2: 434 237 13 -1 -1 -1 -1 -1 -1 -1 -1 1415 1217 1023 824 629 +-- # 3: 629 434 237 13 -1 -1 -1 -1 -1 -1 -1 -1 1415 1217 1023 824 +-- # 4: 824 629 434 237 13 -1 -1 -1 -1 -1 -1 -1 -1 1415 1217 1023 +-- # 5: 1023 824 629 434 237 13 -1 -1 -1 -1 -1 -1 -1 -1 1415 1217 +-- # 6: 1217 1023 824 629 434 237 13 -1 -1 -1 -1 -1 -1 -1 -1 1415 +-- # 7: 1415 1217 1023 824 629 434 237 13 -1 -1 -1 -1 -1 -1 -1 -1 +-- # 8: -1 1415 1217 1023 824 629 434 237 13 -1 -1 -1 -1 -1 -1 -1 +-- # 9: -1 -1 1415 1217 1023 824 629 434 237 13 -1 -1 -1 -1 -1 -1 +-- # 10: -1 -1 -1 1415 1217 1023 824 629 434 237 13 -1 -1 -1 -1 -1 +-- # 11: -1 -1 -1 -1 1415 1217 1023 824 629 434 237 13 -1 -1 -1 -1 +-- # 12: -1 -1 -1 -1 -1 1415 1217 1023 824 629 434 237 13 -1 -1 -1 +-- # 13: -1 -1 -1 -1 -1 -1 1415 1217 1023 824 629 434 237 13 -1 -1 +-- # 14: -1 -1 -1 -1 -1 -1 -1 1415 1217 1023 824 629 434 237 13 -1 +-- # 15: -1 -1 -1 -1 -1 -1 -1 -1 1415 1217 1023 824 629 434 237 13 +-- +-- # FPGA_xst_ring_tx_latency_R (sim: c_nof_delay = 12): +-- # 0: 12 -1 -1 -1 -1 -1 -1 -1 -1 1539 1339 1119 862 645 448 251 +-- # 1: 251 12 -1 -1 -1 -1 -1 -1 -1 -1 1539 1339 1119 862 645 448 +-- # 2: 448 251 12 -1 -1 -1 -1 -1 -1 -1 -1 1539 1339 1119 862 645 +-- # 3: 645 448 251 12 -1 -1 -1 -1 -1 -1 -1 -1 1539 1339 1119 862 +-- # 4: 862 645 448 251 12 -1 -1 -1 -1 -1 -1 -1 -1 1539 1339 1119 +-- # 5: 1119 862 645 448 251 12 -1 -1 -1 -1 -1 -1 -1 -1 1539 1339 +-- # 6: 1339 1119 862 645 448 251 12 -1 -1 -1 -1 -1 -1 -1 -1 1539 +-- # 7: 1539 1339 1119 862 645 448 251 12 -1 -1 -1 -1 -1 -1 -1 -1 +-- # 8: -1 1539 1339 1119 862 645 448 251 12 -1 -1 -1 -1 -1 -1 -1 +-- # 9: -1 -1 1539 1339 1119 862 645 448 251 12 -1 -1 -1 -1 -1 -1 +-- # 10: -1 -1 -1 1539 1339 1119 862 645 448 251 12 -1 -1 -1 -1 -1 +-- # 11: -1 -1 -1 -1 1539 1339 1119 
862 645 448 251 12 -1 -1 -1 -1 +-- # 12: -1 -1 -1 -1 -1 1539 1339 1119 862 645 448 251 12 -1 -1 -1 +-- # 13: -1 -1 -1 -1 -1 -1 1539 1339 1119 862 645 448 251 12 -1 -1 +-- # 14: -1 -1 -1 -1 -1 -1 -1 1539 1339 1119 862 645 448 251 12 -1 +-- # 15: -1 -1 -1 -1 -1 -1 -1 -1 1539 1339 1119 862 645 448 251 12 +-- +-- # FPGA_xst_ring_tx_latency_R (sim: c_nof_delay = 25): +-- # 0: 12 -1 -1 -1 -1 -1 -1 -1 -1 1577 1359 1141 924 705 488 269 +-- # 1: 269 12 -1 -1 -1 -1 -1 -1 -1 -1 1577 1359 1141 924 705 488 +-- # 2: 488 269 12 -1 -1 -1 -1 -1 -1 -1 -1 1577 1359 1141 924 705 +-- # 3: 705 488 269 12 -1 -1 -1 -1 -1 -1 -1 -1 1577 1359 1141 924 +-- # 4: 924 705 488 269 12 -1 -1 -1 -1 -1 -1 -1 -1 1577 1359 1141 +-- # 5: 1141 924 705 488 269 12 -1 -1 -1 -1 -1 -1 -1 -1 1577 1359 +-- # 6: 1359 1141 924 705 488 269 12 -1 -1 -1 -1 -1 -1 -1 -1 1577 +-- # 7: 1577 1359 1141 924 705 488 269 12 -1 -1 -1 -1 -1 -1 -1 -1 +-- # 8: -1 1577 1359 1141 924 705 488 269 12 -1 -1 -1 -1 -1 -1 -1 +-- # 9: -1 -1 1577 1359 1141 924 705 488 269 12 -1 -1 -1 -1 -1 -1 +-- # 10: -1 -1 -1 1577 1359 1141 924 705 488 269 12 -1 -1 -1 -1 -1 +-- # 11: -1 -1 -1 -1 1577 1359 1141 924 705 488 269 12 -1 -1 -1 -1 +-- # 12: -1 -1 -1 -1 -1 1577 1359 1141 924 705 488 269 12 -1 -1 -1 +-- # 13: -1 -1 -1 -1 -1 -1 1577 1359 1141 924 705 488 269 12 -1 -1 +-- # 14: -1 -1 -1 -1 -1 -1 -1 1577 1359 1141 924 705 488 269 12 -1 +-- # 15: -1 -1 -1 -1 -1 -1 -1 -1 1577 1359 1141 924 705 488 269 12 +-- +-- - xst_rx_align_latency (SDP-ARTS HW): +-- node 64: 1 204 434 638 868 1109 1318 1546 1774 +-- node 65: 1 214 412 652 852 1109 1315 1532 1756 +-- node 66: 1 202 422 622 866 1109 1326 1529 1750 +-- node 67: 1 204 426 648 846 1109 1324 1548 1751 +-- node 68: 1 210 416 644 874 1109 1322 1528 1758 +-- node 69: 1 204 426 626 848 1109 1328 1546 1746 +-- node 70: 1 208 428 648 848 1109 1330 1550 1753 +-- node 71: 1 210 430 648 870 1109 1328 1552 1770 +-- node 72: 1 230 436 666 892 1109 1342 1570 1776 +-- node 73: 1 202 444 640 884 1109 1327 1566 1788 
+-- node 74: 1 222 422 664 860 1109 1323 1543 1784 +-- node 75: 1 214 432 634 878 1109 1319 1541 1763 +-- node 76: 1 232 438 668 872 1125 1346 1559 1789 +-- node 77: 1 216 436 654 876 1109 1337 1554 1777 +-- node 78: 1 206 430 648 868 1109 1332 1559 1772 +-- node 79: 1 208 430 650 870 1109 1328 1550 1775 +-- +-- # FPGA_xst_rx_align_latency_R (sim: c_nof_delay = 0 with sdp_crosslets_remote_v2.vhd): +-- # 0: 1 235 432 628 823 1021 1216 1414 1609 +-- # 1: 1 235 432 628 823 1021 1216 1414 1609 +-- # 2: 1 235 432 628 823 1021 1216 1414 1609 +-- # 3: 1 235 432 628 823 1021 1216 1414 1609 +-- # 4: 1 235 432 628 823 1021 1216 1414 1609 +-- # 5: 1 235 432 628 823 1021 1216 1414 1609 +-- # 6: 1 235 432 628 823 1021 1216 1414 1609 +-- # 7: 1 235 432 628 823 1021 1216 1414 1609 +-- # 8: 1 235 432 628 823 1021 1216 1414 1609 +-- # 9: 1 235 432 628 823 1021 1216 1414 1609 +-- # 10: 1 235 432 628 823 1021 1216 1414 1609 +-- # 11: 1 235 432 628 823 1021 1216 1414 1609 +-- # 12: 1 235 432 628 823 1021 1216 1414 1609 +-- # 13: 1 235 432 628 823 1021 1216 1414 1609 +-- # 14: 1 235 432 628 823 1021 1216 1414 1609 +-- # 15: 1 235 432 628 823 1021 1216 1414 1609 +-- +-- # FPGA_xst_rx_align_latency_R (sim: c_nof_delay = 12): +-- # 0: 1 199 396 593 810 1109 1308 1506 1702 +-- # 1: 1 199 396 593 810 1109 1308 1506 1702 +-- # 2: 1 199 396 593 810 1109 1308 1506 1702 +-- # 3: 1 199 396 593 810 1109 1308 1506 1702 +-- # 4: 1 199 396 593 810 1109 1308 1506 1702 +-- # 5: 1 199 396 593 810 1109 1308 1506 1702 +-- # 6: 1 199 396 593 810 1109 1308 1506 1702 +-- # 7: 1 199 396 593 810 1109 1308 1506 1702 +-- # 8: 1 199 396 593 810 1109 1308 1506 1702 +-- # 9: 1 199 396 593 810 1109 1308 1506 1702 +-- # 10: 1 199 396 593 810 1109 1308 1506 1702 +-- # 11: 1 199 396 593 810 1109 1308 1506 1702 +-- # 12: 1 199 396 593 810 1109 1308 1506 1702 +-- # 13: 1 199 396 593 810 1109 1308 1506 1702 +-- # 14: 1 199 396 593 810 1109 1308 1506 1702 +-- # 15: 1 199 396 593 810 1109 1308 1506 1702 +-- +-- # 
FPGA_xst_rx_align_latency_R (sim: c_nof_delay = 25): +-- # 0: 1 217 436 653 872 1109 1326 1544 1762 +-- # 1: 1 217 436 653 872 1109 1326 1544 1762 +-- # 2: 1 217 436 653 872 1109 1326 1544 1762 +-- # 3: 1 217 436 653 872 1109 1326 1544 1762 +-- # 4: 1 217 436 653 872 1109 1326 1544 1762 +-- # 5: 1 217 436 653 872 1109 1326 1544 1762 +-- # 6: 1 217 436 653 872 1109 1326 1544 1762 +-- # 7: 1 217 436 653 872 1109 1326 1544 1762 +-- # 8: 1 217 436 653 872 1109 1326 1544 1762 +-- # 9: 1 217 436 653 872 1109 1326 1544 1762 +-- # 10: 1 217 436 653 872 1109 1326 1544 1762 +-- # 11: 1 217 436 653 872 1109 1326 1544 1762 +-- # 12: 1 217 436 653 872 1109 1326 1544 1762 +-- # 13: 1 217 436 653 872 1109 1326 1544 1762 +-- # 14: 1 217 436 653 872 1109 1326 1544 1762 +-- # 15: 1 217 436 653 872 1109 1326 1544 1762 +-- +-- - xst_aligned_latency (SDP-ARTS HW): # FPGA_xst_aligned_latency_R (sim: c_nof_delay = 0, 12, 25): +-- node 64: 2051 # 0: 2051 +-- node 65: 2051 # 1: 2051 +-- node 66: 2051 # 2: 2051 +-- node 67: 2051 # 3: 2051 +-- node 68: 2051 # 4: 2051 +-- node 69: 2051 # 5: 2051 +-- node 70: 2051 # 6: 2051 +-- node 71: 2051 # 7: 2051 +-- node 72: 2051 # 8: 2051 +-- node 73: 2051 # 9: 2051 +-- node 74: 2051 # 10: 2051 +-- node 75: 2051 # 11: 2051 +-- node 76: 2051 # 12: 2051 +-- node 77: 2051 # 13: 2051 +-- node 78: 2051 # 14: 2051 +-- node 79: 2051 # 15: 2051 +-- +-- Usage: +-- > as 3 or more +-- > add wave -position insertpoint sim:/tb_sdp_crosslets_remote_ring/x_sosi_2arr +-- > run -a +------------------------------------------------------------------------------- + +library IEEE, common_lib, dp_lib, st_lib, ring_lib, tr_10GbE_lib, tech_pll_lib; +use IEEE.std_logic_1164.all; +use common_lib.common_pkg.all; +use common_lib.common_mem_pkg.all; +use common_lib.tb_common_pkg.all; +use common_lib.tb_common_mem_pkg.all; +use common_lib.common_str_pkg.all; +use dp_lib.dp_stream_pkg.all; +use ring_lib.ring_pkg.all; +use tech_pll_lib.tech_pll_component_pkg.all; +use 
work.sdp_pkg.all; +use work.tb_sdp_pkg.all; + +entity tb_sdp_crosslets_remote_ring is + generic ( + g_nof_rn : natural := 4; -- number of nodes in the ring + g_nof_sync : natural := 2 + ); +end tb_sdp_crosslets_remote_ring; + +architecture tb of tb_sdp_crosslets_remote_ring is + constant c_dp_clk_period : time := 5 ns; -- 200 MHz + constant c_mm_clk_period : time := 1 ns; -- fast MM clk to speed up simulation + constant c_sa_clk_period : time := tech_pll_clk_644_period; -- 644MHz + + -- Apply cable delay in tech_pll_clk_156_period units, to remain aligned with tr_10GbE sim model + -- . Choose c_cable_delay = 16 * 6.4 ~= 102 ns ~= 20 dp_clk of 5 ns, to match delay seen on HW + -- . Minimum c_cable_delay >= 12 * 6.4 = 77 ns ~= 15 dp_clk of 5 ns, else missed blocks in x_sosi + -- This minimum occurs when g_nof_rn > 8 and was found with g_nof_rn = 16. It happens because + -- the local crosslets are passed through ring_mux and dp_demux. As a result the + -- block period of the local crosslets can vary and then there is not enough time to read all + -- aligned crosslets. Therefore use sdp_crosslets_remote_v2.vhd instead. + -- . 
Maximum c_cable_delay <= 29 * 6.4 = 185 ns ~= 37 dp_clk of 5 ns, else missed blocks in x_sosi + constant c_clk_156_period : time := tech_pll_clk_156_period; -- 6.400020 ns ~= 156.25 MHz + constant c_nof_delay : natural := 0; + constant c_cable_delay : time := c_clk_156_period * c_nof_delay; + + -- XST data + constant c_P_sq : natural := g_nof_rn / 2 + 1; -- nof square correlator cells + constant c_nof_transport_hops : natural := c_P_sq - 1; + constant c_block_period : natural := c_sdp_N_fft; + constant c_block_size : natural := c_sdp_N_crosslets_max * c_sdp_S_pn; + constant c_gap_size : natural := c_block_period - c_block_size; + constant c_nof_blocks_per_sync : natural := 10; + constant c_local_crosslet_re : integer := 1; + constant c_local_crosslet_im : integer := 2; + + constant c_last_rn : natural := g_nof_rn - 1; -- first ring node has index RN = 0 by definition. + + type t_ring_integer_2arr is array (integer range <>) of t_integer_arr(c_last_rn downto 0); + + type t_crosslets_cipo_2arr is array (integer range <>) of t_mem_cipo_arr(c_P_sq - 1 downto 0); + type t_crosslets_sosi_2arr is array (integer range <>) of t_dp_sosi_arr(c_P_sq - 1 downto 0); + type t_crosslets_integer_2arr is array (integer range <>) of t_integer_arr(c_P_sq - 1 downto 0); + + -- Ring lane packets + constant c_use_cable : std_logic := '1'; -- '0' ring via PCB traces, '1' ring via QSFP cables + + -- = crosslet subband select block size divided by 2 as it is repacked from 32b to 64b. 
= 42 longwords + constant c_lane_payload_nof_longwords_xst : natural := c_sdp_N_crosslets_max * c_sdp_S_pn / 2; + constant c_lane_packet_nof_longwords_max : natural := c_lane_payload_nof_longwords_xst + c_ring_dp_hdr_field_size; + -- = 42 + 3 = 45 + constant c_fifo_tx_fill_margin : natural := 10; -- >= c_fifo_fill_margin = 6 that is used in dp_fifo_fill_eop + constant c_fifo_tx_size_ring : natural := true_log_pow2(c_lane_packet_nof_longwords_max * 2 + c_fifo_tx_fill_margin); + -- = 45 * 2 + 10 = 100 --> 128, NOTE(review): assumes c_ring_dp_hdr_field_size = 3 and that true_log_pow2 rounds up to a power of 2 - confirm + constant c_fifo_tx_fill_ring : natural := c_fifo_tx_size_ring - c_fifo_tx_fill_margin; + -- = maximum fill level, so rely on eop + constant c_err_bi : natural := 0; + constant c_nof_err_counts : natural := 8; + constant c_bsn_at_sync_check_channel : natural := 1; + constant c_validate_channel : boolean := true; + constant c_validate_channel_mode : string := "="; + constant c_sync_timeout : natural := c_block_period * (c_nof_blocks_per_sync + 1); + + -- Timeout tb if there is no output x_sosi + constant c_tb_timeout : time := (g_nof_sync + 1) * c_sync_timeout * c_dp_clk_period; + + -- Address widths of a single MM instance + constant c_addr_w_reg_ring_lane_info_xst : natural := 1; + + signal mm_init : std_logic := '1'; + signal tb_end : std_logic := '0'; + signal dp_clk : std_logic := '1'; + signal dp_rst : std_logic; + signal mm_clk : std_logic := '1'; + signal mm_rst : std_logic; + signal SA_CLK : std_logic := '1'; + signal tr_ref_clk_312 : std_logic := '0'; + signal tr_ref_clk_156 : std_logic := '0'; + signal tr_ref_rst_156 : std_logic := '0'; + + signal stimuli_rst : std_logic; + signal stimuli_end : std_logic; + + signal stimuli_sosi : t_dp_sosi; + signal local_crosslets_sosi_arr : t_dp_sosi_arr(c_last_rn downto 0); + signal xst_bs_sosi : t_dp_sosi; + signal from_ri_sosi_arr : t_dp_sosi_arr(c_last_rn downto 0); + signal to_ri_sosi_arr : t_dp_sosi_arr(c_last_rn downto 0); + signal crosslets_sosi_arr : t_dp_sosi_arr(c_last_rn downto 0); + signal crosslets_copi_arr : 
t_mem_copi_arr(c_last_rn downto 0) := (others => c_mem_copi_rst); + signal crosslets_cipo_2arr : t_crosslets_cipo_2arr(c_last_rn downto 0); + signal x_sosi_2arr : t_crosslets_sosi_2arr(c_last_rn downto 0); + signal x_sosi_2arr_valids : std_logic_vector(g_nof_rn * c_P_sq - 1 downto 0); + signal x_sosi_arr : t_dp_sosi_arr(c_last_rn downto 0); + signal x_sosi : t_dp_sosi; + + -- 10GbE ring + signal tr_10gbe_ring_rx_sosi_arr : t_dp_sosi_arr(c_last_rn downto 0) := (others => c_dp_sosi_rst); + signal tr_10gbe_ring_tx_sosi_arr : t_dp_sosi_arr(c_last_rn downto 0) := (others => c_dp_sosi_rst); + signal tr_10gbe_ring_serial_rx_arr : std_logic_vector(c_last_rn downto 0) := (others => '0'); + signal tr_10gbe_ring_serial_tx_arr : std_logic_vector(c_last_rn downto 0) := (others => '0'); + + -- Crosslets ring MM registers + signal reg_ring_lane_info_xst_copi_arr : t_mem_copi_arr(c_last_rn downto 0) := (others => c_mem_copi_rst); + signal reg_ring_lane_info_xst_cipo_arr : t_mem_cipo_arr(c_last_rn downto 0) := (others => c_mem_cipo_rst); + signal reg_ring_lane_info_xst_copi : t_mem_copi := c_mem_copi_rst; + signal reg_ring_lane_info_xst_cipo : t_mem_cipo := c_mem_cipo_rst; + signal reg_bsn_monitor_v2_ring_rx_xst_copi_arr : t_mem_copi_arr(c_last_rn downto 0) := (others => c_mem_copi_rst); + signal reg_bsn_monitor_v2_ring_rx_xst_cipo_arr : t_mem_cipo_arr(c_last_rn downto 0) := (others => c_mem_cipo_rst); + signal reg_bsn_monitor_v2_ring_rx_xst_copi : t_mem_copi := c_mem_copi_rst; + signal reg_bsn_monitor_v2_ring_rx_xst_cipo : t_mem_cipo := c_mem_cipo_rst; + signal reg_bsn_monitor_v2_ring_tx_xst_copi_arr : t_mem_copi_arr(c_last_rn downto 0) := (others => c_mem_copi_rst); + signal reg_bsn_monitor_v2_ring_tx_xst_cipo_arr : t_mem_cipo_arr(c_last_rn downto 0) := (others => c_mem_cipo_rst); + signal reg_bsn_monitor_v2_ring_tx_xst_copi : t_mem_copi := c_mem_copi_rst; + signal reg_bsn_monitor_v2_ring_tx_xst_cipo : t_mem_cipo := c_mem_cipo_rst; + signal reg_dp_block_validate_err_xst_copi_arr 
: t_mem_copi_arr(c_last_rn downto 0) := (others => c_mem_copi_rst); + signal reg_dp_block_validate_err_xst_cipo_arr : t_mem_cipo_arr(c_last_rn downto 0) := (others => c_mem_cipo_rst); + signal reg_dp_block_validate_err_xst_copi : t_mem_copi := c_mem_copi_rst; + signal reg_dp_block_validate_err_xst_cipo : t_mem_cipo := c_mem_cipo_rst; + signal reg_dp_block_validate_bsn_at_sync_xst_copi_arr : t_mem_copi_arr(c_last_rn downto 0) := + (others => c_mem_copi_rst); + signal reg_dp_block_validate_bsn_at_sync_xst_cipo_arr : t_mem_cipo_arr(c_last_rn downto 0) := + (others => c_mem_cipo_rst); + signal reg_dp_block_validate_bsn_at_sync_xst_copi : t_mem_copi := c_mem_copi_rst; + signal reg_dp_block_validate_bsn_at_sync_xst_cipo : t_mem_cipo := c_mem_cipo_rst; + -- Crosslets ring MM points + signal FPGA_xst_ring_nof_transport_hops_R : t_natural_arr(c_last_rn downto 0); + signal FPGA_xst_ring_rx_latency_R : t_ring_integer_2arr(c_last_rn downto 0); + signal FPGA_xst_ring_tx_latency_R : t_ring_integer_2arr(c_last_rn downto 0); + + -- BSN aligner MM registers + signal reg_bsn_align_v2_xst_copi_arr : t_mem_copi_arr(c_last_rn downto 0) := (others => c_mem_copi_rst); + signal reg_bsn_align_v2_xst_cipo_arr : t_mem_cipo_arr(c_last_rn downto 0) := (others => c_mem_cipo_rst); + signal reg_bsn_align_v2_xst_copi : t_mem_copi := c_mem_copi_rst; + signal reg_bsn_align_v2_xst_cipo : t_mem_cipo := c_mem_cipo_rst; + signal reg_bsn_monitor_v2_xst_rx_align_copi_arr : t_mem_copi_arr(c_last_rn downto 0) := (others => c_mem_copi_rst); + signal reg_bsn_monitor_v2_xst_rx_align_cipo_arr : t_mem_cipo_arr(c_last_rn downto 0) := (others => c_mem_cipo_rst); + signal reg_bsn_monitor_v2_xst_rx_align_copi : t_mem_copi := c_mem_copi_rst; + signal reg_bsn_monitor_v2_xst_rx_align_cipo : t_mem_cipo := c_mem_cipo_rst; + signal reg_bsn_monitor_v2_xst_aligned_copi_arr : t_mem_copi_arr(c_last_rn downto 0) := (others => c_mem_copi_rst); + signal reg_bsn_monitor_v2_xst_aligned_cipo_arr : t_mem_cipo_arr(c_last_rn downto 0) 
:= (others => c_mem_cipo_rst);
+  signal reg_bsn_monitor_v2_xst_aligned_copi : t_mem_copi := c_mem_copi_rst;
+  signal reg_bsn_monitor_v2_xst_aligned_cipo : t_mem_cipo := c_mem_cipo_rst;
+  -- BSN aligner Monitor Points
+  signal FPGA_xst_rx_align_latency_R : t_crosslets_integer_2arr(c_last_rn downto 0);
+  signal FPGA_xst_aligned_latency_R : t_integer_arr(c_last_rn downto 0);
+begin
+  -- Clocks and resets. The dp_clk and mm_clk are OR-ed with tb_end, so they are held at '1' once the
+  -- test bench has finished. The SA_CLK is not gated by tb_end.
+  dp_rst <= '1', '0' after c_dp_clk_period * 7;
+  dp_clk <= (not dp_clk) or tb_end after c_dp_clk_period / 2;
+
+  mm_rst <= '1', '0' after c_mm_clk_period * 7;
+  mm_clk <= (not mm_clk) or tb_end after c_mm_clk_period / 2;
+
+  -- Wait for tr_10GbE to be active
+  stimuli_rst <= '1', '0' after 15 us;
+
+  SA_CLK <= not SA_CLK after c_sa_clk_period / 2;  -- Serial Gigabit IO sa clock (644 MHz)
+
+  -- Generate local crosslets stream, use same for all nodes
+  u_stimuli : entity dp_lib.dp_stream_stimuli
+  generic map (
+    g_sync_period => c_nof_blocks_per_sync,
+    g_nof_repeat => c_nof_blocks_per_sync * g_nof_sync,
+    g_pkt_len => c_block_size,
+    g_pkt_gap => c_gap_size
+  )
+  port map (
+    rst => stimuli_rst,
+    clk => dp_clk,
+    -- Generate stimuli
+    src_out => stimuli_sosi,
+    -- End of stimuli
+    tb_end => stimuli_end
+  );
+
+  -- Use constant crosslet data to ease verification of aligned crosslet data at each node
+  -- . the control fields are copied from stimuli_sosi, the data, channel and err fields are forced to 0
+  --   and the re, im fields get a constant that depends on the node index RN
+  p_local_crosslets_sosi : process(stimuli_sosi)
+  begin
+    for RN in 0 to c_last_rn loop
+      local_crosslets_sosi_arr(RN) <= stimuli_sosi;
+      local_crosslets_sosi_arr(RN).data <= TO_DP_SDATA(0);
+      -- different crosslets value (and /= 0) per node
+      local_crosslets_sosi_arr(RN).re <= TO_DP_DSP_DATA(RN * c_nof_complex + c_local_crosslet_re);  -- odd
+      local_crosslets_sosi_arr(RN).im <= TO_DP_DSP_DATA(RN * c_nof_complex + c_local_crosslet_im);  -- even
+      local_crosslets_sosi_arr(RN).channel <= TO_DP_CHANNEL(0);
+      local_crosslets_sosi_arr(RN).err <= TO_DP_ERROR(0);
+    end loop;
+  end process;
+
+  -- The local stream of node 0 is connected to the bs_sosi input of the ring lane of every node
+  xst_bs_sosi <= local_crosslets_sosi_arr(0);
+
+  -- MM stimuli and monitor read out, the steps are:
+  -- 1. After mm_rst release write c_nof_transport_hops to word offset 1 of the ring lane info register of
+  --    every RN and read it back into FPGA_xst_ring_nof_transport_hops_R.
+  -- 2. Wait for two proc_common_wait_until_hi_lo() events on x_sosi.sync and then read word offset 6 of
+  --    every BSN monitor into the FPGA_xst_*_latency_R signals (presumably offset 6 is the latency field
+  --    of the bsn_monitor_v2 register map - TODO confirm).
+  -- 3. Clear mm_init, wait for stimuli_end plus 1000 dp_clk cycles, print the latency tables and raise
+  --    tb_end.
+  -- All RN share one MM bus per register type. The RN is selected via address offset RN * span, where the
+  -- span is expected to match g_mult_addr_w of the corresponding common_mem_mux.
+  p_mm : process
+    -- MM access
+    variable v_span : natural;
+    variable v_span_node : natural;
+    variable v_offset : natural;
+    -- print_str()
+    -- . v_line holds 1 + g_nof_rn columns of c_col_w characters, column 0 is used for the RN index
+    constant c_nof_col : natural := 1 + g_nof_rn;
+    constant c_col_w : natural := 6;
+    constant c_line_w : natural := c_nof_col * c_col_w;
+    variable v_line : string(1 to c_line_w);
+    variable v_col : natural;
+  begin
+    proc_common_wait_until_low(dp_clk, mm_rst);
+    proc_common_wait_some_cycles(mm_clk, 10);
+
+    proc_common_wait_cross_clock_domain_latency(c_mm_clk_period, c_dp_clk_period,
+                                                c_common_cross_clock_domain_latency * 2);
+    ---------------------------------------------------------------------------
+    -- Setup transport nof hops for each RN to c_nof_transport_hops
+    ---------------------------------------------------------------------------
+    -- Write FPGA_xst_ring_nof_transport_hops_RW = ring_lane_info.transport_nof_hops
+    v_span := 2**c_addr_w_reg_ring_lane_info_xst;
+    for RN in 0 to c_last_rn loop
+      v_offset := 1 + RN * v_span;
+      proc_mem_mm_bus_wr(v_offset, c_nof_transport_hops, mm_clk,
+                         reg_ring_lane_info_xst_cipo, reg_ring_lane_info_xst_copi);
+    end loop;
+    proc_common_wait_cross_clock_domain_latency(c_mm_clk_period, c_dp_clk_period,
+                                                c_common_cross_clock_domain_latency * 2);
+    -- Readback FPGA_xst_ring_nof_transport_hops_R
+    for RN in 0 to c_last_rn loop
+      v_offset := 1 + RN * v_span;
+      proc_mem_mm_bus_rd(v_offset, mm_clk, reg_ring_lane_info_xst_cipo, reg_ring_lane_info_xst_copi);
+      proc_mem_mm_bus_rd_latency(1, mm_clk);
+      FPGA_xst_ring_nof_transport_hops_R(RN) <= TO_UINT(reg_ring_lane_info_xst_cipo.rddata(c_word_w - 1 downto 0));
+    end loop;
+
+    ---------------------------------------------------------------------------
+    -- Wait until second x_sosi.sync
+    ---------------------------------------------------------------------------
+    proc_common_wait_until_hi_lo(dp_clk, x_sosi.sync);
+    proc_common_wait_until_hi_lo(dp_clk, x_sosi.sync);
+
+    ---------------------------------------------------------------------------
+    -- Read BSN monitors
+    ---------------------------------------------------------------------------
+    -- . v_span is the address span of one BSN monitor, v_span_node is the address span of all BSN
+    --   monitors of one RN
+    v_span := 2**c_sdp_reg_bsn_monitor_v2_addr_w;
+    -- Read FPGA_xst_ring_rx_latency_R
+    v_span_node := true_log_pow2(g_nof_rn) * v_span;
+    for RN in 0 to c_last_rn loop
+      for U in 0 to c_last_rn loop
+        v_offset := 6 + RN * v_span_node + U * v_span;
+        proc_mem_mm_bus_rd(v_offset, mm_clk, reg_bsn_monitor_v2_ring_rx_xst_cipo, reg_bsn_monitor_v2_ring_rx_xst_copi);
+        proc_mem_mm_bus_rd_latency(1, mm_clk);
+        FPGA_xst_ring_rx_latency_R(RN)(U) <= TO_SINT(reg_bsn_monitor_v2_ring_rx_xst_cipo.rddata(c_word_w - 1 downto 0));
+      end loop;
+    end loop;
+    -- Read FPGA_xst_rx_align_latency_R, for c_P_sq inputs per RN
+    v_span_node := true_log_pow2(c_P_sq) * v_span;
+    for RN in 0 to c_last_rn loop
+      for P in 0 to c_P_sq - 1 loop
+        v_offset := 6 + RN * v_span_node + P * v_span;
+        proc_mem_mm_bus_rd(v_offset, mm_clk, reg_bsn_monitor_v2_xst_rx_align_cipo, reg_bsn_monitor_v2_xst_rx_align_copi);
+        proc_mem_mm_bus_rd_latency(1, mm_clk);
+        FPGA_xst_rx_align_latency_R(RN)(P) <= TO_SINT(reg_bsn_monitor_v2_xst_rx_align_cipo.rddata(c_word_w - 1 downto 0));
+      end loop;
+    end loop;
+    -- Read FPGA_xst_aligned_latency_R
+    for RN in 0 to c_last_rn loop
+      v_offset := 6 + RN * v_span;
+      proc_mem_mm_bus_rd(v_offset, mm_clk, reg_bsn_monitor_v2_xst_aligned_cipo, reg_bsn_monitor_v2_xst_aligned_copi);
+      proc_mem_mm_bus_rd_latency(1, mm_clk);
+      FPGA_xst_aligned_latency_R(RN) <= TO_SINT(reg_bsn_monitor_v2_xst_aligned_cipo.rddata(c_word_w - 1 downto 0));
+    end loop;
+    -- Read FPGA_xst_ring_tx_latency_R
+    v_span_node := true_log_pow2(g_nof_rn) * v_span;
+    for RN in 0 to c_last_rn loop
+      for U in 0 to c_last_rn loop
+        v_offset := 6 + RN * v_span_node + U * v_span;
+        proc_mem_mm_bus_rd(v_offset, mm_clk, reg_bsn_monitor_v2_ring_tx_xst_cipo, reg_bsn_monitor_v2_ring_tx_xst_copi);
+        proc_mem_mm_bus_rd_latency(1, mm_clk);
+        FPGA_xst_ring_tx_latency_R(RN)(U) <= TO_SINT(reg_bsn_monitor_v2_ring_tx_xst_cipo.rddata(c_word_w - 1 downto 0));
+      end loop;
+    end loop;
+
+    ---------------------------------------------------------------------------
+    -- Wait until end of simulation
+    ---------------------------------------------------------------------------
+    mm_init <= '0';
+
+    proc_common_wait_until_high(dp_clk, stimuli_end);
+    proc_common_wait_some_cycles(dp_clk, 1000);
+
+    ---------------------------------------------------------------------------
+    -- Print latency results
+    ---------------------------------------------------------------------------
+    -- . one line per RN: the RN index and ':' in column 0, followed by one right aligned value per column
+    print_str("c_cable_delay = " & int_to_str(c_nof_delay) & " * 6.4 ns");
+    print_str("");
+    print_str("FPGA_xst_ring_rx_latency_R:");
+    for RN in 0 to c_last_rn loop
+      v_line := (others => ' ');
+      -- ring node index
+      v_line(1 to c_col_w - 2) := int_to_str(RN, c_col_w - 2);
+      v_line(c_col_w - 1) := ':';
+      -- latency values
+      for U in 0 to c_last_rn loop
+        v_col := 1 + U;
+        v_line(1 + v_col * c_col_w to (v_col + 1) * c_col_w) :=
+            int_to_str(FPGA_xst_ring_rx_latency_R(RN)(U), c_col_w);
+      end loop;
+      print_str(v_line);
+    end Loop;
+    print_str("");
+
+    print_str("FPGA_xst_ring_tx_latency_R:");
+    for RN in 0 to c_last_rn loop
+      v_line := (others => ' ');
+      -- ring node index
+      v_line(1 to c_col_w - 2) := int_to_str(RN, c_col_w - 2);
+      v_line(c_col_w - 1) := ':';
+      -- latency values
+      for U in 0 to c_last_rn loop
+        v_col := 1 + U;
+        v_line(1 + v_col * c_col_w to (v_col + 1) * c_col_w) :=
+            int_to_str(FPGA_xst_ring_tx_latency_R(RN)(U), c_col_w);
+      end loop;
+      print_str(v_line);
+    end Loop;
+    print_str("");
+
+    print_str("FPGA_xst_rx_align_latency_R:");
+    for RN in 0 to c_last_rn loop
+      v_line := (others => ' ');
+      -- ring node index
+      v_line(1 to c_col_w - 2) := int_to_str(RN, c_col_w - 2);
+      v_line(c_col_w - 1) := ':';
+      -- latency values
+      -- NOTE(review): v_line is sized for 1 + g_nof_rn columns, whereas this table writes 1 + c_P_sq
+      -- columns, so this relies on c_P_sq <= g_nof_rn - confirm.
+      for U in 0 to c_P_sq - 1 loop
+        v_col := 1 + U;
+        v_line(1 + v_col * c_col_w to (v_col + 1) * c_col_w) :=
+            int_to_str(FPGA_xst_rx_align_latency_R(RN)(U), c_col_w);
+      end loop;
+      print_str(v_line);
+    end Loop;
+    print_str("");
+
+    print_str("FPGA_xst_aligned_latency_R:");
+    for RN in 0 to c_last_rn loop
+      print_str(int_to_str(RN) & ": " & int_to_str(FPGA_xst_aligned_latency_R(RN)));
+    end Loop;
+    print_str("");
+
+    tb_end <= '1';
+    wait;
+  end process;
+
+  -- End the tb simulation
+  proc_common_timeout_failure(c_tb_timeout, tb_end);  -- ERROR: end simulation if it fails to end in time
+  proc_common_stop_simulation(tb_end);  -- OK: end simulation
+
+  ------------------------------------------------------------------------------
+  -- DUT
+  ------------------------------------------------------------------------------
+  gen_dut : for RN in 0 to c_last_rn generate
+    -- Connect ring wires between the nodes
+    -- . node RN receives from node RN - 1 and node 0 receives from the last node, with c_cable_delay
+    wire_ring : if RN > 0 generate
+      tr_10gbe_ring_serial_rx_arr(RN) <= transport tr_10gbe_ring_serial_tx_arr(RN - 1) after c_cable_delay;
+    end generate;
+    close_ring : if RN = 0 generate
+      tr_10gbe_ring_serial_rx_arr(0) <= transport tr_10gbe_ring_serial_tx_arr(c_last_rn) after c_cable_delay;
+    end generate;
+
+    -- tr_10GbE access at each node, all via front_io QSFP[0]
+    -- . one MAC per node, the MM control ports are not used (c_mem_copi_rst / open)
+    u_tr_10GbE_ring: entity tr_10GbE_lib.tr_10GbE
+    generic map (
+      g_sim => true,
+      g_sim_level => 1,
+      g_nof_macs => 1,
+      g_direction => "TX_RX",
+      g_tx_fifo_fill => c_fifo_tx_fill_ring,
+      g_tx_fifo_size => c_fifo_tx_size_ring
+    )
+    port map (
+      -- Transceiver PLL reference clock
+      tr_ref_clk_644 => SA_CLK,
+      tr_ref_clk_312 => tr_ref_clk_312,
+      tr_ref_clk_156 => tr_ref_clk_156,
+      tr_ref_rst_156 => tr_ref_rst_156,
+
+      -- MM interface
+      mm_rst => mm_rst,
+      mm_clk => mm_clk,
+
+      reg_mac_mosi => c_mem_copi_rst,
+      reg_mac_miso => open,
+      reg_eth10g_mosi => c_mem_copi_rst,
+      reg_eth10g_miso => open,
+
+      -- DP interface
+      dp_rst => dp_rst,
+      dp_clk => dp_clk,
+
+      src_out_arr => tr_10gbe_ring_rx_sosi_arr(RN downto RN),
+      snk_in_arr => tr_10gbe_ring_tx_sosi_arr(RN downto RN),
+
+      -- Serial IO
+      serial_tx_arr => tr_10gbe_ring_serial_tx_arr(RN downto RN),
+      serial_rx_arr => tr_10gbe_ring_serial_rx_arr(RN downto RN)
+    );
+
+    -- Ring lane access
at each node
+    -- . The cable side of the lane connects to the tr_10GbE streams of this RN. The board side is not used
+    --   (rx tied to c_dp_sosi_rst, tx left open) and rx_select, tx_select are fixed to c_use_cable.
+    -- . The lane has g_nof_rn rx monitors and g_nof_rn tx monitors.
+    -- . The MM ports connect to element RN of the MM bus arrays.
+    -- NOTE(review): no common_mem_mux for reg_dp_block_validate_err and reg_dp_block_validate_bsn_at_sync
+    -- is visible in this part of the file, so these MM ports appear to remain idle - confirm.
+    u_ring_lane_xst : entity ring_lib.ring_lane
+    generic map (
+      g_lane_direction => 1,  -- transport in positive RN direction.
+      g_lane_data_w => c_longword_w,
+      g_lane_packet_length => c_lane_payload_nof_longwords_xst,
+      g_lane_total_nof_packets_w => 32,
+      g_use_dp_layer => true,
+      g_nof_rx_monitors => g_nof_rn,
+      g_nof_tx_monitors => g_nof_rn,
+      g_err_bi => c_err_bi,
+      g_nof_err_counts => c_nof_err_counts,
+      g_bsn_at_sync_check_channel => c_bsn_at_sync_check_channel,
+      g_validate_channel => c_validate_channel,
+      g_validate_channel_mode => c_validate_channel_mode,
+      g_sync_timeout => c_sync_timeout
+    )
+    port map (
+      mm_rst => mm_rst,
+      mm_clk => mm_clk,
+      dp_clk => dp_clk,
+      dp_rst => dp_rst,
+
+      from_lane_sosi => from_ri_sosi_arr(RN),
+      to_lane_sosi => to_ri_sosi_arr(RN),
+      lane_rx_cable_sosi => tr_10gbe_ring_rx_sosi_arr(RN),
+      lane_rx_board_sosi => c_dp_sosi_rst,
+      lane_tx_cable_sosi => tr_10gbe_ring_tx_sosi_arr(RN),
+      lane_tx_board_sosi => open,
+      bs_sosi => xst_bs_sosi,  -- used for bsn and sync
+
+      reg_ring_lane_info_copi => reg_ring_lane_info_xst_copi_arr(RN),
+      reg_ring_lane_info_cipo => reg_ring_lane_info_xst_cipo_arr(RN),
+      reg_bsn_monitor_v2_ring_rx_copi => reg_bsn_monitor_v2_ring_rx_xst_copi_arr(RN),
+      reg_bsn_monitor_v2_ring_rx_cipo => reg_bsn_monitor_v2_ring_rx_xst_cipo_arr(RN),
+      reg_bsn_monitor_v2_ring_tx_copi => reg_bsn_monitor_v2_ring_tx_xst_copi_arr(RN),
+      reg_bsn_monitor_v2_ring_tx_cipo => reg_bsn_monitor_v2_ring_tx_xst_cipo_arr(RN),
+      reg_dp_block_validate_err_copi => reg_dp_block_validate_err_xst_copi_arr(RN),
+      reg_dp_block_validate_err_cipo => reg_dp_block_validate_err_xst_cipo_arr(RN),
+      reg_dp_block_validate_bsn_at_sync_copi => reg_dp_block_validate_bsn_at_sync_xst_copi_arr(RN),
+      reg_dp_block_validate_bsn_at_sync_cipo => reg_dp_block_validate_bsn_at_sync_xst_cipo_arr(RN),
+
+      this_rn => to_uvec(RN, c_byte_w),
+      N_rn => to_uvec(g_nof_rn, c_byte_w),
+      rx_select => c_use_cable,
+      tx_select => c_use_cable
+    );
+
+    -- Intermediate
crosslets alignment at each node
+    -- . The local input is the constant crosslets stream of this RN. The ring side connects to the
+    --   ring lane of this RN via from_ri_sosi_arr(RN) and to_ri_sosi_arr(RN).
+    -- . The aligned crosslets are passed on to u_st_xsq_mm_to_dp via crosslets_sosi_arr(RN) and the MM
+    --   interface crosslets_copi_arr(RN), crosslets_cipo_2arr(RN).
+    u_sdp_crosslets_remote : entity work.sdp_crosslets_remote_v2
+    generic map (
+      g_P_sq => c_P_sq
+    )
+    port map (
+      dp_clk => dp_clk,
+      dp_rst => dp_rst,
+
+      xsel_sosi => local_crosslets_sosi_arr(RN),
+      from_ri_sosi => from_ri_sosi_arr(RN),
+      to_ri_sosi => to_ri_sosi_arr(RN),
+
+      crosslets_sosi => crosslets_sosi_arr(RN),
+      crosslets_copi => crosslets_copi_arr(RN),
+      crosslets_cipo_arr => crosslets_cipo_2arr(RN),
+
+      mm_rst => mm_rst,
+      mm_clk => mm_clk,
+
+      reg_bsn_align_copi => reg_bsn_align_v2_xst_copi_arr(RN),
+      reg_bsn_align_cipo => reg_bsn_align_v2_xst_cipo_arr(RN),
+      reg_bsn_monitor_v2_bsn_align_input_copi => reg_bsn_monitor_v2_xst_rx_align_copi_arr(RN),
+      reg_bsn_monitor_v2_bsn_align_input_cipo => reg_bsn_monitor_v2_xst_rx_align_cipo_arr(RN),
+      reg_bsn_monitor_v2_bsn_align_output_copi => reg_bsn_monitor_v2_xst_aligned_copi_arr(RN),
+      reg_bsn_monitor_v2_bsn_align_output_cipo => reg_bsn_monitor_v2_xst_aligned_cipo_arr(RN)
+    );
+
+    -- MM -> DP
+    -- . presumably reads the aligned crosslets via the MM interface and outputs them as c_P_sq streams
+    --   in x_sosi_2arr(RN) - TODO confirm against st_xsq_mm_to_dp
+    u_st_xsq_mm_to_dp : entity st_lib.st_xsq_mm_to_dp
+    generic map(
+      g_nof_streams => c_P_sq,
+      g_nof_crosslets => c_sdp_N_crosslets_max,
+      g_nof_signal_inputs => c_sdp_S_pn,
+      g_dsp_data_w => c_sdp_W_crosslet
+    )
+    port map(
+      rst => dp_rst,
+      clk => dp_clk,
+      in_sosi => crosslets_sosi_arr(RN),
+      mm_mosi => crosslets_copi_arr(RN),
+      mm_miso_arr => crosslets_cipo_2arr(RN),
+      out_sosi_arr => x_sosi_2arr(RN)
+    );
+  end generate;  -- gen_dut
+
+  -- View status of x_sosi_2arr
+  -- . x_sosi_2arr_valids contains the valid of stream P of node RN at index RN * c_P_sq + P
+  -- . x_sosi is the first stream of node 0, it is used as reference by p_mm (sync) and by
+  --   p_verify_crosslets (valid)
+  p_x_sosi_2arr : process(x_sosi_2arr)
+  begin
+    for RN in 0 to c_last_rn loop
+      -- Group all x_sosi_2arr valids into one slv
+      for P in 0 to c_P_sq - 1 loop
+        x_sosi_2arr_valids(RN * c_P_sq + P) <= x_sosi_2arr(RN)(P).valid;
+      end loop;
+
+      -- Group aligned first output from all RN
+      x_sosi_arr(RN) <= x_sosi_2arr(RN)(0);
+    end loop;
+
+    -- Get aligned first output from first RN
+    x_sosi <= x_sosi_2arr(0)(0);
+  end process;
+
+  ------------------------------------------------------------------------------
+  -- Verify
crosslets at every node, to check that no packets were lost
+  ------------------------------------------------------------------------------
+  -- . The checks are done once per dp_clk cycle, at the rising edge. Without the edge condition the
+  --   process also runs at the falling edge and at simulation start, which reports every failure twice
+  --   and may evaluate the streams before they are driven.
+  -- . The re and im of every valid output sample must be /= 0, because the local crosslets stimuli are
+  --   /= 0 for every node, so a 0 value indicates that lost data was replaced.
+  -- . All c_P_sq outputs of all RN must be valid when the reference x_sosi is valid.
+  p_verify_crosslets : process(dp_clk)
+  begin
+    if rising_edge(dp_clk) then
+      -- Verify that data /= 0, so no lost data = 0 insertion
+      for RN in 0 to c_last_rn loop
+        for P in 0 to c_P_sq - 1 loop
+          if x_sosi_2arr(RN)(P).valid = '1' then
+            assert TO_SINT(x_sosi_2arr(RN)(P).re) /= 0 report "Wrong crosslet re at node " & int_to_str(RN) severity error;
+            assert TO_SINT(x_sosi_2arr(RN)(P).im) /= 0 report "Wrong crosslet im at node " & int_to_str(RN) severity error;
+          end if;
+        end loop;
+      end loop;
+
+      -- Verify that all aligned outputs on all RN are valid at the same time
+      if x_sosi.valid = '1' then
+        assert vector_and(x_sosi_2arr_valids) = '1' report "Missing aligned output valid" severity error;
+      else
+        assert vector_and(x_sosi_2arr_valids) = '0' report "Unexpected aligned output valid" severity error;
+      end if;
+    end if;
+  end process;
+
+  ------------------------------------------------------------------------------
+  -- 10GbE clocks
+  ------------------------------------------------------------------------------
+  -- . derives the 156 MHz and 312 MHz reference clocks and the 156 MHz reset for tr_10GbE from SA_CLK
+  u_tech_pll_xgmii_mac_clocks : entity tech_pll_lib.tech_pll_xgmii_mac_clocks
+  port map (
+    refclk_644 => SA_CLK,
+    rst_in => mm_rst,
+    clk_156 => tr_ref_clk_156,
+    clk_312 => tr_ref_clk_312,
+    rst_156 => tr_ref_rst_156,
+    rst_312 => open
+  );
+
+  ------------------------------------------------------------------------------
+  -- MM bus multiplexers
+  ------------------------------------------------------------------------------
+  -- Use common_mem_mux to avoid (vcom-1450) Actual (indexed name) for formal "mm_miso" is not a static signal name.
+  -- Use downto range for _arr, to match downto range of mosi_arr.
+  -- One common_mem_mux per MM register type, to access the register of each RN via one MM bus. The
+  -- g_mult_addr_w is the address width of the register span per RN.
+  u_mem_mux_reg_ring_lane_info_xst : entity common_lib.common_mem_mux
+  generic map (
+    g_nof_mosi => g_nof_rn,
+    g_mult_addr_w => c_addr_w_reg_ring_lane_info_xst
+  )
+  port map (
+    mosi => reg_ring_lane_info_xst_copi,
+    miso => reg_ring_lane_info_xst_cipo,
+    mosi_arr => reg_ring_lane_info_xst_copi_arr,
+    miso_arr => reg_ring_lane_info_xst_cipo_arr
+  );
+
+  u_mem_mux_reg_bsn_monitor_v2_ring_rx_xst : entity common_lib.common_mem_mux
+  generic map (
+    g_nof_mosi => g_nof_rn,
+    g_mult_addr_w => c_sdp_reg_bsn_monitor_v2_addr_w + ceil_log2(g_nof_rn)
+  )
+  port map (
+    mosi => reg_bsn_monitor_v2_ring_rx_xst_copi,
+    miso => reg_bsn_monitor_v2_ring_rx_xst_cipo,
+    mosi_arr => reg_bsn_monitor_v2_ring_rx_xst_copi_arr,
+    miso_arr => reg_bsn_monitor_v2_ring_rx_xst_cipo_arr
+  );
+
+  u_mem_mux_reg_bsn_monitor_v2_ring_tx_xst : entity common_lib.common_mem_mux
+  generic map (
+    g_nof_mosi => g_nof_rn,
+    g_mult_addr_w => c_sdp_reg_bsn_monitor_v2_addr_w + ceil_log2(g_nof_rn)
+  )
+  port map (
+    mosi => reg_bsn_monitor_v2_ring_tx_xst_copi,
+    miso => reg_bsn_monitor_v2_ring_tx_xst_cipo,
+    mosi_arr => reg_bsn_monitor_v2_ring_tx_xst_copi_arr,
+    miso_arr => reg_bsn_monitor_v2_ring_tx_xst_cipo_arr
+  );
+
+  u_mem_mux_reg_bsn_monitor_v2_xst_rx_align : entity common_lib.common_mem_mux
+  generic map (
+    g_nof_mosi => g_nof_rn,
+    g_mult_addr_w => c_sdp_reg_bsn_monitor_v2_addr_w + ceil_log2(c_P_sq)
+  )
+  port map (
+    mosi => reg_bsn_monitor_v2_xst_rx_align_copi,
+    miso => reg_bsn_monitor_v2_xst_rx_align_cipo,
+    mosi_arr => reg_bsn_monitor_v2_xst_rx_align_copi_arr,
+    miso_arr => reg_bsn_monitor_v2_xst_rx_align_cipo_arr
+  );
+
+  u_mem_mux_reg_bsn_monitor_v2_xst_aligned : entity common_lib.common_mem_mux
+  generic map (
+    g_nof_mosi => g_nof_rn,
+    g_mult_addr_w => c_sdp_reg_bsn_monitor_v2_addr_w
+  )
+  port map (
+    mosi => reg_bsn_monitor_v2_xst_aligned_copi,
+    miso => reg_bsn_monitor_v2_xst_aligned_cipo,
+    mosi_arr => reg_bsn_monitor_v2_xst_aligned_copi_arr,
+    miso_arr => reg_bsn_monitor_v2_xst_aligned_cipo_arr
+  );
+
+  u_mem_mux_reg_bsn_align_v2_xst : entity common_lib.common_mem_mux
+  generic map (
+    g_nof_mosi => g_nof_rn,
+    g_mult_addr_w => c_sdp_reg_bsn_align_v2_addr_w
+  )
+  port map (
+    mosi => reg_bsn_align_v2_xst_copi,
+    miso => reg_bsn_align_v2_xst_cipo,
+    mosi_arr => reg_bsn_align_v2_xst_copi_arr,
+    miso_arr => reg_bsn_align_v2_xst_cipo_arr
+  );
+end tb;
diff --git a/libraries/base/common/src/vhdl/common_str_pkg.vhd b/libraries/base/common/src/vhdl/common_str_pkg.vhd
index e309a4d10a00d1432c3c166ae1b0bc0174f251c9..8aad8788922126a200c54d4bbfc735cb55ef384f 100644
--- a/libraries/base/common/src/vhdl/common_str_pkg.vhd
+++ b/libraries/base/common/src/vhdl/common_str_pkg.vhd
@@ -53,6 +53,7 @@ package common_str_pkg is
   function hex_nibble_to_slv(c: character) return std_logic_vector;
 
   function int_to_str(int: integer) return string;
+  function int_to_str(int, w: integer) return string;
   function real_to_str(re: real; width : integer; digits : integer) return string;
 
   procedure print_str(str : string);
@@ -222,10 +223,9 @@ package body common_str_pkg is
       when 'X' => v_result := "XXXX";
       when 'z' => v_result := "ZZZZ";
       when 'Z' => v_result := "ZZZZ";
-
-       when others => v_result := "0000";
-     end case;
-     return v_result;
+      when others => v_result := "0000";
+    end case;
+    return v_result;
   end hex_nibble_to_slv;
 
   function int_to_str(int: integer) return string is
@@ -238,6 +238,25 @@ package body common_str_pkg is
     return v_str;
   end;
 
+  -- Return int as string of w characters, with the number right aligned and padded with spaces at the left.
+  -- If the number needs w or more characters (this includes w <= 0), then the number is returned without
+  -- padding, so it will then use more characters than w. This avoids a slice range error on v_ret.
+  function int_to_str(int, w: integer) return string is
+    constant c_len: natural := nof_digits_int(int);
+    variable v_line: LINE;
+    variable v_str: string(1 to c_len) := (others => ' ');
+    variable v_ret: string(1 to w) := (others => ' ');
+  begin
+    STD.TEXTIO.WRITE(v_line, int);
+    v_str(v_line.ALL'range) := v_line.all;
+    deallocate(v_line);
+    if c_len >= w then
+      return v_str;  -- no room for padding, v_ret(w - c_len + 1 to w) would be out of range for c_len > w
+    end if;
+    v_ret(w - c_len + 1 to w) := v_str;  -- right align v_str in v_ret
+    return v_ret;
+  end;
+
   function real_to_str(re: real; width : integer; digits : integer) return
string is -- . The number length is width + 1, with +1 for the . in the floating point number. -- However if width is too small to fit the number, then it will use more characters. diff --git a/libraries/base/dp/src/vhdl/dp_bsn_align_v2.vhd b/libraries/base/dp/src/vhdl/dp_bsn_align_v2.vhd index 9042f050f7797b53fec3194857d70a0b6aac009c..61db3174e6f4f846ff445cecef3a07421177f3b2 100644 --- a/libraries/base/dp/src/vhdl/dp_bsn_align_v2.vhd +++ b/libraries/base/dp/src/vhdl/dp_bsn_align_v2.vhd @@ -20,14 +20,43 @@ -- Purpose : -- Align frames from multiple input streams -- Description: --- The aligner uses a circular buffer to capture the blocks that arrive at --- the input streams. The blocks have a block sequence number (BSN) that --- is used to align the inputs. The input stream 0 is treated as local --- input stream that is ahead of the other remote input streams. After a --- certain number of blocks on input 0, the same block on all remote --- inputs should also have arrived. If not then they are replaced by --- replacement data. The output streams are paced by the block rate of --- input 0. The user has to read the block within the block period. +-- Aligner: +-- . The aligner uses a circular buffer to capture the blocks that arrive at +-- the input streams. The blocks have a block sequence number (BSN) that +-- is used to align the inputs. The input stream 0 is treated as local +-- input stream that is ahead of the other remote input streams. After a +-- certain number of blocks on input 0, the same block on all remote +-- inputs should also have arrived. If not then they are replaced by +-- replacement data. The output streams are paced by the block rate of +-- input 0. The user has to read the block within the block period. +-- . The aligner can align g_nof_streams that all arrive within a latency +-- of g_bsn_latency_max after the local stream at index 0. 
The aligner +-- can also be used in a chain of aligners, whereby each aligner typically +-- has the local input and one remote input and the remote input is the +-- output of an upstream aligner. Then the latency on the last node in +-- the chain will be within g_nof_aligners_max * g_bsn_latency_max. +-- +-- Circular buffer: +-- . The size of the circular buffer is c_buffer_nof_blocks and depends on +-- the maximum latency. The c_buffer_nof_blocks has to be a power of two to +-- ease the control of the circular buffer. The lowest bits of the input +-- block sequence number (BSN) are used as write block index into the +-- circular buffer. The g_bsn_latency_first_node can be useful to reduce +-- the required circular buffer size just enough, such that the next power +-- of two is only a few blocks larger, instead of almost a factor two +-- larger. This then can save a significant amount of block RAM. +-- For example: The circular buffer size c_buffer_nof_blocks is 1 + the +-- sum of bsn latencies at each node. Therefore if g_nof_aligners_max = 16 +-- (a power of two) and g_bsn_latency_max = 2, then the circular buffer +-- becomes true_log_pow2(1 + 16 * 2) = 64 blocks, so almost twice as large +-- as needed. If the first input stream does not have active remote input, +-- or is disabled via stream_en_arr, then choose g_bsn_latency_first_node +-- = 1, to get a buffer size of true_log_pow2(1 + 15 * 2 + 1) = 32 blocks. +-- . In case of a chain of aligners then the circular buffer size depends on +-- the latency of local input. The most remote input will only use a +-- fraction of the buffer. Therefore more block RAM can be saved by using +-- a smaller circular buffer size for signal inputs that are from more +-- remote (i.e. that have passed through more upstream aligners). -- -- Features: -- . The g_block_size <= block period, so supports input blocks arriving @@ -35,8 +64,44 @@ -- . 
uses replacement data to replace lost input blocks and channel bit 0 as -- lost_data flag -- . uses replacement data to replace disabled input streams --- . output block can be read in arbitrary order via g_use_mm_output = TRUE --- . output block can be streamed via g_use_mm_output = FALSE +-- . output block can be read in arbitrary order via g_use_mm_output = true +-- . output block can be streamed via g_use_mm_output = false +-- +-- Parameters: +-- . g_nof_streams: number of input and output streams. Stream index 0 is +-- the local stream. Stream indices > 0 are for remote streams. The +-- remote streams arrive later than the local stream, but within +-- g_bsn_latency_max or within an integer multiple of g_bsn_latency_max. +-- . g_bsn_latency_max: >= 1, maximum travel latency of a remote block in +-- number of block periods T_blk. +-- . g_bsn_latency_first_node: typically <= g_bsn_latency_max of the other +-- nodes in a chain. Using g_bsn_latency_first_node = 0 for immediate +-- output from the first node does not work yet (see remark below). Only used when +-- g_nof_aligners_max > 1. The g_bsn_latency_first_node setting only +-- affects the latency along the chain, and therefore the required +-- size of the circular buffer. If the circular buffer is large enough +-- anyway, then the g_bsn_latency_first_node setting is don't care, +-- assuming that a little extra latency is don't care. +-- . g_nof_aligners_max: Number of dp_bsn_align_v2 aligners in a chain. +-- = 1 when only align at last node, or +-- > 1 when align at every intermediate node in a chain of nodes, and then +-- g_nof_aligners_max should equal the number of nodes for +-- chain_node_index range. The g_nof_aligners_max is the number of +-- nodes in the chain including the first node. +-- +-- Inputs: +-- . chain_node_index: Node index in chain of nodes. First node has index 0. +-- In case of a ring of nodes the chain of nodes can range the whole ring, +-- or only a part of the ring. 
The number of nodes in the chain is given +-- by g_nof_aligners_max. Only used when g_nof_aligners_max > 1. +-- . stream_en_arr: when '1' then align corresponding input stream, else +-- replace data from corresponding input stream by 0 and do not raise the +-- lost data flag. Whether a stream is enabled or not has no effect on the +-- aligner timing, it only sets the data to 0. +-- +-- Outputs: +-- . replace_cnt_en_arr: count number of lost data blocks per input stream, +-- that got replaced by 0 value, per sync interval. -- -- For more detailed description see: -- https://support.astron.nl/confluence/display/L2M/L6+FWLIB+Design+Document%3A+BSN+aligner+v2 @@ -46,6 +111,13 @@ -- APERTIF. Main differences are that the old component uses FIFO buffers, -- timeouts and states, and v2 does not, which makes v2 simpler and more -- robust. +-- . The g_bsn_latency_first_node = 0 should also be feasible, but does not +-- work and is not investigated further, because g_bsn_latency_first_node = +-- 1 in combination with g_bsn_latency_max = 2 is sufficient to reduce the +-- circular buffer size when g_nof_aligners_max is a power of two. +-- . Using a circular buffer with optimum size, that does not have to have a +-- power of two number of blocks, makes the circular buffer control and +-- access more complicated and is not investigated further. 
library IEEE,common_lib; use IEEE.std_logic_1164.all; @@ -57,22 +129,23 @@ use work.dp_stream_pkg.all; entity dp_bsn_align_v2 is generic ( g_nof_streams : natural := 2; -- >= 2, number of input and output streams - g_bsn_latency_max : natural := 2; -- maximum travel latency of a remote block in number of block periods T_blk - g_nof_aligners_max : positive := 16; -- 1 when only align at last node, > 1 when align at every intermediate node + g_bsn_latency_max : natural := 2; -- >= 1 + g_bsn_latency_first_node : natural := 2; -- default use same as g_bsn_latency_max + g_nof_aligners_max : positive := 16; g_block_size : natural := 1024; -- > 1, g_block_size=1 is not supported g_bsn_w : natural := c_dp_stream_bsn_w; -- number of bits in sosi BSN g_data_w : natural := 36; -- number of bits in sosi data g_data_replacement_value : integer := 0; -- output sosi data value for missing input blocks g_use_mm_output : boolean := false; -- output via MM or via streaming DP - g_pipeline_input : natural := 1; -- >= 0, choose 0 for wires, choose 1 to ease timing closure of in_sosi_arr - g_pipeline_output : natural := 1; -- >= 0, choose 0 for wires, choose 1 to ease timing closure of out_sosi_arr + g_pipeline_input : natural := 1; -- >= 0, 0 for wires, 1 to ease timing closure of in_sosi_arr + g_pipeline_output : natural := 1; -- >= 0, 0 for wires, 1 to ease timing closure of out_sosi_arr g_rd_latency : natural := 2 -- 1 or 2, choose 2 to ease timing closure ); port ( dp_rst : in std_logic; dp_clk : in std_logic; - node_index : in natural range 0 to g_nof_aligners_max - 1 := 0; -- only used when g_nof_aligners_max > 1 + chain_node_index : in natural range 0 to g_nof_aligners_max - 1 := 0; -- MM control stream_en_arr : in std_logic_vector(g_nof_streams - 1 downto 0) := (others => '1'); @@ -81,19 +154,23 @@ entity dp_bsn_align_v2 is -- Streaming input in_sosi_arr : in t_dp_sosi_arr(g_nof_streams - 1 downto 0); - -- Output via local MM interface in dp_clk domain, when g_use_mm_output = 
TRUE. - mm_sosi : out t_dp_sosi; -- streaming information that signals that an output block can be read - mm_copi : in t_mem_copi := c_mem_copi_rst; -- read access to output block, all output streams share same mm_copi + -- Output via local MM interface in dp_clk domain, when g_use_mm_output = true + -- . streaming information that signals that an output block can be read + mm_sosi : out t_dp_sosi; + -- . MM read access to output block, all output streams share same mm_copi + mm_copi : in t_mem_copi := c_mem_copi_rst; mm_cipo_arr : out t_mem_cipo_arr(g_nof_streams - 1 downto 0); - -- Output via streaming DP interface, when g_use_mm_output = FALSE. + -- Output via streaming DP interface, when g_use_mm_output = false. out_sosi_arr : out t_dp_sosi_arr(g_nof_streams - 1 downto 0) ); end dp_bsn_align_v2; architecture rtl of dp_bsn_align_v2 is - -- Circular buffer per stream, size is next power of 2 that fits - constant c_buffer_nof_blocks : natural := true_log_pow2(1 + g_nof_aligners_max * g_bsn_latency_max); + -- Circular buffer per stream, size is next power of two that fits + constant c_buffer_nof_blocks : natural := sel_a_b(g_nof_aligners_max = 1, + true_log_pow2(1 + g_bsn_latency_max), + true_log_pow2(1 + g_bsn_latency_max * (g_nof_aligners_max - 1) + g_bsn_latency_first_node)); constant c_ram_size : natural := c_buffer_nof_blocks * g_block_size; constant c_ram_buf : t_c_mem := (latency => 1, @@ -102,7 +179,7 @@ architecture rtl of dp_bsn_align_v2 is nof_dat => c_ram_size, init_sl => '0'); - -- Use +1 to ensure that g_block_size that is power of 2 also fits in c_block_size_slv + -- Use +1 to ensure that g_block_size that is power of two also fits in c_block_size_slv constant c_block_size_w : natural := ceil_log2(g_block_size + 1); constant c_block_size_slv : std_logic_vector(c_block_size_w - 1 downto 0) := TO_UVEC(g_block_size, c_block_size_w); constant c_blk_pointer_w : natural := ceil_log2(c_buffer_nof_blocks); @@ -121,6 +198,7 @@ architecture rtl of 
dp_bsn_align_v2 is -- State type t_reg is record + ref_sosi : t_dp_sosi; -- p_write_arr wr_blk_pointer : natural; wr_copi_arr : t_mem_copi_arr(g_nof_streams - 1 downto 0); @@ -136,9 +214,12 @@ architecture rtl of dp_bsn_align_v2 is rd_blk_pointer : integer; -- use integer to detect need to wrap to natural rd_offset : std_logic_vector(c_ram_buf.adr_w - 1 downto 0); rd_copi : t_mem_copi; - fill_cipo_arr : t_mem_cipo_arr(g_nof_streams - 1 downto 0); -- used combinatorial to contain rd_cipo_arr from buffer or replacement data - out_bsn : std_logic_vector(g_bsn_w - 1 downto 0); -- hold BSN until next sop, for easy view in Wave window - out_channel_arr : t_channel_arr(g_nof_streams - 1 downto 0); -- hold channel until next sop per stream, for easy view in Wave window + fill_cipo_arr : t_mem_cipo_arr(g_nof_streams - 1 downto 0); -- used combinatorial to contain rd_cipo_arr + -- from buffer or replacement data + out_bsn : std_logic_vector(g_bsn_w - 1 downto 0); -- hold BSN until next sop, for easy view in Wave + -- window + out_channel_arr : t_channel_arr(g_nof_streams - 1 downto 0); -- hold channel until next sop per stream, for + -- easy view in Wave window replace_cnt_en_arr : std_logic_vector(g_nof_streams - 1 downto 0); end record; @@ -146,16 +227,16 @@ architecture rtl of dp_bsn_align_v2 is -- . For unique representation as signal wire, the p_comb should assign each -- field in t_comb only once to a variable. It is allowed to reasign a -- t_comb variable in p_comb, but then only the last assignment value will - -- be visible via the signal dbg_wires in the Wave window. + -- be visible via the signal w_comb in the Wave window. 
type t_comb is record - ref_sosi : t_dp_sosi; blk_pointer_slv : std_logic_vector(c_blk_pointer_w - 1 downto 0); product_slv : std_logic_vector(c_product_w - 1 downto 0); lost_data_flags_arr : std_logic_vector(g_nof_streams - 1 downto 0); out_sosi_arr : t_dp_sosi_arr(g_nof_streams - 1 downto 0); end record; - constant c_reg_rst : t_reg := (0, + constant c_reg_rst : t_reg := (c_dp_sosi_rst, + 0, (others => c_mem_copi_rst), (others => (others => '0')), (others => '0'), @@ -171,18 +252,17 @@ architecture rtl of dp_bsn_align_v2 is (others => (others => '0')), (others => '0')); - constant c_comb_rst : t_comb := (c_dp_sosi_rst, - (others => '0'), - (others => '0'), - (others => '0'), - (others => c_dp_sosi_rst)); + constant c_comb_rst : t_comb := ((others => '0'), + (others => '0'), + (others => '0'), + (others => c_dp_sosi_rst)); -- State registers for p_comb signal r : t_reg; signal nxt_r : t_reg; -- Memoryless signals in p_comb (wires used as local variables) - signal dbg_wires : t_comb; + signal w_comb : t_comb; -- Structural signals (wires used to connect components and IO) signal dp_done : std_logic; @@ -199,7 +279,6 @@ architecture rtl of dp_bsn_align_v2 is signal comb_out_sosi_arr : t_dp_sosi_arr(g_nof_streams - 1 downto 0); -- Counter signals - signal replace_cnt_arr : t_slv_32_arr(g_nof_streams - 1 downto 0); signal nxt_hold_replace_cnt_arr : t_slv_32_arr(g_nof_streams - 1 downto 0); signal hold_replace_cnt_arr : t_slv_32_arr(g_nof_streams - 1 downto 0); @@ -230,7 +309,7 @@ begin end if; end process; - p_comb : process(r, in_sosi_arr_p, mm_copi, dp_copi, rd_cipo_arr, rd_sosi_arr, stream_en_arr, node_index) + p_comb : process(r, in_sosi_arr_p, mm_copi, dp_copi, rd_cipo_arr, rd_sosi_arr, stream_en_arr, chain_node_index) variable v : t_reg; -- State variable variable w : t_comb; -- Local wires = memoryless auxiliary variables begin @@ -264,23 +343,32 @@ begin end if; end loop; - ---------------------------------------------------------------------------- + 
--------------------------------------------------------------------------- -- p_control, all at sop of local reference input 0 - ---------------------------------------------------------------------------- - w.ref_sosi := in_sosi_arr_p(0); - if w.ref_sosi.sop = '1' then + --------------------------------------------------------------------------- + v.ref_sosi := in_sosi_arr_p(0); + -- Use v.ref_sosi.sop instead of r.ref_sosi.sop, to support alignment of + -- streams that have no data valid gap between blocks, so when + -- g_block_size is equal to the block period or when shorter blocks have + -- jitter in arrival time that could cause two blocks to arrive without a + -- gap. + if v.ref_sosi.sop = '1' then -- . write sync & bsn buffer - v.wr_blk_pointer := TO_UINT(w.ref_sosi.bsn(c_blk_pointer_w - 1 downto 0)); - v.sync_arr(v.wr_blk_pointer) := w.ref_sosi.sync; - v.bsn_arr(v.wr_blk_pointer) := w.ref_sosi.bsn(g_bsn_w - 1 downto 0); + v.wr_blk_pointer := TO_UINT(v.ref_sosi.bsn(c_blk_pointer_w - 1 downto 0)); + v.sync_arr(v.wr_blk_pointer) := v.ref_sosi.sync; + v.bsn_arr(v.wr_blk_pointer) := v.ref_sosi.bsn(g_bsn_w - 1 downto 0); -- . update read block pointer at g_bsn_latency_max blocks behind the - -- reference write pointer, dependent on the node_index. For - -- g_bsn_latency_max = 1 the node_index = 0 fixed. For - -- g_bsn_latency_max > 1, node_index is the first BSN aligner in a - -- chain. Each subsequent node in the chain then has to account for - -- g_bsn_latency_max additional block latency. - v.rd_blk_pointer := v.wr_blk_pointer - g_bsn_latency_max * (1 + node_index); + -- reference write pointer, dependent on the chain_node_index: + -- - for g_nof_aligners_max = 1 the chain_node_index = 0 fixed + -- - for g_nof_aligners_max > 1, chain_node_index = 0 is the first BSN + -- aligner in a chain. Each subsequent node in the chain then has to + -- account for g_bsn_latency_max additional block latency. 
+ if g_nof_aligners_max = 1 then + v.rd_blk_pointer := v.wr_blk_pointer - g_bsn_latency_max; + else + v.rd_blk_pointer := v.wr_blk_pointer - g_bsn_latency_max * chain_node_index - g_bsn_latency_first_node; + end if; if v.rd_blk_pointer < 0 then v.rd_blk_pointer := v.rd_blk_pointer + c_buffer_nof_blocks; end if; @@ -291,6 +379,10 @@ begin v.rd_offset := RESIZE_UVEC(w.product_slv, c_ram_buf.adr_w); -- . issue mm_sosi, if there is output ready to be read, indicated by filled reference block + -- - can use 'if r.filled_arr(0)' instead of 'if v.filled_arr(0)', + -- because input stream 0 arrives first, so is already filled + -- - need to use 'not v.filled_arr(I)' for w.lost_data_flags_arr(I), + -- because last input I = g_nof_streams - 1 may have just been filled. if r.filled_arr(0)(v.rd_blk_pointer) = '1' then v.mm_sosi.sop := '1'; v.mm_sosi.eop := '1'; @@ -339,7 +431,7 @@ begin -- Do the output via the MM interface -------------------------------------------------------------------------- -- . adjust the rd address to the current buffer output block - -- sum yields c_ram_buf.adr_w bits, because left operand in ADD_UVECdetermines width + -- sum yields c_ram_buf.adr_w bits, because left operand in ADD_UVEC determines width v.rd_copi := mm_copi; v.rd_copi.address := RESIZE_MEM_ADDRESS(ADD_UVEC(r.rd_offset, mm_copi.address)); @@ -353,7 +445,7 @@ begin -- Do the output via the DP streaming interface -------------------------------------------------------------------------- -- .
adjust the rd address - -- sum yields c_ram_buf.adr_w bits, because left operand in ADD_UVECdetermines width + -- sum yields c_ram_buf.adr_w bits, because left operand in ADD_UVEC determines width v.rd_copi := dp_copi; v.rd_copi.address := RESIZE_MEM_ADDRESS(ADD_UVEC(r.rd_offset, dp_copi.address)); @@ -401,7 +493,7 @@ begin nxt_r <= v; -- local wires, only for view in wave window - dbg_wires <= w; + w_comb <= w; end process; ------------------------------------------------------------------------------ diff --git a/libraries/base/dp/src/vhdl/mmp_dp_bsn_align_v2.vhd b/libraries/base/dp/src/vhdl/mmp_dp_bsn_align_v2.vhd index 20456ab387ba3873e6a87d8843bafd78250ac876..d7803fb866b75af0a44f18595e8934a5db1b11b5 100644 --- a/libraries/base/dp/src/vhdl/mmp_dp_bsn_align_v2.vhd +++ b/libraries/base/dp/src/vhdl/mmp_dp_bsn_align_v2.vhd @@ -44,16 +44,18 @@ use work.dp_stream_pkg.all; entity mmp_dp_bsn_align_v2 is generic ( -- for dp_bsn_align_v2 - g_nof_streams : natural; -- number of input and output streams - g_bsn_latency_max : natural; -- Maximum travel latency of a remote block in number of block periods T_blk - g_nof_aligners_max : natural := 1; -- 1 when only align at last node, > 1 when align at every intermediate node + g_nof_streams : natural := 2; -- number of input and output streams + g_bsn_latency_max : natural := 2; -- Maximum travel latency of a remote block in number of block periods + g_bsn_latency_first_node : natural := 2; -- default use same as g_bsn_latency_max + g_nof_aligners_max : natural := 1; -- 1 when only align at last node, + -- > 1 when align at every intermediate node g_block_size : natural := 32; -- > 1, g_block_size=1 is not supported g_bsn_w : natural := c_dp_stream_bsn_w; -- number of bits in sosi BSN g_data_w : natural; -- number of bits in sosi data g_data_replacement_value : integer := 0; -- output sosi data value for missing input blocks g_use_mm_output : boolean := false; -- output via MM or via streaming DP - g_pipeline_input : natural 
:= 1; -- >= 0, choose 0 for wires, choose 1 to ease timing closure of in_sosi_arr - g_pipeline_output : natural := 1; -- >= 0, choose 0 for wires, choose 1 to ease timing closure of out_sosi_arr + g_pipeline_input : natural := 1; -- >= 0, 0 for wires, 1 to ease timing closure of in_sosi_arr + g_pipeline_output : natural := 1; -- >= 0, 0 for wires, 1 to ease timing closure of out_sosi_arr g_rd_latency : natural := 2; -- 1 or 2, choose 2 to ease timing closure -- for mms_dp_bsn_monitor_v2 g_nof_clk_per_sync : natural := 200 * 10**6; @@ -78,17 +80,17 @@ entity mmp_dp_bsn_align_v2 is dp_rst : in std_logic; dp_clk : in std_logic; - node_index : in natural range 0 to g_nof_aligners_max - 1 := 0; -- only used when g_nof_aligners_max > 1 + chain_node_index : in natural range 0 to g_nof_aligners_max - 1 := 0; -- only used when g_nof_aligners_max > 1 -- Streaming input in_sosi_arr : in t_dp_sosi_arr(g_nof_streams - 1 downto 0); - -- Output via local MM interface in dp_clk domain, when g_use_mm_output = TRUE. + -- Output via local MM interface in dp_clk domain, when g_use_mm_output = true. mm_sosi : out t_dp_sosi; -- streaming information that signals that an output block can be read mm_copi : in t_mem_copi := c_mem_copi_rst; -- read access to output block, all output streams share same mm_copi mm_cipo_arr : out t_mem_cipo_arr(g_nof_streams - 1 downto 0); - -- Output via streaming DP interface, when g_use_mm_output = FALSE. + -- Output via streaming DP interface, when g_use_mm_output = false. 
out_sosi_arr : out t_dp_sosi_arr(g_nof_streams - 1 downto 0) ); end mmp_dp_bsn_align_v2; @@ -220,6 +222,7 @@ begin generic map ( g_nof_streams => g_nof_streams, g_bsn_latency_max => g_bsn_latency_max, + g_bsn_latency_first_node => g_bsn_latency_first_node, g_nof_aligners_max => g_nof_aligners_max, g_block_size => g_block_size, g_bsn_w => g_bsn_w, @@ -233,7 +236,7 @@ begin port map ( dp_rst => dp_rst, dp_clk => dp_clk, - node_index => node_index, + chain_node_index => chain_node_index, -- MM control stream_en_arr => stream_en_arr, stream_replaced_cnt_arr => stream_replaced_cnt_arr, diff --git a/libraries/base/dp/tb/vhdl/tb_dp_bsn_align_v2.vhd b/libraries/base/dp/tb/vhdl/tb_dp_bsn_align_v2.vhd index c6db289f9ed18f783f7200c0011a250f934233b7..7d5f7e3d695acabc3aa4042abfabf20d51b17ab4 100644 --- a/libraries/base/dp/tb/vhdl/tb_dp_bsn_align_v2.vhd +++ b/libraries/base/dp/tb/vhdl/tb_dp_bsn_align_v2.vhd @@ -34,7 +34,7 @@ -- . g_lost_bsn_id to loose a single block in stream 1 and verify that -- it gets replaced and flagged. -- . 
array of one or more BSN aligners via g_nof_aligners_max >= 1, --- using node_index_arr, only support tb for g_use_mm_output = FALSE +-- using chain_node_index_arr, only support tb for g_use_mm_output = false -- Remark: -- For this BSN aligner component it was essential to have an almost -- complete, reviewed, detailed design document, because it is a complex @@ -85,8 +85,9 @@ entity tb_dp_bsn_align_v2 is generic ( -- DUT g_nof_streams : natural := 2; -- number of input and output streams - g_bsn_latency_max : natural := 1; -- Maximum travel latency of a remote block in number of block periods T_blk - g_nof_aligners_max : positive := 1; -- 1 when only align at last node, > 1 when align at every intermediate node + g_bsn_latency_max : natural := 2; -- Maximum travel latency of a remote block in number of block periods + g_bsn_latency_first_node : natural := 1; + g_nof_aligners_max : positive := 8; -- 1 when only align at last node, > 1 when align at every intermediate node g_block_size : natural := 11; -- > 1, g_block_size=1 is not supported g_block_period : natural := 20; -- >= g_block_size, = g_block_size + c_gap_size g_bsn_w : natural := c_dp_stream_bsn_w; -- number of bits in sosi BSN @@ -105,7 +106,8 @@ entity tb_dp_bsn_align_v2 is g_tb_diff_delay : integer := 0; -- 0 = aligned inputs, -1 = max input delay for no loss, -- >~ g_bsn_latency_max * g_block_period will give loss g_tb_nof_restart : natural := 2; -- number of times to restart the input stimuli - g_tb_nof_blocks : natural := 20 -- number of input blocks per restart + g_tb_nof_blocks : natural := 30 -- number of input blocks per restart, choose > circular buffer size, so + -- > c_align_latency_nof_blocks ); end tb_dp_bsn_align_v2; @@ -139,7 +141,11 @@ architecture tb of tb_dp_bsn_align_v2 is constant c_gap_size : natural := g_block_period - g_block_size; - constant c_lost_bsn_stream_id : natural := sel_a_b(g_nof_streams > 1, 1, 0); -- fixed use stream 1 to verify g_lost_bsn_id. 
Use 0 for g_nof_streams = 1. + -- Fixed use stream 1 to verify g_lost_bsn_id. Use 0 for g_nof_streams = 1. + constant c_lost_bsn_stream_id : natural := sel_a_b(g_nof_streams > 1, 1, 0); + + -- In tb no support (yet) for immediate aligned output at first node, when c_nof_aligners_max > 1 + constant c_use_aligner_at_first_node : boolean := true; -- In the tb only support MM interface verification for c_nof_aligners_max = 1 constant c_nof_aligners_max : positive := sel_a_b(g_use_mm_output, 1, g_nof_aligners_max); @@ -151,9 +157,11 @@ architecture tb of tb_dp_bsn_align_v2 is constant c_dut_latency : natural := g_pipeline_input + g_rd_latency + c_mm_to_dp_latency + g_pipeline_output; -- DUT buffer latency for chain of DUTs - constant c_align_latency_nof_blocks : natural := g_bsn_latency_max * c_nof_aligners_max; -- in number blocks - constant c_align_latency_nof_valid : natural := g_bsn_latency_max * c_nof_aligners_max * g_block_size; -- in number of data samples - constant c_align_latency_nof_clk : natural := g_bsn_latency_max * c_nof_aligners_max * g_block_period; -- in number clk cycles + constant c_align_latency_nof_blocks : natural := sel_a_b(c_nof_aligners_max = 1, + g_bsn_latency_max, + g_bsn_latency_max * (c_nof_aligners_max - 1) + g_bsn_latency_first_node); -- number blocks + constant c_align_latency_nof_valid : natural := c_align_latency_nof_blocks * g_block_size; -- number of data samples + constant c_align_latency_nof_clk : natural := c_align_latency_nof_blocks * g_block_period; -- number clk cycles -- Total DUT chain latency constant c_total_latency : natural := c_dut_latency + c_align_latency_nof_clk; @@ -181,7 +189,7 @@ architecture tb of tb_dp_bsn_align_v2 is signal rst : std_logic := '1'; signal sl1 : std_logic := '1'; - signal node_index_arr : t_nat_natural_arr(0 to c_nof_aligners_max - 1) := array_init(0, c_nof_aligners_max, 1); + signal chain_node_index_arr : t_nat_natural_arr(0 to c_nof_aligners_max - 1) := array_init(0, c_nof_aligners_max, 1); 
signal stream_en_arr : std_logic_vector(g_nof_streams - 1 downto 0) := (others => '1'); -- default all streams are enabled signal stream_lost_arr : std_logic_vector(g_nof_streams - 1 downto 0) := (others => '0'); -- default no streams are lost @@ -501,6 +509,7 @@ begin generic map ( g_nof_streams => g_nof_streams, g_bsn_latency_max => g_bsn_latency_max, + g_bsn_latency_first_node => g_bsn_latency_first_node, g_nof_aligners_max => c_nof_aligners_max, g_block_size => g_block_size, g_bsn_w => g_bsn_w, @@ -515,8 +524,8 @@ begin dp_rst => rst, dp_clk => clk, -- Control - node_index => node_index_arr(0), - stream_en_arr => stream_en_arr, + chain_node_index => chain_node_index_arr(0), + stream_en_arr => stream_en_arr, -- Streaming input in_sosi_arr => dut_in_sosi_2arr(0), -- Output via local MM interface in dp_clk domain @@ -553,6 +562,7 @@ begin generic map ( g_nof_streams => g_nof_streams, g_bsn_latency_max => g_bsn_latency_max, + g_bsn_latency_first_node => g_bsn_latency_first_node, g_nof_aligners_max => c_nof_aligners_max, g_block_size => g_block_size, g_bsn_w => g_bsn_w, @@ -567,8 +577,8 @@ begin dp_rst => rst, dp_clk => clk, -- Control - node_index => node_index_arr(I), - stream_en_arr => stream_en_arr, + chain_node_index => chain_node_index_arr(I), + stream_en_arr => stream_en_arr, -- Streaming input in_sosi_arr => dut_in_sosi_2arr(I), -- Output via streaming DP interface diff --git a/libraries/base/dp/tb/vhdl/tb_dp_repack_data.vhd b/libraries/base/dp/tb/vhdl/tb_dp_repack_data.vhd index 1fbc042d83c68b5f5e55817d62faab380e2ca0c7..4a09164eaf1339f29b58c733de09d3c67e1da91b 100644 --- a/libraries/base/dp/tb/vhdl/tb_dp_repack_data.vhd +++ b/libraries/base/dp/tb/vhdl/tb_dp_repack_data.vhd @@ -54,17 +54,17 @@ entity tb_dp_repack_data is g_flow_control_stimuli : t_dp_flow_control_enum := e_active; -- always e_active, e_random or e_pulse flow control g_flow_control_verify : t_dp_flow_control_enum := e_active; -- always e_active, e_random or e_pulse flow control -- specific - 
g_in_dat_w : natural := 8 * 42; - g_in_nof_words : natural := 1; - g_pack_dat_w : natural := 32; - g_pack_nof_words : natural := 11; - g_in_bypass : boolean := true; -- can use TRUE when g_in_nof_words=1 or g_in_nof_words=g_out_nof_words + g_in_dat_w : natural := 36; + g_in_nof_words : natural := 16; + g_pack_dat_w : natural := 64; + g_pack_nof_words : natural := 9; + g_in_bypass : boolean := false; -- can use TRUE when g_in_nof_words=1 or g_in_nof_words=g_out_nof_words g_pack_bypass : boolean := false; -- can use TRUE when g_out_nof_words=1 or g_in_nof_words=g_out_nof_words - g_in_symbol_w : natural := 8; -- default 1 for snk_in.empty in nof bits, else use power of 2 - g_pack_symbol_w : natural := 8; -- default 1 for src_out.empty in nof bits, else use power of 2 + g_in_symbol_w : natural := 1; -- default 1 for snk_in.empty in nof bits, else use power of 2 + g_pack_symbol_w : natural := 1; -- default 1 for src_out.empty in nof bits, else use power of 2 g_nof_repeat : natural := 10; - g_pkt_len : natural := 1; -- if not a multiple of g_in_nof_words then the input stage flush creates gap between blocks - g_pkt_gap : natural := 0 + g_pkt_len : natural := 64; -- if not a multiple of g_in_nof_words then the input stage flush creates gap between blocks + g_pkt_gap : natural := 10 ); end tb_dp_repack_data; diff --git a/libraries/base/dp/tb/vhdl/tb_mmp_dp_bsn_align_v2.vhd b/libraries/base/dp/tb/vhdl/tb_mmp_dp_bsn_align_v2.vhd index 78ff1107922be99fcc03ddd829a454cf448692a1..79f281d6e3b7c08fa3f913d698379b7d3620e966 100644 --- a/libraries/base/dp/tb/vhdl/tb_mmp_dp_bsn_align_v2.vhd +++ b/libraries/base/dp/tb/vhdl/tb_mmp_dp_bsn_align_v2.vhd @@ -63,6 +63,7 @@ architecture tb of tb_mmp_dp_bsn_align_v2 is -- . 
for dp_bsn_align_v2 constant c_nof_streams : natural := 5; constant c_bsn_latency_max : natural := 1; + constant c_bsn_latency_first_node : natural := c_bsn_latency_max; constant c_nof_aligners_max : positive := 1; -- fixed in this tb constant c_block_size : natural := 11; constant c_block_period : natural := 11; @@ -103,8 +104,10 @@ architecture tb of tb_mmp_dp_bsn_align_v2 is constant c_gap_size : natural := c_block_period - c_block_size; -- DUT latency + constant c_ref_sosi_latency : natural := 0; constant c_mm_to_dp_latency : natural := 1; - constant c_dut_latency : natural := c_pipeline_input + c_rd_latency + c_mm_to_dp_latency + c_pipeline_output; + constant c_dut_latency : natural := c_pipeline_input + c_ref_sosi_latency + + c_rd_latency + c_mm_to_dp_latency + c_pipeline_output; constant c_align_latency_nof_blocks : natural := c_bsn_latency_max * c_nof_aligners_max; -- in number blocks constant c_align_latency_nof_valid : natural := c_bsn_latency_max * c_nof_aligners_max * c_block_size; -- in number of data samples @@ -147,7 +150,7 @@ architecture tb of tb_mmp_dp_bsn_align_v2 is signal dp_clk : std_logic := '1'; signal dp_rst : std_logic := '1'; - signal node_index : natural := 0; + signal chain_node_index : natural := 0; signal ref_siso_arr : t_dp_siso_arr(c_nof_streams - 1 downto 0) := (others => c_dp_siso_rdy); signal ref_sosi_arr : t_dp_sosi_arr(c_nof_streams - 1 downto 0); -- generated stimuli signal in_sosi_arr : t_dp_sosi_arr(c_nof_streams - 1 downto 0) := (others => c_dp_sosi_rst); -- input stimuli @@ -427,6 +430,7 @@ begin generic map ( g_nof_streams => c_nof_streams, g_bsn_latency_max => c_bsn_latency_max, + g_bsn_latency_first_node => c_bsn_latency_first_node, g_nof_aligners_max => c_nof_aligners_max, g_block_size => c_block_size, g_bsn_w => c_bsn_w, @@ -456,7 +460,7 @@ begin dp_rst => dp_rst, dp_clk => dp_clk, - node_index => node_index, + chain_node_index => chain_node_index, -- Streaming input in_sosi_arr => in_sosi_arr, -- Output via local MM 
in dp_clk domain diff --git a/libraries/base/dp/tb/vhdl/tb_tb_dp_bsn_align_v2.vhd b/libraries/base/dp/tb/vhdl/tb_tb_dp_bsn_align_v2.vhd index d1ac6698b74d433a03b75d037ee651dafc42690b..8c9a3bdc2ba244940a8a6b7c6018170d3e611b81 100644 --- a/libraries/base/dp/tb/vhdl/tb_tb_dp_bsn_align_v2.vhd +++ b/libraries/base/dp/tb/vhdl/tb_tb_dp_bsn_align_v2.vhd @@ -23,8 +23,9 @@ -- > as 3 -- > run -all -library IEEE; +library IEEE, common_lib; use IEEE.std_logic_1164.all; +use common_lib.common_pkg.all; use work.tb_dp_pkg.all; entity tb_tb_dp_bsn_align_v2 is @@ -33,13 +34,14 @@ end tb_tb_dp_bsn_align_v2; architecture tb of tb_tb_dp_bsn_align_v2 is constant c_block : natural := 11; constant c_period : natural := 20; - constant c_nof_blk : natural := 30; + constant c_nof_blk : natural := 20; -- choose > circular buffer size signal tb_end : std_logic := '0'; -- declare tb_end to avoid 'No objects found' error on 'when -label tb_end' begin -- -- DUT -- g_nof_streams : NATURAL := 2; -- number of input and output streams -- g_bsn_latency_max : NATURAL := 1; -- Maximum travel latency of a remote block in number of block periods T_blk + -- g_bsn_latency_first_node : natural := 1; -- g_nof_aligners_max : NATURAL := 1; -- 1 when only align at last node, > 1 when align at every intermediate node -- g_block_size : NATURAL := 11; -- > 1, g_block_size=1 is not supported -- g_block_period : NATURAL := 20; -- >= g_block_size, = g_block_size + c_gap_size @@ -61,25 +63,34 @@ begin -- g_tb_nof_restart : NATURAL := 1; -- number of times to restart the input stimuli -- g_tb_nof_blocks : NATURAL := 10 -- number of input blocks per restart - u_mm_output : entity work.tb_dp_bsn_align_v2 generic map (2, 1, 1, c_block, c_period, 32, 16, 17, 0, 0, 0, 3, true, 0, 0, 1, 0, 2, c_nof_blk); - u_mm_output_pow2 : entity work.tb_dp_bsn_align_v2 generic map (2, 1, 1, 16, c_period, 32, 16, 17, 0, 0, 0, 3, true, 0, 0, 1, 0, 2, c_nof_blk); -- g_block_size = 2**4 = 16 - u_mm_output_large_bsn : entity 
work.tb_dp_bsn_align_v2 generic map (2, 1, 1, c_block, c_period, 32, 16, 17, 0, 0, 0, 3000, true, 0, 0, 1, 0, 2, c_nof_blk); -- test where bsn * g_block_size > 2^10 to test address resizing - u_mm_output_single : entity work.tb_dp_bsn_align_v2 generic map (1, 1, 1, c_block, c_period, 32, 16, 17, 0, 0, 0, 3, true, 0, 0, 1, 0, 2, c_nof_blk); - u_dp_output : entity work.tb_dp_bsn_align_v2 generic map (2, 1, 1, c_block, c_period, 32, 16, 17, 0, 0, 0, 3, false, 0, 0, 1, 0, 2, c_nof_blk); - u_dp_output_pow2 : entity work.tb_dp_bsn_align_v2 generic map (2, 1, 1, 16, c_period, 32, 16, 17, 0, 0, 0, 3, false, 0, 0, 1, 0, 2, c_nof_blk); -- g_block_size = 2**4 = 16 - u_dp_output_large_bsn : entity work.tb_dp_bsn_align_v2 generic map (2, 1, 1, c_block, c_period, 32, 16, 17, 0, 0, 0, 3000, false, 0, 0, 1, 0, 2, c_nof_blk); -- test where bsn * g_block_size > 2^10 to test address resizing - u_dp_output_single : entity work.tb_dp_bsn_align_v2 generic map (1, 1, 1, c_block, c_period, 32, 16, 17, 0, 0, 0, 3, false, 0, 0, 1, 0, 2, c_nof_blk); - u_dp_output_p1 : entity work.tb_dp_bsn_align_v2 generic map (2, 1, 1, c_block, c_period, 32, 16, 17, 0, 0, 0, 3, false, 1, 1, 1, 0, 2, c_nof_blk); - u_bsn_lat_max_2 : entity work.tb_dp_bsn_align_v2 generic map (2, 2, 1, c_block, c_period, 32, 16, 17, 0, 0, 0, 3, false, 0, 0, 1, 0, 2, c_nof_blk); - u_bsn_lat_max_3 : entity work.tb_dp_bsn_align_v2 generic map (2, 3, 1, c_block, c_period, 32, 16, 17, 0, 0, 0, 3, false, 0, 0, 1, 0, 2, c_nof_blk); - u_p1_rd2 : entity work.tb_dp_bsn_align_v2 generic map (2, 1, 1, c_block, c_period, 32, 16, 17, 0, 0, 0, 3, false, 1, 0, 2, 0, 2, c_nof_blk); - u_zero_gap : entity work.tb_dp_bsn_align_v2 generic map (2, 1, 1, c_block, c_block, 32, 16, 17, 0, 0, 0, 3, false, 0, 0, 1, 0, 2, c_nof_blk); - u_zero_gap_p1_rd2 : entity work.tb_dp_bsn_align_v2 generic map (2, 1, 1, c_block, c_block, 32, 16, 17, 0, 0, 0, 3, false, 1, 1, 2, 0, 2, c_nof_blk); - u_stream_disable : entity work.tb_dp_bsn_align_v2 generic map (3, 1, 1, 
c_block, c_period, 32, 16, 17, 2, 0, 0, 3, false, 0, 0, 1, 0, 2, c_nof_blk); - u_stream_lost : entity work.tb_dp_bsn_align_v2 generic map (3, 1, 1, c_block, c_period, 32, 16, 17, 0, 2, 0, 3, false, 0, 0, 1, 0, 2, c_nof_blk); - u_stream_disable_lost : entity work.tb_dp_bsn_align_v2 generic map (4, 1, 1, c_block, c_period, 32, 16, 17, 1, 2, 0, 3, false, 0, 0, 1, 0, 2, c_nof_blk); - u_bsn_lost : entity work.tb_dp_bsn_align_v2 generic map (3, 1, 1, c_block, c_period, 32, 16, 17, 0, 0, 10, 3, false, 0, 0, 1, 0, 2, c_nof_blk); - u_diff_delay : entity work.tb_dp_bsn_align_v2 generic map (3, 1, 1, c_block, c_period, 32, 16, 17, 0, 0, 0, 3, false, 0, 0, 1, -1, 2, c_nof_blk); - u_nof_aligners : entity work.tb_dp_bsn_align_v2 generic map (2, 1, 8, c_block, c_period, 32, 16, 17, 0, 0, 0, 3, false, 0, 0, 1, 0, 2, c_nof_blk); - u_nof_aligners_diff_delay : entity work.tb_dp_bsn_align_v2 generic map (4, 1, 3, c_block, c_period, 32, 16, 17, 0, 0, 0, 3, false, 0, 0, 1, -1, 2, c_nof_blk); + u_mm_output : entity work.tb_dp_bsn_align_v2 generic map (2, c_1, c_1, 1, c_block, c_period, 32, 16, 17, 0, 0, 0, 3, true, 0, 0, 1, 0, 2, c_nof_blk); + u_mm_output_single : entity work.tb_dp_bsn_align_v2 generic map (1, c_1, c_1, 1, c_block, c_period, 32, 16, 17, 0, 0, 0, 3, true, 0, 0, 1, 0, 2, c_nof_blk); + u_output : entity work.tb_dp_bsn_align_v2 generic map (2, c_1, c_1, 1, c_block, c_period, 32, 16, 17, 0, 0, 0, 3, false, 0, 0, 1, 0, 2, c_nof_blk); + u_output_single : entity work.tb_dp_bsn_align_v2 generic map (1, c_1, c_1, 1, c_block, c_period, 32, 16, 17, 0, 0, 0, 3, false, 0, 0, 1, 0, 2, c_nof_blk); + u_output_pipe1 : entity work.tb_dp_bsn_align_v2 generic map (2, c_1, c_1, 1, c_block, c_period, 32, 16, 17, 0, 0, 0, 3, false, 1, 1, 1, 0, 2, c_nof_blk); + u_pipe1_rdlat2 : entity work.tb_dp_bsn_align_v2 generic map (2, c_1, c_1, 1, c_block, c_period, 32, 16, 17, 0, 0, 0, 3, false, 1, 0, 2, 0, 2, c_nof_blk); + u_zero_gap : entity work.tb_dp_bsn_align_v2 generic map (2, c_1, c_1, 1, c_block, 
c_block, 32, 16, 17, 0, 0, 0, 3, false, 0, 0, 1, 0, 2, c_nof_blk); + u_zero_gap_pipe1_rdlat2 : entity work.tb_dp_bsn_align_v2 generic map (2, c_1, c_1, 1, c_block, c_block, 32, 16, 17, 0, 0, 0, 3, false, 1, 1, 2, 0, 2, c_nof_blk); + u_stream_disable : entity work.tb_dp_bsn_align_v2 generic map (3, c_1, c_1, 1, c_block, c_period, 32, 16, 17, 2, 0, 0, 3, false, 0, 0, 1, 0, 2, c_nof_blk); + u_stream_lost : entity work.tb_dp_bsn_align_v2 generic map (3, c_1, c_1, 1, c_block, c_period, 32, 16, 17, 0, 2, 0, 3, false, 0, 0, 1, 0, 2, c_nof_blk); + u_stream_disable_lost : entity work.tb_dp_bsn_align_v2 generic map (4, c_1, c_1, 1, c_block, c_period, 32, 16, 17, 1, 2, 0, 3, false, 0, 0, 1, 0, 2, c_nof_blk); + u_bsn_lost : entity work.tb_dp_bsn_align_v2 generic map (3, c_1, c_1, 1, c_block, c_period, 32, 16, 17, 0, 0, 10, 3, false, 0, 0, 1, 0, 2, c_nof_blk); + u_diff_delay : entity work.tb_dp_bsn_align_v2 generic map (3, c_1, c_1, 1, c_block, c_period, 32, 16, 17, 0, 0, 0, 3, false, 0, 0, 1, -1, 2, c_nof_blk); + + -- g_block_size = 2**4 = 16 + u_mm_block_pow2 : entity work.tb_dp_bsn_align_v2 generic map (2, c_1, c_1, 1, 16, c_period, 32, 16, 17, 0, 0, 0, 3, true, 0, 0, 1, 0, 2, c_nof_blk); + u_block_pow2 : entity work.tb_dp_bsn_align_v2 generic map (2, c_1, c_1, 1, 16, c_period, 32, 16, 17, 0, 0, 0, 3, false, 0, 0, 1, 0, 2, c_nof_blk); + + -- test where bsn * g_block_size > 2^10 to test address resizing + u_mm_large_bsn : entity work.tb_dp_bsn_align_v2 generic map (2, c_1, c_1, 1, c_block, c_period, 32, 16, 17, 0, 0, 0, 3000, true, 0, 0, 1, 0, 2, c_nof_blk); + u_large_bsn : entity work.tb_dp_bsn_align_v2 generic map (2, c_1, c_1, 1, c_block, c_period, 32, 16, 17, 0, 0, 0, 3000, false, 0, 0, 1, 0, 2, c_nof_blk); + + -- BSN latency + u_bsn_lat_max_2 : entity work.tb_dp_bsn_align_v2 generic map (2, 2, 2, 1, c_block, c_period, 32, 16, 17, 0, 0, 0, 3, false, 0, 0, 1, 0, 2, c_nof_blk); + u_bsn_lat_max_3 : entity work.tb_dp_bsn_align_v2 generic map (2, 3, 3, 1, c_block, c_period, 
32, 16, 17, 0, 0, 0, 3, false, 0, 0, 1, 0, 2, c_nof_blk); + + -- chain of aligners + u_nof_aligners_16 : entity work.tb_dp_bsn_align_v2 generic map (2, 2, c_1, 16, c_block, c_period, 32, 16, 17, 0, 0, 0, 3, false, 0, 0, 1, 0, 2, 100); + u_nof_aligners_8 : entity work.tb_dp_bsn_align_v2 generic map (2, c_1, c_1, 8, c_block, c_period, 32, 16, 17, 0, 0, 0, 3, false, 0, 0, 1, 0, 2, 50); + u_nof_aligners_diff_delay : entity work.tb_dp_bsn_align_v2 generic map (4, c_1, c_1, 3, c_block, c_period, 32, 16, 17, 0, 0, 0, 3, false, 0, 0, 1, -1, 2, 50); end tb;