diff --git a/applications/lofar2/libraries/sdp/tb/vhdl/tb_sdp_beamformer_remote_ring_bf.vhd b/applications/lofar2/libraries/sdp/tb/vhdl/tb_sdp_beamformer_remote_ring_bf.vhd index 527f0f06505e7361dce1282f70a21c0bc069fbea..39df6e2aba87de40d3be5750b3f5b2e3bcf53b4c 100644 --- a/applications/lofar2/libraries/sdp/tb/vhdl/tb_sdp_beamformer_remote_ring_bf.vhd +++ b/applications/lofar2/libraries/sdp/tb/vhdl/tb_sdp_beamformer_remote_ring_bf.vhd @@ -20,7 +20,8 @@ -- -- Author: E. Kooistra -- Purpose: --- . Test bench for multiple sdp_beamformer_output.vhd + ring_lane.vhd in a ring +-- . Test bench for multiple sdp_beamformer_remote.vhd + ring_lane.vhd + +-- tr_10GbE in a ring -- Description: -- . https://support.astron.nl/confluence/display/L2M/L5+SDPFW+Design+Document%3A+Beamformer -- @@ -61,7 +62,8 @@ -- dp_bsn_align_v2 aligned output || -- FPGA_bf_aligned_latency_R(RN) v| -- --- . Latency results from SDP-ARTS HW with 16 ring nodes (GN = 64 is RN = 0) +-- . BF latency results from SDP-ARTS HW +-- - with 16 ring nodes (GN = 64 is RN = 0) -- - 2024-03-02T21.16.33_d601da896_lofar2_unb2b_sdp_station_full_wg -- -- Node: bf_ring_rx bf_rx_align bf_aligned bf_ring_tx @@ -86,13 +88,43 @@ -- Simulation latency results with this tb -- Node: bf_ring_rx bf_rx_align bf_aligned bf_ring_tx -- _latency: _latency: _latency: _latency: --- 0: -1 ( 1 0 ) 2053 2075 --- 1: 3862 ( 1 3875 ) 4101 4123 --- 2: 5914 ( 1 5927 ) 6149 6171 --- 3: 7965 ( 1 7978 ) 8197 -1 +-- 0: -1 ( 1 0 ) 2053 2075 +-- 1: 3862 ( 1 3875 ) 4101 4123 +-- 2: 5914 ( 1 5927 ) 6149 6171 +-- 3: 7965 ( 1 7978 ) 8197 -1 +-- +-- - The dp_bsn_align_v2 BSN latency monitor results agree between sim and HW. +-- - The bf_aligned_latency is exactly equal in sim and on HW, because the +-- mmp_dp_bsn_align_v2 uses the ref_sync for the BSN monitor and also to +-- release its BSN aligned output, so the latency only depends on internal +-- FW buffering and latency. +-- . 
The bf_aligned_latency and bf_ring_tx_latency do not depend on cable +-- delays and are constant when read again in sim or on HW, because they +-- only depend on fixed internal FW buffering and latency. +-- - The ring_lane BSN latency monitor results differ between sim and HW; it +-- is unclear why: +-- . the ring_rx and ring_tx BSN latency monitor results are about one +-- block of 1024 larger on HW. +-- . on the same HW node, the bf_ring_rx_latency is about one block of 1024 +-- larger than the bf_rx_align_latency, even though they are taken at +-- nearly the same place in the ring_rx signal path. +-- . on the same HW node, the bf_ring_tx_latency is about one block of 1024 +-- larger than the bf_aligned_latency, even though they are taken at nearly +-- the same place in the tx signal path. +-- . the ring_rx and ring_tx BSN latency monitor results for XST do not +-- show a one block of 1024 offset. +-- TODO: +-- . Assume the ring_lane latencies are one block of 1024 too high, and +-- assume that the bf_rx_align_latency is correct and reflects the actual +-- packet latency. +-- . The ring_rx and ring_tx both use func_ring_nof_hops_to_source_rn() and +-- hops = sosi.channel to get monitor_sosi, maybe an offset occurs +-- there. +-- . The ring_rx and ring_tx both use dp_demux.vhd, maybe that causes a one +-- block of 1024 shift in sosi.sync. 
-- -- Usage: --- > as 8 +-- > as 3 or more -- > run -a ------------------------------------------------------------------------------- @@ -111,7 +143,8 @@ use work.tb_sdp_pkg.all; entity tb_sdp_beamformer_remote_ring is generic ( - g_nof_rn : natural := 4 -- number of nodes in the ring + g_nof_rn : natural := 2; -- number of nodes in the ring + g_nof_sync : natural := 2 ); end tb_sdp_beamformer_remote_ring; @@ -121,9 +154,13 @@ architecture tb of tb_sdp_beamformer_remote_ring is constant c_sa_clk_period : time := tech_pll_clk_644_period; -- 644MHz -- Apply cable delay in tech_pll_clk_156_period units, to remain aligned with tr_10GbE sim model - -- Choose c_cable_delay = 30 * 6.4 ~= 192 ns ~= 38 dp_clk of 5 ns, to match delay seen on HW + -- . Choose c_cable_delay = 30 * 6.4 ~= 192 ns ~= 38 dp_clk of 5 ns, to match delay seen on HW + -- . Maximum c_cable_delay <= 186 * 6.4 ~= 1190 ns ~= 238 dp_clk of 5 ns in simulation with + -- g_nof_rn = 16. For larger c_cable_delay the bf_sum_sosi.data goes wrong. The maximum + -- c_cable_delay depends a little bit on g_nof_rn, for g_nof_rn = 2 the data goes wrong when + -- c_cable_delay >= 190 * 6.4 ns. 
constant c_clk_156_period : time := tech_pll_clk_156_period; -- 6.400020 ns ~= 156.25 MHz - constant c_cable_delay : time := c_clk_156_period * 30; + constant c_cable_delay : time := c_clk_156_period * 186; -- BF data constant c_block_period : natural := c_sdp_N_fft; @@ -131,7 +168,6 @@ architecture tb of tb_sdp_beamformer_remote_ring is constant c_gap_size : natural := c_block_period - c_block_size; -- choose sync interval somewhat longer than maximum BF ring latency constant c_nof_blocks_per_sync : natural := largest(10, (g_nof_rn + 1) * 2); - constant c_nof_sync : natural := 2; constant c_local_bf_re : integer := 1; constant c_local_bf_im : integer := 2; @@ -155,7 +191,7 @@ architecture tb of tb_sdp_beamformer_remote_ring is constant c_sync_timeout : natural := c_block_period * (c_nof_blocks_per_sync + 1); -- Timeout tb if there is no output bf_sum_sosi - constant c_tb_timeout : time := (c_nof_sync + 1) * c_sync_timeout * c_dp_clk_period; + constant c_tb_timeout : time := (g_nof_sync + 1) * c_sync_timeout * c_dp_clk_period; -- Address widths of a single MM instance constant c_addr_w_reg_ring_lane_info_bf : natural := 1; @@ -248,7 +284,7 @@ begin u_stimuli : entity dp_lib.dp_stream_stimuli generic map ( g_sync_period => c_nof_blocks_per_sync, - g_nof_repeat => c_nof_blocks_per_sync * c_nof_sync, + g_nof_repeat => c_nof_blocks_per_sync * g_nof_sync, g_pkt_len => c_block_size, g_pkt_gap => c_gap_size ) @@ -268,6 +304,8 @@ begin local_bf_sosi.data <= TO_DP_SDATA(0); local_bf_sosi.re <= TO_DP_DSP_DATA(c_local_bf_re); local_bf_sosi.im <= TO_DP_DSP_DATA(c_local_bf_im); + local_bf_sosi.channel <= TO_DP_CHANNEL(0); + local_bf_sosi.err <= TO_DP_ERROR(0); end process; bf_bs_sosi <= local_bf_sosi; @@ -275,6 +313,7 @@ begin p_mm : process variable v_span : natural; + variable v_span_node : natural; variable v_offset : natural; variable v_transport_nof_hops : natural; begin @@ -283,6 +322,9 @@ begin proc_common_wait_cross_clock_domain_latency(c_mm_clk_period, c_dp_clk_period, 
c_common_cross_clock_domain_latency * 2); + --------------------------------------------------------------------------- + -- Setup transport nof hops for RN = 0:15 to [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0] + --------------------------------------------------------------------------- -- Write FPGA_bf_ring_nof_transport_hops_RW = ring_lane_info.transport_nof_hops v_span := 2**c_addr_w_reg_ring_lane_info_bf; for RN in 0 to c_last_rn LOOP @@ -304,10 +346,15 @@ begin FPGA_bf_ring_nof_transport_hops_R(RN) <= TO_UINT(reg_ring_lane_info_bf_cipo.rddata(c_word_w - 1 downto 0)); end loop; + --------------------------------------------------------------------------- -- Wait until second bf_sum_sosi.sync + --------------------------------------------------------------------------- proc_common_wait_until_hi_lo(dp_clk, bf_sum_sosi.sync); proc_common_wait_until_hi_lo(dp_clk, bf_sum_sosi.sync); + + --------------------------------------------------------------------------- -- Read BSN monitors + --------------------------------------------------------------------------- v_span := 2**c_sdp_reg_bsn_monitor_v2_addr_w; -- Read FPGA_bf_ring_rx_latency_R for RN in 0 to c_last_rn LOOP @@ -317,12 +364,13 @@ begin FPGA_bf_ring_rx_latency_R(RN) <= TO_SINT(reg_bsn_monitor_v2_ring_rx_bf_cipo.rddata(c_word_w - 1 downto 0)); end loop; -- Read FPGA_bf_rx_align_latency_R, for both c_sdp_P_sum = 2 inputs per RN + v_span_node := true_log_pow2(c_sdp_P_sum) * v_span; for RN in 0 to c_last_rn LOOP - for I in 0 to c_sdp_P_sum - 1 loop - v_offset := 6 + RN * c_sdp_P_sum * v_span + I * v_span; + for P in 0 to c_sdp_P_sum - 1 loop + v_offset := 6 + RN * v_span_node + P * v_span; proc_mem_mm_bus_rd(v_offset, mm_clk, reg_bsn_monitor_v2_bf_rx_align_cipo, reg_bsn_monitor_v2_bf_rx_align_copi); proc_mem_mm_bus_rd_latency(1, mm_clk); - FPGA_bf_rx_align_latency_R(RN)(I) <= TO_SINT(reg_bsn_monitor_v2_bf_rx_align_cipo.rddata(c_word_w - 1 downto 0)); + FPGA_bf_rx_align_latency_R(RN)(P) <= 
TO_SINT(reg_bsn_monitor_v2_bf_rx_align_cipo.rddata(c_word_w - 1 downto 0)); end loop; end loop; -- Read FPGA_bf_aligned_latency_R @@ -340,20 +388,24 @@ begin FPGA_bf_ring_tx_latency_R(RN) <= TO_SINT(reg_bsn_monitor_v2_ring_tx_bf_cipo.rddata(c_word_w - 1 downto 0)); end loop; + --------------------------------------------------------------------------- + -- Wait until end of simulation + --------------------------------------------------------------------------- mm_init <= '0'; - -- Wait to stop simulation proc_common_wait_until_high(dp_clk, stimuli_end); proc_common_wait_some_cycles(dp_clk, 1000); + --------------------------------------------------------------------------- -- Print latency results + --------------------------------------------------------------------------- print_str("Node: bf_ring_rx bf_rx_align bf_aligned bf_ring_tx"); print_str(" _latency: _latency: _latency: _latency:"); for RN in 0 to c_last_rn loop print_str(int_to_str(RN) & ": " & int_to_str(FPGA_bf_ring_rx_latency_R(RN)) & " ( " & - int_to_str(FPGA_bf_rx_align_latency_R(RN)(0)) & " " & - int_to_str(FPGA_bf_rx_align_latency_R(RN)(1)) & " ) " & + int_to_str(FPGA_bf_rx_align_latency_R(RN)(0)) & " " & + int_to_str(FPGA_bf_rx_align_latency_R(RN)(1)) & " ) " & int_to_str(FPGA_bf_aligned_latency_R(RN)) & " " & int_to_str(FPGA_bf_ring_tx_latency_R(RN))); end Loop; @@ -362,6 +414,7 @@ begin wait; end process; + -- End the tb simulation proc_common_timeout_failure(c_tb_timeout, tb_end); -- ERROR: end simulation if it fails to end in time proc_common_stop_simulation(tb_end); -- OK: end simulation