diff --git a/applications/lofar2/libraries/sdp/tb/vhdl/tb_sdp_beamformer_remote_ring_bf.vhd b/applications/lofar2/libraries/sdp/tb/vhdl/tb_sdp_beamformer_remote_ring_bf.vhd
index 527f0f06505e7361dce1282f70a21c0bc069fbea..39df6e2aba87de40d3be5750b3f5b2e3bcf53b4c 100644
--- a/applications/lofar2/libraries/sdp/tb/vhdl/tb_sdp_beamformer_remote_ring_bf.vhd
+++ b/applications/lofar2/libraries/sdp/tb/vhdl/tb_sdp_beamformer_remote_ring_bf.vhd
@@ -20,7 +20,8 @@
 --
 -- Author: E. Kooistra
 -- Purpose:
--- . Test bench for multiple sdp_beamformer_output.vhd + ring_lane.vhd in a ring
+-- . Test bench for multiple sdp_beamformer_remote.vhd + ring_lane.vhd +
+--   tr_10GbE in a ring
 -- Description:
 -- . https://support.astron.nl/confluence/display/L2M/L5+SDPFW+Design+Document%3A+Beamformer
 --
@@ -61,7 +62,8 @@
 --           dp_bsn_align_v2 aligned output   ||
 --              FPGA_bf_aligned_latency_R(RN) v|
 --
--- . Latency results from SDP-ARTS HW with 16 ring nodes (GN = 64 is RN = 0)
+-- . BF latency results from SDP-ARTS HW
+--   - with 16 ring nodes (GN = 64 is RN = 0)
 --   - 2024-03-02T21.16.33_d601da896_lofar2_unb2b_sdp_station_full_wg
 --
 --   Node:  bf_ring_rx    bf_rx_align        bf_aligned      bf_ring_tx
@@ -86,13 +88,43 @@
 --   Simulation latency results with this tb
 --   Node:  bf_ring_rx    bf_rx_align        bf_aligned      bf_ring_tx
 --          _latency:     _latency:          _latency:       _latency:
---   0:     -1            ( 1      0    )    2053            2075
---   1:     3862          ( 1      3875 )    4101            4123
---   2:     5914          ( 1      5927 )    6149            6171
---   3:     7965          ( 1      7978 )    8197            -1
+--   0:     -1            ( 1    0    )      2053            2075
+--   1:     3862          ( 1    3875 )      4101            4123
+--   2:     5914          ( 1    5927 )      6149            6171
+--   3:     7965          ( 1    7978 )      8197            -1
+--
+--   - The dp_bsn_align_v2 BSN latency monitor results agree between sim and HW.
+--   - The bf_aligned_latency is exactly equal in sim and on HW, because the
+--     mmp_dp_bsn_align_v2 uses the ref_sync for the BSN monitor and also to
+--     release its BSN aligned output, so the latency only depends on internal
+--     FW buffering and latency.
+--   - The bf_aligned_latency and bf_ring_tx_latency do not depend on cable
+--     delays and are constant when read again in sim or on HW, because they
+--     only depend on fixed internal FW buffering and latency.
+--   - The ring_lane BSN latency monitor results differ between sim and HW; it
+--     is unclear why:
+--     . the ring_rx and ring_tx BSN latency monitor results are about one
+--       block of 1024 larger on HW.
+--     . on the same HW node, the bf_ring_rx_latency is about one block of 1024
+--       larger than the bf_rx_align_latency, even though they are taken at
+--       nearly the same place in the ring_rx signal path.
+--     . on the same HW node, the bf_ring_tx_latency is about one block of 1024
+--       larger than the bf_aligned_latency, even though they are taken at
+--       nearly the same place in the tx signal path.
+--     . the ring_rx and ring_tx BSN latency monitor results for XST do not
+--       show an offset of one block of 1024.
+--     TODO:
+--     . Assume the ring_lane latencies are one block of 1024 too high, and
+--       assume that the bf_rx_align_latency is correct and reflects the actual
+--       packet latency.
+--     . The ring_rx and ring_tx both use func_ring_nof_hops_to_source_rn() and
+--       hops = sosi.channel to get monitor_sosi, maybe there occurs an offset
+--       there.
+--     . The ring_rx and ring_tx both use dp_demux.vhd, maybe that causes a
+--       shift of one block of 1024 in sosi.sync.
 --
 -- Usage:
--- > as 8
+-- > as 3 or more
 -- > run -a
 -------------------------------------------------------------------------------
 
@@ -111,7 +143,8 @@ use work.tb_sdp_pkg.all;
 
 entity tb_sdp_beamformer_remote_ring is
   generic (
-    g_nof_rn  : natural := 4  -- number of nodes in the ring
+    g_nof_rn    : natural := 2;  -- number of nodes in the ring
+    g_nof_sync  : natural := 2  -- number of sync intervals of stimuli blocks, also scales the tb timeout
   );
 end tb_sdp_beamformer_remote_ring;
 
@@ -121,9 +154,13 @@ architecture tb of tb_sdp_beamformer_remote_ring is
   constant c_sa_clk_period : time := tech_pll_clk_644_period;  -- 644MHz
 
   -- Apply cable delay in tech_pll_clk_156_period units, to remain aligned with tr_10GbE sim model
-  -- Choose c_cable_delay = 30 * 6.4 ~= 192 ns ~= 38 dp_clk of 5 ns, to match delay seen on HW
+  -- . Choose c_cable_delay = 30 * 6.4 ~= 192 ns ~= 38 dp_clk of 5 ns, to match delay seen on HW
+  -- . Maximum c_cable_delay <= 186 * 6.4 ~= 1190 ns ~= 238 dp_clk of 5 ns in simulation with
+  --   g_nof_rn = 16. For larger c_cable_delay the bf_sum_sosi.data goes wrong. The maximum
+  --   c_cable_delay depends a little bit on g_nof_rn, for g_nof_rn = 2 the data goes wrong when
+  --   c_cable_delay >= 190.
   constant c_clk_156_period  : time := tech_pll_clk_156_period;  -- 6.400020 ns ~= 156.25 MHz
-  constant c_cable_delay     : time := c_clk_156_period * 30;
+  constant c_cable_delay     : time := c_clk_156_period * 186;
 
   -- BF data
   constant c_block_period              : natural := c_sdp_N_fft;
@@ -131,7 +168,6 @@ architecture tb of tb_sdp_beamformer_remote_ring is
   constant c_gap_size                  : natural := c_block_period - c_block_size;
   -- choose sync interval somewhat longer than maximum BF ring latency
   constant c_nof_blocks_per_sync       : natural := largest(10, (g_nof_rn + 1) * 2);
-  constant c_nof_sync                  : natural := 2;
   constant c_local_bf_re               : integer := 1;
   constant c_local_bf_im               : integer := 2;
 
@@ -155,7 +191,7 @@ architecture tb of tb_sdp_beamformer_remote_ring is
   constant c_sync_timeout              : natural := c_block_period * (c_nof_blocks_per_sync + 1);
 
   -- Timeout tb if there is no output bf_sum_sosi
-  constant c_tb_timeout                : time := (c_nof_sync + 1) * c_sync_timeout * c_dp_clk_period;
+  constant c_tb_timeout                : time := (g_nof_sync + 1) * c_sync_timeout * c_dp_clk_period;
 
   -- Address widths of a single MM instance
   constant c_addr_w_reg_ring_lane_info_bf          : natural := 1;
@@ -248,7 +284,7 @@ begin
   u_stimuli : entity dp_lib.dp_stream_stimuli
   generic map (
     g_sync_period => c_nof_blocks_per_sync,
-    g_nof_repeat  => c_nof_blocks_per_sync * c_nof_sync,
+    g_nof_repeat  => c_nof_blocks_per_sync * g_nof_sync,
     g_pkt_len     => c_block_size,
     g_pkt_gap     => c_gap_size
   )
@@ -268,6 +304,8 @@ begin
     local_bf_sosi.data <= TO_DP_SDATA(0);
     local_bf_sosi.re <= TO_DP_DSP_DATA(c_local_bf_re);
     local_bf_sosi.im <= TO_DP_DSP_DATA(c_local_bf_im);
+    local_bf_sosi.channel <= TO_DP_CHANNEL(0);
+    local_bf_sosi.err <= TO_DP_ERROR(0);
   end process;
 
   bf_bs_sosi <= local_bf_sosi;
@@ -275,6 +313,7 @@ begin
 
   p_mm : process
     variable v_span               : natural;
+    variable v_span_node          : natural;
     variable v_offset             : natural;
     variable v_transport_nof_hops : natural;
   begin
@@ -283,6 +322,9 @@ begin
 
     proc_common_wait_cross_clock_domain_latency(c_mm_clk_period, c_dp_clk_period,
                                                 c_common_cross_clock_domain_latency * 2);
+    ---------------------------------------------------------------------------
+    -- Setup transport nof hops for RN = 0:15 to [1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0]
+    ---------------------------------------------------------------------------
     -- Write FPGA_bf_ring_nof_transport_hops_RW = ring_lane_info.transport_nof_hops
     v_span := 2**c_addr_w_reg_ring_lane_info_bf;
     for RN in 0 to c_last_rn LOOP
@@ -304,10 +346,15 @@ begin
       FPGA_bf_ring_nof_transport_hops_R(RN) <= TO_UINT(reg_ring_lane_info_bf_cipo.rddata(c_word_w - 1 downto 0));
     end loop;
 
+    ---------------------------------------------------------------------------
     -- Wait until second bf_sum_sosi.sync
+    ---------------------------------------------------------------------------
     proc_common_wait_until_hi_lo(dp_clk, bf_sum_sosi.sync);
     proc_common_wait_until_hi_lo(dp_clk, bf_sum_sosi.sync);
+
+    ---------------------------------------------------------------------------
     -- Read BSN monitors
+    ---------------------------------------------------------------------------
     v_span := 2**c_sdp_reg_bsn_monitor_v2_addr_w;
     -- Read FPGA_bf_ring_rx_latency_R
     for RN in 0 to c_last_rn LOOP
@@ -317,12 +364,13 @@ begin
       FPGA_bf_ring_rx_latency_R(RN) <= TO_SINT(reg_bsn_monitor_v2_ring_rx_bf_cipo.rddata(c_word_w - 1 downto 0));
     end loop;
     -- Read FPGA_bf_rx_align_latency_R, for both c_sdp_P_sum = 2 inputs per RN
+    v_span_node := true_log_pow2(c_sdp_P_sum) * v_span;
     for RN in 0 to c_last_rn LOOP
-      for I in 0 to c_sdp_P_sum - 1 loop
-        v_offset := 6 + RN * c_sdp_P_sum * v_span + I * v_span;
+      for P in 0 to c_sdp_P_sum - 1 loop
+        v_offset := 6 + RN * v_span_node + P * v_span;
         proc_mem_mm_bus_rd(v_offset, mm_clk, reg_bsn_monitor_v2_bf_rx_align_cipo, reg_bsn_monitor_v2_bf_rx_align_copi);
         proc_mem_mm_bus_rd_latency(1, mm_clk);
-        FPGA_bf_rx_align_latency_R(RN)(I) <= TO_SINT(reg_bsn_monitor_v2_bf_rx_align_cipo.rddata(c_word_w - 1 downto 0));
+        FPGA_bf_rx_align_latency_R(RN)(P) <= TO_SINT(reg_bsn_monitor_v2_bf_rx_align_cipo.rddata(c_word_w - 1 downto 0));
       end loop;
     end loop;
     -- Read FPGA_bf_aligned_latency_R
@@ -340,20 +388,24 @@ begin
       FPGA_bf_ring_tx_latency_R(RN) <= TO_SINT(reg_bsn_monitor_v2_ring_tx_bf_cipo.rddata(c_word_w - 1 downto 0));
     end loop;
 
+    ---------------------------------------------------------------------------
+    -- Wait until end of simulation
+    ---------------------------------------------------------------------------
     mm_init <= '0';
 
-    -- Wait to stop simulation
     proc_common_wait_until_high(dp_clk, stimuli_end);
     proc_common_wait_some_cycles(dp_clk, 1000);
 
+    ---------------------------------------------------------------------------
     -- Print latency results
+    ---------------------------------------------------------------------------
     print_str("Node:  bf_ring_rx    bf_rx_align          bf_aligned      bf_ring_tx");
     print_str("       _latency:     _latency:            _latency:       _latency:");
     for RN in 0 to c_last_rn loop
        print_str(int_to_str(RN) & ":     " &
                  int_to_str(FPGA_bf_ring_rx_latency_R(RN)) & "          ( " &
-                 int_to_str(FPGA_bf_rx_align_latency_R(RN)(0)) & "      " &
-                 int_to_str(FPGA_bf_rx_align_latency_R(RN)(1)) & " )      " &
+                 int_to_str(FPGA_bf_rx_align_latency_R(RN)(0)) & "    " &
+                 int_to_str(FPGA_bf_rx_align_latency_R(RN)(1)) & " )        " &
                  int_to_str(FPGA_bf_aligned_latency_R(RN)) & "            " &
                  int_to_str(FPGA_bf_ring_tx_latency_R(RN)));
     end Loop;
@@ -362,6 +414,7 @@ begin
     wait;
   end process;
 
+  -- End the tb simulation
   proc_common_timeout_failure(c_tb_timeout, tb_end);  -- ERROR: end simulation if it fails to end in time
   proc_common_stop_simulation(tb_end);  -- OK: end simulation