diff --git a/applications/lofar2/designs/lofar2_unb2b_sdp_station/revisions/lofar2_unb2b_sdp_station_bf/tb_lofar2_unb2b_sdp_station_bf.vhd b/applications/lofar2/designs/lofar2_unb2b_sdp_station/revisions/lofar2_unb2b_sdp_station_bf/tb_lofar2_unb2b_sdp_station_bf.vhd
index 21f703cf9f691ffcbc7f16d9813d6ba0e873ff6b..24bb76e9513ce0d4c81e2494110c1d5b93f66b34 100644
--- a/applications/lofar2/designs/lofar2_unb2b_sdp_station/revisions/lofar2_unb2b_sdp_station_bf/tb_lofar2_unb2b_sdp_station_bf.vhd
+++ b/applications/lofar2/designs/lofar2_unb2b_sdp_station/revisions/lofar2_unb2b_sdp_station_bf/tb_lofar2_unb2b_sdp_station_bf.vhd
@@ -46,6 +46,7 @@
 --   > as 7    # default
 --   > as 12   # for detailed debugging
 --   > run -a  
+--   Takes about 45 min
 --
 -------------------------------------------------------------------------------
 LIBRARY IEEE, common_lib, unb2b_board_lib, i2c_lib, mm_lib, dp_lib, diag_lib, lofar2_sdp_lib, wpfb_lib, tech_pll_lib, tr_10GbE_lib, lofar2_unb2b_sdp_station_lib;
@@ -87,6 +88,7 @@ ARCHITECTURE tb OF tb_lofar2_unb2b_sdp_station_bf IS
   CONSTANT c_nof_clk_per_sync    : NATURAL := c_nof_block_per_sync*c_sdp_N_fft; 
   CONSTANT c_pps_period          : NATURAL := c_nof_clk_per_sync;
   CONSTANT c_wpfb_sim            : t_wpfb := func_wpfb_set_nof_block_per_sync(c_sdp_wpfb_subbands, c_nof_block_per_sync);
+  CONSTANT c_stat_data_sz        : NATURAL := c_longword_sz/c_word_sz;  -- = 2
    
   CONSTANT c_percentage          : REAL := 0.05;  -- percentage that actual value may differ from expected value
   CONSTANT c_lo_factor           : REAL := 1.0 - c_percentage;  -- lower boundary  
@@ -109,7 +111,8 @@ ARCHITECTURE tb OF tb_lofar2_unb2b_sdp_station_bf IS
   CONSTANT c_exp_beamlet_power_sp_0         : REAL := c_exp_wg_power_sp_0 * c_exp_sp_beamlet_power_ratio;
 
   TYPE t_real_arr IS ARRAY (INTEGER RANGE <>) OF REAL; 
-  TyPE t_slv_64_subbands_arr IS ARRAY (INTEGER RANGE <>) OF t_slv_64_arr(0 TO c_sdp_S_sub_bf);
+  TYPE t_slv_64_subbands_arr IS ARRAY (INTEGER RANGE <>) OF t_slv_64_arr(0 TO c_sdp_N_sub);
+  TYPE t_slv_64_beamlets_arr IS ARRAY (INTEGER RANGE <>) OF t_slv_64_arr(0 TO c_sdp_S_sub_bf);
 
   -- MM  
   CONSTANT c_mm_file_reg_ppsh             : STRING := mmf_unb_file_prefix(c_unb_nr, c_node_nr) & "PIO_PPS";
@@ -127,31 +130,36 @@ ARCHITECTURE tb OF tb_lofar2_unb2b_sdp_station_bf IS
   SIGNAL rd_data             : STD_LOGIC_VECTOR(c_32-1 DOWNTO 0);
 
   -- WG
-  SIGNAL current_bsn_wg          : STD_LOGIC_VECTOR(c_dp_stream_bsn_w-1 DOWNTO 0);
+  SIGNAL current_bsn_wg                 : STD_LOGIC_VECTOR(c_dp_stream_bsn_w-1 DOWNTO 0);
 
   -- WPFB
-  SIGNAL sp_subband_powers_arr2         : t_slv_64_subbands_arr(c_sdp_N_beamsets*c_sdp_N_pol-1 DOWNTO 0);   -- [sp][sub]
+  SIGNAL sp_subband_powers_arr2         : t_slv_64_subbands_arr(c_sdp_N_pol-1 DOWNTO 0);   -- [sp][sub]
 
-  SIGNAL sp_beamlet_powers_arr2         : t_slv_64_subbands_arr(c_sdp_N_beamsets*c_sdp_N_pol-1 DOWNTO 0);   -- [sp][sub]
+  SIGNAL sp_beamlet_powers_arr2         : t_slv_64_beamlets_arr(c_sdp_N_beamsets*c_sdp_N_pol_bf-1 DOWNTO 0);   -- [sp][sub]
   SIGNAL sp_beamlet_power_0             : REAL;
-  SIGNAL sp_beamlet_power_sum           : t_real_arr(c_sdp_N_beamsets*c_sdp_N_pol-1 DOWNTO 0) := (OTHERS=>0.0);
+  SIGNAL sp_beamlet_power_sum           : t_real_arr(c_sdp_N_beamsets*c_sdp_N_pol_bf-1 DOWNTO 0) := (OTHERS=>0.0);
   SIGNAL sp_beamlet_power_sum_0         : REAL;
   SIGNAL sp_beamlet_power_ratio_0       : REAL;
   SIGNAL sp_beamlet_power_sum_ratio_0   : REAL;
   SIGNAL sp_beamlet_power_leakage_sum_0 : REAL;
 
   -- 10GbE
-  CONSTANT c_exp_beamlet_index : NATURAL := NATURAL(c_subband_sp_0) * c_sdp_N_pol;
-  CONSTANT c_exp_beamlet_re : STD_LOGIC_VECTOR(7 DOWNTO 0) := x"81"; --Derived from simulation
-  CONSTANT c_exp_beamlet_im : STD_LOGIC_VECTOR(7 DOWNTO 0) := x"7F"; --Derived from simulation
+  CONSTANT c_exp_beamlet_index : NATURAL := NATURAL(c_subband_sp_0) * c_sdp_N_pol_bf;
+  CONSTANT c_exp_beamlet_re    : STD_LOGIC_VECTOR(7 DOWNTO 0) := x"81"; -- = -127, derived from simulation
+  CONSTANT c_exp_beamlet_im    : STD_LOGIC_VECTOR(7 DOWNTO 0) := x"7F"; -- = +127, derived from simulation
 
-  SIGNAL beamlet_arr2_re : t_slv_8_arr(c_sdp_cep_nof_beamlets_per_block-1 DOWNTO 0);
-  SIGNAL beamlet_arr2_im : t_slv_8_arr(c_sdp_cep_nof_beamlets_per_block-1 DOWNTO 0);
+  SIGNAL dbg_beamlet_arr_re  : t_slv_8_arr(c_sdp_cep_nof_blocks_per_packet-1 DOWNTO 0);   -- [3:0]
+  SIGNAL dbg_beamlet_arr_im  : t_slv_8_arr(c_sdp_cep_nof_blocks_per_packet-1 DOWNTO 0);   -- [3:0]
+  SIGNAL dbg_beamlet_cnt     : NATURAL;
+  SIGNAL dbg_beamlet_valid   : STD_LOGIC;
 
-  SIGNAL tr_10GbE_src_out       : t_dp_sosi;
-  SIGNAL tr_ref_clk_312         : STD_LOGIC := '0';
-  SIGNAL tr_ref_clk_156         : STD_LOGIC := '0';
-  SIGNAL tr_ref_rst_156         : STD_LOGIC := '0';
+  SIGNAL beamlet_arr2_re     : t_slv_8_arr(c_sdp_cep_nof_beamlets_per_block * c_sdp_N_pol_bf-1 DOWNTO 0);  -- [488 * 2-1:0] = [975:0]
+  SIGNAL beamlet_arr2_im     : t_slv_8_arr(c_sdp_cep_nof_beamlets_per_block * c_sdp_N_pol_bf-1 DOWNTO 0);  -- [488 * 2-1:0] = [975:0]
+
+  SIGNAL tr_10GbE_src_out    : t_dp_sosi;
+  SIGNAL tr_ref_clk_312      : STD_LOGIC := '0';
+  SIGNAL tr_ref_clk_156      : STD_LOGIC := '0';
+  SIGNAL tr_ref_rst_156      : STD_LOGIC := '0';
 
   -- DUT
   SIGNAL ext_clk             : STD_LOGIC := '0';
@@ -192,7 +200,6 @@ BEGIN
   eth_clk <= NOT eth_clk AFTER c_eth_clk_period/2;  -- Ethernet ref clock (125 MHz)
   JESD204B_REFCLK <= NOT JESD204B_REFCLK AFTER c_bck_ref_clk_period/2;  -- JESD sample clock (200MHz) 
   SA_CLK <= NOT SA_CLK AFTER c_sa_clk_period/2; -- Serial Gigabit IO sa clock (644 MHz)
-  pps_rst <= '0' AFTER c_ext_clk_period*2;
 
   INTA <= 'H';  -- pull up
   INTB <= 'H';  -- pull up
@@ -316,13 +323,12 @@ BEGIN
     VARIABLE v_sp_beamlet_power      : REAL;
     VARIABLE v_sp_subband_power      : REAL;
     VARIABLE v_W, v_T, v_U, v_S, v_B : NATURAL;  -- array indicies
+    VARIABLE v_re, v_im              : INTEGER;
+    VARIABLE v_re_exp, v_im_exp      : INTEGER;
   BEGIN
     -- Wait for DUT power up after reset
     WAIT FOR 1 us;
-    
-    proc_common_wait_until_hi_lo(ext_clk, ext_pps);
 
- 
     ----------------------------------------------------------------------------
     -- Enable UDP offload (dp_xonoff) of beamset 0
     ----------------------------------------------------------------------------
@@ -332,11 +338,15 @@ BEGIN
     ----------------------------------------------------------------------------
     -- Enable BS
     ----------------------------------------------------------------------------
-    mmf_mm_bus_wr(c_mm_file_reg_bsn_source_v2, 3,                    0, tb_clk);
-    mmf_mm_bus_wr(c_mm_file_reg_bsn_source_v2, 2,                    0, tb_clk);  -- Init BSN = 0
+    mmf_mm_bus_wr(c_mm_file_reg_bsn_source_v2, 3,                  0, tb_clk);
+    mmf_mm_bus_wr(c_mm_file_reg_bsn_source_v2, 2,                  0, tb_clk);  -- Init BSN = 0
     mmf_mm_bus_wr(c_mm_file_reg_bsn_source_v2, 1, c_nof_clk_per_sync, tb_clk);  -- nof_block_per_sync
-    mmf_mm_bus_wr(c_mm_file_reg_bsn_source_v2, 0,         16#00000003#, tb_clk);  -- Enable BS at PPS
+    mmf_mm_bus_wr(c_mm_file_reg_bsn_source_v2, 0,       16#00000003#, tb_clk);  -- Enable BS at PPS
     
+    -- Release PPS pulser, to get first PPS now and to start BSN source
+    WAIT FOR 1 us;
+    pps_rst <= '0';
+
     ----------------------------------------------------------------------------
     -- Enable WG
     ----------------------------------------------------------------------------
@@ -372,17 +382,18 @@ BEGIN
     ---------------------------------------------------------------------------   
     -- . the subband statistics are c_wpfb_sim.stat_data_sz = 2 word power values.
     -- . there are c_sdp_N_sub = 512 subbands per signal path
-    -- . one complex WPFB can process two real inputs A, B
+    -- . one complex WPFB can process two real inputs A, B, hence c_sdp_Q_fft = c_sdp_N_pol = 2
     -- . the subbands are output alternately so A0 B0 A1 B1 ... A511 B511 for input A, B
     -- . the subband statistics multiple WPFB units appear in order in the ram_st_sst address map
     -- . the subband statistics are stored first lo word 0 then hi word 1
-    
-    FOR I IN 0 TO c_sdp_N_pol*c_sdp_S_sub_bf*(c_longword_sz/c_word_sz)-1 LOOP
-      v_W := I MOD (c_longword_sz/c_word_sz);
-      v_T := (I / (c_longword_sz/c_word_sz)) MOD c_sdp_N_pol;
-      v_U := I / (c_sdp_N_pol*(c_longword_sz/c_word_sz)*c_sdp_S_sub_bf);
+
+    FOR I IN 0 TO c_sdp_N_pol*c_sdp_N_sub*c_stat_data_sz-1 LOOP  -- 2048 = 2 * 512 * 64/32
+      v_W := I MOD c_stat_data_sz;                               -- 0, 1 per statistics word
+      v_T := (I / c_stat_data_sz) MOD c_sdp_N_pol;               -- 0, 1 per pol
+      v_U := I / (c_sdp_N_pol*c_stat_data_sz*c_sdp_N_sub);       -- / 2048
       v_S := v_T + v_U * c_sdp_N_pol;
-      v_B := (I / (c_sdp_N_pol*(c_longword_sz/c_word_sz))) MOD c_sdp_S_sub_bf;
+      v_B := (I / (c_sdp_N_pol*c_stat_data_sz)) MOD c_sdp_N_sub; -- 0:511 per dual pol
+      -- Only read sp 0, pol 0 (v_S = 0)
       IF v_S=0 THEN
         IF v_W=0 THEN
           -- low part
@@ -399,27 +410,27 @@ BEGIN
     ---------------------------------------------------------------------------
     -- Read beamlet statistics
     ---------------------------------------------------------------------------
-    -- . the beamlet statistics are (c_longword_sz/c_word_sz) = 2 word power values.
+    -- . the beamlet statistics are c_stat_data_sz = 2 word power values.
     -- . there are c_sdp_S_sub_bf = 488 subbands per signal path
     -- . the subbands are output alternately so A0 B0 A1 B1 ... A5487 B487 for input A, B
     -- . the subband statistics multiple units appear in order in the ram_st_bst address map
     -- . the subband statistics are stored first lo word 0 then hi word 1
-    -- . Only read beamset 0, pol 0
-    FOR I IN 0 TO c_sdp_N_pol*c_sdp_S_sub_bf*(c_longword_sz/c_word_sz)-1 LOOP
-      v_W := I MOD (c_longword_sz/c_word_sz);
-      v_T := (I / (c_longword_sz/c_word_sz)) MOD c_sdp_N_pol;
-      v_U := I / (c_sdp_N_pol*(c_longword_sz/c_word_sz)*c_sdp_S_sub_bf);
-      v_S := v_T + v_U * c_sdp_N_pol;
-      v_B := (I / (c_sdp_N_pol*(c_longword_sz/c_word_sz))) MOD c_sdp_S_sub_bf;
+    FOR I IN 0 TO c_sdp_N_pol_bf*c_sdp_S_sub_bf*c_stat_data_sz-1 LOOP
+      v_W := I MOD c_stat_data_sz;
+      v_T := (I / c_stat_data_sz) MOD c_sdp_N_pol_bf;
+      v_U := I / (c_sdp_N_pol_bf*c_stat_data_sz*c_sdp_S_sub_bf);
+      v_S := v_T + v_U * c_sdp_N_pol_bf;
+      v_B := (I / (c_sdp_N_pol_bf*c_stat_data_sz)) MOD c_sdp_S_sub_bf;
+      -- Only read beamset 0, pol 0 (v_S = 0)
       IF v_S=0 THEN
         IF v_W=0 THEN
           -- low part
-          --mmf_mm_bus_rd(c_mm_file_ram_st_bst, I+(c_sdp_N_pol*c_sdp_N_sub*(c_longword_sz/c_word_sz)), rd_data, tb_clk);
+          --mmf_mm_bus_rd(c_mm_file_ram_st_bst, I+(c_sdp_N_pol_bf*c_sdp_N_sub*c_stat_data_sz), rd_data, tb_clk);
           mmf_mm_bus_rd(c_mm_file_ram_st_bst, I, rd_data, tb_clk);
           sp_beamlet_powers_arr2(v_S)(v_B)(31 DOWNTO 0) <= rd_data;
         ELSE      
           -- high part
-          --mmf_mm_bus_rd(c_mm_file_ram_st_bst, I+(c_sdp_N_pol*c_sdp_N_sub*(c_longword_sz/c_word_sz)), rd_data, tb_clk);
+          --mmf_mm_bus_rd(c_mm_file_ram_st_bst, I+(c_sdp_N_pol_bf*c_sdp_N_sub*c_stat_data_sz), rd_data, tb_clk);
           mmf_mm_bus_rd(c_mm_file_ram_st_bst, I, rd_data, tb_clk);
           sp_beamlet_powers_arr2(v_S)(v_B)(63 DOWNTO 32) <= rd_data;
   
@@ -436,47 +447,49 @@ BEGIN
     -- sp_beamlet_power_sum is the sum of all subband powers per SP, this value will be close to sp_beamlet_power
     -- because the input is a sinus, so most power will be in 1 subband. The sp_beamlet_power_leakage_sum shows
     -- how much power from the input sinus at a specific subband has leaked into the 511 other subbands.
-    sp_beamlet_power_0 <= REAL(TO_UINT(sp_beamlet_powers_arr2(0)(INTEGER(ROUND(c_subband_sp_0)))(61 DOWNTO 30)))*2.0**30 + 
+    sp_beamlet_power_0 <=
+        REAL(TO_UINT(sp_beamlet_powers_arr2(0)(INTEGER(ROUND(c_subband_sp_0)))(61 DOWNTO 30)))*2.0**30 +
         REAL(TO_UINT(sp_beamlet_powers_arr2(0)(INTEGER(ROUND(c_subband_sp_0)))(29 DOWNTO 0)));
 
     sp_beamlet_power_sum_0 <= sp_beamlet_power_sum(0);
     
     proc_common_wait_some_cycles(tb_clk, 1);
 
- 
     ---------------------------------------------------------------------------
     -- Read 10GbE Stream
     ---------------------------------------------------------------------------
+    -- Wait until start of a beamlet packet, capture only first block in packet
     proc_common_wait_until_high(ext_clk, tr_10GbE_src_out.sop);
     FOR I IN 0 TO 8 LOOP -- Packet header is 9.25 words wide, which can be discarded
       proc_common_wait_until_high(ext_clk, tr_10GbE_src_out.valid);
       proc_common_wait_some_cycles(ext_clk, 1);
     END LOOP;
   
-    -- First word contains 3 beamlets + 1 header part
-    beamlet_arr2_re(0) <= tr_10GbE_src_out.data(7 DOWNTO 0);
+    -- First word contains 1.5 dual pol (= XY, X) beamlets + 1 header part
+    beamlet_arr2_re(0) <= tr_10GbE_src_out.data(7 DOWNTO 0);    -- X
     beamlet_arr2_im(0) <= tr_10GbE_src_out.data(15 DOWNTO 8);
-    beamlet_arr2_re(1) <= tr_10GbE_src_out.data(23 DOWNTO 16);
+    beamlet_arr2_re(1) <= tr_10GbE_src_out.data(23 DOWNTO 16);  -- Y
     beamlet_arr2_im(1) <= tr_10GbE_src_out.data(31 DOWNTO 24);
-    beamlet_arr2_re(2) <= tr_10GbE_src_out.data(39 DOWNTO 32);
+    beamlet_arr2_re(2) <= tr_10GbE_src_out.data(39 DOWNTO 32);  -- X
     beamlet_arr2_im(2) <= tr_10GbE_src_out.data(47 DOWNTO 40);
     proc_common_wait_until_high(ext_clk, tr_10GbE_src_out.valid);
     proc_common_wait_some_cycles(ext_clk, 1);
-    FOR I IN 1 TO (c_sdp_cep_nof_beamlets_per_block/4)-1 LOOP
-      beamlet_arr2_re(I*4 -1) <= tr_10GbE_src_out.data(7 DOWNTO 0);
+    -- 2 dual pol beamlets (= Y, XY, X) /64b data word
+    FOR I IN 1 TO (c_sdp_cep_nof_beamlets_per_block/2)-1 LOOP
+      beamlet_arr2_re(I*4 -1) <= tr_10GbE_src_out.data(7 DOWNTO 0);    -- Y
       beamlet_arr2_im(I*4 -1) <= tr_10GbE_src_out.data(15 DOWNTO 8);
-      beamlet_arr2_re(I*4 +0) <= tr_10GbE_src_out.data(23 DOWNTO 16);
+      beamlet_arr2_re(I*4 +0) <= tr_10GbE_src_out.data(23 DOWNTO 16);  -- X
       beamlet_arr2_im(I*4 +0) <= tr_10GbE_src_out.data(31 DOWNTO 24);
-      beamlet_arr2_re(I*4 +1) <= tr_10GbE_src_out.data(39 DOWNTO 32);
+      beamlet_arr2_re(I*4 +1) <= tr_10GbE_src_out.data(39 DOWNTO 32);  -- Y
       beamlet_arr2_im(I*4 +1) <= tr_10GbE_src_out.data(47 DOWNTO 40);
-      beamlet_arr2_re(I*4 +2) <= tr_10GbE_src_out.data(55 DOWNTO 48);
+      beamlet_arr2_re(I*4 +2) <= tr_10GbE_src_out.data(55 DOWNTO 48);  -- X
       beamlet_arr2_im(I*4 +2) <= tr_10GbE_src_out.data(63 DOWNTO 56);
       proc_common_wait_until_high(ext_clk, tr_10GbE_src_out.valid);
       proc_common_wait_some_cycles(ext_clk, 1);
     END LOOP;
-
-    beamlet_arr2_re(c_sdp_cep_nof_beamlets_per_block-1) <= tr_10GbE_src_out.data(55 DOWNTO 48);
-    beamlet_arr2_im(c_sdp_cep_nof_beamlets_per_block-1) <= tr_10GbE_src_out.data(63 DOWNTO 56);
+    -- Last word contains last 0.5 (= Y) dual pol beamlet
+    beamlet_arr2_re(c_sdp_cep_nof_beamlets_per_block * c_sdp_N_pol_bf-1) <= tr_10GbE_src_out.data(55 DOWNTO 48);  -- Y
+    beamlet_arr2_im(c_sdp_cep_nof_beamlets_per_block * c_sdp_N_pol_bf-1) <= tr_10GbE_src_out.data(63 DOWNTO 56);
 
     ---------------------------------------------------------------------------
     -- Verify subband statistics
@@ -522,8 +535,10 @@ BEGIN
     ---------------------------------------------------------------------------
     -- Verify 10GbE UDP offload
     --------------------------------------------------------------------------- 
-    ASSERT beamlet_arr2_re(c_exp_beamlet_index) = c_exp_beamlet_re REPORT "Wrong 10GbE output (re)" SEVERITY ERROR;
-    ASSERT beamlet_arr2_im(c_exp_beamlet_index) = c_exp_beamlet_im REPORT "Wrong 10GbE output (im)" SEVERITY ERROR;
+    v_re := TO_SINT(beamlet_arr2_re(c_exp_beamlet_index));  v_re_exp := TO_SINT(c_exp_beamlet_re);  -- compare as signed integers,
+    v_im := TO_SINT(beamlet_arr2_im(c_exp_beamlet_index));  v_im_exp := TO_SINT(c_exp_beamlet_im);  -- so a failure reports readable values
+    ASSERT v_re = v_re_exp REPORT "Wrong 10GbE output (re) " & INTEGER'IMAGE(v_re) & " != " & INTEGER'IMAGE(v_re_exp) SEVERITY ERROR;
+    ASSERT v_im = v_im_exp REPORT "Wrong 10GbE output (im) " & INTEGER'IMAGE(v_im) & " != " & INTEGER'IMAGE(v_im_exp) SEVERITY ERROR;
 
     ---------------------------------------------------------------------------
     -- End Simulation 
@@ -534,4 +549,66 @@ BEGIN
     WAIT;
   END PROCESS;
 
+  -----------------------------------------------------------------------------
+  -- Debugging signals
+  -----------------------------------------------------------------------------
+  -- Show received beamlets from 10GbE stream in Wave Window, one 64b data word at a time
+  -- . expect c_nof_block_per_sync / c_sdp_cep_nof_blocks_per_packet * c_sdp_N_beamsets
+  --   = 16 / 4 * 2 = 4 * 2 = 8 packets per sync interval
+  -- . expect c_sdp_cep_nof_beamlets_per_block = c_sdp_S_sub_bf = 488 dual pol and complex
+  --   beamlets per block, 4 blocks per packet, so 2 dual pol beamlets/64b data word.
+  p_dbg_10GbE_beamlets : PROCESS
+  BEGIN
+    -- Wait until start of (next) beamlet packet, clear count and valid while waiting
+    dbg_beamlet_cnt   <= 0;
+    dbg_beamlet_valid <= '0';
+    proc_common_wait_until_high(ext_clk, tr_10GbE_src_out.sop);
+    FOR I IN 0 TO 8 LOOP -- Packet header is 9.25 words wide, which can be discarded
+      proc_common_wait_until_high(ext_clk, tr_10GbE_src_out.valid);
+      -- Use at least one WAIT instead of proc_common_wait_some_cycles() to avoid Modelsim warning:
+      -- (vcom-1090) Possible infinite loop: Process contains no WAIT statement.
+      WAIT UNTIL rising_edge(ext_clk);
+    END LOOP;
+
+    -- First word contains 1.5 dual pol (= XY, X) beamlets + 1 header part
+    dbg_beamlet_arr_re(0) <= tr_10GbE_src_out.data(7 DOWNTO 0);    -- X
+    dbg_beamlet_arr_im(0) <= tr_10GbE_src_out.data(15 DOWNTO 8);
+    dbg_beamlet_arr_re(1) <= tr_10GbE_src_out.data(23 DOWNTO 16);  -- Y
+    dbg_beamlet_arr_im(1) <= tr_10GbE_src_out.data(31 DOWNTO 24);
+    dbg_beamlet_arr_re(2) <= tr_10GbE_src_out.data(39 DOWNTO 32);  -- X
+    dbg_beamlet_arr_im(2) <= tr_10GbE_src_out.data(47 DOWNTO 40);
+    dbg_beamlet_arr_re(3) <= (OTHERS => '1');  -- mark unused octet by 0xFF = -1
+    dbg_beamlet_arr_im(3) <= (OTHERS => '1');  -- mark unused octet by 0xFF = -1
+    dbg_beamlet_cnt   <= dbg_beamlet_cnt + 3;
+    dbg_beamlet_valid <= '1';
+    proc_common_wait_until_high(ext_clk, tr_10GbE_src_out.valid);
+    proc_common_wait_some_cycles(ext_clk, 1);
+    -- 2 dual pol beamlets (= Y, XY, X) /64b data word, for all blocks in the packet
+    FOR I IN 1 TO (c_sdp_cep_nof_beamlets_per_block*c_sdp_cep_nof_blocks_per_packet/2)-1 LOOP
+      dbg_beamlet_arr_re(0) <= tr_10GbE_src_out.data(7 DOWNTO 0);    -- Y
+      dbg_beamlet_arr_im(0) <= tr_10GbE_src_out.data(15 DOWNTO 8);
+      dbg_beamlet_arr_re(1) <= tr_10GbE_src_out.data(23 DOWNTO 16);  -- X
+      dbg_beamlet_arr_im(1) <= tr_10GbE_src_out.data(31 DOWNTO 24);
+      dbg_beamlet_arr_re(2) <= tr_10GbE_src_out.data(39 DOWNTO 32);  -- Y
+      dbg_beamlet_arr_im(2) <= tr_10GbE_src_out.data(47 DOWNTO 40);
+      dbg_beamlet_arr_re(3) <= tr_10GbE_src_out.data(55 DOWNTO 48);  -- X
+      dbg_beamlet_arr_im(3) <= tr_10GbE_src_out.data(63 DOWNTO 56);
+      dbg_beamlet_cnt   <= (dbg_beamlet_cnt + 4) MOD c_sdp_cep_nof_beamlets_per_block;  -- wrap, 4 blocks/packet (NOTE(review): cnt steps per X or Y value, confirm wrap value)
+      dbg_beamlet_valid <= '1';
+      proc_common_wait_until_high(ext_clk, tr_10GbE_src_out.valid);
+      proc_common_wait_some_cycles(ext_clk, 1);
+    END LOOP;
+    -- Last word contains last 0.5 (= Y) dual pol beamlet
+    dbg_beamlet_arr_re(0) <= tr_10GbE_src_out.data(55 DOWNTO 48);  -- Y
+    dbg_beamlet_arr_im(0) <= tr_10GbE_src_out.data(63 DOWNTO 56);
+    FOR J IN 1 TO dbg_beamlet_arr_re'HIGH LOOP
+      dbg_beamlet_arr_re(J) <= (OTHERS => '1');  -- mark unused octet by 0xFF = -1
+      dbg_beamlet_arr_im(J) <= (OTHERS => '1');  -- mark unused octet by 0xFF = -1
+    END LOOP;
+    dbg_beamlet_cnt   <= dbg_beamlet_cnt + 1;
+    dbg_beamlet_valid <= '1';
+    -- Hold last word for one cycle, else the process restart overrules dbg_beamlet_cnt and dbg_beamlet_valid in the same delta cycle
+    proc_common_wait_some_cycles(ext_clk, 1);
+  END PROCESS;
+
 END tb;
diff --git a/applications/lofar2/designs/lofar2_unb2b_sdp_station/revisions/lofar2_unb2b_sdp_station_bf/tb_lofar2_unb2b_sdp_station_bf_bst_offload.vhd b/applications/lofar2/designs/lofar2_unb2b_sdp_station/revisions/lofar2_unb2b_sdp_station_bf/tb_lofar2_unb2b_sdp_station_bf_bst_offload.vhd
index a074912f158de3a06665064efad16d4bc15d5e06..c3cd741d7fb6f3a0ed5ecdaa41b34cab956ef939 100644
--- a/applications/lofar2/designs/lofar2_unb2b_sdp_station/revisions/lofar2_unb2b_sdp_station_bf/tb_lofar2_unb2b_sdp_station_bf_bst_offload.vhd
+++ b/applications/lofar2/designs/lofar2_unb2b_sdp_station/revisions/lofar2_unb2b_sdp_station_bf/tb_lofar2_unb2b_sdp_station_bf_bst_offload.vhd
@@ -35,6 +35,7 @@
 --   > as 7    # default
 --   > as 12   # for detailed debugging
 --   > run -a  
+--   Takes about 10 min
 --
 -------------------------------------------------------------------------------
 LIBRARY IEEE, common_lib, unb2b_board_lib, i2c_lib, mm_lib, dp_lib, diag_lib, lofar2_sdp_lib, wpfb_lib, lofar2_unb2b_sdp_station_lib, eth_lib;
diff --git a/applications/lofar2/designs/lofar2_unb2c_sdp_station/revisions/lofar2_unb2c_sdp_station_bf/tb_lofar2_unb2c_sdp_station_bf.vhd b/applications/lofar2/designs/lofar2_unb2c_sdp_station/revisions/lofar2_unb2c_sdp_station_bf/tb_lofar2_unb2c_sdp_station_bf.vhd
index 4cdb49bef0859e4a0806939659c6fa8b1b179e26..be921d69725e723ca1cc8c42cd2cfb3c6f22f0b5 100644
--- a/applications/lofar2/designs/lofar2_unb2c_sdp_station/revisions/lofar2_unb2c_sdp_station_bf/tb_lofar2_unb2c_sdp_station_bf.vhd
+++ b/applications/lofar2/designs/lofar2_unb2c_sdp_station/revisions/lofar2_unb2c_sdp_station_bf/tb_lofar2_unb2c_sdp_station_bf.vhd
@@ -46,6 +46,7 @@
 --   > as 7    # default
 --   > as 12   # for detailed debugging
 --   > run -a  
+--   Takes about 45 min
 --
 -------------------------------------------------------------------------------
 LIBRARY IEEE, common_lib, unb2c_board_lib, i2c_lib, mm_lib, dp_lib, diag_lib, lofar2_sdp_lib, wpfb_lib, tech_pll_lib, tr_10GbE_lib, lofar2_unb2c_sdp_station_lib;
@@ -87,7 +88,8 @@ ARCHITECTURE tb OF tb_lofar2_unb2c_sdp_station_bf IS
   CONSTANT c_nof_clk_per_sync    : NATURAL := c_nof_block_per_sync*c_sdp_N_fft; 
   CONSTANT c_pps_period          : NATURAL := c_nof_clk_per_sync;
   CONSTANT c_wpfb_sim            : t_wpfb := func_wpfb_set_nof_block_per_sync(c_sdp_wpfb_subbands, c_nof_block_per_sync);
-   
+  CONSTANT c_stat_data_sz        : NATURAL := c_longword_sz/c_word_sz;  -- = 2
+
   CONSTANT c_percentage          : REAL := 0.05;  -- percentage that actual value may differ from expected value
   CONSTANT c_lo_factor           : REAL := 1.0 - c_percentage;  -- lower boundary  
   CONSTANT c_hi_factor           : REAL := 1.0 + c_percentage;  -- higher boundary
@@ -109,7 +111,8 @@ ARCHITECTURE tb OF tb_lofar2_unb2c_sdp_station_bf IS
   CONSTANT c_exp_beamlet_power_sp_0         : REAL := c_exp_wg_power_sp_0 * c_exp_sp_beamlet_power_ratio;
 
   TYPE t_real_arr IS ARRAY (INTEGER RANGE <>) OF REAL; 
-  TyPE t_slv_64_subbands_arr IS ARRAY (INTEGER RANGE <>) OF t_slv_64_arr(0 TO c_sdp_S_sub_bf);
+  TYPE t_slv_64_subbands_arr IS ARRAY (INTEGER RANGE <>) OF t_slv_64_arr(0 TO c_sdp_N_sub);
+  TYPE t_slv_64_beamlets_arr IS ARRAY (INTEGER RANGE <>) OF t_slv_64_arr(0 TO c_sdp_S_sub_bf);
 
   -- MM  
   CONSTANT c_mm_file_reg_ppsh             : STRING := mmf_unb_file_prefix(c_unb_nr, c_node_nr) & "PIO_PPS";
@@ -127,31 +130,36 @@ ARCHITECTURE tb OF tb_lofar2_unb2c_sdp_station_bf IS
   SIGNAL rd_data             : STD_LOGIC_VECTOR(c_32-1 DOWNTO 0);
 
   -- WG
-  SIGNAL current_bsn_wg          : STD_LOGIC_VECTOR(c_dp_stream_bsn_w-1 DOWNTO 0);
+  SIGNAL current_bsn_wg                 : STD_LOGIC_VECTOR(c_dp_stream_bsn_w-1 DOWNTO 0);
 
   -- WPFB
-  SIGNAL sp_subband_powers_arr2         : t_slv_64_subbands_arr(c_sdp_N_beamsets*c_sdp_N_pol-1 DOWNTO 0);   -- [sp][sub]
+  SIGNAL sp_subband_powers_arr2         : t_slv_64_subbands_arr(c_sdp_N_pol-1 DOWNTO 0);   -- [sp][sub]
 
-  SIGNAL sp_beamlet_powers_arr2         : t_slv_64_subbands_arr(c_sdp_N_beamsets*c_sdp_N_pol-1 DOWNTO 0);   -- [sp][sub]
+  SIGNAL sp_beamlet_powers_arr2         : t_slv_64_beamlets_arr(c_sdp_N_beamsets*c_sdp_N_pol_bf-1 DOWNTO 0);   -- [sp][sub]
   SIGNAL sp_beamlet_power_0             : REAL;
-  SIGNAL sp_beamlet_power_sum           : t_real_arr(c_sdp_N_beamsets*c_sdp_N_pol-1 DOWNTO 0) := (OTHERS=>0.0);
+  SIGNAL sp_beamlet_power_sum           : t_real_arr(c_sdp_N_beamsets*c_sdp_N_pol_bf-1 DOWNTO 0) := (OTHERS=>0.0);
   SIGNAL sp_beamlet_power_sum_0         : REAL;
   SIGNAL sp_beamlet_power_ratio_0       : REAL;
   SIGNAL sp_beamlet_power_sum_ratio_0   : REAL;
   SIGNAL sp_beamlet_power_leakage_sum_0 : REAL;
 
   -- 10GbE
-  CONSTANT c_exp_beamlet_index : NATURAL := NATURAL(c_subband_sp_0) * c_sdp_N_pol;
-  CONSTANT c_exp_beamlet_re : STD_LOGIC_VECTOR(7 DOWNTO 0) := x"81"; --Derived from simulation
-  CONSTANT c_exp_beamlet_im : STD_LOGIC_VECTOR(7 DOWNTO 0) := x"7F"; --Derived from simulation
+  CONSTANT c_exp_beamlet_index : NATURAL := NATURAL(c_subband_sp_0) * c_sdp_N_pol_bf;
+  CONSTANT c_exp_beamlet_re    : STD_LOGIC_VECTOR(7 DOWNTO 0) := x"81"; -- = -127, derived from simulation
+  CONSTANT c_exp_beamlet_im    : STD_LOGIC_VECTOR(7 DOWNTO 0) := x"7F"; -- = +127, derived from simulation
 
-  SIGNAL beamlet_arr2_re : t_slv_8_arr(c_sdp_cep_nof_beamlets_per_block-1 DOWNTO 0);
-  SIGNAL beamlet_arr2_im : t_slv_8_arr(c_sdp_cep_nof_beamlets_per_block-1 DOWNTO 0);
+  SIGNAL dbg_beamlet_arr_re  : t_slv_8_arr(c_sdp_cep_nof_blocks_per_packet-1 DOWNTO 0);   -- [3:0]
+  SIGNAL dbg_beamlet_arr_im  : t_slv_8_arr(c_sdp_cep_nof_blocks_per_packet-1 DOWNTO 0);   -- [3:0]
+  SIGNAL dbg_beamlet_cnt     : NATURAL;
+  SIGNAL dbg_beamlet_valid   : STD_LOGIC;
 
-  SIGNAL tr_10GbE_src_out       : t_dp_sosi;
-  SIGNAL tr_ref_clk_312         : STD_LOGIC := '0';
-  SIGNAL tr_ref_clk_156         : STD_LOGIC := '0';
-  SIGNAL tr_ref_rst_156         : STD_LOGIC := '0';
+  SIGNAL beamlet_arr2_re     : t_slv_8_arr(c_sdp_cep_nof_beamlets_per_block * c_sdp_N_pol_bf-1 DOWNTO 0);  -- [488 * 2-1:0] = [975:0]
+  SIGNAL beamlet_arr2_im     : t_slv_8_arr(c_sdp_cep_nof_beamlets_per_block * c_sdp_N_pol_bf-1 DOWNTO 0);  -- [488 * 2-1:0] = [975:0]
+
+  SIGNAL tr_10GbE_src_out    : t_dp_sosi;
+  SIGNAL tr_ref_clk_312      : STD_LOGIC := '0';
+  SIGNAL tr_ref_clk_156      : STD_LOGIC := '0';
+  SIGNAL tr_ref_rst_156      : STD_LOGIC := '0';
 
   -- DUT
   SIGNAL ext_clk             : STD_LOGIC := '0';
@@ -298,13 +306,12 @@ BEGIN
     VARIABLE v_sp_beamlet_power      : REAL;
     VARIABLE v_sp_subband_power      : REAL;
     VARIABLE v_W, v_T, v_U, v_S, v_B : NATURAL;  -- array indicies
+    VARIABLE v_re, v_im              : INTEGER;
+    VARIABLE v_re_exp, v_im_exp      : INTEGER;
   BEGIN
     -- Wait for DUT power up after reset
     WAIT FOR 1 us;
     
-    proc_common_wait_until_hi_lo(ext_clk, ext_pps);
-
- 
     ----------------------------------------------------------------------------
     -- Enable UDP offload (dp_xonoff) of beamset 0
     ----------------------------------------------------------------------------
@@ -319,6 +326,10 @@ BEGIN
     mmf_mm_bus_wr(c_mm_file_reg_bsn_source_v2, 1, c_nof_clk_per_sync, tb_clk);  -- nof_block_per_sync
     mmf_mm_bus_wr(c_mm_file_reg_bsn_source_v2, 0,         16#00000003#, tb_clk);  -- Enable BS at PPS
     
+    -- Release PPS pulser, to get first PPS now and to start BSN source
+    WAIT FOR 1 us;
+    pps_rst <= '0';
+
     ----------------------------------------------------------------------------
     -- Enable WG
     ----------------------------------------------------------------------------
@@ -354,17 +365,18 @@ BEGIN
     ---------------------------------------------------------------------------   
     -- . the subband statistics are c_wpfb_sim.stat_data_sz = 2 word power values.
     -- . there are c_sdp_N_sub = 512 subbands per signal path
-    -- . one complex WPFB can process two real inputs A, B
+    -- . one complex WPFB can process two real inputs A, B, hence c_sdp_Q_fft = c_sdp_N_pol = 2
     -- . the subbands are output alternately so A0 B0 A1 B1 ... A511 B511 for input A, B
     -- . the subband statistics multiple WPFB units appear in order in the ram_st_sst address map
     -- . the subband statistics are stored first lo word 0 then hi word 1
     
-    FOR I IN 0 TO c_sdp_N_pol*c_sdp_S_sub_bf*(c_longword_sz/c_word_sz)-1 LOOP
-      v_W := I MOD (c_longword_sz/c_word_sz);
-      v_T := (I / (c_longword_sz/c_word_sz)) MOD c_sdp_N_pol;
-      v_U := I / (c_sdp_N_pol*(c_longword_sz/c_word_sz)*c_sdp_S_sub_bf);
+    FOR I IN 0 TO c_sdp_N_pol*c_sdp_N_sub*c_stat_data_sz-1 LOOP  -- 2048 = 2 * 512 * 64/32
+      v_W := I MOD c_stat_data_sz;                               -- 0, 1 per statistics word
+      v_T := (I / c_stat_data_sz) MOD c_sdp_N_pol;               -- 0, 1 per pol
+      v_U := I / (c_sdp_N_pol*c_stat_data_sz*c_sdp_N_sub);       -- / 2048
       v_S := v_T + v_U * c_sdp_N_pol;
-      v_B := (I / (c_sdp_N_pol*(c_longword_sz/c_word_sz))) MOD c_sdp_S_sub_bf;
+      v_B := (I / (c_sdp_N_pol*c_stat_data_sz)) MOD c_sdp_N_sub; -- 0:511 per dual pol
+      -- Only read sp 0, pol 0 (v_S = 0)
       IF v_S=0 THEN
         IF v_W=0 THEN
           -- low part
@@ -381,27 +393,27 @@ BEGIN
     ---------------------------------------------------------------------------
     -- Read beamlet statistics
     ---------------------------------------------------------------------------
-    -- . the beamlet statistics are (c_longword_sz/c_word_sz) = 2 word power values.
+    -- . the beamlet statistics are c_stat_data_sz = 2 word power values.
     -- . there are c_sdp_S_sub_bf = 488 subbands per signal path
     -- . the subbands are output alternately so A0 B0 A1 B1 ... A5487 B487 for input A, B
     -- . the subband statistics multiple units appear in order in the ram_st_bst address map
     -- . the subband statistics are stored first lo word 0 then hi word 1
-    -- . Only read beamset 0, pol 0
-    FOR I IN 0 TO c_sdp_N_pol*c_sdp_S_sub_bf*(c_longword_sz/c_word_sz)-1 LOOP
-      v_W := I MOD (c_longword_sz/c_word_sz);
-      v_T := (I / (c_longword_sz/c_word_sz)) MOD c_sdp_N_pol;
-      v_U := I / (c_sdp_N_pol*(c_longword_sz/c_word_sz)*c_sdp_S_sub_bf);
-      v_S := v_T + v_U * c_sdp_N_pol;
-      v_B := (I / (c_sdp_N_pol*(c_longword_sz/c_word_sz))) MOD c_sdp_S_sub_bf;
+    FOR I IN 0 TO c_sdp_N_pol_bf*c_sdp_S_sub_bf*c_stat_data_sz-1 LOOP
+      v_W := I MOD c_stat_data_sz;
+      v_T := (I / c_stat_data_sz) MOD c_sdp_N_pol_bf;
+      v_U := I / (c_sdp_N_pol_bf*c_stat_data_sz*c_sdp_S_sub_bf);
+      v_S := v_T + v_U * c_sdp_N_pol_bf;
+      v_B := (I / (c_sdp_N_pol_bf*c_stat_data_sz)) MOD c_sdp_S_sub_bf;
+      -- Only read beamset 0, pol 0 (v_S = 0)
       IF v_S=0 THEN
         IF v_W=0 THEN
           -- low part
-          --mmf_mm_bus_rd(c_mm_file_ram_st_bst, I+(c_sdp_N_pol*c_sdp_N_sub*(c_longword_sz/c_word_sz)), rd_data, tb_clk);
+          --mmf_mm_bus_rd(c_mm_file_ram_st_bst, I+(c_sdp_N_pol_bf*c_sdp_N_sub*c_stat_data_sz), rd_data, tb_clk);
           mmf_mm_bus_rd(c_mm_file_ram_st_bst, I, rd_data, tb_clk);
           sp_beamlet_powers_arr2(v_S)(v_B)(31 DOWNTO 0) <= rd_data;
         ELSE      
           -- high part
-          --mmf_mm_bus_rd(c_mm_file_ram_st_bst, I+(c_sdp_N_pol*c_sdp_N_sub*(c_longword_sz/c_word_sz)), rd_data, tb_clk);
+          --mmf_mm_bus_rd(c_mm_file_ram_st_bst, I+(c_sdp_N_pol_bf*c_sdp_N_sub*c_stat_data_sz), rd_data, tb_clk);
           mmf_mm_bus_rd(c_mm_file_ram_st_bst, I, rd_data, tb_clk);
           sp_beamlet_powers_arr2(v_S)(v_B)(63 DOWNTO 32) <= rd_data;
   
@@ -418,47 +430,49 @@ BEGIN
     -- sp_beamlet_power_sum is the sum of all subband powers per SP, this value will be close to sp_beamlet_power
     -- because the input is a sinus, so most power will be in 1 subband. The sp_beamlet_power_leakage_sum shows
     -- how much power from the input sinus at a specific subband has leaked into the 511 other subbands.
-    sp_beamlet_power_0 <= REAL(TO_UINT(sp_beamlet_powers_arr2(0)(INTEGER(ROUND(c_subband_sp_0)))(61 DOWNTO 30)))*2.0**30 + 
+    sp_beamlet_power_0 <=
+        REAL(TO_UINT(sp_beamlet_powers_arr2(0)(INTEGER(ROUND(c_subband_sp_0)))(61 DOWNTO 30)))*2.0**30 +
         REAL(TO_UINT(sp_beamlet_powers_arr2(0)(INTEGER(ROUND(c_subband_sp_0)))(29 DOWNTO 0)));
 
     sp_beamlet_power_sum_0 <= sp_beamlet_power_sum(0);
     
     proc_common_wait_some_cycles(tb_clk, 1);
-
  
     ---------------------------------------------------------------------------
     -- Read 10GbE Stream
     ---------------------------------------------------------------------------
+    -- Wait until start of a beamlet packet, capture only first block in packet
     proc_common_wait_until_high(ext_clk, tr_10GbE_src_out.sop);
     FOR I IN 0 TO 8 LOOP -- Packet header is 9.25 words wide, which can be discarded
       proc_common_wait_until_high(ext_clk, tr_10GbE_src_out.valid);
       proc_common_wait_some_cycles(ext_clk, 1);
     END LOOP;
-  
-    -- First word contains 3 beamlets + 1 header part
-    beamlet_arr2_re(0) <= tr_10GbE_src_out.data(7 DOWNTO 0);
+
+    -- First word contains 1.5 dual pol (= XY, X) beamlets + 1 header part
+    beamlet_arr2_re(0) <= tr_10GbE_src_out.data(7 DOWNTO 0);    -- X
     beamlet_arr2_im(0) <= tr_10GbE_src_out.data(15 DOWNTO 8);
-    beamlet_arr2_re(1) <= tr_10GbE_src_out.data(23 DOWNTO 16);
+    beamlet_arr2_re(1) <= tr_10GbE_src_out.data(23 DOWNTO 16);  -- Y
     beamlet_arr2_im(1) <= tr_10GbE_src_out.data(31 DOWNTO 24);
-    beamlet_arr2_re(2) <= tr_10GbE_src_out.data(39 DOWNTO 32);
+    beamlet_arr2_re(2) <= tr_10GbE_src_out.data(39 DOWNTO 32);  -- X
     beamlet_arr2_im(2) <= tr_10GbE_src_out.data(47 DOWNTO 40);
     proc_common_wait_until_high(ext_clk, tr_10GbE_src_out.valid);
     proc_common_wait_some_cycles(ext_clk, 1);
-    FOR I IN 1 TO (c_sdp_cep_nof_beamlets_per_block/4)-1 LOOP
-      beamlet_arr2_re(I*4 -1) <= tr_10GbE_src_out.data(7 DOWNTO 0);
+    -- 2 dual pol beamlets (= Y, XY, X) /64b data word
+    FOR I IN 1 TO (c_sdp_cep_nof_beamlets_per_block/2)-1 LOOP
+      beamlet_arr2_re(I*4 -1) <= tr_10GbE_src_out.data(7 DOWNTO 0);    -- Y
       beamlet_arr2_im(I*4 -1) <= tr_10GbE_src_out.data(15 DOWNTO 8);
-      beamlet_arr2_re(I*4 +0) <= tr_10GbE_src_out.data(23 DOWNTO 16);
+      beamlet_arr2_re(I*4 +0) <= tr_10GbE_src_out.data(23 DOWNTO 16);  -- X
       beamlet_arr2_im(I*4 +0) <= tr_10GbE_src_out.data(31 DOWNTO 24);
-      beamlet_arr2_re(I*4 +1) <= tr_10GbE_src_out.data(39 DOWNTO 32);
+      beamlet_arr2_re(I*4 +1) <= tr_10GbE_src_out.data(39 DOWNTO 32);  -- Y
       beamlet_arr2_im(I*4 +1) <= tr_10GbE_src_out.data(47 DOWNTO 40);
-      beamlet_arr2_re(I*4 +2) <= tr_10GbE_src_out.data(55 DOWNTO 48);
+      beamlet_arr2_re(I*4 +2) <= tr_10GbE_src_out.data(55 DOWNTO 48);  -- X
       beamlet_arr2_im(I*4 +2) <= tr_10GbE_src_out.data(63 DOWNTO 56);
       proc_common_wait_until_high(ext_clk, tr_10GbE_src_out.valid);
       proc_common_wait_some_cycles(ext_clk, 1);
     END LOOP;
-
-    beamlet_arr2_re(c_sdp_cep_nof_beamlets_per_block-1) <= tr_10GbE_src_out.data(55 DOWNTO 48);
-    beamlet_arr2_im(c_sdp_cep_nof_beamlets_per_block-1) <= tr_10GbE_src_out.data(63 DOWNTO 56);
+    -- Last word contains last 0.5 (= Y) dual pol beamlet
+    beamlet_arr2_re(c_sdp_cep_nof_beamlets_per_block * c_sdp_N_pol_bf-1) <= tr_10GbE_src_out.data(55 DOWNTO 48);  -- Y
+    beamlet_arr2_im(c_sdp_cep_nof_beamlets_per_block * c_sdp_N_pol_bf-1) <= tr_10GbE_src_out.data(63 DOWNTO 56);
 
     ---------------------------------------------------------------------------
     -- Verify subband statistics
@@ -503,9 +517,11 @@ BEGIN
 
     ---------------------------------------------------------------------------
     -- Verify 10GbE UDP offload
-    --------------------------------------------------------------------------- 
-    ASSERT beamlet_arr2_re(c_exp_beamlet_index) = c_exp_beamlet_re REPORT "Wrong 10GbE output (re)" SEVERITY ERROR;
-    ASSERT beamlet_arr2_im(c_exp_beamlet_index) = c_exp_beamlet_im REPORT "Wrong 10GbE output (im)" SEVERITY ERROR;
+    ---------------------------------------------------------------------------
+    v_re := TO_SINT(beamlet_arr2_re(c_exp_beamlet_index));  v_re_exp := TO_SINT(c_exp_beamlet_re);  -- captured and expected real part
+    v_im := TO_SINT(beamlet_arr2_im(c_exp_beamlet_index));  v_im_exp := TO_SINT(c_exp_beamlet_im);  -- captured and expected imag part
+    ASSERT v_re = v_re_exp REPORT "Wrong 10GbE output (re) " & INTEGER'IMAGE(v_re) & " != " & INTEGER'IMAGE(v_re_exp) SEVERITY ERROR;
+    ASSERT v_im = v_im_exp REPORT "Wrong 10GbE output (im) " & INTEGER'IMAGE(v_im) & " != " & INTEGER'IMAGE(v_im_exp) SEVERITY ERROR;
 
     ---------------------------------------------------------------------------
     -- End Simulation 
@@ -516,4 +532,66 @@ BEGIN
     WAIT;
   END PROCESS;
 
+  -----------------------------------------------------------------------------
+  -- Debugging signals
+  -----------------------------------------------------------------------------
+  -- Show received beamlets from 10GbE stream in Wave Window
+  -- . expect c_nof_block_per_sync / c_sdp_cep_nof_blocks_per_packet * c_sdp_N_beamsets
+  --   = 16 / 4 * 2 = 4 * 2 = 8 packets per sync interval
+  -- . expect c_sdp_cep_nof_beamlets_per_block = c_sdp_S_sub_bf = 488 dual pol
+  --   and complex beamlets per packet, so 2 dual pol beamlets/64b data word.
+  p_dbg_10GbE_beamlets : PROCESS  -- no sensitivity list: after the last statement the process restarts at BEGIN
+  BEGIN
+    -- Clear the debug count and valid, then wait until start of (next) beamlet packet
+    dbg_beamlet_cnt   <= 0;
+    dbg_beamlet_valid <= '0';
+    proc_common_wait_until_high(ext_clk, tr_10GbE_src_out.sop);
+    FOR I IN 0 TO 8 LOOP -- Skip 9 whole header words, the remaining 0.25 header word shares the first data word
+      proc_common_wait_until_high(ext_clk, tr_10GbE_src_out.valid);
+      -- Use at least one WAIT instead of proc_common_wait_some_cycles() to avoid Modelsim warning:
+      -- (vcom-1090) Possible infinite loop: Process contains no WAIT statement.
+      WAIT UNTIL rising_edge(ext_clk);
+    END LOOP;
+
+    -- First word contains 1.5 dual pol (= XY, X) beamlets + 1 header part, so only 3 of the 4 debug slots get data
+    dbg_beamlet_arr_re(0) <= tr_10GbE_src_out.data(7 DOWNTO 0);    -- X
+    dbg_beamlet_arr_im(0) <= tr_10GbE_src_out.data(15 DOWNTO 8);
+    dbg_beamlet_arr_re(1) <= tr_10GbE_src_out.data(23 DOWNTO 16);  -- Y
+    dbg_beamlet_arr_im(1) <= tr_10GbE_src_out.data(31 DOWNTO 24);
+    dbg_beamlet_arr_re(2) <= tr_10GbE_src_out.data(39 DOWNTO 32);  -- X
+    dbg_beamlet_arr_im(2) <= tr_10GbE_src_out.data(47 DOWNTO 40);
+    dbg_beamlet_arr_re(3) <= (OTHERS => '1');  -- mark unused octet by 0xFF = -1
+    dbg_beamlet_arr_im(3) <= (OTHERS => '1');  -- mark unused octet by 0xFF = -1
+    dbg_beamlet_cnt   <= dbg_beamlet_cnt + 3;  -- 3 complex values (X, Y, X) taken from the first word
+    dbg_beamlet_valid <= '1';
+    proc_common_wait_until_high(ext_clk, tr_10GbE_src_out.valid);
+    proc_common_wait_some_cycles(ext_clk, 1);
+    -- 2 dual pol beamlets (= Y, XY, X) /64b data word, loop covers all full words of all blocks in the packet
+    FOR I IN 1 TO (c_sdp_cep_nof_beamlets_per_block*c_sdp_cep_nof_blocks_per_packet/2)-1 LOOP
+      dbg_beamlet_arr_re(0) <= tr_10GbE_src_out.data(7 DOWNTO 0);    -- Y
+      dbg_beamlet_arr_im(0) <= tr_10GbE_src_out.data(15 DOWNTO 8);
+      dbg_beamlet_arr_re(1) <= tr_10GbE_src_out.data(23 DOWNTO 16);  -- X
+      dbg_beamlet_arr_im(1) <= tr_10GbE_src_out.data(31 DOWNTO 24);
+      dbg_beamlet_arr_re(2) <= tr_10GbE_src_out.data(39 DOWNTO 32);  -- Y
+      dbg_beamlet_arr_im(2) <= tr_10GbE_src_out.data(47 DOWNTO 40);
+      dbg_beamlet_arr_re(3) <= tr_10GbE_src_out.data(55 DOWNTO 48);  -- X
+      dbg_beamlet_arr_im(3) <= tr_10GbE_src_out.data(63 DOWNTO 56);
+      dbg_beamlet_cnt   <= (dbg_beamlet_cnt + 4) MOD c_sdp_cep_nof_beamlets_per_block;  -- 4 values/word, 4 blocks/packet; NOTE(review): X and Y are counted separately, confirm the wrap should not be at beamlets_per_block * c_sdp_N_pol_bf
+      dbg_beamlet_valid <= '1';
+      proc_common_wait_until_high(ext_clk, tr_10GbE_src_out.valid);
+      proc_common_wait_some_cycles(ext_clk, 1);
+    END LOOP;
+    -- Last word contains last 0.5 (= Y) dual pol beamlet, the other three debug slots are marked unused
+    dbg_beamlet_arr_re(0) <= tr_10GbE_src_out.data(55 DOWNTO 48);  -- Y
+    dbg_beamlet_arr_im(0) <= tr_10GbE_src_out.data(63 DOWNTO 56);
+    dbg_beamlet_arr_re(1) <= (OTHERS => '1');  -- mark unused octet by 0xFF = -1
+    dbg_beamlet_arr_im(1) <= (OTHERS => '1');  -- mark unused octet by 0xFF = -1
+    dbg_beamlet_arr_re(2) <= (OTHERS => '1');  -- mark unused octet by 0xFF = -1
+    dbg_beamlet_arr_im(2) <= (OTHERS => '1');  -- mark unused octet by 0xFF = -1
+    dbg_beamlet_arr_re(3) <= (OTHERS => '1');  -- mark unused octet by 0xFF = -1
+    dbg_beamlet_arr_im(3) <= (OTHERS => '1');  -- mark unused octet by 0xFF = -1
+    dbg_beamlet_cnt   <= dbg_beamlet_cnt + 1;  -- NOTE(review): no wait follows before the process restarts, so this and the
+    dbg_beamlet_valid <= '1';                  -- next assignment appear to be overridden by the 0 / '0' assignments at the top
+  END PROCESS;
+
 END tb;
diff --git a/applications/lofar2/designs/lofar2_unb2c_sdp_station/revisions/lofar2_unb2c_sdp_station_bf/tb_lofar2_unb2c_sdp_station_bf_bst_offload.vhd b/applications/lofar2/designs/lofar2_unb2c_sdp_station/revisions/lofar2_unb2c_sdp_station_bf/tb_lofar2_unb2c_sdp_station_bf_bst_offload.vhd
index cfacdffbc7d26617dd39f21a9be9443f5ed508f9..5930355ea60378745c482fe374b8f529f4f51448 100644
--- a/applications/lofar2/designs/lofar2_unb2c_sdp_station/revisions/lofar2_unb2c_sdp_station_bf/tb_lofar2_unb2c_sdp_station_bf_bst_offload.vhd
+++ b/applications/lofar2/designs/lofar2_unb2c_sdp_station/revisions/lofar2_unb2c_sdp_station_bf/tb_lofar2_unb2c_sdp_station_bf_bst_offload.vhd
@@ -35,6 +35,7 @@
 --   > as 7    # default
 --   > as 12   # for detailed debugging
 --   > run -a  
+--   Takes about 10 m
 --
 -------------------------------------------------------------------------------
 LIBRARY IEEE, common_lib, unb2c_board_lib, i2c_lib, mm_lib, dp_lib, diag_lib, lofar2_sdp_lib, wpfb_lib, lofar2_unb2c_sdp_station_lib, eth_lib;