diff --git a/applications/lofar2/designs/lofar2_unb2c_sdp_station/revisions/lofar2_unb2c_sdp_station_bf/hdllib.cfg b/applications/lofar2/designs/lofar2_unb2c_sdp_station/revisions/lofar2_unb2c_sdp_station_bf/hdllib.cfg
index 2dbc42a190d69aae9720163ca219e905cd8cce2a..621e23f9915cbe547dffb998a42a755054646c3b 100644
--- a/applications/lofar2/designs/lofar2_unb2c_sdp_station/revisions/lofar2_unb2c_sdp_station_bf/hdllib.cfg
+++ b/applications/lofar2/designs/lofar2_unb2c_sdp_station/revisions/lofar2_unb2c_sdp_station_bf/hdllib.cfg
@@ -9,10 +9,11 @@ hdl_lib_technology = ip_arria10_e2sg
 
 test_bench_files = 
     tb_lofar2_unb2c_sdp_station_bf.vhd
+    tb_tb_lofar2_unb2c_sdp_station_bf.vhd
     tb_lofar2_unb2c_sdp_station_bf_bst_offload.vhd
 
 regression_test_vhdl =
-    tb_lofar2_unb2c_sdp_station_bf.vhd
+    tb_tb_lofar2_unb2c_sdp_station_bf.vhd
     tb_lofar2_unb2c_sdp_station_bf_bst_offload.vhd
 
 [modelsim_project_file]
diff --git a/applications/lofar2/designs/lofar2_unb2c_sdp_station/revisions/lofar2_unb2c_sdp_station_bf/tb_lofar2_unb2c_sdp_station_bf.vhd b/applications/lofar2/designs/lofar2_unb2c_sdp_station/revisions/lofar2_unb2c_sdp_station_bf/tb_lofar2_unb2c_sdp_station_bf.vhd
index 257123269176fbca6ff5a72b651b5c6aa11f0d29..c641685358e7fcb63c7f01ad40ca2c14c3c4dc69 100644
--- a/applications/lofar2/designs/lofar2_unb2c_sdp_station/revisions/lofar2_unb2c_sdp_station_bf/tb_lofar2_unb2c_sdp_station_bf.vhd
+++ b/applications/lofar2/designs/lofar2_unb2c_sdp_station/revisions/lofar2_unb2c_sdp_station_bf/tb_lofar2_unb2c_sdp_station_bf.vhd
@@ -21,37 +21,98 @@
 -------------------------------------------------------------------------------
 --
 -- Author: R. van der Walle (original), E. Kooistra (updates)
--- Purpose: Self-checking testbench for simulating lofar2_unb2c_sdp_station_bf using WG data.
+-- Purpose: Self-checking testbench for simulating lofar2_unb2c_sdp_station_bf
+--   using WG data.
 --
 -- Description:
+--   This tb is a balance between verification coverage and keeping it simple:
+--   - Use only one signal input (g_sp). Put same remnant WG signal on the
+--     other signal inputs.
+--   - Use different BF weight for the two beamlet polarizations (g_bf_x_gain,
+--     g_bf_x_phase and g_bf_y_phase, g_bf_y_phase) of signal input g_sp.
+--     Using different BF weights for the N_pol_bf = 2 BF polarizations allows
+--     verification of the dual polarization beamlet.
+--   - Use same remnant BF weight for the other S_pn - 1 = 11 signal inputs.
+--     The remnant signal inputs and BF weights allow verifying the BF sum if
+--     they are not 0. Using the same settings for all remnant SP simplyfies
+--     the tb, while still testing the BF sum.
+--   - Select one beamlet for the subband (g_beamlet). Selecting one beamlet
+--     other than the default beamlet for the subband is sufficient to verify
+--     the beamlet subband select.
+--   - Use same stimuli for both beamsets.
+--
 --   MM control actions:
 --
---   1) Enable calc mode for WG on signal input g_sp via reg_diag_wg with:
---        g_subband = 102 --> WG freq = 19.921875MHz
---        g_ampl = 1.0 --> WG ampl = 2**13
+--   1) Enable calc mode for WG on signal input (si) g_sp via reg_diag_wg with:
+--        g_subband = 102 --> 102 * f_sub = 19.921875 MHz
+--        g_sp_ampl = 1.0 --> 1.0 yield WG ampl = 2**13
+--        g_sp_phase --> subband phase
+--      Use g_sp_remnant_ampl = 0.1 and g_sp_remnant_phase = 0.0 for the other
+--      S_pn-1 = 11 signal inputs, than g_sp, that are not used in the BF.
 --   
---   2) Read current BSN from reg_bsn_scheduler_wg and write reg_bsn_scheduler_wg 
---      to trigger start of WG at BSN.
+--   2) Read current BSN from reg_bsn_scheduler_wg and write
+--      reg_bsn_scheduler_wg to trigger start of WG at BSN.
 --     
---   3) Read and verify subband statistics (SST)
+--   3) Read and verify subband statistics (SST) for g_sp. This also reads the
+--      SST of the other signal input of the WPFB that processes g_sp.
+--
+--   4) Select subband g_subband for beamlets in g_beamlet
+--
+--   5) Apply BF weight to g_beamlet X beam and Y beam, so for example if g_sp
+--      = 3, then w_x3 = g_bf_x_gain/phase and w_y3 = = g_bf_x_gain/phase. The
+--      other BF weights are 0.
 --
---   4) Select subband g_subband for beamlet g_beamlet
+--      WG          BF               BF
+--      si          weight           weight
+--                  X                Y
+--       0 -----> * w_x0  ..+
+--           \--------------|----> * w_y0  ..+
+--       1 -----> * w_x1  ..+                |
+--           \--------------|----> * w_y1  ..+
+--       2 -----> * w_x2  ..+                |
+--           \--------------|----> * w_y2  ..+
+--       3 -----> * w_x3  ..+                |
+--           \--------------|----> * w_y3  ..+
+--       4 -----> * w_x4  ..+                |
+--           \--------------|----> * w_y4  ..+
+--       5 -----> * w_x5  ..+                |
+--           \--------------|----> * w_y5  ..+
+--       6 -----> * w_x6  ..+                |
+--           \--------------|----> * w_y6  ..+
+--       7 -----> * w_x7  ..+                |
+--           \--------------|----> * w_y7  ..+
+--       8 -----> * w_x8  ..+                |
+--           \--------------|----> * w_y8  ..+
+--       9 -----> * w_x9  ..+                |
+--           \--------------|----> * w_y9  ..+
+--      10 -----> * w_x10 ..+                |
+--           \--------------|----> * w_y10 ..+
+--      11 -----> * w_x11 ..+                |
+--           \--------------|----> * w_y11 ..+
+--                          |                |
+--                          \----------------|---> beamlet_x
+--                                            \--> beamlet_y
 --
---   5) Apply BF weight (g_bf_gain, g_bf_phase) to g_beamlet X beam and Y beam
 --
 --   6) Read and verify beamlet statistics (BST)
---        View sp_subband_sst in Wave window
---        View pol_beamlet_bst in Wave window
+--        View sp_sst in Wave window
+--        View bst_x_arr, bst_y_arr in Wave window
 --
 --   7) Verify 10GbE output header and output payload for g_beamlet.
+--        View rx_beamlet_sosi
+--        View rx_beamlet_cnt (in analog format)
+--
+-- Remark:
+-- . The c_wg_phase_offset and c_subband_phase_offset are used to tune the WG
+--   phase reference to 0.0 degrees at the start (sop)
 --
 -- Usage:
 --   > as 7    # default
 --   > as 12   # for detailed debugging
 --   # Manually add missing signal
 --   > add wave -position insertpoint  \
---     sim:/tb_lofar2_unb2c_sdp_station_bf/sp_subband_ssts_arr2 \
---     sim:/tb_lofar2_unb2c_sdp_station_bf/pol_beamlet_bsts_arr2
+--     sim:/tb_lofar2_unb2c_sdp_station_bf/sp_ssts_arr2 \
+--     sim:/tb_lofar2_unb2c_sdp_station_bf/bsts_arr2
 --   > run -a  
 --   Takes about   40 m when g_read_all_* = FALSE
 --   Takes about 1h 5 m when g_read_all_* = TRUE
@@ -79,32 +140,41 @@ USE tech_pll_lib.tech_pll_component_pkg.ALL;
 
 ENTITY tb_lofar2_unb2c_sdp_station_bf IS
   GENERIC (
-    g_sp                : NATURAL := 0;     -- WG signal path index in range(S_pn = 12)
-    g_wg_ampl           : REAL := 1.0;      -- WG normalized amplitude
-    g_subband           : NATURAL := 102;   -- select g_subband at index 102 = 102/1024 * 200MHz = 19.921875 MHz
-    g_beamlet           : NATURAL := 10;    -- map g_subband to g_beamlet index in beamset in range(c_sdp_S_sub_bf = 488)
-    g_beamlet_scale     : REAL := 1.0 / 2.0**9;  -- g_beamlet output scale factor
-    g_bf_gain           : REAL := 1.0;      -- g_beamlet BF weight normalized gain
-    g_bf_phase          : REAL := 30.0;      -- g_beamlet BF weight phase rotation in degrees
-    g_read_all_SST      : BOOLEAN := FALSE;  -- when FALSE only read SST for g_subband, to save sim time
-    g_read_all_BST      : BOOLEAN := FALSE   -- when FALSE only read BST for g_beamlet, to save sim time
+    g_sp                 : NATURAL := 3;      -- WG signal path (SP) index in range(S_pn = 12)
+    g_sp_ampl            : REAL := 0.5;       -- WG normalized amplitude
+    g_sp_phase           : REAL := -110.0;      -- WG phase in degrees = subband phase
+    g_sp_remnant_ampl    : REAL := 0.1;       -- WG normalized amplitude for remnant sp
+    g_sp_remnant_phase   : REAL := 15.0;      -- WG phase in degrees for remnant sp
+    g_subband            : NATURAL := 102;    -- select g_subband at index 102 = 102/1024 * 200MHz = 19.921875 MHz
+    g_beamlet            : NATURAL := 10;     -- map g_subband to g_beamlet index in beamset in range(c_sdp_S_sub_bf = 488)
+    g_beamlet_scale      : REAL := 1.0 / 2.0**9;  -- g_beamlet output scale factor
+    g_bf_x_gain          : REAL := 0.7;       -- g_beamlet X BF weight normalized gain for g_sp
+    g_bf_y_gain          : REAL := 0.6;       -- g_beamlet Y BF weight normalized gain for g_sp
+    g_bf_x_phase         : REAL := 30.0;      -- g_beamlet X BF weight phase rotation in degrees for g_sp
+    g_bf_y_phase         : REAL := 40.0;      -- g_beamlet Y BF weight phase rotation in degrees for g_sp
+    g_bf_remnant_x_gain  : REAL := 0.05;       -- g_beamlet X BF weight normalized gain for remnant sp
+    g_bf_remnant_y_gain  : REAL := 0.04;       -- g_beamlet Y BF weight normalized gain for remnant sp
+    g_bf_remnant_x_phase : REAL := 170.0;       -- g_beamlet X BF weight phase rotation in degrees for g_sp
+    g_bf_remnant_y_phase : REAL := -135.0;       -- g_beamlet Y BF weight phase rotation in degrees for g_sp
+    g_read_all_SST       : BOOLEAN := FALSE;  -- when FALSE only read SST for g_subband, to save sim time
+    g_read_all_BST       : BOOLEAN := FALSE   -- when FALSE only read BST for g_beamlet, to save sim time
   );
 END tb_lofar2_unb2c_sdp_station_bf;
 
 ARCHITECTURE tb OF tb_lofar2_unb2c_sdp_station_bf IS
 
-  CONSTANT c_sim             : BOOLEAN := TRUE;
-  CONSTANT c_unb_nr          : NATURAL := 0; -- UniBoard 0
-  CONSTANT c_node_nr         : NATURAL := 0; 
-  CONSTANT c_gn_index        : NATURAL := c_unb_nr * 4 + c_node_nr;           -- this node GN
-  CONSTANT c_init_bsn        : NATURAL := 17;  -- some recognizable value >= 0
+  CONSTANT c_sim                 : BOOLEAN := TRUE;
+  CONSTANT c_unb_nr              : NATURAL := 0; -- UniBoard 0
+  CONSTANT c_node_nr             : NATURAL := 0;
+  CONSTANT c_gn_index            : NATURAL := c_unb_nr * 4 + c_node_nr;           -- this node GN
+  CONSTANT c_init_bsn            : NATURAL := 17;  -- some recognizable value >= 0
 
-  CONSTANT c_id              : STD_LOGIC_VECTOR(7 DOWNTO 0) := TO_UVEC(c_gn_index, 8);
-  CONSTANT c_version         : STD_LOGIC_VECTOR(1 DOWNTO 0) := "00";
-  CONSTANT c_fw_version      : t_unb2c_board_fw_version := (1, 0);
+  CONSTANT c_id                  : STD_LOGIC_VECTOR(7 DOWNTO 0) := TO_UVEC(c_gn_index, 8);
+  CONSTANT c_version             : STD_LOGIC_VECTOR(1 DOWNTO 0) := "00";
+  CONSTANT c_fw_version          : t_unb2c_board_fw_version := (1, 0);
 
-  CONSTANT c_mac_15_0        : STD_LOGIC_VECTOR(15 DOWNTO 0) := TO_UVEC(c_unb_nr, 8) & TO_UVEC(c_node_nr, 8);
-  CONSTANT c_ip_15_0         : STD_LOGIC_VECTOR(15 DOWNTO 0) := TO_UVEC(c_unb_nr, 8) & TO_UVEC(c_node_nr+1, 8);  -- +1 to avoid IP = *.*.*.0
+  CONSTANT c_mac_15_0            : STD_LOGIC_VECTOR(15 DOWNTO 0) := TO_UVEC(c_unb_nr, 8) & TO_UVEC(c_node_nr, 8);
+  CONSTANT c_ip_15_0             : STD_LOGIC_VECTOR(15 DOWNTO 0) := TO_UVEC(c_unb_nr, 8) & TO_UVEC(c_node_nr+1, 8);  -- +1 to avoid IP = *.*.*.0
 
   CONSTANT c_eth_clk_period      : TIME := 8 ns;  -- 125 MHz XO on UniBoard
   CONSTANT c_ext_clk_period      : TIME := 5 ns;
@@ -124,6 +194,8 @@ ARCHITECTURE tb OF tb_lofar2_unb2c_sdp_station_bf IS
   CONSTANT c_stat_lo_factor      : REAL := 1.0 - c_stat_percentage;  -- lower boundary
   CONSTANT c_stat_hi_factor      : REAL := 1.0 + c_stat_percentage;  -- higher boundary
 
+  CONSTANT c_nof_beamlets_per_data : NATURAL := 2;  -- 2 dual pol beamlets (= XY, XY) per 64b data word
+
   CONSTANT c_beamlet_output_delta : INTEGER := 2;  -- +-delta margin
 
   -- header fields
@@ -155,74 +227,128 @@ ARCHITECTURE tb OF tb_lofar2_unb2c_sdp_station_bf IS
   -- .ampl
   CONSTANT c_wg_ampl_full_scale   : NATURAL := 2**(c_sdp_W_adc-1);  -- full scale (FS) of WG, will just cause clipping of +FS to +FS-1
   CONSTANT c_wg_ampl_lsb          : REAL := c_diag_wg_ampl_unit / REAL(c_wg_ampl_full_scale);  -- amplitude in number of LSbit resolution steps
-  CONSTANT c_wg_ampl              : NATURAL := NATURAL(g_wg_ampl * REAL(c_wg_ampl_full_scale));  -- in number of lsb
+  CONSTANT c_wg_ampl              : NATURAL := NATURAL(g_sp_ampl * REAL(c_wg_ampl_full_scale));  -- in number of lsb
+  CONSTANT c_wg_remnant_ampl      : NATURAL := NATURAL(g_sp_remnant_ampl * REAL(c_wg_ampl_full_scale));  -- in number of lsb
   CONSTANT c_exp_sp_power         : REAL := REAL(c_wg_ampl**2) / 2.0;
   CONSTANT c_exp_sp_ast           : REAL := c_exp_sp_power * REAL(c_nof_clk_per_sync);
   -- . phase
-  CONSTANT c_subband_phase        : REAL := 0.0;  -- wanted subband phase in degrees = WG phase at sop
   CONSTANT c_subband_freq         : REAL := REAL(g_subband) / REAL(c_sdp_N_fft);  -- normalized by fs = f_adc = 200 MHz = dp_clk rate
   CONSTANT c_wg_latency           : INTEGER := c_diag_wg_latency - 0;  -- -0 to account for BSN scheduler start trigger latency
   CONSTANT c_wg_phase_offset      : REAL := 360.0 * REAL(c_wg_latency) * c_subband_freq;  -- c_diag_wg_latency is in dp_clk cycles
-  CONSTANT c_wg_phase             : REAL := c_subband_phase + c_wg_phase_offset;  -- WG phase in degrees
+  CONSTANT c_wg_phase             : REAL := g_sp_phase + c_wg_phase_offset;  -- WG phase in degrees
+  CONSTANT c_wg_remnant_phase     : REAL := g_sp_remnant_phase + c_wg_phase_offset;  -- WG phase in degrees
   -- . freq
   CONSTANT c_wg_subband_freq_unit : REAL := c_diag_wg_freq_unit/REAL(c_sdp_N_fft);  -- subband freq = Fs/1024 = 200 MSps/1024 = 195312.5 Hz sinus
 
   -- WPFB
-  CONSTANT c_pol_index                      : NATURAL := g_sp MOD c_sdp_Q_fft;
-  CONSTANT c_pfb_index                      : NATURAL := g_sp / c_sdp_Q_fft;  -- only read used WPFB unit out of range(c_sdp_P_pfb = 6)
-  CONSTANT c_subband_phase_offset           : REAL := -90.0;  -- WG with zero phase sinues yields subband with -90 degrees phase (negative Im, zero Re)
-  CONSTANT c_subband_weight_gain            : REAL := 1.0;  -- use default unit subband weights
-  CONSTANT c_subband_weight_phase           : REAL := 0.0;  -- use default unit subband weights
-  CONSTANT c_exp_subband_sp_ampl_ratio      : REAL := 7.96;  -- ~= 8 for unit FIR DC gain, depends on internal WPFB quantization and FIR coefficients
-  CONSTANT c_exp_subband_ampl               : REAL := REAL(c_wg_ampl) * c_exp_subband_sp_ampl_ratio * c_subband_weight_gain;
-  CONSTANT c_exp_subband_power              : REAL := c_exp_subband_ampl**2.0;  -- complex, so no divide by 2
-  CONSTANT c_exp_subband_sst                : REAL := c_exp_subband_power * REAL(c_nof_block_per_sync);
-
-  TYPE t_real_arr IS ARRAY (INTEGER RANGE <>) OF REAL; 
+  CONSTANT c_pol_index                    : NATURAL := g_sp MOD c_sdp_Q_fft;
+  CONSTANT c_pfb_index                    : NATURAL := g_sp / c_sdp_Q_fft;  -- only read used WPFB unit out of range(c_sdp_P_pfb = 6)
+  CONSTANT c_subband_phase_offset         : REAL := -90.0;  -- WG with zero phase sinus yields subband with -90 degrees phase (negative Im, zero Re)
+  CONSTANT c_subband_weight_gain          : REAL := 1.0;  -- use default unit subband weights
+  CONSTANT c_subband_weight_phase         : REAL := 0.0;  -- use default unit subband weights
+  CONSTANT c_exp_subband_phase            : REAL := g_sp_phase + c_subband_phase_offset + c_subband_weight_phase;
+  CONSTANT c_exp_subband_ampl             : REAL := REAL(c_wg_ampl) * c_sdp_wpfb_subband_sp_ampl_ratio * c_subband_weight_gain;
+  CONSTANT c_exp_subband_power            : REAL := c_exp_subband_ampl**2.0;  -- complex signal ampl, so no divide by 2
+  CONSTANT c_exp_subband_sst              : REAL := c_exp_subband_power * REAL(c_nof_block_per_sync);
+
+  CONSTANT c_exp_remnant_subband_phase    : REAL := g_sp_remnant_phase + c_subband_phase_offset + c_subband_weight_phase;
+  CONSTANT c_exp_remnant_subband_ampl     : REAL := REAL(c_wg_remnant_ampl) * c_sdp_wpfb_subband_sp_ampl_ratio * c_subband_weight_gain;
+
+  TYPE t_real_arr IS ARRAY (INTEGER RANGE <>) OF REAL;
   TYPE t_slv_64_subbands_arr IS ARRAY (INTEGER RANGE <>) OF t_slv_64_arr(0 TO c_sdp_N_sub-1);           -- 512
   TYPE t_slv_64_beamlets_arr IS ARRAY (INTEGER RANGE <>) OF t_slv_64_arr(0 TO c_sdp_N_beamlets_sdp-1);  -- 2*488 = 976
 
-  -- BF
+  -- BF X-pol and Y-pol
   -- . select
-  CONSTANT c_exp_beamlet_index        : NATURAL := g_beamlet * c_sdp_N_pol_bf;  -- in beamset 0
+  CONSTANT c_exp_beamlet_x_index          : NATURAL := g_beamlet * c_sdp_N_pol_bf;      -- X index in beamset 0
+  CONSTANT c_exp_beamlet_y_index          : NATURAL := g_beamlet * c_sdp_N_pol_bf + 1;  -- Y index in beamset 0
   -- . Beamlet weights for selected g_sp
-  CONSTANT c_bf_weight_re             : INTEGER := INTEGER(g_bf_gain * REAL(c_sdp_unit_bf_weight) * COS(g_bf_phase * MATH_2_PI / 360.0));
-  CONSTANT c_bf_weight_im             : INTEGER := INTEGER(g_bf_gain * REAL(c_sdp_unit_bf_weight) * SIN(g_bf_phase * MATH_2_PI / 360.0));
+  CONSTANT c_bf_x_weight_re               : INTEGER := INTEGER(COMPLEX_RE(g_bf_x_gain * REAL(c_sdp_unit_bf_weight), g_bf_x_phase));
+  CONSTANT c_bf_x_weight_im               : INTEGER := INTEGER(COMPLEX_IM(g_bf_x_gain * REAL(c_sdp_unit_bf_weight), g_bf_x_phase));
+  CONSTANT c_bf_y_weight_re               : INTEGER := INTEGER(COMPLEX_RE(g_bf_y_gain * REAL(c_sdp_unit_bf_weight), g_bf_y_phase));
+  CONSTANT c_bf_y_weight_im               : INTEGER := INTEGER(COMPLEX_IM(g_bf_y_gain * REAL(c_sdp_unit_bf_weight), g_bf_y_phase));
+  CONSTANT c_bf_remnant_x_weight_re       : INTEGER := INTEGER(COMPLEX_RE(g_bf_remnant_x_gain * REAL(c_sdp_unit_bf_weight), g_bf_remnant_x_phase));
+  CONSTANT c_bf_remnant_x_weight_im       : INTEGER := INTEGER(COMPLEX_IM(g_bf_remnant_x_gain * REAL(c_sdp_unit_bf_weight), g_bf_remnant_x_phase));
+  CONSTANT c_bf_remnant_y_weight_re       : INTEGER := INTEGER(COMPLEX_RE(g_bf_remnant_y_gain * REAL(c_sdp_unit_bf_weight), g_bf_remnant_y_phase));
+  CONSTANT c_bf_remnant_y_weight_im       : INTEGER := INTEGER(COMPLEX_IM(g_bf_remnant_y_gain * REAL(c_sdp_unit_bf_weight), g_bf_remnant_y_phase));
+
+  -- Model the SDP beamformer for one g_sp and S_pn-1 = 11 remnant signal inputs
+  FUNCTION bf_calculate_expected_beamlet(sp_subband_ampl, sp_subband_phase, sp_bf_gain, sp_bf_phase,
+                                         rem_subband_ampl, rem_subband_phase, rem_bf_gain, rem_bf_phase : REAL) RETURN t_real_arr IS  -- 0:3 = ampl, phase, re, im
+    CONSTANT c_nof_rem : REAL := REAL(c_sdp_S_pn - 1);  -- BF for one g_sp and 11 remnant signal inputs
+    VARIABLE v_sp_ampl, v_sp_phase, v_sp_re, v_sp_im     : REAL;
+    VARIABLE v_rem_ampl, v_rem_phase, v_rem_re, v_rem_im : REAL;
+    VARIABLE v_sum_ampl, v_sum_phase, v_sum_re, v_sum_im : REAL;
+    VARIABLE v_tuple                                     : t_real_arr(0 TO 3);
+  BEGIN
+    v_sp_ampl   := sp_subband_ampl * sp_bf_gain;
+    v_sp_phase  := sp_subband_phase + sp_bf_phase;
+    v_sp_re     := COMPLEX_RE(v_sp_ampl, v_sp_phase);
+    v_sp_im     := COMPLEX_IM(v_sp_ampl, v_sp_phase);
+    v_rem_ampl  := rem_subband_ampl * rem_bf_gain;
+    v_rem_phase := rem_subband_phase + rem_bf_phase;
+    v_rem_re    := COMPLEX_RE(v_rem_ampl, v_rem_phase);
+    v_rem_im    := COMPLEX_IM(v_rem_ampl, v_rem_phase);
+    v_sum_re    := v_sp_re + c_nof_rem * v_rem_re;  -- BF sum re
+    v_sum_im    := v_sp_im + c_nof_rem * v_rem_im;  -- BF sum im
+    v_sum_ampl  := COMPLEX_RADIUS(v_sum_re, v_sum_im);
+    v_sum_phase := COMPLEX_PHASE(v_sum_re, v_sum_im);
+    v_tuple     := (0 => v_sum_ampl, 1 => v_sum_phase, 2 => v_sum_re, 3 => v_sum_im);
+    RETURN v_tuple;
+  END;
+
   -- . Beamlet internal
-  CONSTANT c_exp_beamlet_ampl         : REAL := c_exp_subband_ampl * g_bf_gain;
-  CONSTANT c_exp_beamlet_phase        : REAL := c_subband_phase_offset + c_subband_weight_phase + g_bf_phase;
-  CONSTANT c_exp_beamlet_re           : REAL := c_exp_beamlet_ampl * COS(c_exp_beamlet_phase * MATH_2_PI / 360.0);
-  CONSTANT c_exp_beamlet_im           : REAL := c_exp_beamlet_ampl * SIN(c_exp_beamlet_phase * MATH_2_PI / 360.0);
+  CONSTANT c_exp_beamlet_x_tuple          : t_real_arr(0 TO 3) := bf_calculate_expected_beamlet(
+                                              c_exp_subband_ampl, c_exp_subband_phase, g_bf_x_gain, g_bf_x_phase,
+                                              c_exp_remnant_subband_ampl, c_exp_remnant_subband_phase, g_bf_remnant_x_gain, g_bf_remnant_x_phase);
+  CONSTANT c_exp_beamlet_x_ampl           : REAL := c_exp_beamlet_x_tuple(0);
+  CONSTANT c_exp_beamlet_x_phase          : REAL := c_exp_beamlet_x_tuple(1);
+  CONSTANT c_exp_beamlet_x_re             : REAL := c_exp_beamlet_x_tuple(2);
+  CONSTANT c_exp_beamlet_x_im             : REAL := c_exp_beamlet_x_tuple(3);
+
+  CONSTANT c_exp_beamlet_y_tuple          : t_real_arr(0 TO 3) := bf_calculate_expected_beamlet(
+                                              c_exp_subband_ampl, c_exp_subband_phase, g_bf_y_gain, g_bf_y_phase,
+                                              c_exp_remnant_subband_ampl, c_exp_remnant_subband_phase, g_bf_remnant_y_gain, g_bf_remnant_y_phase);
+  CONSTANT c_exp_beamlet_y_ampl           : REAL := c_exp_beamlet_y_tuple(0);
+  CONSTANT c_exp_beamlet_y_phase          : REAL := c_exp_beamlet_y_tuple(1);
+  CONSTANT c_exp_beamlet_y_re             : REAL := c_exp_beamlet_y_tuple(2);
+  CONSTANT c_exp_beamlet_y_im             : REAL := c_exp_beamlet_y_tuple(3);
   -- . BST
-  CONSTANT c_exp_beamlet_power        : REAL := c_exp_beamlet_ampl**2.0;  -- complex, so no divide by 2
-  CONSTANT c_exp_beamlet_bst          : REAL := c_exp_subband_sst * g_bf_gain**2.0;  -- = c_exp_beamlet_power *  REAL(c_nof_block_per_sync)
+  CONSTANT c_exp_beamlet_x_power          : REAL := c_exp_beamlet_x_ampl**2.0;  -- complex signal ampl, so no divide by 2
+  CONSTANT c_exp_beamlet_x_bst            : REAL := c_exp_beamlet_x_power * REAL(c_nof_block_per_sync);
+  CONSTANT c_exp_beamlet_y_power          : REAL := c_exp_beamlet_y_ampl**2.0;  -- complex signal ampl, so no divide by 2
+  CONSTANT c_exp_beamlet_y_bst            : REAL := c_exp_beamlet_y_power * REAL(c_nof_block_per_sync);
   -- . Beamlet output
-  CONSTANT c_exp_beamlet_output_ampl  : REAL := c_exp_beamlet_ampl * g_beamlet_scale;
-  CONSTANT c_exp_beamlet_output_phase : REAL := c_exp_beamlet_phase;
-  CONSTANT c_exp_beamlet_output_re    : REAL := c_exp_beamlet_re * g_beamlet_scale;
-  CONSTANT c_exp_beamlet_output_im    : REAL := c_exp_beamlet_im * g_beamlet_scale;
+  CONSTANT c_exp_beamlet_x_output_ampl    : REAL := c_exp_beamlet_x_ampl * g_beamlet_scale;
+  CONSTANT c_exp_beamlet_x_output_phase   : REAL := c_exp_beamlet_x_phase;
+  CONSTANT c_exp_beamlet_x_output_re      : REAL := c_exp_beamlet_x_re * g_beamlet_scale;
+  CONSTANT c_exp_beamlet_x_output_im      : REAL := c_exp_beamlet_x_im * g_beamlet_scale;
+  CONSTANT c_exp_beamlet_y_output_ampl    : REAL := c_exp_beamlet_y_ampl * g_beamlet_scale;
+  CONSTANT c_exp_beamlet_y_output_phase   : REAL := c_exp_beamlet_y_phase;
+  CONSTANT c_exp_beamlet_y_output_re      : REAL := c_exp_beamlet_y_re * g_beamlet_scale;
+  CONSTANT c_exp_beamlet_y_output_im      : REAL := c_exp_beamlet_y_im * g_beamlet_scale;
 
   -- MM
   -- . Address widths of a single MM instance
   --   . c_sdp_S_pn = 12 instances
-  CONSTANT c_addr_w_reg_diag_wg     : NATURAL := 2;
+  CONSTANT c_addr_w_reg_diag_wg           : NATURAL := 2;
   --   . c_sdp_N_beamsets = 2 instances
-  CONSTANT c_addr_w_ram_ss_ss_wide : NATURAL := ceil_log2(c_sdp_P_pfb * c_sdp_S_sub_bf * c_sdp_Q_fft);
-  CONSTANT c_addr_w_ram_bf_weights  : NATURAL := ceil_log2(c_sdp_N_pol_bf * c_sdp_P_pfb * c_sdp_S_sub_bf * c_sdp_Q_fft);
-  CONSTANT c_addr_w_reg_bf_scale   : NATURAL := 1;
-  CONSTANT c_addr_w_reg_hdr_dat    : NATURAL := ceil_log2(field_nof_words(c_sdp_cep_hdr_field_arr, c_word_w));
-  CONSTANT c_addr_w_reg_dp_xonoff  : NATURAL := 1;
-  CONSTANT c_addr_w_ram_st_bst      : NATURAL := ceil_log2(c_sdp_S_sub_bf*c_sdp_N_pol_bf*c_stat_data_sz);
+  CONSTANT c_addr_w_ram_ss_ss_wide        : NATURAL := ceil_log2(c_sdp_P_pfb * c_sdp_S_sub_bf * c_sdp_Q_fft);
+  CONSTANT c_addr_w_ram_bf_weights        : NATURAL := ceil_log2(c_sdp_N_pol_bf * c_sdp_P_pfb * c_sdp_S_sub_bf * c_sdp_Q_fft);
+  CONSTANT c_addr_w_reg_bf_scale          : NATURAL := 1;
+  CONSTANT c_addr_w_reg_hdr_dat           : NATURAL := ceil_log2(field_nof_words(c_sdp_cep_hdr_field_arr, c_word_w));
+  CONSTANT c_addr_w_reg_dp_xonoff         : NATURAL := 1;
+  CONSTANT c_addr_w_ram_st_bst            : NATURAL := ceil_log2(c_sdp_S_sub_bf*c_sdp_N_pol_bf*c_stat_data_sz);
   -- . Address spans of a single MM instance
   --   . c_sdp_S_pn = 12 instances
-  CONSTANT c_mm_span_reg_diag_wg    : NATURAL := 2**c_addr_w_reg_diag_wg;
+  CONSTANT c_mm_span_reg_diag_wg          : NATURAL := 2**c_addr_w_reg_diag_wg;
   --   . c_sdp_N_beamsets = 2 instances
-  CONSTANT c_mm_span_ram_ss_ss_wide : NATURAL := 2**c_addr_w_ram_ss_ss_wide;
-  CONSTANT c_mm_span_ram_bf_weights : NATURAL := 2**c_addr_w_ram_bf_weights;
-  CONSTANT c_mm_span_reg_bf_scale   : NATURAL := 2**c_addr_w_reg_bf_scale;
-  CONSTANT c_mm_span_reg_hdr_dat    : NATURAL := 2**c_addr_w_reg_hdr_dat;
-  CONSTANT c_mm_span_reg_dp_xonoff  : NATURAL := 2**c_addr_w_reg_dp_xonoff;
-  CONSTANT c_mm_span_ram_st_bst     : NATURAL := 2**c_addr_w_ram_st_bst;
+  CONSTANT c_mm_span_ram_ss_ss_wide       : NATURAL := 2**c_addr_w_ram_ss_ss_wide;
+  CONSTANT c_mm_span_ram_bf_weights       : NATURAL := 2**c_addr_w_ram_bf_weights;
+  CONSTANT c_mm_span_reg_bf_scale         : NATURAL := 2**c_addr_w_reg_bf_scale;
+  CONSTANT c_mm_span_reg_hdr_dat          : NATURAL := 2**c_addr_w_reg_hdr_dat;
+  CONSTANT c_mm_span_reg_dp_xonoff        : NATURAL := 2**c_addr_w_reg_dp_xonoff;
+  CONSTANT c_mm_span_ram_st_bst           : NATURAL := 2**c_addr_w_ram_st_bst;
 
   CONSTANT c_mm_file_reg_ppsh             : STRING := mmf_unb_file_prefix(c_unb_nr, c_node_nr) & "PIO_PPS";
   CONSTANT c_mm_file_reg_bsn_source_v2    : STRING := mmf_unb_file_prefix(c_unb_nr, c_node_nr) & "REG_BSN_SOURCE_V2";
@@ -262,16 +388,16 @@ ARCHITECTURE tb OF tb_lofar2_unb2c_sdp_station_bf IS
   SIGNAL rd_cep_udp_dst_port : STD_LOGIC_VECTOR(15 DOWNTO 0);
 
   -- WG
-  SIGNAL current_bsn_wg                 : STD_LOGIC_VECTOR(c_dp_stream_bsn_w-1 DOWNTO 0);
+  SIGNAL current_bsn_wg      : STD_LOGIC_VECTOR(c_dp_stream_bsn_w-1 DOWNTO 0);
 
   -- FSUB
-  -- . Read sp_subband_ssts_arr2 = SST for one WPFB unit that processes g_sp
-  SIGNAL sp_subband_ssts_arr2  : t_slv_64_subbands_arr(c_sdp_N_pol-1 DOWNTO 0);   -- [pol][sub], for X,Y pair of A, B
-  SIGNAL sp_subband_sst        : REAL := 0.0;
-  SIGNAL stat_data               : STD_LOGIC_VECTOR(c_longword_w-1 DOWNTO 0);
+  -- . Read sp_ssts_arr2 = SST for one WPFB unit that processes g_sp
+  SIGNAL sp_ssts_arr2        : t_slv_64_subbands_arr(c_sdp_N_pol-1 DOWNTO 0);   -- [pol][sub], for X,Y pair of A, B
+  SIGNAL sp_sst              : REAL := 0.0;
+  SIGNAL stat_data           : STD_LOGIC_VECTOR(c_longword_w-1 DOWNTO 0);
 
   -- . Selector
-  SIGNAL sst_offload_weighted_subbands : STD_LOGIC;
+  SIGNAL sst_weighted_subbands_flag : STD_LOGIC;
 
   -- . Subband equalizer
   SIGNAL sp_subband_weight_re    : INTEGER := 0;
@@ -280,47 +406,59 @@ ARCHITECTURE tb OF tb_lofar2_unb2c_sdp_station_bf IS
   SIGNAL sp_subband_weight_phase : REAL := 0.0;
 
   -- BF
-  SIGNAL sp_subband_select       : NATURAL :=  0;
-  SIGNAL sp_subband_select_arr   : t_natural_arr(0 TO c_sdp_S_sub_bf * c_sdp_N_pol-1) := (OTHERS => 0);  -- Q_fft = N_pol = 2
-  SIGNAL sp_bf_weights_re_arr    : t_integer_arr(0 TO c_sdp_S_pn-1) := (OTHERS => 0);
-  SIGNAL sp_bf_weights_im_arr    : t_integer_arr(0 TO c_sdp_S_pn-1) := (OTHERS => 0);
-  SIGNAL sp_bf_weights_gain_arr  : t_real_arr(0 TO c_sdp_S_pn-1) := (OTHERS => 0.0);
-  SIGNAL sp_bf_weights_phase_arr : t_real_arr(0 TO c_sdp_S_pn-1) := (OTHERS => 0.0);
-
-  SIGNAL pol_beamlet_bsts_arr2 : t_slv_64_beamlets_arr(c_sdp_N_pol_bf-1 DOWNTO 0);  -- [pol_bf][blet]
-  SIGNAL pol_beamlet_bst_X_arr : t_real_arr(0 TO c_sdp_N_beamsets-1) := (OTHERS => 0.0);  -- [bset]
-  SIGNAL pol_beamlet_bst_Y_arr : t_real_arr(0 TO c_sdp_N_beamsets-1) := (OTHERS => 0.0);  -- [bset]
-
-  -- 10GbE
-  SIGNAL rx_beamlet_arr_re   : t_slv_8_arr(c_sdp_cep_nof_blocks_per_packet-1 DOWNTO 0);   -- [3:0]
-  SIGNAL rx_beamlet_arr_im   : t_slv_8_arr(c_sdp_cep_nof_blocks_per_packet-1 DOWNTO 0);   -- [3:0]
-  SIGNAL rx_beamlet_cnt      : NATURAL;
-  SIGNAL rx_beamlet_valid    : STD_LOGIC;
+  -- . beamlet subband selection
+  SIGNAL sp_subband_select         : NATURAL :=  0;
+  SIGNAL sp_subband_select_arr     : t_natural_arr(0 TO c_sdp_S_sub_bf * c_sdp_N_pol-1) := (OTHERS => 0);  -- Q_fft = N_pol = 2
+
+  -- . beamlet X-pol
+  SIGNAL sp_bf_x_weights_re_arr    : t_integer_arr(0 TO c_sdp_S_pn-1) := (OTHERS => 0);
+  SIGNAL sp_bf_x_weights_im_arr    : t_integer_arr(0 TO c_sdp_S_pn-1) := (OTHERS => 0);
+  SIGNAL sp_bf_x_weights_gain_arr  : t_real_arr(0 TO c_sdp_S_pn-1) := (OTHERS => 0.0);
+  SIGNAL sp_bf_x_weights_phase_arr : t_real_arr(0 TO c_sdp_S_pn-1) := (OTHERS => 0.0);
+  -- . beamlet Y-pol
+  SIGNAL sp_bf_y_weights_re_arr    : t_integer_arr(0 TO c_sdp_S_pn-1) := (OTHERS => 0);
+  SIGNAL sp_bf_y_weights_im_arr    : t_integer_arr(0 TO c_sdp_S_pn-1) := (OTHERS => 0);
+  SIGNAL sp_bf_y_weights_gain_arr  : t_real_arr(0 TO c_sdp_S_pn-1) := (OTHERS => 0.0);
+  SIGNAL sp_bf_y_weights_phase_arr : t_real_arr(0 TO c_sdp_S_pn-1) := (OTHERS => 0.0);
 
-  SIGNAL rx_beamlet_list_re  : t_slv_8_arr(c_sdp_cep_nof_beamlets_per_block * c_sdp_N_pol_bf-1 DOWNTO 0);  -- [488 * 2-1:0] = [975:0]
-  SIGNAL rx_beamlet_list_im  : t_slv_8_arr(c_sdp_cep_nof_beamlets_per_block * c_sdp_N_pol_bf-1 DOWNTO 0);  -- [488 * 2-1:0] = [975:0]
+  -- . BST
+  SIGNAL bsts_arr2           : t_slv_64_beamlets_arr(c_sdp_N_pol_bf-1 DOWNTO 0);  -- [pol_bf][blet]
+  SIGNAL bst_x_arr           : t_real_arr(0 TO c_sdp_N_beamsets-1) := (OTHERS => 0.0);  -- [bset] for BF X pol
+  SIGNAL bst_y_arr           : t_real_arr(0 TO c_sdp_N_beamsets-1) := (OTHERS => 0.0);  -- [bset] for BF Y pol
 
+  -- CEP model
+  -- . 10GbE
   SIGNAL tr_10GbE_src_out    : t_dp_sosi;
   SIGNAL tr_10GbE_src_in     : t_dp_siso;
   SIGNAL tr_ref_clk_312      : STD_LOGIC := '0';
   SIGNAL tr_ref_clk_156      : STD_LOGIC := '0';
   SIGNAL tr_ref_rst_156      : STD_LOGIC := '0';
 
-  -- dp_offload_rx
-  SIGNAL offload_rx_hdr_dat_mosi : t_mem_mosi := c_mem_mosi_rst;
-  SIGNAL offload_rx_hdr_dat_miso : t_mem_miso;
+  -- . dp_offload_rx
+  SIGNAL rx_hdr_dat_mosi     : t_mem_mosi := c_mem_mosi_rst;
+  SIGNAL rx_hdr_dat_miso     : t_mem_miso;
+
+  SIGNAL rx_hdr_fields_out   : STD_LOGIC_VECTOR(1023 DOWNTO 0);
+  SIGNAL rx_hdr_fields_raw   : STD_LOGIC_VECTOR(1023 DOWNTO 0) := (OTHERS => '0');
+
+  -- Beamlets packets header
+  SIGNAL rx_sdp_cep_header   : t_sdp_cep_header;
+  SIGNAL exp_sdp_cep_header  : t_sdp_cep_header;
+  SIGNAL exp_dp_bsn          : NATURAL;
+
+  -- Beamlets packets data
+  SIGNAL rx_beamlet_data     : STD_LOGIC_VECTOR(c_longword_w-1 DOWNTO 0);  -- 64 bit
+  SIGNAL rx_beamlet_sosi     : t_dp_sosi := c_dp_sosi_rst;
+  SIGNAL rx_beamlet_sop_cnt  : NATURAL := 0;
+  SIGNAL rx_beamlet_eop_cnt  : NATURAL := 0;
 
-  SIGNAL test_offload_en         : STD_LOGIC := '0';
-  SIGNAL test_offload_data       : STD_LOGIC_VECTOR(c_longword_w-1 DOWNTO 0);  -- 64 bit
-  SIGNAL test_offload_sosi       : t_dp_sosi := c_dp_sosi_rst;
-  SIGNAL test_offload_sop_cnt    : NATURAL := 0;
-  SIGNAL test_offload_eop_cnt    : NATURAL := 0;
+  SIGNAL rx_beamlet_arr_re   : t_slv_8_arr(c_sdp_cep_nof_blocks_per_packet-1 DOWNTO 0);   -- [3:0]
+  SIGNAL rx_beamlet_arr_im   : t_slv_8_arr(c_sdp_cep_nof_blocks_per_packet-1 DOWNTO 0);   -- [3:0]
+  SIGNAL rx_beamlet_cnt      : NATURAL;
+  SIGNAL rx_beamlet_valid    : STD_LOGIC;
 
-  SIGNAL rx_hdr_fields_out       : STD_LOGIC_VECTOR(1023 DOWNTO 0);
-  SIGNAL rx_hdr_fields_raw       : STD_LOGIC_VECTOR(1023 DOWNTO 0) := (OTHERS => '0');
-  SIGNAL rx_sdp_cep_header       : t_sdp_cep_header;
-  SIGNAL exp_sdp_cep_header      : t_sdp_cep_header;
-  SIGNAL exp_dp_bsn              : NATURAL;
+  SIGNAL rx_beamlet_list_re  : t_slv_8_arr(c_sdp_cep_nof_beamlets_per_block * c_sdp_N_pol_bf-1 DOWNTO 0);  -- [488 * 2-1:0] = [975:0]
+  SIGNAL rx_beamlet_list_im  : t_slv_8_arr(c_sdp_cep_nof_beamlets_per_block * c_sdp_N_pol_bf-1 DOWNTO 0);  -- [488 * 2-1:0] = [975:0]
 
   -- DUT
   SIGNAL ext_clk             : STD_LOGIC := '0';
@@ -416,6 +554,9 @@ BEGIN
     JESD204B_SYNC_N => jesd204b_sync_n
   );
 
+  ------------------------------------------------------------------------------
+  -- CEP model
+  ------------------------------------------------------------------------------
   u_unb2_board_clk644_pll : ENTITY tech_pll_lib.tech_pll_xgmii_mac_clocks
   PORT MAP (
     refclk_644 => SA_CLK,
@@ -471,13 +612,13 @@ BEGIN
     dp_rst                => dest_rst,
     dp_clk                => ext_clk,
 
-    reg_hdr_dat_mosi      => offload_rx_hdr_dat_mosi,
-    reg_hdr_dat_miso      => offload_rx_hdr_dat_miso,
+    reg_hdr_dat_mosi      => rx_hdr_dat_mosi,
+    reg_hdr_dat_miso      => rx_hdr_dat_miso,
 
     snk_in_arr(0)         => tr_10GbE_src_out,
     snk_out_arr(0)        => tr_10GbE_src_in,
 
-    src_out_arr(0)        => test_offload_sosi,
+    src_out_arr(0)        => rx_beamlet_sosi,
 
     hdr_fields_out_arr(0) => rx_hdr_fields_out,
     hdr_fields_raw_arr(0) => rx_hdr_fields_raw
@@ -489,19 +630,52 @@ BEGIN
   tb_clk  <= NOT tb_clk AFTER c_tb_clk_period/2;    -- Testbench MM clock
   
   p_mm_stimuli : PROCESS
-    VARIABLE v_bsn                   : NATURAL;
-    VARIABLE v_sp_subband_sst                       : REAL := 0.0;
-    VARIABLE v_pol_beamlet_bst                      : REAL := 0.0;
+    VARIABLE v_bsn                                  : NATURAL;
+    VARIABLE v_sp_sst                               : REAL := 0.0;
+    VARIABLE v_bst                                  : REAL := 0.0;
     VARIABLE v_data_lo, v_data_hi                   : STD_LOGIC_VECTOR(c_word_w-1 DOWNTO 0);
     VARIABLE v_stat_data                            : STD_LOGIC_VECTOR(c_longword_w-1 DOWNTO 0);
     VARIABLE v_len, v_span, v_offset, v_addr, v_sel : NATURAL;  -- address ranges, indices
-    VARIABLE v_W, v_P, v_S, v_A, v_B, v_G           : NATURAL;  -- array indicies
+    VARIABLE v_W, v_P, v_PB, v_S, v_A, v_B, v_G     : NATURAL;  -- array indicies
     VARIABLE v_re, v_im, v_weight                   : INTEGER;
     VARIABLE v_re_exp, v_im_exp                     : REAL := 0.0;
   BEGIN
     -- Wait for DUT power up after reset
     WAIT FOR 1 us;
-    
+
+    print_str("");
+    print_str("WG:");
+    print_str(". c_wg_ampl                            = " & int_to_str(c_wg_ampl));
+    print_str(". c_exp_sp_power                       = " & real_to_str(c_exp_sp_power, 20, 1));
+    print_str(". c_exp_sp_ast                         = " & real_to_str(c_exp_sp_ast, 20, 1));
+
+    print_str("");
+    print_str("Subband weight:");
+    print_str(". sp_subband_weight_gain               = " & real_to_str(sp_subband_weight_gain, 20, 6));
+    print_str(". sp_subband_weight_phase              = " & real_to_str(sp_subband_weight_phase, 20, 6));
+
+    print_str("");
+    print_str("SST results:");
+    print_str(". sst_weighted_subbands_flag           = " & sl_to_str(sst_weighted_subbands_flag));
+    print_str("");
+    print_str(". c_exp_subband_ampl                   = " & int_to_str(NATURAL(c_exp_subband_ampl)));
+    print_str(". c_exp_subband_power                  = " & real_to_str(c_exp_subband_power, 20, 1));
+    print_str(". c_exp_subband_sst                    = " & real_to_str(c_exp_subband_sst, 20, 1));
+    print_str("");
+    print_str(". sp_sst                               = " & real_to_str(sp_sst, 20, 1));
+    print_str(". sp_sst / c_exp_subband_sst           = " & real_to_str(sp_sst / c_exp_subband_sst, 20, 6));
+
+    print_str("");
+    print_str("BST results:");
+    print_str(". c_exp_beamlet_x_ampl                   = " & int_to_str(NATURAL(c_exp_beamlet_x_ampl)));
+    print_str(". c_exp_beamlet_x_power                  = " & real_to_str(c_exp_beamlet_x_power, 20, 1));
+    print_str(". c_exp_beamlet_x_bst                    = " & real_to_str(c_exp_beamlet_x_bst, 20, 1));
+    print_str("");
+    print_str(". c_exp_beamlet_y_ampl                   = " & int_to_str(NATURAL(c_exp_beamlet_y_ampl)));
+    print_str(". c_exp_beamlet_y_power                  = " & real_to_str(c_exp_beamlet_y_power, 20, 1));
+    print_str(". c_exp_beamlet_y_bst                    = " & real_to_str(c_exp_beamlet_y_bst, 20, 1));
+    print_str("");
+
     ----------------------------------------------------------------------------
     -- Set and check SDP info
     ----------------------------------------------------------------------------
@@ -635,12 +809,21 @@ BEGIN
     --   1 : phase[15:0]
     --   2 : freq[30:0]
     --   3 : ampl[16:0]
-    -- . Put wanted signal on g_sp input
-    v_offset := g_sp * c_mm_span_reg_diag_wg;
-    mmf_mm_bus_wr(c_mm_file_reg_diag_wg, v_offset + 0, 1024*2**16 + 1, tb_clk);  -- nof_samples, mode calc
-    mmf_mm_bus_wr(c_mm_file_reg_diag_wg, v_offset + 1, INTEGER(c_wg_phase * c_diag_wg_phase_unit), tb_clk);  -- phase offset in degrees
-    mmf_mm_bus_wr(c_mm_file_reg_diag_wg, v_offset + 2, INTEGER(REAL(g_subband) * c_wg_subband_freq_unit), tb_clk);  -- freq
-    mmf_mm_bus_wr(c_mm_file_reg_diag_wg, v_offset + 3, INTEGER(REAL(c_wg_ampl) * c_wg_ampl_lsb), tb_clk);  -- ampl
+    -- . Put wanted signal on g_sp input and remnant signal on the other inputs
+    FOR S IN 0 TO c_sdp_S_pn-1 LOOP
+      v_offset := S * c_mm_span_reg_diag_wg;
+      IF s = g_sp THEN
+        mmf_mm_bus_wr(c_mm_file_reg_diag_wg, v_offset + 0, 1024*2**16 + 1, tb_clk);  -- nof_samples, mode calc
+        mmf_mm_bus_wr(c_mm_file_reg_diag_wg, v_offset + 1, INTEGER(c_wg_phase * c_diag_wg_phase_unit), tb_clk);  -- phase offset in degrees
+        mmf_mm_bus_wr(c_mm_file_reg_diag_wg, v_offset + 2, INTEGER(REAL(g_subband) * c_wg_subband_freq_unit), tb_clk);  -- freq
+        mmf_mm_bus_wr(c_mm_file_reg_diag_wg, v_offset + 3, INTEGER(REAL(c_wg_ampl) * c_wg_ampl_lsb), tb_clk);  -- ampl
+      ELSE
+        mmf_mm_bus_wr(c_mm_file_reg_diag_wg, v_offset + 0, 1024*2**16 + 1, tb_clk);  -- nof_samples, mode calc
+        mmf_mm_bus_wr(c_mm_file_reg_diag_wg, v_offset + 1, INTEGER(c_wg_remnant_phase * c_diag_wg_phase_unit), tb_clk);  -- phase offset in degrees
+        mmf_mm_bus_wr(c_mm_file_reg_diag_wg, v_offset + 2, INTEGER(REAL(g_subband) * c_wg_subband_freq_unit), tb_clk);  -- freq
+        mmf_mm_bus_wr(c_mm_file_reg_diag_wg, v_offset + 3, INTEGER(REAL(c_wg_remnant_ampl) * c_wg_ampl_lsb), tb_clk);  -- ampl
+      END IF;
+    END LOOP;
 
     -- Read current BSN
     mmf_mm_bus_rd(c_mm_file_reg_bsn_scheduler_wg, 0, current_bsn_wg(31 DOWNTO  0), tb_clk);
@@ -658,7 +841,7 @@ BEGIN
     -- Read weighted subband selector
     ----------------------------------------------------------------------------
     mmf_mm_bus_rd(c_mm_file_reg_dp_selector, 0, rd_data, tb_clk);
-    sst_offload_weighted_subbands <= NOT rd_data(0);
+    sst_weighted_subbands_flag <= NOT rd_data(0);
     proc_common_wait_some_cycles(tb_clk, 1);
 
     ----------------------------------------------------------------------------
@@ -673,8 +856,8 @@ BEGIN
     v_im := unpack_complex_im(rd_data, c_sdp_W_sub_weight);
     sp_subband_weight_re <= v_re;
     sp_subband_weight_im <= v_im;
-    sp_subband_weight_gain <= SQRT(REAL(v_re)**2.0 + REAL(v_im)**2.0) / REAL(c_sdp_unit_sub_weight);
-    sp_subband_weight_phase <= atan2(Y => REAL(v_im), X => REAL(v_re)) * 360.0 / MATH_2_PI;
+    sp_subband_weight_gain <= COMPLEX_RADIUS(v_re, v_im) / REAL(c_sdp_unit_sub_weight);
+    sp_subband_weight_phase <= COMPLEX_PHASE(v_re, v_im);
 
     -- No need to write subband weight, because default it is unit weight
 
@@ -716,7 +899,7 @@ BEGIN
     proc_common_wait_some_cycles(ext_clk, 100);  -- delay for ease of view in Wave window
 
     ----------------------------------------------------------------------------
-    -- Write beamlet weight for g_beamlet
+    -- Write beamlet weight for g_beamlet in S_sub_bf
     ----------------------------------------------------------------------------
     -- . MM format: (cint16)RAM_BF_WEIGHTS[N_beamsets][N_pol_bf][A_pn]_[N_pol][S_sub_bf]
 
@@ -724,38 +907,65 @@ BEGIN
     v_span := true_log_pow2(c_sdp_N_pol * c_sdp_S_sub_bf);  -- = 1024
     FOR U IN 0 TO c_sdp_N_beamsets-1 LOOP
       -- Same BF weights for both beamsets
-      FOR A IN 0 TO c_sdp_A_pn-1 LOOP
-        FOR P IN 0 TO c_sdp_N_pol-1 LOOP
-          v_S := A * c_sdp_N_pol + P;
-          IF v_S = g_sp THEN
-            -- use generic BF weight for g_sp in g_beamlet
-            v_weight := pack_complex(re => c_bf_weight_re, im => c_bf_weight_im, w => c_sdp_W_bf_weight);
-          ELSE
-            -- default set all weights to zero
-            v_weight := 0;
-          END IF;
-          v_addr := g_beamlet + A * v_span + U * c_mm_span_ram_bf_weights;
-          v_addr := v_addr + P * c_sdp_S_sub_bf;
-          mmf_mm_bus_wr(c_mm_file_ram_bf_weights, v_addr, v_weight, tb_clk);
+      FOR PB IN 0 TO c_sdp_N_pol_bf-1 LOOP
+        -- Same BF weights for both beamlet polarizations
+        FOR A IN 0 TO c_sdp_A_pn-1 LOOP
+          FOR P IN 0 TO c_sdp_N_pol-1 LOOP
+            v_S := A * c_sdp_N_pol + P;
+            IF v_S = g_sp THEN
+              -- use generic BF weight for g_sp in g_beamlet
+              IF PB = 0 THEN
+                v_weight := pack_complex(re => c_bf_x_weight_re, im => c_bf_x_weight_im, w => c_sdp_W_bf_weight);
+              ELSE
+                v_weight := pack_complex(re => c_bf_y_weight_re, im => c_bf_y_weight_im, w => c_sdp_W_bf_weight);
+              END IF;
+            ELSE
+              -- use the remnant BF weights for the other SP
+              IF PB = 0 THEN
+                v_weight := pack_complex(re => c_bf_remnant_x_weight_re, im => c_bf_remnant_x_weight_im, w => c_sdp_W_bf_weight);
+              ELSE
+                v_weight := pack_complex(re => c_bf_remnant_y_weight_re, im => c_bf_remnant_y_weight_im, w => c_sdp_W_bf_weight);
+              END IF;
+            END IF;
+            v_addr := g_beamlet;                              -- beamlet index
+            v_addr := v_addr + P * c_sdp_S_sub_bf;            -- antenna input polarization address offset
+            v_addr := v_addr + A * v_span;                    -- antenna input address offset
+            v_addr := v_addr + PB * c_sdp_A_pn * v_span;      -- beamlet polarization address offset
+            v_addr := v_addr + U * c_mm_span_ram_bf_weights;  -- beamset address offset
+            mmf_mm_bus_wr(c_mm_file_ram_bf_weights, v_addr, v_weight, tb_clk);
+          END LOOP;
         END LOOP;
       END LOOP;
     END LOOP;
 
-    -- . read back BF weights for g_beamlet
+    -- . read back BF weights for g_beamlet in S_sub_bf
     FOR U IN 0 TO c_sdp_N_beamsets-1 LOOP
-      FOR A IN 0 TO c_sdp_A_pn-1 LOOP
-        FOR P IN 0 TO c_sdp_N_pol-1 LOOP
-          v_addr := g_beamlet + A * v_span + U * c_mm_span_ram_bf_weights;
-          v_addr := v_addr + P * c_sdp_S_sub_bf;
-          mmf_mm_bus_rd(c_mm_file_ram_bf_weights, v_addr, rd_data, tb_clk);
-          v_re := unpack_complex_re(rd_data, c_sdp_W_bf_weight);
-          v_im := unpack_complex_im(rd_data, c_sdp_W_bf_weight);
-          -- same BF weights for both beamsets, so fine to use only one sp_bf_weights_*_arr()
-          v_S := A * c_sdp_N_pol + P;
-          sp_bf_weights_re_arr(v_S) <= v_re;
-          sp_bf_weights_im_arr(v_S) <= v_im;
-          sp_bf_weights_gain_arr(v_S) <= SQRT(REAL(v_re)**2.0 + REAL(v_im)**2.0) / REAL(c_sdp_unit_bf_weight);
-          sp_bf_weights_phase_arr(v_S) <= atan2(Y => REAL(v_im), X => REAL(v_re)) * 360.0 / MATH_2_PI;
+      FOR PB IN 0 TO c_sdp_N_pol_bf-1 LOOP
+        FOR A IN 0 TO c_sdp_A_pn-1 LOOP
+          FOR P IN 0 TO c_sdp_N_pol-1 LOOP
+            v_addr := g_beamlet;                              -- beamlet index
+            v_addr := v_addr + P * c_sdp_S_sub_bf;            -- antenna input polarization address offset
+            v_addr := v_addr + A * v_span;                    -- antenna input address offset
+            v_addr := v_addr + PB * c_sdp_A_pn * v_span;      -- beamlet polarization address offset
+            v_addr := v_addr + U * c_mm_span_ram_bf_weights;  -- beamset address offset
+            mmf_mm_bus_rd(c_mm_file_ram_bf_weights, v_addr, rd_data, tb_clk);
+            v_re := unpack_complex_re(rd_data, c_sdp_W_bf_weight);
+            v_im := unpack_complex_im(rd_data, c_sdp_W_bf_weight);
+            -- same BF weights for both beamsets and both beamlet polarizations,
+            -- so fine to use only one sp_bf_x_weights_*_arr()
+            v_S := A * c_sdp_N_pol + P;
+            IF PB = 0 THEN
+              sp_bf_x_weights_re_arr(v_S) <= v_re;
+              sp_bf_x_weights_im_arr(v_S) <= v_im;
+              sp_bf_x_weights_gain_arr(v_S) <= COMPLEX_RADIUS(v_re, v_im) / REAL(c_sdp_unit_bf_weight);
+              sp_bf_x_weights_phase_arr(v_S) <= COMPLEX_PHASE(v_re, v_im);
+            ELSE
+              sp_bf_y_weights_re_arr(v_S) <= v_re;
+              sp_bf_y_weights_im_arr(v_S) <= v_im;
+              sp_bf_y_weights_gain_arr(v_S) <= COMPLEX_RADIUS(v_re, v_im) / REAL(c_sdp_unit_bf_weight);
+              sp_bf_y_weights_phase_arr(v_S) <= COMPLEX_PHASE(v_re, v_im);
+            END IF;
+          END LOOP;
         END LOOP;
       END LOOP;
     END LOOP;
@@ -804,7 +1014,7 @@ BEGIN
           v_data_hi := rd_data;
           v_stat_data := v_data_hi & v_data_lo;
 
-          sp_subband_ssts_arr2(v_P)(v_B) <= v_stat_data;
+          sp_ssts_arr2(v_P)(v_B) <= v_stat_data;
           stat_data <= v_stat_data;  -- for time series view in Wave window
         END IF;
       END IF;
@@ -812,10 +1022,7 @@ BEGIN
     proc_common_wait_some_cycles(tb_clk, 1);
 
     -- Subband power of g_subband in g_sp
-    -- . For the selected g_subband in g_sp the sp_subband_sst will be close
-    --   to sp_subband_sst_sum_arr(c_pol_index), because the input is a
-    --   sinus, so most power will be in 1 subband.
-    sp_subband_sst <= TO_UREAL(sp_subband_ssts_arr2(c_pol_index)(g_subband));
+    sp_sst <= TO_UREAL(sp_ssts_arr2(c_pol_index)(g_subband));
     proc_common_wait_some_cycles(tb_clk, 1);
     proc_common_wait_some_cycles(ext_clk, 100);  -- delay for ease of view in Wave window
  
@@ -831,8 +1038,8 @@ BEGIN
     v_span := true_log_pow2(v_len);                             -- = 2048
     FOR U IN 0 TO c_sdp_N_beamsets-1 LOOP
       FOR I IN 0 TO v_len-1 LOOP
-        v_W := I MOD c_stat_data_sz;                     -- 0, 1 per statistics word, word index
-        v_P := (I / c_stat_data_sz) MOD c_sdp_N_pol_bf;  -- 0, 1 per BF pol, polarization index
+        v_W := I MOD c_stat_data_sz;                      -- 0, 1 per statistics word, word index
+        v_PB := (I / c_stat_data_sz) MOD c_sdp_N_pol_bf;  -- 0, 1 per BF pol, polarization index
         v_B := I / (c_sdp_N_pol_bf * c_stat_data_sz);    -- beamlet index in beamset, range(S_sub_bf = 488) per dual pol
         v_G := v_B + U * c_sdp_S_sub_bf;                 -- global beamlet index, range(c_sdp_N_beamlets_sdp)
         v_addr := I + U * v_span;                        -- MM address
@@ -848,7 +1055,7 @@ BEGIN
             v_data_hi := rd_data;
             v_stat_data := v_data_hi & v_data_lo;
 
-            pol_beamlet_bsts_arr2(v_P)(v_G) <= v_stat_data;
+            bsts_arr2(v_PB)(v_G) <= v_stat_data;
             stat_data <= v_stat_data;  -- for time series view in Wave window
           END IF;
         END IF;
@@ -859,8 +1066,8 @@ BEGIN
     -- Beamlet power of g_beamlet X and Y, same for both beamsets
     FOR U IN 0 TO c_sdp_N_beamsets-1 LOOP
       v_G := g_beamlet + U * c_sdp_S_sub_bf;  -- global beamlet index, range(c_sdp_N_beamlets_sdp)
-      pol_beamlet_bst_X_arr(U) <= TO_UREAL(pol_beamlet_bsts_arr2(0)(v_G));  -- X pol beamlet
-      pol_beamlet_bst_Y_arr(U) <= TO_UREAL(pol_beamlet_bsts_arr2(1)(v_G));  -- Y pol beamlet
+      bst_x_arr(U) <= TO_UREAL(bsts_arr2(0)(v_G));  -- X pol beamlet
+      bst_y_arr(U) <= TO_UREAL(bsts_arr2(1)(v_G));  -- Y pol beamlet
     END LOOP;
     proc_common_wait_some_cycles(tb_clk, 1);
     proc_common_wait_some_cycles(ext_clk, 100);  -- delay for ease of view in Wave window
@@ -875,10 +1082,6 @@ BEGIN
     print_str(". c_exp_sp_power                       = " & real_to_str(c_exp_sp_power, 20, 1));
     print_str(". c_exp_sp_ast                         = " & real_to_str(c_exp_sp_ast, 20, 1));
   
-    print_str("");
-    print_str("Subband selector:");
-    print_str(". sst_offload_weighted_subbands        = " & sl_to_str(sst_offload_weighted_subbands));
-
     print_str("");
     print_str("Subband weight:");
     print_str(". sp_subband_weight_gain               = " & real_to_str(sp_subband_weight_gain, 20, 6));
@@ -886,69 +1089,88 @@ BEGIN
 
     print_str("");
     print_str("SST results:");
+    print_str(". sst_weighted_subbands_flag           = " & sl_to_str(sst_weighted_subbands_flag));
+    print_str("");
     print_str(". c_exp_subband_ampl                   = " & int_to_str(NATURAL(c_exp_subband_ampl)));
     print_str(". c_exp_subband_power                  = " & real_to_str(c_exp_subband_power, 20, 1));
     print_str(". c_exp_subband_sst                    = " & real_to_str(c_exp_subband_sst, 20, 1));
     print_str("");
-    print_str(". sp_subband_sst                       = " & real_to_str(sp_subband_sst, 20, 1));
-    print_str(". sp_subband_sst / c_exp_subband_sst   = " & real_to_str(sp_subband_sst / c_exp_subband_sst, 20, 6));
+    print_str(". sp_sst                               = " & real_to_str(sp_sst, 20, 1));
+    print_str(". sp_sst / c_exp_subband_sst           = " & real_to_str(sp_sst / c_exp_subband_sst, 20, 6));
 
     print_str("");
     print_str("BST results:");
-    print_str(". c_exp_beamlet_ampl                   = " & int_to_str(NATURAL(c_exp_beamlet_ampl)));
-    print_str(". c_exp_beamlet_power                  = " & real_to_str(c_exp_beamlet_power, 20, 1));
-    print_str(". c_exp_beamlet_bst                    = " & real_to_str(c_exp_beamlet_bst, 20, 1));
+    print_str(". c_exp_beamlet_x_ampl                   = " & int_to_str(NATURAL(c_exp_beamlet_x_ampl)));
+    print_str(". c_exp_beamlet_x_power                  = " & real_to_str(c_exp_beamlet_x_power, 20, 1));
+    print_str(". c_exp_beamlet_x_bst                    = " & real_to_str(c_exp_beamlet_x_bst, 20, 1));
+    print_str("");
+    print_str(". c_exp_beamlet_y_ampl                   = " & int_to_str(NATURAL(c_exp_beamlet_y_ampl)));
+    print_str(". c_exp_beamlet_y_power                  = " & real_to_str(c_exp_beamlet_y_power, 20, 1));
+    print_str(". c_exp_beamlet_y_bst                    = " & real_to_str(c_exp_beamlet_y_bst, 20, 1));
     print_str("");
     FOR U IN 0 TO c_sdp_N_beamsets-1 LOOP
       v_G := g_beamlet + U * c_sdp_S_sub_bf;  -- global beamlet index, range(c_sdp_N_beamlets_sdp)
-      print_str(". pol_beamlet_bst_X beamlet(" & INTEGER'IMAGE(v_G) & ") = " & real_to_str(pol_beamlet_bst_X_arr(U), 20, 1));
-      print_str(". pol_beamlet_bst_Y beamlet(" & INTEGER'IMAGE(v_G) & ") = " & real_to_str(pol_beamlet_bst_Y_arr(U), 20, 1));
+      print_str(". bst_x_arr(" & INTEGER'IMAGE(v_G) & ") = " & real_to_str(bst_x_arr(U), 20, 1));
+      print_str(". bst_y_arr(" & INTEGER'IMAGE(v_G) & ") = " & real_to_str(bst_y_arr(U), 20, 1));
     END LOOP;
     FOR U IN 0 TO c_sdp_N_beamsets-1 LOOP
       v_G := g_beamlet + U * c_sdp_S_sub_bf;  -- global beamlet index, range(c_sdp_N_beamlets_sdp)
-      print_str(". pol_beamlet_bst_X beamlet(" & INTEGER'IMAGE(v_G) & ") / c_exp_beamlet_bst = " & real_to_str(pol_beamlet_bst_X_arr(U) / c_exp_beamlet_bst, 20, 6));
-      print_str(". pol_beamlet_bst_Y beamlet(" & INTEGER'IMAGE(v_G) & ") / c_exp_beamlet_bst = " & real_to_str(pol_beamlet_bst_Y_arr(U) / c_exp_beamlet_bst, 20, 6));
+      print_str(". bst_x_arr(" & INTEGER'IMAGE(v_G) & ") / c_exp_beamlet_x_bst = " & real_to_str(bst_x_arr(U) / c_exp_beamlet_x_bst, 20, 6));
+      print_str(". bst_y_arr(" & INTEGER'IMAGE(v_G) & ") / c_exp_beamlet_y_bst = " & real_to_str(bst_y_arr(U) / c_exp_beamlet_y_bst, 20, 6));
     END LOOP;
 
     print_str("");
     print_str("Beamlet output:");
     print_str(". rd_beamlet_scale                     = " & int_to_str(TO_UINT(rd_beamlet_scale)));
-    print_str(". c_exp_beamlet_output_ampl            = " & int_to_str(NATURAL(c_exp_beamlet_output_ampl)));
-    
+    print_str(". c_exp_beamlet_scale                  = " & int_to_str(c_exp_beamlet_scale));
+    print_str("");
+    print_str(". c_exp_beamlet_x_output_ampl          = " & int_to_str(NATURAL(c_exp_beamlet_x_output_ampl)));
+    print_str(". c_exp_beamlet_x_output_phase         = " & int_to_str(INTEGER(c_exp_beamlet_x_output_phase)));
+    print_str(". c_exp_beamlet_x_output_re            = " & int_to_str(INTEGER(c_exp_beamlet_x_output_re)));
+    print_str(". c_exp_beamlet_x_output_im            = " & int_to_str(INTEGER(c_exp_beamlet_x_output_im)));
+    print_str("");
+    print_str(". c_exp_beamlet_y_output_ampl          = " & int_to_str(NATURAL(c_exp_beamlet_y_output_ampl)));
+    print_str(". c_exp_beamlet_y_output_phase         = " & int_to_str(INTEGER(c_exp_beamlet_y_output_phase)));
+    print_str(". c_exp_beamlet_y_output_re            = " & int_to_str(INTEGER(c_exp_beamlet_y_output_re)));
+    print_str(". c_exp_beamlet_y_output_im            = " & int_to_str(INTEGER(c_exp_beamlet_y_output_im)));
+
     ---------------------------------------------------------------------------
-    -- Verify SST and BST
+    -- Verify SST
     ---------------------------------------------------------------------------
-    
     -- verify expected subband power based on WG power
-    ASSERT sp_subband_sst > c_stat_lo_factor * c_exp_subband_sst REPORT "Wrong subband power for SP " & NATURAL'IMAGE(g_sp) SEVERITY ERROR;
-    ASSERT sp_subband_sst < c_stat_hi_factor * c_exp_subband_sst REPORT "Wrong subband power for SP " & NATURAL'IMAGE(g_sp) SEVERITY ERROR;
+    ASSERT sp_sst > c_stat_lo_factor * c_exp_subband_sst REPORT "Wrong subband power for SP " & NATURAL'IMAGE(g_sp) SEVERITY ERROR;
+    ASSERT sp_sst < c_stat_hi_factor * c_exp_subband_sst REPORT "Wrong subband power for SP " & NATURAL'IMAGE(g_sp) SEVERITY ERROR;
 
+    ---------------------------------------------------------------------------
+    -- Verify BST
+    ---------------------------------------------------------------------------
     -- verify expected beamlet power based on WG power and BF weigths
-    --
-    -- All co and cross polarization weights are equal: w = w_xx = w_xy = w_yx = w_yy
-    -- With one g_sp either SP X or SP Y is used, so the other antenna polarization is 0
-    -- Hence the c_exp_beamlet_bst will be the same for beamlet X and Y independent of whether g_sp is an X or Y signal input:
-    --   g_sp = X --> w_xx --> beamlet X = c_exp_beamlet_bst
-    --   g_sp = Y --> w_xy --> beamlet X = c_exp_beamlet_bst
-    --   g_sp = X --> w_yx --> beamlet Y = c_exp_beamlet_bst
-    --   g_sp = Y --> w_yy --> beamlet Y = c_exp_beamlet_bst
-    --
     FOR U IN 0 TO c_sdp_N_beamsets-1 LOOP
-      ASSERT pol_beamlet_bst_X_arr(U) > c_stat_lo_factor * c_exp_beamlet_bst REPORT "Wrong beamlet power for X in beamset " & NATURAL'IMAGE(U) SEVERITY ERROR;
-      ASSERT pol_beamlet_bst_X_arr(U) < c_stat_hi_factor * c_exp_beamlet_bst REPORT "Wrong beamlet power for X in beamset " & NATURAL'IMAGE(U) SEVERITY ERROR;
-      ASSERT pol_beamlet_bst_Y_arr(U) > c_stat_lo_factor * c_exp_beamlet_bst REPORT "Wrong beamlet power for Y in beamset " & NATURAL'IMAGE(U) SEVERITY ERROR;
-      ASSERT pol_beamlet_bst_Y_arr(U) < c_stat_hi_factor * c_exp_beamlet_bst REPORT "Wrong beamlet power for Y in beamset " & NATURAL'IMAGE(U) SEVERITY ERROR;
+      -- X-pol
+      ASSERT bst_x_arr(U) < c_stat_hi_factor * c_exp_beamlet_x_bst REPORT "Wrong beamlet X power in beamset " & NATURAL'IMAGE(U) SEVERITY ERROR;
+      ASSERT bst_x_arr(U) > c_stat_lo_factor * c_exp_beamlet_x_bst REPORT "Wrong beamlet X power in beamset " & NATURAL'IMAGE(U) SEVERITY ERROR;
+      -- Y-pol
+      ASSERT bst_y_arr(U) > c_stat_lo_factor * c_exp_beamlet_y_bst REPORT "Wrong beamlet Y power in beamset " & NATURAL'IMAGE(U) SEVERITY ERROR;
+      ASSERT bst_y_arr(U) < c_stat_hi_factor * c_exp_beamlet_y_bst REPORT "Wrong beamlet Y power in beamset " & NATURAL'IMAGE(U) SEVERITY ERROR;
     END LOOP;
 
     ---------------------------------------------------------------------------
     -- Verify beamlet output in 10GbE UDP offload
     ---------------------------------------------------------------------------
-    v_re := TO_SINT(rx_beamlet_list_re(c_exp_beamlet_index)); v_re_exp := c_exp_beamlet_output_re;
-    v_im := TO_SINT(rx_beamlet_list_im(c_exp_beamlet_index)); v_im_exp := c_exp_beamlet_output_im;
-    ASSERT v_re > INTEGER(v_re_exp) - c_beamlet_output_delta REPORT "Wrong 10GbE output (re) " & INTEGER'IMAGE(v_re) & " != " & REAL'IMAGE(v_re_exp) SEVERITY ERROR;
-    ASSERT v_re < INTEGER(v_re_exp) + c_beamlet_output_delta REPORT "Wrong 10GbE output (re) " & INTEGER'IMAGE(v_re) & " != " & REAL'IMAGE(v_re_exp) SEVERITY ERROR;
-    ASSERT v_im > INTEGER(v_im_exp) - c_beamlet_output_delta REPORT "Wrong 10GbE output (im) " & INTEGER'IMAGE(v_im) & " != " & REAL'IMAGE(v_im_exp) SEVERITY ERROR;
-    ASSERT v_im < INTEGER(v_im_exp) + c_beamlet_output_delta REPORT "Wrong 10GbE output (im) " & INTEGER'IMAGE(v_im) & " != " & REAL'IMAGE(v_im_exp) SEVERITY ERROR;
+    -- X-pol
+    v_re := TO_SINT(rx_beamlet_list_re(c_exp_beamlet_x_index)); v_re_exp := c_exp_beamlet_x_output_re;
+    v_im := TO_SINT(rx_beamlet_list_im(c_exp_beamlet_x_index)); v_im_exp := c_exp_beamlet_x_output_im;
+    ASSERT v_re > INTEGER(v_re_exp) - c_beamlet_output_delta REPORT "Wrong beamlet X output (re) " & INTEGER'IMAGE(v_re) & " != " & REAL'IMAGE(v_re_exp) SEVERITY ERROR;
+    ASSERT v_re < INTEGER(v_re_exp) + c_beamlet_output_delta REPORT "Wrong beamlet X output (re) " & INTEGER'IMAGE(v_re) & " != " & REAL'IMAGE(v_re_exp) SEVERITY ERROR;
+    ASSERT v_im > INTEGER(v_im_exp) - c_beamlet_output_delta REPORT "Wrong beamlet X output (im) " & INTEGER'IMAGE(v_im) & " != " & REAL'IMAGE(v_im_exp) SEVERITY ERROR;
+    ASSERT v_im < INTEGER(v_im_exp) + c_beamlet_output_delta REPORT "Wrong beamlet X output (im) " & INTEGER'IMAGE(v_im) & " != " & REAL'IMAGE(v_im_exp) SEVERITY ERROR;
+    -- Y-pol
+    v_re := TO_SINT(rx_beamlet_list_re(c_exp_beamlet_y_index)); v_re_exp := c_exp_beamlet_y_output_re;
+    v_im := TO_SINT(rx_beamlet_list_im(c_exp_beamlet_y_index)); v_im_exp := c_exp_beamlet_y_output_im;
+    ASSERT v_re > INTEGER(v_re_exp) - c_beamlet_output_delta REPORT "Wrong beamlet Y output (re) " & INTEGER'IMAGE(v_re) & " != " & REAL'IMAGE(v_re_exp) SEVERITY ERROR;
+    ASSERT v_re < INTEGER(v_re_exp) + c_beamlet_output_delta REPORT "Wrong beamlet Y output (re) " & INTEGER'IMAGE(v_re) & " != " & REAL'IMAGE(v_re_exp) SEVERITY ERROR;
+    ASSERT v_im > INTEGER(v_im_exp) - c_beamlet_output_delta REPORT "Wrong beamlet Y output (im) " & INTEGER'IMAGE(v_im) & " != " & REAL'IMAGE(v_im_exp) SEVERITY ERROR;
+    ASSERT v_im < INTEGER(v_im_exp) + c_beamlet_output_delta REPORT "Wrong beamlet Y output (im) " & INTEGER'IMAGE(v_im) & " != " & REAL'IMAGE(v_im_exp) SEVERITY ERROR;
 
     ---------------------------------------------------------------------------
     -- End Simulation 
@@ -967,22 +1189,22 @@ BEGIN
   p_test_counters : PROCESS(ext_clk)
   BEGIN
     IF rising_edge(ext_clk) THEN
-      -- Count test_offload_sosi packets
-      IF test_offload_sosi.sop = '1' THEN
-        test_offload_sop_cnt <= test_offload_sop_cnt + 1;  -- early count
+      -- Count rx_beamlet_sosi packets
+      IF rx_beamlet_sosi.sop = '1' THEN
+        rx_beamlet_sop_cnt <= rx_beamlet_sop_cnt + 1;  -- early count
       END IF;
-      IF test_offload_sosi.eop = '1' THEN
-        test_offload_eop_cnt <= test_offload_eop_cnt + 1;  -- after count
+      IF rx_beamlet_sosi.eop = '1' THEN
+        rx_beamlet_eop_cnt <= rx_beamlet_eop_cnt + 1;  -- after count
       END IF;
     END IF;
   END PROCESS;
 
-  -- Count sync intervals using in_sosi.sync, because there is no test_offload_sosi.sync
+  -- Count sync intervals using in_sosi.sync, because there is no rx_beamlet_sosi.sync
   in_sync_cnt <= in_sync_cnt + 1 WHEN rising_edge(ext_clk) AND in_sync = '1';
-  test_sync_cnt <= in_sync_cnt - 1;  -- optionally adjust to fit test_offload_sosi
+  test_sync_cnt <= in_sync_cnt - 1;  -- optionally adjust to fit rx_beamlet_sosi
 
-  -- Prepare exp_sdp_cep_header before test_offload_sosi.eop, so that
-  -- p_exp_sdp_cep_header can verify it at test_offload_sosi.eop.
+  -- Prepare exp_sdp_cep_header before rx_beamlet_sosi.eop, so that
+  -- p_exp_sdp_cep_header can verify it at rx_beamlet_sosi.eop.
 
   p_exp_sdp_cep_header : PROCESS(exp_dp_bsn)
   BEGIN
@@ -1044,10 +1266,10 @@ BEGIN
     WAIT UNTIL rising_edge(ext_clk);
 
     -- Prepare exp_sdp_cep_header at sop, so that it can be verified at eop
-    IF test_offload_sosi.sop = '1' THEN
+    IF rx_beamlet_sosi.sop = '1' THEN
       -- Expected BSN increments by c_sdp_cep_nof_blocks_per_packet = 4 blocks per packet
-      IF test_offload_sop_cnt MOD c_sdp_N_beamsets = 0 THEN
-        exp_dp_bsn <= c_init_bsn + (test_offload_sop_cnt / c_sdp_N_beamsets) * c_sdp_cep_nof_blocks_per_packet;
+      IF rx_beamlet_sop_cnt MOD c_sdp_N_beamsets = 0 THEN
+        exp_dp_bsn <= c_init_bsn + (rx_beamlet_sop_cnt / c_sdp_N_beamsets) * c_sdp_cep_nof_blocks_per_packet;
       END IF;
     END IF;
 
@@ -1056,7 +1278,7 @@ BEGIN
     --   or 1, but the order in which the packets arrive is undetermined.
     --   Therefore accept any beamlet_index MOD c_sdp_S_sub_bf = 0 as correct
     --   in func_sdp_verify_cep_header().
-    IF test_offload_sosi.eop = '1' THEN
+    IF rx_beamlet_sosi.eop = '1' THEN
       v_bool := func_sdp_verify_cep_header(rx_sdp_cep_header, exp_sdp_cep_header);
     END IF;
   END PROCESS;
@@ -1072,47 +1294,47 @@ BEGIN
   -- . expect c_sdp_cep_nof_beamlets_per_block = c_sdp_S_sub_bf = 488 dual pol
   --   and complex beamlets per packet, so 2 dual pol beamlets/64b data word.
   -- . Beamlets array is stored big endian in the data, so X index 0 first in
-  --   MSByte of test_offload_sosi.data.
+  --   MSByte of rx_beamlet_sosi.data.
   p_rx_cep_beamlets : PROCESS
   BEGIN
     rx_beamlet_cnt <= 0;
     rx_beamlet_valid <= '0';
     -- Wait until start of a beamlet packet, capture only first block in packet
-    proc_common_wait_until_high(ext_clk, test_offload_sosi.sop);
-    -- 2 dual pol beamlets (= XY, XY) per 64b data word
-    FOR I IN 0 TO (c_sdp_cep_nof_blocks_per_packet * c_sdp_cep_nof_beamlets_per_block/2)-1 LOOP
-      proc_common_wait_until_high(ext_clk, test_offload_sosi.valid);
+    proc_common_wait_until_high(ext_clk, rx_beamlet_sosi.sop);
+    -- c_nof_beamlets_per_data = 2 dual pol beamlets (= XY, XY) per 64b data word
+    FOR I IN 0 TO (c_sdp_cep_nof_blocks_per_packet * c_sdp_cep_nof_beamlets_per_block / c_nof_beamlets_per_data)-1 LOOP
+      proc_common_wait_until_high(ext_clk, rx_beamlet_sosi.valid);
       rx_beamlet_valid <= '1';
       -- Capture rx beamlets per longword in rx_beamlet_arr, for time series view in Wave window
-      rx_beamlet_arr_re(0) <= test_offload_sosi.data(55 DOWNTO 48);  -- X
-      rx_beamlet_arr_im(0) <= test_offload_sosi.data(63 DOWNTO 56);
-      rx_beamlet_arr_re(1) <= test_offload_sosi.data(39 DOWNTO 32);  -- Y
-      rx_beamlet_arr_im(1) <= test_offload_sosi.data(47 DOWNTO 40);
-      rx_beamlet_arr_re(2) <= test_offload_sosi.data(23 DOWNTO 16);  -- X
-      rx_beamlet_arr_im(2) <= test_offload_sosi.data(31 DOWNTO 24);
-      rx_beamlet_arr_re(3) <= test_offload_sosi.data( 7 DOWNTO 0);   -- Y
-      rx_beamlet_arr_im(3) <= test_offload_sosi.data(15 DOWNTO 8);
-      IF I < c_sdp_cep_nof_beamlets_per_block/2 THEN
+      rx_beamlet_arr_re(0) <= rx_beamlet_sosi.data(55 DOWNTO 48);  -- X
+      rx_beamlet_arr_im(0) <= rx_beamlet_sosi.data(63 DOWNTO 56);
+      rx_beamlet_arr_re(1) <= rx_beamlet_sosi.data(39 DOWNTO 32);  -- Y
+      rx_beamlet_arr_im(1) <= rx_beamlet_sosi.data(47 DOWNTO 40);
+      rx_beamlet_arr_re(2) <= rx_beamlet_sosi.data(23 DOWNTO 16);  -- X
+      rx_beamlet_arr_im(2) <= rx_beamlet_sosi.data(31 DOWNTO 24);
+      rx_beamlet_arr_re(3) <= rx_beamlet_sosi.data( 7 DOWNTO 0);   -- Y
+      rx_beamlet_arr_im(3) <= rx_beamlet_sosi.data(15 DOWNTO 8);
+      IF I < c_sdp_cep_nof_beamlets_per_block / c_nof_beamlets_per_data THEN
         -- Only capture the first beamlets block of each packet in rx_beamlet_list
-        rx_beamlet_list_re(I*4 + 0) <= test_offload_sosi.data(55 DOWNTO 48);  -- X
-        rx_beamlet_list_im(I*4 + 0) <= test_offload_sosi.data(63 DOWNTO 56);
-        rx_beamlet_list_re(I*4 + 1) <= test_offload_sosi.data(39 DOWNTO 32);  -- Y
-        rx_beamlet_list_im(I*4 + 1) <= test_offload_sosi.data(47 DOWNTO 40);
-        rx_beamlet_list_re(I*4 + 2) <= test_offload_sosi.data(23 DOWNTO 16);  -- X
-        rx_beamlet_list_im(I*4 + 2) <= test_offload_sosi.data(31 DOWNTO 24);
-        rx_beamlet_list_re(I*4 + 3) <= test_offload_sosi.data( 7 DOWNTO 0);   -- Y
-        rx_beamlet_list_im(I*4 + 3) <= test_offload_sosi.data(15 DOWNTO 8);
+        rx_beamlet_list_re(I*4 + 0) <= rx_beamlet_sosi.data(55 DOWNTO 48);  -- X
+        rx_beamlet_list_im(I*4 + 0) <= rx_beamlet_sosi.data(63 DOWNTO 56);
+        rx_beamlet_list_re(I*4 + 1) <= rx_beamlet_sosi.data(39 DOWNTO 32);  -- Y
+        rx_beamlet_list_im(I*4 + 1) <= rx_beamlet_sosi.data(47 DOWNTO 40);
+        rx_beamlet_list_re(I*4 + 2) <= rx_beamlet_sosi.data(23 DOWNTO 16);  -- X
+        rx_beamlet_list_im(I*4 + 2) <= rx_beamlet_sosi.data(31 DOWNTO 24);
+        rx_beamlet_list_re(I*4 + 3) <= rx_beamlet_sosi.data( 7 DOWNTO 0);   -- Y
+        rx_beamlet_list_im(I*4 + 3) <= rx_beamlet_sosi.data(15 DOWNTO 8);
       END IF;
-      proc_common_wait_until_high(ext_clk, test_offload_sosi.valid);
+      proc_common_wait_until_high(ext_clk, rx_beamlet_sosi.valid);
       -- Use at least one WAIT instead of proc_common_wait_some_cycles() to
       -- avoid Modelsim warning: (vcom-1090) Possible infinite loop: Process
       -- contains no WAIT statement.
       WAIT UNTIL rising_edge(ext_clk);
       rx_beamlet_valid <= '0';
-      rx_beamlet_cnt   <= (rx_beamlet_cnt + 4) MOD c_sdp_cep_nof_beamlets_per_block;  -- 4 blocks/packet
+      rx_beamlet_cnt   <= (rx_beamlet_cnt + c_nof_beamlets_per_data) MOD c_sdp_cep_nof_beamlets_per_block;  -- 4 blocks/packet
     END LOOP;
   END PROCESS;
 
   -- To view the 64 bit 10GbE offload data more easily in the Wave window
-  test_offload_data <= test_offload_sosi.data(c_longword_w-1 DOWNTO 0);
+  rx_beamlet_data <= rx_beamlet_sosi.data(c_longword_w-1 DOWNTO 0);
 END tb;
diff --git a/applications/lofar2/designs/lofar2_unb2c_sdp_station/revisions/lofar2_unb2c_sdp_station_bf/tb_tb_lofar2_unb2c_sdp_station_bf.vhd b/applications/lofar2/designs/lofar2_unb2c_sdp_station/revisions/lofar2_unb2c_sdp_station_bf/tb_tb_lofar2_unb2c_sdp_station_bf.vhd
new file mode 100644
index 0000000000000000000000000000000000000000..3c601579d3607b1ee85fc730359a46660b34fdcb
--- /dev/null
+++ b/applications/lofar2/designs/lofar2_unb2c_sdp_station/revisions/lofar2_unb2c_sdp_station_bf/tb_tb_lofar2_unb2c_sdp_station_bf.vhd
@@ -0,0 +1,65 @@
+-- --------------------------------------------------------------------------
+-- Copyright 2021
+-- ASTRON (Netherlands Institute for Radio Astronomy) <http://www.astron.nl/>
+-- P.O.Box 2, 7990 AA Dwingeloo, The Netherlands
+--
+-- Licensed under the Apache License, Version 2.0 (the "License");
+-- you may not use this file except in compliance with the License.
+-- You may obtain a copy of the License at
+--
+-- http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+-- --------------------------------------------------------------------------
+--
+-- Author: E. Kooistra, 10 march 2022
+-- Purpose: Regression multi tb for tb_lofar2_unb2c_sdp_station_bf
+-- Description:
+--   The multi tb only has one instance, so the tb_tb is more a wrapper to
+--   ensure that always the same tb generics are used in the regression test.
+--   This allows modifying the generics in the tb.
+-- Usage:
+-- > as 4
+-- > run -all
+
+LIBRARY IEEE;
+USE IEEE.std_logic_1164.ALL;
+
+
+ENTITY tb_tb_lofar2_unb2c_sdp_station_bf IS
+END tb_tb_lofar2_unb2c_sdp_station_bf;
+
+
+ARCHITECTURE tb OF tb_tb_lofar2_unb2c_sdp_station_bf IS
+
+  SIGNAL tb_end : STD_LOGIC := '0';  -- declare tb_end to avoid 'No objects found' error on 'when -label tb_end'
+
+BEGIN
+
+  u_bf : ENTITY work.tb_lofar2_unb2c_sdp_station_bf
+  GENERIC MAP (
+    g_sp                 => 3,             -- WG signal path (SP) index in range(S_pn = 12)
+    g_sp_ampl            => 0.5,           -- WG normalized amplitude
+    g_sp_phase           => -110.0,        -- WG phase in degrees = subband phase
+    g_sp_remnant_ampl    => 0.1,           -- WG normalized amplitude for remnant sp
+    g_sp_remnant_phase   => 15.0,          -- WG phase in degrees for remnant sp
+    g_subband            => 102,           -- select g_subband at index 102 = 102/1024 * 200MHz = 19.921875 MHz
+    g_beamlet            => 10,            -- map g_subband to g_beamlet index in beamset in range(c_sdp_S_sub_bf = 488)
+    g_beamlet_scale      => 1.0 / 2.0**9,  -- g_beamlet output scale factor
+    g_bf_x_gain          => 0.7,           -- g_beamlet X BF weight normalized gain for g_sp
+    g_bf_y_gain          => 0.6,           -- g_beamlet Y BF weight normalized gain for g_sp
+    g_bf_x_phase         => 30.0,          -- g_beamlet X BF weight phase rotation in degrees for g_sp
+    g_bf_y_phase         => 40.0,          -- g_beamlet Y BF weight phase rotation in degrees for g_sp
+    g_bf_remnant_x_gain  => 0.05,          -- g_beamlet X BF weight normalized gain for remnant sp
+    g_bf_remnant_y_gain  => 0.04,          -- g_beamlet Y BF weight normalized gain for remnant sp
+    g_bf_remnant_x_phase => 170.0,         -- g_beamlet X BF weight phase rotation in degrees for g_sp
+    g_bf_remnant_y_phase => -135.0,        -- g_beamlet Y BF weight phase rotation in degrees for g_sp
+    g_read_all_SST       => FALSE,         -- when FALSE only read SST for g_subband, to save sim time
+    g_read_all_BST       => FALSE          -- when FALSE only read BST for g_beamlet, to save sim time
+  );
+
+END tb;
diff --git a/applications/lofar2/libraries/sdp/src/vhdl/node_sdp_adc_input_and_timing.vhd b/applications/lofar2/libraries/sdp/src/vhdl/node_sdp_adc_input_and_timing.vhd
index 54d4befcf255eeb8d7b3f1ce8e07e4c193c48f59..62f00be5361a92925379ac1631dc4e307b1e3790 100644
--- a/applications/lofar2/libraries/sdp/src/vhdl/node_sdp_adc_input_and_timing.vhd
+++ b/applications/lofar2/libraries/sdp/src/vhdl/node_sdp_adc_input_and_timing.vhd
@@ -145,14 +145,13 @@ ARCHITECTURE str OF node_sdp_adc_input_and_timing IS
   SIGNAL mm_rst_internal            : STD_LOGIC; 
   SIGNAL mm_jesd_ctrl_reg           : STD_LOGIC_VECTOR(c_word_w-1 DOWNTO 0);
   SIGNAL jesd204b_disable_arr       : STD_LOGIC_VECTOR(c_sdp_S_pn-1 DOWNTO 0);
-  SIGNAL jesd204b_reset             : STD_LOGIC;
 
 BEGIN
 
-  -- The node AIT is reset at power up by mm_rst and under software control by jesd204b_reset.
+  -- The node AIT is reset at power up by mm_rst and under software control by jesd204b_disable_arr.
   -- The mm_rst internal will cause a reset on the rx_rst by the reset sequencer in the u_jesd204b.
-  -- The MM jesd204b_reset is intended for node AIT resynchronisation tests of the u_jesd204b.
-  -- The MM jesd204b_reset should not be applied in an SDP application, because this will cause
+  -- The MM jesd204b_disable_arr is intended for node AIT resynchronisation tests of the u_jesd204b.
+  -- The MM jesd204b_disable_arr should not be applied in an SDP application, because this will cause
   -- a disturbance in the block timing of the out_sosi_arr(i).sync,bsn,sop,eop. The other logic
   -- in an SDP application assumes that the block timing of the out_sosi_arr(i) only contains
   -- complete blocks, so from sop to eop.
diff --git a/applications/lofar2/libraries/sdp/src/vhdl/sdp_beamformer_local.vhd b/applications/lofar2/libraries/sdp/src/vhdl/sdp_beamformer_local.vhd
index 134b80aa33fee8a984a5e9a0d60fe94f8023ba5e..39f94923b302fcb5ac28f72cf5cc5185b84901b8 100644
--- a/applications/lofar2/libraries/sdp/src/vhdl/sdp_beamformer_local.vhd
+++ b/applications/lofar2/libraries/sdp/src/vhdl/sdp_beamformer_local.vhd
@@ -77,11 +77,15 @@ ARCHITECTURE str OF sdp_beamformer_local IS
 
 BEGIN
   ---------------------------------------------------------------
-  -- COPY INPUT STERAMS FOR X AND Y POLARIZATION PATHS 
-  ---------------------------------------------------------------
-  gen_pol : FOR N_pol_bf IN 0 TO c_sdp_N_pol_bf-1 GENERATE
-    gen_pfb : FOR P_pfb IN 0 TO c_sdp_P_pfb-1 GENERATE
-      sub_sosi_arr(N_pol_bf * c_sdp_P_pfb + P_pfb) <= in_sosi_arr(P_pfb);
+  -- COPY INPUT STREAMS FOR X AND Y POLARIZATION PATHS
+  --   0: 5 = S_pn signal inputs (time multiplexed by Q_fft) for BF X pol
+  --   6:11 = S_pn signal inputs (time multiplexed by Q_fft) for BF Y pol
+  ---------------------------------------------------------------
+  -- Use short index variables PB (= Polarization Beamlet), I (= Instance)
+  -- names, to ease recognizing them as loop indices.
+  gen_pol : FOR PB IN 0 TO c_sdp_N_pol_bf-1 GENERATE
+    gen_pfb : FOR I IN 0 TO c_sdp_P_pfb-1 GENERATE
+      sub_sosi_arr(PB * c_sdp_P_pfb + I) <= in_sosi_arr(I);
     END GENERATE;
   END GENERATE;
 
@@ -109,23 +113,8 @@ BEGIN
   -- X pol is lower half of bf_weights_out
   bf_weights_x_sosi_arr <= bf_weights_out_sosi_arr(c_sdp_P_pfb-1 DOWNTO 0);
 
-  ---------------------------------------------------------------
-  -- DP PIPELINE Y PATH 
-  ---------------------------------------------------------------
-  -- The weighted subbands from the Y polarization path are pipelined 
-  -- by one cycle so that they can be time multiplexed with the 
-  -- weighted subbands from the X polarization.
-  u_pipeline_y_pol : ENTITY dp_lib.dp_pipeline_arr
-  GENERIC MAP (
-    g_nof_streams => c_sdp_P_pfb,
-    g_pipeline    => 1 
-  )
-  PORT MAP (
-    rst          => dp_rst,
-    clk          => dp_clk,
-    snk_in_arr   => bf_weights_out_sosi_arr(2*c_sdp_P_pfb-1 DOWNTO c_sdp_P_pfb),
-    src_out_arr  => bf_weights_y_sosi_arr
-  );
+  -- Y pol is upper half of bf_weights_out
+  bf_weights_y_sosi_arr <= bf_weights_out_sosi_arr(2*c_sdp_P_pfb-1 DOWNTO c_sdp_P_pfb);
 
   ---------------------------------------------------------------
   -- DEINTERLEAVE X PATH
@@ -157,7 +146,7 @@ BEGIN
       rst   => dp_rst,      
       clk   => dp_clk,      
   
-      snk_in => bf_weights_y_sosi_arr(I),  
+      snk_in => bf_weights_y_sosi_arr(I),
       src_out_arr(0) => deinterleaved_y_sosi_arr(c_sdp_Q_fft*I), 
       src_out_arr(1) => deinterleaved_y_sosi_arr(c_sdp_Q_fft*I+1) 
     );
diff --git a/applications/lofar2/libraries/sdp/src/vhdl/sdp_bf_weights.vhd b/applications/lofar2/libraries/sdp/src/vhdl/sdp_bf_weights.vhd
index 0b868af821c25a614814cb356f9f93a6fbadeadf..3dec6eb52968ae1f86598a42c32e9e7771852cf2 100644
--- a/applications/lofar2/libraries/sdp/src/vhdl/sdp_bf_weights.vhd
+++ b/applications/lofar2/libraries/sdp/src/vhdl/sdp_bf_weights.vhd
@@ -77,30 +77,32 @@ BEGIN
   -- [N_pol_bf][S_pn/Q_fft]_[S_sub_bf][Q_fft]. Therefore this counter
   -- has to account for this difference in order.
   p_cnt : PROCESS(dp_clk, dp_rst)
-    VARIABLE v_Q_fft, v_S_sub_bf : NATURAL;
+    -- Use short index variables v_Q, v_BLET names in capitals, to ease
+    -- recognizing them as (loop) indices.
+    VARIABLE v_Q, v_BLET : NATURAL;
   BEGIN
     IF dp_rst = '1' THEN
       cnt <= 0;
-      v_Q_fft := 0;
-      v_S_sub_bf := 0;
+      v_Q := 0;
+      v_BLET := 0;
     ELSIF rising_edge(dp_clk) THEN
       IF in_sosi_arr(0).valid = '1' THEN
         IF in_sosi_arr(0).eop = '1' THEN
-          v_Q_fft := 0;
-          v_S_sub_bf := 0;
+          v_Q := 0;
+          v_BLET := 0;
         ELSE
-          IF v_Q_fft >= c_sdp_Q_fft-1 THEN
-            v_Q_fft := 0;
-            IF v_S_sub_bf >= c_sdp_S_sub_bf-1 THEN
-              v_S_sub_bf := 0;
+          IF v_Q >= c_sdp_Q_fft-1 THEN
+            v_Q := 0;
+            IF v_BLET >= c_sdp_S_sub_bf-1 THEN
+              v_BLET := 0;
             ELSE
-              v_S_sub_bf := v_S_sub_bf + 1;
+              v_BLET := v_BLET + 1;
             END IF;
           ELSE
-            v_Q_fft := v_Q_fft + 1;
+            v_Q := v_Q + 1;
           END IF;
         END IF;
-        cnt <= v_Q_fft * c_sdp_S_sub_bf + v_S_sub_bf;
+        cnt <= v_Q * c_sdp_S_sub_bf + v_BLET;
       END IF;
     END IF;
   END PROCESS;
diff --git a/applications/lofar2/libraries/sdp/src/vhdl/sdp_pkg.vhd b/applications/lofar2/libraries/sdp/src/vhdl/sdp_pkg.vhd
index 64ad332f5e364538a5573e2da7fbb19b41982d07..fc9491cb8fda0f72a80c4f27511b66079a8a400e 100644
--- a/applications/lofar2/libraries/sdp/src/vhdl/sdp_pkg.vhd
+++ b/applications/lofar2/libraries/sdp/src/vhdl/sdp_pkg.vhd
@@ -28,8 +28,9 @@
 -- . See Document: L3 SDP Decision: SDP Parameter definitions.
 --   https://support.astron.nl/confluence/display/L2M/L3+SDP+Decision%3A+SDP+Parameter+definitions
 -------------------------------------------------------------------------------
-LIBRARY ieee, common_lib, rTwoSDF_lib, fft_lib, filter_lib, wpfb_lib;
+LIBRARY IEEE, common_lib, rTwoSDF_lib, fft_lib, filter_lib, wpfb_lib;
 USE IEEE.std_logic_1164.ALL;
+USE IEEE.math_real.ALL;
 USE common_lib.common_pkg.ALL;
 USE common_lib.common_mem_pkg.ALL;
 USE common_lib.common_field_pkg.ALL;
@@ -154,6 +155,11 @@ PACKAGE sdp_pkg is
     true, 54, c_sdp_W_statistic_sz, 195313, c_fft_pipeline, c_fft_pipeline,
     c_fil_ppf_pipeline);
 
+  -- DC gain of WPFB FIR filter obtained from applications/lofar2/model/run_pfir_coef.m using application = 'lofar_subband'
+  -- Not used in RTL, only used in test benches to verify expected suband levels
+  CONSTANT c_sdp_wpfb_fir_filter_dc_gain    : REAL := 0.994817;
+  CONSTANT c_sdp_wpfb_subband_sp_ampl_ratio : REAL := 8.0 * c_sdp_wpfb_fir_filter_dc_gain;  -- ~= 8 for unit FIR DC gain, depends on internal WPFB quantization and FIR coefficients
+
   -----------------------------------------------------------------------------
   -- Statistics offload
   -----------------------------------------------------------------------------
diff --git a/applications/lofar2/libraries/sdp/src/vhdl/sdp_subband_equalizer.vhd b/applications/lofar2/libraries/sdp/src/vhdl/sdp_subband_equalizer.vhd
index 7ae6012f078832edcae802a8e94dec511874f292..05d222f417f158351779bbee469fc7b5543f3124 100644
--- a/applications/lofar2/libraries/sdp/src/vhdl/sdp_subband_equalizer.vhd
+++ b/applications/lofar2/libraries/sdp/src/vhdl/sdp_subband_equalizer.vhd
@@ -77,30 +77,32 @@ BEGIN
   -- fsub[S_pn/Q_fft]_[N_sub][Q_fft]. Therefore the counter in 
   -- sdp_subband_equalizer.vhd has to account for this difference in order.
   p_cnt : PROCESS(dp_clk, dp_rst)
-    VARIABLE v_Q_fft, v_N_sub : NATURAL;
+    -- Use short index variables v_Q, v_SUB names in capitals, to ease
+    -- recognizing them as (loop) indices.
+    VARIABLE v_Q, v_SUB : NATURAL;
   BEGIN
     IF dp_rst = '1' THEN
       cnt <= 0;
-      v_Q_fft := 0;
-      v_N_sub := 0;
+      v_Q := 0;
+      v_SUB := 0;
     ELSIF rising_edge(dp_clk) THEN
       IF in_sosi_arr(0).valid = '1' THEN
         IF in_sosi_arr(0).eop = '1' THEN
-          v_Q_fft := 0;
-          v_N_sub := 0;
+          v_Q := 0;
+          v_SUB := 0;
         ELSE
-          IF v_Q_fft >= c_sdp_Q_fft-1 THEN
-            v_Q_fft := 0;
-            IF v_N_sub >= c_sdp_N_sub-1 THEN
-              v_N_sub := 0;
+          IF v_Q >= c_sdp_Q_fft-1 THEN
+            v_Q := 0;
+            IF v_SUB >= c_sdp_N_sub-1 THEN
+              v_SUB := 0;
             ELSE
-              v_N_sub := v_N_sub + 1;
+              v_SUB := v_SUB + 1;
             END IF;
           ELSE
-            v_Q_fft := v_Q_fft + 1;
+            v_Q := v_Q + 1;
           END IF;
         END IF;
-        cnt <= v_Q_fft * c_sdp_N_sub + v_N_sub;
+        cnt <= v_Q * c_sdp_N_sub + v_SUB;
       END IF;
     END IF;
   END PROCESS;
diff --git a/applications/lofar2/model/run_pfir_coeff.m b/applications/lofar2/model/run_pfir_coeff.m
index 5c77cd51d8f3d7c2795968b922afa16543f6a87d..a16ef12190fa45322ed43d7a94c957b1a5ea4f33 100644
--- a/applications/lofar2/model/run_pfir_coeff.m
+++ b/applications/lofar2/model/run_pfir_coeff.m
@@ -127,8 +127,8 @@ elseif strcmp(application, 'lofar_subband')
     L = 16;
     coeff_w = 16;
     q_full_scale = 2^(coeff_w-1);
-    coeffLoad = load('data/Coefficient_16KKaiser.dat');   % column
-    %coeffLoad = load('data/Coeffs16384Kaiser-quant.dat');   % column
+    %coeffLoad = load('data/Coefficient_16KKaiser.gold');    % column, created with pfs_coeff_final.m, DC gain = 0.995714
+    coeffLoad = load('data/Coeffs16384Kaiser-quant.dat');   % column, used in LOFAR1, DC gain = 0.994817
     coeffLoad = coeffLoad';                                 % row with integer coefficients from file
     config.dc_adjust = false;
     if config.dc_adjust==false
diff --git a/libraries/base/common/src/vhdl/common_pkg.vhd b/libraries/base/common/src/vhdl/common_pkg.vhd
index 13d8042484abd05f906b0b30d243174e12139cb2..98085930720f4bbdfe088f1323d2114f72b802ee 100644
--- a/libraries/base/common/src/vhdl/common_pkg.vhd
+++ b/libraries/base/common/src/vhdl/common_pkg.vhd
@@ -470,7 +470,26 @@ PACKAGE common_pkg IS
 
   FUNCTION COMPLEX_MULT_REAL(a_re, a_im, b_re, b_im : INTEGER) RETURN INTEGER;  -- Calculate real part of complex multiplication: a_re*b_re - a_im*b_im 
   FUNCTION COMPLEX_MULT_IMAG(a_re, a_im, b_re, b_im : INTEGER) RETURN INTEGER;  -- Calculate imag part of complex multiplication: a_im*b_re + a_re*b_im 
-  
+
+  -- Convert between polar and rectangular coordinates
+  FUNCTION COMPLEX_RADIUS(re, im : REAL)     RETURN REAL;
+  FUNCTION COMPLEX_RADIUS(re, im : INTEGER)  RETURN REAL;
+
+  FUNCTION COMPLEX_PHASE( re, im : REAL;    radians : BOOLEAN) RETURN REAL;  -- phase in radians or degrees
+  FUNCTION COMPLEX_PHASE( re, im : INTEGER; radians : BOOLEAN) RETURN REAL;  -- phase in radians or degrees
+  FUNCTION COMPLEX_PHASE( re, im : REAL)                       RETURN REAL;  -- phase in degrees
+  FUNCTION COMPLEX_PHASE( re, im : INTEGER)                    RETURN REAL;  -- phase in degrees
+
+  FUNCTION COMPLEX_RE(ampl, phase : REAL;    radians : BOOLEAN) RETURN REAL;  -- phase in radians or degrees
+  FUNCTION COMPLEX_RE(ampl, phase : INTEGER; radians : BOOLEAN) RETURN REAL;  -- phase in radians or degrees
+  FUNCTION COMPLEX_RE(ampl, phase : REAL)                       RETURN REAL;  -- phase in degrees
+  FUNCTION COMPLEX_RE(ampl, phase : INTEGER)                    RETURN REAL;  -- phase in degrees
+
+  FUNCTION COMPLEX_IM(ampl, phase : REAL;    radians : BOOLEAN) RETURN REAL;  -- phase in radians or degrees
+  FUNCTION COMPLEX_IM(ampl, phase : INTEGER; radians : BOOLEAN) RETURN REAL;  -- phase in radians or degrees
+  FUNCTION COMPLEX_IM(ampl, phase : REAL)                       RETURN REAL;  -- phase in degrees
+  FUNCTION COMPLEX_IM(ampl, phase : INTEGER)                    RETURN REAL;  -- phase in degrees
+
   FUNCTION SHIFT_UVEC(vec : STD_LOGIC_VECTOR; shift : INTEGER) RETURN STD_LOGIC_VECTOR;  -- < 0 shift left, > 0 shift right
   FUNCTION SHIFT_SVEC(vec : STD_LOGIC_VECTOR; shift : INTEGER) RETURN STD_LOGIC_VECTOR;  -- < 0 shift left, > 0 shift right
 
@@ -2254,7 +2273,94 @@ PACKAGE BODY common_pkg IS
   BEGIN
     RETURN (a_im*b_re + a_re*b_im);
   END;
+
   
+  FUNCTION COMPLEX_RADIUS(re, im : REAL) RETURN REAL IS
+  BEGIN
+    -- Must use ABS() with ** of real, because (negative)**2.0 yields error and value 0.0.
+    -- Must must use brackets (ABS()) to avoid compile error.
+    -- Alternative equivalent code would be: SQRT(re * re + im * im).
+    RETURN SQRT((ABS(re))**2.0 + (ABS(im))**2.0);
+  END;
+
+  FUNCTION COMPLEX_RADIUS(re, im : INTEGER) RETURN REAL IS
+  BEGIN
+    RETURN COMPLEX_RADIUS(REAL(re), REAL(im));
+  END;
+
+  FUNCTION COMPLEX_PHASE(re, im : REAL; radians : BOOLEAN) RETURN REAL IS
+  BEGIN
+    IF radians = TRUE THEN
+      RETURN ATAN2(Y => im, X => re);
+    ELSE
+      RETURN ATAN2(Y => im, X => re) * 360.0 / MATH_2_PI;
+    END IF;
+  END;
+
+  FUNCTION COMPLEX_PHASE(re, im : INTEGER; radians : BOOLEAN) RETURN REAL IS
+  BEGIN
+    RETURN COMPLEX_PHASE(REAL(re), REAL(im), radians);
+  END;
+
+  FUNCTION COMPLEX_PHASE(re, im : REAL) RETURN REAL IS
+  BEGIN
+    RETURN COMPLEX_PHASE(re, im, FALSE);
+  END;
+
+  FUNCTION COMPLEX_PHASE(re, im : INTEGER) RETURN REAL IS
+  BEGIN
+    RETURN COMPLEX_PHASE(REAL(re), REAL(im), FALSE);
+  END;
+
+  FUNCTION COMPLEX_RE(ampl, phase : REAL; radians : BOOLEAN) RETURN REAL IS
+  BEGIN
+    IF radians = TRUE THEN
+      RETURN ampl * COS(phase);
+    ELSE
+      RETURN ampl * COS(phase * MATH_2_PI / 360.0);
+    END IF;
+  END;
+
+  FUNCTION COMPLEX_RE(ampl, phase : INTEGER; radians : BOOLEAN) RETURN REAL IS
+  BEGIN
+    RETURN COMPLEX_RE(REAL(ampl), REAL(phase), radians);
+  END;
+
+  FUNCTION COMPLEX_RE(ampl, phase : REAL) RETURN REAL IS
+  BEGIN
+    RETURN COMPLEX_RE(ampl, phase, FALSE);
+  END;
+
+  FUNCTION COMPLEX_RE(ampl, phase : INTEGER) RETURN REAL IS
+  BEGIN
+    RETURN COMPLEX_RE(REAL(ampl), REAL(phase), FALSE);
+  END;
+
+  FUNCTION COMPLEX_IM(ampl, phase : REAL; radians : BOOLEAN) RETURN REAL IS
+  BEGIN
+    IF radians = TRUE THEN
+      RETURN ampl * SIN(phase);
+    ELSE
+      RETURN ampl * SIN(phase * MATH_2_PI / 360.0);
+    END IF;
+  END;
+
+  FUNCTION COMPLEX_IM(ampl, phase : INTEGER; radians : BOOLEAN) RETURN REAL IS
+  BEGIN
+    RETURN COMPLEX_IM(REAL(ampl), REAL(phase), radians);
+  END;
+
+  FUNCTION COMPLEX_IM(ampl, phase : REAL) RETURN REAL IS
+  BEGIN
+    RETURN COMPLEX_IM(ampl, phase, FALSE);
+  END;
+
+  FUNCTION COMPLEX_IM(ampl, phase : INTEGER) RETURN REAL IS
+  BEGIN
+    RETURN COMPLEX_IM(REAL(ampl), REAL(phase), FALSE);
+  END;
+
+
   FUNCTION SHIFT_UVEC(vec : STD_LOGIC_VECTOR; shift : INTEGER) RETURN STD_LOGIC_VECTOR IS
   BEGIN
     IF shift < 0 THEN
diff --git a/libraries/base/diag/tb/vhdl/tb_diag_pkg.vhd b/libraries/base/diag/tb/vhdl/tb_diag_pkg.vhd
index 47474faa2976ab7ae41a01136cecd27f837299cf..790e7d959bb6fba217a07a32f776ad3f01ffe029 100644
--- a/libraries/base/diag/tb/vhdl/tb_diag_pkg.vhd
+++ b/libraries/base/diag/tb/vhdl/tb_diag_pkg.vhd
@@ -566,7 +566,7 @@ PACKAGE BODY tb_diag_pkg IS
     CONSTANT c_Q     : REAL := COS(c_angle);  -- Q = quadrature reference
     CONSTANT c_dat   : REAL := REAL(TO_SINT(in_dat));
     CONSTANT c_phase : REAL := ARCTAN(accum_Q, accum_I + c_eps);
-    CONSTANT c_ampl  : REAL := SQRT((ABS(accum_I))**2.0 + (ABS(accum_Q))**2.0) * 2.0 / c_Nsamples;
+    CONSTANT c_ampl  : REAL := COMPLEX_RADIUS(accum_I, accum_Q) * 2.0 / c_Nsamples;
   BEGIN
     IF rising_edge(dp_clk) THEN
       -- Output reference I and Q for debugging in wave window
diff --git a/libraries/dsp/verify_pfb/tb_verify_pfb_wg.vhd b/libraries/dsp/verify_pfb/tb_verify_pfb_wg.vhd
index fe84e21ca58d62cad52a94d62f450297d6ed2050..eac6be532f7a4dbf4ef9af6f0d775cf997238b36 100644
--- a/libraries/dsp/verify_pfb/tb_verify_pfb_wg.vhd
+++ b/libraries/dsp/verify_pfb/tb_verify_pfb_wg.vhd
@@ -80,9 +80,6 @@
 --   > observe the *_scope signals as radix decimal, format analogue format signals in the Wave window.
 --     default use analogue(automatic), if necessary zoom in using right click analogue(custom).
 --
--- Note:
--- . Must use ABS() with **2.0 of negative REAL, because (negative)**2.0 yields error and value 0.0,
--- . Must use brackets (ABS()) to avoid compile error
 
 LIBRARY ieee, common_lib, dp_lib, diag_lib, filter_lib, rTwoSDF_lib, fft_lib, wpfb_lib;
 LIBRARY pfs_lib, pft2_lib, pfb2_lib;
@@ -798,12 +795,11 @@ BEGIN
   sub_b_re_frac <= out_re WHEN rising_edge(dp_clk) AND out_bin = c_bin_b + 1 AND out_val_b = '1';
   sub_b_im_frac <= out_im WHEN rising_edge(dp_clk) AND out_bin = c_bin_b + 1 AND out_val_b = '1';
   
-  -- Must use ABS() with ** of real, because (negative)**2.0 yields error and value 0.0, also must use brackets (ABS()) to avoid compile error
-  sub_a_ampl      <= SQRT((ABS(REAL(sub_a_re)))**2.0      + (ABS(REAL(sub_a_im)))**2.0);
-  sub_a_ampl_frac <= SQRT((ABS(REAL(sub_a_re_frac)))**2.0 + (ABS(REAL(sub_a_im_frac)))**2.0);
+  sub_a_ampl      <= COMPLEX_RADIUS(sub_a_re, sub_a_im);
+  sub_a_ampl_frac <= COMPLEX_RADIUS(sub_a_re_frac, sub_a_im_frac);
 
-  sub_b_ampl      <= SQRT((ABS(REAL(sub_b_re)))**2.0      + (ABS(REAL(sub_b_im)))**2.0);
-  sub_b_ampl_frac <= SQRT((ABS(REAL(sub_b_re_frac)))**2.0 + (ABS(REAL(sub_b_im_frac)))**2.0);
+  sub_b_ampl      <= COMPLEX_RADIUS(sub_b_re, sub_b_im);
+  sub_b_ampl_frac <= COMPLEX_RADIUS(sub_b_re_frac, sub_b_im_frac);
   
   ---------------------------------------------------------------------------
   -- Measure ADC/WG input mean (DC) and power, and determine sine amplitude