diff --git a/applications/lofar2/designs/lofar2_unb2b_sdp_station/src/vhdl/lofar2_unb2b_sdp_station.vhd b/applications/lofar2/designs/lofar2_unb2b_sdp_station/src/vhdl/lofar2_unb2b_sdp_station.vhd
index f4947909cf7154cc6ba578469c7455de0f72074e..76c4d3cc12316b8d3dd0356e57dba9223e0220ca 100644
--- a/applications/lofar2/designs/lofar2_unb2b_sdp_station/src/vhdl/lofar2_unb2b_sdp_station.vhd
+++ b/applications/lofar2/designs/lofar2_unb2b_sdp_station/src/vhdl/lofar2_unb2b_sdp_station.vhd
@@ -814,6 +814,7 @@ begin
     g_use_oversample         => c_revision_select.use_oversample,
     g_use_xsub               => c_revision_select.use_xsub,
     g_use_bf                 => c_revision_select.use_bf,
+    g_use_bdo_transpose      => c_revision_select.use_bdo_transpose,
     g_use_ring               => c_revision_select.use_ring,
     g_P_sq                   => c_revision_select.P_sq
   )
diff --git a/applications/lofar2/designs/lofar2_unb2b_sdp_station/src/vhdl/lofar2_unb2b_sdp_station_pkg.vhd b/applications/lofar2/designs/lofar2_unb2b_sdp_station/src/vhdl/lofar2_unb2b_sdp_station_pkg.vhd
index dd06a93a7fe1f0af2bf646d0ab3ac55e584f2dcc..e97227200a0c032a4741653caf32e2b18e09553c 100644
--- a/applications/lofar2/designs/lofar2_unb2b_sdp_station/src/vhdl/lofar2_unb2b_sdp_station_pkg.vhd
+++ b/applications/lofar2/designs/lofar2_unb2b_sdp_station/src/vhdl/lofar2_unb2b_sdp_station_pkg.vhd
@@ -35,21 +35,26 @@ package lofar2_unb2b_sdp_station_pkg is
     use_fsub          : boolean;
     use_oversample    : boolean;
     use_bf            : boolean;
+    use_bdo_transpose : boolean;
     use_xsub          : boolean;
     use_ring          : boolean;
     P_sq              : natural;
   end record;
 
-  constant c_ait        : t_lofar2_unb2b_sdp_station_config := (false, false, false, false, false, false, 0);
-  constant c_fsub       : t_lofar2_unb2b_sdp_station_config := (false, true,  false, false, false, false, 0);
-  constant c_bf         : t_lofar2_unb2b_sdp_station_config := (false, true,  false, true,  false, false, 0);
-  constant c_bf_ring    : t_lofar2_unb2b_sdp_station_config := (false, true,  false, true,  false, true,  0);
-  constant c_xsub_one   : t_lofar2_unb2b_sdp_station_config := (false, true,  false, false, true,  false, 1);
-  constant c_xsub_ring  : t_lofar2_unb2b_sdp_station_config := (false, true,  false, false, true,  true,  9);
-  constant c_full_wg    : t_lofar2_unb2b_sdp_station_config := (true,  true,  false, true,  true,  true,  9);
-  constant c_full       : t_lofar2_unb2b_sdp_station_config := (false, true,  false, true,  true,  true,  9);
-  constant c_full_wg_os : t_lofar2_unb2b_sdp_station_config := (true,  true,  true,  true,  true,  true,  9);
-  constant c_full_os    : t_lofar2_unb2b_sdp_station_config := (false, true,  true,  true,  true,  true,  9);
+  constant c_ait        : t_lofar2_unb2b_sdp_station_config := (false, false, false, false, false, false, false, 0);
+  constant c_fsub       : t_lofar2_unb2b_sdp_station_config := (false, true,  false, false, false, false, false, 0);
+  -- use c_bf on one node also to simulate bdo transpose
+  -- use c_bf_ring with ring also to simulate bdo identity
+  constant c_bf         : t_lofar2_unb2b_sdp_station_config := (false, true,  false, true,  false, false, false, 0);
+  constant c_bf_ring    : t_lofar2_unb2b_sdp_station_config := (false, true,  false, true,  false, false, true,  0);
+  constant c_xsub_one   : t_lofar2_unb2b_sdp_station_config := (false, true,  false, false, false, true,  false, 1);
+  constant c_xsub_ring  : t_lofar2_unb2b_sdp_station_config := (false, true,  false, false, false, true,  true,  9);
+  -- use c_full_wg for SDP regression test on Arts-unb2b
+  constant c_full_wg    : t_lofar2_unb2b_sdp_station_config := (true,  true,  false, true,  false, true,  true,  9);
+  constant c_full       : t_lofar2_unb2b_sdp_station_config := (false, true,  false, true,  false, true,  true,  9);
+  constant c_full_wg_os : t_lofar2_unb2b_sdp_station_config := (true,  true,  true,  true,  false, true,  true,  9);
+  -- use c_full_os for SDP on LTS-unb2b of Disturb2
+  constant c_full_os    : t_lofar2_unb2b_sdp_station_config := (false, true,  true,  true,  false, true,  true,  9);
 
   -- Function to select the revision configuration.
   function func_sel_revision_rec(g_design_name : string) return t_lofar2_unb2b_sdp_station_config;
diff --git a/applications/lofar2/designs/lofar2_unb2c_sdp_station/revisions/lofar2_unb2c_sdp_station_bf/tb_lofar2_unb2c_sdp_station_bf.vhd b/applications/lofar2/designs/lofar2_unb2c_sdp_station/revisions/lofar2_unb2c_sdp_station_bf/tb_lofar2_unb2c_sdp_station_bf.vhd
index 74fa6e366d14530426c0c5a1256df0f55473062e..7acfa22c2a95eaf984df88565b77e331c9692f48 100644
--- a/applications/lofar2/designs/lofar2_unb2c_sdp_station/revisions/lofar2_unb2c_sdp_station_bf/tb_lofar2_unb2c_sdp_station_bf.vhd
+++ b/applications/lofar2/designs/lofar2_unb2c_sdp_station/revisions/lofar2_unb2c_sdp_station_bf/tb_lofar2_unb2c_sdp_station_bf.vhd
@@ -107,10 +107,15 @@
 -- * Use g_beamlet_scale = 2**10, for full scale WG and N_ant = 1, see [1]
 -- * Using g_beamlet = c_sdp_S_sub_bf-1 = 487 puts g_subband = 102 at the last
 --   beamlet in the beamset, so at index 974,975 of rx_beamlet_list_re/im.
+-- * Try g_beamlet somewhere random in range 0:c_sdp_S_sub_bf-1 = 0:487 to
+--   verify that the beamlet output reorder works at that position in the
+--   beamlets block. E.g. use some prime number, like g_beamlet = 137.
 -- * Default beamlet 102 also contains g_subband = 102. On HW the BF weights
 --   are default 0, but in sim the BF weights in node_sdp_beamformer.vhd
 --   are default unit weights. Therefore also write the BF weight for default
---   beamlet 102 to define it value, in case g_beamlet /= 102.
+--   beamlet 102 to define its value, in case g_beamlet /= 102. In this tb
+--   the BF weight for beamlet = g_subband = 102 is set to 0, so that the
+--   g_subband = 102 will only show up in g_beamlet.
 -- * A simulation only section in sdp_beamformer_output.vhd disturbs the BSN,
 --   to cause a merged payload error, so that sdp_source_info_payload_error
 --   can be verified here.
@@ -121,7 +126,11 @@
 --   # Manually add missing signals and constants using objects in GUI
 --   > add wave -position insertpoint  \
 --     sim:/tb_lofar2_unb2c_sdp_station_bf/sp_ssts_arr2 \
---     sim:/tb_lofar2_unb2c_sdp_station_bf/bsts_arr2
+--     sim:/tb_lofar2_unb2c_sdp_station_bf/bsts_arr2 \
+--     sim:/tb_lofar2_unb2c_sdp_station_bf/rx_packet_list_re \
+--     sim:/tb_lofar2_unb2c_sdp_station_bf/rx_packet_list_im \
+--     sim:/tb_lofar2_unb2c_sdp_station_bf/rx_reordered_list_re \
+--     sim:/tb_lofar2_unb2c_sdp_station_bf/rx_reordered_list_im
 --   > run -a
 --   View beamlet output as radix-decimal
 --   Takes about 1h  1 m when g_read_all_* = FALSE
@@ -132,7 +141,7 @@
 --     https://support.astron.nl/confluence/pages/viewpage.action?spaceKey=L2M&title=L4+SDPFW+Decision%3A+LOFAR2.0+SDP+Firmware+Quantization+Model
 --
 -------------------------------------------------------------------------------
-library IEEE, common_lib, unb2c_board_lib, i2c_lib, mm_lib, dp_lib, diag_lib, lofar2_sdp_lib, wpfb_lib, tech_pll_lib, tr_10GbE_lib, lofar2_unb2c_sdp_station_lib;
+library IEEE, common_lib, unb2c_board_lib, i2c_lib, mm_lib, dp_lib, diag_lib, reorder_lib, lofar2_sdp_lib, wpfb_lib, tech_pll_lib, tr_10GbE_lib, lofar2_unb2c_sdp_station_lib;
 use IEEE.std_logic_1164.all;
 use IEEE.numeric_std.all;
 use IEEE.math_real.all;
@@ -146,11 +155,13 @@ use mm_lib.mm_file_pkg.all;
 use dp_lib.dp_stream_pkg.all;
 use mm_lib.mm_file_unb_pkg.all;
 use diag_lib.diag_pkg.all;
+use reorder_lib.reorder_pkg.all;
 use wpfb_lib.wpfb_pkg.all;
 use unb2c_board_lib.unb2c_board_pkg.all;
 use lofar2_sdp_lib.sdp_pkg.all;
 use lofar2_sdp_lib.tb_sdp_pkg.all;
 use tech_pll_lib.tech_pll_component_pkg.all;
+use lofar2_unb2c_sdp_station_lib.lofar2_unb2c_sdp_station_pkg.all;
 
 entity tb_lofar2_unb2c_sdp_station_bf is
   generic (
@@ -160,7 +171,7 @@ entity tb_lofar2_unb2c_sdp_station_bf is
     g_sp_remnant_ampl    : real := 0.1;  -- WG normalized amplitude for remnant sp
     g_sp_remnant_phase   : real := 15.0;  -- WG phase in degrees for remnant sp
     g_subband            : natural := 102;  -- select g_subband at index 102 = 102/1024 * 200MHz = 19.921875 MHz
-    g_beamlet            : natural := c_sdp_S_sub_bf - 1;  -- map g_subband to g_beamlet index in beamset in range(c_sdp_S_sub_bf = 488)
+    g_beamlet            : natural := 137;  -- map g_subband to g_beamlet index in beamset in range(c_sdp_S_sub_bf = 488)
     g_beamlet_scale      : real := 1.0 / 2.0**9;  -- g_beamlet output scale factor
     g_bf_x_gain          : real := 0.7;  -- g_beamlet X BF weight normalized gain for g_sp
     g_bf_y_gain          : real := 0.6;  -- g_beamlet Y BF weight normalized gain for g_sp
@@ -177,20 +188,22 @@ entity tb_lofar2_unb2c_sdp_station_bf is
 end tb_lofar2_unb2c_sdp_station_bf;
 
 architecture tb of tb_lofar2_unb2c_sdp_station_bf is
+  -- Revision parameters
+  constant c_design_name         : string := "lofar2_unb2c_sdp_station_bf";
+  constant c_revision_select     : t_lofar2_unb2c_sdp_station_config := func_sel_revision_rec(c_design_name);
+
   constant c_sim                 : boolean := true;
   constant c_unb_nr              : natural := 0;  -- UniBoard 0
   constant c_node_nr             : natural := 0;
   constant c_gn_index            : natural := c_unb_nr * 4 + c_node_nr;  -- this node GN
   constant c_init_bsn            : natural := 17;  -- some recognizable value >= 0
   constant c_bsn_latency         : natural := 5;  -- used to time stimuli_done
+  constant c_use_bdo_transpose   : boolean := c_revision_select.use_bdo_transpose;
 
   constant c_id                  : std_logic_vector(7 downto 0) := TO_UVEC(c_gn_index, 8);
   constant c_version             : std_logic_vector(1 downto 0) := "00";
   constant c_fw_version          : t_unb2c_board_fw_version := (1, 0);
 
-  constant c_mac_15_0            : std_logic_vector(15 downto 0) := TO_UVEC(c_unb_nr, 8) & TO_UVEC(c_node_nr, 8);
-  constant c_ip_15_0             : std_logic_vector(15 downto 0) := TO_UVEC(c_unb_nr, 8) & TO_UVEC(c_node_nr + 1, 8);  -- +1 to avoid IP = *.*.*.0
-
   constant c_eth_clk_period      : time := 8 ns;  -- 125 MHz XO on UniBoard
   constant c_ext_clk_period      : time := 5 ns;
   constant c_mm_clk_period       : time := 10 ns;  -- 100 MHz internal mm_clk
@@ -209,8 +222,6 @@ architecture tb of tb_lofar2_unb2c_sdp_station_bf is
   constant c_stat_lo_factor      : real := 1.0 - c_stat_percentage;  -- lower boundary
   constant c_stat_hi_factor      : real := 1.0 + c_stat_percentage;  -- higher boundary
 
-  constant c_nof_beamlets_per_data : natural := c_sdp_cep_nof_beamlets_per_longword;  -- = 2 dual pol beamlets per 64b data word
-
   constant c_beamlet_output_delta : integer := 2;  -- +-delta margin
 
   -- header fields
@@ -235,6 +246,15 @@ architecture tb of tb_lofar2_unb2c_sdp_station_bf is
                                      x"1400"  -- block_period = 5120
                                    );
 
+  -- Expected transposed indices order by func_reorder_transpose_packet().
+  -- Yields same c_reorder_transpose_indices order as:
+  -- > python applications/lofar2/libraries/sdp/src/python/test_func_sdp_bdo_transpose_packet.py
+  constant c_nof_ch                    : natural := c_sdp_cep_nof_beamlets_per_packet * c_sdp_N_pol_bf;
+  constant c_reorder_transpose_indices : t_natural_arr(0 to c_nof_ch - 1) :=
+    func_reorder_transpose_indices(c_sdp_cep_nof_blocks_per_packet,
+                                   c_sdp_cep_nof_beamlets_per_block,
+                                   c_sdp_N_pol_bf);
+
   -- WG
   constant c_bsn_start_wg         : natural := c_init_bsn + 2;  -- start WG at this BSN to instead of some BSN, to avoid mismatches in exact expected data values
   -- .ampl
@@ -355,12 +375,17 @@ architecture tb of tb_lofar2_unb2c_sdp_station_bf is
   constant c_mm_file_reg_sdp_info         : string := mmf_unb_file_prefix(c_unb_nr, c_node_nr) & "REG_SDP_INFO";
   constant c_mm_file_reg_hdr_dat          : string := mmf_unb_file_prefix(c_unb_nr, c_node_nr) & "REG_HDR_DAT";  -- c_sdp_N_beamsets = 2 beamsets
 
+  -- Tb BSN moments
+  constant c_stimuli_done_bsn             : natural := c_init_bsn + c_bsn_latency + c_nof_block_per_sync * 3;
+  constant c_verify_rx_beamlet_list_bsn   : natural := c_stimuli_done_bsn - c_nof_block_per_sync;
+
   -- Tb
   signal stimuli_done        : std_logic := '0';
   signal tb_almost_end       : std_logic := '0';
   signal tb_end              : std_logic := '0';
   signal tb_clk              : std_logic := '0';
   signal rd_data             : std_logic_vector(c_32 - 1 downto 0);
+  signal rd_data_bsn         : std_logic_vector(c_32 - 1 downto 0);
 
   signal dest_rst            : std_logic := '1';  -- use separate destination rst for Rx 10GbE in tb
   signal pps_rst             : std_logic := '1';  -- use separate reset to release the PPS generator
@@ -446,13 +471,29 @@ architecture tb of tb_lofar2_unb2c_sdp_station_bf is
   signal rx_beamlet_sop_cnt  : natural := 0;
   signal rx_beamlet_eop_cnt  : natural := 0;
 
-  signal rx_beamlet_arr_re   : t_slv_8_arr(c_sdp_cep_nof_blocks_per_packet - 1 downto 0);  -- [3:0]
-  signal rx_beamlet_arr_im   : t_slv_8_arr(c_sdp_cep_nof_blocks_per_packet - 1 downto 0);  -- [3:0]
+  -- [0 : 3] =  X, Y, X, Y
+  signal rx_beamlet_arr_re   : t_sdp_beamlet_part_arr;
+  signal rx_beamlet_arr_im   : t_sdp_beamlet_part_arr;
   signal rx_beamlet_cnt      : natural;
   signal rx_beamlet_valid    : std_logic;
 
-  signal rx_beamlet_list_re  : t_slv_8_arr(c_sdp_cep_nof_beamlets_per_block * c_sdp_N_pol_bf - 1 downto 0);  -- [488 * 2-1:0] = [975:0]
-  signal rx_beamlet_list_im  : t_slv_8_arr(c_sdp_cep_nof_beamlets_per_block * c_sdp_N_pol_bf - 1 downto 0);  -- [488 * 2-1:0] = [975:0]
+  -- [0 : 4 * 488 * 2 - 1] = [0 : 3903]
+  signal rx_packet_list_re      : t_sdp_beamlet_packet_list;
+  signal rx_packet_list_im      : t_sdp_beamlet_packet_list;
+  signal rx_reordered_list_re   : t_sdp_beamlet_packet_list;
+  signal rx_reordered_list_im   : t_sdp_beamlet_packet_list;
+
+  -- Recover original beamlet order per block, either directly when c_use_bdo_transpose
+  -- = false, or via func_sdp_bdo_transpose_packet() when c_use_bdo_transpose = true.
+  -- List: [0 : 488 * 2 - 1] = [0 : 975]
+  -- . X part at even indices
+  -- . Y part at odd indices
+  signal prev_rx_beamlet_list_re : t_sdp_beamlet_block_list;
+  signal prev_rx_beamlet_list_im : t_sdp_beamlet_block_list;
+  signal rx_beamlet_list_re      : t_sdp_beamlet_block_list;
+  signal rx_beamlet_list_im      : t_sdp_beamlet_block_list;
+  signal rx_beamlet_list_val     : std_logic;
+  signal verify_rx_beamlet_list  : std_logic := '0';
 
   signal rx_beamlet_x_output_re : integer;
   signal rx_beamlet_x_output_im : integer;
@@ -506,7 +547,7 @@ begin
   ------------------------------------------------------------------------------
   u_lofar_unb2c_sdp_station_bf : entity lofar2_unb2c_sdp_station_lib.lofar2_unb2c_sdp_station
   generic map (
-    g_design_name            => "lofar2_unb2c_sdp_station_bf",
+    g_design_name            => c_design_name,
     g_design_note            => "",
     g_sim                    => c_sim,
     g_sim_unb_nr             => c_unb_nr,
@@ -672,7 +713,9 @@ begin
     ---- Set and check BF per beamset
     ------------------------------------------------------------------------------
     for bset in 0 to c_sdp_N_beamsets - 1 loop
+      ----------------------------------------------------------------------------
       -- MM beamlet_scale
+      ----------------------------------------------------------------------------
       -- . write
       v_offset := bset * c_mm_span_reg_bf_scale;
       mmf_mm_bus_wr(c_mm_file_reg_bf_scale, v_offset + 0, c_exp_beamlet_scale, tb_clk);
@@ -684,6 +727,9 @@ begin
       proc_common_wait_some_cycles(tb_clk, 1);
       assert TO_UINT(rd_beamlet_scale) = c_exp_beamlet_scale report "Wrong MM read beamlet_scale for beamset " & natural'image(bset) severity ERROR;
 
+      ----------------------------------------------------------------------------
+      -- Set CEP beamlets output MAC,IP,UDP port
+      ----------------------------------------------------------------------------
       -- CEP beamlet output header
       --     c_sdp_cep_hdr_field_arr : t_common_field_arr(c_sdp_cep_nof_hdr_fields-1 DOWNTO 0) := (
       --  40   "eth_dst_mac"                        ), "RW", 48, field_default(c_sdp_cep_eth_dst_mac) ),
@@ -1007,7 +1053,7 @@ begin
     ----------------------------------------------------------------------------
     -- read BSN low, this is the wait until condition
     mmf_mm_wait_until_value(c_mm_file_reg_bsn_scheduler_wg, 0,
-                            "UNSIGNED", rd_data, ">=", c_init_bsn + c_bsn_latency + c_nof_block_per_sync * 3,
+                            "UNSIGNED", rd_data_bsn, ">=", c_stimuli_done_bsn,
                             c_sdp_T_sub, tb_clk);
 
     -- Stimuli done, now verify results at end of test
@@ -1318,10 +1364,10 @@ begin
   begin
     rx_beamlet_cnt <= 0;
     rx_beamlet_valid <= '0';
-    -- Wait until start of a beamlet packet, capture only first block in packet
+    -- Wait until start of a beamlet packet
     proc_common_wait_until_high(ext_clk, rx_beamlet_sosi.sop);
-    -- c_nof_beamlets_per_data = 2 dual pol beamlets (= XY, XY) per 64b data word
-    for I in 0 to (c_sdp_cep_nof_blocks_per_packet * c_sdp_cep_nof_beamlets_per_block / c_nof_beamlets_per_data) - 1 loop
+    -- c_sdp_nof_beamlets_per_longword = 2 dual pol beamlets (= XY, XY) per 64b data word
+    for I in 0 to (c_sdp_cep_nof_beamlets_per_packet / c_sdp_nof_beamlets_per_longword) - 1 loop
       proc_common_wait_until_high(ext_clk, rx_beamlet_sosi.valid);
       rx_beamlet_valid <= '1';
       -- Capture rx beamlets per longword in rx_beamlet_arr, for time series view in Wave window
@@ -1333,27 +1379,88 @@ begin
       rx_beamlet_arr_im(2) <= rx_beamlet_sosi.data(23 downto 16);
       rx_beamlet_arr_re(3) <= rx_beamlet_sosi.data(15 downto 8);  -- Y
       rx_beamlet_arr_im(3) <= rx_beamlet_sosi.data( 7 downto 0);
-      if I < c_sdp_cep_nof_beamlets_per_block / c_nof_beamlets_per_data then
-        -- Only capture the first beamlets block of each packet in rx_beamlet_list
-        rx_beamlet_list_re(I * 4 + 0) <= rx_beamlet_sosi.data(63 downto 56);  -- X
-        rx_beamlet_list_im(I * 4 + 0) <= rx_beamlet_sosi.data(55 downto 48);
-        rx_beamlet_list_re(I * 4 + 1) <= rx_beamlet_sosi.data(47 downto 40);  -- Y
-        rx_beamlet_list_im(I * 4 + 1) <= rx_beamlet_sosi.data(39 downto 32);
-        rx_beamlet_list_re(I * 4 + 2) <= rx_beamlet_sosi.data(31 downto 24);  -- X
-        rx_beamlet_list_im(I * 4 + 2) <= rx_beamlet_sosi.data(23 downto 16);
-        rx_beamlet_list_re(I * 4 + 3) <= rx_beamlet_sosi.data(15 downto 8);  -- Y
-        rx_beamlet_list_im(I * 4 + 3) <= rx_beamlet_sosi.data( 7 downto 0);
-      end if;
+      -- Capture the beamlets block of each packet in rx_packet_list
+      rx_packet_list_re(I * 4 + 0) <= rx_beamlet_sosi.data(63 downto 56);  -- X
+      rx_packet_list_im(I * 4 + 0) <= rx_beamlet_sosi.data(55 downto 48);
+      rx_packet_list_re(I * 4 + 1) <= rx_beamlet_sosi.data(47 downto 40);  -- Y
+      rx_packet_list_im(I * 4 + 1) <= rx_beamlet_sosi.data(39 downto 32);
+      rx_packet_list_re(I * 4 + 2) <= rx_beamlet_sosi.data(31 downto 24);  -- X
+      rx_packet_list_im(I * 4 + 2) <= rx_beamlet_sosi.data(23 downto 16);
+      rx_packet_list_re(I * 4 + 3) <= rx_beamlet_sosi.data(15 downto 8);  -- Y
+      rx_packet_list_im(I * 4 + 3) <= rx_beamlet_sosi.data( 7 downto 0);
       proc_common_wait_until_high(ext_clk, rx_beamlet_sosi.valid);
       -- Use at least one WAIT instead of proc_common_wait_some_cycles() to
       -- avoid Modelsim warning: (vcom-1090) Possible infinite loop: Process
       -- contains no WAIT statement.
       wait until rising_edge(ext_clk);
       rx_beamlet_valid <= '0';
-      rx_beamlet_cnt   <= (rx_beamlet_cnt + c_nof_beamlets_per_data) mod c_sdp_cep_nof_beamlets_per_block;  -- 4 blocks/packet
+      rx_beamlet_cnt   <= (rx_beamlet_cnt + c_sdp_nof_beamlets_per_longword) mod c_sdp_cep_nof_beamlets_per_block;  -- 4 blocks/packet
     end loop;
   end process;
 
+  -- Undo the beamlet output transpose of a received packet, to have original beamlet order
+  p_rx_reordered_list : process
+  begin
+    -- Wait until end of a beamlet packet, so that rx_packet_list holds the packet
+    wait until rising_edge(ext_clk);  -- explicit WAIT to avoid Modelsim warning: (vcom-1090) Possible infinite loop
+    proc_common_wait_until_hi_lo(ext_clk, rx_beamlet_sosi.eop);  -- reorder only once per packet, to reduce simulation effort
+    rx_reordered_list_re <= func_sdp_bdo_transpose_packet(c_sdp_cep_nof_blocks_per_packet,
+                                                          c_sdp_cep_nof_beamlets_per_block,
+                                                          rx_packet_list_re);
+    rx_reordered_list_im <= func_sdp_bdo_transpose_packet(c_sdp_cep_nof_blocks_per_packet,
+                                                          c_sdp_cep_nof_beamlets_per_block,
+                                                          rx_packet_list_im);
+  end process;
+
+  p_rx_beamlet_list : process
+    constant c_N : natural := c_sdp_cep_nof_beamlets_per_block * c_sdp_N_pol_bf;  -- list length of one block
+  begin
+    rx_beamlet_list_val <= '0';  -- no valid block while waiting for the next packet
+
+    -- Wait until one ext_clk cycle after eop, so after p_rx_reordered_list has updated
+    proc_common_wait_until_hi_lo(ext_clk, rx_beamlet_sosi.eop);
+    wait until rising_edge(ext_clk);
+
+    -- Use same rx_beamlet_list to show all 4 blocks of a packet in time, to
+    -- ease viewing the blocks in the wave window.
+    for blk in 0 to c_sdp_cep_nof_blocks_per_packet - 1 loop
+      -- Copy block blk of the received packet into rx_beamlet_list of one block.
+      if c_use_bdo_transpose then
+        -- use rx_reordered_list, in which the transposed beamlet output order is undone
+        rx_beamlet_list_re <= rx_reordered_list_re(blk * c_N to (blk + 1) * c_N - 1);
+        rx_beamlet_list_im <= rx_reordered_list_im(blk * c_N to (blk + 1) * c_N - 1);
+      else
+        -- use rx_packet_list as received, so identity beamlet output order
+        rx_beamlet_list_re <= rx_packet_list_re(blk * c_N to (blk + 1) * c_N - 1);
+        rx_beamlet_list_im <= rx_packet_list_im(blk * c_N to (blk + 1) * c_N - 1);
+      end if;
+      rx_beamlet_list_val <= '1';  -- enables p_verify_rx_beamlet_list for this block
+      wait until rising_edge(ext_clk);  -- show each block during one ext_clk cycle
+    end loop;
+  end process;
+
+  -- Verify that beamlet values remain stable in time, so same beamlet value in each time block
+  verify_rx_beamlet_list <= '1' when unsigned(rd_data_bsn) > c_verify_rx_beamlet_list_bsn else '0';
+
+  p_verify_rx_beamlet_list : process(ext_clk)
+  begin
+    if rising_edge(ext_clk) then
+      -- Only act when p_rx_beamlet_list shows a valid block
+      if rx_beamlet_list_val = '1' then
+        -- Maintain previous rx_beamlet_list for comparison
+        prev_rx_beamlet_list_re <= rx_beamlet_list_re;
+        prev_rx_beamlet_list_im <= rx_beamlet_list_im;
+        -- After some time all rx blocks should have same beamlet values, so
+        -- from then on rx_beamlet_list does not change in time and the other
+        -- blocks should be the same as the first block.
+        if verify_rx_beamlet_list = '1' then  -- prev_* here still holds the preceding valid block
+          assert rx_beamlet_list_re = prev_rx_beamlet_list_re report "Wrong: rx_beamlet_list_re differs from previous block" severity ERROR;
+          assert rx_beamlet_list_im = prev_rx_beamlet_list_im report "Wrong: rx_beamlet_list_im differs from previous block" severity ERROR;
+        end if;
+      end if;
+    end if;
+  end process;
+
   -- get rx_beamlet for comparision with c_exp_beamlet
   rx_beamlet_x_output_re <= TO_SINT(rx_beamlet_list_re(c_exp_beamlet_x_index));
   rx_beamlet_x_output_im <= TO_SINT(rx_beamlet_list_im(c_exp_beamlet_x_index));
diff --git a/applications/lofar2/designs/lofar2_unb2c_sdp_station/revisions/lofar2_unb2c_sdp_station_bf_ring/tb_lofar2_unb2c_sdp_station_bf_ring.vhd b/applications/lofar2/designs/lofar2_unb2c_sdp_station/revisions/lofar2_unb2c_sdp_station_bf_ring/tb_lofar2_unb2c_sdp_station_bf_ring.vhd
index 351a0af16c45536bcdb590081907ba511f696b98..879d09cd9749d0841dc08a64c7b3226ef34b110a 100644
--- a/applications/lofar2/designs/lofar2_unb2c_sdp_station/revisions/lofar2_unb2c_sdp_station_bf_ring/tb_lofar2_unb2c_sdp_station_bf_ring.vhd
+++ b/applications/lofar2/designs/lofar2_unb2c_sdp_station/revisions/lofar2_unb2c_sdp_station_bf_ring/tb_lofar2_unb2c_sdp_station_bf_ring.vhd
@@ -21,7 +21,8 @@
 -------------------------------------------------------------------------------
 --
 -- Author: R. van der Walle (original), E. Kooistra (updates)
--- Purpose: Self-checking testbench for simulating lofar2_unb2c_sdp_station_bf_ring
+-- Purpose:
+--   Self-checking testbench for simulating lofar2_unb2c_sdp_station_bf_ring
 --   using WG data.
 --
 -- Description:
@@ -105,6 +106,14 @@
 -- * The c_wg_phase_offset and c_subband_phase_offset are used to tune the WG
 --   phase reference to 0.0 degrees at the start (sop)
 -- * Use g_beamlet_scale = 2**10, for full scale WG and N_ant = 1, see [1]
+-- * Using g_beamlet = c_sdp_S_sub_bf-1 = 487 puts g_subband = 102 at the last
+--   beamlet in the beamset, so at index 974,975 of rx_beamlet_list_re/im.
+-- * Default beamlet 102 also contains g_subband = 102. On HW the BF weights
+--   are default 0, but in sim the BF weights in node_sdp_beamformer.vhd
+--   are default unit weights. Therefore also write the BF weight for default
+--   beamlet 102 to define its value, in case g_beamlet /= 102. In this tb
+--   the BF weight for beamlet = g_subband = 102 is set to 0, so that the
+--   g_subband = 102 will only show up in g_beamlet.
 -- * A simulation only section in sdp_beamformer_output.vhd disturbs the BSN,
 --   to cause a merged payload error, so that sdp_source_info_payload_error
 --   can be verified here.
@@ -115,7 +124,11 @@
 --   # Manually add missing signals and constants using objects in GUI
 --   > add wave -position insertpoint  \
 --     sim:/tb_lofar2_unb2c_sdp_station_bf_ring/sp_ssts_arr2 \
---     sim:/tb_lofar2_unb2c_sdp_station_bf_ring/bsts_arr2
+--     sim:/tb_lofar2_unb2c_sdp_station_bf_ring/bsts_arr2 \
+--     sim:/tb_lofar2_unb2c_sdp_station_bf_ring/rx_packet_list_re \
+--     sim:/tb_lofar2_unb2c_sdp_station_bf_ring/rx_packet_list_im \
+--     sim:/tb_lofar2_unb2c_sdp_station_bf_ring/rx_reordered_list_re \
+--     sim:/tb_lofar2_unb2c_sdp_station_bf_ring/rx_reordered_list_im
 --   > run -a
 --   Takes about 2h 25m when g_read_all_* = FALSE
 --
@@ -124,7 +137,7 @@
 --     https://support.astron.nl/confluence/pages/viewpage.action?spaceKey=L2M&title=L4+SDPFW+Decision%3A+LOFAR2.0+SDP+Firmware+Quantization+Model
 --
 -------------------------------------------------------------------------------
-library IEEE, common_lib, unb2c_board_lib, i2c_lib, mm_lib, dp_lib, diag_lib, lofar2_sdp_lib, wpfb_lib, tech_pll_lib, tr_10GbE_lib, lofar2_unb2c_sdp_station_lib;
+library IEEE, common_lib, unb2c_board_lib, i2c_lib, mm_lib, dp_lib, diag_lib, reorder_lib, lofar2_sdp_lib, wpfb_lib, tech_pll_lib, tr_10GbE_lib, lofar2_unb2c_sdp_station_lib;
 use IEEE.std_logic_1164.all;
 use IEEE.numeric_std.all;
 use IEEE.math_real.all;
@@ -138,11 +151,13 @@ use mm_lib.mm_file_pkg.all;
 use dp_lib.dp_stream_pkg.all;
 use mm_lib.mm_file_unb_pkg.all;
 use diag_lib.diag_pkg.all;
+use reorder_lib.reorder_pkg.all;
 use wpfb_lib.wpfb_pkg.all;
 use unb2c_board_lib.unb2c_board_pkg.all;
 use lofar2_sdp_lib.sdp_pkg.all;
 use lofar2_sdp_lib.tb_sdp_pkg.all;
 use tech_pll_lib.tech_pll_component_pkg.all;
+use lofar2_unb2c_sdp_station_lib.lofar2_unb2c_sdp_station_pkg.all;
 
 entity tb_lofar2_unb2c_sdp_station_bf_ring is
   generic (
@@ -171,6 +186,10 @@ entity tb_lofar2_unb2c_sdp_station_bf_ring is
 end tb_lofar2_unb2c_sdp_station_bf_ring;
 
 architecture tb of tb_lofar2_unb2c_sdp_station_bf_ring is
+  -- Revision parameters
+  constant c_design_name         : string := "lofar2_unb2c_sdp_station_bf_ring";
+  constant c_revision_select     : t_lofar2_unb2c_sdp_station_config := func_sel_revision_rec(c_design_name);
+
   constant c_sim                 : boolean := true;
   constant c_first_unb_nr        : natural := g_first_gn / c_quad;  -- c_quad = 4 FPGAs per UniBoard2
   constant c_first_node_nr       : natural := g_first_gn mod c_quad;  -- first node_nr in range(c_quad) = [0:3] on c_first_unb_nr
@@ -187,6 +206,7 @@ architecture tb of tb_lofar2_unb2c_sdp_station_bf_ring is
 
   constant c_init_bsn            : natural := 17;  -- some recognizable value >= 0
   constant c_nof_lanes           : natural := c_sdp_N_beamsets;
+  constant c_use_bdo_transpose   : boolean := c_revision_select.use_bdo_transpose;
 
   constant c_last_id             : std_logic_vector(7 downto 0) := TO_UVEC(c_last_gn, 8);
   constant c_version             : std_logic_vector(1 downto 0) := "00";
@@ -210,8 +230,6 @@ architecture tb of tb_lofar2_unb2c_sdp_station_bf_ring is
   constant c_stat_lo_factor      : real := 1.0 - c_stat_percentage;  -- lower boundary
   constant c_stat_hi_factor      : real := 1.0 + c_stat_percentage;  -- higher boundary
 
-  constant c_nof_beamlets_per_data : natural := 2;  -- 2 dual pol beamlets (= XY, XY) per 64b data word
-
   constant c_beamlet_output_delta : integer := 2;  -- +-delta margin
 
   -- header fields
@@ -240,6 +258,15 @@ architecture tb of tb_lofar2_unb2c_sdp_station_bf_ring is
                                      x"1400"  -- block_period = 5120
                                    );
 
+  -- Expected transposed indices order by func_reorder_transpose_packet().
+  -- Yields same c_reorder_transpose_indices order as:
+  -- > python applications/lofar2/libraries/sdp/src/python/test_func_sdp_bdo_transpose_packet.py
+  constant c_nof_ch                    : natural := c_sdp_cep_nof_beamlets_per_packet * c_sdp_N_pol_bf;
+  constant c_reorder_transpose_indices : t_natural_arr(0 to c_nof_ch - 1) :=
+    func_reorder_transpose_indices(c_sdp_cep_nof_blocks_per_packet,
+                                   c_sdp_cep_nof_beamlets_per_block,
+                                   c_sdp_N_pol_bf);
+
   -- WG
   constant c_bsn_start_wg         : natural := c_init_bsn + 2;  -- start WG at this BSN to instead of some BSN, to avoid mismatches in exact expected data values
   -- .ampl
@@ -262,7 +289,7 @@ architecture tb of tb_lofar2_unb2c_sdp_station_bf_ring is
   constant c_subband_weight_phase         : real := 0.0;  -- use default unit subband weights
   constant c_exp_subband_phase            : real := g_sp_phase + c_subband_phase_offset + c_subband_weight_phase;
   constant c_exp_subband_ampl             : real := real(c_wg_ampl) * c_sdp_wpfb_subband_sp_ampl_ratio * c_subband_weight_gain;
-  constant c_exp_subband_power            : real := c_exp_subband_ampl**2.0;  -- complex signal ampl, so no divide by 2
+  constant c_exp_subband_power            : real := c_exp_subband_ampl**2.0;  -- complex signal ampl, so power is A**2 (not A**2 / 2 as for real)
   constant c_exp_subband_sst              : real := c_exp_subband_power * real(c_nof_block_per_sync);
 
   constant c_exp_remnant_subband_phase    : real := g_sp_remnant_phase + c_subband_phase_offset + c_subband_weight_phase;
@@ -358,12 +385,17 @@ architecture tb of tb_lofar2_unb2c_sdp_station_bf_ring is
   constant c_mm_file_reg_bf_scale         : string := mmf_unb_file_prefix(c_last_unb_nr, c_last_node_nr) & "REG_BF_SCALE";  -- readback
   constant c_mm_file_reg_hdr_dat          : string := mmf_unb_file_prefix(c_last_unb_nr, c_last_node_nr) & "REG_HDR_DAT";  -- control beamlet output
 
+  -- Tb BSN moments
+  constant c_stimuli_done_bsn             : natural := c_init_bsn + c_nof_block_per_sync * 3;
+  constant c_verify_rx_beamlet_list_bsn   : natural := c_stimuli_done_bsn - c_nof_block_per_sync;
+
   -- Tb
   signal stimuli_done        : std_logic := '0';
   signal tb_almost_end       : std_logic := '0';
   signal tb_end              : std_logic := '0';
   signal tb_clk              : std_logic := '0';
   signal rd_data             : std_logic_vector(c_32 - 1 downto 0);
+  signal rd_data_bsn         : std_logic_vector(c_32 - 1 downto 0);
 
   signal dest_rst            : std_logic := '1';  -- use separate destination rst for Rx 10GbE in tb
   signal pps_rst             : std_logic := '1';  -- use separate reset to release the PPS generator
@@ -449,13 +481,34 @@ architecture tb of tb_lofar2_unb2c_sdp_station_bf_ring is
   signal rx_beamlet_sop_cnt  : natural := 0;
   signal rx_beamlet_eop_cnt  : natural := 0;
 
-  signal rx_beamlet_arr_re   : t_slv_8_arr(c_sdp_cep_nof_blocks_per_packet - 1 downto 0);  -- [3:0]
-  signal rx_beamlet_arr_im   : t_slv_8_arr(c_sdp_cep_nof_blocks_per_packet - 1 downto 0);  -- [3:0]
+  -- [0 : 3] =  X, Y, X, Y
+  signal rx_beamlet_arr_re   : t_sdp_beamlet_part_arr;
+  signal rx_beamlet_arr_im   : t_sdp_beamlet_part_arr;
   signal rx_beamlet_cnt      : natural;
   signal rx_beamlet_valid    : std_logic;
 
-  signal rx_beamlet_list_re  : t_slv_8_arr(c_sdp_cep_nof_beamlets_per_block * c_sdp_N_pol_bf - 1 downto 0);  -- [488 * 2-1:0] = [975:0]
-  signal rx_beamlet_list_im  : t_slv_8_arr(c_sdp_cep_nof_beamlets_per_block * c_sdp_N_pol_bf - 1 downto 0);  -- [488 * 2-1:0] = [975:0]
+  -- [0 : 4 * 488 * 2 - 1] = [0 : 3903]
+  signal rx_packet_list_re      : t_sdp_beamlet_packet_list;
+  signal rx_packet_list_im      : t_sdp_beamlet_packet_list;
+  signal rx_reordered_list_re   : t_sdp_beamlet_packet_list;
+  signal rx_reordered_list_im   : t_sdp_beamlet_packet_list;
+
+  -- Recover original beamlet order per block, either directly when c_use_bdo_transpose
+  -- = false, or by using func_sdp_bdo_transpose_packet() when c_use_bdo_transpose = true.
+  -- List: [0 : 488 * 2 - 1] = [0 : 975]
+  -- . X part at even indices
+  -- . Y part at odd indices
+  signal prev_rx_beamlet_list_re : t_sdp_beamlet_block_list;
+  signal prev_rx_beamlet_list_im : t_sdp_beamlet_block_list;
+  signal rx_beamlet_list_re      : t_sdp_beamlet_block_list;
+  signal rx_beamlet_list_im      : t_sdp_beamlet_block_list;
+  signal rx_beamlet_list_val     : std_logic;
+  signal verify_rx_beamlet_list  : std_logic := '0';
+
+  signal rx_beamlet_x_output_re : integer;
+  signal rx_beamlet_x_output_im : integer;
+  signal rx_beamlet_y_output_re : integer;
+  signal rx_beamlet_y_output_im : integer;
 
   -- DUT
   signal ext_clk             : std_logic := '0';
@@ -511,7 +564,7 @@ begin
   gen_dut : for RN in 0 to c_last_rn generate
     u_lofar_unb2c_sdp_station_bf : entity lofar2_unb2c_sdp_station_lib.lofar2_unb2c_sdp_station
     generic map (
-      g_design_name            => "lofar2_unb2c_sdp_station_bf_ring",
+      g_design_name            => c_design_name,
       g_design_note            => "",
       g_sim                    => c_sim,
       g_sim_unb_nr             => (g_first_gn + RN) / c_quad,
@@ -671,39 +724,6 @@ begin
     -- Wait for DUT power up after reset
     wait for 1 us;
 
-    print_str("");
-    print_str("WG:");
-    print_str(". c_wg_ampl                            = " & int_to_str(c_wg_ampl));
-    print_str(". c_exp_sp_power                       = " & real_to_str(c_exp_sp_power, 20, 1));
-    print_str(". c_exp_sp_ast                         = " & real_to_str(c_exp_sp_ast, 20, 1));
-
-    print_str("");
-    print_str("Subband weight:");
-    print_str(". sp_subband_weight_gain               = " & real_to_str(sp_subband_weight_gain, 20, 6));
-    print_str(". sp_subband_weight_phase              = " & real_to_str(sp_subband_weight_phase, 20, 6));
-
-    print_str("");
-    print_str("SST results:");
-    print_str(". sst_weighted_subbands_flag           = " & sl_to_str(sst_weighted_subbands_flag));
-    print_str("");
-    print_str(". c_exp_subband_ampl                   = " & int_to_str(natural(c_exp_subband_ampl)));
-    print_str(". c_exp_subband_power                  = " & real_to_str(c_exp_subband_power, 20, 1));
-    print_str(". c_exp_subband_sst                    = " & real_to_str(c_exp_subband_sst, 20, 1));
-    print_str("");
-    print_str(". sp_sst                               = " & real_to_str(sp_sst, 20, 1));
-    print_str(". sp_sst / c_exp_subband_sst           = " & real_to_str(sp_sst / c_exp_subband_sst, 20, 6));
-
-    print_str("");
-    print_str("BST results:");
-    print_str(". c_exp_beamlet_x_ampl                   = " & int_to_str(natural(c_exp_beamlet_x_ampl)));
-    print_str(". c_exp_beamlet_x_power                  = " & real_to_str(c_exp_beamlet_x_power, 20, 1));
-    print_str(". c_exp_beamlet_x_bst                    = " & real_to_str(c_exp_beamlet_x_bst, 20, 1));
-    print_str("");
-    print_str(". c_exp_beamlet_y_ampl                   = " & int_to_str(natural(c_exp_beamlet_y_ampl)));
-    print_str(". c_exp_beamlet_y_power                  = " & real_to_str(c_exp_beamlet_y_power, 20, 1));
-    print_str(". c_exp_beamlet_y_bst                    = " & real_to_str(c_exp_beamlet_y_bst, 20, 1));
-    print_str("");
-
     ----------------------------------------------------------------------------
     -- Set and check SDP info
     ----------------------------------------------------------------------------
@@ -719,7 +739,6 @@ begin
     --   0   block_period            : STD_LOGIC_VECTOR(15 DOWNTO 0);
     --     END RECORD;
     -- . Write
-
     for RN in 0 to c_last_rn loop
       v_gn := g_first_gn + RN;
       mmf_mm_bus_wr(mmf_unb_file_prefix(v_gn / c_quad, v_gn mod c_quad) & "REG_SDP_INFO",  8, TO_UINT(c_exp_sdp_info.antenna_field_index), tb_clk);
@@ -1129,8 +1148,9 @@ begin
     ----------------------------------------------------------------------------
     -- Wait for enough WG data and start of sync interval
     ----------------------------------------------------------------------------
-    mmf_mm_wait_until_value(c_mm_file_reg_bsn_scheduler_wg, 0,  -- read BSN low
-                            "UNSIGNED", rd_data, ">=", c_init_bsn + c_nof_block_per_sync * 3,  -- this is the wait until condition
+    -- read BSN low, this is the wait until condition
+    mmf_mm_wait_until_value(c_mm_file_reg_bsn_scheduler_wg, 0,
+                            "UNSIGNED", rd_data_bsn, ">=", c_stimuli_done_bsn,
                             c_sdp_T_sub, tb_clk);
 
     -- Stimuli done, now verify results at end of test
@@ -1231,18 +1251,18 @@ begin
     ---------------------------------------------------------------------------
 
     print_str("");
-    print_str("WG:");
+    print_str("* WG:");
     print_str(". c_wg_ampl                            = " & int_to_str(c_wg_ampl));
     print_str(". c_exp_sp_power                       = " & real_to_str(c_exp_sp_power, 20, 1));
     print_str(". c_exp_sp_ast                         = " & real_to_str(c_exp_sp_ast, 20, 1));
 
     print_str("");
-    print_str("Subband weight:");
+    print_str("* Subband weight:");
     print_str(". sp_subband_weight_gain               = " & real_to_str(sp_subband_weight_gain, 20, 6));
     print_str(". sp_subband_weight_phase              = " & real_to_str(sp_subband_weight_phase, 20, 6));
 
     print_str("");
-    print_str("SST results:");
+    print_str("* SST results:");
     print_str(". sst_weighted_subbands_flag           = " & sl_to_str(sst_weighted_subbands_flag));
     print_str("");
     print_str(". c_exp_subband_ampl                   = " & int_to_str(natural(c_exp_subband_ampl)));
@@ -1253,7 +1273,7 @@ begin
     print_str(". sp_sst / c_exp_subband_sst           = " & real_to_str(sp_sst / c_exp_subband_sst, 20, 6));
 
     print_str("");
-    print_str("BST results:");
+    print_str("* BST results:");
     print_str(". c_exp_beamlet_x_ampl                   = " & int_to_str(natural(c_exp_beamlet_x_ampl)));
     print_str(". c_exp_beamlet_x_power                  = " & real_to_str(c_exp_beamlet_x_power, 20, 1));
     print_str(". c_exp_beamlet_x_bst                    = " & real_to_str(c_exp_beamlet_x_bst, 20, 1));
@@ -1267,6 +1287,7 @@ begin
       print_str(". bst_x_arr(" & integer'image(v_G) & ") = " & real_to_str(bst_x_arr(U), 20, 1));
       print_str(". bst_y_arr(" & integer'image(v_G) & ") = " & real_to_str(bst_y_arr(U), 20, 1));
     end loop;
+    print_str("");
     for U in 0 to c_sdp_N_beamsets - 1 loop
       v_G := g_beamlet + U * c_sdp_S_sub_bf;  -- global beamlet index, range(c_sdp_N_beamlets_sdp)
       print_str(". bst_x_arr(" & integer'image(v_G) & ") / c_exp_beamlet_x_bst = " & real_to_str(bst_x_arr(U) / c_exp_beamlet_x_bst, 20, 6));
@@ -1274,19 +1295,27 @@ begin
     end loop;
 
     print_str("");
-    print_str("Beamlet output:");
+    print_str("* Beamlet scale:");
     print_str(". rd_beamlet_scale                     = " & int_to_str(TO_UINT(rd_beamlet_scale)));
     print_str(". c_exp_beamlet_scale                  = " & int_to_str(c_exp_beamlet_scale));
     print_str("");
-    print_str(". c_exp_beamlet_x_output_ampl          = " & int_to_str(natural(c_exp_beamlet_x_output_ampl)));
-    print_str(". c_exp_beamlet_x_output_phase         = " & int_to_str(integer(c_exp_beamlet_x_output_phase)));
-    print_str(". c_exp_beamlet_x_output_re            = " & int_to_str(integer(c_exp_beamlet_x_output_re)));
-    print_str(". c_exp_beamlet_x_output_im            = " & int_to_str(integer(c_exp_beamlet_x_output_im)));
+    print_str("* Beamlet output:");
+    print_str("  . c_exp_beamlet_x_output_ampl        = " & int_to_str(natural(c_exp_beamlet_x_output_ampl)));
+    print_str("  . c_exp_beamlet_x_output_phase       = " & int_to_str(integer(c_exp_beamlet_x_output_phase)));
+    print_str("  . rx_beamlet_x_output_re             = " & int_to_str(rx_beamlet_x_output_re));
+    print_str("  . c_exp_beamlet_x_output_re          = " & int_to_str(integer(c_exp_beamlet_x_output_re)));
+    print_str("  . rx_beamlet_x_output_im             = " & int_to_str(rx_beamlet_x_output_im));
+    print_str("  . c_exp_beamlet_x_output_im          = " & int_to_str(integer(c_exp_beamlet_x_output_im)));
+    print_str("");
+    print_str("  . c_exp_beamlet_y_output_ampl        = " & int_to_str(natural(c_exp_beamlet_y_output_ampl)));
+    print_str("  . c_exp_beamlet_y_output_phase       = " & int_to_str(integer(c_exp_beamlet_y_output_phase)));
+    print_str("  . rx_beamlet_y_output_re             = " & int_to_str(rx_beamlet_y_output_re));
+    print_str("  . c_exp_beamlet_y_output_re          = " & int_to_str(integer(c_exp_beamlet_y_output_re)));
+    print_str("  . rx_beamlet_y_output_im             = " & int_to_str(rx_beamlet_y_output_im));
+    print_str("  . c_exp_beamlet_y_output_im          = " & int_to_str(integer(c_exp_beamlet_y_output_im)));
+    print_str("");
+    print_str("  . c_beamlet_output_delta (+- margin)   = " & int_to_str(integer(c_beamlet_output_delta)));
     print_str("");
-    print_str(". c_exp_beamlet_y_output_ampl          = " & int_to_str(natural(c_exp_beamlet_y_output_ampl)));
-    print_str(". c_exp_beamlet_y_output_phase         = " & int_to_str(integer(c_exp_beamlet_y_output_phase)));
-    print_str(". c_exp_beamlet_y_output_re            = " & int_to_str(integer(c_exp_beamlet_y_output_re)));
-    print_str(". c_exp_beamlet_y_output_im            = " & int_to_str(integer(c_exp_beamlet_y_output_im)));
 
     ---------------------------------------------------------------------------
     -- Verify SST
@@ -1312,15 +1341,15 @@ begin
     -- Verify beamlet output in 10GbE UDP offload
     ---------------------------------------------------------------------------
     -- X-pol
-    v_re := TO_SINT(rx_beamlet_list_re(c_exp_beamlet_x_index)); v_re_exp := c_exp_beamlet_x_output_re;
-    v_im := TO_SINT(rx_beamlet_list_im(c_exp_beamlet_x_index)); v_im_exp := c_exp_beamlet_x_output_im;
+    v_re := rx_beamlet_x_output_re; v_re_exp := c_exp_beamlet_x_output_re;
+    v_im := rx_beamlet_x_output_im; v_im_exp := c_exp_beamlet_x_output_im;
     assert v_re > integer(v_re_exp) - c_beamlet_output_delta report "Wrong beamlet X output (re) " & integer'image(v_re) & " != " & real'image(v_re_exp) severity ERROR;
     assert v_re < integer(v_re_exp) + c_beamlet_output_delta report "Wrong beamlet X output (re) " & integer'image(v_re) & " != " & real'image(v_re_exp) severity ERROR;
     assert v_im > integer(v_im_exp) - c_beamlet_output_delta report "Wrong beamlet X output (im) " & integer'image(v_im) & " != " & real'image(v_im_exp) severity ERROR;
     assert v_im < integer(v_im_exp) + c_beamlet_output_delta report "Wrong beamlet X output (im) " & integer'image(v_im) & " != " & real'image(v_im_exp) severity ERROR;
     -- Y-pol
-    v_re := TO_SINT(rx_beamlet_list_re(c_exp_beamlet_y_index)); v_re_exp := c_exp_beamlet_y_output_re;
-    v_im := TO_SINT(rx_beamlet_list_im(c_exp_beamlet_y_index)); v_im_exp := c_exp_beamlet_y_output_im;
+    v_re := rx_beamlet_y_output_re; v_re_exp := c_exp_beamlet_y_output_re;
+    v_im := rx_beamlet_y_output_im; v_im_exp := c_exp_beamlet_y_output_im;
     assert v_re > integer(v_re_exp) - c_beamlet_output_delta report "Wrong beamlet Y output (re) " & integer'image(v_re) & " != " & real'image(v_re_exp) severity ERROR;
     assert v_re < integer(v_re_exp) + c_beamlet_output_delta report "Wrong beamlet Y output (re) " & integer'image(v_re) & " != " & real'image(v_re_exp) severity ERROR;
     assert v_im > integer(v_im_exp) - c_beamlet_output_delta report "Wrong beamlet Y output (im) " & integer'image(v_im) & " != " & real'image(v_im_exp) severity ERROR;
@@ -1433,10 +1462,10 @@ begin
   begin
     rx_beamlet_cnt <= 0;
     rx_beamlet_valid <= '0';
-    -- Wait until start of a beamlet packet, capture only first block in packet
+    -- Wait until start of a beamlet packet
     proc_common_wait_until_high(ext_clk, rx_beamlet_sosi.sop);
-    -- c_nof_beamlets_per_data = 2 dual pol beamlets (= XY, XY) per 64b data word
-    for I in 0 to (c_sdp_cep_nof_blocks_per_packet * c_sdp_cep_nof_beamlets_per_block / c_nof_beamlets_per_data) - 1 loop
+    -- c_sdp_nof_beamlets_per_longword = 2 dual pol beamlets (= XY, XY) per 64b data word
+    for I in 0 to (c_sdp_cep_nof_beamlets_per_packet / c_sdp_nof_beamlets_per_longword) - 1 loop
       proc_common_wait_until_high(ext_clk, rx_beamlet_sosi.valid);
       rx_beamlet_valid <= '1';
       -- Capture rx beamlets per longword in rx_beamlet_arr, for time series view in Wave window
@@ -1448,27 +1477,94 @@ begin
       rx_beamlet_arr_im(2) <= rx_beamlet_sosi.data(23 downto 16);
       rx_beamlet_arr_re(3) <= rx_beamlet_sosi.data(15 downto 8);  -- Y
       rx_beamlet_arr_im(3) <= rx_beamlet_sosi.data( 7 downto 0);
-      if I < c_sdp_cep_nof_beamlets_per_block / c_nof_beamlets_per_data then
-        -- Only capture the first beamlets block of each packet in rx_beamlet_list
-        rx_beamlet_list_re(I * 4 + 0) <= rx_beamlet_sosi.data(63 downto 56);  -- X
-        rx_beamlet_list_im(I * 4 + 0) <= rx_beamlet_sosi.data(55 downto 48);
-        rx_beamlet_list_re(I * 4 + 1) <= rx_beamlet_sosi.data(47 downto 40);  -- Y
-        rx_beamlet_list_im(I * 4 + 1) <= rx_beamlet_sosi.data(39 downto 32);
-        rx_beamlet_list_re(I * 4 + 2) <= rx_beamlet_sosi.data(31 downto 24);  -- X
-        rx_beamlet_list_im(I * 4 + 2) <= rx_beamlet_sosi.data(23 downto 16);
-        rx_beamlet_list_re(I * 4 + 3) <= rx_beamlet_sosi.data(15 downto 8);  -- Y
-        rx_beamlet_list_im(I * 4 + 3) <= rx_beamlet_sosi.data( 7 downto 0);
-      end if;
+      -- Capture the beamlets block of each packet in rx_packet_list
+      rx_packet_list_re(I * 4 + 0) <= rx_beamlet_sosi.data(63 downto 56);  -- X
+      rx_packet_list_im(I * 4 + 0) <= rx_beamlet_sosi.data(55 downto 48);
+      rx_packet_list_re(I * 4 + 1) <= rx_beamlet_sosi.data(47 downto 40);  -- Y
+      rx_packet_list_im(I * 4 + 1) <= rx_beamlet_sosi.data(39 downto 32);
+      rx_packet_list_re(I * 4 + 2) <= rx_beamlet_sosi.data(31 downto 24);  -- X
+      rx_packet_list_im(I * 4 + 2) <= rx_beamlet_sosi.data(23 downto 16);
+      rx_packet_list_re(I * 4 + 3) <= rx_beamlet_sosi.data(15 downto 8);  -- Y
+      rx_packet_list_im(I * 4 + 3) <= rx_beamlet_sosi.data( 7 downto 0);
       proc_common_wait_until_high(ext_clk, rx_beamlet_sosi.valid);
       -- Use at least one WAIT instead of proc_common_wait_some_cycles() to
       -- avoid Modelsim warning: (vcom-1090) Possible infinite loop: Process
       -- contains no WAIT statement.
       wait until rising_edge(ext_clk);
       rx_beamlet_valid <= '0';
-      rx_beamlet_cnt   <= (rx_beamlet_cnt + c_nof_beamlets_per_data) mod c_sdp_cep_nof_beamlets_per_block;  -- 4 blocks/packet
+      rx_beamlet_cnt   <= (rx_beamlet_cnt + c_sdp_nof_beamlets_per_longword) mod c_sdp_cep_nof_beamlets_per_block;  -- 4 blocks/packet
+    end loop;
+  end process;
+
+  -- Undo the beamlet output transpose, to have original beamlet order
+  p_rx_reordered_list : process
+  begin
+    -- Wait until end of a beamlet packet, then reorder the captured rx_packet_list
+    wait until rising_edge(ext_clk);  -- explicit WAIT to avoid Modelsim warning: (vcom-1090)
+    proc_common_wait_until_hi_lo(ext_clk, rx_beamlet_sosi.eop);  -- reorder per packet, to reduce simulation effort
+    rx_reordered_list_re <= func_sdp_bdo_transpose_packet(c_sdp_cep_nof_blocks_per_packet,
+                                                          c_sdp_cep_nof_beamlets_per_block,
+                                                          rx_packet_list_re);
+    rx_reordered_list_im <= func_sdp_bdo_transpose_packet(c_sdp_cep_nof_blocks_per_packet,
+                                                          c_sdp_cep_nof_beamlets_per_block,
+                                                          rx_packet_list_im);
+  end process;
+
+  p_rx_beamlet_list : process
+    constant c_N : natural := c_sdp_cep_nof_beamlets_per_block * c_sdp_N_pol_bf;  -- list length of one block
+  begin
+    rx_beamlet_list_val <= '0';
+
+    -- Wait one extra ext_clk cycle after eop, so that p_rx_reordered_list has updated
+    proc_common_wait_until_hi_lo(ext_clk, rx_beamlet_sosi.eop);
+    wait until rising_edge(ext_clk);
+
+    -- Use same rx_beamlet_list to show all blocks of a packet one after the
+    -- other in time (one block per ext_clk cycle), to ease viewing in the wave window.
+    for blk in 0 to c_sdp_cep_nof_blocks_per_packet - 1 loop
+      -- Copy block blk from the packet list into rx_beamlet_list of one block.
+      if c_use_bdo_transpose then
+        -- transposed beamlet output order, so use the list with the transpose undone
+        rx_beamlet_list_re <= rx_reordered_list_re(blk * c_N to (blk + 1) * c_N - 1);
+        rx_beamlet_list_im <= rx_reordered_list_im(blk * c_N to (blk + 1) * c_N - 1);
+      else
+        -- identity beamlet output order, so use the received packet list as is
+        rx_beamlet_list_re <= rx_packet_list_re(blk * c_N to (blk + 1) * c_N - 1);
+        rx_beamlet_list_im <= rx_packet_list_im(blk * c_N to (blk + 1) * c_N - 1);
+      end if;
+      rx_beamlet_list_val <= '1';
+      wait until rising_edge(ext_clk);
+    end loop;
+  end process;
 
+  -- Verify that beamlet values remain stable in time, so same beamlet value in each time block
+  verify_rx_beamlet_list <= '1' when unsigned(rd_data_bsn) > c_verify_rx_beamlet_list_bsn else '0';
+
+  p_verify_rx_beamlet_list : process(ext_clk)
+  begin
+    if rising_edge(ext_clk) then
+      -- Only act when p_rx_beamlet_list is valid
+      if rx_beamlet_list_val = '1' then
+        -- Maintain previous rx_beamlet_list for comparison
+        prev_rx_beamlet_list_re <= rx_beamlet_list_re;
+        prev_rx_beamlet_list_im <= rx_beamlet_list_im;
+        -- After some time all rx blocks should have same beamlet values, so
+        -- then rx_beamlet_list does not change in time. The asserts still see
+        -- the previous block in prev_rx_beamlet_list (signal update is deferred).
+        if verify_rx_beamlet_list = '1' then
+          assert rx_beamlet_list_re = prev_rx_beamlet_list_re report "Wrong: rx_beamlet_list_re differs from previous block" severity ERROR;
+          assert rx_beamlet_list_im = prev_rx_beamlet_list_im report "Wrong: rx_beamlet_list_im differs from previous block" severity ERROR;
+        end if;
+      end if;
+    end if;
+  end process;
+
+  -- get rx_beamlet for comparison with c_exp_beamlet
+  rx_beamlet_x_output_re <= TO_SINT(rx_beamlet_list_re(c_exp_beamlet_x_index));
+  rx_beamlet_x_output_im <= TO_SINT(rx_beamlet_list_im(c_exp_beamlet_x_index));
+  rx_beamlet_y_output_re <= TO_SINT(rx_beamlet_list_re(c_exp_beamlet_y_index));
+  rx_beamlet_y_output_im <= TO_SINT(rx_beamlet_list_im(c_exp_beamlet_y_index));
+
   -- To view the 64 bit 10GbE offload data more easily in the Wave window
   rx_beamlet_data <= rx_beamlet_sosi.data(c_longword_w - 1 downto 0);
 end tb;
diff --git a/applications/lofar2/designs/lofar2_unb2c_sdp_station/src/vhdl/lofar2_unb2c_sdp_station.vhd b/applications/lofar2/designs/lofar2_unb2c_sdp_station/src/vhdl/lofar2_unb2c_sdp_station.vhd
index 0d4176a1f28ac31a330e9db3848355a5be3569f8..8635117451988ddf5114ad7ece2c26191757fbf3 100644
--- a/applications/lofar2/designs/lofar2_unb2c_sdp_station/src/vhdl/lofar2_unb2c_sdp_station.vhd
+++ b/applications/lofar2/designs/lofar2_unb2c_sdp_station/src/vhdl/lofar2_unb2c_sdp_station.vhd
@@ -779,6 +779,7 @@ begin
     g_use_oversample         => c_revision_select.use_oversample,
     g_use_xsub               => c_revision_select.use_xsub,
     g_use_bf                 => c_revision_select.use_bf,
+    g_use_bdo_transpose      => c_revision_select.use_bdo_transpose,
     g_use_ring               => c_revision_select.use_ring,
     g_P_sq                   => c_revision_select.P_sq
   )
diff --git a/applications/lofar2/designs/lofar2_unb2c_sdp_station/src/vhdl/lofar2_unb2c_sdp_station_pkg.vhd b/applications/lofar2/designs/lofar2_unb2c_sdp_station/src/vhdl/lofar2_unb2c_sdp_station_pkg.vhd
index 45da1f25a4bb4ab8346c1c0ae99dbf3f8a48b39a..5621f9ded779957839d0b765a571a043e1e10b73 100644
--- a/applications/lofar2/designs/lofar2_unb2c_sdp_station/src/vhdl/lofar2_unb2c_sdp_station_pkg.vhd
+++ b/applications/lofar2/designs/lofar2_unb2c_sdp_station/src/vhdl/lofar2_unb2c_sdp_station_pkg.vhd
@@ -35,21 +35,25 @@ package lofar2_unb2c_sdp_station_pkg is
     use_fsub          : boolean;
     use_oversample    : boolean;
     use_bf            : boolean;
+    use_bdo_transpose : boolean;
     use_xsub          : boolean;
     use_ring          : boolean;
     P_sq              : natural;
   end record;
 
-  constant c_ait        : t_lofar2_unb2c_sdp_station_config := (false, false, false, false, false, false, 0);
-  constant c_fsub       : t_lofar2_unb2c_sdp_station_config := (false, true,  false, false, false, false, 0);
-  constant c_bf         : t_lofar2_unb2c_sdp_station_config := (false, true,  false, true,  false, false, 0);
-  constant c_bf_ring    : t_lofar2_unb2c_sdp_station_config := (false, true,  false, true,  false, true,  0);
-  constant c_xsub_one   : t_lofar2_unb2c_sdp_station_config := (false, true,  false, false, true,  false, 1);
-  constant c_xsub_ring  : t_lofar2_unb2c_sdp_station_config := (false, true,  false, false, true,  true,  9);
-  constant c_full_wg    : t_lofar2_unb2c_sdp_station_config := (true,  true,  false, true,  true,  true,  9);
-  constant c_full       : t_lofar2_unb2c_sdp_station_config := (false, true,  false, true,  true,  true,  9);
-  constant c_full_wg_os : t_lofar2_unb2c_sdp_station_config := (true,  true,  true,  true,  true,  true,  9);
-  constant c_full_os    : t_lofar2_unb2c_sdp_station_config := (false, true,  true,  true,  true,  true,  9);
+  constant c_ait        : t_lofar2_unb2c_sdp_station_config := (false, false, false, false, false, false, false, 0);
+  constant c_fsub       : t_lofar2_unb2c_sdp_station_config := (false, true,  false, false, false, false, false, 0);
+  -- use c_bf on one node also to simulate bdo transpose
+  -- use c_bf_ring with ring also to simulate bdo identity
+  constant c_bf         : t_lofar2_unb2c_sdp_station_config := (false, true,  false, true,  true,  false, false, 0);
+  constant c_bf_ring    : t_lofar2_unb2c_sdp_station_config := (false, true,  false, true,  false, false, true,  0);
+  constant c_xsub_one   : t_lofar2_unb2c_sdp_station_config := (false, true,  false, false, false, true,  false, 1);
+  constant c_xsub_ring  : t_lofar2_unb2c_sdp_station_config := (false, true,  false, false, false, true,  true,  9);
+  constant c_full_wg    : t_lofar2_unb2c_sdp_station_config := (true,  true,  false, true,  false, true,  true,  9);
+  -- Use c_full for LOFAR2 Station SDP operations
+  constant c_full       : t_lofar2_unb2c_sdp_station_config := (false, true,  false, true,  true,  true,  true,  9);
+  constant c_full_wg_os : t_lofar2_unb2c_sdp_station_config := (true,  true,  true,  true,  false, true,  true,  9);
+  constant c_full_os    : t_lofar2_unb2c_sdp_station_config := (false, true,  true,  true,  false, true,  true,  9);
 
   -- Function to select the revision configuration.
   function func_sel_revision_rec(g_design_name : string) return t_lofar2_unb2c_sdp_station_config;
diff --git a/applications/lofar2/libraries/sdp/src/python/test_func_sdp_transpose_packet.py b/applications/lofar2/libraries/sdp/src/python/test_func_sdp_transpose_packet.py
new file mode 100644
index 0000000000000000000000000000000000000000..f1063a7473a9f5c5d8787fed11282d6698937ead
--- /dev/null
+++ b/applications/lofar2/libraries/sdp/src/python/test_func_sdp_transpose_packet.py
@@ -0,0 +1,48 @@
+###############################################################################
+#
+# Copyright 2023
+# ASTRON (Netherlands Institute for Radio Astronomy) <http://www.astron.nl/>
+# P.O.Box 2, 7990 AA Dwingeloo, The Netherlands
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+###############################################################################
+
+# Author: Eric Kooistra
+# Date: Aug 2023
+# Purpose:
+#   Use Python to verify equivalent VHDL function func_sdp_bdo_transpose_packet()
+#   in tb_sdp_pkg.vhd
+# Usage:
+# > python test_func_sdp_transpose_packet.py > x
+# > more x
+# > tail -n 50 x
+
+c_sdp_N_pol_bf = 2
+
+def func_sdp_bdo_transpose_packet(nof_blocks_per_packet, nof_beamlets_per_block, packet_list):
+    v_list = [0] * len(packet_list)  # output list, same length as the input list
+    for blk in range(nof_blocks_per_packet):
+        for blet in range(nof_beamlets_per_block):
+            for pol_bf in range(c_sdp_N_pol_bf):
+                v_in = (blk * nof_beamlets_per_block + blet) * c_sdp_N_pol_bf + pol_bf  # input index order [blk][blet][pol_bf]
+                v_out = (blet * nof_blocks_per_packet + blk) * c_sdp_N_pol_bf + pol_bf  # output index order [blet][blk][pol_bf]
+                v_list[v_out] = packet_list[v_in]
+    return v_list  # new list, packet_list itself is not modified
+
+nof_blocks_per_packet = 4
+nof_beamlets_per_block = 488
+packet_list = list(range(0, nof_beamlets_per_block * nof_blocks_per_packet * c_sdp_N_pol_bf))
+out_list = func_sdp_bdo_transpose_packet(nof_blocks_per_packet, nof_beamlets_per_block, packet_list)
+for d in out_list:
+    print('%d' % d)
diff --git a/applications/lofar2/libraries/sdp/src/vhdl/node_sdp_beamformer.vhd b/applications/lofar2/libraries/sdp/src/vhdl/node_sdp_beamformer.vhd
index a2ea28949b81de46b3caec0db64d887885664610..d278d1ab57802420adceec141afdf925a7036cd7 100644
--- a/applications/lofar2/libraries/sdp/src/vhdl/node_sdp_beamformer.vhd
+++ b/applications/lofar2/libraries/sdp/src/vhdl/node_sdp_beamformer.vhd
@@ -43,6 +43,7 @@ entity node_sdp_beamformer is
     g_sim                    : boolean := false;
     g_sim_sdp                : t_sdp_sim := c_sdp_sim;
     g_beamset_id             : natural := 0;
+    g_use_bdo_transpose      : boolean := false;
     g_scope_selected_beamlet : natural := 0;
     -- Use no default raw width, to force instance to set it
     g_subband_raw_dat_w      : natural;  -- default: c_sdp_W_subband;
@@ -254,7 +255,8 @@ begin
   ---------------------------------------------------------------
   u_sdp_beamformer_output : entity work.sdp_beamformer_output
   generic map(
-    g_beamset_id  => g_beamset_id
+    g_beamset_id    => g_beamset_id,
+    g_use_transpose => g_use_bdo_transpose
   )
   port map (
     mm_rst => mm_rst,
diff --git a/applications/lofar2/libraries/sdp/src/vhdl/sdp_beamformer_output.vhd b/applications/lofar2/libraries/sdp/src/vhdl/sdp_beamformer_output.vhd
index 210c308c6596fb1d5d456602cce0a2b7807e870a..d12b7783bb7d960b402db1a5896280f4e4c1409a 100644
--- a/applications/lofar2/libraries/sdp/src/vhdl/sdp_beamformer_output.vhd
+++ b/applications/lofar2/libraries/sdp/src/vhdl/sdp_beamformer_output.vhd
@@ -22,15 +22,17 @@
 --
 -- Author: R. van der Walle, E. Kooistra (payload error support)
 -- Purpose:
--- The beamformer output (BDO) packetizes the beamlet data into UDP/IP packets.
--- Description:
--- * https://support.astron.nl/confluence/display/L2M/L5+SDPFW+Design+Document%3A+Beamformer
--- * https://support.astron.nl/confluence/display/L2M/L4+SDPFW+Decision%3A+Multiple+beamlet+output+destinations
+-- The beamformer data output (BDO) packetizes the beamlet data into UDP/IP packets.
+-- Description: see references
+-- References:
+-- [1] https://support.astron.nl/confluence/display/L2M/L5+SDPFW+Design+Document%3A+Beamformer
+-- [2] https://support.astron.nl/confluence/display/L2M/L4+SDPFW+Decision%3A+Multiple+beamlet+output+destinations
+-- [3] https://plm.astron.nl/polarion/#/project/LOFAR2System/wiki/L1%20Interface%20Control%20Documents/STAT%20to%20CEP%20ICD
 -- Remark:
 --
 -------------------------------------------------------------------------------
 
-library IEEE, common_lib, dp_lib, tr_10GbE_lib;
+library IEEE, common_lib, dp_lib, reorder_lib, tr_10GbE_lib;
 use IEEE.std_logic_1164.all;
 use IEEE.numeric_std.all;
 use common_lib.common_pkg.all;
@@ -38,13 +40,14 @@ use common_lib.common_field_pkg.all;
 use common_lib.common_mem_pkg.all;
 use common_lib.common_network_layers_pkg.all;
 use dp_lib.dp_stream_pkg.all;
+use reorder_lib.reorder_pkg.all;
 use work.sdp_pkg.all;
 
 entity sdp_beamformer_output is
   generic (
-    g_beamset_id : natural := 0
-
-   );
+    g_beamset_id    : natural := 0;
+    g_use_transpose : boolean := false
+  );
   port (
     dp_clk   : in  std_logic;
     dp_rst   : in  std_logic;
@@ -88,21 +91,51 @@ architecture str of sdp_beamformer_output is
   constant c_fifo_fill      : natural := c_sdp_cep_payload_nof_longwords;  -- 976
   constant c_fifo_size      : natural := true_log_pow2(c_sdp_cep_payload_nof_longwords) * c_sdp_N_beamsets;  -- 2048
 
-  signal snk_in_concat             : t_dp_sosi;
-  signal dp_repack_data_src_out    : t_dp_sosi;
-  signal dp_packet_merge_src_out   : t_dp_sosi;
-  signal dp_fifo_merge_src_out     : t_dp_sosi;
-  signal dp_fifo_merge_src_in      : t_dp_siso;
-  signal dp_pipeline_src_out       : t_dp_sosi;
-  signal dp_pipeline_src_in        : t_dp_siso;
-  signal dp_offload_tx_src_out     : t_dp_sosi;
-  signal dp_offload_tx_src_in      : t_dp_siso;
-  signal ip_checksum_src_out       : t_dp_sosi;
-  signal ip_checksum_src_in        : t_dp_siso;
-  signal dp_pipeline_ready_src_out : t_dp_sosi;
-  signal dp_pipeline_ready_src_in  : t_dp_siso;
-
-  signal dbg_bsn_offset        : std_logic;
+  -- Reorder c_nof_ch = c_nof_ch_sel = c_nof_ch_in
+  constant c_nof_ch         : natural := c_sdp_S_sub_bf * c_sdp_cep_nof_blocks_per_packet;
+
+  -- Dynamic reorder block size control input
+  -- . The data consists of 1 word = 1 ch, because 1 word contains 1 dual pol
+  --   beamlet.
+  -- . The input packet has nof_ch = nof_data_per_block * nof_blocks_per_packet
+  --   of data per packet.
+  -- . The transposed output packet will have blocks with nof_blocks_per_packet
+  --   data per block and nof_data_per_block blocks per packet.
+  signal nof_ch                : natural := c_nof_ch;
+  signal nof_data_per_block    : natural := c_sdp_S_sub_bf;
+  signal nof_blocks_per_packet : natural := c_sdp_cep_nof_blocks_per_packet;
+  signal select_copi           : t_mem_copi := c_mem_copi_rst;
+  signal select_cipo           : t_mem_cipo := c_mem_cipo_rst;
+  signal r_identity            : t_reorder_identity;
+  signal d_identity            : t_reorder_identity;
+  signal r_transpose           : t_reorder_transpose;
+  signal d_transpose           : t_reorder_transpose;
+
+  signal snk_in_concat              : t_dp_sosi;
+  signal snk_in_concat_data         : std_logic_vector(c_data_w - 1 downto 0);
+  signal snk_in_concat_re           : std_logic_vector(c_sdp_W_beamlet - 1 downto 0);
+  signal snk_in_concat_im           : std_logic_vector(c_sdp_W_beamlet - 1 downto 0);
+  signal dp_repack_beamlet_src_out  : t_dp_sosi;
+  signal dp_repack_beamlet_word     : t_sdp_dual_pol_beamlet_in_word;
+  signal dp_packet_merge_src_out    : t_dp_sosi;
+  signal dp_packet_merge_word       : t_sdp_dual_pol_beamlet_in_word;
+  signal dp_packet_reorder_src_out  : t_dp_sosi;
+  signal dp_packet_reorder_word     : t_sdp_dual_pol_beamlet_in_word;
+  signal reorder_busy               : std_logic;
+  signal dp_repack_longword_src_out : t_dp_sosi;
+  signal dp_repack_longword         : t_sdp_dual_pol_beamlet_in_longword;
+  signal dp_fifo_fill_eop_src_out   : t_dp_sosi;
+  signal dp_fifo_fill_eop_src_in    : t_dp_siso;
+  signal dp_pipeline_src_out        : t_dp_sosi;
+  signal dp_pipeline_src_in         : t_dp_siso;
+  signal dp_offload_tx_src_out      : t_dp_sosi;
+  signal dp_offload_tx_src_in       : t_dp_siso;
+  signal ip_checksum_src_out        : t_dp_sosi;
+  signal ip_checksum_src_in         : t_dp_siso;
+  signal dp_pipeline_ready_src_out  : t_dp_sosi;
+  signal dp_pipeline_ready_src_in   : t_dp_siso;
+
+  signal dbg_bsn_offset     : std_logic;
   signal payload_err        : std_logic_vector(0 downto 0);
   signal station_info       : std_logic_vector(15 downto 0) := (others => '0');
 
@@ -110,57 +143,72 @@ architecture str of sdp_beamformer_output is
   signal dp_offload_tx_hdr_fields : std_logic_vector(1023 downto 0) := (others => '0');
   signal dp_offload_tx_header     : t_sdp_cep_header;  -- to view dp_offload_tx_hdr_fields in Wave window
 begin
-  -------------------------------------------------------------------------------
+  -----------------------------------------------------------------------------
   -- Input rewiring: concatenate input complex fields to data field
   -- . dp_repack_data works with data fields only
-  -- . send beamlet data big endian with X.re part first, then X.im, Y.re, Y.im
-  -------------------------------------------------------------------------------
+  -- . send beamlet data big endian with X.re part first, then X.im, Y.re,
+  --   and Y.im, conform ICD STAT-CEP [3].
+  -----------------------------------------------------------------------------
+
+  -- Debug signals for view in Wave window: concatenated data and its re, im parts
+  snk_in_concat_data <= snk_in_concat.data(c_data_w - 1 downto 0);
+  snk_in_concat_re <= in_sosi.re(c_sdp_W_beamlet - 1 downto 0);
+  snk_in_concat_im <= in_sosi.im(c_sdp_W_beamlet - 1 downto 0);
+
   p_snk_in_arr : process(in_sosi)
     variable v_ref_time : time := 0 ns;
   begin
     snk_in_concat <= in_sosi;
-    snk_in_concat.data(c_data_w - 1 downto 0) <= in_sosi.re(c_sdp_W_beamlet - 1 downto 0) & in_sosi.im(c_sdp_W_beamlet - 1 downto 0);
+    snk_in_concat.data(c_data_w - 1 downto 0) <= in_sosi.re(c_sdp_W_beamlet - 1 downto 0) &
+                                                 in_sosi.im(c_sdp_W_beamlet - 1 downto 0);
 
+    ---------------------------------------------------------------------------
     -- synthesis translate_off
-    -- Force BSN error in simulation to verify payload error in tb_lofar2_unb2c_sdp_station_bf.vhd,
-    -- this will cause two times payload errors, one when BSN goes wrong and one when BSN goes ok again.
+    -- Force BSN error in simulation to verify payload error in
+    -- tb_lofar2_unb2c_sdp_station_bf.vhd, this will cause two times payload
+    -- errors, one when BSN goes wrong and one when BSN goes ok again.
     dbg_bsn_offset <= '0';
     if v_ref_time = 0 ns then
       if in_sosi.sop = '1' then
-        -- Use start of input as reference time, rather than e.g. fixed 50 us, to be
-        -- independent of how long it takes for the tb to deliver the first block.
+        -- Use start of input as reference time, rather than e.g. fixed 50 us,
+        -- to be independent of how long it takes for the tb to deliver the
+        -- first block.
         v_ref_time := NOW;
-        -- Offset the v_ref_time to the second block of the c_sdp_cep_nof_blocks_per_packet
-        -- = 4 blocks that will be merged.
+        -- Offset the v_ref_time to the second block of the
+        -- c_sdp_cep_nof_blocks_per_packet = 4 blocks that will be merged.
         v_ref_time := v_ref_time + c_sdp_block_period * 1 ns;
       end if;
     elsif NOW > v_ref_time + 1 * c_sdp_cep_nof_blocks_per_packet * c_sdp_block_period * 1 ns and
           NOW < v_ref_time + 4 * c_sdp_cep_nof_blocks_per_packet * c_sdp_block_period * 1 ns then
-      -- Disturb BSN to cause merged payload error. Expected results for the merged blocks:
+      -- Disturb BSN to cause merged payload error. Expected results for the
+      -- merged blocks:
       -- . index 0 : First merged block bsn ok and payload_error = '0'.
-      -- . index 1 : bsn still ok, but payload error = '1', due to bsn++ after first block
-      -- . index 2,3 : bsn wrong due to bsn++, but payload error = '0', because all 4
-      --               merged blocks have incrementing bsn
-      -- . index 4 : bsn still wrong due to bsn++, and payload error = '1', because the bsn
-      --             is restored after first block, so the merged blocks do not have
-      --             incrementing bsn
+      -- . index 1 : bsn still ok, but payload error = '1', due to bsn++
+      --             after first block
+      -- . index 2,3 : bsn wrong due to bsn++, but payload error = '0',
+      --               because all 4 merged blocks have incrementing bsn
+      -- . index 4 : bsn still wrong due to bsn++, and payload error = '1',
+      --             because the bsn is restored after first block, so the
+      --             merged blocks do not have incrementing bsn
       -- . index >= 5 : bsn ok and payload_error = '0'.
       dbg_bsn_offset <= '1';
       snk_in_concat.bsn <= INCR_UVEC(in_sosi.bsn, 1);
     end if;
     -- synthesis translate_on
+    ---------------------------------------------------------------------------
   end process;
 
-  -------------------------------------------------------------------------------
+  -----------------------------------------------------------------------------
   -- dp_repack_data
-  -- . 16b -> 64b
-  -- . We don't need to flow control the source because we're going from 16b->64b
-  -------------------------------------------------------------------------------
-  u_dp_repack_data : entity dp_lib.dp_repack_data
+  -- . Repack 16b -> 32b, to get dual polarization beamlets of N_pol_bf *
+  --   N_complex * W_beamlet = 2 * 2 * 8 = 32b words
+  -- . No need to flow control the source, because repack into wider words
+  -----------------------------------------------------------------------------
+  u_dp_repack_data_beamlet : entity dp_lib.dp_repack_data
   generic map (
-    g_in_dat_w      => c_data_w,
-    g_in_nof_words  => 4,
-    g_out_dat_w     => c_longword_w,
+    g_in_dat_w      => c_data_w,  -- = 16b
+    g_in_nof_words  => c_sdp_N_pol_bf,  -- = 2
+    g_out_dat_w     => c_sdp_W_dual_pol_beamlet,  -- = 32b
     g_out_nof_words => 1
   )
   port map (
@@ -170,13 +218,17 @@ begin
     snk_in  => snk_in_concat,
     snk_out => OPEN,
 
-    src_out => dp_repack_data_src_out,
+    src_out => dp_repack_beamlet_src_out,
     src_in  => c_dp_siso_rdy
   );
 
-  -------------------------------------------------------------------------------
+  -- Debug signals for view in Wave window
+  -- [0:3] = [Xre, Xim, Yre, Yim]
+  dp_repack_beamlet_word <= unpack_data(dp_repack_beamlet_src_out.data(c_sdp_W_dual_pol_beamlet - 1 downto 0));
+
+  -----------------------------------------------------------------------------
   -- dp_packet_merge
-  -------------------------------------------------------------------------------
+  -----------------------------------------------------------------------------
   u_dp_packet_merge : entity dp_lib.dp_packet_merge
   generic map(
     g_nof_pkt       => c_sdp_cep_nof_blocks_per_packet,
@@ -187,17 +239,138 @@ begin
     clk     => dp_clk,
 
     snk_out => OPEN,
-    snk_in  => dp_repack_data_src_out,
+    snk_in  => dp_repack_beamlet_src_out,
 
     src_in  => c_dp_siso_rdy,
     src_out => dp_packet_merge_src_out
   );
 
-  -------------------------------------------------------------------------------
+  -- Debug signals for view in Wave window
+  dp_packet_merge_word <= unpack_data(dp_packet_merge_src_out.data(c_sdp_W_dual_pol_beamlet - 1 downto 0));
+
+  -----------------------------------------------------------------------------
+  -- reorder_col_select
+  -- . Reorder beamlet data from:
+  --     (int8) [t] [N_blocks_per_packet][S_sub_bf] [N_pol_bf][N_complex]
+  --   to:
+  --     (int8) [t] [S_sub_bf][N_blocks_per_packet] [N_pol_bf][N_complex]
+  --
+  -- . where (int8) [N_pol_bf][N_complex] = c_sdp_W_dual_pol_beamlet = 32b
+  --   dual polarization beamlet word
+  -- See tb_reorder_col_select_all.vhd for how to control col_select_copi/cipo.
+  -----------------------------------------------------------------------------
+  u_reorder_col_select : entity reorder_lib.reorder_col_select
+  generic map (
+    g_dsp_data_w  => c_sdp_W_dual_pol_beamlet / c_nof_complex,  -- = 32b / 2
+    g_nof_ch_in   => c_nof_ch,
+    g_nof_ch_sel  => c_nof_ch,
+    g_use_complex => false
+  )
+  port map (
+    dp_rst          => dp_rst,
+    dp_clk          => dp_clk,
+
+    reorder_busy    => reorder_busy,
+
+    -- Dynamic reorder block size control input
+    nof_ch_in       => nof_ch,
+    nof_ch_sel      => nof_ch,
+
+    -- Captured reorder block size control used for output_sosi
+    output_nof_ch_in  => open,
+    output_nof_ch_sel => open,
+
+    -- Memory Mapped
+    col_select_mosi => select_copi,
+    col_select_miso => select_cipo,
+
+    -- Streaming
+    input_sosi      => dp_packet_merge_src_out,
+    output_sosi     => dp_packet_reorder_src_out
+  );
+
+  -- Debug signals for view in Wave window
+  dp_packet_reorder_word <= unpack_data(dp_packet_reorder_src_out.data(c_sdp_W_dual_pol_beamlet - 1 downto 0));
+
+  -- Register the d signals, the synchronous reset is applied in the d signals
+  p_dp_clk_synchronous : process(dp_clk)
+  begin
+    if rising_edge(dp_clk) then
+      r_identity  <= d_identity;  -- d from p_reorder_identity
+      r_transpose <= d_transpose;  -- d from p_reorder_transpose
+    end if;
+  end process;
+
+  -- Pass on beamlet data in original order or in transposed order
+  select_copi <= r_transpose.select_copi when g_use_transpose else r_identity.select_copi;
+
+  p_reorder_identity : process(dp_rst, select_cipo, nof_ch, r_identity)
+    variable v_next : t_reorder_identity;
+  begin
+    -- Default no read, because new reorder_col_select page is not available
+    -- yet
+    v_next := c_reorder_identity_rst;
+    if select_cipo.waitrequest = '0' then
+      -- Page available, so read from reorder_col_select page
+      v_next := func_reorder_identity(nof_ch, r_identity);
+    end if;
+    -- Synchronous reset overrules the read
+    if dp_rst = '1' then
+      v_next := c_reorder_identity_rst;
+    end if;
+    d_identity <= v_next;
+  end process;
+
+  p_reorder_transpose : process(dp_rst, select_cipo,
+                                nof_data_per_block, nof_blocks_per_packet, r_transpose)
+    variable v_next : t_reorder_transpose;
+  begin
+    -- Default no read, because new reorder_col_select page is not available
+    -- yet
+    v_next := c_reorder_transpose_rst;
+    if select_cipo.waitrequest = '0' then
+      -- Page available, so read from reorder_col_select page
+      v_next := func_reorder_transpose(nof_data_per_block, nof_blocks_per_packet, r_transpose);
+    end if;
+    -- Synchronous reset overrules the read
+    if dp_rst = '1' then
+      v_next := c_reorder_transpose_rst;
+    end if;
+    d_transpose <= v_next;
+  end process;
+
+  -----------------------------------------------------------------------------
+  -- dp_repack_data
+  -- . Repack 32b -> 64b, to get 64b longwords for network packet data
+  -- . No need to flow control the source, because repack into wider words
+  -----------------------------------------------------------------------------
+  u_dp_repack_data_longword : entity dp_lib.dp_repack_data
+  generic map (
+    g_in_dat_w      => c_sdp_W_dual_pol_beamlet,  -- = 32b
+    g_in_nof_words  => c_sdp_nof_beamlets_per_longword,  -- = 2
+    g_out_dat_w     => c_longword_w,  -- = 64b
+    g_out_nof_words => 1
+  )
+  port map (
+    clk     => dp_clk,
+    rst     => dp_rst,
+
+    snk_in  => dp_packet_reorder_src_out,
+    snk_out => OPEN,
+
+    src_out => dp_repack_longword_src_out,
+    src_in  => c_dp_siso_rdy
+  );
+
+  dp_repack_longword <= unpack_data(dp_repack_longword_src_out.data(c_longword_w - 1 downto 0));
+
+  -----------------------------------------------------------------------------
   -- FIFO
-  -------------------------------------------------------------------------------
+  -----------------------------------------------------------------------------
+  -- Pass on dp_repack_longword_src_out.err field (from u_dp_packet_merge) via
+  -- separate u_common_fifo_sc_err
   u_dp_fifo_fill_eop_sc : entity dp_lib.dp_fifo_fill_eop_sc
-  generic map (  -- pass on dp_packet_merge_src_out.err via u_common_fifo_sc_err
+  generic map (
     g_data_w         => c_longword_w,
     g_empty_w        => c_byte_w,
     g_use_empty      => true,
@@ -211,16 +384,16 @@ begin
   port map (
     clk     => dp_clk,
     rst     => dp_rst,
-    snk_in  => dp_packet_merge_src_out,
-    src_out => dp_fifo_merge_src_out,
-    src_in  => dp_fifo_merge_src_in
+    snk_in  => dp_repack_longword_src_out,
+    src_out => dp_fifo_fill_eop_src_out,
+    src_in  => dp_fifo_fill_eop_src_in
   );
 
-  -- Simple fifo to store the payload error bit at eop of FIFO input to be used at sop of FIFO
-  -- output, so that payload_err can then be used in the packet header.
-  -- Typically the u_dp_fifo_fill_eop will store between 0 and c_sdp_N_beamsets = 2 packets.
-  -- Choose g_nof_words > c_sdp_N_beamsets to have some margin compared to c_fifo_size of the
-  -- data FIFO.
+  -- Simple fifo to store the payload error bit at eop of FIFO input to be used
+  -- at sop of FIFO output, so that payload_err can then be used in the packet
+  -- header. Typically the u_dp_fifo_fill_eop will store between 0 and
+  -- c_sdp_N_beamsets = 2 packets. Choose g_nof_words > c_sdp_N_beamsets to
+  -- have some margin compared to c_fifo_size of the data FIFO.
   u_common_fifo_sc_err : entity common_lib.common_fifo_sc
   generic map (
     g_dat_w => 1,
@@ -229,10 +402,10 @@ begin
   port map (
     rst    => dp_rst,
     clk    => dp_clk,
-    wr_dat => dp_packet_merge_src_out.err(0 downto 0),
-    wr_req => dp_packet_merge_src_out.eop,
+    wr_dat => dp_repack_longword_src_out.err(0 downto 0),
+    wr_req => dp_repack_longword_src_out.eop,
     rd_dat => payload_err,
-    rd_req => dp_fifo_merge_src_out.sop
+    rd_req => dp_fifo_fill_eop_src_out.sop
   );
 
   -- Pipeline FIFO output to align payload_err at dp_pipeline_src_out.sop
@@ -244,21 +417,22 @@ begin
     rst        => dp_rst,
     clk        => dp_clk,
     -- ST sink
-    snk_out    => dp_fifo_merge_src_in,
-    snk_in     => dp_fifo_merge_src_out,
+    snk_out    => dp_fifo_fill_eop_src_in,
+    snk_in     => dp_fifo_fill_eop_src_out,
     -- ST source
     src_in     => dp_pipeline_src_in,
     src_out    => dp_pipeline_src_out
   );
 
-  -------------------------------------------------------------------------------
+  -----------------------------------------------------------------------------
   -- Assemble offload info
-  -------------------------------------------------------------------------------
-  -- Whether the dp_offload_tx_hdr_fields value is actually used in the Tx header depends on c_sdp_cep_hdr_field_sel
+  -----------------------------------------------------------------------------
+  -- Whether the dp_offload_tx_hdr_fields value is actually used in the Tx
+  -- header depends on c_sdp_cep_hdr_field_sel:
   -- . c_sdp_cep_hdr_field_sel = "111"&"111111111011"&"1110"&"1100"&"00000010"&"100110"&"0";
   --                              eth   ip             udp    app
-  --   where 0 = data path, 1 = MM controlled. The '0' fields are assigned here via dp_offload_tx_hdr_fields.
-  --   in order:
+  --   where 0 = data path, 1 = MM controlled. The '0' fields are assigned here
+  --   via dp_offload_tx_hdr_fields. In order:
   --     access   field
   --     MM       eth_dst_mac
   --     MM       eth_src_mac
@@ -280,8 +454,9 @@ begin
   --     MM       udp_src_port
   --     MM       udp_dst_port
   --     MM       udp_total_length
-  --        DP    udp_checksum --> default fixed 0, so not used, not calculated here or in tr_10GbE
-  --                               because would require store and forward
+  --        DP    udp_checksum --> default fixed 0, so not used, not calculated
+  --                               here or in tr_10GbE because would require
+  --                               store and forward
   --
   --     MM       sdp_marker
   --     MM       sdp_version_id
@@ -307,10 +482,12 @@ begin
   --
   --        DP    dp_bsn
 
-  -- Use MM programmable source MAC/IP/UDP instead of source MAC/IP/UDP based on node ID. This is necessary because
-  -- beamlet packets from different stations must have different source MAC/IP/UDP.
-  -- Hence the eth_src_mac, udp_src_port and ip_src_addr are ignored, because c_sdp_cep_hdr_field_sel selects MM control,
-  -- but keep the code to be able to enable using them by just changing the selection bit.
+  -- Use MM programmable source MAC/IP/UDP instead of source MAC/IP/UDP based
+  -- on node ID. This is necessary because beamlet packets from different
+  -- stations must have different source MAC/IP/UDP. Hence the eth_src_mac,
+  -- udp_src_port and ip_src_addr are ignored, because c_sdp_cep_hdr_field_sel
+  -- selects MM control, but keep the code to be able to enable using them by
+  -- just changing the selection bit.
 
   station_info <= sdp_info.antenna_field_index & sdp_info.station_id;
 
@@ -337,9 +514,9 @@ begin
   -- For viewing the header fields in wave window
   dp_offload_tx_header <= func_sdp_map_cep_header(dp_offload_tx_hdr_fields);
 
-  -------------------------------------------------------------------------------
+  -----------------------------------------------------------------------------
   -- dp_offload_tx_v3
-  -------------------------------------------------------------------------------
+  -----------------------------------------------------------------------------
   u_dp_offload_tx_v3 : entity dp_lib.dp_offload_tx_v3
   generic map (
     g_nof_streams   => 1,
@@ -369,9 +546,9 @@ begin
     hdr_fields_out_arr(0) => hdr_fields_out
   );
 
-  -------------------------------------------------------------------------------
+  -----------------------------------------------------------------------------
   -- tr_10GbE_ip_checksum
-  -------------------------------------------------------------------------------
+  -----------------------------------------------------------------------------
   u_tr_10GbE_ip_checksum : entity tr_10GbE_lib.tr_10GbE_ip_checksum
   port map (
     rst     => dp_rst,
@@ -384,9 +561,9 @@ begin
     src_in  => ip_checksum_src_in
   );
 
-  -------------------------------------------------------------------------------
+  -----------------------------------------------------------------------------
   -- dp_pipeline_ready to ease timing closure
-  -------------------------------------------------------------------------------
+  -----------------------------------------------------------------------------
   u_dp_pipeline_ready : entity dp_lib.dp_pipeline_ready
   port map(
     rst => dp_rst,
@@ -398,9 +575,9 @@ begin
     src_out => dp_pipeline_ready_src_out
   );
 
-  -------------------------------------------------------------------------------
+  -----------------------------------------------------------------------------
   -- mms_dp_xonoff
-  -------------------------------------------------------------------------------
+  -----------------------------------------------------------------------------
   u_mms_dp_xonoff : entity dp_lib.mms_dp_xonoff
   generic map(
     g_default_value => '0'
diff --git a/applications/lofar2/libraries/sdp/src/vhdl/sdp_pkg.vhd b/applications/lofar2/libraries/sdp/src/vhdl/sdp_pkg.vhd
index e9dc68716674a535bb5dd91fa0448a5ab3eb1180..3fc98280108e517370dcb6c98016a8c380d54dde 100644
--- a/applications/lofar2/libraries/sdp/src/vhdl/sdp_pkg.vhd
+++ b/applications/lofar2/libraries/sdp/src/vhdl/sdp_pkg.vhd
@@ -135,12 +135,25 @@ package sdp_pkg is
   constant c_sdp_X_sq                    : natural := c_sdp_S_pn * c_sdp_S_pn;  -- = 144
   constant c_sdp_block_period            : natural := c_sdp_N_fft * 1000 / c_sdp_f_adc_MHz;  -- = 5120 [ns]
   constant c_sdp_N_beamlets_sdp          : natural := c_sdp_N_beamsets * c_sdp_S_sub_bf;  -- = 976
+  constant c_sdp_W_dual_pol_beamlet      : natural := c_sdp_N_pol_bf * c_nof_complex * c_sdp_W_beamlet;  -- 2 * 2 * 8 = 32b
+
+  constant c_sdp_nof_beamlets_per_longword : natural := 2;  -- 2 dual pol, complex, 8bit beamlets fit in 1 64bit longword
 
   -- . unit weights
   constant c_sdp_unit_sub_weight      : natural := 2**c_sdp_W_sub_weight_fraction;  -- 2**13, so range +-4.0 for 16 bit signed weight
   constant c_sdp_unit_bf_weight       : natural := 2**c_sdp_W_bf_weight_fraction;  -- 2**14, so range +-2.0 for 16 bit signed weight
   constant c_sdp_unit_beamlet_scale   : natural := 2**c_sdp_W_beamlet_scale_fraction;  -- 2**15, so range +-1.0 for 16 bit signed weight
 
+  -- One dual polarization beamlet fits in a 32b word:
+  -- [0:3] = [Xre, Xim, Yre, Yim] parts of c_sdp_W_beamlet = 8 bit, so
+  -- c_sdp_N_pol_bf * c_nof_complex = 2 * 2 = 4 octets
+  subtype t_sdp_dual_pol_beamlet_in_word is t_slv_8_arr(0 to 3);
+
+  -- Two dual polarization beamlets fit in a 64b longword:
+  -- [0:7] = [0:3,4:7] = [Xre, Xim, Yre, Yim,  Xre, Xim, Yre, Yim], so
+  -- c_sdp_nof_beamlets_per_longword * c_sdp_N_pol_bf * c_nof_complex = 2 * 2 * 2 = 8 octets
+  subtype t_sdp_dual_pol_beamlet_in_longword is t_slv_8_arr(0 to 7);
+
   -----------------------------------------------------------------------------
   -- PFB
   -----------------------------------------------------------------------------
@@ -407,17 +420,18 @@ package sdp_pkg is
 
   constant c_sdp_cep_nof_blocks_per_packet     : natural := 4;  -- number of time blocks of beamlets per output packet
   constant c_sdp_cep_nof_beamlets_per_block    : natural := c_sdp_S_sub_bf;  -- number of dual pol beamlets (c_sdp_N_pol_bf = 2)
-  constant c_sdp_cep_nof_beamlets_per_longword : natural := 2;  -- 2 dual pol, complex, 8bit beamlets fit in 1 64bit longword
-  constant c_sdp_cep_payload_nof_longwords     : natural := c_sdp_cep_nof_blocks_per_packet * c_sdp_cep_nof_beamlets_per_block / c_sdp_cep_nof_beamlets_per_longword;  -- = 976
+  constant c_sdp_cep_nof_beamlets_per_packet   : natural := c_sdp_cep_nof_blocks_per_packet * c_sdp_cep_nof_beamlets_per_block;
+  constant c_sdp_cep_payload_nof_longwords     : natural := c_sdp_cep_nof_beamlets_per_packet / c_sdp_nof_beamlets_per_longword;  -- = 976
   constant c_sdp_cep_packet_nof_longwords      : natural := ceil_div(c_sdp_cep_header_len, c_longword_sz) + c_sdp_cep_payload_nof_longwords;  -- without tail CRC, the CRC is applied by 10GbE MAC
 
-  constant c_sdp_cep_nof_hdr_fields : natural := 3 + 12 + 4 + 4 + 9 + 6 + 1;  -- c_sdp_cep_header_len / c_longword_sz = 74 / 8 = 9.25 64b words = 592b
+  constant c_sdp_cep_nof_hdr_fields : natural := 3 + 12 + 4 + 4 + 9 + 6 + 1;  -- = 39 fields
+  -- c_sdp_cep_header_len / c_longword_sz = 74 / 8 = 9.25 64b words = 592b
   -- hdr_field_sel bit selects where the hdr_field value is set:
   -- . 0 = data path controlled, value is set in sdp_beamformer_output.vhd, so field_default() is not used.
   -- . 1 = MM controlled, value is set via MM or by the field_default(), so any data path setting in
   --       sdp_beamformer_output.vhd is not used.
   -- Remarks: see remarks at c_sdp_stat_nof_hdr_fields.
-  --                                                                                            eth   ip             udp    app
+  --                                                                                             eth     ip               udp      app
   constant c_sdp_cep_hdr_field_sel  : std_logic_vector(c_sdp_cep_nof_hdr_fields - 1 downto 0) := "111" & "111111111011" & "1110" & "1100" & "100000010" & "100110" & "0";  -- current
 --CONSTANT c_sdp_cep_hdr_field_sel  : STD_LOGIC_VECTOR(c_sdp_cep_nof_hdr_fields-1 DOWNTO 0) := "101"&"111111111001"&"0111"&"1100"&"100000010"&"000110"&"0";  -- previous 27 sep 2022
 --CONSTANT c_sdp_cep_hdr_field_sel  : STD_LOGIC_VECTOR(c_sdp_cep_nof_hdr_fields-1 DOWNTO 0) := "100"&"000000010001"&"0100"&"0100"&"100000000"&"101000"&"0";  -- initial
diff --git a/applications/lofar2/libraries/sdp/src/vhdl/sdp_station.vhd b/applications/lofar2/libraries/sdp/src/vhdl/sdp_station.vhd
index e4c467900159aeef3f5d866a652e9dd00cf63ddf..6eb6479310aaf40180e891d1d9f5fe5e049f64b3 100644
--- a/applications/lofar2/libraries/sdp/src/vhdl/sdp_station.vhd
+++ b/applications/lofar2/libraries/sdp/src/vhdl/sdp_station.vhd
@@ -66,6 +66,7 @@ entity sdp_station is
     g_use_oversample         : boolean := false;
     g_use_xsub               : boolean := true;
     g_use_bf                 : boolean := true;
+    g_use_bdo_transpose      : boolean := false;
     g_use_ring               : boolean := true;
     g_P_sq                   : natural := 1
   );
@@ -909,6 +910,7 @@ begin
         g_sim                    => g_sim,
         g_sim_sdp                => g_sim_sdp,
         g_beamset_id             => beamset_id,
+        g_use_bdo_transpose      => g_use_bdo_transpose,
         g_scope_selected_beamlet => g_scope_selected_subband,
         g_subband_raw_dat_w      => c_subband_raw_dat_w,
         g_subband_raw_fraction_w => c_subband_raw_fraction_w
diff --git a/applications/lofar2/libraries/sdp/tb/vhdl/tb_sdp_pkg.vhd b/applications/lofar2/libraries/sdp/tb/vhdl/tb_sdp_pkg.vhd
index 47cc9348b285cc47fc7700505b731c141889f73c..e49931e96a21654d8858ac39830149adf432b777 100644
--- a/applications/lofar2/libraries/sdp/tb/vhdl/tb_sdp_pkg.vhd
+++ b/applications/lofar2/libraries/sdp/tb/vhdl/tb_sdp_pkg.vhd
@@ -25,10 +25,11 @@
 -- . This package contains specific constants, functions for sdp test benches.
 -- Description:
 -------------------------------------------------------------------------------
-library IEEE, common_lib;
+library IEEE, common_lib, reorder_lib;
 use IEEE.std_logic_1164.all;
 use common_lib.common_pkg.all;
 use common_lib.common_network_layers_pkg.all;
+use reorder_lib.reorder_pkg.all;
 use work.sdp_pkg.all;
 
 package tb_sdp_pkg is
@@ -102,6 +103,27 @@ package tb_sdp_pkg is
                                rem_subband_ampl, rem_subband_phase, rem_bf_gain, rem_bf_phase : real;
                                nof_rem : natural)
                                return t_real_arr;  -- 0:3 = ampl, phase, re, im
+
+  -----------------------------------------------------------------------------
+  -- Beamlet output packet
+  -----------------------------------------------------------------------------
+  -- beamlet part index [0 : 3] of X, Y, X, Y in network longword:
+  -- - use separate array for re and for im:
+  --   . re[0 : 3] at 0, 2, 4, 6 in longword
+  --   . im[0 : 3] at 1, 3, 5, 7 in longword
+  subtype t_sdp_beamlet_part_arr is t_slv_8_arr(0 to c_sdp_nof_beamlets_per_longword * c_sdp_N_pol_bf - 1);
+
+  -- beamlet part index in packet with 4 blocks [0 : 4 * 488 * 2 - 1] = [0 : 3903]
+  -- . use separate list for re and for im
+  subtype t_sdp_beamlet_packet_list is t_slv_8_arr(0 to c_sdp_cep_nof_beamlets_per_packet * c_sdp_N_pol_bf - 1);
+
+  -- beamlet part index in one block [0 : 488 * 2 - 1] =  [0 : 975]
+  -- . use separate list for re and for im
+  subtype t_sdp_beamlet_block_list is t_slv_8_arr(0 to c_sdp_cep_nof_beamlets_per_block * c_sdp_N_pol_bf - 1);
+
+  function func_sdp_bdo_transpose_packet(nof_blocks_per_packet : natural;
+                                         nof_beamlets_per_block : natural;
+                                         packet_list : t_sdp_beamlet_packet_list) return t_sdp_beamlet_packet_list;
 end package tb_sdp_pkg;
 
 package body tb_sdp_pkg is
@@ -469,4 +491,37 @@ package body tb_sdp_pkg is
     return v_tuple;
   end;
 
+  -- BDO transpose:
+  -- . See sdp/src/python/test_func_sdp_bdo_transpose_packet.py to verify that
+  --   v_out = func_sdp_bdo_transpose_packet(4, 488, v_in) yields the expected v_out.
+  -- . See data repacking section in:
+  --   https://support.astron.nl/confluence/pages/viewpage.action?spaceKey=L2M&title=L4+SDPFW+Decision%3A+Multiple+beamlet+output+destinations
+  -- . Use separate packet_list for re and im. The list contain 4 * 488 * 2 = 3904
+  --   beamlet part octet values.
+  -- input packet_list:
+  -- . blk               0,            1,            2,            3,  for nof_blocks_per_packet = 4
+  -- . blet   0,   ... 487, 0,   ... 487, 0,   ... 487, 0,   ... 487,  for nof_beamlets_per_block = 488
+  -- . pol_bf X,Y, ... X,Y, X,Y, ... X,Y, X,Y, ... X,Y, X,Y, ... X,Y,  for N_pol_bf = 2, X,Y = 0,1
+  -- . v_in   0,   ... 975, 976, ...1951,1952, ...2927,2928, ...3903,  input list index
+  -- return v_list = transposed packet_list:
+  -- . pol_bf              X,Y,X,Y,X,Y,X,Y, ..., X,Y,X,Y,X,Y,X,Y,  for N_pol_bf = 2, X,Y = 0,1
+  -- . blk                   0,  1,  2,  3, ...,   0,  1,  2,  3,  for nof_blocks_per_packet = 4
+  -- . blet                              0, ...,             487,  for nof_beamlets_per_block = 488
+  -- . v_out 0,1,  976, 977, 1952,1953, 2928,2929,
+  --         2,3,  978, 979, 1954,1955, 2930,2931,
+  --         ...,       ...,       ...,       ...,
+  --     972,973, 1948,1949, 2924,2925, 3900,3901,
+  --     974,975, 1950,1951, 2926,2927, 3902,3903, output list index
+  function func_sdp_bdo_transpose_packet(nof_blocks_per_packet : natural;
+                                         nof_beamlets_per_block : natural;
+                                         packet_list : t_sdp_beamlet_packet_list) return t_sdp_beamlet_packet_list is
+    constant c_transposed : t_sdp_beamlet_packet_list :=  -- keep c_sdp_N_pol_bf parts per beamlet together
+      func_reorder_transpose_packet(nof_blocks_per_packet,
+                                    nof_beamlets_per_block,
+                                    c_sdp_N_pol_bf,
+                                    packet_list);
+  begin
+    return c_transposed;
+  end func_sdp_bdo_transpose_packet;
+
 end tb_sdp_pkg;
diff --git a/libraries/base/common/src/vhdl/common_pkg.vhd b/libraries/base/common/src/vhdl/common_pkg.vhd
index e1c2f98ffbcb2ba3cbf1ce2ba0ceff36e2e5250c..3820f512559300a6e55401b844f04174ec04f88c 100644
--- a/libraries/base/common/src/vhdl/common_pkg.vhd
+++ b/libraries/base/common/src/vhdl/common_pkg.vhd
@@ -223,6 +223,10 @@ package common_pkg is
   function unpack_complex_im(data : integer;          w : natural) return integer;  -- pack order: im & re
   function unpack_complex_im(data : std_logic_vector; w : natural) return integer;  -- pack order: im & re
 
+  -- data[left : right] --> t_slv_8_arr[left : right]
+  -- if necessary, call flip() on returned t_slv_8_arr to change the order
+  function unpack_data(data : std_logic_vector) return t_slv_8_arr;
+
   function atan2(Y, X: real) return real;  -- = ARCTAN(Y, X) but returns 0 when Y = X = 0, without reporting Error: ARCTAN(0.0, 0.0) is undetermined
 
   function to_natural_arr(n : t_integer_arr; to_zero : boolean) return t_natural_arr;  -- if to_zero=TRUE then negative numbers are forced to zero, otherwise they will give a compile range error
@@ -346,7 +350,7 @@ package common_pkg is
   function array_init(init,             nof, incr        : integer) return t_slv_32_arr;
   function array_init(init,             nof, width       : natural) return std_logic_vector;  -- useful to init an unconstrained std_logic_vector with repetitive content
   function array_init(init,             nof, width, incr : natural) return std_logic_vector;  -- useful to init an unconstrained std_logic_vector with incrementing content
-  function array_sinit(init : integer;   nof, width       : natural) return std_logic_vector;  -- useful to init an unconstrained std_logic_vector with repetitive content
+  function array_sinit(init : integer;  nof, width       : natural) return std_logic_vector;  -- useful to init an unconstrained std_logic_vector with repetitive content
 
   function init_slv_64_matrix(nof_a, nof_b, k : integer) return t_slv_64_matrix;  -- initialize all elements in t_slv_64_matrix to value k
 
@@ -541,6 +545,7 @@ package common_pkg is
   function flip(a : std_logic_vector)  return std_logic_vector;  -- bit flip a vector, map a[h:0] to [0:h]
   function flip(a, w : natural)        return natural;  -- bit flip a vector, map a[h:0] to [0:h], h = w-1
   function flip(a : t_slv_32_arr)      return t_slv_32_arr;
+  function flip(a : t_slv_8_arr)       return t_slv_8_arr;
   function flip(a : t_integer_arr)     return t_integer_arr;
   function flip(a : t_natural_arr)     return t_natural_arr;
   function flip(a : t_nat_natural_arr) return t_nat_natural_arr;
@@ -819,6 +824,20 @@ package body common_pkg is
     return TO_SINT(v_complex_slv(c_complex_w - 1 downto w));  -- Im in MS part
   end;
 
+  function unpack_data(data : std_logic_vector) return t_slv_8_arr is  -- split data into c_octet_w wide parts
+    constant c_nof_octets : natural := data'length / c_octet_w;  -- rounds down, the assert checks for no remainder
+    variable v_data       : std_logic_vector(data'length - 1 downto 0) := data;  -- map data on range [h:0]
+    variable v_a          : t_slv_8_arr(c_nof_octets - 1 downto 0);
+  begin
+    assert data'length = c_nof_octets * c_octet_w
+      report "common_pkg: unpack_data must be integer number of octets"
+      severity FAILURE;
+    for I in v_a'range loop  -- element I gets v_data[(I + 1) * c_octet_w - 1 : I * c_octet_w]
+      v_a(I) := v_data((I + 1) * c_octet_w - 1 downto I * c_octet_w);
+    end loop;
+    return v_a;
+  end;
+
   function atan2(Y, X: real) return real is
   begin
     if Y = 0.0 and X = 0.0 then
@@ -2791,6 +2810,16 @@ package body common_pkg is
     return v_b;
   end;
 
+  function flip(a : t_slv_8_arr) return t_slv_8_arr is  -- reverse the element order of a
+    variable v_src : t_slv_8_arr(a'length - 1 downto 0) := a;  -- a mapped on range [h:0]
+    variable v_dst : t_slv_8_arr(a'length - 1 downto 0);
+  begin
+    for J in v_dst'reverse_range loop
+      v_dst(J) := v_src(v_src'high - J);
+    end loop;
+    return v_dst;
+  end flip;
+
   function flip(a : t_integer_arr) return t_integer_arr is
     variable v_a : t_integer_arr(a'length - 1 downto 0) := a;
     variable v_b : t_integer_arr(a'length - 1 downto 0);
diff --git a/libraries/base/dp/tb/vhdl/dp_stream_verify.vhd b/libraries/base/dp/tb/vhdl/dp_stream_verify.vhd
index 374ccc129dc01a07aa6b38b5c702eddcccabe463..df8c655e04b253b189951166fecab8e7dd6aaeaf 100644
--- a/libraries/base/dp/tb/vhdl/dp_stream_verify.vhd
+++ b/libraries/base/dp/tb/vhdl/dp_stream_verify.vhd
@@ -51,7 +51,8 @@ entity dp_stream_verify is
   generic (
     g_instance_nr         : natural := 0;
     -- flow control
-    g_random_w            : natural := 14;  -- use different random width for stimuli and for verify to have different random sequences
+    -- . use different random width for stimuli and for verify to have different random sequences
+    g_random_w            : natural := 14;
     g_pulse_active        : natural := 1;
     g_pulse_period        : natural := 2;
     g_flow_control        : t_dp_flow_control_enum := e_active;  -- always active, random or pulse flow control
@@ -85,13 +86,19 @@ architecture tb of dp_stream_verify is
   constant c_rl                       : natural := 1;
   constant c_no_dut                   : boolean := true;
 
-  signal random                     : std_logic_vector(g_random_w - 1 downto 0) := TO_UVEC(g_instance_nr, g_random_w);  -- use different initialization to have different random sequences per stream
+  -- Use different initialization by g_instance_nr, to have different random sequences per stream
+  signal random                     : std_logic_vector(g_random_w - 1 downto 0) := TO_UVEC(g_instance_nr, g_random_w);
   signal pulse                      : std_logic;
   signal pulse_en                   : std_logic := '1';
 
   signal i_snk_out                  : t_dp_siso := c_dp_siso_rdy;
   signal prev_snk_out               : t_dp_siso;
-  signal hold_snk_in_data           : std_logic_vector(c_dp_stream_data_w - 1 downto 0);  -- used to hold valid data for verify at verify_expected_snk_in_evt
+
+  -- hold valid data for verify at verify_expected_snk_in_evt
+  signal hold_snk_in_data           : std_logic_vector(c_dp_stream_data_w - 1 downto 0);
+  signal hold_snk_in_re             : std_logic_vector(c_dp_stream_dsp_data_w - 1 downto 0);
+  signal hold_snk_in_im             : std_logic_vector(c_dp_stream_dsp_data_w - 1 downto 0);
+
   signal snk_in_data                : std_logic_vector(g_in_dat_w - 1 downto 0);
   signal prev_snk_in                : t_dp_sosi;
 
@@ -133,8 +140,10 @@ begin
   proc_dp_verify_value("snk_in.eop",              clk, verify_expected_snk_in_evt.eop,     expected_snk_in.eop,     detected_snk_in_ctrl.eop);
   proc_dp_verify_value("snk_in.valid",            clk, verify_expected_snk_in_evt.valid,   expected_snk_in.valid,   detected_snk_in_ctrl.valid);
 
-  -- Verify that the last sosi data, bsn, channel and err fields are correct
+  -- Verify that the last sosi data, re, im, bsn, channel and err fields are correct
   proc_dp_verify_value("snk_in.data",    e_equal, clk, verify_expected_snk_in_evt.data,    expected_snk_in.data,    hold_snk_in_data);
+  proc_dp_verify_value("snk_in.re",      e_equal, clk, verify_expected_snk_in_evt.re,      expected_snk_in.re,      hold_snk_in_re);
+  proc_dp_verify_value("snk_in.im",      e_equal, clk, verify_expected_snk_in_evt.im,      expected_snk_in.im,      hold_snk_in_im);
   proc_dp_verify_value("snk_in.bsn",     e_equal, clk, verify_expected_snk_in_evt.bsn,     expected_snk_in.bsn,     snk_in.bsn);
   proc_dp_verify_value("snk_in.channel", e_equal, clk, verify_expected_snk_in_evt.channel, expected_snk_in.channel, snk_in.channel);
   proc_dp_verify_value("snk_in.err",     e_equal, clk, verify_expected_snk_in_evt.err,     expected_snk_in.err,     snk_in.err);
@@ -191,4 +200,6 @@ begin
   snk_in_data  <= snk_in.data(g_in_dat_w - 1 downto 0);
 
   hold_snk_in_data <= snk_in.data when snk_in.valid = '1';
+  hold_snk_in_re   <= snk_in.re   when snk_in.valid = '1';
+  hold_snk_in_im   <= snk_in.im   when snk_in.valid = '1';
 end tb;
diff --git a/libraries/base/reorder/hdllib.cfg b/libraries/base/reorder/hdllib.cfg
index d7fa2674a8ec6283693d91a54b40528c8abddaf9..1cb4bbb708592aeeee4e3e603bd326f28f78b357 100644
--- a/libraries/base/reorder/hdllib.cfg
+++ b/libraries/base/reorder/hdllib.cfg
@@ -49,6 +49,8 @@ synth_files =
     src/vhdl/mms_reorder_rewire.vhd
 
 test_bench_files =
+    tb/vhdl/reorder_pkg_test.vhd
+    tb/vhdl/reorder_pkg_test_test.vhd
     tb/vhdl/tb_reorder_transpose.vhd
     tb/vhdl/tb_reorder_col.vhd
     tb/vhdl/tb_tb_reorder_col.vhd
@@ -62,6 +64,8 @@ test_bench_files =
     tb/vhdl/tb_tb_reorder_col_select_all.vhd
 
 regression_test_vhdl =
+    tb/vhdl/reorder_pkg_test.vhd
+    tb/vhdl/reorder_pkg_test_test.vhd
     tb/vhdl/tb_tb_reorder_col_wide_row_select.vhd
     tb/vhdl/tb_tb_reorder_col.vhd
     tb/vhdl/tb_tb_reorder_col_select_all.vhd
diff --git a/libraries/base/reorder/src/vhdl/reorder_pkg.vhd b/libraries/base/reorder/src/vhdl/reorder_pkg.vhd
index caad085b16c31d06a5e57bd1333a5279dc2f82a4..4db2a54945e51eefcdfdb7232dcb6eaf9382ba37 100644
--- a/libraries/base/reorder/src/vhdl/reorder_pkg.vhd
+++ b/libraries/base/reorder/src/vhdl/reorder_pkg.vhd
@@ -76,37 +76,203 @@ package reorder_pkg is
     (0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,0)
   );
 
+  -----------------------------------------------------------------------------
+  -- Reorder transpose
+  -----------------------------------------------------------------------------
+
   -- Block and data counters to derive select_copi.address for transpose
-  -- reording between nof_blocks_per_packet and nof_data_per_block.
+  -- reordering between nof_blocks_per_packet and nof_data_per_block.
+  -- Optionally the data can consist of multiple words at consecutive
+  -- addresses. If data is one word, then word_cnt record field is not used.
   type t_reorder_transpose is record
     select_copi : t_mem_copi;
     addr        : natural;
     blk_cnt     : natural;
-    data_cnt    : natural;
+    blk_offset  : natural;
+    dat_cnt     : natural;
+    dat_offset  : natural;
+    word_cnt    : natural;
   end record;
 
-  constant c_reorder_transpose_rst : t_reorder_transpose := (c_mem_copi_rst, 0, 0, 0);
+  constant c_reorder_transpose_rst : t_reorder_transpose := (c_mem_copi_rst, 0, 0, 0, 0, 0, 0);
+
+  -- Input packet has nof_ch = nof_data_per_block * nof_blocks_per_packet of
+  -- data per packet. The order of the words per nof_words_per_data is
+  -- preserved.
+
+  -- Transpose functions that operate on list of data indices
+  function func_reorder_transpose_indices(nof_blocks_per_packet : natural;
+                                          nof_data_per_block    : natural;
+                                          nof_words_per_data    : natural) return t_natural_arr;
+
+  function func_reorder_transpose_indices_impl(nof_blocks_per_packet : natural;
+                                               nof_data_per_block    : natural;
+                                               nof_words_per_data    : natural) return t_natural_arr;
+  -- Transpose function that operates on a packet
+  function func_reorder_transpose_packet(nof_blocks_per_packet : natural;
+                                         nof_data_per_block    : natural;
+                                         nof_words_per_data    : natural;
+                                         packet_list : t_slv_8_arr) return t_slv_8_arr;
+
+  -- Transpose functions that operate sequentially to determine the read
+  -- transpose.select_copi.address
+  -- . The transpose.select_copi.address order will yield transposed output,
+  --   with nof_blocks_per_packet data per block, and with nof_data_per_block
+  --   number of blocks per packet.
+  function func_reorder_transpose(nof_blocks_per_packet : natural;
+                                  nof_data_per_block    : natural;
+                                  nof_words_per_data    : natural;
+                                  transpose             : t_reorder_transpose)
+                                  return t_reorder_transpose;
+
+  -- Alternative implementation using a look up list:
+  -- func_reorder_transpose_look_up() = func_reorder_transpose()
+  function func_reorder_transpose_look_up(nof_blocks_per_packet : natural;
+                                          nof_data_per_block    : natural;
+                                          nof_words_per_data    : natural;
+                                          transpose             : t_reorder_transpose)
+                                          return t_reorder_transpose;
 
-  -- Input block has nof_ch = nof_data_per_block * nof_blocks_per_packet
-  -- data per packet. The transpose.select_copi.address order will yield
-  -- transposed output, with nof_blocks_per_packet and nof_data_per_block.
+  -- Variant with nof_words_per_data = 1
   function func_reorder_transpose(nof_blocks_per_packet : natural;
                                   nof_data_per_block    : natural;
                                   transpose             : t_reorder_transpose)
                                   return t_reorder_transpose;
 
+  -----------------------------------------------------------------------------
+  -- Reorder identity
+  -- . so no reordering, same out as in, but delayed due to dual page
+  --   buffering of reorder
+  -----------------------------------------------------------------------------
+
+  -- Pass on input to output in same order.
+  type t_reorder_identity is record
+    select_copi : t_mem_copi;
+    addr        : natural;
+  end record;
+
+  constant c_reorder_identity_rst : t_reorder_identity := (c_mem_copi_rst, 0);
+
+  -- Identity function that operates sequentially to determine the read
+  -- identity.select_copi.address, which is an incrementing address range.
+  function func_reorder_identity(nof_ch_per_packet : natural;
+                                 identity          : t_reorder_identity)
+                                 return t_reorder_identity;
+
 end reorder_pkg;
 
 package body reorder_pkg is
+  -- Determine transpose index for input packet_index
+  -- . The transpose is between nof_blocks_per_packet and nof_data_per_block.
+  --   Doing transpose again with swapped nof_blocks_per_packet and
+  --   nof_data_per_block, yields original order.
+  -- . The order of the words per nof_words_per_data is preserved.
+  -- Example for:
+  --   . blk in range(nof_blocks_per_packet = 4)
+  --   . dat in range(nof_data_per_block = 488)
+  --   . wi in range(nof_words_per_data = 2)
+  --   input packet_index v_in:
+  --   . blk             0,            1,            2,            3
+  --   . dat    0, ... 487, 0,   ... 487, 0,   ... 487, 0,   ... 487
+  --   . wi   0,1, ... 0,1, 0,1, ... 0,1, 0,1, ... 0,1, 0,1, ... 0,1
+  --   . v_in   0, ... 975, 976, ...1951,1952, ...2927,2928, ...3903
+  --   return index v_out:
+  --   . wi                         0,1,0,1,0,1,0,1, ..., 0,1,0,1,0,1,0,1
+  --   . blk                          0,  1,  2,  3, ...,   0,  1,  2,  3
+  --   . dat                                      0, ...,             487
+  --   . v_out 0,1,  976, 977, 1952,1953, 2928,2929,
+  --           2,3,  978, 979, 1954,1955, 2930,2931,
+  --           ...,       ...,       ...,       ...,
+  --       972,973, 1948,1949, 2924,2925, 3900,3901,
+  --       974,975, 1950,1951, 2926,2927, 3902,3903
+  function func_reorder_transpose_indices(nof_blocks_per_packet : natural;
+                                          nof_data_per_block    : natural;
+                                          nof_words_per_data    : natural) return t_natural_arr is
+    constant c_nof_ch  : natural := nof_blocks_per_packet * nof_data_per_block * nof_words_per_data;
+    variable v_indices : t_natural_arr(0 to c_nof_ch - 1);
+    variable v_rd      : natural;  -- index in input packet order
+    variable v_wr      : natural;  -- index in transposed output order
+    variable v_cnt     : natural := 0;  -- counts the loop iterations
+  begin
+    -- The returned v_indices does not depend on which of the dat and blk
+    -- loops is the outer loop:
+    -- . outer loop blk and inner loop dat would yield v_rd = v_cnt,
+    -- . outer loop dat and inner loop blk yields v_wr = v_cnt.
+    -- Choose the latter, because then the values in v_indices are calculated
+    -- in output order, similar as if they are output sequentially by
+    -- reorder_col_select.
+    for dat in 0 to nof_data_per_block - 1 loop
+      for blk in 0 to nof_blocks_per_packet - 1 loop
+        for wi in 0 to nof_words_per_data - 1 loop
+          -- The output value at index v_wr is the input value at index v_rd
+          v_rd := wi + nof_words_per_data * (dat + nof_data_per_block * blk);
+          v_wr := wi + nof_words_per_data * (blk + nof_blocks_per_packet * dat);
+          assert v_wr = v_cnt report "Wrong index in func_reorder_transpose_indices()" severity failure;
+          v_indices(v_wr) := v_rd;
+          v_cnt := v_cnt + 1;
+        end loop;
+      end loop;
+    end loop;
+    return v_indices;
+  end;
+
+  -- The func_reorder_transpose_indices_impl() yields the same as
+  -- func_reorder_transpose_indices(), except that it uses only
+  -- additions to calculate the indices, so no multiplications in
+  -- the loops.
+  function func_reorder_transpose_indices_impl(nof_blocks_per_packet : natural;
+                                               nof_data_per_block    : natural;
+                                               nof_words_per_data    : natural) return t_natural_arr is
+    constant c_nof_ch   : natural := nof_blocks_per_packet * nof_data_per_block * nof_words_per_data;
+    constant c_blk_step : natural := nof_words_per_data * nof_data_per_block;  -- nof words per block
+    variable v_indices  : t_natural_arr(0 to c_nof_ch - 1);
+    variable v_blk_base : natural := 0;  -- index of first word of block blk
+    variable v_dat_base : natural := 0;  -- index of first word of data dat within a block
+    variable v_cnt      : natural := 0;  -- output index, increments per word
+  begin
+    for dat in 0 to nof_data_per_block - 1 loop
+      v_blk_base := 0;  -- restart at the first block for every dat
+      for blk in 0 to nof_blocks_per_packet - 1 loop
+        for wi in 0 to nof_words_per_data - 1 loop
+          v_indices(v_cnt) := v_blk_base + v_dat_base + wi;
+          v_cnt := v_cnt + 1;
+        end loop;
+        v_blk_base := v_blk_base + c_blk_step;
+      end loop;
+      v_dat_base := v_dat_base + nof_words_per_data;
+    end loop;
+    return v_indices;
+  end;
+
+  -- Apply func_reorder_transpose_indices() on a packet
+  function func_reorder_transpose_packet(nof_blocks_per_packet : natural;
+                                         nof_data_per_block    : natural;
+                                         nof_words_per_data    : natural;
+                                         packet_list : t_slv_8_arr) return t_slv_8_arr is
+    -- Read index in packet_list for every index of the transposed packet
+    constant c_rd_indices : t_natural_arr := func_reorder_transpose_indices(
+      nof_blocks_per_packet, nof_data_per_block, nof_words_per_data);
+    constant c_nof_ch     : natural := c_rd_indices'length;
+    variable v_transposed : t_slv_8_arr(packet_list'range);
+  begin
+    -- The packet_list must have one element per transpose index
+    assert c_nof_ch = packet_list'length report "Wrong packet_list length" severity error;
+    for ch in 0 to c_nof_ch - 1 loop
+      v_transposed(ch) := packet_list(c_rd_indices(ch));
+    end loop;
+    return v_transposed;
+  end func_reorder_transpose_packet;
+
   -- A transpose process and an undo transpose process can both use
   -- func_reorder_transpose(), by swapping the transpose dimensions.
   -- For example, to get transposed output with:
   -- . g_nof_blocks_per_packet = 3 and
   -- . g_nof_data_per_block = 5
+  -- . g_nof_words_per_data = 1
   -- the p_comb_transpose selects:
   --
   --   v.blk_cnt:      0              1              2
-  --   v.data_cnt:     0  1  2  3  4  0  1  2  3  4  0  1  2  3  4
+  --   v.dat_cnt:      0  1  2  3  4  0  1  2  3  4  0  1  2  3  4
   --   ch:             0  1  2  3  4  5  6  7  8  9 10 11 12 13 14
   --   data_in         0  1  2  3  4  5  6  7  8  9 10 11 12 13 14  -- in_sosi
   --   transpose:      0        3        6        9       12
@@ -122,7 +288,7 @@ package body reorder_pkg is
   -- the p_comb_undo_transpose selects:
   --
   --   v.blk_cnt:      0        1        2        3        4
-  --   v.data_cnt:     0  1  2  0  1  2  0  1  2  0  1  2  0  1  2
+  --   v.dat_cnt:      0  1  2  0  1  2  0  1  2  0  1  2  0  1  2
   --   ch:             0  1  2  3  4  5  6  7  8  9 10 11 12 13 14
   --   data_in         0  3  6  9 12  1  4  7 10 13  2  5  8 11 14  -- transposed_sosi
   --   undo_transpose: 0              1              2
@@ -136,31 +302,105 @@ package body reorder_pkg is
   -- to restore the original order.
   function func_reorder_transpose(nof_blocks_per_packet : natural;
                                   nof_data_per_block    : natural;
+                                  nof_words_per_data    : natural;
                                   transpose             : t_reorder_transpose)
                                   return t_reorder_transpose is
+    constant c_nof_words_per_block : natural := nof_words_per_data * nof_data_per_block;
+
     variable v : t_reorder_transpose;
   begin
+    -- Implementation derived from func_reorder_transpose_indices_impl().
+    -- Instead of using incrementing v_ch and for-loops to return list of all
+    -- read indices, use sequential calls of this func_reorder_transpose()
+    -- to return next read address.
     v := transpose;
-    -- read at current address
+    -- Read at current address
     v.select_copi.address := TO_MEM_ADDRESS(v.addr);
     v.select_copi.rd := '1';
-    -- prepare next read address
-    if v.blk_cnt <= nof_blocks_per_packet - 1 then
-      if v.data_cnt < nof_data_per_block - 1 then
-        v.data_cnt := v.data_cnt + 1;
-        v.addr := v.addr + nof_blocks_per_packet;
-      else
-        v.data_cnt := 0;
+    -- Prepare next read address
+    -- . loop word_cnt
+    if v.word_cnt < nof_words_per_data - 1 then
+      v.word_cnt := v.word_cnt + 1;
+    else
+      -- . end loop word_cnt
+      v.word_cnt := 0;
+      -- . loop blk_cnt
+      if v.blk_cnt < nof_blocks_per_packet - 1 then
         v.blk_cnt := v.blk_cnt + 1;
-        if v.blk_cnt = nof_blocks_per_packet then
-          v.blk_cnt := 0;
+        v.blk_offset := v.blk_offset + c_nof_words_per_block;
+      else
+        -- . end loop blk_cnt
+        v.blk_cnt := 0;
+        v.blk_offset := 0;
+        -- . loop dat_cnt
+        if v.dat_cnt < nof_data_per_block - 1 then
+          v.dat_cnt := v.dat_cnt + 1;
+          v.dat_offset := v.dat_offset + nof_words_per_data;
+        else
+          -- . end loop dat_cnt
+          v.dat_cnt := 0;
+          v.dat_offset := 0;
         end if;
-
-        v.addr := v.blk_cnt;
       end if;
+    end if;
+    v.addr := v.blk_offset + v.dat_offset + v.word_cnt;
+    return v;
+  end;
+
+  function func_reorder_transpose_look_up(nof_blocks_per_packet : natural;
+                                          nof_data_per_block    : natural;
+                                          nof_words_per_data    : natural;
+                                          transpose             : t_reorder_transpose)
+                                          return t_reorder_transpose is
+    -- List of all read addresses of a packet, in transposed output order
+    constant c_rd_addresses : t_natural_arr := func_reorder_transpose_indices(
+      nof_blocks_per_packet, nof_data_per_block, nof_words_per_data);
+    constant c_last_ch      : integer := c_rd_addresses'length - 1;
+    variable v_next         : t_reorder_transpose := transpose;
+  begin
+    -- Alternative for func_reorder_transpose(), that gets the read addresses
+    -- from a look up list.
+    -- . Read at current address
+    v_next.select_copi.rd := '1';
+    v_next.select_copi.address := TO_MEM_ADDRESS(v_next.addr);
+    -- . Prepare next read address. The word_cnt field is used as ch index in
+    --   the look up list, it counts the sequential calls of this
+    --   func_reorder_transpose_look_up() and wraps to 0 after the last ch.
+    --   The blk_cnt, blk_offset, dat_cnt and dat_offset fields are passed on
+    --   unchanged.
+    if v_next.word_cnt >= c_last_ch then
+      v_next.word_cnt := 0;
+    else
+      v_next.word_cnt := v_next.word_cnt + 1;
+    end if;
+    v_next.addr := c_rd_addresses(v_next.word_cnt);
+    return v_next;
+  end;
+
+  function func_reorder_transpose(nof_blocks_per_packet : natural;
+                                  nof_data_per_block    : natural;
+                                  transpose             : t_reorder_transpose)
+                                  return t_reorder_transpose is
+    constant c_nof_words_per_data : natural := 1;  -- data is one word
+  begin
+    -- Pass on to the variant that has the nof_words_per_data argument
+    return func_reorder_transpose(
+      nof_blocks_per_packet, nof_data_per_block, c_nof_words_per_data, transpose);
+  end;
+
+  function func_reorder_identity(nof_ch_per_packet : natural;
+                                 identity          : t_reorder_identity)
+                                 return t_reorder_identity is
+    variable v : t_reorder_identity;
+  begin
+    v := identity;
+    -- Read at current address
+    v.select_copi.address := TO_MEM_ADDRESS(v.addr);
+    v.select_copi.rd := '1';
+    -- Prepare next read address
+    if v.addr < nof_ch_per_packet - 1 then
+      v.addr := v.addr + 1;
     else
-      v.data_cnt := 0;
-      v.blk_cnt := 0;
       v.addr := 0;
     end if;
     return v;
diff --git a/libraries/base/reorder/tb/vhdl/reorder_pkg_test.vhd b/libraries/base/reorder/tb/vhdl/reorder_pkg_test.vhd
new file mode 100644
index 0000000000000000000000000000000000000000..a51252e77b80678d1466155eadf409a303bd5755
--- /dev/null
+++ b/libraries/base/reorder/tb/vhdl/reorder_pkg_test.vhd
@@ -0,0 +1,124 @@
+-------------------------------------------------------------------------------
+--
+-- Copyright 2023
+-- ASTRON (Netherlands Institute for Radio Astronomy) <http://www.astron.nl/>
+-- P.O.Box 2, 7990 AA Dwingeloo, The Netherlands
+--
+-- Licensed under the Apache License, Version 2.0 (the "License");
+-- you may not use this file except in compliance with the License.
+-- You may obtain a copy of the License at
+--
+--     http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+--
+-------------------------------------------------------------------------------
+
+-------------------------------------------------------------------------------
+-- Author : E. Kooistra
+-- Purpose:
+--   Test bench to verify functions in reorder_pkg.vhd.
+-- Description:
+--
+-- Usage:
+-- > as 3
+-- > run -all
+-- * The tb is self stopping and self checking, tb_end will stop the simulation
+--   by stopping the clk and thus all toggling.
+
+library IEEE, common_lib, dp_lib;
+use IEEE.std_logic_1164.all;
+use common_lib.common_pkg.all;
+use common_lib.common_mem_pkg.all;
+use common_lib.tb_common_pkg.all;
+use dp_lib.dp_stream_pkg.all;
+use work.reorder_pkg.all;
+
+entity reorder_pkg_test is
+  generic(
+    g_nof_blocks_per_packet : natural := 4;
+    g_nof_data_per_block    : natural := 488;
+    g_nof_words_per_data    : natural := 1
+  );
+end reorder_pkg_test;
+
+
+architecture tb of reorder_pkg_test is
+  constant c_clk_period   : time := 10 ns;
+  constant c_nof_data     : natural :=  -- nof read addresses (words) per packet, p_stimuli steps through all
+    g_nof_blocks_per_packet * g_nof_data_per_block * g_nof_words_per_data;
+
+  signal rst              : std_logic;
+  signal clk              : std_logic := '1';
+  signal tb_end           : std_logic := '0';
+
+  -- Verify default and alternative (lu = look up) implementation of r_transpose
+  signal r_transpose      : t_reorder_transpose := c_reorder_transpose_rst;
+  signal r_transpose_lu   : t_reorder_transpose := c_reorder_transpose_rst;
+  signal in_address       : natural := 0;
+  signal in_val           : std_logic := '0';
+  signal out_address      : natural;
+  signal out_address_lu   : natural;
+  signal out_val          : std_logic;
+  signal exp_address      : natural;
+
+  constant c_exp_addresses_arr : t_natural_arr := func_reorder_transpose_indices(g_nof_blocks_per_packet,
+                                                                                 g_nof_data_per_block,
+                                                                                 g_nof_words_per_data);
+
+  constant c_impl_addresses_arr : t_natural_arr := func_reorder_transpose_indices_impl(g_nof_blocks_per_packet,
+                                                                                       g_nof_data_per_block,
+                                                                                       g_nof_words_per_data);
+begin
+
+  assert c_exp_addresses_arr = c_impl_addresses_arr report "Wrong func_reorder_transpose_indices_impl()" severity failure;
+
+  clk <= (not clk) or tb_end after c_clk_period / 2;
+  rst <= '1', '0' after c_clk_period * 7;
+
+  p_stimuli : process
+  begin
+    proc_common_wait_until_low(clk, rst);
+    proc_common_wait_some_cycles(clk, 5);
+
+    for I in 0 to c_nof_data - 1 loop
+      in_val <= '1';
+      in_address <= I;
+      r_transpose    <= func_reorder_transpose(
+        g_nof_blocks_per_packet, g_nof_data_per_block, g_nof_words_per_data, r_transpose);
+      r_transpose_lu <= func_reorder_transpose_look_up(
+        g_nof_blocks_per_packet, g_nof_data_per_block, g_nof_words_per_data, r_transpose_lu);
+      proc_common_wait_some_cycles(clk, 1);
+    end loop;
+    in_val <= '0';
+    r_transpose <= c_reorder_transpose_rst;
+    r_transpose_lu <= c_reorder_transpose_rst;
+
+    proc_common_wait_some_cycles(clk, 5);
+    tb_end <= '1';
+    wait;
+  end process;
+
+  out_address    <= TO_UINT(r_transpose.select_copi.address);
+  out_address_lu <= TO_UINT(r_transpose_lu.select_copi.address);
+  out_val        <= r_transpose.select_copi.rd;
+
+  exp_address <= c_exp_addresses_arr(in_address);
+
+  p_verify : process(clk)
+  begin
+    if rising_edge(clk) then
+      if in_val = '1' then
+        -- Only when valid expect that out_address = exp_address
+        assert out_address = exp_address report "Wrong transpose address" severity error;
+      end if;
+      -- Always expect that out_address_lu = out_address
+      assert out_address_lu = out_address report "Wrong transpose_lu address" severity error;
+    end if;
+  end process;
+
+end tb;
diff --git a/libraries/base/reorder/tb/vhdl/reorder_pkg_test_test.vhd b/libraries/base/reorder/tb/vhdl/reorder_pkg_test_test.vhd
new file mode 100644
index 0000000000000000000000000000000000000000..e8aeb6583dcaa90dcc28494017ed232d43aae2f7
--- /dev/null
+++ b/libraries/base/reorder/tb/vhdl/reorder_pkg_test_test.vhd
@@ -0,0 +1,53 @@
+-------------------------------------------------------------------------------
+--
+-- Copyright 2023
+-- ASTRON (Netherlands Institute for Radio Astronomy) <http://www.astron.nl/>
+-- P.O.Box 2, 7990 AA Dwingeloo, The Netherlands
+--
+-- Licensed under the Apache License, Version 2.0 (the "License");
+-- you may not use this file except in compliance with the License.
+-- You may obtain a copy of the License at
+--
+--     http://www.apache.org/licenses/LICENSE-2.0
+--
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+--
+-------------------------------------------------------------------------------
+
+-------------------------------------------------------------------------------
+-- Author : E. Kooistra
+-- Purpose:
+--   Multi test bench to verify functions in reorder_pkg.vhd using instances
+--   of reorder_pkg_test.vhd.
+-- Description:
+--
+-- Usage:
+-- > as 3
+-- > run -all
+-- * The tb is self stopping and self checking, tb_end will stop the simulation
+--   by stopping the clk and thus all toggling.
+library IEEE;
+use IEEE.std_logic_1164.all;
+
+
+entity reorder_pkg_test_test is
+end reorder_pkg_test_test;
+
+
+architecture tb of reorder_pkg_test_test is
+
+  signal tb_end : std_logic := '0';  -- declare tb_end to avoid 'No objects found' error on 'when -label tb_end'
+
+begin
+  -- g_nof_blocks_per_packet : natural := 4;
+  -- g_nof_data_per_block    : natural := 488;
+  -- g_nof_words_per_data    : natural := 1
+
+  u_4_488_1  : entity work.reorder_pkg_test generic map (4, 488, 1);
+  u_4_488_2  : entity work.reorder_pkg_test generic map (4, 488, 2);
+
+end tb;
diff --git a/libraries/base/reorder/tb/vhdl/tb_reorder_col_select_all.vhd b/libraries/base/reorder/tb/vhdl/tb_reorder_col_select_all.vhd
index c335260390e6fd8e1ab24f4091fc1df5c36061b9..af47f4fc4649732957ff7ad952c66d3fdbac64a1 100644
--- a/libraries/base/reorder/tb/vhdl/tb_reorder_col_select_all.vhd
+++ b/libraries/base/reorder/tb/vhdl/tb_reorder_col_select_all.vhd
@@ -83,7 +83,8 @@ entity tb_reorder_col_select_all is
     g_nof_data_per_block    : natural := 3;
     g_inter_valid_gap       : natural := 0;  -- nof clk gap in in_sosi.valid
     g_inter_packet_gap      : natural := 3;  -- nof clk gap betweek in_sosi.eop and next in_sosi.sop
-    g_use_complex           : boolean := false;
+    g_use_complex           : boolean := true;
+    g_use_identity          : boolean := true;  -- reorder identity or transpose
     g_use_dynamic_selection : boolean := false
   );
 end tb_reorder_col_select_all;
@@ -113,9 +114,12 @@ architecture tb of tb_reorder_col_select_all is
   signal clk              : std_logic := '1';
   signal tb_end           : std_logic := '0';
 
-  signal verify_en_sosi          : t_dp_sosi_sl;
-  signal verify_en_out_sosi      : t_dp_sosi_sl;
-  signal verify_en_out_sosi_long : t_dp_sosi_sl;
+  signal verify_en_sosi_sl          : t_dp_sosi_sl;
+  signal verify_en_out_sosi_sl      : t_dp_sosi_sl;
+  signal verify_en_out_sosi_sl_long : t_dp_sosi_sl;
+
+  signal expected_last_sosi      : t_dp_sosi := c_dp_sosi_rst;
+  signal expected_last_sosi_evt  : t_dp_sosi_sl := c_dp_sosi_sl_rst;
 
   -- Data
   signal in_en            : std_logic := '1';
@@ -124,7 +128,7 @@ architecture tb of tb_reorder_col_select_all is
   signal transposed_sosi  : t_dp_sosi;
   signal out_sosi         : t_dp_sosi;
 
-  -- Reorder control for transpose and undo transpose
+  -- Reorder control for first and second reorder_col_select
   signal sel_long                         : boolean := false;
   signal reorder_busy_transposed          : std_logic;
   signal reorder_busy_output              : std_logic;
@@ -139,11 +143,18 @@ architecture tb of tb_reorder_col_select_all is
   signal nof_data_per_block_transposed    : natural;
   signal nof_data_per_block_output        : natural;
 
+  -- Connect reorder identity or reorder transpose, dependent on g_use_identity
+  signal select_copi      : t_mem_copi;
+  signal undo_select_copi : t_mem_copi;
   signal select_cipo      : t_mem_cipo;
   signal undo_select_cipo : t_mem_cipo;
-  signal r_transpose      : t_reorder_transpose;
+  signal r_identity       : t_reorder_identity := c_reorder_identity_rst;
+  signal d_identity       : t_reorder_identity;
+  signal r_redo_identity  : t_reorder_identity := c_reorder_identity_rst;
+  signal d_redo_identity  : t_reorder_identity;
+  signal r_transpose      : t_reorder_transpose := c_reorder_transpose_rst;
   signal d_transpose      : t_reorder_transpose;
-  signal r_undo_transpose : t_reorder_transpose;
+  signal r_undo_transpose : t_reorder_transpose := c_reorder_transpose_rst;
   signal d_undo_transpose : t_reorder_transpose;
 begin
   clk <= (not clk) or tb_end after c_clk_period / 2;
@@ -169,7 +180,8 @@ begin
     -- Run some sync intervals with counter data in the packets
     -- proc_dp_gen_block_data(
     --   constant c_ready_latency  : in  natural;  -- 0, 1 are supported by proc_dp_stream_ready_latency()
-    --   constant c_use_data       : in  boolean;  -- when TRUE use data field, else use re, im fields, and keep unused fields at 'X'
+    --   constant c_use_data       : in  boolean;  -- when TRUE use data field, else use re, im fields,
+    --                                                and keep unused fields at 'X'
     --   constant c_data_w         : in  natural;  -- data width for the data, re and im fields
     --   constant c_symbol_w       : in  natural;  -- c_data_w/c_symbol_w must be an integer
     --   constant c_symbol_init    : in  natural;  -- init counter for symbols in data field
@@ -242,6 +254,20 @@ begin
     in_sosi <= c_dp_sosi_rst;
     proc_common_wait_some_cycles(clk, c_nof_ch_long * 2);
     proc_common_wait_some_cycles(clk, 10);
+
+    -- Pulse event for used sosi fields, to verify that stimuli have been applied
+    expected_last_sosi_evt <= c_dp_sosi_sl_ones;
+    if g_use_complex then
+      expected_last_sosi_evt.data <= '0';
+    else
+      expected_last_sosi_evt.re <= '0';
+      expected_last_sosi_evt.im <= '0';
+    end if;
+    proc_common_wait_some_cycles(clk, 1);
+    expected_last_sosi_evt <= c_dp_sosi_sl_rst;
+
+    -- End of test
+    proc_common_wait_some_cycles(clk, 10);
     tb_end <= '1';
     wait;
   end process;
@@ -250,25 +276,42 @@ begin
   -- Verification
   ------------------------------------------------------------------------------
 
-  p_verify_en_sosi : process
+  -- During stimuli verify that values are incrementing
+  p_verify_en_sosi_sl : process
   begin
-    verify_en_sosi <= c_dp_sosi_sl_rst;
+    verify_en_sosi_sl <= c_dp_sosi_sl_rst;
     proc_common_wait_until_low(clk, rst);
 
     -- Verify all sosi fields, except for some
-    verify_en_sosi <= c_dp_sosi_sl_ones;
+    verify_en_sosi_sl <= c_dp_sosi_sl_ones;
     if g_use_complex then
-      verify_en_sosi.data <= '0';
+      verify_en_sosi_sl.data <= '0';
     else
-      verify_en_sosi.re <= '0';
-      verify_en_sosi.im <= '0';
+      verify_en_sosi_sl.re <= '0';
+      verify_en_sosi_sl.im <= '0';
     end if;
-    verify_en_sosi.empty <= '0';
+    verify_en_sosi_sl.empty <= '0';
     wait;
   end process;
 
-  verify_en_out_sosi      <= verify_en_sosi when sel_long = false else c_dp_sosi_sl_rst;
-  verify_en_out_sosi_long <= verify_en_sosi when sel_long = true  else c_dp_sosi_sl_rst;
+  verify_en_out_sosi_sl      <= verify_en_sosi_sl when sel_long = false else c_dp_sosi_sl_rst;
+  verify_en_out_sosi_sl_long <= verify_en_sosi_sl when sel_long = true  else c_dp_sosi_sl_rst;
+
+  -- After stimuli verify last valid values, to check that stimuli have been applied
+  p_expected_last_sosi : process(clk)
+  begin
+    if rising_edge(clk) then
+      if in_sosi.valid = '1' then
+        -- hold last valid sosi.info fields
+        expected_last_sosi <= in_sosi;
+      end if;
+      -- sosi.ctrl fields must have occurred at least once
+      expected_last_sosi.valid <= '1';
+      expected_last_sosi.sync <= '1';
+      expected_last_sosi.sop <= '1';
+      expected_last_sosi.eop <= '1';
+    end if;
+  end process;
 
   u_verify_out_sosi : entity dp_lib.dp_stream_verify
   generic map (
@@ -285,11 +328,11 @@ begin
     -- Verify data
     snk_in                     => out_sosi,
     -- During stimuli
-    verify_snk_in_enable       => verify_en_out_sosi,
+    verify_snk_in_enable       => verify_en_out_sosi_sl,
 
     -- End of stimuli
-    expected_snk_in            => c_dp_sosi_rst,
-    verify_expected_snk_in_evt => c_dp_sosi_sl_rst
+    expected_snk_in            => expected_last_sosi,
+    verify_expected_snk_in_evt => expected_last_sosi_evt
   );
 
   -- When g_use_dynamic_selection = true then c_nof_sync = 2 and second sync interval
@@ -312,28 +355,65 @@ begin
     -- Verify data
     snk_in                     => out_sosi,
     -- During stimuli
-    verify_snk_in_enable       => verify_en_out_sosi_long,
+    verify_snk_in_enable       => verify_en_out_sosi_sl_long,
 
     -- End of stimuli
-    expected_snk_in            => c_dp_sosi_rst,
+    expected_snk_in            => c_dp_sosi_rst,  -- not verified here, u_verify_out_sosi already checks the last sosi
     verify_expected_snk_in_evt => c_dp_sosi_sl_rst
   );
 
   ------------------------------------------------------------------------------
-  -- DUT
+  -- DUT reorder control for select_copi/select_cipo
   ------------------------------------------------------------------------------
 
-  p_clk : process(rst, clk)
+  select_copi      <= r_identity.select_copi      when g_use_identity else r_transpose.select_copi;
+  undo_select_copi <= r_redo_identity.select_copi when g_use_identity else r_undo_transpose.select_copi;
+
+  -- Registers without reset branch, the synchronous reset is applied via the d_* signals in the p_comb_* processes
+  p_clk : process(clk)
   begin
-    if rst = '1' then
-      r_transpose      <= c_reorder_transpose_rst;
-      r_undo_transpose <= c_reorder_transpose_rst;
-    elsif rising_edge(clk) then
+    if rising_edge(clk) then
+      r_identity       <= d_identity;
+      r_redo_identity  <= d_redo_identity;
       r_transpose      <= d_transpose;
       r_undo_transpose <= d_undo_transpose;
     end if;
   end process;
 
+  p_comb_identity : process(rst, select_cipo, nof_ch_input, r_identity)
+    variable v : t_reorder_identity;
+  begin
+    if select_cipo.waitrequest = '0' then
+      -- Read from reorder_col_select page
+      v := func_reorder_identity(nof_ch_input, r_identity);
+    else
+      -- No read, new reorder_col_select page not available yet
+      v := c_reorder_identity_rst;
+    end if;
+    -- Synchronous reset
+    if rst = '1' then
+      v := c_reorder_identity_rst;
+    end if;
+    d_identity <= v;
+  end process;
+
+  p_comb_redo_identity : process(rst, undo_select_cipo, nof_ch_input, r_redo_identity)
+    variable v : t_reorder_identity;
+  begin
+    if undo_select_cipo.waitrequest = '0' then
+      -- Read from reorder_col_select page
+      v := func_reorder_identity(nof_ch_input, r_redo_identity);
+    else
+      -- No read, new reorder_col_select page not available yet
+      v := c_reorder_identity_rst;
+    end if;
+    -- Synchronous reset
+    if rst = '1' then
+      v := c_reorder_identity_rst;
+    end if;
+    d_redo_identity <= v;
+  end process;
+
   -- The p_comb_transpose and p_comb_undo_transpose can both use
   -- func_reorder_transpose(), by swapping the transpose dimensions.
 
@@ -342,7 +422,8 @@ begin
   nof_data_per_block_input <= g_nof_data_per_block when nof_ch_input = c_nof_ch else
                               g_nof_data_per_block * c_factor_dat;
 
-  p_comb_transpose : process(rst, r_transpose, select_cipo)
+  p_comb_transpose : process(rst, select_cipo,
+                             nof_data_per_block_input, nof_blocks_per_packet_input, r_transpose)
     variable v : t_reorder_transpose;
   begin
     if select_cipo.waitrequest = '0' then
@@ -364,7 +445,8 @@ begin
   nof_data_per_block_transposed <= g_nof_data_per_block when nof_ch_transposed = c_nof_ch else
                                    g_nof_data_per_block * c_factor_dat;
 
-  p_comb_undo_transpose : process(rst, r_undo_transpose, undo_select_cipo)
+  p_comb_undo_transpose : process(rst, undo_select_cipo,
+                                  nof_blocks_per_packet_transposed, nof_data_per_block_transposed, r_undo_transpose)
     variable v : t_reorder_transpose;
   begin
     if undo_select_cipo.waitrequest = '0' then
@@ -386,6 +468,9 @@ begin
   nof_data_per_block_output <= g_nof_data_per_block when nof_ch_output = c_nof_ch else
                                g_nof_data_per_block * c_factor_dat;
 
+  ------------------------------------------------------------------------------
+  -- DUT
+  ------------------------------------------------------------------------------
   u_transpose : entity work.reorder_col_select
   generic map (
     g_dsp_data_w  => g_dsp_data_w,
@@ -408,7 +493,7 @@ begin
     output_nof_ch_sel => open,
 
     -- Memory Mapped
-    col_select_mosi => r_transpose.select_copi,
+    col_select_mosi => select_copi,
     col_select_miso => select_cipo,  -- only used for waitrequest
 
     -- Streaming
@@ -438,7 +523,7 @@ begin
     output_nof_ch_sel => open,
 
     -- Memory Mapped
-    col_select_mosi => r_undo_transpose.select_copi,
+    col_select_mosi => undo_select_copi,
     col_select_miso => undo_select_cipo,  -- only used for waitrequest
 
     -- Streaming
diff --git a/libraries/base/reorder/tb/vhdl/tb_tb_reorder_col_select_all.vhd b/libraries/base/reorder/tb/vhdl/tb_tb_reorder_col_select_all.vhd
index 279f2bdb4b547d0112008dda43a17eca6180f443..fe473421bb2a3e70b7736ae4d0275915f529a29f 100644
--- a/libraries/base/reorder/tb/vhdl/tb_tb_reorder_col_select_all.vhd
+++ b/libraries/base/reorder/tb/vhdl/tb_tb_reorder_col_select_all.vhd
@@ -44,15 +44,18 @@ begin
 -- g_inter_valid_gap       : natural := 5;  -- nof clk gap in in_sosi.valid
 -- g_inter_packet_gap      : natural := 0;  -- nof clk gap between in_sosi.eop and next in_sosi.sop
 -- g_use_complex           : boolean := false;
+-- g_use_identity          : boolean := false;  -- true: reorder identity, false: reorder transpose
 -- g_use_dynamic_selection : boolean := true
 
-  u_complex_5_3_no_gaps      : entity work.tb_reorder_col_select_all generic map(16, 3, 2, 5, 3, 0, 0, true,  false);
-  u_data_5_3_no_gaps         : entity work.tb_reorder_col_select_all generic map(16, 3, 3, 5, 3, 0, 0, false, false);
-  u_data_5_1_no_gaps         : entity work.tb_reorder_col_select_all generic map(16, 3, 4, 5, 1, 0, 0, false, false);
-  u_data_1_3_no_gaps         : entity work.tb_reorder_col_select_all generic map(16, 3, 5, 1, 3, 0, 0, false, false);
-  u_data_3_5_pkt_gap_1       : entity work.tb_reorder_col_select_all generic map(16, 3, 6, 3, 5, 0, 1, false, false);
-  u_data_3_5_valid_gap_1     : entity work.tb_reorder_col_select_all generic map(16, 3, 6, 3, 5, 1, 0, false, false);
+  u_data_5_3_no_gaps_identity : entity work.tb_reorder_col_select_all generic map(16, 3, 2, 5, 3, 0,  0, false,  true,  false);
 
-  u_dynamic_data_5_3_no_gaps : entity work.tb_reorder_col_select_all generic map(16, 3, 3, 5, 3, 0,  0, false, true);
-  u_dynamic_data_5_3_gaps    : entity work.tb_reorder_col_select_all generic map(16, 3, 3, 5, 3,10,100, false, true);
+  u_complex_5_3_no_gaps       : entity work.tb_reorder_col_select_all generic map(16, 3, 2, 5, 3, 0,  0, true,  false, false);
+  u_data_5_3_no_gaps          : entity work.tb_reorder_col_select_all generic map(16, 3, 3, 5, 3, 0,  0, false, false, false);
+  u_data_5_1_no_gaps          : entity work.tb_reorder_col_select_all generic map(16, 3, 4, 5, 1, 0,  0, false, false, false);
+  u_data_1_3_no_gaps          : entity work.tb_reorder_col_select_all generic map(16, 3, 5, 1, 3, 0,  0, false, false, false);
+  u_data_3_5_pkt_gap_1        : entity work.tb_reorder_col_select_all generic map(16, 3, 6, 3, 5, 0,  1, false, false, false);
+  u_data_3_5_valid_gap_1      : entity work.tb_reorder_col_select_all generic map(16, 3, 6, 3, 5, 1,  0, false, false, false);
+
+  u_dynamic_data_5_3_no_gaps  : entity work.tb_reorder_col_select_all generic map(16, 3, 3, 5, 3, 0,  0, false, false, true);
+  u_dynamic_data_5_3_gaps     : entity work.tb_reorder_col_select_all generic map(16, 3, 3, 5, 3,10,100, false, false, true);
 end tb;