diff --git a/libraries/base/dp/tb/vhdl/tb_mmp_dp_bsn_align_v2.vhd b/libraries/base/dp/tb/vhdl/tb_mmp_dp_bsn_align_v2.vhd
index c0d9e8b402da0d2eae6d0170f8e3c7010fc4c5dc..f4f2544d5814160c769988ccb047ecd5b3caebd5 100644
--- a/libraries/base/dp/tb/vhdl/tb_mmp_dp_bsn_align_v2.vhd
+++ b/libraries/base/dp/tb/vhdl/tb_mmp_dp_bsn_align_v2.vhd
@@ -20,6 +20,10 @@
 -- Purpose: Verify MM part of mmp_dp_bsn_align_v2
 -- Description:
 --    The functional part is already verified by tb_tb_dp_bsn_align_v2.vhd.
+--    Tb features:
+--    . verify expected end values in gen_verify_ctrl to ensure that test has
+--      run
+--
 -- Usage:
 -- > as 5
 -- > run -all
@@ -45,33 +49,68 @@ ARCHITECTURE tb OF tb_mmp_dp_bsn_align_v2 IS
   CONSTANT c_dp_clk_period              : TIME := 10 ns;
   CONSTANT c_cross_clock_domain_latency : NATURAL := 20;
 
-  CONSTANT c_report_note                : BOOLEAN := FALSE;  -- Use TRUE for tb debugging, else FALSE to keep Transcript window more empty
-
-  CONSTANT c_nof_input_sync             : NATURAL := 10;
-  CONSTANT c_nof_block_per_sync         : NATURAL := 32;
-  CONSTANT c_block_size                 : NATURAL := 10;
-  CONSTANT c_input_gap_size             : NATURAL := 3;
-  CONSTANT c_sim_nof_blocks             : NATURAL := c_nof_block_per_sync * c_nof_input_sync;
+  CONSTANT c_rl                         : NATURAL := 1;
+  CONSTANT c_tb_nof_restart             : NATURAL := 2;    -- number of times to restart the input stimuli
+  CONSTANT c_tb_nof_blocks              : NATURAL := 20;   -- number of input blocks per restart
 
+  -- Fixed dut generics
+  -- . for dp_bsn_align_v2
   CONSTANT c_nof_streams                : NATURAL := 2;
-  CONSTANT c_bsn_latency_max            : POSITIVE := 2;
-  CONSTANT c_nof_aligners_max           : NATURAL := 1;
+  CONSTANT c_bsn_latency_max            : NATURAL := 1;
+  CONSTANT c_nof_aligners_max           : POSITIVE := 1;   -- fixed in this tb
+  CONSTANT c_block_size                 : NATURAL := 11;
+  CONSTANT c_block_period               : NATURAL := 20;
   CONSTANT c_bsn_w                      : NATURAL := c_dp_stream_bsn_w;
   CONSTANT c_data_w                     : NATURAL := 16;
-  CONSTANT c_data_replacement_value     : INTEGER := 0;
+  CONSTANT c_data_replacement_value     : INTEGER := 17;
+  CONSTANT c_use_mm_output              : BOOLEAN := FALSE;
+  CONSTANT c_pipeline_input             : NATURAL := 1;
+  CONSTANT c_rd_latency                 : NATURAL := 2;
+  -- . for mms_dp_bsn_monitor_v2
   CONSTANT c_nof_clk_per_sync           : NATURAL := 200*10**6;
   CONSTANT c_nof_input_bsn_monitors     : NATURAL := 0;
   CONSTANT c_use_bsn_output_monitor     : BOOLEAN := FALSE;
 
+  -- Input stream settings
+  CONSTANT c_data_init                  : INTEGER := 0;
+  CONSTANT c_bsn_init                   : NATURAL := 3;
+  CONSTANT c_channel_init               : INTEGER := 0;
+  CONSTANT c_err_init                   : NATURAL := 247;
+  CONSTANT c_sync_period                : NATURAL := 7;
+  CONSTANT c_sync_offset                : NATURAL := 2;
+  CONSTANT c_gap_size                   : NATURAL := c_block_period - c_block_size;
+
+  -- DUT latency (added to c_align_latency_nof_clk below, so counted in clk cycles)
+  CONSTANT c_mm_to_dp_latency         : NATURAL := 1;
+  CONSTANT c_dut_latency              : NATURAL := c_pipeline_input + c_rd_latency + c_mm_to_dp_latency;
+
+  CONSTANT c_align_latency_nof_blocks : NATURAL := c_bsn_latency_max * c_nof_aligners_max;  -- in number of blocks
+  CONSTANT c_align_latency_nof_valid  : NATURAL := c_bsn_latency_max * c_nof_aligners_max * c_block_size;  -- in number of data samples
+  CONSTANT c_align_latency_nof_clk    : NATURAL := c_bsn_latency_max * c_nof_aligners_max * c_block_period;  -- in number of clk cycles
+
+  -- Total DUT chain latency in number of clk cycles
+  CONSTANT c_total_latency            : NATURAL := c_dut_latency + c_align_latency_nof_clk;
+  CONSTANT c_verify_nof_blocks        : NATURAL := c_tb_nof_blocks - c_align_latency_nof_blocks;  -- per restart, skip the last blocks that are still in the DUT buffer
+
+  -- Signal monitoring and verification
+  TYPE t_data_arr    IS ARRAY (c_nof_streams-1 DOWNTO 0) OF STD_LOGIC_VECTOR(c_data_w-1 DOWNTO 0);
+  TYPE t_bsn_arr     IS ARRAY (c_nof_streams-1 DOWNTO 0) OF STD_LOGIC_VECTOR(c_bsn_w-1 DOWNTO 0);
+  TYPE t_err_arr     IS ARRAY (c_nof_streams-1 DOWNTO 0) OF STD_LOGIC_VECTOR(c_dp_stream_error_w-1 DOWNTO 0);
+  TYPE t_channel_arr IS ARRAY (c_nof_streams-1 DOWNTO 0) OF STD_LOGIC_VECTOR(c_dp_stream_channel_w-1 DOWNTO 0);
+
+  SIGNAL sl1                      : STD_LOGIC := '1';
+  SIGNAL mm_end                   : STD_LOGIC := '0';
+  SIGNAL dp_end_arr               : STD_LOGIC_VECTOR(c_nof_streams-1 DOWNTO 0) := (OTHERS => '0');
+  SIGNAL dp_end                   : STD_LOGIC := '0';
   SIGNAL tb_end                   : STD_LOGIC := '0';
-  SIGNAL stimuli_end              : STD_LOGIC := '0';
+  SIGNAL streams_enabled          : STD_LOGIC := '0';
 
   -- MM clock domain
   SIGNAL mm_clk                   : STD_LOGIC := '1';
   SIGNAL mm_rst                   : STD_LOGIC := '1';
 
-  SIGNAL reg_copi                 : t_mem_copi := c_mem_copi_rst;
-  SIGNAL reg_cipo                 : t_mem_cipo;
+  SIGNAL reg_bsn_align_copi       : t_mem_copi := c_mem_copi_rst;
+  SIGNAL reg_bsn_align_cipo       : t_mem_cipo;
   SIGNAL reg_input_monitor_copi   : t_mem_copi := c_mem_copi_rst;
   SIGNAL reg_input_monitor_cipo   : t_mem_cipo;
   SIGNAL reg_output_monitor_copi  : t_mem_copi := c_mem_copi_rst;
@@ -81,12 +120,39 @@ ARCHITECTURE tb OF tb_mmp_dp_bsn_align_v2 IS
   SIGNAL dp_clk                   : STD_LOGIC := '1';
   SIGNAL dp_rst                   : STD_LOGIC := '1';
 
-  SIGNAL node_index               : NATURAL := 0;  -- only used when g_bsn_latency_use_node_index is TRUE
-  SIGNAL stimuli_sosi             : t_dp_sosi;
-  SIGNAL in_sosi_arr              : t_dp_sosi_arr(c_nof_streams-1 DOWNTO 0);
-  SIGNAL mm_copi                  : t_mem_copi;   -- read access to output block, all output streams share same mm_copi
-  SIGNAL mm_cipo_arr              : t_mem_cipo_arr(c_nof_streams-1 DOWNTO 0);
-  SIGNAL mm_sosi                  : t_dp_sosi;   -- streaming information that signals that an output block can be read
+  SIGNAL node_index               : NATURAL := 0;
+  SIGNAL ref_siso_arr             : t_dp_siso_arr(c_nof_streams-1 DOWNTO 0) := (OTHERS => c_dp_siso_rdy);
+  SIGNAL ref_sosi_arr             : t_dp_sosi_arr(c_nof_streams-1 DOWNTO 0);                               -- generated stimuli
+  SIGNAL in_sosi_arr              : t_dp_sosi_arr(c_nof_streams-1 DOWNTO 0) := (OTHERS => c_dp_sosi_rst);  -- input stimuli
+
+  SIGNAL in_sync_arr              : STD_LOGIC_VECTOR(c_nof_streams-1 DOWNTO 0);
+  SIGNAL in_sop_arr               : STD_LOGIC_VECTOR(c_nof_streams-1 DOWNTO 0);
+  SIGNAL in_eop_arr               : STD_LOGIC_VECTOR(c_nof_streams-1 DOWNTO 0);
+  SIGNAL in_val_arr               : STD_LOGIC_VECTOR(c_nof_streams-1 DOWNTO 0);
+  SIGNAL in_data_arr              : t_data_arr;
+  SIGNAL in_bsn_arr               : t_bsn_arr;
+  SIGNAL in_channel_arr           : t_channel_arr;
+  SIGNAL in_err_arr               : t_err_arr;
+
+  SIGNAL out_sosi_arr             : t_dp_sosi_arr(c_nof_streams-1 DOWNTO 0) := (OTHERS => c_dp_sosi_rst);  -- output
+  SIGNAL out_sosi                 : t_dp_sosi;
+  SIGNAL out_sync_arr             : STD_LOGIC_VECTOR(c_nof_streams-1 DOWNTO 0);
+  SIGNAL out_sop_arr              : STD_LOGIC_VECTOR(c_nof_streams-1 DOWNTO 0);
+  SIGNAL out_eop_arr              : STD_LOGIC_VECTOR(c_nof_streams-1 DOWNTO 0);
+  SIGNAL out_val_arr              : STD_LOGIC_VECTOR(c_nof_streams-1 DOWNTO 0);
+  SIGNAL out_data_arr             : t_data_arr;
+  SIGNAL hold_data_arr            : t_data_arr;
+  SIGNAL out_bsn_arr              : t_bsn_arr;
+  SIGNAL out_bsn                  : INTEGER;
+  SIGNAL out_channel_arr          : t_channel_arr;
+  SIGNAL out_err_arr              : t_err_arr;
+
+  SIGNAL verify_done_arr          : STD_LOGIC_VECTOR(c_nof_streams-1 DOWNTO 0) := (OTHERS => '0');
+
+  SIGNAL hold_out_sop_arr         : STD_LOGIC_VECTOR(c_nof_streams-1 DOWNTO 0) := (OTHERS => '0');
+  SIGNAL expected_out_bsn_arr     : t_bsn_arr;
+  SIGNAL expected_out_data_arr    : t_data_arr;
+  SIGNAL expected_out_channel_arr : t_channel_arr;
 
 BEGIN
 
@@ -95,23 +161,49 @@ BEGIN
   dp_rst <= '1', '0' AFTER c_dp_clk_period*7;    
   mm_rst <= '1', '0' AFTER c_mm_clk_period*7;
   
+  tb_end <= mm_end AND dp_end;
+
+  ------------------------------------------------------------------------------
+  -- MM stimuli
+  ------------------------------------------------------------------------------
   ------------------------------------------------------------------------------
   -- MM stimuli and verification
   ------------------------------------------------------------------------------
 
   p_stimuli_and_verify_mm : PROCESS
     VARIABLE v_bsn : NATURAL;
-  BEGIN              
+  BEGIN
     proc_common_wait_until_low(dp_clk, mm_rst);
     proc_common_wait_until_low(dp_clk, dp_rst);
     proc_common_wait_some_cycles(mm_clk, 5);
 
+    -- . Read the stream enable bit (rddata bit 0) at MM address I for every stream, expect default '0' after power up
+    FOR I IN 0 TO c_nof_streams-1 LOOP
+      proc_mem_mm_bus_rd(I, mm_clk, reg_bsn_align_cipo, reg_bsn_align_copi);
+      proc_mem_mm_bus_rd_latency(1, mm_clk);  -- presumably waits the MM read latency, so that rddata is valid for the ASSERT
+      ASSERT reg_bsn_align_cipo.rddata(0) = '0' REPORT "Wrong stream disable for output " & int_to_str(I) SEVERITY ERROR;
+    END LOOP;
+
+    -- . Write stream enable bits for stream_en_arr, value 1 at MM address I for every stream
+    FOR I IN 0 TO c_nof_streams-1 LOOP
+      proc_mem_mm_bus_wr(I, 1,  mm_clk, reg_bsn_align_cipo, reg_bsn_align_copi);
+    END LOOP;
+    proc_common_wait_some_cycles(mm_clk, c_cross_clock_domain_latency);  -- wait in both clock domains before the read back
+    proc_common_wait_some_cycles(dp_clk, c_cross_clock_domain_latency);
+
+    -- . Read stream enable bits back, should now be '1' for every stream
+    FOR I IN 0 TO c_nof_streams-1 LOOP
+      proc_mem_mm_bus_rd(I, mm_clk, reg_bsn_align_cipo, reg_bsn_align_copi);
+      proc_mem_mm_bus_rd_latency(1, mm_clk);  -- presumably waits the MM read latency, so that rddata is valid for the ASSERT
+      ASSERT reg_bsn_align_cipo.rddata(0) = '1' REPORT "Wrong stream enable for output " & int_to_str(I) SEVERITY ERROR;
+    END LOOP;
+
+    streams_enabled <= '1';  -- p_stimuli in gen_input waits for this before it generates input blocks
 
     ---------------------------------------------------------------------------
     -- End of test
     ---------------------------------------------------------------------------
-    proc_common_wait_until_high(dp_clk, stimuli_end);
-    tb_end <= '1';
+    mm_end <= '1';  -- MM part is done, tb_end = mm_end AND dp_end
     WAIT;
   END PROCESS;
 
@@ -119,36 +211,102 @@ BEGIN
   -- Streaming stimuli
   ------------------------------------------------------------------------------
 
-  -- Generate data blocks with input sync
-  u_stimuli : ENTITY work.dp_stream_stimuli
-  GENERIC MAP (
-    g_sync_period  => c_nof_block_per_sync,
-    g_err_init     => 0,
-    g_err_incr     => 0,  -- do not increment, to not distract from viewing of BSN in Wave window
-    g_channel_init => 0,
-    g_channel_incr => 0,  -- do not increment, to not distract from viewing of BSN in Wave window
-    g_nof_repeat   => c_sim_nof_blocks,
-    g_pkt_len      => c_block_size,
-    g_pkt_gap      => c_input_gap_size
-  )
-  PORT MAP (
-    rst               => dp_rst,
-    clk               => dp_clk,
+  -- Generate data path input data (similar to tb_dp_bsn_align_v2.vhd)
+  gen_input : FOR I IN c_nof_streams-1 DOWNTO 0 GENERATE
+    p_stimuli : PROCESS
+      VARIABLE v_sync      : STD_LOGIC := '0';
+      VARIABLE v_bsn       : NATURAL;
+      VARIABLE v_data      : NATURAL := c_data_init;
+      VARIABLE v_channel   : NATURAL := c_channel_init;
+      VARIABLE v_err       : NATURAL := c_err_init;
+    BEGIN
+      v_data := v_data + I;  -- offset the initial data by the stream index, so each stream carries different data
+      ref_sosi_arr(I) <= c_dp_sosi_rst;
+      proc_common_wait_until_low(dp_clk, dp_rst);
+      proc_common_wait_until_high(dp_clk, streams_enabled);  -- set by p_stimuli_and_verify_mm after the MM stream enable write and read back
+      proc_common_wait_some_cycles(dp_clk, 10);
 
-    -- Generate stimuli
-    src_out           => stimuli_sosi,
+      -- Begin of stimuli, c_tb_nof_restart times c_tb_nof_blocks blocks
+      FOR S IN 0 TO c_tb_nof_restart-1 LOOP
+        v_bsn := c_bsn_init;  -- BSN starts again at c_bsn_init for every restart, v_data keeps on counting
+        FOR R IN 0 TO c_tb_nof_blocks-1 LOOP
+          v_sync := sel_a_b(v_bsn MOD c_sync_period = c_sync_offset, '1', '0');
+          proc_dp_gen_block_data(c_rl, TRUE, c_data_w, c_data_w, v_data, 0, 0, c_block_size, v_channel, v_err, v_sync, TO_UVEC(v_bsn, c_bsn_w), dp_clk, sl1, ref_siso_arr(I), ref_sosi_arr(I));
+          v_bsn  := v_bsn + 1;
+          v_data := v_data + c_block_size;
+          proc_common_wait_some_cycles(dp_clk, c_gap_size);  -- create gap between blocks, c_gap_size = c_block_period - c_block_size
+        END LOOP;
+        -- Create gap between restarts
+        proc_common_wait_some_cycles(dp_clk, 100);
+      END LOOP;
 
-    -- End of stimuli
-    tb_end            => stimuli_end
-  );
+      -- End of stimuli, set the expected end values for gen_verify_ctrl
+      -- . default c_bsn_latency_max blocks remain in DUT buffer
+      expected_out_bsn_arr(I) <= TO_UVEC(v_bsn-1 - c_align_latency_nof_blocks, c_bsn_w);
+      expected_out_data_arr(I) <= TO_UVEC(v_data-1 - c_align_latency_nof_valid, c_data_w);
+      -- . default no data is lost, so all channel(0) lost data flags are 0
+      expected_out_channel_arr(I) <= TO_DP_CHANNEL(0);
+
+      proc_common_wait_some_cycles(dp_clk, 100);
+      verify_done_arr(I) <= '1';  -- pulse of one dp_clk cycle, passed to proc_dp_verify_value in gen_verify_ctrl (presumably as verify enable)
+      proc_common_wait_some_cycles(dp_clk, 1);
+      verify_done_arr(I) <= '0';
+
+      dp_end_arr(I) <= '1';  -- this stream is done, dp_end = vector_and(dp_end_arr)
+      WAIT;
+    END PROCESS;
+  END GENERATE;
+
+  in_sosi_arr <= ref_sosi_arr;
+
+  dp_end <= vector_and(dp_end_arr);
+
+  ------------------------------------------------------------------------------
+  -- Data verification
+  ------------------------------------------------------------------------------
+
+  mon_sosi : FOR I IN c_nof_streams-1 DOWNTO 0 GENERATE
+    -- Ease in_sosi_arr monitoring, by copying the record fields of stream I into per field arrays
+    in_sync_arr(I)    <= in_sosi_arr(I).sync;
+    in_sop_arr(I)     <= in_sosi_arr(I).sop;
+    in_eop_arr(I)     <= in_sosi_arr(I).eop;
+    in_val_arr(I)     <= in_sosi_arr(I).valid;
+    in_data_arr(I)    <= in_sosi_arr(I).data(c_data_w-1 DOWNTO 0);  -- only the c_data_w LSbits
+    in_bsn_arr(I)     <= in_sosi_arr(I).bsn(c_bsn_w-1 DOWNTO 0);  -- only the c_bsn_w LSbits
+    in_channel_arr(I) <= in_sosi_arr(I).channel;
+    in_err_arr(I)     <= in_sosi_arr(I).err;
+
+    -- Ease out_sosi_arr monitoring and verification, the out_*_arr are used in gen_verify_ctrl
+    out_sync_arr(I)    <= out_sosi_arr(I).sync;
+    out_sop_arr(I)     <= out_sosi_arr(I).sop;
+    out_eop_arr(I)     <= out_sosi_arr(I).eop;
+    out_val_arr(I)     <= out_sosi_arr(I).valid;
+    out_data_arr(I)    <= out_sosi_arr(I).data(c_data_w-1 DOWNTO 0);  -- only the c_data_w LSbits
+    out_bsn_arr(I)     <= out_sosi_arr(I).bsn(c_bsn_w-1 DOWNTO 0);  -- only the c_bsn_w LSbits
+    out_channel_arr(I) <= out_sosi_arr(I).channel;
+    out_err_arr(I)     <= out_sosi_arr(I).err;
+  END GENERATE;
+
+  out_sosi <= out_sosi_arr(0);  -- take out_sosi control and info from out_sosi_arr(0)
+
+  out_bsn <= TO_UINT(out_sosi.bsn);  -- integer view of out_sosi.bsn, so of out_sosi_arr(0).bsn
+
+  gen_verify_ctrl : FOR I IN c_nof_streams-1 DOWNTO 0 GENERATE
+    -- . Verify that sop and eop come in pairs, for every output stream
+    proc_dp_verify_sop_and_eop(dp_clk, out_val_arr(I), out_sop_arr(I), out_eop_arr(I), hold_out_sop_arr(I));
 
-  in_sosi_arr <= (OTHERS => stimuli_sosi);
+    -- . Verify the expected end values from p_stimuli at verify_done_arr(I), to ensure that the stimuli have been applied at all
+    hold_data_arr(I) <= out_data_arr(I) WHEN out_val_arr(I) = '1';  -- hold last valid data, no ELSE so the value is kept while valid is low
+    proc_dp_verify_value("out_data_arr", e_equal, dp_clk, verify_done_arr(I), expected_out_data_arr(I), hold_data_arr(I));
+    proc_dp_verify_value("out_bsn_arr", e_equal, dp_clk, verify_done_arr(I), expected_out_bsn_arr(I), out_bsn_arr(I));
+    proc_dp_verify_value("out_channel_arr", e_equal, dp_clk, verify_done_arr(I), expected_out_channel_arr(I), out_channel_arr(I));
+  END GENERATE;
 
   ------------------------------------------------------------------------------
   -- DUT
   ------------------------------------------------------------------------------
 
-  u_bsn_align : ENTITY work.mmp_dp_bsn_align_v2
+  u_mmp_dp_bsn_align : ENTITY work.mmp_dp_bsn_align_v2
   GENERIC MAP (
     g_nof_streams                => c_nof_streams,
     g_bsn_latency_max            => c_bsn_latency_max,
@@ -157,6 +315,9 @@ BEGIN
     g_bsn_w                      => c_bsn_w,
     g_data_w                     => c_data_w,
     g_data_replacement_value     => c_data_replacement_value,
+    g_use_mm_output              => c_use_mm_output,
+    g_pipeline_input             => c_pipeline_input,
+    g_rd_latency                 => c_rd_latency,
     g_nof_clk_per_sync           => c_nof_clk_per_sync,
     g_nof_input_bsn_monitors     => c_nof_input_bsn_monitors,
     g_use_bsn_output_monitor     => c_use_bsn_output_monitor
@@ -165,8 +326,8 @@ BEGIN
     mm_rst                  => mm_rst,
     mm_clk                  => mm_clk,
 
-    reg_copi                => reg_copi,
-    reg_cipo                => reg_cipo,
+    reg_bsn_align_copi      => reg_bsn_align_copi,
+    reg_bsn_align_cipo      => reg_bsn_align_cipo,
 
     reg_input_monitor_copi  => reg_input_monitor_copi,
     reg_input_monitor_cipo  => reg_input_monitor_cipo,
@@ -181,9 +342,11 @@ BEGIN
     -- Streaming input
     in_sosi_arr             => in_sosi_arr,
     -- Output via local MM in dp_clk domain
-    mm_copi                 => mm_copi,
-    mm_cipo_arr             => mm_cipo_arr,
-    mm_sosi                 => mm_sosi
+    --mm_sosi                 => mm_sosi,
+    --mm_copi                 => mm_copi,
+    --mm_cipo_arr             => mm_cipo_arr,
+    -- Output via streaming DP interface, when g_use_mm_output = FALSE.
+    out_sosi_arr            => out_sosi_arr
   );
 
 END tb;