diff --git a/doc/erko_hdl_design_article.txt b/doc/erko_hdl_design_article.txt
index f33018fb8cf93b0123681154e039cd08b261fa8f..4f6b4339456ac650d68d892fe9f6e06fd0a1f2c8 100644
--- a/doc/erko_hdl_design_article.txt
+++ b/doc/erko_hdl_design_article.txt
@@ -111,10 +111,16 @@ Implementation steps:
     SIGNAL r             : t_reg;
     SIGNAL nxt_r         : t_reg;
 
-  . -- Memoryless signals in p_comb (wires used as local auxiliary variables)
-    SIGNAL s             : t_comb;
+  . -- Memoryless signals and auxiliary variables in p_comb
+    -- . For unique representation of variables in p_comb as signal wires, the p_comb
+    --   should assign each field in t_comb only once to a variable. It is allowed to
+    --   reassign a t_comb variable in p_comb, so use it as a temporary auxiliary
+    --   variable, but then only the last assignment value will be visible via the
+    --   signal dbg_wires in the Wave window.
+    SIGNAL dbg_wires     : t_comb;
 
-  . -- Structural signals (wires used to connect components and IO)
+  . -- Structural signals (wires used to connect r, nxt_r to other components and to
+    -- the entity IO)
 
   . -- Pipeline registers
     SIGNAL in_data_p     : ...
@@ -135,8 +141,9 @@ Implementation steps:
       -- State variable
       VARIABLE v : t_reg;
       -- Auxiliary variables
-      VARIABLE v_*   -- optional, use to improve code readability
-                     -- use v. only on left side? use separate v_* to clearly indicate when we use it also on the right side of assignments ?
+      --VARIABLE v_*   -- optional, use to improve code readability
+      --               -- use v. only on left side? use separate v_* to clearly indicate when we use it also on the right side of assignments ?
+      VARIABLE w : t_comb;
     BEGIN
       v := r;      -- default keep existing state
       v.* := ...;  -- default force specific values, e.g. set strobes to '0',
@@ -152,7 +159,7 @@ Implementation steps:
       nxt_r <= v;
 
       -- memory less signals, only for view in wave window
-      s <= d;
+      dbg_wires <= w;
     END PROCESS;
 
   . -- Pipelining
diff --git a/libraries/base/dp/src/vhdl/dp_block_from_mm.vhd b/libraries/base/dp/src/vhdl/dp_block_from_mm.vhd
index 37b58b49b76f0b9a8cdf48339be448a2de30a941..795c772f562a4c8e01bd747ff4e3c6bc49016fed 100644
--- a/libraries/base/dp/src/vhdl/dp_block_from_mm.vhd
+++ b/libraries/base/dp/src/vhdl/dp_block_from_mm.vhd
@@ -124,7 +124,11 @@ BEGIN
     IF r.busy = '0' AND start_pulse = '1' THEN
       -- initiate next block
       v.busy := '1';
-    ELSIF r.busy = '1' THEN
+    END IF;
+
+    -- use v.busy, instead of r.busy, to allow start_pulse at mm_done, to
+    -- support zero gaps between output blocks
+    IF v.busy = '1' THEN
       IF out_siso.ready = '1' THEN
         -- continue with block
         mm_mosi.rd <= '1';
diff --git a/libraries/base/dp/src/vhdl/dp_bsn_align_v2.vhd b/libraries/base/dp/src/vhdl/dp_bsn_align_v2.vhd
index 87dcb99739f0d7daec770140c9dc65d42511126d..e94cd4efd070011f1e89c84d1ca3ffdd8a6c89ed 100644
--- a/libraries/base/dp/src/vhdl/dp_bsn_align_v2.vhd
+++ b/libraries/base/dp/src/vhdl/dp_bsn_align_v2.vhd
@@ -26,13 +26,17 @@
 --   input stream that is ahead of the other remote input streams. After a
 --   certain number of blocks on input 0, the same block on all remote
 --   inputs should also have arrived. If not then they are replaced by
---   replacement data. The output streams are paced by the block rate of input 0.
---   The user has to read the block within the block period.
+--   replacement data. The output streams are paced by the block rate of
+--   input 0. The user has to read the block within the block period.
 --
 --   Features:
---   . uses lost_data flag and replacement data to replace lost input blocks
+--   . The g_block_size <= block period, so supports input blocks arriving
+--     with or without data valid gaps
+--   . uses replacement data to replace lost input blocks and channel bit 0 as
+--     lost_data flag
 --   . uses replacement data to replace disabled input streams
---   . output block can be read in arbitrary order
+--   . output block can be read in arbitrary order via g_use_mm_output = TRUE
+--   . output block can be streamed via g_use_mm_output = FALSE
 --
 --   For more detailed description see:
 --   https://support.astron.nl/confluence/display/L2M/L6+FWLIB+Design+Document%3A+BSN+aligner+v2
@@ -53,22 +57,23 @@ USE work.dp_stream_pkg.ALL;
 
 ENTITY dp_bsn_align_v2 IS
   GENERIC (
-    g_nof_streams                : NATURAL;           -- number of input and output streams
-    g_bsn_latency_max            : NATURAL;           -- Maximum travel latency of a remote block in number of block periods T_blk
+    g_nof_streams                : NATURAL;           -- >= 2, number of input and output streams
+    g_bsn_latency_max            : NATURAL;           -- maximum travel latency of a remote block in number of block periods T_blk
     g_nof_aligners_max           : POSITIVE := 1;     -- 1 when only align at last node, > 1 when align at every intermediate node
     g_block_size                 : NATURAL := 32;     -- > 1, g_block_size=1 is not supported
     g_bsn_w                      : NATURAL := c_dp_stream_bsn_w;  -- number of bits in sosi BSN
     g_data_w                     : NATURAL;           -- number of bits in sosi data
-    g_replacement_value          : INTEGER := 0;      -- output sosi data value for missing input blocks
+    g_data_replacement_value     : INTEGER := 0;      -- output sosi data value for missing input blocks
     g_use_mm_output              : BOOLEAN := FALSE;  -- output via MM or via streaming DP
-    g_pipeline_input             : NATURAL := 0;      -- >= 0, choose 0 for wires, choose 1 to ease timing closure
-    g_rd_latency                 : NATURAL := 1       -- 1 or 2, choose 2 to ease timing closure
+    g_pipeline_input             : NATURAL := 1;      -- >= 0, choose 0 for wires, choose 1 to ease timing closure of in_sosi_arr
+    g_pipeline_output            : NATURAL := 1;      -- >= 0, choose 0 for wires, choose 1 to ease timing closure of out_sosi_arr
+    g_rd_latency                 : NATURAL := 2       -- 1 or 2, choose 2 to ease timing closure
   );
   PORT (
     dp_rst         : IN  STD_LOGIC;
     dp_clk         : IN  STD_LOGIC;
 
-    node_index     : IN  NATURAL RANGE 0 TO g_nof_aligners_max := 0;  -- only used when g_nof_aligners_max > 1
+    node_index     : IN  NATURAL RANGE 0 TO g_nof_aligners_max-1 := 0;  -- only used when g_nof_aligners_max > 1
 
     -- MM control
     stream_en_arr  : IN  STD_LOGIC_VECTOR(g_nof_streams-1 DOWNTO 0) := (OTHERS=>'1');
@@ -89,8 +94,8 @@ END dp_bsn_align_v2;
 
 ARCHITECTURE rtl OF dp_bsn_align_v2 IS
 
-  -- Circular buffer per stream
-  CONSTANT c_buffer_nof_blocks : NATURAL :=  ceil_pow2(1 + g_nof_aligners_max * g_bsn_latency_max);
+  -- Circular buffer per stream, size is next power of 2 that fits
+  CONSTANT c_buffer_nof_blocks : NATURAL :=  true_log_pow2(1 + g_nof_aligners_max * g_bsn_latency_max);
 
   CONSTANT c_ram_size       : NATURAL := c_buffer_nof_blocks * g_block_size;
   CONSTANT c_ram_buf        : t_c_mem := (latency  => 1,
@@ -107,13 +112,18 @@ ARCHITECTURE rtl OF dp_bsn_align_v2 IS
   -- avoid that synthesis may infer a too larger multiplier
   CONSTANT c_product_w      : NATURAL := c_blk_pointer_w + c_block_size_w;
 
+  -- Output of the lost data flag via out_sosi_arr().channel bit 0
+  CONSTANT c_channel_w      : NATURAL := 1;
+
   TYPE t_bsn_arr IS ARRAY (INTEGER RANGE <>) OF STD_LOGIC_VECTOR(g_bsn_w-1 DOWNTO 0);
+  TYPE t_channel_arr IS ARRAY (INTEGER RANGE <>) OF STD_LOGIC_VECTOR(c_channel_w-1 DOWNTO 0);
   TYPE t_adr_arr IS ARRAY (INTEGER RANGE <>) OF STD_LOGIC_VECTOR(c_mem_ram.adr_w-1 DOWNTO 0);
   TYPE t_filled_arr IS ARRAY (INTEGER RANGE <>) OF STD_LOGIC_VECTOR(c_buffer_nof_blocks-1 DOWNTO 0);
 
+  -- State
   TYPE t_reg IS RECORD
     -- p_write_arr
-    wr_pointer           : NATURAL;
+    wr_blk_pointer       : NATURAL;
     wr_copi_arr          : t_mem_copi_arr(g_nof_streams-1 DOWNTO 0);
     -- all streams
     filled_arr           : t_filled_arr(g_nof_streams-1 DOWNTO 0);
@@ -124,19 +134,25 @@ ARCHITECTURE rtl OF dp_bsn_align_v2 IS
     mm_sosi              : t_dp_sosi;
     dp_sosi              : t_dp_sosi;
     -- p_read
-    rd_pointer           : INTEGER;  -- use integer to detect need to wrap to natural
+    rd_blk_pointer       : INTEGER;  -- use integer to detect need to wrap to natural
     rd_offset            : STD_LOGIC_VECTOR(c_mem_ram.adr_w-1 DOWNTO 0);
     rd_copi              : t_mem_copi;
     fill_cipo_arr        : t_mem_cipo_arr(g_nof_streams-1 DOWNTO 0);  -- used combinatorial to contain rd_cipo_arr from buffer or replacement data
-    out_bsn              : STD_LOGIC_VECTOR(g_bsn_w-1 DOWNTO 0);  -- hold BSN for streaming output
+    out_bsn              : STD_LOGIC_VECTOR(g_bsn_w-1 DOWNTO 0);  -- hold BSN until next sop, for easy view in Wave window
+    out_channel_arr      : t_channel_arr(g_nof_streams-1 DOWNTO 0);  -- hold channel until next sop per stream, for easy view in Wave window
   END RECORD;
 
+  -- Wires and auxiliary variables in p_comb
+  -- . For unique representation as signal wire, the p_comb should assign each
+  --   field in t_comb only once to a variable. It is allowed to reassign a
+  --   t_comb variable in p_comb, but then only the last assignment value will
+  --   be visible via the signal dbg_wires in the Wave window.
   TYPE t_comb IS RECORD
-    ref_sosi          : t_dp_sosi;
-    pointer_slv       : STD_LOGIC_VECTOR(c_blk_pointer_w-1 DOWNTO 0);
-    product_slv       : STD_LOGIC_VECTOR(c_product_w-1 DOWNTO 0);
-    lost_data_flag    : STD_LOGIC;
-    out_sosi_arr      : t_dp_sosi_arr(g_nof_streams-1 DOWNTO 0);
+    ref_sosi            : t_dp_sosi;
+    blk_pointer_slv     : STD_LOGIC_VECTOR(c_blk_pointer_w-1 DOWNTO 0);
+    product_slv         : STD_LOGIC_VECTOR(c_product_w-1 DOWNTO 0);
+    lost_data_flags_arr : STD_LOGIC_VECTOR(g_nof_streams-1 DOWNTO 0);
+    out_sosi_arr        : t_dp_sosi_arr(g_nof_streams-1 DOWNTO 0);
   END RECORD;
 
   CONSTANT c_reg_rst  : t_reg := (0,
@@ -151,45 +167,48 @@ ARCHITECTURE rtl OF dp_bsn_align_v2 IS
                                   (OTHERS=>'0'),
                                   c_mem_copi_rst,
                                   (OTHERS=>c_mem_cipo_rst),
-                                  (OTHERS=>'0'));
+                                  (OTHERS=>'0'),
+                                  (OTHERS=>(OTHERS=>'0')));
 
   -- State registers for p_comb
-  SIGNAL r             : t_reg;
-  SIGNAL nxt_r         : t_reg;
+  SIGNAL r                 : t_reg;
+  SIGNAL nxt_r             : t_reg;
 
-  -- Memoryless signals in p_comb (wires used as local auxiliary variables)
-  SIGNAL s             : t_comb;
+  -- Memoryless signals in p_comb (wires used as local variables)
+  SIGNAL dbg_wires         : t_comb;
 
   -- Structural signals (wires used to connect components and IO)
-  SIGNAL dp_done       : STD_LOGIC;
-  SIGNAL dp_done_arr   : STD_LOGIC_VECTOR(g_nof_streams-1 DOWNTO 0);
-  SIGNAL dp_copi       : t_mem_copi;
-  SIGNAL dp_copi_arr   : t_mem_copi_arr(g_nof_streams-1 DOWNTO 0);
+  SIGNAL dp_done           : STD_LOGIC;
+  SIGNAL dp_done_arr       : STD_LOGIC_VECTOR(g_nof_streams-1 DOWNTO 0);
+  SIGNAL dp_copi           : t_mem_copi;
+  SIGNAL dp_copi_arr       : t_mem_copi_arr(g_nof_streams-1 DOWNTO 0);
 
-  SIGNAL rd_sosi_arr   : t_dp_sosi_arr(g_nof_streams-1 DOWNTO 0);
-  SIGNAL rd_cipo_arr   : t_mem_cipo_arr(g_nof_streams-1 DOWNTO 0) := (OTHERS=>c_mem_cipo_rst);
+  SIGNAL rd_sosi_arr       : t_dp_sosi_arr(g_nof_streams-1 DOWNTO 0);
+  SIGNAL rd_cipo_arr       : t_mem_cipo_arr(g_nof_streams-1 DOWNTO 0) := (OTHERS=>c_mem_cipo_rst);
 
   -- Pipeline registers
-  SIGNAL in_sosi_arr_p : t_dp_sosi_arr(g_nof_streams-1 DOWNTO 0);
-  SIGNAL rd_copi       : t_mem_copi;
+  SIGNAL in_sosi_arr_p     : t_dp_sosi_arr(g_nof_streams-1 DOWNTO 0);
+  SIGNAL rd_copi           : t_mem_copi;
+  SIGNAL comb_out_sosi_arr : t_dp_sosi_arr(g_nof_streams-1 DOWNTO 0);
 
   -- Debug signals
-  SIGNAL dbg_nof_streams         : NATURAL := g_nof_streams;
-  SIGNAL dbg_bsn_latency_max     : NATURAL := g_bsn_latency_max;
-  SIGNAL dbg_nof_aligners_max    : NATURAL := g_nof_aligners_max;
-  SIGNAL dbg_block_size          : NATURAL := g_block_size;
-  SIGNAL dbg_bsn_w               : NATURAL := g_bsn_w;
-  SIGNAL dbg_data_w              : NATURAL := g_data_w;
-  SIGNAL dbg_replacement_value   : INTEGER := g_replacement_value;
-  SIGNAL dbg_use_mm_output       : BOOLEAN := g_use_mm_output;
-  SIGNAL dbg_pipeline_input      : NATURAL := g_pipeline_input;
-  SIGNAL dbg_rd_latency          : NATURAL := g_rd_latency;
-  SIGNAL dbg_c_buffer_nof_blocks : NATURAL := c_buffer_nof_blocks;
-  SIGNAL dbg_c_product_w         : NATURAL := c_product_w;
+  SIGNAL dbg_nof_streams            : NATURAL := g_nof_streams;
+  SIGNAL dbg_bsn_latency_max        : NATURAL := g_bsn_latency_max;
+  SIGNAL dbg_nof_aligners_max       : NATURAL := g_nof_aligners_max;
+  SIGNAL dbg_block_size             : NATURAL := g_block_size;
+  SIGNAL dbg_bsn_w                  : NATURAL := g_bsn_w;
+  SIGNAL dbg_data_w                 : NATURAL := g_data_w;
+  SIGNAL dbg_data_replacement_value : INTEGER := g_data_replacement_value;
+  SIGNAL dbg_use_mm_output          : BOOLEAN := g_use_mm_output;
+  SIGNAL dbg_pipeline_input         : NATURAL := g_pipeline_input;
+  SIGNAL dbg_rd_latency             : NATURAL := g_rd_latency;
+  SIGNAL dbg_c_buffer_nof_blocks    : NATURAL := c_buffer_nof_blocks;
+  SIGNAL dbg_c_product_w            : NATURAL := c_product_w;
 
 BEGIN
 
-  mm_sosi <= r.mm_sosi WHEN g_use_mm_output = TRUE ELSE c_dp_sosi_rst;
+  -- Output mm_sosi, also when g_use_mm_output = FALSE.
+  mm_sosi <= r.mm_sosi;
 
   p_reg : PROCESS(dp_clk, dp_rst)
   BEGIN
@@ -202,7 +221,7 @@ BEGIN
   
   p_comb : PROCESS(r, in_sosi_arr_p, mm_copi, dp_copi, rd_cipo_arr, rd_sosi_arr)
     VARIABLE v : t_reg;   -- State variable
-    VARIABLE d : t_comb;  -- Memoryless auxiliary variables, local wires
+    VARIABLE w : t_comb;  -- Local wires = memoryless auxiliary variables
   BEGIN
     v := r;  -- state signals
     v.mm_sosi := func_dp_stream_reset_control(r.mm_sosi);
@@ -222,56 +241,57 @@ BEGIN
 
       IF in_sosi_arr_p(I).sop = '1' THEN
         -- . set address at start of block
-        d.pointer_slv := in_sosi_arr_p(I).bsn(c_blk_pointer_w-1 DOWNTO 0);
-        d.product_slv := MULT_UVEC(d.pointer_slv, c_block_size_slv);
-        v.wr_copi_arr(I).address := RESIZE_MEM_ADDRESS(d.product_slv);
+        w.blk_pointer_slv := in_sosi_arr_p(I).bsn(c_blk_pointer_w-1 DOWNTO 0);
+        w.product_slv := MULT_UVEC(w.blk_pointer_slv, c_block_size_slv);
+        v.wr_copi_arr(I).address := RESIZE_MEM_ADDRESS(w.product_slv);
 
         -- . set filled flag at sop, so assume rest of block will follow in time
-        v.filled_arr(I)(TO_UINT(d.pointer_slv)) := '1';
+        v.filled_arr(I)(TO_UINT(w.blk_pointer_slv)) := '1';
       END IF;
     END LOOP;
 
     ----------------------------------------------------------------------------
     -- p_control, all at sop of local reference input 0
     ----------------------------------------------------------------------------
-    d.ref_sosi := in_sosi_arr_p(0);
-    IF d.ref_sosi.sop = '1' THEN
+    w.ref_sosi := in_sosi_arr_p(0);
+    IF w.ref_sosi.sop = '1' THEN
       -- . write sync & bsn buffer
-      v.wr_pointer := TO_UINT(d.ref_sosi.bsn(c_blk_pointer_w-1 DOWNTO 0));
-      v.sync_arr(v.wr_pointer) := d.ref_sosi.sync;
-      v.bsn_arr(v.wr_pointer) := d.ref_sosi.bsn(g_bsn_w-1 DOWNTO 0);
-
-      -- . update read block pointer at g_bsn_latency_max blocks behind the reference write pointer
-      IF g_nof_aligners_max = 1 THEN
-        v.rd_pointer := v.wr_pointer - g_bsn_latency_max;
-      ELSE
-        v.rd_pointer := v.wr_pointer - g_bsn_latency_max * node_index;
-      END IF;
-      IF v.rd_pointer < 0 THEN
-        v.rd_pointer := v.rd_pointer + c_buffer_nof_blocks;
+      v.wr_blk_pointer := TO_UINT(w.ref_sosi.bsn(c_blk_pointer_w-1 DOWNTO 0));
+      v.sync_arr(v.wr_blk_pointer) := w.ref_sosi.sync;
+      v.bsn_arr(v.wr_blk_pointer) := w.ref_sosi.bsn(g_bsn_w-1 DOWNTO 0);
+
+      -- . update read block pointer at g_bsn_latency_max blocks behind the
+      --   reference write pointer, dependent on the node_index. For
+      --   g_nof_aligners_max = 1 the node_index = 0 fixed. For
+      --   g_nof_aligners_max > 1, node_index = 0 is the first BSN aligner in
+      --   a chain. Each subsequent node in the chain then has to account for
+      --   g_bsn_latency_max additional block latency.
+      v.rd_blk_pointer := v.wr_blk_pointer - g_bsn_latency_max * (1 + node_index);
+      IF v.rd_blk_pointer < 0 THEN
+        v.rd_blk_pointer := v.rd_blk_pointer + c_buffer_nof_blocks;
       END IF;
 
       -- . update read address of read block pointer
-      d.pointer_slv := TO_UVEC(v.rd_pointer, c_blk_pointer_w);
-      d.product_slv := MULT_UVEC(d.pointer_slv, c_block_size_slv);
-      v.rd_offset := RESIZE_UVEC(d.product_slv, c_mem_ram.adr_w);
+      w.blk_pointer_slv := TO_UVEC(v.rd_blk_pointer, c_blk_pointer_w);
+      w.product_slv := MULT_UVEC(w.blk_pointer_slv, c_block_size_slv);
+      v.rd_offset := RESIZE_UVEC(w.product_slv, c_mem_ram.adr_w);
 
       -- . issue mm_sosi, if there is output ready to be read, indicated by filled reference block
-      IF r.filled_arr(0)(v.rd_pointer) = '1' THEN
+      IF r.filled_arr(0)(v.rd_blk_pointer) = '1' THEN
         v.mm_sosi.sop := '1';
         v.mm_sosi.eop := '1';
         v.mm_sosi.valid := '1';
         -- . pass on timestamp information
-        v.mm_sosi.sync := v.sync_arr(v.rd_pointer);
-        v.mm_sosi.bsn := RESIZE_DP_BSN(v.bsn_arr(v.rd_pointer));
+        v.mm_sosi.sync := v.sync_arr(v.rd_blk_pointer);
+        v.mm_sosi.bsn := RESIZE_DP_BSN(v.bsn_arr(v.rd_blk_pointer));
         -- . pass on lost data flags for enabled streams via channel field, and
         --   determine whether the ouput has to insert replacement data
         v.mm_sosi.channel := (OTHERS=>'0');
         FOR I IN 0 TO g_nof_streams-1 LOOP
-          d.lost_data_flag := NOT v.filled_arr(I)(v.rd_pointer);
+          w.lost_data_flags_arr(I) := NOT v.filled_arr(I)(v.rd_blk_pointer);
           IF stream_en_arr(I) = '1' THEN  -- use MM bit at sop
-            v.use_replacement_data(I) := d.lost_data_flag;  -- enabled stream, so replace the data if the data was lost
-            v.mm_sosi.channel(I) := d.lost_data_flag;  -- enabled stream, so flag the data if the data was lost
+            v.use_replacement_data(I) := w.lost_data_flags_arr(I);  -- enabled stream, so replace the data if the data was lost
+            v.mm_sosi.channel(I) := w.lost_data_flags_arr(I);  -- enabled stream, so flag the data if the data was lost
           ELSE
             v.use_replacement_data(I) := '1';  -- disabled stream, so replace the data, but do not flag the data as lost
           END IF;
@@ -280,7 +300,7 @@ BEGIN
 
       -- . clear filled flags, after mm_sosi was issued, or could have been issued
       FOR I IN 0 TO g_nof_streams-1 LOOP
-        v.filled_arr(I)(v.rd_pointer) := '0';
+        v.filled_arr(I)(v.rd_blk_pointer) := '0';
       END LOOP;
     END IF;
 
@@ -294,7 +314,7 @@ BEGIN
     -- . if necessary, replace a stream by replacement data
     FOR I IN 0 TO g_nof_streams-1 LOOP
       IF r.use_replacement_data(I) = '1' THEN
-        v.fill_cipo_arr(I).rddata := TO_MEM_SDATA(g_replacement_value);
+        v.fill_cipo_arr(I).rddata := TO_MEM_SDATA(g_data_replacement_value);
       END IF;
     END LOOP;
 
@@ -311,7 +331,7 @@ BEGIN
       mm_cipo_arr <= v.fill_cipo_arr;
 
       -- . no output via DP streaming interface
-      out_sosi_arr <= (OTHERS => c_dp_sosi_rst);
+      comb_out_sosi_arr <= (OTHERS => c_dp_sosi_rst);
     ELSE
       --------------------------------------------------------------------------
       -- Do the output via the DP streaming interface
@@ -327,26 +347,33 @@ BEGIN
       END IF;
 
       -- . pass on input data from the buffer
-      d.out_sosi_arr := rd_sosi_arr;  -- = v.fill_cipo_arr in streaming format, contains the
+      w.out_sosi_arr := rd_sosi_arr;  -- = v.fill_cipo_arr in streaming format, contains the
                                       -- input data from the buffer or replacement data
       IF rd_sosi_arr(0).sop = '1' THEN
         -- . at sop pass on input info from r.dp_sosi to all streams in out_sosi_arr
-        d.out_sosi_arr := func_dp_stream_arr_set(d.out_sosi_arr, r.dp_sosi.sync, "SYNC");
-        d.out_sosi_arr := func_dp_stream_arr_set(d.out_sosi_arr, r.dp_sosi.bsn, "BSN");
+        w.out_sosi_arr := func_dp_stream_arr_set(w.out_sosi_arr, r.dp_sosi.sync, "SYNC");
+        w.out_sosi_arr := func_dp_stream_arr_set(w.out_sosi_arr, r.dp_sosi.bsn, "BSN");
         FOR I IN 0 TO g_nof_streams-1 LOOP
           -- . pass on the lost flag per stream
-          d.out_sosi_arr(I).channel := RESIZE_DP_CHANNEL(slv(r.dp_sosi.channel(I)));
+          w.out_sosi_arr(I).channel := RESIZE_DP_CHANNEL(slv(r.dp_sosi.channel(I)));
         END LOOP;
 
-        -- . hold BSN until next sop, to ease view in wave window
+        -- . hold sop info fields until next sop, to ease view in wave window
         v.out_bsn := r.dp_sosi.bsn(g_bsn_w-1 DOWNTO 0);
+        FOR I IN 0 TO g_nof_streams-1 LOOP
+          v.out_channel_arr(I) := w.out_sosi_arr(I).channel(c_channel_w-1 DOWNTO 0);
+        END LOOP;
       ELSE
-        -- . until next sop pass on BSN, to ease view in wave window
-        d.out_sosi_arr := func_dp_stream_arr_set(d.out_sosi_arr, r.out_bsn, "BSN");
+        -- . until next sop pass on BSN to all streams, to ease view in wave window
+        w.out_sosi_arr := func_dp_stream_arr_set(w.out_sosi_arr, r.out_bsn, "BSN");
+        FOR I IN 0 TO g_nof_streams-1 LOOP
+           -- . until next sop pass on channel bit 0 per stream, to ease view in wave window
+           w.out_sosi_arr(I).channel := RESIZE_DP_CHANNEL(r.out_channel_arr(I));
+        END LOOP;
       END IF;
 
       -- . output via DP streaming interface
-      out_sosi_arr <= d.out_sosi_arr;
+      comb_out_sosi_arr <= w.out_sosi_arr;
 
       -- . no output via MM interface
       mm_cipo_arr <= (OTHERS => c_mem_cipo_rst);
@@ -357,8 +384,8 @@ BEGIN
     ----------------------------------------------------------------------------
     nxt_r <= v;
 
-    -- memory less signals, only for view in wave window
-    s <= d;
+    -- local wires, only for view in wave window
+    dbg_wires <= w;
   END PROCESS;
 
   ------------------------------------------------------------------------------
@@ -421,7 +448,7 @@ BEGIN
   -- Pipelining
   ------------------------------------------------------------------------------
 
-  -- . input
+  -- . input streams
   u_in_sosi_arr_p : ENTITY work.dp_pipeline_arr
   GENERIC MAP (
     g_nof_streams => g_nof_streams,
@@ -439,4 +466,19 @@ BEGIN
   -- . read RAM
   rd_copi <= nxt_r.rd_copi WHEN g_rd_latency = 1 ELSE r.rd_copi;
 
+  -- . output streams
+  u_out_sosi_arr_p : ENTITY work.dp_pipeline_arr
+  GENERIC MAP (
+    g_nof_streams => g_nof_streams,
+    g_pipeline    => g_pipeline_output
+  )
+  PORT MAP (
+    rst          => dp_rst,
+    clk          => dp_clk,
+    -- ST sink
+    snk_in_arr   => comb_out_sosi_arr,
+    -- ST source
+    src_out_arr  => out_sosi_arr
+  );
+
 END rtl;
diff --git a/libraries/base/dp/src/vhdl/mmp_dp_bsn_align_v2.vhd b/libraries/base/dp/src/vhdl/mmp_dp_bsn_align_v2.vhd
index 5f55dc5a6675482176658ff3d29b308769cde600..e18047afe4322bab4ac2b2909e2ab21350987c4b 100644
--- a/libraries/base/dp/src/vhdl/mmp_dp_bsn_align_v2.vhd
+++ b/libraries/base/dp/src/vhdl/mmp_dp_bsn_align_v2.vhd
@@ -43,13 +43,19 @@ USE work.dp_stream_pkg.ALL;
 
 ENTITY mmp_dp_bsn_align_v2 IS
   GENERIC (
+    -- for dp_bsn_align_v2
     g_nof_streams                : NATURAL;           -- number of input and output streams
     g_bsn_latency_max            : NATURAL;           -- Maximum travel latency of a remote block in number of block periods T_blk
     g_nof_aligners_max           : NATURAL := 1;      -- 1 when only align at last node, > 1 when align at every intermediate node
     g_block_size                 : NATURAL := 32;     -- > 1, g_block_size=1 is not supported
     g_bsn_w                      : NATURAL := c_dp_stream_bsn_w;  -- number of bits in sosi BSN
     g_data_w                     : NATURAL;           -- number of bits in sosi data
-    g_replacement_value          : INTEGER := 0;      -- output sosi data value for missing input blocks
+    g_data_replacement_value     : INTEGER := 0;      -- output sosi data value for missing input blocks
+    g_use_mm_output              : BOOLEAN := FALSE;  -- output via MM or via streaming DP
+    g_pipeline_input             : NATURAL := 1;      -- >= 0, choose 0 for wires, choose 1 to ease timing closure of in_sosi_arr
+    g_pipeline_output            : NATURAL := 1;      -- >= 0, choose 0 for wires, choose 1 to ease timing closure of out_sosi_arr
+    g_rd_latency                 : NATURAL := 2;      -- 1 or 2, choose 2 to ease timing closure
+    -- for mms_dp_bsn_monitor_v2
     g_nof_clk_per_sync           : NATURAL := 200*10**6;
     g_nof_input_bsn_monitors     : NATURAL := 0;
     g_use_bsn_output_monitor     : BOOLEAN := FALSE
@@ -59,8 +65,8 @@ ENTITY mmp_dp_bsn_align_v2 IS
     mm_rst                  : IN  STD_LOGIC;
     mm_clk                  : IN  STD_LOGIC;
 
-    reg_copi                : IN  t_mem_copi;
-    reg_cipo                : OUT t_mem_cipo;
+    reg_bsn_align_copi      : IN  t_mem_copi;
+    reg_bsn_align_cipo      : OUT t_mem_cipo;
 
     reg_input_monitor_copi  : IN  t_mem_copi;
     reg_input_monitor_cipo  : OUT t_mem_cipo;
@@ -69,18 +75,21 @@ ENTITY mmp_dp_bsn_align_v2 IS
     reg_output_monitor_cipo : OUT t_mem_cipo;
 
     -- Streaming clock domain
-    dp_rst         : IN  STD_LOGIC;
-    dp_clk         : IN  STD_LOGIC;
+    dp_rst                  : IN  STD_LOGIC;
+    dp_clk                  : IN  STD_LOGIC;
 
-    node_index     : IN  NATURAL := 0;  -- only used when g_nof_aligners_max > 1
+    node_index              : IN  NATURAL RANGE 0 TO g_nof_aligners_max-1 := 0;  -- only used when g_nof_aligners_max > 1
 
     -- Streaming input
-    in_sosi_arr    : IN  t_dp_sosi_arr(g_nof_streams-1 DOWNTO 0);
+    in_sosi_arr             : IN  t_dp_sosi_arr(g_nof_streams-1 DOWNTO 0);
 
-    -- Output via local MM in dp_clk domain
-    mm_copi        : IN  t_mem_copi;  -- read access to output block, all output streams share same mm_copi
-    mm_cipo_arr    : OUT t_mem_cipo_arr(g_nof_streams-1 DOWNTO 0);
-    mm_sosi        : OUT t_dp_sosi   -- streaming information that signals that an output block can be read
+    -- Output via local MM interface in dp_clk domain, when g_use_mm_output = TRUE.
+    mm_sosi                 : OUT t_dp_sosi;   -- streaming information that signals that an output block can be read
+    mm_copi                 : IN  t_mem_copi := c_mem_copi_rst;  -- read access to output block, all output streams share same mm_copi
+    mm_cipo_arr             : OUT t_mem_cipo_arr(g_nof_streams-1 DOWNTO 0);
+
+    -- Output via streaming DP interface, when g_use_mm_output = FALSE.
+    out_sosi_arr            : OUT t_dp_sosi_arr(g_nof_streams-1 DOWNTO 0)
   );
 END mmp_dp_bsn_align_v2;
 
@@ -95,15 +104,19 @@ ARCHITECTURE str OF mmp_dp_bsn_align_v2 IS
   --   dat_w     : NATURAL;
   --   nof_dat   : NATURAL;    -- optional, nof dat words <= 2**adr_w
   --   init_sl   : STD_LOGIC;  -- optional, init all dat words to std_logic '0', '1' or 'X'
-  CONSTANT c_mm_reg     : t_c_mem := (1, ceil_log2(g_nof_streams), 1, g_nof_streams, '0');
+  CONSTANT c_mm_reg       : t_c_mem := (1, ceil_log2(g_nof_streams), 1, g_nof_streams, '0');
 
-  SIGNAL reg_wr         : STD_LOGIC_VECTOR(c_mm_reg.nof_dat*c_mm_reg.dat_w-1 DOWNTO 0);
-  SIGNAL stream_en_arr  : STD_LOGIC_VECTOR(g_nof_streams-1 DOWNTO 0);
+  SIGNAL reg_wr           : STD_LOGIC_VECTOR(c_mm_reg.nof_dat*c_mm_reg.dat_w-1 DOWNTO 0);
+  SIGNAL stream_en_arr    : STD_LOGIC_VECTOR(g_nof_streams-1 DOWNTO 0);
 
-  SIGNAL mm_sosi_arr    : t_dp_sosi_arr(0 DOWNTO 0);
+  SIGNAL ref_sync         : STD_LOGIC;
+  SIGNAL mon_out_sosi_arr : t_dp_sosi_arr(0 DOWNTO 0);
+  SIGNAL i_out_sosi_arr   : t_dp_sosi_arr(g_nof_streams-1  DOWNTO 0);
+  SIGNAL i_mm_sosi        : t_dp_sosi;
 
 BEGIN
 
+  -- MM control of BSN aligner
   u_reg : ENTITY common_lib.common_reg_r_w_dc
   GENERIC MAP (
     g_cross_clock_domain   => TRUE,
@@ -118,8 +131,8 @@ BEGIN
     st_clk         => dp_clk,
 
     -- Memory Mapped Slave in mm_clk domain
-    sla_in         => reg_copi,
-    sla_out        => reg_cipo,
+    sla_in         => reg_bsn_align_copi,
+    sla_out        => reg_bsn_align_cipo,
 
     -- MM registers in st_clk domain
     reg_wr_arr     => OPEN,
@@ -130,6 +143,9 @@ BEGIN
 
   stream_en_arr <= reg_wr;
 
+  -- Use local sync as reference sync input for the BSN monitors
+  ref_sync <= in_sosi_arr(0).sync;
+
   -- Use input BSN monitors for the first g_nof_input_bsn_monitors input
   -- streams, e.g. to support:
   -- . only one input stream (g_nof_input_bsn_monitors = 1), or
@@ -156,9 +172,8 @@ BEGIN
       -- Streaming clock domain
       dp_rst         => dp_rst,
       dp_clk         => dp_clk,
-      ref_sync       => in_sosi_arr(0).sync,  -- local reference sync input
+      ref_sync       => ref_sync,
 
-      in_siso_arr    => (OTHERS=>c_dp_siso_rdy),
       in_sosi_arr    => in_sosi_arr(g_nof_input_bsn_monitors-1 DOWNTO 0)
     );
   END GENERATE;
@@ -185,13 +200,19 @@ BEGIN
       -- Streaming clock domain
       dp_rst         => dp_rst,
       dp_clk         => dp_clk,
-      ref_sync       => in_sosi_arr(0).sync,  -- local reference sync input
+      ref_sync       => ref_sync,
 
-      in_siso_arr    => (OTHERS=>c_dp_siso_rdy),
-      in_sosi_arr    => mm_sosi_arr
+      in_sosi_arr    => mon_out_sosi_arr
     );
   END GENERATE;
 
+  -- Use mm_sosi or out_sosi_arr(0) from BSN aligner for output BSN monitor
+  mon_out_sosi_arr(0) <= i_mm_sosi WHEN g_use_mm_output = TRUE ELSE i_out_sosi_arr(0);
+
+  -- wire to output
+  mm_sosi <= i_mm_sosi;
+  out_sosi_arr <= i_out_sosi_arr;
+
   u_bsn_align : ENTITY work.dp_bsn_align_v2
   GENERIC MAP (
     g_nof_streams                => g_nof_streams,
@@ -200,7 +221,11 @@ BEGIN
     g_block_size                 => g_block_size,
     g_bsn_w                      => g_bsn_w,
     g_data_w                     => g_data_w,
-    g_replacement_value          => g_replacement_value
+    g_data_replacement_value     => g_data_replacement_value,
+    g_use_mm_output              => g_use_mm_output,
+    g_pipeline_input             => g_pipeline_input,
+    g_pipeline_output            => g_pipeline_output,
+    g_rd_latency                 => g_rd_latency
   )
   PORT MAP (
     dp_rst         => dp_rst,
@@ -211,12 +236,12 @@ BEGIN
     -- Streaming input
     in_sosi_arr    => in_sosi_arr,
     -- Output via local MM in dp_clk domain
+    mm_sosi        => i_mm_sosi,
     mm_copi        => mm_copi,
     mm_cipo_arr    => mm_cipo_arr,
-    mm_sosi        => mm_sosi
+    -- Output via streaming DP interface, when g_use_mm_output = FALSE.
+    out_sosi_arr   => i_out_sosi_arr
   );
 
-  mm_sosi <= mm_sosi_arr(0);
-
 END str;
 
diff --git a/libraries/base/dp/tb/vhdl/tb_dp_bsn_align_v2.vhd b/libraries/base/dp/tb/vhdl/tb_dp_bsn_align_v2.vhd
index 4e62f1ceba00e9d1ddd859c1ef6ed081bfb85ec4..1c464c468c67424c91dadffc99860e32c941e4c4 100644
--- a/libraries/base/dp/tb/vhdl/tb_dp_bsn_align_v2.vhd
+++ b/libraries/base/dp/tb/vhdl/tb_dp_bsn_align_v2.vhd
@@ -19,6 +19,54 @@
 -- Author: Eric Kooistra, 3 Sept 2021
 -- Purpose: Verify dp_bsn_align_v2
 -- Description:
+--   The tb verifies:
+--   . DUT alignment of g_nof_streams >= 2
+--   . DUT restart via g_tb_nof_restart > 1
+--   . gaps or no gaps between blocks via g_block_period >= g_block_size
+--   . g_bsn_latency_max in combination with g_tb_diff_delay
+--   . g_use_mm_output using DUT MM to DP or external MM to DP in tb
+--   . g_data_replacement_value for a remote g_disable_stream_id (it is
+--     sufficient to verify one stream)
+--   . g_data_replacement_value and lost flag channel(0) bit for a lost
+--     remote g_lost_stream_id
+--   . the local stream cannot be disabled or lost, because if it does,
+--     then there is no output, which is verified by g_tb_nof_restart >= 2
+--   . g_lost_bsn_id to lose a single block in stream 1 and verify that
+--     it gets replaced and flagged.
+--   . array of one or more BSN aligners via g_nof_aligners_max >= 1,
+--     using node_index_arr, only support tb for g_use_mm_output = FALSE
+-- Remark:
+--   For this BSN aligner component it was essential to have an almost
+--   complete, reviewed, detailed design document, because it is a complex
+--   component. Main difference after review was adding built-in support
+--   for streaming output via g_use_mm_output. The clear design made it
+--   much easier to achieve a draft implementation that was almost correct.
+--   For the DUT implementation it was also essential to use the p_reg,
+--   p_comb coding template.
+--   The initial DUT implementation did not change much anymore after the
+--   first tb tests, except for some small (but important) corrections.
+--   Each feature of the tb took several hours or up to a day to add. Much
+--   time was also spent to regularly clean up and simplify the code. The
+--   last tb feature that verifies a chain of DUTs is really nice to see,
+--   e.g. by expanding dut_out_sosi_2_arr in the Wave window. The ascii
+--   drawing at gen_bsn_align_chain was used to more easily connect and
+--   clarify the wiring for the chain of DUTs.
+--   Implementing the tb took more time than the DUT, but the design
+--   document and especially the design decisions took the most time.
+--   The design decision to use a circular buffer instead of FIFOs and the
+--   design decision to rely on a local reference and fixed latencies were
+--   important. Initial prestudy end 2020 was done based on experience with
+--   aligning streams in LOFAR1 and with the bsn_aligner.vhd in APERTIF.
+--   About a year later, in autumn 2021, the detailed design and implementation were done.
+--   . prestudy ~ 1 week
+--   . design decisions doc ~ 1 week (based on prestudy)
+--   . detailed design doc ~ 1 week
+--   . review and process review ~ 2 days
+--   . implement DUT ~ 2 days (core in 1 day, features one more day)
+--   . implement tb (and adding to tb_tb) ~ 1 week
+--   . implement tb_mmp ~ 1 day
+--   Total: ~ 6 weeks
+--
 -- Usage:
 -- > as 10
 -- > run -all
@@ -37,22 +85,27 @@ USE dp_lib.tb_dp_pkg.ALL;
 ENTITY tb_dp_bsn_align_v2 IS
   GENERIC (
     -- DUT
-    g_nof_streams          : NATURAL := 2;      -- number of input and output streams
-    g_bsn_latency_max      : NATURAL := 2;      -- Maximum travel latency of a remote block in number of block periods T_blk
-    g_nof_aligners_max     : POSITIVE := 1;     -- 1 when only align at last node, > 1 when align at every intermediate node
-    g_block_size           : NATURAL := 11;     -- > 1, g_block_size=1 is not supported
-    g_block_period         : NATURAL := 20;     -- >= g_block_size, = g_block_size + c_gap_size
-    g_bsn_w                : NATURAL := c_dp_stream_bsn_w;  -- number of bits in sosi BSN
-    g_data_w               : NATURAL := 16;     -- number of bits in sosi data
-    g_replacement_value    : INTEGER := 17;      -- output sosi data replacement value for missing input blocks
-    g_use_mm_output        : BOOLEAN := FALSE;   -- output via MM or via streaming DP
-    g_pipeline_input       : NATURAL := 1;      -- >= 0, choose 0 for wires, choose 1 to ease timing closure
-    g_rd_latency           : NATURAL := 2;      -- 1 or 2, choose 2 to ease timing closure
+    g_nof_streams            : NATURAL := 2;      -- number of input and output streams
+    g_bsn_latency_max        : NATURAL := 1;      -- Maximum travel latency of a remote block in number of block periods T_blk
+    g_nof_aligners_max       : POSITIVE := 1;     -- 1 when only align at last node, > 1 when align at every intermediate node
+    g_block_size             : NATURAL := 11;     -- > 1, g_block_size=1 is not supported
+    g_block_period           : NATURAL := 20;     -- >= g_block_size, = g_block_size + c_gap_size
+    g_bsn_w                  : NATURAL := c_dp_stream_bsn_w;  -- number of bits in sosi BSN
+    g_data_w                 : NATURAL := 16;     -- number of bits in sosi data
+    g_data_replacement_value : INTEGER := 17;     -- output sosi data replacement value for missing input blocks
+    g_disable_stream_id      : NATURAL := 0;      -- default 0 to enable all streams, > 0 selects stream that will be disabled
+    g_lost_stream_id         : NATURAL := 0;      -- default 0 to have all streams, > 0 selects stream that will be lost
+    g_lost_bsn_id            : NATURAL := 0;      -- for stream 1 the block with bsn = g_lost_bsn_id will be lost
+    g_use_mm_output          : BOOLEAN := FALSE;  -- output via MM or via streaming DP
+    g_pipeline_input         : NATURAL := 0;      -- >= 0, choose 0 for wires, choose 1 to ease timing closure of in_sosi_arr
+    g_pipeline_output        : NATURAL := 0;      -- >= 0, choose 0 for wires, choose 1 to ease timing closure of out_sop_arr
+    g_rd_latency             : NATURAL := 1;      -- 1 or 2, choose 2 to ease timing closure
 
     -- TB
-    g_tb_diff_delay_max    : NATURAL := 10;      -- maximum nof clk delay between any inputs, <= c_align_latency_nof_clk
-    g_tb_nof_restart       : NATURAL := 2;       -- number of times to restart the input stimuli
-    g_tb_nof_blocks        : NATURAL := 20       -- number of input blocks per restart
+    g_tb_diff_delay          : INTEGER := 0;       -- 0 = aligned inputs, -1 = max input delay for no loss,
+                                                   -- >~ g_bsn_latency_max * g_block_period will give loss
+    g_tb_nof_restart         : NATURAL := 2;       -- number of times to restart the input stimuli
+    g_tb_nof_blocks          : NATURAL := 20       -- number of input blocks per restart
   );
 END tb_dp_bsn_align_v2;
 
@@ -64,17 +117,42 @@ ARCHITECTURE tb OF tb_dp_bsn_align_v2 IS
   CONSTANT c_data_w                   : NATURAL := 16;
   CONSTANT c_data_init                : INTEGER := 0;
   CONSTANT c_bsn_w                    : NATURAL := 16;  -- use <= 31 bit to fit NATURAL
-  CONSTANT c_bsn_init                 : NATURAL := 3;
+  CONSTANT c_bsn_init                 : NATURAL := 3;   -- use > 0 to have no lost data for g_lost_bsn_id = 0
   CONSTANT c_channel_init             : INTEGER := 0;
   CONSTANT c_err_init                 : NATURAL := 247;
   CONSTANT c_sync_period              : NATURAL := 7;
   CONSTANT c_sync_offset              : NATURAL := 2;
   
+  -- maximum nof clk delay between any inputs, <= c_align_latency_nof_clk
+  -- . the -1 is due to some acceptable pipeline detail related to dp_block_from_mm
+  CONSTANT c_diff_delay_max           : NATURAL := g_bsn_latency_max * g_block_period - sel_a_b(g_rd_latency > 1, 0, 1);
+  CONSTANT c_diff_delay               : NATURAL := sel_a_b(g_tb_diff_delay < 0, c_diff_delay_max, g_tb_diff_delay);
+
+  -- Return input delay as function of input stream index I
+  FUNCTION func_input_delay(I : NATURAL) RETURN NATURAL IS
+  BEGIN
+    RETURN c_diff_delay * I / (g_nof_streams - 1);
+  END;
+
   CONSTANT c_gap_size                 : NATURAL := g_block_period - g_block_size;
-  CONSTANT c_dut_latency              : NATURAL := g_pipeline_input + g_rd_latency + 2;
-  CONSTANT c_align_latency_nof_blocks : NATURAL := g_bsn_latency_max;  -- DUT buffer latency in number blocks
-  CONSTANT c_align_latency_nof_valid  : NATURAL := g_bsn_latency_max * g_block_size;  -- DUT buffer latency in number of data samples
-  CONSTANT c_align_latency_nof_clk    : NATURAL := g_bsn_latency_max * g_block_period;  -- DUT buffer latency in number clk cycles
+
+  CONSTANT c_lost_bsn_stream_id       : NATURAL := 1;  -- fixed use stream 1 to verify g_lost_bsn_id
+
+  -- In the tb only support MM interface verification for c_nof_aligners_max = 1
+  CONSTANT c_nof_aligners_max  : POSITIVE := sel_a_b(g_use_mm_output, 1, g_nof_aligners_max);
+
+  -- DUT latency of chain of DUTs is same as DUT latency of one DUT, so
+  -- independent c_nof_aligners_max. This is because the c_dut_latency of the
+  -- other DUTs is covered by the buffer latency.
+  CONSTANT c_mm_to_dp_latency         : NATURAL := 1;
+  CONSTANT c_dut_latency              : NATURAL := g_pipeline_input + g_rd_latency + c_mm_to_dp_latency + g_pipeline_output;
+
+  -- DUT buffer latency for chain of DUTs
+  CONSTANT c_align_latency_nof_blocks : NATURAL := g_bsn_latency_max * c_nof_aligners_max;  -- in number blocks
+  CONSTANT c_align_latency_nof_valid  : NATURAL := g_bsn_latency_max * c_nof_aligners_max * g_block_size;  -- in number of data samples
+  CONSTANT c_align_latency_nof_clk    : NATURAL := g_bsn_latency_max * c_nof_aligners_max * g_block_period;  -- in number clk cycles
+
+  -- Total DUT chain latency
   CONSTANT c_total_latency            : NATURAL := c_dut_latency + c_align_latency_nof_clk;
   CONSTANT c_verify_nof_blocks        : NATURAL := g_tb_nof_blocks - c_align_latency_nof_blocks;  -- skip last blocks that are still in the DUT buffer
 
@@ -92,18 +170,26 @@ ARCHITECTURE tb OF tb_dp_bsn_align_v2 IS
     out_sosi_arr : t_dp_sosi_arr(g_nof_streams-1 DOWNTO 0);
   END RECORD;
 
+  TYPE t_dut_sosi_2arr IS ARRAY (NATURAL RANGE <>) OF t_dp_sosi_arr(g_nof_streams-1 DOWNTO 0);
+
   SIGNAL tb_end_arr            : STD_LOGIC_VECTOR(g_nof_streams-1 DOWNTO 0) := (OTHERS => '0');
   SIGNAL tb_end                : STD_LOGIC;
   SIGNAL clk                   : STD_LOGIC := '1';
   SIGNAL rst                   : STD_LOGIC := '1';
+  SIGNAL sl1                   : STD_LOGIC := '1';
 
-  SIGNAL node_index            : NATURAL := 0;
+  SIGNAL node_index_arr        : t_nat_natural_arr(0 TO c_nof_aligners_max-1) := array_init(0, c_nof_aligners_max, 1);
 
   SIGNAL stream_en_arr         : STD_LOGIC_VECTOR(g_nof_streams-1 DOWNTO 0) := (OTHERS => '1');  -- default all streams are enabled
+  SIGNAL stream_lost_arr       : STD_LOGIC_VECTOR(g_nof_streams-1 DOWNTO 0) := (OTHERS => '0');  -- default no streams are lost
+  SIGNAL in_bsn_lost_arr       : STD_LOGIC_VECTOR(g_nof_streams-1 DOWNTO 0) := (OTHERS => '0');  -- default no blocks are lost
+  SIGNAL in_bsn_lost           : STD_LOGIC;  -- = in_bsn_lost_arr(c_lost_bsn_stream_id)
+  SIGNAL exp_bsn_lost_arr      : STD_LOGIC_VECTOR(g_nof_streams-1 DOWNTO 0) := (OTHERS => '0');  -- default no blocks are lost
 
   SIGNAL ref_siso_arr          : t_dp_siso_arr(g_nof_streams-1 DOWNTO 0) := (OTHERS => c_dp_siso_rdy);
-  SIGNAL ref_sosi_arr          : t_dp_sosi_arr(g_nof_streams-1 DOWNTO 0);
-  SIGNAL in_sosi_arr           : t_dp_sosi_arr(g_nof_streams-1 DOWNTO 0) := (OTHERS => c_dp_sosi_rst);
+  SIGNAL ref_sosi_arr          : t_dp_sosi_arr(g_nof_streams-1 DOWNTO 0);                               -- generated stimuli
+  SIGNAL rx_sosi_arr           : t_dp_sosi_arr(g_nof_streams-1 DOWNTO 0) := (OTHERS => c_dp_sosi_rst);  -- received stimuli
+  SIGNAL in_sosi_arr           : t_dp_sosi_arr(g_nof_streams-1 DOWNTO 0) := (OTHERS => c_dp_sosi_rst);  -- input stimuli
 
   SIGNAL in_sync_arr           : STD_LOGIC_VECTOR(g_nof_streams-1 DOWNTO 0);
   SIGNAL in_sop_arr            : STD_LOGIC_VECTOR(g_nof_streams-1 DOWNTO 0);
@@ -120,12 +206,13 @@ ARCHITECTURE tb OF tb_dp_bsn_align_v2 IS
   SIGNAL mm_sosi               : t_dp_sosi;   -- streaming information that signals that an output block can be read
   SIGNAL mm_done_arr           : STD_LOGIC_VECTOR(g_nof_streams-1 DOWNTO 0);
   SIGNAL mm_done               : STD_LOGIC;
-  SIGNAL dut_sosi_arr          : t_dp_sosi_arr(g_nof_streams-1 DOWNTO 0);
-  SIGNAL tb_sosi_arr           : t_dp_sosi_arr(g_nof_streams-1 DOWNTO 0);
+  SIGNAL mm_sosi_arr           : t_dp_sosi_arr(g_nof_streams-1 DOWNTO 0);
+  SIGNAL dut_in_sosi_2arr      : t_dut_sosi_2arr(0 TO c_nof_aligners_max-1);
+  SIGNAL dut_out_sosi_2arr     : t_dut_sosi_2arr(0 TO c_nof_aligners_max-1);
+  SIGNAL dut_sosi_arr          : t_dp_sosi_arr(g_nof_streams-1 DOWNTO 0);  -- last BSN aligner output
   SIGNAL r                     : t_reg;
   SIGNAL nxt_r                 : t_reg;
 
-  SIGNAL out_siso_arr          : t_dp_siso_arr(g_nof_streams-1 DOWNTO 0) := (OTHERS => c_dp_siso_rdy);
   SIGNAL out_sosi_arr          : t_dp_sosi_arr(g_nof_streams-1 DOWNTO 0);
   SIGNAL out_sosi              : t_dp_sosi;
 
@@ -136,11 +223,11 @@ ARCHITECTURE tb OF tb_dp_bsn_align_v2 IS
   SIGNAL out_data_arr          : t_data_arr;
   SIGNAL hold_data_arr         : t_data_arr;
   SIGNAL out_bsn_arr           : t_bsn_arr;
+  SIGNAL out_bsn               : INTEGER;
   SIGNAL out_channel_arr       : t_channel_arr;
   SIGNAL out_err_arr           : t_err_arr;
 
   SIGNAL tb_state              : t_tb_state;
-  SIGNAL tb_bsn                : INTEGER;
   SIGNAL restart_cnt_arr       : t_nat_integer_arr(g_nof_streams-1 DOWNTO 0) := (OTHERS => -1);
   SIGNAL restart_cnt           : INTEGER := 0;
   SIGNAL ref_sosi_arr_dly      : t_dp_sosi_arr(g_nof_streams-1 DOWNTO 0) := (OTHERS => c_dp_sosi_rst);
@@ -149,18 +236,27 @@ ARCHITECTURE tb OF tb_dp_bsn_align_v2 IS
   SIGNAL verify_done_arr       : STD_LOGIC_VECTOR(g_nof_streams-1 DOWNTO 0) := (OTHERS => '0');
   SIGNAL verify_sosi_en_arr    : STD_LOGIC_VECTOR(g_nof_streams-1 DOWNTO 0) := (OTHERS => '0');
 
-  SIGNAL hold_out_sop_arr      : STD_LOGIC_VECTOR(g_nof_streams-1 DOWNTO 0) := (OTHERS => '0');
-  SIGNAL expected_out_bsn_arr  : t_bsn_arr;
-  SIGNAL expected_out_data_arr : t_data_arr;
+  SIGNAL hold_out_sop_arr         : STD_LOGIC_VECTOR(g_nof_streams-1 DOWNTO 0) := (OTHERS => '0');
+  SIGNAL expected_out_bsn_arr     : t_bsn_arr;
+  SIGNAL expected_out_data_arr    : t_data_arr;
+  SIGNAL expected_out_channel_arr : t_channel_arr;
 
-  -- Return input delay as function of inputs stream index I
-  FUNCTION func_input_delay(I : NATURAL) RETURN NATURAL IS
-  BEGIN
-    RETURN g_tb_diff_delay_max * I / (g_nof_streams - 1);
-  END;
+  -- Debug signals to view in Wave window
+  SIGNAL dbg_func_delay_max             : NATURAL := func_input_delay(g_nof_streams - 1);
 
-  SIGNAL dbg_tb_diff_delay_max : NATURAL := g_tb_diff_delay_max;
-  SIGNAL dbg_func_delay_max : NATURAL := func_input_delay(g_nof_streams - 1);
+  SIGNAL dbg_c_align_latency_nof_blocks : NATURAL := c_align_latency_nof_blocks;
+  SIGNAL dbg_c_align_latency_nof_valid  : NATURAL := c_align_latency_nof_valid;
+  SIGNAL dbg_c_align_latency_nof_clk    : NATURAL := c_align_latency_nof_clk;
+  SIGNAL dbg_c_total_latency            : NATURAL := c_total_latency;
+  SIGNAL dbg_c_verify_nof_blocks        : NATURAL := c_verify_nof_blocks;
+
+  -- Debug signals to view that verification conditions actually occur
+  SIGNAL dbg_verify_sosi_control_arr    : STD_LOGIC_VECTOR(g_nof_streams-1 DOWNTO 0) := (OTHERS => '0');   -- '1' when sosi control is verified
+  SIGNAL dbg_verify_passed_on_data_arr  : STD_LOGIC_VECTOR(g_nof_streams-1 DOWNTO 0) := (OTHERS => '0');   -- '1' when passed on data is verified
+  SIGNAL dbg_verify_replaced_data_arr   : STD_LOGIC_VECTOR(g_nof_streams-1 DOWNTO 0) := (OTHERS => '0');   -- '1' when replaced data is verified
+  SIGNAL dbg_verify_bsn_arr             : STD_LOGIC_VECTOR(g_nof_streams-1 DOWNTO 0) := (OTHERS => '0');   -- '1' when bsn in all streams is verified
+  SIGNAL dbg_verify_no_lost_flag_arr    : STD_LOGIC_VECTOR(g_nof_streams-1 DOWNTO 0) := (OTHERS => '0');   -- '1' when lost data flag = 0 is verified
+  SIGNAL dbg_verify_lost_flag_arr       : STD_LOGIC_VECTOR(g_nof_streams-1 DOWNTO 0) := (OTHERS => '0');   -- '1' when lost data flag = 1 is verified
 
 BEGIN
 
@@ -172,7 +268,7 @@ BEGIN
   ------------------------------------------------------------------------------
   
   -- Generate data path input data
-  gen_input : FOR I IN g_nof_streams-1 DOWNTO 0 GENERATE
+  gen_stimuli : FOR I IN g_nof_streams-1 DOWNTO 0 GENERATE
     p_stimuli : PROCESS
       VARIABLE v_sync      : STD_LOGIC := '0';
       VARIABLE v_bsn       : NATURAL;
@@ -189,24 +285,39 @@ BEGIN
       -- Begin of stimuli
       FOR S IN 0 TO g_tb_nof_restart-1 LOOP
         v_bsn := c_bsn_init;
-        IF S = 2 THEN
-          stream_en_arr(1) <= '0';
-        END IF;
         FOR R IN 0 TO g_tb_nof_blocks-1 LOOP
           v_sync := sel_a_b(v_bsn MOD c_sync_period = c_sync_offset, '1', '0');
-          proc_dp_gen_block_data(c_rl, TRUE, c_data_w, c_data_w, v_data, 0, 0, g_block_size, v_channel, v_err, v_sync, TO_UVEC(v_bsn, c_bsn_w), clk, stream_en_arr(I), ref_siso_arr(I), ref_sosi_arr(I));
+          proc_dp_gen_block_data(c_rl, TRUE, c_data_w, c_data_w, v_data, 0, 0, g_block_size, v_channel, v_err, v_sync, TO_UVEC(v_bsn, c_bsn_w), clk, sl1, ref_siso_arr(I), ref_sosi_arr(I));
           v_bsn  := v_bsn + 1;
           v_data := v_data + g_block_size;
           proc_common_wait_some_cycles(clk, c_gap_size);  -- create gap between frames
         END LOOP;
-        -- no gap between restarts, to ease verification by maintaining fixed latency of out_sosi_arr_exp
+        -- no gap between restarts, to ease verification by maintaining fixed
+        -- latency of out_sosi_arr_exp
         restart_cnt_arr(I) <= restart_cnt_arr(I) + 1;
       END LOOP;
 
-      -- End of stimuli, g_bsn_latency_max blocks remain in DUT buffer
+      -- End of stimuli
+      -- . default g_bsn_latency_max blocks remain in DUT buffer
       expected_out_bsn_arr(I) <= TO_UVEC(v_bsn-1 - c_align_latency_nof_blocks, c_bsn_w);
       expected_out_data_arr(I) <= TO_UVEC(v_data-1 - c_align_latency_nof_valid, c_data_w);
-      
+      -- . default no data is lost, so all channel(0) lost data flags are 0
+      expected_out_channel_arr(I) <= TO_DP_CHANNEL(0);
+
+      -- Account for disturbed remote input streams
+      IF I > 0 THEN
+        IF I = g_disable_stream_id THEN
+          -- Expected stream disable replacement data value
+          expected_out_data_arr(I) <= TO_UVEC(g_data_replacement_value, c_data_w);
+        END IF;
+        IF I = g_lost_stream_id THEN
+          -- Expected stream lost replacement data value and expected lost
+          -- flag channel(0) value
+          expected_out_data_arr(I) <= TO_UVEC(g_data_replacement_value, c_data_w);
+          expected_out_channel_arr(I) <= TO_DP_CHANNEL(1);
+        END IF;
+      END IF;
+
       proc_common_wait_some_cycles(clk, 100);
       verify_done_arr(I) <= '1';
       proc_common_wait_some_cycles(clk, 1);
@@ -217,6 +328,39 @@ BEGIN
     END PROCESS;
   END GENERATE;
 
+  tb_end <= vector_and(tb_end_arr);
+
+  -- Model misalignment latency between the input streams.
+  -- . it is sufficient to only model misalignment for the first DUT in case
+  --   c_nof_aligners_max > 1.
+  gen_rx_sosi_arr : FOR I IN g_nof_streams-1 DOWNTO 0 GENERATE
+    rx_sosi_arr(I) <= TRANSPORT ref_sosi_arr(I) AFTER func_input_delay(I) * clk_period;
+  END GENERATE;
+
+  -- Model enable/disable remote input stream
+  stream_en_arr(g_disable_stream_id) <= '0' WHEN g_disable_stream_id > 0;
+
+  -- Model lost remote input stream
+  stream_lost_arr(g_lost_stream_id) <= '1' WHEN g_lost_stream_id > 0;
+
+  -- Model lost block on one remote input stream c_lost_bsn_stream_id
+  in_bsn_lost_arr(c_lost_bsn_stream_id) <= '1' WHEN TO_UINT(rx_sosi_arr(c_lost_bsn_stream_id).bsn) = g_lost_bsn_id ELSE '0';
+  in_bsn_lost <= in_bsn_lost_arr(c_lost_bsn_stream_id);
+
+  p_in_sosi_arr : PROCESS(rx_sosi_arr, stream_lost_arr, in_bsn_lost_arr)
+  BEGIN
+    in_sosi_arr <= rx_sosi_arr;
+    -- Model entirely lost remote input stream
+    IF stream_lost_arr(g_lost_stream_id) = '1' THEN
+      in_sosi_arr(g_lost_stream_id) <= RESET_DP_SOSI_CTRL(rx_sosi_arr(g_lost_stream_id));
+    END IF;
+    -- Model single lost block in a stream (stream c_lost_bsn_stream_id = 1)
+    IF in_bsn_lost_arr(c_lost_bsn_stream_id) = '1' THEN
+      in_sosi_arr(c_lost_bsn_stream_id) <= RESET_DP_SOSI_CTRL(rx_sosi_arr(c_lost_bsn_stream_id));
+    END IF;
+  END PROCESS;
+
+
   -- Use tb_state to view tb progress in Wave window
   restart_cnt <= restart_cnt_arr(0);
 
@@ -228,14 +372,6 @@ BEGIN
     IF restart_cnt > 1 THEN tb_state <= s_restart; END IF;
   END PROCESS;
 
-  -- Create latency misalignment between the input streams
-  gen_in_sosi_arr : FOR I IN g_nof_streams-1 DOWNTO 0 GENERATE
-    in_sosi_arr(I) <= TRANSPORT ref_sosi_arr(I) AFTER func_input_delay(I) * clk_period;
-  END GENERATE;
-
-
-  tb_end <= vector_and(tb_end_arr);
-  
   mon_sosi : FOR I IN g_nof_streams-1 DOWNTO 0 GENERATE
     -- Ease in_sosi_arr monitoring
     in_sync_arr(I)    <= in_sosi_arr(I).sync;
@@ -260,14 +396,14 @@ BEGIN
 
   out_sosi <= out_sosi_arr(0);  -- take out_sosi control and info from out_sosi_arr(0)
 
+  out_bsn <= TO_UINT(out_sosi.bsn);  -- = out_bsn_arr().bsn = out_sosi_arr(I).bsn
+
   ------------------------------------------------------------------------------
   -- DATA VERIFICATION, use multiple ways to increase coverage
-  -- a) Use proc_dp_verify_*() to verify output compared to prev output
-  -- b) Use delayed in_sosi_arr as expected out_sosi_arr
+  -- a) Use proc_dp_verify_*() to verify output sosi format
+  -- b) Use delayed in_sosi_arr as expected out_sosi_arr to verify output sosi
   ------------------------------------------------------------------------------
 
-  tb_bsn <= TO_UINT(out_sosi.bsn);
-
   ref_sosi_arr_dly <= TRANSPORT ref_sosi_arr AFTER c_total_latency * clk_period;
   out_sosi_arr_exp <= ref_sosi_arr_dly WHEN rising_edge(clk);
   out_sosi_exp <= out_sosi_arr_exp(0);  -- take out_sosi_exp control and info from out_sosi_arr_exp(0)
@@ -280,16 +416,16 @@ BEGIN
 
     -- . Verify that the stimuli have been applied at all
     hold_data_arr(I) <= out_data_arr(I) WHEN out_val_arr(I) = '1';  -- hold last valid data
-
     proc_dp_verify_value("out_data_arr", e_equal, clk, verify_done_arr(I), expected_out_data_arr(I), hold_data_arr(I));
     proc_dp_verify_value("out_bsn_arr", e_equal, clk, verify_done_arr(I), expected_out_bsn_arr(I), out_bsn_arr(I));
+    proc_dp_verify_value("out_channel_arr", e_equal, clk, verify_done_arr(I), expected_out_channel_arr(I), out_channel_arr(I));
   END GENERATE;
   
   -- . Use delayed in_sosi_arr as expected out_sosi_arr, this is possible
   --   because the DUT has no flow control and has a fixed latency.
   p_verify_sosi_en_arr : PROCESS(out_sosi_exp)
   BEGIN
-    IF g_tb_diff_delay_max <= c_align_latency_nof_clk THEN
+    IF c_diff_delay <= c_align_latency_nof_clk THEN
       verify_sosi_en_arr <= (OTHERS => '1');
       IF TO_UINT(out_sosi_exp.bsn) - c_bsn_init >= c_verify_nof_blocks THEN
         verify_sosi_en_arr <= (OTHERS => '0');
@@ -297,23 +433,57 @@ BEGIN
     END IF;
   END PROCESS;
 
+  exp_bsn_lost_arr(c_lost_bsn_stream_id) <= '1' WHEN TO_UINT(out_sosi_arr_exp(c_lost_bsn_stream_id).bsn) = g_lost_bsn_id ELSE '0';
+
   gen_verify_streams : FOR I IN g_nof_streams-1 DOWNTO 0 GENERATE
-    p_verify_sosi : PROCESS(clk)
+    p_verify_stream : PROCESS(clk)
     BEGIN
       IF rising_edge(clk) THEN
+        dbg_verify_sosi_control_arr(I) <= '0';
+        dbg_verify_passed_on_data_arr(I) <= '0';
+        dbg_verify_replaced_data_arr(I) <= '0';
+        dbg_verify_bsn_arr(I) <= '0';
+        dbg_verify_no_lost_flag_arr(I) <= '0';
+        dbg_verify_lost_flag_arr(I) <= '0';
         IF verify_sosi_en_arr(I) = '1' AND out_sosi_arr_exp(I).valid = '1' THEN
+           -- Verify sosi control fields
+           dbg_verify_sosi_control_arr(I) <= '1';
            ASSERT out_sosi_arr(I).sync = out_sosi_arr_exp(I).sync REPORT "Wrong sync for output " & int_to_str(I) SEVERITY ERROR;
            ASSERT out_sosi_arr(I).sop = out_sosi_arr_exp(I).sop REPORT "Wrong sop for output " & int_to_str(I) SEVERITY ERROR;
            ASSERT out_sosi_arr(I).eop = out_sosi_arr_exp(I).eop REPORT "Wrong eop for output " & int_to_str(I) SEVERITY ERROR;
            ASSERT out_sosi_arr(I).valid = out_sosi_arr_exp(I).valid REPORT "Wrong valid for output " & int_to_str(I) SEVERITY ERROR;
-           IF stream_en_arr(I) = '1' THEN
-             ASSERT out_sosi_arr(I).data  = out_sosi_arr_exp(I).data REPORT "Wrong data for output " & int_to_str(I) & " : "
-                                                                            & int_to_str(TO_UINT(out_sosi_arr(I).data)) & " /= "
-                                                                            & int_to_str(TO_UINT(out_sosi_arr_exp(I).data)) SEVERITY ERROR;
+
+           -- Verify data field
+           IF stream_en_arr(I) = '1' AND stream_lost_arr(I) = '0' AND exp_bsn_lost_arr(I) = '0' THEN
+             -- verify passed on data
+             dbg_verify_passed_on_data_arr(I) <= '1';
+             ASSERT out_sosi_arr(I).data  = out_sosi_arr_exp(I).data REPORT "Wrong data for output stream " & int_to_str(I) & " : "
+                                                                       & int_to_str(TO_UINT(out_sosi_arr(I).data)) & " /= "
+                                                                       & int_to_str(TO_UINT(out_sosi_arr_exp(I).data)) SEVERITY ERROR;
            ELSE
-             ASSERT TO_UINT(out_sosi_arr(I).data) = g_replacement_value REPORT "Wrong data for output " & int_to_str(I) & " : "
-                                                                               & int_to_str(TO_UINT(out_sosi_arr(I).data)) & " /= "
-                                                                               & int_to_str(g_replacement_value) SEVERITY ERROR;
+             -- verify lost data stream at g_disable_stream_id or g_lost_stream_id or g_lost_bsn_id
+             dbg_verify_replaced_data_arr(I) <= '1';
+             ASSERT TO_UINT(out_sosi_arr(I).data) = g_data_replacement_value REPORT "Wrong replacement data for output stream " & int_to_str(I) & " : "
+                                                                       & int_to_str(TO_UINT(out_sosi_arr(I).data)) & " /= "
+                                                                       & int_to_str(g_data_replacement_value) SEVERITY ERROR;
+           END IF;
+
+           -- Verify sop info fields
+           IF out_sosi_arr_exp(I).sop = '1' THEN
+             -- bsn field
+             dbg_verify_bsn_arr(I) <= '1';
+             ASSERT out_sosi_arr(I).bsn = out_sosi_arr_exp(I).bsn REPORT "Wrong bsn for output " & int_to_str(I) SEVERITY ERROR;
+
+             -- channel field with lost flag bit 0
+             IF stream_lost_arr(I) = '0' AND exp_bsn_lost_arr(I) = '0' THEN
+               -- verify no lost stream
+               dbg_verify_no_lost_flag_arr(I) <= '1';
+               ASSERT out_sosi_arr(I).channel = TO_DP_CHANNEL(0) REPORT "Wrong lost flag bit in channel /= 0 for output " & int_to_str(I) SEVERITY ERROR;
+             ELSE
+               -- verify lost stream g_lost_stream_id or lost block g_lost_bsn_id
+               dbg_verify_lost_flag_arr(I) <= '1';
+               ASSERT out_sosi_arr(I).channel = TO_DP_CHANNEL(1) REPORT "Wrong lost flag bit channel /= 1 for output " & int_to_str(I) SEVERITY ERROR;
+             END IF;
            END IF;
         END IF;
       END IF;
@@ -323,37 +493,102 @@ BEGIN
   ------------------------------------------------------------------------------
   -- DUT 
   ------------------------------------------------------------------------------
-  
+
+  -- Connect all inputs to first DUT
+  dut_in_sosi_2arr(0) <= in_sosi_arr;
+
   u_bsn_align : ENTITY work.dp_bsn_align_v2
   GENERIC MAP (
     g_nof_streams                => g_nof_streams,
     g_bsn_latency_max            => g_bsn_latency_max,
-    g_nof_aligners_max           => g_nof_aligners_max,
+    g_nof_aligners_max           => c_nof_aligners_max,
     g_block_size                 => g_block_size,
     g_bsn_w                      => g_bsn_w,
     g_data_w                     => g_data_w,
-    g_replacement_value          => g_replacement_value,
-    g_use_mm_output              => g_use_mm_output,    -- output via MM or via streaming DP
-    g_pipeline_input             => g_pipeline_input,   -- >= 0, choose 0 for wires, choose 1 to ease timing closure
-    g_rd_latency                 => g_rd_latency        -- 1 or 2, choose 2 to ease timing closure
+    g_data_replacement_value     => g_data_replacement_value,
+    g_use_mm_output              => g_use_mm_output,
+    g_pipeline_input             => g_pipeline_input,
+    g_pipeline_output            => g_pipeline_output,
+    g_rd_latency                 => g_rd_latency
   )
   PORT MAP (
     dp_rst         => rst,
     dp_clk         => clk,
     -- Control
-    node_index     => node_index,
+    node_index     => node_index_arr(0),
     stream_en_arr  => stream_en_arr,
     -- Streaming input
-    in_sosi_arr    => in_sosi_arr,
+    in_sosi_arr    => dut_in_sosi_2arr(0),
     -- Output via local MM interface in dp_clk domain
     mm_copi        => mm_copi,
     mm_cipo_arr    => mm_cipo_arr,
     mm_sosi        => mm_sosi,
 
     -- Output via streaming DP interface
-    out_sosi_arr   => dut_sosi_arr
+    out_sosi_arr   => dut_out_sosi_2arr(0)
   );
 
+  -- Simulate series of DUT, when g_use_mm_output = FALSE and
+  -- g_nof_aligners_max > 1. Use same local in_sosi_arr(0) input for all BSN
+  -- aligners, because all DUT have same local reference. Connect the remote
+  -- in_sosi_arr(> 0) inputs via the BSN aligners:
+  --
+  --   remote
+  --   in_sosi_arr(> 0) ------> DUT --------> DUT --------> DUT --> dut_sosi_arr()(> 0)
+  --                        /->  0  --X   /->  1  --X   /->  2  --> dut_sosi_arr()(= 0)
+  --   local               /             /             / .      .
+  --   in_sosi_arr(= 0) --/-------------/-------------/  .      .
+  --                         .      .      .      .      .      .
+  --                         .      .      .      .      .      .
+  --                         v      .      .      .      .      .
+  --        dut_in_sosi_2arr(0)     v      .      .      .      .
+  --              dut_out_sosi_2arr(0)     v      .      .      .
+  --                      dut_in_sosi_2arr(1)     v      .      .
+  --                            dut_out_sosi_2arr(1)     v      .
+  --                                    dut_in_sosi_2arr(2)     v
+  --                                          dut_out_sosi_2arr(2)
+  --
+  gen_bsn_align_chain : FOR I IN 1 TO c_nof_aligners_max-1 GENERATE
+    u_bsn_align : ENTITY work.dp_bsn_align_v2
+    GENERIC MAP (
+      g_nof_streams                => g_nof_streams,
+      g_bsn_latency_max            => g_bsn_latency_max,
+      g_nof_aligners_max           => c_nof_aligners_max,
+      g_block_size                 => g_block_size,
+      g_bsn_w                      => g_bsn_w,
+      g_data_w                     => g_data_w,
+      g_data_replacement_value     => g_data_replacement_value,
+      g_use_mm_output              => g_use_mm_output,
+      g_pipeline_input             => g_pipeline_input,
+      g_pipeline_output            => g_pipeline_output,
+      g_rd_latency                 => g_rd_latency
+    )
+    PORT MAP (
+      dp_rst         => rst,
+      dp_clk         => clk,
+      -- Control
+      node_index     => node_index_arr(I),
+      stream_en_arr  => stream_en_arr,
+      -- Streaming input
+      in_sosi_arr    => dut_in_sosi_2arr(I),
+      -- Output via streaming DP interface
+      out_sosi_arr   => dut_out_sosi_2arr(I)
+    );
+
+    -- Connect remote and local between DUTs in the chain of DUTs
+    p_connect : PROCESS(dut_out_sosi_2arr, in_sosi_arr)
+    BEGIN
+      -- connect the remote inputs, by connecting all inputs from previous DUT
+      dut_in_sosi_2arr(I) <= dut_out_sosi_2arr(I-1);
+
+      -- connect the local input, same for all DUTs
+      dut_in_sosi_2arr(I)(0) <= in_sosi_arr(0);
+    END PROCESS;
+  END GENERATE;
+
+  -- Connect output from last DUT, so only verify last output
+  dut_sosi_arr <= dut_out_sosi_2arr(c_nof_aligners_max-1);
+
   ------------------------------------------------------------------------------
   -- MM to streaming DP
   ------------------------------------------------------------------------------
@@ -383,12 +618,12 @@ BEGIN
         mm_done       => mm_done_arr(I),
         mm_mosi       => mm_copi_arr(I),
         mm_miso       => mm_cipo_arr(I),
-        out_sosi      => tb_sosi_arr(I),
+        out_sosi      => mm_sosi_arr(I),
         out_siso      => c_dp_siso_rdy
       );
     END GENERATE;
 
-    p_comb : PROCESS(r, mm_sosi, tb_sosi_arr)
+    p_comb : PROCESS(r, mm_sosi, mm_sosi_arr)
       VARIABLE v : t_reg;
     BEGIN
       v := r;
@@ -400,8 +635,8 @@ BEGIN
       END IF;
 
       -- apply mm_sosi.sync, bsn at sop to all streams in out_sosi_arr
-      v.out_sosi_arr := tb_sosi_arr;
-      IF tb_sosi_arr(0).sop = '1' THEN
+      v.out_sosi_arr := mm_sosi_arr;
+      IF mm_sosi_arr(0).sop = '1' THEN
         v.out_sosi_arr := func_dp_stream_arr_set(v.out_sosi_arr, r.sync, "SYNC");
         v.out_sosi_arr := func_dp_stream_arr_set(v.out_sosi_arr, r.bsn, "BSN");
       ELSE
diff --git a/libraries/base/dp/tb/vhdl/tb_mmp_dp_bsn_align_v2.vhd b/libraries/base/dp/tb/vhdl/tb_mmp_dp_bsn_align_v2.vhd
index 2b624d5d125f2c1f117c8b18c2fd3e7a2b867f55..0b5a6f61f4899f666adf4930ca8728f089805daa 100644
--- a/libraries/base/dp/tb/vhdl/tb_mmp_dp_bsn_align_v2.vhd
+++ b/libraries/base/dp/tb/vhdl/tb_mmp_dp_bsn_align_v2.vhd
@@ -20,6 +20,15 @@
 -- Purpose: Verify MM part of mmp_dp_bsn_align_v2
 -- Description:
 --    The functional part is already verified by tb_tb_dp_bsn_align_v2.vhd.
+--    Tb features:
+--    . verify expected end values for output data and bsn in gen_verify_ctrl
+--      to ensure that test has run
+--    . verify MM access to input stream enable in p_mm_verify_bsn_align
+--    . verify MM access to input BSN monitors by verifying expected input
+--      latencies for different input delays
+--    . verify MM access to output BSN monitor by verifying expected output
+--      latencies.
+--
 -- Usage:
 -- > as 5
 -- > run -all
@@ -45,48 +54,138 @@ ARCHITECTURE tb OF tb_mmp_dp_bsn_align_v2 IS
   CONSTANT c_dp_clk_period              : TIME := 10 ns;
   CONSTANT c_cross_clock_domain_latency : NATURAL := 20;
 
-  CONSTANT c_report_note                : BOOLEAN := FALSE;  -- Use TRUE for tb debugging, else FALSE to keep Transcript window more empty
-
-  CONSTANT c_nof_input_sync             : NATURAL := 10;
-  CONSTANT c_nof_block_per_sync         : NATURAL := 32;
-  CONSTANT c_block_size                 : NATURAL := 10;
-  CONSTANT c_input_gap_size             : NATURAL := 3;
-  CONSTANT c_sim_nof_blocks             : NATURAL := c_nof_block_per_sync * c_nof_input_sync;
+  CONSTANT c_rl                         : NATURAL := 1;
+  CONSTANT c_tb_nof_restart             : NATURAL := 2;    -- number of times to restart the input stimuli
+  CONSTANT c_tb_nof_blocks              : NATURAL := 50;   -- number of input blocks per restart
 
-  CONSTANT c_nof_streams                : NATURAL := 2;
-  CONSTANT c_bsn_latency_max            : POSITIVE := 2;
-  CONSTANT c_nof_aligners_max           : NATURAL := 1;
+  -- Fixed dut generics
+  -- . for dp_bsn_align_v2
+  CONSTANT c_nof_streams                : NATURAL := 3;
+  CONSTANT c_bsn_latency_max            : NATURAL := 1;
+  CONSTANT c_nof_aligners_max           : POSITIVE := 1;   -- fixed in this tb
+  CONSTANT c_block_size                 : NATURAL := 11;
+  CONSTANT c_block_period               : NATURAL := 11;
   CONSTANT c_bsn_w                      : NATURAL := c_dp_stream_bsn_w;
   CONSTANT c_data_w                     : NATURAL := 16;
-  CONSTANT c_replacement_value          : INTEGER := 0;
+  CONSTANT c_data_replacement_value     : INTEGER := 17;
+  CONSTANT c_use_mm_output              : BOOLEAN := FALSE;
+  CONSTANT c_pipeline_input             : NATURAL := 1;
+  CONSTANT c_pipeline_output            : NATURAL := 1;
+  CONSTANT c_rd_latency                 : NATURAL := 2;
+  -- . for mms_dp_bsn_monitor_v2
   CONSTANT c_nof_clk_per_sync           : NATURAL := 200*10**6;
-  CONSTANT c_nof_input_bsn_monitors     : NATURAL := 0;
-  CONSTANT c_use_bsn_output_monitor     : BOOLEAN := FALSE;
+  CONSTANT c_nof_input_bsn_monitors     : NATURAL := c_nof_streams;
+  CONSTANT c_use_bsn_output_monitor     : BOOLEAN := TRUE;
+
+  CONSTANT c_reg_bsn_monitor_adr_w      : NATURAL := ceil_log2(7);
+  CONSTANT c_reg_bsn_monitor_span       : NATURAL := 2**c_reg_bsn_monitor_adr_w;
+
+  -- maximum nof clk delay between any inputs, <= c_align_latency_nof_clk
+  -- . the -1 is due to some acceptable pipeline detail related to dp_block_from_mm
+  CONSTANT c_diff_delay_max             : NATURAL := c_bsn_latency_max * c_block_period - sel_a_b(c_rd_latency > 1, 0, 1);
+  CONSTANT c_diff_delay                 : NATURAL := c_diff_delay_max;
+
+  -- Return input delay as function of input stream index I
+  FUNCTION func_input_delay(I : NATURAL) RETURN NATURAL IS
+  BEGIN
+    RETURN c_diff_delay * I / (c_nof_streams - 1);
+  END;
+
+  -- Input stream settings
+  CONSTANT c_data_init                  : INTEGER := 0;
+  CONSTANT c_bsn_init                   : NATURAL := 3;
+  CONSTANT c_channel_init               : INTEGER := 0;
+  CONSTANT c_err_init                   : NATURAL := 247;
+  CONSTANT c_sync_period                : NATURAL := 7;
+  CONSTANT c_sync_offset                : NATURAL := 2;
+  CONSTANT c_gap_size                   : NATURAL := c_block_period - c_block_size;
 
+  -- DUT latency
+  CONSTANT c_mm_to_dp_latency           : NATURAL := 1;
+  CONSTANT c_dut_latency                : NATURAL := c_pipeline_input + c_rd_latency + c_mm_to_dp_latency + c_pipeline_output;
+
+  CONSTANT c_align_latency_nof_blocks   : NATURAL := c_bsn_latency_max * c_nof_aligners_max;  -- in number of blocks
+  CONSTANT c_align_latency_nof_valid    : NATURAL := c_bsn_latency_max * c_nof_aligners_max * c_block_size;  -- in number of data samples
+  CONSTANT c_align_latency_nof_clk      : NATURAL := c_bsn_latency_max * c_nof_aligners_max * c_block_period;  -- in number of clk cycles
+
+  -- Total DUT chain latency
+  CONSTANT c_total_latency              : NATURAL := c_dut_latency + c_align_latency_nof_clk;
+  CONSTANT c_verify_nof_blocks          : NATURAL := c_tb_nof_blocks - c_align_latency_nof_blocks;  -- skip last blocks that are still in the DUT buffer
+
+  -- Signal monitoring and verification
+  TYPE t_data_arr    IS ARRAY (c_nof_streams-1 DOWNTO 0) OF STD_LOGIC_VECTOR(c_data_w-1 DOWNTO 0);
+  TYPE t_bsn_arr     IS ARRAY (c_nof_streams-1 DOWNTO 0) OF STD_LOGIC_VECTOR(c_bsn_w-1 DOWNTO 0);
+  TYPE t_err_arr     IS ARRAY (c_nof_streams-1 DOWNTO 0) OF STD_LOGIC_VECTOR(c_dp_stream_error_w-1 DOWNTO 0);
+  TYPE t_channel_arr IS ARRAY (c_nof_streams-1 DOWNTO 0) OF STD_LOGIC_VECTOR(c_dp_stream_channel_w-1 DOWNTO 0);
+
+  SIGNAL sl1                      : STD_LOGIC := '1';
+  SIGNAL mm_end                   : STD_LOGIC := '0';
+  SIGNAL dp_end_arr               : STD_LOGIC_VECTOR(c_nof_streams-1 DOWNTO 0) := (OTHERS => '0');
+  SIGNAL dp_end                   : STD_LOGIC := '0';
   SIGNAL tb_end                   : STD_LOGIC := '0';
-  SIGNAL stimuli_end              : STD_LOGIC := '0';
+  SIGNAL streams_enabled          : STD_LOGIC := '0';
+  SIGNAL restart_cnt_arr          : t_nat_integer_arr(c_nof_streams-1 DOWNTO 0) := (OTHERS => -1);
+  SIGNAL restart_cnt              : INTEGER := 0;
 
   -- MM clock domain
   SIGNAL mm_clk                   : STD_LOGIC := '1';
   SIGNAL mm_rst                   : STD_LOGIC := '1';
 
-  SIGNAL reg_copi                 : t_mem_copi := c_mem_copi_rst;
-  SIGNAL reg_cipo                 : t_mem_cipo;
+  SIGNAL reg_bsn_align_copi       : t_mem_copi := c_mem_copi_rst;
+  SIGNAL reg_bsn_align_cipo       : t_mem_cipo;
   SIGNAL reg_input_monitor_copi   : t_mem_copi := c_mem_copi_rst;
   SIGNAL reg_input_monitor_cipo   : t_mem_cipo;
   SIGNAL reg_output_monitor_copi  : t_mem_copi := c_mem_copi_rst;
   SIGNAL reg_output_monitor_cipo  : t_mem_cipo;
 
+  SIGNAL mon_latency_input_arr    : t_nat_natural_arr(c_nof_streams-1 DOWNTO 0);
+  SIGNAL mon_latency_output       : NATURAL;
+
   -- DP clock domain
   SIGNAL dp_clk                   : STD_LOGIC := '1';
   SIGNAL dp_rst                   : STD_LOGIC := '1';
 
-  SIGNAL node_index               : NATURAL := 0;  -- only used when g_bsn_latency_use_node_index is TRUE
-  SIGNAL stimuli_sosi             : t_dp_sosi;
-  SIGNAL in_sosi_arr              : t_dp_sosi_arr(c_nof_streams-1 DOWNTO 0);
-  SIGNAL mm_copi                  : t_mem_copi;   -- read access to output block, all output streams share same mm_copi
-  SIGNAL mm_cipo_arr              : t_mem_cipo_arr(c_nof_streams-1 DOWNTO 0);
-  SIGNAL mm_sosi                  : t_dp_sosi;   -- streaming information that signals that an output block can be read
+  SIGNAL node_index               : NATURAL := 0;
+  SIGNAL ref_siso_arr             : t_dp_siso_arr(c_nof_streams-1 DOWNTO 0) := (OTHERS => c_dp_siso_rdy);
+  SIGNAL ref_sosi_arr             : t_dp_sosi_arr(c_nof_streams-1 DOWNTO 0);                               -- generated stimuli
+  SIGNAL in_sosi_arr              : t_dp_sosi_arr(c_nof_streams-1 DOWNTO 0) := (OTHERS => c_dp_sosi_rst);  -- input stimuli
+
+  SIGNAL in_sync_arr              : STD_LOGIC_VECTOR(c_nof_streams-1 DOWNTO 0);
+  SIGNAL in_sop_arr               : STD_LOGIC_VECTOR(c_nof_streams-1 DOWNTO 0);
+  SIGNAL in_eop_arr               : STD_LOGIC_VECTOR(c_nof_streams-1 DOWNTO 0);
+  SIGNAL in_val_arr               : STD_LOGIC_VECTOR(c_nof_streams-1 DOWNTO 0);
+  SIGNAL in_data_arr              : t_data_arr;
+  SIGNAL in_bsn_arr               : t_bsn_arr;
+  SIGNAL in_channel_arr           : t_channel_arr;
+  SIGNAL in_err_arr               : t_err_arr;
+
+  SIGNAL out_sosi_arr             : t_dp_sosi_arr(c_nof_streams-1 DOWNTO 0) := (OTHERS => c_dp_sosi_rst);  -- output
+  SIGNAL out_sosi                 : t_dp_sosi;
+  SIGNAL out_sync_arr             : STD_LOGIC_VECTOR(c_nof_streams-1 DOWNTO 0);
+  SIGNAL out_sop_arr              : STD_LOGIC_VECTOR(c_nof_streams-1 DOWNTO 0);
+  SIGNAL out_eop_arr              : STD_LOGIC_VECTOR(c_nof_streams-1 DOWNTO 0);
+  SIGNAL out_val_arr              : STD_LOGIC_VECTOR(c_nof_streams-1 DOWNTO 0);
+  SIGNAL out_data_arr             : t_data_arr;
+  SIGNAL hold_data_arr            : t_data_arr;
+  SIGNAL out_bsn_arr              : t_bsn_arr;
+  SIGNAL out_bsn                  : INTEGER;
+  SIGNAL out_channel_arr          : t_channel_arr;
+  SIGNAL out_err_arr              : t_err_arr;
+
+  SIGNAL verify_done_arr          : STD_LOGIC_VECTOR(c_nof_streams-1 DOWNTO 0) := (OTHERS => '0');
+  SIGNAL verify_done              : STD_LOGIC;
+
+  SIGNAL hold_out_sop_arr         : STD_LOGIC_VECTOR(c_nof_streams-1 DOWNTO 0) := (OTHERS => '0');
+  SIGNAL expected_out_bsn_arr     : t_bsn_arr;
+  SIGNAL expected_out_data_arr    : t_data_arr;
+  SIGNAL expected_out_channel_arr : t_channel_arr;
+
+  -- Debug signals for view in Wave window
+  SIGNAL dbg_c_align_latency_nof_blocks : NATURAL := c_align_latency_nof_blocks;
+  SIGNAL dbg_c_align_latency_nof_valid  : NATURAL := c_align_latency_nof_valid;
+  SIGNAL dbg_c_align_latency_nof_clk    : NATURAL := c_align_latency_nof_clk;
+  SIGNAL dbg_c_total_latency            : NATURAL := c_total_latency;
+  SIGNAL dbg_c_verify_nof_blocks        : NATURAL := c_verify_nof_blocks;
 
 BEGIN
 
@@ -95,23 +194,70 @@ BEGIN
   dp_rst <= '1', '0' AFTER c_dp_clk_period*7;    
   mm_rst <= '1', '0' AFTER c_mm_clk_period*7;
   
+  tb_end <= mm_end AND dp_end;
+
+  ------------------------------------------------------------------------------
+  -- MM stimuli
+  ------------------------------------------------------------------------------
   ------------------------------------------------------------------------------
   -- MM stimuli and verification
   ------------------------------------------------------------------------------
 
-  p_stimuli_and_verify_mm : PROCESS
-    VARIABLE v_bsn : NATURAL;
-  BEGIN              
+  p_mm_verify_bsn_align : PROCESS
+  BEGIN
     proc_common_wait_until_low(dp_clk, mm_rst);
     proc_common_wait_until_low(dp_clk, dp_rst);
     proc_common_wait_some_cycles(mm_clk, 5);
 
+    -- Read stream enable bits, default '0' after power up
+    FOR I IN 0 TO c_nof_streams-1 LOOP
+      proc_mem_mm_bus_rd(I, mm_clk, reg_bsn_align_cipo, reg_bsn_align_copi);
+      proc_mem_mm_bus_rd_latency(1, mm_clk);
+      ASSERT reg_bsn_align_cipo.rddata(0) = '0' REPORT "Wrong stream disable for output " & int_to_str(I) SEVERITY ERROR;
+    END LOOP;
+
+    -- Write stream enable bits for stream_en_arr
+    FOR I IN 0 TO c_nof_streams-1 LOOP
+      proc_mem_mm_bus_wr(I, 1,  mm_clk, reg_bsn_align_cipo, reg_bsn_align_copi);
+    END LOOP;
+    proc_common_wait_some_cycles(mm_clk, c_cross_clock_domain_latency);
+    proc_common_wait_some_cycles(dp_clk, c_cross_clock_domain_latency);
+
+    -- Read stream enable bits, should now be '1'
+    FOR I IN 0 TO c_nof_streams-1 LOOP
+      proc_mem_mm_bus_rd(I, mm_clk, reg_bsn_align_cipo, reg_bsn_align_copi);
+      proc_mem_mm_bus_rd_latency(1, mm_clk);
+      ASSERT reg_bsn_align_cipo.rddata(0) = '1' REPORT "Wrong BSN align stream enable for output " & int_to_str(I) SEVERITY ERROR;
+    END LOOP;
 
-    ---------------------------------------------------------------------------
-    -- End of test
-    ---------------------------------------------------------------------------
-    proc_common_wait_until_high(dp_clk, stimuli_end);
-    tb_end <= '1';
+    -- End of MM test
+    streams_enabled <= '1';
+    WAIT;
+  END PROCESS;
+
+  p_mm_verify_bsn_monitors : PROCESS
+  BEGIN
+    proc_common_wait_until_high(mm_clk, verify_done);
+
+    -- Read input BSN monitors
+    FOR I IN 0 TO c_nof_streams-1 LOOP
+      proc_mem_mm_bus_rd(6 + I*c_reg_bsn_monitor_span, mm_clk, reg_input_monitor_cipo, reg_input_monitor_copi);
+      proc_mem_mm_bus_rd_latency(1, mm_clk);
+      mon_latency_input_arr(I) <= TO_UINT(reg_input_monitor_cipo.rddata);
+      proc_common_wait_some_cycles(mm_clk, 1);
+      ASSERT mon_latency_input_arr(I) = func_input_delay(I) REPORT "Wrong input BSN monitor latency for input " & int_to_str(I) SEVERITY ERROR;
+    END LOOP;
+
+    -- Read output BSN monitor
+    proc_mem_mm_bus_rd(6, mm_clk, reg_output_monitor_cipo, reg_output_monitor_copi);
+    proc_mem_mm_bus_rd_latency(1, mm_clk);
+    mon_latency_output <= TO_UINT(reg_output_monitor_cipo.rddata);
+
+    proc_common_wait_some_cycles(mm_clk, 1);
+    ASSERT mon_latency_output = c_total_latency REPORT "Wrong output BSN monitor latency" SEVERITY ERROR;
+
+    -- End of MM test
+    mm_end <= '1';
     WAIT;
   END PROCESS;
 
@@ -119,36 +265,113 @@ BEGIN
   -- Streaming stimuli
   ------------------------------------------------------------------------------
 
-  -- Generate data blocks with input sync
-  u_stimuli : ENTITY work.dp_stream_stimuli
-  GENERIC MAP (
-    g_sync_period  => c_nof_block_per_sync,
-    g_err_init     => 0,
-    g_err_incr     => 0,  -- do not increment, to not distract from viewing of BSN in Wave window
-    g_channel_init => 0,
-    g_channel_incr => 0,  -- do not increment, to not distract from viewing of BSN in Wave window
-    g_nof_repeat   => c_sim_nof_blocks,
-    g_pkt_len      => c_block_size,
-    g_pkt_gap      => c_input_gap_size
-  )
-  PORT MAP (
-    rst               => dp_rst,
-    clk               => dp_clk,
+  -- Generate data path input data (similar as in tb_dp_bsn_align_v2.vhd)
+  gen_input : FOR I IN c_nof_streams-1 DOWNTO 0 GENERATE
+    p_stimuli : PROCESS
+      VARIABLE v_sync      : STD_LOGIC := '0';
+      VARIABLE v_bsn       : NATURAL;
+      VARIABLE v_data      : NATURAL := c_data_init;
+      VARIABLE v_channel   : NATURAL := c_channel_init;
+      VARIABLE v_err       : NATURAL := c_err_init;
+    BEGIN
+      v_data := v_data + I;
+      ref_sosi_arr(I) <= c_dp_sosi_rst;
+      proc_common_wait_until_low(dp_clk, dp_rst);
+      proc_common_wait_until_high(dp_clk, streams_enabled);
+      proc_common_wait_some_cycles(dp_clk, 10);
+      restart_cnt_arr(I) <= restart_cnt_arr(I) + 1;
 
-    -- Generate stimuli
-    src_out           => stimuli_sosi,
+      -- Begin of stimuli
+      FOR S IN 0 TO c_tb_nof_restart-1 LOOP
+        v_bsn := c_bsn_init;
+        FOR R IN 0 TO c_tb_nof_blocks-1 LOOP
+          v_sync := sel_a_b(v_bsn MOD c_sync_period = c_sync_offset, '1', '0');
+          proc_dp_gen_block_data(c_rl, TRUE, c_data_w, c_data_w, v_data, 0, 0, c_block_size, v_channel, v_err, v_sync, TO_UVEC(v_bsn, c_bsn_w), dp_clk, sl1, ref_siso_arr(I), ref_sosi_arr(I));
+          v_bsn  := v_bsn + 1;
+          v_data := v_data + c_block_size;
+          proc_common_wait_some_cycles(dp_clk, c_gap_size);  -- create gap between frames
+        END LOOP;
+        -- Create gap between restarts
+        proc_common_wait_some_cycles(dp_clk, 100);
+        restart_cnt_arr(I) <= restart_cnt_arr(I) + 1;
+      END LOOP;
 
-    -- End of stimuli
-    tb_end            => stimuli_end
-  );
+      -- End of stimuli
+      -- . default c_bsn_latency_max blocks remain in DUT buffer
+      expected_out_bsn_arr(I) <= TO_UVEC(v_bsn-1 - c_align_latency_nof_blocks, c_bsn_w);
+      expected_out_data_arr(I) <= TO_UVEC(v_data-1 - c_align_latency_nof_valid, c_data_w);
+      -- . default no data is lost, so all channel(0) lost data flags are 0
+      expected_out_channel_arr(I) <= TO_DP_CHANNEL(0);
+
+      proc_common_wait_some_cycles(dp_clk, 100);
+      verify_done_arr(I) <= '1';
+      proc_common_wait_some_cycles(dp_clk, 1);
+      verify_done_arr(I) <= '0';
+
+      -- Simulate some more to ease recognizing verify_done in Wave window
+      proc_common_wait_some_cycles(dp_clk, 100);
+      dp_end_arr(I) <= '1';
+      WAIT;
+    END PROCESS;
+  END GENERATE;
+
+  verify_done <= verify_done_arr(0);
+  restart_cnt <= restart_cnt_arr(0);
+
+  dp_end <= vector_and(dp_end_arr);
+
+  -- Model misalignment latency between the input streams to have different
+  -- input BSN monitor latencies
+  gen_rx_sosi_arr : FOR I IN c_nof_streams-1 DOWNTO 0 GENERATE
+    in_sosi_arr(I) <= TRANSPORT ref_sosi_arr(I) AFTER func_input_delay(I) * c_dp_clk_period;
+  END GENERATE;
+
+  ------------------------------------------------------------------------------
+  -- Data verification
+  ------------------------------------------------------------------------------
+
+  mon_sosi : FOR I IN c_nof_streams-1 DOWNTO 0 GENERATE
+    -- Ease in_sosi_arr monitoring
+    in_sync_arr(I)    <= in_sosi_arr(I).sync;
+    in_sop_arr(I)     <= in_sosi_arr(I).sop;
+    in_eop_arr(I)     <= in_sosi_arr(I).eop;
+    in_val_arr(I)     <= in_sosi_arr(I).valid;
+    in_data_arr(I)    <= in_sosi_arr(I).data(c_data_w-1 DOWNTO 0);
+    in_bsn_arr(I)     <= in_sosi_arr(I).bsn(c_bsn_w-1 DOWNTO 0);
+    in_channel_arr(I) <= in_sosi_arr(I).channel;
+    in_err_arr(I)     <= in_sosi_arr(I).err;
+
+    -- Ease out_sosi_arr monitoring and verification
+    out_sync_arr(I)    <= out_sosi_arr(I).sync;
+    out_sop_arr(I)     <= out_sosi_arr(I).sop;
+    out_eop_arr(I)     <= out_sosi_arr(I).eop;
+    out_val_arr(I)     <= out_sosi_arr(I).valid;
+    out_data_arr(I)    <= out_sosi_arr(I).data(c_data_w-1 DOWNTO 0);
+    out_bsn_arr(I)     <= out_sosi_arr(I).bsn(c_bsn_w-1 DOWNTO 0);
+    out_channel_arr(I) <= out_sosi_arr(I).channel;
+    out_err_arr(I)     <= out_sosi_arr(I).err;
+  END GENERATE;
+
+  out_sosi <= out_sosi_arr(0);  -- take out_sosi control and info from out_sosi_arr(0)
+
+  out_bsn <= TO_UINT(out_sosi.bsn);  -- = out_bsn_arr(0) = out_sosi_arr(0).bsn
+
+  gen_verify_ctrl : FOR I IN c_nof_streams-1 DOWNTO 0 GENERATE
+    -- . Verify that sop and eop come in pairs
+    proc_dp_verify_sop_and_eop(dp_clk, out_val_arr(I), out_sop_arr(I), out_eop_arr(I), hold_out_sop_arr(I));
 
-  in_sosi_arr <= (OTHERS => stimuli_sosi);
+    -- . Verify that the stimuli have been applied at all
+    hold_data_arr(I) <= out_data_arr(I) WHEN out_val_arr(I) = '1';  -- hold last valid data
+    proc_dp_verify_value("out_data_arr", e_equal, dp_clk, verify_done_arr(I), expected_out_data_arr(I), hold_data_arr(I));
+    proc_dp_verify_value("out_bsn_arr", e_equal, dp_clk, verify_done_arr(I), expected_out_bsn_arr(I), out_bsn_arr(I));
+    proc_dp_verify_value("out_channel_arr", e_equal, dp_clk, verify_done_arr(I), expected_out_channel_arr(I), out_channel_arr(I));
+  END GENERATE;
 
   ------------------------------------------------------------------------------
   -- DUT
   ------------------------------------------------------------------------------
 
-  u_bsn_align : ENTITY work.mmp_dp_bsn_align_v2
+  u_mmp_dp_bsn_align : ENTITY work.mmp_dp_bsn_align_v2
   GENERIC MAP (
     g_nof_streams                => c_nof_streams,
     g_bsn_latency_max            => c_bsn_latency_max,
@@ -156,7 +379,11 @@ BEGIN
     g_block_size                 => c_block_size,
     g_bsn_w                      => c_bsn_w,
     g_data_w                     => c_data_w,
-    g_replacement_value          => c_replacement_value,
+    g_data_replacement_value     => c_data_replacement_value,
+    g_use_mm_output              => c_use_mm_output,
+    g_pipeline_input             => c_pipeline_input,
+    g_pipeline_output            => c_pipeline_output,
+    g_rd_latency                 => c_rd_latency,
     g_nof_clk_per_sync           => c_nof_clk_per_sync,
     g_nof_input_bsn_monitors     => c_nof_input_bsn_monitors,
     g_use_bsn_output_monitor     => c_use_bsn_output_monitor
@@ -165,8 +392,8 @@ BEGIN
     mm_rst                  => mm_rst,
     mm_clk                  => mm_clk,
 
-    reg_copi                => reg_copi,
-    reg_cipo                => reg_cipo,
+    reg_bsn_align_copi      => reg_bsn_align_copi,
+    reg_bsn_align_cipo      => reg_bsn_align_cipo,
 
     reg_input_monitor_copi  => reg_input_monitor_copi,
     reg_input_monitor_cipo  => reg_input_monitor_cipo,
@@ -181,9 +408,11 @@ BEGIN
     -- Streaming input
     in_sosi_arr             => in_sosi_arr,
     -- Output via local MM in dp_clk domain
-    mm_copi                 => mm_copi,
-    mm_cipo_arr             => mm_cipo_arr,
-    mm_sosi                 => mm_sosi
+    --mm_sosi                 => mm_sosi,
+    --mm_copi                 => mm_copi,
+    --mm_cipo_arr             => mm_cipo_arr,
+    -- Output via streaming DP interface, when g_use_mm_output = FALSE.
+    out_sosi_arr            => out_sosi_arr
   );
 
 END tb;
diff --git a/libraries/base/dp/tb/vhdl/tb_tb_dp_bsn_align_v2.vhd b/libraries/base/dp/tb/vhdl/tb_tb_dp_bsn_align_v2.vhd
index f47d2acf82d36d526f01bd4df781fd3e733b094f..dfafc97e2fe1d57c9ac1f2b90e5816792ca92c9d 100644
--- a/libraries/base/dp/tb/vhdl/tb_tb_dp_bsn_align_v2.vhd
+++ b/libraries/base/dp/tb/vhdl/tb_tb_dp_bsn_align_v2.vhd
@@ -34,11 +34,10 @@ END tb_tb_dp_bsn_align_v2;
 
 ARCHITECTURE tb OF tb_tb_dp_bsn_align_v2 IS
 
-  CONSTANT c_bsn_latency_max      : POSITIVE := 1;
   CONSTANT c_block                : NATURAL := 11;
   CONSTANT c_period               : NATURAL := 20;
-  CONSTANT c_delay_max            : NATURAL := c_bsn_latency_max * c_period;
- 
+  CONSTANT c_nof_blk              : NATURAL := 30;
+
   SIGNAL tb_end : STD_LOGIC := '0';  -- declare tb_end to avoid 'No objects found' error on 'when -label tb_end'
 
 BEGIN
@@ -51,20 +50,35 @@ BEGIN
   -- g_block_period               : NATURAL := 20;     -- >= g_block_size, = g_block_size + c_gap_size
   -- g_bsn_w                      : NATURAL := c_dp_stream_bsn_w;  -- number of bits in sosi BSN
   -- g_data_w                     : NATURAL := 16;     -- number of bits in sosi data
-  -- c_replacement_value          : INTEGER := 0;      -- output sosi data replacement value for missing input blocks
+  -- g_data_replacement_value     : INTEGER := 0;      -- output sosi data replacement value for missing input blocks
+  -- g_disable_stream_id          : NATURAL := 0;      -- default 0 to enable all streams, > 0 selects stream that will be disabled
+  -- g_lost_stream_id             : NATURAL := 0;      -- default 0 to have all streams, > 0 selects stream that will be lost
+  -- g_lost_bsn_id                : NATURAL := 10;     -- for stream 1 the block with bsn = g_lost_bsn_id will be lost
   -- g_use_mm_output              : BOOLEAN := FALSE;  -- output via MM or via streaming DP
-  -- g_pipeline_input             : NATURAL := 1;      -- >= 0, choose 0 for wires, choose 1 to ease timing closure
+  -- g_pipeline_input             : NATURAL := 0;      -- >= 0, choose 0 for wires, choose 1 to ease timing closure of in_sosi_arr
+  -- g_pipeline_output            : NATURAL := 0;      -- >= 0, choose 0 for wires, choose 1 to ease timing closure of out_sop_arr
   -- g_rd_latency                 : NATURAL := 2;      -- 1 or 2, choose 2 to ease timing closure
   --
   -- -- TB
-  -- g_tb_diff_delay_max    : NATURAL := 45;      -- maximum nof clk delay between any inputs, <= c_align_latency
+  -- g_tb_diff_delay        : INTEGER := 0;       -- 0 = aligned inputs, -1 = max input delay for no loss,
+  --                                              -- >~ g_bsn_latency_max * g_block_period will give loss
   -- g_tb_nof_restart       : NATURAL := 1;       -- number of times to restart the input stimuli
   -- g_tb_nof_blocks        : NATURAL := 10       -- number of input blocks per restart
 
-  u_mm_output          : ENTITY work.tb_dp_bsn_align_v2 GENERIC MAP (2, c_bsn_latency_max, 1, c_block, c_period, 32, 16, 17,  TRUE, 0, 1,                0, 1, 50);
-  u_dp_output          : ENTITY work.tb_dp_bsn_align_v2 GENERIC MAP (2, c_bsn_latency_max, 1, c_block, c_period, 32, 16, 17, FALSE, 0, 1,                0, 1, 50);
-
-  u_diff_delay_no_loss : ENTITY work.tb_dp_bsn_align_v2 GENERIC MAP (2, c_bsn_latency_max, 1, c_block, c_period, 32, 16, 17, FALSE, 0, 1,      c_delay_max, 1, 50);
-  --u_loss_replacement   : ENTITY work.tb_dp_bsn_align_v2 GENERIC MAP (2, c_bsn_latency_max, 1, c_block, c_period, 32, 16, 17, FALSE, 0, 1, 40 + c_delay_max, 1, 50);
+  u_mm_output               : ENTITY work.tb_dp_bsn_align_v2 GENERIC MAP (2, 1, 1, c_block, c_period, 32, 16, 17, 0, 0,  0,  TRUE, 0, 0, 1,  0, 2, c_nof_blk);
+  u_dp_output               : ENTITY work.tb_dp_bsn_align_v2 GENERIC MAP (2, 1, 1, c_block, c_period, 32, 16, 17, 0, 0,  0, FALSE, 0, 0, 1,  0, 2, c_nof_blk);
+  u_dp_output_p1            : ENTITY work.tb_dp_bsn_align_v2 GENERIC MAP (2, 1, 1, c_block, c_period, 32, 16, 17, 0, 0,  0, FALSE, 1, 1, 1,  0, 2, c_nof_blk);
+  u_bsn_lat_max_2           : ENTITY work.tb_dp_bsn_align_v2 GENERIC MAP (2, 2, 1, c_block, c_period, 32, 16, 17, 0, 0,  0, FALSE, 0, 0, 1,  0, 2, c_nof_blk);
+  u_bsn_lat_max_3           : ENTITY work.tb_dp_bsn_align_v2 GENERIC MAP (2, 3, 1, c_block, c_period, 32, 16, 17, 0, 0,  0, FALSE, 0, 0, 1,  0, 2, c_nof_blk);
+  u_p1_rd2                  : ENTITY work.tb_dp_bsn_align_v2 GENERIC MAP (2, 1, 1, c_block, c_period, 32, 16, 17, 0, 0,  0, FALSE, 1, 0, 2,  0, 2, c_nof_blk);
+  u_zero_gap                : ENTITY work.tb_dp_bsn_align_v2 GENERIC MAP (2, 1, 1, c_block,  c_block, 32, 16, 17, 0, 0,  0, FALSE, 0, 0, 1,  0, 2, c_nof_blk);
+  u_zero_gap_p1_rd2         : ENTITY work.tb_dp_bsn_align_v2 GENERIC MAP (2, 1, 1, c_block,  c_block, 32, 16, 17, 0, 0,  0, FALSE, 1, 1, 2,  0, 2, c_nof_blk);
+  u_stream_disable          : ENTITY work.tb_dp_bsn_align_v2 GENERIC MAP (3, 1, 1, c_block, c_period, 32, 16, 17, 2, 0,  0, FALSE, 0, 0, 1,  0, 2, c_nof_blk);
+  u_stream_lost             : ENTITY work.tb_dp_bsn_align_v2 GENERIC MAP (3, 1, 1, c_block, c_period, 32, 16, 17, 0, 2,  0, FALSE, 0, 0, 1,  0, 2, c_nof_blk);
+  u_stream_disable_lost     : ENTITY work.tb_dp_bsn_align_v2 GENERIC MAP (4, 1, 1, c_block, c_period, 32, 16, 17, 1, 2,  0, FALSE, 0, 0, 1,  0, 2, c_nof_blk);
+  u_bsn_lost                : ENTITY work.tb_dp_bsn_align_v2 GENERIC MAP (3, 1, 1, c_block, c_period, 32, 16, 17, 0, 0, 10, FALSE, 0, 0, 1,  0, 2, c_nof_blk);
+  u_diff_delay              : ENTITY work.tb_dp_bsn_align_v2 GENERIC MAP (3, 1, 1, c_block, c_period, 32, 16, 17, 0, 0,  0, FALSE, 0, 0, 1, -1, 2, c_nof_blk);
+  u_nof_aligners            : ENTITY work.tb_dp_bsn_align_v2 GENERIC MAP (2, 1, 8, c_block, c_period, 32, 16, 17, 0, 0,  0, FALSE, 0, 0, 1,  0, 2, c_nof_blk);
+  u_nof_aligners_diff_delay : ENTITY work.tb_dp_bsn_align_v2 GENERIC MAP (4, 1, 3, c_block, c_period, 32, 16, 17, 0, 0,  0, FALSE, 0, 0, 1, -1, 2, c_nof_blk);
 
 END tb;