diff --git a/libraries/base/dp/src/vhdl/dp_bsn_align_v2.vhd b/libraries/base/dp/src/vhdl/dp_bsn_align_v2.vhd
index b3c8a443e191662d20893c97308b918cd259d355..913aaa73b5fa9a0fbc8a6d55fda37c939e9badff 100644
--- a/libraries/base/dp/src/vhdl/dp_bsn_align_v2.vhd
+++ b/libraries/base/dp/src/vhdl/dp_bsn_align_v2.vhd
@@ -27,6 +27,11 @@
 --   certain number of blocks on input 0, the same block on all remote
 --   inputs should also have arrived. If not then they are replaced by
 --   filler data. The output streams are paced by the block rate of input 0.
+--   The user has to read the block within the block period.
+--
+--   Features:
+--   . uses filler flag and data to replace lost input blocks
+--   . output block can be read in arbitrary order
 --
 --   For more detailed description see:
 --   https://support.astron.nl/confluence/display/L2M/L6+FWLIB+Design+Document%3A+BSN+aligner+v2
@@ -50,6 +55,7 @@ ENTITY dp_bsn_align_v2 IS
     g_nof_streams                : NATURAL;           -- number of input and output streams
     g_bsn_latency_max            : NATURAL;           -- Maximum travel latency of a remote block in number of block periods T_blk
     g_bsn_latency_use_node_index : BOOLEAN := FALSE;  -- FALSE for align at end node, TRUE for align at every intermediate node
+    g_node_index_max             : NATURAL := 31;     -- limit to functional 5 bit range, instead of full 31 bit NATURAL range
     g_block_size                 : NATURAL := 32;     -- > 1, g_block_size=1 is not supported
     g_buffer_nof_blocks          : NATURAL;           -- circular buffer size per input, choose ceil_pow2(1 + g_bsn_latency_max)
     g_bsn_w                      : NATURAL := c_dp_stream_bsn_w;  -- number of bits in sosi BSN
@@ -62,18 +68,18 @@ ENTITY dp_bsn_align_v2 IS
     dp_rst         : IN  STD_LOGIC;
     dp_clk         : IN  STD_LOGIC;
 
-    node_index     : IN  NATURAL := 0;  -- only used when g_bsn_latency_use_node_index is TRUE
+    node_index     : IN  NATURAL RANGE 0 TO g_node_index_max := 0;  -- only used when g_bsn_latency_use_node_index is TRUE
 
     -- MM control
-    in_en_arr      : IN  STD_LOGIC_VECTOR(g_nof_streams-1 DOWNTO 0);
+    stream_en_arr  : IN  STD_LOGIC_VECTOR(g_nof_streams-1 DOWNTO 0) := (OTHERS=>'1');
 
     -- Streaming input
     in_sosi_arr    : IN  t_dp_sosi_arr(g_nof_streams-1 DOWNTO 0);
 
     -- Output via local MM interface in dp_clk domain
+    mm_sosi        : OUT t_dp_sosi;   -- streaming information that signals that an output block can be read
     mm_copi        : IN  t_mem_copi;  -- read access to output block, all output streams share same mm_copi
-    mm_cipo_arr    : OUT t_mem_cipo_arr(g_nof_streams-1 DOWNTO 0);
-    mm_sosi        : OUT t_dp_sosi   -- streaming information that signals that an output block can be read
+    mm_cipo_arr    : OUT t_mem_cipo_arr(g_nof_streams-1 DOWNTO 0)
   );
 END dp_bsn_align_v2;
 
@@ -106,6 +112,7 @@ ARCHITECTURE rtl OF dp_bsn_align_v2 IS
     wr_copi_arr       : t_mem_copi_arr(g_nof_streams-1 DOWNTO 0);
     -- all streams
     filled_arr        : t_filled_arr(g_nof_streams-1 DOWNTO 0);
+    use_filler_data   : STD_LOGIC_VECTOR(g_nof_streams-1 DOWNTO 0);  -- one flag per stream, '1' selects filler data instead of buffer read data
     -- local reference
     sync_arr          : STD_LOGIC_VECTOR(g_buffer_nof_blocks-1 DOWNTO 0);
     bsn_arr           : t_bsn_arr(g_buffer_nof_blocks-1 DOWNTO 0);
@@ -120,6 +127,7 @@ ARCHITECTURE rtl OF dp_bsn_align_v2 IS
                                   (OTHERS=>c_mem_copi_rst),
                                   (OTHERS=>(OTHERS=>'0')),
                                   (OTHERS=>'0'),
+                                  (OTHERS=>'0'),
                                   (OTHERS=>(OTHERS=>'0')),
                                   c_dp_sosi_rst,
                                   0,
@@ -133,6 +141,7 @@ ARCHITECTURE rtl OF dp_bsn_align_v2 IS
   -- Pipeline registers
   SIGNAL in_sosi_arr_p : t_dp_sosi_arr(g_nof_streams-1 DOWNTO 0);
   SIGNAL rd_copi_p     : t_mem_copi;
+  SIGNAL rd_cipo_arr   : t_mem_cipo_arr(g_nof_streams-1 DOWNTO 0);
 
 BEGIN
 
@@ -147,13 +156,14 @@ BEGIN
     END IF;
   END PROCESS;
   
-  p_comb : PROCESS(r, in_sosi_arr_p, mm_copi)
+  p_comb : PROCESS(r, in_sosi_arr_p, mm_copi, rd_cipo_arr, node_index, stream_en_arr)  -- lists every signal that the process reads
     -- State variable
     VARIABLE v : t_reg;
     -- Auxiliary variables
-    VARIABLE v_ref_sosi    : t_dp_sosi;
-    VARIABLE v_pointer_slv : STD_LOGIC_VECTOR(c_blk_pointer_w-1 DOWNTO 0);
-    VARIABLE v_product_slv : STD_LOGIC_VECTOR(c_product_w-1 DOWNTO 0);
+    VARIABLE v_ref_sosi          : t_dp_sosi;
+    VARIABLE v_pointer_slv       : STD_LOGIC_VECTOR(c_blk_pointer_w-1 DOWNTO 0);
+    VARIABLE v_product_slv       : STD_LOGIC_VECTOR(c_product_w-1 DOWNTO 0);
+    VARIABLE v_fill_flag         : STD_LOGIC;
   BEGIN
     v := r;
     v.mm_sosi := func_dp_stream_reset_control(r.mm_sosi);
@@ -180,7 +190,7 @@ BEGIN
       END IF;
     END LOOP;
 
-    -- p_control, all at local reference input 0 sop
+    -- p_control, all at sop of local reference input 0
     v_ref_sosi := in_sosi_arr_p(0);
     IF v_ref_sosi.sop = '1' THEN
       -- . write sync & bsn buffer
@@ -189,7 +199,11 @@ BEGIN
       v.bsn_arr(v.wr_pointer) := v_ref_sosi.bsn(g_bsn_w-1 DOWNTO 0);
 
       -- . update read block pointer at g_bsn_latency_max blocks behind the reference write pointer
-      v.rd_pointer := v.wr_pointer - g_bsn_latency_max;
+      IF g_bsn_latency_use_node_index = FALSE THEN
+        v.rd_pointer := v.wr_pointer - g_bsn_latency_max;  -- align at end node: fixed offset behind the write pointer
+      ELSE
+        v.rd_pointer := v.wr_pointer - g_bsn_latency_max * node_index;  -- NOTE(review): offset scales with node_index; if it can exceed g_buffer_nof_blocks then the single wrap-around below does not bring rd_pointer back in range - confirm buffer sizing
+      END IF;
       IF v.rd_pointer < 0 THEN
         v.rd_pointer := v.rd_pointer + g_buffer_nof_blocks;
       END IF;
@@ -207,9 +221,17 @@ BEGIN
         -- . pass on timestamp information
         v.mm_sosi.sync := v.sync_arr(v.rd_pointer);
         v.mm_sosi.bsn := v.bsn_arr(v.rd_pointer);
-        -- . use channel field to pass on filled flags
+        -- . pass on filled flags for enabled streams via channel field, and
+        --   determine whether the output has to insert filler data
+        v.mm_sosi.channel := (OTHERS=>'0');
         FOR I IN 0 TO g_nof_streams-1 LOOP
-          v.mm_sosi.channel(I) := v.filled_arr(I)(v.rd_pointer);
+          v_fill_flag := v.filled_arr(I)(v.rd_pointer);
+          IF stream_en_arr(I) = '1' THEN  -- use MM bit at sop
+            v.use_filler_data(I) := v_fill_flag;  -- enabled stream, NOTE(review): if filled_arr = '1' means the block was received, then this must be NOT v_fill_flag - confirm polarity
+            v.mm_sosi.channel(I) := v_fill_flag;
+          ELSE
+            v.use_filler_data(I) := '1';  -- disabled stream, always output filler data (channel bit stays '0')
+          END IF;
         END LOOP;
       END IF;
 
@@ -220,9 +242,18 @@ BEGIN
     END IF;
 
     -- p_read
+    -- . rd address
     v.rd_copi := mm_copi;
     v.rd_copi.address := RESIZE_MEM_ADDRESS(ADD_UVEC(r.rd_offset, mm_copi.address));  -- sum yields c_mem_ram.adr_w bits, because left operand determines width
 
+    -- . rd data
+    FOR I IN 0 TO g_nof_streams-1 LOOP
+      mm_cipo_arr(I) <= rd_cipo_arr(I);  -- default pass on the buffer read data and rdval
+      IF r.use_filler_data(I) = '1' THEN
+        mm_cipo_arr(I).rddata <= TO_MEM_SDATA(g_filler_value);  -- last assignment wins, so only rddata is replaced
+      END IF;
+    END LOOP;
+
     -- next state
     nxt_r <= v;
   END PROCESS;
@@ -241,8 +272,8 @@ BEGIN
       wr_dat    => r.wr_copi_arr(I).wrdata(c_ram_buf.dat_w-1 DOWNTO 0),
       rd_en     => rd_copi_p.rd,
       rd_adr    => rd_copi_p.address(c_ram_buf.adr_w-1 DOWNTO 0),
-      rd_dat    => mm_cipo_arr(I).rddata(c_ram_buf.dat_w-1 DOWNTO 0),
-      rd_val    => mm_cipo_arr(I).rdval
+      rd_dat    => rd_cipo_arr(I).rddata(c_ram_buf.dat_w-1 DOWNTO 0),
+      rd_val    => rd_cipo_arr(I).rdval
     );
   END GENERATE;