diff --git a/libraries/base/dp/src/vhdl/dp_bsn_align_v2.vhd b/libraries/base/dp/src/vhdl/dp_bsn_align_v2.vhd
index eae4c4d60fc384fdc15638f8d309c1c7d514614b..4653c6ccba90e1083091ce29bee84b230375fab6 100644
--- a/libraries/base/dp/src/vhdl/dp_bsn_align_v2.vhd
+++ b/libraries/base/dp/src/vhdl/dp_bsn_align_v2.vhd
@@ -29,14 +29,72 @@
 --   replacement data. The output streams are paced by the block rate of
 --   input 0. The user has to read the block within the block period.
 --
+--   The aligner can align g_nof_streams that all arrive within a latency
+--   of g_bsn_latency_max after the local stream at index 0. The aligner
+--   can also be used in a chain of aligners, whereby each aligner typically
+--   has the local input and one remote input and the remote input is the
+--   output of an upstream aligner. Then the latency on the last node in
+--   the chain will be within g_nof_aligners_max * g_bsn_latency_max.
+--
+--   The size of the circular buffer is c_buffer_nof_blocks and depends on the
+--   maximum latency. The c_buffer_nof_blocks has to be a power of two to ease
+--   the control of the circular buffer. The lowest bits of the input block
+--   sequence number (BSN) are used as write block index into the circular
+--   buffer. The g_use_aligner_at_first_node can be useful to reduce the
+--   required circular buffer size just enough, such that the next power of two
+--   is only a few blocks larger, instead of almost a factor two larger. This
+--   then may save a significant amount of block RAM.
+--
+--   In case of a chain of aligners then the circular buffer size depends on
+--   the latency of the local input. The most remote input will only use a
+--   fraction of the buffer. Therefore more block RAM can be saved by using
+--   a smaller circular buffer size for signal inputs that are more
+--   remote (i.e. that have passed through more upstream aligners).
+--
 --   Features:
 --   . The g_block_size <= block period, so supports input blocks arriving
 --     with or without data valid gaps
 --   . uses replacement data to replace lost input blocks and channel bit 0 as
 --     lost_data flag
 --   . uses replacement data to replace disabled input streams
---   . output block can be read in arbitrary order via g_use_mm_output = TRUE
---   . output block can be streamed via g_use_mm_output = FALSE
+--   . output block can be read in arbitrary order via g_use_mm_output = true
+--   . output block can be streamed via g_use_mm_output = false
+--
+--   Parameters:
+--   . g_nof_streams: number of input and output streams. Stream index 0 is
+--     the local stream. Stream indices > 0 are for remote streams. The
+--     remote streams arrive later than the local stream, but within
+--     g_bsn_latency_max or within an integer multiple of g_bsn_latency_max.
+--   . g_bsn_latency_max: maximum travel latency of a remote block in number
+--     of block periods T_blk.
+--   . g_nof_aligners_max: Number of dp_bsn_align_v2 aligners in a chain.
+--     = 1 when only align at last node, or
+--     > 1 when align at every intermediate node in a chain of nodes, and then
+--         g_nof_aligners_max should equal the number of nodes for
+--         chain_node_index range. The g_nof_aligners_max is the number of
+--         nodes in the chain including the first node.
+--   . g_use_aligner_at_first_node: when true use g_bsn_latency_max at first
+--     node as well, else use c_bsn_latency_first_node = 0 for immediate
+--     output from first node in a chain of nodes. Only used when
+--     g_nof_aligners_max > 1. The g_use_aligner_at_first_node setting only
+--     affects the latency along the chain, and therefore the required
+--     size of the circular buffer. If the circular buffer is large enough
+--     anyway, then the g_use_aligner_at_first_node setting is don't care,
+--     assuming that a little extra latency is don't care.
+--
+--   Inputs:
+--   . chain_node_index: Node index in chain of nodes. First node has index 0.
+--     In case of a ring of nodes the chain of nodes can range the whole ring,
+--     or only a part of the ring. The number of nodes in the chain is given
+--     by g_nof_aligners_max. Only used when g_nof_aligners_max > 1.
+--   . stream_en_arr: when '1' then align corresponding input stream, else
+--     replace data from corresponding input stream by 0 and do not raise the
+--     lost data flag. Whether a stream is enabled or not has no effect on the
+--     aligner timing, it only sets the data to 0.
+--
+--   Outputs:
+--   . replace_cnt_en_arr: count number of lost data blocks per input stream,
+--     that got replaced by 0 value, per sync interval.
 --
 --   For more detailed description see:
 --   https://support.astron.nl/confluence/display/L2M/L6+FWLIB+Design+Document%3A+BSN+aligner+v2
@@ -57,22 +115,23 @@ use work.dp_stream_pkg.all;
 entity dp_bsn_align_v2 is
   generic (
     g_nof_streams                : natural := 2;  -- >= 2, number of input and output streams
-    g_bsn_latency_max            : natural := 2;  -- maximum travel latency of a remote block in number of block periods T_blk
-    g_nof_aligners_max           : positive := 16;  -- 1 when only align at last node, > 1 when align at every intermediate node
+    g_bsn_latency_max            : natural := 2;
+    g_use_aligner_at_first_node  : boolean := true;
+    g_nof_aligners_max           : positive := 16;
     g_block_size                 : natural := 1024;  -- > 1, g_block_size=1 is not supported
     g_bsn_w                      : natural := c_dp_stream_bsn_w;  -- number of bits in sosi BSN
     g_data_w                     : natural := 36;  -- number of bits in sosi data
     g_data_replacement_value     : integer := 0;  -- output sosi data value for missing input blocks
     g_use_mm_output              : boolean := false;  -- output via MM or via streaming DP
-    g_pipeline_input             : natural := 1;  -- >= 0, choose 0 for wires, choose 1 to ease timing closure of in_sosi_arr
-    g_pipeline_output            : natural := 1;  -- >= 0, choose 0 for wires, choose 1 to ease timing closure of out_sosi_arr
+    g_pipeline_input             : natural := 1;  -- >= 0, 0 for wires, 1 to ease timing closure of in_sosi_arr
+    g_pipeline_output            : natural := 1;  -- >= 0, 0 for wires, 1 to ease timing closure of out_sosi_arr
     g_rd_latency                 : natural := 2  -- 1 or 2, choose 2 to ease timing closure
   );
   port (
     dp_rst         : in  std_logic;
     dp_clk         : in  std_logic;
 
-    node_index     : in  natural range 0 to g_nof_aligners_max - 1 := 0;  -- only used when g_nof_aligners_max > 1
+    chain_node_index : in  natural range 0 to g_nof_aligners_max - 1 := 0;
 
     -- MM control
     stream_en_arr            : in  std_logic_vector(g_nof_streams - 1 downto 0) := (others => '1');
@@ -81,19 +140,25 @@ entity dp_bsn_align_v2 is
     -- Streaming input
     in_sosi_arr    : in  t_dp_sosi_arr(g_nof_streams - 1 downto 0);
 
-    -- Output via local MM interface in dp_clk domain, when g_use_mm_output = TRUE.
-    mm_sosi        : out t_dp_sosi;  -- streaming information that signals that an output block can be read
-    mm_copi        : in  t_mem_copi := c_mem_copi_rst;  -- read access to output block, all output streams share same mm_copi
+    -- Output via local MM interface in dp_clk domain, when g_use_mm_output = true
+    -- . streaming information that signals that an output block can be read
+    mm_sosi        : out t_dp_sosi;
+    -- . MM read access to output block, all output streams share same mm_copi
+    mm_copi        : in  t_mem_copi := c_mem_copi_rst;
     mm_cipo_arr    : out t_mem_cipo_arr(g_nof_streams - 1 downto 0);
 
-    -- Output via streaming DP interface, when g_use_mm_output = FALSE.
+    -- Output via streaming DP interface, when g_use_mm_output = false.
     out_sosi_arr   : out t_dp_sosi_arr(g_nof_streams - 1 downto 0)
   );
 end dp_bsn_align_v2;
 
 architecture rtl of dp_bsn_align_v2 is
+  constant c_bsn_latency_first_node : natural := sel_a_b(g_use_aligner_at_first_node, g_bsn_latency_max, 0);
+
   -- Circular buffer per stream, size is next power of 2 that fits
-  constant c_buffer_nof_blocks : natural :=  true_log_pow2(1 + g_nof_aligners_max * g_bsn_latency_max);
+  constant c_buffer_nof_blocks : natural := sel_a_b(g_nof_aligners_max = 1,
+           true_log_pow2(1 + g_bsn_latency_max),
+           true_log_pow2(1 + g_bsn_latency_max * (g_nof_aligners_max - 1) + c_bsn_latency_first_node));
 
   constant c_ram_size       : natural := c_buffer_nof_blocks * g_block_size;
   constant c_ram_buf        : t_c_mem := (latency  => 1,
@@ -121,6 +186,7 @@ architecture rtl of dp_bsn_align_v2 is
 
   -- State
   type t_reg is record
+    ref_sosi             : t_dp_sosi;
     -- p_write_arr
     wr_blk_pointer       : natural;
     wr_copi_arr          : t_mem_copi_arr(g_nof_streams - 1 downto 0);
@@ -136,9 +202,12 @@ architecture rtl of dp_bsn_align_v2 is
     rd_blk_pointer       : integer;  -- use integer to detect need to wrap to natural
     rd_offset            : std_logic_vector(c_ram_buf.adr_w - 1 downto 0);
     rd_copi              : t_mem_copi;
-    fill_cipo_arr        : t_mem_cipo_arr(g_nof_streams - 1 downto 0);  -- used combinatorial to contain rd_cipo_arr from buffer or replacement data
-    out_bsn              : std_logic_vector(g_bsn_w - 1 downto 0);  -- hold BSN until next sop, for easy view in Wave window
-    out_channel_arr      : t_channel_arr(g_nof_streams - 1 downto 0);  -- hold channel until next sop per stream, for easy view in Wave window
+    fill_cipo_arr        : t_mem_cipo_arr(g_nof_streams - 1 downto 0);  -- used combinatorial to contain rd_cipo_arr
+                                                                        -- from buffer or replacement data
+    out_bsn              : std_logic_vector(g_bsn_w - 1 downto 0);  -- hold BSN until next sop, for easy view in Wave
+                                                                    -- window
+    out_channel_arr      : t_channel_arr(g_nof_streams - 1 downto 0);  -- hold channel until next sop per stream, for
+                                                                       -- easy view in Wave window
     replace_cnt_en_arr   : std_logic_vector(g_nof_streams - 1 downto 0);
   end record;
 
@@ -148,14 +217,14 @@ architecture rtl of dp_bsn_align_v2 is
   --   t_comb variable in p_comb, but then only the last assignment value will
   --   be visible via the signal dbg_wires in the Wave window.
   type t_comb is record
-    ref_sosi            : t_dp_sosi;
     blk_pointer_slv     : std_logic_vector(c_blk_pointer_w - 1 downto 0);
     product_slv         : std_logic_vector(c_product_w - 1 downto 0);
     lost_data_flags_arr : std_logic_vector(g_nof_streams - 1 downto 0);
     out_sosi_arr        : t_dp_sosi_arr(g_nof_streams - 1 downto 0);
   end record;
 
-  constant c_reg_rst  : t_reg := (0,
+  constant c_reg_rst  : t_reg := (c_dp_sosi_rst,
+                                  0,
                                   (others => c_mem_copi_rst),
                                   (others => (others => '0')),
                                   (others => '0'),
@@ -171,11 +240,10 @@ architecture rtl of dp_bsn_align_v2 is
                                   (others => (others => '0')),
                                   (others => '0'));
 
-  constant c_comb_rst  : t_comb := (c_dp_sosi_rst,
-                                   (others => '0'),
-                                   (others => '0'),
-                                   (others => '0'),
-                                   (others => c_dp_sosi_rst));
+  constant c_comb_rst  : t_comb := ((others => '0'),
+                                    (others => '0'),
+                                    (others => '0'),
+                                    (others => c_dp_sosi_rst));
 
   -- State registers for p_comb
   signal r                 : t_reg;
@@ -199,7 +267,6 @@ architecture rtl of dp_bsn_align_v2 is
   signal comb_out_sosi_arr : t_dp_sosi_arr(g_nof_streams - 1 downto 0);
 
   -- Counter signals
-
   signal replace_cnt_arr          : t_slv_32_arr(g_nof_streams - 1 downto 0);
   signal nxt_hold_replace_cnt_arr : t_slv_32_arr(g_nof_streams - 1 downto 0);
   signal hold_replace_cnt_arr     : t_slv_32_arr(g_nof_streams - 1 downto 0);
@@ -230,7 +297,7 @@ begin
     end if;
   end process;
 
-  p_comb : process(r, in_sosi_arr_p, mm_copi, dp_copi, rd_cipo_arr, rd_sosi_arr, stream_en_arr, node_index)
+  p_comb : process(r, in_sosi_arr_p, mm_copi, dp_copi, rd_cipo_arr, rd_sosi_arr, stream_en_arr, chain_node_index)
     variable v : t_reg;  -- State variable
     variable w : t_comb;  -- Local wires = memoryless auxiliary variables
   begin
@@ -264,26 +331,31 @@ begin
       end if;
     end loop;
 
-    ----------------------------------------------------------------------------
+    ---------------------------------------------------------------------------
     -- p_control, all at sop of local reference input 0
-    ----------------------------------------------------------------------------
-    w.ref_sosi := in_sosi_arr_p(0);
-    if w.ref_sosi.sop = '1' then
+    ---------------------------------------------------------------------------
+    v.ref_sosi := in_sosi_arr_p(0);
+    -- NOTE(review): this comment describes using r.ref_sosi.sop, that occurs
+    -- one cycle after in_sosi_arr_p(0).sop, to support immediate aligner
+    -- output when g_use_aligner_at_first_node = false, so that the local block
+    -- of chain_node_index = 0 can be read from the circular buffer one dp_clk
+    -- cycle after it is written. The condition below tests v.ref_sosi.sop.
+    if v.ref_sosi.sop = '1' then
       -- . write sync & bsn buffer
-      v.wr_blk_pointer := TO_UINT(w.ref_sosi.bsn(c_blk_pointer_w - 1 downto 0));
-      v.sync_arr(v.wr_blk_pointer) := w.ref_sosi.sync;
-      v.bsn_arr(v.wr_blk_pointer) := w.ref_sosi.bsn(g_bsn_w - 1 downto 0);
+      v.wr_blk_pointer := TO_UINT(v.ref_sosi.bsn(c_blk_pointer_w - 1 downto 0));
+      v.sync_arr(v.wr_blk_pointer) := v.ref_sosi.sync;
+      v.bsn_arr(v.wr_blk_pointer) := v.ref_sosi.bsn(g_bsn_w - 1 downto 0);
 
       -- . update read block pointer at g_bsn_latency_max blocks behind the
-      --   reference write pointer, dependent on the node_index:
-      --   - for g_nof_aligners_max = 1 the node_index = 0 fixed
-      --   - for g_nof_aligners_max > 1, node_index is the first BSN aligner in
-      --     a chain. Each subsequent node in the chain then has to account
-      --     for g_bsn_latency_max additional block latency.
+      --   reference write pointer, dependent on the chain_node_index:
+      --   - for g_nof_aligners_max = 1 the chain_node_index = 0 fixed
+      --   - for g_nof_aligners_max > 1, chain_node_index = 0 is the first BSN
+      --     aligner in a chain. Each subsequent node in the chain then has to
+      --     account for g_bsn_latency_max additional block latency.
       if g_nof_aligners_max = 1 then
         v.rd_blk_pointer := v.wr_blk_pointer - g_bsn_latency_max;
       else
-        v.rd_blk_pointer := v.wr_blk_pointer - g_bsn_latency_max * (1 + node_index);
+        v.rd_blk_pointer := v.wr_blk_pointer - g_bsn_latency_max * chain_node_index - c_bsn_latency_first_node;
       end if;
       if v.rd_blk_pointer < 0 then
         v.rd_blk_pointer := v.rd_blk_pointer + c_buffer_nof_blocks;
diff --git a/libraries/base/dp/src/vhdl/mmp_dp_bsn_align_v2.vhd b/libraries/base/dp/src/vhdl/mmp_dp_bsn_align_v2.vhd
index 20456ab387ba3873e6a87d8843bafd78250ac876..8159e1975563675b9ce940cc16f49a3262e264a0 100644
--- a/libraries/base/dp/src/vhdl/mmp_dp_bsn_align_v2.vhd
+++ b/libraries/base/dp/src/vhdl/mmp_dp_bsn_align_v2.vhd
@@ -45,15 +45,17 @@ entity mmp_dp_bsn_align_v2 is
   generic (
     -- for dp_bsn_align_v2
     g_nof_streams                : natural;  -- number of input and output streams
-    g_bsn_latency_max            : natural;  -- Maximum travel latency of a remote block in number of block periods T_blk
-    g_nof_aligners_max           : natural := 1;  -- 1 when only align at last node, > 1 when align at every intermediate node
+    g_bsn_latency_max            : natural;  -- Maximum travel latency of a remote block in number of block periods
+    g_use_aligner_at_first_node  : boolean := true;
+    g_nof_aligners_max           : natural := 1;  -- 1 when only align at last node,
+                                                  -- > 1 when align at every intermediate node
     g_block_size                 : natural := 32;  -- > 1, g_block_size=1 is not supported
     g_bsn_w                      : natural := c_dp_stream_bsn_w;  -- number of bits in sosi BSN
     g_data_w                     : natural;  -- number of bits in sosi data
     g_data_replacement_value     : integer := 0;  -- output sosi data value for missing input blocks
     g_use_mm_output              : boolean := false;  -- output via MM or via streaming DP
-    g_pipeline_input             : natural := 1;  -- >= 0, choose 0 for wires, choose 1 to ease timing closure of in_sosi_arr
-    g_pipeline_output            : natural := 1;  -- >= 0, choose 0 for wires, choose 1 to ease timing closure of out_sosi_arr
+    g_pipeline_input             : natural := 1;  -- >= 0, 0 for wires, 1 to ease timing closure of in_sosi_arr
+    g_pipeline_output            : natural := 1;  -- >= 0, 0 for wires, 1 to ease timing closure of out_sosi_arr
     g_rd_latency                 : natural := 2;  -- 1 or 2, choose 2 to ease timing closure
     -- for mms_dp_bsn_monitor_v2
     g_nof_clk_per_sync           : natural := 200 * 10**6;
@@ -78,17 +80,17 @@ entity mmp_dp_bsn_align_v2 is
     dp_rst                  : in  std_logic;
     dp_clk                  : in  std_logic;
 
-    node_index              : in  natural range 0 to g_nof_aligners_max - 1 := 0;  -- only used when g_nof_aligners_max > 1
+    chain_node_index       : in  natural range 0 to g_nof_aligners_max - 1 := 0;  -- only used when g_nof_aligners_max > 1
 
     -- Streaming input
     in_sosi_arr             : in  t_dp_sosi_arr(g_nof_streams - 1 downto 0);
 
-    -- Output via local MM interface in dp_clk domain, when g_use_mm_output = TRUE.
+    -- Output via local MM interface in dp_clk domain, when g_use_mm_output = true.
     mm_sosi                 : out t_dp_sosi;  -- streaming information that signals that an output block can be read
     mm_copi                 : in  t_mem_copi := c_mem_copi_rst;  -- read access to output block, all output streams share same mm_copi
     mm_cipo_arr             : out t_mem_cipo_arr(g_nof_streams - 1 downto 0);
 
-    -- Output via streaming DP interface, when g_use_mm_output = FALSE.
+    -- Output via streaming DP interface, when g_use_mm_output = false.
     out_sosi_arr            : out t_dp_sosi_arr(g_nof_streams - 1 downto 0)
   );
 end mmp_dp_bsn_align_v2;
@@ -220,6 +222,7 @@ begin
   generic map (
     g_nof_streams                => g_nof_streams,
     g_bsn_latency_max            => g_bsn_latency_max,
+    g_use_aligner_at_first_node  => g_use_aligner_at_first_node,
     g_nof_aligners_max           => g_nof_aligners_max,
     g_block_size                 => g_block_size,
     g_bsn_w                      => g_bsn_w,
@@ -233,7 +236,7 @@ begin
   port map (
     dp_rst                  => dp_rst,
     dp_clk                  => dp_clk,
-    node_index              => node_index,
+    chain_node_index        => chain_node_index,
     -- MM control
     stream_en_arr           => stream_en_arr,
     stream_replaced_cnt_arr => stream_replaced_cnt_arr,
diff --git a/libraries/base/dp/tb/vhdl/tb_dp_bsn_align_v2.vhd b/libraries/base/dp/tb/vhdl/tb_dp_bsn_align_v2.vhd
index c6db289f9ed18f783f7200c0011a250f934233b7..de380eb9a93944d9dc1f036262457eb9adb0e038 100644
--- a/libraries/base/dp/tb/vhdl/tb_dp_bsn_align_v2.vhd
+++ b/libraries/base/dp/tb/vhdl/tb_dp_bsn_align_v2.vhd
@@ -34,7 +34,7 @@
 --   . g_lost_bsn_id to loose a single block in stream 1 and verify that
 --     it gets replaced and flagged.
 --   . array of one or more BSN aligners via g_nof_aligners_max >= 1,
---     using node_index_arr, only support tb for g_use_mm_output = FALSE
+--     using chain_node_index_arr, only support tb for g_use_mm_output = false
 -- Remark:
 --   For this BSN aligner component it was essential to have an almost
 --   complete, reviewed, detailed design document, because it is a complex
@@ -141,6 +141,9 @@ architecture tb of tb_dp_bsn_align_v2 is
 
   constant c_lost_bsn_stream_id       : natural := sel_a_b(g_nof_streams > 1, 1, 0);  -- fixed use stream 1 to verify g_lost_bsn_id. Use 0 for g_nof_streams = 1.
 
+  -- In tb no support (yet) for immediate aligned output at first node, when c_nof_aligners_max > 1
+  constant c_use_aligner_at_first_node : boolean := true;
+
   -- In the tb only support MM interface verification for c_nof_aligners_max = 1
   constant c_nof_aligners_max  : positive := sel_a_b(g_use_mm_output, 1, g_nof_aligners_max);
 
@@ -181,7 +184,7 @@ architecture tb of tb_dp_bsn_align_v2 is
   signal rst                   : std_logic := '1';
   signal sl1                   : std_logic := '1';
 
-  signal node_index_arr        : t_nat_natural_arr(0 to c_nof_aligners_max - 1) := array_init(0, c_nof_aligners_max, 1);
+  signal chain_node_index_arr  : t_nat_natural_arr(0 to c_nof_aligners_max - 1) := array_init(0, c_nof_aligners_max, 1);
 
   signal stream_en_arr         : std_logic_vector(g_nof_streams - 1 downto 0) := (others => '1');  -- default all streams are enabled
   signal stream_lost_arr       : std_logic_vector(g_nof_streams - 1 downto 0) := (others => '0');  -- default no streams are lost
@@ -501,6 +504,7 @@ begin
   generic map (
     g_nof_streams                => g_nof_streams,
     g_bsn_latency_max            => g_bsn_latency_max,
+    g_use_aligner_at_first_node  => c_use_aligner_at_first_node,
     g_nof_aligners_max           => c_nof_aligners_max,
     g_block_size                 => g_block_size,
     g_bsn_w                      => g_bsn_w,
@@ -515,8 +519,8 @@ begin
     dp_rst         => rst,
     dp_clk         => clk,
     -- Control
-    node_index     => node_index_arr(0),
-    stream_en_arr  => stream_en_arr,
+    chain_node_index => chain_node_index_arr(0),
+    stream_en_arr    => stream_en_arr,
     -- Streaming input
     in_sosi_arr    => dut_in_sosi_2arr(0),
     -- Output via local MM interface in dp_clk domain
@@ -567,8 +571,8 @@ begin
       dp_rst         => rst,
       dp_clk         => clk,
       -- Control
-      node_index     => node_index_arr(I),
-      stream_en_arr  => stream_en_arr,
+      chain_node_index => chain_node_index_arr(I),
+      stream_en_arr    => stream_en_arr,
       -- Streaming input
       in_sosi_arr    => dut_in_sosi_2arr(I),
       -- Output via streaming DP interface
diff --git a/libraries/base/dp/tb/vhdl/tb_mmp_dp_bsn_align_v2.vhd b/libraries/base/dp/tb/vhdl/tb_mmp_dp_bsn_align_v2.vhd
index 78ff1107922be99fcc03ddd829a454cf448692a1..6360636150cfd1447aab750ddc3109df80ffb2e8 100644
--- a/libraries/base/dp/tb/vhdl/tb_mmp_dp_bsn_align_v2.vhd
+++ b/libraries/base/dp/tb/vhdl/tb_mmp_dp_bsn_align_v2.vhd
@@ -103,8 +103,10 @@ architecture tb of tb_mmp_dp_bsn_align_v2 is
   constant c_gap_size                   : natural := c_block_period - c_block_size;
 
   -- DUT latency
+  constant c_ref_sosi_latency           : natural := 0;
   constant c_mm_to_dp_latency           : natural := 1;
-  constant c_dut_latency                : natural := c_pipeline_input + c_rd_latency + c_mm_to_dp_latency + c_pipeline_output;
+  constant c_dut_latency                : natural := c_pipeline_input + c_ref_sosi_latency +
+                                                     c_rd_latency + c_mm_to_dp_latency + c_pipeline_output;
 
   constant c_align_latency_nof_blocks   : natural := c_bsn_latency_max * c_nof_aligners_max;  -- in number blocks
   constant c_align_latency_nof_valid    : natural := c_bsn_latency_max * c_nof_aligners_max * c_block_size;  -- in number of data samples
@@ -147,7 +149,7 @@ architecture tb of tb_mmp_dp_bsn_align_v2 is
   signal dp_clk                   : std_logic := '1';
   signal dp_rst                   : std_logic := '1';
 
-  signal node_index               : natural := 0;
+  signal chain_node_index         : natural := 0;
   signal ref_siso_arr             : t_dp_siso_arr(c_nof_streams - 1 downto 0) := (others => c_dp_siso_rdy);
   signal ref_sosi_arr             : t_dp_sosi_arr(c_nof_streams - 1 downto 0);  -- generated stimuli
   signal in_sosi_arr              : t_dp_sosi_arr(c_nof_streams - 1 downto 0) := (others => c_dp_sosi_rst);  -- input stimuli
@@ -456,7 +458,7 @@ begin
     dp_rst                  => dp_rst,
     dp_clk                  => dp_clk,
 
-    node_index              => node_index,
+    chain_node_index        => chain_node_index,
     -- Streaming input
     in_sosi_arr             => in_sosi_arr,
     -- Output via local MM in dp_clk domain