diff --git a/libraries/base/dp/src/vhdl/dp_bsn_align_v2.vhd b/libraries/base/dp/src/vhdl/dp_bsn_align_v2.vhd
index 913aaa73b5fa9a0fbc8a6d55fda37c939e9badff..1cb352790fe78b5c37bf5d1370d272f94e77a6ad 100644
--- a/libraries/base/dp/src/vhdl/dp_bsn_align_v2.vhd
+++ b/libraries/base/dp/src/vhdl/dp_bsn_align_v2.vhd
@@ -61,6 +61,7 @@ ENTITY dp_bsn_align_v2 IS
     g_bsn_w                      : NATURAL := c_dp_stream_bsn_w;  -- number of bits in sosi BSN
     g_data_w                     : NATURAL;           -- number of bits in sosi data
     g_filler_value               : INTEGER := 0;      -- output sosi data value for missing input blocks
+    g_use_mm_output              : BOOLEAN := FALSE;  -- TRUE = output via MM interface, FALSE = output via streaming DP interface
     g_pipeline_input             : NATURAL := 0;      -- >= 0, choose 0 for wires, choose 1 to ease timing closure
     g_rd_latency                 : NATURAL := 1       -- 1 or 2, choose 2 to ease timing closure
   );
@@ -79,7 +80,10 @@ ENTITY dp_bsn_align_v2 IS
     -- Output via local MM interface in dp_clk domain
     mm_sosi        : OUT t_dp_sosi;   -- streaming information that signals that an output block can be read
     mm_copi        : IN  t_mem_copi;  -- read access to output block, all output streams share same mm_copi
-    mm_cipo_arr    : OUT t_mem_cipo_arr(g_nof_streams-1 DOWNTO 0)
+    mm_cipo_arr    : OUT t_mem_cipo_arr(g_nof_streams-1 DOWNTO 0);  -- read data, assigned when g_use_mm_output = TRUE
+
+    -- Output via streaming DP interface
+    out_sosi_arr   : OUT t_dp_sosi_arr(g_nof_streams-1 DOWNTO 0)
   );
 END dp_bsn_align_v2;
 
@@ -112,7 +116,7 @@ ARCHITECTURE rtl OF dp_bsn_align_v2 IS
     wr_copi_arr       : t_mem_copi_arr(g_nof_streams-1 DOWNTO 0);
     -- all streams
     filled_arr        : t_filled_arr(g_nof_streams-1 DOWNTO 0);
-    use_filler_data   : STD_LOGIC_VECTOR(c_product_w-1 DOWNTO 0);
+    use_filler_data   : STD_LOGIC_VECTOR(g_nof_streams-1 DOWNTO 0);  -- one bit per stream, '1' = replace rd data by g_filler_value
     -- local reference
     sync_arr          : STD_LOGIC_VECTOR(g_buffer_nof_blocks-1 DOWNTO 0);
     bsn_arr           : t_bsn_arr(g_buffer_nof_blocks-1 DOWNTO 0);
@@ -138,10 +142,13 @@ ARCHITECTURE rtl OF dp_bsn_align_v2 IS
   SIGNAL r             : t_reg;
   SIGNAL nxt_r         : t_reg;
 
+  SIGNAL dp_copi       : t_mem_copi;  -- read request that is used when g_use_mm_output = FALSE
+  SIGNAL dp_cipo_arr   : t_mem_cipo_arr(g_nof_streams-1 DOWNTO 0);  -- read data that is assigned when g_use_mm_output = FALSE
+
   -- Pipeline registers
   SIGNAL in_sosi_arr_p : t_dp_sosi_arr(g_nof_streams-1 DOWNTO 0);
   SIGNAL rd_copi_p     : t_mem_copi;
-  SIGNAL rd_cipo_arr   : t_mem_cipo_arr(g_nof_streams-1 DOWNTO 0);
+  SIGNAL rd_cipo_arr   : t_mem_cipo_arr(g_nof_streams-1 DOWNTO 0) := (OTHERS=>c_mem_cipo_rst);  -- NOTE(review): initial value presumably avoids 'U' at simulation start, confirm
 
 BEGIN
 
@@ -163,7 +170,8 @@ BEGIN
     VARIABLE v_ref_sosi          : t_dp_sosi;
     VARIABLE v_pointer_slv       : STD_LOGIC_VECTOR(c_blk_pointer_w-1 DOWNTO 0);
     VARIABLE v_product_slv       : STD_LOGIC_VECTOR(c_product_w-1 DOWNTO 0);
-    VARIABLE v_fill_flag         : STD_LOGIC;
+    VARIABLE v_filler_flag       : STD_LOGIC;  -- inverted filled_arr bit of a stream at the rd_pointer
+    VARIABLE v_rd_cipo_arr       : t_mem_cipo_arr(g_nof_streams-1 DOWNTO 0);  -- rd_cipo_arr with rddata replaced by the filler value where flagged
   BEGIN
     v := r;
     v.mm_sosi := func_dp_stream_reset_control(r.mm_sosi);
@@ -225,12 +233,12 @@ BEGIN
         --   determine whether the ouput has to insert filler data
         v.mm_sosi.channel := (OTHERS=>'0');
         FOR I IN 0 TO g_nof_streams-1 LOOP
-          v_fill_flag := v.filled_arr(I)(v.rd_pointer);
+          v_filler_flag := NOT v.filled_arr(I)(v.rd_pointer);  -- '1' when the filled_arr bit at rd_pointer is '0'
           IF stream_en_arr(I) = '1' THEN  -- use MM bit at sop
-            v.use_filler_data(I) := v_fill_flag;  -- enabled stream
-            v.mm_sosi.channel(I) := v_fill_flag;
+            v.use_filler_data(I) := v_filler_flag;  -- enabled stream: use filler data only when flagged
+            v.mm_sosi.channel(I) := v_filler_flag;  -- pass on the filler flag per stream via the channel field
           ELSE
-            v.use_filler_data(I) := v_fill_flag;  -- disabled stream
+            v.use_filler_data(I) := '1';  -- disabled stream: always use filler data
           END IF;
         END LOOP;
       END IF;
@@ -242,17 +250,27 @@ BEGIN
     END IF;
 
     -- p_read
-    -- . rd address
-    v.rd_copi := mm_copi;
-    v.rd_copi.address := RESIZE_MEM_ADDRESS(ADD_UVEC(r.rd_offset, mm_copi.address));  -- sum yields c_mem_ram.adr_w bits, because left operand determines width
+    -- . adjust the rd address: take the read request from the selected output interface and add r.rd_offset
+    IF g_use_mm_output THEN
+      v.rd_copi := mm_copi;  -- do output via MM interface
+      v.rd_copi.address := RESIZE_MEM_ADDRESS(ADD_UVEC(r.rd_offset, mm_copi.address));  -- sum yields c_mem_ram.adr_w bits, because left operand determines width
+    ELSE
+      v.rd_copi := dp_copi;  -- do output via DP streaming interface
+      v.rd_copi.address := RESIZE_MEM_ADDRESS(ADD_UVEC(r.rd_offset, dp_copi.address));  -- sum yields c_mem_ram.adr_w bits, because left operand determines width
+    END IF;
 
-    -- . rd data
-    mm_cipo_arr <= rd_cipo_arr;  -- default use input data
+    -- . output the rd data, with filler data for streams that have r.use_filler_data = '1'
+    v_rd_cipo_arr := rd_cipo_arr;  -- default use input data
     FOR I IN 0 TO g_nof_streams-1 LOOP
       IF r.use_filler_data(I) = '1' THEN
-        mm_cipo_arr(I).rddata <= TO_MEM_SDATA(g_filler_value);
+        v_rd_cipo_arr(I).rddata := TO_MEM_SDATA(g_filler_value);  -- replace by filler data
       END IF;
     END LOOP;
+    IF g_use_mm_output THEN
+      mm_cipo_arr <= v_rd_cipo_arr;  -- output via MM interface
+    ELSE
+      dp_cipo_arr <= v_rd_cipo_arr;  -- output via DP streaming interface
+    END IF;
 
     -- next state
     nxt_r <= v;
@@ -279,6 +297,7 @@ BEGIN
 
 
   -- Pipelining
+  -- . input streams (instance u_in_sosi_arr_p)
   u_in_sosi_arr_p : ENTITY work.dp_pipeline_arr
   GENERIC MAP (
     g_nof_streams => g_nof_streams,
@@ -293,6 +312,7 @@ BEGIN
     src_out_arr  => in_sosi_arr_p
   );
 
+  -- . read RAM: use nxt_r.rd_copi when g_rd_latency = 1, else use r.rd_copi
   rd_copi_p <= nxt_r.rd_copi WHEN g_rd_latency = 1 ELSE r.rd_copi;
 
 END rtl;