diff --git a/libraries/base/dp/hdllib.cfg b/libraries/base/dp/hdllib.cfg
index 1d3533b137fdbd8d27f60afb547b4febd902487b..26e882b63228ecd4ab4c86bd1150e28e84695f03 100644
--- a/libraries/base/dp/hdllib.cfg
+++ b/libraries/base/dp/hdllib.cfg
@@ -110,6 +110,7 @@ synth_files =
     src/vhdl/dp_bsn_align.vhd
     src/vhdl/dp_bsn_align_reg.vhd
     src/vhdl/mms_dp_bsn_align.vhd
+    src/vhdl/dp_bsn_align_buffer.vhd
     src/vhdl/dp_bsn_align_v2.vhd
     src/vhdl/mmp_dp_bsn_align_v2.vhd
     src/vhdl/dp_frame_rd.vhd
diff --git a/libraries/base/dp/src/vhdl/dp_bsn_align_buffer.vhd b/libraries/base/dp/src/vhdl/dp_bsn_align_buffer.vhd
new file mode 100644
index 0000000000000000000000000000000000000000..3bff0bcf7e754abce1f79298d09c9c021ce7f742
--- /dev/null
+++ b/libraries/base/dp/src/vhdl/dp_bsn_align_buffer.vhd
@@ -0,0 +1,114 @@
+-------------------------------------------------------------------------------
+--
+-- Copyright (C) 2024
+-- ASTRON (Netherlands Institute for Radio Astronomy) <http://www.astron.nl/>
+-- P.O.Box 2, 7990 AA Dwingeloo, The Netherlands
+--
+-- This program is free software: you can redistribute it and/or modify
+-- it under the terms of the GNU General Public License as published by
+-- the Free Software Foundation, either version 3 of the License, or
+-- (at your option) any later version.
+--
+-- This program is distributed in the hope that it will be useful,
+-- but WITHOUT ANY WARRANTY; without even the implied warranty of
+-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+-- GNU General Public License for more details.
+--
+-- You should have received a copy of the GNU General Public License
+-- along with this program.  If not, see <http://www.gnu.org/licenses/>.
+--
+-------------------------------------------------------------------------------
+
+-- Author: E. Kooistra
+-- Purpose: Circular buffer for dp_bsn_align_v2
+-- Description:
+-- . By default the same buffer size is used for all streams.
+-- . Optionally use reduced size for the remote stream to save block RAM. The
+--   reduced size is possible when there is only one remote input (so
+--   g_nof_streams = 2) and that input has been passed along a series of
+--   bsn aligners (so g_nof_aligners_max > 1). The local input requires the
+--   default buffer size (= g_ram_buf), but the remote input then can use a
+--   reduced buffer size (g_remote_ram_buf).
+-- . The buffer sizes have to be a power of two number of blocks.
+
+library IEEE, common_lib;
+use IEEE.std_logic_1164.all;
+use common_lib.common_mem_pkg.all;
+
+entity dp_bsn_align_buffer is  -- set of common_ram_r_w buffers, one per stream, read via one shared read bus
+  generic (
+    g_nof_streams      : natural;  -- number of streams, sets the range of wr_copi_arr and rd_cipo_arr
+    g_nof_aligners_max : natural := 1;  -- when > 1 and g_nof_streams = 2, then the reduced remote buffer is used
+    -- Default RAM buffer size, used for all streams, or only for stream 0 when the reduced remote buffer is used
+    g_ram_buf          : t_c_mem;
+    -- Optional smaller size for remote stream 1, only used when g_nof_streams = 2 and g_nof_aligners_max > 1
+    g_remote_ram_buf   : t_c_mem := c_mem_ram
+  );
+  port (
+    dp_rst       : in  std_logic := '0';  -- connects to rst of every RAM instance
+    dp_clk       : in  std_logic;  -- connects to clk of every RAM instance, used for both write and read
+    wr_copi_arr  : in  t_mem_copi_arr(g_nof_streams - 1 downto 0);  -- write bus per stream, fields wr, address, wrdata are used
+    rd_copi      : in  t_mem_copi;  -- read bus shared by all streams, fields rd, address are used
+    rd_cipo_arr  : out t_mem_cipo_arr(g_nof_streams - 1 downto 0)  -- read result per stream, only rddata(dat_w - 1 downto 0) and rdval are driven
+  );
+end dp_bsn_align_buffer;
+
+architecture str of dp_bsn_align_buffer is  -- structural: contains only common_ram_r_w instances
+  constant c_use_reduced_remote_buffer : boolean := g_nof_streams = 2 and g_nof_aligners_max > 1;  -- selects which of the two generate branches is elaborated
+begin
+  use_uniform_data_buffer : if c_use_reduced_remote_buffer = false generate  -- every stream gets a RAM of size g_ram_buf
+    gen_data_buffer : for I in 0 to g_nof_streams - 1 generate
+      u_data_buffer : entity common_lib.common_ram_r_w
+      generic map (
+        g_ram     => g_ram_buf
+      )
+      port map (
+        rst       => dp_rst,
+        clk       => dp_clk,
+        wr_en     => wr_copi_arr(I).wr,
+        wr_adr    => wr_copi_arr(I).address(g_ram_buf.adr_w - 1 downto 0),  -- only the low adr_w address bits are used
+        wr_dat    => wr_copi_arr(I).wrdata(g_ram_buf.dat_w - 1 downto 0),  -- only the low dat_w data bits are used
+        rd_en     => rd_copi.rd,  -- same read enable and read address for every stream
+        rd_adr    => rd_copi.address(g_ram_buf.adr_w - 1 downto 0),
+        rd_dat    => rd_cipo_arr(I).rddata(g_ram_buf.dat_w - 1 downto 0),  -- rddata bits at dat_w and above are not driven here
+        rd_val    => rd_cipo_arr(I).rdval
+      );
+    end generate;
+  end generate;
+
+  use_reduced_data_buffer : if c_use_reduced_remote_buffer = true generate  -- exactly two streams: 0 = local, 1 = remote
+    -- Local data buffer (stream 0) has default size g_ram_buf
+    u_local_data_buffer : entity common_lib.common_ram_r_w
+    generic map (
+      g_ram     => g_ram_buf
+    )
+    port map (
+      rst       => dp_rst,
+      clk       => dp_clk,
+      wr_en     => wr_copi_arr(0).wr,
+      wr_adr    => wr_copi_arr(0).address(g_ram_buf.adr_w - 1 downto 0),
+      wr_dat    => wr_copi_arr(0).wrdata(g_ram_buf.dat_w - 1 downto 0),
+      rd_en     => rd_copi.rd,  -- read bus is shared with the remote buffer
+      rd_adr    => rd_copi.address(g_ram_buf.adr_w - 1 downto 0),
+      rd_dat    => rd_cipo_arr(0).rddata(g_ram_buf.dat_w - 1 downto 0),
+      rd_val    => rd_cipo_arr(0).rdval
+    );
+
+    -- Remote data buffer (stream 1) has reduced size g_remote_ram_buf, to save block RAM
+    u_remote_data_buffer : entity common_lib.common_ram_r_w
+    generic map (
+      g_ram     => g_remote_ram_buf
+    )
+    port map (
+      rst       => dp_rst,
+      clk       => dp_clk,
+      wr_en     => wr_copi_arr(1).wr,
+      wr_adr    => wr_copi_arr(1).address(g_remote_ram_buf.adr_w - 1 downto 0),  -- NOTE(review): address bits at g_remote_ram_buf.adr_w and above are dropped, presumably the addresses then wrap within the smaller RAM - confirm for g_block_size that is not a power of two
+      wr_dat    => wr_copi_arr(1).wrdata(g_remote_ram_buf.dat_w - 1 downto 0),
+      rd_en     => rd_copi.rd,
+      rd_adr    => rd_copi.address(g_remote_ram_buf.adr_w - 1 downto 0),  -- same truncation as for wr_adr, so write and read use the same address mapping
+      rd_dat    => rd_cipo_arr(1).rddata(g_remote_ram_buf.dat_w - 1 downto 0),
+      rd_val    => rd_cipo_arr(1).rdval
+    );
+  end generate;
+end str;
diff --git a/libraries/base/dp/src/vhdl/dp_bsn_align_v2.vhd b/libraries/base/dp/src/vhdl/dp_bsn_align_v2.vhd
index 61db3174e6f4f846ff445cecef3a07421177f3b2..1db302c2c260d6961ad03118b44550c0afb7e6fd 100644
--- a/libraries/base/dp/src/vhdl/dp_bsn_align_v2.vhd
+++ b/libraries/base/dp/src/vhdl/dp_bsn_align_v2.vhd
@@ -168,6 +168,7 @@ end dp_bsn_align_v2;
 
 architecture rtl of dp_bsn_align_v2 is
   -- Circular buffer per stream, size is next power of two that fits
+  -- . default buffer size for uniform buffer
   constant c_buffer_nof_blocks : natural := sel_a_b(g_nof_aligners_max = 1,
            true_log_pow2(1 + g_bsn_latency_max),
            true_log_pow2(1 + g_bsn_latency_max * (g_nof_aligners_max - 1) + g_bsn_latency_first_node));
@@ -179,6 +180,15 @@ architecture rtl of dp_bsn_align_v2 is
                                           nof_dat  => c_ram_size,
                                           init_sl  => '0');
 
+  -- . reduced buffer size for remote input, passed on as g_remote_ram_buf to dp_bsn_align_buffer
+  constant c_remote_buffer_nof_blocks  : natural := true_log_pow2(1 + g_bsn_latency_max);  -- same expression as in c_buffer_nof_blocks for g_nof_aligners_max = 1
+  constant c_remote_ram_size           : natural := c_remote_buffer_nof_blocks * g_block_size;  -- number of words in the remote RAM
+  constant c_remote_ram_buf            : t_c_mem := (latency  => 1,
+                                                     adr_w    => ceil_log2(c_remote_ram_size),  -- address width that fits c_remote_ram_size words
+                                                     dat_w    => g_data_w,
+                                                     nof_dat  => c_remote_ram_size,
+                                                     init_sl  => '0');
+
   -- Use +1 to ensure that g_block_size that is power of two also fits in c_block_size_slv
   constant c_block_size_w   : natural := ceil_log2(g_block_size + 1);
   constant c_block_size_slv : std_logic_vector(c_block_size_w - 1 downto 0) := TO_UVEC(g_block_size, c_block_size_w);
@@ -500,23 +510,22 @@ begin
   -- Circular buffers
   ------------------------------------------------------------------------------
 
-  gen_data_buffer : for I in 0 to g_nof_streams - 1 generate
-    u_data_buffer : entity common_lib.common_ram_r_w
-    generic map (
-      g_ram     => c_ram_buf
-    )
-    port map (
-      rst       => dp_rst,
-      clk       => dp_clk,
-      wr_en     => r.wr_copi_arr(I).wr,
-      wr_adr    => r.wr_copi_arr(I).address(c_ram_buf.adr_w - 1 downto 0),
-      wr_dat    => r.wr_copi_arr(I).wrdata(c_ram_buf.dat_w - 1 downto 0),
-      rd_en     => rd_copi.rd,
-      rd_adr    => rd_copi.address(c_ram_buf.adr_w - 1 downto 0),
-      rd_dat    => rd_cipo_arr(I).rddata(c_ram_buf.dat_w - 1 downto 0),
-      rd_val    => rd_cipo_arr(I).rdval
-    );
-  end generate;
+  u_circular_buffer : entity work.dp_bsn_align_buffer  -- holds the buffer RAMs for all streams
+  generic map (
+    g_nof_streams      => g_nof_streams,
+    g_nof_aligners_max => g_nof_aligners_max,
+    -- Default RAM buffer size for all streams
+    g_ram_buf          => c_ram_buf,
+    -- Optional smaller size for remote stream when g_nof_streams = 2 and g_nof_aligners_max > 1
+    g_remote_ram_buf   => c_remote_ram_buf
+  )
+  port map (
+    dp_rst       => dp_rst,
+    dp_clk       => dp_clk,
+    wr_copi_arr  => r.wr_copi_arr,  -- write side, one write bus per stream
+    rd_copi      => rd_copi,  -- read side, one read bus for all streams
+    rd_cipo_arr  => rd_cipo_arr  -- read result per stream
+  );
 
   ------------------------------------------------------------------------------
   -- MM to streaming DP