diff --git a/applications/arts/libraries/arts_tab_beamformer/src/vhdl/arts_tab_beamformer.vhd b/applications/arts/libraries/arts_tab_beamformer/src/vhdl/arts_tab_beamformer.vhd
index 14a7e9db5e038b7634f9104406a5339d098a72cb..643f6c7c1f308152967948ab261efbeb42f2ed95 100644
--- a/applications/arts/libraries/arts_tab_beamformer/src/vhdl/arts_tab_beamformer.vhd
+++ b/applications/arts/libraries/arts_tab_beamformer/src/vhdl/arts_tab_beamformer.vhd
@@ -182,7 +182,9 @@ BEGIN
       g_weights_w             => g_weights_w,
       g_weights_file          => sel_a_b(g_weights_file="UNUSED", "UNUSED", g_weights_file & "_" & NATURAL'IMAGE(i)),
       g_weights_ram_dual_port => g_weights_ram_dual_port,
-      g_mult_variant          => g_mult_variant
+      g_mult_variant          => g_mult_variant,
+      g_use_weight_ram        => FALSE, -- Weights RAM is not selected for this instance
+      g_use_weight_reg        => TRUE -- Weights are held in registers instead of RAM
     )
     PORT MAP (
       dp_clk      => dp_clk,
@@ -198,9 +200,7 @@ BEGIN
   
       snk_in_arr  => snk_in_arr,
       src_out     => beamformer_src_out_arr(i)
-    );
-
-    
+    );  
 
     ------------------------------------------------------------------------------
     -- Requantize the outputs to the desired bit width
diff --git a/libraries/dsp/beamformer/src/vhdl/beamformer.vhd b/libraries/dsp/beamformer/src/vhdl/beamformer.vhd
index c7e82a0a11f487cc2f24dc11544b35cddcf35a48..444ff319504ed8961e8a269e002af7f19b4d4526 100644
--- a/libraries/dsp/beamformer/src/vhdl/beamformer.vhd
+++ b/libraries/dsp/beamformer/src/vhdl/beamformer.vhd
@@ -35,6 +35,11 @@
 --   . Input/output reordering and quantization are also application 
 --     specific and should be done in the wrapper.
 -- . The input array snk_in_arr must be synchronous.
+-- . If RAM is not desired, set g_use_weight_ram to FALSE and set either
+--   g_use_weight_reg or g_use_weight_arr to TRUE instead.
+--   . The addressing stays the same (done using weight_addr); these generics
+--     only select the source of the weights.
+-- . Note: Set exactly one of g_use_weight_ram/_reg/_arr to TRUE.
 
 LIBRARY IEEE;
 USE IEEE.std_logic_1164.ALL;
@@ -53,8 +58,11 @@ ENTITY beamformer IS
     g_nof_weights           : NATURAL;
     g_weights_w             : NATURAL := 16;
     g_weights_file          : STRING  := "hex/beamformer_weights";
-    g_weights_ram_dual_port : BOOLEAN := TRUE;
-    g_mult_variant          : STRING := "IP"
+    g_weights_ram_dual_port : BOOLEAN := TRUE; -- FIXME: rename this to readback_weights (also mapped to g_readback of the weights register)
+    g_mult_variant          : STRING := "IP";
+    g_use_weight_ram        : BOOLEAN := TRUE;  -- Use weights RAM (default)
+    g_use_weight_reg        : BOOLEAN := FALSE; -- Use a weights register instead of RAM
+    g_use_weight_arr        : BOOLEAN := FALSE  -- Use the input weights array instead of the weights RAM or registers
   );
   PORT (
     dp_clk      : IN  STD_LOGIC; 
@@ -63,10 +71,11 @@ ENTITY beamformer IS
     mm_clk      : IN  STD_LOGIC;
     mm_rst      : IN  STD_LOGIC;
 
-    ram_mosi    : IN  t_mem_mosi := c_mem_mosi_rst; -- MM interface to upload weights to RAM
+    ram_mosi    : IN  t_mem_mosi := c_mem_mosi_rst; -- MM interface to upload weights to RAM or REG
     ram_miso    : OUT t_mem_miso;
 
-    weight_addr : IN  STD_LOGIC_VECTOR(ceil_log2(g_nof_weights)-1 DOWNTO 0); -- Weight RAM address
+    weight_addr : IN  STD_LOGIC_VECTOR(ceil_log2(g_nof_weights)-1 DOWNTO 0); -- Weight address (selects the weight for both the RAM and the register variant)
+    weight_arr  : IN  t_slv_32_arr(g_nof_inputs-1 DOWNTO 0) := (OTHERS=>(OTHERS=>'0')); -- Input weights array, one word per input, defaults to all zeros. NOTE(review): intended for g_use_weight_arr = TRUE, but not referenced in the visible architecture changes - confirm
    
     snk_in_arr  : IN  t_dp_sosi_arr(g_nof_inputs-1 DOWNTO 0); -- All streams must be synchronous
     src_out     : OUT t_dp_sosi
@@ -97,6 +106,20 @@ ARCHITECTURE str OF beamformer IS
  
   SIGNAL common_ram_crw_crw_src_out_arr : t_dp_sosi_arr(g_nof_inputs-1 DOWNTO 0);
 
+  ------------------------------------------------------------------------------
+  -- Weights REG (types and signals for the g_use_weight_reg = TRUE variant)
+  ------------------------------------------------------------------------------
+  TYPE t_common_reg_r_d_dc_out_slv_arr IS ARRAY(g_nof_inputs-1 DOWNTO 0) OF STD_LOGIC_VECTOR(c_common_ram_crw_crw_ram.dat_w*c_common_ram_crw_crw_ram.nof_dat-1 DOWNTO 0); -- Per input: all register words concatenated into one SLV
+  TYPE t_common_reg_r_d_dc_out_arr     IS ARRAY(g_nof_weights-1 DOWNTO 0) OF STD_LOGIC_VECTOR(c_common_ram_crw_crw_ram.dat_w-1 DOWNTO 0); -- Per weight: one word of dat_w bits
+  TYPE t_common_reg_r_d_dc_out_2arr    IS ARRAY(g_nof_inputs-1 DOWNTO 0) OF t_common_reg_r_d_dc_out_arr; -- Indexed as [g_nof_inputs][g_nof_weights]
+
+  SIGNAL common_reg_r_w_dc_in_reg_slv_arr  : t_common_reg_r_d_dc_out_slv_arr; -- Connected to in_reg of the weights registers
+  SIGNAL common_reg_r_w_dc_out_reg_slv_arr : t_common_reg_r_d_dc_out_slv_arr; -- Connected to out_reg of the weights registers
+  SIGNAL common_reg_r_w_dc_out_reg_2arr    : t_common_reg_r_d_dc_out_2arr; -- out_reg contents split into individual weights
+
+  -- The register outputs the weight 1 cycle too soon relative to the RAM, so weight_addr is delayed by one dp_clk cycle
+  SIGNAL reg_weight_addr :  STD_LOGIC_VECTOR(ceil_log2(g_nof_weights)-1 DOWNTO 0);
+
   ------------------------------------------------------------------------------
   -- Pipeline 
   ------------------------------------------------------------------------------
@@ -113,50 +136,101 @@ BEGIN
   ------------------------------------------------------------------------------
   -- Weights RAM
   ------------------------------------------------------------------------------
-  gen_common_ram_crw_crw : FOR i IN 0 TO g_nof_inputs-1 GENERATE
-
-    -- Read request on every incoming valid cycle
-    common_ram_crw_crw_rd_en_b_arr(i) <= snk_in_arr(i).valid;
-
-    -- Use entity input for read address
-    common_ram_crw_crw_adr_b_arr(i) <= weight_addr;
-
-    -- Dual clock RAM
-    u_common_ram_crw_crw : ENTITY common_lib.common_ram_crw_crw
-    GENERIC MAP (
-      g_technology     => g_technology,
-      g_ram            => c_common_ram_crw_crw_ram,
-      g_init_file      => sel_a_b(g_weights_file="UNUSED", "UNUSED", g_weights_file & "_" & NATURAL'IMAGE(i) & ".hex"),
-      g_true_dual_port => g_weights_ram_dual_port
-    )
-    PORT MAP (
-      rst_a     => mm_rst,
-      clk_a     => mm_clk,
-      wr_en_a   => ram_mosi_arr(i).wr,
-      wr_dat_a  => ram_mosi_arr(i).wrdata(c_common_ram_crw_crw_ram.dat_w -1 DOWNTO 0),
-      adr_a     => ram_mosi_arr(i).address(c_common_ram_crw_crw_ram.adr_w-1 DOWNTO 0),
-      rd_en_a   => ram_mosi_arr(i).rd,
-      rd_dat_a  => ram_miso_arr(i).rddata(c_common_ram_crw_crw_ram.dat_w -1 DOWNTO 0),
-      rd_val_a  => ram_miso_arr(i).rdval,
-
-      rst_b     => dp_rst,
-      clk_b     => dp_clk,
-      wr_en_b   => '0',
-      wr_dat_b  => (OTHERS =>'0'),
-      adr_b     => common_ram_crw_crw_adr_b_arr(i),
-      rd_en_b   => common_ram_crw_crw_rd_en_b_arr(i),
-      rd_dat_b  => common_ram_crw_crw_rd_dat_b_arr(i),
-      rd_val_b  => common_ram_crw_crw_rd_val_b_arr(i)
-    );  
-
-    -- RAM output rewired to SOSI array
-    common_ram_crw_crw_src_out_arr(i).re <= RESIZE_DP_DSP_DATA(common_ram_crw_crw_rd_dat_b_arr(i)(  g_weights_w-1 DOWNTO 0));
-    common_ram_crw_crw_src_out_arr(i).im <= RESIZE_DP_DSP_DATA(common_ram_crw_crw_rd_dat_b_arr(i)(2*g_weights_w-1 DOWNTO g_weights_w));
-    common_ram_crw_crw_src_out_arr(i).valid <= common_ram_crw_crw_rd_val_b_arr(i);
+  gen_weight_ram : IF g_use_weight_ram = TRUE GENERATE -- Weights are read from one RAM per input
+    gen_common_ram_crw_crw : FOR i IN 0 TO g_nof_inputs-1 GENERATE
+  
+      -- Issue a read request on every valid cycle of input stream i
+      common_ram_crw_crw_rd_en_b_arr(i) <= snk_in_arr(i).valid;
+  
+      -- All RAMs use the weight_addr entity input as read address
+      common_ram_crw_crw_adr_b_arr(i) <= weight_addr;
+  
+      -- Dual clock RAM: port A is clocked by mm_clk (MM access), port B by dp_clk (weights read out)
+      u_common_ram_crw_crw : ENTITY common_lib.common_ram_crw_crw
+      GENERIC MAP (
+        g_technology     => g_technology,
+        g_ram            => c_common_ram_crw_crw_ram,
+        g_init_file      => sel_a_b(g_weights_file="UNUSED", "UNUSED", g_weights_file & "_" & NATURAL'IMAGE(i) & ".hex"), -- Per input init file <g_weights_file>_<i>.hex, or "UNUSED"
+        g_true_dual_port => g_weights_ram_dual_port
+      )
+      PORT MAP (
+        rst_a     => mm_rst,
+        clk_a     => mm_clk,
+        wr_en_a   => ram_mosi_arr(i).wr,
+        wr_dat_a  => ram_mosi_arr(i).wrdata(c_common_ram_crw_crw_ram.dat_w -1 DOWNTO 0),
+        adr_a     => ram_mosi_arr(i).address(c_common_ram_crw_crw_ram.adr_w-1 DOWNTO 0),
+        rd_en_a   => ram_mosi_arr(i).rd,
+        rd_dat_a  => ram_miso_arr(i).rddata(c_common_ram_crw_crw_ram.dat_w -1 DOWNTO 0),
+        rd_val_a  => ram_miso_arr(i).rdval,
+  
+        rst_b     => dp_rst,
+        clk_b     => dp_clk,
+        wr_en_b   => '0', -- Port B is never written
+        wr_dat_b  => (OTHERS =>'0'),
+        adr_b     => common_ram_crw_crw_adr_b_arr(i),
+        rd_en_b   => common_ram_crw_crw_rd_en_b_arr(i),
+        rd_dat_b  => common_ram_crw_crw_rd_dat_b_arr(i),
+        rd_val_b  => common_ram_crw_crw_rd_val_b_arr(i)
+      );  
+  
+      -- RAM output rewired to SOSI array: the low g_weights_w bits are re, the next g_weights_w bits are im
+      common_ram_crw_crw_src_out_arr(i).re <= RESIZE_DP_DSP_DATA(common_ram_crw_crw_rd_dat_b_arr(i)(  g_weights_w-1 DOWNTO 0));
+      common_ram_crw_crw_src_out_arr(i).im <= RESIZE_DP_DSP_DATA(common_ram_crw_crw_rd_dat_b_arr(i)(2*g_weights_w-1 DOWNTO g_weights_w));
+      common_ram_crw_crw_src_out_arr(i).valid <= common_ram_crw_crw_rd_val_b_arr(i);
+  
+    END GENERATE;
+  END GENERATE;
+
+  ------------------------------------------------------------------------------
+  -- Weights register
+  ------------------------------------------------------------------------------
+  gen_weight_reg : IF g_use_weight_reg = TRUE GENERATE -- Weights are read from one MM register bank per input
+    gen_common_reg_r_w_dc : FOR i IN 0 TO g_nof_inputs-1 GENERATE
+  
+      u_common_reg_r_w_dc : ENTITY common_lib.common_reg_r_w_dc
+      GENERIC MAP (
+        g_cross_clock_domain => TRUE,
+        g_readback           => g_weights_ram_dual_port, -- The RAM dual port generic is reused as readback enable
+        g_reg                => c_common_ram_crw_crw_ram -- Same memory definition constant as used for the weights RAM
+      )
+      PORT MAP (
+        -- Clocks and reset
+        mm_rst         => mm_rst,
+        mm_clk         => mm_clk,
+        st_rst         => dp_rst,
+        st_clk         => dp_clk,
+    
+        -- Memory Mapped Slave in mm_clk domain
+        sla_in         => ram_mosi_arr(i),
+        sla_out        => ram_miso_arr(i),
+    
+        -- MM registers in st_clk domain
+        reg_wr_arr     => open,
+        reg_rd_arr     => open,
+        in_reg         => common_reg_r_w_dc_in_reg_slv_arr(i),  
+        out_reg        => common_reg_r_w_dc_out_reg_slv_arr(i)
+      );
+  
+      -- Rewire the concatenated SLV array to something we can index properly [g_nof_inputs][g_nof_weights]; weight j occupies bits j*2*g_weights_w+2*g_weights_w-1 DOWNTO j*2*g_weights_w
+      gen_common_reg_r_w_dc_out_reg_2arr : FOR j IN 0 TO g_nof_weights-1 GENERATE
+        common_reg_r_w_dc_out_reg_2arr(i)(j) <= common_reg_r_w_dc_out_reg_slv_arr(i)(j*2*g_weights_w+2*g_weights_w-1 DOWNTO j*2*g_weights_w);
+      END GENERATE;
+
+      -- REG output rewired to 'RAM' SOSI array. The weight is indexed by reg_weight_addr. NOTE(review): only .re and .im are driven here; .valid, which the RAM variant drives from rd_val_b, is left undriven - confirm it is not used downstream.
+      common_ram_crw_crw_src_out_arr(i).re <= RESIZE_DP_DSP_DATA(common_reg_r_w_dc_out_reg_2arr(i)(TO_UINT(reg_weight_addr))(  g_weights_w-1 DOWNTO 0));
+      common_ram_crw_crw_src_out_arr(i).im <= RESIZE_DP_DSP_DATA(common_reg_r_w_dc_out_reg_2arr(i)(TO_UINT(reg_weight_addr))(2*g_weights_w-1 DOWNTO g_weights_w));
+
+    END GENERATE; 
+
+    gen_readback: IF g_weights_ram_dual_port=TRUE GENERATE -- Loop the register outputs back to in_reg for all inputs
+      common_reg_r_w_dc_in_reg_slv_arr <= common_reg_r_w_dc_out_reg_slv_arr;
+    END GENERATE;
 
   END GENERATE; 
 
-  -- Combine the individual RAM MM buses into one
+  ------------------------------------------------------------------------------
+  -- Combine the individual MM buses into one
+  ------------------------------------------------------------------------------
   u_common_mem_mux : ENTITY common_lib.common_mem_mux
   GENERIC MAP (    
     g_nof_mosi    => g_nof_inputs,
@@ -224,4 +298,16 @@ BEGIN
     src_out    => src_out
   );
 
+  ------------------------------------------------------------------------------
+  -- Registers: reg_weight_addr is weight_addr delayed by one dp_clk cycle
+  ------------------------------------------------------------------------------
+  p_clk : PROCESS(dp_rst, dp_clk)
+  BEGIN
+    IF dp_rst='1' THEN -- Asynchronous reset: dp_rst is in the sensitivity list and checked before the clock edge
+      reg_weight_addr <= (OTHERS=>'0');
+    ELSIF rising_edge(dp_clk) THEN
+      reg_weight_addr <= weight_addr; -- Registered copy, used to index the weights register outputs
+    END IF;
+  END PROCESS;
+
 END str; 
diff --git a/libraries/dsp/beamformer/tb/vhdl/tb_beamformer.vhd b/libraries/dsp/beamformer/tb/vhdl/tb_beamformer.vhd
index 8f59580bdd65b88c8f06041a55d11e119c191f9a..d612047441006d80726c172ebdcf332ec0c790cb 100644
--- a/libraries/dsp/beamformer/tb/vhdl/tb_beamformer.vhd
+++ b/libraries/dsp/beamformer/tb/vhdl/tb_beamformer.vhd
@@ -42,11 +42,13 @@ USE mm_lib.mm_file_pkg.ALL;
 
 ENTITY tb_beamformer IS
   GENERIC (
-    g_tb_index     : NATURAL := 0;      -- use different index to avoid MM file conflict in multi tb
-    --g_technology   : NATURAL := c_tech_select_default;
-    g_nof_inputs   : NATURAL := 2;
-    g_nof_weights  : NATURAL := 32;
-    g_data_w       : NATURAL := 8   --8b complex input data
+    g_tb_index       : NATURAL := 0;      -- use different index to avoid MM file conflict in multi tb
+    --g_technology     : NATURAL := c_tech_select_default;
+    g_nof_inputs     : NATURAL := 2;  -- passed on to the beamformer generic g_nof_inputs
+    g_nof_weights    : NATURAL := 32; -- passed on to the beamformer generic g_nof_weights
+    g_data_w         : NATURAL := 8;  --8b complex input data
+    g_use_weight_ram : BOOLEAN := FALSE; -- passed on to the beamformer generic g_use_weight_ram
+    g_use_weight_reg : BOOLEAN := TRUE -- passed on to the beamformer generic g_use_weight_reg (this tb defaults to the register variant)
   );
 END tb_beamformer;
 
@@ -281,9 +283,9 @@ ARCHITECTURE tb OF tb_beamformer IS
 
     proc_common_wait_some_cycles(dp_clk, 1);
     IF OK = '0' THEN
-      REPORT "TEST WENT WRONG.";
+      REPORT "TEST FAILED." SEVERITY ERROR; -- A REPORT without severity defaults to NOTE; use ERROR so a failing run is flagged by the simulator
     ELSE
-      REPORT "Test succesfull ended.";
+      REPORT "Test passed.";
     END IF;
 
     tb_end <= '1';  -- end test
@@ -302,11 +304,13 @@ ARCHITECTURE tb OF tb_beamformer IS
   u_beamformer : ENTITY work.beamformer
   GENERIC MAP (
 
-    g_technology   => c_tech_select_default,
-    g_nof_inputs   => g_nof_inputs,
-    g_nof_weights  => g_nof_weights,
-    g_data_w       => g_data_w,
-    g_weights_file => "UNUSED"
+    g_technology     => c_tech_select_default,
+    g_nof_inputs     => g_nof_inputs,
+    g_nof_weights    => g_nof_weights,
+    g_data_w         => g_data_w,
+    g_weights_file   => "UNUSED",
+    g_use_weight_ram => g_use_weight_ram,
+    g_use_weight_reg => g_use_weight_reg
   )
   PORT MAP (
     dp_clk      => dp_clk,
diff --git a/libraries/dsp/beamformer/tb/vhdl/tb_tb_beamformer.vhd b/libraries/dsp/beamformer/tb/vhdl/tb_tb_beamformer.vhd
index 0b2aabd2627180f91c34014c4b5c5c6caf35f208..465906a8f32b102a06a6e02c5497427b137740a7 100644
--- a/libraries/dsp/beamformer/tb/vhdl/tb_tb_beamformer.vhd
+++ b/libraries/dsp/beamformer/tb/vhdl/tb_tb_beamformer.vhd
@@ -42,16 +42,23 @@ BEGIN
 --   > Testbenches are self-checking
 
 --    
---  g_tb_index     : NATURAL := 0;      -- use different index to avoid MM file conflict in multi tb
+--  g_tb_index       : NATURAL := 0;      -- use different index to avoid MM file conflict in multi tb
 --  --g_technology   : NATURAL := c_tech_select_default;
---  g_nof_inputs   : NATURAL := 2;
---  g_nof_weights  : NATURAL := 32;
---  g_data_w       : NATURAL := 8   --8b complex input data
+--  g_nof_inputs     : NATURAL := 2;
+--  g_nof_weights    : NATURAL := 32;
+--  g_data_w         : NATURAL := 8;   --8b complex input data
+--  g_use_weight_ram : BOOLEAN := FALSE;
+--  g_use_weight_reg : BOOLEAN := TRUE
 --
 
 -- do test for different number of inputs
-sim_i1_beamformer   : ENTITY work.tb_beamformer GENERIC MAP (1, 1, 32, 8);
-sim_i2_beamformer   : ENTITY work.tb_beamformer GENERIC MAP (2, 2, 32, 8);
-sim_i32_beamformer   : ENTITY work.tb_beamformer GENERIC MAP (3, 32, 32, 8);
+-- . weights in RAM (g_use_weight_ram = TRUE, g_use_weight_reg = FALSE)
+sim_i01_ram_beamformer : ENTITY work.tb_beamformer GENERIC MAP (g_tb_index => 1, g_nof_inputs =>  1, g_nof_weights => 32, g_data_w => 8, g_use_weight_ram => TRUE,  g_use_weight_reg => FALSE);
+sim_i02_ram_beamformer : ENTITY work.tb_beamformer GENERIC MAP (g_tb_index => 2, g_nof_inputs =>  2, g_nof_weights => 32, g_data_w => 8, g_use_weight_ram => TRUE,  g_use_weight_reg => FALSE);
+sim_i32_ram_beamformer : ENTITY work.tb_beamformer GENERIC MAP (g_tb_index => 3, g_nof_inputs => 32, g_nof_weights => 32, g_data_w => 8, g_use_weight_ram => TRUE,  g_use_weight_reg => FALSE);
+-- . weights in registers (g_use_weight_ram = FALSE, g_use_weight_reg = TRUE)
+sim_i01_reg_beamformer : ENTITY work.tb_beamformer GENERIC MAP (g_tb_index => 4, g_nof_inputs =>  1, g_nof_weights => 32, g_data_w => 8, g_use_weight_ram => FALSE, g_use_weight_reg => TRUE);
+sim_i02_reg_beamformer : ENTITY work.tb_beamformer GENERIC MAP (g_tb_index => 5, g_nof_inputs =>  2, g_nof_weights => 32, g_data_w => 8, g_use_weight_ram => FALSE, g_use_weight_reg => TRUE);
+sim_i32_reg_beamformer : ENTITY work.tb_beamformer GENERIC MAP (g_tb_index => 6, g_nof_inputs => 32, g_nof_weights => 32, g_data_w => 8, g_use_weight_ram => FALSE, g_use_weight_reg => TRUE);
 
-END tb;
\ No newline at end of file
+END tb;