diff --git a/libraries/dsp/correlator/src/vhdl/corr_accumulator.vhd b/libraries/dsp/correlator/src/vhdl/corr_accumulator.vhd
index b0bd1d89712361b6c3fb4a1c09a53766c11e6f69..7954ecb0ee58808540e0fde2ad8ac1f4f745d113 100644
--- a/libraries/dsp/correlator/src/vhdl/corr_accumulator.vhd
+++ b/libraries/dsp/correlator/src/vhdl/corr_accumulator.vhd
@@ -34,10 +34,10 @@ USE dp_lib.dp_stream_pkg.ALL;
 
 ENTITY corr_accumulator IS
   GENERIC (
-    g_nof_inputs        : NATURAL;
-    g_nof_acc_per_input : NATURAL;
-    g_nof_words_to_acc  : NATURAL;
-    g_data_w            : NATURAL
+    g_nof_inputs        : NATURAL; -- Number of input streams; range of the per-stream generate loops and passed on to corr_adder
+    g_nof_acc_per_input : NATURAL; -- Number of running sums kept per input stream; sets the shift RAM size and delay
+    g_nof_words_to_acc  : NATURAL; -- Number of words added into each running sum; only used to size the accumulator width
+    g_data_w            : NATURAL  -- Input data width in bits; the accumulator width derived from it is applied to re and im separately
    ); 
   PORT (
     rst            : IN  STD_LOGIC;
@@ -50,15 +50,12 @@ END corr_accumulator;
 
 ARCHITECTURE str OF corr_accumulator IS
 
-  -- Help constant so other constants can be calculated if g_nof_words_to_acc<1
---  CONSTANT g_nof_words_to_acc        : NATURAL := sel_a_b(g_nof_words_to_acc>0, g_nof_words_to_acc, 3);
-
-  CONSTANT c_shiftram_io_delay        : NATURAL := 4; -- common_shiftram data_in takes 4 cycles to emerge as data_out
-
-  CONSTANT c_shiftram_data_w          : NATURAL := 32; --2*(g_data_w+1);
+  -- Accumulator width per re/im part: g_data_w plus ceil_log2(g_nof_words_to_acc) bits of growth, so the sum of g_nof_words_to_acc words fits
+  CONSTANT c_acc_data_w               : NATURAL := g_data_w + ceil_log2(g_nof_words_to_acc); -- sum of widths: no NATURAL product that can overflow
 
   -- c_shiftram_delay is such that common_shiftram output aligns exactly with snk_in_arr. Functionally this 
   -- means we aligned the current word to the corresponding previous word at the adder inputs.
+  CONSTANT c_shiftram_io_delay        : NATURAL := 4; -- common_shiftram data_in takes 4 cycles to emerge as data_out
   CONSTANT c_shiftram_delay           : NATURAL := g_nof_acc_per_input-c_shiftram_io_delay-1;
   CONSTANT c_shift_w                  : NATURAL := ceil_log2(g_nof_acc_per_input);
   CONSTANT c_common_shiftram_shift_in : STD_LOGIC_VECTOR(c_shift_w-1 DOWNTO 0) := TO_UVEC(c_shiftram_delay, c_shift_w);
@@ -74,81 +71,76 @@ ARCHITECTURE str OF corr_accumulator IS
 
 BEGIN
 
---  gen_accumulator: IF g_nof_words_to_acc>0 GENERATE
-    -----------------------------------------------------------------------------
-    -- Adder inputs: current snk_in_arr + corresponding previous running sum
-    -- from shiftram
-    -----------------------------------------------------------------------------
-    gen_adder_inputs : FOR i IN 0 TO g_nof_inputs-1 GENERATE
-      corr_adder_snk_in_2arr_2(i)(0) <= snk_in_arr(i);
-      corr_adder_snk_in_2arr_2(i)(1).valid <= common_shiftram_src_out_arr(i).valid;
-      corr_adder_snk_in_2arr_2(i)(1).re(16-1 DOWNTO 0) <= common_shiftram_src_out_arr(i).data(c_shiftram_data_w-1   DOWNTO c_shiftram_data_w/2);
-      corr_adder_snk_in_2arr_2(i)(1).im(16-1 DOWNTO 0) <= common_shiftram_src_out_arr(i).data(c_shiftram_data_w/2-1 DOWNTO 0);
-    END GENERATE;
-  
-    -----------------------------------------------------------------------------
-    -- Complex adder stage
-    -----------------------------------------------------------------------------
-    u_corr_adder : ENTITY work.corr_adder
+  -----------------------------------------------------------------------------
+  -- Adder inputs: current snk_in_arr + corresponding previous running sum
+  -- from shiftram
+  -----------------------------------------------------------------------------
+  gen_adder_inputs : FOR i IN 0 TO g_nof_inputs-1 GENERATE
+    -- Adder input 0: the incoming word, copied as a whole record. NOTE(review): presumes re/im are already sign extended beyond g_data_w - confirm
+    corr_adder_snk_in_2arr_2(i)(0) <= snk_in_arr(i);
+    -- Adder input 1: the previous running sum read back from the shift RAM, unpacked into re and im of c_acc_data_w bits each
+    corr_adder_snk_in_2arr_2(i)(1).valid                       <= common_shiftram_src_out_arr(i).valid;
+    corr_adder_snk_in_2arr_2(i)(1).re(c_acc_data_w-1 DOWNTO 0) <= common_shiftram_src_out_arr(i).data(2*c_acc_data_w-1 DOWNTO c_acc_data_w); -- upper half = re
+    corr_adder_snk_in_2arr_2(i)(1).im(c_acc_data_w-1 DOWNTO 0) <= common_shiftram_src_out_arr(i).data(  c_acc_data_w-1 DOWNTO 0); -- lower half = im
+  END GENERATE;
+
+  -----------------------------------------------------------------------------
+  -- Complex adder stage
+  -----------------------------------------------------------------------------
+  u_corr_adder : ENTITY work.corr_adder -- Single adder instance that handles all g_nof_inputs streams
+  GENERIC MAP (
+    g_nof_inputs => g_nof_inputs,
+    g_data_w     => c_acc_data_w -- Adder runs at accumulator width; per the author its output is c_acc_data_w+1 bits, of which only c_acc_data_w bits are stored
+  )
+  PORT MAP (
+    clk            => clk,
+    rst            => rst,
+
+    snk_in_2arr_2  => corr_adder_snk_in_2arr_2, -- Per stream: (0) = incoming word, (1) = previous running sum
+    src_out_arr    => corr_adder_src_out_arr -- Per stream: the new running sum
+  );  
+
+  -----------------------------------------------------------------------------
+  -- Write the current sum to RAM; RAM outputs delayed running sums that align
+  -- at the adder inputs:
+  -- . common_shiftram_src_out_arr = delayed corr_adder_src_out_arr
+  -----------------------------------------------------------------------------
+  gen_concat_complex : FOR i IN 0 TO g_nof_inputs-1 GENERATE
+    -- Pack the new running sum into one shift RAM word: re in the upper half, im in the lower half. Only the low c_acc_data_w bits of each adder output part are kept.
+    common_shiftram_snk_in_arr(i).data(2*c_acc_data_w-1 DOWNTO c_acc_data_w) <= corr_adder_src_out_arr(i).re(c_acc_data_w-1 DOWNTO 0);
+    common_shiftram_snk_in_arr(i).data(  c_acc_data_w-1 DOWNTO 0)            <= corr_adder_src_out_arr(i).im(c_acc_data_w-1 DOWNTO 0);
+    common_shiftram_snk_in_arr(i).valid                                      <= corr_adder_src_out_arr(i).valid; -- RAM write valid follows the adder output valid
+  END GENERATE;
+
+  gen_common_shiftram : FOR i IN 0 TO g_nof_inputs-1 GENERATE
+    u_common_shiftram : ENTITY common_lib.common_shiftram
     GENERIC MAP (
-      g_nof_inputs => g_nof_inputs,
-      g_data_w     => 16 --output = 17 bits!
+      g_data_w                           => 2*c_acc_data_w, -- One RAM word holds re and im side by side
+      g_nof_words                        => pow2(ceil_log2(g_nof_acc_per_input)), -- g_nof_acc_per_input rounded up to a power of two
+      g_output_invalid_during_shift_incr => TRUE -- NOTE(review): behaviour is defined inside common_shiftram, not visible here
     )
     PORT MAP (
-      clk            => clk,
       rst            => rst,
-  
-      snk_in_2arr_2  => corr_adder_snk_in_2arr_2,
-      src_out_arr    => corr_adder_src_out_arr
-    );  
-  
-    -----------------------------------------------------------------------------
-    -- Write the current sum to RAM; RAM outputs delayed running sums that align
-    -- at the adder inputs:
-    -- . common_shiftram_src_out_arr = delayed corr_adder_src_out_arr
-    -----------------------------------------------------------------------------
-    gen_concat_complex : FOR i IN 0 TO g_nof_inputs-1 GENERATE
-      -- Concatenate real&imaginary parts
-      common_shiftram_snk_in_arr(i).data(c_shiftram_data_w-1   DOWNTO c_shiftram_data_w/2) <= corr_adder_src_out_arr(i).re(c_shiftram_data_w/2-1 DOWNTO 0);
-      common_shiftram_snk_in_arr(i).data(c_shiftram_data_w/2-1 DOWNTO 0)                   <= corr_adder_src_out_arr(i).im(c_shiftram_data_w/2-1 DOWNTO 0);
-      common_shiftram_snk_in_arr(i).valid                                                  <= corr_adder_src_out_arr(i).valid;
-    END GENERATE;
-  
-    gen_common_shiftram : FOR i IN 0 TO g_nof_inputs-1 GENERATE
-      u_common_shiftram : ENTITY common_lib.common_shiftram
-      GENERIC MAP (
-        g_data_w    => c_shiftram_data_w,
-        g_nof_words => pow2(ceil_log2(g_nof_acc_per_input)),
-        g_output_invalid_during_shift_incr => TRUE
-      )
-      PORT MAP (
-        rst            => rst,
-        clk            => clk,
-        
-        data_in        => common_shiftram_snk_in_arr(i).data(c_shiftram_data_w-1 DOWNTO 0),
-        data_in_val    => common_shiftram_snk_in_arr(i).valid,
-        data_in_shift  => c_common_shiftram_shift_in,
-     
-        data_out       => common_shiftram_src_out_arr(i).data(c_shiftram_data_w-1 DOWNTO 0),
-        data_out_val   => common_shiftram_src_out_arr(i).valid,
-        data_out_shift => common_shiftram_data_out_shift_arr(i)
-      );
-    END GENERATE;
-  
-    -----------------------------------------------------------------------------
-    -- Output 1/g_nof_acc_per_input words per stream
-    -- . Not implemented yet.
-    -----------------------------------------------------------------------------
-    gen_src_out_arr : FOR i IN 0 TO g_nof_inputs-1 GENERATE
-      src_out_arr(i).re(c_shiftram_data_w/2-1 DOWNTO 0) <= common_shiftram_src_out_arr(i).data(c_shiftram_data_w-1   DOWNTO c_shiftram_data_w/2);
-      src_out_arr(i).im(c_shiftram_data_w/2-1 DOWNTO 0) <= common_shiftram_src_out_arr(i).data(c_shiftram_data_w/2-1 DOWNTO 0);
-      src_out_arr(i).valid <= common_shiftram_src_out_arr(i).valid;
-    END GENERATE;
-
---  END GENERATE;
-
---  gen_bypass: IF g_nof_words_to_acc<1 GENERATE
---    src_out_arr <= snk_in_arr;
---  END GENERATE;
+      clk            => clk,
+      
+      data_in        => common_shiftram_snk_in_arr(i).data(2*c_acc_data_w-1 DOWNTO 0), -- Packed re&im running sum from the adder
+      data_in_val    => common_shiftram_snk_in_arr(i).valid,
+      data_in_shift  => c_common_shiftram_shift_in, -- Constant shift derived from c_shiftram_delay
+   
+      data_out       => common_shiftram_src_out_arr(i).data(2*c_acc_data_w-1 DOWNTO 0), -- Delayed running sum, fed to adder input 1 and to src_out_arr
+      data_out_val   => common_shiftram_src_out_arr(i).valid,
+      data_out_shift => common_shiftram_data_out_shift_arr(i) -- Not used elsewhere in the visible code
+    );
+  END GENERATE;
+
+  -----------------------------------------------------------------------------
+  -- Output 1/g_nof_acc_per_input words per stream
+  -- . Not implemented yet.
+  -----------------------------------------------------------------------------
+  gen_src_out_arr : FOR i IN 0 TO g_nof_inputs-1 GENERATE -- Forwards every valid shift RAM output word; no selection of final sums is done here
+    src_out_arr(i).re(c_acc_data_w-1 DOWNTO 0) <= common_shiftram_src_out_arr(i).data(2*c_acc_data_w-1 DOWNTO c_acc_data_w); -- upper half = re
+    src_out_arr(i).im(c_acc_data_w-1 DOWNTO 0) <= common_shiftram_src_out_arr(i).data(  c_acc_data_w-1 DOWNTO 0); -- lower half = im
+    src_out_arr(i).valid                       <= common_shiftram_src_out_arr(i).valid;
+  END GENERATE;
 
 END str;
diff --git a/libraries/dsp/correlator/tb/python/tc_correlator.py b/libraries/dsp/correlator/tb/python/tc_correlator.py
index bfbb81b0dd05a7928f50ad67de2ddde75ab991fc..bcac2360d61eea01858ad23858576ab9a4392d67 100644
--- a/libraries/dsp/correlator/tb/python/tc_correlator.py
+++ b/libraries/dsp/correlator/tb/python/tc_correlator.py
@@ -36,12 +36,12 @@ NOF_INPUTS = 10
 NOF_OUTPUTS = NOF_INPUTS*(NOF_INPUTS+1)/2
 NOF_CHANNELS = 64
 
-BUFFER_DEPTH = 256
-BUFFER_WIDTH = 2*COMPLEX_WIDTH
+# BUFFER_DEPTH = 256 is retired: the buffer sizes are written as literals at the places that use them
+BUFFER_WIDTH = 64 # Bits per buffer word, split into an upper and a lower half when unpacked (was 2*COMPLEX_WIDTH)
 
 tc = test_case.Testcase('TB - ', '')
 io = node_io.NodeIO(tc.nodeImages, tc.base_ip)
-db = pi_diag_data_buffer.PiDiagDataBuffer(tc, io, nofStreams=NOF_OUTPUTS, ramSizePerStream=BUFFER_DEPTH)
+db = pi_diag_data_buffer.PiDiagDataBuffer(tc, io, nofStreams=NOF_OUTPUTS, ramSizePerStream=2*256) # NOTE(review): presumably 256 buffer words of 64 bits = 2*256 MM words of 32 bits - confirm
 
 ###############################################################################
 # Read data buffers
@@ -50,9 +50,9 @@ db = pi_diag_data_buffer.PiDiagDataBuffer(tc, io, nofStreams=NOF_OUTPUTS, ramSiz
 ###############################################################################
 data = []
 
-do_until_ge(db.read_nof_words, ms_retry=2000, val=BUFFER_DEPTH, s_timeout=900)
+do_until_ge(db.read_nof_words, ms_retry=2000, val=256, s_timeout=900) # 256 is the value formerly named BUFFER_DEPTH
 for output_nr in range(NOF_OUTPUTS):
-    data.append( db.read_data_buffer(streamNr=output_nr, n=256, radix='uns', width=BUFFER_WIDTH, nofColumns=12)[0] )
+    data.append( db.read_data_buffer(streamNr=output_nr, n=2*256, radix='uns', width=BUFFER_WIDTH, nofColumns=12)[0] ) # n matches ramSizePerStream; NOTE(review): presumably two MM words per 64 bit buffer word - confirm
 
 ###############################################################################
 # 'data' is now a 2d array of [NOF_OUTPUTS][NOF_CHANNELS]. We want to group the
@@ -68,8 +68,8 @@ for channel_nr in range(NOF_CHANNELS):
     channel_data = data[channel_nr]
     for index,word in enumerate(channel_data):
         word_bits = CommonBits(word, BUFFER_WIDTH)
-        re = word_bits[BUFFER_WIDTH-1:COMPLEX_WIDTH]
-        im = word_bits[COMPLEX_WIDTH-1:0]
+        re = word_bits[BUFFER_WIDTH-1:BUFFER_WIDTH/2]
+        im = word_bits[BUFFER_WIDTH/2-1:0]
         channel_data[index] = complex(im, re)
     
     ###############################################################################
diff --git a/libraries/dsp/correlator/tb/vhdl/tb_correlator.vhd b/libraries/dsp/correlator/tb/vhdl/tb_correlator.vhd
index b047d61355a03737bcc5886eba1604f2b261ff7a..9fc0be2024768ff1e741f551c700bd6d080a39df 100644
--- a/libraries/dsp/correlator/tb/vhdl/tb_correlator.vhd
+++ b/libraries/dsp/correlator/tb/vhdl/tb_correlator.vhd
@@ -40,7 +40,7 @@ ARCHITECTURE tb OF tb_correlator IS
 
   CONSTANT c_dp_clk_period      : TIME := 10 ns;
   CONSTANT c_mm_clk_period      : TIME := 10 ps;
-  CONSTANT c_data_w             : NATURAL := 32;
+  CONSTANT c_complex_data_w     : NATURAL := 16; -- Width of one complex part; the block generator word is 2*c_complex_data_w bits wide
 
   -- Block generator
   CONSTANT c_bg_block_size              : NATURAL := 1024;
@@ -109,9 +109,9 @@ BEGIN
   u_mms_diag_block_gen : ENTITY diag_lib.mms_diag_block_gen
   GENERIC MAP (
     g_nof_output_streams => c_nof_inputs,
-    g_buf_dat_w          => c_data_w,
+    g_buf_dat_w          => 2*c_complex_data_w, -- Two parts of c_complex_data_w bits each per block generator word
     g_buf_addr_w         => ceil_log2(TO_UINT(c_bg_ctrl.samples_per_packet)),
-    g_file_name_prefix   => "../../../libraries/dsp/correlator/src/hex/complex_subbands_" & NATURAL'IMAGE(c_data_w/2),
+    g_file_name_prefix   => "../../../libraries/dsp/correlator/src/hex/complex_subbands_" & NATURAL'IMAGE(c_complex_data_w), -- Prefix ends in the per-part width, here 16
     g_diag_block_gen_rst => c_bg_ctrl
   )
   PORT MAP (
@@ -146,7 +146,7 @@ BEGIN
   u_diag_data_buffer : ENTITY diag_lib.mms_diag_data_buffer
   GENERIC MAP (    
     g_nof_streams  => c_nof_mults,
-    g_data_w       => c_data_w,
+    g_data_w       => 64, -- NOTE(review): hard-coded; same value as BUFFER_WIDTH in tc_correlator.py, presumably 2x the correlator output part width - confirm
     g_data_type    => e_complex,
     g_buf_nof_data => 256, --c_bg_block_size,
     g_buf_use_sync => FALSE