diff --git a/libraries/dsp/correlator/src/vhdl/corr_accumulator.vhd b/libraries/dsp/correlator/src/vhdl/corr_accumulator.vhd
index 102434984b1dc609bc3b119c45ce89ac3ba34595..5292d94a20c6e0c09fbe00734e0a44888b848bd2 100644
--- a/libraries/dsp/correlator/src/vhdl/corr_accumulator.vhd
+++ b/libraries/dsp/correlator/src/vhdl/corr_accumulator.vhd
@@ -35,7 +35,7 @@ USE dp_lib.dp_stream_pkg.ALL;
 ENTITY corr_accumulator IS
   GENERIC (
     g_nof_inputs         : NATURAL; -- Number of input streams
-    g_nof_channels       : NATURAL; -- Number of accumulators to keep per input stream
+    g_nof_accumulators   : NATURAL; -- Number of accumulators to keep per input stream
     g_integration_period : NATURAL; -- Number of timesamples to accumulate
     g_data_w             : NATURAL  -- Complex input data width
    ); 
@@ -58,14 +58,14 @@ ARCHITECTURE str OF corr_accumulator IS
   CONSTANT c_shiftram_io_delay        : NATURAL := 4; -- common_shiftram data_in takes 4 cycles to emerge as data_out
 
   -- Note: Increase corr_adder pipelining by n = decrease c_shiftram_io_delay by n and vice versa.
-  CONSTANT c_shiftram_delay           : NATURAL := g_nof_channels-c_shiftram_io_delay-2;
+  CONSTANT c_shiftram_delay           : NATURAL := g_nof_accumulators-c_shiftram_io_delay-2;
 
-  CONSTANT c_shift_w                  : NATURAL := ceil_log2(g_nof_channels);
+  CONSTANT c_shift_w                  : NATURAL := ceil_log2(g_nof_accumulators);
   CONSTANT c_common_shiftram_shift_in : STD_LOGIC_VECTOR(c_shift_w-1 DOWNTO 0) := TO_UVEC(c_shiftram_delay, c_shift_w);
 
   -- Counter to keep track of the total numer of accumulations so we know when to reset accumulator to zero
-  SIGNAL acc_cnt                      : STD_LOGIC_VECTOR(ceil_log2(g_nof_channels*g_integration_period)-1 DOWNTO 0);
-  SIGNAL nxt_acc_cnt                  : STD_LOGIC_VECTOR(ceil_log2(g_nof_channels*g_integration_period)-1 DOWNTO 0);
+  SIGNAL acc_cnt                      : STD_LOGIC_VECTOR(ceil_log2(g_nof_accumulators*g_integration_period)-1 DOWNTO 0);
+  SIGNAL nxt_acc_cnt                  : STD_LOGIC_VECTOR(ceil_log2(g_nof_accumulators*g_integration_period)-1 DOWNTO 0);
 
   SIGNAL corr_adder_snk_in_2arr_2     : t_dp_sosi_2arr_2(g_nof_inputs-1 DOWNTO 0); -- Array of pairs
   SIGNAL nxt_corr_adder_snk_in_2arr_2 : t_dp_sosi_2arr_2(g_nof_inputs-1 DOWNTO 0); -- Array of pairs
@@ -88,15 +88,15 @@ BEGIN
   -- we'll feed zeros into the second adder input to reset the accumulator value
   -- for each channel.
   -----------------------------------------------------------------------------
-  nxt_acc_cnt <= (OTHERS=>'0') WHEN TO_UINT(acc_cnt)=g_nof_channels*g_integration_period-1 ELSE
+  nxt_acc_cnt <= (OTHERS=>'0') WHEN TO_UINT(acc_cnt)=g_nof_accumulators*g_integration_period-1 ELSE
                  INCR_UVEC(acc_cnt, 1) WHEN snk_in_arr(0).valid = '1' ELSE acc_cnt;
 
   gen_adder_inputs : FOR i IN 0 TO g_nof_inputs-1 GENERATE
     nxt_corr_adder_snk_in_2arr_2(i)(0) <= snk_in_arr(i);
     nxt_corr_adder_snk_in_2arr_2(i)(1).valid                       <= common_shiftram_src_out_arr(i).valid; 
-    nxt_corr_adder_snk_in_2arr_2(i)(1).re(c_acc_data_w-1 DOWNTO 0) <= (OTHERS=>'0') WHEN TO_UINT(acc_cnt)<=g_nof_channels-1 ELSE
+    nxt_corr_adder_snk_in_2arr_2(i)(1).re(c_acc_data_w-1 DOWNTO 0) <= (OTHERS=>'0') WHEN TO_UINT(acc_cnt)<=g_nof_accumulators-1 ELSE
                                                                       common_shiftram_src_out_arr(i).data(2*c_acc_data_w-1 DOWNTO c_acc_data_w);
-    nxt_corr_adder_snk_in_2arr_2(i)(1).im(c_acc_data_w-1 DOWNTO 0) <= (OTHERS=>'0') WHEN TO_UINT(acc_cnt)<=g_nof_channels-1 ELSE
+    nxt_corr_adder_snk_in_2arr_2(i)(1).im(c_acc_data_w-1 DOWNTO 0) <= (OTHERS=>'0') WHEN TO_UINT(acc_cnt)<=g_nof_accumulators-1 ELSE
                                                                       common_shiftram_src_out_arr(i).data(  c_acc_data_w-1 DOWNTO 0);  
   END GENERATE;
 
@@ -132,7 +132,7 @@ BEGIN
     u_common_shiftram : ENTITY common_lib.common_shiftram
     GENERIC MAP (
       g_data_w                           => 2*c_acc_data_w,
-      g_nof_words                        => pow2(ceil_log2(g_nof_channels)),
+      g_nof_words                        => pow2(ceil_log2(g_nof_accumulators)),
       g_output_invalid_during_shift_incr => TRUE,
       g_fixed_shift                      => TRUE
     )
@@ -152,7 +152,7 @@ BEGIN
 
   -----------------------------------------------------------------------------
   -- Output g_integration_period per stream once per integration period
-  -- . The first g_nof_channels words are output. At the same time, the
+  -- . The first g_nof_accumulators words are output. At the same time, the
   --   accumulators are reset (zeros) at the adder inputs.
   -- . Make sure the shiftram output is valid too so we don't output a block
   --   of zeros initially.
@@ -161,9 +161,9 @@ BEGIN
     nxt_src_out_arr(i).re(c_acc_data_w-1 DOWNTO 0) <= common_shiftram_src_out_arr(i).data(2*c_acc_data_w-1 DOWNTO c_acc_data_w);
     nxt_src_out_arr(i).im(c_acc_data_w-1 DOWNTO 0) <= common_shiftram_src_out_arr(i).data(  c_acc_data_w-1 DOWNTO 0);
     
-    nxt_src_out_arr(i).valid <= '1' WHEN TO_UINT(acc_cnt)<g_nof_channels   AND common_shiftram_src_out_arr(0).valid='1' ELSE '0';
-    nxt_src_out_arr(i).sop   <= '1' WHEN TO_UINT(acc_cnt)=0                AND common_shiftram_src_out_arr(0).valid='1' ELSE '0';
-    nxt_src_out_arr(i).eop   <= '1' WHEN TO_UINT(acc_cnt)=g_nof_channels-1 AND common_shiftram_src_out_arr(0).valid='1' ELSE '0';
+    nxt_src_out_arr(i).valid <= '1' WHEN TO_UINT(acc_cnt)<g_nof_accumulators   AND common_shiftram_src_out_arr(0).valid='1' ELSE '0';
+    nxt_src_out_arr(i).sop   <= '1' WHEN TO_UINT(acc_cnt)=0                    AND common_shiftram_src_out_arr(0).valid='1' ELSE '0';
+    nxt_src_out_arr(i).eop   <= '1' WHEN TO_UINT(acc_cnt)=g_nof_accumulators-1 AND common_shiftram_src_out_arr(0).valid='1' ELSE '0';
   END GENERATE;
 
   -----------------------------------------------------------------------------
diff --git a/libraries/dsp/correlator/src/vhdl/corr_visibility_buffer.vhd b/libraries/dsp/correlator/src/vhdl/corr_visibility_buffer.vhd
index 4d0cc9ec599de47eb0ab4feaddee14db9ac0fb86..1f83229f9aad1ec8662e90dcacb8702dea562ab4 100644
--- a/libraries/dsp/correlator/src/vhdl/corr_visibility_buffer.vhd
+++ b/libraries/dsp/correlator/src/vhdl/corr_visibility_buffer.vhd
@@ -33,8 +33,9 @@ USE dp_lib.dp_stream_pkg.ALL;
 ENTITY corr_visibility_buffer IS
   GENERIC (
     g_nof_inputs         : NATURAL; -- Number of input streams
-    g_nof_channels       : NATURAL; -- Number of channels per visibility
-    g_data_w             : NATURAL  -- Complex input data width
+    g_buffer_depth       : NATURAL; -- Number of words to buffer
+    g_data_w             : NATURAL; -- Complex input data width
+    g_nof_pre_mult_folds : NATURAL  -- Nof times the data has been folded
    ); 
   PORT (
     rst            : IN  STD_LOGIC;
@@ -64,7 +65,7 @@ BEGIN
       g_data_w         => 2*g_data_w,
       g_use_ctrl       => FALSE,
       g_use_complex    => TRUE,
-      g_fifo_size      => g_nof_channels,
+      g_fifo_size      => g_buffer_depth,
       g_fifo_af_margin => 0
     )
     PORT MAP (
@@ -108,8 +109,9 @@ BEGIN
   gen_dp_src_out_timer : FOR i IN 0 TO g_nof_inputs-1 GENERATE
     u_dp_src_out_timer : ENTITY dp_lib.dp_src_out_timer
     GENERIC MAP (
-      g_init_valid_delay      => i, --relative to dp_fifo_sc_src_out_arr(i).valid
-      g_nof_invalid_per_valid => g_nof_inputs 
+      g_init_valid_delay      => i*pow2(g_nof_pre_mult_folds), --relative to dp_fifo_sc_src_out_arr(i).valid
+      g_nof_invalid_per_valid => g_nof_inputs*pow2(g_nof_pre_mult_folds),
+      g_block_len             => pow2(g_nof_pre_mult_folds)
     )
     PORT MAP (
       rst                  => rst,
diff --git a/libraries/dsp/correlator/src/vhdl/correlator.vhd b/libraries/dsp/correlator/src/vhdl/correlator.vhd
index 96ee2dae71d76a59e19c7bf4818e459e1628fe22..58b991f1c0115f9a1b80556781ddf01fbc0f2fe5 100644
--- a/libraries/dsp/correlator/src/vhdl/correlator.vhd
+++ b/libraries/dsp/correlator/src/vhdl/correlator.vhd
@@ -68,11 +68,15 @@ ARCHITECTURE str OF correlator IS
   -- equal to or larger than the number of accumulator outputs.
   CONSTANT c_integration_period              : NATURAL := largest(g_integration_period, c_nof_mults);
 
+  -- Number of accumulator sums to keep per stream; scales with pow2(g_nof_pre_mult_folds)
+  CONSTANT c_nof_accumulators                : NATURAL := g_nof_channels*pow2(g_nof_pre_mult_folds);
+
   CONSTANT c_acc_data_w                      : NATURAL := ceil_log2(c_integration_period*(pow2(g_data_w)-1));
 
   SIGNAL corr_permutator_src_out_2arr_2      : t_dp_sosi_2arr_2(c_nof_visibilities-1 DOWNTO 0); -- Array of pairs
   SIGNAL corr_folder_snk_in_2arr_2           : t_dp_sosi_2arr_2(c_nof_visibilities-1 DOWNTO 0); -- Array of pairs, not folded yet
   SIGNAL corr_folder_src_out_2arr_2          : t_dp_sosi_2arr_2(c_nof_mults-1 DOWNTO 0);        -- Array of pairs, folded 
+  SIGNAL corr_multiplier_snk_in_2arr_2       : t_dp_sosi_2arr_2(c_nof_mults-1 DOWNTO 0);
   SIGNAL corr_multiplier_src_out_arr         : t_dp_sosi_arr(c_nof_mults-1 DOWNTO 0);
   SIGNAL corr_accumulator_snk_in_arr         : t_dp_sosi_arr(c_nof_mults-1 DOWNTO 0);
   SIGNAL corr_accumulator_src_out_arr        : t_dp_sosi_arr(c_nof_mults-1 DOWNTO 0);
@@ -142,6 +146,19 @@ BEGIN
     src_out_2arr_2 => corr_folder_src_out_2arr_2
   );
 
+  -- If we're folding an odd number of input pairs, we'll get a 50% valid
+  -- rate on the highest stream index. We don't want that (because
+  -- common_shiftram in corr_accumulator does not support holes in the data),
+  -- so override the valid with corr_folder_src_out_2arr_2(0)(0).valid in that case.
+  p_override_valid: PROCESS(corr_folder_src_out_2arr_2)
+  BEGIN
+    corr_multiplier_snk_in_2arr_2 <= corr_folder_src_out_2arr_2; -- Default: pass all folder output streams through unchanged
+    IF g_nof_pre_mult_folds/=0 AND (c_nof_visibilities REM 2 = 1) THEN -- Folding enabled and odd number of visibilities
+      corr_multiplier_snk_in_2arr_2(c_nof_mults-1)(0).valid <= corr_folder_src_out_2arr_2(0)(0).valid; -- Highest stream, element 0 of the pair
+      corr_multiplier_snk_in_2arr_2(c_nof_mults-1)(1).valid <= corr_folder_src_out_2arr_2(0)(0).valid; -- Highest stream, element 1 of the pair
+    END IF;
+  END PROCESS;
+
   -----------------------------------------------------------------------------
   -- Complex multiplier stage
   -----------------------------------------------------------------------------
@@ -155,7 +172,7 @@ BEGIN
     clk            => clk,
     rst            => rst,
 
-    snk_in_2arr_2  => corr_folder_src_out_2arr_2,
+    snk_in_2arr_2  => corr_multiplier_snk_in_2arr_2,
     src_out_arr    => corr_multiplier_src_out_arr
   );
 
@@ -182,7 +199,7 @@ BEGIN
   u_corr_accumulator : ENTITY work.corr_accumulator
   GENERIC MAP (
     g_nof_inputs         => c_nof_mults,
-    g_nof_channels       => g_nof_channels,
+    g_nof_accumulators   => c_nof_accumulators,
     g_integration_period => c_integration_period,
     g_data_w             => g_data_w 
   )
@@ -219,9 +236,10 @@ BEGIN
   ------------------------------------------------------------------------------
   u_corr_visibility_buffer : ENTITY work.corr_visibility_buffer
   GENERIC MAP (
-    g_nof_inputs   => c_nof_mults,
-    g_data_w       => c_acc_data_w,
-    g_nof_channels => g_nof_channels 
+    g_nof_inputs         => c_nof_mults,
+    g_data_w             => c_acc_data_w,
+    g_buffer_depth       => c_nof_accumulators,
+    g_nof_pre_mult_folds => g_nof_pre_mult_folds
   )
   PORT MAP (
     clk            => clk,
@@ -250,12 +268,16 @@ BEGIN
 
   -----------------------------------------------------------------------------
   -- Add proper SOP and EOP to mux output
-  -- . Output one block of c_nof_mults for each channel
+  -- . If no pre-multiplier folding is present, blocks of c_nof_visibilities
+  --   words are output. If folding is applied, c_nof_visibilities words are
+  --   also output, but rounded up to an even number. This is to
+  --   compensate for one dead multiplier cycle when folding an odd number
+  --   of visibilities.
   -----------------------------------------------------------------------------
   dp_block_gen: ENTITY dp_lib.dp_block_gen
   GENERIC MAP (
     g_use_src_in         => FALSE,
-    g_nof_data           => c_nof_mults,
+    g_nof_data           => sel_a_b(g_nof_pre_mult_folds=0, c_nof_visibilities, 2*ceil_div(c_nof_visibilities, 2)),
     g_nof_blk_per_sync   => 10 -- Randomly chosen sync interval 
   )
   PORT MAP (
diff --git a/libraries/dsp/correlator/tb/vhdl/tb_correlator.vhd b/libraries/dsp/correlator/tb/vhdl/tb_correlator.vhd
index 5073a0333d801ea0d5c585f3609387ec903cca1e..49b130822ed6d6e2aca0d481585bcb21db7223c0 100644
--- a/libraries/dsp/correlator/tb/vhdl/tb_correlator.vhd
+++ b/libraries/dsp/correlator/tb/vhdl/tb_correlator.vhd
@@ -35,21 +35,19 @@ END tb_correlator;
 
 ARCHITECTURE tb OF tb_correlator IS
 
-  CONSTANT c_nof_inputs         : NATURAL := 10;
-  CONSTANT c_nof_pre_mult_folds : NATURAL := 0; 
-  CONSTANT c_complex_data_w     : NATURAL := 16;
-  CONSTANT c_conjugate          : BOOLEAN := TRUE;
-  CONSTANT c_nof_channels       : NATURAL := 64;
-  CONSTANT c_integration_period : NATURAL := 0;
-
-  CONSTANT c_dp_clk_period      : TIME := 10 ns;
-  CONSTANT c_mm_clk_period      : TIME := 10 ps;
-
+  CONSTANT c_nof_inputs                 : NATURAL := 10;
+  CONSTANT c_nof_pre_mult_folds         : NATURAL := 1; 
+  CONSTANT c_complex_data_w             : NATURAL := 16;
+  CONSTANT c_conjugate                  : BOOLEAN := TRUE;
+  CONSTANT c_nof_channels               : NATURAL := 64;
+  CONSTANT c_integration_period         : NATURAL := 0;
 
+  -- Gap size on the correlator input depends on the number of folds
+  CONSTANT c_nof_invalid_per_valid      : NATURAL := pow2(c_nof_pre_mult_folds)-1;
 
   -- Block generator
   CONSTANT c_bg_block_size              : NATURAL := c_nof_channels;
-  CONSTANT c_bg_gapsize                 : NATURAL := c_nof_pre_mult_folds; -- No pre-mult folding = 100% valid
+  CONSTANT c_bg_gapsize                 : NATURAL := c_bg_block_size*c_nof_invalid_per_valid;
   CONSTANT c_bg_blocks_per_sync         : NATURAL := 10;
   CONSTANT c_bg_ctrl                    : t_diag_block_gen := ('1',                      -- enable             
                                                                '0',                      -- enable_sync        
@@ -60,6 +58,8 @@ ARCHITECTURE tb OF tb_correlator IS
                                                               TO_UVEC(   c_bg_block_size-1, c_diag_bg_mem_high_adrs_w),
                                                               TO_UVEC(                   0, c_diag_bg_bsn_init_w));
 
+  CONSTANT c_dp_clk_period      : TIME := 10 ns;
+  CONSTANT c_mm_clk_period      : TIME := 10 ps;
 
   SIGNAL tb_end                 : STD_LOGIC := '0';
   SIGNAL dp_clk                 : STD_LOGIC := '1';
@@ -67,6 +67,11 @@ ARCHITECTURE tb OF tb_correlator IS
   SIGNAL mm_clk                 : STD_LOGIC := '1';
   SIGNAL mm_rst                 : STD_LOGIC;
 
+  SIGNAL block_gen_src_out_arr  : t_dp_sosi_arr(c_nof_inputs-1 DOWNTO 0);
+
+  SIGNAL dp_fifo_sc_src_out_arr : t_dp_sosi_arr(c_nof_inputs-1 DOWNTO 0);
+  SIGNAL dp_fifo_sc_src_in_arr  : t_dp_siso_arr(c_nof_inputs-1 DOWNTO 0);
+
   SIGNAL correlator_snk_in_arr  : t_dp_sosi_arr(c_nof_inputs-1 DOWNTO 0);
   SIGNAL correlator_src_out_arr : t_dp_sosi_arr(1-1 DOWNTO 0);
 
@@ -126,9 +131,55 @@ BEGIN
     dp_rst           => dp_rst,
     dp_clk           => dp_clk,
 
-    out_sosi_arr     => correlator_snk_in_arr
+    out_sosi_arr     => block_gen_src_out_arr
   );
 
+  -----------------------------------------------------------------------------
+  -- Introduce gaps in the input stream
+  -- . mms_diag_block_gen does not support gaps within blocks.
+  -- . We'll use FIFO buffers and dp_src_out_timer to read out the FIFOs to
+  --   introduce gaps.
+  -----------------------------------------------------------------------------
+  gen_dp_fifo_sc : FOR i IN 0 TO c_nof_inputs-1 GENERATE -- One FIFO per correlator input stream
+    u_dp_fifo_sc : ENTITY dp_lib.dp_fifo_sc
+    GENERIC MAP (
+      g_data_w         => 2*c_complex_data_w,
+      g_use_ctrl       => FALSE,
+      g_use_complex    => TRUE,
+      g_fifo_size      => c_nof_channels, -- Same value as the block generator block size (c_bg_block_size)
+      g_fifo_af_margin => 0
+    )
+    PORT MAP (
+      rst         => dp_rst,
+      clk         => dp_clk,
+
+      wr_ful      => OPEN,
+      usedw       => OPEN,
+      rd_emp      => OPEN,
+
+      snk_out     => OPEN,                      -- Left unconnected
+      snk_in      => block_gen_src_out_arr(i),  -- Block generator output stream i
+      src_in      => dp_fifo_sc_src_in_arr(i),  -- Driven by snk_out of u_dp_src_out_timer for stream i
+      src_out     => dp_fifo_sc_src_out_arr(i)  -- Forwarded to correlator_snk_in_arr
+    );
+  END GENERATE;
+
+  gen_dp_src_out_timer : FOR i IN 0 TO c_nof_inputs-1 GENERATE -- One timer per input FIFO
+    u_dp_src_out_timer : ENTITY dp_lib.dp_src_out_timer
+    GENERIC MAP (
+      g_nof_invalid_per_valid => c_nof_invalid_per_valid -- = pow2(c_nof_pre_mult_folds)-1
+    )
+    PORT MAP (
+      rst                  => dp_rst,
+      clk                  => dp_clk,
+
+      snk_in               => dp_fifo_sc_src_out_arr(i), -- Output stream i of u_dp_fifo_sc
+      snk_out              => dp_fifo_sc_src_in_arr(i)   -- Connected to src_in of u_dp_fifo_sc for stream i
+    );
+  END GENERATE;
+
+  correlator_snk_in_arr <= dp_fifo_sc_src_out_arr; -- The FIFO outputs are the correlator inputs
+
   -----------------------------------------------------------------------------
   -- Device under test: correlator
   -----------------------------------------------------------------------------