diff --git a/applications/unb1_correlator/src/vhdl/mmm_unb1_correlator.vhd b/applications/unb1_correlator/src/vhdl/mmm_unb1_correlator.vhd
index e511bc0358cdf207fba467cbae4f68005084044c..c2187bc31b7bd923751239e1a94a6af16cb4df39 100644
--- a/applications/unb1_correlator/src/vhdl/mmm_unb1_correlator.vhd
+++ b/applications/unb1_correlator/src/vhdl/mmm_unb1_correlator.vhd
@@ -185,7 +185,7 @@ ARCHITECTURE str OF mmm_unb1_correlator IS
           ram_diag_data_buf_read_export                 : out std_logic;                                        -- export
           ram_diag_data_buf_writedata_export            : out std_logic_vector(31 downto 0);                    -- export
           ram_diag_data_buf_write_export                : out std_logic;                                        -- export
-          ram_diag_data_buf_address_export              : out std_logic_vector(16 downto 0);                    -- export
+          ram_diag_data_buf_address_export              : out std_logic_vector(14 downto 0);                    -- export
           ram_diag_data_buf_clk_export                  : out std_logic;                                        -- export
           ram_diag_data_buf_reset_export                : out std_logic;                                        -- export
           reg_diag_data_buf_readdata_export             : in  std_logic_vector(31 downto 0) := (others => 'X'); -- export
@@ -329,7 +329,7 @@ BEGIN
       ram_diag_data_buf_read_export                 => ram_diag_data_buf_mosi.rd,      
       ram_diag_data_buf_writedata_export            => ram_diag_data_buf_mosi.wrdata(c_word_w-1 DOWNTO 0),       
       ram_diag_data_buf_write_export                => ram_diag_data_buf_mosi.wr,       
-      ram_diag_data_buf_address_export              => ram_diag_data_buf_mosi.address(16 DOWNTO 0),       
+      ram_diag_data_buf_address_export              => ram_diag_data_buf_mosi.address(14 DOWNTO 0),       
       ram_diag_data_buf_clk_export                  => OPEN,     
       ram_diag_data_buf_reset_export                => OPEN,   
     
diff --git a/applications/unb1_correlator/src/vhdl/unb1_correlator.vhd b/applications/unb1_correlator/src/vhdl/unb1_correlator.vhd
index 5a3513c2724a99a453acbecda55ff99cc7ebf2a7..30bdac820a33d74e09a5a2ecbd7c4b19a2f00b0e 100644
--- a/applications/unb1_correlator/src/vhdl/unb1_correlator.vhd
+++ b/applications/unb1_correlator/src/vhdl/unb1_correlator.vhd
@@ -116,9 +116,10 @@ ARCHITECTURE str OF unb1_correlator IS
   SIGNAL eth1g_ram_miso             : t_mem_miso;
 
   -- Correlator
-  CONSTANT c_nof_inputs             : NATURAL := 24;
+  CONSTANT c_nof_inputs             : NATURAL := 10; -- Was 24; with 10 inputs c_nof_mults below evaluates to (10*11)/2 = 55
   CONSTANT c_nof_mults              : NATURAL := (c_nof_inputs*(c_nof_inputs+1))/2;
-  CONSTANT c_data_w                 : NATURAL := 32;
+  CONSTANT c_complex_data_w         : NATURAL := 16; -- Block generator word width is 2*c_complex_data_w; also selects the complex_subbands_<n> hex file
+  CONSTANT c_nof_channels           : NATURAL := 64; -- Used as the diag data buffer depth (g_buf_nof_data)
 
   -- Block generator
   CONSTANT c_bg_block_size          : NATURAL := 1024;
@@ -134,7 +135,7 @@ ARCHITECTURE str OF unb1_correlator IS
                                                           TO_UVEC(                   0, c_diag_bg_bsn_init_w));
 
   SIGNAL correlator_snk_in_arr      : t_dp_sosi_arr(c_nof_inputs-1 DOWNTO 0);
-  SIGNAL correlator_src_out_arr     : t_dp_sosi_arr(c_nof_inputs*(c_nof_inputs+1)/2-1 DOWNTO 0);
+  SIGNAL correlator_src_out_arr     : t_dp_sosi_arr(1-1 DOWNTO 0); -- One element: the correlator now drives a single output stream
 
   SIGNAL ram_diag_data_buf_mosi     : t_mem_mosi;
   SIGNAL ram_diag_data_buf_miso     : t_mem_miso;
@@ -153,9 +154,9 @@ BEGIN
   u_mms_diag_block_gen : ENTITY diag_lib.mms_diag_block_gen
   GENERIC MAP (
     g_nof_output_streams => c_nof_inputs,
-    g_buf_dat_w          => c_data_w,
+    g_buf_dat_w          => 2*c_complex_data_w,
     g_buf_addr_w         => ceil_log2(TO_UINT(c_bg_ctrl.samples_per_packet)),
-    g_file_name_prefix   => "../../../libraries/dsp/correlator/src/hex/complex_subbands_" & NATURAL'IMAGE(c_data_w/2),
+    g_file_name_prefix   => "../../../libraries/dsp/correlator/src/hex/complex_subbands_" & NATURAL'IMAGE(c_complex_data_w),
     g_diag_block_gen_rst => c_bg_ctrl
   )
   PORT MAP (
@@ -189,10 +190,10 @@ BEGIN
   -----------------------------------------------------------------------------
   u_diag_data_buffer : ENTITY diag_lib.mms_diag_data_buffer
   GENERIC MAP (    
-    g_nof_streams  => c_nof_mults,
-    g_data_w       => c_data_w,
+    g_nof_streams  => 1,  -- Matches the one-element correlator_src_out_arr
+    g_data_w       => 64, -- NOTE(review): literal width, no longer derived from a data width constant - confirm it covers the correlator output
     g_data_type    => e_complex,
-    g_buf_nof_data => 256, --c_bg_block_size,
+    g_buf_nof_data => c_nof_channels,
     g_buf_use_sync => FALSE 
   )
   PORT MAP (
diff --git a/libraries/dsp/correlator/src/vhdl/corr_accumulator.vhd b/libraries/dsp/correlator/src/vhdl/corr_accumulator.vhd
index 4af9c16f9f721b694bd7dba706159b08a2990ffd..5a30d2cb494e7834facf5fc2de3a9985861827a7 100644
--- a/libraries/dsp/correlator/src/vhdl/corr_accumulator.vhd
+++ b/libraries/dsp/correlator/src/vhdl/corr_accumulator.vhd
@@ -157,7 +157,9 @@ BEGIN
     nxt_src_out_arr(i).re(c_acc_data_w-1 DOWNTO 0) <= common_shiftram_src_out_arr(i).data(2*c_acc_data_w-1 DOWNTO c_acc_data_w);
     nxt_src_out_arr(i).im(c_acc_data_w-1 DOWNTO 0) <= common_shiftram_src_out_arr(i).data(  c_acc_data_w-1 DOWNTO 0);
     
-    nxt_src_out_arr(i).valid <= '1' WHEN TO_UINT(acc_cnt)<g_nof_acc_per_input AND common_shiftram_src_out_arr(0).valid='1' ELSE '0';
+    nxt_src_out_arr(i).valid <= '1' WHEN TO_UINT(acc_cnt)<g_nof_acc_per_input   AND common_shiftram_src_out_arr(0).valid='1' ELSE '0'; -- Valid while acc_cnt is below g_nof_acc_per_input; every stream i is gated by the valid of stream 0
+    nxt_src_out_arr(i).sop   <= '1' WHEN TO_UINT(acc_cnt)=0                     AND common_shiftram_src_out_arr(0).valid='1' ELSE '0'; -- Start of block: acc_cnt = 0
+    nxt_src_out_arr(i).eop   <= '1' WHEN TO_UINT(acc_cnt)=g_nof_acc_per_input-1 AND common_shiftram_src_out_arr(0).valid='1' ELSE '0'; -- End of block: acc_cnt = g_nof_acc_per_input-1
   END GENERATE;
 
   -----------------------------------------------------------------------------
diff --git a/libraries/dsp/correlator/src/vhdl/correlator.vhd b/libraries/dsp/correlator/src/vhdl/correlator.vhd
index 5358abb66028cddac87a3ad78888c8de2a1251db..74a6273eb1f391c02cbd4d15de37bb16ba94bfe2 100644
--- a/libraries/dsp/correlator/src/vhdl/correlator.vhd
+++ b/libraries/dsp/correlator/src/vhdl/correlator.vhd
@@ -32,10 +32,12 @@ USE dp_lib.dp_stream_pkg.ALL;
 
 ENTITY correlator IS
   GENERIC (
-    g_nof_inputs  : NATURAL;
-    g_nof_mults   : NATURAL;
-    g_data_w      : NATURAL := 16;
-    g_conjugate   : BOOLEAN := TRUE
+    g_nof_inputs        : NATURAL;
+    g_nof_mults         : NATURAL;
+    g_data_w            : NATURAL := 16;
+    g_conjugate         : BOOLEAN := TRUE;
+    g_nof_acc_per_input : NATURAL := 64; -- aka channels; passed on to corr_accumulator and used as the dp_mux FIFO size
+    g_nof_words_to_acc  : NATURAL := 55  -- Raised internally to at least g_nof_mults via largest(). NOTE(review): an earlier note said "unless zero", but the largest() call has no zero exception - confirm
    ); 
   PORT (
     rst            : IN  STD_LOGIC;
@@ -43,16 +45,28 @@ ENTITY correlator IS
 
     snk_in_arr     : IN  t_dp_sosi_arr(g_nof_inputs-1 DOWNTO 0);
 
-    src_out_arr    : OUT t_dp_sosi_arr(g_nof_inputs*(g_nof_inputs+1)/2-1 DOWNTO 0)
+    src_out_arr    : OUT t_dp_sosi_arr(1-1 DOWNTO 0) -- Single output for now
   );
 END correlator;
 
 ARCHITECTURE str OF correlator IS
 
+  -- We can multiplex the accumulator outputs onto one stream as long as the integration period is
+  -- equal to or larger than the number of accumulator outputs.
+  CONSTANT c_nof_words_to_acc : NATURAL := largest(g_nof_words_to_acc, g_nof_mults); -- Hence never smaller than g_nof_mults
+  -- Number of bits used for each of the re and im accumulator outputs (see the re/im slices below).
+  CONSTANT c_acc_data_w                 : NATURAL := ceil_log2(c_nof_words_to_acc*(pow2(g_data_w)-1));
+  -- dp_mux FIFO generics. NOTE(review): presumably array_init(value, length) yields one entry per mux input - confirm
+  CONSTANT c_dp_mux_fifo_size : t_natural_arr := array_init(g_nof_acc_per_input, g_nof_mults);
+  CONSTANT c_dp_mux_fifo_fill : t_natural_arr := array_init(0, g_nof_mults); -- Start outputting right away
+
   SIGNAL corr_permutator_src_out_2arr_2 : t_dp_sosi_2arr_2(g_nof_inputs*(g_nof_inputs+1)/2-1 DOWNTO 0); -- Array of pairs
   SIGNAL corr_folder_src_out_2arr_2     : t_dp_sosi_2arr_2(g_nof_inputs*(g_nof_inputs+1)/2-1 DOWNTO 0); -- Array of pairs, not folded yet
   SIGNAL corr_multiplier_src_out_arr    : t_dp_sosi_arr(g_nof_mults-1 DOWNTO 0);
   SIGNAL corr_accumulator_src_out_arr   : t_dp_sosi_arr(g_nof_mults-1 DOWNTO 0);
+  SIGNAL dp_mux_snk_in_arr              : t_dp_sosi_arr(g_nof_mults-1 DOWNTO 0); -- Accumulator outputs with re & im packed into .data
+  SIGNAL dp_mux_src_out                 : t_dp_sosi; -- Single multiplexed stream out of dp_mux
+  SIGNAL dp_block_gen_snk_in            : t_dp_sosi; -- Mux output unpacked into re/im again; input of dp_block_gen
 
 BEGIN
 
@@ -100,8 +114,8 @@ BEGIN
   u_corr_accumulator : ENTITY work.corr_accumulator
   GENERIC MAP (
     g_nof_inputs        => g_nof_mults,
-    g_nof_acc_per_input => 64,
-    g_nof_words_to_acc  => 10,
+    g_nof_acc_per_input => g_nof_acc_per_input,
+    g_nof_words_to_acc  => c_nof_words_to_acc,
     g_data_w            => g_data_w 
   )
   PORT MAP (
@@ -112,6 +126,76 @@ BEGIN
     src_out_arr    => corr_accumulator_src_out_arr
   );
 
-  src_out_arr <= corr_accumulator_src_out_arr;
+  -----------------------------------------------------------------------------
+  -- Multiplex the parallel visibility blocks onto one output stream.
+  -- . In : All channels per visibility
+  -- . Out: All visibilities per channel
+  -- . Example for 64 channels:
+  --             ______
+  -- [0..63] -> |      |
+  --   ..       |dp_mux| -> [0..0]..[63..63]
+  -- [0..63] -> |______|
+  -----------------------------------------------------------------------------
+  gen_concat_complex : FOR i IN 0 TO g_nof_mults-1 GENERATE
+    -- Concatenate real&imaginary parts: re in the upper c_acc_data_w bits of .data, im in the lower c_acc_data_w bits
+    dp_mux_snk_in_arr(i).data(2*c_acc_data_w-1 DOWNTO c_acc_data_w) <= corr_accumulator_src_out_arr(i).re(c_acc_data_w-1 DOWNTO 0);
+    dp_mux_snk_in_arr(i).data(  c_acc_data_w-1 DOWNTO 0)            <= corr_accumulator_src_out_arr(i).im(c_acc_data_w-1 DOWNTO 0);
+    dp_mux_snk_in_arr(i).valid                                      <= corr_accumulator_src_out_arr(i).valid;
+
+    -- SOP, EOP = valid. This creates blocks of one cycle. This makes dp_mux multiplex per single word instead of
+    -- per block of x different channels, effectively performing a transpose so dp_mux outputs blocks of the same channel.
+    dp_mux_snk_in_arr(i).sop                                        <= corr_accumulator_src_out_arr(i).valid;
+    dp_mux_snk_in_arr(i).eop                                        <= corr_accumulator_src_out_arr(i).valid;
+  END GENERATE;
+
+  u_dp_mux : ENTITY dp_lib.dp_mux
+  GENERIC MAP (
+    g_data_w          => 2*c_acc_data_w, -- re and im concatenated
+    g_mode            => 1,
+    g_nof_input       => g_nof_mults, -- One mux input per accumulator output
+    g_use_fifo        => TRUE,
+    g_fifo_size       => c_dp_mux_fifo_size,
+    g_fifo_fill       => c_dp_mux_fifo_fill,
+    g_fifo_af_margin  => 0
+  )
+  PORT MAP (
+    rst         => rst,
+    clk         => clk,
+    -- ST sinks
+    snk_out_arr => OPEN, -- Sink side flow control output is left unconnected
+    snk_in_arr  => func_dp_stream_arr_reverse_range(dp_mux_snk_in_arr), -- dp_mux uses TO range!
+    -- ST source
+    src_in      => c_dp_siso_rdy, -- NOTE(review): presumably a constant always-ready siso - confirm
+    src_out     => dp_mux_src_out -- Note: dp_mux_src_out.channel outputs mux input index
+  );
+
+  -- Extract real&imaginary parts (inverse of the packing done in gen_concat_complex)
+  dp_block_gen_snk_in.re(c_acc_data_w-1 DOWNTO 0) <= dp_mux_src_out.data(2*c_acc_data_w-1 DOWNTO c_acc_data_w);
+  dp_block_gen_snk_in.im(c_acc_data_w-1 DOWNTO 0) <= dp_mux_src_out.data(  c_acc_data_w-1 DOWNTO 0);
+
+  -- Pad the rest with zeroes. NOTE(review): 64 is a literal, presumably the re/im field width of t_dp_sosi - confirm
+  dp_block_gen_snk_in.re(64-1 DOWNTO c_acc_data_w) <= (OTHERS=>'0');
+  dp_block_gen_snk_in.im(64-1 DOWNTO c_acc_data_w) <= (OTHERS=>'0');
+
+  dp_block_gen_snk_in.valid                       <= dp_mux_src_out.valid; -- Only valid is forwarded; the sop, eop and channel of dp_mux_src_out are not passed on
+
+  -----------------------------------------------------------------------------
+  -- Add proper SOP and EOP to mux output
+  -- . Output one block of g_nof_mults for each channel
+  -----------------------------------------------------------------------------
+  dp_block_gen: ENTITY dp_lib.dp_block_gen
+  GENERIC MAP (
+    g_use_src_in         => FALSE,
+    g_nof_data           => g_nof_mults, -- Block length: one word per visibility
+    g_nof_blk_per_sync   => 10 -- Randomly chosen sync interval 
+  )
+  PORT MAP (
+    rst        => rst,
+    clk        => clk,
+
+    snk_in     => dp_block_gen_snk_in,
+
+    src_out    => src_out_arr(0)
+  );
 
 END str;
diff --git a/libraries/dsp/correlator/tb/python/tc_correlator.py b/libraries/dsp/correlator/tb/python/tc_correlator.py
index 9e5bb24d88754bd30c4c48ae45bec64e6dcebeea..bc6dc5beda8fe2da8a850439f6631bacde8bccd4 100644
--- a/libraries/dsp/correlator/tb/python/tc_correlator.py
+++ b/libraries/dsp/correlator/tb/python/tc_correlator.py
@@ -47,59 +47,60 @@ db = pi_diag_data_buffer.PiDiagDataBuffer(tc, io, nofStreams=NOF_OUTPUTS, ramSiz
 ###############################################################################
 data = []
 
-do_until_ge(db.read_nof_words, ms_retry=2000, val=NOF_CHANNELS, s_timeout=900)
-for output_nr in range(NOF_OUTPUTS):
-    data.append( db.read_data_buffer(streamNr=output_nr, n=2*NOF_CHANNELS, radix='uns', width=BUFFER_WIDTH, nofColumns=12)[0] )
+do_until_ge(db.read_nof_words, ms_retry=2000, val=55, s_timeout=900)  # NOTE(review): 55 is hard-coded; presumably the number of visibilities for 10 inputs (10*11/2) - confirm and derive it from a constant
+#for output_nr in range(NOF_OUTPUTS):
+data = db.read_data_buffer(streamNr=0, n=2*55, radix='uns', width=BUFFER_WIDTH, nofColumns=12)[0]  # Only stream 0 is read; n uses the same hard-coded 55
 
 ###############################################################################
 # 'data' is now a 2d array of [NOF_OUTPUTS][NOF_CHANNELS]. We
 # want to group the outputs by channel, so transpose this 2d array into 
 # [NOF_CHANNELS][NOF_OUTPUTS]
 ###############################################################################
-data = transpose(data)
-
+#data = transpose(data)
+print data
 mat_list = []
 amplitudes = []
-for channel_nr in range(NOF_CHANNELS):
-    ###############################################################################
-    # Convert the unsigned words to complex
-    ###############################################################################
-    channel_data = data[channel_nr]
-    for index,word in enumerate(channel_data):
-        word_bits = CommonBits(word, BUFFER_WIDTH)
-        re = word_bits[BUFFER_WIDTH-1:BUFFER_WIDTH/2]
-        im = word_bits[BUFFER_WIDTH/2-1:0]
-        channel_data[index] = complex(im, re)
-    
-    ###############################################################################
-    # Convert binomials to complex phasor notation
-    ###############################################################################
-    for index,word in enumerate(channel_data):
-        channel_data[index] = complex_binomial_to_phasor(word)
-   
-    ###############################################################################
-    # Extract the phases and amplitudes from the complex data
-    ###############################################################################
-    phases = []
-
-    for word in channel_data:
-        amplitudes.append(word[0])
-        phases.append(word[1])
-   
-    ################################################################################
-    # Re-shape the flat list into a matrix
-    ################################################################################
-    mat = unique_vis_to_full_matrix(phases)    
-    mat_list.append(mat)
+
+###############################################################################
+# Convert the unsigned words to complex
+###############################################################################
+channel_data = data  # Alias, not a copy: the loops below overwrite the entries of 'data' in place
+for index,word in enumerate(channel_data):
+    word_bits = CommonBits(word, BUFFER_WIDTH)
+    re = word_bits[BUFFER_WIDTH-1:BUFFER_WIDTH/2]  # Upper half of the word
+    im = word_bits[BUFFER_WIDTH/2-1:0]  # Lower half of the word
+    channel_data[index] = complex(im, re)  # NOTE(review): called as (im, re) while the built-in complex() takes (real, imag) - confirm this order is intended
+
+###############################################################################
+# Convert binomials to complex phasor notation
+###############################################################################
+for index,word in enumerate(channel_data):
+    channel_data[index] = complex_binomial_to_phasor(word)
+
+###############################################################################
+# Extract the phases and amplitudes from the complex data
+###############################################################################
+phases = []
+
+for word in channel_data:
+    amplitudes.append(word[0])  # Presumably complex_binomial_to_phasor returns (amplitude, phase) - confirm
+    phases.append(word[1])
+
+################################################################################
+# Re-shape the flat list into a matrix
+################################################################################
+mat = unique_vis_to_full_matrix(phases)    # One matrix, built from the words of the single stream read above
+#mat_list.append(mat)
 
 ################################################################################
 # re-shape the flat list of 64 matrices into a an 8*8 matrix of matrices, plot
 ################################################################################
-print 'Plotting phases of %d channels.' %NOF_CHANNELS
-print '. Channel amplitudes:'
-for channel_nr in range(NOF_CHANNELS):
-    print '   .', channel_nr, ' - ', amplitudes[channel_nr]
 
-mat_mat = split_list(mat_list, 8)
-plot_matrix_color(mat_mat)
+#print 'Plotting phases of %d channels.' %NOF_CHANNELS
+#print '. Channel amplitudes:'
+#for channel_nr in range(NOF_CHANNELS):
+#    print '   .', channel_nr, ' - ', amplitudes[channel_nr]
+
+#mat_mat = split_list(mat_list, 8)
+plot_matrix_color([[mat]])
 
diff --git a/libraries/dsp/correlator/tb/vhdl/tb_correlator.vhd b/libraries/dsp/correlator/tb/vhdl/tb_correlator.vhd
index f9005796bec977d7910fa2405fdedbcdb066893b..5009723a441dff7335d52ee30c479f6dc71896b8 100644
--- a/libraries/dsp/correlator/tb/vhdl/tb_correlator.vhd
+++ b/libraries/dsp/correlator/tb/vhdl/tb_correlator.vhd
@@ -64,7 +64,7 @@ ARCHITECTURE tb OF tb_correlator IS
   SIGNAL mm_rst                 : STD_LOGIC;
 
   SIGNAL correlator_snk_in_arr  : t_dp_sosi_arr(c_nof_inputs-1 DOWNTO 0);
-  SIGNAL correlator_src_out_arr : t_dp_sosi_arr(c_nof_inputs*(c_nof_inputs+1)/2-1 DOWNTO 0);
+  SIGNAL correlator_src_out_arr : t_dp_sosi_arr(1-1 DOWNTO 0);
 
   SIGNAL ram_diag_data_buf_mosi : t_mem_mosi;
   SIGNAL ram_diag_data_buf_miso : t_mem_miso;
@@ -146,7 +146,7 @@ BEGIN
   -----------------------------------------------------------------------------
   u_diag_data_buffer : ENTITY diag_lib.mms_diag_data_buffer
   GENERIC MAP (    
-    g_nof_streams  => c_nof_mults,
+    g_nof_streams  => 1,
     g_data_w       => 64,
     g_data_type    => e_complex,
     g_buf_nof_data => c_nof_channels,