diff --git a/libraries/base/common/src/vhdl/common_mem_pkg.vhd b/libraries/base/common/src/vhdl/common_mem_pkg.vhd
index 70490338cbf26aed3fa288c719bd18b82d671af5..444e5c915cda27579b56c56520f7378cec7c3069 100644
--- a/libraries/base/common/src/vhdl/common_mem_pkg.vhd
+++ b/libraries/base/common/src/vhdl/common_mem_pkg.vhd
@@ -60,19 +60,19 @@ PACKAGE common_mem_pkg IS
   CONSTANT c_mem_address_sz : NATURAL := c_mem_address_w/c_byte_w;
   CONSTANT c_mem_data_sz    : NATURAL := c_mem_data_w/c_byte_w;
   
-  TYPE t_mem_miso IS RECORD  -- Master In Slave Out
+  TYPE t_mem_miso IS RECORD  -- Master In Slave Out. For backward compatibility only. Use t_mem_cipo for new designs.
     rddata      : STD_LOGIC_VECTOR(c_mem_data_w-1 DOWNTO 0);  -- data width (suits 1, 2 or 4 bytes)
     rdval       : STD_LOGIC;
     waitrequest : STD_LOGIC;
   END RECORD;
   
-  TYPE t_mem_mosi IS RECORD  -- Master Out Slave In
+  TYPE t_mem_mosi IS RECORD  -- Master Out Slave In. For backward compatibility only. Use t_mem_copi for new designs.
     address     : STD_LOGIC_VECTOR(c_mem_address_w-1 DOWNTO 0);  -- address range (suits 32-bit processor)
     wrdata      : STD_LOGIC_VECTOR(c_mem_data_w-1 DOWNTO 0);     -- data width (suits 1, 2 or 4 bytes)
     wr          : STD_LOGIC;
     rd          : STD_LOGIC;
   END RECORD;
-  
+ 
   CONSTANT c_mem_miso_rst : t_mem_miso := ((OTHERS=>'0'), '0', '0');
   CONSTANT c_mem_mosi_rst : t_mem_mosi := ((OTHERS=>'0'), (OTHERS=>'0'), '0', '0');
   
@@ -94,7 +94,17 @@ PACKAGE common_mem_pkg IS
   FUNCTION RESIZE_MEM_UDATA(  vec : STD_LOGIC_VECTOR) RETURN STD_LOGIC_VECTOR;  -- unsigned
   FUNCTION RESIZE_MEM_SDATA(  vec : STD_LOGIC_VECTOR) RETURN STD_LOGIC_VECTOR;  -- sign extended
   FUNCTION RESIZE_MEM_XDATA(  vec : STD_LOGIC_VECTOR) RETURN STD_LOGIC_VECTOR;  -- set unused MSBits to 'X'
-  
+
+  -- Controller/Peripheral names for the MOSI/MISO types, reset constants and arrays
+  SUBTYPE t_mem_copi IS t_mem_mosi; -- Controller Out Peripheral In
+  SUBTYPE t_mem_cipo IS t_mem_miso; -- Controller In Peripheral Out
+
+  CONSTANT c_mem_cipo_rst : t_mem_cipo := c_mem_miso_rst;  -- same value as c_mem_miso_rst
+  CONSTANT c_mem_copi_rst : t_mem_copi := c_mem_mosi_rst;  -- same value as c_mem_mosi_rst
+
+  SUBTYPE t_mem_cipo_arr IS t_mem_miso_arr;  -- Controller/Peripheral name for t_mem_miso_arr
+  SUBTYPE t_mem_copi_arr IS t_mem_mosi_arr;  -- Controller/Peripheral name for t_mem_mosi_arr
+ 
   
   ------------------------------------------------------------------------------
   -- Burst memory access (for DDR access interface)
diff --git a/libraries/dsp/st/hdllib.cfg b/libraries/dsp/st/hdllib.cfg
index a00e04d87086a3e0910487baf04a6c559e4391e5..b1ccf21327833e433f0f05db1f73d6145d760511 100644
--- a/libraries/dsp/st/hdllib.cfg
+++ b/libraries/dsp/st/hdllib.cfg
@@ -16,8 +16,7 @@ synth_files =
     src/vhdl/st_xst.vhd 
 #    src/vhdl/st_top.vhd 
     src/vhdl/st_histogram.vhd
-    src/vhdl/st_histogram_reg.vhd
-    src/vhdl/mms_st_histogram.vhd
+    src/vhdl/mmp_st_histogram.vhd
 
     tb/vhdl/tb_st_pkg.vhd 
  
@@ -31,7 +30,7 @@ test_bench_files =
     tb/vhdl/tb_tb_st_xst.vhd
     tb/vhdl/tb_st_histogram.vhd
 
-    tb/vhdl/tb_mms_st_histogram.vhd
+    tb/vhdl/tb_mmp_st_histogram.vhd
     tb/vhdl/tb_st_histogram.vhd
     tb/vhdl/tb_tb_st_histogram.vhd
 
diff --git a/libraries/dsp/st/src/vhdl/mmp_st_histogram.vhd b/libraries/dsp/st/src/vhdl/mmp_st_histogram.vhd
new file mode 100644
index 0000000000000000000000000000000000000000..02b5005874fdec28e95da2cd3e6a09bb91cdc231
--- /dev/null
+++ b/libraries/dsp/st/src/vhdl/mmp_st_histogram.vhd
@@ -0,0 +1,215 @@
+-------------------------------------------------------------------------------
+--
+-- Copyright 2021
+-- ASTRON (Netherlands Institute for Radio Astronomy) <http://www.astron.nl/>
+-- P.O.Box 2, 7990 AA Dwingeloo, The Netherlands
+-- 
+-- Licensed under the Apache License, Version 2.0 (the "License");
+-- you may not use this file except in compliance with the License.
+-- You may obtain a copy of the License at
+-- 
+--     http://www.apache.org/licenses/LICENSE-2.0
+-- 
+-- Unless required by applicable law or agreed to in writing, software
+-- distributed under the License is distributed on an "AS IS" BASIS,
+-- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+-- See the License for the specific language governing permissions and
+-- limitations under the License.
+--
+-------------------------------------------------------------------------------
+
+-- Author: 
+-- . Daniel van der Schuur 
+-- Purpose:
+-- . MMP-wrapper that adds MM clock domain RAM readout and multi-instance
+--   support to st_histogram.
+-- Description:
+-- . Adds logic to move st_histogram RAM contents into the dual clock RAM for
+--   readout in MM clock domain.
+-- . Per instance there are at least two block RAMs (possibly more, dependent
+--   on g_nof_bins):
+--   . one dual page block RAM in st_histogram in the dp_clk domain that
+--     accumulates or holds the bin values for every sync interval,
+--   . one dual clock block RAM here to provide the read access to the
+--     page with the held bin values via the mm_clk domain.
+
+
+LIBRARY IEEE, common_lib, mm_lib, technology_lib, dp_lib;
+USE IEEE.std_logic_1164.ALL;
+USE common_lib.common_pkg.ALL;
+USE common_lib.common_mem_pkg.ALL;
+USE dp_lib.dp_stream_pkg.ALL;
+USE technology_lib.technology_select_pkg.ALL;
+
+ENTITY mmp_st_histogram IS
+  GENERIC (
+    g_nof_instances     : NATURAL;  -- number of st_histogram instances, dual clock RAMs and snk_in_arr inputs
+    g_data_w            : NATURAL;  -- passed on to every st_histogram instance
+    g_nof_bins          : NATURAL;  -- number of words per RAM, RAM address width is ceil_log2(g_nof_bins)
+    g_nof_data_per_sync : NATURAL   -- RAM data width is ceil_log2(g_nof_data_per_sync+1)
+  );                
+  PORT (            
+    dp_clk     : IN  STD_LOGIC;  -- clocks the st_histogram instances and the write side of the dual clock RAMs
+    dp_rst     : IN  STD_LOGIC;
+
+    snk_in_arr : IN  t_dp_sosi_arr(g_nof_instances-1 DOWNTO 0);  -- one input per instance, the sync of input 0 starts the RAM fill
+
+    mm_clk     : IN  STD_LOGIC;  -- clocks the read side of the dual clock RAMs
+    mm_rst     : IN  STD_LOGIC;               
+
+    ram_copi   : IN  t_mem_copi;  -- MM access that common_mem_mux distributes over the dual clock RAMs
+    ram_cipo   : OUT t_mem_cipo
+  );
+END mmp_st_histogram;
+
+ARCHITECTURE str OF mmp_st_histogram IS
+
+  -------------------------------------------------------------------------------
+  -- st_histogram instances
+  -------------------------------------------------------------------------------
+  SIGNAL st_histogram_ram_copi_arr  : t_mem_copi_arr(g_nof_instances-1 DOWNTO 0);
+  SIGNAL st_histogram_ram_cipo_arr  : t_mem_cipo_arr(g_nof_instances-1 DOWNTO 0);
+
+  -------------------------------------------------------------------------------
+  -- Dual clock RAM
+  -------------------------------------------------------------------------------
+  CONSTANT c_reg_adr_w : NATURAL := 1;  -- NOTE(review): not referenced in this architecture
+  CONSTANT c_ram_adr_w : NATURAL := ceil_log2(g_nof_bins);
+  CONSTANT c_ram_dat_w : NATURAL := ceil_log2(g_nof_data_per_sync+1);  -- same expression as c_ram_dat_w in st_histogram
+
+  CONSTANT c_ram                    : t_c_mem := (latency  => 1,
+                                                  adr_w    => c_ram_adr_w, 
+                                                  dat_w    => c_ram_dat_w,
+                                                  nof_dat  => g_nof_bins,
+                                                  init_sl  => '0');
+
+  CONSTANT c_addr_high : NATURAL := g_nof_bins-1;  -- ram_filling is cleared when ram_address equals this value
+
+  SIGNAL wr_copi_arr : t_mem_copi_arr(g_nof_instances-1 DOWNTO 0);
+
+  -------------------------------------------------------------------------------
+  -- Logic to move st_histogram RAM contents into the dual clock RAM
+  -------------------------------------------------------------------------------
+  SIGNAL ram_fill_arr      : STD_LOGIC_VECTOR(g_nof_instances-1 DOWNTO 0);  -- NOTE(review): not referenced in this architecture
+  SIGNAL ram_fill_inst     : STD_LOGIC_VECTOR(ceil_log2(g_nof_instances)-1 DOWNTO 0);  -- NOTE(review): not referenced in this architecture
+  SIGNAL ram_fill_inst_int : NATURAL;  -- NOTE(review): not referenced in this architecture
+  SIGNAL ram_fill          : STD_LOGIC;
+  SIGNAL ram_filling       : STD_LOGIC;
+  SIGNAL nxt_ram_filling   : STD_LOGIC;
+  SIGNAL ram_address       : STD_LOGIC_VECTOR(c_ram_adr_w-1 DOWNTO 0);
+  SIGNAL nxt_ram_address   : STD_LOGIC_VECTOR(c_ram_adr_w-1 DOWNTO 0);
+
+  -------------------------------------------------------------------------------
+  -- MM multiplexing
+  -------------------------------------------------------------------------------
+  SIGNAL ram_copi_arr  : t_mem_copi_arr(g_nof_instances-1 DOWNTO 0);
+  SIGNAL ram_cipo_arr  : t_mem_cipo_arr(g_nof_instances-1 DOWNTO 0);
+
+
+BEGIN 
+
+  -------------------------------------------------------------------------------
+  -- st_histogram instances
+  -------------------------------------------------------------------------------
+  gen_st_histogram : FOR i IN 0 TO g_nof_instances-1 GENERATE
+    u_st_histogram : ENTITY work.st_histogram
+    GENERIC MAP(
+      g_data_w            => g_data_w,
+      g_nof_bins          => g_nof_bins,
+      g_nof_data_per_sync => g_nof_data_per_sync
+    )
+    PORT MAP (
+      dp_clk       => dp_clk,
+      dp_rst       => dp_rst,
+      
+      snk_in       => snk_in_arr(i),
+  
+      ram_mosi     => st_histogram_ram_copi_arr(i),
+      ram_miso     => st_histogram_ram_cipo_arr(i)
+    );
+  END GENERATE;
+
+  -------------------------------------------------------------------------------
+  -- Dual clock RAM: DP write side, MM read side
+  -- . The st_histogram RAM contents get into the RAMs below as follows:
+  --   . st_histogram RAM -> read/write logic further below -> dual clock RAM
+  -------------------------------------------------------------------------------
+  gen_common_ram_cr_cw : FOR i IN 0 TO g_nof_instances-1 GENERATE
+    u_common_ram_cr_cw : ENTITY common_lib.common_ram_cr_cw
+    GENERIC MAP (
+      g_technology     => c_tech_select_default,
+      g_ram            => c_ram,
+      g_init_file      => "UNUSED"
+    )
+    PORT MAP (
+      wr_clk   => dp_clk,
+      wr_rst   => dp_rst, 
+      wr_clken => '1',
+      wr_en    => wr_copi_arr(i).wr,
+      wr_adr   => wr_copi_arr(i).address(c_ram_adr_w-1 DOWNTO 0),
+      wr_dat   => wr_copi_arr(i).wrdata(c_ram_dat_w-1 DOWNTO 0),
+      rd_clk   => mm_clk,
+      rd_rst   => mm_rst, 
+      rd_clken => '1',
+      rd_en    => ram_copi_arr(i).rd,
+      rd_adr   => ram_copi_arr(i).address(c_ram_adr_w-1 DOWNTO 0),
+      rd_dat   => ram_cipo_arr(i).rddata(c_ram_dat_w-1 DOWNTO 0),
+      rd_val   => ram_cipo_arr(i).rdval
+    );
+  END GENERATE;
+ 
+
+  -------------------------------------------------------------------------------
+  -- Logic to move st_histogram RAM contents into the dual clock RAM above
+  -------------------------------------------------------------------------------
+  -- Use only the sync of input 0 to start the fill of the dual clock RAMs of all instances
+  ram_fill <= snk_in_arr(0).sync;
+
+  -- Keep track of ram_filling status and ram_address (used for reading and writing)
+  nxt_ram_filling <= '0' WHEN TO_UINT(ram_address)=c_addr_high ELSE '1' WHEN ram_fill='1' ELSE ram_filling;
+  nxt_ram_address <= (OTHERS=>'0') WHEN ram_filling='0' ELSE INCR_UVEC(ram_address, 1) WHEN ram_filling='1' ELSE ram_address;
+
+  -- Do read request on ram_copi when ram_filling
+  gen_copi_arr: FOR i IN 0 TO g_nof_instances-1 GENERATE
+    st_histogram_ram_copi_arr(i).wr                              <= '0';
+    st_histogram_ram_copi_arr(i).wrdata                          <= (OTHERS=>'0');
+    st_histogram_ram_copi_arr(i).rd                              <= ram_filling;
+    st_histogram_ram_copi_arr(i).address(c_ram_adr_w-1 DOWNTO 0) <= ram_address;  -- NOTE(review): address bits above c_ram_adr_w-1 are not assigned here
+  END GENERATE;
+
+  -- Forward the read histogram data from ram_cipo into write copi of dual clock RAM
+  gen_rd_cipo_to_wr_copi: FOR i IN 0 TO g_nof_instances-1 GENERATE
+    wr_copi_arr(i).wr                              <= st_histogram_ram_cipo_arr(i).rdval;
+    wr_copi_arr(i).wrdata(c_ram_dat_w-1 DOWNTO 0)  <= st_histogram_ram_cipo_arr(i).rddata(c_ram_dat_w-1 DOWNTO 0);
+    wr_copi_arr(i).address(c_ram_adr_w-1 DOWNTO 0) <= ram_address;  -- NOTE(review): same ram_address as the read request, confirm it lines up with the rdval latency
+  END GENERATE;
+
+  -- Registers in the dp_clk domain, asynchronously reset by dp_rst
+  p_clk : PROCESS(dp_clk, dp_rst) IS
+  BEGIN
+    IF dp_rst = '1' THEN
+      ram_address <= (OTHERS=>'0');
+      ram_filling <= '0';
+    ELSIF RISING_EDGE(dp_clk) THEN
+      ram_address <= nxt_ram_address;
+      ram_filling <= nxt_ram_filling;
+    END IF;
+  END PROCESS;
+
+  -------------------------------------------------------------------------------
+  -- MM multiplexing: connect the single ram_copi/ram_cipo port to the RAMs of all instances
+  -------------------------------------------------------------------------------
+  u_common_mem_mux : ENTITY common_lib.common_mem_mux
+  GENERIC MAP (
+    g_nof_mosi    => g_nof_instances,
+    g_mult_addr_w => c_ram_adr_w
+  )
+  PORT MAP (
+    mosi     => ram_copi,
+    miso     => ram_cipo,
+    mosi_arr => ram_copi_arr,
+    miso_arr => ram_cipo_arr
+  );
+
+END str;
+
diff --git a/libraries/dsp/st/src/vhdl/mms_st_histogram.vhd b/libraries/dsp/st/src/vhdl/mms_st_histogram.vhd
deleted file mode 100644
index a17010950a6dd5c84c5cbece7105364dd6d51c08..0000000000000000000000000000000000000000
--- a/libraries/dsp/st/src/vhdl/mms_st_histogram.vhd
+++ /dev/null
@@ -1,233 +0,0 @@
--------------------------------------------------------------------------------
---
--- Copyright 2020
--- ASTRON (Netherlands Institute for Radio Astronomy) <http://www.astron.nl/>
--- P.O.Box 2, 7990 AA Dwingeloo, The Netherlands
--- 
--- Licensed under the Apache License, Version 2.0 (the "License");
--- you may not use this file except in compliance with the License.
--- You may obtain a copy of the License at
--- 
---     http://www.apache.org/licenses/LICENSE-2.0
--- 
--- Unless required by applicable law or agreed to in writing, software
--- distributed under the License is distributed on an "AS IS" BASIS,
--- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
--- See the License for the specific language governing permissions and
--- limitations under the License.
---
--------------------------------------------------------------------------------
-
--- Author: 
--- . Daniel van der Schuur 
--- Purpose:
--- . MMS-wrapper that adds registers and multi-instance support to st_histogram.
--- Description:
--- . st_histogram_reg implements the registers to control all g_nof_instances
--- . This MMS wrapper contains logic to fill a local RAM with the contents of
---   a selected st_histogram instance.
--- Usage (see st_histogram_reg.vhd for the register map):
--- . Reading RAM contents:
---   1) User writes instance to read (0..g_nof_instances-1) to ram_fill_inst
---      register via reg_mosi
---   2) Users writes to bit 0 of fill_ram register via reg_mosi
---      . ram_filling status will go high
---   3) User reads ram_filling status until it reads zero via reg_mosi
---   4) User reads freshly filled RAM contents via ram_mosi
--- . Clearing the RAMs:
---   . The inactive RAM is cleared automatically just before the next input sync.
---      . ram_clearing status will go high during this time.
-
-LIBRARY IEEE, common_lib, mm_lib, technology_lib, dp_lib;
-USE IEEE.std_logic_1164.ALL;
-USE common_lib.common_pkg.ALL;
-USE common_lib.common_mem_pkg.ALL;
-USE dp_lib.dp_stream_pkg.ALL;
-USE technology_lib.technology_select_pkg.ALL;
-
-ENTITY mms_st_histogram IS
-  GENERIC (
-    g_nof_instances     : NATURAL;
-    g_data_w            : NATURAL;
-    g_nof_bins          : NATURAL;
-    g_nof_data_per_sync : NATURAL        
-  );                
-  PORT (            
-    dp_clk     : IN  STD_LOGIC;
-    dp_rst     : IN  STD_LOGIC;
-
-    snk_in_arr : IN  t_dp_sosi_arr(g_nof_instances-1 DOWNTO 0);
-
-    mm_clk     : IN  STD_LOGIC;
-    mm_rst     : IN  STD_LOGIC;               
-
-    reg_mosi   : IN  t_mem_mosi;
-    reg_miso   : OUT t_mem_miso;
-
-    ram_mosi   : IN  t_mem_mosi;
-    ram_miso   : OUT t_mem_miso
-  );
-END mms_st_histogram;
-
-ARCHITECTURE str OF mms_st_histogram IS
-
-  CONSTANT c_reg_adr_w : NATURAL := 1;
-  CONSTANT c_ram_adr_w : NATURAL := ceil_log2(g_nof_bins);
-  CONSTANT c_ram_dat_w : NATURAL := ceil_log2(g_nof_data_per_sync);
-
-  CONSTANT c_ram                    : t_c_mem := (latency  => 1,
-                                                  adr_w    => c_ram_adr_w, 
-                                                  dat_w    => c_ram_dat_w,
-                                                  nof_dat  => g_nof_bins,
-                                                  init_sl  => '0');
-
-  CONSTANT c_addr_high : NATURAL := g_nof_bins-1;
-
-  SIGNAL common_ram_cr_cw_wr_mosi     : t_mem_mosi;
-  SIGNAL nxt_common_ram_cr_cw_wr_mosi : t_mem_mosi;
-
-  SIGNAL common_ram_cr_cw_rd_mosi : t_mem_mosi;
-  SIGNAL common_ram_cr_cw_rd_miso : t_mem_miso; 
-
-  SIGNAL ram_mosi_arr  : t_mem_mosi_arr(g_nof_instances-1 DOWNTO 0);
-  SIGNAL ram_miso_arr  : t_mem_miso_arr(g_nof_instances-1 DOWNTO 0);
-
-  SIGNAL ram_clearing_arr  : STD_LOGIC_VECTOR(g_nof_instances-1 DOWNTO 0);
-
-  SIGNAL ram_fill_inst     : STD_LOGIC_VECTOR(ceil_log2(g_nof_instances)-1 DOWNTO 0);
-  SIGNAL ram_fill_inst_int : NATURAL;
-
-  SIGNAL ram_fill          : STD_LOGIC;
-  SIGNAL ram_filling       : STD_LOGIC;
-  SIGNAL nxt_ram_filling   : STD_LOGIC;
-  SIGNAL address           : STD_LOGIC_VECTOR(c_ram_adr_w-1 DOWNTO 0);
-  SIGNAL nxt_address       : STD_LOGIC_VECTOR(c_ram_adr_w-1 DOWNTO 0);
-   
-BEGIN 
-
-  -------------------------------------------------------------------------------
-  -- st_histogram instances and their registers
-  -------------------------------------------------------------------------------
-  gen_st_histogram : FOR i IN 0 TO g_nof_instances-1 GENERATE
-    u_st_histogram : ENTITY work.st_histogram
-    GENERIC MAP(
-      g_data_w            => g_data_w,
-      g_nof_bins          => g_nof_bins,
-      g_nof_data_per_sync => g_nof_data_per_sync
-    )
-    PORT MAP (
-      dp_clk       => dp_clk,
-      dp_rst       => dp_rst,
-      
-      snk_in       => snk_in_arr(i),
-  
-      ram_clearing => ram_clearing_arr(i),
-  
-      ram_mosi     => ram_mosi_arr(i),
-      ram_miso     => ram_miso_arr(i)
-    );
-  END GENERATE;
-
-  u_st_histogram_reg : ENTITY work.st_histogram_reg
-  GENERIC MAP (
-    g_nof_instances => g_nof_instances
-  )
-  PORT MAP (
-    dp_clk        => dp_clk,
-    dp_rst        => dp_rst,
-
-    ram_clearing  => ram_clearing_arr(0),
-    ram_filling   => ram_filling,
-
-    mm_clk        => mm_clk,
-    mm_rst        => mm_rst,
-
-    ram_fill_inst => ram_fill_inst,
-    ram_fill      => ram_fill,
-    
-    reg_mosi      => reg_mosi,
-    reg_miso      => reg_miso
-  );
-
-
-  -------------------------------------------------------------------------------
-  -- Dual clock RAM: DP write side, MM read side
-  -------------------------------------------------------------------------------
-  u_common_ram_cr_cw : ENTITY common_lib.common_ram_cr_cw
-  GENERIC MAP (
-    g_technology     => c_tech_select_default,
-    g_ram            => c_ram,
-    g_init_file      => "UNUSED"
-  )
-  PORT MAP (
-    wr_clk   => dp_clk,
-    wr_rst   => dp_rst, 
-    wr_clken => '1',
-    wr_en    => common_ram_cr_cw_wr_mosi.wr,
-    wr_adr   => common_ram_cr_cw_wr_mosi.address(c_ram_adr_w-1 DOWNTO 0),
-    wr_dat   => common_ram_cr_cw_wr_mosi.wrdata(c_ram_dat_w-1 DOWNTO 0),
-    rd_clk   => mm_clk,
-    rd_rst   => mm_rst, 
-    rd_clken => '1',
-    rd_en    => common_ram_cr_cw_rd_mosi.rd,
-    rd_adr   => common_ram_cr_cw_rd_mosi.address(c_ram_adr_w-1 DOWNTO 0),
-    rd_dat   => common_ram_cr_cw_rd_miso.rddata(c_ram_dat_w-1 DOWNTO 0),
-    rd_val   => common_ram_cr_cw_rd_miso.rdval
-  );
- 
-  -- User side MM bus for histogram readout
-  common_ram_cr_cw_rd_mosi <= ram_mosi;
-  ram_miso <= common_ram_cr_cw_rd_miso;
-
-
-  -------------------------------------------------------------------------------
-  -- Logic to move st_histogram RAM contents into the dual clock RAM above
-  -------------------------------------------------------------------------------
-
-  -- Keep track of ram_filling status and address
-  nxt_ram_filling <= '0' WHEN TO_UINT(address)=c_addr_high ELSE '1' WHEN ram_fill='1' ELSE ram_filling;
-  nxt_address <= (OTHERS=>'0') WHEN ram_filling='0' ELSE INCR_UVEC(address, 1) WHEN ram_filling='1' ELSE address;
-
-  -- Help signal for bus selection
-  ram_fill_inst_int <= TO_UINT(ram_fill_inst);
-
-  -- Do read request on ram_mosi when ram_filling
-  p_mosi_arr: PROCESS (ram_filling, address, ram_fill_inst_int)
-  BEGIN
-    FOR i IN 0 TO g_nof_instances-1 LOOP
-      ram_mosi_arr(i) <= c_mem_mosi_rst;
-      IF i = ram_fill_inst_int THEN
-        ram_mosi_arr(i).rd                              <= ram_filling;
-        ram_mosi_arr(i).address(c_ram_adr_w-1 DOWNTO 0) <= address;
-      END IF;
-    END LOOP;
-  END PROCESS;
-
-  -- Forward the read histogram data from ram_miso into write mosi of dual clock RAM
-  p_rd_miso_to_wr_mosi : PROCESS(ram_miso_arr, ram_fill_inst_int, address)
-  BEGIN
-    nxt_common_ram_cr_cw_wr_mosi <= common_ram_cr_cw_wr_mosi;
-    FOR i IN 0 TO g_nof_instances-1 LOOP
-      IF i = ram_fill_inst_int THEN
-        nxt_common_ram_cr_cw_wr_mosi.wr                              <= ram_miso_arr(i).rdval;
-        nxt_common_ram_cr_cw_wr_mosi.wrdata(c_ram_dat_w-1 DOWNTO 0)  <= ram_miso_arr(i).rddata(c_ram_dat_w-1 DOWNTO 0);
-        nxt_common_ram_cr_cw_wr_mosi.address(c_ram_adr_w-1 DOWNTO 0) <= address;
-      END IF;
-    END LOOP;
-  END PROCESS;
-
-  -- Registers
-  p_clk : PROCESS(dp_clk, dp_rst) IS
-  BEGIN
-    IF dp_rst = '1' THEN
-      common_ram_cr_cw_wr_mosi <= c_mem_mosi_rst;
-      address <= (OTHERS=>'0');
-      ram_filling <= '0';
-    ELSIF RISING_EDGE(dp_clk) THEN
-      common_ram_cr_cw_wr_mosi <= nxt_common_ram_cr_cw_wr_mosi;
-      address <= nxt_address;
-      ram_filling <= nxt_ram_filling;
-    END IF;
-  END PROCESS;
-
-END str;
diff --git a/libraries/dsp/st/src/vhdl/st_histogram.vhd b/libraries/dsp/st/src/vhdl/st_histogram.vhd
index f99a130b80b589f0a82b7120404d6c8267d09205..2d32186cb7e0452ca7a0b151030fe87224c44a6f 100644
--- a/libraries/dsp/st/src/vhdl/st_histogram.vhd
+++ b/libraries/dsp/st/src/vhdl/st_histogram.vhd
@@ -1,6 +1,6 @@
 ------------------------------------------------------------------------------
 --
--- Copyright 2020
+-- Copyright 2021
 -- ASTRON (Netherlands Institute for Radio Astronomy) <http://www.astron.nl/>
 -- P.O.Box 2, 7990 AA Dwingeloo, The Netherlands
 -- 
@@ -20,47 +20,58 @@
 
 -- Author: 
 -- . Daniel van der Schuur
--- . Jan Oudman 
 -- Purpose: 
 -- . Count incoming data values and keep the counts in RAM as a histogram
 -- Description: 
 --  . See st_histogram.txt for the original design description.
---  . The contents of the inactive RAM is cleared automatically just before the
---    next sync interval. This way, no data is lost and all valid input data
---    contributes to the histogram. The ram_clearing status output is high
---    during this automated clearing.
---  . All valid data of a DC input contributes to the histogram, no data is 
---    lost.
+--  . The histogram of all input data per sync interval (g_nof_data_per_sync) 
+--    is kept as count values in g_nof_bins RAM addresses. 
+--    g_nof_data_per_sync determines the required width of the data in RAM.
+--  . All valid data of the input contributes to the histogram, no data is 
+--    lost. This applies to any input type including DC.
+--  . Internally 2 RAM pages are used and are swapped on a sync. One 
+--    (inactive) page contains the histogram of the last sync interval 
+--    while the new data is written to the other (active) page. The contents
+--    of the inactive RAM are cleared automatically just before the next sync
+--    interval. This way no data is lost and all valid g_nof_data_per_sync
+--    samples contribute to the histogram. 
 --  . The block schematic below shows the data flow from snk_in to ram_mosi:
---    . snk_in.data is interpreted as address (bin) to read from RAM by bin_reader.
+--    . snk_in.data is reduced by combining up to 3 consecutive identical  
+--      samples into 1 and carrying the count (1..3) in the channel field.
+--      . This is to prevent simultaneous read/writes to the RAM from/on 
+--        the same address. The read,increment,write sequence takes 3 cycles,
+--        hence combining up to 3 consecutive samples (=addresses) into 1
+--        rules out this issue.
+--    . snk_in_reg.data is interpreted as address (bin) to read from RAM by 
+--      bin_reader.
 --      . a RAM pointer 0 or 1 is kept as MS part of the address.
---        . snk_in.sync determines the RAM pointer 0 or 1.
---    . The data read from that adress, the bin count, is incremented and written
---      back by bin_writer.
---    . bin_arbiter decides whether a read or write accessw takes precedence, in case
---      of simultanious RAM access requests by both bin_reader and bin_writer.
---    . Upon request (ram_miso), the bin counts (the histogram) are output on 
---      ram_mosi.
---                             bin_reader_miso    bin_arbiter_rd_miso             
---               __________    |   ___________    |   ___________                 
---              |          |   |  |           |   |  |           |                
--- ---snk_in--->|bin_reader|<--+--|           |<--+--|           |                
---              |__________|      |           |      |           |                
---                   |            |           |      |           |                
---                   |            |           |      |           |                
---     bin_reader_to_writer_mosi  |bin_arbiter|      | RAM(1..0) |----ram_mosi--->
---                   |            |           |      |           |                
---               ____v_____       |           |      |           |                
---              |          |      |           |      |           |                
---              |bin_writer|---+->|           |---+->|           |                
---              |__________|   |  |___________|   |  |___________|                
---                             |                  |                               
---                             bin_writer_mosi    bin_arbiter_wr_mosi             
+--        . snk_in_reg.sync determines the RAM pointer 0 or 1.
+--    . The data read from that address, the bin count, is incremented with
+--      the count carried in the channel field (1..3) and written back to 
+--      the RAM by bin_writer.
+--    . Upon request (ram_mosi), the bin counts (=the histogram) are output
+--      on ram_miso.
+--                                                  bin_reader_miso            
+--             ______                 __________    |   _________              
+--            |      |               |          |   |  |         |             
+-- --snk_in-->|reduce|--snk_in_reg-->|bin_reader|<--+--|         |             
+--            |______|               |__________|      |         |             
+--                                        |            |         |             
+--                                        |            |         |             
+--                           bin_reader_to_writer_mosi |RAM(1..0)|--ram_miso-->
+--                                        |            |         |             
+--                                    ____v_____       |         |             
+--                                   |          |      |         |             
+--                                   |bin_writer|--+-->|         |             
+--                                   |__________|  |   |_________|             
+--                                                 |                           
+--                                           bin_writer_mosi                   
 -- Usage:
 -- . The ram_mosi input applies to the RAM page that is inactive (not
 --   being written to from data path) *at that time*. The user should take care to
 --   time these controls such that the active RAM page does not swap before these
---   operation (ram_mosi readout) has finished.
+--   operations (ram_mosi readout) have finished, otherwise the read histogram will
+--   contain data from both the current and the previous sync periods.
 -- Remarks:
 -- . The RAM block we use basically needs 3 ports:
 --   1 - read port in dp_clk domain to read current bin value
@@ -76,7 +87,10 @@
 --   . Downside of common_ram_r_w: it uses a single clock
 --     . This st_histogram.vhd operates in dp_clk domain only, so we need to 
 --       provide MM access to the user, in the mm_clk domain, elsewhere. This
---       has been done in mms_st_histogram.vhd.
+--       has been done in mmp_st_histogram.vhd.
+--   . Possibly common_paged_ram_r_w could be used instead of 2 common_ram_r_w
+--     instances. However that modification would not add an MM clk domain for
+--     readout.
 
 LIBRARY IEEE, common_lib, mm_lib, technology_lib, dp_lib;
 USE IEEE.std_logic_1164.ALL;
@@ -88,8 +102,9 @@ USE technology_lib.technology_select_pkg.ALL;
 ENTITY st_histogram IS
   GENERIC (
     g_data_w            : NATURAL := 8;
-    g_nof_bins          : NATURAL := 256;
-    g_nof_data_per_sync : NATURAL := 1024
+    g_nof_bins          : NATURAL := 256;  -- <= 2^g_data_w (having more bins than possible values is not useful)
+    g_nof_data_per_sync : NATURAL := 1024; 
+    g_data_type         : STRING  := "unsigned" -- unsigned or signed
   );
   PORT (            
     dp_clk       : IN  STD_LOGIC;
@@ -97,8 +112,6 @@ ENTITY st_histogram IS
                     
     snk_in       : IN  t_dp_sosi; -- Active RAM page swaps on snk_in.sync
 
-    ram_clearing : OUT STD_LOGIC; -- Status output: high while RAM is being cleared
-
     ram_mosi     : IN  t_mem_mosi; -- MM access to the inactive RAM page
     ram_miso     : OUT t_mem_miso
   );
@@ -108,17 +121,30 @@ END st_histogram;
 ARCHITECTURE rtl OF st_histogram IS
 
   -------------------------------------------------------------------------------
-  -- Constants derived from generics
+  -- Main Constants 
   -------------------------------------------------------------------------------
   CONSTANT c_ram_adr_w : NATURAL := ceil_log2(g_nof_bins);
   CONSTANT c_adr_low   : NATURAL := g_data_w-c_ram_adr_w; 
-  CONSTANT c_ram_dat_w : NATURAL := ceil_log2(g_nof_data_per_sync)+1;
+  CONSTANT c_ram_dat_w : NATURAL := ceil_log2(g_nof_data_per_sync+1);
+
+  -------------------------------------------------------------------------------
+  -- snk_in.data help signal
+  -------------------------------------------------------------------------------
+  SIGNAL snk_in_data : STD_LOGIC_VECTOR(c_ram_adr_w-1 DOWNTO 0);
+
+  -------------------------------------------------------------------------------
+  -- snk_in_reg_arr
+  -------------------------------------------------------------------------------
+  CONSTANT c_ram_rd_wr_latency : NATURAL := 3; -- RAM read,incr,write cycle latency
+  CONSTANT c_shiftreg_depth    : NATURAL := c_ram_rd_wr_latency+1;
+
+  SIGNAL snk_in_reg_arr     : t_dp_sosi_arr(c_shiftreg_depth DOWNTO 0);
+  SIGNAL nxt_snk_in_reg_arr : t_dp_sosi_arr(c_shiftreg_depth DOWNTO 0);
+  SIGNAL snk_in_reg         : t_dp_sosi;
 
   -------------------------------------------------------------------------------
   -- ram_pointer
   -------------------------------------------------------------------------------
-  SIGNAL toggle_ram_pointer     : STD_LOGIC;
-  SIGNAL nxt_toggle_ram_pointer : STD_LOGIC;
   SIGNAL ram_pointer            : STD_LOGIC;
   SIGNAL prv_ram_pointer        : STD_LOGIC;
 
@@ -127,42 +153,31 @@ ARCHITECTURE rtl OF st_histogram IS
   -------------------------------------------------------------------------------
   SIGNAL bin_reader_mosi     : t_mem_mosi;
   SIGNAL bin_reader_miso     : t_mem_miso;
-
   SIGNAL prv_bin_reader_mosi : t_mem_mosi;
 
   -------------------------------------------------------------------------------
   -- bin_writer
   -------------------------------------------------------------------------------
-  SIGNAL bin_reader_to_writer_mosi : t_mem_mosi;
+  SIGNAL bin_reader_to_writer_mosi      : t_mem_mosi;
+  SIGNAL bin_reader_to_writer_count     : NATURAL;
+  SIGNAL nxt_bin_reader_to_writer_count : NATURAL;
 
-  SIGNAL nxt_bin_writer_mosi       : t_mem_mosi;
-  SIGNAL bin_writer_mosi           : t_mem_mosi;
-
-  -------------------------------------------------------------------------------
-  -- bin_arbiter
-  -------------------------------------------------------------------------------
-  SIGNAL bin_arbiter_wr_ram_pointer     : STD_LOGIC;
-  SIGNAL bin_arbiter_rd_ram_pointer     : STD_LOGIC;
-  SIGNAL prv_bin_arbiter_rd_ram_pointer : STD_LOGIC;
-
-  SIGNAL read_allowed                   : BOOLEAN;
-  SIGNAL prv_read_allowed               : BOOLEAN;
-
-  SIGNAL nxt_bin_arbiter_wr_mosi        : t_mem_mosi;
-  SIGNAL bin_arbiter_wr_mosi            : t_mem_mosi;
-  SIGNAL bin_arbiter_rd_mosi            : t_mem_mosi;
-  SIGNAL bin_arbiter_rd_miso            : t_mem_miso;
+  SIGNAL nxt_bin_writer_mosi            : t_mem_mosi;
+  SIGNAL bin_writer_mosi                : t_mem_mosi;
 
   -------------------------------------------------------------------------------
   -- 2x RAM (common_ram_r_w) instances
   -------------------------------------------------------------------------------
   CONSTANT c_nof_ram_pages     : NATURAL := 2;
+  CONSTANT c_ram               : t_c_mem := (latency  => 1,
+                                             adr_w    => c_ram_adr_w, 
+                                             dat_w    => c_ram_dat_w,
+                                             nof_dat  => g_nof_bins,
+                                             init_sl  => '0');
 
-  CONSTANT c_ram                    : t_c_mem := (latency  => 1,
-                                                  adr_w    => c_ram_adr_w, 
-                                                  dat_w    => c_ram_dat_w,
-                                                  nof_dat  => g_nof_bins,
-                                                  init_sl  => '0');
+  SIGNAL bin_writer_ram_pointer     : STD_LOGIC;
+  SIGNAL bin_reader_ram_pointer     : STD_LOGIC;
+  SIGNAL prv_bin_reader_ram_pointer : STD_LOGIC;
 
   SIGNAL common_ram_r_w_wr_mosi_arr : t_mem_mosi_arr(1 DOWNTO 0);
   SIGNAL common_ram_r_w_rd_mosi_arr : t_mem_mosi_arr(1 DOWNTO 0);
@@ -181,43 +196,152 @@ ARCHITECTURE rtl OF st_histogram IS
   SIGNAL nxt_data_cnt          : STD_LOGIC_VECTOR(c_data_cnt_w-1 DOWNTO 0);
 
   SIGNAL ram_clear             : STD_LOGIC;
+  SIGNAL ram_clearing          : STD_LOGIC;
 
   SIGNAL ram_clear_address     : STD_LOGIC_VECTOR(c_ram_adr_w-1 DOWNTO 0);
   SIGNAL nxt_ram_clear_address : STD_LOGIC_VECTOR(c_ram_adr_w-1 DOWNTO 0);
 
-  SIGNAL i_ram_clearing        : STD_LOGIC;
   SIGNAL nxt_ram_clearing      : STD_LOGIC;
 
+
 BEGIN 
 
+  -------------------------------------------------------------------------------
+  -- Select range from snk_in.data and interpret as (un)signed
+  -------------------------------------------------------------------------------
+  gen_unsigned: IF g_data_type/="signed" GENERATE
+    snk_in_data <= snk_in.data(g_data_w-1 DOWNTO c_adr_low); -- The c_ram_adr_w MSbits of the sample select the bin
+  END GENERATE;
+
+  -- Use offset_binary() from common_pkg.vhd, to swap the lower half and
+  -- upper half of the bins in case the input data is signed. The signed
+  -- input values can be two's complement or offset binary, dependent on how
+  -- they were sampled by the ADC or generated by a waveform generator.
+  -- The difference is in the details. For example with 8 bit data:
+  --
+  --                          bin:    0    127    128      255
+  --  signed two-complement value: -128     -1      0     +127
+  --  signed offset binary value:  -127.5   -0.5   +0.5   +127.5
+  --  unsigned value:                 0    127    128      255
+  gen_signed: IF g_data_type="signed" GENERATE
+    snk_in_data <= offset_binary(snk_in.data(g_data_w-1 DOWNTO c_adr_low)); -- Same MSbit slice as the unsigned case, passed through offset_binary()
+  END GENERATE;
+
+
+  -------------------------------------------------------------------------------
+  -- Slightly reduce the incoming data to prevent simultaneous read/write
+  -- . Take out every 2nd and 3rd duplicate data value (set valid='0')
+  -- . Put the number of duplicates in the channel field to be applied downstream
+  -- . With a RAM read->write latency of 3 cycles (c_ram_rd_wr_latency), we need 
+  --   a shift register of 4 words (0,1,2,3) deep to prevent simultaneous 
+  --   read/writes on the RAM.
+  --   . Element 3 is only an output register
+  -- . A sequence of duplicate data could cross a sync period:
+  --   . We need to stop&restart counting duplicates on a sync, don't count
+  --     across sync periods to ensure exactly correct bin values in each sync 
+  --     interval
+  --     . We can still get a read on cycle n and a write on cycle n+2 on the 
+  --       same address, but that does not matter as the read,write will be on
+  --       different RAM blocks (1 RAM block per sync period).
+  --     . snk_in_reg_arr(0).sync='1' : Don't compare with older snk_in_reg_arr(1)
+  --       and (2)
+  --     . snk_in_reg_arr(1).sync='1' : Don't compare with older (2)
+  --     . snk_in_reg_arr(2).sync='1' : OK to compare with both (1) and (0)
+  -- . Input : snk_in
+  -- . Output: snk_in_reg
+  -------------------------------------------------------------------------------
+  p_nxt_snk_in_reg_arr: PROCESS(snk_in, snk_in_data, snk_in_reg_arr) IS
+  BEGIN -- Combinatorial next state of the shift register; clocked into snk_in_reg_arr by p_snk_in_reg_arr
+    FOR i IN 0 TO c_shiftreg_depth-1 LOOP -- Default: clear elements 0..3, this also applies when snk_in.valid='0'
+      nxt_snk_in_reg_arr(i) <= c_dp_sosi_rst;
+    END LOOP;
+
+    IF snk_in.valid='1' THEN
+      -- The base function is a shift register that advances on valid input
+      nxt_snk_in_reg_arr(0)      <= snk_in;
+      nxt_snk_in_reg_arr(0).data(c_ram_adr_w-1 DOWNTO 0) <= snk_in_data; -- Bin index in the LSbits, bits above keep the raw snk_in.data
+      nxt_snk_in_reg_arr(1) <= snk_in_reg_arr(0);
+      nxt_snk_in_reg_arr(2) <= snk_in_reg_arr(1);
+      nxt_snk_in_reg_arr(3) <= snk_in_reg_arr(2);
+
+      -- Assign a count of 1 to valid data (the count travels in the channel field)
+      nxt_snk_in_reg_arr(0).channel <= TO_DP_CHANNEL(1);
+
+      IF snk_in_reg_arr(2).valid = '1' THEN -- Shift register 0,1,2 filled with valid data
+        -- Duplicate = same bin index = same RAM address, so compare only the c_ram_adr_w LSbits (raw MSbits may differ within a bin)
+        IF snk_in_reg_arr(1).data(c_ram_adr_w-1 DOWNTO 0) = snk_in_reg_arr(2).data(c_ram_adr_w-1 DOWNTO 0) THEN
+          -- 1=2: drop element 2 and let the word moving into element 3 carry a count of 2
+          IF snk_in_reg_arr(1).sync = '0' THEN -- Don't count across sync periods
+            nxt_snk_in_reg_arr(2).valid   <= '0';
+            nxt_snk_in_reg_arr(2).channel <= TO_DP_CHANNEL(0);
+            nxt_snk_in_reg_arr(3).channel <= TO_DP_CHANNEL(2);
+          END IF;
+        END IF;
+        IF snk_in_reg_arr(0).data(c_ram_adr_w-1 DOWNTO 0) = snk_in_reg_arr(1).data(c_ram_adr_w-1 DOWNTO 0) THEN
+          IF snk_in_reg_arr(0).sync = '0' THEN -- Don't count across sync periods
+            IF snk_in_reg_arr(1).data(c_ram_adr_w-1 DOWNTO 0) = snk_in_reg_arr(2).data(c_ram_adr_w-1 DOWNTO 0) THEN
+              -- 0=1=2: these later assignments override the 1=2 result above with a count of 3
+              IF snk_in_reg_arr(1).sync = '0' THEN -- Don't count across sync periods
+                nxt_snk_in_reg_arr(1).valid   <= '0';
+                nxt_snk_in_reg_arr(1).channel <= TO_DP_CHANNEL(0);
+                nxt_snk_in_reg_arr(2).valid   <= '0';
+                nxt_snk_in_reg_arr(2).channel <= TO_DP_CHANNEL(0);
+                nxt_snk_in_reg_arr(3).channel <= TO_DP_CHANNEL(3);
+              END IF;
+            ELSE
+              -- 0=1
+              -- Do nothing, otherwise we will never see 0=1=2. Instead wait until 0,1 shifted to 1,2.
+            END IF;
+          END IF;
+        ELSIF snk_in_reg_arr(0).data(c_ram_adr_w-1 DOWNTO 0) = snk_in_reg_arr(2).data(c_ram_adr_w-1 DOWNTO 0) THEN
+          -- 0=2: drop element 1 and let the word moving into element 3 carry a count of 2
+          IF snk_in_reg_arr(0).sync = '0' THEN -- Don't count across sync periods
+            nxt_snk_in_reg_arr(1).valid   <= '0';
+            nxt_snk_in_reg_arr(1).channel <= TO_DP_CHANNEL(0);
+            nxt_snk_in_reg_arr(3).channel <= TO_DP_CHANNEL(2);
+          END IF;
+        END IF;
+      END IF;
+    END IF;
+  END PROCESS;
+
+  snk_in_reg <= snk_in_reg_arr(3);
+
+  -- Registers: clock nxt_snk_in_reg_arr into snk_in_reg_arr, asynchronous reset on dp_rst
+  p_snk_in_reg_arr: PROCESS(dp_clk, dp_rst) IS
+  BEGIN
+    IF dp_rst = '1' THEN
+      snk_in_reg_arr <= (OTHERS => c_dp_sosi_rst); -- All elements to the reset record
+    ELSIF RISING_EDGE(dp_clk) THEN
+      snk_in_reg_arr <= nxt_snk_in_reg_arr;
+    END IF;
+  END PROCESS;
+
+
   -------------------------------------------------------------------------------
   -- ram_pointer: Keep track of what RAM to target
   -- . Target either RAM 0 or 1 per sync period
   -- . RD/WR sides of RAM have shifted sync periods due to rd>wr latency
   --   . e.g. a new sync period is read while an old sync period is written
   --   . Solution: treat the RAM pointer as MS address bit in separate RD/WR buses
-  --   . ram_pointer is synchronous to snk_in.sync
+  --   . ram_pointer is synchronous to snk_in_reg.sync
   -------------------------------------------------------------------------------
   p_ram_pointer : PROCESS(dp_rst, dp_clk) IS
   BEGIN
     IF dp_rst='1' THEN
-      prv_ram_pointer    <= '0';
-      toggle_ram_pointer <= '0';
+      prv_ram_pointer    <= '1';
     ELSIF RISING_EDGE(dp_clk) THEN
-      toggle_ram_pointer <= nxt_toggle_ram_pointer;
       prv_ram_pointer    <= ram_pointer;
     END IF;
   END PROCESS;
 
-  -- Don't toggle the RAM pointer on the first sync as we're already reading the RAM at that point.
-  nxt_toggle_ram_pointer <= '1' WHEN snk_in.sync='1' ELSE toggle_ram_pointer;
-  -- Toggle the RAM pointer starting from 2nd sync onwards
-  ram_pointer <= NOT prv_ram_pointer WHEN snk_in.sync='1' AND toggle_ram_pointer='1' ELSE prv_ram_pointer;
+  -- Toggle the RAM pointer on the sync
+  ram_pointer <= NOT prv_ram_pointer WHEN snk_in_reg.sync='1' ELSE prv_ram_pointer;
 
 
   -------------------------------------------------------------------------------
   -- bin_reader : reads bin from RAM, sends bin to bin_writer.
-  -- . Input  : snk_in          (input data stream)
+  -- . Input  : snk_in_reg      (input data stream)
   --            bin_reader_miso (reply to RAM read request: rddata = bin count)
   --            ram_pointer (to put in MOSI buses as MS address bit)
   -- . Output : bin_reader_mosi (RAM read request, address = bin)
@@ -226,16 +350,17 @@ BEGIN
   -- Fetch the bin from RAM
   bin_reader_mosi.wrdata  <= (OTHERS=>'0');
   bin_reader_mosi.wr      <= '0';
-  bin_reader_mosi.rd      <= snk_in.valid;
-  bin_reader_mosi.address <= RESIZE_UVEC(ram_pointer & snk_in.data(g_data_w-1 DOWNTO c_adr_low), c_word_w); 
+  bin_reader_mosi.rd      <= snk_in_reg.valid;
+  bin_reader_mosi.address <= RESIZE_UVEC(ram_pointer & snk_in_reg.data(c_ram_adr_w-1 DOWNTO 0), c_word_w); 
 
-  -- Store the rd address as bin_writer needs to know where to write the bin count
   p_prv_bin_reader_mosi : PROCESS(dp_clk, dp_rst) IS
   BEGIN
     IF dp_rst = '1' THEN
-      prv_bin_reader_mosi <= c_mem_mosi_rst;
+      prv_bin_reader_mosi        <= c_mem_mosi_rst;
+      bin_reader_to_writer_count <= 0;
     ELSIF RISING_EDGE(dp_clk) THEN
-      prv_bin_reader_mosi  <= bin_reader_mosi;
+      prv_bin_reader_mosi        <= bin_reader_mosi;
+      bin_reader_to_writer_count <= nxt_bin_reader_to_writer_count;
     END IF;
   END PROCESS;
 
@@ -243,19 +368,20 @@ BEGIN
   bin_reader_to_writer_mosi.wr      <= bin_reader_miso.rdval;
   bin_reader_to_writer_mosi.wrdata  <= RESIZE_UVEC(bin_reader_miso.rddata(c_ram_dat_w-1 DOWNTO 0), c_mem_data_w);
   bin_reader_to_writer_mosi.address <= prv_bin_reader_mosi.address;
+  nxt_bin_reader_to_writer_count    <= TO_UINT(snk_in_reg.channel); -- Use register (_nxt) to align count with rdval instead of snk_in_reg.valid
 
 
   -------------------------------------------------------------------------------
-  -- bin_writer : Increment the bin, forward write request to bin_arbiter
+  -- bin_writer : Increment the bin, do write request
   -- . Input  : bin_reader_to_writer_mosi (from bin_reader = bin + bin count)
-  -- . Output : bin_writer_mosi (to bin_arbiter = bin + incremented bin count)
+  -- . Output : bin_writer_mosi 
   -------------------------------------------------------------------------------
   nxt_bin_writer_mosi.rd      <= '0';
   nxt_bin_writer_mosi.wr      <= bin_reader_to_writer_mosi.wr;
   nxt_bin_writer_mosi.address <= bin_reader_to_writer_mosi.address;
-  nxt_bin_writer_mosi.wrdata  <= INCR_UVEC(bin_reader_to_writer_mosi.wrdata, 1) WHEN bin_reader_to_writer_mosi.wr='1' ELSE bin_writer_mosi.wrdata; 
+  nxt_bin_writer_mosi.wrdata  <= INCR_UVEC(bin_reader_to_writer_mosi.wrdata, bin_reader_to_writer_count) WHEN bin_reader_to_writer_mosi.wr='1' ELSE bin_writer_mosi.wrdata; 
  
-  -- Register the outputs to bin_arbiter (above we have a combinational adder = propagation delay)
+  -- Register the outputs (above we have a combinational adder = propagation delay)
   p_bin_writer_mosi : PROCESS(dp_clk, dp_rst) IS
   BEGIN
     IF dp_rst = '1' THEN
@@ -266,90 +392,42 @@ BEGIN
   END PROCESS;
 
 
-  -------------------------------------------------------------------------------
-  -- bin_arbiter : Take care of simultaneous rd/wr to the same RAM address
-  -- . Input: bin_reader_mosi (wants to read bins)
-  --          bin_writer_mosi (wants to write to bins)
-  --          bin_arbiter_rd_miso (carries the bins requested by bin_reader)
-  -- . Output: bin_arbiter_wr_mosi (wr requests to RAM)
-  --           bin_arbiter_rd_mosi (rd requests to RAM)
-  --           bin_reader_miso (carries the bins requested by bin_reader)
-  -------------------------------------------------------------------------------
-  -- Really simple arbitration: always allow writes, only allow reads when possible (rd_addr != wr_addr).
-  read_allowed <= FALSE WHEN bin_writer_mosi.wr='1' AND bin_writer_mosi.address=bin_reader_mosi.address ELSE TRUE;
-  -- save previous read_allowed
-  p_prv_read_allowed: PROCESS(dp_rst, dp_clk) IS
-  BEGIN
-    IF dp_rst='1' THEN
-      prv_read_allowed <= FALSE;
-    ELSIF RISING_EDGE(dp_clk) THEN
-      prv_read_allowed <= read_allowed;
-    END IF;
-  END PROCESS;
-
-  -- Forward MOSI buses
-  -- . RD MOSI
-  bin_arbiter_rd_mosi.wr      <= '0';
-  bin_arbiter_rd_mosi.rd      <= bin_reader_mosi.rd WHEN read_allowed ELSE '0';
-  bin_arbiter_rd_mosi.address <= bin_reader_mosi.address;
-  -- . WR MOSI
-  bin_arbiter_wr_mosi.rd      <= '0';
-  bin_arbiter_wr_mosi.wr      <= bin_writer_mosi.wr;
-  bin_arbiter_wr_mosi.wrdata  <= bin_writer_mosi.wrdata;
-  bin_arbiter_wr_mosi.address <= bin_writer_mosi.address;
-
-  -- Loop back the WR data to the RD side when read was not allowed or on second read of same address
-  p_bin_reader_miso : PROCESS(prv_read_allowed, bin_reader_mosi, bin_reader_miso, bin_writer_mosi, read_allowed, bin_arbiter_rd_miso) IS
-  BEGIN
-    bin_reader_miso <= bin_arbiter_rd_miso;
-    IF prv_bin_reader_mosi.rd = '1' AND prv_read_allowed = FALSE THEN -- Fake succesful readback when read was not allowed
-      bin_reader_miso.rdval  <= '1';
-      bin_reader_miso.rddata <= bin_writer_mosi.wrdata;
-    ELSIF read_allowed = TRUE THEN
-      bin_reader_miso <= bin_arbiter_rd_miso;
-    ELSIF (prv_bin_reader_mosi.rd = '1' AND bin_reader_mosi.rd='1') AND (prv_bin_reader_mosi.address=bin_reader_mosi.address) THEN -- 2 reads on same address in row: 2nd read is outdated so return wrdata here
-      bin_reader_miso.rdval  <= '1';
-      bin_reader_miso.rddata <= bin_writer_mosi.wrdata;
-    END IF;
-  END PROCESS;
-
-
   -------------------------------------------------------------------------------
   -- Two RAM (common_ram_r_w) instances. The user can read the histogram from the 
-  -- instance that is not being written to by the bin_arbiter.
-  -- . Input:  bin_arbiter_wr_mosi (writes bins)
-  --           bin_arbiter_rd_mosi (requests to read bins to increment bin count)
+  -- instance that is not being written to by the bin_writer.
+  -- . Input:  bin_writer_mosi (writes bins)
+  --           bin_reader_mosi (requests to read bins to increment bin count)
   --           histogram_rd_mosi (requests to read the bins on the user side)
   --           histogram_wr_mosi (on user side, auto clears RAM every sync)
   -- . Output: histogram_rd_miso (carries the bins the user wants to read)
-  --           bin_arbiter_miso (carries then bins the bin_reader wants to read)
+  --           bin_reader_miso (carries the bins the bin_reader wants to read)
   -- . Note: the ram_pointer is carried (with different latencies) as MSbit in:
-  --         . bin_arbiter_wr_mosi.address
-  --         . bin_arbiter_rd_mosi.address 
+  --         . bin_writer_mosi.address
+  --         . bin_reader_mosi.address 
   -------------------------------------------------------------------------------
-  bin_arbiter_wr_ram_pointer <= bin_arbiter_wr_mosi.address(c_ram_adr_w);
-  bin_arbiter_rd_ram_pointer <= bin_arbiter_rd_mosi.address(c_ram_adr_w);  
+  bin_writer_ram_pointer <= bin_writer_mosi.address(c_ram_adr_w);
+  bin_reader_ram_pointer <= bin_reader_mosi.address(c_ram_adr_w);  
 
   -- Store the previous RAM pointer of the read bus
   p_prv_ram_pointer : PROCESS(dp_clk, dp_rst) IS
   BEGIN
     IF dp_rst = '1' THEN
-      prv_bin_arbiter_rd_ram_pointer <= '0';
+      prv_bin_reader_ram_pointer <= '0';
     ELSIF RISING_EDGE(dp_clk) THEN
-      prv_bin_arbiter_rd_ram_pointer <= bin_arbiter_rd_ram_pointer;
+      prv_bin_reader_ram_pointer <= bin_reader_ram_pointer;
     END IF;
   END PROCESS;
 
-  -- Let bin_arbiter write RAM 0 while user reads RAM 1 and vice versa
-  common_ram_r_w_wr_mosi_arr(0) <= bin_arbiter_wr_mosi WHEN bin_arbiter_wr_ram_pointer='0' ELSE histogram_wr_mosi;
-  common_ram_r_w_rd_mosi_arr(0) <= bin_arbiter_rd_mosi WHEN bin_arbiter_rd_ram_pointer='0' ELSE histogram_rd_mosi;
-  common_ram_r_w_wr_mosi_arr(1) <= bin_arbiter_wr_mosi WHEN bin_arbiter_wr_ram_pointer='1' ELSE histogram_wr_mosi; 
-  common_ram_r_w_rd_mosi_arr(1) <= bin_arbiter_rd_mosi WHEN bin_arbiter_rd_ram_pointer='1' ELSE histogram_rd_mosi;
+  -- Let bin_writer write RAM 0 while user reads RAM 1 and vice versa
+  common_ram_r_w_wr_mosi_arr(0) <= bin_writer_mosi WHEN bin_writer_ram_pointer='0' ELSE histogram_wr_mosi;
+  common_ram_r_w_rd_mosi_arr(0) <= bin_reader_mosi WHEN bin_reader_ram_pointer='0' ELSE histogram_rd_mosi;
+  common_ram_r_w_wr_mosi_arr(1) <= bin_writer_mosi WHEN bin_writer_ram_pointer='1' ELSE histogram_wr_mosi; 
+  common_ram_r_w_rd_mosi_arr(1) <= bin_reader_mosi WHEN bin_reader_ram_pointer='1' ELSE histogram_rd_mosi;
   
-  -- Let bin_arbiter read RAM 0 while user reads RAM 1 and vice versa
-  -- . We always want the MISO bus to switch 1 cycle later than the MOSI (such that the MM operation can finish); hence using prv_bin_arbiter_rd_ram_pointer.
-  bin_arbiter_rd_miso  <= common_ram_r_w_rd_miso_arr(0) WHEN prv_bin_arbiter_rd_ram_pointer='0' ELSE common_ram_r_w_rd_miso_arr(1);
-  histogram_rd_miso    <= common_ram_r_w_rd_miso_arr(1) WHEN prv_bin_arbiter_rd_ram_pointer='0' ELSE common_ram_r_w_rd_miso_arr(0);
+  -- Let bin_reader read RAM 0 while user reads RAM 1 and vice versa
+  -- . We always want the MISO bus to switch 1 cycle later than the MOSI (such that the MM operation can finish); hence using prv_bin_reader_ram_pointer.
+  bin_reader_miso   <= common_ram_r_w_rd_miso_arr(0) WHEN prv_bin_reader_ram_pointer='0' ELSE common_ram_r_w_rd_miso_arr(1);
+  histogram_rd_miso <= common_ram_r_w_rd_miso_arr(1) WHEN prv_bin_reader_ram_pointer='0' ELSE common_ram_r_w_rd_miso_arr(0);
 
   gen_common_ram_r_w : FOR i IN 0 TO c_nof_ram_pages-1 GENERATE
     u_common_ram_r_w : ENTITY common_lib.common_ram_r_w
@@ -383,12 +461,12 @@ BEGIN
   ram_clear <= '1' WHEN TO_UINT(data_cnt)=g_nof_data_per_sync-g_nof_bins-1 ELSE '0';
 
   -- Signal to indicate when RAM is being cleared
-  nxt_ram_clearing <= '1' WHEN ram_clear='1' ELSE '0' WHEN TO_UINT(ram_clear_address)=g_nof_bins-1 ELSE i_ram_clearing;
+  nxt_ram_clearing <= '1' WHEN ram_clear='1' ELSE '0' WHEN TO_UINT(ram_clear_address)=g_nof_bins-1 ELSE ram_clearing;
 
   -- Address counter: 0 to g_nof_bins-1.
-  nxt_ram_clear_address <= INCR_UVEC(ram_clear_address, 1) WHEN i_ram_clearing='1' ELSE (OTHERS=>'0');
+  nxt_ram_clear_address <= INCR_UVEC(ram_clear_address, 1) WHEN ram_clearing='1' ELSE (OTHERS=>'0');
 
-  histogram_wr_mosi.wr                              <= i_ram_clearing;
+  histogram_wr_mosi.wr                              <= ram_clearing;
   histogram_wr_mosi.address(c_ram_adr_w-1 DOWNTO 0) <= ram_clear_address;
   histogram_wr_mosi.wrdata                          <= (OTHERS=>'0');
   histogram_wr_mosi.rd                              <= '0';
@@ -398,16 +476,15 @@ BEGIN
   BEGIN
     IF dp_rst = '1' THEN
       ram_clear_address <= (OTHERS=>'0');
-      i_ram_clearing    <= '0';
+      ram_clearing      <= '0';
       data_cnt          <= (OTHERS=>'0');
     ELSIF RISING_EDGE(dp_clk) THEN
       ram_clear_address <= nxt_ram_clear_address;
-      i_ram_clearing    <= nxt_ram_clearing;
+      ram_clearing      <= nxt_ram_clearing;
       data_cnt          <= nxt_data_cnt;
     END IF;
   END PROCESS;
 
-  ram_clearing <= i_ram_clearing;
 
   -------------------------------------------------------------------------------
   -- Expose the MM buses to the user
diff --git a/libraries/dsp/st/src/vhdl/st_histogram_reg.vhd b/libraries/dsp/st/src/vhdl/st_histogram_reg.vhd
deleted file mode 100644
index 9230e11503c35feb7cbe26464f04d10776d48d51..0000000000000000000000000000000000000000
--- a/libraries/dsp/st/src/vhdl/st_histogram_reg.vhd
+++ /dev/null
@@ -1,203 +0,0 @@
--------------------------------------------------------------------------------
---
--- Copyright 2020
--- ASTRON (Netherlands Institute for Radio Astronomy) <http://www.astron.nl/>
--- P.O.Box 2, 7990 AA Dwingeloo, The Netherlands
--- 
--- Licensed under the Apache License, Version 2.0 (the "License");
--- you may not use this file except in compliance with the License.
--- You may obtain a copy of the License at
--- 
---     http://www.apache.org/licenses/LICENSE-2.0
--- 
--- Unless required by applicable law or agreed to in writing, software
--- distributed under the License is distributed on an "AS IS" BASIS,
--- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
--- See the License for the specific language governing permissions and
--- limitations under the License.
---
--------------------------------------------------------------------------------
-
--- Author:
--- . Daniel van der Schuur
--- Purpose:
--- . Provide MM registers for st_histogram
--- Description:
--- . Address 0, bit 0 = RAM clear
---   . Read : 'ram_clearing' status output of st_histogram.vhd. '1' when RAM is clearing.
--- . Address 1 = select RAM instance to fill (read out)
---   . Read : read back selected instance
---   . Write: select RAM instance to fill
--- . Address 2, bit 0 = RAM fill
---   . Read : 'ram_filling' status.  '1' right after write of ram_fill. '0' when not filling RAM (anymore).
---   . Write: 'ram_fill '   control. '1' to fill RAM on write event.
-
-
-LIBRARY IEEE, common_lib;
-USE IEEE.std_logic_1164.ALL;
-USE common_lib.common_pkg.ALL;
-USE common_lib.common_mem_pkg.ALL;
-
-ENTITY st_histogram_reg IS
-  GENERIC (
-    g_nof_instances : NATURAL
-  );
-  PORT (            
-    dp_clk        : IN  STD_LOGIC;
-    dp_rst        : IN  STD_LOGIC;
-  
-    ram_clearing  : IN  STD_LOGIC;
-  
-    ram_fill_inst : OUT STD_LOGIC_VECTOR(ceil_log2(g_nof_instances)-1 DOWNTO 0);
-    ram_fill      : OUT STD_LOGIC;
-    ram_filling   : IN  STD_LOGIC;
-  
-    mm_clk        : IN  STD_LOGIC;
-    mm_rst        : IN  STD_LOGIC;               
-  
-    reg_mosi      : IN  t_mem_mosi;
-    reg_miso      : OUT t_mem_miso
-  );
-END st_histogram_reg;
-
-ARCHITECTURE rtl OF st_histogram_reg IS
-
-  CONSTANT c_nof_addresses : NATURAL := 3;
-
-  CONSTANT c_mm_reg : t_c_mem := (latency  => 1,
-                                  adr_w    => ceil_log2(c_nof_addresses),
-                                  dat_w    => c_word_w, -- Use MM bus data width = c_word_w = 32 for all MM registers
-                                  nof_dat  => c_nof_addresses,
-                                  init_sl  => '0');                                              
-
-  SIGNAL mm_ram_clearing : STD_LOGIC; 
-
-  SIGNAL mm_ram_fill_inst : STD_LOGIC_VECTOR(ceil_log2(g_nof_instances)-1 DOWNTO 0);
-  SIGNAL mm_ram_fill      : STD_LOGIC; 
-  SIGNAL mm_ram_filling   : STD_LOGIC; 
-  
-BEGIN 
-  
-  ------------------------------------------------------------------------------
-  -- MM register access in the mm_clk domain
-  -- . Hardcode the shared MM slave register directly in RTL instead of using
-  --   the common_reg_r_w instance. Directly using RTL is easier when the large
-  --   MM register has multiple different fields and with different read and
-  --   write options per field in one MM register.
-  ------------------------------------------------------------------------------
-  p_mm_reg : PROCESS (mm_clk, mm_rst)
-  BEGIN
-    IF mm_rst = '1' THEN
-      -- Read access
-      reg_miso <= c_mem_miso_rst;
-      
-      -- Access event, register values
-      mm_ram_fill <= '0';
-      mm_ram_fill_inst <= (OTHERS=>'0');
- 
-    ELSIF rising_edge(mm_clk) THEN
-      -- Read access defaults
-      reg_miso.rdval <= '0';
-      
-      -- Access event defaults
-      mm_ram_fill  <= '0';
-      
-      -- Write access: set register value
-      IF reg_mosi.wr = '1' THEN
-        CASE TO_UINT(reg_mosi.address(c_mm_reg.adr_w-1 DOWNTO 0)) IS
-          WHEN 1 =>
-            mm_ram_fill_inst <= reg_mosi.wrdata(ceil_log2(g_nof_instances)-1 DOWNTO 0);
-          WHEN 2 =>
-            mm_ram_fill <= '1';
-          WHEN OTHERS => NULL;  -- unused MM addresses
-        END CASE;
-        
-      -- Read access: get register value
-      ELSIF reg_mosi.rd = '1' THEN
-        reg_miso       <= c_mem_miso_rst;    -- set unused rddata bits to '0' when read
-        reg_miso.rdval <= '1';               -- c_mm_reg.latency = 1
-        CASE TO_UINT(reg_mosi.address(c_mm_reg.adr_w-1 DOWNTO 0)) IS
-          WHEN 0 =>
-            -- Read RAM clearing status
-            reg_miso.rddata(0) <= mm_ram_clearing;
-          WHEN 1 =>
-            -- Read selected RAM instance to fill
-            reg_miso.rddata(ceil_log2(g_nof_instances)-1 DOWNTO 0) <= mm_ram_fill_inst;
-          WHEN 2 =>
-            -- Read RAM filling status
-            reg_miso.rddata(0) <= mm_ram_filling;
-          WHEN OTHERS => NULL;  -- unused MM addresses
-        END CASE;
-      END IF;
-    END IF;
-  END PROCESS;
-
-  ------------------------------------------------------------------------------
-  -- Transfer register value between mm_clk and st_clk domain.
-  -- If the function of the register ensures that the value will not be used
-  -- immediately when it was set, then the transfer between the clock domains
-  -- can be done by wires only. Otherwise if the change in register value can
-  -- have an immediate effect then the bit or word value needs to be transfered
-  -- using:
-  --
-  -- . common_async            --> for single-bit level signal
-  -- . common_spulse           --> for single-bit pulse signal
-  -- . common_reg_cross_domain --> for a multi-bit (a word) signal
-  --
-  -- Typically always use a crossing component for the single bit signals (to
-  -- be on the save side) and only use a crossing component for the word
-  -- signals if it is necessary (to avoid using more logic than necessary).
-  ------------------------------------------------------------------------------
-  
-  -- ST --> MM
-  u_common_async_clear : ENTITY common_lib.common_async
-  GENERIC MAP (
-    g_rst_level => '0'
-  )
-  PORT MAP (
-    clk  => mm_clk,
-    rst  => mm_rst,
-
-    din  => ram_clearing,
-    dout => mm_ram_clearing
-  );
-
- u_common_async_fill : ENTITY common_lib.common_async
-  GENERIC MAP (
-    g_rst_level => '0'
-  )
-  PORT MAP (
-    clk  => mm_clk,
-    rst  => mm_rst,
-
-    din  => ram_filling,
-    dout => mm_ram_filling
-  );
-
-  u_common_spulse_fill : ENTITY common_lib.common_spulse
-  PORT MAP (
-    in_clk    => mm_clk,
-    in_rst    => mm_rst,
-
-    in_pulse  => mm_ram_fill,
-    in_busy   => OPEN,
-
-    out_clk   => dp_clk,
-    out_rst   => dp_rst,
-
-    out_pulse => ram_fill
-  ); 
-
-  u_common_reg_cross_domain : ENTITY common_lib.common_reg_cross_domain
-    PORT MAP (
-      in_clk      => mm_clk,
-      in_rst      => mm_rst,
-      in_dat      => mm_ram_fill_inst,
-      in_done     => OPEN,
-      out_clk     => dp_clk,
-      out_rst     => dp_rst,
-      out_dat     => ram_fill_inst,
-      out_new     => OPEN
-    );
-
-END rtl;
diff --git a/libraries/dsp/st/tb/vhdl/tb_mms_st_histogram.vhd b/libraries/dsp/st/tb/vhdl/tb_mmp_st_histogram.vhd
similarity index 72%
rename from libraries/dsp/st/tb/vhdl/tb_mms_st_histogram.vhd
rename to libraries/dsp/st/tb/vhdl/tb_mmp_st_histogram.vhd
index 0e8fea35b4f7ce3e88e4db243d208345db39ef2a..98fbed5994737e5efeaa70f15e89bd0f9c467770 100644
--- a/libraries/dsp/st/tb/vhdl/tb_mms_st_histogram.vhd
+++ b/libraries/dsp/st/tb/vhdl/tb_mmp_st_histogram.vhd
@@ -1,6 +1,6 @@
 -------------------------------------------------------------------------------
 --
--- Copyright 2020
+-- Copyright 2021
 -- ASTRON (Netherlands Institute for Radio Astronomy) <http://www.astron.nl/>
 -- P.O.Box 2, 7990 AA Dwingeloo, The Netherlands
 -- 
@@ -23,14 +23,17 @@
 -- Author: 
 -- . Daniel van der Schuur
 -- Purpose:
--- . 
+-- . Read and verify histogram RAM of instance 0
 -- ModelSim usage:
 -- . (open project, compile)
 -- . (load simulation config)
 -- . as 8
 -- . run -a
 -- Description:
--- . 
+-- . This TB is self checking and stops after g_nof_sync test iterations.
+-- . This TB only checks the MM aspects of mmp_st_histogram with limited (counter
+--   data) stimuli and verification. Details of st_histogram are thoroughly 
+--   checked in tb_tb_st_histogram.
 -------------------------------------------------------------------------------
 
 LIBRARY IEEE, common_lib, mm_lib, dp_lib;
@@ -43,18 +46,18 @@ USE common_lib.tb_common_pkg.ALL;
 USE dp_lib.dp_stream_pkg.ALL;
 USE dp_lib.tb_dp_pkg.ALL;
 
-ENTITY tb_mms_st_histogram IS
+ENTITY tb_mmp_st_histogram IS
   GENERIC(
     g_nof_sync          : NATURAL := 4;
     g_nof_instances     : NATURAL := 12;
     g_data_w            : NATURAL := 14;
     g_nof_bins          : NATURAL := 512;
-    g_nof_data_per_sync : NATURAL := 40000
-  );
-END tb_mms_st_histogram;
+    g_nof_data_per_sync : NATURAL := 16384 -- g_nof_data_per_sync/g_nof_bins should be integer so
+  );                                       -- counter data yields the same histogram in each bin
+END tb_mmp_st_histogram;
 
 
-ARCHITECTURE tb OF tb_mms_st_histogram IS
+ARCHITECTURE tb OF tb_mmp_st_histogram IS
  
   ---------------------------------------------------------------------------
   -- Clocks and resets
@@ -64,45 +67,37 @@ ARCHITECTURE tb OF tb_mms_st_histogram IS
 
   SIGNAL dp_clk            : STD_LOGIC := '1';
   SIGNAL dp_rst            : STD_LOGIC;
-
   SIGNAL mm_clk            : STD_LOGIC := '1';
   SIGNAL mm_rst            : STD_LOGIC;
-
   SIGNAL tb_end            : STD_LOGIC := '0';
 
   ----------------------------------------------------------------------------
   -- stimuli
   ----------------------------------------------------------------------------
-  SIGNAL stimuli_en : STD_LOGIC := '1';
-
+  SIGNAL stimuli_en      : STD_LOGIC := '1';
   SIGNAL stimuli_src_out : t_dp_sosi;
   SIGNAL stimuli_src_in  : t_dp_siso;
+  SIGNAL stimuli_done    : STD_LOGIC;
    
   ----------------------------------------------------------------------------
   -- st_histogram
   ----------------------------------------------------------------------------
   SIGNAL st_histogram_snk_in_arr : t_dp_sosi_arr(g_nof_instances-1 DOWNTO 0);
-
-  SIGNAL st_histogram_reg_mosi   : t_mem_mosi;
-  SIGNAL st_histogram_reg_miso   : t_mem_miso;
-
-  SIGNAL st_histogram_ram_mosi   : t_mem_mosi;
-  SIGNAL st_histogram_ram_miso   : t_mem_miso;
+  SIGNAL st_histogram_ram_copi   : t_mem_copi;
+  SIGNAL st_histogram_ram_cipo   : t_mem_cipo;
 
    
   ----------------------------------------------------------------------------
   -- Readout & verification
   ----------------------------------------------------------------------------
-  CONSTANT c_ram_dat_w : NATURAL := ceil_log2(g_nof_data_per_sync)+1;
-
+  CONSTANT c_ram_dat_w            : NATURAL := ceil_log2(g_nof_data_per_sync+1);
   CONSTANT c_expected_ram_content : NATURAL := g_nof_data_per_sync/g_nof_bins;
 
-  SIGNAL ram_filling : STD_LOGIC;
-
   SIGNAL ram_rd_word           : STD_LOGIC_VECTOR(c_ram_dat_w-1 DOWNTO 0);
   SIGNAL ram_rd_word_int       : NATURAL;
   SIGNAL ram_rd_word_valid     : STD_LOGIC;
   SIGNAL nxt_ram_rd_word_valid : STD_LOGIC;
+  SIGNAL verification_done     : STD_LOGIC;
 
 BEGIN 
   
@@ -117,7 +112,7 @@ BEGIN
  
 
   ----------------------------------------------------------------------------
-  -- DP Stimuli: generate st_histogram input data
+  -- DP Stimuli: generate st_histogram input (counter) data
   ---------------------------------------------------------------------------- 
   stimuli_src_in <= c_dp_siso_rdy;
 
@@ -125,6 +120,7 @@ BEGIN
   p_generate_packets : PROCESS
     VARIABLE v_sosi : t_dp_sosi := c_dp_sosi_rst;
   BEGIN
+    stimuli_done    <= '0';
     stimuli_src_out <= c_dp_sosi_rst;
     proc_common_wait_until_low(dp_clk, dp_rst);
     proc_common_wait_some_cycles(dp_clk, 5);
@@ -132,22 +128,25 @@ BEGIN
     FOR I IN 0 TO g_nof_sync-1 LOOP
       v_sosi.sync    := '1';
       v_sosi.data    := RESIZE_DP_DATA(v_sosi.data(g_data_w-1 DOWNTO 0));  -- wrap when >= 2**g_data_w    
+      -- Generate a block of counter data
       proc_dp_gen_block_data(g_data_w, TO_UINT(v_sosi.data), g_nof_data_per_sync, TO_UINT(v_sosi.channel), TO_UINT(v_sosi.err), v_sosi.sync, v_sosi.bsn, dp_clk, stimuli_en, stimuli_src_in, stimuli_src_out);
     END LOOP;     
 
+    stimuli_done <= '1';
     proc_common_wait_some_cycles(dp_clk, 50);
     tb_end <= '1';
     WAIT;
   END PROCESS;
 
+
   ----------------------------------------------------------------------------
-  -- mms_st_histogram
+  -- mmp_st_histogram
   ----------------------------------------------------------------------------
   gen_snk_in_arr: FOR i IN 0 TO g_nof_instances-1 GENERATE
     st_histogram_snk_in_arr(i) <= stimuli_src_out;
   END GENERATE;
 
-  u_mms_st_histogram : ENTITY work.mms_st_histogram
+  u_mmp_st_histogram : ENTITY work.mmp_st_histogram
   GENERIC MAP(
     g_nof_instances     => g_nof_instances,
     g_data_w            => g_data_w,
@@ -163,11 +162,8 @@ BEGIN
 
     snk_in_arr   => st_histogram_snk_in_arr,
 
-    reg_mosi     => st_histogram_reg_mosi,
-    reg_miso     => st_histogram_reg_miso,
- 
-    ram_mosi     => st_histogram_ram_mosi,
-    ram_miso     => st_histogram_ram_miso
+    ram_copi     => st_histogram_ram_copi,
+    ram_cipo     => st_histogram_ram_cipo
   );
 
 
@@ -176,48 +172,25 @@ BEGIN
   ---------------------------------------------------------------------------- 
   p_ram_clear : PROCESS
   BEGIN
-    st_histogram_ram_mosi <= c_mem_mosi_rst;
-    st_histogram_reg_mosi <= c_mem_mosi_rst;
-    ram_filling <= '0';
+    st_histogram_ram_copi <= c_mem_copi_rst;
     ram_rd_word <= (OTHERS=>'0');
-     -- The first sync indicates start of incoming data - let it pass
-     proc_common_wait_until_high(dp_clk, stimuli_src_out.sync);
-     proc_common_wait_some_cycles(mm_clk, 10);
-     FOR i IN 0 TO g_nof_sync-2 LOOP 
-       -- Wiat for a full sync period of data
-       proc_common_wait_until_high(dp_clk, stimuli_src_out.sync);
-        -- The sync has passed, we can start reading the resulting histogram
-       FOR j IN 0 TO g_nof_instances-1 LOOP
-         -- Select st_histogram instance to read out
-         proc_mem_mm_bus_wr(1, j, mm_clk, st_histogram_reg_mosi);
-         proc_common_wait_some_cycles(mm_clk, 2);
-
-         -- Enable RAM filling
-         proc_mem_mm_bus_wr(2, 1, mm_clk, st_histogram_reg_mosi);
-         proc_common_wait_some_cycles(mm_clk, 10);
-
-         -- Wait until RAM filling is done
-         proc_mem_mm_bus_rd(2, mm_clk, st_histogram_reg_mosi);
-         ram_filling <= st_histogram_reg_miso.rddata(0);
-         proc_common_wait_some_cycles(mm_clk, 2);
-         WHILE ram_filling='1' LOOP
-            -- Read filling status
-           proc_mem_mm_bus_rd(2, mm_clk, st_histogram_reg_mosi);
-           ram_filling <= st_histogram_reg_miso.rddata(0);
-           proc_common_wait_some_cycles(mm_clk, 1);
-         END LOOP;
-
-         -- Read out the RAM contents
-        FOR k IN 0 TO g_nof_bins-1 LOOP
-           proc_mem_mm_bus_rd(k, mm_clk, st_histogram_ram_mosi);
-           ram_rd_word <= st_histogram_ram_miso.rddata(c_ram_dat_w-1 DOWNTO 0);
-           ram_rd_word_int <= TO_UINT(ram_rd_word);
-        END LOOP;
+
+    -- The first sync indicates start of incoming data - let it pass
+    proc_common_wait_until_high(dp_clk, stimuli_src_out.sync);
+    proc_common_wait_some_cycles(mm_clk, 10);
+    FOR i IN 0 TO g_nof_sync-2 LOOP 
+      -- Wait for a full sync period of data
+      proc_common_wait_until_high(dp_clk, stimuli_src_out.sync);
+      -- Read out the RAM contents
+      FOR k IN 0 TO g_nof_bins-1 LOOP
+        proc_mem_mm_bus_rd(k, mm_clk, st_histogram_ram_copi);
+        ram_rd_word <= st_histogram_ram_cipo.rddata(c_ram_dat_w-1 DOWNTO 0);
+        ram_rd_word_int <= TO_UINT(ram_rd_word);
       END LOOP;
-    END LOOP;
+   END LOOP;
   END PROCESS;
 
-  -- Register st_histogram_ram_miso.rdval so we read only valid data
+  -- Register st_histogram_ram_cipo.rdval so we read only valid data
   p_nxt_ram_rd_word_valid : PROCESS(mm_rst, mm_clk)
   BEGIN
    IF mm_rst = '1' THEN
@@ -226,7 +199,7 @@ BEGIN
       ram_rd_word_valid <= nxt_ram_rd_word_valid;
     END IF;
   END PROCESS;
-  nxt_ram_rd_word_valid <= st_histogram_ram_miso.rdval;
+  nxt_ram_rd_word_valid <= st_histogram_ram_cipo.rdval;
 
 
   ----------------------------------------------------------------------------
@@ -234,6 +207,7 @@ BEGIN
   ----------------------------------------------------------------------------
   p_verify_assert : PROCESS
   BEGIN
+    verification_done <= '0';
     FOR i IN 0 TO g_nof_sync-1 LOOP
       proc_common_wait_until_high(dp_clk, stimuli_src_out.sync);  
       proc_common_wait_until_high(dp_clk, ram_rd_word_valid);      
@@ -244,6 +218,17 @@ BEGIN
       END IF;
     END LOOP;
     WAIT FOR 5 ns;
+    verification_done <= '1'; --We have blocking proc_common_wait_until_high procedures above so we need to know if we make it here.
+    WAIT; -- Suspend forever: without this the process restarts at once and the "verification_done <= '0'" at its top overrides the '1' in the same simulation cycle
+  END PROCESS;
+
+  -- Check if verification was done at all: sample verification_done 50 dp_clk cycles after the stimuli have finished
+  p_check_verification_done : PROCESS
+  BEGIN
+    proc_common_wait_until_high(dp_clk, stimuli_done);
+    proc_common_wait_some_cycles(dp_clk, 50);
+    ASSERT verification_done='1' REPORT "Verification failed" SEVERITY ERROR;
+    WAIT;
   END PROCESS;
 
 
diff --git a/libraries/dsp/st/tb/vhdl/tb_st_histogram.vhd b/libraries/dsp/st/tb/vhdl/tb_st_histogram.vhd
index 6b4d3eeadd9b5807ab58e9ae606dcd72d3df91c7..6e84b4be217440180bbdeb989ab5e40eff2895f8 100644
--- a/libraries/dsp/st/tb/vhdl/tb_st_histogram.vhd
+++ b/libraries/dsp/st/tb/vhdl/tb_st_histogram.vhd
@@ -1,6 +1,6 @@
 -------------------------------------------------------------------------------
 --
--- Copyright 2020
+-- Copyright 2021
 -- ASTRON (Netherlands Institute for Radio Astronomy) <http://www.astron.nl/>
 -- P.O.Box 2, 7990 AA Dwingeloo, The Netherlands
 -- 
@@ -29,25 +29,41 @@
 -- . (load simulation config)
 -- . as 8
 -- . run -a
+-- . For signals 'stimuli_data' and 'histogram' select Format->Analog(automatic)
+--   . set Radix to 'decimal' for signed input data.
 -- Description:
--- . Verification be eye (wave window) - observe that:
---   . There are 4 sync periods in which 3 packets of 1024 words are generated;
---   . histogram_snk_in.data = 0..1023, 3 times per sync
---     . st_histogram has 256 bins so uses the 8 MS bits of snk_in.data
---     . st_histogram will count 4*0..255 instead of 0..1023 per packet
---     . st_histogram will count 12 occurences (3 packets * 4 * 0..255) per sync.
---   . bin_writer_mosi writes bin counts 1..12 per sync interval;
---   . Both RAMs are used twice: RAM 0, RAM 1, RAM 0, RAM 1;
---   . RAM clearing completes just before the end of each sync interval.
--- . Automatic verification:
---   . In each sync period the RAM contents are read out via ram_mosi/miso and 
---     compared to the expected bin counts.
+-- . tb_st_histogram generates and visualizes an input sine wave and the
+--   resulting histogram in the wave window.
+-- . Verification by eye (wave window):
+--   . For the sine input (default), observe input 'stimuli_data' and output 
+--     'histogram' signals. Set Radix->Decimal and Format->Analog (automatic) 
+--     in QuestaSim.
+--   . For counter data, observe that:
+--     . There are 4 sync periods in which 3 packets of 1024 words are generated;
+--     . histogram_snk_in.data = 0..1023, 3 times per sync
+--       . st_histogram has 256 bins so uses the 8 MS bits of snk_in.data
+--       . st_histogram will count 4*0..255 instead of 0..1023 per packet
+--       . st_histogram will count 12 occurrences (3 packets * 4 * 0..255) per sync.
+--     . bin_writer_mosi writes bin counts 1..12 per sync interval;
+--     . Both RAMs are used twice: RAM 0, RAM 1, RAM 0, RAM 1;
+--     . RAM clearing completes just before the end of each sync interval.
+-- . Automatic verification - in each sync period:
+--   . the RAM contents are read out via ram_mosi/miso and compared to the 
+--     expected bin counts. This is done only for g_stimuli_mode = counter 
+--     and dc because that is sufficient and easily done automatically. 
+--     . The counter mode yields the same value in all bins
+--     . DC mode yields max value in one bin and zero in other bins.
+--   . the sum of all bins is checked against the expected g_nof_data_per_sync.
+--     . this is done for all modes 'counter', 'dc', 'random' and 'sine'.
 -------------------------------------------------------------------------------
 
 LIBRARY IEEE, common_lib, mm_lib, dp_lib;
 USE IEEE.std_logic_1164.ALL;
 USE IEEE.numeric_std.ALL; 
+USE IEEE.math_real.ALL;
 USE common_lib.common_pkg.ALL;
+USE common_lib.common_lfsr_sequences_pkg.ALL;
+USE common_lib.common_str_pkg.ALL;
 USE common_lib.common_mem_pkg.ALL;
 USE common_lib.tb_common_mem_pkg.ALL;
 USE common_lib.tb_common_pkg.ALL;
@@ -56,11 +72,14 @@ USE dp_lib.tb_dp_pkg.ALL;
 
 ENTITY tb_st_histogram IS
   GENERIC(
-    g_nof_sync             : NATURAL := 4; -- We're simulating at least 4 g_nof_sync so both RAMs are written and cleared twice.
-    g_data_w               : NATURAL := 8; -- Determines maximum number of bins (2^g_data_w)
-    g_nof_bins             : NATURAL := 256; -- Lower than or equal to 2^g_data_w. Higher is allowed but makes no sense.
-    g_nof_data_per_sync    : NATURAL := 1024 -- Determines max required RAM data width. e.g. 11b to store max bin count '1024'.
-    );
+    g_nof_sync          : NATURAL := 4;        -- Number of sync periods to simulate. Use at least 4 so both RAMs are written and cleared twice.
+    g_data_w            : NATURAL := 3;        -- Input data width. Determines maximum number of bins (2^g_data_w)
+    g_nof_bins          : NATURAL := 8;        -- Lower than or equal to 2^g_data_w. Higher is allowed but makes no sense.
+    g_nof_data_per_sync : NATURAL := 20;       -- >= g_nof_bins. Determines max required RAM data width, e.g. 11b to store max bin count '1024'.
+    g_stimuli_mode      : STRING  := "sine";   -- "counter", "dc", "sine" or "random"
+    g_data_type         : STRING  := "signed"; -- use "signed" if g_stimuli_mode="sine"
+    g_lock_sine         : BOOLEAN := TRUE      -- TRUE to lock the sine wave to Sync - produces sparse histogram with low number of non-zero samples (occurring 2*c_sine_nof_periods)
+    );                                         -- FALSE produces a dense histogram as the drifting sine wave hits more levels.
 END tb_st_histogram;
 
 
@@ -69,42 +88,59 @@ ARCHITECTURE tb OF tb_st_histogram IS
   ---------------------------------------------------------------------------
   -- Constants derived from generics
   ---------------------------------------------------------------------------
-  CONSTANT c_expected_ram_content      : NATURAL := g_nof_data_per_sync/g_nof_bins;
-
-  CONSTANT c_ram_dat_w : NATURAL := ceil_log2(g_nof_data_per_sync)+1;
+  CONSTANT c_expected_ram_content_counter : NATURAL := g_nof_data_per_sync/g_nof_bins;
+  CONSTANT c_nof_levels_per_bin           : NATURAL := (2**g_data_w)/g_nof_bins; --e.g. 2 values per bin if g_data_w=9 (512 levels) and g_nof_bins=256
+  CONSTANT c_ram_dat_w                    : NATURAL := ceil_log2(g_nof_data_per_sync+1);
+  CONSTANT c_ram_adr_w                    : NATURAL := ceil_log2(g_nof_bins);
     
   ---------------------------------------------------------------------------
   -- Clocks and resets
   ---------------------------------------------------------------------------
-  CONSTANT c_dp_clk_period      : TIME := 5 ns;
+  CONSTANT c_dp_clk_period : TIME := 5 ns;
 
-  SIGNAL dp_rst                 : STD_LOGIC;
-  SIGNAL dp_clk                 : STD_LOGIC := '1';
+  SIGNAL dp_rst            : STD_LOGIC;
+  SIGNAL dp_clk            : STD_LOGIC := '1';
 
-  SIGNAL tb_end                 : STD_LOGIC := '0';
+  SIGNAL tb_end            : STD_LOGIC := '0';
    
   ----------------------------------------------------------------------------
   -- stimuli
   ----------------------------------------------------------------------------
-  SIGNAL stimuli_en : STD_LOGIC := '1';
+  CONSTANT c_sine_amplitude              : REAL := real((2**g_data_w)/2-1);
+  CONSTANT c_sine_nof_periods            : REAL := 1.0;
+  CONSTANT c_sine_nof_samples_per_period : REAL := real(g_nof_data_per_sync)/c_sine_nof_periods;
+  CONSTANT c_sine_time_step_denom        : REAL := sel_a_b(g_lock_sine, MATH_2_PI, 5.0); -- Use 5 instead of 2 pi to create unlocked, drifting sine wave
+  CONSTANT c_sine_time_step              : REAL := c_sine_time_step_denom / c_sine_nof_samples_per_period;
 
-  SIGNAL stimuli_src_out : t_dp_sosi;
-  SIGNAL stimuli_src_in  : t_dp_siso;
+  SIGNAL stimuli_en          : STD_LOGIC := '1';
+  SIGNAL stimuli_src_out     : t_dp_sosi;
+  SIGNAL nxt_stimuli_src_out : t_dp_sosi;
+  SIGNAL stimuli_src_in      : t_dp_siso;
+  SIGNAL stimuli_count       : REAL;
+  SIGNAL stimuli_data        : STD_LOGIC_VECTOR(g_data_w-1 DOWNTO 0); -- QuestaSim: Format->Analog, Radix->Decimal
+  SIGNAL random_data         : STD_LOGIC_VECTOR(g_data_w-1 DOWNTO 0);
+  SIGNAL stimuli_done        : STD_LOGIC;
 
   ----------------------------------------------------------------------------
   -- st_histogram
   ----------------------------------------------------------------------------
-  SIGNAL st_histogram_snk_in    : t_dp_sosi;
-  SIGNAL st_histogram_ram_mosi  : t_mem_mosi;
-  SIGNAL st_histogram_ram_miso  : t_mem_miso;
+  SIGNAL st_histogram_snk_in       : t_dp_sosi;
+  SIGNAL st_histogram_ram_mosi     : t_mem_mosi;
+  SIGNAL prv_st_histogram_ram_mosi : t_mem_mosi;
+  SIGNAL st_histogram_ram_miso     : t_mem_miso;
   
-   ----------------------------------------------------------------------------
-   -- Automatic verification of RAM readout
-   ----------------------------------------------------------------------------
-  SIGNAL ram_rd_word           : STD_LOGIC_VECTOR(c_ram_dat_w-1 DOWNTO 0);
-  SIGNAL ram_rd_word_int       : NATURAL;
-  SIGNAL ram_rd_word_valid     : STD_LOGIC;
-  SIGNAL nxt_ram_rd_word_valid : STD_LOGIC;
+  ----------------------------------------------------------------------------
+  -- Automatic verification of RAM readout
+  ----------------------------------------------------------------------------
+  -- Use these 4 signals to verify histogram by eye in the wave window
+  SIGNAL histogram_data         : NATURAL; -- QuestaSim: Format->Analog
+  SIGNAL histogram_bin_unsigned : NATURAL;
+  SIGNAL histogram_bin_signed   : INTEGER; -- QuestaSim: Radix->Decimal
+  SIGNAL histogram_valid        : STD_LOGIC;
+
+  SIGNAL sum_of_bins            : NATURAL;
+  SIGNAL verification_done      : STD_LOGIC;
+
 
 BEGIN 
   
@@ -116,7 +152,7 @@ BEGIN
  
  
   ----------------------------------------------------------------------------
-  -- Stimuli: generate st_histogram input data and clear the RAM
+  -- Stimuli: generate st_histogram input data
   ---------------------------------------------------------------------------- 
   stimuli_src_in <= c_dp_siso_rdy;
 
@@ -124,21 +160,85 @@ BEGIN
   p_generate_packets : PROCESS
     VARIABLE v_sosi : t_dp_sosi := c_dp_sosi_rst;
   BEGIN
-    stimuli_src_out <= c_dp_sosi_rst;
+    nxt_stimuli_src_out <= c_dp_sosi_rst;
+    stimuli_done <= '0';
     proc_common_wait_until_low(dp_clk, dp_rst);
     proc_common_wait_some_cycles(dp_clk, 5);
 
-    FOR I IN 0 TO g_nof_sync-1 LOOP
-      v_sosi.sync    := '1';
-      v_sosi.data    := RESIZE_DP_DATA(v_sosi.data(g_data_w-1 DOWNTO 0));  -- wrap when >= 2**g_data_w    
-      proc_dp_gen_block_data(g_data_w, TO_UINT(v_sosi.data), g_nof_data_per_sync, TO_UINT(v_sosi.channel), TO_UINT(v_sosi.err), v_sosi.sync, v_sosi.bsn, dp_clk, stimuli_en, stimuli_src_in, stimuli_src_out);
-    END LOOP;     
+    -- Generate a block of counter data every sync
+    IF g_stimuli_mode="counter" THEN
+      FOR I IN 0 TO g_nof_sync-1 LOOP
+        v_sosi.sync    := '1';
+        v_sosi.data    := RESIZE_DP_DATA(v_sosi.data(g_data_w-1 DOWNTO 0));  -- wrap when >= 2**g_data_w    
+        proc_dp_gen_block_data(g_data_w, TO_UINT(v_sosi.data), g_nof_data_per_sync, TO_UINT(v_sosi.channel), TO_UINT(v_sosi.err), v_sosi.sync, v_sosi.bsn, dp_clk, stimuli_en, stimuli_src_in, nxt_stimuli_src_out);
+      END LOOP;
+    END IF;
+
+    -- Generate a DC level that increments every sync (one sample per dp_clk period)
+    IF g_stimuli_mode="dc" THEN
+      nxt_stimuli_src_out.valid <= '1';
+      FOR I IN 0 TO g_nof_sync-1 LOOP
+        nxt_stimuli_src_out.data <= INCR_UVEC(stimuli_src_out.data, 1); --all g_nof_data_per_sync cycles
+        nxt_stimuli_src_out.sync <= '1'; -- cycle 0
+        WAIT FOR c_dp_clk_period;
+        FOR j IN 1 TO g_nof_data_per_sync-1 LOOP --cycles 1..g_nof_data_per_sync-1
+          nxt_stimuli_src_out.sync <= '0';
+          WAIT FOR c_dp_clk_period;
+        END LOOP;
+      END LOOP;
+    END IF;
+
+    -- Generate a sine wave (one sample per dp_clk period)
+    IF g_stimuli_mode="sine" THEN
+      nxt_stimuli_src_out <= stimuli_src_out;
+      nxt_stimuli_src_out.valid <= '1';
+      stimuli_count <= 0.0;
+      FOR I IN 0 TO g_nof_sync-1 LOOP
+        nxt_stimuli_src_out.sync <= '1'; -- cycle 0: only sync is driven, data and stimuli_count keep their previous value
+        WAIT FOR c_dp_clk_period;
+        FOR j IN 1 TO g_nof_data_per_sync-1 LOOP --cycles 1..g_nof_data_per_sync-1
+          nxt_stimuli_src_out.sync <= '0';
+          nxt_stimuli_src_out.data(g_data_w-1 DOWNTO 0) <= TO_SVEC( integer(round( c_sine_amplitude * sin(stimuli_count) )), g_data_w);
+          stimuli_count<=stimuli_count+c_sine_time_step; -- signal: the sin() above still sees the value from before this increment
+          WAIT FOR c_dp_clk_period;
+        END LOOP;
+      END LOOP;
+    END IF;
+
+    -- Generate pseudo random noise (one sample per dp_clk period, sequence restarts from 0 every sync)
+    IF g_stimuli_mode="random" THEN
+      nxt_stimuli_src_out.valid <= '1';
+      FOR I IN 0 TO g_nof_sync-1 LOOP
+        random_data <= (OTHERS=>'0');
+        nxt_stimuli_src_out.sync <= '1'; -- cycle 0
+        WAIT FOR c_dp_clk_period;
+        FOR j IN 1 TO g_nof_data_per_sync-1 LOOP
+          nxt_stimuli_src_out.sync <= '0';
+          random_data <=  func_common_random(random_data);
+          nxt_stimuli_src_out.data(g_data_w-1 DOWNTO 0) <= random_data; --all g_nof_data_per_sync cycles
+          WAIT FOR c_dp_clk_period;
+        END LOOP;
+      END LOOP;
+    END IF;
+
+    stimuli_done <= '1';
     proc_common_wait_some_cycles(dp_clk, 50);
     tb_end <= '1';
     WAIT;
   END PROCESS;
- 
+
+  p_stimuli_src_out: PROCESS(dp_rst, dp_clk) IS
+  BEGIN
+    IF dp_rst='1' THEN
+      stimuli_src_out <= c_dp_sosi_rst;
+    ELSIF RISING_EDGE(dp_clk) THEN
+      stimuli_src_out <= nxt_stimuli_src_out;
+    END IF;
+  END PROCESS;
+
+  -- signal to verify histogram by eye in the wave window
+  stimuli_data <= stimuli_src_out.data(g_data_w-1 DOWNTO 0);
+
 
   ----------------------------------------------------------------------------
   -- st_histogram
@@ -149,7 +249,8 @@ BEGIN
   GENERIC MAP(
     g_data_w            => g_data_w,
     g_nof_bins          => g_nof_bins,
-    g_nof_data_per_sync => g_nof_data_per_sync
+    g_nof_data_per_sync => g_nof_data_per_sync,
+    g_data_type         => g_data_type
   )
   PORT MAP (
     dp_clk       => dp_clk,           
@@ -166,55 +267,103 @@ BEGIN
   -- . The table below shows what RAM we are reading here ('RAM read') via the
   --   ram_mosi/miso interface, and what the expected RAM contents are.
   --                                         
-  ---+-------------+-------------+----------+--------------+ 
-  -- | Sync period | RAM written | RAM read | RAM contents |
-  -- +-------------+-------------+----------+--------------+
-  -- | 0           | 0           | 1        | 256 * 0      |
-  -- | 1           | 1           | 0        | 256 * 12     |
-  -- | 2           | 0           | 1        | 256 * 12     |
-  -- | 3           | 1           | 0        | 256 * 12     |
-  -- +-------------+-------------+----------+--------------+
+  -- Counter data (the same every sync excl. sync 0):
+  -- +-------------+-------------+----------+-----------------------+
+  -- | Sync period | RAM written | RAM read | RAM contents          |
+  -- +-------------+-------------+----------+-----------------------+
+  -- | 0           | 0           | 1        | 256 addresses *  0    |
+  -- | 1           | 1           | 0        | 256 addresses * 12    |
+  -- | 2           | 0           | 1        | 256 addresses * 12    |
+  -- | 3           | 1           | 0        | 256 addresses * 12    |
+  -- +-------------+-------------+----------+-----------------------+
   -- 
+  -- DC data (increments level every sync: 0, 1, 2, 3, ..):
+  -- +-------------+-------------+----------+-----------------------+
+  -- | Sync period | RAM written | RAM read | RAM contents          |
+  -- +-------------+-------------+----------+-----------------------+
+  -- | 0           | 0           | 1        | 256 addresses *  0    |
+  -- | 1           | 1           | 0        | Addr 1: 1024, others 0|
+  -- | 2           | 0           | 1        | Addr 2: 1024, others 0|
+  -- | 3           | 1           | 0        | Addr 3: 1024, others 0|
+  -- +-------------+-------------+----------+-----------------------+
   ----------------------------------------------------------------------------
   -- Perform MM read and put result in ram_rd_word
   p_verify_mm_read : PROCESS
   BEGIN
     st_histogram_ram_mosi.wr <= '0';
     FOR i IN 0 TO g_nof_sync-1 LOOP
-      proc_common_wait_until_high(dp_clk, stimuli_src_out.sync);  -- Wait for sync
-      proc_common_wait_some_cycles(dp_clk, 10); -- give it a couple of more cycles
+      proc_common_wait_until_high(dp_clk, stimuli_src_out.sync);
+      proc_common_wait_some_cycles(dp_clk, 10);
       FOR j IN 0 TO g_nof_bins-1 LOOP
         proc_mem_mm_bus_rd(j, dp_clk, st_histogram_ram_mosi); 
-        ram_rd_word <= st_histogram_ram_miso.rddata(c_ram_dat_w-1 DOWNTO 0);
-        ram_rd_word_int <= TO_UINT(ram_rd_word);
       END LOOP;
     END LOOP;
+    WAIT;
   END PROCESS;
 
-  -- Register st_histogram_ram_miso.rdval so we read only valid ram_rd_word
-  p_nxt_ram_rd_word_valid : PROCESS(dp_rst, dp_clk)
-  BEGIN
-   IF dp_rst = '1' THEN
-      ram_rd_word_valid <= '0';     
-    ELSIF RISING_EDGE(dp_clk) THEN
-      ram_rd_word_valid <= nxt_ram_rd_word_valid;
-    END IF;
-  END PROCESS;
-  nxt_ram_rd_word_valid <= st_histogram_ram_miso.rdval;
+  -- Help signals that contain the histogram bins+data
+  histogram_bin_unsigned <= TO_UINT(              prv_st_histogram_ram_mosi.address(c_ram_adr_w-1 DOWNTO 0));
+  histogram_bin_signed   <= TO_SINT(offset_binary(prv_st_histogram_ram_mosi.address(c_ram_adr_w-1 DOWNTO 0)));
+  histogram_data         <= TO_UINT(st_histogram_ram_miso.rddata(c_ram_dat_w-1 DOWNTO 0)) WHEN st_histogram_ram_miso.rdval='1'ELSE 0;
+  histogram_valid        <= st_histogram_ram_miso.rdval;
 
   -- Perform verification of ram_rd_word when ram_rd_word_valid
   p_verify_assert : PROCESS
   BEGIN
+    verification_done <= '0';
     FOR i IN 0 TO g_nof_sync-1 LOOP
+      sum_of_bins <= 0;
       proc_common_wait_until_high(dp_clk, stimuli_src_out.sync);  
-      proc_common_wait_until_high(dp_clk, ram_rd_word_valid);      
-      IF i=0 THEN -- Sync period 0: we expect RAM to contain zeros
-        ASSERT ram_rd_word_int=0                      REPORT "RAM contains wrong bin count (expected 0, actual " & INTEGER'IMAGE(ram_rd_word_int) & ")" SEVERITY ERROR;
-      ELSE -- Sync period 1 onwards
-        ASSERT ram_rd_word_int=c_expected_ram_content REPORT "RAM contains wrong bin count (expected " & INTEGER'IMAGE(c_expected_ram_content) & ", actual " & INTEGER'IMAGE(ram_rd_word_int) & ")" SEVERITY ERROR;
+      FOR j IN 0 TO g_nof_bins-1 LOOP -- One iteration per histogram bin (RAM address) read back in this sync period
+        proc_common_wait_until_high(dp_clk, histogram_valid);
+        IF i=0 THEN -- Sync period 0: we expect RAM to contain zeros
+          ASSERT histogram_data=0 REPORT "RAM contains wrong bin count (expected 0, actual " & INTEGER'IMAGE(histogram_data) & ")" SEVERITY ERROR;
+        ELSE -- Sync period 1 onwards
+          IF g_stimuli_mode="counter" THEN
+            -- Counter data: bin values remain the same every sync
+            ASSERT histogram_data=c_expected_ram_content_counter REPORT "RAM contains wrong bin count (expected " & INTEGER'IMAGE(c_expected_ram_content_counter) & ", actual " & INTEGER'IMAGE(histogram_data) & ")" SEVERITY ERROR;
+          ELSIF g_stimuli_mode="dc" THEN
+            -- DC data: DC level increments every sync 
+            IF j=(i/c_nof_levels_per_bin) THEN -- Check bin address and account for multiple levels per bin
+              -- this address (j) should contain the DC level total count of this sync period (i)
+              ASSERT histogram_data=g_nof_data_per_sync REPORT "RAM contains wrong bin count (expected " & INTEGER'IMAGE(g_nof_data_per_sync) & ", actual " & INTEGER'IMAGE(histogram_data) & ")" SEVERITY ERROR;
+            ELSE
+              -- this address should contain zero
+              ASSERT histogram_data=0 REPORT "RAM contains wrong bin count (expected 0, actual " & INTEGER'IMAGE(histogram_data) & ")" SEVERITY ERROR;
+            END IF;
+          END IF;
+        END IF;
+        sum_of_bins<=sum_of_bins+histogram_data; -- Keep the sum of all bins  
+        WAIT FOR c_dp_clk_period; -- Advance one dp_clk period to the next read word (also lets the sum_of_bins update take effect)
+      END LOOP;
+
+      -- Check the sum of all bins
+      IF i>0 THEN -- Skip sync 0 (histogram still all zeros)
+        ASSERT sum_of_bins=g_nof_data_per_sync REPORT "Sum of bins not equal to g_nof_data_per_sync (expected " & INTEGER'IMAGE(g_nof_data_per_sync) & ", actual " & INTEGER'IMAGE(sum_of_bins) & ")" SEVERITY ERROR;
       END IF;
+
     END LOOP;
-    WAIT FOR 5 ns;
+    verification_done <= '1'; --We have blocking proc_common_wait_until_high procedures above so we need to know if we make it here.
+    WAIT;
   END PROCESS;
-  
+
+  -- Check if verification was done at all
+  p_check_verification_done : PROCESS
+  BEGIN
+    proc_common_wait_until_high(dp_clk, stimuli_done);
+    proc_common_wait_some_cycles(dp_clk, 50);
+    ASSERT verification_done='1' REPORT "Verification failed" SEVERITY ERROR;
+    WAIT;
+  END PROCESS;
+
+  -- Register MOSI to store the read address
+  p_clk: PROCESS(dp_rst, dp_clk) IS
+  BEGIN
+    IF dp_rst = '1' THEN
+      prv_st_histogram_ram_mosi <= c_mem_mosi_rst;
+    ELSIF RISING_EDGE(dp_clk) THEN
+      prv_st_histogram_ram_mosi <= st_histogram_ram_mosi;
+    END IF;
+  END PROCESS;
+ 
 END tb;
diff --git a/libraries/dsp/st/tb/vhdl/tb_tb_st_histogram.vhd b/libraries/dsp/st/tb/vhdl/tb_tb_st_histogram.vhd
index 12b4b26876a53513ce6aff00a12b0ab19d15f05f..b278c2c124e1efb150531eee2e6301bf1ff2e232 100644
--- a/libraries/dsp/st/tb/vhdl/tb_tb_st_histogram.vhd
+++ b/libraries/dsp/st/tb/vhdl/tb_tb_st_histogram.vhd
@@ -25,7 +25,9 @@
 -- Usage
 -- . as 8
 -- . run -all 
--- . Testbenches are self-checking
+-- . Testbenches are self-checking.
+-- . The sine wave test benches are best for verification by eye in the wave window.
+--   . tb_st_histogram uses a sine wave as input by default
 
 LIBRARY IEEE;
 USE IEEE.std_logic_1164.ALL;
@@ -37,16 +39,47 @@ ARCHITECTURE tb OF tb_tb_st_histogram IS
   SIGNAL tb_end : STD_LOGIC := '0';  -- declare tb_end to avoid 'No objects found' error on 'when -label tb_end'
 BEGIN
   
---  g_nof_sync             : NATURAL := 4;
---  g_data_w               : NATURAL := 8;
---  g_nof_bins             : NATURAL := 256;
---  g_nof_data             : NATURAL := 1024;
+--  g_nof_sync     : NATURAL := 4;
+--  g_data_w       : NATURAL := 8;
+--  g_nof_bins     : NATURAL := 256;
+--  g_nof_data     : NATURAL := 1024;
+--  g_stimuli_mode : STRING  := "dc";
+--  g_data_type    : STRING  := "unsigned";
+--  g_lock_sine    : BOOLEAN := TRUE
 
-u_tb_st_histogram_0 : ENTITY work.tb_st_histogram GENERIC MAP ( 7,  8,  256, 1024); -- Incoming data wraps (repeats) 1024/ 256= 4 times: Bin count =  4
-u_tb_st_histogram_1 : ENTITY work.tb_st_histogram GENERIC MAP ( 6, 10,  256, 4096); -- Incoming data wraps (repeats) 4096/ 256=16 times: Bin count = 16
-u_tb_st_histogram_2 : ENTITY work.tb_st_histogram GENERIC MAP ( 5, 12,  512, 4096); -- Incoming data wraps (repeats) 4096/ 512= 8 times: Bin count =  8
-u_tb_st_histogram_3 : ENTITY work.tb_st_histogram GENERIC MAP ( 4, 13, 1024, 8192); -- Incoming data wraps (repeats) 8192/1024= 8 times: Bin count =  8
-u_tb_st_histogram_4 : ENTITY work.tb_st_histogram GENERIC MAP (40,  6,   64,  128); -- Incoming data wraps (repeats)  128/  64= 2 times: Bin count =  2
+-- Counter data, unsigned. Generics are mapped positionally in the order of the list above: nof_sync, data_w, nof_bins, nof_data, stimuli_mode, data_type
+u_tb_st_histogram_0 : ENTITY work.tb_st_histogram GENERIC MAP ( 7,  8,  256, 1024, "counter", "unsigned"); -- Incoming data repeats 1024/ 256= 4 times: Bin count =  4
+u_tb_st_histogram_1 : ENTITY work.tb_st_histogram GENERIC MAP ( 6, 10,  256, 4096, "counter", "unsigned"); -- Incoming data repeats 4096/ 256=16 times: Bin count = 16
+u_tb_st_histogram_2 : ENTITY work.tb_st_histogram GENERIC MAP ( 5, 12,  512, 4096, "counter", "unsigned"); -- Incoming data repeats 4096/ 512= 8 times: Bin count =  8
+u_tb_st_histogram_3 : ENTITY work.tb_st_histogram GENERIC MAP ( 4, 13, 1024, 8192, "counter", "unsigned"); -- Incoming data repeats 8192/1024= 8 times: Bin count =  8
+u_tb_st_histogram_4 : ENTITY work.tb_st_histogram GENERIC MAP (20,  6,   64,  128, "counter", "unsigned"); -- Incoming data repeats  128/  64= 2 times: Bin count =  2
 
+-- DC signal ("dc" stimuli mode), unsigned data type
+u_tb_st_histogram_5 : ENTITY work.tb_st_histogram GENERIC MAP ( 2,  8,  256, 1000, "dc", "unsigned");
+u_tb_st_histogram_6 : ENTITY work.tb_st_histogram GENERIC MAP ( 6, 10,  256, 4000, "dc", "unsigned");
+u_tb_st_histogram_7 : ENTITY work.tb_st_histogram GENERIC MAP ( 5, 12,  512, 4000, "dc", "unsigned");
+u_tb_st_histogram_8 : ENTITY work.tb_st_histogram GENERIC MAP ( 4, 13, 1024, 8000, "dc", "unsigned");
+u_tb_st_histogram_9 : ENTITY work.tb_st_histogram GENERIC MAP (11,  6,   64,  100, "dc", "unsigned");
+
+-- Locked sine wave, signed: the 7th generic (g_lock_sine in the list above) is not mapped, so its default applies
+u_tb_st_histogram_10: ENTITY work.tb_st_histogram GENERIC MAP ( 4,  3,    8,   20, "sine", "signed");
+u_tb_st_histogram_11: ENTITY work.tb_st_histogram GENERIC MAP ( 8,  6,   64,  200, "sine", "signed");
+u_tb_st_histogram_12: ENTITY work.tb_st_histogram GENERIC MAP (12,  8,  256, 2000, "sine", "signed");
+u_tb_st_histogram_13: ENTITY work.tb_st_histogram GENERIC MAP (17, 10,  256, 3455, "sine", "signed");
+u_tb_st_histogram_14: ENTITY work.tb_st_histogram GENERIC MAP (21, 14, 1024, 8111, "sine", "signed");
+
+-- Drifting sine wave, signed: same parameter sets as the locked sine instances, but with the 7th generic (g_lock_sine in the list above) set to FALSE
+u_tb_st_histogram_15: ENTITY work.tb_st_histogram GENERIC MAP ( 4,  3,    8,   20, "sine", "signed", FALSE);
+u_tb_st_histogram_16: ENTITY work.tb_st_histogram GENERIC MAP ( 8,  6,   64,  200, "sine", "signed", FALSE);
+u_tb_st_histogram_17: ENTITY work.tb_st_histogram GENERIC MAP (12,  8,  256, 2000, "sine", "signed", FALSE);
+u_tb_st_histogram_18: ENTITY work.tb_st_histogram GENERIC MAP (17, 10,  256, 3455, "sine", "signed", FALSE);
+u_tb_st_histogram_19: ENTITY work.tb_st_histogram GENERIC MAP (21, 14, 1024, 8111, "sine", "signed", FALSE);
+
+-- Random data, signed: same parameter sets as the sine instances, except for the first generic of instances 21, 22 and 24
+u_tb_st_histogram_20: ENTITY work.tb_st_histogram GENERIC MAP ( 4,  3,    8,   20, "random", "signed");
+u_tb_st_histogram_21: ENTITY work.tb_st_histogram GENERIC MAP ( 6,  6,   64,  200, "random", "signed");
+u_tb_st_histogram_22: ENTITY work.tb_st_histogram GENERIC MAP ( 9,  8,  256, 2000, "random", "signed");
+u_tb_st_histogram_23: ENTITY work.tb_st_histogram GENERIC MAP (17, 10,  256, 3455, "random", "signed");
+u_tb_st_histogram_24: ENTITY work.tb_st_histogram GENERIC MAP (13, 14, 1024, 8111, "random", "signed");
 
 END tb;