diff --git a/libraries/dsp/st/src/vhdl/mmp_st_histogram.vhd b/libraries/dsp/st/src/vhdl/mmp_st_histogram.vhd
index 98208bb34fca6202340cfc3ba3a3a26ec1b7e601..02b5005874fdec28e95da2cd3e6a09bb91cdc231 100644
--- a/libraries/dsp/st/src/vhdl/mmp_st_histogram.vhd
+++ b/libraries/dsp/st/src/vhdl/mmp_st_histogram.vhd
@@ -21,9 +21,17 @@
 -- Author: 
 -- . Daniel van der Schuur 
 -- Purpose:
--- . MMS-wrapper that adds MM clock domain RAM readout and and multi-instance 
+-- . MMP-wrapper that adds MM clock domain RAM readout and multi-instance 
 --   support to st_histogram.
 -- Description:
+-- . Adds logic to move st_histogram RAM contents into the dual clock RAM for
+--   readout in MM clock domain.
+-- . Per instance there are at least two block RAMs (or more, depending on
+--   g_nof_bins):
+--   . one dual page block RAM in st_histogram in the dp_clk domain that
+--     accumulates or holds the bin values for every sync interval,
+--   . one dual clock block RAM here to provide the read access to the
+--     page with the held bin values via the mm_clk domain.
 
 
 LIBRARY IEEE, common_lib, mm_lib, technology_lib, dp_lib;
diff --git a/libraries/dsp/st/src/vhdl/st_histogram.vhd b/libraries/dsp/st/src/vhdl/st_histogram.vhd
index 7be95283cb06c4a3dfec6f8ec8ab451a4061b2cc..2d32186cb7e0452ca7a0b151030fe87224c44a6f 100644
--- a/libraries/dsp/st/src/vhdl/st_histogram.vhd
+++ b/libraries/dsp/st/src/vhdl/st_histogram.vhd
@@ -70,7 +70,8 @@
 -- . The ram_mosi input applies to the RAM page that is inactive (not
 --   being written to from data path) *at that time*. The user should take care to
 --   time these controls such that the active RAM page does not swap before these
---   operation (ram_mosi readout) has finished.
+--   operation (ram_mosi readout) has finished, otherwise the read histogram will
+--   contain data from both the current and the previous sync periods.
 -- Remarks:
 -- . The RAM block we use basically needs 3 ports:
 --   1 - read port in dp_clk domain to read current bin value
@@ -101,8 +102,8 @@ USE technology_lib.technology_select_pkg.ALL;
 ENTITY st_histogram IS
   GENERIC (
     g_data_w            : NATURAL := 8;
-    g_nof_bins          : NATURAL := 256;  -- <= 2^g_data_w
-    g_nof_data_per_sync : NATURAL := 1024; -- >= g_nof_bins
+    g_nof_bins          : NATURAL := 256;  -- <= 2^g_data_w (having more bins than possible values is not useful)
+    g_nof_data_per_sync : NATURAL := 1024; 
     g_data_type         : STRING  := "unsigned" -- unsigned or signed
   );
   PORT (            
@@ -144,8 +145,6 @@ ARCHITECTURE rtl OF st_histogram IS
   -------------------------------------------------------------------------------
   -- ram_pointer
   -------------------------------------------------------------------------------
-  SIGNAL toggle_ram_pointer     : STD_LOGIC;
-  SIGNAL nxt_toggle_ram_pointer : STD_LOGIC;
   SIGNAL ram_pointer            : STD_LOGIC;
   SIGNAL prv_ram_pointer        : STD_LOGIC;
 
@@ -210,25 +209,37 @@ BEGIN
   -------------------------------------------------------------------------------
   -- Select range from snk_in.data and interpret as (un)signed
   -------------------------------------------------------------------------------
-  gen_signed: IF g_data_type="signed" GENERATE
-    snk_in_data <= offset_binary(snk_in.data(g_data_w-1 DOWNTO c_adr_low));
-  END GENERATE;
-
   gen_unsigned: IF g_data_type/="signed" GENERATE
     snk_in_data <= snk_in.data(g_data_w-1 DOWNTO c_adr_low);
   END GENERATE;
 
+  -- Use offset_binary() from common_pkg.vhd, to swap the lower half and
+  -- upper half of the bins in case the input data is signed. The signed
+  -- input values can be two's complement or offset binary, dependent on how
+  -- they were sampled by the ADC or generated by a waveform generator.
+  -- The difference is in the details. For example with 8 bit data:
+  --
+  --                            bin:    0    127    128      255
+  --  signed two's complement value: -128     -1      0     +127
+  --  signed offset binary value:    -127.5   -0.5   +0.5   +127.5
+  --  unsigned value:                   0    127    128      255
+  gen_signed: IF g_data_type="signed" GENERATE
+    snk_in_data <= offset_binary(snk_in.data(g_data_w-1 DOWNTO c_adr_low));
+  END GENERATE;
+
 
   -------------------------------------------------------------------------------
   -- Slightly reduce the incoming data to prevent simultineous read/write
   -- . Take out every 2nd and 3rd duplicate data value (set valid='0')
   -- . Put the number of duplicates in the channel field to be applied downstream
-  -- . With a RAM read->write latency of 3 cycles, we need a shift register of 4
-  --   words (0,1,2,3) deep to prevent simultaneous read/writes on the RAM.
+  -- . With a RAM read->write latency of 3 cycles (c_ram_rd_wr_latency), we need 
+  --   a shift register of 4 words (0,1,2,3) deep to prevent simultaneous 
+  --   read/writes on the RAM.
   --   . Element 3 is only and output register
   -- . A sequence of duplicate data could cross a sync period:
   --   . We need to stop&restart counting duplicates on a sync, don't count
-  --     across sync periods
+  --     across sync periods to ensure exactly correct bin values in each sync 
+  --     interval
   --     . We can still get a read on cycle n and a write on cycle n+2 on the 
   --       same address, but that does not matter as the read,write will be on
   --       different RAM blocks (1 RAM block per sync period).
@@ -299,13 +310,11 @@ BEGIN
   -- Registers
   p_snk_in_reg_arr: PROCESS(dp_clk, dp_rst) IS
   BEGIN
-    FOR i IN 0 TO c_shiftreg_depth-1 LOOP
-      IF dp_rst = '1' THEN
-        snk_in_reg_arr(i) <= c_dp_sosi_rst;
-      ELSIF RISING_EDGE(dp_clk) THEN
-        snk_in_reg_arr(i) <= nxt_snk_in_reg_arr(i);
-      END IF;
-    END LOOP;
+    IF dp_rst = '1' THEN
+      snk_in_reg_arr <= (OTHERS => c_dp_sosi_rst);  -- asynchronous reset of every shift register element
+    ELSIF RISING_EDGE(dp_clk) THEN
+      snk_in_reg_arr <= nxt_snk_in_reg_arr;         -- register the whole array in one assignment
+    END IF;
   END PROCESS;
 
 
@@ -320,18 +329,14 @@ BEGIN
   p_ram_pointer : PROCESS(dp_rst, dp_clk) IS
   BEGIN
     IF dp_rst='1' THEN
-      prv_ram_pointer    <= '0';
-      toggle_ram_pointer <= '0';
+      prv_ram_pointer    <= '1';  -- reset to '1' so that the toggle on the first sync makes ram_pointer '0'
     ELSIF RISING_EDGE(dp_clk) THEN
-      toggle_ram_pointer <= nxt_toggle_ram_pointer;
       prv_ram_pointer    <= ram_pointer;
     END IF;
   END PROCESS;
 
-  -- Don't toggle the RAM pointer on the first sync as we're already reading the RAM at that point.
-  nxt_toggle_ram_pointer <= '1' WHEN snk_in_reg.sync='1' ELSE toggle_ram_pointer;
-  -- Toggle the RAM pointer starting from 2nd sync onwards
-  ram_pointer <= NOT prv_ram_pointer WHEN snk_in_reg.sync='1' AND toggle_ram_pointer='1' ELSE prv_ram_pointer;
+  -- Toggle the RAM pointer on the sync
+  ram_pointer <= NOT prv_ram_pointer WHEN snk_in_reg.sync='1' ELSE prv_ram_pointer;
 
 
   -------------------------------------------------------------------------------
diff --git a/libraries/dsp/st/tb/vhdl/tb_mmp_st_histogram.vhd b/libraries/dsp/st/tb/vhdl/tb_mmp_st_histogram.vhd
index e2e34b149bcf1a1d3fe32bcdd06df094f788dfa4..98fbed5994737e5efeaa70f15e89bd0f9c467770 100644
--- a/libraries/dsp/st/tb/vhdl/tb_mmp_st_histogram.vhd
+++ b/libraries/dsp/st/tb/vhdl/tb_mmp_st_histogram.vhd
@@ -31,6 +31,9 @@
 -- . run -a
 -- Description:
 -- . This TB is self checking and stops after g_nof_sync test iterations.
+-- . This TB only checks the MM aspects of mmp_st_histogram with limited (counter
+--   data) stimuli and verification. Details of st_histogram are thoroughly 
+--   checked in tb_tb_st_histogram.
 -------------------------------------------------------------------------------
 
 LIBRARY IEEE, common_lib, mm_lib, dp_lib;
@@ -49,8 +52,8 @@ ENTITY tb_mmp_st_histogram IS
     g_nof_instances     : NATURAL := 12;
     g_data_w            : NATURAL := 14;
     g_nof_bins          : NATURAL := 512;
-    g_nof_data_per_sync : NATURAL := 32768 
-  );
+    g_nof_data_per_sync : NATURAL := 16384 -- g_nof_data_per_sync/g_nof_bins should be integer so
+  );                                       -- counter data yields the same histogram in each bin
 END tb_mmp_st_histogram;
 
 
@@ -64,25 +67,22 @@ ARCHITECTURE tb OF tb_mmp_st_histogram IS
 
   SIGNAL dp_clk            : STD_LOGIC := '1';
   SIGNAL dp_rst            : STD_LOGIC;
-
   SIGNAL mm_clk            : STD_LOGIC := '1';
   SIGNAL mm_rst            : STD_LOGIC;
-
   SIGNAL tb_end            : STD_LOGIC := '0';
 
   ----------------------------------------------------------------------------
   -- stimuli
   ----------------------------------------------------------------------------
-  SIGNAL stimuli_en : STD_LOGIC := '1';
-
+  SIGNAL stimuli_en      : STD_LOGIC := '1';
   SIGNAL stimuli_src_out : t_dp_sosi;
   SIGNAL stimuli_src_in  : t_dp_siso;
+  SIGNAL stimuli_done    : STD_LOGIC;
    
   ----------------------------------------------------------------------------
   -- st_histogram
   ----------------------------------------------------------------------------
   SIGNAL st_histogram_snk_in_arr : t_dp_sosi_arr(g_nof_instances-1 DOWNTO 0);
-
   SIGNAL st_histogram_ram_copi   : t_mem_copi;
   SIGNAL st_histogram_ram_cipo   : t_mem_cipo;
 
@@ -90,16 +90,14 @@ ARCHITECTURE tb OF tb_mmp_st_histogram IS
   ----------------------------------------------------------------------------
   -- Readout & verification
   ----------------------------------------------------------------------------
-  CONSTANT c_ram_dat_w : NATURAL := ceil_log2(g_nof_data_per_sync+1);
-
+  CONSTANT c_ram_dat_w            : NATURAL := ceil_log2(g_nof_data_per_sync+1);
   CONSTANT c_expected_ram_content : NATURAL := g_nof_data_per_sync/g_nof_bins;
 
-  SIGNAL ram_filling : STD_LOGIC;
-
   SIGNAL ram_rd_word           : STD_LOGIC_VECTOR(c_ram_dat_w-1 DOWNTO 0);
   SIGNAL ram_rd_word_int       : NATURAL;
   SIGNAL ram_rd_word_valid     : STD_LOGIC;
   SIGNAL nxt_ram_rd_word_valid : STD_LOGIC;
+  SIGNAL verification_done     : STD_LOGIC;
 
 BEGIN 
   
@@ -114,7 +112,7 @@ BEGIN
  
 
   ----------------------------------------------------------------------------
-  -- DP Stimuli: generate st_histogram input data
+  -- DP Stimuli: generate st_histogram input (counter) data
   ---------------------------------------------------------------------------- 
   stimuli_src_in <= c_dp_siso_rdy;
 
@@ -122,6 +120,7 @@ BEGIN
   p_generate_packets : PROCESS
     VARIABLE v_sosi : t_dp_sosi := c_dp_sosi_rst;
   BEGIN
+    stimuli_done    <= '0';
     stimuli_src_out <= c_dp_sosi_rst;
     proc_common_wait_until_low(dp_clk, dp_rst);
     proc_common_wait_some_cycles(dp_clk, 5);
@@ -129,9 +128,11 @@ BEGIN
     FOR I IN 0 TO g_nof_sync-1 LOOP
       v_sosi.sync    := '1';
       v_sosi.data    := RESIZE_DP_DATA(v_sosi.data(g_data_w-1 DOWNTO 0));  -- wrap when >= 2**g_data_w    
+      -- Generate a block of counter data
       proc_dp_gen_block_data(g_data_w, TO_UINT(v_sosi.data), g_nof_data_per_sync, TO_UINT(v_sosi.channel), TO_UINT(v_sosi.err), v_sosi.sync, v_sosi.bsn, dp_clk, stimuli_en, stimuli_src_in, stimuli_src_out);
     END LOOP;     
 
+    stimuli_done <= '1';
     proc_common_wait_some_cycles(dp_clk, 50);
     tb_end <= '1';
     WAIT;
@@ -174,20 +175,18 @@ BEGIN
     st_histogram_ram_copi <= c_mem_copi_rst;
     ram_rd_word <= (OTHERS=>'0');
 
-     -- The first sync indicates start of incoming data - let it pass
-     proc_common_wait_until_high(dp_clk, stimuli_src_out.sync);
-     proc_common_wait_some_cycles(mm_clk, 10);
-     FOR i IN 0 TO g_nof_sync-2 LOOP 
-       -- Wiat for a full sync period of data
-       proc_common_wait_until_high(dp_clk, stimuli_src_out.sync);
-
-     -- Read out the RAM contents
-     FOR k IN 0 TO g_nof_bins-1 LOOP
-       proc_mem_mm_bus_rd(k, mm_clk, st_histogram_ram_copi);
-       ram_rd_word <= st_histogram_ram_cipo.rddata(c_ram_dat_w-1 DOWNTO 0);
-       ram_rd_word_int <= TO_UINT(ram_rd_word);
-     END LOOP;
-
+    -- The first sync indicates start of incoming data - let it pass
+    proc_common_wait_until_high(dp_clk, stimuli_src_out.sync);
+    proc_common_wait_some_cycles(mm_clk, 10);
+    FOR i IN 0 TO g_nof_sync-2 LOOP 
+      -- Wait for a full sync period of data
+      proc_common_wait_until_high(dp_clk, stimuli_src_out.sync);
+      -- Read out the RAM contents
+      FOR k IN 0 TO g_nof_bins-1 LOOP
+        proc_mem_mm_bus_rd(k, mm_clk, st_histogram_ram_copi);
+        ram_rd_word <= st_histogram_ram_cipo.rddata(c_ram_dat_w-1 DOWNTO 0);
+        ram_rd_word_int <= TO_UINT(ram_rd_word);
+      END LOOP;
    END LOOP;
   END PROCESS;
 
@@ -208,6 +207,7 @@ BEGIN
   ----------------------------------------------------------------------------
   p_verify_assert : PROCESS
   BEGIN
+    verification_done <= '0';
     FOR i IN 0 TO g_nof_sync-1 LOOP
       proc_common_wait_until_high(dp_clk, stimuli_src_out.sync);  
       proc_common_wait_until_high(dp_clk, ram_rd_word_valid);      
@@ -218,6 +218,17 @@ BEGIN
       END IF;
     END LOOP;
     WAIT FOR 5 ns;
+    verification_done <= '1'; -- We have blocking proc_common_wait_until_high procedures above so we need to know if we make it here.
+    WAIT; -- Suspend forever: without this the process restarts at once and drives verification_done back to '0' before the '1' can take effect.
+  END PROCESS;
+
+  -- Check if verification was done at all
+  p_check_verification_done : PROCESS
+  BEGIN
+    proc_common_wait_until_high(dp_clk, stimuli_done);  -- stimuli_done is raised by p_generate_packets after its last data block
+    proc_common_wait_some_cycles(dp_clk, 50);           -- same 50 cycle margin that p_generate_packets takes before raising tb_end
+    ASSERT verification_done='1' REPORT "Verification failed" SEVERITY ERROR;
+    WAIT;                                               -- one-shot check, suspend forever
   END PROCESS;
 
 
diff --git a/libraries/dsp/st/tb/vhdl/tb_st_histogram.vhd b/libraries/dsp/st/tb/vhdl/tb_st_histogram.vhd
index 34f8314d4ff9542bda2c9dce40247e3edad7157c..6e84b4be217440180bbdeb989ab5e40eff2895f8 100644
--- a/libraries/dsp/st/tb/vhdl/tb_st_histogram.vhd
+++ b/libraries/dsp/st/tb/vhdl/tb_st_histogram.vhd
@@ -49,8 +49,12 @@
 --     . RAM clearing completes just before the end of each sync interval.
 -- . Automatic verification - in each sync period:
 --   . the RAM contents are read out via ram_mosi/miso and compared to the 
---     expected bin counts.
+--     expected bin counts. This is done only for g_stimuli_mode = counter 
+--     and dc because that is sufficient and easily done automatically. 
+--     . The counter mode yields the same value in all bins
+--     . DC mode yields max value in one bin and zero in other bins.
 --   . the sum of all bins is checked against the expected g_nof_data_per_sync.
+--     . this is done for all modes 'counter', 'dc', 'random' and 'sine'.
 -------------------------------------------------------------------------------
 
 LIBRARY IEEE, common_lib, mm_lib, dp_lib;
@@ -71,7 +75,7 @@ ENTITY tb_st_histogram IS
     g_nof_sync          : NATURAL := 4;        -- We're simulating at least 4 g_nof_sync so both RAMs are written and cleared twice.
     g_data_w            : NATURAL := 3;        -- Determines maximum number of bins (2^g_data_w)
     g_nof_bins          : NATURAL := 8;        -- Lower than or equal to 2^g_data_w. Higher is allowed but makes no sense.
-    g_nof_data_per_sync : NATURAL := 20;       -- Determines max required RAM data width. e.g. 11b to store max bin count '1024'.
+    g_nof_data_per_sync : NATURAL := 20;       -- >= g_nof_bins. Determines max required RAM data width. e.g. 11b to store max bin count '1024'.
     g_stimuli_mode      : STRING  := "sine";   -- "counter", "dc", "sine" or "random"
     g_data_type         : STRING  := "signed"; -- use "signed" if g_stimuli_mode="sine"
     g_lock_sine         : BOOLEAN := TRUE      -- TRUE to lock the sine wave to Sync - produces sparse histogram with low number of non-zero samples (occuring 2*c_sine_nof_periods)
@@ -300,8 +304,8 @@ BEGIN
   -- Help signals that contain the histogram bins+data
   histogram_bin_unsigned <= TO_UINT(              prv_st_histogram_ram_mosi.address(c_ram_adr_w-1 DOWNTO 0));
   histogram_bin_signed   <= TO_SINT(offset_binary(prv_st_histogram_ram_mosi.address(c_ram_adr_w-1 DOWNTO 0)));
-  histogram_data  <= TO_UINT(st_histogram_ram_miso.rddata(c_ram_dat_w-1 DOWNTO 0)) WHEN st_histogram_ram_miso.rdval='1'ELSE 0;
-  histogram_valid <= st_histogram_ram_miso.rdval;
+  histogram_data         <= TO_UINT(st_histogram_ram_miso.rddata(c_ram_dat_w-1 DOWNTO 0)) WHEN st_histogram_ram_miso.rdval='1'ELSE 0;
+  histogram_valid        <= st_histogram_ram_miso.rdval;
 
   -- Perform verification of ram_rd_word when ram_rd_word_valid
   p_verify_assert : PROCESS