diff --git a/libraries/dsp/st/src/vhdl/st_histogram.vhd b/libraries/dsp/st/src/vhdl/st_histogram.vhd
index 1633fa442dd2248a6ba561de9da56f34422300ad..35e76df29b6c5e66a2f12e041317970c0ac04559 100644
--- a/libraries/dsp/st/src/vhdl/st_histogram.vhd
+++ b/libraries/dsp/st/src/vhdl/st_histogram.vhd
@@ -88,7 +88,7 @@ ENTITY st_histogram IS
   GENERIC (
     g_data_w            : NATURAL := 8;
     g_nof_bins          : NATURAL := 256;
-    g_nof_data_per_sync : NATURAL := 1024;
+    g_nof_data_per_sync : NATURAL := 1024; -- must be larger than g_nof_bins
     g_data_type         : STRING  := "unsigned" -- unsigned or signed
   );
   PORT (            
@@ -115,7 +115,14 @@ ARCHITECTURE rtl OF st_histogram IS
   -------------------------------------------------------------------------------
   -- snk_in.data help signal
   -------------------------------------------------------------------------------
-  SIGNAL snk_in_data : STD_LOGIC_VECTOR(g_data_w-1 DOWNTO c_adr_low);
+  SIGNAL snk_in_data : STD_LOGIC_VECTOR(c_ram_adr_w-1 DOWNTO 0);
+
+  -------------------------------------------------------------------------------
+  -- snk_in_reg_arr
+  -------------------------------------------------------------------------------
+  SIGNAL snk_in_reg_arr     : t_dp_sosi_arr(3 DOWNTO 0);
+  SIGNAL nxt_snk_in_reg_arr : t_dp_sosi_arr(3 DOWNTO 0);
+  SIGNAL snk_in_reg         : t_dp_sosi;
 
   -------------------------------------------------------------------------------
   -- ram_pointer
@@ -132,11 +139,12 @@ ARCHITECTURE rtl OF st_histogram IS
   SIGNAL bin_reader_miso     : t_mem_miso;
 
   SIGNAL prv_bin_reader_mosi : t_mem_mosi;
-
   -------------------------------------------------------------------------------
   -- bin_writer
   -------------------------------------------------------------------------------
-  SIGNAL bin_reader_to_writer_mosi : t_mem_mosi;
+  SIGNAL bin_reader_to_writer_mosi      : t_mem_mosi;
+  SIGNAL bin_reader_to_writer_count     : NATURAL;
+  SIGNAL nxt_bin_reader_to_writer_count : NATURAL;
 
   SIGNAL nxt_bin_writer_mosi       : t_mem_mosi;
   SIGNAL bin_writer_mosi           : t_mem_mosi;
@@ -191,6 +199,14 @@ ARCHITECTURE rtl OF st_histogram IS
 
   SIGNAL nxt_ram_clearing      : STD_LOGIC;
 
+
+
+
+SIGNAL debug1 : STD_LOGIC;
+SIGNAL debug2 : STD_LOGIC;
+
+
+
 BEGIN 
 
   -------------------------------------------------------------------------------
@@ -204,13 +220,102 @@ BEGIN
     snk_in_data <= snk_in.data(g_data_w-1 DOWNTO c_adr_low);
   END GENERATE;
 
+
+  -------------------------------------------------------------------------------
+  -- Slightly reduce the incoming data to prevent simultaneous read/write
+  -- . Take out every 2nd and 3rd duplicate data value (set valid='0')
+  -- . Put the number of duplicates in the channel field to be applied downstream
+  -- . With a RAM read->write latency of 2 cycles, we need a shift register of 3
+  --   words (0,1,2) deep to prevent simultaneous read/writes on the RAM.
+  -- . A sequence of duplicate data could cross a sync period:
+  --   . We need to stop&restart counting duplicates on a sync, don't count
+  --     across sync periods
+  --     . We can still get a read on cycle n and a write on cycle n+2 on the 
+  --       same address, but that does not matter as the read,write will be on
+  --       different RAM blocks (1 RAM block per sync period).
+  --     . snk_in_reg_arr(0).sync='1' : Don't compare with older snk_in_reg_arr(1)
+  --       and (2)
+  --     . snk_in_reg_arr(1).sync='1' : Don't compare with older (2)
+  --     . snk_in_reg_arr(2).sync='1' : OK to compare with both (1) and (0)
+  -- . Input : snk_in
+  -- . Output: snk_in_reg
+  -------------------------------------------------------------------------------
+  p_nxt_snk_in_reg_arr: PROCESS(snk_in, snk_in_data, snk_in_reg_arr) IS
+  BEGIN
+    FOR i IN 0 TO 3 LOOP
+      nxt_snk_in_reg_arr(i) <= c_dp_sosi_rst; -- Default for every stage; stays in effect when snk_in.valid='0'
+    END LOOP;
+
+    IF snk_in.valid='1' THEN
+      -- The base function is a shift register that advances only while snk_in.valid='1'
+      nxt_snk_in_reg_arr(0)      <= snk_in;
+      nxt_snk_in_reg_arr(0).data(c_ram_adr_w-1 DOWNTO 0) <= snk_in_data; -- Use the ranged data (this slice is the RAM read address downstream)
+      nxt_snk_in_reg_arr(1) <= snk_in_reg_arr(0);
+      nxt_snk_in_reg_arr(2) <= snk_in_reg_arr(1);
+      nxt_snk_in_reg_arr(3) <= snk_in_reg_arr(2);
+
+      -- Assign a count of 1 to valid data
+      nxt_snk_in_reg_arr(0).channel <= TO_DP_CHANNEL(1);
+
+      IF snk_in_reg_arr(2).valid = '1' THEN -- Shift register 0,1,2 filled with valid data
+        -- Overwrite channel field (=count) on duplicates. Compare only the ranged bits (c_ram_adr_w-1 DOWNTO 0): they form the RAM address.
+        IF snk_in_reg_arr(1).data(c_ram_adr_w-1 DOWNTO 0) = snk_in_reg_arr(2).data(c_ram_adr_w-1 DOWNTO 0) THEN
+          -- 1=2: drop the word moving to stage 2, let the word moving to stage 3 count for both
+          IF snk_in_reg_arr(1).sync = '0' THEN -- Don't count across sync periods
+            nxt_snk_in_reg_arr(2).valid   <= '0';
+            nxt_snk_in_reg_arr(2).channel <= TO_DP_CHANNEL(0);
+            nxt_snk_in_reg_arr(3).channel <= TO_DP_CHANNEL(2);
+          END IF;
+        END IF;
+        IF snk_in_reg_arr(0).data(c_ram_adr_w-1 DOWNTO 0) = snk_in_reg_arr(1).data(c_ram_adr_w-1 DOWNTO 0) THEN
+          IF snk_in_reg_arr(0).sync = '0' THEN -- Don't count across sync periods
+            IF snk_in_reg_arr(1).data(c_ram_adr_w-1 DOWNTO 0) = snk_in_reg_arr(2).data(c_ram_adr_w-1 DOWNTO 0) THEN
+              -- 0=1=2: drop the words moving to stages 1 and 2, the word moving to stage 3 counts for all three
+              IF snk_in_reg_arr(1).sync = '0' THEN -- Don't count across sync periods
+                nxt_snk_in_reg_arr(1).valid   <= '0';
+                nxt_snk_in_reg_arr(1).channel <= TO_DP_CHANNEL(0);
+                nxt_snk_in_reg_arr(2).valid   <= '0';
+                nxt_snk_in_reg_arr(2).channel <= TO_DP_CHANNEL(0);
+                nxt_snk_in_reg_arr(3).channel <= TO_DP_CHANNEL(3);
+              END IF;
+            ELSE
+              -- 0=1
+              -- Do nothing, otherwise we will never see 0=1=2. Instead wait until 0,1 shifted to 1,2.
+            END IF;
+          END IF;
+        ELSIF snk_in_reg_arr(0).data(c_ram_adr_w-1 DOWNTO 0) = snk_in_reg_arr(2).data(c_ram_adr_w-1 DOWNTO 0) THEN
+          -- 0=2: drop the word moving to stage 1, let the word moving to stage 3 count for both
+          IF snk_in_reg_arr(0).sync = '0' AND snk_in_reg_arr(1).sync = '0' THEN -- Don't count across sync periods: a sync in (0) or (1) puts (0) in a newer period than (2)
+            nxt_snk_in_reg_arr(1).valid   <= '0';
+            nxt_snk_in_reg_arr(1).channel <= TO_DP_CHANNEL(0);
+            nxt_snk_in_reg_arr(3).channel <= TO_DP_CHANNEL(2);
+          END IF;
+        END IF;
+      END IF;
+    END IF;
+  END PROCESS;
+
+  snk_in_reg <= snk_in_reg_arr(3);
+
+  -- Registers
+  p_snk_in_reg_arr: PROCESS(dp_clk, dp_rst) IS
+  BEGIN
+    -- Asynchronous reset: all four stages take c_dp_sosi_rst
+    IF dp_rst = '1' THEN
+      snk_in_reg_arr <= (OTHERS => c_dp_sosi_rst);
+    -- Rising clock edge: every stage loads its combinatorial next value
+    ELSIF RISING_EDGE(dp_clk) THEN
+      snk_in_reg_arr <= nxt_snk_in_reg_arr;
+    END IF;
+  END PROCESS;
+
   -------------------------------------------------------------------------------
   -- ram_pointer: Keep track of what RAM to target
   -- . Target either RAM 0 or 1 per sync period
   -- . RD/WR sides of RAM have shifted sync periods due to rd>wr latency
   --   . e.g. a new sync period is read while an old sync period is written
   --   . Solution: treat the RAM pointer as MS address bit in separate RD/WR buses
-  --   . ram_pointer is synchronous to snk_in.sync
+  --   . ram_pointer is synchronous to snk_in_reg.sync
   -------------------------------------------------------------------------------
   p_ram_pointer : PROCESS(dp_rst, dp_clk) IS
   BEGIN
@@ -224,14 +329,14 @@ BEGIN
   END PROCESS;
 
   -- Don't toggle the RAM pointer on the first sync as we're already reading the RAM at that point.
-  nxt_toggle_ram_pointer <= '1' WHEN snk_in.sync='1' ELSE toggle_ram_pointer;
+  nxt_toggle_ram_pointer <= '1' WHEN snk_in_reg.sync='1' ELSE toggle_ram_pointer;
   -- Toggle the RAM pointer starting from 2nd sync onwards
-  ram_pointer <= NOT prv_ram_pointer WHEN snk_in.sync='1' AND toggle_ram_pointer='1' ELSE prv_ram_pointer;
+  ram_pointer <= NOT prv_ram_pointer WHEN snk_in_reg.sync='1' AND toggle_ram_pointer='1' ELSE prv_ram_pointer;
 
 
   -------------------------------------------------------------------------------
   -- bin_reader : reads bin from RAM, sends bin to bin_writer.
-  -- . Input  : snk_in          (input data stream)
+  -- . Input  : snk_in_reg      (input data stream)
   --            bin_reader_miso (reply to RAM read request: rddata = bin count)
   --            ram_pointer (to put in MOSI buses as MS address bit)
   -- . Output : bin_reader_mosi (RAM read request, address = bin)
@@ -240,16 +345,18 @@ BEGIN
   -- Fetch the bin from RAM
   bin_reader_mosi.wrdata  <= (OTHERS=>'0');
   bin_reader_mosi.wr      <= '0';
-  bin_reader_mosi.rd      <= snk_in.valid;
-  bin_reader_mosi.address <= RESIZE_UVEC(ram_pointer & snk_in_data, c_word_w); 
+  bin_reader_mosi.rd      <= snk_in_reg.valid;
+--  bin_reader_mosi.address <= RESIZE_UVEC(ram_pointer & snk_in_data, c_word_w); 
+  bin_reader_mosi.address <= RESIZE_UVEC(ram_pointer & snk_in_reg.data(c_ram_adr_w-1 DOWNTO 0), c_word_w); 
 
-  -- Store the rd address as bin_writer needs to know where to write the bin count
   p_prv_bin_reader_mosi : PROCESS(dp_clk, dp_rst) IS
   BEGIN
     IF dp_rst = '1' THEN
-      prv_bin_reader_mosi <= c_mem_mosi_rst;
+      prv_bin_reader_mosi        <= c_mem_mosi_rst;
+      bin_reader_to_writer_count <= 0; -- No increment pending after reset
     ELSIF RISING_EDGE(dp_clk) THEN
-      prv_bin_reader_mosi  <= bin_reader_mosi;
+      prv_bin_reader_mosi        <= bin_reader_mosi; -- Store the read request; its address is reused as the write address by bin_reader_to_writer_mosi
+      bin_reader_to_writer_count <= nxt_bin_reader_to_writer_count; -- Registered copy of the count taken from snk_in_reg.channel, used as the bin increment
     END IF;
   END PROCESS;
 
@@ -257,17 +364,22 @@ BEGIN
   bin_reader_to_writer_mosi.wr      <= bin_reader_miso.rdval;
   bin_reader_to_writer_mosi.wrdata  <= RESIZE_UVEC(bin_reader_miso.rddata(c_ram_dat_w-1 DOWNTO 0), c_mem_data_w);
   bin_reader_to_writer_mosi.address <= prv_bin_reader_mosi.address;
-
+  nxt_bin_reader_to_writer_count    <= TO_UINT(snk_in_reg.channel); -- Use register (_nxt) to align count with rdval instead of snk_in_reg.valid
 
   -------------------------------------------------------------------------------
   -- bin_writer : Increment the bin, forward write request to bin_arbiter
   -- . Input  : bin_reader_to_writer_mosi (from bin_reader = bin + bin count)
   -- . Output : bin_writer_mosi (to bin_arbiter = bin + incremented bin count)
   -------------------------------------------------------------------------------
+--  bin_writer_mosi.rd      <= '0';
+--  bin_writer_mosi.wr      <= bin_reader_to_writer_mosi.wr;
+--  bin_writer_mosi.address <= bin_reader_to_writer_mosi.address;
+--  bin_writer_mosi.wrdata  <= INCR_UVEC(bin_reader_to_writer_mosi.wrdata, 1) WHEN bin_reader_to_writer_mosi.wr='1' ELSE bin_reader_to_writer_mosi.wrdata; 
+
   nxt_bin_writer_mosi.rd      <= '0';
   nxt_bin_writer_mosi.wr      <= bin_reader_to_writer_mosi.wr;
   nxt_bin_writer_mosi.address <= bin_reader_to_writer_mosi.address;
-  nxt_bin_writer_mosi.wrdata  <= INCR_UVEC(bin_reader_to_writer_mosi.wrdata, 1) WHEN bin_reader_to_writer_mosi.wr='1' ELSE bin_writer_mosi.wrdata; 
+  nxt_bin_writer_mosi.wrdata  <= INCR_UVEC(bin_reader_to_writer_mosi.wrdata, bin_reader_to_writer_count) WHEN bin_reader_to_writer_mosi.wr='1' ELSE bin_writer_mosi.wrdata; 
  
   -- Register the outputs to bin_arbiter (above we have a combinational adder = propagation delay)
   p_bin_writer_mosi : PROCESS(dp_clk, dp_rst) IS
@@ -290,7 +402,7 @@ BEGIN
   --           bin_reader_miso (carries the bins requested by bin_reader)
   -------------------------------------------------------------------------------
   -- Really simple arbitration: always allow writes, only allow reads when possible (rd_addr != wr_addr).
-  read_allowed <= FALSE WHEN bin_writer_mosi.wr='1' AND bin_writer_mosi.address=bin_reader_mosi.address ELSE TRUE;
+  read_allowed <= FALSE WHEN bin_writer_mosi.wr='1' AND bin_reader_mosi.rd='1' AND bin_writer_mosi.address=bin_reader_mosi.address ELSE TRUE;
   -- save previous read_allowed
   p_prv_read_allowed: PROCESS(dp_rst, dp_clk) IS
   BEGIN
@@ -313,20 +425,30 @@ BEGIN
   bin_arbiter_wr_mosi.address <= bin_writer_mosi.address;
 
   -- Loop back the WR data to the RD side when read was not allowed or on second read of same address
-  p_bin_reader_miso : PROCESS(prv_read_allowed, bin_reader_mosi, bin_reader_miso, bin_writer_mosi, read_allowed, bin_arbiter_rd_miso) IS
+  p_bin_reader_miso : PROCESS(prv_bin_reader_mosi, prv_read_allowed, bin_reader_mosi, bin_writer_mosi, read_allowed, bin_arbiter_rd_miso) IS -- Active body only reads bin_arbiter_rd_miso; the other signals are used solely by the commented-out branches
   BEGIN
+    debug1 <= '0'; -- Debug marker for the 'read was not allowed' loop-back; constant '0' while that branch is commented out
+    debug2 <= '0'; -- Debug marker for the 'two reads on same address' loop-back; constant '0' while that branch is commented out
     bin_reader_miso <= bin_arbiter_rd_miso;
-    IF prv_bin_reader_mosi.rd = '1' AND prv_read_allowed = FALSE THEN -- Fake succesful readback when read was not allowed
-      bin_reader_miso.rdval  <= '1';
-      bin_reader_miso.rddata <= bin_writer_mosi.wrdata;
-    ELSIF read_allowed = TRUE THEN
-      bin_reader_miso <= bin_arbiter_rd_miso;
-    ELSIF (prv_bin_reader_mosi.rd = '1' AND bin_reader_mosi.rd='1') AND (prv_bin_reader_mosi.address=bin_reader_mosi.address) THEN -- 2 reads on same address in row: 2nd read is outdated so return wrdata here
-      bin_reader_miso.rdval  <= '1';
-      bin_reader_miso.rddata <= bin_writer_mosi.wrdata;
-    END IF;
+--    IF prv_bin_reader_mosi.rd = '1' AND prv_read_allowed = FALSE THEN
+--      -- Fake successful readback when read was not allowed
+--      bin_reader_miso.rdval  <= '1';
+--      bin_reader_miso.rddata <= bin_writer_mosi.wrdata;
+--      debug1 <= '1';
+--    ELSIF read_allowed = TRUE THEN
+--      bin_reader_miso <= bin_arbiter_rd_miso;
+--    ELSIF (prv_bin_reader_mosi.rd = '1' AND bin_reader_mosi.rd='1') AND (prv_bin_reader_mosi.address=bin_reader_mosi.address) THEN
+--      -- 2 reads on same address in row: 2nd read is outdated so return wrdata here
+--      debug2 <= '1';
+--      bin_reader_miso.rdval  <= '1';
+--      bin_reader_miso.rddata <= bin_writer_mosi.wrdata;
+--    END IF;
   END PROCESS;
 
+  -- 1) Read bin from RAM (MOSI)
+  -- 2) Read bin value valid (MISO), increment bin value
+  -- 3) Write bin to RAM (MOSI)
+
 
   -------------------------------------------------------------------------------
   -- Two RAM (common_ram_r_w) instances. The user can read the histogram from the 
diff --git a/libraries/dsp/st/tb/vhdl/tb_st_histogram.vhd b/libraries/dsp/st/tb/vhdl/tb_st_histogram.vhd
index 356ba139b7b0821fbd67f523ec2820b11b7fb7b6..56c1840c058535b437b8531847883be83f44fe8c 100644
--- a/libraries/dsp/st/tb/vhdl/tb_st_histogram.vhd
+++ b/libraries/dsp/st/tb/vhdl/tb_st_histogram.vhd
@@ -64,9 +64,9 @@ USE dp_lib.tb_dp_pkg.ALL;
 ENTITY tb_st_histogram IS
   GENERIC(
     g_nof_sync          : NATURAL := 4;        -- We're simulating at least 4 g_nof_sync so both RAMs are written and cleared twice.
-    g_data_w            : NATURAL := 8;        -- Determines maximum number of bins (2^g_data_w)
-    g_nof_bins          : NATURAL := 256;      -- Lower than or equal to 2^g_data_w. Higher is allowed but makes no sense.
-    g_nof_data_per_sync : NATURAL := 1000;     -- Determines max required RAM data width. e.g. 11b to store max bin count '1024'.
+    g_data_w            : NATURAL := 3;        -- Determines maximum number of bins (2^g_data_w)
+    g_nof_bins          : NATURAL := 8;      -- Lower than or equal to 2^g_data_w. Higher is allowed but makes no sense.
+    g_nof_data_per_sync : NATURAL := 20;     -- Determines max required RAM data width. e.g. 11b to store max bin count '1024'.
     g_stimuli_mode      : STRING  := "sine";   -- "counter", "dc", "sine" or "random"
     g_data_type         : STRING  := "signed"; -- use "signed" if g_stimuli_mode="sine"
     g_lock_sine         : BOOLEAN := TRUE      -- TRUE to lock the sine wave to Sync - produces sparse histogram with low number of non-zero samples (occuring 2*c_sine_nof_periods)
@@ -83,6 +83,7 @@ ARCHITECTURE tb OF tb_st_histogram IS
   CONSTANT c_nof_levels_per_bin : NATURAL := (2**g_data_w)/g_nof_bins; --e.g. 2 values per bin if g_data_w=9 (512 levels) and g_nof_bins=256
 
   CONSTANT c_ram_dat_w : NATURAL := ceil_log2(g_nof_data_per_sync)+1;
+  CONSTANT c_ram_adr_w : NATURAL := ceil_log2(g_nof_bins);
     
   ---------------------------------------------------------------------------
   -- Clocks and resets
@@ -106,6 +107,7 @@ ARCHITECTURE tb OF tb_st_histogram IS
   SIGNAL stimuli_en : STD_LOGIC := '1';
 
   SIGNAL stimuli_src_out : t_dp_sosi;
+  SIGNAL nxt_stimuli_src_out : t_dp_sosi;
   SIGNAL stimuli_src_in  : t_dp_siso;
   SIGNAL stimuli_count   : REAL;
   SIGNAL stimuli_data    : STD_LOGIC_VECTOR(g_data_w-1 DOWNTO 0);
@@ -115,26 +117,23 @@ ARCHITECTURE tb OF tb_st_histogram IS
   ----------------------------------------------------------------------------
   -- st_histogram
   ----------------------------------------------------------------------------
-  SIGNAL st_histogram_snk_in    : t_dp_sosi;
-  SIGNAL st_histogram_ram_mosi  : t_mem_mosi;
-  SIGNAL st_histogram_ram_miso  : t_mem_miso;
+  SIGNAL st_histogram_snk_in       : t_dp_sosi;
+  SIGNAL st_histogram_ram_mosi     : t_mem_mosi;
+  SIGNAL prv_st_histogram_ram_mosi : t_mem_mosi;
+  SIGNAL st_histogram_ram_miso     : t_mem_miso;
   
-   ----------------------------------------------------------------------------
-   -- Automatic verification of RAM readout
-   ----------------------------------------------------------------------------
-  SIGNAL ram_rd_addr           : NATURAL;
-  SIGNAL prv_ram_rd_addr       : NATURAL;
-  SIGNAL ram_rd_word           : STD_LOGIC_VECTOR(c_ram_dat_w-1 DOWNTO 0);
-  SIGNAL ram_rd_word_int       : NATURAL;
-  SIGNAL ram_rd_word_valid     : STD_LOGIC;
-  SIGNAL nxt_ram_rd_word_valid : STD_LOGIC;
+  ----------------------------------------------------------------------------
+  -- Automatic verification of RAM readout
+  ----------------------------------------------------------------------------
+  -- Use these 4 signals to verify histogram by eye in the wave window
+  SIGNAL histogram_data         : NATURAL; -- QuestaSim: Format->Analog
+  SIGNAL histogram_bin_unsigned : NATURAL;
+  SIGNAL histogram_bin_signed   : INTEGER; -- QuestaSim: Radix->Decimal
+  SIGNAL histogram_valid        : STD_LOGIC;
+
   SIGNAL sum_of_bins           : NATURAL;
   SIGNAL verification_done     : STD_LOGIC;
 
-   ----------------------------------------------------------------------------
-   -- Signal to display histogram as 'analog signal' in wave window
-   ----------------------------------------------------------------------------
-  SIGNAL histogram : NATURAL;
 
 BEGIN 
   
@@ -154,8 +153,9 @@ BEGIN
   p_generate_packets : PROCESS
     VARIABLE v_sosi : t_dp_sosi := c_dp_sosi_rst;
   BEGIN
+    nxt_stimuli_src_out <= c_dp_sosi_rst;
     stimuli_done <= '0';
-    stimuli_src_out <= c_dp_sosi_rst;
+--    stimuli_src_out <= c_dp_sosi_rst;
     proc_common_wait_until_low(dp_clk, dp_rst);
     proc_common_wait_some_cycles(dp_clk, 5);
 
@@ -164,19 +164,19 @@ BEGIN
       FOR I IN 0 TO g_nof_sync-1 LOOP
         v_sosi.sync    := '1';
         v_sosi.data    := RESIZE_DP_DATA(v_sosi.data(g_data_w-1 DOWNTO 0));  -- wrap when >= 2**g_data_w    
-        proc_dp_gen_block_data(g_data_w, TO_UINT(v_sosi.data), g_nof_data_per_sync, TO_UINT(v_sosi.channel), TO_UINT(v_sosi.err), v_sosi.sync, v_sosi.bsn, dp_clk, stimuli_en, stimuli_src_in, stimuli_src_out);
+        proc_dp_gen_block_data(g_data_w, TO_UINT(v_sosi.data), g_nof_data_per_sync, TO_UINT(v_sosi.channel), TO_UINT(v_sosi.err), v_sosi.sync, v_sosi.bsn, dp_clk, stimuli_en, stimuli_src_in, nxt_stimuli_src_out);
       END LOOP;
     END IF;
 
     -- Generate a DC level that increments every sync  
     IF g_stimuli_mode="dc" THEN
-      stimuli_src_out.valid <= '1';
+      nxt_stimuli_src_out.valid <= '1';
       FOR I IN 0 TO g_nof_sync-1 LOOP
-        stimuli_src_out.data <= INCR_UVEC(stimuli_src_out.data, 1); --all g_nof_data_per_sync cycles
-        stimuli_src_out.sync <= '1'; -- cycle 0
+        nxt_stimuli_src_out.data <= INCR_UVEC(stimuli_src_out.data, 1); --all g_nof_data_per_sync cycles
+        nxt_stimuli_src_out.sync <= '1'; -- cycle 0
         WAIT FOR 5 ns;
         FOR j IN 1 TO g_nof_data_per_sync-1 LOOP --cycles 1..g_nof_data_per_sync-1
-          stimuli_src_out.sync <= '0';
+          nxt_stimuli_src_out.sync <= '0';
           WAIT FOR 5 ns;
         END LOOP;
       END LOOP;     
@@ -184,34 +184,35 @@ BEGIN
 
     -- Generate a sine wave
     IF g_stimuli_mode="sine" THEN
-      stimuli_src_out.valid <= '1';
+      nxt_stimuli_src_out <= stimuli_src_out;
+      nxt_stimuli_src_out.valid <= '1';
       stimuli_count <= 0.0;
       FOR I IN 0 TO g_nof_sync-1 LOOP       
-        stimuli_src_out.data <= (OTHERS=>'0');
-        stimuli_data <= (OTHERS=>'0');
-        stimuli_src_out.sync <= '1'; -- cycle 0
+--        nxt_stimuli_src_out.data <= (OTHERS=>'0');
+--        stimuli_data <= (OTHERS=>'0');
+        nxt_stimuli_src_out.sync <= '1'; -- cycle 0
         WAIT FOR 5 ns;
         FOR j IN 1 TO g_nof_data_per_sync-1 LOOP --cycles 1..g_nof_data_per_sync-1
-          stimuli_src_out.sync <= '0';
-          stimuli_data <= TO_SVEC( integer(round( c_sine_amplitude * sin(stimuli_count) )), g_data_w);
-          stimuli_src_out.data(g_data_w-1 DOWNTO 0) <= stimuli_data;
+          nxt_stimuli_src_out.sync <= '0';
+--          stimuli_data <= TO_SVEC( integer(round( c_sine_amplitude * sin(stimuli_count) )), g_data_w);
+          nxt_stimuli_src_out.data(g_data_w-1 DOWNTO 0) <= TO_SVEC( integer(round( c_sine_amplitude * sin(stimuli_count) )), g_data_w);
           stimuli_count<=stimuli_count+c_sine_time_step;
           WAIT FOR 5 ns;
         END LOOP;
-      END LOOP;     
+      END LOOP;  
     END IF;
 
     -- Generate pseudo random noise 
     IF g_stimuli_mode="random" THEN
-      stimuli_src_out.valid <= '1';
+      nxt_stimuli_src_out.valid <= '1';
       FOR I IN 0 TO g_nof_sync-1 LOOP
         stimuli_data <= (OTHERS=>'0');
-        stimuli_src_out.sync <= '1'; -- cycle 0
+        nxt_stimuli_src_out.sync <= '1'; -- cycle 0
         WAIT FOR 5 ns;
         FOR j IN 1 TO g_nof_data_per_sync-1 LOOP
-          stimuli_src_out.sync <= '0';
+          nxt_stimuli_src_out.sync <= '0';
           stimuli_data <=  func_common_random(stimuli_data);
-          stimuli_src_out.data(g_data_w-1 DOWNTO 0) <= stimuli_data; --all g_nof_data_per_sync cycles
+          nxt_stimuli_src_out.data(g_data_w-1 DOWNTO 0) <= stimuli_data; --all g_nof_data_per_sync cycles
           WAIT FOR 5 ns;
         END LOOP;
       END LOOP;     
@@ -222,7 +223,17 @@ BEGIN
     tb_end <= '1';
     WAIT;
   END PROCESS;
+
+  p_stimuli_src_out: PROCESS(dp_rst, dp_clk) IS -- Registers nxt_stimuli_src_out into stimuli_src_out
+  BEGIN
+    IF dp_rst='1' THEN
+      stimuli_src_out <= c_dp_sosi_rst; -- Asynchronous reset value
+    ELSIF RISING_EDGE(dp_clk) THEN
+      stimuli_src_out <= nxt_stimuli_src_out; -- Stimuli written by p_generate_packets appear here on the next rising dp_clk edge
+    END IF;
+  END PROCESS;
  
+--  nxt_stimuli_src_out.data(g_data_w-1 DOWNTO 0) <= stimuli_data;  
 
   ----------------------------------------------------------------------------
   -- st_histogram
@@ -270,38 +281,26 @@ BEGIN
   -- | 2           | 0           | 1        | Addr 2: 1024, others 0|
   -- | 3           | 1           | 0        | Addr 3: 1024, others 0|
   -- +-------------+-------------+----------+-----------------------+
-
   ----------------------------------------------------------------------------
   -- Perform MM read and put result in ram_rd_word
   p_verify_mm_read : PROCESS
   BEGIN
     st_histogram_ram_mosi.wr <= '0';
-    ram_rd_addr <= 0;
     FOR i IN 0 TO g_nof_sync-1 LOOP
       proc_common_wait_until_high(dp_clk, stimuli_src_out.sync);
       proc_common_wait_some_cycles(dp_clk, 10);
       FOR j IN 0 TO g_nof_bins-1 LOOP
         proc_mem_mm_bus_rd(j, dp_clk, st_histogram_ram_mosi); 
-        ram_rd_addr <= j;
-        ram_rd_word <= st_histogram_ram_miso.rddata(c_ram_dat_w-1 DOWNTO 0);
-        ram_rd_word_int <= TO_UINT(ram_rd_word);
       END LOOP;
     END LOOP;
     WAIT;
   END PROCESS;
 
-  -- Register st_histogram_ram_miso.rdval so we read only valid ram_rd_word
-  p_nxt_ram_rd_word_valid : PROCESS(dp_rst, dp_clk)
-  BEGIN
-   IF dp_rst = '1' THEN
-      ram_rd_word_valid <= '0';
-      prv_ram_rd_addr   <= 0;
-    ELSIF RISING_EDGE(dp_clk) THEN
-      ram_rd_word_valid <= nxt_ram_rd_word_valid;
-      prv_ram_rd_addr <= ram_rd_addr; -- align the rd address with rd data for wave window debugging
-    END IF;
-  END PROCESS;
-  nxt_ram_rd_word_valid <= st_histogram_ram_miso.rdval;
+  -- Help signals that contain the histogram bins+data (for viewing in the wave window and for p_verify_assert)
+  histogram_bin_unsigned <= TO_UINT(              prv_st_histogram_ram_mosi.address(c_ram_adr_w-1 DOWNTO 0)); -- Registered MM read address as an unsigned bin index
+  histogram_bin_signed   <= TO_SINT(offset_binary(prv_st_histogram_ram_mosi.address(c_ram_adr_w-1 DOWNTO 0))); -- Same address passed through offset_binary() and shown as a signed bin index
+  histogram_data  <= TO_UINT(st_histogram_ram_miso.rddata(c_ram_dat_w-1 DOWNTO 0)) WHEN st_histogram_ram_miso.rdval='1'ELSE 0; -- Bin count; forced to 0 whenever rdval is not '1'
+  histogram_valid <= st_histogram_ram_miso.rdval; -- High when histogram_data carries a read result
 
   -- Perform verification of ram_rd_word when ram_rd_word_valid
   p_verify_assert : PROCESS
@@ -311,31 +310,31 @@ BEGIN
       sum_of_bins <= 0;
       proc_common_wait_until_high(dp_clk, stimuli_src_out.sync);  
       FOR j IN 0 TO g_nof_bins-1 LOOP
-        proc_common_wait_until_high(dp_clk, ram_rd_word_valid);    
+        proc_common_wait_until_high(dp_clk, histogram_valid);
         IF i=0 THEN -- Sync period 0: we expect RAM to contain zeros
-          ASSERT ram_rd_word_int=0 REPORT "RAM contains wrong bin count (expected 0, actual " & INTEGER'IMAGE(ram_rd_word_int) & ")" SEVERITY ERROR;
+          ASSERT histogram_data=0 REPORT "RAM contains wrong bin count (expected 0, actual " & INTEGER'IMAGE(histogram_data) & ")" SEVERITY ERROR;
         ELSE -- Sync period 1 onwards
           IF g_stimuli_mode="counter" THEN
-            -- Counter data: ban values remain the same every sync
-            ASSERT ram_rd_word_int=c_expected_ram_content_counter REPORT "RAM contains wrong bin count (expected " & INTEGER'IMAGE(c_expected_ram_content_counter) & ", actual " & INTEGER'IMAGE(ram_rd_word_int) & ")" SEVERITY ERROR;
+            -- Counter data: bin values remain the same every sync
+            ASSERT histogram_data=c_expected_ram_content_counter REPORT "RAM contains wrong bin count (expected " & INTEGER'IMAGE(c_expected_ram_content_counter) & ", actual " & INTEGER'IMAGE(histogram_data) & ")" SEVERITY ERROR;
           ELSIF g_stimuli_mode="dc" THEN
             -- DC data: DC level increments every sync 
-            IF j=(i/c_nof_levels_per_bin)+1 THEN -- Check bin address and account for multiple levels per bin
+            IF j=(i/c_nof_levels_per_bin) THEN -- Check bin address and account for multiple levels per bin
               -- this address (j) should contain the DC level total count of this sync period (i)
-              ASSERT ram_rd_word_int=g_nof_data_per_sync REPORT "RAM contains wrong bin count (expected " & INTEGER'IMAGE(g_nof_data_per_sync) & ", actual " & INTEGER'IMAGE(ram_rd_word_int) & ")" SEVERITY ERROR;
+              ASSERT histogram_data=g_nof_data_per_sync REPORT "RAM contains wrong bin count (expected " & INTEGER'IMAGE(g_nof_data_per_sync) & ", actual " & INTEGER'IMAGE(histogram_data) & ")" SEVERITY ERROR;
             ELSE
               -- this address should contain zero
-              ASSERT ram_rd_word_int=0 REPORT "RAM contains wrong bin count (expected 0, actual " & INTEGER'IMAGE(ram_rd_word_int) & ")" SEVERITY ERROR;
+              ASSERT histogram_data=0 REPORT "RAM contains wrong bin count (expected 0, actual " & INTEGER'IMAGE(histogram_data) & ")" SEVERITY ERROR;
             END IF;
           END IF;
         END IF;
-        sum_of_bins<=sum_of_bins+ram_rd_word_int; -- Keep the sum of all bins  
-        WAIT FOR 5 ns;
+        sum_of_bins<=sum_of_bins+histogram_data; -- Keep the sum of all bins  
+        WAIT FOR 5 ns; 
       END LOOP;   
 
       -- Check the sum of all bins
       IF i>0 THEN -- Skip sync 0 (histogram still all zeros)
-        ASSERT sum_of_bins=g_nof_data_per_sync REPORT "Sum of bins not equal to g_nof_data_per_sync" SEVERITY ERROR;
+        ASSERT sum_of_bins=g_nof_data_per_sync REPORT "Sum of bins not equal to g_nof_data_per_sync (expected " & INTEGER'IMAGE(g_nof_data_per_sync) & ", actual " & INTEGER'IMAGE(sum_of_bins) & ")" SEVERITY ERROR;
       END IF;
 
     END LOOP;
@@ -347,15 +346,19 @@ BEGIN
   p_check_verification_done : PROCESS
   BEGIN
     proc_common_wait_until_high(dp_clk, stimuli_done);
+    proc_common_wait_some_cycles(dp_clk, 50); -- Wait 50 more dp_clk cycles before checking; presumably lets the last RAM readout/verification finish - TODO confirm the margin is sufficient
     ASSERT verification_done='1' REPORT "Verification failed" SEVERITY ERROR;
     WAIT;
   END PROCESS;
 
-  ----------------------------------------------------------------------------
-  -- Create a signal that displays histogram (view as analog in Questa Sim) 
-  -- in wave window
-  ----------------------------------------------------------------------------
-  histogram <= ram_rd_word_int WHEN ram_rd_word_valid='1' ELSE 0;
-
-  
+  -- Register MOSI to store the read address
+  p_clk: PROCESS(dp_rst, dp_clk) IS -- Keeps a one-clock-delayed copy of the MM read request
+  BEGIN
+    IF dp_rst = '1' THEN
+      prv_st_histogram_ram_mosi <= c_mem_mosi_rst; -- Asynchronous reset value
+    ELSIF RISING_EDGE(dp_clk) THEN
+      prv_st_histogram_ram_mosi <= st_histogram_ram_mosi; -- Its address field feeds histogram_bin_unsigned / histogram_bin_signed
+    END IF;
+  END PROCESS;
+ 
 END tb;