diff --git a/libraries/base/diag/src/vhdl/diag_pkg.vhd b/libraries/base/diag/src/vhdl/diag_pkg.vhd
index 94a35514d546fbc8ac25469a3b6663954e064bb8..348e80b0d34bba239db6e0532a8c2ed11ee087d2 100644
--- a/libraries/base/diag/src/vhdl/diag_pkg.vhd
+++ b/libraries/base/diag/src/vhdl/diag_pkg.vhd
@@ -162,10 +162,12 @@ PACKAGE diag_pkg IS
   -- CNTR / PSRG sequence test data
   -----------------------------------------------------------------------------
   
-  CONSTANT c_diag_seq_tx_reg_nof_dat : NATURAL := 3;
-  CONSTANT c_diag_seq_tx_reg_adr_w   : NATURAL := ceil_log2(c_diag_seq_tx_reg_nof_dat);
-  CONSTANT c_diag_seq_rx_reg_nof_dat : NATURAL := 4;
-  CONSTANT c_diag_seq_rx_reg_adr_w   : NATURAL := ceil_log2(c_diag_seq_rx_reg_nof_dat);
+  CONSTANT c_diag_seq_tx_reg_nof_dat      : NATURAL := 3;
+  CONSTANT c_diag_seq_tx_reg_adr_w        : NATURAL := ceil_log2(c_diag_seq_tx_reg_nof_dat);
+  CONSTANT c_diag_seq_rx_reg_nof_steps_wi : NATURAL := 4;  -- word index of the first step register in the rx MM register
+  CONSTANT c_diag_seq_rx_reg_nof_steps    : NATURAL := 4;  -- number of step registers in the rx MM register
+  CONSTANT c_diag_seq_rx_reg_nof_dat      : NATURAL := c_diag_seq_rx_reg_nof_steps_wi + c_diag_seq_rx_reg_nof_steps;  -- = 8 words in total
+  CONSTANT c_diag_seq_rx_reg_adr_w        : NATURAL := ceil_log2(c_diag_seq_rx_reg_nof_dat);  -- address width derived from the total number of words
   
   -- Record with all diag seq MM register fields
   TYPE t_diag_seq_mm_reg IS RECORD
diff --git a/libraries/base/diag/src/vhdl/diag_rx_seq.vhd b/libraries/base/diag/src/vhdl/diag_rx_seq.vhd
index 6cff0f9828f4af2d18fbbdd405ca870f0a84ab54..65e11e220e03066583b7363556ae0c3ba7dec867 100644
--- a/libraries/base/diag/src/vhdl/diag_rx_seq.vhd
+++ b/libraries/base/diag/src/vhdl/diag_rx_seq.vhd
@@ -20,15 +20,11 @@
 --
 --------------------------------------------------------------------------------
  
-LIBRARY IEEE, common_lib;
-USE IEEE.std_logic_1164.ALL;
-USE IEEE.numeric_std.ALL;
-USE common_lib.common_pkg.ALL;
-USE common_lib.common_lfsr_sequences_pkg.ALL;
-
-
--- Purpose: Verify received continuous PRSG or COUNTER test sequence data.
+-- Purpose: Verify received continuous test sequence data.
 -- Description:
+--   The diag_rx_seq can operate in one of two modes that depend on g_use_steps:
+--
+-- . g_use_steps = FALSE
 --   The test data can be PRSG or COUNTER dependent on diag_sel.
 --   The Rx is enabled by diag_en. Typically the Tx should already be running,
 --   but it is also allowed to first enable the Rx.
@@ -50,31 +46,71 @@ USE common_lib.common_lfsr_sequences_pkg.ALL;
 --   The diag_sample keeps the last valid in_dat value. When diag_en='0' it is
 --   reset to 0. Reading diag_sample via MM gives an impression of the valid
 --   in_dat activity.
+--
+-- . g_use_steps = TRUE
+--   The test data is fixed to COUNTER and diag_sel is ignored. The rx_seq can
+--   verify counter data that increments in steps that are specified via
+--   diag_steps_arr[3:0]. Up to g_nof_steps <= c_diag_seq_rx_reg_nof_steps = 4
+--   step sizes are supported. If all steps are set to 1 then there is no
+--   difference compared to using the COUNTER in g_use_steps = FALSE. Constant
+--   value data can be verified by setting all steps to 0. Using different
+--   steps is useful when the data is generated in linear incrementing order,
+--   but received in a different order. E.g. like after a transpose operation
+--   where blocks of data are written in rows and read in columns:
+--   
+--     tx:          0 1   2 3   4 5   6 7   8 9   10 11
+--     transpose:   0 1   4 5   8 9   2 3   6 7   10 11
+--     rx steps:     +1    +1    +1    +1    +1      +1
+--                -11    +3    +3    -7    +3    +3
+-- 
+--   The step size value range is set by the 32 bit range of the VHDL integer.
+--   Therefore typically g_dat_w should be <= 32 b. For a transpose that 
+--   contains more than 2**32 data words this means that the COUNTER data 
+--   wraps within the transpose. This is acceptable, because using g_dat_w
+--   <= 32 then still provides sufficient coverage to detect all errors.
+--
+--   Data errors that match a step size cannot be detected. However if such
+--   an error occurs then typically the next increment will cause a mismatch.
+--
+--   The feature of being able to detect errors per bit as with g_use_steps=
+--   FALSE is not supported when g_use_steps=TRUE. Therefore the
+--   diag_res[g_dat_w-1:0] = -1 (all '1') when a difference occurs that is not
+--   in diag_steps_arr.
+
+LIBRARY IEEE, common_lib;
+USE IEEE.std_logic_1164.ALL;
+USE IEEE.numeric_std.ALL;
+USE common_lib.common_pkg.ALL;
+USE common_lib.common_lfsr_sequences_pkg.ALL;
+USE work.diag_pkg.ALL;
 
 ENTITY diag_rx_seq IS
   GENERIC (
     g_input_reg  : BOOLEAN := FALSE;  -- Use unregistered input to save logic, use registered input to ease achieving timing constrains.
+    g_use_steps  : BOOLEAN := FALSE;  -- FALSE = verify PRSG or COUNTER data per bit, TRUE = verify COUNTER data against the increments in diag_steps_arr
+    g_nof_steps  : NATURAL := c_diag_seq_rx_reg_nof_steps;  -- number of entries in diag_steps_arr
     g_sel        : STD_LOGIC := '1';  -- '0' = PRSG, '1' = COUNTER
     g_cnt_w      : NATURAL := c_word_w;
     g_dat_w      : NATURAL := 12;
     g_diag_res_w : NATURAL := 16
   );
   PORT (
-    rst          : IN  STD_LOGIC;
-    clk          : IN  STD_LOGIC;
-    clken        : IN  STD_LOGIC := '1';
+    rst            : IN  STD_LOGIC;
+    clk            : IN  STD_LOGIC;
+    clken          : IN  STD_LOGIC := '1';
     
     -- Static control input (connect via MM or leave open to use default)
-    diag_en      : IN  STD_LOGIC;                                  -- '0' = init and disable, '1' = enable
-    diag_sel     : IN  STD_LOGIC := g_sel;
-    diag_res     : OUT STD_LOGIC_VECTOR(g_diag_res_w-1 DOWNTO 0);  -- diag_res valid indication bits & aggregate diff of in_dat during diag_en
-    diag_res_val : OUT STD_LOGIC;
-    diag_sample  : OUT STD_LOGIC_VECTOR(g_dat_w-1 DOWNTO 0);  -- monitor last valid in_dat
+    diag_en        : IN  STD_LOGIC;                                  -- '0' = init and disable, '1' = enable
+    diag_sel       : IN  STD_LOGIC := g_sel;
+    diag_steps_arr : IN  t_integer_arr(g_nof_steps-1 DOWNTO 0) := (OTHERS=>1);  -- allowed COUNTER increments, read by the g_use_steps=TRUE logic; default +1 for every step
+    diag_res       : OUT STD_LOGIC_VECTOR(g_diag_res_w-1 DOWNTO 0);  -- diag_res valid indication bits & aggregate diff of in_dat during diag_en
+    diag_res_val   : OUT STD_LOGIC;
+    diag_sample    : OUT STD_LOGIC_VECTOR(g_dat_w-1 DOWNTO 0);  -- monitor last valid in_dat
     
     -- ST input
-    in_cnt       : OUT STD_LOGIC_VECTOR(g_cnt_w-1 DOWNTO 0);  -- count valid input test sequence data
-    in_dat       : IN  STD_LOGIC_VECTOR(g_dat_w-1 DOWNTO 0);  -- input test sequence data
-    in_val       : IN  STD_LOGIC    -- gaps are allowed, however diag_res requires at least 2 valid in_dat to report a valid result
+    in_cnt         : OUT STD_LOGIC_VECTOR(g_cnt_w-1 DOWNTO 0);  -- count valid input test sequence data
+    in_dat         : IN  STD_LOGIC_VECTOR(g_dat_w-1 DOWNTO 0);  -- input test sequence data
+    in_val         : IN  STD_LOGIC    -- gaps are allowed, however diag_res requires at least 2 valid in_dat to report a valid result
   );
 END diag_rx_seq;
 
@@ -126,6 +162,16 @@ ARCHITECTURE rtl OF diag_rx_seq IS
   SIGNAL i_diag_sample   : STD_LOGIC_VECTOR(g_dat_w-1 DOWNTO 0);
   SIGNAL nxt_diag_sample : STD_LOGIC_VECTOR(g_dat_w-1 DOWNTO 0);
 
+  TYPE t_dat_arr IS ARRAY (INTEGER RANGE <>) OF STD_LOGIC_VECTOR(g_dat_w-1 DOWNTO 0);  -- one g_dat_w wide data word per step
+  
+  SIGNAL ref_dat_arr      : t_dat_arr(g_nof_steps-1 DOWNTO 0);         -- expected next data value for each step size
+  SIGNAL nxt_ref_dat_arr  : t_dat_arr(g_nof_steps-1 DOWNTO 0);
+  SIGNAL diff_arr         : STD_LOGIC_VECTOR(g_nof_steps-1 DOWNTO 0);  -- per step '1' when the received data does not match that step
+  SIGNAL nxt_diff_arr     : STD_LOGIC_VECTOR(g_nof_steps-1 DOWNTO 0);
+  SIGNAL diff_detect      : STD_LOGIC;                                 -- '1' when none of the steps matches
+  SIGNAL nxt_diff_detect  : STD_LOGIC;
+  SIGNAL diff_hold        : STD_LOGIC;                                 -- held level of diff_detect, output of the common_switch
+  
 BEGIN
 
   diag_dis <= NOT diag_en;
@@ -175,13 +221,18 @@ BEGIN
         in_val_dly2  <= in_val_dly1;
         -- Internal.
         in_val_2_dly <= in_val_2 & in_val_2_dly(0 TO c_diag_res_latency-2);
+        diag_res_int <= nxt_diag_res;
+        diag_res_en  <= nxt_diag_res_en;
+        diag_res_val <= nxt_diag_res_val;
+        -- . g_use_steps=FALSE
         prsg         <= nxt_prsg;
         cntr         <= nxt_cntr;
         ref_dat      <= nxt_ref_dat;
         diff_dat     <= nxt_diff_dat;
-        diag_res_int <= nxt_diag_res;
-        diag_res_en  <= nxt_diag_res_en;
-        diag_res_val <= nxt_diag_res_val;
+        -- . g_use_steps=TRUE
+        ref_dat_arr  <= nxt_ref_dat_arr;
+        diff_arr     <= nxt_diff_arr;
+        diff_detect  <= nxt_diff_detect;
         -- Outputs.
         i_diag_sample <= nxt_diag_sample;
       END IF;
@@ -220,62 +271,108 @@ BEGIN
   -- Use in_val_2_act instead of in_val_2 to have stable start in case diag_dis takes just a pulse and in_val is continue high
   in_val_2_act <= vector_and(in_val_2 & in_val_2_dly);
   
-  ------------------------------------------------------------------------------
-  -- Compare the in_dat bus with the reference dat
-  ------------------------------------------------------------------------------
-  
   -- Use the first valid in_dat after diag_en='1' to initialize the reference data sequence
   ref_en <= in_val_1;
   
-  common_lfsr_nxt_seq(c_lfsr_nr,    -- IN
-                      ref_en,       -- IN
-                      in_val_reg,   -- IN, use in_val_reg to allow gaps in the input data valid stream
-                      in_dat_reg,   -- IN
-                      prsg,         -- IN
-                      cntr,         -- IN
-                      nxt_prsg,     -- OUT
-                      nxt_cntr);    -- OUT
-    
-  nxt_ref_dat <= prsg WHEN diag_sel='0' ELSE cntr;
-
-  -- The ref_dat has latency 2 compared to the in_dat, because of the register
-  -- stage in psrg/cntr and the register stage in ref_dat.
-  p_diff : PROCESS (diff_dat, ref_dat, in_val_dly2, in_dat_dly2)
-  BEGIN
-    nxt_diff_dat <= diff_dat;
-    IF in_val_dly2='1' THEN
-      FOR I IN in_dat'RANGE LOOP
-        nxt_diff_dat(I) <= ref_dat(I) XOR in_dat_dly2(I);
-      END LOOP;
-    END IF;
-  END PROCESS;
-  
-  
-  ------------------------------------------------------------------------------
-  -- Hold any difference on the in_dat bus lines
-  ------------------------------------------------------------------------------
-  
+  -- Use the detection of second valid in_dat after diag_en='1' to start detection of differences
   diff_dis <= NOT in_val_2_act;
+    
+  no_steps : IF g_use_steps=FALSE GENERATE  -- verify PRSG or COUNTER data and report the difference per bit
+    -- Determine next reference dat based on current input dat
+    common_lfsr_nxt_seq(c_lfsr_nr,    -- IN
+                        ref_en,       -- IN
+                        in_val_reg,   -- IN, use in_val_reg to allow gaps in the input data valid stream
+                        in_dat_reg,   -- IN
+                        prsg,         -- IN
+                        cntr,         -- IN
+                        nxt_prsg,     -- OUT
+                        nxt_cntr);    -- OUT
+      
+    nxt_ref_dat <= prsg WHEN diag_sel='0' ELSE cntr;  -- diag_sel='0' selects the PRSG reference, otherwise the COUNTER reference
+  
+    -- Detect difference per bit. The ref_dat has latency 2 compared to the in_dat, because of the register stage in psrg/cntr and the register stage in ref_dat.
+    p_diff_dat : PROCESS (diff_dat, ref_dat, in_val_dly2, in_dat_dly2)
+    BEGIN
+      nxt_diff_dat <= diff_dat;  -- keep the previous difference while there is no valid data
+      IF in_val_dly2='1' THEN
+        FOR I IN in_dat'RANGE LOOP
+          nxt_diff_dat(I) <= ref_dat(I) XOR in_dat_dly2(I);  -- '1' marks a bit that differs from the reference
+        END LOOP;
+      END IF;
+    END PROCESS;
+    
+    gen_verify_dat : FOR I IN in_dat'RANGE GENERATE
+      -- Detect and report undefined diff input 'X', which in simulation leaves diff_res at OK, because switch_high only acts on '1'
+      p_sim_only : PROCESS(clk)
+      BEGIN
+        IF rising_edge(clk) THEN
+          IF diff_dat(I)/='0' AND diff_dat(I)/='1' THEN
+            REPORT "diag_rx_seq : undefined input" SEVERITY FAILURE;
+          END IF;
+        END IF;
+      END PROCESS;
+      
+      -- Hold any difference on the in_dat bus lines
+      u_dat : ENTITY common_lib.common_switch
+      PORT MAP(
+        clk         => clk,
+        rst         => rst,
+        switch_high => diff_dat(I),
+        switch_low  => diff_dis,     -- diff_dis = NOT in_val_2_act
+        out_level   => diff_res(I)
+      );
+    END GENERATE;
+  END GENERATE;
   
-  gen_verify_dat : FOR I IN in_dat'RANGE GENERATE
-    -- Detect and report undefined diff input 'X', which in simulation leaves diff_res at OK, because switch_high only acts on '1'
-    p_sim_only : PROCESS(clk)
+  use_steps : IF g_use_steps=TRUE GENERATE
+    -- Determine next reference data for all steps increments of current input dat
+    p_ref_dat_arr : PROCESS(in_dat_reg, in_val_reg, ref_dat_arr, diag_steps_arr)  -- diag_steps_arr is read below, so it must be in the sensitivity list
     BEGIN
-      IF rising_edge(clk) THEN
-        IF diff_dat(I)/='0' AND diff_dat(I)/='1' THEN
-          REPORT "diag_rx_seq : undefined input" SEVERITY FAILURE;
+      nxt_ref_dat_arr <= ref_dat_arr;  -- keep the references while there is no valid input
+      IF in_val_reg='1' THEN
+        FOR I IN g_nof_steps-1 DOWNTO 0 LOOP
+          nxt_ref_dat_arr(I) <= INCR_UVEC(in_dat_reg, diag_steps_arr(I));  -- expected next data when step I is taken
+        END LOOP;
+      END IF;
+    END PROCESS;
+        
+    -- Detect difference for each step.  The ref_dat_arr has latency 1 compared to the input dat
+    p_diff_arr : PROCESS(diff_arr, in_val_dly1, in_dat_dly1, ref_dat_arr)
+    BEGIN
+      nxt_diff_arr <= diff_arr;  -- keep the previous result while there is no valid data
+      IF in_val_dly1='1' THEN
+        nxt_diff_arr <= (OTHERS=>'1');  -- default mark every step as different
+        FOR I IN g_nof_steps-1 DOWNTO 0 LOOP
+          IF UNSIGNED(ref_dat_arr(I))=UNSIGNED(in_dat_dly1) THEN
+            nxt_diff_arr(I) <= '0';  -- received data equals the reference for step I
+          END IF;
+        END LOOP;
+      END IF;
+    END PROCESS;
+    
+    -- detect diff when no step counter value matches
+    p_diff_detect : PROCESS(diff_detect, diff_arr, in_val_dly2)
+    BEGIN
+      nxt_diff_detect <= diff_detect;  -- keep the previous detection while there is no valid data
+      IF in_val_dly2='1' THEN
+        nxt_diff_detect <= '0';
+        IF vector_and(diff_arr)='1' THEN  -- all steps differ, so the received data matches none of the allowed increments
+          nxt_diff_detect <= '1';
         END IF;
       END IF;
     END PROCESS;
-  
+    
+    -- hold detected diff value
     u_dat : ENTITY common_lib.common_switch
     PORT MAP(
       clk         => clk,
       rst         => rst,
-      switch_high => diff_dat(I),
+      switch_high => diff_detect,
       switch_low  => diff_dis,
-      out_level   => diff_res(I)
+      out_level   => diff_hold
     );
+    
+    diff_res <= (OTHERS=> diff_hold);  -- copy diff_hold to all diff_res bits, the held format that g_use_steps=FALSE drives per bit; diff_dat is already driven by the clocked process
   END GENERATE;
   
   
diff --git a/libraries/base/diag/src/vhdl/mms_diag_rx_seq.vhd b/libraries/base/diag/src/vhdl/mms_diag_rx_seq.vhd
index 277ecd569b9d8e28217fea26288b5c595a8ba383..c631c171166c05abdd7e0684faf8a5b664fbf752 100644
--- a/libraries/base/diag/src/vhdl/mms_diag_rx_seq.vhd
+++ b/libraries/base/diag/src/vhdl/mms_diag_rx_seq.vhd
@@ -37,6 +37,14 @@
 --  |-----------------------------------------------------------------------|
 --  |                              rx_sample[g_seq_dat_w-1:0]               |  3  RO
 --  |-----------------------------------------------------------------------|
+--  |                      diag_steps_arr[0][g_seq_dat_w-1:0]               |  4  RW
+--  |-----------------------------------------------------------------------|
+--  |                      diag_steps_arr[1][g_seq_dat_w-1:0]               |  5  RW
+--  |-----------------------------------------------------------------------|
+--  |                      diag_steps_arr[2][g_seq_dat_w-1:0]               |  6  RW
+--  |-----------------------------------------------------------------------|
+--  |                      diag_steps_arr[3][g_seq_dat_w-1:0]               |  7  RW
+--  |-----------------------------------------------------------------------|
 --
 -- . g_nof_streams
 --   The MM control register for stream I in 0:g_nof_streams-1 starts at word
@@ -78,6 +86,13 @@
 --   The rx_sample keeps the last valid in_dat value. When diag_en='0' it is
 --   reset to 0. Reading rx_sample via MM gives an impression of the valid
 --   in_dat activity.
+--
+-- . g_use_steps
+--   When g_use_steps=FALSE then diag_sel selects whether PRSG or COUNTER
+--   data with increment +1 is used to verify the input data.
+--   When g_use_steps=TRUE then the g_nof_steps = 
+--   c_diag_seq_rx_reg_nof_steps = 4 MM step registers define the allowed
+--   COUNTER increment values.
 
 LIBRARY IEEE, common_lib, dp_lib;
 USE IEEE.std_logic_1164.ALL;
@@ -91,6 +106,8 @@ USE work.diag_pkg.ALL;
 ENTITY mms_diag_rx_seq IS
   GENERIC (
     g_nof_streams : NATURAL := 1;
+    g_use_steps   : BOOLEAN := FALSE;
+    g_nof_steps   : NATURAL := c_diag_seq_rx_reg_nof_steps;
     g_seq_dat_w   : NATURAL := c_word_w;  -- >= 1, test sequence data width
     g_data_w      : NATURAL := c_word_w   -- >= g_seq_dat_w, user data width
   );
@@ -115,23 +132,30 @@ ARCHITECTURE str OF mms_diag_rx_seq IS
 
   -- Define MM slave register size
   CONSTANT c_mm_reg      : t_c_mem  := (latency  => 1,
-                                        adr_w    => c_diag_seq_rx_reg_adr_w,    -- = 2
+                                        adr_w    => c_diag_seq_rx_reg_adr_w,
                                         dat_w    => c_word_w,                   -- Use MM bus data width = c_word_w = 32 for all MM registers
-                                        nof_dat  => c_diag_seq_rx_reg_nof_dat,  -- = 4
+                                        nof_dat  => c_diag_seq_rx_reg_nof_dat,
                                         init_sl  => '0');
   
   -- Define MM slave register fields for Python peripheral using pi_common.py (specify MM register access per word, not per individual bit because mm_fields assumes 1 field per MM word)
   CONSTANT c_mm_reg_field_arr : t_common_field_arr(c_mm_reg.nof_dat-1 DOWNTO 0) :=
-                               ( ( field_name_pad("control"),   "RW",        2, field_default(0) ),  -- control[1:0] = diag_sel & diag_en
-                                 ( field_name_pad("result"),    "RO",        2, field_default(0) ),  -- result[1:0]  = res_val_n & res_ok_n
-                                 ( field_name_pad("rx_cnt"),    "RO", c_word_w, field_default(0) ),
-                                 ( field_name_pad("rx_sample"), "RO", c_word_w, field_default(0) ));
+                               ( ( field_name_pad("control"),   "RW",        2, field_default(0) ),   -- [0] = control[1:0] = diag_sel & diag_en
+                                 ( field_name_pad("result"),    "RO",        2, field_default(0) ),   -- [1] = result[1:0]  = res_val_n & res_ok_n
+                                 ( field_name_pad("rx_cnt"),    "RO", c_word_w, field_default(0) ),   -- [2]
+                                 ( field_name_pad("rx_sample"), "RO", c_word_w, field_default(0) ),   -- [3]
+                                 ( field_name_pad("step_0"),    "RW", c_word_w, field_default(1) ),   -- [4] = diag_steps_arr[0]
+                                 ( field_name_pad("step_1"),    "RW", c_word_w, field_default(1) ),   -- [5] = diag_steps_arr[1]
+                                 ( field_name_pad("step_2"),    "RW", c_word_w, field_default(1) ),   -- [6] = diag_steps_arr[2]
+                                 ( field_name_pad("step_3"),    "RW", c_word_w, field_default(1) ));  -- [7] = diag_steps_arr[3], c_diag_seq_rx_reg_nof_steps = 4
                                   
   CONSTANT c_reg_slv_w   : NATURAL := c_mm_reg.nof_dat*c_mm_reg.dat_w;
   
+  CONSTANT c_nof_steps_wi     : NATURAL := c_diag_seq_rx_reg_nof_steps_wi;
+  
   TYPE t_reg_slv_arr IS ARRAY (INTEGER RANGE <>) OF STD_LOGIC_VECTOR(c_reg_slv_w-1 DOWNTO 0);
   TYPE t_seq_dat_arr IS ARRAY (INTEGER RANGE <>) OF STD_LOGIC_VECTOR(g_seq_dat_w-1 DOWNTO 0);
   TYPE t_data_arr    IS ARRAY (INTEGER RANGE <>) OF STD_LOGIC_VECTOR(g_data_w-1 DOWNTO 0);
+  TYPE t_steps_2arr  IS ARRAY (INTEGER RANGE <>) OF t_integer_arr(g_nof_steps-1 DOWNTO 0);
   
   SIGNAL reg_mosi_arr        : t_mem_mosi_arr(g_nof_streams-1 DOWNTO 0);
   SIGNAL reg_miso_arr        : t_mem_miso_arr(g_nof_streams-1 DOWNTO 0);
@@ -142,6 +166,7 @@ ARCHITECTURE str OF mms_diag_rx_seq IS
   
   SIGNAL diag_en_arr         : STD_LOGIC_VECTOR(g_nof_streams-1 DOWNTO 0);
   SIGNAL diag_sel_arr        : STD_LOGIC_VECTOR(g_nof_streams-1 DOWNTO 0);
+  SIGNAL diag_steps_2arr     : t_steps_2arr(g_nof_streams-1 DOWNTO 0);
   
   SIGNAL rx_cnt_arr          : t_slv_32_arr(g_nof_streams-1 DOWNTO 0);  -- can use t_slv_32_arr because c_mm_reg.dat_w = c_word_w = 32 fixed
   SIGNAL rx_sample_arr       : t_seq_dat_arr(g_nof_streams-1 DOWNTO 0);
@@ -191,6 +216,8 @@ BEGIN
     -- detect rx sequence errors
     u_diag_rx_seq: ENTITY WORK.diag_rx_seq
     GENERIC MAP (
+      g_use_steps       => g_use_steps,
+      g_nof_steps       => g_nof_steps,
       g_cnt_w           => c_word_w,
       g_dat_w           => g_seq_dat_w,
       g_diag_res_w      => g_seq_dat_w  -- do not use g_seq_dat_w+1 to include NOT diag_res_val in MSbit, instead use diag_res_val output
@@ -202,6 +229,7 @@ BEGIN
       -- Write and read back registers:
       diag_en           => diag_en_arr(I),
       diag_sel          => diag_sel_arr(I),
+      diag_steps_arr    => diag_steps_2arr(I),
 
       -- Read only registers:
       diag_res          => diag_res_arr(I),
@@ -222,12 +250,16 @@ BEGIN
     -- . write ctrl_reg_arr
     diag_en_arr(I)   <= ctrl_reg_arr(I)(0);  -- address 0, data bit [0]
     diag_sel_arr(I)  <= ctrl_reg_arr(I)(1);  -- address 0, data bit [1]
-                                             -- address 1, not used for control
+    
+    gen_diag_steps_2arr : FOR J IN 0 TO g_nof_steps-1 GENERATE
+      diag_steps_2arr(I)(J) <= TO_SINT(ctrl_reg_arr(I)(g_seq_dat_w-1 + (c_nof_steps_wi+J)*c_word_w DOWNTO (c_nof_steps_wi+J)*c_word_w));  -- address 4, 5, 6, 7; signed so that negative steps can be set and a word with the MSbit set stays within the integer range
+    END GENERATE;
+    
     -- . read stat_reg_arr
     p_stat_reg_arr : PROCESS(ctrl_reg_arr, stat_res_ok_n_arr, stat_res_val_n_arr, rx_cnt_arr, rx_sample_arr)
     BEGIN
       -- Default write / readback:
-      stat_reg_arr(I) <= ctrl_reg_arr(I);                                        -- address 0: control read back
+      stat_reg_arr(I) <= ctrl_reg_arr(I);                                        -- default control read back
       -- Status read only:
       stat_reg_arr(I)(                  0+1*c_word_w) <= stat_res_ok_n_arr(I);   -- address 1, data bit [0]
       stat_reg_arr(I)(                  1+1*c_word_w) <= stat_res_val_n_arr(I);  -- address 1, data bit [1]