diff --git a/libraries/io/ddr/tb/vhdl/tb_io_ddr.vhd b/libraries/io/ddr/tb/vhdl/tb_io_ddr.vhd
index ae1566cea9497125fe55eb71660ac76c597b33e0..f2abb5ce4b3bc02bc03f5f26ee6ac7887f2a080d 100644
--- a/libraries/io/ddr/tb/vhdl/tb_io_ddr.vhd
+++ b/libraries/io/ddr/tb/vhdl/tb_io_ddr.vhd
@@ -53,7 +53,8 @@ ENTITY tb_io_ddr IS
     g_ctlr_ref_clk_period   : TIME := 5000 ps;   -- 200 MHz
     g_dvr_clk_period        : TIME := 5000 ps;   -- 50 MHz
     g_dp_clk_period         : TIME := 5000 ps;   -- 200 MHz
-    g_dp_factor             : NATURAL := 4;      -- 1 or power of 2, c_dp_data_w = c_ctlr_data_w / g_dp_factor
+    g_dp_factor             : NATURAL := 1;      -- 1 or power of 2, c_dp_data_w = c_ctlr_data_w / g_dp_factor
+    g_rd_fifo_depth         : NATURAL := 256;    -- default 256 because 32b*256 fits in 1 M9K, use larger to fit more read bursts, e.g. in case g_dp_factor>1
     g_block_len             : NATURAL := 2500;   -- block length for a DDR write access and read back access in number of c_ctlr_data_w words
     g_nof_block             : NATURAL := 3;      -- number of blocks that will be written to DDR and readback from DDR
     g_nof_wr_per_block      : NATURAL := 1;      -- number of write accesses per block
@@ -78,6 +79,17 @@ ARCHITECTURE str of tb_io_ddr IS
   
   CONSTANT c_dp_data_w                : NATURAL := c_ctlr_data_w/g_dp_factor;
   
+  CONSTANT c_queue_nof_rd             : NATURAL := sel_a_b(c_tech_ddr.name="DDR3", 1, 3);   -- derived empirically from simulation, seems to match (c_tech_ddr.command_queue_depth-1)/2
+  
+  CONSTANT c_wr_fifo_depth            : NATURAL := 256;
+  CONSTANT c_rd_fifo_depth            : NATURAL := g_rd_fifo_depth;
+  CONSTANT c_rd_fifo_af_margin        : NATURAL := 4 + c_queue_nof_rd*c_tech_ddr.maxburstsize;  -- sufficient to fit c_queue_nof_rd rd burst accesses of c_tech_ddr.maxburstsize each, plus a fixed margin of 4
+  
+  -- Frame size for sop/eop
+  CONSTANT c_wr_frame_size            : NATURAL := 32;
+  -- Sync period
+  CONSTANT c_wr_sync_period           : NATURAL := 512;
+ 
   -- Typical DDR access stimuli
   -- . write block of words in 1 write access and then readback in 4 block read accesses
   -- . use appropriate c_len to access across a DDR address column (a_col_w=10)
@@ -144,19 +156,15 @@ ARCHITECTURE str of tb_io_ddr IS
   CONSTANT c_ctlr_nof_address_arr     : t_nat_natural_arr(0 TO c_nof_access-1) := func_ctlr_nof_address_arr;
   CONSTANT c_ctlr_wr_not_rd_arr       : STD_LOGIC_VECTOR(0 TO c_nof_access-1)  := func_ctlr_wr_not_rd_arr;
                                                       
-  CONSTANT c_wr_fifo_depth            : NATURAL := 256;
-  CONSTANT c_rd_fifo_depth            : NATURAL := 16384;--256;
-  
-  -- Frame size for sop/eop
-  CONSTANT c_wr_frame_size            : NATURAL := 32;
-  -- Sync period
-  CONSTANT c_wr_sync_period           : NATURAL := 512;
- 
   SIGNAL dbg_c_ctlr_address_lo_arr    : t_nat_natural_arr(0 TO c_nof_access-1) := c_ctlr_address_lo_arr;
   SIGNAL dbg_c_ctlr_nof_address_arr   : t_nat_natural_arr(0 TO c_nof_access-1) := c_ctlr_nof_address_arr;
   SIGNAL dbg_c_ctlr_wr_not_rd_arr     : STD_LOGIC_VECTOR(0 TO c_nof_access-1)  := c_ctlr_wr_not_rd_arr;
+  
   SIGNAL dbg_c_tech_ddr               : t_c_tech_ddr := c_tech_ddr;
   SIGNAL dbg_c_dp_data_w              : NATURAL := c_dp_data_w;
+  SIGNAL dbg_c_wr_fifo_depth          : NATURAL := c_wr_fifo_depth;
+  SIGNAL dbg_c_rd_fifo_depth          : NATURAL := c_rd_fifo_depth;
+  SIGNAL dbg_c_rd_fifo_af_margin      : NATURAL := c_rd_fifo_af_margin;
   
   SIGNAL i_tb_end             : STD_LOGIC := '0';
   SIGNAL ctlr_ref_clk         : STD_LOGIC := '0';
@@ -388,6 +396,7 @@ BEGIN
     g_wr_data_w              => c_dp_data_w,
     g_wr_fifo_depth          => c_wr_fifo_depth,  -- >=16 AND >g_tech_ddr.maxburstsize, defined at DDR side of the FIFO.
     g_rd_fifo_depth          => c_rd_fifo_depth,  -- >=16 AND >g_tech_ddr.maxburstsize, defined at DDR side of the FIFO. 
+    g_rd_fifo_af_margin      => c_rd_fifo_af_margin,
     g_rd_data_w              => c_dp_data_w,
     g_wr_flush_mode          => g_wr_flush_mode,
     g_wr_flush_use_channel   => FALSE,
diff --git a/libraries/io/ddr/tb/vhdl/tb_tb_io_ddr.vhd b/libraries/io/ddr/tb/vhdl/tb_tb_io_ddr.vhd
index c60fdc3265e589e5bc8115749c8d226a8c26848c..7f8911ccc1d4eadbad36bb91da915c8f8f085a87 100644
--- a/libraries/io/ddr/tb/vhdl/tb_tb_io_ddr.vhd
+++ b/libraries/io/ddr/tb/vhdl/tb_tb_io_ddr.vhd
@@ -58,6 +58,7 @@ BEGIN
   -- g_dvr_clk_period        : TIME := 5 ns;      -- 50 ns
   -- g_dp_clk_period         : TIME := 5000 ps;   -- 200 MHz
   -- g_dp_factor             : NATURAL := 1;      -- 1 or power of 2, c_dp_data_w = c_ctlr_data_w / g_dp_factor
+  -- g_rd_fifo_depth         : NATURAL := 256;    -- default 256 because 32b*256 fits in 1 M9K, use larger to fit more read bursts
   -- g_block_len             : NATURAL := 64;     -- block length for a DDR write access and read back access in number of c_ctlr_data_w words
   -- g_nof_block             : NATURAL := 12;     -- number of blocks that will be written to DDR and readback from DDR
   -- g_nof_wr_per_block      : NATURAL := 1;      -- number of write accesses per block
@@ -66,26 +67,26 @@ BEGIN
   -- g_wr_flush_mode         : STRING := "SYN"    -- "VAL", "SOP", "SYN"
 
   gen_ddr3 : IF c_tech_ddr.name="DDR3" GENERATE
-    u_fill_wrfifo_on_next_valid     : ENTITY work.tb_io_ddr GENERIC MAP (c_technology, c_tech_ddr3, c_tech_ddr4, FALSE, FALSE, FALSE, 5 ns,  5 ns, 5 ns,  1, 1000, 2, 1, 4,  2, "VAL") PORT MAP (tb_end_vec(0));
-    u_fill_wrfifo_on_next_sop       : ENTITY work.tb_io_ddr GENERIC MAP (c_technology, c_tech_ddr3, c_tech_ddr4, FALSE, FALSE, FALSE, 5 ns,  5 ns, 5 ns,  1, 1000, 2, 3, 4,  2, "SOP") PORT MAP (tb_end_vec(1));
-    u_fill_wrfifo_on_next_sync      : ENTITY work.tb_io_ddr GENERIC MAP (c_technology, c_tech_ddr3, c_tech_ddr4, FALSE, FALSE, FALSE, 5 ns,  5 ns, 5 ns,  1, 1000, 2, 4, 1,  2, "SYN") PORT MAP (tb_end_vec(2));
-                                                                                                                                                                                     
-    u_ext_memory_model              : ENTITY work.tb_io_ddr GENERIC MAP (c_technology, c_tech_ddr3, c_tech_ddr4, FALSE,  TRUE, FALSE, 5 ns,  5 ns, 5 ns,  1, 1000, 1, 2, 3,  1, "VAL") PORT MAP (tb_end_vec(3));
-    u_mixed_width                   : ENTITY work.tb_io_ddr GENERIC MAP (c_technology, c_tech_ddr3, c_tech_ddr4, FALSE, FALSE, FALSE, 5 ns,  5 ns, 5 ns,  8, 1000, 1, 3, 2,  1, "VAL") PORT MAP (tb_end_vec(4));
-                                                                                                                                                                                     
-    u_wr_burst_size_0               : ENTITY work.tb_io_ddr GENERIC MAP (c_technology, c_tech_ddr3, c_tech_ddr4, FALSE,  TRUE, FALSE, 5 ns,  5 ns, 5 ns,  4,    2,10, 3, 3,  2, "VAL") PORT MAP (tb_end_vec(5));
-    u_wr_burst_size_1               : ENTITY work.tb_io_ddr GENERIC MAP (c_technology, c_tech_ddr3, c_tech_ddr4, FALSE,  TRUE, FALSE, 5 ns,  5 ns, 5 ns,  4,    1,10, 1, 1,  2, "VAL") PORT MAP (tb_end_vec(6));
-    
-    u_cross_dvr_to_faster_ctlr      : ENTITY work.tb_io_ddr GENERIC MAP (c_technology, c_tech_ddr3, c_tech_ddr4, FALSE, FALSE, FALSE, 5 ns, 20 ns, 5 ns,  1, 1000, 1, 1, 4,  1, "VAL") PORT MAP (tb_end_vec(7));
-    u_cross_dvr_to_slower_ctlr      : ENTITY work.tb_io_ddr GENERIC MAP (c_technology, c_tech_ddr3, c_tech_ddr4, FALSE, FALSE, FALSE, 5 ns,  1 ns, 5 ns,  1, 1000, 1, 1, 4,  1, "VAL") PORT MAP (tb_end_vec(8));
-                                                                                                                                                                                     
-    u_sequencer_1_16                : ENTITY work.tb_io_ddr GENERIC MAP (c_technology, c_tech_ddr3, c_tech_ddr4, FALSE, FALSE, FALSE, 5 ns,  5 ns, 5 ns,  4,   64,10, 1,16,  1, "VAL") PORT MAP (tb_end_vec(9));
-    u_sequencer_16_1                : ENTITY work.tb_io_ddr GENERIC MAP (c_technology, c_tech_ddr3, c_tech_ddr4, FALSE, FALSE, FALSE, 5 ns,  5 ns, 5 ns,  4,   64,10,16, 1,  1, "VAL") PORT MAP (tb_end_vec(10));
+    u_fill_wrfifo_on_next_valid : ENTITY work.tb_io_ddr GENERIC MAP (c_technology, c_tech_ddr3, c_tech_ddr4, FALSE, FALSE, FALSE, 5 ns,  5 ns, 5 ns,  1, 256,  1000, 2, 1, 4,   2, "VAL") PORT MAP (tb_end_vec(0));
+    u_fill_wrfifo_on_next_sop   : ENTITY work.tb_io_ddr GENERIC MAP (c_technology, c_tech_ddr3, c_tech_ddr4, FALSE, FALSE, FALSE, 5 ns,  5 ns, 5 ns,  1, 256,  1000, 2, 3, 4,   2, "SOP") PORT MAP (tb_end_vec(1));
+    u_fill_wrfifo_on_next_sync  : ENTITY work.tb_io_ddr GENERIC MAP (c_technology, c_tech_ddr3, c_tech_ddr4, FALSE, FALSE, FALSE, 5 ns,  5 ns, 5 ns,  1, 256,  1000, 2, 4, 1,   2, "SYN") PORT MAP (tb_end_vec(2));
+                                                                                                                                                                               
+    u_ext_memory_model          : ENTITY work.tb_io_ddr GENERIC MAP (c_technology, c_tech_ddr3, c_tech_ddr4, FALSE,  TRUE, FALSE, 5 ns,  5 ns, 5 ns,  1,8192,  2500, 1, 2, 3,   1, "VAL") PORT MAP (tb_end_vec(3));
+    u_mixed_width               : ENTITY work.tb_io_ddr GENERIC MAP (c_technology, c_tech_ddr3, c_tech_ddr4, FALSE, FALSE, FALSE, 5 ns,  5 ns, 5 ns,  8,8192,  2500, 1, 3, 2,   1, "VAL") PORT MAP (tb_end_vec(4));
+                                                                                                                                                                               
+    u_wr_burst_size_0           : ENTITY work.tb_io_ddr GENERIC MAP (c_technology, c_tech_ddr3, c_tech_ddr4, FALSE,  TRUE, FALSE, 5 ns,  5 ns, 5 ns,  4, 256,     2,10, 3, 3,   2, "VAL") PORT MAP (tb_end_vec(5));
+    u_wr_burst_size_1           : ENTITY work.tb_io_ddr GENERIC MAP (c_technology, c_tech_ddr3, c_tech_ddr4, FALSE,  TRUE, FALSE, 5 ns,  5 ns, 5 ns,  4, 256,     1,10, 1, 1,   2, "VAL") PORT MAP (tb_end_vec(6));
+                                                                                                                                                                               
+    u_cross_dvr_to_faster_ctlr  : ENTITY work.tb_io_ddr GENERIC MAP (c_technology, c_tech_ddr3, c_tech_ddr4, FALSE, FALSE, FALSE, 5 ns, 20 ns, 5 ns,  1,8192,  1000, 1, 1, 4,   1, "VAL") PORT MAP (tb_end_vec(7));
+    u_cross_dvr_to_slower_ctlr  : ENTITY work.tb_io_ddr GENERIC MAP (c_technology, c_tech_ddr3, c_tech_ddr4, FALSE, FALSE, FALSE, 5 ns,  1 ns, 5 ns,  1,8192,  1000, 1, 1, 4,   1, "VAL") PORT MAP (tb_end_vec(8));
+                                                                                                                                                                               
+    u_sequencer_1_16            : ENTITY work.tb_io_ddr GENERIC MAP (c_technology, c_tech_ddr3, c_tech_ddr4, FALSE, FALSE, FALSE, 5 ns,  5 ns, 5 ns,  4, 256,    64,10, 1,16,   1, "VAL") PORT MAP (tb_end_vec(9));
+    u_sequencer_16_1            : ENTITY work.tb_io_ddr GENERIC MAP (c_technology, c_tech_ddr3, c_tech_ddr4, FALSE, FALSE, FALSE, 5 ns,  5 ns, 5 ns,  4, 256,    64,10,16, 1,   1, "VAL") PORT MAP (tb_end_vec(10));
   END GENERATE;
   
   -- Distinghuis between tests for DDR3 and DDR4, because the Quartus 14.1 ip_arria10 DDR4 model simulates about 40x slower than the Quartus 11.1 ip_stratixiv DDR3 uniphy model.
   gen_ddr4 : IF c_tech_ddr.name="DDR4" GENERATE
-    u_sequencer_1_16                : ENTITY work.tb_io_ddr GENERIC MAP (c_technology, c_tech_ddr3, c_tech_ddr4, FALSE, FALSE, FALSE, 5 ns,  5 ns, 5 ns,  4,   64, 9, 1,16,  1, "VAL") PORT MAP (tb_end_vec(0));
+    u_sequencer_1_16                : ENTITY work.tb_io_ddr GENERIC MAP (c_technology, c_tech_ddr3, c_tech_ddr4, FALSE, FALSE, FALSE, 5 ns,  5 ns, 5 ns,  4,256,   64, 9, 1,16,  1, "VAL") PORT MAP (tb_end_vec(0));
   END GENERATE;
   
   p_tb_end : PROCESS