diff --git a/applications/apertif/designs/apertif_unb1_correlator/tb/vhdl/tb_node_apertif_unb1_correlator_processing_output.vhd b/applications/apertif/designs/apertif_unb1_correlator/tb/vhdl/tb_node_apertif_unb1_correlator_processing_output.vhd
index 056a77b791c8027aa0852bf3f55349900f6b347d..9349f8312a3889ad63e6e15b3fb7c2eb43a7bd87 100644
--- a/applications/apertif/designs/apertif_unb1_correlator/tb/vhdl/tb_node_apertif_unb1_correlator_processing_output.vhd
+++ b/applications/apertif/designs/apertif_unb1_correlator/tb/vhdl/tb_node_apertif_unb1_correlator_processing_output.vhd
@@ -25,6 +25,26 @@
 --          and DB in node_apertif_unb1_correlator_output
 --
 -- Desription:
+-- A) This tb verifies:
+--
+--   1) p_check_inter_channel_delay: Verify that g_inter_channel_delay is in min
+--      max range
+--   2) p_mm_stimuli : set offload src MAC and dest MAC
+--      p_mm_diag_data_buffer_output : verify offload src MAC, dest MAC, and
+--        BSN timestamp, channel and beamlet
+--   3) p_mm_stimuli : set phasor waveform into BG and start BG
+--   3a)p_verify_cor_src_out: verify expected channel data at cor_src_out
+--        interface for g_use_prefilter, g_use_prefilter_ones
+--   3b)p_mm_diag_data_buffer_output :verify expected channel data at offload
+--        interface for g_use_prefilter, g_use_prefilter_ones
+--   4) u_eth_statistics : verify number of offloaded visibility packet data
+--
+--   * Test 3a) is also covered by tb_node_apertif_unb1_correlator.vhd.
+--   * Test 2) is also covered by tb_apertif_unb1_correlator_nodes.vhd using
+--     file IO.
+--   * Test 4) is also covered by tb_apertif_unb1_correlator_nodes.vhd.
+--
+-- B) Data formats:
 --   The beamlet data after the mesh for all Nband*Ndest = 16*8 = 128 PN
 --   in the Apertif Correlator together is described by:
 --
@@ -69,94 +89,175 @@
 --     . pair    : Q_interleave
 --     . tp_pair : tp MOD Q_interleave
 --
+-- C) Maximum and minimum inter channel delay for visibility offload
 -- * c_inter_channel_delay
 --   With no inter channel output delay, channels are output back-to-back, creating a short full valid burst of 300 visibilities
---   per channel for 64 channels per beamlet. This yields c_nof_complex * cor_out_dat_w * f_clk = 2 * 32 * 200M = 12.8 Gbps in burst.
+--   per channel for 64 channels per beamlet. This yields c_nof_complex * c_cor_out_dat_w * f_clk = 2 * 32 * 200M = 12.8 Gbps in burst.
+--   The link capacity reduces towards the output and is less than:
+--   a)   12.8  Gbps = 200 MHz * 64 bit at node_apertif_unb1_correlator_processing cor_src_out sosi.re/im complex data output
+--   b)    4    Gbps = 125 MHz * 32 bit at node_apertif_unb1_correlator_output sosi.data packet data output
+--   c)    1    Gbps = at 1GbE interface of one node as defined by udp_tx_snk_in_arr with udp_tx_src_out_arr flow control
+--   d)  250    Mbps = 1 Gbps / 4 at 1GbE interface of UniBoard with 8 nodes and one switch with 2 output ports
+--   e)  156.25 Mbps = 20 Gbps / 128 at 2 * 10GbE interface of the Data Writer for 16 UniBoard each with 8 nodes so N_PN = 128 nodes
 --
 -- * Set c_inter_channel_delay and g_bg_duty_cycle_gap in simulation with 1 PN and 1 GbE link:
 --   In the tb with c_nof_tp=2*2=4 there are c_nof_visibilities=10 visibilities, g_nof_beamlets=2 and c_nof_channels=64. Hence the
 --   number of output bits per BG sync interval is:
 --
---     c_nof_complex * cor_out_dat_w * c_nof_visibilities * c_nof_channels * g_nof_beamlets = 2 * 32 * 10 * 64 * 2
---                                                                                        = 81920 bits = 1280 complex words
+--     g_nof_beamlets * c_nof_channels * c_nof_visibilities * c_nof_complex * c_cor_out_dat_w = 2 * 64 * 10 * 2 * 32b =
+--                                                                                            = 81920 bits = 1280 complex words
+--     . c_nof_channels             = c_nof_points = 64
+--     . c_nof_blocks_per_sync      = c_nof_tsub_per_sync = c_nof_tchan_per_sync * c_nof_channels
+--     . c_nof_frames_per_sync      = c_nof_blocks_per_sync
+--     . c_frame_size               = g_nof_beamlets
+--     . c_bg_block_period          = c_frame_size * Q_interleave + c_bg_duty_cycle_gap
+--
+--   Typical for Apertif correlator the c_bg_block_period = N_clk = 256 dp_clk cycles. To speed up simulation c_bg_duty_cycle_gap
+--   can be set as small as possible:
+--
+--     c_bg_nof_clk_per_sync      = c_nof_blocks_per_sync * c_bg_block_period = 640 * (2*2 + 0) = 2560 dp_clk cycles minimal
+--     
+--   The minimal BG sync interval in the tb takes c_bg_nof_clk_per_sync = 2560 dp_clk cycles, so g_bg_duty_cycle_gap = 0 can be used,
+--   because 2560 > 1280 at the cor_src_out sosi.re/im interface that can support < 12.8 Gbps. However the data rate at the 1GbE
+--   serial PHY interface must remain < 1Gbps. Per channel there are:
 --
---   The BG sync interval in the tb takes c_bg_nof_clk_per_sync = c_nof_blocks_per_sync * c_bg_block_period = 960 * 4 = 3840 clk
---   cycles, so g_bg_duty_cycle_gap=0 can be used, because 3840 > 1280 at the cor_src_out sosi.re/im interface.
---   The data rate at the udp_tx_snk_in_arr(0).re/im interface is 125MHz * 32b = 4 Gbps. However at the 1GbE serial PHY interface
---   the burst data rate must remain < 1Gbps. Per channel there are:
+--     c_vis_header_size   = 21 words
+--     c_vis_payload_size  = c_nof_visibilities * c_nof_complex = 10 * 2 = 20 words
+--     c_vis_packet_size   = c_vis_header_size + c_vis_payload_size = 21 + 20 = 41 words
 --
---     gap size     = c_network_eth_gap_len * 8b = 96 bits for some idle time between packets
---     header size  = 21 * 32b = 672 bits
---     tail size    = 1 * 32b = 32 bits
---     payload size = c_nof_complex * cor_out_dat_w * c_nof_visibilities = 2 * 32 * 10 = 640 bits
---     packet size  = gap size + header size + payload size + tail size = 96 + 672 + 640 + 32 = 1440 bits
+--     c_vis_nof_packets_per_sync = g_nof_beamlets * c_nof_channels = 2 * 64 = 128
 --
---   The transport over 1GbE takes at least 1440 ns @ 1Gbps. One dp_clk cycle is 5 ns (= 1/f_clk = 1/200M), so 1440 / 5 = 288
---   dp_clk cycles. The visibilities arrive in c_nof_visibilities=10 dp_clk cycles in cor_src_out. Therefore:
+--     c_tail_size         = 1 word
+--     c_gap_size          = c_network_eth_gap_len * 8b / 32b = 3 words for some idle time between packets
+--     c_vis_packet_load   = (c_vis_packet_size + c_tail_size + c_gap_size) * 32b = (41 + 1 + 3) * 32b = 45 * 32b = 1440 bits
 --
---     c_inter_channel_delay must be > 288 - c_nof_visibilities = 288 - 10 = 278 --> 279
+--   The transport of one packet over 1GbE takes at least c_vis_packet_load / f_link = 1440 ns (f_link = 1Gbps). One dp_clk cycle
+--   is 5 ns (= 1/f_clk = 1/200M), so:
+-- 
+--     c_vis_packet_nof_dp_clk = c_vis_packet_load * f_clk / f_link = 1440 * 200/1000 = 288 dp_clk cycles
 --
---   However with an c_inter_channel_delay the total offload time per sync interval becomes about:
+--   The visibilities per packet arrive in c_nof_visibilities = 10 dp_clk cycles at the cor_src_out sosi.re/im interface. The
+--   c_inter_channel_delay is applied at this interface. Therefore the minimum c_inter_channel_delay is:
 --
---     c_inter_channel_delay * c_nof_channels * g_nof_beamlets = 279 * 64 * 2 = 35712 dp_clk cycles
+--     c_inter_channel_delay >= c_vis_packet_nof_dp_clk - c_nof_visibilities = 288 - 10 = 278 --> 279 dp_clk cycles
 --
---   This is > BG sync interval of 3840 dp_clk cycles with g_bg_duty_cycle_gap=0. Therefore g_bg_duty_cycle_gap needs to be
+--   With this minimum c_inter_channel_delay the total offload time per sync interval becomes about:
+--
+--     c_vis_nof_packets_per_sync * c_vis_packet_nof_dp_clk = (2 * 64) * 288 = 36864 dp_clk cycles
+--
+--   This is > BG sync interval of 2560 dp_clk cycles with g_bg_duty_cycle_gap = 0. Therefore g_bg_duty_cycle_gap needs to be
 --   increased to:
 --
---     c_bg_block_period = c_bg_nof_clk_per_sync / c_nof_blocks_per_sync = 35712 / 640 = 55.8 --> 56
---     g_bg_duty_cycle_gap = c_bg_block_period - c_frame_size * Q_interleave = 56 - 2*2 = 52
+--     c_bg_block_period = c_bg_nof_clk_per_sync / c_nof_blocks_per_sync = 36864 / 640 = 57.6 --> 58
+--     g_bg_duty_cycle_gap = c_bg_block_period - c_frame_size * Q_interleave = 58 - 2*2 = 54
+--
+--   Therefore in combination with minimum c_inter_channel_delay = 279 use g_bg_duty_cycle_gap >= 54 to make it work for f_link =
+--   1Gbps. The maximum g_bg_duty_cycle_gap = N_clk - c_frame_size * Q_interleave = 256 - 88 * 2 = 80 and this is also the typical
+--   g_bg_duty_cycle_gap for the Apertif correlator, because N_clk = 256 is fixed. Hence the typical BG sync interval is:
+--
+--     c_bg_block_period     = N_clk = 256
+--     c_bg_nof_clk_per_sync = c_nof_blocks_per_sync * c_bg_block_period = 640 * 256 = 163840 dp_clk cycles typical
+--
+--   The visibility offload must finish within this BG sync interval, hence the maximum number of dp_clk cycles per visibility
+--   packet is:
 --
---   Therefore in combination with c_inter_channel_delay=279 use g_bg_duty_cycle_gap >= 52 to make it work for 1GbE @ 1Gbps.
+--     c_vis_packet_offload_nof_dp_clk <= c_bg_nof_clk_per_sync / c_vis_nof_packets_per_sync = 163840 / (2 * 64) = 1280
 --
---   To make it work with N_PN = 128 nodes and 2 * 10GbE links to the data writer the c_inter_channel_delay needs to be larger
---   to reduce the rate per 1GbE link to < 20 Gbps / 128 = 156.25 Mbps.
+--   Therefore the maximum c_inter_channel_delay applied at the cor_src_out sosi.re/im interface becomes:
+-- 
+--     c_inter_channel_delay <= c_vis_packet_offload_nof_dp_clk - c_nof_visibilities = 1280 - 10 = 1270
+--
+-- * Correlator constraint on c_nof_tchan_per_sync:
+--   The correlator.vhd has the constraint that c_nof_tchan_per_sync >= c_nof_mult = c_nof_visibilities / 2**c_nof_pre_mult_folds.
+--   Therefore choose c_nof_tchan_per_sync >= 300 / 2**1 = 150.
 --
 -- * Set c_inter_channel_delay on hardware with 128 PN and 2 10GbE links:
 --   In total there are c_nof_beamlets * c_nof_channels * c_nof_visibilities per t_int = 1.024 sec, so 88 * 64 * 300 * 2 * 32b / 1.024s =
 --   105.6 Mbps on average (8 bit mode with 88 beamlets) or 144 Mbps (6 bit mode with 120 beamlets), which can run over one 1GbE
---   link. The total Apertif correlator output rate for N_PN = 128 nodes to the Apertif data writer is 128 * 105.6M = 13.5168 Gbps
---   respectively 18.432 Gbps, which can run over two 10GbE links.
---
---   We need to set an appropriate number of c_inter_channel_delay cycles for a constant visibility buffer output rate. The
---   correlator outputs c_nof_beamlets * c_nof_channels = 88 * 64 = 5632 respectively 120 * 64 = 7680 blocks of c_nof_visibilities =
---   300 visibility samples per t_int = N_int / f_sub = 1.024 s = 204.8M dp_clk cycles @ f_clk = 200 MHz. Hence per block there
---   are maximum 36363 or 26666 dp_clk cycles available. The block itself takes c_nof_visibilities = 300 dp_clk cycles. Hence the
---   maximum c_inter_channel_delay becomes 36363 - 300 = 36063 or 26666 - 300 = 26066 dp_clk cycles.
---   The minimum c_inter_channel_delay depends on the capacity of the two 10GbE links. Using the total Apertif correlator output 
---   this yields about 13.5168G/20GbE * max(c_inter_channel_delay) = 24373 for 88 beamlets respectively 24022 for 120 beamlets.
---
---   In formula:
+--   link. The total Apertif correlator output rate for N_PN = 128 nodes to the Apertif data writer is 128 * 105.6M = 13.5168 Gbps so
+--   13.5168/20 = 0.676 utilization, respectively 128 * 144M = 18.432 Gbps so 18.432/20 = 0.922 utilization, which can run over two
+--   10GbE links.
+--
+--   We need to set an appropriate number of c_inter_channel_delay cycles for a constant visibility buffer output rate.
+--   . The maximum c_inter_channel_delay is determined by the number of cycles per sync interval divided by the number of
+--     visibility packets per sync interval. The correlator outputs c_vis_nof_packets_per_sync = c_nof_beamlets * c_nof_channels
+--     = 88 * 64 = 5632 respectively 120 * 64 = 7680 blocks of c_nof_visibilities = 300 visibility samples per
+--     t_int = N_int / f_sub = 1.024 s = 204.8M dp_clk cycles @ f_clk = 200 MHz. Hence per block there are maximum
+--     204.8M / 5632 = 36363 or 204.8M / 7680 = 26666 dp_clk cycles available. The block itself takes c_nof_visibilities
+--     = 300 dp_clk cycles. Hence the maximum c_inter_channel_delay becomes 36363 - 300 = 36063 or 26666 - 300 = 26366
+--     dp_clk cycles.
+--   . The minimum c_inter_channel_delay is determined by the available data rate f_link and the processing clock rate of  dp_clk.
+--     Important to note is that the burst rate is independent of the packet size, however the minimum inter_channel_delay is
+--     dependent on the packet size, because the inter_channel_delay is applied between packets (so not between samples). Therefore
+--     the minimum c_inter_channel_delay is also determined by the number of bits per visibility packet and the number of dp_clk
+--     cycles to process a visibility packet. Per PN the link capacity is f_link = 20G/128 = 156.25 Mbps.
+--     The c_vis_packet_load = (21 + 300*2 + 1 +3) * 32b = 20000 bits per packet. With f_clk = 200 MHz, so 5 ns period, this
+--     corresponds to c_vis_packet_nof_dp_clk = c_vis_packet_load * f_clk / f_link = 20000 * 200M / 156.25 M = 25600
+--     dp_clk cycles. Internally one visibility packet is handled in c_nof_visibilities = 300 dp_clk cycles, so the minimum
+--     c_inter_channel_delay = 25600 - 300 = 25300 dp_clk cycles and is independent of the beamlet bit mode, so the same for
+--     8 bit and 6 bit.
+--   . As check the ratio between min and max c_inter_channel_delay is (25300 + 300) / (36063 + 300) = 0.704 for 8 bit beamlet
+--     mode and (25300 + 300) / (26366 + 300) = 0.960 for 6 bit beamlet mode.
+--     
+--   In formula with HW parameter values:
 --   . N_clk = f_clk / f_sub = 200M / 781250 = 256
 --   . N_int = c_nof_tsub_per_sync = c_nof_tchan_per_sync*c_nof_points = 12500 * 64 = 800000
 --   . c_nof_clk_per_sync = N_int * N_clk = 204.8M
---   . c_nof_visibility_blocks = c_nof_beamlets * c_nof_channels = 5632 for c_nof_beamlets =  88 and c_nof_channels = 64
---                                                               = 7680 for c_nof_beamlets = 120 and c_nof_channels = 64
+--   . c_vis_nof_packets_per_sync = c_nof_beamlets * c_nof_channels = 5632 for c_nof_beamlets =  88 and c_nof_channels = 64
+--                                                                  = 7680 for c_nof_beamlets = 120 and c_nof_channels = 64
 --   . c_nof_visibilities = 300 = (24*25)/2 for c_nof_tp = 24
---   c_inter_channel_delay = c_nof_clk_per_sync / c_nof_visibility_blocks - c_nof_visibilities = 204.8M / 5632 - 300 = 36063
---                                                                                             = 204.8M / 7680 - 300 = 26066
---
---   In simulation the N_int needs to be set much smaller than 800000, to shorten the simulation time as much as possible.
---   When c_nof_beamlets is 2 and c_nof_tp = 4 then:
---   . N_clk = 256
---   . N_int = c_nof_tsub_per_sync = c_nof_tchan_per_sync*c_nof_points = 10 * 64
---   . c_nof_clk_per_sync = N_int * N_clk = 163840
---   . c_nof_visibility_blocks = 128 for c_nof_beamlets = 2 and c_nof_channels = 64
---   . c_nof_visibilities = 10 = ( 4* 5)/2 for c_nof_tp = 4
---   c_inter_channel_delay = 163840 / 128 - 10 = 1270 > 0 so this is possible even without throttle BG xon
---     Clearly the c_nof_beamlets has more impact than the c_nof_tp.
---
---   When c_nof_beamlets is 88 and c_nof_tp = 4 then:
---   . N_int = c_nof_tsub_per_sync = c_nof_tchan_per_sync*c_nof_points = 10 * 64
---   . c_nof_clk_per_sync = N_int * N_clk = 163840
---   . c_nof_visibility_blocks = 5632 for c_nof_beamlets = 88 and c_nof_channels = 64
---   . c_nof_visibilities = 10 = ( 4* 5)/2 for c_nof_tp = 4
---   c_inter_channel_delay = 163840 / 5632 - 10 = 19 > 0, so possible.
---
---   If c_nof_tp is increased also then BG xon can be kept active if c_nof_tchan_per_sync is increased also. For
---   c_nof_beamlets = 88 and c_nof_tp = 24 get c_nof_clk_per_sync >= c_nof_visibility_blocks * c_nof_visibilities = 5632 * 300 = 1689600
---   and c_nof_tchan_per_sync =  c_nof_clk_per_sync / N_clk / c_nof_points = 1689600 / 256 / 64 = 103, so choose 110. However
---   from the correlator.vhd the constraint is c_nof_tchan_per_sync >= c_nof_mult = c_nof_visibilities / 2**c_nof_pre_mult_folds.
---   Therefore choose c_nof_tchan_per_sync >= 300 / 2**1 = 150.
+--   . f_link = 156.25M
+--   . maximum c_inter_channel_delay = c_nof_clk_per_sync / c_vis_nof_packets_per_sync - c_nof_visibilities
+--                                   = 204.8M / 5632 - 300 = 36063 = c_hw_inter_channel_delay_max_8bit
+--                                   = 204.8M / 7680 - 300 = 26366 = c_hw_inter_channel_delay_max_6bit
+--   . minimum c_inter_channel_delay = c_vis_packet_nof_dp_clk - c_nof_visibilities
+--                                   = (25 + 300*2) * 32b * 200M/156.25M - 300 = 25300 = c_hw_inter_channel_delay_min
+--
+--   In simulation:
+--     The N_int needs to be set much smaller than 800000, to shorten the simulation time as much as possible.
+--     When c_nof_beamlets is 2 and c_nof_tp = 4 then:
+--     . c_bg_block_period = N_clk = 256
+--     . c_nof_visibilities = 10 = (4* 5)/2 for c_nof_tp = 4
+--     . N_int = c_nof_tsub_per_sync = c_nof_tchan_per_sync*c_nof_points = 10 * 64 = 640
+--     . c_nof_clk_per_sync = N_int * c_bg_block_period = 640 * 256 = 163840
+--     . c_vis_nof_packets_per_sync = 128 for c_nof_beamlets = 2 and c_nof_channels = 64
+--     . f_link = 1G, in simulation with one PN use full 1GbE capacity
+--     . maximum c_inter_channel_delay = c_nof_clk_per_sync / c_vis_nof_packets_per_sync - c_nof_visibilities
+--                                     = 163840 / 128 - 10 = 1270
+--     . minimum c_inter_channel_delay = c_vis_packet_nof_dp_clk - c_nof_visibilities =
+--                                     = (25 + 10*2) * 32b * 200M/1G - 10 = 278
+--     Trial simulations show:
+--       <= between 230 and 240 it fails because FIFO in node output overflows
+--       >= between 280 and 290 it fails because FIFO in corr_visibility_buffer overflows
+--       Possibly the min value 278 and max value 280 are correct, but the FIFO overflow takes some time to occur,
+--       so the simulation is too short to sharply reveal the min and max of c_inter_channel_delay.
+--
+--     The fastest simulation is achieved if the c_inter_channel_delay min and max are almost
+--     equal. The maximum c_inter_channel_delay can be reduced by reducing the c_bg_block_period
+--     via g_bg_duty_cycle_gap, this then yields g_bg_duty_cycle_gap = 54 as shown above. To
+--     check with g_bg_duty_cycle_gap = 54 and c_nof_beamlets is 2 and c_nof_tp = 4 then:
+--     . c_bg_block_period = c_frame_size * Q_interleave + c_bg_duty_cycle_gap = 2*2+ 54 = 58
+--     . c_nof_visibilities = 10 = (4* 5)/2 for c_nof_tp = 4
+--     . N_int = c_nof_tsub_per_sync = c_nof_tchan_per_sync*c_nof_points = 10 * 64 = 640
+--     . c_nof_clk_per_sync = N_int * c_bg_block_period = 640 * 58 = 37120
+--     . c_vis_nof_packets_per_sync = 128 for c_nof_beamlets = 2 and c_nof_channels = 64
+--     . f_link = 1G, in simulation with one PN use full 1GbE capacity
+--     . maximum c_inter_channel_delay = c_nof_clk_per_sync / c_vis_nof_packets_per_sync - c_nof_visibilities
+--                                     = 37120 / 128 - 10 = 280
+--     . minimum c_inter_channel_delay = c_vis_packet_nof_dp_clk - c_nof_visibilities =
+--                                     = (25 + 10*2) * 32b * 200M/1G - 10 = 278, so choose 279 to fit within min and max
+
+--   When c_nof_beamlets is 2 and c_nof_tp = 24 then:
+--   . c_bg_block_period = N_clk = 256
+--   . c_nof_visibilities = 300 = (24*25)/2 for c_nof_tp = 24
+--   . N_int = c_nof_tsub_per_sync = c_nof_tchan_per_sync*c_nof_points = 160 * 64 = 10240, where correlator constraint 160 > 300/2
+--   . c_nof_clk_per_sync = N_int * c_bg_block_period = 2621440
+--   . c_vis_nof_packets_per_sync = 128 for c_nof_beamlets = 2 and c_nof_channels = 64
+--   . f_link = 1G, in simulation with one PN use full 1GbE capacity
+--   . maximum c_inter_channel_delay = c_nof_clk_per_sync / c_vis_nof_packets_per_sync - c_nof_visibilities
+--                                   = 2621440 / 128 - 300 = 20180
+--   . minimum c_inter_channel_delay = c_vis_packet_nof_dp_clk - c_nof_visibilities =
+--                                   = (25 + 300*2) * 32b * 200M/1G - 300 = 3700
 --
 -- Usage:
 --   > as 10
@@ -185,24 +286,25 @@ USE dp_lib.dp_stream_pkg.ALL;
 USE diag_lib.diag_pkg.ALL;
 USE fft_lib.fft_pkg.ALL;
 USE wpfb_lib.wpfb_pkg.ALL;
+USE work.apertif_unb1_correlator_pkg.ALL;
 
 
 ENTITY tb_node_apertif_unb1_correlator_processing_output IS
   GENERIC (
     g_tb_end                : BOOLEAN := TRUE;   -- when TRUE then tb_end ends this simulation, else a higher multi-testbench will end the simulation
-    g_tb_index              : NATURAL := 0;      -- use different index to avoid MM file conflict in multi tb
     g_nof_bg_sync           : NATURAL := 2;   -- [t] number of BG sync intervals, is number ofintegration intervals
     -- DUT settings
     g_nof_pn                : NATURAL := 2;   -- number of PN, choose >= 1
     g_nof_10G               : NATURAL := 2;   -- number of 10G input per PN, choose >= 2 and even (to fit Q_interleave=2)
     g_nof_beamlets          : NATURAL := 2;   -- [bu_i], is 88 in 8bit, 120 in 6bit beamlet mode
     g_nof_tchan_per_sync    : NATURAL := 10;  -- [t_c], is 12500, choose > number of taps of WPFB to simulate longer than the FIR impulse response
-    g_inter_channel_delay   : NATURAL := 279;   -- throttle correlator output
+    g_inter_channel_delay   : NATURAL := 279;   -- throttle correlator output, 279 fits between the min 278 and max 280 derived in the description for g_bg_duty_cycle_gap = 54
     g_use_wpfb              : BOOLEAN := TRUE;
     g_use_prefilter         : BOOLEAN := TRUE;
     g_use_prefilter_ones    : BOOLEAN := FALSE;
     -- BG settings
-    g_bg_duty_cycle_gap     : NATURAL := 56;   -- 0 or small for faster simulation, use N_clk - Q_interleave*g_nof_beamlets for HW
+    g_bg_duty_cycle_gap     : NATURAL := 54;   -- small for faster simulation, use N_clk - Q_interleave*g_nof_beamlets for HW
+    --g_bg_duty_cycle_gap     : NATURAL := 256;   -- use 256 to default to N_clk - Q_interleave*g_nof_beamlets for HW
     g_phasor_ampl           : REAL := 0.25;   -- range 0:1 where by 1.0 corresponds to maximum +amplitude, use <= 1.0 / 2**c_wpfb_fil_in_backoff_w
     g_phasor_freq           : REAL := 1.0;    -- for N=64 point FFT choose channel in range -32.0 : 0.0 : 31.0
     g_phasor_phase          : REAL := 0.0     -- [0:2pi>, has nearly no effect on correlator output, because all inputs use the same phasor
@@ -221,6 +323,10 @@ ARCHITECTURE tb OF tb_node_apertif_unb1_correlator_processing_output IS
   CONSTANT c_dp_clk_period              : TIME := 5 ns;
   CONSTANT c_cross_clock_domain_latency : NATURAL := 20;
 
+  CONSTANT f_link                       : REAL := 1000.0;  -- 1 Gbps
+  --CONSTANT f_link                       : REAL := 156.25;
+  CONSTANT f_dp_clk                     : REAL :=  200.0;  -- 200 MHz
+
   -- UniBoard
   CONSTANT c_unb_nr               : NATURAL := 14;      -- Uniboard 0:15
   CONSTANT c_fpga_nr              : NATURAL := 5;       -- FPGA 0:7 on a UniBoard, FPGA 0:3 = FN 0:3, FPGA 4:7 = BN 0:3
@@ -269,32 +375,41 @@ ARCHITECTURE tb OF tb_node_apertif_unb1_correlator_processing_output IS
   CONSTANT c_bg_nof_clk_per_sync    : NATURAL := c_nof_blocks_per_sync * c_bg_block_period;
   
   -- . correlator DUT input
-  CONSTANT c_nof_tp                : NATURAL := g_nof_pn * g_nof_10G;                 -- number of telescope paths
+  CONSTANT c_nof_tp                 : NATURAL := g_nof_pn * g_nof_10G;                 -- number of telescope paths
 
-  CONSTANT c_nof_visibilities      : NATURAL := c_nof_tp * (c_nof_tp+1) / 2;
-  CONSTANT c_accu_nof_samples      : NATURAL := c_nof_tsub_per_sync;                  -- [ti]
-  CONSTANT c_inter_channel_delay   : NATURAL := g_inter_channel_delay;
+  CONSTANT c_nof_visibilities       : NATURAL := c_nof_tp * (c_nof_tp+1) / 2;
+  CONSTANT c_accu_nof_samples       : NATURAL := c_nof_tsub_per_sync;                  -- [ti]
+  CONSTANT c_inter_channel_delay    : NATURAL := g_inter_channel_delay;
 
-  CONSTANT c_in_dat_w             : NATURAL := c_wpfb_apertif_channels.fil_in_dat_w;  -- =  8 bit
-  CONSTANT c_in_complex_w         : NATURAL := c_nof_complex * c_in_dat_w;            -- = 16 bit, complex Im & Re
-  CONSTANT c_cor_out_dat_w        : NATURAL := 32;
+  CONSTANT c_in_dat_w               : NATURAL := c_wpfb_apertif_channels.fil_in_dat_w;  -- =  8 bit
+  CONSTANT c_in_complex_w           : NATURAL := c_nof_complex * c_in_dat_w;            -- = 16 bit, complex Im & Re
+  CONSTANT c_cor_out_dat_w          : NATURAL := 32;
   
-  -- . DUT output DB
-  CONSTANT c_vis_header_size          : NATURAL := 21;  -- (pad(2) + eth(14) + ip(20) + udp(8) + app_id(16) + app_flags(24)) / 4 = 84 bytes / 4 = 21 words
-  CONSTANT c_vis_payload_size         : NATURAL := c_nof_complex*c_nof_visibilities;
-  CONSTANT c_vis_packet_size          : NATURAL := c_vis_header_size + c_vis_payload_size;
+  -- . 1GbE output
+  CONSTANT c_packet_info              : t_apertif_unb1_correlator_packet_info := func_apertif_unb1_correlator_packet_info(c_nof_visibilities, f_link, f_dp_clk);
+  CONSTANT c_inter_channel_delay_min  : INTEGER := func_apertif_unb1_correlator_inter_channel_delay_min(c_packet_info, c_nof_visibilities);
+  CONSTANT c_inter_channel_delay_max  : INTEGER := func_apertif_unb1_correlator_inter_channel_delay_max(c_bg_block_period, c_bg_nof_blocks_per_sync, g_nof_beamlets, c_nof_channels, c_nof_visibilities);
+  
+  CONSTANT c_vis_header_size          : NATURAL := c_packet_info.vis_header_size;           -- = (pad(2) + eth(14) + ip(20) + udp(8) + app_id(16) + app_flags(24)) / 4 = 84 bytes / 4 = 21 words
+  CONSTANT c_vis_payload_size         : NATURAL := c_packet_info.vis_payload_size;          -- = c_nof_complex*c_nof_visibilities;
+  CONSTANT c_vis_packet_size          : NATURAL := c_packet_info.vis_packet_size;           -- = c_vis_header_size + c_vis_payload_size;
   CONSTANT c_vis_nof_packets_per_sync : NATURAL := g_nof_beamlets * c_nof_channels;
   CONSTANT c_vis_nof_data_per_sync    : NATURAL := c_vis_nof_packets_per_sync * c_vis_packet_size;
-  --CONSTANT c_db_sync_delay            : NATURAL := c_vis_nof_data_per_sync - c_vis_packet_size;  -- at last packet in sync interval
-  CONSTANT c_db_sync_delay            : NATURAL := 0;
     
-  -- . 1GbE output
   CONSTANT c_eth_check_nof_packets  : NATURAL := c_nof_cor_sync * c_nof_channels;
-  CONSTANT c_eth_header_size        : NATURAL := 21;  -- (pad(2) + eth(14) + ip(20) + udp(8) + app(16+24))/4 = 84 / 4 
-  CONSTANT c_udp_payload_size       : NATURAL := c_nof_complex * c_nof_visibilities;
-  CONSTANT c_eth_packet_size        : NATURAL := c_eth_header_size + c_udp_payload_size;
-
+  CONSTANT c_eth_runtime_timeout    : TIME := g_nof_bg_sync * c_bg_nof_clk_per_sync * c_dp_clk_period * 2;  -- factor 2 margin
+  SIGNAL dbg_c_eth_runtime_timeout  : TIME := c_eth_runtime_timeout;
                                      
+  -- . DUT output DB
+  --CONSTANT c_db_sync_delay            : NATURAL := c_vis_nof_data_per_sync - c_vis_packet_size;  -- at last packet in sync interval
+  CONSTANT c_db_sync_delay            : NATURAL := 0;
+  
+  -- Show parameter values for HW generics in Wave window for debugging purposes
+  CONSTANT c_hw_packet_info                  : t_apertif_unb1_correlator_packet_info := func_apertif_unb1_correlator_packet_info(300, 156.25, 200.0);
+  CONSTANT c_hw_inter_channel_delay_min      : INTEGER := func_apertif_unb1_correlator_inter_channel_delay_min(c_hw_packet_info, 300);
+  CONSTANT c_hw_inter_channel_delay_max_8bit : INTEGER := func_apertif_unb1_correlator_inter_channel_delay_max(256, 800000,  88, 64, 300);
+  CONSTANT c_hw_inter_channel_delay_max_6bit : INTEGER := func_apertif_unb1_correlator_inter_channel_delay_max(256, 800000, 120, 64, 300);
+
   -- Phasor: exp(j*angle) = cos(angle) + j*sin(angle)
   -- A complex FFT of N points has N bins or channels: ch = -N/2:0:N/2-1.
   -- To create an FFT input phasor with frequency in the middle of a channel use FREQ = ch.
@@ -372,6 +487,18 @@ ARCHITECTURE tb OF tb_node_apertif_unb1_correlator_processing_output IS
   SIGNAL dp_rst                   : STD_LOGIC := '1';
   SIGNAL verify_activated         : STD_LOGIC := '0';
   
+  -- Show parameter values for Tb generics in Wave window for debugging purposes
+  SIGNAL dbg_c_packet_info             : t_apertif_unb1_correlator_packet_info := func_apertif_unb1_correlator_packet_info(c_nof_visibilities, f_link, f_dp_clk);
+  SIGNAL dbg_c_inter_channel_delay     : INTEGER := c_inter_channel_delay;
+  SIGNAL dbg_c_inter_channel_delay_min : INTEGER := c_inter_channel_delay_min;
+  SIGNAL dbg_c_inter_channel_delay_max : INTEGER := c_inter_channel_delay_max;
+  
+  -- Show parameter values for HW generics in Wave window for debugging purposes
+  SIGNAL dbg_c_hw_packet_info                  : t_apertif_unb1_correlator_packet_info := c_hw_packet_info;
+  SIGNAL dbg_c_hw_inter_channel_delay_min      : INTEGER := c_hw_inter_channel_delay_min;
+  SIGNAL dbg_c_hw_inter_channel_delay_max_6bit : INTEGER := c_hw_inter_channel_delay_max_6bit;
+  SIGNAL dbg_c_hw_inter_channel_delay_max_8bit : INTEGER := c_hw_inter_channel_delay_max_8bit;
+
   SIGNAL bg_start                 : STD_LOGIC;
   SIGNAL bg_sync_cnt              : NATURAL := 0;
   SIGNAL cor_sync_cnt             : NATURAL;
@@ -418,6 +545,24 @@ BEGIN
   dp_rst <= '1', '0' AFTER c_dp_clk_period*7;
   mm_rst <= '1', '0' AFTER c_mm_clk_period*7;
 
+  p_check_inter_channel_delay : PROCESS  -- one-shot process: calls the output rate verify-and-log function once for the tb settings
+    VARIABLE v_bool : BOOLEAN := TRUE;  -- TRUE enables the call below
+    --VARIABLE v_bool : BOOLEAN := FALSE;  -- alternative: FALSE skips the call
+  BEGIN
+    WAIT FOR 1 us;  -- call is made 1 us into the simulation; presumably to separate the log from start-up messages -- TODO confirm
+    IF v_bool THEN
+      v_bool := func_apertif_unb1_correlator_verify_and_log_output_rate(c_inter_channel_delay,  -- returned BOOLEAN overwrites v_bool but is not read afterwards
+                                                                        c_bg_block_period,
+                                                                        c_nof_blocks_per_sync,
+                                                                        c_nof_beamlets,  -- NOTE(review): c_inter_channel_delay_max is computed with g_nof_beamlets; confirm c_nof_beamlets has the same value
+                                                                        c_nof_channels,
+                                                                        c_nof_visibilities,
+                                                                        f_link,  -- link rate constant of this tb, 1000.0 = 1 Gbps
+                                                                        f_dp_clk);  -- dp_clk rate constant of this tb, 200.0 = 200 MHz
+    END IF;
+    WAIT;  -- suspend forever, so the body above executes only once
+  END PROCESS;
+    
   p_mm_stimuli : PROCESS
     CONSTANT c_tp_force_zero   : BOOLEAN := FALSE;  -- TRUE to understand corr_folder order of [tp_pair][pair], else FALSE
     VARIABLE c_tp_sel          : NATURAL := 0;     -- selected TP for force data parallel
@@ -636,9 +781,9 @@ BEGIN
   u_eth_statistics : ENTITY eth_lib.eth_statistics
     GENERIC MAP (
       g_runtime_nof_packets => c_eth_check_nof_packets,
-      g_runtime_timeout     => 1000 us,
+      g_runtime_timeout     => c_eth_runtime_timeout,
       g_check_nof_valid     => TRUE,
-      g_check_nof_valid_ref => c_eth_check_nof_packets*c_eth_packet_size
+      g_check_nof_valid_ref => c_eth_check_nof_packets*c_vis_packet_size
     )
   PORT MAP (  
     eth_serial_in => eth_sgout,
@@ -706,7 +851,7 @@ BEGIN
     VARIABLE v_rd_vis_im   : INTEGER;
     VARIABLE v_rd_timestamp    : STD_LOGIC_VECTOR(63 DOWNTO 0);
     VARIABLE v_exp_beamlet     : NATURAL;  -- interleaved beamlet index
-    VARIABLE v_exp_vis_channel : NATURAL;  -- channel index in visibility offload packet
+    VARIABLE v_exp_channel     : NATURAL;  -- channel index in visibility offload packet
     VARIABLE v_exp_vis_re      : INTEGER;
     VARIABLE v_exp_vis_im      : INTEGER;
     VARIABLE v_exp_timestamp   : STD_LOGIC_VECTOR(63 DOWNTO 0);
@@ -755,8 +900,8 @@ BEGIN
       v_exp_beamlet := v_exp_beamlet + vB * Q_interleave;                                                  -- offset serial beamlets
       FOR vC IN 0 TO c_nof_channels-1 LOOP
         v_addr := ((vB * c_nof_channels) + vC) * c_vis_packet_size;
-        v_exp_vis_channel := flip(vC, c_nof_channels_w);
-        v_exp_vis_channel := fft_shift(v_exp_vis_channel, c_nof_channels_w);
+        v_exp_channel := flip(vC, c_nof_channels_w);
+        v_exp_channel := fft_shift(v_exp_channel, c_nof_channels_w);
         -----------------------------------------------------------------------
         -- Verify header fields
         --    0 gap(16) + ETH dst mac hi(16)
@@ -807,8 +952,8 @@ BEGIN
         proc_mem_mm_bus_rd(v_addr + 12, mm_clk, ram_diag_data_buffer_output_miso, ram_diag_data_buffer_output_mosi);
         proc_mem_mm_bus_rd_latency(c_mem_reg_rd_latency, mm_clk);
         v_rd_channel := TO_UINT(ram_diag_data_buffer_output_miso.rddata(31 DOWNTO 16));
-        ASSERT v_rd_beamlet = v_exp_beamlet AND v_rd_channel = v_exp_vis_channel REPORT "Wrong beamlet, channel index in packet header: " &
-               "(" & int_to_str(v_exp_beamlet) & ", " & int_to_str(v_exp_vis_channel) & ") /= " &
+        ASSERT v_rd_beamlet = v_exp_beamlet AND v_rd_channel = v_exp_channel REPORT "Wrong beamlet, channel index in packet header: " &
+               "(" & int_to_str(v_exp_beamlet) & ", " & int_to_str(v_exp_channel) & ") /= " &
                "(" & int_to_str(v_rd_beamlet) & ", " & int_to_str(v_rd_channel) & ")" SEVERITY ERROR;
 
         -- . timestamp (= bsn)
@@ -866,10 +1011,10 @@ BEGIN
           END IF;
           -- . verify
           ASSERT v_rd_vis_re = v_exp_vis_re REPORT "Wrong real visibility for beamlet, channel index in packet payload " &
-                 "(" & int_to_str(v_exp_beamlet) & ", " & int_to_str(v_exp_vis_channel) & ") : " &
+                 "(" & int_to_str(v_exp_beamlet) & ", " & int_to_str(v_exp_channel) & ") : " &
                  int_to_str(v_rd_vis_re) & " /= " & int_to_str(v_exp_vis_re) SEVERITY ERROR;
           ASSERT v_rd_vis_im = v_exp_vis_im REPORT "Wrong imag visibility for beamlet, channel index in packet payload " &
-                 "(" & int_to_str(v_exp_beamlet) & ", " & int_to_str(v_exp_vis_channel) & ") : " &
+                 "(" & int_to_str(v_exp_beamlet) & ", " & int_to_str(v_exp_channel) & ") : " &
                  int_to_str(v_rd_vis_im) & " /= " & int_to_str(v_exp_vis_im) SEVERITY ERROR;
         END LOOP;
       END LOOP;