diff --git a/applications/apertif/designs/apertif_unb1_correlator/tb/vhdl/tb_node_apertif_unb1_correlator_processing_output.vhd b/applications/apertif/designs/apertif_unb1_correlator/tb/vhdl/tb_node_apertif_unb1_correlator_processing_output.vhd index 056a77b791c8027aa0852bf3f55349900f6b347d..9349f8312a3889ad63e6e15b3fb7c2eb43a7bd87 100644 --- a/applications/apertif/designs/apertif_unb1_correlator/tb/vhdl/tb_node_apertif_unb1_correlator_processing_output.vhd +++ b/applications/apertif/designs/apertif_unb1_correlator/tb/vhdl/tb_node_apertif_unb1_correlator_processing_output.vhd @@ -25,6 +25,26 @@ -- and DB in node_apertif_unb1_correlator_output -- -- Desription: +-- A) This tb verifies: +-- +-- 1) p_check_inter_channel_delay: Verify that g_inter_channel_delay is in min +-- max range +-- 2) p_mm_stimuli : set offload src MAC and dest MAC +-- p_mm_diag_data_buffer_output : verify offload src MAC, dest MAC, and +-- BSN timestamp, channel and beamlet +-- 3) p_mm_stimuli : set phasor waveform into BG and start BG +-- 3a)p_verify_cor_src_out: verify expected channel data at cor_src_out +-- interface for g_use_prefilter, g_use_prefilter_ones +-- 3b)p_mm_diag_data_buffer_output :verify expected channel data at offload +-- interface for g_use_prefilter, g_use_prefilter_ones +-- 4) u_eth_statistics : verify number of offloaded visibility packet data +-- +-- * Test 3a) is also covered by tb_node_apertif_unb1_correlator.vhd. +-- * Test 2) is also covered by tb_apertif_unb1_correlator_nodes.vhd using +-- file IO. +-- * Test 4) is also covered by tb_apertif_unb1_correlator_nodes.vhd. +-- +-- B) Data formats: -- The beamlet data after the mesh for all Nband*Ndest = 16*8 = 128 PN -- in the Apertif Correlator together is described by: -- @@ -69,94 +89,175 @@ -- . pair : Q_interleave -- . 
tp_pair : tp MOD Q_interleave -- +-- C) Maximum and minimum inter channel delay for visibility offload -- * c_inter_channel_delay -- With no inter channel output delay, channels are output back-to-back, creating a short full valid burst of 300 visibilities --- per channel for 64 channels per beamlet. This yields c_nof_complex * cor_out_dat_w * f_clk = 2 * 32 * 200M = 12.8 Gbps in burst. +-- per channel for 64 channels per beamlet. This yields c_nof_complex * c_cor_out_dat_w * f_clk = 2 * 32 * 200M = 12.8 Gbps in burst. +-- The link capacity reduces towards the output and is less than: +-- a) 12.8 Gbps = 200 MHz * 64 bit at node_apertif_unb1_correlator_processing cor_src_out sosi.re/im complex data output +-- b) 4 Gbps = 125 MHz * 32 bit at node_apertif_unb1_correlator_output sosi.data packet data output +-- c) 1 Gbps = at 1GbE interface of one node as defined by udp_tx_snk_in_arr with udp_tx_src_out_arr flow control +-- d) 250 Mbps = 1 Gbps / 4 at 1GbE interface of UniBoard with 8 nodes and one switch with 2 output ports +-- e) 156.25 Mbps = 20 Gbps / 128 at 2 * 10GbE interface of the Data Writer for 16 UniBoard each with 8 nodes so N_PN = 128 nodes -- -- * Set c_inter_channel_delay and g_bg_duty_cycle_gap in simulation with 1 PN and 1 GbE link: -- In the tb with c_nof_tp=2*2=4 there are c_nof_visibilities=10 visibilities, g_nof_beamlets=2 and c_nof_channels=64. Hence the -- number of output bits per BG sync interval is: -- --- c_nof_complex * cor_out_dat_w * c_nof_visibilities * c_nof_channels * g_nof_beamlets = 2 * 32 * 10 * 64 * 2 --- = 81920 bits = 1280 complex words +-- g_nof_beamlets * c_nof_channels * c_nof_visibilities * c_nof_complex * c_cor_out_dat_w = 2 * 64 * 10 * 2 * 32b = +-- = 81920 bits = 1280 complex words +-- . c_nof_channels = c_nof_points = 64 +-- . c_nof_blocks_per_sync = c_nof_tsub_per_sync = c_nof_tchan_per_sync * c_nof_channels +-- . c_nof_frames_per_sync = c_nof_blocks_per_sync +-- . c_frame_size = g_nof_beamlets +-- .
c_bg_block_period = c_frame_size * Q_interleave + c_bg_duty_cycle_gap +-- +-- Typical for Apertif correlator the c_bg_block_period = N_clk = 256 dp_clk cycles. To speed up simulation c_bg_duty_cycle_gap +-- can be set as small as possible: +-- +-- c_bg_nof_clk_per_sync = c_nof_blocks_per_sync * c_bg_block_period = 640 * (2*2 + 0) = 2560 dp_clk cycles minimal +-- +-- The minimal BG sync interval in the tb takes c_bg_nof_clk_per_sync = 2560 dp_clk cycles, so g_bg_duty_cycle_gap = 0 can be used, +-- because 2560 > 1280 at the cor_src_out sosi.re/im interface that can support < 12.8 Gbps. However the data rate at the 1GbE +-- serial PHY interface must remain < 1Gbps. Per channel there are: -- --- The BG sync interval in the tb takes c_bg_nof_clk_per_sync = c_nof_blocks_per_sync * c_bg_block_period = 960 * 4 = 3840 clk --- cycles, so g_bg_duty_cycle_gap=0 can be used, because 3840 > 1280 at the cor_src_out sosi.re/im interface. --- The data rate at the udp_tx_snk_in_arr(0).re/im interface is 125MHz * 32b = 4 Gbps. However at the 1GbE serial PHY interface --- the burst data rate must remain < 1Gbps. Per channel there are: +-- c_vis_header_size = 21 words +-- c_vis_payload_size = c_nof_visibilities * c_nof_complex = 10 * 2 = 20 words +-- c_vis_packet_size = c_vis_header_size + c_vis_payload_size = 21 + 20 = 41 words -- --- gap size = c_network_eth_gap_len * 8b = 96 bits for some idle time between packets --- header size = 21 * 32b = 672 bits --- tail size = 1 * 32b = 32 bits --- payload size = c_nof_complex * cor_out_dat_w * c_nof_visibilities = 2 * 32 * 10 = 640 bits --- packet size = gap size + header size + payload size + tail size = 96 + 672 + 640 + 32 = 1440 bits +-- c_vis_nof_packets_per_sync = g_nof_beamlets * c_nof_channels = 2 * 64 = 128 -- --- The transport over 1GbE takes at least 1440 ns @ 1Gbps. One dp_clk cycle is 5 ns (= 1/f_clk = 1/200M), so 1440 / 5 = 288 --- dp_clk cycles. The visibilities arrive in c_nof_visibilities=10 dp_clk cycles in cor_src_out. 
Therefore: +-- c_tail_size = 1 word +-- c_gap_size = c_network_eth_gap_len * 8b / 32b = 3 words for some idle time between packets +-- c_vis_packet_load = (c_vis_packet_size + c_tail_size + c_gap_size) * 32b = (41 + 1 + 3) * 32b = 45 * 32b = 1440 bits -- --- c_inter_channel_delay must be > 288 - c_nof_visibilities = 288 - 10 = 278 --> 279 +-- The transport of one packet over 1GbE takes at least c_vis_packet_load / f_link = 1440 ns (f_link = 1Gbps). One dp_clk cycle +-- is 5 ns (= 1/f_clk = 1/200M), so: +-- +-- c_vis_packet_nof_dp_clk = c_vis_packet_load * f_clk / f_link = 1440 * 200/1000 = 288 dp_clk cycles -- --- However with an c_inter_channel_delay the total offload time per sync interval becomes about: +-- The visibilities per packet arrive in c_nof_visibilities = 10 dp_clk cycles at the cor_src_out sosi.re/im interface. The +-- c_inter_channel_delay is applied at this interface. Therefore the minimum c_inter_channel_delay is: -- --- c_inter_channel_delay * c_nof_channels * g_nof_beamlets = 279 * 64 * 2 = 35712 dp_clk cycles +-- c_inter_channel_delay >= c_vis_packet_nof_dp_clk - c_nof_visibilities = 288 - 10 = 278 --> 279 dp_clk cycles -- --- This is > BG sync interval of 3840 dp_clk cycles with g_bg_duty_cycle_gap=0. Therefore g_bg_duty_cycle_gap needs to be +-- With this minimum c_inter_channel_delay the total offload time per sync interval becomes about: +-- +-- c_vis_nof_packets_per_sync * c_vis_packet_nof_dp_clk = (2 * 64) * 288 = 36864 dp_clk cycles +-- +-- This is > BG sync interval of 2560 dp_clk cycles with g_bg_duty_cycle_gap = 0. 
Therefore g_bg_duty_cycle_gap needs to be -- increased to: -- --- c_bg_block_period = c_bg_nof_clk_per_sync / c_nof_blocks_per_sync = 35712 / 640 = 55.8 --> 56 --- g_bg_duty_cycle_gap = c_bg_block_period - c_frame_size * Q_interleave = 56 - 2*2 = 52 +-- c_bg_block_period = c_bg_nof_clk_per_sync / c_nof_blocks_per_sync = 36864 / 640 = 57.6 --> 58 +-- g_bg_duty_cycle_gap = c_bg_block_period - c_frame_size * Q_interleave = 58 - 2*2 = 54 +-- +-- Therefore in combination with minimum c_inter_channel_delay = 279 use g_bg_duty_cycle_gap >= 54 to make it work for f_link = +-- 1Gbps. The maximum g_bg_duty_cycle_gap = N_clk - c_frame_size * Q_interleave = 256 - 88 * 2 = 80 and this is also the typical +-- g_bg_duty_cycle_gap for the Apertif correlator, because N_clk = 256 is fixed. Hence the typical BG sync interval is: +-- +-- c_bg_block_period = N_clk = 256 +-- c_bg_nof_clk_per_sync = c_nof_blocks_per_sync * c_bg_block_period = 640 * 256 = 163840 dp_clk cycles typical +-- +-- The visibility offload must finish within this BG sync interval, hence the maximum number of dp_clk cycles per visibility +-- packet is: -- --- Therefore in combination with c_inter_channel_delay=279 use g_bg_duty_cycle_gap >= 52 to make it work for 1GbE @ 1Gbps. +-- c_vis_packet_offload_nof_dp_clk <= c_bg_nof_clk_per_sync / c_vis_nof_packets_per_sync = 163840 / (2 * 64) = 1280 -- --- To make it work with N_PN = 128 nodes and 2 * 10GbE links to the data writer the c_inter_channel_delay needs to be larger --- to reduce the rate per 1GbE link to < 20 Gbps / 128 = 156.25 Mbps. +-- Therefore the maximum c_inter_channel_delay applied at the cor_src_out sosi.re/im interface becomes: +-- +-- c_inter_channel_delay <= c_vis_packet_offload_nof_dp_clk - c_nof_visibilities = 1280 - 10 = 1270 +-- +-- * Correlator constraint on c_nof_tchan_per_sync: +-- The correlator.vhd has the constraint that c_nof_tchan_per_sync >= c_nof_mult = c_nof_visibilities / 2**c_nof_pre_mult_folds. 
+-- Therefore choose c_nof_tchan_per_sync >= 300 / 2**1 = 150. -- -- * Set c_inter_channel_delay on hardware with 128 PN and 2 10GbE links: -- In total there are c_nof_beamlets * c_nof_channels * c_nof_visibilities per t_int = 1.024 sec, so 88 * 64 * 300 * 2 * 32b / 1.024s = -- 105.6 Mbps on average (8 bit mode with 88 beamlets) or 144 Mbps (6 bit mode with 120 beamlets), which can run over one 1GbE --- link. The total Apertif correlator output rate for N_PN = 128 nodes to the Apertif data writer is 128 * 105.6M = 13.5168 Gbps --- respectively 18.432 Gbps, which can run over two 10GbE links. --- --- We need to set an appropriate number of c_inter_channel_delay cycles for a constant visibility buffer output rate. The --- correlator outputs c_nof_beamlets * c_nof_channels = 88 * 64 = 5632 respectively 120 * 64 = 7680 blocks of c_nof_visibilities = --- 300 visibility samples per t_int = N_int / f_sub = 1.024 s = 204.8M dp_clk cycles @ f_clk = 200 MHz. Hence per block there --- are maximum 36363 or 26666 dp_clk cycles available. The block itself takes c_nof_visibilities = 300 dp_clk cycles. Hence the --- maximum c_inter_channel_delay becomes 36363 - 300 = 36063 or 26666 - 300 = 26066 dp_clk cycles. --- The minimum c_inter_channel_delay depends on the capacity of the two 10GbE links. Using the total Apertif correlator output --- this yields about 13.5168G/20GbE * max(c_inter_channel_delay) = 24373 for 88 beamlets respectively 24022 for 120 beamlets. --- --- In formula: +-- link. The total Apertif correlator output rate for N_PN = 128 nodes to the Apertif data writer is 128 * 105.6M = 13.5168 Gbps so +-- 13.5168/20 = 0.676 utilization, respectively 128 * 144M = 18.432 Gbps so 18.432/20 = 0.922 utilization, which can run over two +-- 10GbE links. +-- +-- We need to set an appropriate number of c_inter_channel_delay cycles for a constant visibility buffer output rate. +-- . 
The maximum c_inter_channel_delay is determined by the number of cycles per sync interval divided by the number of +-- visibility packets per sync interval. The correlator outputs c_vis_nof_packets_per_sync = c_nof_beamlets * c_nof_channels +-- = 88 * 64 = 5632 respectively 120 * 64 = 7680 blocks of c_nof_visibilities = 300 visibility samples per +-- t_int = N_int / f_sub = 1.024 s = 204.8M dp_clk cycles @ f_clk = 200 MHz. Hence per block there are maximum +-- 204.8M / 5632 = 36363 or 204.8M / 7680 = 26666 dp_clk cycles available. The block itself takes c_nof_visibilities +-- = 300 dp_clk cycles. Hence the maximum c_inter_channel_delay becomes 36363 - 300 = 36063 or 26666 - 300 = 26366 +-- dp_clk cycles. +-- . The minimum c_inter_channel_delay is determined by the available data rate f_link and the processing clock rate of dp_clk. +-- Important to note is that the burst rate is independent of the packet size, however the minimum inter_channel_delay is +-- dependent on the packet size, because the inter_channel_delay is applied between packets (so not between samples). Therefore +-- the minimum c_inter_channel_delay is also determined by the number of bits per visibility packet and the number of dp_clk +-- cycles to process a visibility packet. Per PN the link capacity is f_link = 20G/128 = 156.25 Mbps. +-- The c_vis_packet_load = (21 + 300*2 + 1 +3) * 32b = 20000 bits per packet. With f_clk = 200 MHz, so 5 ns period, this +-- corresponds to c_vis_packet_nof_dp_clk = c_vis_packet_load * f_clk / f_link = 20000 * 200M / 156.25 M = 25600 +-- dp_clk cycles. Internally one visibility packet is handled in c_nof_visibilities = 300 dp_clk cycles, so the minimum +-- c_inter_channel_delay = 25600 - 300 = 25300 dp_clk cycles and is independent of the beamlet bit mode, so the same for +-- 8 bit and 6 bit. +-- .
As check the ratio between min and max c_inter_channel_delay is (25300 + 300) / (36063 + 300) = 0.704 for 8 bit beamlet +-- mode and (25300 + 300) / (26366 + 300) = 0.960 for 6 bit beamlet mode. +-- +-- In formula with HW parameter values: -- . N_clk = f_clk / f_sub = 200M / 781250 = 256 -- . N_int = c_nof_tsub_per_sync = c_nof_tchan_per_sync*c_nof_points = 12500 * 64 = 800000 -- . c_nof_clk_per_sync = N_int * N_clk = 204.8M --- . c_nof_visibility_blocks = c_nof_beamlets * c_nof_channels = 5632 for c_nof_beamlets = 88 and c_nof_channels = 64 --- = 7680 for c_nof_beamlets = 120 and c_nof_channels = 64 +-- . c_vis_nof_packets_per_sync = c_nof_beamlets * c_nof_channels = 5632 for c_nof_beamlets = 88 and c_nof_channels = 64 +-- = 7680 for c_nof_beamlets = 120 and c_nof_channels = 64 -- . c_nof_visibilities = 300 = (24*25)/2 for c_nof_tp = 24 --- c_inter_channel_delay = c_nof_clk_per_sync / c_nof_visibility_blocks - c_nof_visibilities = 204.8M / 5632 - 300 = 36063 --- = 204.8M / 7680 - 300 = 26066 --- --- In simulation the N_int needs to be set much smaller than 800000, to shorten the simulation time as much as possible. --- When c_nof_beamlets is 2 and c_nof_tp = 4 then: --- . N_clk = 256 --- . N_int = c_nof_tsub_per_sync = c_nof_tchan_per_sync*c_nof_points = 10 * 64 --- . c_nof_clk_per_sync = N_int * N_clk = 163840 --- . c_nof_visibility_blocks = 128 for c_nof_beamlets = 2 and c_nof_channels = 64 --- . c_nof_visibilities = 10 = ( 4* 5)/2 for c_nof_tp = 4 --- c_inter_channel_delay = 163840 / 128 - 10 = 1270 > 0 so this is possible even without throttle BG xon --- Clearly the c_nof_beamlets has more impact than the c_nof_tp. --- --- When c_nof_beamlets is 88 and c_nof_tp = 4 then: --- . N_int = c_nof_tsub_per_sync = c_nof_tchan_per_sync*c_nof_points = 10 * 64 --- . c_nof_clk_per_sync = N_int * N_clk = 163840 --- . c_nof_visibility_blocks = 5632 for c_nof_beamlets = 88 and c_nof_channels = 64 --- .
c_nof_visibilities = 10 = ( 4* 5)/2 for c_nof_tp = 4 --- c_inter_channel_delay = 163840 / 5632 - 10 = 19 > 0, so possible. --- --- If c_nof_tp is increased also then BG xon can be kept active if c_nof_tchan_per_sync is increased also. For --- c_nof_beamlets = 88 and c_nof_tp = 24 get c_nof_clk_per_sync >= c_nof_visibility_blocks * c_nof_visibilities = 5632 * 300 = 1689600 --- and c_nof_tchan_per_sync = c_nof_clk_per_sync / N_clk / c_nof_points = 1689600 / 256 / 64 = 103, so choose 110. However --- from the correlator.vhd the constraint is c_nof_tchan_per_sync >= c_nof_mult = c_nof_visibilities / 2**c_nof_pre_mult_folds. --- Therefore choose c_nof_tchan_per_sync >= 300 / 2**1 = 150. +-- . f_link = 156.25M +-- . maximum c_inter_channel_delay = c_nof_clk_per_sync / c_vis_nof_packets_per_sync - c_nof_visibilities +-- = 204.8M / 5632 - 300 = 36063 = c_hw_inter_channel_delay_max_8bit +-- = 204.8M / 7680 - 300 = 26366 = c_hw_inter_channel_delay_max_6bit +-- . minimum c_inter_channel_delay = c_vis_packet_nof_dp_clk - c_nof_visibilities +-- = (25 + 300*2) * 32b * 200M/156.25M - 300 = 25300 = c_hw_inter_channel_delay_min +-- +-- In simulation: +-- The N_int needs to be set much smaller than 800000, to shorten the simulation time as much as possible. +-- When c_nof_beamlets is 2 and c_nof_tp = 4 then: +-- . c_bg_block_period = N_clk = 256 +-- . c_nof_visibilities = 10 = (4* 5)/2 for c_nof_tp = 4 +-- . N_int = c_nof_tsub_per_sync = c_nof_tchan_per_sync*c_nof_points = 10 * 64 = 640 +-- . c_nof_clk_per_sync = N_int * c_bg_block_period = 640 * 256 = 163840 +-- . c_vis_nof_packets_per_sync = 128 for c_nof_beamlets = 2 and c_nof_channels = 64 +-- . f_link = 1G, in simulation with one PN use full 1GbE capacity +-- . maximum c_inter_channel_delay = c_nof_clk_per_sync / c_vis_nof_packets_per_sync - c_nof_visibilities +-- = 163840 / 128 - 10 = 1270 +-- . 
minimum c_inter_channel_delay = c_vis_packet_nof_dp_clk - c_nof_visibilities = +-- = (25 + 10*2) * 32b * 200M/1G - 10 = 278 +-- Trial simulations show: +-- <= between 230 and 240 it fails because FIFO in node output overflows +-- >= between 280 and 290 it fails because FIFO in corr_visibility_buffer overflows +-- Possibly the min value 278 and max value 280 are correct, but the FIFO overflow takes some time to occur, +-- so the simulation is too short to sharply reveal the min and max of c_inter_channel_delay. +-- +-- The fastest simulation is achieved if the c_inter_channel_delay min and max are almost +-- equal. The maximum c_inter_channel_delay can be reduced by reducing the c_bg_block_period +-- via g_bg_duty_cycle_gap, this then yields g_bg_duty_cycle_gap = 54 as shown above. To +-- check with g_bg_duty_cycle_gap = 54 and c_nof_beamlets is 2 and c_nof_tp = 4 then: +-- . c_bg_block_period = c_frame_size * Q_interleave + c_bg_duty_cycle_gap = 2*2+ 54 = 58 +-- . c_nof_visibilities = 10 = (4* 5)/2 for c_nof_tp = 4 +-- . N_int = c_nof_tsub_per_sync = c_nof_tchan_per_sync*c_nof_points = 10 * 64 = 640 +-- . c_nof_clk_per_sync = N_int * c_bg_block_period = 640 * 58 = 37120 +-- . c_vis_nof_packets_per_sync = 128 for c_nof_beamlets = 2 and c_nof_channels = 64 +-- . f_link = 1G, in simulation with one PN use full 1GbE capacity +-- . maximum c_inter_channel_delay = c_nof_clk_per_sync / c_vis_nof_packets_per_sync - c_nof_visibilities +-- = 37120 / 128 - 10 = 280 +-- . minimum c_inter_channel_delay = c_vis_packet_nof_dp_clk - c_nof_visibilities = +-- = (25 + 10*2) * 32b * 200M/1G - 10 = 278, so choose 279 to fit within min and max + +-- When c_nof_beamlets is 2 and c_nof_tp = 24 then: +-- . c_bg_block_period = N_clk = 256 +-- . c_nof_visibilities = 300 = (24*25)/2 for c_nof_tp = 24 +-- . N_int = c_nof_tsub_per_sync = c_nof_tchan_per_sync*c_nof_points = 160 * 64 = 10240, where correlator constraint 160 > 300/2 +-- .
c_nof_clk_per_sync = N_int * c_bg_block_period = 2621440 +-- . c_vis_nof_packets_per_sync = 128 for c_nof_beamlets = 2 and c_nof_channels = 64 +-- . f_link = 1G, in simulation with one PN use full 1GbE capacity +-- . maximum c_inter_channel_delay = c_nof_clk_per_sync / c_vis_nof_packets_per_sync - c_nof_visibilities +-- = 2621440 / 128 - 300 = 20180 +-- . minimum c_inter_channel_delay = c_vis_packet_nof_dp_clk - c_nof_visibilities = +-- = (25 + 300*2) * 32b * 200M/1G - 300 = 3700 -- -- Usage: -- > as 10 @@ -185,24 +286,25 @@ USE dp_lib.dp_stream_pkg.ALL; USE diag_lib.diag_pkg.ALL; USE fft_lib.fft_pkg.ALL; USE wpfb_lib.wpfb_pkg.ALL; +USE work.apertif_unb1_correlator_pkg.ALL; ENTITY tb_node_apertif_unb1_correlator_processing_output IS GENERIC ( g_tb_end : BOOLEAN := TRUE; -- when TRUE then tb_end ends this simulation, else a higher multi-testbench will end the simulation - g_tb_index : NATURAL := 0; -- use different index to avoid MM file conflict in multi tb g_nof_bg_sync : NATURAL := 2; -- [t] number of BG sync intervals, is number ofintegration intervals -- DUT settings g_nof_pn : NATURAL := 2; -- number of PN, choose >= 1 g_nof_10G : NATURAL := 2; -- number of 10G input per PN, choose >= 2 and even (to fit Q_interleave=2) g_nof_beamlets : NATURAL := 2; -- [bu_i], is 88 in 8bit, 120 in 6bit beamlet mode g_nof_tchan_per_sync : NATURAL := 10; -- [t_c], is 12500, choose > number of taps of WPFB to simulate longer than the FIR impulse response - g_inter_channel_delay : NATURAL := 279; -- throttle correlator output + g_inter_channel_delay : NATURAL := 279; -- throttle correlator output, for expected 279: g_use_wpfb : BOOLEAN := TRUE; g_use_prefilter : BOOLEAN := TRUE; g_use_prefilter_ones : BOOLEAN := FALSE; -- BG settings - g_bg_duty_cycle_gap : NATURAL := 56; -- 0 or small for faster simulation, use N_clk - Q_interleave*g_nof_beamlets for HW + g_bg_duty_cycle_gap : NATURAL := 54; -- small for faster simulation, use N_clk - Q_interleave*g_nof_beamlets for HW +
--g_bg_duty_cycle_gap : NATURAL := 256; -- use 256 to default to N_clk - Q_interleave*g_nof_beamlets for HW g_phasor_ampl : REAL := 0.25; -- range 0:1 where by 1.0 corresponds to maximum +amplitude, use <= 1.0 / 2**c_wpfb_fil_in_backoff_w g_phasor_freq : REAL := 1.0; -- for N=64 point FFT choose channel in range -32.0 : 0.0 : 31.0 g_phasor_phase : REAL := 0.0 -- [0:2pi>, has nearly no effect on correlator output, because all inputs use the same phasor @@ -221,6 +323,10 @@ ARCHITECTURE tb OF tb_node_apertif_unb1_correlator_processing_output IS CONSTANT c_dp_clk_period : TIME := 5 ns; CONSTANT c_cross_clock_domain_latency : NATURAL := 20; + CONSTANT f_link : REAL := 1000.0; -- 1 Gbps + --CONSTANT f_link : REAL := 156.25; + CONSTANT f_dp_clk : REAL := 200.0; -- 200 MHz + -- UniBoard CONSTANT c_unb_nr : NATURAL := 14; -- Uniboard 0:15 CONSTANT c_fpga_nr : NATURAL := 5; -- FPGA 0:7 on a UniBoard, FPGA 0:3 = FN 0:3, FPGA 4:7 = BN 0:3 @@ -269,32 +375,41 @@ ARCHITECTURE tb OF tb_node_apertif_unb1_correlator_processing_output IS CONSTANT c_bg_nof_clk_per_sync : NATURAL := c_nof_blocks_per_sync * c_bg_block_period; -- . 
correlator DUT input - CONSTANT c_nof_tp : NATURAL := g_nof_pn * g_nof_10G; -- number of telescope paths + CONSTANT c_nof_tp : NATURAL := g_nof_pn * g_nof_10G; -- number of telescope paths - CONSTANT c_nof_visibilities : NATURAL := c_nof_tp * (c_nof_tp+1) / 2; - CONSTANT c_accu_nof_samples : NATURAL := c_nof_tsub_per_sync; -- [ti] - CONSTANT c_inter_channel_delay : NATURAL := g_inter_channel_delay; + CONSTANT c_nof_visibilities : NATURAL := c_nof_tp * (c_nof_tp+1) / 2; + CONSTANT c_accu_nof_samples : NATURAL := c_nof_tsub_per_sync; -- [ti] + CONSTANT c_inter_channel_delay : NATURAL := g_inter_channel_delay; - CONSTANT c_in_dat_w : NATURAL := c_wpfb_apertif_channels.fil_in_dat_w; -- = 8 bit - CONSTANT c_in_complex_w : NATURAL := c_nof_complex * c_in_dat_w; -- = 16 bit, complex Im & Re - CONSTANT c_cor_out_dat_w : NATURAL := 32; + CONSTANT c_in_dat_w : NATURAL := c_wpfb_apertif_channels.fil_in_dat_w; -- = 8 bit + CONSTANT c_in_complex_w : NATURAL := c_nof_complex * c_in_dat_w; -- = 16 bit, complex Im & Re + CONSTANT c_cor_out_dat_w : NATURAL := 32; - -- . DUT output DB - CONSTANT c_vis_header_size : NATURAL := 21; -- (pad(2) + eth(14) + ip(20) + udp(8) + app_id(16) + app_flags(24)) / 4 = 84 bytes / 4 = 21 words - CONSTANT c_vis_payload_size : NATURAL := c_nof_complex*c_nof_visibilities; - CONSTANT c_vis_packet_size : NATURAL := c_vis_header_size + c_vis_payload_size; + -- . 
1GbE output + CONSTANT c_packet_info : t_apertif_unb1_correlator_packet_info := func_apertif_unb1_correlator_packet_info(c_nof_visibilities, f_link, f_dp_clk); + CONSTANT c_inter_channel_delay_min : INTEGER := func_apertif_unb1_correlator_inter_channel_delay_min(c_packet_info, c_nof_visibilities); + CONSTANT c_inter_channel_delay_max : INTEGER := func_apertif_unb1_correlator_inter_channel_delay_max(c_bg_block_period, c_bg_nof_blocks_per_sync, g_nof_beamlets, c_nof_channels, c_nof_visibilities); + + CONSTANT c_vis_header_size : NATURAL := c_packet_info.vis_header_size; -- = (pad(2) + eth(14) + ip(20) + udp(8) + app_id(16) + app_flags(24)) / 4 = 84 bytes / 4 = 21 words + CONSTANT c_vis_payload_size : NATURAL := c_packet_info.vis_payload_size; -- = c_nof_complex*c_nof_visibilities; + CONSTANT c_vis_packet_size : NATURAL := c_packet_info.vis_packet_size; -- = c_vis_header_size + c_vis_payload_size; CONSTANT c_vis_nof_packets_per_sync : NATURAL := g_nof_beamlets * c_nof_channels; CONSTANT c_vis_nof_data_per_sync : NATURAL := c_vis_nof_packets_per_sync * c_vis_packet_size; - --CONSTANT c_db_sync_delay : NATURAL := c_vis_nof_data_per_sync - c_vis_packet_size; -- at last packet in sync interval - CONSTANT c_db_sync_delay : NATURAL := 0; - -- . 1GbE output CONSTANT c_eth_check_nof_packets : NATURAL := c_nof_cor_sync * c_nof_channels; - CONSTANT c_eth_header_size : NATURAL := 21; -- (pad(2) + eth(14) + ip(20) + udp(8) + app(16+24))/4 = 84 / 4 - CONSTANT c_udp_payload_size : NATURAL := c_nof_complex * c_nof_visibilities; - CONSTANT c_eth_packet_size : NATURAL := c_eth_header_size + c_udp_payload_size; - + CONSTANT c_eth_runtime_timeout : TIME := g_nof_bg_sync * c_bg_nof_clk_per_sync * c_dp_clk_period * 2; -- factor 2 margin + SIGNAL dbg_c_eth_runtime_timeout : TIME := c_eth_runtime_timeout; + -- . 
DUT output DB + --CONSTANT c_db_sync_delay : NATURAL := c_vis_nof_data_per_sync - c_vis_packet_size; -- at last packet in sync interval + CONSTANT c_db_sync_delay : NATURAL := 0; + + -- Show parameter values for HW generics in Wave window for debugging purposes + CONSTANT c_hw_packet_info : t_apertif_unb1_correlator_packet_info := func_apertif_unb1_correlator_packet_info(300, 156.25, 200.0); + CONSTANT c_hw_inter_channel_delay_min : INTEGER := func_apertif_unb1_correlator_inter_channel_delay_min(c_hw_packet_info, 300); + CONSTANT c_hw_inter_channel_delay_max_8bit : INTEGER := func_apertif_unb1_correlator_inter_channel_delay_max(256, 800000, 88, 64, 300); + CONSTANT c_hw_inter_channel_delay_max_6bit : INTEGER := func_apertif_unb1_correlator_inter_channel_delay_max(256, 800000, 120, 64, 300); + -- Phasor: exp(j*angle) = cos(angle) + j*sin(angle) -- A complex FFT of N points has N bins or channels: ch = -N/2:0:N/2-1. -- To create an FFT input phasor with frequency in the middle of a channel use FREQ = ch. 
@@ -372,6 +487,18 @@ ARCHITECTURE tb OF tb_node_apertif_unb1_correlator_processing_output IS SIGNAL dp_rst : STD_LOGIC := '1'; SIGNAL verify_activated : STD_LOGIC := '0'; + -- Show parameter values for Tb generics in Wave window for debugging purposes + SIGNAL dbg_c_packet_info : t_apertif_unb1_correlator_packet_info := func_apertif_unb1_correlator_packet_info(c_nof_visibilities, f_link, f_dp_clk); + SIGNAL dbg_c_inter_channel_delay : INTEGER := c_inter_channel_delay; + SIGNAL dbg_c_inter_channel_delay_min : INTEGER := c_inter_channel_delay_min; + SIGNAL dbg_c_inter_channel_delay_max : INTEGER := c_inter_channel_delay_max; + + -- Show parameter values for HW generics in Wave window for debugging purposes + SIGNAL dbg_c_hw_packet_info : t_apertif_unb1_correlator_packet_info := c_hw_packet_info; + SIGNAL dbg_c_hw_inter_channel_delay_min : INTEGER := c_hw_inter_channel_delay_min; + SIGNAL dbg_c_hw_inter_channel_delay_max_6bit : INTEGER := c_hw_inter_channel_delay_max_6bit; + SIGNAL dbg_c_hw_inter_channel_delay_max_8bit : INTEGER := c_hw_inter_channel_delay_max_8bit; + SIGNAL bg_start : STD_LOGIC; SIGNAL bg_sync_cnt : NATURAL := 0; SIGNAL cor_sync_cnt : NATURAL; @@ -418,6 +545,24 @@ BEGIN dp_rst <= '1', '0' AFTER c_dp_clk_period*7; mm_rst <= '1', '0' AFTER c_mm_clk_period*7; + p_check_inter_channel_delay : PROCESS + VARIABLE v_bool : BOOLEAN := TRUE; + --VARIABLE v_bool : BOOLEAN := FALSE; + BEGIN + WAIT FOR 1 us; + IF v_bool THEN + v_bool := func_apertif_unb1_correlator_verify_and_log_output_rate(c_inter_channel_delay, + c_bg_block_period, + c_nof_blocks_per_sync, + c_nof_beamlets, + c_nof_channels, + c_nof_visibilities, + f_link, + f_dp_clk); + END IF; + WAIT; + END PROCESS; + p_mm_stimuli : PROCESS CONSTANT c_tp_force_zero : BOOLEAN := FALSE; -- TRUE to understand corr_folder order of [tp_pair][pair], else FALSE VARIABLE c_tp_sel : NATURAL := 0; -- selected TP for force data parallel @@ -636,9 +781,9 @@ BEGIN u_eth_statistics : ENTITY eth_lib.eth_statistics GENERIC 
MAP ( g_runtime_nof_packets => c_eth_check_nof_packets, - g_runtime_timeout => 1000 us, + g_runtime_timeout => c_eth_runtime_timeout, g_check_nof_valid => TRUE, - g_check_nof_valid_ref => c_eth_check_nof_packets*c_eth_packet_size + g_check_nof_valid_ref => c_eth_check_nof_packets*c_vis_packet_size ) PORT MAP ( eth_serial_in => eth_sgout, @@ -706,7 +851,7 @@ BEGIN VARIABLE v_rd_vis_im : INTEGER; VARIABLE v_rd_timestamp : STD_LOGIC_VECTOR(63 DOWNTO 0); VARIABLE v_exp_beamlet : NATURAL; -- interleaved beamlet index - VARIABLE v_exp_vis_channel : NATURAL; -- channel index in visibility offload packet + VARIABLE v_exp_channel : NATURAL; -- channel index in visibility offload packet VARIABLE v_exp_vis_re : INTEGER; VARIABLE v_exp_vis_im : INTEGER; VARIABLE v_exp_timestamp : STD_LOGIC_VECTOR(63 DOWNTO 0); @@ -755,8 +900,8 @@ BEGIN v_exp_beamlet := v_exp_beamlet + vB * Q_interleave; -- offset serial beamlets FOR vC IN 0 TO c_nof_channels-1 LOOP v_addr := ((vB * c_nof_channels) + vC) * c_vis_packet_size; - v_exp_vis_channel := flip(vC, c_nof_channels_w); - v_exp_vis_channel := fft_shift(v_exp_vis_channel, c_nof_channels_w); + v_exp_channel := flip(vC, c_nof_channels_w); + v_exp_channel := fft_shift(v_exp_channel, c_nof_channels_w); ----------------------------------------------------------------------- -- Verify header fields -- 0 gap(16) + ETH dst mac hi(16) @@ -807,8 +952,8 @@ BEGIN proc_mem_mm_bus_rd(v_addr + 12, mm_clk, ram_diag_data_buffer_output_miso, ram_diag_data_buffer_output_mosi); proc_mem_mm_bus_rd_latency(c_mem_reg_rd_latency, mm_clk); v_rd_channel := TO_UINT(ram_diag_data_buffer_output_miso.rddata(31 DOWNTO 16)); - ASSERT v_rd_beamlet = v_exp_beamlet AND v_rd_channel = v_exp_vis_channel REPORT "Wrong beamlet, channel index in packet header: " & - "(" & int_to_str(v_exp_beamlet) & ", " & int_to_str(v_exp_vis_channel) & ") /= " & + ASSERT v_rd_beamlet = v_exp_beamlet AND v_rd_channel = v_exp_channel REPORT "Wrong beamlet, channel index in packet header: " & + 
"(" & int_to_str(v_exp_beamlet) & ", " & int_to_str(v_exp_channel) & ") /= " & "(" & int_to_str(v_rd_beamlet) & ", " & int_to_str(v_rd_channel) & ")" SEVERITY ERROR; -- . timestamp (= bsn) @@ -866,10 +1011,10 @@ BEGIN END IF; -- . verify ASSERT v_rd_vis_re = v_exp_vis_re REPORT "Wrong real visibility for beamlet, channel index in packet payload " & - "(" & int_to_str(v_exp_beamlet) & ", " & int_to_str(v_exp_vis_channel) & ") : " & + "(" & int_to_str(v_exp_beamlet) & ", " & int_to_str(v_exp_channel) & ") : " & int_to_str(v_rd_vis_re) & " /= " & int_to_str(v_exp_vis_re) SEVERITY ERROR; ASSERT v_rd_vis_im = v_exp_vis_im REPORT "Wrong imag visibility for beamlet, channel index in packet payload " & - "(" & int_to_str(v_exp_beamlet) & ", " & int_to_str(v_exp_vis_channel) & ") : " & + "(" & int_to_str(v_exp_beamlet) & ", " & int_to_str(v_exp_channel) & ") : " & int_to_str(v_rd_vis_im) & " /= " & int_to_str(v_exp_vis_im) SEVERITY ERROR; END LOOP; END LOOP;