From 41fdac20241f940049ab29e3b7dde2952fc30b83 Mon Sep 17 00:00:00 2001
From: Eric Kooistra <kooistra@astron.nl>
Date: Wed, 28 Feb 2024 16:12:38 +0100
Subject: [PATCH] First working version of tb_sdp_beamformer_remote_ring.vhd.

---
 applications/lofar2/libraries/sdp/hdllib.cfg  |   2 +
 .../tb/vhdl/tb_sdp_beamformer_remote_ring.vhd | 254 ++++++++++++++----
 2 files changed, 206 insertions(+), 50 deletions(-)

diff --git a/applications/lofar2/libraries/sdp/hdllib.cfg b/applications/lofar2/libraries/sdp/hdllib.cfg
index 6e7ad9ac34..7ad34980c3 100644
--- a/applications/lofar2/libraries/sdp/hdllib.cfg
+++ b/applications/lofar2/libraries/sdp/hdllib.cfg
@@ -37,6 +37,7 @@ test_bench_files =
     tb/vhdl/tb_sdp_crosslets_subband_select.vhd
     tb/vhdl/tb_sdp_beamformer_output.vhd
     tb/vhdl/tb_tb_sdp_beamformer_output.vhd
+    tb/vhdl/tb_sdp_beamformer_remote_ring.vhd
 
 regression_test_vhdl =
     tb/vhdl/tb_sdp_info.vhd
@@ -44,6 +45,7 @@ regression_test_vhdl =
     tb/vhdl/tb_tb_sdp_statistics_offload.vhd
     tb/vhdl/tb_sdp_crosslets_subband_select.vhd
     tb/vhdl/tb_tb_sdp_beamformer_output.vhd
+    tb/vhdl/tb_sdp_beamformer_remote_ring.vhd
 
 [modelsim_project_file]
 
diff --git a/applications/lofar2/libraries/sdp/tb/vhdl/tb_sdp_beamformer_remote_ring.vhd b/applications/lofar2/libraries/sdp/tb/vhdl/tb_sdp_beamformer_remote_ring.vhd
index 421f449654..e0142064f9 100644
--- a/applications/lofar2/libraries/sdp/tb/vhdl/tb_sdp_beamformer_remote_ring.vhd
+++ b/applications/lofar2/libraries/sdp/tb/vhdl/tb_sdp_beamformer_remote_ring.vhd
@@ -33,36 +33,104 @@
 -- > run -a
 -------------------------------------------------------------------------------
 
-library IEEE, common_lib, dp_lib, reorder_lib;
+library IEEE, common_lib, dp_lib, ring_lib, tr_10GbE_lib, tech_pll_lib;
 use IEEE.std_logic_1164.all;
 use common_lib.common_pkg.all;
 use common_lib.common_mem_pkg.all;
 use common_lib.tb_common_pkg.all;
 use common_lib.tb_common_mem_pkg.all;
 use dp_lib.dp_stream_pkg.all;
+use ring_lib.ring_pkg.all;
+use tech_pll_lib.tech_pll_component_pkg.all;
 use work.sdp_pkg.all;
 use work.tb_sdp_pkg.all;
 
 entity tb_sdp_beamformer_remote_ring is
   generic (
-    g_nof_rn             : natural := 16  -- number of nodes in the ring
+    g_nof_rn  : natural := 4  -- number of nodes in the ring
   );
 end tb_sdp_beamformer_remote_ring;
 
 architecture tb of tb_sdp_beamformer_remote_ring is
   constant c_dp_clk_period : time := 5 ns;  -- 200 MHz
   constant c_mm_clk_period : time := 1 ns;  -- fast MM clk to speed up simulation
+  constant c_sa_clk_period : time := tech_pll_clk_644_period;  -- 644 MHz
 
-  constant c_last_rn             : natural := g_nof_rn - 1;  -- first ring node has index RN = 0 by definition.
+  -- BF data
+  constant c_block_period              : natural := c_sdp_N_fft;
+  constant c_block_size                : natural := c_sdp_S_sub_bf * c_sdp_N_pol_bf;
+  constant c_gap_size                  : natural := c_block_period - c_block_size;
+  -- choose sync interval somewhat longer than maximum BF ring latency
+  constant c_nof_blocks_per_sync       : natural := largest(10, (g_nof_rn + 1) * 2);
+  constant c_nof_sync                  : natural := 2;
 
-  signal mm_init      : std_logic := '1';
-  signal tb_end       : std_logic := '0';
-  signal dp_clk       : std_logic := '1';
-  signal dp_rst       : std_logic;
-  signal mm_clk       : std_logic := '1';
-  signal mm_rst       : std_logic;
+  -- Ring lane packets
+  constant c_last_rn                   : natural := g_nof_rn - 1;  -- first ring node has index RN = 0 by definition.
+  constant c_use_cable                 : std_logic := '1';  -- '0' ring via PCB traces, '1' ring via QSFP cables
+  constant c_lane_payload_nof_longwords_bf  : natural := (c_block_size * 9) / 16;  -- beamlet block size repacked
+                                              -- from 36b to 64b (9/16 = 36/64), 488 * 2 * 9 / 16 = 549 longwords
+  constant c_lane_packet_nof_longwords_max  : natural := c_lane_payload_nof_longwords_bf + c_ring_dp_hdr_field_size;
+                                              -- = 549 + 3 = 552
+  constant c_fifo_tx_fill_margin       : natural := 10;  -- >= c_fifo_fill_margin = 6 that is used in dp_fifo_fill_eop
+  constant c_fifo_tx_size_ring : natural := true_log_pow2(c_lane_packet_nof_longwords_max + c_fifo_tx_fill_margin);
+                                            -- = 552 + 10 = 562 --> 1024
+  constant c_fifo_tx_fill_ring : natural := c_fifo_tx_size_ring - c_fifo_tx_fill_margin;
+                                            -- = maximum fill level, so rely on eop
+  constant c_err_bi                    : natural := 0;
+  constant c_nof_err_counts            : natural := 8;
+  constant c_bsn_at_sync_check_channel : natural := 1;
+  constant c_validate_channel          : boolean := true;
+  constant c_validate_channel_mode     : string  := "=";
+  constant c_sync_timeout              : natural := c_block_period * (c_nof_blocks_per_sync + 1);
 
-  signal rn_index     : natural range 0 to c_sdp_N_pn_max - 1 := 0;
+  signal mm_init                : std_logic := '1';
+  signal tb_end                 : std_logic := '0';
+  signal dp_clk                 : std_logic := '1';
+  signal dp_rst                 : std_logic;
+  signal mm_clk                 : std_logic := '1';
+  signal mm_rst                 : std_logic;
+  signal SA_CLK                 : std_logic := '1';
+  signal tr_ref_clk_312         : std_logic := '0';
+  signal tr_ref_clk_156         : std_logic := '0';
+  signal tr_ref_rst_156         : std_logic := '0';
+
+  signal stimuli_rst            : std_logic;
+  signal stimuli_end            : std_logic;
+
+  signal local_bf_sosi          : t_dp_sosi;
+  signal bf_bs_sosi             : t_dp_sosi;
+  signal from_ri_sosi_arr       : t_dp_sosi_arr(0 to c_last_rn);
+  signal to_ri_sosi_arr         : t_dp_sosi_arr(0 to c_last_rn);
+  signal bf_sum_sosi_arr        : t_dp_sosi_arr(0 to c_last_rn);
+  signal bf_sum_sosi            : t_dp_sosi;
+
+  -- 10GbE ring
+  signal tr_10gbe_ring_rx_sosi_arr    : t_dp_sosi_arr(0 to c_last_rn) := (others => c_dp_sosi_rst);
+  signal tr_10gbe_ring_tx_sosi_arr    : t_dp_sosi_arr(0 to c_last_rn) := (others => c_dp_sosi_rst);
+  signal tr_10gbe_ring_serial_rx_arr  : std_logic_vector(0 to c_last_rn) := (others => '0');
+  signal tr_10gbe_ring_serial_tx_arr  : std_logic_vector(0 to c_last_rn) := (others => '0');
+
+  -- BF ring MM points
+  signal reg_ring_lane_info_bf_copi_arr         : t_mem_copi_arr(0 to c_last_rn) := (others => c_mem_copi_rst);
+  signal reg_ring_lane_info_bf_cipo_arr         : t_mem_cipo_arr(0 to c_last_rn) := (others => c_mem_cipo_rst);
+  signal reg_bsn_monitor_v2_ring_rx_bf_copi_arr : t_mem_copi_arr(0 to c_last_rn) := (others => c_mem_copi_rst);
+  signal reg_bsn_monitor_v2_ring_rx_bf_cipo_arr : t_mem_cipo_arr(0 to c_last_rn) := (others => c_mem_cipo_rst);
+  signal reg_bsn_monitor_v2_ring_tx_bf_copi_arr : t_mem_copi_arr(0 to c_last_rn) := (others => c_mem_copi_rst);
+  signal reg_bsn_monitor_v2_ring_tx_bf_cipo_arr : t_mem_cipo_arr(0 to c_last_rn) := (others => c_mem_cipo_rst);
+  signal reg_dp_block_validate_err_bf_copi_arr  : t_mem_copi_arr(0 to c_last_rn) := (others => c_mem_copi_rst);
+  signal reg_dp_block_validate_err_bf_cipo_arr  : t_mem_cipo_arr(0 to c_last_rn) := (others => c_mem_cipo_rst);
+  signal reg_dp_block_validate_bsn_at_sync_bf_copi_arr : t_mem_copi_arr(0 to c_last_rn) :=
+                                                         (others => c_mem_copi_rst);
+  signal reg_dp_block_validate_bsn_at_sync_bf_cipo_arr : t_mem_cipo_arr(0 to c_last_rn) :=
+                                                         (others => c_mem_cipo_rst);
+
+  -- BSN aligner MM points
+  signal reg_bsn_align_v2_bf_copi_arr            : t_mem_copi_arr(0 to c_last_rn) := (others => c_mem_copi_rst);
+  signal reg_bsn_align_v2_bf_cipo_arr            : t_mem_cipo_arr(0 to c_last_rn) := (others => c_mem_cipo_rst);
+  signal reg_bsn_monitor_v2_rx_align_bf_copi_arr : t_mem_copi_arr(0 to c_last_rn) := (others => c_mem_copi_rst);
+  signal reg_bsn_monitor_v2_rx_align_bf_cipo_arr : t_mem_cipo_arr(0 to c_last_rn) := (others => c_mem_cipo_rst);
+  signal reg_bsn_monitor_v2_aligned_bf_copi_arr  : t_mem_copi_arr(0 to c_last_rn) := (others => c_mem_copi_rst);
+  signal reg_bsn_monitor_v2_aligned_bf_cipo_arr  : t_mem_cipo_arr(0 to c_last_rn) := (others => c_mem_cipo_rst);
 
 begin
   dp_rst <= '1', '0' after c_dp_clk_period * 7;
@@ -71,29 +139,107 @@ begin
   mm_rst <= '1', '0' after c_mm_clk_period * 7;
   mm_clk <= (not mm_clk) or tb_end after c_mm_clk_period / 2;
 
+  -- Wait for tr_10GbE to be active
+  stimuli_rst <= '1', '0' after 15 us;
+
+  SA_CLK <= not SA_CLK after c_sa_clk_period / 2;  -- Serial Gigabit IO sa clock (644 MHz)
+
+  -- Generate local BF stream, use same for all nodes
+  u_stimuli : entity dp_lib.dp_stream_stimuli
+  generic map (
+    g_sync_period => c_nof_blocks_per_sync,
+    g_nof_repeat  => c_nof_blocks_per_sync * c_nof_sync,
+    g_pkt_len     => c_block_size,
+    g_pkt_gap     => c_gap_size
+  )
+  port map (
+    rst               => stimuli_rst,
+    clk               => dp_clk,
+    -- Generate stimuli
+    src_out           => local_bf_sosi,
+    -- End of stimuli
+    tb_end            => stimuli_end
+  );
+
+  bf_bs_sosi <= local_bf_sosi;
+  bf_sum_sosi <= bf_sum_sosi_arr(c_last_rn);
+
   p_mm : process
     variable v_offset : natural;
   begin
     proc_common_wait_until_low(dp_clk, mm_rst);
     proc_common_wait_some_cycles(mm_clk, 10);
 
-    proc_common_wait_cross_clock_domain_latency(c_mm_clk_period, c_dp_clk_period, c_common_cross_clock_domain_latency * 2);
+    proc_common_wait_cross_clock_domain_latency(c_mm_clk_period, c_dp_clk_period,
+                                                c_common_cross_clock_domain_latency * 2);
     mm_init <= '0';
+
+    -- Wait to stop simulation
+    proc_common_wait_until_high(dp_clk, stimuli_end);
+    proc_common_wait_some_cycles(dp_clk, 1000);
+    tb_end <= '1';
     wait;
   end process;
 
+  proc_common_stop_simulation(tb_end);  -- OK: end simulation
 
   ------------------------------------------------------------------------------
   -- DUT
   ------------------------------------------------------------------------------
   gen_dut : for RN in 0 to c_last_rn generate
-    -- Ring connections between nodes 0:c_last_rn,0
+    -- Connect ring wires between the nodes
+    wire_ring : if RN > 0 generate
+      tr_10gbe_ring_serial_rx_arr(RN) <= tr_10gbe_ring_serial_tx_arr(RN - 1);
+    end generate;
+    close_ring : if RN = 0 generate
+      tr_10gbe_ring_serial_rx_arr(0) <= tr_10gbe_ring_serial_tx_arr(c_last_rn);
+    end generate;
+
+    -- tr_10GbE access at each node, all via front_io QSFP[0]
+    u_tr_10GbE_ring: entity tr_10GbE_lib.tr_10GbE
+    generic map (
+      g_sim           => true,
+      g_sim_level     => 1,
+      g_nof_macs      => 1,
+      g_direction     => "TX_RX",
+      g_tx_fifo_fill  => c_fifo_tx_fill_ring,
+      g_tx_fifo_size  => c_fifo_tx_size_ring
+    )
+    port map (
+      -- Transceiver PLL reference clock
+      tr_ref_clk_644        => SA_CLK,
+      tr_ref_clk_312        => tr_ref_clk_312,
+      tr_ref_clk_156        => tr_ref_clk_156,
+      tr_ref_rst_156        => tr_ref_rst_156,
+
+      -- MM interface
+      mm_rst                => mm_rst,
+      mm_clk                => mm_clk,
+
+      reg_mac_mosi          => c_mem_copi_rst,
+      reg_mac_miso          => open,
+      reg_eth10g_mosi       => c_mem_copi_rst,
+      reg_eth10g_miso       => open,
+
+      -- DP interface
+      dp_rst                => dp_rst,
+      dp_clk                => dp_clk,
+
+      src_out_arr           => tr_10gbe_ring_rx_sosi_arr(RN to RN),
+      snk_in_arr            => tr_10gbe_ring_tx_sosi_arr(RN to RN),
+
+      -- Serial IO
+      serial_tx_arr         => tr_10gbe_ring_serial_tx_arr(RN to RN),
+      serial_rx_arr         => tr_10gbe_ring_serial_rx_arr(RN to RN)
+    );
+
+    -- Ring lane access at each node
     u_ring_lane_bf : entity ring_lib.ring_lane
       generic map (
         g_lane_direction            => 1,  -- transport in positive RN direction.
         g_lane_data_w               => c_longword_w,
         g_lane_packet_length        => c_lane_payload_nof_longwords_bf,
-        g_lane_total_nof_packets_w  => c_lane_total_nof_packets_w,
+        g_lane_total_nof_packets_w  => 32,
         g_use_dp_layer              => true,
         g_nof_rx_monitors           => 1,
         g_nof_tx_monitors           => 1,
@@ -110,29 +256,29 @@ begin
         dp_clk => dp_clk,
         dp_rst => dp_rst,
 
-        from_lane_sosi     => bf_from_ri_sosi_arr(beamset_id),
-        to_lane_sosi       => bf_to_ri_sosi_arr(beamset_id),
-        lane_rx_cable_sosi => lane_rx_cable_sosi_arr(1 + beamset_id),
-        lane_rx_board_sosi => lane_rx_board_sosi_arr(1 + beamset_id),
-        lane_tx_cable_sosi => lane_tx_cable_sosi_arr(1 + beamset_id),
-        lane_tx_board_sosi => lane_tx_board_sosi_arr(1 + beamset_id),
+        from_lane_sosi     => from_ri_sosi_arr(RN),
+        to_lane_sosi       => to_ri_sosi_arr(RN),
+        lane_rx_cable_sosi => tr_10gbe_ring_rx_sosi_arr(RN),
+        lane_rx_board_sosi => c_dp_sosi_rst,
+        lane_tx_cable_sosi => tr_10gbe_ring_tx_sosi_arr(RN),
+        lane_tx_board_sosi => open,
         bs_sosi            => bf_bs_sosi,  -- used for bsn and sync
 
-        reg_ring_lane_info_copi                => reg_ring_lane_info_bf_copi_arr(beamset_id),
-        reg_ring_lane_info_cipo                => reg_ring_lane_info_bf_cipo_arr(beamset_id),
-        reg_bsn_monitor_v2_ring_rx_copi        => reg_bsn_monitor_v2_ring_rx_bf_copi_arr(beamset_id),
-        reg_bsn_monitor_v2_ring_rx_cipo        => reg_bsn_monitor_v2_ring_rx_bf_cipo_arr(beamset_id),
-        reg_bsn_monitor_v2_ring_tx_copi        => reg_bsn_monitor_v2_ring_tx_bf_copi_arr(beamset_id),
-        reg_bsn_monitor_v2_ring_tx_cipo        => reg_bsn_monitor_v2_ring_tx_bf_cipo_arr(beamset_id),
-        reg_dp_block_validate_err_copi         => reg_dp_block_validate_err_bf_copi_arr(beamset_id),
-        reg_dp_block_validate_err_cipo         => reg_dp_block_validate_err_bf_cipo_arr(beamset_id),
-        reg_dp_block_validate_bsn_at_sync_copi => reg_dp_block_validate_bsn_at_sync_bf_copi_arr(beamset_id),
-        reg_dp_block_validate_bsn_at_sync_cipo => reg_dp_block_validate_bsn_at_sync_bf_cipo_arr(beamset_id),
-
-        this_rn   => this_rn,
-        N_rn      => ring_info.N_rn,
-        rx_select => ring_info.use_cable_to_previous_rn,
-        tx_select => ring_info.use_cable_to_next_rn
+        reg_ring_lane_info_copi                => reg_ring_lane_info_bf_copi_arr(RN),
+        reg_ring_lane_info_cipo                => reg_ring_lane_info_bf_cipo_arr(RN),
+        reg_bsn_monitor_v2_ring_rx_copi        => reg_bsn_monitor_v2_ring_rx_bf_copi_arr(RN),
+        reg_bsn_monitor_v2_ring_rx_cipo        => reg_bsn_monitor_v2_ring_rx_bf_cipo_arr(RN),
+        reg_bsn_monitor_v2_ring_tx_copi        => reg_bsn_monitor_v2_ring_tx_bf_copi_arr(RN),
+        reg_bsn_monitor_v2_ring_tx_cipo        => reg_bsn_monitor_v2_ring_tx_bf_cipo_arr(RN),
+        reg_dp_block_validate_err_copi         => reg_dp_block_validate_err_bf_copi_arr(RN),
+        reg_dp_block_validate_err_cipo         => reg_dp_block_validate_err_bf_cipo_arr(RN),
+        reg_dp_block_validate_bsn_at_sync_copi => reg_dp_block_validate_bsn_at_sync_bf_copi_arr(RN),
+        reg_dp_block_validate_bsn_at_sync_cipo => reg_dp_block_validate_bsn_at_sync_bf_cipo_arr(RN),
+
+        this_rn   => to_uvec(RN, c_byte_w),
+        N_rn      => to_uvec(g_nof_rn, c_byte_w),
+        rx_select => c_use_cable,
+        tx_select => c_use_cable
       );
 
     -- Intermediate BF alignment and summation at each node
@@ -141,25 +287,33 @@ begin
         dp_clk        => dp_clk,
         dp_rst        => dp_rst,
 
-        rn_index      => rn_index,
-
-        local_bf_sosi : in  t_dp_sosi;
-        from_ri_sosi  : in  t_dp_sosi;
-        to_ri_sosi    : out t_dp_sosi;
-        bf_sum_sosi   : out t_dp_sosi;
+        rn_index      => RN,
 
-        mm_rst        : in  std_logic;
-        mm_clk        : in  std_logic;
+        local_bf_sosi => local_bf_sosi,  -- all nodes use same local reference data
+        from_ri_sosi  => from_ri_sosi_arr(RN),
+        to_ri_sosi    => to_ri_sosi_arr(RN),
+        bf_sum_sosi   => bf_sum_sosi_arr(RN),
 
-        reg_bsn_align_copi : in  t_mem_copi := c_mem_copi_rst;
-        reg_bsn_align_cipo : out t_mem_cipo;
+        mm_rst        => mm_rst,
+        mm_clk        => mm_clk,
 
-        reg_bsn_monitor_v2_bsn_align_input_copi  : in  t_mem_copi := c_mem_copi_rst;
-        reg_bsn_monitor_v2_bsn_align_input_cipo  : out t_mem_cipo;
-
-        reg_bsn_monitor_v2_bsn_align_output_copi : in  t_mem_copi := c_mem_copi_rst;
-        reg_bsn_monitor_v2_bsn_align_output_cipo : out t_mem_cipo
+        reg_bsn_align_copi                       => reg_bsn_align_v2_bf_copi_arr(RN),
+        reg_bsn_align_cipo                       => reg_bsn_align_v2_bf_cipo_arr(RN),
+        reg_bsn_monitor_v2_bsn_align_input_copi  => reg_bsn_monitor_v2_rx_align_bf_copi_arr(RN),
+        reg_bsn_monitor_v2_bsn_align_input_cipo  => reg_bsn_monitor_v2_rx_align_bf_cipo_arr(RN),
+        reg_bsn_monitor_v2_bsn_align_output_copi => reg_bsn_monitor_v2_aligned_bf_copi_arr(RN),
+        reg_bsn_monitor_v2_bsn_align_output_cipo => reg_bsn_monitor_v2_aligned_bf_cipo_arr(RN)
       );
-end generate;  -- gen_dut
+  end generate;  -- gen_dut
 
+  -- 10GbE clocks
+  u_tech_pll_xgmii_mac_clocks : entity tech_pll_lib.tech_pll_xgmii_mac_clocks
+  port map (
+    refclk_644 => SA_CLK,
+    rst_in     => mm_rst,
+    clk_156    => tr_ref_clk_156,
+    clk_312    => tr_ref_clk_312,
+    rst_156    => tr_ref_rst_156,
+    rst_312    => open
+  );
 end tb;
-- 
GitLab