diff --git a/boards/uniboard1/libraries/unb1_board/src/vhdl/node_unb1_fn_terminal_db.vhd b/boards/uniboard1/libraries/unb1_board/src/vhdl/node_unb1_fn_terminal_db.vhd new file mode 100644 index 0000000000000000000000000000000000000000..64d7c1cf82acb8b490e2f81a06a7bfa0a650639b --- /dev/null +++ b/boards/uniboard1/libraries/unb1_board/src/vhdl/node_unb1_fn_terminal_db.vhd @@ -0,0 +1,438 @@ +------------------------------------------------------------------------------- +-- +-- Copyright (C) 2012 +-- ASTRON (Netherlands Institute for Radio Astronomy) <http://www.astron.nl/> +-- P.O.Box 2, 7990 AA Dwingeloo, The Netherlands +-- +-- This program is free software: you can redistribute it and/or modify +-- it under the terms of the GNU General Public License as published by +-- the Free Software Foundation, either version 3 of the License, or +-- (at your option) any later version. +-- +-- This program is distributed in the hope that it will be useful, +-- but WITHOUT ANY WARRANTY; without even the implied warranty of +-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +-- GNU General Public License for more details. +-- +-- You should have received a copy of the GNU General Public License +-- along with this program. If not, see <http://www.gnu.org/licenses/>. +-- +------------------------------------------------------------------------------- + +-- Purpose: User Rx interface for the UniBoard mesh +-- +-- Description: +-- . The node_fn_terminal_db can receive up to 4*4 user steams via 4*3 phy +-- lanes on the UniBoard mesh. The phy streams are DP/UTH encoded and +-- get decoded and output via dp_out_sosi_arr. +-- . If g_mesh_use_tx=TRUE then the node_fn_terminal_db can also transmit test +-- data for bidirectional transceiver diagnostic purposes. The IO phy is +-- tr_nonbonded. +-- . If g_use_bsn_align=TRUE then the dp_out_sosi_arr user streams are also +-- BSN aligned. +-- . If g_use_data_buf=TRUE then at every sync the dp_out_sosi_arr blocks get +-- captured into a diag_databuffer for monitoring via MM. +-- . If g_mesh_mon_select/=0 then also internal signals in the +-- unb_terminals_mesh can be monitored via a diag_databuffer. +-- +-- Remarks: +-- +-- Explanation of impact of dp_distribute on c_rx_output_fifo_size: +-- The terminals transport c_usr_nof_streams_per_bus=4 blocks onto +-- g_mesh_nof_serial=3 lanes. This has an impact on c_rx_output_fifo_size. +-- The dp_distribute from 4 --> 3 causes a transpose such that the 4 usr +-- streams for t=0 are being transmitted via lane 0. While this happens +-- one g_usr_block_per later the 4 usr blocks for t=1 will be transmitted +-- via lane 1, and again one g_usr_block_per later the 4 usr blocks for +-- t=2 will be transmitted via lane 2: +-- +-- 3*g_usr_block_per 3*g_usr_block_per 3*g_usr_block_per +-- <-----------------><-----------------><-----------------> +-- lane 0: Block_0_1_2_3(t0) Block_0_1_2_3(t3) Block_0_1_2_3(t6) +-- lane 1: Block_0_1_2_3(t1) Block_0_1_2_3(t4) ... +-- lane 2: Block_0_1_2_3(t2) Block_0_1_2_3(t5) ... +-- +-- In fact the transport of the 4 usr streams via one lane takes 3 time +-- blocks (= 3*g_usr_block_per). The BSN aligner needs to align the 4 +-- usr streams until they are all present at the inputs of the BSN aligner +-- the FIFO must be able to buffer c_burst_usr_nof_block = 3 blocks, +-- because there are g_mesh_nof_serial-1 = +2 other lanes that already +-- send blocks for t1 and t2 during t0 on lane 0 and +1 because it is a +-- fill FIFO that stores at least 1 complete frame. +-- +-- Explanation of impact of dp_distribute on BSN align latency: +-- In a subrack simulation with g_unb_sys=4,1,4 it follows that the BSN +-- difference can be as much as 4 for UNB1/FN0. The time difference +-- between BSN 0 for these two streams is then 5195 ns = 4.06 +-- g_usr_block_per. For a single UniBoard simulation with g_unb_sys=1,1,4 +-- it follows that the BSN difference becomes 2 and almost 3. The time +-- difference between these two streams is than 2495 ns = 1.95 +-- g_usr_block_per. To support c_bsn_latency for the dp_bsn_align the +-- c_burst_bsn_latency therefore needs to be increased. For both subrack +-- and single Uniboard (as set by g_multi_unb) choose the same BSN latency +-- adjustment of +c_burst_usr_nof_block. For the subrack sop timeout needs +-- to be even > 5 g_usr_block_per, because otherwise the BSN difference +-- of 4 still gets missed. Therefore use c_bsn_latency = 2, so then +-- c_burst_bsn_latency = 2 + 3 = 5. Note that for the BSN comparison logic +-- the preferred values for g_bsn_latency are 3, 7, 15 because these +-- require the least logic. Similar for the c_rx_output_fifo_size +-- g_bsn_latency = 3 and 7 would result in using 1 RAMB or 2 RAMB in +-- case that g_usr_block_len=96. Therefore choose c_bsn_latency = 3 to +-- have some extra margin without logic or RAMB cost. The rules from +-- dp_bsn_align.vhd for how to choose the timeouts and the FIFO size +-- should therefore now be applied with c_burst_bsn_latency. +-- +-- Explanation of impact of dp_distribute on c_phy_rx_fifo_size: +-- The dp_distribute uses one of the dual clock FIFO in tr_nonbonded to +-- fill with less than 1 block, the dual clock FIFOs on the oFirmware/designs/fn_terminal_db/src/vhdl/node_fn_terminal_db.vhdther +-- g_mesh_nof_serial-1 lanes only fill with 1 word. This less than one +-- block was observed in simulation. Assume that indeed c_phy_rx_fifo_len +-- = 1 block is sufficient to avoid using more RAM than needed. +-- +-- Explanation of impact of c_rx_timeout_w on c_phy_rx_fifo_size: +-- The uth_rx timeout ensures that the rx FIFO in tr_nonbonded does not +-- overflow if the reception should suddenly stop during a frame. +-- If the c_rx_timeout_w is set +1 larger then the rx FIFO will overflow +-- as was seen in simulation. Temporarily setting the severity in +-- proc_common_fifo_asserts() to ERROR instead of FAILURE shows that the +-- terminals will recover from this overflow. The reason is that the +-- overflow will not corrupt the FIFO control, the uth_rx will ensure +-- that only complete packets get passed on and the BSN aligner will +-- after some flushing then recover the Rx alignment. +-- The uth_rx timeout restarts on every valid. In simulation one of the +-- 4 uth_rx reaches a timeout_cnt of 71 so close to 1 g_usr_block_per. +-- The other three reach at most 2 words. Typically the gap should be +-- < g_usr_block_len, because that is the maximum time that the BSN +-- aligner holds the sop. Hence using ceil_log2(g_usr_block_per) +-- yields a sufficient upper value. It is not necessary to add +1. It +-- is also not good to use less, because there could occur quite some +-- data not valid gap time between the sop and the next valid, due to +-- the BSN aligner. +-- +-- Some more remarks: +-- . Use ceil_value to get the maximum FIFO size for the requested size. +-- . In fact c_burst_usr_nof_block depends on whether the terminals use +-- dp_distribute, because dp_distribute causes a transpose that results +-- in a burst. +-- . The c_bsn_latency also depends on whether there is a backplane or +-- not so on g_multi_unb. Therefore alternatively c_bsn_latency = +-- sel_a_b(g_multi_unb, 3, 0) could be used. +-- . Alternatively c_bsn_latency could be made available as generic and +-- set directly by the user. The c_burst_usr_nof_block can then be +-- removed to let all settings directly depend on g_bsn_latency. The +-- advantage is that it + + +LIBRARY IEEE, common_lib, dp_lib, unb1_board_lib, diag_lib; +USE IEEE.STD_LOGIC_1164.ALL; +USE IEEE.NUMERIC_STD.ALL; +USE common_lib.common_pkg.ALL; +USE common_lib.common_mem_pkg.ALL; +USE dp_lib.dp_stream_pkg.ALL; +USE unb1_board_lib.unb1_board_pkg.ALL; +USE unb1_board_lib.unb1_board_peripherals_pkg.ALL; + + +ENTITY node_unb1_fn_terminal_db IS + GENERIC( + g_sim : BOOLEAN := FALSE; + g_sim_level : NATURAL := 0; -- 0 = simulate GX IP, 1 = use fast serial behavioural model + g_use_bsn_align : BOOLEAN := TRUE; -- default TRUE, support FALSE for packet flow debugging purposes (faster synthesis) + g_use_data_buf : BOOLEAN := TRUE; + -- Application Interface + g_usr_nof_streams : NATURAL := 16; + g_usr_data_w : NATURAL := 32; + g_usr_block_len : NATURAL := 96; + g_usr_block_per : NATURAL := 256; -- block period = block length + gap length + -- Terminals Interface + g_multi_unb : BOOLEAN := FALSE; + g_use_mesh : BOOLEAN := TRUE; + g_mesh_nof_serial : NATURAL := 3; -- default only use the 3 full featured transceivers in the mesh (out of maximal 4), using only 2 is not enough + g_mesh_gx_mbps : NATURAL := 5000; + g_mesh_ena_reorder : BOOLEAN := TRUE; + g_mesh_use_tx : BOOLEAN := TRUE; -- can be FALSE for BF, use TRUE to support for bidirectional TR diagnostics + g_mesh_mon_select : NATURAL := 0; -- 0 = no data monitor buffers via MM, else see unb_terminals_mesh.vhd + g_mesh_mon_nof_words : NATURAL := 1024; + g_mesh_mon_use_sync : BOOLEAN := TRUE; + -- Auxiliary Interface + g_aux : t_c_unb1_board_aux := c_unb1_board_aux + ); + PORT( + -- System + chip_id : IN STD_LOGIC_VECTOR(g_aux.chip_id_w-1 DOWNTO 0); -- [2:0] + + mm_rst : IN STD_LOGIC; + mm_clk : IN STD_LOGIC; -- 50 MHz from xo_clk PLL in SOPC system + dp_rst : IN STD_LOGIC; + dp_clk : IN STD_LOGIC; -- 200 MHz from CLK system clock + dp_pps : IN STD_LOGIC := '0'; + tr_mesh_clk : IN STD_LOGIC; -- 156.25 MHz from SB_CLK transceiver clock + cal_clk : IN STD_LOGIC; -- 40 MHz from xo_clk PLL in SOPC system + + -- MM interface + -- . diag_data_buffer + ram_diag_data_buf_mosi : IN t_mem_mosi := c_mem_mosi_rst; + ram_diag_data_buf_miso : OUT t_mem_miso; + reg_diag_data_buf_mosi : IN t_mem_mosi := c_mem_mosi_rst; + reg_diag_data_buf_miso : OUT t_mem_miso; + -- . tr_nonbonded + reg_tr_nonbonded_mosi : IN t_mem_mosi := c_mem_mosi_rst; + reg_tr_nonbonded_miso : OUT t_mem_miso; + reg_diagnostics_mosi : IN t_mem_mosi := c_mem_mosi_rst; + reg_diagnostics_miso : OUT t_mem_miso; + -- . diag_data_buffer_mesh + ram_mesh_diag_data_buf_mosi : IN t_mem_mosi := c_mem_mosi_rst; + ram_mesh_diag_data_buf_miso : OUT t_mem_miso; + -- . bsn_monitor + reg_bsn_monitor_mosi : IN t_mem_mosi := c_mem_mosi_rst; + reg_bsn_monitor_miso : OUT t_mem_miso; + + -- Node stream out + dp_out_sosi_arr : OUT t_dp_sosi_arr(g_usr_nof_streams-1 DOWNTO 0); + dp_out_siso_arr : IN t_dp_siso_arr(g_usr_nof_streams-1 DOWNTO 0) := (OTHERS => c_dp_siso_rdy); + + -- Mesh serial interface (tr_nonbonded) + tx_serial_2arr : OUT t_unb1_board_mesh_sl_2arr; -- Tx + rx_serial_2arr : IN t_unb1_board_mesh_sl_2arr:= (OTHERS=>(OTHERS=>'0')) -- Rx support for diagnostics + ); +END node_unb1_fn_terminal_db; + +ARCHITECTURE str OF node_unb1_fn_terminal_db IS + + ----------------------------------------------------------------------------- + -- BSN align + ----------------------------------------------------------------------------- + CONSTANT c_bsn_request_pipeline : NATURAL := 2; -- need to use 2 to ease timing closure at 200 MHz + CONSTANT c_bsn_latency : NATURAL := 3; + CONSTANT c_burst_usr_nof_block : NATURAL := g_mesh_nof_serial; -- 3 phy lane blocks t012 per user stream + CONSTANT c_burst_bsn_latency : NATURAL := c_bsn_latency + c_burst_usr_nof_block; + + ----------------------------------------------------------------------------- + -- Terminals + ----------------------------------------------------------------------------- + + CONSTANT c_usr_nof_streams_per_bus : NATURAL := g_usr_nof_streams/c_unb1_board_nof_bn; -- 16 / 4 = 4 + + -- . tr_nonbonded rx fifo + CONSTANT c_burst_phy_nof_block : NATURAL := c_usr_nof_streams_per_bus; -- burst of 4 usr blocks per t0 per phy lane + CONSTANT c_phy_block_len : NATURAL := g_usr_block_len; -- no data packing so phy frame is about as long as the user frame (expect for few overhead words) + CONSTANT c_phy_rx_fifo_len : NATURAL := c_phy_block_len * 1; -- assume only need to store less than one block in practise + CONSTANT c_phy_rx_fifo_size : NATURAL := ceil_value(c_phy_rx_fifo_len, c_bram_m9k_fifo_depth); -- use tr_nonbonded Rx dual clock FIFO to buffer for dp_distribute + + -- . uth_rx timeout_cnt + CONSTANT c_rx_timeout_w : NATURAL := 0;--ceil_log2(g_usr_block_per); + + -- . uth_terminal_rx output fifo + CONSTANT c_rx_output_fifo_len : NATURAL := (c_burst_bsn_latency + 2) * g_usr_block_len; + CONSTANT c_rx_output_fifo_size : NATURAL := ceil_value(c_rx_output_fifo_len, c_bram_m9k_fifo_depth); + CONSTANT c_rx_output_fifo_fill : NATURAL := g_usr_block_len; -- keep a full block in the FIFO to ensure that dp_bsn_align gets input without data not valid gaps when the lane rate is slower (as with tr_nonbonded) + + -- . BSN aligner timeouts + CONSTANT c_sop_timeout : NATURAL := (c_burst_bsn_latency + 1) * g_usr_block_per; -- wait for sop for some block periods + CONSTANT c_xoff_timeout : NATURAL := c_burst_bsn_latency * 2 * g_usr_block_per; + + SIGNAL rx_usr_siso_2arr : t_unb1_board_mesh_siso_2arr; + SIGNAL rx_usr_sosi_2arr : t_unb1_board_mesh_sosi_2arr; + + SIGNAL rx_rew_siso_2arr : t_unb1_board_mesh_siso_2arr; + SIGNAL rx_rew_sosi_2arr : t_unb1_board_mesh_sosi_2arr; + + SIGNAL rx_usr_siso_arr : t_dp_siso_arr(g_usr_nof_streams-1 DOWNTO 0); + SIGNAL rx_usr_sosi_arr : t_dp_sosi_arr(g_usr_nof_streams-1 DOWNTO 0); + + ----------------------------------------------------------------------------- + -- Data buffer + ----------------------------------------------------------------------------- + SIGNAL db_in_sosi_arr : t_dp_sosi_arr(g_usr_nof_streams-1 DOWNTO 0); + +BEGIN + + gen_mesh: IF g_use_mesh = TRUE GENERATE + ----------------------------------------------------------------------------- + -- Terminals + ----------------------------------------------------------------------------- + + u_terminals_mesh : ENTITY unb1_board_lib.unb1_board_terminals_mesh + GENERIC MAP ( + g_sim => g_sim, + g_sim_level => g_sim_level, + -- System + g_node_type => e_fn, + g_nof_bus => c_unb1_board_nof_bn, -- 4 to 4 nodes in mesh + -- User + g_usr_use_complex => TRUE, + g_usr_data_w => g_usr_data_w, + g_usr_frame_len => g_usr_block_len, + g_usr_nof_streams => c_usr_nof_streams_per_bus, + -- Phy + g_phy_nof_serial => g_mesh_nof_serial, + g_phy_gx_mbps => g_mesh_gx_mbps, + g_phy_rx_fifo_size => c_phy_rx_fifo_size, + g_phy_ena_reorder => g_mesh_ena_reorder, + -- Tx + g_use_tx => g_mesh_use_tx, -- optionally do support diag Tx + g_tx_input_use_fifo => FALSE, -- no user Tx + -- Rx + g_use_rx => TRUE, -- user Rx must be TRUE for DB in FN, + g_rx_output_use_fifo => TRUE, -- Rx output provides FIFOs to ensure that dp_distribute does not get blocked due to substantial backpressure on another output + g_rx_output_fifo_size => c_rx_output_fifo_size, + g_rx_output_fifo_fill => c_rx_output_fifo_fill, + g_rx_timeout_w => c_rx_timeout_w, + -- Monitoring + g_mon_select => g_mesh_mon_select, + g_mon_nof_words => g_mesh_mon_nof_words, + g_mon_use_sync => g_mesh_mon_use_sync + ) + PORT MAP ( + chip_id => chip_id, + + mm_rst => mm_rst, + mm_clk => mm_clk, + dp_rst => dp_rst, + dp_clk => dp_clk, + dp_sync => dp_pps, + tr_clk => tr_mesh_clk, + cal_clk => cal_clk, + + -- User interface (4 nodes)(4 input streams) + rx_usr_siso_2arr => rx_usr_siso_2arr, + rx_usr_sosi_2arr => rx_usr_sosi_2arr, -- Rx (user Tx from FN to BN is unused) + + -- Mesh interface level (4 nodes)(4 lanes) + -- . Serial (tr_nonbonded) + tx_serial_2arr => tx_serial_2arr, -- Tx + rx_serial_2arr => rx_serial_2arr, -- Rx + + -- MM Control + -- . tr_nonbonded + reg_tr_nonbonded_mosi => reg_tr_nonbonded_mosi, + reg_tr_nonbonded_miso => reg_tr_nonbonded_miso, + reg_diagnostics_mosi => reg_diagnostics_mosi, + reg_diagnostics_miso => reg_diagnostics_miso, + + -- . diag_data_buffer + ram_diag_data_buf_mosi => ram_mesh_diag_data_buf_mosi, + ram_diag_data_buf_miso => ram_mesh_diag_data_buf_miso + ); + + --------------------------------------------------------------------------------------- + -- Forward the received streams, rewire for single or multi UniBoard use + --------------------------------------------------------------------------------------- + gen_single_unb: IF g_multi_unb = FALSE GENERATE + rx_usr_siso_2arr <= rx_rew_siso_2arr; + rx_rew_sosi_2arr <= rx_usr_sosi_2arr; + END GENERATE; + + gen_multi_unb: IF g_multi_unb = TRUE GENERATE + rx_rew_sosi_2arr <= func_unb1_board_transpose_2arr(rx_usr_sosi_2arr); + rx_usr_siso_2arr <= func_unb1_board_transpose_2arr(rx_rew_siso_2arr); + END GENERATE; + + --------------------------------------------------------------------------------------- + -- From 2d to 1d array + --------------------------------------------------------------------------------------- + gen_i : FOR I IN 0 TO c_unb1_board_nof_bn-1 GENERATE + gen_j : FOR J IN 0 TO c_usr_nof_streams_per_bus-1 GENERATE + rx_rew_siso_2arr(I)(J) <= rx_usr_siso_arr(I*c_usr_nof_streams_per_bus + J); + rx_usr_sosi_arr(I*c_usr_nof_streams_per_bus + J) <= rx_rew_sosi_2arr(I)(J); + END GENERATE; + END GENERATE; + + ----------------------------------------------------------------------------- + -- BSN align + ----------------------------------------------------------------------------- + no_align : IF g_use_bsn_align=FALSE GENERATE + rx_usr_siso_arr <= (OTHERS=>c_dp_siso_rdy); + + reg_bsn_monitor_miso <= c_mem_miso_rst; + END GENERATE; + + gen_align : IF g_use_bsn_align=TRUE GENERATE + u_bsn_align : ENTITY dp_lib.dp_bsn_align + GENERIC MAP ( + g_block_size => g_usr_block_len, + g_nof_input => g_usr_nof_streams, + g_xoff_timeout => c_xoff_timeout, + g_sop_timeout => c_sop_timeout, + g_bsn_latency => c_burst_bsn_latency, + g_bsn_request_pipeline => c_bsn_request_pipeline + ) + PORT MAP ( + rst => dp_rst, + clk => dp_clk, + -- ST sinks + snk_out_arr => rx_usr_siso_arr, + snk_in_arr => rx_usr_sosi_arr, + -- ST source + src_in_arr => dp_out_siso_arr, + src_out_arr => db_in_sosi_arr, + -- MM + in_en_evt => '0', -- pulse '1' indicates that the in_en_arr user input enables have been updated + in_en_arr => (OTHERS=>'1') -- default all user inputs are enabled + ); + + u_bsn_monitor_align : ENTITY dp_lib.mms_dp_bsn_monitor + GENERIC MAP ( + g_nof_streams => 1, -- All streams are synchronous. Only monitor stream(0). + g_cross_clock_domain => TRUE, + g_bsn_w => c_dp_stream_bsn_w, + g_cnt_sop_w => c_word_w, + g_cnt_valid_w => c_word_w, + g_log_first_bsn => TRUE + ) + PORT MAP ( + -- Memory-mapped clock domain + mm_rst => mm_rst, + mm_clk => mm_clk, + reg_mosi => reg_bsn_monitor_mosi, + reg_miso => reg_bsn_monitor_miso, + + -- Streaming clock domain + dp_rst => dp_rst, + dp_clk => dp_clk, + in_siso_arr => (OTHERS=>c_dp_siso_rdy), + in_sosi_arr => db_in_sosi_arr(0 DOWNTO 0) + ); + END GENERATE; + + ----------------------------------------------------------------------------- + -- Data buffer + ----------------------------------------------------------------------------- + no_data_buf : IF g_use_data_buf=FALSE GENERATE + ram_diag_data_buf_miso <= c_mem_miso_rst; + reg_diag_data_buf_miso <= c_mem_miso_rst; + END GENERATE; + + gen_data_buf : IF g_use_data_buf=TRUE GENERATE + u_data_buf : ENTITY diag_lib.mms_diag_data_buffer + GENERIC MAP ( + g_nof_streams => g_usr_nof_streams, + g_data_w => g_usr_data_w, + g_buf_nof_data => 1024, + g_buf_use_sync => TRUE + ) + PORT MAP ( + -- System + mm_rst => mm_rst, + mm_clk => mm_clk, + dp_rst => dp_rst, + dp_clk => dp_clk, + -- MM interface + ram_data_buf_mosi => ram_diag_data_buf_mosi, + ram_data_buf_miso => ram_diag_data_buf_miso, + reg_data_buf_mosi => reg_diag_data_buf_mosi, + reg_data_buf_miso => reg_diag_data_buf_miso, + -- ST interface + in_sync => db_in_sosi_arr(0).sync, + in_sosi_arr => db_in_sosi_arr + ); + END GENERATE; + + ----------------------------------------------------------------------------- + -- Entity DP out + ----------------------------------------------------------------------------- + dp_out_sosi_arr <= db_in_sosi_arr; + + END GENERATE; + +END str;