From bc8c549e3b3a98be2dccd129c07b827dec8f4c81 Mon Sep 17 00:00:00 2001 From: Eric Kooistra <kooistra@astron.nl> Date: Wed, 13 Mar 2024 13:03:03 +0100 Subject: [PATCH] Prepare for g_use_aligner_at_first_node, use true to have no functional change yet. --- .../base/dp/src/vhdl/dp_bsn_align_v2.vhd | 146 +++++++++++++----- .../base/dp/src/vhdl/mmp_dp_bsn_align_v2.vhd | 19 ++- .../base/dp/tb/vhdl/tb_dp_bsn_align_v2.vhd | 16 +- .../dp/tb/vhdl/tb_mmp_dp_bsn_align_v2.vhd | 8 +- 4 files changed, 135 insertions(+), 54 deletions(-) diff --git a/libraries/base/dp/src/vhdl/dp_bsn_align_v2.vhd b/libraries/base/dp/src/vhdl/dp_bsn_align_v2.vhd index eae4c4d60f..4653c6ccba 100644 --- a/libraries/base/dp/src/vhdl/dp_bsn_align_v2.vhd +++ b/libraries/base/dp/src/vhdl/dp_bsn_align_v2.vhd @@ -29,14 +29,72 @@ -- replacement data. The output streams are paced by the block rate of -- input 0. The user has to read the block within the block period. -- +-- The aligner can align g_nof_streams that all arrive within a latency +-- of g_bsn_latency_max after the local stream at index 0. The aligner +-- can also be used in a chain of aligners, whereby each aligner typically +-- has the local input and one remote input and the remote input is the +-- output of an upstream aligner. Then the latency on the last node in +-- the chain will be within g_nof_aligners_max * g_bsn_latency_max. +-- +-- The size of the circular buffer is c_buffer_nof_blocks and depends on the +-- maximum latency. The c_buffer_nof_blocks has to be a power of two to ease +-- the control of the circular buffer. The lowest bits of the input block +-- sequence number (BSN) are used as write block index into the circular +-- buffer. The g_use_aligner_at_first_node can be useful to reduce the +-- required circular buffer size just enough, such that the next power of two +-- is only a few blocks larger, instead of almost a factor two larger. This +-- then may save a significant amount of block RAM. 
+-- +-- In case of a chain of aligners the circular buffer size depends on +-- the latency of the local input. The most remote input will only use a +-- fraction of the buffer. Therefore more block RAM can be saved by using +-- a smaller circular buffer size for inputs that are more remote +-- (i.e. inputs that have passed through more upstream aligners). +-- -- Features: -- . The g_block_size <= block period, so supports input blocks arriving -- with or without data valid gaps -- . uses replacement data to replace lost input blocks and channel bit 0 as -- lost_data flag -- . uses replacement data to replace disabled input streams --- . output block can be read in arbitrary order via g_use_mm_output = TRUE --- . output block can be streamed via g_use_mm_output = FALSE +-- . output block can be read in arbitrary order via g_use_mm_output = true +-- . output block can be streamed via g_use_mm_output = false +-- +-- Parameters: +-- . g_nof_streams: number of input and output streams. Stream index 0 is +-- the local stream. Stream indices > 0 are for remote streams. The +-- remote streams arrive later than the local stream, but within +-- g_bsn_latency_max or within an integer multiple of g_bsn_latency_max. +-- . g_bsn_latency_max: maximum travel latency of a remote block in number +-- of block periods T_blk. +-- . g_nof_aligners_max: Number of dp_bsn_align_v2 aligners in a chain. +-- = 1 when only align at last node, or +-- > 1 when align at every intermediate node in a chain of nodes, and then +-- g_nof_aligners_max should equal the number of nodes for +-- chain_node_index range. The g_nof_aligners_max is the number of +-- nodes in the chain including the first node. +-- . g_use_aligner_at_first_node: when true use g_bsn_latency_max at first +-- node as well, else use c_bsn_latency_first_node = 0 for immediate +-- output from first node in a chain of nodes. Only used when +-- g_nof_aligners_max > 1. 
The g_use_aligner_at_first_node setting only +-- affects the latency along the chain, and therefore the required +-- size of the circular buffer. If the circular buffer is large enough +-- anyway, then the g_use_aligner_at_first_node setting is don't care, +-- assuming that a little extra latency is don't care. +-- +-- Inputs: +-- . chain_node_index: Node index in chain of nodes. First node has index 0. +-- In case of a ring of nodes the chain of nodes can span the whole ring, +-- or only a part of the ring. The number of nodes in the chain is given +-- by g_nof_aligners_max. Only used when g_nof_aligners_max > 1. +-- . stream_en_arr: when '1' then align corresponding input stream, else +-- replace data from corresponding input stream by 0 and do not raise the +-- lost data flag. Whether a stream is enabled or not has no effect on the +-- aligner timing, it only sets the data to 0. +-- +-- Outputs: +-- . stream_replaced_cnt_arr: count number of lost data blocks per input +-- stream, that got replaced by 0 value, per sync interval. 
-- -- For more detailed description see: -- https://support.astron.nl/confluence/display/L2M/L6+FWLIB+Design+Document%3A+BSN+aligner+v2 @@ -57,22 +115,23 @@ use work.dp_stream_pkg.all; entity dp_bsn_align_v2 is generic ( g_nof_streams : natural := 2; -- >= 2, number of input and output streams - g_bsn_latency_max : natural := 2; -- maximum travel latency of a remote block in number of block periods T_blk - g_nof_aligners_max : positive := 16; -- 1 when only align at last node, > 1 when align at every intermediate node + g_bsn_latency_max : natural := 2; + g_use_aligner_at_first_node : boolean := true; + g_nof_aligners_max : positive := 16; g_block_size : natural := 1024; -- > 1, g_block_size=1 is not supported g_bsn_w : natural := c_dp_stream_bsn_w; -- number of bits in sosi BSN g_data_w : natural := 36; -- number of bits in sosi data g_data_replacement_value : integer := 0; -- output sosi data value for missing input blocks g_use_mm_output : boolean := false; -- output via MM or via streaming DP - g_pipeline_input : natural := 1; -- >= 0, choose 0 for wires, choose 1 to ease timing closure of in_sosi_arr - g_pipeline_output : natural := 1; -- >= 0, choose 0 for wires, choose 1 to ease timing closure of out_sosi_arr + g_pipeline_input : natural := 1; -- >= 0, 0 for wires, 1 to ease timing closure of in_sosi_arr + g_pipeline_output : natural := 1; -- >= 0, 0 for wires, 1 to ease timing closure of out_sosi_arr g_rd_latency : natural := 2 -- 1 or 2, choose 2 to ease timing closure ); port ( dp_rst : in std_logic; dp_clk : in std_logic; - node_index : in natural range 0 to g_nof_aligners_max - 1 := 0; -- only used when g_nof_aligners_max > 1 + chain_node_index : in natural range 0 to g_nof_aligners_max - 1 := 0; -- MM control stream_en_arr : in std_logic_vector(g_nof_streams - 1 downto 0) := (others => '1'); @@ -81,19 +140,25 @@ entity dp_bsn_align_v2 is -- Streaming input in_sosi_arr : in t_dp_sosi_arr(g_nof_streams - 1 downto 0); - -- Output via local MM interface in 
dp_clk domain, when g_use_mm_output = TRUE. - mm_sosi : out t_dp_sosi; -- streaming information that signals that an output block can be read - mm_copi : in t_mem_copi := c_mem_copi_rst; -- read access to output block, all output streams share same mm_copi + -- Output via local MM interface in dp_clk domain, when g_use_mm_output = true + -- . streaming information that signals that an output block can be read + mm_sosi : out t_dp_sosi; + -- . MM read access to output block, all output streams share same mm_copi + mm_copi : in t_mem_copi := c_mem_copi_rst; mm_cipo_arr : out t_mem_cipo_arr(g_nof_streams - 1 downto 0); - -- Output via streaming DP interface, when g_use_mm_output = FALSE. + -- Output via streaming DP interface, when g_use_mm_output = false. out_sosi_arr : out t_dp_sosi_arr(g_nof_streams - 1 downto 0) ); end dp_bsn_align_v2; architecture rtl of dp_bsn_align_v2 is + constant c_bsn_latency_first_node : natural := sel_a_b(g_use_aligner_at_first_node, g_bsn_latency_max, 0); + -- Circular buffer per stream, size is next power of 2 that fits - constant c_buffer_nof_blocks : natural := true_log_pow2(1 + g_nof_aligners_max * g_bsn_latency_max); + constant c_buffer_nof_blocks : natural := sel_a_b(g_nof_aligners_max = 1, + true_log_pow2(1 + g_bsn_latency_max), + true_log_pow2(1 + g_bsn_latency_max * (g_nof_aligners_max - 1) + c_bsn_latency_first_node)); constant c_ram_size : natural := c_buffer_nof_blocks * g_block_size; constant c_ram_buf : t_c_mem := (latency => 1, @@ -121,6 +186,7 @@ architecture rtl of dp_bsn_align_v2 is -- State type t_reg is record + ref_sosi : t_dp_sosi; -- p_write_arr wr_blk_pointer : natural; wr_copi_arr : t_mem_copi_arr(g_nof_streams - 1 downto 0); @@ -136,9 +202,12 @@ architecture rtl of dp_bsn_align_v2 is rd_blk_pointer : integer; -- use integer to detect need to wrap to natural rd_offset : std_logic_vector(c_ram_buf.adr_w - 1 downto 0); rd_copi : t_mem_copi; - fill_cipo_arr : t_mem_cipo_arr(g_nof_streams - 1 downto 0); -- used 
combinatorial to contain rd_cipo_arr from buffer or replacement data - out_bsn : std_logic_vector(g_bsn_w - 1 downto 0); -- hold BSN until next sop, for easy view in Wave window - out_channel_arr : t_channel_arr(g_nof_streams - 1 downto 0); -- hold channel until next sop per stream, for easy view in Wave window + fill_cipo_arr : t_mem_cipo_arr(g_nof_streams - 1 downto 0); -- used combinatorial to contain rd_cipo_arr + -- from buffer or replacement data + out_bsn : std_logic_vector(g_bsn_w - 1 downto 0); -- hold BSN until next sop, for easy view in Wave + -- window + out_channel_arr : t_channel_arr(g_nof_streams - 1 downto 0); -- hold channel until next sop per stream, for + -- easy view in Wave window replace_cnt_en_arr : std_logic_vector(g_nof_streams - 1 downto 0); end record; @@ -148,14 +217,14 @@ architecture rtl of dp_bsn_align_v2 is -- t_comb variable in p_comb, but then only the last assignment value will -- be visible via the signal dbg_wires in the Wave window. type t_comb is record - ref_sosi : t_dp_sosi; blk_pointer_slv : std_logic_vector(c_blk_pointer_w - 1 downto 0); product_slv : std_logic_vector(c_product_w - 1 downto 0); lost_data_flags_arr : std_logic_vector(g_nof_streams - 1 downto 0); out_sosi_arr : t_dp_sosi_arr(g_nof_streams - 1 downto 0); end record; - constant c_reg_rst : t_reg := (0, + constant c_reg_rst : t_reg := (c_dp_sosi_rst, + 0, (others => c_mem_copi_rst), (others => (others => '0')), (others => '0'), @@ -171,11 +240,10 @@ architecture rtl of dp_bsn_align_v2 is (others => (others => '0')), (others => '0')); - constant c_comb_rst : t_comb := (c_dp_sosi_rst, - (others => '0'), - (others => '0'), - (others => '0'), - (others => c_dp_sosi_rst)); + constant c_comb_rst : t_comb := ((others => '0'), + (others => '0'), + (others => '0'), + (others => c_dp_sosi_rst)); -- State registers for p_comb signal r : t_reg; @@ -199,7 +267,6 @@ architecture rtl of dp_bsn_align_v2 is signal comb_out_sosi_arr : t_dp_sosi_arr(g_nof_streams - 1 downto 0); 
-- Counter signals - signal replace_cnt_arr : t_slv_32_arr(g_nof_streams - 1 downto 0); signal nxt_hold_replace_cnt_arr : t_slv_32_arr(g_nof_streams - 1 downto 0); signal hold_replace_cnt_arr : t_slv_32_arr(g_nof_streams - 1 downto 0); @@ -230,7 +297,7 @@ begin end if; end process; - p_comb : process(r, in_sosi_arr_p, mm_copi, dp_copi, rd_cipo_arr, rd_sosi_arr, stream_en_arr, node_index) + p_comb : process(r, in_sosi_arr_p, mm_copi, dp_copi, rd_cipo_arr, rd_sosi_arr, stream_en_arr, chain_node_index) variable v : t_reg; -- State variable variable w : t_comb; -- Local wires = memoryless auxiliary variables begin @@ -264,26 +331,31 @@ begin end if; end loop; - ---------------------------------------------------------------------------- + --------------------------------------------------------------------------- -- p_control, all at sop of local reference input 0 - ---------------------------------------------------------------------------- - w.ref_sosi := in_sosi_arr_p(0); - if w.ref_sosi.sop = '1' then + --------------------------------------------------------------------------- + v.ref_sosi := in_sosi_arr_p(0); + -- The intention is to use r.ref_sosi.sop, that occurs one cycle after + -- in_sosi_arr_p(0).sop, to support immediate aligner output when + -- g_use_aligner_at_first_node = false, because the local block of + -- chain_node_index = 0 can be read one dp_clk cycle after it is written. + -- For now v.ref_sosi.sop (same cycle) is used, so no functional change. + if v.ref_sosi.sop = '1' then -- . write sync & bsn buffer - v.wr_blk_pointer := TO_UINT(w.ref_sosi.bsn(c_blk_pointer_w - 1 downto 0)); - v.sync_arr(v.wr_blk_pointer) := w.ref_sosi.sync; - v.bsn_arr(v.wr_blk_pointer) := w.ref_sosi.bsn(g_bsn_w - 1 downto 0); + v.wr_blk_pointer := TO_UINT(v.ref_sosi.bsn(c_blk_pointer_w - 1 downto 0)); + v.sync_arr(v.wr_blk_pointer) := v.ref_sosi.sync; + v.bsn_arr(v.wr_blk_pointer) := v.ref_sosi.bsn(g_bsn_w - 1 downto 0); -- . 
update read block pointer at g_bsn_latency_max blocks behind the - -- reference write pointer, dependent on the node_index: - -- - for g_nof_aligners_max = 1 the node_index = 0 fixed - -- - for g_nof_aligners_max > 1, node_index is the first BSN aligner in - -- a chain. Each subsequent node in the chain then has to account - -- for g_bsn_latency_max additional block latency. + -- reference write pointer, dependent on the chain_node_index: + -- - for g_nof_aligners_max = 1 the chain_node_index = 0 fixed + -- - for g_nof_aligners_max > 1, chain_node_index = 0 is the first BSN + -- aligner in a chain. Each subsequent node in the chain then has to + -- account for g_bsn_latency_max additional block latency. if g_nof_aligners_max = 1 then v.rd_blk_pointer := v.wr_blk_pointer - g_bsn_latency_max; else - v.rd_blk_pointer := v.wr_blk_pointer - g_bsn_latency_max * (1 + node_index); + v.rd_blk_pointer := v.wr_blk_pointer - g_bsn_latency_max * chain_node_index - c_bsn_latency_first_node; end if; if v.rd_blk_pointer < 0 then v.rd_blk_pointer := v.rd_blk_pointer + c_buffer_nof_blocks; diff --git a/libraries/base/dp/src/vhdl/mmp_dp_bsn_align_v2.vhd b/libraries/base/dp/src/vhdl/mmp_dp_bsn_align_v2.vhd index 20456ab387..8159e19755 100644 --- a/libraries/base/dp/src/vhdl/mmp_dp_bsn_align_v2.vhd +++ b/libraries/base/dp/src/vhdl/mmp_dp_bsn_align_v2.vhd @@ -45,15 +45,17 @@ entity mmp_dp_bsn_align_v2 is generic ( -- for dp_bsn_align_v2 g_nof_streams : natural; -- number of input and output streams - g_bsn_latency_max : natural; -- Maximum travel latency of a remote block in number of block periods T_blk - g_nof_aligners_max : natural := 1; -- 1 when only align at last node, > 1 when align at every intermediate node + g_bsn_latency_max : natural; -- Maximum travel latency of a remote block in number of block periods + g_use_aligner_at_first_node : boolean := true; + g_nof_aligners_max : natural := 1; -- 1 when only align at last node, + -- > 1 when align at every intermediate node 
g_block_size : natural := 32; -- > 1, g_block_size=1 is not supported g_bsn_w : natural := c_dp_stream_bsn_w; -- number of bits in sosi BSN g_data_w : natural; -- number of bits in sosi data g_data_replacement_value : integer := 0; -- output sosi data value for missing input blocks g_use_mm_output : boolean := false; -- output via MM or via streaming DP - g_pipeline_input : natural := 1; -- >= 0, choose 0 for wires, choose 1 to ease timing closure of in_sosi_arr - g_pipeline_output : natural := 1; -- >= 0, choose 0 for wires, choose 1 to ease timing closure of out_sosi_arr + g_pipeline_input : natural := 1; -- >= 0, 0 for wires, 1 to ease timing closure of in_sosi_arr + g_pipeline_output : natural := 1; -- >= 0, 0 for wires, 1 to ease timing closure of out_sosi_arr g_rd_latency : natural := 2; -- 1 or 2, choose 2 to ease timing closure -- for mms_dp_bsn_monitor_v2 g_nof_clk_per_sync : natural := 200 * 10**6; @@ -78,17 +80,17 @@ entity mmp_dp_bsn_align_v2 is dp_rst : in std_logic; dp_clk : in std_logic; - node_index : in natural range 0 to g_nof_aligners_max - 1 := 0; -- only used when g_nof_aligners_max > 1 + chain_node_index : in natural range 0 to g_nof_aligners_max - 1 := 0; -- only used when g_nof_aligners_max > 1 -- Streaming input in_sosi_arr : in t_dp_sosi_arr(g_nof_streams - 1 downto 0); - -- Output via local MM interface in dp_clk domain, when g_use_mm_output = TRUE. + -- Output via local MM interface in dp_clk domain, when g_use_mm_output = true. mm_sosi : out t_dp_sosi; -- streaming information that signals that an output block can be read mm_copi : in t_mem_copi := c_mem_copi_rst; -- read access to output block, all output streams share same mm_copi mm_cipo_arr : out t_mem_cipo_arr(g_nof_streams - 1 downto 0); - -- Output via streaming DP interface, when g_use_mm_output = FALSE. + -- Output via streaming DP interface, when g_use_mm_output = false. 
out_sosi_arr : out t_dp_sosi_arr(g_nof_streams - 1 downto 0) ); end mmp_dp_bsn_align_v2; @@ -220,6 +222,7 @@ begin generic map ( g_nof_streams => g_nof_streams, g_bsn_latency_max => g_bsn_latency_max, + g_use_aligner_at_first_node => g_use_aligner_at_first_node, g_nof_aligners_max => g_nof_aligners_max, g_block_size => g_block_size, g_bsn_w => g_bsn_w, @@ -233,7 +236,7 @@ begin port map ( dp_rst => dp_rst, dp_clk => dp_clk, - node_index => node_index, + chain_node_index => chain_node_index, -- MM control stream_en_arr => stream_en_arr, stream_replaced_cnt_arr => stream_replaced_cnt_arr, diff --git a/libraries/base/dp/tb/vhdl/tb_dp_bsn_align_v2.vhd b/libraries/base/dp/tb/vhdl/tb_dp_bsn_align_v2.vhd index c6db289f9e..de380eb9a9 100644 --- a/libraries/base/dp/tb/vhdl/tb_dp_bsn_align_v2.vhd +++ b/libraries/base/dp/tb/vhdl/tb_dp_bsn_align_v2.vhd @@ -34,7 +34,7 @@ -- . g_lost_bsn_id to loose a single block in stream 1 and verify that -- it gets replaced and flagged. -- . array of one or more BSN aligners via g_nof_aligners_max >= 1, --- using node_index_arr, only support tb for g_use_mm_output = FALSE +-- using chain_node_index_arr, only support tb for g_use_mm_output = false -- Remark: -- For this BSN aligner component it was essential to have an almost -- complete, reviewed, detailed design document, because it is a complex @@ -141,6 +141,9 @@ architecture tb of tb_dp_bsn_align_v2 is constant c_lost_bsn_stream_id : natural := sel_a_b(g_nof_streams > 1, 1, 0); -- fixed use stream 1 to verify g_lost_bsn_id. Use 0 for g_nof_streams = 1. 
+ -- In tb no support (yet) for immediate aligned output at first node, when c_nof_aligners_max > 1 + constant c_use_aligner_at_first_node : boolean := true; + -- In the tb only support MM interface verification for c_nof_aligners_max = 1 constant c_nof_aligners_max : positive := sel_a_b(g_use_mm_output, 1, g_nof_aligners_max); @@ -181,7 +184,7 @@ architecture tb of tb_dp_bsn_align_v2 is signal rst : std_logic := '1'; signal sl1 : std_logic := '1'; - signal node_index_arr : t_nat_natural_arr(0 to c_nof_aligners_max - 1) := array_init(0, c_nof_aligners_max, 1); + signal chain_node_index_arr : t_nat_natural_arr(0 to c_nof_aligners_max - 1) := array_init(0, c_nof_aligners_max, 1); signal stream_en_arr : std_logic_vector(g_nof_streams - 1 downto 0) := (others => '1'); -- default all streams are enabled signal stream_lost_arr : std_logic_vector(g_nof_streams - 1 downto 0) := (others => '0'); -- default no streams are lost @@ -501,6 +504,7 @@ begin generic map ( g_nof_streams => g_nof_streams, g_bsn_latency_max => g_bsn_latency_max, + g_use_aligner_at_first_node => c_use_aligner_at_first_node, g_nof_aligners_max => c_nof_aligners_max, g_block_size => g_block_size, g_bsn_w => g_bsn_w, @@ -515,8 +519,8 @@ begin dp_rst => rst, dp_clk => clk, -- Control - node_index => node_index_arr(0), - stream_en_arr => stream_en_arr, + chain_node_index => chain_node_index_arr(0), + stream_en_arr => stream_en_arr, -- Streaming input in_sosi_arr => dut_in_sosi_2arr(0), -- Output via local MM interface in dp_clk domain @@ -567,8 +571,8 @@ begin dp_rst => rst, dp_clk => clk, -- Control - node_index => node_index_arr(I), - stream_en_arr => stream_en_arr, + chain_node_index => chain_node_index_arr(I), + stream_en_arr => stream_en_arr, -- Streaming input in_sosi_arr => dut_in_sosi_2arr(I), -- Output via streaming DP interface diff --git a/libraries/base/dp/tb/vhdl/tb_mmp_dp_bsn_align_v2.vhd b/libraries/base/dp/tb/vhdl/tb_mmp_dp_bsn_align_v2.vhd index 78ff110792..6360636150 100644 --- 
a/libraries/base/dp/tb/vhdl/tb_mmp_dp_bsn_align_v2.vhd +++ b/libraries/base/dp/tb/vhdl/tb_mmp_dp_bsn_align_v2.vhd @@ -103,8 +103,10 @@ architecture tb of tb_mmp_dp_bsn_align_v2 is constant c_gap_size : natural := c_block_period - c_block_size; -- DUT latency + constant c_ref_sosi_latency : natural := 0; constant c_mm_to_dp_latency : natural := 1; - constant c_dut_latency : natural := c_pipeline_input + c_rd_latency + c_mm_to_dp_latency + c_pipeline_output; + constant c_dut_latency : natural := c_pipeline_input + c_ref_sosi_latency + + c_rd_latency + c_mm_to_dp_latency + c_pipeline_output; constant c_align_latency_nof_blocks : natural := c_bsn_latency_max * c_nof_aligners_max; -- in number blocks constant c_align_latency_nof_valid : natural := c_bsn_latency_max * c_nof_aligners_max * c_block_size; -- in number of data samples @@ -147,7 +149,7 @@ architecture tb of tb_mmp_dp_bsn_align_v2 is signal dp_clk : std_logic := '1'; signal dp_rst : std_logic := '1'; - signal node_index : natural := 0; + signal chain_node_index : natural := 0; signal ref_siso_arr : t_dp_siso_arr(c_nof_streams - 1 downto 0) := (others => c_dp_siso_rdy); signal ref_sosi_arr : t_dp_sosi_arr(c_nof_streams - 1 downto 0); -- generated stimuli signal in_sosi_arr : t_dp_sosi_arr(c_nof_streams - 1 downto 0) := (others => c_dp_sosi_rst); -- input stimuli @@ -456,7 +458,7 @@ begin dp_rst => dp_rst, dp_clk => dp_clk, - node_index => node_index, + chain_node_index => chain_node_index, -- Streaming input in_sosi_arr => in_sosi_arr, -- Output via local MM in dp_clk domain -- GitLab