diff --git a/libraries/base/dp/src/vhdl/dp_bsn_align_v2.vhd b/libraries/base/dp/src/vhdl/dp_bsn_align_v2.vhd index d4242feaebc959a9d0e792e2416a2f3cb1ec3c88..27447448cb25d3e196dfb262c3f0f332e86be09e 100644 --- a/libraries/base/dp/src/vhdl/dp_bsn_align_v2.vhd +++ b/libraries/base/dp/src/vhdl/dp_bsn_align_v2.vhd @@ -20,36 +20,36 @@ -- Purpose : -- Align frames from multiple input streams -- Description: --- The aligner uses a circular buffer to capture the blocks that arrive at --- the input streams. The blocks have a block sequence number (BSN) that --- is used to align the inputs. The input stream 0 is treated as local --- input stream that is ahead of the other remote input streams. After a --- certain number of blocks on input 0, the same block on all remote --- inputs should also have arrived. If not then they are replaced by --- replacement data. The output streams are paced by the block rate of --- input 0. The user has to read the block within the block period. +-- Aligner: +-- . The aligner uses a circular buffer to capture the blocks that arrive at +-- the input streams. The blocks have a block sequence number (BSN) that +-- is used to align the inputs. The input stream 0 is treated as local +-- input stream that is ahead of the other remote input streams. After a +-- certain number of blocks on input 0, the same block on all remote +-- inputs should also have arrived. If not then they are replaced by +-- replacement data. The output streams are paced by the block rate of +-- input 0. The user has to read the block within the block period. +-- . The aligner can align g_nof_streams that all arrive within a latency +-- of g_bsn_latency_max after the local stream at index 0. The aligner +-- can also be used in a chain of aligners, whereby each aligner typically +-- has the local input and one remote input and the remote input is the +-- output of an upstream aligner. 
Then the latency on the last node in +-- the chain will be within g_nof_aligners_max * g_bsn_latency_max. -- --- The aligner can align g_nof_streams that all arrive within a latency --- of g_bsn_latency_max after the local stream at index 0. The aligner --- can also be used in a chain of aligners, whereby each aligner typically --- has the local input and one remote input and the remote input is the --- output of an upstream aligner. Then the latency on the last node in --- the chain will be within g_nof_aligners_max * g_bsn_latency_max. --- --- The size of the circular buffer is c_buffer_nof_blocks and depends on the --- maximum latency. The c_buffer_nof_blocks has to a power of two to ease --- the control of the circular buffer. The lowest bits of the input block --- sequence number (BSN) are used as write block index into the circular --- buffer. The g_bsn_latency_first_node can be useful to reduce the --- required circular buffer size just enough, such that the next power of two --- is only a feq blocks larger, instead of almost a factor two larger. This --- then may save a significant amount of block RAM. --- --- In case of a chain of aligners then the circular buffer size depends on --- the latency of local input. The most remote input will only use a --- fraction of the buffer. Therefore more block RAM can be saved by using --- a smaller circular buffer size for signal inputs that are from more --- remote (i.e. that have passed through more upstream aligners). +-- Circular buffer: +-- . The size of the circular buffer is c_buffer_nof_blocks and depends on +-- the maximum latency. The c_buffer_nof_blocks has to be a power of two to +-- ease the control of the circular buffer. The lowest bits of the input +-- block sequence number (BSN) are used as write block index into the +-- circular buffer. 
The g_bsn_latency_first_node can be useful to reduce +-- the required circular buffer size just enough, such that the next power +-- of two is only a few blocks larger, instead of almost a factor two +-- larger. This then can save a significant amount of block RAM. +-- . In case of a chain of aligners then the circular buffer size depends on +-- the latency of local input. The most remote input will only use a +-- fraction of the buffer. Therefore more block RAM can be saved by using +-- a smaller circular buffer size for signal inputs that are from more +-- remote (i.e. that have passed through more upstream aligners). -- -- Features: -- . The g_block_size <= block period, so supports input blocks arriving @@ -213,7 +213,7 @@ architecture rtl of dp_bsn_align_v2 is -- . For unique representation as signal wire, the p_comb should assign each -- field in t_comb only once to a variable. It is allowed to reasign a -- t_comb variable in p_comb, but then only the last assignment value will - -- be visible via the signal dbg_wires in the Wave window. + -- be visible via the signal w_comb in the Wave window. type t_comb is record blk_pointer_slv : std_logic_vector(c_blk_pointer_w - 1 downto 0); product_slv : std_logic_vector(c_product_w - 1 downto 0); @@ -248,7 +248,7 @@ architecture rtl of dp_bsn_align_v2 is signal nxt_r : t_reg; -- Memoryless signals in p_comb (wires used as local variables) - signal dbg_wires : t_comb; + signal w_comb : t_comb; -- Structural signals (wires used to connect components and IO) signal dp_done : std_logic; @@ -333,11 +333,11 @@ begin -- p_control, all at sop of local reference input 0 --------------------------------------------------------------------------- v.ref_sosi := in_sosi_arr_p(0); - -- Use r.ref_sosi.sop, that occurs one cycle after in_sosi_arr_p(I).sop, - -- to support immediate aligner output when g_use_aligner_at_first_node = - -- false. 
While the local block of chain_node_index = 0 is written into - -- the circular buffer, then it can already be read from the circular - -- buffer one dp_clk cycle later. + -- Use v.ref_sosi.sop instead of r.ref_sosi.sop, to support alignment of + -- streams that have no data valid gap between blocks, so when + -- g_block_size is equal to the block period or when shorter blocks have + -- jitter in arrival time that could cause two blocks to arrive without a + -- gap. if v.ref_sosi.sop = '1' then -- . write sync & bsn buffer v.wr_blk_pointer := TO_UINT(v.ref_sosi.bsn(c_blk_pointer_w - 1 downto 0)); @@ -365,6 +365,10 @@ begin v.rd_offset := RESIZE_UVEC(w.product_slv, c_ram_buf.adr_w); -- . issue mm_sosi, if there is output ready to be read, indicated by filled reference block + -- - can use 'if r.filled_arr(0)' instead of 'if v.filled_arr(0)', + -- because input stream 0 arrives first, so is already filled + -- - need to use 'not v.filled_arr(I)' for w.lost_data_flags_arr(I), + -- because last input I = g_nof_streams - 1 may have just been filled. if r.filled_arr(0)(v.rd_blk_pointer) = '1' then v.mm_sosi.sop := '1'; v.mm_sosi.eop := '1'; @@ -413,7 +417,7 @@ begin -- Do the output via the MM interface -------------------------------------------------------------------------- -- . adjust the rd address to the current buffer output block - -- sum yields c_ram_buf.adr_w bits, because left operand in ADD_UVECdetermines width + -- sum yields c_ram_buf.adr_w bits, because left operand in ADD_UVEC determines width v.rd_copi := mm_copi; v.rd_copi.address := RESIZE_MEM_ADDRESS(ADD_UVEC(r.rd_offset, mm_copi.address)); @@ -427,7 +431,7 @@ begin -- Do the output via the DP streaming interface -------------------------------------------------------------------------- -- . 
adjust the rd address - -- sum yields c_ram_buf.adr_w bits, because left operand in ADD_UVECdetermines width + -- sum yields c_ram_buf.adr_w bits, because left operand in ADD_UVEC determines width v.rd_copi := dp_copi; v.rd_copi.address := RESIZE_MEM_ADDRESS(ADD_UVEC(r.rd_offset, dp_copi.address)); @@ -475,7 +479,7 @@ begin nxt_r <= v; -- local wires, only for view in wave window - dbg_wires <= w; + w_comb <= w; end process; ------------------------------------------------------------------------------