Eric Kooistra
--- a/libraries/base/dp/src/vhdl/dp_bsn_align_v2.vhd

+ 43

− 39
+++ b/libraries/base/dp/src/vhdl/dp_bsn_align_v2.vhd

+ 43

− 39
 @@ -20,36 +20,36 @@
 @@ -20,36 +20,36 @@
 -- Purpose :
 --   Align frames from multiple input streams
 -- Description:
--   The aligner uses a circular buffer to capture the blocks that arrive at
+--   Aligner:
--   the input streams. The blocks have a block sequence number (BSN) that
+--   . The aligner uses a circular buffer to capture the blocks that arrive at
--   is used to align the inputs. The input stream 0 is treated as local
+--     the input streams. The blocks have a block sequence number (BSN) that
--   input stream that is ahead of the other remote input streams. After a
+--     is used to align the inputs. The input stream 0 is treated as local
--   certain number of blocks on input 0, the same block on all remote
+--     input stream that is ahead of the other remote input streams. After a
--   inputs should also have arrived. If not then they are replaced by
+--     certain number of blocks on input 0, the same block on all remote
--   replacement data. The output streams are paced by the block rate of
+--     inputs should also have arrived. If not then they are replaced by
--   input 0. The user has to read the block within the block period.
+--     replacement data. The output streams are paced by the block rate of
+--     input 0. The user has to read the block within the block period.
+--   . The aligner can align g_nof_streams that all arrive within a latency
+--     of g_bsn_latency_max after the local stream at index 0. The aligner
+--     can also be used in a chain of aligners, whereby each aligner typically
+--     has the local input and one remote input and the remote input is the
+--     output of an upstream aligner. Then the latency on the last node in
+--     the chain will be within g_nof_aligners_max * g_bsn_latency_max.
 --
--   The aligner can align g_nof_streams that all arrive within a latency
+--   Circular buffer:
--   of g_bsn_latency_max after the local stream at index 0. The aligner
+--   . The size of the circular buffer is c_buffer_nof_blocks and depends on
--   can also be used in a chain of aligners, whereby each aligner typically
+--     the maximum latency. The c_buffer_nof_blocks has to be a power of two to
--   has the local input and one remote input and the remote input is the
+--     ease the control of the circular buffer. The lowest bits of the input
--   output of an upstream aligner. Then the latency on the last node in
+--     block sequence number (BSN) are used as write block index into the
--   the chain will be within g_nof_aligners_max * g_bsn_latency_max.
+--     circular buffer. The g_bsn_latency_first_node can be useful to reduce
--
+--     the required circular buffer size just enough, such that the next power
--   The size of the circular buffer is c_buffer_nof_blocks and depends on the
+--     of two is only a few blocks larger, instead of almost a factor two
--   maximum latency. The c_buffer_nof_blocks has to a power of two to ease
+--     larger. This then can save a significant amount of block RAM.
--   the control of the circular buffer. The lowest bits of the input block
+--   . In case of a chain of aligners then the circular buffer size depends on
--   sequence number (BSN) are used as write block index into the circular
+--     the latency of the local input. The most remote input will only use a
--   buffer. The g_bsn_latency_first_node can be useful to reduce the
+--     fraction of the buffer. Therefore more block RAM can be saved by using
--   required circular buffer size just enough, such that the next power of two
+--     a smaller circular buffer size for signal inputs that are more
--   is only a feq blocks larger, instead of almost a factor two larger. This
+--     remote (i.e. that have passed through more upstream aligners).
--   then may save a significant amount of block RAM.
--
--   In case of a chain of aligners then the circular buffer size depends on
--   the latency of local input. The most remote input will only use a
--   fraction of the buffer. Therefore more block RAM can be saved by using
--   a smaller circular buffer size for signal inputs that are from more
--   remote (i.e. that have passed through more upstream aligners).
 --
 --   Features:
 --   . The g_block_size <= block period, so supports input blocks arriving
 @@ -213,7 +213,7 @@ architecture rtl of dp_bsn_align_v2 is
 @@ -213,7 +213,7 @@ architecture rtl of dp_bsn_align_v2 is
  -- . For unique representation as signal wire, the p_comb should assign each
  --   field in t_comb only once to a variable. It is allowed to reassign a
  --   t_comb variable in p_comb, but then only the last assignment value will
-  --   be visible via the signal dbg_wires in the Wave window.
+  --   be visible via the signal w_comb in the Wave window.
  type t_comb is record
    blk_pointer_slv     : std_logic_vector(c_blk_pointer_w - 1 downto 0);
    product_slv         : std_logic_vector(c_product_w - 1 downto 0);
 @@ -248,7 +248,7 @@ architecture rtl of dp_bsn_align_v2 is
 @@ -248,7 +248,7 @@ architecture rtl of dp_bsn_align_v2 is
  signal nxt_r             : t_reg;
  -- Memoryless signals in p_comb (wires used as local variables)
-  signal dbg_wires         : t_comb;
+  signal w_comb            : t_comb;
  -- Structural signals (wires used to connect components and IO)
  signal dp_done           : std_logic;
 @@ -333,11 +333,11 @@ begin
 @@ -333,11 +333,11 @@ begin
    -- p_control, all at sop of local reference input 0
    ---------------------------------------------------------------------------
    v.ref_sosi := in_sosi_arr_p(0);
-    -- Use r.ref_sosi.sop, that occurs one cycle after in_sosi_arr_p(I).sop,
+    -- Use v.ref_sosi.sop instead of r.ref_sosi.sop, to support alignment of
-    -- to support immediate aligner output when g_use_aligner_at_first_node =
+    -- streams that have no data valid gap between blocks, so when
-    -- false. While the local block of chain_node_index = 0 is written into
+    -- g_block_size is equal to the block period or when shorter blocks have
-    -- the circular buffer, then it can already be read from the circular
+    -- jitter in arrival time that could cause two blocks to arrive without a
-    -- buffer one dp_clk cycle later.
+    -- gap.
    if v.ref_sosi.sop = '1' then
      -- . write sync & bsn buffer
      v.wr_blk_pointer := TO_UINT(v.ref_sosi.bsn(c_blk_pointer_w - 1 downto 0));
 @@ -365,6 +365,10 @@ begin
 @@ -365,6 +365,10 @@ begin
      v.rd_offset := RESIZE_UVEC(w.product_slv, c_ram_buf.adr_w);
      -- . issue mm_sosi, if there is output ready to be read, indicated by filled reference block
+      --   - can use 'if r.filled_arr(0)' instead of 'if v.filled_arr(0)',
+      --     because input stream 0 arrives first, so is already filled
+      --   - need to use 'not v.filled_arr(I)' for w.lost_data_flags_arr(I),
+      --     because last input I = g_nof_streams - 1 may have just been filled.
      if r.filled_arr(0)(v.rd_blk_pointer) = '1' then
        v.mm_sosi.sop := '1';
        v.mm_sosi.eop := '1';
 @@ -413,7 +417,7 @@ begin
 @@ -413,7 +417,7 @@ begin
      -- Do the output via the MM interface
      --------------------------------------------------------------------------
      -- . adjust the rd address to the current buffer output block
-      --   sum yields c_ram_buf.adr_w bits, because left operand in ADD_UVECdetermines width
+      --   sum yields c_ram_buf.adr_w bits, because left operand in ADD_UVEC determines width
      v.rd_copi := mm_copi;
      v.rd_copi.address := RESIZE_MEM_ADDRESS(ADD_UVEC(r.rd_offset, mm_copi.address));
 @@ -427,7 +431,7 @@ begin
 @@ -427,7 +431,7 @@ begin
      -- Do the output via the DP streaming interface
      --------------------------------------------------------------------------
      -- . adjust the rd address
-      --   sum yields c_ram_buf.adr_w bits, because left operand in ADD_UVECdetermines width
+      --   sum yields c_ram_buf.adr_w bits, because left operand in ADD_UVEC determines width
      v.rd_copi := dp_copi;
      v.rd_copi.address := RESIZE_MEM_ADDRESS(ADD_UVEC(r.rd_offset, dp_copi.address));
 @@ -475,7 +479,7 @@ begin
 @@ -475,7 +479,7 @@ begin
    nxt_r <= v;
    -- local wires, only for view in wave window
-    dbg_wires <= w;
+    w_comb <= w;
  end process;
  ------------------------------------------------------------------------------