From cbd034d0b4abce8a713b890fbdbde386e30baeaf Mon Sep 17 00:00:00 2001
From: Eric Kooistra <kooistra@astron.nl>
Date: Thu, 14 Mar 2024 09:11:44 +0100
Subject: [PATCH] Add circular buffer size example with
 g_bsn_latency_first_node in description.

---
 .../base/dp/src/vhdl/dp_bsn_align_v2.vhd      | 36 +++++++++++++------
 1 file changed, 25 insertions(+), 11 deletions(-)

diff --git a/libraries/base/dp/src/vhdl/dp_bsn_align_v2.vhd b/libraries/base/dp/src/vhdl/dp_bsn_align_v2.vhd
index 27447448cb..61db3174e6 100644
--- a/libraries/base/dp/src/vhdl/dp_bsn_align_v2.vhd
+++ b/libraries/base/dp/src/vhdl/dp_bsn_align_v2.vhd
@@ -45,6 +45,13 @@
 --     the required circular buffer size just enough, such that the next power
 --     of two is only a few blocks larger, instead of almost a factor two
 --     larger. This then can save a significant amount of block RAM.
+--     For example: The required circular buffer size is 1 + the sum of the
+--     bsn latencies at each node. Therefore, if g_nof_aligners_max = 16 (a
+--     power of two) and g_bsn_latency_max = 2, then c_buffer_nof_blocks
+--     becomes true_log_pow2(1 + 16 * 2) = 64 blocks, so almost twice as large
+--     as needed. If the first input stream does not have active remote input,
+--     or is disabled via stream_en_arr, then choose g_bsn_latency_first_node
+--     = 1, to get a buffer size of true_log_pow2(1 + 15 * 2 + 1) = 32 blocks.
 --   . In case of a chain of aligners then the circular buffer size depends on
 --     the latency of local input. The most remote input will only use a
 --     fraction of the buffer. Therefore more block RAM can be saved by using
@@ -65,14 +72,8 @@
 --     the local stream. Streams index > 0 is for remote streams. The
 --     remote streams arrive later than the local stream, but within
 --     g_bsn_latency_max or within an integer multiple of g_bsn_latency_max.
---   . g_bsn_latency_max: maximum travel latency of a remote block in number
---     of block periods T_blk.
---   . g_nof_aligners_max: Number of dp_bsn_align_v2 aligners in a chain.
---     = 1 when only align at last node, or
---     > 1 when align at every intermediate node in a chain of nodes, and then
---         g_nof_aligners_max should equal the number of nodes for
---         chain_node_index range. The g_nof_aligners_max is the number of
---         nodes in the chain including the first node.
+--   . g_bsn_latency_max: >= 1, maximum travel latency of a remote block in
+--     number of block periods T_blk.
 --   . g_bsn_latency_first_node: typically <= g_bsn_latency_max of the other
 --     nodes in a chain. Use g_bsn_latency_first_node = 0 for immediate
 --     output from first node in a chain of nodes. Only used when
@@ -81,6 +82,12 @@
 --     size of the circular buffer. If the circular buffer is large enough
 --     anyway, then the g_bsn_latency_first_node setting is don't care,
 --     assuming that a little extra latency is don't care.
+--   . g_nof_aligners_max: Number of dp_bsn_align_v2 aligners in a chain.
+--     = 1 when only align at last node, or
+--     > 1 when align at every intermediate node in a chain of nodes, and then
+--         g_nof_aligners_max should equal the number of nodes for
+--         chain_node_index range. The g_nof_aligners_max is the number of
+--         nodes in the chain including the first node.
 --
 --   Inputs:
 --   . chain_node_index: Node index in chain of nodes. First node has index 0.
@@ -104,6 +111,13 @@
 --   APERTIF. Main differences are that the old component uses FIFO buffers,
 --   timeouts and states, and v2 does not, which makes v2 simpler and more
 --   robust.
+-- . Using g_bsn_latency_first_node = 0 should also be feasible, but it does
+--   not work. This is not investigated further, because the combination of
+--   g_bsn_latency_first_node = 1 and g_bsn_latency_max = 2 is sufficient to
+--   reduce the circular buffer size when g_nof_aligners_max is a power of two.
+-- . Using a circular buffer of optimum size, i.e. with a number of blocks
+--   that is not necessarily a power of two, would make the circular buffer
+--   control and access more complicated, and is not investigated further.
 
 library IEEE,common_lib;
 use IEEE.std_logic_1164.all;
@@ -115,7 +129,7 @@ use work.dp_stream_pkg.all;
 entity dp_bsn_align_v2 is
   generic (
     g_nof_streams                : natural := 2;  -- >= 2, number of input and output streams
-    g_bsn_latency_max            : natural := 2;
+    g_bsn_latency_max            : natural := 2;  -- >= 1
     g_bsn_latency_first_node     : natural := 2;  -- default use same as g_bsn_latency_max
     g_nof_aligners_max           : positive := 16;
     g_block_size                 : natural := 1024;  -- > 1, g_block_size=1 is not supported
@@ -153,7 +167,7 @@ entity dp_bsn_align_v2 is
 end dp_bsn_align_v2;
 
 architecture rtl of dp_bsn_align_v2 is
-  -- Circular buffer per stream, size is next power of 2 that fits
+  -- Circular buffer per stream, size is next power of two that fits
   constant c_buffer_nof_blocks : natural := sel_a_b(g_nof_aligners_max = 1,
            true_log_pow2(1 + g_bsn_latency_max),
            true_log_pow2(1 + g_bsn_latency_max * (g_nof_aligners_max - 1) + g_bsn_latency_first_node));
@@ -165,7 +179,7 @@ architecture rtl of dp_bsn_align_v2 is
                                           nof_dat  => c_ram_size,
                                           init_sl  => '0');
 
-  -- Use +1 to ensure that g_block_size that is power of 2 also fits in c_block_size_slv
+  -- Use +1 to ensure that g_block_size that is power of two also fits in c_block_size_slv
   constant c_block_size_w   : natural := ceil_log2(g_block_size + 1);
   constant c_block_size_slv : std_logic_vector(c_block_size_w - 1 downto 0) := TO_UVEC(g_block_size, c_block_size_w);
   constant c_blk_pointer_w  : natural := ceil_log2(c_buffer_nof_blocks);
-- 
GitLab