From cbd034d0b4abce8a713b890fbdbde386e30baeaf Mon Sep 17 00:00:00 2001 From: Eric Kooistra <kooistra@astron.nl> Date: Thu, 14 Mar 2024 09:11:44 +0100 Subject: [PATCH] Add circular buffer size example with g_bsn_latency_first_node in description. --- .../base/dp/src/vhdl/dp_bsn_align_v2.vhd | 36 +++++++++++++------ 1 file changed, 25 insertions(+), 11 deletions(-) diff --git a/libraries/base/dp/src/vhdl/dp_bsn_align_v2.vhd b/libraries/base/dp/src/vhdl/dp_bsn_align_v2.vhd index 27447448cb..61db3174e6 100644 --- a/libraries/base/dp/src/vhdl/dp_bsn_align_v2.vhd +++ b/libraries/base/dp/src/vhdl/dp_bsn_align_v2.vhd @@ -45,6 +45,13 @@ -- the required circular buffer size just enough, such that the next power -- of two is only a few blocks larger, instead of almost a factor two -- larger. This then can save a significant amount of block RAM. +-- For example: The circular buffer size c_buffer_nof_blocks is 1 + the +-- sum of bsn latencies at each node. Therefore, if g_nof_aligners_max = 16 +-- (a power of two) and g_bsn_latency_max = 2, then the circular buffer +-- becomes true_log_pow2(1 + 16 * 2) = 64 blocks, so almost twice as large +-- as needed. If the first input stream does not have active remote input, +-- or is disabled via stream_en_arr, then choose g_bsn_latency_first_node +-- = 1, to get a buffer size of true_log_pow2(1 + 15 * 2 + 1) = 32 blocks. -- . In case of a chain of aligners then the circular buffer size depends on -- the latency of local input. The most remote input will only use a -- fraction of the buffer. Therefore more block RAM can be saved by using @@ -65,14 +72,8 @@ -- the local stream. Streams index > 0 is for remote streams. The -- remote streams arrive later than the local stream, but within -- g_bsn_latency_max or within an integer multiple of g_bsn_latency_max. --- . g_bsn_latency_max: maximum travel latency of a remote block in number --- of block periods T_blk. --- . g_nof_aligners_max: Number of dp_bsn_align_v2 aligners in a chain. 
--- = 1 when only align at last node, or --- > 1 when align at every intermediate node in a chain of nodes, and then --- g_nof_aligners_max should equal the number of nodes for --- chain_node_index range. The g_nof_aligners_max is the number of --- nodes in the chain including the first node. +-- . g_bsn_latency_max: >= 1, maximum travel latency of a remote block in +-- number of block periods T_blk. -- . g_bsn_latency_first_node: typically <= g_bsn_latency_max of the other -- nodes in a chain. Use g_bsn_latency_first_node = 0 for immediate -- output from first node in a chain of nodes. Only used when @@ -81,6 +82,12 @@ -- size of the circular buffer. If the circular buffer is large enough -- anyway, then the g_bsn_latency_first_node setting is don't care, -- assuming that a little extra latency is don't care. +-- . g_nof_aligners_max: Number of dp_bsn_align_v2 aligners in a chain. +-- = 1 when only align at last node, or +-- > 1 when align at every intermediate node in a chain of nodes, and then +-- g_nof_aligners_max should equal the number of nodes for +-- chain_node_index range. The g_nof_aligners_max is the number of +-- nodes in the chain including the first node. -- -- Inputs: -- . chain_node_index: Node index in chain of nodes. First node has index 0. @@ -104,6 +111,13 @@ -- APERTIF. Main differences are that the old component uses FIFO buffers, -- timeouts and states, and v2 does not, which makes v2 simpler and more -- robust. +-- . The g_bsn_latency_first_node = 0 should also be feasible, but does not +-- work and is not investigated further, because g_bsn_latency_first_node = +-- 1 in combination with g_bsn_latency_max = 2 is sufficient to reduce the +-- circular buffer size when g_nof_aligners_max is a power of two. +-- . Using a circular buffer with optimum size, that does not have to have a +-- power of two number of blocks, makes the circular buffer control and +-- access more complicated and is not investigated further. 
library IEEE,common_lib; use IEEE.std_logic_1164.all; @@ -115,7 +129,7 @@ use work.dp_stream_pkg.all; entity dp_bsn_align_v2 is generic ( g_nof_streams : natural := 2; -- >= 2, number of input and output streams - g_bsn_latency_max : natural := 2; + g_bsn_latency_max : natural := 2; -- >= 1 g_bsn_latency_first_node : natural := 2; -- default use same as g_bsn_latency_max g_nof_aligners_max : positive := 16; g_block_size : natural := 1024; -- > 1, g_block_size=1 is not supported @@ -153,7 +167,7 @@ entity dp_bsn_align_v2 is end dp_bsn_align_v2; architecture rtl of dp_bsn_align_v2 is - -- Circular buffer per stream, size is next power of 2 that fits + -- Circular buffer per stream, size is next power of two that fits constant c_buffer_nof_blocks : natural := sel_a_b(g_nof_aligners_max = 1, true_log_pow2(1 + g_bsn_latency_max), true_log_pow2(1 + g_bsn_latency_max * (g_nof_aligners_max - 1) + g_bsn_latency_first_node)); @@ -165,7 +179,7 @@ architecture rtl of dp_bsn_align_v2 is nof_dat => c_ram_size, init_sl => '0'); - -- Use +1 to ensure that g_block_size that is power of 2 also fits in c_block_size_slv + -- Use +1 to ensure that g_block_size that is power of two also fits in c_block_size_slv constant c_block_size_w : natural := ceil_log2(g_block_size + 1); constant c_block_size_slv : std_logic_vector(c_block_size_w - 1 downto 0) := TO_UVEC(g_block_size, c_block_size_w); constant c_blk_pointer_w : natural := ceil_log2(c_buffer_nof_blocks); -- GitLab