From efa4129e20655bfcb00ff44fc601940371748c4a Mon Sep 17 00:00:00 2001
From: Eric Kooistra <kooistra@astron.nl>
Date: Thu, 23 Sep 2021 17:06:15 +0200
Subject: [PATCH] Corrected r.dp_sosi.

---
 .../base/dp/src/vhdl/dp_bsn_align_v2.vhd      | 124 ++++++++++--------
 1 file changed, 72 insertions(+), 52 deletions(-)

diff --git a/libraries/base/dp/src/vhdl/dp_bsn_align_v2.vhd b/libraries/base/dp/src/vhdl/dp_bsn_align_v2.vhd
index d8e6bdc316..051a6533af 100644
--- a/libraries/base/dp/src/vhdl/dp_bsn_align_v2.vhd
+++ b/libraries/base/dp/src/vhdl/dp_bsn_align_v2.vhd
@@ -26,11 +26,12 @@
 --   input stream that is ahead of the other remote input streams. After a
 --   certain number of blocks on input 0, the same block on all remote
 --   inputs should also have arrived. If not then they are replaced by
---   filler data. The output streams are paced by the block rate of input 0.
+--   replacement data. The output streams are paced by the block rate of input 0.
 --   The user has to read the block within the block period.
 --
 --   Features:
---   . uses filler flag and data to replace lost input blocks
+--   . uses lost_data flag and replacement data to replace lost input blocks
+--   . uses replacement data to replace disabled input streams
 --   . output block can be read in arbitrary order
 --
 --   For more detailed description see:
@@ -38,7 +39,7 @@
 --
 -- Remarks:
 -- . This dp_bsn_align_v2.vhd replaces the dp_bsn_align.vhd that was used in
---   APERTIF. Mian differences are that the old component uses FIFO buffers,
+--   APERTIF. Main differences are that the old component uses FIFO buffers,
 --   timeouts and states, and v2 does not, which makes v2 simpler and more
 --   robust.
 
@@ -54,13 +55,11 @@ ENTITY dp_bsn_align_v2 IS
   GENERIC (
     g_nof_streams                : NATURAL;           -- number of input and output streams
     g_bsn_latency_max            : NATURAL;           -- Maximum travel latency of a remote block in number of block periods T_blk
-    g_bsn_latency_use_node_index : BOOLEAN := FALSE;  -- FALSE for align at end node, TRUE for align at every intermediate node
-    g_node_index_max             : NATURAL := 31;     -- limit to functional 5 bit range, instead of full 31 bit NATURAL range
+    g_nof_aligners_max           : POSITIVE := 1;     -- 1 when only align at last node, > 1 when align at every intermediate node
     g_block_size                 : NATURAL := 32;     -- > 1, g_block_size=1 is not supported
-    g_buffer_nof_blocks          : NATURAL;           -- circular buffer size per input, choose ceil_pow2(1 + g_bsn_latency_max)
     g_bsn_w                      : NATURAL := c_dp_stream_bsn_w;  -- number of bits in sosi BSN
     g_data_w                     : NATURAL;           -- number of bits in sosi data
-    g_filler_value               : INTEGER := 0;      -- output sosi data value for missing input blocks
+    g_replacement_value          : INTEGER := 0;      -- output sosi data value for missing input blocks
     g_use_mm_output              : BOOLEAN := FALSE;  -- output via MM or via streaming DP
     g_pipeline_input             : NATURAL := 0;      -- >= 0, choose 0 for wires, choose 1 to ease timing closure
     g_rd_latency                 : NATURAL := 1       -- 1 or 2, choose 2 to ease timing closure
@@ -69,7 +68,7 @@ ENTITY dp_bsn_align_v2 IS
     dp_rst         : IN  STD_LOGIC;
     dp_clk         : IN  STD_LOGIC;
 
-    node_index     : IN  NATURAL RANGE 0 TO g_node_index_max := 0;  -- only used when g_bsn_latency_use_node_index is TRUE
+    node_index     : IN  NATURAL RANGE 0 TO g_nof_aligners_max := 0;  -- only used when g_nof_aligners_max > 1
 
     -- MM control
     stream_en_arr  : IN  STD_LOGIC_VECTOR(g_nof_streams-1 DOWNTO 0) := (OTHERS=>'1');
@@ -91,7 +90,9 @@ END dp_bsn_align_v2;
 ARCHITECTURE rtl OF dp_bsn_align_v2 IS
 
   -- Circular buffer per stream
-  CONSTANT c_ram_size       : NATURAL := g_buffer_nof_blocks * g_block_size;
+  CONSTANT c_buffer_nof_blocks : NATURAL :=  ceil_pow2(1 + g_nof_aligners_max * g_bsn_latency_max);
+
+  CONSTANT c_ram_size       : NATURAL := c_buffer_nof_blocks * g_block_size;
   CONSTANT c_ram_buf        : t_c_mem := (latency  => 1,
                                           adr_w    => ceil_log2(c_ram_size),
                                           dat_w    => g_data_w,
@@ -100,7 +101,7 @@ ARCHITECTURE rtl OF dp_bsn_align_v2 IS
 
   CONSTANT c_block_size_w   : NATURAL := ceil_log2(g_block_size);
   CONSTANT c_block_size_slv : STD_LOGIC_VECTOR(c_block_size_w-1 DOWNTO 0) := TO_UVEC(g_block_size, c_block_size_w);
-  CONSTANT c_blk_pointer_w  : NATURAL := ceil_log2(g_buffer_nof_blocks);
+  CONSTANT c_blk_pointer_w  : NATURAL := ceil_log2(c_buffer_nof_blocks);
 
   -- Use fixed slv width instead of using naturals for address calculation, to
   -- avoid that synthesis may infer a too larger multiplier
@@ -108,25 +109,26 @@ ARCHITECTURE rtl OF dp_bsn_align_v2 IS
 
   TYPE t_bsn_arr IS ARRAY (INTEGER RANGE <>) OF STD_LOGIC_VECTOR(g_bsn_w-1 DOWNTO 0);
   TYPE t_adr_arr IS ARRAY (INTEGER RANGE <>) OF STD_LOGIC_VECTOR(c_mem_ram.adr_w-1 DOWNTO 0);
-  TYPE t_filled_arr IS ARRAY (INTEGER RANGE <>) OF STD_LOGIC_VECTOR(g_buffer_nof_blocks-1 DOWNTO 0);
+  TYPE t_filled_arr IS ARRAY (INTEGER RANGE <>) OF STD_LOGIC_VECTOR(c_buffer_nof_blocks-1 DOWNTO 0);
 
   TYPE t_reg IS RECORD
     -- p_write_arr
-    wr_pointer        : NATURAL;
-    wr_copi_arr       : t_mem_copi_arr(g_nof_streams-1 DOWNTO 0);
+    wr_pointer           : NATURAL;
+    wr_copi_arr          : t_mem_copi_arr(g_nof_streams-1 DOWNTO 0);
     -- all streams
-    filled_arr        : t_filled_arr(g_nof_streams-1 DOWNTO 0);
-    use_filler_data   : STD_LOGIC_VECTOR(g_nof_streams-1 DOWNTO 0);
+    filled_arr           : t_filled_arr(g_nof_streams-1 DOWNTO 0);
+    use_replacement_data : STD_LOGIC_VECTOR(g_nof_streams-1 DOWNTO 0);
     -- local reference
-    sync_arr          : STD_LOGIC_VECTOR(g_buffer_nof_blocks-1 DOWNTO 0);
-    bsn_arr           : t_bsn_arr(g_buffer_nof_blocks-1 DOWNTO 0);
-    mm_sosi           : t_dp_sosi;
+    sync_arr             : STD_LOGIC_VECTOR(c_buffer_nof_blocks-1 DOWNTO 0);
+    bsn_arr              : t_bsn_arr(c_buffer_nof_blocks-1 DOWNTO 0);
+    mm_sosi              : t_dp_sosi;
+    dp_sosi              : t_dp_sosi;
     -- p_read
-    rd_pointer        : INTEGER;  -- use integer to detect need to wrap to natural
-    rd_offset         : STD_LOGIC_VECTOR(c_mem_ram.adr_w-1 DOWNTO 0);
-    rd_copi           : t_mem_copi;
-    fill_cipo_arr     : t_mem_cipo_arr(g_nof_streams-1 DOWNTO 0);  -- used combinatorial to contain rd_cipo_arr from buffer or filler data
-    out_bsn           : STD_LOGIC_VECTOR(g_bsn_w-1 DOWNTO 0);  -- hold BSN for streaming output
+    rd_pointer           : INTEGER;  -- use integer to detect need to wrap to natural
+    rd_offset            : STD_LOGIC_VECTOR(c_mem_ram.adr_w-1 DOWNTO 0);
+    rd_copi              : t_mem_copi;
+    fill_cipo_arr        : t_mem_cipo_arr(g_nof_streams-1 DOWNTO 0);  -- used combinatorial to contain rd_cipo_arr from buffer or replacement data
+    out_bsn              : STD_LOGIC_VECTOR(g_bsn_w-1 DOWNTO 0);  -- hold BSN for streaming output
   END RECORD;
 
   CONSTANT c_reg_rst  : t_reg := (0,
@@ -136,6 +138,7 @@ ARCHITECTURE rtl OF dp_bsn_align_v2 IS
                                   (OTHERS=>'0'),
                                   (OTHERS=>(OTHERS=>'0')),
                                   c_dp_sosi_rst,
+                                  c_dp_sosi_rst,
                                   0,
                                   (OTHERS=>'0'),
                                   c_mem_copi_rst,
@@ -152,13 +155,26 @@ ARCHITECTURE rtl OF dp_bsn_align_v2 IS
   SIGNAL dp_copi       : t_mem_copi;
   SIGNAL dp_copi_arr   : t_mem_copi_arr(g_nof_streams-1 DOWNTO 0);
 
-  SIGNAL dp_sosi       : t_dp_sosi;
   SIGNAL rd_sosi_arr   : t_dp_sosi_arr(g_nof_streams-1 DOWNTO 0);
   SIGNAL rd_cipo_arr   : t_mem_cipo_arr(g_nof_streams-1 DOWNTO 0) := (OTHERS=>c_mem_cipo_rst);
 
   -- Pipeline registers
   SIGNAL in_sosi_arr_p : t_dp_sosi_arr(g_nof_streams-1 DOWNTO 0);
-  SIGNAL rd_copi_p     : t_mem_copi;
+  SIGNAL rd_copi       : t_mem_copi;
+
+  -- Debug signals
+  SIGNAL dbg_nof_streams         : NATURAL := g_nof_streams;
+  SIGNAL dbg_bsn_latency_max     : NATURAL := g_bsn_latency_max;
+  SIGNAL dbg_nof_aligners_max    : NATURAL := g_nof_aligners_max;
+  SIGNAL dbg_block_size          : NATURAL := g_block_size;
+  SIGNAL dbg_bsn_w               : NATURAL := g_bsn_w;
+  SIGNAL dbg_data_w              : NATURAL := g_data_w;
+  SIGNAL dbg_replacement_value   : INTEGER := g_replacement_value;
+  SIGNAL dbg_use_mm_output       : BOOLEAN := g_use_mm_output;
+  SIGNAL dbg_pipeline_input      : NATURAL := g_pipeline_input;
+  SIGNAL dbg_rd_latency          : NATURAL := g_rd_latency;
+  SIGNAL dbg_c_buffer_nof_blocks : NATURAL := c_buffer_nof_blocks;
+  SIGNAL dbg_c_product_w         : NATURAL := c_product_w;
 
 BEGIN
 
@@ -173,14 +189,14 @@ BEGIN
     END IF;
   END PROCESS;
   
-  p_comb : PROCESS(r, in_sosi_arr_p, mm_copi, rd_cipo_arr, rd_sosi_arr)
+  p_comb : PROCESS(r, in_sosi_arr_p, mm_copi, dp_copi, rd_cipo_arr, rd_sosi_arr)
     -- State variable
     VARIABLE v : t_reg;
     -- Auxiliary variables / local wires / no memory
     VARIABLE v_ref_sosi          : t_dp_sosi;
     VARIABLE v_pointer_slv       : STD_LOGIC_VECTOR(c_blk_pointer_w-1 DOWNTO 0);
     VARIABLE v_product_slv       : STD_LOGIC_VECTOR(c_product_w-1 DOWNTO 0);
-    VARIABLE v_filler_flag       : STD_LOGIC;
+    VARIABLE v_lost_data_flag    : STD_LOGIC;
     VARIABLE v_out_sosi_arr      : t_dp_sosi_arr(g_nof_streams-1 DOWNTO 0);
   BEGIN
     v := r;
@@ -221,13 +237,13 @@ BEGIN
       v.bsn_arr(v.wr_pointer) := v_ref_sosi.bsn(g_bsn_w-1 DOWNTO 0);
 
       -- . update read block pointer at g_bsn_latency_max blocks behind the reference write pointer
-      IF g_bsn_latency_use_node_index = FALSE THEN
+      IF g_nof_aligners_max = 1 THEN
         v.rd_pointer := v.wr_pointer - g_bsn_latency_max;
       ELSE
         v.rd_pointer := v.wr_pointer - g_bsn_latency_max * node_index;
       END IF;
       IF v.rd_pointer < 0 THEN
-        v.rd_pointer := v.rd_pointer + g_buffer_nof_blocks;
+        v.rd_pointer := v.rd_pointer + c_buffer_nof_blocks;
       END IF;
 
       -- . update read address of read block pointer
@@ -243,16 +259,16 @@ BEGIN
         -- . pass on timestamp information
         v.mm_sosi.sync := v.sync_arr(v.rd_pointer);
         v.mm_sosi.bsn := RESIZE_DP_BSN(v.bsn_arr(v.rd_pointer));
-        -- . pass on filled flags for enabled streams via channel field, and
-        --   determine whether the ouput has to insert filler data
+        -- . pass on lost data flags for enabled streams via channel field, and
+        --   determine whether the output has to insert replacement data
         v.mm_sosi.channel := (OTHERS=>'0');
         FOR I IN 0 TO g_nof_streams-1 LOOP
-          v_filler_flag := NOT v.filled_arr(I)(v.rd_pointer);
+          v_lost_data_flag := NOT v.filled_arr(I)(v.rd_pointer);
           IF stream_en_arr(I) = '1' THEN  -- use MM bit at sop
-            v.use_filler_data(I) := v_filler_flag;  -- enabled stream
-            v.mm_sosi.channel(I) := v_filler_flag;
+            v.use_replacement_data(I) := v_lost_data_flag;  -- enabled stream, so replace if data was lost
+            v.mm_sosi.channel(I) := v_lost_data_flag;
           ELSE
-            v.use_filler_data(I) := '1';  -- disabled stream
+            v.use_replacement_data(I) := '1';  -- disabled stream, so replace data
           END IF;
         END LOOP;
       END IF;
@@ -267,13 +283,13 @@ BEGIN
     -- p_read
     ----------------------------------------------------------------------------
 
-    -- Read the data from the buffer, or replace a block by filler data
+    -- Read the data from the buffer, or replace a block by replacement data
     -- . default use input data from the circular buffer
     v.fill_cipo_arr := rd_cipo_arr;
-    -- . if necessary, replace a stream by filler data
+    -- . if necessary, replace a stream by replacement data
     FOR I IN 0 TO g_nof_streams-1 LOOP
-      IF r.use_filler_data(I) = '1' THEN
-        v.fill_cipo_arr(I).rddata := TO_MEM_SDATA(g_filler_value);
+      IF r.use_replacement_data(I) = '1' THEN
+        v.fill_cipo_arr(I).rddata := TO_MEM_SDATA(g_replacement_value);
       END IF;
     END LOOP;
 
@@ -282,8 +298,9 @@ BEGIN
       -- Do the output via the MM interface
       --------------------------------------------------------------------------
       -- . adjust the rd address to the current buffer output block
+      --   sum yields c_mem_ram.adr_w bits, because left operand in ADD_UVEC determines width
       v.rd_copi := mm_copi;
-      v.rd_copi.address := RESIZE_MEM_ADDRESS(ADD_UVEC(r.rd_offset, mm_copi.address));  -- sum yields c_mem_ram.adr_w bits, because left operand determines width
+      v.rd_copi.address := RESIZE_MEM_ADDRESS(ADD_UVEC(r.rd_offset, mm_copi.address));
 
       -- . output via MM interface
       mm_cipo_arr <= v.fill_cipo_arr;
@@ -292,20 +309,22 @@ BEGIN
       -- Do the output via the DP streaming interface
       --------------------------------------------------------------------------
       -- . adjust the rd address
+      --   sum yields c_mem_ram.adr_w bits, because left operand in ADD_UVEC determines width
       v.rd_copi := dp_copi;
-      v.rd_copi.address := RESIZE_MEM_ADDRESS(ADD_UVEC(r.rd_offset, dp_copi.address));  -- sum yields c_mem_ram.adr_w bits, because left operand determines width
+      v.rd_copi.address := RESIZE_MEM_ADDRESS(ADD_UVEC(r.rd_offset, dp_copi.address));
 
       -- . hold mm_sosi.sync, bsn
       IF r.mm_sosi.sop = '1' THEN
-        dp_sosi <= r.mm_sosi;
+        v.dp_sosi := r.mm_sosi;
       END IF;
 
       -- apply mm_sosi.sync and bsn at sop to all streams in out_sosi_arr
-      v_out_sosi_arr := rd_sosi_arr;  -- the input data from the buffer or filler data (= v.fill_cipo_arr in streaming format)
+      v_out_sosi_arr := rd_sosi_arr;  -- = v.fill_cipo_arr in streaming format, contains the
+                                      -- input data from the buffer or replacement data
       IF rd_sosi_arr(0).sop = '1' THEN
-        v_out_sosi_arr := func_dp_stream_arr_set(v_out_sosi_arr, dp_sosi.sync, "SYNC");
-        v_out_sosi_arr := func_dp_stream_arr_set(v_out_sosi_arr, dp_sosi.bsn, "BSN");
-        v.out_bsn := dp_sosi.bsn(g_bsn_w-1 DOWNTO 0);  -- hold BSN until next sop, to ease view in wave window
+        v_out_sosi_arr := func_dp_stream_arr_set(v_out_sosi_arr, r.dp_sosi.sync, "SYNC");
+        v_out_sosi_arr := func_dp_stream_arr_set(v_out_sosi_arr, r.dp_sosi.bsn, "BSN");
+        v.out_bsn := r.dp_sosi.bsn(g_bsn_w-1 DOWNTO 0);  -- hold BSN until next sop, to ease view in wave window
       ELSE
         -- hold BSN until next sop, to ease view in wave window
         v_out_sosi_arr := func_dp_stream_arr_set(v_out_sosi_arr, r.out_bsn, "BSN");
@@ -336,8 +355,8 @@ BEGIN
       wr_en     => r.wr_copi_arr(I).wr,
       wr_adr    => r.wr_copi_arr(I).address(c_ram_buf.adr_w-1 DOWNTO 0),
       wr_dat    => r.wr_copi_arr(I).wrdata(c_ram_buf.dat_w-1 DOWNTO 0),
-      rd_en     => rd_copi_p.rd,
-      rd_adr    => rd_copi_p.address(c_ram_buf.adr_w-1 DOWNTO 0),
+      rd_en     => rd_copi.rd,
+      rd_adr    => rd_copi.address(c_ram_buf.adr_w-1 DOWNTO 0),
       rd_dat    => rd_cipo_arr(I).rddata(c_ram_buf.dat_w-1 DOWNTO 0),
       rd_val    => rd_cipo_arr(I).rdval
     );
@@ -348,9 +367,6 @@ BEGIN
   ------------------------------------------------------------------------------
 
   gen_streaming_output : IF NOT g_use_mm_output GENERATE
-    dp_copi <= dp_copi_arr(0);
-    dp_done <= dp_done_arr(0);   -- for viewing only
-
     gen_mm_to_dp : FOR I IN 0 TO g_nof_streams-1 GENERATE
       u_mm_to_dp: ENTITY work.dp_block_from_mm
       GENERIC MAP (
@@ -373,6 +389,10 @@ BEGIN
         out_siso      => c_dp_siso_rdy
       );
     END GENERATE;
+
+    -- Use dp_copi_arr(0) to read the same addresses in parallel for all streams
+    dp_copi <= dp_copi_arr(0);
+    dp_done <= dp_done_arr(0);   -- for viewing only
   END GENERATE;
 
 
@@ -396,6 +416,6 @@ BEGIN
   );
 
   -- . read RAM
-  rd_copi_p <= nxt_r.rd_copi WHEN g_rd_latency = 1 ELSE r.rd_copi;
+  rd_copi <= nxt_r.rd_copi WHEN g_rd_latency = 1 ELSE r.rd_copi;
 
 END rtl;
-- 
GitLab