diff --git a/libraries/base/dp/src/vhdl/dp_bsn_align_v2.vhd b/libraries/base/dp/src/vhdl/dp_bsn_align_v2.vhd
index b3ea7dc99f01d6c8054451961578a59f3327db7d..158b828fe417e7bad8f75a0bc9fbcc93c1573be4 100644
--- a/libraries/base/dp/src/vhdl/dp_bsn_align_v2.vhd
+++ b/libraries/base/dp/src/vhdl/dp_bsn_align_v2.vhd
@@ -70,7 +70,7 @@ ENTITY dp_bsn_align_v2 IS
 
     -- Output via local MM interface in dp_clk domain
     mm_copi        : IN  t_mem_copi;  -- read access to output block, all output streams share same mm_copi
-    mm_cipo_arr    : OUT t_mem_copi_arr(g_nof_streams-1 DOWNTO 0);
+    mm_cipo_arr    : OUT t_mem_cipo_arr(g_nof_streams-1 DOWNTO 0);
     mm_sosi        : OUT t_dp_sosi   -- streaming information that signals that an output block can be read
   );
 END dp_bsn_align_v2;
@@ -78,12 +78,52 @@ END dp_bsn_align_v2;
 
 ARCHITECTURE rtl OF dp_bsn_align_v2 IS
 
+  -- Circular buffer per stream
+  CONSTANT c_ram_size       : NATURAL := g_buffer_nof_blocks * g_block_size;
+  CONSTANT c_ram_buf        : t_c_mem := (latency  => 1,
+                                          adr_w    => ceil_log2(c_ram_size),
+                                          dat_w    => g_data_w,
+                                          nof_dat  => c_ram_size,
+                                          init_sl  => '0');
+
+  -- Use g_block_size + 1, so that the value g_block_size itself also fits when it is a power of 2
+  CONSTANT c_block_size_w   : NATURAL := ceil_log2(g_block_size + 1);
+  CONSTANT c_block_size_slv : STD_LOGIC_VECTOR(c_block_size_w-1 DOWNTO 0) := TO_UVEC(g_block_size, c_block_size_w);
+  CONSTANT c_blk_pointer_w  : NATURAL := ceil_log2(g_buffer_nof_blocks);
+
+  -- Use fixed slv width instead of using naturals for address calculation, to
+  -- avoid that synthesis may infer a too large multiplier
+  CONSTANT c_product_w      : NATURAL := c_blk_pointer_w + c_block_size_w;
+
+  TYPE t_bsn_arr IS ARRAY (INTEGER RANGE <>) OF STD_LOGIC_VECTOR(g_bsn_w-1 DOWNTO 0);
+  TYPE t_adr_arr IS ARRAY (INTEGER RANGE <>) OF STD_LOGIC_VECTOR(c_ram_buf.adr_w-1 DOWNTO 0);
+  TYPE t_filled_arr IS ARRAY (INTEGER RANGE <>) OF STD_LOGIC_VECTOR(g_buffer_nof_blocks-1 DOWNTO 0);
+
   TYPE t_reg IS RECORD
-    a : STD_LOGIC;
-    b : NATURAL;
+    -- p_write_arr
+    wr_pointer      : NATURAL;  -- only for debugging
+    wr_copi_arr     : t_mem_copi_arr(g_nof_streams-1 DOWNTO 0);
+    -- all streams
+    filled_arr      : t_filled_arr(g_nof_streams-1 DOWNTO 0);
+    -- local reference
+    sync_arr        : STD_LOGIC_VECTOR(g_buffer_nof_blocks-1 DOWNTO 0);
+    bsn_arr         : t_bsn_arr(g_buffer_nof_blocks-1 DOWNTO 0);
+    mm_sosi         : t_dp_sosi;
+    -- p_read
+    rd_pointer      : NATURAL;  -- only for debugging
+    rd_offset       : STD_LOGIC_VECTOR(c_ram_buf.adr_w-1 DOWNTO 0);
+    rd_copi         : t_mem_copi;
   END RECORD;
 
-  CONSTANT c_reg_rst : t_reg := ('0', 0);
+  CONSTANT c_reg_rst : t_reg := (0,                         -- wr_pointer
+                                 (OTHERS=>c_mem_copi_rst),  -- wr_copi_arr
+                                 (OTHERS=>(OTHERS=>'0')),   -- filled_arr
+                                 (OTHERS=>'0'),             -- sync_arr
+                                 (OTHERS=>(OTHERS=>'0')),   -- bsn_arr
+                                 c_dp_sosi_rst,             -- mm_sosi
+                                 0,                         -- rd_pointer
+                                 (OTHERS=>'0'),             -- rd_offset
+                                 c_mem_copi_rst);           -- rd_copi
 
   -- Local registers
   SIGNAL r : t_reg;
@@ -91,6 +131,8 @@ ARCHITECTURE rtl OF dp_bsn_align_v2 IS
 
 BEGIN
 
+  mm_sosi <= r.mm_sosi;
+
   p_clk: PROCESS(dp_clk, dp_rst)
   BEGIN
     IF dp_rst='1' THEN
@@ -100,7 +142,109 @@ BEGIN
     END IF;
   END PROCESS;
 
+  p_comb : PROCESS(r, in_sosi_arr, mm_copi)
+    -- State variable
+    VARIABLE v : t_reg;
+    -- Auxiliary variables
+    VARIABLE v_ref_sosi    : t_dp_sosi;
+    VARIABLE v_pointer     : INTEGER;
+    VARIABLE v_pointer_slv : STD_LOGIC_VECTOR(c_blk_pointer_w-1 DOWNTO 0);
+    VARIABLE v_product_slv : STD_LOGIC_VECTOR(c_product_w-1 DOWNTO 0);
+  BEGIN
+    v := r;
+    -- Default the mm_sosi strobes inactive, they are set again below when a block is issued
+    v.mm_sosi.sop   := '0';
+    v.mm_sosi.eop   := '0';
+    v.mm_sosi.valid := '0';
+    v.mm_sosi.sync  := '0';
+
+    -- p_write_arr
+    FOR I IN 0 TO g_nof_streams-1 LOOP
+      -- p_write
+      -- . default no write, because v starts from r and wr would otherwise stay '1' after the first valid
+      v.wr_copi_arr(I).wr := '0';
+      IF in_sosi_arr(I).valid = '1' THEN
+        -- . increment address within block
+        v.wr_copi_arr(I).address := RESIZE_MEM_ADDRESS(INCR_UVEC(r.wr_copi_arr(I).address(c_ram_buf.adr_w-1 DOWNTO 0), 1));
+        IF in_sosi_arr(I).sop = '1' THEN
+          -- . at sop start at the block that is selected by the LSbits of the bsn
+          v_pointer_slv := in_sosi_arr(I).bsn(c_blk_pointer_w-1 DOWNTO 0);
+          v_product_slv := STD_LOGIC_VECTOR(UNSIGNED(v_pointer_slv) * UNSIGNED(c_block_size_slv));
+          v.wr_copi_arr(I).address := RESIZE_MEM_ADDRESS(v_product_slv);
+
+          -- . set filled flag at sop, so assume rest will follow in time (index uses the pointer of this sop, not a stale one)
+          v.filled_arr(I)(TO_UINT(v_pointer_slv)) := '1';
+        END IF;
+        v.wr_copi_arr(I).wr := '1';
+        v.wr_copi_arr(I).wrdata := RESIZE_MEM_SDATA(in_sosi_arr(I).data);
+      END IF;
+    END LOOP;
+
+    -- p_control, all at local reference input 0 sop
+    v_ref_sosi := in_sosi_arr(0);
+    IF v_ref_sosi.sop = '1' THEN
+      -- . write sync & bsn buffer
+      v_pointer := TO_UINT(v_ref_sosi.bsn(c_blk_pointer_w-1 DOWNTO 0));
+      v.sync_arr(v_pointer) := v_ref_sosi.sync;
+      v.bsn_arr(v_pointer) := v_ref_sosi.bsn(g_bsn_w-1 DOWNTO 0);
+      v.wr_pointer := v_pointer;
+
+      -- . update read pointer at g_bsn_latency_max blocks behind the reference write pointer
+      v_pointer := v_pointer - g_bsn_latency_max;
+      IF v_pointer < 0 THEN
+        v_pointer := v_pointer + g_buffer_nof_blocks;
+      END IF;
+      v.rd_pointer := v_pointer;
+
+      -- . update read address
+      v_pointer_slv := TO_UVEC(v_pointer, c_blk_pointer_w);
+      v_product_slv := STD_LOGIC_VECTOR(UNSIGNED(v_pointer_slv) * UNSIGNED(c_block_size_slv));
+      v.rd_offset := RESIZE_UVEC(v_product_slv, c_ram_buf.adr_w);
+
+      -- . issue mm_sosi, if there is output ready to be read, indicated by filled reference block
+      IF r.filled_arr(0)(v_pointer) = '1' THEN
+        v.mm_sosi.sop   := '1';
+        v.mm_sosi.eop   := '1';
+        v.mm_sosi.valid := '1';
+        -- . pass on timestamp information (slice, because bsn_arr elements are only g_bsn_w bits wide)
+        v.mm_sosi.sync := v.sync_arr(v_pointer);
+        v.mm_sosi.bsn(g_bsn_w-1 DOWNTO 0) := v.bsn_arr(v_pointer);
+        -- . use channel field to pass on filled flags
+        FOR I IN 0 TO g_nof_streams-1 LOOP
+          v.mm_sosi.channel(I) := v.filled_arr(I)(v_pointer);
+        END LOOP;
+      END IF;
+
+      -- . clear filled flags, after mm_sosi was issued, or could have been issued
+      FOR I IN 0 TO g_nof_streams-1 LOOP
+        v.filled_arr(I)(v_pointer) := '0';
+      END LOOP;
+    END IF;
+
+    -- p_read
+    v.rd_copi := mm_copi;
+    v.rd_copi.address := RESIZE_MEM_ADDRESS(ADD_UVEC(r.rd_offset, mm_copi.address));  -- sum yields c_ram_buf.adr_w bits, because left operand determines width
+
+    -- next state, without this assignment the result of p_comb never reaches r or the RAM read port
+    nxt_r <= v;
+  END PROCESS;
+
   gen_streams : FOR I IN 0 TO g_nof_streams-1 GENERATE
+    u_data_buffer : ENTITY common_lib.common_ram_r_w
+    GENERIC MAP (
+      g_ram     => c_ram_buf
+    )
+    PORT MAP (
+      rst       => dp_rst,
+      clk       => dp_clk,
+      wr_en     => r.wr_copi_arr(I).wr,
+      wr_adr    => r.wr_copi_arr(I).address(c_ram_buf.adr_w-1 DOWNTO 0),
+      wr_dat    => r.wr_copi_arr(I).wrdata(c_ram_buf.dat_w-1 DOWNTO 0),
+      rd_en     => nxt_r.rd_copi.rd,
+      rd_adr    => nxt_r.rd_copi.address(c_ram_buf.adr_w-1 DOWNTO 0),  -- use nxt_r to not increase the rd latency
+      rd_dat    => mm_cipo_arr(I).rddata(c_ram_buf.dat_w-1 DOWNTO 0),
+      rd_val    => mm_cipo_arr(I).rdval
+    );
   END GENERATE;
 
 END rtl;