diff --git a/applications/arts/libraries/arts_tab_beamformer/src/vhdl/arts_tab_beamformer.vhd b/applications/arts/libraries/arts_tab_beamformer/src/vhdl/arts_tab_beamformer.vhd index 14a7e9db5e038b7634f9104406a5339d098a72cb..643f6c7c1f308152967948ab261efbeb42f2ed95 100644 --- a/applications/arts/libraries/arts_tab_beamformer/src/vhdl/arts_tab_beamformer.vhd +++ b/applications/arts/libraries/arts_tab_beamformer/src/vhdl/arts_tab_beamformer.vhd @@ -182,7 +182,9 @@ BEGIN g_weights_w => g_weights_w, g_weights_file => sel_a_b(g_weights_file="UNUSED", "UNUSED", g_weights_file & "_" & NATURAL'IMAGE(i)), g_weights_ram_dual_port => g_weights_ram_dual_port, - g_mult_variant => g_mult_variant + g_mult_variant => g_mult_variant, + g_use_weight_ram => FALSE, + g_use_weight_reg => TRUE -- Use registers instead of RAM ) PORT MAP ( dp_clk => dp_clk, @@ -198,9 +200,7 @@ BEGIN snk_in_arr => snk_in_arr, src_out => beamformer_src_out_arr(i) - ); - - + ); ------------------------------------------------------------------------------ -- Requantize the outputs to the desired bit width diff --git a/libraries/dsp/beamformer/src/vhdl/beamformer.vhd b/libraries/dsp/beamformer/src/vhdl/beamformer.vhd index c7e82a0a11f487cc2f24dc11544b35cddcf35a48..444ff319504ed8961e8a269e002af7f19b4d4526 100644 --- a/libraries/dsp/beamformer/src/vhdl/beamformer.vhd +++ b/libraries/dsp/beamformer/src/vhdl/beamformer.vhd @@ -35,6 +35,11 @@ -- . Input/output reordering and quantization are also application -- specific and should be done in the wrapper. -- . The input array snk_in_arr must be synchronous. +-- . If RAM is not desired, set g_use_weight_reg or g_use_weight_arr. +-- . The addressing remains the same (done using weight_addr) but the +-- source of the weights can be selected that way. +-- . Note: Only set 1 of g_use_weight_ram, g_use_weight_reg, g_use_weight_arr +-- to TRUE. 
LIBRARY IEEE; USE IEEE.std_logic_1164.ALL; @@ -53,8 +58,11 @@ ENTITY beamformer IS g_nof_weights : NATURAL; g_weights_w : NATURAL := 16; g_weights_file : STRING := "hex/beamformer_weights"; - g_weights_ram_dual_port : BOOLEAN := TRUE; - g_mult_variant : STRING := "IP" + g_weights_ram_dual_port : BOOLEAN := TRUE; --FIXME rename this to readback_weights + g_mult_variant : STRING := "IP"; + g_use_weight_ram : BOOLEAN := TRUE; -- Use weights RAM (default) + g_use_weight_reg : BOOLEAN := FALSE; -- Use a weights register instead of RAM + g_use_weight_arr : BOOLEAN := FALSE -- Use the input weights array instead of weights RAM or registers ); PORT ( dp_clk : IN STD_LOGIC; @@ -63,10 +71,11 @@ ENTITY beamformer IS mm_clk : IN STD_LOGIC; mm_rst : IN STD_LOGIC; - ram_mosi : IN t_mem_mosi := c_mem_mosi_rst; -- MM interface to upload weights to RAM + ram_mosi : IN t_mem_mosi := c_mem_mosi_rst; -- MM interface to upload weights to RAM or REG ram_miso : OUT t_mem_miso; - weight_addr : IN STD_LOGIC_VECTOR(ceil_log2(g_nof_weights)-1 DOWNTO 0); -- Weight RAM address + weight_addr : IN STD_LOGIC_VECTOR(ceil_log2(g_nof_weights)-1 DOWNTO 0); -- Weight address + weight_arr : IN t_slv_32_arr(g_nof_inputs-1 DOWNTO 0) := (OTHERS=>(OTHERS=>'0')); snk_in_arr : IN t_dp_sosi_arr(g_nof_inputs-1 DOWNTO 0); -- All streams must be synchronous src_out : OUT t_dp_sosi @@ -97,6 +106,20 @@ ARCHITECTURE str OF beamformer IS SIGNAL common_ram_crw_crw_src_out_arr : t_dp_sosi_arr(g_nof_inputs-1 DOWNTO 0); + ------------------------------------------------------------------------------ + -- Weights REG + ------------------------------------------------------------------------------ + TYPE t_common_reg_r_d_dc_out_slv_arr IS ARRAY(g_nof_inputs-1 DOWNTO 0) OF STD_LOGIC_VECTOR(c_common_ram_crw_crw_ram.dat_w*c_common_ram_crw_crw_ram.nof_dat-1 DOWNTO 0); + TYPE t_common_reg_r_d_dc_out_arr IS ARRAY(g_nof_weights-1 DOWNTO 0) OF STD_LOGIC_VECTOR(c_common_ram_crw_crw_ram.dat_w-1 DOWNTO 0); + TYPE 
t_common_reg_r_d_dc_out_2arr IS ARRAY(g_nof_inputs-1 DOWNTO 0) OF t_common_reg_r_d_dc_out_arr; + + SIGNAL common_reg_r_w_dc_in_reg_slv_arr : t_common_reg_r_d_dc_out_slv_arr; + SIGNAL common_reg_r_w_dc_out_reg_slv_arr : t_common_reg_r_d_dc_out_slv_arr; + SIGNAL common_reg_r_w_dc_out_reg_2arr : t_common_reg_r_d_dc_out_2arr; + + -- The register outputs the weight 1 cycle too soon relative to the RAM, so register the address + SIGNAL reg_weight_addr : STD_LOGIC_VECTOR(ceil_log2(g_nof_weights)-1 DOWNTO 0); + ------------------------------------------------------------------------------ -- Pipeline ------------------------------------------------------------------------------ @@ -113,50 +136,101 @@ BEGIN ------------------------------------------------------------------------------ -- Weights RAM ------------------------------------------------------------------------------ - gen_common_ram_crw_crw : FOR i IN 0 TO g_nof_inputs-1 GENERATE - - -- Read request on every incoming valid cycle - common_ram_crw_crw_rd_en_b_arr(i) <= snk_in_arr(i).valid; - - -- Use entity input for read address - common_ram_crw_crw_adr_b_arr(i) <= weight_addr; - - -- Dual clock RAM - u_common_ram_crw_crw : ENTITY common_lib.common_ram_crw_crw - GENERIC MAP ( - g_technology => g_technology, - g_ram => c_common_ram_crw_crw_ram, - g_init_file => sel_a_b(g_weights_file="UNUSED", "UNUSED", g_weights_file & "_" & NATURAL'IMAGE(i) & ".hex"), - g_true_dual_port => g_weights_ram_dual_port - ) - PORT MAP ( - rst_a => mm_rst, - clk_a => mm_clk, - wr_en_a => ram_mosi_arr(i).wr, - wr_dat_a => ram_mosi_arr(i).wrdata(c_common_ram_crw_crw_ram.dat_w -1 DOWNTO 0), - adr_a => ram_mosi_arr(i).address(c_common_ram_crw_crw_ram.adr_w-1 DOWNTO 0), - rd_en_a => ram_mosi_arr(i).rd, - rd_dat_a => ram_miso_arr(i).rddata(c_common_ram_crw_crw_ram.dat_w -1 DOWNTO 0), - rd_val_a => ram_miso_arr(i).rdval, - - rst_b => dp_rst, - clk_b => dp_clk, - wr_en_b => '0', - wr_dat_b => (OTHERS =>'0'), - adr_b => 
common_ram_crw_crw_adr_b_arr(i), - rd_en_b => common_ram_crw_crw_rd_en_b_arr(i), - rd_dat_b => common_ram_crw_crw_rd_dat_b_arr(i), - rd_val_b => common_ram_crw_crw_rd_val_b_arr(i) - ); - - -- RAM output rewired to SOSI array - common_ram_crw_crw_src_out_arr(i).re <= RESIZE_DP_DSP_DATA(common_ram_crw_crw_rd_dat_b_arr(i)( g_weights_w-1 DOWNTO 0)); - common_ram_crw_crw_src_out_arr(i).im <= RESIZE_DP_DSP_DATA(common_ram_crw_crw_rd_dat_b_arr(i)(2*g_weights_w-1 DOWNTO g_weights_w)); - common_ram_crw_crw_src_out_arr(i).valid <= common_ram_crw_crw_rd_val_b_arr(i); + gen_weight_ram : IF g_use_weight_ram = TRUE GENERATE + gen_common_ram_crw_crw : FOR i IN 0 TO g_nof_inputs-1 GENERATE + + -- Read request on every incoming valid cycle + common_ram_crw_crw_rd_en_b_arr(i) <= snk_in_arr(i).valid; + + -- Use entity input for read address + common_ram_crw_crw_adr_b_arr(i) <= weight_addr; + + -- Dual clock RAM + u_common_ram_crw_crw : ENTITY common_lib.common_ram_crw_crw + GENERIC MAP ( + g_technology => g_technology, + g_ram => c_common_ram_crw_crw_ram, + g_init_file => sel_a_b(g_weights_file="UNUSED", "UNUSED", g_weights_file & "_" & NATURAL'IMAGE(i) & ".hex"), + g_true_dual_port => g_weights_ram_dual_port + ) + PORT MAP ( + rst_a => mm_rst, + clk_a => mm_clk, + wr_en_a => ram_mosi_arr(i).wr, + wr_dat_a => ram_mosi_arr(i).wrdata(c_common_ram_crw_crw_ram.dat_w -1 DOWNTO 0), + adr_a => ram_mosi_arr(i).address(c_common_ram_crw_crw_ram.adr_w-1 DOWNTO 0), + rd_en_a => ram_mosi_arr(i).rd, + rd_dat_a => ram_miso_arr(i).rddata(c_common_ram_crw_crw_ram.dat_w -1 DOWNTO 0), + rd_val_a => ram_miso_arr(i).rdval, + + rst_b => dp_rst, + clk_b => dp_clk, + wr_en_b => '0', + wr_dat_b => (OTHERS =>'0'), + adr_b => common_ram_crw_crw_adr_b_arr(i), + rd_en_b => common_ram_crw_crw_rd_en_b_arr(i), + rd_dat_b => common_ram_crw_crw_rd_dat_b_arr(i), + rd_val_b => common_ram_crw_crw_rd_val_b_arr(i) + ); + + -- RAM output rewired to SOSI array + common_ram_crw_crw_src_out_arr(i).re <= 
RESIZE_DP_DSP_DATA(common_ram_crw_crw_rd_dat_b_arr(i)( g_weights_w-1 DOWNTO 0)); + common_ram_crw_crw_src_out_arr(i).im <= RESIZE_DP_DSP_DATA(common_ram_crw_crw_rd_dat_b_arr(i)(2*g_weights_w-1 DOWNTO g_weights_w)); + common_ram_crw_crw_src_out_arr(i).valid <= common_ram_crw_crw_rd_val_b_arr(i); + + END GENERATE; + END GENERATE; + + ------------------------------------------------------------------------------ + -- Weights register + ------------------------------------------------------------------------------ + gen_weight_reg : IF g_use_weight_reg = TRUE GENERATE + gen_common_reg_r_w_dc : FOR i IN 0 TO g_nof_inputs-1 GENERATE + + u_common_reg_r_w_dc : ENTITY common_lib.common_reg_r_w_dc + GENERIC MAP ( + g_cross_clock_domain => TRUE, + g_readback => g_weights_ram_dual_port, + g_reg => c_common_ram_crw_crw_ram + ) + PORT MAP ( + -- Clocks and reset + mm_rst => mm_rst, + mm_clk => mm_clk, + st_rst => dp_rst, + st_clk => dp_clk, + + -- Memory Mapped Slave in mm_clk domain + sla_in => ram_mosi_arr(i), + sla_out => ram_miso_arr(i), + + -- MM registers in st_clk domain + reg_wr_arr => open, + reg_rd_arr => open, + in_reg => common_reg_r_w_dc_in_reg_slv_arr(i), + out_reg => common_reg_r_w_dc_out_reg_slv_arr(i) + ); + + -- Rewire the concatenated SLV array to something we can index properly [g_nof_inputs][g_nof_weights] + gen_common_reg_r_w_dc_out_reg_2arr : FOR j IN 0 TO g_nof_weights-1 GENERATE + common_reg_r_w_dc_out_reg_2arr(i)(j) <= common_reg_r_w_dc_out_reg_slv_arr(i)(j*2*g_weights_w+2*g_weights_w-1 DOWNTO j*2*g_weights_w); + END GENERATE; + + -- REG output rewired to 'RAM' SOSI array. The weight is indexed by reg_weight_addr. 
+ common_ram_crw_crw_src_out_arr(i).re <= RESIZE_DP_DSP_DATA(common_reg_r_w_dc_out_reg_2arr(i)(TO_UINT(reg_weight_addr))( g_weights_w-1 DOWNTO 0)); + common_ram_crw_crw_src_out_arr(i).im <= RESIZE_DP_DSP_DATA(common_reg_r_w_dc_out_reg_2arr(i)(TO_UINT(reg_weight_addr))(2*g_weights_w-1 DOWNTO g_weights_w)); + + END GENERATE; + + gen_readback: IF g_weights_ram_dual_port=TRUE GENERATE + common_reg_r_w_dc_in_reg_slv_arr <= common_reg_r_w_dc_out_reg_slv_arr; + END GENERATE; END GENERATE; - -- Combine the individual RAM MM buses into one + ------------------------------------------------------------------------------ + -- Combine the individual MM buses into one + ------------------------------------------------------------------------------ u_common_mem_mux : ENTITY common_lib.common_mem_mux GENERIC MAP ( g_nof_mosi => g_nof_inputs, @@ -224,4 +298,16 @@ BEGIN src_out => src_out ); + ------------------------------------------------------------------------------ + -- Registers + ------------------------------------------------------------------------------ + p_clk : PROCESS(dp_rst, dp_clk) + BEGIN + IF dp_rst='1' THEN + reg_weight_addr <= (OTHERS=>'0'); + ELSIF rising_edge(dp_clk) THEN + reg_weight_addr <= weight_addr; + END IF; + END PROCESS; + END str; diff --git a/libraries/dsp/beamformer/tb/vhdl/tb_beamformer.vhd b/libraries/dsp/beamformer/tb/vhdl/tb_beamformer.vhd index 8f59580bdd65b88c8f06041a55d11e119c191f9a..d612047441006d80726c172ebdcf332ec0c790cb 100644 --- a/libraries/dsp/beamformer/tb/vhdl/tb_beamformer.vhd +++ b/libraries/dsp/beamformer/tb/vhdl/tb_beamformer.vhd @@ -42,11 +42,13 @@ USE mm_lib.mm_file_pkg.ALL; ENTITY tb_beamformer IS GENERIC ( - g_tb_index : NATURAL := 0; -- use different index to avoid MM file conflict in multi tb - --g_technology : NATURAL := c_tech_select_default; - g_nof_inputs : NATURAL := 2; - g_nof_weights : NATURAL := 32; - g_data_w : NATURAL := 8 --8b complex input data + g_tb_index : NATURAL := 0; -- use different index to avoid MM 
file conflict in multi tb + --g_technology : NATURAL := c_tech_select_default; + g_nof_inputs : NATURAL := 2; + g_nof_weights : NATURAL := 32; + g_data_w : NATURAL := 8; --8b complex input data + g_use_weight_ram : BOOLEAN := FALSE; + g_use_weight_reg : BOOLEAN := TRUE ); END tb_beamformer; @@ -281,9 +283,9 @@ ARCHITECTURE tb OF tb_beamformer IS proc_common_wait_some_cycles(dp_clk, 1); IF OK = '0' THEN - REPORT "TEST WENT WRONG."; + REPORT "TEST FAILED."; ELSE - REPORT "Test succesfull ended."; + REPORT "Test passed."; END IF; tb_end <= '1'; -- end test @@ -302,11 +304,13 @@ ARCHITECTURE tb OF tb_beamformer IS u_beamformer : ENTITY work.beamformer GENERIC MAP ( - g_technology => c_tech_select_default, - g_nof_inputs => g_nof_inputs, - g_nof_weights => g_nof_weights, - g_data_w => g_data_w, - g_weights_file => "UNUSED" + g_technology => c_tech_select_default, + g_nof_inputs => g_nof_inputs, + g_nof_weights => g_nof_weights, + g_data_w => g_data_w, + g_weights_file => "UNUSED", + g_use_weight_ram => g_use_weight_ram, + g_use_weight_reg => g_use_weight_reg ) PORT MAP ( dp_clk => dp_clk, diff --git a/libraries/dsp/beamformer/tb/vhdl/tb_tb_beamformer.vhd b/libraries/dsp/beamformer/tb/vhdl/tb_tb_beamformer.vhd index 0b2aabd2627180f91c34014c4b5c5c6caf35f208..465906a8f32b102a06a6e02c5497427b137740a7 100644 --- a/libraries/dsp/beamformer/tb/vhdl/tb_tb_beamformer.vhd +++ b/libraries/dsp/beamformer/tb/vhdl/tb_tb_beamformer.vhd @@ -42,16 +42,23 @@ BEGIN -- > Testbenches are self-checking -- --- g_tb_index : NATURAL := 0; -- use different index to avoid MM file conflict in multi tb +-- g_tb_index : NATURAL := 0; -- use different index to avoid MM file conflict in multi tb -- --g_technology : NATURAL := c_tech_select_default; --- g_nof_inputs : NATURAL := 2; --- g_nof_weights : NATURAL := 32; --- g_data_w : NATURAL := 8 --8b complex input data +-- g_nof_inputs : NATURAL := 2; +-- g_nof_weights : NATURAL := 32; +-- g_data_w : NATURAL := 8; --8b complex input data +-- 
g_use_weight_ram : BOOLEAN := FALSE; +-- g_use_weight_reg : BOOLEAN := TRUE -- -- do test for different number of inputs -sim_i1_beamformer : ENTITY work.tb_beamformer GENERIC MAP (1, 1, 32, 8); -sim_i2_beamformer : ENTITY work.tb_beamformer GENERIC MAP (2, 2, 32, 8); -sim_i32_beamformer : ENTITY work.tb_beamformer GENERIC MAP (3, 32, 32, 8); +-- . (weights in RAM) +sim_i01_ram_beamformer : ENTITY work.tb_beamformer GENERIC MAP (1, 1, 32, 8, TRUE, FALSE); +sim_i02_ram_beamformer : ENTITY work.tb_beamformer GENERIC MAP (2, 2, 32, 8, TRUE, FALSE); +sim_i32_ram_beamformer : ENTITY work.tb_beamformer GENERIC MAP (3, 32, 32, 8, TRUE, FALSE); +-- . (weights in Registers) +sim_i01_reg_beamformer : ENTITY work.tb_beamformer GENERIC MAP (4, 1, 32, 8, FALSE, TRUE); +sim_i02_reg_beamformer : ENTITY work.tb_beamformer GENERIC MAP (5, 2, 32, 8, FALSE, TRUE); +sim_i32_reg_beamformer : ENTITY work.tb_beamformer GENERIC MAP (6, 32, 32, 8, FALSE, TRUE); -END tb; \ No newline at end of file +END tb;