diff --git a/libraries/dsp/rTwoSDF/src/vhdl/rTwoSDFPkg.vhd b/libraries/dsp/rTwoSDF/src/vhdl/rTwoSDFPkg.vhd index 79c1bcff2dd8caddf9c9978b336c9b5b5d71dacf..6b88465f4311e4a8ed57f84dd0c84d9ecf4d7346 100644 --- a/libraries/dsp/rTwoSDF/src/vhdl/rTwoSDFPkg.vhd +++ b/libraries/dsp/rTwoSDF/src/vhdl/rTwoSDFPkg.vhd @@ -30,7 +30,7 @@ package rTwoSDFPkg is -- generics for rTwoSDFStage stage_lat : natural; -- = 1 weight_lat : natural; -- = 1 - mul_lat : natural; -- = 3 + mul_lat : natural; -- = 3+1 -- generics for rTwoBFStage bf_lat : natural; -- = 1 -- generics for rTwoBF @@ -41,7 +41,7 @@ package rTwoSDFPkg is sep_lat : natural; -- = 1 end record; - constant c_fft_pipeline : t_fft_pipeline := (1, 1, 3, 1, 1, 0, 0, 1); + constant c_fft_pipeline : t_fft_pipeline := (1, 1, 4, 1, 1, 0, 0, 1); end package rTwoSDFPkg; diff --git a/libraries/dsp/rTwoSDF/src/vhdl/rTwoWMul.vhd b/libraries/dsp/rTwoSDF/src/vhdl/rTwoWMul.vhd index 67247a2939212e91d504d075ad714e23163d56c7..de0b44de498d7f3c63e161e2be0ace40c08538cf 100644 --- a/libraries/dsp/rTwoSDF/src/vhdl/rTwoWMul.vhd +++ b/libraries/dsp/rTwoSDF/src/vhdl/rTwoWMul.vhd @@ -28,7 +28,7 @@ entity rTwoWMul is generic ( g_technology : NATURAL := c_tech_select_default; g_stage : natural := 1; - g_lat : natural := 3 + g_lat : natural := 3+1 -- 3 for mult, 1 for round ); port ( clk : in std_logic; @@ -47,16 +47,32 @@ end entity rTwoWMul; architecture str of rTwoWMul is - -- Derive the common_complex_mult g_pipeline_* values from g_lat. The sum must match g_lat. 
- constant c_mult_input_lat : natural := sel_a_b(g_lat>1, 1, 0); -- second priority use DSP pipeline input - constant c_mult_product_lat : natural := 0; - constant c_mult_adder_lat : natural := sel_a_b(g_lat>2, 1, 0); -- third priority use DSP internal product-sum pipeline - constant c_mult_extra_lat : natural := sel_a_b(g_lat>3, g_lat-3, 0); -- remaining extra pipelining in logic - constant c_mult_output_lat : natural := sel_a_b(g_lat>0, 1, 0) + c_mult_extra_lat; -- first priority use DSP pipeline output + -- Use multiplier product truncate or signed rounding (= away from zero). On hardware for Fsub in + -- Apertif and using the WG at various frequencies at subband or between subbands it appears that + -- using truncate or sround does not make a noticeable difference in the SST. Still choose to use + -- signed rounding to preserve zero DC. + constant c_use_truncate : boolean := true; --false; + + -- Derive the common_complex_mult g_pipeline_* values from g_lat. The sum c_total_lat = g_lat, so that g_lat defines + -- the total latency from in_* to out_*. 
-- DSP multiplier IP - constant c_dsp_mult_lat : natural:= 3; + constant c_dsp_mult_lat : natural := 3; + -- Pipeline multiplier product rounding from c_prod_w via c_round_w to c_out_dat_w + constant c_round_lat : natural := sel_a_b(g_lat > c_dsp_mult_lat, 1, 0); -- allocate 1 pipeline for round + constant c_lat : natural := g_lat - c_round_lat; -- allocate remaining pipeline to multiplier + + constant c_mult_input_lat : natural := sel_a_b(c_lat>1, 1, 0); -- second priority use DSP pipeline input + constant c_mult_product_lat : natural := 0; + constant c_mult_adder_lat : natural := sel_a_b(c_lat>2, 1, 0); -- third priority use DSP internal product-sum pipeline + constant c_mult_extra_lat : natural := sel_a_b(c_lat>3, c_lat-3, 0); -- remaining extra pipelining in logic + constant c_mult_output_lat : natural := sel_a_b(c_lat>0, 1, 0) + c_mult_extra_lat; -- first priority use DSP pipeline output + constant c_mult_lat : natural := c_mult_input_lat + c_mult_product_lat + c_mult_adder_lat + c_mult_output_lat; + + -- Total input to output latency + constant c_total_lat : natural := c_mult_lat + c_round_lat; + -- Quantization constant c_in_dat_w : natural:= in_re'length; constant c_weight_w : natural:= weight_re'length; @@ -74,6 +90,11 @@ architecture str of rTwoWMul is begin + -- Total latency check + ASSERT c_total_lat = g_lat + REPORT "rTwoWMul: total pipeline error" + SEVERITY FAILURE; + ------------------------------------------------------------------------------ -- Complex multiplication -- . use the common_complex_mult(rtl) for the output stage 1 because then @@ -81,11 +102,11 @@ begin -- weight_re = 1 and weight_im = 0 inputs. -- . the IP in common_complex_mult(stratix4) only supports up to 18b wide -- inputs. - -- . for g_lat = 0,1,2 use the RTL multiplier - -- . for g_lat >= 3 default best use the FPGA multiplier IP block. + -- . for c_lat = 0,1,2 use the RTL multiplier + -- . for c_lat >= 3 default best use the FPGA multiplier IP block. 
------------------------------------------------------------------------------ - gen_rtl : if g_stage=1 or c_in_dat_w>c_dsp_mult_w or g_lat<c_dsp_mult_lat generate + gen_rtl : if g_stage=1 or c_in_dat_w>c_dsp_mult_w or c_lat<c_dsp_mult_lat generate u_CmplxMul : entity common_mult_lib.common_complex_mult generic map ( g_technology => g_technology, @@ -109,11 +130,11 @@ begin in_val => in_val, out_pr => product_re, out_pi => product_im, - out_val => out_val + out_val => OPEN ); end generate; - gen_ip : if g_stage>1 and c_in_dat_w<=c_dsp_mult_w and g_lat>=c_dsp_mult_lat generate + gen_ip : if g_stage>1 and c_in_dat_w<=c_dsp_mult_w and c_lat>=c_dsp_mult_lat generate u_cmplx_mul : entity common_mult_lib.common_complex_mult generic map ( g_technology => g_technology, @@ -137,7 +158,7 @@ begin in_val => in_val, out_pr => product_re, out_pi => product_im, - out_val => out_val + out_val => OPEN ); end generate; @@ -145,20 +166,40 @@ begin -- Round WMult output ------------------------------------------------------------------------------ - -- use truncate that throws away the c_round_w lower bits as rounding function - -- use resize_svec that keeps the c_out_dat_w lower bits to get to the output width - round_re <= truncate_and_resize_svec(product_re, c_round_w, c_out_dat_w); - round_im <= truncate_and_resize_svec(product_im, c_round_w, c_out_dat_w); - - -- output real and imaginary, switch between input and product - out_re <= round_re when out_sel = '1' else in_re_dly; - out_im <= round_im when out_sel = '1' else in_im_dly; + gen_truncate : if c_use_truncate=true GENERATE + -- use truncate that throws away the c_round_w lower bits as rounding function + -- use resize_svec that keeps the c_out_dat_w lower bits to get to the output width + gen_comb : if c_round_lat=0 generate + round_re <= truncate_and_resize_svec(product_re, c_round_w, c_out_dat_w); + round_im <= truncate_and_resize_svec(product_im, c_round_w, c_out_dat_w); + end generate; + gen_reg : if c_round_lat=1 
generate + round_re <= truncate_and_resize_svec(product_re, c_round_w, c_out_dat_w) when rising_edge(clk); + round_im <= truncate_and_resize_svec(product_im, c_round_w, c_out_dat_w) when rising_edge(clk); + end generate; + end generate; + + + gen_sround : if c_use_truncate=false GENERATE + -- Use resize_svec(s_round()) instead of truncate_and_resize_svec() to have symmetrical rounding around 0 + -- Rounding takes logic due to adding 0.5 therefore need to use c_round_lat=1 to achieve timing + gen_comb : if c_round_lat=0 generate + ASSERT false REPORT "rTwoWMul: can probably not achieve timing for sround without pipeline" SEVERITY FAILURE; + round_re <= RESIZE_SVEC(s_round(product_re, c_round_w), c_out_dat_w); + round_im <= RESIZE_SVEC(s_round(product_im, c_round_w), c_out_dat_w); + end generate; + gen_reg : if c_round_lat=1 generate + round_re <= RESIZE_SVEC(s_round(product_re, c_round_w), c_out_dat_w) when rising_edge(clk); + round_im <= RESIZE_SVEC(s_round(product_im, c_round_w), c_out_dat_w) when rising_edge(clk); + end generate; + end generate; ------------------------------------------------------------------------------ -- Propagate data and control signals for input/output choice at WMult output ------------------------------------------------------------------------------ - + + -- No need to use rst for data, because initial data value is don't care u_re_lat : entity common_lib.common_pipeline generic map ( g_pipeline => g_lat, @@ -183,14 +224,33 @@ begin out_dat => in_im_dly ); + -- Use rst for control to ensure initial low u_sel_lat : entity common_lib.common_pipeline_sl generic map ( g_pipeline => g_lat ) port map ( + rst => rst, clk => clk, in_dat => in_sel, out_dat => out_sel ); + u_pipeline_out_val : entity common_lib.common_pipeline_sl + generic map ( + g_pipeline => g_lat + ) + port map ( + rst => rst, + clk => clk, + in_dat => in_val, + out_dat => out_val + ); + + ------------------------------------------------------------------------------ + -- 
Output real and imaginary, switch between input and product + ------------------------------------------------------------------------------ + out_re <= round_re when out_sel = '1' else in_re_dly; + out_im <= round_im when out_sel = '1' else in_im_dly; + end str; \ No newline at end of file diff --git a/libraries/dsp/rTwoSDF/tb/vhdl/tb_rTwoSDF.vhd b/libraries/dsp/rTwoSDF/tb/vhdl/tb_rTwoSDF.vhd index 7e113d74d9c937224097f155c7357cf846f2b092..a03663c51fb0056f341b11fb19774159d0bd5d62 100644 --- a/libraries/dsp/rTwoSDF/tb/vhdl/tb_rTwoSDF.vhd +++ b/libraries/dsp/rTwoSDF/tb/vhdl/tb_rTwoSDF.vhd @@ -84,22 +84,7 @@ entity tb_rTwoSDF is g_nof_points : natural := 1024; g_in_dat_w : natural := 8; g_out_dat_w : natural := 14; - g_guard_w : natural := 2; -- guard bits are used to avoid overflow in single FFT stage. - - -- Internal pipeline settings for rTwoSDF - g_pipeline : t_fft_pipeline := (1, 1, 3, 1, 1, 0, 0, 1) -- type t_rtwo_sdf_stage_pipeline is record - -- -- generics for rTwoSDFStage - -- stage_lat : natural; -- = 1 - -- weight_lat : natural; -- = 1 - -- mul_lat : natural; -- = 3 - -- -- generics for rTwoBFStage - -- bf_lat : natural; -- = 1 - -- -- generics for rTwoBF - -- bf_use_zdly : natural; -- = 1 - -- bf_in_a_zdly : natural; -- = 0 - -- bf_out_d_zdly : natural; -- = 0 - -- sep_lat : natural; -- = 1 - -- end record; + g_guard_w : natural := 2 -- guard bits are used to avoid overflow in single FFT stage. 
); end entity tb_rTwoSDF; @@ -269,9 +254,7 @@ begin g_out_dat_w => g_out_dat_w, g_stage_dat_w => c_stage_dat_w, g_guard_w => g_guard_w, - g_nof_points => g_nof_points, - -- generics for rTwoSDFStage - g_pipeline => g_pipeline + g_nof_points => g_nof_points ) port map( clk => clk, diff --git a/libraries/dsp/rTwoSDF/tb/vhdl/tb_tb_rTwoSDF.vhd b/libraries/dsp/rTwoSDF/tb/vhdl/tb_tb_rTwoSDF.vhd index 8734636a2db1dc11d0432cc332f454d14a267b8b..1efbecad26bbfb1f8a876bded1c7b4e24b09bf7c 100644 --- a/libraries/dsp/rTwoSDF/tb/vhdl/tb_tb_rTwoSDF.vhd +++ b/libraries/dsp/rTwoSDF/tb/vhdl/tb_tb_rTwoSDF.vhd @@ -52,26 +52,11 @@ begin -- g_nof_points : natural := 1024; -- g_in_dat_w : natural := 8; -- g_out_dat_w : natural := 14; --- g_guard_w : natural := 2; -- guard bits are used to avoid overflow in single FFT stage. --- --- -- Internal pipeline settings for rTwoSDF --- g_pipeline : t_fft_pipeline := (1, 1, 3, 1, 1, 0, 0, 1) -- type t_rtwo_sdf_stage_pipeline is record --- -- -- generics for rTwoSDFStage --- -- stage_lat : natural; -- = 1 --- -- weight_lat : natural; -- = 1 --- -- mul_lat : natural; -- = 3 --- -- -- generics for rTwoBFStage --- -- bf_lat : natural; -- = 1 --- -- -- generics for rTwoBF --- -- bf_use_zdly : natural; -- = 1 --- -- bf_in_a_zdly : natural; -- = 0 --- -- bf_out_d_zdly : natural; -- = 0 --- -- sep_lat : natural; -- = 1 --- -- end record; +-- g_guard_w : natural := 2 -- guard bits are used to avoid overflow in single FFT stage. 
- --u_act_impulse_16p_16i_16o : entity work.tb_rTwoSDF generic map (false, 1, true, 16, 16, 16, 2, (1, 1, 3, 1, 1, 0, 0, 1)); - u_act_noise_1024p_8i_14o : entity work.tb_rTwoSDF generic map (true, 1, true, 1024, 8, 14, 2, (1, 1, 3, 1, 1, 0, 0, 1)); - u_rnd_noise_1024p_8i_14o : entity work.tb_rTwoSDF generic map (true, 0, true, 1024, 8, 14, 2, (1, 1, 3, 1, 1, 0, 0, 1)); - u_rnd_noise_1024p_8i_14o_flipped : entity work.tb_rTwoSDF generic map (true, 0, false, 1024, 8, 14, 2, (1, 1, 3, 1, 1, 0, 0, 1)); + --u_act_impulse_16p_16i_16o : entity work.tb_rTwoSDF generic map (false, 1, true, 16, 16, 16, 2); + u_act_noise_1024p_8i_14o : entity work.tb_rTwoSDF generic map (true, 1, true, 1024, 8, 14, 2); + u_rnd_noise_1024p_8i_14o : entity work.tb_rTwoSDF generic map (true, 0, true, 1024, 8, 14, 2); + u_rnd_noise_1024p_8i_14o_flipped : entity work.tb_rTwoSDF generic map (true, 0, false, 1024, 8, 14, 2); end tb;