From 6b3a3db1c9f076b622653a35e50d47afe5297e24 Mon Sep 17 00:00:00 2001 From: Eric Kooistra <kooistra@astron.nl> Date: Fri, 18 Mar 2022 15:04:46 +0100 Subject: [PATCH] Round outside separate function in output quantizer, to avoid more inaccurate rounding of 1 LSbit in separate for bit growth. --- libraries/dsp/fft/src/vhdl/fft_r2_par.vhd | 140 +++++------------ libraries/dsp/fft/src/vhdl/fft_r2_pipe.vhd | 32 ++-- libraries/dsp/fft/src/vhdl/fft_r2_wide.vhd | 32 ++-- .../fft/src/vhdl/fft_reorder_sepa_pipe.vhd | 12 +- libraries/dsp/fft/src/vhdl/fft_sepa.vhd | 143 ++++++------------ libraries/dsp/fft/src/vhdl/fft_sepa_wide.vhd | 68 +++++---- 6 files changed, 163 insertions(+), 264 deletions(-) diff --git a/libraries/dsp/fft/src/vhdl/fft_r2_par.vhd b/libraries/dsp/fft/src/vhdl/fft_r2_par.vhd index c9089c8de0..02bf10f3cd 100644 --- a/libraries/dsp/fft/src/vhdl/fft_r2_par.vhd +++ b/libraries/dsp/fft/src/vhdl/fft_r2_par.vhd @@ -125,30 +125,37 @@ architecture str of fft_r2_par is constant c_pipeline_add_sub : natural := 1; constant c_pipeline_remove_lsb : natural := 1; - constant c_sepa_round : boolean := true; -- must be true, because separate should round the 1 bit growth - + constant c_nof_stages : natural := ceil_log2(g_fft.nof_points); constant c_nof_bf_per_stage : natural := g_fft.nof_points/2; constant c_in_scale_w_tester : integer := g_fft.stage_dat_w - g_fft.in_dat_w - sel_a_b(g_fft.guard_enable, g_fft.guard_w, 0); constant c_in_scale_w : natural := sel_a_b(c_in_scale_w_tester > 0, c_in_scale_w_tester, 0); -- Only scale when in_dat_w is not too big. 
constant c_out_scale_w : integer := g_fft.stage_dat_w - g_fft.out_dat_w - g_fft.out_gain_w; -- Estimate number of LSBs to throw away when > 0 or insert when < 0 + + constant c_sepa_growth_w : natural := sel_a_b(g_fft.use_separate, 1, 0); -- add one bit for add sub growth in separate + constant c_raw_dat_w : natural := g_fft.stage_dat_w + c_sepa_growth_w; type t_stage_dat_arr is array (integer range <>) of std_logic_vector(g_fft.stage_dat_w-1 downto 0); - type t_stage_sum_arr is array (integer range <>) of std_logic_vector(g_fft.stage_dat_w downto 0); + type t_stage_raw_arr is array (integer range <>) of std_logic_vector(c_raw_dat_w-1 downto 0); type t_data_arr2 is array(c_nof_stages downto 0) of t_stage_dat_arr(g_fft.nof_points-1 downto 0); type t_val_arr is array(c_nof_stages downto 0) of std_logic_vector( g_fft.nof_points-1 downto 0); signal data_re : t_data_arr2; signal data_im : t_data_arr2; signal data_val : t_val_arr; + signal int_re_arr : t_stage_dat_arr(g_fft.nof_points-1 downto 0); signal int_im_arr : t_stage_dat_arr(g_fft.nof_points-1 downto 0); - signal fft_re_arr : t_stage_dat_arr(g_fft.nof_points-1 downto 0); - signal fft_im_arr : t_stage_dat_arr(g_fft.nof_points-1 downto 0); - signal add_arr : t_stage_sum_arr(g_fft.nof_points-1 downto 0); - signal sub_arr : t_stage_sum_arr(g_fft.nof_points-1 downto 0); signal int_val : std_logic; + signal int_a_dc : std_logic_vector(g_fft.stage_dat_w-1 downto 0); + signal int_b_dc : std_logic_vector(g_fft.stage_dat_w-1 downto 0); + + signal add_arr : t_stage_raw_arr(g_fft.nof_points-1 downto 0); + signal sub_arr : t_stage_raw_arr(g_fft.nof_points-1 downto 0); + + signal fft_re_arr : t_stage_raw_arr(g_fft.nof_points-1 downto 0); + signal fft_im_arr : t_stage_raw_arr(g_fft.nof_points-1 downto 0); signal fft_val : std_logic; begin @@ -235,7 +242,7 @@ begin g_pipeline_input => 0, g_pipeline_output => c_pipeline_add_sub, g_in_dat_w => g_fft.stage_dat_w, - g_out_dat_w => g_fft.stage_dat_w+1 + g_out_dat_w => c_raw_dat_w ) 
port map ( clk => clk, @@ -251,7 +258,7 @@ begin g_pipeline_input => 0, g_pipeline_output => c_pipeline_add_sub, g_in_dat_w => g_fft.stage_dat_w, - g_out_dat_w => g_fft.stage_dat_w+1 + g_out_dat_w => c_raw_dat_w ) port map ( clk => clk, @@ -267,7 +274,7 @@ begin g_pipeline_input => 0, g_pipeline_output => c_pipeline_add_sub, g_in_dat_w => g_fft.stage_dat_w, - g_out_dat_w => g_fft.stage_dat_w+1 + g_out_dat_w => c_raw_dat_w ) port map ( clk => clk, @@ -283,7 +290,7 @@ begin g_pipeline_input => 0, g_pipeline_output => c_pipeline_add_sub, g_in_dat_w => g_fft.stage_dat_w, - g_out_dat_w => g_fft.stage_dat_w+1 + g_out_dat_w => c_raw_dat_w ) port map ( clk => clk, @@ -292,84 +299,14 @@ begin result => sub_arr(2*I+1) ); - gen_sepa_truncate : IF c_sepa_round=false GENERATE - -- truncate the one LSbit - fft_re_arr(2*I ) <= add_arr(2*I )(g_fft.stage_dat_w DOWNTO 1); -- A real - fft_re_arr(2*I+1) <= add_arr(2*I+1)(g_fft.stage_dat_w DOWNTO 1); -- B real - fft_im_arr(2*I ) <= sub_arr(2*I )(g_fft.stage_dat_w DOWNTO 1); -- A imag - fft_im_arr(2*I+1) <= sub_arr(2*I+1)(g_fft.stage_dat_w DOWNTO 1); -- B imag - end generate; - - gen_sepa_round : IF c_sepa_round=true GENERATE - -- round the one LSbit - round_re_a : ENTITY common_lib.common_round - GENERIC MAP ( - g_representation => "SIGNED", -- SIGNED (round +-0.5 away from zero to +- infinity) or UNSIGNED rounding (round 0.5 up to + inifinity) - g_round => TRUE, -- when TRUE round the input, else truncate the input - g_round_clip => FALSE, -- when TRUE clip rounded input >= +max to avoid wrapping to output -min (signed) or 0 (unsigned) - g_pipeline_input => 0, -- >= 0 - g_pipeline_output => 0, -- >= 0, use g_pipeline_input=0 and g_pipeline_output=0 for combinatorial output - g_in_dat_w => g_fft.stage_dat_w+1, - g_out_dat_w => g_fft.stage_dat_w - ) - PORT MAP ( - clk => clk, - in_dat => add_arr(2*I), - out_dat => fft_re_arr(2*I) - ); - - round_re_b : ENTITY common_lib.common_round - GENERIC MAP ( - g_representation => "SIGNED", -- 
SIGNED (round +-0.5 away from zero to +- infinity) or UNSIGNED rounding (round 0.5 up to + inifinity) - g_round => TRUE, -- when TRUE round the input, else truncate the input - g_round_clip => FALSE, -- when TRUE clip rounded input >= +max to avoid wrapping to output -min (signed) or 0 (unsigned) - g_pipeline_input => 0, -- >= 0 - g_pipeline_output => 0, -- >= 0, use g_pipeline_input=0 and g_pipeline_output=0 for combinatorial output - g_in_dat_w => g_fft.stage_dat_w+1, - g_out_dat_w => g_fft.stage_dat_w - ) - PORT MAP ( - clk => clk, - in_dat => add_arr(2*I+1), - out_dat => fft_re_arr(2*I+1) - ); - - round_im_a : ENTITY common_lib.common_round - GENERIC MAP ( - g_representation => "SIGNED", -- SIGNED (round +-0.5 away from zero to +- infinity) or UNSIGNED rounding (round 0.5 up to + inifinity) - g_round => TRUE, -- when TRUE round the input, else truncate the input - g_round_clip => FALSE, -- when TRUE clip rounded input >= +max to avoid wrapping to output -min (signed) or 0 (unsigned) - g_pipeline_input => 0, -- >= 0 - g_pipeline_output => 0, -- >= 0, use g_pipeline_input=0 and g_pipeline_output=0 for combinatorial output - g_in_dat_w => g_fft.stage_dat_w+1, - g_out_dat_w => g_fft.stage_dat_w - ) - PORT MAP ( - clk => clk, - in_dat => sub_arr(2*I), - out_dat => fft_im_arr(2*I) - ); - - round_im_b : ENTITY common_lib.common_round - GENERIC MAP ( - g_representation => "SIGNED", -- SIGNED (round +-0.5 away from zero to +- infinity) or UNSIGNED rounding (round 0.5 up to + inifinity) - g_round => TRUE, -- when TRUE round the input, else truncate the input - g_round_clip => FALSE, -- when TRUE clip rounded input >= +max to avoid wrapping to output -min (signed) or 0 (unsigned) - g_pipeline_input => 0, -- >= 0 - g_pipeline_output => 0, -- >= 0, use g_pipeline_input=0 and g_pipeline_output=0 for combinatorial output - g_in_dat_w => g_fft.stage_dat_w+1, - g_out_dat_w => g_fft.stage_dat_w - ) - PORT MAP ( - clk => clk, - in_dat => sub_arr(2*I+1), - out_dat => 
fft_im_arr(2*I+1) - ); - end generate; + fft_re_arr(2*I ) <= add_arr(2*I )(c_raw_dat_w-1 DOWNTO 0); -- A real + fft_re_arr(2*I+1) <= add_arr(2*I+1)(c_raw_dat_w-1 DOWNTO 0); -- B real + fft_im_arr(2*I ) <= sub_arr(2*I )(c_raw_dat_w-1 DOWNTO 0); -- A imag + fft_im_arr(2*I+1) <= sub_arr(2*I+1)(c_raw_dat_w-1 DOWNTO 0); -- B imag end generate; --------------------------------------------------------------------------- - -- Generate bin 0 directly + -- Generate bin 0 = DC directly --------------------------------------------------------------------------- -- Index N=g_fft.nof_points wraps to index 0: -- . fft_re_arr(0) = (int_re_arr(0) + int_re_arr(N)) / 2 = int_re_arr(0) @@ -379,28 +316,34 @@ begin u_pipeline_a_re_0 : entity common_lib.common_pipeline generic map ( - g_pipeline => c_pipeline_add_sub, - g_in_dat_w => g_fft.stage_dat_w, - g_out_dat_w => g_fft.stage_dat_w + g_representation => "SIGNED", + g_pipeline => c_pipeline_add_sub, + g_in_dat_w => g_fft.stage_dat_w, + g_out_dat_w => g_fft.stage_dat_w ) port map ( clk => clk, in_dat => int_re_arr(0), - out_dat => fft_re_arr(0) + out_dat => int_a_dc ); u_pipeline_b_re_0 : entity common_lib.common_pipeline generic map ( - g_pipeline => c_pipeline_add_sub, - g_in_dat_w => g_fft.stage_dat_w, - g_out_dat_w => g_fft.stage_dat_w + g_representation => "SIGNED", + g_pipeline => c_pipeline_add_sub, + g_in_dat_w => g_fft.stage_dat_w, + g_out_dat_w => g_fft.stage_dat_w ) port map ( clk => clk, in_dat => int_im_arr(0), - out_dat => fft_re_arr(1) + out_dat => int_b_dc ); + -- The real outputs of A(0) and B(0) are shifted left by one bit (= * 2) to match the scaling of the separate add + fft_re_arr(0) <= int_a_dc & '0'; + fft_re_arr(1) <= int_b_dc & '0'; + -- The imaginary outputs of A(0) and B(0) are always zero in case two real inputs are provided fft_im_arr(0) <= (others=>'0'); fft_im_arr(1) <= (others=>'0'); @@ -421,6 +364,7 @@ begin no_separate : if g_fft.use_separate=false generate assign_outputs : for I in 0 to g_fft.nof_points-1 generate + --
c_raw_dat_w = g_fft.stage_dat_w, because g_fft.use_separate=false fft_re_arr(I) <= int_re_arr(I); fft_im_arr(I) <= int_im_arr(I); end generate; @@ -434,14 +378,14 @@ begin u_requantize_re : entity common_lib.common_requantize generic map ( g_representation => "SIGNED", - g_lsb_w => c_out_scale_w, + g_lsb_w => c_out_scale_w + c_sepa_growth_w, g_lsb_round => TRUE, g_lsb_round_clip => FALSE, g_msb_clip => FALSE, g_msb_clip_symmetric => FALSE, g_pipeline_remove_lsb => c_pipeline_remove_lsb, g_pipeline_remove_msb => 0, - g_in_dat_w => g_fft.stage_dat_w, + g_in_dat_w => c_raw_dat_w, g_out_dat_w => g_fft.out_dat_w ) port map ( @@ -454,14 +398,14 @@ begin u_requantize_im : entity common_lib.common_requantize generic map ( g_representation => "SIGNED", - g_lsb_w => c_out_scale_w, + g_lsb_w => c_out_scale_w + c_sepa_growth_w, g_lsb_round => TRUE, g_lsb_round_clip => FALSE, g_msb_clip => FALSE, g_msb_clip_symmetric => FALSE, g_pipeline_remove_lsb => c_pipeline_remove_lsb, g_pipeline_remove_msb => 0, - g_in_dat_w => g_fft.stage_dat_w, + g_in_dat_w => c_raw_dat_w, g_out_dat_w => g_fft.out_dat_w ) port map ( diff --git a/libraries/dsp/fft/src/vhdl/fft_r2_pipe.vhd b/libraries/dsp/fft/src/vhdl/fft_r2_pipe.vhd index 00c2007bd8..994f865331 100644 --- a/libraries/dsp/fft/src/vhdl/fft_r2_pipe.vhd +++ b/libraries/dsp/fft/src/vhdl/fft_r2_pipe.vhd @@ -100,7 +100,8 @@ architecture str of fft_r2_pipe is constant c_in_scale_w : natural := g_fft.stage_dat_w - g_fft.in_dat_w - sel_a_b(g_fft.guard_enable, g_fft.guard_w, 0); constant c_out_scale_w : integer := g_fft.stage_dat_w - g_fft.out_dat_w - g_fft.out_gain_w; -- Estimate number of LSBs to throw throw away when > 0 or insert when < 0 constant c_raw_dat_extra_w : natural := sel_a_b(g_fft.use_separate, g_sepa_extra_w, 0); - constant c_raw_dat_w : natural := g_fft.stage_dat_w + c_raw_dat_extra_w; + constant c_sepa_growth_w : natural := sel_a_b(g_fft.use_separate, 1, 0); -- add one bit for add sub growth in separate + constant c_raw_dat_w : 
natural := g_fft.stage_dat_w + c_sepa_growth_w; -- number the stage instances from c_nof_stages:1 -- . the data input for the first stage has index c_nof_stages @@ -117,12 +118,10 @@ architecture str of fft_r2_pipe is signal data_re : t_data_arr; signal data_im : t_data_arr; - signal last_re : std_logic_vector(c_raw_dat_w-1 downto 0); - signal last_im : std_logic_vector(c_raw_dat_w-1 downto 0); signal data_val : std_logic_vector(c_nof_stages downto 0):= (others=>'0'); + signal in_cplx : std_logic_vector(c_nof_complex*g_fft.stage_dat_w-1 downto 0); signal out_cplx : std_logic_vector(c_nof_complex*c_raw_dat_w-1 downto 0); - signal in_cplx : std_logic_vector(c_nof_complex*c_raw_dat_w-1 downto 0); signal raw_out_re : std_logic_vector(c_raw_dat_w-1 downto 0); signal raw_out_im : std_logic_vector(c_raw_dat_w-1 downto 0); signal raw_out_val : std_logic; @@ -209,16 +208,16 @@ begin in_re => data_re(1), in_im => data_im(1), in_val => data_val(1), - out_re => last_re, -- = data_re(0), but may instead have c_raw_dat_w bits - out_im => last_im, -- = data_im(0), but may instead have c_raw_dat_w bits + out_re => data_re(0), + out_im => data_im(0), out_val => data_val(0) ); ------------------------------------------------------------------------------ -- Optional output reorder and separation ------------------------------------------------------------------------------ - gen_reorder_and_separate : if(g_fft.use_separate or g_fft.use_reorder) generate - in_cplx <= last_im & last_re; + gen_reorder_and_separate : if g_fft.use_separate or g_fft.use_reorder generate + in_cplx <= data_im(0) & data_re(0); u_reorder_sep : entity work.fft_reorder_sepa_pipe generic map ( @@ -232,20 +231,23 @@ begin port map ( clk => clk, rst => rst, - in_dat => in_cplx, + in_dat => in_cplx, -- c_nof_complex * g_fft.stage_dat_w in_val => data_val(0), - out_dat => out_cplx, + out_dat => out_cplx, -- c_nof_complex * c_raw_dat_w out_val => raw_out_val ); + -- c_raw_dat_w = g_fft.stage_dat_w when 
g_fft.use_separate = false + -- c_raw_dat_w = g_fft.stage_dat_w + 1 when g_fft.use_separate = true raw_out_re <= out_cplx( c_raw_dat_w-1 downto 0); raw_out_im <= out_cplx(2*c_raw_dat_w-1 downto c_raw_dat_w); end generate; - no_reorder_no_seperate : if(g_fft.use_separate=false and g_fft.use_reorder=false) generate - raw_out_re <= last_re; - raw_out_im <= last_im; + no_reorder_no_seperate : if g_fft.use_separate=false and g_fft.use_reorder=false generate + -- c_raw_dat_w = g_fft.stage_dat_w because g_fft.use_separate = false + raw_out_re <= data_re(0); + raw_out_im <= data_im(0); raw_out_val <= data_val(0); end generate; @@ -255,7 +257,7 @@ begin u_requantize_re : entity common_lib.common_requantize generic map ( g_representation => "SIGNED", - g_lsb_w => c_out_scale_w + c_raw_dat_extra_w, + g_lsb_w => c_out_scale_w + c_sepa_growth_w, g_lsb_round => TRUE, g_lsb_round_clip => FALSE, g_msb_clip => FALSE, @@ -275,7 +277,7 @@ begin u_requantize_im : entity common_lib.common_requantize generic map ( g_representation => "SIGNED", - g_lsb_w => c_out_scale_w + c_raw_dat_extra_w, + g_lsb_w => c_out_scale_w + c_sepa_growth_w, g_lsb_round => TRUE, g_lsb_round_clip => FALSE, g_msb_clip => FALSE, diff --git a/libraries/dsp/fft/src/vhdl/fft_r2_wide.vhd b/libraries/dsp/fft/src/vhdl/fft_r2_wide.vhd index da55a674b0..2490f5c6ab 100644 --- a/libraries/dsp/fft/src/vhdl/fft_r2_wide.vhd +++ b/libraries/dsp/fft/src/vhdl/fft_r2_wide.vhd @@ -153,18 +153,25 @@ architecture rtl of fft_r2_wide is constant c_out_scale_w : integer := c_fft_r2_par.out_dat_w - g_fft.out_dat_w - g_fft.out_gain_w; -- Estimate number of LSBs to throw away when > 0 or insert when < 0 + constant c_sepa_growth_w : natural := sel_a_b(g_fft.use_separate, 1, 0); -- add one bit for add sub growth in separate + constant c_raw_dat_w : natural := g_fft.stage_dat_w + c_sepa_growth_w; + + -- g_fft.wb_factor = 1 + signal fft_pipe_out_re : std_logic_vector(g_fft.out_dat_w-1 downto 0); + signal fft_pipe_out_im : 
std_logic_vector(g_fft.out_dat_w-1 downto 0); + + -- g_fft.wb_factor > 1 and < g_fft.nof_points signal in_fft_pipe_re_arr : t_fft_slv_arr(g_fft.wb_factor-1 downto 0); signal in_fft_pipe_im_arr : t_fft_slv_arr(g_fft.wb_factor-1 downto 0); signal out_fft_pipe_re_arr : t_fft_slv_arr(g_fft.wb_factor-1 downto 0); signal out_fft_pipe_im_arr : t_fft_slv_arr(g_fft.wb_factor-1 downto 0); + signal out_fft_pipe_val : std_logic_vector(g_fft.wb_factor-1 downto 0); + signal in_fft_par : std_logic; -- = out_fft_pipe_val(0) signal in_fft_par_re_arr : t_fft_slv_arr(g_fft.wb_factor-1 downto 0); signal in_fft_par_im_arr : t_fft_slv_arr(g_fft.wb_factor-1 downto 0); - signal fft_pipe_out_re : std_logic_vector(g_fft.out_dat_w-1 downto 0); - signal fft_pipe_out_im : std_logic_vector(g_fft.out_dat_w-1 downto 0); - signal fft_out_re_arr : t_fft_slv_arr(g_fft.wb_factor-1 downto 0); signal fft_out_im_arr : t_fft_slv_arr(g_fft.wb_factor-1 downto 0); signal fft_out_val : std_logic; @@ -173,11 +180,6 @@ architecture rtl of fft_r2_wide is signal sep_out_im_arr : t_fft_slv_arr(g_fft.wb_factor-1 downto 0); signal sep_out_val : std_logic; - signal int_val : std_logic_vector(g_fft.wb_factor-1 downto 0); - - signal out_cplx : std_logic_vector(c_nof_complex*g_fft.stage_dat_w-1 downto 0); - signal in_cplx : std_logic_vector(c_nof_complex*g_fft.stage_dat_w-1 downto 0); - begin -- Default to fft_r2_pipe when g_fft.wb_factor=1 @@ -252,7 +254,7 @@ begin in_val => in_val, out_re => out_fft_pipe_re_arr(I)(c_fft_r2_pipe_arr(I).out_dat_w-1 downto 0), out_im => out_fft_pipe_im_arr(I)(c_fft_r2_pipe_arr(I).out_dat_w-1 downto 0), - out_val => int_val(I) + out_val => out_fft_pipe_val(I) ); end generate; @@ -261,6 +263,8 @@ begin -- PARALLEL FFT STAGE --------------------------------------------------------------- + in_fft_par <= out_fft_pipe_val(0); + -- Create input for parallel FFT gen_inputs_for_par : for I in g_fft.wb_factor-1 downto 0 generate in_fft_par_re_arr(I) <= 
resize_fft_svec(out_fft_pipe_re_arr(I)(c_fft_r2_pipe_arr(I).out_dat_w-1 downto 0)); @@ -279,7 +283,7 @@ begin rst => rst, in_re_arr => in_fft_par_re_arr, in_im_arr => in_fft_par_im_arr, - in_val => int_val(0), + in_val => in_fft_par, out_re_arr => fft_out_re_arr, out_im_arr => fft_out_im_arr, out_val => fft_out_val @@ -320,14 +324,14 @@ begin u_requantize_output_re : entity common_lib.common_requantize generic map ( g_representation => "SIGNED", - g_lsb_w => c_out_scale_w, + g_lsb_w => c_out_scale_w + c_sepa_growth_w, g_lsb_round => TRUE, g_lsb_round_clip => FALSE, g_msb_clip => FALSE, g_msb_clip_symmetric => FALSE, g_pipeline_remove_lsb => c_pipeline_remove_lsb, g_pipeline_remove_msb => 0, - g_in_dat_w => g_fft.stage_dat_w, + g_in_dat_w => c_raw_dat_w, g_out_dat_w => g_fft.out_dat_w ) port map ( @@ -340,14 +344,14 @@ begin u_requantize_output_im : entity common_lib.common_requantize generic map ( g_representation => "SIGNED", - g_lsb_w => c_out_scale_w, + g_lsb_w => c_out_scale_w + c_sepa_growth_w, g_lsb_round => TRUE, g_lsb_round_clip => FALSE, g_msb_clip => FALSE, g_msb_clip_symmetric => FALSE, g_pipeline_remove_lsb => c_pipeline_remove_lsb, g_pipeline_remove_msb => 0, - g_in_dat_w => g_fft.stage_dat_w, + g_in_dat_w => c_raw_dat_w, g_out_dat_w => g_fft.out_dat_w ) port map ( diff --git a/libraries/dsp/fft/src/vhdl/fft_reorder_sepa_pipe.vhd b/libraries/dsp/fft/src/vhdl/fft_reorder_sepa_pipe.vhd index 89d056fb82..b363745caf 100644 --- a/libraries/dsp/fft/src/vhdl/fft_reorder_sepa_pipe.vhd +++ b/libraries/dsp/fft/src/vhdl/fft_reorder_sepa_pipe.vhd @@ -51,9 +51,9 @@ entity fft_reorder_sepa_pipe is port ( clk : in std_logic; rst : in std_logic; - in_dat : in std_logic_vector; + in_dat : in std_logic_vector; -- c_dat_w in_val : in std_logic; - out_dat : out std_logic_vector; + out_dat : out std_logic_vector; -- c_dat_w when g_separate = false, else c_dat_w + 2 out_val : out std_logic ); end entity fft_reorder_sepa_pipe; @@ -323,9 +323,9 @@ begin port map ( clk => clk, 
rst => rst, - in_dat => out_dat_i, + in_dat => out_dat_i, -- c_dat_w in_val => out_val_i, - out_dat => out_dat, + out_dat => out_dat, -- c_dat_w + 2 out_val => out_val ); end generate; @@ -335,8 +335,8 @@ begin -- the output signals are directly driven. gen_no_separate : if g_separate=false generate rd_adr <= TO_UVEC(r.count_up, c_adr_tot_w); - out_dat <= out_dat_i; - out_val <= out_val_i; + out_dat <= out_dat_i; -- c_dat_w + out_val <= out_val_i; end generate; end rtl; diff --git a/libraries/dsp/fft/src/vhdl/fft_sepa.vhd b/libraries/dsp/fft/src/vhdl/fft_sepa.vhd index 5bf2423a85..65da081cd7 100644 --- a/libraries/dsp/fft/src/vhdl/fft_sepa.vhd +++ b/libraries/dsp/fft/src/vhdl/fft_sepa.vhd @@ -41,17 +41,9 @@ -- B.imag(m) = (X.real(N-m) - X.real(m))/2 -- -- Remarks: --- . The add and sub output of the separate have 1 bit growth that needs to be --- rounded. Simply skipping 1 LSbit is not suitable, because it yields --- asymmetry around 0 and thus a DC offset. For example for N = 3-bit data: --- x = -4 -3 -2 -1 0 1 2 3 --- round(x/2) = -2 -2 -1 -1 0 1 1 2 = common_round for signed --- floor(x/2) = -2 -2 -1 -1 0 0 1 1 = truncation --- The most negative value can be ignored: --- x : mean(-3 -2 -1 0 1 2 3) = 0 --- . round(x/2) : mean(-2 -1 -1 0 1 1 2) = 0 --- . floor(x/2) : mean(-2 -1 -1 0 0 1 1) = -2/8 = -0.25 = -2^(N-1)/2 / 2^N --- So the DC offset due to truncation is -0.25 LSbit, independent of N. +-- . The A, B outputs are scaled by factor 2 due to separate add and sub. +-- Therefore in_dat re, im have c_in_data_w bits and out_dat re, im have +-- c_out_data_w = c_in_data_w + 1 bits, to avoid overflow. 
library IEEE, common_lib; use IEEE.std_logic_1164.ALL; @@ -62,40 +54,40 @@ entity fft_sepa is port ( clk : in std_logic; rst : in std_logic; - in_dat : in std_logic_vector; + in_dat : in std_logic_vector; -- c_nof_complex * c_in_data_w in_val : in std_logic; - out_dat : out std_logic_vector; + out_dat : out std_logic_vector; -- c_nof_complex * c_out_data_w = c_nof_complex * (c_in_data_w + 1) out_val : out std_logic ); end entity fft_sepa; architecture rtl of fft_sepa is - constant c_sepa_round : boolean := true; -- must be true, because separate should round the 1 bit growth - - constant c_data_w : natural := in_dat'length/c_nof_complex; - constant c_c_data_w : natural := c_nof_complex*c_data_w; - constant c_pipeline : natural := 3; + constant c_in_data_w : natural := in_dat'length / c_nof_complex; + constant c_in_complex_w : natural := c_nof_complex * c_in_data_w; + constant c_out_data_w : natural := c_in_data_w + 1; + constant c_out_complex_w : natural := c_nof_complex * c_out_data_w; + constant c_pipeline : natural := 3; - type reg_type is record - switch : std_logic; -- Register used to toggle between A & B definitionn - val_dly : std_logic_vector(c_pipeline-1 downto 0); -- Register that delays the incoming valid signal - xn_m_reg : std_logic_vector(c_c_data_w-1 downto 0); -- Register to hold the X(N-m) value for one cycle - xm_reg : std_logic_vector(c_c_data_w-1 downto 0); -- Register to hold the X(m) value for one cycle - add_reg_a : std_logic_vector(c_data_w-1 downto 0); -- Input register A for the adder - add_reg_b : std_logic_vector(c_data_w-1 downto 0); -- Input register B for the adder - sub_reg_a : std_logic_vector(c_data_w-1 downto 0); -- Input register A for the subtractor - sub_reg_b : std_logic_vector(c_data_w-1 downto 0); -- Input register B for the subtractor - out_dat : std_logic_vector(c_c_data_w-1 downto 0); -- Registered output value - out_val : std_logic; -- Registered data valid signal + type t_reg is record + switch : std_logic; -- Register 
used to toggle between A & B definition + val_dly : std_logic_vector(c_pipeline-1 downto 0); -- Register that delays the incoming valid signal + xn_m_reg : std_logic_vector(c_in_complex_w-1 downto 0); -- Register to hold the X(N-m) value for one cycle + xm_reg : std_logic_vector(c_in_complex_w-1 downto 0); -- Register to hold the X(m) value for one cycle + add_reg_a : std_logic_vector(c_in_data_w-1 downto 0); -- Input register A for the adder + add_reg_b : std_logic_vector(c_in_data_w-1 downto 0); -- Input register B for the adder + sub_reg_a : std_logic_vector(c_in_data_w-1 downto 0); -- Input register A for the subtractor + sub_reg_b : std_logic_vector(c_in_data_w-1 downto 0); -- Input register B for the subtractor + out_dat : std_logic_vector(c_out_complex_w-1 downto 0); -- Registered output value + out_val : std_logic; -- Registered data valid signal end record; + + constant c_reg_init : t_reg := ('0', (others=>'0'), (others=>'0'), (others=>'0'), (others=>'0'), (others=>'0'), (others=>'0'), (others=>'0'), (others=>'0'), '0'); - signal r, rin : reg_type; - signal sub_result : std_logic_vector(c_data_w downto 0); -- Result of the subtractor - signal add_result : std_logic_vector(c_data_w downto 0); -- Result of the adder - - signal sub_result_q : std_logic_vector(c_data_w-1 downto 0); -- Requantized result of the subtractor - signal add_result_q : std_logic_vector(c_data_w-1 downto 0); -- Requantized result of the adder + signal r : t_reg := c_reg_init; + signal rin : t_reg; + signal sub_result : std_logic_vector(c_out_data_w-1 downto 0); -- Result of the subtractor + signal add_result : std_logic_vector(c_out_data_w-1 downto 0); -- Result of the adder begin @@ -108,8 +100,8 @@ begin g_representation => "SIGNED", g_pipeline_input => 0, g_pipeline_output => 1, - g_in_dat_w => c_data_w, - g_out_dat_w => c_data_w + 1 + g_in_dat_w => c_in_data_w, + g_out_dat_w => c_out_data_w -- = c_in_data_w + 1 ) port map ( clk => clk, @@ -124,8 +116,8 @@ begin g_representation =>
"SIGNED", g_pipeline_input => 0, g_pipeline_output => 1, - g_in_dat_w => c_data_w, - g_out_dat_w => c_data_w + 1 + g_in_dat_w => c_in_data_w, + g_out_dat_w => c_out_data_w -- = c_in_data_w + 1 ) port map ( clk => clk, @@ -134,52 +126,11 @@ begin result => sub_result ); - gen_sepa_truncate : IF c_sepa_round=FALSE GENERATE - -- truncate the one LSbit - add_result_q <= add_result(c_data_w downto 1); - sub_result_q <= sub_result(c_data_w downto 1); - end generate; - - gen_sepa_round : IF c_sepa_round=TRUE GENERATE - -- round the one LSbit - round_add : ENTITY common_lib.common_round - GENERIC MAP ( - g_representation => "SIGNED", -- SIGNED (round +-0.5 away from zero to +- infinity) or UNSIGNED rounding (round 0.5 up to + inifinity) - g_round => TRUE, -- when TRUE round the input, else truncate the input - g_round_clip => FALSE, -- when TRUE clip rounded input >= +max to avoid wrapping to output -min (signed) or 0 (unsigned) - g_pipeline_input => 0, -- >= 0 - g_pipeline_output => 0, -- >= 0, use g_pipeline_input=0 and g_pipeline_output=0 for combinatorial output - g_in_dat_w => c_data_w+1, - g_out_dat_w => c_data_w - ) - PORT MAP ( - clk => clk, - in_dat => add_result, - out_dat => add_result_q - ); - - round_sub : ENTITY common_lib.common_round - GENERIC MAP ( - g_representation => "SIGNED", -- SIGNED (round +-0.5 away from zero to +- infinity) or UNSIGNED rounding (round 0.5 up to + inifinity) - g_round => TRUE, -- when TRUE round the input, else truncate the input - g_round_clip => FALSE, -- when TRUE clip rounded input >= +max to avoid wrapping to output -min (signed) or 0 (unsigned) - g_pipeline_input => 0, -- >= 0 - g_pipeline_output => 0, -- >= 0, use g_pipeline_input=0 and g_pipeline_output=0 for combinatorial output - g_in_dat_w => c_data_w+1, - g_out_dat_w => c_data_w - ) - PORT MAP ( - clk => clk, - in_dat => sub_result, - out_dat => sub_result_q - ); - end generate; - --------------------------------------------------------------- -- CONTROL PROCESS 
--------------------------------------------------------------- - comb : process(r, rst, in_val, in_dat, add_result_q, sub_result_q) - variable v : reg_type; + comb : process(r, rst, in_val, in_dat, add_result, sub_result) + variable v : t_reg; begin v := r; @@ -188,7 +139,7 @@ begin v.val_dly(0) := in_val; -- Composition of the output registers: - v.out_dat := sub_result_q & add_result_q; + v.out_dat := sub_result & add_result; v.out_val := r.val_dly(c_pipeline-1); -- Compose the inputs for the adder and subtractor @@ -196,16 +147,16 @@ begin if in_val = '1' or r.val_dly(0) = '1' then if r.switch = '0' then v.xm_reg := in_dat; - v.add_reg_a := r.xm_reg(c_c_data_w-1 downto c_data_w); -- Xm imag - v.add_reg_b := r.xn_m_reg(c_c_data_w-1 downto c_data_w); -- Xn-m imag - v.sub_reg_a := r.xn_m_reg(c_data_w-1 downto 0); -- Xn-m real - v.sub_reg_b := r.xm_reg(c_data_w-1 downto 0); -- Xm real + v.add_reg_a := r.xm_reg(c_in_complex_w-1 downto c_in_data_w); -- Xm imag + v.add_reg_b := r.xn_m_reg(c_in_complex_w-1 downto c_in_data_w); -- Xn-m imag + v.sub_reg_a := r.xn_m_reg(c_in_data_w-1 downto 0); -- Xn-m real + v.sub_reg_b := r.xm_reg(c_in_data_w-1 downto 0); -- Xm real else v.xn_m_reg := in_dat; - v.add_reg_a := r.xm_reg(c_data_w-1 downto 0); -- Xm real - v.add_reg_b := in_dat(c_data_w-1 downto 0); -- Xn-m real - v.sub_reg_a := r.xm_reg(c_c_data_w-1 downto c_data_w); -- Xm imag - v.sub_reg_b := in_dat(c_c_data_w-1 downto c_data_w); -- Xn-m imag + v.add_reg_a := r.xm_reg(c_in_data_w-1 downto 0); -- Xm real + v.add_reg_b := in_dat(c_in_data_w-1 downto 0); -- Xn-m real + v.sub_reg_a := r.xm_reg(c_in_complex_w-1 downto c_in_data_w); -- Xm imag + v.sub_reg_b := in_dat(c_in_complex_w-1 downto c_in_data_w); -- Xn-m imag end if; end if; @@ -213,16 +164,10 @@ begin v.switch := not r.switch; end if; - if(rst = '1') then + if rst = '1' then + -- Only need to reset the control signals v.switch := '0'; v.val_dly := (others => '0'); - v.xn_m_reg := (others => '0'); - v.xm_reg := (others 
=> '0'); - v.add_reg_a := (others => '0'); - v.add_reg_b := (others => '0'); - v.sub_reg_a := (others => '0'); - v.sub_reg_b := (others => '0'); - v.out_dat := (others => '0'); v.out_val := '0'; end if; diff --git a/libraries/dsp/fft/src/vhdl/fft_sepa_wide.vhd b/libraries/dsp/fft/src/vhdl/fft_sepa_wide.vhd index a950206d5e..0af4993aee 100644 --- a/libraries/dsp/fft/src/vhdl/fft_sepa_wide.vhd +++ b/libraries/dsp/fft/src/vhdl/fft_sepa_wide.vhd @@ -67,11 +67,17 @@ architecture rtl of fft_sepa_wide is constant c_page_size : natural := g_fft.nof_points/g_fft.wb_factor; -- Size of the memories constant c_nof_pages : natural := 2; -- The number of pages in each ram. - constant c_dat_w : natural := c_nof_complex*g_fft.stage_dat_w; -- Data width for the internal vectors where real and imag are combined. + constant c_in_w : natural := g_fft.stage_dat_w; + constant c_dat_w : natural := c_nof_complex*c_in_w; -- Data width for the internal vectors where real and imag are combined. constant c_adr_w : natural := ceil_log2(c_page_size); -- Address width of the rams constant c_nof_streams : natural := 2; -- Number of inputstreams for the zip units - type t_dat_arr is array(integer range <> ) of std_logic_vector(c_dat_w-1 downto 0); + constant c_sepa_growth_w : natural := sel_a_b(g_fft.use_separate, 1, 0); -- add one bit for add sub growth in separate + constant c_out_w : natural := c_in_w + c_sepa_growth_w; + constant c_raw_dat_w : natural := c_nof_complex*c_out_w; -- = c_dat_w or c_dat_w + 2 + + type t_dat_arr is array(integer range <> ) of std_logic_vector(c_dat_w-1 downto 0); + type t_raw_dat_arr is array(integer range <> ) of std_logic_vector(c_raw_dat_w-1 downto 0); type t_rd_adr_arr is array(integer range <> ) of std_logic_vector(c_adr_w-1 downto 0); type t_zip_in_matrix is array(integer range <> ) of t_slv_64_arr(1 downto 0); -- Every Zip unit has two inputs. 
@@ -85,24 +91,27 @@ architecture rtl of fft_sepa_wide is signal zip_in_matrix : t_zip_in_matrix(g_fft.wb_factor-1 downto 0); -- Matrix that contains the inputs for zip units signal zip_in_val : std_logic_vector(g_fft.wb_factor-1 downto 0); -- Vector that holds the data input valids for the zip units - signal zip_out_dat_arr : t_dat_arr(g_fft.wb_factor-1 downto 0); -- Array that holds the outputs of all zip units. + signal zip_out_dat_arr : t_dat_arr(g_fft.wb_factor-1 downto 0); -- Array that holds the outputs of all zip units. signal zip_out_val : std_logic_vector(g_fft.wb_factor-1 downto 0); -- Vector that holds the output valids of the zip units - signal sep_out_dat_arr : t_dat_arr(g_fft.wb_factor-1 downto 0); -- Array that holds the outputs of the separation blocks + signal sep_out_dat_arr : t_raw_dat_arr(g_fft.wb_factor-1 downto 0); -- Array that holds the outputs of the separation blocks signal sep_out_val_vec : std_logic_vector(g_fft.wb_factor-1 downto 0); -- Vector containing the datavalids from the separation blocks - signal out_dat_arr : t_dat_arr(g_fft.wb_factor-1 downto 0); -- Array that holds the ouput values, where real and imag are concatenated + signal out_dat_arr : t_raw_dat_arr(g_fft.wb_factor-1 downto 0); -- Array that holds the output values, where real and imag are concatenated - type state_type is (s_idle, s_read); - type reg_type is record + type t_state is (s_idle, s_read); + type t_reg is record switch : std_logic; -- Toggle register used for separate functionalilty count_up : natural range 0 to c_page_size; -- An upwards counter for read addressing count_down : natural range 0 to c_page_size; -- A downwards counter for read addressing val_odd : std_logic; -- Register that drives the in_valid of the odd zip units val_even : std_logic; -- Register that drives the in_valid of the even zip units - state : state_type; -- The state machine. + state : t_state; -- The state machine. 
end record; - signal r, rin : reg_type; + constant c_reg_init : t_reg := ('0', 0, 0, '0', '0', s_idle); + + signal r : t_reg := c_reg_init; + signal rin : t_reg; begin @@ -111,7 +120,7 @@ begin --------------------------------------------------------------- -- Prepare the data for the dual paged memory. Real and imaginary part are concatenated into one vector. gen_prep_write_data : for I in 0 to g_fft.wb_factor-1 generate - wr_dat(I) <= in_im_arr(I)(g_fft.stage_dat_w-1 downto 0) & in_re_arr(I)(g_fft.stage_dat_w-1 downto 0); + wr_dat(I) <= in_im_arr(I)(c_in_w-1 downto 0) & in_re_arr(I)(c_in_w-1 downto 0); end generate; -- Prepare the write control signals for the memories. @@ -204,9 +213,9 @@ begin port map ( clk => clk, rst => rst, - in_dat => zip_out_dat_arr(I), + in_dat => zip_out_dat_arr(I), -- c_dat_w in_val => zip_out_val(I), - out_dat => sep_out_dat_arr(I), + out_dat => sep_out_dat_arr(I), -- c_dat_w + 2 out_val => sep_out_val_vec(I) ); end generate; @@ -218,13 +227,13 @@ begin -- the fellow toggle signals. It also controls the starting and stopping -- of the data stream. 
comb : process(r, rst, next_page) - variable v : reg_type; + variable v : t_reg; begin v := r; case r.state is - when s_idle => + when s_idle => v.switch := '0'; v.val_odd := '0'; v.val_even := '0'; @@ -234,7 +243,7 @@ begin v.state := s_read; end if; - when s_read => + when s_read => if(r.switch = '0') then -- Toggle the switch register from 0 to 1 v.switch := '1'; end if; @@ -255,22 +264,17 @@ begin v.val_odd := r.switch; -- Assignment of the odd and even markers v.val_even := not(r.switch); - when others => - v.state := s_idle; + when others => + v.state := s_idle; - end case; + end case; - if(rst = '1') then - v.switch := '0'; - v.count_up := 0; - v.count_down := 0; - v.val_odd := '0'; - v.val_even := '0'; - v.state := s_idle; + if rst = '1' then + v := c_reg_init; end if; rin <= v; - + end process comb; regs : process(clk) @@ -287,8 +291,8 @@ begin u_output_pipeline_align : entity common_lib.common_pipeline generic map ( g_pipeline => c_pipeline_output + 1, -- Pipeline + one stage for allignment - g_in_dat_w => c_dat_w, - g_out_dat_w => c_dat_w + g_in_dat_w => c_raw_dat_w, + g_out_dat_w => c_raw_dat_w ) port map ( clk => clk, @@ -299,8 +303,8 @@ begin u_output_pipeline : entity common_lib.common_pipeline generic map ( g_pipeline => c_pipeline_output, -- Only pipeline stage - g_in_dat_w => c_dat_w, - g_out_dat_w => c_dat_w + g_in_dat_w => c_raw_dat_w, + g_out_dat_w => c_raw_dat_w ) port map ( clk => clk, @@ -321,8 +325,8 @@ begin -- Split the concatenated array into a real and imaginary array for the output gen_output_arrays : for I in g_fft.wb_factor-1 downto 0 generate - out_re_arr(I) <= resize_fft_svec(out_dat_arr(I)( g_fft.stage_dat_w-1 downto 0)); - out_im_arr(I) <= resize_fft_svec(out_dat_arr(I)(c_nof_complex*g_fft.stage_dat_w-1 downto g_fft.stage_dat_w)); + out_re_arr(I) <= resize_fft_svec(out_dat_arr(I)( c_out_w-1 downto 0)); + out_im_arr(I) <= resize_fft_svec(out_dat_arr(I)(c_nof_complex*c_out_w-1 downto c_out_w)); end generate; end rtl; -- GitLab