diff --git a/libraries/dsp/doc/filterbank.txt b/libraries/dsp/doc/filterbank.txt index b8750865ab39699854e03f026866a2025930cd9d..e7eedb9959bdc5b8d695bdaf298f412fb3bc4a7e 100644 --- a/libraries/dsp/doc/filterbank.txt +++ b/libraries/dsp/doc/filterbank.txt @@ -263,5 +263,18 @@ b) FFT . first start the tb simulation (run -a, or about 30 us) . then start the tc script The tc does not verify the output, so the tc PASSED statement is void. + + $5) Two real separate rounding + The add and sub output of the separate have 1 bit growth that needs to be + rounded. Simply skipping 1 LSbit is not suitable, because it yields + asymmetry around 0 and thus a DC offset. For example for N = 3-bit data: + x = -4 -3 -2 -1 0 1 2 3 + round(x/2) = -2 -2 -1 -1 0 1 1 2 = common_round for signed + floor(x/2) = -2 -2 -1 -1 0 0 1 1 = truncation + The most negative value can be ignored: + x : mean(-3 -2 -1 0 1 2 3) = 0 + . round(x/2) : mean(-2 -1 -1 0 1 1 2) = 0 + . floor(x/2) : mean(-2 -1 -1 0 0 1 1) = -2/8 = -0.25 = -2^(N-1)/2 / 2^N + So the DC offset due to truncation is -0.25 LSbit, independent of N. c) \ No newline at end of file diff --git a/libraries/dsp/fft/src/vhdl/fft_r2_par.vhd b/libraries/dsp/fft/src/vhdl/fft_r2_par.vhd index 7cd67dd0720fbb84683125d87c3414adab423097..3f1927422398eee82f0e0d4065299cfaffe994ad 100644 --- a/libraries/dsp/fft/src/vhdl/fft_r2_par.vhd +++ b/libraries/dsp/fft/src/vhdl/fft_r2_par.vhd @@ -33,7 +33,9 @@ -- real stream (B) presented on the imaginary input. -- The separation unit outputs the spectrum of A and B in -- an alternating way: A(0), B(0), A(1), B(1).... etc - +-- The separate function adds and subtracts two complex bins. 
+-- Therefore it causes 1 bit growth that needs to be rounded, as +-- explained in fft_sepa.vhd library ieee, common_lib, rTwoSDF_lib; use IEEE.std_logic_1164.all; @@ -104,14 +106,14 @@ architecture str of fft_r2_par is v_nr_of_domains := nr_of_points/2**(stage+1); v_offset := 2**stage; for I in 0 to v_nr_of_domains loop - if(array_index >= (2*I)*2**stage and array_index < (2*I+1)*2**stage) then -- Detect if output is an even section - if((array_index mod 2) = 0) then -- Check if input value is odd or even + if array_index >= (2*I)*2**stage and array_index < (2*I+1)*2**stage then -- Detect if output is an even section + if (array_index mod 2) = 0 then -- Check if input value is odd or even v_return := array_index; -- When even: value of element else v_return := array_index+v_offset-1; -- When odd: value of element + offset end if; - elsif(array_index >= (2*I+1)*2**stage and array_index < (2*I+2)*2**stage) then - if((array_index mod 2) = 0) then -- Check if input value is odd or even + elsif array_index >= (2*I+1)*2**stage and array_index < (2*I+2)*2**stage then + if (array_index mod 2) = 0 then -- Check if input value is odd or even v_return := array_index-v_offset+1; -- When even: offset is subtracted from the element else v_return := array_index; -- When odd: element stays the the same. 
@@ -121,6 +123,9 @@ architecture str of fft_r2_par is return v_return; end; + constant c_pipeline_remove_lsb : natural := 1; + constant c_sepa_round : boolean := true; -- must be true, because separate should round the 1 bit growth + constant c_nof_stages : natural := ceil_log2(g_fft.nof_points); constant c_nof_bf_per_stage : natural := g_fft.nof_points/2; constant c_in_scale_w_tester : integer := g_fft.stage_dat_w - g_fft.in_dat_w - sel_a_b(g_fft.guard_enable, g_fft.guard_w, 0); @@ -140,11 +145,12 @@ architecture str of fft_r2_par is signal data_val : t_val_arr; signal int_re_arr : t_stage_dat_arr(g_fft.nof_points-1 downto 0); signal int_im_arr : t_stage_dat_arr(g_fft.nof_points-1 downto 0); - signal pre_quant_re_arr : t_stage_dat_arr(g_fft.nof_points-1 downto 0); - signal pre_quant_im_arr : t_stage_dat_arr(g_fft.nof_points-1 downto 0); + signal fft_re_arr : t_stage_dat_arr(g_fft.nof_points-1 downto 0); + signal fft_im_arr : t_stage_dat_arr(g_fft.nof_points-1 downto 0); signal add_arr : t_stage_sum_arr(g_fft.nof_points-1 downto 0); signal sub_arr : t_stage_sum_arr(g_fft.nof_points-1 downto 0); signal int_val : std_logic; + signal fft_val : std_logic; begin @@ -217,8 +223,12 @@ begin -------------------------------------------------------------------------------- -- Optional separate -------------------------------------------------------------------------------- - gen_separate : if(g_fft.use_separate) generate - gen_reordering : for I in 1 to g_fft.nof_points/2 - 1 generate + gen_separate : if g_fft.use_separate generate + --------------------------------------------------------------------------- + -- Calculate the positive bins + --------------------------------------------------------------------------- + gen_positive_bins : for I in 1 to g_fft.nof_points/2 - 1 generate + -- common_add_sub a_output_real_adder : entity common_lib.common_add_sub generic map ( g_direction => "ADD", @@ -232,9 +242,8 @@ begin clk => clk, in_a => int_re_arr(g_fft.nof_points-I), in_b
=> int_re_arr(I), - result => add_arr(2*i) + result => add_arr(2*I) ); - pre_quant_re_arr(2*I) <= add_arr(2*i)(g_fft.stage_dat_w DOWNTO 1); b_output_real_adder : entity common_lib.common_add_sub generic map ( @@ -249,9 +258,8 @@ begin clk => clk, in_a => int_im_arr(g_fft.nof_points-I), in_b => int_im_arr(I), - result => add_arr(2*i+1) + result => add_arr(2*I+1) ); - pre_quant_re_arr(2*I+1) <= add_arr(2*i+1)(g_fft.stage_dat_w DOWNTO 1); a_output_imag_subtractor : entity common_lib.common_add_sub generic map ( @@ -266,9 +274,8 @@ begin clk => clk, in_a => int_im_arr(I), in_b => int_im_arr(g_fft.nof_points-I), - result => sub_arr(2*i) + result => sub_arr(2*I) ); - pre_quant_im_arr(2*I) <= sub_arr(2*i)(g_fft.stage_dat_w DOWNTO 1); b_output_imag_subtractor : entity common_lib.common_add_sub generic map ( @@ -283,12 +290,94 @@ begin clk => clk, in_a => int_re_arr(g_fft.nof_points-I), in_b => int_re_arr(I), - result => sub_arr(2*i+1) + result => sub_arr(2*I+1) ); - pre_quant_im_arr(2*I+1) <= sub_arr(2*i+1)(g_fft.stage_dat_w DOWNTO 1); - end generate; + gen_sepa_truncate : IF c_sepa_round=false GENERATE + -- truncate the one LSbit + fft_re_arr(2*I ) <= add_arr(2*I )(g_fft.stage_dat_w DOWNTO 1); -- A real + fft_re_arr(2*I+1) <= add_arr(2*I+1)(g_fft.stage_dat_w DOWNTO 1); -- B real + fft_im_arr(2*I ) <= sub_arr(2*I )(g_fft.stage_dat_w DOWNTO 1); -- A imag + fft_im_arr(2*I+1) <= sub_arr(2*I+1)(g_fft.stage_dat_w DOWNTO 1); -- B imag + end generate; + + gen_sepa_round : IF c_sepa_round=true GENERATE + -- round the one LSbit + round_re_a : ENTITY common_lib.common_round + GENERIC MAP ( + g_representation => "SIGNED", -- SIGNED (round +-0.5 away from zero to +- infinity) or UNSIGNED rounding (round 0.5 up to + infinity) + g_round => TRUE, -- when TRUE round the input, else truncate the input + g_round_clip => FALSE, -- when TRUE clip rounded input >= +max to avoid wrapping to output -min (signed) or 0 (unsigned) + g_pipeline_input => 0, -- >= 0 + g_pipeline_output => 0, -- >= 0,
use g_pipeline_input=0 and g_pipeline_output=0 for combinatorial output + g_in_dat_w => g_fft.stage_dat_w+1, + g_out_dat_w => g_fft.stage_dat_w + ) + PORT MAP ( + clk => clk, + in_dat => add_arr(2*I), + out_dat => fft_re_arr(2*I) + ); + + round_re_b : ENTITY common_lib.common_round + GENERIC MAP ( + g_representation => "SIGNED", -- SIGNED (round +-0.5 away from zero to +- infinity) or UNSIGNED rounding (round 0.5 up to + infinity) + g_round => TRUE, -- when TRUE round the input, else truncate the input + g_round_clip => FALSE, -- when TRUE clip rounded input >= +max to avoid wrapping to output -min (signed) or 0 (unsigned) + g_pipeline_input => 0, -- >= 0 + g_pipeline_output => 0, -- >= 0, use g_pipeline_input=0 and g_pipeline_output=0 for combinatorial output + g_in_dat_w => g_fft.stage_dat_w+1, + g_out_dat_w => g_fft.stage_dat_w + ) + PORT MAP ( + clk => clk, + in_dat => add_arr(2*I+1), + out_dat => fft_re_arr(2*I+1) + ); + + round_im_a : ENTITY common_lib.common_round + GENERIC MAP ( + g_representation => "SIGNED", -- SIGNED (round +-0.5 away from zero to +- infinity) or UNSIGNED rounding (round 0.5 up to + infinity) + g_round => TRUE, -- when TRUE round the input, else truncate the input + g_round_clip => FALSE, -- when TRUE clip rounded input >= +max to avoid wrapping to output -min (signed) or 0 (unsigned) + g_pipeline_input => 0, -- >= 0 + g_pipeline_output => 0, -- >= 0, use g_pipeline_input=0 and g_pipeline_output=0 for combinatorial output + g_in_dat_w => g_fft.stage_dat_w+1, + g_out_dat_w => g_fft.stage_dat_w + ) + PORT MAP ( + clk => clk, + in_dat => sub_arr(2*I), + out_dat => fft_im_arr(2*I) + ); + + round_im_b : ENTITY common_lib.common_round + GENERIC MAP ( + g_representation => "SIGNED", -- SIGNED (round +-0.5 away from zero to +- infinity) or UNSIGNED rounding (round 0.5 up to + infinity) + g_round => TRUE, -- when TRUE round the input, else truncate the input + g_round_clip => FALSE, -- when TRUE clip rounded input >= +max to avoid wrapping to
output -min (signed) or 0 (unsigned) + g_pipeline_input => 0, -- >= 0 + g_pipeline_output => 0, -- >= 0, use g_pipeline_input=0 and g_pipeline_output=0 for combinatorial output + g_in_dat_w => g_fft.stage_dat_w+1, + g_out_dat_w => g_fft.stage_dat_w + ) + PORT MAP ( + clk => clk, + in_dat => sub_arr(2*I+1), + out_dat => fft_im_arr(2*I+1) + ); + end generate; + end generate; + --------------------------------------------------------------------------- + -- Generate bin 0 directly + --------------------------------------------------------------------------- + -- Index N=g_fft.nof_points wraps to index 0: + -- . fft_re_arr(0) = (int_re_arr(0) + int_re_arr(N)) / 2 = int_re_arr(0) + -- . fft_re_arr(1) = (int_im_arr(0) + int_im_arr(N)) / 2 = int_im_arr(0) + -- . fft_im_arr(0) = (int_im_arr(0) - int_im_arr(N)) / 2 = 0 + -- . fft_im_arr(1) = (int_re_arr(0) - int_re_arr(N)) / 2 = 0 + u_pipeline_a_re_0 : entity common_lib.common_pipeline generic map ( g_pipeline => g_pipeline.sep_lat, @@ -298,7 +387,7 @@ begin port map ( clk => clk, in_dat => int_re_arr(0), - out_dat => pre_quant_re_arr(0) + out_dat => fft_re_arr(0) ); u_pipeline_b_re_0 : entity common_lib.common_pipeline @@ -310,39 +399,39 @@ begin port map ( clk => clk, in_dat => int_im_arr(0), - out_dat => pre_quant_re_arr(1) + out_dat => fft_re_arr(1) ); + -- The imaginary outputs of A(0) and B(0) are always zero in case two real inputs are provided - pre_quant_im_arr(0) <= (others => '0'); - pre_quant_im_arr(1) <= (others => '0'); + fft_im_arr(0) <= (others=>'0'); + fft_im_arr(1) <= (others=>'0'); ------------------------------------------------------------------------------ - -- Valid pipelining. 
The val signal must be pipelined for ser_lat cycles - -- to compensate for the pipelines atge of the adders and subtractors + -- Valid pipelining for separate ------------------------------------------------------------------------------ - u_val_ser_lat : entity common_lib.common_pipeline_sl + u_seperate_fft_val : entity common_lib.common_pipeline_sl generic map ( g_pipeline => g_pipeline.sep_lat ) port map ( clk => clk, in_dat => int_val, - out_dat => out_val + out_dat => fft_val ); end generate; - no_separate : if(g_fft.use_separate=false) generate + no_separate : if g_fft.use_separate=false generate assign_outputs : for I in 0 to g_fft.nof_points-1 generate - pre_quant_re_arr(I) <= int_re_arr(I); - pre_quant_im_arr(I) <= int_im_arr(I); + fft_re_arr(I) <= int_re_arr(I); + fft_im_arr(I) <= int_im_arr(I); end generate; - out_val <= int_val; + fft_val <= int_val; end generate; ------------------------------------------------------------------------------ -- Parallel FFT output requantization ------------------------------------------------------------------------------ - create_output_requantizers : for I in 0 to g_fft.nof_points-1 generate + gen_output_requantizers : for I in 0 to g_fft.nof_points-1 generate u_requantize_re : entity common_lib.common_requantize generic map ( g_representation => "SIGNED", @@ -352,15 +441,14 @@ begin g_msb_clip => FALSE, g_msb_clip_symmetric => FALSE, g_gain_w => c_out_gain_w, - g_pipeline_remove_lsb => 0, + g_pipeline_remove_lsb => c_pipeline_remove_lsb, g_pipeline_remove_msb => 0, g_in_dat_w => g_fft.stage_dat_w, g_out_dat_w => g_fft.out_dat_w ) port map ( clk => clk, - clken => '1', - in_dat => pre_quant_re_arr(I), + in_dat => fft_re_arr(I), out_dat => out_re_arr(I), out_ovr => open ); @@ -374,18 +462,28 @@ begin g_msb_clip => FALSE, g_msb_clip_symmetric => FALSE, g_gain_w => c_out_gain_w, - g_pipeline_remove_lsb => 0, + g_pipeline_remove_lsb => c_pipeline_remove_lsb, g_pipeline_remove_msb => 0, g_in_dat_w => g_fft.stage_dat_w, 
g_out_dat_w => g_fft.out_dat_w ) port map ( clk => clk, - clken => '1', - in_dat => pre_quant_im_arr(I), + in_dat => fft_im_arr(I), out_dat => out_im_arr(I), out_ovr => open ); end generate; + + u_out_val : entity common_lib.common_pipeline_sl + generic map ( + g_pipeline => c_pipeline_remove_lsb + ) + port map ( + rst => rst, + clk => clk, + in_dat => fft_val, + out_dat => out_val + ); end str; diff --git a/libraries/dsp/fft/src/vhdl/fft_r2_pipe.vhd b/libraries/dsp/fft/src/vhdl/fft_r2_pipe.vhd index 19ab52a6ae0109263bc94c388110fbf98307c574..e3be369fd9fdc7127c32f65905dff8c4b5c21a43 100644 --- a/libraries/dsp/fft/src/vhdl/fft_r2_pipe.vhd +++ b/libraries/dsp/fft/src/vhdl/fft_r2_pipe.vhd @@ -76,6 +76,8 @@ end entity fft_r2_pipe; architecture str of fft_r2_pipe is + constant c_pipeline_remove_lsb : natural := 0; + constant c_nof_stages : natural := ceil_log2(g_fft.nof_points); constant c_stage_offset : natural := true_log2(g_fft.wb_factor); -- Stage offset is required for twiddle generation in wideband fft constant c_in_scale_w : natural := g_fft.stage_dat_w - g_fft.in_dat_w - sel_a_b(g_fft.guard_enable, g_fft.guard_w, 0); @@ -176,14 +178,13 @@ begin g_msb_clip => FALSE, g_msb_clip_symmetric => FALSE, g_gain_w => c_out_gain_w, - g_pipeline_remove_lsb => 0, + g_pipeline_remove_lsb => c_pipeline_remove_lsb, g_pipeline_remove_msb => 0, g_in_dat_w => g_fft.stage_dat_w, g_out_dat_w => g_fft.out_dat_w ) port map ( clk => clk, - clken => '1', in_dat => raw_out_re, out_dat => out_re, out_ovr => open @@ -198,21 +199,29 @@ begin g_msb_clip => FALSE, g_msb_clip_symmetric => FALSE, g_gain_w => c_out_gain_w, - g_pipeline_remove_lsb => 0, + g_pipeline_remove_lsb => c_pipeline_remove_lsb, g_pipeline_remove_msb => 0, g_in_dat_w => g_fft.stage_dat_w, g_out_dat_w => g_fft.out_dat_w ) port map ( clk => clk, - clken => '1', in_dat => raw_out_im, out_dat => out_im, out_ovr => open ); -- Valid Output - out_val <= raw_out_val; + u_out_val : entity common_lib.common_pipeline_sl + generic map
( + g_pipeline => c_pipeline_remove_lsb + ) + port map ( + rst => rst, + clk => clk, + in_dat => raw_out_val, + out_dat => out_val + ); end str; diff --git a/libraries/dsp/fft/src/vhdl/fft_r2_wide.vhd b/libraries/dsp/fft/src/vhdl/fft_r2_wide.vhd index 72ecd6d70d2d4f7ce02642f84c317eb684adbbd2..7385f24d9404342cceaa1feaa5e574973e2b17f9 100644 --- a/libraries/dsp/fft/src/vhdl/fft_r2_wide.vhd +++ b/libraries/dsp/fft/src/vhdl/fft_r2_wide.vhd @@ -50,15 +50,14 @@ -- fft_shift() only applies to spectra for complex input. -- -- Remarks: --- . This fft_r2_wide does not (yet) support wb_factor = 1 (= only a --- fft_r2_pipe instance) or wb_factor = g_fft.nof_points (= only a --- fft_r2_par instance). Fixing this is nice to have, but not essential. --- Care must be taken to properly account for guard_w and out_gain_w. --- Therefore probably it is most clear to use a structural approach that --- would generate seperate instances for each case: --- . wb_factor = 1 --- . wb_factor > 1 AND wb_factor < g_fft.nof_points --- . wb_factor = g_fft.nof_points. +-- . This fft_r2_wide also supports wb_factor = 1 (= only a fft_r2_pipe +-- instance) or wb_factor = g_fft.nof_points (= only a fft_r2_par instance). +-- Care must be taken to properly account for guard_w and out_gain_w, +-- therefore it is best to simply use a structural approach that generates +-- separate instances for each case: +-- . wb_factor = 1 --> pipe +-- . wb_factor > 1 AND wb_factor < g_fft.nof_points --> wide +-- . wb_factor = g_fft.nof_points --> par -- . This fft_r2_wide uses the use_reorder in the pipeline FFT, in the parallel -- FFT and also has reorder memory in the fft_sepa_wide instance.
The reorder -- memories in the FFTs can maybe be saved by using only the reorder memory @@ -144,6 +143,8 @@ architecture rtl of fft_r2_wide is return v_return; end; + constant c_pipeline_remove_lsb : natural := 0; + constant c_fft_r2_pipe_arr : t_fft_arr(g_fft.wb_factor-1 downto 0) := func_create_generic_for_pipe_fft(g_fft); constant c_fft_r2_par : t_fft := func_create_generic_for_par_fft(g_fft); @@ -162,6 +163,9 @@ architecture rtl of fft_r2_wide is signal in_fft_par_re_arr : t_fft_slv_arr(g_fft.wb_factor-1 downto 0); signal in_fft_par_im_arr : t_fft_slv_arr(g_fft.wb_factor-1 downto 0); + signal fft_pipe_out_re : std_logic_vector(g_fft.out_dat_w-1 downto 0); + signal fft_pipe_out_im : std_logic_vector(g_fft.out_dat_w-1 downto 0); + signal fft_out_re_arr : t_fft_slv_arr(g_fft.wb_factor-1 downto 0); signal fft_out_im_arr : t_fft_slv_arr(g_fft.wb_factor-1 downto 0); signal fft_out_val : std_logic; @@ -170,63 +174,106 @@ architecture rtl of fft_r2_wide is signal sep_out_im_arr : t_fft_slv_arr(g_fft.wb_factor-1 downto 0); signal sep_out_val : std_logic; - signal int_val : std_logic_vector(g_fft.wb_factor-1 downto 0); + signal int_val : std_logic_vector(g_fft.wb_factor-1 downto 0); signal out_cplx : std_logic_vector(c_nof_complex*g_fft.stage_dat_w-1 downto 0); signal in_cplx : std_logic_vector(c_nof_complex*g_fft.stage_dat_w-1 downto 0); begin + + -- Default to fft_r2_pipe when g_fft.wb_factor=1 + gen_fft_r2_pipe : if g_fft.wb_factor=1 generate + u_fft_r2_pipe : entity work.fft_r2_pipe + generic map ( + g_fft => g_fft, + g_pipeline => g_pft_pipeline + ) + port map ( + clk => clk, + rst => rst, + in_re => in_re_arr(0)(g_fft.in_dat_w-1 downto 0), + in_im => in_im_arr(0)(g_fft.in_dat_w-1 downto 0), + in_val => in_val, + out_re => fft_pipe_out_re, + out_im => fft_pipe_out_im, + out_val => out_val + ); + + out_re_arr(0) <= resize_fft_svec(fft_pipe_out_re); + out_im_arr(0) <= resize_fft_svec(fft_pipe_out_im); + end generate; - 
------------------------------------------------------------------------------ - -- Inputs are prepared/scaled for the pipelined ffts - ------------------------------------------------------------------------------ - gen_get_the_inputs : for I in 0 to g_fft.wb_factor-1 generate - in_fft_pipe_re_arr(I) <= scale_and_resize_svec(in_re_arr(I), c_in_scale_w, c_fft_slv_w); - in_fft_pipe_im_arr(I) <= scale_and_resize_svec(in_im_arr(I), c_in_scale_w, c_fft_slv_w); - end generate; - - --------------------------------------------------------------- - -- PIPELINED FFT STAGE - --------------------------------------------------------------- - -- The first stage of the wideband fft consist of the generation of "wb_factor" - -- pipelined fft's. These pipelines fft's operate in parallel. - gen_pipelined_ffts : for I in g_fft.wb_factor-1 downto 0 generate - u_pft : entity work.fft_r2_pipe - generic map( - g_fft => c_fft_r2_pipe_arr(I), -- generics for the pipelined FFTs - g_pipeline => g_pft_pipeline -- pipeline generics for the pipelined FFTs + -- Default to fft_r2_par when g_fft.wb_factor=g_fft.nof_points + gen_fft_r2_par : if g_fft.wb_factor=g_fft.nof_points generate + u_fft_r2_par : entity work.fft_r2_par + generic map ( + g_fft => g_fft, + g_pipeline => g_fft_pipeline ) - port map( - clk => clk, - rst => rst, - in_re => in_fft_pipe_re_arr(I)(c_fft_r2_pipe_arr(I).in_dat_w-1 downto 0), - in_im => in_fft_pipe_im_arr(I)(c_fft_r2_pipe_arr(I).in_dat_w-1 downto 0), - in_val => in_val, - out_re => out_fft_pipe_re_arr(I)(c_fft_r2_pipe_arr(I).out_dat_w-1 downto 0), - out_im => out_fft_pipe_im_arr(I)(c_fft_r2_pipe_arr(I).out_dat_w-1 downto 0), - out_val => int_val(I) - ); - end generate; - - create_par_fft : if (g_fft.wb_factor > 1) generate - -- Create input for paralle FFT. 
- gen_inputs_for_par : for I in g_fft.wb_factor-1 downto 0 generate - -- Perform the 1 bit scaling here befor entering the parallel FFT: - in_fft_par_re_arr(I) <= RESIZE_SVEC(out_fft_pipe_re_arr(I)(c_fft_r2_pipe_arr(I).out_dat_w-1 downto 0), c_fft_slv_w); - in_fft_par_im_arr(I) <= RESIZE_SVEC(out_fft_pipe_im_arr(I)(c_fft_r2_pipe_arr(I).out_dat_w-1 downto 0), c_fft_slv_w); - end generate; + port map ( + clk => clk, + rst => rst, + in_re_arr => in_re_arr, + in_im_arr => in_im_arr, + in_val => in_val, + out_re_arr => out_re_arr, + out_im_arr => out_im_arr, + out_val => out_val + ); + end generate; + + -- Create wideband FFT as combination of g_fft.wb_factor instances of fft_r2_pipe with one instance of fft_r2_par + gen_fft_r2_wide : if g_fft.wb_factor>1 and g_fft.wb_factor<g_fft.nof_points generate + --------------------------------------------------------------- + -- PIPELINED FFT STAGE + --------------------------------------------------------------- + + -- Inputs are prepared/scaled for the pipelined ffts + gen_fft_pipe_inputs : for I in 0 to g_fft.wb_factor-1 generate + in_fft_pipe_re_arr(I) <= scale_and_resize_svec(in_re_arr(I), c_in_scale_w, c_fft_slv_w); + in_fft_pipe_im_arr(I) <= scale_and_resize_svec(in_im_arr(I), c_in_scale_w, c_fft_slv_w); + end generate; + + -- The first stage of the wideband fft consists of the generation of g_fft.wb_factor + -- pipelined fft's. These pipelined fft's operate in parallel.
+ gen_pipelined_ffts : for I in g_fft.wb_factor-1 downto 0 generate + u_pft : entity work.fft_r2_pipe + generic map ( + g_fft => c_fft_r2_pipe_arr(I), -- generics for the pipelined FFTs + g_pipeline => g_pft_pipeline -- pipeline generics for the pipelined FFTs + ) + port map ( + clk => clk, + rst => rst, + in_re => in_fft_pipe_re_arr(I)(c_fft_r2_pipe_arr(I).in_dat_w-1 downto 0), + in_im => in_fft_pipe_im_arr(I)(c_fft_r2_pipe_arr(I).in_dat_w-1 downto 0), + in_val => in_val, + out_re => out_fft_pipe_re_arr(I)(c_fft_r2_pipe_arr(I).out_dat_w-1 downto 0), + out_im => out_fft_pipe_im_arr(I)(c_fft_r2_pipe_arr(I).out_dat_w-1 downto 0), + out_val => int_val(I) + ); + end generate; + + --------------------------------------------------------------- -- PARALLEL FFT STAGE --------------------------------------------------------------- - -- The "wb_factor" outputs of the pipelined fft's are offered + + -- Create input for parallel FFT + gen_inputs_for_par : for I in g_fft.wb_factor-1 downto 0 generate + in_fft_par_re_arr(I) <= resize_fft_svec(out_fft_pipe_re_arr(I)(c_fft_r2_pipe_arr(I).out_dat_w-1 downto 0)); + in_fft_par_im_arr(I) <= resize_fft_svec(out_fft_pipe_im_arr(I)(c_fft_r2_pipe_arr(I).out_dat_w-1 downto 0)); + end generate; + + -- The g_fft.wb_factor outputs of the pipelined fft's are offered -- to the input of a single parallel FFT. u_fft : entity work.fft_r2_par - generic map( + generic map ( g_fft => c_fft_r2_par, -- generics for the FFT g_pipeline => g_fft_pipeline -- pipeline generics for the parallel FFT ) - port map( + port map ( clk => clk, rst => rst, in_re_arr => in_fft_par_re_arr, @@ -236,27 +283,18 @@ begin out_im_arr => fft_out_im_arr, out_val => fft_out_val ); - end generate; - - -- When wb_factor = 1 the parallel FFT can be skipped. 
- bypass_par_fft : if (g_fft.wb_factor = 1) generate - fft_out_re_arr(0) <= RESIZE_SVEC(out_fft_pipe_re_arr(0)(c_fft_r2_pipe_arr(0).out_dat_w-1 downto 0), c_fft_slv_w); - fft_out_im_arr(0) <= RESIZE_SVEC(out_fft_pipe_im_arr(0)(c_fft_r2_pipe_arr(0).out_dat_w-1 downto 0), c_fft_slv_w); - fft_out_val <= int_val(0); - end generate; - - --------------------------------------------------------------- - -- OPTIONAL: SEPARATION STAGE - --------------------------------------------------------------- - -- When the separate functionality is required: - gen_separate : if(g_fft.use_separate) generate - use_wideband_separator : if (g_fft.wb_factor > 1) generate + + --------------------------------------------------------------- + -- OPTIONAL: SEPARATION STAGE + --------------------------------------------------------------- + -- When the separate functionality is required: + gen_separate : if g_fft.use_separate generate u_separator : entity work.fft_sepa_wide - generic map( + generic map ( g_fft => g_fft, g_pipeline => g_fft_pipeline.sep_lat ) - port map( + port map ( clk => clk, rst => rst, in_re_arr => fft_out_re_arr, @@ -268,86 +306,70 @@ begin ); end generate; - -- Use different separation implementation for wb_factor = 1 - use_single_channel_separator : if (g_fft.wb_factor = 1) generate - in_cplx <= fft_out_im_arr(0)(g_fft.stage_dat_w-1 downto 0) & fft_out_re_arr(0)(g_fft.stage_dat_w-1 downto 0); - - u_reorder_sep : entity work.fft_reorder_sepa_pipe + -- In case no separation is required, the output of the parallel fft is used.
+ no_separate : if g_fft.use_separate=false generate + sep_out_re_arr <= fft_out_re_arr; + sep_out_im_arr <= fft_out_im_arr; + sep_out_val <= fft_out_val; + end generate; + + --------------------------------------------------------------- + -- OUTPUT QUANTIZER + --------------------------------------------------------------- + gen_output_requantizers : for I in g_fft.wb_factor-1 downto 0 generate + u_requantize_output_re : entity common_lib.common_requantize generic map ( - g_bit_flip => FALSE, -- Reordering is done in the pipelined FFT already. - g_separate => g_fft.use_separate, - g_nof_points => g_fft.nof_points + g_representation => "SIGNED", + g_lsb_w => c_out_scale_w, + g_lsb_round => TRUE, + g_lsb_round_clip => FALSE, + g_msb_clip => FALSE, + g_msb_clip_symmetric => FALSE, + g_gain_w => c_out_gain_w, + g_pipeline_remove_lsb => c_pipeline_remove_lsb, + g_pipeline_remove_msb => 0, + g_in_dat_w => g_fft.stage_dat_w, + g_out_dat_w => g_fft.out_dat_w ) port map ( - clk => clk, - rst => rst, - in_dat => in_cplx, - in_val => fft_out_val, - out_dat => out_cplx, - out_val => sep_out_val - ); - - sep_out_re_arr(0) <= RESIZE_SVEC(out_cplx( g_fft.stage_dat_w-1 downto 0), c_fft_slv_w); - sep_out_im_arr(0) <= RESIZE_SVEC(out_cplx(2*g_fft.stage_dat_w-1 downto g_fft.stage_dat_w), c_fft_slv_w); - end generate; - end generate; - - -- In case no separtion is required, the output of the parallel fft is used. 
- no_separate : if(g_fft.use_separate=false) generate - sep_out_re_arr <= fft_out_re_arr; - sep_out_im_arr <= fft_out_im_arr; - sep_out_val <= fft_out_val; - end generate; + clk => clk, + in_dat => sep_out_re_arr(I), + out_dat => out_re_arr(I), + out_ovr => open + ); - --------------------------------------------------------------- - -- OUTPUT QUANTIZER - --------------------------------------------------------------- - gen_output_requantizers : for I in g_fft.wb_factor-1 downto 0 generate - u_requantize_output_re : entity common_lib.common_requantize - generic map ( - g_representation => "SIGNED", - g_lsb_w => c_out_scale_w, - g_lsb_round => TRUE, - g_lsb_round_clip => FALSE, - g_msb_clip => FALSE, - g_msb_clip_symmetric => FALSE, - g_gain_w => c_out_gain_w, - g_pipeline_remove_lsb => 0, - g_pipeline_remove_msb => 0, - g_in_dat_w => g_fft.stage_dat_w, - g_out_dat_w => g_fft.out_dat_w - ) - port map ( - clk => clk, - clken => '1', - in_dat => sep_out_re_arr(I), - out_dat => out_re_arr(I), - out_ovr => open - ); + u_requantize_output_im : entity common_lib.common_requantize + generic map ( + g_representation => "SIGNED", + g_lsb_w => c_out_scale_w, + g_lsb_round => TRUE, + g_lsb_round_clip => FALSE, + g_msb_clip => FALSE, + g_msb_clip_symmetric => FALSE, + g_gain_w => c_out_gain_w, + g_pipeline_remove_lsb => c_pipeline_remove_lsb, + g_pipeline_remove_msb => 0, + g_in_dat_w => g_fft.stage_dat_w, + g_out_dat_w => g_fft.out_dat_w + ) + port map ( + clk => clk, + in_dat => sep_out_im_arr(I), + out_dat => out_im_arr(I), + out_ovr => open + ); + end generate; - u_requantize_output_im : entity common_lib.common_requantize + u_out_val : entity common_lib.common_pipeline_sl generic map ( - g_representation => "SIGNED", - g_lsb_w => c_out_scale_w, - g_lsb_round => TRUE, - g_lsb_round_clip => FALSE, - g_msb_clip => FALSE, - g_msb_clip_symmetric => FALSE, - g_gain_w => c_out_gain_w, - g_pipeline_remove_lsb => 0, - g_pipeline_remove_msb => 0, - g_in_dat_w => g_fft.stage_dat_w, - 
g_out_dat_w => g_fft.out_dat_w + g_pipeline => c_pipeline_remove_lsb ) port map ( - clk => clk, - clken => '1', - in_dat => sep_out_im_arr(I), - out_dat => out_im_arr(I), - out_ovr => open - ); - end generate; - - out_val <= sep_out_val; - + rst => rst, + clk => clk, + in_dat => sep_out_val, + out_dat => out_val + ); + + end generate; end rtl; diff --git a/libraries/dsp/fft/src/vhdl/fft_sepa.vhd b/libraries/dsp/fft/src/vhdl/fft_sepa.vhd index b8dccb90dde79ab5c94b01e22514163e359bce83..5bf2423a8595c65f9e3218c48f4ab6fc01906049 100644 --- a/libraries/dsp/fft/src/vhdl/fft_sepa.vhd +++ b/libraries/dsp/fft/src/vhdl/fft_sepa.vhd @@ -40,6 +40,18 @@ -- B.real(m) = (X.imag(m) + X.imag(N-m))/2 -- B.imag(m) = (X.real(N-m) - X.real(m))/2 -- +-- Remarks: +-- . The add and sub output of the separate have 1 bit growth that needs to be +-- rounded. Simply skipping 1 LSbit is not suitable, because it yields +-- asymmetry around 0 and thus a DC offset. For example for N = 3-bit data: +-- x = -4 -3 -2 -1 0 1 2 3 +-- round(x/2) = -2 -2 -1 -1 0 1 1 2 = common_round for signed +-- floor(x/2) = -2 -2 -1 -1 0 0 1 1 = truncation +-- The most negative value can be ignored: +-- x : mean(-3 -2 -1 0 1 2 3) = 0 +-- . round(x/2) : mean(-2 -1 -1 0 1 1 2) = 0 +-- . floor(x/2) : mean(-2 -1 -1 0 0 1 1) = -2/8 = -0.25 = -2^(N-1)/2 / 2^N +-- So the DC offset due to truncation is -0.25 LSbit, independent of N. 
library IEEE, common_lib; use IEEE.std_logic_1164.ALL; @@ -59,6 +71,8 @@ end entity fft_sepa; architecture rtl of fft_sepa is + constant c_sepa_round : boolean := true; -- must be true, because separate should round the 1 bit growth + constant c_data_w : natural := in_dat'length/c_nof_complex; constant c_c_data_w : natural := c_nof_complex*c_data_w; constant c_pipeline : natural := 3; @@ -80,6 +94,9 @@ architecture rtl of fft_sepa is signal sub_result : std_logic_vector(c_data_w downto 0); -- Result of the subtractor signal add_result : std_logic_vector(c_data_w downto 0); -- Result of the adder + signal sub_result_q : std_logic_vector(c_data_w-1 downto 0); -- Requantized result of the subtractor + signal add_result_q : std_logic_vector(c_data_w-1 downto 0); -- Requantized result of the adder + begin --------------------------------------------------------------- @@ -117,10 +134,51 @@ begin result => sub_result ); + gen_sepa_truncate : IF c_sepa_round=FALSE GENERATE + -- truncate the one LSbit + add_result_q <= add_result(c_data_w downto 1); + sub_result_q <= sub_result(c_data_w downto 1); + end generate; + + gen_sepa_round : IF c_sepa_round=TRUE GENERATE + -- round the one LSbit + round_add : ENTITY common_lib.common_round + GENERIC MAP ( + g_representation => "SIGNED", -- SIGNED (round +-0.5 away from zero to +- infinity) or UNSIGNED rounding (round 0.5 up to + infinity) + g_round => TRUE, -- when TRUE round the input, else truncate the input + g_round_clip => FALSE, -- when TRUE clip rounded input >= +max to avoid wrapping to output -min (signed) or 0 (unsigned) + g_pipeline_input => 0, -- >= 0 + g_pipeline_output => 0, -- >= 0, use g_pipeline_input=0 and g_pipeline_output=0 for combinatorial output + g_in_dat_w => c_data_w+1, + g_out_dat_w => c_data_w + ) + PORT MAP ( + clk => clk, + in_dat => add_result, + out_dat => add_result_q + ); + + round_sub : ENTITY common_lib.common_round + GENERIC MAP ( + g_representation => "SIGNED", -- SIGNED (round +-0.5 away
from zero to +- infinity) or UNSIGNED rounding (round 0.5 up to + infinity) + g_round => TRUE, -- when TRUE round the input, else truncate the input + g_round_clip => FALSE, -- when TRUE clip rounded input >= +max to avoid wrapping to output -min (signed) or 0 (unsigned) + g_pipeline_input => 0, -- >= 0 + g_pipeline_output => 0, -- >= 0, use g_pipeline_input=0 and g_pipeline_output=0 for combinatorial output + g_in_dat_w => c_data_w+1, + g_out_dat_w => c_data_w + ) + PORT MAP ( + clk => clk, + in_dat => sub_result, + out_dat => sub_result_q + ); + end generate; + --------------------------------------------------------------- -- CONTROL PROCESS --------------------------------------------------------------- - comb : process(r, rst, in_val, in_dat, add_result, sub_result) + comb : process(r, rst, in_val, in_dat, add_result_q, sub_result_q) variable v : reg_type; begin v := r; @@ -130,7 +188,7 @@ begin v.val_dly(0) := in_val; -- Composition of the output registers: - v.out_dat := sub_result(c_data_w downto 1) & add_result(c_data_w downto 1); + v.out_dat := sub_result_q & add_result_q; v.out_val := r.val_dly(c_pipeline-1); -- Compose the inputs for the adder and subtractor diff --git a/libraries/dsp/fft/src/vhdl/fft_sepa_wide.vhd b/libraries/dsp/fft/src/vhdl/fft_sepa_wide.vhd index 5feeb2e259def0f993157014d348a1ec9993e693..5775854e1b2c2bcef95f6520d8d16f10bb99e119 100644 --- a/libraries/dsp/fft/src/vhdl/fft_sepa_wide.vhd +++ b/libraries/dsp/fft/src/vhdl/fft_sepa_wide.vhd @@ -320,8 +320,8 @@ begin -- Split the concatenated array into a real and imaginary array for the output gen_output_arrays : for I in g_fft.wb_factor-1 downto 0 generate - out_re_arr(I) <= RESIZE_SVEC(out_dat_arr(I)( g_fft.stage_dat_w-1 downto 0), c_fft_slv_w); - out_im_arr(I) <= RESIZE_SVEC(out_dat_arr(I)(c_nof_complex*g_fft.stage_dat_w-1 downto g_fft.stage_dat_w), c_fft_slv_w); + out_re_arr(I) <= resize_fft_svec(out_dat_arr(I)( g_fft.stage_dat_w-1 downto 0)); + out_im_arr(I) <= 
resize_fft_svec(out_dat_arr(I)(c_nof_complex*g_fft.stage_dat_w-1 downto g_fft.stage_dat_w)); end generate; end rtl; diff --git a/libraries/dsp/fft/tb/vhdl/tb_fft_r2_wide.vhd b/libraries/dsp/fft/tb/vhdl/tb_fft_r2_wide.vhd index d30e4577cdd352e359b1746508925e79d92de5a7..1c814cfd6f0b3b69c417ac3a712ea71b552c1dc1 100644 --- a/libraries/dsp/fft/tb/vhdl/tb_fft_r2_wide.vhd +++ b/libraries/dsp/fft/tb/vhdl/tb_fft_r2_wide.vhd @@ -152,7 +152,7 @@ architecture tb of tb_fft_r2_wide is constant c_rnd_factor : natural := sel_a_b(g_enable_in_val_gaps, 3, 1); constant c_dut_block_latency : natural := 4; - constant c_dut_clk_latency : natural := c_nof_valid_per_block * c_dut_block_latency * c_rnd_factor; -- worst case + constant c_dut_clk_latency : natural := c_nof_valid_per_block * c_dut_block_latency * c_rnd_factor + 50; -- worst case -- input/output data width constant c_in_dat_w : natural := g_fft.in_dat_w; @@ -356,13 +356,19 @@ begin -- Wait until tb_end_almost proc_common_wait_until_high(clk, tb_end_almost); assert in_val_cnt > 0 report "Test did not run, no valid input data" severity error; - -- The PFFT has a memory of 1 block, independent of use_reorder and use_separate, but without the - -- reorder buffer it outputs 1 sample more, because that is immediately available in a new block. - -- Ensure g_data_file_nof_lines is multiple of g_fft.nof_points. 
- if g_fft.use_reorder=true then - assert out_val_cnt = in_val_cnt-c_nof_valid_per_block report "Unexpected number of valid output data" severity error; + if g_fft.wb_factor=g_fft.nof_points then + -- Parallel FFT + assert out_val_cnt = in_val_cnt report "Unexpected number of valid output data" severity error; else - assert out_val_cnt = in_val_cnt-c_nof_valid_per_block+c_nof_channels report "Unexpected number of valid output data" severity error; + -- Wideband FFT + -- The PFFT has a memory of 1 block, independent of use_reorder and use_separate, but without the + -- reorder buffer it outputs 1 sample more, because that is immediately available in a new block. + -- Ensure g_data_file_nof_lines is multiple of g_fft.nof_points. + if g_fft.use_reorder=true then + assert out_val_cnt = in_val_cnt-c_nof_valid_per_block report "Unexpected number of valid output data" severity error; + else + assert out_val_cnt = in_val_cnt-c_nof_valid_per_block+c_nof_channels report "Unexpected number of valid output data" severity error; + end if; end if; wait; end process; diff --git a/libraries/dsp/fft/tb/vhdl/tb_tb_fft_r2_wide.vhd b/libraries/dsp/fft/tb/vhdl/tb_tb_fft_r2_wide.vhd index ca32674640976d7ab829681f23251b8ff3eb7b2e..1dcb38e653b62dc2a6b9d334bd1b4d1600132c28 100644 --- a/libraries/dsp/fft/tb/vhdl/tb_tb_fft_r2_wide.vhd +++ b/libraries/dsp/fft/tb/vhdl/tb_tb_fft_r2_wide.vhd @@ -44,10 +44,13 @@ ARCHITECTURE tb OF tb_tb_fft_r2_wide IS CONSTANT c_pipeline : t_fft_pipeline := (1, 1, 3, 1, 1, 0, 0, 1); CONSTANT c_fft_wb4_two_real : t_fft := ( true, false, true, 0, 4, 0, 128, 8, 16, 0, c_dsp_mult_w, 2, true, 56, 2); - CONSTANT c_fft_wb4_complex : t_fft := ( true, false, false, 0, 4, 0, 64, 8, 16, 0, c_dsp_mult_w, 2, true, 56, 2); CONSTANT c_fft_wb4_complex_fft_shift : t_fft := ( true, true, false, 0, 4, 0, 64, 8, 16, 0, c_dsp_mult_w, 2, true, 56, 2); CONSTANT c_fft_wb4_complex_flipped : t_fft := (false, false, false, 0, 4, 0, 64, 8, 16, 0, c_dsp_mult_w, 2, true, 56, 2); + CONSTANT 
c_fft_wb4_complex : t_fft := ( true, false, false, 0, 4, 0, 64, 8, 16, 0, c_dsp_mult_w, 2, true, 56, 2); + CONSTANT c_fft_wb1_complex : t_fft := ( true, false, false, 0, 1, 0, 64, 8, 16, 0, c_dsp_mult_w, 2, true, 56, 2); + CONSTANT c_fft_wb64_complex : t_fft := ( true, false, false, 0,64, 0, 64, 8, 16, 0, c_dsp_mult_w, 2, true, 56, 2); + CONSTANT c_diff_margin : natural := 2; -- Real input @@ -156,4 +159,8 @@ BEGIN u_act_complex_fft_shift : ENTITY work.tb_fft_r2_wide GENERIC MAP (c_pipeline, c_fft_wb4_complex_fft_shift, c_diff_margin, c_unused, 0, c_unused, 0, c_phasor_chirp, 12800, 1280, FALSE); u_act_complex_flipped : ENTITY work.tb_fft_r2_wide GENERIC MAP (c_pipeline, c_fft_wb4_complex_flipped, c_diff_margin, c_unused, 0, c_unused, 0, c_phasor_chirp, 12800, 1280, FALSE); u_rnd_complex_noise : ENTITY work.tb_fft_r2_wide GENERIC MAP (c_pipeline, c_fft_wb4_complex, c_diff_margin, c_unused, 0, c_unused, 0, c_noise_complex, 640, 640, TRUE); + + -- Extreme wb_factor=1 and wb_factor=nof_points + u_act_wb1_complex_noise : ENTITY work.tb_fft_r2_wide GENERIC MAP (c_pipeline, c_fft_wb1_complex, c_diff_margin, c_unused, 0, c_unused, 0, c_noise_complex, 640, 640, FALSE); + u_act_wb64_complex_noise : ENTITY work.tb_fft_r2_wide GENERIC MAP (c_pipeline, c_fft_wb64_complex, c_diff_margin, c_unused, 0, c_unused, 0, c_noise_complex, 640, 640, FALSE); END tb;