Skip to content
Snippets Groups Projects
Commit 6b3a3db1 authored by Eric Kooistra's avatar Eric Kooistra
Browse files

Round outside separate function in output quantizer, to avoid more inaccurate...

Round outside the separate function, in the output quantizer, to avoid the less accurate extra rounding of the 1 LSbit of bit growth inside separate.
parent f468c258
No related branches found
No related tags found
1 merge request: !225 Round outside separate function in output quantizer, to avoid more inaccurate...
......@@ -125,30 +125,37 @@ architecture str of fft_r2_par is
constant c_pipeline_add_sub : natural := 1;
constant c_pipeline_remove_lsb : natural := 1;
constant c_sepa_round : boolean := true; -- must be true, because separate should round the 1 bit growth
constant c_nof_stages : natural := ceil_log2(g_fft.nof_points);
constant c_nof_bf_per_stage : natural := g_fft.nof_points/2;
constant c_in_scale_w_tester : integer := g_fft.stage_dat_w - g_fft.in_dat_w - sel_a_b(g_fft.guard_enable, g_fft.guard_w, 0);
constant c_in_scale_w : natural := sel_a_b(c_in_scale_w_tester > 0, c_in_scale_w_tester, 0); -- Only scale when in_dat_w is not too big.
constant c_out_scale_w : integer := g_fft.stage_dat_w - g_fft.out_dat_w - g_fft.out_gain_w; -- Estimate number of LSBs to throw away when > 0 or insert when < 0
constant c_sepa_growth_w : natural := sel_a_b(g_fft.use_separate, 1, 0); -- add one bit for add sub growth in separate
constant c_raw_dat_w : natural := g_fft.stage_dat_w + c_sepa_growth_w;
type t_stage_dat_arr is array (integer range <>) of std_logic_vector(g_fft.stage_dat_w-1 downto 0);
type t_stage_sum_arr is array (integer range <>) of std_logic_vector(g_fft.stage_dat_w downto 0);
type t_stage_raw_arr is array (integer range <>) of std_logic_vector(c_raw_dat_w-1 downto 0);
type t_data_arr2 is array(c_nof_stages downto 0) of t_stage_dat_arr(g_fft.nof_points-1 downto 0);
type t_val_arr is array(c_nof_stages downto 0) of std_logic_vector( g_fft.nof_points-1 downto 0);
signal data_re : t_data_arr2;
signal data_im : t_data_arr2;
signal data_val : t_val_arr;
signal int_re_arr : t_stage_dat_arr(g_fft.nof_points-1 downto 0);
signal int_im_arr : t_stage_dat_arr(g_fft.nof_points-1 downto 0);
signal fft_re_arr : t_stage_dat_arr(g_fft.nof_points-1 downto 0);
signal fft_im_arr : t_stage_dat_arr(g_fft.nof_points-1 downto 0);
signal add_arr : t_stage_sum_arr(g_fft.nof_points-1 downto 0);
signal sub_arr : t_stage_sum_arr(g_fft.nof_points-1 downto 0);
signal int_val : std_logic;
signal int_a_dc : std_logic_vector(g_fft.stage_dat_w-1 downto 0);
signal int_b_dc : std_logic_vector(g_fft.stage_dat_w-1 downto 0);
signal add_arr : t_stage_raw_arr(g_fft.nof_points-1 downto 0);
signal sub_arr : t_stage_raw_arr(g_fft.nof_points-1 downto 0);
signal fft_re_arr : t_stage_raw_arr(g_fft.nof_points-1 downto 0);
signal fft_im_arr : t_stage_raw_arr(g_fft.nof_points-1 downto 0);
signal fft_val : std_logic;
begin
......@@ -235,7 +242,7 @@ begin
g_pipeline_input => 0,
g_pipeline_output => c_pipeline_add_sub,
g_in_dat_w => g_fft.stage_dat_w,
g_out_dat_w => g_fft.stage_dat_w+1
g_out_dat_w => c_raw_dat_w
)
port map (
clk => clk,
......@@ -251,7 +258,7 @@ begin
g_pipeline_input => 0,
g_pipeline_output => c_pipeline_add_sub,
g_in_dat_w => g_fft.stage_dat_w,
g_out_dat_w => g_fft.stage_dat_w+1
g_out_dat_w => c_raw_dat_w
)
port map (
clk => clk,
......@@ -267,7 +274,7 @@ begin
g_pipeline_input => 0,
g_pipeline_output => c_pipeline_add_sub,
g_in_dat_w => g_fft.stage_dat_w,
g_out_dat_w => g_fft.stage_dat_w+1
g_out_dat_w => c_raw_dat_w
)
port map (
clk => clk,
......@@ -283,7 +290,7 @@ begin
g_pipeline_input => 0,
g_pipeline_output => c_pipeline_add_sub,
g_in_dat_w => g_fft.stage_dat_w,
g_out_dat_w => g_fft.stage_dat_w+1
g_out_dat_w => c_raw_dat_w
)
port map (
clk => clk,
......@@ -292,84 +299,14 @@ begin
result => sub_arr(2*I+1)
);
gen_sepa_truncate : IF c_sepa_round=false GENERATE
-- truncate the one LSbit
fft_re_arr(2*I ) <= add_arr(2*I )(g_fft.stage_dat_w DOWNTO 1); -- A real
fft_re_arr(2*I+1) <= add_arr(2*I+1)(g_fft.stage_dat_w DOWNTO 1); -- B real
fft_im_arr(2*I ) <= sub_arr(2*I )(g_fft.stage_dat_w DOWNTO 1); -- A imag
fft_im_arr(2*I+1) <= sub_arr(2*I+1)(g_fft.stage_dat_w DOWNTO 1); -- B imag
end generate;
gen_sepa_round : IF c_sepa_round=true GENERATE
-- round the one LSbit
round_re_a : ENTITY common_lib.common_round
GENERIC MAP (
g_representation => "SIGNED", -- SIGNED (round +-0.5 away from zero to +- infinity) or UNSIGNED rounding (round 0.5 up to + infinity)
g_round => TRUE, -- when TRUE round the input, else truncate the input
g_round_clip => FALSE, -- when TRUE clip rounded input >= +max to avoid wrapping to output -min (signed) or 0 (unsigned)
g_pipeline_input => 0, -- >= 0
g_pipeline_output => 0, -- >= 0, use g_pipeline_input=0 and g_pipeline_output=0 for combinatorial output
g_in_dat_w => g_fft.stage_dat_w+1,
g_out_dat_w => g_fft.stage_dat_w
)
PORT MAP (
clk => clk,
in_dat => add_arr(2*I),
out_dat => fft_re_arr(2*I)
);
round_re_b : ENTITY common_lib.common_round
GENERIC MAP (
g_representation => "SIGNED", -- SIGNED (round +-0.5 away from zero to +- infinity) or UNSIGNED rounding (round 0.5 up to + infinity)
g_round => TRUE, -- when TRUE round the input, else truncate the input
g_round_clip => FALSE, -- when TRUE clip rounded input >= +max to avoid wrapping to output -min (signed) or 0 (unsigned)
g_pipeline_input => 0, -- >= 0
g_pipeline_output => 0, -- >= 0, use g_pipeline_input=0 and g_pipeline_output=0 for combinatorial output
g_in_dat_w => g_fft.stage_dat_w+1,
g_out_dat_w => g_fft.stage_dat_w
)
PORT MAP (
clk => clk,
in_dat => add_arr(2*I+1),
out_dat => fft_re_arr(2*I+1)
);
round_im_a : ENTITY common_lib.common_round
GENERIC MAP (
g_representation => "SIGNED", -- SIGNED (round +-0.5 away from zero to +- infinity) or UNSIGNED rounding (round 0.5 up to + infinity)
g_round => TRUE, -- when TRUE round the input, else truncate the input
g_round_clip => FALSE, -- when TRUE clip rounded input >= +max to avoid wrapping to output -min (signed) or 0 (unsigned)
g_pipeline_input => 0, -- >= 0
g_pipeline_output => 0, -- >= 0, use g_pipeline_input=0 and g_pipeline_output=0 for combinatorial output
g_in_dat_w => g_fft.stage_dat_w+1,
g_out_dat_w => g_fft.stage_dat_w
)
PORT MAP (
clk => clk,
in_dat => sub_arr(2*I),
out_dat => fft_im_arr(2*I)
);
round_im_b : ENTITY common_lib.common_round
GENERIC MAP (
g_representation => "SIGNED", -- SIGNED (round +-0.5 away from zero to +- infinity) or UNSIGNED rounding (round 0.5 up to + infinity)
g_round => TRUE, -- when TRUE round the input, else truncate the input
g_round_clip => FALSE, -- when TRUE clip rounded input >= +max to avoid wrapping to output -min (signed) or 0 (unsigned)
g_pipeline_input => 0, -- >= 0
g_pipeline_output => 0, -- >= 0, use g_pipeline_input=0 and g_pipeline_output=0 for combinatorial output
g_in_dat_w => g_fft.stage_dat_w+1,
g_out_dat_w => g_fft.stage_dat_w
)
PORT MAP (
clk => clk,
in_dat => sub_arr(2*I+1),
out_dat => fft_im_arr(2*I+1)
);
end generate;
fft_re_arr(2*I ) <= add_arr(2*I )(c_raw_dat_w-1 DOWNTO 0); -- A real
fft_re_arr(2*I+1) <= add_arr(2*I+1)(c_raw_dat_w-1 DOWNTO 0); -- B real
fft_im_arr(2*I ) <= sub_arr(2*I )(c_raw_dat_w-1 DOWNTO 0); -- A imag
fft_im_arr(2*I+1) <= sub_arr(2*I+1)(c_raw_dat_w-1 DOWNTO 0); -- B imag
end generate;
---------------------------------------------------------------------------
-- Generate bin 0 directly
-- Generate bin 0 = DC directly
---------------------------------------------------------------------------
-- Index N=g_fft.nof_points wraps to index 0:
-- . fft_re_arr(0) = (int_re_arr(0) + int_re_arr(N)) / 2 = int_re_arr(0)
......@@ -379,28 +316,34 @@ begin
u_pipeline_a_re_0 : entity common_lib.common_pipeline
generic map (
g_pipeline => c_pipeline_add_sub,
g_in_dat_w => g_fft.stage_dat_w,
g_out_dat_w => g_fft.stage_dat_w
g_representation => "SIGNED",
g_pipeline => c_pipeline_add_sub,
g_in_dat_w => g_fft.stage_dat_w,
g_out_dat_w => g_fft.stage_dat_w
)
port map (
clk => clk,
in_dat => int_re_arr(0),
out_dat => fft_re_arr(0)
out_dat => int_a_dc
);
u_pipeline_b_re_0 : entity common_lib.common_pipeline
generic map (
g_pipeline => c_pipeline_add_sub,
g_in_dat_w => g_fft.stage_dat_w,
g_out_dat_w => g_fft.stage_dat_w
g_representation => "SIGNED",
g_pipeline => c_pipeline_add_sub,
g_in_dat_w => g_fft.stage_dat_w,
g_out_dat_w => g_fft.stage_dat_w
)
port map (
clk => clk,
in_dat => int_im_arr(0),
out_dat => fft_re_arr(1)
out_dat => int_b_dc
);
-- The real outputs of A(0) and B(0) are scaled by a shift left of one bit (= * 2), to match the factor 2 gain of the separate add
fft_re_arr(0) <= int_a_dc & '0';
fft_re_arr(1) <= int_b_dc & '0';
-- The imaginary outputs of A(0) and B(0) are always zero in case two real inputs are provided
fft_im_arr(0) <= (others=>'0');
fft_im_arr(1) <= (others=>'0');
......@@ -421,6 +364,7 @@ begin
no_separate : if g_fft.use_separate=false generate
assign_outputs : for I in 0 to g_fft.nof_points-1 generate
-- c_raw_dat_w = g_fft.stage_dat_w, because g_fft.use_separate=false
fft_re_arr(I) <= int_re_arr(I);
fft_im_arr(I) <= int_im_arr(I);
end generate;
......@@ -434,14 +378,14 @@ begin
u_requantize_re : entity common_lib.common_requantize
generic map (
g_representation => "SIGNED",
g_lsb_w => c_out_scale_w,
g_lsb_w => c_out_scale_w + c_sepa_growth_w,
g_lsb_round => TRUE,
g_lsb_round_clip => FALSE,
g_msb_clip => FALSE,
g_msb_clip_symmetric => FALSE,
g_pipeline_remove_lsb => c_pipeline_remove_lsb,
g_pipeline_remove_msb => 0,
g_in_dat_w => g_fft.stage_dat_w,
g_in_dat_w => c_raw_dat_w,
g_out_dat_w => g_fft.out_dat_w
)
port map (
......@@ -454,14 +398,14 @@ begin
u_requantize_im : entity common_lib.common_requantize
generic map (
g_representation => "SIGNED",
g_lsb_w => c_out_scale_w,
g_lsb_w => c_out_scale_w + c_sepa_growth_w,
g_lsb_round => TRUE,
g_lsb_round_clip => FALSE,
g_msb_clip => FALSE,
g_msb_clip_symmetric => FALSE,
g_pipeline_remove_lsb => c_pipeline_remove_lsb,
g_pipeline_remove_msb => 0,
g_in_dat_w => g_fft.stage_dat_w,
g_in_dat_w => c_raw_dat_w,
g_out_dat_w => g_fft.out_dat_w
)
port map (
......
......@@ -100,7 +100,8 @@ architecture str of fft_r2_pipe is
constant c_in_scale_w : natural := g_fft.stage_dat_w - g_fft.in_dat_w - sel_a_b(g_fft.guard_enable, g_fft.guard_w, 0);
constant c_out_scale_w : integer := g_fft.stage_dat_w - g_fft.out_dat_w - g_fft.out_gain_w; -- Estimate number of LSBs to throw away when > 0 or insert when < 0
constant c_raw_dat_extra_w : natural := sel_a_b(g_fft.use_separate, g_sepa_extra_w, 0);
constant c_raw_dat_w : natural := g_fft.stage_dat_w + c_raw_dat_extra_w;
constant c_sepa_growth_w : natural := sel_a_b(g_fft.use_separate, 1, 0); -- add one bit for add sub growth in separate
constant c_raw_dat_w : natural := g_fft.stage_dat_w + c_sepa_growth_w;
-- number the stage instances from c_nof_stages:1
-- . the data input for the first stage has index c_nof_stages
......@@ -117,12 +118,10 @@ architecture str of fft_r2_pipe is
signal data_re : t_data_arr;
signal data_im : t_data_arr;
signal last_re : std_logic_vector(c_raw_dat_w-1 downto 0);
signal last_im : std_logic_vector(c_raw_dat_w-1 downto 0);
signal data_val : std_logic_vector(c_nof_stages downto 0):= (others=>'0');
signal in_cplx : std_logic_vector(c_nof_complex*g_fft.stage_dat_w-1 downto 0);
signal out_cplx : std_logic_vector(c_nof_complex*c_raw_dat_w-1 downto 0);
signal in_cplx : std_logic_vector(c_nof_complex*c_raw_dat_w-1 downto 0);
signal raw_out_re : std_logic_vector(c_raw_dat_w-1 downto 0);
signal raw_out_im : std_logic_vector(c_raw_dat_w-1 downto 0);
signal raw_out_val : std_logic;
......@@ -209,16 +208,16 @@ begin
in_re => data_re(1),
in_im => data_im(1),
in_val => data_val(1),
out_re => last_re, -- = data_re(0), but may instead have c_raw_dat_w bits
out_im => last_im, -- = data_im(0), but may instead have c_raw_dat_w bits
out_re => data_re(0),
out_im => data_im(0),
out_val => data_val(0)
);
------------------------------------------------------------------------------
-- Optional output reorder and separation
------------------------------------------------------------------------------
gen_reorder_and_separate : if(g_fft.use_separate or g_fft.use_reorder) generate
in_cplx <= last_im & last_re;
gen_reorder_and_separate : if g_fft.use_separate or g_fft.use_reorder generate
in_cplx <= data_im(0) & data_re(0);
u_reorder_sep : entity work.fft_reorder_sepa_pipe
generic map (
......@@ -232,20 +231,23 @@ begin
port map (
clk => clk,
rst => rst,
in_dat => in_cplx,
in_dat => in_cplx, -- c_nof_complex * g_fft.stage_dat_w
in_val => data_val(0),
out_dat => out_cplx,
out_dat => out_cplx, -- c_nof_complex * c_raw_dat_w
out_val => raw_out_val
);
-- c_raw_dat_w = g_fft.stage_dat_w when g_fft.use_separate = false
-- c_raw_dat_w = g_fft.stage_dat_w + 1 when g_fft.use_separate = true
raw_out_re <= out_cplx( c_raw_dat_w-1 downto 0);
raw_out_im <= out_cplx(2*c_raw_dat_w-1 downto c_raw_dat_w);
end generate;
no_reorder_no_seperate : if(g_fft.use_separate=false and g_fft.use_reorder=false) generate
raw_out_re <= last_re;
raw_out_im <= last_im;
no_reorder_no_seperate : if g_fft.use_separate=false and g_fft.use_reorder=false generate
-- c_raw_dat_w = g_fft.stage_dat_w because g_fft.use_separate = false
raw_out_re <= data_re(0);
raw_out_im <= data_im(0);
raw_out_val <= data_val(0);
end generate;
......@@ -255,7 +257,7 @@ begin
u_requantize_re : entity common_lib.common_requantize
generic map (
g_representation => "SIGNED",
g_lsb_w => c_out_scale_w + c_raw_dat_extra_w,
g_lsb_w => c_out_scale_w + c_sepa_growth_w,
g_lsb_round => TRUE,
g_lsb_round_clip => FALSE,
g_msb_clip => FALSE,
......@@ -275,7 +277,7 @@ begin
u_requantize_im : entity common_lib.common_requantize
generic map (
g_representation => "SIGNED",
g_lsb_w => c_out_scale_w + c_raw_dat_extra_w,
g_lsb_w => c_out_scale_w + c_sepa_growth_w,
g_lsb_round => TRUE,
g_lsb_round_clip => FALSE,
g_msb_clip => FALSE,
......
......@@ -153,18 +153,25 @@ architecture rtl of fft_r2_wide is
constant c_out_scale_w : integer := c_fft_r2_par.out_dat_w - g_fft.out_dat_w - g_fft.out_gain_w; -- Estimate number of LSBs to throw away when > 0 or insert when < 0
constant c_sepa_growth_w : natural := sel_a_b(g_fft.use_separate, 1, 0); -- add one bit for add sub growth in separate
constant c_raw_dat_w : natural := g_fft.stage_dat_w + c_sepa_growth_w;
-- g_fft.wb_factor = 1
signal fft_pipe_out_re : std_logic_vector(g_fft.out_dat_w-1 downto 0);
signal fft_pipe_out_im : std_logic_vector(g_fft.out_dat_w-1 downto 0);
-- g_fft.wb_factor > 1 and < g_fft.nof_points
signal in_fft_pipe_re_arr : t_fft_slv_arr(g_fft.wb_factor-1 downto 0);
signal in_fft_pipe_im_arr : t_fft_slv_arr(g_fft.wb_factor-1 downto 0);
signal out_fft_pipe_re_arr : t_fft_slv_arr(g_fft.wb_factor-1 downto 0);
signal out_fft_pipe_im_arr : t_fft_slv_arr(g_fft.wb_factor-1 downto 0);
signal out_fft_pipe_val : std_logic_vector(g_fft.wb_factor-1 downto 0);
signal in_fft_par : std_logic; -- = out_fft_pipe_val(0)
signal in_fft_par_re_arr : t_fft_slv_arr(g_fft.wb_factor-1 downto 0);
signal in_fft_par_im_arr : t_fft_slv_arr(g_fft.wb_factor-1 downto 0);
signal fft_pipe_out_re : std_logic_vector(g_fft.out_dat_w-1 downto 0);
signal fft_pipe_out_im : std_logic_vector(g_fft.out_dat_w-1 downto 0);
signal fft_out_re_arr : t_fft_slv_arr(g_fft.wb_factor-1 downto 0);
signal fft_out_im_arr : t_fft_slv_arr(g_fft.wb_factor-1 downto 0);
signal fft_out_val : std_logic;
......@@ -173,11 +180,6 @@ architecture rtl of fft_r2_wide is
signal sep_out_im_arr : t_fft_slv_arr(g_fft.wb_factor-1 downto 0);
signal sep_out_val : std_logic;
signal int_val : std_logic_vector(g_fft.wb_factor-1 downto 0);
signal out_cplx : std_logic_vector(c_nof_complex*g_fft.stage_dat_w-1 downto 0);
signal in_cplx : std_logic_vector(c_nof_complex*g_fft.stage_dat_w-1 downto 0);
begin
-- Default to fft_r2_pipe when g_fft.wb_factor=1
......@@ -252,7 +254,7 @@ begin
in_val => in_val,
out_re => out_fft_pipe_re_arr(I)(c_fft_r2_pipe_arr(I).out_dat_w-1 downto 0),
out_im => out_fft_pipe_im_arr(I)(c_fft_r2_pipe_arr(I).out_dat_w-1 downto 0),
out_val => int_val(I)
out_val => out_fft_pipe_val(I)
);
end generate;
......@@ -261,6 +263,8 @@ begin
-- PARALLEL FFT STAGE
---------------------------------------------------------------
in_fft_par <= out_fft_pipe_val(0);
-- Create input for parallel FFT
gen_inputs_for_par : for I in g_fft.wb_factor-1 downto 0 generate
in_fft_par_re_arr(I) <= resize_fft_svec(out_fft_pipe_re_arr(I)(c_fft_r2_pipe_arr(I).out_dat_w-1 downto 0));
......@@ -279,7 +283,7 @@ begin
rst => rst,
in_re_arr => in_fft_par_re_arr,
in_im_arr => in_fft_par_im_arr,
in_val => int_val(0),
in_val => in_fft_par,
out_re_arr => fft_out_re_arr,
out_im_arr => fft_out_im_arr,
out_val => fft_out_val
......@@ -320,14 +324,14 @@ begin
u_requantize_output_re : entity common_lib.common_requantize
generic map (
g_representation => "SIGNED",
g_lsb_w => c_out_scale_w,
g_lsb_w => c_out_scale_w + c_sepa_growth_w,
g_lsb_round => TRUE,
g_lsb_round_clip => FALSE,
g_msb_clip => FALSE,
g_msb_clip_symmetric => FALSE,
g_pipeline_remove_lsb => c_pipeline_remove_lsb,
g_pipeline_remove_msb => 0,
g_in_dat_w => g_fft.stage_dat_w,
g_in_dat_w => c_raw_dat_w,
g_out_dat_w => g_fft.out_dat_w
)
port map (
......@@ -340,14 +344,14 @@ begin
u_requantize_output_im : entity common_lib.common_requantize
generic map (
g_representation => "SIGNED",
g_lsb_w => c_out_scale_w,
g_lsb_w => c_out_scale_w + c_sepa_growth_w,
g_lsb_round => TRUE,
g_lsb_round_clip => FALSE,
g_msb_clip => FALSE,
g_msb_clip_symmetric => FALSE,
g_pipeline_remove_lsb => c_pipeline_remove_lsb,
g_pipeline_remove_msb => 0,
g_in_dat_w => g_fft.stage_dat_w,
g_in_dat_w => c_raw_dat_w,
g_out_dat_w => g_fft.out_dat_w
)
port map (
......
......@@ -51,9 +51,9 @@ entity fft_reorder_sepa_pipe is
port (
clk : in std_logic;
rst : in std_logic;
in_dat : in std_logic_vector;
in_dat : in std_logic_vector; -- c_dat_w
in_val : in std_logic;
out_dat : out std_logic_vector;
out_dat : out std_logic_vector; -- c_dat_w when g_separate = false, else c_dat_w + 2
out_val : out std_logic
);
end entity fft_reorder_sepa_pipe;
......@@ -323,9 +323,9 @@ begin
port map (
clk => clk,
rst => rst,
in_dat => out_dat_i,
in_dat => out_dat_i, -- c_dat_w
in_val => out_val_i,
out_dat => out_dat,
out_dat => out_dat, -- c_dat_w + 2
out_val => out_val
);
end generate;
......@@ -335,8 +335,8 @@ begin
-- the output signals are directly driven.
gen_no_separate : if g_separate=false generate
rd_adr <= TO_UVEC(r.count_up, c_adr_tot_w);
out_dat <= out_dat_i;
out_val <= out_val_i;
out_dat <= out_dat_i; -- c_dat_w
out_val <= out_val_i;
end generate;
end rtl;
......
......@@ -41,17 +41,9 @@
-- B.imag(m) = (X.real(N-m) - X.real(m))/2
--
-- Remarks:
-- . The add and sub output of the separate have 1 bit growth that needs to be
-- rounded. Simply skipping 1 LSbit is not suitable, because it yields
-- asymmetry around 0 and thus a DC offset. For example for N = 3-bit data:
-- x = -4 -3 -2 -1 0 1 2 3
-- round(x/2) = -2 -2 -1 -1 0 1 1 2 = common_round for signed
-- floor(x/2) = -2 -2 -1 -1 0 0 1 1 = truncation
-- The most negative value can be ignored:
-- x : mean(-3 -2 -1 0 1 2 3) = 0
-- . round(x/2) : mean(-2 -1 -1 0 1 1 2) = 0
-- . floor(x/2) : mean(-2 -1 -1 0 0 1 1) = -2/8 = -0.25 = -2^(N-1)/2 / 2^N
-- So the DC offset due to truncation is -0.25 LSbit, independent of N.
-- . The A, B outputs are scaled by factor 2 due to separate add and sub.
-- Therefore in_dat re, im have c_in_data_w bits and out_dat re, im have
-- c_out_data_w = c_in_data_w + 1 bits, to avoid overflow.
library IEEE, common_lib;
use IEEE.std_logic_1164.ALL;
......@@ -62,40 +54,40 @@ entity fft_sepa is
port (
clk : in std_logic;
rst : in std_logic;
in_dat : in std_logic_vector;
in_dat : in std_logic_vector; -- c_nof_complex * c_in_data_w
in_val : in std_logic;
out_dat : out std_logic_vector;
out_dat : out std_logic_vector; -- c_nof_complex * c_out_data_w = c_nof_complex * (c_in_data_w + 1)
out_val : out std_logic
);
end entity fft_sepa;
architecture rtl of fft_sepa is
constant c_sepa_round : boolean := true; -- must be true, because separate should round the 1 bit growth
constant c_data_w : natural := in_dat'length/c_nof_complex;
constant c_c_data_w : natural := c_nof_complex*c_data_w;
constant c_pipeline : natural := 3;
constant c_in_data_w : natural := in_dat'length / c_nof_complex;
constant c_in_complex_w : natural := c_nof_complex * c_in_data_w;
constant c_out_data_w : natural := c_in_data_w + 1;
constant c_out_complex_w : natural := c_nof_complex * c_out_data_w;
constant c_pipeline : natural := 3;
type reg_type is record
switch : std_logic; -- Register used to toggle between A & B definition
val_dly : std_logic_vector(c_pipeline-1 downto 0); -- Register that delays the incoming valid signal
xn_m_reg : std_logic_vector(c_c_data_w-1 downto 0); -- Register to hold the X(N-m) value for one cycle
xm_reg : std_logic_vector(c_c_data_w-1 downto 0); -- Register to hold the X(m) value for one cycle
add_reg_a : std_logic_vector(c_data_w-1 downto 0); -- Input register A for the adder
add_reg_b : std_logic_vector(c_data_w-1 downto 0); -- Input register B for the adder
sub_reg_a : std_logic_vector(c_data_w-1 downto 0); -- Input register A for the subtractor
sub_reg_b : std_logic_vector(c_data_w-1 downto 0); -- Input register B for the subtractor
out_dat : std_logic_vector(c_c_data_w-1 downto 0); -- Registered output value
out_val : std_logic; -- Registered data valid signal
type t_reg is record
switch : std_logic; -- Register used to toggle between A & B definition
val_dly : std_logic_vector(c_pipeline-1 downto 0); -- Register that delays the incoming valid signal
xn_m_reg : std_logic_vector(c_in_complex_w-1 downto 0); -- Register to hold the X(N-m) value for one cycle
xm_reg : std_logic_vector(c_in_complex_w-1 downto 0); -- Register to hold the X(m) value for one cycle
add_reg_a : std_logic_vector(c_in_data_w-1 downto 0); -- Input register A for the adder
add_reg_b : std_logic_vector(c_in_data_w-1 downto 0); -- Input register B for the adder
sub_reg_a : std_logic_vector(c_in_data_w-1 downto 0); -- Input register A for the subtractor
sub_reg_b : std_logic_vector(c_in_data_w-1 downto 0); -- Input register B for the subtractor
out_dat : std_logic_vector(c_out_complex_w-1 downto 0); -- Registered output value
out_val : std_logic; -- Registered data valid signal
end record;
constant c_reg_init : t_reg := ('0', (others=>'0'), (others=>'0'), (others=>'0'), (others=>'0'), (others=>'0'), (others=>'0'), (others=>'0'), (others=>'0'), '0');
signal r, rin : reg_type;
signal sub_result : std_logic_vector(c_data_w downto 0); -- Result of the subtractor
signal add_result : std_logic_vector(c_data_w downto 0); -- Result of the adder
signal sub_result_q : std_logic_vector(c_data_w-1 downto 0); -- Requantized result of the subtractor
signal add_result_q : std_logic_vector(c_data_w-1 downto 0); -- Requantized result of the adder
signal r : t_reg := c_reg_init;
signal rin : t_reg;
signal sub_result : std_logic_vector(c_out_data_w-1 downto 0); -- Result of the subtractor
signal add_result : std_logic_vector(c_out_data_w-1 downto 0); -- Result of the adder
begin
......@@ -108,8 +100,8 @@ begin
g_representation => "SIGNED",
g_pipeline_input => 0,
g_pipeline_output => 1,
g_in_dat_w => c_data_w,
g_out_dat_w => c_data_w + 1
g_in_dat_w => c_in_data_w,
g_out_dat_w => c_out_data_w -- = c_in_data_w + 1
)
port map (
clk => clk,
......@@ -124,8 +116,8 @@ begin
g_representation => "SIGNED",
g_pipeline_input => 0,
g_pipeline_output => 1,
g_in_dat_w => c_data_w,
g_out_dat_w => c_data_w + 1
g_in_dat_w => c_in_data_w,
g_out_dat_w => c_out_data_w -- = c_in_data_w + 1
)
port map (
clk => clk,
......@@ -134,52 +126,11 @@ begin
result => sub_result
);
gen_sepa_truncate : IF c_sepa_round=FALSE GENERATE
-- truncate the one LSbit
add_result_q <= add_result(c_data_w downto 1);
sub_result_q <= sub_result(c_data_w downto 1);
end generate;
gen_sepa_round : IF c_sepa_round=TRUE GENERATE
-- round the one LSbit
round_add : ENTITY common_lib.common_round
GENERIC MAP (
g_representation => "SIGNED", -- SIGNED (round +-0.5 away from zero to +- infinity) or UNSIGNED rounding (round 0.5 up to + infinity)
g_round => TRUE, -- when TRUE round the input, else truncate the input
g_round_clip => FALSE, -- when TRUE clip rounded input >= +max to avoid wrapping to output -min (signed) or 0 (unsigned)
g_pipeline_input => 0, -- >= 0
g_pipeline_output => 0, -- >= 0, use g_pipeline_input=0 and g_pipeline_output=0 for combinatorial output
g_in_dat_w => c_data_w+1,
g_out_dat_w => c_data_w
)
PORT MAP (
clk => clk,
in_dat => add_result,
out_dat => add_result_q
);
round_sub : ENTITY common_lib.common_round
GENERIC MAP (
g_representation => "SIGNED", -- SIGNED (round +-0.5 away from zero to +- infinity) or UNSIGNED rounding (round 0.5 up to + infinity)
g_round => TRUE, -- when TRUE round the input, else truncate the input
g_round_clip => FALSE, -- when TRUE clip rounded input >= +max to avoid wrapping to output -min (signed) or 0 (unsigned)
g_pipeline_input => 0, -- >= 0
g_pipeline_output => 0, -- >= 0, use g_pipeline_input=0 and g_pipeline_output=0 for combinatorial output
g_in_dat_w => c_data_w+1,
g_out_dat_w => c_data_w
)
PORT MAP (
clk => clk,
in_dat => sub_result,
out_dat => sub_result_q
);
end generate;
---------------------------------------------------------------
-- CONTROL PROCESS
---------------------------------------------------------------
comb : process(r, rst, in_val, in_dat, add_result_q, sub_result_q)
variable v : reg_type;
comb : process(r, rst, in_val, in_dat, add_result, sub_result)
variable v : t_reg;
begin
v := r;
......@@ -188,7 +139,7 @@ begin
v.val_dly(0) := in_val;
-- Composition of the output registers:
v.out_dat := sub_result_q & add_result_q;
v.out_dat := sub_result & add_result;
v.out_val := r.val_dly(c_pipeline-1);
-- Compose the inputs for the adder and subtractor
......@@ -196,16 +147,16 @@ begin
if in_val = '1' or r.val_dly(0) = '1' then
if r.switch = '0' then
v.xm_reg := in_dat;
v.add_reg_a := r.xm_reg(c_c_data_w-1 downto c_data_w); -- Xm imag
v.add_reg_b := r.xn_m_reg(c_c_data_w-1 downto c_data_w); -- Xn-m imag
v.sub_reg_a := r.xn_m_reg(c_data_w-1 downto 0); -- Xn-m real
v.sub_reg_b := r.xm_reg(c_data_w-1 downto 0); -- Xm real
v.add_reg_a := r.xm_reg(c_in_complex_w-1 downto c_in_data_w); -- Xm imag
v.add_reg_b := r.xn_m_reg(c_in_complex_w-1 downto c_in_data_w); -- Xn-m imag
v.sub_reg_a := r.xn_m_reg(c_in_data_w-1 downto 0); -- Xn-m real
v.sub_reg_b := r.xm_reg(c_in_data_w-1 downto 0); -- Xm real
else
v.xn_m_reg := in_dat;
v.add_reg_a := r.xm_reg(c_data_w-1 downto 0); -- Xm real
v.add_reg_b := in_dat(c_data_w-1 downto 0); -- Xn-m real
v.sub_reg_a := r.xm_reg(c_c_data_w-1 downto c_data_w); -- Xm imag
v.sub_reg_b := in_dat(c_c_data_w-1 downto c_data_w); -- Xn-m imag
v.add_reg_a := r.xm_reg(c_in_data_w-1 downto 0); -- Xm real
v.add_reg_b := in_dat(c_in_data_w-1 downto 0); -- Xn-m real
v.sub_reg_a := r.xm_reg(c_in_complex_w-1 downto c_in_data_w); -- Xm imag
v.sub_reg_b := in_dat(c_in_complex_w-1 downto c_in_data_w); -- Xn-m imag
end if;
end if;
......@@ -213,16 +164,10 @@ begin
v.switch := not r.switch;
end if;
if(rst = '1') then
if rst = '1' then
-- Only need to reset the control signals
v.switch := '0';
v.val_dly := (others => '0');
v.xn_m_reg := (others => '0');
v.xm_reg := (others => '0');
v.add_reg_a := (others => '0');
v.add_reg_b := (others => '0');
v.sub_reg_a := (others => '0');
v.sub_reg_b := (others => '0');
v.out_dat := (others => '0');
v.out_val := '0';
end if;
......
......@@ -67,11 +67,17 @@ architecture rtl of fft_sepa_wide is
constant c_page_size : natural := g_fft.nof_points/g_fft.wb_factor; -- Size of the memories
constant c_nof_pages : natural := 2; -- The number of pages in each ram.
constant c_dat_w : natural := c_nof_complex*g_fft.stage_dat_w; -- Data width for the internal vectors where real and imag are combined.
constant c_in_w : natural := g_fft.stage_dat_w;
constant c_dat_w : natural := c_nof_complex*c_in_w; -- Data width for the internal vectors where real and imag are combined.
constant c_adr_w : natural := ceil_log2(c_page_size); -- Address width of the rams
constant c_nof_streams : natural := 2; -- Number of input streams for the zip units
type t_dat_arr is array(integer range <> ) of std_logic_vector(c_dat_w-1 downto 0);
constant c_sepa_growth_w : natural := sel_a_b(g_fft.use_separate, 1, 0); -- add one bit for add sub growth in separate
constant c_out_w : natural := c_in_w + c_sepa_growth_w;
constant c_raw_dat_w : natural := c_nof_complex*c_out_w; -- = c_dat_w or c_dat_w + 2
type t_dat_arr is array(integer range <> ) of std_logic_vector(c_dat_w-1 downto 0);
type t_raw_dat_arr is array(integer range <> ) of std_logic_vector(c_raw_dat_w-1 downto 0);
type t_rd_adr_arr is array(integer range <> ) of std_logic_vector(c_adr_w-1 downto 0);
type t_zip_in_matrix is array(integer range <> ) of t_slv_64_arr(1 downto 0); -- Every Zip unit has two inputs.
......@@ -85,24 +91,27 @@ architecture rtl of fft_sepa_wide is
signal zip_in_matrix : t_zip_in_matrix(g_fft.wb_factor-1 downto 0); -- Matrix that contains the inputs for zip units
signal zip_in_val : std_logic_vector(g_fft.wb_factor-1 downto 0); -- Vector that holds the data input valids for the zip units
signal zip_out_dat_arr : t_dat_arr(g_fft.wb_factor-1 downto 0); -- Array that holds the outputs of all zip units.
signal zip_out_dat_arr : t_dat_arr(g_fft.wb_factor-1 downto 0); -- Array that holds the outputs of all zip units.
signal zip_out_val : std_logic_vector(g_fft.wb_factor-1 downto 0); -- Vector that holds the output valids of the zip units
signal sep_out_dat_arr : t_dat_arr(g_fft.wb_factor-1 downto 0); -- Array that holds the outputs of the separation blocks
signal sep_out_dat_arr : t_raw_dat_arr(g_fft.wb_factor-1 downto 0); -- Array that holds the outputs of the separation blocks
signal sep_out_val_vec : std_logic_vector(g_fft.wb_factor-1 downto 0); -- Vector containing the datavalids from the separation blocks
signal out_dat_arr : t_dat_arr(g_fft.wb_factor-1 downto 0); -- Array that holds the output values, where real and imag are concatenated
signal out_dat_arr : t_raw_dat_arr(g_fft.wb_factor-1 downto 0); -- Array that holds the output values, where real and imag are concatenated
type state_type is (s_idle, s_read);
type reg_type is record
type t_state is (s_idle, s_read);
type t_reg is record
switch : std_logic; -- Toggle register used for separate functionality
count_up : natural range 0 to c_page_size; -- An upwards counter for read addressing
count_down : natural range 0 to c_page_size; -- A downwards counter for read addressing
val_odd : std_logic; -- Register that drives the in_valid of the odd zip units
val_even : std_logic; -- Register that drives the in_valid of the even zip units
state : state_type; -- The state machine.
state : t_state; -- The state machine.
end record;
signal r, rin : reg_type;
constant c_reg_init : t_reg := ('0', 0, 0, '0', '0', s_idle);
signal r : t_reg := c_reg_init;
signal rin : t_reg;
begin
......@@ -111,7 +120,7 @@ begin
---------------------------------------------------------------
-- Prepare the data for the dual paged memory. Real and imaginary part are concatenated into one vector.
gen_prep_write_data : for I in 0 to g_fft.wb_factor-1 generate
wr_dat(I) <= in_im_arr(I)(g_fft.stage_dat_w-1 downto 0) & in_re_arr(I)(g_fft.stage_dat_w-1 downto 0);
wr_dat(I) <= in_im_arr(I)(c_in_w-1 downto 0) & in_re_arr(I)(c_in_w-1 downto 0);
end generate;
-- Prepare the write control signals for the memories.
......@@ -204,9 +213,9 @@ begin
port map (
clk => clk,
rst => rst,
in_dat => zip_out_dat_arr(I),
in_dat => zip_out_dat_arr(I), -- c_dat_w
in_val => zip_out_val(I),
out_dat => sep_out_dat_arr(I),
out_dat => sep_out_dat_arr(I), -- c_dat_w + 2
out_val => sep_out_val_vec(I)
);
end generate;
......@@ -218,13 +227,13 @@ begin
-- the fellow toggle signals. It also controls the starting and stopping
-- of the data stream.
comb : process(r, rst, next_page)
variable v : reg_type;
variable v : t_reg;
begin
v := r;
case r.state is
when s_idle =>
when s_idle =>
v.switch := '0';
v.val_odd := '0';
v.val_even := '0';
......@@ -234,7 +243,7 @@ begin
v.state := s_read;
end if;
when s_read =>
when s_read =>
if(r.switch = '0') then -- Toggle the switch register from 0 to 1
v.switch := '1';
end if;
......@@ -255,22 +264,17 @@ begin
v.val_odd := r.switch; -- Assignment of the odd and even markers
v.val_even := not(r.switch);
when others =>
v.state := s_idle;
when others =>
v.state := s_idle;
end case;
end case;
if(rst = '1') then
v.switch := '0';
v.count_up := 0;
v.count_down := 0;
v.val_odd := '0';
v.val_even := '0';
v.state := s_idle;
if rst = '1' then
v := c_reg_init;
end if;
rin <= v;
end process comb;
regs : process(clk)
......@@ -287,8 +291,8 @@ begin
u_output_pipeline_align : entity common_lib.common_pipeline
generic map (
g_pipeline => c_pipeline_output + 1, -- Pipeline + one stage for alignment
g_in_dat_w => c_dat_w,
g_out_dat_w => c_dat_w
g_in_dat_w => c_raw_dat_w,
g_out_dat_w => c_raw_dat_w
)
port map (
clk => clk,
......@@ -299,8 +303,8 @@ begin
u_output_pipeline : entity common_lib.common_pipeline
generic map (
g_pipeline => c_pipeline_output, -- Only pipeline stage
g_in_dat_w => c_dat_w,
g_out_dat_w => c_dat_w
g_in_dat_w => c_raw_dat_w,
g_out_dat_w => c_raw_dat_w
)
port map (
clk => clk,
......@@ -321,8 +325,8 @@ begin
-- Split the concatenated array into a real and imaginary array for the output
gen_output_arrays : for I in g_fft.wb_factor-1 downto 0 generate
out_re_arr(I) <= resize_fft_svec(out_dat_arr(I)( g_fft.stage_dat_w-1 downto 0));
out_im_arr(I) <= resize_fft_svec(out_dat_arr(I)(c_nof_complex*g_fft.stage_dat_w-1 downto g_fft.stage_dat_w));
out_re_arr(I) <= resize_fft_svec(out_dat_arr(I)( c_out_w-1 downto 0));
out_im_arr(I) <= resize_fft_svec(out_dat_arr(I)(c_nof_complex*c_out_w-1 downto c_out_w));
end generate;
end rtl;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment