diff --git a/libraries/dsp/doc/filterbank.txt b/libraries/dsp/doc/filterbank.txt
index b8750865ab39699854e03f026866a2025930cd9d..e7eedb9959bdc5b8d695bdaf298f412fb3bc4a7e 100644
--- a/libraries/dsp/doc/filterbank.txt
+++ b/libraries/dsp/doc/filterbank.txt
@@ -263,5 +263,18 @@ b) FFT
        . first start the tb simulation (run -a, or about 30 us)
        . then start the tc script
        The tc does not verify the output, so the tc PASSED statement is void.
+ 
+    $5) Two real separate rounding
+       The add and sub outputs of the separate function have 1 bit growth that
+       needs to be rounded. Simply skipping 1 LSbit is not suitable, because it
+       yields asymmetry around 0 and thus a DC offset. For example for N = 3-bit data:
+                  x =  -4 -3 -2 -1  0  1  2  3
+         round(x/2) =  -2 -2 -1 -1  0  1  1  2  = common_round for signed
+         floor(x/2) =  -2 -2 -1 -1  0  0  1  1  = truncation
+       The most negative value can be ignored:
+                  x : mean(-3 -2 -1  0  1  2  3) = 0
+       . round(x/2) : mean(-2 -1 -1  0  1  1  2) = 0
+       . floor(x/2) : mean(-2 -1 -1  0  0  1  1) = -2/8 = -0.25 = -2^(N-1)/2 / 2^N
+       So the DC offset due to truncation is -0.25 LSbit, independent of N.
    
 c) 
\ No newline at end of file
diff --git a/libraries/dsp/fft/src/vhdl/fft_r2_par.vhd b/libraries/dsp/fft/src/vhdl/fft_r2_par.vhd
index 7cd67dd0720fbb84683125d87c3414adab423097..3f1927422398eee82f0e0d4065299cfaffe994ad 100644
--- a/libraries/dsp/fft/src/vhdl/fft_r2_par.vhd
+++ b/libraries/dsp/fft/src/vhdl/fft_r2_par.vhd
@@ -33,7 +33,9 @@
 --                        real stream (B) presented on the imaginary input. 
 --                        The separation unit outputs the spectrum of A and B in 
 --                        an alternating way: A(0), B(0), A(1), B(1).... etc
-          
+--                        The separate function adds and subtracts two complex bins. 
+--                        Therefore it causes 1 bit growth that needs to be rounded, as
+--                        explained in fft_sepa.vhd
 
 library ieee, common_lib, rTwoSDF_lib;
 use IEEE.std_logic_1164.all;
@@ -104,14 +106,14 @@ architecture str of fft_r2_par is
     v_nr_of_domains := nr_of_points/2**(stage+1);                     
     v_offset := 2**stage;             
     for I in 0 to v_nr_of_domains loop                                
-      if(array_index >= (2*I)*2**stage and array_index < (2*I+1)*2**stage) then      -- Detect if output is an even section
-        if((array_index mod 2) = 0) then                                             -- Check if input value is odd or even
+      if array_index >= (2*I)*2**stage and array_index < (2*I+1)*2**stage then       -- Detect if output is an even section
+        if (array_index mod 2) = 0 then                                              -- Check if input value is odd or even
           v_return := array_index;                                                   -- When even: value of element                                                 
         else                                                          
           v_return := array_index+v_offset-1;                                        -- When odd: value of element + offset
         end if;
-      elsif(array_index >= (2*I+1)*2**stage and array_index < (2*I+2)*2**stage) then
-        if((array_index mod 2) = 0) then                                                 -- Check if input value is odd or even
+      elsif array_index >= (2*I+1)*2**stage and array_index < (2*I+2)*2**stage then
+        if (array_index mod 2) = 0 then                                              -- Check if input value is odd or even
           v_return := array_index-v_offset+1;                                        -- When even: offset is subtracted from the element
         else                                                                                                       
           v_return := array_index;                                                   -- When odd: element stays the the same.       
@@ -121,6 +123,9 @@ architecture str of fft_r2_par is
     return v_return; 
   end;
   
+  constant c_pipeline_remove_lsb : natural := 1;
+  constant c_sepa_round          : boolean := true;  -- must be true, because separate should round the 1 bit growth
+  
   constant c_nof_stages         : natural := ceil_log2(g_fft.nof_points);  
   constant c_nof_bf_per_stage   : natural := g_fft.nof_points/2;  
   constant c_in_scale_w_tester  : integer := g_fft.stage_dat_w - g_fft.in_dat_w - sel_a_b(g_fft.guard_enable, g_fft.guard_w, 0);
@@ -140,11 +145,12 @@ architecture str of fft_r2_par is
   signal data_val         : t_val_arr;
   signal int_re_arr       : t_stage_dat_arr(g_fft.nof_points-1 downto 0);
   signal int_im_arr       : t_stage_dat_arr(g_fft.nof_points-1 downto 0);
-  signal pre_quant_re_arr : t_stage_dat_arr(g_fft.nof_points-1 downto 0);
-  signal pre_quant_im_arr : t_stage_dat_arr(g_fft.nof_points-1 downto 0);
+  signal fft_re_arr       : t_stage_dat_arr(g_fft.nof_points-1 downto 0);
+  signal fft_im_arr       : t_stage_dat_arr(g_fft.nof_points-1 downto 0);
   signal add_arr          : t_stage_sum_arr(g_fft.nof_points-1 downto 0);
   signal sub_arr          : t_stage_sum_arr(g_fft.nof_points-1 downto 0);
   signal int_val          : std_logic;
+  signal fft_val          : std_logic;
 
 begin
  
@@ -217,8 +223,12 @@ begin
   --------------------------------------------------------------------------------
   -- Optional separate 
   --------------------------------------------------------------------------------
-  gen_separate : if(g_fft.use_separate) generate
-    gen_reordering : for I in 1 to g_fft.nof_points/2 - 1 generate
+  gen_separate : if g_fft.use_separate generate
+    ---------------------------------------------------------------------------
+    -- Calculate the positive bins
+    ---------------------------------------------------------------------------
+    gen_positive_bins : for I in 1 to g_fft.nof_points/2 - 1 generate
+      -- common_add_sub
       a_output_real_adder : entity common_lib.common_add_sub
       generic map (
         g_direction       => "ADD",      
@@ -232,9 +242,8 @@ begin
         clk     => clk,
         in_a    => int_re_arr(g_fft.nof_points-I), 
         in_b    => int_re_arr(I), 
-        result  => add_arr(2*i)
+        result  => add_arr(2*I)
       );
-      pre_quant_re_arr(2*I) <= add_arr(2*i)(g_fft.stage_dat_w DOWNTO 1);
       
       b_output_real_adder : entity common_lib.common_add_sub
       generic map (
@@ -249,9 +258,8 @@ begin
         clk     => clk,
         in_a    => int_im_arr(g_fft.nof_points-I), 
         in_b    => int_im_arr(I), 
-        result  => add_arr(2*i+1)
+        result  => add_arr(2*I+1)
       );
-      pre_quant_re_arr(2*I+1) <= add_arr(2*i+1)(g_fft.stage_dat_w DOWNTO 1);
       
       a_output_imag_subtractor : entity common_lib.common_add_sub
       generic map (
@@ -266,9 +274,8 @@ begin
         clk     => clk,
         in_a    => int_im_arr(I), 
         in_b    => int_im_arr(g_fft.nof_points-I), 
-        result  => sub_arr(2*i)
+        result  => sub_arr(2*I)
       );
-      pre_quant_im_arr(2*I) <= sub_arr(2*i)(g_fft.stage_dat_w DOWNTO 1);
       
       b_output_imag_subtractor : entity common_lib.common_add_sub
       generic map (
@@ -283,12 +290,94 @@ begin
         clk     => clk,
         in_a    => int_re_arr(g_fft.nof_points-I), 
         in_b    => int_re_arr(I), 
-        result  => sub_arr(2*i+1)
+        result  => sub_arr(2*I+1)
       );
-      pre_quant_im_arr(2*I+1) <= sub_arr(2*i+1)(g_fft.stage_dat_w DOWNTO 1);
       
-    end generate;    
+      gen_sepa_truncate : IF c_sepa_round=false GENERATE
+        -- truncate the one LSbit
+        fft_re_arr(2*I  ) <= add_arr(2*I  )(g_fft.stage_dat_w DOWNTO 1);  -- A real
+        fft_re_arr(2*I+1) <= add_arr(2*I+1)(g_fft.stage_dat_w DOWNTO 1);  -- B real
+        fft_im_arr(2*I  ) <= sub_arr(2*I  )(g_fft.stage_dat_w DOWNTO 1);  -- A imag
+        fft_im_arr(2*I+1) <= sub_arr(2*I+1)(g_fft.stage_dat_w DOWNTO 1);  -- B imag
+      end generate;
+      
+      gen_sepa_round : IF c_sepa_round=true GENERATE
+        -- round the one LSbit
+        round_re_a : ENTITY common_lib.common_round
+        GENERIC MAP (
+          g_representation  => "SIGNED",  -- SIGNED (round +-0.5 away from zero to +- infinity) or UNSIGNED rounding (round 0.5 up to + infinity)
+          g_round           => TRUE,      -- when TRUE round the input, else truncate the input
+          g_round_clip      => FALSE,     -- when TRUE clip rounded input >= +max to avoid wrapping to output -min (signed) or 0 (unsigned)
+          g_pipeline_input  => 0,         -- >= 0
+          g_pipeline_output => 0,         -- >= 0, use g_pipeline_input=0 and g_pipeline_output=0 for combinatorial output
+          g_in_dat_w        => g_fft.stage_dat_w+1,
+          g_out_dat_w       => g_fft.stage_dat_w
+        )
+        PORT MAP (
+          clk        => clk,
+          in_dat     => add_arr(2*I),
+          out_dat    => fft_re_arr(2*I)
+        );
+      
+        round_re_b : ENTITY common_lib.common_round
+        GENERIC MAP (
+          g_representation  => "SIGNED",  -- SIGNED (round +-0.5 away from zero to +- infinity) or UNSIGNED rounding (round 0.5 up to + infinity)
+          g_round           => TRUE,      -- when TRUE round the input, else truncate the input
+          g_round_clip      => FALSE,     -- when TRUE clip rounded input >= +max to avoid wrapping to output -min (signed) or 0 (unsigned)
+          g_pipeline_input  => 0,         -- >= 0
+          g_pipeline_output => 0,         -- >= 0, use g_pipeline_input=0 and g_pipeline_output=0 for combinatorial output
+          g_in_dat_w        => g_fft.stage_dat_w+1,
+          g_out_dat_w       => g_fft.stage_dat_w
+        )
+        PORT MAP (
+          clk        => clk,
+          in_dat     => add_arr(2*I+1),
+          out_dat    => fft_re_arr(2*I+1)
+        );
+            
+        round_im_a : ENTITY common_lib.common_round
+        GENERIC MAP (
+          g_representation  => "SIGNED",  -- SIGNED (round +-0.5 away from zero to +- infinity) or UNSIGNED rounding (round 0.5 up to + infinity)
+          g_round           => TRUE,      -- when TRUE round the input, else truncate the input
+          g_round_clip      => FALSE,     -- when TRUE clip rounded input >= +max to avoid wrapping to output -min (signed) or 0 (unsigned)
+          g_pipeline_input  => 0,         -- >= 0
+          g_pipeline_output => 0,         -- >= 0, use g_pipeline_input=0 and g_pipeline_output=0 for combinatorial output
+          g_in_dat_w        => g_fft.stage_dat_w+1,
+          g_out_dat_w       => g_fft.stage_dat_w
+        )
+        PORT MAP (
+          clk        => clk,
+          in_dat     => sub_arr(2*I),
+          out_dat    => fft_im_arr(2*I)
+        );
+      
+        round_im_b : ENTITY common_lib.common_round
+        GENERIC MAP (
+          g_representation  => "SIGNED",  -- SIGNED (round +-0.5 away from zero to +- infinity) or UNSIGNED rounding (round 0.5 up to + infinity)
+          g_round           => TRUE,      -- when TRUE round the input, else truncate the input
+          g_round_clip      => FALSE,     -- when TRUE clip rounded input >= +max to avoid wrapping to output -min (signed) or 0 (unsigned)
+          g_pipeline_input  => 0,         -- >= 0
+          g_pipeline_output => 0,         -- >= 0, use g_pipeline_input=0 and g_pipeline_output=0 for combinatorial output
+          g_in_dat_w        => g_fft.stage_dat_w+1,
+          g_out_dat_w       => g_fft.stage_dat_w
+        )
+        PORT MAP (
+          clk        => clk,
+          in_dat     => sub_arr(2*I+1),
+          out_dat    => fft_im_arr(2*I+1)
+        );
+      end generate;
+    end generate;
 
+    ---------------------------------------------------------------------------
+    -- Generate bin 0 directly
+    ---------------------------------------------------------------------------
+    -- Index N=g_fft.nof_points wraps to index 0:
+    -- . fft_re_arr(0) = (int_re_arr(0) + int_re_arr(N)) / 2 = int_re_arr(0)
+    -- . fft_re_arr(1) = (int_im_arr(0) + int_im_arr(N)) / 2 = int_im_arr(0)
+    -- . fft_im_arr(0) = (int_im_arr(0) - int_im_arr(N)) / 2 = 0
+    -- . fft_im_arr(1) = (int_re_arr(0) - int_re_arr(N)) / 2 = 0
+    
     u_pipeline_a_re_0 : entity common_lib.common_pipeline
     generic map (
       g_pipeline  => g_pipeline.sep_lat,
@@ -298,7 +387,7 @@ begin
     port map (
       clk     => clk,
       in_dat  => int_re_arr(0),
-      out_dat => pre_quant_re_arr(0)
+      out_dat => fft_re_arr(0)
     );
     
     u_pipeline_b_re_0 : entity common_lib.common_pipeline
@@ -310,39 +399,39 @@ begin
     port map (
       clk     => clk,
       in_dat  => int_im_arr(0),
-      out_dat => pre_quant_re_arr(1)
+      out_dat => fft_re_arr(1)
     );
+    
     -- The imaginary outputs of A(0) and B(0) are always zero in case two real inputs are provided
-    pre_quant_im_arr(0) <= (others => '0');  
-    pre_quant_im_arr(1) <= (others => '0');  
+    fft_im_arr(0) <= (others=>'0');
+    fft_im_arr(1) <= (others=>'0');
 
     ------------------------------------------------------------------------------
-    -- Valid pipelining. The val signal must be pipelined for ser_lat cycles
-    -- to compensate for the pipelines atge of the adders and subtractors 
+    -- Valid pipelining for separate
     ------------------------------------------------------------------------------
-    u_val_ser_lat : entity common_lib.common_pipeline_sl
+    u_seperate_fft_val : entity common_lib.common_pipeline_sl
     generic map (
       g_pipeline => g_pipeline.sep_lat
     )
     port map (
       clk     => clk,
       in_dat  => int_val,
-      out_dat => out_val
+      out_dat => fft_val
     );     
   end generate;
 
-  no_separate : if(g_fft.use_separate=false) generate 
+  no_separate : if g_fft.use_separate=false generate 
     assign_outputs : for I in 0 to g_fft.nof_points-1 generate
-      pre_quant_re_arr(I) <= int_re_arr(I);    
-      pre_quant_im_arr(I) <= int_im_arr(I);  
+      fft_re_arr(I) <= int_re_arr(I);    
+      fft_im_arr(I) <= int_im_arr(I);  
     end generate;
-    out_val <= int_val;
+    fft_val <= int_val;
   end generate;  
  
   ------------------------------------------------------------------------------
   -- Parallel FFT output requantization
   ------------------------------------------------------------------------------
-  create_output_requantizers : for I in 0 to g_fft.nof_points-1 generate  
+  gen_output_requantizers : for I in 0 to g_fft.nof_points-1 generate  
     u_requantize_re : entity common_lib.common_requantize
     generic map (
       g_representation      => "SIGNED",      
@@ -352,15 +441,14 @@ begin
       g_msb_clip            => FALSE,            
       g_msb_clip_symmetric  => FALSE,  
       g_gain_w              => c_out_gain_w,
-      g_pipeline_remove_lsb => 0, 
+      g_pipeline_remove_lsb => c_pipeline_remove_lsb, 
       g_pipeline_remove_msb => 0, 
       g_in_dat_w            => g_fft.stage_dat_w,            
       g_out_dat_w           => g_fft.out_dat_w
     )
     port map (
       clk        => clk,
-      clken      => '1',
-      in_dat     => pre_quant_re_arr(I),
+      in_dat     => fft_re_arr(I),
       out_dat    => out_re_arr(I), 
       out_ovr    => open
     );       
@@ -374,18 +462,28 @@ begin
       g_msb_clip            => FALSE,            
       g_msb_clip_symmetric  => FALSE,  
       g_gain_w              => c_out_gain_w,
-      g_pipeline_remove_lsb => 0, 
+      g_pipeline_remove_lsb => c_pipeline_remove_lsb, 
       g_pipeline_remove_msb => 0, 
       g_in_dat_w            => g_fft.stage_dat_w,            
       g_out_dat_w           => g_fft.out_dat_w
     )
     port map (
       clk        => clk,
-      clken      => '1',
-      in_dat     => pre_quant_im_arr(I),
+      in_dat     => fft_im_arr(I),
       out_dat    => out_im_arr(I), 
       out_ovr    => open
     );
+    
+    u_out_val : entity common_lib.common_pipeline_sl
+    generic map (
+      g_pipeline => c_pipeline_remove_lsb
+    )
+    port map (
+      rst     => rst,
+      clk     => clk,
+      in_dat  => fft_val,
+      out_dat => out_val
+    );
   end generate; 
   
 end str;
diff --git a/libraries/dsp/fft/src/vhdl/fft_r2_pipe.vhd b/libraries/dsp/fft/src/vhdl/fft_r2_pipe.vhd
index 19ab52a6ae0109263bc94c388110fbf98307c574..e3be369fd9fdc7127c32f65905dff8c4b5c21a43 100644
--- a/libraries/dsp/fft/src/vhdl/fft_r2_pipe.vhd
+++ b/libraries/dsp/fft/src/vhdl/fft_r2_pipe.vhd
@@ -76,6 +76,8 @@ end entity fft_r2_pipe;
 
 architecture str of fft_r2_pipe is
 
+  constant c_pipeline_remove_lsb : natural := 0;
+  
   constant c_nof_stages         : natural := ceil_log2(g_fft.nof_points);
   constant c_stage_offset       : natural := true_log2(g_fft.wb_factor);                         -- Stage offset is required for twiddle generation in wideband fft
   constant c_in_scale_w         : natural := g_fft.stage_dat_w - g_fft.in_dat_w - sel_a_b(g_fft.guard_enable, g_fft.guard_w, 0);              
@@ -176,14 +178,13 @@ begin
     g_msb_clip            => FALSE,            
     g_msb_clip_symmetric  => FALSE,  
     g_gain_w              => c_out_gain_w,
-    g_pipeline_remove_lsb => 0, 
+    g_pipeline_remove_lsb => c_pipeline_remove_lsb, 
     g_pipeline_remove_msb => 0, 
     g_in_dat_w            => g_fft.stage_dat_w,            
     g_out_dat_w           => g_fft.out_dat_w
   )
   port map (
     clk        => clk,
-    clken      => '1',
     in_dat     => raw_out_re,
     out_dat    => out_re, 
     out_ovr    => open
@@ -198,21 +199,29 @@ begin
     g_msb_clip            => FALSE,            
     g_msb_clip_symmetric  => FALSE,  
     g_gain_w              => c_out_gain_w,
-    g_pipeline_remove_lsb => 0, 
+    g_pipeline_remove_lsb => c_pipeline_remove_lsb, 
     g_pipeline_remove_msb => 0, 
     g_in_dat_w            => g_fft.stage_dat_w,            
     g_out_dat_w           => g_fft.out_dat_w   
   )
   port map (
     clk        => clk,
-    clken      => '1',
     in_dat     => raw_out_im,
     out_dat    => out_im, 
     out_ovr    => open
   );
   
   -- Valid Output
-  out_val <= raw_out_val;
+  u_out_val : entity common_lib.common_pipeline_sl
+  generic map (
+    g_pipeline => c_pipeline_remove_lsb
+  )
+  port map (
+    rst     => rst,
+    clk     => clk,
+    in_dat  => raw_out_val,
+    out_dat => out_val
+  );
   
 end str;
 
diff --git a/libraries/dsp/fft/src/vhdl/fft_r2_wide.vhd b/libraries/dsp/fft/src/vhdl/fft_r2_wide.vhd
index 72ecd6d70d2d4f7ce02642f84c317eb684adbbd2..7385f24d9404342cceaa1feaa5e574973e2b17f9 100644
--- a/libraries/dsp/fft/src/vhdl/fft_r2_wide.vhd
+++ b/libraries/dsp/fft/src/vhdl/fft_r2_wide.vhd
@@ -50,15 +50,14 @@
 --    fft_shift() only applies to spectra for complex input.
 --
 -- Remarks:
--- . This fft_r2_wide does not (yet) support wb_factor = 1 (= only a
---   fft_r2_pipe instance) or wb_factor = g_fft.nof_points (= only a
---   fft_r2_par instance). Fixing this is nice to have, but not essential.
---   Care must be taken to properly account for guard_w and out_gain_w.
---   Therefore probably it is most clear to use a structural approach that
---   would generate seperate instances for each case:
---   . wb_factor = 1
---   . wb_factor > 1 AND wb_factor < g_fft.nof_points
---   . wb_factor = g_fft.nof_points.
+-- . This fft_r2_wide also supports wb_factor = 1 (= only a fft_r2_pipe
+--   instance) or wb_factor = g_fft.nof_points (= only a fft_r2_par instance).
+--   Care must be taken to properly account for guard_w and out_gain_w,
+--   therefore it is best to simply use a structural approach that generates
+--   separate instances for each case:
+--   . wb_factor = 1                                  --> pipe
+--   . wb_factor > 1 AND wb_factor < g_fft.nof_points --> wide
+--   . wb_factor = g_fft.nof_points                   --> par
 -- . This fft_r2_wide uses the use_reorder in the pipeline FFT, in the parallel
 --   FFT and also has reorder memory in the fft_sepa_wide instance. The reorder
 --   memories in the FFTs can maybe be saved by using only the reorder memory
@@ -144,6 +143,8 @@ architecture rtl of fft_r2_wide is
     return v_return; 
   end;
   
+  constant c_pipeline_remove_lsb : natural := 0;
+  
   constant c_fft_r2_pipe_arr  : t_fft_arr(g_fft.wb_factor-1 downto 0) := func_create_generic_for_pipe_fft(g_fft); 
   constant c_fft_r2_par       : t_fft                                 := func_create_generic_for_par_fft(g_fft);
   
@@ -162,6 +163,9 @@ architecture rtl of fft_r2_wide is
   signal in_fft_par_re_arr    : t_fft_slv_arr(g_fft.wb_factor-1 downto 0);
   signal in_fft_par_im_arr    : t_fft_slv_arr(g_fft.wb_factor-1 downto 0);
 
+  signal fft_pipe_out_re      : std_logic_vector(g_fft.out_dat_w-1 downto 0);
+  signal fft_pipe_out_im      : std_logic_vector(g_fft.out_dat_w-1 downto 0);
+  
   signal fft_out_re_arr       : t_fft_slv_arr(g_fft.wb_factor-1 downto 0);
   signal fft_out_im_arr       : t_fft_slv_arr(g_fft.wb_factor-1 downto 0);
   signal fft_out_val          : std_logic;                                   
@@ -170,63 +174,106 @@ architecture rtl of fft_r2_wide is
   signal sep_out_im_arr       : t_fft_slv_arr(g_fft.wb_factor-1 downto 0);   
   signal sep_out_val          : std_logic;                                   
 
-  signal int_val              : std_logic_vector(g_fft.wb_factor-1 downto 0); 
+  signal int_val              : std_logic_vector(g_fft.wb_factor-1 downto 0);
   
   signal out_cplx             : std_logic_vector(c_nof_complex*g_fft.stage_dat_w-1 downto 0);
   signal in_cplx              : std_logic_vector(c_nof_complex*g_fft.stage_dat_w-1 downto 0);
 
 begin
+
+  -- Default to fft_r2_pipe when g_fft.wb_factor=1
+  gen_fft_r2_pipe : if g_fft.wb_factor=1 generate
+    u_fft_r2_pipe : entity work.fft_r2_pipe
+    generic map (
+      g_fft      => g_fft,
+      g_pipeline => g_pft_pipeline
+    )
+    port map (
+      clk        => clk,
+      rst        => rst,
+      in_re      => in_re_arr(0)(g_fft.in_dat_w-1 downto 0),
+      in_im      => in_im_arr(0)(g_fft.in_dat_w-1 downto 0),
+      in_val     => in_val,
+      out_re     => fft_pipe_out_re,
+      out_im     => fft_pipe_out_im,
+      out_val    => out_val
+    );
+    
+    out_re_arr(0) <= resize_fft_svec(fft_pipe_out_re);
+    out_im_arr(0) <= resize_fft_svec(fft_pipe_out_im);
+  end generate;
   
-  ------------------------------------------------------------------------------
-  -- Inputs are prepared/scaled for the pipelined ffts
-  ------------------------------------------------------------------------------
-  gen_get_the_inputs : for I in 0 to g_fft.wb_factor-1 generate
-    in_fft_pipe_re_arr(I) <= scale_and_resize_svec(in_re_arr(I), c_in_scale_w, c_fft_slv_w);
-    in_fft_pipe_im_arr(I) <= scale_and_resize_svec(in_im_arr(I), c_in_scale_w, c_fft_slv_w);
-  end generate;   
-  
-  ---------------------------------------------------------------
-  -- PIPELINED FFT STAGE
-  ---------------------------------------------------------------
-  -- The first stage of the wideband fft consist of the generation of "wb_factor"
-  -- pipelined fft's. These pipelines fft's operate in parallel.   
-  gen_pipelined_ffts : for I in g_fft.wb_factor-1 downto 0 generate
-    u_pft : entity work.fft_r2_pipe
-    generic map(
-      g_fft      => c_fft_r2_pipe_arr(I),   -- generics for the pipelined FFTs
-      g_pipeline => g_pft_pipeline          -- pipeline generics for the pipelined FFTs
+  -- Default to fft_r2_par when g_fft.wb_factor=g_fft.nof_points
+  gen_fft_r2_par : if g_fft.wb_factor=g_fft.nof_points generate
+    u_fft_r2_par : entity work.fft_r2_par
+    generic map (
+      g_fft      => g_fft,
+      g_pipeline => g_fft_pipeline
     )
-    port map(
-      clk       => clk,
-      rst       => rst,
-      in_re     => in_fft_pipe_re_arr(I)(c_fft_r2_pipe_arr(I).in_dat_w-1 downto 0),
-      in_im     => in_fft_pipe_im_arr(I)(c_fft_r2_pipe_arr(I).in_dat_w-1 downto 0),
-      in_val    => in_val,
-      out_re    => out_fft_pipe_re_arr(I)(c_fft_r2_pipe_arr(I).out_dat_w-1 downto 0),
-      out_im    => out_fft_pipe_im_arr(I)(c_fft_r2_pipe_arr(I).out_dat_w-1 downto 0),
-      out_val   => int_val(I)
-    );     
-  end generate;       
-   
-  create_par_fft : if (g_fft.wb_factor > 1) generate    
-    -- Create input for paralle FFT.
-    gen_inputs_for_par : for I in g_fft.wb_factor-1 downto 0 generate
-      -- Perform the 1 bit scaling here befor entering the parallel FFT:
-      in_fft_par_re_arr(I) <= RESIZE_SVEC(out_fft_pipe_re_arr(I)(c_fft_r2_pipe_arr(I).out_dat_w-1 downto 0), c_fft_slv_w);  
-      in_fft_par_im_arr(I) <= RESIZE_SVEC(out_fft_pipe_im_arr(I)(c_fft_r2_pipe_arr(I).out_dat_w-1 downto 0), c_fft_slv_w);
-    end generate;
+    port map (
+      clk        => clk,
+      rst        => rst,
+      in_re_arr  => in_re_arr,
+      in_im_arr  => in_im_arr,
+      in_val     => in_val,
+      out_re_arr => out_re_arr,
+      out_im_arr => out_im_arr,
+      out_val    => out_val
+    );
+  end generate;
+  
+  -- Create wideband FFT as combination of g_fft.wb_factor instances of fft_r2_pipe with one instance of fft_r2_par
+  gen_fft_r2_wide : if g_fft.wb_factor>1 and g_fft.wb_factor<g_fft.nof_points generate
   
+    ---------------------------------------------------------------
+    -- PIPELINED FFT STAGE
+    ---------------------------------------------------------------
+
+    -- Inputs are prepared/scaled for the pipelined ffts
+    gen_fft_pipe_inputs : for I in 0 to g_fft.wb_factor-1 generate
+      in_fft_pipe_re_arr(I) <= scale_and_resize_svec(in_re_arr(I), c_in_scale_w, c_fft_slv_w);
+      in_fft_pipe_im_arr(I) <= scale_and_resize_svec(in_im_arr(I), c_in_scale_w, c_fft_slv_w);
+    end generate;   
+    
+    -- The first stage of the wideband fft consist of the generation of g_fft.wb_factor
+    -- pipelined fft's. These pipelines fft's operate in parallel.   
+    gen_pipelined_ffts : for I in g_fft.wb_factor-1 downto 0 generate
+      u_pft : entity work.fft_r2_pipe
+      generic map (
+        g_fft      => c_fft_r2_pipe_arr(I),   -- generics for the pipelined FFTs
+        g_pipeline => g_pft_pipeline          -- pipeline generics for the pipelined FFTs
+      )
+      port map (
+        clk       => clk,
+        rst       => rst,
+        in_re     => in_fft_pipe_re_arr(I)(c_fft_r2_pipe_arr(I).in_dat_w-1 downto 0),
+        in_im     => in_fft_pipe_im_arr(I)(c_fft_r2_pipe_arr(I).in_dat_w-1 downto 0),
+        in_val    => in_val,
+        out_re    => out_fft_pipe_re_arr(I)(c_fft_r2_pipe_arr(I).out_dat_w-1 downto 0),
+        out_im    => out_fft_pipe_im_arr(I)(c_fft_r2_pipe_arr(I).out_dat_w-1 downto 0),
+        out_val   => int_val(I)
+      );     
+    end generate;       
+    
+    
     ---------------------------------------------------------------
     -- PARALLEL FFT STAGE
     ---------------------------------------------------------------
-    -- The "wb_factor" outputs of the pipelined fft's are offered
+
+    -- Create input for parallel FFT
+    gen_inputs_for_par : for I in g_fft.wb_factor-1 downto 0 generate
+      in_fft_par_re_arr(I) <= resize_fft_svec(out_fft_pipe_re_arr(I)(c_fft_r2_pipe_arr(I).out_dat_w-1 downto 0));
+      in_fft_par_im_arr(I) <= resize_fft_svec(out_fft_pipe_im_arr(I)(c_fft_r2_pipe_arr(I).out_dat_w-1 downto 0));
+    end generate;
+  
+    -- The g_fft.wb_factor outputs of the pipelined fft's are offered
     -- to the input of a single parallel FFT. 
     u_fft : entity work.fft_r2_par
-    generic map(
+    generic map (
       g_fft      => c_fft_r2_par,           -- generics for the FFT
       g_pipeline => g_fft_pipeline          -- pipeline generics for the parallel FFT
     )
-    port map(
+    port map (
       clk        => clk,
       rst        => rst,
       in_re_arr  => in_fft_par_re_arr,
@@ -236,27 +283,18 @@ begin
       out_im_arr => fft_out_im_arr,
       out_val    => fft_out_val
     );
-  end generate;                                         
-  
-  -- When wb_factor = 1 the parallel FFT can be skipped. 
-  bypass_par_fft : if (g_fft.wb_factor = 1) generate    
-    fft_out_re_arr(0) <= RESIZE_SVEC(out_fft_pipe_re_arr(0)(c_fft_r2_pipe_arr(0).out_dat_w-1 downto 0), c_fft_slv_w);
-    fft_out_im_arr(0) <= RESIZE_SVEC(out_fft_pipe_im_arr(0)(c_fft_r2_pipe_arr(0).out_dat_w-1 downto 0), c_fft_slv_w);
-    fft_out_val       <= int_val(0);            
-  end generate;
-
-  ---------------------------------------------------------------
-  -- OPTIONAL: SEPARATION STAGE
-  ---------------------------------------------------------------
-  -- When the separate functionality is required:
-  gen_separate : if(g_fft.use_separate) generate 
-    use_wideband_separator : if (g_fft.wb_factor > 1) generate    
+    
+    ---------------------------------------------------------------
+    -- OPTIONAL: SEPARATION STAGE
+    ---------------------------------------------------------------
+    -- When the separate functionality is required:
+    gen_separate : if g_fft.use_separate generate 
       u_separator : entity work.fft_sepa_wide
-      generic map(
+      generic map (
         g_fft      => g_fft,
         g_pipeline => g_fft_pipeline.sep_lat 
       )
-      port map(
+      port map (
         clk        => clk,
         rst        => rst,
         in_re_arr  => fft_out_re_arr, 
@@ -268,86 +306,70 @@ begin
       );
     end generate;
     
-    -- Use different separation implementation for wb_factor = 1 
-    use_single_channel_separator : if (g_fft.wb_factor = 1) generate        
-      in_cplx <= fft_out_im_arr(0)(g_fft.stage_dat_w-1 downto 0) & fft_out_re_arr(0)(g_fft.stage_dat_w-1 downto 0);
-  
-      u_reorder_sep : entity work.fft_reorder_sepa_pipe
+     -- In case no separation is required, the output of the parallel fft is used. 
+    no_separate : if g_fft.use_separate=false generate 
+      sep_out_re_arr <= fft_out_re_arr;    
+      sep_out_im_arr <= fft_out_im_arr;    
+      sep_out_val    <= fft_out_val;
+    end generate;  
+    
+    ---------------------------------------------------------------
+    -- OUTPUT QUANTIZER
+    ---------------------------------------------------------------
+    gen_output_requantizers : for I in g_fft.wb_factor-1 downto 0 generate
+      u_requantize_output_re : entity common_lib.common_requantize
       generic map (
-        g_bit_flip    => FALSE,              -- Reordering is done in the pipelined FFT already. 
-        g_separate    => g_fft.use_separate,
-        g_nof_points  => g_fft.nof_points
+        g_representation      => "SIGNED",      
+        g_lsb_w               => c_out_scale_w,               
+        g_lsb_round           => TRUE,           
+        g_lsb_round_clip      => FALSE,      
+        g_msb_clip            => FALSE,            
+        g_msb_clip_symmetric  => FALSE,  
+        g_gain_w              => c_out_gain_w,
+        g_pipeline_remove_lsb => c_pipeline_remove_lsb, 
+        g_pipeline_remove_msb => 0, 
+        g_in_dat_w            => g_fft.stage_dat_w,            
+        g_out_dat_w           => g_fft.out_dat_w
       )
       port map (
-        clk     => clk,
-        rst     => rst,
-        in_dat  => in_cplx,
-        in_val  => fft_out_val,
-        out_dat => out_cplx,
-        out_val => sep_out_val
-      );
-  
-      sep_out_re_arr(0) <= RESIZE_SVEC(out_cplx(  g_fft.stage_dat_w-1 downto                 0), c_fft_slv_w);
-      sep_out_im_arr(0) <= RESIZE_SVEC(out_cplx(2*g_fft.stage_dat_w-1 downto g_fft.stage_dat_w), c_fft_slv_w);
-    end generate;  
-  end generate;
-  
-   -- In case no separtion is required, the output of the parallel fft is used. 
-  no_separate : if(g_fft.use_separate=false) generate 
-    sep_out_re_arr <= fft_out_re_arr;    
-    sep_out_im_arr <= fft_out_im_arr;    
-    sep_out_val    <= fft_out_val;
-  end generate;  
+        clk        => clk,
+        in_dat     => sep_out_re_arr(I),
+        out_dat    => out_re_arr(I), 
+        out_ovr    => open
+      );  
   
-  ---------------------------------------------------------------
-  -- OUTPUT QUANTIZER
-  ---------------------------------------------------------------
-  gen_output_requantizers : for I in g_fft.wb_factor-1 downto 0 generate
-    u_requantize_output_re : entity common_lib.common_requantize
-    generic map (
-      g_representation      => "SIGNED",      
-      g_lsb_w               => c_out_scale_w,               
-      g_lsb_round           => TRUE,           
-      g_lsb_round_clip      => FALSE,      
-      g_msb_clip            => FALSE,            
-      g_msb_clip_symmetric  => FALSE,  
-      g_gain_w              => c_out_gain_w,
-      g_pipeline_remove_lsb => 0, 
-      g_pipeline_remove_msb => 0, 
-      g_in_dat_w            => g_fft.stage_dat_w,            
-      g_out_dat_w           => g_fft.out_dat_w
-    )
-    port map (
-      clk        => clk,
-      clken      => '1',
-      in_dat     => sep_out_re_arr(I),
-      out_dat    => out_re_arr(I), 
-      out_ovr    => open
-    );  
+      u_requantize_output_im : entity common_lib.common_requantize
+      generic map (
+        g_representation      => "SIGNED",      
+        g_lsb_w               => c_out_scale_w,               
+        g_lsb_round           => TRUE,           
+        g_lsb_round_clip      => FALSE,      
+        g_msb_clip            => FALSE,            
+        g_msb_clip_symmetric  => FALSE,  
+        g_gain_w              => c_out_gain_w,
+        g_pipeline_remove_lsb => c_pipeline_remove_lsb, 
+        g_pipeline_remove_msb => 0, 
+        g_in_dat_w            => g_fft.stage_dat_w,            
+        g_out_dat_w           => g_fft.out_dat_w
+      )
+      port map (
+        clk        => clk,
+        in_dat     => sep_out_im_arr(I),
+        out_dat    => out_im_arr(I), 
+        out_ovr    => open
+      );  
+    end generate;     
 
-    u_requantize_output_im : entity common_lib.common_requantize
+    u_out_val : entity common_lib.common_pipeline_sl
     generic map (
-      g_representation      => "SIGNED",      
-      g_lsb_w               => c_out_scale_w,               
-      g_lsb_round           => TRUE,           
-      g_lsb_round_clip      => FALSE,      
-      g_msb_clip            => FALSE,            
-      g_msb_clip_symmetric  => FALSE,  
-      g_gain_w              => c_out_gain_w,
-      g_pipeline_remove_lsb => 0, 
-      g_pipeline_remove_msb => 0, 
-      g_in_dat_w            => g_fft.stage_dat_w,            
-      g_out_dat_w           => g_fft.out_dat_w
+      g_pipeline => c_pipeline_remove_lsb
     )
     port map (
-      clk        => clk,
-      clken      => '1',
-      in_dat     => sep_out_im_arr(I),
-      out_dat    => out_im_arr(I), 
-      out_ovr    => open
-    );  
-  end generate;     
-  
-  out_val <= sep_out_val;
-  
+      rst     => rst,
+      clk     => clk,
+      in_dat  => sep_out_val,
+      out_dat => out_val
+    );
+    
+  end generate;  
 end rtl;
diff --git a/libraries/dsp/fft/src/vhdl/fft_sepa.vhd b/libraries/dsp/fft/src/vhdl/fft_sepa.vhd
index b8dccb90dde79ab5c94b01e22514163e359bce83..5bf2423a8595c65f9e3218c48f4ab6fc01906049 100644
--- a/libraries/dsp/fft/src/vhdl/fft_sepa.vhd
+++ b/libraries/dsp/fft/src/vhdl/fft_sepa.vhd
@@ -40,6 +40,18 @@
 --              B.real(m) = (X.imag(m)   + X.imag(N-m))/2
 --              B.imag(m) = (X.real(N-m) - X.real(m))/2
 --
+-- Remarks:
+-- . The add and sub output of the separate have 1 bit growth that needs to be
+--   rounded. Simply skipping 1 LSbit is not suitable, because it yields
+--   asymmetry around 0 and thus a DC offset. For example for N = 3-bit data:
+--              x =  -4 -3 -2 -1  0  1  2  3
+--     round(x/2) =  -2 -2 -1 -1  0  1  1  2  = common_round for signed
+--     floor(x/2) =  -2 -2 -1 -1  0  0  1  1  = truncation
+--   The most negative value can be ignored:
+--              x : mean(-3 -2 -1  0  1  2  3) = 0
+--   . round(x/2) : mean(-2 -1 -1  0  1  1  2) = 0
+--   . floor(x/2) : mean(-2 -1 -1  0  0  1  1) = -2/8 = -0.25 = -2^(N-1)/2 / 2^N
+--   So the DC offset due to truncation is -0.25 LSbit, independent of N.
 
 library IEEE, common_lib;
 use IEEE.std_logic_1164.ALL;
@@ -59,6 +71,8 @@ end entity fft_sepa;
 
 architecture rtl of fft_sepa is                
   
+  constant c_sepa_round  : boolean := true;  -- must be true, because separate should round the 1 bit growth
+  
   constant c_data_w   : natural := in_dat'length/c_nof_complex;  
   constant c_c_data_w : natural := c_nof_complex*c_data_w;
   constant c_pipeline : natural := 3;
@@ -80,6 +94,9 @@ architecture rtl of fft_sepa is
   signal sub_result : std_logic_vector(c_data_w downto 0); -- Result of the subtractor   
   signal add_result : std_logic_vector(c_data_w downto 0); -- Result of the adder   
   
+  signal sub_result_q : std_logic_vector(c_data_w-1 downto 0); -- Requantized result of the subtractor   
+  signal add_result_q : std_logic_vector(c_data_w-1 downto 0); -- Requantized result of the adder
+  
 begin
 
   ---------------------------------------------------------------
@@ -117,10 +134,51 @@ begin
     result  => sub_result
   );
 
+  gen_sepa_truncate : IF c_sepa_round=FALSE GENERATE
+    -- truncate the one LSbit
+    add_result_q <= add_result(c_data_w downto 1);
+    sub_result_q <= sub_result(c_data_w downto 1);
+  end generate;
+    
+  gen_sepa_round : IF c_sepa_round=TRUE GENERATE
+    -- Round away the one LSbit of growth of the adder and subtractor results (combinatorial, so no extra latency)
+    round_add : ENTITY common_lib.common_round
+    GENERIC MAP (
+      g_representation  => "SIGNED",  -- SIGNED (round +-0.5 away from zero to +- infinity) or UNSIGNED rounding (round 0.5 up to + infinity)
+      g_round           => TRUE,      -- when TRUE round the input, else truncate the input
+      g_round_clip      => FALSE,     -- when TRUE clip rounded input >= +max to avoid wrapping to output -min (signed) or 0 (unsigned)
+      g_pipeline_input  => 0,         -- >= 0
+      g_pipeline_output => 0,         -- >= 0, use g_pipeline_input=0 and g_pipeline_output=0 for combinatorial output
+      g_in_dat_w        => c_data_w+1,  -- adder result width, includes the 1 bit growth
+      g_out_dat_w       => c_data_w     -- rounded result is back at the input data width
+    )
+    PORT MAP (
+      clk        => clk,
+      in_dat     => add_result,
+      out_dat    => add_result_q
+    );
+  
+    round_sub : ENTITY common_lib.common_round
+    GENERIC MAP (
+      g_representation  => "SIGNED",  -- SIGNED (round +-0.5 away from zero to +- infinity) or UNSIGNED rounding (round 0.5 up to + infinity)
+      g_round           => TRUE,      -- when TRUE round the input, else truncate the input
+      g_round_clip      => FALSE,     -- when TRUE clip rounded input >= +max to avoid wrapping to output -min (signed) or 0 (unsigned)
+      g_pipeline_input  => 0,         -- >= 0
+      g_pipeline_output => 0,         -- >= 0, use g_pipeline_input=0 and g_pipeline_output=0 for combinatorial output
+      g_in_dat_w        => c_data_w+1,  -- subtractor result width, includes the 1 bit growth
+      g_out_dat_w       => c_data_w     -- rounded result is back at the input data width
+    )
+    PORT MAP (
+      clk        => clk,
+      in_dat     => sub_result,
+      out_dat    => sub_result_q
+    );
+  end generate;
+
   ---------------------------------------------------------------
   -- CONTROL PROCESS
   ---------------------------------------------------------------
-  comb : process(r, rst, in_val, in_dat, add_result, sub_result)
+  comb : process(r, rst, in_val, in_dat, add_result_q, sub_result_q)
     variable v : reg_type;
   begin
     v := r; 
@@ -130,7 +188,7 @@ begin
     v.val_dly(0) := in_val;
     
     -- Composition of the output registers:
-    v.out_dat := sub_result(c_data_w downto 1) & add_result(c_data_w downto 1);
+    v.out_dat := sub_result_q & add_result_q;
     v.out_val := r.val_dly(c_pipeline-1);
     
     -- Compose the inputs for the adder and subtractor
diff --git a/libraries/dsp/fft/src/vhdl/fft_sepa_wide.vhd b/libraries/dsp/fft/src/vhdl/fft_sepa_wide.vhd
index 5feeb2e259def0f993157014d348a1ec9993e693..5775854e1b2c2bcef95f6520d8d16f10bb99e119 100644
--- a/libraries/dsp/fft/src/vhdl/fft_sepa_wide.vhd
+++ b/libraries/dsp/fft/src/vhdl/fft_sepa_wide.vhd
@@ -320,8 +320,8 @@ begin
 
   -- Split the concatenated array into a real and imaginary array for the output
   gen_output_arrays : for I in g_fft.wb_factor-1 downto 0 generate
-    out_re_arr(I) <= RESIZE_SVEC(out_dat_arr(I)(              g_fft.stage_dat_w-1 downto                 0), c_fft_slv_w);
-    out_im_arr(I) <= RESIZE_SVEC(out_dat_arr(I)(c_nof_complex*g_fft.stage_dat_w-1 downto g_fft.stage_dat_w), c_fft_slv_w);
+    out_re_arr(I) <= resize_fft_svec(out_dat_arr(I)(              g_fft.stage_dat_w-1 downto                 0));
+    out_im_arr(I) <= resize_fft_svec(out_dat_arr(I)(c_nof_complex*g_fft.stage_dat_w-1 downto g_fft.stage_dat_w));
   end generate; 
 
 end rtl;
diff --git a/libraries/dsp/fft/tb/vhdl/tb_fft_r2_wide.vhd b/libraries/dsp/fft/tb/vhdl/tb_fft_r2_wide.vhd
index d30e4577cdd352e359b1746508925e79d92de5a7..1c814cfd6f0b3b69c417ac3a712ea71b552c1dc1 100644
--- a/libraries/dsp/fft/tb/vhdl/tb_fft_r2_wide.vhd
+++ b/libraries/dsp/fft/tb/vhdl/tb_fft_r2_wide.vhd
@@ -152,7 +152,7 @@ architecture tb of tb_fft_r2_wide is
 
   constant c_rnd_factor            : natural := sel_a_b(g_enable_in_val_gaps, 3, 1);
   constant c_dut_block_latency     : natural := 4;
-  constant c_dut_clk_latency       : natural := c_nof_valid_per_block * c_dut_block_latency * c_rnd_factor;  -- worst case
+  constant c_dut_clk_latency       : natural := c_nof_valid_per_block * c_dut_block_latency * c_rnd_factor + 50;  -- worst case
 
   -- input/output data width
   constant c_in_dat_w              : natural := g_fft.in_dat_w;   
@@ -356,13 +356,19 @@ begin
     -- Wait until tb_end_almost
     proc_common_wait_until_high(clk, tb_end_almost);
     assert in_val_cnt > 0 report "Test did not run, no valid input data"  severity error;
-    -- The PFFT has a memory of 1 block, independent of use_reorder and use_separate, but without the
-    -- reorder buffer it outputs 1 sample more, because that is immediately available in a new block.
-    -- Ensure g_data_file_nof_lines is multiple of g_fft.nof_points.
-    if g_fft.use_reorder=true then
-      assert out_val_cnt = in_val_cnt-c_nof_valid_per_block                report "Unexpected number of valid output data" severity error;
+    if g_fft.wb_factor=g_fft.nof_points then
+      -- Parallel FFT 
+      assert out_val_cnt = in_val_cnt report "Unexpected number of valid output data" severity error;
     else
-      assert out_val_cnt = in_val_cnt-c_nof_valid_per_block+c_nof_channels report "Unexpected number of valid output data" severity error;
+      -- Wideband FFT 
+      -- The PFFT has a memory of 1 block, independent of use_reorder and use_separate, but without the
+      -- reorder buffer it outputs 1 sample more, because that is immediately available in a new block.
+      -- Ensure g_data_file_nof_lines is multiple of g_fft.nof_points.
+      if g_fft.use_reorder=true then
+        assert out_val_cnt = in_val_cnt-c_nof_valid_per_block                report "Unexpected number of valid output data" severity error;
+      else
+        assert out_val_cnt = in_val_cnt-c_nof_valid_per_block+c_nof_channels report "Unexpected number of valid output data" severity error;
+      end if;
     end if;
     wait;
   end process;
diff --git a/libraries/dsp/fft/tb/vhdl/tb_tb_fft_r2_wide.vhd b/libraries/dsp/fft/tb/vhdl/tb_tb_fft_r2_wide.vhd
index ca32674640976d7ab829681f23251b8ff3eb7b2e..1dcb38e653b62dc2a6b9d334bd1b4d1600132c28 100644
--- a/libraries/dsp/fft/tb/vhdl/tb_tb_fft_r2_wide.vhd
+++ b/libraries/dsp/fft/tb/vhdl/tb_tb_fft_r2_wide.vhd
@@ -44,10 +44,13 @@ ARCHITECTURE tb OF tb_tb_fft_r2_wide IS
   CONSTANT c_pipeline       : t_fft_pipeline := (1, 1, 3, 1, 1, 0, 0, 1);
   
   CONSTANT c_fft_wb4_two_real          : t_fft := ( true, false,  true, 0, 4, 0, 128, 8, 16, 0, c_dsp_mult_w, 2, true, 56, 2);
-  CONSTANT c_fft_wb4_complex           : t_fft := ( true, false, false, 0, 4, 0,  64, 8, 16, 0, c_dsp_mult_w, 2, true, 56, 2);
   CONSTANT c_fft_wb4_complex_fft_shift : t_fft := ( true,  true, false, 0, 4, 0,  64, 8, 16, 0, c_dsp_mult_w, 2, true, 56, 2);
   CONSTANT c_fft_wb4_complex_flipped   : t_fft := (false, false, false, 0, 4, 0,  64, 8, 16, 0, c_dsp_mult_w, 2, true, 56, 2);
+  CONSTANT c_fft_wb4_complex           : t_fft := ( true, false, false, 0, 4, 0,  64, 8, 16, 0, c_dsp_mult_w, 2, true, 56, 2);
                      
+  CONSTANT c_fft_wb1_complex           : t_fft := ( true, false, false, 0, 1, 0,  64, 8, 16, 0, c_dsp_mult_w, 2, true, 56, 2);
+  CONSTANT c_fft_wb64_complex          : t_fft := ( true, false, false, 0,64, 0,  64, 8, 16, 0, c_dsp_mult_w, 2, true, 56, 2);
+  
   CONSTANT c_diff_margin    : natural := 2;
   
   -- Real input  
@@ -156,4 +159,8 @@ BEGIN
   u_act_complex_fft_shift : ENTITY work.tb_fft_r2_wide GENERIC MAP (c_pipeline, c_fft_wb4_complex_fft_shift, c_diff_margin, c_unused, 0, c_unused, 0, c_phasor_chirp,  12800,  1280, FALSE);
   u_act_complex_flipped   : ENTITY work.tb_fft_r2_wide GENERIC MAP (c_pipeline, c_fft_wb4_complex_flipped,   c_diff_margin, c_unused, 0, c_unused, 0, c_phasor_chirp,  12800,  1280, FALSE);
   u_rnd_complex_noise     : ENTITY work.tb_fft_r2_wide GENERIC MAP (c_pipeline, c_fft_wb4_complex,           c_diff_margin, c_unused, 0, c_unused, 0, c_noise_complex,   640,   640, TRUE);
+
+  -- Extreme wb_factor=1 and wb_factor=nof_points
+  u_act_wb1_complex_noise     : ENTITY work.tb_fft_r2_wide GENERIC MAP (c_pipeline, c_fft_wb1_complex,       c_diff_margin, c_unused, 0, c_unused, 0, c_noise_complex,   640,   640, FALSE);
+  u_act_wb64_complex_noise    : ENTITY work.tb_fft_r2_wide GENERIC MAP (c_pipeline, c_fft_wb64_complex,      c_diff_margin, c_unused, 0, c_unused, 0, c_noise_complex,   640,   640, FALSE);
 END tb;