Still c_use_truncate = TRUE in rTwoWMul.vhd, but prepare for using round....

Still c_use_truncate = TRUE in rTwoWMul.vhd, but prepare for using round. Increase c_fft_pipeline.mul_lat to 4. Remove g_pipeline from the tb, so that it uses the default c_fft_pipeline from rTwoSDFPkg.vhd.

Still c_use_truncate = TRUE in rTwoWMul.vhd, but prepare for using round....
dfa7df1d · Eric Kooistra · 33db9d3f · dfa7df1d · dfa7df1d · dfa7df1d
Commit dfa7df1d authored 6 years ago by Eric Kooistra
--- a/libraries/dsp/rTwoSDF/src/vhdl/rTwoSDFPkg.vhd
+++ b/libraries/dsp/rTwoSDF/src/vhdl/rTwoSDFPkg.vhd
@@ -30,7 +30,7 @@ package rTwoSDFPkg is
    -- generics for rTwoSDFStage
    stage_lat      : natural;  -- = 1
    weight_lat     : natural;  -- = 1
-    mul_lat        : natural;  -- = 3
+    mul_lat        : natural;  -- = 3+1
    -- generics for rTwoBFStage
    bf_lat         : natural;  -- = 1
    -- generics for rTwoBF
@@ -41,7 +41,7 @@ package rTwoSDFPkg is
    sep_lat        : natural;  -- = 1
  end record;
  
-  constant c_fft_pipeline   : t_fft_pipeline := (1, 1, 3, 1, 1, 0, 0, 1);
+  constant c_fft_pipeline   : t_fft_pipeline := (1, 1, 4, 1, 1, 0, 0, 1);
  
 end package rTwoSDFPkg;


--- a/libraries/dsp/rTwoSDF/src/vhdl/rTwoWMul.vhd
+++ b/libraries/dsp/rTwoSDF/src/vhdl/rTwoWMul.vhd
@@ -28,7 +28,7 @@ entity rTwoWMul is
  generic (
    g_technology : NATURAL := c_tech_select_default;
    g_stage      : natural := 1;
-    g_lat        : natural := 3
+    g_lat        : natural := 3+1       -- 3 for mult, 1 for round
  );
  port (
    clk       : in  std_logic;
@@ -47,16 +47,32 @@ end entity rTwoWMul;

 architecture str of rTwoWMul is

-  -- Derive the common_complex_mult g_pipeline_* values from g_lat. The sum must match g_lat.
-  constant c_mult_input_lat   : natural := sel_a_b(g_lat>1, 1, 0);                     -- second priority use DSP pipeline input
-  constant c_mult_product_lat : natural := 0;
-  constant c_mult_adder_lat   : natural := sel_a_b(g_lat>2, 1, 0);                     -- third priority use DSP internal product-sum pipeline
-  constant c_mult_extra_lat   : natural := sel_a_b(g_lat>3, g_lat-3, 0);               -- remaining extra pipelining in logic
-  constant c_mult_output_lat  : natural := sel_a_b(g_lat>0, 1, 0) + c_mult_extra_lat;  -- first priority use DSP pipeline output
+  -- Use multiplier product truncate or signed rounding (= away from zero). On hardware for Fsub in
+  -- Apertif and using the WG at various frequencies at subband or between subbands it appears that
+  -- using truncate or sround does not make a noticeable difference in the SST. Signed rounding is
+  -- preferred to preserve zero DC, but truncate is still selected for now (c_use_truncate = true).
+  constant c_use_truncate     : boolean := true; --false;
+  
+  -- Derive the common_complex_mult g_pipeline_* values from g_lat. The sum c_total_lat = g_lat, so that g_lat defines
+  -- the total latency from in_* to out_*.
  
  -- DSP multiplier IP
  constant c_dsp_mult_lat     : natural := 3;

+  -- Pipeline multiplier product rounding from c_prod_w via c_round_w to c_out_dat_w
+  constant c_round_lat        : natural := sel_a_b(g_lat > c_dsp_mult_lat, 1, 0);  -- allocate 1 pipeline for round
+  constant c_lat              : natural := g_lat - c_round_lat;                    -- allocate remaining pipeline to multiplier
+  
+  constant c_mult_input_lat   : natural := sel_a_b(c_lat>1, 1, 0);                     -- second priority use DSP pipeline input
+  constant c_mult_product_lat : natural := 0;
+  constant c_mult_adder_lat   : natural := sel_a_b(c_lat>2, 1, 0);                     -- third priority use DSP internal product-sum pipeline
+  constant c_mult_extra_lat   : natural := sel_a_b(c_lat>3, c_lat-3, 0);               -- remaining extra pipelining in logic
+  constant c_mult_output_lat  : natural := sel_a_b(c_lat>0, 1, 0) + c_mult_extra_lat;  -- first priority use DSP pipeline output
+  constant c_mult_lat         : natural := c_mult_input_lat + c_mult_product_lat + c_mult_adder_lat + c_mult_output_lat;
+  
+  -- Total input to output latency
+  constant c_total_lat   : natural := c_mult_lat + c_round_lat;
+  
  -- Quantization
  constant c_in_dat_w         : natural:= in_re'length;
  constant c_weight_w         : natural:= weight_re'length;
@@ -74,6 +90,11 @@ architecture str of rTwoWMul is

 begin
  
+  -- Total latency check
+  ASSERT c_total_lat = g_lat
+    REPORT "rTwoWMul: total pipeline error"
+    SEVERITY FAILURE;
+  
  ------------------------------------------------------------------------------
  -- Complex multiplication
  -- . use the common_complex_mult(rtl) for the output stage 1 because then
@@ -81,11 +102,11 @@ begin
  --   weight_re = 1 and weight_im = 0 inputs.
  -- . the IP in common_complex_mult(stratix4) only supports up to 18b wide
  --   inputs.
-  --   . for g_lat = 0,1,2 use the RTL multiplier
-  --   . for g_lat >= 3 default best use the FPGA multiplier IP block.
+  --   . for c_lat = 0,1,2 use the RTL multiplier
+  --   . for c_lat >= 3 default best use the FPGA multiplier IP block.
  ------------------------------------------------------------------------------

-  gen_rtl : if g_stage=1 or c_in_dat_w>c_dsp_mult_w or g_lat<c_dsp_mult_lat generate
+  gen_rtl : if g_stage=1 or c_in_dat_w>c_dsp_mult_w or c_lat<c_dsp_mult_lat generate
    u_CmplxMul : entity common_mult_lib.common_complex_mult
    generic map (
      g_technology       => g_technology,
@@ -109,11 +130,11 @@ begin
      in_val    => in_val,
      out_pr    => product_re,
      out_pi    => product_im,
-      out_val   => out_val
+      out_val   => OPEN
    );
  end generate;
  
-  gen_ip : if g_stage>1 and c_in_dat_w<=c_dsp_mult_w and g_lat>=c_dsp_mult_lat generate
+  gen_ip : if g_stage>1 and c_in_dat_w<=c_dsp_mult_w and c_lat>=c_dsp_mult_lat generate
    u_cmplx_mul : entity common_mult_lib.common_complex_mult
    generic map (
      g_technology       => g_technology,
@@ -137,7 +158,7 @@ begin
      in_val    => in_val,
      out_pr    => product_re,
      out_pi    => product_im,
-      out_val   => out_val
+      out_val   => OPEN
    );
  end generate;

@@ -145,20 +166,40 @@ begin
  -- Round WMult output
  ------------------------------------------------------------------------------
  
+  gen_truncate : if c_use_truncate=true GENERATE
    -- use truncate    that throws away the c_round_w lower bits as rounding function
    -- use resize_svec that keeps the c_out_dat_w lower bits to get to the output width
+    gen_comb : if c_round_lat=0 generate
      round_re <= truncate_and_resize_svec(product_re, c_round_w, c_out_dat_w);
      round_im <= truncate_and_resize_svec(product_im, c_round_w, c_out_dat_w);
+    end generate;
+    gen_reg : if c_round_lat=1 generate
+      round_re <= truncate_and_resize_svec(product_re, c_round_w, c_out_dat_w) when rising_edge(clk);
+      round_im <= truncate_and_resize_svec(product_im, c_round_w, c_out_dat_w) when rising_edge(clk);
+    end generate;
+  end generate;
    
-  -- output real and imaginary, switch between input and product
-  out_re <= round_re when out_sel = '1' else in_re_dly;
-  out_im <= round_im when out_sel = '1' else in_im_dly;
+  
+  gen_sround : if c_use_truncate=false GENERATE
+    -- Use resize_svec(s_round()) instead of truncate_and_resize_svec() to have symmetrical rounding around 0.
+    -- Rounding takes logic due to adding 0.5, therefore c_round_lat=1 is needed to achieve timing.
+    gen_comb : if c_round_lat=0 generate
+      ASSERT false REPORT "rTwoWMul: can probably not achieve timing for sround without pipeline" SEVERITY FAILURE;
+      round_re <= RESIZE_SVEC(s_round(product_re, c_round_w), c_out_dat_w);
+      round_im <= RESIZE_SVEC(s_round(product_im, c_round_w), c_out_dat_w);
+    end generate;
+    gen_reg : if c_round_lat=1 generate
+      round_re <= RESIZE_SVEC(s_round(product_re, c_round_w), c_out_dat_w) when rising_edge(clk);
+      round_im <= RESIZE_SVEC(s_round(product_im, c_round_w), c_out_dat_w) when rising_edge(clk);
+    end generate;
+  end generate;


  ------------------------------------------------------------------------------
  -- Propagate data and control signals for input/output choice at WMult output
  ------------------------------------------------------------------------------

+  -- No need to use rst for data, because initial data value is don't care
  u_re_lat : entity common_lib.common_pipeline
  generic map (
    g_pipeline  => g_lat,
@@ -183,14 +224,33 @@ begin
    out_dat => in_im_dly
  );

+  -- Use rst for control to ensure initial low
  u_sel_lat : entity common_lib.common_pipeline_sl
  generic map (
    g_pipeline => g_lat
  )
  port map (
+    rst     => rst,
    clk     => clk,
    in_dat  => in_sel,
    out_dat => out_sel
  );

+  u_pipeline_out_val : entity common_lib.common_pipeline_sl
+  generic map (
+    g_pipeline  => g_lat
+  )
+  port map (
+    rst     => rst,
+    clk     => clk,
+    in_dat  => in_val,
+    out_dat => out_val
+  );
+
+  ------------------------------------------------------------------------------
+  -- Output real and imaginary, switch between input and product
+  ------------------------------------------------------------------------------
+  out_re <= round_re when out_sel = '1' else in_re_dly;
+  out_im <= round_im when out_sel = '1' else in_im_dly;
+
 end str;
\ No newline at end of file
--- a/libraries/dsp/rTwoSDF/tb/vhdl/tb_rTwoSDF.vhd
+++ b/libraries/dsp/rTwoSDF/tb/vhdl/tb_rTwoSDF.vhd
@@ -84,22 +84,7 @@ entity tb_rTwoSDF is
    g_nof_points        : natural  := 1024;
    g_in_dat_w          : natural  := 8;   
    g_out_dat_w         : natural  := 14;   
-    g_guard_w           : natural  := 2;     -- guard bits are used to avoid overflow in single FFT stage.   
-
-    -- Internal pipeline settings for rTwoSDF
-    g_pipeline : t_fft_pipeline := (1, 1, 3, 1, 1, 0, 0, 1)  -- type t_rtwo_sdf_stage_pipeline is record
-                                                             --   -- generics for rTwoSDFStage
-                                                             --   stage_lat      : natural;  -- = 1
-                                                             --   weight_lat     : natural;  -- = 1
-                                                             --   mul_lat        : natural;  -- = 3
-                                                             --   -- generics for rTwoBFStage
-                                                             --   bf_lat         : natural;  -- = 1
-                                                             --   -- generics for rTwoBF
-                                                             --   bf_use_zdly    : natural;  -- = 1
-                                                             --   bf_in_a_zdly   : natural;  -- = 0
-                                                             --   bf_out_d_zdly  : natural;  -- = 0
-                                                             --   sep_lat        : natural;  -- = 1
-                                                             -- end record;
+    g_guard_w           : natural  := 2      -- guard bits are used to avoid overflow in single FFT stage.
  );
 end entity tb_rTwoSDF;

@@ -269,9 +254,7 @@ begin
    g_out_dat_w   => g_out_dat_w, 
    g_stage_dat_w => c_stage_dat_w,
    g_guard_w     => g_guard_w,
-    g_nof_points  => g_nof_points,
-    -- generics for rTwoSDFStage
-    g_pipeline    => g_pipeline
+    g_nof_points  => g_nof_points
  )
  port map(
    clk       => clk,

--- a/libraries/dsp/rTwoSDF/tb/vhdl/tb_tb_rTwoSDF.vhd
+++ b/libraries/dsp/rTwoSDF/tb/vhdl/tb_tb_rTwoSDF.vhd
@@ -52,26 +52,11 @@ begin
 --  g_nof_points        : natural  := 1024;
 --  g_in_dat_w          : natural  := 8;   
 --  g_out_dat_w         : natural  := 14;   
--  g_guard_w           : natural  := 2;     -- guard bits are used to avoid overflow in single FFT stage.   
--
--  -- Internal pipeline settings for rTwoSDF
--  g_pipeline : t_fft_pipeline := (1, 1, 3, 1, 1, 0, 0, 1)  -- type t_rtwo_sdf_stage_pipeline is record
--                                                           --   -- generics for rTwoSDFStage
--                                                           --   stage_lat      : natural;  -- = 1
--                                                           --   weight_lat     : natural;  -- = 1
--                                                           --   mul_lat        : natural;  -- = 3
--                                                           --   -- generics for rTwoBFStage
--                                                           --   bf_lat         : natural;  -- = 1
--                                                           --   -- generics for rTwoBF
--                                                           --   bf_use_zdly    : natural;  -- = 1
--                                                           --   bf_in_a_zdly   : natural;  -- = 0
--                                                           --   bf_out_d_zdly  : natural;  -- = 0
--                                                           --   sep_lat        : natural;  -- = 1
--                                                           -- end record;
+--  g_guard_w           : natural  := 2      -- guard bits are used to avoid overflow in single FFT stage.   

-  --u_act_impulse_16p_16i_16o         : entity work.tb_rTwoSDF generic map (false, 1,  true,   16, 16, 16, 2, (1, 1, 3, 1, 1, 0, 0, 1));
-  u_act_noise_1024p_8i_14o          : entity work.tb_rTwoSDF generic map (true,  1,  true, 1024,  8, 14, 2, (1, 1, 3, 1, 1, 0, 0, 1));
-  u_rnd_noise_1024p_8i_14o          : entity work.tb_rTwoSDF generic map (true,  0,  true, 1024,  8, 14, 2, (1, 1, 3, 1, 1, 0, 0, 1));
-  u_rnd_noise_1024p_8i_14o_flipped  : entity work.tb_rTwoSDF generic map (true,  0, false, 1024,  8, 14, 2, (1, 1, 3, 1, 1, 0, 0, 1));
+  --u_act_impulse_16p_16i_16o         : entity work.tb_rTwoSDF generic map (false, 1,  true,   16, 16, 16, 2);
+  u_act_noise_1024p_8i_14o          : entity work.tb_rTwoSDF generic map (true,  1,  true, 1024,  8, 14, 2);
+  u_rnd_noise_1024p_8i_14o          : entity work.tb_rTwoSDF generic map (true,  0,  true, 1024,  8, 14, 2);
+  u_rnd_noise_1024p_8i_14o_flipped  : entity work.tb_rTwoSDF generic map (true,  0, false, 1024,  8, 14, 2);
  
 end tb;