Skip to content
Snippets Groups Projects
Commit dfa7df1d authored by Eric Kooistra's avatar Eric Kooistra
Browse files

Still c_use_truncate = TRUE in rTwoMult.vhd, but prepare for using round....

Still c_use_truncate = TRUE in rTwoMult.vhd, but prepare for using round. Increase c_fft_pipeline.mul_lat to 4. Remove g_pipeline from the tb, which now uses the default c_fft_pipeline from rTwoSDFPkg.vhd.
parent 33db9d3f
No related branches found
No related tags found
No related merge requests found
......@@ -30,7 +30,7 @@ package rTwoSDFPkg is
-- generics for rTwoSDFStage
stage_lat : natural; -- = 1
weight_lat : natural; -- = 1
mul_lat : natural; -- = 3
mul_lat : natural; -- = 3+1
-- generics for rTwoBFStage
bf_lat : natural; -- = 1
-- generics for rTwoBF
......@@ -41,7 +41,7 @@ package rTwoSDFPkg is
sep_lat : natural; -- = 1
end record;
constant c_fft_pipeline : t_fft_pipeline := (1, 1, 3, 1, 1, 0, 0, 1);
constant c_fft_pipeline : t_fft_pipeline := (1, 1, 4, 1, 1, 0, 0, 1);
end package rTwoSDFPkg;
......
......@@ -28,7 +28,7 @@ entity rTwoWMul is
generic (
g_technology : NATURAL := c_tech_select_default;
g_stage : natural := 1;
g_lat : natural := 3
g_lat : natural := 3+1 -- 3 for mult, 1 for round
);
port (
clk : in std_logic;
......@@ -47,16 +47,32 @@ end entity rTwoWMul;
architecture str of rTwoWMul is
-- Derive the common_complex_mult g_pipeline_* values from g_lat. The sum must match g_lat.
constant c_mult_input_lat : natural := sel_a_b(g_lat>1, 1, 0); -- second priority use DSP pipeline input
constant c_mult_product_lat : natural := 0;
constant c_mult_adder_lat : natural := sel_a_b(g_lat>2, 1, 0); -- third priority use DSP internal product-sum pipeline
constant c_mult_extra_lat : natural := sel_a_b(g_lat>3, g_lat-3, 0); -- remaining extra pipelining in logic
constant c_mult_output_lat : natural := sel_a_b(g_lat>0, 1, 0) + c_mult_extra_lat; -- first priority use DSP pipeline output
-- Use multiplier product truncate or signed rounding (= away from zero). On hardware for Fsub in
-- Apertif and using the WG at various frequencies at subband or between subbands it appears that
-- using truncate or sround does not make a noticeable difference in the SST. Still choose to use
-- signed rounding to preserve zero DC.
constant c_use_truncate : boolean := true; --false;
-- Derive the common_complex_mult g_pipeline_* values from g_lat. The sum c_total_lat = g_lat, so that g_lat defines
-- the total latency from in_* to out_*.
-- DSP multiplier IP
constant c_dsp_mult_lat : natural := 3;
-- Pipeline multiplier product rounding from c_prod_w via c_round_w to c_out_dat_w
constant c_round_lat : natural := sel_a_b(g_lat > c_dsp_mult_lat, 1, 0); -- allocate 1 pipeline for round
constant c_lat : natural := g_lat - c_round_lat; -- allocate remaining pipeline to multiplier
constant c_mult_input_lat : natural := sel_a_b(c_lat>1, 1, 0); -- second priority use DSP pipeline input
constant c_mult_product_lat : natural := 0;
constant c_mult_adder_lat : natural := sel_a_b(c_lat>2, 1, 0); -- third priority use DSP internal product-sum pipeline
constant c_mult_extra_lat : natural := sel_a_b(c_lat>3, c_lat-3, 0); -- remaining extra pipelining in logic
constant c_mult_output_lat : natural := sel_a_b(c_lat>0, 1, 0) + c_mult_extra_lat; -- first priority use DSP pipeline output
constant c_mult_lat : natural := c_mult_input_lat + c_mult_product_lat + c_mult_adder_lat + c_mult_output_lat;
-- Total input to output latency
constant c_total_lat : natural := c_mult_lat + c_round_lat;
-- Quantization
constant c_in_dat_w : natural:= in_re'length;
constant c_weight_w : natural:= weight_re'length;
......@@ -74,6 +90,11 @@ architecture str of rTwoWMul is
begin
-- Total latency check
ASSERT c_total_lat = g_lat
REPORT "rTwoWMul: total pipeline error"
SEVERITY FAILURE;
------------------------------------------------------------------------------
-- Complex multiplication
-- . use the common_complex_mult(rtl) for the output stage 1 because then
......@@ -81,11 +102,11 @@ begin
-- weight_re = 1 and weight_im = 0 inputs.
-- . the IP in common_complex_mult(stratix4) only supports up to 18b wide
-- inputs.
-- . for g_lat = 0,1,2 use the RTL multiplier
-- . for g_lat >= 3 default best use the FPGA multiplier IP block.
-- . for c_lat = 0,1,2 use the RTL multiplier
-- . for c_lat >= 3 default best use the FPGA multiplier IP block.
------------------------------------------------------------------------------
gen_rtl : if g_stage=1 or c_in_dat_w>c_dsp_mult_w or g_lat<c_dsp_mult_lat generate
gen_rtl : if g_stage=1 or c_in_dat_w>c_dsp_mult_w or c_lat<c_dsp_mult_lat generate
u_CmplxMul : entity common_mult_lib.common_complex_mult
generic map (
g_technology => g_technology,
......@@ -109,11 +130,11 @@ begin
in_val => in_val,
out_pr => product_re,
out_pi => product_im,
out_val => out_val
out_val => OPEN
);
end generate;
gen_ip : if g_stage>1 and c_in_dat_w<=c_dsp_mult_w and g_lat>=c_dsp_mult_lat generate
gen_ip : if g_stage>1 and c_in_dat_w<=c_dsp_mult_w and c_lat>=c_dsp_mult_lat generate
u_cmplx_mul : entity common_mult_lib.common_complex_mult
generic map (
g_technology => g_technology,
......@@ -137,7 +158,7 @@ begin
in_val => in_val,
out_pr => product_re,
out_pi => product_im,
out_val => out_val
out_val => OPEN
);
end generate;
......@@ -145,20 +166,40 @@ begin
-- Round WMult output
------------------------------------------------------------------------------
gen_truncate : if c_use_truncate=true GENERATE
-- use truncate that throws away the c_round_w lower bits as rounding function
-- use resize_svec that keeps the c_out_dat_w lower bits to get to the output width
gen_comb : if c_round_lat=0 generate
round_re <= truncate_and_resize_svec(product_re, c_round_w, c_out_dat_w);
round_im <= truncate_and_resize_svec(product_im, c_round_w, c_out_dat_w);
end generate;
gen_reg : if c_round_lat=1 generate
round_re <= truncate_and_resize_svec(product_re, c_round_w, c_out_dat_w) when rising_edge(clk);
round_im <= truncate_and_resize_svec(product_im, c_round_w, c_out_dat_w) when rising_edge(clk);
end generate;
end generate;
-- output real and imaginary, switch between input and product
out_re <= round_re when out_sel = '1' else in_re_dly;
out_im <= round_im when out_sel = '1' else in_im_dly;
gen_sround : if c_use_truncate=false GENERATE
-- Use resize_svec(s_round()) instead of truncate_and_resize_svec() to have symmetrical rounding around 0
-- Rounding takes logic due to adding 0.5, therefore c_round_lat=1 is needed to achieve timing
gen_comb : if c_round_lat=0 generate
ASSERT false REPORT "rTwoWMul: can probably not achieve timing for sround without pipeline" SEVERITY FAILURE;
round_re <= RESIZE_SVEC(s_round(product_re, c_round_w), c_out_dat_w);
round_im <= RESIZE_SVEC(s_round(product_im, c_round_w), c_out_dat_w);
end generate;
gen_reg : if c_round_lat=1 generate
round_re <= RESIZE_SVEC(s_round(product_re, c_round_w), c_out_dat_w) when rising_edge(clk);
round_im <= RESIZE_SVEC(s_round(product_im, c_round_w), c_out_dat_w) when rising_edge(clk);
end generate;
end generate;
------------------------------------------------------------------------------
-- Propagate data and control signals for input/output choice at WMult output
------------------------------------------------------------------------------
-- No need to use rst for data, because initial data value is don't care
u_re_lat : entity common_lib.common_pipeline
generic map (
g_pipeline => g_lat,
......@@ -183,14 +224,33 @@ begin
out_dat => in_im_dly
);
-- Use rst for control to ensure initial low
u_sel_lat : entity common_lib.common_pipeline_sl
generic map (
g_pipeline => g_lat
)
port map (
rst => rst,
clk => clk,
in_dat => in_sel,
out_dat => out_sel
);
u_pipeline_out_val : entity common_lib.common_pipeline_sl
generic map (
g_pipeline => g_lat
)
port map (
rst => rst,
clk => clk,
in_dat => in_val,
out_dat => out_val
);
------------------------------------------------------------------------------
-- Output real and imaginary, switch between input and product
------------------------------------------------------------------------------
out_re <= round_re when out_sel = '1' else in_re_dly;
out_im <= round_im when out_sel = '1' else in_im_dly;
end str;
\ No newline at end of file
......@@ -84,22 +84,7 @@ entity tb_rTwoSDF is
g_nof_points : natural := 1024;
g_in_dat_w : natural := 8;
g_out_dat_w : natural := 14;
g_guard_w : natural := 2; -- guard bits are used to avoid overflow in single FFT stage.
-- Internal pipeline settings for rTwoSDF
g_pipeline : t_fft_pipeline := (1, 1, 3, 1, 1, 0, 0, 1) -- type t_rtwo_sdf_stage_pipeline is record
-- -- generics for rTwoSDFStage
-- stage_lat : natural; -- = 1
-- weight_lat : natural; -- = 1
-- mul_lat : natural; -- = 3
-- -- generics for rTwoBFStage
-- bf_lat : natural; -- = 1
-- -- generics for rTwoBF
-- bf_use_zdly : natural; -- = 1
-- bf_in_a_zdly : natural; -- = 0
-- bf_out_d_zdly : natural; -- = 0
-- sep_lat : natural; -- = 1
-- end record;
g_guard_w : natural := 2 -- guard bits are used to avoid overflow in single FFT stage.
);
end entity tb_rTwoSDF;
......@@ -269,9 +254,7 @@ begin
g_out_dat_w => g_out_dat_w,
g_stage_dat_w => c_stage_dat_w,
g_guard_w => g_guard_w,
g_nof_points => g_nof_points,
-- generics for rTwoSDFStage
g_pipeline => g_pipeline
g_nof_points => g_nof_points
)
port map(
clk => clk,
......
......@@ -52,26 +52,11 @@ begin
-- g_nof_points : natural := 1024;
-- g_in_dat_w : natural := 8;
-- g_out_dat_w : natural := 14;
-- g_guard_w : natural := 2; -- guard bits are used to avoid overflow in single FFT stage.
--
-- -- Internal pipeline settings for rTwoSDF
-- g_pipeline : t_fft_pipeline := (1, 1, 3, 1, 1, 0, 0, 1) -- type t_rtwo_sdf_stage_pipeline is record
-- -- -- generics for rTwoSDFStage
-- -- stage_lat : natural; -- = 1
-- -- weight_lat : natural; -- = 1
-- -- mul_lat : natural; -- = 3
-- -- -- generics for rTwoBFStage
-- -- bf_lat : natural; -- = 1
-- -- -- generics for rTwoBF
-- -- bf_use_zdly : natural; -- = 1
-- -- bf_in_a_zdly : natural; -- = 0
-- -- bf_out_d_zdly : natural; -- = 0
-- -- sep_lat : natural; -- = 1
-- -- end record;
-- g_guard_w : natural := 2 -- guard bits are used to avoid overflow in single FFT stage.
--u_act_impulse_16p_16i_16o : entity work.tb_rTwoSDF generic map (false, 1, true, 16, 16, 16, 2, (1, 1, 3, 1, 1, 0, 0, 1));
u_act_noise_1024p_8i_14o : entity work.tb_rTwoSDF generic map (true, 1, true, 1024, 8, 14, 2, (1, 1, 3, 1, 1, 0, 0, 1));
u_rnd_noise_1024p_8i_14o : entity work.tb_rTwoSDF generic map (true, 0, true, 1024, 8, 14, 2, (1, 1, 3, 1, 1, 0, 0, 1));
u_rnd_noise_1024p_8i_14o_flipped : entity work.tb_rTwoSDF generic map (true, 0, false, 1024, 8, 14, 2, (1, 1, 3, 1, 1, 0, 0, 1));
--u_act_impulse_16p_16i_16o : entity work.tb_rTwoSDF generic map (false, 1, true, 16, 16, 16, 2);
u_act_noise_1024p_8i_14o : entity work.tb_rTwoSDF generic map (true, 1, true, 1024, 8, 14, 2);
u_rnd_noise_1024p_8i_14o : entity work.tb_rTwoSDF generic map (true, 0, true, 1024, 8, 14, 2);
u_rnd_noise_1024p_8i_14o_flipped : entity work.tb_rTwoSDF generic map (true, 0, false, 1024, 8, 14, 2);
end tb;
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment