diff --git a/libraries/base/common/src/vhdl/common_complex_mult.vhd b/libraries/base/common/src/vhdl/common_complex_mult.vhd index c92b110048348ca6b48c64b98e8373649ed5e32e..887194efc31e5cd2764a01b57fe6c39e409c4e49 100644 --- a/libraries/base/common/src/vhdl/common_complex_mult.vhd +++ b/libraries/base/common/src/vhdl/common_complex_mult.vhd @@ -19,9 +19,10 @@ -- ------------------------------------------------------------------------------- -LIBRARY IEEE; +LIBRARY IEEE, technology_lib, tech_mult_lib; USE IEEE.std_logic_1164.ALL; USE IEEE.numeric_std.ALL; +USE technology_lib.technology_select_pkg.ALL; USE work.common_pkg.ALL; -- @@ -46,6 +47,8 @@ USE work.common_pkg.ALL; ENTITY common_complex_mult IS GENERIC ( + g_technology : NATURAL := c_tech_select_default; + g_variant : STRING := "IP"; g_in_a_w : POSITIVE; g_in_b_w : POSITIVE; g_out_p_w : POSITIVE; -- default use g_out_p_w = g_in_a_w+g_in_b_w = c_prod_w @@ -71,70 +74,42 @@ ENTITY common_complex_mult IS END common_complex_mult; -------------------------------------------------------------------------------- --- str -------------------------------------------------------------------------------- - ARCHITECTURE str OF common_complex_mult IS - CONSTANT c_pipeline : NATURAL := g_pipeline_input + g_pipeline_product + g_pipeline_adder + g_pipeline_output; - - CONSTANT c_re_add_sub : STRING := sel_a_b(g_conjugate_b, "ADD", "SUB"); - CONSTANT c_im_add_sub : STRING := sel_a_b(g_conjugate_b, "SUB", "ADD"); - - SIGNAL in_a_ir : STD_LOGIC_VECTOR(2*g_in_a_w-1 DOWNTO 0); - SIGNAL in_a_ri : STD_LOGIC_VECTOR(2*g_in_a_w-1 DOWNTO 0); - SIGNAL in_b_ir : STD_LOGIC_VECTOR(2*g_in_b_w-1 DOWNTO 0); + CONSTANT c_pipeline : NATURAL := g_pipeline_input + g_pipeline_product + g_pipeline_adder + g_pipeline_output; + -- MegaWizard IP ip_stratixiv_complex_mult was generated with latency c_dsp_latency = 3 + CONSTANT c_dsp_latency : NATURAL := 3; + + -- Extra output pipelining is only needed when c_pipeline > c_dsp_latency + CONSTANT 
c_pipeline_output : NATURAL := sel_a_b(c_pipeline>c_dsp_latency, c_pipeline-c_dsp_latency, 0); + + -- Force to maximum 18 bit width, because: + -- . the ip_stratixiv_complex_mult is generated for 18b inputs and 36b output and then uses 4 real multipliers and no additional registers + -- . if one input > 18b then another IP needs to be regenerated and that will use 8 real multipliers and some extra LUTs and registers + -- . if both inputs > 18b then another IP needs to be regenerated and that will use 16 real multipliers and some extra LUTs and registers + -- . if the output is set to 18b+18b + 1b =37b to account for the sum then another IP needs to be regenerated and that will use some extra registers + -- ==> for inputs <= 18b this ip_stratixiv_complex_mult is appropriate and it can not be made parametrisable to fit also inputs > 18b. + CONSTANT c_dsp_dat_w : NATURAL := 18; + CONSTANT c_dsp_prod_w : NATURAL := 2*c_dsp_dat_w; + + SIGNAL ar : STD_LOGIC_VECTOR(c_dsp_dat_w-1 DOWNTO 0); + SIGNAL ai : STD_LOGIC_VECTOR(c_dsp_dat_w-1 DOWNTO 0); + SIGNAL br : STD_LOGIC_VECTOR(c_dsp_dat_w-1 DOWNTO 0); + SIGNAL bi : STD_LOGIC_VECTOR(c_dsp_dat_w-1 DOWNTO 0); + SIGNAL mult_re : STD_LOGIC_VECTOR(c_dsp_prod_w-1 DOWNTO 0); + SIGNAL mult_im : STD_LOGIC_VECTOR(c_dsp_prod_w-1 DOWNTO 0); + SIGNAL result_re : STD_LOGIC_VECTOR(g_out_p_w-1 DOWNTO 0); + SIGNAL result_im : STD_LOGIC_VECTOR(g_out_p_w-1 DOWNTO 0); + BEGIN - in_a_ir <= in_ai & in_ar; - in_a_ri <= in_ar & in_ai; - in_b_ir <= in_bi & in_br; + -- User specified latency must be >= MegaWizard IP dsp_mult_add2 latency + ASSERT c_pipeline >= c_dsp_latency + REPORT "tech_complex_mult(stratix4): pipeline value not supported" + SEVERITY FAILURE; - re : ENTITY work.common_mult_add2(rtl) - GENERIC MAP ( - g_in_a_w => g_in_a_w, - g_in_b_w => g_in_b_w, - g_res_w => g_out_p_w, - g_add_sub => c_re_add_sub, -- vector low part product + or - vector high part product - -- . "SUB" for a*b : ar*br - ai*bi --> a_ir - b_ir - -- .
"ADD" for a*conj(b) : ar*br + ai*bi --> a_ir + b_ir - g_pipeline_input => g_pipeline_input, - g_pipeline_product => g_pipeline_product, - g_pipeline_adder => g_pipeline_adder, - g_pipeline_output => g_pipeline_output - ) - PORT MAP ( - clk => clk, - clken => clken, - in_a => in_a_ir, - in_b => in_b_ir, - res => out_pr - ); - - im : ENTITY work.common_mult_add2(rtl) - GENERIC MAP ( - g_in_a_w => g_in_a_w, - g_in_b_w => g_in_b_w, - g_res_w => g_out_p_w, - g_add_sub => c_im_add_sub, -- vector low part product + or - vector high part product - -- . "ADD" for a*b : ai*br + ar*bi --> a_ri - b_ir - -- . "SUB" for a*conj(b) : ai*br - ar*bi --> a_ri + b_ir - g_pipeline_input => g_pipeline_input, - g_pipeline_product => g_pipeline_product, - g_pipeline_adder => g_pipeline_adder, - g_pipeline_output => g_pipeline_output - ) - PORT MAP ( - clk => clk, - clken => clken, - in_a => in_a_ri, - in_b => in_b_ir, - res => out_pi - ); - - -- Propagate in_val with dsp latency + -- Propagate in_val with c_pipeline latency u_out_val : ENTITY work.common_pipeline_sl GENERIC MAP ( g_pipeline => c_pipeline @@ -147,219 +122,40 @@ BEGIN out_dat => out_val ); -END ARCHITECTURE; -- str - - -------------------------------------------------------------------------------- --- rtl -------------------------------------------------------------------------------- + -- Adapt DSP input widths + ar <= RESIZE_SVEC(in_ar, c_dsp_dat_w); + ai <= RESIZE_SVEC(in_ai, c_dsp_dat_w); + br <= RESIZE_SVEC(in_br, c_dsp_dat_w); + bi <= RESIZE_SVEC(in_bi, c_dsp_dat_w) WHEN g_conjugate_b=FALSE ELSE TO_SVEC(-TO_SINT(in_bi), c_dsp_dat_w); -ARCHITECTURE rtl OF common_complex_mult IS - - CONSTANT c_pipeline : NATURAL := g_pipeline_input + g_pipeline_product + g_pipeline_adder + g_pipeline_output; - - -- Extra output pipelining using common_pipeline is only needed when g_pipeline_output > 1 - CONSTANT c_pipeline_output : NATURAL := sel_a_b(g_pipeline_output>0, g_pipeline_output-1, 0); - - CONSTANT c_prod_w : NATURAL := 
g_in_a_w+g_in_b_w; - CONSTANT c_sum_w : NATURAL := c_prod_w+1; - - CONSTANT c_re_add_sub : STRING := sel_a_b(g_conjugate_b, "ADD", "SUB"); - CONSTANT c_im_add_sub : STRING := sel_a_b(g_conjugate_b, "SUB", "ADD"); - - -- registers - SIGNAL reg_ar : SIGNED(g_in_a_w-1 DOWNTO 0); - SIGNAL reg_ai : SIGNED(g_in_a_w-1 DOWNTO 0); - SIGNAL reg_br : SIGNED(g_in_b_w-1 DOWNTO 0); - SIGNAL reg_bi : SIGNED(g_in_b_w-1 DOWNTO 0); - SIGNAL reg_prod_ar_br : SIGNED(c_prod_w-1 DOWNTO 0); -- re - SIGNAL reg_prod_ai_bi : SIGNED(c_prod_w-1 DOWNTO 0); - SIGNAL reg_prod_ai_br : SIGNED(c_prod_w-1 DOWNTO 0); -- im - SIGNAL reg_prod_ar_bi : SIGNED(c_prod_w-1 DOWNTO 0); - SIGNAL reg_sum_re : SIGNED(c_sum_w-1 DOWNTO 0); - SIGNAL reg_sum_im : SIGNED(c_sum_w-1 DOWNTO 0); - SIGNAL reg_result_re : SIGNED(g_out_p_w-1 DOWNTO 0); - SIGNAL reg_result_im : SIGNED(g_out_p_w-1 DOWNTO 0); - - -- combinatorial - SIGNAL nxt_ar : SIGNED(g_in_a_w-1 DOWNTO 0); - SIGNAL nxt_ai : SIGNED(g_in_a_w-1 DOWNTO 0); - SIGNAL nxt_br : SIGNED(g_in_b_w-1 DOWNTO 0); - SIGNAL nxt_bi : SIGNED(g_in_b_w-1 DOWNTO 0); - SIGNAL nxt_prod_ar_br : SIGNED(c_prod_w-1 DOWNTO 0); -- re - SIGNAL nxt_prod_ai_bi : SIGNED(c_prod_w-1 DOWNTO 0); - SIGNAL nxt_prod_ai_br : SIGNED(c_prod_w-1 DOWNTO 0); -- im - SIGNAL nxt_prod_ar_bi : SIGNED(c_prod_w-1 DOWNTO 0); - SIGNAL nxt_sum_re : SIGNED(c_sum_w-1 DOWNTO 0); - SIGNAL nxt_sum_im : SIGNED(c_sum_w-1 DOWNTO 0); - SIGNAL nxt_result_re : SIGNED(g_out_p_w-1 DOWNTO 0); - SIGNAL nxt_result_im : SIGNED(g_out_p_w-1 DOWNTO 0); - - -- the active signals - SIGNAL ar : SIGNED(g_in_a_w-1 DOWNTO 0); - SIGNAL ai : SIGNED(g_in_a_w-1 DOWNTO 0); - SIGNAL br : SIGNED(g_in_b_w-1 DOWNTO 0); - SIGNAL bi : SIGNED(g_in_b_w-1 DOWNTO 0); - SIGNAL prod_ar_br : SIGNED(c_prod_w-1 DOWNTO 0); -- re - SIGNAL prod_ai_bi : SIGNED(c_prod_w-1 DOWNTO 0); - SIGNAL prod_ai_br : SIGNED(c_prod_w-1 DOWNTO 0); -- im - SIGNAL prod_ar_bi : SIGNED(c_prod_w-1 DOWNTO 0); - SIGNAL sum_re : SIGNED(c_sum_w-1 DOWNTO 0); - SIGNAL sum_im : 
SIGNED(c_sum_w-1 DOWNTO 0); - SIGNAL result_re : SIGNED(g_out_p_w-1 DOWNTO 0); - SIGNAL result_im : SIGNED(g_out_p_w-1 DOWNTO 0); - -BEGIN - - ------------------------------------------------------------------------------ - -- Registers - ------------------------------------------------------------------------------ - - -- Put all potential registers in a single process for optimal DSP inferrence - -- Use rst only if it is supported by the DSP primitive, else leave it at '0' - p_reg : PROCESS (rst, clk) - BEGIN - IF rising_edge(clk) THEN - IF rst='1' THEN - reg_ar <= (OTHERS=>'0'); - reg_ai <= (OTHERS=>'0'); - reg_br <= (OTHERS=>'0'); - reg_bi <= (OTHERS=>'0'); - reg_prod_ar_br <= (OTHERS=>'0'); - reg_prod_ai_bi <= (OTHERS=>'0'); - reg_prod_ai_br <= (OTHERS=>'0'); - reg_prod_ar_bi <= (OTHERS=>'0'); - reg_sum_re <= (OTHERS=>'0'); - reg_sum_im <= (OTHERS=>'0'); - reg_result_re <= (OTHERS=>'0'); - reg_result_im <= (OTHERS=>'0'); - ELSIF clken='1' THEN - reg_ar <= nxt_ar; -- inputs - reg_ai <= nxt_ai; - reg_br <= nxt_br; - reg_bi <= nxt_bi; - reg_prod_ar_br <= nxt_prod_ar_br; -- products for re - reg_prod_ai_bi <= nxt_prod_ai_bi; - reg_prod_ai_br <= nxt_prod_ai_br; -- products for im - reg_prod_ar_bi <= nxt_prod_ar_bi; - reg_sum_re <= nxt_sum_re; -- sum - reg_sum_im <= nxt_sum_im; - reg_result_re <= nxt_result_re; -- result sum after optional register stage - reg_result_im <= nxt_result_im; - END IF; - END IF; - END PROCESS; - - -- Propagate in_val with dsp latency - u_out_val : ENTITY work.common_pipeline_sl - GENERIC MAP ( - g_pipeline => c_pipeline +u_complex_mult : ENTITY tech_mult_lib.tech_complex_mult + GENERIC MAP( + g_technology => g_technology, + g_variant => g_variant, + g_in_a_w => g_in_a_w, + g_in_b_w => g_in_b_w, + g_out_p_w => g_out_p_w, + g_conjugate_b => g_conjugate_b, + g_pipeline_input => g_pipeline_input, + g_pipeline_product => g_pipeline_product, + g_pipeline_adder => g_pipeline_adder, + g_pipeline_output => g_pipeline_output ) - PORT MAP ( - rst 
=> rst, - clk => clk, - clken => clken, - in_dat => in_val, - out_dat => out_val - ); - - ------------------------------------------------------------------------------ - -- Inputs - ------------------------------------------------------------------------------ - - nxt_ar <= SIGNED(in_ar); - nxt_ai <= SIGNED(in_ai); - nxt_br <= SIGNED(in_br); - nxt_bi <= SIGNED(in_bi); - - no_input_reg : IF g_pipeline_input=0 GENERATE -- wired - ar <= nxt_ar; - ai <= nxt_ai; - br <= nxt_br; - bi <= nxt_bi; - END GENERATE; - - gen_input_reg : IF g_pipeline_input>0 GENERATE -- register input - ar <= reg_ar; - ai <= reg_ai; - br <= reg_br; - bi <= reg_bi; - END GENERATE; - - - ------------------------------------------------------------------------------ - -- Products - ------------------------------------------------------------------------------ - - nxt_prod_ar_br <= ar * br; -- products for re - nxt_prod_ai_bi <= ai * bi; - nxt_prod_ai_br <= ai * br; -- products for im - nxt_prod_ar_bi <= ar * bi; - - no_product_reg : IF g_pipeline_product=0 GENERATE -- wired - prod_ar_br <= nxt_prod_ar_br; - prod_ai_bi <= nxt_prod_ai_bi; - prod_ai_br <= nxt_prod_ai_br; - prod_ar_bi <= nxt_prod_ar_bi; - END GENERATE; - gen_product_reg : IF g_pipeline_product>0 GENERATE -- register - prod_ar_br <= reg_prod_ar_br; - prod_ai_bi <= reg_prod_ai_bi; - prod_ai_br <= reg_prod_ai_br; - prod_ar_bi <= reg_prod_ar_bi; - END GENERATE; - - - ------------------------------------------------------------------------------ - -- Sum - ------------------------------------------------------------------------------ - - -- Re - -- . "ADD" for a*conj(b) : ar*br + ai*bi - -- . "SUB" for a*b : ar*br - ai*bi - gen_re_add : IF c_re_add_sub = "ADD" GENERATE - nxt_sum_re <= RESIZE_NUM(prod_ar_br, c_sum_w) + prod_ai_bi; - END GENERATE; - gen_re_sub : IF c_re_add_sub = "SUB" GENERATE - nxt_sum_re <= RESIZE_NUM(prod_ar_br, c_sum_w) - prod_ai_bi; - END GENERATE; - - -- Im - -- . "ADD" for a*b : ai*br + ar*bi - -- . 
"SUB" for a*conj(b) : ai*br - ar*bi - gen_im_add : IF c_im_add_sub = "ADD" GENERATE - nxt_sum_im <= RESIZE_NUM(prod_ai_br, c_sum_w) + prod_ar_bi; - END GENERATE; - gen_im_sub : IF c_im_add_sub = "SUB" GENERATE - nxt_sum_im <= RESIZE_NUM(prod_ai_br, c_sum_w) - prod_ar_bi; - END GENERATE; - - - no_adder_reg : IF g_pipeline_adder=0 GENERATE -- wired - sum_re <= nxt_sum_re; - sum_im <= nxt_sum_im; - END GENERATE; - gen_adder_reg : IF g_pipeline_adder>0 GENERATE -- register - sum_re <= reg_sum_re; - sum_im <= reg_sum_im; - END GENERATE; + PORT MAP( + rst => rst, + clk => clk, + clken => clken, + in_ar => ar, + in_ai => ai, + in_br => br, + in_bi => bi, + out_pr => mult_re, + out_pi => mult_im + ); - ------------------------------------------------------------------------------ - -- Result sum after optional rounding - ------------------------------------------------------------------------------ - - nxt_result_re <= RESIZE_NUM(sum_re, g_out_p_w); - nxt_result_im <= RESIZE_NUM(sum_im, g_out_p_w); - - no_result_reg : IF g_pipeline_output=0 GENERATE -- wired - result_re <= nxt_result_re; - result_im <= nxt_result_im; - END GENERATE; - gen_result_reg : IF g_pipeline_output>0 GENERATE -- register - result_re <= reg_result_re; - result_im <= reg_result_im; - END GENERATE; - + -- Back to true input widths and then resize for output width + result_re <= RESIZE_SVEC(mult_re, g_out_p_w); + result_im <= RESIZE_SVEC(mult_im, g_out_p_w); ------------------------------------------------------------------------------ -- Extra output pipelining @@ -393,166 +189,5 @@ BEGIN out_dat => out_pi ); -END ARCHITECTURE; -- rtl - - -------------------------------------------------------------------------------- --- rtl_dsp -------------------------------------------------------------------------------- - -architecture rtl_dsp of common_complex_mult is - - -- This architecture has: - -- . fixed latency of g_pipeline_input + g_pipeline_adder = 2 clock cycles - -- . 
fixed g_conjugate_b = false - CONSTANT c_pipeline : NATURAL := g_pipeline_input + g_pipeline_product + g_pipeline_adder + g_pipeline_output; - - CONSTANT c_prod_w : NATURAL := in_ar'LENGTH + in_br'LENGTH; -- assume equal width for Re and im - CONSTANT c_sum_w : NATURAL := c_prod_w+1; - - signal a_re : signed(in_ar'range) := (OTHERS=>'0'); - signal a_im : signed(in_ai'range) := (OTHERS=>'0'); - signal b_re : signed(in_br'range) := (OTHERS=>'0'); - signal b_im : signed(in_bi'range) := (OTHERS=>'0'); - - signal sum_pr : signed(c_sum_w-1 DOWNTO 0); - signal sum_pi : signed(c_sum_w-1 DOWNTO 0); - - signal dly_out_val : std_logic; - -begin - - -- Latency must be 2 - ASSERT c_pipeline = 2 - REPORT "common_complex_mult(rtl_dsp): pipeline value not supported" - SEVERITY FAILURE; - - -- Conjugate not supported - ASSERT g_conjugate_b = FALSE - REPORT "common_complex_mult(rtl_dsp): conjugate input is not supported" - SEVERITY FAILURE; - - p_CmplxMul: process(clk,rst) - begin - if rst='1' then - - --a_re <= (OTHERS=>'0'); - --a_im <= (OTHERS=>'0'); - --b_re <= (OTHERS=>'0'); - --b_im <= (OTHERS=>'0'); - - sum_pr <= (OTHERS=>'0'); - sum_pi <= (OTHERS=>'0'); - - out_val <= '0'; - dly_out_val <= '0'; - - elsif rising_edge(clk) then - a_re <= signed(in_ar); - a_im <= signed(in_ai); - b_re <= signed(in_br); - b_im <= signed(in_bi); - - sum_pr <= RESIZE_NUM(a_re*b_re, c_sum_w) - RESIZE_NUM(a_im*b_im, c_sum_w); - sum_pi <= RESIZE_NUM(a_re*b_im, c_sum_w) + RESIZE_NUM(a_im*b_re, c_sum_w); - - dly_out_val <= in_val; - out_val <= dly_out_val; - - end if; - end process; - - out_pr <= RESIZE_SVEC(std_logic_vector(sum_pr), g_out_p_w); - out_pi <= RESIZE_SVEC(std_logic_vector(sum_pi), g_out_p_w); - -end rtl_dsp; - -------------------------------------------------------------------------------- --- altera_rtl -------------------------------------------------------------------------------- - -architecture altera_rtl of common_complex_mult is - - -- This architecture (by Raj Rajan Thilak) has: - 
-- . fixed latency of g_pipeline_input + g_pipeline_adder + g_pipeline_output = 3 clock cycles - -- . fixed g_conjugate_b = false - CONSTANT c_pipeline : NATURAL := g_pipeline_input + g_pipeline_product + g_pipeline_adder + g_pipeline_output; - - CONSTANT c_prod_w : NATURAL := in_ar'LENGTH + in_br'LENGTH; -- assume equal width for Re and im - CONSTANT c_sum_w : NATURAL := c_prod_w+1; - - signal a0_reg : signed(g_in_a_w-1 downto 0); - signal b0_reg : signed(g_in_b_w-1 downto 0); - signal a1_reg : signed(g_in_a_w-1 downto 0); - signal b1_reg : signed(g_in_b_w-1 downto 0); - signal a2_reg : signed(g_in_a_w-1 downto 0); - signal b2_reg : signed(g_in_b_w-1 downto 0); - signal a3_reg : signed(g_in_a_w-1 downto 0); - signal b3_reg : signed(g_in_b_w-1 downto 0); - signal rout_sig : signed(c_sum_w-1 downto 0); - signal iout_sig : signed(c_sum_w-1 downto 0); - signal rout_reg : signed(c_sum_w-1 downto 0); - signal iout_reg : signed(c_sum_w-1 downto 0); - signal in_val_reg : std_logic; - signal sig_val : std_logic; - -begin - - -- Latency must be 3 - ASSERT c_pipeline = 3 - REPORT "common_complex_mult(altera_rtl): pipeline value not supported" - SEVERITY FAILURE; - - -- Conjugate not supported - ASSERT g_conjugate_b = FALSE - REPORT "common_complex_mult(altera_rtl): conjugate input is not supported" - SEVERITY FAILURE; - - process (clk, rst, clken) - begin - if rst='1' then -- asynchronous reset - a0_reg <= (others => '0'); - b0_reg <= (others => '0'); - a1_reg <= (others => '0'); - b1_reg <= (others => '0'); - a2_reg <= (others => '0'); - b2_reg <= (others => '0'); - a3_reg <= (others => '0'); - b3_reg <= (others => '0'); - - rout_sig <= (others => '0'); - iout_sig <= (others => '0'); - - rout_reg <= (others => '0'); - iout_reg <= (others => '0'); - - in_val_reg <= '0'; - sig_val <= '0'; - out_val <= '0'; - - elsif clk'event and clk = '1' and clken = '1'then -- rising clock edge - a0_reg <= signed(in_ar); - b0_reg <= signed(in_br); - a1_reg <= signed(in_ai); - b1_reg <= 
signed(in_bi); - a2_reg <= signed(in_ai); - b2_reg <= signed(in_br); - a3_reg <= signed(in_ar); - b3_reg <= signed(in_bi); - - rout_sig <= RESIZE_NUM((a0_reg*b0_reg), c_sum_w) - RESIZE_NUM((a1_reg*b1_reg), c_sum_w); - iout_sig <= RESIZE_NUM((a2_reg*b2_reg), c_sum_w) + RESIZE_NUM((a3_reg*b3_reg), c_sum_w); - - rout_reg <= rout_sig; - iout_reg <= iout_sig; - - in_val_reg <= in_val; - sig_val <= in_val_reg; - out_val <= sig_val; - end if; - end process; - - out_pr <= RESIZE_SVEC(std_logic_vector(rout_reg), g_out_p_w); - out_pi <= RESIZE_SVEC(std_logic_vector(iout_reg), g_out_p_w); -end altera_rtl; +END str;