Skip to content
Snippets Groups Projects
Commit 92a45716 authored by Zanting's avatar Zanting
Browse files

Commit before move to new library common_mult

parent ceb794ec
Branches
No related tags found
No related merge requests found
......@@ -19,9 +19,10 @@
--
-------------------------------------------------------------------------------
LIBRARY IEEE;
LIBRARY IEEE, technology_lib, tech_mult_lib;
USE IEEE.std_logic_1164.ALL;
USE IEEE.numeric_std.ALL;
USE technology_lib.technology_select_pkg.ALL;
USE work.common_pkg.ALL;
--
......@@ -46,6 +47,8 @@ USE work.common_pkg.ALL;
ENTITY common_complex_mult IS
GENERIC (
g_technology : NATURAL := c_tech_select_default;
g_variant : STRING := "IP";
g_in_a_w : POSITIVE;
g_in_b_w : POSITIVE;
g_out_p_w : POSITIVE; -- default use g_out_p_w = g_in_a_w+g_in_b_w = c_prod_w
......@@ -71,70 +74,42 @@ ENTITY common_complex_mult IS
END common_complex_mult;
-------------------------------------------------------------------------------
-- str
-------------------------------------------------------------------------------
ARCHITECTURE str OF common_complex_mult IS
CONSTANT c_pipeline : NATURAL := g_pipeline_input + g_pipeline_product + g_pipeline_adder + g_pipeline_output;
CONSTANT c_re_add_sub : STRING := sel_a_b(g_conjugate_b, "ADD", "SUB");
CONSTANT c_im_add_sub : STRING := sel_a_b(g_conjugate_b, "SUB", "ADD");
SIGNAL in_a_ir : STD_LOGIC_VECTOR(2*g_in_a_w-1 DOWNTO 0);
SIGNAL in_a_ri : STD_LOGIC_VECTOR(2*g_in_a_w-1 DOWNTO 0);
SIGNAL in_b_ir : STD_LOGIC_VECTOR(2*g_in_b_w-1 DOWNTO 0);
-- MegaWizard IP ip_stratixiv_complex_mult was generated with latency c_dsp_latency = 3
CONSTANT c_dsp_latency : NATURAL := 3;
-- Extra output pipelining is only needed when c_pipeline > c_dsp_latency
CONSTANT c_pipeline_output : NATURAL := sel_a_b(c_pipeline>c_dsp_latency, c_pipeline-c_dsp_latency, 0);
-- Force to maximum 18 bit width, because:
-- . the ip_stratixiv_complex_mult is generated for 18b inputs and 36b output and then uses 4 real multipliers and no additional registers
-- . if one input > 18b then another IP needs to be regenerated and that will use 8 real multipliers and some extra LUTs and registers
-- . if both inputs > 18b then another IP needs to be regenerated and that will use 16 real multipliers and some extra LUTs and registers
-- . if the output is set to 18b+18b + 1b =37b to account for the sum then another IP needs to be regenerated and that will use some extra registers
-- ==> for inputs <= 18b this ip_stratixiv_complex_mult is appropriate and it can not be made parametrisable to fit also inputs > 18b.
CONSTANT c_dsp_dat_w : NATURAL := 18;
CONSTANT c_dsp_prod_w : NATURAL := 2*c_dsp_dat_w;
SIGNAL ar : STD_LOGIC_VECTOR(c_dsp_dat_w-1 DOWNTO 0);
SIGNAL ai : STD_LOGIC_VECTOR(c_dsp_dat_w-1 DOWNTO 0);
SIGNAL br : STD_LOGIC_VECTOR(c_dsp_dat_w-1 DOWNTO 0);
SIGNAL bi : STD_LOGIC_VECTOR(c_dsp_dat_w-1 DOWNTO 0);
SIGNAL mult_re : STD_LOGIC_VECTOR(c_dsp_prod_w-1 DOWNTO 0);
SIGNAL mult_im : STD_LOGIC_VECTOR(c_dsp_prod_w-1 DOWNTO 0);
SIGNAL result_re : STD_LOGIC_VECTOR(g_out_p_w-1 DOWNTO 0);
SIGNAL result_im : STD_LOGIC_VECTOR(g_out_p_w-1 DOWNTO 0);
BEGIN
in_a_ir <= in_ai & in_ar;
in_a_ri <= in_ar & in_ai;
in_b_ir <= in_bi & in_br;
re : ENTITY work.common_mult_add2(rtl)
GENERIC MAP (
g_in_a_w => g_in_a_w,
g_in_b_w => g_in_b_w,
g_res_w => g_out_p_w,
g_add_sub => c_re_add_sub, -- vector low part product + or - vector high part product
-- . "SUB" for a*b : ar*br - ai*bi --> a_ir - b_ir
-- . "ADD" for a*conj(b) : ar*br + ai*bi --> a_ir + b_ir
g_pipeline_input => g_pipeline_input,
g_pipeline_product => g_pipeline_product,
g_pipeline_adder => g_pipeline_adder,
g_pipeline_output => g_pipeline_output
)
PORT MAP (
clk => clk,
clken => clken,
in_a => in_a_ir,
in_b => in_b_ir,
res => out_pr
);
im : ENTITY work.common_mult_add2(rtl)
GENERIC MAP (
g_in_a_w => g_in_a_w,
g_in_b_w => g_in_b_w,
g_res_w => g_out_p_w,
g_add_sub => c_im_add_sub, -- vector low part product + or - vector high part product
-- . "ADD" for a*b : ai*br + ar*bi --> a_ri + b_ir
-- . "SUB" for a*conj(b) : ai*br - ar*bi --> a_ri - b_ir
g_pipeline_input => g_pipeline_input,
g_pipeline_product => g_pipeline_product,
g_pipeline_adder => g_pipeline_adder,
g_pipeline_output => g_pipeline_output
)
PORT MAP (
clk => clk,
clken => clken,
in_a => in_a_ri,
in_b => in_b_ir,
res => out_pi
);
-- User specified latency must be >= MegaWizard IP dsp_mult_add2 latency
ASSERT c_pipeline >= c_dsp_latency
REPORT "tech_complex_mult(stratix4): pipeline value not supported"
SEVERITY FAILURE;
-- Propagate in_val with dsp latency
-- Propagate in_val with c_pipeline latency
u_out_val : ENTITY work.common_pipeline_sl
GENERIC MAP (
g_pipeline => c_pipeline
......@@ -147,219 +122,40 @@ BEGIN
out_dat => out_val
);
END ARCHITECTURE; -- str
-- Adapt DSP input widths
ar <= RESIZE_SVEC(in_ar, c_dsp_dat_w);
ai <= RESIZE_SVEC(in_ai, c_dsp_dat_w);
br <= RESIZE_SVEC(in_br, c_dsp_dat_w);
bi <= RESIZE_SVEC(in_bi, c_dsp_dat_w) WHEN g_conjugate_b=FALSE ELSE TO_SVEC(-TO_SINT(in_bi), c_dsp_dat_w);
-------------------------------------------------------------------------------
-- rtl
-------------------------------------------------------------------------------
ARCHITECTURE rtl OF common_complex_mult IS
CONSTANT c_pipeline : NATURAL := g_pipeline_input + g_pipeline_product + g_pipeline_adder + g_pipeline_output;
-- Extra output pipelining using common_pipeline is only needed when g_pipeline_output > 1
CONSTANT c_pipeline_output : NATURAL := sel_a_b(g_pipeline_output>0, g_pipeline_output-1, 0);
CONSTANT c_prod_w : NATURAL := g_in_a_w+g_in_b_w;
CONSTANT c_sum_w : NATURAL := c_prod_w+1;
CONSTANT c_re_add_sub : STRING := sel_a_b(g_conjugate_b, "ADD", "SUB");
CONSTANT c_im_add_sub : STRING := sel_a_b(g_conjugate_b, "SUB", "ADD");
-- registers
SIGNAL reg_ar : SIGNED(g_in_a_w-1 DOWNTO 0);
SIGNAL reg_ai : SIGNED(g_in_a_w-1 DOWNTO 0);
SIGNAL reg_br : SIGNED(g_in_b_w-1 DOWNTO 0);
SIGNAL reg_bi : SIGNED(g_in_b_w-1 DOWNTO 0);
SIGNAL reg_prod_ar_br : SIGNED(c_prod_w-1 DOWNTO 0); -- re
SIGNAL reg_prod_ai_bi : SIGNED(c_prod_w-1 DOWNTO 0);
SIGNAL reg_prod_ai_br : SIGNED(c_prod_w-1 DOWNTO 0); -- im
SIGNAL reg_prod_ar_bi : SIGNED(c_prod_w-1 DOWNTO 0);
SIGNAL reg_sum_re : SIGNED(c_sum_w-1 DOWNTO 0);
SIGNAL reg_sum_im : SIGNED(c_sum_w-1 DOWNTO 0);
SIGNAL reg_result_re : SIGNED(g_out_p_w-1 DOWNTO 0);
SIGNAL reg_result_im : SIGNED(g_out_p_w-1 DOWNTO 0);
-- combinatorial
SIGNAL nxt_ar : SIGNED(g_in_a_w-1 DOWNTO 0);
SIGNAL nxt_ai : SIGNED(g_in_a_w-1 DOWNTO 0);
SIGNAL nxt_br : SIGNED(g_in_b_w-1 DOWNTO 0);
SIGNAL nxt_bi : SIGNED(g_in_b_w-1 DOWNTO 0);
SIGNAL nxt_prod_ar_br : SIGNED(c_prod_w-1 DOWNTO 0); -- re
SIGNAL nxt_prod_ai_bi : SIGNED(c_prod_w-1 DOWNTO 0);
SIGNAL nxt_prod_ai_br : SIGNED(c_prod_w-1 DOWNTO 0); -- im
SIGNAL nxt_prod_ar_bi : SIGNED(c_prod_w-1 DOWNTO 0);
SIGNAL nxt_sum_re : SIGNED(c_sum_w-1 DOWNTO 0);
SIGNAL nxt_sum_im : SIGNED(c_sum_w-1 DOWNTO 0);
SIGNAL nxt_result_re : SIGNED(g_out_p_w-1 DOWNTO 0);
SIGNAL nxt_result_im : SIGNED(g_out_p_w-1 DOWNTO 0);
-- the active signals
SIGNAL ar : SIGNED(g_in_a_w-1 DOWNTO 0);
SIGNAL ai : SIGNED(g_in_a_w-1 DOWNTO 0);
SIGNAL br : SIGNED(g_in_b_w-1 DOWNTO 0);
SIGNAL bi : SIGNED(g_in_b_w-1 DOWNTO 0);
SIGNAL prod_ar_br : SIGNED(c_prod_w-1 DOWNTO 0); -- re
SIGNAL prod_ai_bi : SIGNED(c_prod_w-1 DOWNTO 0);
SIGNAL prod_ai_br : SIGNED(c_prod_w-1 DOWNTO 0); -- im
SIGNAL prod_ar_bi : SIGNED(c_prod_w-1 DOWNTO 0);
SIGNAL sum_re : SIGNED(c_sum_w-1 DOWNTO 0);
SIGNAL sum_im : SIGNED(c_sum_w-1 DOWNTO 0);
SIGNAL result_re : SIGNED(g_out_p_w-1 DOWNTO 0);
SIGNAL result_im : SIGNED(g_out_p_w-1 DOWNTO 0);
BEGIN
------------------------------------------------------------------------------
-- Registers
------------------------------------------------------------------------------
-- Put all potential registers in a single process for optimal DSP inference
-- Use rst only if it is supported by the DSP primitive, else leave it at '0'
p_reg : PROCESS (rst, clk)
BEGIN
IF rising_edge(clk) THEN
IF rst='1' THEN
reg_ar <= (OTHERS=>'0');
reg_ai <= (OTHERS=>'0');
reg_br <= (OTHERS=>'0');
reg_bi <= (OTHERS=>'0');
reg_prod_ar_br <= (OTHERS=>'0');
reg_prod_ai_bi <= (OTHERS=>'0');
reg_prod_ai_br <= (OTHERS=>'0');
reg_prod_ar_bi <= (OTHERS=>'0');
reg_sum_re <= (OTHERS=>'0');
reg_sum_im <= (OTHERS=>'0');
reg_result_re <= (OTHERS=>'0');
reg_result_im <= (OTHERS=>'0');
ELSIF clken='1' THEN
reg_ar <= nxt_ar; -- inputs
reg_ai <= nxt_ai;
reg_br <= nxt_br;
reg_bi <= nxt_bi;
reg_prod_ar_br <= nxt_prod_ar_br; -- products for re
reg_prod_ai_bi <= nxt_prod_ai_bi;
reg_prod_ai_br <= nxt_prod_ai_br; -- products for im
reg_prod_ar_bi <= nxt_prod_ar_bi;
reg_sum_re <= nxt_sum_re; -- sum
reg_sum_im <= nxt_sum_im;
reg_result_re <= nxt_result_re; -- result sum after optional register stage
reg_result_im <= nxt_result_im;
END IF;
END IF;
END PROCESS;
-- Propagate in_val with dsp latency
u_out_val : ENTITY work.common_pipeline_sl
u_complex_mult : ENTITY tech_mult_lib.tech_complex_mult
GENERIC MAP(
g_pipeline => c_pipeline
g_technology => g_technology,
g_variant => g_variant,
g_in_a_w => g_in_a_w,
g_in_b_w => g_in_b_w,
g_out_p_w => g_out_p_w,
g_conjugate_b => g_conjugate_b,
g_pipeline_input => g_pipeline_input,
g_pipeline_product => g_pipeline_product,
g_pipeline_adder => g_pipeline_adder,
g_pipeline_output => g_pipeline_output
)
PORT MAP(
rst => rst,
clk => clk,
clken => clken,
in_dat => in_val,
out_dat => out_val
in_ar => ar,
in_ai => ai,
in_br => br,
in_bi => bi,
out_pr => mult_re,
out_pi => mult_im
);
------------------------------------------------------------------------------
-- Inputs
------------------------------------------------------------------------------
nxt_ar <= SIGNED(in_ar);
nxt_ai <= SIGNED(in_ai);
nxt_br <= SIGNED(in_br);
nxt_bi <= SIGNED(in_bi);
no_input_reg : IF g_pipeline_input=0 GENERATE -- wired
ar <= nxt_ar;
ai <= nxt_ai;
br <= nxt_br;
bi <= nxt_bi;
END GENERATE;
gen_input_reg : IF g_pipeline_input>0 GENERATE -- register input
ar <= reg_ar;
ai <= reg_ai;
br <= reg_br;
bi <= reg_bi;
END GENERATE;
------------------------------------------------------------------------------
-- Products
------------------------------------------------------------------------------
nxt_prod_ar_br <= ar * br; -- products for re
nxt_prod_ai_bi <= ai * bi;
nxt_prod_ai_br <= ai * br; -- products for im
nxt_prod_ar_bi <= ar * bi;
no_product_reg : IF g_pipeline_product=0 GENERATE -- wired
prod_ar_br <= nxt_prod_ar_br;
prod_ai_bi <= nxt_prod_ai_bi;
prod_ai_br <= nxt_prod_ai_br;
prod_ar_bi <= nxt_prod_ar_bi;
END GENERATE;
gen_product_reg : IF g_pipeline_product>0 GENERATE -- register
prod_ar_br <= reg_prod_ar_br;
prod_ai_bi <= reg_prod_ai_bi;
prod_ai_br <= reg_prod_ai_br;
prod_ar_bi <= reg_prod_ar_bi;
END GENERATE;
------------------------------------------------------------------------------
-- Sum
------------------------------------------------------------------------------
-- Re
-- . "ADD" for a*conj(b) : ar*br + ai*bi
-- . "SUB" for a*b : ar*br - ai*bi
gen_re_add : IF c_re_add_sub = "ADD" GENERATE
nxt_sum_re <= RESIZE_NUM(prod_ar_br, c_sum_w) + prod_ai_bi;
END GENERATE;
gen_re_sub : IF c_re_add_sub = "SUB" GENERATE
nxt_sum_re <= RESIZE_NUM(prod_ar_br, c_sum_w) - prod_ai_bi;
END GENERATE;
-- Im
-- . "ADD" for a*b : ai*br + ar*bi
-- . "SUB" for a*conj(b) : ai*br - ar*bi
gen_im_add : IF c_im_add_sub = "ADD" GENERATE
nxt_sum_im <= RESIZE_NUM(prod_ai_br, c_sum_w) + prod_ar_bi;
END GENERATE;
gen_im_sub : IF c_im_add_sub = "SUB" GENERATE
nxt_sum_im <= RESIZE_NUM(prod_ai_br, c_sum_w) - prod_ar_bi;
END GENERATE;
no_adder_reg : IF g_pipeline_adder=0 GENERATE -- wired
sum_re <= nxt_sum_re;
sum_im <= nxt_sum_im;
END GENERATE;
gen_adder_reg : IF g_pipeline_adder>0 GENERATE -- register
sum_re <= reg_sum_re;
sum_im <= reg_sum_im;
END GENERATE;
------------------------------------------------------------------------------
-- Result sum after optional rounding
------------------------------------------------------------------------------
nxt_result_re <= RESIZE_NUM(sum_re, g_out_p_w);
nxt_result_im <= RESIZE_NUM(sum_im, g_out_p_w);
no_result_reg : IF g_pipeline_output=0 GENERATE -- wired
result_re <= nxt_result_re;
result_im <= nxt_result_im;
END GENERATE;
gen_result_reg : IF g_pipeline_output>0 GENERATE -- register
result_re <= reg_result_re;
result_im <= reg_result_im;
END GENERATE;
-- Back to true input widths and then resize for output width
result_re <= RESIZE_SVEC(mult_re, g_out_p_w);
result_im <= RESIZE_SVEC(mult_im, g_out_p_w);
------------------------------------------------------------------------------
-- Extra output pipelining
......@@ -393,166 +189,5 @@ BEGIN
out_dat => out_pi
);
END ARCHITECTURE; -- rtl
-------------------------------------------------------------------------------
-- rtl_dsp
-------------------------------------------------------------------------------
architecture rtl_dsp of common_complex_mult is

  -- Complex multiplier a*b built from two register stages:
  -- . stage 1 registers the four input operands
  -- . stage 2 registers the real and imaginary sums of products
  -- Restrictions, enforced by the assertions in the body:
  -- . the total pipeline c_pipeline must equal 2 clock cycles
  -- . g_conjugate_b must be FALSE
  -- The clken input is not referenced in this architecture.

  constant c_pipeline : natural := g_pipeline_input + g_pipeline_product + g_pipeline_adder + g_pipeline_output;
  constant c_prod_w   : natural := in_ar'length + in_br'length;  -- assume equal width for Re and Im
  constant c_sum_w    : natural := c_prod_w + 1;                  -- one bit wider than a product

  -- Stage 1: operand registers, not reset, start at zero through their initial value
  signal reg_ar     : signed(in_ar'range) := (others => '0');
  signal reg_ai     : signed(in_ai'range) := (others => '0');
  signal reg_br     : signed(in_br'range) := (others => '0');
  signal reg_bi     : signed(in_bi'range) := (others => '0');

  -- Stage 2: registered sums of products
  signal reg_sum_re : signed(c_sum_w-1 downto 0);
  signal reg_sum_im : signed(c_sum_w-1 downto 0);

  -- First delay stage of in_val, out_val is the second stage
  signal val_d1     : std_logic;

begin

  -- Conjugate not supported
  assert g_conjugate_b = false
    report "common_complex_mult(rtl_dsp): conjugate input is not supported"
    severity failure;

  -- Latency must be 2
  assert c_pipeline = 2
    report "common_complex_mult(rtl_dsp): pipeline value not supported"
    severity failure;

  p_complex_mult : process (rst, clk)
  begin
    if rst = '1' then
      -- Asynchronous reset of the valid pipeline and the sum registers only,
      -- the operand registers are deliberately left out of the reset.
      val_d1     <= '0';
      out_val    <= '0';
      reg_sum_re <= (others => '0');
      reg_sum_im <= (others => '0');
    elsif rising_edge(clk) then
      -- Valid pipeline, two stages to match the data path
      val_d1     <= in_val;
      out_val    <= val_d1;

      -- Stage 1: capture the operands
      reg_ar     <= signed(in_ar);
      reg_ai     <= signed(in_ai);
      reg_br     <= signed(in_br);
      reg_bi     <= signed(in_bi);

      -- Stage 2: Re = ar*br - ai*bi, Im = ar*bi + ai*br
      reg_sum_re <= RESIZE_NUM(reg_ar*reg_br, c_sum_w) - RESIZE_NUM(reg_ai*reg_bi, c_sum_w);
      reg_sum_im <= RESIZE_NUM(reg_ar*reg_bi, c_sum_w) + RESIZE_NUM(reg_ai*reg_br, c_sum_w);
    end if;
  end process;

  -- Resize the registered sums to the output width
  out_pr <= RESIZE_SVEC(std_logic_vector(reg_sum_re), g_out_p_w);
  out_pi <= RESIZE_SVEC(std_logic_vector(reg_sum_im), g_out_p_w);

end rtl_dsp;
-------------------------------------------------------------------------------
-- altera_rtl
-------------------------------------------------------------------------------
architecture altera_rtl of common_complex_mult is

  -- This architecture (by Raj Rajan Thilak) has:
  -- . fixed latency of g_pipeline_input + g_pipeline_adder + g_pipeline_output = 3 clock cycles
  -- . fixed g_conjugate_b = false
  --
  -- Register stages, all in one clocked process:
  -- . stage 1: a*_reg, b*_reg  hold one operand pair per real multiplier
  -- . stage 2: rout_sig, iout_sig hold the sums of products
  -- . stage 3: rout_reg, iout_reg hold the result that drives the outputs
  -- in_val passes through in_val_reg, sig_val and out_val to match these 3 stages.

  CONSTANT c_pipeline   : NATURAL := g_pipeline_input + g_pipeline_product + g_pipeline_adder + g_pipeline_output;
  CONSTANT c_prod_w     : NATURAL := in_ar'LENGTH + in_br'LENGTH;  -- assume equal width for Re and im
  CONSTANT c_sum_w      : NATURAL := c_prod_w+1;

  -- Stage 1 operand registers: pair 0 and 1 feed Re, pair 2 and 3 feed Im
  signal a0_reg      : signed(g_in_a_w-1 downto 0);
  signal b0_reg      : signed(g_in_b_w-1 downto 0);
  signal a1_reg      : signed(g_in_a_w-1 downto 0);
  signal b1_reg      : signed(g_in_b_w-1 downto 0);
  signal a2_reg      : signed(g_in_a_w-1 downto 0);
  signal b2_reg      : signed(g_in_b_w-1 downto 0);
  signal a3_reg      : signed(g_in_a_w-1 downto 0);
  signal b3_reg      : signed(g_in_b_w-1 downto 0);

  -- Stage 2 sum registers
  signal rout_sig    : signed(c_sum_w-1 downto 0);
  signal iout_sig    : signed(c_sum_w-1 downto 0);

  -- Stage 3 result registers
  signal rout_reg    : signed(c_sum_w-1 downto 0);
  signal iout_reg    : signed(c_sum_w-1 downto 0);

  -- Valid pipeline
  signal in_val_reg  : std_logic;
  signal sig_val     : std_logic;

begin

  -- Latency must be 3
  ASSERT c_pipeline = 3
    REPORT "common_complex_mult(altera_rtl): pipeline value not supported"
    SEVERITY FAILURE;

  -- Conjugate not supported
  ASSERT g_conjugate_b = FALSE
    REPORT "common_complex_mult(altera_rtl): conjugate input is not supported"
    SEVERITY FAILURE;

  -- Only clk and rst belong in the sensitivity list of a clocked process with
  -- asynchronous reset. The clock enable is tested inside the clock edge branch
  -- instead of being combined with the edge expression, so that it is a
  -- register clock enable rather than part of the clock condition.
  p_reg : process (clk, rst)
  begin
    if rst='1' then  -- asynchronous reset
      a0_reg <= (others => '0');
      b0_reg <= (others => '0');
      a1_reg <= (others => '0');
      b1_reg <= (others => '0');
      a2_reg <= (others => '0');
      b2_reg <= (others => '0');
      a3_reg <= (others => '0');
      b3_reg <= (others => '0');
      rout_sig <= (others => '0');
      iout_sig <= (others => '0');
      rout_reg <= (others => '0');
      iout_reg <= (others => '0');
      in_val_reg <= '0';
      sig_val <= '0';
      out_val <= '0';
    elsif rising_edge(clk) then
      if clken='1' then
        -- Stage 1: operand pairs for ar*br, ai*bi (Re) and ai*br, ar*bi (Im)
        a0_reg <= signed(in_ar);
        b0_reg <= signed(in_br);
        a1_reg <= signed(in_ai);
        b1_reg <= signed(in_bi);
        a2_reg <= signed(in_ai);
        b2_reg <= signed(in_br);
        a3_reg <= signed(in_ar);
        b3_reg <= signed(in_bi);
        -- Stage 2: Re = ar*br - ai*bi, Im = ai*br + ar*bi
        rout_sig <= RESIZE_NUM((a0_reg*b0_reg), c_sum_w) - RESIZE_NUM((a1_reg*b1_reg), c_sum_w);
        iout_sig <= RESIZE_NUM((a2_reg*b2_reg), c_sum_w) + RESIZE_NUM((a3_reg*b3_reg), c_sum_w);
        -- Stage 3: result registers
        rout_reg <= rout_sig;
        iout_reg <= iout_sig;
        -- Valid pipeline with the same 3 cycle latency
        in_val_reg <= in_val;
        sig_val <= in_val_reg;
        out_val <= sig_val;
      end if;
    end if;
  end process;

  -- Resize the registered results to the output width
  out_pr <= RESIZE_SVEC(std_logic_vector(rout_reg), g_out_p_w);
  out_pi <= RESIZE_SVEC(std_logic_vector(iout_reg), g_out_p_w);

end altera_rtl;
END str;
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment