From 0f5367cf0148fcb28b308376a969e7ce6cc4c308 Mon Sep 17 00:00:00 2001
From: Zanting <zanting>
Date: Wed, 18 Nov 2015 11:20:06 +0000
Subject: [PATCH] Added Arria 10 multiplier (IP and RTL version)

---
 .../technology/ip_arria10/mult/hdllib.cfg     |  12 ++
 .../ip_arria10/mult/ip_arria10_mult.vhd       |  77 ++++++++++
 .../ip_arria10/mult/ip_arria10_mult_rtl.vhd   | 144 ++++++++++++++++++
 3 files changed, 233 insertions(+)
 create mode 100644 libraries/technology/ip_arria10/mult/hdllib.cfg
 create mode 100644 libraries/technology/ip_arria10/mult/ip_arria10_mult.vhd
 create mode 100644 libraries/technology/ip_arria10/mult/ip_arria10_mult_rtl.vhd

diff --git a/libraries/technology/ip_arria10/mult/hdllib.cfg b/libraries/technology/ip_arria10/mult/hdllib.cfg
new file mode 100644
index 0000000000..d4b2970091
--- /dev/null
+++ b/libraries/technology/ip_arria10/mult/hdllib.cfg
@@ -0,0 +1,12 @@
+hdl_lib_name = ip_arria10_mult	
+hdl_library_clause_name = ip_arria10_mult_lib
+hdl_lib_uses_synth = 
+hdl_lib_uses_sim = 
+
+hdl_lib_technology = ip_arria10
+
+synth_files =
+    ip_arria10_mult.vhd
+    ip_arria10_mult_rtl.vhd
+    
+test_bench_files =
diff --git a/libraries/technology/ip_arria10/mult/ip_arria10_mult.vhd b/libraries/technology/ip_arria10/mult/ip_arria10_mult.vhd
new file mode 100644
index 0000000000..4c08e19c97
--- /dev/null
+++ b/libraries/technology/ip_arria10/mult/ip_arria10_mult.vhd
@@ -0,0 +1,77 @@
+LIBRARY IEEE;
+USE ieee.std_logic_1164.ALL;
+USE ieee.numeric_std.ALL;
+
+LIBRARY lpm;
+USE lpm.lpm_components.ALL;
+
+-- Comments:
+-- . Directly instantiate LPM component, because MegaWizard does so too, see dsp_mult.vhd.
+-- . Use MegaWizard to learn more about the generics.
+-- . Strangely the MegaWizard does not support setting the rounding and saturation mode
+ ENTITY  ip_arria10_mult IS 
+  GENERIC (
+    g_in_a_w           : POSITIVE := 18;      -- Width of the data A port
+    g_in_b_w           : POSITIVE := 18;      -- Width of the data B port
+    g_out_p_w          : POSITIVE := 36;      -- Width of the result port
+--    g_out_s_w          : POSITIVE := 1;       -- Width of the sum port (not used in current designs)
+    g_nof_mult         : POSITIVE := 1;       -- using 2 for 18x18, 4 for 9x9 may yield better results when inferring * is used
+    g_pipeline_input   : NATURAL  := 1;        -- 0 or 1
+    g_pipeline_product : NATURAL  := 1;        -- 0 or 1
+    g_pipeline_output  : NATURAL  := 1;        -- >= 0
+    g_representation   : STRING   := "SIGNED"   -- or "UNSIGNED"
+  );
+  PORT (
+    clk        : IN  STD_LOGIC;
+    clken      : IN  STD_LOGIC := '1';
+--    aclr       : IN  STD_LOGIC := '0'; (not used in current designs)
+    in_a       : IN  STD_LOGIC_VECTOR(g_nof_mult*g_in_a_w-1 DOWNTO 0);
+    in_b       : IN  STD_LOGIC_VECTOR(g_nof_mult*g_in_b_w-1 DOWNTO 0);
+--    sum        : IN  STD_LOGIC_VECTOR(g_nof_mult*g_in_s_w-1 DOWNTO 0) := (OTHERS => '0'); (not used in current designs)
+    out_p      : OUT STD_LOGIC_VECTOR(g_nof_mult*(g_in_a_w+g_in_b_w)-1 DOWNTO 0)
+  );
+ END ip_arria10_mult;
+
+
+ARCHITECTURE str OF ip_arria10_mult IS
+  
+  CONSTANT c_pipeline : NATURAL := g_pipeline_input + g_pipeline_product + g_pipeline_output;
+  
+  -- When g_out_p_w < g_in_a_w+g_in_b_w then the LPM_MULT truncates the LSbits of the product. Therefore
+  -- define c_prod_w to be able to let common_mult truncate the LSBits of the product.
+  CONSTANT c_prod_w : NATURAL := g_in_a_w + g_in_b_w;
+  
+  SIGNAL prod  : STD_LOGIC_VECTOR(g_nof_mult*c_prod_w-1 DOWNTO 0);
+  
+BEGIN
+
+  gen_mult : FOR I IN 0 TO g_nof_mult-1 GENERATE
+    m : lpm_mult
+    GENERIC MAP (
+      lpm_hint => "MAXIMIZE_SPEED=5",   -- default "UNUSED"
+      lpm_pipeline => c_pipeline,
+      lpm_representation => g_representation,
+      lpm_type => "LPM_MULT",
+      lpm_widtha => g_in_a_w,
+      lpm_widthb => g_in_b_w,
+--      lpm_widths => g_in_s_w, (Partial sum input with not used in current designs)
+      lpm_widthp => c_prod_w
+    )
+    PORT MAP (
+      dataa => in_a((I+1)*g_in_a_w-1 DOWNTO I*g_in_a_w),
+      datab => in_b((I+1)*g_in_b_w-1 DOWNTO I*g_in_b_w),
+    --  sum   => sum((I+1)*g_in_s_w-1 DOWNTO I*g_in_s_w),  -- partial sum input is not used in current designs
+    --  aclr  => aclr,                                     -- async clear input is not used in current designs
+      clock => clk,
+      clken => clken,
+      result => prod((I+1)*c_prod_w-1 DOWNTO I*c_prod_w)
+    );
+
+    
+    out_p <= prod;
+---- Truncate MSbits, also for signed (common_pkg.vhd for explanation of RESIZE_SVEC)
+--    out_p((I+1)*g_out_p_w-1 DOWNTO I*g_out_p_w) <= RESIZE_SVEC(prod((I+1)*c_prod_w-1 DOWNTO I*c_prod_w), g_out_p_w) WHEN g_representation="SIGNED" ELSE
+--                                                   RESIZE_UVEC(prod((I+1)*c_prod_w-1 DOWNTO I*c_prod_w), g_out_p_w);
+  END GENERATE;
+  
+END str;
diff --git a/libraries/technology/ip_arria10/mult/ip_arria10_mult_rtl.vhd b/libraries/technology/ip_arria10/mult/ip_arria10_mult_rtl.vhd
new file mode 100644
index 0000000000..bbfc6f5975
--- /dev/null
+++ b/libraries/technology/ip_arria10/mult/ip_arria10_mult_rtl.vhd
@@ -0,0 +1,144 @@
+-------------------------------------------------------------------------------
+--
+-- Copyright (C) 2011
+-- ASTRON (Netherlands Institute for Radio Astronomy) <http://www.astron.nl/>
+-- P.O.Box 2, 7990 AA Dwingeloo, The Netherlands
+--
+-- This program is free software: you can redistribute it and/or modify
+-- it under the terms of the GNU General Public License as published by
+-- the Free Software Foundation, either version 3 of the License, or
+-- (at your option) any later version.
+--
+-- This program is distributed in the hope that it will be useful,
+-- but WITHOUT ANY WARRANTY; without even the implied warranty of
+-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+-- GNU General Public License for more details.
+--
+-- You should have received a copy of the GNU General Public License
+-- along with this program.  If not, see <http://www.gnu.org/licenses/>.
+--
+-------------------------------------------------------------------------------
+
+LIBRARY IEEE;
+USE IEEE.std_logic_1164.ALL;
+USE IEEE.numeric_std.ALL;
+
+-- no support for rounding in this RTL architecture
+ ENTITY  ip_arria10_mult_rtl IS 
+  GENERIC (
+    g_in_a_w           : POSITIVE := 18;
+    g_in_b_w           : POSITIVE := 18;
+    g_out_p_w          : POSITIVE := 36;      -- c_prod_w = g_in_a_w+g_in_b_w, use smaller g_out_p_w to truncate MSbits, or larger g_out_p_w to extend MSbits
+    g_nof_mult         : POSITIVE := 1;       -- using 2 for 18x18, 4 for 9x9 may yield better results when inferring * is used
+    g_pipeline_input   : NATURAL  := 1;        -- 0 or 1
+    g_pipeline_product : NATURAL  := 1;        -- 0 or 1
+    g_pipeline_output  : NATURAL  := 1;        -- >= 0
+    g_representation   : STRING   := "SIGNED"   -- or "UNSIGNED"
+  );
+  PORT (
+    rst        : IN  STD_LOGIC;
+    clk        : IN  STD_LOGIC;
+    clken      : IN  STD_LOGIC := '1';
+    in_a       : IN  STD_LOGIC_VECTOR(g_nof_mult*g_in_a_w-1 DOWNTO 0);
+    in_b       : IN  STD_LOGIC_VECTOR(g_nof_mult*g_in_b_w-1 DOWNTO 0);
+    out_p      : OUT STD_LOGIC_VECTOR(g_nof_mult*(g_in_a_w+g_in_b_w)-1 DOWNTO 0)
+  );
+ END ip_arria10_mult_rtl;
+
+
+ARCHITECTURE str OF ip_arria10_mult_rtl IS
+
+  CONSTANT c_prod_w          : NATURAL := g_in_a_w+g_in_b_w;
+  
+  -- registers
+  SIGNAL reg_a         : STD_LOGIC_VECTOR(in_a'RANGE);
+  SIGNAL reg_b         : STD_LOGIC_VECTOR(in_b'RANGE);
+  SIGNAL reg_prod      : STD_LOGIC_VECTOR(g_nof_mult*c_prod_w-1 DOWNTO 0);
+  SIGNAL reg_result    : STD_LOGIC_VECTOR(out_p'RANGE);
+  
+  -- combinatorial
+  SIGNAL nxt_a         : STD_LOGIC_VECTOR(in_a'RANGE);
+  SIGNAL nxt_b         : STD_LOGIC_VECTOR(in_b'RANGE);
+  SIGNAL nxt_prod      : STD_LOGIC_VECTOR(g_nof_mult*c_prod_w-1 DOWNTO 0);
+  SIGNAL nxt_result    : STD_LOGIC_VECTOR(out_p'RANGE);
+  
+  -- the active signals
+  SIGNAL inp_a         : STD_LOGIC_VECTOR(in_a'RANGE);
+  SIGNAL inp_b         : STD_LOGIC_VECTOR(in_b'RANGE);
+  SIGNAL prod          : STD_LOGIC_VECTOR(g_nof_mult*c_prod_w-1 DOWNTO 0);   -- stage dependent on g_pipeline_product being 0 or 1
+  SIGNAL result        : STD_LOGIC_VECTOR(out_p'RANGE);                      -- stage dependent on g_pipeline_output  being 0 or 1
+  
+BEGIN
+
+  ------------------------------------------------------------------------------
+  -- Registers
+  ------------------------------------------------------------------------------
+  
+  -- Put all potential registers in a single process for optimal DSP inferrence
+  -- Use rst only if it is supported by the DSP primitive, else leave it at '0'
+  p_reg : PROCESS (rst, clk)
+  BEGIN
+    IF rst='1' THEN
+      reg_a      <= (OTHERS=>'0');
+      reg_b      <= (OTHERS=>'0');
+      reg_prod   <= (OTHERS=>'0');
+      reg_result <= (OTHERS=>'0');
+    ELSIF rising_edge(clk) THEN
+      IF clken='1' THEN
+        reg_a      <= nxt_a;
+        reg_b      <= nxt_b;
+        reg_prod   <= nxt_prod;
+        reg_result <= nxt_result;
+      END IF;
+    END IF;
+  END PROCESS;
+  
+  ------------------------------------------------------------------------------
+  -- Inputs
+  ------------------------------------------------------------------------------
+  
+  nxt_a <= in_a;
+  nxt_b <= in_b;
+  
+  no_input_reg : IF g_pipeline_input=0 GENERATE   -- wired
+    inp_a <= nxt_a;
+    inp_b <= nxt_b;
+  END GENERATE;
+  
+  gen_input_reg : IF g_pipeline_input>0 GENERATE  -- register input
+    inp_a <= reg_a;
+    inp_b <= reg_b;
+  END GENERATE;
+  
+  ------------------------------------------------------------------------------
+  -- Products
+  ------------------------------------------------------------------------------
+
+  gen_mult : FOR I IN 0 TO g_nof_mult-1 GENERATE  
+    nxt_prod((I+1)*c_prod_w-1 DOWNTO I*c_prod_w) <=
+      STD_LOGIC_VECTOR(  SIGNED(inp_a((I+1)*g_in_a_w-1 DOWNTO I*g_in_a_w)) *   SIGNED(inp_b((I+1)*g_in_b_w-1 DOWNTO I*g_in_b_w))) WHEN g_representation="SIGNED" ELSE
+      STD_LOGIC_VECTOR(UNSIGNED(inp_a((I+1)*g_in_a_w-1 DOWNTO I*g_in_a_w)) * UNSIGNED(inp_b((I+1)*g_in_b_w-1 DOWNTO I*g_in_b_w)));
+  END GENERATE;
+  
+  no_product_reg : IF g_pipeline_product=0 GENERATE   -- wired
+    prod <= nxt_prod;
+  END GENERATE;  
+  gen_product_reg : IF g_pipeline_product>0 GENERATE  -- register
+    prod <= reg_prod;
+  END GENERATE;
+
+  ------------------------------------------------------------------------------
+  -- Results
+  ------------------------------------------------------------------------------
+  nxt_result <= prod;
+
+  no_result_reg : IF g_pipeline_output=0 GENERATE   -- wired
+    result <= nxt_result;
+  END GENERATE;  
+  gen_result_reg : IF g_pipeline_output>0 GENERATE  -- register
+    result <= reg_result;
+  END GENERATE;
+
+out_p <= result;
+  
+END str;
-- 
GitLab