From a28a4232f9f66523b05b103ce76fa5e00ef3dd06 Mon Sep 17 00:00:00 2001
From: Eric Kooistra <kooistra@astron.nl>
Date: Thu, 28 Oct 2021 17:58:18 +0200
Subject: [PATCH] Add support for round half to even via g_round_even. Not yet
 fully tested. Still to do for s_round().

---
 libraries/base/common/src/vhdl/common_pkg.vhd | 118 +++++-
 .../base/common/src/vhdl/common_round.vhd     |   6 +-
 libraries/base/common/tb/vhdl/tb_round.vhd    | 397 ++++++++++++++----
 3 files changed, 407 insertions(+), 114 deletions(-)

diff --git a/libraries/base/common/src/vhdl/common_pkg.vhd b/libraries/base/common/src/vhdl/common_pkg.vhd
index cf03d8a7b2..a74053a4cd 100644
--- a/libraries/base/common/src/vhdl/common_pkg.vhd
+++ b/libraries/base/common/src/vhdl/common_pkg.vhd
@@ -464,12 +464,14 @@ PACKAGE common_pkg IS
   FUNCTION truncate_or_resize_uvec( vec : STD_LOGIC_VECTOR; b : BOOLEAN; w : NATURAL) RETURN STD_LOGIC_VECTOR;  -- when b=TRUE then truncate to width w, else resize to width w
   FUNCTION truncate_or_resize_svec( vec : STD_LOGIC_VECTOR; b : BOOLEAN; w : NATURAL) RETURN STD_LOGIC_VECTOR;  -- idem for signed values
   
-  FUNCTION s_round(   vec : STD_LOGIC_VECTOR; n : NATURAL; clip : BOOLEAN) RETURN STD_LOGIC_VECTOR;  -- remove n LSBits from vec by rounding away from 0, so result has width vec'LENGTH-n, and clip to avoid wrap
-  FUNCTION s_round(   vec : STD_LOGIC_VECTOR; n : NATURAL)                 RETURN STD_LOGIC_VECTOR;  -- remove n LSBits from vec by rounding away from 0, so result has width vec'LENGTH-n
-  FUNCTION s_round_up(vec : STD_LOGIC_VECTOR; n : NATURAL; clip : BOOLEAN) RETURN STD_LOGIC_VECTOR;  -- idem but round up to +infinity (s_round_up = u_round)
-  FUNCTION s_round_up(vec : STD_LOGIC_VECTOR; n : NATURAL)                 RETURN STD_LOGIC_VECTOR;  -- idem but round up to +infinity (s_round_up = u_round)
-  FUNCTION u_round(   vec : STD_LOGIC_VECTOR; n : NATURAL; clip : BOOLEAN) RETURN STD_LOGIC_VECTOR;  -- idem round up for unsigned values
-  FUNCTION u_round(   vec : STD_LOGIC_VECTOR; n : NATURAL)                 RETURN STD_LOGIC_VECTOR;  -- idem round up for unsigned values
+  FUNCTION s_round(   vec : STD_LOGIC_VECTOR; n : NATURAL; clip       : BOOLEAN) RETURN STD_LOGIC_VECTOR;  -- remove n LSBits from vec by rounding away from 0, so result has width vec'LENGTH-n, and clip to avoid wrap
+  FUNCTION s_round(   vec : STD_LOGIC_VECTOR; n : NATURAL)                       RETURN STD_LOGIC_VECTOR;  -- remove n LSBits from vec by rounding away from 0, so result has width vec'LENGTH-n
+  FUNCTION s_round(   vec : STD_LOGIC_VECTOR; n : NATURAL; clip, even : BOOLEAN) RETURN STD_LOGIC_VECTOR;  -- idem but round half to even for signed
+  FUNCTION s_round_up(vec : STD_LOGIC_VECTOR; n : NATURAL; clip       : BOOLEAN) RETURN STD_LOGIC_VECTOR;  -- idem but round up to +infinity (s_round_up = u_round)
+  FUNCTION s_round_up(vec : STD_LOGIC_VECTOR; n : NATURAL)                       RETURN STD_LOGIC_VECTOR;  -- idem but round up to +infinity (s_round_up = u_round)
+  FUNCTION u_round(   vec : STD_LOGIC_VECTOR; n : NATURAL; clip       : BOOLEAN) RETURN STD_LOGIC_VECTOR;  -- idem round up for unsigned values
+  FUNCTION u_round(   vec : STD_LOGIC_VECTOR; n : NATURAL)                       RETURN STD_LOGIC_VECTOR;  -- idem round up for unsigned values
+  FUNCTION u_round(   vec : STD_LOGIC_VECTOR; n : NATURAL; clip, even : BOOLEAN) RETURN STD_LOGIC_VECTOR;  -- idem but round half to even for unsigned
 
   FUNCTION u_to_s(u : NATURAL; w : NATURAL) RETURN INTEGER;   -- interpret w bit unsigned u as w bit   signed, and remove any MSbits
   FUNCTION s_to_u(s : INTEGER; w : NATURAL) RETURN NATURAL;   -- interpret w bit   signed s as w bit unsigned, and remove any MSbits
@@ -2313,8 +2315,52 @@ PACKAGE BODY common_pkg IS
     RETURN v_res;
   END;
   
-  
-  -- Functions s_round, s_round_up and u_round:
+  -------------------------------------------------------------------------------------------------
+  -- Rounding schemes:
+  -------------------------------------------------------------------------------------------------
+  --
+  -- From https://en.wikipedia.org/wiki/Rounding it follows that there are three main
+  -- categories for rounding to integer:
+  --
+  -- 1) Direct rounding to integer :
+  --    . down           : y = floor(x),
+  --    . up             : y = ceil(x),
+  --    . towards zero   : y = truncate(x)
+  --                         = sgn(x) floor(|x|) = floor(x) when x >= 0, else ceil(x)
+  --    . away from zero : y = sgn(x) ceil(|x|) = ceil(x) when x >= 0, else floor(x)
+  -- 2) Rounding to nearest integer :
+  --    . half down           : y = ceil(x - 0.5),
+  --    . half up             : y = floor(x + 0.5),
+  --    . half towards zero   : y = sgn(x) ceil(|x| - 0.5) = ceil(x - 0.5) when x >= 0, else floor(x + 0.5)
+  --    . half away from zero : y = sgn(x) floor(|x| + 0.5) = floor(x + 0.5) when x >= 0, else ceil(x - 0.5)
+  --    . round half to even : rounds to the nearest even integer when fraction = 0.5 else use
+  --      either floor(x + 0.5) or ceil(x - 0.5), because they are equivalent then. This avoids
+  --      DC bias and bias towards or away from zero.
+  --
+  -- 3) Randomized rounding to an integer : round to nearest when fraction != 0.5, round up or
+  --    down when fraction = 0.5. This avoids DC bias and bias towards or away from zero:
+  --    . alternate tie breaking : alternately select round up or round down when fraction = 0.5
+  --        else either floor(x + 0.5) or ceil(x - 0.5), because they are equivalent then.
+  --    . random tie breaking : idem as alternate, but use random selection.
+  --    . stochastic rounding : round up or down with a probability that depends on proximity.
+  --      This avoids DC bias when the input is not random, e.g. when the input has a constant
+  --      fraction > 0. For DSP with ADC related data with sufficient dynamic range this does
+  --      not occur.
+  --
+  --  The advantage of round half to even, over round with tie breaking is, that for tie
+  --  breaking an external signal is needed to hold the alternate or random selector state,
+  --  whereas for round half to even the current input signal itself determines the
+  --  selection. Hence round half to even can be fully implemented in a function, whereas
+  --  a function for round using tie breaking requires an external state to manage the
+  --  selection.
+  --
+  --  * u_round() and s_round_up() use half up. This introduces +DC bias when fraction 0.5
+  --    occurs.
+  --  * s_round() uses half away from zero, similar to round() in Matlab, TCL and Python 2.
+  --    This avoids DC bias, but does introduce bias away from zero, which can show as an
+  --    upward bias in power values because (negative)**2 > 0 and (positive)**2 > 0.
+  --
+  -- Functions s_round(), s_round_up() and u_round():
   --
   -- . The returned output width is input width - n.
   -- . If n=0 then the return value is the same as the input value so only
@@ -2332,7 +2378,15 @@ PACKAGE BODY common_pkg IS
   --   maximum product is -8*-8=+64 <= 127-8, so wrapping due to rounding
   --   overflow will never occur.
 
-  FUNCTION s_round(vec : STD_LOGIC_VECTOR; n : NATURAL; clip : BOOLEAN) RETURN STD_LOGIC_VECTOR IS
+  FUNCTION s_round(vec : STD_LOGIC_VECTOR; n : NATURAL; clip, even : BOOLEAN) RETURN STD_LOGIC_VECTOR IS
+    -- # Round half to even algorithm:
+    -- #                vec: -3.5  -2.5  -1.5  -0.5  0.5  1.5  2.5  3.5
+    -- #   floor(vec + 0.5)  -3    -2    -1     0    1    2    3    4
+    -- #   round even        -4    -2    -2     0    0    2    2    4
+    -- #   round even clip   -4    -2    -2     0    0    2    2    3, clip to c_clip = 3 when c_out_w = 3
+    -- v_out = floor(vec + 0.5)
+    -- if fraction = 0.5 and v_out = odd:
+    --   v_out -= 1
     -- Use SIGNED to avoid NATURAL (32 bit range) overflow error
     CONSTANT c_in_w  : NATURAL := vec'LENGTH;
     CONSTANT c_out_w : NATURAL := vec'LENGTH - n;
@@ -2360,6 +2414,11 @@ PACKAGE BODY common_pkg IS
     RETURN STD_LOGIC_VECTOR(v_out);
   END;
 
+  FUNCTION s_round(vec : STD_LOGIC_VECTOR; n : NATURAL; clip : BOOLEAN) RETURN STD_LOGIC_VECTOR IS
+  BEGIN
+    RETURN s_round(vec, n, clip, FALSE);  -- 3-argument overload: delegate with even = FALSE, so no round half to even
+  END;
+
   FUNCTION s_round(vec : STD_LOGIC_VECTOR; n : NATURAL) RETURN STD_LOGIC_VECTOR IS
   BEGIN
     RETURN s_round(vec, n, FALSE);  -- no round clip
@@ -2377,23 +2436,39 @@ PACKAGE BODY common_pkg IS
   END;
   
   -- Unsigned numbers are round up (almost same as s_round, but without the else on negative vec)
-  FUNCTION u_round(vec : STD_LOGIC_VECTOR; n : NATURAL; clip : BOOLEAN ) RETURN STD_LOGIC_VECTOR IS
+  FUNCTION u_round(vec : STD_LOGIC_VECTOR; n : NATURAL; clip, even : BOOLEAN) RETURN STD_LOGIC_VECTOR IS
+    -- # Round half to even algorithm:
+    -- #                vec: 0.5  1.5  2.5  3.5  4.5  5.5  6.5  7.5
+    -- #   floor(vec + 0.5)  1    2    3    4    5    6    7    8
+    -- #   round even        0    2    2    4    4    6    6    8
+    -- #   round even clip   0    2    2    4    4    6    6    7, clip to c_clip = 7 when c_out_w = 3
+    -- v_out = floor(vec + 0.5)
+    -- if fraction = 0.5 and v_out = odd:
+    --   v_out -= 1
+    --
     -- Use UNSIGNED to avoid NATURAL (32 bit range) overflow error
-    CONSTANT c_in_w  : NATURAL := vec'LENGTH;
-    CONSTANT c_out_w : NATURAL := vec'LENGTH - n;
-    CONSTANT c_one   : UNSIGNED(c_in_w-1 DOWNTO 0) := TO_UNSIGNED(1, c_in_w);
-    CONSTANT c_half  : UNSIGNED(c_in_w-1 DOWNTO 0) := SHIFT_LEFT(c_one, n-1);                        -- = 2**(n-1)
-    CONSTANT c_max   : UNSIGNED(c_in_w-1 DOWNTO 0) := UNSIGNED(c_slv1(c_in_w-1 DOWNTO 0)) - c_half;  -- = 2**c_in_w-1 - c_half  
-    CONSTANT c_clip  : UNSIGNED(c_out_w-1 DOWNTO 0) := UNSIGNED(c_slv1(c_out_w-1 DOWNTO 0));         -- = 2**c_out_w-1
-    VARIABLE v_in    : UNSIGNED(c_in_w-1 DOWNTO 0);
-    VARIABLE v_out   : UNSIGNED(c_out_w-1 DOWNTO 0);
+    CONSTANT c_in_w     : NATURAL := vec'LENGTH;
+    CONSTANT c_out_w    : NATURAL := vec'LENGTH - n;
+    CONSTANT c_one      : UNSIGNED(c_in_w-1 DOWNTO 0) := TO_UNSIGNED(1, c_in_w);
+    CONSTANT c_half     : UNSIGNED(c_in_w-1 DOWNTO 0) := SHIFT_LEFT(c_one, n-1);                        -- = 2**(n-1)
+    CONSTANT c_max      : UNSIGNED(c_in_w-1 DOWNTO 0) := UNSIGNED(c_slv1(c_in_w-1 DOWNTO 0)) - c_half;  -- = 2**c_in_w-1 - c_half
+    CONSTANT c_clip     : UNSIGNED(c_out_w-1 DOWNTO 0) := UNSIGNED(c_slv1(c_out_w-1 DOWNTO 0));         -- = 2**c_out_w-1
+    VARIABLE v_in       : UNSIGNED(c_in_w-1 DOWNTO 0);
+    VARIABLE v_fraction : UNSIGNED(n-1 DOWNTO 0);
+    VARIABLE v_out      : UNSIGNED(c_out_w-1 DOWNTO 0);
   BEGIN
     v_in := UNSIGNED(vec);
     IF n > 0 THEN
       IF clip = TRUE AND v_in > c_max THEN
         v_out := c_clip;                                              -- Round clip to +max to avoid wrap to 0
       ELSE
-        v_out := RESIZE_NUM(SHIFT_RIGHT(v_in + c_half, n), c_out_w);  -- Round up
+        v_out := RESIZE_NUM(SHIFT_RIGHT(v_in + c_half, n), c_out_w);  -- Round half up using floor(vec + 0.5)
+        IF even = TRUE THEN
+          v_fraction := v_in(n-1 DOWNTO 0);                           -- take fraction from v_in (c_in_w-1 DOWNTO 0), vec may have any index range
+          IF v_fraction = c_half AND v_out(0) = '1' THEN              -- Round half to even, so when odd subtract 1
+            v_out := v_out - 1;                                       -- to make v_out even
+          END IF;
+        END IF;
       END IF;
     ELSE
       v_out := RESIZE_NUM(v_in, c_out_w);                             -- NOP
@@ -2401,6 +2476,11 @@ PACKAGE BODY common_pkg IS
     RETURN STD_LOGIC_VECTOR(v_out);
   END;
 
+  FUNCTION u_round(vec : STD_LOGIC_VECTOR; n : NATURAL; clip : BOOLEAN) RETURN STD_LOGIC_VECTOR IS
+  BEGIN
+    RETURN u_round(vec, n, clip, FALSE);  -- no round half to even
+  END;
+
   FUNCTION u_round(vec : STD_LOGIC_VECTOR; n : NATURAL) RETURN STD_LOGIC_VECTOR IS
   BEGIN
     RETURN u_round(vec, n, FALSE);  -- no round clip
diff --git a/libraries/base/common/src/vhdl/common_round.vhd b/libraries/base/common/src/vhdl/common_round.vhd
index 60e57c440b..588ecaa385 100644
--- a/libraries/base/common/src/vhdl/common_round.vhd
+++ b/libraries/base/common/src/vhdl/common_round.vhd
@@ -36,12 +36,14 @@ ENTITY common_round IS
   --
   -- If the input comes from a product and is rounded to the input width then g_round_clip can safely be FALSE, because e.g. for unsigned
   -- 4b*4b=8b->4b the maximum product is 15*15=225 <= 255-8, so wrapping will never occur.
+  -- When g_round = FALSE then truncate (= remove) the LSbits and then g_round_clip and g_round_even are don't care.
   -- 
 
   GENERIC (
     g_representation  : STRING  := "SIGNED";  -- SIGNED (round +-0.5 away from zero to +- infinity) or UNSIGNED rounding (round 0.5 up to + inifinity)
     g_round           : BOOLEAN := TRUE;      -- when TRUE round the input, else truncate the input
     g_round_clip      : BOOLEAN := FALSE;     -- when TRUE clip rounded input >= +max to avoid wrapping to output -min (signed) or 0 (unsigned)
+    g_round_even      : BOOLEAN := FALSE;     -- when TRUE round half to even, else round half away from zero (SIGNED) or round half up (UNSIGNED)
     g_pipeline_input  : NATURAL := 0;         -- >= 0
     g_pipeline_output : NATURAL := 1;         -- >= 0, use g_pipeline_input=0 and g_pipeline_output=0 for combinatorial output
     g_in_dat_w        : NATURAL := 36;
@@ -90,10 +92,10 @@ BEGIN
   -- Decrease to out_dat width by c_remove_w number of LSbits
   -- . rounding
   gen_s : IF c_remove_w>0 AND g_round=TRUE AND g_representation="SIGNED" GENERATE
-    res_dat <= s_round(reg_dat, c_remove_w, g_round_clip);
+    res_dat <= s_round(reg_dat, c_remove_w, g_round_clip, g_round_even);
   END GENERATE;
   gen_u : IF c_remove_w>0 AND g_round=TRUE AND g_representation="UNSIGNED" GENERATE
-    res_dat <= u_round(reg_dat, c_remove_w, g_round_clip);
+    res_dat <= u_round(reg_dat, c_remove_w, g_round_clip, g_round_even);
   END GENERATE;
   -- . truncating
   gen_t : IF c_remove_w>0 AND g_round=FALSE GENERATE
diff --git a/libraries/base/common/tb/vhdl/tb_round.vhd b/libraries/base/common/tb/vhdl/tb_round.vhd
index 65ca3d57e9..51bbcbbfee 100644
--- a/libraries/base/common/tb/vhdl/tb_round.vhd
+++ b/libraries/base/common/tb/vhdl/tb_round.vhd
@@ -19,186 +19,397 @@
 --
 -------------------------------------------------------------------------------
 
+-- Author: E. Kooistra 2009, updated in 2021
+-- Purpose: Test bench for common_round.vhd
+-- Usage:
+-- > as 5
+--   and manually set the signed data output signals to radix decimal
+-- > run -a
+
 LIBRARY IEEE;
 USE IEEE.STD_LOGIC_1164.ALL;
 USE IEEE.NUMERIC_STD.ALL;
 USE work.common_pkg.ALL;
 
--- Purpose: Test bench for common_round.vhd
--- Usage:
--- > do wave_round.do
--- > run 500 ns
--- . Observe reg_dat with respect to out_sdat_no_rc   for signed   round without clipping of rounding overflow
--- . Observe reg_dat with respect to out_sdat_with_rc for signed   round with    clipping of rounding overflow
--- . Observe reg_dat with respect to out_udat for         unsigned round
--- . Observe reg_dat with respect to out_tdat for         truncation
-
 ENTITY tb_round IS
+  GENERIC (
+    g_in_dat_w        : NATURAL := 5;
+    g_out_dat_w       : NATURAL := 3
+  );
 END tb_round;
 
 ARCHITECTURE tb OF tb_round IS
 
   CONSTANT clk_period   : TIME    := 10 ns;
   
-  CONSTANT c_pipeline_input  : NATURAL := 0;
-  CONSTANT c_pipeline_output : NATURAL := 0;
-  CONSTANT c_pipeline        : NATURAL := c_pipeline_input + c_pipeline_output;
+  CONSTANT c_pipeline_input   : NATURAL := 0;
+  CONSTANT c_pipeline_output  : NATURAL := 0;
+  CONSTANT c_pipeline         : NATURAL := c_pipeline_input + c_pipeline_output;
   
-  --CONSTANT c_round_clip   : BOOLEAN := TRUE;
-  CONSTANT c_round_clip   : BOOLEAN := FALSE;
-  CONSTANT c_in_dat_w     : NATURAL := 5;
-  CONSTANT c_out_dat_w    : NATURAL := 3;
-  CONSTANT c_round_w      : NATURAL := c_in_dat_w - c_out_dat_w;
+  CONSTANT c_round_w          : NATURAL := g_in_dat_w - g_out_dat_w;
   
-  SIGNAL in_val           : STD_LOGIC;
-  SIGNAL in_dat           : STD_LOGIC_VECTOR(c_in_dat_w-1 DOWNTO 0);
-  SIGNAL in_vec           : STD_LOGIC_VECTOR(c_in_dat_w   DOWNTO 0);
-  SIGNAL reg_vec          : STD_LOGIC_VECTOR(c_in_dat_w   DOWNTO 0);
-  SIGNAL reg_val          : STD_LOGIC;
-  SIGNAL reg_dat          : STD_LOGIC_VECTOR(c_in_dat_w-1 DOWNTO 0);
-  
-  SIGNAL out_sdat_no_rc   : STD_LOGIC_VECTOR(c_out_dat_w-1 DOWNTO 0);
-  SIGNAL out_sdat_with_rc : STD_LOGIC_VECTOR(c_out_dat_w-1 DOWNTO 0);
-  SIGNAL out_udat         : STD_LOGIC_VECTOR(c_out_dat_w-1 DOWNTO 0);
-  SIGNAL out_tdat         : STD_LOGIC_VECTOR(c_out_dat_w-1 DOWNTO 0);  -- truncate
+  CONSTANT c_in_smax          : INTEGER :=  2**(g_in_dat_w-1) - 1;
+  CONSTANT c_in_smin          : INTEGER := -2**(g_in_dat_w-1);
+  CONSTANT c_in_half          : INTEGER :=  2**c_round_w / 2;  -- = 2**(c_round_w-1), and 0 when c_round_w = 0 (integer 2**(-1) is an error)
+  CONSTANT c_in_umax          : INTEGER :=  2**g_in_dat_w - 1;
+  CONSTANT c_in_umax_no_clip  : INTEGER :=  c_in_umax - c_in_half;
+
+  SIGNAL tb_end               : STD_LOGIC := '0';
+  SIGNAL clk                  : STD_LOGIC := '1';
+
+  SIGNAL in_val                    : STD_LOGIC;
+  SIGNAL in_dat                    : STD_LOGIC_VECTOR(g_in_dat_w-1 DOWNTO 0);
+  SIGNAL reg_val                   : STD_LOGIC;
+  SIGNAL reg_dat                   : STD_LOGIC_VECTOR(g_in_dat_w-1 DOWNTO 0);
   
-  SIGNAL tb_end           : STD_LOGIC := '0';
-  SIGNAL clk              : STD_LOGIC := '1';
-  SIGNAL rst              : STD_LOGIC := '1';
+  -- Signed output data
+  -- . view as radix decimal in Wave window
+  SIGNAL reg_sdat                  : STD_LOGIC_VECTOR(g_in_dat_w-1 DOWNTO 0);
+  SIGNAL round_sdat_rclip          : STD_LOGIC_VECTOR(g_out_dat_w-1 DOWNTO 0);
+  SIGNAL round_sdat_rclip_even     : STD_LOGIC_VECTOR(g_out_dat_w-1 DOWNTO 0);
+  SIGNAL round_sdat_no_rclip       : STD_LOGIC_VECTOR(g_out_dat_w-1 DOWNTO 0);
+  SIGNAL round_sdat_no_rclip_even  : STD_LOGIC_VECTOR(g_out_dat_w-1 DOWNTO 0);
+  SIGNAL trunc_sdat                : STD_LOGIC_VECTOR(g_out_dat_w-1 DOWNTO 0);
+
+  -- . show as real in Wave window
+  SIGNAL reg_sreal                         : REAL := 0.0;
+  SIGNAL round_sreal_rclip                 : REAL := 0.0;
+  SIGNAL round_sreal_rclip_even            : REAL := 0.0;
+  SIGNAL round_sreal_no_rclip              : REAL := 0.0;
+  SIGNAL round_sreal_no_rclip_even         : REAL := 0.0;
+  SIGNAL trunc_sreal                       : REAL := 0.0;
 
-  CONSTANT c_init         : STD_LOGIC_VECTOR(in_dat'RANGE) := (OTHERS=>'0');
+  -- . sum values to determine bias compared to input range
+  SIGNAL sum_reg_sreal                     : REAL;
+  SIGNAL sum_round_sreal_rclip             : REAL;
+  SIGNAL sum_round_sreal_rclip_even        : REAL;
+  SIGNAL sum_round_sreal_no_rclip          : REAL;
+  SIGNAL sum_round_sreal_no_rclip_even     : REAL;
+  SIGNAL sum_trunc_sreal                   : REAL;
+
+  -- Unsigned output data
+  -- . view as radix unsigned in Wave window
+  SIGNAL reg_udat                  : STD_LOGIC_VECTOR(g_in_dat_w-1 DOWNTO 0);
+  SIGNAL round_udat_rclip          : STD_LOGIC_VECTOR(g_out_dat_w-1 DOWNTO 0);
+  SIGNAL round_udat_rclip_even     : STD_LOGIC_VECTOR(g_out_dat_w-1 DOWNTO 0);
+  SIGNAL round_udat_no_rclip       : STD_LOGIC_VECTOR(g_out_dat_w-1 DOWNTO 0);
+  SIGNAL round_udat_no_rclip_even  : STD_LOGIC_VECTOR(g_out_dat_w-1 DOWNTO 0);
+  SIGNAL trunc_udat                : STD_LOGIC_VECTOR(g_out_dat_w-1 DOWNTO 0);
   
+  -- . show as real in Wave window
+  SIGNAL reg_ureal                         : REAL := 0.0;
+  SIGNAL round_ureal_rclip                 : REAL := 0.0;
+  SIGNAL round_ureal_rclip_even            : REAL := 0.0;
+  SIGNAL round_ureal_no_rclip              : REAL := 0.0;
+  SIGNAL round_ureal_no_rclip_even         : REAL := 0.0;
+  SIGNAL trunc_ureal                       : REAL := 0.0;
+
+  -- . sum values to determine bias compared to input range
+  SIGNAL sum_reg_ureal                     : REAL;
+  SIGNAL sum_round_ureal_rclip             : REAL;
+  SIGNAL sum_round_ureal_rclip_even        : REAL;
+  SIGNAL sum_round_ureal_no_rclip          : REAL;
+  SIGNAL sum_round_ureal_no_rclip_even     : REAL;
+  SIGNAL sum_trunc_ureal                   : REAL;
+
+  -- Hold sums
+  SIGNAL reg_sum_reg_sreal                 : REAL := 0.0;
+  SIGNAL reg_sum_round_sreal_rclip         : REAL := 0.0;
+  SIGNAL reg_sum_round_sreal_rclip_even    : REAL := 0.0;
+  SIGNAL reg_sum_round_sreal_no_rclip      : REAL := 0.0;
+  SIGNAL reg_sum_round_sreal_no_rclip_even : REAL := 0.0;
+  SIGNAL reg_sum_trunc_sreal               : REAL := 0.0;
+
+  SIGNAL reg_sum_reg_ureal                 : REAL := 0.0;
+  SIGNAL reg_sum_round_ureal_rclip         : REAL := 0.0;
+  SIGNAL reg_sum_round_ureal_rclip_even    : REAL := 0.0;
+  SIGNAL reg_sum_round_ureal_no_rclip      : REAL := 0.0;
+  SIGNAL reg_sum_round_ureal_no_rclip_even : REAL := 0.0;
+  SIGNAL reg_sum_trunc_ureal               : REAL := 0.0;
+
 BEGIN
 
   -- Stimuli
   clk <= NOT clk OR tb_end AFTER clk_period/2;
-  rst <= '1', '0' AFTER 3*clk_period;
   
-  -- Testbench end
-  p_tb_end : PROCESS
-    VARIABLE v_dat : STD_LOGIC_VECTOR(in_dat'RANGE);
+  p_stimuli : PROCESS
   BEGIN
-    tb_end <= '0';
-    WAIT UNTIL in_val='1';
-    WAIT UNTIL rising_edge(clk);
-    v_dat := in_dat;              -- keep first in_dat 
-    WAIT UNTIL rising_edge(clk);
-    WAIT UNTIL v_dat=in_dat;      -- wait until all incrementing in_dat values have been applied at least once
-    WAIT UNTIL rising_edge(clk);
-    WAIT UNTIL rising_edge(clk);
+    in_val <= '0';
+    in_dat <= (OTHERS=>'0');
+    FOR I IN 0 TO 3 LOOP WAIT UNTIL rising_edge(clk); END LOOP;
+    in_val <= '1';
     WAIT UNTIL rising_edge(clk);
+    FOR I IN 0 TO 2**g_in_dat_w - 1 LOOP
+      in_dat <= INCR_UVEC(in_dat, 1);
+      WAIT UNTIL rising_edge(clk);
+    END LOOP;
+    in_val <= '0';
+    in_dat <= (OTHERS=>'0');
+    FOR I IN 0 TO 3 LOOP WAIT UNTIL rising_edge(clk); END LOOP;
     tb_end <= '1';
     WAIT;
   END PROCESS;
-  
-  p_clk : PROCESS (rst, clk)
+
+  -- Sum fractions
+  -- . signed, skip c_in_smin from sum to have final input sum_reg_sreal = 0
+  sum_reg_sreal                 <= reg_sum_reg_sreal                 + sel_a_b(SIGNED(reg_sdat) = c_in_smin, 0.0, reg_sreal);                -- input
+  sum_round_sreal_no_rclip      <= reg_sum_round_sreal_no_rclip      + sel_a_b(SIGNED(reg_sdat) = c_in_smin, 0.0, round_sreal_no_rclip);     -- outputs
+  sum_round_sreal_rclip         <= reg_sum_round_sreal_rclip         + sel_a_b(SIGNED(reg_sdat) = c_in_smin, 0.0, round_sreal_rclip);
+  sum_round_sreal_no_rclip_even <= reg_sum_round_sreal_no_rclip_even + sel_a_b(SIGNED(reg_sdat) = c_in_smin, 0.0, round_sreal_no_rclip_even);
+  sum_round_sreal_rclip_even    <= reg_sum_round_sreal_rclip_even    + sel_a_b(SIGNED(reg_sdat) = c_in_smin, 0.0, round_sreal_rclip_even);
+  sum_trunc_sreal               <= reg_sum_trunc_sreal               + sel_a_b(SIGNED(reg_sdat) = c_in_smin, 0.0, trunc_sreal);
+
+  -- . unsigned, output sums get disturbed due to clip or wrap of round up overflow when UNSIGNED(reg_udat) > c_in_umax_no_clip
+  sum_reg_ureal                 <= reg_sum_reg_ureal                 + reg_ureal;             -- input
+  sum_round_ureal_no_rclip      <= reg_sum_round_ureal_no_rclip      + round_ureal_no_rclip;  -- outputs
+  sum_round_ureal_rclip         <= reg_sum_round_ureal_rclip         + round_ureal_rclip;
+  sum_round_ureal_no_rclip_even <= reg_sum_round_ureal_no_rclip_even + round_ureal_no_rclip_even;
+  sum_round_ureal_rclip_even    <= reg_sum_round_ureal_rclip_even    + round_ureal_rclip_even;
+  sum_trunc_ureal               <= reg_sum_trunc_ureal               + trunc_ureal;
+
+  p_reg : PROCESS(clk)
   BEGIN
-    IF rst='1' THEN
-      in_val      <= '0';
-      in_dat      <= c_init;
-    ELSIF rising_edge(clk) THEN
-      in_val      <= '1';
-      in_dat      <= STD_LOGIC_VECTOR(SIGNED(in_dat)+1);
+    IF rising_edge(clk) THEN
+      IF reg_val = '1' THEN
+        -- . signed
+        reg_sum_reg_sreal                 <= sum_reg_sreal;              -- input
+        reg_sum_round_sreal_no_rclip      <= sum_round_sreal_no_rclip;   -- outputs
+        reg_sum_round_sreal_rclip         <= sum_round_sreal_rclip;
+        reg_sum_round_sreal_no_rclip_even <= sum_round_sreal_no_rclip_even;
+        reg_sum_round_sreal_rclip_even    <= sum_round_sreal_rclip_even;
+        reg_sum_trunc_sreal               <= sum_trunc_sreal;
+
+        -- . unsigned
+        reg_sum_reg_ureal                 <= sum_reg_ureal;              -- input
+        reg_sum_round_ureal_no_rclip      <= sum_round_ureal_no_rclip;   -- outputs
+        reg_sum_round_ureal_rclip         <= sum_round_ureal_rclip;
+        reg_sum_round_ureal_no_rclip_even <= sum_round_ureal_no_rclip_even;
+        reg_sum_round_ureal_rclip_even    <= sum_round_ureal_rclip_even;
+        reg_sum_trunc_ureal               <= sum_trunc_ureal;
+      END IF;
     END IF;
   END PROCESS;
+
+  -- Delay input as much as DUT output, assume c_pipeline = 0
+  reg_val <= in_val;
+  reg_dat <= in_dat;
   
-  -- Delay input as much as DUT output
-  in_vec <= in_val & in_dat;
-  
-  u_pipe : ENTITY work.common_pipeline
+  reg_sdat <= reg_dat;
+  reg_udat <= reg_dat;
+
+  -----------------------------------------------------------------------------
+  -- SIGNED DUTs
+  -----------------------------------------------------------------------------
+
+  s_round_no_rclip : ENTITY work.common_round
   GENERIC MAP (
-    g_representation => "SIGNED",
-    g_pipeline       => c_pipeline,
-    g_in_dat_w       => c_in_dat_w+1,
-    g_out_dat_w      => c_in_dat_w+1
+    g_representation  => "SIGNED",
+    g_round           => TRUE,
+    g_round_clip      => FALSE,
+    g_pipeline_input  => c_pipeline_input,
+    g_pipeline_output => c_pipeline_output,
+    g_in_dat_w        => g_in_dat_w,
+    g_out_dat_w       => g_out_dat_w
   )
   PORT MAP (
-    clk     => clk,
-    in_dat  => in_vec,
-    out_dat => reg_vec
+    clk            => clk,
+    in_dat         => in_dat,
+    out_dat        => round_sdat_no_rclip
+  );
+
+  s_round_rclip : ENTITY work.common_round
+  GENERIC MAP (
+    g_representation  => "SIGNED",
+    g_round           => TRUE,
+    g_round_clip      => TRUE,
+    g_pipeline_input  => c_pipeline_input,
+    g_pipeline_output => c_pipeline_output,
+    g_in_dat_w        => g_in_dat_w,
+    g_out_dat_w       => g_out_dat_w
+  )
+  PORT MAP (
+    clk            => clk,
+    in_dat         => in_dat,
+    out_dat        => round_sdat_rclip
   );
   
-  reg_val <= reg_vec(c_in_dat_w);
-  reg_dat <= reg_vec(c_in_dat_w-1 DOWNTO 0);
-  
-  -- DUT for "SIGNED" round without clipping of rounding overflow
-  u_s_round : ENTITY work.common_round
+  s_round_no_rclip_even : ENTITY work.common_round
   GENERIC MAP (
     g_representation  => "SIGNED",
     g_round           => TRUE,
-    g_round_clip      => FALSE,
+    g_round_clip      => FALSE,  -- no clip of rounding overflow, this is what distinguishes it from s_round_rclip_even
+    g_round_even      => TRUE,
     g_pipeline_input  => c_pipeline_input,
     g_pipeline_output => c_pipeline_output,
-    g_in_dat_w        => c_in_dat_w,
-    g_out_dat_w       => c_out_dat_w
+    g_in_dat_w        => g_in_dat_w,
+    g_out_dat_w       => g_out_dat_w
   )
   PORT MAP (
     clk            => clk,
-    clken          => '1',
     in_dat         => in_dat,
-    out_dat        => out_sdat_no_rc
+    out_dat        => round_sdat_no_rclip_even
   );
 
-  -- DUT for "SIGNED" round with clipping of rounding overflow
-  u_s_round_rc : ENTITY work.common_round
+  s_round_rclip_even : ENTITY work.common_round
   GENERIC MAP (
     g_representation  => "SIGNED",
     g_round           => TRUE,
     g_round_clip      => TRUE,
+    g_round_even      => TRUE,
     g_pipeline_input  => c_pipeline_input,
     g_pipeline_output => c_pipeline_output,
-    g_in_dat_w        => c_in_dat_w,
-    g_out_dat_w       => c_out_dat_w
+    g_in_dat_w        => g_in_dat_w,
+    g_out_dat_w       => g_out_dat_w
   )
   PORT MAP (
     clk            => clk,
-    clken          => '1',
     in_dat         => in_dat,
-    out_dat        => out_sdat_with_rc
+    out_dat        => round_sdat_rclip_even
   );
-  
-  -- DUT for "UNSIGNED" round
-  u_u_round : ENTITY work.common_round
+
+  s_truncate : ENTITY work.common_round
+  GENERIC MAP (
+    g_representation  => "SIGNED",
+    g_round           => FALSE,
+    g_round_clip      => FALSE,
+    g_pipeline_input  => c_pipeline_input,
+    g_pipeline_output => c_pipeline_output,
+    g_in_dat_w        => g_in_dat_w,
+    g_out_dat_w       => g_out_dat_w
+  )
+  PORT MAP (
+    clk            => clk,
+    in_dat         => in_dat,
+    out_dat        => trunc_sdat
+  );
+
+
+  -----------------------------------------------------------------------------
+  -- UNSIGNED DUTs
+  -----------------------------------------------------------------------------
+
+  u_round_no_rclip : ENTITY work.common_round
   GENERIC MAP (
     g_representation  => "UNSIGNED",
     g_round           => TRUE,
-    g_round_clip      => c_round_clip,
+    g_round_clip      => FALSE,
     g_pipeline_input  => c_pipeline_input,
     g_pipeline_output => c_pipeline_output,
-    g_in_dat_w        => c_in_dat_w,
-    g_out_dat_w       => c_out_dat_w
+    g_in_dat_w        => g_in_dat_w,
+    g_out_dat_w       => g_out_dat_w
   )
   PORT MAP (
     clk            => clk,
-    clken          => '1',
     in_dat         => in_dat,
-    out_dat        => out_udat
+    out_dat        => round_udat_no_rclip
   );
   
+  u_round_rclip : ENTITY work.common_round
+  GENERIC MAP (
+    g_representation  => "UNSIGNED",
+    g_round           => TRUE,
+    g_round_clip      => TRUE,
+    g_pipeline_input  => c_pipeline_input,
+    g_pipeline_output => c_pipeline_output,
+    g_in_dat_w        => g_in_dat_w,
+    g_out_dat_w       => g_out_dat_w
+  )
+  PORT MAP (
+    clk            => clk,
+    in_dat         => in_dat,
+    out_dat        => round_udat_rclip
+  );
+
+  u_round_no_rclip_even : ENTITY work.common_round
+  GENERIC MAP (
+    g_representation  => "UNSIGNED",
+    g_round           => TRUE,
+    g_round_clip      => FALSE,
+    g_round_even      => TRUE,
+    g_pipeline_input  => c_pipeline_input,
+    g_pipeline_output => c_pipeline_output,
+    g_in_dat_w        => g_in_dat_w,
+    g_out_dat_w       => g_out_dat_w
+  )
+  PORT MAP (
+    clk            => clk,
+    in_dat         => in_dat,
+    out_dat        => round_udat_no_rclip_even
+  );
+
+  u_round_rclip_even : ENTITY work.common_round
+  GENERIC MAP (
+    g_representation  => "UNSIGNED",
+    g_round           => TRUE,
+    g_round_clip      => TRUE,
+    g_round_even      => TRUE,
+    g_pipeline_input  => c_pipeline_input,
+    g_pipeline_output => c_pipeline_output,
+    g_in_dat_w        => g_in_dat_w,
+    g_out_dat_w       => g_out_dat_w
+  )
+  PORT MAP (
+    clk            => clk,
+    in_dat         => in_dat,
+    out_dat        => round_udat_rclip_even
+  );
+
   -- DUT for truncate
   u_truncate : ENTITY work.common_round
   GENERIC MAP (
     g_representation  => "UNSIGNED",
     g_round           => FALSE,
-    g_round_clip      => c_round_clip,
+    g_round_clip      => FALSE,
     g_pipeline_input  => c_pipeline_input,
     g_pipeline_output => c_pipeline_output,
-    g_in_dat_w        => c_in_dat_w,
-    g_out_dat_w       => c_out_dat_w
+    g_in_dat_w        => g_in_dat_w,
+    g_out_dat_w       => g_out_dat_w
   )
   PORT MAP (
     clk            => clk,
-    clken          => '1',
     in_dat         => in_dat,
-    out_dat        => out_tdat
+    out_dat        => trunc_udat
   );
   
-  
+  -- Observe fixed point SLV values as REAL
+  -- . signed
+  reg_sreal                 <= TO_SREAL(reg_sdat,                 c_round_w);
+  round_sreal_no_rclip      <= TO_SREAL(round_sdat_no_rclip,      0);
+  round_sreal_rclip         <= TO_SREAL(round_sdat_rclip,         0);
+  round_sreal_no_rclip_even <= TO_SREAL(round_sdat_no_rclip_even, 0);
+  round_sreal_rclip_even    <= TO_SREAL(round_sdat_rclip_even,    0);
+  trunc_sreal               <= TO_SREAL(trunc_sdat,               0);
+
+  -- . unsigned
+  reg_ureal                 <= TO_UREAL(reg_udat,                 c_round_w);
+  round_ureal_no_rclip      <= TO_UREAL(round_udat_no_rclip,      0);
+  round_ureal_rclip         <= TO_UREAL(round_udat_rclip,         0);
+  round_ureal_no_rclip_even <= TO_UREAL(round_udat_no_rclip_even, 0);
+  round_ureal_rclip_even    <= TO_UREAL(round_udat_rclip_even,    0);
+  trunc_ureal               <= TO_UREAL(trunc_udat,               0);
+
   -- Verification
   p_verify : PROCESS
   BEGIN
     WAIT UNTIL rising_edge(clk);
     IF reg_val = '1' THEN
+      IF c_round_w = 0 THEN
+        -- Without rounding the expected value is same as input value
+        -- . signed
+        ASSERT   SIGNED(round_sdat_no_rclip      ) =   SIGNED(reg_dat) REPORT "Wrong wired round_sdat_no_rclip"      SEVERITY ERROR;
+        ASSERT   SIGNED(round_sdat_rclip         ) =   SIGNED(reg_dat) REPORT "Wrong wired round_sdat_rclip"         SEVERITY ERROR;
+        ASSERT   SIGNED(round_sdat_no_rclip_even ) =   SIGNED(reg_dat) REPORT "Wrong wired round_sdat_no_rclip_even" SEVERITY ERROR;
+        ASSERT   SIGNED(round_sdat_rclip_even    ) =   SIGNED(reg_dat) REPORT "Wrong wired round_sdat_rclip_even"    SEVERITY ERROR;
+        ASSERT   SIGNED(trunc_sdat               ) =   SIGNED(reg_dat) REPORT "Wrong wired trunc_sdat"               SEVERITY ERROR;
+        -- . unsigned
+        ASSERT UNSIGNED(round_udat_no_rclip      ) = UNSIGNED(reg_dat) REPORT "Wrong wired round_udat_no_rclip"      SEVERITY ERROR;
+        ASSERT UNSIGNED(round_udat_rclip         ) = UNSIGNED(reg_dat) REPORT "Wrong wired round_udat_rclip"         SEVERITY ERROR;
+        ASSERT UNSIGNED(round_udat_no_rclip_even ) = UNSIGNED(reg_dat) REPORT "Wrong wired round_udat_no_rclip_even" SEVERITY ERROR;
+        ASSERT UNSIGNED(round_udat_rclip_even    ) = UNSIGNED(reg_dat) REPORT "Wrong wired round_udat_rclip_even"    SEVERITY ERROR;
+        ASSERT UNSIGNED(trunc_udat               ) = UNSIGNED(reg_dat) REPORT "Wrong wired trunc_udat"               SEVERITY ERROR;
+      ELSE
+        -- For reduced width compare unsigned with lowrange
+      END IF;
     END IF;
   END PROCESS;
 
-- 
GitLab