diff --git a/applications/apertif/designs/apertif_unb1_fn_beamformer/hdllib.cfg b/applications/apertif/designs/apertif_unb1_fn_beamformer/hdllib.cfg
new file mode 100644
index 0000000000000000000000000000000000000000..3be48cde3e344b0025b31f6f831be507e5fc2e8b
--- /dev/null
+++ b/applications/apertif/designs/apertif_unb1_fn_beamformer/hdllib.cfg
@@ -0,0 +1,31 @@
+hdl_lib_name = apertif_unb1_fn_beamformer
+hdl_library_clause_name = apertif_unb1_fn_beamformer_lib
+hdl_lib_uses_synth = common technology tech_mac_10g tr_10GbE mm i2c unb1_board bf apertif tech_ddr io_ddr diag fringe_stop ss
+hdl_lib_technology = ip_stratixiv
+#hdl_lib_include_ip = ip_stratixiv_ddr3_uphy_4g_single_rank_800_master
+
+synth_files =
+    ../../../../libraries/dsp/bf/designs/unb1_fn_bf/src/vhdl/node_unb1_fn_bf.vhd
+    $HDL_BUILD_DIR/unb1/quartus/apertif_unb1_fn_beamformer/sopc_apertif_unb1_fn_beamformer.vhd
+    src/vhdl/apertif_unb1_fn_beamformer_pkg.vhd
+    src/vhdl/apertif_unb1_fn_beamformer_udp_offload.vhd
+    src/vhdl/mmm_apertif_unb1_fn_beamformer.vhd
+    src/vhdl/node_apertif_unb1_fn_beamformer.vhd
+    src/vhdl/apertif_unb1_fn_beamformer.vhd                   # Hajee toplevel
+    src/vhdl/node_apertif_unb1_fn_beamformer_transpose.vhd
+    src/vhdl/node_apertif_unb1_fn_beamformer_output.vhd
+    src/vhdl/apertif_unb1_fn_beamformer_nodes.vhd             # Hierarchical toplevel using node_* instances
+
+test_bench_files =
+    tb/vhdl/tb_apertif_unb1_fn_beamformer.vhd
+    tb/vhdl/tb_apertif_unb1_fn_beamformer_nodes.vhd
+
+regression_test_vhdl = 
+    tb/vhdl/tb_apertif_unb1_fn_beamformer_nodes.vhd
+
+[modelsim_project_file]
+modelsim_copy_files = src/hex hex
+
+[quartus_project_file]
+quartus_copy_files = quartus/sopc_apertif_unb1_fn_beamformer.sopc .
+                     src/hex hex
diff --git a/applications/apertif/designs/apertif_unb1_fn_beamformer/tb/vhdl/tb_apertif_unb1_fn_beamformer_nodes.vhd b/applications/apertif/designs/apertif_unb1_fn_beamformer/tb/vhdl/tb_apertif_unb1_fn_beamformer_nodes.vhd
new file mode 100644
index 0000000000000000000000000000000000000000..0f1c38f984082fda70c169d59a133e1b668ccc96
--- /dev/null
+++ b/applications/apertif/designs/apertif_unb1_fn_beamformer/tb/vhdl/tb_apertif_unb1_fn_beamformer_nodes.vhd
@@ -0,0 +1,295 @@
+-------------------------------------------------------------------------------
+--
+-- Copyright (C) 2017
+-- ASTRON (Netherlands Institute for Radio Astronomy) <http://www.astron.nl/>
+-- JIVE (Joint Institute for VLBI in Europe) <http://www.jive.nl/>
+-- P.O.Box 2, 7990 AA Dwingeloo, The Netherlands
+--
+-- This program is free software: you can redistribute it and/or modify
+-- it under the terms of the GNU General Public License as published by
+-- the Free Software Foundation, either version 3 of the License, or
+-- (at your option) any later version.
+--
+-- This program is distributed in the hope that it will be useful,
+-- but WITHOUT ANY WARRANTY; without even the implied warranty of
+-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+-- GNU General Public License for more details.
+--
+-- You should have received a copy of the GNU General Public License
+-- along with this program.  If not, see <http://www.gnu.org/licenses/>.
+--
+-------------------------------------------------------------------------------
+
+-- Author: Eric Kooistra
+-- Purpose: Testbench for apertif_unb1_fn_beamformer_nodes.vhd
+-- Description:
+--   The testbench uses MM file IO to control the stimuli and monitor the
+--   results.
+--
+-- * Transpose input BG:
+--   The transpose input BG hex files are created for 2 polarizations and per
+--   BF unit, so in total 2 pol * 16 FN bands * 4 BF units = 128 hex files:
+--     > cd applications/apertif/designs/apertif_unb1_fn_beamformer/src/python/
+--     > python gen_hex_files_block_gen_transpose_input.py
+--   The im part contains the file number (0:63 for X and 64:127 for Y) and
+--   the real part contains the beamlet index within a block (0:255). Hence
+--   e.g. for FN 0 the transpose input BG creates:
+--      pol FN=band  bf unit   beamlet [bu]
+--       0    0        0       0:255
+--       0    0        1       0:255
+--       0    0        2       0:255
+--       0    0        3       0:255
+--       0    1        4       0:255
+--       0    1        5       0:255
+--       0    1        6       0:255
+--       0    1        7       0:255
+--       ...........................
+--       0   15       60       0:255
+--       0   15       61       0:255
+--       0   15       62       0:255
+--       0   15       63       0:255
+--       1  idem, but bf unit index 64:127
+--   --> input BG sosi.im = bf unit index
+--       input BG sosi.re = local beamlet index [bu]       
+-- Remarks:
+-- * Warning: (vsim-8607) nofile(-1): Non-positive replication multiplier inside concat. Replication will be ignored.
+--   --> this warning can be ignored, it appears due to ip_stratixiv_mac_10g in tech_mac_10g_stratixiv in tr_10GbE
+--
+-- Usage:
+--   > as 10
+--   > run 180 us
+--   View testbench dbg_* signals and internal apertif_unb1_fn_beamformer_nodes/dbg_* and reorder_transpose/dbg_* signals
+--   with radix hexadecimal in Wave window
+
+LIBRARY IEEE, common_lib, unb1_board_lib, i2c_lib, tech_ddr_lib, bf_lib, mm_lib;
+USE IEEE.std_logic_1164.ALL;
+USE IEEE.numeric_std.ALL;
+USE common_lib.common_pkg.ALL;
+USE common_lib.common_str_pkg.ALL;
+USE unb1_board_lib.unb1_board_pkg.ALL;
+USE common_lib.tb_common_pkg.ALL; 
+USE tech_ddr_lib.tech_ddr_pkg.ALL;
+USE bf_lib.bf_pkg.ALL;
+USE mm_lib.mm_file_pkg.ALL;
+USE mm_lib.mm_file_unb_pkg.ALL;
+
+ENTITY tb_apertif_unb1_fn_beamformer_nodes IS  -- self-contained testbench entity: no generics and no ports
+END tb_apertif_unb1_fn_beamformer_nodes;
+
+ARCHITECTURE tb OF tb_apertif_unb1_fn_beamformer_nodes IS
+
+  CONSTANT c_sim_unb_nr      : NATURAL := 0; -- UniBoard 0:3, also used for c_id, the DUT g_sim_unb_nr generic and the MM file prefix
+  CONSTANT c_sim_node_nr     : NATURAL := 0; -- Front node 0:3, also used for c_id, the DUT g_sim_node_nr generic and the MM file prefix
+  -- c_id is the initial value of the ID pins: UniBoard number in the upper bits, node number in the lower bits
+  CONSTANT c_id              : STD_LOGIC_VECTOR(7 DOWNTO 0) := TO_UVEC(c_sim_unb_nr, c_unb1_board_nof_uniboard_w) & TO_UVEC(c_sim_node_nr, c_unb1_board_nof_chip_w); 
+  CONSTANT c_version         : STD_LOGIC_VECTOR(1 DOWNTO 0) := "00";  -- initial value of the VERSION pins
+  CONSTANT c_fw_version      : t_unb1_board_fw_version := (1, 0);     -- NOTE(review): not referenced elsewhere in this file
+
+  CONSTANT c_tb_clk_period   : TIME := 100 ps;   -- use fast tb_clk and internal mm_clk to speed up M&C
+  CONSTANT c_ext_clk_period  : TIME := 5 ns;     -- 200 MHz external clock
+  CONSTANT c_eth_clk_period  : TIME := 40 ns;  -- 25 MHz XO on UniBoard
+  CONSTANT c_sa_clk_period   : TIME := 6.4 ns;   -- 156.25 MHz, used for both sa_clk and sb_clk
+  CONSTANT c_pps_period      : NATURAL := 1000;  -- period argument of proc_common_gen_pulse, presumably in ext_clk cycles - TODO confirm
+
+  -- DUT
+  SIGNAL ext_clk             : STD_LOGIC := '0';
+  SIGNAL pps                 : STD_LOGIC := '0';
+  SIGNAL pps_rst             : STD_LOGIC := '0';  -- no assignment in this file, only passed to proc_common_gen_pulse
+  SIGNAL sa_clk              : STD_LOGIC := '1';
+  SIGNAL sb_clk              : STD_LOGIC := '1';
+
+  SIGNAL WDI                 : STD_LOGIC;
+  SIGNAL INTA                : STD_LOGIC;  -- pulled up with 'H' in the architecture body
+  SIGNAL INTB                : STD_LOGIC;  -- pulled up with 'H' in the architecture body
+
+  SIGNAL eth_clk             : STD_LOGIC := '0';
+  SIGNAL eth_txp             : STD_LOGIC;
+  SIGNAL eth_rxp             : STD_LOGIC;  -- no driver in this file
+  
+  SIGNAL VERSION             : STD_LOGIC_VECTOR(c_unb1_board_aux.version_w-1 DOWNTO 0) := c_version; 
+  SIGNAL ID                  : STD_LOGIC_VECTOR(c_unb1_board_aux.id_w-1 DOWNTO 0)      := c_id;
+  SIGNAL TESTIO              : STD_LOGIC_VECTOR(c_unb1_board_aux.testio_w-1 DOWNTO 0);
+
+  SIGNAL sens_scl            : STD_LOGIC;  -- pulled up with 'H' in the architecture body
+  SIGNAL sens_sda            : STD_LOGIC;  -- pulled up with 'H' in the architecture body
+  SIGNAL si_fn_0_tx          : STD_LOGIC_VECTOR(c_unb1_board_ci.tr.bus_w-1 DOWNTO 0) := (OTHERS => '0');    -- stays all '0', connected to all SI_FN_*_RX
+  SIGNAL fn_bn_0_tx          : STD_LOGIC_VECTOR(c_unb1_board_tr_mesh.bus_w-1 DOWNTO 0) := (OTHERS => '0');  -- stays all '0', connected to all FN_BN_*_RX
+  
+  SIGNAL phy_in_x            : t_tech_ddr3_phy_in := c_tech_ddr3_phy_in_x;  -- use internal DDR3 model instead
+  
+  -- tb
+  CONSTANT c_nof_block_per_sync : NATURAL := 32;  -- multiple of N_pre_transpose = 16
+  CONSTANT c_block_size_out     : NATURAL := 176; -- number of words per output block, 176 in 8 bit mode
+  CONSTANT c_transport_w        : NATURAL := 8;   -- the expected output word is im * 2**c_transport_w + re
+  
+  SIGNAL tb_end                : STD_LOGIC := '0';                     -- set to '1' at the end of the test to stop all clocks
+  SIGNAL tb_clk                : STD_LOGIC := '0';
+  SIGNAL rd_data_ppsh          : STD_LOGIC_VECTOR(c_32-1 DOWNTO 0);    -- MM read data from PIO_PPS, bit 31 is used as PPS level
+  SIGNAL pps_level             : STD_LOGIC := '0';
+  SIGNAL rd_data_db            : STD_LOGIC_VECTOR(c_32-1 DOWNTO 0);    -- MM read data from the output data buffer
+  SIGNAL nof_rd_data_db_X      : NATURAL := 0;                         -- counts the MM read retries of the output data buffer
+  
+  -- MM  
+  CONSTANT c_cross_clock_domain_delay         : NATURAL := 50;  -- ext_clk cycles, assuming internal mm_clk is faster than ext_clk in sim. NOTE(review): not referenced in this file
+  CONSTANT c_mm_file_reg_ppsh                 : STRING := mmf_unb_file_prefix(c_sim_unb_nr, c_sim_node_nr) & "PIO_PPS";
+  CONSTANT c_mm_file_ram_diag_data_buf_output : STRING := mmf_unb_file_prefix(c_sim_unb_nr, c_sim_node_nr) & "RAM_DIAG_DATA_BUFFER_OUTPUT";
+
+BEGIN
+
+  ----------------------------------------------------------------------------
+  -- System setup
+  ----------------------------------------------------------------------------
+  tb_clk  <= NOT tb_clk  OR tb_end AFTER c_tb_clk_period/2;   -- Testbench MM clock, (NOT clk) OR tb_end holds the clock at '1' once tb_end = '1'
+  ext_clk <= NOT ext_clk OR tb_end AFTER c_ext_clk_period/2;  -- External clock (200 MHz)
+  eth_clk <= NOT eth_clk OR tb_end AFTER c_eth_clk_period/2;  -- Ethernet ref clock (25 MHz)
+  sa_clk  <= NOT sa_clk  OR tb_end AFTER c_sa_clk_period/2;   -- Transceiver clock A (156.25 MHz)
+  sb_clk  <= NOT sb_clk  OR tb_end AFTER c_sa_clk_period/2;   -- Transceiver clock B, same period constant as sa_clk
+  
+  INTA <= 'H';  -- pull up
+  INTB <= 'H';  -- pull up
+
+  sens_scl <= 'H';  -- pull up
+  sens_sda <= 'H';  -- pull up
+
+  ------------------------------------------------------------------------------
+  -- External PPS
+  ------------------------------------------------------------------------------  
+  proc_common_gen_pulse(1, c_pps_period, '1', pps_rst, ext_clk, pps);  -- presumably a 1 cycle wide '1' pulse every c_pps_period ext_clk cycles - TODO confirm in tb_common_pkg
+  
+  ------------------------------------------------------------------------------
+  -- DUT
+  ------------------------------------------------------------------------------
+  u_apertif_unb1_fn_beamformer_nodes : ENTITY work.apertif_unb1_fn_beamformer_nodes  -- device under test
+  GENERIC MAP (
+    g_design_name        => "apertif_unb1_fn_beamformer_bg_trans",  -- NOTE(review): differs from the entity name, confirm this is the intended design name
+    g_design_note        => "simulation",
+    g_sim                => TRUE,
+    g_sim_unb_nr         => c_sim_unb_nr,
+    g_sim_node_nr        => c_sim_node_nr,
+    g_bf                 => c_bf,                  -- c_bf is not declared in this file, presumably from bf_lib.bf_pkg - TODO confirm
+    g_nof_block_per_sync => c_nof_block_per_sync   -- 32 blocks per sync interval in this testbench
+  )
+  PORT MAP (
+    -- GENERAL
+    CLK         => ext_clk,
+    PPS         => pps,
+    WDI         => WDI,
+    INTA        => INTA,
+    INTB        => INTB,
+
+    -- Others
+    VERSION     => VERSION,
+    ID          => ID,
+    TESTIO      => TESTIO,
+
+    -- I2C Interface to Sensors
+    sens_sc     => sens_scl,
+    sens_sd     => sens_sda,
+
+    -- 1GbE Control Interface
+    ETH_clk     => eth_clk,
+    ETH_SGIN    => eth_rxp,  -- eth_rxp has no driver in this testbench
+    ETH_SGOUT   => eth_txp,
+    
+    -- Transceiver clocks
+    SA_CLK      => sa_clk,
+    SB_CLK      => sb_clk,
+
+    -- Mesh Serial I/O
+    FN_BN_0_RX  => fn_bn_0_tx,  -- all four mesh RX buses share the same signal, which stays all '0'
+    FN_BN_1_RX  => fn_bn_0_tx,
+    FN_BN_2_RX  => fn_bn_0_tx,
+    FN_BN_3_RX  => fn_bn_0_tx,
+
+    -- Front Serial I/O
+    SI_FN_0_RX  => si_fn_0_tx,  -- all four front RX buses share the same signal, which stays all '0'
+    SI_FN_1_RX  => si_fn_0_tx,
+    SI_FN_2_RX  => si_fn_0_tx,
+    SI_FN_3_RX  => si_fn_0_tx,
+    
+    MB_I_in     => phy_in_x,  -- held at c_tech_ddr3_phy_in_x, the DDR3 memory is modelled inside the DUT
+    MB_I_io     => OPEN, 
+    MB_I_ou     => OPEN
+  );  
+  
+  p_mm_ppsh : PROCESS
+  BEGIN
+    -- Example of how to wait for a PPS event via MM (not needed for the checks in this testbench): sample bit 31, then poll until it differs
+    mmf_mm_bus_rd(c_mm_file_reg_ppsh, 0, rd_data_ppsh, ext_clk);
+    pps_level <= rd_data_ppsh(31);                              -- sample the current PPS level
+    proc_common_wait_some_cycles(ext_clk, 1);                   -- let the pps_level signal take the sampled value
+    LOOP
+      EXIT WHEN pps_level/=rd_data_ppsh(31);                    -- stop polling as soon as the read level differs from the sample
+      mmf_mm_bus_rd(c_mm_file_reg_ppsh, 0, rd_data_ppsh, ext_clk);
+    END LOOP;
+    pps_level <= rd_data_ppsh(31);                              -- store the new PPS level to mark the level change event
+    WAIT;                                                       -- done, suspend for the rest of the simulation
+  END PROCESS;
+  
+  p_mm_diag_data_buf_output : PROCESS
+    CONSTANT N_clk        : NATURAL := c_bf.nof_weights;      -- = 256 (only referenced in the comments below)
+    CONSTANT N_blk        : NATURAL := c_block_size_out;      -- = 176 (8 bit mode), 240 (6 bit mode)
+    CONSTANT N_int_x      : NATURAL := c_nof_block_per_sync;  -- = e.g. 32 in sim, 800000 on hw (multiple of N_pre_transpose = 16)
+    CONSTANT Q_interleave : NATURAL := 2;                     -- = 8/4 = nof_pn / P_BF
+    -- Number of words checked per BF unit, and the MM address span per BF unit (presumably rounded up to a power of 2 - TODO confirm)
+    CONSTANT c_output_db_nof_words       : NATURAL := N_int_x * N_blk;
+    CONSTANT c_output_db_nof_words_pow2  : NATURAL := true_log_pow2(c_output_db_nof_words);
+  
+    CONSTANT c_max_X    : NATURAL := 4;  -- maximum number of MM read retries that is accepted at the end of the test
+    
+    VARIABLE v_I        : NATURAL;  -- word index within the data buffer of the current BF unit
+    VARIABLE v_exp_re   : NATURAL;  -- expected re part = beamlet index
+    VARIABLE v_exp_im   : NATURAL;  -- expected im part = BF unit index
+    VARIABLE v_exp_data : NATURAL;  -- expected word = im * 2**c_transport_w + re
+    VARIABLE v_rd_data  : NATURAL;  -- MM read word as unsigned integer
+  BEGIN    
+    -- Wait for DB to have filled with beamlet data from first transpose sync interval
+    proc_common_wait_until_time(ext_clk, 150 us);
+    
+    ----------------------------------------------------------------------------
+    -- Read and verify transpose output data buffer
+    ----------------------------------------------------------------------------
+    -- Verify transpose output DB
+    --   Transpose input per BF unit is:
+    --       0 1 2 3 4 ................................................. 175 : 255
+    --       <-------------------------------------------------------------------> N_clk = 256
+    --   Expected transpose via DDR3 output per BF unit is:
+    --       0 1 0 1 ... 0 1 2 3 2 3 ... 2 3     ...                 174 175
+    --       <-------------------------------------------------------------> N_blk = 176
+    --       <-------------> <------------->                 <-------------> N_int_x = 32 in sim, 800000 on hw
+    --       <-> <-> <->                                                 <-> Q_interleave = 2
+    FOR u IN 0 TO c_bf.nof_bf_units-1 LOOP
+      v_exp_im := u;
+      v_I := 0;
+      FOR bui IN 0 TO N_blk/Q_interleave-1 LOOP
+        FOR t IN 0 TO N_int_x-1 LOOP                           -- the same group of Q_interleave beamlets repeats for N_int_x blocks
+          FOR q IN 0 TO Q_interleave-1 LOOP
+            -- Expected data
+            v_exp_re := bui * Q_interleave + q;
+            v_exp_data := v_exp_im * 2**c_transport_w + v_exp_re;
+            -- MM read data
+            mmf_mm_bus_rd(c_mm_file_ram_diag_data_buf_output, u*c_output_db_nof_words_pow2 + v_I, rd_data_db, tb_clk);
+            v_rd_data := TO_UINT(rd_data_db);
+            IF v_rd_data/=v_exp_data THEN
+              -- Retry MM read when occasionally MM read and internal firmware write occur at same address and then cause 'X'
+              REPORT "DB MM read retry " & int_to_str(nof_rd_data_db_X + 1) & " at BF unit " & int_to_str(u) & ", word " & int_to_str(v_I) & " : first read " & int_to_str(v_rd_data) & " /= " & int_to_str(v_exp_data);  -- + 1 because the signal counter is only updated after this process suspends
+              mmf_mm_bus_rd(c_mm_file_ram_diag_data_buf_output, u*c_output_db_nof_words_pow2 + v_I, rd_data_db, tb_clk);
+              v_rd_data := TO_UINT(rd_data_db);
+              nof_rd_data_db_X <= nof_rd_data_db_X + 1;
+            END IF;
+            ASSERT v_rd_data=v_exp_data REPORT "DB unexpected transpose output data : " & int_to_str(v_rd_data) & " /= " & int_to_str(v_exp_data);
+            v_I := v_I + 1;
+          END LOOP;
+        END LOOP;     
+      END LOOP;
+      -- Leave gap between BF unit accesses, to ease view in Wave window
+      proc_common_wait_some_cycles(ext_clk, 100);
+    END LOOP;
+    ASSERT nof_rd_data_db_X <= c_max_X REPORT "DB too many transpose output data MM read retries : " & int_to_str(nof_rd_data_db_X) & " > " & int_to_str(c_max_X);
+    proc_common_wait_some_cycles(ext_clk, 1000);
+    -- Stop the clocks to end the simulation
+    tb_end <= '1';
+    WAIT;
+  END PROCESS;
+
+END tb;