diff --git a/applications/rdma_demo/libraries/rdma_packetiser/hdllib.cfg b/applications/rdma_demo/libraries/rdma_packetiser/hdllib.cfg
index bd02154d12170f8894e110fc6c74bb74148bc3a4..c449e83d95a91ecdf4244a1e792bbbc7ca5316ae 100644
--- a/applications/rdma_demo/libraries/rdma_packetiser/hdllib.cfg
+++ b/applications/rdma_demo/libraries/rdma_packetiser/hdllib.cfg
@@ -10,8 +10,10 @@ synth_files =
 
 test_bench_files = 
     tb/vhdl/tb_rdma_packetiser_assemble_header.vhd
+    tb/vhdl/tb_tb_rdma_packetiser_assemble_header.vhd
 
 regression_test_vhdl = 
+    tb/vhdl/tb_tb_rdma_packetiser_assemble_header.vhd
 
 
 [modelsim_project_file]
diff --git a/applications/rdma_demo/libraries/rdma_packetiser/src/vhdl/rdma_packetiser_assemble_header.vhd b/applications/rdma_demo/libraries/rdma_packetiser/src/vhdl/rdma_packetiser_assemble_header.vhd
index 2bc15f76ee19ccd7d6bbb159303e1fffd92d6d3e..174ff8d1db16f1bf22543c970650bb26f7b38552 100644
--- a/applications/rdma_demo/libraries/rdma_packetiser/src/vhdl/rdma_packetiser_assemble_header.vhd
+++ b/applications/rdma_demo/libraries/rdma_packetiser/src/vhdl/rdma_packetiser_assemble_header.vhd
@@ -45,6 +45,10 @@
 --   of messages has reached "nof_msg". Then the cycle repeats.
 -- . The PSN field (= Packet Sequence Number) is set to LSBs of the incoming BSN.
 --   This can be used to check the order or detect missing packets at the receiver.
+-- . The incoming data stream has to be at least 6 valid cycles long and must be able to
+--   handle backpressure. Using fewer than 6 cycles seems to result in corrupted packets.
+--   This appears to be caused by dp_offload_tx_v3 not being able to process fast enough;
+--   further investigation is necessary in order to get rid of this limitation.
 
 -- References:
 -- . [1] https://support.astron.nl/confluence/x/3pKrB
@@ -88,8 +92,19 @@ entity rdma_packetiser_assemble_header is
 end rdma_packetiser_assemble_header;
 
 architecture str of rdma_packetiser_assemble_header is
-  constant c_udp_app_hdr_length    : natural := c_network_udp_header_len + c_rdma_packetiser_bth_len;
-  constant c_ip_udp_app_hdr_length : natural := c_network_ip_header_len + c_udp_app_hdr_length;
+  -- Constants for each header length variant; every length also includes the ICRC length.
+  -- first = rdma_write_first and rdma_write_only without immediate.
+  -- mid   = rdma_write_middle and rdma_write_last without immediate.
+  -- last  = rdma_write_last with immediate.
+  -- wo    = rdma_write_only with immediate.
+  constant c_udp_app_first_hdr_len    : natural := c_network_udp_header_len + c_rdma_packetiser_bth_len + c_rdma_packetiser_reth_len + c_rdma_packetiser_icrc_len;
+  constant c_udp_app_mid_hdr_len      : natural := c_network_udp_header_len + c_rdma_packetiser_bth_len + c_rdma_packetiser_icrc_len;
+  constant c_udp_app_last_hdr_len     : natural := c_network_udp_header_len + c_rdma_packetiser_bth_len + c_rdma_packetiser_imm_len  + c_rdma_packetiser_icrc_len;
+  constant c_udp_app_wo_hdr_len       : natural := c_network_udp_header_len + c_rdma_packetiser_bth_len + c_rdma_packetiser_reth_len + c_rdma_packetiser_imm_len + c_rdma_packetiser_icrc_len;
+  constant c_ip_udp_app_first_hdr_len : natural := c_network_ip_header_len + c_udp_app_first_hdr_len;
+  constant c_ip_udp_app_mid_hdr_len   : natural := c_network_ip_header_len + c_udp_app_mid_hdr_len;
+  constant c_ip_udp_app_last_hdr_len  : natural := c_network_ip_header_len + c_udp_app_last_hdr_len;
+  constant c_ip_udp_app_wo_hdr_len    : natural := c_network_ip_header_len + c_udp_app_wo_hdr_len;
   constant c_nof_offload           : natural := 4; 
 
   type t_state is (s_first, s_middle, s_last);
@@ -179,24 +194,36 @@ begin
 
           if v.nof_packets_in_msg = 1 then -- set opcode to write_only.
             v.opcode := c_rdma_packetiser_opcode_uc_write_only;
+            v.udp_total_length := c_udp_app_first_hdr_len + to_uint(block_len);
+            v.ip_total_length  := c_ip_udp_app_first_hdr_len + to_uint(block_len);
             v.sel_ctrl := 0;
             if use_immediate = '1' then  -- set opcode to write_only with immediate data.
               v.opcode := c_rdma_packetiser_opcode_uc_write_only_imm;
+              v.udp_total_length := c_udp_app_wo_hdr_len + to_uint(block_len);
+              v.ip_total_length  := c_ip_udp_app_wo_hdr_len + to_uint(block_len);
             v.sel_ctrl := 3;
             end if;
           elsif v.nof_packets_in_msg = 2 then -- set opcode to write_first.
             v.state := s_last; -- next state is last as there are only 2 packets.
             v.opcode := c_rdma_packetiser_opcode_uc_write_first;
+            v.udp_total_length := c_udp_app_first_hdr_len + to_uint(block_len);
+            v.ip_total_length  := c_ip_udp_app_first_hdr_len + to_uint(block_len);
+
             v.sel_ctrl := 0;
           elsif v.nof_packets_in_msg > 2 then
             v.state := s_middle;
             v.opcode := c_rdma_packetiser_opcode_uc_write_first;
+            v.udp_total_length := c_udp_app_first_hdr_len + to_uint(block_len);
+            v.ip_total_length  := c_ip_udp_app_first_hdr_len + to_uint(block_len);
+
             v.sel_ctrl := 0;
           end if;
 
         when s_middle => -- wait unitl the first packet is done and set next opcode.
           v.opcode := c_rdma_packetiser_opcode_uc_write_middle;
-            v.sel_ctrl := 1;
+          v.udp_total_length := c_udp_app_mid_hdr_len + to_uint(block_len);
+          v.ip_total_length  := c_ip_udp_app_mid_hdr_len + to_uint(block_len);
+          v.sel_ctrl := 1;
           if q.p_cnt >= v.nof_packets_in_msg - 2 then -- wait until last middle packet
             v.state := s_last;
           end if;
@@ -204,9 +231,13 @@ begin
         when s_last => -- next packet must be last packet, set opcode.
           v.state := s_first;
           v.opcode := c_rdma_packetiser_opcode_uc_write_last;
-            v.sel_ctrl := 1;
+          v.udp_total_length := c_udp_app_mid_hdr_len + to_uint(block_len);
+          v.ip_total_length  := c_ip_udp_app_mid_hdr_len + to_uint(block_len);
+          v.sel_ctrl := 1;
           if use_immediate = '1' then -- set opcode to write_last with immediate data
             v.opcode := c_rdma_packetiser_opcode_uc_write_last_imm;
+            v.udp_total_length := c_udp_app_last_hdr_len + to_uint(block_len);
+            v.ip_total_length  := c_ip_udp_app_last_hdr_len + to_uint(block_len);
             v.sel_ctrl := 2;
           end if;
       end case;
@@ -215,8 +246,6 @@ begin
     if v.msg_cnt = 0 then -- set on new message
       v.virtual_address    := unsigned(start_address);
       v.dma_len            := unsigned(dma_len);
-      v.udp_total_length   := c_udp_app_hdr_length    + to_uint(block_len) + c_rdma_packetiser_icrc_len;
-      v.ip_total_length    := c_ip_udp_app_hdr_length + to_uint(block_len) + c_rdma_packetiser_icrc_len;
       v.nof_packets_in_msg := to_uint(nof_packets_in_msg);
     end if;
 
@@ -278,7 +307,7 @@ begin
     hdr_fields_slv_in_first <= field_select_subset(c_rdma_packetiser_first_hdr_field_arr, 
                                                    c_rdma_packetiser_mm_field_arr, 
                                                    hdr_fields_slv_out_mm);
-    hdr_fields_slv_in_mid   <= field_select_subset(c_rdma_packetiser_last_hdr_field_arr, 
+    hdr_fields_slv_in_mid   <= field_select_subset(c_rdma_packetiser_mid_hdr_field_arr, 
                                                    c_rdma_packetiser_mm_field_arr, 
                                                    hdr_fields_slv_out_mm);
     hdr_fields_slv_in_last  <= field_select_subset(c_rdma_packetiser_last_hdr_field_arr, 
diff --git a/applications/rdma_demo/libraries/rdma_packetiser/src/vhdl/rdma_packetiser_pkg.vhd b/applications/rdma_demo/libraries/rdma_packetiser/src/vhdl/rdma_packetiser_pkg.vhd
index 5e25e0595cf0bd88da6f25f562015442ce8a23d2..b66da5f7dc56ef1eb9d137d0c2bca9c350c5bc0f 100644
--- a/applications/rdma_demo/libraries/rdma_packetiser/src/vhdl/rdma_packetiser_pkg.vhd
+++ b/applications/rdma_demo/libraries/rdma_packetiser/src/vhdl/rdma_packetiser_pkg.vhd
@@ -90,7 +90,7 @@ package rdma_packetiser_pkg is
   -- ETH + IP + UDP + Base Transport Header (BTH) + RDMA Extended Transport Header (RETH) + Immediate Data + user config
   -- Primarly used as a MM register map to provide the data needed for all header variants.
   constant c_rdma_packetiser_mm_nof_fields : natural := 3 + 12 + 4 + 13 + 3 + 1 + 6;
-  constant c_rdma_packetiser_mm_field_sel  : std_logic_vector(c_rdma_packetiser_mm_nof_fields - 1 downto 0) :=  "111" & "111011111011" & "1100" & "0111111111110" & "010" & "1" & "011111";
+  constant c_rdma_packetiser_mm_field_sel  : std_logic_vector(c_rdma_packetiser_mm_nof_fields - 1 downto 0) :=  "111" & "111011111011" & "1100" & "0111111111110" & "011" & "1" & "011111";
 
   constant c_rdma_packetiser_mm_field_arr : t_common_field_arr(
     c_rdma_packetiser_mm_nof_fields - 1 downto 0) := (
@@ -342,18 +342,18 @@ package rdma_packetiser_pkg is
   constant c_rdma_packetiser_imm_len  : natural := 4;   -- octets
   constant c_rdma_packetiser_icrc_len : natural := 4;   -- octets
 
-  constant c_rdma_packetiser_opcode_uc_send_first      : std_logic_vector := "001" & "00000";
-  constant c_rdma_packetiser_opcode_uc_send_middle     : std_logic_vector := "001" & "00001";
-  constant c_rdma_packetiser_opcode_uc_send_last       : std_logic_vector := "001" & "00010"; -- without immediate
-  constant c_rdma_packetiser_opcode_uc_send_last_imm   : std_logic_vector := "001" & "00011"; -- with immediate
-  constant c_rdma_packetiser_opcode_uc_send_only       : std_logic_vector := "001" & "00100"; -- without immediate
-  constant c_rdma_packetiser_opcode_uc_send_only_imm   : std_logic_vector := "001" & "00101"; -- with immediate
-  constant c_rdma_packetiser_opcode_uc_write_first     : std_logic_vector := "001" & "00110";
-  constant c_rdma_packetiser_opcode_uc_write_middle    : std_logic_vector := "001" & "00111";
-  constant c_rdma_packetiser_opcode_uc_write_last      : std_logic_vector := "001" & "01000"; -- without immediate
-  constant c_rdma_packetiser_opcode_uc_write_last_imm  : std_logic_vector := "001" & "01001"; -- with immediate
-  constant c_rdma_packetiser_opcode_uc_write_only      : std_logic_vector := "001" & "01010"; -- without immediate
-  constant c_rdma_packetiser_opcode_uc_write_only_imm  : std_logic_vector := "001" & "01011"; -- with immediate
+  constant c_rdma_packetiser_opcode_uc_send_first      : std_logic_vector(7 downto 0) := "001" & "00000";
+  constant c_rdma_packetiser_opcode_uc_send_middle     : std_logic_vector(7 downto 0) := "001" & "00001";
+  constant c_rdma_packetiser_opcode_uc_send_last       : std_logic_vector(7 downto 0) := "001" & "00010"; -- without immediate
+  constant c_rdma_packetiser_opcode_uc_send_last_imm   : std_logic_vector(7 downto 0) := "001" & "00011"; -- with immediate
+  constant c_rdma_packetiser_opcode_uc_send_only       : std_logic_vector(7 downto 0) := "001" & "00100"; -- without immediate
+  constant c_rdma_packetiser_opcode_uc_send_only_imm   : std_logic_vector(7 downto 0) := "001" & "00101"; -- with immediate
+  constant c_rdma_packetiser_opcode_uc_write_first     : std_logic_vector(7 downto 0) := "001" & "00110";
+  constant c_rdma_packetiser_opcode_uc_write_middle    : std_logic_vector(7 downto 0) := "001" & "00111";
+  constant c_rdma_packetiser_opcode_uc_write_last      : std_logic_vector(7 downto 0) := "001" & "01000"; -- without immediate
+  constant c_rdma_packetiser_opcode_uc_write_last_imm  : std_logic_vector(7 downto 0) := "001" & "01001"; -- with immediate
+  constant c_rdma_packetiser_opcode_uc_write_only      : std_logic_vector(7 downto 0) := "001" & "01010"; -- without immediate
+  constant c_rdma_packetiser_opcode_uc_write_only_imm  : std_logic_vector(7 downto 0) := "001" & "01011"; -- with immediate
 
   function func_rdma_packetiser_map_header(hdr_fields_raw : std_logic_vector; field_arr : t_common_field_arr) return t_rdma_packetiser_roce_header;
 end rdma_packetiser_pkg;
diff --git a/applications/rdma_demo/libraries/rdma_packetiser/tb/vhdl/tb_rdma_packetiser_assemble_header.vhd b/applications/rdma_demo/libraries/rdma_packetiser/tb/vhdl/tb_rdma_packetiser_assemble_header.vhd
index 6b4dd312f62e2de4f6d845fdb60f2b4a8c4e22c7..66b41e714ef9bdf1ecf9cdc54577d267667194cc 100644
--- a/applications/rdma_demo/libraries/rdma_packetiser/tb/vhdl/tb_rdma_packetiser_assemble_header.vhd
+++ b/applications/rdma_demo/libraries/rdma_packetiser/tb/vhdl/tb_rdma_packetiser_assemble_header.vhd
@@ -55,6 +55,7 @@ library IEEE, common_lib, dp_lib;
 
 entity tb_rdma_packetiser_assemble_header is
   generic (
+    g_data_w                   : natural := c_word_w;
     g_use_immediate            : boolean := true;
     g_use_msg_cnt_as_immediate : boolean := true;
     g_nof_rep                  : natural := 60;
@@ -68,35 +69,47 @@ end tb_rdma_packetiser_assemble_header;
 architecture tb of tb_rdma_packetiser_assemble_header is
   constant c_dp_clk_period      : time := 5 ns;  -- 200 MHz
   constant c_mm_clk_period      : time := 1 ns;  -- 1 GHz
-  constant c_data_w             : natural := c_word_w;
   constant c_data_init          : natural := 13;
   constant c_hdr_fields_slv_rst : std_logic_vector(1023 downto 0) := (others => '0');
-  constant c_block_len          : natural := g_frame_len * (c_data_w / c_octet_w);
+  constant c_block_len          : natural := g_frame_len * (g_data_w / c_octet_w);
   constant c_dma_len            : natural := c_block_len * g_nof_packets_in_msg;
 
-  signal tb_end              : std_logic := '0';
+  constant c_mm_addr_config_start_address            : natural := 0;
+  constant c_mm_addr_config_nof_msg                  : natural := 2;
+  constant c_mm_addr_config_nof_packets_in_msg       : natural := 3;
+  constant c_mm_addr_config_use_msg_cnt_as_immediate : natural := 4;
+  constant c_mm_addr_config_use_immediate            : natural := 5;
+  constant c_mm_addr_immediate_data                  : natural := 7;
+  constant c_mm_addr_reth_dma_length                 : natural := 8;
 
-  signal dp_clk              : std_logic := '1';
-  signal dp_rst              : std_logic;
-  signal mm_clk              : std_logic := '1';
-  signal mm_rst              : std_logic;
+  signal tb_end               : std_logic := '0';
 
-  signal immediate_data      : std_logic_vector(c_word_w - 1 downto 0) := X"89ABCDEF";
-  signal block_len           : std_logic_vector(c_halfword_w - 1 downto 0) := TO_UVEC(c_block_len, c_halfword_w);
-  signal nof_packets_in_msg  : std_logic_vector(c_word_w - 1 downto 0) := TO_UVEC(g_nof_packets_in_msg, c_word_w);
-  signal nof_msg             : std_logic_vector(c_word_w - 1 downto 0) := TO_UVEC(g_nof_msg, c_word_w);
-  signal dma_len             : std_logic_vector(c_word_w - 1 downto 0) := TO_UVEC(c_dma_len, c_word_w);
-  signal start_address       : std_logic_vector(c_longword_w - 1 downto 0) := std_logic_vector(g_start_address);
+  signal dp_clk               : std_logic := '1';
+  signal dp_rst               : std_logic;
+  signal mm_clk               : std_logic := '1';
+  signal mm_rst               : std_logic;
 
-  signal hdr_fields_slv      : std_logic_vector(1023 downto 0) := (others => '0');
-  signal rx_rdma_header      : t_rdma_packetiser_roce_header;
-  --signal exp_rdma_header     : t_rdma_packetiser_roce_header := func_rdma_packetiser_map_header(c_hdr_fields_slv_rst, g_use_immediate);
-  signal in_en : std_logic := '0';
+  signal block_len            : std_logic_vector(c_halfword_w - 1 downto 0) := TO_UVEC(c_block_len, c_halfword_w);
+  signal nof_msg              : std_logic_vector(c_word_w - 1 downto 0) := TO_UVEC(g_nof_msg, c_word_w);
+  signal dma_len              : std_logic_vector(c_word_w - 1 downto 0) := TO_UVEC(c_dma_len, c_word_w);
+  signal start_address        : std_logic_vector(c_longword_w - 1 downto 0) := std_logic_vector(g_start_address);
+  signal immediate_data       : std_logic_vector(c_word_w - 1 downto 0) := X"89ABCDEF";
 
-  signal snk_in  : t_dp_sosi := c_dp_sosi_rst;
-  signal snk_out : t_dp_siso := c_dp_siso_rdy;
-  signal src_out : t_dp_sosi := c_dp_sosi_rst;
-  signal src_in  : t_dp_siso := c_dp_siso_rdy;
+  signal first_hdr_fields_arr : std_logic_vector(1023 downto 0) := (others => '0');
+  signal mid_hdr_fields_arr   : std_logic_vector(1023 downto 0) := (others => '0');
+  signal last_hdr_fields_arr  : std_logic_vector(1023 downto 0) := (others => '0');
+  signal wo_hdr_fields_arr    : std_logic_vector(1023 downto 0) := (others => '0');
+  signal rx_rdma_header       : t_rdma_packetiser_roce_header;
+  signal exp_rdma_header      : t_rdma_packetiser_roce_header := func_rdma_packetiser_map_header(c_hdr_fields_slv_rst, c_rdma_packetiser_wo_hdr_field_arr);
+
+  signal in_en   : std_logic := '0';
+  signal mm_done : std_logic := '0';
+
+  signal snk_in   : t_dp_sosi := c_dp_sosi_rst;
+  signal snk_out  : t_dp_siso := c_dp_siso_rdy;
+  signal src_out  : t_dp_sosi := c_dp_sosi_rst;
+  signal src_in   : t_dp_siso := c_dp_siso_rdy;
+  signal sop_sosi : t_dp_sosi := c_dp_sosi_rst;
 
   signal reg_hdr_dat_copi   : t_mem_copi := c_mem_copi_rst;
   signal reg_hdr_dat_cipo   : t_mem_cipo;
@@ -106,25 +119,159 @@ begin
   dp_clk <= (not dp_clk) or tb_end after c_dp_clk_period / 2;
   mm_rst <= '1', '0' after c_mm_clk_period * 7;
   mm_clk <= (not mm_clk) or tb_end after c_mm_clk_period / 2;
-  --rx_rdma_header <= func_rdma_packetiser_map_header(hdr_fields_slv, g_use_immediate );
 
   p_dp_stimuli : process
   begin
     -- dp stimuli
     proc_common_wait_until_low(dp_clk, dp_rst);
+    proc_common_wait_until_high(dp_clk, mm_done);  -- wait mm setup
     proc_common_wait_some_cycles(dp_clk, 100);
     in_en <= '1';
     for rep in 0 to g_nof_rep - 1 loop
-      proc_dp_gen_block_data(1, true, c_data_w, c_data_w, c_data_init, 0, 0, g_frame_len, 0, 0, '0', TO_DP_BSN(rep), dp_clk, in_en, snk_out, snk_in);
+      proc_dp_gen_block_data(1, true, g_data_w, g_data_w, c_data_init, 0, 0, g_frame_len, 0, 0, '0', TO_DP_BSN(rep), dp_clk, in_en, snk_out, snk_in);
     end loop;
     proc_common_wait_some_cycles(dp_clk, 100);
     in_en <= '0';
     wait;
   end process;
 
+  p_mm_setup : process
+  begin
+    proc_common_wait_until_low(dp_clk, mm_rst);
+    proc_common_wait_until_low(dp_clk, dp_rst);
+    proc_common_wait_some_cycles(mm_clk, 50);
+
+    proc_mem_mm_bus_wr(c_mm_addr_config_start_address,                    start_address(31 downto 0),        mm_clk, reg_hdr_dat_cipo, reg_hdr_dat_copi);
+    proc_mem_mm_bus_wr(c_mm_addr_config_start_address + 1,                start_address(63 downto 32),       mm_clk, reg_hdr_dat_cipo, reg_hdr_dat_copi);
+    proc_mem_mm_bus_wr(c_mm_addr_config_nof_msg,                          g_nof_msg,                         mm_clk, reg_hdr_dat_cipo, reg_hdr_dat_copi);
+    proc_mem_mm_bus_wr(c_mm_addr_config_nof_packets_in_msg,               g_nof_packets_in_msg,              mm_clk, reg_hdr_dat_cipo, reg_hdr_dat_copi);
+    proc_mem_mm_bus_wr(c_mm_addr_config_use_msg_cnt_as_immediate, sel_a_b(g_use_msg_cnt_as_immediate, 1, 0), mm_clk, reg_hdr_dat_cipo, reg_hdr_dat_copi);
+    proc_mem_mm_bus_wr(c_mm_addr_config_use_immediate,            sel_a_b(g_use_immediate, 1, 0),            mm_clk, reg_hdr_dat_cipo, reg_hdr_dat_copi);
+    proc_mem_mm_bus_wr(c_mm_addr_immediate_data,                          immediate_data,                    mm_clk, reg_hdr_dat_cipo, reg_hdr_dat_copi);
+    proc_mem_mm_bus_wr(c_mm_addr_reth_dma_length,                         c_dma_len,                         mm_clk, reg_hdr_dat_cipo, reg_hdr_dat_copi);
+    mm_done <= '1';
+    wait;
+  end process;
+
+
+  -- check if values in rdma_packetiser_assemble_header match with expected values
+  p_verify_rdma_header : process
+    variable v_exp_ip_total_length      : natural;
+    variable v_exp_udp_total_length     : natural;
+    variable v_exp_bth_opcode           : std_logic_vector(c_byte_w - 1 downto 0);
+    variable v_exp_bth_psn              : natural;
+    variable v_exp_reth_virtual_address : unsigned(c_longword_w - 1 downto 0);
+    variable v_exp_reth_dma_length      : natural;
+    variable v_exp_immediate_data       : std_logic_vector(c_word_w - 1 downto 0);
+    variable v_p, v_m                   : natural := 0;
+  begin
+
+    proc_common_wait_until_high(dp_clk, mm_done);  -- wait mm setup
+
+    for rep in 0 to g_nof_rep - 1 loop
+      proc_common_wait_until_high(dp_clk, sop_sosi.sop);  -- wait for sop of dp_offload
+
+      v_exp_bth_psn              := v_p;
+      v_exp_immediate_data       := sel_a_b(g_use_immediate,
+      sel_a_b(g_use_msg_cnt_as_immediate, to_uvec((v_m mod g_nof_msg), c_word_w), immediate_data), to_uvec(0, c_word_w));
+
+      -- determine expected opcode
+      if v_p mod g_nof_packets_in_msg = 0 then
+        v_exp_bth_opcode := c_rdma_packetiser_opcode_uc_write_first;
+        if g_nof_packets_in_msg = 1 and g_use_immediate then
+          v_exp_bth_opcode := c_rdma_packetiser_opcode_uc_write_only_imm;
+        elsif g_nof_packets_in_msg = 1 then
+          v_exp_bth_opcode := c_rdma_packetiser_opcode_uc_write_only;
+        end if;
+      elsif v_p mod g_nof_packets_in_msg = g_nof_packets_in_msg - 1 then
+        v_exp_bth_opcode := c_rdma_packetiser_opcode_uc_write_last;
+        if g_use_immediate then
+          v_exp_bth_opcode := c_rdma_packetiser_opcode_uc_write_last_imm;
+        end if;
+      else
+        v_exp_bth_opcode := c_rdma_packetiser_opcode_uc_write_middle;
+      end if;
+
+      -- calculate expected lengths
+      v_exp_udp_total_length     := c_network_udp_header_len + c_rdma_packetiser_bth_len + to_uint(block_len) + c_rdma_packetiser_icrc_len;
+      v_exp_reth_virtual_address := (others => '0');
+      v_exp_reth_dma_length      := 0; 
+
+      if v_exp_bth_opcode = c_rdma_packetiser_opcode_uc_write_first    or
+         v_exp_bth_opcode = c_rdma_packetiser_opcode_uc_write_only     or
+         v_exp_bth_opcode = c_rdma_packetiser_opcode_uc_write_only_imm then
+        v_exp_udp_total_length := v_exp_udp_total_length + c_rdma_packetiser_reth_len;
+        v_exp_reth_virtual_address := g_start_address + to_unsigned((v_m mod g_nof_msg) * c_dma_len, c_longword_w);
+        v_exp_reth_dma_length      := c_dma_len;
+      end if;
+
+      if v_exp_bth_opcode = c_rdma_packetiser_opcode_uc_write_only_imm or
+         v_exp_bth_opcode = c_rdma_packetiser_opcode_uc_write_last_imm then
+        v_exp_udp_total_length := v_exp_udp_total_length + c_rdma_packetiser_imm_len;
+      end if;
+
+      v_exp_ip_total_length := c_network_ip_header_len + v_exp_udp_total_length;
+
+      -- select header based on expected opcode
+      if v_exp_bth_opcode = c_rdma_packetiser_opcode_uc_write_first or
+         v_exp_bth_opcode = c_rdma_packetiser_opcode_uc_write_only then
+        rx_rdma_header <= func_rdma_packetiser_map_header(first_hdr_fields_arr, c_rdma_packetiser_first_hdr_field_arr);
+        v_exp_immediate_data := (others => '0'); -- does not exist in this header
+      elsif v_exp_bth_opcode = c_rdma_packetiser_opcode_uc_write_middle or
+            v_exp_bth_opcode = c_rdma_packetiser_opcode_uc_write_last then
+        rx_rdma_header <= func_rdma_packetiser_map_header(mid_hdr_fields_arr,   c_rdma_packetiser_mid_hdr_field_arr);
+        v_exp_immediate_data := (others => '0'); -- does not exist in this header
+      elsif v_exp_bth_opcode = c_rdma_packetiser_opcode_uc_write_last_imm then
+        rx_rdma_header <= func_rdma_packetiser_map_header(last_hdr_fields_arr,    c_rdma_packetiser_last_hdr_field_arr);
+      elsif v_exp_bth_opcode = c_rdma_packetiser_opcode_uc_write_only_imm then
+        rx_rdma_header <= func_rdma_packetiser_map_header(wo_hdr_fields_arr,    c_rdma_packetiser_wo_hdr_field_arr);
+      end if;
+
+      -- assign expected values to signal to view in wave window.
+      -- defaults
+      exp_rdma_header.eth.eth_type         <= x"0800";
+      exp_rdma_header.ip.version           <= to_uvec(4, 4);
+      exp_rdma_header.ip.header_length     <= to_uvec(5, 4);
+      exp_rdma_header.ip.flags             <= to_uvec(2, 3);
+      exp_rdma_header.ip.time_to_live      <= to_uvec(127, 8);
+      exp_rdma_header.ip.protocol          <= to_uvec(17, 8);
+      exp_rdma_header.bth.partition_key    <= to_uvec(65535, 16);
+      -- changed by DUT
+      exp_rdma_header.ip.total_length      <= to_uvec(v_exp_ip_total_length, c_halfword_w);
+      exp_rdma_header.udp.total_length     <= to_uvec(v_exp_udp_total_length, c_halfword_w );
+      exp_rdma_header.bth.opcode           <= v_exp_bth_opcode;
+      exp_rdma_header.bth.psn              <= to_uvec(v_exp_bth_psn, c_word_w);
+      exp_rdma_header.reth.virtual_address <= std_logic_vector(v_exp_reth_virtual_address);
+      exp_rdma_header.reth.dma_length      <= to_uvec(v_exp_reth_dma_length, c_word_w);
+      exp_rdma_header.immediate_data       <= v_exp_immediate_data;
+
+      -- increase counters
+      v_p := v_p + 1;
+      v_m := v_p / g_nof_packets_in_msg;
+
+      proc_common_wait_some_cycles(dp_clk, 1);
+
+      -- assert when header is not as expected.
+      assert rx_rdma_header                      = exp_rdma_header                      report "Wrong rx_rdma_header" severity error;
+      assert rx_rdma_header.ip.total_length      = exp_rdma_header.ip.total_length      report "Wrong rx_rdma_header.ip.total_length value" severity error;
+      assert rx_rdma_header.udp.total_length     = exp_rdma_header.udp.total_length     report "Wrong rx_rdma_header.udp.total_length value" severity error;
+      assert rx_rdma_header.bth.opcode           = exp_rdma_header.bth.opcode           report "Wrong rx_rdma_header.bth.opcode value" severity error;
+      assert rx_rdma_header.bth.psn              = exp_rdma_header.bth.psn              report "Wrong rx_rdma_header.bth.psn value" severity error;
+      assert rx_rdma_header.reth.virtual_address = exp_rdma_header.reth.virtual_address report "Wrong rx_rdma_header.reth.virtual_address value" severity error;
+      assert rx_rdma_header.reth.dma_length      = exp_rdma_header.reth.dma_length      report "Wrong rx_rdma_header.reth.dma_length value" severity error;
+      assert rx_rdma_header.immediate_data       = exp_rdma_header.immediate_data       report "Wrong rx_rdma_header.immediate_data value" severity error;
+
+    end loop;
+
+    proc_common_wait_some_cycles(dp_clk, 100);
+    tb_end <= '1';
+    wait;
+  end process;
+
+
   u_dut: entity work.rdma_packetiser_assemble_header
     generic map (
-      g_data_w => 512
+      g_data_w => g_data_w
     )
     port map (
       st_clk             => dp_clk,
@@ -143,4 +290,81 @@ begin
 
       block_len          => block_len
     );
+
+  -------------------------------------------------------------------------------
+  -- Header for first packets, or write-only packets without immediate data
+  -------------------------------------------------------------------------------
+  u_dp_offload_first: entity dp_lib.dp_offload_rx
+  generic map (
+    g_nof_streams => 1,
+    g_data_w => g_data_w,
+    g_symbol_w => c_byte_w,
+    g_hdr_field_arr => c_rdma_packetiser_first_hdr_field_arr
+  )
+  port map (
+    dp_clk                => dp_clk,
+    dp_rst                => dp_rst,
+    mm_clk                => mm_clk,
+    mm_rst                => mm_rst,
+    snk_in_arr(0)         => src_out,
+    hdr_fields_raw_arr(0) => first_hdr_fields_arr
+  );
+
+  -------------------------------------------------------------------------------
+  -- Header for middle or last without immediate data (no RETH, no immediate data)
+  -------------------------------------------------------------------------------
+  u_dp_offload_mid: entity dp_lib.dp_offload_rx
+  generic map (
+    g_nof_streams => 1,
+    g_data_w => g_data_w,
+    g_symbol_w => c_byte_w,
+    g_hdr_field_arr => c_rdma_packetiser_mid_hdr_field_arr
+  )
+  port map (
+    dp_clk                => dp_clk,
+    dp_rst                => dp_rst,
+    mm_clk                => mm_clk,
+    mm_rst                => mm_rst,
+    snk_in_arr(0)         => src_out,
+    hdr_fields_raw_arr(0) => mid_hdr_fields_arr
+  );
+
+  -------------------------------------------------------------------------------
+  -- Header for last packets with immediate data
+  -------------------------------------------------------------------------------
+  u_dp_offload_last: entity dp_lib.dp_offload_rx
+  generic map (
+    g_nof_streams => 1,
+    g_data_w => g_data_w,
+    g_symbol_w => c_byte_w,
+    g_hdr_field_arr => c_rdma_packetiser_last_hdr_field_arr
+  )
+  port map (
+    dp_clk                => dp_clk,
+    dp_rst                => dp_rst,
+    mm_clk                => mm_clk,
+    mm_rst                => mm_rst,
+    snk_in_arr(0)         => src_out,
+    hdr_fields_raw_arr(0) => last_hdr_fields_arr
+  );
+
+  -------------------------------------------------------------------------------
+  -- Header for write-only packets with immediate data
+  -------------------------------------------------------------------------------
+  u_dp_offload_wo: entity dp_lib.dp_offload_rx
+  generic map (
+    g_nof_streams => 1,
+    g_data_w => g_data_w,
+    g_symbol_w => c_byte_w,
+    g_hdr_field_arr => c_rdma_packetiser_wo_hdr_field_arr
+  )
+  port map (
+    dp_clk                => dp_clk,
+    dp_rst                => dp_rst,
+    mm_clk                => mm_clk,
+    mm_rst                => mm_rst,
+    snk_in_arr(0)         => src_out,
+    src_out_arr(0)        => sop_sosi,
+    hdr_fields_raw_arr(0) => wo_hdr_fields_arr
+  );
 end tb;
diff --git a/applications/rdma_demo/libraries/rdma_packetiser/tb/vhdl/tb_tb_rdma_packetiser_assemble_header.vhd b/applications/rdma_demo/libraries/rdma_packetiser/tb/vhdl/tb_tb_rdma_packetiser_assemble_header.vhd
index f660180caa85165c58e1994f55addcf8c66bdc66..2a800a776a368365f0f07dfd6609436e0cccb3ba 100644
--- a/applications/rdma_demo/libraries/rdma_packetiser/tb/vhdl/tb_tb_rdma_packetiser_assemble_header.vhd
+++ b/applications/rdma_demo/libraries/rdma_packetiser/tb/vhdl/tb_tb_rdma_packetiser_assemble_header.vhd
@@ -25,6 +25,7 @@
 -- Usage:
 -- > as 3
 -- > run -all
+-- Remark: testbench takes roughly 3 minutes without wave window.
 -------------------------------------------------------------------------------
 
 library IEEE;
@@ -40,6 +41,7 @@ architecture tb of tb_tb_rdma_packetiser_assemble_header is
   signal tb_end : std_logic := '0';  -- declare tb_end to avoid 'No objects found' error on 'when -label tb_end'
 begin
   -- All generics of TB
+  --    g_data_w                   : natural := c_word_w;
   --    g_use_immediate            : boolean := true;
   --    g_use_msg_cnt_as_immediate : boolean := true;
   --    g_nof_rep                  : natural := 15;
@@ -48,14 +50,14 @@ begin
   --    g_nof_packets_in_msg       : natural := 4;
   --    g_nof_msg                  : natural := 3
 
-  u_lo_addr    : entity work.tb_rdma_packetiser_assemble_header generic map( true,  true,  20,   15,   c_low_start_addr,  4, 5);
-  u_hi_addr    : entity work.tb_rdma_packetiser_assemble_header generic map( true,  true,  20,   15,   c_high_start_addr, 4, 5);
-  u_no_mid     : entity work.tb_rdma_packetiser_assemble_header generic map( true,  true,  20,   15,   c_high_start_addr, 2, 5);
-  u_wr_only    : entity work.tb_rdma_packetiser_assemble_header generic map( true,  true,  20,   15,   c_high_start_addr, 1, 5);
-  u_large      : entity work.tb_rdma_packetiser_assemble_header generic map( true,  true,  10,   2000, c_low_start_addr,  3, 1);
-  u_no_imm_cnt : entity work.tb_rdma_packetiser_assemble_header generic map( false, true,  100,  15,   c_low_start_addr,  4, 10);
-  u_no_cnt     : entity work.tb_rdma_packetiser_assemble_header generic map( true,  false, 20,   15,   c_low_start_addr,  4, 5);
-  u_no_imm     : entity work.tb_rdma_packetiser_assemble_header generic map( false, false, 30,   7,    c_high_start_addr, 3, 2);
-  u_one        : entity work.tb_rdma_packetiser_assemble_header generic map( true,  true,  20,   1,    c_low_start_addr,  1, 5);
-  u_many       : entity work.tb_rdma_packetiser_assemble_header generic map( true,  true,  6000, 3,    c_low_start_addr,  5, 1000);
+  u_lo_addr    : entity work.tb_rdma_packetiser_assemble_header generic map( 32,   true,  true,  50,   15,   c_low_start_addr,  4,   5);
+  u_hi_addr    : entity work.tb_rdma_packetiser_assemble_header generic map( 32,   true,  true,  50,   15,   c_high_start_addr, 4,   5);
+  u_no_mid     : entity work.tb_rdma_packetiser_assemble_header generic map( 32,   true,  true,  50,   15,   c_high_start_addr, 2,   5);
+  u_wr_only    : entity work.tb_rdma_packetiser_assemble_header generic map( 32,   true,  true,  50,   15,   c_high_start_addr, 1,   5); 
+  u_large      : entity work.tb_rdma_packetiser_assemble_header generic map( 32,   true,  true,  10,   2000, c_low_start_addr,  3,   1);
+  u_no_imm_cnt : entity work.tb_rdma_packetiser_assemble_header generic map( 32,   false, true,  50,   15,   c_low_start_addr,  4,   5);
+  u_no_cnt     : entity work.tb_rdma_packetiser_assemble_header generic map( 32,   true,  false, 50,   15,   c_low_start_addr,  4,   5);
+  u_no_imm     : entity work.tb_rdma_packetiser_assemble_header generic map( 32,   false, false, 50,   7,    c_high_start_addr, 3,   5);
+  u_wide       : entity work.tb_rdma_packetiser_assemble_header generic map( 1024, true,  true,  50,   6,    c_low_start_addr,  1,   5); 
+  u_many       : entity work.tb_rdma_packetiser_assemble_header generic map( 32,   true,  true,  600,  7,    c_low_start_addr,  100, 5); 
 end tb;