diff --git a/applications/rdma_demo/libraries/rdma_packetiser/src/vhdl/rdma_packetiser_assemble_header.vhd b/applications/rdma_demo/libraries/rdma_packetiser/src/vhdl/rdma_packetiser_assemble_header.vhd
index e51876b7d4d5972fcb4e5f70adbbe29595b0f768..2bc15f76ee19ccd7d6bbb159303e1fffd92d6d3e 100644
--- a/applications/rdma_demo/libraries/rdma_packetiser/src/vhdl/rdma_packetiser_assemble_header.vhd
+++ b/applications/rdma_demo/libraries/rdma_packetiser/src/vhdl/rdma_packetiser_assemble_header.vhd
@@ -18,29 +18,28 @@
 --
 -------------------------------------------------------------------------------
 -- Author: R. van der Walle
--- Purpose: Assembles the RDMA header at snk_in.sop
+-- Purpose: Assembles the RDMA header at snk_in.sop and inserts it in front of
+-- the incoming data stream
 -- Description:
--- Generates a RoCEv2 header (ETH + UDP + IP + RDMA). See [1].
--- Generics:
--- . g_use_immediate: When true, the immediate data field will be added to the
+-- Generates a RoCEv2 header (ETH + UDP + IP + RDMA). See [1]. Then it prepends
+-- the generated header to the incoming sosi data stream.
+-- MM config:
+-- . config_use_immediate: When true, the immediate data field will be added to the
 --   header.
--- . g_use_msg_cnt_as_immediate: When true, the message counter going from 0 to
+-- . config_use_msg_cnt_as_immediate: When true, the message counter going from 0 to
 --   "nof_msg" is used as immediate data. When false, the "immediate_data"
 --   input port is used.
--- Signal inputs:
 -- . immediate_data: Will be used as immediate data when
---   g_use_msg_cnt_as_immediate = False.
--- . block_len: Should be set to the length of the incoming data frame in octets.
--- . nof_packets_in_msg: Should be set to the desired amount of packets in a message.
--- . nof_msg: Should be set to the desired amount of messages, this determines the
+--   config_use_msg_cnt_as_immediate = False.
+-- . config_nof_packets_in_msg: Should be set to the desired amount of packets in a message.
+-- . config_nof_msg: Should be set to the desired amount of messages, this determines the
 --   address space aswell, see remarks.
--- . dma_len: The amount with which the address should increase every message,
+-- . reth_dma_length: The amount with which the address should increase every message,
 --   this should be set to >= nof_packets_in_msg * block_len.
--- . start_address: The start address for the virtual_address field.
+-- . config_start_address: The start address for the virtual_address field.
+-- Signal inputs:
+-- . block_len: Should be set to the length of the incoming data frame in octets.
 -- Remarks
--- . The hdr_fields_slv output is set one st_clk cycle after snk_in.sop and will
---   contain the RoCEv2 (RDMA over Converged Ethernet v2) header information for
---   the corresponding data frame.
 -- . The virtual_address is set automatically by increasing it with dma_len every
 --   new message. The virtual address is reset to "start_address" when the number
 --   of messages has reached "nof_msg". Then the cycle repeats.
@@ -65,38 +64,38 @@ library IEEE, common_lib, dp_lib, eth_lib;
 
 entity rdma_packetiser_assemble_header is
   generic (
-    g_use_immediate            : boolean := true;
-    g_use_msg_cnt_as_immediate : boolean := true
+    g_data_w : natural := 512
   );
   port (
     -- Clocks and reset
     st_clk             : in  std_logic;
     st_rst             : in  std_logic;
 
+    mm_clk             : in  std_logic;
+    mm_rst             : in  std_logic;
+
+    reg_hdr_dat_copi   : in  t_mem_copi := c_mem_copi_rst;
+    reg_hdr_dat_cipo   : out t_mem_cipo;
+
     snk_in             : in  t_dp_sosi := c_dp_sosi_rst;
+    snk_out            : out t_dp_siso := c_dp_siso_rdy;
 
-    hdr_fields_slv     : out std_logic_vector(1023 downto 0) := (others => '0');
+    src_out            : out t_dp_sosi := c_dp_sosi_rst;
+    src_in             : in  t_dp_siso := c_dp_siso_rdy;
 
-    immediate_data     : in  std_logic_vector(c_rdma_packetiser_roce_imm_len * c_octet_w - 1 downto 0) := (others => '0');
-    block_len          : in  std_logic_vector(c_halfword_w - 1 downto 0); -- in octets
-    nof_packets_in_msg : in  std_logic_vector(c_word_w - 1 downto 0);
-    nof_msg            : in  std_logic_vector(c_word_w - 1 downto 0);
-    dma_len            : in  std_logic_vector(c_word_w - 1 downto 0); -- = block_len * nof_packets_in_msg
-    start_address      : in  std_logic_vector(c_longword_w - 1 downto 0)
+    block_len          : in  std_logic_vector(c_halfword_w - 1 downto 0) -- in octets
   );
 end rdma_packetiser_assemble_header;
 
 architecture str of rdma_packetiser_assemble_header is
-  constant c_hdr_field_arr         : t_common_field_arr := sel_a_b(g_use_immediate, c_rdma_packetiser_roce_hdr_field_arr, c_rdma_packetiser_roce_no_imm_hdr_field_arr);
-  constant c_app_hdr_length        : natural := sel_a_b(g_use_immediate, c_rdma_packetiser_roce_hdr_len, c_rdma_packetiser_roce_no_imm_hdr_len);
-  constant c_udp_app_hdr_length    : natural := c_network_udp_header_len + c_app_hdr_length;
+  constant c_udp_app_hdr_length    : natural := c_network_udp_header_len + c_rdma_packetiser_bth_len;
   constant c_ip_udp_app_hdr_length : natural := c_network_ip_header_len + c_udp_app_hdr_length;
+  constant c_nof_offload           : natural := 4; 
 
   type t_state is (s_first, s_middle, s_last);
   type t_reg is record -- record to keep the registers organized.
     state              : t_state;
     opcode             : std_logic_vector(c_byte_w - 1 downto 0);
-    immediate_data     : std_logic_vector(c_rdma_packetiser_roce_imm_len * c_octet_w - 1 downto 0);
     psn                : std_logic_vector(c_word_w - 1 downto 0);
     virtual_address    : unsigned(c_longword_w - 1 downto 0);
     dma_len            : unsigned(c_word_w - 1 downto 0);
@@ -105,15 +104,61 @@ architecture str of rdma_packetiser_assemble_header is
     udp_total_length   : natural;
     ip_total_length    : natural;
     nof_packets_in_msg : natural;
+    sel_ctrl           : natural range 0 to c_nof_offload - 1;
   end record;
 
-  constant c_reg_rst : t_reg := (s_first, (others => '1'), (others => '0'), (others => '0'), (others => '0'), (others => '0'), 0, 0, 0, 0, 0);
+  constant c_reg_rst : t_reg := (s_first, (others => '1'), (others => '0'), (others => '0'), (others => '0'), 0, 0, 0, 0, 0, 0);
   signal d, q : t_reg;
+
+  signal use_immediate            : std_logic;
+  signal use_msg_cnt_as_immediate : std_logic;
+  signal immediate_data           : std_logic_vector(c_rdma_packetiser_imm_len * c_octet_w - 1 downto 0) := (others => '0');
+  signal nof_packets_in_msg       : std_logic_vector(c_word_w - 1 downto 0);
+  signal nof_msg                  : std_logic_vector(c_word_w - 1 downto 0);
+  signal dma_len                  : std_logic_vector(c_word_w - 1 downto 0); -- = block_len * nof_packets_in_msg
+  signal start_address            : std_logic_vector(c_longword_w - 1 downto 0);
+
+  signal hdr_fields_slv_out_mm    : std_logic_vector(1023 downto 0) := (others => '0');
+  signal hdr_fields_slv_in        : std_logic_vector(1023 downto 0) := (others => '0');
+  signal hdr_fields_slv_in_first  : std_logic_vector(1023 downto 0) := (others => '0');
+  signal hdr_fields_slv_in_mid    : std_logic_vector(1023 downto 0) := (others => '0');
+  signal hdr_fields_slv_in_last   : std_logic_vector(1023 downto 0) := (others => '0');
+  signal hdr_fields_slv_in_wo     : std_logic_vector(1023 downto 0) := (others => '0');
+
+  signal dp_demux_src_out_arr     : t_dp_sosi_arr(c_nof_offload - 1 downto 0) := (others => c_dp_sosi_rst);
+  signal dp_demux_src_in_arr      : t_dp_siso_arr(c_nof_offload - 1 downto 0) := (others => c_dp_siso_rdy);
+  signal dp_mux_snk_in_arr        : t_dp_sosi_arr(c_nof_offload - 1 downto 0) := (others => c_dp_sosi_rst);
+  signal dp_mux_snk_out_arr       : t_dp_siso_arr(c_nof_offload - 1 downto 0) := (others => c_dp_siso_rdy);
+  signal dp_offload_first_snk_in  : t_dp_sosi := c_dp_sosi_rst;
+  signal dp_offload_first_snk_out : t_dp_siso := c_dp_siso_rdy;
+  signal dp_offload_first_src_out : t_dp_sosi := c_dp_sosi_rst;
+  signal dp_offload_first_src_in  : t_dp_siso := c_dp_siso_rdy;
+  signal dp_offload_mid_snk_in    : t_dp_sosi := c_dp_sosi_rst;
+  signal dp_offload_mid_snk_out   : t_dp_siso := c_dp_siso_rdy;
+  signal dp_offload_mid_src_out   : t_dp_sosi := c_dp_sosi_rst;
+  signal dp_offload_mid_src_in    : t_dp_siso := c_dp_siso_rdy;
+  signal dp_offload_last_snk_in   : t_dp_sosi := c_dp_sosi_rst;
+  signal dp_offload_last_snk_out  : t_dp_siso := c_dp_siso_rdy;
+  signal dp_offload_last_src_out  : t_dp_sosi := c_dp_sosi_rst;
+  signal dp_offload_last_src_in   : t_dp_siso := c_dp_siso_rdy;
+  signal dp_offload_wo_snk_in     : t_dp_sosi := c_dp_sosi_rst;
+  signal dp_offload_wo_snk_out    : t_dp_siso := c_dp_siso_rdy;
+  signal dp_offload_wo_src_out    : t_dp_sosi := c_dp_sosi_rst;
+  signal dp_offload_wo_src_in     : t_dp_siso := c_dp_siso_rdy;
+  signal dp_pipeline_src_out      : t_dp_sosi := c_dp_sosi_rst;
+
 begin
-  q <= d when rising_edge(st_clk);
+  immediate_data           <=    hdr_fields_slv_out_mm(field_hi(c_rdma_packetiser_mm_field_arr, "immediate_data"                 ) downto field_lo(c_rdma_packetiser_mm_field_arr, "immediate_data"                 )); 
+  use_immediate            <= sl(hdr_fields_slv_out_mm(field_hi(c_rdma_packetiser_mm_field_arr, "config_use_immediate"           ) downto field_lo(c_rdma_packetiser_mm_field_arr, "config_use_immediate"           ))); 
+  use_msg_cnt_as_immediate <= sl(hdr_fields_slv_out_mm(field_hi(c_rdma_packetiser_mm_field_arr, "config_use_msg_cnt_as_immediate") downto field_lo(c_rdma_packetiser_mm_field_arr, "config_use_msg_cnt_as_immediate"))); 
+  nof_packets_in_msg       <=    hdr_fields_slv_out_mm(field_hi(c_rdma_packetiser_mm_field_arr, "config_nof_packets_in_msg"      ) downto field_lo(c_rdma_packetiser_mm_field_arr, "config_nof_packets_in_msg"      )); 
+  nof_msg                  <=    hdr_fields_slv_out_mm(field_hi(c_rdma_packetiser_mm_field_arr, "config_nof_msg"                 ) downto field_lo(c_rdma_packetiser_mm_field_arr, "config_nof_msg"                 )); 
+  dma_len                  <=    hdr_fields_slv_out_mm(field_hi(c_rdma_packetiser_mm_field_arr, "reth_dma_length"                ) downto field_lo(c_rdma_packetiser_mm_field_arr, "reth_dma_length"                )); 
+  start_address            <=    hdr_fields_slv_out_mm(field_hi(c_rdma_packetiser_mm_field_arr, "config_start_address"           ) downto field_lo(c_rdma_packetiser_mm_field_arr, "config_start_address"           )); 
 
   -- State machine to derive RDMA header fields.
-  p_comb : process(st_rst, q, snk_in, nof_packets_in_msg, start_address, nof_msg, immediate_data, dma_len, block_len)
+  q <= d when rising_edge(st_clk);
+  p_comb : process(st_rst, q, snk_in, nof_packets_in_msg, start_address, nof_msg, immediate_data, dma_len, block_len, use_immediate)
     variable v : t_reg;
   begin
     v := q;
@@ -134,19 +179,24 @@ begin
 
           if v.nof_packets_in_msg = 1 then -- set opcode to write_only.
             v.opcode := c_rdma_packetiser_opcode_uc_write_only;
-            if g_use_immediate then  -- set opcode to write_only with immediate data.
+            v.sel_ctrl := 0;
+            if use_immediate = '1' then  -- set opcode to write_only with immediate data.
               v.opcode := c_rdma_packetiser_opcode_uc_write_only_imm;
+            v.sel_ctrl := 3;
             end if;
           elsif v.nof_packets_in_msg = 2 then -- set opcode to write_first.
             v.state := s_last; -- next state is last as there are only 2 packets.
             v.opcode := c_rdma_packetiser_opcode_uc_write_first;
+            v.sel_ctrl := 0;
           elsif v.nof_packets_in_msg > 2 then
             v.state := s_middle;
             v.opcode := c_rdma_packetiser_opcode_uc_write_first;
+            v.sel_ctrl := 0;
           end if;
 
         when s_middle => -- wait unitl the first packet is done and set next opcode.
           v.opcode := c_rdma_packetiser_opcode_uc_write_middle;
+            v.sel_ctrl := 1;
           if q.p_cnt >= v.nof_packets_in_msg - 2 then -- wait until last middle packet
             v.state := s_last;
           end if;
@@ -154,8 +204,10 @@ begin
         when s_last => -- next packet must be last packet, set opcode.
           v.state := s_first;
           v.opcode := c_rdma_packetiser_opcode_uc_write_last;
-          if g_use_immediate then -- set opcode to write_last with immediate data
+            v.sel_ctrl := 1;
+          if use_immediate = '1' then -- set opcode to write_last with immediate data
             v.opcode := c_rdma_packetiser_opcode_uc_write_last_imm;
+            v.sel_ctrl := 2;
           end if;
       end case;
     end if;
@@ -163,10 +215,9 @@ begin
     if v.msg_cnt = 0 then -- set on new message
       v.virtual_address    := unsigned(start_address);
       v.dma_len            := unsigned(dma_len);
-      v.udp_total_length   := c_udp_app_hdr_length    + to_uint(block_len) + c_rdma_packetiser_roce_icrc_len;
-      v.ip_total_length    := c_ip_udp_app_hdr_length + to_uint(block_len) + c_rdma_packetiser_roce_icrc_len;
+      v.udp_total_length   := c_udp_app_hdr_length    + to_uint(block_len) + c_rdma_packetiser_icrc_len;
+      v.ip_total_length    := c_ip_udp_app_hdr_length + to_uint(block_len) + c_rdma_packetiser_icrc_len;
       v.nof_packets_in_msg := to_uint(nof_packets_in_msg);
-      v.immediate_data     := immediate_data;
     end if;
 
     if st_rst = '1' then
@@ -176,20 +227,228 @@ begin
     d <= v;
   end process;
 
-  hdr_fields_slv(field_hi(c_hdr_field_arr, "ip_total_length"     ) downto field_lo(c_hdr_field_arr, "ip_total_length"     )) <= TO_UVEC(q.ip_total_length, 16);
-  hdr_fields_slv(field_hi(c_hdr_field_arr, "udp_total_length"    ) downto field_lo(c_hdr_field_arr, "udp_total_length"    )) <= TO_UVEC(q.udp_total_length, 16);
-  hdr_fields_slv(field_hi(c_hdr_field_arr, "bth_opcode"          ) downto field_lo(c_hdr_field_arr, "bth_opcode"          )) <= q.opcode;
-  hdr_fields_slv(field_hi(c_hdr_field_arr, "bth_psn"             ) downto field_lo(c_hdr_field_arr, "bth_psn"             )) <= q.psn;
-  hdr_fields_slv(field_hi(c_hdr_field_arr, "reth_virtual_address") downto field_lo(c_hdr_field_arr, "reth_virtual_address")) <= std_logic_vector(q.virtual_address);
-  hdr_fields_slv(field_hi(c_hdr_field_arr, "reth_dma_length"     ) downto field_lo(c_hdr_field_arr, "reth_dma_length"     )) <= std_logic_vector(q.dma_len);
-
-  gen_use_immediate : if g_use_immediate generate
-    gen_use_msg_cnt : if g_use_msg_cnt_as_immediate generate
-      hdr_fields_slv(field_hi(c_hdr_field_arr, "immediate_data") downto field_lo(c_hdr_field_arr, "immediate_data")) <= TO_UVEC(q.msg_cnt, 32);
-    end generate;
-
-    gen_use_no_msg_cnt : if not g_use_msg_cnt_as_immediate generate
-      hdr_fields_slv(field_hi(c_hdr_field_arr, "immediate_data") downto field_lo(c_hdr_field_arr, "immediate_data")) <= q.immediate_data;
-    end generate;
-  end generate;
+  -------------------------------------------------------------------------------
+  -- Wire the header fields 
+  -------------------------------------------------------------------------------
+  hdr_fields_slv_in(field_hi(c_rdma_packetiser_mm_field_arr, "ip_total_length"     ) downto field_lo(c_rdma_packetiser_mm_field_arr, "ip_total_length"     )) <= TO_UVEC(q.ip_total_length, 16);
+  hdr_fields_slv_in(field_hi(c_rdma_packetiser_mm_field_arr, "udp_total_length"    ) downto field_lo(c_rdma_packetiser_mm_field_arr, "udp_total_length"    )) <= TO_UVEC(q.udp_total_length, 16);
+  hdr_fields_slv_in(field_hi(c_rdma_packetiser_mm_field_arr, "bth_opcode"          ) downto field_lo(c_rdma_packetiser_mm_field_arr, "bth_opcode"          )) <= q.opcode;
+  hdr_fields_slv_in(field_hi(c_rdma_packetiser_mm_field_arr, "bth_psn"             ) downto field_lo(c_rdma_packetiser_mm_field_arr, "bth_psn"             )) <= q.psn;
+  hdr_fields_slv_in(field_hi(c_rdma_packetiser_mm_field_arr, "reth_virtual_address") downto field_lo(c_rdma_packetiser_mm_field_arr, "reth_virtual_address")) <= std_logic_vector(q.virtual_address);
+  hdr_fields_slv_in(field_hi(c_rdma_packetiser_mm_field_arr, "reth_dma_length"     ) downto field_lo(c_rdma_packetiser_mm_field_arr, "reth_dma_length"     )) <= std_logic_vector(q.dma_len);
+
+  -------------------------------------------------------------------------------
+  -- demux to guide the incoming stream to the correct dp_offload_tx_v3 
+  -------------------------------------------------------------------------------
+  u_dp_demux : entity dp_lib.dp_demux
+  generic map (
+    g_mode            => 2,
+    g_nof_output      => c_nof_offload,
+    g_combined        => false,
+    g_sel_ctrl_invert => true,
+    g_sel_ctrl_pkt    => true
+  )
+  port map (
+    rst         => st_rst,
+    clk         => st_clk,
+
+    sel_ctrl    => q.sel_ctrl,
+    
+    snk_in      => snk_in,
+    snk_out     => snk_out,
+    
+    src_out_arr => dp_demux_src_out_arr,
+    src_in_arr  => dp_demux_src_in_arr
+  );
+
+  -- Wire demux outputs to dp_offload inputs.
+  dp_offload_first_snk_in <= dp_demux_src_out_arr(0);
+  dp_offload_mid_snk_in   <= dp_demux_src_out_arr(1);
+  dp_offload_last_snk_in  <= dp_demux_src_out_arr(2);
+  dp_offload_wo_snk_in    <= dp_demux_src_out_arr(3);
+  dp_demux_src_in_arr(0)  <= dp_offload_first_snk_out;
+  dp_demux_src_in_arr(1)  <= dp_offload_mid_snk_out;
+  dp_demux_src_in_arr(2)  <= dp_offload_last_snk_out;
+  dp_demux_src_in_arr(3)  <= dp_offload_wo_snk_out;
+
+  -- Wire header fields for every dp_offload_tx_v3 instance (first, mid, last, wo)
+  p_wire_headers : process(hdr_fields_slv_out_mm, use_msg_cnt_as_immediate, q)
+  begin
+    -- Select, per offload instance, the subset of MM header fields matching its g_hdr_field_arr.
+    hdr_fields_slv_in_first <= field_select_subset(c_rdma_packetiser_first_hdr_field_arr,
+                                                   c_rdma_packetiser_mm_field_arr,
+                                                   hdr_fields_slv_out_mm);
+    hdr_fields_slv_in_mid   <= field_select_subset(c_rdma_packetiser_mid_hdr_field_arr,
+                                                   c_rdma_packetiser_mm_field_arr,
+                                                   hdr_fields_slv_out_mm);
+    hdr_fields_slv_in_last  <= field_select_subset(c_rdma_packetiser_last_hdr_field_arr,
+                                                   c_rdma_packetiser_mm_field_arr,
+                                                   hdr_fields_slv_out_mm);
+    hdr_fields_slv_in_wo    <= field_select_subset(c_rdma_packetiser_wo_hdr_field_arr,
+                                                   c_rdma_packetiser_mm_field_arr,
+                                                   hdr_fields_slv_out_mm);
+
+    if use_msg_cnt_as_immediate = '1' then -- override immediate data with msg_cnt (last assignment in the process wins)
+      hdr_fields_slv_in_last(field_hi(c_rdma_packetiser_last_hdr_field_arr, "immediate_data") downto
+                             field_lo(c_rdma_packetiser_last_hdr_field_arr, "immediate_data")) <= TO_UVEC(q.msg_cnt, 32);
+      hdr_fields_slv_in_wo(  field_hi(c_rdma_packetiser_wo_hdr_field_arr,   "immediate_data") downto
+                             field_lo(c_rdma_packetiser_wo_hdr_field_arr,   "immediate_data")) <= TO_UVEC(q.msg_cnt, 32);
+    end if;
+  end process;
+
+  -------------------------------------------------------------------------------
+  -- Header for first packets or write only without immediate data 
+  -------------------------------------------------------------------------------
+  u_dp_offload_first : entity dp_lib.dp_offload_tx_v3
+  generic map (
+    g_nof_streams    => 1,
+    g_data_w         => g_data_w,
+    g_symbol_w       => c_octet_w,
+    g_hdr_field_arr  => c_rdma_packetiser_first_hdr_field_arr,
+    g_hdr_field_sel  => c_rdma_packetiser_first_hdr_field_sel,
+    g_pipeline_ready => true
+  )
+  port map (
+    dp_rst                => st_rst,
+    dp_clk                => st_clk,
+    snk_in_arr(0)         => dp_offload_first_snk_in,
+    snk_out_arr(0)        => dp_offload_first_snk_out,
+    src_out_arr(0)        => dp_offload_first_src_out,
+    src_in_arr(0)         => dp_offload_first_src_in,
+    hdr_fields_in_arr(0)  => hdr_fields_slv_in_first
+  );
+
+  -------------------------------------------------------------------------------
+  -- Header for middle or last without immediate data (no RETH, no immediate data)
+  -------------------------------------------------------------------------------
+  u_dp_offload_mid : entity dp_lib.dp_offload_tx_v3
+  generic map (
+    g_nof_streams    => 1,
+    g_data_w         => g_data_w,
+    g_symbol_w       => c_octet_w,
+    g_hdr_field_arr  => c_rdma_packetiser_mid_hdr_field_arr,
+    g_hdr_field_sel  => c_rdma_packetiser_mid_hdr_field_sel,
+    g_pipeline_ready => true
+  )
+  port map (
+    dp_rst                => st_rst,
+    dp_clk                => st_clk,
+    snk_in_arr(0)         => dp_offload_mid_snk_in,
+    snk_out_arr(0)        => dp_offload_mid_snk_out,
+    src_out_arr(0)        => dp_offload_mid_src_out,
+    src_in_arr(0)         => dp_offload_mid_src_in,
+    hdr_fields_in_arr(0)  => hdr_fields_slv_in_mid
+  );
+
+  -------------------------------------------------------------------------------
+  -- Header for last packets with immediate data
+  -------------------------------------------------------------------------------
+  u_dp_offload_last : entity dp_lib.dp_offload_tx_v3
+  generic map (
+    g_nof_streams    => 1,
+    g_data_w         => g_data_w,
+    g_symbol_w       => c_octet_w,
+    g_hdr_field_arr  => c_rdma_packetiser_last_hdr_field_arr,
+    g_hdr_field_sel  => c_rdma_packetiser_last_hdr_field_sel,
+    g_pipeline_ready => true
+  )
+  port map (
+    dp_rst                => st_rst,
+    dp_clk                => st_clk,
+    snk_in_arr(0)         => dp_offload_last_snk_in,
+    snk_out_arr(0)        => dp_offload_last_snk_out,
+    src_out_arr(0)        => dp_offload_last_src_out,
+    src_in_arr(0)         => dp_offload_last_src_in,
+    hdr_fields_in_arr(0)  => hdr_fields_slv_in_last 
+  );
+
+  -------------------------------------------------------------------------------
+  -- Header for write only packets with immediate data 
+  -------------------------------------------------------------------------------
+  u_dp_offload_wo : entity dp_lib.dp_offload_tx_v3
+  generic map (
+    g_nof_streams    => 1,
+    g_data_w         => g_data_w,
+    g_symbol_w       => c_octet_w,
+    g_hdr_field_arr  => c_rdma_packetiser_wo_hdr_field_arr,
+    g_hdr_field_sel  => c_rdma_packetiser_wo_hdr_field_sel,
+    g_pipeline_ready => true
+  )
+  port map (
+    dp_rst                => st_rst,
+    dp_clk                => st_clk,
+    snk_in_arr(0)         => dp_offload_wo_snk_in,
+    snk_out_arr(0)        => dp_offload_wo_snk_out,
+    src_out_arr(0)        => dp_offload_wo_src_out,
+    src_in_arr(0)         => dp_offload_wo_src_in,
+    hdr_fields_in_arr(0)  => hdr_fields_slv_in_wo
+  );
+
+  -------------------------------------------------------------------------------
+  -- Using extra dp_offload_tx_v3 only for MM that contains all headers + config register
+  -------------------------------------------------------------------------------
+  -- DP pipeline to correct for state machine latency
+  u_dp_pipeline : entity dp_lib.dp_pipeline
+  port map (
+    rst => st_rst,
+    clk => st_clk,
+    snk_out => open,
+    snk_in  => snk_in,
+    src_out => dp_pipeline_src_out
+  );
+
+  -- dp_offload_tx_v3
+  u_dp_offload_tx : entity dp_lib.dp_offload_tx_v3
+  generic map (
+    g_nof_streams    => 1,
+    g_data_w         => g_data_w,
+    g_symbol_w       => c_octet_w,
+    g_hdr_field_arr  => c_rdma_packetiser_mm_field_arr,
+    g_hdr_field_sel  => c_rdma_packetiser_mm_field_sel,
+    g_pipeline_ready => true
+  )
+  port map (
+    mm_rst                => mm_rst,
+    mm_clk                => mm_clk,
+    dp_rst                => st_rst,
+    dp_clk                => st_clk,
+    reg_hdr_dat_mosi      => reg_hdr_dat_copi, 
+    reg_hdr_dat_miso      => reg_hdr_dat_cipo, 
+    snk_in_arr(0)         => dp_pipeline_src_out,
+    hdr_fields_in_arr(0)  => hdr_fields_slv_in,
+    hdr_fields_out_arr(0) => hdr_fields_slv_out_mm
+  );
+
+  -------------------------------------------------------------------------------
+  -- Mux to merge the packets from the different dp_offload_tx_v3
+  -------------------------------------------------------------------------------
+  -- Wire dp_offload outputs to mux inputs.
+  dp_mux_snk_in_arr(0)    <= dp_offload_first_src_out;
+  dp_mux_snk_in_arr(1)    <= dp_offload_mid_src_out;
+  dp_mux_snk_in_arr(2)    <= dp_offload_last_src_out;
+  dp_mux_snk_in_arr(3)    <= dp_offload_wo_src_out;
+  dp_offload_first_src_in <= dp_mux_snk_out_arr(0);
+  dp_offload_mid_src_in   <= dp_mux_snk_out_arr(1);
+  dp_offload_last_src_in  <= dp_mux_snk_out_arr(2);
+  dp_offload_wo_src_in    <= dp_mux_snk_out_arr(3);
+
+  u_dp_mux : entity dp_lib.dp_mux
+  generic map (
+    g_mode              => 2,
+    g_nof_input         => c_nof_offload,
+    g_append_channel_lo => false,
+    g_sel_ctrl_invert   => true,
+    g_use_fifo          => false, -- fifo is not needed as the inputs cannot occur simultaneously.
+    g_fifo_size         => array_init(0, c_nof_offload),
+    g_fifo_fill         => array_init(0, c_nof_offload)
+  )
+  port map (
+    rst         => st_rst,
+    clk         => st_clk,
+    sel_ctrl    => q.sel_ctrl,
+    snk_in_arr  => dp_mux_snk_in_arr,
+    snk_out_arr => dp_mux_snk_out_arr,
+    src_out     => src_out,
+    src_in      => src_in
+  );
 end str;
diff --git a/applications/rdma_demo/libraries/rdma_packetiser/src/vhdl/rdma_packetiser_pkg.vhd b/applications/rdma_demo/libraries/rdma_packetiser/src/vhdl/rdma_packetiser_pkg.vhd
index bebc9205415d93fa2e33336d48b6a880852ae40c..5e25e0595cf0bd88da6f25f562015442ce8a23d2 100644
--- a/applications/rdma_demo/libraries/rdma_packetiser/src/vhdl/rdma_packetiser_pkg.vhd
+++ b/applications/rdma_demo/libraries/rdma_packetiser/src/vhdl/rdma_packetiser_pkg.vhd
@@ -86,112 +86,261 @@ package rdma_packetiser_pkg is
   --   = 0 can be achieve via data path and default hdr_fields_in_arr = 0 or
   --   via MM controlled and field_default(0).
 
-  -- RoCEv2 header for RDMA operation with immediate data
-  -- ETH + IP + UDP + Base Transport Header (BTH) + RDMA Extended Transport Header (RETH) + Immediate Data
-  constant c_rdma_packetiser_roce_nof_hdr_fields : natural := 3 + 12 + 4 + 13 + 3 + 1;
-  constant c_rdma_packetiser_roce_hdr_field_sel  : std_logic_vector(c_rdma_packetiser_roce_nof_hdr_fields - 1 downto 0) :=  "111" & "111011111001" & "0100" & "1111111111111" & "111" & "1";
-
-  constant c_rdma_packetiser_roce_hdr_field_arr : t_common_field_arr(
-    c_rdma_packetiser_roce_nof_hdr_fields - 1 downto 0) := (
-    ( field_name_pad("eth_dst_mac"         ), "RW", 48, field_default(0) ), -- set by M&C
-    ( field_name_pad("eth_src_mac"         ), "RW", 48, field_default(0) ), -- set by M&C
-    ( field_name_pad("eth_type"            ), "RW", 16, field_default(x"0800") ), -- fixed
-
-    ( field_name_pad("ip_version"          ), "RW",  4, field_default(4) ), -- fixed
-    ( field_name_pad("ip_header_length"    ), "RW",  4, field_default(5) ), -- fixed
-    ( field_name_pad("ip_services"         ), "RW",  8, field_default(0) ), -- fixed
-    ( field_name_pad("ip_total_length"     ), "RW", 16, field_default(0) ), -- set by data path
-    ( field_name_pad("ip_identification"   ), "RW", 16, field_default(0) ), -- fixed
-    ( field_name_pad("ip_flags"            ), "RW",  3, field_default(2) ), -- fixed
-    ( field_name_pad("ip_fragment_offset"  ), "RW", 13, field_default(0) ), -- fixed
-    ( field_name_pad("ip_time_to_live"     ), "RW",  8, field_default(127) ), -- fixed
-    ( field_name_pad("ip_protocol"         ), "RW",  8, field_default(17) ), -- fixed
-    ( field_name_pad("ip_header_checksum"  ), "RW", 16, field_default(0) ), -- set by data path
-    ( field_name_pad("ip_src_addr"         ), "RW", 32, field_default(0) ), -- set by M&C
-    ( field_name_pad("ip_dst_addr"         ), "RW", 32, field_default(0) ), -- set by M&C
-
-    ( field_name_pad("udp_src_port"        ), "RW", 16, field_default(0) ), -- set by M&C
-    ( field_name_pad("udp_dst_port"        ), "RW", 16, field_default(0) ), -- set by M&C
-    ( field_name_pad("udp_total_length"    ), "RW", 16, field_default(0) ), -- set by data path
-    ( field_name_pad("udp_checksum"        ), "RW", 16, field_default(0) ), -- fixed
-
-    ( field_name_pad("bth_opcode"          ), "RW",  8, field_default(x"FF") ), -- set by data path
-    ( field_name_pad("bth_se"              ), "RW",  1, field_default(0) ), -- set by M&C
-    ( field_name_pad("bth_m"               ), "RW",  1, field_default(0) ), -- set by M&C
-    ( field_name_pad("bth_pad"             ), "RW",  2, field_default(0) ), -- set by M&C
-    ( field_name_pad("bth_tver"            ), "RW",  4, field_default(0) ), -- set by M&C
-    ( field_name_pad("bth_partition_key"   ), "RW", 16, field_default(65535) ), -- set by M&C
-    ( field_name_pad("bth_fres"            ), "RW",  1, field_default(0) ), -- set by M&C
-    ( field_name_pad("bth_bres"            ), "RW",  1, field_default(0) ), -- set by M&C
-    ( field_name_pad("bth_reserved_a"      ), "RW",  6, field_default(0) ), -- fixed
-    ( field_name_pad("bth_dest_qp"         ), "RW", 16, field_default(0) ), -- set by M&C
-    ( field_name_pad("bth_ack_req"         ), "RW",  1, field_default(0) ), -- set by M&C
-    ( field_name_pad("bth_reserved_b"      ), "RW",  7, field_default(0) ), -- fixed
-    ( field_name_pad("bth_psn"             ), "RW", 32, field_default(0) ), -- set by data path
-
-    ( field_name_pad("reth_virtual_address"), "RW", 64, field_default(0) ), -- set by data path
-    ( field_name_pad("reth_r_key"          ), "RW", 32, field_default(0) ), -- set by M&C
-    ( field_name_pad("reth_dma_length"     ), "RW", 32, field_default(0) ), -- set by M&C
-
-    ( field_name_pad("immediate_data"      ), "RW", 32, field_default(0) ) -- set by data path or M&C
-    );
-  constant c_rdma_packetiser_roce_reg_hdr_dat_addr_w    : natural := ceil_log2(field_nof_words(c_rdma_packetiser_roce_hdr_field_arr, c_word_w));
-  constant c_rdma_packetiser_roce_reg_hdr_dat_addr_span : natural := 2**c_rdma_packetiser_roce_reg_hdr_dat_addr_w;
-
-  -- RoCEv2 header for RDMA operation without immediate data
-  -- ETH + IP + UDP + Base Transport Header (BTH) + RDMA Extended Transport Header (RETH), so no immediate data ("no_imm").
-  constant c_rdma_packetiser_roce_no_imm_nof_hdr_fields : natural := 3 + 12 + 4 + 13 + 3;
-  constant c_rdma_packetiser_roce_no_imm_hdr_field_sel  : std_logic_vector(c_rdma_packetiser_roce_no_imm_nof_hdr_fields - 1 downto 0) :=  "111" & "111011111001" & "0100" & "1111111111111" & "111";
-
-  constant c_rdma_packetiser_roce_no_imm_hdr_field_arr : t_common_field_arr(
-    c_rdma_packetiser_roce_no_imm_nof_hdr_fields - 1 downto 0) := (
-    ( field_name_pad("eth_dst_mac"         ), "RW", 48, field_default(0) ), -- set by M&C
-    ( field_name_pad("eth_src_mac"         ), "RW", 48, field_default(0) ), -- set by M&C
-    ( field_name_pad("eth_type"            ), "RW", 16, field_default(x"0800") ), -- fixed
-
-    ( field_name_pad("ip_version"          ), "RW",  4, field_default(4) ), -- fixed
-    ( field_name_pad("ip_header_length"    ), "RW",  4, field_default(5) ), -- fixed
-    ( field_name_pad("ip_services"         ), "RW",  8, field_default(0) ), -- fixed
-    ( field_name_pad("ip_total_length"     ), "RW", 16, field_default(0) ), -- set by data path
-    ( field_name_pad("ip_identification"   ), "RW", 16, field_default(0) ), -- fixed
-    ( field_name_pad("ip_flags"            ), "RW",  3, field_default(2) ), -- fixed
-    ( field_name_pad("ip_fragment_offset"  ), "RW", 13, field_default(0) ), -- fixed
-    ( field_name_pad("ip_time_to_live"     ), "RW",  8, field_default(127) ), -- fixed
-    ( field_name_pad("ip_protocol"         ), "RW",  8, field_default(17) ), -- fixed
-    ( field_name_pad("ip_header_checksum"  ), "RW", 16, field_default(0) ), -- set by data path
-    ( field_name_pad("ip_src_addr"         ), "RW", 32, field_default(0) ), -- set by M&C
-    ( field_name_pad("ip_dst_addr"         ), "RW", 32, field_default(0) ), -- set by M&C
-
-    ( field_name_pad("udp_src_port"        ), "RW", 16, field_default(0) ), -- set by M&C
-    ( field_name_pad("udp_dst_port"        ), "RW", 16, field_default(0) ), -- set by M&C
-    ( field_name_pad("udp_total_length"    ), "RW", 16, field_default(0) ), -- set by data path
-    ( field_name_pad("udp_checksum"        ), "RW", 16, field_default(0) ), -- fixed
-
-    ( field_name_pad("bth_opcode"          ), "RW",  8, field_default(x"FF") ), -- set by data path
-    ( field_name_pad("bth_se"              ), "RW",  1, field_default(0) ), -- set by M&C
-    ( field_name_pad("bth_m"               ), "RW",  1, field_default(0) ), -- set by M&C
-    ( field_name_pad("bth_pad"             ), "RW",  2, field_default(0) ), -- set by M&C
-    ( field_name_pad("bth_tver"            ), "RW",  4, field_default(0) ), -- set by M&C
-    ( field_name_pad("bth_partition_key"   ), "RW", 16, field_default(65535) ), -- set by M&C
-    ( field_name_pad("bth_fres"            ), "RW",  1, field_default(0) ), -- set by M&C
-    ( field_name_pad("bth_bres"            ), "RW",  1, field_default(0) ), -- set by M&C
-    ( field_name_pad("bth_reserved_a"      ), "RW",  6, field_default(0) ), -- fixed
-    ( field_name_pad("bth_dest_qp"         ), "RW", 16, field_default(0) ), -- set by M&C
-    ( field_name_pad("bth_ack_req"         ), "RW",  1, field_default(0) ), -- set by M&C
-    ( field_name_pad("bth_reserved_b"      ), "RW",  7, field_default(0) ), -- fixed
-    ( field_name_pad("bth_psn"             ), "RW", 32, field_default(0) ), -- set by data path
-
-    ( field_name_pad("reth_virtual_address"), "RW", 64, field_default(0) ), -- set by data path
-    ( field_name_pad("reth_r_key"          ), "RW", 32, field_default(0) ), -- set by M&C
-    ( field_name_pad("reth_dma_length"     ), "RW", 32, field_default(0) ) -- set by M&C
-    );
-  constant c_rdma_packetiser_roce_reg_no_imm_hdr_dat_addr_w    : natural := ceil_log2(field_nof_words(c_rdma_packetiser_roce_no_imm_hdr_field_arr, c_word_w));
-  constant c_rdma_packetiser_roce_reg_no_imm_hdr_dat_addr_span : natural := 2**c_rdma_packetiser_roce_reg_no_imm_hdr_dat_addr_w;
-
-  constant c_rdma_packetiser_roce_hdr_len         : natural := 32;  -- octets
-  constant c_rdma_packetiser_roce_imm_len         : natural := 4;   -- octets
-  constant c_rdma_packetiser_roce_icrc_len        : natural := 4;   -- octets
-  constant c_rdma_packetiser_roce_no_imm_hdr_len  : natural := c_rdma_packetiser_roce_hdr_len - c_rdma_packetiser_roce_imm_len;
+  -- RoCEv2 header for RDMA operation with all possible fields and user config fields
+  -- ETH + IP + UDP + Base Transport Header (BTH) + RDMA Extended Transport Header (RETH) + Immediate Data + user config
+  -- Primarily used as a MM register map to provide the data needed for all header variants.
+  constant c_rdma_packetiser_mm_nof_fields : natural := 3 + 12 + 4 + 13 + 3 + 1 + 6;
+  constant c_rdma_packetiser_mm_field_sel  : std_logic_vector(c_rdma_packetiser_mm_nof_fields - 1 downto 0) :=  "111" & "111011111011" & "1100" & "0111111111110" & "010" & "1" & "011111";
+
+  constant c_rdma_packetiser_mm_field_arr : t_common_field_arr(
+    c_rdma_packetiser_mm_nof_fields - 1 downto 0) := (
+    ( field_name_pad("eth_dst_mac"                    ), "RW", 48, field_default(0) ), -- set by M&C
+    ( field_name_pad("eth_src_mac"                    ), "RW", 48, field_default(0) ), -- set by M&C
+    ( field_name_pad("eth_type"                       ), "RW", 16, field_default(x"0800") ), -- fixed
+
+    ( field_name_pad("ip_version"                     ), "RW",  4, field_default(4) ), -- fixed
+    ( field_name_pad("ip_header_length"               ), "RW",  4, field_default(5) ), -- fixed
+    ( field_name_pad("ip_services"                    ), "RW",  8, field_default(0) ), -- fixed
+    ( field_name_pad("ip_total_length"                ), "RW", 16, field_default(0) ), -- set by data path
+    ( field_name_pad("ip_identification"              ), "RW", 16, field_default(0) ), -- fixed
+    ( field_name_pad("ip_flags"                       ), "RW",  3, field_default(2) ), -- fixed
+    ( field_name_pad("ip_fragment_offset"             ), "RW", 13, field_default(0) ), -- fixed
+    ( field_name_pad("ip_time_to_live"                ), "RW",  8, field_default(127) ), -- fixed
+    ( field_name_pad("ip_protocol"                    ), "RW",  8, field_default(17) ), -- fixed
+    ( field_name_pad("ip_header_checksum"             ), "RW", 16, field_default(0) ), -- set by data path
+    ( field_name_pad("ip_src_addr"                    ), "RW", 32, field_default(0) ), -- set by M&C
+    ( field_name_pad("ip_dst_addr"                    ), "RW", 32, field_default(0) ), -- set by M&C
+
+    ( field_name_pad("udp_src_port"                   ), "RW", 16, field_default(0) ), -- set by M&C
+    ( field_name_pad("udp_dst_port"                   ), "RW", 16, field_default(0) ), -- set by M&C
+    ( field_name_pad("udp_total_length"               ), "RW", 16, field_default(0) ), -- set by data path
+    ( field_name_pad("udp_checksum"                   ), "RW", 16, field_default(0) ), -- fixed
+
+    ( field_name_pad("bth_opcode"                     ), "RW",  8, field_default(x"FF") ), -- set by data path
+    ( field_name_pad("bth_se"                         ), "RW",  1, field_default(0) ), -- set by M&C
+    ( field_name_pad("bth_m"                          ), "RW",  1, field_default(0) ), -- set by M&C
+    ( field_name_pad("bth_pad"                        ), "RW",  2, field_default(0) ), -- set by M&C
+    ( field_name_pad("bth_tver"                       ), "RW",  4, field_default(0) ), -- set by M&C
+    ( field_name_pad("bth_partition_key"              ), "RW", 16, field_default(65535) ), -- set by M&C
+    ( field_name_pad("bth_fres"                       ), "RW",  1, field_default(0) ), -- set by M&C
+    ( field_name_pad("bth_bres"                       ), "RW",  1, field_default(0) ), -- set by M&C
+    ( field_name_pad("bth_reserved_a"                 ), "RW",  6, field_default(0) ), -- fixed
+    ( field_name_pad("bth_dest_qp"                    ), "RW", 16, field_default(0) ), -- set by M&C
+    ( field_name_pad("bth_ack_req"                    ), "RW",  1, field_default(0) ), -- set by M&C
+    ( field_name_pad("bth_reserved_b"                 ), "RW",  7, field_default(0) ), -- fixed
+    ( field_name_pad("bth_psn"                        ), "RW", 32, field_default(0) ), -- set by data path
+
+    ( field_name_pad("reth_virtual_address"           ), "RW", 64, field_default(0) ), -- set by data path
+    ( field_name_pad("reth_r_key"                     ), "RW", 32, field_default(0) ), -- set by M&C
+    ( field_name_pad("reth_dma_length"                ), "RW", 32, field_default(0) ), -- set by M&C
+
+    ( field_name_pad("immediate_data"                 ), "RW", 32, field_default(0) ), -- set by M&C
+
+    ( field_name_pad("config_reserved"                ), "RW",  6, field_default(0) ), -- fixed
+    ( field_name_pad("config_use_immediate"           ), "RW",  1, field_default(0) ), -- set by M&C
+    ( field_name_pad("config_use_msg_cnt_as_immediate"), "RW",  1, field_default(0) ), -- set by M&C
+    ( field_name_pad("config_nof_packets_in_msg"      ), "RW", 32, field_default(0) ), -- set by M&C
+    ( field_name_pad("config_nof_msg"                 ), "RW", 32, field_default(0) ), -- set by M&C
+    ( field_name_pad("config_start_address"           ), "RW", 64, field_default(0) ) -- set by M&C
+  );
+  constant c_rdma_packetiser_reg_mm_dat_addr_w    : natural := ceil_log2(field_nof_words(c_rdma_packetiser_mm_field_arr, c_word_w));
+  constant c_rdma_packetiser_reg_mm_dat_addr_span : natural := 2**c_rdma_packetiser_reg_mm_dat_addr_w;
+
+
+
+  -- RoCEv2 header for first packets and write only packets without immediate data
+  -- ETH + IP + UDP + Base Transport Header (BTH) + RDMA Extended Transport Header (RETH)
+  constant c_rdma_packetiser_first_nof_hdr_fields : natural := 3 + 12 + 4 + 13 + 3;
+  constant c_rdma_packetiser_first_hdr_field_sel  : std_logic_vector(c_rdma_packetiser_first_nof_hdr_fields - 1 downto 0) :=  (others => '0');
+
+  constant c_rdma_packetiser_first_hdr_field_arr : t_common_field_arr(
+    c_rdma_packetiser_first_nof_hdr_fields - 1 downto 0) := (
+    ( field_name_pad("eth_dst_mac"         ), "RW", 48, field_default(0) ),
+    ( field_name_pad("eth_src_mac"         ), "RW", 48, field_default(0) ), 
+    ( field_name_pad("eth_type"            ), "RW", 16, field_default(x"0800") ),
+
+    ( field_name_pad("ip_version"          ), "RW",  4, field_default(4) ),
+    ( field_name_pad("ip_header_length"    ), "RW",  4, field_default(5) ),
+    ( field_name_pad("ip_services"         ), "RW",  8, field_default(0) ),
+    ( field_name_pad("ip_total_length"     ), "RW", 16, field_default(0) ),
+    ( field_name_pad("ip_identification"   ), "RW", 16, field_default(0) ),
+    ( field_name_pad("ip_flags"            ), "RW",  3, field_default(2) ),
+    ( field_name_pad("ip_fragment_offset"  ), "RW", 13, field_default(0) ),
+    ( field_name_pad("ip_time_to_live"     ), "RW",  8, field_default(127) ),
+    ( field_name_pad("ip_protocol"         ), "RW",  8, field_default(17) ),
+    ( field_name_pad("ip_header_checksum"  ), "RW", 16, field_default(0) ),
+    ( field_name_pad("ip_src_addr"         ), "RW", 32, field_default(0) ),
+    ( field_name_pad("ip_dst_addr"         ), "RW", 32, field_default(0) ),
+
+    ( field_name_pad("udp_src_port"        ), "RW", 16, field_default(0) ),
+    ( field_name_pad("udp_dst_port"        ), "RW", 16, field_default(0) ),
+    ( field_name_pad("udp_total_length"    ), "RW", 16, field_default(0) ),
+    ( field_name_pad("udp_checksum"        ), "RW", 16, field_default(0) ),
+
+    ( field_name_pad("bth_opcode"          ), "RW",  8, field_default(x"FF") ),
+    ( field_name_pad("bth_se"              ), "RW",  1, field_default(0) ),
+    ( field_name_pad("bth_m"               ), "RW",  1, field_default(0) ),
+    ( field_name_pad("bth_pad"             ), "RW",  2, field_default(0) ),
+    ( field_name_pad("bth_tver"            ), "RW",  4, field_default(0) ),
+    ( field_name_pad("bth_partition_key"   ), "RW", 16, field_default(65535) ),
+    ( field_name_pad("bth_fres"            ), "RW",  1, field_default(0) ),
+    ( field_name_pad("bth_bres"            ), "RW",  1, field_default(0) ),
+    ( field_name_pad("bth_reserved_a"      ), "RW",  6, field_default(0) ),
+    ( field_name_pad("bth_dest_qp"         ), "RW", 16, field_default(0) ),
+    ( field_name_pad("bth_ack_req"         ), "RW",  1, field_default(0) ),
+    ( field_name_pad("bth_reserved_b"      ), "RW",  7, field_default(0) ),
+    ( field_name_pad("bth_psn"             ), "RW", 32, field_default(0) ), 
+
+    ( field_name_pad("reth_virtual_address"), "RW", 64, field_default(0) ),
+    ( field_name_pad("reth_r_key"          ), "RW", 32, field_default(0) ),
+    ( field_name_pad("reth_dma_length"     ), "RW", 32, field_default(0) ) 
+
+  );
+
+  -- RoCEv2 header for middle packets and last packets without immediate data.
+  -- ETH + IP + UDP + Base Transport Header (BTH)
+  constant c_rdma_packetiser_mid_nof_hdr_fields : natural := 3 + 12 + 4 + 13;
+  constant c_rdma_packetiser_mid_hdr_field_sel  : std_logic_vector(c_rdma_packetiser_mid_nof_hdr_fields - 1 downto 0) :=  (others => '0');
+
+  constant c_rdma_packetiser_mid_hdr_field_arr : t_common_field_arr(
+    c_rdma_packetiser_mid_nof_hdr_fields - 1 downto 0) := (
+    ( field_name_pad("eth_dst_mac"       ), "RW", 48, field_default(0) ),
+    ( field_name_pad("eth_src_mac"       ), "RW", 48, field_default(0) ), 
+    ( field_name_pad("eth_type"          ), "RW", 16, field_default(x"0800") ),
+
+    ( field_name_pad("ip_version"        ), "RW",  4, field_default(4) ),
+    ( field_name_pad("ip_header_length"  ), "RW",  4, field_default(5) ),
+    ( field_name_pad("ip_services"       ), "RW",  8, field_default(0) ),
+    ( field_name_pad("ip_total_length"   ), "RW", 16, field_default(0) ),
+    ( field_name_pad("ip_identification" ), "RW", 16, field_default(0) ),
+    ( field_name_pad("ip_flags"          ), "RW",  3, field_default(2) ),
+    ( field_name_pad("ip_fragment_offset"), "RW", 13, field_default(0) ),
+    ( field_name_pad("ip_time_to_live"   ), "RW",  8, field_default(127) ),
+    ( field_name_pad("ip_protocol"       ), "RW",  8, field_default(17) ),
+    ( field_name_pad("ip_header_checksum"), "RW", 16, field_default(0) ),
+    ( field_name_pad("ip_src_addr"       ), "RW", 32, field_default(0) ),
+    ( field_name_pad("ip_dst_addr"       ), "RW", 32, field_default(0) ),
+
+    ( field_name_pad("udp_src_port"      ), "RW", 16, field_default(0) ),
+    ( field_name_pad("udp_dst_port"      ), "RW", 16, field_default(0) ),
+    ( field_name_pad("udp_total_length"  ), "RW", 16, field_default(0) ),
+    ( field_name_pad("udp_checksum"      ), "RW", 16, field_default(0) ),
+
+    ( field_name_pad("bth_opcode"        ), "RW",  8, field_default(x"FF") ),
+    ( field_name_pad("bth_se"            ), "RW",  1, field_default(0) ),
+    ( field_name_pad("bth_m"             ), "RW",  1, field_default(0) ),
+    ( field_name_pad("bth_pad"           ), "RW",  2, field_default(0) ),
+    ( field_name_pad("bth_tver"          ), "RW",  4, field_default(0) ),
+    ( field_name_pad("bth_partition_key" ), "RW", 16, field_default(65535) ),
+    ( field_name_pad("bth_fres"          ), "RW",  1, field_default(0) ),
+    ( field_name_pad("bth_bres"          ), "RW",  1, field_default(0) ),
+    ( field_name_pad("bth_reserved_a"    ), "RW",  6, field_default(0) ),
+    ( field_name_pad("bth_dest_qp"       ), "RW", 16, field_default(0) ),
+    ( field_name_pad("bth_ack_req"       ), "RW",  1, field_default(0) ),
+    ( field_name_pad("bth_reserved_b"    ), "RW",  7, field_default(0) ),
+    ( field_name_pad("bth_psn"           ), "RW", 32, field_default(0) ) 
+  );
+
+  -- RoCEv2 header for last packets with immediate data
+  -- ETH + IP + UDP + Base Transport Header (BTH) + immediate data
+  constant c_rdma_packetiser_last_nof_hdr_fields : natural := 3 + 12 + 4 + 13 + 1;
+  constant c_rdma_packetiser_last_hdr_field_sel  : std_logic_vector(c_rdma_packetiser_last_nof_hdr_fields - 1 downto 0) :=  (others => '0');
+
+  constant c_rdma_packetiser_last_hdr_field_arr : t_common_field_arr(
+    c_rdma_packetiser_last_nof_hdr_fields - 1 downto 0) := (
+    ( field_name_pad("eth_dst_mac"       ), "RW", 48, field_default(0) ),
+    ( field_name_pad("eth_src_mac"       ), "RW", 48, field_default(0) ), 
+    ( field_name_pad("eth_type"          ), "RW", 16, field_default(x"0800") ),
+
+    ( field_name_pad("ip_version"        ), "RW",  4, field_default(4) ),
+    ( field_name_pad("ip_header_length"  ), "RW",  4, field_default(5) ),
+    ( field_name_pad("ip_services"       ), "RW",  8, field_default(0) ),
+    ( field_name_pad("ip_total_length"   ), "RW", 16, field_default(0) ),
+    ( field_name_pad("ip_identification" ), "RW", 16, field_default(0) ),
+    ( field_name_pad("ip_flags"          ), "RW",  3, field_default(2) ),
+    ( field_name_pad("ip_fragment_offset"), "RW", 13, field_default(0) ),
+    ( field_name_pad("ip_time_to_live"   ), "RW",  8, field_default(127) ),
+    ( field_name_pad("ip_protocol"       ), "RW",  8, field_default(17) ),
+    ( field_name_pad("ip_header_checksum"), "RW", 16, field_default(0) ),
+    ( field_name_pad("ip_src_addr"       ), "RW", 32, field_default(0) ),
+    ( field_name_pad("ip_dst_addr"       ), "RW", 32, field_default(0) ),
+
+    ( field_name_pad("udp_src_port"      ), "RW", 16, field_default(0) ),
+    ( field_name_pad("udp_dst_port"      ), "RW", 16, field_default(0) ),
+    ( field_name_pad("udp_total_length"  ), "RW", 16, field_default(0) ),
+    ( field_name_pad("udp_checksum"      ), "RW", 16, field_default(0) ),
+
+    ( field_name_pad("bth_opcode"        ), "RW",  8, field_default(x"FF") ),
+    ( field_name_pad("bth_se"            ), "RW",  1, field_default(0) ),
+    ( field_name_pad("bth_m"             ), "RW",  1, field_default(0) ),
+    ( field_name_pad("bth_pad"           ), "RW",  2, field_default(0) ),
+    ( field_name_pad("bth_tver"          ), "RW",  4, field_default(0) ),
+    ( field_name_pad("bth_partition_key" ), "RW", 16, field_default(65535) ),
+    ( field_name_pad("bth_fres"          ), "RW",  1, field_default(0) ),
+    ( field_name_pad("bth_bres"          ), "RW",  1, field_default(0) ),
+    ( field_name_pad("bth_reserved_a"    ), "RW",  6, field_default(0) ),
+    ( field_name_pad("bth_dest_qp"       ), "RW", 16, field_default(0) ),
+    ( field_name_pad("bth_ack_req"       ), "RW",  1, field_default(0) ),
+    ( field_name_pad("bth_reserved_b"    ), "RW",  7, field_default(0) ),
+    ( field_name_pad("bth_psn"           ), "RW", 32, field_default(0) ),
+
+    ( field_name_pad("immediate_data"    ), "RW", 32, field_default(0) )
+  );
+
+  -- RoCEv2 header for write only packets with immediate data
+  -- ETH + IP + UDP + Base Transport Header (BTH) + RDMA Extended Transport Header (RETH) + immediate data
+  constant c_rdma_packetiser_wo_nof_hdr_fields : natural := 3 + 12 + 4 + 13 + 3 + 1;
+  constant c_rdma_packetiser_wo_hdr_field_sel  : std_logic_vector(c_rdma_packetiser_wo_nof_hdr_fields - 1 downto 0) :=  (others => '0');
+
+  constant c_rdma_packetiser_wo_hdr_field_arr : t_common_field_arr(
+    c_rdma_packetiser_wo_nof_hdr_fields - 1 downto 0) := (
+    ( field_name_pad("eth_dst_mac"         ), "RW", 48, field_default(0) ),
+    ( field_name_pad("eth_src_mac"         ), "RW", 48, field_default(0) ), 
+    ( field_name_pad("eth_type"            ), "RW", 16, field_default(x"0800") ),
+
+    ( field_name_pad("ip_version"          ), "RW",  4, field_default(4) ),
+    ( field_name_pad("ip_header_length"    ), "RW",  4, field_default(5) ),
+    ( field_name_pad("ip_services"         ), "RW",  8, field_default(0) ),
+    ( field_name_pad("ip_total_length"     ), "RW", 16, field_default(0) ),
+    ( field_name_pad("ip_identification"   ), "RW", 16, field_default(0) ),
+    ( field_name_pad("ip_flags"            ), "RW",  3, field_default(2) ),
+    ( field_name_pad("ip_fragment_offset"  ), "RW", 13, field_default(0) ),
+    ( field_name_pad("ip_time_to_live"     ), "RW",  8, field_default(127) ),
+    ( field_name_pad("ip_protocol"         ), "RW",  8, field_default(17) ),
+    ( field_name_pad("ip_header_checksum"  ), "RW", 16, field_default(0) ),
+    ( field_name_pad("ip_src_addr"         ), "RW", 32, field_default(0) ),
+    ( field_name_pad("ip_dst_addr"         ), "RW", 32, field_default(0) ),
+
+    ( field_name_pad("udp_src_port"        ), "RW", 16, field_default(0) ),
+    ( field_name_pad("udp_dst_port"        ), "RW", 16, field_default(0) ),
+    ( field_name_pad("udp_total_length"    ), "RW", 16, field_default(0) ),
+    ( field_name_pad("udp_checksum"        ), "RW", 16, field_default(0) ),
+
+    ( field_name_pad("bth_opcode"          ), "RW",  8, field_default(x"FF") ),
+    ( field_name_pad("bth_se"              ), "RW",  1, field_default(0) ),
+    ( field_name_pad("bth_m"               ), "RW",  1, field_default(0) ),
+    ( field_name_pad("bth_pad"             ), "RW",  2, field_default(0) ),
+    ( field_name_pad("bth_tver"            ), "RW",  4, field_default(0) ),
+    ( field_name_pad("bth_partition_key"   ), "RW", 16, field_default(65535) ),
+    ( field_name_pad("bth_fres"            ), "RW",  1, field_default(0) ),
+    ( field_name_pad("bth_bres"            ), "RW",  1, field_default(0) ),
+    ( field_name_pad("bth_reserved_a"      ), "RW",  6, field_default(0) ),
+    ( field_name_pad("bth_dest_qp"         ), "RW", 16, field_default(0) ),
+    ( field_name_pad("bth_ack_req"         ), "RW",  1, field_default(0) ),
+    ( field_name_pad("bth_reserved_b"      ), "RW",  7, field_default(0) ),
+    ( field_name_pad("bth_psn"             ), "RW", 32, field_default(0) ),
+ 
+    ( field_name_pad("reth_virtual_address"), "RW", 64, field_default(0) ),
+    ( field_name_pad("reth_r_key"          ), "RW", 32, field_default(0) ),
+    ( field_name_pad("reth_dma_length"     ), "RW", 32, field_default(0) ),
+
+    ( field_name_pad("immediate_data"      ), "RW", 32, field_default(0) ) 
+  );
+
+  constant c_rdma_packetiser_bth_len  : natural := 12;  -- octets
+  constant c_rdma_packetiser_reth_len : natural := 16;  -- octets
+  constant c_rdma_packetiser_imm_len  : natural := 4;   -- octets
+  constant c_rdma_packetiser_icrc_len : natural := 4;   -- octets
 
   constant c_rdma_packetiser_opcode_uc_send_first      : std_logic_vector := "001" & "00000";
   constant c_rdma_packetiser_opcode_uc_send_middle     : std_logic_vector := "001" & "00001";
@@ -206,14 +355,13 @@ package rdma_packetiser_pkg is
   constant c_rdma_packetiser_opcode_uc_write_only      : std_logic_vector := "001" & "01010"; -- without immediate
   constant c_rdma_packetiser_opcode_uc_write_only_imm  : std_logic_vector := "001" & "01011"; -- with immediate
 
-  function func_rdma_packetiser_map_header(hdr_fields_raw : std_logic_vector; use_immediate : boolean) return t_rdma_packetiser_roce_header;
-
+  function func_rdma_packetiser_map_header(hdr_fields_raw : std_logic_vector; field_arr : t_common_field_arr) return t_rdma_packetiser_roce_header;
 end rdma_packetiser_pkg;
 
 package body rdma_packetiser_pkg is
-  function func_rdma_packetiser_map_header(hdr_fields_raw : std_logic_vector; use_immediate : boolean) return t_rdma_packetiser_roce_header is
+  function func_rdma_packetiser_map_header(hdr_fields_raw : std_logic_vector; field_arr : t_common_field_arr) return t_rdma_packetiser_roce_header is
     variable v : t_rdma_packetiser_roce_header;
-    constant c_hdr_field_arr : t_common_field_arr := sel_a_b(use_immediate, c_rdma_packetiser_roce_hdr_field_arr, c_rdma_packetiser_roce_no_imm_hdr_field_arr);
+    constant c_hdr_field_arr : t_common_field_arr := field_arr;
   begin
     -- eth header
     v.eth.dst_mac          := hdr_fields_raw(field_hi(c_hdr_field_arr, "eth_dst_mac") downto field_lo(c_hdr_field_arr, "eth_dst_mac"));
@@ -254,15 +402,18 @@ package body rdma_packetiser_pkg is
     v.bth.ack_req          := hdr_fields_raw(field_hi(c_hdr_field_arr, "bth_ack_req")       downto field_lo(c_hdr_field_arr, "bth_ack_req"));
     v.bth.reserved_b       := hdr_fields_raw(field_hi(c_hdr_field_arr, "bth_reserved_b")    downto field_lo(c_hdr_field_arr, "bth_reserved_b"));
     v.bth.psn              := hdr_fields_raw(field_hi(c_hdr_field_arr, "bth_psn")           downto field_lo(c_hdr_field_arr, "bth_psn"));
+    
+    -- reth header (optional)
+    v.reth := ((others => '0'), (others => '0'),(others => '0'));
+    if field_exists(c_hdr_field_arr, "reth_virtual_address") then -- reth header exists
+      v.reth.virtual_address := hdr_fields_raw(field_hi(c_hdr_field_arr, "reth_virtual_address") downto field_lo(c_hdr_field_arr, "reth_virtual_address"));
+      v.reth.r_key           := hdr_fields_raw(field_hi(c_hdr_field_arr, "reth_r_key")           downto field_lo(c_hdr_field_arr, "reth_r_key"));
+      v.reth.dma_length      := hdr_fields_raw(field_hi(c_hdr_field_arr, "reth_dma_length")      downto field_lo(c_hdr_field_arr, "reth_dma_length"));
+    end if;
 
-    -- reth header
-    v.reth.virtual_address := hdr_fields_raw(field_hi(c_hdr_field_arr, "reth_virtual_address") downto field_lo(c_hdr_field_arr, "reth_virtual_address"));
-    v.reth.r_key           := hdr_fields_raw(field_hi(c_hdr_field_arr, "reth_r_key")           downto field_lo(c_hdr_field_arr, "reth_r_key"));
-    v.reth.dma_length      := hdr_fields_raw(field_hi(c_hdr_field_arr, "reth_dma_length")      downto field_lo(c_hdr_field_arr, "reth_dma_length"));
-
-    --immediate data
+    --immediate data (optional)
     v.immediate_data := (others => '0');
-    if use_immediate then
+    if field_exists(c_hdr_field_arr, "immediate_data") then -- immediate data exists
       v.immediate_data := hdr_fields_raw(field_hi(c_hdr_field_arr, "immediate_data") downto field_lo(c_hdr_field_arr, "immediate_data"));
     end if;
 
diff --git a/applications/rdma_demo/libraries/rdma_packetiser/tb/vhdl/tb_rdma_packetiser_assemble_header.vhd b/applications/rdma_demo/libraries/rdma_packetiser/tb/vhdl/tb_rdma_packetiser_assemble_header.vhd
index b6a19a3744ecbbf954656b5f6af1b60f151dd18d..6b4dd312f62e2de4f6d845fdb60f2b4a8c4e22c7 100644
--- a/applications/rdma_demo/libraries/rdma_packetiser/tb/vhdl/tb_rdma_packetiser_assemble_header.vhd
+++ b/applications/rdma_demo/libraries/rdma_packetiser/tb/vhdl/tb_rdma_packetiser_assemble_header.vhd
@@ -17,7 +17,7 @@
 -- limitations under the License.
 --
 -------------------------------------------------------------------------------
-
+-- NOTE Work in progress, will be finished in HPR-131
 -------------------------------------------------------------------------------
 --
 -- Author: R. vd Walle
@@ -67,12 +67,10 @@ end tb_rdma_packetiser_assemble_header;
 
 architecture tb of tb_rdma_packetiser_assemble_header is
   constant c_dp_clk_period      : time := 5 ns;  -- 200 MHz
+  constant c_mm_clk_period      : time := 1 ns;  -- 1 GHz
   constant c_data_w             : natural := c_word_w;
   constant c_data_init          : natural := 13;
   constant c_hdr_fields_slv_rst : std_logic_vector(1023 downto 0) := (others => '0');
-  constant c_rdma_hdr_len : natural := c_rdma_packetiser_roce_icrc_len + sel_a_b(
-    g_use_immediate,
-    c_rdma_packetiser_roce_hdr_len, c_rdma_packetiser_roce_no_imm_hdr_len);
   constant c_block_len          : natural := g_frame_len * (c_data_w / c_octet_w);
   constant c_dma_len            : natural := c_block_len * g_nof_packets_in_msg;
 
@@ -80,6 +78,8 @@ architecture tb of tb_rdma_packetiser_assemble_header is
 
   signal dp_clk              : std_logic := '1';
   signal dp_rst              : std_logic;
+  signal mm_clk              : std_logic := '1';
+  signal mm_rst              : std_logic;
 
   signal immediate_data      : std_logic_vector(c_word_w - 1 downto 0) := X"89ABCDEF";
   signal block_len           : std_logic_vector(c_halfword_w - 1 downto 0) := TO_UVEC(c_block_len, c_halfword_w);
@@ -89,17 +89,24 @@ architecture tb of tb_rdma_packetiser_assemble_header is
   signal start_address       : std_logic_vector(c_longword_w - 1 downto 0) := std_logic_vector(g_start_address);
 
   signal hdr_fields_slv      : std_logic_vector(1023 downto 0) := (others => '0');
-  signal rx_rdma_header         : t_rdma_packetiser_roce_header;
-  signal exp_rdma_header     : t_rdma_packetiser_roce_header := func_rdma_packetiser_map_header(c_hdr_fields_slv_rst, g_use_immediate);
+  signal rx_rdma_header      : t_rdma_packetiser_roce_header;
+  --signal exp_rdma_header     : t_rdma_packetiser_roce_header := func_rdma_packetiser_map_header(c_hdr_fields_slv_rst, g_use_immediate);
   signal in_en : std_logic := '0';
 
   signal snk_in  : t_dp_sosi := c_dp_sosi_rst;
   signal snk_out : t_dp_siso := c_dp_siso_rdy;
+  signal src_out : t_dp_sosi := c_dp_sosi_rst;
+  signal src_in  : t_dp_siso := c_dp_siso_rdy;
+
+  signal reg_hdr_dat_copi   : t_mem_copi := c_mem_copi_rst;
+  signal reg_hdr_dat_cipo   : t_mem_cipo;
+
 begin
   dp_rst <= '1', '0' after c_dp_clk_period * 7;
   dp_clk <= (not dp_clk) or tb_end after c_dp_clk_period / 2;
-
-  rx_rdma_header <= func_rdma_packetiser_map_header(hdr_fields_slv, g_use_immediate );
+  mm_rst <= '1', '0' after c_mm_clk_period * 7;
+  mm_clk <= (not mm_clk) or tb_end after c_mm_clk_period / 2;
+  --rx_rdma_header <= func_rdma_packetiser_map_header(hdr_fields_slv, g_use_immediate );
 
   p_dp_stimuli : process
   begin
@@ -115,92 +122,25 @@ begin
     wait;
   end process;
 
-  -- check if values in rdma_packetiser_assemble_header match with expected values
-  p_verify_rdma_header : process
-    variable v_exp_ip_total_length      : natural;
-    variable v_exp_udp_total_length     : natural;
-    variable v_exp_bth_opcode           : std_logic_vector(c_byte_w - 1 downto 0);
-    variable v_exp_bth_psn              : natural;
-    variable v_exp_reth_virtual_address : unsigned(c_longword_w - 1 downto 0);
-    variable v_exp_reth_dma_length      : natural;
-    variable v_exp_immediate_data       : std_logic_vector(c_word_w - 1 downto 0);
-    variable v_p, v_m                   : natural := 0;
-  begin
-    for rep in 0 to g_nof_rep - 1 loop
-      proc_common_wait_until_high(dp_clk, snk_in.sop);  -- wait for sop
-
-      v_exp_ip_total_length      := c_network_ip_header_len + c_network_udp_header_len + c_rdma_hdr_len + to_uint(block_len);
-      v_exp_udp_total_length     := c_network_udp_header_len + c_rdma_hdr_len + to_uint(block_len);
-      v_exp_bth_psn              := v_p;
-      v_exp_reth_virtual_address := g_start_address + to_unsigned((v_m mod g_nof_msg) * c_dma_len, c_longword_w);
-      v_exp_reth_dma_length      := c_dma_len;
-      v_exp_immediate_data       := sel_a_b(g_use_immediate,
-      sel_a_b(g_use_msg_cnt_as_immediate, to_uvec((v_m mod g_nof_msg), c_word_w), immediate_data), to_uvec(0, c_word_w));
-
-      -- determine expected opcode
-      if v_p mod g_nof_packets_in_msg = 0 then
-        v_exp_bth_opcode := c_rdma_packetiser_opcode_uc_write_first;
-        if g_nof_packets_in_msg = 1 and g_use_immediate then
-          v_exp_bth_opcode := c_rdma_packetiser_opcode_uc_write_only_imm;
-        elsif g_nof_packets_in_msg = 1 then
-          v_exp_bth_opcode := c_rdma_packetiser_opcode_uc_write_only;
-        end if;
-      elsif v_p mod g_nof_packets_in_msg = g_nof_packets_in_msg - 1 then
-        v_exp_bth_opcode := c_rdma_packetiser_opcode_uc_write_last;
-        if g_use_immediate then
-          v_exp_bth_opcode := c_rdma_packetiser_opcode_uc_write_last_imm;
-        end if;
-      else
-        v_exp_bth_opcode := c_rdma_packetiser_opcode_uc_write_middle;
-      end if;
-
-      -- increase counters
-      v_p := v_p + 1;
-      v_m := v_p / g_nof_packets_in_msg;
-
-      -- assign expected values to signal to view in wave window.
-      exp_rdma_header.ip.total_length      <= to_uvec(v_exp_ip_total_length, c_halfword_w);
-      exp_rdma_header.udp.total_length     <= to_uvec(v_exp_udp_total_length, c_halfword_w );
-      exp_rdma_header.bth.opcode           <= v_exp_bth_opcode;
-      exp_rdma_header.bth.psn              <= to_uvec(v_exp_bth_psn, c_word_w);
-      exp_rdma_header.reth.virtual_address <= std_logic_vector(v_exp_reth_virtual_address);
-      exp_rdma_header.reth.dma_length      <= to_uvec(v_exp_reth_dma_length, c_word_w);
-      exp_rdma_header.immediate_data       <= v_exp_immediate_data;
-      proc_common_wait_some_cycles(dp_clk, 1);
-
-      -- assert when header is not as expected.
-      assert rx_rdma_header                      = exp_rdma_header                      report "Wrong rx_rdma_header" severity error;
-      assert rx_rdma_header.ip.total_length      = exp_rdma_header.ip.total_length      report "Wrong rx_rdma_header.ip.total_length value" severity error;
-      assert rx_rdma_header.udp.total_length     = exp_rdma_header.udp.total_length     report "Wrong rx_rdma_header.udp.total_length value" severity error;
-      assert rx_rdma_header.bth.opcode           = exp_rdma_header.bth.opcode           report "Wrong rx_rdma_header.bth.opcode value" severity error;
-      assert rx_rdma_header.bth.psn              = exp_rdma_header.bth.psn              report "Wrong rx_rdma_header.bth.psn value" severity error;
-      assert rx_rdma_header.reth.virtual_address = exp_rdma_header.reth.virtual_address report "Wrong rx_rdma_header.reth.virtual_address value" severity error;
-      assert rx_rdma_header.reth.dma_length      = exp_rdma_header.reth.dma_length      report "Wrong rx_rdma_header.reth.dma_length value" severity error;
-      assert rx_rdma_header.immediate_data       = exp_rdma_header.immediate_data       report "Wrong rx_rdma_header.immediate_data value" severity error;
-    end loop;
-
-    proc_common_wait_some_cycles(dp_clk, 100);
-    tb_end <= '1';
-    wait;
-  end process;
-
   u_dut: entity work.rdma_packetiser_assemble_header
     generic map (
-      g_use_immediate            => g_use_immediate,
-      g_use_msg_cnt_as_immediate => g_use_msg_cnt_as_immediate
+      g_data_w => 512
     )
     port map (
       st_clk             => dp_clk,
       st_rst             => dp_rst,
+      mm_clk             => mm_clk,
+      mm_rst             => mm_rst,
+
+      reg_hdr_dat_copi   => reg_hdr_dat_copi,
+      reg_hdr_dat_cipo   => reg_hdr_dat_cipo,
 
       snk_in             => snk_in,
-      hdr_fields_slv     => hdr_fields_slv,
+      snk_out            => snk_out,
+      
+      src_out            => src_out,
+      src_in             => src_in,
 
-      immediate_data     => immediate_data,
-      block_len          => block_len,
-      nof_packets_in_msg => nof_packets_in_msg,
-      nof_msg            => nof_msg,
-      dma_len            => dma_len,
-      start_address      => start_address
+      block_len          => block_len
     );
 end tb;
diff --git a/libraries/base/common/src/vhdl/common_field_pkg.vhd b/libraries/base/common/src/vhdl/common_field_pkg.vhd
index 7f3d90b4ee7ab330bff244ff5f96f986c9824c87..061e90dc27565a7ec5196e4ab97289e9a680dc68 100644
--- a/libraries/base/common/src/vhdl/common_field_pkg.vhd
+++ b/libraries/base/common/src/vhdl/common_field_pkg.vhd
@@ -73,6 +73,8 @@ package common_field_pkg is
   function field_map_out     (field_arr : t_common_field_arr; word_arr   : std_logic_vector; word_w : natural                ) return std_logic_vector;  -- returns slv_out
   function field_map         (field_arr : t_common_field_arr; word_arr_in: std_logic_vector; word_arr_out: std_logic_vector; word_w : natural) return std_logic_vector;  -- returns word_arr
 
+  function field_select_subset(subset_field_arr : t_common_field_arr; larger_field_arr : t_common_field_arr; larger_slv : std_logic_vector) return std_logic_vector;  -- returns subset slv
+
   function field_ovr_arr(field_arr : t_common_field_arr; ovr_init: std_logic_vector) return t_common_field_arr;
 
   function field_exists(field_arr : t_common_field_arr; name: string) return boolean;
@@ -295,6 +297,25 @@ package body common_field_pkg is
     return v_word_arr;
   end field_map;
 
+  function field_select_subset(subset_field_arr : t_common_field_arr; larger_field_arr : t_common_field_arr; larger_slv : std_logic_vector) return std_logic_vector is
+  -- For each field in subset_field_arr, copy the bits of the same-named field of larger_field_arr out of larger_slv into that field's bit range of the result.
+    variable v_hi_sub  : natural;  -- destination slice bounds in out_slv, taken from subset_field_arr
+    variable v_lo_sub  : natural;
+    variable v_hi_full : natural;  -- source slice bounds in larger_slv, taken from larger_field_arr
+    variable v_lo_full : natural;
+    variable out_slv   : std_logic_vector(larger_slv'range) := (others => '0');  -- result has the range of larger_slv; bits not written in the loop stay '0'
+  -- NOTE(review): fields are matched by name; presumably every subset field must exist in larger_field_arr with the same width - confirm against callers.
+  begin
+    for j in subset_field_arr'range loop
+      v_hi_sub  := field_hi(subset_field_arr, j);  -- subset field looked up by index j
+      v_lo_sub  := field_lo(subset_field_arr, j);
+      v_hi_full := field_hi(larger_field_arr, subset_field_arr(j).name);  -- larger field looked up by the subset field's name
+      v_lo_full := field_lo(larger_field_arr, subset_field_arr(j).name);
+      out_slv(v_hi_sub downto v_lo_sub) := larger_slv(v_hi_full downto v_lo_full);
+    end loop;
+    return out_slv;
+  end field_select_subset;
+
   function field_ovr_arr(field_arr : t_common_field_arr; ovr_init: std_logic_vector) return t_common_field_arr is
   -- Copy field_arr but change widths to 1 to create a 1-bit override field for each field in field_arr.
     variable v_ovr_field_arr : t_common_field_arr(field_arr'range);
diff --git a/libraries/base/dp/src/vhdl/dp_offload_tx_v3.vhd b/libraries/base/dp/src/vhdl/dp_offload_tx_v3.vhd
index 9382254dc534548c4d079d4396cc91143c303f55..4242e252a3c9ef2f0df6c281231fb367de73f503 100644
--- a/libraries/base/dp/src/vhdl/dp_offload_tx_v3.vhd
+++ b/libraries/base/dp/src/vhdl/dp_offload_tx_v3.vhd
@@ -58,8 +58,8 @@ entity dp_offload_tx_v3 is
     g_pipeline_ready : boolean := false
   );
   port (
-    mm_rst               : in  std_logic;
-    mm_clk               : in  std_logic;
+    mm_rst               : in  std_logic := '0';
+    mm_clk               : in  std_logic := '0';
 
     dp_rst               : in  std_logic;
     dp_clk               : in  std_logic;