diff --git a/applications/rdma_demo/libraries/rdma_packetiser/cocotb/dp_bus.py b/applications/rdma_demo/libraries/rdma_packetiser/cocotb/dp_bus.py
index 1b627f866b1bbe80e0327711b30e040ba8021fe3..81e2c2de4e689989c4d2b9a0aad235c9e31b2bb1 100644
--- a/applications/rdma_demo/libraries/rdma_packetiser/cocotb/dp_bus.py
+++ b/applications/rdma_demo/libraries/rdma_packetiser/cocotb/dp_bus.py
@@ -1,4 +1,3 @@
-
 from typing import Iterable, Union, Optional
 
 import cocotb
@@ -9,7 +8,8 @@ from cocotb.result import TestError
 from cocotb_bus.drivers import ValidatedBusDriver
 from cocotb_bus.monitors import BusMonitor
 
-
+# Note: Copied from AvalonSTPkts in [1] and slightly adapted to use sosi/siso signals
+# [1] - https://docs.cocotb.org/en/v1.5.1/_modules/cocotb_bus/drivers/avalon.html
 class SosiDriver(ValidatedBusDriver):
     _optional_signals = ['valid', 'sop', 'eop', 'sync', 'bsn', 'data', 're', 'im', 'empty', 'channel', 'err']
     _signals = []
@@ -19,7 +19,7 @@ class SosiDriver(ValidatedBusDriver):
         "maxChannel"                    : 1,
         "readyLatency"                  : 1
     }
-    def __init__(self, dut, name, clk, data_w = None):
+    def __init__(self, dut, name, clk, data_w = None, bsn_init = 0):
         ValidatedBusDriver.__init__(self, dut, name, clk, bus_separator='.')    
         self.config = self._default_config.copy()
         self.use_empty = True
@@ -28,6 +28,7 @@ class SosiDriver(ValidatedBusDriver):
             self.data_w = len(self.bus.data)
         else:
             self.data_w = data_w
+        self.bsn = bsn_init
 
     async def _wait_ready(self):
         """Wait for a ready cycle on the bus before continuing.
@@ -103,13 +104,15 @@ class SosiDriver(ValidatedBusDriver):
 
             if firstword:
                 self.bus.sop.value = 1
+                self.bus.bsn.value = self.bsn
+                self.bsn += 1
                 firstword = False
             else:
                 self.bus.sop.value = 0
 
             nbytes = min(len(string), bus_width)
             data = string[:nbytes]
-            
+
             # set unused bits of dp_sosi.data field to 0
             data = bytes(word.n_bits // 8 - bus_width) + data        
             word.buff = data
@@ -345,10 +348,9 @@ class DpStream:
     # It does work in the following way:
     # 1. Only use _optional_signals, no _signals.
     # 2. Use bus_separator='.'
-    def __init__(self, dut, sosi_name, siso_name, clk, rst, data_w = None):
-        self.sosi_drv = SosiDriver(dut, sosi_name, clk, data_w)
+    def __init__(self, dut, sosi_name, siso_name, clk, rst, data_w = None, bsn_init = 0):
+        self.sosi_drv = SosiDriver(dut, sosi_name, clk, data_w, bsn_init)
         self.siso_drv = SisoDriver(dut, siso_name, clk)
         self.sosi_mon = SosiMonitor(dut, sosi_name, clk, data_w, reset=rst)
         self.siso_mon = SisoMonitor(dut, siso_name, clk, reset=rst)
         cocotb.start_soon(self.siso_mon._monitor_recv())
-
diff --git a/applications/rdma_demo/libraries/rdma_packetiser/cocotb/mm_bus.py b/applications/rdma_demo/libraries/rdma_packetiser/cocotb/mm_bus.py
index a25d6c79d4a83e0d05555f79b18a5ae1eb6cff0e..89cdcb8576eca736cc5f7197982da2f8a3166bdb 100644
--- a/applications/rdma_demo/libraries/rdma_packetiser/cocotb/mm_bus.py
+++ b/applications/rdma_demo/libraries/rdma_packetiser/cocotb/mm_bus.py
@@ -1,14 +1,12 @@
-
-from typing import Iterable, Union, Optional
-import cocotb.binary
 from cocotb.log import SimLog
-from cocotb.utils import hexdump
 from cocotb.triggers import RisingEdge, ReadOnly
 from cocotb.binary import BinaryValue
 from cocotb.result import TestError
 from cocotb_bus.drivers import BusDriver
 from cocotb.decorators import coroutine
 
+# Note: Copied from AvalonMM in [1] and slightly adapted to use copi/cipo signals
+# [1] - https://docs.cocotb.org/en/v1.5.1/_modules/cocotb_bus/drivers/avalon.html
 class CopiDriver(BusDriver):
     _signals = []
     _optional_signals = ["rd", "wr", "wrdata", "address"]
diff --git a/applications/rdma_demo/libraries/rdma_packetiser/cocotb/tb_rdma_packetiser.py b/applications/rdma_demo/libraries/rdma_packetiser/cocotb/tb_rdma_packetiser.py
index a92c8b4d271c89df17c726f4db266612d17a556a..f0389b0423b854970022e7faca8291aafee6fb3e 100644
--- a/applications/rdma_demo/libraries/rdma_packetiser/cocotb/tb_rdma_packetiser.py
+++ b/applications/rdma_demo/libraries/rdma_packetiser/cocotb/tb_rdma_packetiser.py
@@ -1,13 +1,21 @@
+from zlib import crc32
+
 import cocotb
 from cocotb.utils import hexdump
-from cocotb.triggers import FallingEdge, Timer, ReadOnly
+from cocotb.triggers import FallingEdge, Timer
 from cocotb.clock import Clock
 from cocotb.binary import BinaryValue
 
 from dp_bus import DpStream
 from mm_bus import MMController
 
-
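+# Global constants: BTH opcodes of the RDMA write packet types (first, middle, last, last with immediate, write only, write only with immediate)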
+c_bth_first = 0x26
+c_bth_middle = 0x27
+c_bth_last = 0x28
+c_bth_last_imm = 0x29
+c_bth_wo = 0x2A
+c_bth_wo_imm = 0x2B
 
 async def perform_rst(rst, clk, cycles):    
     rst.value = 1
@@ -44,28 +52,103 @@ async def write_mm_dict(mm: MMController, write_dict: dict) -> None:
         # write data
         for i in range(v['size']):
             await mm.write(v['offset'] + i, wrdata[i])
-                
+
+def verify_mm_regs(actual_dict, exp_dict):
+    # Assert that the registers read back match what was written. Assumes exp_dict is a dictionary of dictionaries (each with 'access' and 'value' keys) and actual_dict is a flat dictionary whose values expose '.integer'.
+    # Only registers whose 'access' is 'rw' (compared case-insensitively) are checked; all other entries are ignored.
+    d = {k: v for k, v in exp_dict.items() if 'rw' == v['access'].lower()}
+    for k, v in d.items():        
+        assert actual_dict[k].integer == v["value"], (
+                f'ERROR: Wrong value when reading back register, expected {k} = {v["value"]} but got {actual_dict[k]}')
+
 async def send_multi_dp_packet(dp_stream: DpStream, data, n):
     for i in range(n):
         await cocotb.start_soon(dp_stream.sosi_drv._driver_send(data))
 
+def carry_around_add(a, b):  # add a and b, folding everything above bit 15 back into the low 16 bits
+    c = a + b
+    return (c & 0xffff) + (c >> 16)  # end-around carry: low 16 bits plus the overflow
+
+def compute_ip_checksum(msg):  # return the 16-bit checksum computed over the IP header bytes of msg
+    # Assumes msg is a complete network packet (ETH + IP + etc...), so that the IP header occupies bytes 14..33.
+    ip_header = msg[14:24] + msg[26:34] # IP header with the checksum field (bytes 24..25) left out
+    s = 0
+    for i in range(0, len(ip_header), 2):
+        w = ip_header[i+1] + (ip_header[i] << 8)  # two bytes combined into one big-endian 16-bit word
+        s = carry_around_add(s, w)
+    return ~s & 0xffff  # bitwise inverse of the folded sum, limited to 16 bits
 
+def extract_header(data):  # parse packet bytes into a dict of header fields; returns (hdr, hdr_length)
+    opcode = data[42] # assume BTH opcode is at byte 42
+    hdr_length = 54 # bytes 0..53 (ETH + IP + UDP + BTH); final length when bth = write_middle or write_last
+    hdr = {
+        "eth_dst_mac":        int.from_bytes(data[0:6], "big"),  # bytes 0..13: Ethernet fields
+        "eth_src_mac":        int.from_bytes(data[6:12], "big"),
+        "eth_type":           int.from_bytes(data[12:14], "big"),
+        "ip_version":         data[14] >> 4,  # bytes 14..33: IP fields
+        "ip_header_length":   data[14] & 0x0F,
+        "ip_services":        data[15],
+        "ip_total_length":    int.from_bytes(data[16:18], "big"),
+        "ip_identification":  int.from_bytes(data[18:20], "big"),
+        "ip_flags":           data[20] >> 5,  # top 3 bits of byte 20
+        "ip_fragment_offset": int.from_bytes(data[20:22], "big") & 0x1FFF,  # remaining 13 bits of bytes 20..21
+        "ip_time_to_live":    data[22],
+        "ip_protocol":        data[23],
+        "ip_header_checksum": int.from_bytes(data[24:26], "big"),
+        "ip_src_addr":        int.from_bytes(data[26:30], "big"),
+        "ip_dst_addr":        int.from_bytes(data[30:34], "big"),
+        "udp_src_port":       int.from_bytes(data[34:36], "big"),  # bytes 34..41: UDP fields
+        "udp_dst_port":       int.from_bytes(data[36:38], "big"),
+        "udp_total_length":   int.from_bytes(data[38:40], "big"),
+        "udp_checksum":       int.from_bytes(data[40:42], "big"),
+        "bth_opcode":         data[42],  # bytes 42..53: BTH fields
+        "bth_se":             data[43] >> 7,
+        "bth_m":              (data[43] >> 6) & 0x01,
+        "bth_pad":            (data[43] >> 4) & 0x03,
+        "bth_tver":           data[43] & 0x0F,
+        "bth_partition_key":  int.from_bytes(data[44:46], "big"),
+        "bth_fres":           data[46] >> 7,
+        "bth_bres":           (data[46] >> 6) & 0x01,
+        "bth_dest_qp":        int.from_bytes(data[47:49], "big"),  # NOTE(review): read as 2 bytes here and bth_psn as 4 bytes - confirm against the packetiser's BTH field definition
+        "bth_ack_req":        data[49] >> 7,        
+        "bth_psn":            int.from_bytes(data[50:54], "big")
+    }
+    # Set optional header fields and header length based on opcode
+    if opcode in [c_bth_first, c_bth_wo, c_bth_wo_imm]:
+        hdr_length += 16 # + RETH (bytes 54..69)
+        hdr["reth_virtual_address"] = int.from_bytes(data[54:62], "big")
+        hdr["reth_r_key"]           = int.from_bytes(data[62:66], "big")
+        hdr["reth_dma_length"]      = int.from_bytes(data[66:70], "big")
+    if opcode == c_bth_wo_imm:
+        hdr_length += 4 # + immediate, located after the RETH (bytes 70..73)
+        hdr["immediate_data"] = int.from_bytes(data[70:74], "big")
+    if opcode == c_bth_last_imm:
+        hdr_length += 4 # + immediate, located directly after the BTH (bytes 54..57) as there is no RETH
+        hdr["immediate_data"] = int.from_bytes(data[54:58], "big")
+    return hdr, hdr_length
+
+def verify_header(hdr, exp_hdr):  # assert that every field in hdr equals exp_hdr[field]["value"]
+    for k, v in hdr.items():  # every key of hdr must also exist in exp_hdr, otherwise the lookup below raises KeyError
+        assert exp_hdr[k]["value"] == v, (
+                f'ERROR: Wrong header got {k} = {v} but expected {exp_hdr[k]["value"]}')
+        
 @cocotb.test()
 async def tb_rdma_packetiser(dut):
     """Try accessing the design. run with < run -a >"""
-
+    # Constants    
+    c_bsn_init = 17 # some bsn as starting bsn
     n_bytes = dut.c_nof_byte.value
     c_data_w = dut.c_data_w.value
     n_words = 120 # = 7680 bytes
     c_block_len = n_words * n_bytes
     c_nof_packets_in_msg = 5
-    c_dma_len = c_block_len * c_nof_packets_in_msg    
-    n_hdr_regs = 46
+    c_dma_len = c_block_len * c_nof_packets_in_msg
 
+    # Packet header definition and config
     hdr_dict = {
         "eth_dst_mac":                     {"access": "RW", "size": 2, "offset": 44, "value": 0xCAFEBABE1996},
         "eth_src_mac":                     {"access": "RW", "size": 2, "offset": 42, "value": 0x1DECAFC0FFEE},
-        "eth_type":                        {"access": "RO", "size": 1, "offset": 41, "value": 0x0800},
+        "eth_type":                        {"access": "RW", "size": 1, "offset": 41, "value": 0x0800},
 
         "ip_version":                      {"access": "RO", "size": 1, "offset": 40, "value": 4},
         "ip_header_length":                {"access": "RO", "size": 1, "offset": 39, "value": 5},
@@ -81,7 +164,7 @@ async def tb_rdma_packetiser(dut):
         "ip_dst_addr":                     {"access": "RW", "size": 1, "offset": 29, "value": 0x7DECADE},
 
         "udp_src_port":                    {"access": "RW", "size": 1, "offset": 28, "value": 1234},
-        "udp_dst_port":                    {"access": "RW", "size": 1, "offset": 27, "value": 4321},
+        "udp_dst_port":                    {"access": "RW", "size": 1, "offset": 27, "value": 4791}, # 4791 is RoCEv2 port
         "udp_total_length":                {"access": "RO", "size": 1, "offset": 26, "value": -1},
         "udp_checksum":                    {"access": "RO", "size": 1, "offset": 25, "value": 0},
 
@@ -112,9 +195,16 @@ async def tb_rdma_packetiser(dut):
         "config_nof_msg":                  {"access": "RW", "size": 1, "offset": 2,  "value": 3}, 
         "config_start_address":            {"access": "RW", "size": 2, "offset": 0,  "value": 1000000}, 
     }
-        
 
-    # simple counter value per byte
+    # Determine expected BTH opcodes for each packet in a message based on configuration.
+    exp_bth_opcodes = [c_bth_middle] * (c_nof_packets_in_msg - 2)
+    if c_nof_packets_in_msg > 1:
+        bth_last = c_bth_last_imm if hdr_dict["config_use_immediate"]["value"] == 1 else c_bth_last        
+        exp_bth_opcodes = [c_bth_first] + exp_bth_opcodes + [bth_last]
+    else: # write only
+        exp_bth_opcodes = [c_bth_wo_imm] if hdr_dict["config_use_immediate"]["value"] == 1 else [c_bth_wo]
+
+    # simple counter value (0..255) to serve as input data for snk_in.
     snk_in_data = b''.join([(i % 2**8).to_bytes(1, 'little') for i in range(n_words * n_bytes)])
     
     # Create clocks
@@ -122,7 +212,7 @@ async def tb_rdma_packetiser(dut):
     mmClock = Clock(dut.mm_clk, 1, units="ns")
 
     # DP streams
-    in_stream = DpStream(dut, 'snk_in', 'snk_out', dut.dp_clk, dut.dp_rst, c_data_w)
+    in_stream = DpStream(dut, 'snk_in', 'snk_out', dut.dp_clk, dut.dp_rst, c_data_w, c_bsn_init)
     out_stream = DpStream(dut, 'src_out', 'src_in', dut.dp_clk, dut.dp_rst, c_data_w)
 
     # MM busses
@@ -133,24 +223,74 @@ async def tb_rdma_packetiser(dut):
     cocotb.start_soon(perform_rst(dut.dp_rst, dpClock, 7))
     cocotb.start_soon(perform_rst(dut.mm_rst, mmClock, 7))
 
+    # set block_len
+    dut.block_len.value = c_block_len
+
+    # Stimuli
     await Timer(dpClock.period * 10)  # wait a bit for resets to occur
     await FallingEdge(dut.dp_clk)  # wait for falling edge/"negedge"
-    await write_mm_dict(reg_hdr_dat, hdr_dict)
-    #print(snk_in_data)
+    await write_mm_dict(reg_hdr_dat, hdr_dict) # write MM stimuli
     await Timer(dpClock.period * 10)  # wait a bit for resets to occur
     await FallingEdge(dut.dp_clk)  # wait for falling edge/"negedge"
-    mm_rd = await read_mm_dict(reg_hdr_dat, hdr_dict)
-    for k, v in mm_rd.items():
-        print(f'{k} = {hex(v.integer)} in hex, {v.integer} in dec')            
-    
+    mm_rd = await read_mm_dict(reg_hdr_dat, hdr_dict) # read back MM registers
+    verify_mm_regs(mm_rd, hdr_dict) # verify that the read (RW)registers are the same as what is written.            
     await FallingEdge(dut.dp_clk)  # wait for falling edge/"negedge"
-    cocotb.start_soon(send_multi_dp_packet(in_stream, snk_in_data, 10))
-    #cocotb.start_soon(in_stream.sosi_drv._driver_send(snk_in_data))
+    cocotb.start_soon(send_multi_dp_packet(in_stream, snk_in_data, 10)) # Send DP packets
     
-    # wait for packet to arrive on src_out
+    # Verify output packets
     for i in range(10):
-        data = await out_stream.sosi_mon.wait_for_recv()
-        dut._log.info("src_out data = \n%s", hexdump(data))
+        # Receive packet
+        packet = await out_stream.sosi_mon.wait_for_recv()
 
+        # Verify bth_opcode
+        exp_bth_opcode = exp_bth_opcodes[i % len(exp_bth_opcodes)]        
+        bth_opcode = packet[42]
+        assert exp_bth_opcode == bth_opcode, (
+                f'ERROR: Unexpected BTH opcode value {bth_opcode}, expected {exp_bth_opcode}')
 
-    
+        # Verify header                
+        hdr, hdr_length = extract_header(packet)        
+        dut._log.debug(f"header of length {hdr_length} = \n")
+        for k, v in hdr.items():
+            dut._log.debug(f'{k} = {hex(v)} \t(hex)\t\t{v} \t(dec)') 
+
+        start_addr = hdr_dict["config_start_address"]["value"]
+        nof_msg = hdr_dict["config_nof_msg"]["value"]
+        msg_cnt = (i // c_nof_packets_in_msg)
+
+        exp_hdr = hdr_dict.copy()
+        exp_hdr["ip_total_length"]["value"] = len(packet) - 14 # subtract ETH header length
+        exp_hdr["ip_header_checksum"]["value"] = compute_ip_checksum(packet)
+        exp_hdr["udp_total_length"]["value"] = len(packet) - 20 - 14 # subtract ETH + IP header lengths
+        exp_hdr["bth_opcode"]["value"] = exp_bth_opcode
+        exp_hdr["bth_psn"]["value"] = c_bsn_init + i
+        exp_hdr["reth_virtual_address"]["value"] = start_addr + c_dma_len * (msg_cnt % nof_msg)
+        if hdr_dict["config_use_msg_cnt_as_immediate"]["value"] == 1:
+            exp_hdr["immediate_data"]["value"] = msg_cnt
+        verify_header(hdr, exp_hdr)          
+
+        # Verify data
+        data = packet[hdr_length:-4]
+        assert snk_in_data == data, (
+                'ERROR: Data in output packet is not identical to input packet.')
+        dut._log.debug("src_out data = \n%s", hexdump(data))
+
+        # Verify icrc        
+        # calculation of icrc is done as in:
+        # https://github.com/secdev/scapy/blob/master/scapy/contrib/roce.py
+        # ICRC could be calculated using the scapy library (if installed) with the code below. 
+        #   from scapy.layers.l2 import Ether        
+        #   from scapy.contrib.roce import BTH # for compute_icrc function        
+        #   icrc = int.from_bytes(Ether(packet)['BTH'].compute_icrc(None), 'little')        
+        icrc = int.from_bytes(packet[-4:], "big")
+        ones = (0xFF).to_bytes(1, 'little')
+        pseudo_packet = ([ones] * 8 + [packet[14:15]] + 
+                         [ones] + [packet[16:22]] + [ones] +
+                         [packet[23:24]] + [ones] * 2 +
+                         [packet[26:40]] + [ones] * 2 +
+                         [packet[42:46]] + [ones] + [packet[47:-4]]
+        )
+        pseudo_packet = b''.join(pseudo_packet)        
+        exp_icrc = crc32(pseudo_packet) & 0xffffffff
+        assert exp_icrc == icrc, (
+                f'ERROR: Wrong ICRC, expected = {hex(exp_icrc)}, actual = {hex(icrc)}')