diff --git a/applications/lofar2/designs/lofar2_unb2b_ring/lofar2_unb2b_ring.cl b/applications/lofar2/designs/lofar2_unb2b_ring/lofar2_unb2b_ring.cl
index 61a557f520b4558a26c5113da663914051388084..435ae80b4e2a88e7d5fdb48c437df7aa4689653c 100644
--- a/applications/lofar2/designs/lofar2_unb2b_ring/lofar2_unb2b_ring.cl
+++ b/applications/lofar2/designs/lofar2_unb2b_ring/lofar2_unb2b_ring.cl
@@ -62,7 +62,14 @@
 // mm_channel order enum
 enum mm_channel {
   CH_INTERFACE_SELECT,
-  CH_BLOCK_VALIDATE_DECODE,
+  CH_BLOCK_VALIDATE_DECODE_0,
+  CH_BLOCK_VALIDATE_DECODE_1,
+  CH_BLOCK_VALIDATE_DECODE_2,
+  CH_BLOCK_VALIDATE_DECODE_3,
+  CH_BLOCK_VALIDATE_DECODE_4,
+  CH_BLOCK_VALIDATE_DECODE_5,
+  CH_BLOCK_VALIDATE_DECODE_6,
+  CH_BLOCK_VALIDATE_DECODE_7,
   CH_VALIDATE_CHANNEL,
   CH_LANE_DIRECTION,
   LAST_MM_CHANNEL_ENTRY
@@ -74,8 +81,8 @@ struct param_rx_validate_struct {
   uint err_cnt[NOF_ERR_COUNTS];
 };
 union param_rx_validate {
-  struct param_rx_validate_struct parameters[NOF_LANES];
-  uint arr[DIVIDE_AND_ROUND_UP(NOF_LANES*sizeof(struct param_rx_validate_struct),sizeof(uint))];
+  struct param_rx_validate_struct parameters;
+  uint arr[DIVIDE_AND_ROUND_UP(sizeof(struct param_rx_validate_struct),sizeof(uint))];
 };
 
 struct param_interface_select_struct {
@@ -250,7 +257,7 @@ channel struct line_10GbE rx_10GbE_channels[NOF_LANES] __attribute__((depth(0)))
 channel struct line_dp rx_decoded_channels[NOF_LANES] __attribute__((depth(0)));
 channel struct line_dp rx_sosi_channels[NOF_LANES] __attribute__((depth(0)));
 
-channel struct line_dp tx_validated_channels[NOF_LANES] __attribute__((depth(0)));
+channel struct line_dp tx_validated_channels[NOF_LANES] __attribute__((depth(DP_HEADER_SIZE)));
 channel struct line_10GbE tx_sosi_channels[NOF_LANES] __attribute__((depth(0)));
 
 channel struct mm_in mm_channel_in[LAST_MM_CHANNEL_ENTRY] __attribute__((depth(0)));
@@ -260,10 +267,19 @@ channel struct mm_out mm_channel_out[LAST_MM_CHANNEL_ENTRY+1] __attribute__((dep
 __constant uchar destination_mac[6] = {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF};
 __constant uchar source_mac[6] = {0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
 
+__constant uint64_t c_header_out[ETH_HEADER_SIZE] = {(__constant uint64_t) 0xFFFFFFFFFFFF0000, (__constant uint64_t) 0x0000000006000000};
+
 // Regmap table with offset, size. Offsets are chosen to fit the largest sizes when NOF_LANES=8
 __constant struct reg regmap[LAST_MM_CHANNEL_ENTRY] = {
   {0 , DIVIDE_AND_ROUND_UP(sizeof(struct param_interface_select_struct),sizeof(uint))},          //CH_INTERFACE_SELECT, size = 2 
-  {2 , DIVIDE_AND_ROUND_UP(NOF_LANES*sizeof(struct param_rx_validate_struct),sizeof(uint))},     //CH_BLOCK_VALIDATE_DECODE size = NOF_LANES*8
+  {2 , DIVIDE_AND_ROUND_UP(sizeof(struct param_rx_validate_struct),sizeof(uint))},     //CH_BLOCK_VALIDATE_DECODE_0 size = 8
+  {10, DIVIDE_AND_ROUND_UP(sizeof(struct param_rx_validate_struct),sizeof(uint))},     //CH_BLOCK_VALIDATE_DECODE_1 size = 8
+  {18, DIVIDE_AND_ROUND_UP(sizeof(struct param_rx_validate_struct),sizeof(uint))},     //CH_BLOCK_VALIDATE_DECODE_2 size = 8
+  {26, DIVIDE_AND_ROUND_UP(sizeof(struct param_rx_validate_struct),sizeof(uint))},     //CH_BLOCK_VALIDATE_DECODE_3 size = 8
+  {34, DIVIDE_AND_ROUND_UP(sizeof(struct param_rx_validate_struct),sizeof(uint))},     //CH_BLOCK_VALIDATE_DECODE_4 size = 8
+  {42, DIVIDE_AND_ROUND_UP(sizeof(struct param_rx_validate_struct),sizeof(uint))},     //CH_BLOCK_VALIDATE_DECODE_5 size = 8
+  {50, DIVIDE_AND_ROUND_UP(sizeof(struct param_rx_validate_struct),sizeof(uint))},     //CH_BLOCK_VALIDATE_DECODE_6 size = 8
+  {58, DIVIDE_AND_ROUND_UP(sizeof(struct param_rx_validate_struct),sizeof(uint))},     //CH_BLOCK_VALIDATE_DECODE_7 size = 8
   {66, DIVIDE_AND_ROUND_UP(NOF_LANES*sizeof(struct param_validate_channel_struct),sizeof(uint))},//CH_VALIDATE_CHANNEL size = NOF_LANES*1 
   {74, 8}                                                                                        //CH_LANE_DIRECTION size = 8 
 };
@@ -275,9 +291,10 @@ void handle_mm_request(const int ch_id, uint *reg_arr, bool ro)
   struct mm_in mm_request = read_channel_nb_intel(mm_channel_in[ch_id], &mm_valid); //non-blocking read
   struct mm_out mm_response;
   if (mm_valid) {
-    if(mm_request.wr && !ro) //write request
+    if(mm_request.wr) //write request
     {
-      reg_arr[mm_request.address] = mm_request.wrdata;
+      if(!ro)
+        reg_arr[mm_request.address] = mm_request.wrdata;
     } else { //read request
       mm_response.rddata = reg_arr[mm_request.address];
       write_channel_intel(mm_channel_out[ch_id], mm_response);
@@ -428,15 +445,17 @@ __kernel void interface_select()
         if (i % 2) { // odd
           input_10GbE = line_in_ring[i];
           valid_input = valid_ring_input[i];
-          if(valid_output)
+          if(valid_output){
             valid_qsfp_output[i/2] = true;
             line_out_qsfp[i/2] = output_10GbE;
+          }
         } else { // even
           input_10GbE = line_in_qsfp[i/2];
           valid_input = valid_qsfp_input[i/2];
-          if(valid_output)
+          if(valid_output){
             valid_ring_output[i] = true;
             line_out_ring[i] = output_10GbE;
+          }
         }
       }
 
@@ -447,15 +466,17 @@ __kernel void interface_select()
         if (i % 2) { // odd
           input_10GbE = line_in_qsfp[i/2];
           valid_input = valid_qsfp_input[i/2];
-          if(valid_output)
+          if(valid_output){
             valid_ring_output[i] = true;
             line_out_ring[i] = output_10GbE;
+          }
         } else { // even
           input_10GbE = line_in_ring[i];
           valid_input = valid_ring_input[i];
-          if(valid_output)
+          if(valid_output){
             valid_qsfp_output[i/2] = true;
             line_out_qsfp[i/2] = output_10GbE;
+          }
         }
 
       }
@@ -463,9 +484,10 @@ __kernel void interface_select()
       else { // board input
         input_10GbE = line_in_ring[i];
         valid_input = valid_ring_input[i];
-        if(valid_output)
+        if(valid_output){
           valid_ring_output[i] = true;
           line_out_ring[i] = output_10GbE;
+        }
       }
       // Write rx channels
       if(valid_input)
@@ -498,89 +520,102 @@ __kernel void interface_select()
 
 
 /* ----- ring_rx  ----- */
-__attribute__((max_global_work_dim(0)))
+__attribute__((num_compute_units(NOF_LANES), max_global_work_dim(0)))
 #ifndef EMULATOR
 __attribute__((autorun))
 #endif
 __kernel void block_validate_decode()
 {
+  const int laneIndex = get_compute_id(0);
   union param_rx_validate reg;
-  for (int i = 0; i < NOF_LANES; i++){
-    reg.parameters[i].block_cnt = 0;
-    for (int j = 0; j < NOF_ERR_COUNTS; j++)
-      reg.parameters[i].err_cnt[j] = 0; 
-  }
+  reg.parameters.block_cnt = 0;
+  for (int x = 0; x < NOF_ERR_COUNTS; x++)
+    reg.parameters.err_cnt[x] = 0; 
 
-  while(1){
-    // handle MM read/write requests
-    handle_rw_mm_request(CH_BLOCK_VALIDATE_DECODE, reg.arr);
-    
-    // Do someting with parameters
-    #pragma unroll
-    for (int i = 0; i < NOF_LANES; i++){
 #if USE_DP_LAYER
-      union dp_packet packet;
+  union dp_packet packets[2]; //one to read and one to write
 #else
-      union eth_packet packet;
+  union eth_packet packets[2]; //one to read and one to write
 #endif
-      struct line_10GbE input_10GbE;
-      uchar last_flags;
-      bool valid;
-      for (int j = 0; j < BLOCK_LENGTH; j++){
-        input_10GbE = read_channel_intel(rx_10GbE_channels[i]);
-
-        // validation
-        if((input_10GbE.flags & FLAG_LAST) == FLAG_LAST){
-          if (j == BLOCK_LENGTH -1 && input_10GbE.err == 0) {
-            valid = true;
-            last_flags = input_10GbE.flags;
-            reg.parameters[i].block_cnt += 1;
-          }
-          else {
-            valid = false;
-            if (j != BLOCK_LENGTH-1)
-              reg.parameters[i].err_cnt[ERR_BI] += 1;
-
-            for (int err = 0; err < NOF_ERR_COUNTS; err++){
-              if (err != ERR_BI)
-                reg.parameters[i].err_cnt[err] += ((input_10GbE.err & (1 << err)) >> err);
-            }
-            break;
-          }       
-        }
+  bool valid = false;
+  bool canWrite = false;
+  uint i = 0; // read iterator
+  uint j = 0; // write iterator
+  uint1_t readIndex = 0;
+  uint1_t writeIndex = 0;
+  while(1){
+    struct line_10GbE input_10GbE;
+    struct line_dp line_out;
+    bool ch_valid;
     
-        //Packet capturing
-        packet.raw[j] = input_10GbE.data; 
+    handle_rw_mm_request((laneIndex+CH_BLOCK_VALIDATE_DECODE_0), reg.arr); // handle MM read/write requests
+
+    input_10GbE = read_channel_nb_intel(rx_10GbE_channels[laneIndex], &ch_valid);
+    if(ch_valid){
+      // validation
+      if((input_10GbE.flags & FLAG_LAST) == FLAG_LAST){
+        if (i == BLOCK_LENGTH -1 && input_10GbE.err == 0) {
+          valid = true;
+          reg.parameters.block_cnt += 1;
+        }
+        else {
+          if (i != BLOCK_LENGTH-1)
+            reg.parameters.err_cnt[ERR_BI] += 1;
+          for (int err = 0; err < NOF_ERR_COUNTS; err++){
+            if (err != ERR_BI)
+              reg.parameters.err_cnt[err] += ((input_10GbE.err & (1 << err)) >> err);
+          }
+        }       
+      } 
+      //Packet capturing
+      packets[readIndex].raw[i] = input_10GbE.data;
+      if (i == BLOCK_LENGTH-1 || (input_10GbE.flags & FLAG_LAST) == FLAG_LAST){
+        i = 0; // reset read iterator
+      }
+      else {
+        i++; //only iterate if ch_valid = true
       }
- 
-      // Packet decoding
-      struct line_dp line_out;
-
-      if (valid) {
-        for (int j = 0; j < PAYLOAD_SIZE; j++){
-          line_out.data = packet.packet.payload[j];
-          line_out.flags = 0;
-          line_out.dp_bsn = 0;
-          line_out.dp_channel = 0;
-          if (j == 0)
-            line_out.flags |= FLAG_FIRST;
+    } 
+    
+    // Packet decoding
+    if (valid) {
+      writeIndex = readIndex; // Write the stored packet 
+      readIndex = !readIndex; // set read index to the packet which can be overwritten   
+      valid = false;
+      canWrite = true; // assumes canWrite will be false again before valid is true as outgoing packet is shorter than incoming packet
+    }
+
+    if (canWrite){
+      line_out.data = packets[writeIndex].packet.payload[j];
+      line_out.flags = 0;
+      line_out.dp_bsn = 0;
+      line_out.dp_channel = 0;
+      if (j == 0) {
+        line_out.flags |= FLAG_FIRST;
 #if USE_DP_LAYER
-            line_out.dp_bsn = (packet.packet.dp_header.dp_sync_and_bsn & MASK_BSN); //62:0 = bsn
-            line_out.dp_channel = packet.packet.dp_header.dp_channel;
-            
-            if( 0 != (packet.packet.dp_header.dp_sync_and_bsn & MASK_SYNC))
-              line_out.flags |= FLAG_SYNC;
+        line_out.dp_bsn = (packets[writeIndex].packet.dp_header.dp_sync_and_bsn & MASK_BSN); //62:0 = bsn
+        line_out.dp_channel = packets[writeIndex].packet.dp_header.dp_channel;
+        
+        if( 0 != (packets[writeIndex].packet.dp_header.dp_sync_and_bsn & MASK_SYNC))
+          line_out.flags |= FLAG_SYNC;
 #endif
-          if (j == BLOCK_LENGTH-1)
-            line_out.flags = last_flags; 
-
-          write_channel_intel(rx_decoded_channels[i], line_out);
-        }
       }
+      if (j == BLOCK_LENGTH-1){
+        line_out.flags |= FLAG_LAST;
+        j = 0;
+        canWrite = false;
+      } 
+      else {
+        j++;  
+      }
+
+      write_channel_intel(rx_decoded_channels[laneIndex], line_out);
     }
   }
 }
 
+
+
 __attribute__((max_global_work_dim(0)))
 #ifndef EMULATOR
 __attribute__((autorun))
@@ -618,11 +653,11 @@ __kernel void validate_bsn_at_sync()
 {
   bool discard[NOF_LANES] = {};
   uint64_t localBsn = 0;
-  struct line_bs_sosi bs_sosi;
 #ifdef EMULATOR
   int emu_i = 0;
 #endif
   while(1){
+    struct line_bs_sosi bs_sosi;
     struct line_dp line[NOF_LANES];
     bool valid[NOF_LANES];
     bool bs_sosi_valid;
@@ -634,7 +669,7 @@ __kernel void validate_bsn_at_sync()
     #pragma unroll
     for (int i = 0; i < NOF_LANES; i++){
       line[i] = read_channel_nb_intel(rx_sosi_channels[i], &valid[i]);
-      if (valid[i] && ((bs_sosi.flags & FLAG_SYNC) == FLAG_SYNC))
+      if (valid[i] && ((line[i].flags & FLAG_SYNC) == FLAG_SYNC))
          discard[i] = (localBsn != line[i].dp_bsn);
     }
      
@@ -697,7 +732,7 @@ __kernel void validate_channel()
     reg.parameters[i].transport_nof_hops = REMOVE_CHANNEL;
   }
 
-  bool discard[NOF_LANES] = {};
+  bool discard[NOF_LANES] = {0};
   while(1){
     // handle MM read/write requests
     handle_rw_mm_request(CH_VALIDATE_CHANNEL, reg.arr);
@@ -777,63 +812,69 @@ __kernel void no_validate_channel()
 #endif
 
 // TODO: make sure the latency is low.
-__attribute__((max_global_work_dim(0)))
+__attribute__((num_compute_units(NOF_LANES), max_global_work_dim(0)))
 #ifndef EMULATOR
 __attribute__((autorun))
 #endif
 __kernel void tx_encode()
 {
-  uint64_t dp_sync_and_bsn = 0;
+  const int laneIndex = get_compute_id(0);
   while(1){
-    #pragma unroll
-    for (int i = 0; i < NOF_LANES; i++){
+    struct line_10GbE output_10GbE; 
+    struct line_dp input_dp;
+    uint64_t dp_sync_and_bsn = 0;
+    ushort dp_channel = 0;
+    for (int j = 0; j < BLOCK_LENGTH; j++){
+
 #if USE_DP_LAYER
-      union dp_packet packet;
+      if(j == 0 || (j > DP_HEADER_SIZE)){
 #else
-      union eth_packet packet;
-#endif
-      for (int x = 0; x < sizeof(destination_mac); x++)
-        packet.packet.ethernet_header.destination_mac[x] = destination_mac[x];
-      for (int x = 0; x < sizeof(source_mac); x++)
-        packet.packet.ethernet_header.source_mac[x] = source_mac[x];
-      packet.packet.ethernet_header.ether_type = ETHER_TYPE;
-     
-      // Read input data
-      for (int j = 0; j < PAYLOAD_SIZE; j++){
-        struct line_dp input_dp;
-        input_dp = read_channel_intel(tx_validated_channels[i]);
-        packet.packet.payload[j] = input_dp.data;
-#if USE_DP_LAYER
-        // encode dp data
-        dp_sync_and_bsn &= MASK_BSN; //reset sync bit
-        if ((input_dp.flags & FLAG_FIRST) == FLAG_FIRST){
-          dp_sync_and_bsn = (input_dp.dp_bsn & MASK_BSN); // set bsn
-          packet.packet.dp_header.dp_channel = input_dp.dp_channel + 1; //Add 1 hop.
-          if ((input_dp.flags & FLAG_SYNC)==FLAG_SYNC)
-            dp_sync_and_bsn |= MASK_SYNC;
-        }     
-        packet.packet.dp_header.dp_sync_and_bsn = dp_sync_and_bsn;
+      if(j == 0 || (j > ETH_HEADER_SIZE)){
 #endif
+        input_dp = read_channel_intel(tx_validated_channels[laneIndex]);
       }
-
-      // Write out packet
-      struct line_10GbE output_10GbE; 
       output_10GbE.flags = 0;
       output_10GbE.err = 0;
-      for (int j = 0; j < BLOCK_LENGTH; j++){
-        output_10GbE.data = packet.raw[j];
-        if (j == 0)
-          output_10GbE.flags = FLAG_FIRST;
-        if (j == BLOCK_LENGTH-1)
-          output_10GbE.flags = FLAG_LAST; 
-
-        write_channel_intel(tx_sosi_channels[i], output_10GbE);
+
+      if(j == 0){
+#if USE_DP_LAYER
+        dp_channel = input_dp.dp_channel + 1; //Add 1 hop.
+        if ((input_dp.flags & FLAG_SYNC)==FLAG_SYNC){
+          dp_sync_and_bsn = (input_dp.dp_bsn | MASK_SYNC); // set bsn and sync
+        }
+        else{
+          dp_sync_and_bsn = (input_dp.dp_bsn & MASK_BSN); // set bsn and clear sync (if set)
+        }
+#endif            
+        output_10GbE.flags = FLAG_FIRST; 
+        output_10GbE.data = c_header_out[0];
+      }
+      else if (j == 1){
+#if USE_DP_LAYER
+        output_10GbE.data = (c_header_out[1] | ((uint64_t) dp_channel));
+#else
+        output_10GbE.data = c_header_out[1];
+#endif
+      }
+#if USE_DP_LAYER
+      else if (j == 2){
+        output_10GbE.data = dp_sync_and_bsn;
       }
+#endif
+      else {
+        output_10GbE.data = input_dp.data;
+      }
+      if (j == BLOCK_LENGTH-1){
+        output_10GbE.flags = FLAG_LAST; 
+      }
+      
+      write_channel_intel(tx_sosi_channels[laneIndex], output_10GbE);
     }
   }
 }
 
 
+
 __attribute__((max_global_work_dim(0)))
 __kernel void dummy()
 {
diff --git a/applications/ta2/bsp/hardware/lofar2_unb2b_ring_bsp/top.vhd b/applications/ta2/bsp/hardware/lofar2_unb2b_ring_bsp/top.vhd
index 82b9de6412df97dba7a394ff489ca7b634513343..e8bc077f5559eb052dba6fdc6c9b793bd76eb2d2 100644
--- a/applications/ta2/bsp/hardware/lofar2_unb2b_ring_bsp/top.vhd
+++ b/applications/ta2/bsp/hardware/lofar2_unb2b_ring_bsp/top.vhd
@@ -352,9 +352,9 @@ ARCHITECTURE str OF top IS
   SIGNAL dp_demux_tx_monitor_siso_arr         : t_dp_siso_arr(c_nof_streams_ring-1 DOWNTO 0) := (OTHERS => c_dp_siso_rst);
 
   SIGNAL rx_monitor_sosi_2arr                 : t_dp_sosi_rx_monitor_2arr(c_nof_streams_ring-1 DOWNTO 0) := (OTHERS => (OTHERS => c_dp_sosi_rst));
-  SIGNAL rx_monitor_siso_2arr                 : t_dp_siso_rx_monitor_2arr(c_nof_streams_ring-1 DOWNTO 0) := (OTHERS => (OTHERS => c_dp_siso_rst));
+  SIGNAL rx_monitor_siso_2arr                 : t_dp_siso_rx_monitor_2arr(c_nof_streams_ring-1 DOWNTO 0) := (OTHERS => (OTHERS => c_dp_siso_rdy));
   SIGNAL tx_monitor_sosi_2arr                 : t_dp_sosi_tx_monitor_2arr(c_nof_streams_ring-1 DOWNTO 0) := (OTHERS => (OTHERS => c_dp_sosi_rst));
-  SIGNAL tx_monitor_siso_2arr                 : t_dp_siso_tx_monitor_2arr(c_nof_streams_ring-1 DOWNTO 0) := (OTHERS => (OTHERS => c_dp_siso_rst));
+  SIGNAL tx_monitor_siso_2arr                 : t_dp_siso_tx_monitor_2arr(c_nof_streams_ring-1 DOWNTO 0) := (OTHERS => (OTHERS => c_dp_siso_rdy));
 
   SIGNAL local_sosi_arr                       : t_dp_sosi_arr(g_nof_lanes-1 DOWNTO 0) := (OTHERS => c_dp_sosi_rst);
   SIGNAL local_siso_arr                       : t_dp_siso_arr(g_nof_lanes-1 DOWNTO 0) := (OTHERS => c_dp_siso_rst);
@@ -561,8 +561,8 @@ BEGIN
     g_use_channel => TRUE
   )
   PORT MAP(
-    dp_clk                => st_clk, 
-    dp_rst                => st_rst, 
+    dp_clk             => st_clk, 
+    dp_rst             => st_rst, 
     dp_src_out_arr     => from_lane_sosi_arr(g_nof_lanes-1 DOWNTO 0), 
     dp_src_in_arr      => from_lane_siso_arr(g_nof_lanes-1 DOWNTO 0), 
     dp_snk_out_arr     => to_lane_siso_arr(g_nof_lanes-1 DOWNTO 0), 
@@ -709,7 +709,7 @@ BEGIN
     -----------------------------------------------------------------------------
     u_dp_demux_tx_monitor : ENTITY dp_lib.dp_demux
     GENERIC MAP(
-      g_nof_output => g_nof_rx_monitors
+      g_nof_output => g_nof_tx_monitors
     )
     PORT MAP(
       rst => st_rst,