diff --git a/applications/lofar2/designs/lofar2_unb2b_ring/lofar2_unb2b_ring.cl b/applications/lofar2/designs/lofar2_unb2b_ring/lofar2_unb2b_ring.cl
index 61a557f520b4558a26c5113da663914051388084..435ae80b4e2a88e7d5fdb48c437df7aa4689653c 100644
--- a/applications/lofar2/designs/lofar2_unb2b_ring/lofar2_unb2b_ring.cl
+++ b/applications/lofar2/designs/lofar2_unb2b_ring/lofar2_unb2b_ring.cl
@@ -62,7 +62,14 @@
 // mm_channel order enum
 enum mm_channel {
   CH_INTERFACE_SELECT,
-  CH_BLOCK_VALIDATE_DECODE,
+  CH_BLOCK_VALIDATE_DECODE_0,
+  CH_BLOCK_VALIDATE_DECODE_1,
+  CH_BLOCK_VALIDATE_DECODE_2,
+  CH_BLOCK_VALIDATE_DECODE_3,
+  CH_BLOCK_VALIDATE_DECODE_4,
+  CH_BLOCK_VALIDATE_DECODE_5,
+  CH_BLOCK_VALIDATE_DECODE_6,
+  CH_BLOCK_VALIDATE_DECODE_7,
   CH_VALIDATE_CHANNEL,
   CH_LANE_DIRECTION,
   LAST_MM_CHANNEL_ENTRY
@@ -74,8 +81,8 @@ struct param_rx_validate_struct {
   uint err_cnt[NOF_ERR_COUNTS];
 };
 union param_rx_validate {
-  struct param_rx_validate_struct parameters[NOF_LANES];
-  uint arr[DIVIDE_AND_ROUND_UP(NOF_LANES*sizeof(struct param_rx_validate_struct),sizeof(uint))];
+  struct param_rx_validate_struct parameters;
+  uint arr[DIVIDE_AND_ROUND_UP(sizeof(struct param_rx_validate_struct),sizeof(uint))];
 };
 
 struct param_interface_select_struct {
@@ -250,7 +257,7 @@ channel struct line_10GbE rx_10GbE_channels[NOF_LANES] __attribute__((depth(0)))
 channel struct line_dp rx_decoded_channels[NOF_LANES] __attribute__((depth(0)));
 channel struct line_dp rx_sosi_channels[NOF_LANES] __attribute__((depth(0)));
-channel struct line_dp tx_validated_channels[NOF_LANES] __attribute__((depth(0)));
+channel struct line_dp tx_validated_channels[NOF_LANES] __attribute__((depth(DP_HEADER_SIZE)));
 channel struct line_10GbE tx_sosi_channels[NOF_LANES] __attribute__((depth(0)));
 
 channel struct mm_in mm_channel_in[LAST_MM_CHANNEL_ENTRY] __attribute__((depth(0)));
@@ -260,10 +267,19 @@ channel struct mm_out mm_channel_out[LAST_MM_CHANNEL_ENTRY+1] __attribute__((dep
 __constant uchar destination_mac[6] = {0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF};
 __constant uchar source_mac[6] = {0x00, 0x00, 0x00, 0x00, 0x00, 0x00};
 
+__constant uint64_t c_header_out[ETH_HEADER_SIZE] = {(__constant uint64_t) 0xFFFFFFFFFFFF0000, (__constant uint64_t) 0x0000000006000000};
+
 // Regmap table with offset, size. Offsets are chosen to fit the largest sizes when NOF_LANES=8
 __constant struct reg regmap[LAST_MM_CHANNEL_ENTRY] = {
   {0 , DIVIDE_AND_ROUND_UP(sizeof(struct param_interface_select_struct),sizeof(uint))}, //CH_INTERFACE_SELECT, size = 2
-  {2 , DIVIDE_AND_ROUND_UP(NOF_LANES*sizeof(struct param_rx_validate_struct),sizeof(uint))}, //CH_BLOCK_VALIDATE_DECODE size = NOF_LANES*8
+  {2 , DIVIDE_AND_ROUND_UP(sizeof(struct param_rx_validate_struct),sizeof(uint))}, //CH_BLOCK_VALIDATE_DECODE_0 size = 8
+  {10, DIVIDE_AND_ROUND_UP(sizeof(struct param_rx_validate_struct),sizeof(uint))}, //CH_BLOCK_VALIDATE_DECODE_1 size = 8
+  {18, DIVIDE_AND_ROUND_UP(sizeof(struct param_rx_validate_struct),sizeof(uint))}, //CH_BLOCK_VALIDATE_DECODE_2 size = 8
+  {26, DIVIDE_AND_ROUND_UP(sizeof(struct param_rx_validate_struct),sizeof(uint))}, //CH_BLOCK_VALIDATE_DECODE_3 size = 8
+  {34, DIVIDE_AND_ROUND_UP(sizeof(struct param_rx_validate_struct),sizeof(uint))}, //CH_BLOCK_VALIDATE_DECODE_4 size = 8
+  {42, DIVIDE_AND_ROUND_UP(sizeof(struct param_rx_validate_struct),sizeof(uint))}, //CH_BLOCK_VALIDATE_DECODE_5 size = 8
+  {50, DIVIDE_AND_ROUND_UP(sizeof(struct param_rx_validate_struct),sizeof(uint))}, //CH_BLOCK_VALIDATE_DECODE_6 size = 8
+  {58, DIVIDE_AND_ROUND_UP(sizeof(struct param_rx_validate_struct),sizeof(uint))}, //CH_BLOCK_VALIDATE_DECODE_7 size = 8
   {66, DIVIDE_AND_ROUND_UP(NOF_LANES*sizeof(struct param_validate_channel_struct),sizeof(uint))},//CH_VALIDATE_CHANNEL size = NOF_LANES*1
   {74, 8} //CH_LANE_DIRECTION size = 8
 };
@@ -275,9 +291,10 @@ void handle_mm_request(const int ch_id, uint *reg_arr, bool ro)
   struct mm_in mm_request = read_channel_nb_intel(mm_channel_in[ch_id], &mm_valid); //non-blocking read
   struct mm_out mm_response;
   if (mm_valid) {
-    if(mm_request.wr && !ro) //write request
+    if(mm_request.wr) //write request
     {
-      reg_arr[mm_request.address] = mm_request.wrdata;
+      if(!ro)
+        reg_arr[mm_request.address] = mm_request.wrdata;
     } else { //read request
       mm_response.rddata = reg_arr[mm_request.address];
       write_channel_intel(mm_channel_out[ch_id], mm_response);
@@ -428,15 +445,17 @@ __kernel void interface_select()
       if (i % 2) { // odd
         input_10GbE = line_in_ring[i];
         valid_input = valid_ring_input[i];
-        if(valid_output)
+        if(valid_output){
           valid_qsfp_output[i/2] = true;
           line_out_qsfp[i/2] = output_10GbE;
+        }
       }
       else { // even
         input_10GbE = line_in_qsfp[i/2];
         valid_input = valid_qsfp_input[i/2];
-        if(valid_output)
+        if(valid_output){
           valid_ring_output[i] = true;
           line_out_ring[i] = output_10GbE;
+        }
       }
     }
@@ -447,15 +466,17 @@ __kernel void interface_select()
       if (i % 2) { // odd
         input_10GbE = line_in_qsfp[i/2];
         valid_input = valid_qsfp_input[i/2];
-        if(valid_output)
+        if(valid_output){
           valid_ring_output[i] = true;
           line_out_ring[i] = output_10GbE;
+        }
       }
       else { // even
         input_10GbE = line_in_ring[i];
         valid_input = valid_ring_input[i];
-        if(valid_output)
+        if(valid_output){
           valid_qsfp_output[i/2] = true;
           line_out_qsfp[i/2] = output_10GbE;
+        }
       }
     }
@@ -463,9 +484,10 @@ __kernel void interface_select()
     else { // board input
       input_10GbE = line_in_ring[i];
       valid_input = valid_ring_input[i];
-      if(valid_output)
+      if(valid_output){
         valid_ring_output[i] = true;
         line_out_ring[i] = output_10GbE;
+      }
     }
     // Write rx channels
     if(valid_input)
@@ -498,89 +520,102 @@
 /* ----- ring_rx ----- */
-__attribute__((max_global_work_dim(0)))
+__attribute__((num_compute_units(NOF_LANES), max_global_work_dim(0)))
 #ifndef EMULATOR
 __attribute__((autorun))
 #endif
 __kernel void block_validate_decode()
 {
+  const int laneIndex = get_compute_id(0);
   union param_rx_validate reg;
-  for (int i = 0; i < NOF_LANES; i++){
-    reg.parameters[i].block_cnt = 0;
-    for (int j = 0; j < NOF_ERR_COUNTS; j++)
-      reg.parameters[i].err_cnt[j] = 0;
-  }
+  reg.parameters.block_cnt = 0;
+  for (int x = 0; x < NOF_ERR_COUNTS; x++)
+    reg.parameters.err_cnt[x] = 0;
 
-  while(1){
-    // handle MM read/write requests
-    handle_rw_mm_request(CH_BLOCK_VALIDATE_DECODE, reg.arr);
-
-    // Do someting with parameters
-    #pragma unroll
-    for (int i = 0; i < NOF_LANES; i++){
 #if USE_DP_LAYER
-      union dp_packet packet;
+  union dp_packet packets[2]; //one to read and one to write
 #else
-      union eth_packet packet;
+  union eth_packet packets[2]; //one to read and one to write
 #endif
-      struct line_10GbE input_10GbE;
-      uchar last_flags;
-      bool valid;
-      for (int j = 0; j < BLOCK_LENGTH; j++){
-        input_10GbE = read_channel_intel(rx_10GbE_channels[i]);
-
-        // validation
-        if((input_10GbE.flags & FLAG_LAST) == FLAG_LAST){
-          if (j == BLOCK_LENGTH -1 && input_10GbE.err == 0) {
-            valid = true;
-            last_flags = input_10GbE.flags;
-            reg.parameters[i].block_cnt += 1;
-          }
-          else {
-            valid = false;
-            if (j != BLOCK_LENGTH-1)
-              reg.parameters[i].err_cnt[ERR_BI] += 1;
-
-            for (int err = 0; err < NOF_ERR_COUNTS; err++){
-              if (err != ERR_BI)
-                reg.parameters[i].err_cnt[err] += ((input_10GbE.err & (1 << err)) >> err);
-            }
-            break;
-          }
-        }
+  bool valid = false;
+  bool canWrite = false;
+  uint i = 0; // read iterator
+  uint j = 0; // write iterator
+  uint1_t readIndex = 0;
+  uint1_t writeIndex = 0;
+  while(1){
+    struct line_10GbE input_10GbE;
+    struct line_dp line_out;
+    bool ch_valid;
 
-        //Packet capturing
-        packet.raw[j] = input_10GbE.data;
+    handle_rw_mm_request((laneIndex+CH_BLOCK_VALIDATE_DECODE_0), reg.arr); // handle MM read/write requests
+
+    input_10GbE = read_channel_nb_intel(rx_10GbE_channels[laneIndex], &ch_valid);
+    if(ch_valid){
+      // validation
+      if((input_10GbE.flags & FLAG_LAST) == FLAG_LAST){
+        if (i == BLOCK_LENGTH -1 && input_10GbE.err == 0) {
+          valid = true;
+          reg.parameters.block_cnt += 1;
+        }
+        else {
+          if (i != BLOCK_LENGTH-1)
+            reg.parameters.err_cnt[ERR_BI] += 1;
+          for (int err = 0; err < NOF_ERR_COUNTS; err++){
+            if (err != ERR_BI)
+              reg.parameters.err_cnt[err] += ((input_10GbE.err & (1 << err)) >> err);
+          }
+        }
+      }
+      //Packet capturing
+      packets[readIndex].raw[i] = input_10GbE.data;
+      if (i == BLOCK_LENGTH-1 || (input_10GbE.flags & FLAG_LAST) == FLAG_LAST){
+        i = 0; // reset read iterator
+      }
+      else {
+        i++; //only iterate if ch_valid = true
       }
-
-      // Packet decoding
-      struct line_dp line_out;
-
-      if (valid) {
-        for (int j = 0; j < PAYLOAD_SIZE; j++){
-          line_out.data = packet.packet.payload[j];
-          line_out.flags = 0;
-          line_out.dp_bsn = 0;
-          line_out.dp_channel = 0;
-          if (j == 0)
-            line_out.flags |= FLAG_FIRST;
+    }
+
+    // Packet decoding
+    if (valid) {
+      writeIndex = readIndex; // Write the stored packet
+      readIndex = !readIndex; // set read index to the packet which can be overwritten
+      valid = false;
+      canWrite = true; // assumes canWrite will be false again before valid is true as outgoing packet is shorter than incoming packet
+    }
+
+    if (canWrite){
+      line_out.data = packets[writeIndex].packet.payload[j];
+      line_out.flags = 0;
+      line_out.dp_bsn = 0;
+      line_out.dp_channel = 0;
+      if (j == 0) {
+        line_out.flags |= FLAG_FIRST;
 #if USE_DP_LAYER
-            line_out.dp_bsn = (packet.packet.dp_header.dp_sync_and_bsn & MASK_BSN); //62:0 = bsn
-            line_out.dp_channel = packet.packet.dp_header.dp_channel;
-
-            if( 0 != (packet.packet.dp_header.dp_sync_and_bsn & MASK_SYNC))
-              line_out.flags |= FLAG_SYNC;
+        line_out.dp_bsn = (packets[writeIndex].packet.dp_header.dp_sync_and_bsn & MASK_BSN); //62:0 = bsn
+        line_out.dp_channel = packets[writeIndex].packet.dp_header.dp_channel;
+
+        if( 0 != (packets[writeIndex].packet.dp_header.dp_sync_and_bsn & MASK_SYNC))
+          line_out.flags |= FLAG_SYNC;
 #endif
-          if (j == BLOCK_LENGTH-1)
-            line_out.flags = last_flags;
-
-          write_channel_intel(rx_decoded_channels[i], line_out);
-        }
       }
+      if (j == BLOCK_LENGTH-1){
+        line_out.flags |= FLAG_LAST;
+        j = 0;
+        canWrite = false;
+      }
+      else {
+        j++;
+      }
+
+      write_channel_intel(rx_decoded_channels[laneIndex], line_out);
     }
   }
 }
+
+
 __attribute__((max_global_work_dim(0)))
 #ifndef EMULATOR
 __attribute__((autorun))
 #endif
@@ -618,11 +653,11 @@ __kernel void validate_bsn_at_sync()
 {
   bool discard[NOF_LANES] = {};
   uint64_t localBsn = 0;
-  struct line_bs_sosi bs_sosi;
 #ifdef EMULATOR
   int emu_i = 0;
 #endif
   while(1){
+    struct line_bs_sosi bs_sosi;
     struct line_dp line[NOF_LANES];
     bool valid[NOF_LANES];
     bool bs_sosi_valid;
@@ -634,7 +669,7 @@ __kernel void validate_bsn_at_sync()
     #pragma unroll
     for (int i = 0; i < NOF_LANES; i++){
       line[i] = read_channel_nb_intel(rx_sosi_channels[i], &valid[i]);
-      if (valid[i] && ((bs_sosi.flags & FLAG_SYNC) == FLAG_SYNC))
+      if (valid[i] && ((line[i].flags & FLAG_SYNC) == FLAG_SYNC))
         discard[i] = (localBsn != line[i].dp_bsn);
     }
 
@@ -697,7 +732,7 @@ __kernel void validate_channel()
     reg.parameters[i].transport_nof_hops = REMOVE_CHANNEL;
   }
 
-  bool discard[NOF_LANES] = {};
+  bool discard[NOF_LANES] = {0};
   while(1){
     // handle MM read/write requests
     handle_rw_mm_request(CH_VALIDATE_CHANNEL, reg.arr);
@@ -777,63 +812,69 @@ __kernel void no_validate_channel()
 #endif
 
 // TODO: make sure the latency is low.
-__attribute__((max_global_work_dim(0)))
+__attribute__((num_compute_units(NOF_LANES), max_global_work_dim(0)))
 #ifndef EMULATOR
 __attribute__((autorun))
 #endif
 __kernel void tx_encode()
 {
-  uint64_t dp_sync_and_bsn = 0;
+  const int laneIndex = get_compute_id(0);
   while(1){
-    #pragma unroll
-    for (int i = 0; i < NOF_LANES; i++){
+    struct line_10GbE output_10GbE;
+    struct line_dp input_dp;
+    uint64_t dp_sync_and_bsn = 0;
+    ushort dp_channel = 0;
+    for (int j = 0; j < BLOCK_LENGTH; j++){
+
 #if USE_DP_LAYER
-      union dp_packet packet;
+      if(j == 0 || (j > DP_HEADER_SIZE)){
 #else
-      union eth_packet packet;
-#endif
-      for (int x = 0; x < sizeof(destination_mac); x++)
-        packet.packet.ethernet_header.destination_mac[x] = destination_mac[x];
-      for (int x = 0; x < sizeof(source_mac); x++)
-        packet.packet.ethernet_header.source_mac[x] = source_mac[x];
-      packet.packet.ethernet_header.ether_type = ETHER_TYPE;
-
-      // Read input data
-      for (int j = 0; j < PAYLOAD_SIZE; j++){
-        struct line_dp input_dp;
-        input_dp = read_channel_intel(tx_validated_channels[i]);
-        packet.packet.payload[j] = input_dp.data;
-#if USE_DP_LAYER
-        // encode dp data
-        dp_sync_and_bsn &= MASK_BSN; //reset sync bit
-        if ((input_dp.flags & FLAG_FIRST) == FLAG_FIRST){
-          dp_sync_and_bsn = (input_dp.dp_bsn & MASK_BSN); // set bsn
-          packet.packet.dp_header.dp_channel = input_dp.dp_channel + 1; //Add 1 hop.
-          if ((input_dp.flags & FLAG_SYNC)==FLAG_SYNC)
-            dp_sync_and_bsn |= MASK_SYNC;
-        }
-        packet.packet.dp_header.dp_sync_and_bsn = dp_sync_and_bsn;
+      if(j == 0 || (j > ETH_HEADER_SIZE)){
 #endif
+        input_dp = read_channel_intel(tx_validated_channels[laneIndex]);
       }
-
-      // Write out packet
-      struct line_10GbE output_10GbE;
       output_10GbE.flags = 0;
       output_10GbE.err = 0;
-      for (int j = 0; j < BLOCK_LENGTH; j++){
-        output_10GbE.data = packet.raw[j];
-        if (j == 0)
-          output_10GbE.flags = FLAG_FIRST;
-        if (j == BLOCK_LENGTH-1)
-          output_10GbE.flags = FLAG_LAST;
-
-        write_channel_intel(tx_sosi_channels[i], output_10GbE);
+
+      if(j == 0){
+#if USE_DP_LAYER
+        dp_channel = input_dp.dp_channel + 1; //Add 1 hop.
+        if ((input_dp.flags & FLAG_SYNC)==FLAG_SYNC){
+          dp_sync_and_bsn = (input_dp.dp_bsn | MASK_SYNC); // set bsn and sync
+        }
+        else{
+          dp_sync_and_bsn = (input_dp.dp_bsn & MASK_BSN); // set bsn and clear sync (if set)
+        }
+#endif
+        output_10GbE.flags = FLAG_FIRST;
+        output_10GbE.data = c_header_out[0];
+      }
+      else if (j == 1){
+#if USE_DP_LAYER
+        output_10GbE.data = (c_header_out[1] | ((uint64_t) dp_channel));
+#else
+        output_10GbE.data = c_header_out[1];
+#endif
+      }
+#if USE_DP_LAYER
+      else if (j == 2){
+        output_10GbE.data = dp_sync_and_bsn;
       }
+#endif
+      else {
+        output_10GbE.data = input_dp.data;
+      }
+      if (j == BLOCK_LENGTH-1){
+        output_10GbE.flags = FLAG_LAST;
+      }
+
+      write_channel_intel(tx_sosi_channels[laneIndex], output_10GbE);
     }
   }
 }
+
 __attribute__((max_global_work_dim(0)))
 __kernel void dummy()
 {
diff --git a/applications/ta2/bsp/hardware/lofar2_unb2b_ring_bsp/top.vhd b/applications/ta2/bsp/hardware/lofar2_unb2b_ring_bsp/top.vhd
index 82b9de6412df97dba7a394ff489ca7b634513343..e8bc077f5559eb052dba6fdc6c9b793bd76eb2d2 100644
--- a/applications/ta2/bsp/hardware/lofar2_unb2b_ring_bsp/top.vhd
+++ b/applications/ta2/bsp/hardware/lofar2_unb2b_ring_bsp/top.vhd
@@ -352,9 +352,9 @@ ARCHITECTURE str OF top IS
   SIGNAL dp_demux_tx_monitor_siso_arr : t_dp_siso_arr(c_nof_streams_ring-1 DOWNTO 0) := (OTHERS => c_dp_siso_rst);
 
   SIGNAL rx_monitor_sosi_2arr : t_dp_sosi_rx_monitor_2arr(c_nof_streams_ring-1 DOWNTO 0) := (OTHERS => (OTHERS => c_dp_sosi_rst));
-  SIGNAL rx_monitor_siso_2arr : t_dp_siso_rx_monitor_2arr(c_nof_streams_ring-1 DOWNTO 0) := (OTHERS => (OTHERS => c_dp_siso_rst));
+  SIGNAL rx_monitor_siso_2arr : t_dp_siso_rx_monitor_2arr(c_nof_streams_ring-1 DOWNTO 0) := (OTHERS => (OTHERS => c_dp_siso_rdy));
   SIGNAL tx_monitor_sosi_2arr : t_dp_sosi_tx_monitor_2arr(c_nof_streams_ring-1 DOWNTO 0) := (OTHERS => (OTHERS => c_dp_sosi_rst));
-  SIGNAL tx_monitor_siso_2arr : t_dp_siso_tx_monitor_2arr(c_nof_streams_ring-1 DOWNTO 0) := (OTHERS => (OTHERS => c_dp_siso_rst));
+  SIGNAL tx_monitor_siso_2arr : t_dp_siso_tx_monitor_2arr(c_nof_streams_ring-1 DOWNTO 0) := (OTHERS => (OTHERS => c_dp_siso_rdy));
 
   SIGNAL local_sosi_arr : t_dp_sosi_arr(g_nof_lanes-1 DOWNTO 0) := (OTHERS => c_dp_sosi_rst);
   SIGNAL local_siso_arr : t_dp_siso_arr(g_nof_lanes-1 DOWNTO 0) := (OTHERS => c_dp_siso_rst);
@@ -561,8 +561,8 @@ BEGIN
       g_use_channel => TRUE
     )
     PORT MAP(
-      dp_clk => st_clk,
-      dp_rst => st_rst,
+      dp_clk         => st_clk,
+      dp_rst         => st_rst,
       dp_src_out_arr => from_lane_sosi_arr(g_nof_lanes-1 DOWNTO 0),
       dp_src_in_arr  => from_lane_siso_arr(g_nof_lanes-1 DOWNTO 0),
       dp_snk_out_arr => to_lane_siso_arr(g_nof_lanes-1 DOWNTO 0),
@@ -709,7 +709,7 @@
   -----------------------------------------------------------------------------
   u_dp_demux_tx_monitor : ENTITY dp_lib.dp_demux
   GENERIC MAP(
-    g_nof_output => g_nof_rx_monitors
+    g_nof_output => g_nof_tx_monitors
  )
  PORT MAP(
    rst => st_rst,
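
For reference, below is an illustrative, self-contained C sketch (not part of the patch above) of the ping-pong buffering idea that the reworked block_validate_decode kernel relies on: one buffer is filled word by word while the previously completed block is drained, so capture and decode never touch the same buffer. BLOCK_LEN, the input loop and the printf sink are hypothetical stand-ins for BLOCK_LENGTH, the rx_10GbE_channels reads and the rx_decoded_channels writes; the real kernel additionally validates error flags, strips the header and services MM requests.

#include <stdint.h>
#include <stdio.h>
#include <stdbool.h>

#define BLOCK_LEN 4

int main(void)
{
  uint64_t packets[2][BLOCK_LEN];   /* two block buffers: one to read into, one to write from */
  int readIndex = 0, writeIndex = 0;
  int i = 0, j = 0;                 /* read and write iterators, as in the kernel */
  bool canWrite = false;

  for (uint64_t word = 0; word < 16; word++) {      /* stand-in for the 10GbE input stream */
    packets[readIndex][i] = word;                   /* capture into the current read buffer */
    if (i == BLOCK_LEN - 1) {                       /* block complete: swap buffers */
      writeIndex = readIndex;
      readIndex = !readIndex;
      i = 0;
      canWrite = true;
    } else {
      i++;
    }

    if (canWrite) {                                 /* drain one word of the stored block */
      printf("out: %llu\n", (unsigned long long)packets[writeIndex][j]);
      if (j == BLOCK_LEN - 1) { j = 0; canWrite = false; } else { j++; }
    }
  }
  return 0;
}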