diff --git a/RTCP/Cobalt/GPUProc/share/gpu/kernels/BeamFormer.cu b/RTCP/Cobalt/GPUProc/share/gpu/kernels/BeamFormer.cu index b4b8137e69be4b4b3e25e4e70d1c9696879013e6..9cee345bb9334f68997e7c41c21a36a49a8bc71d 100644 --- a/RTCP/Cobalt/GPUProc/share/gpu/kernels/BeamFormer.cu +++ b/RTCP/Cobalt/GPUProc/share/gpu/kernels/BeamFormer.cu @@ -42,7 +42,7 @@ #define DELAY_INDEX(s) (delayIndices[s]) //# Typedefs used to map input data on arrays -typedef double (*DelaysType)[1][NR_DELAYS][NR_TABS]; +typedef double (*DelaysType)[NR_DELAYS][NR_TABS]; #ifdef FLYS_EYE typedef float2 (*BandPassCorrectedType)[NR_INPUT_STATIONS][NR_CHANNELS][NR_SAMPLES_PER_CHANNEL][NR_POLARIZATIONS]; #else @@ -60,7 +60,6 @@ typedef float2 (*ComplexVoltagesType)[NR_CHANNELS][NR_SAMPLES_PER_CHANNEL][NR_T * \param[in] delaysPtr 3D input array of complex valued delays to be applied to the correctData samples. There is a delay for each Sub-Array Pointing, station, and Tied Array Beam triplet. * \param[in] delayIndices 1D input array of which stations to use out of delaysPtr, if a subset of the stations need to be beam formed * \param[in] subbandFrequency central frequency of the subband - * \param[in] sap number (index) of the Sub-Array Pointing (aka (station) beam) * * Pre-processor input symbols (some are tied to the execution configuration) * Symbol | Valid Values | Description @@ -84,8 +83,7 @@ extern "C" __global__ void beamFormer( void *complexVoltagesPtr, const unsigned *stationIndices, // lookup index for stations to use in samplesPtr const void *delaysPtr, const unsigned *delayIndices, // lookup index for stations to use in delaysPtr - double subbandFrequency, - unsigned sap) + double subbandFrequency) { ComplexVoltagesType complexVoltages = (ComplexVoltagesType) complexVoltagesPtr; BandPassCorrectedType samples = (BandPassCorrectedType) samplesPtr; @@ -132,7 +130,7 @@ extern "C" __global__ void beamFormer( void *complexVoltagesPtr, fcomplex weight_00; // assign the weights to register variables if (first_station + 0 < NR_OUTPUT_STATIONS) { // Number of station might be larger then 32: // We then do multiple passes to span all stations - double delay = (*delays)[sap][DELAY_INDEX(first_station + 0)][tab_or_zero]; + double delay = (*delays)[DELAY_INDEX(first_station + 0)][tab_or_zero]; dcomplex weight = dphaseShift(frequency, delay); weight_00 = make_float2(weight.x, weight.y); } @@ -141,7 +139,7 @@ extern "C" __global__ void beamFormer( void *complexVoltagesPtr, #if NR_STATIONS_PER_PASS >= 2 fcomplex weight_01; if (first_station + 1 < NR_OUTPUT_STATIONS) { - double delay = (*delays)[sap][DELAY_INDEX(first_station + 1)][tab_or_zero]; + double delay = (*delays)[DELAY_INDEX(first_station + 1)][tab_or_zero]; dcomplex weight = dphaseShift(frequency, delay); weight_01 = make_float2(weight.x, weight.y); } @@ -150,7 +148,7 @@ extern "C" __global__ void beamFormer( void *complexVoltagesPtr, #if NR_STATIONS_PER_PASS >= 3 fcomplex weight_02; if (first_station + 2 < NR_OUTPUT_STATIONS) { - double delay = (*delays)[sap][DELAY_INDEX(first_station + 2)][tab_or_zero]; + double delay = (*delays)[DELAY_INDEX(first_station + 2)][tab_or_zero]; dcomplex weight = dphaseShift(frequency, delay); weight_02 = make_float2(weight.x, weight.y); } @@ -159,7 +157,7 @@ extern "C" __global__ void beamFormer( void *complexVoltagesPtr, #if NR_STATIONS_PER_PASS >= 4 fcomplex weight_03; if (first_station + 3 < NR_OUTPUT_STATIONS) { - double delay = (*delays)[sap][DELAY_INDEX(first_station + 3)][tab_or_zero]; + double delay = (*delays)[DELAY_INDEX(first_station + 3)][tab_or_zero]; dcomplex weight = dphaseShift(frequency, delay); weight_03 = make_float2(weight.x, weight.y); } @@ -168,7 +166,7 @@ extern "C" __global__ void beamFormer( void *complexVoltagesPtr, #if NR_STATIONS_PER_PASS >= 5 fcomplex weight_04; if (first_station + 4 < NR_OUTPUT_STATIONS) { - double delay = (*delays)[sap][DELAY_INDEX(first_station + 4)][tab_or_zero]; + double delay = (*delays)[DELAY_INDEX(first_station + 4)][tab_or_zero]; dcomplex weight = dphaseShift(frequency, delay); weight_04 = make_float2(weight.x, weight.y); } @@ -177,7 +175,7 @@ extern "C" __global__ void beamFormer( void *complexVoltagesPtr, #if NR_STATIONS_PER_PASS >= 6 fcomplex weight_05; if (first_station + 5 < NR_OUTPUT_STATIONS) { - double delay = (*delays)[sap][DELAY_INDEX(first_station + 5)][tab_or_zero]; + double delay = (*delays)[DELAY_INDEX(first_station + 5)][tab_or_zero]; dcomplex weight = dphaseShift(frequency, delay); weight_05 = make_float2(weight.x, weight.y); } @@ -186,7 +184,7 @@ extern "C" __global__ void beamFormer( void *complexVoltagesPtr, #if NR_STATIONS_PER_PASS >= 7 fcomplex weight_06; if (first_station + 6 < NR_OUTPUT_STATIONS) { - double delay = (*delays)[sap][DELAY_INDEX(first_station + 6)][tab_or_zero]; + double delay = (*delays)[DELAY_INDEX(first_station + 6)][tab_or_zero]; dcomplex weight = dphaseShift(frequency, delay); weight_06 = make_float2(weight.x, weight.y); } @@ -195,7 +193,7 @@ extern "C" __global__ void beamFormer( void *complexVoltagesPtr, #if NR_STATIONS_PER_PASS >= 8 fcomplex weight_07; if (first_station + 7 < NR_OUTPUT_STATIONS) { - double delay = (*delays)[sap][DELAY_INDEX(first_station + 7)][tab_or_zero]; + double delay = (*delays)[DELAY_INDEX(first_station + 7)][tab_or_zero]; dcomplex weight = dphaseShift(frequency, delay); weight_07 = make_float2(weight.x, weight.y); } @@ -204,7 +202,7 @@ extern "C" __global__ void beamFormer( void *complexVoltagesPtr, #if NR_STATIONS_PER_PASS >= 9 fcomplex weight_08; if (first_station + 8 < NR_OUTPUT_STATIONS) { - double delay = (*delays)[sap][DELAY_INDEX(first_station + 8)][tab_or_zero]; + double delay = (*delays)[DELAY_INDEX(first_station + 8)][tab_or_zero]; dcomplex weight = dphaseShift(frequency, delay); weight_08 = make_float2(weight.x, weight.y); } @@ -213,7 +211,7 @@ extern "C" __global__ void beamFormer( void *complexVoltagesPtr, #if NR_STATIONS_PER_PASS >= 10 fcomplex weight_09; if (first_station + 9 < NR_OUTPUT_STATIONS) { - double delay = (*delays)[sap][DELAY_INDEX(first_station + 9)][tab_or_zero]; + double delay = (*delays)[DELAY_INDEX(first_station + 9)][tab_or_zero]; dcomplex weight = dphaseShift(frequency, delay); weight_09 = make_float2(weight.x, weight.y); } @@ -222,7 +220,7 @@ extern "C" __global__ void beamFormer( void *complexVoltagesPtr, #if NR_STATIONS_PER_PASS >= 11 fcomplex weight_10; if (first_station + 10 < NR_OUTPUT_STATIONS) { - double delay = (*delays)[sap][DELAY_INDEX(first_station + 10)][tab_or_zero]; + double delay = (*delays)[DELAY_INDEX(first_station + 10)][tab_or_zero]; dcomplex weight = dphaseShift(frequency, delay); weight_10 = make_float2(weight.x, weight.y); } @@ -231,7 +229,7 @@ extern "C" __global__ void beamFormer( void *complexVoltagesPtr, #if NR_STATIONS_PER_PASS >= 12 fcomplex weight_11; if (first_station + 11 < NR_OUTPUT_STATIONS) { - double delay = (*delays)[sap][DELAY_INDEX(first_station + 11)][tab_or_zero]; + double delay = (*delays)[DELAY_INDEX(first_station + 11)][tab_or_zero]; dcomplex weight = dphaseShift(frequency, delay); weight_11 = make_float2(weight.x, weight.y); } @@ -240,7 +238,7 @@ extern "C" __global__ void beamFormer( void *complexVoltagesPtr, #if NR_STATIONS_PER_PASS >= 13 fcomplex weight_12; if (first_station + 12 < NR_OUTPUT_STATIONS) { - double delay = (*delays)[sap][DELAY_INDEX(first_station + 12)][tab_or_zero]; + double delay = (*delays)[DELAY_INDEX(first_station + 12)][tab_or_zero]; dcomplex weight = dphaseShift(frequency, delay); weight_12 = make_float2(weight.x, weight.y); } @@ -249,7 +247,7 @@ extern "C" __global__ void beamFormer( void *complexVoltagesPtr, #if NR_STATIONS_PER_PASS >= 14 fcomplex weight_13; if (first_station + 13 < NR_OUTPUT_STATIONS) { - double delay = (*delays)[sap][DELAY_INDEX(first_station + 13)][tab_or_zero]; + double delay = (*delays)[DELAY_INDEX(first_station + 13)][tab_or_zero]; dcomplex weight = dphaseShift(frequency, delay); weight_13 = make_float2(weight.x, weight.y); } @@ -258,7 +256,7 @@ extern "C" __global__ void beamFormer( void *complexVoltagesPtr, #if NR_STATIONS_PER_PASS >= 15 fcomplex weight_14; if (first_station + 14 < NR_OUTPUT_STATIONS) { - double delay = (*delays)[sap][DELAY_INDEX(first_station + 14)][tab_or_zero]; + double delay = (*delays)[DELAY_INDEX(first_station + 14)][tab_or_zero]; dcomplex weight = dphaseShift(frequency, delay); weight_14 = make_float2(weight.x, weight.y); } @@ -267,7 +265,7 @@ extern "C" __global__ void beamFormer( void *complexVoltagesPtr, #if NR_STATIONS_PER_PASS >= 16 fcomplex weight_15; if (first_station + 15 < NR_OUTPUT_STATIONS) { - double delay = (*delays)[sap][DELAY_INDEX(first_station + 15)][tab_or_zero]; + double delay = (*delays)[DELAY_INDEX(first_station + 15)][tab_or_zero]; dcomplex weight = dphaseShift(frequency, delay); weight_15 = make_float2(weight.x, weight.y); } @@ -276,7 +274,7 @@ extern "C" __global__ void beamFormer( void *complexVoltagesPtr, #if NR_STATIONS_PER_PASS >= 17 fcomplex weight_16; if (first_station + 16 < NR_OUTPUT_STATIONS) { - double delay = (*delays)[sap][DELAY_INDEX(first_station + 16)][tab_or_zero]; + double delay = (*delays)[DELAY_INDEX(first_station + 16)][tab_or_zero]; dcomplex weight = dphaseShift(frequency, delay); weight_16 = make_float2(weight.x, weight.y); } @@ -285,7 +283,7 @@ extern "C" __global__ void beamFormer( void *complexVoltagesPtr, #if NR_STATIONS_PER_PASS >= 18 fcomplex weight_17; if (first_station + 17 < NR_OUTPUT_STATIONS) { - double delay = (*delays)[sap][DELAY_INDEX(first_station + 17)][tab_or_zero]; + double delay = (*delays)[DELAY_INDEX(first_station + 17)][tab_or_zero]; dcomplex weight = dphaseShift(frequency, delay); weight_17 = make_float2(weight.x, weight.y); } @@ -294,7 +292,7 @@ extern "C" __global__ void beamFormer( void *complexVoltagesPtr, #if NR_STATIONS_PER_PASS >= 19 fcomplex weight_18; if (first_station + 18 < NR_OUTPUT_STATIONS) { - double delay = (*delays)[sap][DELAY_INDEX(first_station + 18)][tab_or_zero]; + double delay = (*delays)[DELAY_INDEX(first_station + 18)][tab_or_zero]; dcomplex weight = dphaseShift(frequency, delay); weight_18 = make_float2(weight.x, weight.y); } @@ -303,7 +301,7 @@ extern "C" __global__ void beamFormer( void *complexVoltagesPtr, #if NR_STATIONS_PER_PASS >= 20 fcomplex weight_19; if (first_station + 19 < NR_OUTPUT_STATIONS) { - double delay = (*delays)[sap][DELAY_INDEX(first_station + 19)][tab_or_zero]; + double delay = (*delays)[DELAY_INDEX(first_station + 19)][tab_or_zero]; dcomplex weight = dphaseShift(frequency, delay); weight_19 = make_float2(weight.x, weight.y); } @@ -312,7 +310,7 @@ extern "C" __global__ void beamFormer( void *complexVoltagesPtr, #if NR_STATIONS_PER_PASS >= 21 fcomplex weight_20; if (first_station + 20 < NR_OUTPUT_STATIONS) { - double delay = (*delays)[sap][DELAY_INDEX(first_station + 20)][tab_or_zero]; + double delay = (*delays)[DELAY_INDEX(first_station + 20)][tab_or_zero]; dcomplex weight = dphaseShift(frequency, delay); weight_20 = make_float2(weight.x, weight.y); } @@ -321,7 +319,7 @@ extern "C" __global__ void beamFormer( void *complexVoltagesPtr, #if NR_STATIONS_PER_PASS >= 22 fcomplex weight_21; if (first_station + 21 < NR_OUTPUT_STATIONS) { - double delay = (*delays)[sap][DELAY_INDEX(first_station + 21)][tab_or_zero]; + double delay = (*delays)[DELAY_INDEX(first_station + 21)][tab_or_zero]; dcomplex weight = dphaseShift(frequency, delay); weight_21 = make_float2(weight.x, weight.y); } @@ -330,7 +328,7 @@ extern "C" __global__ void beamFormer( void *complexVoltagesPtr, #if NR_STATIONS_PER_PASS >= 23 fcomplex weight_22; if (first_station + 22 < NR_OUTPUT_STATIONS) { - double delay = (*delays)[sap][DELAY_INDEX(first_station + 22)][tab_or_zero]; + double delay = (*delays)[DELAY_INDEX(first_station + 22)][tab_or_zero]; dcomplex weight = dphaseShift(frequency, delay); weight_22 = make_float2(weight.x, weight.y); } @@ -339,7 +337,7 @@ extern "C" __global__ void beamFormer( void *complexVoltagesPtr, #if NR_STATIONS_PER_PASS >= 24 fcomplex weight_23; if (first_station + 23 < NR_OUTPUT_STATIONS) { - double delay = (*delays)[sap][DELAY_INDEX(first_station + 23)][tab_or_zero]; + double delay = (*delays)[DELAY_INDEX(first_station + 23)][tab_or_zero]; dcomplex weight = dphaseShift(frequency, delay); weight_23 = make_float2(weight.x, weight.y); } @@ -348,7 +346,7 @@ extern "C" __global__ void beamFormer( void *complexVoltagesPtr, #if NR_STATIONS_PER_PASS >= 25 fcomplex weight_24; if (first_station + 24 < NR_OUTPUT_STATIONS) { - double delay = (*delays)[sap][DELAY_INDEX(first_station + 24)][tab_or_zero]; + double delay = (*delays)[DELAY_INDEX(first_station + 24)][tab_or_zero]; dcomplex weight = dphaseShift(frequency, delay); weight_24 = make_float2(weight.x, weight.y); } @@ -357,7 +355,7 @@ extern "C" __global__ void beamFormer( void *complexVoltagesPtr, #if NR_STATIONS_PER_PASS >= 26 fcomplex weight_25; if (first_station + 25 < NR_OUTPUT_STATIONS) { - double delay = (*delays)[sap][DELAY_INDEX(first_station + 25)][tab_or_zero]; + double delay = (*delays)[DELAY_INDEX(first_station + 25)][tab_or_zero]; dcomplex weight = dphaseShift(frequency, delay); weight_25 = make_float2(weight.x, weight.y); } @@ -366,7 +364,7 @@ extern "C" __global__ void beamFormer( void *complexVoltagesPtr, #if NR_STATIONS_PER_PASS >= 27 fcomplex weight_26; if (first_station + 26 < NR_OUTPUT_STATIONS) { - double delay = (*delays)[sap][DELAY_INDEX(first_station + 26)][tab_or_zero]; + double delay = (*delays)[DELAY_INDEX(first_station + 26)][tab_or_zero]; dcomplex weight = dphaseShift(frequency, delay); weight_26 = make_float2(weight.x, weight.y); } @@ -375,7 +373,7 @@ extern "C" __global__ void beamFormer( void *complexVoltagesPtr, #if NR_STATIONS_PER_PASS >= 28 fcomplex weight_27; if (first_station + 27 < NR_OUTPUT_STATIONS) { - double delay = (*delays)[sap][DELAY_INDEX(first_station + 27)][tab_or_zero]; + double delay = (*delays)[DELAY_INDEX(first_station + 27)][tab_or_zero]; dcomplex weight = dphaseShift(frequency, delay); weight_27 = make_float2(weight.x, weight.y); } @@ -384,7 +382,7 @@ extern "C" __global__ void beamFormer( void *complexVoltagesPtr, #if NR_STATIONS_PER_PASS >= 29 fcomplex weight_28; if (first_station + 28 < NR_OUTPUT_STATIONS) { - double delay = (*delays)[sap][DELAY_INDEX(first_station + 28)][tab_or_zero]; + double delay = (*delays)[DELAY_INDEX(first_station + 28)][tab_or_zero]; dcomplex weight = dphaseShift(frequency, delay); weight_28 = make_float2(weight.x, weight.y); } @@ -393,7 +391,7 @@ extern "C" __global__ void beamFormer( void *complexVoltagesPtr, #if NR_STATIONS_PER_PASS >= 30 fcomplex weight_29; if (first_station + 29 < NR_OUTPUT_STATIONS) { - double delay = (*delays)[sap][DELAY_INDEX(first_station + 29)][tab_or_zero]; + double delay = (*delays)[DELAY_INDEX(first_station + 29)][tab_or_zero]; dcomplex weight = dphaseShift(frequency, delay); weight_29 = make_float2(weight.x, weight.y); } @@ -402,7 +400,7 @@ extern "C" __global__ void beamFormer( void *complexVoltagesPtr, #if NR_STATIONS_PER_PASS >= 31 fcomplex weight_30; if (first_station + 30 < NR_OUTPUT_STATIONS) { - double delay = (*delays)[sap][DELAY_INDEX(first_station + 30)][tab_or_zero]; + double delay = (*delays)[DELAY_INDEX(first_station + 30)][tab_or_zero]; dcomplex weight = dphaseShift(frequency, delay); weight_30 = make_float2(weight.x, weight.y); } @@ -411,7 +409,7 @@ extern "C" __global__ void beamFormer( void *complexVoltagesPtr, #if NR_STATIONS_PER_PASS >= 32 fcomplex weight_31; if (first_station + 31 < NR_OUTPUT_STATIONS) { - double delay = (*delays)[sap][DELAY_INDEX(first_station + 31)][tab_or_zero]; + double delay = (*delays)[DELAY_INDEX(first_station + 31)][tab_or_zero]; dcomplex weight = dphaseShift(frequency, delay); weight_31 = make_float2(weight.x, weight.y); } diff --git a/RTCP/Cobalt/GPUProc/share/gpu/kernels/DelayAndBandPass.cu b/RTCP/Cobalt/GPUProc/share/gpu/kernels/DelayAndBandPass.cu index b65e940f6a11757253354b9b301f4c68fca1e892..df27ced597f46bdc8d8f38fcbb889175ea41984a 100644 --- a/RTCP/Cobalt/GPUProc/share/gpu/kernels/DelayAndBandPass.cu +++ b/RTCP/Cobalt/GPUProc/share/gpu/kernels/DelayAndBandPass.cu @@ -75,7 +75,7 @@ typedef fcomplex(*OutputDataType)[NR_STATIONS][NR_POLARIZATIONS][NR_CHANNELS][N #endif typedef fcomplex(*InputDataType)[NR_STATIONS][NR_POLARIZATIONS][NR_SAMPLES_PER_CHANNEL][NR_CHANNELS]; -typedef const double(*DelaysType)[1][NR_DELAYS][NR_POLARIZATIONS]; // 2 Polarizations; in seconds +typedef const double(*DelaysType)[NR_DELAYS][NR_POLARIZATIONS]; // 2 Polarizations; in seconds typedef const double2(*Phase0sType)[NR_STATIONS]; // 2 Polarizations; in radians typedef const float(*BandPassFactorsType)[NR_CHANNELS]; @@ -111,13 +111,12 @@ inline __device__ fcomplex sincos_d2f(double phi) * 4D array [station][polarization][sample][channel][complex] * of ::fcomplex * @param[in] subbandFrequency center freqency of the subband -* @param[in] beam index number of the beam * @param[in] delaysAtBeginPtr pointer to delay data of ::DelaysType, -* a 2D array [beam][station] of float2 (real: +* a 1D array [station] of float2 (real: * 2 polarizations), containing delays in * seconds at begin of integration period * @param[in] delaysAfterEndPtr pointer to delay data of ::DelaysType, -* a 2D array [beam][station] of float2 (real: +* a 1D array [station] of float2 (real: * 2 polarizations), containing delays in * seconds after end of integration period * @param[in] phase0sPt r pointer to phase offset data of @@ -134,7 +133,6 @@ extern "C" { const fcomplex * filteredDataPtr, const unsigned * delayIndices, double subbandFrequency, - unsigned beam, // =nrSAPS const double * delaysAtBeginPtr, const double * delaysAfterEndPtr, const double * phase0sPtr, @@ -226,8 +224,8 @@ extern "C" { ? subbandFrequency : subbandFrequency - 0.5 * SUBBAND_BANDWIDTH + channel * (SUBBAND_BANDWIDTH / NR_CHANNELS); - const double2 delayAtBegin = make_double2((*delaysAtBegin)[beam][delayIdx][0], (*delaysAtBegin)[beam][delayIdx][1]); - const double2 delayAfterEnd = make_double2((*delaysAfterEnd)[beam][delayIdx][0], (*delaysAfterEnd)[beam][delayIdx][1]); + const double2 delayAtBegin = make_double2((*delaysAtBegin)[delayIdx][0], (*delaysAtBegin)[delayIdx][1]); + const double2 delayAfterEnd = make_double2((*delaysAfterEnd)[delayIdx][0], (*delaysAfterEnd)[delayIdx][1]); // Calculate the angles to rotate for for the first and (beyond the) last sample. // diff --git a/RTCP/Cobalt/GPUProc/share/gpu/kernels/FIR_Filter.cu b/RTCP/Cobalt/GPUProc/share/gpu/kernels/FIR_Filter.cu index 034f046217bf9684fea8ebb7068b2665aa3721cb..dcf82d04b1729a6376af4379a319c9066ea016c5 100644 --- a/RTCP/Cobalt/GPUProc/share/gpu/kernels/FIR_Filter.cu +++ b/RTCP/Cobalt/GPUProc/share/gpu/kernels/FIR_Filter.cu @@ -124,7 +124,7 @@ inline __device__ float2 sincos_d2f_select(double phi, int ri) return make_float2((ri?s:c),(ri?c:-s)); } -typedef const double(*DelaysType)[1][NR_STABS][NR_POLARIZATIONS]; // 2 Polarizations; in seconds +typedef const double(*DelaysType)[NR_STABS][NR_POLARIZATIONS]; // 2 Polarizations; in seconds typedef const double(*Phase0sType)[NR_STABS][NR_POLARIZATIONS]; // 2 Polarizations; in radians #endif /* DOPPLER_CORRECTION */ @@ -201,8 +201,8 @@ __global__ void FIR_filter( void *filteredDataPtr, #ifdef DOPPLER_CORRECTION DelaysType delaysAtBegin = (DelaysType)delaysAtBeginPtr; DelaysType delaysAfterEnd = (DelaysType)delaysAfterEndPtr; - const double delayAtBegin = (*delaysAtBegin)[0][station][pol]; - const double delayAfterEnd = (*delaysAfterEnd)[0][station][pol]; + const double delayAtBegin = (*delaysAtBegin)[station][pol]; + const double delayAfterEnd = (*delaysAfterEnd)[station][pol]; // Calculate the angles to rotate for for the first and (beyond the) last sample. // diff --git a/RTCP/Cobalt/GPUProc/src/Kernels/BeamFormerKernel.cc b/RTCP/Cobalt/GPUProc/src/Kernels/BeamFormerKernel.cc index 907d3265b782051508726d3ad74ff0298c5dd8a3..a75eadf5ac1e0614799129cfc80fbfe84512b4f3 100644 --- a/RTCP/Cobalt/GPUProc/src/Kernels/BeamFormerKernel.cc +++ b/RTCP/Cobalt/GPUProc/src/Kernels/BeamFormerKernel.cc @@ -50,7 +50,6 @@ namespace LOFAR unsigned nrDelays_, unsigned nrChannels_, unsigned nrSamplesPerChannel_, - unsigned nrSAPs_, unsigned nrTABs_, double subbandBandWidth_, bool doFlysEye_, @@ -70,7 +69,6 @@ namespace LOFAR nrChannels(nrChannels_), nrSamplesPerChannel(nrSamplesPerChannel_), - nrSAPs(nrSAPs_), nrTABs(nrTABs_), subbandBandwidth(subbandBandWidth_), doFlysEye(doFlysEye_) @@ -100,7 +98,7 @@ namespace LOFAR (size_t) delayIndices.size() * sizeof delayIndices[0]; case BeamFormerKernel::BEAM_FORMER_DELAYS: return - (size_t) nrSAPs * nrDelays * + (size_t) nrDelays * nrTABs * sizeof(double); default: THROW(GPUProcException, "Invalid bufferType (" << bufferType << ")"); @@ -151,10 +149,9 @@ namespace LOFAR } void BeamFormerKernel::enqueue(const BlockID &blockId, - double subbandFrequency, unsigned SAP) + double subbandFrequency) { setArg(5, subbandFrequency); - setArg(6, SAP); Kernel::enqueue(blockId); } diff --git a/RTCP/Cobalt/GPUProc/src/Kernels/BeamFormerKernel.h b/RTCP/Cobalt/GPUProc/src/Kernels/BeamFormerKernel.h index b9a91c3966f88ae989c92531ae5f5f238856c7f2..e405dba8250cee48dca67674fd8eafcbeb151665 100644 --- a/RTCP/Cobalt/GPUProc/src/Kernels/BeamFormerKernel.h +++ b/RTCP/Cobalt/GPUProc/src/Kernels/BeamFormerKernel.h @@ -57,7 +57,6 @@ namespace LOFAR unsigned nrDelays, unsigned nrChannels, unsigned nrSamplesPerChannel, - unsigned nrSAPs, unsigned nrTABs, double subbandWidth, bool doFlysEye, @@ -97,7 +96,7 @@ namespace LOFAR const Parameters ¶m); void enqueue(const BlockID &blockId, - double subbandFrequency, unsigned SAP); + double subbandFrequency); gpu::DeviceMemory stationIndices; gpu::DeviceMemory delayIndices; diff --git a/RTCP/Cobalt/GPUProc/src/Kernels/DelayAndBandPassKernel.cc b/RTCP/Cobalt/GPUProc/src/Kernels/DelayAndBandPassKernel.cc index 730e54e45ddb1025e31c1fa8d2317168e65bb18e..5cadf9eb246f31889f0f63f276c0755da087372d 100644 --- a/RTCP/Cobalt/GPUProc/src/Kernels/DelayAndBandPassKernel.cc +++ b/RTCP/Cobalt/GPUProc/src/Kernels/DelayAndBandPassKernel.cc @@ -53,7 +53,6 @@ namespace LOFAR unsigned nrSamplesPerChannel_, unsigned clockMHz_, double subbandBandwidth_, - unsigned nrSAPs_, bool correlator_, bool delayCompensation_, bool correctBandPass_, @@ -70,7 +69,6 @@ namespace LOFAR nrSamplesPerChannel(nrSamplesPerChannel_), clockMHz(clockMHz_), subbandBandwidth(subbandBandwidth_), - nrSAPs(nrSAPs_), delayCompensation(delayCompensation_), correctBandPass(correctBandPass_), transpose(transpose_), @@ -114,7 +112,7 @@ namespace LOFAR delayIndices.size() * sizeof delayIndices[0]; case DelayAndBandPassKernel::DELAYS: return - (size_t) nrSAPs * nrDelays * + (size_t) nrDelays * NR_POLARIZATIONS * sizeof(double); case DelayAndBandPassKernel::PHASE_ZEROS: return @@ -153,10 +151,10 @@ namespace LOFAR setArg(0, buffers.output); setArg(1, buffers.input); setArg(2, delayIndices); - setArg(5, delaysAtBegin); - setArg(6, delaysAfterEnd); - setArg(7, phase0s); - setArg(8, bandPassCorrectionWeights); + setArg(4, delaysAtBegin); + setArg(5, delaysAfterEnd); + setArg(6, phase0s); + setArg(7, bandPassCorrectionWeights); if (params.transpose) setEnqueueWorkSizes( gpu::Grid(256, @@ -192,10 +190,9 @@ namespace LOFAR void DelayAndBandPassKernel::enqueue(const BlockID &blockId, - double subbandFrequency, unsigned SAP) + double subbandFrequency) { setArg(3, subbandFrequency); - setArg(4, SAP); Kernel::enqueue(blockId); } diff --git a/RTCP/Cobalt/GPUProc/src/Kernels/DelayAndBandPassKernel.h b/RTCP/Cobalt/GPUProc/src/Kernels/DelayAndBandPassKernel.h index b37693bd27234c5bde854afe50b80b9d98c5bacf..f3a773e82d7cefc448a0a48081931d9c222a311a 100644 --- a/RTCP/Cobalt/GPUProc/src/Kernels/DelayAndBandPassKernel.h +++ b/RTCP/Cobalt/GPUProc/src/Kernels/DelayAndBandPassKernel.h @@ -64,7 +64,6 @@ namespace LOFAR unsigned nrSamplesPerChannel, unsigned clockMHz, double subbandBandwidth, - unsigned nrSAPs, bool correlator, bool delayCompensation, bool correctBandPass, @@ -85,8 +84,6 @@ namespace LOFAR unsigned clockMHz; double subbandBandwidth; - unsigned nrSAPs; - bool delayCompensation; bool correctBandPass; bool transpose; @@ -108,7 +105,7 @@ namespace LOFAR void enqueue(const BlockID &blockId, - double subbandFrequency, unsigned SAP); + double subbandFrequency); // Input parameters for the delay compensation gpu::DeviceMemory delayIndices; diff --git a/RTCP/Cobalt/GPUProc/src/Kernels/FIR_FilterKernel.cc b/RTCP/Cobalt/GPUProc/src/Kernels/FIR_FilterKernel.cc index 4d4e9dacdd10ca7bc965a6947e8c22739a7b86eb..888cfd4ac4d482257304ca9a18e4fd9d33d7eb7e 100644 --- a/RTCP/Cobalt/GPUProc/src/Kernels/FIR_FilterKernel.cc +++ b/RTCP/Cobalt/GPUProc/src/Kernels/FIR_FilterKernel.cc @@ -119,7 +119,7 @@ namespace LOFAR : (nrBitsPerSample == 4 ? 2U : nrBytesPerComplexSample())); case FIR_FilterKernel::DELAYS: return (dopplerCorrection? - (size_t) 1 * nrSTABs * // nrSAPs=1 here + (size_t) nrSTABs * NR_POLARIZATIONS * sizeof(double) : 0); default: THROW(GPUProcException, "Invalid bufferType (" << bufferType << ")"); diff --git a/RTCP/Cobalt/GPUProc/src/Pipelines/Pipeline.cc b/RTCP/Cobalt/GPUProc/src/Pipelines/Pipeline.cc index b113cc0e7c69a982dadfe55a00b6749fee9ecbc4..b9f9bdaecfec2feafeed8a1e6d7a49d3c39ae0f7 100644 --- a/RTCP/Cobalt/GPUProc/src/Pipelines/Pipeline.cc +++ b/RTCP/Cobalt/GPUProc/src/Pipelines/Pipeline.cc @@ -499,7 +499,7 @@ namespace LOFAR // Translate the metadata as provided by receiver for (size_t stat = 0; stat < ps.settings.antennaFields.size(); ++stat) { - input->applyMetaData(ps, stat, SAP, input->metaData[stat]); + input->applyMetaData(ps, stat, input->metaData[stat]); } preprocessTimer.stop(); diff --git a/RTCP/Cobalt/GPUProc/src/SubbandProcs/BeamFormerCoherentStep.cc b/RTCP/Cobalt/GPUProc/src/SubbandProcs/BeamFormerCoherentStep.cc index 87c335bbac416b0020650e0e13890f22f24ec501..517ccff975860f5055af656ba598c9c9e52e7278 100644 --- a/RTCP/Cobalt/GPUProc/src/SubbandProcs/BeamFormerCoherentStep.cc +++ b/RTCP/Cobalt/GPUProc/src/SubbandProcs/BeamFormerCoherentStep.cc @@ -53,7 +53,6 @@ namespace LOFAR obsParameters.nrStations, // nrDelays preParameters.nrDelayCompensationChannels, obsParameters.blockSize / preParameters.nrDelayCompensationChannels, - bfParameters.nrSAPs, bfParameters.maxNrCoherentTABsPerSAP, obsParameters.subbandWidth, bfParameters.doFlysEye, @@ -220,7 +219,7 @@ namespace LOFAR } - void BeamFormerCoherentStep::writeInput(const MultiDimArrayHostBuffer<double, 3>& tabDelays) + void BeamFormerCoherentStep::writeInput(const MultiDimArrayHostBuffer<double, 2>& tabDelays) { // Upload the new beamformerDelays (pointings) to the GPU htodStream->waitEvent(executeFinished); @@ -234,10 +233,9 @@ namespace LOFAR executeStream->waitEvent(inputFinished); executeStream->waitEvent(outputFinished); - // The centralFrequency and SAP immediate kernel args must outlive kernel runs. + // The centralFrequency immediate kernel arg must outlive kernel runs. beamFormerKernel->enqueue(input.blockID, - obsParameters.subbands[input.blockID.globalSubbandIdx].centralFrequency, - obsParameters.subbands[input.blockID.globalSubbandIdx].SAP); + obsParameters.subbands[input.blockID.globalSubbandIdx].centralFrequency); coherentTransposeKernel->enqueue(input.blockID); diff --git a/RTCP/Cobalt/GPUProc/src/SubbandProcs/BeamFormerCoherentStep.h b/RTCP/Cobalt/GPUProc/src/SubbandProcs/BeamFormerCoherentStep.h index c871db064dfaeaa455da6e9bf60aa5e9e06f0f65..1e760ed1e0818dcb399b96c8831ddcaed735fddf 100644 --- a/RTCP/Cobalt/GPUProc/src/SubbandProcs/BeamFormerCoherentStep.h +++ b/RTCP/Cobalt/GPUProc/src/SubbandProcs/BeamFormerCoherentStep.h @@ -86,7 +86,7 @@ namespace LOFAR gpu::DeviceMemory outputBuffer(); - void writeInput(const MultiDimArrayHostBuffer<double, 3>& tabDelays); + void writeInput(const MultiDimArrayHostBuffer<double, 2>& tabDelays); void process(const SubbandProcInputData &input); diff --git a/RTCP/Cobalt/GPUProc/src/SubbandProcs/BeamFormerPreprocessingStep.cc b/RTCP/Cobalt/GPUProc/src/SubbandProcs/BeamFormerPreprocessingStep.cc index 78a289100f6a80235aab273a16b231c302f2155d..2a8d53660504bfaccdc3edefbfb155a66e0d3bf3 100644 --- a/RTCP/Cobalt/GPUProc/src/SubbandProcs/BeamFormerPreprocessingStep.cc +++ b/RTCP/Cobalt/GPUProc/src/SubbandProcs/BeamFormerPreprocessingStep.cc @@ -77,7 +77,6 @@ namespace LOFAR obsParameters.blockSize / preParameters.nrDelayCompensationChannels, obsParameters.clockMHz, //not needed in beamformer pipeline obsParameters.subbandWidth, - obsParameters.nrSAPs, false, // correlator preParameters.delayCompensationEnabled, false, // correctBandPass @@ -189,8 +188,7 @@ namespace LOFAR // The centralFrequency and SAP immediate kernel args must outlive kernel runs. delayCompensationKernel->enqueue( input.blockID, - obsParameters.subbands[input.blockID.globalSubbandIdx].centralFrequency, - obsParameters.subbands[input.blockID.globalSubbandIdx].SAP); + obsParameters.subbands[input.blockID.globalSubbandIdx].centralFrequency); bandPassCorrectionKernel->enqueue( input.blockID); diff --git a/RTCP/Cobalt/GPUProc/src/SubbandProcs/CorrelatorStep.cc b/RTCP/Cobalt/GPUProc/src/SubbandProcs/CorrelatorStep.cc index f4ffbffcca420ad705cf625fc5982484e7801820..70114959a0568730c032072b0cc5dffb40774d4e 100644 --- a/RTCP/Cobalt/GPUProc/src/SubbandProcs/CorrelatorStep.cc +++ b/RTCP/Cobalt/GPUProc/src/SubbandProcs/CorrelatorStep.cc @@ -100,7 +100,6 @@ namespace LOFAR obsParameters.blockSize / corParameters.nrChannels, obsParameters.clockMHz, obsParameters.subbandWidth, - obsParameters.nrSAPs, true, // correlator preParameters.delayCompensationEnabled, preParameters.bandPassCorrectionEnabled, @@ -410,8 +409,7 @@ namespace LOFAR // The centralFrequency and SAP immediate kernel args must outlive kernel runs. delayAndBandPassKernel->enqueue( input.blockID, - obsParameters.subbands[input.blockID.globalSubbandIdx].centralFrequency, - obsParameters.subbands[input.blockID.globalSubbandIdx].SAP); + obsParameters.subbands[input.blockID.globalSubbandIdx].centralFrequency); correlatorKernel->enqueue(input.blockID); diff --git a/RTCP/Cobalt/GPUProc/src/SubbandProcs/SubbandProcInputData.cc b/RTCP/Cobalt/GPUProc/src/SubbandProcs/SubbandProcInputData.cc index 5440d53d5c52b84255c577a1b6445290beaf57fc..53fafe2c790fb8b08f02cdb96a137b9c8bea658e 100644 --- a/RTCP/Cobalt/GPUProc/src/SubbandProcs/SubbandProcInputData.cc +++ b/RTCP/Cobalt/GPUProc/src/SubbandProcs/SubbandProcInputData.cc @@ -33,9 +33,9 @@ namespace LOFAR gpu::Context &context, unsigned int hostBufferFlags) : - delaysAtBegin(boost::extents[ps.settings.SAPs.size()][ps.settings.antennaFields.size()][NR_POLARIZATIONS], + delaysAtBegin(boost::extents[ps.settings.antennaFields.size()][NR_POLARIZATIONS], context, hostBufferFlags), - delaysAfterEnd(boost::extents[ps.settings.SAPs.size()][ps.settings.antennaFields.size()][NR_POLARIZATIONS], + delaysAfterEnd(boost::extents[ps.settings.antennaFields.size()][NR_POLARIZATIONS], context, hostBufferFlags), phase0s(boost::extents[ps.settings.antennaFields.size()][NR_POLARIZATIONS], context, hostBufferFlags), @@ -53,8 +53,8 @@ namespace LOFAR auto& bfPipeline = bfPipelines[i]; tabDelays[i].reset( - new MultiDimArrayHostBuffer<double, 3>( - boost::extents[bfPipeline.SAPs.size()][ps.settings.antennaFields.size()][bfPipeline.maxNrCoherentTABsPerSAP()], + new MultiDimArrayHostBuffer<double, 2>( + boost::extents[ps.settings.antennaFields.size()][bfPipeline.maxNrCoherentTABsPerSAP()], context, hostBufferFlags) ); @@ -74,13 +74,13 @@ namespace LOFAR // extract and assign the delays for the station beams // X polarisation - delaysAtBegin[SAP][station][0] = ps.settings.antennaFields[station].delay.x + metaData.stationBeam.delayAtBegin; - delaysAfterEnd[SAP][station][0] = ps.settings.antennaFields[station].delay.x + metaData.stationBeam.delayAfterEnd; + delaysAtBegin[station][0] = ps.settings.antennaFields[station].delay.x + metaData.stationBeam.delayAtBegin; + delaysAfterEnd[station][0] = ps.settings.antennaFields[station].delay.x + metaData.stationBeam.delayAfterEnd; phase0s[station][0] = ps.settings.antennaFields[station].phase0.x; // Y polarisation - delaysAtBegin[SAP][station][1] = ps.settings.antennaFields[station].delay.y + metaData.stationBeam.delayAtBegin; - delaysAfterEnd[SAP][station][1] = ps.settings.antennaFields[station].delay.y + metaData.stationBeam.delayAfterEnd; + delaysAtBegin[station][1] = ps.settings.antennaFields[station].delay.y + metaData.stationBeam.delayAtBegin; + delaysAfterEnd[station][1] = ps.settings.antennaFields[station].delay.y + metaData.stationBeam.delayAfterEnd; phase0s[station][1] = ps.settings.antennaFields[station].phase0.y; if (ps.settings.beamFormer.enabled) @@ -107,9 +107,9 @@ namespace LOFAR if (tab.coherent) { // subtract the delay that was already compensated for - (*tabDelays[pipelineNr])[SAP][station][coherentIdxInSAP] = (metaData.TABs[tab.coherentIdxInSAP].delayAtBegin + - metaData.TABs[tab.coherentIdxInSAP].delayAfterEnd) * 0.5 - - compensatedDelay; + (*tabDelays[pipelineNr])[station][coherentIdxInSAP] = (metaData.TABs[tab.coherentIdxInSAP].delayAtBegin + + metaData.TABs[tab.coherentIdxInSAP].delayAfterEnd) * 0.5 - + compensatedDelay; coherentIdxInSAP++; nrTABs++; } @@ -120,7 +120,7 @@ namespace LOFAR // Zero padding entries that exist because we always produce maxNrCoherentTABsPerSAP for any subband for (unsigned tab = pipeline.SAPs[SAP].TABs.size(); tab < pipeline.maxNrCoherentTABsPerSAP(); tab++) - (*tabDelays[pipelineNr])[SAP][station][tab] = 0.0; + (*tabDelays[pipelineNr])[station][tab] = 0.0; } ASSERTSTR(nrTABs == ps.settings.beamFormer.SAPs[SAP].nrCoherent, diff --git a/RTCP/Cobalt/GPUProc/src/SubbandProcs/SubbandProcInputData.h b/RTCP/Cobalt/GPUProc/src/SubbandProcs/SubbandProcInputData.h index 6250168144ecbd893585d3605dad11839dfc0041..8d3c5fcb779a2f6a0d0b2e6d461afe4eddfbd0a2 100644 --- a/RTCP/Cobalt/GPUProc/src/SubbandProcs/SubbandProcInputData.h +++ b/RTCP/Cobalt/GPUProc/src/SubbandProcs/SubbandProcInputData.h @@ -52,16 +52,16 @@ namespace LOFAR // otherwise the to be computed phase shifts become too inprecise. //!< Whole sample delays at the start of the workitem - MultiDimArrayHostBuffer<double, 3> delaysAtBegin; + MultiDimArrayHostBuffer<double, 2> delaysAtBegin; //!< Whole sample delays at the end of the workitem - MultiDimArrayHostBuffer<double, 3> delaysAfterEnd; + MultiDimArrayHostBuffer<double, 2> delaysAfterEnd; //!< Remainder of delays MultiDimArrayHostBuffer<double, 2> phase0s; //!< Delays for TABs (aka pencil beams) after station beam correction - std::vector<std::shared_ptr<MultiDimArrayHostBuffer<double, 3>>> tabDelays; + std::vector<std::shared_ptr<MultiDimArrayHostBuffer<double, 2>>> tabDelays; // inputdata with flagged data set to zero MultiDimArrayHostBuffer<char, 4> inputSamples; diff --git a/RTCP/Cobalt/GPUProc/test/Kernels/tBeamFormerKernel.cc b/RTCP/Cobalt/GPUProc/test/Kernels/tBeamFormerKernel.cc index 5e77681fa35ed4da65ffb62690db98f0b9953cac..13ba29de2aad3e74e81a8d631c6dfdee06f70b39 100644 --- a/RTCP/Cobalt/GPUProc/test/Kernels/tBeamFormerKernel.cc +++ b/RTCP/Cobalt/GPUProc/test/Kernels/tBeamFormerKernel.cc @@ -72,7 +72,6 @@ int main(int argc, char *argv[]) ps.settings.antennaFieldNames.size(), ps.settings.beamFormer.nrDelayCompensationChannels, ps.settings.blockSize / ps.settings.beamFormer.nrDelayCompensationChannels, - ps.settings.SAPs.size(), ps.settings.beamFormer.maxNrCoherentTABsPerSAP(), ps.settings.subbandWidth(), ps.settings.beamFormer.doFlysEye @@ -88,12 +87,11 @@ int main(int argc, char *argv[]) unique_ptr<BeamFormerKernel> kernel(factory.create(stream, devBandPassCorrectedMemory, devComplexVoltagesMemory)); float subbandFreq = 60e6f; - unsigned sap = 0; BlockID blockId; // run for (unsigned i = 0; i < 10; i++) { - kernel->enqueue(blockId, subbandFreq, sap); + kernel->enqueue(blockId, subbandFreq); stream.synchronize(); } diff --git a/RTCP/Cobalt/GPUProc/test/Kernels/tDelayAndBandPassKernel.cc b/RTCP/Cobalt/GPUProc/test/Kernels/tDelayAndBandPassKernel.cc index 2f14c7385f7f3773c83a551234ef85d04093a78e..d83a7f53f714e8a964a54c4bfcacfbc13c68a115 100644 --- a/RTCP/Cobalt/GPUProc/test/Kernels/tDelayAndBandPassKernel.cc +++ b/RTCP/Cobalt/GPUProc/test/Kernels/tDelayAndBandPassKernel.cc @@ -69,7 +69,6 @@ int main(int argc, char *argv[]) ps.settings.blockSize / ps.settings.beamFormer.nrDelayCompensationChannels, ps.settings.clockMHz, ps.settings.subbandWidth(), - ps.settings.SAPs.size(), ps.settings.delayCompensation.enabled, correlator, false, // correctBandPass @@ -86,9 +85,8 @@ int main(int argc, char *argv[]) size_t subbandIdx = 0; float centralFrequency = ps.settings.subbands[subbandIdx].centralFrequency; - size_t SAP = ps.settings.subbands[subbandIdx].SAP; BlockID blockId; - kernel->enqueue(blockId, centralFrequency, SAP); + kernel->enqueue(blockId, centralFrequency); stream.synchronize(); return 0; diff --git a/RTCP/Cobalt/GPUProc/test/Kernels/tDelayAndBandPassKernel2.cc b/RTCP/Cobalt/GPUProc/test/Kernels/tDelayAndBandPassKernel2.cc index dae560faf4aaff0b20f11adcb6e484fcd50699b6..0bf201d915e8a7eaf4fe37145e5b4d6cab2814c0 100644 --- a/RTCP/Cobalt/GPUProc/test/Kernels/tDelayAndBandPassKernel2.cc +++ b/RTCP/Cobalt/GPUProc/test/Kernels/tDelayAndBandPassKernel2.cc @@ -39,7 +39,6 @@ struct TestFixture ps.settings.blockSize / ps.settings.correlator.nrChannels, ps.settings.clockMHz, ps.settings.subbandWidth(), - ps.settings.SAPs.size(), ps.settings.delayCompensation.enabled, true, // correlator ps.settings.corrections.bandPass, // correctBandPass diff --git a/RTCP/Cobalt/GPUProc/test/Kernels/tFIR_FilterKernel.cc b/RTCP/Cobalt/GPUProc/test/Kernels/tFIR_FilterKernel.cc index 8bf6165cdee6c682777a207b39af061638761f8f..4b719a9081377055ba7aefee823125c0dbfa093f 100644 --- a/RTCP/Cobalt/GPUProc/test/Kernels/tFIR_FilterKernel.cc +++ b/RTCP/Cobalt/GPUProc/test/Kernels/tFIR_FilterKernel.cc @@ -46,7 +46,6 @@ TEST(FIR_FilterKernel) // ratio of outputs for 1 and 3 above should give us back the applied correction // some constants (not in the parset) - const size_t NR_SAPS=1; const size_t NR_POLARIZATIONS=2; const size_t COMPLEX=2; const double subbandFreq=50e6; @@ -160,13 +159,11 @@ TEST(FIR_FilterKernel) KernelFactory<FIR_FilterKernel> factory_dop(params_dop); - MultiDimArrayHostBuffer<double, 3> delaysAtBegin(boost::extents - [NR_SAPS] + MultiDimArrayHostBuffer<double, 2> delaysAtBegin(boost::extents [ps.settings.antennaFields.size()] //NR_DELAYS [NR_POLARIZATIONS], context); - MultiDimArrayHostBuffer<double, 3> delaysAfterEnd(boost::extents - [NR_SAPS] + MultiDimArrayHostBuffer<double, 2> delaysAfterEnd(boost::extents [ps.settings.antennaFields.size()] //NR_DELAYS [NR_POLARIZATIONS], context); @@ -192,7 +189,7 @@ TEST(FIR_FilterKernel) } unique_ptr<FIR_FilterKernel> kernel_dop_zero(factory_dop.create(stream, dInput, dOutput)); // Note: delays are zero - kernel_dop_zero->enqueue(blockId, 0, subbandFreq); + kernel_dop_zero->enqueue(blockId, subbandFreq); stream.readBuffer(hOutput1, dOutput,true); // compare results (for a fraction of the output, to save time) for(size_t i = NR_SAMPLES_PER_CHANNEL/2; i < (NR_SAMPLES_PER_CHANNEL*3)/4; ++i) { diff --git a/RTCP/Cobalt/GPUProc/test/cuda/tDelayAndBandPass.cc b/RTCP/Cobalt/GPUProc/test/cuda/tDelayAndBandPass.cc index f78dab5af26fdbd43884b80b0dadefefbaa0b571..c54636cea8a4fcb607588e80408dff4e192bcb42 100644 --- a/RTCP/Cobalt/GPUProc/test/cuda/tDelayAndBandPass.cc +++ b/RTCP/Cobalt/GPUProc/test/cuda/tDelayAndBandPass.cc @@ -57,7 +57,6 @@ const unsigned NR_SAMPLES_PER_SUBBAND = NR_SAMPLES_PER_CHANNEL * NR_CHANNELS; const unsigned NR_BITS_PER_SAMPLE = 8; const unsigned NR_POLARIZATIONS = 2; -const unsigned NR_SAPS = 8; const double SUBBAND_BANDWIDTH = 0.0 * NR_CHANNELS; const bool BANDPASS_CORRECTION = true; const bool DELAY_COMPENSATION = false; @@ -72,12 +71,11 @@ void runKernel(gpu::Function kfunc, MultiDimArrayHostBuffer<fcomplex, 4> &outputData, MultiDimArrayHostBuffer<T, 4> &inputData, MultiDimArrayHostBuffer<unsigned, 1> &delayIndices, - MultiDimArrayHostBuffer<double, 3> &delaysAtBegin, - MultiDimArrayHostBuffer<double, 3> &delaysAfterEnd, + MultiDimArrayHostBuffer<double, 2> &delaysAtBegin, + MultiDimArrayHostBuffer<double, 2> &delaysAfterEnd, MultiDimArrayHostBuffer<double, 2> &phase0s, MultiDimArrayHostBuffer<float, 1> &bandPassFactors, double subbandFrequency, - unsigned beam, bool transpose) { gpu::Context ctx(stream->getContext()); @@ -94,11 +92,10 @@ void runKernel(gpu::Function kfunc, kfunc.setArg(1, devInput); kfunc.setArg(2, devStationIndices); kfunc.setArg(3, subbandFrequency); - kfunc.setArg(4, beam); - kfunc.setArg(5, devDelaysAtBegin); - kfunc.setArg(6, devDelaysAfterEnd); - kfunc.setArg(7, devPhase0s); - kfunc.setArg(8, devBandPassFactors); + kfunc.setArg(4, devDelaysAtBegin); + kfunc.setArg(5, devDelaysAfterEnd); + kfunc.setArg(6, devPhase0s); + kfunc.setArg(7, devBandPassFactors); // Overwrite devOutput, so result verification is more reliable. stream->writeBuffer(devOutput, outputData); @@ -160,8 +157,6 @@ CompileDefinitions getDefaultCompileDefinitions() boost::lexical_cast<string>(NR_BITS_PER_SAMPLE); defs["NR_POLARIZATIONS"] = boost::lexical_cast<string>(NR_POLARIZATIONS); - defs["NR_SAPS"] = - boost::lexical_cast<string>(NR_SAPS); defs["SUBBAND_BANDWIDTH"] = boost::lexical_cast<string>(SUBBAND_BANDWIDTH); @@ -178,7 +173,6 @@ CompileDefinitions getDefaultCompileDefinitions() // It is the value type of the data input array. vector<fcomplex> runTest(const CompileDefinitions& compileDefs, double subbandFrequency, - unsigned beam, double delayBegin, double delayEnd, double phase0, @@ -231,13 +225,11 @@ vector<fcomplex> runTest(const CompileDefinitions& compileDefs, [NR_STATIONS], ctx); - MultiDimArrayHostBuffer<double, 3> delaysAtBegin(boost::extents - [NR_SAPS] + MultiDimArrayHostBuffer<double, 2> delaysAtBegin(boost::extents [NR_DELAYS] [NR_POLARIZATIONS], ctx); - MultiDimArrayHostBuffer<double, 3> delaysAfterEnd(boost::extents - [NR_SAPS] + MultiDimArrayHostBuffer<double, 2> delaysAfterEnd(boost::extents [NR_DELAYS] [NR_POLARIZATIONS], ctx); @@ -280,7 +272,7 @@ vector<fcomplex> runTest(const CompileDefinitions& compileDefs, runKernel(kfunc, *outputData, *inputData, delayIndices, delaysAtBegin, delaysAfterEnd, phase0s, bandPassFactors, - subbandFrequency, beam, compileDefs.find("DO_TRANSPOSE") != compileDefs.end()); + subbandFrequency, compileDefs.find("DO_TRANSPOSE") != compileDefs.end()); // Tests that use this function only check the first and last 2 output floats. const unsigned nrResultVals = 2; @@ -307,7 +299,6 @@ TEST(BandPass) vector<fcomplex> results(runTest( defs, 0.0, // sb freq - 0U, // beam 0.0, // delays begin 0.0, // delays end 0.0, // phase offsets @@ -332,7 +323,6 @@ TEST(Phase0s) vector<fcomplex> results(runTest( defs, 1.0, // sb freq - 0U, // beam 0.0, // delays begin 0.0, // delays end -M_PI, // phase offsets @@ -361,7 +351,6 @@ SUITE(DelayCompensation) vector<fcomplex> results(runTest( defs, 1.0, // sb freq - 0U, // beam 1.0, // delays begin 1.0, // delays end 0.0, // phase offsets @@ -458,7 +447,6 @@ SUITE(DelayCompensation) vector<fcomplex> results(runTest( defs, 1.0, // sb freq - 0U, // beam 1.0, // delays begin 0.0, // delays end 0.0, // phase offsets @@ -539,7 +527,6 @@ TEST(AllAtOnce) vector<fcomplex> results(runTest( defs, 1.0, // sb freq - 0U, // beam 1.0, // delays begin 0.0, // delays end -1.0, // phase offsets (-1 rad)