diff --git a/RTCP/Cobalt/GPUProc/share/gpu/kernels/DelayAndBandPass.cu b/RTCP/Cobalt/GPUProc/share/gpu/kernels/DelayAndBandPass.cu index 16ba600c1240868bae44e920823515ae7a092cf9..f3df1ba37f252c009befeaf61189c96576ac858d 100644 --- a/RTCP/Cobalt/GPUProc/share/gpu/kernels/DelayAndBandPass.cu +++ b/RTCP/Cobalt/GPUProc/share/gpu/kernels/DelayAndBandPass.cu @@ -65,8 +65,8 @@ typedef complexchar (* InputDataType)[NR_STATIONS][NR_SAMPLES_PER_SUBBAND][NR_P #else typedef complexfloat (* InputDataType)[NR_STATIONS][NR_POLARIZATIONS][NR_SAMPLES_PER_CHANNEL][NR_CHANNELS]; #endif -typedef const float (* DelaysType)[NR_BEAMS][NR_STATIONS][COMPLEX]; // 2 Polarizations; in seconds -typedef const float (* PhaseOffsetsType)[NR_STATIONS][COMPLEX]; // 2 Polarizations; in radians +typedef const float (* DelaysType)[NR_BEAMS][NR_STATIONS][NR_POLARIZATIONS]; // 2 Polarizations; in seconds +typedef const float (* PhaseOffsetsType)[NR_STATIONS][NR_POLARIZATIONS]; // 2 Polarizations; in radians typedef const float (* BandPassFactorsType)[NR_CHANNELS]; /** @@ -169,10 +169,10 @@ extern "C" { 16.0f * deltaPhi.y * frequency); // Magic constant: 16 is the time step we take in the samples #endif - complexfloat vX = LOFAR::Cobalt::gpu::exp(complexfloat(myPhiBegin.x)); // This cast might be costly - complexfloat vY = LOFAR::Cobalt::gpu::exp(complexfloat(myPhiBegin.y)); - complexfloat dvX = LOFAR::Cobalt::gpu::exp(complexfloat(myPhiDelta.x)); - complexfloat dvY = LOFAR::Cobalt::gpu::exp(complexfloat(myPhiDelta.y)); + complexfloat vX = LOFAR::Cobalt::gpu::cosisin(myPhiBegin.x); + complexfloat vY = LOFAR::Cobalt::gpu::cosisin(myPhiBegin.y); + complexfloat dvX = LOFAR::Cobalt::gpu::cosisin(myPhiDelta.x); + complexfloat dvY = LOFAR::Cobalt::gpu::cosisin(myPhiDelta.y); #endif #if defined BANDPASS_CORRECTION diff --git a/RTCP/Cobalt/GPUProc/share/gpu/kernels/complex.cuh b/RTCP/Cobalt/GPUProc/share/gpu/kernels/complex.cuh index 726c45fff5e07cf6af0c16ae662f7a8f7d9dd2d0..61185f417eee5324fc33564d24ef060cf5890595 100644 --- a/RTCP/Cobalt/GPUProc/share/gpu/kernels/complex.cuh +++ b/RTCP/Cobalt/GPUProc/share/gpu/kernels/complex.cuh @@ -776,6 +776,7 @@ complex<_Tp> polar(const _Tp& __rho, const _Tp& __phi) { __device__ complex<float> sqrt(const complex<float>&); __device__ complex<float> exp(const complex<float>&); +__device__ complex<float> cosisin(const float&); __device__ complex<float> log(const complex<float>&); __device__ complex<float> log10(const complex<float>&); @@ -796,6 +797,7 @@ __device__ complex<float> tanh(const complex<float>&); __device__ complex<double> sqrt(const complex<double>&); __device__ complex<double> exp(const complex<double>&); +__device__ complex<double> cosisin(const double&); __device__ complex<double> log(const complex<double>&); __device__ complex<double> log10(const complex<double>&); @@ -968,6 +970,19 @@ __device__ complex<float> exp(const complex<float>& z) __device__ complex<double> exp(const complex<double>& z) { return expT(z); } +//---------------------------------------------------------------------- +// cosisin +template <class _Tp> +__device__ +static complex<_Tp> cosisinT(const _Tp& angle) { + return complex<_Tp>(::cos(angle), ::sin(angle)); +} +__device__ complex<float> cosisin(const float& angle) +{ return cosisinT(angle); } + +__device__ complex<double> cosisin(const double& angle) +{ return cosisinT(angle); } + #if 0 //---------------------------------------------------------------------- // log10 diff --git a/RTCP/Cobalt/GPUProc/src/cuda/WorkQueues/CorrelatorWorkQueue.cc b/RTCP/Cobalt/GPUProc/src/cuda/WorkQueues/CorrelatorWorkQueue.cc index ef71aa6341465cb3b10d299776d31c7755a39b06..14d5455739fb652af27ad0968571a21fd1b3d26d 100644 --- a/RTCP/Cobalt/GPUProc/src/cuda/WorkQueues/CorrelatorWorkQueue.cc +++ b/RTCP/Cobalt/GPUProc/src/cuda/WorkQueues/CorrelatorWorkQueue.cc @@ -421,6 +421,8 @@ namespace LOFAR prevSAP = SAP; prevBlock = block; + + queue.synchronize(); } if (ps.nrChannelsPerSubband() > 1) { diff --git a/RTCP/Cobalt/GPUProc/test/cuda/CMakeLists.txt b/RTCP/Cobalt/GPUProc/test/cuda/CMakeLists.txt index 49783fcc5f919b10d4fcccb6e1f224f0c8010144..be937f2a3b7dc47121417f1f40a8647964ef28ac 100644 --- a/RTCP/Cobalt/GPUProc/test/cuda/CMakeLists.txt +++ b/RTCP/Cobalt/GPUProc/test/cuda/CMakeLists.txt @@ -7,6 +7,7 @@ cuda_include_directories(${PACKAGE_SOURCE_DIR}/share/gpu/kernels) if(UNITTEST++_FOUND) lofar_add_test(tGPUWrapper tGPUWrapper.cc) + lofar_add_test(tDelayAndBandPass tDelayAndBandPass.cc) endif(UNITTEST++_FOUND) lofar_add_test(tCudaRuntimeCompiler tCudaRuntimeCompiler.cc) @@ -17,7 +18,6 @@ lofar_add_test(tCorrelator tCorrelator.cc) lofar_add_test(tFFT tFFT.cc) lofar_add_test(tFIR_Filter tFIR_Filter.cc) -lofar_add_test(tDelayAndBandPass tDelayAndBandPass.cc) lofar_add_test(tDelayAndBandPassKernel tDelayAndBandPassKernel.cc) # Trick tDelayAndBandPass into thinking that it can find the kernels it needs diff --git a/RTCP/Cobalt/GPUProc/test/cuda/tDelayAndBandPass.cc b/RTCP/Cobalt/GPUProc/test/cuda/tDelayAndBandPass.cc index 0fc8a7cbe1396ca26afd88783b2fe8f14da83b4e..b086fdfaa975c527d33dccf7f60addf1ec92cc51 100644 --- a/RTCP/Cobalt/GPUProc/test/cuda/tDelayAndBandPass.cc +++ b/RTCP/Cobalt/GPUProc/test/cuda/tDelayAndBandPass.cc @@ -33,6 +33,7 @@ #include <GPUProc/gpu_wrapper.h> #include <GPUProc/gpu_utils.h> #include <GPUProc/cuda/CudaRuntimeCompiler.h> +#include <UnitTest++.h> #include "TestUtil.h" @@ -173,114 +174,119 @@ float * runTest(float bandPassFactor, return firstAndLastComplex; } -int main() +TEST(BandPass) { - INIT_LOGGER("tDelayAndBandPass"); - // *********************************************************** // Test if the bandpass correction factor is applied correctly in isolation float bandPassFactor = 2.0; - bool delayCompensation = false; float * results; // The input samples are all ones // After correction, multiply with 2. // The first and the last complex values are retrieved. They should be scaled with the bandPassFactor == 2 results = runTest(bandPassFactor); - for (unsigned idx = 0; idx < 4; ++idx) - { - if (results[idx] != 2.0) - { - cerr << "Bandpass correction returned an incorrect value at index " << idx << endl; - cerr << " expected: 2, 2, 2, 2" << endl; - cerr << " received: " << results[0] << ", " << results[1] << ", "<< results[2] << ", "<< results[3] << endl; - return -1; - } - } + + CHECK_CLOSE(2.0, results[0], 0.00001); + CHECK_CLOSE(2.0, results[1], 0.00001); + CHECK_CLOSE(2.0, results[2], 0.00001); + CHECK_CLOSE(2.0, results[3], 0.00001); + + delete[] results; +} + +TEST(PhaseOffsets) +{ + float * results; //********************************************************************** // Delaycompensation but only for the phase ofsets: - // All computations the drop except the phase ofset of 1,0 which is fed into a an cexp - // cexp(1) = e = 2.71828 + // All computations the drop except the phase ofset of 1,0 which is fed into a cosisin + // cosisin(pi) = -1 results = runTest(1.0, // bandpass factor 1.0, // frequency 1.0, true, // delayCompensation 0.0, // delays begin 0.0, // delays end - 1.0); // phase offsets + M_PI); // phase offsets - for (unsigned idx = 0; idx < 4; ++idx) - { - if ((results[idx] - 2.71828) > 0.00001 ) - { - cerr << " phase offsets correction returned an incorrect value at index " << idx << endl; - cerr << " expected: 2.71828, 2.71828, 2.71828, 2.71828" << endl; - cerr << " received: " << results[0] << ", " << results[1] << ", "<< results[2] << ", "<< results[3] << endl; - return -1; - } - } + CHECK_CLOSE(-1.0, results[0], 0.00001); + CHECK_CLOSE(-1.0, results[1], 0.00001); + CHECK_CLOSE(-1.0, results[2], 0.00001); + CHECK_CLOSE(-1.0, results[3], 0.00001); - //**************************************************************************** - // delays begin and end both 1 no phase offset frequency 1 width 1 - // frequency = subbandFrequency - .5f * SUBBAND_BANDWIDTH + (channel + minor) * (SUBBAND_BANDWIDTH / NR_CHANNELS) - // (delaysbegin * - 2 * pi ) * (frequency == 0.5) == -3.14 - // exp(-3.14159+0 i) == 0.04312 - results = runTest(1.0, // bandpass factor - 1.0, // frequency - 1.0, - true, // delayCompensation - 1.0, // delays begin - 1.0, // delays end - 0.0); // phase offsets + delete[] results; +} - for (unsigned idx = 0; idx < 4; ++idx) +SUITE(DelayCompensation) +{ + TEST(ConstantDelay) { - if (fabs(results[idx] - 0.04321) > 0.00001 ) - { - cerr << " delays begin and end both 1 no phase offset frequency 1 width 1 at index " << idx << endl; - cerr << " expected: 0.04321, 0.04321, 0.04321, 0.04321" << endl; - cerr << " received: " << results[0] << ", " << results[1] << ", "<< results[2] << ", "<< results[3] << endl; - return -1; - } + float * results; + + //**************************************************************************** + // delays begin and end both 1 no phase offset frequency 1 width 1 + // frequency = subbandFrequency - .5f * SUBBAND_BANDWIDTH + (channel + minor) * (SUBBAND_BANDWIDTH / NR_CHANNELS) + // (delaysbegin * - 2 * pi ) * (frequency == 0.5) == -3.14 + // cosisin(-3.14159+0 i) == -1 + results = runTest(1.0, // bandpass factor + 1.0, // frequency + 1.0, + true, // delayCompensation + 1.0, // delays begin + 1.0, // delays end + 0.0); // phase offsets + + CHECK_CLOSE(-1.0, results[0], 0.00001); + CHECK_CLOSE(-1.0, results[1], 0.00001); + CHECK_CLOSE(-1.0, results[2], 0.00001); + CHECK_CLOSE(-1.0, results[3], 0.00001); + + delete[] results; } - //**************************************************************************** - // delays begin 1 and end 0 no phase offset frequency 1 width 1 - // frequency = subbandFrequency - .5f * SUBBAND_BANDWIDTH + (channel + minor) * (SUBBAND_BANDWIDTH / NR_CHANNELS) - // (delaysbegin * - 2 * pi ) * (frequency == 0.5) == -3.14 - // exp(-3.14159+0 i) == 0.04312 - // The later sets of samples are calculate as: - // vX = vX * dvX; The delays are multiplied because we are calculating with exponents - // Ask john Romein for more details - results = runTest(1.0, // bandpass factor - 1.0, // frequency - 1.0, - true, // delayCompensation - 1.0, // delays begin - 0.0, // delays end - 0.0); // phase offsets - - for (unsigned idx = 0; idx < 4; ++idx) - { // Magic number ask John Romein why they are correct - if(!((fabs(results[idx] - 0.04321) < 0.00001) || (fabs(results[idx] - 0.952098) < 0.00001))) - { - cerr << " delays begin and end both 1 no phase offset frequency 1 width 1 at index " << idx << endl; - cerr << " expected: 0.04321, 0.04321, 0.952098, 0.952098" << endl; - cerr << " received: " << results[0] << ", " << results[1] << ", "<< results[2] << ", "<< results[3] << endl; - return -1; - } + TEST(SlopedDelay) + { + float * results; + + //**************************************************************************** + // delays begin 1 and end 0 no phase offset frequency 1 width 1 + // frequency = subbandFrequency - .5f * SUBBAND_BANDWIDTH + (channel + minor) * (SUBBAND_BANDWIDTH / NR_CHANNELS) + // (delaysbegin * - 2 * pi ) * (frequency == 0.5) == -3.14 + // cosisin(-3.14159+0 i) == -1 + // The later sets of samples are calculate as: + // vX = vX * dvX; The delays are multiplied because we are calculating with exponents + // Ask john Romein for more details + results = runTest(1.0, // bandpass factor + 1.0, // frequency + 1.0, + true, // delayCompensation + 1.0, // delays begin + 0.0, // delays end + 0.0); // phase offsets + + CHECK_CLOSE(-1.0, results[0], 0.00001); + CHECK_CLOSE(-1.0, results[1], 0.00001); + CHECK_CLOSE(1.047860, results[2], 0.00001); + CHECK_CLOSE(0.949728, results[3], 0.00001); + + delete[] results; } +} + +TEST(AllAtOnce) +{ + float * results; //**************************************************************************** // delays begin 1 and end 0 no phase offset frequency 1 width 1 // frequency = subbandFrequency - .5f * SUBBAND_BANDWIDTH + (channel + minor) * (SUBBAND_BANDWIDTH / NR_CHANNELS) // (delaysbegin * - 2 * pi ) * (frequency == 0.5) == -3.14 - // exp(-3.14159+0 i) == 0.04312 + // cosisin(-3.14159+0 i) == -1 // The later sets of samples are calculate as: // vX = vX * dvX; The delays are multiplied because we are calculating with exponents // Ask john Romein for more details - // In this test the phase offsetss are also compensated + // In this test the phase offsets are also compensated results = runTest(2.0, // bandpass factor (weights == 2) 1.0, // frequency 1.0, @@ -289,18 +295,18 @@ int main() 0.0, // delays end 1.0); // phase offsets (correct with e = 2.71828) - for (unsigned idx = 0; idx < 4; ++idx) - { // Magic number ask John Romein why they are correct - if(!((fabs(results[idx] - 0.04321 * 2.71828 * 2) < 0.0001) || - (fabs(results[idx] - 2.58807* 2) < 0.0001))) - { - cerr << " delays begin and end both 1 no phase offset frequency 1 width 1 at index " << idx << endl; - cerr << " expected: 0.04321, 0.04321, 0.952098, 0.952098" << endl; - cerr << " received: " << results[0] << ", " << results[1] << ", "<< results[2] << ", "<< results[3] << endl; - return -1; - } - } + CHECK_CLOSE( 0.602337, results[0], 0.00001); + CHECK_CLOSE(-2.763550, results[1], 0.00001); + CHECK_CLOSE(-0.466011, results[2], 0.00001); + CHECK_CLOSE( 2.789770, results[3], 0.00001); + + delete[] results; +} + +int main() +{ + INIT_LOGGER("tDelayAndBandPass"); - return 0; + return UnitTest::RunAllTests() > 0; }