diff --git a/RTCP/Cobalt/GPUProc/share/gpu/kernels/DelayAndBandPass.cu b/RTCP/Cobalt/GPUProc/share/gpu/kernels/DelayAndBandPass.cu
index 16ba600c1240868bae44e920823515ae7a092cf9..f3df1ba37f252c009befeaf61189c96576ac858d 100644
--- a/RTCP/Cobalt/GPUProc/share/gpu/kernels/DelayAndBandPass.cu
+++ b/RTCP/Cobalt/GPUProc/share/gpu/kernels/DelayAndBandPass.cu
@@ -65,8 +65,8 @@ typedef  complexchar (* InputDataType)[NR_STATIONS][NR_SAMPLES_PER_SUBBAND][NR_P
 #else
 typedef  complexfloat (* InputDataType)[NR_STATIONS][NR_POLARIZATIONS][NR_SAMPLES_PER_CHANNEL][NR_CHANNELS];
 #endif
-typedef  const float (* DelaysType)[NR_BEAMS][NR_STATIONS][COMPLEX]; // 2 Polarizations; in seconds
-typedef  const float (* PhaseOffsetsType)[NR_STATIONS][COMPLEX]; // 2 Polarizations; in radians
+typedef  const float (* DelaysType)[NR_BEAMS][NR_STATIONS][NR_POLARIZATIONS]; // 2 Polarizations; in seconds
+typedef  const float (* PhaseOffsetsType)[NR_STATIONS][NR_POLARIZATIONS]; // 2 Polarizations; in radians
 typedef  const float (* BandPassFactorsType)[NR_CHANNELS];
 
 /**
@@ -169,10 +169,10 @@ extern "C" {
                                   16.0f * deltaPhi.y * frequency); // Magic constant: 16 is the time step we take in the samples
 #endif
 
-  complexfloat vX = LOFAR::Cobalt::gpu::exp(complexfloat(myPhiBegin.x));  // This cast might be costly
-  complexfloat vY = LOFAR::Cobalt::gpu::exp(complexfloat(myPhiBegin.y));
-  complexfloat dvX = LOFAR::Cobalt::gpu::exp(complexfloat(myPhiDelta.x));
-  complexfloat dvY = LOFAR::Cobalt::gpu::exp(complexfloat(myPhiDelta.y));
+  complexfloat vX = LOFAR::Cobalt::gpu::cosisin(myPhiBegin.x);
+  complexfloat vY = LOFAR::Cobalt::gpu::cosisin(myPhiBegin.y);
+  complexfloat dvX = LOFAR::Cobalt::gpu::cosisin(myPhiDelta.x);
+  complexfloat dvY = LOFAR::Cobalt::gpu::cosisin(myPhiDelta.y);
 #endif
 
 #if defined BANDPASS_CORRECTION
diff --git a/RTCP/Cobalt/GPUProc/share/gpu/kernels/complex.cuh b/RTCP/Cobalt/GPUProc/share/gpu/kernels/complex.cuh
index 726c45fff5e07cf6af0c16ae662f7a8f7d9dd2d0..61185f417eee5324fc33564d24ef060cf5890595 100644
--- a/RTCP/Cobalt/GPUProc/share/gpu/kernels/complex.cuh
+++ b/RTCP/Cobalt/GPUProc/share/gpu/kernels/complex.cuh
@@ -776,6 +776,7 @@ complex<_Tp> polar(const _Tp& __rho, const _Tp& __phi) {
 __device__ complex<float> sqrt(const complex<float>&);
 
 __device__ complex<float> exp(const complex<float>&);
+__device__ complex<float> cosisin(const float&);
 __device__ complex<float>  log(const complex<float>&);
 __device__ complex<float> log10(const complex<float>&);
 
@@ -796,6 +797,7 @@ __device__ complex<float> tanh(const complex<float>&);
 __device__ complex<double> sqrt(const complex<double>&);
 
 __device__ complex<double> exp(const complex<double>&);
+__device__ complex<double> cosisin(const double&);
 __device__ complex<double> log(const complex<double>&);
 __device__ complex<double> log10(const complex<double>&);
 
@@ -968,6 +970,19 @@ __device__ complex<float>  exp(const complex<float>& z)
 __device__ complex<double> exp(const complex<double>& z)
 { return expT(z); }
 
+//----------------------------------------------------------------------
+// cosisin
+template <class _Tp>
+__device__ 
+static complex<_Tp> cosisinT(const _Tp& angle) {
+  return complex<_Tp>(::cos(angle), ::sin(angle));
+}
+__device__ complex<float>  cosisin(const float& angle)
+{ return cosisinT(angle); }
+
+__device__ complex<double> cosisin(const double& angle)
+{ return cosisinT(angle); }
+
 #if 0
 //----------------------------------------------------------------------
 // log10
diff --git a/RTCP/Cobalt/GPUProc/src/cuda/WorkQueues/CorrelatorWorkQueue.cc b/RTCP/Cobalt/GPUProc/src/cuda/WorkQueues/CorrelatorWorkQueue.cc
index ef71aa6341465cb3b10d299776d31c7755a39b06..14d5455739fb652af27ad0968571a21fd1b3d26d 100644
--- a/RTCP/Cobalt/GPUProc/src/cuda/WorkQueues/CorrelatorWorkQueue.cc
+++ b/RTCP/Cobalt/GPUProc/src/cuda/WorkQueues/CorrelatorWorkQueue.cc
@@ -421,6 +421,8 @@ namespace LOFAR
 
         prevSAP = SAP;
         prevBlock = block;
+
+        queue.synchronize();
       }
 
       if (ps.nrChannelsPerSubband() > 1) {
diff --git a/RTCP/Cobalt/GPUProc/test/cuda/CMakeLists.txt b/RTCP/Cobalt/GPUProc/test/cuda/CMakeLists.txt
index 49783fcc5f919b10d4fcccb6e1f224f0c8010144..be937f2a3b7dc47121417f1f40a8647964ef28ac 100644
--- a/RTCP/Cobalt/GPUProc/test/cuda/CMakeLists.txt
+++ b/RTCP/Cobalt/GPUProc/test/cuda/CMakeLists.txt
@@ -7,6 +7,7 @@ cuda_include_directories(${PACKAGE_SOURCE_DIR}/share/gpu/kernels)
 
 if(UNITTEST++_FOUND)
   lofar_add_test(tGPUWrapper tGPUWrapper.cc)
+  lofar_add_test(tDelayAndBandPass tDelayAndBandPass.cc)
 endif(UNITTEST++_FOUND)
 
 lofar_add_test(tCudaRuntimeCompiler tCudaRuntimeCompiler.cc)
@@ -17,7 +18,6 @@ lofar_add_test(tCorrelator tCorrelator.cc)
 
 lofar_add_test(tFFT tFFT.cc)
 lofar_add_test(tFIR_Filter tFIR_Filter.cc)
-lofar_add_test(tDelayAndBandPass tDelayAndBandPass.cc)
 lofar_add_test(tDelayAndBandPassKernel tDelayAndBandPassKernel.cc)
 
 # Trick tDelayAndBandPass into thinking that it can find the kernels it needs
diff --git a/RTCP/Cobalt/GPUProc/test/cuda/tDelayAndBandPass.cc b/RTCP/Cobalt/GPUProc/test/cuda/tDelayAndBandPass.cc
index 0fc8a7cbe1396ca26afd88783b2fe8f14da83b4e..b086fdfaa975c527d33dccf7f60addf1ec92cc51 100644
--- a/RTCP/Cobalt/GPUProc/test/cuda/tDelayAndBandPass.cc
+++ b/RTCP/Cobalt/GPUProc/test/cuda/tDelayAndBandPass.cc
@@ -33,6 +33,7 @@
 #include <GPUProc/gpu_wrapper.h>
 #include <GPUProc/gpu_utils.h>
 #include <GPUProc/cuda/CudaRuntimeCompiler.h>
+#include <UnitTest++.h>
 
 #include "TestUtil.h"
 
@@ -173,114 +174,119 @@ float * runTest(float bandPassFactor,
   return firstAndLastComplex;
 }
 
-int main()
+TEST(BandPass)
 {
-  INIT_LOGGER("tDelayAndBandPass");
-
   // ***********************************************************
   // Test if the bandpass correction factor is applied correctly in isolation
   float bandPassFactor = 2.0;
-  bool delayCompensation = false;
   float * results;
 
   // The input samples are all ones
   // After correction, multiply with 2.
   // The first and the last complex values are retrieved. They should be scaled with the bandPassFactor == 2
   results = runTest(bandPassFactor);
-  for (unsigned idx = 0; idx < 4; ++idx)
-  {
-    if (results[idx] != 2.0)
-    {
-      cerr << "Bandpass correction returned an incorrect value at index " << idx << endl;
-      cerr << " expected: 2, 2, 2, 2" << endl;    
-      cerr << " received: " << results[0] << ", " << results[1] << ", "<< results[2] << ", "<< results[3] << endl;
-      return -1;
-    }
-  }
+
+  CHECK_CLOSE(2.0, results[0], 0.00001);
+  CHECK_CLOSE(2.0, results[1], 0.00001);
+  CHECK_CLOSE(2.0, results[2], 0.00001);
+  CHECK_CLOSE(2.0, results[3], 0.00001);
+
+  delete[] results;
+}
+
+TEST(PhaseOffsets)
+{
+  float * results;
 
   //**********************************************************************
   // Delaycompensation but only for the phase ofsets:
-  // All computations the drop except the phase ofset of 1,0 which is fed into a an cexp
-  // cexp(1) = e = 2.71828
+  // All computations the drop except the phase ofset of 1,0 which is fed into a cosisin
+  // cosisin(pi) = -1
   results = runTest(1.0,   // bandpass factor
                     1.0,   // frequency
                     1.0,   
                     true,  // delayCompensation
                     0.0,   // delays begin  
                     0.0,   // delays end
-                    1.0);  // phase offsets
+                    M_PI); // phase offsets
 
-  for (unsigned idx = 0; idx < 4; ++idx)
-  {
-    if ((results[idx] -  2.71828) > 0.00001 )
-    {
-      cerr << " phase offsets correction returned an incorrect value at index " << idx << endl;
-      cerr << " expected: 2.71828, 2.71828, 2.71828, 2.71828" << endl;    
-      cerr << " received: " << results[0] << ", " << results[1] << ", "<< results[2] << ", "<< results[3] << endl;
-      return -1;
-    }
-  }
+  CHECK_CLOSE(-1.0, results[0], 0.00001);
+  CHECK_CLOSE(-1.0, results[1], 0.00001);
+  CHECK_CLOSE(-1.0, results[2], 0.00001);
+  CHECK_CLOSE(-1.0, results[3], 0.00001);
 
-  //****************************************************************************
-  // delays  begin and end both 1 no phase offset frequency 1 width 1
-  // frequency = subbandFrequency - .5f * SUBBAND_BANDWIDTH + (channel + minor) * (SUBBAND_BANDWIDTH / NR_CHANNELS)
-  //  (delaysbegin * - 2 * pi ) * (frequency == 0.5) == -3.14
-  // exp(-3.14159+0 i) == 0.04312
-  results = runTest(1.0,   // bandpass factor
-                    1.0,   // frequency
-                    1.0,   
-                    true,  // delayCompensation
-                    1.0,   // delays begin  
-                    1.0,   // delays end
-                    0.0);  // phase offsets
+  delete[] results;
+}
 
-  for (unsigned idx = 0; idx < 4; ++idx)
+SUITE(DelayCompensation)
+{
+  TEST(ConstantDelay)
   {
-    if (fabs(results[idx] -  0.04321) > 0.00001 )
-    {
-      cerr << " delays  begin and end both 1 no phase offset frequency 1 width 1 at index " << idx << endl;
-      cerr << " expected:  0.04321,  0.04321,  0.04321,  0.04321" << endl;    
-      cerr << " received: " << results[0] << ", " << results[1] << ", "<< results[2] << ", "<< results[3] << endl;
-      return -1;
-    }
+    float * results;
+
+    //****************************************************************************
+    // delays  begin and end both 1 no phase offset frequency 1 width 1
+    // frequency = subbandFrequency - .5f * SUBBAND_BANDWIDTH + (channel + minor) * (SUBBAND_BANDWIDTH / NR_CHANNELS)
+    //  (delaysbegin * - 2 * pi ) * (frequency == 0.5) == -3.14
+    // cosisin(-3.14159+0 i) == -1
+    results = runTest(1.0,   // bandpass factor
+                      1.0,   // frequency
+                      1.0,   
+                      true,  // delayCompensation
+                      1.0,   // delays begin  
+                      1.0,   // delays end
+                      0.0);  // phase offsets
+
+    CHECK_CLOSE(-1.0, results[0], 0.00001);
+    CHECK_CLOSE(-1.0, results[1], 0.00001);
+    CHECK_CLOSE(-1.0, results[2], 0.00001);
+    CHECK_CLOSE(-1.0, results[3], 0.00001);
+
+    delete[] results;
   }
 
-  //****************************************************************************
-  // delays  begin 1 and end 0 no phase offset frequency 1 width 1
-  // frequency = subbandFrequency - .5f * SUBBAND_BANDWIDTH + (channel + minor) * (SUBBAND_BANDWIDTH / NR_CHANNELS)
-  //  (delaysbegin * - 2 * pi ) * (frequency == 0.5) == -3.14
-  // exp(-3.14159+0 i) == 0.04312
-  // The later sets of samples are calculate as:
-  // vX = vX * dvX;  The delays are multiplied because we are calculating with exponents
-  // Ask john Romein for more details
-  results = runTest(1.0,   // bandpass factor
-                    1.0,   // frequency
-                    1.0,   
-                    true,  // delayCompensation
-                    1.0,   // delays begin  
-                    0.0,   // delays end
-                    0.0);  // phase offsets
-
-  for (unsigned idx = 0; idx < 4; ++idx)
-  {  // Magic number ask John Romein why they are correct
-    if(!((fabs(results[idx] -  0.04321) < 0.00001) || (fabs(results[idx] -  0.952098) < 0.00001)))
-    {
-      cerr << " delays  begin and end both 1 no phase offset frequency 1 width 1 at index " << idx << endl;
-      cerr << " expected:  0.04321,  0.04321,  0.952098,  0.952098" << endl;    
-      cerr << " received: " << results[0] << ", " << results[1] << ", "<< results[2] << ", "<< results[3] << endl;
-      return -1;
-    }
+  TEST(SlopedDelay)
+  {
+    float * results;
+
+    //****************************************************************************
+    // delays  begin 1 and end 0 no phase offset frequency 1 width 1
+    // frequency = subbandFrequency - .5f * SUBBAND_BANDWIDTH + (channel + minor) * (SUBBAND_BANDWIDTH / NR_CHANNELS)
+    //  (delaysbegin * - 2 * pi ) * (frequency == 0.5) == -3.14
+    // cosisin(-3.14159+0 i) == -1
+    // The later sets of samples are calculate as:
+    // vX = vX * dvX;  The delays are multiplied because we are calculating with exponents
+    // Ask john Romein for more details
+    results = runTest(1.0,   // bandpass factor
+                      1.0,   // frequency
+                      1.0,   
+                      true,  // delayCompensation
+                      1.0,   // delays begin  
+                      0.0,   // delays end
+                      0.0);  // phase offsets
+
+    CHECK_CLOSE(-1.0,     results[0], 0.00001);
+    CHECK_CLOSE(-1.0,     results[1], 0.00001);
+    CHECK_CLOSE(1.047860, results[2], 0.00001);
+    CHECK_CLOSE(0.949728, results[3], 0.00001);
+
+    delete[] results;
   }
+}
+
+TEST(AllAtOnce)
+{
+  float * results;
 
   //****************************************************************************
   // delays  begin 1 and end 0 no phase offset frequency 1 width 1
   // frequency = subbandFrequency - .5f * SUBBAND_BANDWIDTH + (channel + minor) * (SUBBAND_BANDWIDTH / NR_CHANNELS)
   //  (delaysbegin * - 2 * pi ) * (frequency == 0.5) == -3.14
-  // exp(-3.14159+0 i) == 0.04312
+  // cosisin(-3.14159+0 i) == -1
   // The later sets of samples are calculate as:
   // vX = vX * dvX;  The delays are multiplied because we are calculating with exponents
   // Ask john Romein for more details
-  // In this test the phase offsetss are also compensated
+  // In this test the phase offsets are also compensated
   results = runTest(2.0,   // bandpass factor (weights == 2)
                     1.0,   // frequency
                     1.0,   
@@ -289,18 +295,18 @@ int main()
                     0.0,   // delays end
                     1.0);  // phase offsets (correct with e = 2.71828)
 
-  for (unsigned idx = 0; idx < 4; ++idx)
-  {  // Magic number ask John Romein why they are correct
-    if(!((fabs(results[idx] -  0.04321 * 2.71828 * 2) < 0.0001)  ||
-        (fabs(results[idx] -  2.58807* 2) < 0.0001)))
-    {
-      cerr << " delays  begin and end both 1 no phase offset frequency 1 width 1 at index " << idx << endl;
-      cerr << " expected:  0.04321,  0.04321,  0.952098,  0.952098" << endl;    
-      cerr << " received: " << results[0] << ", " << results[1] << ", "<< results[2] << ", "<< results[3] << endl;
-      return -1;
-    }
-  }
+  CHECK_CLOSE( 0.602337, results[0], 0.00001);
+  CHECK_CLOSE(-2.763550, results[1], 0.00001);
+  CHECK_CLOSE(-0.466011, results[2], 0.00001);
+  CHECK_CLOSE( 2.789770, results[3], 0.00001);
+
+  delete[] results;
+}
+
+int main()
+{
+  INIT_LOGGER("tDelayAndBandPass");
 
-  return 0;
+  return UnitTest::RunAllTests() > 0;
 }