diff --git a/.gitattributes b/.gitattributes
index cb42d5500f666ff55664a04f7f732c72ea977eb8..8dd2e56af6db51fab1ab9c611b798d167373a37c 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -3976,6 +3976,7 @@ RTCP/Cobalt/GPUProc/test/Kernels/tKernelFunctions.sh eol=lf
 RTCP/Cobalt/GPUProc/test/Kernels/tKernelPerformance.py eol=lf
 RTCP/Cobalt/GPUProc/test/Kernels/tKernelPerformance.run eol=lf
 RTCP/Cobalt/GPUProc/test/Kernels/tKernelPerformance.sh eol=lf
+RTCP/Cobalt/GPUProc/test/Kernels/tZeroingKernel.sh eol=lf
 RTCP/Cobalt/GPUProc/test/Kernels/visualizeBeamformer.py eol=lf
 RTCP/Cobalt/GPUProc/test/Pipelines/tCorrelatorPipelineProcessObs.sh eol=lf
 RTCP/Cobalt/GPUProc/test/Storage/tStorageProcesses.queue -text
diff --git a/RTCP/Cobalt/CoInterface/src/Parset.cc b/RTCP/Cobalt/CoInterface/src/Parset.cc
index fcc317a4089037c99197330866741d64de50399f..42a383800ad76d6fa88961975585eeab64cbc1bf 100644
--- a/RTCP/Cobalt/CoInterface/src/Parset.cc
+++ b/RTCP/Cobalt/CoInterface/src/Parset.cc
@@ -618,18 +618,6 @@ namespace LOFAR
 
       if (settings.beamFormer.enabled) {
         // Parse global settings
-
-        // 4096 channels is enough, but allow parset override.
-        if (!isDefined("Cobalt.BeamFormer.nrHighResolutionChannels")) {
-          settings.beamFormer.nrHighResolutionChannels = 4096;
-        } else {
-          settings.beamFormer.nrHighResolutionChannels =
-              getUint32("Cobalt.BeamFormer.nrHighResolutionChannels");
-          ASSERTSTR(powerOfTwo(settings.beamFormer.nrHighResolutionChannels) &&
-              settings.beamFormer.nrHighResolutionChannels < 65536,
-              "Parset: Cobalt.BeamFormer.nrHighResolutionChannels must be a power of 2 and < 64k");
-        }
-
         settings.beamFormer.doFlysEye = getBool("Cobalt.BeamFormer.flysEye", false);
 
         unsigned nrDelayCompCh;
@@ -638,9 +626,6 @@ namespace LOFAR
         } else {
           nrDelayCompCh = getUint32("Cobalt.BeamFormer.nrDelayCompensationChannels");
         }
-        if (nrDelayCompCh > settings.beamFormer.nrHighResolutionChannels) {
-          nrDelayCompCh = settings.beamFormer.nrHighResolutionChannels;
-        }
         settings.beamFormer.nrDelayCompensationChannels = nrDelayCompCh;
 
         // Derive antennaFields to use for beam forming
diff --git a/RTCP/Cobalt/CoInterface/src/Parset.h b/RTCP/Cobalt/CoInterface/src/Parset.h
index fea46954dc81e08c60beadd923330358b4778e82..4f9f67e11334f80c9bc8903db26d01aa287782be 100644
--- a/RTCP/Cobalt/CoInterface/src/Parset.h
+++ b/RTCP/Cobalt/CoInterface/src/Parset.h
@@ -589,11 +589,6 @@ namespace LOFAR
         // Equal to the size of the first FFT. Power of two.
         unsigned nrDelayCompensationChannels;
 
-        // Number of channels per subband for bandpass correction, narrow band
-        // flagging, beamforming, and coherent dedispersion.
-        // Power of two and at least nrDelayCompensationChannels.
-        unsigned nrHighResolutionChannels;
-
         // Are we in fly's eye mode?
         bool doFlysEye;
 
diff --git a/RTCP/Cobalt/CoInterface/src/SubbandMetaData.h b/RTCP/Cobalt/CoInterface/src/SubbandMetaData.h
index c50996d90d510660c8dfdda23979d7663484676b..a22286dbff27e49d590b3edaf2fbe11f736bae57 100644
--- a/RTCP/Cobalt/CoInterface/src/SubbandMetaData.h
+++ b/RTCP/Cobalt/CoInterface/src/SubbandMetaData.h
@@ -54,8 +54,11 @@ namespace LOFAR
       void read(Stream *str);
       void write(Stream *str) const;
 
+      // Maximum number of flags ranges to marshall
+      static const size_t MAXNRFLAGRANGES = 512;
+
       // Maximum size of the buffer to marshall flags
-      static const size_t MAXFLAGSIZE     = 8192 + 4;
+      static const size_t MAXFLAGSIZE     = MAXNRFLAGRANGES * 2 * sizeof(unsigned) + 4;
 
       // Maximum number of TABs we'll support when marshalling
       static const size_t MAXNRTABS       = 512;
diff --git a/RTCP/Cobalt/GPUProc/doc/pipeline-buffers.txt b/RTCP/Cobalt/GPUProc/doc/pipeline-buffers.txt
index 782355349a1a9068e85ac128d714eefadcd034b3..347929e4e1f1424c800351af09d694074594fa1f 100644
--- a/RTCP/Cobalt/GPUProc/doc/pipeline-buffers.txt
+++ b/RTCP/Cobalt/GPUProc/doc/pipeline-buffers.txt
@@ -46,7 +46,10 @@ NB: Numbers are for 80 antenna fields.
   FFT (if >1ch) {out-of-place}
    |            [station][pol][sample][channel]         [80][2][3072][64]   = 240 MiB               Nch: E
    V
-Delay compensation + Band pass + Transpose {I/O: delays}
+Zeroing (in-place)
+   |            [station][pol][sample][channel]         [80][2][3072][64]   = 240 MiB               Nch: E
+   V
+Delay compensation (channel) + Band pass + Transpose {I/O: delays}
    |            [station][channel][sample][pol]         [80][64][3072][2]   = 240 MiB               B
    V
 Correlator
@@ -67,15 +70,12 @@ IntToFloat + Transpose + FFT-shift
 FFT-64 {inplace}
    |            [station][pol][sample][channel]         [48][2][3072][64]   = 144 MiB               B
    V
-Delay compensation + Transpose (implicit, DO_TRANSPOSE not defined) {I/O: delays}
-   |            [station][pol][channel][sample]         [48][2][64][3072]   = 144 MiB               A
+Zeroing (in-place)
+   |            [station][pol][sample][channel]         [48][2][3072][64]   = 144 MiB               B
    V
-FFT-shift {inplace}
+Delay compensation + Transpose (implicit, DO_TRANSPOSE not defined) {I/O: delays}
    |            [station][pol][channel][sample]         [48][2][64][3072]   = 144 MiB               A
    V
-FFT-64 {inplace}
-   |            [station][pol][chan1][sample][chan2]    [48][2][64][48][64] = 144 MiB               A
-   V
 BandPass + Transpose {I/O: weights}
    |            [station][chan1][chan2][sample][pol]    [48][64][64][48][2] = 144 MiB               B
    V          = [station][channel][sample][pol]
diff --git a/RTCP/Cobalt/GPUProc/share/gpu/kernels/Zeroing.cu b/RTCP/Cobalt/GPUProc/share/gpu/kernels/Zeroing.cu
new file mode 100644
index 0000000000000000000000000000000000000000..597d0c04e0af1fe4a502cd59d36210d48eb77a73
--- /dev/null
+++ b/RTCP/Cobalt/GPUProc/share/gpu/kernels/Zeroing.cu
@@ -0,0 +1,96 @@
+//# Zeroing.cu: zero ranges of samples
+//# Copyright (C) 2012-2013  ASTRON (Netherlands Institute for Radio Astronomy)
+//# P.O. Box 2, 7990 AA Dwingeloo, The Netherlands
+//#
+//# This file is part of the LOFAR software suite.
+//# The LOFAR software suite is free software: you can redistribute it and/or
+//# modify it under the terms of the GNU General Public License as published
+//# by the Free Software Foundation, either version 3 of the License, or
+//# (at your option) any later version.
+//#
+//# The LOFAR software suite is distributed in the hope that it will be useful,
+//# but WITHOUT ANY WARRANTY; without even the implied warranty of
+//# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+//# GNU General Public License for more details.
+//#
+//# You should have received a copy of the GNU General Public License along
+//# with the LOFAR software suite. If not, see <http://www.gnu.org/licenses/>.
+//#
+//# $Id$
+
+#include "gpu_math.cuh"
+#include <stdio.h>
+
+typedef float2 FilteredDataType[NR_STABS][NR_POLARIZATIONS][NR_SAMPLES_PER_CHANNEL][NR_CHANNELS];
+
+// This format much match the output of SubbandMetaData::marshall
+typedef struct {
+  int nrRanges;
+  int ranges[MAX_NR_RANGES][2]; // [begin, end)
+} marshalledRanges;
+
+typedef marshalledRanges RangesType[NR_STABS];
+
+/**
+ * Zero samples that have been flagged. Clears samples for all channels for
+ * ranged specified per station.
+ *
+ * @param[data] a multi-dimensional array with time samples of type complex
+ * float in the last dimension.
+ * @param[ranges] an array of ranges of samples to flag, compatible with SubbandMetaData::marshall
+ */
+
+extern "C"
+{
+  __global__ void Zeroing(FilteredDataType data,
+                           RangesType ranges)
+  {
+    int sample  = blockIdx.x * blockDim.x + threadIdx.x;
+    int pol     = blockIdx.y % 2;
+    int channel = blockIdx.y / 2;
+    int station = threadIdx.z;
+
+    // Determine whether we have to clear our sample
+    bool clear = false;
+
+#if 1
+    // Binary search, needed to avoid a worst-case scan of all ranges
+
+    int lo = 0, hi = ranges[station].nrRanges - 1;
+
+    while (lo <= hi) {
+      int mid = (lo + hi) / 2;
+
+      int from = ranges[station].ranges[mid][0];
+      int to   = ranges[station].ranges[mid][1];
+
+      if (sample >= from && sample < to) {
+        clear = true;
+        break;
+      }
+
+      if (sample < from) {
+        hi = mid - 1;
+      } else {
+        lo = mid + 1;
+      }
+    }
+#else
+    // Linear search, faster for a small number of flags
+    for (int range = 0; range < ranges[station].nrRanges; range++) {
+      int from = ranges[station].ranges[range][0];
+      int to   = ranges[station].ranges[range][1];
+
+      if (sample >= from && sample < to)
+        clear = true;
+    }
+#endif
+
+    __syncthreads();
+
+    if (clear) {
+      // Clear our sample
+      data[station][pol][sample][channel] = make_float2(0.0f, 0.0f);
+    }
+  }
+}
diff --git a/RTCP/Cobalt/GPUProc/src/CMakeLists.txt b/RTCP/Cobalt/GPUProc/src/CMakeLists.txt
index fbc4e762035d4ad5f105eecbfe579d76eba16874..2592aa280ea31d17bfb36a72634d897379f82a25 100644
--- a/RTCP/Cobalt/GPUProc/src/CMakeLists.txt
+++ b/RTCP/Cobalt/GPUProc/src/CMakeLists.txt
@@ -13,6 +13,7 @@ set(_gpuproc_sources
   CommandThread.cc
   cpu_utils.cc
   FilterBank.cc
+  Flagger.cc
   global_defines.cc
   MPIReceiver.cc
   Package__Version.cc
@@ -49,6 +50,7 @@ if(USE_CUDA)
     cuda/Kernels/IncoherentStokesTransposeKernel.cc
     cuda/Kernels/IntToFloatKernel.cc
     cuda/Kernels/FFTShiftKernel.cc
+    cuda/Kernels/ZeroingKernel.cc
     #cuda/Kernels/UHEP_BeamFormerKernel.cc
     #cuda/Kernels/UHEP_InvFFT_Kernel.cc
     #cuda/Kernels/UHEP_InvFIR_Kernel.cc
diff --git a/RTCP/Cobalt/GPUProc/src/Flagger.cc b/RTCP/Cobalt/GPUProc/src/Flagger.cc
new file mode 100644
index 0000000000000000000000000000000000000000..31d3e92c27a4a00fd89d677d73898997ca3831e2
--- /dev/null
+++ b/RTCP/Cobalt/GPUProc/src/Flagger.cc
@@ -0,0 +1,104 @@
+//# Flagger.cc
+//# Copyright (C) 2012-2013  ASTRON (Netherlands Institute for Radio Astronomy)
+//# P.O. Box 2, 7990 AA Dwingeloo, The Netherlands
+//#
+//# This file is part of the LOFAR software suite.
+//# The LOFAR software suite is free software: you can redistribute it and/or
+//# modify it under the terms of the GNU General Public License as published
+//# by the Free Software Foundation, either version 3 of the License, or
+//# (at your option) any later version.
+//#
+//# The LOFAR software suite is distributed in the hope that it will be useful,
+//# but WITHOUT ANY WARRANTY; without even the implied warranty of
+//# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+//# GNU General Public License for more details.
+//#
+//# You should have received a copy of the GNU General Public License along
+//# with the LOFAR software suite. If not, see <http://www.gnu.org/licenses/>.
+//#
+//# $Id$
+
+#include <lofar_config.h>
+
+#include <GPUProc/Flagger.h>
+#include <Common/LofarLogger.h>
+
+namespace LOFAR
+{
+  namespace Cobalt
+  {
+    void Flagger::convertFlagsToChannelFlags(
+      MultiDimArray<LOFAR::SparseSet<unsigned>, 1>const &inputFlags,
+      MultiDimArray<SparseSet<unsigned>, 1>& flagsPerChannel,
+      const unsigned nrSamplesPerChannel,
+      const unsigned nrChannels,
+      const ssize_t nrPrefixedSamples)
+    {
+      ASSERT(inputFlags.num_elements() == flagsPerChannel.num_elements());
+
+      // If nrChannels == 1, we do not expect nrPrefixedSamples
+      ASSERT(nrChannels > 1 || nrPrefixedSamples == 0);
+
+      unsigned log2NrChannels = log2(nrChannels);
+
+      // Convert the flags per sample to flags per channel
+      for (unsigned station = 0; station < inputFlags.num_elements(); station ++) 
+      {
+        // reset the channel flags for this station
+        flagsPerChannel[station].reset();
+
+        // get the flag ranges
+        const SparseSet<unsigned>::Ranges &ranges = inputFlags[station].getRanges();
+        for (SparseSet<unsigned>::const_iterator it = ranges.begin();
+          it != ranges.end(); it ++) 
+        {
+          unsigned begin_idx;
+          unsigned end_idx;
+          if (nrChannels == 1)
+          {
+            // do nothing, just take the ranges as supplied
+            begin_idx = it->begin; 
+            end_idx = std::min(nrSamplesPerChannel, it->end);
+          }
+          else
+          {
+            // Never flag before the start of the time range               
+            // use bitshift to divide to the number of channels. 
+            //
+            // In case of nrPrefixedSamples, there are FIR Filter
+            // samples in front of those who we split the flags for.
+            // In that case, nrPrefixedSamples == NR_TAPS - 1.
+            //
+            // NR_TAPS is the width of the filter: they are
+            // absorbed by the FIR and thus should be excluded
+            // from the original flag set.
+            //
+            // The original flag set can span up to
+            //    [0, nrSamplesPerBlock + nrChannels * (NR_TAPS - 1))
+            // of which the FIRST (NR_TAPS - 1) samples belong to
+            // the previous block, and are used to initialise the
+            // FIR filter. Every sample i of the current block is thus
+            // actually at index (i + nrChannels * (NR_TAPS - 1)),
+            // or, after converting to channels, at index (i' + NR_TAPS - 1).
+            //
+            // At the same time, every sample is affected by
+            // the NR_TAPS-1 samples before it. So, any flagged
+            // sample in the input flags NR_TAPS samples in
+            // the channel.
+            begin_idx = std::max(0L, 
+              (signed) (it->begin >> log2NrChannels) - nrPrefixedSamples);
+
+            // The min is needed, because flagging the last input
+            // samples would cause NR_TAPS subsequent samples to
+            // be flagged, which aren't necessarily part of this block.
+            end_idx = std::min(nrSamplesPerChannel, 
+              ((it->end - 1) >> log2NrChannels) + 1);
+          }
+
+          // Now copy the transformed ranges to the channelflags
+          flagsPerChannel[station].include(begin_idx, end_idx);
+        }
+      }
+    }
+  }
+}
diff --git a/RTCP/Cobalt/GPUProc/src/Flagger.h b/RTCP/Cobalt/GPUProc/src/Flagger.h
new file mode 100644
index 0000000000000000000000000000000000000000..b56b629bf423c4030b55243752f8b988096392d1
--- /dev/null
+++ b/RTCP/Cobalt/GPUProc/src/Flagger.h
@@ -0,0 +1,48 @@
+//# CorrelatorStep.h
+//# Copyright (C) 2012-2013  ASTRON (Netherlands Institute for Radio Astronomy)
+//# P.O. Box 2, 7990 AA Dwingeloo, The Netherlands
+//#
+//# This file is part of the LOFAR software suite.
+//# The LOFAR software suite is free software: you can redistribute it and/or
+//# modify it under the terms of the GNU General Public License as published
+//# by the Free Software Foundation, either version 3 of the License, or
+//# (at your option) any later version.
+//#
+//# The LOFAR software suite is distributed in the hope that it will be useful,
+//# but WITHOUT ANY WARRANTY; without even the implied warranty of
+//# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+//# GNU General Public License for more details.
+//#
+//# You should have received a copy of the GNU General Public License along
+//# with the LOFAR software suite. If not, see <http://www.gnu.org/licenses/>.
+//#
+//# $Id$
+
+#ifndef LOFAR_GPUPROC_FLAGGER_H
+#define LOFAR_GPUPROC_FLAGGER_H
+
+#include <CoInterface/MultiDimArray.h>
+#include <CoInterface/SparseSet.h>
+
+namespace LOFAR
+{
+  namespace Cobalt
+  {
+    // Collection of functions to tranfer the input flags to the output.
+    class Flagger
+    {
+    public:
+      // Convert the flags from one channel to multiple channels, per station.
+      // If nrChannels > 1, nrPrefixedSamples are assumed to be already
+      // prepended to the input flags as a result of the FIR-filter history.
+      static void convertFlagsToChannelFlags(
+        MultiDimArray<SparseSet<unsigned>, 1>const &inputFlags,
+        MultiDimArray<SparseSet<unsigned>, 1>& flagsPerChannel,
+        const unsigned nrSamplesPerChannel,
+        const unsigned nrChannels,
+        const ssize_t nrPrefixedSamples);
+    };
+  }
+}
+
+#endif
diff --git a/RTCP/Cobalt/GPUProc/src/Kernels/ZeroingKernel.h b/RTCP/Cobalt/GPUProc/src/Kernels/ZeroingKernel.h
new file mode 100644
index 0000000000000000000000000000000000000000..b22aa3ae5535e4c7c857305b9bfacbeadf6aafbf
--- /dev/null
+++ b/RTCP/Cobalt/GPUProc/src/Kernels/ZeroingKernel.h
@@ -0,0 +1,41 @@
+//# ZeroingKernel.h
+//#
+//# Copyright (C) 2013  ASTRON (Netherlands Institute for Radio Astronomy)
+//# P.O. Box 2, 7990 AA Dwingeloo, The Netherlands
+//#
+//# This file is part of the LOFAR software suite.
+//# The LOFAR software suite is free software: you can redistribute it and/or
+//# modify it under the terms of the GNU General Public License as published
+//# by the Free Software Foundation, either version 3 of the License, or
+//# (at your option) any later version.
+//#
+//# The LOFAR software suite is distributed in the hope that it will be useful,
+//# but WITHOUT ANY WARRANTY; without even the implied warranty of
+//# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+//# GNU General Public License for more details.
+//#
+//# You should have received a copy of the GNU General Public License along
+//# with the LOFAR software suite. If not, see <http://www.gnu.org/licenses/>.
+//#
+//# $Id$
+
+// \file
+// Include the right GPU API include with our options.
+
+#ifndef LOFAR_GPUPROC_ZEROING_KERNEL_H
+#define LOFAR_GPUPROC_ZEROING_KERNEL_H
+
+#if defined (USE_CUDA) && defined (USE_OPENCL)
+# error "Either CUDA or OpenCL must be enabled, not both"
+#endif
+
+#if defined (USE_CUDA)
+# include <GPUProc/cuda/Kernels/ZeroingKernel.h>
+#elif defined (USE_OPENCL)
+# include <GPUProc/opencl/Kernels/ZeroingKernel.h>
+#else
+# error "Either CUDA or OpenCL must be enabled, not neither"
+#endif
+
+#endif
+
diff --git a/RTCP/Cobalt/GPUProc/src/cuda/Kernels/BandPassCorrectionKernel.cc b/RTCP/Cobalt/GPUProc/src/cuda/Kernels/BandPassCorrectionKernel.cc
index 77ed74bc8fcb9a7026733ce61f40813b0933dd8e..32bf4da8a3350d84144d21954e43871b399dec2d 100644
--- a/RTCP/Cobalt/GPUProc/src/cuda/Kernels/BandPassCorrectionKernel.cc
+++ b/RTCP/Cobalt/GPUProc/src/cuda/Kernels/BandPassCorrectionKernel.cc
@@ -49,7 +49,7 @@ namespace LOFAR
       nrStations(ps.settings.beamFormer.antennaFieldNames.size()),
 
       nrDelayCompensationChannels(ps.settings.beamFormer.nrDelayCompensationChannels),
-      nrHighResolutionChannels(ps.settings.beamFormer.nrHighResolutionChannels),
+      nrHighResolutionChannels(ps.settings.beamFormer.nrDelayCompensationChannels),
       nrSamplesPerChannel(ps.settings.blockSize / nrHighResolutionChannels),
 
       correctBandPass(ps.settings.corrections.bandPass)
diff --git a/RTCP/Cobalt/GPUProc/src/cuda/Kernels/BeamFormerKernel.cc b/RTCP/Cobalt/GPUProc/src/cuda/Kernels/BeamFormerKernel.cc
index 410fc4171d95d43b21223aed886386b9783f2e52..ef83105acd97cc22dac1070ddc017ea776869fdd 100644
--- a/RTCP/Cobalt/GPUProc/src/cuda/Kernels/BeamFormerKernel.cc
+++ b/RTCP/Cobalt/GPUProc/src/cuda/Kernels/BeamFormerKernel.cc
@@ -52,7 +52,7 @@ namespace LOFAR
       delayIndices(ObservationSettings::AntennaFieldName::indices(ps.settings.beamFormer.antennaFieldNames, ps.settings.antennaFieldNames)),
       nrDelays(ps.settings.antennaFieldNames.size()),
 
-      nrChannels(ps.settings.beamFormer.nrHighResolutionChannels),
+      nrChannels(ps.settings.beamFormer.nrDelayCompensationChannels),
       nrSamplesPerChannel(ps.settings.blockSize / nrChannels),
 
       nrSAPs(ps.settings.beamFormer.SAPs.size()),
diff --git a/RTCP/Cobalt/GPUProc/src/cuda/Kernels/BeamFormerTransposeKernel.cc b/RTCP/Cobalt/GPUProc/src/cuda/Kernels/BeamFormerTransposeKernel.cc
index fff5f02513c288ab0579cb035949dea99c498c68..994c4eff04219de9c230ea906ff6d37367d6f20a 100644
--- a/RTCP/Cobalt/GPUProc/src/cuda/Kernels/BeamFormerTransposeKernel.cc
+++ b/RTCP/Cobalt/GPUProc/src/cuda/Kernels/BeamFormerTransposeKernel.cc
@@ -46,7 +46,7 @@ namespace LOFAR
 
     BeamFormerTransposeKernel::Parameters::Parameters(const Parset& ps) :
       Kernel::Parameters("beamFormerTranspose"),
-      nrChannels(ps.settings.beamFormer.nrHighResolutionChannels),
+      nrChannels(ps.settings.beamFormer.nrDelayCompensationChannels),
       nrSamplesPerChannel(ps.settings.blockSize / nrChannels),
       nrTABs(ps.settings.beamFormer.maxNrCoherentTABsPerSAP())
     {
diff --git a/RTCP/Cobalt/GPUProc/src/cuda/Kernels/CoherentStokesTransposeKernel.cc b/RTCP/Cobalt/GPUProc/src/cuda/Kernels/CoherentStokesTransposeKernel.cc
index fa5b80092f7ae07c297983ac29d5a2f0037c2e53..c06839adbf82e7560cf2366d864eaa72ddafece4 100644
--- a/RTCP/Cobalt/GPUProc/src/cuda/Kernels/CoherentStokesTransposeKernel.cc
+++ b/RTCP/Cobalt/GPUProc/src/cuda/Kernels/CoherentStokesTransposeKernel.cc
@@ -46,7 +46,7 @@ namespace LOFAR
 
     CoherentStokesTransposeKernel::Parameters::Parameters(const Parset& ps) :
       Kernel::Parameters("coherentStokesTranspose"),
-      nrChannels(ps.settings.beamFormer.nrHighResolutionChannels),
+      nrChannels(ps.settings.beamFormer.nrDelayCompensationChannels),
       nrSamplesPerChannel(ps.settings.blockSize / nrChannels),
 
       nrTABs(ps.settings.beamFormer.maxNrCoherentTABsPerSAP())
diff --git a/RTCP/Cobalt/GPUProc/src/cuda/Kernels/IncoherentStokesTransposeKernel.cc b/RTCP/Cobalt/GPUProc/src/cuda/Kernels/IncoherentStokesTransposeKernel.cc
index 184cb207f90c8f624a7322692bda2cb3ea3deeb8..7ab683197f763edf7f3822d66291be86d4de8223 100644
--- a/RTCP/Cobalt/GPUProc/src/cuda/Kernels/IncoherentStokesTransposeKernel.cc
+++ b/RTCP/Cobalt/GPUProc/src/cuda/Kernels/IncoherentStokesTransposeKernel.cc
@@ -44,7 +44,7 @@ namespace LOFAR
     IncoherentStokesTransposeKernel::Parameters::Parameters(const Parset& ps) :
       Kernel::Parameters("incoherentStokesTranspose"),
       nrStations(ps.settings.beamFormer.antennaFieldNames.size()),
-      nrChannels(ps.settings.beamFormer.nrHighResolutionChannels),
+      nrChannels(ps.settings.beamFormer.nrDelayCompensationChannels),
       nrSamplesPerChannel(ps.settings.blockSize / nrChannels),
 
       tileSize(16)
diff --git a/RTCP/Cobalt/GPUProc/src/cuda/Kernels/ZeroingKernel.cc b/RTCP/Cobalt/GPUProc/src/cuda/Kernels/ZeroingKernel.cc
new file mode 100644
index 0000000000000000000000000000000000000000..44f0488cef94f64203cf72c6c4012223f029a8cd
--- /dev/null
+++ b/RTCP/Cobalt/GPUProc/src/cuda/Kernels/ZeroingKernel.cc
@@ -0,0 +1,139 @@
+//# ZeroingKernel.cc
+//# Copyright (C) 2012-2013  ASTRON (Netherlands Institute for Radio Astronomy)
+//# P.O. Box 2, 7990 AA Dwingeloo, The Netherlands
+//#
+//# This file is part of the LOFAR software suite.
+//# The LOFAR software suite is free software: you can redistribute it and/or
+//# modify it under the terms of the GNU General Public License as published
+//# by the Free Software Foundation, either version 3 of the License, or
+//# (at your option) any later version.
+//#
+//# The LOFAR software suite is distributed in the hope that it will be useful,
+//# but WITHOUT ANY WARRANTY; without even the implied warranty of
+//# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+//# GNU General Public License for more details.
+//#
+//# You should have received a copy of the GNU General Public License along
+//# with the LOFAR software suite. If not, see <http://www.gnu.org/licenses/>.
+//#
+//# $Id$
+
+#include <lofar_config.h>
+
+#include "ZeroingKernel.h"
+
+#include <GPUProc/gpu_utils.h>
+#include <CoInterface/BlockID.h>
+#include <CoInterface/Config.h>
+#include <CoInterface/SubbandMetaData.h>
+#include <Common/lofar_complex.h>
+
+#include <boost/lexical_cast.hpp>
+#include <boost/format.hpp>
+
+#include <fstream>
+
+using boost::lexical_cast;
+using boost::format;
+
+namespace LOFAR
+{
+  namespace Cobalt
+  {
+    string ZeroingKernel::theirSourceFile = "Zeroing.cu";
+    string ZeroingKernel::theirFunction = "Zeroing";
+
+    ZeroingKernel::Parameters::Parameters(const Parset& ps, unsigned nrSTABs, unsigned nrChannels, const std::string &name):
+      Kernel::Parameters(name),
+      nrSTABs(nrSTABs),
+
+      nrChannels(nrChannels),
+      nrSamplesPerChannel(ps.settings.blockSize / nrChannels)
+    {
+      dumpBuffers = 
+        ps.getBool("Cobalt.Kernels.ZeroingKernel.dumpOutput", false);
+      dumpFilePattern = 
+        str(format("L%d_SB%%03d_BL%%03d_ZeroingKernel.dat") % 
+            ps.settings.observationID);
+    }
+
+
+    size_t ZeroingKernel::Parameters::bufferSize(BufferType bufferType) const
+    {
+      switch (bufferType) {
+      case ZeroingKernel::INPUT_DATA:
+      case ZeroingKernel::OUTPUT_DATA: // fall thru
+        return (size_t)nrSTABs * NR_POLARIZATIONS *
+          nrChannels * nrSamplesPerChannel *
+          sizeof(std::complex<float>);
+
+      case ZeroingKernel::RANGES:
+        return (size_t)nrSTABs * SubbandMetaData::MAXFLAGSIZE *
+          sizeof(unsigned);
+          
+      default:
+        THROW(GPUProcException, "Invalid bufferType (" << bufferType << ")");
+      }
+    }
+
+    ZeroingKernel::ZeroingKernel(const gpu::Stream& stream,
+                                   const gpu::Module& module,
+                                   const Buffers& buffers,
+                                   const Parameters& params) :
+      CompiledKernel(stream, gpu::Function(module, theirFunction), buffers, params),
+      nrSTABs(params.nrSTABs),
+      gpuRanges(stream.getContext(), params.bufferSize(RANGES)),
+      hostRanges(stream.getContext(), params.bufferSize(RANGES))
+    {
+      setArg(0, buffers.input);
+      setArg(1, gpuRanges);
+      
+      // Number of samples per channel must be even
+      ASSERT(params.nrSamplesPerChannel % 2 == 0);
+
+      setEnqueueWorkSizes(
+        gpu::Grid(params.nrSamplesPerChannel, NR_POLARIZATIONS * params.nrChannels, params.nrSTABs),
+        gpu::Block(256 / params.nrChannels, NR_POLARIZATIONS * params.nrChannels, 1));
+    }
+
+
+    void ZeroingKernel::enqueue(const BlockID &blockId, const MultiDimArray<SparseSet<unsigned>, 1> &channelFlags)
+    {
+      // marshall flags to GPU host buffer
+      const ptrdiff_t rangeSize = SubbandMetaData::MAXFLAGSIZE;
+      for(unsigned station = 0; station < nrSTABs; ++station) {
+        if (channelFlags[station].marshall(hostRanges.get<char>() + station * rangeSize, rangeSize) < 0) {
+          LOG_ERROR_STR("ZeroingKernel: Received more flags than can be marshalled to GPU");
+
+          // skip this set, by setting the nrRanges to 0
+          *(reinterpret_cast<unsigned*>(hostRanges.get<char>() + station * rangeSize)) = 0;
+        }
+      }
+
+      // Copy host buffer to GPU
+      itsStream.writeBuffer(gpuRanges, hostRanges, false);
+
+      Kernel::enqueue(blockId);
+    }
+
+    //--------  Template specializations for KernelFactory  --------//
+
+    template<> CompileDefinitions
+      KernelFactory<ZeroingKernel>::compileDefinitions() const
+    {
+      CompileDefinitions defs =
+        KernelFactoryBase::compileDefinitions(itsParameters);
+
+      defs["NR_STABS"] = lexical_cast<string>(itsParameters.nrSTABs);
+      defs["NR_CHANNELS"] = lexical_cast<string>(itsParameters.nrChannels);
+      defs["NR_SAMPLES_PER_CHANNEL"] = 
+        lexical_cast<string>(itsParameters.nrSamplesPerChannel);
+
+      defs["MAX_NR_RANGES"] =
+        lexical_cast<string>(SubbandMetaData::MAXNRFLAGRANGES);
+
+      return defs;
+    }
+
+  }
+}
diff --git a/RTCP/Cobalt/GPUProc/src/cuda/Kernels/ZeroingKernel.h b/RTCP/Cobalt/GPUProc/src/cuda/Kernels/ZeroingKernel.h
new file mode 100644
index 0000000000000000000000000000000000000000..8b84f58de515bf6c44381e0a62bfec3a11afff32
--- /dev/null
+++ b/RTCP/Cobalt/GPUProc/src/cuda/Kernels/ZeroingKernel.h
@@ -0,0 +1,92 @@
+//# ZeroingKernel.h
+//# Copyright (C) 2012-2013  ASTRON (Netherlands Institute for Radio Astronomy)
+//# P.O. Box 2, 7990 AA Dwingeloo, The Netherlands
+//#
+//# This file is part of the LOFAR software suite.
+//# The LOFAR software suite is free software: you can redistribute it and/or
+//# modify it under the terms of the GNU General Public License as published
+//# by the Free Software Foundation, either version 3 of the License, or
+//# (at your option) any later version.
+//#
+//# The LOFAR software suite is distributed in the hope that it will be useful,
+//# but WITHOUT ANY WARRANTY; without even the implied warranty of
+//# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+//# GNU General Public License for more details.
+//#
+//# You should have received a copy of the GNU General Public License along
+//# with the LOFAR software suite. If not, see <http://www.gnu.org/licenses/>.
+//#
+//# $Id$
+
+#ifndef LOFAR_GPUPROC_CUDA_ZEROING_KERNEL_H
+#define LOFAR_GPUPROC_CUDA_ZEROING_KERNEL_H
+
+#include <CoInterface/Parset.h>
+#include <CoInterface/MultiDimArray.h>
+#include <CoInterface/SparseSet.h>
+
+#include <GPUProc/Kernels/Kernel.h>
+#include <GPUProc/KernelFactory.h>
+#include <GPUProc/gpu_wrapper.h>
+
+namespace LOFAR
+{
+  namespace Cobalt
+  {
+    class ZeroingKernel : public CompiledKernel
+    {
+    public:
+      static std::string theirSourceFile;
+      static std::string theirFunction;
+
+      enum BufferType
+      {
+        INPUT_DATA,
+        OUTPUT_DATA,
+        RANGES
+      };
+
+      // Parameters that must be passed to the constructor of the
+      // IntToFloatKernel class.
+      struct Parameters : Kernel::Parameters
+      {
+        Parameters(const Parset& ps, unsigned nrSTABs, unsigned nrChannels, const std::string &name = "Zeroing");
+        unsigned nrSTABs;
+
+        unsigned nrChannels;
+        unsigned nrSamplesPerChannel;
+
+        size_t bufferSize(BufferType bufferType) const;
+      };
+
+      // Construct a Zeroing kernel.
+      // \pre The number of samples per channel must be even.
+      // \pre The product of the number of stations, the number of
+      // polarizations, the number of channels per subband, and the number of
+      // samples per channel must be divisible by the maximum number of threads
+      // per block (typically 1024).
+      ZeroingKernel(const gpu::Stream &stream,
+                     const gpu::Module &module,
+                     const Buffers &buffers,
+                     const Parameters &param);
+
+      void enqueue(const BlockID &blockId, const MultiDimArray<SparseSet<unsigned>, 1> &channelFlags);
+
+    private:
+      const unsigned nrSTABs;
+
+      // The ranges of samples to clear
+      gpu::DeviceMemory gpuRanges;
+      gpu::HostMemory hostRanges;
+    };
+
+    //# --------  Template specializations for KernelFactory  -------- #//
+
+    template<> CompileDefinitions
+    KernelFactory<ZeroingKernel>::compileDefinitions() const;
+  }
+
+}
+
+#endif
+
diff --git a/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/BeamFormerCoherentStep.cc b/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/BeamFormerCoherentStep.cc
index 9b02246211f9db52cb69e6d7d6185f26e4942703..237409c371f26bd2616288855599083a4f051f34 100644
--- a/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/BeamFormerCoherentStep.cc
+++ b/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/BeamFormerCoherentStep.cc
@@ -46,13 +46,13 @@ namespace LOFAR
       coherentTranspose(CoherentStokesTransposeKernel::Parameters(ps)),
 
       coherentInverseFFT(FFT_Kernel::Parameters(
-        ps.settings.beamFormer.nrHighResolutionChannels,
+        ps.settings.beamFormer.nrDelayCompensationChannels,
         ps.settings.beamFormer.maxNrCoherentTABsPerSAP() * NR_POLARIZATIONS * ps.settings.blockSize,
         false,
         "FFT (coherent, inverse)")),
       coherentInverseFFTShift(FFTShiftKernel::Parameters(ps,
         ps.settings.beamFormer.maxNrCoherentTABsPerSAP(),
-        ps.settings.beamFormer.nrHighResolutionChannels,
+        ps.settings.beamFormer.nrDelayCompensationChannels,
         "FFT-shift (coherent, inverse)")),
 
       coherentFirFilter(
diff --git a/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/BeamFormerIncoherentStep.cc b/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/BeamFormerIncoherentStep.cc
index 3cd289e16eeb703d751148a6976a713a718ef50c..f370ffe5833d6e8904a5bd07a16441bf34961ebf 100644
--- a/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/BeamFormerIncoherentStep.cc
+++ b/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/BeamFormerIncoherentStep.cc
@@ -45,12 +45,12 @@ namespace LOFAR
       incoherentStokesTranspose(IncoherentStokesTransposeKernel::Parameters(ps)),
 
       incoherentInverseFFT(FFT_Kernel::Parameters(
-        ps.settings.beamFormer.nrHighResolutionChannels,
+        ps.settings.beamFormer.nrDelayCompensationChannels,
         ps.settings.beamFormer.antennaFieldNames.size() * NR_POLARIZATIONS * ps.settings.blockSize, false,
         "FFT (incoherent, inverse)")),
       incoherentInverseFFTShift(FFTShiftKernel::Parameters(ps,
         ps.settings.beamFormer.antennaFieldNames.size(),
-        ps.settings.beamFormer.nrHighResolutionChannels,
+        ps.settings.beamFormer.nrDelayCompensationChannels,
         "FFT-shift (incoherent, inverse)")),
 
       incoherentFirFilter(
diff --git a/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/BeamFormerPreprocessingStep.cc b/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/BeamFormerPreprocessingStep.cc
index b2f4ca265050ecb272c54a4a8886d65402e3a399..11bf4c5d1a5e79115c1812c8a5c3e299a1bf159f 100644
--- a/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/BeamFormerPreprocessingStep.cc
+++ b/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/BeamFormerPreprocessingStep.cc
@@ -24,6 +24,7 @@
 
 #include <GPUProc/global_defines.h>
 #include <GPUProc/gpu_wrapper.h>
+#include <GPUProc/Flagger.h>
 
 #include <CoInterface/Parset.h>
 #include <ApplCommon/PosixTime.h>
@@ -48,14 +49,11 @@ namespace LOFAR
           ps.settings.beamFormer.nrDelayCompensationChannels,
           "FFT-shift (beamformer)")),
 
-        delayCompensation(DelayAndBandPassKernel::Parameters(ps, false)),
+        zeroing(ZeroingKernel::Parameters(ps,
+          ps.settings.antennaFields.size(),
+          ps.settings.beamFormer.nrDelayCompensationChannels)),
 
-        secondFFT(FFT_Kernel::Parameters(
-          ps.settings.beamFormer.nrHighResolutionChannels /
-          ps.settings.beamFormer.nrDelayCompensationChannels,
-          ps.settings.beamFormer.antennaFieldNames.size() * NR_POLARIZATIONS * ps.settings.blockSize,
-          true,
-          "FFT (beamformer, 2nd)")),
+        delayCompensation(DelayAndBandPassKernel::Parameters(ps, false)),
 
         bandPassCorrection(BandPassCorrectionKernel::Parameters(ps))
     {
@@ -69,16 +67,13 @@ namespace LOFAR
       boost::shared_ptr<gpu::DeviceMemory> i_devA,
       boost::shared_ptr<gpu::DeviceMemory> i_devB)
       :
-      ProcessStep(parset, i_queue)
+      ProcessStep(parset, i_queue),
+      flagsPerChannel(boost::extents[parset.settings.antennaFields.size()])
     {
       devA=i_devA;
       devB=i_devB;
       (void)context;
 
-      doSecondFFT = 
-        (ps.settings.beamFormer.nrHighResolutionChannels /
-         ps.settings.beamFormer.nrDelayCompensationChannels) > 1;
-
       // intToFloat + FFTShift: A -> B
       intToFloatKernel = std::auto_ptr<IntToFloatKernel>(
         factories.intToFloat.create(queue, *devA, *devB));
@@ -87,26 +82,17 @@ namespace LOFAR
       firstFFT = std::auto_ptr<FFT_Kernel>(
         factories.firstFFT.create(queue, *devB, *devB));
 
+      // zeroing: B -> B
+      zeroingKernel = std::auto_ptr<ZeroingKernel>(
+        factories.zeroing.create(queue, *devB, *devB));
+
       // delayComp: B -> A
       delayCompensationKernel = std::auto_ptr<DelayAndBandPassKernel>(
         factories.delayCompensation.create(queue, *devB, *devA));
 
-      // Only perform second FFTshift and FFT if we have to.
-      if (doSecondFFT) {
-
-        // FFTShift: A -> A
-        secondFFTShiftKernel = std::auto_ptr<FFTShiftKernel>(
-          factories.fftShift.create(queue, *devA, *devA));
-
-        // FFT: A -> A
-        secondFFT = std::auto_ptr<FFT_Kernel>(
-          factories.secondFFT.create(queue, *devA, *devA));
-      }
-
       // bandPass: A -> B
       bandPassCorrectionKernel = std::auto_ptr<BandPassCorrectionKernel>(
         factories.bandPassCorrection.create(queue, *devA, *devB));
-
     }
 
     void BeamFormerPreprocessingStep::writeInput(const SubbandProcInputData &input)
@@ -133,18 +119,24 @@ namespace LOFAR
 
       firstFFT->enqueue(input.blockID);
 
+      // Convert input flags to channel flags
+      Flagger::convertFlagsToChannelFlags(
+        input.inputFlags,
+        flagsPerChannel,
+        ps.settings.blockSize / ps.settings.beamFormer.nrDelayCompensationChannels,
+        ps.settings.beamFormer.nrDelayCompensationChannels,
+        0);
+
+      zeroingKernel->enqueue(
+        input.blockID,
+        flagsPerChannel);
+
       // The centralFrequency and SAP immediate kernel args must outlive kernel runs.
       delayCompensationKernel->enqueue(
         input.blockID,
         ps.settings.subbands[input.blockID.globalSubbandIdx].centralFrequency,
         ps.settings.subbands[input.blockID.globalSubbandIdx].SAP);
 
-      if (doSecondFFT) {
-        secondFFTShiftKernel->enqueue(input.blockID);
-
-        secondFFT->enqueue(input.blockID);
-      }
-
       bandPassCorrectionKernel->enqueue(
         input.blockID);
     }
diff --git a/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/BeamFormerPreprocessingStep.h b/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/BeamFormerPreprocessingStep.h
index 3492a46b5f98e1a960365ab70403959a61a794bd..e9b46d3bc3c269e7356eb167b37dc5b542ae0839 100644
--- a/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/BeamFormerPreprocessingStep.h
+++ b/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/BeamFormerPreprocessingStep.h
@@ -31,6 +31,8 @@
 
 #include <GPUProc/MultiDimArrayHostBuffer.h>
 #include <CoInterface/BlockID.h>
+#include <CoInterface/SparseSet.h>
+#include <CoInterface/MultiDimArray.h>
 
 #include "SubbandProcInputData.h"
 #include "SubbandProcOutputData.h"
@@ -42,7 +44,7 @@
 #include <GPUProc/Kernels/FFTShiftKernel.h>
 #include <GPUProc/Kernels/FFT_Kernel.h>
 #include <GPUProc/Kernels/IntToFloatKernel.h>
-
+#include <GPUProc/Kernels/ZeroingKernel.h>
 
 namespace LOFAR
 {
@@ -59,9 +61,9 @@ namespace LOFAR
         KernelFactory<FFT_Kernel> firstFFT;
         KernelFactory<FFTShiftKernel> fftShift;
 
-        KernelFactory<DelayAndBandPassKernel> delayCompensation;
+        KernelFactory<ZeroingKernel> zeroing;
 
-        KernelFactory<FFT_Kernel> secondFFT;
+        KernelFactory<DelayAndBandPassKernel> delayCompensation;
 
         KernelFactory<BandPassCorrectionKernel> bandPassCorrection;
       };
@@ -78,6 +80,8 @@ namespace LOFAR
       void process(const SubbandProcInputData &input);
 
     private:
+      // Flags for FFT-ed data
+      MultiDimArray<SparseSet<unsigned>, 1> flagsPerChannel;
 
       //Data members
       boost::shared_ptr<gpu::DeviceMemory> devA;
@@ -89,20 +93,14 @@ namespace LOFAR
       // First (64 points) FFT
       std::auto_ptr<FFT_Kernel> firstFFT;
 
+      // Zeroing flagged samples
+      std::auto_ptr<ZeroingKernel> zeroingKernel;
+
       // Delay compensation
       std::auto_ptr<DelayAndBandPassKernel> delayCompensationKernel;
 
-      // Second FFT-shift
-      std::auto_ptr<FFTShiftKernel> secondFFTShiftKernel;
-
-      // Second (64 points) FFT
-      std::auto_ptr<FFT_Kernel> secondFFT;
-
       // Bandpass correction and tranpose
       std::auto_ptr<BandPassCorrectionKernel> bandPassCorrectionKernel;
-
-      // Flag that indicates if we need to perform a second FFT
-      bool doSecondFFT;
     };
   }
 }
diff --git a/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/CorrelatorStep.cc b/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/CorrelatorStep.cc
index 4807054c07e53cf932a32c452cda909550bbe233..b1ed83fe4b42817c68ce961e57bf593191e67d51 100644
--- a/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/CorrelatorStep.cc
+++ b/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/CorrelatorStep.cc
@@ -61,6 +61,12 @@ namespace LOFAR
             std::sqrt((double)ps.settings.correlator.nrChannels),
             "FIR (correlator)"))
           : NULL),
+      zeroing(ps.settings.correlator.nrChannels > 1
+          ? new KernelFactory<ZeroingKernel>(ZeroingKernel::Parameters(ps,
+            ps.settings.antennaFields.size(),
+            ps.settings.correlator.nrChannels,
+            "Zeroing (correlator)"))
+          : NULL),
 
       delayAndBandPass(DelayAndBandPassKernel::Parameters(ps, true)),
 
@@ -68,65 +74,6 @@ namespace LOFAR
     {
     }
 
-    void CorrelatorStep::Flagger::convertFlagsToChannelFlags(Parset const &ps,
-      MultiDimArray<LOFAR::SparseSet<unsigned>, 1>const &inputFlags,
-      MultiDimArray<SparseSet<unsigned>, 1>& flagsPerChannel)
-    {
-      unsigned numberOfChannels = ps.settings.correlator.nrChannels;
-      unsigned log2NrChannels = log2(numberOfChannels);
-      //Convert the flags per sample to flags per channel
-      for (unsigned station = 0; station < ps.settings.correlator.stations.size(); station ++) 
-      {
-        // get the flag ranges
-        const SparseSet<unsigned>::Ranges &ranges = inputFlags[station].getRanges();
-        for (SparseSet<unsigned>::const_iterator it = ranges.begin();
-          it != ranges.end(); it ++) 
-        {
-          unsigned begin_idx;
-          unsigned end_idx;
-          if (numberOfChannels == 1)
-          {
-            // do nothing, just take the ranges as supplied
-            begin_idx = it->begin; 
-            end_idx = std::min(static_cast<unsigned>(ps.settings.correlator.nrSamplesPerBlock), it->end);
-          }
-          else
-          {
-            // Never flag before the start of the time range               
-            // use bitshift to divide to the number of channels. 
-            //
-            // NR_TAPS is the width of the filter: they are
-            // absorbed by the FIR and thus should be excluded
-            // from the original flag set.
-            //
-            // The original flag set can span up to
-            //    [0, nrSamplesPerBlock + nrChannels * (NR_TAPS - 1))
-            // of which the FIRST (NR_TAPS - 1) samples belong to
-            // the previous block, and are used to initialise the
-            // FIR filter. Every sample i of the current block is thus
-            // actually at index (i + nrChannels * (NR_TAPS - 1)),
-            // or, after converting to channels, at index (i' + NR_TAPS - 1).
-            //
-            // At the same time, every sample is affected by
-            // the NR_TAPS-1 samples before it. So, any flagged
-            // sample in the input flags NR_TAPS samples in
-            // the channel.
-            begin_idx = std::max(0, 
-              (signed) (it->begin >> log2NrChannels) - NR_TAPS + 1);
-
-            // The min is needed, because flagging the last input
-            // samples would cause NR_TAPS subsequent samples to
-            // be flagged, which aren't necessarily part of this block.
-            end_idx = std::min(static_cast<unsigned>(ps.settings.correlator.nrSamplesPerBlock), 
-              ((it->end - 1) >> log2NrChannels) + 1);
-          }
-
-          // Now copy the transformed ranges to the channelflags
-          flagsPerChannel[station].include(begin_idx, end_idx);
-        }
-      }
-    }
-
 
     void CorrelatorStep::Flagger::propagateFlags(
       Parset const &parset,
@@ -139,7 +86,12 @@ namespace LOFAR
 
       // First transform the flags to channel flags: taking in account 
       // reduced resolution in time and the size of the filter
-      convertFlagsToChannelFlags(parset, inputFlags, flagsPerChannel);
+      Cobalt::Flagger::convertFlagsToChannelFlags(
+        inputFlags,
+        flagsPerChannel,
+        parset.settings.correlator.nrSamplesPerBlock,
+        parset.settings.correlator.nrChannels,
+        parset.settings.correlator.nrChannels == 1 ? 0 : NR_TAPS - 1);
 
       // Calculate the number of flags per baseline and assign to
       // output object.
@@ -321,6 +273,9 @@ namespace LOFAR
 
         // FFT: B -> E
         fftKernel = factories.fft->create(queue, *devB, devE);
+
+        // Zeroing: E -> E
+        zeroingKernel = factories.zeroing->create(queue, devE, devE);
       }
 
       // Delay and Bandpass: A/E -> B
@@ -360,6 +315,24 @@ namespace LOFAR
         firFilterKernel->enqueue(input.blockID, 
                                  input.blockID.subbandProcSubbandIdx);
         fftKernel->enqueue(input.blockID);
+
+        // Process flags enough to determine which data to zero
+        MultiDimArray<LOFAR::SparseSet<unsigned>, 1> flags = input.inputFlags;
+        MultiDimArray<SparseSet<unsigned>, 1> flagsPerChannel(
+          boost::extents[ps.settings.antennaFields.size()]);
+
+        firFilterKernel->prefixHistoryFlags(
+          flags, input.blockID.subbandProcSubbandIdx);
+
+        Cobalt::Flagger::convertFlagsToChannelFlags(
+          flags,
+          flagsPerChannel,
+          ps.settings.blockSize,
+          ps.settings.correlator.nrChannels,
+          NR_TAPS - 1);
+
+        // Zero the output of each FFT that had flagged input samples
+        zeroingKernel->enqueue(input.blockID, flagsPerChannel);
       }
 
       // Even if we skip delay compensation and bandpass correction (rare), run
diff --git a/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/CorrelatorStep.h b/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/CorrelatorStep.h
index 23be8fe062ee1eac057bfc370c65cfe849b529ab..048fb6f5fb5a114a812c8658b865ba76f4e85278 100644
--- a/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/CorrelatorStep.h
+++ b/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/CorrelatorStep.h
@@ -40,10 +40,12 @@
 #include "ProcessStep.h"
 
 #include <GPUProc/PerformanceCounter.h>
+#include <GPUProc/Flagger.h>
 #include <GPUProc/KernelFactory.h>
 #include <GPUProc/Kernels/DelayAndBandPassKernel.h>
 #include <GPUProc/Kernels/FIR_FilterKernel.h>
 #include <GPUProc/Kernels/FFT_Kernel.h>
+#include <GPUProc/Kernels/ZeroingKernel.h>
 #include <GPUProc/Kernels/CorrelatorKernel.h>
 
 
@@ -59,6 +61,7 @@ namespace LOFAR
 
         SmartPtr< KernelFactory<FFT_Kernel> > fft;
         SmartPtr< KernelFactory<FIR_FilterKernel> > firFilter;
+        SmartPtr< KernelFactory<ZeroingKernel> > zeroing;
 
         KernelFactory<DelayAndBandPassKernel> delayAndBandPass;
 
@@ -86,7 +89,7 @@ namespace LOFAR
       // \c propagateFlags can be called parallel to the kernels.
       // After the data is copied from the the shared buffer
       // \c applyNrValidSamples can be used to weight the visibilities
-      class Flagger
+      class Flagger: public Cobalt::Flagger
       {
       public:
         // 1. Convert input flags to channel flags, calculate the amount flagged
@@ -97,9 +100,7 @@ namespace LOFAR
 
         // 1.1 Convert the flags per station to channel flags, change time scale
         // if nchannel > 1
-        static void convertFlagsToChannelFlags(Parset const &ps,
-          MultiDimArray<SparseSet<unsigned>, 1> const &inputFlags,
-          MultiDimArray<SparseSet<unsigned>, 1> &flagsPerChannel);
+        // (Uses convertFlagsToChannelFlags)
 
         // 2. Calculate the weight based on the number of flags and apply this
         // weighting to all output values
@@ -147,6 +148,9 @@ namespace LOFAR
       // FFT
       SmartPtr<FFT_Kernel> fftKernel;
 
+      // Zeroing
+      SmartPtr<ZeroingKernel> zeroingKernel;
+
       // Delay and Bandpass
       std::auto_ptr<DelayAndBandPassKernel> delayAndBandPassKernel;
 
diff --git a/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/SubbandProcInputData.cc b/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/SubbandProcInputData.cc
index fa4e3c752050e3c59c80392b68b914ce385a1f03..4a01bdca66162ed159787dfa18903a2d01bb8bb4 100644
--- a/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/SubbandProcInputData.cc
+++ b/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/SubbandProcInputData.cc
@@ -77,7 +77,8 @@ namespace LOFAR
       // extract and apply the flags
       inputFlags[station] = metaData.flags;
 
-      flagInputSamples(station, metaData);
+      // NOTE: We do not zero flagged samples here anymore,
+      // as we are using the ZeroingKernel to do so.
 
       // extract and assign the delays for the station beams
 
@@ -115,29 +116,6 @@ namespace LOFAR
           tabDelays[SAP][station][tab] = 0.0;
       }
     }
-
-
-    // flag the input samples.
-    void SubbandProcInputData::flagInputSamples(unsigned station,
-                                              const SubbandMetaData& metaData)
-    {
-
-      // Get the size of a sample in bytes.
-      size_t sizeof_sample = sizeof *inputSamples.origin();
-
-      // Calculate the number elements to skip when striding over the second
-      // dimension of inputSamples.
-      size_t stride = inputSamples[station][0].num_elements();
-
-      // Zero the bytes in the input data for the flagged ranges.
-      for(SparseSet<unsigned>::const_iterator it = metaData.flags.getRanges().begin();
-        it != metaData.flags.getRanges().end(); ++it)
-      {
-        void *offset = inputSamples[station][it->begin].origin();
-        size_t size = stride * (it->end - it->begin) * sizeof_sample;
-        memset(offset, 0, size);
-      }
-    }
   }
 }
 
diff --git a/RTCP/Cobalt/GPUProc/test/Kernels/CMakeLists.txt b/RTCP/Cobalt/GPUProc/test/Kernels/CMakeLists.txt
index f26a7e0014098581dbf51ca0b889cdcdec031388..b57f2cf655f81ab8357305828dc74b88a7b0b568 100644
--- a/RTCP/Cobalt/GPUProc/test/Kernels/CMakeLists.txt
+++ b/RTCP/Cobalt/GPUProc/test/Kernels/CMakeLists.txt
@@ -32,6 +32,7 @@ if(UNITTEST++_FOUND AND BUILD_TESTING)
   lofar_add_test(tFFT_Kernel tFFT_Kernel.cc)
   lofar_add_test(tFFTShiftKernel tFFTShiftKernel.cc)
   lofar_add_test(tFIR_FilterKernel tFIR_FilterKernel.cc)
+  lofar_add_test(tZeroingKernel tZeroingKernel.cc)
   lofar_add_test(tKernelFunctions tKernelFunctions.cc)
   lofar_add_test(tCoherentStokesKernel
     tCoherentStokesKernel.cc KernelTestHelpers.cc)
@@ -44,6 +45,7 @@ if(UNITTEST++_FOUND AND BUILD_TESTING)
     tFFTShiftKernel
     tKernelFunctions
     tCoherentStokesKernel
+    tZeroingKernel
     PROPERTIES ENVIRONMENT "LOFARROOT=${PACKAGE_SOURCE_DIR}"
   )
   # This test uses quite a lot of memory, so force it to run serially.
diff --git a/RTCP/Cobalt/GPUProc/test/Kernels/tZeroingKernel.cc b/RTCP/Cobalt/GPUProc/test/Kernels/tZeroingKernel.cc
new file mode 100644
index 0000000000000000000000000000000000000000..625e0044e13f65b947fae1cd9ab74c96128c5347
--- /dev/null
+++ b/RTCP/Cobalt/GPUProc/test/Kernels/tZeroingKernel.cc
@@ -0,0 +1,277 @@
+//# tZeroingKernel.cc: test ZeroingKernel class
+//#
+//# Copyright (C) 2013  ASTRON (Netherlands Institute for Radio Astronomy)
+//# P.O. Box 2, 7990 AA Dwingeloo, The Netherlands
+//#
+//# This file is part of the LOFAR software suite.
+//# The LOFAR software suite is free software: you can redistribute it and/or
+//# modify it under the terms of the GNU General Public License as published
+//# by the Free Software Foundation, either version 3 of the License, or
+//# (at your option) any later version.
+//#
+//# The LOFAR software suite is distributed in the hope that it will be useful,
+//# but WITHOUT ANY WARRANTY; without even the implied warranty of
+//# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+//# GNU General Public License for more details.
+//#
+//# You should have received a copy of the GNU General Public License along
+//# with the LOFAR software suite. If not, see <http://www.gnu.org/licenses/>.
+//#
+//# $Id$
+
+#include <lofar_config.h>
+
+
+#include <lofar_config.h>
+
+#include <GPUProc/Kernels/ZeroingKernel.h>
+#include <GPUProc/MultiDimArrayHostBuffer.h>
+#include <CoInterface/BlockID.h>
+#include <CoInterface/SubbandMetaData.h>
+#include <CoInterface/Config.h>
+#include <CoInterface/Parset.h>
+#include <Common/LofarLogger.h>
+
+#include <UnitTest++.h>
+#include <boost/format.hpp>
+#include <boost/lexical_cast.hpp>
+#include <boost/scoped_ptr.hpp>
+#include <iostream>
+#include <iomanip>
+#include <vector>
+
+using namespace std;
+using namespace boost;
+using namespace LOFAR;
+using namespace LOFAR::Cobalt;
+
+typedef complex<float> fcomplex;
+
+// Fixture for testing correct translation of parset values
+struct ParsetSUT
+{
+  size_t  nrChannels, nrStations, nrSamplesSubband, nrBlockSize;
+
+  Parset parset;
+
+  ParsetSUT(size_t nrChannels, size_t nrStations,
+            size_t nrSamplesChannel)
+  :
+    nrChannels(nrChannels),
+    nrStations(nrStations),
+    nrSamplesSubband(nrSamplesChannel * nrChannels),
+    nrBlockSize(nrSamplesSubband)
+  {
+    // 4 for number of stokes
+    parset.add("Observation.DataProducts.Output_Correlated.enabled", "true");
+    parset.add("Cobalt.Correlator.nrChannelsPerSubband", lexical_cast<string>(nrChannels));
+    parset.add("Observation.VirtualInstrument.stationList",
+      str(format("[%d*RS000]") % nrStations));
+    parset.add("Observation.antennaSet", "LBA_INNER");
+    parset.add("Observation.rspBoardList", "[0]");
+    parset.add("Observation.rspSlotList", "[0]");
+    parset.add("Cobalt.blockSize",
+      lexical_cast<string>(nrBlockSize));
+    parset.add("Observation.nrBeams", "1");
+    parset.add("Observation.Beam[0].subbandList", "[0]");
+    parset.add("Observation.DataProducts.Output_Correlated.filenames", "[dummy.raw]");
+    parset.add("Observation.DataProducts.Output_Correlated.locations",  "[:.]");
+    //parset.add(""); //ps.settings.beamFormer.nrDelayCompensationChannels
+    parset.updateSettings();
+
+  }
+};
+
+
+struct SUTWrapper : ParsetSUT
+{
+  gpu::Device device;
+  gpu::Context context;
+  gpu::Stream stream;
+  size_t nrSTABs;
+  KernelFactory<ZeroingKernel> factory;
+  MultiDimArrayHostBuffer<fcomplex, 4> hData;
+  MultiDimArrayHostBuffer<fcomplex, 4> hRefOutput;
+  gpu::DeviceMemory deviceMemory;
+  scoped_ptr<ZeroingKernel> kernel;
+
+  SUTWrapper(size_t nrChannels , size_t nrStations, size_t nrSamplesPerChannel) :
+    ParsetSUT(nrChannels, nrStations, nrSamplesPerChannel),
+    device(gpu::Platform().devices()[0]),
+    context(device),
+    stream(context),
+    nrSTABs(parset.settings.antennaFields.size()),
+    factory(ZeroingKernel::Parameters(parset, nrStations, nrChannels)),
+    hData(
+      boost::extents[nrStations][NR_POLARIZATIONS][nrSamplesPerChannel][nrChannels],
+      context),
+    hRefOutput(
+      boost::extents[nrStations][NR_POLARIZATIONS][nrSamplesPerChannel][nrChannels],
+      context),
+    deviceMemory(context, factory.bufferSize(ZeroingKernel::INPUT_DATA)),
+    kernel(factory.create(stream, deviceMemory, deviceMemory))
+  {
+    initializeHostBuffers();
+  }
+
+
+  // Initialize all the elements of the input host buffer to zero, and all
+  // elements of the output host buffer to NaN.
+  void initializeHostBuffers()
+  {
+    cout << "Kernel buffersize set to: " << factory.bufferSize(
+              ZeroingKernel::INPUT_DATA) << endl;
+    cout << "\nInitializing host buffers..." << endl
+      << " buffers.input.size()  = " << setw(7) << deviceMemory.size() << endl
+      << " hData.size()  = " << setw(7) << hData.size() << endl
+      << " buffers.output.size() = " << setw(7) << deviceMemory.size() 
+      << endl;
+    CHECK_EQUAL(deviceMemory.size(), hData.size());
+    fill(hData.data(), hData.data() + hData.num_elements(),
+             fcomplex(1.0f, 2.0f));
+    fill(hRefOutput.data(), hRefOutput.data() + hRefOutput.num_elements(),
+             fcomplex(1.0f, 2.0f));
+  }
+
+  void runKernel(const MultiDimArray<SparseSet<unsigned>, 1> &channelFlags)
+  {
+    // Dummy BlockID
+    BlockID blockId;
+    // Copy input data from host- to device buffer synchronously
+    stream.writeBuffer(deviceMemory, hData, true);
+    // Launch the kernel
+    kernel->enqueue(blockId, channelFlags);
+    // Copy output data from device- to host buffer synchronously
+    stream.readBuffer(hData, deviceMemory, true);
+  }
+
+};
+
+// Test if we can succesfully create all necessary classes and run the kernel
+TEST(BasicRun)
+{
+  cout << "running test: BasicRun" << endl;
+  SUTWrapper sut(1, 2, 4096);
+  MultiDimArray<SparseSet<unsigned>, 1> channelFlags(boost::extents[sut.nrSTABs]);
+
+  sut.runKernel(channelFlags);
+}
+
+// If we flag nothing, nothing should change
+TEST(NothingFlaggedTest)
+{
+  cout << "running test: NothingFlaggedTest" << endl;
+
+  SUTWrapper sut(64, 5, 1024);
+  MultiDimArray<SparseSet<unsigned>, 1> channelFlags(boost::extents[sut.nrSTABs]);
+
+  // run kernel
+  cout << "Running kernel" << endl;
+  sut.runKernel(channelFlags);
+
+  // compare output
+  cout << "Comparing output" << endl;
+  CHECK_ARRAY_EQUAL(sut.hRefOutput.data(),
+    sut.hData.data(),
+    sut.hData.num_elements());
+  cout << "Comparing output: done" << endl;
+}
+
+// If we flag one sample, it should zero in the output
+TEST(SingleFlagTest)
+{
+  cout << "running test: SingleFlagTest" << endl;
+
+  SUTWrapper sut(64, 1, 1024);
+  MultiDimArray<SparseSet<unsigned>, 1> channelFlags(boost::extents[sut.nrSTABs]);
+
+  // flag a sample
+  cout << "Flagging 1 sample" << endl;
+  channelFlags[0].include(13);
+
+  // also zero reference output
+  for (unsigned pol = 0; pol < NR_POLARIZATIONS; pol++)
+    for (unsigned c = 0; c < sut.nrChannels; c++)
+      sut.hRefOutput[0][pol][13][c] = 0.0f;
+
+  // run kernel
+  cout << "Running kernel" << endl;
+  sut.runKernel(channelFlags);
+
+  // compare output
+  cout << "Comparing output" << endl;
+  CHECK_ARRAY_EQUAL(sut.hRefOutput.data(),
+    sut.hData.data(),
+    sut.hData.num_elements());
+  cout << "Comparing output: done" << endl;
+}
+
+// Flag patterns of input and check if the kernel zeroes the correct samples
+TEST(PatternsTest)
+{
+  cout << "running test: PatternsTest" << endl;
+
+  //size_t nrStations[] = { 12, 53, 66, 77, 80 };
+  size_t nrStations[] = { 80 };
+
+  size_t nrSamples = 32768; // For performance tests, use 196608, and disable output verification
+  nrSamples = 196608;
+  size_t nrChannels[] = { 1, 16, 64, 256 };
+  size_t maxNrRanges = SubbandMetaData::MAXNRFLAGRANGES;
+
+  for (unsigned st = 0; st < sizeof(nrStations)/sizeof(nrStations[0]); ++st)
+  for (unsigned ch = 0; ch < sizeof(nrChannels)/sizeof(nrChannels[0]); ++ch)
+  {
+    cout << "*******testing stations: " << nrStations[st]
+      << " channels: " << nrChannels[ch] << endl;
+
+    SUTWrapper sut(nrChannels[ch], nrStations[st], nrSamples / nrChannels[ch]);
+    MultiDimArray<SparseSet<unsigned>, 1> channelFlags(boost::extents[sut.nrSTABs]);
+
+    // flag samples (with different patterns per station)
+    cout << "Flagging samples" << endl;
+    for (unsigned st_z = 0; st_z < nrStations[st]; st_z++)
+    for (unsigned sample_z = st_z; sample_z < nrSamples/nrChannels[ch]; sample_z += st_z + 1) {
+      if (channelFlags[st_z].count() >= maxNrRanges)
+        continue;
+
+      channelFlags[st_z].include(sample_z);
+
+      // also zero reference output
+      for (unsigned pol = 0; pol < NR_POLARIZATIONS; pol++)
+        for (unsigned c = 0; c < sut.nrChannels; c++)
+          sut.hRefOutput[st_z][pol][sample_z][c] = 0.0f;
+    }
+
+    // run kernel
+    cout << "Running kernel" << endl;
+    sut.runKernel(channelFlags);
+
+    // compare output
+    cout << "Comparing output" << endl;
+    /*
+    CHECK_ARRAY_EQUAL(sut.hRefOutput.data(),
+      sut.hData.data(),
+      sut.hData.num_elements());
+    */
+    cout << "Comparing output: done" << endl;
+
+  }
+}
+
+
+int main()
+{
+  INIT_LOGGER("tZeroingKernel");
+
+  try {
+    gpu::Platform pf;
+  }
+  catch (gpu::GPUException&) {
+    cerr << "No GPU device(s) found. Skipping tests." << endl;
+    return 3;
+  }
+  return UnitTest::RunAllTests() == 0 ? 0 : 1;  
+
+}
+
diff --git a/RTCP/Cobalt/GPUProc/test/Kernels/tZeroingKernel.sh b/RTCP/Cobalt/GPUProc/test/Kernels/tZeroingKernel.sh
new file mode 100755
index 0000000000000000000000000000000000000000..1b06659b6bad332519888659b0a76bffb26a9a7b
--- /dev/null
+++ b/RTCP/Cobalt/GPUProc/test/Kernels/tZeroingKernel.sh
@@ -0,0 +1,3 @@
+#!/bin/bash
+./runctest.sh tZeroingKernel
+
diff --git a/RTCP/Cobalt/GPUProc/test/SubbandProcs/tBeamFormerSubbandProcProcessSb.cc b/RTCP/Cobalt/GPUProc/test/SubbandProcs/tBeamFormerSubbandProcProcessSb.cc
index 51bafeb59c04f534db6cc0c8d4193602493c4f8c..b3df8475e33705d1a39f405574af16ae8f47897c 100644
--- a/RTCP/Cobalt/GPUProc/test/SubbandProcs/tBeamFormerSubbandProcProcessSb.cc
+++ b/RTCP/Cobalt/GPUProc/test/SubbandProcs/tBeamFormerSubbandProcProcessSb.cc
@@ -86,8 +86,7 @@ int main() {
   const size_t nrBitsPerSample = ps.settings.nrBitsPerSample;
   const size_t nrBytesPerComplexSample = ps.nrBytesPerComplexSample();
 
-  const unsigned fft1Size = ps.settings.beamFormer.nrDelayCompensationChannels;
-  const unsigned fft2Size = ps.settings.beamFormer.nrHighResolutionChannels / fft1Size;
+  const unsigned fftSize = ps.settings.beamFormer.nrDelayCompensationChannels;
 
   // We only support 8-bit or 16-bit input samples
   ASSERT(nrBitsPerSample == 8 || nrBitsPerSample == 16);
@@ -105,8 +104,7 @@ int main() {
     "\n  nrSamplesPerSubband = " << nrSamplesPerSubband <<
     "\n  nrBitsPerSample = " << nrBitsPerSample <<
     "\n  nrBytesPerComplexSample = " << nrBytesPerComplexSample <<
-    "\n  fft1Size = " << fft1Size <<
-    "\n  fft2Size = " << fft2Size);
+    "\n  fftSize = " << fftSize);
 
   // Create very simple kernel programs, with predictable output. Skip as much
   // as possible. Nr of channels/sb from the parset is 1, so the PPF will not
@@ -186,7 +184,7 @@ int main() {
   // Coherent Stokes takes the stokes of the sums of all fields (stokes(sum(x))).
   // We can calculate the expected output values, since we're supplying a
   // complex sine/cosine input signal. We only have Stokes-I, so the output
-  // should be: nrStations * (amp * scaleFactor * fft1Size * fft2Size) ** 2
+  // should be: nrStations * (amp * scaleFactor * fftSize) ** 2
   // - amp is set to the maximum possible value for the bit-mode:
   //   i.e. 127 for 8-bit and 32767 for 16-bit mode
   // - scaleFactor is the scaleFactor applied by the IntToFloat kernel. 
@@ -195,7 +193,7 @@ int main() {
   // - for 16-bit input: (2 * 32767 * 1 * 64 * 64)^2 = 72053196058525696
   // - for 8-bit input: (2 * 127 * 16 * 64 * 64)^2 = 277094110068736
 
-  float coh_outVal = sqr(nrBFStations * amplitude * scaleFactor * fft1Size * fft2Size);
+  float coh_outVal = sqr(nrBFStations * amplitude * scaleFactor * fftSize);
   cout << "coherent outVal = " << coh_outVal << endl;
 
   for (size_t t = 0; t < ps.settings.beamFormer.coherentSettings.nrSamples; t++)
@@ -210,7 +208,7 @@ int main() {
   // Incoherent Stokes sums the stokes of each field (sum(stokes(x))).
   // We can calculate the expected output values, since we're supplying a
   // complex sine/cosine input signal. We only have Stokes-I, so the output
-  // should be: nrStation * (amp * scaleFactor * fft1Size * fft2Size)^2
+  // should be: nrStation * (amp * scaleFactor * fftSize)^2
   // - amp is set to the maximum possible value for the bit-mode:
   //   i.e. 127 for 8-bit and 32767 for 16-bit mode
   // - scaleFactor is the scaleFactor applied by the IntToFloat kernel. 
@@ -219,7 +217,7 @@ int main() {
   // - for 16-bit input: 2 * (32767 * 1 * 64 * 64)^2 = 36026598029262848
   // - for 8-bit input: 2 * (127 * 16 * 64 * 64)^2 = 138547055034368
 
-  float incoh_outVal = nrBFStations * sqr(amplitude * scaleFactor * fft1Size * fft2Size);
+  float incoh_outVal = nrBFStations * sqr(amplitude * scaleFactor * fftSize);
   cout << "incoherent outVal = " << incoh_outVal << endl;
 
   for (size_t t = 0; t < ps.settings.beamFormer.incoherentSettings.nrSamples; t++)
diff --git a/RTCP/Cobalt/GPUProc/test/SubbandProcs/tCoherentStokesBeamFormerSubbandProcProcessSb.cc b/RTCP/Cobalt/GPUProc/test/SubbandProcs/tCoherentStokesBeamFormerSubbandProcProcessSb.cc
index 377400a75b973c35a71f8bbd317b4f2388c02473..3220febe8e078307ae10cb9d5038da035d42ad8f 100644
--- a/RTCP/Cobalt/GPUProc/test/SubbandProcs/tCoherentStokesBeamFormerSubbandProcProcessSb.cc
+++ b/RTCP/Cobalt/GPUProc/test/SubbandProcs/tCoherentStokesBeamFormerSubbandProcProcessSb.cc
@@ -99,8 +99,7 @@ int main(/*int argc, char *argv[]*/) {
   const size_t nrBitsPerSample = ps.settings.nrBitsPerSample;
   const size_t nrBytesPerComplexSample = ps.nrBytesPerComplexSample();
 
-  const unsigned fft1Size = ps.settings.beamFormer.nrDelayCompensationChannels;
-  const unsigned fft2Size = ps.settings.beamFormer.nrHighResolutionChannels / fft1Size;
+  const unsigned fftSize = ps.settings.beamFormer.nrDelayCompensationChannels;
 
   // We only support 8-bit or 16-bit input samples
   ASSERT(nrBitsPerSample == 8 || nrBitsPerSample == 16);
@@ -118,8 +117,7 @@ int main(/*int argc, char *argv[]*/) {
     "\n  nrSamplesPerSubband = " << nrSamplesPerSubband <<
     "\n  nrBitsPerSample = " << nrBitsPerSample <<
     "\n  nrBytesPerComplexSample = " << nrBytesPerComplexSample <<
-    "\n  fft1Size = " << fft1Size <<
-    "\n  fft2Size = " << fft2Size);
+    "\n  fftSize = " << fftSize);
 
   // Output array sizes
   const size_t nrStokes = ps.settings.beamFormer.coherentSettings.nrStokes;
@@ -210,7 +208,7 @@ int main(/*int argc, char *argv[]*/) {
 
   // We can calculate the expected output values, since we're supplying a
   // complex sine/cosine input signal. We only have Stokes-I, so the output
-  // should be: (nrStations * amp * scaleFactor * fft1Size * fft2Size) ** 2
+  // should be: (nrStations * amp * scaleFactor * fftSize) ** 2
   // - amp is set to the maximum possible value for the bit-mode:
   //   i.e. 127 for 8-bit and 32767 for 16-bit mode
   // - scaleFactor is the scaleFactor applied by the IntToFloat kernel. 
@@ -220,8 +218,8 @@ int main(/*int argc, char *argv[]*/) {
   // - for 8-bit input: (5 * 127 * 16 * 64 * 64) ** 2 = 1731838187929600
 
   float outVal = 
-    (nrStations * amplitude * scaleFactor * fft1Size * fft2Size) *
-    (nrStations * amplitude * scaleFactor * fft1Size * fft2Size);
+    (nrStations * amplitude * scaleFactor * fftSize) *
+    (nrStations * amplitude * scaleFactor * fftSize);
   cout << "outVal = " << setprecision(12) << outVal << endl;
 
   // Skip output validation when started with commandline parsed parameters!
diff --git a/RTCP/Cobalt/GPUProc/test/SubbandProcs/tCorrelatorStep.cc b/RTCP/Cobalt/GPUProc/test/SubbandProcs/tCorrelatorStep.cc
index 9cd96a11db0d31c4b1c5a11526a49b8cb11be868..c23a63ec6812c688643cb6c76e184e2728677b3f 100644
--- a/RTCP/Cobalt/GPUProc/test/SubbandProcs/tCorrelatorStep.cc
+++ b/RTCP/Cobalt/GPUProc/test/SubbandProcs/tCorrelatorStep.cc
@@ -76,7 +76,13 @@ TEST(convertFlagsToChannelFlags)
           boost::extents[parset.settings.antennaFields.size()]);
 
   // ****** perform the translation
-  CorrelatorStep::Flagger::convertFlagsToChannelFlags(parset, inputFlags, flagsPerChannel);
+  CorrelatorStep::Flagger::convertFlagsToChannelFlags(
+    inputFlags,
+    flagsPerChannel,
+    parset.settings.correlator.nrSamplesPerBlock,
+    parset.settings.correlator.nrChannels,
+    NR_TAPS - 1
+    );
   // ******
 
   //validate the corner cases
diff --git a/RTCP/Cobalt/GPUProc/test/SubbandProcs/tFlysEyeBeamFormerSubbandProcProcessSb.cc b/RTCP/Cobalt/GPUProc/test/SubbandProcs/tFlysEyeBeamFormerSubbandProcProcessSb.cc
index be24b2dfdcd36507d323234930c44a306dc128f7..642398919d064b0be7225d5f291f161044ff12c8 100644
--- a/RTCP/Cobalt/GPUProc/test/SubbandProcs/tFlysEyeBeamFormerSubbandProcProcessSb.cc
+++ b/RTCP/Cobalt/GPUProc/test/SubbandProcs/tFlysEyeBeamFormerSubbandProcProcessSb.cc
@@ -82,8 +82,7 @@ int main() {
   const size_t nrBitsPerSample = ps.settings.nrBitsPerSample;
   const size_t nrBytesPerComplexSample = ps.nrBytesPerComplexSample();
 
-  const unsigned fft1Size = ps.settings.beamFormer.nrDelayCompensationChannels;
-  const unsigned fft2Size = ps.settings.beamFormer.nrHighResolutionChannels / fft1Size;
+  const unsigned fftSize = ps.settings.beamFormer.nrDelayCompensationChannels;
 
   // We only support 8-bit or 16-bit input samples
   ASSERT(nrBitsPerSample == 8 || nrBitsPerSample == 16);
@@ -101,8 +100,7 @@ int main() {
     "\n  nrSamplesPerSubband = " << nrSamplesPerSubband <<
     "\n  nrBitsPerSample = " << nrBitsPerSample <<
     "\n  nrBytesPerComplexSample = " << nrBytesPerComplexSample <<
-    "\n  fft1Size = " << fft1Size <<
-    "\n  fft2Size = " << fft2Size);
+    "\n  fftSize = " << fftSize);
 
   // Because this is fly's eye mode!
   ASSERT(nrStations == maxNrTABsPerSAP);
@@ -199,7 +197,7 @@ int main() {
 
   // We can calculate the expected output values, since we're supplying a
   // complex sine/cosine input signal. We only have Stokes-I, so the output
-  // should be: (amp * scaleFactor * fft1Size * fft2Size) ** 2
+  // should be: (amp * scaleFactor * fftSize) ** 2
   // - amp is set to the maximum possible value for the bit-mode:
   //   i.e. 127 for 8-bit and 32767 for 16-bit mode
   // - scaleFactor is the scaleFactor applied by the IntToFloat kernel. 
@@ -209,8 +207,8 @@ int main() {
   // - for 8-bit input: (127 * 16 * 64 * 64) ** 2 = 69273527517184
 
   float outVal = 
-    amplitude * scaleFactor * fft1Size * fft2Size *
-    amplitude * scaleFactor * fft1Size * fft2Size;
+    amplitude * scaleFactor * fftSize *
+    amplitude * scaleFactor * fftSize;
   cout << "outVal = " << setprecision(12) << outVal << endl;
 
   for (size_t tab = 0; tab < nrTABs; tab++)