diff --git a/RTCP/Cobalt/CobaltTest/test/tManyPartTABOutput.cc b/RTCP/Cobalt/CobaltTest/test/tManyPartTABOutput.cc
index b3dbc3a8aaa00206857740876aefdf68911a2526..7f2ee0a62ad72f1d86d7ec53b2a4879f23846fb0 100644
--- a/RTCP/Cobalt/CobaltTest/test/tManyPartTABOutput.cc
+++ b/RTCP/Cobalt/CobaltTest/test/tManyPartTABOutput.cc
@@ -30,7 +30,7 @@
 #include <Common/LofarLogger.h>
 #include <CoInterface/Parset.h>
 #include <GPUProc/Pipelines/BeamFormerPipeline.h>
-#include <GPUProc/SubbandProcs/BeamFormerSubbandProc.h>
+#include <GPUProc/cuda/SubbandProcs/SubbandProcOutputData.h>
 #include <GPUProc/Station/StationInput.h>
 #include <GPUProc/Storage/StorageProcesses.h>
 
@@ -44,8 +44,7 @@ using boost::str;
 SmartPtr<SubbandProcOutputData> getTestSbCohData(const Parset& ps, gpu::Context& ctx,
                                                  unsigned blockIdx, unsigned sbIdx)
 { 
-  // BeamFormedData is a sub-class of SubbandProcOutputData.
-  BeamFormedData *bfData = new BeamFormedData(ps, ctx);
+  SubbandProcOutputData *bfData = new SubbandProcOutputData(ps, ctx);
 
   bfData->blockID.block = blockIdx;
   bfData->blockID.globalSubbandIdx = sbIdx;
diff --git a/RTCP/Cobalt/CobaltTest/test/tMultiPartTABOutput.cc b/RTCP/Cobalt/CobaltTest/test/tMultiPartTABOutput.cc
index 2f3e6c3719def70091ce26701c2b4ecdd576c449..e8473b121be5579b504461ada83a8652eef1d752 100644
--- a/RTCP/Cobalt/CobaltTest/test/tMultiPartTABOutput.cc
+++ b/RTCP/Cobalt/CobaltTest/test/tMultiPartTABOutput.cc
@@ -30,7 +30,7 @@
 #include <Common/LofarLogger.h>
 #include <CoInterface/Parset.h>
 #include <GPUProc/Pipelines/BeamFormerPipeline.h>
-#include <GPUProc/SubbandProcs/BeamFormerSubbandProc.h>
+#include <GPUProc/cuda/SubbandProcs/SubbandProcOutputData.h>
 #include <GPUProc/Station/StationInput.h>
 #include <GPUProc/Storage/StorageProcesses.h>
 
@@ -44,8 +44,7 @@ using boost::str;
 SmartPtr<SubbandProcOutputData> getTestSbIncohData(const Parset& ps, gpu::Context& ctx,
                                                    unsigned blockIdx, unsigned sbIdx)
 {
-  // BeamFormedData is a sub-class of SubbandProcOutputData.
-  BeamFormedData *bfData = new BeamFormedData(ps, ctx);
+  SubbandProcOutputData *bfData = new SubbandProcOutputData(ps, ctx);
 
   bfData->blockID.block = blockIdx;
   bfData->blockID.globalSubbandIdx = sbIdx;
diff --git a/RTCP/Cobalt/GPUProc/src/CMakeLists.txt b/RTCP/Cobalt/GPUProc/src/CMakeLists.txt
index 68663c1f5faa564ad6d3e6e1e0a2f3aa2929a09f..44ee8edd9987e5ffffb66e5ff393ce6a0b24d604 100644
--- a/RTCP/Cobalt/GPUProc/src/CMakeLists.txt
+++ b/RTCP/Cobalt/GPUProc/src/CMakeLists.txt
@@ -56,7 +56,8 @@ if(USE_CUDA)
     cuda/Pipelines/BeamFormerPipeline.cc
 #    cuda/Pipelines/UHEP_Pipeline.cc
     cuda/SubbandProcs/SubbandProc.cc
-    cuda/SubbandProcs/BeamFormerSubbandProc.cc
+    cuda/SubbandProcs/SubbandProcInputData.cc
+    cuda/SubbandProcs/SubbandProcOutputData.cc
     cuda/SubbandProcs/BeamFormerFactories.cc
     cuda/SubbandProcs/CorrelatorStep.cc
     cuda/SubbandProcs/BeamFormerPreprocessingStep.cc
diff --git a/RTCP/Cobalt/GPUProc/src/SubbandProcs/BeamFormerSubbandProc.h b/RTCP/Cobalt/GPUProc/src/SubbandProcs/BeamFormerSubbandProc.h
deleted file mode 100644
index dac9468ff602722838a57ec33fae7203a1c43546..0000000000000000000000000000000000000000
--- a/RTCP/Cobalt/GPUProc/src/SubbandProcs/BeamFormerSubbandProc.h
+++ /dev/null
@@ -1,41 +0,0 @@
-//# BeamFormerSubbandProc.h
-//#
-//# Copyright (C) 2013  ASTRON (Netherlands Institute for Radio Astronomy)
-//# P.O. Box 2, 7990 AA Dwingeloo, The Netherlands
-//#
-//# This file is part of the LOFAR software suite.
-//# The LOFAR software suite is free software: you can redistribute it and/or
-//# modify it under the terms of the GNU General Public License as published
-//# by the Free Software Foundation, either version 3 of the License, or
-//# (at your option) any later version.
-//#
-//# The LOFAR software suite is distributed in the hope that it will be useful,
-//# but WITHOUT ANY WARRANTY; without even the implied warranty of
-//# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-//# GNU General Public License for more details.
-//#
-//# You should have received a copy of the GNU General Public License along
-//# with the LOFAR software suite. If not, see <http://www.gnu.org/licenses/>.
-//#
-//# $Id$
-
-// \file
-// Include the right GPU API include with our options.
-
-#ifndef LOFAR_GPUPROC_BEAM_FORMER_SUBBAND_PROC_H
-#define LOFAR_GPUPROC_BEAM_FORMER_SUBBAND_PROC_H
-
-#if defined (USE_CUDA) && defined (USE_OPENCL)
-# error "Either CUDA or OpenCL must be enabled, not both"
-#endif
-
-#if defined (USE_CUDA)
-# include <GPUProc/cuda/SubbandProcs/BeamFormerSubbandProc.h>
-#elif defined (USE_OPENCL)
-# include <GPUProc/opencl/SubbandProcs/BeamFormerSubbandProc.h>
-#else
-# error "Either CUDA or OpenCL must be enabled, not neither"
-#endif
-
-#endif
-
diff --git a/RTCP/Cobalt/GPUProc/src/cuda/Pipelines/BeamFormerPipeline.cc b/RTCP/Cobalt/GPUProc/src/cuda/Pipelines/BeamFormerPipeline.cc
index 70feebaf0b4a4361709a5d74809d0611a70a5503..0e8128425de5cca7f5a896df9a3f48688e9ef3a6 100644
--- a/RTCP/Cobalt/GPUProc/src/cuda/Pipelines/BeamFormerPipeline.cc
+++ b/RTCP/Cobalt/GPUProc/src/cuda/Pipelines/BeamFormerPipeline.cc
@@ -37,7 +37,7 @@
 #include <CoInterface/SmartPtr.h>
 #include <CoInterface/Stream.h>
 #include <CoInterface/TimeFuncs.h>
-#include <GPUProc/SubbandProcs/BeamFormerSubbandProc.h>
+#include <GPUProc/SubbandProcs/SubbandProc.h>
 #include <GPUProc/gpu_wrapper.h>
 #include <GPUProc/gpu_utils.h>
 #include <GPUProc/global_defines.h>
@@ -151,7 +151,7 @@ namespace LOFAR
       for (size_t i = 0; i < workQueues.size(); ++i) {
         gpu::Context context(devices[i % devices.size()]);
 
-        workQueues[i] = new BeamFormerSubbandProc(ps, context, factories, nrSubbandsPerSubbandProc);
+        workQueues[i] = new SubbandProc(ps, context, factories, nrSubbandsPerSubbandProc);
       }
     }
 
@@ -227,8 +227,6 @@ namespace LOFAR
       // Process pool elements until end-of-output
       while ((data = inputQueue.remove()) != NULL) 
       {
-        BeamFormedData &beamFormedData = dynamic_cast<BeamFormedData&>(*data);
-
         const struct BlockID id = data->blockID;
         ASSERT( globalSubbandIdx == id.globalSubbandIdx );
         ASSERT( id.block >= 0 ); // Negative blocks should not reach storage
@@ -267,7 +265,7 @@ namespace LOFAR
             const size_t nrSamples =  stokes.nrSamples;
 
             // Our data has the shape
-            //   beamFormedData.(in)coherentData[tab][stokes][sample][channel]
+            //   data->(in)coherentData[tab][stokes][sample][channel]
             //
             // To transpose our data, we copy a slice representing
             //   slice[sample][channel]
@@ -291,8 +289,8 @@ namespace LOFAR
             MultiDimArray<float, 2> srcData(
                 boost::extents[nrSamples][nrChannels],
                 file.coherent
-                     ? beamFormedData.coherentData[file.coherentIdxInSAP][file.stokesNr].origin()
-                     : beamFormedData.incoherentData[file.incoherentIdxInSAP][file.stokesNr].origin(),
+                     ? data->coherentData[file.coherentIdxInSAP][file.stokesNr].origin()
+                     : data->incoherentData[file.incoherentIdxInSAP][file.stokesNr].origin(),
                 false);
 
             // Copy data to block
@@ -366,13 +364,12 @@ namespace LOFAR
 
       // Process pool elements until end-of-output
       while ((data = inputQueue.remove()) != NULL) {
-        BeamFormedData &beamFormedData = dynamic_cast<BeamFormedData&>(*data);
-        CorrelatedData &correlatedData = beamFormedData.correlatedData;
+        CorrelatedData &correlatedData = data->correlatedData;
 
         const struct BlockID id = data->blockID;
         ASSERT( globalSubbandIdx == id.globalSubbandIdx );
 
-        if (beamFormedData.emit_correlatedData) {
+        if (data->emit_correlatedData) {
           ASSERT(ps.settings.correlator.enabled);
           ASSERT(outputStream.get());
 
diff --git a/RTCP/Cobalt/GPUProc/src/cuda/Pipelines/BeamFormerPipeline.h b/RTCP/Cobalt/GPUProc/src/cuda/Pipelines/BeamFormerPipeline.h
index d330e07a84570fa2b9eef32ad167122d0c66445d..7ad287c2403c7ae4431bfe8e7d1f83ed7fc3c422 100644
--- a/RTCP/Cobalt/GPUProc/src/cuda/Pipelines/BeamFormerPipeline.h
+++ b/RTCP/Cobalt/GPUProc/src/cuda/Pipelines/BeamFormerPipeline.h
@@ -28,6 +28,8 @@
 
 #include "Pipeline.h"
 
+#include <MACIO/RTmetadata.h>
+
 #include <GPUProc/SubbandProcs/BeamFormerFactories.h>
 
 namespace LOFAR
diff --git a/RTCP/Cobalt/GPUProc/src/cuda/Pipelines/Pipeline.cc b/RTCP/Cobalt/GPUProc/src/cuda/Pipelines/Pipeline.cc
index d1af82ea14e71d5680f6923e0d9ed7a8805b06cf..bd9abc776ab41f4cdd8c1702e465fce10b51b569 100644
--- a/RTCP/Cobalt/GPUProc/src/cuda/Pipelines/Pipeline.cc
+++ b/RTCP/Cobalt/GPUProc/src/cuda/Pipelines/Pipeline.cc
@@ -39,7 +39,6 @@
 #include <GPUProc/gpu_utils.h>
 #include <GPUProc/global_defines.h>
 #include <GPUProc/Kernels/Kernel.h>
-#include <GPUProc/SubbandProcs/SubbandProc.h>
 #include <InputProc/SampleType.h>
 #include <InputProc/RSPTimeStamp.h>
 
diff --git a/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/BeamFormerCoherentStep.cc b/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/BeamFormerCoherentStep.cc
index b3fa3e46d47c3fcf551c2d11c8bab64a515ddf3b..9f0ed9b8d7c431f070ba3573a83b5cc3d9d8bde4 100644
--- a/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/BeamFormerCoherentStep.cc
+++ b/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/BeamFormerCoherentStep.cc
@@ -32,8 +32,6 @@
 #include <CoInterface/Parset.h>
 
 #include "SubbandProc.h"
-
-#include "BeamFormerSubbandProc.h"
 #include "BeamFormerCoherentStep.h"
 
 #include <iomanip>
@@ -184,7 +182,7 @@ void BeamFormerCoherentStep::process(const SubbandProcInputData &input)
 }
 
 
-void BeamFormerCoherentStep::readOutput(BeamFormedData &output)
+void BeamFormerCoherentStep::readOutput(SubbandProcOutputData &output)
 {
   if (nrCoherent(output.blockID) == 0)
     return;
diff --git a/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/BeamFormerCoherentStep.h b/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/BeamFormerCoherentStep.h
index e1bdbc3677456d7e53ca3cd1991f073af95513c6..12572f7953f7248dbc8fee579c92a5dde59514e4 100644
--- a/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/BeamFormerCoherentStep.h
+++ b/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/BeamFormerCoherentStep.h
@@ -33,7 +33,8 @@
 #include <GPUProc/MultiDimArrayHostBuffer.h>
 #include <CoInterface/BlockID.h>
 
-#include "SubbandProc.h"
+#include "SubbandProcInputData.h"
+#include "SubbandProcOutputData.h"
 #include "ProcessStep.h"
 
 #include <GPUProc/Kernels/BeamFormerKernel.h>
@@ -50,7 +51,6 @@ namespace LOFAR
   {
     //# Forward declarations
     struct BeamFormerFactories;
-    class BeamFormedData;
 
     class BeamFormerCoherentStep: public ProcessStep
     {
@@ -83,7 +83,7 @@ namespace LOFAR
 
       void process(const SubbandProcInputData &input);
 
-      void readOutput(BeamFormedData &output);
+      void readOutput(SubbandProcOutputData &output);
 
     private:
 
diff --git a/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/BeamFormerFactories.cc b/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/BeamFormerFactories.cc
index 2ec4a06d5703191cbfa0c92c44325c7adbdc8520..1b90590e4233c2bd52bed8a3c0754547646556a6 100644
--- a/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/BeamFormerFactories.cc
+++ b/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/BeamFormerFactories.cc
@@ -22,7 +22,6 @@
 #include <lofar_config.h>
 
 #include "BeamFormerFactories.h"
-#include "BeamFormerSubbandProc.h"
 
 namespace LOFAR
 {
diff --git a/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/BeamFormerIncoherentStep.cc b/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/BeamFormerIncoherentStep.cc
index 2e3b13512974b1bcba5d75eec7bbe17436daa569..bc31c3a4a788311a7362943304cad93008c595da 100644
--- a/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/BeamFormerIncoherentStep.cc
+++ b/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/BeamFormerIncoherentStep.cc
@@ -32,7 +32,6 @@
 #include <CoInterface/Parset.h>
 
 #include "SubbandProc.h"
-#include "BeamFormerSubbandProc.h"
 
 #include "BeamFormerIncoherentStep.h"
 
@@ -171,7 +170,7 @@ namespace LOFAR
     }
 
 
-    void BeamFormerIncoherentStep::readOutput(BeamFormedData &output)
+    void BeamFormerIncoherentStep::readOutput(SubbandProcOutputData &output)
     {
       if (nrIncoherent(output.blockID) == 0)
         return;
diff --git a/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/BeamFormerIncoherentStep.h b/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/BeamFormerIncoherentStep.h
index 33a4fe5b2ac19a5ec7001260989bced60331bf08..12587f3d1ef8fd81270f7056b4de6178544cc194 100644
--- a/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/BeamFormerIncoherentStep.h
+++ b/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/BeamFormerIncoherentStep.h
@@ -33,7 +33,8 @@
 #include <GPUProc/MultiDimArrayHostBuffer.h>
 #include <CoInterface/BlockID.h>
 
-#include "SubbandProc.h"
+#include "SubbandProcInputData.h"
+#include "SubbandProcOutputData.h"
 #include "ProcessStep.h"
 
 #include <GPUProc/Kernels/FFT_Kernel.h>
@@ -47,9 +48,6 @@ namespace LOFAR
 {
   namespace Cobalt
   {
-    //# Forward declarations
-    class BeamFormedData;
-
     class BeamFormerIncoherentStep : public ProcessStep
     {
     public:
@@ -81,7 +79,7 @@ namespace LOFAR
 
       void process(const SubbandProcInputData &input);
 
-      void readOutput(BeamFormedData &output);
+      void readOutput(SubbandProcOutputData &output);
 
     private:
 
diff --git a/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/BeamFormerPreprocessingStep.h b/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/BeamFormerPreprocessingStep.h
index edead2f3e7eb9a2bb9569e953967fb4ca8fc3818..5789e36452d810198f17dd333d24f3c54aa67ef1 100644
--- a/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/BeamFormerPreprocessingStep.h
+++ b/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/BeamFormerPreprocessingStep.h
@@ -32,7 +32,8 @@
 #include <GPUProc/MultiDimArrayHostBuffer.h>
 #include <CoInterface/BlockID.h>
 
-#include "SubbandProc.h"
+#include "SubbandProcInputData.h"
+#include "SubbandProcOutputData.h"
 #include "ProcessStep.h"
 
 #include <GPUProc/KernelFactory.h>
diff --git a/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/BeamFormerSubbandProc.cc b/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/BeamFormerSubbandProc.cc
deleted file mode 100644
index d7fe3df399f2078ddcdb6b79359f89d94c6ef7e0..0000000000000000000000000000000000000000
--- a/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/BeamFormerSubbandProc.cc
+++ /dev/null
@@ -1,238 +0,0 @@
-//# BeamFormerSubbandProc.cc
-//# Copyright (C) 2012-2013  ASTRON (Netherlands Institute for Radio Astronomy)
-//# P.O. Box 2, 7990 AA Dwingeloo, The Netherlands
-//#
-//# This file is part of the LOFAR software suite.
-//# The LOFAR software suite is free software: you can redistribute it and/or
-//# modify it under the terms of the GNU General Public License as published
-//# by the Free Software Foundation, either version 3 of the License, or
-//# (at your option) any later version.
-//#
-//# The LOFAR software suite is distributed in the hope that it will be useful,
-//# but WITHOUT ANY WARRANTY; without even the implied warranty of
-//# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-//# GNU General Public License for more details.
-//#
-//# You should have received a copy of the GNU General Public License along
-//# with the LOFAR software suite. If not, see <http://www.gnu.org/licenses/>.
-//#
-//# $Id$
-
-#include <lofar_config.h>
-
-#include "BeamFormerSubbandProc.h"
-#include "BeamFormerFactories.h"
-
-#include <GPUProc/global_defines.h>
-#include <GPUProc/gpu_wrapper.h>
-
-#include <CoInterface/Parset.h>
-#include <ApplCommon/PosixTime.h>
-#include <Common/LofarLogger.h>
-
-#include <iomanip>
-
-
-namespace LOFAR
-{
-  namespace Cobalt
-  {
-    BeamFormedData::BeamFormedData(
-        const Parset &ps,
-        gpu::Context &context) :
-      coherentData(ps.settings.beamFormer.anyCoherentTABs()
-        ? boost::extents[ps.settings.beamFormer.maxNrCoherentTABsPerSAP()]
-                        [ps.settings.beamFormer.coherentSettings.nrStokes]
-                        [ps.settings.beamFormer.coherentSettings.nrSamples]
-                        [ps.settings.beamFormer.coherentSettings.nrChannels]
-        : boost::extents[0][0][0][0],
-        context, 0),
-
-      incoherentData(ps.settings.beamFormer.anyIncoherentTABs()
-        ? boost::extents[ps.settings.beamFormer.maxNrIncoherentTABsPerSAP()]
-                        [ps.settings.beamFormer.incoherentSettings.nrStokes]
-                        [ps.settings.beamFormer.incoherentSettings.nrSamples]
-                        [ps.settings.beamFormer.incoherentSettings.nrChannels]
-        : boost::extents[0][0][0][0],
-        context, 0),
-
-      correlatedData(ps.settings.correlator.enabled ? ps.settings.antennaFields.size()           : 0,
-                     ps.settings.correlator.enabled ? ps.settings.correlator.nrChannels          : 0,
-                     ps.settings.correlator.enabled ? ps.settings.correlator.nrSamplesPerChannel : 0,
-                     context),
-      emit_correlatedData(false)
-    {
-    }
-
-
-    BeamFormerSubbandProc::BeamFormerSubbandProc(
-      const Parset &parset,
-      gpu::Context &context,
-      BeamFormerFactories &factories,
-      size_t nrSubbandsPerSubbandProc)
-    :
-      SubbandProc(parset, context, nrSubbandsPerSubbandProc),
-      prevBlock(-1),
-      prevSAP(-1),
-      inputCounter(context, "input")
-    {
-      // See doc/bf-pipeline.txt
-      size_t devA_size = 0;
-      size_t devB_size = 0;
-
-      if (factories.correlator) {
-        CorrelatorStep::Factories &cf = *factories.correlator;
-
-        devA_size = std::max(devA_size,
-          cf.firFilter ? cf.firFilter->bufferSize(FIR_FilterKernel::INPUT_DATA)
-                       : cf.delayAndBandPass.bufferSize(DelayAndBandPassKernel::INPUT_DATA));
-        devB_size = std::max(devB_size,
-                      cf.correlator.bufferSize(CorrelatorKernel::INPUT_DATA));
-      }
-
-      if (factories.preprocessing) {
-        devA_size = std::max(devA_size,
-          factories.preprocessing->intToFloat.bufferSize(IntToFloatKernel::OUTPUT_DATA));
-        devB_size = std::max(devB_size,
-          factories.preprocessing->intToFloat.bufferSize(IntToFloatKernel::OUTPUT_DATA));
-      }
-
-      if (factories.incoherentStokes) {
-        ASSERT(factories.preprocessing);
-
-        /* incoherentStokes uses devA and devB, but the sizes provided b the preprocessing
-           pipeline are already sufficient. */
-      }
-
-      // NOTE: For an explanation of the different buffers being used, please refer
-      // to the document bf-pipeline.txt in the GPUProc/doc directory.
-      devA.reset(new gpu::DeviceMemory(context, devA_size));
-      devB.reset(new gpu::DeviceMemory(context, devB_size));
-
-      //################################################
-      // Create objects containing the kernel and device buffers
-
-      if (factories.correlator) {
-        correlatorStep = std::auto_ptr<CorrelatorStep>(
-          new CorrelatorStep(parset, queue, context, *factories.correlator,
-          devA, devB, nrSubbandsPerSubbandProc));
-      }
-
-      if (factories.preprocessing) {
-        preprocessingStep = std::auto_ptr<BeamFormerPreprocessingStep>(
-          new BeamFormerPreprocessingStep(parset, queue, context, *factories.preprocessing, 
-          devA, devB));
-      }
-
-      if (factories.coherentStokes) {
-        coherentStep = std::auto_ptr<BeamFormerCoherentStep>(
-          new BeamFormerCoherentStep(parset, queue, context, *factories.coherentStokes,
-          devB));
-      }
-
-      if (factories.incoherentStokes) {
-        incoherentStep = std::auto_ptr<BeamFormerIncoherentStep>(
-          new BeamFormerIncoherentStep(parset, queue, context, *factories.incoherentStokes, 
-              devA, devB));
-      }
-
-
-      LOG_INFO_STR("Pipeline configuration: "
-        << (correlatorStep.get() ?    "[correlator] " : "")
-        << (preprocessingStep.get() ? "[bf preproc] " : "")
-        << (coherentStep.get() ?      "[coh stokes] " : "")
-        << (incoherentStep.get() ?    "[incoh stokes] " : "")
-      );
-      
-      // put enough objects in the outputPool to operate
-      for (size_t i = 0; i < nrOutputElements(); ++i)
-      {
-        outputPool.free.append(new BeamFormedData(ps, context));
-      }
-    }
-
-
-    void BeamFormerSubbandProc::processSubband( SubbandProcInputData &input,
-      SubbandProcOutputData &_output)
-    {
-      BeamFormedData &output = dynamic_cast<BeamFormedData&>(_output);
-
-      //*******************************************************************
-      // calculate some variables depending on the input subband
-      size_t block = input.blockID.block;
-      unsigned SAP = ps.settings.subbands[input.blockID.globalSubbandIdx].SAP;
-
-      //****************************************
-      // Send inputs to GPU
-      queue.writeBuffer(*devA, input.inputSamples, inputCounter, true);
-
-      // Some additional buffers
-      // Only upload delays if they changed w.r.t. the previous subband.
-      if ((int)SAP != prevSAP || (ssize_t)block != prevBlock) {
-        if (correlatorStep.get()) {
-          correlatorStep->writeInput(input);
-        }
-
-        if (preprocessingStep.get()) {
-          preprocessingStep->writeInput(input);
-        }
-
-        if (coherentStep.get()) {
-          coherentStep->writeInput(input);
-        }
-
-        prevSAP = SAP;
-        prevBlock = block;
-      }
-
-      // ************************************************
-      // Start the GPU processing
-
-      if (correlatorStep.get()) {
-        output.correlatedData.blockID = input.blockID;
-
-        correlatorStep->process(input);
-        correlatorStep->readOutput(output.correlatedData);
-      }
-
-      if (preprocessingStep.get()) {
-        preprocessingStep->process(input);
-      }
-
-      if (coherentStep.get())
-      {
-        coherentStep->process(input);
-        coherentStep->readOutput(output);
-      }
-
-      if (incoherentStep.get())
-      {
-        incoherentStep->process(input);
-        incoherentStep->readOutput(output);
-      }
-
-      // ************************************************
-      // Do CPU computations while the GPU is working
-
-      if (correlatorStep.get()) {
-        correlatorStep->processCPU(input, output.correlatedData);
-      }
-
-      // Synchronise to assure that all the work in the data is done
-      queue.synchronize();
-    }
-
-    void BeamFormerSubbandProc::postprocessSubband(SubbandProcOutputData &_output)
-    {
-      BeamFormedData &output = dynamic_cast<BeamFormedData&>(_output);
-
-      if (correlatorStep.get()) {
-        output.emit_correlatedData = correlatorStep->postprocessSubband(output.correlatedData);
-      } else {
-        output.emit_correlatedData = false;
-      }
-    }
-
-  }
-}
-
diff --git a/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/BeamFormerSubbandProc.h b/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/BeamFormerSubbandProc.h
deleted file mode 100644
index a5772ed0b83cbf439c48b7767b223b995cea4aba..0000000000000000000000000000000000000000
--- a/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/BeamFormerSubbandProc.h
+++ /dev/null
@@ -1,107 +0,0 @@
-//# BeamFormerSubbandProc.h
-//# Copyright (C) 2012-2013  ASTRON (Netherlands Institute for Radio Astronomy)
-//# P.O. Box 2, 7990 AA Dwingeloo, The Netherlands
-//#
-//# This file is part of the LOFAR software suite.
-//# The LOFAR software suite is free software: you can redistribute it and/or
-//# modify it under the terms of the GNU General Public License as published
-//# by the Free Software Foundation, either version 3 of the License, or
-//# (at your option) any later version.
-//#
-//# The LOFAR software suite is distributed in the hope that it will be useful,
-//# but WITHOUT ANY WARRANTY; without even the implied warranty of
-//# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-//# GNU General Public License for more details.
-//#
-//# You should have received a copy of the GNU General Public License along
-//# with the LOFAR software suite. If not, see <http://www.gnu.org/licenses/>.
-//#
-//# $Id$
-
-#ifndef LOFAR_GPUPROC_CUDA_BEAM_FORMER_SUBBAND_PROC_H
-#define LOFAR_GPUPROC_CUDA_BEAM_FORMER_SUBBAND_PROC_H
-
-#include <complex>
-#include <memory>
-
-#include <boost/shared_ptr.hpp>
-#include <Common/LofarLogger.h>
-#include <CoInterface/CorrelatedData.h>
-#include <CoInterface/Parset.h>
-
-#include <GPUProc/gpu_wrapper.h>
-
-#include <GPUProc/MultiDimArrayHostBuffer.h>
-#include <GPUProc/Pipelines/BeamFormerPipeline.h>
-
-#include "CorrelatorStep.h"
-#include "BeamFormerPreprocessingStep.h"
-#include "BeamFormerCoherentStep.h"
-#include "BeamFormerIncoherentStep.h"
-
-#include "SubbandProc.h"
-
-namespace LOFAR
-{
-  namespace Cobalt
-  {
-    //# Forward declarations
-    struct BeamFormerFactories;
-
-    // Our output data type
-    class BeamFormedData: public SubbandProcOutputData
-    {
-    public:
-
-      MultiDimArrayHostBuffer<float, 4> coherentData;
-      MultiDimArrayHostBuffer<float, 4> incoherentData;
-
-      CorrelatorStep::CorrelatedData correlatedData;
-      bool emit_correlatedData;
-
-      BeamFormedData(const Parset &ps,
-                     gpu::Context &context);
-    };
-
-    class BeamFormerSubbandProc : public SubbandProc
-    {
-    public:
-      BeamFormerSubbandProc(const Parset &parset, gpu::Context &context,
-                            BeamFormerFactories &factories,
-                            size_t nrSubbandsPerSubbandProc = 1);
-
-      // Beam form the data found in the input data buffer
-      virtual void processSubband(SubbandProcInputData &input,
-                                  SubbandProcOutputData &output);
-
-      // Do post processing on the CPU
-      virtual void postprocessSubband(SubbandProcOutputData &output);
-
-    private:
-      // The previously processed SAP/block, or -1 if nothing has been
-      // processed yet. Used in order to determine if new delays have
-      // to be uploaded.
-      ssize_t prevBlock;
-      signed int prevSAP;
-
-      // @{
-      // Device memory buffers. These buffers are used interleaved. For details,
-      // please refer to the document bf-pipeline.txt in the directory
-      // GPUProc/doc.
-      boost::shared_ptr<gpu::DeviceMemory> devA;
-      boost::shared_ptr<gpu::DeviceMemory> devB;
-      // @}
-
-      PerformanceCounter inputCounter;
-
-      std::auto_ptr<CorrelatorStep> correlatorStep;
-      std::auto_ptr<BeamFormerPreprocessingStep> preprocessingStep;
-      std::auto_ptr<BeamFormerCoherentStep> coherentStep;
-      std::auto_ptr<BeamFormerIncoherentStep> incoherentStep;
-    };
-
-  }
-}
-
-#endif
-
diff --git a/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/CorrelatorStep.cc b/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/CorrelatorStep.cc
index 88a5bd224a45d304fecd938d94556551a14e30a6..99b0a1bbbb7d2e913b7c6772bf9722de744e5429 100644
--- a/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/CorrelatorStep.cc
+++ b/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/CorrelatorStep.cc
@@ -21,6 +21,7 @@
 #include <lofar_config.h>
 
 #include "CorrelatorStep.h"
+#include "SubbandProc.h"
 
 #include <GPUProc/global_defines.h>
 #include <GPUProc/gpu_wrapper.h>
@@ -35,21 +36,6 @@ namespace LOFAR
 {
   namespace Cobalt
   {
-    CorrelatorStep::CorrelatedData::CorrelatedData(
-      unsigned nrStations, unsigned nrChannels,
-      unsigned maxNrValidSamples, gpu::Context &context)
-      :
-      MultiDimArrayHostBuffer<fcomplex, 4>(
-        boost::extents
-        [nrStations * (nrStations + 1) / 2]
-        [nrChannels][NR_POLARIZATIONS]
-        [NR_POLARIZATIONS], 
-        context, 0),
-      LOFAR::Cobalt::CorrelatedData(nrStations, nrChannels, 
-                     maxNrValidSamples, this->origin(),
-                     this->num_elements(), heapAllocator, 1)
-    {
-    }
 
 
     CorrelatorStep::Factories::Factories(const Parset &ps, size_t nrSubbandsPerSubbandProc) :
@@ -95,7 +81,7 @@ namespace LOFAR
 
       // First transform the flags to channel flags: taking in account 
       // reduced resolution in time and the size of the filter
-      convertFlagsToChannelFlags(parset, inputFlags, flagsPerChannel);
+      SubbandProc::Flagger::convertFlagsToChannelFlags(parset, inputFlags, flagsPerChannel);
 
       // Calculate the number of flags per baseline and assign to
       // output object.
@@ -313,14 +299,14 @@ namespace LOFAR
     }
 
 
-    void CorrelatorStep::readOutput(CorrelatedData &output)
+    void CorrelatorStep::readOutput(SubbandProcOutputData &output)
     {
       // Read data back from the kernel
-      queue.readBuffer(output, devE, outputCounter, false);
+      queue.readBuffer(output.correlatedData, devE, outputCounter, false); // only output.correlatedData is handed to readBuffer; no other member of output is touched here
     }
 
 
-    void CorrelatorStep::processCPU(const SubbandProcInputData &input, CorrelatedData &output)
+    void CorrelatorStep::processCPU(const SubbandProcInputData &input, SubbandProcOutputData &output)
     {
       // Propagate the flags.
       MultiDimArray<LOFAR::SparseSet<unsigned>, 1> flags = input.inputFlags;
@@ -332,30 +318,30 @@ namespace LOFAR
           flags, input.blockID.subbandProcSubbandIdx);
       }
 
-      Flagger::propagateFlags(ps, flags, output);
+      Flagger::propagateFlags(ps, flags, output.correlatedData);
     }
 
 
-    bool CorrelatorStep::integrate(CorrelatedData &output)
+    bool CorrelatorStep::integrate(SubbandProcOutputData &output)
     {
       const size_t idx = output.blockID.subbandProcSubbandIdx;
       const size_t nblock = ps.settings.correlator.nrBlocksPerIntegration;
       
       // We don't want to copy the data if we don't need to integrate.
       if (nblock == 1) {
-        output.setSequenceNumber(output.blockID.block);
+        output.correlatedData.setSequenceNumber(output.blockID.block);
         return true;
       }
 
       integratedData[idx].first++;
 
       if (integratedData[idx].first < nblock) {
-        *integratedData[idx].second += output;
+        *integratedData[idx].second += output.correlatedData;
         return false;
       }
       else {
-        output += *integratedData[idx].second;
-        output.setSequenceNumber(output.blockID.block / nblock);
+        output.correlatedData += *integratedData[idx].second;
+        output.correlatedData.setSequenceNumber(output.blockID.block / nblock);
         integratedData[idx].first = 0;
         integratedData[idx].second->reset();
         return true;
@@ -363,7 +349,7 @@ namespace LOFAR
     }
 
 
-    bool CorrelatorStep::postprocessSubband(CorrelatedData &output)
+    bool CorrelatorStep::postprocessSubband(SubbandProcOutputData &output)
     {
       if (!integrate(output)) {
         // Not yet done constructing output block 
@@ -372,7 +358,7 @@ namespace LOFAR
 
       // The flags are already copied to the correct location
       // now the flagged amount should be applied to the visibilities
-      Flagger::applyWeights(ps, output);  
+      Flagger::applyWeights(ps, output.correlatedData);  
 
       return true;
     }
diff --git a/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/CorrelatorStep.h b/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/CorrelatorStep.h
index 3fe345122d95b01eba8d38529b22902532c6f474..5075f0888866cd4fd13512485fd7a76e0d27c721 100644
--- a/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/CorrelatorStep.h
+++ b/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/CorrelatorStep.h
@@ -25,8 +25,8 @@
 #include <vector>
 #include <utility> // for std::pair
 
-#include <Common/LofarLogger.h>
 #include <CoInterface/Parset.h>
+#include <CoInterface/SmartPtr.h>
 
 #include <boost/shared_ptr.hpp>
 #include <GPUProc/gpu_wrapper.h>
@@ -35,7 +35,8 @@
 #include <CoInterface/BlockID.h>
 #include <CoInterface/CorrelatedData.h>
 
-#include "SubbandProc.h"
+#include "SubbandProcInputData.h"
+#include "SubbandProcOutputData.h"
 #include "ProcessStep.h"
 
 #include <GPUProc/PerformanceCounter.h>
@@ -53,17 +54,6 @@ namespace LOFAR
     class CorrelatorStep: public ProcessStep
     {
     public:
-      struct CorrelatedData:
-        public MultiDimArrayHostBuffer<fcomplex,4>,
-        public LOFAR::Cobalt::CorrelatedData,
-        public SubbandProcOutputData
-      {
-        CorrelatedData(unsigned nrStations, 
-                       unsigned nrChannels,
-                       unsigned maxNrValidSamples,
-                       gpu::Context &context);
-      };
-
       struct Factories {
         Factories(const Parset &ps, size_t nrSubbandsPerSubbandProc);
 
@@ -89,17 +79,17 @@ namespace LOFAR
       void writeInput(const SubbandProcInputData &input);
 
       void process(const SubbandProcInputData &input);
-      void processCPU(const SubbandProcInputData &input, CorrelatedData &output);
+      void processCPU(const SubbandProcInputData &input, SubbandProcOutputData &output);
 
-      void readOutput(CorrelatedData &output);
+      void readOutput(SubbandProcOutputData &output);
 
-      bool postprocessSubband(CorrelatedData &output);
+      bool postprocessSubband(SubbandProcOutputData &output);
 
       // Collection of functions to tranfer the input flags to the output.
       // \c propagateFlags can be called parallel to the kernels.
       // After the data is copied from the the shared buffer
       // \c applyWeights can be used to weight the visibilities
-      class Flagger: public SubbandProc::Flagger
+      class Flagger
       {
       public:
         // 1. Convert input flags to channel flags, calculate the amount flagged
@@ -169,7 +159,7 @@ namespace LOFAR
       std::vector< std::pair< size_t, SmartPtr<LOFAR::Cobalt::CorrelatedData> > >
       integratedData;
 
-      bool integrate(CorrelatedData &output);
+      bool integrate(SubbandProcOutputData &output);
     };
   }
 }
diff --git a/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/ProcessStep.h b/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/ProcessStep.h
index fedbbf5589f7c55180035e3724089d1d63cea292..63ed74a484c0e26e788636737c52be38afe17630 100644
--- a/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/ProcessStep.h
+++ b/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/ProcessStep.h
@@ -25,9 +25,7 @@
 
 #include <GPUProc/gpu_wrapper.h>
 
-#include <GPUProc/SubbandProcs/SubbandProc.h>
-
-#include "SubbandProc.h"
+#include "SubbandProcInputData.h"
 
 namespace LOFAR
 {
diff --git a/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/SubbandProc.cc b/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/SubbandProc.cc
index f0abf75dd323b64a507b37cb4a7e92d09fab2222..ea4a4018017db7ae1e723a886db27e09ad7087ff 100644
--- a/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/SubbandProc.cc
+++ b/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/SubbandProc.cc
@@ -21,18 +21,28 @@
 #include <lofar_config.h>
 
 #include "SubbandProc.h"
+#include "BeamFormerFactories.h"
 
-#include <Common/LofarLogger.h>
+#include <GPUProc/global_defines.h>
+#include <GPUProc/gpu_wrapper.h>
 
+#include <CoInterface/Parset.h>
 #include <CoInterface/Align.h>
 
-#include <GPUProc/global_defines.h>
+#include <ApplCommon/PosixTime.h>
+#include <Common/LofarLogger.h>
+
+#include <iomanip>
 
 namespace LOFAR
 {
   namespace Cobalt
   {
-    SubbandProc::SubbandProc(const Parset &ps, gpu::Context &context, size_t nrSubbandsPerSubbandProc)
+    SubbandProc::SubbandProc(
+      const Parset &ps,
+      gpu::Context &context,
+      BeamFormerFactories &factories,
+      size_t nrSubbandsPerSubbandProc)
     :
       inputPool("SubbandProc::inputPool"),
       processPool("SubbandProc::processPool"),
@@ -40,8 +50,79 @@ namespace LOFAR
 
       ps(ps),
       nrSubbandsPerSubbandProc(nrSubbandsPerSubbandProc),
-      queue(gpu::Stream(context))
+      queue(gpu::Stream(context)),
+      prevBlock(-1),
+      prevSAP(-1),
+      inputCounter(context, "input")
     {
+      // See doc/bf-pipeline.txt
+      size_t devA_size = 0;
+      size_t devB_size = 0;
+
+      if (factories.correlator) {
+        CorrelatorStep::Factories &cf = *factories.correlator;
+
+        devA_size = std::max(devA_size,
+          cf.firFilter ? cf.firFilter->bufferSize(FIR_FilterKernel::INPUT_DATA)
+                       : cf.delayAndBandPass.bufferSize(DelayAndBandPassKernel::INPUT_DATA));
+        devB_size = std::max(devB_size,
+                      cf.correlator.bufferSize(CorrelatorKernel::INPUT_DATA));
+      }
+
+      if (factories.preprocessing) {
+        devA_size = std::max(devA_size,
+          factories.preprocessing->intToFloat.bufferSize(IntToFloatKernel::OUTPUT_DATA));
+        devB_size = std::max(devB_size,
+          factories.preprocessing->intToFloat.bufferSize(IntToFloatKernel::OUTPUT_DATA));
+      }
+
+      if (factories.incoherentStokes) {
+        ASSERT(factories.preprocessing);
+
+        /* incoherentStokes uses devA and devB, but the sizes provided by the preprocessing
+           pipeline are already sufficient. */
+      }
+
+      // NOTE: For an explanation of the different buffers being used, please refer
+      // to the document bf-pipeline.txt in the GPUProc/doc directory.
+      devA.reset(new gpu::DeviceMemory(context, devA_size));
+      devB.reset(new gpu::DeviceMemory(context, devB_size));
+
+      //################################################
+      // Create objects containing the kernel and device buffers
+
+      if (factories.correlator) {
+        correlatorStep = std::auto_ptr<CorrelatorStep>(
+          new CorrelatorStep(ps, queue, context, *factories.correlator,
+          devA, devB, nrSubbandsPerSubbandProc));
+      }
+
+      if (factories.preprocessing) {
+        preprocessingStep = std::auto_ptr<BeamFormerPreprocessingStep>(
+          new BeamFormerPreprocessingStep(ps, queue, context, *factories.preprocessing, 
+          devA, devB));
+      }
+
+      if (factories.coherentStokes) {
+        coherentStep = std::auto_ptr<BeamFormerCoherentStep>(
+          new BeamFormerCoherentStep(ps, queue, context, *factories.coherentStokes,
+          devB));
+      }
+
+      if (factories.incoherentStokes) {
+        incoherentStep = std::auto_ptr<BeamFormerIncoherentStep>(
+          new BeamFormerIncoherentStep(ps, queue, context, *factories.incoherentStokes, 
+              devA, devB));
+      }
+
+
+      LOG_INFO_STR("Pipeline configuration: "
+        << (correlatorStep.get() ?    "[correlator] " : "")
+        << (preprocessingStep.get() ? "[bf preproc] " : "")
+        << (coherentStep.get() ?      "[coh stokes] " : "")
+        << (incoherentStep.get() ?    "[incoh stokes] " : "")
+      );
+
       // put enough objects in the inputPool to operate
       //
       // At least 3 items are needed for a smooth Pool operation.
@@ -49,11 +130,14 @@ namespace LOFAR
       for (size_t i = 0; i < nrInputDatas; ++i) {
         inputPool.free.append(new SubbandProcInputData(ps, context), false);
       }
+      
+      // put enough objects in the outputPool to operate
+      for (size_t i = 0; i < nrOutputElements(); ++i)
+      {
+        outputPool.free.append(new SubbandProcOutputData(ps, context));
+      }
     }
 
-    SubbandProc::~SubbandProc()
-    {
-    }
 
     size_t SubbandProc::nrOutputElements() const
     {
@@ -70,84 +154,14 @@ namespace LOFAR
       return 7 * nrSubbandsPerSubbandProc;
     }
 
-
-    void SubbandProcInputData::applyMetaData(const Parset &ps,
-                                           unsigned station, unsigned SAP,
-                                           const SubbandMetaData &metaData)
-    {
-      // extract and apply the flags
-      inputFlags[station] = metaData.flags;
-
-      flagInputSamples(station, metaData);
-
-      // extract and assign the delays for the station beams
-
-      // X polarisation
-      delaysAtBegin[SAP][station][0]  = ps.settings.antennaFields[station].delay.x + metaData.stationBeam.delayAtBegin;
-      delaysAfterEnd[SAP][station][0] = ps.settings.antennaFields[station].delay.x + metaData.stationBeam.delayAfterEnd;
-      phase0s[station][0]             = ps.settings.antennaFields[station].phase0.x;
-
-      // Y polarisation
-      delaysAtBegin[SAP][station][1]  = ps.settings.antennaFields[station].delay.y + metaData.stationBeam.delayAtBegin;
-      delaysAfterEnd[SAP][station][1] = ps.settings.antennaFields[station].delay.y + metaData.stationBeam.delayAfterEnd;
-      phase0s[station][1]             = ps.settings.antennaFields[station].phase0.y;
-
-      if (ps.settings.beamFormer.enabled)
-      {
-        // we already compensated for the delay for the first beam
-        double compensatedDelay = (metaData.stationBeam.delayAfterEnd +
-                                   metaData.stationBeam.delayAtBegin) * 0.5;
-
-        size_t nrTABs = ps.settings.beamFormer.SAPs[SAP].nrCoherent;
-
-        ASSERTSTR(metaData.TABs.size() == nrTABs, "Need delays for " << nrTABs << " coherent TABs, but got delays for " << metaData.TABs.size() << " TABs");
-
-        // Note: We only get delays for the coherent TABs
-        for (unsigned tab = 0; tab < nrTABs; tab++)
-        {
-          // subtract the delay that was already compensated for
-          tabDelays[SAP][station][tab] = (metaData.TABs[tab].delayAtBegin +
-                                          metaData.TABs[tab].delayAfterEnd) * 0.5 -
-                                         compensatedDelay;
-        }
-
-        // Zero padding entries that exist because we always produce maxNrCoherentTABsPerSAP for any subband
-        for (unsigned tab = nrTABs; tab < ps.settings.beamFormer.maxNrCoherentTABsPerSAP(); tab++)
-          tabDelays[SAP][station][tab] = 0.0;
-      }
-    }
-
-
-    // flag the input samples.
-    void SubbandProcInputData::flagInputSamples(unsigned station,
-                                              const SubbandMetaData& metaData)
-    {
-
-      // Get the size of a sample in bytes.
-      size_t sizeof_sample = sizeof *inputSamples.origin();
-
-      // Calculate the number elements to skip when striding over the second
-      // dimension of inputSamples.
-      size_t stride = inputSamples[station][0].num_elements();
-
-      // Zero the bytes in the input data for the flagged ranges.
-      for(SparseSet<unsigned>::const_iterator it = metaData.flags.getRanges().begin();
-        it != metaData.flags.getRanges().end(); ++it)
-      {
-        void *offset = inputSamples[station][it->begin].origin();
-        size_t size = stride * (it->end - it->begin) * sizeof_sample;
-        memset(offset, 0, size);
-      }
-    }
-
-    void SubbandProc::Flagger::convertFlagsToChannelFlags(Parset const &parset,
+    void SubbandProc::Flagger::convertFlagsToChannelFlags(Parset const &ps,
       MultiDimArray<LOFAR::SparseSet<unsigned>, 1>const &inputFlags,
       MultiDimArray<SparseSet<unsigned>, 2>& flagsPerChannel)
     {
-      unsigned numberOfChannels = parset.nrChannelsPerSubband();
+      unsigned numberOfChannels = ps.nrChannelsPerSubband();
       unsigned log2NrChannels = log2(numberOfChannels);
       //Convert the flags per sample to flags per channel
-      for (unsigned station = 0; station < parset.nrStations(); station ++) 
+      for (unsigned station = 0; station < ps.nrStations(); station ++) 
       {
         // get the flag ranges
         const SparseSet<unsigned>::Ranges &ranges = inputFlags[station].getRanges();
@@ -160,7 +174,7 @@ namespace LOFAR
           {
             // do nothing, just take the ranges as supplied
             begin_idx = it->begin; 
-            end_idx = std::min(parset.nrSamplesPerChannel(), it->end );
+            end_idx = std::min(ps.nrSamplesPerChannel(), it->end );
           }
           else
           {
@@ -181,7 +195,7 @@ namespace LOFAR
             // The min is needed, because flagging the last input
             // samples would cause NR_TAPS subsequent samples to
             // be flagged, which aren't necessarily part of this block.
-            end_idx = std::min(parset.nrSamplesPerChannel() + 1, 
+            end_idx = std::min(ps.nrSamplesPerChannel() + 1, 
               ((it->end - 1) >> log2NrChannels) + 1);
           }
 
@@ -192,6 +206,84 @@ namespace LOFAR
         }
       }
     }
+
+
+    void SubbandProc::processSubband( SubbandProcInputData &input,
+      SubbandProcOutputData &output)
+    {
+      //*******************************************************************
+      // calculate some variables depending on the input subband
+      size_t block = input.blockID.block;
+      unsigned SAP = ps.settings.subbands[input.blockID.globalSubbandIdx].SAP;
+
+      //****************************************
+      // Send inputs to GPU
+      queue.writeBuffer(*devA, input.inputSamples, inputCounter, true);
+
+      // Some additional buffers
+      // Only upload delays if they changed w.r.t. the previous subband.
+      if ((int)SAP != prevSAP || (ssize_t)block != prevBlock) {
+        if (correlatorStep.get()) {
+          correlatorStep->writeInput(input);
+        }
+
+        if (preprocessingStep.get()) {
+          preprocessingStep->writeInput(input);
+        }
+
+        if (coherentStep.get()) {
+          coherentStep->writeInput(input);
+        }
+
+        prevSAP = SAP;
+        prevBlock = block;
+      }
+
+      // ************************************************
+      // Start the GPU processing
+
+      if (correlatorStep.get()) {
+        correlatorStep->process(input);
+        correlatorStep->readOutput(output);
+      }
+
+      if (preprocessingStep.get()) {
+        preprocessingStep->process(input);
+      }
+
+      if (coherentStep.get())
+      {
+        coherentStep->process(input);
+        coherentStep->readOutput(output);
+      }
+
+      if (incoherentStep.get())
+      {
+        incoherentStep->process(input);
+        incoherentStep->readOutput(output);
+      }
+
+      // ************************************************
+      // Do CPU computations while the GPU is working
+
+      if (correlatorStep.get()) {
+        correlatorStep->processCPU(input, output);
+      }
+
+      // Synchronise to ensure that all the work on the data is done
+      queue.synchronize();
+    }
+
+
+    void SubbandProc::postprocessSubband(SubbandProcOutputData &output)
+    {
+      if (correlatorStep.get()) {
+        output.emit_correlatedData = correlatorStep->postprocessSubband(output);
+      } else {
+        output.emit_correlatedData = false;
+      }
+    }
   }
 }
 
+
diff --git a/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/SubbandProc.h b/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/SubbandProc.h
index 5597ddbada71d708d2ee807e73919db91a08af7d..dd9c81129972c7d64eda642c506175db711e3a0c 100644
--- a/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/SubbandProc.h
+++ b/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/SubbandProc.h
@@ -23,7 +23,12 @@
 
 #include <string>
 #include <map>
+#include <complex>
+#include <memory>
 
+#include <boost/shared_ptr.hpp>
+#include <Common/LofarLogger.h>
+#include <CoInterface/CorrelatedData.h>
 #include <CoInterface/Parset.h>
 #include <CoInterface/Pool.h>
 #include <CoInterface/SmartPtr.h>
@@ -34,6 +39,13 @@
 #include <GPUProc/gpu_wrapper.h>
 #include <GPUProc/MultiDimArrayHostBuffer.h>
 
+#include "SubbandProcInputData.h"
+#include "SubbandProcOutputData.h"
+#include "CorrelatorStep.h"
+#include "BeamFormerPreprocessingStep.h"
+#include "BeamFormerCoherentStep.h"
+#include "BeamFormerIncoherentStep.h"
+
 // \file
 // TODO: Update documentation
 
@@ -41,101 +53,8 @@ namespace LOFAR
 {
   namespace Cobalt
   {
-    //   Collect all inputData for the correlatorSubbandProc item:
-    //    \arg inputsamples
-    //    \arg delays
-    //    \arg phaseOffSets
-    //    \arg flags
-    // It also contains a read function parsing all this data from an input stream.   
-    class SubbandProcInputData
-    {
-    public:
-      // Which block this InputData represents
-      struct BlockID blockID;
-
-      // Delays are computed and applied in double precision,
-      // otherwise the to be computed phase shifts become too inprecise.
-
-      //!< Whole sample delays at the start of the workitem      
-      MultiDimArrayHostBuffer<double, 3> delaysAtBegin;
-
-      //!< Whole sample delays at the end of the workitem      
-      MultiDimArrayHostBuffer<double, 3> delaysAfterEnd;
-
-      //!< Remainder of delays
-      MultiDimArrayHostBuffer<double, 2> phase0s;
-
-      //!< Delays for TABs (aka pencil beams) after station beam correction
-      MultiDimArrayHostBuffer<double, 3> tabDelays;
-
-      // inputdata with flagged data set to zero
-      MultiDimArrayHostBuffer<char, 4> inputSamples;
-
-      // The input flags
-      MultiDimArray<SparseSet<unsigned>, 1> inputFlags;
-
-      // CPU-side holder for the Meta Data
-      std::vector<SubbandMetaData> metaData; // [station]
-
-      // Create the inputData object we need shared host/device memory on the
-      // supplied devicequeue
-      SubbandProcInputData(size_t n_beams, size_t n_stations, 
-                           size_t n_polarizations, size_t n_coherent_tabs, 
-                           size_t n_samples, size_t bytes_per_complex_sample,
-                           gpu::Context &context,
-                           unsigned int hostBufferFlags = 0)
-        :
-        delaysAtBegin(boost::extents[n_beams][n_stations][n_polarizations],
-                       context, hostBufferFlags),
-        delaysAfterEnd(boost::extents[n_beams][n_stations][n_polarizations],
-                       context, hostBufferFlags),
-        phase0s(boost::extents[n_stations][n_polarizations],
-                       context, hostBufferFlags),
-        tabDelays(boost::extents[n_beams][n_stations][n_coherent_tabs],
-                       context, hostBufferFlags),
-        inputSamples(boost::extents[n_stations][n_samples][n_polarizations][bytes_per_complex_sample],
-                       context, hostBufferFlags), // TODO: The size of the buffer is NOT validated
-        inputFlags(boost::extents[n_stations]),
-        metaData(n_stations)
-      {
-      }
-
-      // Short-hand constructor pulling all relevant values from a Parset
-      SubbandProcInputData(const Parset &ps,
-                           gpu::Context &context,
-                           unsigned int hostBufferFlags = 0)
-        :
-        delaysAtBegin(boost::extents[ps.settings.SAPs.size()][ps.settings.antennaFields.size()][NR_POLARIZATIONS],
-                       context, hostBufferFlags),
-        delaysAfterEnd(boost::extents[ps.settings.SAPs.size()][ps.settings.antennaFields.size()][NR_POLARIZATIONS],
-                       context, hostBufferFlags),
-        phase0s(boost::extents[ps.settings.antennaFields.size()][NR_POLARIZATIONS],
-                       context, hostBufferFlags),
-        tabDelays(boost::extents[ps.settings.SAPs.size()][ps.settings.antennaFields.size()][ps.settings.beamFormer.maxNrCoherentTABsPerSAP()],
-                       context, hostBufferFlags),
-        inputSamples(boost::extents[ps.settings.antennaFields.size()][ps.settings.blockSize][NR_POLARIZATIONS][ps.nrBytesPerComplexSample()],
-                       context, hostBufferFlags), // TODO: The size of the buffer is NOT validated
-        inputFlags(boost::extents[ps.settings.antennaFields.size()]),
-        metaData(ps.settings.antennaFields.size())
-      {
-      }
-
-      // process the given meta data 
-      void applyMetaData(const Parset &ps, unsigned station,
-                         unsigned SAP, const SubbandMetaData &metaData);
-
-      // set all flagged inputSamples to zero.
-      void flagInputSamples(unsigned station, const SubbandMetaData& metaData);
-    };
-
-    class SubbandProcOutputData
-    {
-    public:
-      struct BlockID blockID;
-
-      // Need a virtual destructor to make type polymorphic
-      virtual ~SubbandProcOutputData() {}
-    };
+    //# Forward declarations
+    struct BeamFormerFactories;
 
     /*
      * The SubbandProc does the following transformation:
@@ -179,15 +98,15 @@ namespace LOFAR
     class SubbandProc {
     public:
       SubbandProc(const Parset &ps, gpu::Context &context,
+                  BeamFormerFactories &factories,
                   size_t nrSubbandsPerSubbandProc = 1);
-      virtual ~SubbandProc();
 
       class Flagger
       {
       public:
         // 1.1 Convert the flags per station to channel flags, change time scale
         // if nchannel > 1
-        static void convertFlagsToChannelFlags(Parset const &parset,
+        static void convertFlagsToChannelFlags(Parset const &ps,
           MultiDimArray<SparseSet<unsigned>, 1> const &inputFlags,
           MultiDimArray<SparseSet<unsigned>, 2> &flagsPerChannel);
       };
@@ -204,10 +123,10 @@ namespace LOFAR
       Pool<SubbandProcOutputData> outputPool;
 
       // Correlate the data found in the input data buffer
-      virtual void processSubband(SubbandProcInputData &input, SubbandProcOutputData &output) = 0;
+      void processSubband(SubbandProcInputData &input, SubbandProcOutputData &output);
 
       // Do post processing on the CPU.
-      virtual void postprocessSubband(SubbandProcOutputData &output) = 0;
+      void postprocessSubband(SubbandProcOutputData &output);
 
     protected:
       const Parset &ps;
@@ -215,6 +134,27 @@ namespace LOFAR
 
       gpu::Stream queue;
 
+      // The previously processed SAP/block, or -1 if nothing has been
+      // processed yet. Used in order to determine if new delays have
+      // to be uploaded.
+      ssize_t prevBlock;
+      signed int prevSAP;
+
+      // @{
+      // Device memory buffers. These buffers are used interleaved. For details,
+      // please refer to the document bf-pipeline.txt in the directory
+      // GPUProc/doc.
+      boost::shared_ptr<gpu::DeviceMemory> devA;
+      boost::shared_ptr<gpu::DeviceMemory> devB;
+      // @}
+
+      PerformanceCounter inputCounter;
+
+      std::auto_ptr<CorrelatorStep> correlatorStep;
+      std::auto_ptr<BeamFormerPreprocessingStep> preprocessingStep;
+      std::auto_ptr<BeamFormerCoherentStep> coherentStep;
+      std::auto_ptr<BeamFormerIncoherentStep> incoherentStep;
+
       // Returns the number of output elements to create to get a smooth
       // running pipeline.
       size_t nrOutputElements() const;
diff --git a/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/SubbandProcInputData.cc b/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/SubbandProcInputData.cc
new file mode 100644
index 0000000000000000000000000000000000000000..fa4e3c752050e3c59c80392b68b914ce385a1f03
--- /dev/null
+++ b/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/SubbandProcInputData.cc
@@ -0,0 +1,144 @@
+//# SubbandProcInputData.cc
+//# Copyright (C) 2012-2013  ASTRON (Netherlands Institute for Radio Astronomy)
+//# P.O. Box 2, 7990 AA Dwingeloo, The Netherlands
+//#
+//# This file is part of the LOFAR software suite.
+//# The LOFAR software suite is free software: you can redistribute it and/or
+//# modify it under the terms of the GNU General Public License as published
+//# by the Free Software Foundation, either version 3 of the License, or
+//# (at your option) any later version.
+//#
+//# The LOFAR software suite is distributed in the hope that it will be useful,
+//# but WITHOUT ANY WARRANTY; without even the implied warranty of
+//# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+//# GNU General Public License for more details.
+//#
+//# You should have received a copy of the GNU General Public License along
+//# with the LOFAR software suite. If not, see <http://www.gnu.org/licenses/>.
+//#
+//# $Id$
+
+#include <lofar_config.h>
+
+#include "SubbandProcInputData.h"
+
+#include <CoInterface/Config.h>
+
+namespace LOFAR
+{
+  namespace Cobalt
+  {
+    SubbandProcInputData::SubbandProcInputData(size_t n_beams, size_t n_stations, 
+                         size_t n_polarizations, size_t n_coherent_tabs, 
+                         size_t n_samples, size_t bytes_per_complex_sample,
+                         gpu::Context &context,
+                         unsigned int hostBufferFlags)
+      :
+      delaysAtBegin(boost::extents[n_beams][n_stations][n_polarizations],
+                     context, hostBufferFlags),
+      delaysAfterEnd(boost::extents[n_beams][n_stations][n_polarizations],
+                     context, hostBufferFlags),
+      phase0s(boost::extents[n_stations][n_polarizations],
+                     context, hostBufferFlags),
+      tabDelays(boost::extents[n_beams][n_stations][n_coherent_tabs],
+                     context, hostBufferFlags),
+      inputSamples(boost::extents[n_stations][n_samples][n_polarizations][bytes_per_complex_sample],
+                     context, hostBufferFlags), // TODO: The size of the buffer is NOT validated
+      inputFlags(boost::extents[n_stations]),
+      metaData(n_stations)
+    {
+    }
+
+    // Short-hand constructor pulling all relevant values from a Parset
+    SubbandProcInputData::SubbandProcInputData(const Parset &ps,
+                         gpu::Context &context,
+                         unsigned int hostBufferFlags)
+      :
+      delaysAtBegin(boost::extents[ps.settings.SAPs.size()][ps.settings.antennaFields.size()][NR_POLARIZATIONS],
+                     context, hostBufferFlags),
+      delaysAfterEnd(boost::extents[ps.settings.SAPs.size()][ps.settings.antennaFields.size()][NR_POLARIZATIONS],
+                     context, hostBufferFlags),
+      phase0s(boost::extents[ps.settings.antennaFields.size()][NR_POLARIZATIONS],
+                     context, hostBufferFlags),
+      tabDelays(boost::extents[ps.settings.SAPs.size()][ps.settings.antennaFields.size()][ps.settings.beamFormer.maxNrCoherentTABsPerSAP()],
+                     context, hostBufferFlags),
+      inputSamples(boost::extents[ps.settings.antennaFields.size()][ps.settings.blockSize][NR_POLARIZATIONS][ps.nrBytesPerComplexSample()],
+                     context, hostBufferFlags), // TODO: The size of the buffer is NOT validated
+      inputFlags(boost::extents[ps.settings.antennaFields.size()]),
+      metaData(ps.settings.antennaFields.size())
+    {
+    }
+
+
+    void SubbandProcInputData::applyMetaData(const Parset &ps,
+                                           unsigned station, unsigned SAP,
+                                           const SubbandMetaData &metaData)
+    {
+      // extract and apply the flags
+      inputFlags[station] = metaData.flags;
+
+      flagInputSamples(station, metaData);
+
+      // extract and assign the delays for the station beams
+
+      // X polarisation
+      delaysAtBegin[SAP][station][0]  = ps.settings.antennaFields[station].delay.x + metaData.stationBeam.delayAtBegin;
+      delaysAfterEnd[SAP][station][0] = ps.settings.antennaFields[station].delay.x + metaData.stationBeam.delayAfterEnd;
+      phase0s[station][0]             = ps.settings.antennaFields[station].phase0.x;
+
+      // Y polarisation
+      delaysAtBegin[SAP][station][1]  = ps.settings.antennaFields[station].delay.y + metaData.stationBeam.delayAtBegin;
+      delaysAfterEnd[SAP][station][1] = ps.settings.antennaFields[station].delay.y + metaData.stationBeam.delayAfterEnd;
+      phase0s[station][1]             = ps.settings.antennaFields[station].phase0.y;
+
+      if (ps.settings.beamFormer.enabled)
+      {
+        // we already compensated for the delay for the first beam
+        double compensatedDelay = (metaData.stationBeam.delayAfterEnd +
+                                   metaData.stationBeam.delayAtBegin) * 0.5;
+
+        size_t nrTABs = ps.settings.beamFormer.SAPs[SAP].nrCoherent;
+
+        ASSERTSTR(metaData.TABs.size() == nrTABs, "Need delays for " << nrTABs << " coherent TABs, but got delays for " << metaData.TABs.size() << " TABs");
+
+        // Note: We only get delays for the coherent TABs
+        for (unsigned tab = 0; tab < nrTABs; tab++)
+        {
+          // subtract the delay that was already compensated for
+          tabDelays[SAP][station][tab] = (metaData.TABs[tab].delayAtBegin +
+                                          metaData.TABs[tab].delayAfterEnd) * 0.5 -
+                                         compensatedDelay;
+        }
+
+        // Zero padding entries that exist because we always produce maxNrCoherentTABsPerSAP for any subband
+        for (unsigned tab = nrTABs; tab < ps.settings.beamFormer.maxNrCoherentTABsPerSAP(); tab++)
+          tabDelays[SAP][station][tab] = 0.0;
+      }
+    }
+
+
+    // flag the input samples.
+    void SubbandProcInputData::flagInputSamples(unsigned station,
+                                              const SubbandMetaData& metaData)
+    {
+
+      // Get the size of a sample in bytes.
+      size_t sizeof_sample = sizeof *inputSamples.origin();
+
+      // Calculate the number of elements to skip when striding over the second
+      // dimension of inputSamples.
+      size_t stride = inputSamples[station][0].num_elements();
+
+      // Zero the bytes in the input data for the flagged ranges.
+      for(SparseSet<unsigned>::const_iterator it = metaData.flags.getRanges().begin();
+        it != metaData.flags.getRanges().end(); ++it)
+      {
+        void *offset = inputSamples[station][it->begin].origin();
+        size_t size = stride * (it->end - it->begin) * sizeof_sample;
+        memset(offset, 0, size);
+      }
+    }
+  }
+}
+
+
diff --git a/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/SubbandProcInputData.h b/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/SubbandProcInputData.h
new file mode 100644
index 0000000000000000000000000000000000000000..a002bf5b75111b862dfdba844e2e234454fb0270
--- /dev/null
+++ b/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/SubbandProcInputData.h
@@ -0,0 +1,99 @@
+//# SubbandProcInputData.h
+//# Copyright (C) 2012-2013  ASTRON (Netherlands Institute for Radio Astronomy)
+//# P.O. Box 2, 7990 AA Dwingeloo, The Netherlands
+//#
+//# This file is part of the LOFAR software suite.
+//# The LOFAR software suite is free software: you can redistribute it and/or
+//# modify it under the terms of the GNU General Public License as published
+//# by the Free Software Foundation, either version 3 of the License, or
+//# (at your option) any later version.
+//#
+//# The LOFAR software suite is distributed in the hope that it will be useful,
+//# but WITHOUT ANY WARRANTY; without even the implied warranty of
+//# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+//# GNU General Public License for more details.
+//#
+//# You should have received a copy of the GNU General Public License along
+//# with the LOFAR software suite. If not, see <http://www.gnu.org/licenses/>.
+//#
+//# $Id$
+
+#ifndef LOFAR_GPUPROC_CUDA_SUBBAND_PROC_INPUT_DATA_H
+#define LOFAR_GPUPROC_CUDA_SUBBAND_PROC_INPUT_DATA_H
+
+#include <vector>
+
+#include <CoInterface/BlockID.h>
+#include <CoInterface/Parset.h>
+#include <CoInterface/SubbandMetaData.h>
+#include <GPUProc/MultiDimArrayHostBuffer.h>
+#include <GPUProc/gpu_wrapper.h>
+
+// \file
+// TODO: Update documentation
+
+namespace LOFAR
+{
+  namespace Cobalt
+  {
+    //   Collect all inputData for the correlatorSubbandProc item:
+    //    \arg inputsamples
+    //    \arg delays
+    //    \arg phaseOffSets
+    //    \arg flags
+    // It also provides functions to apply the station meta data and to zero the flagged input samples.
+    class SubbandProcInputData
+    {
+    public:
+      // Which block this InputData represents
+      struct BlockID blockID;
+
+      // Delays are computed and applied in double precision,
+      // otherwise the phase shifts to be computed become too imprecise.
+
+      //! Whole sample delays at the start of the workitem
+      MultiDimArrayHostBuffer<double, 3> delaysAtBegin;
+
+      //! Whole sample delays at the end of the workitem
+      MultiDimArrayHostBuffer<double, 3> delaysAfterEnd;
+
+      //! Remainder of delays
+      MultiDimArrayHostBuffer<double, 2> phase0s;
+
+      //! Delays for TABs (aka pencil beams) after station beam correction
+      MultiDimArrayHostBuffer<double, 3> tabDelays;
+
+      // inputdata with flagged data set to zero
+      MultiDimArrayHostBuffer<char, 4> inputSamples;
+
+      // The input flags
+      MultiDimArray<SparseSet<unsigned>, 1> inputFlags;
+
+      // CPU-side holder for the Meta Data
+      std::vector<SubbandMetaData> metaData; // [station]
+
+      // Create the inputData object. We need shared host/device memory in the
+      // supplied context.
+      SubbandProcInputData(size_t n_beams, size_t n_stations, 
+                           size_t n_polarizations, size_t n_coherent_tabs, 
+                           size_t n_samples, size_t bytes_per_complex_sample,
+                           gpu::Context &context,
+                           unsigned int hostBufferFlags = 0);
+
+      // Short-hand constructor pulling all relevant values from a Parset
+      SubbandProcInputData(const Parset &ps,
+                           gpu::Context &context,
+                           unsigned int hostBufferFlags = 0);
+
+      // process the given meta data 
+      void applyMetaData(const Parset &ps, unsigned station,
+                         unsigned SAP, const SubbandMetaData &metaData);
+
+      // set all flagged inputSamples to zero.
+      void flagInputSamples(unsigned station, const SubbandMetaData& metaData);
+    };
+  }
+}
+
+#endif
+
diff --git a/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/SubbandProcOutputData.cc b/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/SubbandProcOutputData.cc
new file mode 100644
index 0000000000000000000000000000000000000000..228434f83180fce035cfaed2c67bc9d2ab8a754d
--- /dev/null
+++ b/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/SubbandProcOutputData.cc
@@ -0,0 +1,75 @@
+//# SubbandProcOutputData.cc
+//# Copyright (C) 2012-2013  ASTRON (Netherlands Institute for Radio Astronomy)
+//# P.O. Box 2, 7990 AA Dwingeloo, The Netherlands
+//#
+//# This file is part of the LOFAR software suite.
+//# The LOFAR software suite is free software: you can redistribute it and/or
+//# modify it under the terms of the GNU General Public License as published
+//# by the Free Software Foundation, either version 3 of the License, or
+//# (at your option) any later version.
+//#
+//# The LOFAR software suite is distributed in the hope that it will be useful,
+//# but WITHOUT ANY WARRANTY; without even the implied warranty of
+//# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+//# GNU General Public License for more details.
+//#
+//# You should have received a copy of the GNU General Public License along
+//# with the LOFAR software suite. If not, see <http://www.gnu.org/licenses/>.
+//#
+//# $Id$
+
+#include <lofar_config.h>
+
+#include "SubbandProcOutputData.h"
+
+namespace LOFAR
+{
+  namespace Cobalt
+  {
+    SubbandProcOutputData::SubbandProcOutputData( // Construct every output buffer with dimensions taken from the parset.
+        const Parset &ps,        // source of all buffer dimensions
+        gpu::Context &context) : // handed on unchanged to each buffer's constructor
+      coherentData(ps.settings.beamFormer.anyCoherentTABs() // dimensions: [TAB][Stokes][sample][channel]
+        ? boost::extents[ps.settings.beamFormer.maxNrCoherentTABsPerSAP()]
+                        [ps.settings.beamFormer.coherentSettings.nrStokes]
+                        [ps.settings.beamFormer.coherentSettings.nrSamples]
+                        [ps.settings.beamFormer.coherentSettings.nrChannels]
+        : boost::extents[0][0][0][0], // no coherent TABs: every extent is 0
+        context, 0), // NOTE(review): the 0 is presumably the host buffer flags -- confirm
+      // Incoherent output: same four dimensions, taken from the incoherent settings.
+      incoherentData(ps.settings.beamFormer.anyIncoherentTABs()
+        ? boost::extents[ps.settings.beamFormer.maxNrIncoherentTABsPerSAP()]
+                        [ps.settings.beamFormer.incoherentSettings.nrStokes]
+                        [ps.settings.beamFormer.incoherentSettings.nrSamples]
+                        [ps.settings.beamFormer.incoherentSettings.nrChannels]
+        : boost::extents[0][0][0][0], // no incoherent TABs: every extent is 0
+        context, 0),
+      // Correlator output: all three size arguments are 0 when the correlator is disabled.
+      correlatedData(ps.settings.correlator.enabled ? ps.settings.antennaFields.size()           : 0,
+                     ps.settings.correlator.enabled ? ps.settings.correlator.nrChannels          : 0,
+                     ps.settings.correlator.enabled ? ps.settings.correlator.nrSamplesPerChannel : 0,
+                     context),
+      emit_correlatedData(false) // starts out false
+    {
+    }
+
+    // Construct the 4-D host buffer base first, then the CorrelatedData base from that buffer's origin/size.
+    SubbandProcOutputData::CorrelatedData::CorrelatedData(
+      unsigned nrStations, unsigned nrChannels,
+      unsigned maxNrValidSamples, gpu::Context &context)
+      :
+      MultiDimArrayHostBuffer<fcomplex, 4>(
+        boost::extents
+        [nrStations * (nrStations + 1) / 2] // number of unordered station pairs, self-pairs included
+        [nrChannels][NR_POLARIZATIONS]
+        [NR_POLARIZATIONS], 
+        context, 0), // NOTE(review): the 0 is presumably the host buffer flags -- confirm
+      LOFAR::Cobalt::CorrelatedData(nrStations, nrChannels, 
+                     maxNrValidSamples, this->origin(), // presumably the storage of the buffer base built above
+                     this->num_elements(), heapAllocator, 1) // NOTE(review): meaning of the trailing 1 is not visible here
+    {
+    }
+  }
+}
+
+
diff --git a/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/SubbandProcOutputData.h b/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/SubbandProcOutputData.h
new file mode 100644
index 0000000000000000000000000000000000000000..58743cee0c3ceee310a5560a084478b930eac6fc
--- /dev/null
+++ b/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/SubbandProcOutputData.h
@@ -0,0 +1,65 @@
+//# SubbandProcOutputData.h
+//# Copyright (C) 2012-2013  ASTRON (Netherlands Institute for Radio Astronomy)
+//# P.O. Box 2, 7990 AA Dwingeloo, The Netherlands
+//#
+//# This file is part of the LOFAR software suite.
+//# The LOFAR software suite is free software: you can redistribute it and/or
+//# modify it under the terms of the GNU General Public License as published
+//# by the Free Software Foundation, either version 3 of the License, or
+//# (at your option) any later version.
+//#
+//# The LOFAR software suite is distributed in the hope that it will be useful,
+//# but WITHOUT ANY WARRANTY; without even the implied warranty of
+//# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+//# GNU General Public License for more details.
+//#
+//# You should have received a copy of the GNU General Public License along
+//# with the LOFAR software suite. If not, see <http://www.gnu.org/licenses/>.
+//#
+//# $Id$
+
+#ifndef LOFAR_GPUPROC_CUDA_SUBBAND_PROC_OUTPUT_DATA_H
+#define LOFAR_GPUPROC_CUDA_SUBBAND_PROC_OUTPUT_DATA_H
+
+#include <CoInterface/BlockID.h>
+#include <CoInterface/Parset.h>
+#include <CoInterface/CorrelatedData.h>
+#include <GPUProc/gpu_wrapper.h>
+#include <GPUProc/MultiDimArrayHostBuffer.h>
+
+// \file
+// TODO: Update documentation
+
+namespace LOFAR
+{
+  namespace Cobalt
+  {
+    // Output data type of the subband processing: a block ID plus beam-formed and correlated host buffers.
+    class SubbandProcOutputData
+    {
+    public:
+      struct BlockID blockID; // which block this output belongs to; not set by the constructor's initialiser list
+
+      MultiDimArrayHostBuffer<float, 4> coherentData;   // constructed as [TAB][Stokes][sample][channel]
+      MultiDimArrayHostBuffer<float, 4> incoherentData; // same dimensions, from the incoherent settings
+
+      struct CorrelatedData: // combines a 4-D fcomplex host buffer with LOFAR::Cobalt::CorrelatedData
+        public MultiDimArrayHostBuffer<fcomplex,4>, // listed first, hence constructed first
+        public LOFAR::Cobalt::CorrelatedData
+      {
+        CorrelatedData(unsigned nrStations, 
+                       unsigned nrChannels,
+                       unsigned maxNrValidSamples,
+                       gpu::Context &context);
+      };
+
+      CorrelatedData correlatedData; // built with all sizes 0 when the correlator is disabled
+      bool emit_correlatedData; // starts out false; NOTE(review): presumably set when correlatedData must be output -- confirm
+
+      SubbandProcOutputData(const Parset &ps, gpu::Context &context); // sizes every buffer from ps
+    };
+  }
+}
+
+#endif
+
diff --git a/RTCP/Cobalt/GPUProc/test/SubbandProcs/tBeamFormerSubbandProcProcessSb.cc b/RTCP/Cobalt/GPUProc/test/SubbandProcs/tBeamFormerSubbandProcProcessSb.cc
index 6ea4fa512575b4331601ab60ca2fd7566202c6a3..920fdcff6c5902387a0986a3886af7a5decfaf1e 100644
--- a/RTCP/Cobalt/GPUProc/test/SubbandProcs/tBeamFormerSubbandProcProcessSb.cc
+++ b/RTCP/Cobalt/GPUProc/test/SubbandProcs/tBeamFormerSubbandProcProcessSb.cc
@@ -29,7 +29,7 @@
 #include <CoInterface/Parset.h>
 #include <CoInterface/fpequals.h>
 #include <GPUProc/gpu_utils.h>
-#include <GPUProc/SubbandProcs/BeamFormerSubbandProc.h>
+#include <GPUProc/SubbandProcs/SubbandProc.h>
 #include <GPUProc/SubbandProcs/BeamFormerFactories.h>
 
 using namespace std;
@@ -113,7 +113,7 @@ int main() {
   // transform the data order).
 
   BeamFormerFactories factories(ps);
-  BeamFormerSubbandProc bwq(ps, ctx, factories);
+  SubbandProc bwq(ps, ctx, factories);
 
   SubbandProcInputData in(ps, ctx);
 
@@ -162,7 +162,7 @@ int main() {
   for (size_t i = 0; i < in.tabDelays.num_elements(); i++)
     in.tabDelays.get<float>()[i] = 0.0f;
 
-  BeamFormedData out(ps, ctx);
+  SubbandProcOutputData out(ps, ctx);
 
   for (size_t i = 0; i < out.coherentData.num_elements(); i++)
     out.coherentData.get<float>()[i] = 42.0f;
diff --git a/RTCP/Cobalt/GPUProc/test/SubbandProcs/tCoherentStokesBeamFormerSubbandProcProcessSb.cc b/RTCP/Cobalt/GPUProc/test/SubbandProcs/tCoherentStokesBeamFormerSubbandProcProcessSb.cc
index 5b1551e6ede0ba70a78aeffdeee50da0aa9b59a4..e7934ad830ec2cb4e04854758e1346389b980605 100644
--- a/RTCP/Cobalt/GPUProc/test/SubbandProcs/tCoherentStokesBeamFormerSubbandProcProcessSb.cc
+++ b/RTCP/Cobalt/GPUProc/test/SubbandProcs/tCoherentStokesBeamFormerSubbandProcProcessSb.cc
@@ -29,7 +29,7 @@
 #include <CoInterface/Parset.h>
 #include <CoInterface/fpequals.h>
 #include <GPUProc/gpu_utils.h>
-#include <GPUProc/SubbandProcs/BeamFormerSubbandProc.h>
+#include <GPUProc/SubbandProcs/SubbandProc.h>
 #include <GPUProc/SubbandProcs/BeamFormerFactories.h>
 
 #include "../Kernels/KernelTestHelpers.h"
@@ -142,7 +142,7 @@ int main(/*int argc, char *argv[]*/) {
   // transform the data order).
 
   BeamFormerFactories factories(ps);
-  BeamFormerSubbandProc bwq(ps, ctx, factories);
+  SubbandProc bwq(ps, ctx, factories);
 
   SubbandProcInputData in(
     nrBeams, nrStations, nrPolarisations, maxNrTABsPerSAP, 
@@ -193,7 +193,7 @@ int main(/*int argc, char *argv[]*/) {
   for (size_t i = 0; i < in.tabDelays.num_elements(); i++)
     in.tabDelays.get<float>()[i] = 0.0f;
 
-  BeamFormedData out(ps, ctx);
+  SubbandProcOutputData out(ps, ctx);
 
   for (size_t i = 0; i < out.coherentData.num_elements(); i++)
     out.coherentData.get<float>()[i] = 42.0f;
diff --git a/RTCP/Cobalt/GPUProc/test/SubbandProcs/tCorrelatorSubbandProcProcessSb.cc b/RTCP/Cobalt/GPUProc/test/SubbandProcs/tCorrelatorSubbandProcProcessSb.cc
index 3961c18d83b6fc4e673567508f72644d900ac442..bc9f88f9f003e16f04b4bf8cb3afe0837716a69d 100644
--- a/RTCP/Cobalt/GPUProc/test/SubbandProcs/tCorrelatorSubbandProcProcessSb.cc
+++ b/RTCP/Cobalt/GPUProc/test/SubbandProcs/tCorrelatorSubbandProcProcessSb.cc
@@ -26,7 +26,8 @@
 #include <CoInterface/fpequals.h>
 #include <CoInterface/Parset.h>
 #include <GPUProc/gpu_utils.h>
-#include <GPUProc/SubbandProcs/BeamFormerSubbandProc.h>
+#include <GPUProc/SubbandProcs/BeamFormerFactories.h>
+#include <GPUProc/SubbandProcs/SubbandProc.h>
 
 using namespace std;
 using namespace LOFAR;
@@ -83,13 +84,13 @@ int main() {
   // transform the data order).
 
   BeamFormerFactories factories(ps, 1);
-  BeamFormerSubbandProc cwq(ps, ctx, factories);
+  SubbandProc cwq(ps, ctx, factories);
 
   SubbandProcInputData in(
     nrBeams, nrStations, nrPolarisations, maxNrTABsPerSAP,
     nrSamplesPerSubband, nrBytesPerComplexSample, ctx);
 
-  BeamFormedData out(ps, ctx);
+  SubbandProcOutputData out(ps, ctx);
 
   LOG_INFO_STR(
     "\nInput info:" <<
diff --git a/RTCP/Cobalt/GPUProc/test/SubbandProcs/tFlysEyeBeamFormerSubbandProcProcessSb.cc b/RTCP/Cobalt/GPUProc/test/SubbandProcs/tFlysEyeBeamFormerSubbandProcProcessSb.cc
index 5ed0f53c618952569dd439218dff38f854cb99dd..7fd3b2d02f1e34e7eb06dff7863cec70d7e50a76 100644
--- a/RTCP/Cobalt/GPUProc/test/SubbandProcs/tFlysEyeBeamFormerSubbandProcProcessSb.cc
+++ b/RTCP/Cobalt/GPUProc/test/SubbandProcs/tFlysEyeBeamFormerSubbandProcProcessSb.cc
@@ -30,7 +30,7 @@
 #include <CoInterface/Parset.h>
 #include <CoInterface/fpequals.h>
 #include <GPUProc/gpu_utils.h>
-#include <GPUProc/SubbandProcs/BeamFormerSubbandProc.h>
+#include <GPUProc/SubbandProcs/SubbandProc.h>
 #include <GPUProc/SubbandProcs/BeamFormerFactories.h>
 
 using namespace std;
@@ -130,7 +130,7 @@ int main() {
   // transform the data order).
 
   BeamFormerFactories factories(ps);
-  BeamFormerSubbandProc bwq(ps, ctx, factories);
+  SubbandProc bwq(ps, ctx, factories);
 
   SubbandProcInputData in(
     nrBeams, nrStations, nrPolarisations, nrTABs, 
@@ -182,7 +182,7 @@ int main() {
     in.tabDelays.get<float>()[i] = 0.0f;
 
   // Allocate buffer for output signal
-  BeamFormedData out(ps, ctx);
+  SubbandProcOutputData out(ps, ctx);
 
   for (size_t i = 0; i < out.coherentData.num_elements(); i++)
     out.coherentData.get<float>()[i] = 42.0f;