diff --git a/.gitignore b/.gitignore
index 10065980b37a44002fc5753f0e34b750b13ac7ec..66c4575c358b538bf13ebd9b734d788258a9b0fa 100644
--- a/.gitignore
+++ b/.gitignore
@@ -34,9 +34,6 @@ Appl/CEP/CS1/CS1_IONProc/lofarconf
 Appl/CEP/CS1/CS1_Imager/Makefile.common
 Appl/CEP/CS1/CS1_Imager/autoconf_share
 Appl/CEP/CS1/CS1_Imager/lofarconf
-Appl/CEP/CS1/CS1_InputSection/Makefile.common
-Appl/CEP/CS1/CS1_InputSection/autoconf_share
-Appl/CEP/CS1/CS1_InputSection/lofarconf
 Appl/CEP/CS1/CS1_Interface/Makefile.common
 Appl/CEP/CS1/CS1_Interface/autoconf_share
 Appl/CEP/CS1/CS1_Interface/lofarconf
diff --git a/Appl/CEP/CS1/CS1_BGLProc/configure.in b/Appl/CEP/CS1/CS1_BGLProc/configure.in
index 1d25c376a4fb8cbf6dc0e23cf89b33f9fadbfbf9..4bc44d4bac392fae2a6abbf7ad45c3ce307c340b 100644
--- a/Appl/CEP/CS1/CS1_BGLProc/configure.in
+++ b/Appl/CEP/CS1/CS1_BGLProc/configure.in
@@ -6,6 +6,7 @@ dnl AC_CONFIG_AUX_DIR(config)
 dnl AM_CONFIG_HEADER(config/config.h)
 AM_CONFIG_HEADER(config.h)
 AM_INIT_AUTOMAKE(CS1_BGLProc, 1.0, no-define)
+AM_PROG_AS(gcc)
 
 dnl Initialize for LOFAR (may set compilers)
 lofar_INIT
@@ -59,14 +60,11 @@ lofar_EXTERNAL(mass,0,mass.h)
 
 lofar_INTERNAL(LCS/Common,Common,,1,Common/LofarTypedefs.h,,)
 lofar_INTERNAL(LCS/Transport,Transport,,1,Transport/DataHolder.h,,)
-lofar_INTERNAL(LCS/ACC/APS,APS,,1,APS/ParameterSet.h,,)
-lofar_INTERNAL(LCS/ACC/PLC,PLC,,1,PLC/ACCmain.h,,)
-lofar_INTERNAL(CEP/tinyCEP,tinyCEP,,1,tinyCEP/TinyDataManager.h,,)
-lofar_INTERNAL(Appl/CEP/CS1/CS1_Interface,CS1_Interface,,1,CS1_Interface/DH_RSP.h,,)
+dnl lofar_INTERNAL(LCS/ACC/APS,APS,,1,APS/ParameterSet.h,,)
+dnl lofar_INTERNAL(LCS/ACC/PLC,PLC,,1,PLC/ACCmain.h,,)
+dnl lofar_INTERNAL(CEP/tinyCEP,tinyCEP,,1,tinyCEP/TinyDataManager.h,,)
+lofar_INTERNAL(Appl/CEP/CS1/CS1_Interface,CS1_Interface,,1,CS1_Interface/CS1_Config.h,,)
 lofar_EXTERNAL(boost,1,boost/multi_array.hpp,"")
-dnl lofar_EXTERNAL(lofar_blrts,0,lofar.h,,/cephome/romein/projects/zoid/zoid/lofar)
-dnl lofar_EXTERNAL(c,0,"",,/cephome/romein/projects/zoid/glibc-build-zoid)
-dnl lofar_EXTERNAL(zoid,0,zoid_api.h,"",/cephome/romein/projects/zoid/glibc/sysdeps/blrts/zoid)
 lofar_EXTERNAL(zoid,0,zoid_api.h,"",/cephome/romein/projects/zoid/glibc/sysdeps/blrts/zoid,-I/cephome/romein/projects/zoid/zoid/lofar,,"-L/cephome/romein/projects/zoid/glibc-build-zoid -L/cephome/romein/projects/zoid/zoid/lofar",-llofar_blrts)
 
 dnl
diff --git a/Appl/CEP/CS1/CS1_BGLProc/src/AH_BGL_Processing.cc b/Appl/CEP/CS1/CS1_BGLProc/src/AH_BGL_Processing.cc
deleted file mode 100644
index 454eef3d09cbe97604bc842024f97b9253d2226e..0000000000000000000000000000000000000000
--- a/Appl/CEP/CS1/CS1_BGLProc/src/AH_BGL_Processing.cc
+++ /dev/null
@@ -1,277 +0,0 @@
-//#  AH_BGL_Processing.cc: 
-//#
-//#  P.O.Box 2, 7990 AA Dwingeloo, The Netherlands, seg@astron.nl
-//#
-//#  This program is free software; you can redistribute it and/or modify
-//#  it under the terms of the GNU General Public License as published by
-//#  the Free Software Foundation; either version 2 of the License, or
-//#  (at your option) any later version.
-//#
-//#  This program is distributed in the hope that it will be useful,
-//#  but WITHOUT ANY WARRANTY; without even the implied warranty of
-//#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-//#  GNU General Public License for more details.
-//#
-//#  You should have received a copy of the GNU General Public License
-//#  along with this program; if not, write to the Free Software
-//#  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-//#
-//#  $Id$
-
-
-//# Always #include <lofar_config.h> first!
-#include <lofar_config.h>
-
-#include <Common/lofar_iostream.h>
-
-#include <Blob/KeyValueMap.h>
-#include <CS1_BGLProc/AH_BGL_Processing.h>
-#include <CS1_BGLProc/WH_BGL_Processing.h>
-#include <CS1_BGLProc/TH_ZoidClient.h>
-#include <CS1_Interface/CS1_Config.h>
-#include <CS1_Interface/Stub_BGL.h>
-#include <Transport/BGLConnection.h>
-#include <Transport/TH_Null.h>
-
-#if defined HAVE_MPI
-#include <Transport/TH_MPI.h>
-#endif
-
-#if defined HAVE_ZOID
-extern "C" {
-#include <lofar.h>
-}
-#endif
-
-namespace LOFAR {
-namespace CS1 {
-
-
-char **AH_BGL_Processing::original_argv;
-
-
-AH_BGL_Processing::AH_BGL_Processing() 
-  : itsCS1PS(0),
-    itsWHs(0),
-    itsSubbandStub(0),
-    itsVisibilitiesStub(0)
-{
-}
-
-AH_BGL_Processing::~AH_BGL_Processing()
-{
-  undefine();
-}
-
-void AH_BGL_Processing::undefine()
-{
-  for (uint i = 0; i < itsWHs.size(); i ++) {
-    delete itsWHs[i];
-  }
-  itsWHs.clear();
-
-  delete itsCS1PS;              itsCS1PS              = 0;
-  delete itsSubbandStub;	itsSubbandStub	      = 0;
-  delete itsVisibilitiesStub;	itsVisibilitiesStub   = 0;
-}  
-
-
-#if defined HAVE_BGL
-
-unsigned AH_BGL_Processing::remapOnTree(unsigned pset, unsigned core, struct BGLPersonality &personality)
-{
-  unsigned psetXsize  = personality.getXpsetSize();
-  unsigned psetYsize  = personality.getYpsetSize();
-  unsigned psetZsize  = personality.getZpsetSize();
-
-  unsigned psetXcount = personality.getXsize() / psetXsize;
-  unsigned psetYcount = personality.getYsize() / psetYsize;
-  unsigned psetZcount = personality.getZsize() / psetZsize;
-
-  unsigned xOrigin    = pset			       % psetXcount * psetXsize;
-  unsigned yOrigin    = pset / psetXcount	       % psetYcount * psetYsize;
-  unsigned zOrigin    = pset / psetXcount / psetYcount % psetZcount * psetZsize;
-
-  unsigned psetSize   = personality.numNodesInPset();
-
-  unsigned numProcs, xOffset, yOffset, zOffset, node;
-
-  personality.coordsForPsetRank(core % psetSize, xOffset, yOffset, zOffset);
-
-  unsigned x = xOrigin + xOffset - personality.xPsetOrigin();
-  unsigned y = yOrigin + yOffset - personality.yPsetOrigin();
-  unsigned z = zOrigin + zOffset - personality.zPsetOrigin();
-  unsigned t = core / psetSize;
-
-  rts_rankForCoordinates(x, y, z, t, &node, &numProcs);
-
-#if defined HAVE_MPI
-  ASSERTSTR(node < (unsigned) TH_MPI::getNumberOfNodes(), "not enough nodes allocated (node = " << node << ", TH_MPI::getNumberOfNodes() = " << TH_MPI::getNumberOfNodes() << ")\n");
-#endif
-
-  return node;
-}
-
-#endif
-
-
-void AH_BGL_Processing::define(const KeyValueMap&) {
-
-  LOG_TRACE_FLOW_STR("Start of AH_BGL_Processing::define()");
-  
-  itsCS1PS = new CS1_Parset(&itsParamSet);
-  itsCS1PS->adoptFile("OLAP.parset");
-
-#if defined HAVE_ZOID
-  ASSERT(itsCS1PS->getBool("OLAP.BGLProc.useZoid"));
-#else
-  ASSERT(!itsCS1PS->getBool("OLAP.BGLProc.useZoid"));
-#endif
-  
-  unsigned nrSubBands	     = itsCS1PS->nrSubbands();
-  vector<double> baseFreqs   = itsCS1PS->refFreqs();
-  unsigned psetsPerCell	     = itsCS1PS->getInt32("OLAP.BGLProc.psetsPerCell");
-  unsigned usedNodesPerPset  = itsCS1PS->getInt32("OLAP.BGLProc.nodesPerPset");
-  unsigned nrSubbandsPerPset = itsCS1PS->getInt32("OLAP.subbandsPerPset");
-  unsigned nrPsetsPerStorage = itsCS1PS->getInt32("OLAP.psetsPerStorage");
-
-  ASSERTSTR(nrSubBands <= baseFreqs.size(), "Not enough base frequencies in Data.RefFreqs specified");
-
-  itsSubbandStub	= new Stub_BGL(true, true, "input_BGLProc", itsCS1PS);
-  itsVisibilitiesStub	= new Stub_BGL(true, false, "BGLProc_Storage", itsCS1PS);
-
-#if defined HAVE_BGL
-  struct BGLPersonality personality;
-  int retval = rts_get_personality(&personality, sizeof personality);
-  ASSERTSTR(retval == 0, "Could not get personality");
-  unsigned physicalNodesPerPset = personality.numNodesInPset();
-
-  if (personality.isVirtualNodeMode())
-    physicalNodesPerPset *= 2;
-
-  ASSERTSTR(usedNodesPerPset <= physicalNodesPerPset, "too many nodes per pset");
-#else
-  unsigned physicalNodesPerPset = usedNodesPerPset;
-#endif
-
-  const char *str	  = getenv("FIRST_NODE");
-  unsigned   logicalNode  = str != 0 ? atoi(str) : 0;
-
-  ASSERTSTR(logicalNode % usedNodesPerPset == 0, "FIRST_NODE not a multiple of BGLProc.NodesPerPset");
-
-#if defined HAVE_MPI
-  unsigned maxPsets   = (TH_MPI::getNumberOfNodes() + physicalNodesPerPset) / physicalNodesPerPset;
-#else
-  unsigned maxPsets   = 1;
-#endif
-
-  unsigned firstPset  = logicalNode / usedNodesPerPset;
-  unsigned totalPsets = nrSubBands / nrSubbandsPerPset;
-  unsigned lastPset   = firstPset + std::min(totalPsets - firstPset, maxPsets);
-
-  ASSERTSTR(firstPset < lastPset, "not enough nodes specified (firstPset = " << firstPset << ", lastPset = " << lastPset << ", totalPsets = " << totalPsets << ", logicalNode = " << logicalNode << ", nrSubBands = " << nrSubBands << ", nrSubbandsPerPset = " << nrSubbandsPerPset << ", physicalNodesPerPset = " << physicalNodesPerPset << ", usedNodesPerPset = " << usedNodesPerPset << ")\n");
-
-#if defined HAVE_ZOID
-  // one of the compute cores in each Pset has to initialize its I/O node
-  if (personality.rankInPset() == 0 && (unsigned) TH_MPI::getCurrentRank() < personality.numComputeNodes()) {
-    vector<size_t> lengths;
-
-    for (int arg = 0; original_argv[arg] != 0; arg ++) {
-      std::clog << "adding arg " << original_argv[arg] << std::endl;
-      lengths.push_back(strlen(original_argv[arg]) + 1);
-    }
-
-    std::clog << "calling lofar_init(..., ..., " << lengths.size() << ")" << std::endl;
-    lofar_init(original_argv, &lengths[0], lengths.size());
-  }
-
-  TH_ZoidClient *thZoid = new TH_ZoidClient();
-#endif
-
-  for (unsigned pset = firstPset; pset < lastPset; pset ++) {
-    for (unsigned core = 0; core < usedNodesPerPset; core ++) {
-      WH_BGL_Processing *wh = new WH_BGL_Processing("BGL_Proc", logicalNode, itsCS1PS);
-      itsWHs.push_back(wh);
-      TinyDataManager &dm = wh->getDataManager();
-
-      unsigned cell = pset / psetsPerCell;
-      unsigned cellCore = core + usedNodesPerPset * (pset % psetsPerCell);
-      
-      unsigned storage_host = pset / psetsPerCell / nrPsetsPerStorage;
-      unsigned storage_port = core + usedNodesPerPset * (pset % (psetsPerCell * nrPsetsPerStorage) );
-
-#if defined HAVE_ZOID
-      if (itsParamSet.getBool("OLAP.IONProc.useScatter")) {
-	Connection *in = new BGLConnection("zoid", 0, dm.getGeneralInHolder(WH_BGL_Processing::SUBBAND_CHANNEL), thZoid);
-	dm.setInConnection(WH_BGL_Processing::SUBBAND_CHANNEL, in);
-      } else
-#endif
-	itsSubbandStub->connect(cell, cellCore, dm, WH_BGL_Processing::SUBBAND_CHANNEL);
-
-#if defined HAVE_ZOID
-      if (itsParamSet.getBool("OLAP.IONProc.useGather")) {
-	Connection *out = new BGLConnection("zoid", dm.getGeneralOutHolder(WH_BGL_Processing::VISIBILITIES_CHANNEL), 0, thZoid);
-	dm.setOutConnection(WH_BGL_Processing::VISIBILITIES_CHANNEL, out);
-      } else
-#endif
-	itsVisibilitiesStub->connect(storage_host, storage_port, dm, WH_BGL_Processing::VISIBILITIES_CHANNEL);
-
-#if defined HAVE_BGL
-      wh->runOnNode(remapOnTree(pset - firstPset, core, personality));
-#else
-      wh->runOnNode(logicalNode);
-#endif
-      ++ logicalNode;
-    }
-  }
-
-  LOG_TRACE_FLOW_STR("Finished define()");
-}
-
-void AH_BGL_Processing::init()
-{
-  for (uint i = 0; i < itsWHs.size(); i ++) {
-    WH_BGL_Processing *wh = itsWHs[i];
-    wh->basePreprocess();
-  }
-}
-
-void AH_BGL_Processing::run(int steps) {
-  LOG_TRACE_FLOW_STR("Start AH_BGL_Processing::run() "  );
-  for (int i = 0; i < steps; i++) {
-    LOG_TRACE_LOOP_STR("processing run " << i );
-
-    for (uint j = 0; j < itsWHs.size(); j ++) {
-      itsWHs[j]->baseProcess();
-    }
-  }
-  LOG_TRACE_FLOW_STR("Finished AH_BGL_Processing::run() "  );
-}
-
-// void AH_BGL_Processing::postrun() {
-//   vector<WorkHolder*>::iterator it = itsWHs.begin();
-//   for (; it < itsWHs.end(); it++) {
-//     (*it)->basePostprocess();
-//   }
-// }
-
-
-void AH_BGL_Processing::dump() const {
-  vector<WH_BGL_Processing *>::const_iterator it;
-  for (it = itsWHs.begin(); it < itsWHs.end(); it++) {
-#if defined HAVE_MPI
-    if ((*it)->getNode() == TH_MPI::getCurrentRank()) {
-      (*it)->dump();
-    }
-#else
-    (*it)->dump();
-#endif
-  }
-}
-
-void AH_BGL_Processing::quit() {
-  undefine();
-}
-
-} // namespace CS1
-} // namespace LOFAR
diff --git a/Appl/CEP/CS1/CS1_BGLProc/src/AH_BGL_Processing.h b/Appl/CEP/CS1/CS1_BGLProc/src/AH_BGL_Processing.h
deleted file mode 100644
index 27cc74e37b3479f88b9b89b9d3288533596ba5c3..0000000000000000000000000000000000000000
--- a/Appl/CEP/CS1/CS1_BGLProc/src/AH_BGL_Processing.h
+++ /dev/null
@@ -1,67 +0,0 @@
-//#  AH_BGL_Processing.h: 
-//#
-//#  P.O.Box 2, 7990 AA Dwingeloo, The Netherlands, seg@astron.nl
-//#
-//#  This program is free software; you can redistribute it and/or modify
-//#  it under the terms of the GNU General Public License as published by
-//#  the Free Software Foundation; either version 2 of the License, or
-//#  (at your option) any later version.
-//#
-//#  This program is distributed in the hope that it will be useful,
-//#  but WITHOUT ANY WARRANTY; without even the implied warranty of
-//#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-//#  GNU General Public License for more details.
-//#
-//#  You should have received a copy of the GNU General Public License
-//#  along with this program; if not, write to the Free Software
-//#  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-//#
-//#  $Id$
-
-#ifndef LOFAR_CS1_BGL_PROC_AH_BGL_PROCESSING_H
-#define LOFAR_CS1_BGL_PROC_AH_BGL_PROCESSING_H
-
-#include <tinyCEP/TinyApplicationHolder.h>
-#include <CS1_Interface/Stub_BGL.h>
-#include <CS1_Interface/CS1_Parset.h>
-
-#if defined HAVE_BGL
-#include <rts.h>
-#endif
-
-namespace LOFAR {
-namespace CS1 {
-
-    //# Forward declarations
-class WH_BGL_Processing;
-
-    // Description of class.
-class AH_BGL_Processing : public TinyApplicationHolder
-{
-  public:
-		 AH_BGL_Processing();
-    virtual	 ~AH_BGL_Processing();
-    virtual void undefine();
-    virtual void define(const KeyValueMap&);
-    virtual void init();
-    virtual void run(int nsteps);
-    /*   virtual void postrun  (); */
-    virtual void dump() const;
-    virtual void quit();
-
-    static char  **original_argv;
-
-  private:
-#if defined HAVE_BGL
-    static unsigned remapOnTree(unsigned cell, unsigned core, struct BGLPersonality &);
-#endif
-    
-    CS1_Parset                  *itsCS1PS;
-    vector<WH_BGL_Processing *> itsWHs;
-    Stub_BGL			*itsSubbandStub, *itsVisibilitiesStub;
-};
-
-} // namespace CS1
-} // namespace LOFAR
-
-#endif
diff --git a/Appl/CEP/CS1/CS1_BGLProc/src/Allocator.cc b/Appl/CEP/CS1/CS1_BGLProc/src/Allocator.cc
new file mode 100644
index 0000000000000000000000000000000000000000..fcdc4a6bc0f6c917eee17ad7a61f1bea31fd5c43
--- /dev/null
+++ b/Appl/CEP/CS1/CS1_BGLProc/src/Allocator.cc
@@ -0,0 +1,62 @@
+#include <lofar_config.h>
+
+#include <Allocator.h>
+
+#include <malloc.h>
+
+
+namespace LOFAR {
+namespace CS1 {
+
+Heap::Heap(size_t heapSize, int alignment)
+: size(heapSize)
+{
+  // One aligned region backs the whole heap; failing to obtain it is fatal.
+  if (posix_memalign(&start, alignment, heapSize) == 0)
+    return;
+  std::cerr << "could not allocate heap" << std::endl;
+  exit(1);
+}
+
+
+Heap::~Heap()
+{
+  free(start);	// releases the region obtained with posix_memalign() in the constructor
+}
+
+
+Overlay::Overlay(const Heap &heap)
+{
+  // Initially the complete heap, [start, start + size), is marked as free.
+  freeList.include(heap.start, (void *) ((char *) heap.start + heap.size));
+}
+
+
+void *Overlay::allocate(size_t size, int alignment)
+{
+  // First fit over the free ranges; the mask rounding needs a power-of-two alignment.
+  const std::vector<SparseSet<void *>::range> &freeRanges = freeList.getRanges();
+  const size_t				       mask	   = alignment - 1;
+  for (SparseSet<void *>::const_iterator candidate = freeRanges.begin(); candidate != freeRanges.end(); candidate ++) {
+    char *aligned = (char *) (((size_t) candidate->begin + mask) & ~mask);
+    if ((char *) candidate->end - aligned >= (ptrdiff_t) size) {
+      freeList.exclude((void *) aligned, (void *) (aligned + size));
+      sizes[(void *) aligned] = size;	// deallocate() looks the extent up here
+      return aligned;
+    }
+  }
+
+  std::cerr << "could not allocate memory from heap" << std::endl;
+  std::exit(1);
+}
+
+void Overlay::deallocate(void *ptr)	// hands a block obtained from allocate() back to the free list
+{
+  std::map<void *, size_t>::iterator index = sizes.find(ptr);
+  if (index == sizes.end()) return;	// null, foreign or already freed: ignore rather than dereference end()
+  freeList.include(ptr, (void *) ((char *) ptr + index->second));
+  sizes.erase(index);
+}
+
+} // namespace CS1
+} // namespace LOFAR
diff --git a/Appl/CEP/CS1/CS1_BGLProc/src/Allocator.h b/Appl/CEP/CS1/CS1_BGLProc/src/Allocator.h
new file mode 100644
index 0000000000000000000000000000000000000000..720712ed22c70e5dadf116e332aca23e503883d6
--- /dev/null
+++ b/Appl/CEP/CS1/CS1_BGLProc/src/Allocator.h
@@ -0,0 +1,39 @@
+#ifndef LOFAR_APPL_CEP_CS1_CS1_BGL_PROC_ALLOCATOR_H
+#define LOFAR_APPL_CEP_CS1_CS1_BGL_PROC_ALLOCATOR_H
+
+#include <CS1_Interface/SparseSet.h>
+#include <map>
+
+namespace LOFAR {
+namespace CS1 {
+
+class Heap	// owns one aligned block of raw memory for its whole lifetime
+{
+  public:
+    Heap(size_t heapSize, int alignment);	// allocates heapSize bytes with posix_memalign(); exits the program on failure
+    ~Heap();	// frees the block; NOTE(review): copies are not disabled, a copied Heap would free it twice
+
+  private:
+    friend class Overlay;	// Overlay reads start/size to seed its free list
+    void	 *start;	// first byte of the block
+    size_t	 size;	// length of the block in bytes
+};
+
+
+class Overlay	// sub-allocator that carves blocks out of the memory of one Heap
+{
+  public:
+    Overlay(const Heap &);	// starts with the entire heap marked free, independent of any other Overlay on it
+
+    void *allocate(size_t size, int alignment);	// first-fit; prints an error and exits when nothing fits
+    void deallocate(void *ptr);	// returns a block from allocate() to the free list
+
+  private:
+    SparseSet<void *>	     freeList;	// address ranges that are currently unallocated
+    std::map<void *, size_t> sizes;	// size of every live allocation, keyed by its start address
+};
+
+} // namespace CS1
+} // namespace LOFAR
+
+#endif
diff --git a/Appl/CEP/CS1/CS1_BGLProc/src/BGL_Processing.cc b/Appl/CEP/CS1/CS1_BGLProc/src/BGL_Processing.cc
new file mode 100644
index 0000000000000000000000000000000000000000..dce1d57360caa4e5530478cea6268672576398a9
--- /dev/null
+++ b/Appl/CEP/CS1/CS1_BGLProc/src/BGL_Processing.cc
@@ -0,0 +1,471 @@
+//#  BGL_Processing.cc: Blue Gene processing for 1 second of sampled data
+//#
+//#  P.O.Box 2, 7990 AA Dwingeloo, The Netherlands, seg@astron.nl
+//#
+//#  This program is free software; you can redistribute it and/or modify
+//#  it under the terms of the GNU General Public License as published by
+//#  the Free Software Foundation; either version 2 of the License, or
+//#  (at your option) any later version.
+//#
+//#  This program is distributed in the hope that it will be useful,
+//#  but WITHOUT ANY WARRANTY; without even the implied warranty of
+//#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+//#  GNU General Public License for more details.
+//#
+//#  You should have received a copy of the GNU General Public License
+//#  along with this program; if not, write to the Free Software
+//#  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+//#
+//#  $Id$
+
+//# Always #include <lofar_config.h> first!
+#include <lofar_config.h>
+
+//# Includes
+#include <BGL_Processing.h>
+#include <CorrelatorAsm.h>
+#include <FIR_Asm.h>
+
+#include <Common/Timer.h>
+#include <Transport/TH_MPI.h>
+#include <CS1_Interface/BGL_Configuration.h>
+#include <CS1_Interface/BGL_Mapping.h>
+
+#include <cassert>
+#include <complex>
+#include <cmath>
+#include <iomanip>
+#include <iostream>
+#include <map>
+
+
+#if defined HAVE_ZOID && defined HAVE_BGL
+extern "C" {
+#include <lofar.h>
+}
+
+#endif
+
+#if defined HAVE_MPI
+#define LOG_CONDITION	(itsPersonality.rankInPset() == 0)
+//#define LOG_CONDITION	(TH_MPI::getCurrentRank() == 0)
+#else
+#define LOG_CONDITION	1
+#endif
+
+namespace LOFAR {
+namespace CS1 {
+
+#if !defined HAVE_MASS
+
+inline static dcomplex cosisin(double x)	// only compiled when HAVE_MASS is not defined
+{
+  return makedcomplex(cos(x), sin(x));	// presumably (real, imaginary), i.e. cos(x) + i*sin(x) -- TODO confirm makedcomplex()
+}
+
+#endif
+
+
+static NSTimer transposeTimer("transpose()", true);	// NOTE(review): process() declares a local transposeTimer that hides this one
+static NSTimer computeTimer("computing", true);	// started/stopped around the PPF + correlator work in process()
+
+char **BGL_Processing::original_argv;	// read by initIONode(), which the constructor calls: set it before constructing
+
+
+BGL_Processing::BGL_Processing(TransportHolder *th)
+:	// all owned pointers start out null; the objects themselves are created in preprocess()
+  itsTransportHolder(th),
+  itsInputData(0),
+  itsTransposedData(0),
+  itsFilteredData(0),
+  itsCorrelatedData(0),
+#if defined HAVE_BGL
+  itsTranspose(0),	// NOTE(review): guarded by HAVE_BGL here, but created/used/deleted under HAVE_MPI elsewhere in this file
+#endif
+  itsPPF(0),
+  itsCorrelator(0)
+{
+  memset(itsHeaps, 0, sizeof itsHeaps);	// heap pointers stay null until preprocess() allocates them
+
+#if defined HAVE_BGL
+  getPersonality();	// fills itsPersonality, which initIONode() below relies on
+#endif
+
+#if defined HAVE_ZOID && defined HAVE_BGL
+  initIONode();	// reads original_argv, so that must already be set
+#endif
+}
+
+
+BGL_Processing::~BGL_Processing()
+{	// releases nothing itself; the objects made by preprocess() are deleted in postprocess()
+}
+
+
+#if defined HAVE_BGL
+
+void BGL_Processing::getPersonality()	// fills itsPersonality; MPI rank 0 logs the topology
+{
+  int retval = rts_get_personality(&itsPersonality, sizeof itsPersonality);
+  assert(retval == 0);	// NOTE(review): compiled out under NDEBUG, a failure would then go unnoticed
+
+  if (TH_MPI::getCurrentRank() == 0)
+    std::clog << "topology = ("
+	      << itsPersonality.getXsize() << ','
+	      << itsPersonality.getYsize() << ','
+	      << itsPersonality.getZsize() << "), torus wraparound = ("
+	      << (itsPersonality.isTorusX() ? 'T' : 'F') << ','
+	      << (itsPersonality.isTorusY() ? 'T' : 'F') << ','
+	      << (itsPersonality.isTorusZ() ? 'T' : 'F') << ')'
+	      << std::endl;
+}
+
+#endif
+
+
+#if defined HAVE_ZOID && defined HAVE_BGL
+
+void BGL_Processing::initIONode() const
+{
+  // one of the compute cores in each Pset has to initialize its I/O node
+  // (the core with rank 0 in its pset, and only among the first numComputeNodes() MPI ranks)
+  if (itsPersonality.rankInPset() == 0 && TH_MPI::getCurrentRank() / itsPersonality.numComputeNodes() == 0) {
+    std::vector<size_t> lengths;
+
+    for (int arg = 0; original_argv[arg] != 0; arg ++) {	// original_argv is walked up to its null terminator
+      std::clog << "adding arg " << original_argv[arg] << std::endl;
+      lengths.push_back(strlen(original_argv[arg]) + 1);	// + 1: the length includes the terminating NUL
+    }
+
+    std::clog << "calling lofar_init(..., ..., " << lengths.size() << ")" << std::endl;
+    lofar_init(original_argv, &lengths[0], lengths.size());	// NOTE(review): &lengths[0] requires at least one argument
+  }
+}
+
+#endif
+
+
+#if 0
+void BGL_Processing::checkConsistency(CS1_Parset *parset) const
+{
+  ASSERT(parset->nrPPFTaps()				 == NR_TAPS);
+  ASSERT(parset->getInt32("Observation.nrPolarisations") == NR_POLARIZATIONS);
+  ASSERT(parset->nrChannelsPerSubband()			 == NR_SUBBAND_CHANNELS);
+
+#if !defined C_IMPLEMENTATION
+  ASSERT(parset->BGLintegrationSteps() % 16		 == 0);
+
+  ASSERT(_FIR_constants_used.input_type			 == INPUT_TYPE);
+  ASSERT(_FIR_constants_used.nr_subband_channels	 == NR_SUBBAND_CHANNELS);
+  ASSERT(_FIR_constants_used.nr_taps			 == NR_TAPS);
+  ASSERT(_FIR_constants_used.nr_polarizations		 == NR_POLARIZATIONS);
+
+  ASSERT(_correlator_constants_used.nr_subband_channels	 == NR_SUBBAND_CHANNELS);
+  ASSERT(_correlator_constants_used.nr_polarizations	 == NR_POLARIZATIONS);
+#endif
+
+#if defined HAVE_BGL
+  unsigned physicalCoresPerPset = itsPersonality.numNodesInPset();
+
+  if (itsPersonality.isVirtualNodeMode())
+    physicalCoresPerPset *= 2;
+
+  ASSERTSTR(parset->nrCoresPerPset() <= physicalCoresPerPset, "too many cores per pset specified");
+  ASSERTSTR(parset->nrPsets() <= itsPersonality.numPsets(), "not enough psets available");
+#endif
+}
+#endif
+
+
+#if defined HAVE_MPI
+
+void BGL_Processing::printSubbandList() const
+{
+  // Logs the subbands this node cycles through, in the order process() visits them.
+  std::clog << "node " << TH_MPI::getCurrentRank() << " filters and correlates subbands ";
+  unsigned subband   = itsCurrentSubband;
+  char	   separator = '[';	// only the first entry is preceded by '['
+  do {
+    std::clog << separator << subband;
+    separator = ',';
+    subband  += itsSubbandIncrement;
+    if (subband >= itsLastSubband)	// wrap around within [itsFirstSubband, itsLastSubband)
+      subband -= itsLastSubband - itsFirstSubband;
+  } while (subband != itsCurrentSubband);
+
+  std::clog << ']' << std::endl;
+}
+
+#endif
+
+
+
+#if 0
+void BGL_Processing::preprocess(CS1_Parset *parset)
+{
+  checkConsistency(parset);
+  
+#if defined HAVE_BGL
+  unsigned usedCoresPerPset = parset->nrCoresPerPset();
+  unsigned myPset	    = itsPersonality.getPsetNum();
+  unsigned myCore	    = BGL_Mapping::reverseMapCoreOnPset(BGLPersonality_rankInPset(&itsPersonality) + itsPersonality.numNodesInPset() * (TH_MPI::getCurrentRank() / itsPersonality.numComputeNodes()), myPset);;
+#else
+  unsigned usedCoresPerPset = 1;
+  unsigned myPset	    = 0;
+  unsigned myCore	    = 0;
+#endif
+
+  vector<unsigned> inputPsets  = parset->getUint32Vector("OLAP.BGLProc.inputPsets");
+  vector<unsigned> outputPsets = parset->getUint32Vector("OLAP.BGLProc.outputPsets");
+
+#if defined HAVE_BGL
+  Transpose::getMPIgroups(usedCoresPerPset, itsPersonality, inputPsets, outputPsets);
+#endif
+
+  vector<unsigned>::const_iterator inputPsetIndex  = std::find(inputPsets.begin(),  inputPsets.end(),  myPset);
+  vector<unsigned>::const_iterator outputPsetIndex = std::find(outputPsets.begin(), outputPsets.end(), myPset);
+
+  itsIsTransposeInput  = inputPsetIndex  != inputPsets.end();
+  itsIsTransposeOutput = outputPsetIndex != outputPsets.end();
+
+  unsigned nrStations		   = parset->nrStations();
+  unsigned nrBaselines		   = nrStations * (nrStations + 1) / 2;
+  unsigned nrSamplesPerIntegration = parset->BGLintegrationSteps();
+  unsigned nrSamplesToBGLProc	   = parset->nrSamplesToBGLProc();
+
+  size_t inputDataSize      = itsIsTransposeInput  ? InputData::requiredSize(outputPsets.size(), nrSamplesToBGLProc) : 0;
+  size_t transposedDataSize = itsIsTransposeOutput ? TransposedData::requiredSize(nrStations, nrSamplesToBGLProc) : 0;
+  size_t filteredDataSize   = itsIsTransposeOutput ? FilteredData::requiredSize(nrStations, nrSamplesPerIntegration) : 0;
+  size_t correlatedDataSize = itsIsTransposeOutput ? CorrelatedData::requiredSize(nrBaselines) : 0;
+
+  itsHeaps[0] = new Heap(std::max(inputDataSize, filteredDataSize), 32);
+  itsHeaps[1] = new Heap(std::max(transposedDataSize, correlatedDataSize), 32);
+
+  if (itsIsTransposeInput) {
+    itsInputData = new InputData(*itsHeaps[0], outputPsets.size(), nrSamplesToBGLProc);
+  }
+
+  if (itsIsTransposeOutput) {
+    // FIXME: !useGather not implemented
+    ASSERT(parset->getBool("OLAP.IONProc.useGather"));
+
+    unsigned nrSubbandsPerPset	= parset->nrSubbandsPerPset();
+    unsigned logicalNode	= usedCoresPerPset * (outputPsetIndex - outputPsets.begin()) + myCore;
+    // TODO: logicalNode assumes output psets are consecutively numbered
+
+    itsCenterFrequencies = parset->refFreqs();
+    itsFirstSubband	 = (logicalNode / usedCoresPerPset) * nrSubbandsPerPset;
+    itsLastSubband	 = itsFirstSubband + nrSubbandsPerPset;
+    itsCurrentSubband	 = itsFirstSubband + logicalNode % usedCoresPerPset % nrSubbandsPerPset;
+    itsSubbandIncrement	 = usedCoresPerPset % nrSubbandsPerPset;
+
+#if defined HAVE_MPI
+    printSubbandList();
+#endif
+
+    itsTransposedData = new TransposedData(*itsHeaps[1], nrStations, nrSamplesToBGLProc);
+    itsFilteredData   = new FilteredData(*itsHeaps[0], nrStations, nrSamplesPerIntegration);
+    itsCorrelatedData = new CorrelatedData(*itsHeaps[1], nrBaselines);
+
+    itsPPF	      = new PPF(nrStations, nrSamplesPerIntegration, parset->sampleRate() / NR_SUBBAND_CHANNELS, parset->getBool("OLAP.delayCompensation"));
+    itsCorrelator     = new Correlator(nrStations, nrSamplesPerIntegration);
+  }
+
+#if defined HAVE_MPI
+  if (itsIsTransposeInput || itsIsTransposeOutput) {
+    itsTranspose = new Transpose(itsIsTransposeInput, itsIsTransposeOutput, myCore, nrStations);
+    itsTranspose->setupTransposeParams(inputPsets, outputPsets, itsInputData, itsTransposedData);
+  }
+#endif
+}
+
+#else
+
+void BGL_Processing::preprocess(BGL_Configuration &configuration)
+{
+  //checkConsistency(parset);	TODO
+  // Creates the heaps, data buffers and worker objects this core needs according to `configuration`.
+#if defined HAVE_BGL
+  unsigned usedCoresPerPset = configuration.nrUsedCoresPerPset();
+  unsigned myPset	    = itsPersonality.getPsetNum();
+  unsigned myCore	    = BGL_Mapping::reverseMapCoreOnPset(BGLPersonality_rankInPset(&itsPersonality) + itsPersonality.numNodesInPset() * (TH_MPI::getCurrentRank() / itsPersonality.numComputeNodes()), myPset);;
+#else
+  unsigned usedCoresPerPset = 1;
+  unsigned myPset	    = 0;
+  unsigned myCore	    = 0;
+#endif
+
+  std::vector<unsigned> &inputPsets  = configuration.inputPsets();
+  std::vector<unsigned> &outputPsets = configuration.outputPsets();
+
+#if defined HAVE_BGL
+  Transpose::getMPIgroups(usedCoresPerPset, itsPersonality, inputPsets, outputPsets);
+#endif
+  // This core is a transpose input and/or output when its pset appears in the respective list.
+  std::vector<unsigned>::const_iterator inputPsetIndex  = std::find(inputPsets.begin(),  inputPsets.end(),  myPset);
+  std::vector<unsigned>::const_iterator outputPsetIndex = std::find(outputPsets.begin(), outputPsets.end(), myPset);
+
+  itsIsTransposeInput  = inputPsetIndex  != inputPsets.end();
+  itsIsTransposeOutput = outputPsetIndex != outputPsets.end();
+
+  unsigned nrStations		   = configuration.nrStations();
+  unsigned nrBaselines		   = nrStations * (nrStations + 1) / 2;
+  unsigned nrSamplesPerIntegration = configuration.nrSamplesPerIntegration();
+  unsigned nrSamplesToBGLProc	   = configuration.nrSamplesToBGLProc();
+  // Buffers for roles this core does not play get size 0.
+  size_t inputDataSize      = itsIsTransposeInput  ? InputData::requiredSize(outputPsets.size(), nrSamplesToBGLProc) : 0;
+  size_t transposedDataSize = itsIsTransposeOutput ? TransposedData::requiredSize(nrStations, nrSamplesToBGLProc) : 0;
+  size_t filteredDataSize   = itsIsTransposeOutput ? FilteredData::requiredSize(nrStations, nrSamplesPerIntegration) : 0;
+  size_t correlatedDataSize = itsIsTransposeOutput ? CorrelatedData::requiredSize(nrBaselines) : 0;
+  // Heap 0 is shared by InputData and FilteredData, heap 1 by TransposedData and CorrelatedData; each fits the larger of the two.
+  itsHeaps[0] = new Heap(std::max(inputDataSize, filteredDataSize), 32);	// 32 is the alignment argument
+  itsHeaps[1] = new Heap(std::max(transposedDataSize, correlatedDataSize), 32);
+
+  if (itsIsTransposeInput) {
+    itsInputData = new InputData(*itsHeaps[0], outputPsets.size(), nrSamplesToBGLProc);
+  }
+
+  if (itsIsTransposeOutput) {
+    // TODO: !useGather not implemented
+    //ASSERT(parset->getBool("OLAP.IONProc.useGather"));
+
+    unsigned nrSubbandsPerPset	= configuration.nrSubbandsPerPset();
+    unsigned logicalNode	= usedCoresPerPset * (outputPsetIndex - outputPsets.begin()) + myCore;
+    // TODO: logicalNode assumes output psets are consecutively numbered
+
+    itsCenterFrequencies = configuration.refFreqs();
+    itsFirstSubband	 = (logicalNode / usedCoresPerPset) * nrSubbandsPerPset;
+    itsLastSubband	 = itsFirstSubband + nrSubbandsPerPset;
+    itsCurrentSubband	 = itsFirstSubband + logicalNode % usedCoresPerPset % nrSubbandsPerPset;
+    itsSubbandIncrement	 = usedCoresPerPset % nrSubbandsPerPset;
+
+#if defined HAVE_MPI
+    printSubbandList();
+#endif
+
+    itsTransposedData = new TransposedData(*itsHeaps[1], nrStations, nrSamplesToBGLProc);
+    itsFilteredData   = new FilteredData(*itsHeaps[0], nrStations, nrSamplesPerIntegration);
+    itsCorrelatedData = new CorrelatedData(*itsHeaps[1], nrBaselines);
+
+    itsPPF	      = new PPF(nrStations, nrSamplesPerIntegration, configuration.sampleRate() / NR_SUBBAND_CHANNELS, configuration.delayCompensation());
+    itsCorrelator     = new Correlator(nrStations, nrSamplesPerIntegration);
+  }
+
+#if defined HAVE_MPI
+  if (itsIsTransposeInput || itsIsTransposeOutput) {
+    itsTranspose = new Transpose(itsIsTransposeInput, itsIsTransposeOutput, myCore, nrStations);
+    itsTranspose->setupTransposeParams(inputPsets, outputPsets, itsInputData, itsTransposedData);
+  }
+#endif
+}
+#endif
+
+
+void BGL_Processing::process()
+{
+  NSTimer totalTimer("total", LOG_CONDITION);
+  totalTimer.start();
+  // Step 1 (transpose-input cores): read the input data through the transport holder.
+  if (itsIsTransposeInput) {
+#if defined HAVE_MPI
+    if (LOG_CONDITION)
+      std::clog << std::setprecision(12) << "core " << TH_MPI::getCurrentRank() << ": start reading at " << MPI_Wtime() << '\n';
+#endif
+
+    static NSTimer readTimer("receive timer", true);
+    readTimer.start();
+    itsInputData->read(itsTransportHolder);
+    readTimer.stop();
+  }
+  // Step 2 (input and output cores): transpose data and meta data; compiled only when HAVE_MPI is defined.
+  if (itsIsTransposeInput || itsIsTransposeOutput) {
+#if defined HAVE_MPI
+    if (LOG_CONDITION)
+      std::clog << std::setprecision(12) << "core " << TH_MPI::getCurrentRank() << ": start transpose at " << MPI_Wtime() << '\n';
+
+#if 0
+MPI_Barrier(itsTransposeGroup);
+MPI_Barrier(itsTransposeGroup);
+#endif
+
+    NSTimer transposeTimer("one transpose", LOG_CONDITION);	// local timer; hides the file-level transposeTimer
+    transposeTimer.start();
+    itsTranspose->transpose(itsInputData, itsTransposedData);
+    itsTranspose->transposeMetaData(itsInputData, itsTransposedData);
+    transposeTimer.stop();
+#endif
+  }
+  // Step 3 (transpose-output cores): filter, correlate, advance to the next subband, write the result.
+  if (itsIsTransposeOutput) {
+#if defined HAVE_MPI
+    if (LOG_CONDITION)
+      std::clog << std::setprecision(12) << "core " << TH_MPI::getCurrentRank() << ": start processing at " << MPI_Wtime() << '\n';
+#endif
+
+    computeTimer.start();
+    itsPPF->computeFlags(itsTransposedData, itsFilteredData);
+    itsPPF->filter(itsCenterFrequencies[itsCurrentSubband], itsTransposedData, itsFilteredData);
+
+    itsCorrelator->computeFlagsAndCentroids(itsFilteredData, itsCorrelatedData);
+    itsCorrelator->correlate(itsFilteredData, itsCorrelatedData);
+
+    if ((itsCurrentSubband += itsSubbandIncrement) >= itsLastSubband)	// wrap around within this core's subband range
+      itsCurrentSubband -= itsLastSubband - itsFirstSubband;
+
+    computeTimer.stop();
+
+#if defined HAVE_MPI
+    if (LOG_CONDITION)
+      std::clog << std::setprecision(12) << "core " << TH_MPI::getCurrentRank() << ": start writing at " << MPI_Wtime() << '\n';
+#endif
+
+    static NSTimer writeTimer("send timer", true);
+    writeTimer.start();
+    itsCorrelatedData->write(itsTransportHolder);
+    writeTimer.stop();
+  }
+
+#if defined HAVE_MPI
+  if (itsIsTransposeInput || itsIsTransposeOutput)
+    if (LOG_CONDITION)
+      std::clog << std::setprecision(12) << "core " << TH_MPI::getCurrentRank() << ": start idling at " << MPI_Wtime() << '\n';
+#endif
+
+#if 0
+  static unsigned count = 0;
+
+  if (TH_MPI::getCurrentRank() == 5 && ++ count == 9)
+    for (double time = MPI_Wtime() + 4.0; MPI_Wtime() < time;)
+      ;
+#endif
+
+  totalTimer.stop();
+}
+
+
+void BGL_Processing::postprocess()
+{
+  // Releases what preprocess() created; data objects go before the heaps they were constructed with.
+  if (itsIsTransposeInput)
+    delete itsInputData;
+
+  if (itsIsTransposeInput || itsIsTransposeOutput) {
+#if defined HAVE_MPI
+    delete itsTranspose;
+#endif
+  }
+
+  if (itsIsTransposeOutput) {
+    delete itsTransposedData;
+    delete itsPPF;
+    delete itsFilteredData;
+    delete itsCorrelator;
+    delete itsCorrelatedData;
+  }
+
+  delete itsHeaps[0];	// preprocess() allocates both heaps on every core, so they are
+  delete itsHeaps[1];	// freed unconditionally (cores that are not transpose outputs used to leak them)
+}
+
+} // namespace CS1
+} // namespace LOFAR
diff --git a/Appl/CEP/CS1/CS1_BGLProc/src/BGL_Processing.h b/Appl/CEP/CS1/CS1_BGLProc/src/BGL_Processing.h
new file mode 100644
index 0000000000000000000000000000000000000000..5756079eb532dd8c6e2fcf2384bce3ae8f0ecb8c
--- /dev/null
+++ b/Appl/CEP/CS1/CS1_BGLProc/src/BGL_Processing.h
@@ -0,0 +1,119 @@
+//#  BGL_Processing.h: polyphase filter and correlator
+//#
+//#  P.O.Box 2, 7990 AA Dwingeloo, The Netherlands, seg@astron.nl
+//#
+//#  This program is free software; you can redistribute it and/or modify
+//#  it under the terms of the GNU General Public License as published by
+//#  the Free Software Foundation; either version 2 of the License, or
+//#  (at your option) any later version.
+//#
+//#  This program is distributed in the hope that it will be useful,
+//#  but WITHOUT ANY WARRANTY; without even the implied warranty of
+//#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+//#  GNU General Public License for more details.
+//#
+//#  You should have received a copy of the GNU General Public License
+//#  along with this program; if not, write to the Free Software
+//#  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+//#
+//#  $Id$
+
+#ifndef LOFAR_APPL_CEP_CS1_CS1_BGL_PROC_BGL_PROCESSING_H
+#define LOFAR_APPL_CEP_CS1_CS1_BGL_PROC_BGL_PROCESSING_H
+
+#if 0 || !defined HAVE_BGL
+#define C_IMPLEMENTATION
+#endif
+
+#include <Transport/TransportHolder.h>
+#include <CS1_Interface/CS1_Config.h>
+#if 0
+#include <CS1_Interface/CS1_Parset.h>
+#else
+#include <CS1_Interface/BGL_Configuration.h>
+#endif
+
+#include <Allocator.h>
+#include <InputData.h>
+#include <FilteredData.h>
+#include <TransposedData.h>
+#include <CorrelatedData.h>
+
+#include <Transpose.h>
+#include <PPF.h>
+#include <Correlator.h>
+
+#if defined HAVE_BGL
+#include <mpi.h>
+#endif
+
+#if defined HAVE_BGL
+#include <bglpersonality.h>
+#include <rts.h>
+#endif
+
+
+
+namespace LOFAR {
+namespace CS1 {
+
+// Per-core processing object: main() calls preprocess(), process() and postprocess() in response to BGL_Command values.
+class BGL_Processing {
+  public:
+			BGL_Processing(TransportHolder *th);
+			~BGL_Processing();
+
+#if 0
+    void		preprocess(CS1_Parset *parset);
+#else
+    void		preprocess(BGL_Configuration &); // main() passes the configuration it read from the transport holder
+#endif
+    void		process(); // one call per PROCESS command
+    void		postprocess(); // deletes the objects listed below, depending on the transpose flags
+
+    static char		**original_argv; // set by main() to its argv before TH_MPI::initMPI() is called
+
+  //private: (disabled, so everything below is currently public)
+    // TODO: make test program friend of itsTransposedData
+#if 0
+    void		checkConsistency(CS1_Parset *) const;
+#endif
+
+#if defined HAVE_BGL
+    void		getPersonality();
+#endif
+
+#if defined HAVE_ZOID && defined HAVE_BGL
+    void		initIONode() const;
+#endif
+
+#if defined HAVE_MPI
+    void		printSubbandList() const;
+#endif
+
+    TransportHolder	*itsTransportHolder; // process() writes the correlated data to it
+    std::vector<double> itsCenterFrequencies; // indexed by itsCurrentSubband in process()
+    unsigned    	itsFirstSubband, itsCurrentSubband, itsLastSubband, itsSubbandIncrement; // process() advances itsCurrentSubband by the increment and wraps it at itsLastSubband
+    bool		itsIsTransposeInput, itsIsTransposeOutput; // select which of the objects below postprocess() deletes
+
+    Heap		*itsHeaps[2]; // both deleted in postprocess() when itsIsTransposeOutput is set
+    InputData		*itsInputData; // deleted when itsIsTransposeInput is set
+    TransposedData	*itsTransposedData;
+    FilteredData	*itsFilteredData;
+    CorrelatedData	*itsCorrelatedData;
+
+#if defined HAVE_MPI
+    Transpose		*itsTranspose; // deleted when either transpose flag is set
+#endif
+    PPF			*itsPPF;
+    Correlator		*itsCorrelator;
+
+#if defined HAVE_BGL
+    BGLPersonality	itsPersonality;
+#endif
+};
+
+} // namespace CS1
+} // namespace LOFAR
+
+#endif
diff --git a/Appl/CEP/CS1/CS1_BGLProc/src/CS1_BGL_Processing_main.cc b/Appl/CEP/CS1/CS1_BGLProc/src/CS1_BGL_Processing_main.cc
index 360478a8109a56f5cb855632e5647fffd0439faa..c302adad20935a2d1ac258a96c7a403325d3b7df 100644
--- a/Appl/CEP/CS1/CS1_BGLProc/src/CS1_BGL_Processing_main.cc
+++ b/Appl/CEP/CS1/CS1_BGLProc/src/CS1_BGL_Processing_main.cc
@@ -20,19 +20,76 @@
 
 #include <lofar_config.h>
 
-#include <PLC/ACCmain.h>
-#include <Common/LofarLogger.h>
-#include <tinyCEP/ApplicationHolderController.h>
-#include <CS1_BGLProc/AH_BGL_Processing.h>
+#if 0
+#include <CS1_Interface/CS1_Parset.h>
+#else
+#include <Common/Exception.h>
+#include <CS1_Interface/BGL_Command.h>
+#include <CS1_Interface/BGL_Configuration.h>
+#include <Transport/TH_Null.h>
+#include <CS1_BGLProc/TH_ZoidClient.h>
+#endif
+#include <CS1_BGLProc/BGL_Processing.h>
+#include <Transport/TH_MPI.h>
+
+#include <boost/lexical_cast.hpp>
 
 using namespace LOFAR;
 using namespace LOFAR::CS1;
 
-int main(int argc, char **argv) {
-  INIT_LOGGER("CS1_BGL_Processing");
+int main(int argc, char **argv) // command loop: reads BGL_Commands from the transport holder and dispatches them until STOP
+{
+  try {
+    BGL_Processing::original_argv = argv;
+
+#if defined HAVE_MPI
+    TH_MPI::initMPI(argc, argv);
+#endif
+
+#if defined HAVE_ZOID && defined HAVE_BGL
+    TH_ZoidClient     th;
+#else
+    TH_Null	      th; // built without Zoid/BGL support: use the null transport holder instead
+#endif
+
+    BGL_Processing    proc(&th);
+    BGL_Command	      command;
+
+    do {
+      command.read(&th);
+
+      switch (command.value()) {
+	case BGL_Command::PREPROCESS :	{
+					  BGL_Configuration configuration;
+
+					  configuration.read(&th); // the configuration follows the command on the same transport holder
+					  proc.preprocess(configuration);
+					}
+					break;
+
+	case BGL_Command::PROCESS :	proc.process();
+					break;
+
+	case BGL_Command::POSTPROCESS :	proc.postprocess();
+					break;
+
+	default :			break; // STOP and any other value: nothing to dispatch
+      }
+    } while (command.value() != BGL_Command::STOP);
+
+#if defined HAVE_MPI
+    TH_MPI::finalize(); // only reached on a normal STOP; the catch blocks below return without it
+#endif
 
-  AH_BGL_Processing::original_argv = argv;
-  AH_BGL_Processing myAH;
-  ApplicationHolderController myAHController(myAH, 1); //listen to ACC every 1 runs
-  return ACC::PLC::ACCmain(argc, argv, &myAHController);
+    //abort(); // quickly release the partition
+    return 0;
+  } catch (Exception &ex) {
+    std::cerr << "Uncaught Exception: " << ex.what() << std::endl;
+    //abort(); // quickly release the partition
+    return 1;
+  } catch (std::exception &ex) {
+    std::cerr << "Uncaught exception: " << ex.what() << std::endl;
+    //abort(); // quickly release the partition
+    return 1;
+  }
 }
diff --git a/Appl/CEP/CS1/CS1_BGLProc/src/CacheAlignedAllocator.h b/Appl/CEP/CS1/CS1_BGLProc/src/CacheAlignedAllocator.h
new file mode 100644
index 0000000000000000000000000000000000000000..71898996683435ec1edfc998e92b90bb9975ee92
--- /dev/null
+++ b/Appl/CEP/CS1/CS1_BGLProc/src/CacheAlignedAllocator.h
@@ -0,0 +1,66 @@
+#ifndef LOFAR_APPL_CEP_CS1_CS1_BGL_PROC_CACHE_ALIGNED_ALLOCATOR_H
+#define LOFAR_APPL_CEP_CS1_CS1_BGL_PROC_CACHE_ALIGNED_ALLOCATOR_H
+
+#include <malloc.h>
+#include <memory>
+#include <new>
+
+
+#if defined HAVE_BGL
+#define CACHE_LINE_SIZE	32
+#define CACHE_ALIGNED	__attribute__ ((aligned(CACHE_LINE_SIZE)))
+#else
+#define CACHE_LINE_SIZE	16
+#define CACHE_ALIGNED
+#endif
+
+
+namespace LOFAR {
+namespace CS1 {
+
+// STL allocator that returns storage aligned to CACHE_LINE_SIZE bytes.
+//
+// It derives from std::allocator<T>, so it must supply its own rebind:
+// containers pick their allocator through Alloc::rebind<U>::other, and the
+// inherited rebind would name std::allocator<U>, bypassing allocate() below.
+template <typename T> class CacheAlignedAllocator : public std::allocator<T>
+{
+  public:
+    typedef typename std::allocator<T>::size_type size_type;
+    typedef typename std::allocator<T>::pointer pointer;
+    typedef typename std::allocator<T>::const_pointer const_pointer;
+
+    template <typename U> struct rebind
+    {
+      typedef CacheAlignedAllocator<U> other;
+    };
+
+    // The converting constructor suppresses the implicit default constructor, so it is declared explicitly.
+    CacheAlignedAllocator() {}
+    template <typename U> CacheAlignedAllocator(const CacheAlignedAllocator<U> &) {}
+
+    // Returns storage for `size` objects of type T; throws std::bad_alloc if the byte count overflows or memalign() fails.
+    pointer allocate(size_type size, const_pointer /*hint*/ = 0)
+    {
+      if (size > static_cast<size_type>(-1) / sizeof(T))
+        throw std::bad_alloc();
+
+      void *ptr = memalign(CACHE_LINE_SIZE, size * sizeof(T));
+
+      if (ptr == 0 && size != 0)
+        throw std::bad_alloc();
+
+      return static_cast<pointer>(ptr);
+    }
+
+    // Storage obtained from memalign() is released with free().
+    void deallocate(pointer ptr, size_type /*size*/)
+    {
+      free(ptr);
+    }
+};
+
+} // namespace CS1
+} // namespace LOFAR
+
+#endif
diff --git a/Appl/CEP/CS1/CS1_BGLProc/src/CorrelatedData.h b/Appl/CEP/CS1/CS1_BGLProc/src/CorrelatedData.h
new file mode 100644
index 0000000000000000000000000000000000000000..c4438a398d0167d24620fc2b3f1b9b9b88f0e915
--- /dev/null
+++ b/Appl/CEP/CS1/CS1_BGLProc/src/CorrelatedData.h
@@ -0,0 +1,98 @@
+#ifndef LOFAR_APPL_CEP_CS1_CS1_BGL_PROC_CORRELATED_DATA_H
+#define LOFAR_APPL_CEP_CS1_CS1_BGL_PROC_CORRELATED_DATA_H
+
+#include <Common/lofar_complex.h>
+#include <CS1_Interface/CS1_Config.h>
+#include <Transport/TH_Null.h>
+
+#include <Allocator.h>
+#include <TH_ZoidClient.h>
+
+#include <boost/multi_array.hpp>
+
+
+namespace LOFAR {
+namespace CS1 {
+// Correlator output: visibilities, valid-sample counts and centroids, all allocated from one Heap through an Overlay.
+class CorrelatedData
+{
+  public:
+    CorrelatedData(const Heap &heap, unsigned nrBaselines);
+    ~CorrelatedData();
+
+    static size_t requiredSize(unsigned nrBaselines); // sum of the three array sizes, in bytes
+    void	  write(TransportHolder *) /*const*/; // sends visibilities and nrValidSamples; the centroids are not sent
+
+  private:
+    Overlay	  overlay; // declared before the arrays: their initialisers allocate from it, so it must be constructed first
+    unsigned	  itsNrBaselines;
+
+  public:
+    boost::multi_array_ref<fcomplex, 4>       visibilities; //[itsNrBaselines][NR_SUBBAND_CHANNELS][NR_POLARIZATIONS][NR_POLARIZATIONS]
+    boost::multi_array_ref<unsigned short, 2> nrValidSamples; //[itsNrBaselines][NR_SUBBAND_CHANNELS]
+    float				      *centroids; //[itsNrBaselines]
+
+  private:
+    static size_t visibilitiesSize(unsigned nrBaselines);
+    static size_t nrValidSamplesSize(unsigned nrBaselines);
+    static size_t centroidSize(unsigned nrBaselines); // rounded up to a multiple of 32 bytes
+};
+
+// Byte size of the visibilities array: one fcomplex per baseline, channel and polarization pair.
+inline size_t CorrelatedData::visibilitiesSize(unsigned nrBaselines)
+{
+  return nrBaselines * (sizeof(fcomplex) * NR_SUBBAND_CHANNELS * NR_POLARIZATIONS * NR_POLARIZATIONS);
+}
+
+// Byte size of the nrValidSamples array: one unsigned short per baseline and channel.
+inline size_t CorrelatedData::nrValidSamplesSize(unsigned nrBaselines)
+{
+  return nrBaselines * (sizeof(unsigned short) * NR_SUBBAND_CHANNELS);
+}
+
+// Byte size of the centroids array (one float per baseline), rounded up to a multiple of 32.
+inline size_t CorrelatedData::centroidSize(unsigned nrBaselines)
+{
+  const size_t granularity = 32;
+  return (sizeof(float) * nrBaselines + granularity - 1) / granularity * granularity;
+}
+
+// Total number of bytes needed for one CorrelatedData: the sum of its three array sizes.
+inline size_t CorrelatedData::requiredSize(unsigned nrBaselines)
+{
+  return centroidSize(nrBaselines) + nrValidSamplesSize(nrBaselines) + visibilitiesSize(nrBaselines);
+}
+
+// Allocates the three arrays from `heap` through the overlay; the second allocate() argument (32) is presumably the alignment -- TODO confirm.
+inline CorrelatedData::CorrelatedData(const Heap &heap, unsigned nrBaselines)
+:
+  overlay(heap),
+  itsNrBaselines(nrBaselines),
+  visibilities(static_cast<fcomplex *>(overlay.allocate(visibilitiesSize(nrBaselines), 32)), boost::extents[nrBaselines][NR_SUBBAND_CHANNELS][NR_POLARIZATIONS][NR_POLARIZATIONS]),
+  nrValidSamples(static_cast<unsigned short *>(overlay.allocate(nrValidSamplesSize(nrBaselines), 32)), boost::extents[nrBaselines][NR_SUBBAND_CHANNELS]),
+  centroids(static_cast<float *>(overlay.allocate(centroidSize(nrBaselines), 32)))
+{
+}
+
+// Hands the three arrays back to the overlay, in the same order in which the constructor allocated them.
+inline CorrelatedData::~CorrelatedData()
+{
+  overlay.deallocate(visibilities.origin());
+  overlay.deallocate(nrValidSamples.origin());
+  overlay.deallocate(centroids);
+}
+
+// Sends the visibilities followed by the valid-sample counts over `th`; the centroids are not transmitted.
+inline void CorrelatedData::write(TransportHolder *th) /*const*/
+{
+  const size_t nrVisibilityBytes = visibilities.num_elements() * sizeof(fcomplex), nrCounterBytes = nrValidSamples.num_elements() * sizeof(unsigned short);
+  th->sendBlocking(visibilities.origin(), nrVisibilityBytes, 0, 0);
+  th->sendBlocking(nrValidSamples.origin(), nrCounterBytes, 0, 0);
+}
+
+
+
+} // namespace CS1
+} // namespace LOFAR
+
+#endif
diff --git a/Appl/CEP/CS1/CS1_BGLProc/src/Correlator.cc b/Appl/CEP/CS1/CS1_BGLProc/src/Correlator.cc
new file mode 100644
index 0000000000000000000000000000000000000000..8b1bbeb57d2c6c37d6605b47b42d4e572708af15
--- /dev/null
+++ b/Appl/CEP/CS1/CS1_BGLProc/src/Correlator.cc
@@ -0,0 +1,284 @@
+//# Always #include <lofar_config.h> first!
+#include <lofar_config.h>
+
+#include <Common/Timer.h>
+
+#include <Correlator.h>
+#include <CorrelatorAsm.h>
+
+#include <map>
+
+
+namespace LOFAR {
+namespace CS1 {
+
+// File-scope timers; each one is started and stopped inside the Correlator member functions of this file.
+static NSTimer computeFlagsTimer("Correlator::computeFlags()", true);
+static NSTimer correlateTimer("Correlator::correlate()", true);
+static NSTimer weightTimer("Correlator::weight()", true);
+
+// Stores the station/sample counts and precomputes itsCorrelationWeights[n] = scale / n for n = 1..nrSamplesPerIntegration.
+Correlator::Correlator(unsigned nrStations, unsigned nrSamplesPerIntegration)
+:
+  itsNrStations(nrStations),
+  itsNrBaselines(nrStations * (nrStations + 1) / 2),
+  itsNrSamplesPerIntegration(nrSamplesPerIntegration),
+  itsCorrelationWeights(new float[nrSamplesPerIntegration + 1])
+{
+#if INPUT_TYPE == I4COMPLEX_TYPE
+  const double scale = 1.0;
+#else
+  const double scale = 1.0e-6;
+#endif
+  // entry 0 is zero: the weight that is looked up when no samples are valid
+  itsCorrelationWeights[0] = 0.0;
+
+  for (unsigned nrValid = 1; nrValid <= nrSamplesPerIntegration; nrValid ++)
+    itsCorrelationWeights[nrValid] = scale / nrValid;
+}
+
+// Frees the weight table that the constructor allocated with new[].
+Correlator::~Correlator()
+{
+  delete [] itsCorrelationWeights;
+}
+
+
+#if 1
+// Stores the number of unflagged samples in nrValidSamples and returns the mean of (t + 0.5) over them, or .5 if there are none.
+double Correlator::computeCentroidAndValidSamples(const SparseSet<unsigned> &flags, unsigned &nrValidSamples) const
+{
+  const std::vector<SparseSet<unsigned>::range> &flagged = flags.getRanges();
+
+  // N^2 is the sum of (2t + 1) over all t < N; each flagged range [begin, end) removes end^2 - begin^2 from it
+  unsigned twiceWeightedSum = itsNrSamplesPerIntegration * itsNrSamplesPerIntegration;
+  unsigned remaining	    = itsNrSamplesPerIntegration;
+
+  for (SparseSet<unsigned>::const_iterator range = flagged.begin(); range != flagged.end(); range ++) {
+    twiceWeightedSum -= (range->end - range->begin) * (range->end + range->begin);
+    remaining	     -= (range->end - range->begin);
+  }
+  nrValidSamples = remaining;
+  return remaining > 0 ? (double) twiceWeightedSum / (double) (2 * remaining) : .5;
+}
+
+// For every station pair (first <= second), combines both stations' flags and fills in the baseline's centroid and per-channel valid-sample counts.
+void Correlator::computeFlagsAndCentroids(const FilteredData *filteredData, CorrelatedData *correlatedData)
+{
+  computeFlagsTimer.start();
+
+  for (unsigned second = 0; second < itsNrStations; second ++) {
+    for (unsigned first = 0; first <= second; first ++) {
+      unsigned validCount;
+      unsigned pair = baseline(first, second);
+
+      correlatedData->centroids[pair] = computeCentroidAndValidSamples(filteredData->flags[first] | filteredData->flags[second], validCount);
+      correlatedData->nrValidSamples[pair][0] = 0; // channel 0 does not contain valid data
+
+      for (unsigned channel = 1; channel < NR_SUBBAND_CHANNELS; channel ++)
+	correlatedData->nrValidSamples[pair][channel] = validCount;
+    }
+  }
+
+  computeFlagsTimer.stop();
+}
+
+#else
+// Alternative without centroids, compiled only if the "#if 1" above is changed: valid count = integration length minus the combined flag count.
+void Correlator::computeFlags(const FilteredData *filteredData, CorrelatedData *correlatedData)
+{
+  computeFlagsTimer.start();
+
+  for (unsigned second = 0; second < itsNrStations; second ++) {
+    for (unsigned first = 0; first <= second; first ++) {
+      unsigned pair	  = baseline(first, second);
+      unsigned validCount = itsNrSamplesPerIntegration - (filteredData->flags[first] | filteredData->flags[second]).count();
+
+      correlatedData->nrValidSamples[pair][0] = 0; // channel 0 does not contain valid data
+
+      for (unsigned channel = 1; channel < NR_SUBBAND_CHANNELS; channel ++)
+	correlatedData->nrValidSamples[pair][channel] = validCount;
+    }
+  }
+
+  computeFlagsTimer.stop();
+}
+
+#endif
+
+// Fills correlatedData->visibilities from filteredData->samples, with either the C++ loops or the Blue Gene/L assembly kernels.
+void Correlator::correlate(const FilteredData *filteredData, CorrelatedData *correlatedData)
+{
+  correlateTimer.start();
+
+#if defined CORRELATOR_C_IMPLEMENTATION // C++ version: one sum per baseline, channel and polarization pair
+  for (unsigned ch = 0; ch < NR_SUBBAND_CHANNELS; ch ++) {
+    for (unsigned stat2 = 0; stat2 < itsNrStations; stat2 ++) {
+      for (unsigned stat1 = 0; stat1 <= stat2; stat1 ++) { 
+	unsigned bl = baseline(stat1, stat2), nrValid = 0;
+
+	if (ch > 0 /* && !itsRFIflags[stat1][ch] && !itsRFIflags[stat2][ch] */) {
+	  nrValid = correlatedData->nrValidSamples[bl][ch]; // counts must already be filled in before this function runs
+	  for (unsigned pol1 = 0; pol1 < NR_POLARIZATIONS; pol1 ++) {
+	    for (unsigned pol2 = 0; pol2 < NR_POLARIZATIONS; pol2 ++) {
+	      dcomplex sum = makedcomplex(0, 0);
+	      for (unsigned time = 0; time < itsNrSamplesPerIntegration; time ++) {
+		sum += filteredData->samples[ch][stat1][time][pol1] * conj(filteredData->samples[ch][stat2][time][pol2]);
+	      }
+	      sum *= itsCorrelationWeights[nrValid]; // the weight table is indexed by the number of valid samples
+	      correlatedData->visibilities[bl][ch][pol1][pol2] = sum;
+	    }
+	  }
+	}
+    
+	if (nrValid == 0) { // channel 0, or no valid samples: store zero visibilities
+	  for (unsigned pol1 = 0; pol1 < NR_POLARIZATIONS; pol1 ++) {
+	    for (unsigned pol2 = 0; pol2 < NR_POLARIZATIONS; pol2 ++) {
+	      correlatedData->visibilities[bl][ch][pol1][pol2] = makefcomplex(0, 0);
+	    }
+	  }
+	}
+
+	//nrValidSamples[bl][ch] = nrValid;
+      }
+    }
+  }
+#else
+  // Blue Gene/L assembler version; channel 0 is not correlated here (the loop starts at 1).
+
+  for (unsigned ch = 1; ch < NR_SUBBAND_CHANNELS; ch ++) {
+    // build a map of valid stations
+    unsigned nrValidStations = 0, map[itsNrStations];
+
+    for (unsigned stat2 = 0; stat2 < itsNrStations; stat2 ++) {
+//    if (!itsRFIflags[stat2][ch]) {
+	map[nrValidStations ++] = stat2; // with the RFI test commented out, every station is mapped
+//    } else { // clear correlations that involve invalidated stations
+//	for (unsigned stat1 = 0; stat1 < itsNrStations; stat1 ++) {
+//	  unsigned bl = stat1 < stat2 ? baseline(stat1, stat2) :
+//	    baseline(stat2, stat1);
+//	  //_clear_correlation(&visibilities[bl][ch]);
+//	  nrValidSamples[bl][ch] = 0;
+//	}
+//    }
+    }
+
+    if (nrValidStations == 0) { // only possible when itsNrStations is 0 while the RFI test is disabled
+      break;
+    }
+
+    // Divide the correlation matrix into blocks of 3x2, 2x2, 3+2, 2+1, and 1x1.
+
+    // do the first (auto)correlation(s) (these are the "left"most 1 or 3
+    // squares in the corner of the triangle)
+    if (nrValidStations % 2 == 0) {
+      unsigned stat10 = map[0], stat11 = map[1];
+
+      _auto_correlate_2(filteredData->samples[ch][stat10].origin(),
+			filteredData->samples[ch][stat11].origin(),
+			correlatedData->visibilities[baseline(stat10, stat10)][ch].origin(),
+			correlatedData->visibilities[baseline(stat10, stat11)][ch].origin(),
+			correlatedData->visibilities[baseline(stat11, stat11)][ch].origin(),
+			itsNrSamplesPerIntegration);
+    } else {
+      unsigned stat10 = map[0];
+
+      _auto_correlate_1(filteredData->samples[ch][stat10].origin(),
+			correlatedData->visibilities[baseline(stat10, stat10)][ch].origin(),
+			itsNrSamplesPerIntegration);
+    }
+
+    for (unsigned stat2 = nrValidStations % 2 ? 1 : 2; stat2 < nrValidStations; stat2 += 2) { // stat2 and stat2+1 are handled together in each pass
+      unsigned stat1 = 0;
+
+      // do as many 3x2 blocks as possible
+      for (; stat1 + 3 <= stat2; stat1 += 3) { 
+	unsigned stat10 = map[stat1], stat11 = map[stat1+1], stat12 = map[stat1+2];
+	unsigned stat20 = map[stat2], stat21 = map[stat2+1];
+
+	_correlate_3x2(filteredData->samples[ch][stat10].origin(),
+		       filteredData->samples[ch][stat11].origin(),
+		       filteredData->samples[ch][stat12].origin(),
+		       filteredData->samples[ch][stat20].origin(),
+		       filteredData->samples[ch][stat21].origin(),
+		       correlatedData->visibilities[baseline(stat10, stat20)][ch].origin(),
+		       correlatedData->visibilities[baseline(stat10, stat21)][ch].origin(),
+		       correlatedData->visibilities[baseline(stat11, stat20)][ch].origin(),
+		       correlatedData->visibilities[baseline(stat11, stat21)][ch].origin(),
+		       correlatedData->visibilities[baseline(stat12, stat20)][ch].origin(),
+		       correlatedData->visibilities[baseline(stat12, stat21)][ch].origin(),
+		       itsNrSamplesPerIntegration);
+      }
+
+      // see if a 2x2 block is necessary
+      if (stat1 + 2 <= stat2) {
+	unsigned stat10 = map[stat1], stat11 = map[stat1+1];
+	unsigned stat20 = map[stat2], stat21 = map[stat2+1];
+
+	_correlate_2x2(filteredData->samples[ch][stat10].origin(),
+		       filteredData->samples[ch][stat11].origin(),
+		       filteredData->samples[ch][stat20].origin(),
+		       filteredData->samples[ch][stat21].origin(),
+		       correlatedData->visibilities[baseline(stat10, stat20)][ch].origin(),
+		       correlatedData->visibilities[baseline(stat10, stat21)][ch].origin(),
+		       correlatedData->visibilities[baseline(stat11, stat20)][ch].origin(),
+		       correlatedData->visibilities[baseline(stat11, stat21)][ch].origin(),
+		       itsNrSamplesPerIntegration);
+	stat1 += 2;
+      }
+
+      // do the remaining (auto)correlations near the diagonal
+      if (stat1 == stat2) { // two stations left: stat2 and stat2+1
+	unsigned stat10 = map[stat1], stat11 = map[stat1+1];
+
+	_auto_correlate_2(filteredData->samples[ch][stat10].origin(),
+			  filteredData->samples[ch][stat11].origin(),
+			  correlatedData->visibilities[baseline(stat10,stat10)][ch].origin(),
+			  correlatedData->visibilities[baseline(stat10,stat11)][ch].origin(),
+			  correlatedData->visibilities[baseline(stat11,stat11)][ch].origin(),
+			  itsNrSamplesPerIntegration);
+      } else { // here stat1 == stat2 - 1: three stations left (stat1, stat2, stat2+1); stat1's autocorrelation is not among the outputs
+	unsigned stat10 = map[stat1], stat11 = map[stat1+1], stat12 = map[stat1+2];
+
+	_auto_correlate_3(filteredData->samples[ch][stat10].origin(),
+			  filteredData->samples[ch][stat11].origin(),
+			  filteredData->samples[ch][stat12].origin(),
+			  correlatedData->visibilities[baseline(stat10,stat11)][ch].origin(),
+			  correlatedData->visibilities[baseline(stat10,stat12)][ch].origin(),
+			  correlatedData->visibilities[baseline(stat11,stat11)][ch].origin(),
+			  correlatedData->visibilities[baseline(stat11,stat12)][ch].origin(),
+			  correlatedData->visibilities[baseline(stat12,stat12)][ch].origin(),
+			  itsNrSamplesPerIntegration);
+      }
+    }
+  }
+
+  weightTimer.start();
+#if 0
+  for (unsigned bl = 0; bl < itsNrBaselines; bl ++) {
+    for (unsigned ch = 0; ch < NR_SUBBAND_CHANNELS; ch ++) {
+      for (unsigned pol1 = 0; pol1 < NR_POLARIZATIONS; pol1 ++) {
+	for (unsigned pol2 = 0; pol2 < NR_POLARIZATIONS; pol2 ++) {
+	  itsCorrelatedData->visibilities[bl][ch][pol1][pol2] *= itsCorrelationWeights[(*nrValidSamples)[bl][ch]];
+	}
+      }
+    }
+  }
+#else
+  _weigh_visibilities(correlatedData->visibilities.origin(), correlatedData->nrValidSamples.origin(), itsCorrelationWeights, itsNrBaselines * NR_SUBBAND_CHANNELS); // presumably the assembly equivalent of the disabled loop above -- TODO confirm
+#endif
+  weightTimer.stop();
+#endif  
+
+#if 0
+static unsigned count;
+nrValidSamples[0][0] = itsCurrentSubband;
+nrValidSamples[0][1] = TH_MPI::getCurrentRank();
+nrValidSamples[0][2] = count ++;
+#endif
+  correlateTimer.stop();
+}
+
+
+} // namespace CS1
+} // namespace LOFAR
diff --git a/Appl/CEP/CS1/CS1_BGLProc/src/Correlator.h b/Appl/CEP/CS1/CS1_BGLProc/src/Correlator.h
index 1d393dd85c902ff35b155f6f37a1dda581db6a47..730b1d9de70c9788b846669ba322520eae25c23c 100644
--- a/Appl/CEP/CS1/CS1_BGLProc/src/Correlator.h
+++ b/Appl/CEP/CS1/CS1_BGLProc/src/Correlator.h
@@ -1,105 +1,47 @@
-//#  Correlator.h: header files for BGL assembly
-//#
-//#  P.O.Box 2, 7990 AA Dwingeloo, The Netherlands, seg@astron.nl
-//#
-//#  This program is free software; you can redistribute it and/or modify
-//#  it under the terms of the GNU General Public License as published by
-//#  the Free Software Foundation; either version 2 of the License, or
-//#  (at your option) any later version.
-//#
-//#  This program is distributed in the hope that it will be useful,
-//#  but WITHOUT ANY WARRANTY; without even the implied warranty of
-//#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-//#  GNU General Public License for more details.
-//#
-//#  You should have received a copy of the GNU General Public License
-//#  along with this program; if not, write to the Free Software
-//#  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-//#
-//#  $Id$
-
 #ifndef LOFAR_APPL_CEP_CS1_CS1_BGL_PROC_CORRELATOR_H
 #define LOFAR_APPL_CEP_CS1_CS1_BGL_PROC_CORRELATOR_H
 
-#if defined HAVE_BGL
-#include <CS1_Interface/CS1_Config.h>
-#include <CS1_Interface/DH_Visibilities.h>
+#if 0 || !defined HAVE_BGL
+#define CORRELATOR_C_IMPLEMENTATION
+#endif
 
-namespace LOFAR {
-namespace CS1 {
 
-//typedef fcomplex stationInputType[NR_SAMPLES_PER_INTEGRATION | 2][NR_POLARIZATIONS];
-//typedef fcomplex CorrelatedOutputType[NR_POLARIZATIONS][NR_POLARIZATIONS];
-typedef fcomplex stationInputType, CorrelatedOutputType;
+#include <FilteredData.h>
+#include <CorrelatedData.h>
 
-extern "C" {
-  void _correlate_2x2(const stationInputType *S0,
-		      const stationInputType *S1,
-		      const stationInputType *S2,
-		      const stationInputType *S3,
-		      CorrelatedOutputType *S0_S2,
-		      CorrelatedOutputType *S0_S3,
-		      CorrelatedOutputType *S1_S2,
-		      CorrelatedOutputType *S1_S3,
-		      unsigned nrSamplesToIntegrate);
-  
-  void _correlate_3x2(const stationInputType *S0,
-		      const stationInputType *S1,
-		      const stationInputType *S2,
-		      const stationInputType *S3,
-		      const stationInputType *S4,
-		      CorrelatedOutputType *S0_S3,
-		      CorrelatedOutputType *S0_S4,
-		      CorrelatedOutputType *S1_S3,
-		      CorrelatedOutputType *S1_S4,
-		      CorrelatedOutputType *S2_S3,
-		      CorrelatedOutputType *S2_S4,
-		      unsigned nrSamplesToIntegrate);
+#include <cassert>
 
-  void _auto_correlate_1(const stationInputType *S0,
-			 CorrelatedOutputType *S0_S0,
-			 unsigned nrSamplesToIntegrate);
+#include <boost/multi_array.hpp>
 
-  void _auto_correlate_2(const stationInputType *S0,
-			 const stationInputType *S1,
-			 CorrelatedOutputType *S0_S0,
-			 CorrelatedOutputType *S0_S1,
-			 CorrelatedOutputType *S1_S1,
-			 unsigned nrSamplesToIntegrate);
+namespace LOFAR {
+namespace CS1 {
 
-  void _auto_correlate_3(const stationInputType *S0,
-			 const stationInputType *S1,
-			 const stationInputType *S2,
-			 CorrelatedOutputType *S0_S1,
-			 CorrelatedOutputType *S0_S2,
-			 CorrelatedOutputType *S1_S1,
-			 CorrelatedOutputType *S1_S2,
-			 CorrelatedOutputType *S2_S2,
-			 unsigned nrSamplesToIntegrate);
+class Correlator // computes per-baseline visibilities, valid-sample counts and centroids from FilteredData
+{
+  public:
+    Correlator(unsigned nrStations, unsigned nrSamplesPerIntegration);
+    ~Correlator();
 
-#if 0
-  void _add_correlations(DH_Visibilities::AllVisibilitiesType *dstVis,
-			 const DH_Visibilities::AllVisibilitiesType *srcVis,
-			 DH_Visibilities::AllNrValidSamplesType *dstVal,
-			 const DH_Visibilities::AllNrValidSamplesType *srcVal,
-			 unsigned count);
-#endif
-			 
-  void _clear_correlation(CorrelatedOutputType *S0_S0);
+    void	    correlate(const FilteredData *, CorrelatedData *); // fills CorrelatedData::visibilities
+    void	    computeFlagsAndCentroids(const FilteredData *, CorrelatedData *); // fills centroids and nrValidSamples
 
-  void _weigh_visibilities(
-	DH_Visibilities::VisibilityType *visibilities,
-	DH_Visibilities::NrValidSamplesType *nrValidSamplesCounted,
-	const float correlationWeights[/*nrSamplesToIntegrate + 1*/],
-	unsigned nrBaselinesTimesNrChannels);
+    static unsigned baseline(unsigned station1, unsigned station2); // index of a station pair; asserts station1 <= station2
 
-  extern struct {
-    unsigned nr_subband_channels;
-    unsigned nr_polarizations;
-  } _correlator_constants_used;
+  private:
+    unsigned	    itsNrStations, itsNrBaselines, itsNrSamplesPerIntegration;
+    float	    *itsCorrelationWeights; //[itsNrSamplesPerIntegration + 1], indexed by the number of valid samples
+
+    double	    computeCentroidAndValidSamples(const SparseSet<unsigned> &flags, unsigned &nrValidSamples) const;
 };
 
+// Index of the pair (station1, station2) in the triangular baseline ordering; station1 must not exceed station2.
+inline unsigned Correlator::baseline(unsigned station1, unsigned station2)
+{
+  assert(station1 <= station2);
+  return station1 + (station2 + 1) * station2 / 2;
 }
-}
-#endif
+
+} // namespace CS1
+} // namespace LOFAR
+
 #endif
diff --git a/Appl/CEP/CS1/CS1_BGLProc/src/Correlator.S b/Appl/CEP/CS1/CS1_BGLProc/src/CorrelatorAsm.S
similarity index 100%
rename from Appl/CEP/CS1/CS1_BGLProc/src/Correlator.S
rename to Appl/CEP/CS1/CS1_BGLProc/src/CorrelatorAsm.S
diff --git a/Appl/CEP/CS1/CS1_BGLProc/src/CorrelatorAsm.h b/Appl/CEP/CS1/CS1_BGLProc/src/CorrelatorAsm.h
new file mode 100644
index 0000000000000000000000000000000000000000..5928b14b17c4aad37407e977ffdd729bf28809ce
--- /dev/null
+++ b/Appl/CEP/CS1/CS1_BGLProc/src/CorrelatorAsm.h
@@ -0,0 +1,96 @@
+//#  CorrelatorAsm.h: declarations of the BGL assembly correlator routines
+//#
+//#  P.O.Box 2, 7990 AA Dwingeloo, The Netherlands, seg@astron.nl
+//#
+//#  This program is free software; you can redistribute it and/or modify
+//#  it under the terms of the GNU General Public License as published by
+//#  the Free Software Foundation; either version 2 of the License, or
+//#  (at your option) any later version.
+//#
+//#  This program is distributed in the hope that it will be useful,
+//#  but WITHOUT ANY WARRANTY; without even the implied warranty of
+//#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+//#  GNU General Public License for more details.
+//#
+//#  You should have received a copy of the GNU General Public License
+//#  along with this program; if not, write to the Free Software
+//#  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+//#
+//#  $Id$
+
+#ifndef LOFAR_APPL_CEP_CS1_CS1_BGL_PROC_CORRELATOR_ASM_H
+#define LOFAR_APPL_CEP_CS1_CS1_BGL_PROC_CORRELATOR_ASM_H
+
+#if defined HAVE_BGL
+#include <CS1_Interface/CS1_Config.h>
+
+namespace LOFAR {
+namespace CS1 {
+// C-linkage declarations used by Correlator::correlate(); presumably implemented in CorrelatorAsm.S. Compiled only when HAVE_BGL is defined.
+//typedef fcomplex stationInputType[NR_SAMPLES_PER_INTEGRATION | 2][NR_POLARIZATIONS];
+//typedef fcomplex CorrelatedOutputType[NR_POLARIZATIONS][NR_POLARIZATIONS];
+typedef fcomplex stationInputType, CorrelatedOutputType;
+
+extern "C" {
+  void _correlate_2x2(const stationInputType *S0, // stations {S0,S1} against {S2,S3}: four output baselines
+		      const stationInputType *S1,
+		      const stationInputType *S2,
+		      const stationInputType *S3,
+		      CorrelatedOutputType *S0_S2,
+		      CorrelatedOutputType *S0_S3,
+		      CorrelatedOutputType *S1_S2,
+		      CorrelatedOutputType *S1_S3,
+		      unsigned nrSamplesToIntegrate);
+  
+  void _correlate_3x2(const stationInputType *S0, // stations {S0,S1,S2} against {S3,S4}: six output baselines
+		      const stationInputType *S1,
+		      const stationInputType *S2,
+		      const stationInputType *S3,
+		      const stationInputType *S4,
+		      CorrelatedOutputType *S0_S3,
+		      CorrelatedOutputType *S0_S4,
+		      CorrelatedOutputType *S1_S3,
+		      CorrelatedOutputType *S1_S4,
+		      CorrelatedOutputType *S2_S3,
+		      CorrelatedOutputType *S2_S4,
+		      unsigned nrSamplesToIntegrate);
+
+  void _auto_correlate_1(const stationInputType *S0, // autocorrelation of a single station
+			 CorrelatedOutputType *S0_S0,
+			 unsigned nrSamplesToIntegrate);
+
+  void _auto_correlate_2(const stationInputType *S0, // both autocorrelations plus the S0-S1 baseline
+			 const stationInputType *S1,
+			 CorrelatedOutputType *S0_S0,
+			 CorrelatedOutputType *S0_S1,
+			 CorrelatedOutputType *S1_S1,
+			 unsigned nrSamplesToIntegrate);
+
+  void _auto_correlate_3(const stationInputType *S0, // note: there is no S0_S0 output among the five results
+			 const stationInputType *S1,
+			 const stationInputType *S2,
+			 CorrelatedOutputType *S0_S1,
+			 CorrelatedOutputType *S0_S2,
+			 CorrelatedOutputType *S1_S1,
+			 CorrelatedOutputType *S1_S2,
+			 CorrelatedOutputType *S2_S2,
+			 unsigned nrSamplesToIntegrate);
+
+  void _clear_correlation(CorrelatedOutputType *S0_S0);
+
+  void _weigh_visibilities( // called at the end of Correlator::correlate() with the Correlator's weight table
+	fcomplex *visibilities,
+	unsigned short *nrValidSamplesCounted,
+	const float correlationWeights[/*nrSamplesToIntegrate + 1*/],
+	unsigned nrBaselinesTimesNrChannels);
+
+  extern struct { // presumably the constants the assembly was built with, for checking against CS1_Config.h -- TODO confirm
+    unsigned nr_subband_channels;
+    unsigned nr_polarizations;
+  } _correlator_constants_used;
+};
+
+}
+}
+#endif
+#endif
diff --git a/Appl/CEP/CS1/CS1_BGLProc/src/FFT.S b/Appl/CEP/CS1/CS1_BGLProc/src/FFT_Asm.S
similarity index 100%
rename from Appl/CEP/CS1/CS1_BGLProc/src/FFT.S
rename to Appl/CEP/CS1/CS1_BGLProc/src/FFT_Asm.S
diff --git a/Appl/CEP/CS1/CS1_BGLProc/src/FFT.h b/Appl/CEP/CS1/CS1_BGLProc/src/FFT_Asm.h
similarity index 91%
rename from Appl/CEP/CS1/CS1_BGLProc/src/FFT.h
rename to Appl/CEP/CS1/CS1_BGLProc/src/FFT_Asm.h
index b31c48b1d5c26cb6fee4a989accb909434821e6c..1b5d4f8fd33ab0e1d8ec12ed1ca8a1a3bb1f7fa2 100644
--- a/Appl/CEP/CS1/CS1_BGLProc/src/FFT.h
+++ b/Appl/CEP/CS1/CS1_BGLProc/src/FFT_Asm.h
@@ -18,8 +18,8 @@
 //#
 //#  $Id$
 
-#ifndef LOFAR_APPL_CEP_CS1_CS1_BGL_PROC_FFT_H
-#define LOFAR_APPL_CEP_CS1_CS1_BGL_PROC_FFT_H
+#ifndef LOFAR_APPL_CEP_CS1_CS1_BGL_PROC_FFT_ASM_H
+#define LOFAR_APPL_CEP_CS1_CS1_BGL_PROC_FFT_ASM_H
 
 #if defined HAVE_BGL
 #include <Common/lofar_complex.h>
diff --git a/Appl/CEP/CS1/CS1_BGLProc/src/WH_BGL_Processing.cc b/Appl/CEP/CS1/CS1_BGLProc/src/FIR.cc
similarity index 72%
rename from Appl/CEP/CS1/CS1_BGLProc/src/WH_BGL_Processing.cc
rename to Appl/CEP/CS1/CS1_BGLProc/src/FIR.cc
index 80d2da2bcbcf083782fbbc8f72ceedf9132d7713..49dd6c599f238701be113e2faa18b52b9bf15fd9 100644
--- a/Appl/CEP/CS1/CS1_BGLProc/src/WH_BGL_Processing.cc
+++ b/Appl/CEP/CS1/CS1_BGLProc/src/FIR.cc
@@ -1,71 +1,13 @@
-//#  WH_BGL_Processing.cc: Blue Gene processing for 1 second of sampled data
-//#
-//#  P.O.Box 2, 7990 AA Dwingeloo, The Netherlands, seg@astron.nl
-//#
-//#  This program is free software; you can redistribute it and/or modify
-//#  it under the terms of the GNU General Public License as published by
-//#  the Free Software Foundation; either version 2 of the License, or
-//#  (at your option) any later version.
-//#
-//#  This program is distributed in the hope that it will be useful,
-//#  but WITHOUT ANY WARRANTY; without even the implied warranty of
-//#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-//#  GNU General Public License for more details.
-//#
-//#  You should have received a copy of the GNU General Public License
-//#  along with this program; if not, write to the Free Software
-//#  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-//#
-//#  $Id$
-
 //# Always #include <lofar_config.h> first!
 #include <lofar_config.h>
 
 //# Includes
-#include <WH_BGL_Processing.h>
-#include <Correlator.h>
-#include <FFT.h>
 #include <FIR.h>
-
-#include <Common/Timer.h>
-#include <Transport/TH_MPI.h>
-
-#include <complex>
-#include <cmath>
-
-#if defined HAVE_MASS
-#include <mass.h>
-#endif
-
-#if defined HAVE_BGL
-#include <rts.h>
-#endif
+#include <CacheAlignedAllocator.h>
 
 namespace LOFAR {
 namespace CS1 {
 
-#if !defined HAVE_MASS
-
-inline static dcomplex cosisin(double x)
-{
-  return makedcomplex(cos(x), sin(x));
-}
-
-#endif
-
-
-#if defined HAVE_BGL && !defined C_IMPLEMENTATION
-static BGL_Mutex *mutex;
-#endif
-
-static NSTimer computeFlagsTimer("computeFlags", true);
-static NSTimer FIRtimer("FIRtimer", true);
-static NSTimer FFTtimer("FFT", true);
-static NSTimer doPPFtimer("doPPF()", true);
-static NSTimer doCorrelateTimer("doCorrelate()", true);
-static NSTimer weightTimer("weight", true);
-static NSTimer computeTimer("computing", true);
-
 
 // The first subband is from -98 KHz to 98 KHz, rather than from 0 to 195 KHz.
 // To avoid that the FFT outputs the channels in the wrong order (from 128 to
@@ -1105,7 +1047,7 @@ const float FIR::weights[NR_SUBBAND_CHANNELS][NR_TAPS] CACHE_ALIGNED = {
 };
 
 
-#if defined C_IMPLEMENTATION
+#if defined FIR_C_IMPLEMENTATION
 
 FIR::FIR()
 {
@@ -1128,659 +1070,5 @@ fcomplex FIR::processNextSample(fcomplex sample, const float weights[NR_TAPS])
 
 #endif
 
-
-WH_BGL_Processing::WH_BGL_Processing(const string& name, unsigned coreNumber, CS1_Parset *ps)
-  :
-  WorkHolder(NR_IN_CHANNELS, NR_OUT_CHANNELS, name, "WH_Correlator"),
-  itsCS1PS(ps),
-  itsCoreNumber(coreNumber)
-{
-  ASSERT(itsCS1PS->nrPFFTaps()	                            == NR_TAPS);
-  ASSERT(itsCS1PS->getInt32("Observation.nrPolarisations")  == NR_POLARIZATIONS);
-  ASSERT(itsCS1PS->nrChannelsPerSubband()	            == NR_SUBBAND_CHANNELS);
-
-#if !defined C_IMPLEMENTATION
-  ASSERT(_FIR_constants_used.input_type			== INPUT_TYPE);
-  ASSERT(_FIR_constants_used.nr_subband_channels	== NR_SUBBAND_CHANNELS);
-  ASSERT(_FIR_constants_used.nr_taps			== NR_TAPS);
-  ASSERT(_FIR_constants_used.nr_polarizations		== NR_POLARIZATIONS);
-
-  ASSERT(_correlator_constants_used.nr_subband_channels	== NR_SUBBAND_CHANNELS);
-  ASSERT(_correlator_constants_used.nr_polarizations	== NR_POLARIZATIONS);
-#endif
-
-  itsNrStations	       = itsCS1PS->nrStations();
-  itsNrBaselines       = itsNrStations * (itsNrStations + 1) / 2;
-  itsNrSamplesPerIntegration = itsCS1PS->BGLintegrationSteps();
-
-#if !defined C_IMPLEMENTATION
-  ASSERT(itsNrSamplesPerIntegration % 16 == 0);
-#endif
-
-  itsCenterFrequencies = itsCS1PS->refFreqs();
-  itsChannelBandwidth  = itsCS1PS->sampleRate() / NR_SUBBAND_CHANNELS;
-
-  unsigned nrSubbandsPerPset = itsCS1PS->getUint32("OLAP.subbandsPerPset");
-  unsigned nrNodesPerPset    = itsCS1PS->getUint32("OLAP.BGLProc.nodesPerPset");
-
-  itsFirstSubband     = (coreNumber / nrNodesPerPset) * nrSubbandsPerPset;
-  itsLastSubband      = itsFirstSubband + nrSubbandsPerPset;
-  itsCurrentSubband   = itsFirstSubband + coreNumber % nrNodesPerPset % nrSubbandsPerPset;
-  itsSubbandIncrement = nrNodesPerPset % nrSubbandsPerPset;
-
-  itsInputConnected   = itsCS1PS->getString("OLAP.OLAP_Conn.input_BGLProc_Transport") != "NULL";
-
-  getDataManager().addInDataHolder(SUBBAND_CHANNEL, new DH_Subband("input", ps));
-//getDataManager().addInDataHolder(RFI_MITIGATION_CHANNEL, new DH_RFI_Mitigation("RFI"));
-  getDataManager().addOutDataHolder(VISIBILITIES_CHANNEL, new DH_Visibilities("output", ps));
-  getDataManager().setAutoTriggerIn(SUBBAND_CHANNEL, false);
-  getDataManager().setAutoTriggerOut(VISIBILITIES_CHANNEL, false);
-}
-
-
-WH_BGL_Processing::~WH_BGL_Processing()
-{
-}
-
-
-WorkHolder* WH_BGL_Processing::construct(const string &name, unsigned coreNumber, CS1_Parset *ps)
-{
-  return new WH_BGL_Processing(name, coreNumber, ps);
-}
-
-
-WH_BGL_Processing* WH_BGL_Processing::make(const string &name)
-{
-  return new WH_BGL_Processing(name, itsCoreNumber, itsCS1PS);
-}
-
-
-#if defined HAVE_BGL
-
-void FFTtest()
-{
-  fftw_plan plan = fftw_create_plan(256, FFTW_FORWARD, FFTW_ESTIMATE);
-
-  fcomplex in[256], fout[256], sout[256];
-
-  for (unsigned i = 0; i < 256; i ++)
-    in[i] = makefcomplex(2 * i, 2 * i + 1);
-
-  fftw_one(plan, (fftw_complex *) in, (fftw_complex *) fout);
-
-  _fft256(in, sout);
-
-  for (unsigned i = 0; i < 256; i ++) {
-    fcomplex diff = fout[i] / sout[i];
-    std::cout << i << " (" << real(fout[i]) << ',' << imag(fout[i]) << ") / (" << real(sout[i]) << ',' << imag(sout[i]) << ") = (" << real(diff) << ',' << imag(diff) << ")\n";
-  }
-
-  //std::exit(0);
-}
-
-#endif
-
-
-void WH_BGL_Processing::preprocess()
-{
-#if defined HAVE_MPI
-  std::clog << "node " << TH_MPI::getCurrentRank() << " filters subbands " << itsFirstSubband << " to " << itsLastSubband << " starting at " << itsCurrentSubband << " with " << itsSubbandIncrement << " as increment\n";
-#endif
-
-#if defined HAVE_FFTW3
-  fftwf_complex cbuf1[NR_SUBBAND_CHANNELS], cbuf2[NR_SUBBAND_CHANNELS];
-  itsFFTWPlan = fftwf_plan_dft_1d(NR_SUBBAND_CHANNELS, cbuf1, cbuf2, FFTW_FORWARD, FFTW_ESTIMATE);
-#elif defined HAVE_FFTW2
-#if defined HAVE_BGL && NR_SUBBAND_CHANNELS == 256
-  fftw_import_wisdom_from_string("(FFTW-2.1.5 (256 529 -1 0 1 1 1 352 0) (128 529 -1 0 1 1 0 2817 0) (64 529 -1 0 1 1 0 1409 0) (32 529 -1 0 1 1 0 705 0) (16 529 -1 0 1 1 0 353 0) (8 529 -1 0 1 1 0 177 0) (4 529 -1 0 1 1 0 89 0) (2 529 -1 0 1 1 0 45 0))");
-  itsFFTWPlan = fftw_create_plan(NR_SUBBAND_CHANNELS, FFTW_FORWARD, FFTW_USE_WISDOM);
-#else
-  itsFFTWPlan = fftw_create_plan(NR_SUBBAND_CHANNELS, FFTW_FORWARD, FFTW_ESTIMATE);
-#endif
-#endif
-
-  //FFTtest();
-
-#if defined C_IMPLEMENTATION
-  itsFIRs		= new itsFIRsType(boost::extents[itsNrStations][NR_POLARIZATIONS][NR_SUBBAND_CHANNELS]);
-  itsFFTinData		= new itsFFTdataType(boost::extents[NR_TAPS - 1 + itsNrSamplesPerIntegration][NR_POLARIZATIONS][NR_SUBBAND_CHANNELS]);
-#else
-  itsTmp		= new itsTmpType(boost::extents[4][itsNrSamplesPerIntegration]);
-  itsFFTinData		= new itsFFTdataType(boost::extents[itsNrSamplesPerIntegration][NR_POLARIZATIONS][NR_SUBBAND_CHANNELS + 4]);
-  itsFFToutData		= new itsFFTdataType(boost::extents[2][NR_POLARIZATIONS][NR_SUBBAND_CHANNELS]);
-#endif
-
-  itsSamples		= new itsSamplesType(boost::extents[NR_SUBBAND_CHANNELS][itsNrStations][itsNrSamplesPerIntegration | 2][NR_POLARIZATIONS]);
-  itsFlags		= new SparseSet<unsigned>[itsNrStations];
-  itsNrValidSamples	= new unsigned[itsNrBaselines];
-  itsCorrelationWeights = new float[itsNrSamplesPerIntegration + 1];
-  itsRFIflags		= new bitset<NR_SUBBAND_CHANNELS>[itsNrStations];
-  memset(itsRFIflags, 0, itsNrStations * sizeof(bitset<NR_SUBBAND_CHANNELS>));
-
-  itsCorrelationWeights[0] = 0.0;
-
-  for (unsigned i = 1; i <= itsNrSamplesPerIntegration; i ++) {
-#if INPUT_TYPE == I4COMPLEX_TYPE
-    itsCorrelationWeights[i] = 1.0 / i;
-#else
-    itsCorrelationWeights[i] = 1.0e-6 / i;
-#endif
-  }
-
-#if defined HAVE_BGL && !defined C_IMPLEMENTATION
-  mutex = rts_allocate_mutex();
-#endif
-
-  itsDelayCompensation = itsCS1PS->getBool("OLAP.delayCompensation");
-}
-
-
-void WH_BGL_Processing::computeFlags()
-{
-  computeFlagsTimer.start();
-
-#if NR_SUBBAND_CHANNELS == 1
-#error Not implementated
-#else
-  DH_Subband::FlagsType flags = get_DH_Subband()->getFlags();
-
-  for (unsigned stat = 0; stat < itsNrStations; stat ++) {
-    itsFlags[stat].reset();
-    const std::vector<SparseSet<unsigned>::range> &ranges = flags[stat].getRanges();
-
-    for (SparseSet<unsigned>::const_iterator it = ranges.begin(); it != ranges.end(); it ++) {
-      unsigned begin = std::max(0, (signed) it->begin / NR_SUBBAND_CHANNELS - NR_TAPS + 1);
-      unsigned end   = std::min(itsNrSamplesPerIntegration, (it->end - 1) / NR_SUBBAND_CHANNELS + 1);
-
-      itsFlags[stat].include(begin, end);
-    }
-  }
-#endif
-
-  for (unsigned stat2 = 0; stat2 < itsNrStations; stat2 ++) {
-    for (unsigned stat1 = 0; stat1 <= stat2; stat1 ++) {
-      itsNrValidSamples[DH_Visibilities::baseline(stat1, stat2)] = itsNrSamplesPerIntegration - (itsFlags[stat1] | itsFlags[stat2]).count();
-    }
-  }
-
-  computeFlagsTimer.stop();
-}
-
-
-#if defined C_IMPLEMENTATION
-
-fcomplex WH_BGL_Processing::phaseShift(unsigned time, unsigned chan, double baseFrequency, const DH_Subband::DelayIntervalType &delay) const
-{
-  double timeInterpolatedDelay = delay.delayAtBegin + ((double) time / itsNrSamplesPerIntegration) * (delay.delayAfterEnd - delay.delayAtBegin);
-  double frequency	       = baseFrequency + chan * itsChannelBandwidth;
-  double phaseShift	       = timeInterpolatedDelay * frequency;
-  double phi		       = -2 * M_PI * phaseShift;
-
-  return makefcomplex(std::cos(phi), std::sin(phi));
-}
-
-#else
-
-void WH_BGL_Processing::computePhaseShifts(struct phase_shift phaseShifts[/*itsNrSamplesPerIntegration*/], const DH_Subband::DelayIntervalType &delay, double baseFrequency) const
-{
-  double   phiBegin = -2 * M_PI * delay.delayAtBegin;
-  double   phiEnd   = -2 * M_PI * delay.delayAfterEnd;
-  double   deltaPhi = (phiEnd - phiBegin) / itsNrSamplesPerIntegration;
-  dcomplex v	    = cosisin(phiBegin * baseFrequency);
-  dcomplex dv       = cosisin(phiBegin * itsChannelBandwidth);
-  dcomplex vf       = cosisin(deltaPhi * baseFrequency);
-  dcomplex dvf      = cosisin(deltaPhi * itsChannelBandwidth);
-
-  for (unsigned time = 0; time < itsNrSamplesPerIntegration; time ++) {
-    phaseShifts[time].v0 =  v;  v *=  vf;
-    phaseShifts[time].dv = dv; dv *= dvf;
-  }
-}
-
-#endif
-
-
-void WH_BGL_Processing::doPPF(double baseFrequency)
-{
-  doPPFtimer.start();
-
-#if defined HAVE_BGL && !defined C_IMPLEMENTATION
-  _bgl_mutex_lock(mutex);
-#endif
-
-#if defined C_IMPLEMENTATION && defined WORDS_BIGENDIAN
-  get_DH_Subband()->swapBytes();
-#endif
-
-  DH_Subband::Samples4Dtype input  = get_DH_Subband()->getSamples4D();
-  DH_Subband::DelaysType    delays = get_DH_Subband()->getDelays();
-
-  for (unsigned stat = 0; stat < itsNrStations; stat ++) {
-#if 0
-    std::clog << setprecision(15) << "stat " << stat << ", basefreq " << baseFrequency << ": delay from " << delays[stat].delayAtBegin << " to " << delays[stat].delayAfterEnd << " sec" << std::endl;
-#endif
-
-#if defined C_IMPLEMENTATION
-    fcomplex fftOutData[NR_SUBBAND_CHANNELS];
-
-    FIRtimer.start();
-    for (unsigned pol = 0; pol < NR_POLARIZATIONS; pol ++) {
-      for (unsigned chan = 0; chan < NR_SUBBAND_CHANNELS; chan ++) {
-	for (unsigned time = 0; time < NR_TAPS - 1 + itsNrSamplesPerIntegration; time ++) {
-	  fcomplex sample = makefcomplex(input[stat][time][chan][pol]);
-	  (*itsFFTinData)[time][pol][chan] = (*itsFIRs)[stat][pol][chan].processNextSample(sample, FIR::weights[chan]);
-	}
-      }
-    }
-    FIRtimer.stop();
-
-    FFTtimer.start();
-    for (unsigned time = 0; time < itsNrSamplesPerIntegration; time ++) {
-      for (unsigned pol = 0; pol < NR_POLARIZATIONS; pol ++) {
-	if (itsFlags[stat].test(time)) {
-	  for (unsigned chan = 0; chan < NR_SUBBAND_CHANNELS; chan ++) {
-	    (*itsSamples)[chan][stat][time][pol] = makefcomplex(0, 0);
-	  }
-	} else {
-#if defined HAVE_FFTW3
-	  fftwf_execute_dft(itsFFTWPlan,
-			    (fftwf_complex *) (*itsFFTinData)[NR_TAPS - 1 + time][pol].origin(),
-			    (fftwf_complex *) (void *) fftOutData);
-#else
-	  fftw_one(itsFFTWPlan,
-		   (fftw_complex *) (*itsFFTinData)[NR_TAPS - 1 + time][pol].origin(),
-		   (fftw_complex *) (void *) fftOutData);
-#endif
-
-	  for (unsigned chan = 0; chan < NR_SUBBAND_CHANNELS; chan ++) {
-	    if (itsDelayCompensation) {
-	      fftOutData[chan] *= phaseShift(time, chan, baseFrequency, delays[stat]);
-	    }
-
-	    (*itsSamples)[chan][stat][time][pol] = fftOutData[chan];
-	  }
-	}
-      }
-    }
-    FFTtimer.stop();
-#else // assembly implementation
-    int transpose_stride = sizeof(fcomplex) * (NR_POLARIZATIONS * (itsNrSamplesPerIntegration | 2) * itsNrStations - (itsDelayCompensation ? 3 : 0));
-
-    for (unsigned chan = 0; chan < NR_SUBBAND_CHANNELS; chan += 4) {
-      for (unsigned pol = 0; pol < NR_POLARIZATIONS; pol ++) {
-#if defined __GNUC__	// work around bug ???
-	for (register unsigned ch asm ("r28") = 0; ch < 4; ch ++) {
-#else
-	for (unsigned ch = 0; ch < 4; ch ++) {
-#endif
-	  FIRtimer.start();
-	  _filter(0, // itsFIRs[stat][pol][chan + ch].itsDelayLine,
-		  FIR::weights[chan + ch],
-		  &input[stat][0][chan + ch][pol],
-		  (*itsTmp)[ch].origin(),
-		  itsNrSamplesPerIntegration / NR_TAPS);
-	  FIRtimer.stop();
-	}
-
-	_transpose_4x8(&(*itsFFTinData)[0][pol][chan],
-		       itsTmp->origin(),
-		       itsNrSamplesPerIntegration,
-		       sizeof(fcomplex) * itsNrSamplesPerIntegration,
-		       sizeof(fcomplex) * NR_POLARIZATIONS * (NR_SUBBAND_CHANNELS + 4));
-      }
-    }
-
-    struct phase_shift phaseShifts[itsNrSamplesPerIntegration];
-
-    if (itsDelayCompensation) {
-      computePhaseShifts(phaseShifts, delays[stat], baseFrequency);
-    }
-
-    const std::vector<SparseSet<unsigned>::range> &ranges = itsFlags[stat].getRanges();
-    SparseSet<unsigned>::const_iterator it = ranges.begin();
-
-    for (unsigned time = 0; time < itsNrSamplesPerIntegration; time ++) {
-      bool good = it == ranges.end() || time < it->begin || (time == it->end && (++ it, true));
-
-      if (good) {
-	FFTtimer.start();
-#if 0
-	_prefetch((*itsFFTinData)[time].origin(),
-		  sizeof(fcomplex[NR_POLARIZATIONS][NR_SUBBAND_CHANNELS]) / CACHE_LINE_SIZE,
-		  CACHE_LINE_SIZE);
-#endif
-
-	for (unsigned pol = 0; pol < NR_POLARIZATIONS; pol ++) {
-#if 0
-	  fftw_one(itsFFTWPlan,
-		   (fftw_complex *) (*itsFFTinData)[time][pol].origin(),
-		   (fftw_complex *) (*itsFFToutData)[time & 1][pol].origin());
-#else
-	  _fft256((*itsFFTinData)[time][pol].origin(),
-		  (*itsFFToutData)[time & 1][pol].origin());
-#endif
-	}
-	FFTtimer.stop();
-      } else {
-	  _memzero((*itsFFToutData)[time & 1].origin(),
-		   (*itsFFToutData)[time & 1].num_elements() * sizeof(fcomplex));
-      }
-
-      if (time & 1) {
-	if (itsDelayCompensation) {
-	  _phase_shift_and_transpose(&(*itsSamples)[0][stat][time - 1][0],
-				     itsFFToutData->origin(),
-				     &phaseShifts[time - 1],
-				     transpose_stride);
-	} else {
-	  _transpose_4x8(&(*itsSamples)[0][stat][time - 1][0],
-			 itsFFToutData->origin(),
-			 NR_SUBBAND_CHANNELS,
-			 sizeof(fcomplex) * NR_SUBBAND_CHANNELS,
-			 transpose_stride);
-	}
-      }
-    }
-#endif // C_IMPLEMENTATION
-  }
-
-#if defined HAVE_BGL && !defined C_IMPLEMENTATION
-  _bgl_mutex_unlock(mutex);
-#endif
-
-  doPPFtimer.stop();
-}
-
-
-void WH_BGL_Processing::bypassPPF()
-{
-  DH_Subband *input = get_DH_Subband();
-
-  for (unsigned stat = 0; stat < itsNrStations; stat ++) {
-    for (unsigned time = 0; time < itsNrSamplesPerIntegration; time ++) {
-      for (unsigned pol = 0; pol < NR_POLARIZATIONS; pol ++) {
-	(*itsSamples)[0][stat][time][pol] = makefcomplex(input->getSample(stat, time, pol));
-      }
-    }
-  }
-}
-
-
-void WH_BGL_Processing::doCorrelate()
-{
-  doCorrelateTimer.start();
-
-  DH_Visibilities::VisibilitiesType	 visibilities	= get_DH_Visibilities()->getVisibilities();
-  DH_Visibilities::AllNrValidSamplesType nrValidSamples = get_DH_Visibilities()->getNrValidSamples();
-
-#if defined C_IMPLEMENTATION
-  for (unsigned ch = 0; ch < NR_SUBBAND_CHANNELS; ch ++) {
-    for (unsigned stat2 = 0; stat2 < itsNrStations; stat2 ++) {
-      for (unsigned stat1 = 0; stat1 <= stat2; stat1 ++) { 
-	unsigned bl = DH_Visibilities::baseline(stat1, stat2), nrValid = 0;
-
-	if (ch > 0 && !itsRFIflags[stat1][ch] && !itsRFIflags[stat2][ch]) {
-	  for (unsigned pol1 = 0; pol1 < NR_POLARIZATIONS; pol1 ++) {
-	    for (unsigned pol2 = 0; pol2 < NR_POLARIZATIONS; pol2 ++) {
-	      dcomplex sum = makedcomplex(0, 0);
-	      for (unsigned time = 0; time < itsNrSamplesPerIntegration; time ++) {
-		sum += (*itsSamples)[ch][stat1][time][pol1] * conj((*itsSamples)[ch][stat2][time][pol2]);
-	      }
-	      sum *= itsCorrelationWeights[itsNrValidSamples[bl]];
-	      visibilities[bl][ch][pol1][pol2] = sum;
-	    }
-	  }
-	  nrValid = itsNrValidSamples[bl];
-	}
-    
-	if (nrValid == 0) {
-	  for (unsigned pol1 = 0; pol1 < NR_POLARIZATIONS; pol1 ++) {
-	    for (unsigned pol2 = 0; pol2 < NR_POLARIZATIONS; pol2 ++) {
-	      visibilities[bl][ch][pol1][pol2] = makefcomplex(0, 0);
-	    }
-	  }
-	}
-
-	nrValidSamples[bl][ch] = nrValid;
-      }
-    }
-  }
-#else
-  // Blue Gene/L assembler version. 
-
-  for (unsigned bl = 0; bl < itsNrBaselines; bl ++) {
-    nrValidSamples[bl][0] = 0; // channel 0 does not contain valid data
-
-    for (unsigned ch = 1; ch < NR_SUBBAND_CHANNELS; ch ++) {
-      nrValidSamples[bl][ch] = itsNrValidSamples[bl];
-    }
-  }
-
-  for (unsigned ch = 1; ch < NR_SUBBAND_CHANNELS; ch ++) {
-    // build a map of valid stations
-    unsigned nrValidStations = 0, map[itsNrStations];
-
-    for (unsigned stat2 = 0; stat2 < itsNrStations; stat2 ++) {
-      if (!itsRFIflags[stat2][ch]) {
-	map[nrValidStations ++] = stat2;
-      } else { // clear correlations that involve invalided stations
-	for (unsigned stat1 = 0; stat1 < itsNrStations; stat1 ++) {
-	  unsigned bl = stat1 < stat2 ? DH_Visibilities::baseline(stat1, stat2) :
-	    DH_Visibilities::baseline(stat2, stat1);
-	  //_clear_correlation(&visibilities[bl][ch]);
-	  nrValidSamples[bl][ch] = 0;
-	}
-      }
-    }
-
-    if (nrValidStations == 0) {
-      break;
-    }
-
-    // Divide the correlation matrix into blocks of 3x2, 2x2, 3+2, 2+1, and 1x1.
-
-    // do the first (auto)correlation(s) (these are the "left"most 1 or 3
-    // squares in the corner of the triangle)
-    if (nrValidStations % 2 == 0) {
-      unsigned stat10 = map[0], stat11 = map[1];
-
-      _auto_correlate_2((*itsSamples)[ch][stat10].origin(),
-			(*itsSamples)[ch][stat11].origin(),
-			visibilities[DH_Visibilities::baseline(stat10, stat10)][ch].origin(),
-			visibilities[DH_Visibilities::baseline(stat10, stat11)][ch].origin(),
-			visibilities[DH_Visibilities::baseline(stat11, stat11)][ch].origin(),
-			itsNrSamplesPerIntegration);
-    } else {
-      unsigned stat10 = map[0];
-
-      _auto_correlate_1((*itsSamples)[ch][stat10].origin(),
-			visibilities[DH_Visibilities::baseline(stat10, stat10)][ch].origin(),
-			itsNrSamplesPerIntegration);
-    }
-
-    for (unsigned stat2 = nrValidStations % 2 ? 1 : 2; stat2 < nrValidStations; stat2 += 2) {
-      unsigned stat1 = 0;
-
-      // do as many 3x2 blocks as possible
-      for (; stat1 + 3 <= stat2; stat1 += 3) { 
-	unsigned stat10 = map[stat1], stat11 = map[stat1+1], stat12 = map[stat1+2];
-	unsigned stat20 = map[stat2], stat21 = map[stat2+1];
-
-	_correlate_3x2((*itsSamples)[ch][stat10].origin(),
-		       (*itsSamples)[ch][stat11].origin(),
-		       (*itsSamples)[ch][stat12].origin(),
-		       (*itsSamples)[ch][stat20].origin(),
-		       (*itsSamples)[ch][stat21].origin(),
-		       visibilities[DH_Visibilities::baseline(stat10, stat20)][ch].origin(),
-		       visibilities[DH_Visibilities::baseline(stat10, stat21)][ch].origin(),
-		       visibilities[DH_Visibilities::baseline(stat11, stat20)][ch].origin(),
-		       visibilities[DH_Visibilities::baseline(stat11, stat21)][ch].origin(),
-		       visibilities[DH_Visibilities::baseline(stat12, stat20)][ch].origin(),
-		       visibilities[DH_Visibilities::baseline(stat12, stat21)][ch].origin(),
-		       itsNrSamplesPerIntegration);
-      }
-
-      // see if a 2x2 block is necessary
-      if (stat1 + 2 <= stat2) {
-	unsigned stat10 = map[stat1], stat11 = map[stat1+1];
-	unsigned stat20 = map[stat2], stat21 = map[stat2+1];
-
-	_correlate_2x2((*itsSamples)[ch][stat10].origin(),
-		       (*itsSamples)[ch][stat11].origin(),
-		       (*itsSamples)[ch][stat20].origin(),
-		       (*itsSamples)[ch][stat21].origin(),
-		       visibilities[DH_Visibilities::baseline(stat10, stat20)][ch].origin(),
-		       visibilities[DH_Visibilities::baseline(stat10, stat21)][ch].origin(),
-		       visibilities[DH_Visibilities::baseline(stat11, stat20)][ch].origin(),
-		       visibilities[DH_Visibilities::baseline(stat11, stat21)][ch].origin(),
-		       itsNrSamplesPerIntegration);
-	stat1 += 2;
-      }
-
-      // do the remaining (auto)correlations near the diagonal
-      if (stat1 == stat2) {
-	unsigned stat10 = map[stat1], stat11 = map[stat1+1];
-
-	_auto_correlate_2((*itsSamples)[ch][stat10].origin(),
-			  (*itsSamples)[ch][stat11].origin(),
-			  visibilities[DH_Visibilities::baseline(stat10,stat10)][ch].origin(),
-			  visibilities[DH_Visibilities::baseline(stat10,stat11)][ch].origin(),
-			  visibilities[DH_Visibilities::baseline(stat11,stat11)][ch].origin(),
-			  itsNrSamplesPerIntegration);
-      } else {
-	unsigned stat10 = map[stat1], stat11 = map[stat1+1], stat12 = map[stat1+2];
-
-	_auto_correlate_3((*itsSamples)[ch][stat10].origin(),
-			  (*itsSamples)[ch][stat11].origin(),
-			  (*itsSamples)[ch][stat12].origin(),
-			  visibilities[DH_Visibilities::baseline(stat10,stat11)][ch].origin(),
-			  visibilities[DH_Visibilities::baseline(stat10,stat12)][ch].origin(),
-			  visibilities[DH_Visibilities::baseline(stat11,stat11)][ch].origin(),
-			  visibilities[DH_Visibilities::baseline(stat11,stat12)][ch].origin(),
-			  visibilities[DH_Visibilities::baseline(stat12,stat12)][ch].origin(),
-			  itsNrSamplesPerIntegration);
-      }
-    }
-  }
-
-  weightTimer.start();
-#if 0
-  for (unsigned bl = 0; bl < itsNrBaselines; bl ++) {
-    for (unsigned ch = 0; ch < NR_SUBBAND_CHANNELS; ch ++) {
-      for (unsigned pol1 = 0; pol1 < NR_POLARIZATIONS; pol1 ++) {
-	for (unsigned pol2 = 0; pol2 < NR_POLARIZATIONS; pol2 ++) {
-	  visibilities[bl][ch][pol1][pol2] *= itsCorrelationWeights[(*nrValidSamples)[bl][ch]];
-	}
-      }
-    }
-  }
-#else
-  _weigh_visibilities(visibilities.origin(), nrValidSamples.origin(), itsCorrelationWeights, itsNrBaselines * NR_SUBBAND_CHANNELS);
-#endif
-  weightTimer.stop();
-#endif  
-
-#if 0
-static unsigned count;
-nrValidSamples[0][0] = itsCurrentSubband;
-nrValidSamples[0][1] = TH_MPI::getCurrentRank();
-nrValidSamples[0][2] = count ++;
-#endif
-  doCorrelateTimer.stop();
-}
-
-
-void WH_BGL_Processing::process()
-{
-  NSTimer totalTimer("total", true);
-  totalTimer.start();
-
-#if defined HAVE_MPI
-  std::clog << setprecision(15) << "core " << TH_MPI::getCurrentRank() << ": start reading at " << MPI_Wtime() << '\n';
-#endif
-
-  static NSTimer readTimer("receive timer", true);
-  readTimer.start();
-  getDataManager().readyWithInHolder(SUBBAND_CHANNEL);
-
-  if (itsInputConnected)
-    get_DH_Subband()->getExtraData();
-
-  readTimer.stop();
-
-#if defined HAVE_MPI
-  std::clog << "core " << TH_MPI::getCurrentRank() << ": start processing at " << MPI_Wtime() << '\n';
-#endif
-
-  computeTimer.start();
-
-  computeFlags();
-
-#if NR_SUBBAND_CHANNELS > 1
-  doPPF(itsCenterFrequencies[itsCurrentSubband] - (NR_SUBBAND_CHANNELS / 2) * itsChannelBandwidth);
-#else
-  bypassPPF();
-#endif
-
-  doCorrelate();
-
-  if ((itsCurrentSubband += itsSubbandIncrement) >= itsLastSubband)
-    itsCurrentSubband -= itsLastSubband - itsFirstSubband;
-
-  computeTimer.stop();
-
-#if defined HAVE_MPI
-  std::clog << "core " << TH_MPI::getCurrentRank() << ": start writing at " << MPI_Wtime() << '\n';
-#endif
-
-  static NSTimer writeTimer("send timer", true);
-  writeTimer.start();
-  getDataManager().readyWithOutHolder(VISIBILITIES_CHANNEL);
-  writeTimer.stop();
-
-#if defined HAVE_MPI
-  std::clog << "core " << TH_MPI::getCurrentRank() << ": start idling at " << MPI_Wtime() << '\n';
-#endif
-
-#if 0
-  static unsigned count = 0;
-
-  if (TH_MPI::getCurrentRank() == 5 && ++ count == 9)
-    for (double time = MPI_Wtime() + 4.0; MPI_Wtime() < time;)
-      ;
-#endif
-
-  totalTimer.stop();
-}
-
-
-void WH_BGL_Processing::postprocess()
-{
-#if defined C_IMPLEMENTATION
-  delete itsFIRs;
-#else
-  delete itsTmp;
-  delete itsFFToutData;
-#endif
-  delete itsFFTinData;
-  delete itsSamples;
-
-  delete [] itsFlags;
-  delete [] itsNrValidSamples;
-  delete [] itsCorrelationWeights;
-  delete [] itsRFIflags;
-
-#if defined HAVE_FFTW3
-  fftwf_destroy_plan(itsFFTWPlan);
-#elif defined HAVE_FFTW2
-  fftw_destroy_plan(itsFFTWPlan);
-#endif
-}
-
-
-void WH_BGL_Processing::dump() const
-{
-}
-
 } // namespace CS1
 } // namespace LOFAR
diff --git a/Appl/CEP/CS1/CS1_BGLProc/src/FIR.h b/Appl/CEP/CS1/CS1_BGLProc/src/FIR.h
index 6701f1ae976077290a4365f489c8644a9ecde834..278e72d7cbc9985c2a6587827c65c1f96f97e1e6 100644
--- a/Appl/CEP/CS1/CS1_BGLProc/src/FIR.h
+++ b/Appl/CEP/CS1/CS1_BGLProc/src/FIR.h
@@ -1,78 +1,32 @@
-//#  FIR.h: header files for BGL assembly
-//#
-//#  P.O.Box 2, 7990 AA Dwingeloo, The Netherlands, seg@astron.nl
-//#
-//#  This program is free software; you can redistribute it and/or modify
-//#  it under the terms of the GNU General Public License as published by
-//#  the Free Software Foundation; either version 2 of the License, or
-//#  (at your option) any later version.
-//#
-//#  This program is distributed in the hope that it will be useful,
-//#  but WITHOUT ANY WARRANTY; without even the implied warranty of
-//#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-//#  GNU General Public License for more details.
-//#
-//#  You should have received a copy of the GNU General Public License
-//#  along with this program; if not, write to the Free Software
-//#  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-//#
-//#  $Id$
-
 #ifndef LOFAR_APPL_CEP_CS1_CS1_BGL_PROC_FIR_H
 #define LOFAR_APPL_CEP_CS1_CS1_BGL_PROC_FIR_H
 
-#if defined HAVE_BGL
-#include <CS1_Interface/bitset.h>
-#include <CS1_Interface/CS1_Config.h>
-#include <CS1_Interface/DH_Subband.h>
-
-#include <rts.h>
+#if 0 || !defined HAVE_BGL // change the 0 to 1 to select the C++ FIR even when HAVE_BGL is defined
+#define FIR_C_IMPLEMENTATION // enables FIR::FIR(), FIR::processNextSample() and FIR::itsDelayLine
+#endif
 
-namespace LOFAR
-{
-namespace CS1
-{
-
-struct phase_shift {
-  dcomplex v0, dv;
-};
+#include <Common/lofar_complex.h>
 
+#include <CS1_Interface/CS1_Config.h>
 
-extern "C" {
-  void _filter(fcomplex delayLine[NR_TAPS],
-	       const float weights[NR_TAPS],
-	       const DH_Subband::SampleType samples[],
-	       fcomplex out[],
-	       int nr_samples_div_16);
 
-  void _transpose_4x8(fcomplex *out,
-		      const fcomplex *in,
-		      int length,
-		      int input_stride,
-		      int output_stride);
+namespace LOFAR {
+namespace CS1 {
 
-  void _phase_shift_and_transpose(fcomplex *out,
-				  const fcomplex *in,
-				  const struct phase_shift *,
-				  int stride);
+class FIR { // FIR weight table; the C++ implementation additionally carries a per-instance delay line
+  public:
+#if defined FIR_C_IMPLEMENTATION
+    FIR(); // defined in FIR.cc under the same FIR_C_IMPLEMENTATION guard
 
-  void _fast_memcpy(void *dst, const void *src, size_t bytes);
-  void _memzero(void *dst, size_t bytes); // bytes must be multiple of 128
-  void _prefetch(const void *src, size_t count, size_t stride);
+    fcomplex processNextSample(fcomplex sample, const float weights[NR_TAPS]); // takes one input sample and one row of the weight table (the pre-refactor caller passed FIR::weights[chan])
 
-  extern struct {
-    unsigned input_type;
-    unsigned nr_subband_channels;
-    unsigned nr_taps;
-    unsigned nr_polarizations;
-  } _FIR_constants_used;
+    fcomplex itsDelayLine[NR_TAPS]; // NOTE(review): presumably the most recent NR_TAPS samples -- confirm in FIR.cc
+#endif
 
-  void _bgl_mutex_lock(BGL_Mutex *), _bgl_mutex_unlock(BGL_Mutex *);
-  unsigned long long _rdtsc();
+    static const float weights[NR_SUBBAND_CHANNELS][NR_TAPS]; // one row of NR_TAPS weights per subband channel; defined in FIR.cc
 };
 
 } // namespace CS1
 } // namespace LOFAR
 
 #endif
-#endif
diff --git a/Appl/CEP/CS1/CS1_BGLProc/src/FIR.S b/Appl/CEP/CS1/CS1_BGLProc/src/FIR_Asm.S
similarity index 100%
rename from Appl/CEP/CS1/CS1_BGLProc/src/FIR.S
rename to Appl/CEP/CS1/CS1_BGLProc/src/FIR_Asm.S
diff --git a/Appl/CEP/CS1/CS1_BGLProc/src/FIR_Asm.h b/Appl/CEP/CS1/CS1_BGLProc/src/FIR_Asm.h
new file mode 100644
index 0000000000000000000000000000000000000000..709cc3d6c9953999eb5cced075b6dffb81ed643d
--- /dev/null
+++ b/Appl/CEP/CS1/CS1_BGLProc/src/FIR_Asm.h
@@ -0,0 +1,76 @@
+//#  FIR_Asm.h: header files for BGL assembly
+//#
+//#  P.O.Box 2, 7990 AA Dwingeloo, The Netherlands, seg@astron.nl
+//#
+//#  This program is free software; you can redistribute it and/or modify
+//#  it under the terms of the GNU General Public License as published by
+//#  the Free Software Foundation; either version 2 of the License, or
+//#  (at your option) any later version.
+//#
+//#  This program is distributed in the hope that it will be useful,
+//#  but WITHOUT ANY WARRANTY; without even the implied warranty of
+//#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+//#  GNU General Public License for more details.
+//#
+//#  You should have received a copy of the GNU General Public License
+//#  along with this program; if not, write to the Free Software
+//#  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+//#
+//#  $Id$
+
+#ifndef LOFAR_APPL_CEP_CS1_CS1_BGL_PROC_FIR_ASM_H
+#define LOFAR_APPL_CEP_CS1_CS1_BGL_PROC_FIR_ASM_H
+
+#if defined HAVE_BGL
+#include <CS1_Interface/CS1_Config.h>
+
+#include <rts.h>
+
+namespace LOFAR
+{
+namespace CS1
+{
+// Element type of the array handed to _phase_shift_and_transpose(); the pre-refactor caller kept one entry per time sample.
+struct phase_shift {
+  dcomplex v0, dv; // NOTE(review): the pre-refactor computePhaseShifts() stored the base-frequency phasor in v0 and the channel-bandwidth phasor in dv -- confirm the new caller does the same
+};
+
+
+extern "C" { // C linkage for the BGL assembly routines (presumably implemented in FIR_Asm.S)
+  void _filter(fcomplex delayLine[NR_TAPS], // NOTE(review): the pre-refactor caller passed 0 for delayLine
+	       const float weights[NR_TAPS],
+	       const INPUT_SAMPLE_TYPE samples[],
+	       fcomplex out[],
+	       int nr_samples_div_16); // NOTE(review): the pre-refactor caller passed nrSamplesPerIntegration / NR_TAPS here -- confirm NR_TAPS == 16
+  // NOTE(review): pre-refactor callers passed both strides as byte counts (sizeof(fcomplex) * ...) -- confirm
+  void _transpose_4x8(fcomplex *out,
+		      const fcomplex *in,
+		      int length,
+		      int input_stride,
+		      int output_stride);
+  // NOTE(review): the pre-refactor caller passed a byte count as stride and a pointer into a phase_shift array -- confirm
+  void _phase_shift_and_transpose(fcomplex *out,
+				  const fcomplex *in,
+				  const struct phase_shift *,
+				  int stride);
+
+  void _fast_memcpy(void *dst, const void *src, size_t bytes);
+  void _memzero(void *dst, size_t bytes); // bytes must be multiple of 128
+  void _prefetch(const void *src, size_t count, size_t stride);
+  // The pre-refactor constructor asserted these equal INPUT_TYPE, NR_SUBBAND_CHANNELS, NR_TAPS and NR_POLARIZATIONS.
+  extern struct {
+    unsigned input_type;
+    unsigned nr_subband_channels;
+    unsigned nr_taps;
+    unsigned nr_polarizations;
+  } _FIR_constants_used;
+
+  void _bgl_mutex_lock(BGL_Mutex *), _bgl_mutex_unlock(BGL_Mutex *);
+  unsigned long long _rdtsc();
+};
+
+} // namespace CS1
+} // namespace LOFAR
+
+#endif
+#endif
diff --git a/Appl/CEP/CS1/CS1_BGLProc/src/FilteredData.h b/Appl/CEP/CS1/CS1_BGLProc/src/FilteredData.h
new file mode 100644
index 0000000000000000000000000000000000000000..961ac56f9b038cf44f3f5756f2e25b91989a9206
--- /dev/null
+++ b/Appl/CEP/CS1/CS1_BGLProc/src/FilteredData.h
@@ -0,0 +1,60 @@
+#ifndef LOFAR_APPL_CEP_CS1_CS1_BGL_PROC_FILTERED_DATA_H
+#define LOFAR_APPL_CEP_CS1_CS1_BGL_PROC_FILTERED_DATA_H
+
+#include <Common/lofar_complex.h>
+#include <CS1_Interface/CS1_Config.h>
+#include <CS1_Interface/SparseSet.h>
+
+#include <Allocator.h>
+
+#include <boost/multi_array.hpp>
+
+
+namespace LOFAR {
+namespace CS1 {
+
+class FilteredData // 4-D array of fcomplex samples plus a per-station array of SparseSet flags
+{
+  public:
+    FilteredData(const Heap &, unsigned nrStations, unsigned nrSamplesPerIntegration); // sample storage is obtained through an Overlay built from the given Heap
+    ~FilteredData(); // NOTE(review): copy construction/assignment is not disabled, yet the destructor frees flags and the sample storage -- a copy would release both twice
+
+    static size_t requiredSize(unsigned nrStations, unsigned nrSamplesPerIntegration); // byte size of the samples array for these dimensions
+
+  private:
+    Overlay overlay; // declared before samples, so it is initialized before samples allocates from it
+
+  public:
+    // The "| 2" significantly improves transpose speeds for particular
+    // numbers of stations due to cache conflict effects.  The extra memory
+    // is not used.
+    boost::multi_array_ref<fcomplex, 4> samples; //[NR_SUBBAND_CHANNELS][itsNrStations][itsNrSamplesPerIntegration | 2][NR_POLARIZATIONS] CACHE_ALIGNED
+    SparseSet<unsigned>			*flags; //[itsNrStations]; allocated with new[] in the constructor and owned by this object
+};
+
+// Returns the byte size of the samples array; the time extent is (nrSamplesPerIntegration | 2), matching the extents used by the constructor.
+inline size_t FilteredData::requiredSize(unsigned nrStations, unsigned nrSamplesPerIntegration)
+{
+  return sizeof(fcomplex) * NR_SUBBAND_CHANNELS * nrStations * (nrSamplesPerIntegration | 2) * NR_POLARIZATIONS;
+}
+
+// Builds the overlay from heap, obtains requiredSize() bytes from it for samples, and allocates one SparseSet per station with new[].
+inline FilteredData::FilteredData(const Heap &heap, unsigned nrStations, unsigned nrSamplesPerIntegration)
+:
+  overlay(heap),
+  samples(static_cast<fcomplex *>(overlay.allocate(requiredSize(nrStations, nrSamplesPerIntegration), 32)), boost::extents[NR_SUBBAND_CHANNELS][nrStations][nrSamplesPerIntegration | 2][NR_POLARIZATIONS]), // NOTE(review): the 32 is presumably the requested alignment in bytes -- confirm against Overlay::allocate
+  flags(new SparseSet<unsigned>[nrStations])
+{
+}
+
+// Hands the samples storage back to the overlay and frees the flags array.
+inline FilteredData::~FilteredData()
+{
+  overlay.deallocate(samples.origin());
+  delete [] flags;
+}
+
+} // namespace CS1
+} // namespace LOFAR
+
+#endif
diff --git a/Appl/CEP/CS1/CS1_BGLProc/src/InputData.h b/Appl/CEP/CS1/CS1_BGLProc/src/InputData.h
new file mode 100644
index 0000000000000000000000000000000000000000..64dc2946400bcc3c14570e521e63d6bbf863955a
--- /dev/null
+++ b/Appl/CEP/CS1/CS1_BGLProc/src/InputData.h
@@ -0,0 +1,78 @@
+#ifndef LOFAR_APPL_CEP_CS1_CS1_BGL_PROC_INPUT_DATA_H
+#define LOFAR_APPL_CEP_CS1_CS1_BGL_PROC_INPUT_DATA_H
+
+#include <Common/lofar_complex.h>
+#include <Common/DataConvert.h>
+#include <CS1_Interface/CS1_Config.h>
+#include <CS1_Interface/ION_to_CN.h>
+#include <Transport/TH_Null.h>
+
+#include <Allocator.h>
+#include <TH_ZoidClient.h>
+
+#include <boost/multi_array.hpp>
+
+
+namespace LOFAR {
+namespace CS1 {
+// Samples and meta data as received through a TransportHolder; the sample storage is obtained through an Overlay on the Heap given to the constructor.
+class InputData
+{
+  public:
+    InputData(const Heap &, unsigned nrSubbands, unsigned nrSamplesToBGLProc);
+    ~InputData();
+    // Receives metaData and then all samples from the given transport.
+    void read(TransportHolder *th);
+    // Number of bytes the constructor allocates for `samples`.
+    static size_t requiredSize(unsigned nrSubbands, unsigned nrSamplesToBGLProc);
+
+    typedef INPUT_SAMPLE_TYPE		  SampleType;
+    // NOTE(review): the class is implicitly copyable while the destructor deallocates the sample storage -- confirm it is never copied.
+  private:
+    Overlay				  overlay; // declared before `samples`, which the constructor allocates through it
+
+  public:
+    boost::multi_array_ref<SampleType, 3> samples; //[outputPsets.size()][itsCS1PS->nrSamplesToBGLProc()][NR_POLARIZATIONS]
+    // Filled by read(); its accessors are read by Transpose::transposeMetaData().
+    ION_to_CN				  metaData;
+};
+
+inline size_t InputData::requiredSize(unsigned nrSubbands, unsigned nrSamplesToBGLProc)
+{
+  const size_t bytesPerPolarization = sizeof(SampleType) * nrSubbands * nrSamplesToBGLProc; // one polarization of every subband and time step
+  return bytesPerPolarization * NR_POLARIZATIONS; // matches the extents used for `samples` in the constructor
+}
+
+// Builds the overlay on `heap` and allocates requiredSize() bytes from it for the [nrSubbands][nrSamplesToBGLProc][NR_POLARIZATIONS] sample array.
+inline InputData::InputData(const Heap &heap, unsigned nrSubbands, unsigned nrSamplesToBGLProc)
+:
+  overlay(heap),
+  samples(static_cast<SampleType *>(overlay.allocate(requiredSize(nrSubbands, nrSamplesToBGLProc), 32)), boost::extents[nrSubbands][nrSamplesToBGLProc][NR_POLARIZATIONS]) // second allocate() argument is 32 -- presumably the alignment in bytes; TODO confirm
+{
+}
+
+// Returns the sample storage obtained in the constructor to the overlay.
+inline InputData::~InputData()
+{
+  overlay.deallocate(samples.origin());
+}
+
+// Receives the meta data and then the samples of all subbands from `th`; the result of recvBlocking() is not checked.
+inline void InputData::read(TransportHolder *th)
+{
+  metaData.read(th);
+  // the ION sends the subbands one at a time, but a single recvBlocking call reads them all
+  SampleType *const buffer     = samples.origin();
+  const size_t      nrElements = samples.num_elements();
+  th->recvBlocking(buffer, nrElements * sizeof(SampleType), 0, 0, 0);
+
+#if defined C_IMPLEMENTATION && defined WORDS_BIGENDIAN
+  dataConvert(LittleEndian, buffer, nrElements);
+#endif
+}
+
+
+} // namespace CS1
+} // namespace LOFAR
+
+#endif
diff --git a/Appl/CEP/CS1/CS1_BGLProc/src/Makefile.am b/Appl/CEP/CS1/CS1_BGLProc/src/Makefile.am
index 769656b5c91f474dc214d2848e46c9f69ea22b01..17033955a6b3fdac87334f84f3f683b057b27419 100644
--- a/Appl/CEP/CS1/CS1_BGLProc/src/Makefile.am
+++ b/Appl/CEP/CS1/CS1_BGLProc/src/Makefile.am
@@ -1,9 +1,17 @@
 INSTHDRS         	=\
-Correlator.h		\
-FFT.h			\
+Allocator.h		\
+CorrelatorAsm.h		\
+FFT_Asm.h		\
+FIR_Asm.h		\
+InputData.h		\
+FilteredData.h		\
+TransposedData.h	\
+CorrelatedData.h	\
 FIR.h			\
-AH_BGL_Processing.h	\
-WH_BGL_Processing.h	\
+PPF.h			\
+Transpose.h		\
+Correlator.h		\
+BGL_Processing.h	\
 TH_ZoidClient.h
 
 NOINSTHDRS 		=
@@ -24,12 +32,16 @@ CCAS			= $(CC)
 CCASFLAGS		= $(patsubst -q%,,$(CPPFLAGS)) $(EXTRA_CPPFLAGS)
 
 CS1_BGL_Processing_SOURCES	= $(DOCHDRS) \
-Correlator.S			\
-FIR.S				\
-FFT.S				\
+Allocator.cc			\
+CorrelatorAsm.S			\
+FIR_Asm.S			\
+FFT_Asm.S			\
+FIR.cc				\
+Transpose.cc			\
+PPF.cc				\
+Correlator.cc			\
 CS1_BGL_Processing_main.cc 	\
-AH_BGL_Processing.cc		\
-WH_BGL_Processing.cc		\
+BGL_Processing.cc		\
 TH_ZoidClient.cc
 
 configfilesdir=$(bindir)
diff --git a/Appl/CEP/CS1/CS1_BGLProc/src/PPF.cc b/Appl/CEP/CS1/CS1_BGLProc/src/PPF.cc
new file mode 100644
index 0000000000000000000000000000000000000000..4461c74651aa8f96f3229b3ebb3f6869e5512ada
--- /dev/null
+++ b/Appl/CEP/CS1/CS1_BGLProc/src/PPF.cc
@@ -0,0 +1,336 @@
+//# Always #include <lofar_config.h> first!
+#include <lofar_config.h>
+
+//# Includes
+#include <PPF.h>
+#include <FFT_Asm.h>
+#include <FIR_Asm.h>
+
+#include <Common/Timer.h>
+
+#include <complex>
+#include <cmath>
+
+
+#if defined HAVE_MASS
+#include <mass.h>
+#endif
+
+
+namespace LOFAR {
+namespace CS1 {
+
+#if !defined HAVE_MASS
+// Returns cos(x) + i*sin(x).  Only compiled without HAVE_MASS; presumably <mass.h> supplies cosisin() otherwise -- TODO confirm.
+inline static dcomplex cosisin(double x)
+{
+  return makedcomplex(cos(x), sin(x));
+}
+
+#endif
+
+// File-scope timers wrapped around the stages of the PPF; NOTE(review): the meaning of the second constructor argument (true) is not visible here.
+static NSTimer computeFlagsTimer("PPF::computeFlags()", true); // started/stopped in computeFlags()
+static NSTimer FIRtimer("PPF::FIRtimer", true); // wraps the FIR stage in filter()
+static NSTimer FFTtimer("PPF::FFT", true); // wraps the FFT stage in filter()
+static NSTimer PPFtimer("PPF::filter()", true); // wraps all of filter()
+
+// Stores the configuration, sizes the work buffers of the selected implementation (C++ or assembly) and creates the FFT plan.
+PPF::PPF(unsigned nrStations, unsigned nrSamplesPerIntegration, double channelBandwidth, bool delayCompensation)
+:
+  itsNrStations(nrStations),
+  itsNrSamplesPerIntegration(nrSamplesPerIntegration),
+  itsChannelBandwidth(channelBandwidth),
+  itsDelayCompensation(delayCompensation),
+
+#if defined PPF_C_IMPLEMENTATION
+  itsFIRs(boost::extents[nrStations][NR_POLARIZATIONS][NR_SUBBAND_CHANNELS]),
+  itsFFTinData(boost::extents[NR_TAPS - 1 + nrSamplesPerIntegration][NR_POLARIZATIONS][NR_SUBBAND_CHANNELS])
+#else
+  itsTmp(boost::extents[4][nrSamplesPerIntegration]),
+  itsFFTinData(boost::extents[nrSamplesPerIntegration][NR_POLARIZATIONS][NR_SUBBAND_CHANNELS + 4]),
+  itsFFToutData(boost::extents[2][NR_POLARIZATIONS][NR_SUBBAND_CHANNELS])
+#endif
+// Same guard as the declaration of `mutex` in PPF.h and its use in filter(); the member does not exist for the C implementation.
+#if defined HAVE_BGL && !defined PPF_C_IMPLEMENTATION
+, mutex(rts_allocate_mutex())
+#endif
+{
+  init_fft();
+}
+
+// Destroys the FFT plan created by the constructor.
+PPF::~PPF()
+{
+  destroy_fft();
+}
+
+
+#if 0 && defined HAVE_BGL
+// Disabled (#if 0) debugging aid: runs FFTW and _fft256() on the same 256-point input and prints the element-wise quotient of both results.
+static void FFTtest()
+{
+  fftw_plan plan = fftw_create_plan(256, FFTW_FORWARD, FFTW_ESTIMATE); // NOTE(review): the plan is never destroyed
+
+  fcomplex in[256], fout[256], sout[256];
+  // test pattern: element i is (2i, 2i + 1)
+  for (unsigned i = 0; i < 256; i ++)
+    in[i] = makefcomplex(2 * i, 2 * i + 1);
+
+  fftw_one(plan, (fftw_complex *) in, (fftw_complex *) fout);
+
+  _fft256(in, sout);
+  // a quotient of (1,0) means both implementations agree for that element
+  for (unsigned i = 0; i < 256; i ++) {
+    fcomplex diff = fout[i] / sout[i];
+    std::cout << i << " (" << real(fout[i]) << ',' << imag(fout[i]) << ") / (" << real(sout[i]) << ',' << imag(sout[i]) << ") = (" << real(diff) << ',' << imag(diff) << ")\n";
+  }
+
+  //std::exit(0);
+}
+
+#endif
+
+// Creates itsFFTWPlan, a forward complex FFT of NR_SUBBAND_CHANNELS points, with whichever FFTW version is configured.
+void PPF::init_fft()
+{
+#if defined HAVE_FFTW3
+  fftwf_complex cbuf1[NR_SUBBAND_CHANNELS], cbuf2[NR_SUBBAND_CHANNELS]; // scratch arrays, only needed to create the plan
+  // filter() runs this plan through fftwf_execute_dft() on other arrays whose alignment may differ from cbuf1/cbuf2, which is only safe with FFTW_UNALIGNED.
+  itsFFTWPlan = fftwf_plan_dft_1d(NR_SUBBAND_CHANNELS, cbuf1, cbuf2, FFTW_FORWARD, FFTW_ESTIMATE | FFTW_UNALIGNED);
+#elif defined HAVE_FFTW2
+#if defined HAVE_BGL && NR_SUBBAND_CHANNELS == 256
+  fftw_import_wisdom_from_string("(FFTW-2.1.5 (256 529 -1 0 1 1 1 352 0) (128 529 -1 0 1 1 0 2817 0) (64 529 -1 0 1 1 0 1409 0) (32 529 -1 0 1 1 0 705 0) (16 529 -1 0 1 1 0 353 0) (8 529 -1 0 1 1 0 177 0) (4 529 -1 0 1 1 0 89 0) (2 529 -1 0 1 1 0 45 0))");
+  itsFFTWPlan = fftw_create_plan(NR_SUBBAND_CHANNELS, FFTW_FORWARD, FFTW_USE_WISDOM);
+#else
+  itsFFTWPlan = fftw_create_plan(NR_SUBBAND_CHANNELS, FFTW_FORWARD, FFTW_ESTIMATE);
+#endif
+#endif
+  //FFTtest();
+}
+
+// Destroys itsFFTWPlan with the destroy call of the FFTW version that created it in init_fft().
+void PPF::destroy_fft()
+{
+#if defined HAVE_FFTW3
+  fftwf_destroy_plan(itsFFTWPlan);
+#elif defined HAVE_FFTW2
+  fftw_destroy_plan(itsFFTWPlan);
+#endif
+}
+
+// Rebuilds filteredData->flags for every station from the flag ranges of the corresponding station in transposedData.
+void PPF::computeFlags(const TransposedData *transposedData, FilteredData *filteredData)
+{
+  computeFlagsTimer.start();
+
+#if NR_SUBBAND_CHANNELS == 1
+#error Not implementated
+#else
+  for (unsigned station = 0; station < itsNrStations; ++ station) {
+    SparseSet<unsigned> &outputFlags = filteredData->flags[station];
+    outputFlags.reset();
+    const std::vector<SparseSet<unsigned>::range> &inputRanges = transposedData->flags[station].getRanges();
+    for (SparseSet<unsigned>::const_iterator flagged = inputRanges.begin(); flagged != inputRanges.end(); ++ flagged) {
+      // input indices are divided by NR_SUBBAND_CHANNELS; the start is moved NR_TAPS - 1 earlier (presumably the FIR history) and both ends are clamped
+      unsigned firstSample = std::max(0, (signed) flagged->begin / NR_SUBBAND_CHANNELS - NR_TAPS + 1);
+      unsigned endSample   = std::min(itsNrSamplesPerIntegration, (flagged->end - 1) / NR_SUBBAND_CHANNELS + 1);
+      outputFlags.include(firstSample, endSample);
+    }
+  }
+#endif
+
+  computeFlagsTimer.stop();
+}
+
+
+#if defined PPF_C_IMPLEMENTATION
+// C implementation only: unit-magnitude phase factor for channel `chan` at time step `time`, using the delay interpolated linearly over the integration.
+fcomplex PPF::phaseShift(unsigned time, unsigned chan, double baseFrequency, const TransposedData::DelayIntervalType &delay) const
+{
+  const double fraction  = (double) time / itsNrSamplesPerIntegration;
+  const double delayNow  = delay.delayAtBegin + fraction * (delay.delayAfterEnd - delay.delayAtBegin);
+  const double frequency = baseFrequency + chan * itsChannelBandwidth;
+  const double phi       = -2 * M_PI * (delayNow * frequency);
+
+  return makefcomplex(std::cos(phi), std::sin(phi));
+}
+
+#else
+// Assembly implementation only: fills phaseShifts[0 .. itsNrSamplesPerIntegration - 1] with a start factor (v0) and a second factor (dv) per time step; presumably dv is the per-channel increment applied by _phase_shift_and_transpose -- TODO confirm.
+void PPF::computePhaseShifts(struct phase_shift phaseShifts[/*itsNrSamplesPerIntegration*/], const TransposedData::DelayIntervalType &delay, double baseFrequency) const
+{
+  double   phiBegin = -2 * M_PI * delay.delayAtBegin;
+  double   phiEnd   = -2 * M_PI * delay.delayAfterEnd;
+  double   deltaPhi = (phiEnd - phiBegin) / itsNrSamplesPerIntegration;
+  dcomplex v	    = cosisin(phiBegin * baseFrequency);
+  dcomplex dv       = cosisin(phiBegin * itsChannelBandwidth);
+  dcomplex vf       = cosisin(deltaPhi * baseFrequency);
+  dcomplex dvf      = cosisin(deltaPhi * itsChannelBandwidth);
+  // v and dv are advanced by one time step per iteration through a complex multiplication instead of recomputing sine and cosine
+  for (unsigned time = 0; time < itsNrSamplesPerIntegration; time ++) {
+    phaseShifts[time].v0 =  v;  v *=  vf;
+    phaseShifts[time].dv = dv; dv *= dvf;
+  }
+}
+
+#endif
+// Filters one integration of every station: FIR filter, FFT per time step and polarization, optional delay compensation, result stored in
+// filteredData->samples.  Flags in filteredData->flags (see computeFlags()) decide which time steps are zeroed instead of transformed.
+void PPF::filter(double centerFrequency, const TransposedData *transposedData, FilteredData *filteredData)
+{
+  PPFtimer.start();
+  // frequency used for channel 0; the C implementation's phaseShift() adds chan * itsChannelBandwidth to it
+  double baseFrequency = centerFrequency - (NR_SUBBAND_CHANNELS / 2) * itsChannelBandwidth;
+
+#if defined HAVE_BGL && !defined PPF_C_IMPLEMENTATION
+  // PPF puts a lot of pressure on the memory bus.  Avoid that both cores
+  // run simultaneously, since it slows them both.
+  _bgl_mutex_lock(mutex);
+#endif
+
+  for (unsigned stat = 0; stat < itsNrStations; stat ++) {
+    unsigned alignmentShift = transposedData->alignmentShifts[stat];
+
+#if 0
+    std::clog << setprecision(15) << "stat " << stat << ", basefreq " << baseFrequency << ": delay from " << delays[stat].delayAtBegin << " to " << delays[stat].delayAfterEnd << " sec" << std::endl;
+#endif
+
+#if defined PPF_C_IMPLEMENTATION
+    fcomplex fftOutData[NR_SUBBAND_CHANNELS];
+    // FIR-filter every (pol, chan) stream over NR_TAPS - 1 + itsNrSamplesPerIntegration time steps
+    FIRtimer.start();
+    for (unsigned pol = 0; pol < NR_POLARIZATIONS; pol ++) {
+      for (unsigned chan = 0; chan < NR_SUBBAND_CHANNELS; chan ++) {
+	for (unsigned time = 0; time < NR_TAPS - 1 + itsNrSamplesPerIntegration; time ++) {
+#if 0
+	  fcomplex sample = makefcomplex(transposedData->samples[stat][NR_SUBBAND_CHANNELS * time + chan + alignmentShift][pol]);
+#else
+	  i16complex tmp = transposedData->samples[stat][NR_SUBBAND_CHANNELS * time + chan + alignmentShift][pol];
+	  fcomplex sample = makefcomplex(real(tmp), imag(tmp));
+#endif
+	  itsFFTinData[time][pol][chan] = itsFIRs[stat][pol][chan].processNextSample(sample, FIR::weights[chan]);
+	}
+      }
+    }
+    FIRtimer.stop();
+    // FFT every unflagged (time, pol); flagged time steps are stored as zeros.  FFT input row NR_TAPS - 1 + time produces output time step `time`.
+    FFTtimer.start();
+    for (unsigned time = 0; time < itsNrSamplesPerIntegration; time ++) {
+      for (unsigned pol = 0; pol < NR_POLARIZATIONS; pol ++) {
+	if (filteredData->flags[stat].test(time)) {
+	  for (unsigned chan = 0; chan < NR_SUBBAND_CHANNELS; chan ++) {
+	    filteredData->samples[chan][stat][time][pol] = makefcomplex(0, 0);
+	  }
+	} else {
+#if defined HAVE_FFTW3
+	  fftwf_execute_dft(itsFFTWPlan,
+			    (fftwf_complex *) itsFFTinData[NR_TAPS - 1 + time][pol].origin(),
+			    (fftwf_complex *) (void *) fftOutData);
+#else
+	  fftw_one(itsFFTWPlan,
+		   (fftw_complex *) itsFFTinData[NR_TAPS - 1 + time][pol].origin(),
+		   (fftw_complex *) (void *) fftOutData);
+#endif
+
+	  for (unsigned chan = 0; chan < NR_SUBBAND_CHANNELS; chan ++) {
+	    if (itsDelayCompensation) {
+	      fftOutData[chan] *= phaseShift(time, chan, baseFrequency, transposedData->delays[stat]);
+	    }
+
+	    filteredData->samples[chan][stat][time][pol] = fftOutData[chan];
+	  }
+	}
+      }
+    }
+    FFTtimer.stop();
+#else // assembly implementation
+    int transpose_stride = sizeof(fcomplex) * (NR_POLARIZATIONS * (itsNrSamplesPerIntegration | 2) * itsNrStations - (itsDelayCompensation ? 3 : 0));
+    // NOTE(review): byte stride handed to the transpose routines at the end of the time loop; the reason for the "- 3" with delay compensation is not visible here
+    for (unsigned chan = 0; chan < NR_SUBBAND_CHANNELS; chan += 4) {
+      for (unsigned pol = 0; pol < NR_POLARIZATIONS; pol ++) {
+#if defined __GNUC__	// work around bug ???
+	for (register unsigned ch asm ("r28") = 0; ch < 4; ch ++) {
+#else
+	for (unsigned ch = 0; ch < 4; ch ++) {
+#endif
+	  FIRtimer.start();
+	  _filter(0, // itsFIRs[stat][pol][chan + ch].itsDelayLine,
+		  FIR::weights[chan + ch],
+		  &transposedData->samples[stat][chan + ch + alignmentShift][pol],
+		  itsTmp[ch].origin(),
+		  itsNrSamplesPerIntegration / NR_TAPS);
+	  FIRtimer.stop();
+	}
+
+	_transpose_4x8(&itsFFTinData[0][pol][chan],
+		       itsTmp.origin(),
+		       itsNrSamplesPerIntegration,
+		       sizeof(fcomplex) * itsNrSamplesPerIntegration,
+		       sizeof(fcomplex) * NR_POLARIZATIONS * (NR_SUBBAND_CHANNELS + 4));
+      }
+    }
+    // the array size is a run-time value (variable-length array, a compiler extension in C++); only filled and used with delay compensation
+    struct phase_shift phaseShifts[itsNrSamplesPerIntegration];
+
+    if (itsDelayCompensation) {
+      computePhaseShifts(phaseShifts, transposedData->delays[stat], baseFrequency);
+    }
+
+    const std::vector<SparseSet<unsigned>::range> &ranges = filteredData->flags[stat].getRanges();
+    SparseSet<unsigned>::const_iterator it = ranges.begin();
+
+    for (unsigned time = 0; time < itsNrSamplesPerIntegration; time ++) {
+      bool good = it == ranges.end() || time < it->begin || (time == it->end && (++ it, true)); // walks the flag ranges in step with `time`; advances to the next range once time reaches it->end
+
+      if (good) {
+	FFTtimer.start();
+#if 0
+	_prefetch(itsFFTinData[time].origin(),
+		  sizeof(fcomplex[NR_POLARIZATIONS][NR_SUBBAND_CHANNELS]) / CACHE_LINE_SIZE,
+		  CACHE_LINE_SIZE);
+#endif
+
+	for (unsigned pol = 0; pol < NR_POLARIZATIONS; pol ++) {
+#if 0
+	  fftw_one(itsFFTWPlan,
+		   (fftw_complex *) itsFFTinData[time][pol].origin(),
+		   (fftw_complex *) itsFFToutData[time & 1][pol].origin());
+#else
+	  _fft256(itsFFTinData[time][pol].origin(),
+		  itsFFToutData[time & 1][pol].origin());
+#endif
+	}
+	FFTtimer.stop();
+      } else {
+	  _memzero(itsFFToutData[time & 1].origin(),
+		   itsFFToutData[time & 1].num_elements() * sizeof(fcomplex));
+      }
+      // results are buffered in itsFFToutData[time & 1] and written out in pairs on every odd `time`; NOTE(review): with an odd itsNrSamplesPerIntegration the last time step would never be written
+      if (time & 1) {
+	if (itsDelayCompensation) {
+	  _phase_shift_and_transpose(&filteredData->samples[0][stat][time - 1][0],
+				     itsFFToutData.origin(),
+				     &phaseShifts[time - 1],
+				     transpose_stride);
+	} else {
+	  _transpose_4x8(&filteredData->samples[0][stat][time - 1][0],
+			 itsFFToutData.origin(),
+			 NR_SUBBAND_CHANNELS,
+			 sizeof(fcomplex) * NR_SUBBAND_CHANNELS,
+			 transpose_stride);
+	}
+      }
+    }
+#endif // PPF_C_IMPLEMENTATION
+  }
+
+#if defined HAVE_BGL && !defined PPF_C_IMPLEMENTATION
+  _bgl_mutex_unlock(mutex);
+#endif
+
+  PPFtimer.stop();
+}
+
+} // namespace CS1
+} // namespace LOFAR
diff --git a/Appl/CEP/CS1/CS1_BGLProc/src/PPF.h b/Appl/CEP/CS1/CS1_BGLProc/src/PPF.h
new file mode 100644
index 0000000000000000000000000000000000000000..81831cafebcf8cbd571ae5b5f8a46e8870e8dde1
--- /dev/null
+++ b/Appl/CEP/CS1/CS1_BGLProc/src/PPF.h
@@ -0,0 +1,77 @@
+#ifndef LOFAR_APPL_CEP_CS1_CS1_BGL_PROC_PPF_H
+#define LOFAR_APPL_CEP_CS1_CS1_BGL_PROC_PPF_H
+
+#if 0 || !defined HAVE_BGL
+#define PPF_C_IMPLEMENTATION
+#endif
+
+
+#include <FIR.h>
+#include <TransposedData.h>
+#include <FilteredData.h>
+#include <CacheAlignedAllocator.h>
+
+#include <boost/multi_array.hpp>
+
+#if defined HAVE_BGL
+#include <rts.h>
+#endif
+
+#if defined HAVE_FFTW3
+#include <fftw3.h>
+#elif defined HAVE_FFTW2
+#include <fftw.h>
+#else
+#error Should have FFTW3 or FFTW2 installed
+#endif
+
+
+namespace LOFAR {
+namespace CS1 {
+// Filter stage that turns TransposedData into FilteredData (FIR filter plus FFT, optional delay compensation); PPF_C_IMPLEMENTATION selects the C++ code instead of the assembly routines.
+class PPF
+{
+  public:
+    PPF(unsigned nrStations, unsigned nrSamplesPerIntegration, double channelBandwidth, bool delayCompensation);
+    ~PPF();
+    void computeFlags(const TransposedData *, FilteredData *);
+    void filter(double centerFrequency, const TransposedData *, FilteredData *);
+
+  private:
+    PPF(const PPF &); // copying is not allowed (never defined): a copy would destroy the same FFTW plan a second time
+    PPF &operator = (const PPF &);
+    void init_fft(), destroy_fft();
+#if defined PPF_C_IMPLEMENTATION
+    fcomplex phaseShift(unsigned time, unsigned chan, double baseFrequency, const TransposedData::DelayIntervalType &delay) const;
+#else
+    void     computePhaseShifts(struct phase_shift phaseShifts[/*itsNrSamplesPerIntegration*/], const TransposedData::DelayIntervalType &delay, double baseFrequency) const;
+#endif
+
+    unsigned itsNrStations, itsNrSamplesPerIntegration;
+    double   itsChannelBandwidth;
+    bool     itsDelayCompensation;
+    // work buffers; the constructor initializes them in this declaration order
+#if defined PPF_C_IMPLEMENTATION
+    boost::multi_array<FIR, 3> itsFIRs; //[itsNrStations][NR_POLARIZATIONS][NR_SUBBAND_CHANNELS]
+    boost::multi_array<fcomplex, 3> itsFFTinData; //[NR_TAPS - 1 + itsNrSamplesPerIntegration][NR_POLARIZATIONS][NR_SUBBAND_CHANNELS]
+#else
+    boost::multi_array<fcomplex, 2, CacheAlignedAllocator<fcomplex> > itsTmp; //[4][itsNrSamplesPerIntegration]
+    boost::multi_array<fcomplex, 3, CacheAlignedAllocator<fcomplex> > itsFFTinData; //[itsNrSamplesPerIntegration][NR_POLARIZATIONS][NR_SUBBAND_CHANNELS + 4]
+    boost::multi_array<fcomplex, 3, CacheAlignedAllocator<fcomplex> > itsFFToutData; //[2][NR_POLARIZATIONS][NR_SUBBAND_CHANNELS]
+#endif
+    // plan created by init_fft() and destroyed by destroy_fft()
+#if defined HAVE_FFTW3
+    fftwf_plan itsFFTWPlan;
+#elif defined HAVE_FFTW2
+    fftw_plan  itsFFTWPlan;
+#endif
+    // taken by filter() so that both cores do not run the PPF at the same time
+#if defined HAVE_BGL && !defined PPF_C_IMPLEMENTATION
+    BGL_Mutex  *mutex;
+#endif
+};
+
+} // namespace CS1
+} // namespace LOFAR
+
+#endif
diff --git a/Appl/CEP/CS1/CS1_BGLProc/src/TH_ZoidClient.cc b/Appl/CEP/CS1/CS1_BGLProc/src/TH_ZoidClient.cc
index cac63e0f7cb7183a89e4b895479299d3dbb9919c..14946c19654f78293312ae069752f1698d5992a0 100644
--- a/Appl/CEP/CS1/CS1_BGLProc/src/TH_ZoidClient.cc
+++ b/Appl/CEP/CS1/CS1_BGLProc/src/TH_ZoidClient.cc
@@ -23,7 +23,7 @@
 //# Always #include <lofar_config.h> first!
 #include <lofar_config.h>
 
-#if defined HAVE_ZOID
+#if defined HAVE_ZOID && defined HAVE_BGL
 
 #include <Transport/DataHolder.h>
 #include <CS1_BGLProc/TH_ZoidClient.h>
@@ -32,16 +32,30 @@ extern "C" {
 #include <lofar.h>
 }
 
-namespace LOFAR
+namespace LOFAR {
+namespace CS1 {
+
+#if 0
+static unsigned checksum(const void *buf, size_t size)
 {
+  unsigned sum = 0;
+
+  for (int i = 0; i < (int) (size / sizeof(unsigned)); i ++)
+    sum ^= ((unsigned *) buf)[i];
+
+  return sum;
+}
+#endif
 
-bool TH_ZoidClient::recvBlocking(void *buf, int nbytes, int, int, DataHolder *)
+bool TH_ZoidClient::recvBlocking(void *buf, int nbytes, int onecopy, int, DataHolder *)
 {
+  //std::clog << "TH_ZoidClient::recvBlocking(" << buf << ", " << nbytes << "...)" << std::endl;
+
   static size_t maxBytes = ~ (size_t) 0;
 
   for (size_t bytesRead = 0; bytesRead < (size_t) nbytes;) {
     size_t count = std::min(nbytes - bytesRead, maxBytes);
-    lofar_ion_to_cn((char *) buf + bytesRead, &count);
+    (onecopy ? lofar_ion_to_cn_onecopy : lofar_ion_to_cn_zerocopy)(static_cast<char *>(buf) + bytesRead, &count);
 
     switch (__zoid_error()) {
       case 0	  : bytesRead += count;
@@ -58,13 +72,16 @@ bool TH_ZoidClient::recvBlocking(void *buf, int nbytes, int, int, DataHolder *)
 }
 
 
-bool TH_ZoidClient::sendBlocking(void *buf, int nbytes, int, DataHolder *)
+bool TH_ZoidClient::sendBlocking(void *buf, int nbytes, int onecopy, DataHolder *)
 {
+  //std::clog << "TH_ZoidClient::sendBlocking(" << buf << ", " << nbytes << "...)" << std::endl;
+
   static size_t maxBytes = ~ (size_t) 0;
 
   for (size_t bytesWritten = 0; bytesWritten < (size_t) nbytes;) {
     size_t count = std::min(nbytes - bytesWritten, maxBytes);
-    lofar_cn_to_ion((char *) buf + bytesWritten, count);
+
+    count = (onecopy ? lofar_cn_to_ion_onecopy : lofar_cn_to_ion_zerocopy)(static_cast<char *>(buf) + bytesWritten, count);
 
     switch (__zoid_error()) {
       case 0	  : bytesWritten += count;
@@ -80,6 +97,7 @@ bool TH_ZoidClient::sendBlocking(void *buf, int nbytes, int, DataHolder *)
   return true;
 }
 
-}
+} // namespace CS1
+} // namespace LOFAR
 
 #endif
diff --git a/Appl/CEP/CS1/CS1_BGLProc/src/TH_ZoidClient.h b/Appl/CEP/CS1/CS1_BGLProc/src/TH_ZoidClient.h
index 67f55d2ad3a8de50cc178ed91c5b2ef711aa58df..7e1796af55c94d932f037c08f728e97acce91885 100644
--- a/Appl/CEP/CS1/CS1_BGLProc/src/TH_ZoidClient.h
+++ b/Appl/CEP/CS1/CS1_BGLProc/src/TH_ZoidClient.h
@@ -33,59 +33,52 @@
 //# Includes
 #include <Transport/TransportHolder.h>
 
-namespace LOFAR 
-{
-    // \addtogroup Transport
-    // @{
-
-    //# Forward Declarations
-
-    // Description of class.
-    class TH_ZoidClient : public TransportHolder
-    {
-    public:
-      TH_ZoidClient(){};
-      virtual ~TH_ZoidClient(){};
+namespace LOFAR {
+namespace CS1 {
 
-      virtual bool init() {return true;};
-
-      virtual bool recvBlocking (void*, int, int, int, DataHolder*);
-
-      virtual bool sendBlocking (void*, int, int, DataHolder*);
+class TH_ZoidClient : public TransportHolder
+{
+  public:
+    TH_ZoidClient(){};
+    virtual ~TH_ZoidClient(){};
 
-      virtual int32 recvNonBlocking (void*, int32, int, int32, DataHolder*)
-      { return true; }
+    virtual bool init() {return true;};
 
-      virtual void waitForReceived(void*, int, int)
-      {}
+    // if doCopy == 0, Zoid's zero-copy protocol is used; memory must be
+    // obtained through __zoid_alloc and must be a multiple of 32
+    virtual bool recvBlocking(void *ptr, int size, int doCopy, int, DataHolder*);
+    virtual bool sendBlocking(void *ptr, int size, int doCopy, DataHolder*);
 
-      virtual bool sendNonBlocking (void*, int, int, DataHolder*)
-      { return true; }
+    virtual int32 recvNonBlocking (void*, int32, int, int32, DataHolder*)
+    { return true; }
 
-      virtual void waitForSent(void*, int, int)
-      {}
+    virtual void waitForReceived(void*, int, int)
+    {}
 
-      virtual string getType() const
-      { return "TH_ZoidClient"; }
+    virtual bool sendNonBlocking (void*, int, int, DataHolder*)
+    { return true; }
 
-      virtual bool isClonable() const
-      { return true; }
+    virtual void waitForSent(void*, int, int)
+    {}
 
-      virtual TransportHolder* clone() const
-      { return new TH_ZoidClient(); }
+    virtual string getType() const
+    { return "TH_ZoidClient"; }
 
-      virtual void reset()
-      {}
+    virtual bool isClonable() const
+    { return true; }
 
-    private:
-      // Copying is not allowed
-      TH_ZoidClient(const TH_ZoidClient& that);
-      TH_ZoidClient& operator=(const TH_ZoidClient& that);
+    virtual TransportHolder* clone() const
+    { return new TH_ZoidClient(); }
 
-      //# Datamembers
-    };
+    virtual void reset()
+    {}
 
-    // @}
+  private:
+    // Copying is not allowed
+    TH_ZoidClient(const TH_ZoidClient& that);
+    TH_ZoidClient& operator=(const TH_ZoidClient& that);
+  };
+} // namespace CS1
 } // namespace LOFAR
 
 #endif
diff --git a/Appl/CEP/CS1/CS1_BGLProc/src/Transpose.cc b/Appl/CEP/CS1/CS1_BGLProc/src/Transpose.cc
new file mode 100644
index 0000000000000000000000000000000000000000..b2f96ec7bb707b08568af84e3d856cf7dd19b461
--- /dev/null
+++ b/Appl/CEP/CS1/CS1_BGLProc/src/Transpose.cc
@@ -0,0 +1,294 @@
+//# Always #include <lofar_config.h> first!
+#include <lofar_config.h>
+
+#include <Transpose.h>
+
+#include <Common/Timer.h>
+#include <Transport/TH_MPI.h>
+#include <CS1_Interface/BGL_Mapping.h>
+
+#if defined HAVE_BGL
+#include <rts.h>
+#endif
+
+#include <cassert>
+#include <map>
+#include <set>
+
+
+namespace LOFAR {
+namespace CS1 {
+
+#if defined HAVE_MPI
+// NOTE(review): transposeTimer is not started or stopped anywhere in this file.
+static NSTimer transposeTimer("transpose()", true);
+
+// One communicator per core position within a pset; filled by getMPIgroups() and indexed with myCore in the constructor.
+std::vector<MPI_Comm> Transpose::allTransposeGroups;
+
+// Stores the role flags and station count and picks this core's communicator; allTransposeGroups must already hold more than myCore entries (the index is not checked).
+Transpose::Transpose(bool isTransposeInput, bool isTransposeOutput, unsigned myCore, unsigned nrStations)
+:
+  itsIsTransposeInput(isTransposeInput),
+  itsIsTransposeOutput(isTransposeOutput),
+  itsNrStations(nrStations),
+  itsTransposeGroup(allTransposeGroups[myCore])
+{
+}
+
+// Nothing to release: the communicators in allTransposeGroups are not freed here.
+Transpose::~Transpose()
+{
+}
+
+
+#if defined HAVE_BGL
+// Returns the rank that rts_rankForCoordinates() reports for core `core` of pset `pset`, after mapping the core with BGL_Mapping::mapCoreOnPset(); exits if that rank is not below TH_MPI::getNumberOfNodes().
+unsigned Transpose::remapOnTree(unsigned pset, unsigned core, const struct BGLPersonality &personality)
+{
+  unsigned psetXsize  = personality.getXpsetSize();
+  unsigned psetYsize  = personality.getYpsetSize();
+  unsigned psetZsize  = personality.getZpsetSize();
+  // number of psets along each axis
+  unsigned psetXcount = personality.getXsize() / psetXsize;
+  unsigned psetYcount = personality.getYsize() / psetYsize;
+  unsigned psetZcount = personality.getZsize() / psetZsize;
+  // pset index decomposed with x varying fastest, then scaled to node coordinates
+  unsigned xOrigin    = pset			       % psetXcount * psetXsize;
+  unsigned yOrigin    = pset / psetXcount	       % psetYcount * psetYsize;
+  unsigned zOrigin    = pset / psetXcount / psetYcount % psetZcount * psetZsize;
+
+  unsigned nodesPerPset = personality.numNodesInPset();
+
+  unsigned numProcs, xOffset, yOffset, zOffset, node;
+  // `core` is remapped first; everything below uses the remapped value
+  core = BGL_Mapping::mapCoreOnPset(core, pset);
+  personality.coordsForPsetRank(core % nodesPerPset, xOffset, yOffset, zOffset);
+
+  unsigned x = xOrigin + xOffset - personality.xPsetOrigin();
+  unsigned y = yOrigin + yOffset - personality.yPsetOrigin();
+  unsigned z = zOrigin + zOffset - personality.zPsetOrigin();
+  unsigned t = core / nodesPerPset;
+  // NOTE(review): the return value of rts_rankForCoordinates() is not checked
+  rts_rankForCoordinates(x, y, z, t, &node, &numProcs);
+
+#if defined HAVE_MPI
+  if (node >= (unsigned) TH_MPI::getNumberOfNodes()) {
+    std::cerr << "not enough nodes allocated (node = " << node << ", TH_MPI::getNumberOfNodes() = " << TH_MPI::getNumberOfNodes() << std::endl;
+    exit(1);
+  }
+#endif
+
+  return node;
+}
+
+// Creates one MPI communicator per core position (0 .. nrCoresPerPset - 1), containing that core of every input or output pset, and stores them in allTransposeGroups.
+void Transpose::getMPIgroups(unsigned nrCoresPerPset, const struct BGLPersonality &personality, const std::vector<unsigned> &inputPsets, const std::vector<unsigned> &outputPsets)
+{
+  allTransposeGroups.resize(nrCoresPerPset);
+
+  MPI_Group all, group;
+
+  if (MPI_Comm_group(MPI_COMM_WORLD, &all) != MPI_SUCCESS) { // NOTE(review): `all` is never released with MPI_Group_free()
+    std::cerr << "MPI_Comm_group() failed" << std::endl;
+    exit(1);
+  }
+
+  // Ordered list of all psets.  Built with insert() rather than std::set_union(), which requires both input ranges to be sorted.
+  std::set<unsigned> psets;
+  psets.insert(inputPsets.begin(), inputPsets.end());
+  psets.insert(outputPsets.begin(), outputPsets.end());
+
+  for (unsigned core = 0; core < nrCoresPerPset; core ++) {
+    std::vector<int> ranks;
+
+    for (std::set<unsigned>::const_iterator pset = psets.begin(); pset != psets.end(); pset ++)
+      ranks.push_back(remapOnTree(*pset, core, personality));
+
+    if (TH_MPI::getCurrentRank() == 0) {
+      std::clog << "group " << core << " contains cores [";
+
+      for (unsigned i = 0; i < ranks.size(); i ++)
+	std::clog << ranks[i] << (i == ranks.size() - 1 ? ']' : ',');
+
+      std::clog << std::endl;
+    }
+
+    if (MPI_Group_incl(all, ranks.size(), &ranks[0], &group) != MPI_SUCCESS) {
+      std::cerr << "MPI_Group_incl() failed" << std::endl;
+      exit(1);
+    }
+
+    if (MPI_Comm_create(MPI_COMM_WORLD, group, &allTransposeGroups[core]) != MPI_SUCCESS) {
+      std::cerr << "MPI_Comm_create() failed" << std::endl;
+      exit(1);
+    }
+
+    if (MPI_Group_free(&group) != MPI_SUCCESS) {
+      std::cerr << "MPI_Group_free() failed" << std::endl; // previously reported the wrong call (MPI_Group_incl)
+      exit(1);
+    }
+  }
+}
+
+#endif
+
+// Fills the count/displacement tables (in bytes) that transpose() and transposeMetaData() pass to MPI_Alltoallv(), one entry per pset taking part.
+void Transpose::setupTransposeParams(const std::vector<unsigned> &inputPsets, const std::vector<unsigned> &outputPsets, InputData *inputData, TransposedData *transposedData)
+{
+  // Ordered list of all psets.  Built with insert() rather than std::set_union(), which requires both input ranges to be sorted.
+  std::set<unsigned> psets;
+  psets.insert(inputPsets.begin(), inputPsets.end());
+  psets.insert(outputPsets.begin(), outputPsets.end());
+
+  unsigned		       nrPsetsUsed = psets.size();
+  std::map<unsigned, unsigned> psetToGroupIndex;
+  unsigned		       groupIndex  = 0;
+
+  for (std::set<unsigned>::const_iterator pset = psets.begin(); pset != psets.end(); pset ++, groupIndex ++)
+    psetToGroupIndex[*pset] = groupIndex;
+  // the group index is the position of a pset in the ordered list, the same order getMPIgroups() uses to build the rank lists
+  if (TH_MPI::getCurrentRank() == 0)
+    for (std::map<unsigned, unsigned>::const_iterator it = psetToGroupIndex.begin(); it != psetToGroupIndex.end(); it ++)
+      std::clog << "pset " << it->first << " maps to group index " << it->second << std::endl;
+  // all counts start at zero, so a pset that is not filled in below exchanges nothing in that direction
+  itsTransposeParams.send.counts.resize(nrPsetsUsed, 0);
+  itsTransposeParams.send.displacements.resize(nrPsetsUsed);
+  itsTransposeParams.receive.counts.resize(nrPsetsUsed, 0);
+  itsTransposeParams.receive.displacements.resize(nrPsetsUsed);
+  itsTransposeMetaParams.send.counts.resize(nrPsetsUsed, 0);
+  itsTransposeMetaParams.send.displacements.resize(nrPsetsUsed);
+  itsTransposeMetaParams.receive.counts.resize(nrPsetsUsed, 0);
+  itsTransposeMetaParams.receive.displacements.resize(nrPsetsUsed);
+  // one received meta data record per input pset
+  itsOutputMetaData.resize(inputPsets.size());
+  // sending side: slice psetIndex of inputData->samples goes to output pset psetIndex; the same meta data record goes to every output pset
+  if (itsIsTransposeInput) {
+    for (unsigned psetIndex = 0; psetIndex < outputPsets.size(); psetIndex ++) {
+      unsigned pset  = outputPsets[psetIndex];
+      unsigned index = psetToGroupIndex[pset];
+
+      if (1 /* FIXME: psetIndex % itsCS1PS->nrRSPboardsPerStation() == 0 */) {
+	const boost::detail::multi_array::sub_array<InputData::SampleType, 2> &slice = inputData->samples[psetIndex];
+
+	itsTransposeParams.send.counts[index] = slice.num_elements() * sizeof(InputData::SampleType);
+	itsTransposeParams.send.displacements[index] = reinterpret_cast<const char *>(slice.origin()) - reinterpret_cast<const char *>(inputData->samples.origin());
+
+	itsTransposeMetaParams.send.counts[index] = sizeof itsInputMetaData;
+	itsTransposeMetaParams.send.displacements[index] = 0;
+      }
+    }
+  }
+  // receiving side: data from input pset psetIndex lands in slice psetIndex of transposedData->samples and in itsOutputMetaData[psetIndex]
+  if (itsIsTransposeOutput)
+    for (unsigned psetIndex = 0; psetIndex < inputPsets.size(); psetIndex ++) {
+      unsigned pset  = inputPsets[psetIndex];
+      unsigned index = psetToGroupIndex[pset];
+      const boost::detail::multi_array::sub_array<TransposedData::SampleType, 2> &slice = transposedData->samples[psetIndex];
+
+      itsTransposeParams.receive.counts[index] = slice.num_elements() * sizeof(TransposedData::SampleType);
+      itsTransposeParams.receive.displacements[index] = reinterpret_cast<const char *>(slice.origin()) - reinterpret_cast<const char *>(transposedData->samples.origin());
+
+      itsTransposeMetaParams.receive.counts[index] = sizeof itsInputMetaData;
+      itsTransposeMetaParams.receive.displacements[index] = psetIndex * sizeof itsInputMetaData;
+    }
+
+#if 0
+if (itsIsTransposeInput) std::clog << "send_base: " << inputData->samples.origin() << std::endl;
+std::clog << "send_counts:";
+for (unsigned pset = 0; pset < nrPsetsUsed; pset ++)
+std::clog << ' ' << itsTransposeParams.send.counts[pset];
+std::clog << std::endl;
+std::clog << "send_displacements:";
+for (unsigned pset = 0; pset < nrPsetsUsed; pset ++)
+std::clog << ' ' << itsTransposeParams.send.displacements[pset];
+std::clog << std::endl;
+if (itsIsTransposeOutput) std::clog << "receive_base: " << transposedData->samples.origin() << std::endl;
+std::clog << "receive_counts:";
+for (unsigned pset = 0; pset < nrPsetsUsed; pset ++)
+std::clog << ' ' << itsTransposeParams.receive.counts[pset];
+std::clog << std::endl;
+std::clog << "receive_displacements:";
+for (unsigned pset = 0; pset < nrPsetsUsed; pset ++)
+std::clog << ' ' << itsTransposeParams.receive.displacements[pset];
+std::clog << std::endl;
+#endif
+
+#if 0
+std::clog << "meta send_counts:";
+for (unsigned pset = 0; pset < nrPsetsUsed; pset ++)
+std::clog << ' ' << itsTransposeMetaParams.send.counts[pset];
+std::clog << std::endl;
+std::clog << "meta send_displacements:";
+for (unsigned pset = 0; pset < nrPsetsUsed; pset ++)
+std::clog << ' ' << itsTransposeMetaParams.send.displacements[pset];
+std::clog << std::endl;
+std::clog << "meta receive_counts:";
+for (unsigned pset = 0; pset < nrPsetsUsed; pset ++)
+std::clog << ' ' << itsTransposeMetaParams.receive.counts[pset];
+std::clog << std::endl;
+std::clog << "meta receive_displacements:";
+for (unsigned pset = 0; pset < nrPsetsUsed; pset ++)
+std::clog << ' ' << itsTransposeMetaParams.receive.displacements[pset];
+std::clog << std::endl;
+#endif
+}
+
+// Exchanges the sample data within this core's transpose group with a single MPI_Alltoallv(); exits the program if the call fails.
+void Transpose::transpose(const InputData *inputData, TransposedData *transposedData)
+{
+  // A core that does not take part in one direction passes a null buffer for it;
+  // setupTransposeParams() left its counts for that direction at zero.
+  void *sendBuffer    = itsIsTransposeInput ? (void *) inputData->samples.origin() : 0;
+  void *receiveBuffer = itsIsTransposeOutput ? transposedData->samples.origin() : 0;
+
+  // counts and displacements are expressed in bytes, hence MPI_BYTE
+  int status = MPI_Alltoallv(sendBuffer, &itsTransposeParams.send.counts[0], &itsTransposeParams.send.displacements[0], MPI_BYTE,
+			     receiveBuffer, &itsTransposeParams.receive.counts[0], &itsTransposeParams.receive.displacements[0], MPI_BYTE,
+			     itsTransposeGroup);
+
+  if (status != MPI_SUCCESS) {
+    std::cerr << "MPI_Alltoallv() failed" << std::endl;
+    exit(1);
+  }
+}
+
+// Exchanges the meta data (delays, alignment shift, marshalled flags) within the transpose group and stores what was received in `transposedData`; exits on failure.
+void Transpose::transposeMetaData(/*const*/ InputData *inputData, TransposedData *transposedData)
+{
+  if (itsIsTransposeInput) {
+    itsInputMetaData.delayAtBegin   = inputData->metaData.delayAtBegin();
+    itsInputMetaData.delayAfterEnd  = inputData->metaData.delayAfterEnd();
+    itsInputMetaData.alignmentShift = inputData->metaData.alignmentShift();
+    if (inputData->metaData.flags().marshall(&itsInputMetaData.flagsBuffer, sizeof itsInputMetaData.flagsBuffer) < 0) { // deliberately not inside assert(): NDEBUG would remove the call itself
+      std::cerr << "marshalling the flags failed" << std::endl;
+      exit(1);
+    }
+  }
+
+  if (MPI_Alltoallv(
+	&itsInputMetaData,
+	&itsTransposeMetaParams.send.counts[0],
+	&itsTransposeMetaParams.send.displacements[0], MPI_BYTE,
+	&itsOutputMetaData[0],
+	&itsTransposeMetaParams.receive.counts[0],
+	&itsTransposeMetaParams.receive.displacements[0], MPI_BYTE,
+	itsTransposeGroup) != MPI_SUCCESS)
+  {
+    std::cerr << "MPI_Alltoallv() failed" << std::endl;
+    exit(1);
+  }
+
+  if (itsIsTransposeOutput)
+    for (unsigned station = 0; station < itsNrStations; station ++) { // NOTE(review): itsOutputMetaData has inputPsets.size() entries; assumes that equals itsNrStations
+      transposedData->delays[station].delayAtBegin  = itsOutputMetaData[station].delayAtBegin;
+      transposedData->delays[station].delayAfterEnd = itsOutputMetaData[station].delayAfterEnd;
+      transposedData->alignmentShifts[station]      = itsOutputMetaData[station].alignmentShift;
+      transposedData->flags[station].unmarshall(itsOutputMetaData[station].flagsBuffer);
+    }
+}
+
+#endif
+
+} // namespace CS1
+} // namespace LOFAR
diff --git a/Appl/CEP/CS1/CS1_BGLProc/src/Transpose.h b/Appl/CEP/CS1/CS1_BGLProc/src/Transpose.h
new file mode 100644
index 0000000000000000000000000000000000000000..62ee38ec40ef1eb1e10a79cc096876728432f64e
--- /dev/null
+++ b/Appl/CEP/CS1/CS1_BGLProc/src/Transpose.h
@@ -0,0 +1,69 @@
+#ifndef LOFAR_APPL_CEP_CS1_CS1_BGL_PROC_TRANSPOSE_H
+#define LOFAR_APPL_CEP_CS1_CS1_BGL_PROC_TRANSPOSE_H
+
+#include <InputData.h>
+#include <TransposedData.h>
+
+#include <boost/multi_array.hpp>
+
+#if defined HAVE_BGL
+#include <bglpersonality.h>
+#endif
+
+#if defined HAVE_MPI
+#include <mpi.h>
+#endif
+
+#include <vector>
+
+
+namespace LOFAR {
+namespace CS1 {
+
+#if defined HAVE_MPI
+
+class Transpose { // exchanges sample data and meta data between cores with MPI_Alltoallv()
+  public:
+    Transpose(bool isTransposeInput, bool isTransposeOutput, unsigned myCore, unsigned nrStations);
+    ~Transpose();
+
+    void setupTransposeParams(const std::vector<unsigned> &inputPsets, const std::vector<unsigned> &outputPsets, InputData *, TransposedData *);
+    static void	getMPIgroups(unsigned nrCoresPerPset, const BGLPersonality &, const std::vector<unsigned> &inputPsets, const std::vector<unsigned> &outputPsets);
+    // NOTE(review): getMPIgroups() names BGLPersonality outside the HAVE_BGL guard, but <bglpersonality.h> is only included under HAVE_BGL -- confirm MPI builds without BGL.
+#if defined HAVE_BGL
+    static unsigned remapOnTree(unsigned pset, unsigned core, const struct BGLPersonality &);
+#endif
+
+    void transpose(const InputData *, TransposedData *); // receives into transposedData->samples when itsIsTransposeOutput is set
+    void transposeMetaData(/*const*/ InputData *, TransposedData *); // fills delays, alignmentShifts and flags of the TransposedData on output cores
+
+  private:
+    bool     itsIsTransposeInput, itsIsTransposeOutput;
+    unsigned itsNrStations;
+
+    // All cores at the same position within a pset form a group.  The
+    // transpose is done between members of this group.
+    struct {
+      struct {
+	std::vector<int> counts, displacements; // handed to MPI_Alltoallv() as &counts[0] / &displacements[0]
+      } send, receive;
+    } itsTransposeParams, itsTransposeMetaParams; // used by transpose() and transposeMetaData() respectively
+
+    struct metaData { // transferred as raw MPI_BYTE data, so sender and receiver must agree on its layout
+      float    delayAtBegin, delayAfterEnd;
+      unsigned alignmentShift;
+      char     flagsBuffer[132]; // enough for 16 flag ranges
+    } itsInputMetaData; // send buffer of transposeMetaData()
+
+    std::vector<struct metaData> itsOutputMetaData; // receive buffer of transposeMetaData(), indexed by station
+    MPI_Comm			 itsTransposeGroup; // communicator passed to both MPI_Alltoallv() calls
+
+    static std::vector<MPI_Comm> allTransposeGroups;
+};
+
+#endif // defined HAVE_MPI
+
+} // namespace CS1
+} // namespace LOFAR
+
+#endif
diff --git a/Appl/CEP/CS1/CS1_BGLProc/src/TransposedData.h b/Appl/CEP/CS1/CS1_BGLProc/src/TransposedData.h
new file mode 100644
index 0000000000000000000000000000000000000000..8fab887c14b6d7e3b0aa3d2416abacdab1e9e2cc
--- /dev/null
+++ b/Appl/CEP/CS1/CS1_BGLProc/src/TransposedData.h
@@ -0,0 +1,71 @@
+#ifndef LOFAR_APPL_CEP_CS1_CS1_BGL_PROC_TRANSPOSED_DATA_H
+#define LOFAR_APPL_CEP_CS1_CS1_BGL_PROC_TRANSPOSED_DATA_H
+
+#include <Common/lofar_complex.h>
+#include <CS1_Interface/CS1_Config.h>
+#include <CS1_Interface/SparseSet.h>
+
+#include <Allocator.h>
+
+#include <boost/multi_array.hpp>
+
+
+namespace LOFAR {
+namespace CS1 {
+
+// Samples (placed in a Heap through an Overlay) plus per-station flags, delays and alignment shifts.
+class TransposedData
+{
+  public:
+    TransposedData(const Heap &, unsigned nrStations, unsigned nrSamplesToBGLProc);
+    ~TransposedData();
+    static size_t requiredSize(unsigned nrStations, unsigned nrSamplesToBGLProc); // bytes needed for samples
+    typedef INPUT_SAMPLE_TYPE SampleType;
+
+  private:
+    // Not copyable: flags, delays and alignmentShifts are owned raw arrays, so a
+    // member-wise copy would delete [] them twice.  Declared, never defined.
+    TransposedData(const TransposedData &);
+    TransposedData &operator = (const TransposedData &);
+    Overlay overlay; // declared before samples: the initializer of samples allocates from it
+
+  public:
+    boost::multi_array_ref<SampleType, 3> samples; //[nrStations][nrSamplesToBGLProc][NR_POLARIZATIONS]
+    SparseSet<unsigned> *flags; //[nrStations]
+    typedef struct {
+      float delayAtBegin, delayAfterEnd;
+    } DelayIntervalType;
+    DelayIntervalType *delays; // [nrStations]
+    unsigned          *alignmentShifts; // [nrStations]
+};
+
+
+inline TransposedData::TransposedData(const Heap &heap, unsigned nrStations, unsigned nrSamplesToBGLProc)
+: // members are initialized in declaration order, so overlay exists before samples allocates from it
+  overlay(heap),
+  samples(static_cast<SampleType *>(overlay.allocate(requiredSize(nrStations, nrSamplesToBGLProc), 32)), boost::extents[nrStations][nrSamplesToBGLProc][NR_POLARIZATIONS]),
+  flags(new SparseSet<unsigned>[nrStations]), // this and the two arrays below are released with delete [] in the destructor
+  delays(new DelayIntervalType[nrStations]), // NOTE(review): the 32 passed to allocate() above is presumably a byte alignment -- confirm in Allocator.h
+  alignmentShifts(new unsigned[nrStations])
+{
+}
+
+
+inline TransposedData::~TransposedData()
+{ // free the three per-station arrays, then hand the sample block back to the overlay
+  delete [] delays;
+  delete [] alignmentShifts;
+  delete [] flags;
+  overlay.deallocate(samples.origin());
+}
+
+
+inline size_t TransposedData::requiredSize(unsigned nrStations, unsigned nrSamplesToBGLProc)
+{ // number of bytes taken by [nrStations][nrSamplesToBGLProc][NR_POLARIZATIONS] samples
+  return nrStations * (sizeof(SampleType) * NR_POLARIZATIONS) * nrSamplesToBGLProc;
+}
+
+} // namespace CS1
+} // namespace LOFAR
+
+#endif
diff --git a/Appl/CEP/CS1/CS1_BGLProc/src/WH_BGL_Processing.h b/Appl/CEP/CS1/CS1_BGLProc/src/WH_BGL_Processing.h
deleted file mode 100644
index 06ce0cbd6d12419168d5ef3fab2861259bcde910..0000000000000000000000000000000000000000
--- a/Appl/CEP/CS1/CS1_BGLProc/src/WH_BGL_Processing.h
+++ /dev/null
@@ -1,202 +0,0 @@
-//#  WH_BGL_Processing.h: polyphase filter and correlator
-//#
-//#  P.O.Box 2, 7990 AA Dwingeloo, The Netherlands, seg@astron.nl
-//#
-//#  This program is free software; you can redistribute it and/or modify
-//#  it under the terms of the GNU General Public License as published by
-//#  the Free Software Foundation; either version 2 of the License, or
-//#  (at your option) any later version.
-//#
-//#  This program is distributed in the hope that it will be useful,
-//#  but WITHOUT ANY WARRANTY; without even the implied warranty of
-//#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-//#  GNU General Public License for more details.
-//#
-//#  You should have received a copy of the GNU General Public License
-//#  along with this program; if not, write to the Free Software
-//#  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-//#
-//#  $Id$
-
-#ifndef LOFAR_APPL_CEP_CS1_CS1_BGL_PROC_WH_BGL_PROCESSING_H
-#define LOFAR_APPL_CEP_CS1_CS1_BGL_PROC_WH_BGL_PROCESSING_H
-
-#if 0 || !defined HAVE_BGL
-#define C_IMPLEMENTATION
-#endif
-
-#if defined HAVE_FFTW3
-#include <fftw3.h>
-#elif defined HAVE_FFTW2
-#include <fftw.h>
-#else
-#error Should have FFTW3 or FFTW2 installed
-#endif
-
-#include <tinyCEP/WorkHolder.h>
-#include <CS1_Interface/bitset.h>
-#include <CS1_Interface/CS1_Config.h>
-#include <CS1_Interface/DH_Subband.h>
-//#include <CS1_Interface/DH_RFI_Mitigation.h>
-#include <CS1_Interface/DH_Visibilities.h>
-#include <CS1_Interface/CS1_Parset.h>
-
-#include <boost/multi_array.hpp>
-#include <malloc.h>
-
-
-#if defined HAVE_BGL
-#define CACHE_LINE_SIZE	32
-#define CACHE_ALIGNED	__attribute__ ((aligned(CACHE_LINE_SIZE)))
-#else
-#define CACHE_LINE_SIZE	16
-#define CACHE_ALIGNED
-#endif
-
-
-namespace LOFAR {
-namespace CS1 {
-
-template <typename T> class CacheAlignedAllocator : public std::allocator<T>
-{
-  public:
-    typedef typename std::allocator<T>::size_type size_type;
-    typedef typename std::allocator<T>::pointer pointer;
-    typedef typename std::allocator<T>::const_pointer const_pointer;
-
-    pointer allocate(size_type size, const_pointer /*hint*/ = 0)
-    {
-      return static_cast<pointer>(memalign(CACHE_LINE_SIZE, size * sizeof(T)));
-    }
-
-    void deallocate(pointer ptr, size_type /*size*/)
-    {
-      free(ptr);
-    }
-};
-
-
-class FIR {
-  public:
-#if defined C_IMPLEMENTATION
-    FIR();
-
-    fcomplex processNextSample(fcomplex sample, const float weights[NR_TAPS]);
-
-    fcomplex itsDelayLine[NR_TAPS];
-#endif
-
-    static const float weights[NR_SUBBAND_CHANNELS][NR_TAPS];
-};
-
-class WH_BGL_Processing: public WorkHolder {
-  public:
-    enum inDataHolders {
-      SUBBAND_CHANNEL,
-//    RFI_MITIGATION_CHANNEL,
-      NR_IN_CHANNELS
-    };
-
-    enum outDataHolders {
-      VISIBILITIES_CHANNEL,
-      NR_OUT_CHANNELS
-    };
-
-    explicit WH_BGL_Processing(const string &name, unsigned coreNumber, CS1_Parset *ps);
-    virtual ~WH_BGL_Processing();
-
-    static WorkHolder *construct(const string &name, unsigned coreNumber, CS1_Parset *ps);
-    virtual WH_BGL_Processing *make(const string &name);
-
-    virtual void preprocess();
-    virtual void process();
-    virtual void dump() const;
-    virtual void postprocess();
-
-    DH_Subband *get_DH_Subband() {
-      return dynamic_cast<DH_Subband *>(getDataManager().getInHolder(SUBBAND_CHANNEL));
-    }
-
-#if 0
-    DH_RFI_Mitigation *get_DH_RFI_Mitigation() {
-      return dynamic_cast<DH_RFI_Mitigation *>(getDataManager().getInHolder(RFI_MITIGATION_CHANNEL));
-    }
-#endif
-
-    DH_Visibilities *get_DH_Visibilities() {
-      return dynamic_cast<DH_Visibilities *>(getDataManager().getOutHolder(VISIBILITIES_CHANNEL));
-    }
-
-  private:
-    /// forbid copy constructor
-    WH_BGL_Processing(const WH_BGL_Processing&);
-
-    /// forbid assignment
-    WH_BGL_Processing &operator = (const WH_BGL_Processing&);
-
-    void doPPF(double baseFrequency), bypassPPF();
-    void computeFlags();
-    void doCorrelate();
-
-#if defined C_IMPLEMENTATION
-    fcomplex phaseShift(unsigned time, unsigned chan, double baseFrequency, const DH_Subband::DelayIntervalType &delay) const;
-#else
-    void computePhaseShifts(struct phase_shift phaseShifts[/*itsNrSamplesPerIntegration*/], const DH_Subband::DelayIntervalType &delay, double baseFrequency) const;
-#endif
-
-    /// FIR Filter variables
-#if defined HAVE_FFTW3
-    fftwf_plan	    itsFFTWPlan;
-#elif defined HAVE_FFTW2
-    fftw_plan	    itsFFTWPlan;
-#endif
-
-    vector<double>  itsCenterFrequencies;
-    double	    itsChannelBandwidth;
-
-    unsigned	    itsNrStations, itsNrBaselines;
-    unsigned	    itsNrSamplesPerIntegration;
-
-    CS1_Parset     *itsCS1PS;
-    const unsigned  itsCoreNumber;
-    unsigned        itsFirstSubband, itsCurrentSubband, itsLastSubband, itsSubbandIncrement;
-    bool	    itsInputConnected;
-    bool	    itsDelayCompensation;
-
-#if defined C_IMPLEMENTATION
-    typedef boost::multi_array<FIR, 3> itsFIRsType;
-    itsFIRsType     *itsFIRs;
-    //[itsNrStations][NR_POLARIZATIONS][NR_SUBBAND_CHANNELS]
-
-    typedef boost::multi_array<fcomplex, 3> itsFFTdataType;
-    itsFFTdataType  *itsFFTinData;
-    //[NR_TAPS - 1 + itsNrSamplesPerIntegration][NR_POLARIZATIONS][NR_SUBBAND_CHANNELS]
-#else
-    typedef boost::multi_array<fcomplex, 2, CacheAlignedAllocator<fcomplex> > itsTmpType;
-    itsTmpType	    *itsTmp; //[4][itsNrSamplesPerIntegration]
-
-    typedef boost::multi_array<fcomplex, 3, CacheAlignedAllocator<fcomplex> > itsFFTdataType;
-    itsFFTdataType  *itsFFTinData;
-    //[itsNrSamplesPerIntegration][NR_POLARIZATIONS][NR_SUBBAND_CHANNELS + 4]
-
-    itsFFTdataType  *itsFFToutData;
-    //[2][NR_POLARIZATIONS][NR_SUBBAND_CHANNELS]
-#endif
-
-    // The "| 2" significantly improves transpose speeds for particular numbers
-    // of stations due to cache conflict effects.  The extra memory is not used.
-    typedef boost::multi_array<fcomplex, 4, CacheAlignedAllocator<fcomplex> > itsSamplesType;
-    itsSamplesType  *itsSamples;
-    //[NR_SUBBAND_CHANNELS][itsNrStations][itsNrSamplesPerIntegration | 2][NR_POLARIZATIONS] CACHE_ALIGNED
-
-    SparseSet<unsigned> *itsFlags; //[itsNrStations]
-    unsigned	    *itsNrValidSamples; //[itsNrBaselines]
-    float	    *itsCorrelationWeights; //[itsNrSamplesPerIntegration + 1]
-
-    bitset<NR_SUBBAND_CHANNELS> *itsRFIflags; //[itsNrStations]
-};
-
-} // namespace CS1
-} // namespace LOFAR
-
-#endif
diff --git a/Appl/CEP/CS1/CS1_BGLProc/test/Makefile.am b/Appl/CEP/CS1/CS1_BGLProc/test/Makefile.am
index d68d2e8ab419a7807e5cdcb67f795e810244e502..c031b50231412c5bc50dc00d74cc0115160d16ba 100644
--- a/Appl/CEP/CS1/CS1_BGLProc/test/Makefile.am
+++ b/Appl/CEP/CS1/CS1_BGLProc/test/Makefile.am
@@ -1,5 +1,5 @@
 
-check_PROGRAMS = tWH_BGL_Processing transpose
+check_PROGRAMS = tBGL_Processing transpose
 
 
 AM_CPPFLAGS = $(EXTRA_CPPFLAGS)
@@ -7,12 +7,13 @@ AM_CPPFLAGS = $(EXTRA_CPPFLAGS)
 AM_CXXFLAGS = \
 	-I../../../src
 
-tWH_BGL_Processing_SOURCES =			\
-tWH_BGL_Processing.cc				\
-../src/WH_BGL_Processing.cc			\
-../src/Correlator.S				\
-../src/FIR.S					\
-../src/FFT.S
+tBGL_Processing_SOURCES =			\
+tBGL_Processing.cc				\
+../src/Allocator.cc				\
+../src/BGL_Processing.cc			\
+../src/Correlator.cc				\
+../src/FIR.cc					\
+../src/PPF.cc
 
 transpose_SOURCES =				\
 transpose.cc					
@@ -22,7 +23,7 @@ CCASFLAGS		= $(patsubst -q%,,$(CPPFLAGS)) $(EXTRA_CPPFLAGS)
 
 
 TESTS   = 					\
-tWH_BGL_Processing.sh
+tBGL_Processing.sh
 
 
 include $(top_srcdir)/Makefile.common
diff --git a/Appl/CEP/CS1/CS1_BGLProc/test/tWH_BGL_Processing.cc b/Appl/CEP/CS1/CS1_BGLProc/test/tBGL_Processing.cc
similarity index 55%
rename from Appl/CEP/CS1/CS1_BGLProc/test/tWH_BGL_Processing.cc
rename to Appl/CEP/CS1/CS1_BGLProc/test/tBGL_Processing.cc
index c405f6d0c61a41373783b857d82788bdf803b693..095da99aa839f489088531f2e716e4ed136a418f 100644
--- a/Appl/CEP/CS1/CS1_BGLProc/test/tWH_BGL_Processing.cc
+++ b/Appl/CEP/CS1/CS1_BGLProc/test/tBGL_Processing.cc
@@ -23,8 +23,6 @@
 //# Always #include <lofar_config.h> first!
 #include <lofar_config.h>
 
-#include <APS/ParameterSet.h>
-
 #if defined HAVE_MPI
 #include <Transport/TH_MPI.h>
 #endif
@@ -34,8 +32,12 @@
 #include <rts.h>
 #endif
 
+#include <CS1_Interface/BGL_Configuration.h>
+#include <Common/DataConvert.h>
+#include <Common/Exception.h>
 #include <Common/Timer.h>
-#include <WH_BGL_Processing.h>
+#include <Transport/TH_Null.h>
+#include <BGL_Processing.h>
 #include <cmath>
 #include <cstring>
 #include <exception>
@@ -44,7 +46,7 @@ using namespace LOFAR;
 using namespace LOFAR::CS1;
 
 
-inline DH_Subband::SampleType toComplex(double phi)
+inline TransposedData::SampleType toComplex(double phi)
 {
     double s, c;
 
@@ -59,60 +61,58 @@ inline DH_Subband::SampleType toComplex(double phi)
 }
 
 
-void setSubbandTestPattern(WH_BGL_Processing &wh, unsigned nrStations, double signalFrequency, double sampleRate)
+void setSubbandTestPattern(TransposedData *transposedData, unsigned nrStations, double signalFrequency, double sampleRate)
 {
   // Simulate a monochrome complex signal into the PPF, with station 1 at a
   // distance of .25 labda to introduce a delay.  Also, a few samples can be
   // flagged.
 
-  std::clog << "setSubbandTestPattern::setTestPattern() ... ";
+  std::clog << "setSubbandTestPattern() ... ";
 
   static NSTimer timer("setTestPattern", true);
   timer.start();
 
-  DH_Subband		    *dh        = wh.get_DH_Subband();
-  DH_Subband::Samples3Dtype samples    = dh->getSamples3D();
-  DH_Subband::DelaysType    delays     = dh->getDelays();
-  DH_Subband::FlagsType     flags      = dh->getFlags();
-
-  const double		    distance   = .25; // labda
-  const double		    phaseShift = 2 * M_PI * distance;
+  const double distance   = .25; // labda
+  const double phaseShift = 2 * M_PI * distance;
 
   for (unsigned stat = 0; stat < nrStations; stat ++) {
-    delays[0].delayAtBegin = delays[0].delayAfterEnd = 0;
+    transposedData->delays[stat].delayAtBegin	= 0;
+    transposedData->delays[stat].delayAfterEnd	= 0;
+    transposedData->alignmentShifts[stat]	= 0;
   }
 
-  for (unsigned time = 0; time < samples[0].size(); time ++) {
+  for (unsigned time = 0; time < transposedData->samples[0].size(); time ++) { // the TransposedData member is named samples: [station][time][polarization]
     double phi = 2 * M_PI * signalFrequency * time / sampleRate;
-    DH_Subband::SampleType sample = toComplex(phi);
+    TransposedData::SampleType sample = toComplex(phi);
 
     for (unsigned stat = 0; stat < nrStations; stat ++) {
-      samples[stat][time][0] = samples[stat][time][1] = sample;
+      transposedData->samples[stat][time][0] = sample;
+      transposedData->samples[stat][time][1] = sample;
     }
 
     if (NR_POLARIZATIONS >= 2 && nrStations > 2) {
-      samples[1][time][1]    = toComplex(phi + phaseShift);
-      delays[1].delayAtBegin = delays[1].delayAfterEnd = distance / signalFrequency;
+      transposedData->samples[1][time][1]     = toComplex(phi + phaseShift);
+      transposedData->delays[1].delayAtBegin  = distance / signalFrequency;
+      transposedData->delays[1].delayAfterEnd = distance / signalFrequency;
     }
   }
   
   for (unsigned stat = 0; stat < nrStations; stat ++) {
-    flags[stat].reset();
+    transposedData->flags[stat].reset();
   }
 
 #if 1
-  if (dh->nrInputSamples() > 17000 && nrStations >= 6) {
-    flags[4].include(14000);
-    flags[5].include(17000);
+  if (transposedData->samples[0].size() > 17000 && nrStations >= 6) { // only flag sample indices that exist
+    transposedData->flags[4].include(14000);
+    transposedData->flags[5].include(17000);
   }
 #endif
 
-  dh->fillExtraData();
   std::clog << "done." << std::endl;;
 
-#if defined WORDS_BIGENDIAN
-  std::clog << "swapBytes()" << std::endl;
-  dh->swapBytes();
+#if 0 && defined WORDS_BIGENDIAN
+  std::clog << "swap bytes" << std::endl;
+  dataConvert(LittleEndian, transposedData->samples.data(), transposedData->samples.num_elements());
 #endif
 
   timer.stop();
@@ -120,9 +120,9 @@ void setSubbandTestPattern(WH_BGL_Processing &wh, unsigned nrStations, double si
 
 
 #if 0
-void setRFItestPattern(WH_BGL_Processing &wh, unsigned nrStations)
+void WH_BGL_ProcessingTest::setRFItestPattern(unsigned nrStations)
 {
-  DH_RFI_Mitigation::ChannelFlagsType *flags = wh.get_DH_RFI_Mitigation()->getChannelFlags();
+  DH_RFI_Mitigation::ChannelFlagsType *flags = get_DH_RFI_Mitigation()->getChannelFlags();
 
   memset(flags, 0, sizeof(DH_RFI_Mitigation::ChannelFlagsType));
 
@@ -134,16 +134,15 @@ void setRFItestPattern(WH_BGL_Processing &wh, unsigned nrStations)
 #endif
 
 
-void checkCorrelatorTestPattern(WH_BGL_Processing &wh, unsigned nrStations)
+void checkCorrelatorTestPattern(const CorrelatedData *correlatedData, unsigned nrStations)
 {
-  DH_Visibilities::VisibilitiesType	 visibilities	= wh.get_DH_Visibilities()->getVisibilities();
-  DH_Visibilities::AllNrValidSamplesType nrValidSamples = wh.get_DH_Visibilities()->getNrValidSamples();
+  const boost::multi_array_ref<fcomplex, 4> &visibilities = correlatedData->visibilities;
 
-  static const int			 channels[]	= { 1, 73, 255 };
+  static const int channels[] = { 1, 201, 255 };
 
   for (unsigned stat1 = 0; stat1 < std::min(nrStations, 8U); stat1 ++) {
     for (unsigned stat2 = stat1; stat2 < std::min(nrStations, 8U); stat2 ++) {
-      int bl = DH_Visibilities::baseline(stat1, stat2);
+      int bl = Correlator::baseline(stat1, stat2);
 
       std::cout << "S(" << stat1 << ") * ~S(" << stat2 << ") :\n";
 
@@ -155,7 +154,7 @@ void checkCorrelatorTestPattern(WH_BGL_Processing &wh, unsigned nrStations)
 	    int ch = channels[chidx];
 
 	    if (ch < NR_SUBBAND_CHANNELS) {
-	      std::cout << ' ' << visibilities[bl][ch][pol1][pol2] << '/' << nrValidSamples[bl][ch];
+	      std::cout << ' ' << visibilities[bl][ch][pol1][pol2] << '/' << correlatedData->nrValidSamples[bl][ch];
 	    }
 	  }
 
@@ -194,40 +193,60 @@ void doWork()
   }
 #endif
 
-  ACC::APS::ParameterSet parameterSet("CS1.parset");
-  CS1_Parset pset(&parameterSet);
-  double     signalFrequency = pset.refFreqs()[0] + 73 * pset.chanWidth(); // channel 73
-  int	     nRuns	     = 1;
+#if 0
+  ACC::APS::ParameterSet parameterSet("../../test/test.parset");
+  CS1_Parset ps(&parameterSet);
+  double     signalFrequency = ps.refFreqs()[0] + 73 * ps.chanWidth(); // channel 73
   const char *env;
 
-  if ((env = getenv("NRUNS")) != 0) {
-    nRuns = atoi(env);
-    std::clog << "setting nRuns to " << env << std::endl;
-  }
-
   if ((env = getenv("SIGNAL_FREQUENCY")) != 0) {
     signalFrequency = atof(env);
   }
 
-  std::clog << "base frequency = " << pset.refFreqs()[0] << std::endl;
-  std::clog << "channel bandwidth = " << pset.chanWidth() << std::endl;
+  std::clog << "base frequency = " << ps.refFreqs()[0] << std::endl;
+  std::clog << "channel bandwidth = " << ps.chanWidth() << std::endl;
   std::clog << "signal frequency = " << signalFrequency << std::endl;
-  WH_BGL_Processing wh("WH_BGL_Processing", 0, &pset);
 
-#if defined HAVE_MPI
-  wh.runOnNode(TH_MPI::getCurrentRank());
-#endif
+  BGL_Processing proc;
+  proc.preprocess(&ps);
+  setSubbandTestPattern(proc.itsTransposedData, ps.nrStations(), signalFrequency, ps.sampleRate());
+  proc.process();
 
-  wh.basePreprocess();
-  setSubbandTestPattern(wh, pset.nrStations(), signalFrequency, pset.sampleRate());
-//setRFItestPattern(wh);
+  checkCorrelatorTestPattern(proc.itsCorrelatedData, ps.nrStations());
+  proc.postprocess();
+#else
+  BGL_Configuration configuration;
+
+  configuration.nrStations()		  = 6;
+  configuration.nrSamplesPerIntegration() = 608;
+  configuration.nrSamplesToBGLProc()	  = NR_SUBBAND_CHANNELS * (configuration.nrSamplesPerIntegration() + NR_TAPS - 1) + 32 / sizeof(TransposedData::SampleType[NR_POLARIZATIONS]);
+  configuration.nrUsedCoresPerPset()	  = 1;
+  configuration.nrSubbandsPerPset()	  = 1;
+  configuration.delayCompensation()	  = true;
+  configuration.sampleRate()		  = 156250.0;
+  configuration.inputPsets()		  = std::vector<unsigned>();
+  configuration.outputPsets()		  = std::vector<unsigned>(1, 0);
+  configuration.refFreqs()		  = std::vector<double>(1, 384 * configuration.sampleRate());
+
+  double     signalFrequency = configuration.refFreqs()[0] + 73 * configuration.sampleRate() / NR_SUBBAND_CHANNELS; // channel 73
+  const char *env;
 
-  for (int i = 0; i < nRuns; i ++) {
-    wh.baseProcess();
+  if ((env = getenv("SIGNAL_FREQUENCY")) != 0) {
+    signalFrequency = atof(env);
   }
 
-  checkCorrelatorTestPattern(wh, pset.nrStations());
-  wh.basePostprocess();
+  std::clog << "base frequency = " << configuration.refFreqs()[0] << std::endl;
+  std::clog << "signal frequency = " << signalFrequency << std::endl;
+
+  TH_Null	 th;
+  BGL_Processing proc(&th);
+  proc.preprocess(configuration);
+  setSubbandTestPattern(proc.itsTransposedData, configuration.nrStations(), signalFrequency, configuration.sampleRate());
+  proc.process();
+
+  checkCorrelatorTestPattern(proc.itsCorrelatedData, configuration.nrStations());
+  proc.postprocess();
+#endif
 }
 
 
diff --git a/Appl/CEP/CS1/CS1_BGLProc/test/tWH_BGL_Processing.sh b/Appl/CEP/CS1/CS1_BGLProc/test/tBGL_Processing.sh
similarity index 100%
rename from Appl/CEP/CS1/CS1_BGLProc/test/tWH_BGL_Processing.sh
rename to Appl/CEP/CS1/CS1_BGLProc/test/tBGL_Processing.sh
diff --git a/Appl/CEP/CS1/CS1_BGLProc/test/test.parset b/Appl/CEP/CS1/CS1_BGLProc/test/test.parset
new file mode 100644
index 0000000000000000000000000000000000000000..b7ac771ddff1cff5fc9bdf9be37b8855fe00387a
--- /dev/null
+++ b/Appl/CEP/CS1/CS1_BGLProc/test/test.parset
@@ -0,0 +1,13 @@
+Observation.subbandList		= [384]
+Observation.nyquistZone		= 1
+Observation.sampleClock		= 160
+Observation.channelsPerSubband	= 256
+OLAP.BGLProc.nrPPFTaps		= 16
+Observation.nrPolarisations	= 2
+OLAP.BGLProc.inputPsets		= []
+OLAP.BGLProc.outputPsets	= [0]
+OLAP.storageStationNames	= ["S0", "S1", "S2", "S3", "S4", "S5"]
+OLAP.IONProc.useGather		= T
+OLAP.subbandsPerPset		= 1
+OLAP.BGLProc.integrationSteps	= 608
+OLAP.delayCompensation		= T
diff --git a/Appl/CEP/CS1/CS1_BGLProc/test/transpose.cc b/Appl/CEP/CS1/CS1_BGLProc/test/transpose.cc
index b98f16c93b3e2c994e19045817f0ac0a63091fa1..48b645da167fa169eb393f11f6110d135ab5ce20 100644
--- a/Appl/CEP/CS1/CS1_BGLProc/test/transpose.cc
+++ b/Appl/CEP/CS1/CS1_BGLProc/test/transpose.cc
@@ -54,12 +54,12 @@ namespace LOFAR {
 namespace CS1 {
 
 
-class DH_Station : public DataHolder
+class DH_RSP : public DataHolder
 {
   public:
     typedef i4complex SampleType;
 
-    DH_Station(const string &name, const ACC::APS::ParameterSet &ps);
+    DH_RSP(const string &name, const ACC::APS::ParameterSet &ps);
 
     DataHolder	 *clone() const;
     virtual void init();
@@ -71,28 +71,28 @@ class DH_Station : public DataHolder
 };
 
 
-DH_Station::DH_Station(const string &name, const ACC::APS::ParameterSet &ps)
+DH_RSP::DH_RSP(const string &name, const ACC::APS::ParameterSet &ps)
 :
-  DataHolder(name, "DH_Station")
+  DataHolder(name, "DH_RSP")
 {
   itsNrSamples = ps.getUint32("Observation.NSubbandSamples") * ps.getUint32("Observation.NPolarisations");
 }
 
 
-DataHolder *DH_Station::clone() const
+DataHolder *DH_RSP::clone() const
 {
-  return new DH_Station(*this);
+  return new DH_RSP(*this);
 }
 
 
-void DH_Station::init()
+void DH_RSP::init()
 {
   addField("Samples", BlobField<uint8>(1, itsNrSamples * sizeof(SampleType)), 32);
   createDataBlock(); // calls fillDataPointers
 }
 
 
-void DH_Station::fillDataPointers()
+void DH_RSP::fillDataPointers()
 {
   itsSamples = (SampleType *) getData<uint8>("Samples");
 }
diff --git a/Appl/CEP/CS1/CS1_IONProc/Makefile.am b/Appl/CEP/CS1/CS1_IONProc/Makefile.am
index 6695b4b8f2cc8afe8b18202f4dfce018bc669b1c..9874e078b2ae43d625609aaea30f90f9f7b826f3 100644
--- a/Appl/CEP/CS1/CS1_IONProc/Makefile.am
+++ b/Appl/CEP/CS1/CS1_IONProc/Makefile.am
@@ -1,4 +1,3 @@
-#SUBDIRS=src test include
 SUBDIRS=src
 
 DISTCHECK_CONFIGURE_FLAGS=\
diff --git a/Appl/CEP/CS1/CS1_IONProc/configure.in b/Appl/CEP/CS1/CS1_IONProc/configure.in
index 2690f36ecb85ab341c77e66469fa95aca59d01ae..1ed71a9b556d5088ceadaaf83319c74e62cb3ee7 100644
--- a/Appl/CEP/CS1/CS1_IONProc/configure.in
+++ b/Appl/CEP/CS1/CS1_IONProc/configure.in
@@ -70,10 +70,7 @@ dnl
 dnl Output Makefiles
 dnl
 AC_OUTPUT(
-include/Makefile
-include/CS1_IONProc/Makefile
 src/Makefile
-test/Makefile
 Makefile
 CS1_IONProc.spec
 )
diff --git a/Appl/CEP/CS1/CS1_IONProc/include/CS1_IONProc/Makefile.am b/Appl/CEP/CS1/CS1_IONProc/include/CS1_IONProc/Makefile.am
deleted file mode 100644
index 1d46807de0b3867a5809a78cd124d4cc887c3f82..0000000000000000000000000000000000000000
--- a/Appl/CEP/CS1/CS1_IONProc/include/CS1_IONProc/Makefile.am
+++ /dev/null
@@ -1,3 +0,0 @@
-pkginclude_HEADERS =
-
-include $(top_srcdir)/Makefile.common
diff --git a/Appl/CEP/CS1/CS1_IONProc/include/Makefile.am b/Appl/CEP/CS1/CS1_IONProc/include/Makefile.am
deleted file mode 100644
index b258baf1f338d715d58837a57bd1a7f35e08899d..0000000000000000000000000000000000000000
--- a/Appl/CEP/CS1/CS1_IONProc/include/Makefile.am
+++ /dev/null
@@ -1,3 +0,0 @@
-SUBDIRS = CS1_IONProc
-
-include $(top_srcdir)/Makefile.common
diff --git a/Appl/CEP/CS1/CS1_IONProc/src/AH_ION_Gather.cc b/Appl/CEP/CS1/CS1_IONProc/src/AH_ION_Gather.cc
index 0156c751bdf02ce60f7158853f3a424d6448f8c7..d9a63262f93ae0e91020c657d3902a2c5e987f6d 100644
--- a/Appl/CEP/CS1/CS1_IONProc/src/AH_ION_Gather.cc
+++ b/Appl/CEP/CS1/CS1_IONProc/src/AH_ION_Gather.cc
@@ -53,19 +53,23 @@ AH_ION_Gather::~AH_ION_Gather()
 void AH_ION_Gather::define(const KeyValueMap&)
 {
   itsCS1PS = new CS1_Parset(&itsParamSet);
-  itsWH = new WH_ION_Gather("ION_Gather", itsCS1PS);
+
+  unsigned myPsetNumber = getBGLpersonality()->getPsetNum();
+
+  itsWH = new WH_ION_Gather("ION_Gather", myPsetNumber, itsCS1PS);
   itsWH->runOnNode(0);
 
   DataManager *dm = new DataManager(itsWH->getDataManager());
   itsWH->setDataManager(dm);
   dm->setOutBuffer(0, false, 2);
 
-  unsigned nrPsetsPerStorage = itsCS1PS->getInt32("OLAP.psetsPerStorage");
-  unsigned pset		     = getBGLpersonality()->getPsetNum();
-  unsigned storage_host	     = pset / nrPsetsPerStorage;
-  unsigned storage_port	     = pset % nrPsetsPerStorage;
-
   itsVisibilitiesStub = new Stub_BGL(true, false, "BGLProc_Storage", itsCS1PS);
+
+  unsigned myPsetIndex	     = itsCS1PS->outputPsetIndex(myPsetNumber);
+  unsigned nrPsetsPerStorage = itsCS1PS->nrPsetsPerStorage();
+  unsigned storage_host	     = myPsetIndex / nrPsetsPerStorage;
+  unsigned storage_port      = myPsetIndex % nrPsetsPerStorage;
+
   itsVisibilitiesStub->connect(storage_host, storage_port, *dm, /*channel*/ 0);
 }
 
@@ -86,14 +90,12 @@ void AH_ION_Gather::prerun()
 
 void AH_ION_Gather::run(int steps)
 {
-  steps *= itsCS1PS->getUint32("OLAP.BGLProc.nodesPerPset");
-
   for (int i = 0; i < steps; i++) {
-    class NSTimer timer("baseProcess", true);
+    //class NSTimer timer("baseProcess", true);
 
-    timer.start();
+    //timer.start();
     itsWH->baseProcess();
-    timer.stop();
+    //timer.stop();
   }
 }
 
diff --git a/Appl/CEP/CS1/CS1_InputSection/src/AH_InputSection.cc b/Appl/CEP/CS1/CS1_IONProc/src/AH_InputSection.cc
similarity index 52%
rename from Appl/CEP/CS1/CS1_InputSection/src/AH_InputSection.cc
rename to Appl/CEP/CS1/CS1_IONProc/src/AH_InputSection.cc
index 9462a1ef4504d6de1ed379eb5d5ec6a7de4d8a5b..4dd8a0ff7f090430900b33f106cefc9a3c37bda3 100644
--- a/Appl/CEP/CS1/CS1_InputSection/src/AH_InputSection.cc
+++ b/Appl/CEP/CS1/CS1_IONProc/src/AH_InputSection.cc
@@ -25,10 +25,13 @@
 
 //# Includes
 #include <Common/LofarLogger.h>
-#include <CS1_InputSection/AH_InputSection.h>
-#include <CS1_InputSection/WH_InputSection.h>
-#include <CS1_InputSection/Connector.h>
+#include <CS1_IONProc/AH_InputSection.h>
+#include <CS1_IONProc/WH_InputSection.h>
+#include <CS1_IONProc/BGL_Personality.h>
+
+#include <CS1_IONProc/Connector.h>
 #include <CS1_Interface/RSPTimeStamp.h>
+#include <CEPFrame/Step.h>
 
 //# Workholders
 
@@ -43,103 +46,53 @@ namespace CS1 {
 AH_InputSection::AH_InputSection() :
   itsCS1PS(0),
   itsDelayStub(0),
-  itsOutputStub(0)
+  itsWH(0)
 {
 }
 
 AH_InputSection::~AH_InputSection()
 {
+  std::clog << "AH_InputSection::~AH_InputSection()" << std::endl;
   undefine();
 }
 
-void AH_InputSection::undefine()
-{
-  for (unsigned i = 0; i < itsWHs.size(); i ++)
-    delete itsWHs[i];
-
-  itsWHs.resize(0);
-
-  delete itsOutputStub;
-  delete itsDelayStub;
-  delete itsCS1PS;
-  itsDelayStub = 0;
-  itsOutputStub = 0;
-  itsCS1PS = 0;
-}
-
 void AH_InputSection::define(const LOFAR::KeyValueMap&) 
 {
   LOG_TRACE_FLOW_STR("Start of AH_InputSection::define()");
+
   itsCS1PS = new CS1_Parset(&itsParamSet);
   itsCS1PS->adoptFile("OLAP.parset");
 
-   TimeStamp::setMaxBlockId(itsCS1PS->sampleRate());
-
-  LOG_TRACE_FLOW_STR("Create the top-level composite");
-  Composite comp(0, 0, "topComposite");
-  setComposite(comp); // tell the ApplicationHolder this is the top-level compisite
-
-  LOG_TRACE_FLOW_STR("Create the input side delay stub");
-  LOG_TRACE_FLOW_STR("Create the RSP reception Steps");
-
-  itsDelayStub  = new Stub_Delay(true, itsCS1PS);
-  itsOutputStub = new Stub_BGL(false, false, "input_BGLProc", itsCS1PS);
+  TimeStamp::setMaxBlockId(itsCS1PS->sampleRate());
 
   // TODO: support multiple RSPs per station
+#if 0
   itsInputNodes  = itsCS1PS->getUint32Vector("Input.InputNodes");
   itsOutputNodes = itsCS1PS->getUint32Vector("Input.OutputNodes");
   unsigned nrOutputChannels = itsCS1PS->nrOutputsPerInputNode();
-
-#if defined HAVE_MPI
-  unsigned nrNodes = TH_MPI::getNumberOfNodes();
-#else
-  unsigned nrNodes = 1;
 #endif
 
-  itsWHs.resize(nrNodes);
-
-  bool doTranspose = itsInputNodes.size() > 0 && itsOutputNodes.size() > 0;
-
-  for (unsigned node = 0, cell = 0, station = 0; node < nrNodes; node ++) {
-    bool doInput  = std::find(itsInputNodes.begin(), itsInputNodes.end(), node) != itsInputNodes.end();
-    bool doOutput = std::find(itsOutputNodes.begin(), itsOutputNodes.end(), node) != itsOutputNodes.end();
-    TransportHolder *th = 0;
-
-    if (doInput) {
-      th = Connector::readTH(itsCS1PS, itsCS1PS->stationName(station)); 
-    }
-
-    itsWHs[node] = new WH_InputSection("InputSection", doInput, doTranspose, doOutput, itsCS1PS, th, doInput ? station : 0, doInput ? 1 : 0, doOutput ? nrOutputChannels : 0, itsInputNodes, itsOutputNodes);
-    Step *step = new Step(itsWHs[node], "Step", false);
-    step->runOnNode(node); 
-    comp.addBlock(step);
-
-    if (doInput) {
-      itsDelayStub->connect(station, step->getInDataManager(0), 0);
-      station ++;
-    }
-
-    if (doOutput) {
-      DataManager      &dm = step->getOutDataManager(0);
-      std::vector<int> channels(nrOutputChannels);
-
-      for (unsigned core = 0; core < nrOutputChannels; core ++) {
-	dm.setOutBuffer(core, false, itsCS1PS->useScatter() ? 8 : 3);
-	itsOutputStub->connect(cell, core, dm, core);
-	channels[core] = core;
-      }
-	
-      dm.setOutRoundRobinPolicy(channels, itsCS1PS->getInt32("OLAP.BGLProc.maxConcurrentComm"));
-      cell ++;
-    }
-  }
+  unsigned myPsetNumber	 = getBGLpersonality()->getPsetNum();
+  unsigned stationNumber = itsCS1PS->inputPsetIndex(myPsetNumber);
+  std::clog << "station " << stationNumber << " = " << itsCS1PS->stationName(stationNumber) << std::endl;
+  TransportHolder *th = Connector::readTH(itsCS1PS, itsCS1PS->stationName(stationNumber)); //FIXME probably never deleted
+  itsWH = new WH_InputSection("InputSection", stationNumber, itsCS1PS, th);
+  Step *step = new Step(itsWH, "Step", false);
+  step->runOnNode(0); 
+  Composite comp(0, 0, "topComposite");
+  setComposite(comp); // tell the ApplicationHolder this is the top-level compisite
+  comp.addBlock(step);
+
+  itsDelayStub  = new Stub_Delay(true, itsCS1PS);
+  itsDelayStub->connect(stationNumber, step->getInDataManager(0), 0);
+
   LOG_TRACE_FLOW_STR("Finished define()");
 }
 
 void AH_InputSection::run(int steps)
 {
   LOG_TRACE_FLOW_STR("Start AH_InputSection::run() "  );
-  
+
   for (int i = 0; i < steps; i++) {
     LOG_TRACE_LOOP_STR("processing run " << i );
     getComposite().process();
@@ -147,5 +100,13 @@ void AH_InputSection::run(int steps)
   LOG_TRACE_FLOW_STR("Finished AH_InputSection::run() "  );
 }
 
+void AH_InputSection::undefine() // deletes the objects held through itsWH, itsDelayStub and itsCS1PS
+{
+  delete itsWH;		itsWH        = 0; // each pointer is zeroed after deletion, so a repeated call only deletes null pointers
+  delete itsDelayStub;	itsDelayStub = 0;
+  delete itsCS1PS;	itsCS1PS     = 0;
+}
+
+
 } // namespace CS1
 } // namespace LOFAR
diff --git a/Appl/CEP/CS1/CS1_InputSection/include/CS1_InputSection/ACCmain_InputSection.h b/Appl/CEP/CS1/CS1_IONProc/src/AH_InputSection.h
similarity index 55%
rename from Appl/CEP/CS1/CS1_InputSection/include/CS1_InputSection/ACCmain_InputSection.h
rename to Appl/CEP/CS1/CS1_IONProc/src/AH_InputSection.h
index 90753d98ccd8dbf5f162d92981fe19aebf94b117..7506828e732837e9317c7491489a98706f60ddff 100644
--- a/Appl/CEP/CS1/CS1_InputSection/include/CS1_InputSection/ACCmain_InputSection.h
+++ b/Appl/CEP/CS1/CS1_IONProc/src/AH_InputSection.h
@@ -1,4 +1,4 @@
-//#  ACCmain_InputSection.h: main loop that can be used by any ACC enabled program
+//#  AH_InputSection.h: ApplicationHolder that builds and runs the input section
 //#
 //#  Copyright (C) 2006
 //#  ASTRON (Netherlands Foundation for Research in Astronomy)
@@ -20,23 +20,44 @@
 //#
 //#  $Id$
 
-#ifndef LOFAR_CS1_INPUTSECTION_ACCMAIN_INPUTSECTION_H
-#define LOFAR_CS1_INPUTSECTION_ACCMAIN_INPUTSECTION_H
+#ifndef LOFAR_CS1_INPUTSECTION_AH_INPUTSECTION_H
+#define LOFAR_CS1_INPUTSECTION_AH_INPUTSECTION_H
 
 // \file
-// main loop that can be used by any ACC enabled program
+// ApplicationHolder that builds and runs the input section.
 
 //# Never #include <config.h> or #include <lofar_config.h> in a header file!
 
 //# Includes
-#include <PLC/ProcessControl.h>
+#include <CEPFrame/ApplicationHolder.h>
+#include <CS1_Interface/Stub_Delay.h>
+#include <CS1_Interface/CS1_Parset.h>
 
-namespace LOFAR 
+namespace LOFAR {
+namespace CS1 {
+
+class AH_InputSection: public ApplicationHolder
 {
-  namespace CS1
-  {
-    int ACCmain_InputSection (int argc, char* argv[], ACC::PLC::ProcessControl* theProcess);
-  } // namespace CS1
+  public:
+		 AH_InputSection();
+    virtual      ~AH_InputSection();
+
+    virtual void define(const LOFAR::KeyValueMap&);
+    virtual void run(int nsteps);
+    virtual void undefine();
+
+  private:
+    // Copying is not allowed
+    AH_InputSection (const AH_InputSection& that);
+    AH_InputSection& operator= (const AH_InputSection& that);
+
+    //# Datamembers
+    CS1_Parset *itsCS1PS;
+    Stub_Delay *itsDelayStub;
+    WorkHolder *itsWH;
+};
+
+} // namespace CS1
 } // namespace LOFAR
 
 #endif
diff --git a/Appl/CEP/CS1/CS1_InputSection/src/BeamletBuffer.cc b/Appl/CEP/CS1/CS1_IONProc/src/BeamletBuffer.cc
similarity index 64%
rename from Appl/CEP/CS1/CS1_InputSection/src/BeamletBuffer.cc
rename to Appl/CEP/CS1/CS1_IONProc/src/BeamletBuffer.cc
index 4d17d4eb7e6f717f9151cd48fa11fa2f085efc57..4a816cfe86e5bd95bb724694804d4c641d1f8a43 100644
--- a/Appl/CEP/CS1/CS1_InputSection/src/BeamletBuffer.cc
+++ b/Appl/CEP/CS1/CS1_IONProc/src/BeamletBuffer.cc
@@ -25,9 +25,10 @@
 
 //# Includes
 #include <Common/LofarLogger.h>
-#include <CS1_InputSection/BeamletBuffer.h>
 #include <Common/lofar_complex.h>
 #include <Common/Timer.h>
+#include <CS1_IONProc/ION_Allocator.h>
+#include <CS1_IONProc/BeamletBuffer.h>
 #include <CS1_Interface/RSPTimeStamp.h>
 
 namespace LOFAR {
@@ -36,20 +37,22 @@ namespace CS1 {
 BeamletBuffer::BeamletBuffer(int bufferSize, unsigned nSubbands, unsigned history, unsigned readWriteDelay):
   itsNSubbands(nSubbands),
   itsSize(bufferSize),
-  itsSBBuffers(boost::extents[nSubbands][bufferSize][NR_POLARIZATIONS]),
+  itsSBBuffers(reinterpret_cast<SampleType *>(ION_Allocator().allocate(nSubbands * bufferSize * NR_POLARIZATIONS * sizeof(SampleType), 32)), boost::extents[nSubbands][bufferSize][NR_POLARIZATIONS]),
   itsLockedRange(bufferSize, readWriteDelay, bufferSize - history, 0),
   itsDroppedItems(0),
   itsDummyItems(0),
   itsSkippedItems(0),
-  itsWriteTimer("write"),
-  itsReadTimer("read")
+  itsReadTimer("read"),
+  itsWriteTimer("write")
 {
-  mutex::scoped_lock sl(itsFlagsMutex);
+  pthread_mutex_init(&itsFlagsMutex, 0);
   itsFlags.include(0, bufferSize);
 }
 
 BeamletBuffer::~BeamletBuffer()
 {      
+  pthread_mutex_destroy(&itsFlagsMutex);
+  ION_Allocator().deallocate(itsSBBuffers.origin());
   clog<<"BeamletBuffer did not receive "<<itsDummyItems<<" stamps and received "<<itsDroppedItems<<" items too late. "<<itsSkippedItems<<" items were skipped (but may be received later)."<<endl;
   clog<<"BeamletBufferTimers:"<<endl;
   clog<<itsReadTimer<<endl;
@@ -72,14 +75,13 @@ void BeamletBuffer::checkForSkippedData(TimeStamp writeBegin) {
     itsSkippedItems += flagEnd - itsHighestWritten;
     unsigned startI = mapTime2Index(realBegin), endI = mapTime2Index(flagEnd);
 
-    {
-      mutex::scoped_lock sl(itsFlagsMutex);
-      if (endI < startI) {
-	itsFlags.include(0, endI).include(startI, itsSize);
-      } else {
-	itsFlags.include(startI, endI);
-      }
-    } 
+    pthread_mutex_lock(&itsFlagsMutex);
+    if (endI < startI) {
+      itsFlags.include(0, endI).include(startI, itsSize);
+    } else {
+      itsFlags.include(startI, endI);
+    }
+    pthread_mutex_unlock(&itsFlagsMutex);
 
     itsWriteTimer.stop();
 
@@ -110,8 +112,6 @@ void BeamletBuffer::writeElements(Beamlet *data, TimeStamp begin, unsigned nElem
 
     unsigned startI = mapTime2Index(realBegin), endI = mapTime2Index(end);
 
-    //cerr<<"BeamletBuffer: write from "<<realBegin<<" instead of "<<begin<<endl;
-    //cerr<<"BeamletBuffer: Writing from "<<startI<<" to "<<endI<<" timestamp "<<begin<<endl;
     if (endI < startI) {
       // the data wraps around the allocated memory, so do it in two parts
       
@@ -122,16 +122,18 @@ void BeamletBuffer::writeElements(Beamlet *data, TimeStamp begin, unsigned nElem
 	data += nElements;		
       }
 
-      mutex::scoped_lock sl(itsFlagsMutex);
+      pthread_mutex_lock(&itsFlagsMutex);
       itsFlags.exclude(startI, itsSize).exclude(0, endI);
+      pthread_mutex_unlock(&itsFlagsMutex);
     } else {
       for (unsigned sb = 0; sb < itsNSubbands; sb ++) {
 	memcpy(itsSBBuffers[sb][startI].origin(), data, sizeof(SampleType[endI - startI][NR_POLARIZATIONS]));
 	data += nElements;		
       }
 
-      mutex::scoped_lock sl(itsFlagsMutex);
+      pthread_mutex_lock(&itsFlagsMutex);
       itsFlags.exclude(startI, endI);
+      pthread_mutex_unlock(&itsFlagsMutex);
     }
 
     itsWriteTimer.stop();
@@ -143,7 +145,8 @@ void BeamletBuffer::writeElements(Beamlet *data, TimeStamp begin, unsigned nElem
   itsLockedRange.writeUnlock(end);
 }
 
-void BeamletBuffer::getElements(boost::multi_array_ref<SampleType, 3> &buffers, SparseSet<unsigned> &flags, TimeStamp begin, unsigned nElements)
+#if 0
+void BeamletBuffer::getElements(boost::multi_array_ref<SampleType, 3> &buffers, SparseSet &flags, TimeStamp begin, unsigned nElements)
 {
   //ASSERTSTR(buffers.size() == itsNSubbands, "BeamletBuffer received wrong number of buffers to write to (in getElements).");
   TimeStamp end = begin + nElements;
@@ -167,15 +170,18 @@ void BeamletBuffer::getElements(boost::multi_array_ref<SampleType, 3> &buffers,
       memcpy(buffers[sb][firstChunk].origin(), itsSBBuffers[sb][0].origin(),      sizeof(SampleType[endI][NR_POLARIZATIONS]));
     }
 
-    mutex::scoped_lock sl(itsFlagsMutex);
+    pthread_mutex_lock(&itsFlagsMutex);
     flags |= (itsFlags.subset(0,      endI)    += firstChunk);
     flags |= (itsFlags.subset(startI, itsSize) -= startI);
+    pthread_mutex_unlock(&itsFlagsMutex);
   } else {
     for (unsigned sb = 0; sb < itsNSubbands; sb ++) {
       memcpy(buffers[sb].origin(), itsSBBuffers[sb][startI].origin(), sizeof(SampleType[endI - startI][NR_POLARIZATIONS]));
     }	  
-    mutex::scoped_lock sl(itsFlagsMutex);
+
+    pthread_mutex_lock(&itsFlagsMutex);
     flags |= (itsFlags.subset(startI, endI) -= startI);
+    pthread_mutex_unlock(&itsFlagsMutex);
   }
 
   //cout<<"BeamletBuffer: getting elements "<<begin<<" - "<<begin+nElements<<": "<<flags<<endl;
@@ -183,7 +189,76 @@ void BeamletBuffer::getElements(boost::multi_array_ref<SampleType, 3> &buffers,
   itsReadTimer.stop();
   itsLockedRange.readUnlock(end);
 }
+#endif
+
+void BeamletBuffer::startReadTransaction(TimeStamp begin, unsigned nElements) // opens a read on [begin, begin + nElements); stopReadTransaction() releases it
+{
+  itsEnd = begin + nElements; // remembered so stopReadTransaction() can unlock up to the same stamp
+  TimeStamp realBegin = itsLockedRange.readLock(begin, itsEnd); // NOTE(review): presumably the first stamp actually available - confirm in LockedRange
+  itsNInvalid = realBegin - begin; // leading samples that readFlags() will always flag
+  itsDummyItems += itsNInvalid * itsNSubbands; // statistics only; printed by the destructor
+
+  itsStartI = mapTime2Index(begin), itsEndI = mapTime2Index(itsEnd); // buffer indices are derived from begin, not from realBegin
+}
+
+#if 0
+void BeamletBuffer::readSubband(const boost::detail::multi_array::sub_array<SampleType, 2> &samples, unsigned subband)
+{
+  // copy the real data
+  if (itsEndI < itsStartI) {
+    // the data wraps around the allocated memory, so copy in two parts
+    unsigned firstChunk = itsSize - itsStartI;
+
+    memcpy((void *) samples.origin()	        , itsSBBuffers[subband][itsStartI].origin(), sizeof(SampleType[firstChunk][NR_POLARIZATIONS]));
+    memcpy((void *) samples[firstChunk].origin(), itsSBBuffers[subband][0].origin(),      sizeof(SampleType[itsEndI][NR_POLARIZATIONS]));
+  } else {
+    memcpy((void *) samples.origin(), itsSBBuffers[subband][itsStartI].origin(), sizeof(SampleType[itsEndI - itsStartI][NR_POLARIZATIONS]));
+  }
+}
+#endif
+
+void BeamletBuffer::sendSubband(TransportHolder *th, unsigned subband) /*const*/
+{ // Sends the samples of one subband for the current read transaction (see startReadTransaction) over th.
+  // Align to 32 bytes and make multiple of 32 bytes by prepending/appending
+  // extra data.  Always send 32 bytes extra, even if data was already aligned.
+  unsigned startI = itsStartI & ~(32 / sizeof(Beamlet) - 1); // round down
+  unsigned endI   = (itsEndI + 32 / sizeof(Beamlet)) & ~(32 / sizeof(Beamlet) - 1); // round up, possibly adding 32 bytes
+
+  if (itsEndI < itsStartI) { // decide on the unrounded indices: rounding can lift endI to or past startI and hide a wrap
+    // the data wraps around the allocated memory, so send it in two parts
+    unsigned firstChunk = itsSize - startI; // number of time samples from startI up to the end of the buffer
+
+    th->sendBlocking(itsSBBuffers[subband][startI].origin(), sizeof(SampleType[firstChunk][NR_POLARIZATIONS]), 0, 0);
+    th->sendBlocking(itsSBBuffers[subband][0].origin(),      sizeof(SampleType[endI][NR_POLARIZATIONS]), 0, 0);
+  } else {
+    th->sendBlocking(itsSBBuffers[subband][startI].origin(), sizeof(SampleType[endI - startI][NR_POLARIZATIONS]), 0, 0);
+  }
+}
+
+void BeamletBuffer::readFlags(SparseSet<unsigned> &flags) // overwrites flags with the flags of the current read transaction; index 0 corresponds to itsStartI
+{
+  flags.reset().include(0, itsNInvalid); // the first itsNInvalid samples (computed in startReadTransaction) are always flagged
+
+  if (itsEndI < itsStartI) {
+    // the data wraps around the allocated memory, so copy in two parts
+    pthread_mutex_lock(&itsFlagsMutex);
+    flags |= (itsFlags.subset(0,	 itsEndI) += itsSize - itsStartI); // wrapped part [0, itsEndI), placed behind the tail part (+= presumably shifts the set - confirm in SparseSet)
+    flags |= (itsFlags.subset(itsStartI, itsSize) -= itsStartI); // tail part [itsStartI, itsSize), rebased to start at 0
+    pthread_mutex_unlock(&itsFlagsMutex);
+  } else {
+    pthread_mutex_lock(&itsFlagsMutex);
+    flags |= (itsFlags.subset(itsStartI, itsEndI) -= itsStartI); // contiguous range, rebased to start at 0
+    pthread_mutex_unlock(&itsFlagsMutex);
+  }
+}
+
+void BeamletBuffer::stopReadTransaction() // ends the read opened by startReadTransaction()
+{
+  itsLockedRange.readUnlock(itsEnd); // itsEnd is the end stamp stored by startReadTransaction()
+}
+
 
+#if 0
 TimeStamp BeamletBuffer::startBufferRead() {
   TimeStamp oldest = itsLockedRange.getReadStart();
   TimeStamp fixPoint(oldest.getSeqId() + 1, 0); 
@@ -193,6 +268,7 @@ TimeStamp BeamletBuffer::startBufferRead() {
   itsLockedRange.readUnlock(fixPoint);
   return fixPoint;
 }
+#endif
 
 TimeStamp BeamletBuffer::startBufferRead(TimeStamp begin) {
   TimeStamp oldest = itsLockedRange.getReadStart();
diff --git a/Appl/CEP/CS1/CS1_IONProc/src/BeamletBuffer.h b/Appl/CEP/CS1/CS1_IONProc/src/BeamletBuffer.h
new file mode 100644
index 0000000000000000000000000000000000000000..9d041e14cb1d46e9dfb6dfeff45557881717a361
--- /dev/null
+++ b/Appl/CEP/CS1/CS1_IONProc/src/BeamletBuffer.h
@@ -0,0 +1,129 @@
+//#  BeamletBuffer.h: a cyclic buffer that holds the beamlets from the rspboards
+//#
+//#  Copyright (C) 2006
+//#  ASTRON (Netherlands Foundation for Research in Astronomy)
+//#  P.O.Box 2, 7990 AA Dwingeloo, The Netherlands, seg@astron.nl
+//#
+//#  This program is free software; you can redistribute it and/or modify
+//#  it under the terms of the GNU General Public License as published by
+//#  the Free Software Foundation; either version 2 of the License, or
+//#  (at your option) any later version.
+//#
+//#  This program is distributed in the hope that it will be useful,
+//#  but WITHOUT ANY WARRANTY; without even the implied warranty of
+//#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+//#  GNU General Public License for more details.
+//#
+//#  You should have received a copy of the GNU General Public License
+//#  along with this program; if not, write to the Free Software
+//#  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+//#
+//#  $Id$
+
+#ifndef LOFAR_CS1_INPUTSECTION_BEAMLETBUFFER_H
+#define LOFAR_CS1_INPUTSECTION_BEAMLETBUFFER_H
+
+// \file
+// a cyclic buffer that holds the beamlets from the rspboards
+
+//# Never #include <config.h> or #include <lofar_config.h> in a header file!
+
+//# Includes
+#include <Common/lofar_vector.h>
+#include <Common/lofar_complex.h>
+#include <Common/Timer.h>
+#include <Transport/TransportHolder.h>
+#include <CS1_IONProc/LockedRange.h>
+#include <CS1_Interface/CS1_Config.h>
+#include <CS1_Interface/RSPTimeStamp.h>
+#include <CS1_Interface/SparseSet.h>
+
+#include <boost/multi_array.hpp>
+#include <pthread.h>
+
+namespace LOFAR {
+namespace CS1 {
+
+typedef INPUT_SAMPLE_TYPE SampleType;
+
+class Beamlet {
+  SampleType Xpol, Ypol;
+};
+
+// A BeamletBuffer holds the beamlets coming from the RSP boards.
+// It is implemented as a cyclic buffer (using the mapTime2Index method).
+// Locking of time ranges is done using a LockedRange; itsFlags has its own mutex.
+// This buffer also reshuffles the data: it comes in as packets of different subbands per timestep
+// and leaves as a time series per subband.
+class BeamletBuffer
+{
+  public:
+    BeamletBuffer(int bufferSize, unsigned nSubbands, unsigned history, unsigned readWriteDelay);
+    ~BeamletBuffer();
+
+    void writeElements(Beamlet* data, TimeStamp begin, unsigned nElements); // stores incoming beamlets starting at stamp begin
+    //void getElements(boost::multi_array_ref<SampleType, 3> &buffers, SparseSet &flags, TimeStamp begin, unsigned nElements);
+
+    void startReadTransaction(TimeStamp begin, unsigned nElements); // must precede sendSubband(), alignmentShift() and readFlags(), which use the indices it sets
+    void sendSubband(TransportHolder *, unsigned subband) /*const*/; // sends one subband of the current transaction, padded to 32-byte boundaries
+    unsigned alignmentShift() const; // number of samples sendSubband() prepends to reach the 32-byte boundary
+    void readFlags(SparseSet<unsigned> &flags); // flags of the current transaction, relative to its begin
+    void stopReadTransaction(); // releases the read lock taken by startReadTransaction()
+
+    //TimeStamp startBufferRead();
+    TimeStamp startBufferRead(TimeStamp);
+
+    void setAllowOverwrite(bool o) {itsLockedRange.setOverwriting(o);}; // forwarded to the LockedRange
+
+    void clear() {itsLockedRange.clear();}; // forwarded to the LockedRange
+
+  private:
+    // Copying is not allowed
+    BeamletBuffer (const BeamletBuffer& that);
+    BeamletBuffer& operator= (const BeamletBuffer& that);
+
+    // Needed for mapping a timestamp to a place in the buffer
+    unsigned mapTime2Index(TimeStamp time) const;
+
+    // check for skipped data and flag it in chunks
+    void checkForSkippedData(TimeStamp writeBegin);
+
+    //# Datamembers
+    //vector<Beamlet *> itsSBBuffers;
+    pthread_mutex_t itsFlagsMutex; // held around every access to itsFlags after construction
+    SparseSet<unsigned> itsFlags; // flagged buffer indices: all set at construction, cleared by writeElements(), set again by checkForSkippedData()
+    unsigned itsNSubbands;
+    int itsSize; // number of time samples per subband in the cyclic buffer
+
+    boost::multi_array_ref<SampleType, 3> itsSBBuffers; // [subband][time index][polarization] view on memory obtained from ION_Allocator
+
+    TimeStamp itsHighestWritten;
+    
+    LockedRange<TimeStamp, int> itsLockedRange;
+
+    // read internals, set by startReadTransaction() and valid until stopReadTransaction()
+    TimeStamp itsEnd; // end stamp of the current read
+    size_t    itsStartI, itsEndI; // buffer indices of the begin and end stamps
+    unsigned  itsNInvalid; // number of leading samples that readFlags() always flags
+
+    // These are for statistics
+    unsigned itsDroppedItems, itsDummyItems, itsSkippedItems;
+    NSTimer  itsReadTimer, itsWriteTimer;
+};
+
+
+inline unsigned BeamletBuffer::alignmentShift() const // offset, in beamlets, of itsStartI from the preceding 32-byte boundary
+{
+  return itsStartI % (32 / sizeof(Beamlet)); // equals itsStartI minus the rounded-down start index used by sendSubband()
+}
+
+inline unsigned BeamletBuffer::mapTime2Index(TimeStamp time) const // position of a timestamp in the cyclic buffer
+{ 
+  // TODO: this is very slow because of the %
+  return time % itsSize; // the buffer is cyclic with period itsSize
+}
+
+} // namespace CS1
+} // namespace LOFAR
+
+#endif
diff --git a/Appl/CEP/CS1/CS1_IONProc/src/CS1_ION_main.cc b/Appl/CEP/CS1/CS1_IONProc/src/CS1_ION_main.cc
index 836b9be88072a673726c27870e4461a05da5374c..338b6cc49bc74f42f88939aa39b747b4049ce606 100644
--- a/Appl/CEP/CS1/CS1_IONProc/src/CS1_ION_main.cc
+++ b/Appl/CEP/CS1/CS1_IONProc/src/CS1_ION_main.cc
@@ -21,13 +21,21 @@
 #include <lofar_config.h>
 
 #include <PLC/ACCmain.h>
-#include <Common/LofarLogger.h>
 #include <tinyCEP/ApplicationHolderController.h>
-#include <CS1_IONProc/AH_ION_Scatter.h>
+#include <CS1_IONProc/AH_InputSection.h>
 #include <CS1_IONProc/AH_ION_Gather.h>
+#include <CS1_IONProc/BGL_Personality.h>
 #include <CS1_IONProc/TH_ZoidServer.h>
-
+#include <CS1_Interface/BGL_Command.h>
+#include <CS1_Interface/BGL_Configuration.h>
+#include <CS1_Interface/BGL_Mapping.h>
+#include <CS1_Interface/CS1_Parset.h>
+
+#include <sys/types.h>
+#include <sys/stat.h>
+#include <fcntl.h>
 #include <pthread.h>
+#include <unistd.h>
 
 extern "C" {
 #include <lofar.h>
@@ -38,40 +46,174 @@ using namespace LOFAR;
 using namespace LOFAR::CS1;
 
 
-static int  global_argc;
-static char **global_argv;
+static int	  global_argc;
+static char	  **global_argv;
+static unsigned   nrCoresPerPset;
 
-#if 0
-static char *argv[] = {
-  "CS1_ION_Gather",
-  "9999999",	// FIXME get this from real argv
-  0,
-};
-#endif
 
+static void configureCNs(const CS1_Parset &parset) // sends a PREPROCESS command followed by the run configuration to each of the first parset.nrCoresPerPset() cores
+{
+  BGL_Command	    command(BGL_Command::PREPROCESS);
+  BGL_Configuration configuration;
+
+  configuration.nrStations()              = parset.nrStations(); // every field below is copied from the parset into the configuration
+  configuration.nrSamplesPerIntegration() = parset.BGLintegrationSteps();
+  configuration.nrSamplesToBGLProc()      = parset.nrSamplesToBGLProc();
+  configuration.nrUsedCoresPerPset()      = parset.nrCoresPerPset();
+  configuration.nrSubbandsPerPset()       = parset.nrSubbandsPerPset();
+  configuration.delayCompensation()       = parset.getBool("OLAP.delayCompensation");
+  configuration.sampleRate()              = parset.sampleRate();
+  configuration.inputPsets()              = parset.getUint32Vector("OLAP.BGLProc.inputPsets");
+  configuration.outputPsets()             = parset.getUint32Vector("OLAP.BGLProc.outputPsets");
+  configuration.refFreqs()                = parset.refFreqs();
+
+  for (unsigned core = 0; core < parset.nrCoresPerPset(); core ++) { // one command/configuration pair per core
+    std::clog << "configure core " << core << std::endl;
+    command.write(TH_ZoidServer::theirTHs[core]); // the command is written before the configuration that belongs to it
+    configuration.write(TH_ZoidServer::theirTHs[core]);
+  }
+}
 
-void *scatter_thread(void *)
+
+static void unconfigureCNs(const CS1_Parset &parset) // sends a POSTPROCESS command to each of the first parset.nrCoresPerPset() cores; parset is only read
 {
-  INIT_LOGGER("CS1_ION_Scatter");
+  BGL_Command command(BGL_Command::POSTPROCESS);
 
-  std::clog << "starting scatter_thread" << std::endl;
-  AH_ION_Scatter myAH;
+  for (unsigned core = 0; core < parset.nrCoresPerPset(); core ++) { // same core range that configureCNs() addressed
+    std::clog << "unconfigure core " << core << std::endl;
+    command.write(TH_ZoidServer::theirTHs[core]);
+  }
+}
+
+
+static void stopCNs() // sends a STOP command to every core that was reported to lofar__init()
+{
+  BGL_Command command(BGL_Command::STOP);
+
+  for (unsigned core = 0; core < nrCoresPerPset; core ++) { // uses the file-level nrCoresPerPset set in lofar__init(), not the parset value
+    std::clog << "stopping core " << core << std::endl;
+    command.write(TH_ZoidServer::theirTHs[core]);
+  }
+}
+
+
+void *input_thread(void *argv)
+{
+  std::clog << "starting input thread, nrRuns = " << ((char **) argv)[2] << std::endl;
+  AH_InputSection myAH;
   ApplicationHolderController myAHController(myAH, 1); //listen to ACC every 1 runs
-  ACC::PLC::ACCmain(global_argc, global_argv, &myAHController);
-  //ACC::PLC::ACCmain(sizeof argv / sizeof *argv - 1, argv, &myAHController);
+  ACC::PLC::ACCmain(3, (char **) argv, &myAHController);
+  std::clog << "input thread finished" << std::endl;
   return 0;
 }
 
 
-void *gather_thread(void *)
+void *gather_thread(void *argv)
 {
-  INIT_LOGGER("CS1_ION_Gather");
-
-  std::clog << "starting gather_thread" << std::endl;
+  std::clog << "starting gather thread, nrRuns = " << ((char **) argv)[2] << std::endl;
   AH_ION_Gather myAH;
   ApplicationHolderController myAHController(myAH, 1); //listen to ACC every 1 runs
-  ACC::PLC::ACCmain(global_argc, global_argv, &myAHController);
-  //ACC::PLC::ACCmain(sizeof argv / sizeof *argv - 1, argv, &myAHController);
+  ACC::PLC::ACCmain(3, (char **) argv, &myAHController);
+  std::clog << "gather thread finished" << std::endl;
+  return 0;
+}
+
+
+void *master_thread(void *)
+{
+  std::clog << "starting master_thread" << std::endl;
+
+  try {
+    pthread_t input_thread_id = 0, gather_thread_id = 0;
+
+    std::clog << "trying to use " << global_argv[1] << " as ParameterSet" << std::endl;
+    ACC::APS::ParameterSet parameterSet(global_argv[1]);
+    CS1_Parset cs1_parset(&parameterSet);
+
+    configureCNs(cs1_parset);
+
+    unsigned myPsetNumber = getBGLpersonality()->getPsetNum();
+
+    if (cs1_parset.inputPsetIndex(myPsetNumber) >= 0) {
+      static char nrRuns[16], *argv[] = {
+	global_argv[0],
+	global_argv[1],
+	nrRuns,
+	0
+      };
+
+      sprintf(nrRuns, "%u", atoi(global_argv[2]) * cs1_parset.nrCoresPerPset() / cs1_parset.nrSubbandsPerPset());
+
+      if (pthread_create(&input_thread_id, 0, input_thread, argv) != 0) {
+	perror("pthread_create");
+	exit(1);
+      }
+    }
+
+    if (cs1_parset.useGather() && cs1_parset.outputPsetIndex(myPsetNumber) >= 0) {
+      static char nrRuns[16], *argv[] = {
+	global_argv[0],
+	global_argv[1],
+	nrRuns,
+	0
+      };
+
+      sprintf(nrRuns, "%u", atoi(global_argv[2]) * cs1_parset.nrCoresPerPset());
+
+      if (pthread_create(&gather_thread_id, 0, gather_thread, argv) != 0) {
+	perror("pthread_create");
+	exit(1);
+      }
+    }
+
+    if (gather_thread_id != 0 && input_thread_id == 0) {
+      // quick hack to send PROCESS commands to CNs
+
+      BGL_Command command(BGL_Command::PROCESS);
+      unsigned	  nrRuns  = atoi(global_argv[2]);
+      unsigned	  nrCores = cs1_parset.nrCoresPerPset();
+
+      for (unsigned run = 0; run < nrRuns; run ++)
+	for (unsigned core = 0; core < nrCores; core ++)
+	  command.write(TH_ZoidServer::theirTHs[BGL_Mapping::mapCoreOnPset(core, myPsetNumber)]);
+    }
+
+    if (input_thread_id != 0) {
+      if (pthread_join(input_thread_id, 0) != 0) {
+	perror("pthread join");
+	exit(1);
+      }
+
+      std::clog << "lofar__fini: input thread joined" << std::endl;
+    }
+
+    unconfigureCNs(cs1_parset);
+    stopCNs();
+
+    if (gather_thread_id != 0) {
+      if (pthread_join(gather_thread_id, 0) != 0) {
+	perror("pthread join");
+	exit(1);
+      }
+
+      std::clog << "lofar__fini: gather thread joined" << std::endl;
+    }
+  } catch (std::exception &ex) {
+    std::cerr << "caught exception: " << ex.what() << std::endl;
+  }
+
+  if (pthread_detach(pthread_self()) != 0) {
+    std::cerr << "could not detach master thread" << std::endl;
+  }
+
+  if (global_argv != 0) {
+    for (int arg = 0; arg < global_argc; arg ++)
+      delete global_argv[arg];
+
+    delete global_argv;
+  }
+
+  std::clog << "master thread finishes" << std::endl;
   return 0;
 }
 
@@ -82,20 +224,31 @@ extern "C"
   void lofar__fini(void);
 }
 
-static pthread_t scatter_thread_id, gather_thread_id;
+inline static void redirect_output() // redirects stdout and stderr to the file run.CS1_IONProc.<pset number> (relative path)
+{
+  int  fd;
+  char file_name[32]; // "run.CS1_IONProc." plus at most 10 digits and the terminator fits in 32 bytes
+
+  sprintf(file_name, "run.CS1_IONProc.%u", getBGLpersonality()->getPsetNum());
+
+  if ((fd = open(file_name, O_CREAT | O_TRUNC | O_RDWR, 0666)) < 0 || dup2(fd, 1) < 0 || dup2(fd, 2) < 0) // file is created or truncated; fd itself is never closed here
+      perror("redirecting stdout/stderr"); // a failure is only reported, execution continues
+}
+
 
-void lofar__init(int nrComputeNodes)
+void lofar__init(int nrComputeCores)
 {
+  nrCoresPerPset = nrComputeCores;
+  redirect_output();
   std::clog << "begin of lofar__init" << std::endl;
 
-  TH_ZoidServer::createAllTH_ZoidServers(nrComputeNodes);
+  TH_ZoidServer::createAllTH_ZoidServers(nrComputeCores);
 
-  global_argv	    = 0;
-  scatter_thread_id = 0;
-  gather_thread_id  = 0;
+  global_argv = 0;
 }
 
 
+
 void lofar_init(char   **argv /* in:arr2d:size=+1 */,
 		size_t *lengths /* in:arr:size=+1 */,
 		int    argc /* in:obj */)
@@ -117,26 +270,14 @@ void lofar_init(char   **argv /* in:arr2d:size=+1 */,
 
   global_argv[argc] = 0; // terminating zero pointer
 
-  try {
-    std::string fileName = std::string(basename(argv[0])) + ".parset";
-    std::clog << "trying to use " << fileName << " as ParameterSet" << std::endl;
-    ACC::APS::ParameterSet parameterSet(fileName);
+  if (argc != 3)
+    std::cerr << "unexpected number of arguments, expect trouble!" << std::endl;
 
-    if (parameterSet.getBool("OLAP.IONProc.useScatter")) {
-      if (pthread_create(&scatter_thread_id, 0, scatter_thread, 0) != 0) {
-	perror("pthread_create");
-	exit(1);
-      }
-    }
+  pthread_t master_thread_id;
 
-    if (parameterSet.getBool("OLAP.IONProc.useGather")) {
-      if (pthread_create(&gather_thread_id, 0, gather_thread, 0) != 0) {
-	perror("pthread_create");
-	exit(1);
-      }
-    }
-  } catch (std::exception &ex) {
-    std::cerr << "caught exception: " << ex.what() << std::endl;
+  if (pthread_create(&master_thread_id, 0, master_thread, 0) != 0) {
+    perror("pthread_create");
+    exit(1);
   }
 }
 
@@ -145,28 +286,7 @@ void lofar__fini(void)
 {
   std::clog << "begin of lofar__fini" << std::endl;
 
-  if (scatter_thread_id != 0) {
-    if (pthread_join(scatter_thread_id, 0) != 0) {
-      perror("pthread join");
-      exit(1);
-    }
-  }
-
-  if (gather_thread_id != 0) {
-    if (pthread_join(gather_thread_id, 0) != 0) {
-      perror("pthread join");
-      exit(1);
-    }
-  }
-
   TH_ZoidServer::deleteAllTH_ZoidServers();
 
-  if (global_argv != 0) {
-    for (int arg = 0; arg < global_argc; arg ++)
-      delete global_argv[arg];
-
-    delete global_argv;
-  }
-
   std::clog << "end of lofar__fini" << std::endl;
 }
diff --git a/Appl/CEP/CS1/CS1_InputSection/src/Connector.cc b/Appl/CEP/CS1/CS1_IONProc/src/Connector.cc
similarity index 93%
rename from Appl/CEP/CS1/CS1_InputSection/src/Connector.cc
rename to Appl/CEP/CS1/CS1_IONProc/src/Connector.cc
index d4c6a3c1acdd8de56e85e1c7445cfa12d4c6e435..4fe1abc88eee0789d08529a5a23bd57785bfc855 100644
--- a/Appl/CEP/CS1/CS1_InputSection/src/Connector.cc
+++ b/Appl/CEP/CS1/CS1_IONProc/src/Connector.cc
@@ -25,7 +25,7 @@
 
 //# Includes
 #include <Common/LofarLogger.h>
-#include <CS1_InputSection/Connector.h>
+#include <CS1_IONProc/Connector.h>
 #include <Transport/TH_Mem.h>
 #include <Transport/TH_Ethernet.h>
 #include <Transport/TH_MPI.h>
@@ -66,12 +66,9 @@ namespace LOFAR {
 	}	  
       } else 
 #endif      
-      if (transportType == "NULL") {
+      if (transportType=="NULL") {
 	theTH = new TH_Null();
-      } else if (transportType == "FILE") {
-	string filename = ps->getString("OLAP.OLAP_Conn.station_Input_BaseFileName") + "." + key;
-	theTH = new TH_File(filename, TH_File::Read);
-      } else if (transportType == "TCP") {
+      } else if (transportType=="TCP") {
 	string service = ps->inputPortnr(key);
 	theTH = new TH_Socket(service, 
 		              true, 
@@ -92,6 +89,7 @@ namespace LOFAR {
 #endif      
       else if (transportType == "UDP") {
 	string service = ps->inputPortnr(key);
+	std::clog << "creating UDP socket, service = " << service << std::endl;
 	theTH = new TH_Socket(service,
 			      true, 
 			      Socket::UDP, 
diff --git a/Appl/CEP/CS1/CS1_InputSection/include/CS1_InputSection/Connector.h b/Appl/CEP/CS1/CS1_IONProc/src/Connector.h
similarity index 100%
rename from Appl/CEP/CS1/CS1_InputSection/include/CS1_InputSection/Connector.h
rename to Appl/CEP/CS1/CS1_IONProc/src/Connector.h
diff --git a/Appl/CEP/CS1/CS1_IONProc/src/ION_Allocator.cc b/Appl/CEP/CS1/CS1_IONProc/src/ION_Allocator.cc
index dd00713792ce474c0703d20b38158ad33abcca8d..0353450f1ecdaf60654b361144e582e8b0f83fc7 100644
--- a/Appl/CEP/CS1/CS1_IONProc/src/ION_Allocator.cc
+++ b/Appl/CEP/CS1/CS1_IONProc/src/ION_Allocator.cc
@@ -30,12 +30,23 @@
 #include <cstdlib>
 #include <iostream>
 
+#define USE_ZOID_ALLOCATOR
+
+#if defined USE_ZOID_ALLOCATOR
+extern "C" {
+  void *__zoid_alloc(size_t);
+  void __zoid_free(void *);
+}
+#endif
+
+
 namespace LOFAR
 {
-
+#if !defined USE_ZOID_ALLOCATOR
 pthread_mutex_t		 ION_Allocator::mutex	 = PTHREAD_MUTEX_INITIALIZER;
 SparseSet<char *>	 ION_Allocator::freeList = SparseSet<char *>().include((char *) 0xA4002400, (char *) 0xB0000000);
 std::map<char *, size_t> ION_Allocator::sizes;
+#endif
 
 ION_Allocator *ION_Allocator::clone() const
 {
@@ -44,6 +55,14 @@ ION_Allocator *ION_Allocator::clone() const
 
 void *ION_Allocator::allocate(size_t nbytes, size_t alignment)
 {
+#if defined USE_ZOID_ALLOCATOR
+  void *ptr = __zoid_alloc(nbytes);
+
+  std::clog << "ION_Allocator::allocate(" << nbytes << ", " << alignment << ") = " << ptr << std::endl;
+
+  if (ptr != 0)
+    return ptr;
+#else
   pthread_mutex_lock(&mutex);
 
   const std::vector<SparseSet<char *>::range> &ranges = freeList.getRanges();
@@ -59,6 +78,7 @@ void *ION_Allocator::allocate(size_t nbytes, size_t alignment)
       return (void *) begin;
     }
   }
+#endif
 
   std::cerr << "ION_Allocator::allocate(" << nbytes << ", " << alignment << ") : out of large-TLB memory" << std::endl;
   std::exit(1);
@@ -69,11 +89,15 @@ void ION_Allocator::deallocate(void *ptr)
   std::clog << "ION_Allocator::deallocate(" << ptr << ")" << std::endl;
 
   if (ptr != 0) {
+#if defined USE_ZOID_ALLOCATOR
+    __zoid_free(ptr);
+#else
     pthread_mutex_lock(&mutex);
     std::map<char *, size_t>::iterator index = sizes.find((char *) ptr);
     freeList.include((char *) ptr, (char *) ptr + index->second);
     sizes.erase(index);
     pthread_mutex_unlock(&mutex);
+#endif
   }
 }
 
diff --git a/Appl/CEP/CS1/CS1_InputSection/src/InputThread.cc b/Appl/CEP/CS1/CS1_IONProc/src/InputThread.cc
similarity index 71%
rename from Appl/CEP/CS1/CS1_InputSection/src/InputThread.cc
rename to Appl/CEP/CS1/CS1_IONProc/src/InputThread.cc
index c46989215dea8d9db46c8697a7c44f0f534124f4..4dfe5fc4540d0f29d1178d0b9f4cbf40757b8662 100644
--- a/Appl/CEP/CS1/CS1_InputSection/src/InputThread.cc
+++ b/Appl/CEP/CS1/CS1_IONProc/src/InputThread.cc
@@ -26,47 +26,73 @@
 //# Includes
 #include <Common/LofarLogger.h>
 #include <Common/hexdump.h>
-#include <CS1_InputSection/InputThread.h>
+#include <CS1_IONProc/InputThread.h>
 #include <Common/DataConvert.h>
 #include <Common/Timer.h>
 #include <Transport/TransportHolder.h>
-#include <Transport/TH_MPI.h>
-#include <CS1_InputSection/BeamletBuffer.h>
+#include <CS1_IONProc/BeamletBuffer.h>
 
 namespace LOFAR {
 namespace CS1 {
 
-bool InputThread::theirShouldStop = false;
+volatile bool InputThread::theirShouldStop = false;
 volatile unsigned InputThread::nrPacketsReceived, InputThread::nrPacketsRejected;
 
-InputThread::InputThread(ThreadArgs args) : itsArgs(args)
+InputThread::InputThread(const ThreadArgs &args) : itsArgs(args)
 {
+  std::clog << "InputThread::InputThread(...)" << std::endl;
+  if (pthread_create(&thread, 0, mainLoopStub, this) != 0) {
+    std::cerr << "could not create input thread" << std::endl;
+    exit(1);
+  }
 }
 
 InputThread::~InputThread()
 {
+  std::clog << "InputThread::~InputThread()" << std::endl;
+  theirShouldStop = true;
+
+  if (pthread_join(thread, 0) != 0) {
+    std::cerr << "could not join input thread" << std::endl;
+    exit(1);
+  }
 }
 
 // log from separate thread, since printing from a signal handler causes deadlocks
 
 void *InputThread::logThread(void *)
 {
+  std::clog << "InputThread::logThread()" << std::endl;
   while (!theirShouldStop) {
-    std::clog <<
-#if defined HAVE_MPI
-	TH_MPI::getCurrentRank() << ": "
+#if 0
+    static unsigned count;
+
+    if ((++ count & 63) == 0)
+      system("cat /proc/meminfo");
 #endif
+
+    std::clog <<
 	"received " << nrPacketsReceived << " packets, "
 	"rejected " << nrPacketsRejected << " packets" << std::endl;
     nrPacketsReceived = nrPacketsRejected = 0; // race conditions, but who cares
     sleep(1);
   }
 
+  std::clog << "InputThread::logThread() finished" << std::endl;
   return 0;
 }
 
-void InputThread::operator()()
+void *InputThread::mainLoopStub(void *inputThread)
 {
+  std::clog << "InputThread::mainLoopStub()" << std::endl;
+  reinterpret_cast<InputThread *>(inputThread)->mainLoop();
+  return 0;
+}
+
+
+void InputThread::mainLoop()
+{
+  std::clog << "InputThread::mainLoop()" << std::endl;
   LOG_TRACE_FLOW_STR("WH_RSPInput WriterThread");   
 
   pthread_t logThreadId;
@@ -96,6 +122,8 @@ void InputThread::operator()()
   NSTimer receiveTimer("receiveTimer", true), writeTimer("writeTimer", true);
   bool dataShouldContainValidStamp = (itsArgs.th->getType() != "TH_Null");
 
+  std::clog << "InputThread::mainLoop() entering loop" << std::endl;
+
   while (!theirShouldStop) {
 retry: // until valid packet received
 
@@ -104,7 +132,7 @@ retry: // until valid packet received
       itsArgs.th->recvBlocking((void *) totRecvframe, frameSize, 0);
       receiveTimer.stop();
       ++ nrPacketsReceived;
-    } catch (Exception& e) {
+    } catch (Exception &e) {
       LOG_TRACE_FLOW_STR("WriteToBufferThread couldn't read from TransportHolder(" << e.what() << ", exiting");
       exit(1);
     }	
@@ -118,6 +146,7 @@ retry: // until valid packet received
       seqid   = byteSwap(seqid);
       blockid = byteSwap(blockid);
 #endif
+//std::clog << "InputThread::mainLoop: seqid = " << seqid << ", blockid = " << blockid << std::endl;
 
       //if the seconds counter is 0xFFFFFFFF, the data cannot be trusted.
       if (seqid == ~0U) {
@@ -132,26 +161,21 @@ retry: // until valid packet received
   
     // expected packet received so write data into corresponding buffer
     writeTimer.start();
-
-    try {
-      itsArgs.BBuffer->writeElements((Beamlet *) &recvframe[itsArgs.frameHeaderSize], actualstamp, itsArgs.nTimesPerFrame);
-    } catch (Exception& e) {
-      LOG_TRACE_FLOW_STR("WriteToBufferThread couldn't write to BeamletBuffer(" << e.what() << ", stopping thread");
-      break;
-    }	
-
+    itsArgs.BBuffer->writeElements((Beamlet *) &recvframe[itsArgs.frameHeaderSize], actualstamp, itsArgs.nTimesPerFrame);
     writeTimer.stop();
   }
 
+  std::clog << "InputThread::mainLoop() exiting loop" << std::endl;
   if (pthread_join(logThreadId, 0) != 0) {
     std::cerr << "could not join log thread" << std::endl;
     exit(1);
   }
 }
 
-InputThread::InputThread(const InputThread &that)
-  : itsArgs(that.itsArgs)
+void InputThread::stopThreads() // requests termination of the input and log loops; does not wait for them
 {
+  std::clog << "InputThread::stopThreads()" << std::endl;
+  theirShouldStop = true; // the same static flag the destructor sets before joining
 }
 
 } // namespace CS1
diff --git a/Appl/CEP/CS1/CS1_InputSection/include/CS1_InputSection/InputThread.h b/Appl/CEP/CS1/CS1_IONProc/src/InputThread.h
similarity index 76%
rename from Appl/CEP/CS1/CS1_InputSection/include/CS1_InputSection/InputThread.h
rename to Appl/CEP/CS1/CS1_IONProc/src/InputThread.h
index dc2c80eaf1529b91e208ee2f0f89ef36faf55c56..c3ab7c46ebdccf82aff6aa4a2255c9258302a2c2 100644
--- a/Appl/CEP/CS1/CS1_InputSection/include/CS1_InputSection/InputThread.h
+++ b/Appl/CEP/CS1/CS1_IONProc/src/InputThread.h
@@ -31,7 +31,9 @@
 //# Includes
 #include <Common/lofar_vector.h>
 #include <CS1_Interface/RSPTimeStamp.h>
-#include <CS1_InputSection/BeamletBuffer.h>
+#include <CS1_IONProc/BeamletBuffer.h>
+
+#include <pthread.h>
 
 namespace LOFAR 
 {
@@ -46,39 +48,35 @@ namespace LOFAR
 
     // TODO: this information doesn't have to be in a struct
     struct ThreadArgs {
-      BeamletBuffer* BBuffer;
-      TransportHolder* th; 
+      BeamletBuffer	*BBuffer; // buffer that InputThread::mainLoop() fills via writeElements()
+      TransportHolder	*th; // source the frames are read from with recvBlocking()
 
-      int frameSize;
-      int ipHeaderSize;
-      int frameHeaderSize;
-      int nTimesPerFrame;
-      int nSubbandsPerFrame;
-      int ID;
+      int		frameSize; // WH_InputSection::startThread() sets this to frameHeaderSize + nSubbandsPerFrame * nTimesPerFrame * sizeof(Beamlet)
+      int		ipHeaderSize; // from parset key OLAP.IPHeaderSize
+      int		frameHeaderSize; // from parset key OLAP.EPAHeaderSize; offset of the beamlet data inside a received frame
+      int		nTimesPerFrame; // from parset key OLAP.nrTimesInFrame
+      int		nSubbandsPerFrame; // from parset key OLAP.nrSubbandsPerFrame
     };
   
     class InputThread
     {
     public:
-      InputThread(ThreadArgs args);
+      InputThread(const ThreadArgs &args); // copies args and starts the receive thread immediately
       ~InputThread();
 
-      static void stopThreads() {theirShouldStop = true;};
+      static void stopThreads(); // sets theirShouldStop; does not join
 
-      void operator()();
+      static void *mainLoopStub(void *inputThread); // pthread start routine, forwards to mainLoop()
+      void	  mainLoop(); // receive loop; runs while theirShouldStop is false
 
-      InputThread (const InputThread& that);
     private:
-      // Copying is not allowed
-      InputThread& operator= (const InputThread& that);
-
       static void *logThread(void *);
     
       //# Datamembers
-      // make this static for now, because boost copies the object
-      static bool theirShouldStop;
+      static volatile bool theirShouldStop; // one flag shared by every instance and by logThread()
       static volatile unsigned nrPacketsReceived, nrPacketsRejected;
       ThreadArgs itsArgs;
+      pthread_t	 thread; // joined in the destructor. NOTE(review): with the private operator= removed the class is implicitly copyable; a copy would join the same thread twice -- consider re-declaring copy ctor/assignment private
     };
 
     // @}
diff --git a/Appl/CEP/CS1/CS1_InputSection/include/CS1_InputSection/LockedRange.h b/Appl/CEP/CS1/CS1_IONProc/src/LockedRange.h
similarity index 94%
rename from Appl/CEP/CS1/CS1_InputSection/include/CS1_InputSection/LockedRange.h
rename to Appl/CEP/CS1/CS1_IONProc/src/LockedRange.h
index 2a0c1941eac803d18392724cae25586cc4b0f890..4901ce13cbbc02151430c6344e3f57d682ee8cdc 100644
--- a/Appl/CEP/CS1/CS1_InputSection/include/CS1_InputSection/LockedRange.h
+++ b/Appl/CEP/CS1/CS1_IONProc/src/LockedRange.h
@@ -29,17 +29,14 @@
 //# Never #include <config.h> or #include <lofar_config.h> in a header file!
 
 //# Includes
-#include <boost/thread.hpp>
 #include <Common/Timer.h>
 #include <ostream>
+#include <pthread.h>
 
 namespace LOFAR 
 {
   namespace CS1 
   {
-
-    using namespace boost;
-
     // \addtogroup CS1_InputSection
     // @{
 
@@ -90,9 +87,8 @@ namespace LOFAR
       T itsFirstItem;
       bool itsIsEmpty;
   
-      mutex itsMutex;
-      condition itsDataAvailCond;
-      condition itsSpaceAvailCond;
+      pthread_mutex_t itsMutex;
+      pthread_cond_t  itsDataAvailCond, itsSpaceAvailCond;
 
       // If this buffer IsOverwriting the readtail and readhead are shifted when the buffer is full
       // this means old data is lost, but all new data is stored in the buffer
@@ -111,6 +107,6 @@ namespace LOFAR
   } // namespace CS1
 } // namespace LOFAR
 
-#include <CS1_InputSection/LockedRange.tcc>
+#include <CS1_IONProc/LockedRange.tcc>
 
 #endif
diff --git a/Appl/CEP/CS1/CS1_InputSection/include/CS1_InputSection/LockedRange.tcc b/Appl/CEP/CS1/CS1_IONProc/src/LockedRange.tcc
similarity index 84%
rename from Appl/CEP/CS1/CS1_InputSection/include/CS1_InputSection/LockedRange.tcc
rename to Appl/CEP/CS1/CS1_IONProc/src/LockedRange.tcc
index 3fe509071012fee0777c8bcc9448f0b29f4b0b86..367daa36f8356dfa74e48d6b78cef1455891c075 100644
--- a/Appl/CEP/CS1/CS1_InputSection/include/CS1_InputSection/LockedRange.tcc
+++ b/Appl/CEP/CS1/CS1_IONProc/src/LockedRange.tcc
@@ -36,12 +36,18 @@ namespace LOFAR
       itsWaitingForDataTimer ("waitingForData"),
       itsWaitingForSpaceTimer ("waitingForSpace")
     {
+      pthread_mutex_init(&itsMutex, 0);
+      pthread_cond_init(&itsDataAvailCond, 0);
+      pthread_cond_init(&itsSpaceAvailCond, 0);
       clear ();
     };
 
     template < class T, class S > LockedRange < T, S >::~LockedRange ()
     {
-      printTimers (cout);
+      pthread_mutex_destroy(&itsMutex); // release the primitives initialised in the constructor
+      pthread_cond_destroy(&itsDataAvailCond);
+      pthread_cond_destroy(&itsSpaceAvailCond);
+      printTimers(clog); // timer report now goes to clog instead of cout
     };
 
     template < class T, class S >
@@ -77,7 +83,7 @@ namespace LOFAR
 		    "in LockedRange::writeLock(): end(" << end <<
 		    ") should be larger than begin(" << begin << ")");
 
-      mutex::scoped_lock sl (itsMutex);
+      pthread_mutex_lock(&itsMutex);
 
       // check if writeLock was called before
       if (itsIsEmpty)
@@ -108,7 +114,7 @@ namespace LOFAR
 	      itsWaitingForSpaceTimer.start ();
 	      amWaiting = true;
 	    }
-	  itsSpaceAvailCond.wait (sl);
+	  pthread_cond_wait(&itsSpaceAvailCond, &itsMutex);
 	}
       if (amWaiting)
 	{
@@ -130,6 +136,7 @@ namespace LOFAR
 	    itsReadHead = itsReadTail;
 	}
 
+      pthread_mutex_unlock(&itsMutex);
       itsWriteLockTimer.stop ();
       return begin;
     }
@@ -139,9 +146,10 @@ namespace LOFAR
     {
       itsWriteUnlockTimer.start ();
 
-      mutex::scoped_lock sl (itsMutex);
+      pthread_mutex_lock(&itsMutex);
       itsWriteTail = itsWriteHead = end;
-      itsDataAvailCond.notify_all ();
+      pthread_cond_broadcast(&itsDataAvailCond);
+      pthread_mutex_unlock(&itsMutex);
 
       itsWriteUnlockTimer.stop ();
     };
@@ -156,7 +164,7 @@ namespace LOFAR
       DBGASSERTSTR (end >= begin,
 		    "in LockedRange::readLock(): end(" << end <<
 		    ") should be larger than begin(" << begin << ")");
-      mutex::scoped_lock sl (itsMutex);
+      pthread_mutex_lock(&itsMutex);
 
       itsIsOverwriting = false;
 
@@ -165,7 +173,7 @@ namespace LOFAR
 	{
 	  begin = itsWriteHead - itsSize;
 	  itsReadTail = begin;
-	  itsSpaceAvailCond.notify_all ();
+	  pthread_cond_broadcast(&itsSpaceAvailCond);
 	}
 
       if (end < begin)
@@ -173,7 +181,7 @@ namespace LOFAR
 
       if (begin > itsReadTail) {
         itsReadTail = begin;
-        itsSpaceAvailCond.notify_all ();
+        pthread_cond_broadcast(&itsSpaceAvailCond);
       }
 
       bool amWaiting = false;	
@@ -187,7 +195,7 @@ namespace LOFAR
 	      itsWaitingForDataTimer.start ();
 	      amWaiting = true;
 	    }
-	  itsDataAvailCond.wait (sl);
+	  pthread_cond_wait(&itsDataAvailCond, &itsMutex);
 	}
       if (amWaiting)
 	{
@@ -197,6 +205,8 @@ namespace LOFAR
 	}
       itsReadHead = end;
       itsReadTail = begin;
+      pthread_mutex_unlock(&itsMutex);
+
       itsReadLockTimer.stop ();
       return begin;
     }
@@ -205,16 +215,16 @@ namespace LOFAR
       void LockedRange < T, S >::readUnlock (const T & end)
     {
       itsReadUnlockTimer.start ();
-      mutex::scoped_lock sl (itsMutex);
+      pthread_mutex_lock(&itsMutex);
       itsReadTail = itsReadHead = end;
-      itsSpaceAvailCond.notify_all ();
+      pthread_cond_broadcast(&itsSpaceAvailCond);
+      pthread_mutex_unlock(&itsMutex);
       itsReadUnlockTimer.stop ();
     };
 
     template < class T, class S > T LockedRange < T, S >::getReadStart ()
     {
-
-      mutex::scoped_lock sl (itsMutex);
+      pthread_mutex_lock(&itsMutex);
 
       bool amWaiting = false;
       while (itsIsEmpty)
@@ -225,7 +235,7 @@ namespace LOFAR
 	      itsWaitingForDataTimer.start ();
 	      amWaiting = true;
 	    }
-	  itsDataAvailCond.wait (sl);
+	  pthread_cond_wait(&itsDataAvailCond, &itsMutex);
 	}
       if (amWaiting)
 	{
@@ -240,20 +250,24 @@ namespace LOFAR
 	{
 	  begin = itsWriteHead - itsSize;
 	  itsReadTail = begin;
-	  itsSpaceAvailCond.notify_all ();
+	  pthread_cond_broadcast(&itsSpaceAvailCond);
 	}
 
       itsReadHead = begin;
       itsReadTail = begin;
+      pthread_mutex_unlock(&itsMutex);
+
       return begin;
     }
 
     template < class T, class S > void LockedRange < T, S >::clear ()
     {
-      mutex::scoped_lock sl (itsMutex);
+      pthread_mutex_lock(&itsMutex);
       itsReadHead = itsReadTail = itsWriteTail = itsWriteHead = itsNullOfType;
       itsIsOverwriting = true;
-      itsSpaceAvailCond.notify_all ();
+      pthread_cond_broadcast(&itsSpaceAvailCond);
+      // itsIsEmpty is tested under itsMutex by writeLock()/getReadStart(), so update it before unlocking (as the old scoped_lock did)
       itsIsEmpty = true;
+      pthread_mutex_unlock(&itsMutex);
     };
 
diff --git a/Appl/CEP/CS1/CS1_IONProc/src/Makefile.am b/Appl/CEP/CS1/CS1_IONProc/src/Makefile.am
index e85c7cc6e723f64a8896e0c5952fee8a2443dccc..1099a9d29a40e729b001b33f5b38b1a77d7f14f6 100644
--- a/Appl/CEP/CS1/CS1_IONProc/src/Makefile.am
+++ b/Appl/CEP/CS1/CS1_IONProc/src/Makefile.am
@@ -1,17 +1,34 @@
 lib_LTLIBRARIES		= liblofar_impl.la
 
 liblofar_impl_la_SOURCES = $(DOCHDRS) \
+BeamletBuffer.h			\
+BeamletBuffer.cc		\
+BGL_Personality.h		\
 BGL_Personality.cc		\
+CS1_ION_main.h		 	\
 CS1_ION_main.cc		 	\
+Connector.h			\
+Connector.cc			\
+InputThread.h			\
+InputThread.cc			\
+ION_Allocator.h			\
 ION_Allocator.cc		\
-AH_ION_Scatter.cc		\
-WH_ION_Scatter.cc		\
+LockedRange.h			\
+LockedRange.tcc			\
+AH_InputSection.h		\
+AH_InputSection.cc		\
+WH_InputSection.h		\
+WH_InputSection.cc		\
+AH_ION_Gather.h			\
 AH_ION_Gather.cc		\
+WH_ION_Gather.h			\
 WH_ION_Gather.cc		\
+TH_ZoidServer.h			\
 TH_ZoidServer.cc
 
 install-exec-local:
-	ln -sf .libs/liblofar_impl.so lofar_impl.so
+	ln -sf .libs/liblofar_impl.so.0.0.0 lofar_impl.so
+#	cp lofar_impl.so /bgl/lofar-utils/zoid-test/lib
 
 uninstall-local:
-	rm lofar_impl.so
+	rm -f lofar_impl.so
diff --git a/Appl/CEP/CS1/CS1_IONProc/src/TH_ZoidServer.cc b/Appl/CEP/CS1/CS1_IONProc/src/TH_ZoidServer.cc
index 2683aa8fda40653fca97c7d7decc714a77a2f901..c22389808fdfa93e078bd114bb072b7bf391478b 100644
--- a/Appl/CEP/CS1/CS1_IONProc/src/TH_ZoidServer.cc
+++ b/Appl/CEP/CS1/CS1_IONProc/src/TH_ZoidServer.cc
@@ -35,10 +35,22 @@ namespace CS1 {
 extern "C"
 {
 #include <lofar.h>
-  void *lofar_cn_to_ion_allocate_cb(int len);
+  void *lofar_cn_to_ion_zerocopy_allocate_cb(int len);
 }
 
 
+#if 0
+static unsigned checksum(const void *buf, size_t size) // disabled debug helper: XOR of all whole unsigned words in buf
+{
+  unsigned sum = 0;
+
+  for (int i = 0; i < (int) (size / sizeof(unsigned)); i ++) // trailing bytes that do not fill a whole word are ignored
+    sum ^= ((unsigned *) buf)[i];
+
+  return sum;
+}
+#endif
+
 std::vector<TH_ZoidServer *> TH_ZoidServer::theirTHs;
 
 
@@ -52,10 +64,11 @@ void TH_ZoidServer::sendCompleted(void * /*buf*/, void *arg)
   pthread_mutex_unlock(&th->mutex);
 }
 
-ssize_t lofar_ion_to_cn(void * /*buf*/ /* out:arr:size=+1:zerocopy:userbuf */,
-		        size_t *count /* inout:ptr */)
+ssize_t lofar_ion_to_cn_zerocopy(void   * /*buf*/ /* out:arr:size=+1:zerocopy:userbuf */,
+				 size_t *count /* inout:ptr */)
 {
   TH_ZoidServer *th = TH_ZoidServer::theirTHs[__zoid_calling_process_id()];
+  //std::clog << "lofar_ion_to_cn_zerocopy(..., " << *count << "), __zoid_calling_process_id() = " << __zoid_calling_process_id() << std::endl;
 
   pthread_mutex_lock(&th->mutex);
 
@@ -67,13 +80,40 @@ ssize_t lofar_ion_to_cn(void * /*buf*/ /* out:arr:size=+1:zerocopy:userbuf */,
 
   __zoid_register_userbuf(th->sendBufferPtr, TH_ZoidServer::sendCompleted, th);
   th->sendBufferPtr += *count;
-  th->bytesToSend -= *count;
+  th->bytesToSend   -= *count;
 
   return *count;
 }
 
 
-void *lofar_cn_to_ion_allocate_cb(int /*len*/)
+ssize_t lofar_ion_to_cn_onecopy(void   *buf /* out:arr:size=+1 */,
+				size_t *count /* inout:ptr */)
+{ // copying variant: memcpy()s up to *count pending send bytes into buf and returns the number copied
+  TH_ZoidServer *th = TH_ZoidServer::theirTHs[__zoid_calling_process_id()]; // transport holder selected by the calling process id
+  //std::clog << "lofar_ion_to_cn_onecopy(..., " << *count << "), __zoid_calling_process_id() = " << __zoid_calling_process_id() << std::endl;
+
+  pthread_mutex_lock(&th->mutex);
+
+  while (th->bytesToSend == 0) // wait until sendBlocking() has posted data and signalled newSendDataAvailable
+    pthread_cond_wait(&th->newSendDataAvailable, &th->mutex);
+
+  if (*count > th->bytesToSend) // never hand out more than is pending
+    *count = th->bytesToSend;
+
+  memcpy(buf, th->sendBufferPtr, *count);
+  th->sendBufferPtr += *count;
+  th->bytesToSend   -= *count;
+
+  if (th->bytesToSend == 0) // the whole posted buffer has been consumed
+    pthread_cond_signal(&th->dataSent);
+
+  pthread_mutex_unlock(&th->mutex);
+
+  return *count;
+}
+
+
+void *lofar_cn_to_ion_zerocopy_allocate_cb(int /*len*/)
 {
   TH_ZoidServer *th = TH_ZoidServer::theirTHs[__zoid_calling_process_id()];
   pthread_mutex_lock(&th->mutex);
@@ -86,17 +126,43 @@ void *lofar_cn_to_ion_allocate_cb(int /*len*/)
 }
 
 
-ssize_t lofar_cn_to_ion(const void * /*buf*/ /* in:arr:size=+1:zerocopy:userbuf */,
-		        size_t count /* in:obj */)
+ssize_t lofar_cn_to_ion_zerocopy(const void * /*buf*/ /* in:arr:size=+1:zerocopy:userbuf */,
+				 size_t	    count /* in:obj */)
 {
   // still holding lock
+
+  TH_ZoidServer *th = TH_ZoidServer::theirTHs[__zoid_calling_process_id()]; // th->mutex is presumably still locked from lofar_cn_to_ion_zerocopy_allocate_cb() -- confirm
+
+  if (count > th->bytesToReceive) // account for at most the bytes still expected
+    count = th->bytesToReceive;
+
+  th->receiveBufferPtr += count; // no memcpy here: buf is unused in this zero-copy variant, only the bookkeeping is advanced
+  th->bytesToReceive   -= count;
+
+  if (th->bytesToReceive == 0) // receive buffer complete
+    pthread_cond_signal(&th->dataReceived);
+
+  pthread_mutex_unlock(&th->mutex); // this function does not lock the mutex itself
+  return count;
+}
+
+
+ssize_t lofar_cn_to_ion_onecopy(const void *buf /* in:arr:size=+1 */,
+				size_t	   count /* in:obj */)
+{
   TH_ZoidServer *th = TH_ZoidServer::theirTHs[__zoid_calling_process_id()];
 
+  pthread_mutex_lock(&th->mutex);
+
+  while (th->bytesToReceive == 0)
+    pthread_cond_wait(&th->newReceiveBufferAvailable, &th->mutex);
+
   if (count > th->bytesToReceive)
     count = th->bytesToReceive;
 
+  memcpy(TH_ZoidServer::theirTHs[__zoid_calling_process_id()]->receiveBufferPtr, buf, count);
   th->receiveBufferPtr += count;
-  th->bytesToReceive -= count;
+  th->bytesToReceive   -= count;
 
   if (th->bytesToReceive == 0)
     pthread_cond_signal(&th->dataReceived);
@@ -155,9 +221,10 @@ bool TH_ZoidServer::init()
 
 bool TH_ZoidServer::sendBlocking(void *buf, int nbytes, int, DataHolder *)
 {
+  //std::clog << "TH_ZoidServer::sendBlocking(" << buf << ", " << nbytes << ", ...)" << std::endl;
   pthread_mutex_lock(&mutex);
 
-  sendBufferPtr = (char *) buf;
+  sendBufferPtr = static_cast<char *>(buf);
   bytesToSend = nbytes;
   pthread_cond_signal(&newSendDataAvailable);
 
@@ -172,8 +239,9 @@ bool TH_ZoidServer::sendBlocking(void *buf, int nbytes, int, DataHolder *)
 
 bool TH_ZoidServer::recvBlocking(void *buf, int nbytes, int, int, DataHolder *)
 {
+  //std::clog << "TH_ZoidServer::recvBlocking(" << buf << ", " << nbytes << ", ...)" << std::endl;
   pthread_mutex_lock(&mutex);
-  receiveBufferPtr = (char *) buf;
+  receiveBufferPtr = static_cast<char *>(buf);
   pthread_cond_signal(&newReceiveBufferAvailable);
 
   for (bytesToReceive = nbytes; bytesToReceive > 0;)
diff --git a/Appl/CEP/CS1/CS1_IONProc/src/TH_ZoidServer.h b/Appl/CEP/CS1/CS1_IONProc/src/TH_ZoidServer.h
index e844ce82b53cd1ca3e5a929c713357b261afe8bf..14468df5c2dbf9743ca79e2284452d5b08c2b69b 100644
--- a/Appl/CEP/CS1/CS1_IONProc/src/TH_ZoidServer.h
+++ b/Appl/CEP/CS1/CS1_IONProc/src/TH_ZoidServer.h
@@ -44,10 +44,12 @@ class TH_ZoidServer : public TransportHolder
 
     virtual bool	     init();
 
-    virtual bool	     recvBlocking(void *, int, int, int, DataHolder *);
-    virtual bool	     sendBlocking(void *, int, int, DataHolder *);
+    // if doCopy == 0, Zoid's zero-copy protocol is used; memory must be
+    // obtained through __zoid_alloc and must be a multiple of 32
+    virtual bool	     recvBlocking(void *ptr, int size, int doCopy, int, DataHolder *);
+    virtual bool	     sendBlocking(void *ptr, int size, int doCopy, DataHolder *);
 
-    static  void	     sendCompleted(void *buf, void *arg);
+    static  void	     sendCompleted(void *ptr, void *arg);
 
     // functions below are not supported
     virtual int32	     recvNonBlocking(void *, int32, int, int32, DataHolder *);
diff --git a/Appl/CEP/CS1/CS1_IONProc/src/WH_ION_Gather.cc b/Appl/CEP/CS1/CS1_IONProc/src/WH_ION_Gather.cc
index b1031c0f3afa3ea4c3bed1ff4fe742b5eca109da..80429d2cae3cd291af744719f9ed27ded8d4a396 100644
--- a/Appl/CEP/CS1/CS1_IONProc/src/WH_ION_Gather.cc
+++ b/Appl/CEP/CS1/CS1_IONProc/src/WH_ION_Gather.cc
@@ -21,6 +21,7 @@
 //# Always #include <lofar_config.h> first!
 #include <lofar_config.h>
 
+#include <CS1_Interface/BGL_Mapping.h>
 #include <CS1_IONProc/ION_Allocator.h>
 #include <CS1_IONProc/WH_ION_Gather.h>
 #include <CS1_IONProc/TH_ZoidServer.h>
@@ -38,18 +39,18 @@
 namespace LOFAR {
 namespace CS1 {
 
-
-WH_ION_Gather::WH_ION_Gather(const string &name, const CS1_Parset *ps)
+WH_ION_Gather::WH_ION_Gather(const string &name, unsigned psetNumber, const CS1_Parset *ps)
 :
   WorkHolder(0, 1, name, "WH_ION_Gather"),
+  itsPsetNumber(psetNumber),
   itsPS(ps)
 {
   itsTmpDH		    = 0;
-  itsNrComputeNodes	    = ps->getUint32("OLAP.BGLProc.nodesPerPset");
-  itsCurrentComputeNode	    = 0;
-  itsNrSubbandsPerPset	    = ps->getUint32("OLAP.subbandsPerPset");
+  itsNrComputeCores	    = ps->nrCoresPerPset();
+  itsCurrentComputeCore	    = 0;
+  itsNrSubbandsPerPset	    = ps->nrSubbandsPerPset();
   itsCurrentSubband	    = 0;
-  itsNrIntegrationSteps     = ps->getUint32("OLAP.IONProc.integrationSteps");
+  itsNrIntegrationSteps     = ps->IONintegrationSteps();
   itsCurrentIntegrationStep = 0;
 
   TinyDataManager &dm = getDataManager();
@@ -59,7 +60,7 @@ WH_ION_Gather::WH_ION_Gather(const string &name, const CS1_Parset *ps)
   dm.setAutoTriggerOut(0, false);
 
 #if 0
-  for (unsigned i = 0; i < itsNrComputeNodes; i ++) {
+  for (unsigned i = 0; i < itsNrComputeCores; i ++) {
     dm.addInDataHolder(i, new DH_Visibilities("input", ps));
     dm.setAutoTriggerIn(i, false);
   }
@@ -82,7 +83,7 @@ WorkHolder* WH_ION_Gather::construct(const string &name, const ACC::APS::Paramet
 
 WH_ION_Gather* WH_ION_Gather::make(const string &name)
 {
-  return new WH_ION_Gather(name, itsPS);
+  return new WH_ION_Gather(name, itsPsetNumber, itsPS); // new instance reuses this object's pset number and parset pointer
 }
 
 
@@ -128,10 +129,13 @@ void WH_ION_Gather::process()
   bool firstTime = itsCurrentIntegrationStep == 0;
   bool lastTime  = itsCurrentIntegrationStep == itsNrIntegrationSteps - 1;
 
-  //std::clog << "itsCurrentComputeNode = " << itsCurrentComputeNode << ", itsCurrentSubband = " << itsCurrentSubband << ", itsCurrentIntegrationStep = " << itsCurrentIntegrationStep << ", firstTime = " << firstTime << ", lastTime = " << lastTime << std::endl;
+  //std::clog << "itsCurrentComputeCore = " << itsCurrentComputeCore << ", itsCurrentSubband = " << itsCurrentSubband << ", itsCurrentIntegrationStep = " << itsCurrentIntegrationStep << ", firstTime = " << firstTime << ", lastTime = " << lastTime << std::endl;
   DH_Visibilities *dh = lastTime ? dynamic_cast<DH_Visibilities *>(getDataManager().getOutHolder(0)) : firstTime ? itsSumDHs[itsCurrentSubband] : itsTmpDH;
   
-  TH_ZoidServer::theirTHs[itsCurrentComputeNode]->recvBlocking(dh->getDataPtr(), (dh->getDataSize() + 15) & ~15, 0, 0, dh);
+  unsigned channel = BGL_Mapping::mapCoreOnPset(itsCurrentComputeCore, itsPsetNumber);
+  //TH_ZoidServer::theirTHs[channel]->recvBlocking(dh->getDataPtr(), (dh->getDataSize() + 31) & ~31, 0, 0, dh);
+  TH_ZoidServer::theirTHs[channel]->recvBlocking(dh->getVisibilities().origin(), dh->getVisibilities().num_elements() * sizeof(fcomplex), 0, 0, 0);
+  TH_ZoidServer::theirTHs[channel]->recvBlocking(dh->getNrValidSamples().origin(), dh->getNrValidSamples().num_elements() * sizeof(unsigned short), 0, 0, 0);
 
   if (!firstTime)
     if (lastTime)
@@ -142,8 +146,8 @@ void WH_ION_Gather::process()
   if (lastTime)
     getDataManager().readyWithOutHolder(0);
 
-  if (++ itsCurrentComputeNode == itsNrComputeNodes)
-    itsCurrentComputeNode = 0;
+  if (++ itsCurrentComputeCore == itsNrComputeCores)
+    itsCurrentComputeCore = 0;
 
   if (++ itsCurrentSubband == itsNrSubbandsPerPset) {
     itsCurrentSubband = 0;
diff --git a/Appl/CEP/CS1/CS1_IONProc/src/WH_ION_Gather.h b/Appl/CEP/CS1/CS1_IONProc/src/WH_ION_Gather.h
index f18fc24871a0ad746902df6966352db8ac394465..e6b3960b46449dd13f4696a645fd7f68a7717d10 100644
--- a/Appl/CEP/CS1/CS1_IONProc/src/WH_ION_Gather.h
+++ b/Appl/CEP/CS1/CS1_IONProc/src/WH_ION_Gather.h
@@ -34,7 +34,7 @@ namespace CS1 {
 class WH_ION_Gather : public WorkHolder
 {
   public:
-    explicit WH_ION_Gather(const string &name, const CS1_Parset *ps);
+    explicit WH_ION_Gather(const string &name, unsigned psetNumber, const CS1_Parset *ps);
     virtual  ~WH_ION_Gather();
 
     //static WorkHolder *construct(const string &name, const ACC::APS::ParameterSet &);
@@ -54,9 +54,10 @@ class WH_ION_Gather : public WorkHolder
     vector<DH_Visibilities *>	itsSumDHs;
     DH_Visibilities		*itsTmpDH;
 
-    unsigned			itsNrComputeNodes, itsCurrentComputeNode;
+    unsigned			itsPsetNumber, itsNrComputeCores, itsCurrentComputeCore;
     unsigned			itsNrSubbandsPerPset, itsCurrentSubband;
     unsigned			itsNrIntegrationSteps, itsCurrentIntegrationStep;
+
     const CS1_Parset		*itsPS;
 };
 
diff --git a/Appl/CEP/CS1/CS1_IONProc/src/WH_InputSection.cc b/Appl/CEP/CS1/CS1_IONProc/src/WH_InputSection.cc
new file mode 100644
index 0000000000000000000000000000000000000000..da3f4ac8b34b89f5faabedf2418227c9d36627c9
--- /dev/null
+++ b/Appl/CEP/CS1/CS1_IONProc/src/WH_InputSection.cc
@@ -0,0 +1,227 @@
+//#  WH_InputSection.cc: Catch RSP ethernet frames and synchronize RSP inputs 
+//#
+//#  Copyright (C) 2006
+//#  ASTRON (Netherlands Foundation for Research in Astronomy)
+//#  P.O.Box 2, 7990 AA Dwingeloo, The Netherlands, seg@astron.nl
+//#
+//#  This program is free software; you can redistribute it and/or modify
+//#  it under the terms of the GNU General Public License as published by
+//#  the Free Software Foundation; either version 2 of the License, or
+//#  (at your option) any later version.
+//#
+//#  This program is distributed in the hope that it will be useful,
+//#  but WITHOUT ANY WARRANTY; without even the implied warranty of
+//#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+//#  GNU General Public License for more details.
+//#
+//#  You should have received a copy of the GNU General Public License
+//#  along with this program; if not, write to the Free Software
+//#  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+//#
+//#  $Id$
+
+//# Always #include <lofar_config.h> first!
+#include <lofar_config.h>
+
+//# Includes
+#include <Common/LofarLogger.h>
+//#include <AMCBase/Epoch.h>
+#include <CS1_IONProc/BGL_Personality.h>
+#include <CS1_IONProc/WH_InputSection.h>
+#include <CS1_IONProc/BeamletBuffer.h>
+#include <CS1_IONProc/InputThread.h>
+#include <CS1_IONProc/ION_Allocator.h>
+#include <CS1_IONProc/TH_ZoidServer.h>
+#include <CS1_Interface/BGL_Command.h>
+#include <CS1_Interface/BGL_Mapping.h>
+#include <CS1_Interface/DH_Delay.h>
+#include <CS1_Interface/CS1_Parset.h>
+#include <Transport/TransportHolder.h>
+#include <CS1_Interface/RSPTimeStamp.h>
+//#include <tinyCEP/Sel_RoundRobin.h>
+
+
+namespace LOFAR {
+namespace CS1 {
+
+WH_InputSection::WH_InputSection(const string &name, 
+				 unsigned stationNumber,
+				 CS1_Parset *ps,
+				 TransportHolder *inputTH)
+:
+  WorkHolder(1, 0, name, "WH_InputSection"), // presumably 1 input and 0 outputs; only input holder 0 is added below
+  itsInputThread(0), // created later by startThread()
+  itsInputTH(inputTH), // handed to the InputThread as its frame source
+  itsStationNr(stationNumber), // index into the DH_Delay table in process()
+  itsCS1PS(ps),
+  itsBBuffer(0), // created later in preprocess()
+  itsPrePostTimer("pre/post"),
+  itsProcessTimer("process"),
+  itsGetElemTimer("getElem")
+{
+  LOG_TRACE_FLOW_STR("WH_InputSection constructor");    
+
+  // get parameters
+  itsNSubbandsPerPset = itsCS1PS->nrSubbandsPerPset();
+  itsNSamplesPerSec   = itsCS1PS->nrSubbandSamples(); // amount itsSyncedStamp advances per process() call
+  itsNHistorySamples  = itsCS1PS->nrHistorySamples(); // extra samples read before the synced stamp in process()
+
+  // create incoming dataholder holding the delay information 
+  getDataManager().addInDataHolder(0, new DH_Delay("DH_Delay", itsCS1PS->nrStations()));
+}
+
+
+WH_InputSection::~WH_InputSection() 
+{ // NOTE(review): itsInputThread and itsBBuffer are deleted only in postprocess(); confirm postprocess() always runs before destruction
+  std::clog << "WH_InputSection::~WH_InputSection" << std::endl;
+}
+
+
+WH_InputSection *WH_InputSection::make(const string& name) // builds another WH_InputSection with a new name
+{
+  return new WH_InputSection(name, itsStationNr, itsCS1PS, itsInputTH); // shares this object's station number, parset and transport holder
+}
+
+
+void WH_InputSection::startThread() // fills a ThreadArgs from the parset and creates the InputThread
+{
+  /* start up thread which writes RSP data from ethernet link
+     into cyclic buffers */
+  LOG_TRACE_FLOW_STR("WH_InputSection starting thread");   
+
+  ThreadArgs args;
+  args.BBuffer            = itsBBuffer; // must already exist: preprocess() allocates it before calling this function
+  args.th                 = itsInputTH;
+  args.ipHeaderSize       = itsCS1PS->getInt32("OLAP.IPHeaderSize");
+  args.frameHeaderSize    = itsCS1PS->getInt32("OLAP.EPAHeaderSize");
+  args.nTimesPerFrame     = itsCS1PS->getInt32("OLAP.nrTimesInFrame");
+  args.nSubbandsPerFrame  = itsCS1PS->getInt32("OLAP.nrSubbandsPerFrame");
+
+  args.frameSize          = args.frameHeaderSize + args.nSubbandsPerFrame * args.nTimesPerFrame * sizeof(Beamlet); // header plus beamlet payload; ipHeaderSize is not included
+
+
+  if (itsInputTH->getType() == "TH_File" || itsInputTH->getType() == "TH_Null") {
+    // if we are reading from file, overwriting the buffer should not be allowed
+    // this way we can work with smaller files
+    itsBBuffer->setAllowOverwrite(false);
+  }
+
+  itsInputThread = new InputThread(args); // the InputThread constructor starts the thread right away
+}
+
+
+void WH_InputSection::preprocess() // allocates the beamlet buffer, starts the input thread and calls startBufferRead() with the parset start time
+{
+  itsPrePostTimer.start(); // stopped again in postprocess()
+
+  itsCurrentComputeCore = 0;
+  itsNrCoresPerPset	= itsCS1PS->nrCoresPerPset();
+  itsPsetNumber		= getBGLpersonality()->getPsetNum();
+
+  // create the buffer controller.
+  int cyclicBufferSize = itsCS1PS->nrSamplesToBuffer();
+  int subbandsToReadFromFrame = itsCS1PS->subbandsToReadFromFrame();
+  ASSERTSTR(subbandsToReadFromFrame <= itsCS1PS->getInt32("OLAP.nrSubbandsPerFrame"), "subbandsToReadFromFrame (" << subbandsToReadFromFrame << ") exceeds OLAP.nrSubbandsPerFrame (" << itsCS1PS->getInt32("OLAP.nrSubbandsPerFrame") << ")");
+
+  itsBBuffer = new BeamletBuffer(cyclicBufferSize, subbandsToReadFromFrame, cyclicBufferSize/6, cyclicBufferSize/6);
+  startThread(); // needs itsBBuffer, so it must come after the allocation above
+
+  itsDelayCompensation = itsCS1PS->getBool("OLAP.delayCompensation");
+
+  double startTime = itsCS1PS->startTime(); // UTC
+
+  int sampleFreq = (int) itsCS1PS->sampleRate(); // NOTE(review): truncates if sampleRate() is fractional -- confirm this is intended
+  int seconds	 = (int) floor(startTime);
+  int samples	 = (int) ((startTime - floor(startTime)) * sampleFreq); // fractional second expressed in samples
+
+  itsSyncedStamp = TimeStamp(seconds, samples);
+
+  std::clog << "Starting buffer at " << itsSyncedStamp << std::endl;
+  itsBBuffer->startBufferRead(itsSyncedStamp);
+}
+
+
+void WH_InputSection::limitFlagsLength(SparseSet<unsigned> &flags)
+{
+  // when more than 16 ranges are flagged, include everything from the start of the 16th range to the end of the last one (presumably merging them into a single range)
+  const std::vector<SparseSet<unsigned>::range> &flagged = flags.getRanges();
+  const size_t nrRanges = flagged.size();
+  if (nrRanges > 16) flags.include(flagged[15].begin, flagged[nrRanges - 1].end);
+}
+
+
+void WH_InputSection::process() 
+{ 
+  BGL_Command command(BGL_Command::PROCESS); // written to each selected core ahead of its data
+
+  itsProcessTimer.start();
+
+  TimeStamp delayedStamp = itsSyncedStamp - itsNHistorySamples; // the read window starts itsNHistorySamples before the synced stamp
+  itsSyncedStamp += itsNSamplesPerSec; // advance for the next process() call
+  int samplesDelay;
+
+  // set delay
+  if (itsDelayCompensation) {
+    DH_Delay *dh = static_cast<DH_Delay *>(getDataManager().getInHolder(0)); // input holder 0 is the DH_Delay added in the constructor
+    struct DH_Delay::DelayInfo &delay = (*dh)[itsStationNr];
+    delayedStamp -= delay.coarseDelay; // coarse delay shifts the read position
+    samplesDelay		   = -delay.coarseDelay; // only used in the log line below
+    itsIONtoCNdata.delayAtBegin()  = delay.fineDelayAtBegin; // fine delays are passed on in itsIONtoCNdata
+    itsIONtoCNdata.delayAfterEnd() = delay.fineDelayAfterEnd;
+  } else {
+    samplesDelay		   = 0;
+    itsIONtoCNdata.delayAtBegin()  = 0;
+    itsIONtoCNdata.delayAfterEnd() = 0;
+  }
+
+  itsBBuffer->startReadTransaction(delayedStamp, itsNSamplesPerSec + itsNHistorySamples); // paired with stopReadTransaction() at the end
+
+  itsIONtoCNdata.alignmentShift() = itsBBuffer->alignmentShift();
+
+  // set flags
+  itsBBuffer->readFlags(itsIONtoCNdata.flags());
+  limitFlagsLength(itsIONtoCNdata.flags()); // caps the flag set at 16 ranges before it is sent
+  std::clog << "RSP " << itsStationNr << ' ' << delayedStamp << " delay: " << samplesDelay << " flags: " << itsIONtoCNdata.flags() << " (" << (100.0 * itsIONtoCNdata.flags().count() / (itsNSamplesPerSec + itsNHistorySamples)) << "%)" << std::endl;
+
+  for (unsigned subbandBase = 0; subbandBase < itsNSubbandsPerPset; subbandBase ++) { // one target core per subbandBase
+    unsigned	    core = BGL_Mapping::mapCoreOnPset(itsCurrentComputeCore, itsPsetNumber);
+    TransportHolder *th  = TH_ZoidServer::theirTHs[core];
+
+    command.write(th);
+    itsIONtoCNdata.write(th);
+
+    itsGetElemTimer.start();
+
+    for (unsigned pset = 0; pset < itsCS1PS->nrPsets(); pset ++) { // same offset within every pset's block of subbands
+      unsigned subband = itsNSubbandsPerPset * pset + subbandBase;
+
+      itsBBuffer->sendSubband(th, subband);
+    }
+
+    itsGetElemTimer.stop();
+
+    if (++ itsCurrentComputeCore == itsNrCoresPerPset) // cores are used round-robin; the position persists across calls
+      itsCurrentComputeCore = 0;
+  }
+
+  itsBBuffer->stopReadTransaction();
+  itsProcessTimer.stop();
+}
+
+
+void WH_InputSection::postprocess() // stops the input thread, frees the beamlet buffer and prints the timers
+{
+  std::clog << "WH_InputSection::postprocess" << std::endl;
+
+  delete itsInputThread;	itsInputThread	= 0; // ~InputThread() sets the stop flag and joins the thread
+  delete itsBBuffer;		itsBBuffer	= 0; // freed only after the thread that writes into it has been joined
+
+  itsPrePostTimer.stop(); // started in preprocess()
+
+  itsPrePostTimer.print(clog);
+  itsProcessTimer.print(clog);
+  itsGetElemTimer.print(clog);
+}
+
+} // namespace CS1
+} // namespace LOFAR
diff --git a/Appl/CEP/CS1/CS1_IONProc/src/WH_InputSection.h b/Appl/CEP/CS1/CS1_IONProc/src/WH_InputSection.h
new file mode 100644
index 0000000000000000000000000000000000000000..943bd975a4a0b48daae7152cd395000110d6182c
--- /dev/null
+++ b/Appl/CEP/CS1/CS1_IONProc/src/WH_InputSection.h
@@ -0,0 +1,102 @@
+//#  WH_InputSection.h: Catch RSP ethernet frames and synchronize RSP inputs 
+//#
+//#  Copyright (C) 2006
+//#  ASTRON (Netherlands Foundation for Research in Astronomy)
+//#  P.O.Box 2, 7990 AA Dwingeloo, The Netherlands, seg@astron.nl
+//#
+//#  This program is free software; you can redistribute it and/or modify
+//#  it under the terms of the GNU General Public License as published by
+//#  the Free Software Foundation; either version 2 of the License, or
+//#  (at your option) any later version.
+//#
+//#  This program is distributed in the hope that it will be useful,
+//#  but WITHOUT ANY WARRANTY; without even the implied warranty of
+//#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+//#  GNU General Public License for more details.
+//#
+//#  You should have received a copy of the GNU General Public License
+//#  along with this program; if not, write to the Free Software
+//#  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+//#
+//#  $Id$
+
+#ifndef LOFAR_CS1_INPUTSECTION_WH_INPUTSECTION_H
+#define LOFAR_CS1_INPUTSECTION_WH_INPUTSECTION_H
+
+// \file
+// Catch RSP ethernet frames and synchronize RSP inputs 
+
+//# Never #include <config.h> or #include <lofar_config.h> in a header file!
+
+//# Includes
+#include <tinyCEP/WorkHolder.h>
+#include <CS1_Interface/CS1_Parset.h>
+#include <CS1_Interface/RSPTimeStamp.h>
+#include <CS1_Interface/ION_to_CN.h>
+#include <CS1_IONProc/BeamletBuffer.h>
+#include <CS1_IONProc/InputThread.h>
+#include <Common/Timer.h>
+
+#include <boost/multi_array.hpp>
+#include <pthread.h>
+
+
+namespace LOFAR {
+namespace CS1 {
+
+// This class is the workholder that receives data from the RSP boards
+// and distributes it per subband to the Blue Gene/L
+class WH_InputSection: public WorkHolder {
+  public:
+    explicit WH_InputSection(const string &name, 
+			     unsigned stationNumber,
+			     CS1_Parset *ps,
+			     TransportHolder *inputTH);
+    virtual ~WH_InputSection();
+    // NOTE(review): presumably creates a new instance with the given name -- confirm in the .cc.
+    virtual WH_InputSection *make(const string &name);
+    // process() sends the buffered subbands to the compute cores; postprocess() frees the thread and buffer.
+    virtual void preprocess();
+    virtual void process();
+    virtual void postprocess();
+      
+  private:
+    // Copying is not allowed
+    WH_InputSection (const WH_InputSection &that);
+    WH_InputSection& operator= (const WH_InputSection &that);
+    // Applied by process() to the flags read from the buffer. NOTE(review): presumably caps their number -- confirm.
+    void limitFlagsLength(SparseSet<unsigned> &flags);
+
+    bool itsDelayCompensation;
+    // Meta data (delays, alignment shift, flags) that process() writes to each compute core.
+    ION_to_CN itsIONtoCNdata;
+
+    // writer thread; deleted in postprocess()
+    InputThread *itsInputThread;
+
+    TransportHolder *itsInputTH;
+    unsigned itsStationNr; // printed in the "RSP" log line of process()
+    
+    CS1_Parset *itsCS1PS; // queried for nrPsets() in process()
+    
+    // synced stamp
+    TimeStamp itsSyncedStamp;
+    // process() loops over itsNSubbandsPerPset; each read transaction has length itsNSamplesPerSec + itsNHistorySamples.
+    unsigned itsNSubbandsPerPset;
+    unsigned itsNSamplesPerSec;
+    unsigned itsNHistorySamples;
+    // Round-robin index of the next compute core; process() wraps it to 0 at itsNrCoresPerPset.
+    unsigned itsCurrentComputeCore, itsNrCoresPerPset;
+    unsigned itsPsetNumber; // passed to BGL_Mapping::mapCoreOnPset() in process()
+    // Buffer that process() reads flags and subbands from; deleted in postprocess().
+    BeamletBuffer *itsBBuffer;
+    // Timers printed to clog in postprocess().
+    NSTimer itsPrePostTimer, itsProcessTimer, itsGetElemTimer;
+    
+    void startThread();
+};
+    
+} // namespace CS1
+} // namespace LOFAR
+
+#endif
diff --git a/Appl/CEP/CS1/CS1_IONProc/test/Makefile.am b/Appl/CEP/CS1/CS1_IONProc/test/Makefile.am
deleted file mode 100644
index 34507a9251f644a06d0576829068011f4d92a4eb..0000000000000000000000000000000000000000
--- a/Appl/CEP/CS1/CS1_IONProc/test/Makefile.am
+++ /dev/null
@@ -1,2 +0,0 @@
-
-include $(top_srcdir)/Makefile.common
diff --git a/Appl/CEP/CS1/CS1_InputSection/CS1_InputSection.spec.in b/Appl/CEP/CS1/CS1_InputSection/CS1_InputSection.spec.in
deleted file mode 100644
index 217b23e481b8c9e0a63b97560119e97bcbed1533..0000000000000000000000000000000000000000
--- a/Appl/CEP/CS1/CS1_InputSection/CS1_InputSection.spec.in
+++ /dev/null
@@ -1,160 +0,0 @@
-# -*- Mode:rpm-spec -*-
-# CS1_InputSection.spec.in
-#
-
-##############################################################################
-#
-# Preamble
-#
-##############################################################################
-
-Summary: CS1_InputSection is ... brief description ...
-
-%define release @RPM_RELEASE@
-%define version @VERSION@
-%define pkgname @PACKAGE@
-%define pkgdir %{pkgname}-%{version}-%{release}
-%define prefix /opt/lofar
-%define configure_args @RPM_CONFIGURE_ARGS@
-##define build_kernel_version @BUILD_KERNEL_VERSION@
-
-Name: %{pkgname}
-Version: %{version}
-Release: %{release}
-Copyright: LGPL
-Group: Application/System
-Source: %{pkgname}-%{version}.tar.gz
-BuildRoot: %{_tmppath}/%{pkgdir}-root
-URL: http://www.astron.nl
-Prefix: %{prefix}
-BuildArchitectures: i386 # Target platforms, i.e., i586
-##Requires: Common = 1.2   ## define dependent packages here
-Packager: %{packager}
-Distribution: The LOFAR project
-Vendor: ASTRON
-
-AutoReqProv: no
-
-%description
-
-CS1_InputSection ... more detailed description ...
-
-##############################################################################
-#
-# prep
-#
-##############################################################################
-%prep
-echo $prefix
-
-# create the build directory, untar the source
-%setup
-
-##############################################################################
-#
-# build
-#
-##############################################################################
-%build
-./configure %{configure_args} --prefix=%{prefix} && make
-
-##############################################################################
-#
-# install
-#
-##############################################################################
-%install
-# To make things work with BUILDROOT
-if [ "$RPM_BUILD_ROOT" != "%{_tmppath}/%{pkgdir}-root" ]
-then
-  echo
-  echo @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
-  echo @                                                                    @
-  echo @  RPM_BUILD_ROOT is not what I expected.  Please clean it yourself. @
-  echo @                                                                    @
-  echo @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
-  echo
-else
-  echo Cleaning RPM_BUILD_ROOT: "$RPM_BUILD_ROOT"
-  rm -rf "$RPM_BUILD_ROOT"
-fi
-mkdir -p $RPM_BUILD_ROOT%{prefix}
-make DESTDIR="$RPM_BUILD_ROOT" install
-
-#uninstall
-
-##############################################################################
-#
-# verify
-#
-##############################################################################
-#verify
-
-##############################################################################
-#
-# clean
-#
-##############################################################################
-%clean
-# Call me paranoid, but I do not want to be responsible for nuking
-# someone's harddrive!
-if [ "$RPM_BUILD_ROOT" != "%{_tmppath}/%{pkgdir}-root" ]
-then
-  echo
-  echo @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
-  echo @                                                                    @
-  echo @  RPM_BUILD_ROOT is not what I expected.  Please clean it yourself. @
-  echo @                                                                    @
-  echo @@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
-  echo
-else
-  echo Cleaning RPM_BUILD_ROOT: "$RPM_BUILD_ROOT"
-  rm -rf "$RPM_BUILD_ROOT"
-fi
-
-##############################################################################
-#
-# files
-#
-##############################################################################
-
-# empty 'files' means all distributed files
-%files
-%defattr(-, root, root)
-%{prefix}
-
-# Your application file list goes here
-# %{prefix}/lib/lib*.so*
-
-# Documentation
-# doc COPYING ChangeLog README AUTHORS NEWS
-# doc doc/*
-
-# link the module to the correct path
-%post 
-
-# before uninstall
-%preun
-
-# after uninstall
-%postun
-
-##############################################################################
-#
-# package devel
-#
-##############################################################################
-
-#package devel
-#Summary: Development files for %{pkgname}
-#Group: Applications/System
-#description devel
-#Development files for %{pkgname}.
-
-#files devel
-
-# Your development files go here
-# Programmers documentation goes here
-#doc doc
-
-# end of file
diff --git a/Appl/CEP/CS1/CS1_InputSection/Makefile.am b/Appl/CEP/CS1/CS1_InputSection/Makefile.am
deleted file mode 100644
index 7f71108fd0104c8b7ae50b39bc8a98b456a811a2..0000000000000000000000000000000000000000
--- a/Appl/CEP/CS1/CS1_InputSection/Makefile.am
+++ /dev/null
@@ -1,14 +0,0 @@
-SUBDIRS=src test include
-
-pkgextdir     = $(prefix)/config/$(PACKAGE)
-pkgext_DATA   = pkgext pkgextcppflags pkgextcxxflags pkgextldflags
-
-DISTCHECK_CONFIGURE_FLAGS=\
-      --with-common=$(prefix)
-
-EXTRA_DIST = \
-      Makefile.common \
-      CS1_InputSection.spec \
-      autoconf_share/compiletool
-
-include $(top_srcdir)/Makefile.common
diff --git a/Appl/CEP/CS1/CS1_InputSection/bootstrap b/Appl/CEP/CS1/CS1_InputSection/bootstrap
deleted file mode 100755
index 7f674c416802db5e7c438f093a99e63f63c784aa..0000000000000000000000000000000000000000
--- a/Appl/CEP/CS1/CS1_InputSection/bootstrap
+++ /dev/null
@@ -1,3 +0,0 @@
-#!/bin/sh
-
-../../../../autoconf_share/bootstrap ../../../../autoconf_share
diff --git a/Appl/CEP/CS1/CS1_InputSection/configure.in b/Appl/CEP/CS1/CS1_InputSection/configure.in
deleted file mode 100644
index 98e1c57156169ba77f44cbc4fa2fb5c39733bbf3..0000000000000000000000000000000000000000
--- a/Appl/CEP/CS1/CS1_InputSection/configure.in
+++ /dev/null
@@ -1,74 +0,0 @@
-dnl
-dnl Process this file with autoconf to produce a configure script.
-dnl
-AC_INIT
-dnl AC_CONFIG_AUX_DIR(config)
-dnl AM_CONFIG_HEADER(config/config.h)
-AM_CONFIG_HEADER(config.h)
-AM_INIT_AUTOMAKE(CS1_InputSection, 1.0, no-define)
-
-dnl Initialize for LOFAR (may set compilers)
-lofar_INIT
-
-dnl Checks for programs.
-dnl AC_PROG_AWK
-dnl AC_PROG_YACC
-AC_PROG_CC
-AC_PROG_CXX
-dnl AM_PROG_LEX
-AC_PROG_INSTALL
-AC_PROG_LN_S
-AC_DISABLE_SHARED
-AC_PROG_LIBTOOL
-
-dnl Checks for libraries.
-
-dnl dnl Replace `main' with a function in -lfl:
-dnl AC_CHECK_LIB(fl, main)
-dnl dnl Replace `main' with a function in -lcosev_r:
-dnl AC_CHECK_LIB(cosev_r, main)
-dnl dnl Replace `main' with a function in -lcosnm_r:
-dnl AC_CHECK_LIB(cosnm_r, main)
-dnl dnl Replace `main' with a function in -lorb_r:
-dnl AC_CHECK_LIB(orb_r, main)
-dnl dnl Replace `main' with a function in -lpthread:
-dnl AC_CHECK_LIB(pthread, main)
-dnl dnl Replace `main' with a function in -lvport_r:
-dnl AC_CHECK_LIB(vport_r, main)
-
-dnl Checks for header files.
-AC_HEADER_STDC
-AC_CHECK_HEADERS(unistd.h)
-
-dnl Checks for typedefs, structures, and compiler characteristics.
-AC_C_CONST
-AC_TYPE_SIZE_T
-
-dnl Checks for library functions.
-AC_FUNC_VPRINTF
-
-dnl
-dnl Check for LOFAR specific things
-dnl
-lofar_GENERAL
-lofar_MPI
-lofar_INTERNAL(LCS/Common,Common,,1,Common/LofarTypedefs.h,,)
-lofar_INTERNAL(LCS/AMC/AMCBase,AMCBase,,1,AMCBase/Epoch.h,,)
-lofar_INTERNAL(LCS/Transport,Transport,,1,Transport/DataHolder.h,,)
-lofar_INTERNAL(LCS/ACC/APS,APS,,1,APS/ParameterSet.h,,)
-lofar_INTERNAL(CEP/tinyCEP,tinyCEP,,1,tinyCEP/TinyDataManager.h,,)
-lofar_INTERNAL(CEP/CEPFrame,CEPFrame,,1,CEPFrame/DataManager.h,,)
-lofar_INTERNAL(Appl/CEP/CS1/CS1_Interface,CS1_Interface,,1,CS1_Interface/DH_RSP.h,,)
-lofar_EXTERNAL(boost,1,boost/thread.hpp,"boost_thread boost_date_time")
-
-dnl
-dnl Output Makefiles
-dnl
-AC_OUTPUT(
-include/Makefile
-include/CS1_InputSection/Makefile
-src/Makefile
-test/Makefile
-Makefile
-CS1_InputSection.spec
-)
diff --git a/Appl/CEP/CS1/CS1_InputSection/include/CS1_InputSection/AH_InputSection.h b/Appl/CEP/CS1/CS1_InputSection/include/CS1_InputSection/AH_InputSection.h
deleted file mode 100644
index bd6c49fcb78a29ea1762f0be34471eee65353099..0000000000000000000000000000000000000000
--- a/Appl/CEP/CS1/CS1_InputSection/include/CS1_InputSection/AH_InputSection.h
+++ /dev/null
@@ -1,79 +0,0 @@
-//#  AH_InputSection.h: one line description
-//#
-//#  Copyright (C) 2006
-//#  ASTRON (Netherlands Foundation for Research in Astronomy)
-//#  P.O.Box 2, 7990 AA Dwingeloo, The Netherlands, seg@astron.nl
-//#
-//#  This program is free software; you can redistribute it and/or modify
-//#  it under the terms of the GNU General Public License as published by
-//#  the Free Software Foundation; either version 2 of the License, or
-//#  (at your option) any later version.
-//#
-//#  This program is distributed in the hope that it will be useful,
-//#  but WITHOUT ANY WARRANTY; without even the implied warranty of
-//#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-//#  GNU General Public License for more details.
-//#
-//#  You should have received a copy of the GNU General Public License
-//#  along with this program; if not, write to the Free Software
-//#  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-//#
-//#  $Id$
-
-#ifndef LOFAR_CS1_INPUTSECTION_AH_INPUTSECTION_H
-#define LOFAR_CS1_INPUTSECTION_AH_INPUTSECTION_H
-
-// \file
-// one line description.
-
-//# Never #include <config.h> or #include <lofar_config.h> in a header file!
-
-//# Includes
-#include <CEPFrame/ApplicationHolder.h>
-#include <CS1_Interface/Stub_BGL.h>
-#include <CS1_Interface/Stub_Delay.h>
-#include <CS1_Interface/CS1_Parset.h>
-
-namespace LOFAR 
-{
-  namespace CS1 
-  {
-
-    // \addtogroup CS1_InputSection
-    // @{
-
-    // Description of class.
-    // This is the ApplicationHolder for the input section of the CS1 application
-    // Its main purposes are: 1) buffering the station data in an circular
-    // buffer, and 2) transpose the data over a fast interconnect (using
-    // MPI_Alltoallv, as opposed to CEPframe connections in earlier versions).
-
-    class AH_InputSection: public ApplicationHolder
-    {
-    public:
-      AH_InputSection();
-      virtual ~AH_InputSection();
-      virtual void define(const LOFAR::KeyValueMap&);
-      virtual void undefine();
-      virtual void run(int nsteps);
-
-    private:
-      // Copying is not allowed
-      AH_InputSection (const AH_InputSection& that);
-      AH_InputSection& operator= (const AH_InputSection& that);
-
-      //# Datamembers
-      CS1_Parset    *itsCS1PS;
-      Stub_Delay     *itsDelayStub;
-      Stub_BGL	     *itsOutputStub;
-
-      std::vector<unsigned>	itsInputNodes, itsOutputNodes;
-      std::vector<WorkHolder *> itsWHs;
-    };
-
-    // @}
-
-  } // namespace CS1
-} // namespace LOFAR
-
-#endif
diff --git a/Appl/CEP/CS1/CS1_InputSection/include/CS1_InputSection/BeamletBuffer.h b/Appl/CEP/CS1/CS1_InputSection/include/CS1_InputSection/BeamletBuffer.h
deleted file mode 100644
index 93c59b3060a08d043382bd6e714192c27d7a8bc9..0000000000000000000000000000000000000000
--- a/Appl/CEP/CS1/CS1_InputSection/include/CS1_InputSection/BeamletBuffer.h
+++ /dev/null
@@ -1,119 +0,0 @@
-//#  BeamletBuffer.h: a cyclic buffer that holds the beamlets from the rspboards
-//#
-//#  Copyright (C) 2006
-//#  ASTRON (Netherlands Foundation for Research in Astronomy)
-//#  P.O.Box 2, 7990 AA Dwingeloo, The Netherlands, seg@astron.nl
-//#
-//#  This program is free software; you can redistribute it and/or modify
-//#  it under the terms of the GNU General Public License as published by
-//#  the Free Software Foundation; either version 2 of the License, or
-//#  (at your option) any later version.
-//#
-//#  This program is distributed in the hope that it will be useful,
-//#  but WITHOUT ANY WARRANTY; without even the implied warranty of
-//#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-//#  GNU General Public License for more details.
-//#
-//#  You should have received a copy of the GNU General Public License
-//#  along with this program; if not, write to the Free Software
-//#  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-//#
-//#  $Id$
-
-#ifndef LOFAR_CS1_INPUTSECTION_BEAMLETBUFFER_H
-#define LOFAR_CS1_INPUTSECTION_BEAMLETBUFFER_H
-
-// \file
-// a cyclic buffer that holds the beamlets from the rspboards
-
-//# Never #include <config.h> or #include <lofar_config.h> in a header file!
-
-//# Includes
-#include <Common/lofar_vector.h>
-#include <Common/lofar_complex.h>
-#include <Common/Timer.h>
-#include <CS1_InputSection/LockedRange.h>
-#include <CS1_Interface/DH_RSP.h>
-#include <CS1_Interface/RSPTimeStamp.h>
-#include <CS1_Interface/SparseSet.h>
-#include <boost/thread.hpp>
-#include <boost/multi_array.hpp>
-
-namespace LOFAR 
-{
-  namespace CS1 
-  {
-
-    // \addtogroup CS1_InputSection
-    // @{
-
-    typedef DH_RSP::BufferType SampleType;
-
-    class Beamlet {
-      DH_RSP::BufferType Xpol, Ypol;
-    };
-
-    // A BeamletBuffer can hold the beamlets coming from the rspboards
-    // It is implemented as a cyclic buffer (using the mapTime2Index method).
-    // Locking is done using a LockedRange
-    // This buffer also reshuffles the data. It comes in in packets of different subbands per timestep.
-    // The data leaves as a time series per subband.
-    class BeamletBuffer
-    {
-    public:
-      BeamletBuffer(int bufferSize, unsigned nSubbands, unsigned history, unsigned readWriteDelay);
-      ~BeamletBuffer();
-
-      void writeElements(Beamlet* data, TimeStamp begin, unsigned nElements);
-      void getElements(boost::multi_array_ref<SampleType, 3> &buffers, SparseSet<unsigned> &flags, TimeStamp begin, unsigned nElements);
-
-      TimeStamp startBufferRead();
-      TimeStamp startBufferRead(TimeStamp);
-
-      void setAllowOverwrite(bool o) {itsLockedRange.setOverwriting(o);};
-
-      void clear() {itsLockedRange.clear();};
-
-    private:
-      // Copying is not allowed
-      BeamletBuffer (const BeamletBuffer& that);
-      BeamletBuffer& operator= (const BeamletBuffer& that);
-
-      // Needed for mapping a timestamp to a place in the buffer
-      unsigned mapTime2Index(TimeStamp time) const { 
-	// TODO: this is very slow because of the %
-	return time % itsSize;
-      }
-
-      // checked for skipped data and flag it in chunks
-      void checkForSkippedData(TimeStamp writeBegin);
-
-      //# Datamembers
-      //vector<Beamlet *> itsSBBuffers;
-      mutex itsFlagsMutex;
-      SparseSet<unsigned> itsFlags;
-      unsigned itsNSubbands;
-      int itsSize;
-
-      boost::multi_array<SampleType, 3> itsSBBuffers;
-
-      TimeStamp itsHighestWritten;
-      
-      LockedRange<TimeStamp, int> itsLockedRange;
-
-      // These are for statistics
-      unsigned itsDroppedItems;
-      unsigned itsDummyItems;
-      unsigned itsSkippedItems;
-
-      NSTimer itsWriteTimer;
-      NSTimer itsReadTimer;
-
-    };
-
-    // @}
-
-  } // namespace CS1
-} // namespace LOFAR
-
-#endif
diff --git a/Appl/CEP/CS1/CS1_InputSection/include/CS1_InputSection/Makefile.am b/Appl/CEP/CS1/CS1_InputSection/include/CS1_InputSection/Makefile.am
deleted file mode 100644
index d04e116fc08fff9329913f1b9770937070b51150..0000000000000000000000000000000000000000
--- a/Appl/CEP/CS1/CS1_InputSection/include/CS1_InputSection/Makefile.am
+++ /dev/null
@@ -1,9 +0,0 @@
-pkginclude_HEADERS        = Connector.h \
-	 InputThread.h \
-	 LockedRange.h \
-	 LockedRange.tcc \
-	 BeamletBuffer.h \
-	 AH_InputSection.h \
-	 WH_InputSection.h
-
-include $(top_srcdir)/Makefile.common
diff --git a/Appl/CEP/CS1/CS1_InputSection/include/CS1_InputSection/WH_InputSection.h b/Appl/CEP/CS1/CS1_InputSection/include/CS1_InputSection/WH_InputSection.h
deleted file mode 100644
index 1070f0a4f653f1afdf27acc26a543f0534e9d164..0000000000000000000000000000000000000000
--- a/Appl/CEP/CS1/CS1_InputSection/include/CS1_InputSection/WH_InputSection.h
+++ /dev/null
@@ -1,140 +0,0 @@
-//#  WH_InputSection.h: Catch RSP ethernet frames and synchronize RSP inputs 
-//#
-//#  Copyright (C) 2006
-//#  ASTRON (Netherlands Foundation for Research in Astronomy)
-//#  P.O.Box 2, 7990 AA Dwingeloo, The Netherlands, seg@astron.nl
-//#
-//#  This program is free software; you can redistribute it and/or modify
-//#  it under the terms of the GNU General Public License as published by
-//#  the Free Software Foundation; either version 2 of the License, or
-//#  (at your option) any later version.
-//#
-//#  This program is distributed in the hope that it will be useful,
-//#  but WITHOUT ANY WARRANTY; without even the implied warranty of
-//#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-//#  GNU General Public License for more details.
-//#
-//#  You should have received a copy of the GNU General Public License
-//#  along with this program; if not, write to the Free Software
-//#  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-//#
-//#  $Id$
-
-#ifndef LOFAR_CS1_INPUTSECTION_WH_INPUTSECTION_H
-#define LOFAR_CS1_INPUTSECTION_WH_INPUTSECTION_H
-
-// \file
-// Catch RSP ethernet frames and synchronize RSP inputs 
-
-//# Never #include <config.h> or #include <lofar_config.h> in a header file!
-
-//# Includes
-#include <tinyCEP/WorkHolder.h>
-#include <CS1_Interface/RSPTimeStamp.h>
-#include <CS1_Interface/DH_Subband.h>
-#include <Common/Timer.h>
-
-#include <boost/thread.hpp>
-#include <boost/multi_array.hpp>
-
-
-namespace LOFAR 
-{
-  class NSTimer;
-  class TransportHolder;
-
-  namespace CS1 
-  {
-
-    // \addtogroup CS1_InputSection
-    // @{
-
-    //# Forward Declarations
-    class BeamletBuffer;
-    class InputThread;
-
-    // This class is the workholder that receives data from the RSP boards
-    // and distributes it per subband to the Blue Gene/L
-    class WH_InputSection: public WorkHolder {
-    public:
-      typedef DH_Subband::SampleType SampleType;
-
-      explicit WH_InputSection(const string &name, 
-			   bool doInput,
-			   bool doTranspose,
-			   bool doOutput,
-                           CS1_Parset *ps,
-                           TransportHolder *inputTH,
-			   unsigned stationNr,
-			   unsigned nrInputChannels,
-			   unsigned nrOutputChannels,
-			   const std::vector<unsigned> &inputNodes,
-			   const std::vector<unsigned> &outputNodes);
-      virtual ~WH_InputSection();
-    
-      virtual WH_InputSection *make(const string &name);
-     
-      virtual void preprocess();
-      virtual void process();
-      virtual void postprocess();
-      
-    private:
-      // Copying is not allowed
-      WH_InputSection (const WH_InputSection &that);
-      WH_InputSection& operator= (const WH_InputSection &that);
-
-      void doInput(SparseSet<unsigned> &flags);
-      void doOutput();
-
-      void limitFlagsLength(SparseSet<unsigned> &flags);
-
-      void transposeData();
-      void transposeMetaData(const SparseSet<unsigned> &flags);
-      
-      //# Datamembers
-      bool itsDelayCompensation;
-      bool itsDoInput, itsDoTranspose, itsDoOutput;
-      const std::vector<unsigned> &itsInputNodes, &itsOutputNodes;
-
-      boost::multi_array<SampleType, 4> *itsInputData, *itsOutputData;
-
-      struct metaData {
-	float fineDelayAtBegin, fineDelayAfterEnd;
-	char  flagsBuffer[132]; // enough for 16 flag ranges
-      } *itsInputMetaData, *itsOutputMetaData;
-
-      // writer thread
-      InputThread *itsInputThreadObject;
-      boost::thread *itsInputThread;
-
-      TransportHolder *itsInputTH;
-      uint itsStationNr;
-      
-      CS1_Parset *itsCS1PS;
-      
-      // synced stamp
-      TimeStamp itsSyncedStamp;
-     
-      unsigned itsNSubbandsPerCell;
-      unsigned itsNSamplesPerSec;
-      unsigned itsNHistorySamples;
-     
-      BeamletBuffer *itsBBuffer;
-      
-      bool itsFirstRun;
-
-      NSTimer itsPrePostTimer, itsProcessTimer, itsGetElemTimer;
-      
-      void	  startThread();
-    
-      //handle timer alarm
-      static void timerSignal(int signal);    
-      static bool signalReceived;
-    };
-    
-    // @}
-
-  } // namespace CS1
-} // namespace LOFAR
-
-#endif
diff --git a/Appl/CEP/CS1/CS1_InputSection/include/Makefile.am b/Appl/CEP/CS1/CS1_InputSection/include/Makefile.am
deleted file mode 100644
index d3e14aefabc9cc2ca755f8217cb6c636124e2b0c..0000000000000000000000000000000000000000
--- a/Appl/CEP/CS1/CS1_InputSection/include/Makefile.am
+++ /dev/null
@@ -1,3 +0,0 @@
-SUBDIRS	= CS1_InputSection
-
-include $(top_srcdir)/Makefile.common
diff --git a/Appl/CEP/CS1/CS1_InputSection/package.dox b/Appl/CEP/CS1/CS1_InputSection/package.dox
deleted file mode 100644
index bbb273593a9b6c31bce3f0b52c821416e2454dc6..0000000000000000000000000000000000000000
--- a/Appl/CEP/CS1/CS1_InputSection/package.dox
+++ /dev/null
@@ -1,2 +0,0 @@
-// \ingroup CS1
-// \defgroup CS1_InputSection CS1_InputSection Description
diff --git a/Appl/CEP/CS1/CS1_InputSection/scripts/InputAppl.sh b/Appl/CEP/CS1/CS1_InputSection/scripts/InputAppl.sh
deleted file mode 100755
index d5fee786c5ae24d777627d91a5cd43395137ad4e..0000000000000000000000000000000000000000
--- a/Appl/CEP/CS1/CS1_InputSection/scripts/InputAppl.sh
+++ /dev/null
@@ -1,119 +0,0 @@
-#!/bin/bash
-#
-# /InputAppl: a start/stop/status script for swlevel
-#
-# Copyright (C) 2007
-# ASTRON (Netherlands Foundation for Research in Astronomy)
-# P.O.Box 2, 7990 AA Dwingeloo, The Netherlands, seg@astron.nl
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-#
-# Syntax: InputAppl start|stop|status
-#
-# $Id$
-#
-
-#
-# SyntaxError msg
-#
-SyntaxError()
-{
-	Msg=$1
-
-	[ -z "${Msg}" ] || echo "ERROR: ${Msg}"
-	echo ""
-	echo "Syntax: $(basename $0) start | stop | status"
-	echo ""
-	exit 1
-}
-
-#
-# Start the program when it exists
-#
-start_prog()
-{
-	# put here your code to start your program
-	echo 'start_prog()'
-}
-
-#
-# Stop the program when it is running
-#
-stop_prog()
-{
-	# put here your code to stop your program
-	ps -ef | grep -v grep | grep -v ACDaemon[^\ ] | grep ACDaemon 2>&1 >/dev/null
-	if [ $? -ne 0 ]; then
-	  if [ -f ../etc/ACD.admin ]; then 	
-	    rm ../etc/ACD.admin
-	  fi
-	fi  
-	cexec killall -9 CS1_InputSection 
-}
-
-#
-# show status of program
-#
-# arg1 = levelnr
-#
-status_prog()
-{
-	levelnr=$1
-
-	# put here code to figure out the status of your program and
-	# fill the variables prog and pid with the right information
-
-	# e.g.
-	prog=InputAppl
-	pid=DOWN
-	cexec 'ps -ef | grep -v grep '| grep CS1_InputSection| grep -v bash2>$1 1>/dev/null
-	if [ $? -eq 0 ]; then
-	  cexec 'ps -ef | grep -v grep | grep CS1_InputSection'| grep -v bash | awk -v levelnr=${levelnr} '{
-          if (substr($1,1,3) == "---") {
-            machine = substr($2,1,6)
-          }
-          else {
-            if (substr($1,1,3) != "***") {
-              fullname="InputAppl@"machine
-              printf "%s : %-25.25s %s\n", levelnr,fullname,$2
-            }
-          }
-        }'
-	else
-          # this line should be left in, it shows the status in the right format
-	  echo ${levelnr} ${prog} ${pid} | awk '{ printf "%s : %-25s %s\n", $1, $2, $3 }'
-	fi
-}
-
-#
-# MAIN
-#
-
-# when no argument is given show syntax error.
-if [ -z "$1" ]; then
-	SyntaxError
-fi
-
-# first power down to this level
-case $1 in
-	start)	start_prog
-			;;
-	stop)	stop_prog
-			;;
-	status)	status_prog $2
-			;;
-	*)		SyntaxError
-			;;
-esac
diff --git a/Appl/CEP/CS1/CS1_InputSection/scripts/prepare_CS1_InputSection.py b/Appl/CEP/CS1/CS1_InputSection/scripts/prepare_CS1_InputSection.py
deleted file mode 100755
index 836487af53e77bec1b4748a740f50b840e9b3e20..0000000000000000000000000000000000000000
--- a/Appl/CEP/CS1/CS1_InputSection/scripts/prepare_CS1_InputSection.py
+++ /dev/null
@@ -1,240 +0,0 @@
-#!/usr/bin/env python
-
-import math
-import time
-import datetime
-import os
-import sys
-import copy
-from optparse import OptionParser
-
-class CS1_Parset(object):
-
-    def __init__(self):
-        self.stationList = list()
-	self.parameters = dict()
-
-    def readFromFile(self, fileName):
-        lastline = ''
-        for line in open(fileName, 'r').readlines():
-            lastline = lastline + line.split('#')[0]
-            lastline = lastline.rstrip()
-            if len(lastline) > 0 and lastline[-1] == '\\':
-                lastline = lastline[:-1]
-            elif '=' in lastline:
-                key, value = lastline.split('=')
-                self.parameters[key.strip()] = value.strip()
-                lastline = ''
-
-    def writeToFile(self, fileName):
-        outf = open(fileName, 'w')
-        for key, value in sorted(self.parameters.iteritems()):
-            outf.write(key + ' = ' + str(value) + '\n')
-        outf.close()
-
-    def __contains__(self, key):
-        return key in self.parameters
-        
-    def __setitem__(self, key, value):
-        self.parameters[key] = value
-
-    def __getitem__(self, key):
-        return self.parameters[key]
-
-    def getInt32Vector(self, key):
-        ln = self.parameters[key]
-	ln_tmp = ln.split('[')
-        line = 	ln_tmp[1].split(']')
-        return [int(lp) for lp in line[0].split(',')]
-
-    def getInt32(self, key):
-        return int(self.parameters[key])
-
-    def getStringVector(self, key):
-        line = self.parameters[key]
-        line.strip('[').strip(']')
-        return line.split(',')
-
-    def getString(self, key):
-        return self.parameters[key]
-
-    def getFloat(self, key):
-        return float(self.parameters[key])
-
-    def getBool(self, key):
-        return self.parameters[key] == 'true'
-
-class ClusterSlave(object):
-    def __init__(self, intName, extIP):
-        self.intName = intName
-        self.extIP = extIP
-    def getIntName(self):
-        return self.intName
-    def getExtIP(self):
-        return self.extIP
-
-class ClusterFEN(object):
-    def __init__(self, name, address, slaves = list()):
-        self.slaves = slaves
-        #Host.__init__(self, name, address)
-    def getSlaves(self, number = None):
-        return self.slaves[0:number]
-    def setSlaves(self, slaves):
-        self.slaves = slaves
-    def setSlavesByPattern(self, intNamePattern, extIPPattern, numberRange):
-        self.slaves = list()
-        for number in numberRange:
-            self.slaves.append(ClusterSlave(intNamePattern % number, extIPPattern % number))
-
-def parseStationList():
-    """
-    pattern = '^CS010_dipole0|CS010_dipole4|CS010_dipole8|CS010_dipole12| \
-	        CS008_dipole0|CS008_dipole4|CS008_dipole8|CS008_dipole12| \
-		CS001_dipole0|CS001_dipole4|CS001_dipole8|CS001_dipole12| \
-		CS016_dipole0|CS016_dipole4|CS016_dipole8|CS016_dipole12$'
-    print 'pattern = ' + str(re.search(pattern, 'CS010_dipole8'))
-    """
-
-def getInputNodes(stationList, parset):
-    inputNodelist = list()
-	
-    for s in stationList:
-        s = s.strip(" ")
-	s = s.strip("[ ]")
-	s = s.strip("'")
-	name = parset.getString('PIC.Core.' + s + '.port')
-	name=name.split(":")
-	name=name[0].strip("lii")
-	inputNodelist.append(int(name))
-    
-    return inputNodelist
-
-if __name__ == '__main__':
-    
-    parser = OptionParser()
-
-    parser.add_option('--parsetfile'    , dest='parsetfile'    , default='../share/Transpose.parset', type='string', help='username [%default]')
-
-    # parse the options
-    (options, args) = parser.parse_args()
-    
-    # create the parset
-    parset = CS1_Parset() 
-    stationList = list()
-    
-    if os.path.exists(options.parsetfile):
-       
-        #read keys from parset file.
-        parset.readFromFile(options.parsetfile)
-	
-	#read keys from parset file: OLAP.parset
-	if os.path.exists("OLAP.parset"):
-	    parset.readFromFile('OLAP.parset')
-	else:
-	    print 'file OLAP.parset does not exist!'
-	    sys.exit(0)
-
-        '''
-        if parset.getString('OLAP.OLAP_Conn.station_Input_Transport') == 'NULL':
-            # Read from memory!
-            parset['Observation.startTime'] = datetime.datetime.fromtimestamp(1)
-        else:
-            start=int(time.time() + 80)
-            parset['Observation.startTime'] = datetime.datetime.fromtimestamp(start)
-
-	duration = 300
-	
-	parset['Observation.stopTime'] = datetime.datetime.fromtimestamp(start + duration) 
-	
-	nSubbandSamples = parset.getFloat('OLAP.BGLProc.integrationSteps') * parset.getFloat('Observation.channelsPerSubband')
-	stepTime = nSubbandSamples / (parset.getFloat('Observation.sampleClock') * 1000000.0 / 1024)
-	startTime = parset['Observation.startTime']
-        stopTime = parset['Observation.stopTime']
-	sz = int(math.ceil((time.mktime(stopTime.timetuple()) - time.mktime(startTime.timetuple())) / stepTime))
-	noRuns = ((sz+15)&~15) + 16
-	parset['Observation.stopTime'] = datetime.datetime.fromtimestamp(time.mktime(startTime.timetuple()) + noRuns)
-        ''' 
-	
-	if parset.getString('OLAP.OLAP_Conn.input_DelayComp_Transport') == 'Null':
-	    parset['OLAP.OLAP_Conn.input_DelayComp_Transport']	 = 'NULL'
-	    
-	if parset.getString('OLAP.OLAP_Conn.input_BGLProc_Transport') == 'Null':
-	    parset['OLAP.OLAP_Conn.input_BGLProc_Transport']	 = 'NULL'
-	    
-	if parset.getString('OLAP.OLAP_Conn.station_Input_Transport') == 'Null':
-	    parset['OLAP.OLAP_Conn.station_Input_Transport']	 = 'NULL'
-
-	if parset.getString('OLAP.OLAP_Conn.BGLProc_Storage_Transport') == 'Null':
-	    parset['OLAP.OLAP_Conn.BGLProc_Storage_Transport']	 = 'NULL'
-       
-        if not parset.getBool('OLAP.BGLProc.useZoid'): # override CS1.parset
-            print 'ZOID!!!!'
-	    parset['OLAP.IONProc.useScatter']	 = 'false'
-	    parset['OLAP.IONProc.useGather']	 = 'false'
-	    parset['OLAP.BGLProc.nodesPerPset']	 = 8
-	    parset['OLAP.IONProc.maxConcurrentComm'] = 2
-    
-	if parset.getBool('OLAP.IONProc.useGather'):
-	    print 'useGather!!!!'
-	    #parset['OLAP.IONProc.integrationSteps']     = integrationTime
-	    parset['OLAP.StorageProc.integrationSteps'] = 1
-	else:
-	    parset['OLAP.IONProc.integrationSteps']     = 1
-	    #parset['OLAP.StorageProc.integrationSteps'] = integrationTime
-
-        if parset.getInt32('Observation.sampleClock') == 160:
-            parset['OLAP.BGLProc.integrationSteps'] = 608
-        elif parset.getInt32('Observation.sampleClock') == 200:
-            parset['OLAP.BGLProc.integrationSteps'] = 768
-	
-	#get the stations
-	stationList = parset.getStringVector('OLAP.storageStationNames')
-	parset['OLAP.nrRSPboards'] = len(stationList)
-	
-	#create input cluster objects
-        liifen    = ClusterFEN(name = 'liifen', address = '129.125.99.51')
-        liifen.setSlavesByPattern('lii%03d', '10.162.0.%d', [1,2,3,4,5,6,7,8,9,10,11,12])
-
-        #set keys 'Input.InputNodes' and 'Input.OutputNodes'
-        nSubbands = len(parset.getInt32Vector('Observation.subbandList'))
-        nSubbandsPerCell = parset.getInt32('OLAP.subbandsPerPset') * parset.getInt32('OLAP.BGLProc.psetsPerCell')
-	nCells = float(nSubbands) / float(nSubbandsPerCell)
-        if not nSubbands % nSubbandsPerCell == 0:
-            raise Exception('Not a integer number of compute cells (nSubbands = %d and nSubbandsPerCell = %d)' % (nSubbands, nSubbandsPerCell))
-        nCells = int(nCells)
-        host = copy.deepcopy(liifen)
-        slaves = host.getSlaves()
-	
-	inputNodes = getInputNodes(stationList, parset)
-	outputNodes = range(1, nCells + 1)
-	allNodes = inputNodes + [node for node in outputNodes if not node in inputNodes]
-
-	inputIndices = range(len(inputNodes))
-	outputIndices = [allNodes.index(node) for node in outputNodes]
-
-	newslaves = [slaves[ind - 1] for ind in allNodes]
-	host.setSlaves(newslaves)
-	noProcesses = len(newslaves)
-	
-	parset['Input.InputNodes'] = inputIndices
-	parset['Input.OutputNodes'] = outputIndices
-	
-	bglprocIPs = [newslaves[j].getExtIP() for j in outputIndices]
-	parset['OLAP.OLAP_Conn.input_BGLProc_ServerHosts'] = '[' + ','.join(bglprocIPs) + ']'
-
-	parset.writeToFile('../share/Transpose.parset')
-	
-	#createMachinefile
-	lmf = '/tmp/CS1_tmpfile'
-	slaves = host.getSlaves(noProcesses)
-	outf = open(lmf, 'w')
-        for slave in slaves:
-            outf.write(slave.getIntName() + '\n')
-        outf.close()
-	os.system('mv /tmp/CS1_tmpfile /opt/lofar/bin/Transpose.machinefile 2>&1 >> /dev/null')
-	print 'noProcesses = ' + str(noProcesses)
-	sys.exit(noProcesses)
-    else:
-        print 'file ' + options.parsetfile + ' does not exist!'
-        sys.exit(0) 	
-	
diff --git a/Appl/CEP/CS1/CS1_InputSection/scripts/startMPI.sh b/Appl/CEP/CS1/CS1_InputSection/scripts/startMPI.sh
deleted file mode 100755
index 6e8011dd172f4269aee09aa5b269b6eb040d4adf..0000000000000000000000000000000000000000
--- a/Appl/CEP/CS1/CS1_InputSection/scripts/startMPI.sh
+++ /dev/null
@@ -1,20 +0,0 @@
-# startMPI.sh jobName machinefile executable paramfile noNodes
-#
-# $1 jobName             identifier for this job
-# $2 machinefile         procID.machinefile
-# $3 executable          processname
-# $4 parameterfile       procID.ps
-# $5 numberOfNodes
-#
-# calls mpirun and remembers the pid
-#
-
-# now all ACC processes expect to be started with ACC as first parameter
-
-# start process
-# TODO: on some hosts, mpirun has a different name (or a specific path)
-#       on some hosts, we should use -hostfile instead of -machinefile
-
-./prepare_$3.py
-
-cd /opt/lofar/bin/; mpirun_rsh -np $5 -hostfile $2 ./$3 ACC $4 $1>>/opt/lofar/log/$3.log 2>&1 &
diff --git a/Appl/CEP/CS1/CS1_InputSection/scripts/stopMPI.sh b/Appl/CEP/CS1/CS1_InputSection/scripts/stopMPI.sh
deleted file mode 100755
index 107a6598e7e34d34dd1e88f82781c0cc7028d322..0000000000000000000000000000000000000000
--- a/Appl/CEP/CS1/CS1_InputSection/scripts/stopMPI.sh
+++ /dev/null
@@ -1,13 +0,0 @@
-# stopMPI.sh execName 
-#
-#
-# Stops the given process by killing the process whose pid is in the
-# proces.pid file.
-
-# TODO: for some mpi versions it is not enough to kill mpirun
-#       we could "killall executable", but that would also kill
-#       processes started by another ApplicationController
-
-cexec killall -9 $1
-
-rm -f $1*.ps
diff --git a/Appl/CEP/CS1/CS1_InputSection/scripts/swlevel.conf b/Appl/CEP/CS1/CS1_InputSection/scripts/swlevel.conf
deleted file mode 100644
index 7bf43012fbdd4e0d5cf8e24a6559ee4424019e29..0000000000000000000000000000000000000000
--- a/Appl/CEP/CS1/CS1_InputSection/scripts/swlevel.conf
+++ /dev/null
@@ -1,10 +0,0 @@
-#
-# swlevel.conf
-#
-# Table to manage the progrma that should be started and stopped
-# level : up : down : root : mpi : program
-#
-
-1:u:d:::ACDaemon
-6::d:::ApplController
-6::d:::InputAppl
diff --git a/Appl/CEP/CS1/CS1_InputSection/src/ACCmain_InputSection.cc b/Appl/CEP/CS1/CS1_InputSection/src/ACCmain_InputSection.cc
deleted file mode 100644
index 33d9006d3879825fa60a0ee9746810cc54ff7ebd..0000000000000000000000000000000000000000
--- a/Appl/CEP/CS1/CS1_InputSection/src/ACCmain_InputSection.cc
+++ /dev/null
@@ -1,235 +0,0 @@
-//#  ACCmain.cc: main loop that can be used by any ACC enabled program
-//#
-//#  Copyright (C) 2006
-//#  ASTRON (Netherlands Foundation for Research in Astronomy)
-//#  P.O.Box 2, 7990 AA Dwingeloo, The Netherlands, seg@astron.nl
-//#
-//#  This program is free software; you can redistribute it and/or modify
-//#  it under the terms of the GNU General Public License as published by
-//#  the Free Software Foundation; either version 2 of the License, or
-//#  (at your option) any later version.
-//#
-//#  This program is distributed in the hope that it will be useful,
-//#  but WITHOUT ANY WARRANTY; without even the implied warranty of
-//#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-//#  GNU General Public License for more details.
-//#
-//#  You should have received a copy of the GNU General Public License
-//#  along with this program; if not, write to the Free Software
-//#  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-//#
-//#  $Id$
-
-//# Always #include <lofar_config.h> first!
-#include <lofar_config.h>
-
-//# Includes
-#include <libgen.h>
-#include <Common/LofarLogger.h>
-#include <Common/LofarLocators.h>
-#include <APS/ParameterSet.h>
-#include <APS/Exceptions.h>
-#include <PLC/ProcControlServer.h>
-#include <CS1_InputSection/ACCmain_InputSection.h>
-#ifdef HAVE_MPI
-#include <Transport/TH_MPI.h>
-#endif
-
-namespace LOFAR {
-namespace CS1 {
-
-using ACC::APS::ParameterSet;
-
-//
-// ACCmain(argc, argv, procCtrl*)
-//
-int ACCmain_InputSection (int argc, char* orig_argv[], ACC::PLC::ProcessControl* theProcess) {
-	char** argv = orig_argv;
-
-#ifdef HAVE_MPI
-	TH_MPI::initMPI(argc, orig_argv);
-
-	int myRank = TH_MPI::getCurrentRank();
-
-	// The MPI standard does not demand that the commandline
-	// arguments are distributed, so we do it ourselves. 
-
-	// Broadcast number of arguments
-	MPI_Bcast(&argc, 1, MPI_INT, 0, MPI_COMM_WORLD);
-	// Some MPI implementations block on the Bcast. Synchronize
-	// the nodes to avoid deadlock. 
-	MPI_Barrier(MPI_COMM_WORLD);
-
-	if (myRank != 0) {
-	        argv = new char*[argc + 1];
-	        argv[argc] = 0;
-	} else {
-		char** argv = orig_argv;
-	}
-
-	for (int arg = 0; arg < argc; arg++) {
-		int arglen = 0;
-		if (myRank == 0) {
-			arglen = strlen(argv[arg]) + 1;
-		}
-
-		// Broadcast the length of this argument
-		MPI_Bcast(&arglen, 1, MPI_INT, 0, MPI_COMM_WORLD);
-
-		if (myRank != 0) {
-			argv[arg] = new char[arglen];
-		}
-		// Broadcast the argument;
-		MPI_Bcast(argv[arg], arglen, MPI_BYTE, 0, MPI_COMM_WORLD);	
-	}
-#endif
-
-	// 
-	string	programName(basename(argv[0]));
-	bool	ACCmode(true);
-
-	try {
-		// Check invocation syntax: [ACC] parsetfile UniqProcesName
-		// When we are called by ACC the first argument is ACC.
-		// otherwise we do all states right after each other.
-		if ((argc < 2) || (strcmp("ACC", argv[1]) != 0)) {
-			// we were not called by ACC
-			LOG_DEBUG(programName + " not started by ACC");
-			ACCmode = false;
-		}
-		else {
-			LOG_DEBUG(programName + " started by ACC");
-		}
-
-		// Read in the parameterset.
-		ConfigLocator	CL;
-		string	ParsetFile = CL.locate(argv[1 + (ACCmode ? 1 : 0)]);
-		ASSERTSTR(!ParsetFile.empty(), "Could not find parameterset " << argv[1]);
-		LOG_INFO_STR("Using parameterset " << ParsetFile);
-		ACC::APS::globalParameterSet()->adoptFile(ParsetFile);
-
-		// When not under control of ACC execute all modes immediately
-		if (!ACCmode) {
-			LOG_DEBUG(programName + " starting define");
-			if (!theProcess->define()) {
-				return (1);
-			}
-
-			LOG_DEBUG(programName + " initializing");
-			if (!theProcess->init()) {
-				return (1);
-			}
-
-			LOG_DEBUG(programName + " running");
-			int noRuns = atoi(argv[argc - 1]);
-			if (noRuns == 0) {
-				noRuns = 1;
-			}
-			for (int run = 0; run < noRuns; run++) {
-				if (!theProcess->run()) {
-					return (1);
-				}
-			}
-
-			LOG_DEBUG(programName + " releasing");
-			if (!theProcess->release()) {
-				return (1);
-			}
-			
-			LOG_DEBUG(programName + " quitting");
-			if (!theProcess->quit()) {
-				return (1);
-			}
-
-			LOG_DEBUG(programName + " deleting process");
-
-		} 
-		else {
-			// we are under control of ACC
-			// Note args are: ACC parsetfile UniqProcesName
-			
-			string	procID(argv[3]);
-			string	prefix = ACC::APS::globalParameterSet()->getString("_parsetPrefix");
-			
-			// connect to Application Controller
-			ACC::PLC::ProcControlServer pcServer(ACC::APS::globalParameterSet()->getString(prefix+"_ACnode"),
-									           ACC::APS::globalParameterSet()->getUint16(prefix+"_ACport"),
-									           theProcess);
-
-
-			// Tell AC who we are.
-			LOG_DEBUG_STR("Registering at ApplController as " << procID);
-			sleep(1);
-			pcServer.registerAtAC(procID);
-
-			// Main processing loop
-			bool	quiting(false);
-			while (!quiting) {
-				LOG_TRACE_STAT("Polling ApplController for message");
-				if (pcServer.pollForMessage()) {
-					LOG_TRACE_COND("Message received from ApplController");
-
-					// get pointer to received data
-					ACC::PLC::DH_ProcControl* newMsg = pcServer.getDataHolder();
-					
-		                        if (newMsg->getCommand() == ACC::PLC::PCCmdInit) {
-		                          pcServer.sendResult(newMsg->getCommand(), ACC::PLC::PcCmdMaskOk);
-		                          newMsg->setCommand(ACC::PLC::PCCmd(newMsg->getCommand() &~ ACC::PLC::PCCmdResult));
-		                        }
-
-		                        if (newMsg->getCommand() == ACC::PLC::PCCmdPause) {
-		                          pcServer.sendResult(newMsg->getCommand(), ACC::PLC::PcCmdMaskOk);
-		                          newMsg->setCommand(ACC::PLC::PCCmd(newMsg->getCommand() &~ ACC::PLC::PCCmdResult));
-		                        }
-
-		                        if (newMsg->getCommand() == ACC::PLC::PCCmdRelease) {
-		                          pcServer.sendResult(newMsg->getCommand(), ACC::PLC::PcCmdMaskOk);
-		                          newMsg->setCommand(ACC::PLC::PCCmd(newMsg->getCommand() &~ ACC::PLC::PCCmdResult));
-		                        }
-
-					if (newMsg->getCommand() == ACC::PLC::PCCmdQuit) {
-						quiting = true;
-					} 
-
-					if (!pcServer.handleMessage(newMsg)) {
-						LOG_ERROR("ProcControlServer::handleMessage() failed");
-					}
-
-				} else  {
-					// no new command received. If we are in the runstate 
-					// call the run-routine again.
-					if (theProcess->inRunState()) {
-						ACC::PLC::DH_ProcControl		tmpMsg(ACC::PLC::PCCmdRun);
-						pcServer.handleMessage(&tmpMsg);
-					}
-				}
-			}
-
-			LOG_INFO_STR("Shutting down: ApplicationController");
-			pcServer.unregisterAtAC("");		// send to AC before quiting
-		}
-	} 
-	catch (Exception& ex) {
-		LOG_FATAL_STR("Caught exception: " << ex << endl);
-		LOG_FATAL_STR(programName << " terminated by exception!");
-		return (1);
-	} 
-	catch (std::exception& ex) {
-		LOG_FATAL_STR("Caught std::exception: " << ex.what());
-		return (1);
-	} 
-	catch (...) {
-		LOG_FATAL_STR("Caught unknown exception, exitting");
-		return (1);
-	}  
-
-#ifdef HAVE_MPI
-	TH_MPI::finalize();
-#endif
-
-	LOG_INFO_STR(programName << " terminated normally");
-	return (0);
-}
-
-  } // namespace CS1
-} // namespace LOFAR
diff --git a/Appl/CEP/CS1/CS1_InputSection/src/CS1_InputSection.log_prop b/Appl/CEP/CS1/CS1_InputSection/src/CS1_InputSection.log_prop
deleted file mode 100644
index f3544f42d999d02dd205459fce0f0786d29b8a66..0000000000000000000000000000000000000000
--- a/Appl/CEP/CS1/CS1_InputSection/src/CS1_InputSection.log_prop
+++ /dev/null
@@ -1,25 +0,0 @@
-
-# Configure the loggers
-log4cplus.rootLogger=INFO, STDOUT, FILE
-#log4cplus.logger.TRC=INFO
-log4cplus.logger.TRC=INFO
-log4cplus.logger.LCS.Common=FATAL, STDOUT, FILE
-
-# Define the appenders
-log4cplus.appender.STDOUT=log4cplus::ConsoleAppender
-log4cplus.appender.STDOUT.layout=log4cplus::PatternLayout
-log4cplus.appender.STDOUT.layout.ConversionPattern=%D{%d-%m %H:%M:%S.%q} %-5p %c{9} - %m [%.25l]%n
-
-log4cplus.appender.STDERR=log4cplus::ConsoleAppender
-log4cplus.appender.STDERR.layout=log4cplus::PatternLayout
-log4cplus.appender.STDERR.layout.ConversionPattern=%D{%d-%m %H:%M:%S.%q} %-5p %c{3} - %m [%.25l]%n
-log4cplus.appender.STDERR.logToStdErr=true
-
-log4cplus.appender.FILE=log4cplus::RollingFileAppender
-log4cplus.appender.FILE.File=../log/%filename.log
-log4cplus.appender.FILE.MaxFileSize=10MB
-log4cplus.appender.FILE.MaxBackupIndex=2
-log4cplus.appender.FILE.layout=log4cplus::PatternLayout
-log4cplus.appender.FILE.layout.ConversionPattern=%x %D{%d-%m %H:%M:%S.%q} %-5p %c{3} - %m [%.25l]%n
-
-log4cplus.appender.DUMP=log4cplus::NullAppender
diff --git a/Appl/CEP/CS1/CS1_InputSection/src/InputAppl.sh b/Appl/CEP/CS1/CS1_InputSection/src/InputAppl.sh
deleted file mode 100755
index d5fee786c5ae24d777627d91a5cd43395137ad4e..0000000000000000000000000000000000000000
--- a/Appl/CEP/CS1/CS1_InputSection/src/InputAppl.sh
+++ /dev/null
@@ -1,119 +0,0 @@
-#!/bin/bash
-#
-# /InputAppl: a start/stop/status script for swlevel
-#
-# Copyright (C) 2007
-# ASTRON (Netherlands Foundation for Research in Astronomy)
-# P.O.Box 2, 7990 AA Dwingeloo, The Netherlands, seg@astron.nl
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License
-# along with this program; if not, write to the Free Software
-# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-#
-# Syntax: InputAppl start|stop|status
-#
-# $Id$
-#
-
-#
-# SyntaxError msg
-#
-SyntaxError()
-{
-	Msg=$1
-
-	[ -z "${Msg}" ] || echo "ERROR: ${Msg}"
-	echo ""
-	echo "Syntax: $(basename $0) start | stop | status"
-	echo ""
-	exit 1
-}
-
-#
-# Start the program when it exists
-#
-start_prog()
-{
-	# put here your code to start your program
-	echo 'start_prog()'
-}
-
-#
-# Stop the program when it is running
-#
-stop_prog()
-{
-	# put here your code to stop your program
-	ps -ef | grep -v grep | grep -v ACDaemon[^\ ] | grep ACDaemon 2>&1 >/dev/null
-	if [ $? -ne 0 ]; then
-	  if [ -f ../etc/ACD.admin ]; then 	
-	    rm ../etc/ACD.admin
-	  fi
-	fi  
-	cexec killall -9 CS1_InputSection 
-}
-
-#
-# show status of program
-#
-# arg1 = levelnr
-#
-status_prog()
-{
-	levelnr=$1
-
-	# put here code to figure out the status of your program and
-	# fill the variables prog and pid with the right information
-
-	# e.g.
-	prog=InputAppl
-	pid=DOWN
-	cexec 'ps -ef | grep -v grep '| grep CS1_InputSection| grep -v bash2>$1 1>/dev/null
-	if [ $? -eq 0 ]; then
-	  cexec 'ps -ef | grep -v grep | grep CS1_InputSection'| grep -v bash | awk -v levelnr=${levelnr} '{
-          if (substr($1,1,3) == "---") {
-            machine = substr($2,1,6)
-          }
-          else {
-            if (substr($1,1,3) != "***") {
-              fullname="InputAppl@"machine
-              printf "%s : %-25.25s %s\n", levelnr,fullname,$2
-            }
-          }
-        }'
-	else
-          # this line should be left in, it shows the status in the right format
-	  echo ${levelnr} ${prog} ${pid} | awk '{ printf "%s : %-25s %s\n", $1, $2, $3 }'
-	fi
-}
-
-#
-# MAIN
-#
-
-# when no argument is given show syntax error.
-if [ -z "$1" ]; then
-	SyntaxError
-fi
-
-# first power down to this level
-case $1 in
-	start)	start_prog
-			;;
-	stop)	stop_prog
-			;;
-	status)	status_prog $2
-			;;
-	*)		SyntaxError
-			;;
-esac
diff --git a/Appl/CEP/CS1/CS1_InputSection/src/Makefile.am b/Appl/CEP/CS1/CS1_InputSection/src/Makefile.am
deleted file mode 100644
index 7c2e826e81e77c637cb9914def8b01e33bd87b00..0000000000000000000000000000000000000000
--- a/Appl/CEP/CS1/CS1_InputSection/src/Makefile.am
+++ /dev/null
@@ -1,39 +0,0 @@
-# if HAVE_SHMEM
-# SUBDIRS = shmem .
-# endif
-
-lib_LTLIBRARIES         = libcs1_inputsection.la
-
-libcs1_inputsection_la_SOURCES        = Connector.cc \
-		 InputThread.cc \
-		 BeamletBuffer.cc \
-		 ACCmain_InputSection.cc \
-		 AH_InputSection.cc \
-		 WH_InputSection.cc
-
-# AM_YFLAGS               = -d -p KeyParse
-# AM_LFLAGS               = -PKeyTokenize
-# LEX_OUTPUT_ROOT         = lex.KeyTokenize
-
-# if HAVE_SHMEM
-# libcs1_inputsection_la_LIBADD     = shmem/libshmem.la
-# endif
-
-bin_PROGRAMS = CS1_InputSection
-
-CS1_InputSection_SOURCES = main.cc
-CS1_InputSection_LDADD = libcs1_inputsection.la
-CS1_InputSection_DEPENDENCIES	= libcs1_inputsection.la $(LOFAR_DEPEND)
-
-glishdir = $(libexecdir)/glish
-dist_glish_SCRIPTS = 
-
-pythondir = $(bindir)
-dist_python_SCRIPTS = 
-
-scriptdir = $(bindir)
-dist_script_SCRIPTS = 
-
-include $(top_srcdir)/Makefile.common
-
-sysconf_DATA = CS1_InputSection.log_prop
diff --git a/Appl/CEP/CS1/CS1_InputSection/src/WH_InputSection.cc b/Appl/CEP/CS1/CS1_InputSection/src/WH_InputSection.cc
deleted file mode 100644
index f88f994b39b433f99b05cedd8068e82f9b8bafec..0000000000000000000000000000000000000000
--- a/Appl/CEP/CS1/CS1_InputSection/src/WH_InputSection.cc
+++ /dev/null
@@ -1,433 +0,0 @@
-//#  WH_InputSection.cc: Catch RSP ethernet frames and synchronize RSP inputs 
-//#
-//#  Copyright (C) 2006
-//#  ASTRON (Netherlands Foundation for Research in Astronomy)
-//#  P.O.Box 2, 7990 AA Dwingeloo, The Netherlands, seg@astron.nl
-//#
-//#  This program is free software; you can redistribute it and/or modify
-//#  it under the terms of the GNU General Public License as published by
-//#  the Free Software Foundation; either version 2 of the License, or
-//#  (at your option) any later version.
-//#
-//#  This program is distributed in the hope that it will be useful,
-//#  but WITHOUT ANY WARRANTY; without even the implied warranty of
-//#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-//#  GNU General Public License for more details.
-//#
-//#  You should have received a copy of the GNU General Public License
-//#  along with this program; if not, write to the Free Software
-//#  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-//#
-//#  $Id$
-
-//# Always #include <lofar_config.h> first!
-#include <lofar_config.h>
-
-//# Includes
-#include <Common/LofarLogger.h>
-#include <AMCBase/Epoch.h>
-#include <CS1_InputSection/WH_InputSection.h>
-#include <CS1_Interface/DH_Delay.h>
-#include <CS1_Interface/CS1_Parset.h>
-#include <Transport/TransportHolder.h>
-#include <CS1_Interface/RSPTimeStamp.h>
-#include <CS1_InputSection/BeamletBuffer.h>
-#include <CS1_InputSection/InputThread.h>
-#include <tinyCEP/Sel_RoundRobin.h>
-
-#if defined HAVE_MPI
-#include <Transport/TH_MPI.h>
-#include <mpi.h>
-#endif
-
-#include <signal.h>
-#include <sys/time.h>
-
-
-#undef USE_TIMER
-
-
-namespace LOFAR {
-namespace CS1 {
-
-bool WH_InputSection::signalReceived;
-
-WH_InputSection::WH_InputSection(const string &name, 
-				 bool doInput,
-				 bool doTranspose,
-				 bool doOutput,
-				 CS1_Parset *ps,
-				 TransportHolder *inputTH,
-				 unsigned stationNr,
-				 unsigned nrInputChannels,
-				 unsigned nrOutputChannels,
-				 const std::vector<unsigned> &inputNodes,
-				 const std::vector<unsigned> &outputNodes)
-:
-  WorkHolder(nrInputChannels, nrOutputChannels, name, "WH_InputSection"),
-  itsDoInput(doInput),
-  itsDoTranspose(doTranspose),
-  itsDoOutput(doOutput),
-  itsInputNodes(inputNodes),
-  itsOutputNodes(outputNodes),
-  itsInputTH(inputTH),
-  itsStationNr(stationNr),
-  itsCS1PS(ps),
-  itsBBuffer(0),
-  itsPrePostTimer("pre/post"),
-  itsProcessTimer("process"),
-  itsGetElemTimer("getElem")
-{
-  LOG_TRACE_FLOW_STR("WH_InputSection constructor");    
-
-  // get parameters
-  itsNSubbandsPerCell = itsCS1PS->nrSubbandsPerCell();
-  itsNSamplesPerSec   = itsCS1PS->nrSubbandSamples();
-  itsNHistorySamples  = itsCS1PS->nrHistorySamples();
-
-  // create incoming dataholder holding the delay information 
-  if (doInput)
-    getDataManager().addInDataHolder(0, new DH_Delay("DH_Delay", itsCS1PS->getInt32("OLAP.nrRSPboards")));
-
-  // create a outgoing dataholder for each subband
-  if (doOutput) {
-    vector<int> channels;
-
-    for (int i = 0; i < itsNoutputs; i ++) {
-      getDataManager().addOutDataHolder(i, new DH_Subband("DH_Subband", itsCS1PS));
-      getDataManager().setAutoTriggerOut(i, false);
-      channels.push_back(i);
-    }
-
-    getDataManager().setOutputSelector(new Sel_RoundRobin(channels));
-  }
-}
-
-
-WH_InputSection::~WH_InputSection() 
-{
-}
-
-
-WH_InputSection *WH_InputSection::make(const string& name)
-{
-  return new WH_InputSection(name, itsDoInput, itsDoTranspose, itsDoOutput, itsCS1PS, itsInputTH, itsStationNr, itsNinputs, itsNoutputs, itsInputNodes, itsOutputNodes);
-}
-
-
-void WH_InputSection::startThread()
-{
-  /* start up thread which writes RSP data from ethernet link
-     into cyclic buffers */
-  LOG_TRACE_FLOW_STR("WH_InputSection starting thread");   
-
-  ThreadArgs args;
-  args.BBuffer            = itsBBuffer;
-  args.th                 = itsInputTH;
-  args.ipHeaderSize       = itsCS1PS->getInt32("OLAP.IPHeaderSize");
-  args.frameHeaderSize    = itsCS1PS->getInt32("OLAP.EPAHeaderSize");
-  args.nTimesPerFrame     = itsCS1PS->getInt32("OLAP.nrTimesInFrame");
-  args.nSubbandsPerFrame  = itsCS1PS->getInt32("OLAP.nrSubbandsPerFrame");
-
-  args.frameSize          = args.frameHeaderSize + args.nSubbandsPerFrame * args.nTimesPerFrame * sizeof(Beamlet);
-  args.ID                 = itsStationNr;
-
-
-  if (itsInputTH->getType() == "TH_File" || itsInputTH->getType() == "TH_Null") {
-    // if we are reading from file, overwriting the buffer should not be allowed
-    // this way we can work with smaller files
-    itsBBuffer->setAllowOverwrite(false);
-  }
-
-  itsInputThreadObject = new InputThread(args);
-  itsInputThread       = new boost::thread(*itsInputThreadObject);
-}
-
-void WH_InputSection::preprocess()
-{
-  itsPrePostTimer.start();
-
-  if (itsDoInput) {
-    // create the buffer controller.
-    int cyclicBufferSize = itsCS1PS->nrSamplesToBuffer();
-    int subbandsToReadFromFrame = itsCS1PS->subbandsToReadFromFrame();
-    ASSERTSTR(subbandsToReadFromFrame <= itsCS1PS->getInt32("OLAP.nrSubbandsPerFrame"), subbandsToReadFromFrame << " < " << itsCS1PS->getInt32("OLAP.nrSubbandsPerFrame"));
-
-    itsBBuffer = new BeamletBuffer(cyclicBufferSize, subbandsToReadFromFrame, cyclicBufferSize/6, cyclicBufferSize/6);
-    startThread();
-
-    itsDelayCompensation = itsCS1PS->getBool("OLAP.delayCompensation");
-
-    // determine starttime
-    double startTime = itsCS1PS->startTime();
-
-#if 1
-    // interpret the time as utc
-    double utc = startTime;
-#else
-    double utc = AMC::Epoch(startTime).utc();
-#endif
-    int sampleFreq = (int)itsCS1PS->sampleRate();
-    int seconds = (int)floor(utc);
-    int samples = (int)((utc - floor(utc)) * sampleFreq);
-
-    itsSyncedStamp = TimeStamp(seconds, samples);
-
-    std::clog << "Starting buffer at " << itsSyncedStamp << std::endl;
-    itsBBuffer->startBufferRead(itsSyncedStamp);
-    unsigned nrCells = itsCS1PS->nrCells();
-
-    itsInputData     = new boost::multi_array<SampleType, 4>(boost::extents[nrCells][itsNSubbandsPerCell][itsNSamplesPerSec + itsNHistorySamples][NR_POLARIZATIONS]);
-    itsInputMetaData = new struct metaData[nrCells];
-  }
-
-  if (itsDoOutput) {
-    unsigned nrStations = itsCS1PS->nrStations();
-
-    itsOutputData     = new boost::multi_array<SampleType, 4>(boost::extents[nrStations][itsNSubbandsPerCell][itsNSamplesPerSec + itsNHistorySamples][NR_POLARIZATIONS]);
-    itsOutputMetaData = new struct metaData[nrStations];
-
-#if defined USE_TIMER
-    sighandler_t ret = signal(SIGALRM, *WH_InputSection::timerSignal);
-    ASSERTSTR(ret != SIG_ERR, "WH_InputSection couldn't set signal handler for timer");    
-    struct itimerval value;
-
-    double interval = itsCS1PS->timeInterval();
-    __time_t secs  = static_cast<__time_t>(floor(interval));
-    __time_t usecs = static_cast<__time_t>(1e6 * (interval - secs));
-
-    value.it_interval.tv_sec  = value.it_value.tv_sec  = secs;
-    value.it_interval.tv_usec = value.it_value.tv_usec = usecs;
-    cout << "Setting timer interval to " << secs << "secs and " << usecs << "ms" << endl;
-
-    setitimer(ITIMER_REAL, &value, 0);
-#endif
-  }
-}
-
-void WH_InputSection::doInput(SparseSet<unsigned> &flags)
-{
-  TimeStamp delayedStamp = itsSyncedStamp - itsNHistorySamples;
-  itsSyncedStamp += itsNSamplesPerSec;
-
-  if (itsDelayCompensation) {
-    DH_Delay *dh = static_cast<DH_Delay *>(getDataManager().getInHolder(0));
-    delayedStamp += (*dh)[itsStationNr].coarseDelay;
-  }
-
-  // get the data from the cyclic buffer
-  itsGetElemTimer.start();
-  boost::multi_array_ref<SampleType, 3> inputData(itsInputData->origin(), boost::extents[itsOutputNodes.size() * itsNSubbandsPerCell][itsNSamplesPerSec + itsNHistorySamples][NR_POLARIZATIONS]);
-  itsBBuffer->getElements(inputData, flags, delayedStamp, itsNSamplesPerSec + itsNHistorySamples);
-  itsGetElemTimer.stop();
-
-  std::clog << "WH_InputSection out " << itsStationNr << " " << delayedStamp << " flags: " << flags << std::endl;
-}
-
-
-void WH_InputSection::limitFlagsLength(SparseSet<unsigned> &flags)
-{
-  const std::vector<struct SparseSet<unsigned>::range> &ranges = flags.getRanges();
-
-  if (ranges.size() > 16)
-    flags.include(ranges[15].begin, ranges[ranges.size() - 1].end);
-}
-
-
-void WH_InputSection::transposeData() 
-{
-#if defined HAVE_MPI
-  int nrNodes = TH_MPI::getNumberOfNodes();
-#else
-  int nrNodes = 1;
-#endif
-
-  int sendCounts[nrNodes], sendDisplacements[nrNodes];
-  int receiveCounts[nrNodes], receiveDisplacements[nrNodes];
-
-  memset(sendCounts, 0, sizeof sendCounts);
-  memset(receiveCounts, 0, sizeof receiveCounts);
-
-  if (itsDoInput)
-    for (unsigned output = 0; output < itsOutputNodes.size(); output ++) {
-      sendCounts[itsOutputNodes[output]] = (*itsInputData)[output].num_elements() * sizeof(SampleType);
-      sendDisplacements[itsOutputNodes[output]] = reinterpret_cast<char *>((*itsInputData)[output].origin()) - reinterpret_cast<char *>(itsInputData->origin());
-    }
-
-  if (itsDoOutput)
-    for (unsigned input = 0; input < itsInputNodes.size(); input ++) {
-      receiveCounts[itsInputNodes[input]] = (*itsOutputData)[input].num_elements() * sizeof(SampleType);
-      receiveDisplacements[itsInputNodes[input]] = reinterpret_cast<char *>((*itsOutputData)[input].origin()) - reinterpret_cast<char *>(itsOutputData->origin());
-    }
-
-#if 1 && defined HAVE_MPI
-  if (MPI_Alltoallv(itsDoInput ? itsInputData->origin() : 0,
-		    sendCounts, sendDisplacements, MPI_BYTE,
-		    itsDoOutput ? itsOutputData->origin() : 0,
-		    receiveCounts, receiveDisplacements, MPI_BYTE,
-		    MPI_COMM_WORLD) != MPI_SUCCESS) {
-    std::cerr << "MPI_Alltoallv() failed" << std::endl;
-    exit(1);
-  }
-#else
-  TH_MPI::synchroniseAllProcesses();
-#endif
-}
-
-
-void WH_InputSection::transposeMetaData(const SparseSet<unsigned> &flags)
-{
-#if defined HAVE_MPI
-  int nrNodes = TH_MPI::getNumberOfNodes();
-#else
-  int nrNodes = 1;
-#endif
-
-  int sendCounts[nrNodes], sendDisplacements[nrNodes];
-  int receiveCounts[nrNodes], receiveDisplacements[nrNodes];
-
-  memset(sendCounts, 0, sizeof sendCounts);
-  memset(receiveCounts, 0, sizeof receiveCounts);
-
-  if (itsDoInput) {
-    DH_Delay *delayDHp = static_cast<DH_Delay *>(getDataManager().getInHolder(0));
-
-    for (unsigned output = 0; output < itsOutputNodes.size(); output++) { 
-      itsInputMetaData[output].fineDelayAtBegin  = (*delayDHp)[itsStationNr].fineDelayAtBegin;
-      itsInputMetaData[output].fineDelayAfterEnd = (*delayDHp)[itsStationNr].fineDelayAfterEnd;
-
-      if (flags.marshall(itsInputMetaData[output].flagsBuffer, sizeof itsInputMetaData[output].flagsBuffer) < 0) {
-	std::cerr << "Too many flags!" << std::endl;
-	std::exit(1);
-      }
-
-      sendCounts[itsOutputNodes[output]] = sizeof(struct metaData);
-      sendDisplacements[itsOutputNodes[output]] = reinterpret_cast<char *>(&itsInputMetaData[output]) - reinterpret_cast<char *>(itsInputMetaData);
-    }
-  }
-
-  if (itsDoOutput)
-    for (unsigned input = 0; input < itsInputNodes.size(); input ++) {
-      receiveCounts[itsInputNodes[input]] = sizeof(struct metaData);
-      receiveDisplacements[itsInputNodes[input]] = reinterpret_cast<char *>(&itsOutputMetaData[input]) - reinterpret_cast<char *>(itsOutputMetaData);
-    }
-
-#if defined HAVE_MPI
-  if (MPI_Alltoallv(itsDoInput ? itsInputMetaData : 0,
-		    sendCounts, sendDisplacements, MPI_BYTE,
-		    itsDoOutput ? itsOutputMetaData : 0,
-		    receiveCounts, receiveDisplacements, MPI_BYTE,
-		    MPI_COMM_WORLD) != MPI_SUCCESS) {
-    std::cerr << "MPI_Alltoallv() failed" << std::endl;
-    exit(1);
-  }
-#endif
-}
-
-
-void WH_InputSection::doOutput() 
-{
-  // Copy every subband to one BG/L core
-  Selector *selector = getDataManager().getOutputSelector();
-
-  for (unsigned subband = 0; subband < itsNSubbandsPerCell; subband ++) {
-    // ask the round robin selector for the next output
-    DH_Subband *outHolder = static_cast<DH_Subband *>(getDataManager().getOutHolder(selector->getCurrentSelection()));
-
-    // Copy one subband from every input
-    for (unsigned station = 0; station < itsInputNodes.size(); station ++) {
-      ASSERT(outHolder->getSamples3D()[station].num_elements() == (*itsOutputData)[station][subband].num_elements());
-
-      memcpy(outHolder->getSamples3D()[station].origin(),
-	     (*itsOutputData)[station][subband].origin(),
-	     outHolder->getSamples3D()[station].num_elements() * sizeof(DH_Subband::SampleType));
-
-      // copy other information (delayInfo, flags etc)
-      outHolder->getDelays()[station].delayAtBegin  = itsOutputMetaData[station].fineDelayAtBegin;
-      outHolder->getDelays()[station].delayAfterEnd = itsOutputMetaData[station].fineDelayAfterEnd;
-      outHolder->getFlags()[station].unmarshall(itsOutputMetaData[station].flagsBuffer);
-    }
-
-    outHolder->fillExtraData();
-
-    getDataManager().readyWithOutHolder(selector->getCurrentSelection());
-    selector->selectNext();
-  }
-}
-
-
-void WH_InputSection::process() 
-{ 
-  itsProcessTimer.start();
-  SparseSet<unsigned> flags;
-
-  if (itsDoInput) {
-    doInput(flags);
-    limitFlagsLength(flags);
-  }
-
-  if (itsDoTranspose) {
-    NSTimer transposeTimer("transpose", TH_MPI::getCurrentRank() == 0);
-    transposeTimer.start();
-
-    transposeData();
-    transposeMetaData(flags);
-
-    transposeTimer.stop();
-  }
-
-  if (itsDoOutput)
-    doOutput();
-
-  itsProcessTimer.stop();
-
-#if defined USE_TIMER
-  while (!signalReceived)
-    pause();
-
-  signalReceived = false;
-#endif
-}
-
-void WH_InputSection::postprocess()
-{
-  if (itsDoInput) {
-    InputThread::stopThreads();
-    itsBBuffer->clear();
-    itsInputThread->join();
-    delete itsInputThread;
-    delete itsInputThreadObject;
-    delete itsBBuffer;
-    delete itsInputData;
-    delete [] itsInputMetaData;
-  }
-
-  if (itsDoOutput) {
-    delete itsOutputData;
-    delete [] itsOutputMetaData;
-  }
-
-#if defined USE_TIMER
-  // unset timer
-  struct itimerval value;
-  memset(&value, 0, sizeof value);
-  setitimer(ITIMER_REAL, &value, 0);
-  // remove sig handler
-  sighandler_t ret = signal(SIGALRM, SIG_DFL);
-  ASSERTSTR(ret != SIG_ERR, "WH_InputSection couldn't unset signal handler for timer");    
-#endif
-
-  itsPrePostTimer.stop();
-
-  itsPrePostTimer.print(clog);
-  itsProcessTimer.print(clog);
-  itsGetElemTimer.print(clog);
-}
-
-void WH_InputSection::timerSignal(int)
-{
-  signalReceived = true;
-}
-
-} // namespace CS1
-} // namespace LOFAR
diff --git a/Appl/CEP/CS1/CS1_InputSection/src/main.cc b/Appl/CEP/CS1/CS1_InputSection/src/main.cc
deleted file mode 100644
index 6eda03efcdc14e62f5626171b0fac71a5033edf8..0000000000000000000000000000000000000000
--- a/Appl/CEP/CS1/CS1_InputSection/src/main.cc
+++ /dev/null
@@ -1,113 +0,0 @@
-//#  main: main function for CS1_InputSection
-//#
-//#  Copyright (C) 2006
-//#  ASTRON (Netherlands Foundation for Research in Astronomy)
-//#  P.O.Box 2, 7990 AA Dwingeloo, The Netherlands, seg@astron.nl
-//#
-//#  This program is free software; you can redistribute it and/or modify
-//#  it under the terms of the GNU General Public License as published by
-//#  the Free Software Foundation; either version 2 of the License, or
-//#  (at your option) any later version.
-//#
-//#  This program is distributed in the hope that it will be useful,
-//#  but WITHOUT ANY WARRANTY; without even the implied warranty of
-//#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-//#  GNU General Public License for more details.
-//#
-//#  You should have received a copy of the GNU General Public License
-//#  along with this program; if not, write to the Free Software
-//#  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-//#
-//#  $Id$
-
-//# Always #include <lofar_config.h> first!
-#include <lofar_config.h>
-
-//# Includes
-#include <Common/LofarLogger.h>
-#include <Common/LofarLocators.h>
-#include <CS1_InputSection/AH_InputSection.h>
-#include <tinyCEP/Profiler.h>
-#include <tinyCEP/ApplicationHolderController.h>
-#include <CS1_InputSection/ACCmain_InputSection.h>
-
-#include <exception>
-
-#ifdef HAVE_MPI
-#include <Transport/TH_MPI.h>
-#endif
-
-using namespace LOFAR;
-using namespace LOFAR::CS1;
-
-#if 1
-int main(int argc, char **argv)
-{
-  ConfigLocator aCL;
-  string        progName = basename(argv[0]);
-  string        logPropFile(progName + ".log_prop");
-  INIT_LOGGER (aCL.locate(logPropFile).c_str());
-  LOG_DEBUG_STR("Initialized logsystem with: " << aCL.locate(logPropFile));
-
-  AH_InputSection myAH;
-  ApplicationHolderController myAHController(myAH, 1); //listen to ACC every 1 runs
-  return ACCmain_InputSection(argc, argv, &myAHController);
-}
-
-#else
-
-int main (int argc, const char** argv) {
-
-  INIT_LOGGER("CS1_InputSection");
-
-  // Check invocation syntax
-  try {
-    if ((argc==3) && (strncmp("ACC", argv[1], 3) == 0)) {
-      LOG_TRACE_FLOW("Main program started by ACC");
-      // we were called by ACC so execute the ACCmain
-      AH_InputSection myAH;
-      ApplicationHolderController myAHController(myAH);
-      myAHController.main(argc, argv);
-    } else {
-      LOG_TRACE_FLOW("Main program not started by ACC");
-      // there are no commandline arguments, so we were not called by ACC
-      AH_InputSection myAH;
-
-      ACC::APS::ParameterSet ps("CS1.parset"); 
-      myAH.setParameters(ps);
-      
-      myAH.setarg(argc, argv);
-      myAH.baseDefine();
-      cout << "defined" << endl;
-      Profiler::init();
-      myAH.basePrerun();
-      cout << "init done" << endl;
-      // This is for synchronisation between WH_RSPInput and WH_SBCollect
-      // WH_SBCollect won't exit the preprocess before the connection is made
-      // WH_RSPInput won't start the bufferthread before the barrier is passed
-#ifdef HAVE_MPI
-      TH_MPI::synchroniseAllProcesses();
-#endif
-      Profiler::activate();
-      cout << "run" << endl;
-      myAH.baseRun(ps.getInt32("General.NRuns"));
-      cout << "run complete" << endl;
-#ifdef HAVE_MPI
-      TH_MPI::synchroniseAllProcesses();
-#endif
-      myAH.baseDump();
-      myAH.baseQuit();
-      Profiler::deActivate();
-    }
-  } catch (Exception& ex) {
-    LOG_FATAL_STR("Caught exception: " << ex << endl);
-    LOG_FATAL_STR(argv[0] << " terminated by exception!");
-    exit(1);
-  } catch (...) {
-    LOG_FATAL_STR("Caught unknown exception, exitting");
-    exit (1);
-  }  
-  LOG_INFO_STR(argv[0] << " terminated normally");
-  return (0);
-}
-#endif
diff --git a/Appl/CEP/CS1/CS1_InputSection/test/Makefile.am b/Appl/CEP/CS1/CS1_InputSection/test/Makefile.am
deleted file mode 100644
index 964c67ffc9a03c6dcd0a1ebc1ad272baef972739..0000000000000000000000000000000000000000
--- a/Appl/CEP/CS1/CS1_InputSection/test/Makefile.am
+++ /dev/null
@@ -1,25 +0,0 @@
-check_PROGRAMS        =
-
-# programs to run through supplied checktools
-CHECKTOOLPROGS        =
-#ENDCHECKTOOLPROGS
-
-# scripts used to run tests
-TESTSCRIPTS           =
-
-# scripts and possible programs to run tests
-TESTS	              = $(TESTSCRIPTS)
-
-XFAIL_TESTS           =
-
-# all files (.run, .stdout, .in, .log_prop, etc.) needed to run tests
-EXTRA_DIST            = $(TESTSCRIPTS)
-
-# Lines to build a test program testprg
-#testprg_SOURCES		= testprg.cc
-#testprg_LDADD			= ../src/libcs1_inputsection.la
-#testprg_DEPENDENCIES		= ../src/libcs1_inputsection.la $(LOFAR_DEPEND)
-
-
-
-include $(top_srcdir)/Makefile.common
diff --git a/Appl/CEP/CS1/CS1_Interface/configure.in b/Appl/CEP/CS1/CS1_Interface/configure.in
index 0dd4b672506424169a3de36df6ee035ab247872e..dfae4799a95a02120f63f9ee1e9c0fd86532202f 100644
--- a/Appl/CEP/CS1/CS1_Interface/configure.in
+++ b/Appl/CEP/CS1/CS1_Interface/configure.in
@@ -55,15 +55,12 @@ lofar_MPI
 
 lofar_INTERNAL(LCS/Common,Common,,1,Common/LofarTypedefs.h,,)
 lofar_INTERNAL(LCS/Transport,Transport,,1,Transport/DataHolder.h,,)
-lofar_INTERNAL(LCS/ACC/APS,APS,,1,APS/ParameterSet.h,,)
-lofar_INTERNAL(CEP/tinyCEP,tinyCEP,,1,tinyCEP/WorkHolder.h,,)
-dnl lofar_EXTERNAL(boost,0,boost/multi_array.hpp,"")
-dnl lofar_EXTERNAL(boost,1.32,boost/date_time/date.hpp, boost_date_time)
+lofar_INTERNAL(LCS/ACC/APS,APS,,0,APS/ParameterSet.h,,)
+lofar_INTERNAL(CEP/tinyCEP,tinyCEP,,0,tinyCEP/TinyDataManager.h,,)
 lofar_EXTERNAL(boost,1,boost/date_time/date.hpp,"")
 
 dnl lofar_BLITZ
 dnl lofar_MPI
-dnl lofar_INTERNAL(LCS/Common,Common,,1,Common/LofarTypedefs.h,,)
 
 dnl
 dnl Output Makefiles
diff --git a/Appl/CEP/CS1/CS1_Interface/include/CS1_Interface/BGL_Command.h b/Appl/CEP/CS1/CS1_Interface/include/CS1_Interface/BGL_Command.h
new file mode 100644
index 0000000000000000000000000000000000000000..fb2d2ebd7251583ba3ba2036670ada1bd08448af
--- /dev/null
+++ b/Appl/CEP/CS1/CS1_Interface/include/CS1_Interface/BGL_Command.h
@@ -0,0 +1,86 @@
+//#  BGL_Command.h: command value (PREPROCESS/PROCESS/POSTPROCESS/STOP) sent and received via a TransportHolder
+//#
+//#  Copyright (C) 2007
+//#  ASTRON (Netherlands Foundation for Research in Astronomy)
+//#  P.O.Box 2, 7990 AA Dwingeloo, The Netherlands, seg@astron.nl
+//#
+//#  This program is free software; you can redistribute it and/or modify
+//#  it under the terms of the GNU General Public License as published by
+//#  the Free Software Foundation; either version 2 of the License, or
+//#  (at your option) any later version.
+//#
+//#  This program is distributed in the hope that it will be useful,
+//#  but WITHOUT ANY WARRANTY; without even the implied warranty of
+//#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+//#  GNU General Public License for more details.
+//#
+//#  You should have received a copy of the GNU General Public License
+//#  along with this program; if not, write to the Free Software
+//#  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+//#
+//#  $Id$
+
+#ifndef LOFAR_CS1_INTERFACE_BGL_COMMAND_H
+#define LOFAR_CS1_INTERFACE_BGL_COMMAND_H
+
+#include <Transport/TransportHolder.h>
+
+
+namespace LOFAR {
+namespace CS1 {
+
+class BGL_Command  // one command value plus the code to send/receive it over a TransportHolder
+{
+  public:
+    enum Command {
+      PREPROCESS,
+      PROCESS,
+      POSTPROCESS,
+      STOP  // no trailing comma: C++98/03 enumerator lists do not allow one
+    };
+    
+		 BGL_Command();  // leaves the stored command uninitialised
+		 BGL_Command(enum Command);  // stores the given command
+
+    enum Command &value();  // mutable reference to the stored command
+
+    void	 read(TransportHolder *);  // blocking receive into itsMarshalledData
+    void	 write(TransportHolder *);  // blocking send of itsMarshalledData
+
+  private:
+    struct MarshalledData  // the raw bytes handed to recvBlocking/sendBlocking
+    {
+      enum Command value;
+    } itsMarshalledData;
+};
+
+
+inline BGL_Command::BGL_Command()
+{  // empty body: itsMarshalledData.value is not initialised here
+}
+
+inline BGL_Command::BGL_Command(enum Command value)
+{
+  itsMarshalledData.value = value;
+}
+
+inline enum BGL_Command::Command &BGL_Command::value()
+{
+  return itsMarshalledData.value;  // reference, so callers can both read and assign
+}
+
+inline void BGL_Command::read(TransportHolder *th)
+{  // any result of recvBlocking is discarded; meaning of the trailing 1, 0, 0 is defined by TransportHolder (not visible here)
+  th->recvBlocking(&itsMarshalledData, sizeof itsMarshalledData, 1, 0, 0);
+}
+
+inline void BGL_Command::write(TransportHolder *th)
+{  // any result of sendBlocking is discarded; meaning of the trailing 1, 0 is defined by TransportHolder (not visible here)
+  th->sendBlocking(&itsMarshalledData, sizeof itsMarshalledData, 1, 0);
+}
+
+
+} // namespace CS1
+} // namespace LOFAR
+
+#endif 
diff --git a/Appl/CEP/CS1/CS1_Interface/include/CS1_Interface/BGL_Configuration.h b/Appl/CEP/CS1/CS1_Interface/include/CS1_Interface/BGL_Configuration.h
new file mode 100644
index 0000000000000000000000000000000000000000..345d34be8a7f984c7efef0d576344d30838db541
--- /dev/null
+++ b/Appl/CEP/CS1/CS1_Interface/include/CS1_Interface/BGL_Configuration.h
@@ -0,0 +1,127 @@
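+//# BGL_Configuration.h: configuration values (stations, samples, psets, reference frequencies) read/written via a TransportHolder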
+//#
+//#  Copyright (C) 2007
+//#  ASTRON (Netherlands Foundation for Research in Astronomy)
+//#  P.O.Box 2, 7990 AA Dwingeloo, The Netherlands, seg@astron.nl
+//#
+//#  This program is free software; you can redistribute it and/or modify
+//#  it under the terms of the GNU General Public License as published by
+//#  the Free Software Foundation; either version 2 of the License, or
+//#  (at your option) any later version.
+//#
+//#  This program is distributed in the hope that it will be useful,
+//#  but WITHOUT ANY WARRANTY; without even the implied warranty of
+//#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+//#  GNU General Public License for more details.
+//#
+//#  You should have received a copy of the GNU General Public License
+//#  along with this program; if not, write to the Free Software
+//#  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+//#
+//#  $Id$
+
+#ifndef LOFAR_CS1_INTERFACE_BGL_CONFIGURATION_H
+#define LOFAR_CS1_INTERFACE_BGL_CONFIGURATION_H
+
+#include <Transport/TransportHolder.h>
+
+#include <vector>
+
+
+namespace LOFAR {
+namespace CS1 {
+
+class BGL_Configuration  // configuration values plus read()/write() over a TransportHolder
+{
+  public:
+    unsigned		  &nrStations();  // all accessors return mutable references, so they also serve as setters
+    unsigned		  &nrSamplesPerIntegration();
+    unsigned		  &nrSamplesToBGLProc();
+    unsigned		  &nrUsedCoresPerPset();
+    unsigned		  &nrSubbandsPerPset();
+    bool		  &delayCompensation();
+    double		  &sampleRate();
+    std::vector<unsigned> &inputPsets(), &outputPsets();  // refer to the std::vector members, not the MarshalledData arrays
+    std::vector<double>	  &refFreqs();  // refers to the std::vector member, not the MarshalledData array
+
+    void		  read(TransportHolder *);  // declared only; not defined in this header
+    void		  write(TransportHolder *);  // declared only; not defined in this header
+
+    static const unsigned MAX_PSETS    = 64;  // capacity of the pset arrays in MarshalledData
+    static const unsigned MAX_SUBBANDS = 54;  // capacity of itsRefFreqs in MarshalledData
+
+  private:
+    std::vector<unsigned> itsInputPsets, itsOutputPsets;
+    std::vector<double>	  itsRefFreqs;
+
+    struct MarshalledData  // fixed-size, pointer-free record holding the scalar settings and array copies
+    {
+      unsigned		  itsNrStations;
+      unsigned		  itsNrSamplesPerIntegration;
+      unsigned		  itsNrSamplesToBGLProc;
+      unsigned		  itsNrUsedCoresPerPset;
+      unsigned		  itsNrSubbandsPerPset;
+      bool		  itsDelayCompensation;
+      double		  itsSampleRate;
+      unsigned		  itsInputPsetsSize, itsOutputPsetsSize;  // presumably the number of valid array entries - confirm in read()/write()
+      unsigned		  itsRefFreqsSize;  // presumably the number of valid itsRefFreqs entries - confirm in read()/write()
+      unsigned		  itsInputPsets[MAX_PSETS], itsOutputPsets[MAX_PSETS];
+      double		  itsRefFreqs[MAX_SUBBANDS];
+    } itsMarshalledData;
+};
+
+
+inline unsigned &BGL_Configuration::nrStations()
+{
+  return itsMarshalledData.itsNrStations;  // reference into itsMarshalledData
+}
+
+inline unsigned &BGL_Configuration::nrSamplesPerIntegration()
+{
+  return itsMarshalledData.itsNrSamplesPerIntegration;
+}
+
+inline unsigned &BGL_Configuration::nrSamplesToBGLProc()
+{
+  return itsMarshalledData.itsNrSamplesToBGLProc;
+}
+
+inline unsigned &BGL_Configuration::nrUsedCoresPerPset()
+{
+  return itsMarshalledData.itsNrUsedCoresPerPset;
+}
+
+inline unsigned &BGL_Configuration::nrSubbandsPerPset()
+{
+  return itsMarshalledData.itsNrSubbandsPerPset;
+}
+
+inline bool &BGL_Configuration::delayCompensation()
+{
+  return itsMarshalledData.itsDelayCompensation;
+}
+
+inline double &BGL_Configuration::sampleRate()
+{
+  return itsMarshalledData.itsSampleRate;
+}
+
+inline std::vector<unsigned> &BGL_Configuration::inputPsets()
+{
+  return itsInputPsets;  // the std::vector member, not itsMarshalledData.itsInputPsets
+}
+
+inline std::vector<unsigned> &BGL_Configuration::outputPsets()
+{
+  return itsOutputPsets;  // the std::vector member, not itsMarshalledData.itsOutputPsets
+}
+
+inline std::vector<double> & BGL_Configuration::refFreqs()
+{
+  return itsRefFreqs;  // the std::vector member, not itsMarshalledData.itsRefFreqs
+}
+
+} // namespace CS1
+} // namespace LOFAR
+
+#endif
diff --git a/Appl/CEP/CS1/CS1_Interface/include/CS1_Interface/Stub_RSP.h b/Appl/CEP/CS1/CS1_Interface/include/CS1_Interface/BGL_Mapping.h
similarity index 53%
rename from Appl/CEP/CS1/CS1_Interface/include/CS1_Interface/Stub_RSP.h
rename to Appl/CEP/CS1/CS1_Interface/include/CS1_Interface/BGL_Mapping.h
index bd1cfc0b7e38b559c6e53e2b0c1478648da7191d..edee3bbdfedb26a61dd59c93e6a0ec6ce172c343 100644
--- a/Appl/CEP/CS1/CS1_Interface/include/CS1_Interface/Stub_RSP.h
+++ b/Appl/CEP/CS1/CS1_Interface/include/CS1_Interface/BGL_Mapping.h
@@ -1,4 +1,4 @@
-//# Stub_RSP.h: Stub for connection of SB filter with outside world
+//#  BGL_Mapping.h: reshuffling of cores within psets for efficient torus transposes
 //#
 //#  Copyright (C) 2006
 //#  ASTRON (Netherlands Foundation for Research in Astronomy)
@@ -20,35 +20,29 @@
 //#
 //#  $Id$
 
-#ifndef LOFAR_CS1_INTERFACE_STUB_RSP_H
-#define LOFAR_CS1_INTERFACE_STUB_RSP_H
 
-#include <CS1_Interface/CS1_Parset.h>
+#ifndef LOFAR_APPL_CEP_CS1_CS1_INTERFACE_BGL_MAPPING_H
+#define LOFAR_APPL_CEP_CS1_CS1_INTERFACE_BGL_MAPPING_H
 
-namespace LOFAR 
-{
-  namespace CS1
-  {
-
-    class Stub_RSP
-    {
-    public:
-      // Create the stub. Get its parameters from the given file name.
-      explicit Stub_RSP (bool onServer, const CS1_Parset *ps);
-
-      ~Stub_RSP();
-
-      // Connect the given objects to the stubs.
-      void connect ();
 
-    private:
-      bool			       itsStubOnServer;
-      const CS1_Parset                *itsCS1PS;
-    };
-
-  } // namespace CS1
+namespace LOFAR {
+namespace CS1 {
 
+class BGL_Mapping  // declares only static member functions; no data members
+{
+  public:
+    // Reshuffle cores within different psets differently, to make the transpose
+    // over the 3D-torus much more efficient.  Without reshuffling, transposing
+    // cores often communicate in the same line or plane in the torus, causing
+    // severe bottlenecks over a few links.  With reshuffling, there are more
+    // redundant links, significantly improving the bandwidth.  TODO: improve
+    // the reshuffling function further, to minimize transpose times.
+
+    static unsigned mapCoreOnPset(unsigned core, unsigned pset);  // declared only; not defined in this header
+    static unsigned reverseMapCoreOnPset(unsigned core, unsigned pset);  // presumably the inverse of mapCoreOnPset for the same pset - confirm in the implementation
+};
+
+} // namespace CS1
 } // namespace LOFAR
 
 #endif
-
diff --git a/Appl/CEP/CS1/CS1_Interface/include/CS1_Interface/CS1_Parset.h b/Appl/CEP/CS1/CS1_Interface/include/CS1_Interface/CS1_Parset.h
index bc132934280f6ca2e9bebef7431fa9c9edbd2711..1652659f5c3eb62920bdf8b2ae975049fc1410bc 100644
--- a/Appl/CEP/CS1/CS1_Interface/include/CS1_Interface/CS1_Parset.h
+++ b/Appl/CEP/CS1/CS1_Interface/include/CS1_Interface/CS1_Parset.h
@@ -23,20 +23,21 @@
 #ifndef LOFAR_CS1_INTERFACE_CS1_PARSET_H
 #define LOFAR_CS1_INTERFACE_CS1_PARSET_H
 
+#if defined HAVE_APS
+
 // \file
 // class/struct that holds the CS1_Parset information
 
 //# Never #include <config.h> or #include <lofar_config.h> in a header file!
 
 //# Includes
-#include <bitset>
-#include <set>
 #include <APS/ParameterSet.h>
 #include <Common/StreamUtil.h>
 #include <Common/lofar_datetime.h>
-#include <boost/date_time/c_local_time_adjustor.hpp>
-
 #include <Common/LofarLogger.h> 
+#include <CS1_Interface/CS1_Config.h>
+
+#include <boost/date_time/c_local_time_adjustor.hpp>
 
 namespace LOFAR {
 namespace CS1 {
@@ -72,17 +73,18 @@ public:
 	double         IONintegrationTime() const;
 	double         storageIntegrationTime() const;
 	uint32         nrSubbandSamples() const;
-	uint32         nrSamplesToBGLProc() const;
-        uint32         nrSubbandsPerCell() const; 
+        uint32         nrSubbandsPerPset() const; 
 	uint32         nrHistorySamples() const;
+	uint32         nrSamplesToBGLProc() const;
 	uint32         nrSamplesToBuffer() const;
-	int            subbandsToReadFromFrame() const;
-	uint32         nrPFFTaps() const;
+	uint32         nrRSPboards() const;
+	uint32         nrRSPboardsPerStation() const;
+	uint32         subbandsToReadFromFrame() const;
+	uint32         nrPPFTaps() const;
 	uint32         nrChannelsPerSubband() const;
 	uint32         nrSubbands() const;
-	uint32         nrCells() const;
-	double         timeInterval() const;
-	uint32         nrBGLNodesPerCell() const;
+	uint32         nrPsets() const;
+	uint32         nrCoresPerPset() const;
 	vector<double> refFreqs() const;
 	double         chanWidth() const;
 	vector<string> delay_Ports() const;
@@ -92,8 +94,13 @@ public:
 	string         expandedArrayString(const string& orgStr) const;
 	bool	       useScatter() const;
 	bool	       useGather() const;
+	uint32	       nrPsetsPerStorage() const;
 	uint32	       nrOutputsPerInputNode() const;
 	uint32	       nrInputsPerStorageNode() const;
+	vector<uint32> inputPsets() const;
+	vector<uint32> outputPsets() const;
+	int	       inputPsetIndex(uint32 pset) const;
+	int	       outputPsetIndex(uint32 pset) const;
 	string	       getMSname(unsigned firstSB, unsigned lastSB) const;
 	
 	//# Datamembers
@@ -102,7 +109,8 @@ public:
 	
 private:
 	void           addPosition(string stName);
-	double         getTime(const char *name) const;
+	double	       getTime(const char *name) const;
+	static int     findIndex(uint32 pset, const vector<uint32> &psets);
 };
 
 // @}
@@ -179,32 +187,42 @@ inline uint32 CS1_Parset::nrSubbandSamples() const
   return BGLintegrationSteps() * nrChannelsPerSubband();
 }
 
-inline uint32 CS1_Parset::nrSamplesToBGLProc() const
+inline uint32 CS1_Parset::nrHistorySamples() const
 {
-  return nrSubbandSamples() + ((nrPFFTaps() - 1) * nrChannelsPerSubband());
+  return (nrPPFTaps() - 1) * nrChannelsPerSubband();
 }
 
-inline uint32 CS1_Parset::nrHistorySamples() const
+inline uint32 CS1_Parset::nrSamplesToBGLProc() const
 {
-  return (nrPFFTaps() - 1) * nrChannelsPerSubband();
+  return nrSubbandSamples() + nrHistorySamples() + 32 / sizeof(INPUT_SAMPLE_TYPE[NR_POLARIZATIONS]);
 }
 
 inline uint32 CS1_Parset::nrSamplesToBuffer() const
 {
-  return getUint32("OLAP.nrSecondsOfBuffer") * nrSubbandSamples();
+  return (uint32) (getDouble("OLAP.nrSecondsOfBuffer") * sampleRate()) & ~(32 / sizeof(INPUT_SAMPLE_TYPE[NR_POLARIZATIONS]) - 1);
+}
+
+inline uint32 CS1_Parset::nrRSPboards() const
+{
+  return getUint32("OLAP.nrRSPboards");
 }
 
-inline int CS1_Parset::subbandsToReadFromFrame() const
+inline uint32 CS1_Parset::nrRSPboardsPerStation() const
 {
-  return nrSubbands() * nrStations() / getInt32("OLAP.nrRSPboards");
+  return nrRSPboards() / nrStations();
 }
 
-inline uint32 CS1_Parset::nrSubbandsPerCell() const
+inline uint32 CS1_Parset::subbandsToReadFromFrame() const
 {
-  return getUint32("OLAP.subbandsPerPset") * getUint32("OLAP.BGLProc.psetsPerCell");
+  return nrSubbands() * nrStations() / nrRSPboards();
 }
 
-inline uint32 CS1_Parset::nrPFFTaps() const
+inline uint32 CS1_Parset::nrSubbandsPerPset() const
+{
+  return getUint32("OLAP.subbandsPerPset");
+}
+
+inline uint32 CS1_Parset::nrPPFTaps() const
 {
   return getUint32("OLAP.BGLProc.nrPPFTaps");
 }
@@ -219,19 +237,14 @@ inline uint32 CS1_Parset::nrSubbands() const
   return getUint32Vector("Observation.subbandList").size();
 }
   
-inline uint32 CS1_Parset::nrCells() const
+inline uint32 CS1_Parset::nrPsets() const
 {
-  return nrSubbands() / nrSubbandsPerCell();
+  return nrSubbands() / nrSubbandsPerPset();
 }
 
-inline double CS1_Parset::timeInterval() const
+inline uint32 CS1_Parset::nrCoresPerPset() const
 {
-  return nrSubbandSamples() / sampleRate() / nrSubbandsPerCell();
-}
-
-inline uint32 CS1_Parset::nrBGLNodesPerCell() const
-{
-  return getUint32("OLAP.BGLProc.nodesPerPset") * getUint32("OLAP.BGLProc.psetsPerCell");
+  return getUint32("OLAP.BGLProc.coresPerPset");
 }  
  
 inline double CS1_Parset::chanWidth() const
@@ -249,17 +262,43 @@ inline bool CS1_Parset::useGather() const
   return getBool("OLAP.IONProc.useGather");
 }
 
+inline uint32 CS1_Parset::nrPsetsPerStorage() const
+{
+  return getUint32("OLAP.psetsPerStorage");
+}
+
 inline uint32 CS1_Parset::nrOutputsPerInputNode() const
 {
-  return useScatter() ? getUint32("OLAP.BGLProc.psetsPerCell") : nrBGLNodesPerCell();
+  return useScatter() ? 1 : nrCoresPerPset();
 }
 
 inline uint32 CS1_Parset::nrInputsPerStorageNode() const
 {
-  return (useGather() ? getUint32("OLAP.BGLProc.psetsPerCell") : nrBGLNodesPerCell()) * getUint32("OLAP.psetsPerStorage");
+  return (useGather() ? 1 : nrCoresPerPset()) * nrPsetsPerStorage();
+}
+
+inline vector<uint32> CS1_Parset::inputPsets() const
+{
+  return getUint32Vector("OLAP.BGLProc.inputPsets");
+}
+
+inline vector<uint32> CS1_Parset::outputPsets() const
+{
+  return getUint32Vector("OLAP.BGLProc.outputPsets");
+}
+
+inline int CS1_Parset::inputPsetIndex(uint32 pset) const
+{
+  return findIndex(pset, inputPsets());
+}
+
+inline int CS1_Parset::outputPsetIndex(uint32 pset) const
+{
+  return findIndex(pset, outputPsets());
 }
 
 } // namespace CS1
 } // namespace LOFAR
 
+#endif // defined HAVE_APS
 #endif
diff --git a/Appl/CEP/CS1/CS1_Interface/include/CS1_Interface/DH_RFI_Mitigation.h b/Appl/CEP/CS1/CS1_Interface/include/CS1_Interface/DH_RFI_Mitigation.h
deleted file mode 100644
index 5072c0a491c58bdf37a426695d7dc7ac8b1d2f83..0000000000000000000000000000000000000000
--- a/Appl/CEP/CS1/CS1_Interface/include/CS1_Interface/DH_RFI_Mitigation.h
+++ /dev/null
@@ -1,82 +0,0 @@
-//# DH_RFI_Mitigation.h: RFI_Mitigation DataHolder
-//#
-//#  Copyright (C) 2006
-//#  ASTRON (Netherlands Foundation for Research in Astronomy)
-//#  P.O.Box 2, 7990 AA Dwingeloo, The Netherlands, seg@astron.nl
-//#
-//#  This program is free software; you can redistribute it and/or modify
-//#  it under the terms of the GNU General Public License as published by
-//#  the Free Software Foundation; either version 2 of the License, or
-//#  (at your option) any later version.
-//#
-//#  This program is distributed in the hope that it will be useful,
-//#  but WITHOUT ANY WARRANTY; without even the implied warranty of
-//#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-//#  GNU General Public License for more details.
-//#
-//#  You should have received a copy of the GNU General Public License
-//#  along with this program; if not, write to the Free Software
-//#  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-//#
-//#  $Id$
-
-#if 0 // currently unused
-#ifndef LOFAR_CS1_INTERFACE_DH_RFI_MITIGATION_H
-#define LOFAR_CS1_INTERFACE_DH_RFI_MITIGATION_H
-
-#include <CS1_Interface/CS1_Config.h>
-#include <CS1_Interface/bitset.h>
-#include <Transport/DataHolder.h>
-
-namespace LOFAR {
-namespace CS1 {
-
-class DH_RFI_Mitigation: public DataHolder
-{
-  // this class needs additional accessor functions to be useful
-  public:
-#if defined BGL_PROCESSING
-    typedef bitset<NR_SUBBAND_CHANNELS> ChannelFlagsType[NR_STATIONS];
-
-    explicit DH_RFI_Mitigation(const string& name);
-
-    DH_RFI_Mitigation(const DH_RFI_Mitigation&);
-
-    virtual ~DH_RFI_Mitigation();
-
-    DataHolder *clone() const;
-
-    virtual void init();
-
-    ChannelFlagsType *getChannelFlags()
-    {
-      return itsChannelFlags;
-    }
-
-    const ChannelFlagsType *getChannelFlags() const
-    {
-      return itsChannelFlags;
-    }
-
-    const size_t nrChannelFlags() const
-    {
-      return NR_STATIONS * NR_SUBBAND_CHANNELS;
-    }
-#endif
-
-  private:
-    /// Forbid assignment.
-    DH_RFI_Mitigation &operator = (const DH_RFI_Mitigation&);
-
-#if defined BGL_PROCESSING
-    ChannelFlagsType *itsChannelFlags;
-#endif
-
-    void fillDataPointers();
-  };
-
-} // namespace CS1
-} // namespace LOFAR
-
-#endif /* BGL_PROCESSING */
-#endif
diff --git a/Appl/CEP/CS1/CS1_Interface/include/CS1_Interface/DH_RSP.h b/Appl/CEP/CS1/CS1_Interface/include/CS1_Interface/DH_RSP.h
deleted file mode 100644
index 9b1e860697ddf380ef9445795cb55dba2c194592..0000000000000000000000000000000000000000
--- a/Appl/CEP/CS1/CS1_Interface/include/CS1_Interface/DH_RSP.h
+++ /dev/null
@@ -1,148 +0,0 @@
-//# DH_RSP.h: DataHolder storing RSP raw ethernet frames for 
-//#           StationCorrelator demo
-//#
-//#  Copyright (C) 2006
-//#  ASTRON (Netherlands Foundation for Research in Astronomy)
-//#  P.O.Box 2, 7990 AA Dwingeloo, The Netherlands, seg@astron.nl
-//#
-//#  This program is free software; you can redistribute it and/or modify
-//#  it under the terms of the GNU General Public License as published by
-//#  the Free Software Foundation; either version 2 of the License, or
-//#  (at your option) any later version.
-//#
-//#  This program is distributed in the hope that it will be useful,
-//#  but WITHOUT ANY WARRANTY; without even the implied warranty of
-//#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-//#  GNU General Public License for more details.
-//#
-//#  You should have received a copy of the GNU General Public License
-//#  along with this program; if not, write to the Free Software
-//#  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-//#
-//#  $Id$
-
-#ifndef LOFAR_CS1_INTERFACE_DH_RSP_H
-#define LOFAR_CS1_INTERFACE_DH_RSP_H
-
-#include <Transport/DataHolder.h>
-#include <CS1_Interface/CS1_Config.h>
-#include <CS1_Interface/RSPTimeStamp.h>
-#include <CS1_Interface/SparseSet.h>
-#include <CS1_Interface/CS1_Parset.h>
-
-#if defined HAVE_BOOST
-#include <boost/multi_array.hpp>
-#endif
-
-namespace LOFAR
-{
-  namespace CS1
-  {
-
-    class DH_RSP: public DataHolder
-    {
-    public:
-      typedef INPUT_SAMPLE_TYPE BufferType;
-
-      explicit DH_RSP (const string &name,
-                       const CS1_Parset *pSet);
-
-      DH_RSP(const DH_RSP&);
-
-      virtual ~DH_RSP();
-
-      DataHolder* clone() const;
-
-      /// Allocate the buffers.
-      virtual void init();
-
-      /// Accessor functions
-      const int getStationID() const;
-      void setStationID(int);
-      const timestamp_t getTimeStamp() const;
-      void setTimeStamp(timestamp_t);
-      float getFineDelayAtBegin() const;
-      void  setFineDelayAtBegin(float delay);
-      float getFineDelayAfterEnd() const;
-      void  setFineDelayAfterEnd(float delay);
-
-#if defined HAVE_BOOST
-     typedef boost::multi_array_ref<BufferType, 3> SamplesType;
-
-     SamplesType getSamples() const
-     {
-       static boost::detail::multi_array::extent_gen<3u> extents = boost::extents[itsNSubbands][itsNTimes][NR_POLARIZATIONS];
-       return SamplesType(itsBuffer, extents);
-     }
-#endif
-
-      /// Reset the buffer
-      void resetBuffer();
-
-      SparseSet<unsigned> &getFlags();
-      const SparseSet<unsigned> &getFlags() const;
-
-      /// (un)marshall flags into/from blob
-      void getExtraData(), fillExtraData();
-
-    private:
-      /// Forbid assignment.
-      DH_RSP& operator= (const DH_RSP&);
-
-      // Fill the pointers (itsBuffer) to the data in the blob.
-      virtual void fillDataPointers();
-
-      /// pointers to data in the blob
-      const CS1_Parset *itsCS1PS;
-      
-      BufferType*  itsBuffer;
-      SparseSet<unsigned> *itsFlags;
-      int* itsStationID;
-      float* itsDelays;
-      timestamp_t* itsTimeStamp;
-
-      int itsNTimes;
-      int itsNoPolarisations;
-      int itsNSubbands;
-      unsigned int itsBufSize;
-
-    };
-
-    inline const int DH_RSP::getStationID() const
-    { return *itsStationID; }
-
-    inline void DH_RSP::setStationID(int id)
-    { *itsStationID = id; }
-
-    inline const timestamp_t DH_RSP::getTimeStamp() const
-    { return *itsTimeStamp; }
-
-    inline void DH_RSP::setTimeStamp(timestamp_t timestamp)
-    { *itsTimeStamp = timestamp; }
-
-    inline void DH_RSP::resetBuffer()
-    { memset(itsBuffer, 0, itsBufSize*sizeof(BufferType)); }
-
-    inline SparseSet<unsigned> &DH_RSP::getFlags()
-    { return *itsFlags; }
-
-    inline const SparseSet<unsigned> &DH_RSP::getFlags() const
-    { return *itsFlags; }
-
-    inline float DH_RSP::getFineDelayAtBegin() const
-    { return itsDelays[0]; }
-
-    inline void DH_RSP::setFineDelayAtBegin(float delay)
-    { itsDelays[0] = delay; }
-
-    inline float DH_RSP::getFineDelayAfterEnd() const
-    { return itsDelays[1]; }
-    
-    inline void DH_RSP::setFineDelayAfterEnd(float delay)
-    { itsDelays[1] = delay; }
-
-  } // namespace CS1
-
-} // namespace LOFAR
-
-#endif 
diff --git a/Appl/CEP/CS1/CS1_Interface/include/CS1_Interface/DH_RSPSync.h b/Appl/CEP/CS1/CS1_Interface/include/CS1_Interface/DH_RSPSync.h
deleted file mode 100644
index 711697db2254a5b62b1824e33067e5d8c8936d9a..0000000000000000000000000000000000000000
--- a/Appl/CEP/CS1/CS1_Interface/include/CS1_Interface/DH_RSPSync.h
+++ /dev/null
@@ -1,79 +0,0 @@
-//# DH_RSPSync.h: DataHolder used to synchronize incoming RSP data
-//#
-//#  Copyright (C) 2006
-//#  ASTRON (Netherlands Foundation for Research in Astronomy)
-//#  P.O.Box 2, 7990 AA Dwingeloo, The Netherlands, seg@astron.nl
-//#
-//#  This program is free software; you can redistribute it and/or modify
-//#  it under the terms of the GNU General Public License as published by
-//#  the Free Software Foundation; either version 2 of the License, or
-//#  (at your option) any later version.
-//#
-//#  This program is distributed in the hope that it will be useful,
-//#  but WITHOUT ANY WARRANTY; without even the implied warranty of
-//#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-//#  GNU General Public License for more details.
-//#
-//#  You should have received a copy of the GNU General Public License
-//#  along with this program; if not, write to the Free Software
-//#  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-//#
-//#  $Id$
-
-#ifndef LOFAR_CS1_INTERFACE_DH_RSPSYNC_H
-#define LOFAR_CS1_INTERFACE_DH_RSPSYNC_H
-
-
-#include <Transport/DataHolder.h>
-#include <CS1_Interface/RSPTimeStamp.h>
-
-namespace LOFAR
-{
-  namespace CS1
-  {
-
-    class DH_RSPSync: public DataHolder
-    {
-    public:
-
-      explicit DH_RSPSync (const string& name);
-
-      DH_RSPSync(const DH_RSPSync&);
-
-      virtual ~DH_RSPSync();
-
-      DataHolder* clone() const;
-
-      /// Allocate the buffers.
-      virtual void init();
-
-      /// Set the sync stamp
-      void setSyncStamp(const timestamp_t syncStamp);
-
-      /// Get the sync stamp
-      const timestamp_t getSyncStamp() const;
-      void incrementStamp(const int value);
-
-    private:
-      /// Forbid assignment.
-      DH_RSPSync& operator= (const DH_RSPSync&);
-
-      timestamp_t*  itsSyncStamp;
-
-      void fillDataPointers();
-    };
-
-    inline void DH_RSPSync::setSyncStamp(const timestamp_t syncStamp)
-    { *itsSyncStamp = syncStamp; }
- 
-    inline const timestamp_t DH_RSPSync::getSyncStamp() const
-    { return *itsSyncStamp;}
-
-    inline void DH_RSPSync::incrementStamp(const int value)
-    { *itsSyncStamp += value;}
-
-  } // namespace CS1
-
-} // namespace LOFAR
-
-#endif 
diff --git a/Appl/CEP/CS1/CS1_Interface/include/CS1_Interface/DH_Subband.h b/Appl/CEP/CS1/CS1_Interface/include/CS1_Interface/DH_Subband.h
deleted file mode 100644
index 628dfd44b4b4670ef8f59e08459285c75336edd7..0000000000000000000000000000000000000000
--- a/Appl/CEP/CS1/CS1_Interface/include/CS1_Interface/DH_Subband.h
+++ /dev/null
@@ -1,148 +0,0 @@
-//# DH_Subband.h: DataHolder for subband samples and flags
-//#
-//#  Copyright (C) 2006
-//#  ASTRON (Netherlands Foundation for Research in Astronomy)
-//#  P.O.Box 2, 7990 AA Dwingeloo, The Netherlands, seg@astron.nl
-//#
-//#  This program is free software; you can redistribute it and/or modify
-//#  it under the terms of the GNU General Public License as published by
-//#  the Free Software Foundation; either version 2 of the License, or
-//#  (at your option) any later version.
-//#
-//#  This program is distributed in the hope that it will be useful,
-//#  but WITHOUT ANY WARRANTY; without even the implied warranty of
-//#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-//#  GNU General Public License for more details.
-//#
-//#  You should have received a copy of the GNU General Public License
-//#  along with this program; if not, write to the Free Software
-//#  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-//#
-//#  $Id$
-
-#ifndef LOFAR_CS1_INTERFACE_DH_SUBBAND_H
-#define LOFAR_CS1_INTERFACE_DH_SUBBAND_H
-
-#include <CS1_Interface/CS1_Config.h>
-#include <CS1_Interface/SparseSet.h>
-#include <Transport/DataHolder.h>
-#include <Common/lofar_complex.h>
-#include <CS1_Interface/CS1_Parset.h>
-
-#if defined HAVE_BOOST
-#include <boost/multi_array.hpp>
-#endif
-
-namespace LOFAR {
-namespace CS1 {
-
-class DH_Subband: public DataHolder
-{
-  public:
-    // samples are ALWAYS stored in little endian format !
-
-    typedef INPUT_SAMPLE_TYPE SampleType;
-
-    // Fine-grained delays
-    typedef struct {
-      float delayAtBegin, delayAfterEnd;
-    } DelayIntervalType;
-
-    explicit DH_Subband(const string &name,
-			const CS1_Parset *pSet); 
-
-    DH_Subband(const DH_Subband &);
-
-    virtual ~DH_Subband();
-
-    DataHolder *clone() const;
-
-    virtual void init();
-
-    SampleType &getSample(unsigned station, unsigned time, unsigned pol)
-    {
-      return itsSamples[NR_POLARIZATIONS * (itsNrInputSamples * station + time) + pol];
-    }
-
-    size_t nrSamples() const
-    {
-      return itsNrStations * itsNrInputSamples * NR_POLARIZATIONS;
-    }
-
-    size_t nrInputSamples() const
-    {
-      return itsNrInputSamples;
-    }
-
-    DelayIntervalType &getDelay(unsigned station)
-    {
-      return itsDelays[station];
-    }
-
-    SparseSet<unsigned> &getFlags(unsigned station)
-    {
-      return itsFlags[station];
-    }
-
-    const SparseSet<unsigned> &getFlags(unsigned station) const
-    {
-      return itsFlags[station];
-    }
-
-    size_t nrDelays() const
-    {
-      return itsNrStations;
-    }
-
-#if defined HAVE_BOOST
-    typedef boost::multi_array_ref<SampleType, 3>	   Samples3Dtype;
-    typedef boost::multi_array_ref<SampleType, 4>	   Samples4Dtype;
-    typedef boost::multi_array_ref<DelayIntervalType, 1>   DelaysType;
-    typedef boost::multi_array_ref<SparseSet<unsigned>, 1> FlagsType;
-
-    Samples3Dtype getSamples3D() const
-    {
-      static boost::detail::multi_array::extent_gen<3u> extents = boost::extents[itsNrStations][itsNrInputSamples][NR_POLARIZATIONS];
-      return Samples3Dtype(itsSamples, extents);
-    }
-
-    Samples4Dtype getSamples4D() const
-    {
-      static boost::detail::multi_array::extent_gen<4u> extents = boost::extents[itsNrStations][itsNrInputSamples / NR_SUBBAND_CHANNELS][NR_SUBBAND_CHANNELS][NR_POLARIZATIONS];
-      return Samples4Dtype(itsSamples, extents);
-    }
-
-    DelaysType getDelays() const
-    {
-      static boost::detail::multi_array::extent_gen<1u> extents = boost::extents[itsNrStations];
-      return DelaysType(itsDelays, extents);
-    }
-
-    FlagsType getFlags()
-    {
-      static boost::detail::multi_array::extent_gen<1u> extents = boost::extents[itsNrStations];
-      return FlagsType(itsFlags, extents);
-    }
-#endif
-
-    void		   swapBytes();
-    void		   getExtraData(), fillExtraData();
-
-  private:
-    /// Forbid assignment.
-    DH_Subband &operator = (const DH_Subband &);
-
-    const CS1_Parset       *itsCS1PS;
-    unsigned		   itsNrStations, itsNrInputSamples;
-
-    SampleType		   *itsSamples;
-    SparseSet<unsigned>	   *itsFlags;
-    DelayIntervalType	   *itsDelays;
-
-    void fillDataPointers();
-};
-
-} // namespace CS1
-} // namespace LOFAR
-
-#endif 
diff --git a/Appl/CEP/CS1/CS1_Interface/include/CS1_Interface/DH_Visibilities.h b/Appl/CEP/CS1/CS1_Interface/include/CS1_Interface/DH_Visibilities.h
index 2537a553a5f81725aba9521b7799435be937723f..90a8778acd533b260aba73fb197ae27e84da0228 100644
--- a/Appl/CEP/CS1/CS1_Interface/include/CS1_Interface/DH_Visibilities.h
+++ b/Appl/CEP/CS1/CS1_Interface/include/CS1_Interface/DH_Visibilities.h
@@ -23,6 +23,8 @@
 #ifndef LOFAR_CS1_INTERFACE_DH_VISIBILITIES_H
 #define LOFAR_CS1_INTERFACE_DH_VISIBILITIES_H
 
+#if defined HAVE_APS
+
 #include <Transport/DataHolder.h>
 #include <Common/lofar_complex.h>
 #include <CS1_Interface/CS1_Config.h>
@@ -40,6 +42,7 @@ class DH_Visibilities: public DataHolder
   public:
     typedef fcomplex	   VisibilityType;
     typedef unsigned short NrValidSamplesType;
+    typedef float	   CentroidType;
 
     explicit DH_Visibilities(const string& name,
 			     const CS1_Parset *pSet);
@@ -59,7 +62,6 @@ class DH_Visibilities: public DataHolder
       return station2 * (station2 + 1) / 2 + station1;
     }
 
-#if defined HAVE_BOOST
     typedef boost::multi_array_ref<VisibilityType, 4>	  VisibilitiesType;
     typedef boost::multi_array_ref<NrValidSamplesType, 2> AllNrValidSamplesType;
 
@@ -74,7 +76,6 @@ class DH_Visibilities: public DataHolder
       static boost::detail::multi_array::extent_gen<2u> extents = boost::extents[itsNrBaselines][itsNrChannels];
       return AllNrValidSamplesType(itsNrValidSamples, extents);
     }
-#endif
 
     VisibilityType &getVisibility(unsigned baseline, unsigned channel, unsigned pol1, unsigned pol2)
     {
@@ -91,17 +92,23 @@ class DH_Visibilities: public DataHolder
       return itsNrBaselines * itsNrChannels * NR_POLARIZATIONS * NR_POLARIZATIONS;
     }
 
+    CentroidType *getCentroids()
+    {
+      return itsCentroids;
+    }
+
     DH_Visibilities &operator += (const DH_Visibilities &);
 
   private:
     /// Forbid assignment.
     DH_Visibilities& operator= (const DH_Visibilities&);
 
-    const CS1_Parset  *itsCS1PS;
+    const CS1_Parset   *itsCS1PS;
     unsigned	       itsNrBaselines, itsNrChannels;
 
     VisibilityType     *itsVisibilities;
     NrValidSamplesType *itsNrValidSamples;
+    CentroidType       *itsCentroids;
 
     void fillDataPointers();
 };
@@ -109,4 +116,5 @@ class DH_Visibilities: public DataHolder
 } // namespace CS1
 } // namespace LOFAR
 
+#endif // defined HAVE_APS
 #endif 
diff --git a/Appl/CEP/CS1/CS1_Interface/include/CS1_Interface/ION_to_CN.h b/Appl/CEP/CS1/CS1_Interface/include/CS1_Interface/ION_to_CN.h
new file mode 100644
index 0000000000000000000000000000000000000000..5cec3b46587c161dfc6c16795a5dd4cec02ba400
--- /dev/null
+++ b/Appl/CEP/CS1/CS1_Interface/include/CS1_Interface/ION_to_CN.h
@@ -0,0 +1,93 @@
+//# ION_to_CN.h: flags, delays and alignment shift exchanged through a TransportHolder
+//#
+//#  Copyright (C) 2007
+//#  ASTRON (Netherlands Foundation for Research in Astronomy)
+//#  P.O.Box 2, 7990 AA Dwingeloo, The Netherlands, seg@astron.nl
+//#
+//#  This program is free software; you can redistribute it and/or modify
+//#  it under the terms of the GNU General Public License as published by
+//#  the Free Software Foundation; either version 2 of the License, or
+//#  (at your option) any later version.
+//#
+//#  This program is distributed in the hope that it will be useful,
+//#  but WITHOUT ANY WARRANTY; without even the implied warranty of
+//#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+//#  GNU General Public License for more details.
+//#
+//#  You should have received a copy of the GNU General Public License
+//#  along with this program; if not, write to the Free Software
+//#  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+//#
+//#  $Id$
+
+#ifndef LOFAR_CS1_INTERFACE_ION_TO_CN_H
+#define LOFAR_CS1_INTERFACE_ION_TO_CN_H
+
+#include <CS1_Interface/SparseSet.h>
+#include <Transport/TransportHolder.h>
+
+#include <cassert>
+
+
+namespace LOFAR {
+namespace CS1 {
+
+class ION_to_CN // bundles a flag set, two delays and an alignment shift for transfer via a TransportHolder
+{
+  public:
+    SparseSet<unsigned> &flags(); // mutable access to the (unmarshalled) flag set
+    double		&delayAtBegin(), &delayAfterEnd(); // references straight into the wire struct
+    unsigned		&alignmentShift(); // reference straight into the wire struct
+
+    void		read(TransportHolder *); // receive the wire struct, then unmarshall the flags
+    void		write(TransportHolder *); // marshall the flags, then send the wire struct
+
+  private:
+    SparseSet<unsigned>	itsFlags; // working copy; converted to/from flagsBuffer by write()/read()
+
+    struct MarshalledData // sent and received as one raw block of sizeof(MarshalledData) bytes
+    {
+      double		delayAtBegin, delayAfterEnd;
+      unsigned		alignmentShift;
+      unsigned char	flagsBuffer[132]; // upper bound for the marshalled flags; NOTE(review): origin of 132 not visible here
+    } itsMarshalledData;
+};
+
+inline SparseSet<unsigned> &ION_to_CN::flags()
+{
+  return itsFlags; // the working flag set, not the marshalled byte buffer
+}
+
+inline double &ION_to_CN::delayAtBegin()
+{
+  return itsMarshalledData.delayAtBegin; // stored directly in the wire struct, so no marshalling step is needed
+}
+
+inline double &ION_to_CN::delayAfterEnd()
+{
+  return itsMarshalledData.delayAfterEnd; // stored directly in the wire struct, so no marshalling step is needed
+}
+
+inline unsigned &ION_to_CN::alignmentShift()
+{
+  return itsMarshalledData.alignmentShift; // stored directly in the wire struct, so no marshalling step is needed
+}
+
+inline void ION_to_CN::read(TransportHolder *th)
+{
+  th->recvBlocking(&itsMarshalledData, sizeof itsMarshalledData, 1, 0, 0); // NOTE(review): nothing returned by recvBlocking is checked - confirm failures cannot pass silently
+  itsFlags.unmarshall(itsMarshalledData.flagsBuffer); // rebuild the flag set from the bytes just received
+}
+
+// Marshall the flags and send the wire struct; marshall() is kept outside assert() so NDEBUG builds still run it.
+inline void ION_to_CN::write(TransportHolder *th)
+{
+  const bool marshalled = itsFlags.marshall(&itsMarshalledData.flagsBuffer, sizeof itsMarshalledData.flagsBuffer) >= 0;
+  assert(marshalled); (void) marshalled; // negative result = marshalling failed; (void) avoids an unused-variable warning under NDEBUG
+  th->sendBlocking(&itsMarshalledData, sizeof itsMarshalledData, 1, 0);
+}
+
+} // namespace CS1
+} // namespace LOFAR
+
+#endif 
diff --git a/Appl/CEP/CS1/CS1_Interface/include/CS1_Interface/Makefile.am b/Appl/CEP/CS1/CS1_Interface/include/CS1_Interface/Makefile.am
index cb008bfa2036776c72e2e01abf8d98d74133c739..5d6311cc72077bed1c025bfe63200c785ed2b912 100644
--- a/Appl/CEP/CS1/CS1_Interface/include/CS1_Interface/Makefile.am
+++ b/Appl/CEP/CS1/CS1_Interface/include/CS1_Interface/Makefile.am
@@ -1,17 +1,15 @@
 pkginclude_HEADERS = \
+	BGL_Command.h \
+	BGL_Configuration.h \
+	BGL_Mapping.h \
 	CS1_Config.h \
-	bitset.h \
+	CS1_Parset.h \
 	DH_Delay.h \
-	DH_RFI_Mitigation.h \
-	DH_RSP.h \
-	DH_RSPSync.h \
-	DH_Subband.h \
 	DH_Visibilities.h \
+	ION_to_CN.h \
 	RSPTimeStamp.h \
 	SparseSet.h \
 	Stub_BGL.h \
-	Stub_Delay.h \
-	CS1_Parset.h \
-	Stub_RSP.h
+	Stub_Delay.h
 
 include $(top_srcdir)/Makefile.common
diff --git a/Appl/CEP/CS1/CS1_Interface/include/CS1_Interface/Stub_BGL.h b/Appl/CEP/CS1/CS1_Interface/include/CS1_Interface/Stub_BGL.h
index 4fa01e4216cdebdd3aeb715038f5b4241b4bcfb7..cb04f50523a9a546cc3400f22d31ae4be5e5d150 100644
--- a/Appl/CEP/CS1/CS1_Interface/include/CS1_Interface/Stub_BGL.h
+++ b/Appl/CEP/CS1/CS1_Interface/include/CS1_Interface/Stub_BGL.h
@@ -23,6 +23,8 @@
 #ifndef LOFAR_CS1_INTERFACE_STUB_BGL_H
 #define LOFAR_CS1_INTERFACE_STUB_BGL_H
 
+#if defined HAVE_TINYCEP && defined HAVE_APS
+
 #include <tinyCEP/TinyDataManager.h>
 #include <Transport/Connection.h>
 #include <Transport/TransportHolder.h>
@@ -44,7 +46,7 @@ class Stub_BGL
     Stub_BGL(bool iAmOnBGL, bool isInput, const char *connectionName, const CS1_Parset *pSet);
     ~Stub_BGL();
 
-    void connect(unsigned cellNr, unsigned nodeNr, TinyDataManager &dm, unsigned channel);
+    void connect(unsigned psetNr, unsigned coreNr, TinyDataManager &dm, unsigned channel);
 
   private:
     const CS1_Parset				     *itsCS1PS;
@@ -57,4 +59,5 @@ class Stub_BGL
 } // namespace CS1
 } // namespace LOFAR
 
+#endif // defined HAVE_TINYCEP && defined HAVE_APS
 #endif
diff --git a/Appl/CEP/CS1/CS1_Interface/include/CS1_Interface/Stub_Delay.h b/Appl/CEP/CS1/CS1_Interface/include/CS1_Interface/Stub_Delay.h
index 86c66b4d82b1e5b5f240406e5cb4c0fec83c2f7b..3015dedd8650c54f461a2eaf73fbd1d9c4bf33a6 100644
--- a/Appl/CEP/CS1/CS1_Interface/include/CS1_Interface/Stub_Delay.h
+++ b/Appl/CEP/CS1/CS1_Interface/include/CS1_Interface/Stub_Delay.h
@@ -23,6 +23,8 @@
 #ifndef LOFAR_CS1_INTERFACE_STUB_DELAY_H
 #define LOFAR_CS1_INTERFACE_STUB_DELAY_H
 
+#if defined HAVE_TINYCEP && defined HAVE_APS
+
 #include <tinyCEP/TinyDataManager.h>
 #include <Common/lofar_vector.h>
 #include <CS1_Interface/CS1_Parset.h>
@@ -66,4 +68,5 @@ namespace LOFAR
 
 } // namespace LOFAR
 
+#endif // defined HAVE_TINYCEP && defined HAVE_APS
 #endif
diff --git a/Appl/CEP/CS1/CS1_Interface/include/CS1_Interface/bitset.h b/Appl/CEP/CS1/CS1_Interface/include/CS1_Interface/bitset.h
deleted file mode 100644
index 20de3bb270ff1ee1ef9e1e423fa70b214cad8021..0000000000000000000000000000000000000000
--- a/Appl/CEP/CS1/CS1_Interface/include/CS1_Interface/bitset.h
+++ /dev/null
@@ -1,1295 +0,0 @@
-//#  bitset.h: portable <bitset> adaptation
-//#
-//#  Copyright (C) 2006
-//#  ASTRON (Netherlands Foundation for Research in Astronomy)
-//#  P.O.Box 2, 7990 AA Dwingeloo, The Netherlands, seg@astron.nl
-//#
-//#  This program is free software; you can redistribute it and/or modify
-//#  it under the terms of the GNU General Public License as published by
-//#  the Free Software Foundation; either version 2 of the License, or
-//#  (at your option) any later version.
-//#
-//#  This program is distributed in the hope that it will be useful,
-//#  but WITHOUT ANY WARRANTY; without even the implied warranty of
-//#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-//#  GNU General Public License for more details.
-//#
-//#  You should have received a copy of the GNU General Public License
-//#  along with this program; if not, write to the Free Software
-//#  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-//#
-//#  $Id$
-
-// Below is a slightly adapted version of GNU's std::bitset implementation.
-// std::bitset is not suitable for communication between different types of
-// machines, since std::bitset uses "unsigned long" as building block, which
-// may have different sizes on different machines.
-// BlobField, which is resposible for byte-swapping data are communicated
-// between machines with different endiannesses, cannot handle different sizes.
-// Also, a 77-bit bitset requires 12 bytes on a 32-bit machine and 16 bytes on
-// a 64-bit machine, making the communication problems even worse.
-// This implementation uses "uint32" rather than "unsinged long".
-
-
-
-
-
-// <bitset> -*- C++ -*-
-
-// Copyright (C) 2001, 2002 Free Software Foundation, Inc.
-//
-// This file is part of the GNU ISO C++ Library.  This library is free
-// software; you can redistribute it and/or modify it under the
-// terms of the GNU General Public License as published by the
-// Free Software Foundation; either version 2, or (at your option)
-// any later version.
-
-// This library is distributed in the hope that it will be useful,
-// but WITHOUT ANY WARRANTY; without even the implied warranty of
-// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-// GNU General Public License for more details.
-
-// You should have received a copy of the GNU General Public License along
-// with this library; see the file COPYING.  If not, write to the Free
-// Software Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307,
-// USA.
-
-// As a special exception, you may use this file as part of a free software
-// library without restriction.  Specifically, if other files instantiate
-// templates or use macros or inline functions from this file, or you compile
-// this file and link it with other files to produce an executable, this
-// file does not by itself cause the resulting executable to be covered by
-// the GNU General Public License.  This exception does not however
-// invalidate any other reasons why the executable file might be covered by
-// the GNU General Public License.
-
-/*
- * Copyright (c) 1998
- * Silicon Graphics Computer Systems, Inc.
- *
- * Permission to use, copy, modify, distribute and sell this software
- * and its documentation for any purpose is hereby granted without fee,
- * provided that the above copyright notice appear in all copies and
- * that both that copyright notice and this permission notice appear
- * in supporting documentation.  Silicon Graphics makes no
- * representations about the suitability of this software for any
- * purpose.  It is provided "as is" without express or implied warranty.
- */
-
-/** @file bitset
- *  This is a Standard C++ Library header.  You should @c #include this header
- *  in your programs, rather than any of the "st[dl]_*.h" implementation files.
- */
-
-#ifndef _LOFAR_BITSET_H
-#define _LOFAR_BITSET_H
-
-//#pragma GCC system_header
-
-#include <Common/LofarTypes.h>	// for uint32
-#include <cstddef>     // for size_t
-#include <cstring>     // for memset
-#include <string>
-#include <bits/functexcept.h>   // for invalid_argument, out_of_range,
-                                // overflow_error
-#include <ostream>     // for ostream (operator<<)
-#include <istream>     // for istream (operator>>)
-
-
-#define _LOFAR_BITSET_BITS_PER_WORD (CHAR_BIT*sizeof(uint32))
-#define _LOFAR_BITSET_WORDS(__n) \
- ((__n) < 1 ? 0 : ((__n) + _LOFAR_BITSET_BITS_PER_WORD - 1)/_LOFAR_BITSET_BITS_PER_WORD)
-
-namespace LOFAR
-{
-  namespace CS1
-  {
-    using namespace std;
-
-    extern const unsigned char _S_bit_count[256];
-    extern const unsigned char _S_first_one[256];
-    /**
-     *  @if maint
-     *  Base class, general case.  It is a class inveriant that _Nw will be
-     *  nonnegative.
-     *
-     *  See documentation for bitset.
-     *  @endif
-     */
-    template<size_t _Nw>
-    struct _Base_bitset
-    {
-      typedef uint32 _WordT;
-
-      /// 0 is the least significant word.
-      _WordT 		_M_w[_Nw];
-
-      _Base_bitset() { _M_do_reset(); }
-      _Base_bitset(uint32 __val)
-      {
-	_M_do_reset();
-	_M_w[0] = __val;
-      }
-
-      static size_t
-      _S_whichword(size_t __pos )
-      { return __pos / _LOFAR_BITSET_BITS_PER_WORD; }
-
-      static size_t
-      _S_whichbyte(size_t __pos )
-      { return (__pos % _LOFAR_BITSET_BITS_PER_WORD) / CHAR_BIT; }
-
-      static size_t
-      _S_whichbit(size_t __pos )
-      { return __pos % _LOFAR_BITSET_BITS_PER_WORD; }
-
-      static _WordT
-      _S_maskbit(size_t __pos )
-      { return (static_cast<_WordT>(1)) << _S_whichbit(__pos); }
-
-      _WordT&
-      _M_getword(size_t __pos)
-      { return _M_w[_S_whichword(__pos)]; }
-
-      _WordT
-      _M_getword(size_t __pos) const
-      { return _M_w[_S_whichword(__pos)]; }
-
-      _WordT&
-      _M_hiword() { return _M_w[_Nw - 1]; }
-
-      _WordT
-      _M_hiword() const { return _M_w[_Nw - 1]; }
-
-      void
-      _M_do_and(const _Base_bitset<_Nw>& __x)
-      {
-	for (size_t __i = 0; __i < _Nw; __i++)
-	  _M_w[__i] &= __x._M_w[__i];
-      }
-
-      void
-      _M_do_or(const _Base_bitset<_Nw>& __x)
-      {
-	for (size_t __i = 0; __i < _Nw; __i++)
-	  _M_w[__i] |= __x._M_w[__i];
-      }
-
-      void
-      _M_do_xor(const _Base_bitset<_Nw>& __x)
-      {
-	for (size_t __i = 0; __i < _Nw; __i++)
-	  _M_w[__i] ^= __x._M_w[__i];
-      }
-
-      void
-      _M_do_left_shift(size_t __shift);
-
-      void
-      _M_do_right_shift(size_t __shift);
-
-      void
-      _M_do_flip()
-      {
-	for (size_t __i = 0; __i < _Nw; __i++)
-	  _M_w[__i] = ~_M_w[__i];
-      }
-
-      void
-      _M_do_set()
-      {
-	for (size_t __i = 0; __i < _Nw; __i++)
-	  _M_w[__i] = ~static_cast<_WordT>(0);
-      }
-
-      void
-      _M_do_reset() { memset(_M_w, 0, _Nw * sizeof(_WordT)); }
-
-      bool
-      _M_is_equal(const _Base_bitset<_Nw>& __x) const
-      {
-	for (size_t __i = 0; __i < _Nw; ++__i)
-        {
-          if (_M_w[__i] != __x._M_w[__i])
-            return false;
-        }
-	return true;
-      }
-
-      bool
-      _M_is_any() const
-      {
-	for (size_t __i = 0; __i < _Nw; __i++)
-        {
-          if (_M_w[__i] != static_cast<_WordT>(0))
-            return true;
-        }
-	return false;
-      }
-
-      size_t
-      _M_do_count() const
-      {
-	size_t __result = 0;
-	const unsigned char* __byte_ptr = (const unsigned char*)_M_w;
-	const unsigned char* __end_ptr = (const unsigned char*)(_M_w + _Nw);
-
-	while ( __byte_ptr < __end_ptr )
-        {
-          __result += _S_bit_count[*__byte_ptr];
-          __byte_ptr++;
-        }
-	return __result;
-      }
-
-      uint32
-      _M_do_to_uint32() const;
-
-      // find first "on" bit
-      size_t
-      _M_do_find_first(size_t __not_found) const;
-
-      // find the next "on" bit that follows "prev"
-      size_t
-      _M_do_find_next(size_t __prev, size_t __not_found) const;
-    };
-
-    // Definitions of non-inline functions from _Base_bitset.
-    template<size_t _Nw>
-    void
-    _Base_bitset<_Nw>::_M_do_left_shift(size_t __shift)
-    {
-      if (__builtin_expect(__shift != 0, 1))
-      {
-        const size_t __wshift = __shift / _LOFAR_BITSET_BITS_PER_WORD;
-        const size_t __offset = __shift % _LOFAR_BITSET_BITS_PER_WORD;
-
-        if (__offset == 0)
-          for (size_t __n = _Nw - 1; __n >= __wshift; --__n)
-            _M_w[__n] = _M_w[__n - __wshift];
-        else
-        {
-          const size_t __sub_offset = _LOFAR_BITSET_BITS_PER_WORD - __offset;
-          for (size_t __n = _Nw - 1; __n > __wshift; --__n)
-            _M_w[__n] = (_M_w[__n - __wshift] << __offset) |
-              (_M_w[__n - __wshift - 1] >> __sub_offset);
-          _M_w[__wshift] = _M_w[0] << __offset;
-        }
-
-        fill(_M_w + 0, _M_w + __wshift, static_cast<_WordT>(0));
-      }
-    }
-
-    template<size_t _Nw>
-    void
-    _Base_bitset<_Nw>::_M_do_right_shift(size_t __shift)
-    {
-      if (__builtin_expect(__shift != 0, 1))
-      {
-        const size_t __wshift = __shift / _LOFAR_BITSET_BITS_PER_WORD;
-        const size_t __offset = __shift % _LOFAR_BITSET_BITS_PER_WORD;
-        const size_t __limit = _Nw - __wshift - 1;
-
-        if (__offset == 0)
-          for (size_t __n = 0; __n <= __limit; ++__n)
-            _M_w[__n] = _M_w[__n + __wshift];
-        else
-        {
-          const size_t __sub_offset = _LOFAR_BITSET_BITS_PER_WORD - __offset;
-          for (size_t __n = 0; __n < __limit; ++__n)
-            _M_w[__n] = (_M_w[__n + __wshift] >> __offset) |
-              (_M_w[__n + __wshift + 1] << __sub_offset);
-          _M_w[__limit] = _M_w[_Nw-1] >> __offset;
-        }
-
-        fill(_M_w + __limit + 1, _M_w + _Nw, static_cast<_WordT>(0));
-      }
-    }
-
-    template<size_t _Nw>
-    uint32
-    _Base_bitset<_Nw>::_M_do_to_uint32() const
-    {
-      for (size_t __i = 1; __i < _Nw; ++__i)
-	if (_M_w[__i])
-	  __throw_overflow_error("bitset -- too large to fit in unsigned uint32");
-      return _M_w[0];
-    }
-
-    template<size_t _Nw>
-    size_t
-    _Base_bitset<_Nw>::_M_do_find_first(size_t __not_found) const
-    {
-      for (size_t __i = 0; __i < _Nw; __i++ )
-      {
-        _WordT __thisword = _M_w[__i];
-        if ( __thisword != static_cast<_WordT>(0) )
-        {
-          // find byte within word
-          for (size_t __j = 0; __j < sizeof(_WordT); __j++ )
-          {
-            unsigned char __this_byte
-              = static_cast<unsigned char>(__thisword & (~(unsigned char)0));
-            if (__this_byte)
-              return __i*_LOFAR_BITSET_BITS_PER_WORD + __j*CHAR_BIT +
-                _S_first_one[__this_byte];
-
-            __thisword >>= CHAR_BIT;
-          }
-        }
-      }
-      // not found, so return an indication of failure.
-      return __not_found;
-    }
-
-    template<size_t _Nw>
-    size_t
-    _Base_bitset<_Nw>::_M_do_find_next(size_t __prev, size_t __not_found) const
-    {
-      // make bound inclusive
-      ++__prev;
-
-      // check out of bounds
-      if ( __prev >= _Nw * _LOFAR_BITSET_BITS_PER_WORD )
-	return __not_found;
-
-      // search first word
-      size_t __i = _S_whichword(__prev);
-      _WordT __thisword = _M_w[__i];
-
-      // mask off bits below bound
-      __thisword &= (~static_cast<_WordT>(0)) << _S_whichbit(__prev);
-
-      if ( __thisword != static_cast<_WordT>(0) )
-      {
-        // find byte within word
-        // get first byte into place
-        __thisword >>= _S_whichbyte(__prev) * CHAR_BIT;
-        for (size_t __j = _S_whichbyte(__prev); __j < sizeof(_WordT); __j++)
-        {
-          unsigned char __this_byte
-            = static_cast<unsigned char>(__thisword & (~(unsigned char)0));
-          if ( __this_byte )
-            return __i*_LOFAR_BITSET_BITS_PER_WORD + __j*CHAR_BIT +
-              _S_first_one[__this_byte];
-
-          __thisword >>= CHAR_BIT;
-        }
-      }
-
-      // check subsequent words
-      __i++;
-      for ( ; __i < _Nw; __i++ )
-      {
-        __thisword = _M_w[__i];
-        if ( __thisword != static_cast<_WordT>(0) )
-        {
-          // find byte within word
-          for (size_t __j = 0; __j < sizeof(_WordT); __j++ )
-          {
-            unsigned char __this_byte
-              = static_cast<unsigned char>(__thisword & (~(unsigned char)0));
-            if ( __this_byte )
-              return __i*_LOFAR_BITSET_BITS_PER_WORD + __j*CHAR_BIT +
-                _S_first_one[__this_byte];
-
-            __thisword >>= CHAR_BIT;
-          }
-        }
-      }
-      // not found, so return an indication of failure.
-      return __not_found;
-    } // end _M_do_find_next
-
-
-    /**
-     *  @if maint
-     *  Base class, specialization for a single word.
-     *
-     *  See documentation for bitset.
-     *  @endif
-     */
-    template<>
-    struct _Base_bitset<1>
-    {
-      typedef uint32 _WordT;
-      _WordT _M_w;
-
-      _Base_bitset( void ) : _M_w(0) {}
-      _Base_bitset(uint32 __val) : _M_w(__val) {}
-
-      static size_t
-      _S_whichword(size_t __pos )
-      { return __pos / _LOFAR_BITSET_BITS_PER_WORD; }
-
-      static size_t
-      _S_whichbyte(size_t __pos )
-      { return (__pos % _LOFAR_BITSET_BITS_PER_WORD) / CHAR_BIT; }
-
-      static size_t
-      _S_whichbit(size_t __pos )
-      {  return __pos % _LOFAR_BITSET_BITS_PER_WORD; }
-
-      static _WordT
-      _S_maskbit(size_t __pos )
-      { return (static_cast<_WordT>(1)) << _S_whichbit(__pos); }
-
-      _WordT&
-      _M_getword(size_t) { return _M_w; }
-
-      _WordT
-      _M_getword(size_t) const { return _M_w; }
-
-      _WordT&
-      _M_hiword() { return _M_w; }
-
-      _WordT
-      _M_hiword() const { return _M_w; }
-
-      void
-      _M_do_and(const _Base_bitset<1>& __x) { _M_w &= __x._M_w; }
-
-      void
-      _M_do_or(const _Base_bitset<1>& __x)  { _M_w |= __x._M_w; }
-
-      void
-      _M_do_xor(const _Base_bitset<1>& __x) { _M_w ^= __x._M_w; }
-
-      void
-      _M_do_left_shift(size_t __shift) { _M_w <<= __shift; }
-
-      void
-      _M_do_right_shift(size_t __shift) { _M_w >>= __shift; }
-
-      void
-      _M_do_flip() { _M_w = ~_M_w; }
-
-      void
-      _M_do_set() { _M_w = ~static_cast<_WordT>(0); }
-
-      void
-      _M_do_reset() { _M_w = 0; }
-
-      bool
-      _M_is_equal(const _Base_bitset<1>& __x) const
-      { return _M_w == __x._M_w; }
-
-      bool
-      _M_is_any() const { return _M_w != 0; }
-
-      size_t
-      _M_do_count() const
-      {
-	size_t __result = 0;
-	const unsigned char* __byte_ptr = (const unsigned char*)&_M_w;
-	const unsigned char* __end_ptr
-	  = ((const unsigned char*)&_M_w)+sizeof(_M_w);
-	while ( __byte_ptr < __end_ptr )
-        {
-          __result += _S_bit_count[*__byte_ptr];
-          __byte_ptr++;
-        }
-	return __result;
-      }
-
-      uint32
-      _M_do_to_uint32() const { return _M_w; }
-
-      size_t
-      _M_do_find_first(size_t __not_found) const;
-
-      // find the next "on" bit that follows "prev"
-      size_t
-      _M_do_find_next(size_t __prev, size_t __not_found) const;
-    };
-
-
-    /**
-     *  @if maint
-     *  Base class, specialization for no storage (zero-length %bitset).
-     *
-     *  See documentation for bitset.
-     *  @endif
-     */
-    template<>
-    struct _Base_bitset<0>
-    {
-      typedef uint32 _WordT;
-
-      _Base_bitset() {}
-      _Base_bitset(uint32) {}
-
-      static size_t
-      _S_whichword(size_t __pos )
-      { return __pos / _LOFAR_BITSET_BITS_PER_WORD; }
-
-      static size_t
-      _S_whichbyte(size_t __pos )
-      { return (__pos % _LOFAR_BITSET_BITS_PER_WORD) / CHAR_BIT; }
-
-      static size_t
-      _S_whichbit(size_t __pos )
-      {  return __pos % _LOFAR_BITSET_BITS_PER_WORD; }
-
-      static _WordT
-      _S_maskbit(size_t __pos )
-      { return (static_cast<_WordT>(1)) << _S_whichbit(__pos); }
-
-      // This would normally give access to the data.  The bounds-checking
-      // in the bitset class will prevent the user from getting this far,
-      // but (1) it must still return an lvalue to compile, and (2) the
-      // user might call _Unchecked_set directly, in which case this /needs/
-      // to fail.  Let's not penalize zero-length users unless they actually
-      // make an unchecked call; all the memory ugliness is therefore
-      // localized to this single should-never-get-this-far function.
-      _WordT&
-      _M_getword(size_t) const
-      { __throw_out_of_range("bitset -- zero-length"); return *new _WordT; }
-
-      _WordT
-      _M_hiword() const { return 0; }
-
-      void
-      _M_do_and(const _Base_bitset<0>&) { }
-
-      void
-      _M_do_or(const _Base_bitset<0>&)  { }
-
-      void
-      _M_do_xor(const _Base_bitset<0>&) { }
-
-      void
-      _M_do_left_shift(size_t) { }
-
-      void
-      _M_do_right_shift(size_t) { }
-
-      void
-      _M_do_flip() { }
-
-      void
-      _M_do_set() { }
-
-      void
-      _M_do_reset() { }
-
-      // Are all empty bitsets equal to each other?  Are they equal to
-      // themselves?  How to compare a thing which has no state?  What is
-      // the sound of one zero-length bitset clapping?
-      bool
-      _M_is_equal(const _Base_bitset<0>&) const { return true; }
-
-      bool
-      _M_is_any() const { return false; }
-
-      size_t
-      _M_do_count() const { return 0; }
-
-      uint32
-      _M_do_to_uint32() const { return 0; }
-
-      // Normally "not found" is the size, but that could also be
-      // misinterpreted as an index in this corner case.  Oh well.
-      size_t
-      _M_do_find_first(size_t) const { return 0; }
-
-      size_t
-      _M_do_find_next(size_t, size_t) const { return 0; }
-    };
-
-
-    // Helper class to zero out the unused high-order bits in the highest word.
-    template<size_t _Extrabits>
-    struct _Sanitize
-    {
-      static void _S_do_sanitize(uint32& __val)
-      { __val &= ~((~static_cast<uint32>(0)) << _Extrabits); }
-    };
-
-    template<>
-    struct _Sanitize<0>
-    { static void _S_do_sanitize(uint32) { } };
-
-
-    /**
-     *  @brief  The %bitset class represents a @e fixed-size sequence of bits.
-     *
-     *  @ingroup Containers
-     *
-     *  (Note that %bitset does @e not meet the formal requirements of a
-     *  <a href="tables.html#65">container</a>.  Mainly, it lacks iterators.)
-     *
-     *  The template argument, @a Nb, may be any non-negative number,
-     *  specifying the number of bits (e.g., "0", "12", "1024*1024").
-     *
-     *  In the general unoptimized case, storage is allocated in word-sized
-     *  blocks.  Let B be the number of bits in a word, then (Nb+(B-1))/B
-     *  words will be used for storage.  B - Nb%B bits are unused.  (They are
-     *  the high-order bits in the highest word.)  It is a class invariant
-     *  that those unused bits are always zero.
-     *
-     *  If you think of %bitset as "a simple array of bits," be aware that
-     *  your mental picture is reversed:  a %bitset behaves the same way as
-     *  bits in integers do, with the bit at index 0 in the "least significant
-     *  / right-hand" position, and the bit at index Nb-1 in the "most
-     *  significant / left-hand" position.  Thus, unlike other containers, a
-     *  %bitset's index "counts from right to left," to put it very loosely.
-     *
-     *  This behavior is preserved when translating to and from strings.  For
-     *  example, the first line of the following program probably prints
-     *  "b('a') is 0001100001" on a modern ASCII system.
-     *
-     *  @code
-     *     #include <bitset>
-     *     #include <iostream>
-     *     #include <sstream>
-     *
-     *     using namespace std;
-     *
-     *     int main()
-     *     {
-     *         uint32       a = 'a';
-     *         bitset<10>   b(a);
-     *
-     *         cout << "b('a') is " << b << endl;
-     *
-     *         ostringstream s;
-     *         s << b;
-     *         string  str = s.str();
-     *         cout << "index 3 in the string is " << str[3] << " but\n"
-     *              << "index 3 in the bitset is " << b[3] << endl;
-     *     }
-     *  @endcode
-     *
-     *  Also see http://gcc.gnu.org/onlinedocs/libstdc++/ext/sgiexts.html#ch23
-     *  for a description of extensions.
-     *
-     *  @if maint
-     *  Most of the actual code isn't contained in %bitset<> itself, but in the
-     *  base class _Base_bitset.  The base class works with whole words, not with
-     *  individual bits.  This allows us to specialize _Base_bitset for the
-     *  important special case where the %bitset is only a single word.
-     *
-     *  Extra confusion can result due to the fact that the storage for
-     *  _Base_bitset @e is a regular array, and is indexed as such.  This is
-     *  carefully encapsulated.
-     *  @endif
-     */
-    template<size_t _Nb>
-    class bitset : private _Base_bitset<_LOFAR_BITSET_WORDS(_Nb)>
-    {
-    private:
-      typedef _Base_bitset<_LOFAR_BITSET_WORDS(_Nb)> _Base;
-      typedef uint32 _WordT;
-
-      void
-        _M_do_sanitize()
-        {
-          _Sanitize<_Nb%_LOFAR_BITSET_BITS_PER_WORD>::
-            _S_do_sanitize(this->_M_hiword());
-        }
-
-    public:
-      /**
-       *  This encapsulates the concept of a single bit.  An instance of this
-       *  class is a proxy for an actual bit; this way the individual bit
-       *  operations are done as faster word-size bitwise instructions.
-       *
-       *  Most users will never need to use this class directly; conversions
-       *  to and from bool are automatic and should be transparent.  Overloaded
-       *  operators help to preserve the illusion.
-       *
-       *  (On a typical system, this "bit %reference" is 64 times the size of
-       *  an actual bit.  Ha.)
-       */
-      class reference
-      {
-        friend class bitset;
-
-        _WordT *_M_wp;
-        size_t _M_bpos;
-
-        // left undefined
-        reference();
-
-      public:
-        reference(bitset& __b, size_t __pos)
-        {
-          _M_wp = &__b._M_getword(__pos);
-          _M_bpos = _Base::_S_whichbit(__pos);
-        }
-
-        ~reference() { }
-
-        // for b[i] = __x;
-        reference&
-        operator=(bool __x)
-        {
-          if ( __x )
-            *_M_wp |= _Base::_S_maskbit(_M_bpos);
-          else
-            *_M_wp &= ~_Base::_S_maskbit(_M_bpos);
-          return *this;
-        }
-
-        // for b[i] = b[__j];
-        reference&
-        operator=(const reference& __j)
-        {
-          if ( (*(__j._M_wp) & _Base::_S_maskbit(__j._M_bpos)) )
-            *_M_wp |= _Base::_S_maskbit(_M_bpos);
-          else
-            *_M_wp &= ~_Base::_S_maskbit(_M_bpos);
-          return *this;
-        }
-
-        // flips the bit
-        bool
-        operator~() const
-        { return (*(_M_wp) & _Base::_S_maskbit(_M_bpos)) == 0; }
-
-        // for __x = b[i];
-        operator bool() const
-        { return (*(_M_wp) & _Base::_S_maskbit(_M_bpos)) != 0; }
-
-        // for b[i].flip();
-        reference&
-        flip()
-        {
-          *_M_wp ^= _Base::_S_maskbit(_M_bpos);
-          return *this;
-        }
-      };
-      friend class reference;
-
-      // 23.3.5.1 constructors:
-      /// All bits set to zero.
-        bitset() { }
-
-        /// Initial bits bitwise-copied from a single word (others set to zero).
-          bitset(uint32 __val) : _Base(__val)
-            { _M_do_sanitize(); }
-
-          /**
-           *  @brief  Use a subset of a string.
-           *  @param  s  A string of '0' and '1' characters.
-           *  @param  pos  Index of the first character in @a s to use; defaults
-           *               to zero.
-           *  @throw  std::out_of_range  If @a pos is bigger the size of @a s.
-           *  @throw  std::invalid_argument  If a character appears in the string
-           *                                 which is neither '0' nor '1'.
-           */
-          template<class _CharT, class _Traits, class _Alloc>
-            explicit bitset(const basic_string<_CharT, _Traits, _Alloc>& __s,
-                            size_t __pos = 0) : _Base()
-            {
-              if (__pos > __s.size())
-                __throw_out_of_range("bitset -- initial position is larger than "
-                                     "the string itself");
-              _M_copy_from_string(__s, __pos,
-                                  basic_string<_CharT, _Traits, _Alloc>::npos);
-            }
-
-          /**
-           *  @brief  Use a subset of a string.
-           *  @param  s  A string of '0' and '1' characters.
-           *  @param  pos  Index of the first character in @a s to use.
-           *  @param  n    The number of characters to copy.
-           *  @throw  std::out_of_range  If @a pos is bigger the size of @a s.
-           *  @throw  std::invalid_argument  If a character appears in the string
-           *                                 which is neither '0' nor '1'.
-           */
-          template<class _CharT, class _Traits, class _Alloc>
-            bitset(const basic_string<_CharT, _Traits, _Alloc>& __s,
-                   size_t __pos, size_t __n) : _Base()
-            {
-              if (__pos > __s.size())
-                __throw_out_of_range("bitset -- initial position is larger than "
-                                     "the string itself");
-              _M_copy_from_string(__s, __pos, __n);
-            }
-
-          // 23.3.5.2 bitset operations:
-          //@{
-          /**
-           *  @brief  Operations on bitsets.
-           *  @param  rhs  A same-sized bitset.
-           *
-           *  These should be self-explanatory.
-           */
-          bitset<_Nb>&
-            operator&=(const bitset<_Nb>& __rhs)
-            {
-              this->_M_do_and(__rhs);
-              return *this;
-            }
-
-          bitset<_Nb>&
-            operator|=(const bitset<_Nb>& __rhs)
-            {
-              this->_M_do_or(__rhs);
-              return *this;
-            }
-
-          bitset<_Nb>&
-            operator^=(const bitset<_Nb>& __rhs)
-            {
-              this->_M_do_xor(__rhs);
-              return *this;
-            }
-          //@}
-
-          //@{
-          /**
-           *  @brief  Operations on bitsets.
-           *  @param  pos  The number of places to shift.
-           *
-           *  These should be self-explanatory.
-           */
-          bitset<_Nb>&
-            operator<<=(size_t __pos)
-            {
-              if (__builtin_expect(__pos < _Nb, 1))
-              {
-                this->_M_do_left_shift(__pos);
-                this->_M_do_sanitize();
-              }
-              else
-                this->_M_do_reset();
-              return *this;
-            }
-
-          bitset<_Nb>&
-            operator>>=(size_t __pos)
-            {
-              if (__builtin_expect(__pos < _Nb, 1))
-              {
-                this->_M_do_right_shift(__pos);
-                this->_M_do_sanitize();
-              }
-              else
-                this->_M_do_reset();
-              return *this;
-            }
-          //@}
-
-          //@{
-          /**
-           *  These versions of single-bit set, reset, flip, and test are
-           *  extensions from the SGI version.  They do no range checking.
-           *  @ingroup SGIextensions
-           */
-          bitset<_Nb>&
-            _Unchecked_set(size_t __pos)
-            {
-              this->_M_getword(__pos) |= _Base::_S_maskbit(__pos);
-              return *this;
-            }
-
-          bitset<_Nb>&
-            _Unchecked_set(size_t __pos, int __val)
-            {
-              if (__val)
-                this->_M_getword(__pos) |= _Base::_S_maskbit(__pos);
-              else
-                this->_M_getword(__pos) &= ~_Base::_S_maskbit(__pos);
-              return *this;
-            }
-
-          bitset<_Nb>&
-            _Unchecked_reset(size_t __pos)
-            {
-              this->_M_getword(__pos) &= ~_Base::_S_maskbit(__pos);
-              return *this;
-            }
-
-          bitset<_Nb>&
-            _Unchecked_flip(size_t __pos)
-            {
-              this->_M_getword(__pos) ^= _Base::_S_maskbit(__pos);
-              return *this;
-            }
-
-          bool
-            _Unchecked_test(size_t __pos) const
-            {
-              return (this->_M_getword(__pos) & _Base::_S_maskbit(__pos))
-                != static_cast<_WordT>(0);
-            }
-          //@}
-
-          // Set, reset, and flip.
-          /**
-           *  @brief Sets every bit to true.
-           */
-          bitset<_Nb>&
-            set()
-            {
-              this->_M_do_set();
-              this->_M_do_sanitize();
-              return *this;
-            }
-
-          /**
-           *  @brief Sets a given bit to a particular value.
-           *  @param  pos  The index of the bit.
-           *  @param  val  Either true or false, defaults to true.
-           *  @throw  std::out_of_range  If @a pos is bigger the size of the %set.
-           */
-          bitset<_Nb>&
-            set(size_t __pos, bool __val = true)
-            {
-              if (__pos >= _Nb)
-                __throw_out_of_range("bitset -- set() argument too large");
-              return _Unchecked_set(__pos, __val);
-            }
-
-          /**
-           *  @brief Sets every bit to false.
-           */
-          bitset<_Nb>&
-            reset()
-            {
-              this->_M_do_reset();
-              return *this;
-            }
-
-          /**
-           *  @brief Sets a given bit to false.
-           *  @param  pos  The index of the bit.
-           *  @throw  std::out_of_range  If @a pos is bigger the size of the %set.
-           *
-           *  Same as writing @c set(pos,false).
-           */
-          bitset<_Nb>&
-            reset(size_t __pos)
-            {
-              if (__pos >= _Nb)
-                __throw_out_of_range("bitset -- reset() argument too large");
-              return _Unchecked_reset(__pos);
-            }
-
-          /**
-           *  @brief Toggles every bit to its opposite value.
-           */
-          bitset<_Nb>&
-            flip()
-            {
-              this->_M_do_flip();
-              this->_M_do_sanitize();
-              return *this;
-            }
-
-          /**
-           *  @brief Toggles a given bit to its opposite value.
-           *  @param  pos  The index of the bit.
-           *  @throw  std::out_of_range  If @a pos is bigger the size of the %set.
-           */
-          bitset<_Nb>&
-            flip(size_t __pos)
-            {
-              if (__pos >= _Nb)
-                __throw_out_of_range("bitset -- flip() argument too large");
-              return _Unchecked_flip(__pos);
-            }
-
-          /// See the no-argument flip().
-            bitset<_Nb>
-              operator~() const { return bitset<_Nb>(*this).flip(); }
-
-            //@{
-            /**
-             *  @brief  Array-indexing support.
-             *  @param  pos  Index into the %bitset.
-             *  @return  A bool for a 'const %bitset'.  For non-const bitsets, an
-             *           instance of the reference proxy class.
-             *  @note  These operators do no range checking and throw no exceptions,
-             *         as required by DR 11 to the standard.
-             *
-             *  @if maint
-             *  _GLIBCPP_RESOLVE_LIB_DEFECTS Note that this implementation already
-             *  resolves DR 11 (items 1 and 2), but does not do the range-checking
-             *  required by that DR's resolution.  -pme
-             *  The DR has since been changed:  range-checking is a precondition
-             *  (users' responsibility), and these functions must not throw.  -pme
-             *  @endif
-             */
-            reference
-              operator[](size_t __pos) { return reference(*this,__pos); }
-
-            bool
-              operator[](size_t __pos) const { return _Unchecked_test(__pos); }
-            //@}
-
-            /**
-             *  @brief Retuns a numerical interpretation of the %bitset.
-             *  @return  The integral equivalent of the bits.
-             *  @throw  std::overflow_error  If there are too many bits to be
-             *                               represented in an @c uint32.
-             */
-            uint32
-              to_uint32() const { return this->_M_do_to_uint32(); }
-
-            /**
-             *  @brief Retuns a character interpretation of the %bitset.
-             *  @return  The string equivalent of the bits.
-             *
-             *  Note the ordering of the bits:  decreasing character positions
-             *  correspond to increasing bit positions (see the main class notes for
-             *  an example).
-             *
-             *  Also note that you must specify the string's template parameters
-             *  explicitly.  Given a bitset @c bs and a string @s:
-             *  @code
-             *     s = bs.to_string<char,char_traits<char>,allocator<char> >();
-             *  @endcode
-             */
-            template<class _CharT, class _Traits, class _Alloc>
-              basic_string<_CharT, _Traits, _Alloc>
-              to_string() const
-              {
-                basic_string<_CharT, _Traits, _Alloc> __result;
-                _M_copy_to_string(__result);
-                return __result;
-              }
-
-            // Helper functions for string operations.
-            template<class _CharT, class _Traits, class _Alloc>
-              void
-              _M_copy_from_string(const basic_string<_CharT,_Traits,_Alloc>& __s,
-                                  size_t, size_t);
-
-            template<class _CharT, class _Traits, class _Alloc>
-              void
-              _M_copy_to_string(basic_string<_CharT,_Traits,_Alloc>&) const;
-
-            /// Returns the number of bits which are set.
-              size_t
-                count() const { return this->_M_do_count(); }
-
-              /// Returns the total number of bits.
-                size_t
-                  size() const { return _Nb; }
-
-                //@{
-                /// These comparisons for equality/inequality are, well, @e bitwise.
-                  bool
-                    operator==(const bitset<_Nb>& __rhs) const
-                    { return this->_M_is_equal(__rhs); }
-
-                  bool
-                    operator!=(const bitset<_Nb>& __rhs) const
-                    { return !this->_M_is_equal(__rhs); }
-                  //@}
-
-                  /**
-                   *  @brief Tests the value of a bit.
-                   *  @param  pos  The index of a bit.
-                   *  @return  The value at @a pos.
-                   *  @throw  std::out_of_range  If @a pos is bigger the size of the %set.
-                   */
-                  bool
-                    test(size_t __pos) const
-                    {
-                      if (__pos >= _Nb)
-                        __throw_out_of_range("bitset -- test() argument too large");
-                      return _Unchecked_test(__pos);
-                    }
-
-                  /**
-                   *  @brief Tests whether any of the bits are on.
-                   *  @return  True if at least one bit is set.
-                   */
-                  bool
-                    any() const { return this->_M_is_any(); }
-
-                  /**
-                   *  @brief Tests whether any of the bits are on.
-                   *  @return  True if none of the bits are set.
-                   */
-                  bool
-                    none() const { return !this->_M_is_any(); }
-
-                  //@{
-                  /// Self-explanatory.
-                    bitset<_Nb>
-                      operator<<(size_t __pos) const
-                      { return bitset<_Nb>(*this) <<= __pos; }
-
-                    bitset<_Nb>
-                      operator>>(size_t __pos) const
-                      { return bitset<_Nb>(*this) >>= __pos; }
-                    //@}
-
-                    /**
-                     *  @brief  Finds the index of the first "on" bit.
-                     *  @return  The index of the first bit set, or size() if not found.
-                     *  @ingroup SGIextensions
-                     *  @sa  _Find_next
-                     */
-                    size_t
-                      _Find_first() const
-                      { return this->_M_do_find_first(_Nb); }
-
-                    /**
-                     *  @brief  Finds the index of the next "on" bit after prev.
-                     *  @return  The index of the next bit set, or size() if not found.
-                     *  @param  prev  Where to start searching.
-                     *  @ingroup SGIextensions
-                     *  @sa  _Find_first
-                     */
-                    size_t
-                      _Find_next(size_t __prev ) const
-                      { return this->_M_do_find_next(__prev, _Nb); }
-    };
-
-    // Definitions of non-inline member functions.
-    template<size_t _Nb>
-    template<class _CharT, class _Traits, class _Alloc>
-    void
-    bitset<_Nb>::_M_copy_from_string(const basic_string<_CharT,_Traits,_Alloc>& __s, size_t __pos, size_t __n)
-    {
-      reset();
-      const size_t __nbits = min(_Nb, min(__n, __s.size() - __pos));
-      for (size_t __i = 0; __i < __nbits; ++__i)
-      {
-        switch(__s[__pos + __nbits - __i - 1])
-        {
-        case '0':
-          break;
-        case '1':
-          set(__i);
-          break;
-        default:
-          __throw_invalid_argument("bitset -- string contains characters "
-                                   "which are neither 0 nor 1");
-        }
-      }
-    }
-
-    template<size_t _Nb>
-    template<class _CharT, class _Traits, class _Alloc>
-    void
-    bitset<_Nb>::_M_copy_to_string(basic_string<_CharT, _Traits, _Alloc>& __s) const
-    {
-      __s.assign(_Nb, '0');
-      for (size_t __i = 0; __i < _Nb; ++__i)
-	if (_Unchecked_test(__i))
-	  __s[_Nb - 1 - __i] = '1';
-    }
-
-    // 23.3.5.3 bitset operations:
-    //@{
-    /**
-     *  @brief  Global bitwise operations on bitsets.
-     *  @param  x  A bitset.
-     *  @param  y  A bitset of the same size as @a x.
-     *  @return  A new bitset.
-     *
-     *  These should be self-explanatory.
-     */
-    template<size_t _Nb>
-    inline bitset<_Nb>
-    operator&(const bitset<_Nb>& __x, const bitset<_Nb>& __y)
-    {
-      bitset<_Nb> __result(__x);
-      __result &= __y;
-      return __result;
-    }
-
-    template<size_t _Nb>
-    inline bitset<_Nb>
-    operator|(const bitset<_Nb>& __x, const bitset<_Nb>& __y)
-    {
-      bitset<_Nb> __result(__x);
-      __result |= __y;
-      return __result;
-    }
-
-    template <size_t _Nb>
-    inline bitset<_Nb>
-    operator^(const bitset<_Nb>& __x, const bitset<_Nb>& __y)
-    {
-      bitset<_Nb> __result(__x);
-      __result ^= __y;
-      return __result;
-    }
-    //@}
-
-    //@{
-    /**
-     *  @brief Global I/O operators for bitsets.
-     *
-     *  Direct I/O between streams and bitsets is supported.  Output is
-     *  straightforward.  Input will skip whitespace, only accept '0' and '1'
-     *  characters, and will only extract as many digits as the %bitset will
-     *  hold.
-     */
-    template<class _CharT, class _Traits, size_t _Nb>
-    basic_istream<_CharT, _Traits>&
-    operator>>(basic_istream<_CharT, _Traits>& __is, bitset<_Nb>& __x)
-    {
-      typedef typename _Traits::char_type char_type;
-      basic_string<_CharT, _Traits> __tmp;
-      __tmp.reserve(_Nb);
-
-      // Skip whitespace
-      typename basic_istream<_CharT, _Traits>::sentry __sentry(__is);
-      if (__sentry)
-      {
-        ios_base::iostate  __state = ios_base::goodbit;
-        basic_streambuf<_CharT, _Traits>* __buf = __is.rdbuf();
-        for (size_t __i = 0; __i < _Nb; ++__i)
-        {
-          static typename _Traits::int_type __eof = _Traits::eof();
-
-          typename _Traits::int_type __c1 = __buf->sbumpc();
-          if (_Traits::eq_int_type(__c1, __eof))
-          {
-            __state |= ios_base::eofbit;
-            break;
-          }
-          else
-          {
-            char_type __c2 = _Traits::to_char_type(__c1);
-            char_type __c  = __is.narrow(__c2, '*');
-
-            if (__c == '0' || __c == '1')
-              __tmp.push_back(__c);
-            else if (_Traits::eq_int_type(__buf->sputbackc(__c2), __eof))
-            {
-              __state |= ios_base::failbit;
-              break;
-            }
-          }
-        }
-
-        if (__tmp.empty() && !_Nb)
-          __state |= ios_base::failbit;
-        else
-          __x._M_copy_from_string(__tmp, static_cast<size_t>(0), _Nb);
-
-        if (__state != ios_base::goodbit)
-          __is.setstate(__state);    // may throw an exception
-      }
-
-      return __is;
-    }
-
-    template <class _CharT, class _Traits, size_t _Nb>
-    basic_ostream<_CharT, _Traits>&
-    operator<<(basic_ostream<_CharT, _Traits>& __os, const bitset<_Nb>& __x)
-    {
-      basic_string<_CharT, _Traits> __tmp;
-      __x._M_copy_to_string(__tmp);
-      return __os << __tmp;
-    }
-    //@}
-
-  } // namespace CS1
-
-} // namespace LOFAR
-
-#undef _LOFAR_BITSET_WORDS
-
-#endif /* _LOFAR_BITSET_H */
diff --git a/Appl/CEP/CS1/CS1_Interface/src/BGL_Configuration.cc b/Appl/CEP/CS1/CS1_Interface/src/BGL_Configuration.cc
new file mode 100644
index 0000000000000000000000000000000000000000..4babdc3641fbe37d63df55d73c8a46c4f1ab43b2
--- /dev/null
+++ b/Appl/CEP/CS1/CS1_Interface/src/BGL_Configuration.cc
@@ -0,0 +1,66 @@
+//#  BGL_Configuration.cc:
+//#
+//#  P.O.Box 2, 7990 AA Dwingeloo, The Netherlands, seg@astron.nl
+//#
+//#  This program is free software; you can redistribute it and/or modify
+//#  it under the terms of the GNU General Public License as published by
+//#  the Free Software Foundation; either version 2 of the License, or
+//#  (at your option) any later version.
+//#
+//#  This program is distributed in the hope that it will be useful,
+//#  but WITHOUT ANY WARRANTY; without even the implied warranty of
+//#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+//#  GNU General Public License for more details.
+//#
+//#  You should have received a copy of the GNU General Public License
+//#  along with this program; if not, write to the Free Software
+//#  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+//#
+//#  $Id$
+
+#include <lofar_config.h>
+
+#include <CS1_Interface/BGL_Configuration.h>
+
+#include <cassert>
+
+
+namespace LOFAR {
+namespace CS1 {
+
+// Receive the marshalled configuration over th and unpack it into the member vectors.
+void BGL_Configuration::read(TransportHolder *th)
+{
+  th->recvBlocking(&itsMarshalledData, sizeof itsMarshalledData, 1, 0, 0);
+
+  // The element counts come from the peer: check them against the same limits write() asserts before packing.
+  assert(itsMarshalledData.itsInputPsetsSize <= MAX_PSETS);
+  assert(itsMarshalledData.itsOutputPsetsSize <= MAX_PSETS);
+  assert(itsMarshalledData.itsRefFreqsSize <= MAX_SUBBANDS);
+  // assign() is well-defined for a zero count, unlike resize(0) followed by memcpy into &v[0].
+  itsInputPsets.assign(itsMarshalledData.itsInputPsets, itsMarshalledData.itsInputPsets + itsMarshalledData.itsInputPsetsSize);
+  itsOutputPsets.assign(itsMarshalledData.itsOutputPsets, itsMarshalledData.itsOutputPsets + itsMarshalledData.itsOutputPsetsSize);
+  itsRefFreqs.assign(itsMarshalledData.itsRefFreqs, itsMarshalledData.itsRefFreqs + itsMarshalledData.itsRefFreqsSize);
+}
+
+// Pack the member vectors into itsMarshalledData and send that struct over th in one blocking call.
+void BGL_Configuration::write(TransportHolder *th)
+{
+  itsMarshalledData.itsInputPsetsSize = itsInputPsets.size();
+  assert(itsMarshalledData.itsInputPsetsSize <= MAX_PSETS);
+  if (!itsInputPsets.empty()) memcpy(itsMarshalledData.itsInputPsets, &itsInputPsets[0], itsMarshalledData.itsInputPsetsSize * sizeof(unsigned)); // &v[0] is undefined on an empty vector
+
+  itsMarshalledData.itsOutputPsetsSize = itsOutputPsets.size();
+  assert(itsMarshalledData.itsOutputPsetsSize <= MAX_PSETS);
+  if (!itsOutputPsets.empty()) memcpy(itsMarshalledData.itsOutputPsets, &itsOutputPsets[0], itsMarshalledData.itsOutputPsetsSize * sizeof(unsigned));
+
+  itsMarshalledData.itsRefFreqsSize = itsRefFreqs.size();
+  assert(itsMarshalledData.itsRefFreqsSize <= MAX_SUBBANDS);
+  if (!itsRefFreqs.empty()) memcpy(itsMarshalledData.itsRefFreqs, &itsRefFreqs[0], itsMarshalledData.itsRefFreqsSize * sizeof(double));
+
+  th->sendBlocking(&itsMarshalledData, sizeof itsMarshalledData, 1, 0);
+}
+
+
+} // namespace CS1
+} // namespace LOFAR
diff --git a/Appl/CEP/CS1/CS1_Interface/src/Stub_RSP.cc b/Appl/CEP/CS1/CS1_Interface/src/BGL_Mapping.cc
similarity index 56%
rename from Appl/CEP/CS1/CS1_Interface/src/Stub_RSP.cc
rename to Appl/CEP/CS1/CS1_Interface/src/BGL_Mapping.cc
index 64b39f0dd711df7dd1700031179d3ae2bbdcd542..8932f40a7628e232c1a0c6ac1ccfd6b4bd6056b5 100644
--- a/Appl/CEP/CS1/CS1_Interface/src/Stub_RSP.cc
+++ b/Appl/CEP/CS1/CS1_Interface/src/BGL_Mapping.cc
@@ -1,4 +1,4 @@
-//# Stub_RSP.cc: Stub for connection to DFTServer and DFTRequest
+//#  BGL_Mapping.cc: map work to cores on BG/L psets
 //#
 //#  P.O.Box 2, 7990 AA Dwingeloo, The Netherlands, seg@astron.nl
 //#
@@ -20,34 +20,27 @@
 
 #include <lofar_config.h>
 
-#include <CS1_Interface/Stub_RSP.h>
-#include <Transport/TH_Socket.h>
+#include <CS1_Interface/BGL_Mapping.h>
 
-namespace LOFAR 
-{ 
-  namespace CS1
-  {
-    Stub_RSP::Stub_RSP (bool stubOnServer, const CS1_Parset *ps)
-      : itsStubOnServer (stubOnServer),
-        itsCS1PS(ps)
-    {
-      // todo: add DH_?? for pre-correlation correction factors 
-      //    for (int i=0; i<itsNSBF; i++) {
-      //      itsSB.push_back(new DH_SubBand("noname",1)); //todo: get correct SubbandID
-      //    }
-    }
+namespace LOFAR {
+namespace CS1 {
 
-    Stub_RSP::~Stub_RSP()
-    {}
+unsigned BGL_Mapping::mapCoreOnPset(unsigned core, unsigned pset)
+{
+  // TODO: there may be better mappings for partitions larger than one midplane
+  static unsigned char mapX[] = { 0, 2, 6, 4 }; // indexed by pset bits 0-1
+  static unsigned char mapY[] = { 0, 1, 5, 4 }; // indexed by pset bits 2-3
+  static unsigned char mapZ[] = { 0, 1, 3, 2 }; // indexed by pset bits 4-5
 
-    void Stub_RSP::connect ()
-    {
-    };
+  return core ^ mapX[(pset >> 0) & 3] ^ mapY[(pset >> 2) & 3] ^ mapZ[(pset >> 4) & 3]; // XOR core with the three table entries selected by pset
+}
 
-    //todo: add connections for pre-correlation correction DH_?? 
+unsigned BGL_Mapping::reverseMapCoreOnPset(unsigned core, unsigned pset)
+{
+  // the forward mapping XORs core with a value that depends only on pset, so applying it twice gives back core
+  return mapCoreOnPset(core, pset);
+}
 
-  } // namespace CS1
 
+} // namespace CS1
 } // namespace LOFAR
-
-
diff --git a/Appl/CEP/CS1/CS1_Interface/src/CS1_Parset.cc b/Appl/CEP/CS1/CS1_Interface/src/CS1_Parset.cc
index 56a82eb1372a32430a99da9ee5bdaf2f573fd3d4..85ec31cfa225733d5ae5fcd6b22cf0555847cee0 100644
--- a/Appl/CEP/CS1/CS1_Interface/src/CS1_Parset.cc
+++ b/Appl/CEP/CS1/CS1_Interface/src/CS1_Parset.cc
@@ -23,6 +23,8 @@
 //# Always #include <lofar_config.h> first!
 #include <lofar_config.h>
 
+#if defined HAVE_APS
+
 //# Includes
 #include <Common/LofarLogger.h>
 #include <Common/lofar_datetime.h>
@@ -36,8 +38,9 @@
 
 
 namespace LOFAR {
-	using namespace ACC::APS;
-	namespace CS1 {
+namespace CS1 {
+
+using namespace ACC::APS;
 
 CS1_Parset::CS1_Parset() :
 	name()
@@ -135,6 +138,14 @@ vector<string> CS1_Parset::getPortsOf(const string& aKey) const
   return pParset.getStringVector("ports");
 }
 
+int CS1_Parset::findIndex(uint32 pset, const vector<uint32> &psets)
+{
+  // Position of the first element of psets equal to pset, or -1 when pset does not occur.
+  vector<uint32>::const_iterator hit = std::find(psets.begin(), psets.end(), pset);
+  return hit == psets.end() ? -1 : (int) (hit - psets.begin());
+}
+
+
 //
 // expandedArrayString(string)
 //
@@ -229,5 +240,9 @@ string CS1_Parset::expandedArrayString(const string& orgStr) const
 	return (result+"]");
 }
 
-  } // namespace CS1
+
+
+} // namespace CS1
 } // namespace LOFAR
+
+#endif // defined HAVE_APS
diff --git a/Appl/CEP/CS1/CS1_Interface/src/DH_RFI_Mitigation.cc b/Appl/CEP/CS1/CS1_Interface/src/DH_RFI_Mitigation.cc
deleted file mode 100644
index f8cd247d16d9142a8a1e90c9f716f527793bded9..0000000000000000000000000000000000000000
--- a/Appl/CEP/CS1/CS1_Interface/src/DH_RFI_Mitigation.cc
+++ /dev/null
@@ -1,63 +0,0 @@
-//#  DH_RFI_Mitigation.cc:
-//#
-//#  P.O.Box 2, 7990 AA Dwingeloo, The Netherlands, seg@astron.nl
-//#
-//#  This program is free software; you can redistribute it and/or modify
-//#  it under the terms of the GNU General Public License as published by
-//#  the Free Software Foundation; either version 2 of the License, or
-//#  (at your option) any later version.
-//#
-//#  This program is distributed in the hope that it will be useful,
-//#  but WITHOUT ANY WARRANTY; without even the implied warranty of
-//#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-//#  GNU General Public License for more details.
-//#
-//#  You should have received a copy of the GNU General Public License
-//#  along with this program; if not, write to the Free Software
-//#  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-//#
-//#  $Id$
-
-#include <lofar_config.h>
-
-#if 0
-#include <CS1_Interface/DH_RFI_Mitigation.h>
-
-namespace LOFAR {
-namespace CS1 {
-
-DH_RFI_Mitigation::DH_RFI_Mitigation(const string &name)
-  : DataHolder(name, "DH_RFI_Mitigation"),
-    itsChannelFlags(0)
-{
-}
-
-DH_RFI_Mitigation::DH_RFI_Mitigation(const DH_RFI_Mitigation &that)
-  : DataHolder(that),
-    itsChannelFlags(that.itsChannelFlags)
-{
-}
-
-DH_RFI_Mitigation::~DH_RFI_Mitigation()
-{
-}
-
-DataHolder *DH_RFI_Mitigation::clone() const
-{
-  return new DH_RFI_Mitigation(*this);
-}
-
-void DH_RFI_Mitigation::init()
-{
-  addField("ChannelFlags", BlobField<uint32>(1, sizeof(ChannelFlagsType) / sizeof(uint32)));
-  createDataBlock();
-}
-
-void DH_RFI_Mitigation::fillDataPointers()
-{
-  itsChannelFlags = (ChannelFlagsType *) getData<uint32>("ChannelFlags");
-}
-
-} // namespace CS1
-} // namespace LOFAR
-#endif
diff --git a/Appl/CEP/CS1/CS1_Interface/src/DH_RSP.cc b/Appl/CEP/CS1/CS1_Interface/src/DH_RSP.cc
deleted file mode 100644
index 11d808e8472e620158843d30176881683848b382..0000000000000000000000000000000000000000
--- a/Appl/CEP/CS1/CS1_Interface/src/DH_RSP.cc
+++ /dev/null
@@ -1,111 +0,0 @@
-//#  DH_RSP.cc: DataHolder storing RSP raw ethernet frames for 
-//#             StationCorrelator demo
-//#
-//#  P.O.Box 2, 7990 AA Dwingeloo, The Netherlands, seg@astron.nl
-//#
-//#  This program is free software; you can redistribute it and/or modify
-//#  it under the terms of the GNU General Public License as published by
-//#  the Free Software Foundation; either version 2 of the License, or
-//#  (at your option) any later version.
-//#
-//#  This program is distributed in the hope that it will be useful,
-//#  but WITHOUT ANY WARRANTY; without even the implied warranty of
-//#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-//#  GNU General Public License for more details.
-//#
-//#  You should have received a copy of the GNU General Public License
-//#  along with this program; if not, write to the Free Software
-//#  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-//#
-//#  $Id$
-
-#include <lofar_config.h>
-
-#include <CS1_Interface/DH_RSP.h>
-#include <Common/lofar_complex.h>
-
-namespace LOFAR
-{
-  namespace CS1
-  {
-
-    DH_RSP::DH_RSP (const string& name,
-                    const CS1_Parset *pSet)
-      : DataHolder (name, "DH_RSP"),
-        itsCS1PS   (pSet),
-        itsBuffer  (0),
-	itsFlags   (0)
-    {
-      setExtraBlob("Flags", 0);
-      itsNTimes          = itsCS1PS->nrSamplesToBGLProc();
-      itsNoPolarisations = itsCS1PS->getInt32("Observation.nrPolarisations");
-      itsNSubbands       = itsCS1PS->nrSubbandsPerCell();
-      itsBufSize         = itsNTimes * itsNoPolarisations * itsNSubbands;
-    }
-
-    DH_RSP::DH_RSP(const DH_RSP& that)
-      : DataHolder         (that),
-        itsCS1PS           (that.itsCS1PS),
-        itsBuffer          (0),
-	itsFlags	   (that.itsFlags),
-        itsNTimes          (that.itsNTimes),
-        itsNoPolarisations (that.itsNoPolarisations),
-	itsNSubbands       (that.itsNSubbands),
-        itsBufSize         (that.itsBufSize)
-    {}
-
-    DH_RSP::~DH_RSP()
-    {
-      delete itsFlags;
-    };
-
-    LOFAR::DataHolder* DH_RSP::clone() const
-    {
-      return new DH_RSP(*this);
-    }
-
-    void DH_RSP::init()
-    {
-      // Add the fields to the data definition.
-      addField ("Buffer", BlobField<BufferType>(1,itsBufSize));
-      addField ("StationID", BlobField<int>(1));
-      addField ("Delay", BlobField<float>(1, 2));
-      addField ("TimeStamp", BlobField<TimeStamp>(1));
-  
-      itsFlags = new SparseSet<unsigned>;
-
-      // Create the data blob
-      createDataBlock();
-      // use memset to null the buffer
-      memset(itsBuffer, 0, itsBufSize*sizeof(BufferType));
-      itsFlags->write(createExtraBlob());
-    }
-
-    void DH_RSP::fillDataPointers()
-    {
-      // Fill in the buffer pointer.
-      itsBuffer  = getData<BufferType> ("Buffer");
-
-      // Fill in the StationID pointer
-      itsStationID = getData<int> ("StationID");
-  
-      // Fill in the Delay pointer
-      itsDelays = getData<float> ("Delay");
-  
-      // Fill in TimeStamp pointer
-      itsTimeStamp = getData<TimeStamp> ("TimeStamp");
-    }
-
-    void DH_RSP::fillExtraData()
-    {
-      itsFlags->write(createExtraBlob());
-    }
-
-    void DH_RSP::getExtraData()
-    {
-      itsFlags->read(getExtraBlob());
-    }
-
-  } // namespace CS1
-
-} // end namespace LOFAR
diff --git a/Appl/CEP/CS1/CS1_Interface/src/DH_RSPSync.cc b/Appl/CEP/CS1/CS1_Interface/src/DH_RSPSync.cc
deleted file mode 100644
index b618980d92df50e188df41b6c0e08a9ac408f4ae..0000000000000000000000000000000000000000
--- a/Appl/CEP/CS1/CS1_Interface/src/DH_RSPSync.cc
+++ /dev/null
@@ -1,66 +0,0 @@
-//#  DH_RSPSync.cc: DataHolder used to synchronize incoming RSP data
-//#
-//#  P.O.Box 2, 7990 AA Dwingeloo, The Netherlands, seg@astron.nl
-//#
-//#  This program is free software; you can redistribute it and/or modify
-//#  it under the terms of the GNU General Public License as published by
-//#  the Free Software Foundation; either version 2 of the License, or
-//#  (at your option) any later version.
-//#
-//#  This program is distributed in the hope that it will be useful,
-//#  but WITHOUT ANY WARRANTY; without even the implied warranty of
-//#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-//#  GNU General Public License for more details.
-//#
-//#  You should have received a copy of the GNU General Public License
-//#  along with this program; if not, write to the Free Software
-//#  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-//#
-//#  $Id$
-
-#include <lofar_config.h>
-
-#include <CS1_Interface/DH_RSPSync.h>
-#include <Blob/KeyValueMap.h>
-
-namespace LOFAR
-{
-  namespace CS1
-  {
-
-    DH_RSPSync::DH_RSPSync (const string& name)
-      : DataHolder    (name, "DH_RSPSync")
-    {
-    }
-
-    DH_RSPSync::DH_RSPSync(const DH_RSPSync& that)
-      : DataHolder(that)
-    {
-    }
-
-    DH_RSPSync::~DH_RSPSync()
-    {
-    }
-
-    DataHolder* DH_RSPSync::clone() const
-    {
-      return new DH_RSPSync(*this);
-    }
-
-    void DH_RSPSync::init()
-    {
-      // this could be done nicer, but it works for now because SyncStamp doesn't contain
-      // any pointers
-      addField("RSPsyncStamp", BlobField<char>(1, sizeof(timestamp_t)));
-      createDataBlock();
-      fillDataPointers();
-      itsSyncStamp->setStamp(0, 0);
-    }
-
-    void DH_RSPSync::fillDataPointers() {
-      itsSyncStamp = (timestamp_t*)getData<char> ("RSPsyncStamp");
-    }
-
-  } // namespace CS1
-
-} // namespace LOFAR
diff --git a/Appl/CEP/CS1/CS1_Interface/src/DH_Subband.cc b/Appl/CEP/CS1/CS1_Interface/src/DH_Subband.cc
deleted file mode 100644
index 4563dd1ff63d60d7311471ba3ebf5e7ef970b57d..0000000000000000000000000000000000000000
--- a/Appl/CEP/CS1/CS1_Interface/src/DH_Subband.cc
+++ /dev/null
@@ -1,111 +0,0 @@
-//#  DH_Subband.cc:
-//#
-//#  P.O.Box 2, 7990 AA Dwingeloo, The Netherlands, seg@astron.nl
-//#
-//#  This program is free software; you can redistribute it and/or modify
-//#  it under the terms of the GNU General Public License as published by
-//#  the Free Software Foundation; either version 2 of the License, or
-//#  (at your option) any later version.
-//#
-//#  This program is distributed in the hope that it will be useful,
-//#  but WITHOUT ANY WARRANTY; without even the implied warranty of
-//#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-//#  GNU General Public License for more details.
-//#
-//#  You should have received a copy of the GNU General Public License
-//#  along with this program; if not, write to the Free Software
-//#  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-//#
-//#  $Id$
-
-#include <lofar_config.h>
-
-#include <Blob/BlobOStream.h>
-#include <Blob/BlobIStream.h>
-#include <Common/DataConvert.h>
-#include <Common/Timer.h>
-#include <CS1_Interface/DH_Subband.h>
-
-
-namespace LOFAR {
-namespace CS1 {
-
-DH_Subband::DH_Subband(const string &name, const CS1_Parset *pSet)
-  : DataHolder(name, "DH_Subband"),
-    itsCS1PS  (pSet),
-    itsNrStations(itsCS1PS->nrStations()),
-    itsNrInputSamples(itsCS1PS->nrSamplesToBGLProc()),
-    itsSamples(0),
-    itsFlags(0),
-    itsDelays(0)
-{
-  setExtraBlob("Flags", 0);
-
-  ASSERT(itsCS1PS->nrSubbandSamples() % (itsCS1PS->nrPFFTaps() * itsCS1PS->nrChannelsPerSubband()) == 0);
-}
-
-DH_Subband::DH_Subband(const DH_Subband &that)
-  : DataHolder(that),
-    itsCS1PS(that.itsCS1PS),
-    itsNrStations(that.itsNrStations),
-    itsNrInputSamples(that.itsNrInputSamples),
-    itsSamples(0),
-    itsFlags(0),
-    itsDelays(0)
-{
-  setExtraBlob("Flags", 0);
-}
-
-DH_Subband::~DH_Subband()
-{
-  delete [] itsFlags;
-}
-
-LOFAR::DataHolder *DH_Subband::clone() const
-{
-  return new DH_Subband(*this);
-}
-
-void DH_Subband::init()
-{
-  addField("Samples", BlobField<uint8>(1, nrSamples() * sizeof(SampleType)), 32);
-  addField("Delays",  BlobField<float>(1, nrDelays() * sizeof(DelayIntervalType) / sizeof(float)));
-
-  itsFlags = new SparseSet<unsigned>[itsNrStations];
-
-  createDataBlock();
-}
-
-void DH_Subband::fillDataPointers()
-{
-  itsSamples = (SampleType *)	     getData<uint8> ("Samples");
-  itsDelays  = (DelayIntervalType *) getData<float> ("Delays");
-}
-
-
-void DH_Subband::fillExtraData()
-{
-  BlobOStream& bos = createExtraBlob();
-
-  for (unsigned stat = 0; stat < itsNrStations; stat ++)
-    itsFlags[stat].write(bos);
-}
-  
-
-void DH_Subband::getExtraData()
-{
-  BlobIStream &bis = getExtraBlob();
-  
-  for (unsigned stat = 0; stat < itsNrStations; stat ++)
-    itsFlags[stat].read(bis);
-}
-
-
-void DH_Subband::swapBytes()
-{
-  // only convert Samples; CEPframe converts Flags and Delays
-  dataConvert(LittleEndian, itsSamples, nrSamples());
-}
-
-} // namespace CS1
-} // namespace LOFAR
diff --git a/Appl/CEP/CS1/CS1_Interface/src/DH_Visibilities.cc b/Appl/CEP/CS1/CS1_Interface/src/DH_Visibilities.cc
index ad82dc3bfe8b54764796561090d0bea7c29c0c93..e2946509eca3d3eeb832f762969af5edbda13791 100644
--- a/Appl/CEP/CS1/CS1_Interface/src/DH_Visibilities.cc
+++ b/Appl/CEP/CS1/CS1_Interface/src/DH_Visibilities.cc
@@ -20,6 +20,8 @@
 
 #include <lofar_config.h>
 
+#if defined HAVE_APS
+
 #include <CS1_Interface/DH_Visibilities.h>
 #include <Common/Timer.h>
 
@@ -30,7 +32,8 @@ DH_Visibilities::DH_Visibilities(const string &name, const CS1_Parset *pSet)
 : DataHolder(name, "DH_Visibilities"),
   itsCS1PS  (pSet),
   itsVisibilities(0),
-  itsNrValidSamples(0)
+  itsNrValidSamples(0),
+  itsCentroids(0)
 {
   itsNrChannels       = itsCS1PS->nrChannelsPerSubband();
   unsigned nrStations = itsCS1PS->nrStations();
@@ -44,7 +47,8 @@ DH_Visibilities::DH_Visibilities(const DH_Visibilities &that)
   itsNrBaselines(that.itsNrBaselines),
   itsNrChannels(that.itsNrChannels),
   itsVisibilities(0),
-  itsNrValidSamples(0)
+  itsNrValidSamples(0),
+  itsCentroids(0)
 {
 }
 
@@ -60,7 +64,8 @@ DataHolder* DH_Visibilities::clone() const
 void DH_Visibilities::init()
 {
   addField("Visibilities",   BlobField<fcomplex>(1, getNrVisibilities()), 32);
-  addField("NrValidSamples", BlobField<NrValidSamplesType>(1, itsNrBaselines * itsNrChannels));
+  addField("NrValidSamples", BlobField<NrValidSamplesType>(1, itsNrBaselines * itsNrChannels), 32);
+  //addField("Centroids",      BlobField<CentroidsType>(1, itsNrBaselines), 32);
 
   createDataBlock();  // calls fillDataPointers
 }
@@ -111,8 +116,8 @@ asm(
 
 DH_Visibilities &DH_Visibilities::operator += (const DH_Visibilities &dh)
 {
-  NSTimer timer("DH_Vis add", true);
-  timer.start();
+  //NSTimer timer("DH_Vis add", true);
+  //timer.start();
 
 #if 1
   for (unsigned i = 0; i < getNrVisibilities(); i ++)
@@ -120,11 +125,15 @@ DH_Visibilities &DH_Visibilities::operator += (const DH_Visibilities &dh)
 
   for (unsigned i = 0; i < itsNrBaselines * itsNrChannels; i ++)
     itsNrValidSamples[i] += dh.itsNrValidSamples[i];
+
+  for (unsigned i = 0; i < itsNrBaselines; i ++) {
+    // add centroids here
+  }
 #else
   do_add(itsVisibilities, dh.itsVisibilities, getNrVisibilities());
 #endif
 
-  timer.stop();
+  //timer.stop();
   return *this;
 }
 
@@ -132,7 +141,10 @@ void DH_Visibilities::fillDataPointers()
 {
   itsVisibilities   = (VisibilityType *)     getData<fcomplex>("Visibilities");
   itsNrValidSamples = (NrValidSamplesType *) getData<NrValidSamplesType>("NrValidSamples");
+  //itsCentroids      = (CentroidsType *)      getData<CentroidsType>("Centroids");
 }
 
 } // namespace CS1
 } // namespace LOFAR
+
+#endif // defined HAVE_APS
diff --git a/Appl/CEP/CS1/CS1_Interface/src/Makefile.am b/Appl/CEP/CS1/CS1_Interface/src/Makefile.am
index 0422c431e56a43e41ea38a7d0f86eb0db59d85b6..ab586603702421b34a441346e86effe146d3c856 100644
--- a/Appl/CEP/CS1/CS1_Interface/src/Makefile.am
+++ b/Appl/CEP/CS1/CS1_Interface/src/Makefile.am
@@ -1,18 +1,14 @@
 lib_LTLIBRARIES			= libcs1_interface.la
 
 libcs1_interface_la_SOURCES	= \
-	bitset.cc \
+	BGL_Configuration.cc \
+	BGL_Mapping.cc \
+	CS1_Parset.cc \
 	DH_Delay.cc \
-	DH_RFI_Mitigation.cc \
-	DH_RSP.cc \
-	DH_RSPSync.cc \
-	DH_Subband.cc \
 	DH_Visibilities.cc \
 	RSPTimeStamp.cc \
 	Stub_BGL.cc \
-	Stub_Delay.cc \
-	CS1_Parset.cc \
-	Stub_RSP.cc
+	Stub_Delay.cc
 
 configfilesdir			= $(bindir)
 
diff --git a/Appl/CEP/CS1/CS1_Interface/src/SparseSet.cc b/Appl/CEP/CS1/CS1_Interface/src/SparseSet.cc
deleted file mode 100644
index 02ff41516a7c6d3fdb7609819e8304d47350aae2..0000000000000000000000000000000000000000
--- a/Appl/CEP/CS1/CS1_Interface/src/SparseSet.cc
+++ /dev/null
@@ -1,240 +0,0 @@
-//#  SparseSet.h: portable <bitset> adaptation
-//#
-//#  Copyright (C) 2006
-//#  ASTRON (Netherlands Foundation for Research in Astronomy)
-//#  P.O.Box 2, 7990 AA Dwingeloo, The Netherlands, seg@astron.nl
-//#
-//#  This program is free software; you can redistribute it and/or modify
-//#  it under the terms of the GNU General Public License as published by
-//#  the Free Software Foundation; either version 2 of the License, or
-//#  (at your option) any later version.
-//#
-//#  This program is distributed in the hope that it will be useful,
-//#  but WITHOUT ANY WARRANTY; without even the implied warranty of
-//#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-//#  GNU General Public License for more details.
-//#
-//#  You should have received a copy of the GNU General Public License
-//#  along with this program; if not, write to the Free Software
-//#  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-//#
-//#  $Id$
-
-
-// #include "lofar_config.h"
-
-#include <CS1_Interface/SparseSet.h>
-
-#include <algorithm>
-#include <cassert>
-#include <cstring>
-#include <iostream>
-
-
-namespace LOFAR
-{
-
-struct less {
-  bool operator() (const SparseSet::range &x, const SparseSet::range &y)
-  {
-    return x.end < y.begin;
-  }
-};
-
-struct less_equal {
-  bool operator() (const SparseSet::range &x, const SparseSet::range &y)
-  {
-    return x.end <= y.begin;
-  }
-};
-
-
-SparseSet &SparseSet::include(unsigned first, unsigned last)
-{
-  if (first < last) {
-    // find two iterators that mark the first resp. last involved ranges
-    range r(first, last);
-    std::pair<std::vector<range>::iterator, std::vector<range>::iterator> iters = equal_range(ranges.begin(), ranges.end(), r, less());
-
-    if (iters.first == iters.second) {
-      // insert new tuple
-      ranges.insert(iters.first, r);
-    } else {
-      // combine with existing tuple(s)
-      iters.first->begin = std::min(first, iters.first->begin);
-      iters.first->end   = std::max(last , iters.second[-1].end);
-
-      ranges.erase(iters.first + 1, iters.second);
-    } 
-  }
-
-  return *this;
-}
-
-
-SparseSet &SparseSet::exclude(unsigned first, unsigned last)
-{
-  if (first < last) {
-    // find two iterators that mark the first resp. last involved ranges
-    // unlike in include(), a range that is adjacent to first or last is not
-    // considered to be involved, hence the use of less_equal()
-    std::pair<std::vector<range>::iterator, std::vector<range>::iterator> iters = equal_range(ranges.begin(), ranges.end(), range(first, last), less_equal());
-
-    if (iters.first != iters.second) { // check if there are tuples involved
-      if (iters.second - iters.first == 1 && first > iters.first->begin && last < iters.first->end) {
-	// split tuple
-	range r(last, iters.first->end);
-	iters.first->end = first;
-	ranges.insert(iters.second, r);
-      } else {
-	// possibly erase tuples
-	if (first > iters.first->begin)
-	  (iters.first ++)->end = first; // adjust first tuple; do not erase
-
-	if (last < iters.second[-1].end)
-	  (-- iters.second)->begin = last; // adjust last tuple; do not erase
-
-	ranges.erase(iters.first, iters.second);
-      }
-    }
-  }
-
-  return *this;
-}
-
-
-unsigned SparseSet::count() const
-{
-  unsigned count = 0;
-
-  for (std::vector<range>::const_iterator it = ranges.begin(); it != ranges.end(); it ++)
-    count += it->end - it->begin;
-
-  return count;
-}
-
-
-bool SparseSet::test(unsigned index) const
-{
-  std::vector<range>::const_iterator it = lower_bound(ranges.begin(), ranges.end(), range(index, index + 1), less_equal());
-  return it != ranges.end() && index >= it->begin;
-}
-
-
-SparseSet SparseSet::operator | (const SparseSet &other) const
-{
-  // iterate with two iterators over both sets, comparing the ranges to decide
-  // what to do: include a range from the first set, include a range from the
-  // second set, or merge (multiple) ranges from both sets.
-
-  SparseSet union_set;
-  std::vector<range>::const_iterator it1 = ranges.begin(), it2 = other.ranges.begin();
-
-  while (it1 != ranges.end() && it2 != other.ranges.end()) {
-    if (it1->end < it2->begin) {
-      union_set.ranges.push_back(*it1 ++); // no overlap; *it1 is the smallest
-    } else if (it2->end < it1->begin) {
-      union_set.ranges.push_back(*it2 ++); // no overlap; *it2 is the smallest
-    } else { // there is overlap, or it1 and it2 are contiguous
-      unsigned new_begin = std::min(it1->begin, it2->begin);
-
-      // check if subsequent ranges from set1 and set2 must be joined as well
-      while (1) {
-	if (it1 + 1 != ranges.end() && it1[1].begin <= it2->end) {
-	  ++ it1;
-	} else if (it2 + 1 != other.ranges.end() && it2[1].begin <= it1->end) {
-	  ++ it2;
-	} else {
-	  break;
-	}
-      }
-
-      union_set.ranges.push_back(range(new_begin, std::max(it1->end, it2->end)));
-      ++ it1, ++ it2;
-    }
-  }
-
-  // possibly append the remainder of the set that we have not finished yet
-  union_set.ranges.insert(union_set.ranges.end(), it1, ranges.end());
-  union_set.ranges.insert(union_set.ranges.end(), it2, other.ranges.end());
-  return union_set;
-}
-
-
-SparseSet &SparseSet::operator += (size_t count)
-{
-  for (std::vector<range>::iterator it = ranges.begin(); it != ranges.end(); it ++)
-    it->begin += count, it->end += count;
-
-  return *this;
-}
-
-
-SparseSet &SparseSet::operator -= (size_t count)
-{
-  assert(ranges.size() == 0 || ranges[0].begin >= count);
-
-  for (std::vector<range>::iterator it = ranges.begin(); it != ranges.end(); it ++)
-    it->begin -= count, it->end -= count;
-
-  return *this;
-}
-
-
-void SparseSet::write(BlobOStream &bos) const
-{
-  bos << (uint32) ranges.size();
-
-  for (std::vector<range>::const_iterator it = ranges.begin(); it != ranges.end(); it ++)
-    bos << (uint32) it->begin << (uint32) it->end;
-}
-
-
-void SparseSet::read(BlobIStream &bis)
-{
-  uint32 size, begin, end;
-
-  bis >> size;
-  ranges.resize(size);
-
-  for (std::vector<range>::iterator it = ranges.begin(); it != ranges.end(); it ++) {
-    bis >> begin >> end;
-    it->begin = begin;
-    it->end   = end;
-  }
-}
-
-
-ssize_t SparseSet::marshall(void *ptr, size_t maxSize) const
-{
-  size_t size = sizeof(uint32) + ranges.size() * sizeof(range);
-
-  if (size > maxSize)
-    return -1;
-
-  * (uint32 *) ptr = ranges.size();
-  memcpy((uint32 *) ptr + 1, &ranges[0], ranges.size() * sizeof(range));
-
-  return size;
-}
-
-
-void SparseSet::unmarshall(const void *ptr)
-{
-  ranges.resize(* (uint32 *) ptr);
-  memcpy(&ranges[0], (uint32 *) ptr + 1, ranges.size() * sizeof(range));
-}
-
-
-std::ostream &operator << (std::ostream &str, const SparseSet &set)
-{
-  for (std::vector<SparseSet::range>::const_iterator it = set.getRanges().begin(); it != set.getRanges().end(); it ++)
-    if (it->end == it->begin + 1)
-      str << '[' << it->begin << "] ";
-    else
-      str << '[' << it->begin << ".." << it->end << "> ";
-
-  return str;
-}
-
-} // namespace LOFAR
diff --git a/Appl/CEP/CS1/CS1_Interface/src/Stub_BGL.cc b/Appl/CEP/CS1/CS1_Interface/src/Stub_BGL.cc
index 1d70d3d9aca88898eddb9fb3d8c4b232583252e1..bc1ead4315a132e8d79d02e06cac484271c11a44 100644
--- a/Appl/CEP/CS1/CS1_Interface/src/Stub_BGL.cc
+++ b/Appl/CEP/CS1/CS1_Interface/src/Stub_BGL.cc
@@ -20,6 +20,8 @@
 
 #include <lofar_config.h>
 
+#if defined HAVE_TINYCEP && defined HAVE_APS
+
 #include <CS1_Interface/Stub_BGL.h>
 #include <Transport/BGLConnection.h>
 #include <Transport/TH_File.h>
@@ -52,27 +54,27 @@ Stub_BGL::~Stub_BGL()
 }
 
 
-void Stub_BGL::connect(unsigned cellNr, unsigned nodeNr, TinyDataManager &dm, unsigned channel)
+void Stub_BGL::connect(unsigned psetNr, unsigned coreNr, TinyDataManager &dm, unsigned channel)
 {
-  pair<unsigned, unsigned> index(cellNr, nodeNr);
+  pair<unsigned, unsigned> index(psetNr, coreNr);
 
-  ASSERTSTR(itsTHs.find(index) == itsTHs.end(), "already connected: cellNr = " << cellNr << ", nodeNr = " << nodeNr);
+  ASSERTSTR(itsTHs.find(index) == itsTHs.end(), "already connected: psetNr = " << psetNr << ", coreNr = " << coreNr);
    
   TransportHolder *th;
   string transportType = itsCS1PS->getString(itsPrefix + "_Transport");
 
   if (transportType == "TCP") {
-    string server  = itsCS1PS->getStringVector(itsPrefix + "_ServerHosts")[cellNr];
-    string service = itsCS1PS->getPortsOf(itsPrefix)[nodeNr];
+    string server  = itsCS1PS->getStringVector(itsPrefix + "_ServerHosts")[psetNr];
+    string service = itsCS1PS->getPortsOf(itsPrefix)[coreNr];
     th = itsIAmOnBGL ? new TH_Socket(server, service, false, Socket::TCP, false) : new TH_Socket(service, false, Socket::TCP, 5, false);
   } else if (transportType == "FILE") {
     string baseFileName = itsCS1PS->getString(itsPrefix + "_BaseFileName");
     char fileName[baseFileName.size() + 32];
-    sprintf(fileName, "%s.%u.%u", baseFileName.c_str(), cellNr, nodeNr);
+    sprintf(fileName, "%s.%u.%u", baseFileName.c_str(), psetNr, coreNr);
     th = new TH_File(string(fileName), itsIsInput ? TH_File::Read : TH_File::Write);
 #if 0
   } else if (transportType == "ZOID") {
-    th = itsIAmOnBGL ? TH_ZoidClient() : TH_ZoidServer(nodeNr);
+    th = itsIAmOnBGL ? TH_ZoidClient() : TH_ZoidServer(coreNr);
 #endif
   } else if (transportType == "NULL") {
     th = new TH_Null();
@@ -95,3 +97,5 @@ void Stub_BGL::connect(unsigned cellNr, unsigned nodeNr, TinyDataManager &dm, un
 
 } // namespace CS1
 } // namespace LOFAR
+
+#endif // defined HAVE_TINYCEP && defined HAVE_APS
diff --git a/Appl/CEP/CS1/CS1_Interface/src/Stub_Delay.cc b/Appl/CEP/CS1/CS1_Interface/src/Stub_Delay.cc
index d0cf0343831f208fbd0b48b4604e70904135f269..2bf947c82c56f3f4abb7e4a0b613164921c84e4d 100644
--- a/Appl/CEP/CS1/CS1_Interface/src/Stub_Delay.cc
+++ b/Appl/CEP/CS1/CS1_Interface/src/Stub_Delay.cc
@@ -20,6 +20,8 @@
 
 #include <lofar_config.h>
 
+#if defined HAVE_TINYCEP
+
 #include <CS1_Interface/Stub_Delay.h>
 #include <Transport/TH_Socket.h>
 #include <Transport/TH_File.h>
@@ -97,7 +99,11 @@ namespace LOFAR
 	}
       } else if (transportType == "NULL") { 
 	th = new TH_Null();
+      } else {
+	std::cerr << "Unrecognized transport type for OLAP.OLAP_Conn.input_DelayComp_Transport" << std::endl;
+	exit(1);
       }
+
       
       itsTHs[RSP_nr] = th;
       if (itsIsInput) {
@@ -111,3 +117,4 @@ namespace LOFAR
   } // namespace CS1
 } //namespace LOFAR
 
+#endif // defined HAVE_TINYCEP
diff --git a/Appl/CEP/CS1/CS1_Interface/src/bitset.cc b/Appl/CEP/CS1/CS1_Interface/src/bitset.cc
deleted file mode 100644
index a134d37f30217ff6a48b15bb16c12af34d831907..0000000000000000000000000000000000000000
--- a/Appl/CEP/CS1/CS1_Interface/src/bitset.cc
+++ /dev/null
@@ -1,70 +0,0 @@
-//#  bitset.cc: 
-//#
-//#  P.O.Box 2, 7990 AA Dwingeloo, The Netherlands, seg@astron.nl
-//#
-//#  This program is free software; you can redistribute it and/or modify
-//#  it under the terms of the GNU General Public License as published by
-//#  the Free Software Foundation; either version 2 of the License, or
-//#  (at your option) any later version.
-//#
-//#  This program is distributed in the hope that it will be useful,
-//#  but WITHOUT ANY WARRANTY; without even the implied warranty of
-//#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-//#  GNU General Public License for more details.
-//#
-//#  You should have received a copy of the GNU General Public License
-//#  along with this program; if not, write to the Free Software
-//#  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-//#
-//#  $Id$
-
-
-#include <lofar_config.h>
-
-#include <CS1_Interface/bitset.h>
-
-namespace LOFAR
-{
-  namespace CS1
-  {
-    const unsigned char _S_bit_count[256] = {
-      0, 1, 1, 2, 1, 2, 2, 3, 1, 2, 2, 3, 2, 3, 3, 4,
-      1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
-      1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
-      2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
-      1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
-      2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
-      2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
-      3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
-      1, 2, 2, 3, 2, 3, 3, 4, 2, 3, 3, 4, 3, 4, 4, 5,
-      2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
-      2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
-      3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
-      2, 3, 3, 4, 3, 4, 4, 5, 3, 4, 4, 5, 4, 5, 5, 6,
-      3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
-      3, 4, 4, 5, 4, 5, 5, 6, 4, 5, 5, 6, 5, 6, 6, 7,
-      4, 5, 5, 6, 5, 6, 6, 7, 5, 6, 6, 7, 6, 7, 7, 8,
-    };
-
-    const unsigned char _S_first_one[256] = {
-      0, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
-      4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
-      5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
-      4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
-      6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
-      4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
-      5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
-      4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
-      7, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
-      4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
-      5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
-      4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
-      6, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
-      4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
-      5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
-      4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0,
-    };
-
-  } // namespace CS1
-
-} // namespace LOFAR
diff --git a/Appl/CEP/CS1/CS1_Run/Makefile.am b/Appl/CEP/CS1/CS1_Run/Makefile.am
index a4873c2d3ed19f4cab51688fb3362e45b1e3c293..c8dd1c206bbd4216265acf5005b50a0c4295ea60 100644
--- a/Appl/CEP/CS1/CS1_Run/Makefile.am
+++ b/Appl/CEP/CS1/CS1_Run/Makefile.am
@@ -1,4 +1,4 @@
-SUBDIRS=src test include
+SUBDIRS=src
 
 pkgextdir     = $(prefix)/config/$(PACKAGE)
 pkgext_DATA   = pkgext pkgextcppflags pkgextcxxflags pkgextldflags
diff --git a/Appl/CEP/CS1/CS1_Run/configure.in b/Appl/CEP/CS1/CS1_Run/configure.in
index 0553e67252f37b28dc4b8426ea5160bfccddbe9a..c4ef4405bb302f8d2792a89cef5dd85198b41274 100644
--- a/Appl/CEP/CS1/CS1_Run/configure.in
+++ b/Appl/CEP/CS1/CS1_Run/configure.in
@@ -59,10 +59,7 @@ dnl
 dnl Output Makefiles
 dnl
 AC_OUTPUT(
-include/Makefile
-include/CS1_Run/Makefile
 src/Makefile
-test/Makefile
 Makefile
 CS1_Run.spec
 )
diff --git a/Appl/CEP/CS1/CS1_Run/include/Makefile.am b/Appl/CEP/CS1/CS1_Run/include/Makefile.am
deleted file mode 100644
index 23e00549746cb3c746075a8b0928c69f45bbaa7f..0000000000000000000000000000000000000000
--- a/Appl/CEP/CS1/CS1_Run/include/Makefile.am
+++ /dev/null
@@ -1,3 +0,0 @@
-SUBDIRS	= CS1_Run
-
-include $(top_srcdir)/Makefile.common
diff --git a/Appl/CEP/CS1/CS1_Run/src/CS1.parset b/Appl/CEP/CS1/CS1_Run/src/CS1.parset
index 2ec52cb12ce8fe6a09d5ddc1005383dbc0820cd4..0585c5ea91f288105d5e5e56c332151245064843 100644
--- a/Appl/CEP/CS1/CS1_Run/src/CS1.parset
+++ b/Appl/CEP/CS1/CS1_Run/src/CS1.parset
@@ -9,7 +9,7 @@
 # Do not change these hosts without changing the machinefile(s). Host on which TFC_DelayCompensation runs.
 
 OLAP.OLAP_Conn.AMCServerHost = localhost
-OLAP.DelayComp.hostname = list001
+OLAP.DelayComp.hostname = 10.181.0.3	# IONodes cannot resolve list*
 
 # The ports for all (socket)connections
 
@@ -17,31 +17,25 @@ OLAP.OLAP_Conn.AMCServerPort = 31337
 OLAP.DelayComp.ports = [7200..7263]
 OLAP.OLAP_Conn.input_BGLProc_Ports = [8100..8163]
 OLAP.OLAP_Conn.BGLProc_Storage_Ports = [8300..8363]
-OLAP.firstInputPortnr = 4346
 
-OLAP.OLAP_Conn.input_BGLProc_Transport = TCP # one of TCP, UDP, FILE, NULL
-OLAP.OLAP_Conn.input_BGLProc_BaseFileName = "/data/test123"
-
-OLAP.OLAP_Conn.BGLProc_Storage_Transport = TCP # one of TCP, UDP, FILE, NULL
-OLAP.OLAP_Conn.BGLProc_Storage_BaseFileName = "Correlations"
+OLAP.OLAP_Conn.BGLProc_Storage_Transport = NULL # one of TCP, UDP, FILE, NULL
+OLAP.OLAP_Conn.BGLProc_Storage_BaseFileName = "/cephome/romein/out"
 
 # should be one of NULL(inputFromMemory), FILE, TCP, UDP, ETHERNET
 OLAP.OLAP_Conn.station_Input_Transport = UDP
 
-OLAP.OLAP_Conn.input_DelayComp_Transport = TCP # one of TCP, NULL
+OLAP.OLAP_Conn.input_DelayComp_Transport = NULL # one of TCP, NULL
 
 # The format of the data
 
 # Variables for Storage
-OLAP.subbandsPerPset = 4
-OLAP.psetsPerStorage = 2
+OLAP.subbandsPerPset = 1
+OLAP.psetsPerStorage = 1
 OLAP.BGLProc.integrationSteps = 608  #768 at 200MHz
 OLAP.BGLProc.nrPPFTaps=16
-OLAP.BGLProc.psetsPerCell = 1
-OLAP.BGLProc.nodesPerPset = 16
+OLAP.BGLProc.coresPerPset = 16
 OLAP.BGLProc.maxConcurrentComm = 1
 
-OLAP.IONProc.useScatter = T
 OLAP.IONProc.useGather = T
 OLAP.IONProc.integrationSteps = 1
 
@@ -56,25 +50,29 @@ OLAP.DelayComp.positionType  = ITRF  # should be ITRF
 OLAP.IPHeaderSize = 32
 OLAP.EPAHeaderSize = 16
 OLAP.nrTimesInFrame = 16
-OLAP.nrSubbandsPerFrame = 48
+OLAP.nrSubbandsPerFrame = 12
 OLAP.nrBitsPerSample=16
-OLAP.nrSecondsOfBuffer = 20
-OLAP.delayCompensation = T
+OLAP.nrSecondsOfBuffer = 6
+OLAP.delayCompensation = F
 
 Observation.sampleClock = 160
-Observation.nyquistZone = 3    #160MHz-LBA&HBA nq=1; 200MHZ-LBA nq=3; 200MHz-HBA nq=2;
-Observation.Beam.angle1 = [6.1234876806221052] # Cas A
-Observation.Beam.angle2 = [1.0265153995604648]
+Observation.nyquistZone = 1    #160MHz-LBA&HBA nq=1; 200MHZ-LBA nq=3; 200MHz-HBA nq=2;
+#Observation.Beam.angle1 = [6.1234876806221052] # Cas A
+#Observation.Beam.angle2 = [1.0265153995604648]
+#Observation.Beam.angle1 = [0.9293405574] # pulsar
+#Observation.Beam.angle2 = [0.9525774347]
 #Observation.Beam.angle1 = [5.27962]            # Transient 
 #Observation.Beam.angle2 = [0.706858]
 #Observation.Beam.angle1 = [5.2336866848083394] # Cygnus
 #Observation.Beam.angle2 = [0.71094251447010637]
 #Observation.Beam.angle1 = [0]                  # NCP
-#Observation.Beam.angle2 = [90]
+#Observation.Beam.angle2 = [1.570796327]
 #Observation.Beam.angle1 = [4.5192832066722115] # Jupiter
 #Observation.Beam.angle2 = [5.893698795]
 #Observation.Beam.angle1 = [1.4596748494230258] # Taurus
 #Observation.Beam.angle2 = [0.38422502336661052]
+Observation.Beam.angle1 = [0]
+Observation.Beam.angle2 = [0]
 
 Observation.Beam.directionTypes = J2000
 Observation.nrBeams = 1
@@ -82,11 +80,11 @@ Observation.channelsPerSubband = 256
 Observation.nrPolarisations = 2
 Observation.year = 2007
 Observation.treeID = 00001
-Observation.subbandList = [65, 73, 81, 89, 98, 106, 114, 122, 130, 138, 146, 155, 163, 171, 179, 187, 195, 204, 212, 220, 228, 236, 244, 252, 261, 269, 277, 285, 293, 301, 309, 318, 326, 334, 342, 350, 358, 367, 375, 383, 391, 399, 407, 415, 424, 432, 440, 448]
+Observation.subbandList = [0,1]
+#Observation.subbandList = [195,198,201,204,208,211,214,218,221,224,228,231,234,238,241,244,247,251,254,258,261,264,267,271,274,277,282,283,287,291,294,297,301,304,307,319]
+
+#Observation.subbandList = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35]
+#Observation.subbandList = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47]
 
 Observation.VirtualInstrument.stationList = ['CS010', 'CS008', 'CS001', 'CS016']
 
-PIC.Core.CS010.inputNodeList = [lii001, lii001, lii002, lii003]
-PIC.Core.CS008.inputNodeList = [lii004, lii004, lii005, lii006]
-PIC.Core.CS001.inputNodeList = [lii007, lii007, lii008, lii009]
-PIC.Core.CS016.inputNodeList = [lii010, lii010, lii011, lii012]
diff --git a/Appl/CEP/CS1/CS1_Run/src/CS1_Hosts.py b/Appl/CEP/CS1/CS1_Run/src/CS1_Hosts.py
index c9b8d0947e03ca9f0e262a12977c37bd2d75adb4..17270cf63d72e62f2c07238e57a06fba7e6d8007 100644
--- a/Appl/CEP/CS1/CS1_Run/src/CS1_Hosts.py
+++ b/Appl/CEP/CS1/CS1_Run/src/CS1_Hosts.py
@@ -8,7 +8,7 @@ liifen.setSlavesByPattern('lii%03d', '10.162.0.%d', [1,2,3,4,5,6,7,8,9,10,11,12,
 
 listfen   = ClusterFEN(name = 'listfen'    ,
                        address = '129.125.99.50')
-listfen.setSlavesByPattern('list%03d', '10.181.0.%d', [1,2])
+listfen.setSlavesByPattern('list%03d', '10.181.0.%d', [3,4])
 #listfen.slaves.append(ClusterSlave('lifs001', '10.182.0.1'))
 #listfen.slaves.append(ClusterSlave('lifs002', '10.182.0.2'))
 #listfen.slaves.append(ClusterSlave('lifs003', '10.182.0.3'))
@@ -20,12 +20,15 @@ list001   = Host(name = 'list001'  , \
                  address = '10.181.0.1')
 list002   = Host(name = 'list002'  , \
                  address = '10.181.0.2')
+list003   = Host(name = 'list003'  , \
+                 address = '10.181.0.3')
 hpclf     = Host(name = 'hpclf'  , \
                  address = 'hpclf1.service.rug.nl')
 bglfen1   = Host(name = 'bglfen1', \
                  address = 'bglfen1.service.rug.nl')
 bglfen2   = Host(name = 'bglfen2', \
-                 address = 'bglfen2.service.rug.nl')
+                 #address = 'bglfen2.service.rug.nl')
+                 address = 'bglfen2')
 bglfen3   = Host(name = 'bglfen3', \
                  address = 'bglfen3.service.rug.nl')
 bglfen0   = Host(name = 'bglfen0', \
@@ -39,3 +42,124 @@ gels        = UserId(bglfen1,'gels')
 romein      = UserId(bglfen2,'romein')
 lofarsystem = UserId(bglfen3,'lofarsystem')
 broekema    = UserId(bglfen2,'broekema')
+
+
+IONodes = dict({ \
+  'R000_128_0' : [ \
+    '10.170.0.26', \
+    '10.170.0.28', \
+    '10.170.0.18', \
+    '10.170.0.20', \
+    '10.170.0.25', \
+    '10.170.0.27', \
+    '10.170.0.17', \
+    '10.170.0.19', \
+    '10.170.0.10', \
+    '10.170.0.12', \
+    '10.170.0.2', \
+    '10.170.0.4', \
+    '10.170.0.9', \
+    '10.170.0.11', \
+    '10.170.0.1', \
+    '10.170.0.3', \
+  ], \
+  'R000_128_3' : [ \
+    '10.170.0.122', \
+    '10.170.0.124', \
+    '10.170.0.114', \
+    '10.170.0.116', \
+    '10.170.0.121', \
+    '10.170.0.123', \
+    '10.170.0.113', \
+    '10.170.0.115', \
+    '10.170.0.106', \
+    '10.170.0.108', \
+    '10.170.0.98', \
+    '10.170.0.100', \
+    '10.170.0.105', \
+    '10.170.0.107', \
+    '10.170.0.97', \
+    '10.170.0.99', \
+  ], \
+  'R000_B04' : [ \
+    '10.170.0.34', \
+    '10.170.0.36', \
+    '10.170.0.33', \
+    '10.170.0.35', \
+  ], \
+  'R000_B06' : [ \
+    '10.170.0.50', \
+    '10.170.0.52', \
+    '10.170.0.49', \
+    '10.170.0.51', \
+  ], \
+  'R000_8' : [ \
+    '10.170.0.122', \
+    '10.170.0.124', \
+    '10.170.0.114', \
+    '10.170.0.116', \
+    '10.170.0.121', \
+    '10.170.0.123', \
+    '10.170.0.113', \
+    '10.170.0.115', \
+    '10.170.0.58', \
+    '10.170.0.60', \
+    '10.170.0.50', \
+    '10.170.0.52', \
+    '10.170.0.57', \
+    '10.170.0.59', \
+    '10.170.0.49', \
+    '10.170.0.51', \
+    '10.170.0.106', \
+    '10.170.0.108', \
+    '10.170.0.98', \
+    '10.170.0.100', \
+    '10.170.0.105', \
+    '10.170.0.107', \
+    '10.170.0.97', \
+    '10.170.0.99', \
+    '10.170.0.42', \
+    '10.170.0.44', \
+    '10.170.0.34', \
+    '10.170.0.36', \
+    '10.170.0.41', \
+    '10.170.0.43', \
+    '10.170.0.33', \
+    '10.170.0.35', \
+    '10.170.0.90', \
+    '10.170.0.92', \
+    '10.170.0.82', \
+    '10.170.0.84', \
+    '10.170.0.89', \
+    '10.170.0.91', \
+    '10.170.0.81', \
+    '10.170.0.83', \
+    '10.170.0.26', \
+    '10.170.0.28', \
+    '10.170.0.18', \
+    '10.170.0.20', \
+    '10.170.0.25', \
+    '10.170.0.27', \
+    '10.170.0.17', \
+    '10.170.0.19', \
+    '10.170.0.74', \
+    '10.170.0.76', \
+    '10.170.0.66', \
+    '10.170.0.68', \
+    '10.170.0.73', \
+    '10.170.0.75', \
+    '10.170.0.65', \
+    '10.170.0.67', \
+    '10.170.0.10', \
+    '10.170.0.12', \
+    '10.170.0.2', \
+    '10.170.0.4', \
+    '10.170.0.9', \
+    '10.170.0.11', \
+    '10.170.0.1', \
+    '10.170.0.3', \
+  ], \
+})
+
+IONodes['R000_128_0T'] = IONodes['R000_128_0']
+IONodes['R000_128_3T'] = IONodes['R000_128_3']
diff --git a/Appl/CEP/CS1/CS1_Run/src/CS1_Parset.py b/Appl/CEP/CS1/CS1_Run/src/CS1_Parset.py
index 230942ba694ff090715ac3d07b768015e5fcf820..2e53860a85876655cf72fa8536b8e6803e6b566e 100644
--- a/Appl/CEP/CS1/CS1_Run/src/CS1_Parset.py
+++ b/Appl/CEP/CS1/CS1_Run/src/CS1_Parset.py
@@ -21,6 +21,7 @@ class CS1_Parset(LOFAR_Parset.Parset):
         elif self.clock == '200MHz':
 	    self['Observation.sampleClock'] = 200
 	    self['OLAP.BGLProc.integrationSteps'] = 768
+	    #self['OLAP.BGLProc.integrationSteps'] = 16
         self.updateSBValues()
         
     def getClockString(self):
@@ -45,9 +46,8 @@ class CS1_Parset(LOFAR_Parset.Parset):
 	
 	for s in self.stationList:
 	    name = self.getString('PIC.Core.' + s.getName() + '.port')
-	    name=name.split(":")
-	    name=name[0].strip("lii")
-	    inputNodelist.append(int(name))
+	    name=name.split(":")[0]
+	    inputNodelist.append(name)
     
         return inputNodelist
 	
@@ -125,11 +125,10 @@ class CS1_Parset(LOFAR_Parset.Parset):
     def getMSName(self):
         return self['Observation.MSNameMask']
 
-    def getNCells(self):
+    def getNPsets(self):
         subbands = len(self.getInt32Vector('Observation.subbandList'))
-        psetspercell = self.getInt32('OLAP.BGLProc.psetsPerCell')
         subbandsperpset = self.getInt32('OLAP.subbandsPerPset')
-        return subbands / (psetspercell * subbandsperpset)
+        return subbands / subbandsperpset
 
     def updateSBValues(self):
         if self.clock == '160MHz':
diff --git a/Appl/CEP/CS1/CS1_Run/src/CS1_Run.py b/Appl/CEP/CS1/CS1_Run/src/CS1_Run.py
index ff6d8720921e1b463acfcdbae86f4df91c509651..450e834650c4fb917d6a84e207ec3270c2876b43 100755
--- a/Appl/CEP/CS1/CS1_Run/src/CS1_Run.py
+++ b/Appl/CEP/CS1/CS1_Run/src/CS1_Run.py
@@ -35,13 +35,15 @@ def doObservation(obsID, parset):
         print 'Invalid userId: ' + logname
 	sys.exit(1)
 
-    BGLPartition = ('R000_128_0', 'R000_128_0Z')[parset.getBool('OLAP.BGLProc.useZoid')]
+    #BGLPartition = 'R000_128_0T'
+    BGLPartition = 'R000_128_3T'
+    #BGLPartition = 'R000_B06'
+    #BGLPartition = 'R000_8'
 
     sections = [\
-        DelayCompensationSection(parset, list001),
-        InputSection(parset, liifen),
+        #DelayCompensationSection(parset, list003),
         BGLProcSection(parset, userId.getHost(), BGLPartition),
-        StorageSection(parset, listfen)
+        #StorageSection(parset, listfen)
         #Flagger(parset, listfen)
         ]
     
@@ -56,12 +58,15 @@ def doObservation(obsID, parset):
     logdir = '/log/'
     if not os.access(logdir, os.W_OK):
         logdir = './'
-    parset.writeToFile(logdir + obsID + '.parset')
+    logdirCommand = 'mkdir ' + logdir + obsID
+    if os.system(logdirCommand) != 0:
+        print 'Failed to create directory: ' + logdir + obsID
+    parset.writeToFile(logdir +'/' + obsID + '/' + obsID + '.parset')
 
     try:
         for section in sections:
             print ('Starting ' + section.package)
-            runlog = logdir + obsID + '.' + section.getName() + '.runlog'
+            runlog = logdir + obsID + '/' + section.getName() + '.runlog'
 
             # todo 27-10-2006 this is a temporary hack because storage doesn't close neatly.
             # This way all sections run longer than needed and storage stops before the rest does
@@ -93,16 +98,13 @@ if __name__ == '__main__':
     # do not use the callback actions of the OptionParser, because we must make sure we read the parset before adding any variables
     parser.add_option('--parset'         , dest='parset'         , default='CS1.parset', type='string', help='name of the parameterset [%default]')
     parser.add_option('--clock'          , dest='clock'          , default='160MHz'    , type='string', help='clock frequency (either 160MHz or 200MHz) [%default]')
-    parser.add_option('--subbands'       , dest='subbands'       , default='60MHz,8'   , type='string', help='freq of first subband and number of subbands to use [%default]')
+    parser.add_option('--subbands'       , dest='subbands'       , default='60MHz,36'   , type='string', help='freq of first subband and number of subbands to use [%default]')
     parser.add_option('--runtime'        , dest='runtime'        , default='600'       , type='int'   , help='length of measurement in seconds [%default]')
-    parser.add_option('--starttime'      , dest='starttime', default=int(time.time() + 80), type='int', help='start of measurement in UTC seconds [now + 80s]')
+    parser.add_option('--starttime'      , dest='starttime', default=int(time.time() + 90), type='int', help='start of measurement in UTC seconds [now + 90s]')
     parser.add_option('--integrationtime', dest='integrationtime', default='60'        , type='int'   , help='length of integration interval in seconds [%default]')
     parser.add_option('--msname'         , dest='msname'                               , type='string', help='name of the measurement set')
     parser.add_option('--stationlist'    , dest='stationlist'	 , default='CS10_4dipoles', type='string', help='name of the station or stationconfiguration (see CS1_Stations.py) [%default]')
     parser.add_option('--fakeinput'      , dest='fakeinput'      , action='count'                     , help='do not really read from the inputs, but from memory')
-    parser.add_option('--zoid'		 , dest='zoid'		 , action='store_true', default=True, help='use ZOID (default)')
-    parser.add_option('--nozoid'	 , dest='zoid'		 , action='store_false', help='do not use ZOID')
-
     # parse the options
     (options, args) = parser.parse_args()
 
@@ -111,14 +113,6 @@ if __name__ == '__main__':
 
     parset.readFromFile(options.parset)
 
-    parset['OLAP.BGLProc.useZoid'] = 'FT'[options.zoid == True]
-
-    if not options.zoid: # override CS1.parset
-	parset['OLAP.IONProc.useScatter']	 = 'F'
-	parset['OLAP.IONProc.useGather']	 = 'F'
-	parset['OLAP.BGLProc.nodesPerPset']	 = 8
-	parset['OLAP.IONProc.maxConcurrentComm'] = 2
-
     parset.setClock(options.clock)
     parset.setIntegrationTime(options.integrationtime)
     if options.msname:
@@ -160,6 +154,7 @@ if __name__ == '__main__':
     try:
 	inf = open(runningNumberFile, 'r')
 	measurementnumber = int(inf.readline())
+	print 'MS =', measurementnumber
 	inf.close()
 	parset['Observation.ObsID'] = measurementnumber
 	outf = open(runningNumberFile, 'w')
@@ -167,7 +162,6 @@ if __name__ == '__main__':
 	outf.close()
 	
 	dbfile = open(MSdatabaseFile, 'a')
-	nodesStr = str([1] * parset.getNCells() + [0] * (12 - parset.getNCells()))[1:-1]
 	dateStr = time.strftime('%Y %0m %0d %H %M %S', time.gmtime()).split()
 	MS = parset.getString('Observation.MSNameMask')
 	MS = MS.replace('${YEAR}', dateStr[0])
@@ -179,12 +173,12 @@ if __name__ == '__main__':
 	MS = MS.replace('${MSNUMBER}', '%05d' % parset['Observation.ObsID'])
 	MS = MS.replace('${SUBBAND}', '*')
 	
-	dbfile.write(MS + '\t' + ' '.join(dateStr[0:3]) + '\t' + nodesStr + '\n')
+	dbfile.write(MS + '\t' + ' '.join(dateStr[0:3]) + '\n')
 	dbfile.close()
     except:
+	print 'caught exception:', sys.exc_info()[1]
 	sys.exit(1)
 
-
     obsID = 'L' + dateStr[0] + '_' + '%05d' % measurementnumber
     
     # start the observation
diff --git a/Appl/CEP/CS1/CS1_Run/src/CS1_Sections.py b/Appl/CEP/CS1/CS1_Run/src/CS1_Sections.py
index aaeefac869ccdf740e4cd3c1cc82b19520f9155c..e83fc79c539e6de87b197887f6cd410c4acb865d 100644
--- a/Appl/CEP/CS1/CS1_Run/src/CS1_Sections.py
+++ b/Appl/CEP/CS1/CS1_Run/src/CS1_Sections.py
@@ -4,6 +4,7 @@ import time
 import os
 import copy
 import sys
+from CS1_Hosts import *
 
 class Section(object):
     """
@@ -76,17 +77,17 @@ class InputSection(Section):
         self.nrsp = parset.getInt32('OLAP.nrRSPboards')
 	
         nSubbands = len(parset.getInt32Vector('Observation.subbandList'))
-        nSubbandsPerCell = parset.getInt32('OLAP.subbandsPerPset') * parset.getInt32('OLAP.BGLProc.psetsPerCell')
-        nCells = float(nSubbands) / float(nSubbandsPerCell)
-        if not nSubbands % nSubbandsPerCell == 0:
-            raise Exception('Not a integer number of compute cells (nSubbands = %d and nSubbandsPerCell = %d)' % (nSubbands, nSubbandsPerCell))
-        self.nCells = int(nCells)
+        nSubbandsPerPset = parset.getInt32('OLAP.subbandsPerPset')
+        nPsets = float(nSubbands) / float(nSubbandsPerPset)
+        if not nSubbands % nSubbandsPerPset == 0:
+            raise Exception('subbands cannot be evenly divided over psets (nSubbands = %d and nSubbandsPerPset = %d)' % (nSubbands, nSubbandsPerPset))
+        self.nPsets = int(nPsets)
 
         host = copy.deepcopy(myhost)
         slaves = host.getSlaves()
 
         inputNodes = parset.getInputNodes()
-        outputNodes = range(1, self.nCells + 1)
+        outputNodes = range(1, self.nPsets + 1)
         allNodes = inputNodes + [node for node in outputNodes if not node in inputNodes]
     
         inputIndices = range(len(inputNodes))
@@ -117,17 +118,17 @@ class StorageSection(Section):
     def __init__(self, parset, host):
 
         nSubbands = len(parset.getInt32Vector('Observation.subbandList'))
-        nSubbandsPerCell = parset.getInt32('OLAP.subbandsPerPset')
+        nSubbandsPerPset = parset.getInt32('OLAP.subbandsPerPset')
         nPsetsPerStorage = parset.getInt32('OLAP.psetsPerStorage');
-        if not nSubbands % (nSubbandsPerCell * nPsetsPerStorage) == 0:
+        if not nSubbands % (nSubbandsPerPset * nPsetsPerStorage) == 0:
             raise Exception('Not a integer number of subbands per storage node!')
 
-        self.noProcesses = nSubbands / (nSubbandsPerCell * nPsetsPerStorage)
+        self.noProcesses = nSubbands / (nSubbandsPerPset * nPsetsPerStorage)
 
         Section.__init__(self, parset, \
                          'Appl/CEP/CS1/CS1_Storage', \
                          host = host, \
-                         buildvar = 'gnu32_openmpi-opt')
+                         buildvar = 'gnu_openmpi-opt')
 
         storageIPs = [s.getExtIP() for s in self.host.getSlaves(self.noProcesses * nPsetsPerStorage)]
         self.parset['OLAP.OLAP_Conn.BGLProc_Storage_ServerHosts'] = '[' + ','.join(storageIPs) + ']'
@@ -143,15 +144,26 @@ class BGLProcSection(Section):
         self.partition = partition
 
         nSubbands = len(parset.getInt32Vector('Observation.subbandList'))
-        nSubbandsPerCell = parset.getInt32('OLAP.subbandsPerPset') * parset.getInt32('OLAP.BGLProc.psetsPerCell')
+        nSubbandsPerPset = parset.getInt32('OLAP.subbandsPerPset')
 
-        if not nSubbands % nSubbandsPerCell == 0:
-            raise Exception('Not a integer number of compute cells!')
+        if not nSubbands % nSubbandsPerPset == 0:
+            raise Exception('subbands cannot be evenly divided over psets (nSubbands = %d and nSubbandsPerPset = %d)' % (nSubbands, nSubbandsPerPset))
 
-        nCells = nSubbands / nSubbandsPerCell
-        self.noProcesses = int(nCells) * parset.getInt32('OLAP.BGLProc.nodesPerPset') * parset.getInt32('OLAP.BGLProc.psetsPerCell')
+        nPsets = nSubbands / nSubbandsPerPset
+        self.noProcesses = int(nPsets) * parset.getInt32('OLAP.BGLProc.coresPerPset')
         self.noProcesses = 256 # The calculation above is not correct, because some ranks aren't used
 
+        inputNodes = parset.getInputNodes()
+	interfaces = IONodes.get(partition)
+	inputPsets = [interfaces.index(i) for i in inputNodes]
+	outputPsets = range(nPsets)
+	parset['OLAP.BGLProc.inputPsets']  = inputPsets
+	parset['OLAP.BGLProc.outputPsets'] = outputPsets
+	print 'inputNodes = ', inputNodes
+	print 'interfaces = ', interfaces
+	print 'inputPsets = ', inputPsets
+	print 'outputPsets = ', outputPsets
+
         Section.__init__(self, parset, \
                          'Appl/CEP/CS1/CS1_BGLProc', \
                          host = host, \
@@ -162,10 +174,10 @@ class BGLProcSection(Section):
 	self.executable = 'CS1_BGL_Processing'
         
     def run(self, runlog, noRuns, runCmd = None):
-        nodesPerCell = self.parset.getInt32('OLAP.BGLProc.nodesPerPset') * self.parset.getInt32('OLAP.BGLProc.psetsPerCell')
-        subbandsPerCell = self.parset.getInt32('OLAP.subbandsPerPset') * self.parset.getInt32('OLAP.BGLProc.psetsPerCell')
-        actualRuns = int(noRuns * subbandsPerCell / nodesPerCell)
-        if not actualRuns * nodesPerCell == noRuns * subbandsPerCell:
+        coresPerPset = self.parset.getInt32('OLAP.BGLProc.coresPerPset')
+        subbandsPerPset = self.parset.getInt32('OLAP.subbandsPerPset')
+        actualRuns = int(noRuns * subbandsPerPset / coresPerPset)
+        if not actualRuns * coresPerPset == noRuns * subbandsPerPset:
             raise Exception('illegal number of runs')
         Section.run(self, runlog, actualRuns, runCmd)        
 
diff --git a/Appl/CEP/CS1/CS1_Run/src/CS1_Stations.py b/Appl/CEP/CS1/CS1_Run/src/CS1_Stations.py
index c064a54ea82eaa3745cc756986857945edb79643..a0b193ad24bf73201f888484e348a155d23d3894 100644
--- a/Appl/CEP/CS1/CS1_Run/src/CS1_Stations.py
+++ b/Appl/CEP/CS1/CS1_Run/src/CS1_Stations.py
@@ -7,7 +7,7 @@ class Station(object):
     """
     def __init__(self, name):
         self.name = name
-        stationNumber = int(name.split('_')[0].split('CS')[-1])
+        #stationNumber = int(name.split('_')[0].split('CS')[-1])
     def getName(self):
         return self.name
 
@@ -99,6 +99,18 @@ CS032_dipole42 = [Station('CS032_dipole42')]
 CS032_dipole46 = [Station('CS032_dipole46')]
 CS032_4dipoles = CS032_dipole34 + CS032_dipole37 + CS032_dipole42 + CS032_dipole46
 
+CS001T_dipole0 = [Station('CS001T_dipole0')]
+CS001T_dipole4 = [Station('CS001T_dipole4')]
+CS001T_dipole8 = [Station('CS001T_dipole8')]
+CS001T_dipole12 = [Station('CS001T_dipole12')]
+CS001T_4dipoles = CS001T_dipole0 + CS001T_dipole4 + CS001T_dipole8 + CS001T_dipole12
+
+CS010T_dipole0 = [Station('CS010T_dipole0')]
+CS010T_dipole4 = [Station('CS010T_dipole4')]
+CS010T_dipole8 = [Station('CS010T_dipole8')]
+CS010T_dipole12 = [Station('CS010T_dipole12')]
+CS010T_4dipoles = CS010T_dipole0 + CS010T_dipole4 + CS010T_dipole8 + CS010T_dipole12
+
 CS010_us0  = [Station('CS010_us0')]
 CS010_us1  = [Station('CS010_us1')]
 CS010_us2  = [Station('CS010_us2')]
@@ -123,10 +135,15 @@ CS016_us2  = [Station('CS016_us2')]
 CS016_us3 =  [Station('CS016_us3')]
 CS016_4us = CS016_us0 + CS016_us1 + CS016_us2 + CS016_us3
 
+B06_0 = [Station('B06_0')]
+
 AllMicroStations = CS010_4us + CS001_4us + CS008_4us + CS016_4us
 AllDipoles0_4_8_12 = CS010_4dipoles0_4_8_12 + CS001_4dipoles0_4_8_12 + CS008_4dipoles0_4_8_12 + CS016_4dipoles0_4_8_12
 AllDipolesMixed = CS010_4dipoles0_4_8_12 + CS001_4dipoles2_6_10_14 + CS008_4dipoles2_6_10_14 + CS016_4dipoles2_6_10_14
 AllHBAs = CS010_HBAs + CS001_HBAs + CS008_HBAs + CS016_HBAs
 Mixed = CS010_us0 + CS010_dipole4 + CS010_dipole8 + CS010_dipole12 + CS001_4dipoles0_4_8_12 + CS008_4dipoles0_4_8_12 + CS016_4dipoles0_4_8_12
 All24Dipoles = CS001_4dipoles0_4_8_12 + CS008_4dipoles0_4_8_12 + CS016_4dipoles0_4_8_12 + CS030_4dipoles + CS031_4dipoles + CS032_4dipoles
-Test = CS001_4dipoles0_4_8_12 + CS008_4dipoles0_4_8_12 + CS016_4dipoles0_4_8_12
+Generator = CS010_HBA0 + CS010_HBA2 + CS010_HBA3 + CS008_HBA0 + CS008_HBA2 + CS008_HBA3 + CS001_HBA0 + CS001_HBA2 + CS001_HBA3 + CS016_HBA0 + CS016_HBA2 + CS016_HBA3
+#Test = CS001T_4dipoles + CS001_4dipoles0_4_8_12
+#Test = CS001T_4dipoles + CS010_4dipoles0_4_8_12 + CS001_4dipoles0_4_8_12 + CS008_4dipoles0_4_8_12 + CS016_dipole0 + CS016_dipole4 + CS016_dipole8
+Test = CS001T_4dipoles + CS001_4dipoles0_4_8_12 + CS008_4dipoles0_4_8_12 + CS016_dipole0 + CS016_dipole4 + CS016_dipole8
diff --git a/Appl/CEP/CS1/CS1_Run/src/LOFAR_Jobs.py b/Appl/CEP/CS1/CS1_Run/src/LOFAR_Jobs.py
index 61ec19c378f651e56c2b2b49c9764d1bf26a17d9..0e748596c7efd996f6941bddddd3c6cf5cbef51c 100644
--- a/Appl/CEP/CS1/CS1_Run/src/LOFAR_Jobs.py
+++ b/Appl/CEP/CS1/CS1_Run/src/LOFAR_Jobs.py
@@ -14,6 +14,7 @@ class Job(object):
         self.executable = executable
         self.remoteRunLog = self.workingDir + '/run.' + name + '.log'
         self.runlog = None
+	self.BGLpartition = None
 
     def run(self, runlog, parsetfile, timeOut, noRuns, runCmd = None):
         self.runlog = runlog
@@ -22,6 +23,9 @@ class Job(object):
 	tmp = self.workingDir + '/LOFAR/Appl/CEP/CS1/' + self.name + '/src/' + self.name + '.log_prop'
 	self.host.sput(tmp, '~/')
 	self.host.sput('OLAP.parset', '~/')
+	if (self.name == 'CS1_BGLProc'):
+	    self.BGLpartition = runCmd.split(' ')[2]
+
         if runCmd == None:
             runCmd = self.executable
         self.runCommand = self.host.executeAsync('( cd ~ ; ' + runCmd + ' ' + parsetfile.split('/')[2] + ' ' + str(noRuns) + ') &> ' + self.remoteRunLog, timeout = timeOut)
@@ -39,7 +43,12 @@ class Job(object):
     def isSuccess(self):
         self.waitForDone()
         if not self.runLogRetreived:
-	    
+	    if (self.name == 'CS1_BGLProc'):
+		interfaces = IONodes.get(self.BGLpartition) or []  # unset/unknown partition: skip IONProc logs, still fetch the main runlog. NOTE(review): confirm IONodes (CS1_Hosts) is imported in this module
+		for i in range(0, len(interfaces)):
+		    remoteRunLogIONProc = self.workingDir + '/run.CS1_IONProc.' + str(i)
+		    runlogIOProc = '/'.join(self.runlog.split('/')[:-1]) + '/CS1_IONProc.' + str(i) + '.runlog'  # same directory as the main runlog, also for relative paths such as './<obsID>/...'
+		    self.host.sget(remoteRunLogIONProc, runlogIOProc)
             self.host.sget(self.remoteRunLog, self.runlog)
             self.runLogRetreived = True
         return self.runCommand.isSuccess()
diff --git a/Appl/CEP/CS1/CS1_Run/src/OLAP.parset b/Appl/CEP/CS1/CS1_Run/src/OLAP.parset
index 854208b32b7d5725aeeff6f8e6fe748877ee72e4..1e3714e789742cb757902160d98501d9078cb089 100644
--- a/Appl/CEP/CS1/CS1_Run/src/OLAP.parset
+++ b/Appl/CEP/CS1/CS1_Run/src/OLAP.parset
@@ -169,145 +169,171 @@ PIC.Core.CS032_dipole45.position = [0.119886666319, 0.92026152339, 6364621.27747
 PIC.Core.CS032_dipole46.position = [0.11988492099, 0.920261523389, 6364621.27547]
 PIC.Core.CS032_dipole47.position = [0.119886317254, 0.920261523385, 6364621.26847]
 
-PIC.Core.CS010_HBA0.position = [0.119881248042,  0.920262545738, 6364621.18843]
-PIC.Core.CS010_HBA1.position = [0.119888779622,  0.920265564336, 6364621.16629]
-PIC.Core.CS010_HBA2.position = [0.119879764027,  0.920262155263, 6364621.23847]
-PIC.Core.CS010_HBA3.position = [0.119885557551,  0.920264027099, 6364621.15694]
+PIC.Core.CS001_dipole0.port  = 10.170.0.2:4346
+PIC.Core.CS001_dipole4.port  = 10.170.0.4:4347
+PIC.Core.CS001_dipole8.port  = 10.170.0.1:4348
+PIC.Core.CS001_dipole12.port = 10.170.0.3:4349
+
+PIC.Core.CS001_dipole2.port  = 10.170.0.2:4346
+PIC.Core.CS001_dipole6.port  = 10.170.0.4:4347
+PIC.Core.CS001_dipole10.port = 10.170.0.1:4348
+PIC.Core.CS001_dipole14.port = 10.170.0.3:4349
+
+PIC.Core.CS008_dipole0.port  = 10.170.0.10:4346
+PIC.Core.CS008_dipole4.port  = 10.170.0.9:4347
+PIC.Core.CS008_dipole8.port  = 10.170.0.12:4348
+PIC.Core.CS008_dipole12.port = 10.170.0.11:4349
+
+PIC.Core.CS008_dipole2.port  = 10.170.0.10:4346
+PIC.Core.CS008_dipole6.port  = 10.170.0.9:4347
+PIC.Core.CS008_dipole10.port = 10.170.0.12:4348
+PIC.Core.CS008_dipole14.port = 10.170.0.11:4349
+
+PIC.Core.CS010_dipole0.port  = 10.170.0.18:4346
+PIC.Core.CS010_dipole4.port  = 10.170.0.17:4347
+PIC.Core.CS010_dipole8.port  = 10.170.0.20:4348
+PIC.Core.CS010_dipole12.port = 10.170.0.19:4349
+
+PIC.Core.CS016_dipole0.port  = 10.170.0.42:4346
+PIC.Core.CS016_dipole4.port  = 10.170.0.41:4347
+PIC.Core.CS016_dipole8.port  = 10.170.0.44:4348
+PIC.Core.CS016_dipole12.port = 10.170.0.43:4349
+
+PIC.Core.CS016_dipole2.port  = 10.170.0.42:4346
+PIC.Core.CS016_dipole6.port  = 10.170.0.41:4347
+PIC.Core.CS016_dipole10.port = 10.170.0.44:4348
+PIC.Core.CS016_dipole14.port = 10.170.0.43:4349
+
+PIC.Core.CS030_dipole0.port  = 10.170.0.18:4346
+PIC.Core.CS030_dipole7.port  = 10.170.0.17:4347
+PIC.Core.CS030_dipole10.port = 10.170.0.20:4348
+PIC.Core.CS030_dipole14.port = 10.170.0.19:4349
+
+PIC.Core.CS031_dipole16.port = 10.170.0.26:4346
+PIC.Core.CS031_dipole22.port = 10.170.0.25:4347
+PIC.Core.CS031_dipole24.port = 10.170.0.28:4348
+PIC.Core.CS031_dipole31.port = 10.170.0.27:4349
+
+PIC.Core.CS032_dipole34.port = 10.170.0.34:4346
+PIC.Core.CS032_dipole37.port = 10.170.0.33:4347
+PIC.Core.CS032_dipole42.port = 10.170.0.36:4348
+PIC.Core.CS032_dipole46.port = 10.170.0.35:4349
 
-PIC.Core.CS010_HBA0.port  = lii005:4346
-PIC.Core.CS010_HBA1.port  = lii005:4347
-PIC.Core.CS010_HBA2.port  = lii006:4348
-PIC.Core.CS010_HBA3.port  = lii006:4349
-
-PIC.Core.CS010_S30.position = [0.119892920416, 0.92026905084 , 6364619.58352]
-PIC.Core.CS010_S31.position = [0.119895877779, 0.920274150605, 6364619.46855]
-PIC.Core.CS010_S32.position = [0.119904798351, 0.920274053467, 6364619.47055]
-PIC.Core.CS010_S33.position = [0.119907416345, 0.920268856583, 6364619.62752]
-
-PIC.Core.CS010_S30.port  = lii005:4346
-PIC.Core.CS010_S31.port  = lii005:4347
-PIC.Core.CS010_S32.port  = lii006:4348
-PIC.Core.CS010_S33.port  = lii006:4349
+PIC.Core.CS001_HBA0.position = [0.119852973708, 0.920234762967, 6364622.31931]
+PIC.Core.CS001_HBA1.position = [0.119856867247, 0.920239170623, 6364622.22858]
+PIC.Core.CS001_HBA2.position = [0.119864144785, 0.920236812588, 6364622.27712]
+PIC.Core.CS001_HBA3.position = [0.119860251246, 0.920232404931, 6364622.36784]
 
 PIC.Core.CS008_HBA0.position = [0.11987012884 , 0.920289237881, 6364620.52801]
 PIC.Core.CS008_HBA1.position = [0.119874022864, 0.920293645541, 6364620.43728]
 PIC.Core.CS008_HBA2.position = [0.119898033261, 0.920287307739, 6364620.56774]
 PIC.Core.CS008_HBA3.position = [0.119894139238, 0.92028290008 , 6364620.65846]
 
-PIC.Core.CS008_HBA0.port  = lii003:4346
-PIC.Core.CS008_HBA1.port  = lii003:4347
-PIC.Core.CS008_HBA2.port  = lii004:4348
-PIC.Core.CS008_HBA3.port  = lii004:4349
-
-PIC.Core.CS001_HBA0.position = [0.119852973708, 0.920234762967, 6364622.31931]
-PIC.Core.CS001_HBA1.position = [0.119856867247, 0.920239170623, 6364622.22858]
-PIC.Core.CS001_HBA2.position = [0.119864144785, 0.920236812588, 6364622.27712]
-PIC.Core.CS001_HBA3.position = [0.119860251246, 0.920232404931, 6364622.36784]
-
-PIC.Core.CS001_HBA0.port  = lii001:4346
-PIC.Core.CS001_HBA1.port  = lii001:4347
-PIC.Core.CS001_HBA2.port  = lii002:4348
-PIC.Core.CS001_HBA3.port  = lii002:4349
+PIC.Core.CS010_HBA0.position = [0.119881248042,  0.920262545738, 6364621.18843]
+PIC.Core.CS010_HBA1.position = [0.119888779622,  0.920265564336, 6364621.16629]
+PIC.Core.CS010_HBA2.position = [0.119879764027,  0.920262155263, 6364621.23847]
+PIC.Core.CS010_HBA3.position = [0.119885557551,  0.920264027099, 6364621.15694]
 
 PIC.Core.CS016_HBA0.position = [0.119960298852, 0.920262342708, 6364621.17161]
 PIC.Core.CS016_HBA1.position = [0.11996419336 , 0.92026674988 , 6364621.08089]
 PIC.Core.CS016_HBA2.position = [0.119971470898, 0.920264391844, 6364621.12943]
 PIC.Core.CS016_HBA3.position = [0.11996757639 , 0.920259984186, 6364621.22015]
 
-PIC.Core.CS016_HBA0.port  = lii011:4346
-PIC.Core.CS016_HBA1.port  = lii011:4347
-PIC.Core.CS016_HBA2.port  = lii012:4348
-PIC.Core.CS016_HBA3.port  = lii012:4349
+PIC.Core.CS001_HBA0.port  = 10.170.0.2:4346
+PIC.Core.CS001_HBA1.port  = 10.170.0.4:4347
+PIC.Core.CS001_HBA2.port  = 10.170.0.1:4348
+PIC.Core.CS001_HBA3.port  = 10.170.0.3:4349
+
+PIC.Core.CS008_HBA0.port  = 10.170.0.10:4346
+PIC.Core.CS008_HBA1.port  = 10.170.0.9:4347
+PIC.Core.CS008_HBA2.port  = 10.170.0.12:4348
+PIC.Core.CS008_HBA3.port  = 10.170.0.11:4349
+
+PIC.Core.CS010_HBA0.port  = 10.170.0.18:4346
+PIC.Core.CS010_HBA1.port  = 10.170.0.17:4347
+PIC.Core.CS010_HBA2.port  = 10.170.0.20:4348
+PIC.Core.CS010_HBA3.port  = 10.170.0.19:4349
+
+PIC.Core.CS016_HBA0.port  = 10.170.0.42:4346
+PIC.Core.CS016_HBA1.port  = 10.170.0.41:4347
+PIC.Core.CS016_HBA2.port  = 10.170.0.44:4348
+PIC.Core.CS016_HBA3.port  = 10.170.0.43:4349
+
+PIC.Core.CS010_S30.position = [0.119892920416, 0.92026905084 , 6364619.58352]
+PIC.Core.CS010_S31.position = [0.119895877779, 0.920274150605, 6364619.46855]
+PIC.Core.CS010_S32.position = [0.119904798351, 0.920274053467, 6364619.47055]
+PIC.Core.CS010_S33.position = [0.119907416345, 0.920268856583, 6364619.62752]
+
+PIC.Core.CS010_S30.port  = 10.170.0.18:4346
+PIC.Core.CS010_S31.port  = 10.170.0.17:4347
+PIC.Core.CS010_S32.port  = 10.170.0.20:4348
+PIC.Core.CS010_S33.port  = 10.170.0.19:4349
 
 #PIC.Core.CS010_us0.position = [0.119880751593, 0.920263316053, 6364621.18657]
 # For CS10_us0 (48 dipoles) the physical position and phase centre are equal
-PIC.Core.CS010_us0.position = [0.119884530715, 0.920263520535, 6364621.19236]
-PIC.Core.CS010_us1.position = [0.11988976137 , 0.920265828067, 6364621.16087]
-PIC.Core.CS010_us2.position = [0.119878900574, 0.920261296557, 6364621.25414]
-PIC.Core.CS010_us3.position = [0.119884742094, 0.920264493858, 6364621.16333]
+PIC.Core.CS001_us0.position = [0.119852074863, 0.920235868407, 6364622.30655]
+PIC.Core.CS001_us1.position = [0.119858692085, 0.920239715084, 6364622.21637]
+PIC.Core.CS001_us2.position = [0.119865043629, 0.920235707148, 6364622.29187]
+PIC.Core.CS001_us3.position = [0.119858426407, 0.920231860473, 6364622.38405]
 
 PIC.Core.CS008_us0.position = [0.119869229995, 0.920290343316, 6364620.50225]
 PIC.Core.CS008_us1.position = [0.119875847702, 0.920294190008, 6364620.43708]
 PIC.Core.CS008_us2.position = [0.119898932106, 0.920286202343, 6364620.66949]
 PIC.Core.CS008_us3.position = [0.119892314399, 0.920282356152, 6364620.76366]
 
-PIC.Core.CS001_us0.position = [0.119852074863, 0.920235868407, 6364622.30655]
-PIC.Core.CS001_us1.position = [0.119858692085, 0.920239715084, 6364622.21637]
-PIC.Core.CS001_us2.position = [0.119865043629, 0.920235707148, 6364622.29187]
-PIC.Core.CS001_us3.position = [0.119858426407, 0.920231860473, 6364622.38405]
+PIC.Core.CS010_us0.position = [0.119884530715, 0.920263520535, 6364621.19236]
+PIC.Core.CS010_us1.position = [0.11988976137 , 0.920265828067, 6364621.16087]
+PIC.Core.CS010_us2.position = [0.119878900574, 0.920261296557, 6364621.25414]
+PIC.Core.CS010_us3.position = [0.119884742094, 0.920264493858, 6364621.16333]
 
 PIC.Core.CS016_us0.position = [0.119959400492, 0.920263448231, 6364621.32185]
 PIC.Core.CS016_us1.position = [0.119966018199, 0.92026729443 , 6364621.24368]
 PIC.Core.CS016_us2.position = [0.119972369258, 0.9202632865  , 6364621.33318]
 PIC.Core.CS016_us3.position = [0.119965751066, 0.920259440307, 6364621.42235]
 
-PIC.Core.CS010_us0.port  = lii005:4346
-PIC.Core.CS010_us1.port  = lii005:4347
-PIC.Core.CS010_us2.port  = lii006:4348
-PIC.Core.CS010_us3.port  = lii006:4349
-
-PIC.Core.CS008_us0.port  = lii003:4346
-PIC.Core.CS008_us1.port  = lii003:4347
-PIC.Core.CS008_us2.port  = lii004:4348
-PIC.Core.CS008_us3.port  = lii004:4349
-
-PIC.Core.CS001_us0.port  = lii001:4346
-PIC.Core.CS001_us1.port  = lii001:4347
-PIC.Core.CS001_us2.port  = lii002:4348
-PIC.Core.CS001_us3.port  = lii002:4349
-
-PIC.Core.CS016_us0.port  = lii011:4346
-PIC.Core.CS016_us1.port  = lii011:4347
-PIC.Core.CS016_us2.port  = lii012:4348
-PIC.Core.CS016_us3.port  = lii012:4349
-
-PIC.Core.CS001_dipole0.port  = lii001:4346
-PIC.Core.CS001_dipole4.port  = lii001:4347
-PIC.Core.CS001_dipole8.port  = lii002:4348
-PIC.Core.CS001_dipole12.port = lii002:4349
-
-PIC.Core.CS001_dipole2.port  = lii001:4346
-PIC.Core.CS001_dipole6.port  = lii001:4347
-PIC.Core.CS001_dipole10.port = lii002:4348
-PIC.Core.CS001_dipole14.port = lii002:4349
-
-PIC.Core.CS008_dipole0.port  = lii003:4346
-PIC.Core.CS008_dipole4.port  = lii003:4347
-PIC.Core.CS008_dipole8.port  = lii004:4348
-PIC.Core.CS008_dipole12.port = lii004:4349
-
-PIC.Core.CS008_dipole2.port  = lii003:4346
-PIC.Core.CS008_dipole6.port  = lii003:4347
-PIC.Core.CS008_dipole10.port = lii004:4348
-PIC.Core.CS008_dipole14.port = lii004:4349
-
-PIC.Core.CS010_dipole0.port  = lii005:4346
-PIC.Core.CS010_dipole4.port  = lii005:4347
-PIC.Core.CS010_dipole8.port  = lii006:4348
-PIC.Core.CS010_dipole12.port = lii006:4349
-
-PIC.Core.CS016_dipole0.port  = lii011:4346
-PIC.Core.CS016_dipole4.port  = lii011:4347
-PIC.Core.CS016_dipole8.port  = lii012:4348
-PIC.Core.CS016_dipole12.port = lii012:4349
-
-PIC.Core.CS016_dipole2.port  = lii011:4346
-PIC.Core.CS016_dipole6.port  = lii011:4347
-PIC.Core.CS016_dipole10.port = lii012:4348
-PIC.Core.CS016_dipole14.port = lii012:4349
-
-PIC.Core.CS030_dipole0.port  = lii005:4346
-PIC.Core.CS030_dipole7.port  = lii005:4347
-PIC.Core.CS030_dipole10.port = lii006:4348
-PIC.Core.CS030_dipole14.port = lii006:4349
-
-PIC.Core.CS031_dipole16.port = lii007:4346
-PIC.Core.CS031_dipole22.port = lii007:4347
-PIC.Core.CS031_dipole24.port = lii008:4348
-PIC.Core.CS031_dipole31.port = lii008:4349
-
-PIC.Core.CS032_dipole34.port = lii009:4346
-PIC.Core.CS032_dipole37.port = lii009:4347
-PIC.Core.CS032_dipole42.port = lii010:4348
-PIC.Core.CS032_dipole46.port = lii010:4349
-
+PIC.Core.CS001_us0.port  = 10.170.0.2:4346
+PIC.Core.CS001_us1.port  = 10.170.0.4:4347
+PIC.Core.CS001_us2.port  = 10.170.0.1:4348
+PIC.Core.CS001_us3.port  = 10.170.0.3:4349
+
+PIC.Core.CS008_us0.port  = 10.170.0.10:4346
+PIC.Core.CS008_us1.port  = 10.170.0.9:4347
+PIC.Core.CS008_us2.port  = 10.170.0.12:4348
+PIC.Core.CS008_us3.port  = 10.170.0.11:4349
+
+PIC.Core.CS010_us0.port  = 10.170.0.18:4346
+PIC.Core.CS010_us1.port  = 10.170.0.17:4347
+PIC.Core.CS010_us2.port  = 10.170.0.20:4348
+PIC.Core.CS010_us3.port  = 10.170.0.19:4349
+
+PIC.Core.CS016_us0.port  = 10.170.0.42:4346
+PIC.Core.CS016_us1.port  = 10.170.0.41:4347
+PIC.Core.CS016_us2.port  = 10.170.0.44:4348
+PIC.Core.CS016_us3.port  = 10.170.0.43:4349
+
+PIC.Core.CS001T.phaseCenter = [0.111617470856, 0.918506058083, 6364659.82025]
+PIC.Core.CS010T.phaseCenter = [0.111620612449, 0.918506058083, 6364659.82025]
+
+PIC.Core.CS001T_dipole0.position  = [0.111617470856, 0.918506058083, 6364659.82025]
+PIC.Core.CS001T_dipole4.position  = [0.111618168988, 0.918506058083, 6364659.82025]
+PIC.Core.CS001T_dipole8.position  = [0.111618867120, 0.918506058083, 6364659.82025]
+PIC.Core.CS001T_dipole12.position  = [0.111619739784, 0.918506058083, 6364659.82025]
+
+PIC.Core.CS010T_dipole0.position  = [0.111620612449, 0.918506058083, 6364659.82025]
+PIC.Core.CS010T_dipole4.position  = [0.111621310581, 0.918506058083, 6364659.82025]
+PIC.Core.CS010T_dipole8.position  = [0.111622008713, 0.918506058083, 6364659.82025]
+PIC.Core.CS010T_dipole12.position  = [0.111622881377, 0.918506058083, 6364659.82025]
+
+PIC.Core.CS001T_dipole0.port = 10.170.0.121:4346
+PIC.Core.CS001T_dipole4.port = 10.170.0.123:4347
+PIC.Core.CS001T_dipole8.port = 10.170.0.113:4348
+PIC.Core.CS001T_dipole12.port = 10.170.0.115:4349
+
+PIC.Core.CS010T_dipole0.port = 10.170.0.122:4346
+PIC.Core.CS010T_dipole4.port = 10.170.0.124:4347
+PIC.Core.CS010T_dipole8.port = 10.170.0.114:4348
+PIC.Core.CS010T_dipole12.port = 10.170.0.116:4349
+
+PIC.Core.B06.phaseCenter     = [0.119884530715, 0.920263520535, 6364621.19236]
+PIC.Core.B06_0.position      = [0.119884530715, 0.920263520535, 6364621.19236]
+PIC.Core.B06_0.port          = 10.170.0.50:4346
diff --git a/Appl/CEP/CS1/CS1_Run/test/Makefile.am b/Appl/CEP/CS1/CS1_Run/test/Makefile.am
deleted file mode 100644
index 75f0fcc64b9eb84bbb32b8bc80bd37abc98173c8..0000000000000000000000000000000000000000
--- a/Appl/CEP/CS1/CS1_Run/test/Makefile.am
+++ /dev/null
@@ -1,30 +0,0 @@
-check_PROGRAMS        =
-
-# programs to run through supplied checktools
-CHECKTOOLPROGS        =
-#ENDCHECKTOOLPROGS
-
-# scripts used to run tests
-TESTSCRIPTS           =
-
-# scripts and possible programs to run tests
-TESTS	              = $(TESTSCRIPTS)
-
-XFAIL_TESTS           =
-
-# all files (.run, .stdout, .in, .log_prop, etc.) needed to run tests
-EXTRA_DIST            = $(TESTSCRIPTS)
-
-# Lines to build a test program testprg
-#testprg_SOURCES		= testprg.cc
-#testprg_LDADD			= ../src/libcs1_run.la
-#testprg_DEPENDENCIES		= ../src/libcs1_run.la $(LOFAR_DEPEND)
-
-TESTS_ENVIRONMENT		= lofar_sharedir=$(lofar_sharedir) \
-				  srcdir=$(srcdir) \
-				  LOFARROOT=$(LOFARROOT) \
-				  LOFARDATAROOT=$(LOFARDATAROOT) \
-				  MPIBIN=$(MPIBIN)
-
-
-include $(top_srcdir)/Makefile.common
diff --git a/Appl/CEP/CS1/CS1_Storage/include/CS1_Storage/Makefile.am b/Appl/CEP/CS1/CS1_Storage/include/CS1_Storage/Makefile.am
index 02e021caf25f40342a1766ffde75dd61a2f09682..607889cd4de7d446c16e1fa20156df02a3b67741 100644
--- a/Appl/CEP/CS1/CS1_Storage/include/CS1_Storage/Makefile.am
+++ b/Appl/CEP/CS1/CS1_Storage/include/CS1_Storage/Makefile.am
@@ -1,7 +1,21 @@
-pkginclude_HEADERS =    	\
+# if HAVE_SHMEM
+# SUBDIRS = shmem .
+# endif
+
+INSTHDRS =    	\
 AH_Storage.h	\
 WH_SubbandWriter.h   	\
 MSWriter.h     	\
 MSWriterImpl.h
 
+NOINSTHDRS =
+
+TCCHDRS =
+
+nobase_pkginclude_HEADERS = $(INSTHDRS) $(TCCHDRS)
+
+noinst_HEADERS = $(NOINSTHDRS)
+
+DOCHDRS = $(INSTHDRS) $(NOINSTHDRS)
+
 include $(top_srcdir)/Makefile.common
diff --git a/Appl/CEP/CS1/CS1_Storage/include/CS1_Storage/WH_SubbandWriter.h b/Appl/CEP/CS1/CS1_Storage/include/CS1_Storage/WH_SubbandWriter.h
index 8b1613925454a0c079d76bb008902e3b5c465f19..d7b746d75e01bc87c0554bdf38216fb1c5d90b26 100644
--- a/Appl/CEP/CS1/CS1_Storage/include/CS1_Storage/WH_SubbandWriter.h
+++ b/Appl/CEP/CS1/CS1_Storage/include/CS1_Storage/WH_SubbandWriter.h
@@ -85,9 +85,9 @@ namespace LOFAR
 
       vector <MSWriter *> itsWriters;
 
-      uint itsNrSubbandsPerCell; ///< Number of subbands per BG/L cell
+      uint itsNrSubbandsPerPset;
       uint itsNrSubbandsPerStorage;
-      uint itsNrInputChannels;
+      uint itsNrInputChannelsPerPset;
       uint itsNrSubbandsPerMS;
 
       vector<uint> itsCurrentInputs;
diff --git a/Appl/CEP/CS1/CS1_Storage/src/AH_Storage.cc b/Appl/CEP/CS1/CS1_Storage/src/AH_Storage.cc
index a97633dd29f2ea764db027a9520166aadc2600cd..91380303091726e6174eb0f09311a2ba29e16ad8 100644
--- a/Appl/CEP/CS1/CS1_Storage/src/AH_Storage.cc
+++ b/Appl/CEP/CS1/CS1_Storage/src/AH_Storage.cc
@@ -53,30 +53,30 @@ namespace LOFAR
 
       uint nrSubbands = itsCS1PS->nrSubbands();
       ASSERT(nrSubbands > 0);
-      uint nrSubbandsPerCell = itsCS1PS->nrSubbandsPerCell();
-      ASSERT(nrSubbandsPerCell > 0);
-      uint nrInputChannels = (itsCS1PS->useGather() ? 1 : itsCS1PS->getUint32("OLAP.BGLProc.nodesPerPset")) * itsCS1PS->getUint32("OLAP.BGLProc.psetsPerCell");
+      uint nrSubbandsPerPset = itsCS1PS->nrSubbandsPerPset();
+      ASSERT(nrSubbandsPerPset > 0);
+      uint nrInputChannels = itsCS1PS->useGather() ? 1 : itsCS1PS->nrCoresPerPset();
       ASSERT(nrInputChannels > 0);
       uint nrPsetsPerStorage = itsParamSet.getUint32("OLAP.psetsPerStorage");
-      ASSERT(nrSubbands % nrSubbandsPerCell == 0);
-      ASSERT(nrSubbands / nrSubbandsPerCell % nrPsetsPerStorage == 0);
+      ASSERT(nrSubbands % nrSubbandsPerPset == 0);
+      ASSERT(nrSubbands / nrSubbandsPerPset % nrPsetsPerStorage == 0);
 
       // We must derive how many WH_SubbandWriter objects we have to
-      // create. Each WH_SubbandWriter will write up to \a nrSubbandsPerCell
+      // create. Each WH_SubbandWriter will write up to nrSubbandsPerPset * nrPsetsPerStorage subbands
       // to an AIPS++ Measurement Set.
-      uint nrWriters = nrSubbands / nrSubbandsPerCell / nrPsetsPerStorage;
+      uint nrWriters = nrSubbands / nrSubbandsPerPset / nrPsetsPerStorage;
       uint maxConcurrent = itsCS1PS->getInt32("OLAP.BGLProc.maxConcurrentComm");
       LOG_TRACE_VAR_STR("Creating " << nrWriters << " subband writers ...");
 
-      for (uint nw = 0; nw < nrWriters; ++nw)
+      for (unsigned nw = 0; nw < nrWriters; ++nw)
       {
         // For now, we'll assume that the subbands can be sorted and grouped
         // by ID. Hence, the first WH_SubbandWriter will write the first \a
-        // nrSubbandsPerCell subbands, the second will write the second \a
-        // nrSubbandsPerCell, etc.
-        vector<uint> sbIDs(nrSubbandsPerCell * nrPsetsPerStorage);
-        for (uint i = 0; i < nrSubbandsPerCell * nrPsetsPerStorage; ++i) {
-          sbIDs[i] = nrSubbandsPerCell * nrPsetsPerStorage * nw + i;         
+        // nrSubbandsPerPset * nrPsetsPerStorage subbands, the second will
+        // write the next nrSubbandsPerPset * nrPsetsPerStorage, etc.
+        vector<uint> sbIDs(nrSubbandsPerPset * nrPsetsPerStorage);
+        for (uint i = 0; i < nrSubbandsPerPset * nrPsetsPerStorage; ++i) {
+          sbIDs[i] = nrSubbandsPerPset * nrPsetsPerStorage * nw + i;         
 	  LOG_TRACE_LOOP_STR("Writer " << nw << ": sbIDs[" << i << "] = " 
                              << sbIDs[i]);
         }
@@ -108,7 +108,7 @@ namespace LOFAR
 	}
       }
 #ifdef HAVE_MPI
-      ASSERTSTR (TH_MPI::getNumberOfNodes() ==  nrWriters,
+      ASSERTSTR((unsigned) TH_MPI::getNumberOfNodes() == nrWriters,
                  TH_MPI::getNumberOfNodes() << " == " << nrWriters );
 #endif
     }
diff --git a/Appl/CEP/CS1/CS1_Storage/src/WH_SubbandWriter.cc b/Appl/CEP/CS1/CS1_Storage/src/WH_SubbandWriter.cc
index fb46e13458127ee2ecdfcd7df5ae3e602074a684..d042585e76315f67b35e02102732164e5945d555 100644
--- a/Appl/CEP/CS1/CS1_Storage/src/WH_SubbandWriter.cc
+++ b/Appl/CEP/CS1/CS1_Storage/src/WH_SubbandWriter.cc
@@ -32,6 +32,7 @@
 // Application specific includes
 #include <CS1_Storage/WH_SubbandWriter.h>
 #include <CS1_Interface/DH_Visibilities.h>
+#include <CS1_Interface/BGL_Mapping.h>
 #include <CS1_Storage/MSWriter.h>
 #include <tinyCEP/Sel_RoundRobin.h>
 #include <Transport/TH_MPI.h>
@@ -143,15 +144,15 @@ namespace LOFAR
       vector<double> antPos = itsCS1PS->positions();
       ASSERTSTR(antPos.size() == 3 * itsNStations,
                 antPos.size() << " == " << 3 * itsNStations);
-      itsNrSubbandsPerCell    = itsCS1PS->nrSubbandsPerCell();
-      itsNrSubbandsPerStorage = itsNrSubbandsPerCell * itsCS1PS->getUint32("OLAP.psetsPerStorage");
-      itsNrInputChannels      = itsCS1PS->useGather() ? itsCS1PS->getUint32("OLAP.BGLProc.psetsPerCell") : itsCS1PS->nrBGLNodesPerCell();
-      itsCurrentInputs.resize(itsNrSubbandsPerStorage / itsNrSubbandsPerCell, 0);
+      itsNrSubbandsPerPset	= itsCS1PS->nrSubbandsPerPset();
+      itsNrSubbandsPerStorage	= itsNrSubbandsPerPset * itsCS1PS->nrPsetsPerStorage();
+      itsNrInputChannelsPerPset = itsCS1PS->useGather() ? 1 : itsCS1PS->nrCoresPerPset();
+      itsCurrentInputs.resize(itsNrSubbandsPerStorage / itsNrSubbandsPerPset, 0);
       LOG_TRACE_VAR_STR("SubbandsPerStorage = " << itsNrSubbandsPerStorage);
       vector<string> storageStationNames = itsCS1PS->getStringVector("OLAP.storageStationNames");
 
       itsNrSubbandsPerMS = itsCS1PS->getUint32("OLAP.StorageProc.subbandsPerMS");
-      ASSERT(itsCS1PS->getUint32("OLAP.subbandsPerPset") * itsCS1PS->getUint32("OLAP.psetsPerStorage") % itsNrSubbandsPerMS == 0);
-      unsigned mssesPerStorage = itsCS1PS->getUint32("OLAP.subbandsPerPset") * itsCS1PS->getUint32("OLAP.psetsPerStorage") / itsNrSubbandsPerMS;
+      ASSERT(itsNrSubbandsPerMS > 0 && itsNrSubbandsPerStorage % itsNrSubbandsPerMS == 0); // guard first: avoids modulo/division by zero on a bad subbandsPerMS
+      unsigned mssesPerStorage = itsNrSubbandsPerStorage / itsNrSubbandsPerMS; // exact: divisibility asserted above; reuses the cached subbands-per-storage product
       itsWriters.resize(mssesPerStorage);
       itsFieldIDs.resize(mssesPerStorage);
@@ -237,11 +238,16 @@ namespace LOFAR
       }
 #endif
 
-      // Write the visibilities for all subbands per cell.
+      // Write the visibilities of all itsNrSubbandsPerStorage subbands, pset by pset.
       for (uint sb = 0; sb < itsNrSubbandsPerStorage; ++ sb) {
         // find out from which input channel we should read
-	unsigned cell	      = sb / itsNrSubbandsPerCell;
-	unsigned inputChannel = itsCurrentInputs[cell] + cell * itsNrInputChannels;
+	unsigned pset = sb / itsNrSubbandsPerPset;
+	unsigned core = itsCurrentInputs[pset];
+
+	if (!itsCS1PS->useGather())
+	  core = BGL_Mapping::mapCoreOnPset(core, pset);
+
+	unsigned inputChannel = core + pset * itsNrInputChannelsPerPset;
 
 	DH_Visibilities			    *inputDH	= static_cast<DH_Visibilities *>(getDataManager().getInHolder(inputChannel));
         DH_Visibilities::NrValidSamplesType *valSamples = &inputDH->getNrValidSamples(0, 0);
@@ -301,8 +307,8 @@ namespace LOFAR
 	getDataManager().readyWithInHolder(inputChannel);
 
 	// select next channel
-	if (++ itsCurrentInputs[cell] == itsNrInputChannels)
-	  itsCurrentInputs[cell] = 0;
+	if (++ itsCurrentInputs[pset] == itsNrInputChannelsPerPset)
+	  itsCurrentInputs[pset] = 0;
       }
 
       // Update the time counter.
diff --git a/LCS/AMC/AMCImpl/src/WGS84toITRF.cc b/LCS/AMC/AMCImpl/src/WGS84toITRF.cc
index 49f92e2ef4f36d7448a29cc2dd18b498734366cf..d76a98fe41284fde87bfb35736e12d4507d2d3d9 100644
--- a/LCS/AMC/AMCImpl/src/WGS84toITRF.cc
+++ b/LCS/AMC/AMCImpl/src/WGS84toITRF.cc
@@ -2,6 +2,7 @@
 #include <measures/Measures/MPosition.h>
 #include <measures/Measures/MeasConvert.h>
 #include <measures/Measures/MeasFrame.h>
+#include <measures/Measures/MCPosition.h>
 #include <casa/Exceptions/Error.h>
 #include <Common/lofar_iostream.h>
 #include <Common/lofar_iomanip.h>
diff --git a/autoconf_share/variants.bglfen b/autoconf_share/variants.bglfen
index 1d1ad4c30c3ce44f98d73fc4ad867fc7b72f7219..08aa0750ca2ea2b0c2a9eeb2b336d4d8262c80a8 100644
--- a/autoconf_share/variants.bglfen
+++ b/autoconf_share/variants.bglfen
@@ -2,7 +2,7 @@ xlc.compiler.conf: CC=blrts_xlc CXX=blrts_xlC
 blrts.compiler.conf: CC=blrts_xlc CXX=blrts_xlC
 gnubgl.compiler.conf: CC=/bgl/BlueLight/ppcfloor/blrts-gnu/bin/powerpc-bgl-blrts-gnu-gcc CXX=/bgl/BlueLight/ppcfloor/blrts-gnu/bin/powerpc-bgl-blrts-gnu-g++
 
-bgl.variant.conf:         $(lofar_root) $(debugopt) $(nothreads) $(noshmem) $(bgl_cpp) $(bgl_ldd) --without-log4cplus --with-bglmpich $(bgl_searchpath)
+bgl.variant.conf:         $(lofar_root) $(debugopt) $(nothreads) $(noshmem) $(bgl_cpp) $(bgl_ldd) --without-tinycep --without-log4cplus --with-bglmpich $(bgl_searchpath)
 fpic.variant.conf:	  $(debugopt) $(threads) --without-log4cplus --with-optimize='-O2 -g -fPIC' $(ion_searchpath)
 
 lofar_root.var:
diff --git a/autoconf_share/variants.liifen b/autoconf_share/variants.liifen
index 6c4ca60169c8645b295c7ac676e775b46bf9695d..2b81cb2ea3050eba185da1fa75b78d732cf3d416 100644
--- a/autoconf_share/variants.liifen
+++ b/autoconf_share/variants.liifen
@@ -1,8 +1,11 @@
 gnu.compiler.conf: CXX="$CCACHE /usr/bin/g++"
 gnu64.compiler.conf: CXX="$CCACHE /usr/bin/g++"
+#gnu64.compiler.aipspp.var: --with-aipspp=/app/aips++/Unstable/linux_gnu
+gnu64.compiler.aipspp.var: --with-casacore=/app/aips++/casacore_64 --with-wcs=/app/wcs_64
+gnu.compiler.aipspp.var: --with-casacore=/app/aips++/casacore_64 --with-wcs=/app/wcs_64
 
 mpich.var:                --with-mpich=/usr/local/ofed/mpi/gcc/mvapich-0.9.7-mlx2.2.0
-openmpi.var:              --with-mpich=/usr/local/ofed/mpi/gcc/openmpi-1.1b1-1
+openmpi.var:              --with-mpich=/usr/local/ofed/mpi/gcc/openmpi-1.1.1-1
 
 openmpi.variant.conf:	  $(standard) $(openmpi)
 openmpi-opt.variant.conf: $(standard) $(openmpi) $(optimize)