From 9d4ee0eae4969b8172c1538aba6238b9b5953b5c Mon Sep 17 00:00:00 2001
From: John Romein <romein@astron.nl>
Date: Tue, 19 Aug 2008 10:15:58 +0000
Subject: [PATCH] bug 225: Use assembly code to copy data from RSP packet into
 BeamletBuffer. nrTimesPerPacket must be 16.

---
 Appl/CEP/CS1/CS1_IONProc/configure.in         |   1 +
 Appl/CEP/CS1/CS1_IONProc/src/BeamletBuffer.cc | 114 +++++++++++-------
 Appl/CEP/CS1/CS1_IONProc/src/BeamletBuffer.h  |  26 ++--
 Appl/CEP/CS1/CS1_IONProc/src/InputSection.cc  |   2 +-
 Appl/CEP/CS1/CS1_IONProc/src/InputThread.cc   |   2 +-
 Appl/CEP/CS1/CS1_IONProc/src/InputThreadAsm.S |  86 +++++++++++++
 Appl/CEP/CS1/CS1_IONProc/src/InputThreadAsm.h |  33 +++++
 Appl/CEP/CS1/CS1_IONProc/src/Makefile.am      |   2 +
 autoconf_share/variants.bgfen                 |   7 +-
 9 files changed, 216 insertions(+), 57 deletions(-)
 create mode 100644 Appl/CEP/CS1/CS1_IONProc/src/InputThreadAsm.S
 create mode 100644 Appl/CEP/CS1/CS1_IONProc/src/InputThreadAsm.h

diff --git a/Appl/CEP/CS1/CS1_IONProc/configure.in b/Appl/CEP/CS1/CS1_IONProc/configure.in
index 2e45508b9da..d4984a297ba 100644
--- a/Appl/CEP/CS1/CS1_IONProc/configure.in
+++ b/Appl/CEP/CS1/CS1_IONProc/configure.in
@@ -16,6 +16,7 @@ AC_PROG_YACC
 AC_PROG_CC
 AC_PROG_CXX
 AM_PROG_LEX
+AM_PROG_AS
 AC_PROG_INSTALL
 AC_PROG_LN_S
 AC_DISABLE_STATIC
diff --git a/Appl/CEP/CS1/CS1_IONProc/src/BeamletBuffer.cc b/Appl/CEP/CS1/CS1_IONProc/src/BeamletBuffer.cc
index ab4e8ffc370..a31d53ab434 100644
--- a/Appl/CEP/CS1/CS1_IONProc/src/BeamletBuffer.cc
+++ b/Appl/CEP/CS1/CS1_IONProc/src/BeamletBuffer.cc
@@ -23,32 +23,46 @@
 //# Always #include <lofar_config.h> first!
 #include <lofar_config.h>
 
+#include <CS1_Interface/Align.h>
 #include <BeamletBuffer.h>
 #include <ION_Allocator.h>
+#include <InputThreadAsm.h>
+
+#include <boost/lexical_cast.hpp>
+#include <stdexcept>
 
 
 namespace LOFAR {
 namespace CS1 {
 
-BeamletBuffer::BeamletBuffer(unsigned bufferSize, unsigned nrSubbands, unsigned history, bool isSynchronous, unsigned maxNetworkDelay)
+// Construct the circular buffer.  The buffer size is aligned to a multiple
+// of the number of times per packet; by setting itsOffset to a proper value,
+// this ensures that an input packet never wraps around the circular buffer.
+// Throws std::runtime_error if nrTimesPerPacket is not the supported value.
+BeamletBuffer::BeamletBuffer(unsigned bufferSize, unsigned nrTimesPerPacket, unsigned nrSubbands, unsigned nrBeams, unsigned history, bool isSynchronous, unsigned maxNetworkDelay)
 :
   itsNSubbands(nrSubbands),
-  itsSize(bufferSize),
+  itsSize(align(bufferSize, nrTimesPerPacket)),
   itsHistorySize(history),
-  itsSBBuffers(reinterpret_cast<SampleType *>(ION_Allocator().allocate(nrSubbands * bufferSize * NR_POLARIZATIONS * sizeof(SampleType), 32)), boost::extents[nrSubbands][bufferSize][NR_POLARIZATIONS]),
+  itsSBBuffers(reinterpret_cast<SampleType *>(ION_Allocator().allocate(nrSubbands * itsSize * NR_POLARIZATIONS * sizeof(SampleType), 32)), boost::extents[nrSubbands][itsSize][NR_POLARIZATIONS]),
+  itsOffset(0),
+  itsStride(reinterpret_cast<char *>(itsSBBuffers[1].origin()) - reinterpret_cast<char *>(itsSBBuffers[0].origin())),
   itsReadTimer("buffer read", true),
   itsWriteTimer("buffer write", true)
 {
+  if (nrTimesPerPacket != this->nrTimesPerPacket) // the parameter shadows the static class constant; report the required value, not the rejected one
+    throw std::runtime_error(std::string("OLAP.nrTimesInFrame should be ") + boost::lexical_cast<std::string>(static_cast<unsigned>(this->nrTimesPerPacket)));
+
   pthread_mutex_init(&itsValidDataMutex, 0);
 
   if (isSynchronous)
-    itsSynchronizedReaderWriter = new SynchronizedReaderAndWriter(bufferSize);
+    itsSynchronizedReaderWriter = new SynchronizedReaderAndWriter(itsSize);
   else
     itsSynchronizedReaderWriter = new TimeSynchronizedReader(maxNetworkDelay);  
 
-  itsEnd.reserve(MAX_BEAMLETS);
-  itsStartI.reserve(MAX_BEAMLETS);
-  itsEndI.reserve(MAX_BEAMLETS);
+  itsEnd.resize(nrBeams);
+  itsStartI.resize(nrBeams);
+  itsEndI.resize(nrBeams);
 }
 
 
@@ -60,14 +74,33 @@ BeamletBuffer::~BeamletBuffer()
 }
 
 
-void BeamletBuffer::writeElements(Beamlet *data, const TimeStamp &begin, unsigned nrElements)
+void BeamletBuffer::writePacketData(Beamlet *data, const TimeStamp &begin)
 {
-  TimeStamp end = begin + nrElements;
+  TimeStamp end = begin + nrTimesPerPacket;
   itsWriteTimer.start();
 
   // cache previous index, to avoid expensive mapTime2Index()
-  unsigned startI = (begin == itsPreviousTimeStamp) ? itsPreviousI : mapTime2Index(begin);
-  unsigned endI   = startI + nrElements;
+  unsigned startI;
+
+  if (begin == itsPreviousTimeStamp) {
+    startI = itsPreviousI;
+  } else {
+    startI = mapTime2Index(begin);
+
+    if (!aligned(startI, nrTimesPerPacket)) {
+      // RSP board reset?  Recompute itsOffset and clear the entire buffer.
+      itsOffset = - (startI % nrTimesPerPacket);
+      startI    = mapTime2Index(begin);
+
+      pthread_mutex_lock(&itsValidDataMutex);
+      itsValidData.reset();
+      pthread_mutex_unlock(&itsValidDataMutex);
+    }
+
+    //std::clog << "timestamp = " << (uint64_t) begin << ", itsOffset = " << itsOffset << std::endl;
+  }
+
+  unsigned endI = startI + nrTimesPerPacket;
 
   if (endI >= itsSize)
     endI -= itsSize;
@@ -80,30 +113,29 @@ void BeamletBuffer::writeElements(Beamlet *data, const TimeStamp &begin, unsigne
   // do not write in circular buffer section that is being read
   itsLockedRanges.lock(startI, endI, itsSize);
 
-  if (endI < startI) {
-    // the data wraps around the allocated memory, so do it in two parts
-    
-    unsigned chunk1 = itsSize - startI;
-    for (unsigned sb = 0; sb < itsNSubbands; sb ++) {
-      memcpy(itsSBBuffers[sb][startI].origin(), &data[0]     , sizeof(SampleType[chunk1][NR_POLARIZATIONS]));
-      memcpy(itsSBBuffers[sb][0].origin()     , &data[chunk1], sizeof(SampleType[endI][NR_POLARIZATIONS]));
-      data += nrElements;		
-    }
-  } else {
-    for (unsigned sb = 0; sb < itsNSubbands; sb ++) {
-      if (sizeof(SampleType[NR_POLARIZATIONS]) == sizeof(double)) {
-	double *dst = reinterpret_cast<double *>(itsSBBuffers[sb][startI].origin());
-	const double *src = reinterpret_cast<const double *>(data);
-
-	for (unsigned time = 0; time < nrElements; time ++)
-	  dst[time] = src[time];
-      } else {
-	memcpy(itsSBBuffers[sb][startI].origin(), data, sizeof(SampleType[endI - startI][NR_POLARIZATIONS]));
-      }
-
-      data += nrElements;		
-    }
+#if defined HAVE_BGP
+  void *dst = itsSBBuffers[0][startI].origin();
+  
+#if NR_BITS_PER_SAMPLE == 16
+  _copy_pkt_to_bbuffer_128_bytes(dst, itsStride, data, itsNSubbands);
+#elif NR_BITS_PER_SAMPLE == 8
+  _copy_pkt_to_bbuffer_64_bytes(dst, itsStride, data, itsNSubbands);
+#elif NR_BITS_PER_SAMPLE == 4
+  _copy_pkt_to_bbuffer_32_bytes(dst, itsStride, data, itsNSubbands);
+#else
+#error Not implemented
+#endif
+#else
+  Beamlet *dst = reinterpret_cast<Beamlet *>(itsSBBuffers[0][startI].origin());
+  size_t stride = reinterpret_cast<Beamlet *>(itsSBBuffers[1][startI].origin()) - dst;
+  
+  for (unsigned sb = 0; sb < itsNSubbands; sb ++) {
+    for (unsigned time = 0; time < nrTimesPerPacket; time ++)
+      dst[time] = *data ++;
+
+    dst += stride;
   }
+#endif
 
   // forget old ValidData
   pthread_mutex_lock(&itsValidDataMutex);
@@ -131,9 +163,9 @@ void BeamletBuffer::startReadTransaction(const std::vector<TimeStamp> &begin, un
   itsBegin = begin;
 
   for (unsigned beam = 0; beam < begin.size(); beam++) {
-    itsEnd.push_back(begin[beam] + nrElements);
-    itsStartI.push_back(mapTime2Index(begin[beam]));
-    itsEndI.push_back(mapTime2Index(itsEnd[beam]));
+    itsEnd[beam]    = begin[beam] + nrElements;
+    itsStartI[beam] = mapTime2Index(begin[beam]);
+    itsEndI[beam]   = mapTime2Index(itsEnd[beam]);
   }
  
   TimeStamp minBegin = *std::min_element(itsBegin.begin(),  itsBegin.end());
@@ -153,8 +185,8 @@ void BeamletBuffer::sendSubband(Stream *str, unsigned subband, unsigned beam) co
 {
   // Align to 32 bytes and make multiple of 32 bytes by prepending/appending
   // extra data.  Always send 32 bytes extra, even if data was already aligned.
-  unsigned startI = itsStartI[beam] & ~(32 / sizeof(Beamlet) - 1); // round down
-  unsigned endI   = (itsEndI[beam] + 32 / sizeof(Beamlet)) & ~(32 / sizeof(Beamlet) - 1); // round up, possibly adding 32 bytes
+  unsigned startI = align(itsStartI[beam] - itsAlignment + 1, itsAlignment); // round down
+  unsigned endI   = align(itsEndI[beam] + 1, itsAlignment); // round up, possibly adding 32 bytes
   
   if (endI < startI) {
     // the data wraps around the allocated memory, so copy in two parts
@@ -204,10 +236,6 @@ void BeamletBuffer::stopReadTransaction()
   // subtract 16 extra; due to alignment restrictions and the changing delays,
   // it is hard to predict where the next read will begin.
   
-  itsStartI.clear();
-  itsEndI.clear();
-  itsEnd.clear();
-
   itsReadTimer.stop();
 }
 
diff --git a/Appl/CEP/CS1/CS1_IONProc/src/BeamletBuffer.h b/Appl/CEP/CS1/CS1_IONProc/src/BeamletBuffer.h
index 5ab3bca81b1..97d93063743 100644
--- a/Appl/CEP/CS1/CS1_IONProc/src/BeamletBuffer.h
+++ b/Appl/CEP/CS1/CS1_IONProc/src/BeamletBuffer.h
@@ -48,17 +48,24 @@ namespace CS1 {
 
 typedef INPUT_SAMPLE_TYPE SampleType;
 
-struct Beamlet {
-  SampleType Xpol, Ypol;
-};
+// Beamlet: a "simple" type whose size equals the size of two samples (X and Y polarizations)
+#if NR_BITS_PER_SAMPLE == 16
+typedef double Beamlet;
+#elif NR_BITS_PER_SAMPLE == 8
+typedef int32_t Beamlet;
+#elif NR_BITS_PER_SAMPLE == 4
+typedef int16_t Beamlet;
+#else
+#error Not implemented
+#endif
 
 class BeamletBuffer
 {
   public:
-	     BeamletBuffer(unsigned bufferSize, unsigned nrSubbands, unsigned history, bool isSynchronous, unsigned maxNetworkDelay);
+	     BeamletBuffer(unsigned bufferSize, unsigned nrTimesPerPacket, unsigned nrSubbands, unsigned nrBeams, unsigned history, bool isSynchronous, unsigned maxNetworkDelay);
 	     ~BeamletBuffer();
 
-    void     writeElements(Beamlet *data, const TimeStamp &begin, unsigned nrElements);
+    void     writePacketData(Beamlet *data, const TimeStamp &begin);
 
     void     startReadTransaction(const std::vector<TimeStamp> &begin, unsigned nrElements);
     void     sendSubband(Stream *, unsigned subband, unsigned currentBeam) const;
@@ -67,7 +74,7 @@ class BeamletBuffer
     SparseSet<unsigned> readFlags(unsigned beam);
     void     stopReadTransaction();
     
-    static const unsigned MAX_BEAMLETS    = 8;
+    const static unsigned nrTimesPerPacket = 16;
 
   private:
     unsigned mapTime2Index(TimeStamp time) const;
@@ -79,6 +86,8 @@ class BeamletBuffer
     ReaderAndWriterSynchronization	  *itsSynchronizedReaderWriter;
     LockedRanges			  itsLockedRanges;
     boost::multi_array_ref<SampleType, 3> itsSBBuffers;
+    int					  itsOffset;
+    const static unsigned		  itsAlignment = 32 / sizeof(Beamlet);
 
     // read internals
     std::vector<TimeStamp>		  itsBegin, itsEnd;
@@ -89,6 +98,7 @@ class BeamletBuffer
     // write internals
     TimeStamp				  itsPreviousTimeStamp;
     unsigned				  itsPreviousI;
+    size_t				  itsStride;
 
     NSTimer				  itsReadTimer, itsWriteTimer;
 };
@@ -96,13 +106,13 @@ class BeamletBuffer
 
 inline unsigned BeamletBuffer::alignmentShift(unsigned beam) const
 {
-  return itsStartI[beam] % (32 / sizeof(Beamlet));
+  return itsStartI[beam] % itsAlignment; // position of the beam's start index within its group of itsAlignment (= 32 / sizeof(Beamlet)) Beamlets
 }
 
 inline unsigned BeamletBuffer::mapTime2Index(TimeStamp time) const
 { 
   // TODO: this is very slow because of the %
-  return time % itsSize;
+  const unsigned index = time % itsSize + itsOffset; return index < itsSize ? index : index + itsSize; // itsOffset <= 0, so an index that underflowed is wrapped back into [0, itsSize)
 }
 
 } // namespace CS1
diff --git a/Appl/CEP/CS1/CS1_IONProc/src/InputSection.cc b/Appl/CEP/CS1/CS1_IONProc/src/InputSection.cc
index 82fd10011e1..da488ef687b 100644
--- a/Appl/CEP/CS1/CS1_IONProc/src/InputSection.cc
+++ b/Appl/CEP/CS1/CS1_IONProc/src/InputSection.cc
@@ -200,7 +200,7 @@ void InputSection::preprocess(const CS1_Parset *ps)
   itsBBuffers.resize(itsNrInputs);
 
   for (unsigned rsp = 0; rsp < itsNrInputs; rsp ++)
-    itsBBuffers[rsp] = new BeamletBuffer(ps->inputBufferSize(), ps->nrSubbandsPerFrame(), itsNHistorySamples, !itsIsRealTime, itsMaxNetworkDelay);
+    itsBBuffers[rsp] = new BeamletBuffer(ps->inputBufferSize(), ps->getUint32("OLAP.nrTimesInFrame"), ps->nrSubbandsPerFrame(), itsNrBeams, itsNHistorySamples, !itsIsRealTime, itsMaxNetworkDelay);
 
 #if defined DUMP_RAW_DATA
   vector<string> rawDataServers = ps->getStringVector("OLAP.OLAP_Conn.rawDataServers");
diff --git a/Appl/CEP/CS1/CS1_IONProc/src/InputThread.cc b/Appl/CEP/CS1/CS1_IONProc/src/InputThread.cc
index bf229fb2df3..85ee0bab8f9 100644
--- a/Appl/CEP/CS1/CS1_IONProc/src/InputThread.cc
+++ b/Appl/CEP/CS1/CS1_IONProc/src/InputThread.cc
@@ -203,7 +203,7 @@ void InputThread::mainLoop()
     }
 
     // expected packet received so write data into corresponding buffer
-    itsArgs.BBuffer->writeElements(reinterpret_cast<Beamlet *>(packet + 16), actualstamp, itsArgs.nrTimesPerPacket);
+    itsArgs.BBuffer->writePacketData(reinterpret_cast<Beamlet *>(packet + 16), actualstamp);
   }
 
   std::clog << "InputThread::mainLoop() exiting loop" << std::endl;
diff --git a/Appl/CEP/CS1/CS1_IONProc/src/InputThreadAsm.S b/Appl/CEP/CS1/CS1_IONProc/src/InputThreadAsm.S
new file mode 100644
index 00000000000..c5059a86b0c
--- /dev/null
+++ b/Appl/CEP/CS1/CS1_IONProc/src/InputThreadAsm.S
@@ -0,0 +1,86 @@
+#  InputThreadAsm.S: fast packet->BBuffer copy routines
+#  P.O.Box 2, 7990 AA Dwingeloo, The Netherlands, seg@astron.nl
+#
+#  This program is free software; you can redistribute it and/or modify
+#  it under the terms of the GNU General Public License as published by
+#  the Free Software Foundation; either version 2 of the License, or
+#  (at your option) any later version.
+#
+#  This program is distributed in the hope that it will be useful,
+#  but WITHOUT ANY WARRANTY; without even the implied warranty of
+#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+#  GNU General Public License for more details.
+#
+#  You should have received a copy of the GNU General Public License
+#  along with this program; if not, write to the Free Software
+#  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+#
+#  $Id$
+
+#if defined HAVE_BGL || defined HAVE_BGP
+
+.global	_copy_pkt_to_bbuffer_32_bytes	# void (void *dst, size_t stride, const void *src, unsigned nrSubbands)
+_copy_pkt_to_bbuffer_32_bytes:		# PowerPC calling convention: r3 = dst, r4 = stride, r5 = src, r6 = nrSubbands
+
+	addi	4,4,-1*16	# r4 = stride - 16: step from the last store of one subband to the first store of the next
+	mtctr	6		# loop count = nrSubbands; must be non-zero, since bdnz decrements before testing
+	li	8,16		# r8 = 16: step between consecutive 16-byte loads/stores
+	addi	5,5,-16		# bias src, because every load first adds r8 to r5
+	sub	3,3,4		# bias dst, because the first store of each iteration first adds r4 to r3
+
+1:	lfpdux	0,5,8		# r5 += 16; load 16 bytes into register pair 0
+	lfpdux	1,5,8		# r5 += 16; load 16 bytes into register pair 1
+	stfpdux	0,3,4		# r3 += stride - 16; store pair 0 at the start of this subband
+	stfpdux	1,3,8		# r3 += 16; store pair 1
+	bdnz	1b		# next subband while --ctr != 0
+	blr
+
+.global	_copy_pkt_to_bbuffer_64_bytes	# void (void *dst, size_t stride, const void *src, unsigned nrSubbands)
+_copy_pkt_to_bbuffer_64_bytes:		# PowerPC calling convention: r3 = dst, r4 = stride, r5 = src, r6 = nrSubbands
+
+	addi	4,4,-3*16	# r4 = stride - 48: step from the last store of one subband to the first store of the next
+	mtctr	6		# loop count = nrSubbands; must be non-zero, since bdnz decrements before testing
+	li	8,16		# r8 = 16: step between consecutive 16-byte loads/stores
+	addi	5,5,-16		# bias src, because every load first adds r8 to r5
+	sub	3,3,4		# bias dst, because the first store of each iteration first adds r4 to r3
+
+1:	lfpdux	0,5,8		# four update-form loads: 64 consecutive bytes from src into register pairs 0-3
+	lfpdux	1,5,8
+	lfpdux	2,5,8
+	lfpdux	3,5,8
+	stfpdux	0,3,4		# r3 += stride - 48; store pair 0 at the start of this subband
+	stfpdux	1,3,8		# remaining stores advance r3 by 16 bytes each
+	stfpdux	2,3,8
+	stfpdux	3,3,8
+	bdnz	1b		# next subband while --ctr != 0
+	blr
+
+.global	_copy_pkt_to_bbuffer_128_bytes	# void (void *dst, size_t stride, const void *src, unsigned nrSubbands)
+_copy_pkt_to_bbuffer_128_bytes:		# PowerPC calling convention: r3 = dst, r4 = stride, r5 = src, r6 = nrSubbands
+
+	addi	4,4,-7*16	# r4 = stride - 112: step from the last store of one subband to the first store of the next
+	mtctr	6		# loop count = nrSubbands; must be non-zero, since bdnz decrements before testing
+	li	8,16		# r8 = 16: step between consecutive 16-byte loads/stores
+	addi	5,5,-16		# bias src, because every load first adds r8 to r5
+	sub	3,3,4		# bias dst, because the first store of each iteration first adds r4 to r3
+
+1:	lfpdux	0,5,8		# loads and stores are interleaved in groups (3, 3, 2); together they move 128 bytes
+	lfpdux	1,5,8
+	lfpdux	2,5,8
+	stfpdux	0,3,4		# r3 += stride - 112; store pair 0 at the start of this subband
+	stfpdux	1,3,8		# all remaining stores advance r3 by 16 bytes each
+	stfpdux	2,3,8
+	lfpdux	3,5,8
+	lfpdux	4,5,8
+	lfpdux	5,5,8
+	stfpdux	3,3,8
+	stfpdux	4,3,8
+	stfpdux	5,3,8
+	lfpdux	6,5,8
+	lfpdux	7,5,8
+	stfpdux	6,3,8
+	stfpdux	7,3,8
+	bdnz	1b		# next subband while --ctr != 0
+	blr
+
+#endif
diff --git a/Appl/CEP/CS1/CS1_IONProc/src/InputThreadAsm.h b/Appl/CEP/CS1/CS1_IONProc/src/InputThreadAsm.h
new file mode 100644
index 00000000000..47a4b3d74a0
--- /dev/null
+++ b/Appl/CEP/CS1/CS1_IONProc/src/InputThreadAsm.h
@@ -0,0 +1,33 @@
+//#  InputThreadAsm.h: fast packet->BBuffer copy routines
+//#
+//#  P.O.Box 2, 7990 AA Dwingeloo, The Netherlands, seg@astron.nl
+//#
+//#  This program is free software; you can redistribute it and/or modify
+//#  it under the terms of the GNU General Public License as published by
+//#  the Free Software Foundation; either version 2 of the License, or
+//#  (at your option) any later version.
+//#
+//#  This program is distributed in the hope that it will be useful,
+//#  but WITHOUT ANY WARRANTY; without even the implied warranty of
+//#  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+//#  GNU General Public License for more details.
+//#
+//#  You should have received a copy of the GNU General Public License
+//#  along with this program; if not, write to the Free Software
+//#  Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
+//#
+//#  $Id$
+
+#ifndef LOFAR_APPL_CEP_CS1_CS1_ION_PROC_INPUT_THREAD_ASM_H
+#define LOFAR_APPL_CEP_CS1_CS1_ION_PROC_INPUT_THREAD_ASM_H
+
+#include <stddef.h>  // size_t
+
+// Each routine copies nrSubbands consecutive 32/64/128-byte blocks from src to dst, dst + stride, dst + 2 * stride, ...  They are implemented in InputThreadAsm.S, which only defines them when HAVE_BGL or HAVE_BGP is set.
+extern "C" {
+  void _copy_pkt_to_bbuffer_32_bytes(void *dst, size_t stride, const void *src, unsigned nrSubbands);
+  void _copy_pkt_to_bbuffer_64_bytes(void *dst, size_t stride, const void *src, unsigned nrSubbands);
+  void _copy_pkt_to_bbuffer_128_bytes(void *dst, size_t stride, const void *src, unsigned nrSubbands);
+}
+
+#endif
diff --git a/Appl/CEP/CS1/CS1_IONProc/src/Makefile.am b/Appl/CEP/CS1/CS1_IONProc/src/Makefile.am
index 746e01c7bf7..9a1b4492111 100644
--- a/Appl/CEP/CS1/CS1_IONProc/src/Makefile.am
+++ b/Appl/CEP/CS1/CS1_IONProc/src/Makefile.am
@@ -3,6 +3,7 @@ BeamletBuffer.h			\
 BGL_Personality.h		\
 InputSection.h			\
 InputThread.h			\
+InputThreadAsm.h		\
 ION_Allocator.h			\
 LockedRanges.h			\
 LogThread.h			\
@@ -19,6 +20,7 @@ BGL_Personality.cc		\
 CS1_ION_main.cc		 	\
 InputSection.cc			\
 InputThread.cc			\
+InputThreadAsm.S		\
 ION_Allocator.cc		\
 LogThread.cc			\
 OutputSection.cc		\
diff --git a/autoconf_share/variants.bgfen b/autoconf_share/variants.bgfen
index 09079512d4a..b71532468b9 100644
--- a/autoconf_share/variants.bgfen
+++ b/autoconf_share/variants.bgfen
@@ -1,12 +1,11 @@
-#gnubgp.compiler.conf:  CC=/bgsys/drivers/ppcfloor/gnu-linux/bin/powerpc-bgp-linux-gcc CXX=/bgsys/drivers/ppcfloor/gnu-linux/bin/powerpc-bgp-linux-g++ CCAS=/bgsys/drivers/ppcfloor/gnu-linux/bin/powerpc-bgp-linux-gcc
+gnu.compiler.conf:   CC=/usr/bin/gcc CXX=/usr/bin/g++ CCAS=/bgsys/drivers/ppcfloor/gnu-linux/bin/powerpc-bgp-linux-gcc CCASFLAGS="-D HAVE_BGP" --with-cppflags="-D HAVE_BGP"
 gnubgp.compiler.conf:   CC=/usr/bin/gcc CXX=/usr/bin/g++ CCAS=/bgsys/drivers/ppcfloor/gnu-linux/bin/powerpc-bgp-linux-gcc
 
 gnu.compiler.aipspp.var: --with-casacore=/cephome/romein/packages/casacore-0.3.0/stage --without-wcs
 
-bgp.variant.conf:       $(lofar_root) $(debugopt) $(nothreads) $(noshmem) $(bgp_cpp) $(bgp_ldd) --without-tinycep --without-log4cplus --with-bglmpich='/bgsys/drivers/ppcfloor/comm' --with-fftw2='/cephome/romein/packages/fftw-2.1.5-single-precision' --with-cppflags='-DHAVE_BGP -I/bgsys/drivers/ppcfloor/comm/include -I/bgsys/drivers/ppcfloor/arch/include'
-fpic.variant.conf:      $(debugopt) $(threads) $(aipspp) --without-log4cplus --with-cppflags='-fPIC' $(ion_searchpath) --with-ldflags='-L/bgl/lofar-utils/mass/lib' --with-libs='-lmass'
+bgp.variant.conf:       $(lofar_root) $(debugopt) $(nothreads) $(noshmem) $(bgp_cpp) $(bgp_ldd) --without-log4cplus --with-bglmpich='/bgsys/drivers/ppcfloor/comm' --with-fftw2='/cephome/romein/packages/fftw-2.1.5-single-precision' --with-cppflags='-DHAVE_BGP -I/bgsys/drivers/ppcfloor/comm/include -I/bgsys/drivers/ppcfloor/arch/include'
+#fpic.variant.conf:      $(debugopt) $(threads) $(aipspp) --without-log4cplus --with-cppflags='-fPIC' $(ion_searchpath) --with-ldflags='-L/bgl/lofar-utils/mass/lib' --with-libs='-lmass'
 
 
 bgp_cpp.var:            --with-cppflags='-DHAVE_BGP'
 bgp_ldd.var:            --with-ldflags='-L/bgsys/drivers/ppcfloor/comm/lib -L/bgsys/drivers/ppcfloor/runtime/SPI' --with-libs='-lcxxmpich.cnk -lmpich.cnk -ldcmfcoll.cnk -ldcmf.cnk -lpthread -lrt -lSPI.cna -lm'
-#bgp_ldd.var:           --with-ldflags='-L/bgsys/drivers/ppcfloor/comm/lib -L/bgsys/drivers/ppcfloor/runtime/SPI -L/opt/ibmcmp/vac/bg/9.0/bglib' --with-libs=' -lcxxmpich.cnk -lmpich.cnk -ldcmfcoll.cnk -ldcmf.cnk -lpthread -lrt -lSPI.cna -lxl -lm'
-- 
GitLab