// Patch summary (preamble; text before the first "diff --git" header is not part of any hunk).
//
// Purpose: replace the BGLProc-local CacheAlignedAllocator with a shared, alignment-parameterised
// AlignedStdAllocator in CS1_Interface, and use it for buffers that need 16- or 32-byte alignment.
//
// Per-file changes contained in this patch:
//  * CS1_BGLProc/src/CacheAlignedAllocator.h   - deleted. It defined CACHE_LINE_SIZE (32 when
//    HAVE_BGL/HAVE_BGP is defined, 16 otherwise), CACHE_ALIGNED (an aligned attribute on BGL/BGP,
//    empty otherwise) and CacheAlignedAllocator<T>, whose allocate() returned the result of
//    memalign(CACHE_LINE_SIZE, size * sizeof(T)) unchecked.
//  * CS1_Interface/AlignedStdAllocator.h       - new. AlignedStdAllocator<T, ALIGNMENT> derives from
//    std::allocator<T>, supplies rebind<U>, allocates with posix_memalign() when
//    _POSIX_C_SOURCE >= 200112L || _XOPEN_SOURCE >= 600 and with memalign() otherwise, throws
//    std::bad_alloc when the allocation fails, and releases memory with free().
//  * CS1_BGLProc/src/FCNP_ClientStream.cc,
//    CS1_IONProc/src/FCNP_ServerStream.cc      - the variable-length stack arrays
//    "char tmp[alignedSize] __attribute__ ((aligned(16)))" used as bounce buffers for unaligned
//    or non-multiple-of-16 transfers become std::vector<char, AlignedStdAllocator<char, 16> >;
//    <cstring> and <vector> are now included.
//  * CS1_BGLProc/src/FIR.cc                    - CACHE_ALIGNED on FIR::weights and
//    FIR::bandPassCorrectionFactors is replaced by an explicit __attribute__ ((aligned(32))).
//  * CS1_BGLProc/src/PPF.h                     - itsTmp, itsFFTinData and itsFFToutData switch
//    from CacheAlignedAllocator<fcomplex> to AlignedStdAllocator<fcomplex, 32>.
//  * CS1_IONProc/src/InputSection.cc           - metaDataPerComputeNode becomes a
//    std::vector<SubbandMetaData, AlignedStdAllocator<SubbandMetaData, 16> >.
//  * CS1_Interface/.../Makefile.am             - AlignedStdAllocator.h added to pkginclude_HEADERS.
//
// NOTE(review): FIR.cc and PPF.h previously followed the HAVE_BGL/HAVE_BGP switch (no alignment
//   attribute / 16-byte allocations on other builds); after this patch they request 32 bytes
//   unconditionally - confirm that is intended for non-BGL/BGP targets.
// NOTE(review): AlignedStdAllocator::allocate() computes size * sizeof(T) without an overflow
//   check - confirm callers cannot pass a count large enough to wrap.
// NOTE(review): in the four FCNP bounce-buffer paths alignedSize is 0 when size is 0, and the
//   branch is still taken if the pointer is unaligned; &alignedBuffer[0] then indexes an empty
//   vector - confirm zero-length transfers with unaligned pointers cannot occur.
// NOTE(review): the new header uses std::bad_alloc and unqualified size_t but includes only
//   <memory>, <stdexcept> and <cstdlib>/<malloc.h>; presumably <new>/<cstddef> arrive transitively
//   - TODO confirm or include them explicitly.
diff --git a/Appl/CEP/CS1/CS1_BGLProc/src/CacheAlignedAllocator.h b/Appl/CEP/CS1/CS1_BGLProc/src/CacheAlignedAllocator.h deleted file mode 100644 index ebff5e89ef08fd7f51fa484d206e9ec6bcfeb620..0000000000000000000000000000000000000000 --- a/Appl/CEP/CS1/CS1_BGLProc/src/CacheAlignedAllocator.h +++ /dev/null @@ -1,41 +0,0 @@ -#ifndef LOFAR_APPL_CEP_CS1_CS1_BGL_PROC_CACHE_ALIGNED_ALLOCATOR_H -#define LOFAR_APPL_CEP_CS1_CS1_BGL_PROC_CACHE_ALIGNED_ALLOCATOR_H - -#include <malloc.h> -#include <memory> - - -#if defined HAVE_BGL || defined HAVE_BGP -#define CACHE_LINE_SIZE 32 -#define CACHE_ALIGNED __attribute__ ((aligned(CACHE_LINE_SIZE))) -#else -#define CACHE_LINE_SIZE 16 -#define CACHE_ALIGNED -#endif - - -namespace LOFAR { -namespace CS1 { - -template <typename T> class CacheAlignedAllocator : public std::allocator<T> -{ - public: - typedef typename std::allocator<T>::size_type size_type; - typedef typename std::allocator<T>::pointer pointer; - typedef typename std::allocator<T>::const_pointer const_pointer; - - pointer allocate(size_type size, const_pointer /*hint*/ = 0) - { - return static_cast<pointer>(memalign(CACHE_LINE_SIZE, size * sizeof(T))); - } - - void deallocate(pointer ptr, size_type /*size*/) - { - free(ptr); - } -}; - -} // namespace CS1 -} // namespace LOFAR - -#endif diff --git a/Appl/CEP/CS1/CS1_BGLProc/src/FCNP_ClientStream.cc b/Appl/CEP/CS1/CS1_BGLProc/src/FCNP_ClientStream.cc index 75a4a952e07b950390dff83182413331950e4676..6297de0761ba93c19959c66a36d1294d001da180 100644 --- a/Appl/CEP/CS1/CS1_BGLProc/src/FCNP_ClientStream.cc +++ b/Appl/CEP/CS1/CS1_BGLProc/src/FCNP_ClientStream.cc @@ -26,11 +26,13 @@ #if defined HAVE_FCNP && defined HAVE_BGP #include <Common/Timer.h> +#include <CS1_Interface/AlignedStdAllocator.h> #include <FCNP_ClientStream.h> #include <fcnp_cn.h> -//#include <algorithm> +#include <cstring> +#include <vector> namespace LOFAR { @@ -48,10 +50,10 @@ void FCNP_ClientStream::read(void *ptr, size_t size) if
(reinterpret_cast<size_t>(ptr) % 16 != 0 || size % 16 != 0) { size_t alignedSize = (size + 15) & ~ (size_t) 15; - char tmp[alignedSize] __attribute__ ((aligned(16))); + std::vector<char, AlignedStdAllocator<char, 16> > alignedBuffer(alignedSize); - FCNP_CN::IONtoCN_ZeroCopy(tmp, alignedSize); - memcpy(ptr, tmp, size); + FCNP_CN::IONtoCN_ZeroCopy(&alignedBuffer[0], alignedSize); + memcpy(ptr, &alignedBuffer[0], size); } else { FCNP_CN::IONtoCN_ZeroCopy(ptr, size); } @@ -64,10 +66,10 @@ void FCNP_ClientStream::write(const void *ptr, size_t size) if (reinterpret_cast<size_t>(ptr) % 16 != 0 || size % 16 != 0) { size_t alignedSize = (size + 15) & ~ (size_t) 15; - char tmp[alignedSize] __attribute__ ((aligned(16))); + std::vector<char, AlignedStdAllocator<char, 16> > alignedBuffer(alignedSize); - memcpy(tmp, ptr, size); - FCNP_CN::CNtoION_ZeroCopy(tmp, alignedSize); + memcpy(&alignedBuffer[0], ptr, size); + FCNP_CN::CNtoION_ZeroCopy(&alignedBuffer[0], alignedSize); } else { FCNP_CN::CNtoION_ZeroCopy(ptr, size); } diff --git a/Appl/CEP/CS1/CS1_BGLProc/src/FIR.cc b/Appl/CEP/CS1/CS1_BGLProc/src/FIR.cc index e84f1d4515f94a784f88c4010e7b48f836a852d8..ec265d09a161b8ee5b14c81df508333863b6d5cf 100644 --- a/Appl/CEP/CS1/CS1_BGLProc/src/FIR.cc +++ b/Appl/CEP/CS1/CS1_BGLProc/src/FIR.cc @@ -3,7 +3,6 @@ //# Includes #include <FIR.h> -#include <CacheAlignedAllocator.h> namespace LOFAR { namespace CS1 { @@ -15,7 +14,7 @@ namespace CS1 { // This is efficiently achieved by negating the FIR filter constants of all // uneven FIR filters.
-const float FIR::weights[NR_SUBBAND_CHANNELS][NR_TAPS] CACHE_ALIGNED = { +const float FIR::weights[NR_SUBBAND_CHANNELS][NR_TAPS] __attribute__ ((aligned(32))) = { #if NR_SUBBAND_CHANNELS == 256 && NR_TAPS == 16 { 0.011659500, -0.011535200, 0.005131880, 0.001219900, -0.006891530, 0.011598600, -0.015420900, 1.000000000, @@ -1047,7 +1046,7 @@ const float FIR::weights[NR_SUBBAND_CHANNELS][NR_TAPS] CACHE_ALIGNED = { }; -const float FIR::bandPassCorrectionFactors[NR_SUBBAND_CHANNELS] CACHE_ALIGNED = { +const float FIR::bandPassCorrectionFactors[NR_SUBBAND_CHANNELS] __attribute__ ((aligned(32))) = { #if NR_SUBBAND_CHANNELS == 256 && NR_TAPS == 16 #if 1 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, diff --git a/Appl/CEP/CS1/CS1_BGLProc/src/PPF.h b/Appl/CEP/CS1/CS1_BGLProc/src/PPF.h index ade78c9c437643f28a8d1a8c9f1ed067598a1dcd..fd72c65d5345169b96c0c35c7942793aa47829fe 100644 --- a/Appl/CEP/CS1/CS1_BGLProc/src/PPF.h +++ b/Appl/CEP/CS1/CS1_BGLProc/src/PPF.h @@ -9,7 +9,7 @@ #include <FIR.h> #include <TransposedData.h> #include <FilteredData.h> -#include <CacheAlignedAllocator.h> +#include <CS1_Interface/AlignedStdAllocator.h> #include <boost/multi_array.hpp> @@ -55,9 +55,9 @@ class PPF boost::multi_array<FIR, 3> itsFIRs; //[itsNrStations][NR_POLARIZATIONS][NR_SUBBAND_CHANNELS] boost::multi_array<fcomplex, 3> itsFFTinData; //[NR_TAPS - 1 + itsNrSamplesPerIntegration][NR_POLARIZATIONS][NR_SUBBAND_CHANNELS] #else - boost::multi_array<fcomplex, 2, CacheAlignedAllocator<fcomplex> > itsTmp; //[4][itsNrSamplesPerIntegration] - boost::multi_array<fcomplex, 3, CacheAlignedAllocator<fcomplex> > itsFFTinData; //[itsNrSamplesPerIntegration][NR_POLARIZATIONS][NR_SUBBAND_CHANNELS + 4] - boost::multi_array<fcomplex, 3, CacheAlignedAllocator<fcomplex> > itsFFToutData; //[2][NR_POLARIZATIONS][NR_SUBBAND_CHANNELS] + boost::multi_array<fcomplex, 2, AlignedStdAllocator<fcomplex, 32> > itsTmp; //[4][itsNrSamplesPerIntegration] + boost::multi_array<fcomplex, 3, AlignedStdAllocator<fcomplex,
32> > itsFFTinData; //[itsNrSamplesPerIntegration][NR_POLARIZATIONS][NR_SUBBAND_CHANNELS + 4] + boost::multi_array<fcomplex, 3, AlignedStdAllocator<fcomplex, 32> > itsFFToutData; //[2][NR_POLARIZATIONS][NR_SUBBAND_CHANNELS] #endif #if defined HAVE_FFTW3 diff --git a/Appl/CEP/CS1/CS1_IONProc/src/FCNP_ServerStream.cc b/Appl/CEP/CS1/CS1_IONProc/src/FCNP_ServerStream.cc index 236431e226dcaac3de78edaf77687f31a919f2f4..03334ec7891007cf6b7abbfbd31560b5153ac720 100644 --- a/Appl/CEP/CS1/CS1_IONProc/src/FCNP_ServerStream.cc +++ b/Appl/CEP/CS1/CS1_IONProc/src/FCNP_ServerStream.cc @@ -26,11 +26,14 @@ #if defined HAVE_FCNP && defined __PPC__ #include <Common/Timer.h> +#include <CS1_Interface/AlignedStdAllocator.h> #include <FCNP_ServerStream.h> #include <fcnp_ion.h> #include <algorithm> +#include <cstring> +#include <vector> namespace LOFAR { @@ -83,10 +86,10 @@ void FCNP_ServerStream::write(const void *buf, size_t size) if (reinterpret_cast<size_t>(buf) % 16 != 0 || size % 16 != 0) { size_t alignedSize = (size + 15) & ~ (size_t) 15; - char tmp[alignedSize] __attribute__ ((aligned(16))); + std::vector<char, AlignedStdAllocator<char, 16> > alignedBuffer(alignedSize); - memcpy(tmp, buf, size); - FCNP_ION::IONtoCN_ZeroCopy(itsCore, tmp, alignedSize); + memcpy(&alignedBuffer[0], buf, size); + FCNP_ION::IONtoCN_ZeroCopy(itsCore, &alignedBuffer[0], alignedSize); } else { FCNP_ION::IONtoCN_ZeroCopy(itsCore, const_cast<const void *>(buf), size); } @@ -99,10 +102,10 @@ void FCNP_ServerStream::read(void *buf, size_t size) if (reinterpret_cast<size_t>(buf) % 16 != 0 || size % 16 != 0) { size_t alignedSize = (size + 15) & ~ (size_t) 15; - char tmp[alignedSize] __attribute__ ((aligned(16))); + std::vector<char, AlignedStdAllocator<char, 16> > alignedBuffer(alignedSize); - FCNP_ION::CNtoION_ZeroCopy(itsCore, tmp, alignedSize); - memcpy(buf, tmp, size); + FCNP_ION::CNtoION_ZeroCopy(itsCore, &alignedBuffer[0], alignedSize); + memcpy(buf, &alignedBuffer[0], size); } else {
FCNP_ION::CNtoION_ZeroCopy(itsCore, buf, size); } diff --git a/Appl/CEP/CS1/CS1_IONProc/src/InputSection.cc b/Appl/CEP/CS1/CS1_IONProc/src/InputSection.cc index 48e26068187988aef15d2f270eda4139f7ef1da5..b5379634e64cb4f16309625d39bc0dae4bda7b72 100644 --- a/Appl/CEP/CS1/CS1_IONProc/src/InputSection.cc +++ b/Appl/CEP/CS1/CS1_IONProc/src/InputSection.cc @@ -33,6 +33,7 @@ #include <InputThread.h> #include <ION_Allocator.h> //#include <TH_ZoidServer.h> +#include <CS1_Interface/AlignedStdAllocator.h> #include <CS1_Interface/BGL_Command.h> #include <CS1_Interface/BGL_Mapping.h> #include <CS1_Interface/SubbandMetaData.h> @@ -381,7 +382,7 @@ void InputSection::process() command.write(str); - std::vector<SubbandMetaData> metaDataPerComputeNode(itsNrPsets); + std::vector<SubbandMetaData, AlignedStdAllocator<SubbandMetaData, 16> > metaDataPerComputeNode(itsNrPsets); for (unsigned pset = 0; pset < itsNrPsets; pset ++) { unsigned subband = itsNSubbandsPerPset * pset + subbandBase; diff --git a/Appl/CEP/CS1/CS1_Interface/include/CS1_Interface/AlignedStdAllocator.h b/Appl/CEP/CS1/CS1_Interface/include/CS1_Interface/AlignedStdAllocator.h new file mode 100644 index 0000000000000000000000000000000000000000..ae761abd98cee6e469e7947ab29d09c96d2afde6 --- /dev/null +++ b/Appl/CEP/CS1/CS1_Interface/include/CS1_Interface/AlignedStdAllocator.h @@ -0,0 +1,53 @@ +#ifndef LOFAR_APPL_CEP_CS1_CS1_INTERFACE_ALIGNED_ALLOCATOR_H +#define LOFAR_APPL_CEP_CS1_CS1_INTERFACE_ALIGNED_ALLOCATOR_H + +#include <memory> +#include <stdexcept> + +#if _POSIX_C_SOURCE >= 200112L || _XOPEN_SOURCE >= 600 +#include <cstdlib> +#else +#include <malloc.h> +#endif + + +namespace LOFAR { +namespace CS1 { + +template <typename T, size_t ALIGNMENT> class AlignedStdAllocator : public std::allocator<T> +{ + public: + typedef typename std::allocator<T>::size_type size_type; + typedef typename std::allocator<T>::pointer pointer; + typedef typename std::allocator<T>::const_pointer const_pointer; + + template <class U> struct
rebind + { + typedef AlignedStdAllocator<U, ALIGNMENT> other; + }; + + pointer allocate(size_type size, const_pointer /*hint*/ = 0) + { + void *ptr; + +#if _POSIX_C_SOURCE >= 200112L || _XOPEN_SOURCE >= 600 + if (posix_memalign(&ptr, ALIGNMENT, size * sizeof(T)) != 0) + throw std::bad_alloc(); +#else + if ((ptr = memalign(ALIGNMENT, size * sizeof(T))) == 0) + throw std::bad_alloc(); +#endif + + return static_cast<pointer>(ptr); + } + + void deallocate(pointer ptr, size_type /*size*/) + { + free(ptr); + } +}; + +} // namespace CS1 +} // namespace LOFAR + +#endif diff --git a/Appl/CEP/CS1/CS1_Interface/include/CS1_Interface/Makefile.am b/Appl/CEP/CS1/CS1_Interface/include/CS1_Interface/Makefile.am index 05e0f7ad9d7db5cff545931a208a6fd25a8c000e..a067287cd2ba74e65729682e8b18d73498bec379 100644 --- a/Appl/CEP/CS1/CS1_Interface/include/CS1_Interface/Makefile.am +++ b/Appl/CEP/CS1/CS1_Interface/include/CS1_Interface/Makefile.am @@ -1,4 +1,5 @@ pkginclude_HEADERS = Package__Version.h \ + AlignedStdAllocator.h \ Allocator.h \ BGL_Command.h \ BGL_Configuration.h \