Skip to content
Snippets Groups Projects
Commit 10ece6ad authored by Jan David Mol's avatar Jan David Mol
Browse files

Task #3696: Replaced GPUProc/Delays by latest version from IONProc, and force...

Task #3696: Replaced GPUProc/Delays by latest version from IONProc, and force the use of SAP 0 to fix RTCP.cc
parent fa878e40
No related branches found
No related tags found
No related merge requests found
......@@ -18,12 +18,13 @@
//# along with this program; if not, write to the Free Software
//# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//#
//# $Id: Delays.cc 17975 2011-05-10 09:52:51Z mol $
//# $Id: Delays.cc 23195 2012-12-06 16:01:41Z mol $
//# Always #include <lofar_config.h> first!
#include <lofar_config.h>
#include <Delays.h>
#include <Scheduling.h>
#include <Common/LofarLogger.h>
#include <Common/PrettyUnits.h>
#include <Interface/Exceptions.h>
......@@ -53,22 +54,29 @@ Delays::Delays(const Parset &parset, const string &stationName, const TimeStamp
itsParset(parset),
stop(false),
// we need an extra entry for the central beam
itsBuffer(bufferSize, parset.nrBeams(), parset.nrTABs() + 1),
itsBuffer(bufferSize, parset.nrBeams(), parset.maxNrTABs() + 1),
head(0),
tail(0),
bufferFree(bufferSize),
bufferUsed(0),
itsNrCalcDelays(parset.nrCalcDelays()),
itsNrBeams(parset.nrBeams()),
itsMaxNrTABs(parset.maxNrTABs()),
itsNrTABs(parset.nrTABs()),
itsDirectionType(MDirection::J2000),
itsStartTime(startTime),
itsNrSamplesPerSec(parset.nrSamplesPerSubband()),
itsSampleDuration(parset.sampleDuration()),
itsStationName(stationName),
itsDelayTimer("delay producer", true, true),
itsThread(this, &Delays::mainLoop, "[DelayCompensation] ")
itsDelayTimer("delay producer", true, true)
{
// FIXME: call from outside this class
start();
}
// Launch the delay-producer thread, which runs Delays::mainLoop().
//
// The constructor currently calls start() itself, and start() is also part
// of the public interface, so it can end up being called more than once.
// Creating another Thread would run mainLoop() a second time on the same
// buffer and semaphores, so repeated calls are ignored: the thread is only
// created when none exists yet.
void Delays::start()
{
  if (!itsThread)
    itsThread = new Thread(this, &Delays::mainLoop, "[DelayCompensation] ");
}
......@@ -115,15 +123,28 @@ void Delays::init()
// Set the position for the itsFrame.
itsFrame.set(itsPhaseCentre);
// Set-up the conversion engine, using reference direction ITRF.
itsConverter = new MDirection::Convert(itsDirectionType, MDirection::Ref(MDirection::ITRF, itsFrame));
// Set-up the conversion engines, using reference direction ITRF.
for (unsigned beam = 0; beam < itsNrBeams; beam++) {
const casa::MDirection::Types &dirtype = itsDirectionTypes[beam];
if (itsConverters.find(dirtype) == itsConverters.end())
itsConverters[dirtype] = MDirection::Convert(dirtype, MDirection::Ref(MDirection::ITRF, itsFrame));
}
}
void Delays::mainLoop()
{
#if defined HAVE_BGP_ION
doNotRunOnCore0();
#endif
LOG_DEBUG("Delay compensation thread running");
#if defined HAVE_BGP_ION
runOnCore0();
#endif
init();
// the current time, in samples
......@@ -153,13 +174,14 @@ void Delays::mainLoop()
// For each given direction in the sky ...
for (uint b = 0; b < itsNrBeams; b ++) {
for (uint p = 0; p < itsNrTABs + 1; p ++) {
MDirection::Convert &converter = itsConverters[itsDirectionTypes[b]];
for (uint p = 0; p < itsNrTABs[b] + 1; p ++) {
// Look up the astronomical direction; it is expressed in this beam's own direction type (not necessarily J2000).
MVDirection &sky = itsBeamDirections[b][p];
// Convert this direction, using the conversion engine.
MDirection dir = (*itsConverter)(sky);
MDirection dir = converter(sky);
// Add to the return vector
itsBuffer[tail][b][p] = dir.getValue();
......@@ -184,6 +206,10 @@ void Delays::mainLoop()
bufferUsed.up(itsNrCalcDelays);
}
} catch (AipsError &ex) {
// trigger getNextDelays and force it to stop
stop = true;
bufferUsed.up(1);
THROW(GPUProcException, "AipsError: " << ex.what());
}
......@@ -193,18 +219,23 @@ void Delays::mainLoop()
void Delays::getNextDelays(Matrix<MVDirection> &directions, Matrix<double> &delays)
{
ASSERTSTR(directions.num_elements() == itsNrBeams * (itsNrTABs + 1),
directions.num_elements() << " == " << itsNrBeams << "*" << (itsNrTABs + 1));
ASSERTSTR(directions.num_elements() == itsNrBeams * (itsMaxNrTABs + 1),
directions.num_elements() << " == " << itsNrBeams << "*" << (itsMaxNrTABs + 1));
ASSERTSTR(delays.num_elements() == itsNrBeams * (itsNrTABs + 1),
delays.num_elements() << " == " << itsNrBeams << "*" << (itsNrTABs + 1));
ASSERTSTR(delays.num_elements() == itsNrBeams * (itsMaxNrTABs + 1),
delays.num_elements() << " == " << itsNrBeams << "*" << (itsMaxNrTABs + 1));
ASSERT(itsThread);
bufferUsed.down();
if (stop)
THROW(GPUProcException, "Cannot obtain delays -- delay thread stopped running");
// copy the directions at itsBuffer[head] into the provided buffer,
// and calculate the respective delays
for (unsigned b = 0; b < itsNrBeams; b ++) {
for (unsigned p = 0; p < itsNrTABs + 1; p ++) {
for (unsigned p = 0; p < itsNrTABs[b] + 1; p ++) {
const MVDirection &dir = itsBuffer[head][b][p];
directions[b][p] = dir;
......@@ -222,39 +253,33 @@ void Delays::getNextDelays(Matrix<MVDirection> &directions, Matrix<double> &dela
void Delays::setBeamDirections(const Parset &parset)
{
const BeamCoordinates& pencilBeams = parset.pencilBeams();
// TODO: For now, we include pencil beams for all regular beams,
// and use the pencil beam offsets as offsets in J2000.
// To do the coordinates properly, the offsets should be applied
// in today's coordinates (JMEAN/JTRUE?), not J2000.
itsBeamDirections.resize(itsNrBeams, itsNrTABs + 1);
// We only support beams of the same direction type for now
const string type0 = toUpper(parset.getBeamDirectionType(0));
itsBeamDirections.resize(itsNrBeams, itsMaxNrTABs + 1);
itsDirectionTypes.resize(itsNrBeams);
for (unsigned beam = 1; beam < itsNrBeams; beam ++) {
const string typeN = toUpper(parset.getBeamDirectionType(beam));
for (unsigned beam = 0; beam < itsNrBeams; beam ++) {
const string type = toUpper(parset.getBeamDirectionType(beam));
if (type0 != typeN)
THROW(GPUProcException, "All beams must use the same coordinate system (beam 0 uses " << type0 << " but beam " << beam << " uses " << typeN << ")");
if (!MDirection::getType(itsDirectionTypes[beam], type))
THROW(GPUProcException, "Beam direction type unknown: " << type);
}
if (!MDirection::getType(itsDirectionType, type0))
THROW(GPUProcException, "Beam direction type unknown: " << type0);
// Get the source directions from the parameter set.
// Split the \a dir vector into separate Direction objects.
for (unsigned beam = 0; beam < itsNrBeams; beam ++) {
const vector<double> beamDir = parset.getBeamDirection(beam);
const BeamCoordinates& TABs = parset.TABs(beam);
// add central beam coordinates for non-beamforming pipelines
itsBeamDirections[beam][0] = MVDirection(beamDir[0], beamDir[1]);
for (unsigned pencil = 0; pencil < itsNrTABs; pencil ++) {
for (unsigned pencil = 0; pencil < itsNrTABs[beam]; pencil ++) {
// obtain pencil coordinate
const BeamCoord3D &pencilCoord = pencilBeams[pencil];
const BeamCoord3D &pencilCoord = TABs[pencil];
// apply angle modification
const double angle1 = beamDir[0] + pencilCoord[0];
......
......@@ -18,7 +18,7 @@
//# along with this program; if not, write to the Free Software
//# Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
//#
//# $Id: Delays.h 17975 2011-05-10 09:52:51Z mol $
//# $Id: Delays.h 23195 2012-12-06 16:01:41Z mol $
#ifndef LOFAR_GPUPROC_DELAYS_H
#define LOFAR_GPUPROC_DELAYS_H
......@@ -29,13 +29,13 @@
//# Never #include <config.h> or #include <lofar_config.h> in a header file!
//# Includes
#include "Common/Timer.h"
#include "Interface/MultiDimArray.h"
#include "Interface/Parset.h"
#include "Interface/RSPTimeStamp.h"
#include "Interface/SmartPtr.h"
#include "Common/Thread/Semaphore.h"
#include "Common/Thread/Thread.h"
#include <Common/Timer.h>
#include <Interface/MultiDimArray.h>
#include <Interface/Parset.h>
#include <Interface/RSPTimeStamp.h>
#include <Interface/SmartPtr.h>
#include <Common/Thread/Semaphore.h>
#include <Common/Thread/Thread.h>
#include <measures/Measures/MeasConvert.h>
#include <measures/Measures/MDirection.h>
......@@ -83,8 +83,10 @@ class Delays
Delays(const Parset &ps, const string &stationName, const TimeStamp &startTime);
~Delays();
void start();
// get the set of directions (ITRF) and delays for the beams, for the next CN integration time
// Both matrices must have dimensions [itsNrBeams][itsNrTABs+1]
// Both matrices must have dimensions [itsNrBeams][itsMaxNrTABs+1]
void getNextDelays(Matrix<casa::MVDirection> &directions, Matrix<double> &delays);
private:
......@@ -132,9 +134,10 @@ class Delays
// Beam info.
const unsigned itsNrBeams;
const unsigned itsNrTABs;
casa::MDirection::Types itsDirectionType;
Matrix<casa::MVDirection> itsBeamDirections; // [itsNrBeams][itsNrTABs+1]
const unsigned itsMaxNrTABs;
const std::vector<unsigned> itsNrTABs;
Vector<casa::MDirection::Types> itsDirectionTypes;
Matrix<casa::MVDirection> itsBeamDirections; // [itsNrBeams][itsMaxNrTABs+1]
// Sample timings.
const TimeStamp itsStartTime;
......@@ -144,7 +147,7 @@ class Delays
// Station Name.
const string itsStationName;
casa::MeasFrame itsFrame;
SmartPtr<casa::MDirection::Convert> itsConverter;
std::map<casa::MDirection::Types, casa::MDirection::Convert> itsConverters;
// Station phase centre.
casa::MPosition itsPhaseCentre;
......@@ -154,7 +157,7 @@ class Delays
NSTimer itsDelayTimer;
Thread itsThread;
SmartPtr<Thread> itsThread;
};
} // namespace RTCP
......
......@@ -40,6 +40,9 @@ unsigned nrGPUs;
// Filter tap counts. NR_STATION_FILTER_TAPS also determines buffer sizes in
// the UHEP pipeline, which holds
// nrSamplesPerChannel() + NR_STATION_FILTER_TAPS - 1 samples per subband.
#define NR_TAPS 16
#define NR_STATION_FILTER_TAPS 16
// The SAP to process. Only one SAP is supported for now, so the index is
// fixed to 0; it is passed as the argument to ps.nrTABs() by the kernels,
// work queues and unit tests below.
#define SAP 0
// Compile-time switches (presumably tested with #if/#ifdef further down --
// the uses are not visible here): USE_2X2 is enabled, USE_CUSTOM_FFT and
// USE_TEST_DATA are explicitly disabled.
#define USE_2X2
#undef USE_CUSTOM_FFT
#undef USE_TEST_DATA
......@@ -196,7 +199,7 @@ cl::Program createProgram(const Parset &ps, cl::Context &context, std::vector<cl
args << " -DNR_SAMPLES_PER_CHANNEL=" << ps.nrSamplesPerChannel();
args << " -DNR_SAMPLES_PER_SUBBAND=" << ps.nrSamplesPerSubband();
args << " -DNR_BEAMS=" << ps.nrBeams();
args << " -DNR_TABS=" << ps.nrTABs();
args << " -DNR_TABS=" << ps.nrTABs(SAP);
args << " -DNR_COHERENT_STOKES=" << ps.nrCoherentStokes();
args << " -DNR_INCOHERENT_STOKES=" << ps.nrIncoherentStokes();
args << " -DCOHERENT_STOKES_TIME_INTEGRATION_FACTOR=" << ps.coherentStokesTimeIntegrationFactor();
......@@ -811,18 +814,18 @@ class BeamFormerKernel : public Kernel
setArg(1, devCorrectedData);
setArg(2, devBeamFormerWeights);
globalWorkSize = cl::NDRange(NR_POLARIZATIONS, ps.nrTABs(), ps.nrChannelsPerSubband());
localWorkSize = cl::NDRange(NR_POLARIZATIONS, ps.nrTABs(), 1);
globalWorkSize = cl::NDRange(NR_POLARIZATIONS, ps.nrTABs(SAP), ps.nrChannelsPerSubband());
localWorkSize = cl::NDRange(NR_POLARIZATIONS, ps.nrTABs(SAP), 1);
// FIXME: nrTABs
//queue.enqueueNDRangeKernel(*this, cl::NullRange, cl::NDRange(16, ps.nrTABs(), ps.nrChannelsPerSubband()), cl::NDRange(16, ps.nrTABs(), 1), 0, &event);
//queue.enqueueNDRangeKernel(*this, cl::NullRange, cl::NDRange(16, ps.nrTABs(SAP), ps.nrChannelsPerSubband()), cl::NDRange(16, ps.nrTABs(SAP), 1), 0, &event);
size_t count = ps.nrChannelsPerSubband() * ps.nrSamplesPerChannel() * NR_POLARIZATIONS;
size_t nrWeightsBytes = ps.nrStations() * ps.nrTABs() * ps.nrChannelsPerSubband() * NR_POLARIZATIONS * sizeof(std::complex<float>);
size_t nrWeightsBytes = ps.nrStations() * ps.nrTABs(SAP) * ps.nrChannelsPerSubband() * NR_POLARIZATIONS * sizeof(std::complex<float>);
size_t nrSampleBytesPerPass = count * ps.nrStations() * sizeof(std::complex<float>);
size_t nrComplexVoltagesBytesPerPass = count * ps.nrTABs() * sizeof(std::complex<float>);
size_t nrComplexVoltagesBytesPerPass = count * ps.nrTABs(SAP) * sizeof(std::complex<float>);
unsigned nrPasses = std::max((ps.nrStations() + 6) / 16, 1U);
nrOperations = count * ps.nrStations() * ps.nrTABs() * 8;
nrOperations = count * ps.nrStations() * ps.nrTABs(SAP) * 8;
nrBytesRead = nrWeightsBytes + nrSampleBytesPerPass + (nrPasses - 1) * nrComplexVoltagesBytesPerPass;
nrBytesWritten = nrPasses * nrComplexVoltagesBytesPerPass;
}
......@@ -840,14 +843,14 @@ class BeamFormerTransposeKernel : public Kernel
setArg(0, devTransposedData);
setArg(1, devComplexVoltages);
//globalWorkSize = cl::NDRange(256, (ps.nrTABs() + 15) / 16, (ps.nrChannelsPerSubband() + 15) / 16);
globalWorkSize = cl::NDRange(256, (ps.nrTABs() + 15) / 16, ps.nrSamplesPerChannel() / 16);
//globalWorkSize = cl::NDRange(256, (ps.nrTABs(SAP) + 15) / 16, (ps.nrChannelsPerSubband() + 15) / 16);
globalWorkSize = cl::NDRange(256, (ps.nrTABs(SAP) + 15) / 16, ps.nrSamplesPerChannel() / 16);
localWorkSize = cl::NDRange(256, 1, 1);
nrOperations = 0;
nrBytesRead = (size_t) ps.nrChannelsPerSubband() * ps.nrSamplesPerChannel() * ps.nrTABs() * NR_POLARIZATIONS * sizeof(std::complex<float>),
//nrBytesWritten = (size_t) ps.nrTABs() * NR_POLARIZATIONS * ps.nrSamplesPerChannel() * ps.nrChannelsPerSubband() * sizeof(std::complex<float>);
nrBytesWritten = (size_t) ps.nrTABs() * NR_POLARIZATIONS * ps.nrChannelsPerSubband() * ps.nrSamplesPerChannel() * sizeof(std::complex<float>);
nrBytesRead = (size_t) ps.nrChannelsPerSubband() * ps.nrSamplesPerChannel() * ps.nrTABs(SAP) * NR_POLARIZATIONS * sizeof(std::complex<float>),
//nrBytesWritten = (size_t) ps.nrTABs(SAP) * NR_POLARIZATIONS * ps.nrSamplesPerChannel() * ps.nrChannelsPerSubband() * sizeof(std::complex<float>);
nrBytesWritten = (size_t) ps.nrTABs(SAP) * NR_POLARIZATIONS * ps.nrChannelsPerSubband() * ps.nrSamplesPerChannel() * sizeof(std::complex<float>);
}
};
......@@ -867,7 +870,7 @@ class Dedispersion_FFT_Kernel
void enqueue(cl::CommandQueue &queue, PerformanceCounter &counter, clFFT_Direction direction)
{
size_t nrFFTs = (size_t) ps.nrTABs() * NR_POLARIZATIONS * ps.nrChannelsPerSubband() * ps.nrSamplesPerChannel() / ps.dedispersionFFTsize();
size_t nrFFTs = (size_t) ps.nrTABs(SAP) * NR_POLARIZATIONS * ps.nrChannelsPerSubband() * ps.nrSamplesPerChannel() / ps.dedispersionFFTsize();
cl_int error = clFFT_ExecuteInterleaved(queue(), plan.plan, nrFFTs, direction, buffer(), buffer(), 0, 0, &event());
......@@ -892,7 +895,7 @@ class DedispersionForwardFFTkernel : public FFT_Kernel
public:
DedispersionForwardFFTkernel(const Parset &ps, cl::Context &context, cl::Buffer &buffer)
:
FFT_Kernel(context, ps.dedispersionFFTsize(), ps.nrTABs() * NR_POLARIZATIONS * ps.nrChannelsPerSubband() * ps.nrSamplesPerChannel() / ps.dedispersionFFTsize(), true, buffer)
FFT_Kernel(context, ps.dedispersionFFTsize(), ps.nrTABs(SAP) * NR_POLARIZATIONS * ps.nrChannelsPerSubband() * ps.nrSamplesPerChannel() / ps.dedispersionFFTsize(), true, buffer)
{
ASSERT(ps.nrSamplesPerChannel() % ps.dedispersionFFTsize() == 0);
}
......@@ -904,7 +907,7 @@ class DedispersionBackwardFFTkernel : public FFT_Kernel
public:
DedispersionBackwardFFTkernel(const Parset &ps, cl::Context &context, cl::Buffer &buffer)
:
FFT_Kernel(context, ps.dedispersionFFTsize(), ps.nrTABs() * NR_POLARIZATIONS * ps.nrChannelsPerSubband() * ps.nrSamplesPerChannel() / ps.dedispersionFFTsize(), false, buffer)
FFT_Kernel(context, ps.dedispersionFFTsize(), ps.nrTABs(SAP) * NR_POLARIZATIONS * ps.nrChannelsPerSubband() * ps.nrSamplesPerChannel() / ps.dedispersionFFTsize(), false, buffer)
{
ASSERT(ps.nrSamplesPerChannel() % ps.dedispersionFFTsize() == 0);
}
......@@ -942,8 +945,8 @@ class DedispersionChirpKernel : public Kernel
//std::cout << "localWorkSize = NDRange(" << fftSize / divisor << ", 1, 1))" << std::endl;
}
nrOperations = (size_t) NR_POLARIZATIONS * ps.nrChannelsPerSubband() * ps.nrSamplesPerChannel() * (9 * ps.nrTABs() + 17),
nrBytesRead = nrBytesWritten = sizeof(std::complex<float>) * ps.nrTABs() * NR_POLARIZATIONS * ps.nrChannelsPerSubband() * ps.nrSamplesPerChannel();
nrOperations = (size_t) NR_POLARIZATIONS * ps.nrChannelsPerSubband() * ps.nrSamplesPerChannel() * (9 * ps.nrTABs(SAP) + 17),
nrBytesRead = nrBytesWritten = sizeof(std::complex<float>) * ps.nrTABs(SAP) * NR_POLARIZATIONS * ps.nrChannelsPerSubband() * ps.nrSamplesPerChannel();
}
void enqueue(cl::CommandQueue &queue, PerformanceCounter &counter, double subbandFrequency)
......@@ -966,12 +969,12 @@ class CoherentStokesKernel : public Kernel
setArg(0, devStokesData);
setArg(1, devComplexVoltages);
globalWorkSize = cl::NDRange(256, (ps.nrTABs() + 15) / 16, (ps.nrChannelsPerSubband() + 15) / 16);
globalWorkSize = cl::NDRange(256, (ps.nrTABs(SAP) + 15) / 16, (ps.nrChannelsPerSubband() + 15) / 16);
localWorkSize = cl::NDRange(256, 1, 1);
nrOperations = (size_t) ps.nrChannelsPerSubband() * ps.nrSamplesPerChannel() * ps.nrTABs() * (ps.nrCoherentStokes() == 1 ? 8 : 20 + 2.0 / ps.coherentStokesTimeIntegrationFactor());
nrBytesRead = (size_t) ps.nrChannelsPerSubband() * ps.nrSamplesPerChannel() * ps.nrTABs() * NR_POLARIZATIONS * sizeof(std::complex<float>);
nrBytesWritten = (size_t) ps.nrTABs() * ps.nrCoherentStokes() * ps.nrSamplesPerChannel() / ps.coherentStokesTimeIntegrationFactor() * ps.nrChannelsPerSubband() * sizeof(float);
nrOperations = (size_t) ps.nrChannelsPerSubband() * ps.nrSamplesPerChannel() * ps.nrTABs(SAP) * (ps.nrCoherentStokes() == 1 ? 8 : 20 + 2.0 / ps.coherentStokesTimeIntegrationFactor());
nrBytesRead = (size_t) ps.nrChannelsPerSubband() * ps.nrSamplesPerChannel() * ps.nrTABs(SAP) * NR_POLARIZATIONS * sizeof(std::complex<float>);
nrBytesWritten = (size_t) ps.nrTABs(SAP) * ps.nrCoherentStokes() * ps.nrSamplesPerChannel() / ps.coherentStokesTimeIntegrationFactor() * ps.nrChannelsPerSubband() * sizeof(float);
}
};
......@@ -988,31 +991,31 @@ class UHEP_BeamFormerKernel : public Kernel
setArg(2, devBeamFormerWeights);
#if 1
globalWorkSize = cl::NDRange(NR_POLARIZATIONS, ps.nrTABs(), ps.nrSubbands());
localWorkSize = cl::NDRange(NR_POLARIZATIONS, ps.nrTABs(), 1);
globalWorkSize = cl::NDRange(NR_POLARIZATIONS, ps.nrTABs(SAP), ps.nrSubbands());
localWorkSize = cl::NDRange(NR_POLARIZATIONS, ps.nrTABs(SAP), 1);
size_t count = ps.nrSubbands() * (ps.nrSamplesPerChannel() + NR_STATION_FILTER_TAPS - 1) * NR_POLARIZATIONS;
size_t nrWeightsBytes = ps.nrStations() * ps.nrTABs() * ps.nrSubbands() * NR_POLARIZATIONS * sizeof(std::complex<float>);
size_t nrWeightsBytes = ps.nrStations() * ps.nrTABs(SAP) * ps.nrSubbands() * NR_POLARIZATIONS * sizeof(std::complex<float>);
size_t nrSampleBytes = count * ps.nrStations() * ps.nrBytesPerComplexSample();
size_t nrComplexVoltagesBytesPerPass = count * ps.nrTABs() * sizeof(std::complex<float>);
size_t nrComplexVoltagesBytesPerPass = count * ps.nrTABs(SAP) * sizeof(std::complex<float>);
unsigned nrPasses = std::max((ps.nrStations() + 6) / 16, 1U);
nrOperations = count * ps.nrStations() * ps.nrTABs() * 8;
nrOperations = count * ps.nrStations() * ps.nrTABs(SAP) * 8;
nrBytesRead = nrWeightsBytes + nrSampleBytes + (nrPasses - 1) * nrComplexVoltagesBytesPerPass;
nrBytesWritten = nrPasses * nrComplexVoltagesBytesPerPass;
#else
ASSERT(ps.nrTABs() % 3 == 0);
ASSERT(ps.nrTABs(SAP) % 3 == 0);
ASSERT(ps.nrStations() % 6 == 0);
unsigned nrThreads = NR_POLARIZATIONS * (ps.nrTABs() / 3) * (ps.nrStations() / 6);
unsigned nrThreads = NR_POLARIZATIONS * (ps.nrTABs(SAP) / 3) * (ps.nrStations() / 6);
globalWorkSize = cl::NDRange(nrThreads, ps.nrSubbands());
localWorkSize = cl::NDRange(nrThreads, 1);
//globalWorkSize = cl::NDRange(ps.nrStations() / 6, ps.nrTABs() / 3, ps.nrSubbands());
//localWorkSize = cl::NDRange(ps.nrStations() / 6, ps.nrTABs() / 3, 1);
//globalWorkSize = cl::NDRange(ps.nrStations() / 6, ps.nrTABs(SAP) / 3, ps.nrSubbands());
//localWorkSize = cl::NDRange(ps.nrStations() / 6, ps.nrTABs(SAP) / 3, 1);
size_t count = ps.nrSubbands() * (ps.nrSamplesPerChannel() + NR_STATION_FILTER_TAPS - 1) * NR_POLARIZATIONS;
size_t nrWeightsBytes = ps.nrStations() * ps.nrTABs() * ps.nrSubbands() * NR_POLARIZATIONS * sizeof(std::complex<float>);
size_t nrWeightsBytes = ps.nrStations() * ps.nrTABs(SAP) * ps.nrSubbands() * NR_POLARIZATIONS * sizeof(std::complex<float>);
size_t nrSampleBytes = count * ps.nrStations() * ps.nrBytesPerComplexSample();
size_t nrComplexVoltagesBytes = count * ps.nrTABs() * sizeof(std::complex<float>);
nrOperations = count * ps.nrStations() * ps.nrTABs() * 8;
size_t nrComplexVoltagesBytes = count * ps.nrTABs(SAP) * sizeof(std::complex<float>);
nrOperations = count * ps.nrStations() * ps.nrTABs(SAP) * 8;
nrBytesRead = nrWeightsBytes + nrSampleBytes;
nrBytesWritten = nrComplexVoltagesBytes;
#endif
......@@ -1031,12 +1034,12 @@ class UHEP_TransposeKernel : public Kernel
setArg(1, devComplexVoltages);
setArg(2, devReverseSubbandMapping);
globalWorkSize = cl::NDRange(256, (ps.nrTABs() + 15) / 16, 512 / 16);
globalWorkSize = cl::NDRange(256, (ps.nrTABs(SAP) + 15) / 16, 512 / 16);
localWorkSize = cl::NDRange(256, 1, 1);
nrOperations = 0;
nrBytesRead = (size_t) ps.nrSubbands() * (ps.nrSamplesPerChannel() + NR_STATION_FILTER_TAPS - 1) * ps.nrTABs() * NR_POLARIZATIONS * sizeof(std::complex<float>);
nrBytesWritten = (size_t) ps.nrTABs() * NR_POLARIZATIONS * (ps.nrSamplesPerChannel() + NR_STATION_FILTER_TAPS - 1) * 512 * sizeof(std::complex<float>);
nrBytesRead = (size_t) ps.nrSubbands() * (ps.nrSamplesPerChannel() + NR_STATION_FILTER_TAPS - 1) * ps.nrTABs(SAP) * NR_POLARIZATIONS * sizeof(std::complex<float>);
nrBytesWritten = (size_t) ps.nrTABs(SAP) * NR_POLARIZATIONS * (ps.nrSamplesPerChannel() + NR_STATION_FILTER_TAPS - 1) * 512 * sizeof(std::complex<float>);
}
};
......@@ -1051,10 +1054,10 @@ class UHEP_InvFFT_Kernel : public Kernel
setArg(0, devFFTedData);
setArg(1, devFFTedData);
globalWorkSize = cl::NDRange(128, ps.nrTABs() * NR_POLARIZATIONS * ps.nrSamplesPerChannel());
globalWorkSize = cl::NDRange(128, ps.nrTABs(SAP) * NR_POLARIZATIONS * ps.nrSamplesPerChannel());
localWorkSize = cl::NDRange(128, 1);
size_t nrFFTs = (size_t) ps.nrTABs() * NR_POLARIZATIONS * (ps.nrSamplesPerChannel() + NR_STATION_FILTER_TAPS - 1);
size_t nrFFTs = (size_t) ps.nrTABs(SAP) * NR_POLARIZATIONS * (ps.nrSamplesPerChannel() + NR_STATION_FILTER_TAPS - 1);
nrOperations = nrFFTs * 5 * 1024 * 10;
nrBytesRead = nrFFTs * 512 * sizeof(std::complex<float>);
nrBytesWritten = nrFFTs * 1024 * sizeof(float);
......@@ -1079,10 +1082,10 @@ class UHEP_InvFIR_Kernel : public Kernel
for (nrThreads = 1024; nrThreads > maxNrThreads; nrThreads /= 2)
;
globalWorkSize = cl::NDRange(1024, NR_POLARIZATIONS, ps.nrTABs());
globalWorkSize = cl::NDRange(1024, NR_POLARIZATIONS, ps.nrTABs(SAP));
localWorkSize = cl::NDRange(nrThreads, 1, 1);
size_t count = ps.nrTABs() * NR_POLARIZATIONS * 1024;
size_t count = ps.nrTABs(SAP) * NR_POLARIZATIONS * 1024;
nrOperations = count * ps.nrSamplesPerChannel() * NR_STATION_FILTER_TAPS * 2;
nrBytesRead = count * (ps.nrSamplesPerChannel() + NR_STATION_FILTER_TAPS - 1) * sizeof(float);
nrBytesWritten = count * ps.nrSamplesPerChannel() * sizeof(float);
......@@ -1100,12 +1103,12 @@ class UHEP_TriggerKernel : public Kernel
setArg(0, devTriggerInfo);
setArg(1, devInvFIRfilteredData);
globalWorkSize = cl::NDRange(16, 16, ps.nrTABs());
globalWorkSize = cl::NDRange(16, 16, ps.nrTABs(SAP));
localWorkSize = cl::NDRange(16, 16, 1);
nrOperations = (size_t) ps.nrTABs() * ps.nrSamplesPerChannel() * 1024 * (3 /* power */ + 2 /* window */ + 1 /* max */ + 7 /* mean/variance */);
nrBytesRead = (size_t) ps.nrTABs() * NR_POLARIZATIONS * ps.nrSamplesPerChannel() * 1024 * sizeof(float);
nrBytesWritten = (size_t) ps.nrTABs() * sizeof(TriggerInfo);
nrOperations = (size_t) ps.nrTABs(SAP) * ps.nrSamplesPerChannel() * 1024 * (3 /* power */ + 2 /* window */ + 1 /* max */ + 7 /* mean/variance */);
nrBytesRead = (size_t) ps.nrTABs(SAP) * NR_POLARIZATIONS * ps.nrSamplesPerChannel() * 1024 * sizeof(float);
nrBytesWritten = (size_t) ps.nrTABs(SAP) * sizeof(TriggerInfo);
}
};
......@@ -1272,11 +1275,11 @@ BeamFormerWorkQueue::BeamFormerWorkQueue(BeamFormerPipeline &pipeline)
delaysAfterEnd(boost::extents[ps.nrBeams()][ps.nrStations()][NR_POLARIZATIONS], queue, CL_MEM_WRITE_ONLY, CL_MEM_READ_ONLY),
phaseOffsets(boost::extents[ps.nrBeams()][NR_POLARIZATIONS], queue, CL_MEM_WRITE_ONLY, CL_MEM_READ_ONLY),
devCorrectedData(cl::Buffer(pipeline.context, CL_MEM_READ_WRITE, ps.nrStations() * ps.nrChannelsPerSubband() * ps.nrSamplesPerChannel() * NR_POLARIZATIONS * sizeof(std::complex<float>))),
beamFormerWeights(boost::extents[ps.nrStations()][ps.nrChannelsPerSubband()][ps.nrTABs()], queue, CL_MEM_WRITE_ONLY, CL_MEM_READ_ONLY),
devComplexVoltages(cl::Buffer(pipeline.context, CL_MEM_READ_WRITE, ps.nrChannelsPerSubband() * ps.nrSamplesPerChannel() * ps.nrTABs() * NR_POLARIZATIONS * sizeof(std::complex<float>))),
//transposedComplexVoltages(boost::extents[ps.nrTABs()][NR_POLARIZATIONS][ps.nrSamplesPerChannel()][ps.nrChannelsPerSubband()], queue, CL_MEM_READ_ONLY, CL_MEM_READ_WRITE)
transposedComplexVoltages(boost::extents[ps.nrTABs()][NR_POLARIZATIONS][ps.nrChannelsPerSubband()][ps.nrSamplesPerChannel()], queue, CL_MEM_READ_ONLY, CL_MEM_READ_WRITE),
DMs(boost::extents[ps.nrTABs()], queue, CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY)
beamFormerWeights(boost::extents[ps.nrStations()][ps.nrChannelsPerSubband()][ps.nrTABs(SAP)], queue, CL_MEM_WRITE_ONLY, CL_MEM_READ_ONLY),
devComplexVoltages(cl::Buffer(pipeline.context, CL_MEM_READ_WRITE, ps.nrChannelsPerSubband() * ps.nrSamplesPerChannel() * ps.nrTABs(SAP) * NR_POLARIZATIONS * sizeof(std::complex<float>))),
//transposedComplexVoltages(boost::extents[ps.nrTABs(SAP)][NR_POLARIZATIONS][ps.nrSamplesPerChannel()][ps.nrChannelsPerSubband()], queue, CL_MEM_READ_ONLY, CL_MEM_READ_WRITE)
transposedComplexVoltages(boost::extents[ps.nrTABs(SAP)][NR_POLARIZATIONS][ps.nrChannelsPerSubband()][ps.nrSamplesPerChannel()], queue, CL_MEM_READ_ONLY, CL_MEM_READ_WRITE),
DMs(boost::extents[ps.nrTABs(SAP)], queue, CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY)
{
if (ps.correctBandPass()) {
BandPass::computeCorrectionFactors(bandPassCorrectionWeights.origin(), ps.nrChannelsPerSubband());
......@@ -1380,13 +1383,13 @@ UHEP_WorkQueue::UHEP_WorkQueue(UHEP_Pipeline &pipeline)
WorkQueue(pipeline),
pipeline(pipeline),
hostInputSamples(boost::extents[ps.nrStations()][ps.nrSubbands()][ps.nrSamplesPerChannel() + NR_STATION_FILTER_TAPS - 1][NR_POLARIZATIONS][ps.nrBytesPerComplexSample()], queue, CL_MEM_WRITE_ONLY),
hostBeamFormerWeights(boost::extents[ps.nrStations()][ps.nrSubbands()][ps.nrTABs()], queue, CL_MEM_WRITE_ONLY),
hostTriggerInfo(ps.nrTABs(), queue, CL_MEM_READ_ONLY)
hostBeamFormerWeights(boost::extents[ps.nrStations()][ps.nrSubbands()][ps.nrTABs(SAP)], queue, CL_MEM_WRITE_ONLY),
hostTriggerInfo(ps.nrTABs(SAP), queue, CL_MEM_READ_ONLY)
{
size_t inputSamplesSize = ps.nrStations() * ps.nrSubbands() * (ps.nrSamplesPerChannel() + NR_STATION_FILTER_TAPS - 1) * NR_POLARIZATIONS * ps.nrBytesPerComplexSample();
size_t complexVoltagesSize = ps.nrSubbands() * (ps.nrSamplesPerChannel() + NR_STATION_FILTER_TAPS - 1) * ps.nrTABs() * NR_POLARIZATIONS * sizeof(std::complex<float>);
size_t transposedDataSize = ps.nrTABs() * NR_POLARIZATIONS * (ps.nrSamplesPerChannel() + NR_STATION_FILTER_TAPS - 1) * 512 * sizeof(std::complex<float>);
size_t invFIRfilteredDataSize = ps.nrTABs() * NR_POLARIZATIONS * ps.nrSamplesPerChannel() * 512 * sizeof(std::complex<float>);
size_t complexVoltagesSize = ps.nrSubbands() * (ps.nrSamplesPerChannel() + NR_STATION_FILTER_TAPS - 1) * ps.nrTABs(SAP) * NR_POLARIZATIONS * sizeof(std::complex<float>);
size_t transposedDataSize = ps.nrTABs(SAP) * NR_POLARIZATIONS * (ps.nrSamplesPerChannel() + NR_STATION_FILTER_TAPS - 1) * 512 * sizeof(std::complex<float>);
size_t invFIRfilteredDataSize = ps.nrTABs(SAP) * NR_POLARIZATIONS * ps.nrSamplesPerChannel() * 512 * sizeof(std::complex<float>);
size_t buffer0size = std::max(inputSamplesSize, transposedDataSize);
size_t buffer1size = std::max(complexVoltagesSize, invFIRfilteredDataSize);
......@@ -1394,7 +1397,7 @@ UHEP_WorkQueue::UHEP_WorkQueue(UHEP_Pipeline &pipeline)
devBuffers[0] = cl::Buffer(pipeline.context, CL_MEM_READ_WRITE, buffer0size);
devBuffers[1] = cl::Buffer(pipeline.context, CL_MEM_READ_WRITE, buffer1size);
size_t beamFormerWeightsSize = ps.nrStations() * ps.nrSubbands() * ps.nrTABs() * sizeof(std::complex<float>);
size_t beamFormerWeightsSize = ps.nrStations() * ps.nrSubbands() * ps.nrTABs(SAP) * sizeof(std::complex<float>);
devBeamFormerWeights = cl::Buffer(pipeline.context, CL_MEM_READ_ONLY, beamFormerWeightsSize);
devInputSamples = devBuffers[0];
......@@ -1405,7 +1408,7 @@ UHEP_WorkQueue::UHEP_WorkQueue(UHEP_Pipeline &pipeline)
devFFTedData = devBuffers[0];
devInvFIRfilteredData = devBuffers[1];
devTriggerInfo = cl::Buffer(pipeline.context, CL_MEM_WRITE_ONLY, ps.nrTABs() * sizeof(TriggerInfo));
devTriggerInfo = cl::Buffer(pipeline.context, CL_MEM_WRITE_ONLY, ps.nrTABs(SAP) * sizeof(TriggerInfo));
}
......@@ -1695,10 +1698,10 @@ struct BeamFormerTest : public UnitTest
:
UnitTest(ps, "BeamFormer/BeamFormer.cl")
{
if (ps.nrStations() >= 5 && ps.nrSamplesPerChannel() >= 13 && ps.nrChannelsPerSubband() >= 7 && ps.nrTABs() >= 6) {
if (ps.nrStations() >= 5 && ps.nrSamplesPerChannel() >= 13 && ps.nrChannelsPerSubband() >= 7 && ps.nrTABs(SAP) >= 6) {
MultiArraySharedBuffer<std::complex<float>, 4> inputData(boost::extents[ps.nrStations()][ps.nrChannelsPerSubband()][ps.nrSamplesPerChannel()][NR_POLARIZATIONS], queue, CL_MEM_WRITE_ONLY, CL_MEM_READ_ONLY);
MultiArraySharedBuffer<std::complex<float>, 3> beamFormerWeights(boost::extents[ps.nrStations()][ps.nrChannelsPerSubband()][ps.nrTABs()], queue, CL_MEM_WRITE_ONLY, CL_MEM_READ_ONLY);
MultiArraySharedBuffer<std::complex<float>, 4> complexVoltages(boost::extents[ps.nrChannelsPerSubband()][ps.nrSamplesPerChannel()][ps.nrTABs()][NR_POLARIZATIONS], queue, CL_MEM_READ_ONLY, CL_MEM_READ_WRITE);
MultiArraySharedBuffer<std::complex<float>, 3> beamFormerWeights(boost::extents[ps.nrStations()][ps.nrChannelsPerSubband()][ps.nrTABs(SAP)], queue, CL_MEM_WRITE_ONLY, CL_MEM_READ_ONLY);
MultiArraySharedBuffer<std::complex<float>, 4> complexVoltages(boost::extents[ps.nrChannelsPerSubband()][ps.nrSamplesPerChannel()][ps.nrTABs(SAP)][NR_POLARIZATIONS], queue, CL_MEM_READ_ONLY, CL_MEM_READ_WRITE);
BeamFormerKernel beamFormer(ps, program, complexVoltages, inputData, beamFormerWeights);
inputData[4][6][12][1] = std::complex<float>(2.2, 3);
......@@ -1712,7 +1715,7 @@ struct BeamFormerTest : public UnitTest
check(complexVoltages[6][12][5][1], std::complex<float>(-6.2, 23));
#if 0
for (unsigned tab = 0; tab < ps.nrTABs(); tab ++)
for (unsigned tab = 0; tab < ps.nrTABs(SAP); tab ++)
for (unsigned pol = 0; pol < NR_POLARIZATIONS; pol ++)
for (unsigned ch = 0; ch < ps.nrChannelsPerSubband(); ch ++)
for (unsigned t = 0; t < ps.nrSamplesPerChannel(); t ++)
......@@ -1730,9 +1733,9 @@ struct BeamFormerTransposeTest : public UnitTest
:
UnitTest(ps, "BeamFormer/Transpose.cl")
{
if (ps.nrChannelsPerSubband() >= 19 && ps.nrSamplesPerChannel() >= 175 && ps.nrTABs() >= 5) {
MultiArraySharedBuffer<std::complex<float>, 4> transposedData(boost::extents[ps.nrTABs()][NR_POLARIZATIONS][ps.nrSamplesPerChannel()][ps.nrChannelsPerSubband()], queue, CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY);
MultiArraySharedBuffer<std::complex<float>, 4> complexVoltages(boost::extents[ps.nrChannelsPerSubband()][ps.nrSamplesPerChannel()][ps.nrTABs()][NR_POLARIZATIONS], queue, CL_MEM_READ_WRITE, CL_MEM_READ_ONLY);
if (ps.nrChannelsPerSubband() >= 19 && ps.nrSamplesPerChannel() >= 175 && ps.nrTABs(SAP) >= 5) {
MultiArraySharedBuffer<std::complex<float>, 4> transposedData(boost::extents[ps.nrTABs(SAP)][NR_POLARIZATIONS][ps.nrSamplesPerChannel()][ps.nrChannelsPerSubband()], queue, CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY);
MultiArraySharedBuffer<std::complex<float>, 4> complexVoltages(boost::extents[ps.nrChannelsPerSubband()][ps.nrSamplesPerChannel()][ps.nrTABs(SAP)][NR_POLARIZATIONS], queue, CL_MEM_READ_WRITE, CL_MEM_READ_ONLY);
BeamFormerTransposeKernel transpose(ps, program, transposedData, complexVoltages);
complexVoltages[18][174][4][1] = std::complex<float>(24, 42);
......@@ -1753,9 +1756,9 @@ struct DedispersionChirpTest : public UnitTest
:
UnitTest(ps, "BeamFormer/Dedispersion.cl")
{
if (ps.nrTABs() > 3 && ps.nrChannelsPerSubband() > 13 && ps.nrSamplesPerChannel() / ps.dedispersionFFTsize() > 1 && ps.dedispersionFFTsize() > 77) {
MultiArraySharedBuffer<std::complex<float>, 5> data(boost::extents[ps.nrTABs()][NR_POLARIZATIONS][ps.nrChannelsPerSubband()][ps.nrSamplesPerChannel() / ps.dedispersionFFTsize()][ps.dedispersionFFTsize()], queue, CL_MEM_READ_WRITE, CL_MEM_READ_WRITE);
MultiArraySharedBuffer<float, 1> DMs(boost::extents[ps.nrTABs()], queue, CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY);
if (ps.nrTABs(SAP) > 3 && ps.nrChannelsPerSubband() > 13 && ps.nrSamplesPerChannel() / ps.dedispersionFFTsize() > 1 && ps.dedispersionFFTsize() > 77) {
MultiArraySharedBuffer<std::complex<float>, 5> data(boost::extents[ps.nrTABs(SAP)][NR_POLARIZATIONS][ps.nrChannelsPerSubband()][ps.nrSamplesPerChannel() / ps.dedispersionFFTsize()][ps.dedispersionFFTsize()], queue, CL_MEM_READ_WRITE, CL_MEM_READ_WRITE);
MultiArraySharedBuffer<float, 1> DMs(boost::extents[ps.nrTABs(SAP)], queue, CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY);
DedispersionChirpKernel dedispersionChirpKernel(ps, program, queue, data, DMs);
data[3][1][13][1][77] = std::complex<float>(2, 3);
......@@ -1778,16 +1781,16 @@ struct CoherentStokesTest : public UnitTest
:
UnitTest(ps, "BeamFormer/CoherentStokes.cl")
{
if (ps.nrChannelsPerSubband() >= 19 && ps.nrSamplesPerChannel() >= 175 && ps.nrTABs() >= 5) {
MultiArraySharedBuffer<float, 4> stokesData(boost::extents[ps.nrTABs()][ps.nrCoherentStokes()][ps.nrSamplesPerChannel() / ps.coherentStokesTimeIntegrationFactor()][ps.nrChannelsPerSubband()], queue, CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY);
if (ps.nrChannelsPerSubband() >= 19 && ps.nrSamplesPerChannel() >= 175 && ps.nrTABs(SAP) >= 5) {
MultiArraySharedBuffer<float, 4> stokesData(boost::extents[ps.nrTABs(SAP)][ps.nrCoherentStokes()][ps.nrSamplesPerChannel() / ps.coherentStokesTimeIntegrationFactor()][ps.nrChannelsPerSubband()], queue, CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY);
#if 1
MultiArraySharedBuffer<std::complex<float>, 4> complexVoltages(boost::extents[ps.nrChannelsPerSubband()][ps.nrSamplesPerChannel()][ps.nrTABs()][NR_POLARIZATIONS], queue, CL_MEM_READ_WRITE, CL_MEM_READ_ONLY);
MultiArraySharedBuffer<std::complex<float>, 4> complexVoltages(boost::extents[ps.nrChannelsPerSubband()][ps.nrSamplesPerChannel()][ps.nrTABs(SAP)][NR_POLARIZATIONS], queue, CL_MEM_READ_WRITE, CL_MEM_READ_ONLY);
CoherentStokesKernel stokesKernel(ps, program, stokesData, complexVoltages);
complexVoltages[18][174][4][0] = std::complex<float>(2, 3);
complexVoltages[18][174][4][1] = std::complex<float>(4, 5);
#else
MultiArraySharedBuffer<std::complex<float>, 4> complexVoltages(boost::extents[ps.nrTABs()][NR_POLARIZATIONS][ps.nrSamplesPerChannel()][ps.nrChannelsPerSubband()], queue, CL_MEM_READ_WRITE, CL_MEM_READ_ONLY);
MultiArraySharedBuffer<std::complex<float>, 4> complexVoltages(boost::extents[ps.nrTABs(SAP)][NR_POLARIZATIONS][ps.nrSamplesPerChannel()][ps.nrChannelsPerSubband()], queue, CL_MEM_READ_WRITE, CL_MEM_READ_ONLY);
CoherentStokesKernel stokesKernel(ps, program, stokesData, complexVoltages);
complexVoltages[18][174][4][0] = std::complex<float>(2, 3);
......@@ -1811,10 +1814,10 @@ struct UHEP_BeamFormerTest : public UnitTest
:
UnitTest(ps, "UHEP/BeamFormer.cl")
{
if (ps.nrStations() >= 5 && (ps.nrSamplesPerChannel() + NR_STATION_FILTER_TAPS - 1) >= 13 && ps.nrSubbands() >= 7 && ps.nrTABs() >= 6) {
if (ps.nrStations() >= 5 && (ps.nrSamplesPerChannel() + NR_STATION_FILTER_TAPS - 1) >= 13 && ps.nrSubbands() >= 7 && ps.nrTABs(SAP) >= 6) {
MultiArraySharedBuffer<char, 5> inputSamples(boost::extents[ps.nrStations()][ps.nrSubbands()][ps.nrSamplesPerChannel() + NR_STATION_FILTER_TAPS - 1][NR_POLARIZATIONS][ps.nrBytesPerComplexSample()], queue, CL_MEM_WRITE_ONLY, CL_MEM_READ_ONLY);
MultiArraySharedBuffer<std::complex<float>, 3> beamFormerWeights(boost::extents[ps.nrStations()][ps.nrSubbands()][ps.nrTABs()], queue, CL_MEM_WRITE_ONLY, CL_MEM_READ_ONLY);
MultiArraySharedBuffer<std::complex<float>, 4> complexVoltages(boost::extents[ps.nrSubbands()][ps.nrSamplesPerChannel() + NR_STATION_FILTER_TAPS - 1][ps.nrTABs()][NR_POLARIZATIONS], queue, CL_MEM_READ_ONLY, CL_MEM_READ_WRITE);
MultiArraySharedBuffer<std::complex<float>, 3> beamFormerWeights(boost::extents[ps.nrStations()][ps.nrSubbands()][ps.nrTABs(SAP)], queue, CL_MEM_WRITE_ONLY, CL_MEM_READ_ONLY);
MultiArraySharedBuffer<std::complex<float>, 4> complexVoltages(boost::extents[ps.nrSubbands()][ps.nrSamplesPerChannel() + NR_STATION_FILTER_TAPS - 1][ps.nrTABs(SAP)][NR_POLARIZATIONS], queue, CL_MEM_READ_ONLY, CL_MEM_READ_WRITE);
UHEP_BeamFormerKernel beamFormer(ps, program, complexVoltages, inputSamples, beamFormerWeights);
switch (ps.nrBytesPerComplexSample()) {
......@@ -1847,9 +1850,9 @@ struct UHEP_TransposeTest : public UnitTest
:
UnitTest(ps, "UHEP/Transpose.cl")
{
if (ps.nrSubbands() >= 19 && ps.nrSamplesPerChannel() + NR_STATION_FILTER_TAPS - 1 >= 175 && ps.nrTABs() >= 5) {
MultiArraySharedBuffer<std::complex<float>, 4> transposedData(boost::extents[ps.nrTABs()][NR_POLARIZATIONS][ps.nrSamplesPerChannel() + NR_STATION_FILTER_TAPS - 1][512], queue, CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY);
MultiArraySharedBuffer<std::complex<float>, 4> complexVoltages(boost::extents[ps.nrSubbands()][ps.nrSamplesPerChannel() + NR_STATION_FILTER_TAPS - 1][ps.nrTABs()][NR_POLARIZATIONS], queue, CL_MEM_READ_WRITE, CL_MEM_READ_ONLY);
if (ps.nrSubbands() >= 19 && ps.nrSamplesPerChannel() + NR_STATION_FILTER_TAPS - 1 >= 175 && ps.nrTABs(SAP) >= 5) {
MultiArraySharedBuffer<std::complex<float>, 4> transposedData(boost::extents[ps.nrTABs(SAP)][NR_POLARIZATIONS][ps.nrSamplesPerChannel() + NR_STATION_FILTER_TAPS - 1][512], queue, CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY);
MultiArraySharedBuffer<std::complex<float>, 4> complexVoltages(boost::extents[ps.nrSubbands()][ps.nrSamplesPerChannel() + NR_STATION_FILTER_TAPS - 1][ps.nrTABs(SAP)][NR_POLARIZATIONS], queue, CL_MEM_READ_WRITE, CL_MEM_READ_ONLY);
cl::Buffer devReverseSubbandMapping(context, CL_MEM_READ_ONLY, 512 * sizeof(int));
UHEP_TransposeKernel transpose(ps, program, transposedData, complexVoltages, devReverseSubbandMapping);
......@@ -1872,9 +1875,9 @@ struct UHEP_TriggerTest : public UnitTest
:
UnitTest(ps, "UHEP/Trigger.cl")
{
if (ps.nrTABs() >= 4 && 1024 * ps.nrSamplesPerChannel() > 100015) {
MultiArraySharedBuffer<float, 3> inputData(boost::extents[ps.nrTABs()][NR_POLARIZATIONS][ps.nrSamplesPerChannel() * 1024], queue, CL_MEM_WRITE_ONLY, CL_MEM_READ_ONLY);
MultiArraySharedBuffer<TriggerInfo, 1> triggerInfo(boost::extents[ps.nrTABs()], queue, CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY);
if (ps.nrTABs(SAP) >= 4 && 1024 * ps.nrSamplesPerChannel() > 100015) {
MultiArraySharedBuffer<float, 3> inputData(boost::extents[ps.nrTABs(SAP)][NR_POLARIZATIONS][ps.nrSamplesPerChannel() * 1024], queue, CL_MEM_WRITE_ONLY, CL_MEM_READ_ONLY);
MultiArraySharedBuffer<TriggerInfo, 1> triggerInfo(boost::extents[ps.nrTABs(SAP)], queue, CL_MEM_READ_ONLY, CL_MEM_WRITE_ONLY);
UHEP_TriggerKernel trigger(ps, program, triggerInfo, inputData);
inputData[3][1][100015] = 1000;
......@@ -1993,7 +1996,7 @@ int main(int argc, char **argv)
ps.nrSubbands() = 488;
ps.nrChannelsPerSubband() = 2048;
ps.nrBeams() = 1;
ps.nrTABs() = 128;
ps.nrTABs(SAP) = 128;
ps.nrIncoherentStokes() = 4;
ps.nrCoherentStokes() = 4;
ps.incoherentStokesTimeIntegrationFactor() = 8;
......@@ -2018,7 +2021,7 @@ int main(int argc, char **argv)
ps.nrSamplesPerChannel() = 1024;
ps.nrBeams() = 1;
ps.subbandBandwidth() = 195312.5;
ps.nrTABs() = 48;
ps.nrTABs(SAP) = 48;
profiling = false; UHEP_Pipeline(ps).doWork();
profiling = true; UHEP_Pipeline(ps).doWork();
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment