diff --git a/.gitattributes b/.gitattributes index cb42d5500f666ff55664a04f7f732c72ea977eb8..8dd2e56af6db51fab1ab9c611b798d167373a37c 100644 --- a/.gitattributes +++ b/.gitattributes @@ -3976,6 +3976,7 @@ RTCP/Cobalt/GPUProc/test/Kernels/tKernelFunctions.sh eol=lf RTCP/Cobalt/GPUProc/test/Kernels/tKernelPerformance.py eol=lf RTCP/Cobalt/GPUProc/test/Kernels/tKernelPerformance.run eol=lf RTCP/Cobalt/GPUProc/test/Kernels/tKernelPerformance.sh eol=lf +RTCP/Cobalt/GPUProc/test/Kernels/tZeroingKernel.sh eol=lf RTCP/Cobalt/GPUProc/test/Kernels/visualizeBeamformer.py eol=lf RTCP/Cobalt/GPUProc/test/Pipelines/tCorrelatorPipelineProcessObs.sh eol=lf RTCP/Cobalt/GPUProc/test/Storage/tStorageProcesses.queue -text diff --git a/RTCP/Cobalt/CoInterface/src/Parset.cc b/RTCP/Cobalt/CoInterface/src/Parset.cc index fcc317a4089037c99197330866741d64de50399f..42a383800ad76d6fa88961975585eeab64cbc1bf 100644 --- a/RTCP/Cobalt/CoInterface/src/Parset.cc +++ b/RTCP/Cobalt/CoInterface/src/Parset.cc @@ -618,18 +618,6 @@ namespace LOFAR if (settings.beamFormer.enabled) { // Parse global settings - - // 4096 channels is enough, but allow parset override. - if (!isDefined("Cobalt.BeamFormer.nrHighResolutionChannels")) { - settings.beamFormer.nrHighResolutionChannels = 4096; - } else { - settings.beamFormer.nrHighResolutionChannels = - getUint32("Cobalt.BeamFormer.nrHighResolutionChannels"); - ASSERTSTR(powerOfTwo(settings.beamFormer.nrHighResolutionChannels) && - settings.beamFormer.nrHighResolutionChannels < 65536, - "Parset: Cobalt.BeamFormer.nrHighResolutionChannels must be a power of 2 and < 64k"); - } - settings.beamFormer.doFlysEye = getBool("Cobalt.BeamFormer.flysEye", false); unsigned nrDelayCompCh; @@ -638,9 +626,6 @@ namespace LOFAR } else { nrDelayCompCh = getUint32("Cobalt.BeamFormer.nrDelayCompensationChannels"); } - if (nrDelayCompCh > settings.beamFormer.nrHighResolutionChannels) { - nrDelayCompCh = settings.beamFormer.nrHighResolutionChannels; - } settings.beamFormer.nrDelayCompensationChannels = nrDelayCompCh; // Derive antennaFields to use for beam forming diff --git a/RTCP/Cobalt/CoInterface/src/Parset.h b/RTCP/Cobalt/CoInterface/src/Parset.h index fea46954dc81e08c60beadd923330358b4778e82..4f9f67e11334f80c9bc8903db26d01aa287782be 100644 --- a/RTCP/Cobalt/CoInterface/src/Parset.h +++ b/RTCP/Cobalt/CoInterface/src/Parset.h @@ -589,11 +589,6 @@ namespace LOFAR // Equal to the size of the first FFT. Power of two. unsigned nrDelayCompensationChannels; - // Number of channels per subband for bandpass correction, narrow band - // flagging, beamforming, and coherent dedispersion. - // Power of two and at least nrDelayCompensationChannels. - unsigned nrHighResolutionChannels; - // Are we in fly's eye mode? bool doFlysEye; diff --git a/RTCP/Cobalt/CoInterface/src/SubbandMetaData.h b/RTCP/Cobalt/CoInterface/src/SubbandMetaData.h index c50996d90d510660c8dfdda23979d7663484676b..a22286dbff27e49d590b3edaf2fbe11f736bae57 100644 --- a/RTCP/Cobalt/CoInterface/src/SubbandMetaData.h +++ b/RTCP/Cobalt/CoInterface/src/SubbandMetaData.h @@ -54,8 +54,11 @@ namespace LOFAR void read(Stream *str); void write(Stream *str) const; + // Maximum number of flags ranges to marshall + static const size_t MAXNRFLAGRANGES = 512; + // Maximum size of the buffer to marshall flags - static const size_t MAXFLAGSIZE = 8192 + 4; + static const size_t MAXFLAGSIZE = MAXNRFLAGRANGES * 2 * sizeof(unsigned) + 4; // Maximum number of TABs we'll support when marshalling static const size_t MAXNRTABS = 512; diff --git a/RTCP/Cobalt/GPUProc/doc/pipeline-buffers.txt b/RTCP/Cobalt/GPUProc/doc/pipeline-buffers.txt index 782355349a1a9068e85ac128d714eefadcd034b3..347929e4e1f1424c800351af09d694074594fa1f 100644 --- a/RTCP/Cobalt/GPUProc/doc/pipeline-buffers.txt +++ b/RTCP/Cobalt/GPUProc/doc/pipeline-buffers.txt @@ -46,7 +46,10 @@ NB: Numbers are for 80 antenna fields. FFT (if >1ch) {out-of-place} | [station][pol][sample][channel] [80][2][3072][64] = 240 MiB Nch: E V -Delay compensation + Band pass + Transpose {I/O: delays} +Zeroing (in-place) + | [station][pol][sample][channel] [80][2][3072][64] = 240 MiB Nch: E + V +Delay compensation (channel) + Band pass + Transpose {I/O: delays} | [station][channel][sample][pol] [80][64][3072][2] = 240 MiB B V Correlator @@ -67,15 +70,12 @@ IntToFloat + Transpose + FFT-shift FFT-64 {inplace} | [station][pol][sample][channel] [48][2][3072][64] = 144 MiB B V -Delay compensation + Transpose (implicit, DO_TRANSPOSE not defined) {I/O: delays} - | [station][pol][channel][sample] [48][2][64][3072] = 144 MiB A +Zeroing (in-place) + | [station][pol][sample][channel] [48][2][3072][64] = 144 MiB B V -FFT-shift {inplace} +Delay compensation + Transpose (implicit, DO_TRANSPOSE not defined) {I/O: delays} | [station][pol][channel][sample] [48][2][64][3072] = 144 MiB A V -FFT-64 {inplace} - | [station][pol][chan1][sample][chan2] [48][2][64][48][64] = 144 MiB A - V BandPass + Transpose {I/O: weights} | [station][chan1][chan2][sample][pol] [48][64][64][48][2] = 144 MiB B V = [station][channel][sample][pol] diff --git a/RTCP/Cobalt/GPUProc/share/gpu/kernels/Zeroing.cu b/RTCP/Cobalt/GPUProc/share/gpu/kernels/Zeroing.cu new file mode 100644 index 0000000000000000000000000000000000000000..597d0c04e0af1fe4a502cd59d36210d48eb77a73 --- /dev/null +++ b/RTCP/Cobalt/GPUProc/share/gpu/kernels/Zeroing.cu @@ -0,0 +1,96 @@ +//# Zeroing.cu: zero ranges of samples +//# Copyright (C) 2012-2013 ASTRON (Netherlands Institute for Radio Astronomy) +//# P.O. Box 2, 7990 AA Dwingeloo, The Netherlands +//# +//# This file is part of the LOFAR software suite. +//# The LOFAR software suite is free software: you can redistribute it and/or +//# modify it under the terms of the GNU General Public License as published +//# by the Free Software Foundation, either version 3 of the License, or +//# (at your option) any later version. +//# +//# The LOFAR software suite is distributed in the hope that it will be useful, +//# but WITHOUT ANY WARRANTY; without even the implied warranty of +//# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +//# GNU General Public License for more details. +//# +//# You should have received a copy of the GNU General Public License along +//# with the LOFAR software suite. If not, see <http://www.gnu.org/licenses/>. +//# +//# $Id$ + +#include "gpu_math.cuh" +#include <stdio.h> + +typedef float2 FilteredDataType[NR_STABS][NR_POLARIZATIONS][NR_SAMPLES_PER_CHANNEL][NR_CHANNELS]; + +// This format much match the output of SubbandMetaData::marshall +typedef struct { + int nrRanges; + int ranges[MAX_NR_RANGES][2]; // [begin, end) +} marshalledRanges; + +typedef marshalledRanges RangesType[NR_STABS]; + +/** + * Zero samples that have been flagged. Clears samples for all channels for + * ranged specified per station. + * + * @param[data] a multi-dimensional array with time samples of type complex + * float in the last dimension. + * @param[ranges] an array of ranges of samples to flag, compatible with SubbandMetaData::marshall + */ + +extern "C" +{ + __global__ void Zeroing(FilteredDataType data, + RangesType ranges) + { + int sample = blockIdx.x * blockDim.x + threadIdx.x; + int pol = blockIdx.y % 2; + int channel = blockIdx.y / 2; + int station = threadIdx.z; + + // Determine whether we have to clear our sample + bool clear = false; + +#if 1 + // Binary search, needed to avoid a worst-case scan of all ranges + + int lo = 0, hi = ranges[station].nrRanges - 1; + + while (lo <= hi) { + int mid = (lo + hi) / 2; + + int from = ranges[station].ranges[mid][0]; + int to = ranges[station].ranges[mid][1]; + + if (sample >= from && sample < to) { + clear = true; + break; + } + + if (sample < from) { + hi = mid - 1; + } else { + lo = mid + 1; + } + } +#else + // Linear search, faster for a small number of flags + for (int range = 0; range < ranges[station].nrRanges; range++) { + int from = ranges[station].ranges[range][0]; + int to = ranges[station].ranges[range][1]; + + if (sample >= from && sample < to) + clear = true; + } +#endif + + __syncthreads(); + + if (clear) { + // Clear our sample + data[station][pol][sample][channel] = make_float2(0.0f, 0.0f); + } + } +} diff --git a/RTCP/Cobalt/GPUProc/src/CMakeLists.txt b/RTCP/Cobalt/GPUProc/src/CMakeLists.txt index fbc4e762035d4ad5f105eecbfe579d76eba16874..2592aa280ea31d17bfb36a72634d897379f82a25 100644 --- a/RTCP/Cobalt/GPUProc/src/CMakeLists.txt +++ b/RTCP/Cobalt/GPUProc/src/CMakeLists.txt @@ -13,6 +13,7 @@ set(_gpuproc_sources CommandThread.cc cpu_utils.cc FilterBank.cc + Flagger.cc global_defines.cc MPIReceiver.cc Package__Version.cc @@ -49,6 +50,7 @@ if(USE_CUDA) cuda/Kernels/IncoherentStokesTransposeKernel.cc cuda/Kernels/IntToFloatKernel.cc cuda/Kernels/FFTShiftKernel.cc + cuda/Kernels/ZeroingKernel.cc #cuda/Kernels/UHEP_BeamFormerKernel.cc #cuda/Kernels/UHEP_InvFFT_Kernel.cc #cuda/Kernels/UHEP_InvFIR_Kernel.cc diff --git a/RTCP/Cobalt/GPUProc/src/Flagger.cc b/RTCP/Cobalt/GPUProc/src/Flagger.cc new file mode 100644 index 0000000000000000000000000000000000000000..31d3e92c27a4a00fd89d677d73898997ca3831e2 --- /dev/null +++ b/RTCP/Cobalt/GPUProc/src/Flagger.cc @@ -0,0 +1,104 @@ +//# Flagger.cc +//# Copyright (C) 2012-2013 ASTRON (Netherlands Institute for Radio Astronomy) +//# P.O. Box 2, 7990 AA Dwingeloo, The Netherlands +//# +//# This file is part of the LOFAR software suite. +//# The LOFAR software suite is free software: you can redistribute it and/or +//# modify it under the terms of the GNU General Public License as published +//# by the Free Software Foundation, either version 3 of the License, or +//# (at your option) any later version. +//# +//# The LOFAR software suite is distributed in the hope that it will be useful, +//# but WITHOUT ANY WARRANTY; without even the implied warranty of +//# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +//# GNU General Public License for more details. +//# +//# You should have received a copy of the GNU General Public License along +//# with the LOFAR software suite. If not, see <http://www.gnu.org/licenses/>. +//# +//# $Id$ + +#include <lofar_config.h> + +#include <GPUProc/Flagger.h> +#include <Common/LofarLogger.h> + +namespace LOFAR +{ + namespace Cobalt + { + void Flagger::convertFlagsToChannelFlags( + MultiDimArray<LOFAR::SparseSet<unsigned>, 1>const &inputFlags, + MultiDimArray<SparseSet<unsigned>, 1>& flagsPerChannel, + const unsigned nrSamplesPerChannel, + const unsigned nrChannels, + const ssize_t nrPrefixedSamples) + { + ASSERT(inputFlags.num_elements() == flagsPerChannel.num_elements()); + + // If nrChannels == 1, we do not expect nrPrefixedSamples + ASSERT(nrChannels > 1 || nrPrefixedSamples == 0); + + unsigned log2NrChannels = log2(nrChannels); + + // Convert the flags per sample to flags per channel + for (unsigned station = 0; station < inputFlags.num_elements(); station ++) + { + // reset the channel flags for this station + flagsPerChannel[station].reset(); + + // get the flag ranges + const SparseSet<unsigned>::Ranges &ranges = inputFlags[station].getRanges(); + for (SparseSet<unsigned>::const_iterator it = ranges.begin(); + it != ranges.end(); it ++) + { + unsigned begin_idx; + unsigned end_idx; + if (nrChannels == 1) + { + // do nothing, just take the ranges as supplied + begin_idx = it->begin; + end_idx = std::min(nrSamplesPerChannel, it->end); + } + else + { + // Never flag before the start of the time range + // use bitshift to divide to the number of channels. + // + // In case of nrPrefixedSamples, there are FIR Filter + // samples in front of those who we split the flags for. + // In that case, nrPrefixedSamples == NR_TAPS - 1. + // + // NR_TAPS is the width of the filter: they are + // absorbed by the FIR and thus should be excluded + // from the original flag set. + // + // The original flag set can span up to + // [0, nrSamplesPerBlock + nrChannels * (NR_TAPS - 1)) + // of which the FIRST (NR_TAPS - 1) samples belong to + // the previous block, and are used to initialise the + // FIR filter. Every sample i of the current block is thus + // actually at index (i + nrChannels * (NR_TAPS - 1)), + // or, after converting to channels, at index (i' + NR_TAPS - 1). + // + // At the same time, every sample is affected by + // the NR_TAPS-1 samples before it. So, any flagged + // sample in the input flags NR_TAPS samples in + // the channel. + begin_idx = std::max(0L, + (signed) (it->begin >> log2NrChannels) - nrPrefixedSamples); + + // The min is needed, because flagging the last input + // samples would cause NR_TAPS subsequent samples to + // be flagged, which aren't necessarily part of this block. + end_idx = std::min(nrSamplesPerChannel, + ((it->end - 1) >> log2NrChannels) + 1); + } + + // Now copy the transformed ranges to the channelflags + flagsPerChannel[station].include(begin_idx, end_idx); + } + } + } + } +} diff --git a/RTCP/Cobalt/GPUProc/src/Flagger.h b/RTCP/Cobalt/GPUProc/src/Flagger.h new file mode 100644 index 0000000000000000000000000000000000000000..b56b629bf423c4030b55243752f8b988096392d1 --- /dev/null +++ b/RTCP/Cobalt/GPUProc/src/Flagger.h @@ -0,0 +1,48 @@ +//# CorrelatorStep.h +//# Copyright (C) 2012-2013 ASTRON (Netherlands Institute for Radio Astronomy) +//# P.O. Box 2, 7990 AA Dwingeloo, The Netherlands +//# +//# This file is part of the LOFAR software suite. +//# The LOFAR software suite is free software: you can redistribute it and/or +//# modify it under the terms of the GNU General Public License as published +//# by the Free Software Foundation, either version 3 of the License, or +//# (at your option) any later version. +//# +//# The LOFAR software suite is distributed in the hope that it will be useful, +//# but WITHOUT ANY WARRANTY; without even the implied warranty of +//# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +//# GNU General Public License for more details. +//# +//# You should have received a copy of the GNU General Public License along +//# with the LOFAR software suite. If not, see <http://www.gnu.org/licenses/>. +//# +//# $Id$ + +#ifndef LOFAR_GPUPROC_FLAGGER_H +#define LOFAR_GPUPROC_FLAGGER_H + +#include <CoInterface/MultiDimArray.h> +#include <CoInterface/SparseSet.h> + +namespace LOFAR +{ + namespace Cobalt + { + // Collection of functions to tranfer the input flags to the output. + class Flagger + { + public: + // Convert the flags from one channel to multiple channels, per station. + // If nrChannels > 1, nrPrefixedSamples are assumed to be already + // prepended to the input flags as a result of the FIR-filter history. + static void convertFlagsToChannelFlags( + MultiDimArray<SparseSet<unsigned>, 1>const &inputFlags, + MultiDimArray<SparseSet<unsigned>, 1>& flagsPerChannel, + const unsigned nrSamplesPerChannel, + const unsigned nrChannels, + const ssize_t nrPrefixedSamples); + }; + } +} + +#endif diff --git a/RTCP/Cobalt/GPUProc/src/Kernels/ZeroingKernel.h b/RTCP/Cobalt/GPUProc/src/Kernels/ZeroingKernel.h new file mode 100644 index 0000000000000000000000000000000000000000..b22aa3ae5535e4c7c857305b9bfacbeadf6aafbf --- /dev/null +++ b/RTCP/Cobalt/GPUProc/src/Kernels/ZeroingKernel.h @@ -0,0 +1,41 @@ +//# ZeroingKernel.h +//# +//# Copyright (C) 2013 ASTRON (Netherlands Institute for Radio Astronomy) +//# P.O. Box 2, 7990 AA Dwingeloo, The Netherlands +//# +//# This file is part of the LOFAR software suite. +//# The LOFAR software suite is free software: you can redistribute it and/or +//# modify it under the terms of the GNU General Public License as published +//# by the Free Software Foundation, either version 3 of the License, or +//# (at your option) any later version. +//# +//# The LOFAR software suite is distributed in the hope that it will be useful, +//# but WITHOUT ANY WARRANTY; without even the implied warranty of +//# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +//# GNU General Public License for more details. +//# +//# You should have received a copy of the GNU General Public License along +//# with the LOFAR software suite. If not, see <http://www.gnu.org/licenses/>. +//# +//# $Id$ + +// \file +// Include the right GPU API include with our options. + +#ifndef LOFAR_GPUPROC_ZEROING_KERNEL_H +#define LOFAR_GPUPROC_ZEROING_KERNEL_H + +#if defined (USE_CUDA) && defined (USE_OPENCL) +# error "Either CUDA or OpenCL must be enabled, not both" +#endif + +#if defined (USE_CUDA) +# include <GPUProc/cuda/Kernels/ZeroingKernel.h> +#elif defined (USE_OPENCL) +# include <GPUProc/opencl/Kernels/ZeroingKernel.h> +#else +# error "Either CUDA or OpenCL must be enabled, not neither" +#endif + +#endif + diff --git a/RTCP/Cobalt/GPUProc/src/cuda/Kernels/BandPassCorrectionKernel.cc b/RTCP/Cobalt/GPUProc/src/cuda/Kernels/BandPassCorrectionKernel.cc index 77ed74bc8fcb9a7026733ce61f40813b0933dd8e..32bf4da8a3350d84144d21954e43871b399dec2d 100644 --- a/RTCP/Cobalt/GPUProc/src/cuda/Kernels/BandPassCorrectionKernel.cc +++ b/RTCP/Cobalt/GPUProc/src/cuda/Kernels/BandPassCorrectionKernel.cc @@ -49,7 +49,7 @@ namespace LOFAR nrStations(ps.settings.beamFormer.antennaFieldNames.size()), nrDelayCompensationChannels(ps.settings.beamFormer.nrDelayCompensationChannels), - nrHighResolutionChannels(ps.settings.beamFormer.nrHighResolutionChannels), + nrHighResolutionChannels(ps.settings.beamFormer.nrDelayCompensationChannels), nrSamplesPerChannel(ps.settings.blockSize / nrHighResolutionChannels), correctBandPass(ps.settings.corrections.bandPass) diff --git a/RTCP/Cobalt/GPUProc/src/cuda/Kernels/BeamFormerKernel.cc b/RTCP/Cobalt/GPUProc/src/cuda/Kernels/BeamFormerKernel.cc index 410fc4171d95d43b21223aed886386b9783f2e52..ef83105acd97cc22dac1070ddc017ea776869fdd 100644 --- a/RTCP/Cobalt/GPUProc/src/cuda/Kernels/BeamFormerKernel.cc +++ b/RTCP/Cobalt/GPUProc/src/cuda/Kernels/BeamFormerKernel.cc @@ -52,7 +52,7 @@ namespace LOFAR delayIndices(ObservationSettings::AntennaFieldName::indices(ps.settings.beamFormer.antennaFieldNames, ps.settings.antennaFieldNames)), nrDelays(ps.settings.antennaFieldNames.size()), - nrChannels(ps.settings.beamFormer.nrHighResolutionChannels), + nrChannels(ps.settings.beamFormer.nrDelayCompensationChannels), nrSamplesPerChannel(ps.settings.blockSize / nrChannels), nrSAPs(ps.settings.beamFormer.SAPs.size()), diff --git a/RTCP/Cobalt/GPUProc/src/cuda/Kernels/BeamFormerTransposeKernel.cc b/RTCP/Cobalt/GPUProc/src/cuda/Kernels/BeamFormerTransposeKernel.cc index fff5f02513c288ab0579cb035949dea99c498c68..994c4eff04219de9c230ea906ff6d37367d6f20a 100644 --- a/RTCP/Cobalt/GPUProc/src/cuda/Kernels/BeamFormerTransposeKernel.cc +++ b/RTCP/Cobalt/GPUProc/src/cuda/Kernels/BeamFormerTransposeKernel.cc @@ -46,7 +46,7 @@ namespace LOFAR BeamFormerTransposeKernel::Parameters::Parameters(const Parset& ps) : Kernel::Parameters("beamFormerTranspose"), - nrChannels(ps.settings.beamFormer.nrHighResolutionChannels), + nrChannels(ps.settings.beamFormer.nrDelayCompensationChannels), nrSamplesPerChannel(ps.settings.blockSize / nrChannels), nrTABs(ps.settings.beamFormer.maxNrCoherentTABsPerSAP()) { diff --git a/RTCP/Cobalt/GPUProc/src/cuda/Kernels/CoherentStokesTransposeKernel.cc b/RTCP/Cobalt/GPUProc/src/cuda/Kernels/CoherentStokesTransposeKernel.cc index fa5b80092f7ae07c297983ac29d5a2f0037c2e53..c06839adbf82e7560cf2366d864eaa72ddafece4 100644 --- a/RTCP/Cobalt/GPUProc/src/cuda/Kernels/CoherentStokesTransposeKernel.cc +++ b/RTCP/Cobalt/GPUProc/src/cuda/Kernels/CoherentStokesTransposeKernel.cc @@ -46,7 +46,7 @@ namespace LOFAR CoherentStokesTransposeKernel::Parameters::Parameters(const Parset& ps) : Kernel::Parameters("coherentStokesTranspose"), - nrChannels(ps.settings.beamFormer.nrHighResolutionChannels), + nrChannels(ps.settings.beamFormer.nrDelayCompensationChannels), nrSamplesPerChannel(ps.settings.blockSize / nrChannels), nrTABs(ps.settings.beamFormer.maxNrCoherentTABsPerSAP()) diff --git a/RTCP/Cobalt/GPUProc/src/cuda/Kernels/IncoherentStokesTransposeKernel.cc b/RTCP/Cobalt/GPUProc/src/cuda/Kernels/IncoherentStokesTransposeKernel.cc index 184cb207f90c8f624a7322692bda2cb3ea3deeb8..7ab683197f763edf7f3822d66291be86d4de8223 100644 --- a/RTCP/Cobalt/GPUProc/src/cuda/Kernels/IncoherentStokesTransposeKernel.cc +++ b/RTCP/Cobalt/GPUProc/src/cuda/Kernels/IncoherentStokesTransposeKernel.cc @@ -44,7 +44,7 @@ namespace LOFAR IncoherentStokesTransposeKernel::Parameters::Parameters(const Parset& ps) : Kernel::Parameters("incoherentStokesTranspose"), nrStations(ps.settings.beamFormer.antennaFieldNames.size()), - nrChannels(ps.settings.beamFormer.nrHighResolutionChannels), + nrChannels(ps.settings.beamFormer.nrDelayCompensationChannels), nrSamplesPerChannel(ps.settings.blockSize / nrChannels), tileSize(16) diff --git a/RTCP/Cobalt/GPUProc/src/cuda/Kernels/ZeroingKernel.cc b/RTCP/Cobalt/GPUProc/src/cuda/Kernels/ZeroingKernel.cc new file mode 100644 index 0000000000000000000000000000000000000000..44f0488cef94f64203cf72c6c4012223f029a8cd --- /dev/null +++ b/RTCP/Cobalt/GPUProc/src/cuda/Kernels/ZeroingKernel.cc @@ -0,0 +1,139 @@ +//# ZeroingKernel.cc +//# Copyright (C) 2012-2013 ASTRON (Netherlands Institute for Radio Astronomy) +//# P.O. Box 2, 7990 AA Dwingeloo, The Netherlands +//# +//# This file is part of the LOFAR software suite. +//# The LOFAR software suite is free software: you can redistribute it and/or +//# modify it under the terms of the GNU General Public License as published +//# by the Free Software Foundation, either version 3 of the License, or +//# (at your option) any later version. +//# +//# The LOFAR software suite is distributed in the hope that it will be useful, +//# but WITHOUT ANY WARRANTY; without even the implied warranty of +//# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +//# GNU General Public License for more details. +//# +//# You should have received a copy of the GNU General Public License along +//# with the LOFAR software suite. If not, see <http://www.gnu.org/licenses/>. +//# +//# $Id$ + +#include <lofar_config.h> + +#include "ZeroingKernel.h" + +#include <GPUProc/gpu_utils.h> +#include <CoInterface/BlockID.h> +#include <CoInterface/Config.h> +#include <CoInterface/SubbandMetaData.h> +#include <Common/lofar_complex.h> + +#include <boost/lexical_cast.hpp> +#include <boost/format.hpp> + +#include <fstream> + +using boost::lexical_cast; +using boost::format; + +namespace LOFAR +{ + namespace Cobalt + { + string ZeroingKernel::theirSourceFile = "Zeroing.cu"; + string ZeroingKernel::theirFunction = "Zeroing"; + + ZeroingKernel::Parameters::Parameters(const Parset& ps, unsigned nrSTABs, unsigned nrChannels, const std::string &name): + Kernel::Parameters(name), + nrSTABs(nrSTABs), + + nrChannels(nrChannels), + nrSamplesPerChannel(ps.settings.blockSize / nrChannels) + { + dumpBuffers = + ps.getBool("Cobalt.Kernels.ZeroingKernel.dumpOutput", false); + dumpFilePattern = + str(format("L%d_SB%%03d_BL%%03d_ZeroingKernel.dat") % + ps.settings.observationID); + } + + + size_t ZeroingKernel::Parameters::bufferSize(BufferType bufferType) const + { + switch (bufferType) { + case ZeroingKernel::INPUT_DATA: + case ZeroingKernel::OUTPUT_DATA: // fall thru + return (size_t)nrSTABs * NR_POLARIZATIONS * + nrChannels * nrSamplesPerChannel * + sizeof(std::complex<float>); + + case ZeroingKernel::RANGES: + return (size_t)nrSTABs * SubbandMetaData::MAXFLAGSIZE * + sizeof(unsigned); + + default: + THROW(GPUProcException, "Invalid bufferType (" << bufferType << ")"); + } + } + + ZeroingKernel::ZeroingKernel(const gpu::Stream& stream, + const gpu::Module& module, + const Buffers& buffers, + const Parameters& params) : + CompiledKernel(stream, gpu::Function(module, theirFunction), buffers, params), + nrSTABs(params.nrSTABs), + gpuRanges(stream.getContext(), params.bufferSize(RANGES)), + hostRanges(stream.getContext(), params.bufferSize(RANGES)) + { + setArg(0, buffers.input); + setArg(1, gpuRanges); + + // Number of samples per channel must be even + ASSERT(params.nrSamplesPerChannel % 2 == 0); + + setEnqueueWorkSizes( + gpu::Grid(params.nrSamplesPerChannel, NR_POLARIZATIONS * params.nrChannels, params.nrSTABs), + gpu::Block(256 / params.nrChannels, NR_POLARIZATIONS * params.nrChannels, 1)); + } + + + void ZeroingKernel::enqueue(const BlockID &blockId, const MultiDimArray<SparseSet<unsigned>, 1> &channelFlags) + { + // marshall flags to GPU host buffer + const ptrdiff_t rangeSize = SubbandMetaData::MAXFLAGSIZE; + for(unsigned station = 0; station < nrSTABs; ++station) { + if (channelFlags[station].marshall(hostRanges.get<char>() + station * rangeSize, rangeSize) < 0) { + LOG_ERROR_STR("ZeroingKernel: Received more flags than can be marshalled to GPU"); + + // skip this set, by setting the nrRanges to 0 + *(reinterpret_cast<unsigned*>(hostRanges.get<char>() + station * rangeSize)) = 0; + } + } + + // Copy host buffer to GPU + itsStream.writeBuffer(gpuRanges, hostRanges, false); + + Kernel::enqueue(blockId); + } + + //-------- Template specializations for KernelFactory --------// + + template<> CompileDefinitions + KernelFactory<ZeroingKernel>::compileDefinitions() const + { + CompileDefinitions defs = + KernelFactoryBase::compileDefinitions(itsParameters); + + defs["NR_STABS"] = lexical_cast<string>(itsParameters.nrSTABs); + defs["NR_CHANNELS"] = lexical_cast<string>(itsParameters.nrChannels); + defs["NR_SAMPLES_PER_CHANNEL"] = + lexical_cast<string>(itsParameters.nrSamplesPerChannel); + + defs["MAX_NR_RANGES"] = + lexical_cast<string>(SubbandMetaData::MAXNRFLAGRANGES); + + return defs; + } + + } +} diff --git a/RTCP/Cobalt/GPUProc/src/cuda/Kernels/ZeroingKernel.h b/RTCP/Cobalt/GPUProc/src/cuda/Kernels/ZeroingKernel.h new file mode 100644 index 0000000000000000000000000000000000000000..8b84f58de515bf6c44381e0a62bfec3a11afff32 --- /dev/null +++ b/RTCP/Cobalt/GPUProc/src/cuda/Kernels/ZeroingKernel.h @@ -0,0 +1,92 @@ +//# ZeroingKernel.h +//# Copyright (C) 2012-2013 ASTRON (Netherlands Institute for Radio Astronomy) +//# P.O. Box 2, 7990 AA Dwingeloo, The Netherlands +//# +//# This file is part of the LOFAR software suite. +//# The LOFAR software suite is free software: you can redistribute it and/or +//# modify it under the terms of the GNU General Public License as published +//# by the Free Software Foundation, either version 3 of the License, or +//# (at your option) any later version. +//# +//# The LOFAR software suite is distributed in the hope that it will be useful, +//# but WITHOUT ANY WARRANTY; without even the implied warranty of +//# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +//# GNU General Public License for more details. +//# +//# You should have received a copy of the GNU General Public License along +//# with the LOFAR software suite. If not, see <http://www.gnu.org/licenses/>. +//# +//# $Id$ + +#ifndef LOFAR_GPUPROC_CUDA_ZEROING_KERNEL_H +#define LOFAR_GPUPROC_CUDA_ZEROING_KERNEL_H + +#include <CoInterface/Parset.h> +#include <CoInterface/MultiDimArray.h> +#include <CoInterface/SparseSet.h> + +#include <GPUProc/Kernels/Kernel.h> +#include <GPUProc/KernelFactory.h> +#include <GPUProc/gpu_wrapper.h> + +namespace LOFAR +{ + namespace Cobalt + { + class ZeroingKernel : public CompiledKernel + { + public: + static std::string theirSourceFile; + static std::string theirFunction; + + enum BufferType + { + INPUT_DATA, + OUTPUT_DATA, + RANGES + }; + + // Parameters that must be passed to the constructor of the + // IntToFloatKernel class. + struct Parameters : Kernel::Parameters + { + Parameters(const Parset& ps, unsigned nrSTABs, unsigned nrChannels, const std::string &name = "Zeroing"); + unsigned nrSTABs; + + unsigned nrChannels; + unsigned nrSamplesPerChannel; + + size_t bufferSize(BufferType bufferType) const; + }; + + // Construct a Zeroing kernel. + // \pre The number of samples per channel must be even. + // \pre The product of the number of stations, the number of + // polarizations, the number of channels per subband, and the number of + // samples per channel must be divisible by the maximum number of threads + // per block (typically 1024). + ZeroingKernel(const gpu::Stream &stream, + const gpu::Module &module, + const Buffers &buffers, + const Parameters ¶m); + + void enqueue(const BlockID &blockId, const MultiDimArray<SparseSet<unsigned>, 1> &channelFlags); + + private: + const unsigned nrSTABs; + + // The ranges of samples to clear + gpu::DeviceMemory gpuRanges; + gpu::HostMemory hostRanges; + }; + + //# -------- Template specializations for KernelFactory -------- #// + + template<> CompileDefinitions + KernelFactory<ZeroingKernel>::compileDefinitions() const; + } + +} + +#endif + diff --git a/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/BeamFormerCoherentStep.cc b/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/BeamFormerCoherentStep.cc index 9b02246211f9db52cb69e6d7d6185f26e4942703..237409c371f26bd2616288855599083a4f051f34 100644 --- a/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/BeamFormerCoherentStep.cc +++ b/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/BeamFormerCoherentStep.cc @@ -46,13 +46,13 @@ namespace LOFAR coherentTranspose(CoherentStokesTransposeKernel::Parameters(ps)), coherentInverseFFT(FFT_Kernel::Parameters( - ps.settings.beamFormer.nrHighResolutionChannels, + ps.settings.beamFormer.nrDelayCompensationChannels, ps.settings.beamFormer.maxNrCoherentTABsPerSAP() * NR_POLARIZATIONS * ps.settings.blockSize, false, "FFT (coherent, inverse)")), coherentInverseFFTShift(FFTShiftKernel::Parameters(ps, ps.settings.beamFormer.maxNrCoherentTABsPerSAP(), - ps.settings.beamFormer.nrHighResolutionChannels, + ps.settings.beamFormer.nrDelayCompensationChannels, "FFT-shift (coherent, inverse)")), coherentFirFilter( diff --git a/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/BeamFormerIncoherentStep.cc b/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/BeamFormerIncoherentStep.cc index 3cd289e16eeb703d751148a6976a713a718ef50c..f370ffe5833d6e8904a5bd07a16441bf34961ebf 100644 --- a/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/BeamFormerIncoherentStep.cc +++ b/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/BeamFormerIncoherentStep.cc @@ -45,12 +45,12 @@ namespace LOFAR incoherentStokesTranspose(IncoherentStokesTransposeKernel::Parameters(ps)), incoherentInverseFFT(FFT_Kernel::Parameters( - ps.settings.beamFormer.nrHighResolutionChannels, + ps.settings.beamFormer.nrDelayCompensationChannels, ps.settings.beamFormer.antennaFieldNames.size() * NR_POLARIZATIONS * ps.settings.blockSize, false, "FFT (incoherent, inverse)")), incoherentInverseFFTShift(FFTShiftKernel::Parameters(ps, ps.settings.beamFormer.antennaFieldNames.size(), - ps.settings.beamFormer.nrHighResolutionChannels, + ps.settings.beamFormer.nrDelayCompensationChannels, "FFT-shift (incoherent, inverse)")), incoherentFirFilter( diff --git a/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/BeamFormerPreprocessingStep.cc b/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/BeamFormerPreprocessingStep.cc index b2f4ca265050ecb272c54a4a8886d65402e3a399..11bf4c5d1a5e79115c1812c8a5c3e299a1bf159f 100644 --- a/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/BeamFormerPreprocessingStep.cc +++ b/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/BeamFormerPreprocessingStep.cc @@ -24,6 +24,7 @@ #include <GPUProc/global_defines.h> #include <GPUProc/gpu_wrapper.h> +#include <GPUProc/Flagger.h> #include <CoInterface/Parset.h> #include <ApplCommon/PosixTime.h> @@ -48,14 +49,11 @@ namespace LOFAR ps.settings.beamFormer.nrDelayCompensationChannels, "FFT-shift (beamformer)")), - delayCompensation(DelayAndBandPassKernel::Parameters(ps, false)), + zeroing(ZeroingKernel::Parameters(ps, + ps.settings.antennaFields.size(), + ps.settings.beamFormer.nrDelayCompensationChannels)), - secondFFT(FFT_Kernel::Parameters( - ps.settings.beamFormer.nrHighResolutionChannels / - ps.settings.beamFormer.nrDelayCompensationChannels, - ps.settings.beamFormer.antennaFieldNames.size() * NR_POLARIZATIONS * ps.settings.blockSize, - true, - "FFT (beamformer, 2nd)")), + delayCompensation(DelayAndBandPassKernel::Parameters(ps, false)), bandPassCorrection(BandPassCorrectionKernel::Parameters(ps)) { @@ -69,16 +67,13 @@ namespace LOFAR boost::shared_ptr<gpu::DeviceMemory> i_devA, boost::shared_ptr<gpu::DeviceMemory> i_devB) : - ProcessStep(parset, i_queue) + ProcessStep(parset, i_queue), + flagsPerChannel(boost::extents[parset.settings.antennaFields.size()]) { devA=i_devA; devB=i_devB; (void)context; - doSecondFFT = - (ps.settings.beamFormer.nrHighResolutionChannels / - ps.settings.beamFormer.nrDelayCompensationChannels) > 1; - // intToFloat + FFTShift: A -> B intToFloatKernel = std::auto_ptr<IntToFloatKernel>( factories.intToFloat.create(queue, *devA, *devB)); @@ -87,26 +82,17 @@ namespace LOFAR firstFFT = std::auto_ptr<FFT_Kernel>( factories.firstFFT.create(queue, *devB, *devB)); + // zeroing: B -> B + zeroingKernel = std::auto_ptr<ZeroingKernel>( + factories.zeroing.create(queue, *devB, *devB)); + // delayComp: B -> A delayCompensationKernel = std::auto_ptr<DelayAndBandPassKernel>( factories.delayCompensation.create(queue, *devB, *devA)); - // Only perform second FFTshift and FFT if we have to. - if (doSecondFFT) { - - // FFTShift: A -> A - secondFFTShiftKernel = std::auto_ptr<FFTShiftKernel>( - factories.fftShift.create(queue, *devA, *devA)); - - // FFT: A -> A - secondFFT = std::auto_ptr<FFT_Kernel>( - factories.secondFFT.create(queue, *devA, *devA)); - } - // bandPass: A -> B bandPassCorrectionKernel = std::auto_ptr<BandPassCorrectionKernel>( factories.bandPassCorrection.create(queue, *devA, *devB)); - } void BeamFormerPreprocessingStep::writeInput(const SubbandProcInputData &input) @@ -133,18 +119,24 @@ namespace LOFAR firstFFT->enqueue(input.blockID); + // Convert input flags to channel flags + Flagger::convertFlagsToChannelFlags( + input.inputFlags, + flagsPerChannel, + ps.settings.blockSize / ps.settings.beamFormer.nrDelayCompensationChannels, + ps.settings.beamFormer.nrDelayCompensationChannels, + 0); + + zeroingKernel->enqueue( + input.blockID, + flagsPerChannel); + // The centralFrequency and SAP immediate kernel args must outlive kernel runs. delayCompensationKernel->enqueue( input.blockID, ps.settings.subbands[input.blockID.globalSubbandIdx].centralFrequency, ps.settings.subbands[input.blockID.globalSubbandIdx].SAP); - if (doSecondFFT) { - secondFFTShiftKernel->enqueue(input.blockID); - - secondFFT->enqueue(input.blockID); - } - bandPassCorrectionKernel->enqueue( input.blockID); } diff --git a/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/BeamFormerPreprocessingStep.h b/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/BeamFormerPreprocessingStep.h index 3492a46b5f98e1a960365ab70403959a61a794bd..e9b46d3bc3c269e7356eb167b37dc5b542ae0839 100644 --- a/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/BeamFormerPreprocessingStep.h +++ b/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/BeamFormerPreprocessingStep.h @@ -31,6 +31,8 @@ #include <GPUProc/MultiDimArrayHostBuffer.h> #include <CoInterface/BlockID.h> +#include <CoInterface/SparseSet.h> +#include <CoInterface/MultiDimArray.h> #include "SubbandProcInputData.h" #include "SubbandProcOutputData.h" @@ -42,7 +44,7 @@ #include <GPUProc/Kernels/FFTShiftKernel.h> #include <GPUProc/Kernels/FFT_Kernel.h> #include <GPUProc/Kernels/IntToFloatKernel.h> - +#include <GPUProc/Kernels/ZeroingKernel.h> namespace LOFAR { @@ -59,9 +61,9 @@ namespace LOFAR KernelFactory<FFT_Kernel> firstFFT; KernelFactory<FFTShiftKernel> fftShift; - KernelFactory<DelayAndBandPassKernel> delayCompensation; + KernelFactory<ZeroingKernel> zeroing; - KernelFactory<FFT_Kernel> secondFFT; + KernelFactory<DelayAndBandPassKernel> delayCompensation; KernelFactory<BandPassCorrectionKernel> bandPassCorrection; }; @@ -78,6 +80,8 @@ namespace LOFAR void process(const SubbandProcInputData &input); private: + // Flags for FFT-ed data + MultiDimArray<SparseSet<unsigned>, 1> flagsPerChannel; //Data members boost::shared_ptr<gpu::DeviceMemory> devA; @@ -89,20 +93,14 @@ namespace LOFAR // First (64 points) FFT std::auto_ptr<FFT_Kernel> firstFFT; + // Zeroing flagged samples + std::auto_ptr<ZeroingKernel> zeroingKernel; + // Delay compensation std::auto_ptr<DelayAndBandPassKernel> delayCompensationKernel; - // Second FFT-shift - std::auto_ptr<FFTShiftKernel> secondFFTShiftKernel; - - // Second (64 points) FFT - std::auto_ptr<FFT_Kernel> secondFFT; - // Bandpass correction and tranpose std::auto_ptr<BandPassCorrectionKernel> bandPassCorrectionKernel; - - // Flag that indicates if we need to perform a second FFT - bool doSecondFFT; }; } } diff --git a/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/CorrelatorStep.cc b/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/CorrelatorStep.cc index 4807054c07e53cf932a32c452cda909550bbe233..b1ed83fe4b42817c68ce961e57bf593191e67d51 100644 --- a/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/CorrelatorStep.cc +++ b/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/CorrelatorStep.cc @@ -61,6 +61,12 @@ namespace LOFAR std::sqrt((double)ps.settings.correlator.nrChannels), "FIR (correlator)")) : NULL), + zeroing(ps.settings.correlator.nrChannels > 1 + ? new KernelFactory<ZeroingKernel>(ZeroingKernel::Parameters(ps, + ps.settings.antennaFields.size(), + ps.settings.correlator.nrChannels, + "Zeroing (correlator)")) + : NULL), delayAndBandPass(DelayAndBandPassKernel::Parameters(ps, true)), @@ -68,65 +74,6 @@ namespace LOFAR { } - void CorrelatorStep::Flagger::convertFlagsToChannelFlags(Parset const &ps, - MultiDimArray<LOFAR::SparseSet<unsigned>, 1>const &inputFlags, - MultiDimArray<SparseSet<unsigned>, 1>& flagsPerChannel) - { - unsigned numberOfChannels = ps.settings.correlator.nrChannels; - unsigned log2NrChannels = log2(numberOfChannels); - //Convert the flags per sample to flags per channel - for (unsigned station = 0; station < ps.settings.correlator.stations.size(); station ++) - { - // get the flag ranges - const SparseSet<unsigned>::Ranges &ranges = inputFlags[station].getRanges(); - for (SparseSet<unsigned>::const_iterator it = ranges.begin(); - it != ranges.end(); it ++) - { - unsigned begin_idx; - unsigned end_idx; - if (numberOfChannels == 1) - { - // do nothing, just take the ranges as supplied - begin_idx = it->begin; - end_idx = std::min(static_cast<unsigned>(ps.settings.correlator.nrSamplesPerBlock), it->end); - } - else - { - // Never flag before the start of the time range - // use bitshift to divide to the number of channels. - // - // NR_TAPS is the width of the filter: they are - // absorbed by the FIR and thus should be excluded - // from the original flag set. - // - // The original flag set can span up to - // [0, nrSamplesPerBlock + nrChannels * (NR_TAPS - 1)) - // of which the FIRST (NR_TAPS - 1) samples belong to - // the previous block, and are used to initialise the - // FIR filter. Every sample i of the current block is thus - // actually at index (i + nrChannels * (NR_TAPS - 1)), - // or, after converting to channels, at index (i' + NR_TAPS - 1). - // - // At the same time, every sample is affected by - // the NR_TAPS-1 samples before it. So, any flagged - // sample in the input flags NR_TAPS samples in - // the channel. - begin_idx = std::max(0, - (signed) (it->begin >> log2NrChannels) - NR_TAPS + 1); - - // The min is needed, because flagging the last input - // samples would cause NR_TAPS subsequent samples to - // be flagged, which aren't necessarily part of this block. - end_idx = std::min(static_cast<unsigned>(ps.settings.correlator.nrSamplesPerBlock), - ((it->end - 1) >> log2NrChannels) + 1); - } - - // Now copy the transformed ranges to the channelflags - flagsPerChannel[station].include(begin_idx, end_idx); - } - } - } - void CorrelatorStep::Flagger::propagateFlags( Parset const &parset, @@ -139,7 +86,12 @@ namespace LOFAR // First transform the flags to channel flags: taking in account // reduced resolution in time and the size of the filter - convertFlagsToChannelFlags(parset, inputFlags, flagsPerChannel); + Cobalt::Flagger::convertFlagsToChannelFlags( + inputFlags, + flagsPerChannel, + parset.settings.correlator.nrSamplesPerBlock, + parset.settings.correlator.nrChannels, + parset.settings.correlator.nrChannels == 1 ? 0 : NR_TAPS - 1); // Calculate the number of flags per baseline and assign to // output object. @@ -321,6 +273,9 @@ namespace LOFAR // FFT: B -> E fftKernel = factories.fft->create(queue, *devB, devE); + + // Zeroing: E -> E + zeroingKernel = factories.zeroing->create(queue, devE, devE); } // Delay and Bandpass: A/E -> B @@ -360,6 +315,24 @@ namespace LOFAR firFilterKernel->enqueue(input.blockID, input.blockID.subbandProcSubbandIdx); fftKernel->enqueue(input.blockID); + + // Process flags enough to determine which data to zero + MultiDimArray<LOFAR::SparseSet<unsigned>, 1> flags = input.inputFlags; + MultiDimArray<SparseSet<unsigned>, 1> flagsPerChannel( + boost::extents[ps.settings.antennaFields.size()]); + + firFilterKernel->prefixHistoryFlags( + flags, input.blockID.subbandProcSubbandIdx); + + Cobalt::Flagger::convertFlagsToChannelFlags( + flags, + flagsPerChannel, + ps.settings.blockSize, + ps.settings.correlator.nrChannels, + NR_TAPS - 1); + + // Zero the output of each FFT that had flagged input samples + zeroingKernel->enqueue(input.blockID, flagsPerChannel); } // Even if we skip delay compensation and bandpass correction (rare), run diff --git a/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/CorrelatorStep.h b/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/CorrelatorStep.h index 23be8fe062ee1eac057bfc370c65cfe849b529ab..048fb6f5fb5a114a812c8658b865ba76f4e85278 100644 --- a/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/CorrelatorStep.h +++ b/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/CorrelatorStep.h @@ -40,10 +40,12 @@ #include "ProcessStep.h" #include <GPUProc/PerformanceCounter.h> +#include <GPUProc/Flagger.h> #include <GPUProc/KernelFactory.h> #include <GPUProc/Kernels/DelayAndBandPassKernel.h> #include <GPUProc/Kernels/FIR_FilterKernel.h> #include <GPUProc/Kernels/FFT_Kernel.h> +#include <GPUProc/Kernels/ZeroingKernel.h> #include <GPUProc/Kernels/CorrelatorKernel.h> @@ -59,6 +61,7 @@ namespace LOFAR SmartPtr< KernelFactory<FFT_Kernel> > fft; SmartPtr< KernelFactory<FIR_FilterKernel> > firFilter; + SmartPtr< KernelFactory<ZeroingKernel> > zeroing; KernelFactory<DelayAndBandPassKernel> delayAndBandPass; @@ -86,7 +89,7 @@ namespace LOFAR // \c propagateFlags can be called parallel to the kernels. // After the data is copied from the the shared buffer // \c applyNrValidSamples can be used to weight the visibilities - class Flagger + class Flagger: public Cobalt::Flagger { public: // 1. Convert input flags to channel flags, calculate the amount flagged @@ -97,9 +100,7 @@ namespace LOFAR // 1.1 Convert the flags per station to channel flags, change time scale // if nchannel > 1 - static void convertFlagsToChannelFlags(Parset const &ps, - MultiDimArray<SparseSet<unsigned>, 1> const &inputFlags, - MultiDimArray<SparseSet<unsigned>, 1> &flagsPerChannel); + // (Uses convertFlagsToChannelFlags) // 2. Calculate the weight based on the number of flags and apply this // weighting to all output values @@ -147,6 +148,9 @@ namespace LOFAR // FFT SmartPtr<FFT_Kernel> fftKernel; + // Zeroing + SmartPtr<ZeroingKernel> zeroingKernel; + // Delay and Bandpass std::auto_ptr<DelayAndBandPassKernel> delayAndBandPassKernel; diff --git a/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/SubbandProcInputData.cc b/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/SubbandProcInputData.cc index fa4e3c752050e3c59c80392b68b914ce385a1f03..4a01bdca66162ed159787dfa18903a2d01bb8bb4 100644 --- a/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/SubbandProcInputData.cc +++ b/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/SubbandProcInputData.cc @@ -77,7 +77,8 @@ namespace LOFAR // extract and apply the flags inputFlags[station] = metaData.flags; - flagInputSamples(station, metaData); + // NOTE: We do not zero flagged samples here anymore, + // as we are using the ZeroingKernel to do so. // extract and assign the delays for the station beams @@ -115,29 +116,6 @@ namespace LOFAR tabDelays[SAP][station][tab] = 0.0; } } - - - // flag the input samples. - void SubbandProcInputData::flagInputSamples(unsigned station, - const SubbandMetaData& metaData) - { - - // Get the size of a sample in bytes. - size_t sizeof_sample = sizeof *inputSamples.origin(); - - // Calculate the number elements to skip when striding over the second - // dimension of inputSamples. - size_t stride = inputSamples[station][0].num_elements(); - - // Zero the bytes in the input data for the flagged ranges. - for(SparseSet<unsigned>::const_iterator it = metaData.flags.getRanges().begin(); - it != metaData.flags.getRanges().end(); ++it) - { - void *offset = inputSamples[station][it->begin].origin(); - size_t size = stride * (it->end - it->begin) * sizeof_sample; - memset(offset, 0, size); - } - } } } diff --git a/RTCP/Cobalt/GPUProc/test/Kernels/CMakeLists.txt b/RTCP/Cobalt/GPUProc/test/Kernels/CMakeLists.txt index f26a7e0014098581dbf51ca0b889cdcdec031388..b57f2cf655f81ab8357305828dc74b88a7b0b568 100644 --- a/RTCP/Cobalt/GPUProc/test/Kernels/CMakeLists.txt +++ b/RTCP/Cobalt/GPUProc/test/Kernels/CMakeLists.txt @@ -32,6 +32,7 @@ if(UNITTEST++_FOUND AND BUILD_TESTING) lofar_add_test(tFFT_Kernel tFFT_Kernel.cc) lofar_add_test(tFFTShiftKernel tFFTShiftKernel.cc) lofar_add_test(tFIR_FilterKernel tFIR_FilterKernel.cc) + lofar_add_test(tZeroingKernel tZeroingKernel.cc) lofar_add_test(tKernelFunctions tKernelFunctions.cc) lofar_add_test(tCoherentStokesKernel tCoherentStokesKernel.cc KernelTestHelpers.cc) @@ -44,6 +45,7 @@ if(UNITTEST++_FOUND AND BUILD_TESTING) tFFTShiftKernel tKernelFunctions tCoherentStokesKernel + tZeroingKernel PROPERTIES ENVIRONMENT "LOFARROOT=${PACKAGE_SOURCE_DIR}" ) # This test uses quite a lot of memory, so force it to run serially. diff --git a/RTCP/Cobalt/GPUProc/test/Kernels/tZeroingKernel.cc b/RTCP/Cobalt/GPUProc/test/Kernels/tZeroingKernel.cc new file mode 100644 index 0000000000000000000000000000000000000000..625e0044e13f65b947fae1cd9ab74c96128c5347 --- /dev/null +++ b/RTCP/Cobalt/GPUProc/test/Kernels/tZeroingKernel.cc @@ -0,0 +1,277 @@ +//# tZeroingKernel.cc: test ZeroingKernel class +//# +//# Copyright (C) 2013 ASTRON (Netherlands Institute for Radio Astronomy) +//# P.O. Box 2, 7990 AA Dwingeloo, The Netherlands +//# +//# This file is part of the LOFAR software suite. +//# The LOFAR software suite is free software: you can redistribute it and/or +//# modify it under the terms of the GNU General Public License as published +//# by the Free Software Foundation, either version 3 of the License, or +//# (at your option) any later version. +//# +//# The LOFAR software suite is distributed in the hope that it will be useful, +//# but WITHOUT ANY WARRANTY; without even the implied warranty of +//# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +//# GNU General Public License for more details. +//# +//# You should have received a copy of the GNU General Public License along +//# with the LOFAR software suite. If not, see <http://www.gnu.org/licenses/>. +//# +//# $Id$ + +#include <lofar_config.h> + + +#include <lofar_config.h> + +#include <GPUProc/Kernels/ZeroingKernel.h> +#include <GPUProc/MultiDimArrayHostBuffer.h> +#include <CoInterface/BlockID.h> +#include <CoInterface/SubbandMetaData.h> +#include <CoInterface/Config.h> +#include <CoInterface/Parset.h> +#include <Common/LofarLogger.h> + +#include <UnitTest++.h> +#include <boost/format.hpp> +#include <boost/lexical_cast.hpp> +#include <boost/scoped_ptr.hpp> +#include <iostream> +#include <iomanip> +#include <vector> + +using namespace std; +using namespace boost; +using namespace LOFAR; +using namespace LOFAR::Cobalt; + +typedef complex<float> fcomplex; + +// Fixture for testing correct translation of parset values +struct ParsetSUT +{ + size_t nrChannels, nrStations, nrSamplesSubband, nrBlockSize; + + Parset parset; + + ParsetSUT(size_t nrChannels, size_t nrStations, + size_t nrSamplesChannel) + : + nrChannels(nrChannels), + nrStations(nrStations), + nrSamplesSubband(nrSamplesChannel * nrChannels), + nrBlockSize(nrSamplesSubband) + { + // 4 for number of stokes + parset.add("Observation.DataProducts.Output_Correlated.enabled", "true"); + parset.add("Cobalt.Correlator.nrChannelsPerSubband", lexical_cast<string>(nrChannels)); + parset.add("Observation.VirtualInstrument.stationList", + str(format("[%d*RS000]") % nrStations)); + parset.add("Observation.antennaSet", "LBA_INNER"); + parset.add("Observation.rspBoardList", "[0]"); + parset.add("Observation.rspSlotList", "[0]"); + parset.add("Cobalt.blockSize", + lexical_cast<string>(nrBlockSize)); + parset.add("Observation.nrBeams", "1"); + parset.add("Observation.Beam[0].subbandList", "[0]"); + parset.add("Observation.DataProducts.Output_Correlated.filenames", "[dummy.raw]"); + parset.add("Observation.DataProducts.Output_Correlated.locations", "[:.]"); + //parset.add(""); //ps.settings.beamFormer.nrDelayCompensationChannels + parset.updateSettings(); + + } +}; + + +struct SUTWrapper : ParsetSUT +{ + gpu::Device device; + gpu::Context context; + gpu::Stream stream; + size_t nrSTABs; + KernelFactory<ZeroingKernel> factory; + MultiDimArrayHostBuffer<fcomplex, 4> hData; + MultiDimArrayHostBuffer<fcomplex, 4> hRefOutput; + gpu::DeviceMemory deviceMemory; + scoped_ptr<ZeroingKernel> kernel; + + SUTWrapper(size_t nrChannels , size_t nrStations, size_t nrSamplesPerChannel) : + ParsetSUT(nrChannels, nrStations, nrSamplesPerChannel), + device(gpu::Platform().devices()[0]), + context(device), + stream(context), + nrSTABs(parset.settings.antennaFields.size()), + factory(ZeroingKernel::Parameters(parset, nrStations, nrChannels)), + hData( + boost::extents[nrStations][NR_POLARIZATIONS][nrSamplesPerChannel][nrChannels], + context), + hRefOutput( + boost::extents[nrStations][NR_POLARIZATIONS][nrSamplesPerChannel][nrChannels], + context), + deviceMemory(context, factory.bufferSize(ZeroingKernel::INPUT_DATA)), + kernel(factory.create(stream, deviceMemory, deviceMemory)) + { + initializeHostBuffers(); + } + + + // Initialize all the elements of the input host buffer to zero, and all + // elements of the output host buffer to NaN. + void initializeHostBuffers() + { + cout << "Kernel buffersize set to: " << factory.bufferSize( + ZeroingKernel::INPUT_DATA) << endl; + cout << "\nInitializing host buffers..." << endl + << " buffers.input.size() = " << setw(7) << deviceMemory.size() << endl + << " hData.size() = " << setw(7) << hData.size() << endl + << " buffers.output.size() = " << setw(7) << deviceMemory.size() + << endl; + CHECK_EQUAL(deviceMemory.size(), hData.size()); + fill(hData.data(), hData.data() + hData.num_elements(), + fcomplex(1.0f, 2.0f)); + fill(hRefOutput.data(), hRefOutput.data() + hRefOutput.num_elements(), + fcomplex(1.0f, 2.0f)); + } + + void runKernel(const MultiDimArray<SparseSet<unsigned>, 1> &channelFlags) + { + // Dummy BlockID + BlockID blockId; + // Copy input data from host- to device buffer synchronously + stream.writeBuffer(deviceMemory, hData, true); + // Launch the kernel + kernel->enqueue(blockId, channelFlags); + // Copy output data from device- to host buffer synchronously + stream.readBuffer(hData, deviceMemory, true); + } + +}; + +// Test if we can succesfully create all necessary classes and run the kernel +TEST(BasicRun) +{ + cout << "running test: BasicRun" << endl; + SUTWrapper sut(1, 2, 4096); + MultiDimArray<SparseSet<unsigned>, 1> channelFlags(boost::extents[sut.nrSTABs]); + + sut.runKernel(channelFlags); +} + +// If we flag nothing, nothing should change +TEST(NothingFlaggedTest) +{ + cout << "running test: NothingFlaggedTest" << endl; + + SUTWrapper sut(64, 5, 1024); + MultiDimArray<SparseSet<unsigned>, 1> channelFlags(boost::extents[sut.nrSTABs]); + + // run kernel + cout << "Running kernel" << endl; + sut.runKernel(channelFlags); + + // compare output + cout << "Comparing output" << endl; + CHECK_ARRAY_EQUAL(sut.hRefOutput.data(), + sut.hData.data(), + sut.hData.num_elements()); + cout << "Comparing output: done" << endl; +} + +// If we flag one sample, it should zero in the output +TEST(SingleFlagTest) +{ + cout << "running test: SingleFlagTest" << endl; + + SUTWrapper sut(64, 1, 1024); + MultiDimArray<SparseSet<unsigned>, 1> channelFlags(boost::extents[sut.nrSTABs]); + + // flag a sample + cout << "Flagging 1 sample" << endl; + channelFlags[0].include(13); + + // also zero reference output + for (unsigned pol = 0; pol < NR_POLARIZATIONS; pol++) + for (unsigned c = 0; c < sut.nrChannels; c++) + sut.hRefOutput[0][pol][13][c] = 0.0f; + + // run kernel + cout << "Running kernel" << endl; + sut.runKernel(channelFlags); + + // compare output + cout << "Comparing output" << endl; + CHECK_ARRAY_EQUAL(sut.hRefOutput.data(), + sut.hData.data(), + sut.hData.num_elements()); + cout << "Comparing output: done" << endl; +} + +// Flag patterns of input and check if the kernel zeroes the correct samples +TEST(PatternsTest) +{ + cout << "running test: PatternsTest" << endl; + + //size_t nrStations[] = { 12, 53, 66, 77, 80 }; + size_t nrStations[] = { 80 }; + + size_t nrSamples = 32768; // For performance tests, use 196608, and disable output verification + nrSamples = 196608; + size_t nrChannels[] = { 1, 16, 64, 256 }; + size_t maxNrRanges = SubbandMetaData::MAXNRFLAGRANGES; + + for (unsigned st = 0; st < sizeof(nrStations)/sizeof(nrStations[0]); ++st) + for (unsigned ch = 0; ch < sizeof(nrChannels)/sizeof(nrChannels[0]); ++ch) + { + cout << "*******testing stations: " << nrStations[st] + << " channels: " << nrChannels[ch] << endl; + + SUTWrapper sut(nrChannels[ch], nrStations[st], nrSamples / nrChannels[ch]); + MultiDimArray<SparseSet<unsigned>, 1> channelFlags(boost::extents[sut.nrSTABs]); + + // flag samples (with different patterns per station) + cout << "Flagging samples" << endl; + for (unsigned st_z = 0; st_z < nrStations[st]; st_z++) + for (unsigned sample_z = st_z; sample_z < nrSamples/nrChannels[ch]; sample_z += st_z + 1) { + if (channelFlags[st_z].count() >= maxNrRanges) + continue; + + channelFlags[st_z].include(sample_z); + + // also zero reference output + for (unsigned pol = 0; pol < NR_POLARIZATIONS; pol++) + for (unsigned c = 0; c < sut.nrChannels; c++) + sut.hRefOutput[st_z][pol][sample_z][c] = 0.0f; + } + + // run kernel + cout << "Running kernel" << endl; + sut.runKernel(channelFlags); + + // compare output + cout << "Comparing output" << endl; + /* + CHECK_ARRAY_EQUAL(sut.hRefOutput.data(), + sut.hData.data(), + sut.hData.num_elements()); + */ + cout << "Comparing output: done" << endl; + + } +} + + +int main() +{ + INIT_LOGGER("tZeroingKernel"); + + try { + gpu::Platform pf; + } + catch (gpu::GPUException&) { + cerr << "No GPU device(s) found. Skipping tests." << endl; + return 3; + } + return UnitTest::RunAllTests() == 0 ? 0 : 1; + +} + diff --git a/RTCP/Cobalt/GPUProc/test/Kernels/tZeroingKernel.sh b/RTCP/Cobalt/GPUProc/test/Kernels/tZeroingKernel.sh new file mode 100755 index 0000000000000000000000000000000000000000..1b06659b6bad332519888659b0a76bffb26a9a7b --- /dev/null +++ b/RTCP/Cobalt/GPUProc/test/Kernels/tZeroingKernel.sh @@ -0,0 +1,3 @@ +#!/bin/bash +./runctest.sh tZeroingKernel + diff --git a/RTCP/Cobalt/GPUProc/test/SubbandProcs/tBeamFormerSubbandProcProcessSb.cc b/RTCP/Cobalt/GPUProc/test/SubbandProcs/tBeamFormerSubbandProcProcessSb.cc index 51bafeb59c04f534db6cc0c8d4193602493c4f8c..b3df8475e33705d1a39f405574af16ae8f47897c 100644 --- a/RTCP/Cobalt/GPUProc/test/SubbandProcs/tBeamFormerSubbandProcProcessSb.cc +++ b/RTCP/Cobalt/GPUProc/test/SubbandProcs/tBeamFormerSubbandProcProcessSb.cc @@ -86,8 +86,7 @@ int main() { const size_t nrBitsPerSample = ps.settings.nrBitsPerSample; const size_t nrBytesPerComplexSample = ps.nrBytesPerComplexSample(); - const unsigned fft1Size = ps.settings.beamFormer.nrDelayCompensationChannels; - const unsigned fft2Size = ps.settings.beamFormer.nrHighResolutionChannels / fft1Size; + const unsigned fftSize = ps.settings.beamFormer.nrDelayCompensationChannels; // We only support 8-bit or 16-bit input samples ASSERT(nrBitsPerSample == 8 || nrBitsPerSample == 16); @@ -105,8 +104,7 @@ int main() { "\n nrSamplesPerSubband = " << nrSamplesPerSubband << "\n nrBitsPerSample = " << nrBitsPerSample << "\n nrBytesPerComplexSample = " << nrBytesPerComplexSample << - "\n fft1Size = " << fft1Size << - "\n fft2Size = " << fft2Size); + "\n fftSize = " << fftSize); // Create very simple kernel programs, with predictable output. Skip as much // as possible. Nr of channels/sb from the parset is 1, so the PPF will not @@ -186,7 +184,7 @@ int main() { // Coherent Stokes takes the stokes of the sums of all fields (stokes(sum(x))). // We can calculate the expected output values, since we're supplying a // complex sine/cosine input signal. We only have Stokes-I, so the output - // should be: nrStations * (amp * scaleFactor * fft1Size * fft2Size) ** 2 + // should be: nrStations * (amp * scaleFactor * fftSize) ** 2 // - amp is set to the maximum possible value for the bit-mode: // i.e. 127 for 8-bit and 32767 for 16-bit mode // - scaleFactor is the scaleFactor applied by the IntToFloat kernel. @@ -195,7 +193,7 @@ int main() { // - for 16-bit input: (2 * 32767 * 1 * 64 * 64)^2 = 72053196058525696 // - for 8-bit input: (2 * 127 * 16 * 64 * 64)^2 = 277094110068736 - float coh_outVal = sqr(nrBFStations * amplitude * scaleFactor * fft1Size * fft2Size); + float coh_outVal = sqr(nrBFStations * amplitude * scaleFactor * fftSize); cout << "coherent outVal = " << coh_outVal << endl; for (size_t t = 0; t < ps.settings.beamFormer.coherentSettings.nrSamples; t++) @@ -210,7 +208,7 @@ int main() { // Incoherent Stokes sums the stokes of each field (sum(stokes(x))). // We can calculate the expected output values, since we're supplying a // complex sine/cosine input signal. We only have Stokes-I, so the output - // should be: nrStation * (amp * scaleFactor * fft1Size * fft2Size)^2 + // should be: nrStation * (amp * scaleFactor * fftSize)^2 // - amp is set to the maximum possible value for the bit-mode: // i.e. 127 for 8-bit and 32767 for 16-bit mode // - scaleFactor is the scaleFactor applied by the IntToFloat kernel. @@ -219,7 +217,7 @@ int main() { // - for 16-bit input: 2 * (32767 * 1 * 64 * 64)^2 = 36026598029262848 // - for 8-bit input: 2 * (127 * 16 * 64 * 64)^2 = 138547055034368 - float incoh_outVal = nrBFStations * sqr(amplitude * scaleFactor * fft1Size * fft2Size); + float incoh_outVal = nrBFStations * sqr(amplitude * scaleFactor * fftSize); cout << "incoherent outVal = " << incoh_outVal << endl; for (size_t t = 0; t < ps.settings.beamFormer.incoherentSettings.nrSamples; t++) diff --git a/RTCP/Cobalt/GPUProc/test/SubbandProcs/tCoherentStokesBeamFormerSubbandProcProcessSb.cc b/RTCP/Cobalt/GPUProc/test/SubbandProcs/tCoherentStokesBeamFormerSubbandProcProcessSb.cc index 377400a75b973c35a71f8bbd317b4f2388c02473..3220febe8e078307ae10cb9d5038da035d42ad8f 100644 --- a/RTCP/Cobalt/GPUProc/test/SubbandProcs/tCoherentStokesBeamFormerSubbandProcProcessSb.cc +++ b/RTCP/Cobalt/GPUProc/test/SubbandProcs/tCoherentStokesBeamFormerSubbandProcProcessSb.cc @@ -99,8 +99,7 @@ int main(/*int argc, char *argv[]*/) { const size_t nrBitsPerSample = ps.settings.nrBitsPerSample; const size_t nrBytesPerComplexSample = ps.nrBytesPerComplexSample(); - const unsigned fft1Size = ps.settings.beamFormer.nrDelayCompensationChannels; - const unsigned fft2Size = ps.settings.beamFormer.nrHighResolutionChannels / fft1Size; + const unsigned fftSize = ps.settings.beamFormer.nrDelayCompensationChannels; // We only support 8-bit or 16-bit input samples ASSERT(nrBitsPerSample == 8 || nrBitsPerSample == 16); @@ -118,8 +117,7 @@ int main(/*int argc, char *argv[]*/) { "\n nrSamplesPerSubband = " << nrSamplesPerSubband << "\n nrBitsPerSample = " << nrBitsPerSample << "\n nrBytesPerComplexSample = " << nrBytesPerComplexSample << - "\n fft1Size = " << fft1Size << - "\n fft2Size = " << fft2Size); + "\n fftSize = " << fftSize); // Output array sizes const size_t nrStokes = ps.settings.beamFormer.coherentSettings.nrStokes; @@ -210,7 +208,7 @@ int main(/*int argc, char *argv[]*/) { // We can calculate the expected output values, since we're supplying a // complex sine/cosine input signal. We only have Stokes-I, so the output - // should be: (nrStations * amp * scaleFactor * fft1Size * fft2Size) ** 2 + // should be: (nrStations * amp * scaleFactor * fftSize) ** 2 // - amp is set to the maximum possible value for the bit-mode: // i.e. 127 for 8-bit and 32767 for 16-bit mode // - scaleFactor is the scaleFactor applied by the IntToFloat kernel. @@ -220,8 +218,8 @@ int main(/*int argc, char *argv[]*/) { // - for 8-bit input: (5 * 127 * 16 * 64 * 64) ** 2 = 1731838187929600 float outVal = - (nrStations * amplitude * scaleFactor * fft1Size * fft2Size) * - (nrStations * amplitude * scaleFactor * fft1Size * fft2Size); + (nrStations * amplitude * scaleFactor * fftSize) * + (nrStations * amplitude * scaleFactor * fftSize); cout << "outVal = " << setprecision(12) << outVal << endl; // Skip output validation when started with commandline parsed parameters! diff --git a/RTCP/Cobalt/GPUProc/test/SubbandProcs/tCorrelatorStep.cc b/RTCP/Cobalt/GPUProc/test/SubbandProcs/tCorrelatorStep.cc index 9cd96a11db0d31c4b1c5a11526a49b8cb11be868..c23a63ec6812c688643cb6c76e184e2728677b3f 100644 --- a/RTCP/Cobalt/GPUProc/test/SubbandProcs/tCorrelatorStep.cc +++ b/RTCP/Cobalt/GPUProc/test/SubbandProcs/tCorrelatorStep.cc @@ -76,7 +76,13 @@ TEST(convertFlagsToChannelFlags) boost::extents[parset.settings.antennaFields.size()]); // ****** perform the translation - CorrelatorStep::Flagger::convertFlagsToChannelFlags(parset, inputFlags, flagsPerChannel); + CorrelatorStep::Flagger::convertFlagsToChannelFlags( + inputFlags, + flagsPerChannel, + parset.settings.correlator.nrSamplesPerBlock, + parset.settings.correlator.nrChannels, + NR_TAPS - 1 + ); // ****** //validate the corner cases diff --git a/RTCP/Cobalt/GPUProc/test/SubbandProcs/tFlysEyeBeamFormerSubbandProcProcessSb.cc b/RTCP/Cobalt/GPUProc/test/SubbandProcs/tFlysEyeBeamFormerSubbandProcProcessSb.cc index be24b2dfdcd36507d323234930c44a306dc128f7..642398919d064b0be7225d5f291f161044ff12c8 100644 --- a/RTCP/Cobalt/GPUProc/test/SubbandProcs/tFlysEyeBeamFormerSubbandProcProcessSb.cc +++ b/RTCP/Cobalt/GPUProc/test/SubbandProcs/tFlysEyeBeamFormerSubbandProcProcessSb.cc @@ -82,8 +82,7 @@ int main() { const size_t nrBitsPerSample = ps.settings.nrBitsPerSample; const size_t nrBytesPerComplexSample = ps.nrBytesPerComplexSample(); - const unsigned fft1Size = ps.settings.beamFormer.nrDelayCompensationChannels; - const unsigned fft2Size = ps.settings.beamFormer.nrHighResolutionChannels / fft1Size; + const unsigned fftSize = ps.settings.beamFormer.nrDelayCompensationChannels; // We only support 8-bit or 16-bit input samples ASSERT(nrBitsPerSample == 8 || nrBitsPerSample == 16); @@ -101,8 +100,7 @@ int main() { "\n nrSamplesPerSubband = " << nrSamplesPerSubband << "\n nrBitsPerSample = " << nrBitsPerSample << "\n nrBytesPerComplexSample = " << nrBytesPerComplexSample << - "\n fft1Size = " << fft1Size << - "\n fft2Size = " << fft2Size); + "\n fftSize = " << fftSize); // Because this is fly's eye mode! ASSERT(nrStations == maxNrTABsPerSAP); @@ -199,7 +197,7 @@ int main() { // We can calculate the expected output values, since we're supplying a // complex sine/cosine input signal. We only have Stokes-I, so the output - // should be: (amp * scaleFactor * fft1Size * fft2Size) ** 2 + // should be: (amp * scaleFactor * fftSize) ** 2 // - amp is set to the maximum possible value for the bit-mode: // i.e. 127 for 8-bit and 32767 for 16-bit mode // - scaleFactor is the scaleFactor applied by the IntToFloat kernel. @@ -209,8 +207,8 @@ int main() { // - for 8-bit input: (127 * 16 * 64 * 64) ** 2 = 69273527517184 float outVal = - amplitude * scaleFactor * fft1Size * fft2Size * - amplitude * scaleFactor * fft1Size * fft2Size; + amplitude * scaleFactor * fftSize * + amplitude * scaleFactor * fftSize; cout << "outVal = " << setprecision(12) << outVal << endl; for (size_t tab = 0; tab < nrTABs; tab++)