Skip to content
Snippets Groups Projects
Commit 1475c354 authored by Wouter Klijn's avatar Wouter Klijn
Browse files

Task #5287: Merge with trunk (Add fftshift kernel to BeamFormer pipeline)...

Task #5287: Merge with trunk (Add fftshift kernel to BeamFormer pipeline). Updated test for new output. Minor refactoring of names. Reviewed by Wouter.
parents 9b372438 e977b8ab
Branches
Tags
No related merge requests found
...@@ -22,85 +22,96 @@ Pipeline ...@@ -22,85 +22,96 @@ Pipeline
For max size, we assume: For max size, we assume:
- 48 stations - 48 stations
- 1 subband - 1 subband
- 1 second blocks (195312.5 samples) rounded to next multiple of 4096 - 1 second blocks (195312.5 samples) rounded to next multiple of 4096 (= 196608 samples).
Note: Note:
MiB = 2^20 bytes (1048576) MiB = 2^20 bytes (= 1048576 bytes).
(*) = requires change from current implementation
Flow: Data dimensions: Max size (fcomplex): Buffer: Flow: Data dimensions: Max size (fcomplex): Buffer:
=================================================================================================================== ===================================================================================================================
(input) [station][samples][pol] [48][196608][2] = 72 MiB A (input) [station][sample][pol] [48][196608][2] = 72 MiB A
| (i16complex) | (i16complex)
V V
IntToFloat + Transpose IntToFloat + Transpose
| [station][pol][samples] [48][2][196608] = 144 MiB B | [station][pol][sample] [48][2][196608] = 144 MiB B
V
FFT-shift {inplace}
| [station][pol][sample] [48][2][196608] = 144 MiB B
V V
FFT-64 {inplace} FFT-64 {inplace}
| [station][pol][samples][channel] [48][2][3072][64] = 144 MiB B | [station][pol][sample][channel] [48][2][3072][64] = 144 MiB B
V
Delay compensation + Transpose {I/O: delays}
| [station][pol][channel][sample] [48][2][64][3072] = 144 MiB A
V V
Delay compensation (*: no transpose) {I/O: delays} FFT-shift {inplace}
| [station][pol][channel][samples] [48][2][64][3072] = 144 MiB A | [station][pol][channel][sample] [48][2][64][3072] = 144 MiB A
V V
FFT-64 {inplace} FFT-64 {inplace}
| [station][pol][chan1][samples][chan2] [48][2][64][48][64] = 144 MiB A | [station][pol][chan1][sample][chan2] [48][2][64][48][64] = 144 MiB A
V V
BandPass + Transpose {I/O: weights} BandPass + Transpose {I/O: weights}
| [station][chan1][chan2][samples][pol] [48][64][64][48][2] = 144 MiB B | [station][chan1][chan2][sample][pol] [48][64][64][48][2] = 144 MiB B
V = [stations][channel][samples][pol] V = [station][channel][sample][pol]
X X
Complex Voltages/Coherent Stokes: Complex Voltages/Coherent Stokes:
----------------------------------- -----------------------------------
X [station][channel][samples][pol] [48][4096][48][2] = 144 MiB B X [station][channel][sample][pol] [48][4096][48][2] = 144 MiB B
| |
V V
BeamFormer {I/O: weights} BeamFormer {I/O: weights}
| [channel][samples][tab][pol] [4096][48][tab][2] = 3 MiB/TAB A | [channel][sample][tab][pol] [4096][48][tab][2] = 3 MiB/TAB A
V V
Transpose Transpose
| [tab][pol][samples][channel] [tab][2][48][4096] = 3 MiB/TAB 1ch: CS: C, CV: D | [tab][pol][sample][channel] [tab][2][48][4096] = 3 MiB/TAB 1ch: CS: C, CV: D
| Nch: CS: D, CV: C | Nch: CS: D, CV: C
V V
iFFT-4k {inplace} iFFT-4k {inplace}
| [tab][pol][samples] [tab][2][196608] = 3 MiB/TAB 1ch: CS: C, CV: D | [tab][pol][sample] [tab][2][196608] = 3 MiB/TAB 1ch: CS: C, CV: D
| Nch: CS: D, CV: C
V
FFT-shift {inplace}
| [tab][pol][sample] [tab][2][196608] = 3 MiB/TAB 1ch: CS: C, CV: D
| Nch: CS: D, CV: C | Nch: CS: D, CV: C
V V
FIR-16 (if >1ch) FIR-16 (if >1ch)
| [tab][pol][samples] [tab][2][196608] = 3 MiB/TAB 1ch: CS: -, CV: - | [tab][pol][sample] [tab][2][196608] = 3 MiB/TAB 1ch: CS: -, CV: -
| Nch: CS: C, CV: D | Nch: CS: C, CV: D
V V
FFT-16 {inplace} (if >1ch) FFT-16 {inplace} (if >1ch)
| [tab][pol][samples][channel] [tab][2][12288][16] = 3 MiB/TAB 1ch: CS: -, CV: - | [tab][pol][sample][channel] [tab][2][12288][16] = 3 MiB/TAB 1ch: CS: -, CV: -
| Nch: CS: C, CV: D | Nch: CS: C, CV: D
V V
Coherent Stokes (*: no transpose) Coherent Stokes
| [tab][stokes][samples][channel] [tab][4][12288][16] = 0.75 MiB/TAB/Stokes 1ch: CS: D, CV: - | [tab][stokes][sample][channel] [tab][4][12288][16] = 0.75 MiB/TAB/Stokes 1ch: CS: D, CV: -
| (float) Nch: CS: D, CV: - | (float) Nch: CS: D, CV: -
V V
(output) (output)
Incoherent Stokes: Incoherent Stokes:
----------------------------------- -----------------------------------
X [station][channel][samples][pol] [48][4096][48][2] = 144 MiB B X [station][channel][sample][pol] [48][4096][48][2] = 144 MiB B
| |
V V
Transpose + Copy Transpose + Copy
| [station][pol][samples][channel] [48][2][48][4096] = 144 MiB A | [station][pol][sample][channel] [48][2][48][4096] = 144 MiB A
V V
iFFT-4k {inplace} iFFT-4k {inplace}
| [station][pol][samples] [48][2][196608] = 144 MiB A | [station][pol][sample] [48][2][196608] = 144 MiB A
V
FFT-shift {inplace}
| [station][pol][sample] [48][2][196608] = 144 MiB A
V V
FIR-16 (if >1ch) FIR-16 (if >1ch)
| [station][pol][samples] [48][2][196608] = 144 MiB B | [station][pol][sample] [48][2][196608] = 144 MiB B
| |
V V
FFT-16 {inplace} (if >1ch) FFT-16 {inplace} (if >1ch)
| [station][pol][samples][channel] [48][2][12288][16] = 144 MiB B | [station][pol][sample][channel] [48][2][12288][16] = 144 MiB B
|
V V
Incoherent Stokes (*: no transpose) Incoherent Stokes
| [stokes][samples][channel] [4][12288][16] = 3 MiB E | [stokes][sample][channel] [4][12288][16] = 3 MiB E
V (float) V (float)
(output) (output)
......
...@@ -34,7 +34,7 @@ namespace LOFAR ...@@ -34,7 +34,7 @@ namespace LOFAR
delayCompensation(delayCompensationParams(ps)), delayCompensation(delayCompensationParams(ps)),
beamFormer(beamFormerParams(ps)), beamFormer(beamFormerParams(ps)),
coherentTranspose(coherentTransposeParams(ps)), coherentTranspose(coherentTransposeParams(ps)),
fftShiftKernel(FFTShiftKernelParams(ps)), fftShift(fftShiftParams(ps)),
firFilter(firFilterParams(ps, nrSubbandsPerSubbandProc)), firFilter(firFilterParams(ps, nrSubbandsPerSubbandProc)),
coherentStokes(coherentStokesParams(ps)), coherentStokes(coherentStokesParams(ps)),
incoherentStokes(incoherentStokesParams(ps)), incoherentStokes(incoherentStokesParams(ps)),
...@@ -129,7 +129,7 @@ namespace LOFAR ...@@ -129,7 +129,7 @@ namespace LOFAR
} }
FFTShiftKernel::Parameters FFTShiftKernel::Parameters
BeamFormerFactories::FFTShiftKernelParams(const Parset &ps) const BeamFormerFactories::fftShiftParams(const Parset &ps) const
{ {
FFTShiftKernel::Parameters params(ps); FFTShiftKernel::Parameters params(ps);
// Currently a static in the subband proc // Currently a static in the subband proc
......
...@@ -50,7 +50,7 @@ namespace LOFAR ...@@ -50,7 +50,7 @@ namespace LOFAR
KernelFactory<DelayAndBandPassKernel> delayCompensation; KernelFactory<DelayAndBandPassKernel> delayCompensation;
KernelFactory<BeamFormerKernel> beamFormer; KernelFactory<BeamFormerKernel> beamFormer;
KernelFactory<CoherentStokesTransposeKernel> coherentTranspose; KernelFactory<CoherentStokesTransposeKernel> coherentTranspose;
KernelFactory<FFTShiftKernel> fftShiftKernel; KernelFactory<FFTShiftKernel> fftShift;
KernelFactory<FIR_FilterKernel> firFilter; KernelFactory<FIR_FilterKernel> firFilter;
KernelFactory<CoherentStokesKernel> coherentStokes; KernelFactory<CoherentStokesKernel> coherentStokes;
KernelFactory<IncoherentStokesKernel> incoherentStokes; KernelFactory<IncoherentStokesKernel> incoherentStokes;
...@@ -74,7 +74,7 @@ namespace LOFAR ...@@ -74,7 +74,7 @@ namespace LOFAR
delayCompensationParams(const Parset &ps) const; delayCompensationParams(const Parset &ps) const;
FFTShiftKernel::Parameters FFTShiftKernel::Parameters
FFTShiftKernelParams(const Parset &ps) const; fftShiftParams(const Parset &ps) const;
FIR_FilterKernel::Parameters FIR_FilterKernel::Parameters
firFilterParams(const Parset &ps, size_t nrSubbandsPerSubbandProc) const; firFilterParams(const Parset &ps, size_t nrSubbandsPerSubbandProc) const;
......
...@@ -98,6 +98,11 @@ namespace LOFAR ...@@ -98,6 +98,11 @@ namespace LOFAR
intToFloatBuffers(devInput.inputSamples, devB), intToFloatBuffers(devInput.inputSamples, devB),
intToFloatKernel(factories.intToFloat.create(queue, intToFloatBuffers)), intToFloatKernel(factories.intToFloat.create(queue, intToFloatBuffers)),
// FFTShift: B -> B
firstFFTShiftBuffers(devB, devB),
firstFFTShiftKernel(
factories.fftShift.create(queue, firstFFTShiftBuffers)),
// FFT: B -> B // FFT: B -> B
firstFFT(queue, firstFFT(queue,
ps.settings.beamFormer.nrDelayCompensationChannels, ps.settings.beamFormer.nrDelayCompensationChannels,
...@@ -112,6 +117,11 @@ namespace LOFAR ...@@ -112,6 +117,11 @@ namespace LOFAR
delayCompensationKernel( delayCompensationKernel(
factories.delayCompensation.create(queue, delayCompensationBuffers)), factories.delayCompensation.create(queue, delayCompensationBuffers)),
// FFTShift: A -> A
secondFFTShiftBuffers(devA, devA),
secondFFTShiftKernel(
factories.fftShift.create(queue, secondFFTShiftBuffers)),
// FFT: A -> A // FFT: A -> A
secondFFT(queue, secondFFT(queue,
ps.settings.beamFormer.nrHighResolutionChannels / ps.settings.beamFormer.nrHighResolutionChannels /
...@@ -167,6 +177,12 @@ namespace LOFAR ...@@ -167,6 +177,12 @@ namespace LOFAR
ps.settings.beamFormer.nrHighResolutionChannels), ps.settings.beamFormer.nrHighResolutionChannels),
false, coherentTransposeBuffers.output), false, coherentTransposeBuffers.output),
// fftshift: C/D -> C/D (in-place) = transposeBuffers.output
inverseFFTShiftBuffers(
coherentTransposeBuffers.output, coherentTransposeBuffers.output),
inverseFFTShiftKernel(
factories.fftShift.create(queue, inverseFFTShiftBuffers)),
// FIR filter: D/C -> C/D // FIR filter: D/C -> C/D
// //
// Input buffer: // Input buffer:
...@@ -225,6 +241,11 @@ namespace LOFAR ...@@ -225,6 +241,11 @@ namespace LOFAR
ps.settings.beamFormer.nrHighResolutionChannels), ps.settings.beamFormer.nrHighResolutionChannels),
false, devA), false, devA),
// inverse FFTShift: A -> A
incoherentInverseFFTShiftBuffers(devA, devA),
incoherentInverseFFTShiftKernel(
factories.fftShift.create(queue, incoherentInverseFFTShiftBuffers)),
// FIR filter: A -> B // FIR filter: A -> B
devIncoherentFilterWeights( devIncoherentFilterWeights(
context, context,
...@@ -335,17 +356,21 @@ namespace LOFAR ...@@ -335,17 +356,21 @@ namespace LOFAR
BeamFormerSubbandProc::Counters::Counters(gpu::Context &context) BeamFormerSubbandProc::Counters::Counters(gpu::Context &context)
: :
intToFloat(context), intToFloat(context),
firstFFTShift(context),
firstFFT(context), firstFFT(context),
delayBp(context), delayBp(context),
secondFFTShift(context),
secondFFT(context), secondFFT(context),
correctBandpass(context), correctBandpass(context),
beamformer(context), beamformer(context),
transpose(context), transpose(context),
inverseFFT(context), inverseFFT(context),
inverseFFTShift(context),
firFilterKernel(context), firFilterKernel(context),
finalFFT(context), finalFFT(context),
coherentStokes(context), coherentStokes(context),
incoherentInverseFFT(context), incoherentInverseFFT(context),
incoherentInverseFFTShift(context),
incoherentFirFilterKernel(context), incoherentFirFilterKernel(context),
incoherentFinalFFT(context), incoherentFinalFFT(context),
incoherentStokes(context), incoherentStokes(context),
...@@ -362,13 +387,16 @@ namespace LOFAR ...@@ -362,13 +387,16 @@ namespace LOFAR
LOG_INFO_STR( LOG_INFO_STR(
"**** BeamFormerSubbandProc GPU mean and stDev ****" << endl << "**** BeamFormerSubbandProc GPU mean and stDev ****" << endl <<
std::setw(20) << "(intToFloat)" << intToFloat.stats << endl << std::setw(20) << "(intToFloat)" << intToFloat.stats << endl <<
std::setw(20) << "(firstFFTShift)" << firstFFTShift.stats << endl <<
std::setw(20) << "(firstFFT)" << firstFFT.stats << endl << std::setw(20) << "(firstFFT)" << firstFFT.stats << endl <<
std::setw(20) << "(delayBp)" << delayBp.stats << endl << std::setw(20) << "(delayBp)" << delayBp.stats << endl <<
std::setw(20) << "(secondFFTShift)" << secondFFTShift.stats << endl <<
std::setw(20) << "(secondFFT)" << secondFFT.stats << endl << std::setw(20) << "(secondFFT)" << secondFFT.stats << endl <<
std::setw(20) << "(correctBandpass)" << correctBandpass.stats << endl << std::setw(20) << "(correctBandpass)" << correctBandpass.stats << endl <<
std::setw(20) << "(beamformer)" << beamformer.stats << endl << std::setw(20) << "(beamformer)" << beamformer.stats << endl <<
std::setw(20) << "(transpose)" << transpose.stats << endl << std::setw(20) << "(transpose)" << transpose.stats << endl <<
std::setw(20) << "(inverseFFT)" << inverseFFT.stats << endl << std::setw(20) << "(inverseFFT)" << inverseFFT.stats << endl <<
std::setw(20) << "(inverseFFTShift)" << inverseFFTShift.stats << endl <<
std::setw(20) << "(firFilterKernel)" << firFilterKernel.stats << endl << std::setw(20) << "(firFilterKernel)" << firFilterKernel.stats << endl <<
std::setw(20) << "(finalFFT)" << finalFFT.stats << endl << std::setw(20) << "(finalFFT)" << finalFFT.stats << endl <<
std::setw(20) << "(coherentStokes)" << coherentStokes.stats << endl << std::setw(20) << "(coherentStokes)" << coherentStokes.stats << endl <<
...@@ -376,6 +404,7 @@ namespace LOFAR ...@@ -376,6 +404,7 @@ namespace LOFAR
std::setw(20) << "(visibilities)" << visibilities.stats << endl << std::setw(20) << "(visibilities)" << visibilities.stats << endl <<
std::setw(20) << "(incoherentOutput )" << incoherentOutput.stats << endl << std::setw(20) << "(incoherentOutput )" << incoherentOutput.stats << endl <<
std::setw(20) << "(incoherentInverseFFT)" << incoherentInverseFFT.stats << endl << std::setw(20) << "(incoherentInverseFFT)" << incoherentInverseFFT.stats << endl <<
std::setw(20) << "(incoherentInverseFFTShift)" << incoherentInverseFFTShift.stats << endl <<
std::setw(20) << "(incoherentFirFilterKernel)" << incoherentFirFilterKernel.stats << endl << std::setw(20) << "(incoherentFirFilterKernel)" << incoherentFirFilterKernel.stats << endl <<
std::setw(20) << "(incoherentFinalFFT)" << incoherentFinalFFT.stats << endl << std::setw(20) << "(incoherentFinalFFT)" << incoherentFinalFFT.stats << endl <<
std::setw(20) << "(incoherentStokes)" << incoherentStokes.stats << endl << std::setw(20) << "(incoherentStokes)" << incoherentStokes.stats << endl <<
...@@ -419,6 +448,7 @@ namespace LOFAR ...@@ -419,6 +448,7 @@ namespace LOFAR
// Otherwise, a kernel arg may not be set... // Otherwise, a kernel arg may not be set...
intToFloatKernel->enqueue(input.blockID, counters.intToFloat); intToFloatKernel->enqueue(input.blockID, counters.intToFloat);
firstFFTShiftKernel->enqueue(input.blockID, counters.firstFFTShift);
firstFFT.enqueue(input.blockID, counters.firstFFT); firstFFT.enqueue(input.blockID, counters.firstFFT);
delayCompensationKernel->enqueue( delayCompensationKernel->enqueue(
...@@ -426,6 +456,7 @@ namespace LOFAR ...@@ -426,6 +456,7 @@ namespace LOFAR
ps.settings.subbands[subband].centralFrequency, ps.settings.subbands[subband].centralFrequency,
ps.settings.subbands[subband].SAP); ps.settings.subbands[subband].SAP);
secondFFTShiftKernel->enqueue(input.blockID, counters.secondFFTShift);
secondFFT.enqueue(input.blockID, counters.secondFFT); secondFFT.enqueue(input.blockID, counters.secondFFT);
bandPassCorrectionKernel->enqueue( bandPassCorrectionKernel->enqueue(
...@@ -440,7 +471,9 @@ namespace LOFAR ...@@ -440,7 +471,9 @@ namespace LOFAR
ps.settings.subbands[subband].SAP); ps.settings.subbands[subband].SAP);
coherentTransposeKernel->enqueue(input.blockID, counters.transpose); coherentTransposeKernel->enqueue(input.blockID, counters.transpose);
inverseFFT.enqueue(input.blockID, counters.inverseFFT); inverseFFT.enqueue(input.blockID, counters.inverseFFT);
inverseFFTShiftKernel->enqueue(input.blockID, counters.inverseFFTShift);
if (coherentStokesPPF) if (coherentStokesPPF)
{ {
...@@ -464,6 +497,8 @@ namespace LOFAR ...@@ -464,6 +497,8 @@ namespace LOFAR
incoherentInverseFFT.enqueue( incoherentInverseFFT.enqueue(
input.blockID, counters.incoherentInverseFFT); input.blockID, counters.incoherentInverseFFT);
incoherentInverseFFTShiftKernel->enqueue(
input.blockID, counters.incoherentInverseFFTShift);
if (incoherentStokesPPF) if (incoherentStokesPPF)
{ {
......
...@@ -37,6 +37,7 @@ ...@@ -37,6 +37,7 @@
#include <GPUProc/Kernels/CoherentStokesTransposeKernel.h> #include <GPUProc/Kernels/CoherentStokesTransposeKernel.h>
#include <GPUProc/Kernels/CoherentStokesKernel.h> #include <GPUProc/Kernels/CoherentStokesKernel.h>
#include <GPUProc/Kernels/DelayAndBandPassKernel.h> #include <GPUProc/Kernels/DelayAndBandPassKernel.h>
#include <GPUProc/Kernels/FFTShiftKernel.h>
#include <GPUProc/Kernels/FFT_Kernel.h> #include <GPUProc/Kernels/FFT_Kernel.h>
#include <GPUProc/Kernels/FIR_FilterKernel.h> #include <GPUProc/Kernels/FIR_FilterKernel.h>
#include <GPUProc/Kernels/IncoherentStokesKernel.h> #include <GPUProc/Kernels/IncoherentStokesKernel.h>
...@@ -90,18 +91,22 @@ namespace LOFAR ...@@ -90,18 +91,22 @@ namespace LOFAR
// gpu kernel counters // gpu kernel counters
PerformanceCounter intToFloat; PerformanceCounter intToFloat;
PerformanceCounter firstFFTShift;
PerformanceCounter firstFFT; PerformanceCounter firstFFT;
PerformanceCounter delayBp; PerformanceCounter delayBp;
PerformanceCounter secondFFTShift;
PerformanceCounter secondFFT; PerformanceCounter secondFFT;
PerformanceCounter correctBandpass; PerformanceCounter correctBandpass;
PerformanceCounter beamformer; PerformanceCounter beamformer;
PerformanceCounter transpose; PerformanceCounter transpose;
PerformanceCounter inverseFFT; PerformanceCounter inverseFFT;
PerformanceCounter inverseFFTShift;
PerformanceCounter firFilterKernel; PerformanceCounter firFilterKernel;
PerformanceCounter finalFFT; PerformanceCounter finalFFT;
PerformanceCounter coherentStokes; PerformanceCounter coherentStokes;
PerformanceCounter incoherentInverseFFT; PerformanceCounter incoherentInverseFFT;
PerformanceCounter incoherentInverseFFTShift;
PerformanceCounter incoherentFirFilterKernel; PerformanceCounter incoherentFirFilterKernel;
PerformanceCounter incoherentFinalFFT; PerformanceCounter incoherentFinalFFT;
PerformanceCounter incoherentStokes; PerformanceCounter incoherentStokes;
...@@ -149,6 +154,10 @@ namespace LOFAR ...@@ -149,6 +154,10 @@ namespace LOFAR
IntToFloatKernel::Buffers intToFloatBuffers; IntToFloatKernel::Buffers intToFloatBuffers;
std::auto_ptr<IntToFloatKernel> intToFloatKernel; std::auto_ptr<IntToFloatKernel> intToFloatKernel;
// First FFT-shift
FFTShiftKernel::Buffers firstFFTShiftBuffers;
std::auto_ptr<FFTShiftKernel> firstFFTShiftKernel;
// First (64 points) FFT // First (64 points) FFT
FFT_Kernel firstFFT; FFT_Kernel firstFFT;
...@@ -156,6 +165,10 @@ namespace LOFAR ...@@ -156,6 +165,10 @@ namespace LOFAR
DelayAndBandPassKernel::Buffers delayCompensationBuffers; DelayAndBandPassKernel::Buffers delayCompensationBuffers;
std::auto_ptr<DelayAndBandPassKernel> delayCompensationKernel; std::auto_ptr<DelayAndBandPassKernel> delayCompensationKernel;
// Second FFT-shift
FFTShiftKernel::Buffers secondFFTShiftBuffers;
std::auto_ptr<FFTShiftKernel> secondFFTShiftKernel;
// Second (64 points) FFT // Second (64 points) FFT
FFT_Kernel secondFFT; FFT_Kernel secondFFT;
...@@ -182,6 +195,10 @@ namespace LOFAR ...@@ -182,6 +195,10 @@ namespace LOFAR
// inverse (4k points) FFT // inverse (4k points) FFT
FFT_Kernel inverseFFT; FFT_Kernel inverseFFT;
// inverse FFT-shift
FFTShiftKernel::Buffers inverseFFTShiftBuffers;
std::auto_ptr<FFTShiftKernel> inverseFFTShiftKernel;
// Poly-phase filter (FIR + FFT) // Poly-phase filter (FIR + FFT)
gpu::DeviceMemory devFilterWeights; gpu::DeviceMemory devFilterWeights;
gpu::DeviceMemory devFilterHistoryData; gpu::DeviceMemory devFilterHistoryData;
...@@ -205,6 +222,10 @@ namespace LOFAR ...@@ -205,6 +222,10 @@ namespace LOFAR
// Inverse (4k points) FFT // Inverse (4k points) FFT
FFT_Kernel incoherentInverseFFT; FFT_Kernel incoherentInverseFFT;
// Inverse FFT-shift
FFTShiftKernel::Buffers incoherentInverseFFTShiftBuffers;
std::auto_ptr<FFTShiftKernel> incoherentInverseFFTShiftKernel;
// Poly-phase filter (FIR + FFT) // Poly-phase filter (FIR + FFT)
gpu::DeviceMemory devIncoherentFilterWeights; gpu::DeviceMemory devIncoherentFilterWeights;
gpu::DeviceMemory devIncoherentFilterHistoryData; gpu::DeviceMemory devIncoherentFilterHistoryData;
......
...@@ -23,6 +23,7 @@ ...@@ -23,6 +23,7 @@
#include <complex> #include <complex>
#include <cmath> #include <cmath>
#include <iomanip>
#include <Common/LofarLogger.h> #include <Common/LofarLogger.h>
#include <CoInterface/Parset.h> #include <CoInterface/Parset.h>
...@@ -39,16 +40,20 @@ using namespace LOFAR::TYPES; ...@@ -39,16 +40,20 @@ using namespace LOFAR::TYPES;
template<typename T> T inputSignal(size_t t) template<typename T> T inputSignal(size_t t)
{ {
size_t nrBits = sizeof(T) / 2 * 8; size_t nrBits = sizeof(T) / 2 * 8;
double freq = 1.0 / 4.0; // in samples
// double freq = (2 * 64.0 + 17.0) / 4096.0; // in samples
double amp = (1 << (nrBits - 1)) - 1; double amp = (1 << (nrBits - 1)) - 1;
#if 1 // Toggle to experiment with pulse like input
// Sine wave
// double freq = 1.0 / 4.0; // in samples
double freq = (2 * 64.0 + 17.0) / 4096.0; // in samples
double angle = (double)t * 2.0 * M_PI * freq; double angle = (double)t * 2.0 * M_PI * freq;
double s = ::sin(angle); double s = ::sin(angle);
double c = ::cos(angle); double c = ::cos(angle);
return T(::round(amp * c), ::round(amp * s)); return T(::round(amp * c), ::round(amp * s));
#else
// Pulse train
if (t % (2 * 64 + 17) == 0) return T(amp);
else return T(0);
#endif
} }
int main() { int main() {
...@@ -152,7 +157,7 @@ int main() { ...@@ -152,7 +157,7 @@ int main() {
// Block number: 0 .. inf // Block number: 0 .. inf
in.blockID.block = 0; in.blockID.block = 0;
// Subband index in the observation: [0, ps.nrSubbands()) // Subband index in the observation: [0, ps.nrSubbands())
in.blockID.globalSubbandIdx = 0; in.blockID.globalSubbandIdx = 0;
// Subband index for this pipeline/workqueue: [0, subbandIndices.size()) // Subband index for this pipeline/workqueue: [0, subbandIndices.size())
...@@ -206,9 +211,10 @@ int main() { ...@@ -206,9 +211,10 @@ int main() {
for (size_t s = 0; s < nrStokes; s++) for (size_t s = 0; s < nrStokes; s++)
for (size_t t = 0; t < nrSamples; t++) for (size_t t = 0; t < nrSamples; t++)
for (size_t c = 0; c < nrChannels; c++) for (size_t c = 0; c < nrChannels; c++)
ASSERTSTR(fpEquals(out[0][s][t][c], outVal), ASSERTSTR(fpEquals(out[0][s][t][c], outVal, 1e-4f),
"out[" << s << "][" << t << "][" << c << "] = " << "out[" << s << "][" << t << "][" << c << "] = " <<
out[0][s][t][c] << "; outVal = " << outVal); setprecision(12) << out[0][s][t][c] <<
"; outVal = " << outVal);
return 0; return 0;
} }
......
...@@ -40,16 +40,20 @@ using namespace LOFAR::TYPES; ...@@ -40,16 +40,20 @@ using namespace LOFAR::TYPES;
template<typename T> T inputSignal(size_t t) template<typename T> T inputSignal(size_t t)
{ {
size_t nrBits = sizeof(T) / 2 * 8; size_t nrBits = sizeof(T) / 2 * 8;
double freq = 1.0 / 4.0; // in samples
// double freq = (2 * 64.0 + 17.0) / 4096.0; // in samples
double amp = (1 << (nrBits - 1)) - 1; double amp = (1 << (nrBits - 1)) - 1;
#if 1 // Toggle to experiment with pulse type input
// Sine wave
// double freq = 1.0 / 4.0; // in samples
double freq = (2 * 64.0 + 17.0) / 4096.0; // in samples
double angle = (double)t * 2.0 * M_PI * freq; double angle = (double)t * 2.0 * M_PI * freq;
double s = ::sin(angle); double s = ::sin(angle);
double c = ::cos(angle); double c = ::cos(angle);
return T(::round(amp * c), ::round(amp * s)); return T(::round(amp * c), ::round(amp * s));
#else
// Pulse train
if (t % (2 * 64 + 17) == 0) return T(amp);
else return T(0);
#endif
} }
int main() { int main() {
...@@ -219,7 +223,7 @@ int main() { ...@@ -219,7 +223,7 @@ int main() {
for (size_t t = 0; t < nrSamples; t++) for (size_t t = 0; t < nrSamples; t++)
for (size_t c = 0; c < nrChannels; c++) for (size_t c = 0; c < nrChannels; c++)
{ {
ASSERTSTR(fpEquals(out[tab][s][t][c], outVal), ASSERTSTR(fpEquals(out[tab][s][t][c], outVal, 1e-4f),
"out[" << tab << "][" << s << "][" << t << "][" << c << "] = " << setprecision(12) << "out[" << tab << "][" << s << "][" << t << "][" << c << "] = " << setprecision(12) <<
out[tab][s][t][c] << "; outVal = " << outVal); out[tab][s][t][c] << "; outVal = " << outVal);
} }
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment