diff --git a/RTCP/Cobalt/GPUProc/share/gpu/kernels/CoherentStokes.cu b/RTCP/Cobalt/GPUProc/share/gpu/kernels/CoherentStokes.cu index a9618cd987e6b60c46cdb4f27ea861536fa66fb6..508f050a041ae0ec650ccc37a50c89f0e3c584d8 100644 --- a/RTCP/Cobalt/GPUProc/share/gpu/kernels/CoherentStokes.cu +++ b/RTCP/Cobalt/GPUProc/share/gpu/kernels/CoherentStokes.cu @@ -42,8 +42,11 @@ #error Precondition violated: NR_SAMPLES_PER_CHANNEL > 0 && NR_SAMPLES_PER_CHANNEL % INTEGRATION_SIZE == 0 #endif +// This kernel is also compiled for unused combinations of settings, +// e.g. with 0 tabs. Changed to a warning until we have optional kernel compilation + #if !(NR_TABS >= 1) -#error Precondition violated: NR_TABS >= 1 +#warning Precondition violated: NR_TABS >= 1 #endif #if !(TIME_PARALLEL_FACTOR >= 1) diff --git a/RTCP/Cobalt/GPUProc/share/gpu/kernels/CoherentStokesTranspose.cu b/RTCP/Cobalt/GPUProc/share/gpu/kernels/CoherentStokesTranspose.cu index 6026909fff6d38630c1d6eaedd560471cb0294ce..073d955bff6d05730575b0b17ef8ca30670ad624 100644 --- a/RTCP/Cobalt/GPUProc/share/gpu/kernels/CoherentStokesTranspose.cu +++ b/RTCP/Cobalt/GPUProc/share/gpu/kernels/CoherentStokesTranspose.cu @@ -52,8 +52,10 @@ #error Precondition violated: NR_SAMPLES_PER_CHANNEL >= 1 #endif +// This kernel is also compiled for unused combinations of settings, +// e.g. with 0 tabs. 
Changed to a warning until we have optional kernel compilation #if !(NR_TABS >= 1) -#error Precondition violated: NR_TABS >= 1 +#warning Precondition violated: NR_TABS >= 1 #endif #if !(NR_CHANNELS >= 16) diff --git a/RTCP/Cobalt/GPUProc/src/cuda/KernelFactory.h b/RTCP/Cobalt/GPUProc/src/cuda/KernelFactory.h index fc09e3afc282b5855aef2e5f45757c920c8143d9..edeaca625586709cab75f8bba33934966a8fb4ba 100644 --- a/RTCP/Cobalt/GPUProc/src/cuda/KernelFactory.h +++ b/RTCP/Cobalt/GPUProc/src/cuda/KernelFactory.h @@ -82,6 +82,12 @@ namespace LOFAR T* create(const gpu::Stream& stream, const typename T::Buffers& buffers) const { + // Since we use overlapping input/output buffers, their size + // could be wrong. + ASSERT(buffers.input.size() >= bufferSize(T::INPUT_DATA)); + // Until we have optional kernel compilation this test will fail on unused and thus incorrect kernels + //ASSERT(buffers.output.size() >= bufferSize(T::OUTPUT_DATA)); + return new T( stream, createModule(stream.getContext(), T::theirSourceFile, diff --git a/RTCP/Cobalt/GPUProc/src/cuda/Kernels/BeamFormerKernel.cc b/RTCP/Cobalt/GPUProc/src/cuda/Kernels/BeamFormerKernel.cc index 105b9742b0542a7cb04c251ed8dee6143a79ea6f..f4c08ceb55b4aa8221d5eef2a3dbfd0abfc69e9b 100644 --- a/RTCP/Cobalt/GPUProc/src/cuda/Kernels/BeamFormerKernel.cc +++ b/RTCP/Cobalt/GPUProc/src/cuda/Kernels/BeamFormerKernel.cc @@ -47,7 +47,7 @@ namespace LOFAR BeamFormerKernel::Parameters::Parameters(const Parset& ps) : Kernel::Parameters(ps), nrSAPs(ps.settings.beamFormer.SAPs.size()), - nrTABs(ps.settings.beamFormer.maxNrTABsPerSAP()), + nrTABs(ps.settings.beamFormer.maxNrCoherentTABsPerSAP()), subbandBandwidth(ps.settings.subbandWidth()), doFlysEye(ps.settings.beamFormer.doFlysEye) { diff --git a/RTCP/Cobalt/GPUProc/src/cuda/Kernels/BeamFormerTransposeKernel.cc b/RTCP/Cobalt/GPUProc/src/cuda/Kernels/BeamFormerTransposeKernel.cc index 0996be45b01297f6516435e9772f5e1d2eccd08b..4e4f89f2afec73e65f4a7fc0b6d47bf857b17fa7 100644 --- 
a/RTCP/Cobalt/GPUProc/src/cuda/Kernels/BeamFormerTransposeKernel.cc +++ b/RTCP/Cobalt/GPUProc/src/cuda/Kernels/BeamFormerTransposeKernel.cc @@ -45,7 +45,7 @@ namespace LOFAR BeamFormerTransposeKernel::Parameters::Parameters(const Parset& ps) : Kernel::Parameters(ps), - nrTABs(ps.settings.beamFormer.maxNrTABsPerSAP()) + nrTABs(ps.settings.beamFormer.maxNrCoherentTABsPerSAP()) { nrChannelsPerSubband = ps.settings.beamFormer.coherentSettings.nrChannels; diff --git a/RTCP/Cobalt/GPUProc/src/cuda/Kernels/CoherentStokesKernel.cc b/RTCP/Cobalt/GPUProc/src/cuda/Kernels/CoherentStokesKernel.cc index bd03a5b277bc36d52e50a2efd9fa8e8a36c52ddb..7b5835841a885819cea0671e5562835ae1547d85 100644 --- a/RTCP/Cobalt/GPUProc/src/cuda/Kernels/CoherentStokesKernel.cc +++ b/RTCP/Cobalt/GPUProc/src/cuda/Kernels/CoherentStokesKernel.cc @@ -45,7 +45,7 @@ namespace LOFAR CoherentStokesKernel::Parameters::Parameters(const Parset& ps) : Kernel::Parameters(ps), - nrTABs(ps.settings.beamFormer.maxNrTABsPerSAP()), + nrTABs(ps.settings.beamFormer.maxNrCoherentTABsPerSAP()), nrStokes(ps.settings.beamFormer.coherentSettings.nrStokes), outputComplexVoltages(ps.settings.beamFormer.coherentSettings.type == STOKES_XXYY ? 
1 : 0), timeIntegrationFactor( diff --git a/RTCP/Cobalt/GPUProc/src/cuda/Kernels/CoherentStokesTransposeKernel.cc b/RTCP/Cobalt/GPUProc/src/cuda/Kernels/CoherentStokesTransposeKernel.cc index eb0471a8530a960b771a283237641183091cb558..e1a2de8f26e87f812330111e7ba000afb6cd12a7 100644 --- a/RTCP/Cobalt/GPUProc/src/cuda/Kernels/CoherentStokesTransposeKernel.cc +++ b/RTCP/Cobalt/GPUProc/src/cuda/Kernels/CoherentStokesTransposeKernel.cc @@ -45,7 +45,7 @@ namespace LOFAR CoherentStokesTransposeKernel::Parameters::Parameters(const Parset& ps) : Kernel::Parameters(ps), - nrTABs(ps.settings.beamFormer.maxNrTABsPerSAP()) + nrTABs(ps.settings.beamFormer.maxNrCoherentTABsPerSAP()) { nrChannelsPerSubband = ps.settings.beamFormer.coherentSettings.nrChannels; diff --git a/RTCP/Cobalt/GPUProc/src/cuda/Kernels/FFT_Kernel.cc b/RTCP/Cobalt/GPUProc/src/cuda/Kernels/FFT_Kernel.cc index e8ca0d75f91e4fa22fe9b4eb387f5dcf7f7d81d1..ac43df9c80d971223f7d52bd3e532b6d627a80cb 100644 --- a/RTCP/Cobalt/GPUProc/src/cuda/Kernels/FFT_Kernel.cc +++ b/RTCP/Cobalt/GPUProc/src/cuda/Kernels/FFT_Kernel.cc @@ -71,6 +71,10 @@ namespace LOFAR // fftSize must fit into maxNrFFTpoints an exact number of times ASSERT(maxNrFFTpoints % fftSize == 0); + // buffer must be big enough for the job + // Until we have optional kernel compilation this test will fail on unused and thus incorrect kernels + //ASSERT(buffer.size() >= fftSize * nrFFTs * sizeof(fcomplex)); + LOG_DEBUG_STR("FFT_Kernel: " << "fftSize=" << fftSize << ", direction=" << (forward ? 
"forward" : "inverse") << diff --git a/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/BeamFormerCoherentStep.cc b/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/BeamFormerCoherentStep.cc index 54bed744e208115477182c2b37739d768a204770..6d739942128eecb186126171265bd2d6c59aedcb 100644 --- a/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/BeamFormerCoherentStep.cc +++ b/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/BeamFormerCoherentStep.cc @@ -103,7 +103,7 @@ namespace LOFAR queue, *coherentTransposeBuffers)); // inverse FFT: C/D -> C/D (in-place) - unsigned nrInverFFTs = ps.settings.beamFormer.maxNrTABsPerSAP() * + unsigned nrInverFFTs = ps.settings.beamFormer.maxNrCoherentTABsPerSAP() * NR_POLARIZATIONS * ps.nrSamplesPerSubband() / ps.settings.beamFormer.nrHighResolutionChannels; inverseFFT = std::auto_ptr<FFT_Kernel>(new FFT_Kernel( @@ -142,7 +142,7 @@ namespace LOFAR // final FFT: C -> C (in-place) = firFilterBuffers.output - unsigned nrFinalFFTs = ps.settings.beamFormer.maxNrTABsPerSAP() * + unsigned nrFinalFFTs = ps.settings.beamFormer.maxNrCoherentTABsPerSAP() * NR_POLARIZATIONS * ps.nrSamplesPerSubband() / ps.settings.beamFormer.coherentSettings.nrChannels; finalFFT = std::auto_ptr<FFT_Kernel>(new FFT_Kernel( diff --git a/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/BeamFormerSubbandProc.cc b/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/BeamFormerSubbandProc.cc index ba3afa49e9cfb393685ddba3f3bad8d3b2348570..fea9c188a568fcdc012e81185c5c88db0d29a3b7 100644 --- a/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/BeamFormerSubbandProc.cc +++ b/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/BeamFormerSubbandProc.cc @@ -128,13 +128,16 @@ namespace LOFAR new BeamFormerPreprocessingStep(parset, queue, context, factories, devInput, devA, devB, devNull)); - coherentStep = std::auto_ptr<BeamFormerCoherentStep>( - new BeamFormerCoherentStep(parset, queue, context, factories, - devInput, devA, devB, devC, devD, devBeamFormerDelays, devNull)); - - incoherentStep = std::auto_ptr<BeamFormerIncoherentStep>( - new 
BeamFormerIncoherentStep(parset, queue, context, factories, - devInput, devA, devB, devC, devD, devE, devNull)); + // Only create the parts that are actually needed; this prevents checks + // on parts that are not used anyway + if (ps.settings.beamFormer.maxNrCoherentTABsPerSAP()) + coherentStep = std::auto_ptr<BeamFormerCoherentStep>( + new BeamFormerCoherentStep(parset, queue, context, factories, + devInput, devA, devB, devC, devD, devBeamFormerDelays, devNull)); + if (ps.settings.beamFormer.maxNrIncoherentTABsPerSAP()) + incoherentStep = std::auto_ptr<BeamFormerIncoherentStep>( + new BeamFormerIncoherentStep(parset, queue, context, factories, + devInput, devA, devB, devC, devD, devE, devNull)); LOG_INFO_STR("Running coherent pipeline: " @@ -175,8 +178,10 @@ namespace LOFAR void BeamFormerSubbandProc::printStats() { preprocessingPart->printStats(); - coherentStep->printStats(); - incoherentStep->printStats(); + if (ps.settings.beamFormer.maxNrCoherentTABsPerSAP()) + coherentStep->printStats(); + if (ps.settings.beamFormer.maxNrIncoherentTABsPerSAP()) + incoherentStep->printStats(); counters.printStats(); } diff --git a/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/SubbandProc.cc b/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/SubbandProc.cc index 8b76042d8193f7691ef3b730d31854468b52bd9b..aff2e8873a6217d49e603e9e46709ccfe63f8876 100644 --- a/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/SubbandProc.cc +++ b/RTCP/Cobalt/GPUProc/src/cuda/SubbandProcs/SubbandProc.cc @@ -110,6 +110,10 @@ namespace LOFAR metaData.TABs[tab].delayAfterEnd) * 0.5 - compensatedDelay; } + + // Zero padding entries that exist because we always produce maxNrCoherentTABsPerSAP for any subband + for (unsigned tab = metaData.TABs.size(); tab < ps.settings.beamFormer.maxNrCoherentTABsPerSAP(); tab++) + tabDelays[SAP][station][tab] = 0.0; } } diff --git a/RTCP/Cobalt/GPUProc/test/SubbandProcs/CMakeLists.txt b/RTCP/Cobalt/GPUProc/test/SubbandProcs/CMakeLists.txt index 
1b8c43163f4b9da2d141566eda1553f4b938c2ea..688964bb9672e1f6a6501a502a36105d4b1388ae 100644 --- a/RTCP/Cobalt/GPUProc/test/SubbandProcs/CMakeLists.txt +++ b/RTCP/Cobalt/GPUProc/test/SubbandProcs/CMakeLists.txt @@ -9,7 +9,7 @@ endif() # This test is instable. Added to issue tracker: https://support.astron.nl/lofar_issuetracker/issues/5807 # Understand the proble -#lofar_add_test(tBeamFormerSubbandProcProcessSb tBeamFormerSubbandProcProcessSb.cc) +lofar_add_test(tBeamFormerSubbandProcProcessSb tBeamFormerSubbandProcProcessSb.cc) lofar_add_test(tCorrelatorSubbandProcProcessSb tCorrelatorSubbandProcProcessSb.cc) @@ -21,7 +21,7 @@ lofar_add_test(tFlysEyeBeamFormerSubbandProcProcessSb # Trick tests that compile kernels that need to be installed into thinking # that they can find the kernels using the LOFARROOT environment variable. set_tests_properties( - #tBeamFormerSubbandProcProcessSb + tBeamFormerSubbandProcProcessSb tCorrelatorSubbandProcProcessSb tCoherentStokesBeamFormerSubbandProcProcessSb tFlysEyeBeamFormerSubbandProcProcessSb diff --git a/RTCP/Cobalt/GPUProc/test/SubbandProcs/tBeamFormerSubbandProcProcessSb.parset b/RTCP/Cobalt/GPUProc/test/SubbandProcs/tBeamFormerSubbandProcProcessSb.parset index 04b31d39dec0de8f4353e2d10a74e54d3dd91277..96c98c1dc29f7009afc5a545695ef093bfac78d0 100644 --- a/RTCP/Cobalt/GPUProc/test/SubbandProcs/tBeamFormerSubbandProcProcessSb.parset +++ b/RTCP/Cobalt/GPUProc/test/SubbandProcs/tBeamFormerSubbandProcProcessSb.parset @@ -13,9 +13,9 @@ Observation.Beam[0].nrTiedArrayBeams = 2 Observation.Beam[0].TiedArrayBeam[0].absoluteAngle1 = 0 Observation.Beam[0].TiedArrayBeam[0].absoluteAngle2 = 0 Observation.Beam[0].TiedArrayBeam[0].coherent = F -Observation.Beam[1].TiedArrayBeam[1].absoluteAngle1 = 0 -Observation.Beam[1].TiedArrayBeam[1].absoluteAngle2 = 0 -Observation.Beam[1].TiedArrayBeam[1].coherent = T +Observation.Beam[0].TiedArrayBeam[1].absoluteAngle1 = 0 +Observation.Beam[0].TiedArrayBeam[1].absoluteAngle2 = 0 
+Observation.Beam[0].TiedArrayBeam[1].coherent = T Cobalt.BeamFormer.CoherentStokes.which = I # IQUV Cobalt.BeamFormer.CoherentStokes.timeIntegrationFactor = 1 diff --git a/RTCP/Cobalt/VisualStudio/Cobalt.v12.suo b/RTCP/Cobalt/VisualStudio/Cobalt.v12.suo index df628e8bfd42987dc49d4aa6f59bcb924c2688fd..ad9d0fb47468c6f384e9cbbd79c25a613a531cc1 100644 Binary files a/RTCP/Cobalt/VisualStudio/Cobalt.v12.suo and b/RTCP/Cobalt/VisualStudio/Cobalt.v12.suo differ diff --git a/RTCP/Cobalt/VisualStudio/GPUProc/GPUProc.vcxproj.filters b/RTCP/Cobalt/VisualStudio/GPUProc/GPUProc.vcxproj.filters index a6813f4485728209057bb220e66f42ff829321b9..e58f8f3f9ffbfd5a4cab22104dbd906b331708bf 100644 --- a/RTCP/Cobalt/VisualStudio/GPUProc/GPUProc.vcxproj.filters +++ b/RTCP/Cobalt/VisualStudio/GPUProc/GPUProc.vcxproj.filters @@ -776,7 +776,6 @@ <ClCompile Include="..\..\GPUProc\src\cuda\Kernels\BandPassCorrectionKernel.cc"> <Filter>src\cuda\Kernels</Filter> </ClCompile> - <ClCompile Include="..\..\GPUProc\test\SubbandProcs\tFlysEyeBeamFormerSubbandProcProcessSb.cc" /> <ClCompile Include="..\..\GPUProc\src\cuda\SubbandProcs\BeamFormerPreprocessingStep.cc"> <Filter>src\cuda\SubbandProcs\steps</Filter> </ClCompile> @@ -795,6 +794,9 @@ <ClCompile Include="..\..\GPUProc\test\Kernels\KernelTestHelpers.cc"> <Filter>test\Kernels</Filter> </ClCompile> + <ClCompile Include="..\..\GPUProc\test\SubbandProcs\tFlysEyeBeamFormerSubbandProcProcessSb.cc"> + <Filter>test\SubbandProcs</Filter> + </ClCompile> </ItemGroup> <ItemGroup> <None Include="..\..\GPUProc\src\cuda\CMakeLists.txt"> @@ -1119,8 +1121,6 @@ <None Include="..\..\GPUProc\share\gpu\kernels\BandPassCorrection.cu"> <Filter>share</Filter> </None> - <None Include="..\..\GPUProc\test\SubbandProcs\tFlysEyeBeamFormerSubbandProcProcessSb.parset" /> - <None Include="..\..\GPUProc\test\SubbandProcs\tFlysEyeBeamFormerSubbandProcProcessSb.sh" /> <None Include="..\..\GPUProc\test\tBeamform_1sec_1st_5sb_noflagging_2SapDivNTab.parset" /> <None 
Include="..\..\GPUProc\test\tBeamform_1sec_1st_5sb_noflagging_2SapDivNTab.run" /> <None Include="..\..\GPUProc\test\tBeamform_1sec_1st_5sb_noflagging_2SapDivNTab.sh" /> @@ -1133,6 +1133,12 @@ <None Include="..\..\GPUProc\test\Kernels\visualizeBeamformer.py"> <Filter>test\Kernels</Filter> </None> + <None Include="..\..\GPUProc\test\SubbandProcs\tFlysEyeBeamFormerSubbandProcProcessSb.parset"> + <Filter>test\SubbandProcs</Filter> + </None> + <None Include="..\..\GPUProc\test\SubbandProcs\tFlysEyeBeamFormerSubbandProcProcessSb.sh"> + <Filter>test\SubbandProcs</Filter> + </None> </ItemGroup> <ItemGroup> <Text Include="..\..\GPUProc\test\CMakeLists.txt">