From a68a6730eb33fbfdbcf723dda17d399a95c25cc1 Mon Sep 17 00:00:00 2001 From: Mark de Wever <mark.dewever@stcorp.nl> Date: Fri, 4 Nov 2022 12:48:28 +0000 Subject: [PATCH] RAP-100 Reduce memory usage scalar solver --- ddecal/gain_solvers/DiagonalSolver.cc | 79 ++++++++++++++--------- ddecal/gain_solvers/DiagonalSolver.h | 11 ++++ ddecal/gain_solvers/FullJonesSolver.cc | 80 ++++++++++++++--------- ddecal/gain_solvers/FullJonesSolver.h | 10 +++ ddecal/gain_solvers/ScalarSolver.cc | 88 +++++++++++++++----------- ddecal/gain_solvers/ScalarSolver.h | 10 +++ 6 files changed, 180 insertions(+), 98 deletions(-) diff --git a/ddecal/gain_solvers/DiagonalSolver.cc b/ddecal/gain_solvers/DiagonalSolver.cc index b55ca5e95..9144b1fbc 100644 --- a/ddecal/gain_solvers/DiagonalSolver.cc +++ b/ddecal/gain_solvers/DiagonalSolver.cc @@ -23,33 +23,11 @@ DiagonalSolver::SolveResult DiagonalSolver::Solve( assert(solutions.size() == NChannelBlocks()); PrepareConstraints(); - - std::vector<std::vector<DComplex>> next_solutions(NChannelBlocks()); - SolveResult result; - // Model matrix 2 x ant x [2N x D] and visibility vector 2 x ant x [2N], - // for each channelblock. The following loop allocates all structures - std::vector<std::vector<Matrix>> g_times_cs(NChannelBlocks()); - std::vector<std::vector<std::vector<Complex>>> vs(NChannelBlocks()); - for (size_t ch_block = 0; ch_block != NChannelBlocks(); ++ch_block) { - const SolveData::ChannelBlockData& channelBlock = - data.ChannelBlock(ch_block); - next_solutions[ch_block].resize(NDirections() * NAntennas() * 2); - g_times_cs[ch_block].reserve(NAntennas() * 2); - vs[ch_block].reserve(NAntennas() * 2); - - for (size_t ant = 0; ant != NAntennas(); ++ant) { - // Model matrix [(2N) x D] and visibility vector [2N] - const size_t n_visibilities = channelBlock.NAntennaVisibilities(ant); - const size_t m = 2 * n_visibilities; - const size_t n = NDirections(); - for (size_t p = 0; p != 2; ++p) { - g_times_cs[ch_block].emplace_back(m, n); - vs[ch_block].emplace_back(std::max(m, n)); - } - } - } + std::vector<std::vector<DComplex>> next_solutions(NChannelBlocks()); + for (std::vector<DComplex>& next_solution : next_solutions) + next_solution.resize(NDirections() * NAntennas() * 2); /// /// Start iterating @@ -64,17 +42,29 @@ DiagonalSolver::SolveResult DiagonalSolver::Solve( double avg_squared_diff = 1.0E4; + // Using more threads wastes CPU and memory resources. + const size_t n_threads = std::min(NChannelBlocks(), GetNThreads()); + // For each thread: + // - Model matrix 2 x ant x [2N x D] + // - Visibility vector 2 x ant x [2N] + std::vector<std::vector<Matrix>> thread_g_times_cs(n_threads); + std::vector<std::vector<std::vector<Complex>>> thread_vs(n_threads); + + aocommon::ParallelFor<size_t> loop(n_threads); do { MakeSolutionsFinite2Pol(solutions); - ParallelFor<size_t> loop(GetNThreads()); loop.Run(0, NChannelBlocks(), [&](size_t ch_block, [[maybe_unused]] size_t thread) { - const SolveData::ChannelBlockData& channelBlock = + const SolveData::ChannelBlockData& channel_block = data.ChannelBlock(ch_block); - PerformIteration(channelBlock, g_times_cs[ch_block], - vs[ch_block], solutions[ch_block], - next_solutions[ch_block]); + + std::vector<Matrix>& g_times_cs = thread_g_times_cs[thread]; + std::vector<std::vector<Complex>>& vs = thread_vs[thread]; + InitializeModelMatrix(channel_block, g_times_cs, vs); + + PerformIteration(channel_block, g_times_cs, vs, + solutions[ch_block], next_solutions[ch_block]); }); Step(solutions, next_solutions); @@ -204,5 +194,34 @@ void DiagonalSolver::PerformIteration( } } +void DiagonalSolver::InitializeModelMatrix( + const SolveData::ChannelBlockData& channel_block_data, + std::vector<Matrix>& g_times_cs, + std::vector<std::vector<Complex>>& vs) const { + assert(g_times_cs.empty() == vs.empty()); + if (g_times_cs.empty()) { + // Executed the first iteration only. + g_times_cs.reserve(NAntennas() * 2); + vs.reserve(NAntennas() * 2); + } else { + // Note clear() does not modify the capacity. + // See https://en.cppreference.com/w/cpp/container/vector/clear + g_times_cs.clear(); + vs.clear(); + } + + // Update the size of the model matrix and initialize them to zero. + for (size_t ant = 0; ant != NAntennas(); ++ant) { + // Model matrix [(2N) x D] and visibility vector [2N] + const size_t n_visibilities = channel_block_data.NAntennaVisibilities(ant); + const size_t m = n_visibilities * 2; + const size_t n = NDirections(); + for (size_t p = 0; p != 2; ++p) { + g_times_cs.emplace_back(m, n); + vs.emplace_back(std::max(m, n)); + } + } +} + } // namespace ddecal } // namespace dp3 diff --git a/ddecal/gain_solvers/DiagonalSolver.h b/ddecal/gain_solvers/DiagonalSolver.h index dc4a78780..2b00188af 100644 --- a/ddecal/gain_solvers/DiagonalSolver.h +++ b/ddecal/gain_solvers/DiagonalSolver.h @@ -26,6 +26,17 @@ class DiagonalSolver final : public SolverBase { std::vector<std::vector<Complex>>& vs, const std::vector<DComplex>& solutions, std::vector<DComplex>& next_solutions); + + /** + * Initialize the model matrix for a channel block. + * + * The number of elements in the model matrix depends on the number of + * antenna visibilities in the corresponding channel block. + */ + void InitializeModelMatrix( + const SolveData::ChannelBlockData& channel_block_data, + std::vector<Matrix>& g_times_cs, + std::vector<std::vector<Complex>>& vs) const; }; } // namespace ddecal diff --git a/ddecal/gain_solvers/FullJonesSolver.cc b/ddecal/gain_solvers/FullJonesSolver.cc index 660754f0b..b4f316b41 100644 --- a/ddecal/gain_solvers/FullJonesSolver.cc +++ b/ddecal/gain_solvers/FullJonesSolver.cc @@ -51,33 +51,11 @@ FullJonesSolver::SolveResult FullJonesSolver::Solve( assert(solutions.size() == NChannelBlocks()); PrepareConstraints(); - - std::vector<std::vector<DComplex>> next_solutions(NChannelBlocks()); - SolveResult result; - // Dimensions for each channelblock: - // - Model matrix: n_antennas x [2N x 2D] - // - Visibility matrix: n_antennas x [2N x 2] - // The following loop allocates all structures: - std::vector<std::vector<Matrix>> g_times_cs(NChannelBlocks()); - std::vector<std::vector<Matrix>> vs(NChannelBlocks()); - for (size_t ch_block = 0; ch_block != NChannelBlocks(); ++ch_block) { - const SolveData::ChannelBlockData& channel_block_data = - data.ChannelBlock(ch_block); + std::vector<std::vector<DComplex>> next_solutions(NChannelBlocks()); + for (size_t ch_block = 0; ch_block != NChannelBlocks(); ++ch_block) next_solutions[ch_block].resize(NDirections() * NAntennas() * 4); - g_times_cs[ch_block].reserve(NAntennas()); - vs[ch_block].reserve(NAntennas()); - - for (size_t ant = 0; ant != NAntennas(); ++ant) { - // Model matrix [2N x 2D] and visibility matrix [2N x 2] - const size_t m = channel_block_data.NAntennaVisibilities(ant) * 2; - const size_t n = NDirections() * 2; - const size_t n_rhs = 2; - g_times_cs[ch_block].emplace_back(m, n); - vs[ch_block].emplace_back(std::max(m, n), n_rhs); - } - } /// /// Start iterating @@ -90,16 +68,29 @@ FullJonesSolver::SolveResult FullJonesSolver::Solve( std::vector<double> step_magnitudes; step_magnitudes.reserve(GetMaxIterations()); + // Using more threads wastes CPU and memory resources. + const size_t n_threads = std::min(NChannelBlocks(), GetNThreads()); + // For each thread: + // - Model matrix: n_antennas x [2N x 2D] + // - Visibility matrix: n_antennas x [2N x 2] + std::vector<std::vector<Matrix>> thread_g_times_cs(n_threads); + std::vector<std::vector<Matrix>> thread_vs(n_threads); + + aocommon::ParallelFor<size_t> loop(GetNThreads()); do { MakeSolutionsFinite4Pol(solutions); - aocommon::ParallelFor<size_t> loop(GetNThreads()); - loop.Run(0, NChannelBlocks(), - [&](size_t ch_block, [[maybe_unused]] size_t thread) { - PerformIteration(data.ChannelBlock(ch_block), - g_times_cs[ch_block], vs[ch_block], - solutions[ch_block], next_solutions[ch_block]); - }); + loop.Run(0, NChannelBlocks(), [&](size_t ch_block, size_t thread) { + const SolveData::ChannelBlockData& channel_block = + data.ChannelBlock(ch_block); + + std::vector<Matrix>& g_times_cs = thread_g_times_cs[thread]; + std::vector<Matrix>& vs = thread_vs[thread]; + InitializeModelMatrix(channel_block, g_times_cs, vs); + + PerformIteration(channel_block, g_times_cs, vs, solutions[ch_block], + next_solutions[ch_block]); + }); Step(solutions, next_solutions); @@ -257,5 +248,32 @@ void FullJonesSolver::PerformIteration( } } +void FullJonesSolver::InitializeModelMatrix( + const SolveData::ChannelBlockData& channel_block_data, + std::vector<Matrix>& g_times_cs, std::vector<Matrix>& vs) const { + assert(g_times_cs.empty() == vs.empty()); + if (g_times_cs.empty()) { + // Executed the first iteration only. + g_times_cs.reserve(NAntennas()); + vs.reserve(NAntennas()); + } else { + // Note clear() does not modify the capacity. + // See https://en.cppreference.com/w/cpp/container/vector/clear + g_times_cs.clear(); + vs.clear(); + } + + // Update the size of the model matrix and initialize them to zero. + for (size_t ant = 0; ant != NAntennas(); ++ant) { + // Model matrix [2N x 2D] and visibility matrix [2N x 2] + const size_t n_visibilities = channel_block_data.NAntennaVisibilities(ant); + const size_t m = n_visibilities * 2; + const size_t n = NDirections() * 2; + const size_t n_rhs = 2; + g_times_cs.emplace_back(m, n); + vs.emplace_back(std::max(m, n), n_rhs); + } +} + } // namespace ddecal } // namespace dp3 diff --git a/ddecal/gain_solvers/FullJonesSolver.h b/ddecal/gain_solvers/FullJonesSolver.h index aecf1038b..70b1e9d2a 100644 --- a/ddecal/gain_solvers/FullJonesSolver.h +++ b/ddecal/gain_solvers/FullJonesSolver.h @@ -24,6 +24,16 @@ class FullJonesSolver final : public SolverBase { std::vector<Matrix>& vs, const std::vector<DComplex>& solutions, std::vector<DComplex>& next_solutions); + + /** + * Initialize the model matrix for a channel block. + * + * The number of elements in the model matrix depends on the number of + * antenna visibilities in the corresponding channel block. + */ + void InitializeModelMatrix( + const SolveData::ChannelBlockData& channel_block_data, + std::vector<Matrix>& g_times_cs, std::vector<Matrix>& vs) const; }; } // namespace ddecal diff --git a/ddecal/gain_solvers/ScalarSolver.cc b/ddecal/gain_solvers/ScalarSolver.cc index 7c57f7ffa..1a8c8ba9f 100644 --- a/ddecal/gain_solvers/ScalarSolver.cc +++ b/ddecal/gain_solvers/ScalarSolver.cc @@ -20,33 +20,11 @@ ScalarSolver::SolveResult ScalarSolver::Solve( assert(solutions.size() == NChannelBlocks()); PrepareConstraints(); - - std::vector<std::vector<DComplex>> next_solutions(NChannelBlocks()); - SolveResult result; - // Model matrix ant x [N x D] and visibility vector ant x [N x 1], - // for each channelblock - // The following loop allocates all structures - std::vector<std::vector<Matrix>> g_times_cs(NChannelBlocks()); - std::vector<std::vector<Matrix>> vs(NChannelBlocks()); - for (size_t chBlock = 0; chBlock != NChannelBlocks(); ++chBlock) { - const SolveData::ChannelBlockData& channelBlock = - data.ChannelBlock(chBlock); - next_solutions[chBlock].resize(NDirections() * NAntennas()); - g_times_cs[chBlock].reserve(NAntennas()); - vs[chBlock].reserve(NAntennas()); - - for (size_t ant = 0; ant != NAntennas(); ++ant) { - // Model matrix [N x D] and visibility vector [N x 1] - const size_t n_visibilities = channelBlock.NAntennaVisibilities(ant); - const size_t m = n_visibilities * 4; - const size_t n = NDirections(); - const size_t nrhs = 1; - g_times_cs[chBlock].emplace_back(m, n); - vs[chBlock].emplace_back(std::max(m, n), nrhs); - } - } + std::vector<std::vector<DComplex>> next_solutions(NChannelBlocks()); + for (std::vector<DComplex>& next_solution : next_solutions) + next_solution.resize(NDirections() * NAntennas()); /// /// Start iterating @@ -61,17 +39,29 @@ ScalarSolver::SolveResult ScalarSolver::Solve( double avg_squared_diff = 1.0E4; + // Using more threads wastes CPU and memory resources. + const size_t n_threads = std::min(NChannelBlocks(), GetNThreads()); + // For each thread: + // - Model matrix ant x [N x D] and + // - Visibility vector ant x [N x 1] + std::vector<std::vector<Matrix>> thread_g_times_cs(n_threads); + std::vector<std::vector<Matrix>> thread_vs(n_threads); + + aocommon::ParallelFor<size_t> loop(n_threads); do { MakeSolutionsFinite1Pol(solutions); - aocommon::ParallelFor<size_t> loop(GetNThreads()); - loop.Run(0, NChannelBlocks(), - [&](size_t chBlock, [[maybe_unused]] size_t thread) { - const SolveData::ChannelBlockData& channelBlock = - data.ChannelBlock(chBlock); - PerformIteration(channelBlock, g_times_cs[chBlock], vs[chBlock], - solutions[chBlock], next_solutions[chBlock]); - }); + loop.Run(0, NChannelBlocks(), [&](size_t ch_block, size_t thread) { + const SolveData::ChannelBlockData& channel_block = + data.ChannelBlock(ch_block); + + std::vector<Matrix>& g_times_cs = thread_g_times_cs[thread]; + std::vector<Matrix>& vs = thread_vs[thread]; + InitializeModelMatrix(channel_block, g_times_cs, vs); + + PerformIteration(channel_block, g_times_cs, vs, solutions[ch_block], + next_solutions[ch_block]); + }); Step(solutions, next_solutions); @@ -112,10 +102,6 @@ void ScalarSolver::PerformIteration(const SolveData::ChannelBlockData& cb_data, std::vector<Matrix>& vs, const std::vector<DComplex>& solutions, std::vector<DComplex>& next_solutions) { - for (size_t ant = 0; ant != NAntennas(); ++ant) { - g_times_cs[ant].SetZero(); - vs[ant].SetZero(); - } const size_t n_visibilities = cb_data.NVisibilities(); const size_t p1_to_p2[4] = {0, 2, 1, 3}; @@ -196,5 +182,33 @@ void ScalarSolver::PerformIteration(const SolveData::ChannelBlockData& cb_data, } } +void ScalarSolver::InitializeModelMatrix( + const SolveData::ChannelBlockData& channel_block_data, + std::vector<Matrix>& g_times_cs, std::vector<Matrix>& vs) const { + assert(g_times_cs.empty() == vs.empty()); + if (g_times_cs.empty()) { + // Executed the first iteration only. + g_times_cs.reserve(NAntennas()); + vs.reserve(NAntennas()); + } else { + // Note clear() does not modify the capacity. + // See https://en.cppreference.com/w/cpp/container/vector/clear + g_times_cs.clear(); + vs.clear(); + } + + // Update the size of the model matrix and initialize them to zero. + for (size_t ant = 0; ant != NAntennas(); ++ant) { + // Model matrix [N x D] and visibility vector [N x 1] + const size_t n_visibilities = channel_block_data.NAntennaVisibilities(ant); + const size_t m = n_visibilities * 4; + const size_t n = NDirections(); + const size_t nrhs = 1; + + g_times_cs.emplace_back(m, n); + vs.emplace_back(std::max(m, n), nrhs); + } +} + } // namespace ddecal } // namespace dp3 diff --git a/ddecal/gain_solvers/ScalarSolver.h b/ddecal/gain_solvers/ScalarSolver.h index 62fba9e10..9c48015ae 100644 --- a/ddecal/gain_solvers/ScalarSolver.h +++ b/ddecal/gain_solvers/ScalarSolver.h @@ -24,6 +24,16 @@ class ScalarSolver final : public SolverBase { std::vector<Matrix>& vs, const std::vector<DComplex>& solutions, std::vector<DComplex>& next_solutions); + + /** + * Initialize the model matrix for a channel block. + * + * The number of elements in the model matrix depends on the number of + * antenna visibilities in the corresponding channel block. + */ + void InitializeModelMatrix( + const SolveData::ChannelBlockData& channel_block_data, + std::vector<Matrix>& g_times_cs, std::vector<Matrix>& vs) const; }; } // namespace ddecal -- GitLab