diff --git a/CMakeLists.txt b/CMakeLists.txt index 74d8f4150e9a3bda53108ffeec6fe0f40f7943ac..47bc2e8735199e14d344adf37a67a6282f3a9887 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -278,7 +278,10 @@ endif() # Include aocommon include_directories(${CMAKE_SOURCE_DIR}/external/aocommon/include/) +# TODO(AST-1278): Try using a new (release) version of XSimd in aocommon. +set(XSIMD_GIT_TAG 2f5eddf8912c7e2527f0c50895c7560b964d29af) include(external/aocommon/CMake/FetchXTensor.cmake) +add_compile_definitions(XTENSOR_USE_XSIMD) # Include pybind11 set(PYTHON_EXECUTABLE "${Python3_EXECUTABLE}") @@ -370,7 +373,7 @@ add_library( ddecal/gain_solvers/SolverTools.cc ddecal/linear_solvers/LLSSolver.cc ${DDE_ARMADILLO_FILES}) -target_link_libraries(DDECal xtensor) +target_link_libraries(DDECal xsimd xtensor) # 'Base' files and steps are only included in DP3. add_library( @@ -449,7 +452,7 @@ add_library( steps/ApplyBeam.cc steps/DemixerNew.cc steps/NullStokes.cc) -target_link_libraries(DP3_OBJ xtensor) +target_link_libraries(DP3_OBJ xsimd xtensor) add_library( ParmDB OBJECT diff --git a/antennaflagger/Flagger.cc b/antennaflagger/Flagger.cc index a4297c502387625e5659694a214deb4c9866a9b1..0e4f20f1bef3bbcdd57b0190968fb3df37bad1b8 100644 --- a/antennaflagger/Flagger.cc +++ b/antennaflagger/Flagger.cc @@ -17,30 +17,29 @@ #include "Flagger.h" namespace { -xt::xtensor<bool, 1> FindOutliers(float sigma, size_t max_iterations, - const xt::xtensor<float, 1>& data) { +xt::xtensor<int, 1> FindOutliers(float sigma, size_t max_iterations, + xt::xtensor<float, 1>&& data) { // All non finite numbers are flagged as outlier - xt::xtensor<bool, 1> flags = !xt::isfinite(data); - xt::xtensor<float, 1> data_copy = data; + xt::xtensor<int, 1> flags = !xt::isfinite(data); // Take the median of the finite numbers only, otherwise the median is NaN. const float median = xt::median(xt::filter(data, !flags)); // Overwrite NaN's with median to workaround limitation of xt::stddev. - xt::masked_view(data_copy, flags) = median; + xt::masked_view(data, flags) = median; for (size_t i = 0; i < max_iterations; ++i) { - const float stddev = xt::stddev(data_copy)(); + const float stddev = xt::stddev(data)(); size_t outlier_count = 0; const float lower_bound = median - (sigma * stddev); const float upper_bound = median + (sigma * stddev); for (size_t j = 0; j < data.size(); ++j) { - const float value = data_copy[j]; + const float value = data[j]; if (!flags(j) && (value < lower_bound || value > upper_bound)) { flags(j) = true; - data_copy(j) = median; + data(j) = median; ++outlier_count; } } @@ -102,32 +101,30 @@ xt::xtensor<std::complex<float>, 3> Flagger::GroupStats( return stats_antenna; } -xt::xtensor<bool, 2> Flagger::ComputeAntennaFlags( +xt::xtensor<int, 2> Flagger::ComputeAntennaFlags( float sigma, int max_iterations, const xt::xtensor<std::complex<float>, 3>& stats) { const size_t n_stations = stats.shape(0); const size_t n_antennas_per_station = stats.shape(1); const size_t n_correlations = stats.shape(2); - xt::xtensor<bool, 2> flags({n_stations, n_antennas_per_station}, false); + xt::xtensor<int, 2> flags({n_stations, n_antennas_per_station}, false); for (size_t station = 0; station < n_stations; ++station) { for (size_t cor = 0; cor < n_correlations; ++cor) { - const auto check_sum = + const xt::xtensor<std::complex<float>, 1> check_sum = xt::log(1.0f + xt::view(stats, station, xt::all(), cor)); - const xt::xtensor<float, 1> check_sum_real = xt::real(check_sum); - const xt::xtensor<float, 1> check_sum_imag = xt::imag(check_sum); xt::view(flags, station, xt::all()) |= - FindOutliers(sigma, max_iterations, check_sum_real) | - FindOutliers(sigma, max_iterations, check_sum_imag); + FindOutliers(sigma, max_iterations, xt::real(check_sum)) | + FindOutliers(sigma, max_iterations, xt::imag(check_sum)); } } return flags; } -xt::xtensor<bool, 2> Flagger::ComputeStationFlags( +xt::xtensor<int, 2> Flagger::ComputeStationFlags( float sigma, int max_iterations, const xt::xtensor<std::complex<float>, 3>& stats) { const size_t n_stations = stats.shape(0); @@ -136,7 +133,7 @@ xt::xtensor<bool, 2> Flagger::ComputeStationFlags( // In case of n_correlations == 4, use only XX and YY (index 0 and 3), // otherwise use only XX (index 0). const size_t n_correlations_out = n_correlations == 4 ? 2 : 1; - xt::xtensor<bool, 2> flags({n_stations, n_correlations_out}, false); + xt::xtensor<int, 2> flags({n_stations, n_correlations_out}, false); const xt::xtensor<float, 3> stats_real = xt::real(stats); const xt::xtensor<float, 3> stats_imag = xt::imag(stats); @@ -164,14 +161,14 @@ xt::xtensor<bool, 2> Flagger::ComputeStationFlags( const float scale_real = xt::median(stats_real_cor); const float scale_imag = xt::median(stats_imag_cor); - const xt::xtensor<float, 1> median_real = + xt::xtensor<float, 1> median_real = xt::median(stats_real_cor, 1) / scale_real; - const xt::xtensor<float, 1> median_imag = + xt::xtensor<float, 1> median_imag = xt::median(stats_imag_cor, 1) / scale_imag; xt::view(flags, xt::all(), cor_out) = - FindOutliers(sigma, max_iterations, median_real) | - FindOutliers(sigma, max_iterations, median_imag); + FindOutliers(sigma, max_iterations, std::move(median_real)) | + FindOutliers(sigma, max_iterations, std::move(median_imag)); } return flags; @@ -200,44 +197,44 @@ void Flagger::AssertStatsComputed() const { assert(stats_antenna_stddev_.shape() == stats_antenna_sum_square_.shape()); } -xt::xtensor<bool, 1> Flagger::FindBadAntennas(float sigma, int max_iterations) { +xt::xtensor<int, 1> Flagger::FindBadAntennas(float sigma, int max_iterations) { AssertStatsComputed(); find_bad_antennas_timer_.start(); - const xt::xtensor<bool, 2> flags_stddev = Flagger::ComputeAntennaFlags( + const xt::xtensor<int, 2> flags_stddev = Flagger::ComputeAntennaFlags( sigma, max_iterations, stats_antenna_stddev_); assert(flags_stddev.shape(0) == n_stations_); assert(flags_stddev.shape(1) == n_antennas_per_station_); - const xt::xtensor<bool, 2> flags_sum_square = Flagger::ComputeAntennaFlags( + const xt::xtensor<int, 2> flags_sum_square = Flagger::ComputeAntennaFlags( sigma, max_iterations, stats_antenna_sum_square_); - const xt::xtensor<bool, 1> flagged_antennas = - xt::flatten(flags_stddev) && xt::flatten(flags_sum_square); + const xt::xtensor<int, 1> flagged_antennas = + xt::flatten(flags_stddev) & xt::flatten(flags_sum_square); find_bad_antennas_timer_.stop(); return flagged_antennas; } -xt::xtensor<bool, 1> Flagger::FindBadStations(float sigma, int max_iterations) { +xt::xtensor<int, 1> Flagger::FindBadStations(float sigma, int max_iterations) { AssertStatsComputed(); find_bad_stations_timer_.start(); - const xt::xtensor<bool, 2> flags_stddev = Flagger::ComputeStationFlags( + const xt::xtensor<int, 2> flags_stddev = Flagger::ComputeStationFlags( sigma, max_iterations, stats_antenna_stddev_); assert(flags_stddev.shape(0) == n_stations_); assert(flags_stddev.shape(1) == (n_correlations_ == 4 ? 2 : 1)); - const xt::xtensor<bool, 2> flags_sum_square = Flagger::ComputeStationFlags( + const xt::xtensor<int, 2> flags_sum_square = Flagger::ComputeStationFlags( sigma, max_iterations, stats_antenna_sum_square_); const xt::xtensor<size_t, 1> station_indices = xt::flatten_indices( xt::where(xt::prod(flags_stddev & flags_sum_square, {1}))); - xt::xtensor<bool, 1> antenna_flags({n_antennas_}, false); + xt::xtensor<int, 1> antenna_flags({n_antennas_}, false); for (size_t station : station_indices) { const size_t first_antenna = station * n_antennas_per_station_; diff --git a/antennaflagger/Flagger.h b/antennaflagger/Flagger.h index c28a8efa4f4ae5f6223ce5abc086bb8f721b5d81..3eada63d0589854c718ede86ee90244f68497de4 100644 --- a/antennaflagger/Flagger.h +++ b/antennaflagger/Flagger.h @@ -1,11 +1,11 @@ -// Copyright (C) 2022 ASTRON (Netherlands Institute for Radio Astronomy) +// Copyright (C) 2023 ASTRON (Netherlands Institute for Radio Astronomy) // SPDX-License-Identifier: GPL-3.0-or-later #ifndef DP3_ANTENNAFLAGGER_FLAGGER_H_ #define DP3_ANTENNAFLAGGER_FLAGGER_H_ -#include <vector> #include <complex> +#include <vector> #include <aocommon/xt/span.h> @@ -63,17 +63,18 @@ class Flagger { * @return Flags as a tensor of booleans of length n_stations * * n_antennas_per_station. */ - xt::xtensor<bool, 1> FindBadAntennas(float sigma, int max_iterations); + xt::xtensor<int, 1> FindBadAntennas(float sigma, int max_iterations); /** * Identify stations that are outliers compared to the other stations. * In case of n_antennas_per_station > 1, a per-station statistic over all * antennas in a station is used. * - * @return Flags as a tensor of booleans of length n_stations * - * n_antennas_per_station. + * @return Flags as a tensor of integers of length n_stations * + * n_antennas_per_station. The booleans are 'int' instead of 'bool' since + * XSimd otherwise converts booleans to integers and back. */ - xt::xtensor<bool, 1> FindBadStations(float sigma, int max_iterations); + xt::xtensor<int, 1> FindBadStations(float sigma, int max_iterations); /** * Compute standard deviation for the real and imaginary component seperately @@ -111,10 +112,11 @@ class Flagger { * The input is assumed to be in the form (n_stations, n_antennas_per_station, * n_correlations). * - * @return Flags as a tensor of booleans in the form (n_stations, - * n_antennas_per_station). + * @return Flags as a tensor of integers in the form (n_stations, + * n_antennas_per_station). The flags are 'int' instead of 'bool' since + * XSimd otherwise converts booleans to integers and back. */ - static xt::xtensor<bool, 2> ComputeAntennaFlags( + static xt::xtensor<int, 2> ComputeAntennaFlags( float sigma, int max_iterations, const xt::xtensor<std::complex<float>, 3>& stats); @@ -125,8 +127,10 @@ class Flagger { * the form (n_stations, n_antennas_per_station, n_correlations). * * @return Flags as a tensor of booleans in the form (n_stations, 1 || 2). + * The booleans are 'int' instead of 'bool' since XSimd otherwise converts + * booleans to integers and back. */ - static xt::xtensor<bool, 2> ComputeStationFlags( + static xt::xtensor<int, 2> ComputeStationFlags( float sigma, int max_iterations, const xt::xtensor<std::complex<float>, 3>& stats); diff --git a/ddecal/constraints/AntennaConstraint.h b/ddecal/constraints/AntennaConstraint.h index d5e3e431d2e6c0f1044753456324995789476e2e..4481bf79e1d33182499072d7165ed6ef937188dc 100644 --- a/ddecal/constraints/AntennaConstraint.h +++ b/ddecal/constraints/AntennaConstraint.h @@ -9,6 +9,8 @@ #include <set> #include <vector> +#include <xtensor/xcomplex.hpp> +#include <xtensor/xmasked_view.hpp> #include <xtensor/xview.hpp> namespace dp3 { @@ -47,17 +49,24 @@ class AntennaConstraint final : public Constraint { xt::xtensor<size_t, 2> solution_count_per_set = xt::zeros<size_t>({n_directions, n_polarizations}); + // Declaring these variables outside the loop allows reusing memory. + xt::xtensor<dcomplex, 2> solution_set; + xt::xtensor<std::size_t, 2> solution_set_is_finite; + // Calculate the sum of solutions over the set of stations for (size_t antenna_index : antenna_set) { - auto solution_set = + solution_set = xt::view(solutions, ch, antenna_index, xt::all(), xt::all()); - auto solution_set_is_finite = xt::isfinite(solution_set); - solution_per_set += solution_set_is_finite * solution_set; + solution_set_is_finite = xt::isfinite(solution_set); + xt::masked_view(solution_set, !solution_set_is_finite) + .fill(std::complex<double>(0.0, 0.0)); + + solution_per_set += solution_set; solution_count_per_set += solution_set_is_finite; } // Divide by nr of core stations to get the mean solution - solution_per_set /= solution_count_per_set; + solution_per_set /= xt::cast<double>(solution_count_per_set); // Assign all core stations to the mean solution for (size_t antenna_index : antenna_set) { diff --git a/ddecal/gain_solvers/SolverTools.cc b/ddecal/gain_solvers/SolverTools.cc index 77b29c027f3e1550fed6cb722de70869601b925d..488129eabe2b9348ea80a2016ea6e917af50c579 100644 --- a/ddecal/gain_solvers/SolverTools.cc +++ b/ddecal/gain_solvers/SolverTools.cc @@ -42,7 +42,10 @@ void AssignAndWeight( // flagged. Although the initialization to false can be avoided by directly // assigning flags for the first correlation, a benchmark showed that the // (simpler) code below gives slightly better performance. - xt::xtensor<bool, 2> flags( + // Use 'int' instead of 'bool' for flags since XSimd uses integers when + // or'ing flags. Using integers directly prevents unpacking booleans to + // integers beforehand and packing integers to booleans afterwards. + xt::xtensor<int, 2> flags( {unweighted_data.shape(0), unweighted_data.shape(1)}, false); for (std::size_t correlation = 0; correlation < n_correlations; ++correlation) { @@ -50,8 +53,9 @@ void AssignAndWeight( !xt::isfinite(xt::view(unweighted_data, xt::all(), xt::all(), correlation)); for (const std::string& name : direction_names) { - flags |= !xt::isfinite(xt::view(unweighted_buffer.GetData(name), - xt::all(), xt::all(), correlation)); + flags |= xt::cast<int>( + !xt::isfinite(xt::view(unweighted_buffer.GetData(name), xt::all(), + xt::all(), correlation))); } } diff --git a/steps/AntennaFlagger.cc b/steps/AntennaFlagger.cc index 3b5f05a9d73ba78f479f74b5bad6cd79bb0dfe53..4ecbf7ac19a12f99440e755fb30ae9ff3852bb59 100644 --- a/steps/AntennaFlagger.cc +++ b/steps/AntennaFlagger.cc @@ -100,7 +100,7 @@ bool AntennaFlagger::process(std::unique_ptr<base::DPBuffer> buffer) { flagger_->ComputeStats(buffer->GetData(), baseline_order_); // Find outlier antennas - const xt::xtensor<bool, 1> antenna_flags = + const xt::xtensor<int, 1> antenna_flags = flagger_->FindBadAntennas(antenna_flagging_sigma_, antenna_flagging_max_iterations_) | flagger_->FindBadStations(station_flagging_sigma_, diff --git a/steps/PreFlagger.cc b/steps/PreFlagger.cc index 242a4eaf3dda89bc1c368c2207226aef71586d73..2f3574e9742cc38e7acb65d2c9dc85f2e9cbe8aa 100644 --- a/steps/PreFlagger.cc +++ b/steps/PreFlagger.cc @@ -131,7 +131,7 @@ bool PreFlagger::process(std::unique_ptr<DPBuffer> buffer) { assert(buffer->GetFlags().size() != 0); // Do the PSet steps and combine the result with the current flags. // Only count if the flag changes. - const xt::xtensor<bool, 3>* const flags = + const xt::xtensor<int, 3>* const flags = itsPSet.process(*buffer, itsCount, xt::xtensor<bool, 1>(), itsTimer); switch (itsMode) { case Mode::kSetFlag: @@ -324,7 +324,7 @@ void PreFlagger::PSet::updateInfo(const DPInfo& info) { void PreFlagger::PSet::fillChannels(const DPInfo& info) { unsigned int nrcorr = info.ncorr(); unsigned int nrchan = info.nchan(); - xt::xtensor<bool, 1> selChan(std::array<size_t, 1>{nrchan}); + xt::xtensor<int, 1> selChan(std::array<size_t, 1>{nrchan}); if (itsStrChan.empty()) { selChan.fill(true); } else { @@ -449,7 +449,7 @@ void PreFlagger::PSet::show(std::ostream& os, bool showName) const { } } -xt::xtensor<bool, 3>* PreFlagger::PSet::process( +xt::xtensor<int, 3>* PreFlagger::PSet::process( DPBuffer& out, unsigned int timeSlot, const xt::xtensor<bool, 1>& matchBL, common::NSTimer& timer) { // No need to process it if the time mismatches or if only time selection. @@ -495,7 +495,7 @@ xt::xtensor<bool, 3>* PreFlagger::PSet::process( return &itsFlags; } // Convert each baseline flag to a flag per correlation/channel. - bool* flagPtr = itsFlags.data(); + int* flagPtr = itsFlags.data(); for (unsigned int i = 0; i < itsMatchBL.size(); ++i) { if (itsMatchBL[i]) { std::fill(flagPtr, flagPtr + nr, itsMatchBL[i]); @@ -525,17 +525,17 @@ xt::xtensor<bool, 3>* PreFlagger::PSet::process( // are reused to AND or OR subexpressions. This can be done harmlessly // and saves the creation of too many arrays. if (!itsPSets.empty()) { - std::stack<xt::xtensor<bool, 3>*> results; + std::stack<xt::xtensor<int, 3>*> results; for (int oper : itsRpn) { if (oper >= 0) { results.push(itsPSets[oper]->process(out, timeSlot, itsMatchBL, timer)); } else if (oper == OpNot) { - xt::xtensor<bool, 3>* left = results.top(); + xt::xtensor<int, 3>* left = results.top(); *left = !*left; } else if (oper == OpOr || oper == OpAnd) { - xt::xtensor<bool, 3>* right = results.top(); + xt::xtensor<int, 3>* right = results.top(); results.pop(); - xt::xtensor<bool, 3>* left = results.top(); + xt::xtensor<int, 3>* left = results.top(); if (oper == OpOr) { *left |= *right; } else { @@ -550,7 +550,7 @@ xt::xtensor<bool, 3>* PreFlagger::PSet::process( throw std::runtime_error( "Something went wrong while evaluating expression: results.size() != " "1"); - xt::xtensor<bool, 3>* mflags = results.top(); + xt::xtensor<int, 3>* mflags = results.top(); itsFlags &= *mflags; } return &itsFlags; @@ -709,7 +709,7 @@ void PreFlagger::PSet::flagAmpl( const xt::xtensor<float, 3> amplitudes = xt::abs(data); const float* valPtr = amplitudes.data(); - bool* flagPtr = itsFlags.data(); + int* flagPtr = itsFlags.data(); for (unsigned int i = 0; i < nr; ++i) { bool flag = false; for (unsigned int j = 0; j < nrcorr; ++j) { @@ -733,7 +733,7 @@ void PreFlagger::PSet::flagPhase( const xt::xtensor<float, 3> phases = xt::arg(data); const float* valPtr = phases.data(); - bool* flagPtr = itsFlags.data(); + int* flagPtr = itsFlags.data(); for (unsigned int i = 0; i < nr; ++i) { bool flag = false; for (unsigned int j = 0; j < nrcorr; ++j) { @@ -756,7 +756,7 @@ void PreFlagger::PSet::flagReal( const std::size_t nrcorr = data.shape(2); const std::complex<float>* valPtr = data.data(); - bool* flagPtr = itsFlags.data(); + int* flagPtr = itsFlags.data(); for (unsigned int i = 0; i < nr; ++i) { bool flag = false; for (unsigned int j = 0; j < nrcorr; ++j) { @@ -780,7 +780,7 @@ void PreFlagger::PSet::flagImag( const std::size_t nrcorr = data.shape(2); const std::complex<float>* valPtr = data.data(); - bool* flagPtr = itsFlags.data(); + int* flagPtr = itsFlags.data(); for (unsigned int i = 0; i < nr; ++i) { bool flag = false; for (unsigned int j = 0; j < nrcorr; ++j) { @@ -1058,10 +1058,10 @@ void PreFlagger::PSet::fillBLMatrix() { } } -xt::xtensor<bool, 1> PreFlagger::PSet::handleFreqRanges( +xt::xtensor<int, 1> PreFlagger::PSet::handleFreqRanges( const std::vector<double>& chanFreqs) { unsigned int nrchan = chanFreqs.size(); - xt::xtensor<bool, 1> selChan({nrchan}, false); + xt::xtensor<int, 1> selChan({nrchan}, false); // A frequency range can be given as value..value or value+-value. // Units can be given for each value; if one is given it applies to both. // Default unit is MHz. diff --git a/steps/PreFlagger.h b/steps/PreFlagger.h index a9b6e02078008350f200c5c7b2ce017bfba1e4f6..7fc2d729e93d13d2f9f86793cddc3981871a66a1 100644 --- a/steps/PreFlagger.h +++ b/steps/PreFlagger.h @@ -3,7 +3,7 @@ // SPDX-License-Identifier: GPL-3.0-or-later /// @file -/// @brief DPPP step class to flag data on channel, baseline, or time +/// @brief DP3 step class to flag data on channel, baseline, or time /// @author Ger van Diepen #ifndef DP3_STEPS_PREFLAGGER_H_ @@ -23,7 +23,7 @@ namespace dp3::steps { -/// @brief DPPP step class to flag data on channel, baseline, or time +/// @brief DP3 step class to flag data on channel, baseline, or time /// This class is a Step class flagging data points based on data /// selections given in the parset file. @@ -128,9 +128,9 @@ class PreFlagger : public Step { } /// Set and return the flags. - xt::xtensor<bool, 3>* process(base::DPBuffer&, unsigned int timeSlot, - const xt::xtensor<bool, 1>& matchBL, - common::NSTimer& timer); + xt::xtensor<int, 3>* process(base::DPBuffer&, unsigned int timeSlot, + const xt::xtensor<bool, 1>& matchBL, + common::NSTimer& timer); /// Update the general info. /// It is used to adjust the parms if needed. @@ -208,7 +208,7 @@ class PreFlagger : public Step { /// Handle the frequency ranges given and determine which channels /// have to be flagged. - xt::xtensor<bool, 1> handleFreqRanges(const std::vector<double>& chanFreqs); + xt::xtensor<int, 1> handleFreqRanges(const std::vector<double>& chanFreqs); /// Get the value and possible unit. /// If no unit is given, the argument is left untouched. @@ -267,7 +267,9 @@ class PreFlagger : public Step { std::vector<int> itsRpn; ///< PSet expression in RPN form std::vector<PSet::ShPtr> itsPSets; ///< PSets used in itsRpn xt::xtensor<bool, 2> itsChanFlags; ///< flags for channels to be flagged - xt::xtensor<bool, 3> itsFlags; + // Using 'int' instead of 'bool' in itsFlags works better with XSimd, since + // XSimd uses an 'int' representation anyway for boolean operations. + xt::xtensor<int, 3> itsFlags; xt::xtensor<bool, 1> itsMatchBL; ///< true = baseline in buffer matches }; diff --git a/steps/test/unit/tPreFlagger.cc b/steps/test/unit/tPreFlagger.cc index 3b6fe8e75fa7ed8b9884b3229b352282c1cd9673..c539fc27f5091ade3d344b6bd0ec0f7273fcf300 100644 --- a/steps/test/unit/tPreFlagger.cc +++ b/steps/test/unit/tPreFlagger.cc @@ -123,7 +123,7 @@ class TestInput : public dp3::steps::MockInput { buffer->ResizeData(shape); for (std::size_t i = 0; i < buffer->GetData().size(); ++i) { buffer->GetData().data()[i] = - std::complex<float>(i + itsCount * 10, i - 10 + itsCount * 6); + std::complex<float>(i + itsCount * 10, int(i) - 10 + itsCount * 6); } buffer->ResizeUvw(itsNBl);