From d9f199f98c68fd689bb4eb8e35f805cc17bb2ef0 Mon Sep 17 00:00:00 2001 From: mancini <mancini@astron.nl> Date: Fri, 7 Jun 2024 11:22:23 +0200 Subject: [PATCH] First implementation of performance tests --- CMakeLists.txt | 32 +++- steps/ApplyBeam.cc | 282 +++++++++++++++++++++++----- steps/ApplyBeam.h | 8 + steps/test/performance/applybeam.cc | 177 +++++++++++++++++ 4 files changed, 450 insertions(+), 49 deletions(-) create mode 100644 steps/test/performance/applybeam.cc diff --git a/CMakeLists.txt b/CMakeLists.txt index f6e84ee97..869fd8e61 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -72,11 +72,11 @@ option(BUILD_TESTING "Include tests in the build" OFF) set(CMAKE_MODULE_PATH ${CMAKE_SOURCE_DIR}/CMake) add_compile_options( + -fopenmp -Wall -Wnon-virtual-dtor -Wzero-as-null-pointer-constant -Wduplicated-branches - -Wundef -Wvla -Wpointer-arith -Wextra @@ -226,7 +226,7 @@ endif() # Prevent accidentally finding old BoostConfig.cmake file from casapy set(Boost_NO_BOOST_CMAKE ON) unset(BOOST_MINIMUM_VERSION) -set(BOOST_COMPONENTS "filesystem;program_options;system") +set(BOOST_COMPONENTS "filesystem;program_options;system;unit_test_framework") if(BUILD_TESTING) # Boost 1.59 introduced BOOST_TEST. Many tests use this feature. set(BOOST_MINIMUM_VERSION 1.59) @@ -687,6 +687,34 @@ target_link_libraries(showsourcedb ${SOURCEDB_LIBRARIES}) add_executable(msoverview base/msoverview.cc base/MS.cc) target_link_libraries(msoverview ${CASACORE_LIBRARIES}) +include(FetchContent) + + +set(BENCHMARK_ENABLE_GTEST_TESTS + OFF + CACHE INTERNAL "Download GTest sources") + +FetchContent_Declare( + googlebenchmark + GIT_REPOSITORY https://github.com/google/benchmark.git + GIT_TAG v1.8.3 # Optionally specify the exact tag or commit to fetch + CMAKE_ARGS "-DBENCHMARK_DOWNLOAD_DEPENDENCIES=True") + +# Make the fetched content available. +FetchContent_MakeAvailable(googlebenchmark) + +set(COMPILER_FLAGS "-O3;-march=native;-ggdb;-fno-omit-frame-pointer;-fopenmp") +# Add the benchmark executable +file(GLOB BENCHMARK_SOURCES "steps/test/performance/*.cc") +add_executable(microbenchmarks ${BENCHMARK_SOURCES} ${KERNEL_SOURCES}) +# Link against Google Benchmark +target_link_libraries(microbenchmarks benchmark::benchmark) +target_include_directories(microbenchmarks PRIVATE PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} ${EVERYBEAM_INCLUDE_DIR}) +target_link_libraries(microbenchmarks LIBDP3 ${HDF5_LIBRARIES} ${EVERYBEAM_LIB}) +target_compile_options(microbenchmarks PUBLIC ${COMPILER_FLAGS}) + +target_compile_options(LIBDP3 PUBLIC -fno-omit-frame-pointer PUBLIC -ggdb3) + install(TARGETS DP3 makesourcedb showsourcedb msoverview DESTINATION bin) # Install a script that warns users that DP3 is the new name of the executable diff --git a/steps/ApplyBeam.cc b/steps/ApplyBeam.cc index 88a253558..2a2a64238 100644 --- a/steps/ApplyBeam.cc +++ b/steps/ApplyBeam.cc @@ -282,9 +282,6 @@ void ApplyBeam::applyBeam(const DPInfo& info, double time, T* data0, // Get the beam values for each station. const size_t n_channels = info.chanFreqs().size(); const size_t n_baselines = info.nbaselines(); - // std::vector<std::array<std::complex<double>, 4>> beam_values_normal; - // beam_values_normal.resize(beam_values.size()); - std::unique_ptr<everybeam::pointresponse::PointResponse> point_response = telescope->GetPointResponse(time); @@ -300,14 +297,6 @@ void ApplyBeam::applyBeam(const DPInfo& info, double time, T* data0, case everybeam::CorrectionMode::kElement: // Fill beam_values for channel ch for (size_t st = 0; st < n_stations; ++st) { - /* - auto val = point_response->Response( - mode, station_indices[st], info.chanFreqs()[ch], srcdir, mutex); - beam_values_normal[n_channels * st + ch][0] = val[0]; - beam_values_normal[n_channels * st + ch][1] = val[1]; - beam_values_normal[n_channels * st + ch][2] = val[2]; - beam_values_normal[n_channels * st + ch][3] = val[3]; - */ beam_values[n_channels * st + ch] = point_response->Response( mode, station_indices[st], info.chanFreqs()[ch], srcdir, mutex); if (invert) { @@ -343,56 +332,248 @@ void ApplyBeam::applyBeam(const DPInfo& info, double time, T* data0, // Apply beam for channel ch on all baselines // For mode=ARRAY_FACTOR, too much work is done here because we know // that r and l are diagonal - std::array<size_t, 1> shape{n_baselines * n_channels * 4}; - xt::xtensor<T, 1> data_tensor(shape); - memcpy(data_tensor.data(), data0, shape[0] * sizeof(T)); for (size_t bl = 0; bl < n_baselines; ++bl) { - T* data = data_tensor.data(); - data += bl * 4 * n_channels + - ch * 4; // data0 + bl * 4 * n_channels + ch * 4; - // const aocommon::MC2x2F mat(data); - const aocommon::MC2x2F mat(data0 + bl * 4 * n_channels + ch * 4); - // std::array<size_t, 2> shape = {2, 2}; - /* - auto mat = xt::adapt(data, 4U, xt::no_ownership(), shape); - xt::xtensor<std::complex<double>, 2> mat(shape); - mat(0, 0) = data[0]; - mat(0, 1) = data[1]; - mat(1, 0) = data[2]; - mat(1, 1) = data[3]; - */ + T* data = data0 + bl * 4 * n_channels + ch * 4; + const aocommon::MC2x2F mat(data); // If the beam is the same for all stations (i.e. when n_stations = // 1), all baselines will have the same beam values size_t index_left = (n_stations == 1 ? ch : n_channels * info.getAnt1()[bl] + ch); size_t index_right = (n_stations == 1 ? ch : n_channels * info.getAnt2()[bl] + ch); - /* - auto left = xt::adapt(beam_values_normal[index_left].data(), 4U, - xt::no_ownership(), shape); - auto right = xt::adapt(beam_values_normal[index_right].data(), 4U, - xt::no_ownership(), shape); - - auto result = xt::linalg::dot( - left, xt::linalg::dot(xt::transpose(xt::conj(mat)), right)); - */ + const aocommon::MC2x2F left(beam_values[index_left]); const aocommon::MC2x2F right(beam_values[index_right]); const aocommon::MC2x2F result = left * mat.MultiplyHerm(right); - // const aocommon::MC2x2 result{res.data()}; - result.AssignTo(data); - /* - mat(0, 0) = result(0, 0); - mat(1, 0) = result(1, 0); - mat(0, 1) = result(0, 1); - mat(1, 1) = result(1, 1); -*/ + result.AssignTo(data); if (doUpdateWeights) { - // ApplyCal::ApplyWeights(left, right, - // weight0 + bl * 4 * n_channels + ch * 4); + ApplyCal::ApplyWeights(left, right, + weight0 + bl * 4 * n_channels + ch * 4); + } + } + } +} + +template <typename T, typename D> +void assign(xt::xtensor<T, 3> mat, D other_mat, size_t st) { + mat(st, 0, 0) = other_mat[0]; + mat(st, 0, 1) = other_mat[1]; + mat(st, 1, 0) = other_mat[2]; + mat(st, 1, 1) = other_mat[3]; +} + +void compute_beam(everybeam::pointresponse::PointResponse& point_response, + std::vector<aocommon::MC2x2>& beam_values, + xt::xtensor<std::complex<float>, 3>& left, + xt::xtensor<std::complex<float>, 3>& right, bool invert, + const std::vector<size_t>& station_indices, size_t n_stations, + everybeam::CorrectionMode mode, size_t n_channels, size_t ch, + everybeam::vector3r_t srcdir, double frequency, + std::mutex* mutex) { + switch (mode) { + case everybeam::CorrectionMode::kFull: + case everybeam::CorrectionMode::kElement: + // Fill beam_values for channel ch + for (size_t st = 0; st < n_stations; ++st) { + const aocommon::MC2x2 tmp = point_response.Response( + mode, station_indices[st], frequency, srcdir, mutex); + beam_values[n_channels * st + ch] = tmp; + left(st, 0, 0) = tmp[0]; + left(st, 0, 1) = tmp[1]; + left(st, 1, 0) = tmp[2]; + left(st, 1, 1) = tmp[3]; + xt::view(right, st, xt::all(), xt::all()) = + xt::transpose(xt::conj(xt::view(left, st, xt::all(), xt::all()))); + + if (invert) { + // Terminate if the matrix is not invertible. + [[maybe_unused]] bool status = + beam_values[n_channels * st + ch].Invert(); + assert(status); + } + } + break; + case everybeam::CorrectionMode::kArrayFactor: { + aocommon::MC2x2 af_tmp; + // Fill beam_values for channel ch + for (size_t st = 0; st < n_stations; ++st) { + af_tmp = point_response.Response(mode, station_indices[st], frequency, + srcdir, mutex); + + if (invert) { + af_tmp[0] = 1. / af_tmp[0]; + af_tmp[3] = 1. / af_tmp[3]; + } + assign(left, af_tmp, st); + xt::view(right, st, xt::all(), xt::all()) = + xt::transpose(xt::conj(xt::view(left, st, xt::all(), xt::all()))); + beam_values[n_channels * st + ch] = af_tmp; } + break; } + case everybeam::CorrectionMode::kNone: // this should not happen + for (size_t st = 0; st < n_stations; ++st) { + beam_values[n_channels * st + ch] = aocommon::MC2x2::Unity(); + /* + left[st] = aocommon::MC2x2F::Unity(); + right[st] = aocommon::MC2x2F::Unity(); + */ + } + break; + } +} + +inline void multiply_mat(const float* a, const float* b, float* c, float sign) { + c[0] += sign * (a[0] * b[0] + a[1] * b[2]); + c[1] += sign * (a[0] * b[1] + a[1] * b[3]); + c[2] += sign * (a[2] * b[0] + a[3] * b[2]); + c[3] += sign * (a[2] * b[1] + a[3] * b[3]); +} + +inline void multiply_mat_cmpl(const float* a, const float* b, float* c, + float sign) { + c[0] += sign * (a[0] * b[0] + a[1] * b[2]); + c[1] += sign * (a[0] * b[1] + a[1] * b[3]); + c[2] += sign * (a[2] * b[0] + a[3] * b[2]); + c[3] += sign * (a[2] * b[1] + a[3] * b[3]); +} + +void multiply_complex(const std::complex<float>* a, + const std::complex<float>* b, std::complex<float>* c) { + const float a_real[] = {a[0].real(), a[1].real(), a[2].real(), a[3].real()}; + const float b_real[] = {b[0].real(), b[1].real(), b[2].real(), b[3].real()}; + const float a_imag[] = {a[0].imag(), a[1].imag(), a[2].imag(), a[3].imag()}; + const float b_imag[] = {b[0].imag(), b[1].imag(), b[2].imag(), b[3].imag()}; + + float c_real[4] = {0, 0, 0, 0}; + float c_imag[4] = {0, 0, 0, 0}; + + multiply_mat(a_real, b_real, c_real, 1.0f); + multiply_mat(a_imag, b_imag, c_real, -1.0f); + multiply_mat(a_real, b_imag, c_imag, 1.0f); + multiply_mat(a_imag, b_real, c_imag, 1.0f); + + c[0] = {c_real[0], c_imag[0]}; + c[1] = {c_real[1], c_imag[1]}; + c[2] = {c_real[2], c_imag[2]}; + c[3] = {c_real[3], c_imag[3]}; +} + +void multiply_complex_batch(const std::complex<float>* a, + const std::complex<float>* b, + std::complex<float>* c, size_t batch_size) { + float* a_ptr = reinterpret_cast<float*>(const_cast<std::complex<float>*>(a)); + float* b_ptr = reinterpret_cast<float*>(const_cast<std::complex<float>*>(b)); + float* c_ptr = reinterpret_cast<float*>(c); +#pragma omp simd + for (size_t s = 0; s < batch_size; s++) { + const float a_00_real = a_ptr[s * 8 + 0]; + const float a_00_imag = a_ptr[s * 8 + 1]; + const float a_01_real = a_ptr[s * 8 + 2]; + const float a_01_imag = a_ptr[s * 8 + 3]; + const float a_10_real = a_ptr[s * 8 + 4]; + const float a_10_imag = a_ptr[s * 8 + 5]; + const float a_11_real = a_ptr[s * 8 + 6]; + const float a_11_imag = a_ptr[s * 8 + 7]; + const float b_00_real = b_ptr[s * 8 + 0]; + const float b_00_imag = b_ptr[s * 8 + 1]; + const float b_01_real = b_ptr[s * 8 + 2]; + const float b_01_imag = b_ptr[s * 8 + 3]; + const float b_10_real = b_ptr[s * 8 + 4]; + const float b_10_imag = b_ptr[s * 8 + 5]; + const float b_11_real = b_ptr[s * 8 + 6]; + const float b_11_imag = b_ptr[s * 8 + 7]; + + c_ptr[s * 8 + 0] = a_00_real * b_00_real + a_01_real * b_10_real; + c_ptr[s * 8 + 0] -= a_00_imag * b_00_imag + a_01_imag * b_10_imag; + c_ptr[s * 8 + 1] = a_00_real * b_00_imag + a_01_real * b_10_imag; + c_ptr[s * 8 + 1] += a_00_imag * b_00_real + a_01_imag * b_10_real; + c_ptr[s * 8 + 2] = a_00_real * b_01_real + a_01_real * b_11_real; + c_ptr[s * 8 + 2] -= a_00_imag * b_01_imag + a_01_imag * b_11_imag; + c_ptr[s * 8 + 3] = a_00_real * b_01_imag + a_01_real * b_11_imag; + c_ptr[s * 8 + 3] += a_00_imag * b_01_real + a_01_imag * b_11_real; + c_ptr[s * 8 + 4] = a_10_real * b_00_real + a_11_real * b_10_real; + c_ptr[s * 8 + 4] -= a_10_imag * b_00_imag + a_11_imag * b_10_imag; + c_ptr[s * 8 + 5] = a_10_real * b_00_imag + a_11_real * b_10_imag; + c_ptr[s * 8 + 5] += a_10_imag * b_00_real + a_11_imag * b_10_real; + c_ptr[s * 8 + 6] = a_10_real * b_01_real + a_11_real * b_11_real; + c_ptr[s * 8 + 6] -= a_10_imag * b_01_imag + a_11_imag * b_11_imag; + c_ptr[s * 8 + 7] = a_10_real * b_01_imag + a_11_real * b_11_imag; + c_ptr[s * 8 + 7] += a_10_imag * b_01_real + a_11_imag * b_11_real; + } +} + +template <typename T> +void apply_beam(xt::xtensor<std::complex<float>, 3>& left_beam_values, + xt::xtensor<std::complex<float>, 3>& right_beam_values, + T* data0, const std::vector<int>& ant1, + const std::vector<int>& ant2, size_t n_baselines, + size_t n_stations, size_t n_channels) { + // Apply beam for channel ch on all baselines + // For mode=ARRAY_FACTOR, too much work is done here because we know + // that r and l are diagonal + + const std::array<size_t, 3> shape = {n_channels, 2, 2}; + xt::xtensor<std::complex<float>, 3> tmp(shape); + for (size_t bl = 0; bl < n_baselines; ++bl) { + const int ant1_bl = ant1[bl]; + const int ant2_bl = ant2[bl]; + + T* mat = data0 + bl * 4 * n_channels; + // If the beam is the same for all stations (i.e. when n_stations = + // 1), all baselines will have the same beam values + const size_t index_left = ant1_bl * n_channels; + const size_t index_right = ant2_bl * n_channels; + + multiply_complex_batch(mat, right_beam_values.data() + index_right, + tmp.data(), n_channels); + multiply_complex_batch(left_beam_values.data() + index_left, tmp.data(), + mat, n_channels); + } +} + +template <typename T> +void ApplyBeam::applyBeamFast(const DPInfo& info, double time, T* data0, + float* weight0, + const everybeam::vector3r_t& srcdir, + const everybeam::telescope::Telescope* telescope, + std::vector<aocommon::MC2x2>& beam_values, + bool invert, everybeam::CorrectionMode mode, + bool doUpdateWeights, std::mutex* mutex) { + // Get the beam values for each station. + const size_t n_channels = info.chanFreqs().size(); + const size_t n_baselines = info.nbaselines(); + + std::unique_ptr<everybeam::pointresponse::PointResponse> point_response = + telescope->GetPointResponse(time); + + const std::vector<size_t> station_indices = + base::SelectStationIndices(*telescope, info.antennaNames()); + + const size_t n_stations = station_indices.size(); + const std::vector<double>& channel_frequencies = info.chanFreqs(); + const std::vector<int>& ant1 = info.getAnt1(); + const std::vector<int>& ant2 = info.getAnt2(); + std::array<size_t, 3> shape = {n_stations * n_channels, 2, 2}; + xt::xtensor<std::complex<float>, 3> left(shape), right(shape); + + // Apply the beam values of both stations to the ApplyBeamed data. + for (size_t ch = 0; ch < n_channels; ++ch) { + const double channel_frequency = channel_frequencies[ch]; + compute_beam(*point_response, beam_values, left, right, invert, + station_indices, n_stations, mode, n_channels, ch, srcdir, + channel_frequency, mutex); + } + // Apply beam for channel ch on all baselines + // For mode=ARRAY_FACTOR, too much work is done here because we know + // that r and l are diagonal + apply_beam(left, right, data0, ant1, ant2, n_baselines, n_stations, + n_channels); + + if (doUpdateWeights) { + // ApplyCal::ApplyWeights(left, right, + // weight0 + bl * 4 * n_channels + ch * 4); } } @@ -410,6 +591,13 @@ template void ApplyBeam::applyBeam( std::vector<aocommon::MC2x2>& beam_values, bool invert, everybeam::CorrectionMode mode, bool doUpdateWeights, std::mutex* mutex); +template void ApplyBeam::applyBeamFast( + const DPInfo& info, double time, std::complex<float>* data0, float* weight0, + const everybeam::vector3r_t& srcdir, + const everybeam::telescope::Telescope* telescope, + std::vector<aocommon::MC2x2>& beam_values, bool invert, + everybeam::CorrectionMode mode, bool doUpdateWeights, std::mutex* mutex); + void ApplyBeam::applyBeam(const DPInfo& info, double time, std::complex<double>* data0, float* weight0, const everybeam::vector3r_t& srcdir, diff --git a/steps/ApplyBeam.h b/steps/ApplyBeam.h index e0c49a072..a50dd8a91 100644 --- a/steps/ApplyBeam.h +++ b/steps/ApplyBeam.h @@ -85,6 +85,14 @@ class ApplyBeam : public Step { everybeam::CorrectionMode mode, bool doUpdateWeights = false, std::mutex* mutex = nullptr); + template <typename T> + static void applyBeamFast(const base::DPInfo& info, double time, T* data0, + float* weight0, const everybeam::vector3r_t& srcdir, + const everybeam::telescope::Telescope* telescope, + std::vector<aocommon::MC2x2>& beamValues, + bool invert, everybeam::CorrectionMode mode, + bool doUpdateWeights = false, + std::mutex* mutex = nullptr); // This method applies the beam for processing when parallelizing over // baselines. Because the beam is a per-antenna effect, this requires // synchronisation, which is performed with the provided barrier. diff --git a/steps/test/performance/applybeam.cc b/steps/test/performance/applybeam.cc new file mode 100644 index 000000000..f0171d80a --- /dev/null +++ b/steps/test/performance/applybeam.cc @@ -0,0 +1,177 @@ +#include "../../ApplyBeam.h" +#include <benchmark/benchmark.h> +#include <xtensor/xcomplex.hpp> +#include <base/DPInfo.cc> +#include <vector> +#include <random> +#include <casacore/ms/MeasurementSets.h> +#include <casacore/tables/Tables/ArrayColumn.h> +#include <casacore/tables/Tables/ScalarColumn.h> +#include <casacore/tables/Tables/Table.h> +#include <base/Telescope.h> +#include <EveryBeam/pointresponse/pointresponse.h> +#include <steps/ApplyBeam.h> +#include <random> + +#include <cstdlib> // For getenv + +std::string get_env_var(const std::string& key) { + const char* val = std::getenv(key.c_str()); + if (val == nullptr) { + return ""; // Environment variable not found + } + return std::string(val); +} + +using dp3::steps::ApplyBeam; +using dp3::steps::Step; + +std::unique_ptr<dp3::base::DPInfo> GetDPInfoFromMS(std::string ms_path) { + casacore::MeasurementSet ms(ms_path); + + casacore::Table antenna_table(ms.antenna()); + casacore::Table spectral_window(ms.spectralWindow()); + casacore::Table data_table(ms); + + casacore::ScalarColumn<casacore::String> antenna_names_column(antenna_table, + "NAME"); + casacore::ScalarColumn<casacore::Double> antenna_diameters_column( + antenna_table, "DISH_DIAMETER"); + + casacore::ArrayColumn<casacore::Double> antenna_positions_column( + antenna_table, "POSITION"); + + casacore::ArrayColumn<casacore::Double> channels_frequencies_column( + spectral_window, "CHAN_FREQ"); + casacore::ArrayColumn<casacore::Double> channels_width_column(spectral_window, + "CHAN_WIDTH"); + casacore::ScalarColumn<casacore::Int> antenna1_column(data_table, "ANTENNA1"); + casacore::ScalarColumn<casacore::Int> antenna2_column(data_table, "ANTENNA2"); + + const size_t n_channels = channels_width_column.shape(0)[0]; + const size_t n_antennas = antenna_names_column.nrow(); + const size_t n_correlations = n_antennas * (n_antennas + 1) / 2; + std::unique_ptr<dp3::base::DPInfo> info = std::make_unique<dp3::base::DPInfo>( + n_correlations, n_channels, 0U, "HBA_DUAL"); + + std::vector<casacore::MPosition> antenna_positions; + std::vector<int> ant1 = antenna1_column + .getColumnRange(casacore::Slicer( + casacore::IPosition(1, 0), + casacore::IPosition(1, n_correlations - 1), + casacore::Slicer::endIsLast)) + .tovector(); + std::vector<int> ant2 = antenna2_column + .getColumnRange(casacore::Slicer( + casacore::IPosition(1, 0), + casacore::IPosition(1, n_correlations - 1), + casacore::Slicer::endIsLast)) + .tovector(); + + std::vector<string> antenna_names; + + for (size_t i = 0; i < n_antennas; i++) { + antenna_names.push_back(antenna_names_column(i)); + std::vector<double> position = antenna_positions_column(i).tovector(); + antenna_positions.push_back(casacore::MPosition( + casacore::MVPosition{position[0], position[1], position[2]}, + casacore::MPosition::ITRF)); + } + info->setAntennas(antenna_names, + antenna_diameters_column.getColumn().tovector(), + antenna_positions, ant1, ant2); + info->setChannels(channels_frequencies_column.getColumn().tovector(), + channels_width_column.getColumn().tovector(), + channels_width_column.getColumn().tovector()); + return info; +} + +template <typename T> +void ReadData(std::vector<T>& data, const dp3::base::DPInfo& info, + std::string ms_path) { + casacore::Table data_table(ms_path); + casacore::ArrayColumn<casacore::Complex> data_column(data_table, "DATA"); + const size_t kNCorrelations = info.ncorr(); + const size_t kNChannels = info.nchan(); + const size_t kNPolarizations = 4; + data.resize(kNChannels * kNCorrelations * kNPolarizations); + std::vector<casacore::Complex> data_vec = + data_column.getColumnCells(casacore::RefRows(0, kNCorrelations - 1)) + .tovector(); + std::copy(data_vec.begin(), data_vec.end(), data.begin()); +} + +class LoadMeasurementSet : public benchmark::Fixture { + public: + void SetUp(::benchmark::State& state) { + std::string ms_path = get_env_var("MS"); + if (ms_path.compare("") == 0) { + throw std::runtime_error( + "Please set environment variable MS to point to a valid measurement " + "set"); + } + info = GetDPInfoFromMS(ms_path); + telescope = dp3::base::GetTelescope( + ms_path, everybeam::ElementResponseModel::kDefault, false); + + beam_values.resize(info->nantenna() * info->nchan()); + + ReadData(data, *info, ms_path); + weights.resize(data.size()); + std::fill(weights.begin(), weights.end(), 1.0); + size_t size_in_bytes = sizeof(std::complex<float>) * data.size(); + data_ptr = static_cast<std::complex<float>*>( + aligned_alloc(sizeof(aocommon::MC2x2F), size_in_bytes)); + beam_values.resize(info->nantenna() * info->nchan()); + memcpy(data_ptr, data.data(), size_in_bytes); + time_u = 5221188346.770621; + telescope->SetTime(time_u); + + // Create a random device to seed the random number generator + std::random_device rd; + // Initialize a Mersenne Twister random number generator + std::mt19937 gen(rd()); + + // Define the range for the random number + std::uniform_real_distribution<> distr(-1.0f, 1.0f); + const float x = distr(gen); + const float y = distr(gen); + const float z = sqrt(1.0f - (x * x) - (y * y)); + pointing = {x, y, z}; + } + void TearDown(::benchmark::State& state) {} + + std::vector<casacore::Complex> data; + std::vector<float> weights; + std::vector<aocommon::MC2x2> beam_values; + + std::unique_ptr<everybeam::telescope::Telescope> telescope; + std::unique_ptr<dp3::base::DPInfo> info; + std::complex<float>* data_ptr; + std::mutex mut; + double time_u; + everybeam::vector3r_t pointing; +}; + +BENCHMARK_DEFINE_F(LoadMeasurementSet, ApplyBeamReference) +(benchmark::State& state) { + for (auto _ : state) { + dp3::steps::ApplyBeam::applyBeam( + *info, time_u, data_ptr, weights.data(), pointing, telescope.get(), + beam_values, false, everybeam::CorrectionMode::kFull, false, &mut); + } +} + +BENCHMARK_DEFINE_F(LoadMeasurementSet, ApplyBeamBatch) +(benchmark::State& state) { + for (auto _ : state) { + dp3::steps::ApplyBeam::applyBeamFast( + *info, time_u, data_ptr, weights.data(), pointing, telescope.get(), + beam_values, false, everybeam::CorrectionMode::kFull, false, &mut); + } +} + +BENCHMARK_REGISTER_F(LoadMeasurementSet, ApplyBeamReference)->Iterations(100); +BENCHMARK_REGISTER_F(LoadMeasurementSet, ApplyBeamBatch)->Iterations(100); + +BENCHMARK_MAIN(); -- GitLab