diff --git a/.gitlab-ci.yml b/.gitlab-ci.yml index 376dd2fb357e1ae7cf55724d85017420e6071d07..17720a1d66083f281ead22efce83ab05ddcefda4 100644 --- a/.gitlab-ci.yml +++ b/.gitlab-ci.yml @@ -153,6 +153,8 @@ collect-performance: - python3 ci/summarize-results.py --filter DirectionsBenchmark/Directions results*.json result-summary-directions - python3 ci/summarize-results.py --filter PhasesBenchmark/ComputePhases results*.json result-summary-phases - python3 ci/summarize-results.py --filter SpectrumBenchmark/Spectrum results*.json result-summary-spectrum + - python3 ci/summarize-results.py --filter SmearTermsBenchmark/SmearTermBenchmark results*.json result-summary-smearterms + diff --git a/CMakeLists.txt b/CMakeLists.txt index 4915368919e1ca75d915d1a32e92b81fb0506e4c..ce66efeaebe37f9d2caf82e22ef5de4c1408b93c 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -35,6 +35,8 @@ find_package(BLAS REQUIRED) file(GLOB_RECURSE SOURCES src/*.cpp) file(GLOB_RECURSE HEADERS include/*.h) +find_package(OpenMP REQUIRED) + # Library target add_library(predict STATIC ${SOURCES}) @@ -45,8 +47,9 @@ target_include_directories( predict PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include> $<INSTALL_INTERFACE:include>) -target_link_libraries(predict PUBLIC xtensor xtensor-blas xsimd - ${CASACORE_LIBRARIES} ${BLAS_LIBRARIES}) +target_link_libraries( + predict PUBLIC xtensor xtensor-blas xsimd OpenMP::OpenMP_CXX + ${CASACORE_LIBRARIES} ${BLAS_LIBRARIES}) target_include_directories(predict PRIVATE ${CASACORE_INCLUDE_DIRS}) diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt index edc25fa85a6540a4e2e03af3f33b0e20aabd580b..34f67ba24d2fb97bccd6f205bb5760ed16fb9e2f 100644 --- a/benchmark/CMakeLists.txt +++ b/benchmark/CMakeLists.txt @@ -14,8 +14,15 @@ FetchContent_MakeAvailable(googlebenchmark) set(BENCHMARKS simulator) -add_executable(microbenchmarks main.cpp predict.cpp directions.cpp spectrum.cpp - phases.cpp spectrum_cross.cpp) +add_executable( + microbenchmarks + main.cpp + predict.cpp + directions.cpp + spectrum.cpp + phases.cpp + spectrum_cross.cpp + smearterms.cpp) target_compile_options(microbenchmarks PRIVATE ${PREDICT_BUILD_ARGUMENTS}) diff --git a/benchmark/smearterms.cpp b/benchmark/smearterms.cpp new file mode 100644 index 0000000000000000000000000000000000000000..52e55dc64c102b02e5d45da564d7df00e710e0f3 --- /dev/null +++ b/benchmark/smearterms.cpp @@ -0,0 +1,76 @@ +#include <benchmark/benchmark.h> +#include <xtensor/xtensor.hpp> + +#include <Direction.h> +#include <PointSource.h> +#include <Spectrum.h> +#include <test/Common.h> + +const Direction test_reference_point{0.5, 0.1}; +const Direction test_offset_point{test_reference_point.ra + 0.02, + test_reference_point.dec + 0.02}; + +const std::vector<int64_t> n_frequencies = {128, 256, 512}; +const std::vector<int64_t> n_stations = {12, 32, 52}; + +const double kHz = 1.e3; +const double MHz = 1.e6; +namespace {} // namespace + +class SmearTermsBenchmark : public benchmark::Fixture { +public: + void SetUp(benchmark::State &state) override { + size_t n_stations = state.range(0); + size_t n_frequencies = state.range(1); + + run = MakePredictRun(test_reference_point, test_offset_point, n_stations, + n_frequencies, false, true); + + plan_exec_ptr = std::make_unique<PredictPlanExec>(run->plan); + static std::mt19937 gen(-5); + std::uniform_real_distribution<> phase_dist(-M_PI, M_PI); + for (size_t t = 0; t < n_stations; t++) { + plan_exec_ptr->station_phases(t) = phase_dist(gen); + } + } + void TearDown(benchmark::State &) override {} + +protected: + std::unique_ptr<PredictRun> run; + std::unique_ptr<PredictPlanExec> plan_exec_ptr; +}; + +BENCHMARK_DEFINE_F(SmearTermsBenchmark, SmearTermBenchmarkXTensor) +(benchmark::State &state) { + for (auto _ : state) { + plan_exec_ptr->ComputeSmearTerms(); + } +} + +BENCHMARK_DEFINE_F(SmearTermsBenchmark, SmearTermBenchmarkXOriginal) +(benchmark::State &state) { + PredictPlanExec &sim_plan_exec = *plan_exec_ptr; + for (auto _ : state) { + for (size_t bl_idx = 0; bl_idx < sim_plan_exec.baselines.size(); bl_idx++) { + const auto &baseline = sim_plan_exec.baselines[bl_idx]; + double phase_diff = sim_plan_exec.GetStationPhases()[baseline.second] - + sim_plan_exec.GetStationPhases()[baseline.first]; + + for (size_t f_idx = 0; f_idx < sim_plan_exec.channel_widths.size(); + f_idx++) { + benchmark::DoNotOptimize(ComputeSmearterm( + phase_diff, sim_plan_exec.channel_widths[f_idx] * 0.5)); + } + } + } +} + +BENCHMARK_REGISTER_F(SmearTermsBenchmark, SmearTermBenchmarkXTensor) + ->ArgsProduct({n_stations, n_frequencies}) + ->ArgNames({"#stations", "#frequencies"}) + ->Unit(benchmark::kMillisecond); + +BENCHMARK_REGISTER_F(SmearTermsBenchmark, SmearTermBenchmarkXOriginal) + ->ArgsProduct({n_stations, n_frequencies}) + ->ArgNames({"#stations", "#frequencies"}) + ->Unit(benchmark::kMillisecond); diff --git a/include/PredictPlanExec.h b/include/PredictPlanExec.h index 246bc94a471bc41c842dca0b447489789eb0fdab..c9e79d853dfc7e6465ee595bd6cc17465f5155f6 100644 --- a/include/PredictPlanExec.h +++ b/include/PredictPlanExec.h @@ -44,12 +44,7 @@ public: void FillEulerMatrix(xt::xtensor<double, 2> &mat, const double ra, const double dec); - // FIXME: move this to a more appropriate location - inline float ComputeSmearterm(double uvw, double halfwidth) { - float smearterm = static_cast<float>(uvw) * static_cast<float>(halfwidth); - return (smearterm == 0.0f) ? 1.0f - : std::fabs(std::sin(smearterm) / smearterm); - } + void ComputeSmearTerms(); const xt::xtensor<double, 2> &get_lmn() const { return lmn; } const xt::xtensor<double, 2, xt::layout_type::column_major> &get_uvw() const { @@ -60,6 +55,7 @@ public: xt::xtensor<double, 2> lmn; xt::xtensor<double, 1> station_phases; xt::xtensor<std::complex<double>, 2> shift_data; + xt::xtensor<double, 2> smear_terms; private: void Initialize(); diff --git a/include/test/Common.h b/include/test/Common.h index 6664a9262c51ed4702486923d61699da73df9d0b..62c305c80528a860caa0a753cf7a5c5d12865b84 100644 --- a/include/test/Common.h +++ b/include/test/Common.h @@ -97,4 +97,10 @@ void SetUpFixedSource(Direction &test_reference_point, } // namespace base } // namespace dp3 +inline float ComputeSmearterm(double uvw, double halfwidth) { + float smearterm = static_cast<float>(uvw) * static_cast<float>(halfwidth); + + return (smearterm == 0.0f) ? 1.0f + : std::fabs(std::sin(smearterm) / smearterm); +} #endif // PREDICT_BENCHMARK_COMMON_HPP \ No newline at end of file diff --git a/src/PredictPlanExec.cpp b/src/PredictPlanExec.cpp index 12c4480bd8dd8ed31f682dc16235db9a9a01ba25..4b91cef87ceaa30ff94e9489098a4fc8c315713a 100644 --- a/src/PredictPlanExec.cpp +++ b/src/PredictPlanExec.cpp @@ -6,7 +6,10 @@ #include "Spectrum.h" #include <xtensor-blas/xlinalg.hpp> +#include <xtensor/xadapt.hpp> +#include <xtensor/xindex_view.hpp> #include <xtensor/xtensor.hpp> +#include <xtensor/xview.hpp> void PredictPlanExec::Initialize() { // Initialize the station phases and shifts @@ -15,11 +18,14 @@ void PredictPlanExec::Initialize() { lmn.resize({nchannels, 3}); uvw.resize({nstations, 3}); frequencies.resize({nchannels}); + smear_terms = xt::ones<float>({baselines.size(), nchannels}); } void PredictPlanExec::Precompute(const PointSourceCollection &sources) { sources.direction_vector.RaDec2Lmn(reference, lmn); ComputeStationPhases(); + if (correct_frequency_smearing) + ComputeSmearTerms(); } /** @@ -89,7 +95,6 @@ void PredictPlanExec::Compute(const PointSourceCollection &sources, // Use preallocated storage (outside for loops) std::vector<double> temp_prod_real_I(nchannels); std::vector<double> temp_prod_imag_I(nchannels); - std::vector<double> smear_terms(nchannels); std::vector<double> temp_prod_real_Q; std::vector<double> temp_prod_imag_Q; @@ -121,16 +126,6 @@ void PredictPlanExec::Compute(const PointSourceCollection &sources, const size_t q = baselines[bl].second; if (p != q) { - // Precompute smearing factors if needed - if (correct_frequency_smearing) { -#pragma GCC ivdep - for (size_t ch = 0; ch < nchannels; ++ch) { - smear_terms[ch] = - ComputeSmearterm(GetStationPhases()(q) - GetStationPhases()(p), - channel_widths[ch] * 0.5); - } - } - if (compute_stokes_I_only) { #pragma GCC ivdep for (size_t ch = 0; ch < nchannels; ++ch) { @@ -158,8 +153,8 @@ void PredictPlanExec::Compute(const PointSourceCollection &sources, #pragma GCC ivdep for (size_t ch = 0; ch < nchannels; ++ch) { buffer(StokesComponent::I, ch, bl) += - dcomplex(smear_terms[ch] * temp_prod_real_I[ch], - smear_terms[ch] * temp_prod_imag_I[ch]); + dcomplex(smear_terms(bl, ch) * temp_prod_real_I[ch], + smear_terms(bl, ch) * temp_prod_imag_I[ch]); } } else { #pragma GCC ivdep @@ -169,7 +164,6 @@ void PredictPlanExec::Compute(const PointSourceCollection &sources, } } } else { - #pragma GCC ivdep for (size_t ch = 0; ch < nchannels; ++ch) { const double x_p = (GetShiftData()(p, ch).real()); @@ -217,17 +211,17 @@ void PredictPlanExec::Compute(const PointSourceCollection &sources, #pragma GCC ivdep for (size_t ch = 0; ch < nchannels; ++ch) { buffer(StokesComponent::I, ch, bl) += - dcomplex(smear_terms[ch] * temp_prod_real_I[ch], - smear_terms[ch] * temp_prod_imag_I[ch]); + dcomplex(smear_terms(bl, ch) * temp_prod_real_I[ch], + smear_terms(bl, ch) * temp_prod_imag_I[ch]); buffer(StokesComponent::Q, ch, bl) += - dcomplex(smear_terms[ch] * temp_prod_real_Q[ch], - smear_terms[ch] * temp_prod_imag_Q[ch]); + dcomplex(smear_terms(bl, ch) * temp_prod_real_Q[ch], + smear_terms(bl, ch) * temp_prod_imag_Q[ch]); buffer(StokesComponent::U, ch, bl) += - dcomplex(smear_terms[ch] * temp_prod_real_U[ch], - smear_terms[ch] * temp_prod_imag_U[ch]); + dcomplex(smear_terms(bl, ch) * temp_prod_real_U[ch], + smear_terms(bl, ch) * temp_prod_imag_U[ch]); buffer(StokesComponent::V, ch, bl) += - dcomplex(smear_terms[ch] * temp_prod_real_V[ch], - smear_terms[ch] * temp_prod_imag_V[ch]); + dcomplex(smear_terms(bl, ch) * temp_prod_real_V[ch], + smear_terms(bl, ch) * temp_prod_imag_V[ch]); } } else { #pragma GCC ivdep @@ -255,6 +249,7 @@ void PredictPlanExec::Compute(const GaussianSourceCollection &sources, xt::xtensor<std::complex<double>, 2> computed_spectrum; xt::xtensor<double, 2> xtStationUvw; xt::xtensor<double, 2> uvwShifted; + std::vector<double> angular_smear(nchannels); for (size_t source_index = 0; source_index < sources.Size(); ++source_index) { const Spectrum &spectrum = sources.spectrums[source_index]; @@ -305,8 +300,6 @@ void PredictPlanExec::Compute(const GaussianSourceCollection &sources, const double kVScale = minor_axis * kFwhm2Sigma; const double kInvCSqr = 1.0 / (casacore::C::c * casacore::C::c); - std::vector<double> smear_terms(nchannels); - for (size_t bl = 0; bl < baselines.size(); ++bl) { const size_t p = baselines[bl].first; const size_t q = baselines[bl].second; @@ -328,21 +321,10 @@ void PredictPlanExec::Compute(const GaussianSourceCollection &sources, const double uvPrime = (-2.0 * M_PI * M_PI) * (uPrime * uPrime + vPrime * vPrime); - // Precompute amplitudes -#pragma GCC ivdep for (size_t ch = 0; ch < nchannels; ++ch) { const double lambda2 = frequencies(ch) * frequencies(ch) * kInvCSqr * uvPrime; - smear_terms[ch] = exp(lambda2); - } - // Precompute smearing factors if needed and modify amplitudes - if (correct_frequency_smearing) { -#pragma GCC ivdep - for (size_t ch = 0; ch < nchannels; ++ch) { - smear_terms[ch] *= - ComputeSmearterm(GetStationPhases()(q) - GetStationPhases()(p), - channel_widths[ch] * 0.5); - } + angular_smear[ch] = exp(lambda2); } if (compute_stokes_I_only) { @@ -365,9 +347,9 @@ void PredictPlanExec::Compute(const GaussianSourceCollection &sources, q_conj_p_real * (x_c)-q_conj_p_imag * (y_c); const double temp_prod_imag_I = q_conj_p_real * (y_c) + q_conj_p_imag * (x_c); - buffer(StokesComponent::I, ch, bl) += - dcomplex(smear_terms[ch] * temp_prod_real_I, - smear_terms[ch] * temp_prod_imag_I); + buffer(StokesComponent::I, ch, bl) += dcomplex( + angular_smear[ch] * smear_terms(bl, ch) * temp_prod_real_I, + angular_smear[ch] * smear_terms(bl, ch) * temp_prod_imag_I); } } else { #pragma GCC ivdep @@ -417,21 +399,41 @@ void PredictPlanExec::Compute(const GaussianSourceCollection &sources, // The following operations are memory-bound, unlike // the compute-bound segment above. By merging these together, // an improvement in speed is expected - buffer(StokesComponent::I, ch, bl) += - dcomplex(smear_terms[ch] * temp_prod_real_I, - smear_terms[ch] * temp_prod_imag_I); - buffer(StokesComponent::Q, ch, bl) += - dcomplex(smear_terms[ch] * temp_prod_real_Q, - smear_terms[ch] * temp_prod_imag_Q); - buffer(StokesComponent::U, ch, bl) += - dcomplex(smear_terms[ch] * temp_prod_real_U, - smear_terms[ch] * temp_prod_imag_U); - buffer(StokesComponent::V, ch, bl) += - dcomplex(smear_terms[ch] * temp_prod_real_V, - smear_terms[ch] * temp_prod_imag_V); + buffer(StokesComponent::I, ch, bl) += dcomplex( + angular_smear[ch] * smear_terms(bl, ch) * temp_prod_real_I, + angular_smear[ch] * smear_terms(bl, ch) * temp_prod_imag_I); + buffer(StokesComponent::Q, ch, bl) += dcomplex( + angular_smear[ch] * smear_terms(bl, ch) * temp_prod_real_Q, + angular_smear[ch] * smear_terms(bl, ch) * temp_prod_imag_Q); + buffer(StokesComponent::U, ch, bl) += dcomplex( + angular_smear[ch] * smear_terms(bl, ch) * temp_prod_real_U, + angular_smear[ch] * smear_terms(bl, ch) * temp_prod_imag_U); + buffer(StokesComponent::V, ch, bl) += dcomplex( + angular_smear[ch] * smear_terms(bl, ch) * temp_prod_real_V, + angular_smear[ch] * smear_terms(bl, ch) * temp_prod_imag_V); } } } } // Baselines. } +} +void PredictPlanExec::ComputeSmearTerms() { + // Compute the smear terms + size_t p = 0UL; + size_t q = 0UL; + std::vector<float> channel_floats(channel_widths.size()); + std::copy(channel_widths.begin(), channel_widths.end(), + channel_floats.begin()); + for (size_t bl = 0; bl < baselines.size(); ++bl) { + std::tie(p, q) = baselines[bl]; + const float phase_diff = + static_cast<float>(station_phases[q] - station_phases[p]); + if (abs(phase_diff) > 1.e-6) [[likely]] { +#pragma omp simd + for (size_t ch = 0; ch < nchannels; ch++) { + const float shift = phase_diff * channel_floats[ch] * 0.5f; + smear_terms(bl, ch) = std::fabs(sinf(shift) / shift); + } + } + } } \ No newline at end of file diff --git a/tests/test_predictplanexec.cpp b/tests/test_predictplanexec.cpp index fdd784287007a3854cb1bdff6069e8c1037aaa54..ea055206506995b207acd86ea7fe74a623c9dcf3 100644 --- a/tests/test_predictplanexec.cpp +++ b/tests/test_predictplanexec.cpp @@ -5,9 +5,81 @@ #define BOOST_TEST_MODULE PREDICT_PLAN_EXEC_TEST #include <PredictPlanExec.h> #include <boost/test/unit_test.hpp> +#include <test/Common.h> BOOST_AUTO_TEST_SUITE(PredictPlanExecTestSuite) BOOST_AUTO_TEST_CASE(test_PredictPlanExec) { BOOST_CHECK(true); } +BOOST_AUTO_TEST_CASE(compute_smear_terms) { + static constexpr size_t test_n_stations = 4; + static constexpr size_t test_n_channels = 2; + static constexpr size_t test_n_sources = 10; + const Direction test_reference_point{0.5, 0.1}; + const Direction test_offset_point{test_reference_point.ra + 0.02, + test_reference_point.dec + 0.02}; + // Create a SimulationPlanExec object + std::unique_ptr<PredictRun> run = + MakePredictRun(test_reference_point, test_offset_point, test_n_stations, + test_n_channels, false, true); + run->Initialize(); + + PredictPlan plan; + PointSourceCollection sources; + PredictPlanExec sim_plan_exec(run->plan); + + for (size_t s = 0; s < test_n_sources; ++s) { + sources.Add(run->makeSource<PointSource>()); + } + + // Precompute the station phases and shifts + sim_plan_exec.Precompute(sources); + + for (size_t bl_idx = 0; bl_idx < sim_plan_exec.baselines.size(); bl_idx++) { + const auto &baseline = sim_plan_exec.baselines[bl_idx]; + double phase_diff = sim_plan_exec.GetStationPhases()[baseline.second] - + sim_plan_exec.GetStationPhases()[baseline.first]; + + for (size_t f_idx = 0; f_idx < sim_plan_exec.channel_widths.size(); + f_idx++) { + double expected = ComputeSmearterm( + phase_diff, sim_plan_exec.channel_widths[f_idx] * 0.5); + BOOST_CHECK_CLOSE(expected, sim_plan_exec.smear_terms(bl_idx, f_idx), + 1e-4); + } + } +} + +BOOST_AUTO_TEST_CASE(dont_compute_smear_terms) { + static constexpr size_t test_n_stations = 4; + static constexpr size_t test_n_channels = 2; + static constexpr size_t test_n_sources = 10; + const Direction test_reference_point{0.5, 0.1}; + const Direction test_offset_point{test_reference_point.ra + 0.02, + test_reference_point.dec + 0.02}; + // Create a SimulationPlanExec object + std::unique_ptr<PredictRun> run = + MakePredictRun(test_reference_point, test_offset_point, test_n_stations, + test_n_channels, false, false); + run->Initialize(); + + PredictPlan plan; + PointSourceCollection sources; + PredictPlanExec sim_plan_exec(run->plan); + + for (size_t s = 0; s < test_n_sources; ++s) { + sources.Add(run->makeSource<PointSource>()); + } + + // Precompute the station phases and shifts + sim_plan_exec.Precompute(sources); + + for (size_t bl_idx = 0; bl_idx < sim_plan_exec.baselines.size(); bl_idx++) { + for (size_t f_idx = 0; f_idx < sim_plan_exec.channel_widths.size(); + f_idx++) { + BOOST_CHECK_CLOSE(1.0, sim_plan_exec.smear_terms(bl_idx, f_idx), 1e-4); + } + } +} + BOOST_AUTO_TEST_SUITE_END() \ No newline at end of file