Skip to content
Snippets Groups Projects
Commit 5b2425e4 authored by Mattia Mancini's avatar Mattia Mancini
Browse files

Refactor smearing

parent a6d3f85e
No related branches found
No related tags found
1 merge request!31Refactor smearing
......@@ -153,6 +153,8 @@ collect-performance:
- python3 ci/summarize-results.py --filter DirectionsBenchmark/Directions results*.json result-summary-directions
- python3 ci/summarize-results.py --filter PhasesBenchmark/ComputePhases results*.json result-summary-phases
- python3 ci/summarize-results.py --filter SpectrumBenchmark/Spectrum results*.json result-summary-spectrum
- python3 ci/summarize-results.py --filter SmearTermsBenchmark/SmearTermBenchmark results*.json result-summary-smearterms
......
......@@ -35,6 +35,8 @@ find_package(BLAS REQUIRED)
file(GLOB_RECURSE SOURCES src/*.cpp)
file(GLOB_RECURSE HEADERS include/*.h)
find_package(OpenMP REQUIRED)
# Library target
add_library(predict STATIC ${SOURCES})
......@@ -45,7 +47,8 @@ target_include_directories(
predict PUBLIC $<BUILD_INTERFACE:${CMAKE_CURRENT_SOURCE_DIR}/include>
$<INSTALL_INTERFACE:include>)
target_link_libraries(predict PUBLIC xtensor xtensor-blas xsimd
target_link_libraries(
predict PUBLIC xtensor xtensor-blas xsimd OpenMP::OpenMP_CXX
${CASACORE_LIBRARIES} ${BLAS_LIBRARIES})
target_include_directories(predict PRIVATE ${CASACORE_INCLUDE_DIRS})
......
......@@ -14,8 +14,15 @@ FetchContent_MakeAvailable(googlebenchmark)
set(BENCHMARKS simulator)
add_executable(microbenchmarks main.cpp predict.cpp directions.cpp spectrum.cpp
phases.cpp spectrum_cross.cpp)
add_executable(
microbenchmarks
main.cpp
predict.cpp
directions.cpp
spectrum.cpp
phases.cpp
spectrum_cross.cpp
smearterms.cpp)
target_compile_options(microbenchmarks PRIVATE ${PREDICT_BUILD_ARGUMENTS})
......
#include <benchmark/benchmark.h>
#include <xtensor/xtensor.hpp>
#include <Direction.h>
#include <PointSource.h>
#include <Spectrum.h>
#include <test/Common.h>
const Direction test_reference_point{0.5, 0.1};
const Direction test_offset_point{test_reference_point.ra + 0.02,
test_reference_point.dec + 0.02};
const std::vector<int64_t> n_frequencies = {128, 256, 512};
const std::vector<int64_t> n_stations = {12, 32, 52};
const double kHz = 1.e3;
const double MHz = 1.e6;
namespace {} // namespace
class SmearTermsBenchmark : public benchmark::Fixture {
public:
void SetUp(benchmark::State &state) override {
size_t n_stations = state.range(0);
size_t n_frequencies = state.range(1);
run = MakePredictRun(test_reference_point, test_offset_point, n_stations,
n_frequencies, false, true);
plan_exec_ptr = std::make_unique<PredictPlanExec>(run->plan);
static std::mt19937 gen(-5);
std::uniform_real_distribution<> phase_dist(-M_PI, M_PI);
for (size_t t = 0; t < n_stations; t++) {
plan_exec_ptr->station_phases(t) = phase_dist(gen);
}
}
void TearDown(benchmark::State &) override {}
protected:
std::unique_ptr<PredictRun> run;
std::unique_ptr<PredictPlanExec> plan_exec_ptr;
};
BENCHMARK_DEFINE_F(SmearTermsBenchmark, SmearTermBenchmarkXTensor)
(benchmark::State &state) {
for (auto _ : state) {
plan_exec_ptr->ComputeSmearTerms();
}
}
BENCHMARK_DEFINE_F(SmearTermsBenchmark, SmearTermBenchmarkXOriginal)
(benchmark::State &state) {
PredictPlanExec &sim_plan_exec = *plan_exec_ptr;
for (auto _ : state) {
for (size_t bl_idx = 0; bl_idx < sim_plan_exec.baselines.size(); bl_idx++) {
const auto &baseline = sim_plan_exec.baselines[bl_idx];
double phase_diff = sim_plan_exec.GetStationPhases()[baseline.second] -
sim_plan_exec.GetStationPhases()[baseline.first];
for (size_t f_idx = 0; f_idx < sim_plan_exec.channel_widths.size();
f_idx++) {
benchmark::DoNotOptimize(ComputeSmearterm(
phase_diff, sim_plan_exec.channel_widths[f_idx] * 0.5));
}
}
}
}
BENCHMARK_REGISTER_F(SmearTermsBenchmark, SmearTermBenchmarkXTensor)
->ArgsProduct({n_stations, n_frequencies})
->ArgNames({"#stations", "#frequencies"})
->Unit(benchmark::kMillisecond);
BENCHMARK_REGISTER_F(SmearTermsBenchmark, SmearTermBenchmarkXOriginal)
->ArgsProduct({n_stations, n_frequencies})
->ArgNames({"#stations", "#frequencies"})
->Unit(benchmark::kMillisecond);
......@@ -44,12 +44,7 @@ public:
void FillEulerMatrix(xt::xtensor<double, 2> &mat, const double ra,
const double dec);
// FIXME: move this to a more appropriate location
inline float ComputeSmearterm(double uvw, double halfwidth) {
float smearterm = static_cast<float>(uvw) * static_cast<float>(halfwidth);
return (smearterm == 0.0f) ? 1.0f
: std::fabs(std::sin(smearterm) / smearterm);
}
void ComputeSmearTerms();
const xt::xtensor<double, 2> &get_lmn() const { return lmn; }
const xt::xtensor<double, 2, xt::layout_type::column_major> &get_uvw() const {
......@@ -60,6 +55,7 @@ public:
xt::xtensor<double, 2> lmn;
xt::xtensor<double, 1> station_phases;
xt::xtensor<std::complex<double>, 2> shift_data;
xt::xtensor<double, 2> smear_terms;
private:
void Initialize();
......
......@@ -97,4 +97,10 @@ void SetUpFixedSource(Direction &test_reference_point,
} // namespace base
} // namespace dp3
inline float ComputeSmearterm(double uvw, double halfwidth) {
float smearterm = static_cast<float>(uvw) * static_cast<float>(halfwidth);
return (smearterm == 0.0f) ? 1.0f
: std::fabs(std::sin(smearterm) / smearterm);
}
#endif // PREDICT_BENCHMARK_COMMON_HPP
\ No newline at end of file
......@@ -6,7 +6,10 @@
#include "Spectrum.h"
#include <xtensor-blas/xlinalg.hpp>
#include <xtensor/xadapt.hpp>
#include <xtensor/xindex_view.hpp>
#include <xtensor/xtensor.hpp>
#include <xtensor/xview.hpp>
void PredictPlanExec::Initialize() {
// Initialize the station phases and shifts
......@@ -15,11 +18,14 @@ void PredictPlanExec::Initialize() {
lmn.resize({nchannels, 3});
uvw.resize({nstations, 3});
frequencies.resize({nchannels});
smear_terms = xt::ones<float>({baselines.size(), nchannels});
}
void PredictPlanExec::Precompute(const PointSourceCollection &sources) {
sources.direction_vector.RaDec2Lmn(reference, lmn);
ComputeStationPhases();
if (correct_frequency_smearing)
ComputeSmearTerms();
}
/**
......@@ -89,7 +95,6 @@ void PredictPlanExec::Compute(const PointSourceCollection &sources,
// Use preallocated storage (outside for loops)
std::vector<double> temp_prod_real_I(nchannels);
std::vector<double> temp_prod_imag_I(nchannels);
std::vector<double> smear_terms(nchannels);
std::vector<double> temp_prod_real_Q;
std::vector<double> temp_prod_imag_Q;
......@@ -121,16 +126,6 @@ void PredictPlanExec::Compute(const PointSourceCollection &sources,
const size_t q = baselines[bl].second;
if (p != q) {
// Precompute smearing factors if needed
if (correct_frequency_smearing) {
#pragma GCC ivdep
for (size_t ch = 0; ch < nchannels; ++ch) {
smear_terms[ch] =
ComputeSmearterm(GetStationPhases()(q) - GetStationPhases()(p),
channel_widths[ch] * 0.5);
}
}
if (compute_stokes_I_only) {
#pragma GCC ivdep
for (size_t ch = 0; ch < nchannels; ++ch) {
......@@ -158,8 +153,8 @@ void PredictPlanExec::Compute(const PointSourceCollection &sources,
#pragma GCC ivdep
for (size_t ch = 0; ch < nchannels; ++ch) {
buffer(StokesComponent::I, ch, bl) +=
dcomplex(smear_terms[ch] * temp_prod_real_I[ch],
smear_terms[ch] * temp_prod_imag_I[ch]);
dcomplex(smear_terms(bl, ch) * temp_prod_real_I[ch],
smear_terms(bl, ch) * temp_prod_imag_I[ch]);
}
} else {
#pragma GCC ivdep
......@@ -169,7 +164,6 @@ void PredictPlanExec::Compute(const PointSourceCollection &sources,
}
}
} else {
#pragma GCC ivdep
for (size_t ch = 0; ch < nchannels; ++ch) {
const double x_p = (GetShiftData()(p, ch).real());
......@@ -217,17 +211,17 @@ void PredictPlanExec::Compute(const PointSourceCollection &sources,
#pragma GCC ivdep
for (size_t ch = 0; ch < nchannels; ++ch) {
buffer(StokesComponent::I, ch, bl) +=
dcomplex(smear_terms[ch] * temp_prod_real_I[ch],
smear_terms[ch] * temp_prod_imag_I[ch]);
dcomplex(smear_terms(bl, ch) * temp_prod_real_I[ch],
smear_terms(bl, ch) * temp_prod_imag_I[ch]);
buffer(StokesComponent::Q, ch, bl) +=
dcomplex(smear_terms[ch] * temp_prod_real_Q[ch],
smear_terms[ch] * temp_prod_imag_Q[ch]);
dcomplex(smear_terms(bl, ch) * temp_prod_real_Q[ch],
smear_terms(bl, ch) * temp_prod_imag_Q[ch]);
buffer(StokesComponent::U, ch, bl) +=
dcomplex(smear_terms[ch] * temp_prod_real_U[ch],
smear_terms[ch] * temp_prod_imag_U[ch]);
dcomplex(smear_terms(bl, ch) * temp_prod_real_U[ch],
smear_terms(bl, ch) * temp_prod_imag_U[ch]);
buffer(StokesComponent::V, ch, bl) +=
dcomplex(smear_terms[ch] * temp_prod_real_V[ch],
smear_terms[ch] * temp_prod_imag_V[ch]);
dcomplex(smear_terms(bl, ch) * temp_prod_real_V[ch],
smear_terms(bl, ch) * temp_prod_imag_V[ch]);
}
} else {
#pragma GCC ivdep
......@@ -255,6 +249,7 @@ void PredictPlanExec::Compute(const GaussianSourceCollection &sources,
xt::xtensor<std::complex<double>, 2> computed_spectrum;
xt::xtensor<double, 2> xtStationUvw;
xt::xtensor<double, 2> uvwShifted;
std::vector<double> angular_smear(nchannels);
for (size_t source_index = 0; source_index < sources.Size(); ++source_index) {
const Spectrum &spectrum = sources.spectrums[source_index];
......@@ -305,8 +300,6 @@ void PredictPlanExec::Compute(const GaussianSourceCollection &sources,
const double kVScale = minor_axis * kFwhm2Sigma;
const double kInvCSqr = 1.0 / (casacore::C::c * casacore::C::c);
std::vector<double> smear_terms(nchannels);
for (size_t bl = 0; bl < baselines.size(); ++bl) {
const size_t p = baselines[bl].first;
const size_t q = baselines[bl].second;
......@@ -328,21 +321,10 @@ void PredictPlanExec::Compute(const GaussianSourceCollection &sources,
const double uvPrime =
(-2.0 * M_PI * M_PI) * (uPrime * uPrime + vPrime * vPrime);
// Precompute amplitudes
#pragma GCC ivdep
for (size_t ch = 0; ch < nchannels; ++ch) {
const double lambda2 =
frequencies(ch) * frequencies(ch) * kInvCSqr * uvPrime;
smear_terms[ch] = exp(lambda2);
}
// Precompute smearing factors if needed and modify amplitudes
if (correct_frequency_smearing) {
#pragma GCC ivdep
for (size_t ch = 0; ch < nchannels; ++ch) {
smear_terms[ch] *=
ComputeSmearterm(GetStationPhases()(q) - GetStationPhases()(p),
channel_widths[ch] * 0.5);
}
angular_smear[ch] = exp(lambda2);
}
if (compute_stokes_I_only) {
......@@ -365,9 +347,9 @@ void PredictPlanExec::Compute(const GaussianSourceCollection &sources,
q_conj_p_real * (x_c)-q_conj_p_imag * (y_c);
const double temp_prod_imag_I =
q_conj_p_real * (y_c) + q_conj_p_imag * (x_c);
buffer(StokesComponent::I, ch, bl) +=
dcomplex(smear_terms[ch] * temp_prod_real_I,
smear_terms[ch] * temp_prod_imag_I);
buffer(StokesComponent::I, ch, bl) += dcomplex(
angular_smear[ch] * smear_terms(bl, ch) * temp_prod_real_I,
angular_smear[ch] * smear_terms(bl, ch) * temp_prod_imag_I);
}
} else {
#pragma GCC ivdep
......@@ -417,21 +399,41 @@ void PredictPlanExec::Compute(const GaussianSourceCollection &sources,
// The following operations are memory-bound, unlike
// the compute-bound segment above. By merging these together,
// an improvement in speed is expected
buffer(StokesComponent::I, ch, bl) +=
dcomplex(smear_terms[ch] * temp_prod_real_I,
smear_terms[ch] * temp_prod_imag_I);
buffer(StokesComponent::Q, ch, bl) +=
dcomplex(smear_terms[ch] * temp_prod_real_Q,
smear_terms[ch] * temp_prod_imag_Q);
buffer(StokesComponent::U, ch, bl) +=
dcomplex(smear_terms[ch] * temp_prod_real_U,
smear_terms[ch] * temp_prod_imag_U);
buffer(StokesComponent::V, ch, bl) +=
dcomplex(smear_terms[ch] * temp_prod_real_V,
smear_terms[ch] * temp_prod_imag_V);
buffer(StokesComponent::I, ch, bl) += dcomplex(
angular_smear[ch] * smear_terms(bl, ch) * temp_prod_real_I,
angular_smear[ch] * smear_terms(bl, ch) * temp_prod_imag_I);
buffer(StokesComponent::Q, ch, bl) += dcomplex(
angular_smear[ch] * smear_terms(bl, ch) * temp_prod_real_Q,
angular_smear[ch] * smear_terms(bl, ch) * temp_prod_imag_Q);
buffer(StokesComponent::U, ch, bl) += dcomplex(
angular_smear[ch] * smear_terms(bl, ch) * temp_prod_real_U,
angular_smear[ch] * smear_terms(bl, ch) * temp_prod_imag_U);
buffer(StokesComponent::V, ch, bl) += dcomplex(
angular_smear[ch] * smear_terms(bl, ch) * temp_prod_real_V,
angular_smear[ch] * smear_terms(bl, ch) * temp_prod_imag_V);
}
}
}
} // Baselines.
}
}
void PredictPlanExec::ComputeSmearTerms() {
// Compute the smear terms
size_t p = 0UL;
size_t q = 0UL;
std::vector<float> channel_floats(channel_widths.size());
std::copy(channel_widths.begin(), channel_widths.end(),
channel_floats.begin());
for (size_t bl = 0; bl < baselines.size(); ++bl) {
std::tie(p, q) = baselines[bl];
const float phase_diff =
static_cast<float>(station_phases[q] - station_phases[p]);
if (abs(phase_diff) > 1.e-6) [[likely]] {
#pragma omp simd
for (size_t ch = 0; ch < nchannels; ch++) {
const float shift = phase_diff * channel_floats[ch] * 0.5f;
smear_terms(bl, ch) = std::fabs(sinf(shift) / shift);
}
}
}
}
\ No newline at end of file
......@@ -5,9 +5,81 @@
#define BOOST_TEST_MODULE PREDICT_PLAN_EXEC_TEST
#include <PredictPlanExec.h>
#include <boost/test/unit_test.hpp>
#include <test/Common.h>
BOOST_AUTO_TEST_SUITE(PredictPlanExecTestSuite)
BOOST_AUTO_TEST_CASE(test_PredictPlanExec) { BOOST_CHECK(true); }
BOOST_AUTO_TEST_CASE(compute_smear_terms) {
static constexpr size_t test_n_stations = 4;
static constexpr size_t test_n_channels = 2;
static constexpr size_t test_n_sources = 10;
const Direction test_reference_point{0.5, 0.1};
const Direction test_offset_point{test_reference_point.ra + 0.02,
test_reference_point.dec + 0.02};
// Create a SimulationPlanExec object
std::unique_ptr<PredictRun> run =
MakePredictRun(test_reference_point, test_offset_point, test_n_stations,
test_n_channels, false, true);
run->Initialize();
PredictPlan plan;
PointSourceCollection sources;
PredictPlanExec sim_plan_exec(run->plan);
for (size_t s = 0; s < test_n_sources; ++s) {
sources.Add(run->makeSource<PointSource>());
}
// Precompute the station phases and shifts
sim_plan_exec.Precompute(sources);
for (size_t bl_idx = 0; bl_idx < sim_plan_exec.baselines.size(); bl_idx++) {
const auto &baseline = sim_plan_exec.baselines[bl_idx];
double phase_diff = sim_plan_exec.GetStationPhases()[baseline.second] -
sim_plan_exec.GetStationPhases()[baseline.first];
for (size_t f_idx = 0; f_idx < sim_plan_exec.channel_widths.size();
f_idx++) {
double expected = ComputeSmearterm(
phase_diff, sim_plan_exec.channel_widths[f_idx] * 0.5);
BOOST_CHECK_CLOSE(expected, sim_plan_exec.smear_terms(bl_idx, f_idx),
1e-4);
}
}
}
BOOST_AUTO_TEST_CASE(dont_compute_smear_terms) {
static constexpr size_t test_n_stations = 4;
static constexpr size_t test_n_channels = 2;
static constexpr size_t test_n_sources = 10;
const Direction test_reference_point{0.5, 0.1};
const Direction test_offset_point{test_reference_point.ra + 0.02,
test_reference_point.dec + 0.02};
// Create a SimulationPlanExec object
std::unique_ptr<PredictRun> run =
MakePredictRun(test_reference_point, test_offset_point, test_n_stations,
test_n_channels, false, false);
run->Initialize();
PredictPlan plan;
PointSourceCollection sources;
PredictPlanExec sim_plan_exec(run->plan);
for (size_t s = 0; s < test_n_sources; ++s) {
sources.Add(run->makeSource<PointSource>());
}
// Precompute the station phases and shifts
sim_plan_exec.Precompute(sources);
for (size_t bl_idx = 0; bl_idx < sim_plan_exec.baselines.size(); bl_idx++) {
for (size_t f_idx = 0; f_idx < sim_plan_exec.channel_widths.size();
f_idx++) {
BOOST_CHECK_CLOSE(1.0, sim_plan_exec.smear_terms(bl_idx, f_idx), 1e-4);
}
}
}
BOOST_AUTO_TEST_SUITE_END()
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment