Skip to content
Snippets Groups Projects
Commit e074aaf0 authored by Wiebe van Breukelen's avatar Wiebe van Breukelen
Browse files

Merge branch 'rename_spectrum_variables' into 'improve-cpuplan-perf-structure'

Rename spectrum variables

See merge request !32
parents 6661f1ea f89ce168
Branches
No related tags found
3 merge requests!33Draft: GPU smearterms,!32Rename spectrum variables,!30Add SIMD-based optimizations and refactored PredictPlanExecCPU
Pipeline #118798 passed with warnings
...@@ -78,16 +78,20 @@ private: ...@@ -78,16 +78,20 @@ private:
template <computation_strategy T = computation_strategy::MULTI_SIMD, class E, template <computation_strategy T = computation_strategy::MULTI_SIMD, class E,
class S> class S>
constexpr void ProcessStokesComponent(E &stoke_buffer_expr, inline void ProcessPolarizationComponent(E &stoke_buffer_expr,
const S &stoke_spectrum) { const S &stoke_spectrum) {
if constexpr (T == computation_strategy::SINGLE) { if constexpr (T == computation_strategy::SINGLE) {
ProcessStokesComponentSingle<E, S>(stoke_buffer_expr, stoke_spectrum); ProcessPolarizationComponentSingle<E, S>(stoke_buffer_expr,
stoke_spectrum);
} else if constexpr (T == computation_strategy::XTENSOR) { } else if constexpr (T == computation_strategy::XTENSOR) {
ProcessStokesComponentXtensor<E, S>(stoke_buffer_expr, stoke_spectrum); ProcessPolarizationComponentXtensor<E, S>(stoke_buffer_expr,
stoke_spectrum);
} else if constexpr (T == computation_strategy::MULTI) { } else if constexpr (T == computation_strategy::MULTI) {
ProcessStokesComponentMulti<E, S>(stoke_buffer_expr, stoke_spectrum); ProcessPolarizaionComponentMulti<E, S>(stoke_buffer_expr, stoke_spectrum);
} else if constexpr (T == computation_strategy::MULTI_SIMD) { } else if constexpr (T == computation_strategy::MULTI_SIMD) {
ProcessStokesComponentMultiSimd<E, S>(stoke_buffer_expr, stoke_spectrum); ProcessPolarizationComponentMultiSimd<E, S>(stoke_buffer_expr,
stoke_spectrum);
} else { } else {
static_assert(T == computation_strategy::SINGLE || static_assert(T == computation_strategy::SINGLE ||
T == computation_strategy::MULTI || T == computation_strategy::MULTI ||
...@@ -97,21 +101,20 @@ private: ...@@ -97,21 +101,20 @@ private:
} }
template <class E, class S> template <class E, class S>
inline void ProcessStokesComponent(const computation_strategy T, inline void ProcessPolarizationComponent(const computation_strategy T,
E &stoke_buffer_expr, E &buffer_expr, const S &spectrum) {
const S &stoke_spectrum) {
switch (T) { switch (T) {
case computation_strategy::SINGLE: case computation_strategy::SINGLE:
ProcessStokesComponentSingle<E, S>(stoke_buffer_expr, stoke_spectrum); ProcessPolarizationComponentSingle<E, S>(buffer_expr, spectrum);
break; break;
case computation_strategy::XTENSOR: case computation_strategy::XTENSOR:
ProcessStokesComponentXtensor<E, S>(stoke_buffer_expr, stoke_spectrum); ProcessPolarizationComponentXtensor<E, S>(buffer_expr, spectrum);
break; break;
case computation_strategy::MULTI: case computation_strategy::MULTI:
ProcessStokesComponentMulti<E, S>(stoke_buffer_expr, stoke_spectrum); ProcessPolarizationComponentMulti<E, S>(buffer_expr, spectrum);
break; break;
case computation_strategy::MULTI_SIMD: case computation_strategy::MULTI_SIMD:
ProcessStokesComponentMultiSimd<E, S>(stoke_buffer_expr, stoke_spectrum); ProcessPolarizationComponentMultiSimd<E, S>(buffer_expr, spectrum);
break; break;
default: default:
break; break;
...@@ -119,19 +122,18 @@ private: ...@@ -119,19 +122,18 @@ private:
} }
template <class E, class S> template <class E, class S>
void ProcessStokesComponentSingle(E &stoke_buffer_expr, void ProcessPolarizationComponentSingle(E &stoke_buffer_expr,
const S &stoke_spectrum); const S &stoke_spectrum);
template <class E, class S> template <class E, class S>
void ProcessStokesComponentXtensor(E &stoke_buffer_expr, void ProcessPolarizationComponentMulti(E &stoke_buffer_expr,
const S &stoke_spectrum); const S &stoke_spectrum);
template <class E, class S> template <class E, class S>
void ProcessStokesComponentMulti(E &stoke_buffer_expr, void ProcessPolarizationComponentMultiSimd(E &stoke_buffer_expr,
const S &stoke_spectrum); const S &stoke_spectrum);
template <class E, class S> template <class E, class S>
void ProcessStokesComponentMultiSimd(E &stoke_buffer_expr, void ProcessPolarizationComponentXtensor(E &stoke_buffer_expr,
const S &stoke_spectrum); const S &stoke_spectrum);
private: private:
......
...@@ -120,8 +120,8 @@ void PredictPlanExec::FillEulerMatrix(xt::xtensor<double, 2> &mat, ...@@ -120,8 +120,8 @@ void PredictPlanExec::FillEulerMatrix(xt::xtensor<double, 2> &mat,
} }
template <class E, class S> template <class E, class S>
void PredictPlanExec::ProcessStokesComponentSingle(E &stoke_buffer, void PredictPlanExec::ProcessPolarizationComponentSingle(
const S &stoke_spectrum) { E &stoke_buffer, const S &stoke_spectrum) {
// Use preallocated storage (outside for loops) // Use preallocated storage (outside for loops)
std::vector<double> temp_prod_real(nchannels); std::vector<double> temp_prod_real(nchannels);
std::vector<double> temp_prod_imag(nchannels); std::vector<double> temp_prod_imag(nchannels);
...@@ -188,16 +188,16 @@ std::ostream &operator<<(std::ostream &os, const __m256d &reg) { ...@@ -188,16 +188,16 @@ std::ostream &operator<<(std::ostream &os, const __m256d &reg) {
} }
template <class E, class S> template <class E, class S>
void PredictPlanExec::ProcessStokesComponentXtensor(E &stoke_buffer, void PredictPlanExec::ProcessPolarizationComponentXtensor(E &buffer_expr,
const S &stoke_spectrum) {} const S &spectrum) {}
template <class E, class S> template <class E, class S>
void PredictPlanExec::ProcessStokesComponentMulti(E &stoke_buffer, void PredictPlanExec::ProcessPolarizationComponentMulti(E &buffer_expr,
const S &stoke_spectrum) {} const S &spectrum) {}
template <class E, class S> template <class E, class S>
void PredictPlanExec::ProcessStokesComponentMultiSimd(E &stoke_buffer, void PredictPlanExec::ProcessPolarizationComponentMultiSimd(E &buffer_expr,
const S &stoke_spectrum) { const S &spectrum) {
// Use preallocated storage (outside for loops) // Use preallocated storage (outside for loops)
const size_t simd_channels = (nchannels / 4) * 4; const size_t simd_channels = (nchannels / 4) * 4;
...@@ -231,13 +231,13 @@ void PredictPlanExec::ProcessStokesComponentMultiSimd(E &stoke_buffer, ...@@ -231,13 +231,13 @@ void PredictPlanExec::ProcessStokesComponentMultiSimd(E &stoke_buffer,
shift_data(q, ch + 3).imag(), shift_data(q, ch + 2).imag(), shift_data(q, ch + 3).imag(), shift_data(q, ch + 2).imag(),
shift_data(q, ch + 1).imag(), shift_data(q, ch + 0).imag()); shift_data(q, ch + 1).imag(), shift_data(q, ch + 0).imag());
__m256d x_c = _mm256_set_pd( __m256d x_c =
stoke_spectrum(ch + 3).real(), stoke_spectrum(ch + 2).real(), _mm256_set_pd(spectrum(ch + 3).real(), spectrum(ch + 2).real(),
stoke_spectrum(ch + 1).real(), stoke_spectrum(ch + 0).real()); spectrum(ch + 1).real(), spectrum(ch + 0).real());
__m256d y_c = _mm256_set_pd( __m256d y_c =
stoke_spectrum(ch + 3).imag(), stoke_spectrum(ch + 2).imag(), _mm256_set_pd(spectrum(ch + 3).imag(), spectrum(ch + 2).imag(),
stoke_spectrum(ch + 1).imag(), stoke_spectrum(ch + 0).imag()); spectrum(ch + 1).imag(), spectrum(ch + 0).imag());
// Complex multiply (q_conj * p) * stokes // Complex multiply (q_conj * p) * stokes
__m256d real_part = __m256d real_part =
...@@ -257,11 +257,11 @@ void PredictPlanExec::ProcessStokesComponentMultiSimd(E &stoke_buffer, ...@@ -257,11 +257,11 @@ void PredictPlanExec::ProcessStokesComponentMultiSimd(E &stoke_buffer,
if (correct_frequency_smearing) { if (correct_frequency_smearing) {
for (int k = 0; k < 4; ++k) { for (int k = 0; k < 4; ++k) {
const double smear = smear_terms[ch + k]; const double smear = smear_terms[ch + k];
stoke_buffer(bl, ch + k) += dcomplex{r[k] * smear, i[k] * smear}; buffer_expr(bl, ch + k) += dcomplex{r[k] * smear, i[k] * smear};
} }
} else { } else {
for (int k = 0; k < 4; ++k) { for (int k = 0; k < 4; ++k) {
stoke_buffer(bl, ch + k) += dcomplex{r[k], i[k]}; buffer_expr(bl, ch + k) += dcomplex{r[k], i[k]};
} }
} }
} }
...@@ -271,8 +271,8 @@ void PredictPlanExec::ProcessStokesComponentMultiSimd(E &stoke_buffer, ...@@ -271,8 +271,8 @@ void PredictPlanExec::ProcessStokesComponentMultiSimd(E &stoke_buffer,
const double y_p = (shift_data(p, ch).imag()); const double y_p = (shift_data(p, ch).imag());
const double x_q = (shift_data(q, ch).real()); const double x_q = (shift_data(q, ch).real());
const double y_q = (shift_data(q, ch).imag()); const double y_q = (shift_data(q, ch).imag());
const double x_c = stoke_spectrum(ch).real(); const double x_c = spectrum(ch).real();
const double y_c = stoke_spectrum(ch).imag(); const double y_c = spectrum(ch).imag();
// Compute baseline phase shift. // Compute baseline phase shift.
// Compute visibilities. // Compute visibilities.
...@@ -285,10 +285,10 @@ void PredictPlanExec::ProcessStokesComponentMultiSimd(E &stoke_buffer, ...@@ -285,10 +285,10 @@ void PredictPlanExec::ProcessStokesComponentMultiSimd(E &stoke_buffer,
if (correct_frequency_smearing) { if (correct_frequency_smearing) {
const double smear_term = smear_terms[ch]; const double smear_term = smear_terms[ch];
stoke_buffer(bl, ch) += buffer_expr(bl, ch) +=
dcomplex{temp_prod_real * smear_term, temp_prod_imag * smear_term}; dcomplex{temp_prod_real * smear_term, temp_prod_imag * smear_term};
} else { } else {
stoke_buffer(bl, ch) += dcomplex{temp_prod_real, temp_prod_imag}; buffer_expr(bl, ch) += dcomplex{temp_prod_real, temp_prod_imag};
} }
} }
} }
...@@ -297,14 +297,13 @@ void PredictPlanExec::ProcessStokesComponentMultiSimd(E &stoke_buffer, ...@@ -297,14 +297,13 @@ void PredictPlanExec::ProcessStokesComponentMultiSimd(E &stoke_buffer,
void PredictPlanExec::Compute( void PredictPlanExec::Compute(
const PointSourceCollection &sources, const PointSourceCollection &sources,
xt::xtensor<std::complex<double>, 3, xt::layout_type::row_major> &buffer) { xt::xtensor<std::complex<double>, 3, xt::layout_type::row_major> &buffer) {
xt::xtensor<std::complex<double>, 2> computed_spectrum( xt::xtensor<std::complex<double>, 2> computed_spectrum(
{4, frequencies.size()}); {4, frequencies.size()});
const auto spectrum_I = xt::view(computed_spectrum, 0, xt::all()); const auto spectrum_XX = xt::view(computed_spectrum, 0, xt::all());
const auto spectrum_Q = xt::view(computed_spectrum, 1, xt::all()); const auto spectrum_XY = xt::view(computed_spectrum, 1, xt::all());
const auto spectrum_U = xt::view(computed_spectrum, 2, xt::all()); const auto spectrum_YX = xt::view(computed_spectrum, 2, xt::all());
const auto spectrum_V = xt::view(computed_spectrum, 3, xt::all()); const auto spectrum_YY = xt::view(computed_spectrum, 3, xt::all());
for (size_t source_index = 0; source_index < sources.Size(); ++source_index) { for (size_t source_index = 0; source_index < sources.Size(); ++source_index) {
const Spectrum &spectrum = sources.spectrums[source_index]; const Spectrum &spectrum = sources.spectrums[source_index];
...@@ -313,17 +312,17 @@ void PredictPlanExec::Compute( ...@@ -313,17 +312,17 @@ void PredictPlanExec::Compute(
if (compute_stokes_I_only) { if (compute_stokes_I_only) {
auto buffer_I = xt::view(buffer, 0, xt::all(), xt::all()); auto buffer_I = xt::view(buffer, 0, xt::all(), xt::all());
ProcessStokesComponent(buffer_I, spectrum_I); ProcessPolarizationComponent(buffer_I, spectrum_XX);
} else { } else {
auto buffer_I = xt::view(buffer, 0, xt::all(), xt::all()); auto buffer_XX = xt::view(buffer, 0, xt::all(), xt::all());
auto buffer_Q = xt::view(buffer, 1, xt::all(), xt::all()); auto buffer_XY = xt::view(buffer, 1, xt::all(), xt::all());
auto buffer_U = xt::view(buffer, 2, xt::all(), xt::all()); auto buffer_YX = xt::view(buffer, 2, xt::all(), xt::all());
auto buffer_V = xt::view(buffer, 3, xt::all(), xt::all()); auto buffer_YY = xt::view(buffer, 3, xt::all(), xt::all());
ProcessStokesComponent(buffer_I, spectrum_I); ProcessPolarizationComponent(buffer_XX, spectrum_XX);
ProcessStokesComponent(buffer_Q, spectrum_Q); ProcessPolarizationComponent(buffer_XY, spectrum_XY);
ProcessStokesComponent(buffer_U, spectrum_U); ProcessPolarizationComponent(buffer_YX, spectrum_YX);
ProcessStokesComponent(buffer_V, spectrum_V); ProcessPolarizationComponent(buffer_YY, spectrum_YY);
} }
} // Baselines. } // Baselines.
...@@ -333,14 +332,13 @@ void PredictPlanExec::ComputeWithTarget( ...@@ -333,14 +332,13 @@ void PredictPlanExec::ComputeWithTarget(
const PointSourceCollection &sources, const PointSourceCollection &sources,
xt::xtensor<std::complex<double>, 3, xt::layout_type::row_major> &buffer, xt::xtensor<std::complex<double>, 3, xt::layout_type::row_major> &buffer,
const computation_strategy strat) { const computation_strategy strat) {
xt::xtensor<std::complex<double>, 2> computed_spectrum( xt::xtensor<std::complex<double>, 2> computed_spectrum(
{4, frequencies.size()}); {4, frequencies.size()});
const auto spectrum_I = xt::view(computed_spectrum, 0, xt::all()); const auto spectrum_XX = xt::view(computed_spectrum, 0, xt::all());
const auto spectrum_Q = xt::view(computed_spectrum, 1, xt::all()); const auto spectrum_XY = xt::view(computed_spectrum, 1, xt::all());
const auto spectrum_U = xt::view(computed_spectrum, 2, xt::all()); const auto spectrum_YX = xt::view(computed_spectrum, 2, xt::all());
const auto spectrum_V = xt::view(computed_spectrum, 3, xt::all()); const auto spectrum_YY = xt::view(computed_spectrum, 3, xt::all());
for (size_t source_index = 0; source_index < sources.Size(); ++source_index) { for (size_t source_index = 0; source_index < sources.Size(); ++source_index) {
const Spectrum &spectrum = sources.spectrums[source_index]; const Spectrum &spectrum = sources.spectrums[source_index];
...@@ -349,17 +347,17 @@ void PredictPlanExec::ComputeWithTarget( ...@@ -349,17 +347,17 @@ void PredictPlanExec::ComputeWithTarget(
if (compute_stokes_I_only) { if (compute_stokes_I_only) {
auto buffer_I = xt::view(buffer, 0, xt::all(), xt::all()); auto buffer_I = xt::view(buffer, 0, xt::all(), xt::all());
ProcessStokesComponent(strat, buffer_I, spectrum_I); ProcessPolarizationComponent(strat, buffer_I, spectrum_XX);
} else { } else {
auto buffer_I = xt::view(buffer, 0, xt::all(), xt::all()); auto buffer_XX = xt::view(buffer, 0, xt::all(), xt::all());
auto buffer_Q = xt::view(buffer, 1, xt::all(), xt::all()); auto buffer_XY = xt::view(buffer, 1, xt::all(), xt::all());
auto buffer_U = xt::view(buffer, 2, xt::all(), xt::all()); auto buffer_YX = xt::view(buffer, 2, xt::all(), xt::all());
auto buffer_V = xt::view(buffer, 3, xt::all(), xt::all()); auto buffer_YY = xt::view(buffer, 3, xt::all(), xt::all());
ProcessStokesComponent(strat, buffer_I, spectrum_I); ProcessPolarizationComponent(strat, buffer_XX, spectrum_XX);
ProcessStokesComponent(strat, buffer_Q, spectrum_Q); ProcessPolarizationComponent(strat, buffer_XY, spectrum_XY);
ProcessStokesComponent(strat, buffer_U, spectrum_U); ProcessPolarizationComponent(strat, buffer_YX, spectrum_YX);
ProcessStokesComponent(strat, buffer_V, spectrum_V); ProcessPolarizationComponent(strat, buffer_YY, spectrum_YY);
} }
} // Baselines. } // Baselines.
...@@ -495,21 +493,21 @@ void PredictPlanExec::Compute( ...@@ -495,21 +493,21 @@ void PredictPlanExec::Compute(
const double x_q = (GetShiftData()(q, ch).real()); const double x_q = (GetShiftData()(q, ch).real());
const double y_q = (GetShiftData()(q, ch).imag()); const double y_q = (GetShiftData()(q, ch).imag());
const double x_c = const double x_c =
(computed_spectrum(StokesComponent::I, ch).real()); (computed_spectrum(CrossCorrelation::XX, ch).real());
const double y_c = const double y_c =
(computed_spectrum(StokesComponent::I, ch).imag()); (computed_spectrum(CrossCorrelation::XX, ch).imag());
const double x_c2 = const double x_c2 =
(computed_spectrum(StokesComponent::Q, ch).real()); (computed_spectrum(CrossCorrelation::XY, ch).real());
const double y_c2 = const double y_c2 =
(computed_spectrum(StokesComponent::Q, ch).imag()); (computed_spectrum(CrossCorrelation::XY, ch).imag());
const double x_c3 = const double x_c3 =
(computed_spectrum(StokesComponent::U, ch).real()); (computed_spectrum(CrossCorrelation::YX, ch).real());
const double y_c3 = const double y_c3 =
(computed_spectrum(StokesComponent::U, ch).imag()); (computed_spectrum(CrossCorrelation::YX, ch).imag());
const double x_c4 = const double x_c4 =
(computed_spectrum(StokesComponent::V, ch).real()); (computed_spectrum(CrossCorrelation::YY, ch).real());
const double y_c4 = const double y_c4 =
(computed_spectrum(StokesComponent::V, ch).imag()); (computed_spectrum(CrossCorrelation::YY, ch).imag());
// Compute baseline phase shift. // Compute baseline phase shift.
// Compute visibilities. // Compute visibilities.
...@@ -535,16 +533,16 @@ void PredictPlanExec::Compute( ...@@ -535,16 +533,16 @@ void PredictPlanExec::Compute(
// The following operations are memory-bound, unlike // The following operations are memory-bound, unlike
// the compute-bound segment above. By merging these together, // the compute-bound segment above. By merging these together,
// an improvement in speed is expected // an improvement in speed is expected
buffer(StokesComponent::I, bl, ch) += buffer(CrossCorrelation::XX, bl, ch) +=
dcomplex(smear_terms[ch] * temp_prod_real_I, dcomplex(smear_terms[ch] * temp_prod_real_I,
smear_terms[ch] * temp_prod_imag_I); smear_terms[ch] * temp_prod_imag_I);
buffer(StokesComponent::Q, bl, ch) += buffer(CrossCorrelation::XY, bl, ch) +=
dcomplex(smear_terms[ch] * temp_prod_real_Q, dcomplex(smear_terms[ch] * temp_prod_real_Q,
smear_terms[ch] * temp_prod_imag_Q); smear_terms[ch] * temp_prod_imag_Q);
buffer(StokesComponent::U, bl, ch) += buffer(CrossCorrelation::YX, bl, ch) +=
dcomplex(smear_terms[ch] * temp_prod_real_U, dcomplex(smear_terms[ch] * temp_prod_real_U,
smear_terms[ch] * temp_prod_imag_U); smear_terms[ch] * temp_prod_imag_U);
buffer(StokesComponent::V, bl, ch) += buffer(CrossCorrelation::YY, bl, ch) +=
dcomplex(smear_terms[ch] * temp_prod_real_V, dcomplex(smear_terms[ch] * temp_prod_real_V,
smear_terms[ch] * temp_prod_imag_V); smear_terms[ch] * temp_prod_imag_V);
} }
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment