diff --git a/include/PredictPlanExec.h b/include/PredictPlanExec.h index c705a682b9198503441c0b0dd35d75ce5c65c654..f088bbc5614596183bd9e848f3ad3442dc5036dc 100644 --- a/include/PredictPlanExec.h +++ b/include/PredictPlanExec.h @@ -78,16 +78,20 @@ private: template <computation_strategy T = computation_strategy::MULTI_SIMD, class E, class S> - constexpr void ProcessStokesComponent(E &stoke_buffer_expr, - const S &stoke_spectrum) { + inline void ProcessPolarizationComponent(E &stoke_buffer_expr, + const S &stoke_spectrum) { if constexpr (T == computation_strategy::SINGLE) { - ProcessStokesComponentSingle<E, S>(stoke_buffer_expr, stoke_spectrum); + ProcessPolarizationComponentSingle<E, S>(stoke_buffer_expr, + stoke_spectrum); } else if constexpr (T == computation_strategy::XTENSOR) { - ProcessStokesComponentXtensor<E, S>(stoke_buffer_expr, stoke_spectrum); + ProcessPolarizationComponentXtensor<E, S>(stoke_buffer_expr, + stoke_spectrum); + } else if constexpr (T == computation_strategy::MULTI) { - ProcessStokesComponentMulti<E, S>(stoke_buffer_expr, stoke_spectrum); + ProcessPolarizaionComponentMulti<E, S>(stoke_buffer_expr, stoke_spectrum); } else if constexpr (T == computation_strategy::MULTI_SIMD) { - ProcessStokesComponentMultiSimd<E, S>(stoke_buffer_expr, stoke_spectrum); + ProcessPolarizationComponentMultiSimd<E, S>(stoke_buffer_expr, + stoke_spectrum); } else { static_assert(T == computation_strategy::SINGLE || T == computation_strategy::MULTI || @@ -97,21 +101,20 @@ private: } template <class E, class S> - inline void ProcessStokesComponent(const computation_strategy T, - E &stoke_buffer_expr, - const S &stoke_spectrum) { + inline void ProcessPolarizationComponent(const computation_strategy T, + E &buffer_expr, const S &spectrum) { switch (T) { case computation_strategy::SINGLE: - ProcessStokesComponentSingle<E, S>(stoke_buffer_expr, stoke_spectrum); + ProcessPolarizationComponentSingle<E, S>(buffer_expr, spectrum); break; case computation_strategy::XTENSOR: - ProcessStokesComponentXtensor<E, S>(stoke_buffer_expr, stoke_spectrum); + ProcessPolarizationComponentXtensor<E, S>(buffer_expr, spectrum); break; case computation_strategy::MULTI: - ProcessStokesComponentMulti<E, S>(stoke_buffer_expr, stoke_spectrum); + ProcessPolarizationComponentMulti<E, S>(buffer_expr, spectrum); break; case computation_strategy::MULTI_SIMD: - ProcessStokesComponentMultiSimd<E, S>(stoke_buffer_expr, stoke_spectrum); + ProcessPolarizationComponentMultiSimd<E, S>(buffer_expr, spectrum); break; default: break; @@ -119,20 +122,19 @@ private: } template <class E, class S> - void ProcessStokesComponentSingle(E &stoke_buffer_expr, - const S &stoke_spectrum); + void ProcessPolarizationComponentSingle(E &stoke_buffer_expr, + const S &stoke_spectrum); template <class E, class S> - void ProcessStokesComponentXtensor(E &stoke_buffer_expr, - const S &stoke_spectrum); + void ProcessPolarizationComponentMulti(E &stoke_buffer_expr, + const S &stoke_spectrum); template <class E, class S> - void ProcessStokesComponentMulti(E &stoke_buffer_expr, - const S &stoke_spectrum); - + void ProcessPolarizationComponentMultiSimd(E &stoke_buffer_expr, + const S &stoke_spectrum); template <class E, class S> - void ProcessStokesComponentMultiSimd(E &stoke_buffer_expr, - const S &stoke_spectrum); + void ProcessPolarizationComponentXtensor(E &stoke_buffer_expr, + const S &stoke_spectrum); private: // std::vector<double> p_real; diff --git a/src/PredictPlanExec.cpp b/src/PredictPlanExec.cpp index ae2f12889284632ed271f751e2368eccbca0bf96..9126c4ef16800c43f57b8d18bfa3e0587831994a 100644 --- a/src/PredictPlanExec.cpp +++ b/src/PredictPlanExec.cpp @@ -120,8 +120,8 @@ void PredictPlanExec::FillEulerMatrix(xt::xtensor<double, 2> &mat, } template <class E, class S> -void PredictPlanExec::ProcessStokesComponentSingle(E &stoke_buffer, - const S &stoke_spectrum) { +void PredictPlanExec::ProcessPolarizationComponentSingle( + E &stoke_buffer, const S &stoke_spectrum) { // Use preallocated storage (outside for loops) std::vector<double> temp_prod_real(nchannels); std::vector<double> temp_prod_imag(nchannels); @@ -188,16 +188,16 @@ std::ostream &operator<<(std::ostream &os, const __m256d ®) { } template <class E, class S> -void PredictPlanExec::ProcessStokesComponentXtensor(E &stoke_buffer, - const S &stoke_spectrum) {} +void PredictPlanExec::ProcessPolarizationComponentXtensor(E &buffer_expr, + const S &spectrum) {} template <class E, class S> -void PredictPlanExec::ProcessStokesComponentMulti(E &stoke_buffer, - const S &stoke_spectrum) {} +void PredictPlanExec::ProcessPolarizationComponentMulti(E &buffer_expr, + const S &spectrum) {} template <class E, class S> -void PredictPlanExec::ProcessStokesComponentMultiSimd(E &stoke_buffer, - const S &stoke_spectrum) { +void PredictPlanExec::ProcessPolarizationComponentMultiSimd(E &buffer_expr, + const S &spectrum) { // Use preallocated storage (outside for loops) const size_t simd_channels = (nchannels / 4) * 4; @@ -231,13 +231,13 @@ void PredictPlanExec::ProcessStokesComponentMultiSimd(E &stoke_buffer, shift_data(q, ch + 3).imag(), shift_data(q, ch + 2).imag(), shift_data(q, ch + 1).imag(), shift_data(q, ch + 0).imag()); - __m256d x_c = _mm256_set_pd( - stoke_spectrum(ch + 3).real(), stoke_spectrum(ch + 2).real(), - stoke_spectrum(ch + 1).real(), stoke_spectrum(ch + 0).real()); + __m256d x_c = + _mm256_set_pd(spectrum(ch + 3).real(), spectrum(ch + 2).real(), + spectrum(ch + 1).real(), spectrum(ch + 0).real()); - __m256d y_c = _mm256_set_pd( - stoke_spectrum(ch + 3).imag(), stoke_spectrum(ch + 2).imag(), - stoke_spectrum(ch + 1).imag(), stoke_spectrum(ch + 0).imag()); + __m256d y_c = + _mm256_set_pd(spectrum(ch + 3).imag(), spectrum(ch + 2).imag(), + spectrum(ch + 1).imag(), spectrum(ch + 0).imag()); // Complex multiply (q_conj * p) * stokes __m256d real_part = @@ -257,11 +257,11 @@ void PredictPlanExec::ProcessStokesComponentMultiSimd(E &stoke_buffer, if (correct_frequency_smearing) { for (int k = 0; k < 4; ++k) { const double smear = smear_terms[ch + k]; - stoke_buffer(bl, ch + k) += dcomplex{r[k] * smear, i[k] * smear}; + buffer_expr(bl, ch + k) += dcomplex{r[k] * smear, i[k] * smear}; } } else { for (int k = 0; k < 4; ++k) { - stoke_buffer(bl, ch + k) += dcomplex{r[k], i[k]}; + buffer_expr(bl, ch + k) += dcomplex{r[k], i[k]}; } } } @@ -271,8 +271,8 @@ void PredictPlanExec::ProcessStokesComponentMultiSimd(E &stoke_buffer, const double y_p = (shift_data(p, ch).imag()); const double x_q = (shift_data(q, ch).real()); const double y_q = (shift_data(q, ch).imag()); - const double x_c = stoke_spectrum(ch).real(); - const double y_c = stoke_spectrum(ch).imag(); + const double x_c = spectrum(ch).real(); + const double y_c = spectrum(ch).imag(); // Compute baseline phase shift. // Compute visibilities. @@ -285,10 +285,10 @@ void PredictPlanExec::ProcessStokesComponentMultiSimd(E &stoke_buffer, if (correct_frequency_smearing) { const double smear_term = smear_terms[ch]; - stoke_buffer(bl, ch) += + buffer_expr(bl, ch) += dcomplex{temp_prod_real * smear_term, temp_prod_imag * smear_term}; } else { - stoke_buffer(bl, ch) += dcomplex{temp_prod_real, temp_prod_imag}; + buffer_expr(bl, ch) += dcomplex{temp_prod_real, temp_prod_imag}; } } } @@ -297,14 +297,13 @@ void PredictPlanExec::ProcessStokesComponentMultiSimd(E &stoke_buffer, void PredictPlanExec::Compute( const PointSourceCollection &sources, xt::xtensor<std::complex<double>, 3, xt::layout_type::row_major> &buffer) { - xt::xtensor<std::complex<double>, 2> computed_spectrum( {4, frequencies.size()}); - const auto spectrum_I = xt::view(computed_spectrum, 0, xt::all()); - const auto spectrum_Q = xt::view(computed_spectrum, 1, xt::all()); - const auto spectrum_U = xt::view(computed_spectrum, 2, xt::all()); - const auto spectrum_V = xt::view(computed_spectrum, 3, xt::all()); + const auto spectrum_XX = xt::view(computed_spectrum, 0, xt::all()); + const auto spectrum_XY = xt::view(computed_spectrum, 1, xt::all()); + const auto spectrum_YX = xt::view(computed_spectrum, 2, xt::all()); + const auto spectrum_YY = xt::view(computed_spectrum, 3, xt::all()); for (size_t source_index = 0; source_index < sources.Size(); ++source_index) { const Spectrum &spectrum = sources.spectrums[source_index]; @@ -313,17 +312,17 @@ void PredictPlanExec::Compute( if (compute_stokes_I_only) { auto buffer_I = xt::view(buffer, 0, xt::all(), xt::all()); - ProcessStokesComponent(buffer_I, spectrum_I); + ProcessPolarizationComponent(buffer_I, spectrum_XX); } else { - auto buffer_I = xt::view(buffer, 0, xt::all(), xt::all()); - auto buffer_Q = xt::view(buffer, 1, xt::all(), xt::all()); - auto buffer_U = xt::view(buffer, 2, xt::all(), xt::all()); - auto buffer_V = xt::view(buffer, 3, xt::all(), xt::all()); - - ProcessStokesComponent(buffer_I, spectrum_I); - ProcessStokesComponent(buffer_Q, spectrum_Q); - ProcessStokesComponent(buffer_U, spectrum_U); - ProcessStokesComponent(buffer_V, spectrum_V); + auto buffer_XX = xt::view(buffer, 0, xt::all(), xt::all()); + auto buffer_XY = xt::view(buffer, 1, xt::all(), xt::all()); + auto buffer_YX = xt::view(buffer, 2, xt::all(), xt::all()); + auto buffer_YY = xt::view(buffer, 3, xt::all(), xt::all()); + + ProcessPolarizationComponent(buffer_XX, spectrum_XX); + ProcessPolarizationComponent(buffer_XY, spectrum_XY); + ProcessPolarizationComponent(buffer_YX, spectrum_YX); + ProcessPolarizationComponent(buffer_YY, spectrum_YY); } } // Baselines. @@ -333,14 +332,13 @@ void PredictPlanExec::ComputeWithTarget( const PointSourceCollection &sources, xt::xtensor<std::complex<double>, 3, xt::layout_type::row_major> &buffer, const computation_strategy strat) { - xt::xtensor<std::complex<double>, 2> computed_spectrum( {4, frequencies.size()}); - const auto spectrum_I = xt::view(computed_spectrum, 0, xt::all()); - const auto spectrum_Q = xt::view(computed_spectrum, 1, xt::all()); - const auto spectrum_U = xt::view(computed_spectrum, 2, xt::all()); - const auto spectrum_V = xt::view(computed_spectrum, 3, xt::all()); + const auto spectrum_XX = xt::view(computed_spectrum, 0, xt::all()); + const auto spectrum_XY = xt::view(computed_spectrum, 1, xt::all()); + const auto spectrum_YX = xt::view(computed_spectrum, 2, xt::all()); + const auto spectrum_YY = xt::view(computed_spectrum, 3, xt::all()); for (size_t source_index = 0; source_index < sources.Size(); ++source_index) { const Spectrum &spectrum = sources.spectrums[source_index]; @@ -349,17 +347,17 @@ void PredictPlanExec::ComputeWithTarget( if (compute_stokes_I_only) { auto buffer_I = xt::view(buffer, 0, xt::all(), xt::all()); - ProcessStokesComponent(strat, buffer_I, spectrum_I); + ProcessPolarizationComponent(strat, buffer_I, spectrum_XX); } else { - auto buffer_I = xt::view(buffer, 0, xt::all(), xt::all()); - auto buffer_Q = xt::view(buffer, 1, xt::all(), xt::all()); - auto buffer_U = xt::view(buffer, 2, xt::all(), xt::all()); - auto buffer_V = xt::view(buffer, 3, xt::all(), xt::all()); - - ProcessStokesComponent(strat, buffer_I, spectrum_I); - ProcessStokesComponent(strat, buffer_Q, spectrum_Q); - ProcessStokesComponent(strat, buffer_U, spectrum_U); - ProcessStokesComponent(strat, buffer_V, spectrum_V); + auto buffer_XX = xt::view(buffer, 0, xt::all(), xt::all()); + auto buffer_XY = xt::view(buffer, 1, xt::all(), xt::all()); + auto buffer_YX = xt::view(buffer, 2, xt::all(), xt::all()); + auto buffer_YY = xt::view(buffer, 3, xt::all(), xt::all()); + + ProcessPolarizationComponent(strat, buffer_XX, spectrum_XX); + ProcessPolarizationComponent(strat, buffer_XY, spectrum_XY); + ProcessPolarizationComponent(strat, buffer_YX, spectrum_YX); + ProcessPolarizationComponent(strat, buffer_YY, spectrum_YY); } } // Baselines. @@ -495,21 +493,21 @@ void PredictPlanExec::Compute( const double x_q = (GetShiftData()(q, ch).real()); const double y_q = (GetShiftData()(q, ch).imag()); const double x_c = - (computed_spectrum(StokesComponent::I, ch).real()); + (computed_spectrum(CrossCorrelation::XX, ch).real()); const double y_c = - (computed_spectrum(StokesComponent::I, ch).imag()); + (computed_spectrum(CrossCorrelation::XX, ch).imag()); const double x_c2 = - (computed_spectrum(StokesComponent::Q, ch).real()); + (computed_spectrum(CrossCorrelation::XY, ch).real()); const double y_c2 = - (computed_spectrum(StokesComponent::Q, ch).imag()); + (computed_spectrum(CrossCorrelation::XY, ch).imag()); const double x_c3 = - (computed_spectrum(StokesComponent::U, ch).real()); + (computed_spectrum(CrossCorrelation::YX, ch).real()); const double y_c3 = - (computed_spectrum(StokesComponent::U, ch).imag()); + (computed_spectrum(CrossCorrelation::YX, ch).imag()); const double x_c4 = - (computed_spectrum(StokesComponent::V, ch).real()); + (computed_spectrum(CrossCorrelation::YY, ch).real()); const double y_c4 = - (computed_spectrum(StokesComponent::V, ch).imag()); + (computed_spectrum(CrossCorrelation::YY, ch).imag()); // Compute baseline phase shift. // Compute visibilities. @@ -535,16 +533,16 @@ void PredictPlanExec::Compute( // The following operations are memory-bound, unlike // the compute-bound segment above. By merging these together, // an improvement in speed is expected - buffer(StokesComponent::I, bl, ch) += + buffer(CrossCorrelation::XX, bl, ch) += dcomplex(smear_terms[ch] * temp_prod_real_I, smear_terms[ch] * temp_prod_imag_I); - buffer(StokesComponent::Q, bl, ch) += + buffer(CrossCorrelation::XY, bl, ch) += dcomplex(smear_terms[ch] * temp_prod_real_Q, smear_terms[ch] * temp_prod_imag_Q); - buffer(StokesComponent::U, bl, ch) += + buffer(CrossCorrelation::YX, bl, ch) += dcomplex(smear_terms[ch] * temp_prod_real_U, smear_terms[ch] * temp_prod_imag_U); - buffer(StokesComponent::V, bl, ch) += + buffer(CrossCorrelation::YY, bl, ch) += dcomplex(smear_terms[ch] * temp_prod_real_V, smear_terms[ch] * temp_prod_imag_V); }