diff --git a/include/Directions.h b/include/Directions.h index 1534eb8527491a132dbba5d47c7c14ed8ef118f2..b17b97a12774d591cd3d131403a6d7244e1e190e 100644 --- a/include/Directions.h +++ b/include/Directions.h @@ -14,6 +14,7 @@ #include <common/SmartVector.h> #include <vector> +#include <xsimd/types/xsimd_avx512f_register.hpp> #include <xtensor/xmath.hpp> #include <xtensor/xview.hpp> @@ -145,16 +146,18 @@ struct radec2lmn { template <class C, class Tag, class Arch> void radec2lmn::operator()(Arch, const Direction &reference, const C &ra, const C &dec, xt::xtensor<double, 2> &lmn, Tag) { - using b_type = xsimd::batch<double, Arch>; + using b_type = xsimd::batch<double>; std::size_t inc = b_type::size; std::size_t size = ra.size(); - double sin_dec0 = std::sin(reference.dec); - double cos_dec0 = std::cos(reference.dec); + const double sin_dec0 = std::sin(reference.dec); + const double cos_dec0 = std::cos(reference.dec); + + const ConstSmartVectorView<double> ra_view = ra.view(); // size for which the vectorization is possible std::size_t vec_size = size - size % inc; for (std::size_t i = 0; i < vec_size; i += inc) { - const b_type ra_vec = b_type::load(&ra[i], Tag()); - const b_type dec_vec = b_type::load(&dec[i], Tag()); + const b_type ra_vec = b_type::load(&(ra_view[i]), Tag()); + const b_type dec_vec = b_type::load(&(dec[i]), Tag()); const b_type delta_ra = ra_vec - reference.ra; @@ -196,8 +199,22 @@ template <> inline void Directions::radec2lmn<Directions::computation_strategy::MULTI_SIMD>( const Direction &reference, xt::xtensor<double, 2> &lmn) { xt::xtensor<double, 2> lmn_tmp({3, ra.size()}); - xsimd::dispatch(xsimd::radec2lmn{})(reference, ra, dec, lmn_tmp, - xsimd::aligned_mode()); + + // FIXME: we have to make sure that SmartVector is actually aligned. Then we + // can switch back to the aligned mode. The current implementation does not + // guarantee that the data is aligned. + if (reinterpret_cast<std::uintptr_t>(ra.data()) % + xsimd::arch_list<xsimd::avx2>::alignment() != + 0) { + std::cerr << "Warning: ra is not aligned. Using unaligned mode." + << std::endl; + xsimd::dispatch<xsimd::arch_list<xsimd::avx2>>(xsimd::radec2lmn{})( + reference, ra, dec, lmn_tmp, xsimd::unaligned_mode{}); + } else { + xsimd::dispatch<xsimd::arch_list<xsimd::avx2>>(xsimd::radec2lmn{})( + reference, ra, dec, lmn_tmp, xsimd::aligned_mode{}); + } + lmn = xt::transpose(lmn_tmp); } #endif \ No newline at end of file diff --git a/include/common/SmartVector.h b/include/common/SmartVector.h index a8cd1e99e2c29bb8835751c65cd089894ce0652d..b0d4af6a12c3e6249faec484bcdf61cf408000b0 100644 --- a/include/common/SmartVector.h +++ b/include/common/SmartVector.h @@ -18,6 +18,10 @@ using SmartVectorView = decltype(::xt::adapt(std::add_pointer_t<T>{}, std::array<size_t, 1UL>{})); ; +template <typename T> +using ConstSmartVectorView = + decltype(::xt::adapt(std::declval<const T *>(), std::array<size_t, 1>{})); + template <typename T> class SmartVector : public std::vector<T> { public: SmartVector() : std::vector<T>() {} @@ -25,7 +29,7 @@ public: SmartVector(const std::initializer_list<T> &vec) : std::vector<T>(vec) {} SmartVectorView<T> view() { return xt::adapt(this->data(), {this->size()}); } - const SmartVectorView<T> view() const { + ConstSmartVectorView<T> view() const { return xt::adapt(this->data(), {this->size()}); }