Skip to content
Snippets Groups Projects
Commit 8a3c26ec, authored and committed by Bram Veenboer
Browse files

Remove multiplication with inv

parent 8c061688
No related branches found
No related tags found
1 merge request: !4 "Add matrixMultiplyAVX2b"
......@@ -132,7 +132,6 @@ void matrixMultiplyAVX2b(const std::complex<float>* a,
__m256i a_4_ind = _mm256_set_epi32(7, 7, 7, 7, 3, 3, 3, 3);
__m256i b_4_ind = _mm256_set_epi32(6, 7, 4, 5, 6, 7, 4, 5);
__m256 inv = _mm256_set_ps(1., -1., 1., -1., 1., -1., 1., -1.);
__m256 a_1 = _mm256_permutevar8x32_ps(a_m, a_1_ind);
__m256 b_1 = _mm256_permutevar8x32_ps(b_m, b_1_ind);
......@@ -140,16 +139,14 @@ void matrixMultiplyAVX2b(const std::complex<float>* a,
__m256 b_2 = _mm256_permutevar8x32_ps(b_m, b_2_ind);
__m256 a_3 = _mm256_permutevar8x32_ps(a_m, a_3_ind);
__m256 a_3_inv = _mm256_mul_ps(a_3, inv);
__m256 b_3 = _mm256_permutevar8x32_ps(b_m, b_3_ind);
__m256 a_4 = _mm256_permutevar8x32_ps(a_m, a_4_ind);
__m256 a_4_inv = _mm256_mul_ps(a_4, inv);
__m256 b_4 = _mm256_permutevar8x32_ps(b_m, b_4_ind);
__m256 c_m = _mm256_mul_ps(a_1, b_1);
c_m = _mm256_fmadd_ps(a_2, b_2, c_m);
c_m = _mm256_fmadd_ps(a_3_inv, b_3, c_m);
c_m = _mm256_fmadd_ps(a_4_inv, b_4, c_m);
c_m = _mm256_add_ps(_mm256_mul_ps(a_2, b_2), c_m);
c_m = _mm256_addsub_ps(c_m, _mm256_mul_ps(a_3, b_3));
c_m = _mm256_addsub_ps(c_m, _mm256_mul_ps(a_4, b_4));
_mm256_store_ps(c_ptr, c_m);
}
\ No newline at end of file
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment