Skip to content
Snippets Groups Projects
Commit 918bd29e authored by Leon Oostrum
Browse files

Remove temporary syncthreads

parent be3a6f2c
No related branches found
No related tags found
1 merge request: !13 Draft: AMD support
Pipeline #96846 passed
...@@ -365,8 +365,7 @@ template <bool add>__device__ inline void storeVisibilities(Visibilities visibil ...@@ -365,8 +365,7 @@ template <bool add>__device__ inline void storeVisibilities(Visibilities visibil
#else #else
#ifdef __HIP_PLATFORM_AMD__ #ifdef __HIP_PLATFORM_AMD__
#if NR_BITS == 8 || NR_BITS == 16 #if NR_BITS == 8 || NR_BITS == 16
// swap elements such that each thread holds N/2 real/imag pairs // swap elements such that each thread holds n_element/2 real/imag pairs instead of only real or only imag
// offset is N/2 for even threads, 0 for odd threads
unsigned offset = threadIdx.x % 2 == 0 ? sum.num_elements / 2 : 0; unsigned offset = threadIdx.x % 2 == 0 ? sum.num_elements / 2 : 0;
unsigned reverse_offset = sum.num_elements / 2 - offset; unsigned reverse_offset = sum.num_elements / 2 - offset;
for (unsigned i = offset; i < offset + sum.num_elements / 2; i++) { for (unsigned i = offset; i < offset + sum.num_elements / 2; i++) {
...@@ -393,10 +392,6 @@ template <bool add>__device__ inline void storeVisibilities(Visibilities visibil ...@@ -393,10 +392,6 @@ template <bool add>__device__ inline void storeVisibilities(Visibilities visibil
int offset_real = (threadIdx.x % 2 == 0 ? 0 : -1 * static_cast<int>(sum.num_elements) / 2); int offset_real = (threadIdx.x % 2 == 0 ? 0 : -1 * static_cast<int>(sum.num_elements) / 2);
int offset_imag = (threadIdx.x % 2 == 0 ? sum.num_elements / 2 : 0); int offset_imag = (threadIdx.x % 2 == 0 ? sum.num_elements / 2 : 0);
storeVisibility<add>(visibilities, channel, baseline, recvY, recvX, 0, 0, polY, polX, skipCheckY, skipCheckX, sum.x[i + offset_real], sum.x[i + offset_imag]); storeVisibility<add>(visibilities, channel, baseline, recvY, recvX, 0, 0, polY, polX, skipCheckY, skipCheckX, sum.x[i + offset_real], sum.x[i + offset_imag]);
// ToDo: Figure out why the results are wrong in some cases without this sync. e.g. CorrelatorTest -b 16 -c 1 -n 33 -N 32 -r 1 -R 1 -t 8
#if NR_BITS == 16
// __syncthreads();
#endif
} }
#endif #endif
#else #else
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment