Skip to content
Snippets Groups Projects
Commit ff225d9d authored by John Romein's avatar John Romein
Browse files

storeVisibility() now has recvX, recvY as arguments, instead of baseline number.

parent 5d873e92
No related branches found
No related tags found
No related merge requests found
Pipeline #101197 passed
......@@ -265,8 +265,10 @@ __device__ inline float2 make_complex(float real, float imag)
CUSTOM_STORE_VISIBILITY
#else
template <bool add> __device__ inline void storeVisibility(Visibilities visibilities, unsigned channel, unsigned baseline, unsigned polY, unsigned polX, Visibility visibility)
template <bool add> __device__ inline void storeVisibility(Visibilities visibilities, unsigned channel, unsigned recvY, unsigned recvX, unsigned polY, unsigned polX, Visibility visibility)
{
unsigned baseline = (recvX * (recvX + 1) / 2) + recvY;
if (add)
visibilities[channel][baseline][polY][polX] += visibility;
else
......@@ -276,10 +278,10 @@ template <bool add> __device__ inline void storeVisibility(Visibilities visibili
#endif
template <bool add, typename T> __device__ inline void storeVisibility(Visibilities visibilities, unsigned channel, unsigned baseline, unsigned recvY, unsigned recvX, unsigned tcY, unsigned tcX, unsigned polY, unsigned polX, bool skipCheckY, bool skipCheckX, T sumR, T sumI)
template <bool add, typename T> __device__ inline void storeVisibility(Visibilities visibilities, unsigned channel, unsigned recvY, unsigned recvX, unsigned polY, unsigned polX, bool skipCheckY, bool skipCheckX, T sumR, T sumI)
{
if ((skipCheckY || recvY + tcY <= recvX + tcX) && (skipCheckX || recvX + tcX < NR_RECEIVERS))
storeVisibility<add>(visibilities, channel, baseline + tcX * recvX + tcX * (tcX + 1) / 2 + tcY, polY, polX, make_complex(sumR, sumI));
if ((skipCheckY || recvY <= recvX) && (skipCheckX || recvX < NR_RECEIVERS))
storeVisibility<add>(visibilities, channel, recvY, recvX, polY, polX, make_complex(sumR, sumI));
}
......@@ -323,32 +325,28 @@ template <bool add>__device__ inline void storeVisibilities(Visibilities visibil
#endif
#else
#if __CUDA_ARCH__ == 700 || (__CUDA_ARCH__ == 720 && NR_BITS == 16)
unsigned recvY = firstReceiverY + NR_RECEIVERS_PER_TCM_Y * y + ((threadIdx.x >> 3) & 2) + (threadIdx.x & 4);
unsigned recvX = firstReceiverX + NR_RECEIVERS_PER_TCM_X * x + ((threadIdx.x >> 2) & 2);
unsigned polY = threadIdx.x & 1;
unsigned polX = (threadIdx.x >> 1) & 1;
unsigned recvY = firstReceiverY + NR_RECEIVERS_PER_TCM_Y * y + ((threadIdx.x >> 3) & 2) + (threadIdx.x & 4);
unsigned recvX = firstReceiverX + NR_RECEIVERS_PER_TCM_X * x + ((threadIdx.x >> 2) & 2);
storeVisibility<add>(visibilities, channel, recvY + 0, recvX + 0, polY, polX, skipCheckY, skipCheckX, sum.x[0], sum.x[1]);
storeVisibility<add>(visibilities, channel, recvY + 0, recvX + 1, polY, polX, skipCheckY, skipCheckX, sum.x[4], sum.x[5]);
storeVisibility<add>(visibilities, channel, recvY + 1, recvX + 0, polY, polX, skipCheckY, skipCheckX, sum.x[2], sum.x[3]);
storeVisibility<add>(visibilities, channel, recvY + 1, recvX + 1, polY, polX, skipCheckY, skipCheckX, sum.x[6], sum.x[7]);
#elif (__CUDA_ARCH__ == 720 && NR_BITS == 8) || __CUDA_ARCH__ == 750 || __CUDA_ARCH__ == 800 || __CUDA_ARCH__ == 860 || __CUDA_ARCH__ == 870 || __CUDA_ARCH__ == 890 || __CUDA_ARCH__ == 900
unsigned recvY = firstReceiverY + NR_RECEIVERS_PER_TCM_Y * y + ((threadIdx.x >> 3) & 3);
unsigned recvX = firstReceiverX + NR_RECEIVERS_PER_TCM_X * x + ((threadIdx.x >> 1) & 1);
unsigned polY = (threadIdx.x >> 2) & 1;
unsigned polX = threadIdx.x & 1;
#endif
unsigned baseline = (recvX * (recvX + 1) / 2) + recvY;
unsigned recvY = firstReceiverY + NR_RECEIVERS_PER_TCM_Y * y + ((threadIdx.x >> 3) & 3);
unsigned recvX = firstReceiverX + NR_RECEIVERS_PER_TCM_X * x + ((threadIdx.x >> 1) & 1);
#if __CUDA_ARCH__ == 700 || (__CUDA_ARCH__ == 720 && NR_BITS == 16)
storeVisibility<add>(visibilities, channel, baseline, recvY, recvX, 0, 0, polY, polX, skipCheckY, skipCheckX, sum.x[0], sum.x[1]);
storeVisibility<add>(visibilities, channel, baseline, recvY, recvX, 0, 1, polY, polX, skipCheckY, skipCheckX, sum.x[4], sum.x[5]);
storeVisibility<add>(visibilities, channel, baseline, recvY, recvX, 1, 0, polY, polX, skipCheckY, skipCheckX, sum.x[2], sum.x[3]);
storeVisibility<add>(visibilities, channel, baseline, recvY, recvX, 1, 1, polY, polX, skipCheckY, skipCheckX, sum.x[6], sum.x[7]);
#elif (__CUDA_ARCH__ == 720 && NR_BITS == 8) || __CUDA_ARCH__ == 750 || __CUDA_ARCH__ == 800 || __CUDA_ARCH__ == 860 || __CUDA_ARCH__ == 870 || __CUDA_ARCH__ == 890 || __CUDA_ARCH__ == 900
storeVisibility<add>(visibilities, channel, baseline, recvY, recvX, 0, 0, polY, polX, skipCheckY, skipCheckX, sum.x[0], sum.x[1]);
storeVisibility<add>(visibilities, channel, recvY + 0, recvX + 0, polY, polX, skipCheckY, skipCheckX, sum.x[0], sum.x[1]);
#if NR_BITS == 8 || NR_BITS == 16
storeVisibility<add>(visibilities, channel, baseline, recvY, recvX, 0, 2, polY, polX, skipCheckY, skipCheckX, sum.x[4], sum.x[5]);
storeVisibility<add>(visibilities, channel, recvY + 0, recvX + 2, polY, polX, skipCheckY, skipCheckX, sum.x[4], sum.x[5]);
#endif
storeVisibility<add>(visibilities, channel, baseline, recvY, recvX, 4, 0, polY, polX, skipCheckY, skipCheckX, sum.x[2], sum.x[3]);
storeVisibility<add>(visibilities, channel, recvY + 4, recvX + 0, polY, polX, skipCheckY, skipCheckX, sum.x[2], sum.x[3]);
#if NR_BITS == 8 || NR_BITS == 16
storeVisibility<add>(visibilities, channel, baseline, recvY, recvX, 4, 2, polY, polX, skipCheckY, skipCheckX, sum.x[6], sum.x[7]);
storeVisibility<add>(visibilities, channel, recvY + 4, recvX + 2, polY, polX, skipCheckY, skipCheckX, sum.x[6], sum.x[7]);
#endif
#endif
#endif
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment