From 2fdfa0b4a2647b875ff0d5b0f8c600631f682f86 Mon Sep 17 00:00:00 2001 From: John Romein <romein@astron.nl> Date: Thu, 22 Sep 2022 18:12:01 -0500 Subject: [PATCH] Added support for Jetson AGX Orin. --- libtcc/TCCorrelator.cu | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/libtcc/TCCorrelator.cu b/libtcc/TCCorrelator.cu index f046c90..0f5d9e3 100644 --- a/libtcc/TCCorrelator.cu +++ b/libtcc/TCCorrelator.cu @@ -22,7 +22,7 @@ #error this architecture has no suitable tensor cores #endif -#if __CUDA_ARCH__ != 700 && __CUDA_ARCH__ != 720 && __CUDA_ARCH__ != 750 && __CUDA_ARCH__ != 800 && __CUDA_ARCH__ != 860 +#if __CUDA_ARCH__ != 700 && __CUDA_ARCH__ != 720 && __CUDA_ARCH__ != 750 && __CUDA_ARCH__ != 800 && __CUDA_ARCH__ != 860 && __CUDA_ARCH__ != 870 #define PORTABLE // unknown architecture -> write visibilities in portable way (via shared memory) #endif @@ -317,7 +317,7 @@ __device__ inline void storeVisibilities(Visibilities visibilities, unsigned cha unsigned recvX = firstReceiverX + NR_RECEIVERS_PER_TCM_X * x + ((threadIdx.x >> 2) & 2); unsigned polY = threadIdx.x & 1; unsigned polX = (threadIdx.x >> 1) & 1; -#elif (__CUDA_ARCH__ == 720 && NR_BITS == 8) || __CUDA_ARCH__ == 750 || __CUDA_ARCH__ == 800 || __CUDA_ARCH__ == 860 +#elif (__CUDA_ARCH__ == 720 && NR_BITS == 8) || __CUDA_ARCH__ == 750 || __CUDA_ARCH__ == 800 || __CUDA_ARCH__ == 860 || __CUDA_ARCH__ == 870 unsigned recvY = firstReceiverY + NR_RECEIVERS_PER_TCM_Y * y + ((threadIdx.x >> 3) & 3); unsigned recvX = firstReceiverX + NR_RECEIVERS_PER_TCM_X * x + ((threadIdx.x >> 1) & 1); unsigned polY = (threadIdx.x >> 2) & 1; @@ -331,7 +331,7 @@ __device__ inline void storeVisibilities(Visibilities visibilities, unsigned cha storeVisibility(visibilities, channel, baseline, recvY, recvX, 0, 1, polY, polX, skipCheckY, skipCheckX, sum.x[4], sum.x[5]); storeVisibility(visibilities, channel, baseline, recvY, recvX, 1, 0, polY, polX, skipCheckY, skipCheckX, sum.x[2], sum.x[3]); storeVisibility(visibilities, channel, baseline, recvY, recvX, 1, 1, polY, polX, skipCheckY, skipCheckX, sum.x[6], sum.x[7]); -#elif (__CUDA_ARCH__ == 720 && NR_BITS == 8) || __CUDA_ARCH__ == 750 || __CUDA_ARCH__ == 800 || __CUDA_ARCH__ == 860 +#elif (__CUDA_ARCH__ == 720 && NR_BITS == 8) || __CUDA_ARCH__ == 750 || __CUDA_ARCH__ == 800 || __CUDA_ARCH__ == 860 || __CUDA_ARCH__ == 870 storeVisibility(visibilities, channel, baseline, recvY, recvX, 0, 0, polY, polX, skipCheckY, skipCheckX, sum.x[0], sum.x[1]); #if NR_BITS == 8 || NR_BITS == 16 storeVisibility(visibilities, channel, baseline, recvY, recvX, 0, 2, polY, polX, skipCheckY, skipCheckX, sum.x[4], sum.x[5]); -- GitLab