Skip to content
Snippets Groups Projects
Commit 3250cec0 authored by John Romein's avatar John Romein
Browse files

Added support for sm_120 (consumer-grade Blackwell)

parent 01a863c8
Branches
No related tags found
No related merge requests found
Pipeline #111746 passed
...@@ -22,7 +22,7 @@ ...@@ -22,7 +22,7 @@
#error this architecture has no suitable tensor cores #error this architecture has no suitable tensor cores
#endif #endif
#if __CUDA_ARCH__ != 700 && __CUDA_ARCH__ != 720 && __CUDA_ARCH__ != 750 && __CUDA_ARCH__ != 800 && __CUDA_ARCH__ != 860 && __CUDA_ARCH__ != 870 && __CUDA_ARCH__ != 890 && __CUDA_ARCH__ != 900 #if __CUDA_ARCH__ != 700 && __CUDA_ARCH__ != 720 && __CUDA_ARCH__ != 750 && __CUDA_ARCH__ != 800 && __CUDA_ARCH__ != 860 && __CUDA_ARCH__ != 870 && __CUDA_ARCH__ != 890 && __CUDA_ARCH__ != 900 && __CUDA_ARCH__ != 1200
#define PORTABLE // unknown architecture -> write visibilities in portable way (via shared memory) #define PORTABLE // unknown architecture -> write visibilities in portable way (via shared memory)
#endif #endif
...@@ -334,7 +334,7 @@ template <bool add>__device__ inline void storeVisibilities(Visibilities visibil ...@@ -334,7 +334,7 @@ template <bool add>__device__ inline void storeVisibilities(Visibilities visibil
storeVisibility<add>(visibilities, channel, recvY + 0, recvX + 1, polY, polX, skipCheckY, skipCheckX, sum.x[4], sum.x[5]); storeVisibility<add>(visibilities, channel, recvY + 0, recvX + 1, polY, polX, skipCheckY, skipCheckX, sum.x[4], sum.x[5]);
storeVisibility<add>(visibilities, channel, recvY + 1, recvX + 0, polY, polX, skipCheckY, skipCheckX, sum.x[2], sum.x[3]); storeVisibility<add>(visibilities, channel, recvY + 1, recvX + 0, polY, polX, skipCheckY, skipCheckX, sum.x[2], sum.x[3]);
storeVisibility<add>(visibilities, channel, recvY + 1, recvX + 1, polY, polX, skipCheckY, skipCheckX, sum.x[6], sum.x[7]); storeVisibility<add>(visibilities, channel, recvY + 1, recvX + 1, polY, polX, skipCheckY, skipCheckX, sum.x[6], sum.x[7]);
#elif (__CUDA_ARCH__ == 720 && NR_BITS == 8) || __CUDA_ARCH__ == 750 || __CUDA_ARCH__ == 800 || __CUDA_ARCH__ == 860 || __CUDA_ARCH__ == 870 || __CUDA_ARCH__ == 890 || __CUDA_ARCH__ == 900 #elif (__CUDA_ARCH__ == 720 && NR_BITS == 8) || __CUDA_ARCH__ == 750 || __CUDA_ARCH__ == 800 || __CUDA_ARCH__ == 860 || __CUDA_ARCH__ == 870 || __CUDA_ARCH__ == 890 || __CUDA_ARCH__ == 900 || __CUDA_ARCH__ == 1200
unsigned polY = (threadIdx.x >> 2) & 1; unsigned polY = (threadIdx.x >> 2) & 1;
unsigned polX = threadIdx.x & 1; unsigned polX = threadIdx.x & 1;
unsigned recvY = firstReceiverY + NR_RECEIVERS_PER_TCM_Y * y + ((threadIdx.x >> 3) & 3); unsigned recvY = firstReceiverY + NR_RECEIVERS_PER_TCM_Y * y + ((threadIdx.x >> 3) & 3);
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment