Skip to content
Snippets Groups Projects
Commit 52ac727c authored by John Romein's avatar John Romein
Browse files

Added e4m3 benchmark.

parent dab3a82c
No related branches found
No related tags found
1 merge request: !15 (Fp8)
Pipeline #114301 passed
...@@ -8,6 +8,8 @@ ...@@ -8,6 +8,8 @@
#include <cstring> #include <cstring>
#include <iostream> #include <iostream>
#include <cuda_fp8.h>
#include <cudawrappers/nvrtc.hpp> #include <cudawrappers/nvrtc.hpp>
#define GNU_SOURCE #define GNU_SOURCE
...@@ -28,7 +30,7 @@ Benchmark::Benchmark() ...@@ -28,7 +30,7 @@ Benchmark::Benchmark()
using Format = tcc::Format; using Format = tcc::Format;
for (Format format : { Format::fp16, Format::e4m3, Format::e5m2, Format::i8, Format::i4 }) for (Format format : { Format::fp16, Format::e4m3, Format::i8, Format::i4 }) // e5m2 not tested separately, as it performs equal to e4m3
#pragma omp for collapse(2) schedule(dynamic) ordered #pragma omp for collapse(2) schedule(dynamic) ordered
for (unsigned nrReceivers = 1; nrReceivers <= 576; nrReceivers ++) for (unsigned nrReceivers = 1; nrReceivers <= 576; nrReceivers ++)
for (unsigned nrReceiversPerBlock = 32; nrReceiversPerBlock <= 64; nrReceiversPerBlock += 16) for (unsigned nrReceiversPerBlock = 32; nrReceiversPerBlock <= 64; nrReceiversPerBlock += 16)
...@@ -43,6 +45,11 @@ Benchmark::Benchmark() ...@@ -43,6 +45,11 @@ Benchmark::Benchmark()
break; break;
case Format::e4m3 : if (capability >= 90)
doTest<std::complex<__nv_fp8_e4m3>, std::complex<float>>(format, nrReceiversPerBlock, nrReceivers);
break;
case Format::fp16 : if (capability >= 70) case Format::fp16 : if (capability >= 70)
doTest<std::complex<__half>, std::complex<float>>(format, nrReceiversPerBlock, nrReceivers); doTest<std::complex<__half>, std::complex<float>>(format, nrReceiversPerBlock, nrReceivers);
......
...@@ -37,11 +37,12 @@ template<> std::complex<int8_t> Benchmark::randomValue<std::complex<int8_t>>() ...@@ -37,11 +37,12 @@ template<> std::complex<int8_t> Benchmark::randomValue<std::complex<int8_t>>()
} }
template<> std::complex<__half> Benchmark::randomValue<std::complex<__half>>() template<typename SampleType> SampleType Benchmark::randomValue()
{ {
return std::complex<__half>(drand48() - .5, drand48() - .5); return SampleType((typename SampleType::value_type) (drand48() - .5), (typename SampleType::value_type) (drand48() - .5));
} }
template <typename VisibilityType> bool Benchmark::approximates(const VisibilityType &a, const VisibilityType &b) const template <typename VisibilityType> bool Benchmark::approximates(const VisibilityType &a, const VisibilityType &b) const
{ {
return a == b; return a == b;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment