Skip to content
Snippets Groups Projects
Commit 52ac727c authored by John Romein
Browse files

Added e4m3 benchmark.

parent dab3a82c
No related branches found
No related tags found
1 merge request: !15 (Fp8)
Pipeline #114301 passed
......@@ -8,6 +8,8 @@
#include <cstring>
#include <iostream>
#include <cuda_fp8.h>
#include <cudawrappers/nvrtc.hpp>
#define GNU_SOURCE
......@@ -28,7 +30,7 @@ Benchmark::Benchmark()
using Format = tcc::Format;
for (Format format : { Format::fp16, Format::e4m3, Format::e5m2, Format::i8, Format::i4 })
for (Format format : { Format::fp16, Format::e4m3, Format::i8, Format::i4 }) // e5m2 not tested separately, as it performs equal to e4m3
#pragma omp for collapse(2) schedule(dynamic) ordered
for (unsigned nrReceivers = 1; nrReceivers <= 576; nrReceivers ++)
for (unsigned nrReceiversPerBlock = 32; nrReceiversPerBlock <= 64; nrReceiversPerBlock += 16)
......@@ -43,6 +45,11 @@ Benchmark::Benchmark()
break;
case Format::e4m3 : if (capability >= 90)
doTest<std::complex<__nv_fp8_e4m3>, std::complex<float>>(format, nrReceiversPerBlock, nrReceivers);
break;
case Format::fp16 : if (capability >= 70)
doTest<std::complex<__half>, std::complex<float>>(format, nrReceiversPerBlock, nrReceivers);
......
......@@ -37,11 +37,12 @@ template<> std::complex<int8_t> Benchmark::randomValue<std::complex<int8_t>>()
}
// Produces a random complex sample: the real and imaginary parts are each
// computed as drand48() - .5 and then converted to the sample's component type.
// NOTE(review): this hunk appears to show two versions interleaved without +/-
// markers -- a std::complex<__half> specialization and a generic SampleType
// template that casts through SampleType::value_type. Presumably the generic
// template supersedes the specialization; confirm against the actual file.
template<> std::complex<__half> Benchmark::randomValue<std::complex<__half>>()
template<typename SampleType> SampleType Benchmark::randomValue()
{
return std::complex<__half>(drand48() - .5, drand48() - .5);
return SampleType((typename SampleType::value_type) (drand48() - .5), (typename SampleType::value_type) (drand48() - .5));
}
template <typename VisibilityType> bool Benchmark::approximates(const VisibilityType &a, const VisibilityType &b) const
{
return a == b;
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment