diff --git a/.gitmodules b/.gitmodules new file mode 100644 index 0000000000000000000000000000000000000000..5c2de547ee7275e615821173bf63de8982dd7207 --- /dev/null +++ b/.gitmodules @@ -0,0 +1,3 @@ +[submodule "external/cuda-wrappers"] + path = external/cuda-wrappers + url = https://github.com/nlesc-recruit/CUDA-wrappers.git diff --git a/Makefile b/Makefile index 04e5ea223dd5825fa92002528b73ee50fdf05a27..e7f090f1a39230054cc5b819bc3168a58a939cf3 100644 --- a/Makefile +++ b/Makefile @@ -3,7 +3,6 @@ CUDA= $(shell dirname `dirname \`which nvcc\``) #CUDA= /usr/local/cuda CUDA_INCLUDE= $(shell dirname `find $(CUDA) -name cuda.h`) CUDA_LIBDIR= $(shell dirname `find $(CUDA) -name libcuda.so`|head -n1) -NVRTC_INCLUDE= $(shell dirname `find $(CUDA) -name nvrtc.h`) NVRTC_LIBDIR= $(shell dirname `find $(CUDA) -name libnvrtc.so`|head -n1) #POWER_SENSOR= $(HOME)/projects/libpowersensor-master/build ARCH= $(shell arch) @@ -11,16 +10,14 @@ CC= gcc CXX= g++ #-Wno-deprecated-declarations NVCC= nvcc INCLUDES= -I. 
-INCLUDES+= -I$(CUDA_INCLUDE) -I$(NVRTC_INCLUDE) +INCLUDES+= -I$(CUDA_INCLUDE) #INCLUDES+= -I$(POWER_SENSOR)/include CXXFLAGS+= -std=c++11 -O3 -g -fpic -fopenmp $(INCLUDES) -DNDEBUG NVCCFLAGS= $(INCLUDES) #CXXFLAGS+= -march=core-avx2 -mcmodel=medium -LIBTCC_SOURCES= util/cu.cc\ - util/nvrtc.cc\ - libtcc/CorrelatorKernel.cc\ +LIBTCC_SOURCES= libtcc/CorrelatorKernel.cc\ libtcc/Correlator.cc\ libtcc/Kernel.cc @@ -53,8 +50,14 @@ EXECUTABLES= test/SimpleExample/SimpleExample\ test/CorrelatorTest/CorrelatorTest\ test/OpenCLCorrelatorTest/OpenCLCorrelatorTest +CUDA_WRAPPERS_DIR= external/cuda-wrappers +CUDA_WRAPPERS_LIB= $(CUDA_WRAPPERS_DIR)/libcu.so +CUDA_WRAPPERS_INCLUDE= $(CUDA_WRAPPERS_DIR)/cu +#LIBTCC_OBJECTS+= $(CUDA_WRAPPERS_LIB) + LIBRARIES= -L$(CUDA_LIBDIR) -lcuda\ - -L$(NVRTC_LIBDIR) -lnvrtc #\ + $(CUDA_WRAPPERS_LIB) \ + -L$(NVRTC_LIBDIR) -lnvrtc #-L$(POWER_SENSOR)/lib -lpowersensor #-lnvidia-ml @@ -82,13 +85,17 @@ all:: $(EXECUTABLES) clean:: $(RM) $(OBJECTS) $(SHARED_OBJECTS) $(DEPENDENCIES) $(EXECUTABLES) +$(CUDA_WRAPPERS_LIB): + cd $(CUDA_WRAPPERS_DIR) && cmake . + cd $(CUDA_WRAPPERS_DIR) && CPATH=$(CPATH):$(CUDA_INCLUDE) make + libtcc/TCCorrelator.o: libtcc/TCCorrelator.cu # CUDA code embedded in object file ld -r -b binary -o $@ $< libtcc/TCCorrelator.d: - -libtcc/libtcc.so.$(VERSION): $(LIBTCC_OBJECTS) +libtcc/libtcc.so.$(VERSION): $(LIBTCC_OBJECTS) $(CUDA_WRAPPERS_LIB) $(CXX) -shared -o $@ $^ $(LIBRARIES) test/SimpleExample/SimpleExample: $(SIMPLE_EXAMPLE_OBJECTS) libtcc/libtcc.so diff --git a/README.md b/README.md index 8f020f0e8ac7c8f847ab88590dba43a3a101ec1f..362d4e09392bf4e17b3666de817d8ae6124d8242 100644 --- a/README.md +++ b/README.md @@ -11,6 +11,8 @@ appear in _Astronomy and Astrophysics_ soon. ## Brief overview on how to use the Tensor-Core Correlator library: +Clone the repository (`git clone --recursive`) + Build the library (just type `make`) Include `libtcc/Correlator.h`, and link with `libtcc/libtcc.so`. 
diff --git a/external/cuda-wrappers b/external/cuda-wrappers new file mode 160000 index 0000000000000000000000000000000000000000..884fbc7c69617f0b4fbc6696435272a488f49716 --- /dev/null +++ b/external/cuda-wrappers @@ -0,0 +1 @@ +Subproject commit 884fbc7c69617f0b4fbc6696435272a488f49716 diff --git a/libtcc/Correlator.h b/libtcc/Correlator.h index 5b9cac6744a1d4f194634e967f30dbf67fe10aaa..0a14267b1d7549e320663485d5213c98d5a71c8f 100644 --- a/libtcc/Correlator.h +++ b/libtcc/Correlator.h @@ -2,8 +2,8 @@ #define TCC_CORRELATOR_H #include "libtcc/CorrelatorKernel.h" -#include "util/cu.h" -#include "util/nvrtc.h" +#include "external/cuda-wrappers/cu/cu.h" +#include "external/cuda-wrappers/cu/nvrtc.h" #include <string> diff --git a/libtcc/Kernel.h b/libtcc/Kernel.h index 63329ee32a91a50e6c261b5f5f48e97bfbe533df..a5d195c21f32e0a30b4f6cfc1a61478e28326dd0 100644 --- a/libtcc/Kernel.h +++ b/libtcc/Kernel.h @@ -1,7 +1,7 @@ #if !defined TCC_KERNEL_H #define TCC_KERNEL_H -#include "util/cu.h" +#include "external/cuda-wrappers/cu/cu.h" #include <stdint.h> diff --git a/test/Common/Record.h b/test/Common/Record.h index 2704748115ff90eaeb62c158377fbb2d00df655d..fa65629c19ef398f97bbd9fcdd0011e14ec62143 100644 --- a/test/Common/Record.h +++ b/test/Common/Record.h @@ -3,7 +3,7 @@ #include "test/Common/Config.h" -#include "util/cu.h" +#include "external/cuda-wrappers/cu/cu.h" #if defined MEASURE_POWER #include <powersensor/NVMLPowerSensor.h> diff --git a/test/Common/UnitTest.h b/test/Common/UnitTest.h index dce8b476d3a008b0c265d245cf640a5f10e47ff8..be3c090f487fae9f0cc757fb196beef8cba25b32 100644 --- a/test/Common/UnitTest.h +++ b/test/Common/UnitTest.h @@ -2,7 +2,7 @@ #define UNIT_TEST_H #include "test/Common/Record.h" -#include "util/cu.h" +#include "external/cuda-wrappers/cu/cu.h" #if defined MEASURE_POWER #include <powersensor/NVMLPowerSensor.h> diff --git a/test/CorrelatorTest/CorrelatorTest.cc b/test/CorrelatorTest/CorrelatorTest.cc index 
2735a36378a9545b678b78b089623571e107d66c..33b33d01381d53f22fbbaee48bce2f47b73d6211 100644 --- a/test/CorrelatorTest/CorrelatorTest.cc +++ b/test/CorrelatorTest/CorrelatorTest.cc @@ -2,7 +2,7 @@ #include "test/Common/Record.h" #include "test/CorrelatorTest/CorrelatorTest.h" #include "util/ExceptionPropagator.h" -#include "util/nvrtc.h" +#include "external/cuda-wrappers/cu/nvrtc.h" #include <cstdlib> #include <cstring> diff --git a/util/cu.cc b/util/cu.cc deleted file mode 100644 index 82113bedfe3a17f0c66e300214ca9cb197cb4e35..0000000000000000000000000000000000000000 --- a/util/cu.cc +++ /dev/null @@ -1,37 +0,0 @@ -#include "cu.h" - -#include <iostream> -#include <sstream> - - -namespace cu { - -const char *Error::what() const noexcept -{ - const char *str; - return cuGetErrorString(_result, &str) != CUDA_ERROR_INVALID_VALUE ? str : "unknown error"; -} - - -Context Device::primaryCtxRetain() -{ - CUcontext context; - checkCudaCall(cuDevicePrimaryCtxRetain(&context, _obj)); - return Context(context, *this); -} - - -void Source::compile(const char *output_file_name, const char *compiler_options) -{ - std::stringstream command_line; - command_line << "nvcc -cubin " << compiler_options << " -o " << output_file_name << ' ' << input_file_name; -//#pragma omp critical (clog) - //std::clog << command_line.str() << std::endl; - - int retval = system(command_line.str().c_str()); - - if (WEXITSTATUS(retval) != 0) - throw Error(CUDA_ERROR_INVALID_SOURCE); -} - -} diff --git a/util/cu.h b/util/cu.h deleted file mode 100644 index 72f6cf575a7512b097539202a623b0f089ed951a..0000000000000000000000000000000000000000 --- a/util/cu.h +++ /dev/null @@ -1,674 +0,0 @@ -#if !defined CU_WRAPPER_H -#define CU_WRAPPER_H - -#include <cuda.h> -#include <cuda_runtime_api.h> -#include <exception> -#include <fstream> -#include <memory> -#include <string> -#include <vector> - - -namespace cu { - class Error : public std::exception { - public: - Error(CUresult result) - : - _result(result) - { - } - - 
virtual const char *what() const noexcept; - - operator CUresult () const - { - return _result; - } - - private: - CUresult _result; - }; - - - inline void checkCudaCall(CUresult result) - { - if (result != CUDA_SUCCESS) - throw Error(result); - } - - - inline void init(unsigned flags = 0) - { - checkCudaCall(cuInit(flags)); - } - - - inline int driverGetVersion() - { - int version; - checkCudaCall(cuDriverGetVersion(&version)); - return version; - } - - inline void memcpyHtoD(CUdeviceptr dst, const void *src, size_t size) - { - checkCudaCall(cuMemcpyHtoD(dst, src, size)); - } - - class Context; - class Stream; - - template <typename T> class Wrapper - { - public: - // conversion to C-style T - - operator T () const - { - return _obj; - } - - operator T () - { - return _obj; - } - - bool operator == (const Wrapper<T> &other) - { - return _obj == other._obj; - } - - bool operator != (const Wrapper<T> &other) - { - return _obj != other._obj; - } - - protected: - Wrapper<T>() - { - } - - Wrapper<T>(const Wrapper<T> &other) - : - _obj(other._obj), - manager(other.manager) - { - } - - Wrapper<T>(Wrapper<T> &&other) - : - _obj(other._obj), - manager(std::move(other.manager)) - { - other._obj = 0; - } - - Wrapper<T>(T &obj) - : - _obj(obj) - { - } - - T _obj; - std::shared_ptr<T> manager; - }; - - class Device : public Wrapper<CUdevice> - { - public: - // Device Management - - Device(int ordinal) - { - checkCudaCall(cuDeviceGet(&_obj, ordinal)); - } - - int getAttribute(CUdevice_attribute attribute) const - { - int value; - checkCudaCall(cuDeviceGetAttribute(&value, attribute, _obj)); - return value; - } - - template <CUdevice_attribute attribute> int getAttribute() const - { - return getAttribute(attribute); - } - - static int getCount() - { - int nrDevices; - checkCudaCall(cuDeviceGetCount(&nrDevices)); - return nrDevices; - } - - std::string getName() const - { - char name[64]; - checkCudaCall(cuDeviceGetName(name, sizeof name, _obj)); - return std::string(name); - } - 
- size_t totalMem() const - { - size_t size; - checkCudaCall(cuDeviceTotalMem(&size, _obj)); - return size; - } - - - // Primary Context Management - - std::pair<unsigned, bool> primaryCtxGetState() const - { - unsigned flags; - int active; - checkCudaCall(cuDevicePrimaryCtxGetState(_obj, &flags, &active)); - return std::pair<unsigned, bool>(flags, active); - } - - // void primaryCtxRelease() not available; it is released on destruction of the Context returned by Device::primaryContextRetain() - - void primaryCtxReset() - { - checkCudaCall(cuDevicePrimaryCtxReset(_obj)); - } - - Context primaryCtxRetain(); // retain this context until the primary context can be released - - void primaryCtxSetFlags(unsigned flags) - { - checkCudaCall(cuDevicePrimaryCtxSetFlags(_obj, flags)); - } - }; - - - class Context : public Wrapper<CUcontext> - { - public: - // Context Management - - Context(int flags, Device &device) - : - _primaryContext(false) - { - checkCudaCall(cuCtxCreate(&_obj, flags, device)); - manager = std::shared_ptr<CUcontext>(new CUcontext(_obj), [] (CUcontext *ptr) { if (*ptr) cuCtxDestroy(*ptr); delete ptr; }); - } - - Context(CUcontext context) - : - Wrapper<CUcontext>(context), - _primaryContext(false) - { - } - - unsigned getApiVersion() const - { - unsigned version; - checkCudaCall(cuCtxGetApiVersion(_obj, &version)); - return version; - } - - static CUfunc_cache getCacheConfig() - { - CUfunc_cache config; - checkCudaCall(cuCtxGetCacheConfig(&config)); - return config; - } - - static void setCacheConfig(CUfunc_cache config) - { - checkCudaCall(cuCtxSetCacheConfig(config)); - } - - static Context getCurrent() - { - CUcontext context; - checkCudaCall(cuCtxGetCurrent(&context)); - return std::move(Context(context)); - } - - void setCurrent() const - { - checkCudaCall(cuCtxSetCurrent(_obj)); - } - - void pushCurrent() - { - checkCudaCall(cuCtxPushCurrent(_obj)); - } - - static Context popCurrent() - { - CUcontext context; - 
checkCudaCall(cuCtxPopCurrent(&context)); - return Context(context); - } - - void setSharedMemConfig(CUsharedconfig config) - { - checkCudaCall(cuCtxSetSharedMemConfig(config)); - } - - static Device getDevice() - { - CUdevice device; - checkCudaCall(cuCtxGetDevice(&device)); - return Device(device); // FIXME: ~Device() - } - - static size_t getLimit(CUlimit limit) - { - size_t value; - checkCudaCall(cuCtxGetLimit(&value, limit)); - return value; - } - - template <CUlimit limit> static size_t getLimit() - { - return getLimit(limit); - } - - static void setLimit(CUlimit limit, size_t value) - { - checkCudaCall(cuCtxSetLimit(limit, value)); - } - - template <CUlimit limit> static void setLimit(size_t value) - { - setLimit(limit, value); - } - - static void synchronize() - { - checkCudaCall(cuCtxSynchronize()); - } - - private: - friend class Device; - Context(CUcontext context, Device &device) - : - Wrapper<CUcontext>(context), - _primaryContext(true) - { - } - - bool _primaryContext; - }; - - - class HostMemory : public Wrapper<void *> - { - public: - HostMemory(size_t size, int flags = 0) - { - checkCudaCall(cuMemHostAlloc(&_obj, size, flags)); - manager = std::shared_ptr<void *>(new (void *)(_obj), [] (void **ptr) { cuMemFreeHost(*ptr); delete ptr; }); - } - - template <typename T> operator T * () - { - return static_cast<T *>(_obj); - } - }; - - - class DeviceMemory : public Wrapper<CUdeviceptr> - { - public: - DeviceMemory(size_t size) - { - checkCudaCall(cuMemAlloc(&_obj, size)); - manager = std::shared_ptr<CUdeviceptr>(new CUdeviceptr(_obj), [] (CUdeviceptr *ptr) { cuMemFree(*ptr); delete ptr; }); - } - - DeviceMemory(CUdeviceptr ptr) - : - Wrapper(ptr) - { - } - - DeviceMemory(const HostMemory &hostMemory) - { - checkCudaCall(cuMemHostGetDevicePointer(&_obj, hostMemory, 0)); - } - - const void *parameter() const // used to construct parameter list for launchKernel(); - { - return &_obj; - } - }; - - - class Array : public Wrapper<CUarray> - { - public: - 
Array(unsigned width, CUarray_format format, unsigned numChannels) - { - Array(width, 0, format, numChannels); - manager = std::shared_ptr<CUarray>(new CUarray(_obj), [] (CUarray *ptr) { cuArrayDestroy(*ptr); delete ptr; }); - } - - Array(unsigned width, unsigned height, CUarray_format format, unsigned numChannels) - { - CUDA_ARRAY_DESCRIPTOR descriptor; - descriptor.Width = width; - descriptor.Height = height; - descriptor.Format = format; - descriptor.NumChannels = numChannels; - checkCudaCall(cuArrayCreate(&_obj, &descriptor)); - manager = std::shared_ptr<CUarray>(new CUarray(_obj), [] (CUarray *ptr) { cuArrayDestroy(*ptr); delete ptr; }); - } - - Array(unsigned width, unsigned height, unsigned depth, CUarray_format format, unsigned numChannels) - { - CUDA_ARRAY3D_DESCRIPTOR descriptor; - descriptor.Width = width; - descriptor.Height = height; - descriptor.Depth = depth; - descriptor.Format = format; - descriptor.NumChannels = numChannels; - descriptor.Flags = 0; - checkCudaCall(cuArray3DCreate(&_obj, &descriptor)); - manager = std::shared_ptr<CUarray>(new CUarray(_obj), [] (CUarray *ptr) { cuArrayDestroy(*ptr); delete ptr; }); - } - - Array(CUarray &array) - : - Wrapper(array) - { - } - }; - - - class Source - { - public: - Source(const char *input_file_name) - : - input_file_name(input_file_name) - { - } - - void compile(const char *ptx_name, const char *compile_options = 0); - - private: - const char *input_file_name; - }; - - - class Module : public Wrapper<CUmodule> - { - public: - Module(const char *file_name) - { -#if defined TEGRA_QUIRKS // cuModuleLoad broken on Jetson TX1 - std::ifstream file(file_name); - std::string program((std::istreambuf_iterator<char>(file)), std::istreambuf_iterator<char>()); - checkCudaCall(cuModuleLoadData(&_obj, program.c_str())); -#else - checkCudaCall(cuModuleLoad(&_obj, file_name)); -#endif - manager = std::shared_ptr<CUmodule>(new CUmodule(_obj), [] (CUmodule *ptr) { cuModuleUnload(*ptr); delete ptr; }); - } - - 
Module(const void *data) - { - checkCudaCall(cuModuleLoadData(&_obj, data)); - manager = std::shared_ptr<CUmodule>(new CUmodule(_obj), [] (CUmodule *ptr) { cuModuleUnload(*ptr); delete ptr; }); - } - - Module(CUmodule &module) - : - Wrapper(module) - { - } - -#if 0 - TexRef getTexRef(const char *name) const - { - CUtexref texref; - checkCudaCall(cuModuleGetTexRef(&texref, _obj, name)); - return TexRef(texref); - } -#endif - - CUdeviceptr getGlobal(const char *name) const - { - CUdeviceptr deviceptr; - checkCudaCall(cuModuleGetGlobal(&deviceptr, nullptr, _obj, name)); - return deviceptr; - } - }; - - - class Function : public Wrapper<CUfunction> - { - public: - Function(const Module &module, const char *name) - { - checkCudaCall(cuModuleGetFunction(&_obj, module, name)); - } - - Function(CUfunction &function) - : - Wrapper(function) - { - } - - int getAttribute(CUfunction_attribute attribute) - { - int value; - checkCudaCall(cuFuncGetAttribute(&value, attribute, _obj)); - return value; - } - - void setCacheConfig(CUfunc_cache config) - { - checkCudaCall(cuFuncSetCacheConfig(_obj, config)); - } - }; - - - class Event : public Wrapper<CUevent> - { - public: - Event(int flags = CU_EVENT_DEFAULT) - { - checkCudaCall(cuEventCreate(&_obj, flags)); - manager = std::shared_ptr<CUevent>(new CUevent(_obj), [] (CUevent *ptr) { cuEventDestroy(*ptr); delete ptr; }); - } - - Event(CUevent &event) - : - Wrapper(event) - { - } - - float elapsedTime(const Event &start) const - { - float ms; - checkCudaCall(cuEventElapsedTime(&ms, start, _obj)); - return ms; - } - - void query() const - { - checkCudaCall(cuEventQuery(_obj)); // unsuccessful result throws cu::Error - } - - void record() - { - checkCudaCall(cuEventRecord(_obj, 0)); - } - - void record(Stream &); - - void synchronize() - { - checkCudaCall(cuEventSynchronize(_obj)); - } - }; - - - class Stream : public Wrapper<CUstream> - { - friend class Event; - - public: - Stream(int flags = CU_STREAM_DEFAULT) - { - 
checkCudaCall(cuStreamCreate(&_obj, flags)); - manager = std::shared_ptr<CUstream>(new CUstream(_obj), [] (CUstream *ptr) { cuStreamDestroy(*ptr); delete ptr; }); - } - - Stream(CUstream stream) - : - Wrapper<CUstream>(stream) - { - } - - void memcpyHtoDAsync(CUdeviceptr devPtr, const void *hostPtr, size_t size) - { - checkCudaCall(cuMemcpyHtoDAsync(devPtr, hostPtr, size, _obj)); - } - - void memcpyDtoHAsync(void *hostPtr, CUdeviceptr devPtr, size_t size) - { - checkCudaCall(cuMemcpyDtoHAsync(hostPtr, devPtr, size, _obj)); - } - - void launchKernel(Function &function, unsigned gridX, unsigned gridY, unsigned gridZ, unsigned blockX, unsigned blockY, unsigned blockZ, unsigned sharedMemBytes, const std::vector<const void *> &parameters) - { - checkCudaCall(cuLaunchKernel(function, gridX, gridY, gridZ, blockX, blockY, blockZ, sharedMemBytes, _obj, const_cast<void **>(&parameters[0]), 0)); - } - -#if CUDART_VERSION >= 9000 - void launchCooperativeKernel(Function &function, unsigned gridX, unsigned gridY, unsigned gridZ, unsigned blockX, unsigned blockY, unsigned blockZ, unsigned sharedMemBytes, const std::vector<const void *> &parameters) - { - checkCudaCall(cuLaunchCooperativeKernel(function, gridX, gridY, gridZ, blockX, blockY, blockZ, sharedMemBytes, _obj, const_cast<void **>(&parameters[0]))); - } -#endif - - void query() - { - checkCudaCall(cuStreamQuery(_obj)); // unsuccessful result throws cu::Error - } - - void synchronize() - { - checkCudaCall(cuStreamSynchronize(_obj)); - } - - void wait(Event &event) - { - checkCudaCall(cuStreamWaitEvent(_obj, event, 0)); - } - - void addCallback(CUstreamCallback callback, void *userData, int flags = 0) - { - checkCudaCall(cuStreamAddCallback(_obj, callback, userData, flags)); - } - - void record(Event &event) - { - checkCudaCall(cuEventRecord(event, _obj)); - } - - void batchMemOp(unsigned count, CUstreamBatchMemOpParams *paramArray, unsigned flags) - { - checkCudaCall(cuStreamBatchMemOp(_obj, count, paramArray, flags)); - } - - void 
waitValue32(CUdeviceptr addr, cuuint32_t value, unsigned flags) const - { - checkCudaCall(cuStreamWaitValue32(_obj, addr, value, flags)); - } - - void writeValue32(CUdeviceptr addr, cuuint32_t value, unsigned flags) - { - checkCudaCall(cuStreamWriteValue32(_obj, addr, value, flags)); - } - }; - -#if 0 - class Graph : public Wrapper<CUgraph> - { - public: - class GraphNode : public Wrapper<CUgraphNode> - { - }; - - class ExecKernelNode : public GraphNode - { - }; - - class KernelNodeParams : public Wrapper<CUDA_KERNEL_NODE_PARAMS> - { - public: - KernelNodeParams(const Function &function, - unsigned gridDimX, unsigned gridDimY, unsigned gridDimZ, - unsigned blockDimX, unsigned blockDimY, unsigned blockDimZ, - unsigned sharedMemBytes, - const std::vector<const void *> &kernelParams) - { - _obj.func = function; - _obj.blockDimX = blockDimX; - _obj.blockDimY = blockDimY; - _obj.blockDimZ = blockDimZ; - _obj.gridDimX = gridDimX; - _obj.gridDimY = gridDimY; - _obj.gridDimZ = gridDimZ; - _obj.sharedMemBytes = sharedMemBytes; - _obj.kernelParams = const_cast<void **>(kernelParams.data()); - _obj.extra = nullptr; - } - }; - - class Exec : public Wrapper<CUgraphExec> - { - public: - void launch(Stream &stream) - { - checkCudaCall(cuGraphLaunch(_obj, stream)); - } - }; - - Graph(unsigned flags = 0) - { - checkCudaCall(cuGraphCreate(&_obj, flags)); - manager = std::shared_ptr<CUgraphNode>(new CUgraphNode(_obj), [] (CUgraphNode *ptr) { cuGraphDestroy(*ptr); delete ptr; }); - } - - Graph(CUgraph &graph) - : - Wrapper(graph) - { - } - - ExecKernelNode addKernelNode(/* std::vector<GraphNode> dependencies, */ const KernelNodeParams &kernelArgs) - { - ExecKernelNode node; - checkCudaCall(cuGraphAddKernelNode(& (CUgraphNode &) node, _obj, nullptr, 0, & (const CUDA_KERNEL_NODE_PARAMS &) kernelArgs)); - return node; - } - - Exec instantiate() - { - Exec exec; - checkCudaCall(cuGraphInstantiate(& (CUgraphExec &) exec, _obj, nullptr, nullptr, 0)); - return exec; - } - }; -#endif - - - 
inline void Event::record(Stream &stream) - { - checkCudaCall(cuEventRecord(_obj, stream._obj)); - } -} - -#endif diff --git a/util/nvrtc.cc b/util/nvrtc.cc deleted file mode 100644 index b6ac88599a78ba2cad86136e2013674d52b30943..0000000000000000000000000000000000000000 --- a/util/nvrtc.cc +++ /dev/null @@ -1,11 +0,0 @@ -#include "nvrtc.h" - - -namespace nvrtc { - -const char *Error::what() const noexcept -{ - return nvrtcGetErrorString(_result); -} - -} diff --git a/util/nvrtc.h b/util/nvrtc.h deleted file mode 100644 index f429a737bb5f8bdd05fb2c798bb970c50ad4b924..0000000000000000000000000000000000000000 --- a/util/nvrtc.h +++ /dev/null @@ -1,109 +0,0 @@ -#if !defined NVRTC_H -#define NVRTC_H - -#include <cuda.h> -#include <nvrtc.h> - -#include <algorithm> -#include <exception> -#include <fstream> -#include <iterator> -#include <string> -#include <vector> - - -namespace nvrtc { - class Error : public std::exception { - public: - Error(nvrtcResult result) - : - _result(result) - { - } - - virtual const char *what() const noexcept; - - operator nvrtcResult () const - { - return _result; - } - - private: - nvrtcResult _result; - }; - - - inline void checkNvrtcCall(nvrtcResult result) - { - if (result != NVRTC_SUCCESS) - throw Error(result); - } - - - class Program { - public: - Program(const std::string &src, const std::string &name, int numHeaders = 0, const char *headers[] = nullptr, const char *includeNames[] = nullptr) // TODO: use std::vector<std::string> - { - checkNvrtcCall(nvrtcCreateProgram(&program, src.c_str(), name.c_str(), numHeaders, headers, includeNames)); - } - - Program(const std::string &filename) - { - std::ifstream ifs(filename); - std::string source(std::istreambuf_iterator<char>{ifs}, {}); - checkNvrtcCall(nvrtcCreateProgram(&program, source.c_str(), filename.c_str(), 0, nullptr, nullptr)); - } - - ~Program() - { - checkNvrtcCall(nvrtcDestroyProgram(&program)); - } - - void compile(const std::vector<std::string> &options) - { - 
std::vector<const char *> c_options; - std::transform(options.begin(), options.end(), std::back_inserter(c_options), [] (const std::string &option) { return option.c_str();}); - checkNvrtcCall(nvrtcCompileProgram(program, c_options.size(), c_options.data())); - } - - std::string getPTX() - { - size_t size; - std::string ptx; - - checkNvrtcCall(nvrtcGetPTXSize(program, &size)); - ptx.resize(size); - checkNvrtcCall(nvrtcGetPTX(program, &ptx[0])); - return ptx; - } - -#if CUDA_VERSION >= 11020 - std::vector<char> getCUBIN() - { - size_t size; - std::vector<char> cubin; - - checkNvrtcCall(nvrtcGetCUBINSize(program, &size)); - cubin.resize(size); - checkNvrtcCall(nvrtcGetCUBIN(program, &cubin[0])); - return cubin; - } -#endif - - std::string getLog() - { - size_t size; - std::string log; - - checkNvrtcCall(nvrtcGetProgramLogSize(program, &size)); - log.resize(size); - checkNvrtcCall(nvrtcGetProgramLog(program, &log[0])); - return log; - } - - private: - nvrtcProgram program; - }; -} - -#endif