Skip to content
Snippets Groups Projects
Commit 8927f21d authored by Mattia Mancini's avatar Mattia Mancini
Browse files

Add small test of a graph

parents
No related branches found
No related tags found
No related merge requests found
cmake_minimum_required(VERSION 3.17 FATAL_ERROR)

# Find out if cudawrappers is used as a dependency. Build tests by default only
# if not used as a dependency.
# NOTE: this check must run before the first project() call; project() defines
# PROJECT_NAME, so afterwards the condition could never be true.
if(NOT DEFINED PROJECT_NAME AND BUILD_TESTING)
  set(CUDAWRAPPERS_TESTING_DEFAULT True)
else()
  set(CUDAWRAPPERS_TESTING_DEFAULT False)
endif()

# Default GPU architecture; can be overridden on the command line with
# -DCMAKE_CUDA_ARCHITECTURES=<arch>.
if(NOT DEFINED CMAKE_CUDA_ARCHITECTURES)
  set(CMAKE_CUDA_ARCHITECTURES 75)
endif()

# Single project declaration enabling both the C++ and the CUDA language.
project(
  cudawrappers
  DESCRIPTION "Playgrounds for the CUDA graph api"
  VERSION 0.0.1
  LANGUAGES CXX CUDA
)

include(GNUInstallDirs)

set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD_REQUIRED True)

# Default to a Debug build, but respect a build type chosen by the user.
if(NOT CMAKE_BUILD_TYPE)
  set(CMAKE_BUILD_TYPE Debug)
endif()

find_package(CUDAToolkit 10 REQUIRED)

add_executable(main main.cu)
target_link_libraries(main CUDA::cudart)
# The helper headers live next to main.cu in this directory.
target_include_directories(main PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
\ No newline at end of file
This diff is collapsed.
/* Copyright (c) 2022, NVIDIA CORPORATION. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
* * Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* * Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* * Neither the name of NVIDIA CORPORATION nor the names of its
* contributors may be used to endorse or promote products derived
* from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
* EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
* IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
* PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
* CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
* EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
* PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
* OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
// These are helper functions for the SDK samples (string parsing, timers, etc)
#ifndef COMMON_HELPER_STRING_H_
#define COMMON_HELPER_STRING_H_
#include <stdio.h>
#include <stdlib.h>
#include <fstream>
#include <string>
#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
#ifndef _CRT_SECURE_NO_DEPRECATE
#define _CRT_SECURE_NO_DEPRECATE
#endif
#ifndef STRCASECMP
#define STRCASECMP _stricmp
#endif
#ifndef STRNCASECMP
#define STRNCASECMP _strnicmp
#endif
#ifndef STRCPY
#define STRCPY(sFilePath, nLength, sPath) strcpy_s(sFilePath, nLength, sPath)
#endif
#ifndef FOPEN
#define FOPEN(fHandle, filename, mode) fopen_s(&fHandle, filename, mode)
#endif
#ifndef FOPEN_FAIL
#define FOPEN_FAIL(result) (result != 0)
#endif
#ifndef SSCANF
#define SSCANF sscanf_s
#endif
#ifndef SPRINTF
#define SPRINTF sprintf_s
#endif
#else // Linux Includes
#include <string.h>
#include <strings.h>
#ifndef STRCASECMP
#define STRCASECMP strcasecmp
#endif
#ifndef STRNCASECMP
#define STRNCASECMP strncasecmp
#endif
#ifndef STRCPY
#define STRCPY(sFilePath, nLength, sPath) strcpy(sFilePath, sPath)
#endif
#ifndef FOPEN
#define FOPEN(fHandle, filename, mode) (fHandle = fopen(filename, mode))
#endif
#ifndef FOPEN_FAIL
#define FOPEN_FAIL(result) (result == NULL)
#endif
#ifndef SSCANF
#define SSCANF sscanf
#endif
#ifndef SPRINTF
#define SPRINTF sprintf
#endif
#endif
#ifndef EXIT_WAIVED
#define EXIT_WAIVED 2
#endif
// CUDA Utility Helper Functions
// Counts the leading `delimiter` characters of `string` and returns that count,
// i.e. the offset of the first character that is not the delimiter.
// Returns 0 instead when that offset is not strictly smaller than the index of
// the last character of `string`.
inline int stringRemoveDelimiter(char delimiter, const char *string) {
  int offset = 0;
  for (; string[offset] == delimiter; ++offset) {
    // nothing to do: just advance past the delimiter
  }

  const int last_index = static_cast<int>(strlen(string) - 1);
  return (offset < last_index) ? offset : 0;
}
// Locates the extension of `filename`, i.e. the text following the last '.'.
//
// @param filename   NUL-terminated file name to inspect (not modified)
// @param extension  receives a pointer into `filename` just past the last '.',
//                   or NULL when no extension is found
// @return index in `filename` of the first extension character, or 0 when no
//         extension is found
//
// A '.' at index 0 (e.g. ".bashrc") is not treated as an extension separator.
// An empty file name is handled without reading outside the buffer, and a
// one-character base name such as "a.b" correctly yields "b".
inline int getFileExtension(char *filename, char **extension) {
  const int length = static_cast<int>(strlen(filename));

  // Walk backwards from the last character to the last '.'; stop at index 0 so
  // the buffer is never read out of bounds.
  int dot_index = length - 1;
  while (dot_index > 0 && filename[dot_index] != '.') {
    --dot_index;
  }

  if (dot_index <= 0) {
    // No '.' at all, a leading '.' only, or an empty string.
    *extension = NULL;
    return 0;
  }

  *extension = &filename[dot_index + 1];
  return dot_index + 1;
}
// Reports whether the flag `string_ref` is present among argv[1..argc-1].
//
// For each argument the leading '-' characters are skipped (as decided by
// stringRemoveDelimiter) and everything from the first '=' onwards is ignored;
// the remaining name must have the same length as `string_ref` and compare
// equal to it case-insensitively.
inline bool checkCmdLineFlag(const int argc, const char **argv,
                             const char *string_ref) {
  for (int arg = 1; arg < argc; ++arg) {
    const char *candidate = argv[arg] + stringRemoveDelimiter('-', argv[arg]);

    // Length of the flag name: up to the '=' if there is one, else the rest.
    const char *equals = strchr(candidate, '=');
    int name_length;
    if (equals != NULL) {
      name_length = static_cast<int>(equals - candidate);
    } else {
      name_length = static_cast<int>(strlen(candidate));
    }

    const int ref_length = static_cast<int>(strlen(string_ref));
    if (ref_length != name_length) {
      continue;
    }
    if (STRNCASECMP(candidate, string_ref, ref_length) == 0) {
      return true;
    }
  }
  return false;
}
// Looks for the first argument in argv[1..argc-1] that starts (ignoring case
// and leading '-' characters) with `string_ref` and stores its value in
// `*value`.
//
// The text following the key (after an optional '=') is parsed with atoi() and
// converted to T. When the argument consists of the key only, `*value` is left
// untouched.
//
// @return true if a matching argument was found, false otherwise
template <class T>
inline bool getCmdLineArgumentValue(const int argc, const char **argv,
                                    const char *string_ref, T *value) {
  for (int arg = 1; arg < argc; ++arg) {
    const char *candidate = argv[arg] + stringRemoveDelimiter('-', argv[arg]);
    const int key_length = static_cast<int>(strlen(string_ref));

    if (STRNCASECMP(candidate, string_ref, key_length) != 0) {
      continue;
    }

    // Only parse when something follows the key.
    if (static_cast<int>(strlen(candidate)) > key_length) {
      const char *digits = candidate + key_length;
      if (*digits == '=') {
        ++digits;
      }
      *value = (T)atoi(digits);
    }
    return true;  // the first match wins
  }
  return false;
}
// Returns the integer value of the command line argument `string_ref`.
//
// Every argument in argv[1..argc-1] that starts (ignoring case and leading '-'
// characters) with `string_ref` is considered; when several match, the last
// one determines the result. The text after the key (and an optional '=') is
// parsed with atoi(). Returns 0 when the argument is absent or carries no
// value.
inline int getCmdLineArgumentInt(const int argc, const char **argv,
                                 const char *string_ref) {
  int result = 0;

  for (int arg = 1; arg < argc; ++arg) {
    const char *candidate = argv[arg] + stringRemoveDelimiter('-', argv[arg]);
    const int key_length = static_cast<int>(strlen(string_ref));

    if (STRNCASECMP(candidate, string_ref, key_length) != 0) {
      continue;
    }

    if (static_cast<int>(strlen(candidate)) > key_length) {
      const char *digits = candidate + key_length;
      if (*digits == '=') {
        ++digits;
      }
      result = atoi(digits);
    } else {
      // Key present without any value.
      result = 0;
    }
  }

  return result;
}
// Returns the floating point value of the command line argument `string_ref`.
//
// Every argument in argv[1..argc-1] that starts (ignoring case and leading '-'
// characters) with `string_ref` is considered; when several match, the last
// one determines the result. The text after the key (and an optional '=') is
// parsed with atof(). Returns 0 when the argument is absent or carries no
// value.
inline float getCmdLineArgumentFloat(const int argc, const char **argv,
                                     const char *string_ref) {
  float result = 0.f;

  for (int arg = 1; arg < argc; ++arg) {
    const char *candidate = argv[arg] + stringRemoveDelimiter('-', argv[arg]);
    const int key_length = static_cast<int>(strlen(string_ref));

    if (STRNCASECMP(candidate, string_ref, key_length) != 0) {
      continue;
    }

    if (static_cast<int>(strlen(candidate)) > key_length) {
      const char *number = candidate + key_length;
      if (*number == '=') {
        ++number;
      }
      result = static_cast<float>(atof(number));
    } else {
      // Key present without any value.
      result = 0.f;
    }
  }

  return result;
}
// Retrieves the string value of the command line argument `string_ref`.
//
// Every argument in argv[1..argc-1] that starts (ignoring case and leading '-'
// characters) with `string_ref` is considered; when several match, the last
// one determines the result.
//
// @param string_retval  receives a pointer into argv to the text following the
//                       key and its one-character separator (normally '='),
//                       or NULL when the argument is absent. When the argument
//                       consists of the key only, it receives a pointer to an
//                       empty string.
// @return true if a matching argument was found, false otherwise
inline bool getCmdLineArgumentString(const int argc, const char **argv,
                                     const char *string_ref,
                                     char **string_retval) {
  bool bFound = false;
  *string_retval = NULL;

  for (int i = 1; i < argc; i++) {
    int string_start = stringRemoveDelimiter('-', argv[i]);
    char *string_argv = const_cast<char *>(&argv[i][string_start]);
    int length = static_cast<int>(strlen(string_ref));

    if (STRNCASECMP(string_argv, string_ref, length) != 0) {
      continue;
    }

    // Skip the separator only if there is a character to skip; otherwise the
    // result would point past the terminating NUL of the argument.
    char *value = &string_argv[length];
    if (*value != '\0') {
      ++value;
    }
    *string_retval = value;
    bFound = true;
  }

  return bFound;
}
//////////////////////////////////////////////////////////////////////////////
//! Find the path for a file assuming that
//! files are found in the searchPath.
//!
//! Each entry of the search path table is tried in order; the first location
//! where the file can be opened for reading wins.
//!
//! @return the path if succeeded, otherwise 0. The returned string is
//!         allocated with malloc() and must be released by the caller with
//!         free().
//! @param filename         name of the file
//! @param executable_path  optional absolute path of the executable; when 0,
//!                         search entries containing <executable_name> are
//!                         skipped
//////////////////////////////////////////////////////////////////////////////
inline char *sdkFindFilePath(const char *filename,
                             const char *executable_path) {
  // <executable_name> defines a variable that is replaced with the name of the
  // executable.
  //
  // Typical relative search paths to locate needed companion files (e.g. sample
  // input data, or JIT source files). The origin for the relative search may be
  // the .exe file, a .bat file launching an .exe, a browser .exe launching the
  // .exe or .bat, etc.
  const char *searchPath[] = {
      "./",       // same dir
      "./data/",  // data sub-directory of the same dir
      // Samples/<executable_name>, 4 / 3 / 2 levels up in the tree
      "../../../../Samples/<executable_name>/",
      "../../../Samples/<executable_name>/",
      "../../Samples/<executable_name>/",
      "../../../../Samples/<executable_name>/data/",
      "../../../Samples/<executable_name>/data/",
      "../../Samples/<executable_name>/data/",
      // Samples/<category>/<executable_name>, 4 / 3 / 2 levels up in the tree
      "../../../../Samples/0_Introduction/<executable_name>/",
      "../../../Samples/0_Introduction/<executable_name>/",
      "../../Samples/0_Introduction/<executable_name>/",
      "../../../../Samples/1_Utilities/<executable_name>/",
      "../../../Samples/1_Utilities/<executable_name>/",
      "../../Samples/1_Utilities/<executable_name>/",
      "../../../../Samples/2_Concepts_and_Techniques/<executable_name>/",
      "../../../Samples/2_Concepts_and_Techniques/<executable_name>/",
      "../../Samples/2_Concepts_and_Techniques/<executable_name>/",
      "../../../../Samples/3_CUDA_Features/<executable_name>/",
      "../../../Samples/3_CUDA_Features/<executable_name>/",
      "../../Samples/3_CUDA_Features/<executable_name>/",
      "../../../../Samples/4_CUDA_Libraries/<executable_name>/",
      "../../../Samples/4_CUDA_Libraries/<executable_name>/",
      "../../Samples/4_CUDA_Libraries/<executable_name>/",
      "../../../../Samples/5_Domain_Specific/<executable_name>/",
      "../../../Samples/5_Domain_Specific/<executable_name>/",
      "../../Samples/5_Domain_Specific/<executable_name>/",
      "../../../../Samples/6_Performance/<executable_name>/",
      "../../../Samples/6_Performance/<executable_name>/",
      "../../Samples/6_Performance/<executable_name>/",
      // Samples/<category>/<executable_name>/data, 4 / 3 / 2 levels up
      "../../../../Samples/0_Introduction/<executable_name>/data/",
      "../../../Samples/0_Introduction/<executable_name>/data/",
      "../../Samples/0_Introduction/<executable_name>/data/",
      "../../../../Samples/1_Utilities/<executable_name>/data/",
      "../../../Samples/1_Utilities/<executable_name>/data/",
      "../../Samples/1_Utilities/<executable_name>/data/",
      "../../../../Samples/2_Concepts_and_Techniques/<executable_name>/data/",
      "../../../Samples/2_Concepts_and_Techniques/<executable_name>/data/",
      "../../Samples/2_Concepts_and_Techniques/<executable_name>/data/",
      "../../../../Samples/3_CUDA_Features/<executable_name>/data/",
      "../../../Samples/3_CUDA_Features/<executable_name>/data/",
      "../../Samples/3_CUDA_Features/<executable_name>/data/",
      "../../../../Samples/4_CUDA_Libraries/<executable_name>/data/",
      "../../../Samples/4_CUDA_Libraries/<executable_name>/data/",
      "../../Samples/4_CUDA_Libraries/<executable_name>/data/",
      "../../../../Samples/5_Domain_Specific/<executable_name>/data/",
      "../../../Samples/5_Domain_Specific/<executable_name>/data/",
      "../../Samples/5_Domain_Specific/<executable_name>/data/",
      "../../../../Samples/6_Performance/<executable_name>/data/",
      "../../../Samples/6_Performance/<executable_name>/data/",
      "../../Samples/6_Performance/<executable_name>/data/",
      // Common/data, 4 / 3 / 2 levels up in the tree
      "../../../../Common/data/",
      "../../../Common/data/",
      "../../Common/data/"};

  // Extract the executable name
  std::string executable_name;

  if (executable_path != 0) {
    executable_name = std::string(executable_path);

#if defined(WIN32) || defined(_WIN32) || defined(WIN64) || defined(_WIN64)
    // Windows path delimiter
    size_t delimiter_pos = executable_name.find_last_of('\\');
    executable_name.erase(0, delimiter_pos + 1);

    // Strip ".exe" only when it really is the suffix of the name; a ".exe"
    // somewhere in the middle must not truncate unrelated characters.
    const std::string exe_suffix(".exe");
    if (executable_name.size() >= exe_suffix.size() &&
        executable_name.compare(executable_name.size() - exe_suffix.size(),
                                exe_suffix.size(), exe_suffix) == 0) {
      executable_name.resize(executable_name.size() - exe_suffix.size());
    }
#else
    // Linux & OSX path delimiter
    size_t delimiter_pos = executable_name.find_last_of('/');
    executable_name.erase(0, delimiter_pos + 1);
#endif
  }

  // Loop over all search paths and return the first hit
  for (unsigned int i = 0; i < sizeof(searchPath) / sizeof(char *); ++i) {
    std::string path(searchPath[i]);
    size_t executable_name_pos = path.find("<executable_name>");

    // If there is executable_name variable in the searchPath
    // replace it with the value
    if (executable_name_pos != std::string::npos) {
      if (executable_path != 0) {
        path.replace(executable_name_pos, strlen("<executable_name>"),
                     executable_name);
      } else {
        // Skip this path entry if no executable argument is given
        continue;
      }
    }

#ifdef _DEBUG
    printf("sdkFindFilePath <%s> in %s\n", filename, path.c_str());
#endif

    // Test if the file exists
    path.append(filename);
    FILE *fp = NULL;  // stays NULL if the open attempt does not set it
    FOPEN(fp, path.c_str(), "rb");

    if (fp != NULL) {
      fclose(fp);
      // File found
      // returning an allocated array here for backwards compatibility reasons
      char *file_path = reinterpret_cast<char *>(malloc(path.length() + 1));
      if (file_path == NULL) {
        // Out of memory: report failure instead of writing through NULL.
        return 0;
      }
      STRCPY(file_path, path.length() + 1, path.c_str());
      return file_path;
    }
  }

  // File not found
  printf("\nerror: sdkFindFilePath: file <%s> not found!\n", filename);
  return 0;
}
#endif // COMMON_HELPER_STRING_H_
\ No newline at end of file
main.cu 0 → 100644
#include <cuda_runtime.h>
#include <stdio.h>
#include <array>
#include <iostream>
#include <vector>
#include "helper_cuda.h"
// Debug kernel: prints every element of `data` and then doubles it in place.
//
// Each thread starts at its global 1D index (threadIdx.x + blockDim.x *
// blockIdx.x) and advances by the total number of launched threads
// (blockDim.x * gridDim.x), so any 1D launch configuration covers all `size`
// elements.
//
// @param data  array of `size` floats, read and written by the kernel
// @param size  number of elements in `data`
__global__ void debugPrint(float* data, int size) {
  const int first = threadIdx.x + blockDim.x * blockIdx.x;
  const int step = blockDim.x * gridDim.x;

  int idx = first;
  while (idx < size) {
    printf("Device: data[%d] = %f\n", idx, data[idx]);
    data[idx] *= 2;
    idx += step;
  }
}
// Builds and runs a small CUDA graph made of explicitly added nodes:
//
//   host_set ---+
//               +--> copy_to_device --> exec_kernel --> copy_to_host --> free
//   alloc ------+
//
// - host_set:       host callback that fills data_in with 42
// - alloc:          memory allocation node providing the device buffer
// - copy_to_device: copies data_in into the device buffer
// - exec_kernel:    runs debugPrint on the device buffer
// - copy_to_host:   copies the device buffer into data_out
// - free:           releases the device buffer
//
// The graph is instantiated, launched once on a dedicated stream, and the
// contents of data_in / data_out are printed after synchronisation.
int main(int argc, char* argv[]) {
  (void)argc;
  (void)argv;

  constexpr int data_size = 100;
  std::array<float, data_size> data_in;
  std::array<float, data_size> data_out;
  for (size_t idx = 0; idx < data_size; idx++) {
    data_in[idx] = 3;
    data_out[idx] = -1;
  }

  cudaGraph_t graph;
  cudaGraphExec_t graph_exec;
  cudaStream_t stream;
  // `free_node` rather than `free`, to avoid shadowing ::free.
  cudaGraphNode_t alloc, host_set, copy_to_device, copy_to_host, exec_kernel,
      free_node;

  // Value-initialisation zeroes every field, so no extra memset is needed.
  cudaMemAllocNodeParams alloc_pars{};
  alloc_pars.poolProps.allocType = cudaMemAllocationTypePinned;
  alloc_pars.poolProps.location.id = 0;
  alloc_pars.poolProps.location.type = cudaMemLocationTypeDevice;
  alloc_pars.bytesize = sizeof(float) * data_in.size();

  // Host callback executed as a graph node; `data` is data_in.data().
  auto set_value = [](void* data) {
    float* par = static_cast<float*>(data);
    for (int i = 0; i < data_size; i++) {
      par[i] = 42;
      std::cout << "Host: data[" << i << "] = " << par[i] << std::endl;
    }
  };
  cudaHostNodeParams host_pars{set_value, data_in.data()};

  checkCudaErrors(cudaGraphCreate(&graph, 0));
  checkCudaErrors(
      cudaGraphAddHostNode(&host_set, graph, nullptr, 0, &host_pars));
  // After this call alloc_pars.dptr holds the address of the graph-owned
  // device buffer, which the nodes below refer to.
  checkCudaErrors(
      cudaGraphAddMemAllocNode(&alloc, graph, nullptr, 0, &alloc_pars));

  size_t array_size = data_in.size();
  // The kernel's second parameter is an `int`; the argument slot must point to
  // an object of exactly that type (not to the size_t above).
  int kernel_size = data_size;

  cudaKernelNodeParams kernel_pars = {};
  kernel_pars.func = (void*)debugPrint;
  kernel_pars.gridDim = dim3(15, 1, 1);
  kernel_pars.blockDim = dim3(1, 1, 1);
  kernel_pars.extra = NULL;
  kernel_pars.sharedMemBytes = 0;
  void* parameters[2] = {(void*)&alloc_pars.dptr, &kernel_size};
  kernel_pars.kernelParams = parameters;

  // The host-to-device copy needs both the allocation and the host fill.
  std::vector<cudaGraphNode_t> copy_to_device_dep;
  copy_to_device_dep.push_back(alloc);
  copy_to_device_dep.push_back(host_set);

  cudaMemcpy3DParms to_device_pars = {};
  to_device_pars.dstPos = make_cudaPos(0, 0, 0);
  to_device_pars.dstPtr = make_cudaPitchedPtr(
      alloc_pars.dptr, array_size * sizeof(float), array_size, 1);
  to_device_pars.extent = make_cudaExtent(sizeof(float) * array_size, 1, 1);
  to_device_pars.kind = cudaMemcpyHostToDevice;
  to_device_pars.srcPos = make_cudaPos(0, 0, 0);
  to_device_pars.srcPtr = make_cudaPitchedPtr(
      data_in.data(), array_size * sizeof(float), array_size, 1);

  cudaMemcpy3DParms to_host_pars = {};
  to_host_pars.dstPos = make_cudaPos(0, 0, 0);
  to_host_pars.dstPtr = make_cudaPitchedPtr(
      data_out.data(), array_size * sizeof(float), array_size, 1);
  to_host_pars.extent = make_cudaExtent(sizeof(float) * array_size, 1, 1);
  to_host_pars.kind = cudaMemcpyDeviceToHost;
  to_host_pars.srcPos = make_cudaPos(0, 0, 0);
  to_host_pars.srcPtr = make_cudaPitchedPtr(
      alloc_pars.dptr, array_size * sizeof(float), array_size, 1);

  checkCudaErrors(
      cudaGraphAddMemcpyNode(&copy_to_device, graph, copy_to_device_dep.data(),
                             copy_to_device_dep.size(), &to_device_pars));
  checkCudaErrors(cudaGraphAddKernelNode(&exec_kernel, graph, &copy_to_device,
                                         1, &kernel_pars));
  checkCudaErrors(cudaGraphAddMemcpyNode(&copy_to_host, graph, &exec_kernel, 1,
                                         &to_host_pars));
  checkCudaErrors(cudaGraphAddMemFreeNode(&free_node, graph, &copy_to_host, 1,
                                          alloc_pars.dptr));

  checkCudaErrors(cudaStreamCreate(&stream));
  checkCudaErrors(cudaGraphInstantiate(&graph_exec, graph, NULL, NULL, 0));
  checkCudaErrors(cudaGraphLaunch(graph_exec, stream));
  checkCudaErrors(cudaStreamSynchronize(stream));

  // Release every CUDA object created above.
  checkCudaErrors(cudaGraphExecDestroy(graph_exec));
  checkCudaErrors(cudaGraphDestroy(graph));
  checkCudaErrors(cudaStreamDestroy(stream));

  std::cout << "Final result" << std::endl;
  for (size_t idx = 0; idx < data_out.size(); idx++) {
    std::cout << "data_in[" << idx << "] = " << data_in[idx] << " data_out["
              << idx << "] = " << data_out[idx] << std::endl;
  }
  return 0;
}
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or to comment