diff --git a/main.cu b/main.cu index 42b05e94d3b0e20c60d89ebc2e01607ff841592c..e026266da832342fde3b03340da72af7aa871146 100644 --- a/main.cu +++ b/main.cu @@ -64,8 +64,8 @@ bool verify_host_memory(float *h_buffer, uint32_t n, float value) { void run_benchmark(const char *name, float *h_input_1, float *h_input_2, float *h_output, float v_input_1, float v_input_2, - float v_output, float *d_output, float v_output_reference, - uint32_t n, std::function<void(void)> function_input, + float v_output, float v_output_reference, uint32_t n, + std::function<void(void)> function_input, std::function<void(void)> function_kernel, std::function<void(void)> function_output, CUstream stream, int num_repeats = 100, int num_warmups = 10) { @@ -218,7 +218,7 @@ void benchmark_allochost_alloc(const char *name, int n, float v_input_1, }; run_benchmark(name, h_input_1, h_input_2, h_output, v_input_1, v_input_2, - v_output, d_output, v_output_reference, n, function_input, + v_output, v_output_reference, n, function_input, function_kernel, function_output, stream, num_repeats, num_warmups); @@ -272,7 +272,7 @@ void benchmark_allochost(const char *name, int n, float v_input_1, }; run_benchmark(name, h_input_1, h_input_2, h_output, v_input_1, v_input_2, - v_output, m_output, v_output_reference, n, function_input, + v_output, v_output_reference, n, function_input, function_kernel, function_output, stream, num_repeats, num_warmups); @@ -312,7 +312,7 @@ void benchmark_hostalloc(const char *name, int n, float v_input_1, }; run_benchmark(name, h_input_1, h_input_2, h_output, v_input_1, v_input_2, - v_output, h_output, v_output_reference, n, function_input, + v_output, v_output_reference, n, function_input, function_kernel, function_output, stream, num_repeats, num_warmups); @@ -352,7 +352,7 @@ void benchmark_allochost_hostpointer(const char *name, int n, float v_input_1, }; run_benchmark(name, h_input_1, h_input_2, h_output, v_input_1, v_input_2, - v_output, h_output, v_output_reference, n, function_input, + v_output, v_output_reference, n, function_input, function_kernel, function_output, stream, num_repeats, num_warmups); @@ -409,7 +409,7 @@ void benchmark_allocmanaged(const char *name, int n, float v_input_1, }; run_benchmark(name, d_input_1, d_input_2, d_output, v_input_1, v_input_2, - v_output, d_output, v_output_reference, n, function_input, + v_output, v_output_reference, n, function_input, function_kernel, function_output, stream, num_repeats, num_warmups); @@ -462,7 +462,7 @@ void benchmark_hybrid(const char *name, int n, float v_input_1, float v_input_2, }; run_benchmark(name, d_input_1, d_input_2, h_output, v_input_1, v_input_2, - v_output, d_output, v_output_reference, n, function_input, + v_output, v_output_reference, n, function_input, function_kernel, function_output, stream, num_repeats, num_warmups); @@ -524,12 +524,12 @@ int main() { num_warmups); flags = CU_MEMHOSTALLOC_DEVICEMAP; benchmark_hostalloc("cuMemHostAlloc (devicemap)", n, v_input_1, v_input_2, - v_output, v_output_reference, stream, device, flags, num_repeats, - num_warmups); + v_output, v_output_reference, stream, device, flags, + num_repeats, num_warmups); flags = CU_MEMHOSTALLOC_WRITECOMBINED; benchmark_hostalloc("cuMemHostAlloc (writecombined)", n, v_input_1, v_input_2, - v_output, v_output_reference, stream, device, flags, num_repeats, - num_warmups); + v_output, v_output_reference, stream, device, flags, + num_repeats, num_warmups); CHECK_CUDA_ERROR(cuDeviceGetAttribute( &property, CU_DEVICE_ATTRIBUTE_MANAGED_MEMORY, device));