diff --git a/main.cu b/main.cu
index 42b05e94d3b0e20c60d89ebc2e01607ff841592c..e026266da832342fde3b03340da72af7aa871146 100644
--- a/main.cu
+++ b/main.cu
@@ -64,8 +64,8 @@ bool verify_host_memory(float *h_buffer, uint32_t n, float value) {
 
 void run_benchmark(const char *name, float *h_input_1, float *h_input_2,
                    float *h_output, float v_input_1, float v_input_2,
-                   float v_output, float *d_output, float v_output_reference,
-                   uint32_t n, std::function<void(void)> function_input,
+                   float v_output, float v_output_reference, uint32_t n,
+                   std::function<void(void)> function_input,
                    std::function<void(void)> function_kernel,
                    std::function<void(void)> function_output, CUstream stream,
                    int num_repeats = 100, int num_warmups = 10) {
@@ -218,7 +218,7 @@ void benchmark_allochost_alloc(const char *name, int n, float v_input_1,
   };
 
   run_benchmark(name, h_input_1, h_input_2, h_output, v_input_1, v_input_2,
-                v_output, d_output, v_output_reference, n, function_input,
+                v_output, v_output_reference, n, function_input,
                 function_kernel, function_output, stream, num_repeats,
                 num_warmups);
 
@@ -272,7 +272,7 @@ void benchmark_allochost(const char *name, int n, float v_input_1,
   };
 
   run_benchmark(name, h_input_1, h_input_2, h_output, v_input_1, v_input_2,
-                v_output, m_output, v_output_reference, n, function_input,
+                v_output, v_output_reference, n, function_input,
                 function_kernel, function_output, stream, num_repeats,
                 num_warmups);
 
@@ -312,7 +312,7 @@ void benchmark_hostalloc(const char *name, int n, float v_input_1,
   };
 
   run_benchmark(name, h_input_1, h_input_2, h_output, v_input_1, v_input_2,
-                v_output, h_output, v_output_reference, n, function_input,
+                v_output, v_output_reference, n, function_input,
                 function_kernel, function_output, stream, num_repeats,
                 num_warmups);
 
@@ -352,7 +352,7 @@ void benchmark_allochost_hostpointer(const char *name, int n, float v_input_1,
   };
 
   run_benchmark(name, h_input_1, h_input_2, h_output, v_input_1, v_input_2,
-                v_output, h_output, v_output_reference, n, function_input,
+                v_output, v_output_reference, n, function_input,
                 function_kernel, function_output, stream, num_repeats,
                 num_warmups);
 
@@ -409,7 +409,7 @@ void benchmark_allocmanaged(const char *name, int n, float v_input_1,
   };
 
   run_benchmark(name, d_input_1, d_input_2, d_output, v_input_1, v_input_2,
-                v_output, d_output, v_output_reference, n, function_input,
+                v_output, v_output_reference, n, function_input,
                 function_kernel, function_output, stream, num_repeats,
                 num_warmups);
 
@@ -462,7 +462,7 @@ void benchmark_hybrid(const char *name, int n, float v_input_1, float v_input_2,
   };
 
   run_benchmark(name, d_input_1, d_input_2, h_output, v_input_1, v_input_2,
-                v_output, d_output, v_output_reference, n, function_input,
+                v_output, v_output_reference, n, function_input,
                 function_kernel, function_output, stream, num_repeats,
                 num_warmups);
 
@@ -524,12 +524,12 @@ int main() {
                       num_warmups);
   flags = CU_MEMHOSTALLOC_DEVICEMAP;
   benchmark_hostalloc("cuMemHostAlloc (devicemap)", n, v_input_1, v_input_2,
-                      v_output, v_output_reference, stream, device, flags, num_repeats,
-                      num_warmups);
+                      v_output, v_output_reference, stream, device, flags,
+                      num_repeats, num_warmups);
   flags = CU_MEMHOSTALLOC_WRITECOMBINED;
   benchmark_hostalloc("cuMemHostAlloc (writecombined)", n, v_input_1, v_input_2,
-                      v_output, v_output_reference, stream, device, flags, num_repeats,
-                      num_warmups);
+                      v_output, v_output_reference, stream, device, flags,
+                      num_repeats, num_warmups);
 
   CHECK_CUDA_ERROR(cuDeviceGetAttribute(
       &property, CU_DEVICE_ATTRIBUTE_MANAGED_MEMORY, device));