diff --git a/RTCP/Cobalt/GPUProc/test/SubbandProcs/tSubbandProcPerformance.run b/RTCP/Cobalt/GPUProc/test/SubbandProcs/tSubbandProcPerformance.run
index cc3c4e7fe372385b0a90be8ac7b451eff5c12c77..c35964b0551c8bb911b45c0edbd5c783c34803eb 100755
--- a/RTCP/Cobalt/GPUProc/test/SubbandProcs/tSubbandProcPerformance.run
+++ b/RTCP/Cobalt/GPUProc/test/SubbandProcs/tSubbandProcPerformance.run
@@ -7,11 +7,11 @@ GPULOAD="${LOFARROOT}/bin/gpu_load"
 # Run the benchmark for ITERATIONS times
 ITERATIONS=100
 
-# Set the test criterium. 1 means that
+# Set the test criterion. 0 means that
 # the benchmark should be at least as fast
-# as the reference. Values slightly lower,
-# e.g. 0.9 allow for a bit of variation.
-TOLERANCE=0.85
+# as the reference. A value of 10 allows
+# for up to 10 % performance degradation.
+TOLERANCE=10
 
 # Find the GPU to use for this test
 # In case multiple GPUs are present, use the first one.
@@ -19,14 +19,6 @@ GPUNAME=$(nvidia-smi --query-gpu=gpu_name --format=csv,noheader | head -n 1)
 GPUNAME_=$(echo ${GPUNAME} | sed 's/ /-/')
 echo "Running benchmarks on ${GPUNAME}"
 
-# Check whether we have a reference output for this GPU
-REFERENCE_FILE=${srcdir}/${TESTNAME}_${GPUNAME_}.timings
-if [ ! -f  ${REFERENCE_FILE} ]
-then
-    echo "No reference output for ${GPUNAME}, skipping test."
-    exit 0
-fi
-
 # Create output directory if it doesn't yet exist; make sure it's empty
 mkdir -p "${OUTDIR}" || error "Failed to create temporary directory ${OUTDIR}"
 cd "${OUTDIR}" || error "Failed to change directory to ${OUTDIR}"
@@ -42,24 +34,22 @@ do
     # Get the OBSID from the current parset
     OBSID=$(echo ${SRCPARSET} | sed -r 's/.*_([0-9]*)\..*/\1/g')
 
-    # Get reference time, the reference files are structured as follows:
-    # OBSID,REFERENCE_TIME
-    REFERENCE_TIME=$(cat ${REFERENCE_FILE} | grep ${OBSID} | awk -F, '{print $2}')
+    # Reference timings are stored per observation and per GPU name
+    REFERENCE_FILE=${srcdir}/${TESTNAME}_reference/${OBSID}_${GPUNAME_}.csv
 
-    # Skip this parset when no reference output is specified
-    if [ -z ${REFERENCE_TIME} ]
+    # Skip this parset when its reference file does not exist
+    if [ ! -f "${REFERENCE_FILE}" ]
     then
-        echo "No reference output for ${OBSID}, skipping test."
+        echo ">>> No reference output for ${OBSID}, skipping test."
         continue
     else
-        echo "Running test for observation ${OBSID}"
-        echo "Reference time: ${REFERENCE_TIME}"
+        echo ">>> Running test for observation ${OBSID}"
     fi
 
     # Create parset for this observation with benchmarking enabled
     PARSET=${TESTNAME}_${OBSID}.parset
-    RESULTS_FILE=${TESTNAME}_${OBSID}.csv
-    OUTPUT_FILE=${TESTNAME}_${OBSID}.out
+    RESULTS_FILE=${OUTDIR}/${TESTNAME}_${OBSID}.csv
+    OUTPUT_FILE=${OUTDIR}/${TESTNAME}_${OBSID}.out
     cat ${SRCPARSET} > ${PARSET}
     echo "Cobalt.Benchmark.enabled=true" >> ${PARSET}
     echo "Cobalt.Benchmark.file=${RESULTS_FILE}" >> ${PARSET}
@@ -69,20 +59,12 @@ do
     echo "Executing: ${COMMAND}"
     ${COMMAND} > ${OUTPUT_FILE} 2>&1
 
-    # Get the mean total timing (column 4 in the csv file)
-    BENCHMARK_TIME=$(cat ${RESULTS_FILE} | grep 'total' | awk -F\; '{print $4}')
-    echo "Benchmark time: ${BENCHMARK_TIME}"
-
-    # Check whether the benchmark ran fast enough
-    RESULT=$(echo "scale=2; ${REFERENCE_TIME}/${BENCHMARK_TIME}" | bc)
-    echo -n "Result: ${RESULT}, "
-    PASS=$(echo "${RESULT} >= ${TOLERANCE}" | bc)
-    if [ ${PASS} ]
+    # Compare with reference using the configured tolerance; a non-zero
+    # exit status of the compare script marks the whole test as failed
+    COMPARE=${srcdir}/${TESTNAME}_compare.py
+    if ! "${COMPARE}" --tolerance "${TOLERANCE}" "${REFERENCE_FILE}" "${RESULTS_FILE}"
     then
-        echo "PASSED."
-    else
-        echo "FAILED."
-        STATUS = 1
+        STATUS=1
     fi
 done
 
diff --git a/RTCP/Cobalt/GPUProc/test/SubbandProcs/tSubbandProcPerformance_Tesla-V100-PCIE-16GB.timings b/RTCP/Cobalt/GPUProc/test/SubbandProcs/tSubbandProcPerformance_Tesla-V100-PCIE-16GB.timings
deleted file mode 100644
index 86004e1c72d46ef2439983fdee9a111b3f38319c..0000000000000000000000000000000000000000
--- a/RTCP/Cobalt/GPUProc/test/SubbandProcs/tSubbandProcPerformance_Tesla-V100-PCIE-16GB.timings
+++ /dev/null
@@ -1,5 +0,0 @@
-763847,9.17709
-784441,12.17178
-796954,9.61318
-797086,11.26417
-797130,6.29351
diff --git a/RTCP/Cobalt/GPUProc/test/SubbandProcs/tSubbandProcPerformance_compare.py b/RTCP/Cobalt/GPUProc/test/SubbandProcs/tSubbandProcPerformance_compare.py
new file mode 100755
index 0000000000000000000000000000000000000000..f712804a51371b4098436c82c9f7d9b2d62d989e
--- /dev/null
+++ b/RTCP/Cobalt/GPUProc/test/SubbandProcs/tSubbandProcPerformance_compare.py
@@ -0,0 +1,117 @@
+#!/usr/bin/env python3
+
+import sys
+import argparse
+import csv
+import os
+
+def create_arg_parser():
+    """Build and return the ArgumentParser: two positional file paths plus --tolerance."""
+    arg_parser = argparse.ArgumentParser(description='Compare mean runtime from different runs from gpu_load performance benchmarks.')
+    arg_parser.add_argument('filename_reference',
+                            help='The full path to the reference file')
+    arg_parser.add_argument('filename_candidate',
+                            help='The full path to the current file')
+    arg_parser.add_argument('--tolerance', type=float, default=5,
+                            help='Maximum tolerable performance degradation (in percent)')
+    return arg_parser
+
+argParser = create_arg_parser()
+parsedArgs = argParser.parse_args(sys.argv[1:])
+
+# Reference input; abort with exit status 1 when the path does not exist
+filename_reference = parsedArgs.filename_reference
+if not os.path.exists(filename_reference):
+    print("Could not open: {}".format(filename_reference))
+    sys.exit(1)
+
+# Candidate input; abort with exit status 1 when the path does not exist
+filename_candidate = parsedArgs.filename_candidate
+if not os.path.exists(filename_candidate):
+    print("Could not open: {}".format(filename_candidate))
+    sys.exit(1)
+
+# Tolerance: maximum tolerable performance degradation, in percent
+tolerance = parsedArgs.tolerance
+
+# Print arguments
+print(">>> Parameters")
+print("Reference: {}".format(filename_reference))
+print("Candidate: {}".format(filename_candidate))
+print("Tolerance: {}%".format(tolerance))
+
+def read_measurements(filename):
+    """Read a gpu_load benchmark CSV and return {kernel name: mean runtime}.
+
+    The first two lines (date and info) are skipped; the third line is the
+    ';'-separated header and every following line is one measurement.
+    Header fields keep their leading space (e.g. ' mean'), so the keys used
+    below must include it. Means are returned as floats in the file's unit.
+    """
+    # Close the file deterministically instead of relying on garbage collection
+    with open(filename) as results_file:
+        data = results_file.readlines()[2:]
+
+    measurements = dict()
+    for row in csv.DictReader(data, delimiter=";"):
+        name = row[' kernelName'].lstrip()
+        measurements[name] = float(row[' mean'])
+
+    return measurements
+
+measurements_reference = read_measurements(filename_reference)
+measurements_candidate = read_measurements(filename_candidate)
+runtime_total = measurements_reference["total"]
+runtime_threshold = runtime_total * 0.05 # 5 percent
+
+# Check all measurements; any failed or missing entry sets status to 1
+passed = list()
+failed = list()
+missing = list()
+status = 0
+for name, runtime_reference in measurements_reference.items():
+    # Skip very short measurements (below 5 percent of the reference total)
+    if (runtime_reference < runtime_threshold):
+        continue
+
+    # Try to get runtime for candidate; an absent entry is reported as missing
+    try:
+        runtime_candidate = measurements_candidate[name]
+        # Relative performance in percent; 100 means as fast as the reference
+        performance = runtime_reference / runtime_candidate * 100
+        result = "\"{}\", reference: {:.2f}, candidate: {:.2f} ({:.1f} %)".format(name, runtime_reference, runtime_candidate, performance)
+        # '>=' so that a result exactly at the tolerance limit still passes
+        if (performance >= (100 - tolerance)):
+            passed.append(result)
+        else:
+            failed.append(result)
+            status = 1
+
+    except KeyError:
+        missing.append("\"{}\"".format(name))
+        status = 1
+
+# Print summary: one section per outcome, " none" when a section is empty
+print(">>> Results")
+print("PASSED:", end='')
+if passed:
+    print()
+    print("\n".join(passed))
+else:
+    print(" none")
+
+print("FAILED:", end='')
+if failed:
+    print()
+    print("\n".join(failed))
+else:
+    print(" none")
+
+print("MISSING:", end='')
+if missing:
+    print()
+    print(", ".join(missing))
+else:
+    print(" none")
+# Exit status: 0 when everything passed, 1 on any failed or missing measurement
+sys.exit(status)
\ No newline at end of file
diff --git a/RTCP/Cobalt/GPUProc/test/SubbandProcs/tSubbandProcPerformance_reference/763847_Tesla-V100-PCIE-16GB.csv b/RTCP/Cobalt/GPUProc/test/SubbandProcs/tSubbandProcPerformance_reference/763847_Tesla-V100-PCIE-16GB.csv
new file mode 100644
index 0000000000000000000000000000000000000000..a1415f4f079eeea17336dd8ec87321e7be901520
--- /dev/null
+++ b/RTCP/Cobalt/GPUProc/test/SubbandProcs/tSubbandProcPerformance_reference/763847_Tesla-V100-PCIE-16GB.csv
@@ -0,0 +1,25 @@
+Wed Apr 21 15:16:35 2021
+info; Test gpu_load: gpu_load with parset: tSubbandProcPerformance_763847.parset with nIterations: 1000
+format; kernelName; count; mean; stDev; min; max; unit
+PerformanceCounter; bandPassCorrection; 1000; 0.15295; 0.00140; 0.14950; 0.15770; ms
+PerformanceCounter; delayCompensation; 1000; 0.09772; 0.00054; 0.09626; 0.09933; ms
+PerformanceCounter; Zeroing (beamformer); 1000; 0.02151; 0.00079; 0.01891; 0.02806; ms
+PerformanceCounter; FFT (beamformer, 1st); 1000; 0.10470; 0.00102; 0.10240; 0.11981; ms
+PerformanceCounter; intToFloat; 1000; 0.15922; 0.00243; 0.15466; 0.20381; ms
+PerformanceCounter; output (coherent); 1000; 0.73045; 0.00035; 0.73024; 0.73299; ms
+PerformanceCounter; coherentStokes; 1000; 0.31819; 0.00098; 0.31539; 0.32154; ms
+PerformanceCounter; FFT (coherent, final); 1000; 0.63808; 0.00275; 0.63283; 0.69325; ms
+PerformanceCounter; FIR (coherent, final); 1000; 1.69718; 0.01367; 1.67731; 1.80634; ms
+PerformanceCounter; FFT-shift (coherent, inverse); 1000; 0.57222; 0.00053; 0.57139; 0.57552; ms
+PerformanceCounter; FFT (coherent, inverse); 1000; 0.63251; 0.00345; 0.62566; 0.71574; ms
+PerformanceCounter; coherentStokesTranspose; 1000; 0.61415; 0.00201; 0.60826; 0.62259; ms
+PerformanceCounter; beamFormer; 1000; 0.58667; 0.00879; 0.57754; 0.66048; ms
+PerformanceCounter; output (incoherent); 1000; 0.01346; 0.00008; 0.01338; 0.01402; ms
+PerformanceCounter; incoherentStokes; 1000; 0.11950; 0.00140; 0.11776; 0.13107; ms
+PerformanceCounter; FFT (incoherent, final); 1000; 0.10603; 0.00095; 0.10342; 0.10858; ms
+PerformanceCounter; FIR (incoherent, final); 1000; 1.33881; 0.01112; 1.32198; 1.42848; ms
+PerformanceCounter; FFT-shift (incoherent, inverse); 1000; 0.09771; 0.00052; 0.09626; 0.09933; ms
+PerformanceCounter; FFT (incoherent, inverse); 1000; 0.10665; 0.00090; 0.10445; 0.11162; ms
+PerformanceCounter; incoherentStokesTranspose; 1000; 0.10145; 0.00118; 0.09933; 0.12288; ms
+PerformanceCounter; input; 1000; 0.77771; 0.00233; 0.77558; 0.83270; ms
+PerformanceCounter; total; 7; 9.17709; 0.22384; 9.04602; 9.67680; ms
diff --git a/RTCP/Cobalt/GPUProc/test/SubbandProcs/tSubbandProcPerformance_reference/784441_Tesla-V100-PCIE-16GB.csv b/RTCP/Cobalt/GPUProc/test/SubbandProcs/tSubbandProcPerformance_reference/784441_Tesla-V100-PCIE-16GB.csv
new file mode 100644
index 0000000000000000000000000000000000000000..78dafcd872eb012d0255bc6f971ac1919490e3aa
--- /dev/null
+++ b/RTCP/Cobalt/GPUProc/test/SubbandProcs/tSubbandProcPerformance_reference/784441_Tesla-V100-PCIE-16GB.csv
@@ -0,0 +1,24 @@
+Wed Apr 21 15:16:50 2021
+info; Test gpu_load: gpu_load with parset: tSubbandProcPerformance_784441.parset with nIterations: 1000
+format; kernelName; count; mean; stDev; min; max; unit
+PerformanceCounter; output (correlator); 1000; 0.02079; 0.00029; 0.02067; 0.02950; ms
+PerformanceCounter; correlate; 1000; 2.10369; 0.02335; 2.09306; 2.29786; ms
+PerformanceCounter; delayAndBandPass; 1000; 0.50549; 0.00503; 0.49971; 0.54989; ms
+PerformanceCounter; Zeroing (correlator); 1000; 0.09988; 0.00132; 0.09818; 0.11034; ms
+PerformanceCounter; FFT (correlator); 1000; 0.17120; 0.00117; 0.16794; 0.18640; ms
+PerformanceCounter; FIR (correlator); 1000; 0.67940; 0.00689; 0.67072; 0.76394; ms
+PerformanceCounter; bandPassCorrection; 1000; 0.24467; 0.00228; 0.23859; 0.25395; ms
+PerformanceCounter; delayCompensation; 1000; 0.15393; 0.00058; 0.15258; 0.15770; ms
+PerformanceCounter; Zeroing (beamformer); 1000; 0.02976; 0.00148; 0.02762; 0.03520; ms
+PerformanceCounter; FFT (beamformer, 1st); 1000; 0.16922; 0.00110; 0.16589; 0.17712; ms
+PerformanceCounter; intToFloat; 1000; 0.14052; 0.00159; 0.13722; 0.16282; ms
+PerformanceCounter; output (coherent); 1000; 0.16193; 0.00022; 0.16157; 0.16288; ms
+PerformanceCounter; coherentStokes; 1000; 0.31116; 0.00120; 0.30822; 0.31642; ms
+PerformanceCounter; FFT (coherent, final); 1000; 0.73716; 0.00219; 0.73114; 0.74650; ms
+PerformanceCounter; FIR (coherent, final); 1000; 1.30460; 0.01393; 1.26464; 1.38243; ms
+PerformanceCounter; FFT-shift (coherent, inverse); 1000; 0.66283; 0.00049; 0.66150; 0.66458; ms
+PerformanceCounter; FFT (coherent, inverse); 1000; 0.73336; 0.00232; 0.72602; 0.74448; ms
+PerformanceCounter; coherentStokesTranspose; 1000; 0.66205; 0.00200; 0.65638; 0.66970; ms
+PerformanceCounter; beamFormer; 1000; 1.89865; 0.07048; 1.75821; 2.16371; ms
+PerformanceCounter; input; 1000; 1.25965; 0.00735; 1.24358; 1.30774; ms
+PerformanceCounter; total; 1000; 12.17178; 0.11055; 11.97261; 13.44202; ms
diff --git a/RTCP/Cobalt/GPUProc/test/SubbandProcs/tSubbandProcPerformance_reference/796954_Tesla-V100-PCIE-16GB.csv b/RTCP/Cobalt/GPUProc/test/SubbandProcs/tSubbandProcPerformance_reference/796954_Tesla-V100-PCIE-16GB.csv
new file mode 100644
index 0000000000000000000000000000000000000000..1faee7bbd463308425b0dd9244ed4de764ca28eb
--- /dev/null
+++ b/RTCP/Cobalt/GPUProc/test/SubbandProcs/tSubbandProcPerformance_reference/796954_Tesla-V100-PCIE-16GB.csv
@@ -0,0 +1,11 @@
+Wed Apr 21 15:17:06 2021
+info; Test gpu_load: gpu_load with parset: tSubbandProcPerformance_796954.parset with nIterations: 1000
+format; kernelName; count; mean; stDev; min; max; unit
+PerformanceCounter; output (correlator); 1000; 0.42430; 0.00030; 0.42406; 0.43030; ms
+PerformanceCounter; correlate; 1000; 2.07460; 0.00112; 2.07053; 2.07770; ms
+PerformanceCounter; delayAndBandPass; 1000; 0.92203; 0.00607; 0.90522; 0.94413; ms
+PerformanceCounter; Zeroing (correlator); 1000; 0.35872; 0.00049; 0.35485; 0.35939; ms
+PerformanceCounter; FFT (correlator); 1000; 0.63168; 0.00248; 0.62669; 0.67686; ms
+PerformanceCounter; FIR (correlator); 1000; 0.48622; 0.00386; 0.47718; 0.54067; ms
+PerformanceCounter; input; 1000; 4.64835; 0.00598; 4.64518; 4.75971; ms
+PerformanceCounter; total; 1000; 9.61318; 0.01428; 9.58976; 9.92461; ms
diff --git a/RTCP/Cobalt/GPUProc/test/SubbandProcs/tSubbandProcPerformance_reference/797086_Tesla-V100-PCIE-16GB.csv b/RTCP/Cobalt/GPUProc/test/SubbandProcs/tSubbandProcPerformance_reference/797086_Tesla-V100-PCIE-16GB.csv
new file mode 100644
index 0000000000000000000000000000000000000000..4bd95c8e816d7bf4c9bd2e4236204ff05ae1d939
--- /dev/null
+++ b/RTCP/Cobalt/GPUProc/test/SubbandProcs/tSubbandProcPerformance_reference/797086_Tesla-V100-PCIE-16GB.csv
@@ -0,0 +1,22 @@
+Wed Apr 21 15:17:22 2021
+info; Test gpu_load: gpu_load with parset: tSubbandProcPerformance_797086.parset with nIterations: 1000
+format; kernelName; count; mean; stDev; min; max; unit
+PerformanceCounter; output (correlator); 1000; 0.28892; 0.00034; 0.28874; 0.29760; ms
+PerformanceCounter; correlate; 1000; 1.46201; 0.00083; 1.45920; 1.46432; ms
+PerformanceCounter; delayAndBandPass; 1000; 0.72008; 0.00426; 0.70861; 0.73421; ms
+PerformanceCounter; Zeroing (correlator); 1000; 0.29476; 0.00061; 0.29245; 0.29603; ms
+PerformanceCounter; FFT (correlator); 1000; 0.51938; 0.00222; 0.51405; 0.55706; ms
+PerformanceCounter; FIR (correlator); 1000; 0.42854; 0.00455; 0.41882; 0.46285; ms
+PerformanceCounter; bandPassCorrection; 1000; 0.59152; 0.00424; 0.57754; 0.60621; ms
+PerformanceCounter; delayCompensation; 1000; 0.34200; 0.00047; 0.33997; 0.34406; ms
+PerformanceCounter; Zeroing (beamformer); 1000; 0.05880; 0.00068; 0.05744; 0.06054; ms
+PerformanceCounter; FFT (beamformer, 1st); 1000; 0.38046; 0.00153; 0.37683; 0.38707; ms
+PerformanceCounter; intToFloat; 1000; 0.27912; 0.00224; 0.27341; 0.29491; ms
+PerformanceCounter; output (coherent); 1000; 0.48189; 0.00025; 0.48163; 0.48333; ms
+PerformanceCounter; coherentStokes; 1000; 0.11419; 0.00134; 0.10957; 0.11776; ms
+PerformanceCounter; FFT-shift (coherent, inverse); 1000; 0.01494; 0.00055; 0.01331; 0.01638; ms
+PerformanceCounter; FFT (coherent, inverse); 1000; 0.01734; 0.00059; 0.01638; 0.01946; ms
+PerformanceCounter; coherentStokesTranspose; 1000; 0.03076; 0.00065; 0.02867; 0.03277; ms
+PerformanceCounter; beamFormer; 1000; 1.28760; 0.00216; 1.28205; 1.30662; ms
+PerformanceCounter; input; 1000; 3.84327; 0.02941; 3.83926; 4.76208; ms
+PerformanceCounter; total; 1000; 11.26417; 0.03360; 11.22611; 12.23578; ms
diff --git a/RTCP/Cobalt/GPUProc/test/SubbandProcs/tSubbandProcPerformance_reference/797130_Tesla-V100-PCIE-16GB.csv b/RTCP/Cobalt/GPUProc/test/SubbandProcs/tSubbandProcPerformance_reference/797130_Tesla-V100-PCIE-16GB.csv
new file mode 100644
index 0000000000000000000000000000000000000000..b2c0c9ef93f4ccacfb22c6002e1d10a64940db69
--- /dev/null
+++ b/RTCP/Cobalt/GPUProc/test/SubbandProcs/tSubbandProcPerformance_reference/797130_Tesla-V100-PCIE-16GB.csv
@@ -0,0 +1,16 @@
+Wed Apr 21 15:17:38 2021
+info; Test gpu_load: gpu_load with parset: tSubbandProcPerformance_797130.parset with nIterations: 1000
+format; kernelName; count; mean; stDev; min; max; unit
+PerformanceCounter; bandPassCorrection; 1000; 0.62068; 0.00717; 0.60211; 0.67990; ms
+PerformanceCounter; delayCompensation; 1000; 0.35668; 0.00063; 0.35533; 0.36045; ms
+PerformanceCounter; Zeroing (beamformer); 1000; 0.06114; 0.00075; 0.05978; 0.06890; ms
+PerformanceCounter; FFT (beamformer, 1st); 1000; 0.39899; 0.00174; 0.39424; 0.42288; ms
+PerformanceCounter; intToFloat; 1000; 0.29476; 0.00271; 0.28877; 0.33997; ms
+PerformanceCounter; output (coherent); 1000; 0.24260; 0.00020; 0.24243; 0.24691; ms
+PerformanceCounter; coherentStokes; 1000; 0.09614; 0.00122; 0.09421; 0.10653; ms
+PerformanceCounter; FFT-shift (coherent, inverse); 1000; 0.00657; 0.00050; 0.00614; 0.00819; ms
+PerformanceCounter; FFT (coherent, inverse); 1000; 0.00846; 0.00046; 0.00816; 0.01126; ms
+PerformanceCounter; coherentStokesTranspose; 1000; 0.02417; 0.00056; 0.02355; 0.02765; ms
+PerformanceCounter; beamFormer; 1000; 1.13920; 0.01143; 1.12947; 1.24112; ms
+PerformanceCounter; input; 1000; 2.94744; 0.00349; 2.94413; 2.99594; ms
+PerformanceCounter; total; 11; 6.29351; 0.09036; 6.25152; 6.56387; ms