diff --git a/RTCP/Cobalt/GPUProc/test/SubbandProcs/tSubbandProcPerformance.run b/RTCP/Cobalt/GPUProc/test/SubbandProcs/tSubbandProcPerformance.run
index cc3c4e7fe372385b0a90be8ac7b451eff5c12c77..c35964b0551c8bb911b45c0edbd5c783c34803eb 100755
--- a/RTCP/Cobalt/GPUProc/test/SubbandProcs/tSubbandProcPerformance.run
+++ b/RTCP/Cobalt/GPUProc/test/SubbandProcs/tSubbandProcPerformance.run
@@ -7,11 +7,11 @@ GPULOAD="${LOFARROOT}/bin/gpu_load"
 # Run the benchmark for ITERATIONS times
 ITERATIONS=100
 
-# Set the test criterium. 1 means that
+# Set the test criterion. 0 means that
 # the benchmark should be at least as fast
-# as the reference. Values slightly lower,
-# e.g. 0.9 allow for a bit of variation.
-TOLERANCE=0.85
+# as the reference. A value of 10 allows
+# for up to 10 % performance degradation.
+TOLERANCE=10
 
 # Find the GPU to use for this test
 # In case multiple GPUs are present, use the first one.
@@ -19,14 +19,6 @@ GPUNAME=$(nvidia-smi --query-gpu=gpu_name --format=csv,noheader | head -n 1)
 GPUNAME_=$(echo ${GPUNAME} | sed 's/ /-/')
 echo "Running benchmarks on ${GPUNAME}"
 
-# Check whether we have a reference output for this GPU
-REFERENCE_FILE=${srcdir}/${TESTNAME}_${GPUNAME_}.timings
-if [ ! -f ${REFERENCE_FILE} ]
-then
-  echo "No reference output for ${GPUNAME}, skipping test."
-  exit 0
-fi
-
 # Create output directory if it doesn't yet exist; make sure it's empty
 mkdir -p "${OUTDIR}" || error "Failed to create temporary directory ${OUTDIR}"
 cd "${OUTDIR}" || error "Failed to change directory to ${OUTDIR}"
@@ -42,24 +34,22 @@ do
   # Get the OBSID from the current parset
   OBSID=$(echo ${SRCPARSET} | sed -r 's/.*_([0-9]*)\..*/\1/g')
 
-  # Get reference time, the reference files are structured as follows:
-  # OBSID,REFERENCE_TIME
-  REFERENCE_TIME=$(cat ${REFERENCE_FILE} | grep ${OBSID} | awk -F, '{print $2}')
+  # Reference file for this observation and this GPU
+  REFERENCE_FILE=${srcdir}/${TESTNAME}_reference/${OBSID}_${GPUNAME_}.csv
 
-  # Skip this parset when no reference output is specified
-  if [ -z ${REFERENCE_TIME} ]
+  # Skip this parset when the reference file does not exist
+  if [ ! -f "${REFERENCE_FILE}" ]
   then
-    echo "No reference output for ${OBSID}, skipping test."
+    echo ">>> No reference output for ${OBSID}, skipping test."
     continue
   else
-    echo "Running test for observation ${OBSID}"
-    echo "Reference time: ${REFERENCE_TIME}"
+    echo ">>> Running test for observation ${OBSID}"
   fi
 
   # Create parset for this observation with benchmarking enabled
   PARSET=${TESTNAME}_${OBSID}.parset
-  RESULTS_FILE=${TESTNAME}_${OBSID}.csv
-  OUTPUT_FILE=${TESTNAME}_${OBSID}.out
+  RESULTS_FILE=${OUTDIR}/${TESTNAME}_${OBSID}.csv
+  OUTPUT_FILE=${OUTDIR}/${TESTNAME}_${OBSID}.out
   cat ${SRCPARSET} > ${PARSET}
   echo "Cobalt.Benchmark.enabled=true" >> ${PARSET}
   echo "Cobalt.Benchmark.file=${RESULTS_FILE}" >> ${PARSET}
@@ -69,20 +59,12 @@ do
   echo "Executing: ${COMMAND}"
   ${COMMAND} > ${OUTPUT_FILE} 2>&1
 
-  # Get the mean total timing (column 4 in the csv file)
-  BENCHMARK_TIME=$(cat ${RESULTS_FILE} | grep 'total' | awk -F\; '{print $4}')
-  echo "Benchmark time: ${BENCHMARK_TIME}"
-
-  # Check whether the benchmark ran fast enough
-  RESULT=$(echo "scale=2; ${REFERENCE_TIME}/${BENCHMARK_TIME}" | bc)
-  echo -n "Result: ${RESULT}, "
-  PASS=$(echo "${RESULT} >= ${TOLERANCE}" | bc)
-  if [ ${PASS} ]
+  # Compare with reference; the compare script exits non-zero when a kernel
+  # is slower than TOLERANCE percent allows, is missing, or a file is absent
+  COMPARE=${srcdir}/${TESTNAME}_compare.py
+  if ! "${COMPARE}" --tolerance "${TOLERANCE}" "${REFERENCE_FILE}" "${RESULTS_FILE}"
   then
-    echo "PASSED."
-  else
-    echo "FAILED."
-    STATUS = 1
+    STATUS=1
   fi
 done
 
diff --git a/RTCP/Cobalt/GPUProc/test/SubbandProcs/tSubbandProcPerformance_Tesla-V100-PCIE-16GB.timings b/RTCP/Cobalt/GPUProc/test/SubbandProcs/tSubbandProcPerformance_Tesla-V100-PCIE-16GB.timings
deleted file mode 100644
index 86004e1c72d46ef2439983fdee9a111b3f38319c..0000000000000000000000000000000000000000
--- a/RTCP/Cobalt/GPUProc/test/SubbandProcs/tSubbandProcPerformance_Tesla-V100-PCIE-16GB.timings
+++ /dev/null
@@ -1,5 +0,0 @@
-763847,9.17709
-784441,12.17178
-796954,9.61318
-797086,11.26417
-797130,6.29351
diff --git a/RTCP/Cobalt/GPUProc/test/SubbandProcs/tSubbandProcPerformance_compare.py b/RTCP/Cobalt/GPUProc/test/SubbandProcs/tSubbandProcPerformance_compare.py
new file mode 100755
index 0000000000000000000000000000000000000000..f712804a51371b4098436c82c9f7d9b2d62d989e
--- /dev/null
+++ b/RTCP/Cobalt/GPUProc/test/SubbandProcs/tSubbandProcPerformance_compare.py
@@ -0,0 +1,117 @@
+#!/usr/bin/env python3
+
+import sys
+import argparse
+import csv
+import os
+
+def create_arg_parser():
+    """Create and return the ArgumentParser for this compare script."""
+    parser = argparse.ArgumentParser(description='Compare mean runtime from different runs from gpu_load performance benchmarks.')
+    parser.add_argument('filename_reference',
+                    help='The full path to the reference file')
+    parser.add_argument('filename_candidate',
+                    help='The full path to the current file')
+    parser.add_argument('--tolerance', type=float, default=5,
+                    help='Maximum tolerable performance degradation (in percent)')
+    return parser
+
+argParser = create_arg_parser()
+parsedArgs = argParser.parse_args(sys.argv[1:])
+
+# Reference input (must be a regular file; exit status 1 otherwise)
+filename_reference = parsedArgs.filename_reference
+if not os.path.isfile(filename_reference):
+    print("Could not open: {}".format(filename_reference))
+    sys.exit(1)
+
+# Candidate input
+filename_candidate = parsedArgs.filename_candidate
+if not os.path.isfile(filename_candidate):
+    print("Could not open: {}".format(filename_candidate))
+    sys.exit(1)
+
+# Tolerance
+tolerance = parsedArgs.tolerance
+
+# Print arguments
+print(">>> Parameters")
+print("Reference: {}".format(filename_reference))
+print("Candidate: {}".format(filename_candidate))
+print("Tolerance: {}%".format(tolerance))
+
+def read_measurements(filename):
+    """Return a dict mapping kernel name to mean runtime for one results file.
+
+    The first two lines (line 0: date, line 1: info) are skipped; line 2
+    is the header and line 3 and further are data. Fields are separated
+    by ';', so the header keys keep their leading space (e.g. ' mean')
+    and the kernel name is stripped of leading whitespace.
+    """
+    with open(filename) as csvfile:
+        data = csvfile.readlines()[2:]
+    reader = csv.DictReader(data, delimiter=";")
+    measurements = dict()
+    for row in reader:
+        name = row[' kernelName'].lstrip()
+        mean = float(row[' mean'])
+        measurements[name] = mean
+
+    return measurements
+
+measurements_reference = read_measurements(filename_reference)
+measurements_candidate = read_measurements(filename_candidate)
+runtime_total = measurements_reference["total"]
+runtime_threshold = runtime_total * 0.05 # 5 percent
+
+# Check all measurements
+passed = list()
+failed = list()
+missing = list()
+status = 0
+for name, runtime_reference in measurements_reference.items():
+    # Skip very short measurements (below 5 percent of the reference total)
+    if runtime_reference < runtime_threshold:
+        continue
+
+    # Try to get runtime for candidate; only the lookup can raise KeyError
+    try:
+        runtime_candidate = measurements_candidate[name]
+    except KeyError:
+        missing.append("\"{}\"".format(name))
+        status = 1
+        continue
+
+    # Compare the two measurements; a loss of exactly `tolerance` percent still passes
+    performance = runtime_reference / runtime_candidate * 100
+    result = "\"{}\", reference: {:.2f}, candidate: {:.2f} ({:.1f} %)".format(name, runtime_reference, runtime_candidate, performance)
+    if performance >= (100 - tolerance):
+        passed.append(result)
+    else:
+        failed.append(result)
+        status = 1
+
+# Print summary
+print(">>> Results")
+print("PASSED:", end='')
+if passed:
+    print()
+    print("\n".join(passed))
+else:
+    print(" none")
+
+print("FAILED:", end='')
+if failed:
+    print()
+    print("\n".join(failed))
+else:
+    print(" none")
+
+print("MISSING:", end='')
+if missing:
+    print()
+    print(", ".join(missing))
+else:
+    print(" none")
+
+sys.exit(status)
diff --git a/RTCP/Cobalt/GPUProc/test/SubbandProcs/tSubbandProcPerformance_reference/763847_Tesla-V100-PCIE-16GB.csv b/RTCP/Cobalt/GPUProc/test/SubbandProcs/tSubbandProcPerformance_reference/763847_Tesla-V100-PCIE-16GB.csv
new file mode 100644
index 0000000000000000000000000000000000000000..a1415f4f079eeea17336dd8ec87321e7be901520
--- /dev/null
+++ b/RTCP/Cobalt/GPUProc/test/SubbandProcs/tSubbandProcPerformance_reference/763847_Tesla-V100-PCIE-16GB.csv
@@ -0,0 +1,25 @@
+Wed Apr 21 15:16:35 2021
+info; Test gpu_load: gpu_load with parset: tSubbandProcPerformance_763847.parset with nIterations: 1000
+format; kernelName; count; mean; stDev; min; max; unit
+PerformanceCounter; bandPassCorrection; 1000; 0.15295; 0.00140; 0.14950; 0.15770; ms
+PerformanceCounter; delayCompensation; 1000; 0.09772; 0.00054; 0.09626; 0.09933; ms
+PerformanceCounter; Zeroing (beamformer); 1000; 0.02151; 0.00079; 0.01891; 0.02806; ms
+PerformanceCounter; FFT (beamformer, 1st); 1000; 0.10470; 0.00102; 0.10240; 0.11981; ms
+PerformanceCounter; intToFloat; 1000; 0.15922; 0.00243; 0.15466; 0.20381; ms
+PerformanceCounter; output (coherent); 1000; 0.73045; 0.00035; 0.73024; 0.73299; ms
+PerformanceCounter; coherentStokes; 1000; 0.31819; 0.00098; 0.31539; 0.32154; ms
+PerformanceCounter; FFT (coherent, final); 1000; 0.63808; 0.00275; 0.63283; 0.69325; ms
+PerformanceCounter; FIR (coherent, final); 1000; 1.69718; 0.01367; 1.67731; 1.80634; ms
+PerformanceCounter; FFT-shift (coherent, inverse); 1000; 0.57222; 0.00053; 0.57139; 0.57552; ms
+PerformanceCounter; FFT (coherent, inverse); 1000; 0.63251; 0.00345; 0.62566; 0.71574; ms
+PerformanceCounter; coherentStokesTranspose; 1000; 0.61415; 0.00201; 0.60826; 0.62259; ms
+PerformanceCounter; beamFormer; 1000; 0.58667; 0.00879; 0.57754; 0.66048; ms +PerformanceCounter; output (incoherent); 1000; 0.01346; 0.00008; 0.01338; 0.01402; ms +PerformanceCounter; incoherentStokes; 1000; 0.11950; 0.00140; 0.11776; 0.13107; ms +PerformanceCounter; FFT (incoherent, final); 1000; 0.10603; 0.00095; 0.10342; 0.10858; ms +PerformanceCounter; FIR (incoherent, final); 1000; 1.33881; 0.01112; 1.32198; 1.42848; ms +PerformanceCounter; FFT-shift (incoherent, inverse); 1000; 0.09771; 0.00052; 0.09626; 0.09933; ms +PerformanceCounter; FFT (incoherent, inverse); 1000; 0.10665; 0.00090; 0.10445; 0.11162; ms +PerformanceCounter; incoherentStokesTranspose; 1000; 0.10145; 0.00118; 0.09933; 0.12288; ms +PerformanceCounter; input; 1000; 0.77771; 0.00233; 0.77558; 0.83270; ms +PerformanceCounter; total; 7; 9.17709; 0.22384; 9.04602; 9.67680; ms diff --git a/RTCP/Cobalt/GPUProc/test/SubbandProcs/tSubbandProcPerformance_reference/784441_Tesla-V100-PCIE-16GB.csv b/RTCP/Cobalt/GPUProc/test/SubbandProcs/tSubbandProcPerformance_reference/784441_Tesla-V100-PCIE-16GB.csv new file mode 100644 index 0000000000000000000000000000000000000000..78dafcd872eb012d0255bc6f971ac1919490e3aa --- /dev/null +++ b/RTCP/Cobalt/GPUProc/test/SubbandProcs/tSubbandProcPerformance_reference/784441_Tesla-V100-PCIE-16GB.csv @@ -0,0 +1,24 @@ +Wed Apr 21 15:16:50 2021 +info; Test gpu_load: gpu_load with parset: tSubbandProcPerformance_784441.parset with nIterations: 1000 +format; kernelName; count; mean; stDev; min; max; unit +PerformanceCounter; output (correlator); 1000; 0.02079; 0.00029; 0.02067; 0.02950; ms +PerformanceCounter; correlate; 1000; 2.10369; 0.02335; 2.09306; 2.29786; ms +PerformanceCounter; delayAndBandPass; 1000; 0.50549; 0.00503; 0.49971; 0.54989; ms +PerformanceCounter; Zeroing (correlator); 1000; 0.09988; 0.00132; 0.09818; 0.11034; ms +PerformanceCounter; FFT (correlator); 1000; 0.17120; 0.00117; 0.16794; 0.18640; ms +PerformanceCounter; FIR (correlator); 1000; 0.67940; 
0.00689; 0.67072; 0.76394; ms +PerformanceCounter; bandPassCorrection; 1000; 0.24467; 0.00228; 0.23859; 0.25395; ms +PerformanceCounter; delayCompensation; 1000; 0.15393; 0.00058; 0.15258; 0.15770; ms +PerformanceCounter; Zeroing (beamformer); 1000; 0.02976; 0.00148; 0.02762; 0.03520; ms +PerformanceCounter; FFT (beamformer, 1st); 1000; 0.16922; 0.00110; 0.16589; 0.17712; ms +PerformanceCounter; intToFloat; 1000; 0.14052; 0.00159; 0.13722; 0.16282; ms +PerformanceCounter; output (coherent); 1000; 0.16193; 0.00022; 0.16157; 0.16288; ms +PerformanceCounter; coherentStokes; 1000; 0.31116; 0.00120; 0.30822; 0.31642; ms +PerformanceCounter; FFT (coherent, final); 1000; 0.73716; 0.00219; 0.73114; 0.74650; ms +PerformanceCounter; FIR (coherent, final); 1000; 1.30460; 0.01393; 1.26464; 1.38243; ms +PerformanceCounter; FFT-shift (coherent, inverse); 1000; 0.66283; 0.00049; 0.66150; 0.66458; ms +PerformanceCounter; FFT (coherent, inverse); 1000; 0.73336; 0.00232; 0.72602; 0.74448; ms +PerformanceCounter; coherentStokesTranspose; 1000; 0.66205; 0.00200; 0.65638; 0.66970; ms +PerformanceCounter; beamFormer; 1000; 1.89865; 0.07048; 1.75821; 2.16371; ms +PerformanceCounter; input; 1000; 1.25965; 0.00735; 1.24358; 1.30774; ms +PerformanceCounter; total; 1000; 12.17178; 0.11055; 11.97261; 13.44202; ms diff --git a/RTCP/Cobalt/GPUProc/test/SubbandProcs/tSubbandProcPerformance_reference/796954_Tesla-V100-PCIE-16GB.csv b/RTCP/Cobalt/GPUProc/test/SubbandProcs/tSubbandProcPerformance_reference/796954_Tesla-V100-PCIE-16GB.csv new file mode 100644 index 0000000000000000000000000000000000000000..1faee7bbd463308425b0dd9244ed4de764ca28eb --- /dev/null +++ b/RTCP/Cobalt/GPUProc/test/SubbandProcs/tSubbandProcPerformance_reference/796954_Tesla-V100-PCIE-16GB.csv @@ -0,0 +1,11 @@ +Wed Apr 21 15:17:06 2021 +info; Test gpu_load: gpu_load with parset: tSubbandProcPerformance_796954.parset with nIterations: 1000 +format; kernelName; count; mean; stDev; min; max; unit +PerformanceCounter; output 
(correlator); 1000; 0.42430; 0.00030; 0.42406; 0.43030; ms +PerformanceCounter; correlate; 1000; 2.07460; 0.00112; 2.07053; 2.07770; ms +PerformanceCounter; delayAndBandPass; 1000; 0.92203; 0.00607; 0.90522; 0.94413; ms +PerformanceCounter; Zeroing (correlator); 1000; 0.35872; 0.00049; 0.35485; 0.35939; ms +PerformanceCounter; FFT (correlator); 1000; 0.63168; 0.00248; 0.62669; 0.67686; ms +PerformanceCounter; FIR (correlator); 1000; 0.48622; 0.00386; 0.47718; 0.54067; ms +PerformanceCounter; input; 1000; 4.64835; 0.00598; 4.64518; 4.75971; ms +PerformanceCounter; total; 1000; 9.61318; 0.01428; 9.58976; 9.92461; ms diff --git a/RTCP/Cobalt/GPUProc/test/SubbandProcs/tSubbandProcPerformance_reference/797086_Tesla-V100-PCIE-16GB.csv b/RTCP/Cobalt/GPUProc/test/SubbandProcs/tSubbandProcPerformance_reference/797086_Tesla-V100-PCIE-16GB.csv new file mode 100644 index 0000000000000000000000000000000000000000..4bd95c8e816d7bf4c9bd2e4236204ff05ae1d939 --- /dev/null +++ b/RTCP/Cobalt/GPUProc/test/SubbandProcs/tSubbandProcPerformance_reference/797086_Tesla-V100-PCIE-16GB.csv @@ -0,0 +1,22 @@ +Wed Apr 21 15:17:22 2021 +info; Test gpu_load: gpu_load with parset: tSubbandProcPerformance_797086.parset with nIterations: 1000 +format; kernelName; count; mean; stDev; min; max; unit +PerformanceCounter; output (correlator); 1000; 0.28892; 0.00034; 0.28874; 0.29760; ms +PerformanceCounter; correlate; 1000; 1.46201; 0.00083; 1.45920; 1.46432; ms +PerformanceCounter; delayAndBandPass; 1000; 0.72008; 0.00426; 0.70861; 0.73421; ms +PerformanceCounter; Zeroing (correlator); 1000; 0.29476; 0.00061; 0.29245; 0.29603; ms +PerformanceCounter; FFT (correlator); 1000; 0.51938; 0.00222; 0.51405; 0.55706; ms +PerformanceCounter; FIR (correlator); 1000; 0.42854; 0.00455; 0.41882; 0.46285; ms +PerformanceCounter; bandPassCorrection; 1000; 0.59152; 0.00424; 0.57754; 0.60621; ms +PerformanceCounter; delayCompensation; 1000; 0.34200; 0.00047; 0.33997; 0.34406; ms +PerformanceCounter; Zeroing 
(beamformer); 1000; 0.05880; 0.00068; 0.05744; 0.06054; ms +PerformanceCounter; FFT (beamformer, 1st); 1000; 0.38046; 0.00153; 0.37683; 0.38707; ms +PerformanceCounter; intToFloat; 1000; 0.27912; 0.00224; 0.27341; 0.29491; ms +PerformanceCounter; output (coherent); 1000; 0.48189; 0.00025; 0.48163; 0.48333; ms +PerformanceCounter; coherentStokes; 1000; 0.11419; 0.00134; 0.10957; 0.11776; ms +PerformanceCounter; FFT-shift (coherent, inverse); 1000; 0.01494; 0.00055; 0.01331; 0.01638; ms +PerformanceCounter; FFT (coherent, inverse); 1000; 0.01734; 0.00059; 0.01638; 0.01946; ms +PerformanceCounter; coherentStokesTranspose; 1000; 0.03076; 0.00065; 0.02867; 0.03277; ms +PerformanceCounter; beamFormer; 1000; 1.28760; 0.00216; 1.28205; 1.30662; ms +PerformanceCounter; input; 1000; 3.84327; 0.02941; 3.83926; 4.76208; ms +PerformanceCounter; total; 1000; 11.26417; 0.03360; 11.22611; 12.23578; ms diff --git a/RTCP/Cobalt/GPUProc/test/SubbandProcs/tSubbandProcPerformance_reference/797130_Tesla-V100-PCIE-16GB.csv b/RTCP/Cobalt/GPUProc/test/SubbandProcs/tSubbandProcPerformance_reference/797130_Tesla-V100-PCIE-16GB.csv new file mode 100644 index 0000000000000000000000000000000000000000..b2c0c9ef93f4ccacfb22c6002e1d10a64940db69 --- /dev/null +++ b/RTCP/Cobalt/GPUProc/test/SubbandProcs/tSubbandProcPerformance_reference/797130_Tesla-V100-PCIE-16GB.csv @@ -0,0 +1,16 @@ +Wed Apr 21 15:17:38 2021 +info; Test gpu_load: gpu_load with parset: tSubbandProcPerformance_797130.parset with nIterations: 1000 +format; kernelName; count; mean; stDev; min; max; unit +PerformanceCounter; bandPassCorrection; 1000; 0.62068; 0.00717; 0.60211; 0.67990; ms +PerformanceCounter; delayCompensation; 1000; 0.35668; 0.00063; 0.35533; 0.36045; ms +PerformanceCounter; Zeroing (beamformer); 1000; 0.06114; 0.00075; 0.05978; 0.06890; ms +PerformanceCounter; FFT (beamformer, 1st); 1000; 0.39899; 0.00174; 0.39424; 0.42288; ms +PerformanceCounter; intToFloat; 1000; 0.29476; 0.00271; 0.28877; 0.33997; ms 
+PerformanceCounter; output (coherent); 1000; 0.24260; 0.00020; 0.24243; 0.24691; ms +PerformanceCounter; coherentStokes; 1000; 0.09614; 0.00122; 0.09421; 0.10653; ms +PerformanceCounter; FFT-shift (coherent, inverse); 1000; 0.00657; 0.00050; 0.00614; 0.00819; ms +PerformanceCounter; FFT (coherent, inverse); 1000; 0.00846; 0.00046; 0.00816; 0.01126; ms +PerformanceCounter; coherentStokesTranspose; 1000; 0.02417; 0.00056; 0.02355; 0.02765; ms +PerformanceCounter; beamFormer; 1000; 1.13920; 0.01143; 1.12947; 1.24112; ms +PerformanceCounter; input; 1000; 2.94744; 0.00349; 2.94413; 2.99594; ms +PerformanceCounter; total; 11; 6.29351; 0.09036; 6.25152; 6.56387; ms