Skip to content
Snippets Groups Projects
Commit 709db735 authored by Bram Veenboer's avatar Bram Veenboer
Browse files

COB-121: Update tSubbandProcPerformance with kernel tests

Compare the runtime for all of the kernels with the reference.
parent 935ec891
No related branches found
No related tags found
2 merge requests!436COB-148: Enable NVRTC for CUDA kernel compilation,!431COB-121: Add tSubbandProcPerformance
Showing
with 232 additions and 40 deletions
......@@ -7,11 +7,11 @@ GPULOAD="${LOFARROOT}/bin/gpu_load"
# Run the benchmark for ITERATIONS times
ITERATIONS=100
# Set the test criterium. 1 means that
# Set the test criterion. 0 means that
# the benchmark should be at least as fast
# as the reference. Values slightly lower,
# e.g. 0.9 allow for a bit of variation.
TOLERANCE=0.85
# as the reference. A value of 10 allows
# for up to 10 % performance degradation.
TOLERANCE=10
# Find the GPU to use for this test
# In case multiple GPUs are present, use the first one.
......@@ -19,14 +19,6 @@ GPUNAME=$(nvidia-smi --query-gpu=gpu_name --format=csv,noheader | head -n 1)
GPUNAME_=$(echo ${GPUNAME} | sed 's/ /-/')
echo "Running benchmarks on ${GPUNAME}"
# Check whether we have a reference output for this GPU
REFERENCE_FILE=${srcdir}/${TESTNAME}_${GPUNAME_}.timings
if [ ! -f ${REFERENCE_FILE} ]
then
echo "No reference output for ${GPUNAME}, skipping test."
exit 0
fi
# Create output directory if it doesn't yet exist; make sure it's empty
mkdir -p "${OUTDIR}" || error "Failed to create temporary directory ${OUTDIR}"
cd "${OUTDIR}" || error "Failed to change directory to ${OUTDIR}"
......@@ -42,24 +34,22 @@ do
# Get the OBSID from the current parset
OBSID=$(echo ${SRCPARSET} | sed -r 's/.*_([0-9]*)\..*/\1/g')
# Locate the reference measurements for this observation on this GPU.
# Reference files are stored per observation as
# ${srcdir}/${TESTNAME}_reference/<OBSID>_<GPU name>.csv
REFERENCE_FILE="${srcdir}/${TESTNAME}_reference/${OBSID}_${GPUNAME_}.csv"

# Skip this parset when the reference file is not found. The path string
# itself is never empty, so the file has to be tested for existence (-f)
# instead of testing the variable for emptiness (-z).
if [ ! -f "${REFERENCE_FILE}" ]
then
  echo ">>> No reference output for ${OBSID}, skipping test."
  continue
else
  echo ">>> Running test for observation ${OBSID}"
fi
# Create parset for this observation with benchmarking enabled
PARSET=${TESTNAME}_${OBSID}.parset
RESULTS_FILE=${TESTNAME}_${OBSID}.csv
OUTPUT_FILE=${TESTNAME}_${OBSID}.out
RESULTS_FILE=${OUTDIR}/${TESTNAME}_${OBSID}.csv
OUTPUT_FILE=${OUTDIR}/${TESTNAME}_${OBSID}.out
cat ${SRCPARSET} > ${PARSET}
echo "Cobalt.Benchmark.enabled=true" >> ${PARSET}
echo "Cobalt.Benchmark.file=${RESULTS_FILE}" >> ${PARSET}
......@@ -69,20 +59,12 @@ do
echo "Executing: ${COMMAND}"
${COMMAND} > ${OUTPUT_FILE} 2>&1
# Compare the per-kernel timings of this run with the reference.
# The compare script exits with a non-zero status when a kernel is slower
# than the reference by more than the tolerance (in percent), or when a
# kernel from the reference is missing in the results.
COMPARE="${srcdir}/${TESTNAME}_compare.py"

# TOLERANCE is handed over explicitly; without it the compare script falls
# back to its own default of 5 percent.
# The exit status is recorded on the same command line, because $? is
# overwritten by any command (including a `[ ... ]` test) that runs in
# between. STATUS is only written on failure, so a failure of an earlier
# observation is not reset by a later one that passes.
# NOTE(review): STATUS is presumably initialised to 0 and used as the exit
# status of the script outside this section - confirm.
"${COMPARE}" --tolerance "${TOLERANCE}" "${REFERENCE_FILE}" "${RESULTS_FILE}" || STATUS=$?
done
......
763847,9.17709
784441,12.17178
796954,9.61318
797086,11.26417
797130,6.29351
#!/usr/bin/env python3
import sys
import argparse
import csv
import os
def create_arg_parser():
    """Create the command line parser of the comparison script.

    Returns:
        argparse.ArgumentParser: parser with the positional arguments
        ``filename_reference`` and ``filename_candidate`` and the optional
        ``--tolerance`` (float, in percent, default 5.0).
    """
    parser = argparse.ArgumentParser(description='Compare mean runtime from different runs from gpu_load performance benchmarks.')
    parser.add_argument('filename_reference',
                        help='The full path to the reference file')
    parser.add_argument('filename_candidate',
                        help='The full path to the current file')
    # argparse does not run ``type`` on a non-string default, so the default
    # is written as a float to get the same type (and the same printed form)
    # whether or not --tolerance is given on the command line.
    parser.add_argument('--tolerance', type=float, default=5.0,
                        help='Maximum tolerable performance degradation (in percent)')
    return parser
# Parse the command line
argParser = create_arg_parser()
parsedArgs = argParser.parse_args(sys.argv[1:])

# Reference input
filename_reference = parsedArgs.filename_reference

# Candidate input
filename_candidate = parsedArgs.filename_candidate

# Both inputs must be existing files; a directory of that name cannot be
# parsed either, hence isfile() rather than exists().
for filename in (filename_reference, filename_candidate):
    if not os.path.isfile(filename):
        print("Could not open: {}".format(filename), file=sys.stderr)
        # sys.exit() instead of exit(): the latter is a helper installed by
        # the site module for interactive use and is not always available.
        sys.exit(1)

# Tolerance: maximum tolerable performance degradation (in percent)
tolerance = parsedArgs.tolerance

# Print arguments
print(">>> Parameters")
print("Reference: {}".format(filename_reference))
print("Candidate: {}".format(filename_candidate))
print("Tolerance: {}%".format(tolerance))
def read_measurements(filename):
    """Read the mean runtime of every kernel from a benchmark results file.

    The first two lines of the file (date and info) are skipped. The
    remainder is parsed as ';'-separated values: one header line followed by
    one data row per measurement.

    Args:
        filename: path of the results file.

    Returns:
        dict: maps the kernel name (str, surrounding whitespace removed) to
        the value of the ``mean`` column (float).

    Raises:
        OSError: when the file cannot be opened.
        KeyError: when the header has no ``kernelName`` or ``mean`` column.
        ValueError: when a ``mean`` value is not a number.
    """
    measurements = dict()
    # The context manager closes the file again, also when parsing fails.
    with open(filename, newline='') as results:
        # Line 0: date
        # Line 1: info
        for _ in range(2):
            results.readline()
        # Line 2: header
        # Line 3 and further: data
        # skipinitialspace drops the blank after every ';', so the column
        # names do not depend on the exact spacing used in the file.
        reader = csv.DictReader(results, delimiter=";", skipinitialspace=True)
        for row in reader:
            measurements[row['kernelName'].strip()] = float(row['mean'])
    return measurements
def _print_section(title, entries, separator):
    """Print ``title``, then ``entries`` joined by ``separator`` on the next
    line, or `` none`` on the same line when ``entries`` is empty."""
    print(title, end='')
    if len(entries):
        print()
        print(separator.join(entries))
    else:
        print(" none")


measurements_reference = read_measurements(filename_reference)
measurements_candidate = read_measurements(filename_candidate)

# The "total" runtime of the reference determines which measurements are
# long enough to be compared; report its absence instead of a traceback.
if "total" not in measurements_reference:
    print("No \"total\" measurement in: {}".format(filename_reference), file=sys.stderr)
    sys.exit(1)
runtime_total = measurements_reference["total"]
runtime_threshold = runtime_total * 0.05  # 5 percent

# Check all measurements
passed = list()
failed = list()
missing = list()
status = 0
for name, runtime_reference in measurements_reference.items():
    # Skip very short measurements
    if runtime_reference < runtime_threshold:
        continue

    # A measurement without a counterpart in the candidate is an error
    if name not in measurements_candidate:
        missing.append("\"{}\"".format(name))
        status = 1
        continue
    runtime_candidate = measurements_candidate[name]

    # A runtime of zero (or less) cannot be used as divisor below; report
    # the measurement as failed instead of aborting with ZeroDivisionError.
    if runtime_candidate <= 0:
        failed.append("\"{}\", reference: {:.2f}, candidate: {:.2f} (invalid runtime)".format(name, runtime_reference, runtime_candidate))
        status = 1
        continue

    # Compare the two measurements: 100 % means equally fast, lower values
    # mean that the candidate is slower than the reference.
    performance = runtime_reference / runtime_candidate * 100
    result = "\"{}\", reference: {:.2f}, candidate: {:.2f} ({:.1f} %)".format(name, runtime_reference, runtime_candidate, performance)
    if performance > (100 - tolerance):
        passed.append(result)
    else:
        failed.append(result)
        status = 1

# Print summary
print(">>> Results")
_print_section("PASSED:", passed, "\n")
_print_section("FAILED:", failed, "\n")
_print_section("MISSING:", missing, ", ")

# Exit status: 0 when all compared measurements passed, 1 otherwise.
# sys.exit() is used because exit() is only provided by the site module.
sys.exit(status)
\ No newline at end of file
Wed Apr 21 15:16:35 2021
info; Test gpu_load: gpu_load with parset: tSubbandProcPerformance_763847.parset with nIterations: 1000
format; kernelName; count; mean; stDev; min; max; unit
PerformanceCounter; bandPassCorrection; 1000; 0.15295; 0.00140; 0.14950; 0.15770; ms
PerformanceCounter; delayCompensation; 1000; 0.09772; 0.00054; 0.09626; 0.09933; ms
PerformanceCounter; Zeroing (beamformer); 1000; 0.02151; 0.00079; 0.01891; 0.02806; ms
PerformanceCounter; FFT (beamformer, 1st); 1000; 0.10470; 0.00102; 0.10240; 0.11981; ms
PerformanceCounter; intToFloat; 1000; 0.15922; 0.00243; 0.15466; 0.20381; ms
PerformanceCounter; output (coherent); 1000; 0.73045; 0.00035; 0.73024; 0.73299; ms
PerformanceCounter; coherentStokes; 1000; 0.31819; 0.00098; 0.31539; 0.32154; ms
PerformanceCounter; FFT (coherent, final); 1000; 0.63808; 0.00275; 0.63283; 0.69325; ms
PerformanceCounter; FIR (coherent, final); 1000; 1.69718; 0.01367; 1.67731; 1.80634; ms
PerformanceCounter; FFT-shift (coherent, inverse); 1000; 0.57222; 0.00053; 0.57139; 0.57552; ms
PerformanceCounter; FFT (coherent, inverse); 1000; 0.63251; 0.00345; 0.62566; 0.71574; ms
PerformanceCounter; coherentStokesTranspose; 1000; 0.61415; 0.00201; 0.60826; 0.62259; ms
PerformanceCounter; beamFormer; 1000; 0.58667; 0.00879; 0.57754; 0.66048; ms
PerformanceCounter; output (incoherent); 1000; 0.01346; 0.00008; 0.01338; 0.01402; ms
PerformanceCounter; incoherentStokes; 1000; 0.11950; 0.00140; 0.11776; 0.13107; ms
PerformanceCounter; FFT (incoherent, final); 1000; 0.10603; 0.00095; 0.10342; 0.10858; ms
PerformanceCounter; FIR (incoherent, final); 1000; 1.33881; 0.01112; 1.32198; 1.42848; ms
PerformanceCounter; FFT-shift (incoherent, inverse); 1000; 0.09771; 0.00052; 0.09626; 0.09933; ms
PerformanceCounter; FFT (incoherent, inverse); 1000; 0.10665; 0.00090; 0.10445; 0.11162; ms
PerformanceCounter; incoherentStokesTranspose; 1000; 0.10145; 0.00118; 0.09933; 0.12288; ms
PerformanceCounter; input; 1000; 0.77771; 0.00233; 0.77558; 0.83270; ms
PerformanceCounter; total; 7; 9.17709; 0.22384; 9.04602; 9.67680; ms
Wed Apr 21 15:16:50 2021
info; Test gpu_load: gpu_load with parset: tSubbandProcPerformance_784441.parset with nIterations: 1000
format; kernelName; count; mean; stDev; min; max; unit
PerformanceCounter; output (correlator); 1000; 0.02079; 0.00029; 0.02067; 0.02950; ms
PerformanceCounter; correlate; 1000; 2.10369; 0.02335; 2.09306; 2.29786; ms
PerformanceCounter; delayAndBandPass; 1000; 0.50549; 0.00503; 0.49971; 0.54989; ms
PerformanceCounter; Zeroing (correlator); 1000; 0.09988; 0.00132; 0.09818; 0.11034; ms
PerformanceCounter; FFT (correlator); 1000; 0.17120; 0.00117; 0.16794; 0.18640; ms
PerformanceCounter; FIR (correlator); 1000; 0.67940; 0.00689; 0.67072; 0.76394; ms
PerformanceCounter; bandPassCorrection; 1000; 0.24467; 0.00228; 0.23859; 0.25395; ms
PerformanceCounter; delayCompensation; 1000; 0.15393; 0.00058; 0.15258; 0.15770; ms
PerformanceCounter; Zeroing (beamformer); 1000; 0.02976; 0.00148; 0.02762; 0.03520; ms
PerformanceCounter; FFT (beamformer, 1st); 1000; 0.16922; 0.00110; 0.16589; 0.17712; ms
PerformanceCounter; intToFloat; 1000; 0.14052; 0.00159; 0.13722; 0.16282; ms
PerformanceCounter; output (coherent); 1000; 0.16193; 0.00022; 0.16157; 0.16288; ms
PerformanceCounter; coherentStokes; 1000; 0.31116; 0.00120; 0.30822; 0.31642; ms
PerformanceCounter; FFT (coherent, final); 1000; 0.73716; 0.00219; 0.73114; 0.74650; ms
PerformanceCounter; FIR (coherent, final); 1000; 1.30460; 0.01393; 1.26464; 1.38243; ms
PerformanceCounter; FFT-shift (coherent, inverse); 1000; 0.66283; 0.00049; 0.66150; 0.66458; ms
PerformanceCounter; FFT (coherent, inverse); 1000; 0.73336; 0.00232; 0.72602; 0.74448; ms
PerformanceCounter; coherentStokesTranspose; 1000; 0.66205; 0.00200; 0.65638; 0.66970; ms
PerformanceCounter; beamFormer; 1000; 1.89865; 0.07048; 1.75821; 2.16371; ms
PerformanceCounter; input; 1000; 1.25965; 0.00735; 1.24358; 1.30774; ms
PerformanceCounter; total; 1000; 12.17178; 0.11055; 11.97261; 13.44202; ms
Wed Apr 21 15:17:06 2021
info; Test gpu_load: gpu_load with parset: tSubbandProcPerformance_796954.parset with nIterations: 1000
format; kernelName; count; mean; stDev; min; max; unit
PerformanceCounter; output (correlator); 1000; 0.42430; 0.00030; 0.42406; 0.43030; ms
PerformanceCounter; correlate; 1000; 2.07460; 0.00112; 2.07053; 2.07770; ms
PerformanceCounter; delayAndBandPass; 1000; 0.92203; 0.00607; 0.90522; 0.94413; ms
PerformanceCounter; Zeroing (correlator); 1000; 0.35872; 0.00049; 0.35485; 0.35939; ms
PerformanceCounter; FFT (correlator); 1000; 0.63168; 0.00248; 0.62669; 0.67686; ms
PerformanceCounter; FIR (correlator); 1000; 0.48622; 0.00386; 0.47718; 0.54067; ms
PerformanceCounter; input; 1000; 4.64835; 0.00598; 4.64518; 4.75971; ms
PerformanceCounter; total; 1000; 9.61318; 0.01428; 9.58976; 9.92461; ms
Wed Apr 21 15:17:22 2021
info; Test gpu_load: gpu_load with parset: tSubbandProcPerformance_797086.parset with nIterations: 1000
format; kernelName; count; mean; stDev; min; max; unit
PerformanceCounter; output (correlator); 1000; 0.28892; 0.00034; 0.28874; 0.29760; ms
PerformanceCounter; correlate; 1000; 1.46201; 0.00083; 1.45920; 1.46432; ms
PerformanceCounter; delayAndBandPass; 1000; 0.72008; 0.00426; 0.70861; 0.73421; ms
PerformanceCounter; Zeroing (correlator); 1000; 0.29476; 0.00061; 0.29245; 0.29603; ms
PerformanceCounter; FFT (correlator); 1000; 0.51938; 0.00222; 0.51405; 0.55706; ms
PerformanceCounter; FIR (correlator); 1000; 0.42854; 0.00455; 0.41882; 0.46285; ms
PerformanceCounter; bandPassCorrection; 1000; 0.59152; 0.00424; 0.57754; 0.60621; ms
PerformanceCounter; delayCompensation; 1000; 0.34200; 0.00047; 0.33997; 0.34406; ms
PerformanceCounter; Zeroing (beamformer); 1000; 0.05880; 0.00068; 0.05744; 0.06054; ms
PerformanceCounter; FFT (beamformer, 1st); 1000; 0.38046; 0.00153; 0.37683; 0.38707; ms
PerformanceCounter; intToFloat; 1000; 0.27912; 0.00224; 0.27341; 0.29491; ms
PerformanceCounter; output (coherent); 1000; 0.48189; 0.00025; 0.48163; 0.48333; ms
PerformanceCounter; coherentStokes; 1000; 0.11419; 0.00134; 0.10957; 0.11776; ms
PerformanceCounter; FFT-shift (coherent, inverse); 1000; 0.01494; 0.00055; 0.01331; 0.01638; ms
PerformanceCounter; FFT (coherent, inverse); 1000; 0.01734; 0.00059; 0.01638; 0.01946; ms
PerformanceCounter; coherentStokesTranspose; 1000; 0.03076; 0.00065; 0.02867; 0.03277; ms
PerformanceCounter; beamFormer; 1000; 1.28760; 0.00216; 1.28205; 1.30662; ms
PerformanceCounter; input; 1000; 3.84327; 0.02941; 3.83926; 4.76208; ms
PerformanceCounter; total; 1000; 11.26417; 0.03360; 11.22611; 12.23578; ms
Wed Apr 21 15:17:38 2021
info; Test gpu_load: gpu_load with parset: tSubbandProcPerformance_797130.parset with nIterations: 1000
format; kernelName; count; mean; stDev; min; max; unit
PerformanceCounter; bandPassCorrection; 1000; 0.62068; 0.00717; 0.60211; 0.67990; ms
PerformanceCounter; delayCompensation; 1000; 0.35668; 0.00063; 0.35533; 0.36045; ms
PerformanceCounter; Zeroing (beamformer); 1000; 0.06114; 0.00075; 0.05978; 0.06890; ms
PerformanceCounter; FFT (beamformer, 1st); 1000; 0.39899; 0.00174; 0.39424; 0.42288; ms
PerformanceCounter; intToFloat; 1000; 0.29476; 0.00271; 0.28877; 0.33997; ms
PerformanceCounter; output (coherent); 1000; 0.24260; 0.00020; 0.24243; 0.24691; ms
PerformanceCounter; coherentStokes; 1000; 0.09614; 0.00122; 0.09421; 0.10653; ms
PerformanceCounter; FFT-shift (coherent, inverse); 1000; 0.00657; 0.00050; 0.00614; 0.00819; ms
PerformanceCounter; FFT (coherent, inverse); 1000; 0.00846; 0.00046; 0.00816; 0.01126; ms
PerformanceCounter; coherentStokesTranspose; 1000; 0.02417; 0.00056; 0.02355; 0.02765; ms
PerformanceCounter; beamFormer; 1000; 1.13920; 0.01143; 1.12947; 1.24112; ms
PerformanceCounter; input; 1000; 2.94744; 0.00349; 2.94413; 2.99594; ms
PerformanceCounter; total; 11; 6.29351; 0.09036; 6.25152; 6.56387; ms
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment