Use more reasonable histogram buckets for ingest transfer durations

3e564120 · Jan David Mol · e0cd9829 · 3e564120
Commit 3e564120 authored 7 months ago by Jan David Mol
--- a/LTA/LTAIngest/LTAIngestServer/LTAIngestTransferServer/lib/ingestpipeline.py
+++ b/LTA/LTAIngest/LTAIngestServer/LTAIngestTransferServer/lib/ingestpipeline.py
@@ -7,7 +7,7 @@ import random
 import socket
 import re
 import getpass
-from prometheus_client import Gauge, Counter
+from prometheus_client import Gauge, Counter, Histogram, INF

 from lofar.lta.ingest.common.job import *
 from lofar.lta.ingest.server.sip import validateSIPAgainstSchema, addIngestInfoToSIP
@@ -29,7 +29,8 @@ logger = logging.getLogger(__name__)
 #---------------------- Prometheus Metrics ----------------------------------------

 metric_nr_transfers_in_progress = Gauge("ingest_transfers_in_progress", "Count how many transfers are currently in progress", labelnames=["site"])
-metric_transfer_durations = Histogram("ingest_transfer_durations", "How long transfers are taking", labelnames=["site"])
+metric_transfer_durations = Histogram("ingest_transfer_durations", "How long transfers are taking", labelnames=["site"],
+    buckets=(1.0, 10.0, 60.0, 5 * 60.0, 10 * 60.0, 30 * 60.0, 3600.0, 2 * 3600.0, 4 * 3600.0, 8 * 3600.0, 12 * 3600.0, 24 * 3600.0, INF))
 metric_nr_transfer_exceptions = Counter("ingest_transfer_exceptions", "Number of exceptions raised during transfer", labelnames=["site", "reason"])
 metric_nr_bytes_transferred = Counter("ingest_bytes_ingested", "Number of payload bytes ingested into the LTA (sum of ingested file sizes)", labelnames=["site"])