Skip to content
Snippets Groups Projects

Resolve L2SS-1215 "Tango prometheus exporter cascading reconnect error"

1 file
+ 11
0
Compare changes
  • Side-by-side
  • Inline
@@ -26,6 +26,9 @@ handler = AsynchronousLogstashHandler(
# Forward records of level INFO and above through this handler.
handler.setLevel(logging.INFO)
logger.addHandler(handler)
# Minimum interval, in seconds, to enforce before continuing after a
# connection issue (DevFailed); the collector sleeps for whatever part of
# this interval has not yet elapsed.
reconnect_timeout_time = 1.0
""" Functions to parse and apply policy files. """
@@ -277,6 +280,7 @@ class CustomCollector(object):
for device_name in self.policy.devices():
logger.debug(f"Processing device {device_name}")
dev_scrape_begin = time.time()
last_exception_time = time.time()
try:
metrics = self.device_metrics(device_name)
@@ -285,6 +289,13 @@ class CustomCollector(object):
except DevFailed as e:
reason = e.args[0].desc.replace("\n", " ")
logger.warning(f"Error processing device {device_name}: {reason}")
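# Throttle reconnects: if less than reconnect_timeout_time seconds have passed
# since last_exception_time, sleep for the remainder of that interval.
# NOTE(review): last_exception_time appears to be reset right before the try,
# so this measures from the start of this device's attempt - confirm intended.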
retry_wait_time = time.time() - last_exception_time
if retry_wait_time < reconnect_timeout_time:
time.sleep(reconnect_timeout_time - retry_wait_time)
last_exception_time = time.time()
except Exception as e:
logger.exception(f"Error processing device {device_name}")
finally:
Loading