Skip to content
Snippets Groups Projects
Commit 60008e88 authored by Jan David Mol's avatar Jan David Mol
Browse files

Optimisation for not reading the full list

parent 6e5830d1
No related branches found
No related tags found
No related merge requests found
Checking pipeline status
......@@ -10,6 +10,7 @@ import json
import logging
from datetime import datetime, timedelta
import time
from typing import Generator
from minio import Minio
import minio.datatypes
......@@ -43,20 +44,20 @@ class S3PacketLoader: # pylint: disable=too-few-public-methods
self.prefix = prefix.strip("/")
self.block_duration = block_duration
def _list_objects_after(self, timestamp: datetime) -> list[minio.datatypes.Object]:
def _list_objects_after(
self, timestamp: datetime
) -> Generator[minio.datatypes.Object, None, None]:
"""Return a list of objects that (should) contain packets of and after
the given timestamp."""
# NB: The timestamp in the filename is the time when the file was
# completed, so after the last timestamp recorded in the file.
return list(
self._minio_client.list_objects(
self.bucket,
recursive=True,
prefix=f"{self.prefix}",
start_after=f"{self.prefix}/{timestamp.year}/{timestamp.month:02d}/"
f"{timestamp.day:02d}/{timestamp.isoformat()}.json",
)
return self._minio_client.list_objects(
self.bucket,
recursive=True,
prefix=f"{self.prefix}",
start_after=f"{self.prefix}/{timestamp.year}/{timestamp.month:02d}/"
f"{timestamp.day:02d}/{timestamp.isoformat()}.json",
)
def wait_for_packets(self, timestamp: datetime) -> bool:
......@@ -65,7 +66,7 @@ class S3PacketLoader: # pylint: disable=too-few-public-methods
Returns whether such packets were found."""
# if the packets are there, always return True
if self._list_objects_after(timestamp):
if next(self._list_objects_after(timestamp), False):
logger.info(
"wait_for_packets: Requested timestamp are available at startup"
)
......@@ -88,7 +89,7 @@ class S3PacketLoader: # pylint: disable=too-few-public-methods
sleep_interval = min(max_wait_time, MINIO_POLL_INTERVAL)
time.sleep(sleep_interval.seconds)
if self._list_objects_after(timestamp):
if next(self._list_objects_after(timestamp), False):
logger.info(
"wait_for_packets: Requested timestamp appeared after waiting %s",
datetime.now(tz=timestamp.tzinfo) - start_time,
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment