Skip to content
Snippets Groups Projects
Commit bc02a5fc authored by Jorrit Schaap's avatar Jorrit Schaap
Browse files

ROHD-692: only persist the most important subset of the cache to disk, because...

ROHD-692: only persist the most important subset of the cache to disk, because (de)serialization of large dicts is just too slow and memory consuming
parent d0a93a43
No related branches found
No related tags found
No related merge requests found
...@@ -103,6 +103,10 @@ class CacheManager: ...@@ -103,6 +103,10 @@ class CacheManager:
self._cache = eval(file.read().strip()) self._cache = eval(file.read().strip())
if not isinstance(self._cache, dict): if not isinstance(self._cache, dict):
self._cache = {'path_du_results': {}, 'otdb_id2path': {} } self._cache = {'path_du_results': {}, 'otdb_id2path': {} }
if 'path_du_results' not in self._cache:
self._cache['path_du_results'] = {}
if 'otdb_id2path' not in self._cache:
self._cache['otdb_id2path'] = {}
except Exception as e: except Exception as e:
logger.error("Error while reading in du cache: %s", e) logger.error("Error while reading in du cache: %s", e)
with self._cacheLock: with self._cacheLock:
...@@ -111,11 +115,20 @@ class CacheManager: ...@@ -111,11 +115,20 @@ class CacheManager:
def _writeCacheToDisk(self): def _writeCacheToDisk(self):
try: try:
if datetime.datetime.utcnow() - self._last_cache_write_timestamp > datetime.timedelta(minutes=5): # only persist (a subset of) the cache to disk every once in a while.
if datetime.datetime.utcnow() - self._last_cache_write_timestamp > datetime.timedelta(minutes=0.2):
tmp_path = '/tmp/tmp_storagequery_cache.py' tmp_path = '/tmp/tmp_storagequery_cache.py'
cache_str = '' cache_str = ''
with self._cacheLock: with self._cacheLock:
cache_str = str(self._cache) # Take a subset of the entire cache
# only the path_du_results for paths at project level (like /data/projects, /data/projects/LC9_001)
# Do not store path_du_results for deeper levels on disk, because that makes the disk read/write too slow,
# and the deeper levels can be obtained via rhb-du calls quite fast anyway.
# Furthermore, once a deeper level du results is stored in the memory cache, then it is also available for fast lookup.
# We just don't store these deep levels on disk.
sub_cache = { path:du_result for path,du_result in self._cache['path_du_results'].items()
if self.getDepthToProjectsDir(path) <= 1 }
cache_str = str(sub_cache)
with open(tmp_path, 'w') as file: with open(tmp_path, 'w') as file:
file.write(cache_str) file.write(cache_str)
...@@ -179,10 +192,17 @@ class CacheManager: ...@@ -179,10 +192,17 @@ class CacheManager:
return result return result
def getDepthToProjectsDir(self, path):
    """Return the directory depth of *path* relative to the projects dir.

    depth 0: <projects_path> itself
    depth 1: <projects_path>/<project>
    depth 2: <projects_path>/<project>/<obs>
    depth 3: <projects_path>/<project>/<obs>/<sub_dir>

    NOTE(review): assumes *path* starts with projects_path — str.replace would
    also strip matches elsewhere in the string; verify against callers.
    """
    relative_path = path.replace(self.disk_usage.path_resolver.projects_path, '').strip('/')
    # An empty relative path means *path* is the projects dir itself (depth 0).
    # Without this guard, ''.split('/') == [''] would yield depth 1, which
    # contradicts the documented depth semantics used by the scan cutoff.
    return len(relative_path.split('/')) if relative_path else 0
def _scanProjectsTree(self): def _scanProjectsTree(self):
try: try:
def addSubDirectoriesToCache(directory): def addSubDirectoriesToCache(directory):
depth = len(directory.replace(self.disk_usage.path_resolver.projects_path, '').strip('/').split('/')) depth = self.getDepthToProjectsDir(directory)
#depth=0 : projects
#depth=1 : projects/<project>
#depth=2 : projects/<project>/<obs>
#depth=3 : projects/<project>/<obs>/<sub_dir>
if depth > 3: if depth > 3:
return return
......
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Please register or sign in to comment