diff --git a/SAS/DataManagement/StorageQueryService/cache.py b/SAS/DataManagement/StorageQueryService/cache.py index 43bd5fdbd411eafc6887afdf040dd5d82abf2ab5..45b61063f80c212cfe989cfdcae17201b022bc45 100644 --- a/SAS/DataManagement/StorageQueryService/cache.py +++ b/SAS/DataManagement/StorageQueryService/cache.py @@ -103,6 +103,10 @@ class CacheManager: self._cache = eval(file.read().strip()) if not isinstance(self._cache, dict): self._cache = {'path_du_results': {}, 'otdb_id2path': {} } + if 'path_du_results' not in self._cache: + self._cache['path_du_results'] = {} + if 'otdb_id2path' not in self._cache: + self._cache['otdb_id2path'] = {} except Exception as e: logger.error("Error while reading in du cache: %s", e) with self._cacheLock: @@ -111,11 +115,20 @@ class CacheManager: def _writeCacheToDisk(self): try: - if datetime.datetime.utcnow() - self._last_cache_write_timestamp > datetime.timedelta(minutes=5): + # only persist (a subset of) the cache to disk every once in a while. + if datetime.datetime.utcnow() - self._last_cache_write_timestamp > datetime.timedelta(minutes=0.2): tmp_path = '/tmp/tmp_storagequery_cache.py' cache_str = '' with self._cacheLock: - cache_str = str(self._cache) + # Take a subset of the entire cache + # only the path_du_results for paths at project level (like /data/projects, /data/projects/LC9_001) + # Do not store path_du_results for deeper levels on disk, because that makes the disk read/write too slow, + # and the deeper levels can be obtained via rhb-du calls quite fast anyway. + # Furthermore, once a deeper-level du result is stored in the memory cache, it is also available for fast lookup. + # We just don't store these deep levels on disk. 
+ sub_cache = { path:du_result for path,du_result in self._cache['path_du_results'].items() + if self.getDepthToProjectsDir(path) <= 1 } + cache_str = str(sub_cache) with open(tmp_path, 'w') as file: file.write(cache_str) @@ -179,10 +192,17 @@ class CacheManager: return result + def getDepthToProjectsDir(self, path): + return len(path.replace(self.disk_usage.path_resolver.projects_path, '').strip('/').split('/')) + def _scanProjectsTree(self): try: def addSubDirectoriesToCache(directory): - depth = len(directory.replace(self.disk_usage.path_resolver.projects_path, '').strip('/').split('/')) + depth = self.getDepthToProjectsDir(directory) + #depth=1 : projects (the root also yields 1, never 0: ''.split('/') == ['']) + #depth=1 : projects/<project> + #depth=2 : projects/<project>/<obs> + #depth=3 : projects/<project>/<obs>/<sub_dir> if depth > 3: return