diff --git a/LCS/PyCommon/json_utils.py b/LCS/PyCommon/json_utils.py index f270198563025baf737c2d3028dccc390f0e3428..963e397174ee5943fa038d869af8c78edcaae33e 100644 --- a/LCS/PyCommon/json_utils.py +++ b/LCS/PyCommon/json_utils.py @@ -19,6 +19,9 @@ import json import jsonschema from copy import deepcopy import requests +from datetime import datetime, timedelta + +DEFAULT_MAX_SCHEMA_CACHE_AGE = timedelta(minutes=1) def _extend_with_default(validator_class): """ @@ -109,7 +112,7 @@ def get_default_json_object_for_schema(schema: str) -> dict: '''return a valid json object for the given schema with all properties with their default values''' return add_defaults_to_json_object_for_schema({}, schema) -def add_defaults_to_json_object_for_schema(json_object: dict, schema: str) -> dict: +def add_defaults_to_json_object_for_schema(json_object: dict, schema: str, cache: dict=None, max_cache_age: timedelta=DEFAULT_MAX_SCHEMA_CACHE_AGE) -> dict: '''return a copy of the json object with defaults filled in according to the schema for all the missing properties''' copy_of_json_object = deepcopy(json_object) @@ -118,7 +121,7 @@ def add_defaults_to_json_object_for_schema(json_object: dict, schema: str) -> di copy_of_json_object['$schema'] = schema['$id'] # resolve $refs to fill in defaults for those, too - schema = resolved_refs(schema) + schema = resolved_refs(schema, cache=cache, max_cache_age=max_cache_age) # run validator, which populates the properties with defaults. get_validator_for_schema(schema, add_defaults=True).validate(copy_of_json_object) @@ -152,16 +155,23 @@ def replace_host_in_urls(schema, new_base_url: str, keys=['$id', '$ref', '$schem return schema -def get_referenced_subschema(ref_url, cache: dict=None): +def get_referenced_subschema(ref_url, cache: dict=None, max_cache_age: timedelta=DEFAULT_MAX_SCHEMA_CACHE_AGE): '''fetch the schema given by the ref_url, and get the sub-schema given by the #/ path in the ref_url''' # deduct referred schema name and version from ref-value head, anchor, tail = ref_url.partition('#') if isinstance(cache, dict) and head in cache: - referenced_schema = cache[head] + # use cached value + referenced_schema, last_update_timestamp = cache[head] + + # refresh cache if outdated + if datetime.utcnow() - last_update_timestamp > max_cache_age: + referenced_schema = json.loads(requests.get(ref_url).text) + cache[head] = referenced_schema, datetime.utcnow() else: + # fetch url, and store in cache referenced_schema = json.loads(requests.get(ref_url).text) if isinstance(cache, dict): - cache[head] = referenced_schema + cache[head] = referenced_schema, datetime.utcnow() # extract sub-schema tail = tail.strip('/') @@ -173,7 +183,7 @@ def get_referenced_subschema(ref_url, cache: dict=None): return referenced_schema -def resolved_refs(schema, cache: dict=None): +def resolved_refs(schema, cache: dict=None, max_cache_age: timedelta=DEFAULT_MAX_SCHEMA_CACHE_AGE): '''return the given schema with all $ref fields replaced by the referred json (sub)schema that they point to.''' if cache is None: cache = {} @@ -183,7 +193,7 @@ def resolved_refs(schema, cache: dict=None): keys = list(schema.keys()) if "$ref" in keys and isinstance(schema['$ref'], str) and schema['$ref'].startswith('http'): keys.remove("$ref") - referenced_subschema = get_referenced_subschema(schema['$ref'], cache) + referenced_subschema = get_referenced_subschema(schema['$ref'], cache=cache, max_cache_age=max_cache_age) updated_schema = resolved_refs(referenced_subschema, cache) for key in keys: