diff --git a/LCS/PyCommon/json_utils.py b/LCS/PyCommon/json_utils.py
index 963e397174ee5943fa038d869af8c78edcaae33e..74dcfa1db09902b6bc078b99521a01a0396a6713 100644
--- a/LCS/PyCommon/json_utils.py
+++ b/LCS/PyCommon/json_utils.py
@@ -16,6 +16,8 @@
 # with the LOFAR software suite. If not, see <http://www.gnu.org/licenses/>.
 
 import json
+import time
+
 import jsonschema
 from copy import deepcopy
 import requests
@@ -159,19 +161,31 @@ def get_referenced_subschema(ref_url, cache: dict=None, max_cache_age: timedelta
     '''fetch the schema given by the ref_url, and get the sub-schema given by the #/ path in the ref_url'''
     # deduct referred schema name and version from ref-value
     head, anchor, tail = ref_url.partition('#')
+
+    def _fech_url_and_update_cache_entry_if_needed():
+        # try to fetch the url a few time (jsonschema.org is down quite often, but only for a brief moment)
+        for attempt_nr in range(5):
+            try:
+                response = requests.get(ref_url)
+                if response.status_code == 200:
+                    referenced_schema = json.loads(response.text)
+                    if isinstance(cache, dict):
+                        cache[head] = referenced_schema, datetime.utcnow()
+                    return referenced_schema
+            except requests.exceptions.RequestException as e:
+                time.sleep(2) # retry after a little sleep
+        raise Exception("Could not get: %s" % (ref_url,))
+
     if isinstance(cache, dict) and head in cache:
         # use cached value
         referenced_schema, last_update_timestamp = cache[head]
 
         # refresh cache if outdated
         if datetime.utcnow() - last_update_timestamp > max_cache_age:
-            referenced_schema = json.loads(requests.get(ref_url).text)
-            cache[head] = referenced_schema, datetime.utcnow()
+            referenced_schema = _fech_url_and_update_cache_entry_if_needed()
     else:
         # fetch url, and store in cache
-        referenced_schema = json.loads(requests.get(ref_url).text)
-        if isinstance(cache, dict):
-            cache[head] = referenced_schema, datetime.utcnow()
+        referenced_schema = _fech_url_and_update_cache_entry_if_needed()
 
     # extract sub-schema
     tail = tail.strip('/')
@@ -222,13 +236,12 @@ def get_refs(schema) -> set:
     return refs
 
 
-def validate_json_against_its_schema(json_object: dict):
+def validate_json_against_its_schema(json_object: dict, cache: dict=None, max_cache_age: timedelta=DEFAULT_MAX_SCHEMA_CACHE_AGE):
     '''validate the give json object against its own schema (the URI/URL that its propery $schema points to)'''
     schema_url = json_object['$schema']
-    response = requests.get(schema_url, headers={"Accept":"application/json"})
-    if response.status_code == 200:
-        return validate_json_against_schema(json_object, response.text)
-    raise jsonschema.exceptions.ValidationError("Could not get schema from '%s'\n%s" % (schema_url, str(response.text)))
+    schema_object = get_referenced_subschema(schema_url, cache=cache, max_cache_age=max_cache_age)
+    return validate_json_against_schema(json_object, schema_object)
+
 
 def validate_json_against_schema(json_string: str, schema: str):
     '''validate the given json_string against the given schema.