Source code for data_model.schema
"""Module providing functionality to read and validate dictionaries using schema."""
import logging
from pathlib import Path
import jsonschema
import simtools.utils.general as gen
from simtools.constants import (
METADATA_JSON_SCHEMA,
MODEL_PARAMETER_METASCHEMA,
MODEL_PARAMETER_SCHEMA_PATH,
SCHEMA_PATH,
)
from simtools.data_model import format_checkers
from simtools.utils import names
_logger = logging.getLogger(__name__)
[docs]
def get_get_model_parameter_schema_files(schema_directory=MODEL_PARAMETER_SCHEMA_PATH):
"""
Return list of parameters and schema files located in schema file directory.
Returns
-------
list
List of parameters found in schema file directory.
list
List of schema files found in schema file directory.
"""
schema_files = sorted(Path(schema_directory).rglob("*.schema.yml"))
if not schema_files:
raise FileNotFoundError(f"No schema files found in {schema_directory}")
parameters = []
for schema_file in schema_files:
schema_dict = gen.collect_data_from_file(file_name=schema_file)
parameters.append(schema_dict.get("name"))
return parameters, schema_files
[docs]
def get_model_parameter_schema_file(parameter):
"""
Return schema file path for a given model parameter.
Parameters
----------
parameter: str
Model parameter name.
Returns
-------
Path
Schema file path.
"""
schema_file = MODEL_PARAMETER_SCHEMA_PATH / f"{parameter}.schema.yml"
if not schema_file.exists():
raise FileNotFoundError(f"Schema file not found: {schema_file}")
return schema_file
[docs]
def get_model_parameter_schema_version(schema_version=None):
"""
Validate and return schema versions.
If no schema_version is given, the most recent version is provided.
Parameters
----------
schema_version: str
Schema version.
Returns
-------
str
Schema version.
"""
schemas = gen.collect_data_from_file(MODEL_PARAMETER_METASCHEMA)
if schema_version is None and schemas:
return schemas[0].get("version")
if any(schema.get("version") == schema_version for schema in schemas):
return schema_version
raise ValueError(f"Schema version {schema_version} not found in {MODEL_PARAMETER_METASCHEMA}.")
[docs]
def validate_dict_using_schema(data, schema_file=None, json_schema=None):
"""
Validate a data dictionary against a schema.
Parameters
----------
data
dictionary to be validated
schema_file (dict)
schema used for validation
Raises
------
jsonschema.exceptions.ValidationError
if validation fails
"""
if json_schema is None and schema_file is None:
_logger.warning(f"No schema provided for validation of {data}")
return
if json_schema is None:
json_schema = load_schema(
schema_file,
data.get("schema_version")
or data.get(
"SCHEMA_VERSION", "0.1.0"
), # default version to ensure backward compatibility
)
try:
jsonschema.validate(data, schema=json_schema, format_checker=format_checkers.format_checker)
except jsonschema.exceptions.ValidationError as exc:
_logger.error(f"Validation failed using schema: {json_schema}")
raise exc
if data.get("meta_schema_url") and not gen.url_exists(data["meta_schema_url"]):
raise FileNotFoundError(f"Meta schema URL does not exist: {data['meta_schema_url']}")
_logger.debug(f"Successful validation of data using schema ({json_schema.get('name')})")
[docs]
def load_schema(schema_file=None, schema_version=None):
"""
Load parameter schema from file.
Parameters
----------
schema_file: str
Path to schema file.
schema_version: str
Schema version.
Returns
-------
schema: dict
Schema dictionary.
Raises
------
FileNotFoundError
if schema file is not found
"""
schema_file = schema_file or METADATA_JSON_SCHEMA
for path in (schema_file, SCHEMA_PATH / schema_file):
try:
schema = gen.collect_data_from_file(file_name=path)
break
except FileNotFoundError:
continue
else:
raise FileNotFoundError(f"Schema file not found: {schema_file}")
if isinstance(schema, list): # schema file with several schemas defined
if schema_version is None:
raise ValueError(f"Schema version not given in {schema_file}.")
schema = next((doc for doc in schema if doc.get("version") == schema_version), None)
if schema is None:
raise ValueError(f"Schema version {schema_version} not found in {schema_file}.")
elif schema_version is not None and schema_version != schema.get("version"):
_logger.warning(f"Schema version {schema_version} does not match {schema.get('version')}")
_logger.debug(f"Loading schema from {schema_file}")
_add_array_elements("InstrumentTypeElement", schema)
return schema
def _add_array_elements(key, schema):
"""
Add list of array elements to schema.
Avoids having to list all array elements in multiple schema.
Assumes an element [key]['enum'] is a list of elements.
Parameters
----------
key: str
Key in schema dictionary
schema: dict
Schema dictionary
Returns
-------
dict
Schema dictionary with added array elements.
"""
_list_of_array_elements = sorted(names.array_elements().keys())
def recursive_search(sub_schema, key):
if key in sub_schema:
if "enum" in sub_schema[key] and isinstance(sub_schema[key]["enum"], list):
sub_schema[key]["enum"] = list(
set(sub_schema[key]["enum"] + _list_of_array_elements)
)
else:
sub_schema[key]["enum"] = _list_of_array_elements
else:
for _, v in sub_schema.items():
if isinstance(v, dict):
recursive_search(v, key)
recursive_search(schema, key)
return schema