"""Utilities for managing the simulation models repository.
Simulation model parameters and production tables are managed through
a gitlab repository ('SimulationModels'). This module provides service
functions to interact with and verify the repository.
"""
import json
import logging
import shutil
from pathlib import Path
from simtools.io import ascii_handler
from simtools.utils import names
_logger = logging.getLogger(__name__)
[docs]
def verify_simulation_model_production_tables(simulation_models_path):
"""
Verify the simulation model production tables in the specified path.
Checks that all model parameters defined in the production tables are
present in the simulation models repository.
Parameters
----------
simulation_models_path : str
Path to the simulation models repository.
Returns
-------
bool
True if all parameters found, False if any missing.
"""
productions_path = Path(simulation_models_path) / "simulation-models" / "productions"
production_files = list(productions_path.rglob("*.json"))
_logger.info(
f"Verifying {len(production_files)} simulation model production "
f"tables in {simulation_models_path}"
)
missing_files = []
total_checked = 0
for production_file in production_files:
file_missing, file_checked = _verify_model_parameters_for_production(
simulation_models_path, production_file
)
missing_files.extend(file_missing)
total_checked += file_checked
_logger.info(f"Checked {total_checked} parameters, {len(missing_files)} missing")
if missing_files:
for missing_file in missing_files:
_logger.error(f"Missing: {missing_file}")
return False
_logger.info("Verification passed: All parameters found")
return True
def _verify_model_parameters_for_production(simulation_models_path, production_file):
"""
Verify that model parameters defined in the production tables exist.
Parameters
----------
simulation_models_path : str
Path to the simulation models repository.
production_file : Path
Path to the production file.
Returns
-------
tuple
(missing_files_list, total_checked_count)
"""
production_table = ascii_handler.collect_data_from_file(production_file)
missing_files = []
total_checked = 0
parameters = production_table.get("parameters", {})
for array_element, par_dict in parameters.items():
if isinstance(par_dict, dict):
for param_name, param_version in par_dict.items():
total_checked += 1
parameter_file = _get_model_parameter_file_path(
simulation_models_path, array_element, param_name, param_version
)
if parameter_file and not parameter_file.exists():
missing_files.append(str(parameter_file))
return missing_files, total_checked
def _get_model_parameter_file_path(
simulation_models_path, array_element, parameter_name, parameter_version
):
"""
Get the file path for a model parameter.
Take into account path structure based on collections and array elements.
Parameters
----------
simulation_models_path : str
Path to the simulation models repository.
array_element : str
Name of the array element (e.g., 'telescope').
parameter_name : str
Name of the parameter.
parameter_version : str
Version of the parameter.
Returns
-------
Path
The file path to the model parameter JSON file.
"""
collection = names.get_collection_name_from_parameter_name(parameter_name)
return (
Path(simulation_models_path)
/ "simulation-models"
/ "model_parameters"
/ (
collection
if collection in ("configuration_sim_telarray", "configuration_corsika")
else ""
)
/ (array_element if collection != "configuration_corsika" else "")
/ parameter_name
/ f"{parameter_name}-{parameter_version}.json"
)
[docs]
def copy_and_update_production_table(args_dict):
"""
Copy and update simulation model production tables.
Parameters
----------
args_dict: dict
Dictionary containing the arguments for copying and updating production tables.
"""
modifications = ascii_handler.collect_data_from_file(args_dict["modifications"])
changes = modifications.get("changes", {})
model_version = modifications["model_version"]
simulation_models_path = Path(args_dict["simulation_models_path"])
source_prod_table_path = (
simulation_models_path / "productions" / args_dict["source_prod_table_dir"]
)
target_prod_table_path = simulation_models_path / "productions" / model_version
model_parameters_dir = simulation_models_path / "model_parameters"
_logger.info(
f"Copying production tables from {source_prod_table_path} to {target_prod_table_path}"
)
if Path(target_prod_table_path).exists():
raise FileExistsError(
f"The target production table directory '{target_prod_table_path}' already exists."
)
shutil.copytree(source_prod_table_path, target_prod_table_path)
_apply_changes_to_production_tables(target_prod_table_path, changes, model_version)
for telescope, parameters in changes.items():
for param, param_data in parameters.items():
if param_data.get("value"):
_create_new_parameter_entry(telescope, param, param_data, model_parameters_dir)
def _apply_changes_to_production_tables(target_prod_table_path, changes, model_version):
"""Apply changes to the production tables in the target directory."""
for file_path in Path(target_prod_table_path).rglob("*.json"):
if file_path.name.startswith("configuration"):
continue
data = ascii_handler.collect_data_from_file(file_path)
_apply_changes_to_production_table(data, changes, model_version)
with file_path.open("w", encoding="utf-8") as f:
json.dump(data, f, indent=4, sort_keys=True)
f.write("\n")
def _apply_changes_to_production_table(data, changes, model_version):
"""
Recursively apply changes to the new production tables.
Parameters
----------
data: dict or list
The JSON data to be updated.
changes: dict
The changes to be applied.
model_version: str
The model version to be set in the JSON data.
"""
if isinstance(data, dict):
if "model_version" in data:
data["model_version"] = model_version
_update_parameters(data.get("parameters", {}), changes)
elif isinstance(data, list):
for item in data:
_apply_changes_to_production_table(item, changes, model_version)
def _update_parameters(params, changes):
"""Update parameters in the given dictionary based on changes."""
for telescope, updates in changes.items():
if telescope not in params:
continue
for param, param_data in updates.items():
if param in params[telescope]:
old = params[telescope][param]
new = param_data["version"]
_logger.info(f"Updating '{telescope} - {param}' from {old} to {new}")
params[telescope][param] = new
else:
_logger.info(
f"Adding new parameter '{telescope} - {param}' "
f"with version {param_data['version']}"
)
params[telescope][param] = param_data["version"]
def _create_new_parameter_entry(telescope, param, param_data, model_parameters_dir):
"""
Create new model parameter JSON file by copying the latest version and updating fields.
Parameters
----------
telescope: str
Name of the telescope.
param: str
Name of the parameter.
param_data: dict
Dictionary containing the parameter data including version and value.
model_parameters_dir: str
Path to the model parameters directory.
"""
telescope_dir = Path(model_parameters_dir) / telescope
if not telescope_dir.exists():
raise FileNotFoundError(
f"Directory for telescope '{telescope}' does not exist in '{model_parameters_dir}'."
)
param_dir = telescope_dir / param
if not param_dir.exists():
raise FileNotFoundError(
f"Directory for parameter '{param}' does not exist in '{telescope}'."
)
latest_file = _get_latest_model_parameter_file(param_dir, param)
if not latest_file:
raise FileNotFoundError(
f"No files found for parameter '{param}' in directory '{param_dir}'."
)
json_data = ascii_handler.collect_data_from_file(latest_file)
json_data["parameter_version"] = _update_model_parameter_version(
json_data, param_data, param, telescope
)
json_data["value"] = param_data["value"]
new_file_name = f"{param}-{param_data['version']}.json"
new_file_path = param_dir / new_file_name
with new_file_path.open("w", encoding="utf-8") as f:
json.dump(json_data, f, indent=4)
f.write("\n")
_logger.info(f"Created new model parameter JSON file: {new_file_path}")
def _get_latest_model_parameter_file(directory, parameter):
"""
Get the latest model parameter JSON file for a parameter in the given directory.
Assume files are named in the format 'parameter-version.json'.
Parameters
----------
directory: str
Path to the directory containing parameter JSON files.
parameter: str
Name of the parameter to find.
Returns
-------
str
Path to the latest JSON file for the parameter.
Raises
------
FileNotFoundError
If no files for the parameter are found in the directory.
"""
directory_path = Path(directory)
files = list(directory_path.glob(f"{parameter}-*.json"))
if not files:
raise FileNotFoundError(
f"No JSON files found for parameter '{parameter}' in directory '{directory}'."
)
# Sort files by version number (assumes version is part of the filename)
def safe_parse_version(filename):
version_str = filename.stem.split("-")[-1]
parts = version_str.split(".")
return tuple(part.zfill(8) for part in parts)
files.sort(key=safe_parse_version)
return str(files[-1])
def _update_model_parameter_version(json_data, param_data, param, telescope):
"""Check for major version jump and print a warning if necessary."""
latest_version = int(json_data.get("parameter_version", "0").split(".")[0])
new_version = int(param_data["version"].split(".")[0])
if new_version > latest_version + 1:
_logger.warning(
f"Major version jump from {latest_version} to {new_version} "
f"for parameter '{param}' in telescope '{telescope}'."
)
return param_data["version"]