Source code for production_merge_corsika_limits

#!/usr/bin/python3

r"""
Merge CORSIKA limit tables from multiple grid points and check grid completeness.

The CORSIKA limit tables are first generated by the application
'simtools-production-derive-corsika-limits' and contain derived limits for
energy, radial distance, and viewcone for different observational conditions.
The individual tables are merged into a single table for each grid point by
the application 'merge_tables'. This tool finalizes the merging process and
checks the completeness of the grid against a provided grid definition file.

This tool supports three main use cases:

1. Merge multiple CORSIKA limit tables into a single file and optionally generate
   plots of the derived limits.
2. Merge tables and also check for grid completeness against a provided grid
   definition file. This requires the --grid_definition parameter. Coverage plots
   can also be generated.
3. Check grid completeness of an already merged table file. This requires both
   the --merged_table and --grid_definition parameters.

Command line arguments
----------------------
input_files (str)
    Directory containing corsika_simulation_limits_lookup*.ecsv files to be merged,
    or a list of specific files to merge. Not used if --merged_table is provided.
input_files_list (str)
    Path to a text file containing a list of input files (one file path per line)
    to be merged. Not used if --merged_table is provided.
merged_table (str)
    Path to an already merged table file. Used for checking grid completeness.
grid_definition (str)
    Path to a YAML file defining the expected grid points. Required for grid
    completeness checks and coverage plots.
output_file (str, optional)
    Name of the output file for the merged limits table.
    Default is "merged_corsika_limits.ecsv".
plot_grid_coverage (bool, optional)
    Flag to generate plots showing grid coverage. Requires --grid_definition.
plot_limits (bool, optional)
    Flag to generate plots showing the derived limits.

Examples
--------
1. Merge CORSIKA limit tables from a directory:

   .. code-block:: console

      simtools-production-merge-corsika-limits \
          --input_files "simtools-output/corsika_limits/" \
          --output_file merged_limits.ecsv --plot_limits

2. Merge tables and check grid completeness:

   .. code-block:: console

      simtools-production-merge-corsika-limits \
          --input_files "simtools-output/corsika_limits/" \
          --grid_definition grid_definition.yaml \
          --output_file merged_limits.ecsv --plot_grid_coverage

3. Check grid completeness of an existing merged table:

   .. code-block:: console

      simtools-production-merge-corsika-limits \
          --merged_table merged_limits.ecsv \
          --grid_definition grid_definition.yaml --plot_grid_coverage

4. Merge tables using a list of files from a text file:

   .. code-block:: console

      simtools-production-merge-corsika-limits \
          --input_files_list file_list.txt \
          --output_file merged_limits.ecsv
"""

import logging
from pathlib import Path

import simtools.utils.general as gen
from simtools.configuration import configurator
from simtools.data_model import data_reader
from simtools.production_configuration.merge_corsika_limits import CorsikaMergeLimits

_logger = logging.getLogger(__name__)


def _parse():
    """
    Define the application-specific command line options and parse them.

    Registers the options for selecting input tables (``--input_files``,
    ``--input_files_list``, ``--merged_table``), the grid definition
    (``--grid_definition``), and the two plotting switches
    (``--plot_grid_coverage``, ``--plot_limits``) on the configurator's parser.

    Returns
    -------
    object
        Whatever ``Configurator.initialize(output=True)`` returns; ``main``
        unpacks it into two values and uses the first as the argument dictionary.
    """
    config = configurator.Configurator(
        description="Merge CORSIKA limit tables and check grid completeness."
    )
    add_option = config.parser.add_argument

    # One or more paths; a single directory is expanded by the caller.
    add_option(
        "--input_files",
        nargs="+",
        type=str,
        default=None,
        help=(
            "A list of input files to be merged, "
            "or a single directory containing the files (*.ecsv)."
        ),
    )

    # Single-valued string options, all optional and defaulting to None.
    single_value_options = (
        (
            "--input_files_list",
            "Path to a text file containing a list of input files "
            "(one file path per line) to be merged.",
        ),
        ("--merged_table", "Path to an already merged table file."),
        ("--grid_definition", "Path to YAML file defining the expected grid points."),
    )
    for option_name, help_text in single_value_options:
        add_option(option_name, type=str, default=None, help=help_text)

    # Boolean switches, off unless given on the command line.
    switches = (
        ("--plot_grid_coverage", "Generate plots showing grid coverage."),
        ("--plot_limits", "Generate plots showing the derived limits."),
    )
    for switch_name, help_text in switches:
        add_option(switch_name, action="store_true", default=False, help=help_text)

    return config.initialize(output=True)


# Output file name used when none is configured (as documented in the module docstring).
_DEFAULT_OUTPUT_FILE = "merged_corsika_limits.ecsv"


def _collect_input_files(args_dict, merger):
    """
    Resolve ``--input_files`` and ``--input_files_list`` into the list of files to merge.

    Parameters
    ----------
    args_dict : dict
        Application configuration; the keys ``input_files`` and
        ``input_files_list`` are read.
    merger : CorsikaMergeLimits
        Used to read the file list given by ``input_files_list``.

    Returns
    -------
    list
        Files to merge: entries from ``input_files`` first (as ``Path`` objects),
        followed by whatever ``merger.read_file_list`` returns.

    Raises
    ------
    FileNotFoundError
        If no input file could be determined.
    """
    input_files = []

    raw_paths = args_dict.get("input_files")
    if raw_paths:
        # A lone path given as a plain string must not be iterated character by character.
        if isinstance(raw_paths, (str, Path)):
            raw_paths = [raw_paths]
        candidates = [Path(entry).expanduser() for entry in raw_paths]
        if len(candidates) == 1 and candidates[0].is_dir():
            # Directory listing order is arbitrary; sort for a reproducible merge order.
            input_files.extend(sorted(candidates[0].glob("*.ecsv")))
        else:
            input_files.extend(candidates)

    if args_dict.get("input_files_list"):
        input_files.extend(merger.read_file_list(args_dict["input_files_list"]))

    if not input_files:
        raise FileNotFoundError(
            "No input files found. Check --input_files or --input_files_list arguments."
        )
    return input_files


def main():
    """
    Merge CORSIKA limit tables and check grid completeness.

    Depending on the command line configuration, either an already merged table
    is read (``--merged_table``) or the given input tables are merged
    (``--input_files`` and/or ``--input_files_list``). The grid completeness
    check is then run on the resulting table, optional plots are produced, and,
    only when tables were merged, the merged table is written to the output
    directory of the merger together with the completeness summary.

    Raises
    ------
    FileNotFoundError
        If merging is requested but no input files are found.
    ValueError
        If none of ``--input_files``, ``--input_files_list``, or
        ``--merged_table`` is provided.
    """
    args_dict, _ = _parse()

    logger = logging.getLogger()
    logger.setLevel(gen.get_log_level_from_user(args_dict.get("log_level", "info")))

    merger = CorsikaMergeLimits()

    grid_definition = (
        gen.collect_data_from_file(args_dict["grid_definition"])
        if args_dict.get("grid_definition")
        else None
    )

    # --merged_table takes precedence over the input file options.
    merging = not args_dict.get("merged_table")

    if not merging:
        # Case 3: check coverage on an existing merged table
        merged_table_path = Path(args_dict["merged_table"]).expanduser()
        merged_table = data_reader.read_table_from_file(merged_table_path)
    elif args_dict.get("input_files") or args_dict.get("input_files_list"):
        # Case 1 & 2: merge files
        input_files = _collect_input_files(args_dict, merger)
        merged_table = merger.merge_tables(input_files)
    else:
        raise ValueError(
            "Either --input_files, --input_files_list, or --merged_table must be provided."
        )

    # Called even without a grid definition (grid_definition is None then);
    # how that case is treated is decided by CorsikaMergeLimits.
    is_complete, grid_completeness = merger.check_grid_completeness(merged_table, grid_definition)

    if args_dict.get("plot_grid_coverage"):
        merger.plot_grid_coverage(merged_table, grid_definition)

    if args_dict.get("plot_limits"):
        merger.plot_limits(merged_table)

    if merging:
        # Write output file only when merging; fall back to the documented
        # default name when no output file is configured.
        output_name = args_dict.get("output_file") or _DEFAULT_OUTPUT_FILE
        output_file = merger.output_dir / output_name
        merger.write_merged_table(
            merged_table,
            output_file,
            input_files,
            {
                "is_complete": is_complete,
                "expected": grid_completeness.get("expected", 0),
                "found": grid_completeness.get("found", 0),
                "missing": grid_completeness.get("missing", []),
            },
        )
# Run the application only when this file is executed as a script.
if __name__ == "__main__":
    main()