Source code for generate_simtel_event_data

#!/usr/bin/python3

"""
Reduces and compiles event data from multiple input files into a structured dataset with event info.

Command line arguments
----------------------
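prefix (str, required)
    Path prefix (directory) in which the input files are searched.
wildcard (str, required)
    Wildcard used to select the input files below the prefix (e.g., 'gamma_*dark*.simtel.zst').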
output_file (str, required)
    Path to save the output file.
max_files (int, optional, default=100)
    Maximum number of files to process.
print_dataset_information (int, optional, default=0)
    Number of rows of the generated reduced event dataset to print (nothing is printed for 0).

Example
-------
Generate a reduced dataset from input files and save the result.

.. code-block:: console

    simtools-generate-simtel-event-data \
        --prefix path/to/input_files/ \
        --wildcard 'gamma_*dark*.simtel.zst' \
        --output_file output_file.hdf5 \
        --max_files 50 \
        --print_dataset_information 10
"""

import logging
from pathlib import Path

import simtools.utils.general as gen
from simtools.configuration import configurator
from simtools.io_operations import io_handler
from simtools.simtel.simtel_io_event_reader import SimtelIOEventDataReader
from simtools.simtel.simtel_io_event_writer import SimtelIOEventDataWriter


def _parse(label, description):
    """
    Set up the application configurator and evaluate the command line.

    Parameters
    ----------
    label : str
        Label handed to the configurator.
    description : str
        Description handed to the configurator.

    Returns
    -------
    tuple
        Result of ``Configurator.initialize(db_config=False)``; ``main`` unpacks it
        as a pair whose first element is the dictionary of parsed arguments.
    """
    cfg = configurator.Configurator(label=label, description=description)

    # Option table: registration order is kept because it defines the order of the help output.
    option_table = (
        (
            "--prefix",
            {"type": str, "required": True, "help": "Prefix path for input files."},
        ),
        (
            "--wildcard",
            {
                "type": str,
                "required": True,
                "help": (
                    "Wildcard for querying the files in the directory "
                    "(e.g., 'gamma_*dark*.simtel.zst')"
                ),
            },
        ),
        (
            "--output_file",
            {"type": str, "required": True, "help": "Output filename."},
        ),
        (
            "--max_files",
            {"type": int, "default": 100, "help": "Maximum number of files to process."},
        ),
        (
            "--print_dataset_information",
            {
                "type": int,
                "help": "Print given number of rows of the dataset.",
                "default": 0,
            },
        ),
    )
    for flag, settings in option_table:
        cfg.parser.add_argument(flag, **settings)

    return cfg.initialize(db_config=False)


def main():
    """
    Process event data files and store data in reduced dataset.

    The reduced dataset contains shower information, array information and
    triggered telescopes.

    Input files are found by globbing the ``wildcard`` argument below the ``prefix``
    path. If nothing matches, a warning is logged and the function returns without
    writing anything. Otherwise the files are passed to ``SimtelIOEventDataWriter``
    and, if ``print_dataset_information`` is larger than zero, the written file is
    read back and that number of rows is printed.

    Returns
    -------
    None
    """
    label = Path(__file__).stem
    args_dict, _ = _parse(
        label=label,
        description=(
            "Process files and store reduced dataset with event information, "
            "array information and triggered telescopes."
        ),
    )

    _logger = logging.getLogger()
    _logger.setLevel(gen.get_log_level_from_user(args_dict["log_level"]))
    _logger.info(f"Loading input files with prefix: {args_dict['prefix']}")

    input_path = Path(args_dict["prefix"])
    # Sort the matches: glob order depends on the file system, so without sorting the
    # file list handed to the writer (together with max_files) would not be reproducible.
    files = sorted(input_path.glob(args_dict["wildcard"]))
    if not files:
        _logger.warning("No matching input files found.")
        return

    # The output file name is resolved relative to the IO handler's output directory.
    output_path = io_handler.IOHandler().get_output_directory(label)
    output_filepath = Path(output_path).joinpath(args_dict["output_file"])
    output_filepath.parent.mkdir(parents=True, exist_ok=True)

    generator = SimtelIOEventDataWriter(files, output_filepath, args_dict["max_files"])
    generator.process_files()
    _logger.info(f"reduced dataset saved to: {output_filepath}")

    n_rows_to_print = args_dict["print_dataset_information"]
    if n_rows_to_print > 0:
        reader = SimtelIOEventDataReader(output_filepath)
        reader.print_dataset_information(n_rows_to_print)
# Run the application only when the module is executed as a script.
if __name__ == "__main__":
    main()