Source code for simtel.simtel_io_event_histograms

"""Histograms for shower and triggered events."""

import copy
import logging

import astropy.units as u
import numpy as np

from simtools.simtel.simtel_io_event_reader import SimtelIOEventDataReader



[docs]
class SimtelIOEventHistograms:
    """
    Generate and fill histograms for shower and triggered events.

    Event data is read from the reduced MC event data file.
    Calculate cumulative and relative (efficiency) distributions.

    Parameters
    ----------
    event_data_file : str
        Path to the event-data file.
    array_name : str, optional
        Name of the telescope array configuration (default is None).
    telescope_list : list, optional
        List of telescope IDs to filter the events (default is None).
    """

    def __init__(self, event_data_file, array_name=None, telescope_list=None):
        """Initialize."""
        self._logger = logging.getLogger(__name__)
        self.event_data_file = event_data_file
        self.array_name = array_name

        self.histograms = {}
        self.file_info = {}

        self.reader = SimtelIOEventDataReader(event_data_file, telescope_list=telescope_list)


[docs]
    def fill(self):
        """
        Fill histograms with event data.

        Involves looping over all event data, and therefore is the slowest part of the
        histogram module. Adds the histograms to the histogram dictionary.

        Assume that all event data files are generated with similar configurations
        (self.file_info contains the file info of the last file).
        """
        for data_set in self.reader.data_sets:
            self._logger.info(f"Reading event data from {self.event_data_file} for {data_set}")
            _file_info_table, shower_data, event_data, triggered_data = self.reader.read_event_data(
                self.event_data_file, table_name_map=data_set
            )
            _file_info_table = self.reader.get_reduced_simulation_file_info(_file_info_table)
            self.file_info = {
                "energy_min": _file_info_table["energy_min"].to("TeV"),
                "core_scatter_max": _file_info_table["core_scatter_max"].to("m"),
                "viewcone_max": _file_info_table["viewcone_max"].to("deg"),
                "solid_angle": _file_info_table["solid_angle"].to("sr"),
                "scatter_area": _file_info_table["scatter_area"].to("cm2"),
            }

            self.histograms = self._define_histograms(event_data, triggered_data, shower_data)

            for name, data in self.histograms.items():
                self._logger.debug(f"Filling histogram {name}")
                self._fill_histogram_and_bin_edges(data)

        self.print_summary()

        self.calculate_efficiency_data()
        self.calculate_cumulative_data()


    def _define_histograms(self, event_data, triggered_data, shower_data):
        """
        Define histograms including event data, binning, naming, and labels.

        All histograms are defined for simulated and triggered events (note
        the subtlety of triggered events being read from event_data and triggered_data).

        Parameters
        ----------
        event_data : EventData
            The event data to use for filling the histograms.
        triggered_data : TriggeredData
            The triggered data to use for filling the histograms.
        shower_data : ShowerData
            The shower data to use for filling the histograms.

        Returns
        -------
        dict
            Dictionary with histogram definitions.
        """
        xy_bins = np.linspace(
            -1.0 * self.core_distance_bins.max(),
            self.core_distance_bins.max(),
            len(self.core_distance_bins),
        )
        hists = {}

        energy_axis_title = "Energy (TeV)"
        event_count_axis_title = "Event Count"

        definitions = {
            "energy": {
                "event_data_column": "simulated_energy",
                "event_data": event_data,
                "bin_edges": self.energy_bins,
                "axis_titles": [energy_axis_title, event_count_axis_title],
                "plot_scales": {"x": "log", "y": "log"},
            },
            "core_distance": {
                "event_data_column": "core_distance_shower",
                "event_data": event_data,
                "bin_edges": self.core_distance_bins,
                "axis_titles": ["Core Distance (m)", event_count_axis_title],
            },
            "angular_distance": {
                "event_data_column": "angular_distance",
                "event_data": triggered_data,
                "bin_edges": self.view_cone_bins,
                "axis_titles": ["Angular Distance (deg)", event_count_axis_title],
            },
            "x_core_shower_vs_y_core_shower": {
                "event_data_column": ("x_core_shower", "y_core_shower"),
                "event_data": (event_data, event_data),
                "bin_edges": (xy_bins, xy_bins),
                "is_1d": False,
                "axis_titles": ["Core X (m)", "Core Y (m)", event_count_axis_title],
            },
            "core_vs_energy": {
                "event_data_column": ("core_distance_shower", "simulated_energy"),
                "event_data": (event_data, event_data),
                "bin_edges": (self.core_distance_bins, self.energy_bins),
                "is_1d": False,
                "axis_titles": ["Core Distance (m)", energy_axis_title, event_count_axis_title],
                "plot_scales": {"y": "log"},
            },
            "angular_distance_vs_energy": {
                "event_data_column": ("angular_distance", "simulated_energy"),
                "event_data": (triggered_data, event_data),
                "bin_edges": (self.view_cone_bins, self.energy_bins),
                "is_1d": False,
                "axis_titles": [
                    "Angular Distance (deg)",
                    energy_axis_title,
                    event_count_axis_title,
                ],
                "plot_scales": {"y": "log"},
            },
        }

        hists = {
            name: self.get_histogram_definition(**cfg) | {"suffix": "", "title": "Triggered Events"}
            for name, cfg in definitions.items()
        }

        hists_mc = {}
        for key, hist in hists.items():
            key_mc = f"{key}_mc"
            hists_mc[key_mc] = copy.copy(hist)
            hists_mc[key_mc]["suffix"] = "_mc"
            hists_mc[key_mc]["title"] = "Simulated Events"
            hists_mc[key_mc]["event_data"] = (
                shower_data if hist["1d"] else (shower_data, shower_data)
            )

        hists.update(hists_mc)
        return hists


[docs]
    def get_histogram_definition(
        self,
        event_data_column=None,
        event_data=None,
        histogram=None,
        bin_edges=None,
        title=None,
        axis_titles=None,
        suffix=None,
        is_1d=True,
        plot_scales=None,
    ):
        """Return a single histogram definition."""
        return {
            "histogram": histogram,
            "event_data_column": event_data_column,
            "event_data": event_data,
            "1d": is_1d,
            "bin_edges": bin_edges,
            "title": title,
            "axis_titles": axis_titles,
            "suffix": suffix,
            "plot_scales": plot_scales,
        }


    def _fill_histogram_and_bin_edges(self, data):
        """
        Fill histogram and bin edges into the histogram dictionary.

        Adds to existing histogram if present, otherwise initializes it.
        """
        if data["1d"]:
            hist, _ = np.histogram(
                getattr(data["event_data"], data["event_data_column"]),
                bins=data["bin_edges"],
            )
        else:
            hist, _, _ = np.histogram2d(
                getattr(data["event_data"][0], data["event_data_column"][0]),
                getattr(data["event_data"][1], data["event_data_column"][1]),
                bins=[data["bin_edges"][0], data["bin_edges"][1]],
            )

        data["histogram"] = hist if data["histogram"] is None else data["histogram"] + hist


[docs]
    def calculate_efficiency_data(self):
        """
        Calculate efficiency histograms (triggered divided by simulated).

        Assumes that for each histogram with simulated events, there is a
        corresponding histogram with triggered events.

        Returns
        -------
        dict
            Dictionary containing the efficiency histograms.
        """

        def calculate_efficiency(trig_hist, mc_hist):
            with np.errstate(divide="ignore", invalid="ignore"):
                return np.divide(
                    trig_hist,
                    mc_hist,
                    out=np.zeros_like(trig_hist, dtype=float),
                    where=mc_hist > 0,
                )

        eff_histograms = {}
        for name, mc_hist in self.histograms.items():
            if not name.endswith("_mc"):
                continue

            base_name = name[:-3]
            trig_hist = self.histograms.get(base_name)
            if trig_hist is None:
                continue

            if mc_hist["histogram"].shape != trig_hist["histogram"].shape:
                self._logger.warning(
                    f"Shape mismatch for {base_name} and {name}, skipping efficiency calculation."
                )
                continue

            eff = copy.copy(mc_hist)
            eff.update(
                {
                    "histogram": calculate_efficiency(trig_hist["histogram"], mc_hist["histogram"]),
                    "suffix": "_eff",
                    "title": "Efficiency",
                }
            )
            eff["axis_titles"] = copy.copy(mc_hist["axis_titles"])
            eff["axis_titles"][-1] = "Efficiency"
            eff_histograms[f"{base_name}_eff"] = eff

        self.histograms.update(eff_histograms)
        return eff_histograms


    @property
    def energy_bins(self):
        """Return bins for the energy histogram."""
        if "energy_bin_edges" in self.histograms:
            return self.histograms["energy_bin_edges"]
        return np.logspace(
            np.log10(self.file_info.get("energy_min", 1.0e-3 * u.TeV).to("TeV").value),
            np.log10(self.file_info.get("energy_max", 1.0e3 * u.TeV).to("TeV").value),
            100,
        )

    @property
    def core_distance_bins(self):
        """Return bins for the core distance histogram."""
        if "core_distance_bin_edges" in self.histograms:
            return self.histograms["core_distance_bin_edges"]
        return np.linspace(
            self.file_info.get("core_scatter_min", 0.0 * u.m).to("m").value,
            self.file_info.get("core_scatter_max", 1.0e5 * u.m).to("m").value,
            100,
        )

    @property
    def view_cone_bins(self):
        """Return bins for the viewcone histogram."""
        if "viewcone_bin_edges" in self.histograms:
            return self.histograms["viewcone_bin_edges"]
        return np.linspace(
            self.file_info.get("viewcone_min", 0.0 * u.deg).to("deg").value,
            self.file_info.get("viewcone_max", 20.0 * u.deg).to("deg").value,
            100,
        )


[docs]
    def calculate_cumulative_data(self):
        """
        Calculate cumulative distributions for triggered histograms.

        Returns
        -------
        dict
            Dictionary containing the cumulative histograms.
        """
        cumulative_data = {}
        suffix = "_cumulative"

        def add_cumulative(name, hist, **kwargs):
            new = copy.copy(hist)
            new["histogram"] = self._calculate_cumulative_histogram(hist["histogram"], **kwargs)
            new["axis_titles"] = copy.copy(hist["axis_titles"])
            new.update(
                {
                    "suffix": suffix,
                    "title": "Cumulative triggered events",
                }
            )
            new["axis_titles"][-1] = "Fraction of Events"
            cumulative_data[f"{name}{suffix}"] = new

        # 2D histograms vs energy
        for name, hist in self.histograms.items():
            if name.endswith("_vs_energy") and not name.endswith("_mc"):
                add_cumulative(name, hist, axis=0, normalize=True)

        # 1D histograms
        for name in ["energy", "core_distance", "angular_distance"]:
            if (hist := self.histograms.get(name)) is not None:
                add_cumulative(name, hist, reverse=name == "energy")

        self.histograms.update(cumulative_data)
        return cumulative_data


    def _calculate_cumulative_histogram(self, hist, reverse=False, axis=None, normalize=False):
        """
        Calculate cumulative distribution of a histogram.

        Works with both 1D and 2D histograms.

        Parameters
        ----------
        hist : np.ndarray
            Histogram (1D or 2D)
        reverse : bool, optional
            If True, sum from high to low values
        axis : int, optional
            For 2D histograms, axis along which to compute cumulative sum
            None means default behavior: for 1D just cumsum, for 2D along rows
        normalize : bool, optional
            If True, normalize by the total sum for each slice along the specified axis
            For 1D histograms, normalizes by the total sum

        Returns
        -------
        np.ndarray
            Histogram with cumulative counts, optionally normalized
        """
        if hist is None:
            return None

        if hist.ndim == 1:
            result = self._calculate_cumulative_1d(hist, reverse)
            if normalize and np.sum(hist) > 0:
                result = result / np.sum(hist)
            return result

        axis = axis if axis is not None else 1
        result = self._apply_cumsum_along_axis(hist.copy(), axis, reverse)

        if normalize:
            # Ensure floating dtype to allow in-place normalization without casting errors
            if not np.issubdtype(result.dtype, np.floating):
                result = result.astype(float)
            self._normalize_along_axis(result, hist, axis)

        return result

    def _normalize_along_axis(self, result, hist, axis):
        """
        Normalize cumulative histogram along the specified axis.

        Parameters
        ----------
        result : np.ndarray
            Cumulative histogram to normalize (modified in-place)
        hist : np.ndarray
            Original histogram (for calculating totals)
        axis : int
            Axis along which normalization should be applied
        """
        normalized = np.zeros_like(result, dtype=float)

        if axis == 0:
            for i in range(result.shape[1]):
                col_total = np.sum(hist[:, i])
                if col_total > 0:
                    normalized[:, i] = result[:, i] / col_total
        else:  # axis == 1
            for i in range(result.shape[0]):
                row_total = np.sum(hist[i, :])
                if row_total > 0:
                    normalized[i, :] = result[i, :] / row_total

        np.copyto(result, normalized)

    def _calculate_cumulative_1d(self, hist, reverse):
        """Calculate cumulative distribution for 1D histogram."""
        if reverse:
            return np.cumsum(hist[::-1])[::-1]
        return np.cumsum(hist)

    def _calculate_cumulative_2d(self, hist, reverse, axis=None):
        """Calculate cumulative distribution for 2D histogram."""
        axis = axis if axis is not None else 1
        return self._apply_cumsum_along_axis(hist, axis, reverse)

    def _apply_cumsum_along_axis(self, hist, axis, reverse):
        """Apply cumulative sum along the specified axis of a 2D histogram."""

        def cumsum_func(arr):
            return np.cumsum(arr[::-1])[::-1] if reverse else np.cumsum(arr)

        return np.apply_along_axis(cumsum_func, axis, hist)


[docs]
    @staticmethod
    def rebin_2d_histogram(hist, x_bins, y_bins, rebin_factor=2):
        """
        Rebin a 2D histogram by merging neighboring bins along the energy dimension (y-axis) only.

        Parameters
        ----------
        hist : np.ndarray
            Original 2D histogram data
        x_bins : np.ndarray
            Original x-axis bin edges (preserved)
        y_bins : np.ndarray
            Original y-axis (energy) bin edges
        rebin_factor : int, optional
            Factor by which to reduce the number of bins in the energy dimension
            Default is 2 (merge every 2 bins)

        Returns
        -------
        tuple
            (re-binned_hist, x_bins, re-binned_y_bins)
        """
        if rebin_factor <= 1:
            return hist, x_bins, y_bins

        x_size = hist.shape[0]
        new_y_size = hist.shape[1] // rebin_factor

        new_hist = np.zeros((x_size, new_y_size), dtype=float)

        for i in range(x_size):
            for j in range(new_y_size):
                y_start = j * rebin_factor
                y_end = (j + 1) * rebin_factor
                new_hist[i, j] = np.sum(hist[i, y_start:y_end])

        new_y_bins = y_bins[::rebin_factor]

        return new_hist, x_bins, new_y_bins



[docs]
    def print_summary(self):
        """
        Print a summary of the histogram statistics.

        Total number of events is retrieved from the 'energy' histograms.
        """
        total_simulated = np.sum(self.histograms.get("energy_mc", {}).get("histogram", []))
        total_triggered = np.sum(self.histograms.get("energy", {}).get("histogram", []))

        self._logger.info(f"Total simulated events: {total_simulated}")
        self._logger.info(f"Total triggered events: {total_triggered}")