Source code for production_configuration.derive_production_statistics

"""
Calculate the event production statistics based on metrics.

Module for calculating the production event statistics based on statistical error metrics.
Contains the `ProductionStatisticsDerivator` class, which derives the number of events for
both the entire dataset and specific grid points. Event statistic is calculated using error
metrics and the evaluator's results.
"""

import astropy.units as u
import numpy as np

__all__ = ["ProductionStatisticsDerivator"]



[docs]
class ProductionStatisticsDerivator:
    """
    Derives the production statistics based on statistical error metrics.

    Supports deriving statistics for both the entire dataset and
    specific grid points like energy values.
    """

    def __init__(self, evaluator, metrics: dict):
        """
        Initialize the ProductionStatisticsDerivator with the evaluator and metrics.

        Parameters
        ----------
        evaluator : StatisticalUncertaintyEvaluator
            The evaluator responsible for calculating metrics and handling event data.
        metrics : dict
            Dictionary containing metrics, including target error for effective area.
        """
        self.evaluator = evaluator
        self.metrics = metrics

    def _compute_scaling_factor(self) -> np.ndarray:
        """
        Compute bin-wise scaling factors based on error metrics.

        Takes into account the energy range specified in the metrics and
        calculates a separate scaling factor for each energy bin.

        Returns
        -------
        np.ndarray
            Array of scaling factors for each energy bin.
        """
        uncertainty_effective_area = self.evaluator.metric_results.get("uncertainty_effective_area")
        relative_uncertainties = uncertainty_effective_area.get("relative_uncertainties")
        energy_range = (
            self.metrics.get("uncertainty_effective_area").get("energy_range").get("value")
        )
        energy_unit = u.Unit(
            self.metrics.get("uncertainty_effective_area").get("energy_range").get("unit")
        )

        energy_range_converted = np.array(energy_range) * energy_unit

        bin_edges = self.evaluator.energy_bin_edges
        bin_centers = (bin_edges[:-1] + bin_edges[1:]) / 2

        # Mask for bins within the metric specified energy range
        mask = (bin_centers >= energy_range_converted[0]) & (
            bin_centers <= energy_range_converted[1]
        )

        scaling_factors = np.zeros_like(relative_uncertainties)

        target_uncertainty = (
            self.metrics.get("uncertainty_effective_area").get("target_uncertainty").get("value")
        )

        # Calculate scaling factor only for bins within the energy range
        # For bins with zero events/uncertainty, use a scaling factor of 0
        valid_bins = mask & (relative_uncertainties > 0)
        scaling_factors[valid_bins] = (relative_uncertainties[valid_bins] / target_uncertainty) ** 2

        return scaling_factors


[docs]
    def derive_statistics(self, return_sum: bool = True) -> u.Quantity:
        """
        Derive the production statistics based on statistical error metrics.

        Parameters
        ----------
        return_sum : bool, optional
            If True, returns the sum of production statistics for the entire set of MC events.
            If False, returns the production statistics for each grid point along the energy axis.
            Default is True.

        Returns
        -------
        u.Quantity
            If 'return_sum' is True, returns the total
            derived production statistics as a u.Quantity.
            If 'return_sum' is False, returns an array of production statistics along the energy
            axis as a u.Quantity.
        """
        scaling_factors = self._compute_scaling_factor()
        base_events = self._number_of_simulated_events()
        # currently we use the maximum of the scaling factors to scale the events. This is a soft
        # requirement if we want to keep the power law shape of the production statistics.
        scaled_events = base_events * np.max(scaling_factors)

        if return_sum:
            return np.sum(scaled_events)
        return scaled_events


    def _number_of_simulated_events(self) -> u.Quantity:
        """
        Fetch the number of simulated events from the evaluator's data.

        Returns
        -------
        u.Quantity
            The number of simulated events.
        """
        return self.evaluator.data.get("simulated_event_histogram")


[docs]
    def calculate_production_statistics_at_grid_point(
        self,
        grid_point: tuple,
    ) -> u.Quantity:
        """
        Derive the production statistics for a specific energy grid point.

        Parameters
        ----------
        grid_point : tuple
            The grid point specifying energy, azimuth, zenith, NSB, and offset.

        Returns
        -------
        float
            The derived production statistics at the specified grid point (energy).
        """
        energy = grid_point[0]
        bin_edges = self.evaluator.create_bin_edges()
        bin_idx = np.digitize(energy, bin_edges) - 1

        # Get scaling factors for all bins
        scaling_factors = self._compute_scaling_factor()

        simulated_event_histogram = self.evaluator.data.get("simulated_event_histogram", [])

        if bin_idx < 0 or bin_idx >= len(simulated_event_histogram):
            raise ValueError(f"Energy {energy} is outside the range of the simulated events data.")

        base_events = self._number_of_simulated_events()
        base_event_at_energy = base_events[bin_idx]
        scaling_factor_at_energy = scaling_factors[bin_idx]

        return base_event_at_energy * scaling_factor_at_energy