Source code for simtel.simtel_io_event_histograms

"""Histograms for shower and triggered events."""

import copy
import logging

import astropy.units as u
import numpy as np

from simtools.simtel.simtel_io_event_reader import SimtelIOEventDataReader


[docs] class SimtelIOEventHistograms: """ Generate and fill histograms for shower and triggered events. Event data is read from the reduced MC event data file. Calculate cumulative and relative (efficiency) distributions. Parameters ---------- event_data_file : str Path to the event-data file. array_name : str, optional Name of the telescope array configuration (default is None). telescope_list : list, optional List of telescope IDs to filter the events (default is None). """ def __init__(self, event_data_file, array_name=None, telescope_list=None): """Initialize.""" self._logger = logging.getLogger(__name__) self.event_data_file = event_data_file self.array_name = array_name self.histograms = {} self.file_info = {} self.reader = SimtelIOEventDataReader(event_data_file, telescope_list=telescope_list)
[docs] def fill(self): """ Fill histograms with event data. Involves looping over all event data, and therefore is the slowest part of the histogram module. Adds the histograms to the histogram dictionary. Assume that all event data files are generated with similar configurations (self.file_info contains the file info of the last file). """ for data_set in self.reader.data_sets: self._logger.info(f"Reading event data from {self.event_data_file} for {data_set}") _file_info_table, shower_data, event_data, triggered_data = self.reader.read_event_data( self.event_data_file, table_name_map=data_set ) _file_info_table = self.reader.get_reduced_simulation_file_info(_file_info_table) self.file_info = { "energy_min": _file_info_table["energy_min"].to("TeV"), "core_scatter_max": _file_info_table["core_scatter_max"].to("m"), "viewcone_max": _file_info_table["viewcone_max"].to("deg"), "solid_angle": _file_info_table["solid_angle"].to("sr"), "scatter_area": _file_info_table["scatter_area"].to("cm2"), } self.histograms = self._define_histograms(event_data, triggered_data, shower_data) for name, data in self.histograms.items(): self._logger.debug(f"Filling histogram {name}") self._fill_histogram_and_bin_edges(data) self.print_summary() self.calculate_efficiency_data() self.calculate_cumulative_data()
def _define_histograms(self, event_data, triggered_data, shower_data): """ Define histograms including event data, binning, naming, and labels. All histograms are defined for simulated and triggered events (note the subtlety of triggered events being read from event_data and triggered_data). Parameters ---------- event_data : EventData The event data to use for filling the histograms. triggered_data : TriggeredData The triggered data to use for filling the histograms. shower_data : ShowerData The shower data to use for filling the histograms. Returns ------- dict Dictionary with histogram definitions. """ xy_bins = np.linspace( -1.0 * self.core_distance_bins.max(), self.core_distance_bins.max(), len(self.core_distance_bins), ) hists = {} energy_axis_title = "Energy (TeV)" event_count_axis_title = "Event Count" definitions = { "energy": { "event_data_column": "simulated_energy", "event_data": event_data, "bin_edges": self.energy_bins, "axis_titles": [energy_axis_title, event_count_axis_title], "plot_scales": {"x": "log", "y": "log"}, }, "core_distance": { "event_data_column": "core_distance_shower", "event_data": event_data, "bin_edges": self.core_distance_bins, "axis_titles": ["Core Distance (m)", event_count_axis_title], }, "angular_distance": { "event_data_column": "angular_distance", "event_data": triggered_data, "bin_edges": self.view_cone_bins, "axis_titles": ["Angular Distance (deg)", event_count_axis_title], }, "x_core_shower_vs_y_core_shower": { "event_data_column": ("x_core_shower", "y_core_shower"), "event_data": (event_data, event_data), "bin_edges": (xy_bins, xy_bins), "is_1d": False, "axis_titles": ["Core X (m)", "Core Y (m)", event_count_axis_title], }, "core_vs_energy": { "event_data_column": ("core_distance_shower", "simulated_energy"), "event_data": (event_data, event_data), "bin_edges": (self.core_distance_bins, self.energy_bins), "is_1d": False, "axis_titles": ["Core Distance (m)", energy_axis_title, event_count_axis_title], "plot_scales": {"y": "log"}, }, "angular_distance_vs_energy": { "event_data_column": ("angular_distance", "simulated_energy"), "event_data": (triggered_data, event_data), "bin_edges": (self.view_cone_bins, self.energy_bins), "is_1d": False, "axis_titles": [ "Angular Distance (deg)", energy_axis_title, event_count_axis_title, ], "plot_scales": {"y": "log"}, }, } hists = { name: self.get_histogram_definition(**cfg) | {"suffix": "", "title": "Triggered Events"} for name, cfg in definitions.items() } hists_mc = {} for key, hist in hists.items(): key_mc = f"{key}_mc" hists_mc[key_mc] = copy.copy(hist) hists_mc[key_mc]["suffix"] = "_mc" hists_mc[key_mc]["title"] = "Simulated Events" hists_mc[key_mc]["event_data"] = ( shower_data if hist["1d"] else (shower_data, shower_data) ) hists.update(hists_mc) return hists
[docs] def get_histogram_definition( self, event_data_column=None, event_data=None, histogram=None, bin_edges=None, title=None, axis_titles=None, suffix=None, is_1d=True, plot_scales=None, ): """Return a single histogram definition.""" return { "histogram": histogram, "event_data_column": event_data_column, "event_data": event_data, "1d": is_1d, "bin_edges": bin_edges, "title": title, "axis_titles": axis_titles, "suffix": suffix, "plot_scales": plot_scales, }
def _fill_histogram_and_bin_edges(self, data): """ Fill histogram and bin edges into the histogram dictionary. Adds to existing histogram if present, otherwise initializes it. """ if data["1d"]: hist, _ = np.histogram( getattr(data["event_data"], data["event_data_column"]), bins=data["bin_edges"], ) else: hist, _, _ = np.histogram2d( getattr(data["event_data"][0], data["event_data_column"][0]), getattr(data["event_data"][1], data["event_data_column"][1]), bins=[data["bin_edges"][0], data["bin_edges"][1]], ) data["histogram"] = hist if data["histogram"] is None else data["histogram"] + hist
[docs] def calculate_efficiency_data(self): """ Calculate efficiency histograms (triggered divided by simulated). Assumes that for each histogram with simulated events, there is a corresponding histogram with triggered events. Returns ------- dict Dictionary containing the efficiency histograms. """ def calculate_efficiency(trig_hist, mc_hist): with np.errstate(divide="ignore", invalid="ignore"): return np.divide( trig_hist, mc_hist, out=np.zeros_like(trig_hist, dtype=float), where=mc_hist > 0, ) eff_histograms = {} for name, mc_hist in self.histograms.items(): if not name.endswith("_mc"): continue base_name = name[:-3] trig_hist = self.histograms.get(base_name) if trig_hist is None: continue if mc_hist["histogram"].shape != trig_hist["histogram"].shape: self._logger.warning( f"Shape mismatch for {base_name} and {name}, skipping efficiency calculation." ) continue eff = copy.copy(mc_hist) eff.update( { "histogram": calculate_efficiency(trig_hist["histogram"], mc_hist["histogram"]), "suffix": "_eff", "title": "Efficiency", } ) eff["axis_titles"] = copy.copy(mc_hist["axis_titles"]) eff["axis_titles"][-1] = "Efficiency" eff_histograms[f"{base_name}_eff"] = eff self.histograms.update(eff_histograms) return eff_histograms
@property def energy_bins(self): """Return bins for the energy histogram.""" if "energy_bin_edges" in self.histograms: return self.histograms["energy_bin_edges"] return np.logspace( np.log10(self.file_info.get("energy_min", 1.0e-3 * u.TeV).to("TeV").value), np.log10(self.file_info.get("energy_max", 1.0e3 * u.TeV).to("TeV").value), 100, ) @property def core_distance_bins(self): """Return bins for the core distance histogram.""" if "core_distance_bin_edges" in self.histograms: return self.histograms["core_distance_bin_edges"] return np.linspace( self.file_info.get("core_scatter_min", 0.0 * u.m).to("m").value, self.file_info.get("core_scatter_max", 1.0e5 * u.m).to("m").value, 100, ) @property def view_cone_bins(self): """Return bins for the viewcone histogram.""" if "viewcone_bin_edges" in self.histograms: return self.histograms["viewcone_bin_edges"] return np.linspace( self.file_info.get("viewcone_min", 0.0 * u.deg).to("deg").value, self.file_info.get("viewcone_max", 20.0 * u.deg).to("deg").value, 100, )
[docs] def calculate_cumulative_data(self): """ Calculate cumulative distributions for triggered histograms. Returns ------- dict Dictionary containing the cumulative histograms. """ cumulative_data = {} suffix = "_cumulative" def add_cumulative(name, hist, **kwargs): new = copy.copy(hist) new["histogram"] = self._calculate_cumulative_histogram(hist["histogram"], **kwargs) new["axis_titles"] = copy.copy(hist["axis_titles"]) new.update( { "suffix": suffix, "title": "Cumulative triggered events", } ) new["axis_titles"][-1] = "Fraction of Events" cumulative_data[f"{name}{suffix}"] = new # 2D histograms vs energy for name, hist in self.histograms.items(): if name.endswith("_vs_energy") and not name.endswith("_mc"): add_cumulative(name, hist, axis=0, normalize=True) # 1D histograms for name in ["energy", "core_distance", "angular_distance"]: if (hist := self.histograms.get(name)) is not None: add_cumulative(name, hist, reverse=name == "energy") self.histograms.update(cumulative_data) return cumulative_data
def _calculate_cumulative_histogram(self, hist, reverse=False, axis=None, normalize=False): """ Calculate cumulative distribution of a histogram. Works with both 1D and 2D histograms. Parameters ---------- hist : np.ndarray Histogram (1D or 2D) reverse : bool, optional If True, sum from high to low values axis : int, optional For 2D histograms, axis along which to compute cumulative sum None means default behavior: for 1D just cumsum, for 2D along rows normalize : bool, optional If True, normalize by the total sum for each slice along the specified axis For 1D histograms, normalizes by the total sum Returns ------- np.ndarray Histogram with cumulative counts, optionally normalized """ if hist is None: return None if hist.ndim == 1: result = self._calculate_cumulative_1d(hist, reverse) if normalize and np.sum(hist) > 0: result = result / np.sum(hist) return result axis = axis if axis is not None else 1 result = self._apply_cumsum_along_axis(hist.copy(), axis, reverse) if normalize: # Ensure floating dtype to allow in-place normalization without casting errors if not np.issubdtype(result.dtype, np.floating): result = result.astype(float) self._normalize_along_axis(result, hist, axis) return result def _normalize_along_axis(self, result, hist, axis): """ Normalize cumulative histogram along the specified axis. Parameters ---------- result : np.ndarray Cumulative histogram to normalize (modified in-place) hist : np.ndarray Original histogram (for calculating totals) axis : int Axis along which normalization should be applied """ normalized = np.zeros_like(result, dtype=float) if axis == 0: for i in range(result.shape[1]): col_total = np.sum(hist[:, i]) if col_total > 0: normalized[:, i] = result[:, i] / col_total else: # axis == 1 for i in range(result.shape[0]): row_total = np.sum(hist[i, :]) if row_total > 0: normalized[i, :] = result[i, :] / row_total np.copyto(result, normalized) def _calculate_cumulative_1d(self, hist, reverse): """Calculate cumulative distribution for 1D histogram.""" if reverse: return np.cumsum(hist[::-1])[::-1] return np.cumsum(hist) def _calculate_cumulative_2d(self, hist, reverse, axis=None): """Calculate cumulative distribution for 2D histogram.""" axis = axis if axis is not None else 1 return self._apply_cumsum_along_axis(hist, axis, reverse) def _apply_cumsum_along_axis(self, hist, axis, reverse): """Apply cumulative sum along the specified axis of a 2D histogram.""" def cumsum_func(arr): return np.cumsum(arr[::-1])[::-1] if reverse else np.cumsum(arr) return np.apply_along_axis(cumsum_func, axis, hist)
[docs] @staticmethod def rebin_2d_histogram(hist, x_bins, y_bins, rebin_factor=2): """ Rebin a 2D histogram by merging neighboring bins along the energy dimension (y-axis) only. Parameters ---------- hist : np.ndarray Original 2D histogram data x_bins : np.ndarray Original x-axis bin edges (preserved) y_bins : np.ndarray Original y-axis (energy) bin edges rebin_factor : int, optional Factor by which to reduce the number of bins in the energy dimension Default is 2 (merge every 2 bins) Returns ------- tuple (re-binned_hist, x_bins, re-binned_y_bins) """ if rebin_factor <= 1: return hist, x_bins, y_bins x_size = hist.shape[0] new_y_size = hist.shape[1] // rebin_factor new_hist = np.zeros((x_size, new_y_size), dtype=float) for i in range(x_size): for j in range(new_y_size): y_start = j * rebin_factor y_end = (j + 1) * rebin_factor new_hist[i, j] = np.sum(hist[i, y_start:y_end]) new_y_bins = y_bins[::rebin_factor] return new_hist, x_bins, new_y_bins
[docs] def print_summary(self): """ Print a summary of the histogram statistics. Total number of events is retrieved from the 'energy' histograms. """ total_simulated = np.sum(self.histograms.get("energy_mc", {}).get("histogram", [])) total_triggered = np.sum(self.histograms.get("energy", {}).get("histogram", [])) self._logger.info(f"Total simulated events: {total_simulated}") self._logger.info(f"Total triggered events: {total_triggered}")