Source code for infomeasure.estimators.entropy.bonachela

"""Module for the Bonachela entropy estimator."""

from numpy import log
from numpy import arange, cumsum, sum as np_sum

from infomeasure.estimators.base import DiscreteHEstimator
from infomeasure.utils.exceptions import TheoreticalInconsistencyError


class BonachelaEntropyEstimator(DiscreteHEstimator):
    r"""Bonachela (Bonachela-Hinrichsen-Muñoz) entropy estimator for discrete data.

    The Bonachela estimator computes the Shannon entropy using the formula from
    :cite:p:`bonachelaEntropyEstimatesSmall2008`:

    .. math::

        \hat{H}_{B} = \frac{1}{N+2} \sum_{i=1}^{K} \left( (n_i + 1)
        \sum_{j=n_i + 2}^{N+2} \frac{1}{j} \right)

    where :math:`n_i` are the counts for each unique value, :math:`K` is the
    number of unique values, and :math:`N` is the total number of observations.

    This estimator is specially designed to provide a compromise between low
    bias and small statistical errors for short data series, particularly when
    the data sets are small and the probabilities are not close to zero.

    Attributes
    ----------
    *data : array-like
        The data used to estimate the entropy.
    """

    def _simple_entropy(self):
        """Calculate the Bonachela entropy of the data.

        The inner sums :math:`\\sum_{j=n_i+2}^{N+2} 1/j` are read from a single
        array of tail sums of :math:`1/j`, so the work and memory are linear in
        ``N`` plus the number of counts, rather than materialising a
        ``(K, N+1)`` mask matrix.

        Returns
        -------
        float
            The calculated Bonachela entropy, expressed in ``self.base``.

        Notes
        -----
        The counts are used as array indices, so they are assumed to be
        integer-valued and to lie in ``0..N`` (as a frequency count would
        produce) -- TODO confirm against the data container.
        """
        # Get counts and total observations
        counts = self.data[0].counts
        N = self.data[0].N

        # 1/j for every j in 2, ..., N + 2
        reciprocals = 1.0 / arange(2, N + 3)  # Shape: (N+1,)

        # tail_sums[k] = sum_{j=k+2}^{N+2} 1/j.  Accumulating over the reversed
        # array yields the tail sums directly, so no difference of two large
        # harmonic numbers (and hence no cancellation) is ever formed.
        tail_sums = cumsum(reciprocals[::-1])[::-1]  # Shape: (N+1,)

        # The sum for count n_i starts at j = n_i + 2, i.e. at index n_i.
        ni_values = counts + 1  # Shape: (K,)
        inner_sums = tail_sums[(ni_values - 1).astype(int)]  # Shape: (K,)

        # Weighted contributions (n_i + 1) * inner_sum, summed over all counts
        acc = np_sum(ni_values * inner_sums)

        # Normalisation factor of the estimator
        ent = acc / (N + 2)

        # Convert from nats to the desired base if needed
        if self.base != "e":
            ent /= log(self.base)
        return ent

    def _extract_local_values(self):
        """Calculate local Bonachela entropy values for each data point.

        Returns
        -------
        ndarray[float]
            The calculated local values of Bonachela entropy.

        Raises
        ------
        TheoreticalInconsistencyError
            Always; local values are not implemented for this estimator.
        """
        raise TheoreticalInconsistencyError(
            "Local values are not implemented for Bonachela estimator due to "
            "theoretical inconsistencies in the mathematical foundation."
        )

    def _cross_entropy(self):
        """Calculate cross-entropy between two distributions using Bonachela estimator.

        Returns
        -------
        float
            The calculated cross-entropy.

        Raises
        ------
        TheoreticalInconsistencyError
            Always; cross-entropy is not implemented for this estimator.
        """
        raise TheoreticalInconsistencyError(
            "Cross-entropy is not implemented for Bonachela estimator due to "
            "theoretical inconsistencies in applying bias corrections from "
            "different distributions."
        )