Source code for infomeasure.estimators.entropy.grassberger

"""Module for the discrete Grassberger entropy estimator."""

from numpy import asarray, log
from scipy.special import digamma

from infomeasure.estimators.base import DiscreteHEstimator
from ...utils.exceptions import TheoreticalInconsistencyError


class GrassbergerEntropyEstimator(DiscreteHEstimator):
    r"""Discrete Grassberger entropy estimator.

    .. math::

        \hat{H}_{\text{Gr88}} = \sum_i \frac{n_i}{N}
        \left(\log(N) - \psi(n_i) - \frac{(-1)^{n_i}}{n_i + 1} \right)

    :math:`\hat{H}_{\text{Gr88}}` is the Grassberger entropy,
    where :math:`n_i` are the counts,
    :math:`N` is the total number of observations, and
    :math:`\psi` is the digamma function
    :cite:p:`grassbergerFiniteSampleCorrections1988,grassbergerEntropyEstimatesInsufficient2008`.

    Attributes
    ----------
    *data : array-like
        The data used to estimate the entropy.
    """

    def _simple_entropy(self):
        r"""Calculate the local Grassberger entropy values of the data.

        An observation whose symbol occurs :math:`n_i` times is assigned
        :math:`\log(N) - \psi(n_i) - (-1)^{n_i} / (n_i + 1)`. Averaging these
        values over all observations yields the sum in the class docstring,
        because each symbol contributes :math:`n_i` identical terms.

        Returns
        -------
        ndarray
            One value per observation, in the order of ``self.data[0].data``
            and expressed in the requested logarithm base.
            NOTE(review): reducing this array to a single entropy value is
            presumably done by the base class -- confirm.
        """
        sample = self.data[0]
        counts = asarray(sample.counts)

        # The expression depends only on a symbol's count, so evaluate it once
        # per distinct symbol instead of once per observation.
        per_symbol = (
            log(sample.N) - digamma(counts) - ((-1) ** counts) / (counts + 1)
        )

        # Convert from nats to the requested base if needed
        if self.base != "e":
            per_symbol = per_symbol / log(self.base)

        # Map every observation back to the value of its symbol
        symbol_value = dict(zip(sample.uniq, per_symbol))
        return asarray([symbol_value[val] for val in sample.data])

    def _cross_entropy(self) -> float:
        """Calculate cross-entropy between two distributions.

        This estimator does not support cross-entropy; the method always
        raises.

        Raises
        ------
        TheoreticalInconsistencyError
            Cross-entropy is not theoretically sound for Grassberger estimator
            due to conceptual mismatch between bias correction and
            cross-entropy.
        """
        raise TheoreticalInconsistencyError(
            "Cross-entropy is not implemented for Grassberger estimator. "
            "The Grassberger correction is designed for bias correction in entropy "
            "estimation using count-based corrections, but cross-entropy mixes "
            "probabilities from one distribution with corrections from another, "
            "creating a theoretical inconsistency."
        )