Source code for infomeasure.estimators.entropy.shrink

"""Module for the shrink (James-Stein) entropy estimator."""

from numpy import asarray
from numpy import sum as np_sum

from infomeasure.estimators.base import DiscreteHEstimator
from ...utils.exceptions import TheoreticalInconsistencyError


class ShrinkEntropyEstimator(DiscreteHEstimator):
    r"""Shrinkage (James-Stein) entropy estimator.

    This estimator applies James-Stein shrinkage to the probability estimates
    before computing entropy, which can reduce bias in small sample scenarios.

    The shrinkage probabilities are calculated as:

    .. math::

        \hat{p}_x^{\text{SHR}} = \lambda t_x + (1 - \lambda) \hat{p}_x^{\text{ML}}

    where :math:`\hat{p}_x^{\text{ML}}` are the maximum likelihood probability
    estimates, :math:`t_x = 1/K` is the uniform target distribution, and the
    shrinkage parameter :math:`\lambda` is given by:

    .. math::

        \lambda = \frac{1 - \sum_{x=1}^{K} (\hat{p}_x^{\text{ML}})^2}
                       {(n-1) \sum_{x=1}^K (t_x - \hat{p}_x^{\text{ML}})^2}

    The value of :math:`\lambda` is clamped to the interval :math:`[0, 1]`.
    It is set to 1 (full shrinkage towards the uniform target) when there are
    fewer than two observations or when the maximum likelihood estimates
    already coincide with the target.

    The entropy is then computed using these shrinkage-corrected probabilities.
    Based on the implementation in the R package entropy
    :cite:p:`hausserEntropyInferenceJamesStein2009`.

    Attributes
    ----------
    *data : array-like
        The data used to estimate the entropy.
    """

    def _simple_entropy(self):
        """Calculate the shrinkage entropy of the data.

        Returns
        -------
        float
            The calculated entropy.
        """
        p_shrink = self._shrink_probs()
        # Plug-in entropy formula evaluated on the shrunk probabilities;
        # the logarithm is delegated to ``self._log_base``.
        entropy = -np_sum(p_shrink * self._log_base(p_shrink))
        return entropy

    def _calculate_lambda_shrink(self, N, u, t):
        """Calculate the shrinkage parameter lambda.

        The caller is expected to pass ``N > 1``; the variance term divides
        by ``N - 1``.

        Parameters
        ----------
        N : int
            Total number of observations
        u : array-like
            Maximum likelihood probabilities
        t : float
            Target probability (1/K)

        Returns
        -------
        float
            The shrinkage parameter lambda, clamped to [0, 1]. Returns 1.0
            when ``u`` equals the target everywhere.
        """
        # Estimated variance of each maximum likelihood probability
        varu = u * (1.0 - u) / (N - 1)
        # Sum of squared differences between the ML estimates and the target
        msp = np_sum((u - t) ** 2)

        # The ML estimates already equal the target: avoid dividing by zero
        # and shrink completely.
        if msp == 0:
            return 1.0

        lambda_val = np_sum(varu) / msp

        # Clamp lambda to [0, 1]
        if lambda_val > 1:
            return 1.0
        if lambda_val < 0:
            return 0.0
        return lambda_val

    def _extract_local_values(self):
        """Separately, calculate the local values.

        Each data point is mapped to the negative logarithm of the shrinkage
        probability of its value.

        Returns
        -------
        ndarray[float]
            The calculated local values of entropy.
        """
        shrink_dict = self.dist_dict
        # Calculate local values for each data point
        local_values = asarray(
            [-self._log_base(shrink_dict[val]) for val in self.data[0].data]
        )
        return local_values

    @property
    def dist_dict(self):
        """Dictionary of shrinkage probabilities for each unique value.

        Used by JSD.
        """
        p_shrink = self._shrink_probs()
        # Create a mapping from unique values to their shrinkage probabilities
        return dict(zip(self.data[0].uniq, p_shrink))

    def _shrink_probs(self):
        """Compute the shrinkage-corrected probabilities.

        Reads ``N``, ``K`` and ``probabilities`` from ``self.data[0]`` and
        mixes the maximum likelihood probabilities with the uniform target
        ``1 / K`` using the shrinkage parameter lambda.

        Returns
        -------
        array-like
            ``lambda * t + (1 - lambda) * p_ml``, one entry per entry of
            ``self.data[0].probabilities``.
        """
        N = self.data[0].N  # total number of observations
        K = self.data[0].K  # presumably the number of distinct values

        # Maximum likelihood probabilities
        # p_ml = counts / N
        p_ml = self.data[0].probabilities

        # Target probabilities (uniform distribution)
        t = 1.0 / K

        # Calculate lambda (shrinkage parameter); with fewer than two
        # observations the variance estimate is undefined, so shrink fully.
        if N in (0, 1):
            lambda_shrink = 1.0
        else:
            lambda_shrink = self._calculate_lambda_shrink(N, p_ml, t)

        # Calculate shrinkage probabilities
        p_shrink = lambda_shrink * t + (1 - lambda_shrink) * p_ml
        return p_shrink

    def _cross_entropy(self) -> float:
        """Calculate cross-entropy between two distributions.

        Raises
        ------
        TheoreticalInconsistencyError
            Cross-entropy is not theoretically sound for shrinkage estimator
            due to a conceptual mismatch between shrinkage correction and
            cross-entropy.
        """
        raise TheoreticalInconsistencyError(
            "Cross-entropy is not implemented for shrinkage estimator. "
            "The shrinkage correction is designed for bias correction in entropy "
            "estimation using a specific shrinkage target, but cross-entropy mixes "
            "probabilities from one distribution with corrections from another, "
            "creating a theoretical inconsistency."
        )