Source code for infomeasure.estimators.entropy.shrink
"""Module for the shrink (James-Stein) entropy estimator."""
from numpy import asarray
from numpy import sum as np_sum
from infomeasure.estimators.base import DiscreteHEstimator
from ...utils.exceptions import TheoreticalInconsistencyError
[docs]
class ShrinkEntropyEstimator(DiscreteHEstimator):
    r"""Shrinkage (James-Stein) entropy estimator.

    This estimator applies James-Stein shrinkage to the probability estimates
    before computing entropy, which can reduce bias in small sample scenarios.

    The shrinkage probabilities are calculated as:

    .. math::

        \hat{p}_x^{\text{SHR}} = \lambda t_x + (1 - \lambda) \hat{p}_x^{\text{ML}}

    where :math:`\hat{p}_x^{\text{ML}}` are the maximum likelihood probability
    estimates, :math:`t_x = 1/K` is the uniform target distribution, and the
    shrinkage parameter :math:`\lambda` is given by:

    .. math::

        \lambda = \frac{1 - \sum_{x=1}^{K} (\hat{p}_x^{\text{ML}})^2}
                       {(n-1) \sum_{x=1}^{K} (t_x - \hat{p}_x^{\text{ML}})^2}

    The value of :math:`\lambda` is clamped to :math:`[0, 1]`; it is set to 1
    when the denominator vanishes (the maximum likelihood estimate already
    equals the target) or when there are fewer than two observations.

    The entropy is then computed using these shrinkage-corrected probabilities.

    Based on the implementation in the R package entropy
    :cite:p:`hausserEntropyInferenceJamesStein2009`.

    Attributes
    ----------
    *data : array-like
        The data used to estimate the entropy.
    """

    def _simple_entropy(self):
        """Calculate the shrinkage entropy of the data.

        Returns
        -------
        float
            The calculated entropy, in the unit defined by ``self._log_base``.
        """
        p_shrink = self._shrink_probs()
        # Plug-in entropy formula evaluated on the shrunk probabilities.
        return -np_sum(p_shrink * self._log_base(p_shrink))

    def _calculate_lambda_shrink(self, N, u, t):
        """Calculate the shrinkage parameter lambda.

        Parameters
        ----------
        N : int
            Total number of observations. The division by ``N - 1`` means the
            caller is expected to pass ``N >= 2``.
        u : array-like
            Maximum likelihood probabilities.
        t : float
            Target probability (1/K).

        Returns
        -------
        float
            The shrinkage parameter lambda, clamped to the interval [0, 1].
        """
        # Misspecification: squared distance between the estimate and the target.
        msp = np_sum((u - t) ** 2)
        if msp == 0:
            # The estimate coincides with the target; shrinking fully onto the
            # target changes nothing and avoids a division by zero below.
            return 1.0

        # Unbiased estimate of the variance of each entry of u.
        varu = u * (1.0 - u) / (N - 1)
        lambda_val = np_sum(varu) / msp

        # Clamp lambda to [0, 1]
        if lambda_val > 1:
            return 1.0
        if lambda_val < 0:
            return 0.0
        return lambda_val

    def _extract_local_values(self):
        """Separately, calculate the local values.

        Each observation is assigned the negative logarithm of the shrinkage
        probability of its value.

        Returns
        -------
        ndarray[float]
            The calculated local values of entropy.
        """
        # Look the property up once: every access recomputes the probabilities.
        shrink_dict = self.dist_dict
        # Calculate local values for each data point
        return asarray(
            [-self._log_base(shrink_dict[val]) for val in self.data[0].data]
        )

    @property
    def dist_dict(self):
        """Dictionary of shrinkage probabilities for each unique value. Used by JSD."""
        p_shrink = self._shrink_probs()
        # Create a mapping from unique values to their shrinkage probabilities
        return dict(zip(self.data[0].uniq, p_shrink))

    def _shrink_probs(self):
        """Compute the shrinkage-corrected probabilities of the observed values.

        Returns
        -------
        ndarray[float]
            ``lambda * t + (1 - lambda) * p_ml`` with the uniform target
            ``t = 1/K``. An empty array is returned when ``K`` is zero.
        """
        N = self.data[0].N  # total number of observations
        K = self.data[0].K
        # Maximum likelihood probabilities
        # p_ml = counts / N
        p_ml = self.data[0].probabilities

        if K == 0:
            # Nothing to shrink; also keeps ``1.0 / K`` below from dividing
            # by zero before the small-sample guard can take effect.
            return asarray(p_ml, dtype=float)

        # Target probabilities (uniform distribution)
        t = 1.0 / K

        # Calculate lambda (shrinkage parameter)
        if N in (0, 1):
            # Too few observations to estimate a variance: use the target only.
            lambda_shrink = 1.0
        else:
            lambda_shrink = self._calculate_lambda_shrink(N, p_ml, t)

        # Calculate shrinkage probabilities
        return lambda_shrink * t + (1 - lambda_shrink) * p_ml

    def _cross_entropy(self) -> float:
        """Calculate cross-entropy between two distributions.

        Raises
        ------
        TheoreticalInconsistencyError
            Cross-entropy is not theoretically sound for shrinkage estimator
            due to a conceptual mismatch between shrinkage correction and
            cross-entropy.
        """
        raise TheoreticalInconsistencyError(
            "Cross-entropy is not implemented for shrinkage estimator. "
            "The shrinkage correction is designed for bias correction in entropy "
            "estimation using a specific shrinkage target, but cross-entropy mixes "
            "probabilities from one distribution with corrections from another, "
            "creating a theoretical inconsistency."
        )