Source code for infomeasure.estimators.entropy.discrete

"""Module for the discrete entropy estimator."""

from numpy import sum as np_sum, ndarray, unique

from ..base import EntropyEstimator, DistributionMixin
from ..utils.ordinal import reduce_joint_space
from ... import Config
from ...utils.config import logger
from ...utils.types import LogBaseType


class DiscreteEntropyEstimator(DistributionMixin, EntropyEstimator):
    """Estimator for discrete entropy (Shannon entropy).

    The estimate is built from the relative frequencies of the distinct
    values found in the data.

    Attributes
    ----------
    data : array-like
        The data used to estimate the entropy.
        Multi-dimensional arrays and tuples of marginals are passed through
        ``reduce_joint_space`` during initialization.
    dist_dict : dict
        Mapping from each distinct value to its relative frequency.
        Set by :meth:`_simple_entropy`.
    """

    def __init__(self, data, *, base: LogBaseType = Config.get("base")):
        """Initialize the DiscreteEntropyEstimator.

        Parameters
        ----------
        data : array-like
            The data used to estimate the entropy. Either a single array or
            a tuple of arrays (one per random variable).
        base : LogBaseType
            The logarithm base, forwarded to the parent initializer.
        """
        super().__init__(data, base=base)
        # Warn if the data looks like a float array.
        if isinstance(self.data, ndarray) and self.data.dtype.kind == "f":
            # Report the dtype of ``self.data`` -- the object that was just
            # checked -- rather than of the raw ``data`` argument, which is
            # not guaranteed to carry a ``dtype`` attribute.
            logger.warning(
                "The data looks like a float array ("
                f"{self.data.dtype}). "
                "Make sure it is properly symbolized or discretized "
                "for the entropy estimation."
            )
        elif isinstance(self.data, tuple) and any(
            isinstance(marginal, ndarray) and marginal.dtype.kind == "f"
            for marginal in self.data
        ):
            logger.warning(
                "Some of the data looks like a float array. "
                "Make sure it is properly symbolized or discretized "
                "for the entropy estimation."
            )
        if (isinstance(self.data, ndarray) and self.data.ndim > 1) or isinstance(
            self.data, tuple
        ):
            # As the discrete shannon entropy disregards the order of the data,
            # we can reduce the values to unique integers.
            # In case of having multiple random variables (tuple or list),
            # this enumerates the unique co-occurrences.
            self.data = reduce_joint_space(self.data)

    def _simple_entropy(self):
        """Calculate the entropy of the data.

        As a side effect, the empirical distribution is stored in
        ``self.dist_dict``.

        Returns
        -------
        float
            The calculated entropy.
        """
        uniq, counts = unique(self.data, return_counts=True)
        probabilities = counts / self.data.shape[0]  # normalize
        # Store the distribution for later (used by the local values).
        self.dist_dict = dict(zip(uniq, probabilities))
        # Only values that occur are counted, so every probability is
        # strictly positive and the logarithm is finite.
        return -np_sum(probabilities * self._log_base(probabilities))

    def _joint_entropy(self):
        """Calculate the joint entropy of the data.

        Returns
        -------
        float
            The calculated joint entropy.
        """
        # The data has already been reduced to unique values of co-occurrences
        return self._simple_entropy()

    def _extract_local_values(self):
        """Separately calculate the local values.

        Looks up the probability of every sample in ``self.dist_dict``
        (populated by :meth:`_simple_entropy`) and takes the negative
        logarithm.

        Returns
        -------
        ndarray[float]
            The calculated local values of entropy.
        """
        p_local = [self.dist_dict[val] for val in self.data]
        return -self._log_base(p_local)