Source code for infomeasure.composite_measures.jsd

"""Jensen-Shannon Divergence (JSD)."""

from numpy import sum as np_sum, concatenate, ndarray

from ..estimators.entropy import (
    RenyiEntropyEstimator,
    TsallisEntropyEstimator,
    KozachenkoLeonenkoEntropyEstimator,
    KernelEntropyEstimator,
    OrdinalEntropyEstimator,
    BayesEntropyEstimator,
    DiscreteEntropyEstimator,
    ShrinkEntropyEstimator,
)
from ..estimators.functional import get_estimator_class


[docs] def jensen_shannon_divergence(*data, approach: str | None = None, **kwargs): r"""Calculate the Jensen-Shannon Divergence between two or more distributions. The Jensen-Shannon Divergence is a symmetrized and smoothed version of the Kullback-Leibler Divergence. It is calculated as the average of the Kullback-Leibler Divergence between each distribution and the average distribution. .. math:: JSD(P \| Q) = \frac{1}{2} KL(P \| M) + \frac{1}{2} KL(Q \| M) where :math:`M = \frac{1}{2} (P + Q)`. Parameters ---------- p : array-like The first data. q : array-like The second data. ... : array-like Further data to compare. approach : str The name of the entropy estimator to use. **kwargs : dict Additional keyword arguments to pass to the entropy estimator. Returns ------- float The Jensen-Shannon Divergence. Raises ------ ValueError If the approach is not supported or the entropy estimator is not compatible with the Jensen-Shannon Divergence. ValueError If any of the given data is not an array-like object. """ if approach is None: raise ValueError("The approach must be specified.") if not all(isinstance(var, (list, ndarray)) for var in data): raise ValueError("All data must be array-like objects.") estimator_class = get_estimator_class(measure="entropy", approach=approach) if issubclass( estimator_class, ( RenyiEntropyEstimator, TsallisEntropyEstimator, KozachenkoLeonenkoEntropyEstimator, ), ): raise ValueError( "The Jensen-Shannon Divergence is not supported for the " f"{estimator_class.__name__} estimator." ) if issubclass( estimator_class, ( OrdinalEntropyEstimator, BayesEntropyEstimator, DiscreteEntropyEstimator, ShrinkEntropyEstimator, ), ): estimators = tuple(estimator_class(var, **kwargs) for var in data) marginal = sum(estimator.global_val() for estimator in estimators) / len(data) # the distributions have some matching and some unique keys, create a new dict # with the sum of the values of union of keys dists = [estimator.dist_dict for estimator in estimators] # dict( # m_i: (p(x_i) + q(x_i) + ... + r(x_i)) / n # ) dists = { key: sum(dist.get(key, 0) for dist in dists) / len(dists) for key in set().union(*dists) } mixture = list(dists.values()) mixture = -np_sum(mixture * estimators[0]._log_base(mixture)) return mixture - marginal if issubclass(estimator_class, KernelEntropyEstimator): # The mixture distribution is the union of the data, as the kernel density # estimation is applied afterward. mix_est = estimator_class(concatenate(data, axis=0), **kwargs) return mix_est.global_val() - sum( estimator_class(var, **kwargs).global_val() for var in data ) / len(data) else: raise ValueError( # pragma: no cover f"The approach {approach} is not supported." )