# Source code for infomeasure.composite_measures.jsd
"""Jensen-Shannon Divergence (JSD)."""
from numpy import sum as np_sum, concatenate, ndarray
from ..estimators.entropy import (
RenyiEntropyEstimator,
TsallisEntropyEstimator,
KozachenkoLeonenkoEntropyEstimator,
KernelEntropyEstimator,
OrdinalEntropyEstimator,
BayesEntropyEstimator,
DiscreteEntropyEstimator,
ShrinkEntropyEstimator,
)
from ..estimators.functional import get_estimator_class
# [docs]
def jensen_shannon_divergence(*data, approach: str | None = None, **kwargs):
r"""Calculate the Jensen-Shannon Divergence between two or more distributions.
The Jensen-Shannon Divergence is a symmetrized and smoothed version of the
Kullback-Leibler Divergence. It is calculated as the average of the
Kullback-Leibler Divergence between each distribution and the average
distribution.
.. math::
JSD(P \| Q) = \frac{1}{2} KL(P \| M) + \frac{1}{2} KL(Q \| M)
where :math:`M = \frac{1}{2} (P + Q)`.
Parameters
----------
p : array-like
The first data.
q : array-like
The second data.
... : array-like
Further data to compare.
approach : str
The name of the entropy estimator to use.
**kwargs : dict
Additional keyword arguments to pass to the entropy estimator.
Returns
-------
float
The Jensen-Shannon Divergence.
Raises
------
ValueError
If the approach is not supported or the entropy estimator is not
compatible with the Jensen-Shannon Divergence.
ValueError
If any of the given data is not an array-like object.
"""
if approach is None:
raise ValueError("The approach must be specified.")
if not all(isinstance(var, (list, ndarray)) for var in data):
raise ValueError("All data must be array-like objects.")
estimator_class = get_estimator_class(measure="entropy", approach=approach)
if issubclass(
estimator_class,
(
RenyiEntropyEstimator,
TsallisEntropyEstimator,
KozachenkoLeonenkoEntropyEstimator,
),
):
raise ValueError(
"The Jensen-Shannon Divergence is not supported for the "
f"{estimator_class.__name__} estimator."
)
if issubclass(
estimator_class,
(
OrdinalEntropyEstimator,
BayesEntropyEstimator,
DiscreteEntropyEstimator,
ShrinkEntropyEstimator,
),
):
estimators = tuple(estimator_class(var, **kwargs) for var in data)
marginal = sum(estimator.global_val() for estimator in estimators) / len(data)
# the distributions have some matching and some unique keys, create a new dict
# with the sum of the values of union of keys
dists = [estimator.dist_dict for estimator in estimators]
# dict(
# m_i: (p(x_i) + q(x_i) + ... + r(x_i)) / n
# )
dists = {
key: sum(dist.get(key, 0) for dist in dists) / len(dists)
for key in set().union(*dists)
}
mixture = list(dists.values())
mixture = -np_sum(mixture * estimators[0]._log_base(mixture))
return mixture - marginal
if issubclass(estimator_class, KernelEntropyEstimator):
# The mixture distribution is the union of the data, as the kernel density
# estimation is applied afterward.
mix_est = estimator_class(concatenate(data, axis=0), **kwargs)
return mix_est.global_val() - sum(
estimator_class(var, **kwargs).global_val() for var in data
) / len(data)
else:
raise ValueError( # pragma: no cover
f"The approach {approach} is not supported."
)