Source code for infomeasure.estimators.entropy.kernel
"""Module for the kernel entropy estimator."""
from numpy import column_stack
from ... import Config
from ...utils.types import LogBaseType
from ..base import EntropyEstimator, WorkersMixin
from ..utils.array import assure_2d_data
from ..utils.kde import kde_probability_density_function
[docs]
class KernelEntropyEstimator(WorkersMixin, EntropyEstimator):
    """Estimator for entropy (Shannon) using Kernel Density Estimation (KDE).

    The local entropy values are the negative logarithm (in the configured
    base) of the KDE probability density evaluated for the data.

    Attributes
    ----------
    data : array-like
        The data used to estimate the entropy.
    bandwidth : float | int
        The bandwidth for the kernel.
    kernel : str
        Type of kernel to use, compatible with the KDE
        implementation :func:`kde_probability_density_function() <infomeasure.estimators.utils.kde.kde_probability_density_function>`.
    workers : int, optional
        Number of workers to use for parallel processing.
        Default is 1, meaning no parallel processing.
        If set to -1, all available CPU cores will be used.

    Notes
    -----
    A small ``bandwidth`` can lead to under-sampling,
    while a large ``bandwidth`` may over-smooth the data, obscuring details.
    """

    def __init__(
        self,
        data,
        *,  # all following parameters are keyword-only
        bandwidth: float | int,
        kernel: str,
        workers: int = 1,
        base: LogBaseType = Config.get("base"),
    ):
        """Initialize the KernelEntropyEstimator.

        Parameters
        ----------
        data : array-like
            The data used to estimate the entropy.
            It is passed through ``assure_2d_data`` before being stored.
        bandwidth : float | int
            The bandwidth for the kernel.
        kernel : str
            Type of kernel to use, compatible with the KDE
            implementation :func:`kde_probability_density_function() <infomeasure.estimators.utils.kde.kde_probability_density_function>`.
        workers : int, optional
            Number of workers to use for parallel processing.
            Default is 1, meaning no parallel processing.
            If set to -1, all available CPU cores will be used.
        base : LogBaseType, optional
            The logarithm base forwarded to the parent estimator.
            The default is the ``"base"`` entry of ``Config``, looked up once
            when this class is defined (standard default-argument semantics).
        """
        super().__init__(data, workers=workers, base=base)
        self.data = assure_2d_data(data)
        self.bandwidth = bandwidth
        self.kernel = kernel

    def _local_entropy(self, samples):
        """Return the local entropy values for ``samples``.

        Parameters
        ----------
        samples : array-like
            The data handed to the KDE density estimation.

        Returns
        -------
        array-like
            The negative logarithm of the estimated densities.
        """
        # Compute the KDE densities
        densities = kde_probability_density_function(
            samples, self.bandwidth, kernel=self.kernel, workers=self.n_workers
        )
        # Compute the log of the densities
        return -self._log_base(densities)

    def _simple_entropy(self):
        """Calculate the entropy of the data.

        Returns
        -------
        array-like
            The local form of the entropy.
        """
        return self._local_entropy(self.data)

    def _joint_entropy(self):
        """Calculate the joint entropy of the data.

        This is done by joining the variables into one space
        and calculating the entropy.

        Returns
        -------
        array-like
            The local form of the joint entropy.
        """
        # Stack into a local array instead of overwriting ``self.data``:
        # the stored input stays as it was given, and calling this method
        # again stacks the same original variables rather than the result
        # of a previous stacking.
        joint_samples = column_stack(self.data)
        return self._local_entropy(joint_samples)