from math import log

import numpy as np


class Metrics:
    @staticmethod
    def conditional_entropy(x, y, base=2):
        """Quantifies the amount of information needed to describe the
        outcome of y given that the value of x is known; computes H(Y|X).

        Parameters
        ----------
        x : np.array
            values of the variable
        y : np.array
            array of labels
        base : int, optional
            base of the logarithm, by default 2

        Returns
        -------
        float
            conditional entropy of y given x
        """
        xy = np.c_[x, y]
        return Metrics.entropy(xy, base) - Metrics.entropy(x, base)

    @staticmethod
    def entropy(y, base=2):
        """Measure of the uncertainty in predicting the value of y.

        Parameters
        ----------
        y : np.array
            array of labels
        base : int, optional
            base of the logarithm, by default 2

        Returns
        -------
        float
            entropy of y
        """
        _, count = np.unique(y, return_counts=True, axis=0)
        proba = count.astype(float) / len(y)
        proba = proba[proba > 0.0]
        return np.sum(proba * np.log(1.0 / proba)) / log(base)

    @staticmethod
    def information_gain(x, y, base=2):
        """Measures the reduction in uncertainty about the value of y when
        the value of x is known (also called mutual information)
        (https://www.sciencedirect.com/science/article/pii/S0020025519303603)

        Parameters
        ----------
        x : np.array
            values of the variable
        y : np.array
            array of labels
        base : int, optional
            base of the logarithm, by default 2

        Returns
        -------
        float
            information gained
        """
        return Metrics.entropy(y, base) - Metrics.conditional_entropy(
            x, y, base
        )

    @staticmethod
    def symmetrical_uncertainty(x, y):
        """Information gain normalized by the sum of the entropies of x
        and y, giving a symmetric score in [0, 1].

        Parameters
        ----------
        x : np.array
            values of the variable
        y : np.array
            array of labels

        Returns
        -------
        float
            symmetrical uncertainty between x and y
        """
        return (
            2.0
            * Metrics.information_gain(x, y)
            / (Metrics.entropy(x) + Metrics.entropy(y))
        )


class CFS:
    def __init__(self, a):
        self.a = a
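

# Minimal usage sketch (illustrative only): the toy feature matrix `X` and
# label vector `y` below are made-up data, not part of this module. It shows
# how each discretized feature column can be scored against the labels with
# Metrics.symmetrical_uncertainty.
if __name__ == "__main__":
    X = np.array(
        [
            [0, 1, 0],
            [0, 1, 1],
            [1, 0, 1],
            [1, 0, 0],
            [1, 1, 1],
            [0, 0, 0],
        ]
    )
    y = np.array([0, 0, 1, 1, 1, 0])
    # Higher symmetrical uncertainty means the feature shares more
    # information with the labels.
    for j in range(X.shape[1]):
        su = Metrics.symmetrical_uncertainty(X[:, j], y)
        print(f"feature {j}: SU = {su:.3f}")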