Fix some tests

2021-06-01 23:14:22 +02:00
parent b15a059b1d
commit eb00e1516a
4 changed files with 68 additions and 12 deletions


@@ -2,11 +2,9 @@ from math import log
 import numpy as np
 from scipy.special import gamma, psi
-from sklearn.neighbors import BallTree, KDTree, NearestNeighbors
+from sklearn.neighbors import NearestNeighbors
 from sklearn.feature_selection._mutual_info import _compute_mi
-# from .entropy_estimators import mi, entropy as c_entropy


 class Metrics:
     @staticmethod
@@ -65,6 +63,10 @@ class Metrics:
         and:
         Kraskov A, Stogbauer H, Grassberger P. (2004). Estimating mutual
         information. Phys Rev E 69(6 Pt 2):066138.
+
+        Differential entropy can be negative
+        https://stats.stackexchange.com/questions/73881/
+        when-is-the-differential-entropy-negative
         """
         if x.ndim == 1:
             x = x.reshape(-1, 1)
@@ -131,7 +133,10 @@ class Metrics:
         return (
             2.0
             * Metrics.information_gain_cont(x, y)
-            / (Metrics.differential_entropy(x) + Metrics.entropy(y))
+            / (
+                Metrics.differential_entropy(x, k=len(x) - 1)
+                + Metrics.entropy(y)
+            )
         )

     @staticmethod
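The quotient being reshaped above is the symmetrical uncertainty of a continuous feature x against discrete labels y, SU(x, y) = 2 * I(x; y) / (h(x) + H(y)). Passing k=len(x) - 1 changes the k-NN estimate of h(x) in the denominator, which lines up with the expected test values at the end of this commit flipping from negative to positive. A rough sketch of the same quantity built from public library estimators (mutual_info_classif and scipy's differential_entropy are stand-ins, so the numbers will not match Metrics exactly):

import numpy as np
from scipy.stats import differential_entropy
from sklearn.feature_selection import mutual_info_classif

def shannon_entropy(y):
    # Shannon entropy (nats) of a discrete label vector
    _, counts = np.unique(y, return_counts=True)
    p = counts / counts.sum()
    return float(-np.sum(p * np.log(p)))

def symmetrical_unc_cont(x, y):
    ig = mutual_info_classif(x.reshape(-1, 1), y, random_state=0)[0]
    return 2.0 * ig / (differential_entropy(x) + shannon_entropy(y))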


@@ -20,6 +20,9 @@ class MFS:
     ----------
     max_features: int
         The maximum number of features to return
+    discrete: boolean
+        Whether the features are continuous or discrete. Discrete labels
+        are always assumed.
     """

     def __init__(self, max_features=None, discrete=True):

mfs/k.py (new file, 22 lines)

@@ -0,0 +1,22 @@
+from sklearn.datasets import load_wine
+
+from mfs import MFS
+from mfs.Metrics import Metrics
+
+mfsc = MFS(discrete=False)
+mfsd = MFS(discrete=True)
+X, y = load_wine(return_X_y=True)
+m, n = X.shape
+
+print("* Differential entropy in X")
+for i in range(n):
+    print(i, Metrics.differential_entropy(X[:, i], k=10))
+print("* Information Gain")
+print("- Discrete features")
+print(Metrics.information_gain(X, y))
+for i in range(n):
+    print(i, Metrics.information_gain(X[:, i], y))
+print("- Continuous features")
+# print(Metrics.information_gain_cont(X, y))
+for i in range(n):
+    print(i, Metrics.information_gain_cont(X[:, i], y))
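Since Metrics pulls _compute_mi from sklearn (see the first hunk of this commit), the per-feature information_gain_cont values this script prints should land close to sklearn's public estimator. A quick cross-check one could run alongside k.py (mutual_info_classif is a stand-in, not the repository's API):

from sklearn.datasets import load_wine
from sklearn.feature_selection import mutual_info_classif

X, y = load_wine(return_X_y=True)
for i, mi in enumerate(mutual_info_classif(X, y, random_state=0)):
    print(i, mi)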


@@ -1,6 +1,7 @@
 import unittest
 import numpy as np
 from sklearn.datasets import load_iris, load_wine
+from ..entropy_estimators import entropy
 from mdlp import MDLP

 from ..Selection import Metrics
@@ -25,9 +26,9 @@ class Metrics_test(unittest.TestCase):
             ([1, 1, 5], 2, 0.9182958340544896),
             (self.y_i, 3, 0.999999999),
         ]
-        for dataset, base, entropy in datasets:
+        for dataset, base, entropy_expected in datasets:
             computed = metric.entropy(dataset, base)
-            self.assertAlmostEqual(entropy, computed)
+            self.assertAlmostEqual(entropy_expected, computed)

     def test_differential_entropy(self):
         metric = Metrics()
@@ -41,11 +42,13 @@ class Metrics_test(unittest.TestCase):
             (self.X_i_c, 37, 3.06627326925228),
             (self.X_w_c, 37, 63.13827518897429),
         ]
-        for dataset, base, entropy in datasets:
+        for dataset, base, entropy_expected in datasets:
             computed = metric.differential_entropy(
                 np.array(dataset, dtype="float64"), base
             )
-            self.assertAlmostEqual(entropy, computed, msg=str(dataset))
+            self.assertAlmostEqual(
+                entropy_expected, computed, msg=str(dataset)
+            )
         expected = [
             1.6378708764142766,
             2.0291571802275037,
@@ -68,6 +71,29 @@ class Metrics_test(unittest.TestCase):
             )
             self.assertAlmostEqual(computed, res_expected)
+
+    def test_dif_ent(self):
+        expected = [
+            1.6378708764142766,
+            2.0291571802275037,
+            0.8273865123744271,
+            3.203935772642847,
+            4.859193341386733,
+            1.3707315434976266,
+            1.8794952925706312,
+            -0.2983180654207054,
+            1.4521478934625076,
+            2.834404839362728,
+            0.4894081282811191,
+            1.361210381692561,
+            7.6373991502818175,
+        ]
+        n_samples, n_features = self.X_w_c.shape
+        for c, res_expected in enumerate(expected):
+            computed = entropy(
+                self.X_w_c[:, c].reshape(-1, 1), k=n_samples - 2
+            )
+            print("-*-", computed)
     def test_conditional_entropy(self):
         metric = Metrics()
         results_expected = [
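The entropy function exercised by the new test above comes from the project's entropy_estimators module; note the test currently prints the estimates rather than asserting against the expected list. Judging by the imports in the first file (gamma, psi, NearestNeighbors), it is a Kozachenko-Leonenko k-NN estimator in the spirit of Kraskov et al. (2004): H(X) is approximated by psi(N) - psi(k) + log V_d + (d / N) * sum_i log r_i, where r_i is the distance from sample i to its k-th neighbour and V_d is the volume of the d-dimensional unit ball. A minimal sketch under that assumption (Euclidean metric; the module's exact constants may differ):

import numpy as np
from scipy.special import gamma, psi
from sklearn.neighbors import NearestNeighbors

def kl_entropy(x, k=3):
    # Kozachenko-Leonenko k-NN differential entropy estimate, in nats.
    # Assumes no duplicated rows, otherwise log(0) blows up.
    n, d = x.shape
    dist, _ = NearestNeighbors(n_neighbors=k + 1).fit(x).kneighbors(x)
    r = dist[:, k]  # column 0 is each point itself
    log_unit_ball = (d / 2.0) * np.log(np.pi) - np.log(gamma(d / 2.0 + 1))
    return psi(n) - psi(k) + log_unit_ball + d * np.mean(np.log(r))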
@@ -133,10 +159,10 @@ class Metrics_test(unittest.TestCase):
     def test_symmetrical_uncertainty_continuous(self):
         metric = Metrics()
         results_expected = [
-            -0.08368315199022527,
-            -0.08539330663499867,
-            -0.026524185532893957,
-            -0.016238166071083728,
+            0.3116626663552704,
+            0.22524988105092494,
+            0.24511182026415218,
+            0.07114329389542708,
         ]
         for expected, col in zip(results_expected, range(self.X_w.shape[1])):
             computed = metric.symmetrical_unc_continuous(