Fix some tests

This commit is contained in:
2021-06-01 23:14:22 +02:00
parent b15a059b1d
commit eb00e1516a
4 changed files with 68 additions and 12 deletions

View File

@@ -2,11 +2,9 @@ from math import log
import numpy as np
from scipy.special import gamma, psi
from sklearn.neighbors import BallTree, KDTree, NearestNeighbors
from sklearn.neighbors import NearestNeighbors
from sklearn.feature_selection._mutual_info import _compute_mi
# from .entropy_estimators import mi, entropy as c_entropy
class Metrics:
@staticmethod
@@ -65,6 +63,10 @@ class Metrics:
and:
Kraskov A, Stogbauer H, Grassberger P. (2004). Estimating mutual
information. Phys Rev E 69(6 Pt 2):066138.
Differential entropy can be negative
https://stats.stackexchange.com/questions/73881/
when-is-the-differential-entropy-negative
"""
if x.ndim == 1:
x = x.reshape(-1, 1)
@@ -131,7 +133,10 @@ class Metrics:
return (
2.0
* Metrics.information_gain_cont(x, y)
/ (Metrics.differential_entropy(x) + Metrics.entropy(y))
/ (
Metrics.differential_entropy(x, k=len(x) - 1)
+ Metrics.entropy(y)
)
)
@staticmethod

View File

@@ -20,6 +20,9 @@ class MFS:
----------
max_features: int
The maximum number of features to return
discrete: boolean
Whether the features are discrete (True) or continuous (False). The
labels are always assumed to be discrete.
"""
def __init__(self, max_features=None, discrete=True):

22
mfs/k.py Normal file
View File

@@ -0,0 +1,22 @@
from sklearn.datasets import load_wine
from mfs import MFS
from mfs.Metrics import Metrics
# Selectors configured for continuous and for discrete features; they are
# only instantiated here and are not used by the rest of this script.
mfsc = MFS(discrete=False)
mfsd = MFS(discrete=True)

# Wine dataset: X holds one column per feature, y the labels.
X, y = load_wine(return_X_y=True)
m, n = X.shape

print("* Differential entropy in X")
for i, column in enumerate(X.T):
    print(i, Metrics.differential_entropy(column, k=10))

print("* Information Gain")
print("- Discrete features")
# First over the whole matrix, then one feature at a time.
print(Metrics.information_gain(X, y))
for i, column in enumerate(X.T):
    print(i, Metrics.information_gain(column, y))

print("- Continuous features")
# The whole-matrix call is left disabled:
# print(Metrics.information_gain_cont(X, y))
for i, column in enumerate(X.T):
    print(i, Metrics.information_gain_cont(column, y))

View File

@@ -1,6 +1,7 @@
import unittest
import numpy as np
from sklearn.datasets import load_iris, load_wine
from ..entropy_estimators import entropy
from mdlp import MDLP
from ..Selection import Metrics
@@ -25,9 +26,9 @@ class Metrics_test(unittest.TestCase):
([1, 1, 5], 2, 0.9182958340544896),
(self.y_i, 3, 0.999999999),
]
for dataset, base, entropy in datasets:
for dataset, base, entropy_expected in datasets:
computed = metric.entropy(dataset, base)
self.assertAlmostEqual(entropy, computed)
self.assertAlmostEqual(entropy_expected, computed)
def test_differential_entropy(self):
metric = Metrics()
@@ -41,11 +42,13 @@ class Metrics_test(unittest.TestCase):
(self.X_i_c, 37, 3.06627326925228),
(self.X_w_c, 37, 63.13827518897429),
]
for dataset, base, entropy in datasets:
for dataset, base, entropy_expected in datasets:
computed = metric.differential_entropy(
np.array(dataset, dtype="float64"), base
)
self.assertAlmostEqual(entropy, computed, msg=str(dataset))
self.assertAlmostEqual(
entropy_expected, computed, msg=str(dataset)
)
expected = [
1.6378708764142766,
2.0291571802275037,
@@ -68,6 +71,29 @@ class Metrics_test(unittest.TestCase):
)
self.assertAlmostEqual(computed, res_expected)
def test_dif_ent(self):
    """Print the ``entropy`` estimate for each column of ``self.X_w_c``.

    Every column is reshaped to a single-column 2-D array and passed to
    ``entropy`` with ``k = n_samples - 2``. The reference values listed
    below only drive the number of iterations; nothing is asserted
    against them, the computed values are just printed.
    """
    expected = [
        1.6378708764142766,
        2.0291571802275037,
        0.8273865123744271,
        3.203935772642847,
        4.859193341386733,
        1.3707315434976266,
        1.8794952925706312,
        -0.2983180654207054,
        1.4521478934625076,
        2.834404839362728,
        0.4894081282811191,
        1.361210381692561,
        7.6373991502818175,
    ]
    n_samples, n_features = self.X_w_c.shape
    k_value = n_samples - 2
    # One iteration per reference value; the value itself is not compared.
    for column, _reference in enumerate(expected):
        feature = self.X_w_c[:, column].reshape(-1, 1)
        computed = entropy(feature, k=k_value)
        print("-*-", computed)
def test_conditional_entropy(self):
metric = Metrics()
results_expected = [
@@ -133,10 +159,10 @@ class Metrics_test(unittest.TestCase):
def test_symmetrical_uncertainty_continuous(self):
metric = Metrics()
results_expected = [
-0.08368315199022527,
-0.08539330663499867,
-0.026524185532893957,
-0.016238166071083728,
0.3116626663552704,
0.22524988105092494,
0.24511182026415218,
0.07114329389542708,
]
for expected, col in zip(results_expected, range(self.X_w.shape[1])):
computed = metric.symmetrical_unc_continuous(