mirror of
https://github.com/Doctorado-ML/mufs.git
synced 2025-08-17 08:35:52 +00:00
Fix some tests
This commit is contained in:
@@ -2,11 +2,9 @@ from math import log
|
||||
import numpy as np
|
||||
|
||||
from scipy.special import gamma, psi
|
||||
from sklearn.neighbors import BallTree, KDTree, NearestNeighbors
|
||||
from sklearn.neighbors import NearestNeighbors
|
||||
from sklearn.feature_selection._mutual_info import _compute_mi
|
||||
|
||||
# from .entropy_estimators import mi, entropy as c_entropy
|
||||
|
||||
|
||||
class Metrics:
|
||||
@staticmethod
|
||||
@@ -65,6 +63,10 @@ class Metrics:
|
||||
and:
|
||||
Kraskov A, Stogbauer H, Grassberger P. (2004). Estimating mutual
|
||||
information. Phys Rev E 69(6 Pt 2):066138.
|
||||
|
||||
Differential entropy can be negative
|
||||
https://stats.stackexchange.com/questions/73881/
|
||||
when-is-the-differential-entropy-negative
|
||||
"""
|
||||
if x.ndim == 1:
|
||||
x = x.reshape(-1, 1)
|
||||
@@ -131,7 +133,10 @@ class Metrics:
|
||||
return (
|
||||
2.0
|
||||
* Metrics.information_gain_cont(x, y)
|
||||
/ (Metrics.differential_entropy(x) + Metrics.entropy(y))
|
||||
/ (
|
||||
Metrics.differential_entropy(x, k=len(x) - 1)
|
||||
+ Metrics.entropy(y)
|
||||
)
|
||||
)
|
||||
|
||||
@staticmethod
|
||||
|
@@ -20,6 +20,9 @@ class MFS:
|
||||
----------
|
||||
max_features: int
|
||||
The maximum number of features to return
|
||||
discrete: boolean
|
||||
If the features are continuous or discrete. It always supposes discrete
|
||||
labels.
|
||||
"""
|
||||
|
||||
def __init__(self, max_features=None, discrete=True):
|
||||
|
22
mfs/k.py
Normal file
22
mfs/k.py
Normal file
@@ -0,0 +1,22 @@
|
||||
from sklearn.datasets import load_wine
|
||||
from mfs import MFS
|
||||
from mfs.Metrics import Metrics
|
||||
|
||||
mfsc = MFS(discrete=False)
|
||||
mfsd = MFS(discrete=True)
|
||||
X, y = load_wine(return_X_y=True)
|
||||
m, n = X.shape
|
||||
|
||||
print("* Differential entropy in X")
|
||||
for i in range(n):
|
||||
print(i, Metrics.differential_entropy(X[:, i], k=10))
|
||||
|
||||
print("* Information Gain")
|
||||
print("- Discrete features")
|
||||
print(Metrics.information_gain(X, y))
|
||||
for i in range(n):
|
||||
print(i, Metrics.information_gain(X[:, i], y))
|
||||
print("- Continuous features")
|
||||
# print(Metrics.information_gain_cont(X, y))
|
||||
for i in range(n):
|
||||
print(i, Metrics.information_gain_cont(X[:, i], y))
|
@@ -1,6 +1,7 @@
|
||||
import unittest
|
||||
import numpy as np
|
||||
from sklearn.datasets import load_iris, load_wine
|
||||
from ..entropy_estimators import entropy
|
||||
from mdlp import MDLP
|
||||
from ..Selection import Metrics
|
||||
|
||||
@@ -25,9 +26,9 @@ class Metrics_test(unittest.TestCase):
|
||||
([1, 1, 5], 2, 0.9182958340544896),
|
||||
(self.y_i, 3, 0.999999999),
|
||||
]
|
||||
for dataset, base, entropy in datasets:
|
||||
for dataset, base, entropy_expected in datasets:
|
||||
computed = metric.entropy(dataset, base)
|
||||
self.assertAlmostEqual(entropy, computed)
|
||||
self.assertAlmostEqual(entropy_expected, computed)
|
||||
|
||||
def test_differential_entropy(self):
|
||||
metric = Metrics()
|
||||
@@ -41,11 +42,13 @@ class Metrics_test(unittest.TestCase):
|
||||
(self.X_i_c, 37, 3.06627326925228),
|
||||
(self.X_w_c, 37, 63.13827518897429),
|
||||
]
|
||||
for dataset, base, entropy in datasets:
|
||||
for dataset, base, entropy_expected in datasets:
|
||||
computed = metric.differential_entropy(
|
||||
np.array(dataset, dtype="float64"), base
|
||||
)
|
||||
self.assertAlmostEqual(entropy, computed, msg=str(dataset))
|
||||
self.assertAlmostEqual(
|
||||
entropy_expected, computed, msg=str(dataset)
|
||||
)
|
||||
expected = [
|
||||
1.6378708764142766,
|
||||
2.0291571802275037,
|
||||
@@ -68,6 +71,29 @@ class Metrics_test(unittest.TestCase):
|
||||
)
|
||||
self.assertAlmostEqual(computed, res_expected)
|
||||
|
||||
def test_dif_ent(self):
|
||||
expected = [
|
||||
1.6378708764142766,
|
||||
2.0291571802275037,
|
||||
0.8273865123744271,
|
||||
3.203935772642847,
|
||||
4.859193341386733,
|
||||
1.3707315434976266,
|
||||
1.8794952925706312,
|
||||
-0.2983180654207054,
|
||||
1.4521478934625076,
|
||||
2.834404839362728,
|
||||
0.4894081282811191,
|
||||
1.361210381692561,
|
||||
7.6373991502818175,
|
||||
]
|
||||
n_samples, n_features = self.X_w_c.shape
|
||||
for c, res_expected in enumerate(expected):
|
||||
computed = entropy(
|
||||
self.X_w_c[:, c].reshape(-1, 1), k=n_samples - 2
|
||||
)
|
||||
print("-*-", computed)
|
||||
|
||||
def test_conditional_entropy(self):
|
||||
metric = Metrics()
|
||||
results_expected = [
|
||||
@@ -133,10 +159,10 @@ class Metrics_test(unittest.TestCase):
|
||||
def test_symmetrical_uncertainty_continuous(self):
|
||||
metric = Metrics()
|
||||
results_expected = [
|
||||
-0.08368315199022527,
|
||||
-0.08539330663499867,
|
||||
-0.026524185532893957,
|
||||
-0.016238166071083728,
|
||||
0.3116626663552704,
|
||||
0.22524988105092494,
|
||||
0.24511182026415218,
|
||||
0.07114329389542708,
|
||||
]
|
||||
for expected, col in zip(results_expected, range(self.X_w.shape[1])):
|
||||
computed = metric.symmetrical_unc_continuous(
|
||||
|
Reference in New Issue
Block a user