Mirror of https://github.com/Doctorado-ML/mufs.git, synced 2025-08-18 17:15:52 +00:00

Commit: Fix some tests
@@ -2,11 +2,9 @@ from math import log
 import numpy as np
 
 from scipy.special import gamma, psi
-from sklearn.neighbors import BallTree, KDTree, NearestNeighbors
+from sklearn.neighbors import NearestNeighbors
 from sklearn.feature_selection._mutual_info import _compute_mi
 
-# from .entropy_estimators import mi, entropy as c_entropy
-
 
 class Metrics:
     @staticmethod
@@ -65,6 +63,10 @@ class Metrics:
         and:
         Kraskov A, Stogbauer H, Grassberger P. (2004). Estimating mutual
         information. Phys Rev E 69(6 Pt 2):066138.
+
+        Differential entropy can be negative
+        https://stats.stackexchange.com/questions/73881/
+        when-is-the-differential-entropy-negative
         """
         if x.ndim == 1:
             x = x.reshape(-1, 1)
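
A quick check of the added docstring note: unlike Shannon entropy, differential entropy is not bounded below by zero. With plain scipy (nothing from this repo), a uniform distribution on [0, 0.1] has differential entropy log(0.1):

    from scipy.stats import uniform

    # H(Uniform(0, a)) = log(a), which is negative whenever a < 1
    print(uniform(loc=0, scale=0.1).entropy())  # ~ -2.3026
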
@@ -131,7 +133,10 @@ class Metrics:
         return (
             2.0
             * Metrics.information_gain_cont(x, y)
-            / (Metrics.differential_entropy(x) + Metrics.entropy(y))
+            / (
+                Metrics.differential_entropy(x, k=len(x) - 1)
+                + Metrics.entropy(y)
+            )
         )
 
     @staticmethod
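
The expression above is the symmetrical uncertainty SU(x, y) = 2 * IG(x, y) / (H(x) + H(y)); the fix pins the continuous entropy estimate in the denominator to k = len(x) - 1 neighbours. For orientation, a minimal sketch of the same quantity in the all-discrete case, using only public scipy/sklearn calls (the function name is illustrative, not this project's API):

    import numpy as np
    from scipy.stats import entropy as shannon_entropy
    from sklearn.metrics import mutual_info_score

    def symmetrical_uncertainty(x, y):
        # x, y: non-negative integer labels; everything is in nats
        h_x = shannon_entropy(np.bincount(x) / len(x))
        h_y = shannon_entropy(np.bincount(y) / len(y))
        return 2.0 * mutual_info_score(x, y) / (h_x + h_y)
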
@@ -20,6 +20,9 @@ class MFS:
     ----------
     max_features: int
         The maximum number of features to return
+    discrete: boolean
+        If the features are continuous or discrete. It always supposes
+        discrete labels.
     """
 
     def __init__(self, max_features=None, discrete=True):
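
Only the constructor appears in this diff, so a usage sketch cannot go beyond instantiation; max_features=5 is an arbitrary illustrative value:

    from mfs import MFS

    # discrete=False selects the continuous-feature metrics;
    # labels are treated as discrete either way (per the docstring)
    mfs_cont = MFS(max_features=5, discrete=False)
    mfs_disc = MFS(max_features=5, discrete=True)
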
mfs/k.py (new file, 22 lines)
@@ -0,0 +1,22 @@
+from sklearn.datasets import load_wine
+from mfs import MFS
+from mfs.Metrics import Metrics
+
+mfsc = MFS(discrete=False)
+mfsd = MFS(discrete=True)
+X, y = load_wine(return_X_y=True)
+m, n = X.shape
+
+print("* Differential entropy in X")
+for i in range(n):
+    print(i, Metrics.differential_entropy(X[:, i], k=10))
+
+print("* Information Gain")
+print("- Discrete features")
+print(Metrics.information_gain(X, y))
+for i in range(n):
+    print(i, Metrics.information_gain(X[:, i], y))
+print("- Continuous features")
+# print(Metrics.information_gain_cont(X, y))
+for i in range(n):
+    print(i, Metrics.information_gain_cont(X[:, i], y))
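
The new script exercises the metrics column by column on the wine data. Assuming information_gain_cont is a k-NN mutual information estimate, as the _compute_mi import in the first hunk suggests, sklearn's public API gives a handy cross-check that returns one MI value per feature:

    from sklearn.datasets import load_wine
    from sklearn.feature_selection import mutual_info_classif

    X, y = load_wine(return_X_y=True)
    # n_neighbors plays the role of k in the k-NN MI estimator
    print(mutual_info_classif(X, y, n_neighbors=3))
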
@@ -1,6 +1,7 @@
 import unittest
 import numpy as np
 from sklearn.datasets import load_iris, load_wine
+from ..entropy_estimators import entropy
 from mdlp import MDLP
 from ..Selection import Metrics
 
@@ -25,9 +26,9 @@ class Metrics_test(unittest.TestCase):
             ([1, 1, 5], 2, 0.9182958340544896),
             (self.y_i, 3, 0.999999999),
         ]
-        for dataset, base, entropy in datasets:
+        for dataset, base, entropy_expected in datasets:
             computed = metric.entropy(dataset, base)
-            self.assertAlmostEqual(entropy, computed)
+            self.assertAlmostEqual(entropy_expected, computed)
 
     def test_differential_entropy(self):
         metric = Metrics()
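
The rename to entropy_expected is not purely cosmetic: the module now imports entropy from ..entropy_estimators (first hunk of this file), and the old loop variable would have shadowed it.
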
@@ -41,11 +42,13 @@ class Metrics_test(unittest.TestCase):
             (self.X_i_c, 37, 3.06627326925228),
             (self.X_w_c, 37, 63.13827518897429),
         ]
-        for dataset, base, entropy in datasets:
+        for dataset, base, entropy_expected in datasets:
             computed = metric.differential_entropy(
                 np.array(dataset, dtype="float64"), base
             )
-            self.assertAlmostEqual(entropy, computed, msg=str(dataset))
+            self.assertAlmostEqual(
+                entropy_expected, computed, msg=str(dataset)
+            )
         expected = [
             1.6378708764142766,
             2.0291571802275037,
@@ -68,6 +71,29 @@ class Metrics_test(unittest.TestCase):
         )
         self.assertAlmostEqual(computed, res_expected)
 
+    def test_dif_ent(self):
+        expected = [
+            1.6378708764142766,
+            2.0291571802275037,
+            0.8273865123744271,
+            3.203935772642847,
+            4.859193341386733,
+            1.3707315434976266,
+            1.8794952925706312,
+            -0.2983180654207054,
+            1.4521478934625076,
+            2.834404839362728,
+            0.4894081282811191,
+            1.361210381692561,
+            7.6373991502818175,
+        ]
+        n_samples, n_features = self.X_w_c.shape
+        for c, res_expected in enumerate(expected):
+            computed = entropy(
+                self.X_w_c[:, c].reshape(-1, 1), k=n_samples - 2
+            )
+            print("-*-", computed)
+
     def test_conditional_entropy(self):
         metric = Metrics()
         results_expected = [
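
Note that test_dif_ent prints the estimates with k = n_samples - 2 but asserts nothing yet, so it reads as a scratch comparison against the expected list. For reference, a self-contained sketch of the Kozachenko-Leonenko k-NN estimator that such entropy helpers implement, reconstructed from the Kraskov et al. (2004) paper cited above rather than taken from this project's code:

    import numpy as np
    from scipy.special import gamma, psi
    from sklearn.neighbors import NearestNeighbors

    def kl_entropy(x, k=3):
        # H(X) ~= psi(n) - psi(k) + log(c_d) + (d / n) * sum(log(eps_i)),
        # eps_i = twice the distance from x_i to its k-th nearest neighbour
        x = np.asarray(x, dtype="float64").reshape(len(x), -1)
        n, d = x.shape
        dist, _ = NearestNeighbors(n_neighbors=k + 1).fit(x).kneighbors(x)
        eps = 2.0 * dist[:, -1]                    # column 0 is the point itself
        c_d = np.pi ** (d / 2) / gamma(d / 2 + 1)  # volume of the unit d-ball
        return psi(n) - psi(k) + np.log(c_d) + d * np.mean(np.log(eps))
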
@@ -133,10 +159,10 @@ class Metrics_test(unittest.TestCase):
     def test_symmetrical_uncertainty_continuous(self):
         metric = Metrics()
         results_expected = [
-            -0.08368315199022527,
-            -0.08539330663499867,
-            -0.026524185532893957,
-            -0.016238166071083728,
+            0.3116626663552704,
+            0.22524988105092494,
+            0.24511182026415218,
+            0.07114329389542708,
         ]
         for expected, col in zip(results_expected, range(self.X_w.shape[1])):
             computed = metric.symmetrical_unc_continuous(
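
The flip from small negative values to positives in the expected results is consistent with the k = len(x) - 1 change in Selection.py above: with the corrected denominator the symmetrical uncertainty presumably lands in the [0, 1] range the measure is meant to occupy.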