Mirror of https://github.com/Doctorado-ML/mufs.git (synced 2025-08-17 08:35:52 +00:00)
Add max_features to selection
Add first approach to continuous variables
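For reference, a minimal usage sketch of the two MFS options these commits exercise in the tests below: max_features, which caps how many features cfs/fcbf keep (test_csf_max_features expects [6, 12, 9] on discretized wine), and discrete=False, which feeds the raw continuous features to the new selection path (test_csf_wine_cont). The public import path for MFS is an assumption; only Metrics' relative import (from ..Selection) appears in this diff.

# Sketch only; "mufs.Selection" as the import path is an assumption,
# chosen because the tests import Metrics from ..Selection.
from sklearn.datasets import load_wine
from mdlp import MDLP
from mufs.Selection import MFS  # assumed location, alongside Metrics

X_cont, y = load_wine(return_X_y=True)
X_disc = MDLP(random_state=1).fit_transform(X_cont, y).astype("int64")

# Cap the number of selected features on MDLP-discretized data;
# the tests expect [6, 12, 9] here.
best3 = MFS(max_features=3).cfs(X_disc, y).get_results()

# FCBF with a symmetrical-uncertainty threshold, as in test_fcbf_wine.
fcbf_subset = MFS().fcbf(X_disc, y, threshold=0.05).get_results()

# First approach to continuous variables: pass the raw features instead.
best_cont = MFS(discrete=False).cfs(X_cont, y).get_results()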
@@ -9,11 +9,11 @@ class MFS_test(unittest.TestCase):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
         mdlp = MDLP(random_state=1)
-        X, self.y_w = load_wine(return_X_y=True)
-        self.X_w = mdlp.fit_transform(X, self.y_w).astype("int64")
-        X, self.y_i = load_iris(return_X_y=True)
+        self.X_wc, self.y_w = load_wine(return_X_y=True)
+        self.X_w = mdlp.fit_transform(self.X_wc, self.y_w).astype("int64")
+        self.X_ic, self.y_i = load_iris(return_X_y=True)
         mdlp = MDLP(random_state=1)
-        self.X_i = mdlp.fit_transform(X, self.y_i).astype("int64")
+        self.X_i = mdlp.fit_transform(self.X_ic, self.y_i).astype("int64")
 
     def assertListAlmostEqual(self, list1, list2, tol=7):
         self.assertEqual(len(list1), len(list2))
@@ -21,16 +21,16 @@ class MFS_test(unittest.TestCase):
             self.assertAlmostEqual(a, b, tol)
 
     def test_initialize(self):
-        mfs = MFS(max_features=100)
+        mfs = MFS()
         mfs.fcbf(self.X_w, self.y_w, 0.05)
-        mfs._initialize()
+        mfs._initialize(self.X_w, self.y_w)
         self.assertIsNone(mfs.get_results())
         self.assertListEqual([], mfs.get_scores())
         self.assertDictEqual({}, mfs._su_features)
         self.assertIsNone(mfs._su_labels)
 
     def test_csf_wine(self):
-        mfs = MFS(max_features=100)
+        mfs = MFS()
         expected = [6, 12, 9, 4, 10, 0]
         self.assertListAlmostEqual(
             expected, mfs.cfs(self.X_w, self.y_w).get_results()
@@ -45,6 +45,23 @@ class MFS_test(unittest.TestCase):
         ]
         self.assertListAlmostEqual(expected, mfs.get_scores())
 
+    def test_csf_wine_cont(self):
+        mfs = MFS(discrete=False)
+        expected = [6, 11, 9, 0, 12, 5]
+        self.assertListAlmostEqual(
+            expected, mfs.cfs(self.X_wc, self.y_w).get_results()
+        )
+        expected = [
+            0.5218299405215557,
+            0.602513857132804,
+            0.4877384978817362,
+            0.3743688234383051,
+            0.28795671854246285,
+            0.2309165735173175,
+        ]
+        # self.assertListAlmostEqual(expected, mfs.get_scores())
+        print(expected, mfs.get_scores())
+
     def test_csf_max_features(self):
         mfs = MFS(max_features=3)
         expected = [6, 12, 9]
@@ -59,7 +76,7 @@ class MFS_test(unittest.TestCase):
         self.assertListAlmostEqual(expected, mfs.get_scores())
 
     def test_csf_iris(self):
-        mfs = MFS(max_features=100)
+        mfs = MFS()
         expected = [3, 2, 0, 1]
         computed = mfs.cfs(self.X_i, self.y_i).get_results()
         self.assertListAlmostEqual(expected, computed)
@@ -72,7 +89,7 @@ class MFS_test(unittest.TestCase):
         self.assertListAlmostEqual(expected, mfs.get_scores())
 
     def test_fcbf_wine(self):
-        mfs = MFS(max_features=100)
+        mfs = MFS()
         computed = mfs.fcbf(self.X_w, self.y_w, threshold=0.05).get_results()
         expected = [6, 9, 12, 0, 11, 4]
         self.assertListAlmostEqual(expected, computed)
@@ -99,7 +116,7 @@ class MFS_test(unittest.TestCase):
         self.assertListAlmostEqual(expected, mfs.get_scores())
 
     def test_fcbf_iris(self):
-        mfs = MFS(max_features=100)
+        mfs = MFS()
        computed = mfs.fcbf(self.X_i, self.y_i, threshold=0.05).get_results()
         expected = [3, 2]
         self.assertListAlmostEqual(expected, computed)
@@ -107,7 +124,7 @@ class MFS_test(unittest.TestCase):
         self.assertListAlmostEqual(expected, mfs.get_scores())
 
     def test_compute_su_labels(self):
-        mfs = MFS(max_features=100)
+        mfs = MFS()
         mfs.fcbf(self.X_i, self.y_i, threshold=0.05)
         expected = [0.0, 0.0, 0.810724587460511, 0.870521418179061]
         self.assertListAlmostEqual(expected, mfs._compute_su_labels().tolist())
@@ -115,12 +132,12 @@ class MFS_test(unittest.TestCase):
         self.assertListAlmostEqual([1, 2, 3, 4], mfs._compute_su_labels())
 
     def test_invalid_threshold(self):
-        mfs = MFS(max_features=100)
+        mfs = MFS()
         with self.assertRaises(ValueError):
             mfs.fcbf(self.X_i, self.y_i, threshold=1e-15)
 
     def test_fcbf_exit_threshold(self):
-        mfs = MFS(max_features=100)
+        mfs = MFS()
         computed = mfs.fcbf(self.X_w, self.y_w, threshold=0.4).get_results()
         expected = [6, 9, 12]
         self.assertListAlmostEqual(expected, computed)
@@ -1,5 +1,6 @@
 import unittest
-from sklearn.datasets import load_iris
+import numpy as np
+from sklearn.datasets import load_iris, load_wine
 from mdlp import MDLP
 from ..Selection import Metrics
 
@@ -8,12 +9,10 @@ class Metrics_test(unittest.TestCase):
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
         mdlp = MDLP(random_state=1)
-        X, self.y = load_iris(return_X_y=True)
-        self.X = mdlp.fit_transform(X, self.y).astype("int64")
-        self.m, self.n = self.X.shape
-
-    # @classmethod
-    # def setup(cls):
+        self.X_i_c, self.y_i = load_iris(return_X_y=True)
+        self.X_i = mdlp.fit_transform(self.X_i_c, self.y_i).astype("int64")
+        self.X_w_c, self.y_w = load_wine(return_X_y=True)
+        self.X_w = mdlp.fit_transform(self.X_w_c, self.y_w).astype("int64")
 
     def test_entropy(self):
         metric = Metrics()
@@ -24,12 +23,51 @@ class Metrics_test(unittest.TestCase):
             ([1, 1, 1, 5, 2, 2, 3, 3, 3], 4, 0.9455305560363263),
             ([1, 1, 1, 2, 2, 3, 3, 3, 5], 4, 0.9455305560363263),
             ([1, 1, 5], 2, 0.9182958340544896),
-            (self.y, 3, 0.999999999),
+            (self.y_i, 3, 0.999999999),
         ]
         for dataset, base, entropy in datasets:
             computed = metric.entropy(dataset, base)
             self.assertAlmostEqual(entropy, computed)
 
+    def test_differential_entropy(self):
+        metric = Metrics()
+        datasets = [
+            ([0, 0, 0, 0, 1, 1, 1, 1], 6, 1.0026709900837547096),
+            ([0, 1, 0, 2, 1, 2], 5, 1.3552453009332424),
+            ([0, 0, 0, 0, 0, 0, 0, 2, 2, 2], 7, 1.7652626150881443),
+            ([1, 1, 1, 5, 2, 2, 3, 3, 3], 8, 1.9094631320594582),
+            ([1, 1, 1, 2, 2, 3, 3, 3, 5], 8, 1.9094631320594582),
+            ([1, 1, 5], 2, 2.5794415416798357),
+            (self.X_i_c, 37, 3.06627326925228),
+            (self.X_w_c, 37, 63.13827518897429),
+        ]
+        for dataset, base, entropy in datasets:
+            computed = metric.differential_entropy(
+                np.array(dataset, dtype="float64"), base
+            )
+            self.assertAlmostEqual(entropy, computed, msg=str(dataset))
+        expected = [
+            1.6378708764142766,
+            2.0291571802275037,
+            0.8273865123744271,
+            3.203935772642847,
+            4.859193341386733,
+            1.3707315434976266,
+            1.8794952925706312,
+            -0.2983180654207054,
+            1.4521478934625076,
+            2.834404839362728,
+            0.4894081282811191,
+            1.361210381692561,
+            7.6373991502818175,
+        ]
+        n_samples, n_features = self.X_w_c.shape
+        for c, res_expected in zip(range(n_features), expected):
+            computed = metric.differential_entropy(
+                self.X_w_c[:, c], n_samples - 1
+            )
+            self.assertAlmostEqual(computed, res_expected)
+
     def test_conditional_entropy(self):
         metric = Metrics()
         results_expected = [
@@ -39,7 +77,7 @@ class Metrics_test(unittest.TestCase):
             0.13032469395094992,
         ]
         for expected, col in zip(results_expected, range(self.n)):
-            computed = metric.conditional_entropy(self.X[:, col], self.y, 3)
+            computed = metric.conditional_entropy(self.X_i[:, col], self.y, 3)
             self.assertAlmostEqual(expected, computed)
         self.assertAlmostEqual(
             0.6309297535714573,
@@ -62,7 +100,7 @@ class Metrics_test(unittest.TestCase):
             0.8696753060490499,
         ]
         for expected, col in zip(results_expected, range(self.n)):
-            computed = metric.information_gain(self.X[:, col], self.y, 3)
+            computed = metric.information_gain(self.X_i[:, col], self.y, 3)
             self.assertAlmostEqual(expected, computed)
         # https://planetcalc.com/8419/
         # ?_d=FrDfFN2COAhqh9Pb5ycqy5CeKgIOxlfSjKgyyIR.Q5L0np-g-hw6yv8M1Q8_
@@ -73,7 +111,7 @@ class Metrics_test(unittest.TestCase):
             1.378402748,
         ]
         for expected, col in zip(results_expected, range(self.n)):
-            computed = metric.information_gain(self.X[:, col], self.y, 2)
+            computed = metric.information_gain(self.X_i[:, col], self.y, 2)
             self.assertAlmostEqual(expected, computed)
 
     def test_symmetrical_uncertainty(self):
@@ -85,5 +123,20 @@ class Metrics_test(unittest.TestCase):
             0.870521418179061,
         ]
         for expected, col in zip(results_expected, range(self.n)):
-            computed = metric.symmetrical_uncertainty(self.X[:, col], self.y)
+            computed = metric.symmetrical_uncertainty(self.X_i[:, col], self.y)
             self.assertAlmostEqual(expected, computed)
 
+    def test_symmetrical_uncertainty_continuous(self):
+        metric = Metrics()
+        results_expected = [
+            0.33296547388990266,
+            0.19068147573570668,
+            0.810724587460511,
+            0.870521418179061,
+        ]
+        for expected, col in zip(results_expected, range(self.n)):
+            computed = metric.symmetrical_unc_continuous(
+                self.X_i[:, col], self.y
+            )
+            print(computed)
+            # self.assertAlmostEqual(expected, computed)
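The Metrics changes above add two continuous-data entry points: differential_entropy, called both on small float arrays and on whole continuous columns with n_samples - 1 as its second argument, and symmetrical_unc_continuous, the symmetrical uncertainty between a feature and the labels (the new test currently passes the MDLP-discretized columns and keeps its assertion commented out). A minimal sketch mirroring those calls, with the same assumed import path as the sketch above:

# Sketch only; the import path is an assumption, the calls mirror the tests.
from sklearn.datasets import load_iris
from mufs.Selection import Metrics  # assumed location

X, y = load_iris(return_X_y=True)  # X is continuous here
metric = Metrics()

# Differential entropy of one continuous column; the tests pass
# n_samples - 1 as the second argument for full columns.
h0 = metric.differential_entropy(X[:, 0], X.shape[0] - 1)

# Symmetrical uncertainty of a single feature against the class labels.
su0 = metric.symmetrical_unc_continuous(X[:, 0], y)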