odte/odte/tests/Odte_tests.py
2020-07-06 00:12:46 +02:00

155 lines
5.6 KiB
Python

import unittest
import numpy as np
import warnings
from sklearn.exceptions import ConvergenceWarning
from odte import Odte
from .utils import load_dataset
class Odte_test(unittest.TestCase):
def __init__(self, *args, **kwargs):
self._random_state = 1
super().__init__(*args, **kwargs)
def test_max_samples_bogus(self):
values = [0, 3000, 1.1, 0.0, "duck"]
for max_samples in values:
with self.assertRaises(ValueError):
tclf = Odte(max_samples=max_samples)
tclf.fit(*load_dataset(self._random_state))
def test_get_bootstrap_nsamples(self):
expected_values = [(1, 1), (1500, 1500), (0.1, 150)]
for value, expected in expected_values:
tclf = Odte(max_samples=value)
computed = tclf._get_bootstrap_n_samples(1500)
self.assertEqual(expected, computed)
def test_initialize_sample_weight(self):
m = 5
ones = np.ones(m,)
weights = np.random.rand(m,)
expected_values = [(None, ones), (weights, weights)]
for value, expected in expected_values:
tclf = Odte()
computed = tclf._initialize_sample_weight(value, m)
self.assertListEqual(expected.tolist(), computed.tolist())
def test_initialize_max_feature(self):
expected_values = [
[0, 5, 6, 15],
[0, 2, 3, 9, 11, 14],
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15],
[0, 5, 6, 15],
[0, 5, 6, 15],
[0, 5, 6, 15],
]
X, y = load_dataset(
random_state=self._random_state, n_features=16, n_samples=10
)
for max_features in [4, 0.4, 1.0, None, "auto", "sqrt", "log2"]:
tclf = Odte(
random_state=self._random_state, max_features=max_features
)
tclf.fit(X, y)
computed = tclf._get_random_subspace(X, y)
expected = expected_values.pop(0)
self.assertListEqual(expected, list(computed))
def test_initialize_random(self):
expected = [37, 235, 908]
tclf = Odte(random_state=self._random_state)
box = tclf._initialize_random()
computed = box.randint(0, 1000, 3)
self.assertListEqual(expected, computed.tolist())
# test None
tclf = Odte()
box = tclf._initialize_random()
computed = box.randint(101, 1000, 3)
for value in computed.tolist():
self.assertGreaterEqual(value, 101)
self.assertLessEqual(value, 1000)
def test_bogus_max_features(self):
values = ["duck", -0.1, 0.0]
for max_features in values:
with self.assertRaises(ValueError):
tclf = Odte(max_features=max_features)
tclf.fit(*load_dataset(self._random_state))
def test_bogus_n_estimator(self):
values = [0, -1, 2]
for n_estimators in values:
with self.assertRaises(ValueError):
tclf = Odte(n_estimators=n_estimators)
tclf.fit(*load_dataset(self._random_state))
def test_simple_predict(self):
warnings.filterwarnings("ignore", category=ConvergenceWarning)
warnings.filterwarnings("ignore", category=RuntimeWarning)
X, y = [[1, 2], [5, 6], [9, 10], [16, 17]], [0, 1, 1, 2]
expected = [0, 1, 1, 1]
tclf = Odte(random_state=self._random_state, n_estimators=10,)
tclf.set_params(
**dict(
base_estimator__kernel="rbf",
base_estimator__random_state=self._random_state,
)
)
computed = tclf.fit(X, y).predict(X)
self.assertListEqual(expected, computed.tolist())
def test_predict(self):
warnings.filterwarnings("ignore", category=ConvergenceWarning)
warnings.filterwarnings("ignore", category=RuntimeWarning)
X, y = load_dataset(self._random_state)
expected = y
tclf = Odte(
random_state=self._random_state, max_features=1.0, max_samples=0.1,
)
tclf.set_params(**dict(base_estimator__kernel="linear",))
computed = tclf.fit(X, y).predict(X)
self.assertListEqual(expected[:27].tolist(), computed[:27].tolist())
def test_score(self):
X, y = load_dataset(self._random_state)
expected = 0.9526666666666667
tclf = Odte(
random_state=self._random_state,
max_features=None,
n_estimators=10,
)
computed = tclf.fit(X, y).score(X, y)
self.assertAlmostEqual(expected, computed)
def test_score_splitter_max_features(self):
X, y = load_dataset(self._random_state, n_features=12, n_samples=150)
results = [
1.0,
1.0,
0.9933333333333333,
0.9933333333333333,
]
for max_features in ["auto", None]:
for splitter in ["best", "random"]:
tclf = Odte(
random_state=self._random_state,
max_features=max_features,
n_estimators=10,
)
tclf.set_params(**dict(base_estimator__splitter=splitter,))
expected = results.pop(0)
computed = tclf.fit(X, y).score(X, y)
self.assertAlmostEqual(expected, computed)
@staticmethod
def test_is_a_sklearn_classifier():
warnings.filterwarnings("ignore", category=ConvergenceWarning)
warnings.filterwarnings("ignore", category=RuntimeWarning)
from sklearn.utils.estimator_checks import check_estimator
check_estimator(Odte())