mirror of
https://github.com/Doctorado-ML/STree.git
synced 2025-08-15 15:36:00 +00:00
Fix problem in _min_distance
Remove grapher (moved to another repo)
This commit is contained in:
File diff suppressed because one or more lines are too long
@@ -1,5 +1,4 @@
|
|||||||
numpy
|
numpy
|
||||||
scikit-learn
|
scikit-learn
|
||||||
pandas
|
pandas
|
||||||
matplotlib
|
|
||||||
ipympl
|
ipympl
|
2
setup.py
2
setup.py
@@ -30,7 +30,7 @@ setuptools.setup(
|
|||||||
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
||||||
"Intended Audience :: Science/Research",
|
"Intended Audience :: Science/Research",
|
||||||
],
|
],
|
||||||
install_requires=["scikit-learn>=0.23.0", "numpy", "matplotlib", "ipympl"],
|
install_requires=["scikit-learn>=0.23.0", "numpy", "ipympl"],
|
||||||
test_suite="stree.tests",
|
test_suite="stree.tests",
|
||||||
zip_safe=False,
|
zip_safe=False,
|
||||||
)
|
)
|
||||||
|
@@ -96,9 +96,6 @@ class Siterator:
|
|||||||
self._stack = []
|
self._stack = []
|
||||||
self._push(tree)
|
self._push(tree)
|
||||||
|
|
||||||
def __iter__(self):
|
|
||||||
return self
|
|
||||||
|
|
||||||
def _push(self, node: Snode):
|
def _push(self, node: Snode):
|
||||||
if node is not None:
|
if node is not None:
|
||||||
self._stack.append(node)
|
self._stack.append(node)
|
||||||
@@ -184,7 +181,9 @@ class Stree(BaseEstimator, ClassifierMixin):
|
|||||||
def _min_distance(self, data: np.array, _) -> np.array:
|
def _min_distance(self, data: np.array, _) -> np.array:
|
||||||
# chooses the lowest distance of every sample
|
# chooses the lowest distance of every sample
|
||||||
indices = np.argmin(np.abs(data), axis=1)
|
indices = np.argmin(np.abs(data), axis=1)
|
||||||
return np.take(data, indices)
|
return np.array(
|
||||||
|
[data[x, y] for x, y in zip(range(len(data[:, 0])), indices)]
|
||||||
|
)
|
||||||
|
|
||||||
def _max_samples(self, data: np.array, y: np.array) -> np.array:
|
def _max_samples(self, data: np.array, y: np.array) -> np.array:
|
||||||
# select the class with max number of samples
|
# select the class with max number of samples
|
||||||
|
@@ -1,205 +0,0 @@
|
|||||||
"""
|
|
||||||
__author__ = "Ricardo Montañana Gómez"
|
|
||||||
__copyright__ = "Copyright 2020, Ricardo Montañana Gómez"
|
|
||||||
__license__ = "MIT"
|
|
||||||
__version__ = "0.9"
|
|
||||||
Plot 3D views of nodes in Stree
|
|
||||||
"""
|
|
||||||
|
|
||||||
import os
|
|
||||||
|
|
||||||
import matplotlib.pyplot as plt
|
|
||||||
import numpy as np
|
|
||||||
from sklearn.decomposition import PCA
|
|
||||||
from mpl_toolkits.mplot3d import Axes3D
|
|
||||||
|
|
||||||
from .Strees import Stree, Snode, Siterator
|
|
||||||
|
|
||||||
|
|
||||||
class Snode_graph(Snode):
|
|
||||||
def __init__(self, node: Stree):
|
|
||||||
self._plot_size = (8, 8)
|
|
||||||
self._xlimits = (None, None)
|
|
||||||
self._ylimits = (None, None)
|
|
||||||
self._zlimits = (None, None)
|
|
||||||
n = Snode.copy(node)
|
|
||||||
super().__init__(n._clf, n._X, n._y, n._title)
|
|
||||||
|
|
||||||
def set_plot_size(self, size: tuple):
|
|
||||||
self._plot_size = size
|
|
||||||
|
|
||||||
def get_plot_size(self) -> tuple:
|
|
||||||
return self._plot_size
|
|
||||||
|
|
||||||
def _is_pure(self) -> bool:
|
|
||||||
"""is considered pure a leaf node with one label
|
|
||||||
"""
|
|
||||||
if self.is_leaf():
|
|
||||||
return self._belief == 1.0
|
|
||||||
return False
|
|
||||||
|
|
||||||
def set_axis_limits(self, limits: tuple):
|
|
||||||
self._xlimits, self._ylimits, self._zlimits = limits
|
|
||||||
|
|
||||||
def get_axis_limits(self) -> tuple:
|
|
||||||
return self._xlimits, self._ylimits, self._zlimits
|
|
||||||
|
|
||||||
def _set_graphics_axis(self, ax: Axes3D):
|
|
||||||
ax.set_xlim(self._xlimits)
|
|
||||||
ax.set_ylim(self._ylimits)
|
|
||||||
ax.set_zlim(self._zlimits)
|
|
||||||
|
|
||||||
def save_hyperplane(
|
|
||||||
self, save_folder: str = "./", save_prefix: str = "", save_seq: int = 1
|
|
||||||
):
|
|
||||||
_, fig = self.plot_hyperplane()
|
|
||||||
name = os.path.join(save_folder, f"{save_prefix}STnode{save_seq}.png")
|
|
||||||
fig.savefig(name, bbox_inches="tight")
|
|
||||||
plt.close(fig)
|
|
||||||
|
|
||||||
def _get_cmap(self):
|
|
||||||
cmap = "jet"
|
|
||||||
if self._is_pure() and self._class == 1:
|
|
||||||
cmap = "jet_r"
|
|
||||||
return cmap
|
|
||||||
|
|
||||||
def _graph_title(self):
|
|
||||||
n_class, card = np.unique(self._y, return_counts=True)
|
|
||||||
return f"{self._title} {n_class} {card}"
|
|
||||||
|
|
||||||
def plot_hyperplane(self, plot_distribution: bool = True):
|
|
||||||
fig = plt.figure(figsize=self._plot_size)
|
|
||||||
ax = fig.add_subplot(1, 1, 1, projection="3d")
|
|
||||||
if not self._is_pure():
|
|
||||||
# Can't plot hyperplane of leaves with one label because it hasn't
|
|
||||||
# classiffier
|
|
||||||
# get the splitting hyperplane
|
|
||||||
def hyperplane(x, y):
|
|
||||||
return (
|
|
||||||
-self._clf.intercept_
|
|
||||||
- self._clf.coef_[0][0] * x
|
|
||||||
- self._clf.coef_[0][1] * y
|
|
||||||
) / self._clf.coef_[0][2]
|
|
||||||
|
|
||||||
tmpx = np.linspace(self._X[:, 0].min(), self._X[:, 0].max())
|
|
||||||
tmpy = np.linspace(self._X[:, 1].min(), self._X[:, 1].max())
|
|
||||||
xx, yy = np.meshgrid(tmpx, tmpy)
|
|
||||||
ax.plot_surface(
|
|
||||||
xx,
|
|
||||||
yy,
|
|
||||||
hyperplane(xx, yy),
|
|
||||||
alpha=0.5,
|
|
||||||
antialiased=True,
|
|
||||||
rstride=1,
|
|
||||||
cstride=1,
|
|
||||||
cmap="seismic",
|
|
||||||
)
|
|
||||||
self._set_graphics_axis(ax)
|
|
||||||
if plot_distribution:
|
|
||||||
self.plot_distribution(ax)
|
|
||||||
else:
|
|
||||||
plt.title(self._graph_title())
|
|
||||||
plt.show()
|
|
||||||
return ax, fig
|
|
||||||
|
|
||||||
def plot_distribution(self, ax: Axes3D = None):
|
|
||||||
if ax is None:
|
|
||||||
fig = plt.figure(figsize=self._plot_size)
|
|
||||||
ax = fig.add_subplot(1, 1, 1, projection="3d")
|
|
||||||
plt.title(self._graph_title())
|
|
||||||
cmap = self._get_cmap()
|
|
||||||
ax.scatter(
|
|
||||||
self._X[:, 0], self._X[:, 1], self._X[:, 2], c=self._y, cmap=cmap
|
|
||||||
)
|
|
||||||
ax.set_xlabel("X0")
|
|
||||||
ax.set_ylabel("X1")
|
|
||||||
ax.set_zlabel("X2")
|
|
||||||
plt.show()
|
|
||||||
|
|
||||||
|
|
||||||
class Stree_grapher(Stree):
|
|
||||||
"""Build 3d graphs of any dataset, if it's more than 3 features PCA shall
|
|
||||||
make its magic
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __init__(self, params: dict):
|
|
||||||
self._plot_size = (8, 8)
|
|
||||||
self._tree_gr = None
|
|
||||||
# make Snode store X's
|
|
||||||
os.environ["TESTING"] = "1"
|
|
||||||
self._fitted = False
|
|
||||||
self._pca = None
|
|
||||||
super().__init__(**params)
|
|
||||||
|
|
||||||
def __del__(self):
|
|
||||||
try:
|
|
||||||
os.environ.pop("TESTING")
|
|
||||||
except KeyError:
|
|
||||||
pass
|
|
||||||
|
|
||||||
def _copy_tree(self, node: Snode) -> Snode_graph:
|
|
||||||
mirror = Snode_graph(node)
|
|
||||||
# clone node
|
|
||||||
mirror._class = node._class
|
|
||||||
mirror._belief = node._belief
|
|
||||||
if node.get_down() is not None:
|
|
||||||
mirror.set_down(self._copy_tree(node.get_down()))
|
|
||||||
if node.get_up() is not None:
|
|
||||||
mirror.set_up(self._copy_tree(node.get_up()))
|
|
||||||
return mirror
|
|
||||||
|
|
||||||
def fit(
|
|
||||||
self, X: np.array, y: np.array, sample_weight: np.array = None
|
|
||||||
) -> "Stree_grapher":
|
|
||||||
"""Fit the Stree and copy the tree in a Snode_graph tree
|
|
||||||
|
|
||||||
:param X: Dataset
|
|
||||||
:type X: np.array
|
|
||||||
:param y: Labels
|
|
||||||
:type y: np.array
|
|
||||||
:return: Stree model
|
|
||||||
:rtype: Stree
|
|
||||||
"""
|
|
||||||
if X.shape[1] != 3:
|
|
||||||
self._pca = PCA(n_components=3)
|
|
||||||
X = self._pca.fit_transform(X)
|
|
||||||
super().fit(X, y, sample_weight=sample_weight)
|
|
||||||
self._tree_gr = self._copy_tree(self.tree_)
|
|
||||||
self._fitted = True
|
|
||||||
return self
|
|
||||||
|
|
||||||
def score(self, X: np.array, y: np.array) -> float:
|
|
||||||
self._check_fitted()
|
|
||||||
if X.shape[1] != 3:
|
|
||||||
X = self._pca.transform(X)
|
|
||||||
return super().score(X, y)
|
|
||||||
|
|
||||||
def _check_fitted(self):
|
|
||||||
if not self._fitted:
|
|
||||||
raise Exception("Have to fit the grapher first!")
|
|
||||||
|
|
||||||
def save_all(self, save_folder: str = "./", save_prefix: str = ""):
|
|
||||||
"""Save all the node plots in png format, each with a sequence number
|
|
||||||
|
|
||||||
:param save_folder: folder where the plots are saved, defaults to './'
|
|
||||||
:type save_folder: str, optional
|
|
||||||
"""
|
|
||||||
self._check_fitted()
|
|
||||||
if not os.path.isdir(save_folder):
|
|
||||||
os.mkdir(save_folder)
|
|
||||||
seq = 1
|
|
||||||
for node in self:
|
|
||||||
node.save_hyperplane(
|
|
||||||
save_folder=save_folder, save_prefix=save_prefix, save_seq=seq
|
|
||||||
)
|
|
||||||
seq += 1
|
|
||||||
|
|
||||||
def plot_all(self):
|
|
||||||
"""Plots all the nodes
|
|
||||||
"""
|
|
||||||
self._check_fitted()
|
|
||||||
for node in self:
|
|
||||||
node.plot_hyperplane()
|
|
||||||
|
|
||||||
def __iter__(self):
|
|
||||||
return Siterator(self._tree_gr)
|
|
@@ -1,4 +1,3 @@
|
|||||||
from .Strees import Stree, Snode, Siterator
|
from .Strees import Stree, Snode, Siterator
|
||||||
from .Strees_grapher import Stree_grapher, Snode_graph
|
|
||||||
|
|
||||||
__all__ = ["Stree", "Snode", "Siterator", "Stree_grapher", "Snode_graph"]
|
__all__ = ["Stree", "Snode", "Siterator"]
|
||||||
|
91
stree/tests/Snode_test.py
Normal file
91
stree/tests/Snode_test.py
Normal file
@@ -0,0 +1,91 @@
|
|||||||
|
import os
|
||||||
|
import unittest
|
||||||
|
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
from stree import Stree, Snode
|
||||||
|
from .utils import get_dataset
|
||||||
|
|
||||||
|
|
||||||
|
class Snode_test(unittest.TestCase):
|
||||||
|
def __init__(self, *args, **kwargs):
|
||||||
|
self._random_state = 1
|
||||||
|
self._clf = Stree(random_state=self._random_state)
|
||||||
|
self._clf.fit(*get_dataset(self._random_state))
|
||||||
|
super().__init__(*args, **kwargs)
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def setUp(cls):
|
||||||
|
os.environ["TESTING"] = "1"
|
||||||
|
|
||||||
|
def test_attributes_in_leaves(self):
|
||||||
|
"""Check if the attributes in leaves have correct values so they form a
|
||||||
|
predictor
|
||||||
|
"""
|
||||||
|
|
||||||
|
def check_leave(node: Snode):
|
||||||
|
if not node.is_leaf():
|
||||||
|
check_leave(node.get_down())
|
||||||
|
check_leave(node.get_up())
|
||||||
|
return
|
||||||
|
# Check Belief in leave
|
||||||
|
classes, card = np.unique(node._y, return_counts=True)
|
||||||
|
max_card = max(card)
|
||||||
|
min_card = min(card)
|
||||||
|
if len(classes) > 1:
|
||||||
|
try:
|
||||||
|
belief = max_card / (max_card + min_card)
|
||||||
|
except ZeroDivisionError:
|
||||||
|
belief = 0.0
|
||||||
|
else:
|
||||||
|
belief = 1
|
||||||
|
self.assertEqual(belief, node._belief)
|
||||||
|
# Check Class
|
||||||
|
class_computed = classes[card == max_card]
|
||||||
|
self.assertEqual(class_computed, node._class)
|
||||||
|
|
||||||
|
check_leave(self._clf.tree_)
|
||||||
|
|
||||||
|
def test_nodes_coefs(self):
|
||||||
|
"""Check if the nodes of the tree have the right attributes filled
|
||||||
|
"""
|
||||||
|
|
||||||
|
def run_tree(node: Snode):
|
||||||
|
if node._belief < 1:
|
||||||
|
# only exclude pure leaves
|
||||||
|
self.assertIsNotNone(node._clf)
|
||||||
|
self.assertIsNotNone(node._clf.coef_)
|
||||||
|
if node.is_leaf():
|
||||||
|
return
|
||||||
|
run_tree(node.get_down())
|
||||||
|
run_tree(node.get_up())
|
||||||
|
|
||||||
|
run_tree(self._clf.tree_)
|
||||||
|
|
||||||
|
def test_make_predictor_on_leaf(self):
|
||||||
|
test = Snode(None, [1, 2, 3, 4], [1, 0, 1, 1], "test")
|
||||||
|
test.make_predictor()
|
||||||
|
self.assertEqual(1, test._class)
|
||||||
|
self.assertEqual(0.75, test._belief)
|
||||||
|
|
||||||
|
def test_make_predictor_on_not_leaf(self):
|
||||||
|
test = Snode(None, [1, 2, 3, 4], [1, 0, 1, 1], "test")
|
||||||
|
test.set_up(Snode(None, [1], [1], "another_test"))
|
||||||
|
test.make_predictor()
|
||||||
|
self.assertIsNone(test._class)
|
||||||
|
self.assertEqual(0, test._belief)
|
||||||
|
|
||||||
|
def test_make_predictor_on_leaf_bogus_data(self):
|
||||||
|
test = Snode(None, [1, 2, 3, 4], [], "test")
|
||||||
|
test.make_predictor()
|
||||||
|
self.assertIsNone(test._class)
|
||||||
|
|
||||||
|
def test_copy_node(self):
|
||||||
|
px = [1, 2, 3, 4]
|
||||||
|
py = [1]
|
||||||
|
test = Snode(Stree(), px, py, "test")
|
||||||
|
computed = Snode.copy(test)
|
||||||
|
self.assertListEqual(computed._X, px)
|
||||||
|
self.assertListEqual(computed._y, py)
|
||||||
|
self.assertEqual("test", computed._title)
|
||||||
|
self.assertIsInstance(computed._clf, Stree)
|
@@ -2,25 +2,10 @@ import os
|
|||||||
import unittest
|
import unittest
|
||||||
|
|
||||||
import numpy as np
|
import numpy as np
|
||||||
from sklearn.datasets import make_classification, load_iris
|
from sklearn.datasets import load_iris
|
||||||
|
|
||||||
from stree import Stree, Snode
|
from stree import Stree, Snode
|
||||||
|
from .utils import get_dataset
|
||||||
|
|
||||||
def get_dataset(random_state=0, n_classes=2):
|
|
||||||
X, y = make_classification(
|
|
||||||
n_samples=1500,
|
|
||||||
n_features=3,
|
|
||||||
n_informative=3,
|
|
||||||
n_redundant=0,
|
|
||||||
n_repeated=0,
|
|
||||||
n_classes=n_classes,
|
|
||||||
n_clusters_per_class=2,
|
|
||||||
class_sep=1.5,
|
|
||||||
flip_y=0,
|
|
||||||
random_state=random_state,
|
|
||||||
)
|
|
||||||
return X, y
|
|
||||||
|
|
||||||
|
|
||||||
class Stree_test(unittest.TestCase):
|
class Stree_test(unittest.TestCase):
|
||||||
@@ -280,76 +265,33 @@ class Stree_test(unittest.TestCase):
|
|||||||
outcome = outcomes[name][f"{criteria} {kernel}"]
|
outcome = outcomes[name][f"{criteria} {kernel}"]
|
||||||
self.assertAlmostEqual(outcome, clf.score(px, py))
|
self.assertAlmostEqual(outcome, clf.score(px, py))
|
||||||
|
|
||||||
|
def test_min_distance(self):
|
||||||
|
clf = Stree()
|
||||||
|
data = np.array(
|
||||||
|
[
|
||||||
|
[-0.1, 0.2, -0.3],
|
||||||
|
[0.7, 0.01, -0.1],
|
||||||
|
[0.7, -0.9, 0.5],
|
||||||
|
[0.1, 0.2, 0.3],
|
||||||
|
]
|
||||||
|
)
|
||||||
|
expected = np.array([-0.1, 0.01, 0.5, 0.1])
|
||||||
|
computed = clf._min_distance(data, None)
|
||||||
|
self.assertEqual((4,), computed.shape)
|
||||||
|
self.assertListEqual(expected.tolist(), computed.tolist())
|
||||||
|
|
||||||
class Snode_test(unittest.TestCase):
|
def test_max_samples(self):
|
||||||
def __init__(self, *args, **kwargs):
|
clf = Stree()
|
||||||
self._random_state = 1
|
data = np.array(
|
||||||
self._clf = Stree(random_state=self._random_state)
|
[
|
||||||
self._clf.fit(*get_dataset(self._random_state))
|
[-0.1, 0.2, -0.3],
|
||||||
super().__init__(*args, **kwargs)
|
[0.7, 0.01, -0.1],
|
||||||
|
[0.7, -0.9, 0.5],
|
||||||
@classmethod
|
[0.1, 0.2, 0.3],
|
||||||
def setUp(cls):
|
]
|
||||||
os.environ["TESTING"] = "1"
|
)
|
||||||
|
expected = np.array([0.2, 0.01, -0.9, 0.2])
|
||||||
def test_attributes_in_leaves(self):
|
y = [1, 2, 1, 0]
|
||||||
"""Check if the attributes in leaves have correct values so they form a
|
computed = clf._max_samples(data, y)
|
||||||
predictor
|
self.assertEqual((4,), computed.shape)
|
||||||
"""
|
self.assertListEqual(expected.tolist(), computed.tolist())
|
||||||
|
|
||||||
def check_leave(node: Snode):
|
|
||||||
if not node.is_leaf():
|
|
||||||
check_leave(node.get_down())
|
|
||||||
check_leave(node.get_up())
|
|
||||||
return
|
|
||||||
# Check Belief in leave
|
|
||||||
classes, card = np.unique(node._y, return_counts=True)
|
|
||||||
max_card = max(card)
|
|
||||||
min_card = min(card)
|
|
||||||
if len(classes) > 1:
|
|
||||||
try:
|
|
||||||
belief = max_card / (max_card + min_card)
|
|
||||||
except ZeroDivisionError:
|
|
||||||
belief = 0.0
|
|
||||||
else:
|
|
||||||
belief = 1
|
|
||||||
self.assertEqual(belief, node._belief)
|
|
||||||
# Check Class
|
|
||||||
class_computed = classes[card == max_card]
|
|
||||||
self.assertEqual(class_computed, node._class)
|
|
||||||
|
|
||||||
check_leave(self._clf.tree_)
|
|
||||||
|
|
||||||
def test_nodes_coefs(self):
|
|
||||||
"""Check if the nodes of the tree have the right attributes filled
|
|
||||||
"""
|
|
||||||
|
|
||||||
def run_tree(node: Snode):
|
|
||||||
if node._belief < 1:
|
|
||||||
# only exclude pure leaves
|
|
||||||
self.assertIsNotNone(node._clf)
|
|
||||||
self.assertIsNotNone(node._clf.coef_)
|
|
||||||
if node.is_leaf():
|
|
||||||
return
|
|
||||||
run_tree(node.get_down())
|
|
||||||
run_tree(node.get_up())
|
|
||||||
|
|
||||||
run_tree(self._clf.tree_)
|
|
||||||
|
|
||||||
def test_make_predictor_on_leaf(self):
|
|
||||||
test = Snode(None, [1, 2, 3, 4], [1, 0, 1, 1], "test")
|
|
||||||
test.make_predictor()
|
|
||||||
self.assertEqual(1, test._class)
|
|
||||||
self.assertEqual(0.75, test._belief)
|
|
||||||
|
|
||||||
def test_make_predictor_on_not_leaf(self):
|
|
||||||
test = Snode(None, [1, 2, 3, 4], [1, 0, 1, 1], "test")
|
|
||||||
test.set_up(Snode(None, [1], [1], "another_test"))
|
|
||||||
test.make_predictor()
|
|
||||||
self.assertIsNone(test._class)
|
|
||||||
self.assertEqual(0, test._belief)
|
|
||||||
|
|
||||||
def test_make_predictor_on_leaf_bogus_data(self):
|
|
||||||
test = Snode(None, [1, 2, 3, 4], [], "test")
|
|
||||||
test.make_predictor()
|
|
||||||
self.assertIsNone(test._class)
|
|
@@ -1,226 +0,0 @@
|
|||||||
import os
|
|
||||||
import imghdr
|
|
||||||
import unittest
|
|
||||||
|
|
||||||
import numpy as np
|
|
||||||
import matplotlib
|
|
||||||
import matplotlib.pyplot as plt
|
|
||||||
import warnings
|
|
||||||
from sklearn.datasets import make_classification
|
|
||||||
|
|
||||||
from stree import Stree_grapher, Snode_graph, Snode
|
|
||||||
|
|
||||||
|
|
||||||
def get_dataset(random_state=0, n_features=3):
|
|
||||||
X, y = make_classification(
|
|
||||||
n_samples=1500,
|
|
||||||
n_features=n_features,
|
|
||||||
n_informative=3,
|
|
||||||
n_redundant=0,
|
|
||||||
n_repeated=0,
|
|
||||||
n_classes=2,
|
|
||||||
n_clusters_per_class=2,
|
|
||||||
class_sep=1.5,
|
|
||||||
flip_y=0,
|
|
||||||
weights=[0.5, 0.5],
|
|
||||||
random_state=random_state,
|
|
||||||
)
|
|
||||||
return X, y
|
|
||||||
|
|
||||||
|
|
||||||
class Stree_grapher_test(unittest.TestCase):
|
|
||||||
def __init__(self, *args, **kwargs):
|
|
||||||
self._random_state = 1
|
|
||||||
self._clf = Stree_grapher(dict(random_state=self._random_state))
|
|
||||||
self._clf.fit(*get_dataset(self._random_state, n_features=4))
|
|
||||||
super().__init__(*args, **kwargs)
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def setUp(cls):
|
|
||||||
os.environ["TESTING"] = "1"
|
|
||||||
|
|
||||||
def test_iterator(self):
|
|
||||||
"""Check preorder iterator
|
|
||||||
"""
|
|
||||||
expected = [
|
|
||||||
"root",
|
|
||||||
"root - Down",
|
|
||||||
"root - Down - Down, <cgaf> - Leaf class=1 belief= 0.976023 counts"
|
|
||||||
"=(array([0, 1]), array([ 17, 692]))",
|
|
||||||
"root - Down - Up",
|
|
||||||
"root - Down - Up - Down, <cgaf> - Leaf class=0 belief= 0.500000 "
|
|
||||||
"counts=(array([0, 1]), array([1, 1]))",
|
|
||||||
"root - Down - Up - Up, <cgaf> - Leaf class=0 belief= 0.888889 "
|
|
||||||
"counts=(array([0, 1]), array([8, 1]))",
|
|
||||||
"root - Up, <cgaf> - Leaf class=0 belief= 0.928205 counts=(array("
|
|
||||||
"[0, 1]), array([724, 56]))",
|
|
||||||
]
|
|
||||||
computed = []
|
|
||||||
for node in self._clf:
|
|
||||||
computed.append(str(node))
|
|
||||||
self.assertListEqual(expected, computed)
|
|
||||||
|
|
||||||
def test_score(self):
|
|
||||||
X, y = get_dataset(self._random_state)
|
|
||||||
accuracy_score = self._clf.score(X, y)
|
|
||||||
yp = self._clf.predict(X)
|
|
||||||
accuracy_computed = np.mean(yp == y)
|
|
||||||
self.assertEqual(accuracy_score, accuracy_computed)
|
|
||||||
self.assertGreater(accuracy_score, 0.86)
|
|
||||||
|
|
||||||
def test_score_4dims(self):
|
|
||||||
X, y = get_dataset(self._random_state, n_features=4)
|
|
||||||
accuracy_score = self._clf.score(X, y)
|
|
||||||
self.assertEqual(accuracy_score, 0.95)
|
|
||||||
|
|
||||||
def test_save_all(self):
|
|
||||||
folder_name = os.path.join(os.sep, "tmp", "stree")
|
|
||||||
if os.path.isdir(folder_name):
|
|
||||||
os.rmdir(folder_name)
|
|
||||||
file_names = [
|
|
||||||
os.path.join(folder_name, f"STnode{i}.png") for i in range(1, 8)
|
|
||||||
]
|
|
||||||
with warnings.catch_warnings():
|
|
||||||
warnings.simplefilter("ignore")
|
|
||||||
matplotlib.use("Agg")
|
|
||||||
self._clf.save_all(save_folder=folder_name)
|
|
||||||
for file_name in file_names:
|
|
||||||
self.assertTrue(os.path.exists(file_name))
|
|
||||||
self.assertEqual("png", imghdr.what(file_name))
|
|
||||||
os.remove(file_name)
|
|
||||||
os.rmdir(folder_name)
|
|
||||||
|
|
||||||
def test_plot_all(self):
|
|
||||||
with warnings.catch_warnings():
|
|
||||||
warnings.simplefilter("ignore")
|
|
||||||
matplotlib.use("Agg")
|
|
||||||
num_figures_before = plt.gcf().number
|
|
||||||
self._clf.plot_all()
|
|
||||||
num_figures_after = plt.gcf().number
|
|
||||||
self.assertEqual(7, num_figures_after - num_figures_before)
|
|
||||||
|
|
||||||
|
|
||||||
class Snode_graph_test(unittest.TestCase):
|
|
||||||
def __init__(self, *args, **kwargs):
|
|
||||||
self._random_state = 1
|
|
||||||
self._clf = Stree_grapher(dict(random_state=self._random_state))
|
|
||||||
self._clf.fit(*get_dataset(self._random_state))
|
|
||||||
super().__init__(*args, **kwargs)
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def setUp(cls):
|
|
||||||
os.environ["TESTING"] = "1"
|
|
||||||
|
|
||||||
def test_plot_size(self):
|
|
||||||
default = self._clf._tree_gr.get_plot_size()
|
|
||||||
expected = (17, 3)
|
|
||||||
self._clf._tree_gr.set_plot_size(expected)
|
|
||||||
self.assertEqual(expected, self._clf._tree_gr.get_plot_size())
|
|
||||||
self._clf._tree_gr.set_plot_size(default)
|
|
||||||
self.assertEqual(default, self._clf._tree_gr.get_plot_size())
|
|
||||||
|
|
||||||
def test_attributes_in_leaves_graph(self):
|
|
||||||
"""Check if the attributes in leaves have correct values so they form a
|
|
||||||
predictor
|
|
||||||
"""
|
|
||||||
|
|
||||||
def check_leave(node: Snode_graph):
|
|
||||||
if not node.is_leaf():
|
|
||||||
check_leave(node.get_down())
|
|
||||||
check_leave(node.get_up())
|
|
||||||
return
|
|
||||||
# Check Belief in leave
|
|
||||||
classes, card = np.unique(node._y, return_counts=True)
|
|
||||||
max_card = max(card)
|
|
||||||
min_card = min(card)
|
|
||||||
if len(classes) > 1:
|
|
||||||
try:
|
|
||||||
belief = max_card / (max_card + min_card)
|
|
||||||
except ZeroDivisionError:
|
|
||||||
belief = 0.0
|
|
||||||
else:
|
|
||||||
belief = 1
|
|
||||||
self.assertEqual(belief, node._belief)
|
|
||||||
# Check Class
|
|
||||||
class_computed = classes[card == max_card]
|
|
||||||
self.assertEqual(class_computed, node._class)
|
|
||||||
|
|
||||||
check_leave(self._clf._tree_gr)
|
|
||||||
|
|
||||||
def test_nodes_graph_coefs(self):
|
|
||||||
"""Check if the nodes of the tree have the right attributes filled
|
|
||||||
"""
|
|
||||||
|
|
||||||
def run_tree(node: Snode_graph):
|
|
||||||
if node._belief < 1:
|
|
||||||
# only exclude pure leaves
|
|
||||||
self.assertIsNotNone(node._clf)
|
|
||||||
self.assertIsNotNone(node._clf.coef_)
|
|
||||||
if node.is_leaf():
|
|
||||||
return
|
|
||||||
run_tree(node.get_down())
|
|
||||||
run_tree(node.get_up())
|
|
||||||
|
|
||||||
run_tree(self._clf._tree_gr)
|
|
||||||
|
|
||||||
def test_save_hyperplane(self):
|
|
||||||
folder_name = "/tmp/"
|
|
||||||
file_name = os.path.join(folder_name, "STnode1.png")
|
|
||||||
with warnings.catch_warnings():
|
|
||||||
warnings.simplefilter("ignore")
|
|
||||||
matplotlib.use("Agg")
|
|
||||||
self._clf._tree_gr.save_hyperplane(folder_name)
|
|
||||||
self.assertTrue(os.path.exists(file_name))
|
|
||||||
self.assertEqual("png", imghdr.what(file_name))
|
|
||||||
os.remove(file_name)
|
|
||||||
|
|
||||||
def test_plot_hyperplane_with_distribution(self):
|
|
||||||
plt.close()
|
|
||||||
# select a pure node
|
|
||||||
node = self._clf._tree_gr.get_down().get_up().get_up()
|
|
||||||
with warnings.catch_warnings():
|
|
||||||
warnings.simplefilter("ignore")
|
|
||||||
matplotlib.use("Agg")
|
|
||||||
num_figures_before = plt.gcf().number
|
|
||||||
node.plot_hyperplane(plot_distribution=True)
|
|
||||||
num_figures_after = plt.gcf().number
|
|
||||||
self.assertEqual(1, num_figures_after - num_figures_before)
|
|
||||||
|
|
||||||
def test_plot_hyperplane_without_distribution(self):
|
|
||||||
plt.close()
|
|
||||||
with warnings.catch_warnings():
|
|
||||||
warnings.simplefilter("ignore")
|
|
||||||
matplotlib.use("Agg")
|
|
||||||
num_figures_before = plt.gcf().number
|
|
||||||
self._clf._tree_gr.plot_hyperplane(plot_distribution=False)
|
|
||||||
num_figures_after = plt.gcf().number
|
|
||||||
self.assertEqual(1, num_figures_after - num_figures_before)
|
|
||||||
|
|
||||||
def test_plot_distribution(self):
|
|
||||||
plt.close()
|
|
||||||
with warnings.catch_warnings():
|
|
||||||
warnings.simplefilter("ignore")
|
|
||||||
matplotlib.use("Agg")
|
|
||||||
num_figures_before = plt.gcf().number
|
|
||||||
self._clf._tree_gr.plot_distribution()
|
|
||||||
num_figures_after = plt.gcf().number
|
|
||||||
self.assertEqual(1, num_figures_after - num_figures_before)
|
|
||||||
|
|
||||||
def test_set_axis_limits(self):
|
|
||||||
node = Snode_graph(Snode(None, None, None, "test"))
|
|
||||||
limits = (-2, 2), (-3, 3), (-4, 4)
|
|
||||||
node.set_axis_limits(limits)
|
|
||||||
computed = node.get_axis_limits()
|
|
||||||
x, y, z = limits
|
|
||||||
xx, yy, zz = computed
|
|
||||||
self.assertEqual(x, xx)
|
|
||||||
self.assertEqual(y, yy)
|
|
||||||
self.assertEqual(z, zz)
|
|
||||||
|
|
||||||
def test_cmap_change(self):
|
|
||||||
node = Snode_graph(Snode(None, None, None, "test"))
|
|
||||||
self.assertEqual("jet", node._get_cmap())
|
|
||||||
# make node pure
|
|
||||||
node._belief = 1.0
|
|
||||||
node._class = 1
|
|
||||||
self.assertEqual("jet_r", node._get_cmap())
|
|
@@ -1,9 +1,4 @@
|
|||||||
from .Strees_test import Stree_test, Snode_test
|
from .Stree_test import Stree_test
|
||||||
from .Strees_grapher_test import Stree_grapher_test, Snode_graph_test
|
from .Snode_test import Snode_test
|
||||||
|
|
||||||
__all__ = [
|
__all__ = ["Stree_test", "Snode_test"]
|
||||||
"Stree_test",
|
|
||||||
"Snode_test",
|
|
||||||
"Stree_grapher_test",
|
|
||||||
"Snode_graph_test",
|
|
||||||
]
|
|
||||||
|
17
stree/tests/utils.py
Normal file
17
stree/tests/utils.py
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
from sklearn.datasets import make_classification
|
||||||
|
|
||||||
|
|
||||||
|
def get_dataset(random_state=0, n_classes=2):
|
||||||
|
X, y = make_classification(
|
||||||
|
n_samples=1500,
|
||||||
|
n_features=3,
|
||||||
|
n_informative=3,
|
||||||
|
n_redundant=0,
|
||||||
|
n_repeated=0,
|
||||||
|
n_classes=n_classes,
|
||||||
|
n_clusters_per_class=2,
|
||||||
|
class_sep=1.5,
|
||||||
|
flip_y=0,
|
||||||
|
random_state=random_state,
|
||||||
|
)
|
||||||
|
return X, y
|
Reference in New Issue
Block a user