mirror of
https://github.com/Doctorado-ML/STree.git
synced 2025-08-15 15:36:00 +00:00
Fix problem in _min_distance
Remove grapher (moved to another repo)
This commit is contained in:
File diff suppressed because one or more lines are too long
@@ -1,5 +1,4 @@
|
||||
numpy
|
||||
scikit-learn
|
||||
pandas
|
||||
matplotlib
|
||||
ipympl
|
2
setup.py
2
setup.py
@@ -30,7 +30,7 @@ setuptools.setup(
|
||||
"Topic :: Scientific/Engineering :: Artificial Intelligence",
|
||||
"Intended Audience :: Science/Research",
|
||||
],
|
||||
install_requires=["scikit-learn>=0.23.0", "numpy", "matplotlib", "ipympl"],
|
||||
install_requires=["scikit-learn>=0.23.0", "numpy", "ipympl"],
|
||||
test_suite="stree.tests",
|
||||
zip_safe=False,
|
||||
)
|
||||
|
@@ -96,9 +96,6 @@ class Siterator:
|
||||
self._stack = []
|
||||
self._push(tree)
|
||||
|
||||
def __iter__(self):
|
||||
return self
|
||||
|
||||
def _push(self, node: Snode):
|
||||
if node is not None:
|
||||
self._stack.append(node)
|
||||
@@ -184,7 +181,9 @@ class Stree(BaseEstimator, ClassifierMixin):
|
||||
def _min_distance(self, data: np.array, _) -> np.array:
|
||||
# chooses the lowest distance of every sample
|
||||
indices = np.argmin(np.abs(data), axis=1)
|
||||
return np.take(data, indices)
|
||||
return np.array(
|
||||
[data[x, y] for x, y in zip(range(len(data[:, 0])), indices)]
|
||||
)
|
||||
|
||||
def _max_samples(self, data: np.array, y: np.array) -> np.array:
|
||||
# select the class with max number of samples
|
||||
|
@@ -1,205 +0,0 @@
|
||||
"""
|
||||
__author__ = "Ricardo Montañana Gómez"
|
||||
__copyright__ = "Copyright 2020, Ricardo Montañana Gómez"
|
||||
__license__ = "MIT"
|
||||
__version__ = "0.9"
|
||||
Plot 3D views of nodes in Stree
|
||||
"""
|
||||
|
||||
import os
|
||||
|
||||
import matplotlib.pyplot as plt
|
||||
import numpy as np
|
||||
from sklearn.decomposition import PCA
|
||||
from mpl_toolkits.mplot3d import Axes3D
|
||||
|
||||
from .Strees import Stree, Snode, Siterator
|
||||
|
||||
|
||||
class Snode_graph(Snode):
|
||||
def __init__(self, node: Stree):
|
||||
self._plot_size = (8, 8)
|
||||
self._xlimits = (None, None)
|
||||
self._ylimits = (None, None)
|
||||
self._zlimits = (None, None)
|
||||
n = Snode.copy(node)
|
||||
super().__init__(n._clf, n._X, n._y, n._title)
|
||||
|
||||
def set_plot_size(self, size: tuple):
|
||||
self._plot_size = size
|
||||
|
||||
def get_plot_size(self) -> tuple:
|
||||
return self._plot_size
|
||||
|
||||
def _is_pure(self) -> bool:
|
||||
"""A leaf node with only one label is considered pure
|
||||
"""
|
||||
if self.is_leaf():
|
||||
return self._belief == 1.0
|
||||
return False
|
||||
|
||||
def set_axis_limits(self, limits: tuple):
|
||||
self._xlimits, self._ylimits, self._zlimits = limits
|
||||
|
||||
def get_axis_limits(self) -> tuple:
|
||||
return self._xlimits, self._ylimits, self._zlimits
|
||||
|
||||
def _set_graphics_axis(self, ax: Axes3D):
|
||||
ax.set_xlim(self._xlimits)
|
||||
ax.set_ylim(self._ylimits)
|
||||
ax.set_zlim(self._zlimits)
|
||||
|
||||
def save_hyperplane(
|
||||
self, save_folder: str = "./", save_prefix: str = "", save_seq: int = 1
|
||||
):
|
||||
_, fig = self.plot_hyperplane()
|
||||
name = os.path.join(save_folder, f"{save_prefix}STnode{save_seq}.png")
|
||||
fig.savefig(name, bbox_inches="tight")
|
||||
plt.close(fig)
|
||||
|
||||
def _get_cmap(self):
|
||||
cmap = "jet"
|
||||
if self._is_pure() and self._class == 1:
|
||||
cmap = "jet_r"
|
||||
return cmap
|
||||
|
||||
def _graph_title(self):
|
||||
n_class, card = np.unique(self._y, return_counts=True)
|
||||
return f"{self._title} {n_class} {card}"
|
||||
|
||||
def plot_hyperplane(self, plot_distribution: bool = True):
|
||||
fig = plt.figure(figsize=self._plot_size)
|
||||
ax = fig.add_subplot(1, 1, 1, projection="3d")
|
||||
if not self._is_pure():
|
||||
# Can't plot hyperplane of leaves with one label because it hasn't
|
||||
# classifier
|
||||
# get the splitting hyperplane
|
||||
def hyperplane(x, y):
|
||||
return (
|
||||
-self._clf.intercept_
|
||||
- self._clf.coef_[0][0] * x
|
||||
- self._clf.coef_[0][1] * y
|
||||
) / self._clf.coef_[0][2]
|
||||
|
||||
tmpx = np.linspace(self._X[:, 0].min(), self._X[:, 0].max())
|
||||
tmpy = np.linspace(self._X[:, 1].min(), self._X[:, 1].max())
|
||||
xx, yy = np.meshgrid(tmpx, tmpy)
|
||||
ax.plot_surface(
|
||||
xx,
|
||||
yy,
|
||||
hyperplane(xx, yy),
|
||||
alpha=0.5,
|
||||
antialiased=True,
|
||||
rstride=1,
|
||||
cstride=1,
|
||||
cmap="seismic",
|
||||
)
|
||||
self._set_graphics_axis(ax)
|
||||
if plot_distribution:
|
||||
self.plot_distribution(ax)
|
||||
else:
|
||||
plt.title(self._graph_title())
|
||||
plt.show()
|
||||
return ax, fig
|
||||
|
||||
def plot_distribution(self, ax: Axes3D = None):
|
||||
if ax is None:
|
||||
fig = plt.figure(figsize=self._plot_size)
|
||||
ax = fig.add_subplot(1, 1, 1, projection="3d")
|
||||
plt.title(self._graph_title())
|
||||
cmap = self._get_cmap()
|
||||
ax.scatter(
|
||||
self._X[:, 0], self._X[:, 1], self._X[:, 2], c=self._y, cmap=cmap
|
||||
)
|
||||
ax.set_xlabel("X0")
|
||||
ax.set_ylabel("X1")
|
||||
ax.set_zlabel("X2")
|
||||
plt.show()
|
||||
|
||||
|
||||
class Stree_grapher(Stree):
|
||||
"""Build 3d graphs of any dataset, if it's more than 3 features PCA shall
|
||||
make its magic
|
||||
"""
|
||||
|
||||
def __init__(self, params: dict):
|
||||
self._plot_size = (8, 8)
|
||||
self._tree_gr = None
|
||||
# make Snode store X's
|
||||
os.environ["TESTING"] = "1"
|
||||
self._fitted = False
|
||||
self._pca = None
|
||||
super().__init__(**params)
|
||||
|
||||
def __del__(self):
|
||||
try:
|
||||
os.environ.pop("TESTING")
|
||||
except KeyError:
|
||||
pass
|
||||
|
||||
def _copy_tree(self, node: Snode) -> Snode_graph:
|
||||
mirror = Snode_graph(node)
|
||||
# clone node
|
||||
mirror._class = node._class
|
||||
mirror._belief = node._belief
|
||||
if node.get_down() is not None:
|
||||
mirror.set_down(self._copy_tree(node.get_down()))
|
||||
if node.get_up() is not None:
|
||||
mirror.set_up(self._copy_tree(node.get_up()))
|
||||
return mirror
|
||||
|
||||
def fit(
|
||||
self, X: np.array, y: np.array, sample_weight: np.array = None
|
||||
) -> "Stree_grapher":
|
||||
"""Fit the Stree and copy the tree in a Snode_graph tree
|
||||
|
||||
:param X: Dataset
|
||||
:type X: np.array
|
||||
:param y: Labels
|
||||
:type y: np.array
|
||||
:return: Stree model
|
||||
:rtype: Stree
|
||||
"""
|
||||
if X.shape[1] != 3:
|
||||
self._pca = PCA(n_components=3)
|
||||
X = self._pca.fit_transform(X)
|
||||
super().fit(X, y, sample_weight=sample_weight)
|
||||
self._tree_gr = self._copy_tree(self.tree_)
|
||||
self._fitted = True
|
||||
return self
|
||||
|
||||
def score(self, X: np.array, y: np.array) -> float:
|
||||
self._check_fitted()
|
||||
if X.shape[1] != 3:
|
||||
X = self._pca.transform(X)
|
||||
return super().score(X, y)
|
||||
|
||||
def _check_fitted(self):
|
||||
if not self._fitted:
|
||||
raise Exception("Have to fit the grapher first!")
|
||||
|
||||
def save_all(self, save_folder: str = "./", save_prefix: str = ""):
|
||||
"""Save all the node plots in png format, each with a sequence number
|
||||
|
||||
:param save_folder: folder where the plots are saved, defaults to './'
|
||||
:type save_folder: str, optional
|
||||
"""
|
||||
self._check_fitted()
|
||||
if not os.path.isdir(save_folder):
|
||||
os.mkdir(save_folder)
|
||||
seq = 1
|
||||
for node in self:
|
||||
node.save_hyperplane(
|
||||
save_folder=save_folder, save_prefix=save_prefix, save_seq=seq
|
||||
)
|
||||
seq += 1
|
||||
|
||||
def plot_all(self):
|
||||
"""Plots all the nodes
|
||||
"""
|
||||
self._check_fitted()
|
||||
for node in self:
|
||||
node.plot_hyperplane()
|
||||
|
||||
def __iter__(self):
|
||||
return Siterator(self._tree_gr)
|
@@ -1,4 +1,3 @@
|
||||
from .Strees import Stree, Snode, Siterator
|
||||
from .Strees_grapher import Stree_grapher, Snode_graph
|
||||
|
||||
__all__ = ["Stree", "Snode", "Siterator", "Stree_grapher", "Snode_graph"]
|
||||
__all__ = ["Stree", "Snode", "Siterator"]
|
||||
|
91
stree/tests/Snode_test.py
Normal file
91
stree/tests/Snode_test.py
Normal file
@@ -0,0 +1,91 @@
|
||||
import os
|
||||
import unittest
|
||||
|
||||
import numpy as np
|
||||
|
||||
from stree import Stree, Snode
|
||||
from .utils import get_dataset
|
||||
|
||||
|
||||
class Snode_test(unittest.TestCase):
|
||||
def __init__(self, *args, **kwargs):
|
||||
self._random_state = 1
|
||||
self._clf = Stree(random_state=self._random_state)
|
||||
self._clf.fit(*get_dataset(self._random_state))
|
||||
super().__init__(*args, **kwargs)
|
||||
|
||||
@classmethod
|
||||
def setUp(cls):
|
||||
os.environ["TESTING"] = "1"
|
||||
|
||||
def test_attributes_in_leaves(self):
|
||||
"""Check if the attributes in leaves have correct values so they form a
|
||||
predictor
|
||||
"""
|
||||
|
||||
def check_leave(node: Snode):
|
||||
if not node.is_leaf():
|
||||
check_leave(node.get_down())
|
||||
check_leave(node.get_up())
|
||||
return
|
||||
# Check Belief in leaf
|
||||
classes, card = np.unique(node._y, return_counts=True)
|
||||
max_card = max(card)
|
||||
min_card = min(card)
|
||||
if len(classes) > 1:
|
||||
try:
|
||||
belief = max_card / (max_card + min_card)
|
||||
except ZeroDivisionError:
|
||||
belief = 0.0
|
||||
else:
|
||||
belief = 1
|
||||
self.assertEqual(belief, node._belief)
|
||||
# Check Class
|
||||
class_computed = classes[card == max_card]
|
||||
self.assertEqual(class_computed, node._class)
|
||||
|
||||
check_leave(self._clf.tree_)
|
||||
|
||||
def test_nodes_coefs(self):
|
||||
"""Check if the nodes of the tree have the right attributes filled
|
||||
"""
|
||||
|
||||
def run_tree(node: Snode):
|
||||
if node._belief < 1:
|
||||
# only exclude pure leaves
|
||||
self.assertIsNotNone(node._clf)
|
||||
self.assertIsNotNone(node._clf.coef_)
|
||||
if node.is_leaf():
|
||||
return
|
||||
run_tree(node.get_down())
|
||||
run_tree(node.get_up())
|
||||
|
||||
run_tree(self._clf.tree_)
|
||||
|
||||
def test_make_predictor_on_leaf(self):
|
||||
test = Snode(None, [1, 2, 3, 4], [1, 0, 1, 1], "test")
|
||||
test.make_predictor()
|
||||
self.assertEqual(1, test._class)
|
||||
self.assertEqual(0.75, test._belief)
|
||||
|
||||
def test_make_predictor_on_not_leaf(self):
|
||||
test = Snode(None, [1, 2, 3, 4], [1, 0, 1, 1], "test")
|
||||
test.set_up(Snode(None, [1], [1], "another_test"))
|
||||
test.make_predictor()
|
||||
self.assertIsNone(test._class)
|
||||
self.assertEqual(0, test._belief)
|
||||
|
||||
def test_make_predictor_on_leaf_bogus_data(self):
|
||||
test = Snode(None, [1, 2, 3, 4], [], "test")
|
||||
test.make_predictor()
|
||||
self.assertIsNone(test._class)
|
||||
|
||||
def test_copy_node(self):
|
||||
px = [1, 2, 3, 4]
|
||||
py = [1]
|
||||
test = Snode(Stree(), px, py, "test")
|
||||
computed = Snode.copy(test)
|
||||
self.assertListEqual(computed._X, px)
|
||||
self.assertListEqual(computed._y, py)
|
||||
self.assertEqual("test", computed._title)
|
||||
self.assertIsInstance(computed._clf, Stree)
|
@@ -2,25 +2,10 @@ import os
|
||||
import unittest
|
||||
|
||||
import numpy as np
|
||||
from sklearn.datasets import make_classification, load_iris
|
||||
from sklearn.datasets import load_iris
|
||||
|
||||
from stree import Stree, Snode
|
||||
|
||||
|
||||
def get_dataset(random_state=0, n_classes=2):
|
||||
X, y = make_classification(
|
||||
n_samples=1500,
|
||||
n_features=3,
|
||||
n_informative=3,
|
||||
n_redundant=0,
|
||||
n_repeated=0,
|
||||
n_classes=n_classes,
|
||||
n_clusters_per_class=2,
|
||||
class_sep=1.5,
|
||||
flip_y=0,
|
||||
random_state=random_state,
|
||||
)
|
||||
return X, y
|
||||
from .utils import get_dataset
|
||||
|
||||
|
||||
class Stree_test(unittest.TestCase):
|
||||
@@ -280,76 +265,33 @@ class Stree_test(unittest.TestCase):
|
||||
outcome = outcomes[name][f"{criteria} {kernel}"]
|
||||
self.assertAlmostEqual(outcome, clf.score(px, py))
|
||||
|
||||
def test_min_distance(self):
|
||||
clf = Stree()
|
||||
data = np.array(
|
||||
[
|
||||
[-0.1, 0.2, -0.3],
|
||||
[0.7, 0.01, -0.1],
|
||||
[0.7, -0.9, 0.5],
|
||||
[0.1, 0.2, 0.3],
|
||||
]
|
||||
)
|
||||
expected = np.array([-0.1, 0.01, 0.5, 0.1])
|
||||
computed = clf._min_distance(data, None)
|
||||
self.assertEqual((4,), computed.shape)
|
||||
self.assertListEqual(expected.tolist(), computed.tolist())
|
||||
|
||||
class Snode_test(unittest.TestCase):
|
||||
def __init__(self, *args, **kwargs):
|
||||
self._random_state = 1
|
||||
self._clf = Stree(random_state=self._random_state)
|
||||
self._clf.fit(*get_dataset(self._random_state))
|
||||
super().__init__(*args, **kwargs)
|
||||
|
||||
@classmethod
|
||||
def setUp(cls):
|
||||
os.environ["TESTING"] = "1"
|
||||
|
||||
def test_attributes_in_leaves(self):
|
||||
"""Check if the attributes in leaves have correct values so they form a
|
||||
predictor
|
||||
"""
|
||||
|
||||
def check_leave(node: Snode):
|
||||
if not node.is_leaf():
|
||||
check_leave(node.get_down())
|
||||
check_leave(node.get_up())
|
||||
return
|
||||
# Check Belief in leaf
|
||||
classes, card = np.unique(node._y, return_counts=True)
|
||||
max_card = max(card)
|
||||
min_card = min(card)
|
||||
if len(classes) > 1:
|
||||
try:
|
||||
belief = max_card / (max_card + min_card)
|
||||
except ZeroDivisionError:
|
||||
belief = 0.0
|
||||
else:
|
||||
belief = 1
|
||||
self.assertEqual(belief, node._belief)
|
||||
# Check Class
|
||||
class_computed = classes[card == max_card]
|
||||
self.assertEqual(class_computed, node._class)
|
||||
|
||||
check_leave(self._clf.tree_)
|
||||
|
||||
def test_nodes_coefs(self):
|
||||
"""Check if the nodes of the tree have the right attributes filled
|
||||
"""
|
||||
|
||||
def run_tree(node: Snode):
|
||||
if node._belief < 1:
|
||||
# only exclude pure leaves
|
||||
self.assertIsNotNone(node._clf)
|
||||
self.assertIsNotNone(node._clf.coef_)
|
||||
if node.is_leaf():
|
||||
return
|
||||
run_tree(node.get_down())
|
||||
run_tree(node.get_up())
|
||||
|
||||
run_tree(self._clf.tree_)
|
||||
|
||||
def test_make_predictor_on_leaf(self):
|
||||
test = Snode(None, [1, 2, 3, 4], [1, 0, 1, 1], "test")
|
||||
test.make_predictor()
|
||||
self.assertEqual(1, test._class)
|
||||
self.assertEqual(0.75, test._belief)
|
||||
|
||||
def test_make_predictor_on_not_leaf(self):
|
||||
test = Snode(None, [1, 2, 3, 4], [1, 0, 1, 1], "test")
|
||||
test.set_up(Snode(None, [1], [1], "another_test"))
|
||||
test.make_predictor()
|
||||
self.assertIsNone(test._class)
|
||||
self.assertEqual(0, test._belief)
|
||||
|
||||
def test_make_predictor_on_leaf_bogus_data(self):
|
||||
test = Snode(None, [1, 2, 3, 4], [], "test")
|
||||
test.make_predictor()
|
||||
self.assertIsNone(test._class)
|
||||
def test_max_samples(self):
|
||||
clf = Stree()
|
||||
data = np.array(
|
||||
[
|
||||
[-0.1, 0.2, -0.3],
|
||||
[0.7, 0.01, -0.1],
|
||||
[0.7, -0.9, 0.5],
|
||||
[0.1, 0.2, 0.3],
|
||||
]
|
||||
)
|
||||
expected = np.array([0.2, 0.01, -0.9, 0.2])
|
||||
y = [1, 2, 1, 0]
|
||||
computed = clf._max_samples(data, y)
|
||||
self.assertEqual((4,), computed.shape)
|
||||
self.assertListEqual(expected.tolist(), computed.tolist())
|
@@ -1,226 +0,0 @@
|
||||
import os
|
||||
import imghdr
|
||||
import unittest
|
||||
|
||||
import numpy as np
|
||||
import matplotlib
|
||||
import matplotlib.pyplot as plt
|
||||
import warnings
|
||||
from sklearn.datasets import make_classification
|
||||
|
||||
from stree import Stree_grapher, Snode_graph, Snode
|
||||
|
||||
|
||||
def get_dataset(random_state=0, n_features=3):
|
||||
X, y = make_classification(
|
||||
n_samples=1500,
|
||||
n_features=n_features,
|
||||
n_informative=3,
|
||||
n_redundant=0,
|
||||
n_repeated=0,
|
||||
n_classes=2,
|
||||
n_clusters_per_class=2,
|
||||
class_sep=1.5,
|
||||
flip_y=0,
|
||||
weights=[0.5, 0.5],
|
||||
random_state=random_state,
|
||||
)
|
||||
return X, y
|
||||
|
||||
|
||||
class Stree_grapher_test(unittest.TestCase):
|
||||
def __init__(self, *args, **kwargs):
|
||||
self._random_state = 1
|
||||
self._clf = Stree_grapher(dict(random_state=self._random_state))
|
||||
self._clf.fit(*get_dataset(self._random_state, n_features=4))
|
||||
super().__init__(*args, **kwargs)
|
||||
|
||||
@classmethod
|
||||
def setUp(cls):
|
||||
os.environ["TESTING"] = "1"
|
||||
|
||||
def test_iterator(self):
|
||||
"""Check preorder iterator
|
||||
"""
|
||||
expected = [
|
||||
"root",
|
||||
"root - Down",
|
||||
"root - Down - Down, <cgaf> - Leaf class=1 belief= 0.976023 counts"
|
||||
"=(array([0, 1]), array([ 17, 692]))",
|
||||
"root - Down - Up",
|
||||
"root - Down - Up - Down, <cgaf> - Leaf class=0 belief= 0.500000 "
|
||||
"counts=(array([0, 1]), array([1, 1]))",
|
||||
"root - Down - Up - Up, <cgaf> - Leaf class=0 belief= 0.888889 "
|
||||
"counts=(array([0, 1]), array([8, 1]))",
|
||||
"root - Up, <cgaf> - Leaf class=0 belief= 0.928205 counts=(array("
|
||||
"[0, 1]), array([724, 56]))",
|
||||
]
|
||||
computed = []
|
||||
for node in self._clf:
|
||||
computed.append(str(node))
|
||||
self.assertListEqual(expected, computed)
|
||||
|
||||
def test_score(self):
|
||||
X, y = get_dataset(self._random_state)
|
||||
accuracy_score = self._clf.score(X, y)
|
||||
yp = self._clf.predict(X)
|
||||
accuracy_computed = np.mean(yp == y)
|
||||
self.assertEqual(accuracy_score, accuracy_computed)
|
||||
self.assertGreater(accuracy_score, 0.86)
|
||||
|
||||
def test_score_4dims(self):
|
||||
X, y = get_dataset(self._random_state, n_features=4)
|
||||
accuracy_score = self._clf.score(X, y)
|
||||
self.assertEqual(accuracy_score, 0.95)
|
||||
|
||||
def test_save_all(self):
|
||||
folder_name = os.path.join(os.sep, "tmp", "stree")
|
||||
if os.path.isdir(folder_name):
|
||||
os.rmdir(folder_name)
|
||||
file_names = [
|
||||
os.path.join(folder_name, f"STnode{i}.png") for i in range(1, 8)
|
||||
]
|
||||
with warnings.catch_warnings():
|
||||
warnings.simplefilter("ignore")
|
||||
matplotlib.use("Agg")
|
||||
self._clf.save_all(save_folder=folder_name)
|
||||
for file_name in file_names:
|
||||
self.assertTrue(os.path.exists(file_name))
|
||||
self.assertEqual("png", imghdr.what(file_name))
|
||||
os.remove(file_name)
|
||||
os.rmdir(folder_name)
|
||||
|
||||
def test_plot_all(self):
|
||||
with warnings.catch_warnings():
|
||||
warnings.simplefilter("ignore")
|
||||
matplotlib.use("Agg")
|
||||
num_figures_before = plt.gcf().number
|
||||
self._clf.plot_all()
|
||||
num_figures_after = plt.gcf().number
|
||||
self.assertEqual(7, num_figures_after - num_figures_before)
|
||||
|
||||
|
||||
class Snode_graph_test(unittest.TestCase):
|
||||
def __init__(self, *args, **kwargs):
|
||||
self._random_state = 1
|
||||
self._clf = Stree_grapher(dict(random_state=self._random_state))
|
||||
self._clf.fit(*get_dataset(self._random_state))
|
||||
super().__init__(*args, **kwargs)
|
||||
|
||||
@classmethod
|
||||
def setUp(cls):
|
||||
os.environ["TESTING"] = "1"
|
||||
|
||||
def test_plot_size(self):
|
||||
default = self._clf._tree_gr.get_plot_size()
|
||||
expected = (17, 3)
|
||||
self._clf._tree_gr.set_plot_size(expected)
|
||||
self.assertEqual(expected, self._clf._tree_gr.get_plot_size())
|
||||
self._clf._tree_gr.set_plot_size(default)
|
||||
self.assertEqual(default, self._clf._tree_gr.get_plot_size())
|
||||
|
||||
def test_attributes_in_leaves_graph(self):
|
||||
"""Check if the attributes in leaves have correct values so they form a
|
||||
predictor
|
||||
"""
|
||||
|
||||
def check_leave(node: Snode_graph):
|
||||
if not node.is_leaf():
|
||||
check_leave(node.get_down())
|
||||
check_leave(node.get_up())
|
||||
return
|
||||
# Check Belief in leaf
|
||||
classes, card = np.unique(node._y, return_counts=True)
|
||||
max_card = max(card)
|
||||
min_card = min(card)
|
||||
if len(classes) > 1:
|
||||
try:
|
||||
belief = max_card / (max_card + min_card)
|
||||
except ZeroDivisionError:
|
||||
belief = 0.0
|
||||
else:
|
||||
belief = 1
|
||||
self.assertEqual(belief, node._belief)
|
||||
# Check Class
|
||||
class_computed = classes[card == max_card]
|
||||
self.assertEqual(class_computed, node._class)
|
||||
|
||||
check_leave(self._clf._tree_gr)
|
||||
|
||||
def test_nodes_graph_coefs(self):
|
||||
"""Check if the nodes of the tree have the right attributes filled
|
||||
"""
|
||||
|
||||
def run_tree(node: Snode_graph):
|
||||
if node._belief < 1:
|
||||
# only exclude pure leaves
|
||||
self.assertIsNotNone(node._clf)
|
||||
self.assertIsNotNone(node._clf.coef_)
|
||||
if node.is_leaf():
|
||||
return
|
||||
run_tree(node.get_down())
|
||||
run_tree(node.get_up())
|
||||
|
||||
run_tree(self._clf._tree_gr)
|
||||
|
||||
def test_save_hyperplane(self):
|
||||
folder_name = "/tmp/"
|
||||
file_name = os.path.join(folder_name, "STnode1.png")
|
||||
with warnings.catch_warnings():
|
||||
warnings.simplefilter("ignore")
|
||||
matplotlib.use("Agg")
|
||||
self._clf._tree_gr.save_hyperplane(folder_name)
|
||||
self.assertTrue(os.path.exists(file_name))
|
||||
self.assertEqual("png", imghdr.what(file_name))
|
||||
os.remove(file_name)
|
||||
|
||||
def test_plot_hyperplane_with_distribution(self):
|
||||
plt.close()
|
||||
# select a pure node
|
||||
node = self._clf._tree_gr.get_down().get_up().get_up()
|
||||
with warnings.catch_warnings():
|
||||
warnings.simplefilter("ignore")
|
||||
matplotlib.use("Agg")
|
||||
num_figures_before = plt.gcf().number
|
||||
node.plot_hyperplane(plot_distribution=True)
|
||||
num_figures_after = plt.gcf().number
|
||||
self.assertEqual(1, num_figures_after - num_figures_before)
|
||||
|
||||
def test_plot_hyperplane_without_distribution(self):
|
||||
plt.close()
|
||||
with warnings.catch_warnings():
|
||||
warnings.simplefilter("ignore")
|
||||
matplotlib.use("Agg")
|
||||
num_figures_before = plt.gcf().number
|
||||
self._clf._tree_gr.plot_hyperplane(plot_distribution=False)
|
||||
num_figures_after = plt.gcf().number
|
||||
self.assertEqual(1, num_figures_after - num_figures_before)
|
||||
|
||||
def test_plot_distribution(self):
|
||||
plt.close()
|
||||
with warnings.catch_warnings():
|
||||
warnings.simplefilter("ignore")
|
||||
matplotlib.use("Agg")
|
||||
num_figures_before = plt.gcf().number
|
||||
self._clf._tree_gr.plot_distribution()
|
||||
num_figures_after = plt.gcf().number
|
||||
self.assertEqual(1, num_figures_after - num_figures_before)
|
||||
|
||||
def test_set_axis_limits(self):
|
||||
node = Snode_graph(Snode(None, None, None, "test"))
|
||||
limits = (-2, 2), (-3, 3), (-4, 4)
|
||||
node.set_axis_limits(limits)
|
||||
computed = node.get_axis_limits()
|
||||
x, y, z = limits
|
||||
xx, yy, zz = computed
|
||||
self.assertEqual(x, xx)
|
||||
self.assertEqual(y, yy)
|
||||
self.assertEqual(z, zz)
|
||||
|
||||
def test_cmap_change(self):
|
||||
node = Snode_graph(Snode(None, None, None, "test"))
|
||||
self.assertEqual("jet", node._get_cmap())
|
||||
# make node pure
|
||||
node._belief = 1.0
|
||||
node._class = 1
|
||||
self.assertEqual("jet_r", node._get_cmap())
|
@@ -1,9 +1,4 @@
|
||||
from .Strees_test import Stree_test, Snode_test
|
||||
from .Strees_grapher_test import Stree_grapher_test, Snode_graph_test
|
||||
from .Stree_test import Stree_test
|
||||
from .Snode_test import Snode_test
|
||||
|
||||
__all__ = [
|
||||
"Stree_test",
|
||||
"Snode_test",
|
||||
"Stree_grapher_test",
|
||||
"Snode_graph_test",
|
||||
]
|
||||
__all__ = ["Stree_test", "Snode_test"]
|
||||
|
17
stree/tests/utils.py
Normal file
17
stree/tests/utils.py
Normal file
@@ -0,0 +1,17 @@
|
||||
from sklearn.datasets import make_classification
|
||||
|
||||
|
||||
def get_dataset(random_state=0, n_classes=2):
|
||||
X, y = make_classification(
|
||||
n_samples=1500,
|
||||
n_features=3,
|
||||
n_informative=3,
|
||||
n_redundant=0,
|
||||
n_repeated=0,
|
||||
n_classes=n_classes,
|
||||
n_clusters_per_class=2,
|
||||
class_sep=1.5,
|
||||
flip_y=0,
|
||||
random_state=random_state,
|
||||
)
|
||||
return X, y
|
Reference in New Issue
Block a user