mirror of
https://github.com/Doctorado-ML/STree.git
synced 2025-08-17 08:26:00 +00:00
Add complete classes counts to node and tests
This commit is contained in:
@@ -68,6 +68,7 @@ class Snode:
|
|||||||
self._impurity = impurity
|
self._impurity = impurity
|
||||||
self._partition_column: int = -1
|
self._partition_column: int = -1
|
||||||
self._scaler = scaler
|
self._scaler = scaler
|
||||||
|
self._proba = None
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def copy(cls, node: "Snode") -> "Snode":
|
def copy(cls, node: "Snode") -> "Snode":
|
||||||
@@ -127,23 +128,22 @@ class Snode:
|
|||||||
def get_up(self) -> "Snode":
|
def get_up(self) -> "Snode":
|
||||||
return self._up
|
return self._up
|
||||||
|
|
||||||
def make_predictor(self):
|
def make_predictor(self, num_classes: int) -> None:
|
||||||
"""Compute the class of the predictor and its belief based on the
|
"""Compute the class of the predictor and its belief based on the
|
||||||
subdataset of the node only if it is a leaf
|
subdataset of the node only if it is a leaf
|
||||||
"""
|
"""
|
||||||
if not self.is_leaf():
|
if not self.is_leaf():
|
||||||
return
|
return
|
||||||
classes, card = np.unique(self._y, return_counts=True)
|
classes, card = np.unique(self._y, return_counts=True)
|
||||||
if len(classes) > 1:
|
self._proba = np.zeros((num_classes,))
|
||||||
|
for c, n in zip(classes, card):
|
||||||
|
self._proba[c] = n
|
||||||
|
try:
|
||||||
max_card = max(card)
|
max_card = max(card)
|
||||||
self._class = classes[card == max_card][0]
|
self._class = classes[card == max_card][0]
|
||||||
self._belief = max_card / np.sum(card)
|
self._belief = max_card / np.sum(card)
|
||||||
else:
|
except ValueError:
|
||||||
self._belief = 1
|
self._class = None
|
||||||
try:
|
|
||||||
self._class = classes[0]
|
|
||||||
except IndexError:
|
|
||||||
self._class = None
|
|
||||||
|
|
||||||
def graph(self):
|
def graph(self):
|
||||||
"""
|
"""
|
||||||
@@ -155,7 +155,7 @@ class Snode:
|
|||||||
output += (
|
output += (
|
||||||
f'N{id(self)} [shape=box style=filled label="'
|
f'N{id(self)} [shape=box style=filled label="'
|
||||||
f"class={self._class} impurity={self._impurity:.3f} "
|
f"class={self._class} impurity={self._impurity:.3f} "
|
||||||
f'classes={count_values[0]} samples={count_values[1]}"];\n'
|
f'counts={self._proba}"];\n'
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
output += (
|
output += (
|
||||||
|
@@ -314,7 +314,7 @@ class Stree(BaseEstimator, ClassifierMixin):
|
|||||||
if np.unique(y).shape[0] == 1:
|
if np.unique(y).shape[0] == 1:
|
||||||
# only 1 class => pure dataset
|
# only 1 class => pure dataset
|
||||||
node.set_title(title + ", <pure>")
|
node.set_title(title + ", <pure>")
|
||||||
node.make_predictor()
|
node.make_predictor(self.n_classes_)
|
||||||
return node
|
return node
|
||||||
# Train the model
|
# Train the model
|
||||||
clf = self._build_clf()
|
clf = self._build_clf()
|
||||||
@@ -333,7 +333,7 @@ class Stree(BaseEstimator, ClassifierMixin):
|
|||||||
if X_U is None or X_D is None:
|
if X_U is None or X_D is None:
|
||||||
# didn't part anything
|
# didn't part anything
|
||||||
node.set_title(title + ", <cgaf>")
|
node.set_title(title + ", <cgaf>")
|
||||||
node.make_predictor()
|
node.make_predictor(self.n_classes_)
|
||||||
return node
|
return node
|
||||||
node.set_up(
|
node.set_up(
|
||||||
self._train(X_U, y_u, sw_u, depth + 1, title + f" - Up({depth+1})")
|
self._train(X_U, y_u, sw_u, depth + 1, title + f" - Up({depth+1})")
|
||||||
|
@@ -67,10 +67,28 @@ class Snode_test(unittest.TestCase):
|
|||||||
|
|
||||||
def test_make_predictor_on_leaf(self):
|
def test_make_predictor_on_leaf(self):
|
||||||
test = Snode(None, [1, 2, 3, 4], [1, 0, 1, 1], [], 0.0, "test")
|
test = Snode(None, [1, 2, 3, 4], [1, 0, 1, 1], [], 0.0, "test")
|
||||||
test.make_predictor()
|
test.make_predictor(2)
|
||||||
self.assertEqual(1, test._class)
|
self.assertEqual(1, test._class)
|
||||||
self.assertEqual(0.75, test._belief)
|
self.assertEqual(0.75, test._belief)
|
||||||
self.assertEqual(-1, test._partition_column)
|
self.assertEqual(-1, test._partition_column)
|
||||||
|
self.assertListEqual([1, 3], test._proba.tolist())
|
||||||
|
|
||||||
|
def test_make_predictor_on_not_leaf(self):
|
||||||
|
test = Snode(None, [1, 2, 3, 4], [1, 0, 1, 1], [], 0.0, "test")
|
||||||
|
test.set_up(Snode(None, [1], [1], [], 0.0, "another_test"))
|
||||||
|
test.make_predictor(2)
|
||||||
|
self.assertIsNone(test._class)
|
||||||
|
self.assertEqual(0, test._belief)
|
||||||
|
self.assertEqual(-1, test._partition_column)
|
||||||
|
self.assertEqual(-1, test.get_up()._partition_column)
|
||||||
|
self.assertIsNone(test._proba)
|
||||||
|
|
||||||
|
def test_make_predictor_on_leaf_bogus_data(self):
|
||||||
|
test = Snode(None, [1, 2, 3, 4], [], [], 0.0, "test")
|
||||||
|
test.make_predictor(2)
|
||||||
|
self.assertIsNone(test._class)
|
||||||
|
self.assertEqual(-1, test._partition_column)
|
||||||
|
self.assertListEqual([0, 0], test._proba.tolist())
|
||||||
|
|
||||||
def test_set_title(self):
|
def test_set_title(self):
|
||||||
test = Snode(None, [1, 2, 3, 4], [1, 0, 1, 1], [], 0.0, "test")
|
test = Snode(None, [1, 2, 3, 4], [1, 0, 1, 1], [], 0.0, "test")
|
||||||
@@ -97,21 +115,6 @@ class Snode_test(unittest.TestCase):
|
|||||||
test.set_features([1, 2])
|
test.set_features([1, 2])
|
||||||
self.assertListEqual([1, 2], test.get_features())
|
self.assertListEqual([1, 2], test.get_features())
|
||||||
|
|
||||||
def test_make_predictor_on_not_leaf(self):
|
|
||||||
test = Snode(None, [1, 2, 3, 4], [1, 0, 1, 1], [], 0.0, "test")
|
|
||||||
test.set_up(Snode(None, [1], [1], [], 0.0, "another_test"))
|
|
||||||
test.make_predictor()
|
|
||||||
self.assertIsNone(test._class)
|
|
||||||
self.assertEqual(0, test._belief)
|
|
||||||
self.assertEqual(-1, test._partition_column)
|
|
||||||
self.assertEqual(-1, test.get_up()._partition_column)
|
|
||||||
|
|
||||||
def test_make_predictor_on_leaf_bogus_data(self):
|
|
||||||
test = Snode(None, [1, 2, 3, 4], [], [], 0.0, "test")
|
|
||||||
test.make_predictor()
|
|
||||||
self.assertIsNone(test._class)
|
|
||||||
self.assertEqual(-1, test._partition_column)
|
|
||||||
|
|
||||||
def test_copy_node(self):
|
def test_copy_node(self):
|
||||||
px = [1, 2, 3, 4]
|
px = [1, 2, 3, 4]
|
||||||
py = [1]
|
py = [1]
|
||||||
|
@@ -695,7 +695,7 @@ class Stree_test(unittest.TestCase):
|
|||||||
)
|
)
|
||||||
expected_tail = (
|
expected_tail = (
|
||||||
' [shape=box style=filled label="class=1 impurity=0.000 '
|
' [shape=box style=filled label="class=1 impurity=0.000 '
|
||||||
'classes=[1] samples=[1]"];\n}\n'
|
'counts=[0. 1. 0.]"];\n}\n'
|
||||||
)
|
)
|
||||||
self.assertEqual(clf.graph(), expected_head + "}\n")
|
self.assertEqual(clf.graph(), expected_head + "}\n")
|
||||||
clf.fit(X, y)
|
clf.fit(X, y)
|
||||||
@@ -715,7 +715,7 @@ class Stree_test(unittest.TestCase):
|
|||||||
)
|
)
|
||||||
expected_tail = (
|
expected_tail = (
|
||||||
' [shape=box style=filled label="class=1 impurity=0.000 '
|
' [shape=box style=filled label="class=1 impurity=0.000 '
|
||||||
'classes=[1] samples=[1]"];\n}\n'
|
'counts=[0. 1. 0.]"];\n}\n'
|
||||||
)
|
)
|
||||||
self.assertEqual(clf.graph("Sample title"), expected_head + "}\n")
|
self.assertEqual(clf.graph("Sample title"), expected_head + "}\n")
|
||||||
clf.fit(X, y)
|
clf.fit(X, y)
|
||||||
|
Reference in New Issue
Block a user