Files
stree_datasets/stats_stree.py
Ricardo Montañana 54b73880e3 Remove normalization
As every dataset is already standardized
2021-06-26 13:27:03 +02:00

41 lines
908 B
Python

from stree import Stree
from experimentation.Sets import Datasets
def nodes_leaves(clf) -> tuple:
    """Count the internal nodes and the leaves of a fitted tree.

    ``clf`` is iterated directly; every item it yields must expose an
    ``is_leaf()`` method. Items for which ``is_leaf()`` is truthy are
    counted as leaves, all others as internal nodes.

    Returns:
        A ``(nodes, leaves)`` tuple of integer counts. An iterable that
        yields nothing produces ``(0, 0)``.
    """
    nodes = 0
    leaves = 0
    for node in clf:
        if node.is_leaf():
            leaves += 1
        else:
            nodes += 1
    return nodes, leaves
def compute_depth(node, depth: int) -> int:
    """Return the depth of the deepest leaf reachable from ``node``.

    Args:
        node: Current tree node, or ``None`` for a missing child. A node
            must expose ``is_leaf()``, ``get_up()`` and ``get_down()``.
        depth: Number of levels already traversed above ``node``.

    Returns:
        ``depth`` unchanged when ``node`` is ``None``; ``depth + 1`` when
        ``node`` is a leaf; otherwise the larger of the depths found by
        descending into the two children.
    """
    if node is None:
        # A missing child adds no level of its own.
        return depth
    if node.is_leaf():
        return depth + 1
    # An internal node counts as one level; keep the deeper of both branches.
    return max(
        compute_depth(node.get_up(), depth + 1),
        compute_depth(node.get_down(), depth + 1),
    )
# Report, for each dataset of the collection, the size and depth of an Stree
# fitted on it together with its accuracy.
# NOTE(review): the meaning of the two positional False flags is not visible
# in this file -- confirm against experimentation.Sets.Datasets.
dt = Datasets(False, False, "tanveer")
for dataset in dt:
    # The first element of each item is the name accepted by dt.load().
    dataset_name = dataset[0]
    X, y = dt.load(dataset_name)
    clf = Stree(random_state=1)
    clf.fit(X, y)
    # Scored on the same samples used for fitting (no held-out split).
    accuracy = clf.score(X, y)
    nodes, leaves = nodes_leaves(clf)
    # Depth recomputed by walking the tree from its root; printed next to
    # clf.depth_ (presumably as a cross-check of the classifier's own value).
    depth = compute_depth(clf.tree_, 0)
    # Columns: name, internal nodes, leaves, clf.depth_, recomputed depth,
    # accuracy.
    print(
        f"{dataset_name:30s} {nodes:5d} {leaves:5d} {clf.depth_:5d} "
        f"{depth:5d} {accuracy:7.5f}"
    )