Commit Inicial

2025-08-20 01:46:03 +00:00 · 2020-11-20 11:23:40 +01:00
commit 5611e5bc01
2914 changed files with 2625178 additions and 0 deletions
--- a/test_arff.py
+++ b/test_arff.py
@@ -0,0 +1,48 @@
+import os
+import time
+import numpy as np
+import pandas as pd
+from scipy.io import arff
+from stree import Stree
+from sklearn.model_selection import cross_val_score
+from sklearn.model_selection import train_test_split
+
+folder = (
+    "/Volumes/Datos/OneDrive - Universidad de Castilla-La Mancha/"
+    "Doctorado2019/Compartida/FuentesDescargados/data-4/"
+)
+
+name = "yeast"
+random_state = 1
+
+file_name = os.path.join(folder, name, f"{name}.arff")
+data, meta = arff.loadarff(file_name)
+df = pd.DataFrame(data)
+y = df["clase"].to_numpy().astype(np.int16)
+df.drop(columns="clase", inplace=True)
+X = df.to_numpy().astype(np.float16)
+print(f"Xshape {X.shape} Xtype {X.dtype}")
+print(f"yshape {y.shape} ytype {y.dtype}")
+
+X_train, X_test, y_train, y_test = train_test_split(
+    X, y, test_size=0.3, random_state=random_state
+)
+
+clf = Stree(
+    random_state=random_state,
+    C=1e5,
+    max_iter=1e5,
+    kernel="poly",
+    degree=5,
+    gamma=0.8,
+)
+now = time.time()
+scores = cross_val_score(clf, X, y, cv=5)
+print(f"Accuracy for {name}: {scores.mean():.2f} (+/- {scores.std() * 2:.2f})")
+print(f"Took : {time.time() - now:.2f} seconds")
+print(f"Score one tree all samples .: {clf.fit(X, y).score(X, y):.4f}")
+print(
+    f"Score one tree train/test .: "
+    f"{clf.fit(X_train, y_train).score(X_test, y_test):.4f}"
+)
+print("*" * 80)