mirror of
https://github.com/Doctorado-ML/Stree_datasets.git
synced 2025-08-20 01:46:03 +00:00
Commit Inicial
This commit is contained in:
48
test_arff.py
Normal file
48
test_arff.py
Normal file
@@ -0,0 +1,48 @@
|
||||
import os
|
||||
import time
|
||||
import numpy as np
|
||||
import pandas as pd
|
||||
from scipy.io import arff
|
||||
from stree import Stree
|
||||
from sklearn.model_selection import cross_val_score
|
||||
from sklearn.model_selection import train_test_split
|
||||
|
||||
folder = (
|
||||
"/Volumes/Datos/OneDrive - Universidad de Castilla-La Mancha/"
|
||||
"Doctorado2019/Compartida/FuentesDescargados/data-4/"
|
||||
)
|
||||
|
||||
name = "yeast"
|
||||
random_state = 1
|
||||
|
||||
file_name = os.path.join(folder, name, f"{name}.arff")
|
||||
data, meta = arff.loadarff(file_name)
|
||||
df = pd.DataFrame(data)
|
||||
y = df["clase"].to_numpy().astype(np.int16)
|
||||
df.drop(columns="clase", inplace=True)
|
||||
X = df.to_numpy().astype(np.float16)
|
||||
print(f"Xshape {X.shape} Xtype {X.dtype}")
|
||||
print(f"yshape {y.shape} ytype {y.dtype}")
|
||||
|
||||
X_train, X_test, y_train, y_test = train_test_split(
|
||||
X, y, test_size=0.3, random_state=random_state
|
||||
)
|
||||
|
||||
clf = Stree(
|
||||
random_state=random_state,
|
||||
C=1e5,
|
||||
max_iter=1e5,
|
||||
kernel="poly",
|
||||
degree=5,
|
||||
gamma=0.8,
|
||||
)
|
||||
now = time.time()
|
||||
scores = cross_val_score(clf, X, y, cv=5)
|
||||
print(f"Accuracy for {name}: {scores.mean():.2f} (+/- {scores.std() * 2:.2f})")
|
||||
print(f"Took : {time.time() - now:.2f} seconds")
|
||||
print(f"Score one tree all samples .: {clf.fit(X, y).score(X, y):.4f}")
|
||||
print(
|
||||
f"Score one tree train/test .: "
|
||||
f"{clf.fit(X_train, y_train).score(X_test, y_test):.4f}"
|
||||
)
|
||||
print("*" * 80)
|
Reference in New Issue
Block a user