mirror of
https://github.com/Doctorado-ML/bayesclass.git
synced 2025-08-18 00:55:54 +00:00
Fix fit/build/train mistake
This commit is contained in:
111
test.py
Normal file
111
test.py
Normal file
@@ -0,0 +1,111 @@
|
||||
#!/usr/bin/env python
|
||||
# coding: utf-8
|
||||
|
||||
# In[1]:
|
||||
|
||||
|
||||
from mdlp import MDLP
|
||||
import pandas as pd
|
||||
from benchmark import Datasets
|
||||
from bayesclass import TAN
|
||||
from sklearn.model_selection import (
|
||||
cross_validate,
|
||||
StratifiedKFold,
|
||||
KFold,
|
||||
cross_val_score,
|
||||
train_test_split,
|
||||
)
|
||||
import numpy as np
|
||||
import warnings
|
||||
from stree import Stree
|
||||
|
||||
# In[2]:
|
||||
|
||||
|
||||
# Get data as a dataset
|
||||
dt = Datasets()
|
||||
data = dt.load("glass", dataframe=True)
|
||||
features = dt.dataset.features
|
||||
class_name = dt.dataset.class_name
|
||||
factorization, class_factors = pd.factorize(data[class_name])
|
||||
data[class_name] = factorization
|
||||
data.head()
|
||||
|
||||
|
||||
# In[3]:
|
||||
|
||||
|
||||
# Fayyad Irani
|
||||
discretiz = MDLP()
|
||||
Xdisc = discretiz.fit_transform(
|
||||
data[features].to_numpy(), data[class_name].to_numpy()
|
||||
)
|
||||
features_discretized = pd.DataFrame(Xdisc, columns=features)
|
||||
dataset_discretized = features_discretized.copy()
|
||||
dataset_discretized[class_name] = data[class_name]
|
||||
X = dataset_discretized[features]
|
||||
y = dataset_discretized[class_name]
|
||||
dataset_discretized
|
||||
|
||||
|
||||
# In[4]:
|
||||
|
||||
|
||||
n_folds = 5
|
||||
score_name = "accuracy"
|
||||
random_state = 17
|
||||
test_size = 0.3
|
||||
|
||||
|
||||
def validate_classifier(model, X, y, stratified, fit_params):
|
||||
stratified_class = StratifiedKFold if stratified else KFold
|
||||
kfold = stratified_class(
|
||||
shuffle=True, random_state=random_state, n_splits=n_folds
|
||||
)
|
||||
# return cross_validate(model, X, y, cv=kfold, return_estimator=True,
|
||||
# scoring=score_name)
|
||||
return cross_val_score(model, X, y, fit_params=fit_params)
|
||||
|
||||
|
||||
def split_data(X, y, stratified):
|
||||
if stratified:
|
||||
return train_test_split(
|
||||
X,
|
||||
y,
|
||||
test_size=test_size,
|
||||
random_state=random_state,
|
||||
stratify=y,
|
||||
shuffle=True,
|
||||
)
|
||||
else:
|
||||
return train_test_split(
|
||||
X, y, test_size=test_size, random_state=random_state, shuffle=True
|
||||
)
|
||||
|
||||
|
||||
# In[5]:
|
||||
|
||||
|
||||
warnings.filterwarnings("ignore")
|
||||
for simple_init in [False, True]:
|
||||
model = TAN(simple_init=simple_init)
|
||||
for head in range(4):
|
||||
X_train, X_test, y_train, y_test = split_data(X, y, stratified=False)
|
||||
model.fit(
|
||||
X_train,
|
||||
y_train,
|
||||
head=head,
|
||||
features=features,
|
||||
class_name=class_name,
|
||||
)
|
||||
y = model.predict(X_test)
|
||||
model.plot()
|
||||
|
||||
# In[ ]:
|
||||
|
||||
|
||||
model = TAN(simple_init=simple_init)
|
||||
model.fit(X, y, features=features, class_name=class_name)
|
||||
model.plot(
|
||||
f"**simple_init={simple_init} head={head} score={model.score(X, y)}"
|
||||
)
|
Reference in New Issue
Block a user