# Mirror of https://github.com/Doctorado-ML/bayesclass.git
# (synced 2025-08-17 00:26:10 +00:00; 112 lines, 2.4 KiB, Python)
#!/usr/bin/env python
|
|
# coding: utf-8
|
|
|
|
# In[1]:
|
|
|
|
|
|
from mdlp import MDLP
|
|
import pandas as pd
|
|
from benchmark import Datasets
|
|
from bayesclass import TAN
|
|
from sklearn.model_selection import (
|
|
cross_validate,
|
|
StratifiedKFold,
|
|
KFold,
|
|
cross_val_score,
|
|
train_test_split,
|
|
)
|
|
import numpy as np
|
|
import warnings
|
|
from stree import Stree
|
|
|
|
# In[2]:
|
|
|
|
|
|
# Load the "glass" dataset (dataframe=True) and pick up its column metadata
dt = Datasets()
data = dt.load("glass", dataframe=True)
features, class_name = dt.dataset.features, dt.dataset.class_name
# Replace the class column with the integer codes from pd.factorize;
# class_factors keeps the original label for each code.
factorization, class_factors = pd.factorize(data[class_name])
data[class_name] = factorization
data.head()
|
|
|
|
|
|
# In[3]:
|
|
|
|
|
|
# Fayyad-Irani discretization (MDLP) of the feature columns, fitted against
# the class column
discretiz = MDLP()
raw_features = data[features].to_numpy()
raw_labels = data[class_name].to_numpy()
Xdisc = discretiz.fit_transform(raw_features, raw_labels)
# Rebuild a DataFrame with the original feature names, then append the class
# column to a copy so features_discretized itself stays features-only.
features_discretized = pd.DataFrame(Xdisc, columns=features)
dataset_discretized = features_discretized.copy()
dataset_discretized[class_name] = data[class_name]
X, y = dataset_discretized[features], dataset_discretized[class_name]
dataset_discretized
|
|
|
|
|
|
# In[4]:
|
|
|
|
|
|
# Experiment settings shared by the helper functions in this script.
n_folds: int = 5  # number of cross-validation splits (used as n_splits)
score_name: str = "accuracy"  # name of the scoring metric
random_state: int = 17  # seed used wherever data is shuffled or split
test_size: float = 0.3  # test_size handed to train_test_split
|
|
|
|
|
|
def validate_classifier(model, X, y, stratified, fit_params):
    """Cross-validate ``model`` on ``X``/``y`` and return the fold scores.

    Parameters
    ----------
    model
        Estimator handed to ``cross_val_score``.
    X, y
        Feature data and target labels.
    stratified : bool
        When true the folds come from ``StratifiedKFold``, otherwise from
        ``KFold``.
    fit_params
        Forwarded unchanged to ``cross_val_score`` as ``fit_params``.

    Returns
    -------
    The result of ``cross_val_score`` for the configured splitter.
    """
    splitter_class = StratifiedKFold if stratified else KFold
    kfold = splitter_class(
        n_splits=n_folds, shuffle=True, random_state=random_state
    )
    # The splitter has to be passed explicitly: without ``cv`` the library
    # default is used and ``stratified``, ``n_folds`` and ``random_state``
    # would have no effect on the result.
    return cross_val_score(
        model,
        X,
        y,
        cv=kfold,
        scoring=score_name,
        fit_params=fit_params,
    )
|
|
|
|
|
|
def split_data(X, y, stratified):
    """Split ``X`` and ``y`` into shuffled train and test parts.

    The split uses the module-level ``test_size`` and ``random_state``.
    When ``stratified`` is true, ``y`` is passed as the ``stratify``
    argument; otherwise no stratification is requested.

    Returns the value of ``train_test_split(X, y, ...)``.
    """
    stratify_on = y if stratified else None
    return train_test_split(
        X,
        y,
        test_size=test_size,
        random_state=random_state,
        shuffle=True,
        stratify=stratify_on,
    )
|
|
|
|
|
|
# In[5]:
|
|
|
|
|
|
# Suppress all warnings from here on so they do not clutter the output.
warnings.filterwarnings("ignore")
# Fit and plot a TAN model for both simple_init settings and heads 0 to 3.
for simple_init in [False, True]:
    model = TAN(simple_init=simple_init)
    for head in range(4):
        X_train, X_test, y_train, y_test = split_data(X, y, stratified=False)
        model.fit(
            X_train,
            y_train,
            head=head,
            features=features,
            class_name=class_name,
        )
        # Keep the predictions under their own name. Assigning them to ``y``
        # would replace the full label column, and the next split_data(X, y)
        # call would then receive X and y of different lengths.
        y_pred = model.predict(X_test)
        model.plot()
|
|
|
|
# In[ ]:
|
|
|
|
|
|
# Refit on the complete X/y and plot with a title that reports the score on
# that same data. ``simple_init`` and ``head`` are whatever values the loop
# above left behind; ``head`` only appears in the title, not in fit().
model = TAN(simple_init=simple_init)
model.fit(X, y, features=features, class_name=class_name)
full_data_score = model.score(X, y)
model.plot(
    f"**simple_init={simple_init} head={head} score={full_data_score}"
)
|