Files
bayesclass/test.py

112 lines
2.4 KiB
Python

#!/usr/bin/env python
# coding: utf-8
# In[1]:
from mdlp import MDLP
import pandas as pd
from benchmark import Datasets
from bayesclass import TAN
from sklearn.model_selection import (
cross_validate,
StratifiedKFold,
KFold,
cross_val_score,
train_test_split,
)
import numpy as np
import warnings
from stree import Stree
# In[2]:
# Get data as a dataset
dt = Datasets()
data = dt.load("glass", dataframe=True)
features = dt.dataset.features
class_name = dt.dataset.class_name
factorization, class_factors = pd.factorize(data[class_name])
data[class_name] = factorization
data.head()
# In[3]:
# Fayyad Irani
discretiz = MDLP()
Xdisc = discretiz.fit_transform(
data[features].to_numpy(), data[class_name].to_numpy()
)
features_discretized = pd.DataFrame(Xdisc, columns=features)
dataset_discretized = features_discretized.copy()
dataset_discretized[class_name] = data[class_name]
X = dataset_discretized[features]
y = dataset_discretized[class_name]
dataset_discretized
# In[4]:
n_folds = 5
score_name = "accuracy"
random_state = 17
test_size = 0.3
def validate_classifier(model, X, y, stratified, fit_params):
stratified_class = StratifiedKFold if stratified else KFold
kfold = stratified_class(
shuffle=True, random_state=random_state, n_splits=n_folds
)
# return cross_validate(model, X, y, cv=kfold, return_estimator=True,
# scoring=score_name)
return cross_val_score(model, X, y, fit_params=fit_params)
def split_data(X, y, stratified):
if stratified:
return train_test_split(
X,
y,
test_size=test_size,
random_state=random_state,
stratify=y,
shuffle=True,
)
else:
return train_test_split(
X, y, test_size=test_size, random_state=random_state, shuffle=True
)
# In[5]:
warnings.filterwarnings("ignore")
for simple_init in [False, True]:
model = TAN(simple_init=simple_init)
for head in range(4):
X_train, X_test, y_train, y_test = split_data(X, y, stratified=False)
model.fit(
X_train,
y_train,
head=head,
features=features,
class_name=class_name,
)
y = model.predict(X_test)
model.plot()
# In[ ]:
model = TAN(simple_init=simple_init)
model.fit(X, y, features=features, class_name=class_name)
model.plot(
f"**simple_init={simple_init} head={head} score={model.score(X, y)}"
)