mirror of
https://github.com/Doctorado-ML/STree.git
synced 2025-08-15 15:36:00 +00:00
Add C param in constructor and creditcard dataset
This commit is contained in:
44
main.py
44
main.py
@@ -5,8 +5,42 @@ random_state = 1
|
||||
# Build a synthetic two-class dataset: 1500 samples, 3 features (all of them
# informative), two clusters per class and no label noise (flip_y=0).
X, y = make_classification(
    n_samples=1500,
    n_features=3,
    n_informative=3,
    n_redundant=0,
    n_repeated=0,
    n_classes=2,
    n_clusters_per_class=2,
    class_sep=1.5,
    flip_y=0,
    weights=[0.5, 0.5],
    random_state=random_state,
)

# Fit the tree on the whole dataset, print it and dump its sub-datasets.
model = Stree(random_state=random_state)
model.fit(X, y)
print(model)
model.save_sub_datasets()

# Sanity check: predict the first sample, reshaped to a (1, n_features) matrix.
first_sample = X[0, :].reshape(-1, X.shape[1])
print(f"Prediciting [{y[0]}] we have {model.predict(first_sample)}")
|
||||
|
||||
def load_creditcard(n_examples=0, path='data/creditcard.csv', random_state=None):
    """Load the credit-card fraud dataset and optionally subsample it.

    The CSV is expected to contain a ``Class`` label column (1 = fraud,
    0 = valid) plus ``Time`` and ``Amount`` columns; those three columns are
    dropped from the feature matrix.

    Parameters
    ----------
    n_examples : int, default 0
        * ``> 0``: keep only the first ``n_examples`` rows.
        * ``== 0``: keep the whole dataset.
        * ``< 0``: keep every positive (fraud) row plus ``-n_examples``
          randomly chosen negative rows. If fewer negatives exist than
          requested, all of them are used.
    path : str, default 'data/creditcard.csv'
        Location of the CSV file.
    random_state : int or None, default None
        Seed for the negative-row sampling. ``None`` uses the global
        ``random`` module state, as the function always did.

    Returns
    -------
    X : numpy.ndarray of shape (n_rows, n_features)
    y : numpy.ndarray of shape (n_rows, 1)
    """
    import pandas as pd
    import numpy as np
    import random

    df = pd.read_csv(path)
    total_fraud = df.Class[df.Class == 1].count()
    total_valid = df.Class[df.Class == 0].count()
    print("Fraud: {0:.3f}% {1}".format(total_fraud * 100 / df.shape[0], total_fraud))
    print("Valid: {0:.3f}% {1}".format(total_valid * 100 / df.shape[0], total_valid))

    # Labels are kept as a column vector so they can be sliced like X.
    y = np.expand_dims(df.Class.values, axis=1)
    X = df.drop(['Class', 'Time', 'Amount'], axis=1).values

    if n_examples > 0:
        # Take the first n_examples samples.
        X = X[:n_examples, :]
        y = y[:n_examples, :]
    elif n_examples < 0:
        # Take all the positive samples with a number of random negatives.
        # Sampling is restricted to the negative rows: drawing from every row
        # could pick fraud rows a second time and duplicate them.
        positive_mask = (y == 1).ravel()
        negative_rows = np.flatnonzero(~positive_mask).tolist()
        n_negatives = min(-n_examples, len(negative_rows))
        rng = random if random_state is None else random.Random(random_state)
        chosen = rng.sample(negative_rows, n_negatives)
        X = np.append(X[positive_mask], X[chosen], axis=0)
        y = np.append(y[positive_mask], y[chosen], axis=0)

    print("X.shape", X.shape, " y.shape", y.shape)
    print("Fraud: {0:.3f}% {1}".format(len(y[y == 1]) * 100 / X.shape[0], len(y[y == 1])))
    print("Valid: {0:.3f}% {1}".format(len(y[y == 0]) * 100 / X.shape[0], len(y[y == 0])))
    return X, y
|
||||
# Optional data sources: uncomment one of these to train on the credit-card
# data instead of the dataset already bound to X, y.
# X, y = load_creditcard(-5000)   # every fraud row plus 5000 random rows
# X, y = load_creditcard(0)       # the whole dataset

# Train a tree with an explicit C and iteration limit.
clf = Stree(C=.01, max_iter=100, random_state=random_state)
clf.fit(X, y)

# Inspect the fitted tree and dump its sub-datasets.
print(clf)
clf.show_tree()
clf.save_sub_datasets()

# Predict the first sample, reshaped to a (1, n_features) matrix.
sample_row = X[0, :].reshape(-1, X.shape[1])
print(f"Predicting {y[0]} we have {clf.predict(sample_row)}")

# Score on the same X, y used for fitting, then list only the leaves.
accuracy = clf.score(X, y, print_out=False)
print(f"Classifier's accuracy: {accuracy:.4f}")
clf.show_tree(only_leaves=True)
|
||||
|
Reference in New Issue
Block a user