#4 Add code coverage & codacy badge

Add code coverage configuration in codecov
Add some tests
This commit is contained in:
2020-06-06 03:04:18 +02:00
parent b4816b2995
commit b9f14aec05
14 changed files with 608 additions and 204 deletions

57
main.py
View File

@@ -2,17 +2,29 @@ import time
from sklearn.model_selection import train_test_split
from stree import Stree
random_state=1
random_state = 1
def load_creditcard(n_examples=0):
import pandas as pd
import numpy as np
import random
df = pd.read_csv('data/creditcard.csv')
print("Fraud: {0:.3f}% {1}".format(df.Class[df.Class == 1].count()*100/df.shape[0], df.Class[df.Class == 1].count()))
print("Valid: {0:.3f}% {1}".format(df.Class[df.Class == 0].count()*100/df.shape[0], df.Class[df.Class == 0].count()))
df = pd.read_csv("data/creditcard.csv")
print(
"Fraud: {0:.3f}% {1}".format(
df.Class[df.Class == 1].count() * 100 / df.shape[0],
df.Class[df.Class == 1].count(),
)
)
print(
"Valid: {0:.3f}% {1}".format(
df.Class[df.Class == 0].count() * 100 / df.shape[0],
df.Class[df.Class == 0].count(),
)
)
y = np.expand_dims(df.Class.values, axis=1)
X = df.drop(['Class', 'Time', 'Amount'], axis=1).values
X = df.drop(["Class", "Time", "Amount"], axis=1).values
if n_examples > 0:
# Take first n_examples samples
X = X[:n_examples, :]
@@ -26,14 +38,30 @@ def load_creditcard(n_examples=0):
X = np.append(Xt, X[indices], axis=0)
y = np.append(yt, y[indices], axis=0)
print("X.shape", X.shape, " y.shape", y.shape)
print("Fraud: {0:.3f}% {1}".format(len(y[y == 1])*100/X.shape[0], len(y[y == 1])))
print("Valid: {0:.3f}% {1}".format(len(y[y == 0]) * 100 / X.shape[0], len(y[y == 0])))
Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, train_size=0.7, shuffle=True, random_state=random_state, stratify=y)
print(
"Fraud: {0:.3f}% {1}".format(
len(y[y == 1]) * 100 / X.shape[0], len(y[y == 1])
)
)
print(
"Valid: {0:.3f}% {1}".format(
len(y[y == 0]) * 100 / X.shape[0], len(y[y == 0])
)
)
Xtrain, Xtest, ytrain, ytest = train_test_split(
X,
y,
train_size=0.7,
shuffle=True,
random_state=random_state,
stratify=y,
)
return Xtrain, Xtest, ytrain, ytest
# data = load_creditcard(-5000) # Take all true samples + 5000 of the others
# data = load_creditcard(5000) # Take the first 5000 samples
data = load_creditcard() # Take all the samples
data = load_creditcard() # Take all the samples
Xtrain = data[0]
Xtest = data[1]
@@ -41,17 +69,20 @@ ytrain = data[2]
ytest = data[3]
now = time.time()
clf = Stree(C=.01, random_state=random_state)
clf = Stree(C=0.01, random_state=random_state)
clf.fit(Xtrain, ytrain)
print(f"Took {time.time() - now:.2f} seconds to train")
print(clf)
print(f"Classifier's accuracy (train): {clf.score(Xtrain, ytrain):.4f}")
print(f"Classifier's accuracy (test) : {clf.score(Xtest, ytest):.4f}")
proba = clf.predict_proba(Xtest)
print("Checking that we have correct probabilities, these are probabilities of sample belonging to class 1")
print(
"Checking that we have correct probabilities, these are probabilities of "
"sample belonging to class 1"
)
res0 = proba[proba[:, 0] == 0]
res1 = proba[proba[:, 0] == 1]
print("++++++++++res0 > .8++++++++++++")
print(res0[res0[:, 1] > .8])
print(res0[res0[:, 1] > 0.8])
print("**********res1 < .4************")
print(res1[res1[:, 1] < .4])
print(res1[res1[:, 1] < 0.4])