#4 Add code coverage & codacy badge

Add code coverage configuration in codecov. Add some tests.
2025-08-16 16:06:01 +00:00 · 2020-06-06 03:04:18 +02:00
parent b4816b2995
commit b9f14aec05
14 changed files with 608 additions and 204 deletions
--- a/main.py
+++ b/main.py
@@ -2,17 +2,29 @@ import time
 from sklearn.model_selection import train_test_split
 from stree import Stree

-random_state=1
+random_state = 1
+

 def load_creditcard(n_examples=0):
    import pandas as pd
    import numpy as np
    import random
-    df = pd.read_csv('data/creditcard.csv')
-    print("Fraud: {0:.3f}% {1}".format(df.Class[df.Class == 1].count()*100/df.shape[0], df.Class[df.Class == 1].count()))
-    print("Valid: {0:.3f}% {1}".format(df.Class[df.Class == 0].count()*100/df.shape[0], df.Class[df.Class == 0].count()))
+
+    df = pd.read_csv("data/creditcard.csv")
+    print(
+        "Fraud: {0:.3f}% {1}".format(
+            df.Class[df.Class == 1].count() * 100 / df.shape[0],
+            df.Class[df.Class == 1].count(),
+        )
+    )
+    print(
+        "Valid: {0:.3f}% {1}".format(
+            df.Class[df.Class == 0].count() * 100 / df.shape[0],
+            df.Class[df.Class == 0].count(),
+        )
+    )
    y = np.expand_dims(df.Class.values, axis=1)
-    X = df.drop(['Class', 'Time', 'Amount'], axis=1).values
+    X = df.drop(["Class", "Time", "Amount"], axis=1).values
    if n_examples > 0:
        # Take first n_examples samples
        X = X[:n_examples, :]
@@ -26,14 +38,30 @@ def load_creditcard(n_examples=0):
            X = np.append(Xt, X[indices], axis=0)
            y = np.append(yt, y[indices], axis=0)
    print("X.shape", X.shape, " y.shape", y.shape)
-    print("Fraud: {0:.3f}% {1}".format(len(y[y == 1])*100/X.shape[0], len(y[y == 1])))
-    print("Valid: {0:.3f}% {1}".format(len(y[y == 0]) * 100 / X.shape[0], len(y[y == 0])))
-    Xtrain, Xtest, ytrain, ytest = train_test_split(X, y, train_size=0.7, shuffle=True, random_state=random_state, stratify=y)
+    print(
+        "Fraud: {0:.3f}% {1}".format(
+            len(y[y == 1]) * 100 / X.shape[0], len(y[y == 1])
+        )
+    )
+    print(
+        "Valid: {0:.3f}% {1}".format(
+            len(y[y == 0]) * 100 / X.shape[0], len(y[y == 0])
+        )
+    )
+    Xtrain, Xtest, ytrain, ytest = train_test_split(
+        X,
+        y,
+        train_size=0.7,
+        shuffle=True,
+        random_state=random_state,
+        stratify=y,
+    )
    return Xtrain, Xtest, ytrain, ytest

+
 # data = load_creditcard(-5000) # Take all true samples + 5000 of the others
 # data = load_creditcard(5000)  # Take the first 5000 samples
-data = load_creditcard() # Take all the samples
+data = load_creditcard()  # Take all the samples

 Xtrain = data[0]
 Xtest = data[1]
@@ -41,17 +69,20 @@ ytrain = data[2]
 ytest = data[3]

 now = time.time()
-clf = Stree(C=.01, random_state=random_state)
+clf = Stree(C=0.01, random_state=random_state)
 clf.fit(Xtrain, ytrain)
 print(f"Took {time.time() - now:.2f} seconds to train")
 print(clf)
 print(f"Classifier's accuracy (train): {clf.score(Xtrain, ytrain):.4f}")
 print(f"Classifier's accuracy (test) : {clf.score(Xtest, ytest):.4f}")
 proba = clf.predict_proba(Xtest)
-print("Checking that we have correct probabilities, these are probabilities of sample belonging to class 1")
+print(
+    "Checking that we have correct probabilities, these are probabilities of "
+    "sample belonging to class 1"
+)
 res0 = proba[proba[:, 0] == 0]
 res1 = proba[proba[:, 0] == 1]
 print("++++++++++res0 > .8++++++++++++")
-print(res0[res0[:, 1] > .8])
+print(res0[res0[:, 1] > 0.8])
 print("**********res1 < .4************")
-print(res1[res1[:, 1] < .4])
+print(res1[res1[:, 1] < 0.4])