Mirror of https://github.com/Doctorado-ML/NeuralNetwork.git (synced 2025-08-16 16:05:56 +00:00)
First commit
README.md | 37
@@ -1,2 +1,35 @@
-# NeuralNetwork
-Neural Network implementation based on the DeepLearning courses in Coursera
+# N_Network
+
+Neural Network implementation based on the Andrew Ng courses
+
+Implements Batch GD, Stochastic GD (minibatch_size=1) & Stochastic minibatch GD:
+
+- Cost function: Cross Entropy Loss
+- Activation functions: relu, sigmoid, tanh
+- Regularization: l2 (lambd), Momentum (beta), Dropout (keep_prob)
+- Optimization: Minibatch Gradient Descent, RMS Prop, Adam
+- Learning rate decay: multiplies the learning rate by a decay factor every given number of epochs
+- Fair minibatches: can create batches with the same proportion of 1/0 labels as the training data
+
+Restriction:
+
+- Multiclass only with one-hot encoded labels
+
+## Install
+
+```bash
+pip install git+https://github.com/doctorado-ml/NeuralNetwork
+```
+
+## Example
+
+#### Console
+
+```bash
+python main.py
+```
+
+#### Jupyter Notebook
+
+[![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/github/Doctorado-ML/NeuralNetwork/blob/master/test.ipynb) Test notebook
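Note: the optional hyperparameters listed in the README map to keys of the dict the `N_Network` constructor reads; a minimal sketch (the values below are illustrative, only the key names come from the code in this commit):

```python
from n_network import N_Network

# Required keys plus the optional regularization / optimization knobs
# read by N_Network.__init__ (all values below are illustrative)
hyperparams = dict(
    m=400, n=2,                      # number of samples and features
    n_units=[2, 10, 1],              # units per layer, input layer included
    g=[0, N_Network.relu, N_Network.sigmoid],
    gprime=[0, N_Network.relu_prime, N_Network.sigmoid_prime],
    epochs=1000, alpha=0.05,
    optim='adam',                    # 'sgd', 'rms' or 'adam'
    lambd=0.1,                       # l2 regularization (or keep_prob=0.8 for dropout)
    minibatch_size=64,
    fair_minibatches=True,           # keep the 1/0 label proportion in each batch
    epochs_decay=(0.9, 100),         # multiply alpha by 0.9 every 100 epochs
)
model = N_Network(hyperparams)
```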
main.py | 54 (new file)
@@ -0,0 +1,54 @@
import numpy as np
import matplotlib.pyplot as plt
from n_network import N_Network, plot_decision_boundary


def load_planar_dataset(random_seed):
    np.random.seed(random_seed)
    m = 400  # number of examples
    N = int(m / 2)  # number of points per class
    D = 2  # dimensionality
    X = np.zeros((m, D))  # data matrix where each row is a single example
    Y = np.zeros((m, 1), dtype='uint8')  # labels vector (0 for red, 1 for blue)
    a = 4  # maximum radius of the flower

    for j in range(2):
        ix = range(N * j, N * (j + 1))
        t = np.linspace(j * 3.12, (j + 1) * 3.12, N) + np.random.randn(N) * 0.2  # theta
        r = a * np.sin(4 * t) + np.random.randn(N) * 0.2  # radius
        X[ix] = np.c_[r * np.sin(t), r * np.cos(t)]
        Y[ix] = j

    X = X.T
    Y = Y.T
    return X, Y


random_seed = 1
Xtrain, ytrain = load_planar_dataset(random_seed)
X = Xtrain.T
y = ytrain.T
print('X', X.shape, 'y', y.shape)

# Visualize the data
plt.scatter(X[:, 0], X[:, 1], c=y.T[0], s=40, cmap=plt.cm.Spectral)
plt.title('Dataset')
plt.show()

# Define a four-layer network
nu = [X.shape[1], 10, 7, 5, 1]
xg = [0, N_Network.relu, N_Network.relu, N_Network.relu, N_Network.sigmoid]
xgprime = [0, N_Network.relu_prime, N_Network.relu_prime,
           N_Network.relu_prime, N_Network.sigmoid_prime]
init_params = dict(m=X.shape[0], n=X.shape[1], n_units=nu, g=xg, optim='sgd',
                   gprime=xgprime, epochs=10000, alpha=0.075)
nd = N_Network(init_params)
nd.set_seed(random_seed)
costs = nd.train(X, y)
print("First cost: {0:.6f} final cost: {1:.6f}".format(costs[0], costs[-1]))
print("Number of units in each layer: ", nu)
nd.print_time()
nd.plot_costs()
pred = nd.valid(X, y)
indices = nd.mislabeled(y, pred)
# Plot decision boundary
plot_decision_boundary(nd, X, y, True, '4 Layers N_Network')
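main.py trains and evaluates in a single run; the `save`/`load` API defined in n_network/Neural_Network.py below can persist the result between runs. A small sketch continuing from the script above (the file name `planar` is hypothetical):

```python
# Persist the trained network; save() pickles the whole state to "planar.nn"
nd.save('planar')

# Later, a network can be rebuilt from disk by passing the file name
# in the hyperparameter dict instead of the usual keys
nd2 = N_Network(dict(filename='planar.nn'))
print(nd2.score(X, y))  # same accuracy as the freshly trained net
```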
n_network/Metrics.py | 220 (new file)
@@ -0,0 +1,220 @@
'''
__author__ = "Ricardo Montañana Gómez"
__copyright__ = "Copyright 2020, Ricardo Montañana Gómez"
__license__ = "MIT"
Compute metrics for predicted data
'''

import numpy as np


class Metrics:
    """
    True Positives (tp): correctly predicted positive values
    True Negatives (tn): correctly predicted negative values
    False Positives (fp): the actual class is not the target but the predicted class is
    False Negatives (fn): the actual class is the target but the predicted class is not
    """
    _truth = None
    _predicted = None
    _tp = None
    _fp = None
    _fn = None
    _num_classes = 0

    def __init__(self, y=None, yhat=None):
        self._truth = self._adapt(y, update_num=True)
        self._predicted = self._adapt(yhat)
        self._compute_parameters()

    def _adapt(self, data, update_num=False):
        # Labels given as class indices are used as-is
        if data.max() > 1 or data.ndim == 1 or (data.ndim == 2 and data.shape[1] == 1):
            if update_num:
                self._num_classes = data.max() + 1
            return data
        # One-hot encoded labels: recover the class index of each sample
        res = np.argmax(data, axis=1)
        if update_num:
            self._num_classes = res.max() + 1
        return res

    def _compute_param(self, set_a, set_b):
        return np.sum(np.logical_and(set_a, set_b))

    def _compute_parameters(self):
        self._tp = np.zeros((self._num_classes), dtype=int)
        self._fp = np.zeros((self._num_classes), dtype=int)
        self._fn = np.zeros((self._num_classes), dtype=int)
        for target in range(self._num_classes):
            self._tp[target] = self._compute_param(
                self._truth == target, self._predicted == target)
            self._fp[target] = self._compute_param(
                self._truth != target, self._predicted == target)
            self._fn[target] = self._compute_param(
                self._truth == target, self._predicted != target)

    def parameters(self):
        vmacro, vweigh, _, vmicro = self._compute_metrics()
        return dict(tp=self._tp, fp=self._fp, fn=self._fn, macro=vmacro, weigh=vweigh, micro=vmicro)

    def sets(self):
        return self._truth, self._predicted

    def fp_indices(self, target):
        return np.where(np.logical_and(self._truth != target, self._predicted == target))[0]

    def fn_indices(self, target):
        return np.where(np.logical_and(self._truth == target, self._predicted != target))[0]

    def correct(self):
        """
        Return the number of correct predictions
        """
        return np.sum(self._tp)

    def _get_dict(self, vmacro, vweigh, vmicro):
        return dict(macro=vmacro, weigh=vweigh, micro=vmicro)

    def recall(self, target):
        """
        Recall is the ratio of correctly predicted positive observations to all observations in the positive class
        """
        if target == 'all':
            macro, weigh, _, micro = self._compute_metrics()
            return self._get_dict(macro['rec'], weigh['rec'], micro['rec'])
        tp = self._tp[target]
        fn = self._fn[target]
        if (tp + fn) > 0:
            return tp / (tp + fn)
        return 0

    def precision(self, target):
        """
        Precision is the ratio of correctly predicted positive observations to the total predicted positive observations
        """
        if target == 'all':
            macro, weigh, _, micro = self._compute_metrics()
            return self._get_dict(macro['prec'], weigh['prec'], micro['prec'])
        tp = self._tp[target]
        fp = self._fp[target]
        if (tp + fp) > 0:
            return tp / (tp + fp)
        return 0

    def accuracy(self):
        """
        Accuracy is the ratio of correctly predicted observations to the total observations
        """
        tp = np.sum(self._tp)
        elements = self._truth.size
        if elements > 0:
            return tp / elements
        return 0

    def f1(self, target):
        """
        The f1 score is the harmonic mean of precision and recall
        """
        if target == 'all':
            macro, weigh, _, micro = self._compute_metrics()
            return self._get_dict(macro['f1'], weigh['f1'], micro['f1'])
        divider = self.recall(target) + self.precision(target)
        if divider != 0:
            return 2 * (self.recall(target) * self.precision(target)) / divider
        return 0

    def confusion_matrix(self):
        """
        Return the confusion matrix associated with the data provided
        """
        result = np.zeros((self._num_classes, self._num_classes), dtype=int)
        for target in range(self._num_classes):
            for j in range(self._num_classes):
                result[target][j] = self._compute_param(
                    self._truth == target, self._predicted == j)
        return result

    def debug(self):
        for target in range(self._num_classes):
            tp = self._tp[target]
            fp = self._fp[target]
            fn = self._fn[target]
            print("target=[{0}], tp=[{1}], fp=[{2}], fn=[{3}]".format(
                target, tp, fp, fn))
        print("Truth shape=", self._truth.shape,
              " Prediction shape=", self._predicted.shape)
        print("Number of classes:", self._num_classes)

    def _compute_micro_metrics(self):
        ttp = np.sum(self._tp)
        tfp = np.sum(self._fp)
        if ttp + tfp == 0:
            return 0, 0, 0
        # Micro precision equals micro recall here: every misprediction is both a fp and a fn
        pr = re = ttp / (ttp + tfp)
        return 2 * (pr * re) / (pr + re), pr, re

    def _compute_metrics(self):
        tf1 = tpr = tre = 0.0
        twf1 = twpr = twre = 0.0
        total_samples = 0
        for target in range(self._num_classes):
            f1 = self.f1(target)
            pr = self.precision(target)
            re = self.recall(target)
            num_samples = len(np.where(self._truth == target)[0])
            tf1 += f1
            tpr += pr
            tre += re
            twf1 += f1 * num_samples
            twpr += pr * num_samples
            twre += re * num_samples
            total_samples += num_samples
        tf1 /= self._num_classes
        tpr /= self._num_classes
        tre /= self._num_classes
        twf1 /= total_samples
        twpr /= total_samples
        twre /= total_samples
        mf1, mpr, mre = self._compute_micro_metrics()
        macro = dict(f1=tf1, prec=tpr, rec=tre)
        weigh = dict(f1=twf1, prec=twpr, rec=twre)
        micro = dict(f1=mf1, prec=mpr, rec=mre)
        return macro, weigh, total_samples, micro

    def classification_report(self, title='', digits=6):
        def format_line(a, b, c, d, e):
            return "[{0:^5}]\t[{1:.{digits}f}]\t[{2:.{digits}f}]\t[{3:.{digits}f}]\t[{4:5d}]".format(a, b, c, d, e, digits=digits)
        print(
            "======================== {0} ========================".format(title))

        header = ['target', 'f1-score', 'precision', 'recall', 'support']
        print("{d[0]:^7}\t{d[1]:^{length}.{length}}\t{d[2]:^{length}.{length}}\t{d[3]:^{length}.{length}}\t{d[4]:^7}".format(
            d=header, length=digits + 4))
        for target in range(self._num_classes):
            f1 = self.f1(target)
            pr = self.precision(target)
            re = self.recall(target)
            num_samples = len(np.where(self._truth == target)[0])
            print(format_line(target, f1, pr, re, num_samples))
        print("")
        macro, weigh, total_samples, micro = self._compute_metrics()
        print(format_line(
            'macro', macro['f1'], macro['prec'], macro['rec'], total_samples))
        print(format_line(
            'weig.', weigh['f1'], weigh['prec'], weigh['rec'], total_samples))
        print("accuracy=[{0:.{digits}f}]".format(
            self.accuracy(), digits=digits))
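A quick usage sketch for the `Metrics` class above (the label arrays are illustrative; inputs may be class indices or one-hot rows, as handled by `_adapt`):

```python
import numpy as np
from n_network import Metrics

y = np.array([0, 0, 1, 1, 2, 2])        # ground truth
yhat = np.array([0, 1, 1, 1, 2, 0])     # predictions

met = Metrics(y, yhat)
print(met.accuracy())                   # 4 / 6
print(met.precision(1), met.recall(1))  # per-class metrics
print(met.f1('all'))                    # dict with macro / weighted / micro f1
print(met.confusion_matrix())
met.classification_report(title='demo')
```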
n_network/Neural_Network.py | 540 (new file)
@@ -0,0 +1,540 @@
'''
__author__ = "Ricardo Montañana Gómez"
__copyright__ = "Copyright 2020, Ricardo Montañana Gómez"
__license__ = "MIT"
Neural Network implementation based on the Andrew Ng courses
Implements Batch GD, Stochastic GD (minibatch_size=1) & Stochastic minibatch GD:
- Cost function: Cross Entropy Loss
- Activation functions: relu, sigmoid, tanh
- Regularization: l2 (lambd), Momentum (beta), Dropout (keep_prob)
- Optimization: Minibatch Gradient Descent, RMS Prop, Adam
- Learning rate decay: multiplies the learning rate by a decay factor every given number of epochs
- Fair minibatches: can create batches with the same proportion of 1/0 labels as the training data
Restriction:
- Multiclass only with one-hot encoded labels
'''

import time
import math
import pickle
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from .Metrics import Metrics


# Cost function (cross-entropy):
# Compute the cross-entropy cost $J$
# $$ J = -\frac{1}{m} \sum\limits_{i = 1}^{m} \left( y^{(i)}\log\left(a^{[L](i)}\right) + (1 - y^{(i)})\log\left(1 - a^{[L](i)}\right) \right) $$


class N_Network:

    def __init__(self, hyperparam):
        # NN state
        self._ct = 0  # Time spent in computation
        self._optim = {}  # Parameter-update functions, keyed by optimization algorithm
        self._optim_update = None  # Update function selected
        self._optim_selected = ''
        self._multiclass = False  # Is it a multiclass classification problem?
        self._epochs_decay = ()  # (decay rate, applied every # epochs)
        self._verbose = False
        # Hyperparameters
        self._L = 0  # Number of layers including the input layer
        self._n_units = []  # Number of units in each layer
        self._g = []  # Activation functions of each layer
        self._gprime = []  # Derivatives of the activation functions, needed in backpropagation
        self._alpha = 0  # Learning rate in gradient descent
        self._beta = 0  # Momentum coefficient / acts as beta1 in adam
        self._beta2 = 0.999  # RMS Prop coefficient
        self._epsilon = 1e-8  # RMS Prop value to prevent division by zero
        self._params = {}  # dict of parameters
        self._epochs = 0  # Number of iterations to train
        self._seed = 2020  # Random seed
        self._lambd = 0  # Regularization coefficient
        self._keep_prob = 1  # Dropout regularization
        self._minibatch_size = 0  # Number of samples used for each parameter update
        self._fair_minibatches = False  # Whether or not to create fair minibatches
        if 'filename' in hyperparam:
            self.load(hyperparam['filename'])
            return
        self._m = hyperparam['m']
        self._n = hyperparam['n']
        self._n_units = hyperparam['n_units']
        self._g = hyperparam['g']
        self._gprime = hyperparam['gprime']
        self._alpha = hyperparam['alpha']
        self._learning_rate = self._alpha
        self._epochs = hyperparam['epochs']
        self._L = len(self._n_units)
        # Ensures that at most one regularization method is chosen
        if 'lambd' in hyperparam:
            self._lambd = hyperparam['lambd']
        elif 'keep_prob' in hyperparam:
            self._keep_prob = hyperparam['keep_prob']
        if 'minibatch_size' in hyperparam:
            self._minibatch_size = hyperparam['minibatch_size']
        else:
            self._minibatch_size = self._m
        if 'fair_minibatches' in hyperparam:
            self._fair_minibatches = hyperparam['fair_minibatches']
        optim = {
            'adam': self._update_parameters_adam,
            'sgd': self._update_parameters_sgd,
            'rms': self._update_parameters_rms
        }
        self._optim_selected = hyperparam['optim']
        self._optim_update = optim[self._optim_selected]
        if hyperparam['optim'] != 'sgd':
            self._beta = 0.9  # if the algorithm is rms or adam, set default beta/beta1
        if 'beta' in hyperparam:
            self._beta = hyperparam['beta']
        np.random.seed(self._seed)
        if 'multiclass' in hyperparam:
            self._multiclass = hyperparam['multiclass']
        if 'epochs_decay' in hyperparam:
            self._epochs_decay = hyperparam['epochs_decay']
        self.initialize()

    # Activation functions
    @staticmethod
    def softmax(x):  # numerically stable softmax
        exps = np.exp(x - np.max(x))
        return exps / exps.sum(axis=0, keepdims=True)

    @staticmethod
    def softmax_prime(x):
        # With softmax + cross-entropy the derivative is folded into dZ = AL - Y,
        # so the activation derivative reduces to 1
        return 1

    @staticmethod
    def relu(x):
        return np.maximum(0, x)

    @staticmethod
    def sigmoid(x):
        return 1 / (1 + np.exp(-x))

    @staticmethod
    def tanh(x):
        return np.tanh(x)

    @staticmethod
    def sigmoid_prime(x):
        s = N_Network.sigmoid(x)
        return s * (1 - s)

    @staticmethod
    def relu_prime(x):
        return np.greater(x, 0).astype(int)

    @staticmethod
    def tanh_prime(x):
        z = N_Network.tanh(x)
        return 1 - z * z

    def initialize(self):
        # Initialize dictionaries of parameters
        b = {}
        W = {}
        Z = {}
        A = {}
        dZ = {}
        dW = {}
        db = {}
        vdW = {}
        vdb = {}
        SdW = {}
        Sdb = {}
        for i in range(self._L):
            if self._verbose:
                print("Initializing layer %d..." % i)
            # Scaled initialization helps ease the vanishing / exploding gradient problem
            cte = 0.01
            if self._g[i] == self.relu:
                # He initialization: makes Var(W) = 2 / n
                cte = np.sqrt(2 / self._n_units[i - 1])
            elif self._g[i] == self.tanh:
                # Xavier initialization: makes Var(W) = 1 / n
                cte = 1 / np.sqrt(self._n_units[i - 1])
            elif self._g[i] == self.sigmoid:
                # makes Var(W) = 2 / (n_in + n_out)
                prev_layer = (i - 1) if i > 0 else 0
                cte = np.sqrt(
                    2 / (self._n_units[prev_layer] + self._n_units[i]))
            # W and b are not needed for the input layer
            if i > 0:
                W[i] = np.random.randn(
                    self._n_units[i], self._n_units[i - 1]) * cte
                b[i] = np.zeros((self._n_units[i], 1))
            dW[i] = np.zeros(
                (self._n_units[i], self._n_units[i - 1] if i > 0 else self._minibatch_size))
            db[i] = np.zeros((self._n_units[i], 1))
            vdW[i] = np.zeros(
                (self._n_units[i], self._n_units[i - 1] if i > 0 else self._minibatch_size))
            vdb[i] = np.zeros((self._n_units[i], 1))
            SdW[i] = np.zeros(
                (self._n_units[i], self._n_units[i - 1] if i > 0 else self._minibatch_size))
            Sdb[i] = np.zeros((self._n_units[i], 1))
            A[i] = np.zeros(
                (self._n_units[i], self._minibatch_size if i < self._L else 1))
            Z[i] = np.zeros(
                (self._n_units[i], self._minibatch_size if i < self._L else 1))
            dZ[i] = np.zeros((self._n_units[i], self._minibatch_size))

        self._params = dict(b=b, W=W, Z=Z, A=A, dZ=dZ, dW=dW,
                            db=db, vdW=vdW, vdb=vdb, SdW=SdW, Sdb=Sdb)

    def get_accuracy(self, y, ypred, direct_result=False):
        m = y.shape[0]
        met = Metrics(y, ypred)
        ac = met.accuracy()
        right = met.correct()
        if direct_result:
            return ac
        return "Accuracy: {0:.3f}% ({1} of {2})".format(100 * ac, right, m)

    def get_metrics(self, y, ypred):
        return Metrics(y, ypred)

    def plot_costs(self):
        plt.plot(self._costs)
        plt.ylabel('Cost (cross-entropy)')
        plt.xlabel('Epochs')
        plt.title("Epochs: {0} Learning rate: {1}".format(
            self._epochs, self._learning_rate))
        plt.show()

    def plot_confusion_matrix(self, y, yhat, title='', figsize=(10, 7), scale=1.4):
        cm = Metrics(y, yhat).confusion_matrix()
        plt.figure(figsize=figsize)
        sns.set(font_scale=scale)
        fig = sns.heatmap(cm, annot=True, fmt='d', cmap="Blues", cbar=False)
        fig.set_title("{0} ({1}) / {2}".format(
            title, self._optim_selected, self.get_accuracy(y, yhat)))
        fig.set_xlabel('Predicted')
        fig.set_ylabel('Truth')
        # fig.invert_yaxis()

    def check_dimensions(self):
        for i in range(self._L):
            print("i={0}, b{1}, W{2}, A{3}, Z{4}, vdW{5}, vdb{6}, SdW{7}, Sdb{8}, dW{9}, db{10}\n".format(
                i, self._params['b'][i].shape if i > 0 else ' XXX',
                self._params['W'][i].shape if i > 0 else ' XXX',
                self._params['A'][i].shape,
                self._params['Z'][i].shape,
                self._params['vdW'][i].shape if i > 0 else ' XXX',
                self._params['vdb'][i].shape if i > 0 else ' XXX',
                self._params['SdW'][i].shape if i > 0 else ' XXX',
                self._params['Sdb'][i].shape if i > 0 else ' XXX',
                self._params['dW'][i].shape if i > 0 else ' XXX',
                self._params['db'][i].shape if i > 0 else ' XXX'
            ))

    def get_params(self):
        return self._params

    def num_minibatches(self):
        return math.floor(self._m / self._minibatch_size) + (0 if self._m % self._minibatch_size == 0 else 1)

    def create_minibatches(self, X, y):
        return self.create_fair_minibatches(X, y) if self._fair_minibatches else self.create_random_minibatches(X, y)

    def _balance_sets(self, y):
        """
        Returns:
        num0: number of samples of category 0 to include in each minibatch
        num1: number of samples of category 1 to include in each minibatch
        class0: category 0 indexes
        class1: category 1 indexes
        """
        class_one = np.array(np.where(y == 1))[0]
        class_zero = np.array(np.where(y == 0))[0]
        percent = len(class_one) / len(y)
        num_class0 = math.floor((1 - percent) * self._minibatch_size)
        num_class1 = self._minibatch_size - num_class0
        return num_class0, num_class1, class_zero, class_one

    def create_fair_minibatches(self, X, y):
        """
        Creates a list of minibatches from (X, y) that keep the label
        proportions of the training data
        """
        mini_batches = []
        num_zero, num_one, class_zero, class_one = self._balance_sets(y)
        # Compute categorized shuffled sets
        X0 = X[class_zero]
        X1 = X[class_one]
        y0 = y[class_zero]
        y1 = y[class_one]
        permutation0 = list(np.random.permutation(len(class_zero)))
        permutation1 = list(np.random.permutation(len(class_one)))
        shuffledX0 = X0[permutation0, :]
        shuffledX1 = X1[permutation1, :]
        shuffledY0 = y0[permutation0, :]
        shuffledY1 = y1[permutation1, :]
        size = self._minibatch_size
        num = math.floor(self._m / size)
        for k in range(num):
            # Insert the category 0 elements into the minibatch
            miniX = shuffledX0[k * num_zero:(k + 1) * num_zero, :]
            miniY = shuffledY0[k * num_zero:(k + 1) * num_zero, :]
            # Append the category 1 elements to the minibatch
            miniX = np.vstack((miniX, shuffledX1[k * num_one:(k + 1) * num_one, :]))
            miniY = np.vstack((miniY, shuffledY1[k * num_one:(k + 1) * num_one, :]))
            mini_batch = (miniX, miniY)
            mini_batches.append(mini_batch)
        if self._m % size != 0:
            miniX = shuffledX0[num * num_zero:y0.shape[0], :]
            miniY = shuffledY0[num * num_zero:y0.shape[0], :]
            miniX = np.vstack((miniX, shuffledX1[num * num_one:y1.shape[0], :]))
            miniY = np.vstack((miniY, shuffledY1[num * num_one:y1.shape[0], :]))
            mini_batch = (miniX, miniY)
            mini_batches.append(mini_batch)
        return mini_batches

    def create_random_minibatches(self, X, y):
        """
        Creates a list of random minibatches from (X, y)
        """
        mini_batches = []
        permutation = list(np.random.permutation(self._m))
        shuffledX = X[permutation, :]
        shuffledY = y[permutation, :]
        size = self._minibatch_size
        num = math.floor(self._m / size)
        for k in range(num):
            miniX = shuffledX[k * size:(k + 1) * size, :]
            miniY = shuffledY[k * size:(k + 1) * size, :]
            mini_batch = (miniX, miniY)
            mini_batches.append(mini_batch)
        if self._m % size != 0:
            miniX = shuffledX[num * size:self._m, :]
            miniY = shuffledY[num * size:self._m, :]
            mini_batch = (miniX, miniY)
            mini_batches.append(mini_batch)
        return mini_batches

    def _compute_Sd(self, i):
        self._params['SdW'][i] = self._beta2 * self._params['SdW'][i] + \
            (1 - self._beta2) * np.square(self._params['dW'][i])
        self._params['Sdb'][i] = self._beta2 * self._params['Sdb'][i] + \
            (1 - self._beta2) * np.square(self._params['db'][i])
        return self._params['SdW'][i], self._params['Sdb'][i]

    def _compute_vd(self, i):
        self._params['vdW'][i] = self._beta * self._params['vdW'][i] + \
            (1 - self._beta) * self._params['dW'][i]
        self._params['vdb'][i] = self._beta * self._params['vdb'][i] + \
            (1 - self._beta) * self._params['db'][i]
        return self._params['vdW'][i], self._params['vdb'][i]

    def _update_parameters_rms(self, t):
        for i in range(1, self._L):
            SdW, Sdb = self._compute_Sd(i)
            dW = self._params['dW'][i]
            db = self._params['db'][i]
            self._params['W'][i] -= self._alpha * \
                dW / (np.sqrt(SdW) + self._epsilon)
            self._params['b'][i] -= self._alpha * \
                db / (np.sqrt(Sdb) + self._epsilon)

    def _update_parameters_adam(self, t):
        for i in range(1, self._L):
            vdW, vdb = self._compute_vd(i)
            SdW, Sdb = self._compute_Sd(i)
            # Bias correction uses beta1**t and beta2**t, where t counts the updates
            vdW_corr = vdW / (1 - math.pow(self._beta, t))
            vdb_corr = vdb / (1 - math.pow(self._beta, t))
            SdW_corr = SdW / (1 - math.pow(self._beta2, t))
            Sdb_corr = Sdb / (1 - math.pow(self._beta2, t))
            self._params['W'][i] -= self._alpha * \
                vdW_corr / (np.sqrt(SdW_corr) + self._epsilon)
            self._params['b'][i] -= self._alpha * \
                vdb_corr / (np.sqrt(Sdb_corr) + self._epsilon)

    def _update_parameters_sgd(self, t):
        for i in range(1, self._L):
            vdW, vdb = self._compute_vd(i)
            self._params['W'][i] -= self._alpha * vdW
            self._params['b'][i] -= self._alpha * vdb

    def set_verbose(self, verbose):
        self._verbose = verbose

    def set_seed(self, seed):
        self._seed = seed
        np.random.seed(self._seed)

    def _cost_function(self, yhat, y):
        """
        Compute the cost (cross-entropy) of the prediction

        yhat: vector of predictions, shape (number of examples, 1)
        y: vector of labels, shape (number of examples, 1)

        Returns: cost
        """
        if self._multiclass:
            cost = -np.mean(y * np.log(yhat + self._epsilon))
        else:
            cost = -np.sum(np.nansum(y * np.log(yhat) + (1 - y)
                                     * np.log(1 - yhat))) / self._minibatch_size
        # Add the l2 regularization term
        cost += self._lambd / (2 * self._minibatch_size) * \
            np.sum([np.sum(np.square(w)) for w in self._params['W'].values()])
        assert cost.shape == ()
        return cost

    def _get_prediction(self, transform=False):
        res = self._get_AL().T
        if transform:
            if self._multiclass:
                return np.argmax(res, axis=1)
            return np.round(res).astype(int)
        return res

    def _get_AL(self):
        return self._params['A'][self._L - 1]

    def _backward_propagation(self, y):
        AL = self._get_AL()
        Y = y.T
        assert Y.shape == AL.shape
        if self._multiclass:
            dA = AL - Y
        else:
            # Derivative of the cost with respect to A[L]
            dA = np.nan_to_num(-(np.divide(Y, AL) - np.divide(1 - Y, 1 - AL)))
        for i in reversed(range(1, self._L)):
            dZ = dA * self._gprime[i](self._params['Z'][i])
            dW = dZ.dot(self._params['A'][i - 1].T) / self._minibatch_size + \
                (self._lambd / self._minibatch_size) * self._params['W'][i]
            db = np.sum(dZ, axis=1, keepdims=True) / self._minibatch_size
            dA = self._params['W'][i].T.dot(dZ)
            self._params['dW'][i] = dW
            self._params['db'][i] = db

    def train(self, X, y):
        return self.fit(X, y)

    def fit(self, X, y):
        self._costs = []
        tic = time.time()
        if self._verbose:
            print('Training neural net...{0} epochs with {1} minibatches'.format(
                self._epochs, self.num_minibatches()))
        divider = 1 if self._epochs < 100 else 100
        t = 0
        for e in range(self._epochs):
            minibatches = self.create_minibatches(X, y)
            cost_total = 0
            for minibatch in minibatches:
                Xt, yt = minibatch
                self._forward_propagation(Xt, train=True)
                # Compute gradient descent
                self._backward_propagation(yt)
                t += 1  # Only used in adam
                self._optim_update(t)
                cost_total += self._cost_function(self._get_prediction(), yt)
            cost_avg = cost_total / self.num_minibatches()
            self._costs.append(cost_avg)
            if e % divider == 0 and self._verbose:
                print("Epoch: {0} Cost {1:.8f}".format(e, cost_avg))
            if self._epochs_decay != ():
                (rate, number) = self._epochs_decay
                if e > 0 and e % number == 0:
                    self._alpha *= rate
                    if self._verbose:
                        print(
                            "*Setting learning rate (alpha) to: {0}".format(self._alpha))
        self._ct = time.time() - tic
        self._alpha = self._learning_rate
        if self._verbose:
            self.print_time()
        return self._costs

    def print_time(self):
        print("Elapsed time: {0:.2f} s".format(self._ct))

    def _forward_propagation(self, X, train=False):
        self._params['A'][0] = X.T
        for i in range(1, self._L):
            self._params['Z'][i] = self._params['W'][i].dot(
                self._params['A'][i - 1]) + self._params['b'][i]
            self._params['A'][i] = self._g[i](self._params['Z'][i])
            # Apply the dropout mask to the hidden activations just computed
            if train and self._keep_prob != 1 and i < self._L - 1:
                d = np.random.rand(*self._params['A'][i].shape)
                d = (d < self._keep_prob).astype(int)
                '''
                Dividing by self._keep_prob keeps the expected output of the neuron the same
                in training with dropout and in testing without dropout. "This is important
                because at test time all neurons see all their inputs, so we want the outputs
                of neurons at test time to be identical to their expected outputs at training
                time" (Stanford CS231n Convolutional Neural Networks for Visual Recognition)
                '''
                self._params['A'][i] = (
                    self._params['A'][i] * d) / self._keep_prob  # inverted dropout

    def predict(self, X):
        self._forward_propagation(X, train=False)
        # transform=True already argmaxes for multiclass and rounds for binary
        return self._get_prediction(transform=True)

    def predict_proba(self, X):
        self._forward_propagation(X, train=False)
        return self._get_prediction(transform=False)

    def evaluate(self, X, y, transform=True):
        return self.valid(X, y, transform)

    def valid(self, X, y, transform=True, score=False):
        if X.shape[0] != y.shape[0]:
            print('Dimension error X, y', X.shape, y.shape)
        yhat = self.predict(X)
        ypred = self._get_prediction(transform=True)
        if score:
            return self.get_accuracy(y, ypred, direct_result=True)
        print(self.get_accuracy(y, ypred))
        return yhat

    def score(self, X, y):
        return self.valid(X, y, score=True)

    def mislabeled(self, y, ypred, target=1):
        return Metrics(y, ypred).fn_indices(target)

    def save(self, name=''):
        filename = "{0}.nn".format(name)
        try:
            with open(filename, 'wb') as f:
                pickle.dump(self.__dict__, f, 2)
        except OSError:
            print("I couldn't write the file ", filename)
            return False
        return True

    def load(self, filename):
        try:
            with open(filename, 'rb') as f:
                tmp_dict = pickle.load(f)
        except OSError:
            print(filename, " doesn't exist or I couldn't open it.")
            return False
        self.__dict__.update(tmp_dict)
        return True

    def compact_state(self):
        return {
            "_m": self._m,
            "_n": self._n
        }
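The one-hot restriction from the module docstring plays out as follows in a multiclass setup; a hedged sketch (shapes, sizes and hyperparameter values are illustrative, only the API comes from the code above):

```python
import numpy as np
from n_network import N_Network, one_hot

# X: (samples, features), labels: (samples,) with classes 0..2
X = np.random.randn(300, 4)
labels = np.random.randint(0, 3, 300)
y = one_hot(labels, 3)  # multiclass training requires one-hot labels

params = dict(m=X.shape[0], n=X.shape[1],
              n_units=[4, 8, 3],
              g=[0, N_Network.relu, N_Network.softmax],
              gprime=[0, N_Network.relu_prime, N_Network.softmax_prime],
              optim='adam', epochs=500, alpha=0.01, multiclass=True)
net = N_Network(params)
costs = net.fit(X, y)
pred = net.predict(X)  # class indices, via argmax over the softmax output
```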
n_network/Utils.py | 44 (new file)
@@ -0,0 +1,44 @@
'''
__author__ = "Ricardo Montañana Gómez"
__copyright__ = "Copyright 2020, Ricardo Montañana Gómez"
__license__ = "MIT"
Utility functions to use with the classifier
'''

import numpy as np
import matplotlib.pyplot as plt


def one_hot(label, num):
    yht = np.zeros((label.size, num))
    yht[np.arange(label.size), label.ravel()] = 1
    return yht


def plot_decision_boundary(model, X, y, binary, title):
    y = y.T[0]
    # Set min and max values and give them some padding
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    h = 0.01
    # Generate a grid of points with distance h between them
    xx, yy = np.meshgrid(np.arange(x_min, x_max, h),
                         np.arange(y_min, y_max, h))
    # Predict the function value for the whole grid
    case = np.array(np.c_[xx.ravel(), yy.ravel()])
    if type(model).__name__ == 'N_Network':
        if binary:
            Z = model.predict(case)
        else:
            Z = model.predict_proba(case)
    else:
        Z = model.predict(case)
    Z = np.round(Z) if binary else Z
    Z = Z.reshape(xx.shape)
    # Plot the contour and training examples
    plt.title(title + ' Decision boundary')
    plt.contourf(xx, yy, Z, cmap=plt.cm.Spectral)
    plt.ylabel('x2')
    plt.xlabel('x1')
    plt.scatter(X[:, 0], X[:, 1], c=y, cmap=plt.cm.Spectral)
    plt.show()
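As a tiny worked example, `one_hot` maps class indices to indicator rows:

```python
import numpy as np
from n_network import one_hot

labels = np.array([0, 2, 1])
print(one_hot(labels, 3))
# [[1. 0. 0.]
#  [0. 0. 1.]
#  [0. 1. 0.]]
```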
n_network/__init__.py | 3 (new file)
@@ -0,0 +1,3 @@
from .Neural_Network import N_Network
from .Metrics import Metrics
from .Utils import plot_decision_boundary, one_hot
requirements.txt | 5 (new file)
@@ -0,0 +1,5 @@
numpy
scikit-learn
matplotlib
seaborn
git+https://github.com/doctorado-ml/stree
setup.py | 38 (new file)
@@ -0,0 +1,38 @@
import setuptools

__version__ = "1.0rc1"
__author__ = "Ricardo Montañana Gómez"


def readme():
    with open('README.md') as f:
        return f.read()


setuptools.setup(
    name='N_Network',
    version=__version__,
    license='MIT License',
    description='A personal implementation of a Neural Network',
    long_description=readme(),
    long_description_content_type='text/markdown',
    packages=setuptools.find_packages(),
    url='https://github.com/doctorado-ml/neuralnetwork',
    author=__author__,
    author_email='ricardo.montanana@alu.uclm.es',
    keywords='neural_network',
    classifiers=[
        'Development Status :: 4 - Beta',
        'License :: OSI Approved :: MIT License',
        'Programming Language :: Python :: 3.7',
        'Natural Language :: English',
        'Topic :: Scientific/Engineering :: Artificial Intelligence',
        'Intended Audience :: Science/Research'
    ],
    install_requires=[
        'scikit-learn>=0.23.0',
        'numpy',
        'matplotlib',
        'seaborn'
    ],
    zip_safe=False
)
test.ipynb | 245 (new file)
File diff suppressed because one or more lines are too long