# neural-network/n_network/Metrics.py
"""Compute metrics for predicted data."""
__author__ = "Ricardo Montañana Gómez"
__copyright__ = "Copyright 2020, Ricardo Montañana Gómez"
__license__ = "MIT"
import numpy as np


class Metrics:
"""
True Positives (tp), These are the correctly predicted positive values
True Negatives (tn), These are the correctly predicted negative values
False Positives (fp), When actual class is target and predicted class is the other
False Negatives (fn), When actual class is reverse of target but predicted class is target
"""
_truth = None
_predicted = None
_tp = None
_fp = None
_fn = None
_num_classes = 0
def __init__(self, y=None, yhat=None):
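        # Both y and yhat may be integer label vectors or one-hot matrices;
        # _adapt normalizes them to label vectors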
self._truth = self._adapt(y, update_num=True)
self._predicted = self._adapt(yhat)
self._compute_parameters()
def _adapt(self, data, update_num=False):
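        # Data is assumed to already be integer labels when values exceed 1,
        # the array is 1-d, or it has a single column; otherwise it is taken
        # as one-hot and each row is decoded with argmax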
if data.max() > 1 or data.ndim == 1 or (data.ndim == 2 and data.shape[1] == 1):
if update_num:
self._num_classes = data.max() + 1
return data
else:
if update_num:
res = np.argmax(data, axis=1)
self._num_classes = res.max() + 1
return res
def _compute_param(self, set_a, set_b):
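        # Count the samples where both boolean masks hold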
return np.sum(np.logical_and(set_a, set_b))
def _compute_parameters(self):
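        # Per-class tp/fp/fn counts from boolean masks over truth and prediction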
        self._tp = np.zeros(self._num_classes, dtype=int)
        self._fp = np.zeros(self._num_classes, dtype=int)
        self._fn = np.zeros(self._num_classes, dtype=int)
for target in range(self._num_classes):
self._tp[target] = self._compute_param(
self._truth == target, self._predicted == target)
self._fp[target] = self._compute_param(
self._truth != target, self._predicted == target)
self._fn[target] = self._compute_param(
self._truth == target, self._predicted != target)
    def parameters(self):
        vmacro, vweigh, _, vmicro = self._compute_metrics()
        return dict(tp=self._tp, fp=self._fp, fn=self._fn,
                    macro=vmacro, weigh=vweigh, micro=vmicro)
def sets(self):
return self._truth, self._predicted
    def fp_indices(self, target):
        """Indices of samples falsely predicted as target."""
        return np.where(
            np.logical_and(self._truth != target, self._predicted == target))[0]
    def fn_indices(self, target):
        """Indices of target samples predicted as another class."""
        return np.where(
            np.logical_and(self._truth == target, self._predicted != target))[0]
def correct(self):
"""
Return the number of correct predictions
"""
return np.sum(self._tp)
def _get_dict(self, vmacro, vweigh, vmicro):
return dict(macro=vmacro, weigh=vweigh, micro=vmicro)
def recall(self, target):
"""
recall, Recall is the ratio of correctly predicted positive observations to the all observations in positive class
"""
if target == 'all':
macro, weigh, _, micro = self._compute_metrics()
return self._get_dict(macro['rec'], weigh['rec'], micro['rec'])
else:
tp = self._tp[target]
fn = self._fn[target]
if (tp + fn) > 0:
return tp / (tp + fn)
return 0
def precision(self, target):
"""
precision, Precision is the ratio of correctly predicted positive observations to the total predicted positive observations
"""
if target == 'all':
macro, weigh, _, micro = self._compute_metrics()
return self._get_dict(macro['prec'], weigh['prec'], micro['prec'])
else:
tp = self._tp[target]
fp = self._fp[target]
if (tp + fp) > 0:
return tp / (tp + fp)
return 0
def accuracy(self):
"""
accuracy, Accuracy is a ratio of correctly predicted observations to the total observations
"""
tp = np.sum(self._tp)
elements = self._truth.size
if (elements) > 0:
return tp / elements
return 0
def f1(self, target):
"""
f1 score, is the weighted average of Precision and Recall
"""
if target == 'all':
macro, weigh, _, micro = self._compute_metrics()
return self._get_dict(macro['f1'], weigh['f1'], micro['f1'])
else:
divider = self.recall(target) + self.precision(target)
if divider != 0:
return 2 * (self.recall(target) * self.precision(target)) / divider
return 0
def confusion_matrix(self):
"""
Return the confusion matrix associated to the data provided
"""
result = np.zeros((self._num_classes, self._num_classes), dtype=int)
for target in reversed(range(self._num_classes)):
for j in range(self._num_classes):
result[target][j] = self._compute_param(
self._truth == target, self._predicted == j)
return result
def debug(self):
for target in range(self._num_classes):
tp = self._tp[target]
fp = self._fp[target]
fn = self._fn[target]
print("target=[{0}], tp=[{1}], fp=[{2}], fn=[{3}]".format(
target, tp, fp, fn))
print("Truth shape=", self._truth.shape,
" Prediction shape=", self._predicted.shape)
print("Number of classes:", self._num_classes)
def _compute_micro_metrics(self):
        # In single-label classification every false positive for one class
        # is a false negative for another, so micro precision, micro recall
        # and micro f1 all coincide
        ttp = np.sum(self._tp)
        tfp = np.sum(self._fp)
        if ttp + tfp == 0:
            return 0.0, 0.0, 0.0
        pr = re = ttp / (ttp + tfp)
        return 2 * (pr * re) / (pr + re), pr, re
def _compute_metrics(self):
tf1 = tpr = tre = 0.0
twf1 = twpr = twre = 0.0
total_samples = 0
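        # Macro metrics average per-class scores uniformly; weighted metrics
        # average them by class support (number of true samples per class)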
for target in range(self._num_classes):
f1 = self.f1(target)
pr = self.precision(target)
re = self.recall(target)
num_samples = len(np.where(self._truth == target)[0])
tf1 += f1
tpr += pr
tre += re
twf1 += f1 * num_samples
twpr += pr * num_samples
twre += re * num_samples
total_samples += num_samples
        tf1 /= self._num_classes
        tpr /= self._num_classes
        tre /= self._num_classes
        if total_samples > 0:
            twf1 /= total_samples
            twpr /= total_samples
            twre /= total_samples
        mf1, mpr, mre = self._compute_micro_metrics()
        macro = dict(f1=tf1, prec=tpr, rec=tre)
        weigh = dict(f1=twf1, prec=twpr, rec=twre)
        micro = dict(f1=mf1, prec=mpr, rec=mre)
        return macro, weigh, total_samples, micro
def classification_report(self, title='', digits=6):
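        # Prints a per-class table (f1, precision, recall, support) followed
        # by macro and weighted averages and the overall accuracy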
        def format_line(a, b, c, d, e):
            return ("[{0:^5}]\t[{1:.{digits}f}]\t[{2:.{digits}f}]\t"
                    "[{3:.{digits}f}]\t[{4:5d}]").format(
                        a, b, c, d, e, digits=digits)
print(
"======================== {0} ========================".format(title))
header = ['target', 'f1-score', 'precision', 'recall', 'support']
print("{d[0]:^7}\t{d[1]:^{length}.{length}}\t{d[2]:^{length}.{length}}\t{d[3]:^{length}.{length}}\t{d[4]:^7}".format(
d=header, length=digits + 4))
for target in range(self._num_classes):
f1 = self.f1(target)
pr = self.precision(target)
re = self.recall(target)
num_samples = len(np.where(self._truth == target)[0])
print(format_line(target, f1, pr, re, num_samples))
print("")
        macro, weigh, total_samples, _ = self._compute_metrics()
print(format_line(
'macro', macro['f1'], macro['prec'], macro['rec'], total_samples))
print(format_line(
'weig.', weigh['f1'], weigh['prec'], weigh['rec'], total_samples))
print("accuracy=[{0:.{digits}f}]".format(
self.accuracy(), digits=digits))
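

# ---------------------------------------------------------------------------
# Minimal usage sketch (illustration only, not part of the original module):
# exercises Metrics on small made-up label arrays and prints the
# classification report and the confusion matrix.
# ---------------------------------------------------------------------------
if __name__ == "__main__":
    y = np.array([0, 0, 1, 1, 2, 2])      # ground-truth labels
    yhat = np.array([0, 1, 1, 1, 2, 0])   # predicted labels
    metrics = Metrics(y, yhat)
    metrics.classification_report(title="demo")
    print(metrics.confusion_matrix())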