mirror of https://github.com/Doctorado-ML/mufs.git (synced 2025-08-15 15:45:53 +00:00)
Implement Metric methods and tests

.gitignore (vendored, 3 lines changed)
@@ -128,4 +128,5 @@ dmypy.json
 # Pyre type checker
 .pyre/

 .vscode
+junk/*

Makefile (new file, 49 lines)
@@ -0,0 +1,49 @@
SHELL := /bin/bash
.DEFAULT_GOAL := help
.PHONY: coverage deps help lint push test doc build doc-clean

coverage: ## Run tests with coverage
	coverage erase
	coverage run -m unittest -v cfs.tests
	coverage report -m

deps: ## Install dependencies
	pip install -r requirements.txt

lint: ## Lint and static-check
	black cfs
	flake8 cfs
	mypy cfs

push: ## Push code with tags
	git push && git push --tags

test: ## Run tests
	python -m unittest -v cfs.tests

doc: ## Update documentation
	make -C docs --makefile=Makefile html

build: ## Build package
	rm -fr dist/*
	python setup.py sdist bdist_wheel

doc-clean: ## Clean documentation build
	make -C docs --makefile=Makefile clean

help: ## Show help message
	@IFS=$$'\n' ; \
	help_lines=(`fgrep -h "##" $(MAKEFILE_LIST) | fgrep -v fgrep | sed -e 's/\\$$//' | sed -e 's/##/:/'`); \
	printf "%s\n\n" "Usage: make [task]"; \
	printf "%-20s %s\n" "task" "help" ; \
	printf "%-20s %s\n" "------" "----" ; \
	for help_line in $${help_lines[@]}; do \
		IFS=$$':' ; \
		help_split=($$help_line) ; \
		help_command=`echo $${help_split[0]} | sed -e 's/^ *//' -e 's/ *$$//'` ; \
		help_info=`echo $${help_split[2]} | sed -e 's/^ *//' -e 's/ *$$//'` ; \
		printf '\033[36m'; \
		printf "%-20s %s" $$help_command ; \
		printf '\033[0m'; \
		printf "%s\n" $$help_info; \
	done

cfs/Entropy.py (new file, 39 lines)
@@ -0,0 +1,39 @@
import numpy as np


# Entropy
def entropy(Y):
    """
    Also known as Shannon Entropy
    Reference: https://en.wikipedia.org/wiki/Entropy_(information_theory)
    """
    unique, count = np.unique(Y, return_counts=True, axis=0)
    prob = count / len(Y)
    en = -np.sum(prob * np.log2(prob))
    return en


# Joint Entropy
def jEntropy(Y, X):
    """
    H(Y;X)
    Reference: https://en.wikipedia.org/wiki/Joint_entropy
    """
    YX = np.c_[Y, X]
    return entropy(YX)


# Conditional Entropy
def cEntropy(Y, X):
    """
    conditional entropy = Joint Entropy - Entropy of X
    H(Y|X) = H(Y;X) - H(X)
    Reference: https://en.wikipedia.org/wiki/Conditional_entropy
    """
    return jEntropy(Y, X) - entropy(X)


# Information Gain
def gain(Y, X):
    """
    Information Gain, I(Y;X) = H(Y) - H(Y|X)
    Reference: https://en.wikipedia.org/wiki/Information_gain_in_decision_trees#Formal_definition
    """
    return entropy(Y) - cEntropy(Y, X)
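
As a quick sanity check of these helpers (a minimal sketch, not part of the commit; the import path assumes the package root is on sys.path):

import numpy as np
from cfs.Entropy import entropy, cEntropy, gain

Y = np.array([0, 0, 1, 1])
X = np.array([0, 1, 0, 1])

print(entropy(Y))      # 1.0 bit: two equally likely labels
print(cEntropy(Y, X))  # 1.0: X is independent of Y, so knowing X does not help
print(gain(Y, X))      # 0.0 = H(Y) - H(Y|X), no information gained
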
cfs/Selection.py (modified)
@@ -1,3 +1,86 @@
+from math import log
+import numpy as np
+
+
+class Metrics:
+    @staticmethod
+    def conditional_entropy(x, y, base=2):
+        """quantifies the amount of information needed to describe the
+        outcome of y given that the value of x is known; computes H(Y|X)
+
+        Parameters
+        ----------
+        x : np.array
+            values of the variable
+        y : np.array
+            array of labels
+        base : int, optional
+            base of the logarithm, by default 2
+
+        Returns
+        -------
+        float
+            conditional entropy of y given x
+        """
+        xy = np.c_[x, y]
+        return Metrics.entropy(xy, base) - Metrics.entropy(x, base)
+
+    @staticmethod
+    def entropy(y, base=2):
+        """measure of the uncertainty in predicting the value of y
+
+        Parameters
+        ----------
+        y : np.array
+            array of labels
+        base : int, optional
+            base of the logarithm, by default 2
+
+        Returns
+        -------
+        float
+            entropy of y
+        """
+        _, count = np.unique(y, return_counts=True, axis=0)
+        proba = count.astype(float) / len(y)
+        proba = proba[proba > 0.0]
+        return np.sum(proba * np.log(1.0 / proba)) / log(base)
+
+    @staticmethod
+    def information_gain(x, y, base=2):
+        """Measures the reduction in uncertainty about the value of y when
+        the value of x is known (also called mutual information)
+        (https://www.sciencedirect.com/science/article/pii/S0020025519303603)
+
+        Parameters
+        ----------
+        x : np.array
+            values of the variable
+        y : np.array
+            array of labels
+        base : int, optional
+            base of the logarithm, by default 2
+
+        Returns
+        -------
+        float
+            information gained
+        """
+        return Metrics.entropy(y, base) - Metrics.conditional_entropy(
+            x, y, base
+        )
+
+    @staticmethod
+    def symmetrical_uncertainty(x, y):
+        """Computes 2 * I(X;Y) / (H(X) + H(Y)), a symmetric, normalized
+        variant of information gain that lies in [0, 1]"""
+        return (
+            2.0
+            * Metrics.information_gain(x, y)
+            / (Metrics.entropy(x) + Metrics.entropy(y))
+        )
+
+
 class CFS:
     def __init__(self, a):
         self.a = a

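A short illustration of how these metrics behave at the extremes (a sketch, not part of the commit; the module path cfs.Selection is inferred from the test imports below):

import numpy as np
from cfs.Selection import Metrics

y = np.array([0, 0, 1, 1])

# A perfectly correlated feature: H(y|x) = 0, so both scores reach their maximum
x = np.array([0, 0, 1, 1])
print(Metrics.information_gain(x, y))         # 1.0
print(Metrics.symmetrical_uncertainty(x, y))  # 1.0

# An independent feature: H(y|x) = H(y), so both scores drop to 0
x = np.array([0, 1, 0, 1])
print(Metrics.symmetrical_uncertainty(x, y))  # 0.0
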
cfs/tests/Metrics_test.py (new file, 90 lines)
@@ -0,0 +1,90 @@
import unittest
from sklearn.datasets import load_iris
from mdlp import MDLP

from ..Selection import Metrics


class Metrics_test(unittest.TestCase):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)
        mdlp = MDLP(random_state=1)
        X, self.y = load_iris(return_X_y=True)
        self.X = mdlp.fit_transform(X, self.y).astype("int64")
        self.m, self.n = self.X.shape

    def test_entropy(self):
        metric = Metrics()
        datasets = [
            ([0, 0, 0, 0, 1, 1, 1, 1], 2, 1.0),
            ([0, 1, 0, 2, 1, 2], 3, 1.0),
            ([0, 0, 0, 0, 0, 0, 0, 2, 2, 2], 2, 0.8812908992306927),
            ([1, 1, 1, 5, 2, 2, 3, 3, 3], 4, 0.9455305560363263),
            ([1, 1, 1, 2, 2, 3, 3, 3, 5], 4, 0.9455305560363263),
            ([1, 1, 5], 2, 0.9182958340544896),
            (self.y, 3, 0.999999999),
        ]
        for dataset, base, entropy in datasets:
            computed = metric.entropy(dataset, base)
            self.assertAlmostEqual(entropy, computed)

    def test_conditional_entropy(self):
        metric = Metrics()
        results_expected = [
            0.490953458537736,
            0.7110077966379169,
            0.15663362014829718,
            0.13032469395094992,
        ]
        for expected, col in zip(results_expected, range(self.n)):
            computed = metric.conditional_entropy(self.X[:, col], self.y, 3)
            self.assertAlmostEqual(expected, computed)
        self.assertAlmostEqual(
            0.6309297535714573,
            metric.conditional_entropy(
                [1, 3, 2, 3, 2, 1], [1, 2, 0, 1, 1, 2], 3
            ),
        )
        # https://planetcalc.com/8414/?joint=0.4%200%0A0.2%200.4&showDetails=1
        self.assertAlmostEqual(
            0.5509775004326938,
            metric.conditional_entropy([1, 1, 2, 2, 2], [0, 0, 0, 2, 2], 2),
        )

    def test_information_gain(self):
        metric = Metrics()
        results_expected = [
            0.5090465414622638,
            0.28899220336208287,
            0.8433663798517026,
            0.8696753060490499,
        ]
        for expected, col in zip(results_expected, range(self.n)):
            computed = metric.information_gain(self.X[:, col], self.y, 3)
            self.assertAlmostEqual(expected, computed)
        # https://planetcalc.com/8419/
        # ?_d=FrDfFN2COAhqh9Pb5ycqy5CeKgIOxlfSjKgyyIR.Q5L0np-g-hw6yv8M1Q8_
        results_expected = [
            0.806819679,
            0.458041805,
            1.336704086,
            1.378402748,
        ]
        for expected, col in zip(results_expected, range(self.n)):
            computed = metric.information_gain(self.X[:, col], self.y, 2)
            self.assertAlmostEqual(expected, computed)

    def test_symmetrical_uncertainty(self):
        metric = Metrics()
        results_expected = [
            0.33296547388990266,
            0.19068147573570668,
            0.810724587460511,
            0.870521418179061,
        ]
        for expected, col in zip(results_expected, range(self.n)):
            computed = metric.symmetrical_uncertainty(self.X[:, col], self.y)
            self.assertAlmostEqual(expected, computed)
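
The expected values for the small fixtures can be verified by hand. For [1, 1, 5] the label probabilities are 2/3 and 1/3, so

    H = -(2/3) * log2(2/3) - (1/3) * log2(1/3) ≈ 0.9182958340544896

which matches the entry in test_entropy. The whole suite runs with make test (or python -m unittest -v cfs.tests, as wired in the Makefile above).
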
cfs/tests/__init__.py (modified)
@@ -1,3 +1,4 @@
 from .CFS_test import CFS_test
+from .Metrics_test import Metrics_test

-__all__ = ["CFS_test"]
+__all__ = ["CFS_test", "Metrics_test"]