13 Commits

Author SHA1 Message Date
f85ae33e40 Update main.yml requirements 2022-05-19 01:21:03 +02:00
971314c5ed Update build.yml 2022-05-18 17:35:37 +02:00
95a3df360e add sonar build 2022-05-18 12:19:32 +02:00
d610f86483 Fix 3.10 python version 2022-05-18 11:09:33 +02:00
f313032b98 Add python version 3.10 2022-05-18 11:08:00 +02:00
4ef1e937de Add version to _version file, method and test 2022-05-18 10:48:40 +02:00
a0f172ac13 Update version number and sample 2021-10-28 14:30:28 +02:00
Ricardo Montañana Gómez
cfb37d2f6c Merge pull request #3 from Doctorado-ML/Add-IWSS-(#2)
Add iwss (#2)
2021-10-28 12:39:57 +02:00
5d1720c9ae Update ci file 2021-10-28 12:22:21 +02:00
1c5f1977e5 Complete iwss based implementation (#2) 2021-10-28 11:55:40 +02:00
27f8a370c5 Begin IWSS implementation
Update requirements
Create requirements for dev
2021-10-10 19:06:57 +02:00
Ricardo Montañana Gómez
9d74bc8a70 Add package version badge to README 2021-08-17 12:02:15 +02:00
Ricardo Montañana Gómez
ba7dc3eeb3 Merge pull request #1 from Doctorado-ML/updateCI
Update ci file
2021-08-02 18:46:25 +02:00
16 changed files with 214 additions and 30 deletions

23
.github/workflows/build.yml vendored Normal file
View File

@@ -0,0 +1,23 @@
name: Build
on:
push:
branches:
- main
jobs:
build:
name: Build
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
with:
fetch-depth: 0
- uses: sonarsource/sonarqube-scan-action@master
env:
SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }}
SONAR_HOST_URL: ${{ secrets.SONAR_HOST_URL }}
# Fail the job when the Quality Gate is red; this would typically be used to
# block a deployment. Remove the following step if the job should not fail.
- uses: sonarsource/sonarqube-quality-gate-action@master
timeout-minutes: 5
env:
SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }}

View File

@@ -13,7 +13,7 @@ jobs:
strategy: strategy:
matrix: matrix:
os: [macos-latest, ubuntu-latest] os: [macos-latest, ubuntu-latest]
python: [3.8] python: [3.8, "3.10"]
steps: steps:
- uses: actions/checkout@v2 - uses: actions/checkout@v2
@@ -26,8 +26,8 @@ jobs:
pip install -q --upgrade pip pip install -q --upgrade pip
pip install -q cython pip install -q cython
pip install -q numpy pip install -q numpy
pip install -q git+git://github.com/doctorado-ml/mdlp pip install -q git+https://github.com/doctorado-ml/mdlp
pip install -q -r requirements.txt pip install -q -r requirements/dev.txt
pip install -q --upgrade codecov coverage black flake8 codacy-coverage pip install -q --upgrade codecov coverage black flake8 codacy-coverage
- name: Lint - name: Lint
run: | run: |

View File

@@ -1,6 +1,6 @@
SHELL := /bin/bash SHELL := /bin/bash
.DEFAULT_GOAL := help .DEFAULT_GOAL := help
.PHONY: coverage deps help lint push test doc build .PHONY: coverage deps help lint push test build
coverage: ## Run tests with coverage coverage: ## Run tests with coverage
coverage erase coverage erase
@@ -26,9 +26,6 @@ build: ## Build package
rm -fr build/* rm -fr build/*
python setup.py sdist bdist_wheel python setup.py sdist bdist_wheel
doc-clean: ## Update documentation
make -C docs --makefile=Makefile clean
help: ## Show help message help: ## Show help message
@IFS=$$'\n' ; \ @IFS=$$'\n' ; \
help_lines=(`fgrep -h "##" $(MAKEFILE_LIST) | fgrep -v fgrep | sed -e 's/\\$$//' | sed -e 's/##/:/'`); \ help_lines=(`fgrep -h "##" $(MAKEFILE_LIST) | fgrep -v fgrep | sed -e 's/\\$$//' | sed -e 's/##/:/'`); \

View File

@@ -1,6 +1,10 @@
![CI](https://github.com/Doctorado-ML/mufs/workflows/CI/badge.svg) ![CI](https://github.com/Doctorado-ML/mufs/workflows/CI/badge.svg)
[![Codacy Badge](https://app.codacy.com/project/badge/Grade/66ad727eb13e4c7a8816db1e44d994a7)](https://www.codacy.com/gh/Doctorado-ML/mufs/dashboard?utm_source=github.com&utm_medium=referral&utm_content=Doctorado-ML/mufs&utm_campaign=Badge_Grade) [![Codacy Badge](https://app.codacy.com/project/badge/Grade/66ad727eb13e4c7a8816db1e44d994a7)](https://www.codacy.com/gh/Doctorado-ML/mufs/dashboard?utm_source=github.com&utm_medium=referral&utm_content=Doctorado-ML/mufs&utm_campaign=Badge_Grade)
[![Language grade: Python](https://img.shields.io/lgtm/grade/python/g/Doctorado-ML/mufs.svg?logo=lgtm&logoWidth=18)](https://lgtm.com/projects/g/Doctorado-ML/mufs/context:python) [![Language grade: Python](https://img.shields.io/lgtm/grade/python/g/Doctorado-ML/mufs.svg?logo=lgtm&logoWidth=18)](https://lgtm.com/projects/g/Doctorado-ML/mufs/context:python)
[![PyPI version](https://badge.fury.io/py/MUFS.svg)](https://badge.fury.io/py/MUFS)
![Python 3.8+](https://img.shields.io/badge/python-3.8%2B-brightgreen)
[![Security Rating](https://haystack.rmontanana.es:25000/api/project_badges/measure?project=mufs&metric=security_rating&token=1119a3bfd4025d50ef3009a44c600c16670ee31a)](https://haystack.rmontanana.es:25000/dashboard?id=mufs)
[![Technical Debt](https://haystack.rmontanana.es:25000/api/project_badges/measure?project=mufs&metric=sqale_index&token=1119a3bfd4025d50ef3009a44c600c16670ee31a)](https://haystack.rmontanana.es:25000/dashboard?id=mufs)
# MUFS # MUFS
@@ -15,3 +19,7 @@ Proceedings, Twentieth International Conference on Machine Learning. ed. / T. Fa
### Correlation-based Feature Selection ### Correlation-based Feature Selection
Hall, M. A. (1999), 'Correlation-based Feature Selection for Machine Learning'. Hall, M. A. (1999), 'Correlation-based Feature Selection for Machine Learning'.
### IWSS
Based on: P. Bermejo, J. A. Gamez and J. M. Puerta, "Incremental Wrapper-based subset Selection with replacement: An advantageous alternative to sequential forward selection," 2009 IEEE Symposium on Computational Intelligence and Data Mining, 2009, pp. 367-374, doi: 10.1109/CIDM.2009.4938673.

View File

@@ -3,6 +3,7 @@ from sys import float_info
from itertools import combinations from itertools import combinations
import numpy as np import numpy as np
from .Metrics import Metrics from .Metrics import Metrics
from ._version import __version__
class MUFS: class MUFS:
@@ -26,7 +27,7 @@ class MUFS:
""" """
def __init__(self, max_features=None, discrete=True): def __init__(self, max_features=None, discrete=True):
self._max_features = max_features self.max_features = max_features
self._discrete = discrete self._discrete = discrete
self.symmetrical_uncertainty = ( self.symmetrical_uncertainty = (
Metrics.symmetrical_uncertainty Metrics.symmetrical_uncertainty
@@ -40,6 +41,11 @@ class MUFS:
) )
self._fitted = False self._fitted = False
@staticmethod
def version() -> str:
    """Return the version of the package.

    Returns
    -------
    str
        the ``__version__`` value imported from the ``_version`` module
    """
    package_version = __version__
    return package_version
def _initialize(self, X, y): def _initialize(self, X, y):
"""Initialize the attributes so support multiple calls using same """Initialize the attributes so support multiple calls using same
object object
@@ -53,8 +59,10 @@ class MUFS:
""" """
self.X_ = X self.X_ = X
self.y_ = y self.y_ = y
if self._max_features is None: if self.max_features is None:
self._max_features = X.shape[1] self._max_features = X.shape[1]
else:
self._max_features = self.max_features
self._result = None self._result = None
self._scores = [] self._scores = []
self._su_labels = None self._su_labels = None
@@ -105,7 +113,9 @@ class MUFS:
def _compute_merit(self, features): def _compute_merit(self, features):
"""Compute the merit function for cfs algorithms """Compute the merit function for cfs algorithms
"Good feature subsets contain features highly correlated with
(predictive of) the class, yet uncorrelated with (not predictive of)
each other"
Parameters Parameters
---------- ----------
features : list features : list
@@ -264,3 +274,58 @@ class MUFS:
list of scores of the features selected list of scores of the features selected
""" """
return self._scores if self._fitted else [] return self._scores if self._fitted else []
def iwss(self, X, y, threshold):
    """Incremental Wrapper Subset Selection

    Features are visited in decreasing order of the score returned by
    ``_compute_su_labels``. The two best ranked features seed the
    selection; each following feature is kept when the merit of the
    enlarged subset is greater than the best merit found so far, or when
    its relative change with respect to that merit is below ``threshold``.
    The search stops at the first rejected feature or as soon as
    ``max_features`` features have been selected.

    Parameters
    ----------
    X : np.array
        array of features
    y : np.array
        vector of labels
    threshold : float
        maximum relative change of the merit tolerated to keep a feature
        that does not improve it, must be in the interval [0, 0.5]

    Returns
    -------
    self
        self

    Raises
    ------
    ValueError
        if the threshold is less than 0 or greater than 0.5
    """
    if threshold < 0 or threshold > 0.5:
        raise ValueError(
            "Threshold cannot be less than 0 or greater than 0.5"
        )
    self._initialize(X, y)
    s_list = self._compute_su_labels()
    # Feature indices sorted from the highest to the lowest score
    features = (-s_list).argsort().tolist()
    limit = self._max_features
    candidates = []
    # Guarded so that a dataset without features or a limit below one
    # yields an empty selection instead of an IndexError
    if features and limit >= 1:
        first_feature = features.pop(0)
        candidates.append(first_feature)
        self._scores.append(s_list[first_feature])
    # The merit is only computed once a second feature can be added
    if candidates and features and limit >= 2:
        candidates.append(features.pop(0))
        merit = self._compute_merit(candidates)
        self._scores.append(merit)
        for feature in features:
            # Checked before adding so that a limit of two is honoured
            if len(candidates) >= limit:
                break
            candidates.append(feature)
            merit_new = self._compute_merit(candidates)
            delta = abs(merit - merit_new) / merit if merit != 0.0 else 0.0
            if merit_new > merit or delta < threshold:
                # Only a real improvement moves the reference merit
                if merit_new > merit:
                    merit = merit_new
                self._scores.append(merit_new)
            else:
                # First rejected feature ends the search
                candidates.pop()
                break
    self._result = candidates
    return self

View File

@@ -1,9 +1,8 @@
from .Selection import MUFS from .Selection import MUFS
__version__ = "0.1.1"
__author__ = "Ricardo Montañana Gómez" __author__ = "Ricardo Montañana Gómez"
__author_email__ = "Ricardo.Montanana@alu.uclm.es" __author_email__ = "Ricardo.Montanana@alu.uclm.es"
__copyright__ = "Copyright 2021, Ricardo Montañana Gómez" __copyright__ = "Copyright 2021-2022, Ricardo Montañana Gómez"
__license__ = "MIT License" __license__ = "MIT License"
__all__ = ["MUFS"] __all__ = ["MUFS"]

1
mufs/_version.py Normal file
View File

@@ -0,0 +1 @@
__version__ = "0.1.3"

View File

@@ -1,11 +1,14 @@
import unittest import unittest
import os
import pandas as pd
import numpy as np
from mdlp import MDLP from mdlp import MDLP
from sklearn.datasets import load_wine, load_iris from sklearn.datasets import load_wine, load_iris
from ..Selection import MUFS from ..Selection import MUFS
from .._version import __version__
class MUFS_test(unittest.TestCase): class MUFSTest(unittest.TestCase):
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs) super().__init__(*args, **kwargs)
mdlp = MDLP(random_state=1) mdlp = MDLP(random_state=1)
@@ -15,6 +18,11 @@ class MUFS_test(unittest.TestCase):
mdlp = MDLP(random_state=1) mdlp = MDLP(random_state=1)
self.X_i = mdlp.fit_transform(self.X_ic, self.y_i).astype("int64") self.X_i = mdlp.fit_transform(self.X_ic, self.y_i).astype("int64")
def test_version(self):
    """The version reported by MUFS matches the one in _version."""
    reported = MUFS().version()
    self.assertEqual(__version__, reported)
def assertListAlmostEqual(self, list1, list2, tol=7): def assertListAlmostEqual(self, list1, list2, tol=7):
self.assertEqual(len(list1), len(list2)) self.assertEqual(len(list1), len(list2))
for a, b in zip(list1, list2): for a, b in zip(list1, list2):
@@ -32,7 +40,7 @@ class MUFS_test(unittest.TestCase):
def test_csf_wine(self): def test_csf_wine(self):
mufs = MUFS() mufs = MUFS()
expected = [6, 12, 9, 4, 10, 0] expected = [6, 12, 9, 4, 10, 0]
self.assertListAlmostEqual( self.assertListEqual(
expected, mufs.cfs(self.X_w, self.y_w).get_results() expected, mufs.cfs(self.X_w, self.y_w).get_results()
) )
expected = [ expected = [
@@ -78,7 +86,7 @@ class MUFS_test(unittest.TestCase):
mufs = MUFS() mufs = MUFS()
expected = [3, 2, 0, 1] expected = [3, 2, 0, 1]
computed = mufs.cfs(self.X_i, self.y_i).get_results() computed = mufs.cfs(self.X_i, self.y_i).get_results()
self.assertListAlmostEqual(expected, computed) self.assertListEqual(expected, computed)
expected = [ expected = [
0.870521418179061, 0.870521418179061,
0.8968651482682227, 0.8968651482682227,
@@ -148,3 +156,46 @@ class MUFS_test(unittest.TestCase):
0.44518278979085646, 0.44518278979085646,
] ]
self.assertListAlmostEqual(expected, mufs.get_scores()) self.assertListAlmostEqual(expected, mufs.get_scores())
def test_iwss_wine(self):
    """IWSS with threshold 0.2 on the X_w / y_w fixture.

    Checks both the selected features and the recorded scores.
    """
    selector = MUFS()
    selected = selector.iwss(self.X_w, self.y_w, 0.2).get_results()
    self.assertListEqual([6, 9, 12], selected)
    expected_scores = [
        0.5218299405215557,
        0.5947822876110085,
        0.4877384978817362,
    ]
    self.assertListAlmostEqual(expected_scores, selector.get_scores())
def test_iwss_wine_max_features(self):
    """IWSS with threshold 0.4 and max_features=3 on the X_w / y_w fixture.

    Checks both the selected features and the recorded scores.
    """
    selector = MUFS(max_features=3)
    selected = selector.iwss(self.X_w, self.y_w, 0.4).get_results()
    self.assertListEqual([6, 9, 12], selected)
    expected_scores = [
        0.5218299405215557,
        0.5947822876110085,
        0.4877384978817362,
    ]
    self.assertListAlmostEqual(expected_scores, selector.get_scores())
def test_iwss_exception(self):
    """Thresholds above 0.5 or below 0 must raise ValueError."""
    selector = MUFS()
    for bad_threshold in (0.51, -0.01):
        with self.assertRaises(ValueError):
            selector.iwss(self.X_w, self.y_w, bad_threshold)
def test_iwss_better_merit_condition(self):
    """IWSS with threshold 0.3 on the balloons_R.dat file of this folder.

    The "clase" column holds the labels, every other column is a feature.
    """
    here = os.path.dirname(os.path.abspath(__file__))
    frame = pd.read_csv(
        os.path.join(here, "balloons_R.dat"), sep="\t", index_col=0
    )
    samples = frame.drop("clase", axis=1).to_numpy()
    labels = frame["clase"].to_numpy()
    selector = MUFS()
    computed = selector.iwss(samples, labels, 0.3).get_results()
    self.assertListEqual([0, 2, 3, 1], computed)
def test_iwss_empty(self):
    """IWSS with threshold 0.3 after dropping columns 0 and 1 of X_i.

    NOTE(review): presumably only two features remain, so nothing is left
    to evaluate after the two seed features; confirm against the fixture.
    """
    selector = MUFS()
    reduced = np.delete(self.X_i, [0, 1], 1)
    computed = selector.iwss(reduced, self.y_i, 0.3).get_results()
    self.assertListEqual(computed, [1, 0])

View File

@@ -6,7 +6,7 @@ from mdlp import MDLP
from ..Selection import Metrics from ..Selection import Metrics
class Metrics_test(unittest.TestCase): class MetricsTest(unittest.TestCase):
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs) super().__init__(*args, **kwargs)
mdlp = MDLP(random_state=1) mdlp = MDLP(random_state=1)

View File

@@ -1,4 +1,4 @@
from .MUFS_test import MUFS_test from .MUFS_test import MUFSTest
from .Metrics_test import Metrics_test from .Metrics_test import MetricsTest
__all__ = ["MUFS_test", "Metrics_test"] __all__ = ["MUFSTest", "MetricsTest"]

17
mufs/tests/balloons_R.dat Executable file
View File

@@ -0,0 +1,17 @@
f1 f2 f3 f4 clase
1 0.968246 -0.968246 0.968246 0.968246 1
2 0.968246 -0.968246 0.968246 -0.968246 1
3 0.968246 -0.968246 -0.968246 0.968246 1
4 0.968246 -0.968246 -0.968246 -0.968246 1
5 0.968246 0.968246 0.968246 0.968246 1
6 0.968246 0.968246 0.968246 -0.968246 0
7 0.968246 0.968246 -0.968246 0.968246 0
8 0.968246 0.968246 -0.968246 -0.968246 0
9 -0.968246 -0.968246 0.968246 0.968246 1
10 -0.968246 -0.968246 0.968246 -0.968246 0
11 -0.968246 -0.968246 -0.968246 0.968246 0
12 -0.968246 -0.968246 -0.968246 -0.968246 0
13 -0.968246 0.968246 0.968246 0.968246 1
14 -0.968246 0.968246 0.968246 -0.968246 0
15 -0.968246 0.968246 -0.968246 0.968246 0
16 -0.968246 0.968246 -0.968246 -0.968246 0

3
requirements/dev.txt Normal file
View File

@@ -0,0 +1,3 @@
-r production.txt
mdlp
pandas

View File

@@ -1,2 +1 @@
scikit-learn>0.24 scikit-learn>0.24
mdlp

View File

@@ -1,4 +1,5 @@
import warnings import warnings
import time
from mufs import MUFS from mufs import MUFS
from mufs.Metrics import Metrics from mufs.Metrics import Metrics
from stree import Stree from stree import Stree
@@ -26,16 +27,26 @@ for i in range(n):
# Classification # Classification
warnings.filterwarnings("ignore") warnings.filterwarnings("ignore")
print("CFS") print("CFS")
now = time.time()
cfs_f = mufsc.cfs(X, y).get_results() cfs_f = mufsc.cfs(X, y).get_results()
print(cfs_f) time_cfs = time.time() - now
print(cfs_f, "items: ", len(cfs_f), f"time: {time_cfs:.3f} seconds")
print("FCBF") print("FCBF")
fcfb_f = mufsc.fcbf(X, y, 5e-2).get_results() now = time.time()
print(fcfb_f, len(fcfb_f)) fcbf_f = mufsc.fcbf(X, y, 0.07).get_results()
time_fcbf = time.time() - now
print(fcbf_f, "items: ", len(fcbf_f), f"time: {time_fcbf:.3f} seconds")
now = time.time()
print("IWSS")
iwss_f = mufsc.iwss(X, y, 0.5).get_results()
time_iwss = time.time() - now
print(iwss_f, "items: ", len(iwss_f), f"time: {time_iwss:.3f} seconds")
print("X.shape=", X.shape) print("X.shape=", X.shape)
clf = Stree(random_state=0) clf = Stree(random_state=0)
print("Accuracy whole dataset", clf.fit(X, y).score(X, y)) print("Accuracy whole dataset", clf.fit(X, y).score(X, y))
clf = Stree(random_state=0) clf = Stree(random_state=0)
print("Accuracy cfs", clf.fit(X[:, cfs_f], y).score(X[:, cfs_f], y)) print("Accuracy cfs", clf.fit(X[:, cfs_f], y).score(X[:, cfs_f], y))
clf = Stree(random_state=0) clf = Stree(random_state=0)
subf = fcfb_f print("Accuracy fcfb", clf.fit(X[:, fcbf_f], y).score(X[:, fcbf_f], y))
print("Accuracy fcfb", clf.fit(X[:, subf], y).score(X[:, subf], y)) clf = Stree(random_state=0)
print("Accuracy iwss", clf.fit(X[:, iwss_f], y).score(X[:, iwss_f], y))

View File

@@ -1,3 +1,4 @@
import os
import setuptools import setuptools
@@ -6,9 +7,10 @@ def readme():
return f.read() return f.read()
def get_data(field: str): def get_data(field):
item = "" item = ""
with open("mufs/__init__.py") as f: file_name = "_version.py" if field == "version" else "__init__.py"
with open(os.path.join("mufs", file_name)) as f:
for line in f.readlines(): for line in f.readlines():
if line.startswith(f"__{field}__"): if line.startswith(f"__{field}__"):
delim = '"' if '"' in line else "'" delim = '"' if '"' in line else "'"
@@ -19,6 +21,11 @@ def get_data(field: str):
return item return item
def get_requirements():
    """Return the requirements listed in requirements/production.txt.

    The path is relative to the current working directory. Blank lines
    and comment lines (starting with ``#``) are dropped and surrounding
    whitespace is removed, so only requirement strings are returned.

    Returns
    -------
    list of str
        one entry per requirement line, in file order
    """
    # Explicit encoding so the result does not depend on the locale
    with open("requirements/production.txt", encoding="utf-8") as f:
        stripped = (line.strip() for line in f)
        return [line for line in stripped if line and not line.startswith("#")]
setuptools.setup( setuptools.setup(
name="MUFS", name="MUFS",
version=get_data("version"), version=get_data("version"),
@@ -38,11 +45,13 @@ setuptools.setup(
"Development Status :: 4 - Beta", "Development Status :: 4 - Beta",
"License :: OSI Approved :: " + get_data("license"), "License :: OSI Approved :: " + get_data("license"),
"Programming Language :: Python :: 3.8", "Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Natural Language :: English", "Natural Language :: English",
"Topic :: Scientific/Engineering :: Artificial Intelligence", "Topic :: Scientific/Engineering :: Artificial Intelligence",
"Intended Audience :: Science/Research", "Intended Audience :: Science/Research",
], ],
install_requires=["scikit-learn"], install_requires=get_requirements(),
test_suite="mufs.tests", test_suite="mufs.tests",
zip_safe=False, zip_safe=False,
) )

1
sonar-project.properties Normal file
View File

@@ -0,0 +1 @@
sonar.projectKey=mufs