28 Commits
v0.1.1 ... main

Author SHA1 Message Date
0fdd754050 Add constant features treatment 2023-10-13 13:35:24 +02:00
7035cc4edc Update readme 2022-05-19 17:52:54 +02:00
edc8816041 Update Setup and __init__ 2022-05-19 17:46:00 +02:00
20db8c5745 Add version to _version file, method and test 2022-05-19 17:36:59 +02:00
a9384685fe Clean main.yml 2022-05-19 17:27:40 +02:00
86aaf23dd9 Patch main.yml 2022-05-19 17:18:45 +02:00
9395e8cc23 Patch main.yml 2022-05-19 17:08:44 +02:00
5723da9535 Patch main.yml 2022-05-19 16:46:25 +02:00
fb4ed468b0 Patch main.yml 2022-05-19 16:37:19 +02:00
57334a0b74 Patch main.yml 2022-05-19 16:31:37 +02:00
c47f69847e Patch main.yml 2022-05-19 16:22:31 +02:00
4532309309 Patch main.yml 2022-05-19 16:09:36 +02:00
aa53e3dbc0 update main.yml 2022-05-19 14:22:10 +02:00
2861e22c57 Update main using checkout@v3 2022-05-19 12:53:14 +02:00
e0acd6d239 Update main.yml 2022-05-19 12:30:29 +02:00
3d98a39d4b Update sonar.sources 2022-05-19 11:57:44 +02:00
1a4de38328 Update sonar.project.properties 2022-05-19 11:52:23 +02:00
a9c40f1fb7 Fix issue in gh action 2022-05-19 11:46:48 +02:00
81da48ec31 Fix format issue 2022-05-19 11:41:09 +02:00
2548ab8533 Update formatter version 2022-05-19 11:37:45 +02:00
08cade5dec Add sonarqube scanner to gh actions 2022-05-19 11:21:55 +02:00
0a13f5e5eb Update main.yml requirements 2022-05-19 01:20:11 +02:00
a0f172ac13 Update version number and sample 2021-10-28 14:30:28 +02:00
Ricardo Montañana Gómez
cfb37d2f6c Merge pull request #3 from Doctorado-ML/Add-IWSS-(#2)
Add iwss (#2)
2021-10-28 12:39:57 +02:00
5d1720c9ae Update ci file 2021-10-28 12:22:21 +02:00
1c5f1977e5 Complete iwss based implementation (#2) 2021-10-28 11:55:40 +02:00
27f8a370c5 Begin IWSS implementation
Update requirements
Create requirements for dev
2021-10-10 19:06:57 +02:00
Ricardo Montañana Gómez
9d74bc8a70 Add package version badge to README 2021-08-17 12:02:15 +02:00
16 changed files with 230 additions and 37 deletions

View File

@@ -12,11 +12,13 @@ jobs:
runs-on: ${{ matrix.os }}
strategy:
matrix:
os: [macos-latest, ubuntu-latest]
python: [3.8]
os: [ubuntu-latest]
python: ["3.10"]
steps:
- uses: actions/checkout@v2
- uses: actions/checkout@v3
with:
fetch-depth: 0
- name: Set up Python ${{ matrix.python }}
uses: actions/setup-python@v2
with:
@@ -26,14 +28,37 @@ jobs:
pip install -q --upgrade pip
pip install -q cython
pip install -q numpy
pip install -q git+git://github.com/doctorado-ml/mdlp
pip install -q -r requirements.txt
pip install -q --upgrade codecov coverage black flake8 codacy-coverage
pip install -q git+https://github.com/doctorado-ml/mdlp
pip install -q -r requirements/dev.txt
pip install -q --upgrade codecov coverage black flake8 codacy-coverage unittest-xml-reporting
- name: Lint
run: |
black --check --diff mufs
flake8 --count mufs
- name: Tests & coverage
run: |
coverage run -m unittest -v mufs.tests
mkdir .report
coverage run -m xmlrunner -v mufs.tests -o .report
coverage xml -i -o .report/coverage.xml
coverage report -m --fail-under=100
- name: Get project version
run: echo "project_version=$(git describe --tags --abbrev=0)" >> $GITHUB_ENV
- name: Override Coverage Source Path for Sonar
run: sed -i 's/\/home\/runner\/work\/mufs\/mufs\//\/github\/workspace\//g' .report/coverage.xml
- name: SonarQube scanner
uses: sonarsource/sonarqube-scan-action@master
env:
SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }}
SONAR_HOST_URL: ${{ secrets.SONAR_HOST_URL }}
with:
args: >
-Dsonar.projectVersion=${{ env.project_version }}
-Dsonar.python.coverage.reportPaths=.report/coverage.xml
-Dsonar.python.xunit.reportPath=.report/TEST*
# The Quality Gate step below fails the job when the Quality Gate is red.
# This would typically be used to fail a deployment.
- name: Quality Gate
uses: sonarsource/sonarqube-quality-gate-action@master
timeout-minutes: 5
env:
SONAR_TOKEN: ${{ secrets.SONAR_TOKEN }}

View File

@@ -1,12 +1,12 @@
repos:
- repo: https://github.com/ambv/black
rev: 20.8b1
rev: 22.3.0
hooks:
- id: black
exclude: ".virtual_documents"
language_version: python3.8
- repo: https://gitlab.com/pycqa/flake8
rev: 3.8.4
rev: 3.9.2
hooks:
- id: flake8
exclude: ".virtual_documents"
@@ -16,7 +16,7 @@ repos:
# - id: mypy
# # args: [--strict, --ignore-missing-imports]
- repo: https://github.com/pre-commit/pre-commit-hooks
rev: v3.4.0
rev: v4.2.0
hooks:
- id: trailing-whitespace
- id: check-case-conflict

View File

@@ -1,6 +1,6 @@
SHELL := /bin/bash
.DEFAULT_GOAL := help
.PHONY: coverage deps help lint push test doc build
.PHONY: coverage deps help lint push test build
coverage: ## Run tests with coverage
coverage erase
@@ -26,9 +26,6 @@ build: ## Build package
rm -fr build/*
python setup.py sdist bdist_wheel
doc-clean: ## Update documentation
make -C docs --makefile=Makefile clean
help: ## Show help message
@IFS=$$'\n' ; \
help_lines=(`fgrep -h "##" $(MAKEFILE_LIST) | fgrep -v fgrep | sed -e 's/\\$$//' | sed -e 's/##/:/'`); \

View File

@@ -1,6 +1,10 @@
![CI](https://github.com/Doctorado-ML/mufs/workflows/CI/badge.svg)
[![Codacy Badge](https://app.codacy.com/project/badge/Grade/66ad727eb13e4c7a8816db1e44d994a7)](https://www.codacy.com/gh/Doctorado-ML/mufs/dashboard?utm_source=github.com&utm_medium=referral&utm_content=Doctorado-ML/mufs&utm_campaign=Badge_Grade)
[![Language grade: Python](https://img.shields.io/lgtm/grade/python/g/Doctorado-ML/mufs.svg?logo=lgtm&logoWidth=18)](https://lgtm.com/projects/g/Doctorado-ML/mufs/context:python)
[![PyPI version](https://badge.fury.io/py/MUFS.svg)](https://badge.fury.io/py/MUFS)
[![Technical Debt](https://haystack.rmontanana.es:25000/api/project_badges/measure?project=mufs&metric=sqale_index&token=1119a3bfd4025d50ef3009a44c600c16670ee31a)](https://haystack.rmontanana.es:25000/dashboard?id=mufs)
[![Quality Gate Status](https://haystack.rmontanana.es:25000/api/project_badges/measure?project=mufs&metric=alert_status&token=1119a3bfd4025d50ef3009a44c600c16670ee31a)](https://haystack.rmontanana.es:25000/dashboard?id=mufs)
![https://img.shields.io/badge/python-3.8%2B-blue](https://img.shields.io/badge/python-3.8%2B-brightgreen)
# MUFS
@@ -15,3 +19,7 @@ Proceedings, Twentieth International Conference on Machine Learning. ed. / T. Fa
### Correlation-based Feature Selection
Hall, M. A. (1999), 'Correlation-based Feature Selection for Machine Learning'.
### IWSS
Based on: P. Bermejo, J. A. Gamez and J. M. Puerta, "Incremental Wrapper-based subset Selection with replacement: An advantageous alternative to sequential forward selection," 2009 IEEE Symposium on Computational Intelligence and Data Mining, 2009, pp. 367-374, doi: 10.1109/CIDM.2009.4938673.

View File

@@ -3,6 +3,7 @@ from sys import float_info
from itertools import combinations
import numpy as np
from .Metrics import Metrics
from ._version import __version__
class MUFS:
@@ -26,7 +27,7 @@ class MUFS:
"""
def __init__(self, max_features=None, discrete=True):
self._max_features = max_features
self.max_features = max_features
self._discrete = discrete
self.symmetrical_uncertainty = (
Metrics.symmetrical_uncertainty
@@ -40,6 +41,11 @@ class MUFS:
)
self._fitted = False
@staticmethod
def version() -> str:
    """Report the version of the package.

    Returns
    -------
    str
        the value of ``__version__`` imported from the ``_version`` module
    """
    return __version__
def _initialize(self, X, y):
"""Initialize the attributes so support multiple calls using same
object
@@ -53,8 +59,10 @@ class MUFS:
"""
self.X_ = X
self.y_ = y
if self._max_features is None:
if self.max_features is None:
self._max_features = X.shape[1]
else:
self._max_features = self.max_features
self._result = None
self._scores = []
self._su_labels = None
@@ -105,7 +113,9 @@ class MUFS:
def _compute_merit(self, features):
"""Compute the merit function for cfs algorithms
"Good feature subsets contain features highly correlated with
(predictive of) the class, yet uncorrelated with (not predictive of)
each other"
Parameters
----------
features : list
@@ -124,7 +134,7 @@ class MUFS:
k = len(features)
for pair in list(combinations(features, 2)):
rff += self._compute_su_features(*pair)
return rcf / sqrt(k + (k ** 2 - k) * rff)
return rcf / sqrt(k + (k**2 - k) * rff)
def cfs(self, X, y):
"""Correlation-based Feature Selection
@@ -162,6 +172,10 @@ class MUFS:
id_selected = idx
merit = merit_new
candidates.pop()
if id_selected is None:
# No more features to add: all merits are nan because of
# constant features
break
candidates.append(feature_order[id_selected])
self._scores.append(merit)
del feature_order[id_selected]
@@ -264,3 +278,58 @@ class MUFS:
list of scores of the features selected
"""
return self._scores if self._fitted else []
def iwss(self, X, y, threshold):
    """Incremental Wrapper Subset Selection

    Features are visited in descending order of the scores returned by
    ``_compute_su_labels``. The best ranked feature is always selected and
    the second one is added next; every following feature is kept while
    the merit of the subset improves or its relative change with respect
    to the best merit found so far stays below ``threshold``. The search
    stops at the first rejected feature or as soon as ``max_features``
    features have been selected.

    Parameters
    ----------
    X : np.array
        array of features
    y : np.array
        vector of labels
    threshold : float
        maximum relative change of the merit tolerated to keep a feature
        that does not improve it, must be in the range [0, 0.5]

    Returns
    -------
    self
        self

    Raises
    ------
    ValueError
        if the threshold is less than 0 or greater than 0.5
    """
    if threshold < 0 or threshold > 0.5:
        raise ValueError(
            "Threshold cannot be less than 0 or greater than 0.5"
        )
    self._initialize(X, y)
    s_list = self._compute_su_labels()
    # Feature indices sorted by descending score
    features = (-s_list).argsort().tolist()
    candidates = []
    if features:
        # The best ranked feature is always part of the result
        first_feature = features.pop(0)
        candidates.append(first_feature)
        self._scores.append(s_list[first_feature])
    # Only seed the second feature if there is one available and the
    # limit of features has not been reached yet
    if features and len(candidates) < self._max_features:
        candidates.append(features.pop(0))
        merit = self._compute_merit(candidates)
        self._scores.append(merit)
        for feature in features:
            # Checked before adding so limits of 1 or 2 are honoured too
            if len(candidates) >= self._max_features:
                break
            candidates.append(feature)
            merit_new = self._compute_merit(candidates)
            delta = abs(merit - merit_new) / merit if merit != 0.0 else 0.0
            # Written as a negation so nan merits reject the feature
            if not (merit_new > merit or delta < threshold):
                candidates.pop()
                break
            if merit_new > merit:
                merit = merit_new
            self._scores.append(merit_new)
    self._result = candidates
    return self

View File

@@ -1,9 +1,8 @@
from .Selection import MUFS
__version__ = "0.1.1"
__author__ = "Ricardo Montañana Gómez"
__author_email__ = "Ricardo.Montanana@alu.uclm.es"
__copyright__ = "Copyright 2021, Ricardo Montañana Gómez"
__copyright__ = "Copyright 2021-2022, Ricardo Montañana Gómez"
__license__ = "MIT License"
__all__ = ["MUFS"]

1
mufs/_version.py Normal file
View File

@@ -0,0 +1 @@
__version__ = "0.1.3"

View File

@@ -1,11 +1,14 @@
import unittest
import os
import pandas as pd
import numpy as np
from mdlp import MDLP
from sklearn.datasets import load_wine, load_iris
from ..Selection import MUFS
from .._version import __version__
class MUFS_test(unittest.TestCase):
class MUFSTest(unittest.TestCase):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
mdlp = MDLP(random_state=1)
@@ -15,6 +18,11 @@ class MUFS_test(unittest.TestCase):
mdlp = MDLP(random_state=1)
self.X_i = mdlp.fit_transform(self.X_ic, self.y_i).astype("int64")
def test_version(self):
    """The version reported by MUFS matches the one in _version."""
    reported = MUFS().version()
    self.assertEqual(__version__, reported)
def assertListAlmostEqual(self, list1, list2, tol=7):
self.assertEqual(len(list1), len(list2))
for a, b in zip(list1, list2):
@@ -32,7 +40,7 @@ class MUFS_test(unittest.TestCase):
def test_csf_wine(self):
mufs = MUFS()
expected = [6, 12, 9, 4, 10, 0]
self.assertListAlmostEqual(
self.assertListEqual(
expected, mufs.cfs(self.X_w, self.y_w).get_results()
)
expected = [
@@ -78,7 +86,7 @@ class MUFS_test(unittest.TestCase):
mufs = MUFS()
expected = [3, 2, 0, 1]
computed = mufs.cfs(self.X_i, self.y_i).get_results()
self.assertListAlmostEqual(expected, computed)
self.assertListEqual(expected, computed)
expected = [
0.870521418179061,
0.8968651482682227,
@@ -148,3 +156,46 @@ class MUFS_test(unittest.TestCase):
0.44518278979085646,
]
self.assertListAlmostEqual(expected, mufs.get_scores())
def test_iwss_wine(self):
    """Check features and scores selected by iwss with threshold 0.2 on
    the X_w, y_w fixture."""
    selector = MUFS()
    computed = selector.iwss(self.X_w, self.y_w, 0.2).get_results()
    self.assertListEqual([6, 9, 12], computed)
    expected_scores = [
        0.5218299405215557,
        0.5947822876110085,
        0.4877384978817362,
    ]
    self.assertListAlmostEqual(expected_scores, selector.get_scores())
def test_iwss_wine_max_features(self):
    """Check features and scores selected by iwss with threshold 0.4 on
    the X_w, y_w fixture when max_features is 3."""
    selector = MUFS(max_features=3)
    computed = selector.iwss(self.X_w, self.y_w, 0.4).get_results()
    self.assertListEqual([6, 9, 12], computed)
    expected_scores = [
        0.5218299405215557,
        0.5947822876110085,
        0.4877384978817362,
    ]
    self.assertListAlmostEqual(expected_scores, selector.get_scores())
def test_iwss_exception(self):
    """iwss raises ValueError for thresholds 0.51 and -0.01."""
    selector = MUFS()
    for invalid_threshold in (0.51, -0.01):
        with self.assertRaises(ValueError):
            selector.iwss(self.X_w, self.y_w, invalid_threshold)
def test_iwss_better_merit_condition(self):
    """Check the features selected by iwss with threshold 0.3 on the
    balloons_R.dat file stored next to this test module."""
    here = os.path.dirname(os.path.abspath(__file__))
    file_name = os.path.join(here, "balloons_R.dat")
    data = pd.read_csv(file_name, sep="\t", index_col=0)
    # "clase" is the label column, the remaining columns are the features
    y = data["clase"].to_numpy()
    X = data.drop("clase", axis=1).to_numpy()
    selector = MUFS()
    computed = selector.iwss(X, y, 0.3).get_results()
    self.assertListEqual([0, 2, 3, 1], computed)
def test_iwss_empty(self):
    """iwss returns [1, 0] when columns 0 and 1 are removed from the X_i
    fixture, so no features are left after the first two are selected."""
    reduced = np.delete(self.X_i, [0, 1], 1)
    selector = MUFS()
    computed = selector.iwss(reduced, self.y_i, 0.3).get_results()
    self.assertListEqual(computed, [1, 0])

View File

@@ -6,7 +6,7 @@ from mdlp import MDLP
from ..Selection import Metrics
class Metrics_test(unittest.TestCase):
class MetricsTest(unittest.TestCase):
def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs)
mdlp = MDLP(random_state=1)

View File

@@ -1,4 +1,4 @@
from .MUFS_test import MUFS_test
from .Metrics_test import Metrics_test
from .MUFS_test import MUFSTest
from .Metrics_test import MetricsTest
__all__ = ["MUFS_test", "Metrics_test"]
__all__ = ["MUFSTest", "MetricsTest"]

17
mufs/tests/balloons_R.dat Executable file
View File

@@ -0,0 +1,17 @@
f1 f2 f3 f4 clase
1 0.968246 -0.968246 0.968246 0.968246 1
2 0.968246 -0.968246 0.968246 -0.968246 1
3 0.968246 -0.968246 -0.968246 0.968246 1
4 0.968246 -0.968246 -0.968246 -0.968246 1
5 0.968246 0.968246 0.968246 0.968246 1
6 0.968246 0.968246 0.968246 -0.968246 0
7 0.968246 0.968246 -0.968246 0.968246 0
8 0.968246 0.968246 -0.968246 -0.968246 0
9 -0.968246 -0.968246 0.968246 0.968246 1
10 -0.968246 -0.968246 0.968246 -0.968246 0
11 -0.968246 -0.968246 -0.968246 0.968246 0
12 -0.968246 -0.968246 -0.968246 -0.968246 0
13 -0.968246 0.968246 0.968246 0.968246 1
14 -0.968246 0.968246 0.968246 -0.968246 0
15 -0.968246 0.968246 -0.968246 0.968246 0
16 -0.968246 0.968246 -0.968246 -0.968246 0

3
requirements/dev.txt Normal file
View File

@@ -0,0 +1,3 @@
-r production.txt
mdlp
pandas

View File

@@ -1,2 +1 @@
scikit-learn>0.24
mdlp

View File

@@ -1,4 +1,5 @@
import warnings
import time
from mufs import MUFS
from mufs.Metrics import Metrics
from stree import Stree
@@ -26,16 +27,26 @@ for i in range(n):
# Classification
warnings.filterwarnings("ignore")
print("CFS")
now = time.time()
cfs_f = mufsc.cfs(X, y).get_results()
print(cfs_f)
time_cfs = time.time() - now
print(cfs_f, "items: ", len(cfs_f), f"time: {time_cfs:.3f} seconds")
print("FCBF")
fcfb_f = mufsc.fcbf(X, y, 5e-2).get_results()
print(fcfb_f, len(fcfb_f))
now = time.time()
fcbf_f = mufsc.fcbf(X, y, 0.07).get_results()
time_fcbf = time.time() - now
print(fcbf_f, "items: ", len(fcbf_f), f"time: {time_fcbf:.3f} seconds")
now = time.time()
print("IWSS")
iwss_f = mufsc.iwss(X, y, 0.5).get_results()
time_iwss = time.time() - now
print(iwss_f, "items: ", len(iwss_f), f"time: {time_iwss:.3f} seconds")
print("X.shape=", X.shape)
clf = Stree(random_state=0)
print("Accuracy whole dataset", clf.fit(X, y).score(X, y))
clf = Stree(random_state=0)
print("Accuracy cfs", clf.fit(X[:, cfs_f], y).score(X[:, cfs_f], y))
clf = Stree(random_state=0)
subf = fcfb_f
print("Accuracy fcfb", clf.fit(X[:, subf], y).score(X[:, subf], y))
print("Accuracy fcfb", clf.fit(X[:, fcbf_f], y).score(X[:, fcbf_f], y))
clf = Stree(random_state=0)
print("Accuracy iwss", clf.fit(X[:, iwss_f], y).score(X[:, iwss_f], y))

View File

@@ -1,3 +1,4 @@
import os
import setuptools
@@ -6,9 +7,10 @@ def readme():
return f.read()
def get_data(field: str):
def get_data(field):
item = ""
with open("mufs/__init__.py") as f:
file_name = "_version.py" if field == "version" else "__init__.py"
with open(os.path.join("mufs", file_name)) as f:
for line in f.readlines():
if line.startswith(f"__{field}__"):
delim = '"' if '"' in line else "'"
@@ -19,6 +21,11 @@ def get_data(field: str):
return item
def get_requirements():
    """Return the lines of requirements/production.txt as a list of str,
    one item per line and without line endings."""
    with open("requirements/production.txt") as requirements_file:
        content = requirements_file.read()
    return content.splitlines()
setuptools.setup(
name="MUFS",
version=get_data("version"),
@@ -38,11 +45,13 @@ setuptools.setup(
"Development Status :: 4 - Beta",
"License :: OSI Approved :: " + get_data("license"),
"Programming Language :: Python :: 3.8",
"Programming Language :: Python :: 3.9",
"Programming Language :: Python :: 3.10",
"Natural Language :: English",
"Topic :: Scientific/Engineering :: Artificial Intelligence",
"Intended Audience :: Science/Research",
],
install_requires=["scikit-learn"],
install_requires=get_requirements(),
test_suite="mufs.tests",
zip_safe=False,
)

4
sonar-project.properties Normal file
View File

@@ -0,0 +1,4 @@
sonar.projectKey=mufs
sonar.sourceEncoding=UTF-8
sonar.sources=.
sonar.python.version=3.8, 3.9, 3.10